diff --git a/python/ClipDetection/COPYING b/python/ClipDetection/COPYING index 19dc35b2..c33078f3 100644 --- a/python/ClipDetection/COPYING +++ b/python/ClipDetection/COPYING @@ -1,175 +1,175 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. 
However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
\ No newline at end of file diff --git a/python/ClipDetection/CoOp/.gitignore b/python/ClipDetection/CoOp/.gitignore new file mode 100644 index 00000000..ef81d188 --- /dev/null +++ b/python/ClipDetection/CoOp/.gitignore @@ -0,0 +1,133 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Custom +# output/ +debug.sh diff --git a/python/ClipDetection/CoOp/LICENSE b/python/ClipDetection/CoOp/LICENSE new file mode 100644 index 00000000..26d793c7 --- /dev/null +++ b/python/ClipDetection/CoOp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Kaiyang Zhou + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/python/ClipDetection/CoOp/MODIFICATIONS b/python/ClipDetection/CoOp/MODIFICATIONS new file mode 100644 index 00000000..d0dc8356 --- /dev/null +++ b/python/ClipDetection/CoOp/MODIFICATIONS @@ -0,0 +1,20 @@ +The following files have been created for use in the ClipDetection repo: + +./configs/CoOp/vit_l14_ep50.yaml + + +The following files have been modified (see the top of each file for a description of the changes): + +./train.py +./trainers/coop.py +./clip/clip.py + + +The files in the following directories CAN PROBABLY BE DELETED: + +./configs/* (except for files listed above) +./datasets/* +./lpclip/* +./output/* (once trained model files are saved in Docker) +./saved_outputs/* +./scripts/* \ No newline at end of file diff --git a/python/ClipDetection/CoOp/README.md b/python/ClipDetection/CoOp/README.md new file mode 100644 index 00000000..31ee299c --- /dev/null +++ b/python/ClipDetection/CoOp/README.md @@ -0,0 +1,64 @@ +# Prompt Learning for Vision-Language Models + +This repo contains the codebase of a series of research projects focused on adapting vision-language models like [CLIP](https://arxiv.org/abs/2103.00020) to downstream datasets via *prompt learning*: + +* [Conditional Prompt Learning for Vision-Language Models](https://arxiv.org/abs/2203.05557), in CVPR, 2022. +* [Learning to Prompt for Vision-Language Models](https://arxiv.org/abs/2109.01134), IJCV, 2022. + +## Updates + +- **07.10.2022**: Just added to both [CoOp](https://arxiv.org/abs/2109.01134) and [CoCoOp](https://arxiv.org/abs/2203.05557) (in their appendices) the results on the newly proposed DOSCO (DOmain Shift in COntext) benchmark, which focuses on contextual domain shift and covers a diverse set of classification problems. (The paper about DOSCO is [here](https://arxiv.org/abs/2209.07521) and the code for running CoOp/CoCoOp on DOSCO is [here](https://github.com/KaiyangZhou/on-device-dg).) 
+ +- **17.09.2022**: [Call for Papers](https://kaiyangzhou.github.io/assets/cfp_ijcv_lvms.html): IJCV Special Issue on *The Promises and Dangers of Large Vision Models*. + +- **16.07.2022**: CoOp has been accepted to IJCV for publication! + +- **10.06.2022**: Our latest work, [Neural Prompt Search](https://arxiv.org/abs/2206.04673), has just been released on arxiv. It provides a novel perspective for fine-tuning large vision models like [ViT](https://arxiv.org/abs/2010.11929), so please check it out if you're interested in parameter-efficient fine-tuning/transfer learning. The code is also made public [here](https://github.com/Davidzhangyuanhan/NOAH). + +- **08.06.2022**: If you're looking for the code to draw the few-shot performance curves (like the ones we show in the CoOp's paper), see `draw_curves.py`. + +- **09.04.2022**: The pre-trained weights of CoOp on ImageNet are released [here](#pre-trained-models). + +- **11.03.2022**: The code of our CVPR'22 paper, "[Conditional Prompt Learning for Vision-Language Models](https://arxiv.org/abs/2203.05557)," is released. + +- **15.10.2021**: We find that the `best_val` model and the `last_step` model achieve similar performance, so we set `TEST.FINAL_MODEL = "last_step"` for all datasets to save training time. Why we used `best_val`: the ([tiny](https://github.com/KaiyangZhou/CoOp/blob/main/datasets/oxford_pets.py#L32)) validation set was designed for the linear probe approach, which requires extensive tuning for its hyperparameters, so we used the `best_val` model for CoOp as well for fair comparison (in this way, both approaches have access to the validation set). + +- **09.10.2021**: Important changes are made to Dassl's transforms.py. Please pull the latest commits from https://github.com/KaiyangZhou/Dassl.pytorch and this repo to make sure the code works properly. 
In particular, 1) `center_crop` now becomes a default transform in testing (applied after resizing the smaller edge to a certain size to keep the image aspect ratio), and 2) for training, `Resize(cfg.INPUT.SIZE)` is deactivated when `random_crop` or `random_resized_crop` is used. Please read this [issue](https://github.com/KaiyangZhou/CoOp/issues/8) on how these changes might affect the performance. + +- **18.09.2021**: We have fixed an error in Dassl which could cause a training data loader to have zero length (so no training will be performed) when the dataset size is smaller than the batch size (due to `drop_last=True`). Please pull the latest commit for Dassl (>= `8eecc3c`). This error led to lower results for CoOp in EuroSAT's 1- and 2-shot settings (others are all correct). We will update the paper on arxiv to fix this error. + +## How to Install +This code is built on top of the awesome toolbox [Dassl.pytorch](https://github.com/KaiyangZhou/Dassl.pytorch) so you need to install the `dassl` environment first. Simply follow the instructions described [here](https://github.com/KaiyangZhou/Dassl.pytorch#installation) to install `dassl` as well as PyTorch. After that, run `pip install -r requirements.txt` under `CoOp/` to install a few more packages required by [CLIP](https://github.com/openai/CLIP) (this should be done when `dassl` is activated). Then, you are ready to go. + +Follow [DATASETS.md](DATASETS.md) to install the datasets. + +## How to Run + +Click a paper below to see the detailed instructions on how to run the code to reproduce the results. + +* [Learning to Prompt for Vision-Language Models](COOP.md) +* [Conditional Prompt Learning for Vision-Language Models](COCOOP.md) + +## Models and Results + +- The pre-trained weights of CoOp (both M=16 & M=4) on ImageNet based on RN50, RN101, ViT-B/16 and ViT-B/32 can be downloaded altogether via this [link](https://drive.google.com/file/d/18ypxfd82RR0pizc5MM1ZWDYDk4j0BtPF/view?usp=sharing). 
The weights can be used to reproduce the results in Table 1 of CoOp's paper (i.e., the results on ImageNet and its four variants with domain shift). To load the weights and run the evaluation code, you will need to specify `--model-dir` and `--load-epoch` (see this [script](https://github.com/KaiyangZhou/CoOp/blob/main/scripts/eval.sh) for example). +- The raw numerical results can be found at this [google drive link](https://docs.google.com/spreadsheets/d/12_kaFdD0nct9aUIrDoreY0qDunQ9q9tv/edit?usp=sharing&ouid=100312610418109826457&rtpof=true&sd=true). + +## Citation +If you use this code in your research, please kindly cite the following papers + +```bash +@inproceedings{zhou2022cocoop, + title={Conditional Prompt Learning for Vision-Language Models}, + author={Zhou, Kaiyang and Yang, Jingkang and Loy, Chen Change and Liu, Ziwei}, + booktitle={IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2022} +} + +@article{zhou2022coop, + title={Learning to Prompt for Vision-Language Models}, + author={Zhou, Kaiyang and Yang, Jingkang and Loy, Chen Change and Liu, Ziwei}, + journal={International Journal of Computer Vision (IJCV)}, + year={2022} +} +``` diff --git a/python/ClipDetection/CoOp/clip/__init__.py b/python/ClipDetection/CoOp/clip/__init__.py new file mode 100644 index 00000000..dcc56195 --- /dev/null +++ b/python/ClipDetection/CoOp/clip/__init__.py @@ -0,0 +1 @@ +from .clip import * diff --git a/python/ClipDetection/CoOp/clip/bpe_simple_vocab_16e6.txt.gz b/python/ClipDetection/CoOp/clip/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 00000000..7b5088a5 Binary files /dev/null and b/python/ClipDetection/CoOp/clip/bpe_simple_vocab_16e6.txt.gz differ diff --git a/python/ClipDetection/CoOp/clip/clip.py b/python/ClipDetection/CoOp/clip/clip.py new file mode 100644 index 00000000..7cfaf724 --- /dev/null +++ b/python/ClipDetection/CoOp/clip/clip.py @@ -0,0 +1,253 @@ +################################################################ 
+#                     CHANGES MADE TO FILE                     #
+# ------------------------------------------------------------ #
+# Modified CLIP files to support ViT-L/14 model                #
+#   - From OpenAI source code for CLIP                         #
+#                                                              #
+################################################################
+
+import hashlib
+import os
+import urllib
+import urllib.request  # `import urllib` alone does not load the `request` submodule
+import warnings
+from typing import Any, Union, List
+from pkg_resources import packaging
+
+import torch
+from PIL import Image
+from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
+from tqdm import tqdm
+
+from .model import build_model
+from .simple_tokenizer import SimpleTokenizer as _Tokenizer
+
+# Fall back to the PIL constant when torchvision does not export InterpolationMode.
+try:
+    from torchvision.transforms import InterpolationMode
+    BICUBIC = InterpolationMode.BICUBIC
+except ImportError:
+    BICUBIC = Image.BICUBIC
+
+
+if packaging.version.parse(torch.__version__) < packaging.version.parse("1.7.1"):
+    warnings.warn("PyTorch version 1.7.1 or higher is recommended")
+
+
+__all__ = ["available_models", "load", "tokenize"]
+_tokenizer = _Tokenizer()
+
+# Checkpoint URLs keyed by model name. The second-to-last path component of each
+# URL is the SHA256 digest that `_download` verifies the file against.
+_MODELS = {
+    "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt",
+    "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt",
+    "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt",
+    "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt",
+    "RN50x64": "https://openaipublic.azureedge.net/clip/models/be1cfb55d75a9666199fb2206c106743da0f6468c9d327f3e0d0a543a9919d9c/RN50x64.pt",
+    "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
+    "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
+    "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt",
+    "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt",
+}
+
+
+def _sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
+    """Return the hex SHA256 digest of the file at `path`.
+
+    The file is read in `chunk_size`-byte pieces so that large checkpoints are
+    never held in memory all at once, and the handle is closed deterministically.
+    """
+    digest = hashlib.sha256()
+    with open(path, "rb") as stream:
+        for chunk in iter(lambda: stream.read(chunk_size), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def _download(url: str, root: str = os.path.expanduser("~/.cache/clip")):
+    """Download `url` into `root` unless a verified copy is already cached.
+
+    The expected SHA256 digest is taken from the second-to-last path component
+    of `url`. A cached file whose digest matches is reused; a cached file whose
+    digest differs triggers a warning and a fresh download.
+
+    Args:
+        url: Address of the file to fetch.
+        root: Directory for the cached file; created if it does not exist.
+
+    Returns:
+        Path of the verified local file.
+
+    Raises:
+        RuntimeError: If the target path exists but is not a regular file, or if
+            the downloaded file's digest does not match the expected one.
+    """
+    os.makedirs(root, exist_ok=True)
+    filename = os.path.basename(url)
+
+    expected_sha256 = url.split("/")[-2]
+    download_target = os.path.join(root, filename)
+
+    if os.path.exists(download_target) and not os.path.isfile(download_target):
+        raise RuntimeError(f"{download_target} exists and is not a regular file")
+
+    if os.path.isfile(download_target):
+        if _sha256_of(download_target) == expected_sha256:
+            return download_target
+        warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
+
+    with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
+        # Content-Length can be absent; tqdm accepts total=None (no percentage shown).
+        content_length = source.info().get("Content-Length")
+        total = int(content_length) if content_length is not None else None
+        with tqdm(total=total, ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop:
+            while True:
+                buffer = source.read(8192)
+                if not buffer:
+                    break
+
+                output.write(buffer)
+                loop.update(len(buffer))
+
+    if _sha256_of(download_target) != expected_sha256:
+        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not match")
+
+    return download_target
+
+
+def _convert_image_to_rgb(image):
+    """Return `image.convert("RGB")`; presumably `image` is a PIL image (confirm with callers)."""
+    return image.convert("RGB")
+
+
+def _transform(n_px):
+    """Build the preprocessing pipeline for a square input of side `n_px`.
+
+    Steps, in order: bicubic resize to `n_px`, center crop to `n_px`, RGB
+    conversion, tensor conversion, and normalization with the fixed per-channel
+    mean/std constants below.
+    """
+    return Compose([
+        Resize(n_px, interpolation=BICUBIC),
+        CenterCrop(n_px),
+        _convert_image_to_rgb,
+        ToTensor(),
+        Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
+    ])
+
+
+def available_models() -> List[str]:
+    """Returns the names of available CLIP models"""
+    return list(_MODELS.keys())
+
+
+def load(name: str, device: Union[str, torch.device] = "cuda" 
if torch.cuda.is_available() else "cpu", jit: bool = False, download_root: str = None): + """Load a CLIP model + + Parameters + ---------- + name : str + A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict + + device : Union[str, torch.device] + The device to put the loaded model + + jit : bool + Whether to load the optimized JIT model or more hackable non-JIT model (default). + + download_root: str + path to download the model files; by default, it uses "~/.cache/clip" + + Returns + ------- + model : torch.nn.Module + The CLIP model + + preprocess : Callable[[PIL.Image], torch.Tensor] + A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input + """ + if name in _MODELS: + model_path = _download(_MODELS[name], download_root or os.path.expanduser("~/.cache/clip")) + elif os.path.isfile(name): + model_path = name + else: + raise RuntimeError(f"Model {name} not found; available models = {available_models()}") + + with open(model_path, 'rb') as opened_file: + try: + # loading JIT archive + model = torch.jit.load(opened_file, map_location=device if jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if jit: + warnings.warn(f"File {model_path} is not a JIT archive. Loading as a state dict instead") + jit = False + state_dict = torch.load(opened_file, map_location="cpu") + + if not jit: + model = build_model(state_dict or model.state_dict()).to(device) + if str(device) == "cpu": + model.float() + return model, _transform(model.visual.input_resolution) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def _node_get(node: torch._C.Node, key: str): + """Gets attributes of a node which is polymorphic over return type. 
+ + From https://github.com/pytorch/pytorch/pull/82628 + """ + sel = node.kindOf(key) + return getattr(node, sel)(key) + + def patch_device(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(_node_get(node, "value")).startswith("cuda"): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if _node_get(inputs[i].node(), "value") == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _transform(model.input_resolution.item()) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> Union[torch.IntTensor, torch.LongTensor]: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + truncate: bool + 
Whether to truncate the text in case its encoding is longer than the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. + We return LongTensor when torch version is <1.8.0, since older index_select requires indices to be long. + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + if packaging.version.parse(torch.__version__) < packaging.version.parse("1.8.0"): + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + else: + result = torch.zeros(len(all_tokens), context_length, dtype=torch.int) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + if truncate: + tokens = tokens[:context_length] + tokens[-1] = eot_token + else: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = torch.tensor(tokens) + + return result diff --git a/python/ClipDetection/CoOp/clip/model.py b/python/ClipDetection/CoOp/clip/model.py new file mode 100644 index 00000000..232b7792 --- /dev/null +++ b/python/ClipDetection/CoOp/clip/model.py @@ -0,0 +1,436 @@ +from collections import OrderedDict +from typing import Tuple, Union + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. 
an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.relu1 = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.relu2 = nn.ReLU(inplace=True) + + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu3 = nn.ReLU(inplace=True) + + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential(OrderedDict([ + ("-1", nn.AvgPool2d(stride)), + ("0", nn.Conv2d(inplanes, planes * self.expansion, 1, stride=1, bias=False)), + ("1", nn.BatchNorm2d(planes * self.expansion)) + ])) + + def forward(self, x: torch.Tensor): + identity = x + + out = self.relu1(self.bn1(self.conv1(x))) + out = self.relu2(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu3(out) + return out + + +class AttentionPool2d(nn.Module): + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + self.positional_embedding = nn.Parameter(torch.randn(spacial_dim ** 2 + 1, embed_dim) / embed_dim ** 0.5) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + def forward(self, x): + x = x.flatten(start_dim=2).permute(2, 0, 1) # NCHW -> (HW)NC + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) # (HW+1)NC + x = 
x + self.positional_embedding[:, None, :].to(x.dtype) # (HW+1)NC + x, _ = F.multi_head_attention_forward( + query=x[:1], key=x, value=x, + embed_dim_to_check=x.shape[-1], + num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias]), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False + ) + return x.squeeze(0) + + +class ModifiedResNet(nn.Module): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2d(3, width // 2, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(width // 2) + self.relu1 = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(width // 2, width // 2, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(width // 2) + self.relu2 = nn.ReLU(inplace=True) + self.conv3 = nn.Conv2d(width // 2, width, kernel_size=3, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(width) + self.relu3 = nn.ReLU(inplace=True) + self.avgpool = nn.AvgPool2d(2) + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, 
layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + def stem(x): + x = self.relu1(self.bn1(self.conv1(x))) + x = self.relu2(self.bn2(self.conv2(x))) + x = self.relu3(self.bn3(self.conv3(x))) + x = self.avgpool(x) + return x + + x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class QuickGELU(nn.Module): + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: 
torch.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Module): + def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x: torch.Tensor): + return self.resblocks(x) + + +class VisionTransformer(nn.Module): + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False) + + scale = width ** -0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width)) + self.ln_pre = LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, output_dim)) + + def forward(self, x: torch.Tensor): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + x = torch.cat([self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + x = self.ln_pre(x) + + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class CLIP(nn.Module): + def __init__(self, + embed_dim: int, + # vision + image_resolution: int, 
+ vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int + ): + super().__init__() + + self.context_length = context_length + + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet( + layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width + ) + else: + vision_heads = vision_width // 64 + self.visual = VisionTransformer( + input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim + ) + + self.transformer = Transformer( + width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask() + ) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = nn.Parameter(torch.empty(self.context_length, transformer_width)) + self.ln_final = LayerNorm(transformer_width) + + self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) + self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + self.initialize_parameters() + + def initialize_parameters(self): + nn.init.normal_(self.token_embedding.weight, std=0.02) + nn.init.normal_(self.positional_embedding, std=0.01) + + if isinstance(self.visual, ModifiedResNet): + if self.visual.attnpool is not None: + std = self.visual.attnpool.c_proj.in_features ** -0.5 + nn.init.normal_(self.visual.attnpool.q_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.k_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.v_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.c_proj.weight, std=std) + + for resnet_block in [self.visual.layer1, self.visual.layer2, 
self.visual.layer3, self.visual.layer4]: + for name, param in resnet_block.named_parameters(): + if name.endswith("bn3.weight"): + nn.init.zeros_(param) + + proj_std = (self.transformer.width ** -0.5) * ((2 * self.transformer.layers) ** -0.5) + attn_std = self.transformer.width ** -0.5 + fc_std = (2 * self.transformer.width) ** -0.5 + for block in self.transformer.resblocks: + nn.init.normal_(block.attn.in_proj_weight, std=attn_std) + nn.init.normal_(block.attn.out_proj.weight, std=proj_std) + nn.init.normal_(block.mlp.c_fc.weight, std=fc_std) + nn.init.normal_(block.mlp.c_proj.weight, std=proj_std) + + if self.text_projection is not None: + nn.init.normal_(self.text_projection, std=self.transformer.width ** -0.5) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float("-inf")) + mask.triu_(1) # zero out the lower diagonal + return mask + + @property + def dtype(self): + return self.visual.conv1.weight.dtype + + def encode_image(self, image): + return self.visual(image.type(self.dtype)) + + def encode_text(self, text): + x = self.token_embedding(text).type(self.dtype) # [batch_size, n_ctx, d_model] + + x = x + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection + + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=1, keepdim=True) + text_features = 
text_features / text_features.norm(dim=1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = logit_scale * image_features @ text_features.t() + logits_per_text = logits_per_image.t() + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text + + +def convert_weights(model: nn.Module): + """Convert applicable model parameters to fp16""" + + def _convert_weights_to_fp16(l): + if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)): + l.weight.data = l.weight.data.half() + if l.bias is not None: + l.bias.data = l.bias.data.half() + + if isinstance(l, nn.MultiheadAttention): + for attr in [*[f"{s}_proj_weight" for s in ["in", "q", "k", "v"]], "in_proj_bias", "bias_k", "bias_v"]: + tensor = getattr(l, attr) + if tensor is not None: + tensor.data = tensor.data.half() + + for name in ["text_projection", "proj"]: + if hasattr(l, name): + attr = getattr(l, name) + if attr is not None: + attr.data = attr.data.half() + + model.apply(_convert_weights_to_fp16) + + +def build_model(state_dict: dict): + vit = "visual.proj" in state_dict + + if vit: + vision_width = state_dict["visual.conv1.weight"].shape[0] + vision_layers = len([k for k in state_dict.keys() if k.startswith("visual.") and k.endswith(".attn.in_proj_weight")]) + vision_patch_size = state_dict["visual.conv1.weight"].shape[-1] + grid_size = round((state_dict["visual.positional_embedding"].shape[0] - 1) ** 0.5) + image_resolution = vision_patch_size * grid_size + else: + counts: list = [len(set(k.split(".")[2] for k in state_dict if k.startswith(f"visual.layer{b}"))) for b in [1, 2, 3, 4]] + vision_layers = tuple(counts) + vision_width = state_dict["visual.layer1.0.conv1.weight"].shape[0] + output_width = round((state_dict["visual.attnpool.positional_embedding"].shape[0] - 1) ** 0.5) + vision_patch_size = None + assert output_width ** 2 + 1 == state_dict["visual.attnpool.positional_embedding"].shape[0] + image_resolution = 
output_width * 32 + + embed_dim = state_dict["text_projection"].shape[1] + context_length = state_dict["positional_embedding"].shape[0] + vocab_size = state_dict["token_embedding.weight"].shape[0] + transformer_width = state_dict["ln_final.weight"].shape[0] + transformer_heads = transformer_width // 64 + transformer_layers = len(set(k.split(".")[2] for k in state_dict if k.startswith("transformer.resblocks"))) + + model = CLIP( + embed_dim, + image_resolution, vision_layers, vision_width, vision_patch_size, + context_length, vocab_size, transformer_width, transformer_heads, transformer_layers + ) + + for key in ["input_resolution", "context_length", "vocab_size"]: + if key in state_dict: + del state_dict[key] + + convert_weights(model) + model.load_state_dict(state_dict) + return model.eval() diff --git a/python/ClipDetection/CoOp/clip/simple_tokenizer.py b/python/ClipDetection/CoOp/clip/simple_tokenizer.py new file mode 100644 index 00000000..0a66286b --- /dev/null +++ b/python/ClipDetection/CoOp/clip/simple_tokenizer.py @@ -0,0 +1,132 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. 
+ """ + bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8+n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). + """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152-256-2+1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v+'' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + ( token[-1] + '',) + pairs = get_pairs(word) + + if not pairs: + return token+'' + + while True: + bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, 
float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word)-1 and word[i+1] == second: + new_word.append(first+second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/python/ClipDetection/CoOp/configs/trainers/CoOp/vit_l14_ep50.yaml b/python/ClipDetection/CoOp/configs/trainers/CoOp/vit_l14_ep50.yaml new file mode 100644 index 00000000..2319b286 --- /dev/null +++ b/python/ClipDetection/CoOp/configs/trainers/CoOp/vit_l14_ep50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-L/14" \ No newline at end of file diff --git 
a/python/ClipDetection/CoOp/coop_test_results.txt b/python/ClipDetection/CoOp/coop_test_results.txt new file mode 100644 index 00000000..e3ef2a5b --- /dev/null +++ b/python/ClipDetection/CoOp/coop_test_results.txt @@ -0,0 +1,1008 @@ +CoOp Results: + +tench: 1/50 +goldfish: 36/50 +great white shark: 45/50 +tiger shark: 13/50 +hammerhead shark: 23/50 +electric ray: 0/50 +stingray: 0/50 +rooster: 0/50 +hen: 25/50 +ostrich: 2/50 +brambling: 16/50 +goldfinch: 5/50 +house finch: 48/50 +junco: 40/50 +indigo bunting: 49/50 +American robin: 49/50 +bulbul: 10/50 +jay: 40/50 +magpie: 25/50 +chickadee: 47/50 +American dipper: 14/50 +kite (bird of prey): 48/50 +bald eagle: 48/50 +vulture: 17/50 +great grey owl: 49/50 +fire salamander: 7/50 +smooth newt: 0/50 +newt: 0/50 +spotted salamander: 49/50 +axolotl: 42/50 +American bullfrog: 22/50 +tree frog: 12/50 +tailed frog: 9/50 +loggerhead sea turtle: 50/50 +leatherback sea turtle: 9/50 +mud turtle: 1/50 +terrapin: 4/50 +box turtle: 19/50 +banded gecko: 22/50 +green iguana: 7/50 +Carolina anole: 28/50 +desert grassland whiptail lizard: 46/50 +agama: 0/50 +frilled-necked lizard: 42/50 +alligator lizard: 13/50 +Gila monster: 24/50 +European green lizard: 0/50 +chameleon: 0/50 +Komodo dragon: 1/50 +Nile crocodile: 17/50 +American alligator: 0/50 +triceratops: 50/50 +worm snake: 1/50 +ring-necked snake: 49/50 +eastern hog-nosed snake: 0/50 +smooth green snake: 5/50 +kingsnake: 0/50 +garter snake: 0/50 +water snake: 0/50 +vine snake: 1/50 +night snake: 0/50 +boa constrictor: 2/50 +African rock python: 22/50 +Indian cobra: 7/50 +green mamba: 0/50 +sea snake: 0/50 +Saharan horned viper: 4/50 +eastern diamondback rattlesnake: 50/50 +sidewinder rattlesnake: 0/50 +trilobite: 5/50 +harvestman: 6/50 +scorpion: 0/50 +yellow garden spider: 5/50 +barn spider: 1/50 +European garden spider: 49/50 +southern black widow: 0/50 +tarantula: 37/50 +wolf spider: 2/50 +tick: 0/50 +centipede: 0/50 +black grouse: 19/50 +ptarmigan: 20/50 +ruffed grouse: 46/50 
+prairie grouse: 11/50 +peafowl: 31/50 +quail: 45/50 +partridge: 0/50 +african grey parrot: 48/50 +macaw: 49/50 +sulphur-crested cockatoo: 44/50 +lorikeet: 49/50 +coucal: 39/50 +bee eater: 46/50 +hornbill: 34/50 +hummingbird: 18/50 +jacamar: 44/50 +toucan: 49/50 +duck: 21/50 +red-breasted merganser: 47/50 +goose: 19/50 +black swan: 2/50 +tusker: 29/50 +echidna: 41/50 +platypus: 2/50 +wallaby: 41/50 +koala: 1/50 +wombat: 30/50 +jellyfish: 0/50 +sea anemone: 0/50 +brain coral: 38/50 +flatworm: 25/50 +nematode: 0/50 +conch: 15/50 +snail: 42/50 +slug: 12/50 +sea slug: 9/50 +chiton: 0/50 +chambered nautilus: 7/50 +Dungeness crab: 46/50 +rock crab: 6/50 +fiddler crab: 4/50 +red king crab: 3/50 +American lobster: 43/50 +spiny lobster: 44/50 +crayfish: 13/50 +hermit crab: 39/50 +isopod: 33/50 +white stork: 18/50 +black stork: 6/50 +spoonbill: 39/50 +flamingo: 0/50 +little blue heron: 29/50 +great egret: 47/50 +bittern bird: 8/50 +crane bird: 5/50 +limpkin: 48/50 +common gallinule: 20/50 +American coot: 20/50 +bustard: 0/50 +ruddy turnstone: 50/50 +dunlin: 25/50 +common redshank: 0/50 +dowitcher: 39/50 +oystercatcher: 28/50 +pelican: 0/50 +king penguin: 50/50 +albatross: 31/50 +grey whale: 2/50 +killer whale: 46/50 +dugong: 47/50 +sea lion: 44/50 +Chihuahua: 5/50 +Japanese Chin: 35/50 +Maltese: 0/50 +Pekingese: 32/50 +Shih Tzu: 40/50 +King Charles Spaniel: 43/50 +Papillon: 0/50 +toy terrier: 23/50 +Rhodesian Ridgeback: 40/50 +Afghan Hound: 21/50 +Basset Hound: 26/50 +Beagle: 2/50 +Bloodhound: 12/50 +Bluetick Coonhound: 37/50 +Black and Tan Coonhound: 35/50 +Treeing Walker Coonhound: 48/50 +English foxhound: 16/50 +Redbone Coonhound: 27/50 +borzoi: 44/50 +Irish Wolfhound: 21/50 +Italian Greyhound: 44/50 +Whippet: 21/50 +Ibizan Hound: 40/50 +Norwegian Elkhound: 39/50 +Otterhound: 21/50 +Saluki: 28/50 +Scottish Deerhound: 46/50 +Weimaraner: 46/50 +Staffordshire Bull Terrier: 38/50 +American Staffordshire Terrier: 36/50 +Bedlington Terrier: 37/50 +Border Terrier: 41/50 +Kerry 
Blue Terrier: 5/50 +Irish Terrier: 33/50 +Norfolk Terrier: 34/50 +Norwich Terrier: 0/50 +Yorkshire Terrier: 33/50 +Wire Fox Terrier: 34/50 +Lakeland Terrier: 16/50 +Sealyham Terrier: 43/50 +Airedale Terrier: 26/50 +Cairn Terrier: 31/50 +Australian Terrier: 7/50 +Dandie Dinmont Terrier: 8/50 +Boston Terrier: 41/50 +Miniature Schnauzer: 25/50 +Giant Schnauzer: 27/50 +Standard Schnauzer: 33/50 +Scottish Terrier: 36/50 +Tibetan Terrier: 33/50 +Australian Silky Terrier: 9/50 +Soft-coated Wheaten Terrier: 42/50 +West Highland White Terrier: 48/50 +Lhasa Apso: 0/50 +Flat-Coated Retriever: 45/50 +Curly-coated Retriever: 16/50 +Golden Retriever: 33/50 +Labrador Retriever: 38/50 +Chesapeake Bay Retriever: 37/50 +German Shorthaired Pointer: 41/50 +Vizsla: 44/50 +English Setter: 28/50 +Irish Setter: 25/50 +Gordon Setter: 34/50 +Brittany dog: 0/50 +Clumber Spaniel: 44/50 +English Springer Spaniel: 41/50 +Welsh Springer Spaniel: 44/50 +Cocker Spaniel: 0/50 +Sussex Spaniel: 21/50 +Irish Water Spaniel: 41/50 +Kuvasz: 0/50 +Schipperke: 33/50 +Groenendael dog: 5/50 +Malinois: 36/50 +Briard: 0/50 +Australian Kelpie: 33/50 +Komondor: 26/50 +Old English Sheepdog: 33/50 +Shetland Sheepdog: 34/50 +collie: 1/50 +Border Collie: 27/50 +Bouvier des Flandres dog: 27/50 +Rottweiler: 42/50 +German Shepherd Dog: 48/50 +Dobermann: 31/50 +Miniature Pinscher: 45/50 +Greater Swiss Mountain Dog: 18/50 +Bernese Mountain Dog: 41/50 +Appenzeller Sennenhund: 0/50 +Entlebucher Sennenhund: 44/50 +Boxer: 27/50 +Bullmastiff: 42/50 +Tibetan Mastiff: 44/50 +French Bulldog: 44/50 +Great Dane: 30/50 +St. 
Bernard: 0/50 +husky: 0/50 +Alaskan Malamute: 31/50 +Siberian Husky: 28/50 +Dalmatian: 29/50 +Affenpinscher: 34/50 +Basenji: 0/50 +pug: 11/50 +Leonberger: 31/50 +Newfoundland dog: 42/50 +Great Pyrenees dog: 45/50 +Samoyed: 48/50 +Pomeranian: 30/50 +Chow Chow: 0/50 +Keeshond: 46/50 +brussels griffon: 29/50 +Pembroke Welsh Corgi: 44/50 +Cardigan Welsh Corgi: 25/50 +Toy Poodle: 35/50 +Miniature Poodle: 14/50 +Standard Poodle: 26/50 +Mexican hairless dog (xoloitzcuintli): 49/50 +grey wolf: 0/50 +Alaskan tundra wolf: 42/50 +red wolf or maned wolf: 36/50 +coyote: 0/50 +dingo: 0/50 +dhole: 7/50 +African wild dog: 48/50 +hyena: 33/50 +red fox: 0/50 +kit fox: 0/50 +Arctic fox: 3/50 +grey fox: 4/50 +tabby cat: 49/50 +tiger cat: 7/50 +Persian cat: 42/50 +Siamese cat: 50/50 +Egyptian Mau: 8/50 +cougar: 18/50 +lynx: 0/50 +leopard: 0/50 +snow leopard: 37/50 +jaguar: 15/50 +lion: 2/50 +tiger: 0/50 +cheetah: 0/50 +brown bear: 17/50 +American black bear: 42/50 +polar bear: 43/50 +sloth bear: 36/50 +mongoose: 0/50 +meerkat: 11/50 +tiger beetle: 1/50 +ladybug: 16/50 +ground beetle: 20/50 +longhorn beetle: 28/50 +leaf beetle: 2/50 +dung beetle: 2/50 +rhinoceros beetle: 49/50 +weevil: 1/50 +fly: 0/50 +bee: 25/50 +ant: 0/50 +grasshopper: 0/50 +cricket insect: 0/50 +stick insect: 38/50 +cockroach: 6/50 +praying mantis: 15/50 +cicada: 6/50 +leafhopper: 50/50 +lacewing: 20/50 +dragonfly: 0/50 +damselfly: 50/50 +red admiral butterfly: 17/50 +ringlet butterfly: 39/50 +monarch butterfly: 38/50 +small white butterfly: 29/50 +sulphur butterfly: 47/50 +gossamer-winged butterfly: 4/50 +starfish: 13/50 +sea urchin: 2/50 +sea cucumber: 0/50 +cottontail rabbit: 44/50 +hare: 0/50 +Angora rabbit: 48/50 +hamster: 50/50 +porcupine: 11/50 +fox squirrel: 1/50 +marmot: 35/50 +beaver: 0/50 +guinea pig: 36/50 +common sorrel horse: 0/50 +zebra: 0/50 +pig: 35/50 +wild boar: 0/50 +warthog: 44/50 +hippopotamus: 45/50 +ox: 0/50 +water buffalo: 0/50 +bison: 25/50 +ram (adult male sheep): 24/50 +bighorn sheep: 
48/50 +Alpine ibex: 31/50 +hartebeest: 0/50 +impala (antelope): 6/50 +gazelle: 1/50 +arabian camel: 40/50 +llama: 22/50 +weasel: 0/50 +mink: 0/50 +European polecat: 13/50 +black-footed ferret: 45/50 +otter: 46/50 +skunk: 0/50 +badger: 1/50 +armadillo: 6/50 +three-toed sloth: 50/50 +orangutan: 46/50 +gorilla: 0/50 +chimpanzee: 28/50 +gibbon: 0/50 +siamang: 0/50 +guenon: 0/50 +patas monkey: 0/50 +baboon: 30/50 +macaque: 21/50 +langur: 41/50 +black-and-white colobus: 49/50 +proboscis monkey: 48/50 +marmoset: 0/50 +white-headed capuchin: 20/50 +howler monkey: 3/50 +titi monkey: 0/50 +Geoffroy's spider monkey: 0/50 +common squirrel monkey: 18/50 +ring-tailed lemur: 48/50 +indri: 0/50 +Asian elephant: 5/50 +African bush elephant: 48/50 +red panda: 47/50 +giant panda: 49/50 +snoek fish: 8/50 +eel: 0/50 +silver salmon: 0/50 +rock beauty fish: 0/50 +clownfish: 50/50 +sturgeon: 0/50 +gar fish: 0/50 +lionfish: 44/50 +pufferfish: 46/50 +abacus: 2/50 +abaya: 46/50 +academic gown: 50/50 +accordion: 0/50 +acoustic guitar: 43/50 +aircraft carrier: 47/50 +airliner: 22/50 +airship: 16/50 +altar: 10/50 +ambulance: 43/50 +amphibious vehicle: 33/50 +analog clock: 1/50 +apiary: 45/50 +apron: 20/50 +trash can: 1/50 +assault rifle: 18/50 +backpack: 21/50 +bakery: 3/50 +balance beam: 0/50 +balloon: 1/50 +ballpoint pen: 32/50 +Band-Aid: 43/50 +banjo: 0/50 +baluster / handrail: 40/50 +barbell: 13/50 +barber chair: 33/50 +barbershop: 4/50 +barn: 0/50 +barometer: 0/50 +barrel: 2/50 +wheelbarrow: 30/50 +baseball: 32/50 +basketball: 49/50 +bassinet: 0/50 +bassoon: 45/50 +swimming cap: 29/50 +bath towel: 15/50 +bathtub: 2/50 +station wagon: 30/50 +lighthouse: 0/50 +beaker: 10/50 +military hat (bearskin or shako): 42/50 +beer bottle: 36/50 +beer glass: 35/50 +bell tower: 9/50 +baby bib: 40/50 +tandem bicycle: 50/50 +bikini: 28/50 +ring binder: 3/50 +binoculars: 43/50 +birdhouse: 40/50 +boathouse: 0/50 +bobsleigh: 25/50 +bolo tie: 49/50 +poke bonnet: 0/50 +bookcase: 31/50 +bookstore: 31/50 +bottle 
cap: 2/50 +hunting bow: 25/50 +bow tie: 38/50 +brass memorial plaque: 33/50 +bra: 32/50 +breakwater: 0/50 +breastplate: 2/50 +broom: 0/50 +bucket: 0/50 +buckle: 8/50 +bulletproof vest: 29/50 +high-speed train: 46/50 +butcher shop: 5/50 +taxicab: 35/50 +cauldron: 12/50 +candle: 10/50 +cannon: 10/50 +canoe: 2/50 +can opener: 14/50 +cardigan: 41/50 +car mirror: 4/50 +carousel: 1/50 +tool kit: 31/50 +cardboard box / carton: 12/50 +car wheel: 12/50 +automated teller machine: 45/50 +cassette: 0/50 +cassette player: 9/50 +castle: 0/50 +catamaran: 0/50 +CD player: 19/50 +cello: 0/50 +mobile phone: 19/50 +chain: 0/50 +chain-link fence: 17/50 +chain mail: 30/50 +chainsaw: 36/50 +storage chest: 2/50 +chiffonier: 6/50 +bell or wind chime: 15/50 +china cabinet: 38/50 +Christmas stocking: 47/50 +church: 0/50 +movie theater: 49/50 +cleaver: 0/50 +cliff dwelling: 40/50 +cloak: 0/50 +clogs: 21/50 +cocktail shaker: 4/50 +coffee mug: 30/50 +coffeemaker: 8/50 +spiral or coil: 2/50 +combination lock: 38/50 +computer keyboard: 1/50 +candy store: 24/50 +container ship: 50/50 +convertible: 0/50 +corkscrew: 23/50 +cornet: 0/50 +cowboy boot: 41/50 +cowboy hat: 34/50 +cradle: 0/50 +construction crane: 20/50 +crash helmet: 41/50 +crate: 0/50 +infant bed: 48/50 +Crock Pot: 45/50 +croquet ball: 44/50 +crutch: 13/50 +cuirass: 50/50 +dam: 0/50 +desk: 13/50 +desktop computer: 0/50 +rotary dial telephone: 48/50 +diaper: 23/50 +digital clock: 24/50 +digital watch: 28/50 +dining table: 31/50 +dishcloth: 45/50 +dishwasher: 0/50 +disc brake: 12/50 +dock: 0/50 +dog sled: 49/50 +dome: 0/50 +doormat: 20/50 +drilling rig: 39/50 +drum: 0/50 +drumstick: 15/50 +dumbbell: 1/50 +Dutch oven: 1/50 +electric fan: 3/50 +electric guitar: 40/50 +electric locomotive: 48/50 +entertainment center: 0/50 +envelope: 0/50 +espresso machine: 8/50 +face powder: 34/50 +feather boa: 22/50 +filing cabinet: 45/50 +fireboat: 50/50 +fire truck: 48/50 +fire screen: 4/50 +flagpole: 26/50 +flute: 0/50 +folding chair: 37/50 +football 
helmet: 47/50 +forklift: 35/50 +fountain: 0/50 +fountain pen: 28/50 +four-poster bed: 42/50 +freight car: 47/50 +French horn: 37/50 +frying pan: 38/50 +fur coat: 42/50 +garbage truck: 41/50 +gas mask or respirator: 48/50 +gas pump: 47/50 +goblet: 9/50 +go-kart: 47/50 +golf ball: 23/50 +golf cart: 37/50 +gondola: 3/50 +gong: 0/50 +gown: 8/50 +grand piano: 1/50 +greenhouse: 0/50 +radiator grille: 37/50 +grocery store: 7/50 +guillotine: 20/50 +hair clip: 13/50 +hair spray: 12/50 +half-track: 6/50 +hammer: 0/50 +hamper: 26/50 +hair dryer: 11/50 +hand-held computer: 0/50 +handkerchief: 0/50 +hard disk drive: 50/50 +harmonica: 6/50 +harp: 0/50 +combine harvester: 14/50 +hatchet: 27/50 +holster: 35/50 +home theater: 18/50 +honeycomb: 0/50 +hook: 0/50 +hoop skirt: 46/50 +gymnastic horizontal bar: 45/50 +horse-drawn vehicle: 46/50 +hourglass: 1/50 +iPod: 0/50 +clothes iron: 45/50 +carved pumpkin: 49/50 +jeans: 32/50 +jeep: 2/50 +T-shirt: 16/50 +jigsaw puzzle: 49/50 +rickshaw: 7/50 +joystick: 31/50 +kimono: 47/50 +knee pad: 26/50 +knot: 0/50 +lab coat: 33/50 +ladle: 2/50 +lampshade: 4/50 +laptop computer: 4/50 +lawn mower: 0/50 +lens cap: 7/50 +letter opener: 19/50 +library: 0/50 +lifeboat: 5/50 +lighter: 2/50 +limousine: 34/50 +ocean liner: 0/50 +lipstick: 23/50 +slip-on shoe: 45/50 +lotion: 19/50 +music speaker: 29/50 +loupe magnifying glass: 7/50 +sawmill: 11/50 +magnetic compass: 16/50 +messenger bag: 22/50 +mailbox: 6/50 +tights: 0/50 +one-piece bathing suit: 40/50 +manhole cover: 48/50 +maraca: 10/50 +marimba: 0/50 +mask: 11/50 +matchstick: 12/50 +maypole: 8/50 +maze: 0/50 +measuring cup: 34/50 +medicine cabinet: 30/50 +megalith: 9/50 +microphone: 14/50 +microwave oven: 39/50 +military uniform: 19/50 +milk can: 12/50 +minibus: 32/50 +miniskirt: 7/50 +minivan: 17/50 +missile: 37/50 +mitten: 27/50 +mixing bowl: 1/50 +mobile home: 8/50 +ford model t: 22/50 +modem: 1/50 +monastery: 0/50 +monitor: 0/50 +moped: 27/50 +mortar and pestle: 38/50 +graduation cap: 8/50 +mosque: 
19/50 +mosquito net: 20/50 +vespa: 13/50 +mountain bike: 39/50 +tent: 1/50 +computer mouse: 4/50 +mousetrap: 27/50 +moving van: 5/50 +muzzle: 3/50 +metal nail: 0/50 +neck brace: 39/50 +necklace: 14/50 +baby pacifier: 33/50 +notebook computer: 0/50 +obelisk: 10/50 +oboe: 0/50 +ocarina: 16/50 +odometer: 49/50 +oil filter: 15/50 +pipe organ: 49/50 +oscilloscope: 45/50 +overskirt: 0/50 +bullock cart: 40/50 +oxygen mask: 10/50 +product packet / packaging: 0/50 +paddle: 0/50 +paddle wheel: 31/50 +padlock: 13/50 +paintbrush: 10/50 +pajamas: 34/50 +palace: 0/50 +pan flute: 40/50 +paper towel: 0/50 +parachute: 0/50 +parallel bars: 1/50 +park bench: 30/50 +parking meter: 16/50 +railroad car: 0/50 +patio: 0/50 +payphone: 38/50 +pedestal: 0/50 +pencil case: 23/50 +pencil sharpener: 27/50 +perfume: 38/50 +Petri dish: 2/50 +photocopier: 11/50 +plectrum: 0/50 +Pickelhaube: 27/50 +picket fence: 30/50 +pickup truck: 35/50 +pier: 0/50 +piggy bank: 47/50 +pill bottle: 31/50 +pillow: 1/50 +ping-pong ball: 41/50 +pinwheel: 17/50 +pirate ship: 28/50 +drink pitcher: 19/50 +block plane: 44/50 +planetarium: 35/50 +plastic bag: 6/50 +plate rack: 35/50 +farm plow: 0/50 +plunger: 14/50 +Polaroid camera: 9/50 +pole: 3/50 +police van: 45/50 +poncho: 8/50 +pool table: 37/50 +soda bottle: 6/50 +plant pot: 2/50 +potter's wheel: 14/50 +power drill: 29/50 +prayer rug: 40/50 +printer: 6/50 +prison: 36/50 +missile: 32/50 +projector: 0/50 +hockey puck: 48/50 +punching bag: 37/50 +purse: 32/50 +quill: 4/50 +quilt: 2/50 +race car: 44/50 +racket: 0/50 +radiator: 0/50 +radio: 9/50 +radio telescope: 38/50 +rain barrel: 40/50 +recreational vehicle: 45/50 +fishing casting reel: 48/50 +reflex camera: 0/50 +refrigerator: 0/50 +remote control: 0/50 +restaurant: 1/50 +revolver: 0/50 +rifle: 38/50 +rocking chair: 22/50 +rotisserie: 25/50 +eraser: 0/50 +rugby ball: 49/50 +ruler measuring stick: 37/50 +sneaker: 5/50 +safe: 13/50 +safety pin: 15/50 +salt shaker: 11/50 +sandal: 4/50 +sarong: 35/50 +saxophone: 30/50 
+scabbard: 0/50 +weighing scale: 27/50 +school bus: 49/50 +schooner: 0/50 +scoreboard: 36/50 +CRT monitor: 0/50 +screw: 0/50 +screwdriver: 1/50 +seat belt: 18/50 +sewing machine: 40/50 +shield: 0/50 +shoe store: 13/50 +shoji screen / room divider: 10/50 +shopping basket: 3/50 +shopping cart: 9/50 +shovel: 1/50 +shower cap: 30/50 +shower curtain: 31/50 +ski: 22/50 +balaclava ski mask: 46/50 +sleeping bag: 36/50 +slide rule: 35/50 +sliding door: 6/50 +slot machine: 49/50 +snorkel: 18/50 +snowmobile: 50/50 +snowplow: 46/50 +soap dispenser: 21/50 +soccer ball: 37/50 +sock: 19/50 +solar thermal collector: 22/50 +sombrero: 22/50 +soup bowl: 29/50 +keyboard space bar: 14/50 +space heater: 11/50 +space shuttle: 2/50 +spatula: 0/50 +motorboat: 0/50 +spider web: 0/50 +spindle: 1/50 +sports car: 26/50 +spotlight: 0/50 +stage: 0/50 +steam locomotive: 47/50 +through arch bridge: 0/50 +steel drum: 9/50 +stethoscope: 18/50 +scarf: 39/50 +stone wall: 11/50 +stopwatch: 34/50 +stove: 9/50 +strainer: 0/50 +tram: 29/50 +stretcher: 1/50 +couch: 29/50 +stupa: 41/50 +submarine: 0/50 +suit: 0/50 +sundial: 6/50 +sunglasses: 16/50 +sunglasses: 13/50 +sunscreen: 6/50 +suspension bridge: 23/50 +mop: 0/50 +sweatshirt: 13/50 +swim trunks / shorts: 32/50 +swing: 7/50 +electrical switch: 1/50 +syringe: 1/50 +table lamp: 32/50 +tank: 0/50 +tape player: 21/50 +teapot: 47/50 +teddy bear: 31/50 +television: 3/50 +tennis ball: 45/50 +thatched roof: 39/50 +front curtain: 0/50 +thimble: 0/50 +threshing machine: 37/50 +throne: 35/50 +tile roof: 25/50 +toaster: 28/50 +tobacco shop: 16/50 +toilet seat: 17/50 +torch: 1/50 +totem pole: 44/50 +tow truck: 25/50 +toy store: 32/50 +tractor: 0/50 +semi-trailer truck: 47/50 +tray: 2/50 +trench coat: 39/50 +tricycle: 15/50 +trimaran: 44/50 +tripod: 0/50 +triumphal arch: 36/50 +trolleybus: 11/50 +trombone: 6/50 +hot tub: 8/50 +turnstile: 14/50 +typewriter keyboard: 50/50 +umbrella: 8/50 +unicycle: 10/50 +upright piano: 8/50 +vacuum cleaner: 44/50 +vase: 29/50 
+vaulted or arched ceiling: 12/50 +velvet fabric: 1/50 +vending machine: 10/50 +vestment: 0/50 +viaduct: 24/50 +violin: 8/50 +volleyball: 45/50 +waffle iron: 40/50 +wall clock: 39/50 +wallet: 8/50 +wardrobe: 4/50 +military aircraft: 2/50 +sink: 0/50 +washing machine: 21/50 +water bottle: 15/50 +water jug: 1/50 +water tower: 20/50 +whiskey jug: 29/50 +whistle: 0/50 +hair wig: 34/50 +window screen: 6/50 +window shade: 28/50 +Windsor tie: 15/50 +wine bottle: 28/50 +airplane wing: 4/50 +wok: 14/50 +wooden spoon: 23/50 +wool: 13/50 +split-rail fence: 26/50 +shipwreck: 0/50 +sailboat: 26/50 +yurt: 46/50 +website: 2/50 +comic book: 42/50 +crossword: 44/50 +traffic or street sign: 21/50 +traffic light: 0/50 +dust jacket: 0/50 +menu: 1/50 +plate: 0/50 +guacamole: 40/50 +consomme: 0/50 +hot pot: 41/50 +trifle: 35/50 +ice cream: 19/50 +popsicle: 7/50 +baguette: 0/50 +bagel: 45/50 +pretzel: 48/50 +cheeseburger: 44/50 +hot dog: 50/50 +mashed potatoes: 43/50 +cabbage: 29/50 +broccoli: 44/50 +cauliflower: 40/50 +zucchini: 35/50 +spaghetti squash: 35/50 +acorn squash: 28/50 +butternut squash: 35/50 +cucumber: 9/50 +artichoke: 16/50 +bell pepper: 39/50 +cardoon: 36/50 +mushroom: 0/50 +Granny Smith apple: 46/50 +strawberry: 0/50 +orange: 0/50 +lemon: 6/50 +fig: 22/50 +pineapple: 26/50 +banana: 1/50 +jackfruit: 16/50 +cherimoya (custard apple): 47/50 +pomegranate: 11/50 +hay: 1/50 +carbonara: 39/50 +chocolate syrup: 31/50 +dough: 18/50 +meatloaf: 48/50 +pizza: 38/50 +pot pie: 47/50 +burrito: 50/50 +red wine: 9/50 +espresso: 0/50 +tea cup: 20/50 +eggnog: 27/50 +mountain: 0/50 +bubble: 0/50 +cliff: 0/50 +coral reef: 0/50 +geyser: 1/50 +lakeshore: 0/50 +promontory: 0/50 +sandbar: 0/50 +beach: 0/50 +valley: 0/50 +volcano: 0/50 +baseball player: 38/50 +bridegroom: 21/50 +scuba diver: 22/50 +rapeseed: 45/50 +daisy: 10/50 +yellow lady's slipper: 50/50 +corn: 0/50 +acorn: 0/50 +rose hip: 21/50 +horse chestnut seed: 40/50 +coral fungus: 41/50 +agaric: 8/50 +gyromitra: 1/50 +stinkhorn 
mushroom: 43/50 +earth star fungus: 0/50 +hen of the woods mushroom: 49/50 +bolete: 30/50 +corn cob: 35/50 +toilet paper: 37/50 + + +Accuracy Results: +Total: 50,000 +Correct: 20,771 +Accuracy: 41.5% \ No newline at end of file diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.ipynb b/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.ipynb new file mode 100644 index 00000000..d82ad482 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/ckb-nfs/home/zcafego/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import torch\n", + "import clip\n", + "from PIL import Image\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "device = 'cuda:2'\n", + "\n", + "vitl14_path = './vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50'\n", + "vitb32_path = './vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50'\n", + "\n", + "vitl14_check = torch.load(vitl14_path, map_location='cpu')\n", + "vitb32_check = torch.load(vitb32_path, map_location='cpu')\n", + "\n", + "vitl14_state_dict = vitl14_check['state_dict']\n", + "vitl14_tnsr = vitl14_state_dict['ctx']\n", + "\n", + "vitb32_state_dict = vitb32_check['state_dict']\n", + "vitb32_tnsr = vitb32_state_dict['ctx']\n", + "\n", + "img = Image.open('/ckb-nfs/home/zcafego/test_images/sturgeon.JPEG')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + 
"metadata": {}, + "outputs": [], + "source": [ + "l14_model, l14_preprocessor = clip.load('ViT-L/14', device=device)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "_IncompatibleKeys(missing_keys=['positional_embedding', 'text_projection', 'logit_scale', 'visual.class_embedding', 'visual.positional_embedding', 'visual.proj', 'visual.conv1.weight', 'visual.ln_pre.weight', 'visual.ln_pre.bias', 'visual.transformer.resblocks.0.attn.in_proj_weight', 'visual.transformer.resblocks.0.attn.in_proj_bias', 'visual.transformer.resblocks.0.attn.out_proj.weight', 'visual.transformer.resblocks.0.attn.out_proj.bias', 'visual.transformer.resblocks.0.ln_1.weight', 'visual.transformer.resblocks.0.ln_1.bias', 'visual.transformer.resblocks.0.mlp.c_fc.weight', 'visual.transformer.resblocks.0.mlp.c_fc.bias', 'visual.transformer.resblocks.0.mlp.c_proj.weight', 'visual.transformer.resblocks.0.mlp.c_proj.bias', 'visual.transformer.resblocks.0.ln_2.weight', 'visual.transformer.resblocks.0.ln_2.bias', 'visual.transformer.resblocks.1.attn.in_proj_weight', 'visual.transformer.resblocks.1.attn.in_proj_bias', 'visual.transformer.resblocks.1.attn.out_proj.weight', 'visual.transformer.resblocks.1.attn.out_proj.bias', 'visual.transformer.resblocks.1.ln_1.weight', 'visual.transformer.resblocks.1.ln_1.bias', 'visual.transformer.resblocks.1.mlp.c_fc.weight', 'visual.transformer.resblocks.1.mlp.c_fc.bias', 'visual.transformer.resblocks.1.mlp.c_proj.weight', 'visual.transformer.resblocks.1.mlp.c_proj.bias', 'visual.transformer.resblocks.1.ln_2.weight', 'visual.transformer.resblocks.1.ln_2.bias', 'visual.transformer.resblocks.2.attn.in_proj_weight', 'visual.transformer.resblocks.2.attn.in_proj_bias', 'visual.transformer.resblocks.2.attn.out_proj.weight', 'visual.transformer.resblocks.2.attn.out_proj.bias', 'visual.transformer.resblocks.2.ln_1.weight', 'visual.transformer.resblocks.2.ln_1.bias', 
'visual.transformer.resblocks.2.mlp.c_fc.weight', 'visual.transformer.resblocks.2.mlp.c_fc.bias', 'visual.transformer.resblocks.2.mlp.c_proj.weight', 'visual.transformer.resblocks.2.mlp.c_proj.bias', 'visual.transformer.resblocks.2.ln_2.weight', 'visual.transformer.resblocks.2.ln_2.bias', 'visual.transformer.resblocks.3.attn.in_proj_weight', 'visual.transformer.resblocks.3.attn.in_proj_bias', 'visual.transformer.resblocks.3.attn.out_proj.weight', 'visual.transformer.resblocks.3.attn.out_proj.bias', 'visual.transformer.resblocks.3.ln_1.weight', 'visual.transformer.resblocks.3.ln_1.bias', 'visual.transformer.resblocks.3.mlp.c_fc.weight', 'visual.transformer.resblocks.3.mlp.c_fc.bias', 'visual.transformer.resblocks.3.mlp.c_proj.weight', 'visual.transformer.resblocks.3.mlp.c_proj.bias', 'visual.transformer.resblocks.3.ln_2.weight', 'visual.transformer.resblocks.3.ln_2.bias', 'visual.transformer.resblocks.4.attn.in_proj_weight', 'visual.transformer.resblocks.4.attn.in_proj_bias', 'visual.transformer.resblocks.4.attn.out_proj.weight', 'visual.transformer.resblocks.4.attn.out_proj.bias', 'visual.transformer.resblocks.4.ln_1.weight', 'visual.transformer.resblocks.4.ln_1.bias', 'visual.transformer.resblocks.4.mlp.c_fc.weight', 'visual.transformer.resblocks.4.mlp.c_fc.bias', 'visual.transformer.resblocks.4.mlp.c_proj.weight', 'visual.transformer.resblocks.4.mlp.c_proj.bias', 'visual.transformer.resblocks.4.ln_2.weight', 'visual.transformer.resblocks.4.ln_2.bias', 'visual.transformer.resblocks.5.attn.in_proj_weight', 'visual.transformer.resblocks.5.attn.in_proj_bias', 'visual.transformer.resblocks.5.attn.out_proj.weight', 'visual.transformer.resblocks.5.attn.out_proj.bias', 'visual.transformer.resblocks.5.ln_1.weight', 'visual.transformer.resblocks.5.ln_1.bias', 'visual.transformer.resblocks.5.mlp.c_fc.weight', 'visual.transformer.resblocks.5.mlp.c_fc.bias', 'visual.transformer.resblocks.5.mlp.c_proj.weight', 'visual.transformer.resblocks.5.mlp.c_proj.bias', 
'visual.transformer.resblocks.5.ln_2.weight', 'visual.transformer.resblocks.5.ln_2.bias', 'visual.transformer.resblocks.6.attn.in_proj_weight', 'visual.transformer.resblocks.6.attn.in_proj_bias', 'visual.transformer.resblocks.6.attn.out_proj.weight', 'visual.transformer.resblocks.6.attn.out_proj.bias', 'visual.transformer.resblocks.6.ln_1.weight', 'visual.transformer.resblocks.6.ln_1.bias', 'visual.transformer.resblocks.6.mlp.c_fc.weight', 'visual.transformer.resblocks.6.mlp.c_fc.bias', 'visual.transformer.resblocks.6.mlp.c_proj.weight', 'visual.transformer.resblocks.6.mlp.c_proj.bias', 'visual.transformer.resblocks.6.ln_2.weight', 'visual.transformer.resblocks.6.ln_2.bias', 'visual.transformer.resblocks.7.attn.in_proj_weight', 'visual.transformer.resblocks.7.attn.in_proj_bias', 'visual.transformer.resblocks.7.attn.out_proj.weight', 'visual.transformer.resblocks.7.attn.out_proj.bias', 'visual.transformer.resblocks.7.ln_1.weight', 'visual.transformer.resblocks.7.ln_1.bias', 'visual.transformer.resblocks.7.mlp.c_fc.weight', 'visual.transformer.resblocks.7.mlp.c_fc.bias', 'visual.transformer.resblocks.7.mlp.c_proj.weight', 'visual.transformer.resblocks.7.mlp.c_proj.bias', 'visual.transformer.resblocks.7.ln_2.weight', 'visual.transformer.resblocks.7.ln_2.bias', 'visual.transformer.resblocks.8.attn.in_proj_weight', 'visual.transformer.resblocks.8.attn.in_proj_bias', 'visual.transformer.resblocks.8.attn.out_proj.weight', 'visual.transformer.resblocks.8.attn.out_proj.bias', 'visual.transformer.resblocks.8.ln_1.weight', 'visual.transformer.resblocks.8.ln_1.bias', 'visual.transformer.resblocks.8.mlp.c_fc.weight', 'visual.transformer.resblocks.8.mlp.c_fc.bias', 'visual.transformer.resblocks.8.mlp.c_proj.weight', 'visual.transformer.resblocks.8.mlp.c_proj.bias', 'visual.transformer.resblocks.8.ln_2.weight', 'visual.transformer.resblocks.8.ln_2.bias', 'visual.transformer.resblocks.9.attn.in_proj_weight', 'visual.transformer.resblocks.9.attn.in_proj_bias', 
'visual.transformer.resblocks.9.attn.out_proj.weight', 'visual.transformer.resblocks.9.attn.out_proj.bias', 'visual.transformer.resblocks.9.ln_1.weight', 'visual.transformer.resblocks.9.ln_1.bias', 'visual.transformer.resblocks.9.mlp.c_fc.weight', 'visual.transformer.resblocks.9.mlp.c_fc.bias', 'visual.transformer.resblocks.9.mlp.c_proj.weight', 'visual.transformer.resblocks.9.mlp.c_proj.bias', 'visual.transformer.resblocks.9.ln_2.weight', 'visual.transformer.resblocks.9.ln_2.bias', 'visual.transformer.resblocks.10.attn.in_proj_weight', 'visual.transformer.resblocks.10.attn.in_proj_bias', 'visual.transformer.resblocks.10.attn.out_proj.weight', 'visual.transformer.resblocks.10.attn.out_proj.bias', 'visual.transformer.resblocks.10.ln_1.weight', 'visual.transformer.resblocks.10.ln_1.bias', 'visual.transformer.resblocks.10.mlp.c_fc.weight', 'visual.transformer.resblocks.10.mlp.c_fc.bias', 'visual.transformer.resblocks.10.mlp.c_proj.weight', 'visual.transformer.resblocks.10.mlp.c_proj.bias', 'visual.transformer.resblocks.10.ln_2.weight', 'visual.transformer.resblocks.10.ln_2.bias', 'visual.transformer.resblocks.11.attn.in_proj_weight', 'visual.transformer.resblocks.11.attn.in_proj_bias', 'visual.transformer.resblocks.11.attn.out_proj.weight', 'visual.transformer.resblocks.11.attn.out_proj.bias', 'visual.transformer.resblocks.11.ln_1.weight', 'visual.transformer.resblocks.11.ln_1.bias', 'visual.transformer.resblocks.11.mlp.c_fc.weight', 'visual.transformer.resblocks.11.mlp.c_fc.bias', 'visual.transformer.resblocks.11.mlp.c_proj.weight', 'visual.transformer.resblocks.11.mlp.c_proj.bias', 'visual.transformer.resblocks.11.ln_2.weight', 'visual.transformer.resblocks.11.ln_2.bias', 'visual.transformer.resblocks.12.attn.in_proj_weight', 'visual.transformer.resblocks.12.attn.in_proj_bias', 'visual.transformer.resblocks.12.attn.out_proj.weight', 'visual.transformer.resblocks.12.attn.out_proj.bias', 'visual.transformer.resblocks.12.ln_1.weight', 
'visual.transformer.resblocks.12.ln_1.bias', 'visual.transformer.resblocks.12.mlp.c_fc.weight', 'visual.transformer.resblocks.12.mlp.c_fc.bias', 'visual.transformer.resblocks.12.mlp.c_proj.weight', 'visual.transformer.resblocks.12.mlp.c_proj.bias', 'visual.transformer.resblocks.12.ln_2.weight', 'visual.transformer.resblocks.12.ln_2.bias', 'visual.transformer.resblocks.13.attn.in_proj_weight', 'visual.transformer.resblocks.13.attn.in_proj_bias', 'visual.transformer.resblocks.13.attn.out_proj.weight', 'visual.transformer.resblocks.13.attn.out_proj.bias', 'visual.transformer.resblocks.13.ln_1.weight', 'visual.transformer.resblocks.13.ln_1.bias', 'visual.transformer.resblocks.13.mlp.c_fc.weight', 'visual.transformer.resblocks.13.mlp.c_fc.bias', 'visual.transformer.resblocks.13.mlp.c_proj.weight', 'visual.transformer.resblocks.13.mlp.c_proj.bias', 'visual.transformer.resblocks.13.ln_2.weight', 'visual.transformer.resblocks.13.ln_2.bias', 'visual.transformer.resblocks.14.attn.in_proj_weight', 'visual.transformer.resblocks.14.attn.in_proj_bias', 'visual.transformer.resblocks.14.attn.out_proj.weight', 'visual.transformer.resblocks.14.attn.out_proj.bias', 'visual.transformer.resblocks.14.ln_1.weight', 'visual.transformer.resblocks.14.ln_1.bias', 'visual.transformer.resblocks.14.mlp.c_fc.weight', 'visual.transformer.resblocks.14.mlp.c_fc.bias', 'visual.transformer.resblocks.14.mlp.c_proj.weight', 'visual.transformer.resblocks.14.mlp.c_proj.bias', 'visual.transformer.resblocks.14.ln_2.weight', 'visual.transformer.resblocks.14.ln_2.bias', 'visual.transformer.resblocks.15.attn.in_proj_weight', 'visual.transformer.resblocks.15.attn.in_proj_bias', 'visual.transformer.resblocks.15.attn.out_proj.weight', 'visual.transformer.resblocks.15.attn.out_proj.bias', 'visual.transformer.resblocks.15.ln_1.weight', 'visual.transformer.resblocks.15.ln_1.bias', 'visual.transformer.resblocks.15.mlp.c_fc.weight', 'visual.transformer.resblocks.15.mlp.c_fc.bias', 
'visual.transformer.resblocks.15.mlp.c_proj.weight', 'visual.transformer.resblocks.15.mlp.c_proj.bias', 'visual.transformer.resblocks.15.ln_2.weight', 'visual.transformer.resblocks.15.ln_2.bias', 'visual.transformer.resblocks.16.attn.in_proj_weight', 'visual.transformer.resblocks.16.attn.in_proj_bias', 'visual.transformer.resblocks.16.attn.out_proj.weight', 'visual.transformer.resblocks.16.attn.out_proj.bias', 'visual.transformer.resblocks.16.ln_1.weight', 'visual.transformer.resblocks.16.ln_1.bias', 'visual.transformer.resblocks.16.mlp.c_fc.weight', 'visual.transformer.resblocks.16.mlp.c_fc.bias', 'visual.transformer.resblocks.16.mlp.c_proj.weight', 'visual.transformer.resblocks.16.mlp.c_proj.bias', 'visual.transformer.resblocks.16.ln_2.weight', 'visual.transformer.resblocks.16.ln_2.bias', 'visual.transformer.resblocks.17.attn.in_proj_weight', 'visual.transformer.resblocks.17.attn.in_proj_bias', 'visual.transformer.resblocks.17.attn.out_proj.weight', 'visual.transformer.resblocks.17.attn.out_proj.bias', 'visual.transformer.resblocks.17.ln_1.weight', 'visual.transformer.resblocks.17.ln_1.bias', 'visual.transformer.resblocks.17.mlp.c_fc.weight', 'visual.transformer.resblocks.17.mlp.c_fc.bias', 'visual.transformer.resblocks.17.mlp.c_proj.weight', 'visual.transformer.resblocks.17.mlp.c_proj.bias', 'visual.transformer.resblocks.17.ln_2.weight', 'visual.transformer.resblocks.17.ln_2.bias', 'visual.transformer.resblocks.18.attn.in_proj_weight', 'visual.transformer.resblocks.18.attn.in_proj_bias', 'visual.transformer.resblocks.18.attn.out_proj.weight', 'visual.transformer.resblocks.18.attn.out_proj.bias', 'visual.transformer.resblocks.18.ln_1.weight', 'visual.transformer.resblocks.18.ln_1.bias', 'visual.transformer.resblocks.18.mlp.c_fc.weight', 'visual.transformer.resblocks.18.mlp.c_fc.bias', 'visual.transformer.resblocks.18.mlp.c_proj.weight', 'visual.transformer.resblocks.18.mlp.c_proj.bias', 'visual.transformer.resblocks.18.ln_2.weight', 
'visual.transformer.resblocks.18.ln_2.bias', 'visual.transformer.resblocks.19.attn.in_proj_weight', 'visual.transformer.resblocks.19.attn.in_proj_bias', 'visual.transformer.resblocks.19.attn.out_proj.weight', 'visual.transformer.resblocks.19.attn.out_proj.bias', 'visual.transformer.resblocks.19.ln_1.weight', 'visual.transformer.resblocks.19.ln_1.bias', 'visual.transformer.resblocks.19.mlp.c_fc.weight', 'visual.transformer.resblocks.19.mlp.c_fc.bias', 'visual.transformer.resblocks.19.mlp.c_proj.weight', 'visual.transformer.resblocks.19.mlp.c_proj.bias', 'visual.transformer.resblocks.19.ln_2.weight', 'visual.transformer.resblocks.19.ln_2.bias', 'visual.transformer.resblocks.20.attn.in_proj_weight', 'visual.transformer.resblocks.20.attn.in_proj_bias', 'visual.transformer.resblocks.20.attn.out_proj.weight', 'visual.transformer.resblocks.20.attn.out_proj.bias', 'visual.transformer.resblocks.20.ln_1.weight', 'visual.transformer.resblocks.20.ln_1.bias', 'visual.transformer.resblocks.20.mlp.c_fc.weight', 'visual.transformer.resblocks.20.mlp.c_fc.bias', 'visual.transformer.resblocks.20.mlp.c_proj.weight', 'visual.transformer.resblocks.20.mlp.c_proj.bias', 'visual.transformer.resblocks.20.ln_2.weight', 'visual.transformer.resblocks.20.ln_2.bias', 'visual.transformer.resblocks.21.attn.in_proj_weight', 'visual.transformer.resblocks.21.attn.in_proj_bias', 'visual.transformer.resblocks.21.attn.out_proj.weight', 'visual.transformer.resblocks.21.attn.out_proj.bias', 'visual.transformer.resblocks.21.ln_1.weight', 'visual.transformer.resblocks.21.ln_1.bias', 'visual.transformer.resblocks.21.mlp.c_fc.weight', 'visual.transformer.resblocks.21.mlp.c_fc.bias', 'visual.transformer.resblocks.21.mlp.c_proj.weight', 'visual.transformer.resblocks.21.mlp.c_proj.bias', 'visual.transformer.resblocks.21.ln_2.weight', 'visual.transformer.resblocks.21.ln_2.bias', 'visual.transformer.resblocks.22.attn.in_proj_weight', 'visual.transformer.resblocks.22.attn.in_proj_bias', 
'visual.transformer.resblocks.22.attn.out_proj.weight', 'visual.transformer.resblocks.22.attn.out_proj.bias', 'visual.transformer.resblocks.22.ln_1.weight', 'visual.transformer.resblocks.22.ln_1.bias', 'visual.transformer.resblocks.22.mlp.c_fc.weight', 'visual.transformer.resblocks.22.mlp.c_fc.bias', 'visual.transformer.resblocks.22.mlp.c_proj.weight', 'visual.transformer.resblocks.22.mlp.c_proj.bias', 'visual.transformer.resblocks.22.ln_2.weight', 'visual.transformer.resblocks.22.ln_2.bias', 'visual.transformer.resblocks.23.attn.in_proj_weight', 'visual.transformer.resblocks.23.attn.in_proj_bias', 'visual.transformer.resblocks.23.attn.out_proj.weight', 'visual.transformer.resblocks.23.attn.out_proj.bias', 'visual.transformer.resblocks.23.ln_1.weight', 'visual.transformer.resblocks.23.ln_1.bias', 'visual.transformer.resblocks.23.mlp.c_fc.weight', 'visual.transformer.resblocks.23.mlp.c_fc.bias', 'visual.transformer.resblocks.23.mlp.c_proj.weight', 'visual.transformer.resblocks.23.mlp.c_proj.bias', 'visual.transformer.resblocks.23.ln_2.weight', 'visual.transformer.resblocks.23.ln_2.bias', 'visual.ln_post.weight', 'visual.ln_post.bias', 'transformer.resblocks.0.attn.in_proj_weight', 'transformer.resblocks.0.attn.in_proj_bias', 'transformer.resblocks.0.attn.out_proj.weight', 'transformer.resblocks.0.attn.out_proj.bias', 'transformer.resblocks.0.ln_1.weight', 'transformer.resblocks.0.ln_1.bias', 'transformer.resblocks.0.mlp.c_fc.weight', 'transformer.resblocks.0.mlp.c_fc.bias', 'transformer.resblocks.0.mlp.c_proj.weight', 'transformer.resblocks.0.mlp.c_proj.bias', 'transformer.resblocks.0.ln_2.weight', 'transformer.resblocks.0.ln_2.bias', 'transformer.resblocks.1.attn.in_proj_weight', 'transformer.resblocks.1.attn.in_proj_bias', 'transformer.resblocks.1.attn.out_proj.weight', 'transformer.resblocks.1.attn.out_proj.bias', 'transformer.resblocks.1.ln_1.weight', 'transformer.resblocks.1.ln_1.bias', 'transformer.resblocks.1.mlp.c_fc.weight', 
'transformer.resblocks.1.mlp.c_fc.bias', 'transformer.resblocks.1.mlp.c_proj.weight', 'transformer.resblocks.1.mlp.c_proj.bias', 'transformer.resblocks.1.ln_2.weight', 'transformer.resblocks.1.ln_2.bias', 'transformer.resblocks.2.attn.in_proj_weight', 'transformer.resblocks.2.attn.in_proj_bias', 'transformer.resblocks.2.attn.out_proj.weight', 'transformer.resblocks.2.attn.out_proj.bias', 'transformer.resblocks.2.ln_1.weight', 'transformer.resblocks.2.ln_1.bias', 'transformer.resblocks.2.mlp.c_fc.weight', 'transformer.resblocks.2.mlp.c_fc.bias', 'transformer.resblocks.2.mlp.c_proj.weight', 'transformer.resblocks.2.mlp.c_proj.bias', 'transformer.resblocks.2.ln_2.weight', 'transformer.resblocks.2.ln_2.bias', 'transformer.resblocks.3.attn.in_proj_weight', 'transformer.resblocks.3.attn.in_proj_bias', 'transformer.resblocks.3.attn.out_proj.weight', 'transformer.resblocks.3.attn.out_proj.bias', 'transformer.resblocks.3.ln_1.weight', 'transformer.resblocks.3.ln_1.bias', 'transformer.resblocks.3.mlp.c_fc.weight', 'transformer.resblocks.3.mlp.c_fc.bias', 'transformer.resblocks.3.mlp.c_proj.weight', 'transformer.resblocks.3.mlp.c_proj.bias', 'transformer.resblocks.3.ln_2.weight', 'transformer.resblocks.3.ln_2.bias', 'transformer.resblocks.4.attn.in_proj_weight', 'transformer.resblocks.4.attn.in_proj_bias', 'transformer.resblocks.4.attn.out_proj.weight', 'transformer.resblocks.4.attn.out_proj.bias', 'transformer.resblocks.4.ln_1.weight', 'transformer.resblocks.4.ln_1.bias', 'transformer.resblocks.4.mlp.c_fc.weight', 'transformer.resblocks.4.mlp.c_fc.bias', 'transformer.resblocks.4.mlp.c_proj.weight', 'transformer.resblocks.4.mlp.c_proj.bias', 'transformer.resblocks.4.ln_2.weight', 'transformer.resblocks.4.ln_2.bias', 'transformer.resblocks.5.attn.in_proj_weight', 'transformer.resblocks.5.attn.in_proj_bias', 'transformer.resblocks.5.attn.out_proj.weight', 'transformer.resblocks.5.attn.out_proj.bias', 'transformer.resblocks.5.ln_1.weight', 'transformer.resblocks.5.ln_1.bias', 
'transformer.resblocks.5.mlp.c_fc.weight', 'transformer.resblocks.5.mlp.c_fc.bias', 'transformer.resblocks.5.mlp.c_proj.weight', 'transformer.resblocks.5.mlp.c_proj.bias', 'transformer.resblocks.5.ln_2.weight', 'transformer.resblocks.5.ln_2.bias', 'transformer.resblocks.6.attn.in_proj_weight', 'transformer.resblocks.6.attn.in_proj_bias', 'transformer.resblocks.6.attn.out_proj.weight', 'transformer.resblocks.6.attn.out_proj.bias', 'transformer.resblocks.6.ln_1.weight', 'transformer.resblocks.6.ln_1.bias', 'transformer.resblocks.6.mlp.c_fc.weight', 'transformer.resblocks.6.mlp.c_fc.bias', 'transformer.resblocks.6.mlp.c_proj.weight', 'transformer.resblocks.6.mlp.c_proj.bias', 'transformer.resblocks.6.ln_2.weight', 'transformer.resblocks.6.ln_2.bias', 'transformer.resblocks.7.attn.in_proj_weight', 'transformer.resblocks.7.attn.in_proj_bias', 'transformer.resblocks.7.attn.out_proj.weight', 'transformer.resblocks.7.attn.out_proj.bias', 'transformer.resblocks.7.ln_1.weight', 'transformer.resblocks.7.ln_1.bias', 'transformer.resblocks.7.mlp.c_fc.weight', 'transformer.resblocks.7.mlp.c_fc.bias', 'transformer.resblocks.7.mlp.c_proj.weight', 'transformer.resblocks.7.mlp.c_proj.bias', 'transformer.resblocks.7.ln_2.weight', 'transformer.resblocks.7.ln_2.bias', 'transformer.resblocks.8.attn.in_proj_weight', 'transformer.resblocks.8.attn.in_proj_bias', 'transformer.resblocks.8.attn.out_proj.weight', 'transformer.resblocks.8.attn.out_proj.bias', 'transformer.resblocks.8.ln_1.weight', 'transformer.resblocks.8.ln_1.bias', 'transformer.resblocks.8.mlp.c_fc.weight', 'transformer.resblocks.8.mlp.c_fc.bias', 'transformer.resblocks.8.mlp.c_proj.weight', 'transformer.resblocks.8.mlp.c_proj.bias', 'transformer.resblocks.8.ln_2.weight', 'transformer.resblocks.8.ln_2.bias', 'transformer.resblocks.9.attn.in_proj_weight', 'transformer.resblocks.9.attn.in_proj_bias', 'transformer.resblocks.9.attn.out_proj.weight', 'transformer.resblocks.9.attn.out_proj.bias', 
'transformer.resblocks.9.ln_1.weight', 'transformer.resblocks.9.ln_1.bias', 'transformer.resblocks.9.mlp.c_fc.weight', 'transformer.resblocks.9.mlp.c_fc.bias', 'transformer.resblocks.9.mlp.c_proj.weight', 'transformer.resblocks.9.mlp.c_proj.bias', 'transformer.resblocks.9.ln_2.weight', 'transformer.resblocks.9.ln_2.bias', 'transformer.resblocks.10.attn.in_proj_weight', 'transformer.resblocks.10.attn.in_proj_bias', 'transformer.resblocks.10.attn.out_proj.weight', 'transformer.resblocks.10.attn.out_proj.bias', 'transformer.resblocks.10.ln_1.weight', 'transformer.resblocks.10.ln_1.bias', 'transformer.resblocks.10.mlp.c_fc.weight', 'transformer.resblocks.10.mlp.c_fc.bias', 'transformer.resblocks.10.mlp.c_proj.weight', 'transformer.resblocks.10.mlp.c_proj.bias', 'transformer.resblocks.10.ln_2.weight', 'transformer.resblocks.10.ln_2.bias', 'transformer.resblocks.11.attn.in_proj_weight', 'transformer.resblocks.11.attn.in_proj_bias', 'transformer.resblocks.11.attn.out_proj.weight', 'transformer.resblocks.11.attn.out_proj.bias', 'transformer.resblocks.11.ln_1.weight', 'transformer.resblocks.11.ln_1.bias', 'transformer.resblocks.11.mlp.c_fc.weight', 'transformer.resblocks.11.mlp.c_fc.bias', 'transformer.resblocks.11.mlp.c_proj.weight', 'transformer.resblocks.11.mlp.c_proj.bias', 'transformer.resblocks.11.ln_2.weight', 'transformer.resblocks.11.ln_2.bias', 'token_embedding.weight', 'ln_final.weight', 'ln_final.bias'], unexpected_keys=['ctx', 'token_prefix', 'token_suffix'])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l14_model.load_state_dict(vitl14_state_dict, strict=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "categories = []\n", + "with open('/ckb-nfs/home/zcafego/imagenet_labels/synset_words.txt') as f:\n", + " for line in f.readlines():\n", + " line = line.strip()\n", + " categories.append(' '.join(line.split(' ')[1:]).split(', 
')[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def get_classifications_l14(img):\n", + " preproc_img = l14_preprocessor(img).unsqueeze(0).to(device)\n", + " tokens = clip.tokenize([f\"a photo of a {category}\" for category in categories]).to(device)\n", + "\n", + " with torch.no_grad():\n", + " image_features = l14_model.encode_image(preproc_img)\n", + " text_features = l14_model.encode_text(tokens)\n", + "\n", + " logits_per_image, _ = l14_model(image_features, text_features)\n", + " probs = logits_per_image.softmax(dim=-1).cpu().numpy()\n", + " \n", + " print(\"Label probs: \", probs)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Expected 4-dimensional input for 4-dimensional weight [1024, 3, 14, 14], but got 2-dimensional input of size [1, 768] instead", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mget_classifications_l14\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[6], line 9\u001b[0m, in \u001b[0;36mget_classifications_l14\u001b[0;34m(img)\u001b[0m\n\u001b[1;32m 6\u001b[0m image_features \u001b[38;5;241m=\u001b[39m l14_model\u001b[38;5;241m.\u001b[39mencode_image(preproc_img)\n\u001b[1;32m 7\u001b[0m text_features \u001b[38;5;241m=\u001b[39m l14_model\u001b[38;5;241m.\u001b[39mencode_text(tokens)\n\u001b[0;32m----> 9\u001b[0m logits_per_image, _ \u001b[38;5;241m=\u001b[39m \u001b[43ml14_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_features\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m probs 
\u001b[38;5;241m=\u001b[39m logits_per_image\u001b[38;5;241m.\u001b[39msoftmax(dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mcpu()\u001b[38;5;241m.\u001b[39mnumpy()\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLabel probs: \u001b[39m\u001b[38;5;124m\"\u001b[39m, probs)\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/torch/nn/modules/module.py:727\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slow_forward(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28minput\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 726\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 727\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 728\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m itertools\u001b[38;5;241m.\u001b[39mchain(\n\u001b[1;32m 729\u001b[0m _global_forward_hooks\u001b[38;5;241m.\u001b[39mvalues(),\n\u001b[1;32m 730\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[1;32m 731\u001b[0m hook_result \u001b[38;5;241m=\u001b[39m hook(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, result)\n", + "File 
\u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/clip/model.py:359\u001b[0m, in \u001b[0;36mCLIP.forward\u001b[0;34m(self, image, text)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, image, text):\n\u001b[0;32m--> 359\u001b[0m image_features \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 360\u001b[0m text_features \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mencode_text(text)\n\u001b[1;32m 362\u001b[0m \u001b[38;5;66;03m# normalized features\u001b[39;00m\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/clip/model.py:341\u001b[0m, in \u001b[0;36mCLIP.encode_image\u001b[0;34m(self, image)\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mencode_image\u001b[39m(\u001b[38;5;28mself\u001b[39m, image):\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvisual\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/torch/nn/modules/module.py:727\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m result \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slow_forward(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28minput\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 726\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 727\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 728\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m itertools\u001b[38;5;241m.\u001b[39mchain(\n\u001b[1;32m 729\u001b[0m _global_forward_hooks\u001b[38;5;241m.\u001b[39mvalues(),\n\u001b[1;32m 730\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[1;32m 731\u001b[0m hook_result \u001b[38;5;241m=\u001b[39m hook(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, result)\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/clip/model.py:224\u001b[0m, in \u001b[0;36mVisionTransformer.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x: torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[0;32m--> 224\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv1\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# shape = [*, width, grid, grid]\u001b[39;00m\n\u001b[1;32m 225\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mreshape(x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], 
x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m], \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m) \u001b[38;5;66;03m# shape = [*, width, grid ** 2]\u001b[39;00m\n\u001b[1;32m 226\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mpermute(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m1\u001b[39m) \u001b[38;5;66;03m# shape = [*, grid ** 2, width]\u001b[39;00m\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/torch/nn/modules/module.py:727\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slow_forward(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28minput\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 726\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 727\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 728\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m itertools\u001b[38;5;241m.\u001b[39mchain(\n\u001b[1;32m 729\u001b[0m _global_forward_hooks\u001b[38;5;241m.\u001b[39mvalues(),\n\u001b[1;32m 730\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[1;32m 731\u001b[0m hook_result \u001b[38;5;241m=\u001b[39m hook(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, result)\n", + "File 
\u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/torch/nn/modules/conv.py:423\u001b[0m, in \u001b[0;36mConv2d.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 422\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 423\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conv_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/openmpf-projects/openmpf-components/python/ClipDetection/venv/lib/python3.8/site-packages/torch/nn/modules/conv.py:419\u001b[0m, in \u001b[0;36mConv2d._conv_forward\u001b[0;34m(self, input, weight)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzeros\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m F\u001b[38;5;241m.\u001b[39mconv2d(F\u001b[38;5;241m.\u001b[39mpad(\u001b[38;5;28minput\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reversed_padding_repeated_twice, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode),\n\u001b[1;32m 417\u001b[0m weight, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstride,\n\u001b[1;32m 418\u001b[0m _pair(\u001b[38;5;241m0\u001b[39m), \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdilation, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgroups)\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2d\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Expected 4-dimensional input for 4-dimensional weight [1024, 3, 14, 14], but got 2-dimensional input of size [1, 768] instead" + ] + } + ], + "source": [ + "get_classifications_l14(img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# print(sorted(likenesses, key=lambda x: x[1], reverse=True))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.py 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.py new file mode 100644 index 00000000..86d41a91 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/explore_outputs.py @@ -0,0 +1,62 @@ +import torch +import clip +from PIL import Image +import torch +import os + +import trainers + +device = 'cuda:1' + +vitl14_path = os.path.join(os.getcwd(), 'vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50') +vitb32_path = os.path.join(os.getcwd(), 'vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50') + +vitl14_check = torch.load(vitl14_path, map_location=None) + +vitl14_state_dict = vitl14_check['state_dict'] +vitl14_tnsr = vitl14_state_dict['ctx'] + +images = [ + Image.open('/ckb-nfs/home/zcafego/test_images/sturgeon.JPEG'), + Image.open('/ckb-nfs/home/zcafego/test_images/val2017/000000000139.jpg'), + Image.open('/ckb-nfs/home/zcafego/test_images/val2017/000000000285.jpg'), + ] + +# l14_model, l14_preprocessor = clip.load('ViT-L/14', device=device) + +categories = [] +with open('/ckb-nfs/home/zcafego/imagenet_labels/synset_words.txt') as f: + for line in f.readlines(): + line = line.strip() + categories.append(' '.join(line.split(' ')[1:]).split(', ')[0]) + +tokens = torch.cat([clip.tokenize(f"a photo of a {c}") for c in categories]).to(device) + +print(vitl14_check.keys()) + +def get_classifications_l14(img): + # for img in images: + preproc_img = l14_preprocessor(img).unsqueeze(0).to(device) + return l14_model(preproc_img) + + # with torch.no_grad(): + # image_features = l14_model.encode_image(preproc_img) + # text_features = l14_model.encode_text(tokens) + + + # image_features /= image_features.norm(dim=-1, keepdim=True) + # text_features /= text_features.norm(dim=-1, keepdim=True) + # similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1) + # values, indices = similarity[0].topk(5) + + # print("\nTop predictions:\n") + # for value, index in zip(values, indices): + # 
print(f"{categories[index]:>20s}: {100 * value.item():.2f}%") + +# print("WITHOUT STATE DICT:\n") +# get_classifications_l14(images) + +# l14_model.load_state_dict(vitl14_state_dict, strict=False) +# print(get_classifications_l14(images[0])) +# print("\nWITH STATE DICT:\n") +# get_classifications_l14(images[0]) \ No newline at end of file diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/output_1.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/output_1.txt new file mode 100644 index 00000000..aaf53968 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/output_1.txt @@ -0,0 +1,449 @@ +CLIP( + (visual): VisionTransformer( + (conv1): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False) + (ln_pre): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (transformer): Transformer( + (resblocks): Sequential( + (0): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (1): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (2): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): 
Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (3): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (4): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (5): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (6): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + 
(ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (7): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (8): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (9): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (10): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (11): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): 
_LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (12): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (13): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (14): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (15): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): 
Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (16): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (17): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (18): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (19): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): 
LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (20): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (21): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (22): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (23): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (ln_post): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (transformer): Transformer( + 
(resblocks): Sequential( + (0): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (1): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (2): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (3): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (4): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), 
eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (5): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (6): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (7): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (8): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, 
bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (9): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (10): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (11): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (token_embedding): Embedding(49408, 768) + (ln_final): LayerNorm((768,), eps=1e-05, elementwise_affine=True) +) \ No newline at end of file diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/output_2.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/output_2.txt new file mode 100644 index 00000000..b657d773 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/output_2.txt @@ -0,0 +1,449 @@ + CLIP( + (visual): VisionTransformer( + (conv1): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False) + (ln_pre): 
LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (transformer): Transformer( + (resblocks): Sequential( + (0): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (1): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (2): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (3): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (4): ResidualAttentionBlock( + (attn): MultiheadAttention( + 
(out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (5): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (6): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (7): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (8): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): 
Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (9): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (10): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (11): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (12): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): 
LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (13): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (14): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (15): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (16): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (17): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, 
out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (18): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (19): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (20): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (21): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, 
out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (22): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (23): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True) + ) + (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=4096, out_features=1024, bias=True) + ) + (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (ln_post): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + ) + (transformer): Transformer( + (resblocks): Sequential( + (0): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (1): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): 
QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (2): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (3): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (4): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (5): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (6): ResidualAttentionBlock( + (attn): 
MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (7): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (8): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (9): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (10): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): 
Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (11): ResidualAttentionBlock( + (attn): MultiheadAttention( + (out_proj): _LinearWithBias(in_features=768, out_features=768, bias=True) + ) + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): QuickGELU() + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (token_embedding): Embedding(49408, 768) + (ln_final): LayerNorm((768,), eps=1e-05, elementwise_affine=True) +) \ No newline at end of file diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..cf1a0bec --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,1539 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + 
N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: 
+ N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 
+/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1687.224 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb 
intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 
+Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! +Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/200] batch [5/31] time 0.711 (1.572) data 0.000 (0.178) loss 4.3398 (4.6867) acc 25.0000 (18.1250) lr 1.0000e-05 eta 2:42:17 +epoch [1/200] batch [10/31] time 0.712 (1.143) data 0.000 (0.089) loss 3.7773 (4.3557) acc 21.8750 (21.8750) lr 1.0000e-05 eta 1:57:56 +epoch [1/200] batch [15/31] time 0.709 (0.998) data 0.000 (0.059) loss 3.6875 (4.2160) acc 28.1250 (23.5417) lr 1.0000e-05 eta 1:42:52 +epoch [1/200] batch [20/31] time 0.710 (0.926) data 0.000 (0.045) loss 3.5723 (4.0763) acc 25.0000 (24.5312) lr 1.0000e-05 eta 1:35:20 +epoch [1/200] batch [25/31] time 0.720 (0.883) data 0.000 (0.036) loss 2.6855 (3.8645) acc 43.7500 (27.5000) lr 1.0000e-05 eta 1:30:50 +epoch [1/200] batch [30/31] time 0.709 (0.854) data 0.000 (0.030) loss 3.4629 (3.7541) acc 31.2500 (28.7500) lr 1.0000e-05 eta 1:27:49 +epoch [2/200] batch [5/31] time 0.712 (0.912) data 0.000 (0.184) loss 1.8252 (2.5494) acc 59.3750 (42.5000) lr 2.0000e-03 eta 1:33:43 +epoch [2/200] batch [10/31] time 0.711 (0.813) data 0.000 (0.092) loss 2.0430 (2.4092) acc 50.0000 (46.5625) lr 2.0000e-03 eta 1:23:27 +epoch [2/200] batch [15/31] time 0.722 (0.782) data 0.000 (0.062) loss 2.0762 (2.2622) acc 43.7500 (48.9583) lr 2.0000e-03 eta 1:20:12 +epoch [2/200] batch [20/31] time 0.720 (0.764) data 0.000 (0.046) loss 1.6934 (2.2380) acc 56.2500 (50.1562) lr 2.0000e-03 eta 1:18:17 +epoch [2/200] batch [25/31] time 0.713 (0.753) data 0.000 (0.037) loss 2.3320 (2.1624) acc 40.6250 (51.7500) lr 2.0000e-03 eta 1:17:07 +epoch [2/200] batch [30/31] time 0.743 (0.748) data 0.000 (0.031) loss 2.1250 (2.1692) acc 40.6250 (50.3125) lr 2.0000e-03 eta 1:16:29 +epoch [3/200] batch [5/31] time 0.713 (0.890) data 0.000 (0.171) 
loss 1.3066 (1.8510) acc 68.7500 (55.0000) lr 1.9999e-03 eta 1:31:00 +epoch [3/200] batch [10/31] time 0.706 (0.813) data 0.000 (0.086) loss 2.2305 (1.9200) acc 56.2500 (55.0000) lr 1.9999e-03 eta 1:23:02 +epoch [3/200] batch [15/31] time 0.704 (0.779) data 0.000 (0.057) loss 2.0352 (1.9470) acc 56.2500 (54.1667) lr 1.9999e-03 eta 1:19:30 +epoch [3/200] batch [20/31] time 0.705 (0.761) data 0.000 (0.043) loss 1.6963 (1.8799) acc 56.2500 (55.1562) lr 1.9999e-03 eta 1:17:37 +epoch [3/200] batch [25/31] time 0.717 (0.751) data 0.000 (0.034) loss 1.8242 (1.9009) acc 68.7500 (55.0000) lr 1.9999e-03 eta 1:16:32 +epoch [3/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.029) loss 2.3730 (1.8986) acc 53.1250 (54.8958) lr 1.9999e-03 eta 1:15:42 +epoch [4/200] batch [5/31] time 0.711 (0.891) data 0.000 (0.172) loss 1.6074 (1.9096) acc 53.1250 (48.1250) lr 1.9995e-03 eta 1:30:36 +epoch [4/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.086) loss 1.7686 (1.7490) acc 59.3750 (54.3750) lr 1.9995e-03 eta 1:21:30 +epoch [4/200] batch [15/31] time 0.705 (0.772) data 0.000 (0.058) loss 1.9619 (1.7015) acc 59.3750 (56.8750) lr 1.9995e-03 eta 1:18:22 +epoch [4/200] batch [20/31] time 0.708 (0.756) data 0.000 (0.043) loss 1.8096 (1.7618) acc 62.5000 (57.8125) lr 1.9995e-03 eta 1:16:42 +epoch [4/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.035) loss 2.4961 (1.7508) acc 62.5000 (57.8750) lr 1.9995e-03 eta 1:15:45 +epoch [4/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.029) loss 2.0410 (1.7606) acc 53.1250 (58.2292) lr 1.9995e-03 eta 1:15:01 +epoch [5/200] batch [5/31] time 0.711 (0.892) data 0.000 (0.172) loss 1.6562 (1.8258) acc 59.3750 (60.0000) lr 1.9989e-03 eta 1:30:14 +epoch [5/200] batch [10/31] time 0.715 (0.803) data 0.000 (0.086) loss 1.2363 (1.7505) acc 75.0000 (59.6875) lr 1.9989e-03 eta 1:21:12 +epoch [5/200] batch [15/31] time 0.701 (0.774) data 0.000 (0.058) loss 2.6504 (1.8702) acc 34.3750 (57.5000) lr 1.9989e-03 eta 1:18:09 +epoch [5/200] batch [20/31] 
time 0.725 (0.758) data 0.000 (0.043) loss 1.0527 (1.7463) acc 65.6250 (58.4375) lr 1.9989e-03 eta 1:16:31 +epoch [5/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.035) loss 1.4512 (1.7601) acc 62.5000 (58.1250) lr 1.9989e-03 eta 1:15:25 +epoch [5/200] batch [30/31] time 0.705 (0.741) data 0.000 (0.029) loss 2.1270 (1.7994) acc 43.7500 (57.1875) lr 1.9989e-03 eta 1:14:42 +epoch [6/200] batch [5/31] time 0.709 (0.883) data 0.000 (0.159) loss 1.5957 (1.8076) acc 59.3750 (55.6250) lr 1.9980e-03 eta 1:28:50 +epoch [6/200] batch [10/31] time 0.740 (0.801) data 0.000 (0.080) loss 1.8115 (1.9626) acc 56.2500 (55.9375) lr 1.9980e-03 eta 1:20:31 +epoch [6/200] batch [15/31] time 0.723 (0.771) data 0.000 (0.053) loss 1.9209 (1.8530) acc 62.5000 (58.7500) lr 1.9980e-03 eta 1:17:32 +epoch [6/200] batch [20/31] time 0.707 (0.756) data 0.000 (0.040) loss 1.6807 (1.8609) acc 65.6250 (58.1250) lr 1.9980e-03 eta 1:15:53 +epoch [6/200] batch [25/31] time 0.708 (0.746) data 0.000 (0.032) loss 1.9551 (1.8631) acc 46.8750 (57.6250) lr 1.9980e-03 eta 1:14:51 +epoch [6/200] batch [30/31] time 0.709 (0.739) data 0.000 (0.027) loss 2.0332 (1.8683) acc 56.2500 (57.6042) lr 1.9980e-03 eta 1:14:07 +epoch [7/200] batch [5/31] time 0.711 (0.909) data 0.000 (0.186) loss 1.4756 (1.7725) acc 59.3750 (56.2500) lr 1.9969e-03 eta 1:31:01 +epoch [7/200] batch [10/31] time 0.707 (0.812) data 0.000 (0.093) loss 1.9033 (1.7049) acc 65.6250 (59.0625) lr 1.9969e-03 eta 1:21:12 +epoch [7/200] batch [15/31] time 0.714 (0.781) data 0.000 (0.062) loss 1.6963 (1.7694) acc 65.6250 (58.5417) lr 1.9969e-03 eta 1:18:02 +epoch [7/200] batch [20/31] time 0.708 (0.763) data 0.000 (0.047) loss 2.5566 (1.7866) acc 43.7500 (58.7500) lr 1.9969e-03 eta 1:16:11 +epoch [7/200] batch [25/31] time 0.727 (0.754) data 0.000 (0.037) loss 1.5820 (1.7530) acc 65.6250 (59.3750) lr 1.9969e-03 eta 1:15:14 +epoch [7/200] batch [30/31] time 0.701 (0.746) data 0.000 (0.031) loss 2.1797 (1.7648) acc 53.1250 (59.0625) lr 1.9969e-03 
eta 1:14:24 +epoch [8/200] batch [5/31] time 0.723 (0.891) data 0.000 (0.164) loss 1.4297 (1.6346) acc 62.5000 (56.8750) lr 1.9956e-03 eta 1:28:47 +epoch [8/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.082) loss 1.6357 (1.7118) acc 68.7500 (58.1250) lr 1.9956e-03 eta 1:19:47 +epoch [8/200] batch [15/31] time 0.727 (0.772) data 0.000 (0.055) loss 1.1309 (1.6551) acc 71.8750 (60.0000) lr 1.9956e-03 eta 1:16:48 +epoch [8/200] batch [20/31] time 0.708 (0.765) data 0.000 (0.041) loss 1.4277 (1.6852) acc 50.0000 (59.0625) lr 1.9956e-03 eta 1:16:03 +epoch [8/200] batch [25/31] time 0.711 (0.755) data 0.000 (0.033) loss 2.0273 (1.7451) acc 46.8750 (57.5000) lr 1.9956e-03 eta 1:14:57 +epoch [8/200] batch [30/31] time 0.711 (0.748) data 0.000 (0.028) loss 2.1055 (1.7774) acc 59.3750 (57.6042) lr 1.9956e-03 eta 1:14:15 +epoch [9/200] batch [5/31] time 0.724 (0.893) data 0.000 (0.171) loss 1.6455 (1.8209) acc 62.5000 (58.1250) lr 1.9940e-03 eta 1:28:32 +epoch [9/200] batch [10/31] time 0.720 (0.803) data 0.000 (0.086) loss 1.8076 (1.7759) acc 59.3750 (60.9375) lr 1.9940e-03 eta 1:19:33 +epoch [9/200] batch [15/31] time 0.707 (0.775) data 0.000 (0.057) loss 0.9829 (1.7989) acc 75.0000 (59.7917) lr 1.9940e-03 eta 1:16:39 +epoch [9/200] batch [20/31] time 0.718 (0.759) data 0.000 (0.043) loss 1.8350 (1.8015) acc 62.5000 (60.3125) lr 1.9940e-03 eta 1:15:03 +epoch [9/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.034) loss 1.7461 (1.7790) acc 65.6250 (60.8750) lr 1.9940e-03 eta 1:14:00 +epoch [9/200] batch [30/31] time 0.715 (0.743) data 0.000 (0.029) loss 1.3887 (1.7526) acc 65.6250 (61.1458) lr 1.9940e-03 eta 1:13:22 +epoch [10/200] batch [5/31] time 0.709 (0.888) data 0.000 (0.167) loss 1.7490 (1.9891) acc 59.3750 (54.3750) lr 1.9921e-03 eta 1:27:34 +epoch [10/200] batch [10/31] time 0.709 (0.803) data 0.000 (0.084) loss 1.6328 (1.8029) acc 59.3750 (58.7500) lr 1.9921e-03 eta 1:19:07 +epoch [10/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.056) loss 1.2393 
(1.7419) acc 59.3750 (56.6667) lr 1.9921e-03 eta 1:16:03 +epoch [10/200] batch [20/31] time 0.718 (0.759) data 0.000 (0.042) loss 1.7695 (1.7196) acc 62.5000 (57.9688) lr 1.9921e-03 eta 1:14:39 +epoch [10/200] batch [25/31] time 0.712 (0.750) data 0.000 (0.034) loss 2.2559 (1.7396) acc 43.7500 (57.7500) lr 1.9921e-03 eta 1:13:39 +epoch [10/200] batch [30/31] time 0.717 (0.745) data 0.000 (0.028) loss 1.8574 (1.7366) acc 50.0000 (57.0833) lr 1.9921e-03 eta 1:13:08 +epoch [11/200] batch [5/31] time 0.711 (0.928) data 0.000 (0.176) loss 1.4873 (1.5555) acc 65.6250 (59.3750) lr 1.9900e-03 eta 1:31:00 +epoch [11/200] batch [10/31] time 0.715 (0.820) data 0.000 (0.088) loss 2.1152 (1.7263) acc 53.1250 (56.8750) lr 1.9900e-03 eta 1:20:23 +epoch [11/200] batch [15/31] time 0.709 (0.784) data 0.000 (0.059) loss 2.0176 (1.6775) acc 59.3750 (59.3750) lr 1.9900e-03 eta 1:16:45 +epoch [11/200] batch [20/31] time 0.710 (0.766) data 0.000 (0.044) loss 1.7217 (1.6669) acc 65.6250 (59.8438) lr 1.9900e-03 eta 1:14:53 +epoch [11/200] batch [25/31] time 0.707 (0.754) data 0.000 (0.035) loss 1.3838 (1.6910) acc 68.7500 (59.8750) lr 1.9900e-03 eta 1:13:44 +epoch [11/200] batch [30/31] time 0.708 (0.747) data 0.000 (0.030) loss 1.8154 (1.7088) acc 59.3750 (59.7917) lr 1.9900e-03 eta 1:12:58 +epoch [12/200] batch [5/31] time 0.710 (0.885) data 0.000 (0.162) loss 1.3545 (1.6881) acc 68.7500 (61.8750) lr 1.9877e-03 eta 1:26:18 +epoch [12/200] batch [10/31] time 0.709 (0.798) data 0.000 (0.081) loss 2.1875 (1.7482) acc 53.1250 (58.7500) lr 1.9877e-03 eta 1:17:50 +epoch [12/200] batch [15/31] time 0.706 (0.771) data 0.000 (0.054) loss 1.2832 (1.7414) acc 62.5000 (57.7083) lr 1.9877e-03 eta 1:15:04 +epoch [12/200] batch [20/31] time 0.711 (0.755) data 0.000 (0.041) loss 2.0586 (1.7296) acc 50.0000 (58.5938) lr 1.9877e-03 eta 1:13:25 +epoch [12/200] batch [25/31] time 0.703 (0.745) data 0.000 (0.033) loss 2.0371 (1.7016) acc 56.2500 (59.0000) lr 1.9877e-03 eta 1:12:24 +epoch [12/200] batch 
[30/31] time 0.724 (0.739) data 0.000 (0.027) loss 1.7979 (1.7188) acc 46.8750 (58.8542) lr 1.9877e-03 eta 1:11:47 +epoch [13/200] batch [5/31] time 0.710 (0.902) data 0.000 (0.176) loss 1.9678 (1.7258) acc 53.1250 (60.0000) lr 1.9851e-03 eta 1:27:29 +epoch [13/200] batch [10/31] time 0.707 (0.807) data 0.000 (0.088) loss 1.7227 (1.6686) acc 59.3750 (59.6875) lr 1.9851e-03 eta 1:18:16 +epoch [13/200] batch [15/31] time 0.710 (0.778) data 0.000 (0.059) loss 1.8682 (1.6587) acc 50.0000 (59.1667) lr 1.9851e-03 eta 1:15:22 +epoch [13/200] batch [20/31] time 0.705 (0.761) data 0.000 (0.044) loss 1.7012 (1.6873) acc 56.2500 (59.0625) lr 1.9851e-03 eta 1:13:40 +epoch [13/200] batch [25/31] time 0.705 (0.751) data 0.000 (0.035) loss 1.8242 (1.7188) acc 59.3750 (58.8750) lr 1.9851e-03 eta 1:12:35 +epoch [13/200] batch [30/31] time 0.718 (0.743) data 0.000 (0.029) loss 1.6240 (1.7295) acc 56.2500 (58.6458) lr 1.9851e-03 eta 1:11:50 +epoch [14/200] batch [5/31] time 0.707 (0.898) data 0.000 (0.180) loss 1.3320 (1.5926) acc 65.6250 (58.1250) lr 1.9823e-03 eta 1:26:40 +epoch [14/200] batch [10/31] time 0.709 (0.804) data 0.000 (0.090) loss 1.8955 (1.5830) acc 56.2500 (59.6875) lr 1.9823e-03 eta 1:17:32 +epoch [14/200] batch [15/31] time 0.706 (0.773) data 0.000 (0.060) loss 1.9883 (1.6517) acc 53.1250 (57.9167) lr 1.9823e-03 eta 1:14:26 +epoch [14/200] batch [20/31] time 0.716 (0.758) data 0.000 (0.045) loss 1.7725 (1.7143) acc 75.0000 (58.5938) lr 1.9823e-03 eta 1:12:58 +epoch [14/200] batch [25/31] time 0.705 (0.748) data 0.000 (0.036) loss 1.5264 (1.7246) acc 68.7500 (58.5000) lr 1.9823e-03 eta 1:11:58 +epoch [14/200] batch [30/31] time 0.720 (0.742) data 0.000 (0.030) loss 1.5762 (1.7026) acc 62.5000 (59.0625) lr 1.9823e-03 eta 1:11:21 +epoch [15/200] batch [5/31] time 0.720 (0.902) data 0.000 (0.171) loss 2.1699 (1.7445) acc 46.8750 (54.3750) lr 1.9792e-03 eta 1:26:33 +epoch [15/200] batch [10/31] time 0.709 (0.810) data 0.000 (0.086) loss 1.5830 (1.7466) acc 65.6250 
(56.8750) lr 1.9792e-03 eta 1:17:45 +epoch [15/200] batch [15/31] time 0.713 (0.778) data 0.001 (0.057) loss 1.9014 (1.7294) acc 50.0000 (56.6667) lr 1.9792e-03 eta 1:14:32 +epoch [15/200] batch [20/31] time 0.721 (0.763) data 0.000 (0.043) loss 2.1738 (1.6663) acc 59.3750 (58.4375) lr 1.9792e-03 eta 1:13:02 +epoch [15/200] batch [25/31] time 0.702 (0.752) data 0.000 (0.035) loss 2.1934 (1.6591) acc 50.0000 (57.8750) lr 1.9792e-03 eta 1:11:59 +epoch [15/200] batch [30/31] time 0.709 (0.745) data 0.000 (0.029) loss 2.4648 (1.6822) acc 43.7500 (57.7083) lr 1.9792e-03 eta 1:11:14 +epoch [16/200] batch [5/31] time 0.706 (0.905) data 0.000 (0.185) loss 1.9795 (1.5635) acc 59.3750 (61.2500) lr 1.9759e-03 eta 1:26:28 +epoch [16/200] batch [10/31] time 0.711 (0.810) data 0.000 (0.093) loss 1.4102 (1.5801) acc 65.6250 (61.2500) lr 1.9759e-03 eta 1:17:14 +epoch [16/200] batch [15/31] time 0.725 (0.791) data 0.000 (0.062) loss 1.7178 (1.5733) acc 59.3750 (62.7083) lr 1.9759e-03 eta 1:15:25 +epoch [16/200] batch [20/31] time 0.704 (0.770) data 0.000 (0.047) loss 1.5518 (1.6393) acc 53.1250 (61.0938) lr 1.9759e-03 eta 1:13:21 +epoch [16/200] batch [25/31] time 0.707 (0.759) data 0.000 (0.037) loss 1.6895 (1.6398) acc 65.6250 (61.0000) lr 1.9759e-03 eta 1:12:13 +epoch [16/200] batch [30/31] time 0.705 (0.751) data 0.000 (0.031) loss 1.3799 (1.6521) acc 56.2500 (60.2083) lr 1.9759e-03 eta 1:11:23 +epoch [17/200] batch [5/31] time 0.705 (0.891) data 0.000 (0.170) loss 1.3887 (1.4385) acc 46.8750 (61.2500) lr 1.9724e-03 eta 1:24:35 +epoch [17/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.085) loss 1.8457 (1.5687) acc 62.5000 (62.1875) lr 1.9724e-03 eta 1:16:14 +epoch [17/200] batch [15/31] time 0.717 (0.773) data 0.000 (0.057) loss 1.8740 (1.6507) acc 59.3750 (61.8750) lr 1.9724e-03 eta 1:13:19 +epoch [17/200] batch [20/31] time 0.703 (0.757) data 0.000 (0.043) loss 2.4453 (1.6857) acc 50.0000 (61.5625) lr 1.9724e-03 eta 1:11:43 +epoch [17/200] batch [25/31] time 0.711 
(0.749) data 0.000 (0.034) loss 1.4980 (1.6797) acc 56.2500 (60.7500) lr 1.9724e-03 eta 1:10:53 +epoch [17/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.029) loss 1.9141 (1.6737) acc 62.5000 (61.1458) lr 1.9724e-03 eta 1:10:15 +epoch [18/200] batch [5/31] time 0.710 (0.924) data 0.000 (0.168) loss 1.2793 (1.3951) acc 75.0000 (66.2500) lr 1.9686e-03 eta 1:27:17 +epoch [18/200] batch [10/31] time 0.710 (0.818) data 0.000 (0.084) loss 2.4766 (1.5546) acc 34.3750 (62.1875) lr 1.9686e-03 eta 1:17:12 +epoch [18/200] batch [15/31] time 0.707 (0.783) data 0.000 (0.056) loss 1.5098 (1.5853) acc 62.5000 (61.8750) lr 1.9686e-03 eta 1:13:47 +epoch [18/200] batch [20/31] time 0.708 (0.766) data 0.000 (0.042) loss 1.3301 (1.6377) acc 62.5000 (60.6250) lr 1.9686e-03 eta 1:12:08 +epoch [18/200] batch [25/31] time 0.706 (0.755) data 0.000 (0.034) loss 1.7822 (1.6350) acc 56.2500 (61.0000) lr 1.9686e-03 eta 1:11:02 +epoch [18/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.028) loss 1.7852 (1.6222) acc 53.1250 (60.9375) lr 1.9686e-03 eta 1:10:17 +epoch [19/200] batch [5/31] time 0.712 (0.898) data 0.000 (0.176) loss 1.6748 (1.5188) acc 62.5000 (60.0000) lr 1.9646e-03 eta 1:24:19 +epoch [19/200] batch [10/31] time 0.708 (0.806) data 0.000 (0.088) loss 1.1914 (1.5285) acc 81.2500 (64.0625) lr 1.9646e-03 eta 1:15:37 +epoch [19/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.059) loss 1.8184 (1.5650) acc 56.2500 (63.1250) lr 1.9646e-03 eta 1:12:36 +epoch [19/200] batch [20/31] time 0.706 (0.759) data 0.000 (0.044) loss 2.0117 (1.5730) acc 50.0000 (62.8125) lr 1.9646e-03 eta 1:11:04 +epoch [19/200] batch [25/31] time 0.721 (0.750) data 0.000 (0.036) loss 1.5332 (1.5559) acc 62.5000 (63.1250) lr 1.9646e-03 eta 1:10:13 +epoch [19/200] batch [30/31] time 0.704 (0.744) data 0.000 (0.030) loss 2.3242 (1.6340) acc 50.0000 (61.4583) lr 1.9646e-03 eta 1:09:35 +epoch [20/200] batch [5/31] time 0.709 (0.896) data 0.000 (0.169) loss 1.7559 (1.8162) acc 46.8750 (56.8750) lr 1.9603e-03 
eta 1:23:40 +epoch [20/200] batch [10/31] time 0.714 (0.821) data 0.001 (0.085) loss 1.2012 (1.8496) acc 75.0000 (57.5000) lr 1.9603e-03 eta 1:16:36 +epoch [20/200] batch [15/31] time 0.711 (0.784) data 0.000 (0.057) loss 0.9756 (1.8039) acc 78.1250 (59.3750) lr 1.9603e-03 eta 1:13:09 +epoch [20/200] batch [20/31] time 0.721 (0.766) data 0.000 (0.043) loss 1.1113 (1.7025) acc 68.7500 (61.0938) lr 1.9603e-03 eta 1:11:23 +epoch [20/200] batch [25/31] time 0.711 (0.755) data 0.000 (0.034) loss 1.5732 (1.6668) acc 62.5000 (61.1250) lr 1.9603e-03 eta 1:10:17 +epoch [20/200] batch [30/31] time 0.712 (0.748) data 0.000 (0.028) loss 1.7510 (1.6528) acc 59.3750 (61.9792) lr 1.9603e-03 eta 1:09:34 +epoch [21/200] batch [5/31] time 0.714 (0.897) data 0.000 (0.168) loss 1.1738 (1.4449) acc 71.8750 (66.2500) lr 1.9558e-03 eta 1:23:18 +epoch [21/200] batch [10/31] time 0.715 (0.806) data 0.000 (0.085) loss 2.1172 (1.6761) acc 53.1250 (61.8750) lr 1.9558e-03 eta 1:14:48 +epoch [21/200] batch [15/31] time 0.712 (0.776) data 0.000 (0.056) loss 1.6172 (1.6149) acc 56.2500 (62.9167) lr 1.9558e-03 eta 1:11:59 +epoch [21/200] batch [20/31] time 0.724 (0.762) data 0.000 (0.042) loss 1.0518 (1.6031) acc 75.0000 (63.2812) lr 1.9558e-03 eta 1:10:35 +epoch [21/200] batch [25/31] time 0.718 (0.753) data 0.000 (0.034) loss 1.9141 (1.6096) acc 59.3750 (63.5000) lr 1.9558e-03 eta 1:09:40 +epoch [21/200] batch [30/31] time 0.705 (0.745) data 0.000 (0.028) loss 1.5928 (1.6053) acc 56.2500 (63.0208) lr 1.9558e-03 eta 1:08:56 +epoch [22/200] batch [5/31] time 0.713 (1.112) data 0.000 (0.384) loss 2.0977 (1.7074) acc 46.8750 (58.7500) lr 1.9511e-03 eta 1:42:44 +epoch [22/200] batch [10/31] time 0.712 (0.915) data 0.000 (0.192) loss 2.0039 (1.5941) acc 50.0000 (60.9375) lr 1.9511e-03 eta 1:24:25 +epoch [22/200] batch [15/31] time 0.708 (0.847) data 0.000 (0.128) loss 1.2227 (1.5408) acc 62.5000 (61.6667) lr 1.9511e-03 eta 1:18:09 +epoch [22/200] batch [20/31] time 0.713 (0.815) data 0.000 (0.096) 
loss 1.5420 (1.5118) acc 62.5000 (62.6562) lr 1.9511e-03 eta 1:15:03 +epoch [22/200] batch [25/31] time 0.707 (0.794) data 0.001 (0.077) loss 1.5889 (1.5085) acc 62.5000 (63.0000) lr 1.9511e-03 eta 1:13:04 +epoch [22/200] batch [30/31] time 0.733 (0.781) data 0.000 (0.064) loss 0.9424 (1.5407) acc 78.1250 (62.9167) lr 1.9511e-03 eta 1:11:51 +epoch [23/200] batch [5/31] time 0.701 (0.951) data 0.000 (0.231) loss 1.3447 (1.5031) acc 75.0000 (63.1250) lr 1.9461e-03 eta 1:27:23 +epoch [23/200] batch [10/31] time 0.724 (0.835) data 0.001 (0.116) loss 2.1523 (1.4777) acc 43.7500 (65.0000) lr 1.9461e-03 eta 1:16:38 +epoch [23/200] batch [15/31] time 0.704 (0.795) data 0.000 (0.077) loss 1.8359 (1.5530) acc 56.2500 (63.7500) lr 1.9461e-03 eta 1:12:52 +epoch [23/200] batch [20/31] time 0.728 (0.774) data 0.000 (0.058) loss 1.0088 (1.5466) acc 68.7500 (61.8750) lr 1.9461e-03 eta 1:10:57 +epoch [23/200] batch [25/31] time 0.714 (0.762) data 0.000 (0.046) loss 1.5205 (1.5622) acc 56.2500 (61.8750) lr 1.9461e-03 eta 1:09:48 +epoch [23/200] batch [30/31] time 0.704 (0.754) data 0.000 (0.039) loss 0.8818 (1.5678) acc 78.1250 (62.1875) lr 1.9461e-03 eta 1:08:59 +epoch [24/200] batch [5/31] time 0.709 (0.896) data 0.000 (0.173) loss 1.1826 (1.4061) acc 65.6250 (62.5000) lr 1.9409e-03 eta 1:21:51 +epoch [24/200] batch [10/31] time 0.715 (0.804) data 0.000 (0.087) loss 1.8906 (1.5122) acc 53.1250 (61.5625) lr 1.9409e-03 eta 1:13:23 +epoch [24/200] batch [15/31] time 0.709 (0.773) data 0.000 (0.058) loss 2.1484 (1.6277) acc 56.2500 (61.2500) lr 1.9409e-03 eta 1:10:31 +epoch [24/200] batch [20/31] time 0.729 (0.759) data 0.000 (0.044) loss 1.7422 (1.6058) acc 62.5000 (62.9688) lr 1.9409e-03 eta 1:09:08 +epoch [24/200] batch [25/31] time 0.705 (0.749) data 0.000 (0.035) loss 0.7568 (1.5840) acc 84.3750 (63.5000) lr 1.9409e-03 eta 1:08:12 +epoch [24/200] batch [30/31] time 0.715 (0.743) data 0.000 (0.029) loss 1.6357 (1.5512) acc 65.6250 (64.4792) lr 1.9409e-03 eta 1:07:32 +epoch 
[25/200] batch [5/31] time 0.707 (0.884) data 0.000 (0.156) loss 1.2002 (1.4219) acc 62.5000 (64.3750) lr 1.9354e-03 eta 1:20:18 +epoch [25/200] batch [10/31] time 0.707 (0.798) data 0.000 (0.078) loss 1.8672 (1.5957) acc 50.0000 (61.5625) lr 1.9354e-03 eta 1:12:26 +epoch [25/200] batch [15/31] time 0.708 (0.769) data 0.000 (0.052) loss 1.6777 (1.5340) acc 65.6250 (62.7083) lr 1.9354e-03 eta 1:09:45 +epoch [25/200] batch [20/31] time 0.719 (0.756) data 0.000 (0.039) loss 1.1836 (1.5415) acc 71.8750 (62.9688) lr 1.9354e-03 eta 1:08:28 +epoch [25/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.031) loss 2.5977 (1.5414) acc 46.8750 (63.6250) lr 1.9354e-03 eta 1:07:36 +epoch [25/200] batch [30/31] time 0.711 (0.741) data 0.000 (0.026) loss 1.8369 (1.5195) acc 62.5000 (64.1667) lr 1.9354e-03 eta 1:07:02 +epoch [26/200] batch [5/31] time 0.712 (0.890) data 0.000 (0.167) loss 1.4199 (1.5111) acc 78.1250 (63.1250) lr 1.9298e-03 eta 1:20:21 +epoch [26/200] batch [10/31] time 0.704 (0.814) data 0.000 (0.084) loss 1.8633 (1.4776) acc 59.3750 (63.4375) lr 1.9298e-03 eta 1:13:25 +epoch [26/200] batch [15/31] time 0.712 (0.780) data 0.000 (0.056) loss 1.4678 (1.4529) acc 62.5000 (64.1667) lr 1.9298e-03 eta 1:10:17 +epoch [26/200] batch [20/31] time 0.710 (0.763) data 0.000 (0.042) loss 2.0684 (1.5083) acc 46.8750 (62.5000) lr 1.9298e-03 eta 1:08:42 +epoch [26/200] batch [25/31] time 0.711 (0.753) data 0.000 (0.034) loss 1.5801 (1.4553) acc 56.2500 (63.3750) lr 1.9298e-03 eta 1:07:47 +epoch [26/200] batch [30/31] time 0.706 (0.746) data 0.000 (0.028) loss 1.5205 (1.4601) acc 62.5000 (63.7500) lr 1.9298e-03 eta 1:07:04 +epoch [27/200] batch [5/31] time 0.720 (0.927) data 0.001 (0.201) loss 0.9888 (1.4114) acc 78.1250 (70.0000) lr 1.9239e-03 eta 1:23:15 +epoch [27/200] batch [10/31] time 0.714 (0.820) data 0.001 (0.101) loss 1.7891 (1.4137) acc 65.6250 (68.1250) lr 1.9239e-03 eta 1:13:37 +epoch [27/200] batch [15/31] time 0.717 (0.785) data 0.000 (0.067) loss 1.3154 (1.4047) 
acc 59.3750 (66.6667) lr 1.9239e-03 eta 1:10:22 +epoch [27/200] batch [20/31] time 0.714 (0.767) data 0.000 (0.051) loss 1.6797 (1.4494) acc 62.5000 (65.4688) lr 1.9239e-03 eta 1:08:39 +epoch [27/200] batch [25/31] time 0.716 (0.756) data 0.000 (0.040) loss 1.6348 (1.4745) acc 56.2500 (64.5000) lr 1.9239e-03 eta 1:07:41 +epoch [27/200] batch [30/31] time 0.723 (0.754) data 0.000 (0.034) loss 1.3418 (1.4528) acc 65.6250 (65.0000) lr 1.9239e-03 eta 1:07:24 +epoch [28/200] batch [5/31] time 0.711 (0.885) data 0.000 (0.161) loss 1.5762 (1.5309) acc 65.6250 (66.2500) lr 1.9178e-03 eta 1:18:59 +epoch [28/200] batch [10/31] time 0.709 (0.801) data 0.000 (0.081) loss 1.6895 (1.5498) acc 65.6250 (63.7500) lr 1.9178e-03 eta 1:11:27 +epoch [28/200] batch [15/31] time 0.713 (0.771) data 0.001 (0.054) loss 1.8770 (1.5628) acc 53.1250 (63.5417) lr 1.9178e-03 eta 1:08:45 +epoch [28/200] batch [20/31] time 0.720 (0.757) data 0.000 (0.041) loss 1.9033 (1.5223) acc 53.1250 (64.0625) lr 1.9178e-03 eta 1:07:23 +epoch [28/200] batch [25/31] time 0.712 (0.749) data 0.000 (0.033) loss 2.4121 (1.5376) acc 56.2500 (63.7500) lr 1.9178e-03 eta 1:06:37 +epoch [28/200] batch [30/31] time 0.721 (0.743) data 0.000 (0.027) loss 1.4248 (1.5352) acc 68.7500 (63.6458) lr 1.9178e-03 eta 1:06:04 +epoch [29/200] batch [5/31] time 0.721 (0.887) data 0.000 (0.163) loss 1.1621 (1.3381) acc 75.0000 (66.8750) lr 1.9114e-03 eta 1:18:44 +epoch [29/200] batch [10/31] time 0.715 (0.801) data 0.001 (0.082) loss 1.1680 (1.2593) acc 68.7500 (67.8125) lr 1.9114e-03 eta 1:11:05 +epoch [29/200] batch [15/31] time 0.714 (0.772) data 0.000 (0.055) loss 1.7246 (1.3561) acc 56.2500 (65.8333) lr 1.9114e-03 eta 1:08:26 +epoch [29/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.041) loss 1.4873 (1.3947) acc 62.5000 (65.4688) lr 1.9114e-03 eta 1:07:09 +epoch [29/200] batch [25/31] time 0.709 (0.749) data 0.000 (0.033) loss 1.3682 (1.4582) acc 65.6250 (64.6250) lr 1.9114e-03 eta 1:06:16 +epoch [29/200] batch [30/31] time 
0.719 (0.747) data 0.000 (0.027) loss 0.8374 (1.4585) acc 75.0000 (65.1042) lr 1.9114e-03 eta 1:06:02 +epoch [30/200] batch [5/31] time 0.710 (0.900) data 0.000 (0.177) loss 1.3594 (1.3506) acc 71.8750 (68.1250) lr 1.9048e-03 eta 1:19:24 +epoch [30/200] batch [10/31] time 0.703 (0.806) data 0.001 (0.089) loss 1.7393 (1.5329) acc 71.8750 (66.5625) lr 1.9048e-03 eta 1:11:05 +epoch [30/200] batch [15/31] time 0.705 (0.775) data 0.000 (0.059) loss 0.9868 (1.4479) acc 71.8750 (66.2500) lr 1.9048e-03 eta 1:08:16 +epoch [30/200] batch [20/31] time 0.707 (0.760) data 0.000 (0.044) loss 1.2100 (1.3892) acc 75.0000 (67.6562) lr 1.9048e-03 eta 1:06:53 +epoch [30/200] batch [25/31] time 0.704 (0.750) data 0.000 (0.036) loss 1.3799 (1.4492) acc 71.8750 (66.3750) lr 1.9048e-03 eta 1:05:56 +epoch [30/200] batch [30/31] time 0.704 (0.743) data 0.000 (0.030) loss 1.3701 (1.4609) acc 59.3750 (65.0000) lr 1.9048e-03 eta 1:05:16 +epoch [31/200] batch [5/31] time 0.726 (0.898) data 0.000 (0.170) loss 1.4111 (1.4047) acc 68.7500 (66.2500) lr 1.8980e-03 eta 1:18:49 +epoch [31/200] batch [10/31] time 0.706 (0.804) data 0.000 (0.085) loss 1.8164 (1.3408) acc 68.7500 (69.6875) lr 1.8980e-03 eta 1:10:27 +epoch [31/200] batch [15/31] time 0.705 (0.773) data 0.000 (0.057) loss 2.0078 (1.4847) acc 43.7500 (65.2083) lr 1.8980e-03 eta 1:07:44 +epoch [31/200] batch [20/31] time 0.707 (0.760) data 0.000 (0.043) loss 2.5137 (1.5404) acc 62.5000 (64.8438) lr 1.8980e-03 eta 1:06:28 +epoch [31/200] batch [25/31] time 0.727 (0.752) data 0.000 (0.034) loss 1.4697 (1.5094) acc 62.5000 (64.8750) lr 1.8980e-03 eta 1:05:44 +epoch [31/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.029) loss 1.6572 (1.5263) acc 59.3750 (63.8542) lr 1.8980e-03 eta 1:05:02 +epoch [32/200] batch [5/31] time 0.717 (0.888) data 0.000 (0.164) loss 1.5283 (1.7588) acc 56.2500 (61.2500) lr 1.8910e-03 eta 1:17:25 +epoch [32/200] batch [10/31] time 0.720 (0.802) data 0.001 (0.082) loss 1.2295 (1.4462) acc 65.6250 (66.5625) lr 
1.8910e-03 eta 1:09:54 +epoch [32/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.055) loss 1.7441 (1.4882) acc 50.0000 (65.8333) lr 1.8910e-03 eta 1:07:13 +epoch [32/200] batch [20/31] time 0.725 (0.757) data 0.000 (0.041) loss 1.4854 (1.4649) acc 56.2500 (65.3125) lr 1.8910e-03 eta 1:05:51 +epoch [32/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.033) loss 1.2324 (1.4621) acc 75.0000 (65.6250) lr 1.8910e-03 eta 1:04:54 +epoch [32/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.028) loss 0.9575 (1.4693) acc 68.7500 (65.2083) lr 1.8910e-03 eta 1:04:15 +epoch [33/200] batch [5/31] time 0.725 (0.890) data 0.000 (0.167) loss 0.9561 (1.4658) acc 78.1250 (66.8750) lr 1.8838e-03 eta 1:17:10 +epoch [33/200] batch [10/31] time 0.711 (0.802) data 0.000 (0.084) loss 1.4922 (1.4775) acc 59.3750 (61.8750) lr 1.8838e-03 eta 1:09:28 +epoch [33/200] batch [15/31] time 0.708 (0.782) data 0.000 (0.056) loss 1.4639 (1.4171) acc 56.2500 (63.1250) lr 1.8838e-03 eta 1:07:39 +epoch [33/200] batch [20/31] time 0.712 (0.765) data 0.000 (0.042) loss 1.8086 (1.4706) acc 62.5000 (63.1250) lr 1.8838e-03 eta 1:06:06 +epoch [33/200] batch [25/31] time 0.714 (0.754) data 0.000 (0.034) loss 0.9116 (1.4451) acc 75.0000 (64.6250) lr 1.8838e-03 eta 1:05:10 +epoch [33/200] batch [30/31] time 0.712 (0.748) data 0.000 (0.028) loss 1.3076 (1.4343) acc 65.6250 (64.8958) lr 1.8838e-03 eta 1:04:30 +epoch [34/200] batch [5/31] time 0.714 (0.912) data 0.000 (0.187) loss 2.0156 (1.3594) acc 53.1250 (68.1250) lr 1.8763e-03 eta 1:18:36 +epoch [34/200] batch [10/31] time 0.710 (0.814) data 0.000 (0.094) loss 1.1758 (1.3618) acc 68.7500 (66.8750) lr 1.8763e-03 eta 1:10:07 +epoch [34/200] batch [15/31] time 0.720 (0.781) data 0.000 (0.063) loss 1.4180 (1.3044) acc 75.0000 (67.7083) lr 1.8763e-03 eta 1:07:12 +epoch [34/200] batch [20/31] time 0.714 (0.764) data 0.000 (0.047) loss 1.4775 (1.3242) acc 71.8750 (67.1875) lr 1.8763e-03 eta 1:05:41 +epoch [34/200] batch [25/31] time 0.724 (0.754) data 0.000 
(0.038) loss 1.6543 (1.3461) acc 68.7500 (66.7500) lr 1.8763e-03 eta 1:04:43 +epoch [34/200] batch [30/31] time 0.722 (0.747) data 0.000 (0.031) loss 1.6904 (1.3554) acc 56.2500 (66.0417) lr 1.8763e-03 eta 1:04:05 +epoch [35/200] batch [5/31] time 0.708 (0.895) data 0.000 (0.173) loss 1.4434 (1.6496) acc 62.5000 (63.7500) lr 1.8686e-03 eta 1:16:41 +epoch [35/200] batch [10/31] time 0.723 (0.805) data 0.000 (0.087) loss 1.1914 (1.4513) acc 71.8750 (66.5625) lr 1.8686e-03 eta 1:08:53 +epoch [35/200] batch [15/31] time 0.721 (0.775) data 0.000 (0.058) loss 1.2617 (1.4355) acc 81.2500 (66.0417) lr 1.8686e-03 eta 1:06:16 +epoch [35/200] batch [20/31] time 0.706 (0.759) data 0.000 (0.044) loss 1.6260 (1.4639) acc 65.6250 (65.9375) lr 1.8686e-03 eta 1:04:49 +epoch [35/200] batch [25/31] time 0.714 (0.749) data 0.000 (0.035) loss 1.0508 (1.4284) acc 65.6250 (66.1250) lr 1.8686e-03 eta 1:03:56 +epoch [35/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.029) loss 1.3027 (1.4138) acc 62.5000 (65.9375) lr 1.8686e-03 eta 1:03:21 +epoch [36/200] batch [5/31] time 0.709 (0.926) data 0.000 (0.176) loss 1.3301 (1.2494) acc 75.0000 (71.2500) lr 1.8607e-03 eta 1:18:50 +epoch [36/200] batch [10/31] time 0.727 (0.821) data 0.000 (0.088) loss 1.3857 (1.4205) acc 71.8750 (66.5625) lr 1.8607e-03 eta 1:09:50 +epoch [36/200] batch [15/31] time 0.718 (0.785) data 0.000 (0.059) loss 1.8545 (1.4310) acc 50.0000 (65.0000) lr 1.8607e-03 eta 1:06:45 +epoch [36/200] batch [20/31] time 0.721 (0.766) data 0.000 (0.044) loss 0.7578 (1.3446) acc 81.2500 (66.8750) lr 1.8607e-03 eta 1:05:04 +epoch [36/200] batch [25/31] time 0.708 (0.755) data 0.000 (0.035) loss 1.3643 (1.3351) acc 75.0000 (67.8750) lr 1.8607e-03 eta 1:04:05 +epoch [36/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.030) loss 1.3262 (1.3590) acc 59.3750 (66.5625) lr 1.8607e-03 eta 1:03:21 +epoch [37/200] batch [5/31] time 0.712 (0.886) data 0.000 (0.161) loss 1.8818 (1.5181) acc 59.3750 (63.1250) lr 1.8526e-03 eta 1:15:00 +epoch 
[37/200] batch [10/31] time 0.702 (0.802) data 0.000 (0.081) loss 1.6582 (1.5112) acc 43.7500 (63.4375) lr 1.8526e-03 eta 1:07:46 +epoch [37/200] batch [15/31] time 0.709 (0.772) data 0.000 (0.054) loss 1.5908 (1.4418) acc 62.5000 (64.1667) lr 1.8526e-03 eta 1:05:11 +epoch [37/200] batch [20/31] time 0.704 (0.756) data 0.000 (0.041) loss 1.6328 (1.3729) acc 53.1250 (65.6250) lr 1.8526e-03 eta 1:03:50 +epoch [37/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.032) loss 2.0254 (1.4101) acc 62.5000 (66.0000) lr 1.8526e-03 eta 1:03:28 +epoch [37/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.027) loss 1.3262 (1.4346) acc 65.6250 (65.4167) lr 1.8526e-03 eta 1:02:47 +epoch [38/200] batch [5/31] time 0.702 (0.901) data 0.000 (0.178) loss 1.4600 (1.7178) acc 62.5000 (57.5000) lr 1.8443e-03 eta 1:15:47 +epoch [38/200] batch [10/31] time 0.709 (0.806) data 0.000 (0.089) loss 1.4209 (1.5158) acc 62.5000 (62.1875) lr 1.8443e-03 eta 1:07:44 +epoch [38/200] batch [15/31] time 0.719 (0.775) data 0.000 (0.060) loss 1.5020 (1.4426) acc 59.3750 (63.1250) lr 1.8443e-03 eta 1:05:02 +epoch [38/200] batch [20/31] time 0.716 (0.760) data 0.000 (0.045) loss 1.2598 (1.3960) acc 59.3750 (64.0625) lr 1.8443e-03 eta 1:03:47 +epoch [38/200] batch [25/31] time 0.725 (0.752) data 0.000 (0.036) loss 1.6719 (1.3972) acc 62.5000 (64.0000) lr 1.8443e-03 eta 1:02:58 +epoch [38/200] batch [30/31] time 0.724 (0.746) data 0.000 (0.030) loss 1.3877 (1.3619) acc 65.6250 (65.2083) lr 1.8443e-03 eta 1:02:24 +epoch [39/200] batch [5/31] time 0.710 (0.888) data 0.000 (0.163) loss 1.1162 (1.1290) acc 68.7500 (69.3750) lr 1.8358e-03 eta 1:14:14 +epoch [39/200] batch [10/31] time 0.717 (0.801) data 0.000 (0.082) loss 1.4346 (1.3062) acc 65.6250 (67.8125) lr 1.8358e-03 eta 1:06:55 +epoch [39/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.055) loss 0.9214 (1.2983) acc 78.1250 (66.8750) lr 1.8358e-03 eta 1:04:19 +epoch [39/200] batch [20/31] time 0.712 (0.763) data 0.000 (0.041) loss 0.9746 (1.2859) 
acc 59.3750 (67.0312) lr 1.8358e-03 eta 1:03:36 +epoch [39/200] batch [25/31] time 0.706 (0.753) data 0.000 (0.033) loss 1.5098 (1.2556) acc 65.6250 (67.6250) lr 1.8358e-03 eta 1:02:40 +epoch [39/200] batch [30/31] time 0.707 (0.746) data 0.000 (0.027) loss 1.2891 (1.2693) acc 65.6250 (67.2917) lr 1.8358e-03 eta 1:02:02 +epoch [40/200] batch [5/31] time 0.709 (0.890) data 0.000 (0.171) loss 0.9771 (1.2476) acc 78.1250 (69.3750) lr 1.8271e-03 eta 1:13:59 +epoch [40/200] batch [10/31] time 0.712 (0.802) data 0.000 (0.086) loss 1.6221 (1.3299) acc 78.1250 (68.7500) lr 1.8271e-03 eta 1:06:35 +epoch [40/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.057) loss 0.9404 (1.2808) acc 65.6250 (67.7083) lr 1.8271e-03 eta 1:04:04 +epoch [40/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.043) loss 1.4453 (1.3503) acc 71.8750 (66.8750) lr 1.8271e-03 eta 1:02:45 +epoch [40/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.034) loss 1.0469 (1.3454) acc 71.8750 (66.3750) lr 1.8271e-03 eta 1:01:55 +epoch [40/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.029) loss 1.2812 (1.3810) acc 68.7500 (66.6667) lr 1.8271e-03 eta 1:01:22 +epoch [41/200] batch [5/31] time 0.711 (0.890) data 0.000 (0.171) loss 1.6348 (1.1295) acc 65.6250 (71.8750) lr 1.8181e-03 eta 1:13:28 +epoch [41/200] batch [10/31] time 0.707 (0.802) data 0.000 (0.086) loss 1.3467 (1.1717) acc 59.3750 (71.2500) lr 1.8181e-03 eta 1:06:08 +epoch [41/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.057) loss 1.1416 (1.1598) acc 71.8750 (71.6667) lr 1.8181e-03 eta 1:03:41 +epoch [41/200] batch [20/31] time 0.705 (0.757) data 0.000 (0.043) loss 1.9580 (1.1895) acc 53.1250 (70.6250) lr 1.8181e-03 eta 1:02:20 +epoch [41/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.034) loss 1.4434 (1.2464) acc 71.8750 (69.3750) lr 1.8181e-03 eta 1:01:31 +epoch [41/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.029) loss 1.2832 (1.2789) acc 75.0000 (68.6458) lr 1.8181e-03 eta 1:00:54 +epoch [42/200] batch [5/31] time 
0.715 (0.886) data 0.000 (0.160) loss 1.3018 (1.1553) acc 71.8750 (71.8750) lr 1.8090e-03 eta 1:12:41 +epoch [42/200] batch [10/31] time 0.719 (0.800) data 0.000 (0.080) loss 1.4346 (1.2361) acc 62.5000 (69.0625) lr 1.8090e-03 eta 1:05:36 +epoch [42/200] batch [15/31] time 0.729 (0.782) data 0.000 (0.054) loss 1.3525 (1.2668) acc 62.5000 (68.9583) lr 1.8090e-03 eta 1:04:01 +epoch [42/200] batch [20/31] time 0.716 (0.764) data 0.000 (0.040) loss 1.4668 (1.2613) acc 62.5000 (68.5938) lr 1.8090e-03 eta 1:02:31 +epoch [42/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.032) loss 1.0176 (1.2471) acc 75.0000 (68.8750) lr 1.8090e-03 eta 1:01:37 +epoch [42/200] batch [30/31] time 0.709 (0.747) data 0.000 (0.027) loss 1.5088 (1.2647) acc 62.5000 (68.6458) lr 1.8090e-03 eta 1:00:59 +epoch [43/200] batch [5/31] time 0.719 (0.900) data 0.000 (0.177) loss 1.5967 (1.1591) acc 68.7500 (71.2500) lr 1.7997e-03 eta 1:13:25 +epoch [43/200] batch [10/31] time 0.713 (0.807) data 0.000 (0.089) loss 1.7900 (1.2375) acc 46.8750 (67.5000) lr 1.7997e-03 eta 1:05:43 +epoch [43/200] batch [15/31] time 0.716 (0.776) data 0.001 (0.059) loss 1.5703 (1.3606) acc 62.5000 (65.2083) lr 1.7997e-03 eta 1:03:08 +epoch [43/200] batch [20/31] time 0.718 (0.760) data 0.000 (0.045) loss 1.4688 (1.3949) acc 65.6250 (64.6875) lr 1.7997e-03 eta 1:01:46 +epoch [43/200] batch [25/31] time 0.715 (0.749) data 0.000 (0.036) loss 1.1641 (1.3602) acc 59.3750 (65.8750) lr 1.7997e-03 eta 1:00:51 +epoch [43/200] batch [30/31] time 0.714 (0.743) data 0.000 (0.030) loss 1.9023 (1.3662) acc 59.3750 (65.4167) lr 1.7997e-03 eta 1:00:17 +epoch [44/200] batch [5/31] time 0.723 (0.897) data 0.000 (0.169) loss 0.9995 (1.4472) acc 75.0000 (68.1250) lr 1.7902e-03 eta 1:12:41 +epoch [44/200] batch [10/31] time 0.705 (0.804) data 0.000 (0.085) loss 1.8330 (1.4146) acc 59.3750 (65.9375) lr 1.7902e-03 eta 1:05:04 +epoch [44/200] batch [15/31] time 0.705 (0.773) data 0.000 (0.056) loss 0.8350 (1.3103) acc 75.0000 (67.9167) lr 
1.7902e-03 eta 1:02:29 +epoch [44/200] batch [20/31] time 0.699 (0.755) data 0.000 (0.042) loss 2.0781 (1.3262) acc 59.3750 (67.9688) lr 1.7902e-03 eta 1:01:00 +epoch [44/200] batch [25/31] time 0.709 (0.746) data 0.000 (0.034) loss 1.2480 (1.3144) acc 75.0000 (68.2500) lr 1.7902e-03 eta 1:00:14 +epoch [44/200] batch [30/31] time 0.708 (0.740) data 0.000 (0.028) loss 1.2705 (1.3378) acc 68.7500 (67.7083) lr 1.7902e-03 eta 0:59:40 +epoch [45/200] batch [5/31] time 0.704 (0.886) data 0.000 (0.165) loss 1.2656 (1.2112) acc 71.8750 (70.6250) lr 1.7804e-03 eta 1:11:21 +epoch [45/200] batch [10/31] time 0.712 (0.797) data 0.000 (0.082) loss 1.4980 (1.1662) acc 65.6250 (69.0625) lr 1.7804e-03 eta 1:04:06 +epoch [45/200] batch [15/31] time 0.722 (0.769) data 0.000 (0.055) loss 1.4609 (1.2732) acc 59.3750 (69.3750) lr 1.7804e-03 eta 1:01:48 +epoch [45/200] batch [20/31] time 0.711 (0.754) data 0.000 (0.041) loss 1.3906 (1.2818) acc 65.6250 (68.1250) lr 1.7804e-03 eta 1:00:30 +epoch [45/200] batch [25/31] time 0.713 (0.744) data 0.000 (0.033) loss 1.7207 (1.3181) acc 59.3750 (67.8750) lr 1.7804e-03 eta 0:59:39 +epoch [45/200] batch [30/31] time 0.716 (0.738) data 0.000 (0.028) loss 1.1875 (1.3142) acc 68.7500 (68.5417) lr 1.7804e-03 eta 0:59:08 +epoch [46/200] batch [5/31] time 0.714 (0.885) data 0.000 (0.168) loss 1.0039 (1.1536) acc 65.6250 (71.2500) lr 1.7705e-03 eta 1:10:47 +epoch [46/200] batch [10/31] time 0.711 (0.799) data 0.000 (0.084) loss 1.1035 (1.0986) acc 78.1250 (70.9375) lr 1.7705e-03 eta 1:03:50 +epoch [46/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.056) loss 0.8477 (1.1070) acc 81.2500 (72.0833) lr 1.7705e-03 eta 1:01:30 +epoch [46/200] batch [20/31] time 0.711 (0.755) data 0.000 (0.042) loss 1.2178 (1.1507) acc 65.6250 (71.2500) lr 1.7705e-03 eta 1:00:11 +epoch [46/200] batch [25/31] time 0.706 (0.746) data 0.000 (0.034) loss 2.1719 (1.2212) acc 50.0000 (68.7500) lr 1.7705e-03 eta 0:59:24 +epoch [46/200] batch [30/31] time 0.723 (0.740) data 0.000 
(0.028) loss 0.9341 (1.2412) acc 75.0000 (68.7500) lr 1.7705e-03 eta 0:58:55 +epoch [47/200] batch [5/31] time 0.723 (0.884) data 0.000 (0.157) loss 1.3408 (1.3288) acc 68.7500 (66.2500) lr 1.7604e-03 eta 1:10:15 +epoch [47/200] batch [10/31] time 0.718 (0.799) data 0.000 (0.079) loss 1.2793 (1.2798) acc 71.8750 (68.1250) lr 1.7604e-03 eta 1:03:26 +epoch [47/200] batch [15/31] time 0.710 (0.780) data 0.000 (0.053) loss 1.3271 (1.2611) acc 68.7500 (68.9583) lr 1.7604e-03 eta 1:01:50 +epoch [47/200] batch [20/31] time 0.705 (0.762) data 0.000 (0.039) loss 1.0596 (1.2573) acc 75.0000 (68.7500) lr 1.7604e-03 eta 1:00:22 +epoch [47/200] batch [25/31] time 0.710 (0.751) data 0.000 (0.032) loss 0.4519 (1.2367) acc 87.5000 (69.7500) lr 1.7604e-03 eta 0:59:28 +epoch [47/200] batch [30/31] time 0.707 (0.744) data 0.000 (0.026) loss 1.0322 (1.2158) acc 62.5000 (69.4792) lr 1.7604e-03 eta 0:58:50 +epoch [48/200] batch [5/31] time 0.710 (0.879) data 0.000 (0.157) loss 1.2891 (1.2121) acc 71.8750 (71.2500) lr 1.7501e-03 eta 1:09:23 +epoch [48/200] batch [10/31] time 0.730 (0.802) data 0.000 (0.079) loss 1.2344 (1.3850) acc 75.0000 (68.7500) lr 1.7501e-03 eta 1:03:13 +epoch [48/200] batch [15/31] time 0.721 (0.773) data 0.000 (0.053) loss 1.1641 (1.3187) acc 71.8750 (70.4167) lr 1.7501e-03 eta 1:00:56 +epoch [48/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.040) loss 1.6045 (1.3734) acc 59.3750 (68.1250) lr 1.7501e-03 eta 0:59:39 +epoch [48/200] batch [25/31] time 0.706 (0.748) data 0.000 (0.032) loss 1.1455 (1.3869) acc 78.1250 (67.8750) lr 1.7501e-03 eta 0:58:50 +epoch [48/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.026) loss 1.5596 (1.3716) acc 68.7500 (68.3333) lr 1.7501e-03 eta 0:58:17 +epoch [49/200] batch [5/31] time 0.719 (0.934) data 0.000 (0.181) loss 1.0127 (1.2389) acc 75.0000 (70.0000) lr 1.7396e-03 eta 1:13:18 +epoch [49/200] batch [10/31] time 0.727 (0.824) data 0.000 (0.091) loss 1.6055 (1.4552) acc 65.6250 (65.0000) lr 1.7396e-03 eta 1:04:34 +epoch 
[49/200] batch [15/31] time 0.706 (0.785) data 0.000 (0.061) loss 1.7207 (1.5197) acc 46.8750 (62.9167) lr 1.7396e-03 eta 1:01:28 +epoch [49/200] batch [20/31] time 0.711 (0.766) data 0.000 (0.046) loss 1.8984 (1.6438) acc 56.2500 (61.5625) lr 1.7396e-03 eta 0:59:53 +epoch [49/200] batch [25/31] time 0.704 (0.754) data 0.000 (0.036) loss 1.4150 (1.6191) acc 65.6250 (62.5000) lr 1.7396e-03 eta 0:58:55 +epoch [49/200] batch [30/31] time 0.704 (0.747) data 0.000 (0.030) loss 1.7266 (1.6575) acc 59.3750 (61.4583) lr 1.7396e-03 eta 0:58:16 +epoch [50/200] batch [5/31] time 0.714 (0.901) data 0.001 (0.171) loss 2.0078 (1.6591) acc 56.2500 (60.0000) lr 1.7290e-03 eta 1:10:11 +epoch [50/200] batch [10/31] time 0.703 (0.806) data 0.000 (0.086) loss 1.4707 (1.4701) acc 65.6250 (64.0625) lr 1.7290e-03 eta 1:02:43 +epoch [50/200] batch [15/31] time 0.712 (0.775) data 0.001 (0.057) loss 1.7910 (1.6411) acc 65.6250 (61.6667) lr 1.7290e-03 eta 1:00:16 +epoch [50/200] batch [20/31] time 0.733 (0.760) data 0.000 (0.043) loss 1.2900 (1.5617) acc 65.6250 (62.5000) lr 1.7290e-03 eta 0:59:04 +epoch [50/200] batch [25/31] time 0.710 (0.751) data 0.000 (0.034) loss 1.5938 (1.5311) acc 59.3750 (62.7500) lr 1.7290e-03 eta 0:58:17 +epoch [50/200] batch [30/31] time 0.712 (0.745) data 0.000 (0.029) loss 1.3008 (1.4811) acc 71.8750 (64.5833) lr 1.7290e-03 eta 0:57:44 +epoch [51/200] batch [5/31] time 0.710 (0.882) data 0.000 (0.162) loss 1.6240 (1.3512) acc 59.3750 (65.0000) lr 1.7181e-03 eta 1:08:15 +epoch [51/200] batch [10/31] time 0.716 (0.811) data 0.001 (0.081) loss 1.6514 (1.2967) acc 62.5000 (65.9375) lr 1.7181e-03 eta 1:02:45 +epoch [51/200] batch [15/31] time 0.714 (0.779) data 0.000 (0.054) loss 1.3975 (1.3299) acc 65.6250 (66.2500) lr 1.7181e-03 eta 1:00:10 +epoch [51/200] batch [20/31] time 0.724 (0.763) data 0.000 (0.041) loss 0.9966 (1.3583) acc 71.8750 (65.0000) lr 1.7181e-03 eta 0:58:52 +epoch [51/200] batch [25/31] time 0.710 (0.752) data 0.000 (0.033) loss 0.9307 (1.3385) 
acc 62.5000 (65.6250) lr 1.7181e-03 eta 0:57:58 +epoch [51/200] batch [30/31] time 0.729 (0.746) data 0.000 (0.027) loss 2.7930 (1.3603) acc 43.7500 (65.7292) lr 1.7181e-03 eta 0:57:24 +epoch [52/200] batch [5/31] time 0.722 (0.905) data 0.000 (0.184) loss 1.2236 (1.3674) acc 68.7500 (67.5000) lr 1.7071e-03 eta 1:09:37 +epoch [52/200] batch [10/31] time 0.712 (0.811) data 0.000 (0.092) loss 1.4014 (1.2605) acc 68.7500 (70.6250) lr 1.7071e-03 eta 1:02:17 +epoch [52/200] batch [15/31] time 0.708 (0.778) data 0.000 (0.061) loss 1.1299 (1.2772) acc 75.0000 (71.4583) lr 1.7071e-03 eta 0:59:41 +epoch [52/200] batch [20/31] time 0.712 (0.761) data 0.000 (0.046) loss 1.6807 (1.3192) acc 56.2500 (69.3750) lr 1.7071e-03 eta 0:58:19 +epoch [52/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.037) loss 1.5166 (1.3456) acc 68.7500 (68.3750) lr 1.7071e-03 eta 0:57:31 +epoch [52/200] batch [30/31] time 0.709 (0.745) data 0.000 (0.031) loss 1.6934 (1.3514) acc 59.3750 (68.0208) lr 1.7071e-03 eta 0:56:58 +epoch [53/200] batch [5/31] time 0.720 (0.900) data 0.000 (0.173) loss 1.5674 (1.3682) acc 56.2500 (64.3750) lr 1.6959e-03 eta 1:08:44 +epoch [53/200] batch [10/31] time 0.712 (0.806) data 0.000 (0.087) loss 1.0547 (1.3753) acc 71.8750 (63.7500) lr 1.6959e-03 eta 1:01:30 +epoch [53/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.058) loss 1.5947 (1.3698) acc 59.3750 (65.8333) lr 1.6959e-03 eta 0:59:02 +epoch [53/200] batch [20/31] time 0.713 (0.759) data 0.000 (0.044) loss 1.7773 (1.3995) acc 62.5000 (67.1875) lr 1.6959e-03 eta 0:57:45 +epoch [53/200] batch [25/31] time 0.708 (0.749) data 0.000 (0.035) loss 0.9033 (1.3669) acc 71.8750 (67.7500) lr 1.6959e-03 eta 0:56:56 +epoch [53/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 1.2061 (1.3483) acc 71.8750 (68.0208) lr 1.6959e-03 eta 0:56:23 +epoch [54/200] batch [5/31] time 0.722 (0.899) data 0.000 (0.168) loss 1.4404 (1.2627) acc 65.6250 (69.3750) lr 1.6845e-03 eta 1:08:11 +epoch [54/200] batch [10/31] time 
0.708 (0.806) data 0.000 (0.084) loss 1.8086 (1.2527) acc 59.3750 (69.6875) lr 1.6845e-03 eta 1:01:04 +epoch [54/200] batch [15/31] time 0.718 (0.775) data 0.000 (0.056) loss 2.5430 (1.3552) acc 50.0000 (67.7083) lr 1.6845e-03 eta 0:58:39 +epoch [54/200] batch [20/31] time 0.721 (0.761) data 0.000 (0.042) loss 1.2158 (1.2867) acc 71.8750 (69.2188) lr 1.6845e-03 eta 0:57:33 +epoch [54/200] batch [25/31] time 0.715 (0.752) data 0.000 (0.034) loss 0.6274 (1.2499) acc 81.2500 (70.1250) lr 1.6845e-03 eta 0:56:47 +epoch [54/200] batch [30/31] time 0.718 (0.745) data 0.000 (0.028) loss 0.6313 (1.2275) acc 81.2500 (70.5208) lr 1.6845e-03 eta 0:56:12 +epoch [55/200] batch [5/31] time 0.712 (0.901) data 0.000 (0.179) loss 1.0957 (1.0381) acc 71.8750 (74.3750) lr 1.6730e-03 eta 1:07:51 +epoch [55/200] batch [10/31] time 0.709 (0.809) data 0.000 (0.090) loss 0.8735 (1.1243) acc 84.3750 (73.1250) lr 1.6730e-03 eta 1:00:53 +epoch [55/200] batch [15/31] time 0.711 (0.777) data 0.001 (0.060) loss 1.4219 (1.1931) acc 71.8750 (71.8750) lr 1.6730e-03 eta 0:58:25 +epoch [55/200] batch [20/31] time 0.726 (0.763) data 0.000 (0.045) loss 1.4580 (1.2529) acc 71.8750 (69.8438) lr 1.6730e-03 eta 0:57:16 +epoch [55/200] batch [25/31] time 0.707 (0.752) data 0.000 (0.036) loss 1.1826 (1.2750) acc 71.8750 (69.0000) lr 1.6730e-03 eta 0:56:24 +epoch [55/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.030) loss 1.2119 (1.2451) acc 75.0000 (69.7917) lr 1.6730e-03 eta 0:55:49 +epoch [56/200] batch [5/31] time 0.728 (0.899) data 0.001 (0.172) loss 1.7539 (1.4712) acc 59.3750 (63.7500) lr 1.6613e-03 eta 1:07:18 +epoch [56/200] batch [10/31] time 0.716 (0.818) data 0.000 (0.086) loss 1.6367 (1.4598) acc 56.2500 (65.6250) lr 1.6613e-03 eta 1:01:08 +epoch [56/200] batch [15/31] time 0.736 (0.792) data 0.000 (0.058) loss 1.6270 (1.3674) acc 59.3750 (66.6667) lr 1.6613e-03 eta 0:59:09 +epoch [56/200] batch [20/31] time 0.722 (0.778) data 0.000 (0.043) loss 1.7754 (1.3502) acc 62.5000 (67.3438) lr 
1.6613e-03 eta 0:58:01 +epoch [56/200] batch [25/31] time 0.852 (0.772) data 0.000 (0.035) loss 1.6074 (1.3583) acc 59.3750 (67.6250) lr 1.6613e-03 eta 0:57:30 +epoch [56/200] batch [30/31] time 0.731 (0.763) data 0.000 (0.029) loss 0.9287 (1.3148) acc 78.1250 (68.7500) lr 1.6613e-03 eta 0:56:48 +epoch [57/200] batch [5/31] time 0.706 (0.902) data 0.000 (0.180) loss 1.4941 (1.5188) acc 65.6250 (65.0000) lr 1.6494e-03 eta 1:07:01 +epoch [57/200] batch [10/31] time 0.706 (0.819) data 0.000 (0.090) loss 1.7949 (1.4789) acc 62.5000 (65.9375) lr 1.6494e-03 eta 1:00:48 +epoch [57/200] batch [15/31] time 0.711 (0.783) data 0.000 (0.060) loss 0.8442 (1.3643) acc 81.2500 (67.9167) lr 1.6494e-03 eta 0:58:01 +epoch [57/200] batch [20/31] time 0.709 (0.765) data 0.000 (0.045) loss 0.7666 (1.2658) acc 81.2500 (69.8438) lr 1.6494e-03 eta 0:56:38 +epoch [57/200] batch [25/31] time 0.710 (0.754) data 0.000 (0.036) loss 1.1465 (1.2260) acc 71.8750 (70.1250) lr 1.6494e-03 eta 0:55:46 +epoch [57/200] batch [30/31] time 0.705 (0.747) data 0.000 (0.030) loss 1.3359 (1.2215) acc 78.1250 (70.8333) lr 1.6494e-03 eta 0:55:10 +epoch [58/200] batch [5/31] time 0.723 (0.928) data 0.000 (0.207) loss 1.1631 (0.9800) acc 71.8750 (75.6250) lr 1.6374e-03 eta 1:08:28 +epoch [58/200] batch [10/31] time 0.719 (0.821) data 0.000 (0.104) loss 1.4082 (1.1567) acc 65.6250 (73.4375) lr 1.6374e-03 eta 1:00:31 +epoch [58/200] batch [15/31] time 0.710 (0.785) data 0.000 (0.069) loss 1.7363 (1.2286) acc 65.6250 (70.8333) lr 1.6374e-03 eta 0:57:46 +epoch [58/200] batch [20/31] time 0.723 (0.767) data 0.000 (0.052) loss 0.8057 (1.2072) acc 81.2500 (72.0312) lr 1.6374e-03 eta 0:56:25 +epoch [58/200] batch [25/31] time 0.709 (0.756) data 0.000 (0.042) loss 0.7881 (1.2406) acc 84.3750 (71.6250) lr 1.6374e-03 eta 0:55:32 +epoch [58/200] batch [30/31] time 0.717 (0.753) data 0.000 (0.035) loss 1.3125 (1.2317) acc 59.3750 (71.0417) lr 1.6374e-03 eta 0:55:14 +epoch [59/200] batch [5/31] time 0.714 (0.888) data 0.000 
(0.161) loss 1.1045 (1.1432) acc 78.1250 (66.8750) lr 1.6252e-03 eta 1:05:05 +epoch [59/200] batch [10/31] time 0.704 (0.803) data 0.000 (0.081) loss 0.7183 (1.0753) acc 84.3750 (70.9375) lr 1.6252e-03 eta 0:58:47 +epoch [59/200] batch [15/31] time 0.719 (0.774) data 0.000 (0.054) loss 1.8057 (1.0909) acc 62.5000 (70.6250) lr 1.6252e-03 eta 0:56:35 +epoch [59/200] batch [20/31] time 0.707 (0.759) data 0.000 (0.041) loss 0.8945 (1.1313) acc 81.2500 (70.4688) lr 1.6252e-03 eta 0:55:26 +epoch [59/200] batch [25/31] time 0.713 (0.751) data 0.000 (0.033) loss 1.6650 (1.1644) acc 65.6250 (70.2500) lr 1.6252e-03 eta 0:54:47 +epoch [59/200] batch [30/31] time 0.713 (0.745) data 0.000 (0.027) loss 1.4668 (1.1752) acc 62.5000 (70.3125) lr 1.6252e-03 eta 0:54:15 +epoch [60/200] batch [5/31] time 0.704 (0.913) data 0.000 (0.185) loss 1.3252 (1.1565) acc 65.6250 (74.3750) lr 1.6129e-03 eta 1:06:25 +epoch [60/200] batch [10/31] time 0.724 (0.814) data 0.001 (0.093) loss 1.2539 (1.1555) acc 65.6250 (72.5000) lr 1.6129e-03 eta 0:59:07 +epoch [60/200] batch [15/31] time 0.707 (0.780) data 0.000 (0.062) loss 1.1582 (1.1199) acc 62.5000 (72.9167) lr 1.6129e-03 eta 0:56:36 +epoch [60/200] batch [20/31] time 0.732 (0.763) data 0.000 (0.047) loss 0.9321 (1.1024) acc 75.0000 (72.3438) lr 1.6129e-03 eta 0:55:21 +epoch [60/200] batch [25/31] time 0.706 (0.753) data 0.000 (0.037) loss 1.2012 (1.1240) acc 68.7500 (71.8750) lr 1.6129e-03 eta 0:54:34 +epoch [60/200] batch [30/31] time 0.718 (0.752) data 0.000 (0.031) loss 1.2559 (1.1475) acc 68.7500 (70.9375) lr 1.6129e-03 eta 0:54:25 +epoch [61/200] batch [5/31] time 0.710 (0.874) data 0.001 (0.155) loss 1.2061 (1.2390) acc 65.6250 (73.7500) lr 1.6004e-03 eta 1:03:07 +epoch [61/200] batch [10/31] time 0.703 (0.791) data 0.000 (0.078) loss 0.8306 (1.2485) acc 71.8750 (70.0000) lr 1.6004e-03 eta 0:57:04 +epoch [61/200] batch [15/31] time 0.710 (0.764) data 0.000 (0.052) loss 1.2930 (1.1924) acc 65.6250 (69.5833) lr 1.6004e-03 eta 0:55:02 +epoch 
[61/200] batch [20/31] time 0.704 (0.750) data 0.000 (0.039) loss 1.4248 (1.2516) acc 62.5000 (68.1250) lr 1.6004e-03 eta 0:53:58 +epoch [61/200] batch [25/31] time 0.709 (0.742) data 0.000 (0.031) loss 1.0967 (1.2729) acc 71.8750 (67.6250) lr 1.6004e-03 eta 0:53:20 +epoch [61/200] batch [30/31] time 0.706 (0.737) data 0.000 (0.026) loss 1.0371 (1.2612) acc 68.7500 (67.8125) lr 1.6004e-03 eta 0:52:55 +epoch [62/200] batch [5/31] time 0.729 (0.897) data 0.001 (0.167) loss 1.1768 (1.2534) acc 68.7500 (67.5000) lr 1.5878e-03 eta 1:04:19 +epoch [62/200] batch [10/31] time 0.729 (0.806) data 0.001 (0.084) loss 1.4492 (1.2541) acc 62.5000 (70.3125) lr 1.5878e-03 eta 0:57:46 +epoch [62/200] batch [15/31] time 0.716 (0.777) data 0.000 (0.056) loss 1.0947 (1.1755) acc 75.0000 (72.5000) lr 1.5878e-03 eta 0:55:34 +epoch [62/200] batch [20/31] time 0.710 (0.760) data 0.000 (0.042) loss 0.8398 (1.2032) acc 81.2500 (71.5625) lr 1.5878e-03 eta 0:54:18 +epoch [62/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.034) loss 1.3506 (1.1648) acc 68.7500 (72.1250) lr 1.5878e-03 eta 0:53:33 +epoch [62/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.028) loss 0.6885 (1.1580) acc 84.3750 (71.6667) lr 1.5878e-03 eta 0:53:00 +epoch [63/200] batch [5/31] time 0.722 (0.897) data 0.001 (0.170) loss 1.2822 (1.2255) acc 71.8750 (75.0000) lr 1.5750e-03 eta 1:03:53 +epoch [63/200] batch [10/31] time 0.715 (0.804) data 0.000 (0.085) loss 1.2012 (1.2581) acc 62.5000 (72.1875) lr 1.5750e-03 eta 0:57:11 +epoch [63/200] batch [15/31] time 0.721 (0.773) data 0.000 (0.057) loss 1.7900 (1.2564) acc 53.1250 (70.8333) lr 1.5750e-03 eta 0:54:56 +epoch [63/200] batch [20/31] time 0.726 (0.759) data 0.000 (0.043) loss 1.7500 (1.2675) acc 56.2500 (69.6875) lr 1.5750e-03 eta 0:53:51 +epoch [63/200] batch [25/31] time 0.708 (0.749) data 0.000 (0.034) loss 1.4502 (1.2679) acc 65.6250 (69.8750) lr 1.5750e-03 eta 0:53:04 +epoch [63/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.029) loss 1.3506 (1.2491) 
acc 65.6250 (70.4167) lr 1.5750e-03 eta 0:52:32 +epoch [64/200] batch [5/31] time 0.724 (0.886) data 0.000 (0.161) loss 1.5166 (1.3912) acc 75.0000 (67.5000) lr 1.5621e-03 eta 1:02:39 +epoch [64/200] batch [10/31] time 0.714 (0.800) data 0.000 (0.081) loss 1.3887 (1.1825) acc 65.6250 (70.3125) lr 1.5621e-03 eta 0:56:28 +epoch [64/200] batch [15/31] time 0.708 (0.781) data 0.000 (0.054) loss 0.5806 (1.1611) acc 75.0000 (70.2083) lr 1.5621e-03 eta 0:55:03 +epoch [64/200] batch [20/31] time 0.711 (0.764) data 0.000 (0.040) loss 1.9893 (1.1782) acc 53.1250 (70.9375) lr 1.5621e-03 eta 0:53:49 +epoch [64/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.032) loss 1.0713 (1.1938) acc 68.7500 (70.5000) lr 1.5621e-03 eta 0:53:03 +epoch [64/200] batch [30/31] time 0.702 (0.747) data 0.000 (0.027) loss 1.1152 (1.1606) acc 81.2500 (71.7708) lr 1.5621e-03 eta 0:52:29 +epoch [65/200] batch [5/31] time 0.711 (0.963) data 0.000 (0.237) loss 1.3691 (1.0904) acc 65.6250 (73.7500) lr 1.5490e-03 eta 1:07:35 +epoch [65/200] batch [10/31] time 0.712 (0.839) data 0.000 (0.119) loss 1.5547 (1.1688) acc 65.6250 (70.6250) lr 1.5490e-03 eta 0:58:47 +epoch [65/200] batch [15/31] time 0.710 (0.797) data 0.000 (0.079) loss 1.1680 (1.1847) acc 68.7500 (71.6667) lr 1.5490e-03 eta 0:55:46 +epoch [65/200] batch [20/31] time 0.712 (0.775) data 0.000 (0.060) loss 1.1748 (1.1587) acc 68.7500 (72.1875) lr 1.5490e-03 eta 0:54:10 +epoch [65/200] batch [25/31] time 0.706 (0.763) data 0.000 (0.048) loss 1.1582 (1.1853) acc 68.7500 (71.0000) lr 1.5490e-03 eta 0:53:17 +epoch [65/200] batch [30/31] time 0.709 (0.754) data 0.000 (0.040) loss 1.1377 (1.1996) acc 65.6250 (70.3125) lr 1.5490e-03 eta 0:52:36 +epoch [66/200] batch [5/31] time 0.733 (0.893) data 0.000 (0.168) loss 0.8032 (1.0429) acc 84.3750 (78.1250) lr 1.5358e-03 eta 1:02:12 +epoch [66/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.084) loss 1.0771 (1.0712) acc 71.8750 (75.3125) lr 1.5358e-03 eta 0:55:53 +epoch [66/200] batch [15/31] time 
0.716 (0.774) data 0.000 (0.056) loss 0.7388 (1.0677) acc 81.2500 (75.4167) lr 1.5358e-03 eta 0:53:48 +epoch [66/200] batch [20/31] time 0.711 (0.759) data 0.000 (0.042) loss 1.0166 (1.1056) acc 71.8750 (73.7500) lr 1.5358e-03 eta 0:52:40 +epoch [66/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.034) loss 0.9497 (1.1310) acc 75.0000 (73.0000) lr 1.5358e-03 eta 0:51:55 +epoch [66/200] batch [30/31] time 0.704 (0.742) data 0.000 (0.028) loss 1.5029 (1.1226) acc 68.7500 (73.6458) lr 1.5358e-03 eta 0:51:24 +epoch [67/200] batch [5/31] time 0.712 (0.972) data 0.000 (0.225) loss 1.1865 (0.9486) acc 78.1250 (74.3750) lr 1.5225e-03 eta 1:07:12 +epoch [67/200] batch [10/31] time 0.718 (0.845) data 0.000 (0.113) loss 1.1553 (1.0389) acc 71.8750 (74.6875) lr 1.5225e-03 eta 0:58:19 +epoch [67/200] batch [15/31] time 0.726 (0.803) data 0.001 (0.075) loss 1.0449 (1.0841) acc 59.3750 (72.7083) lr 1.5225e-03 eta 0:55:23 +epoch [67/200] batch [20/31] time 0.707 (0.779) data 0.000 (0.056) loss 1.2920 (1.1082) acc 75.0000 (72.5000) lr 1.5225e-03 eta 0:53:42 +epoch [67/200] batch [25/31] time 0.708 (0.766) data 0.000 (0.045) loss 0.8125 (1.0769) acc 78.1250 (72.7500) lr 1.5225e-03 eta 0:52:40 +epoch [67/200] batch [30/31] time 0.709 (0.756) data 0.000 (0.038) loss 1.1875 (1.0978) acc 71.8750 (72.3958) lr 1.5225e-03 eta 0:51:58 +epoch [68/200] batch [5/31] time 0.713 (0.891) data 0.000 (0.172) loss 2.1055 (1.2907) acc 59.3750 (71.2500) lr 1.5090e-03 eta 1:01:08 +epoch [68/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.086) loss 1.5322 (1.2720) acc 75.0000 (72.5000) lr 1.5090e-03 eta 0:54:52 +epoch [68/200] batch [15/31] time 0.726 (0.772) data 0.000 (0.058) loss 0.9443 (1.2511) acc 71.8750 (71.6667) lr 1.5090e-03 eta 0:52:52 +epoch [68/200] batch [20/31] time 0.709 (0.757) data 0.000 (0.043) loss 0.6562 (1.1742) acc 78.1250 (71.8750) lr 1.5090e-03 eta 0:51:44 +epoch [68/200] batch [25/31] time 0.706 (0.753) data 0.000 (0.035) loss 1.3545 (1.1470) acc 71.8750 (72.2500) lr 
1.5090e-03 eta 0:51:24 +epoch [68/200] batch [30/31] time 0.705 (0.745) data 0.000 (0.029) loss 0.9175 (1.1403) acc 84.3750 (72.1875) lr 1.5090e-03 eta 0:50:50 +epoch [69/200] batch [5/31] time 0.726 (0.891) data 0.000 (0.168) loss 1.0127 (1.2826) acc 68.7500 (70.6250) lr 1.4955e-03 eta 1:00:42 +epoch [69/200] batch [10/31] time 0.722 (0.804) data 0.000 (0.084) loss 0.5190 (1.2032) acc 93.7500 (72.1875) lr 1.4955e-03 eta 0:54:43 +epoch [69/200] batch [15/31] time 0.713 (0.773) data 0.000 (0.056) loss 0.9756 (1.1885) acc 75.0000 (71.4583) lr 1.4955e-03 eta 0:52:32 +epoch [69/200] batch [20/31] time 0.710 (0.758) data 0.000 (0.042) loss 1.2979 (1.1266) acc 65.6250 (72.0312) lr 1.4955e-03 eta 0:51:28 +epoch [69/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.034) loss 1.5068 (1.1386) acc 65.6250 (71.7500) lr 1.4955e-03 eta 0:50:49 +epoch [69/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.028) loss 1.5107 (1.1707) acc 65.6250 (70.4167) lr 1.4955e-03 eta 0:50:20 +epoch [70/200] batch [5/31] time 0.712 (0.881) data 0.000 (0.157) loss 1.4531 (1.2404) acc 71.8750 (76.2500) lr 1.4818e-03 eta 0:59:32 +epoch [70/200] batch [10/31] time 0.726 (0.798) data 0.000 (0.079) loss 1.3262 (1.2332) acc 68.7500 (71.5625) lr 1.4818e-03 eta 0:53:53 +epoch [70/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.053) loss 1.2764 (1.1627) acc 65.6250 (72.7083) lr 1.4818e-03 eta 0:51:54 +epoch [70/200] batch [20/31] time 0.725 (0.763) data 0.000 (0.039) loss 1.3604 (1.1542) acc 62.5000 (72.8125) lr 1.4818e-03 eta 0:51:22 +epoch [70/200] batch [25/31] time 0.708 (0.752) data 0.000 (0.032) loss 1.3369 (1.1453) acc 75.0000 (74.0000) lr 1.4818e-03 eta 0:50:36 +epoch [70/200] batch [30/31] time 0.718 (0.746) data 0.000 (0.026) loss 0.4058 (1.1477) acc 87.5000 (73.6458) lr 1.4818e-03 eta 0:50:08 +epoch [71/200] batch [5/31] time 0.726 (0.884) data 0.000 (0.161) loss 1.0918 (1.1428) acc 75.0000 (73.1250) lr 1.4679e-03 eta 0:59:18 +epoch [71/200] batch [10/31] time 0.721 (0.802) data 0.000 
(0.081) loss 0.9180 (1.0507) acc 71.8750 (73.7500) lr 1.4679e-03 eta 0:53:42 +epoch [71/200] batch [15/31] time 0.714 (0.772) data 0.000 (0.054) loss 1.3096 (1.1071) acc 78.1250 (74.5833) lr 1.4679e-03 eta 0:51:41 +epoch [71/200] batch [20/31] time 0.723 (0.758) data 0.000 (0.041) loss 1.0059 (1.0864) acc 84.3750 (75.9375) lr 1.4679e-03 eta 0:50:38 +epoch [71/200] batch [25/31] time 0.722 (0.749) data 0.000 (0.033) loss 1.3906 (1.0636) acc 65.6250 (76.2500) lr 1.4679e-03 eta 0:50:01 +epoch [71/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.027) loss 1.0293 (1.0926) acc 78.1250 (75.7292) lr 1.4679e-03 eta 0:49:35 +epoch [72/200] batch [5/31] time 0.707 (0.889) data 0.000 (0.166) loss 1.1924 (0.9493) acc 75.0000 (77.5000) lr 1.4540e-03 eta 0:59:10 +epoch [72/200] batch [10/31] time 0.712 (0.801) data 0.001 (0.083) loss 0.4585 (0.9887) acc 87.5000 (76.5625) lr 1.4540e-03 eta 0:53:14 +epoch [72/200] batch [15/31] time 0.712 (0.770) data 0.000 (0.055) loss 1.0586 (0.9752) acc 68.7500 (76.4583) lr 1.4540e-03 eta 0:51:08 +epoch [72/200] batch [20/31] time 0.710 (0.755) data 0.000 (0.042) loss 1.4023 (0.9987) acc 59.3750 (75.3125) lr 1.4540e-03 eta 0:50:02 +epoch [72/200] batch [25/31] time 0.706 (0.745) data 0.000 (0.033) loss 0.5957 (1.0267) acc 81.2500 (74.5000) lr 1.4540e-03 eta 0:49:20 +epoch [72/200] batch [30/31] time 0.707 (0.739) data 0.000 (0.028) loss 0.8555 (1.0158) acc 75.0000 (74.3750) lr 1.4540e-03 eta 0:48:52 +epoch [73/200] batch [5/31] time 0.710 (0.899) data 0.000 (0.179) loss 1.3838 (1.1236) acc 65.6250 (71.2500) lr 1.4399e-03 eta 0:59:22 +epoch [73/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.090) loss 0.8535 (1.0756) acc 75.0000 (71.8750) lr 1.4399e-03 eta 0:53:06 +epoch [73/200] batch [15/31] time 0.708 (0.783) data 0.000 (0.060) loss 0.9160 (1.1197) acc 75.0000 (72.2917) lr 1.4399e-03 eta 0:51:34 +epoch [73/200] batch [20/31] time 0.704 (0.764) data 0.000 (0.045) loss 0.8955 (1.1320) acc 84.3750 (72.3438) lr 1.4399e-03 eta 0:50:14 +epoch 
[73/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.036) loss 1.1445 (1.1663) acc 75.0000 (72.1250) lr 1.4399e-03 eta 0:49:27 +epoch [73/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.030) loss 1.1572 (1.1476) acc 81.2500 (72.3958) lr 1.4399e-03 eta 0:48:57 +epoch [74/200] batch [5/31] time 0.707 (0.890) data 0.000 (0.170) loss 1.2764 (1.1547) acc 75.0000 (73.7500) lr 1.4258e-03 eta 0:58:18 +epoch [74/200] batch [10/31] time 0.718 (0.803) data 0.000 (0.085) loss 0.8662 (1.0787) acc 75.0000 (72.8125) lr 1.4258e-03 eta 0:52:34 +epoch [74/200] batch [15/31] time 0.711 (0.772) data 0.000 (0.057) loss 1.0127 (1.0847) acc 78.1250 (72.9167) lr 1.4258e-03 eta 0:50:27 +epoch [74/200] batch [20/31] time 0.706 (0.757) data 0.000 (0.043) loss 0.5728 (1.0426) acc 87.5000 (74.5312) lr 1.4258e-03 eta 0:49:25 +epoch [74/200] batch [25/31] time 0.705 (0.748) data 0.000 (0.034) loss 1.0947 (1.0496) acc 81.2500 (75.3750) lr 1.4258e-03 eta 0:48:45 +epoch [74/200] batch [30/31] time 0.721 (0.742) data 0.000 (0.029) loss 1.1523 (1.0580) acc 62.5000 (74.2708) lr 1.4258e-03 eta 0:48:19 +epoch [75/200] batch [5/31] time 0.713 (0.885) data 0.000 (0.161) loss 1.1729 (1.0305) acc 75.0000 (71.2500) lr 1.4115e-03 eta 0:57:31 +epoch [75/200] batch [10/31] time 0.706 (0.798) data 0.000 (0.081) loss 0.8760 (1.0029) acc 81.2500 (73.1250) lr 1.4115e-03 eta 0:51:48 +epoch [75/200] batch [15/31] time 0.703 (0.770) data 0.000 (0.054) loss 0.5889 (1.0115) acc 75.0000 (73.5417) lr 1.4115e-03 eta 0:49:56 +epoch [75/200] batch [20/31] time 0.703 (0.756) data 0.000 (0.041) loss 1.6006 (1.0301) acc 65.6250 (73.5938) lr 1.4115e-03 eta 0:48:59 +epoch [75/200] batch [25/31] time 0.707 (0.747) data 0.000 (0.033) loss 0.9731 (1.0668) acc 75.0000 (73.3750) lr 1.4115e-03 eta 0:48:19 +epoch [75/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.027) loss 1.5156 (1.0694) acc 71.8750 (73.9583) lr 1.4115e-03 eta 0:47:52 +epoch [76/200] batch [5/31] time 0.709 (0.879) data 0.000 (0.162) loss 0.8374 (0.8097) 
acc 81.2500 (79.3750) lr 1.3971e-03 eta 0:56:40 +epoch [76/200] batch [10/31] time 0.715 (0.797) data 0.000 (0.081) loss 1.4219 (0.9579) acc 65.6250 (75.0000) lr 1.3971e-03 eta 0:51:19 +epoch [76/200] batch [15/31] time 0.711 (0.768) data 0.000 (0.054) loss 1.0830 (0.9690) acc 71.8750 (74.7917) lr 1.3971e-03 eta 0:49:25 +epoch [76/200] batch [20/31] time 0.709 (0.753) data 0.000 (0.041) loss 1.9854 (1.0683) acc 62.5000 (73.7500) lr 1.3971e-03 eta 0:48:24 +epoch [76/200] batch [25/31] time 0.712 (0.746) data 0.000 (0.033) loss 1.0820 (1.0657) acc 78.1250 (74.2500) lr 1.3971e-03 eta 0:47:50 +epoch [76/200] batch [30/31] time 0.708 (0.739) data 0.000 (0.027) loss 0.7529 (1.0366) acc 71.8750 (74.2708) lr 1.3971e-03 eta 0:47:23 +epoch [77/200] batch [5/31] time 0.711 (0.885) data 0.000 (0.163) loss 1.1133 (0.8532) acc 78.1250 (81.2500) lr 1.3827e-03 eta 0:56:36 +epoch [77/200] batch [10/31] time 0.718 (0.804) data 0.000 (0.082) loss 0.7148 (0.8771) acc 81.2500 (77.1875) lr 1.3827e-03 eta 0:51:23 +epoch [77/200] batch [15/31] time 0.712 (0.775) data 0.000 (0.055) loss 0.8052 (0.8955) acc 78.1250 (78.1250) lr 1.3827e-03 eta 0:49:25 +epoch [77/200] batch [20/31] time 0.709 (0.760) data 0.000 (0.041) loss 0.9521 (0.9423) acc 65.6250 (77.0312) lr 1.3827e-03 eta 0:48:25 +epoch [77/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.033) loss 0.7485 (0.9413) acc 84.3750 (77.5000) lr 1.3827e-03 eta 0:47:42 +epoch [77/200] batch [30/31] time 0.703 (0.743) data 0.000 (0.027) loss 0.8574 (0.9628) acc 71.8750 (76.5625) lr 1.3827e-03 eta 0:47:14 +epoch [78/200] batch [5/31] time 0.707 (0.884) data 0.000 (0.158) loss 0.6890 (0.9268) acc 81.2500 (76.8750) lr 1.3681e-03 eta 0:56:07 +epoch [78/200] batch [10/31] time 0.709 (0.798) data 0.000 (0.079) loss 0.7852 (0.9184) acc 81.2500 (77.8125) lr 1.3681e-03 eta 0:50:35 +epoch [78/200] batch [15/31] time 0.714 (0.778) data 0.000 (0.053) loss 0.9487 (0.9619) acc 59.3750 (74.7917) lr 1.3681e-03 eta 0:49:16 +epoch [78/200] batch [20/31] time 
0.720 (0.762) data 0.000 (0.040) loss 0.5830 (0.9362) acc 81.2500 (76.2500) lr 1.3681e-03 eta 0:48:10 +epoch [78/200] batch [25/31] time 0.707 (0.753) data 0.000 (0.032) loss 1.0264 (0.9540) acc 75.0000 (75.7500) lr 1.3681e-03 eta 0:47:31 +epoch [78/200] batch [30/31] time 0.710 (0.746) data 0.000 (0.027) loss 1.0498 (0.9770) acc 71.8750 (75.4167) lr 1.3681e-03 eta 0:47:00 +epoch [79/200] batch [5/31] time 0.718 (0.959) data 0.000 (0.234) loss 0.9399 (1.1291) acc 87.5000 (73.1250) lr 1.3535e-03 eta 1:00:22 +epoch [79/200] batch [10/31] time 0.708 (0.835) data 0.000 (0.117) loss 1.0840 (1.1003) acc 75.0000 (75.9375) lr 1.3535e-03 eta 0:52:30 +epoch [79/200] batch [15/31] time 0.715 (0.795) data 0.000 (0.078) loss 1.0938 (1.0869) acc 78.1250 (76.2500) lr 1.3535e-03 eta 0:49:55 +epoch [79/200] batch [20/31] time 0.709 (0.773) data 0.000 (0.059) loss 1.8311 (1.1623) acc 75.0000 (75.0000) lr 1.3535e-03 eta 0:48:29 +epoch [79/200] batch [25/31] time 0.707 (0.760) data 0.000 (0.047) loss 0.7480 (1.1028) acc 84.3750 (76.1250) lr 1.3535e-03 eta 0:47:36 +epoch [79/200] batch [30/31] time 0.724 (0.752) data 0.000 (0.039) loss 1.2412 (1.1133) acc 65.6250 (74.6875) lr 1.3535e-03 eta 0:47:00 +epoch [80/200] batch [5/31] time 0.713 (0.917) data 0.001 (0.166) loss 1.6699 (1.0312) acc 71.8750 (77.5000) lr 1.3387e-03 eta 0:57:15 +epoch [80/200] batch [10/31] time 0.708 (0.817) data 0.000 (0.083) loss 1.1943 (1.0123) acc 65.6250 (76.2500) lr 1.3387e-03 eta 0:50:54 +epoch [80/200] batch [15/31] time 0.716 (0.782) data 0.000 (0.055) loss 1.2598 (0.9637) acc 78.1250 (77.0833) lr 1.3387e-03 eta 0:48:42 +epoch [80/200] batch [20/31] time 0.704 (0.764) data 0.000 (0.042) loss 1.4229 (1.0721) acc 59.3750 (74.0625) lr 1.3387e-03 eta 0:47:29 +epoch [80/200] batch [25/31] time 0.709 (0.752) data 0.000 (0.033) loss 0.6406 (1.0382) acc 84.3750 (75.0000) lr 1.3387e-03 eta 0:46:43 +epoch [80/200] batch [30/31] time 0.702 (0.745) data 0.000 (0.028) loss 1.5107 (1.0767) acc 65.6250 (74.4792) lr 
1.3387e-03 eta 0:46:11 +epoch [81/200] batch [5/31] time 0.713 (0.894) data 0.000 (0.167) loss 1.0576 (0.9582) acc 71.8750 (76.8750) lr 1.3239e-03 eta 0:55:20 +epoch [81/200] batch [10/31] time 0.712 (0.805) data 0.000 (0.084) loss 1.1846 (1.0502) acc 78.1250 (75.0000) lr 1.3239e-03 eta 0:49:47 +epoch [81/200] batch [15/31] time 0.709 (0.774) data 0.000 (0.056) loss 0.7505 (1.0307) acc 78.1250 (75.8333) lr 1.3239e-03 eta 0:47:46 +epoch [81/200] batch [20/31] time 0.716 (0.758) data 0.000 (0.042) loss 0.8848 (1.0207) acc 81.2500 (75.6250) lr 1.3239e-03 eta 0:46:43 +epoch [81/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.034) loss 1.6270 (1.0462) acc 59.3750 (74.3750) lr 1.3239e-03 eta 0:46:03 +epoch [81/200] batch [30/31] time 0.704 (0.741) data 0.000 (0.028) loss 1.0059 (1.0710) acc 68.7500 (73.3333) lr 1.3239e-03 eta 0:45:35 +epoch [82/200] batch [5/31] time 0.716 (0.884) data 0.000 (0.165) loss 0.9888 (1.0786) acc 81.2500 (76.2500) lr 1.3090e-03 eta 0:54:15 +epoch [82/200] batch [10/31] time 0.713 (0.815) data 0.000 (0.083) loss 1.1514 (0.9877) acc 81.2500 (78.4375) lr 1.3090e-03 eta 0:49:56 +epoch [82/200] batch [15/31] time 0.725 (0.781) data 0.000 (0.055) loss 0.4187 (0.9899) acc 90.6250 (77.7083) lr 1.3090e-03 eta 0:47:51 +epoch [82/200] batch [20/31] time 0.709 (0.765) data 0.000 (0.042) loss 1.0811 (1.0476) acc 81.2500 (76.2500) lr 1.3090e-03 eta 0:46:47 +epoch [82/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.033) loss 1.2305 (1.0398) acc 71.8750 (76.1250) lr 1.3090e-03 eta 0:46:06 +epoch [82/200] batch [30/31] time 0.713 (0.747) data 0.000 (0.028) loss 1.6836 (1.0398) acc 65.6250 (76.2500) lr 1.3090e-03 eta 0:45:34 +epoch [83/200] batch [5/31] time 0.716 (0.890) data 0.000 (0.167) loss 0.7827 (0.8812) acc 78.1250 (77.5000) lr 1.2940e-03 eta 0:54:12 +epoch [83/200] batch [10/31] time 0.721 (0.801) data 0.000 (0.084) loss 0.9741 (0.9058) acc 78.1250 (78.1250) lr 1.2940e-03 eta 0:48:43 +epoch [83/200] batch [15/31] time 0.721 (0.775) data 0.000 
(0.056) loss 0.9150 (0.8731) acc 81.2500 (79.3750) lr 1.2940e-03 eta 0:47:01 +epoch [83/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.042) loss 1.2715 (0.9141) acc 65.6250 (77.6562) lr 1.2940e-03 eta 0:46:02 +epoch [83/200] batch [25/31] time 0.724 (0.750) data 0.000 (0.034) loss 0.9048 (0.8996) acc 84.3750 (78.1250) lr 1.2940e-03 eta 0:45:23 +epoch [83/200] batch [30/31] time 0.717 (0.743) data 0.000 (0.028) loss 0.8428 (0.9352) acc 75.0000 (77.3958) lr 1.2940e-03 eta 0:44:54 +epoch [84/200] batch [5/31] time 0.721 (0.884) data 0.000 (0.167) loss 0.9131 (0.7614) acc 78.1250 (81.8750) lr 1.2790e-03 eta 0:53:22 +epoch [84/200] batch [10/31] time 0.707 (0.796) data 0.000 (0.084) loss 1.1113 (1.0029) acc 78.1250 (77.8125) lr 1.2790e-03 eta 0:48:00 +epoch [84/200] batch [15/31] time 0.714 (0.769) data 0.000 (0.056) loss 1.5898 (1.0713) acc 62.5000 (74.7917) lr 1.2790e-03 eta 0:46:15 +epoch [84/200] batch [20/31] time 0.713 (0.755) data 0.000 (0.042) loss 1.2959 (1.0637) acc 62.5000 (74.5312) lr 1.2790e-03 eta 0:45:21 +epoch [84/200] batch [25/31] time 0.710 (0.745) data 0.000 (0.034) loss 0.7876 (1.0183) acc 84.3750 (75.2500) lr 1.2790e-03 eta 0:44:45 +epoch [84/200] batch [30/31] time 0.712 (0.740) data 0.000 (0.028) loss 1.0664 (1.0047) acc 75.0000 (75.4167) lr 1.2790e-03 eta 0:44:20 +epoch [85/200] batch [5/31] time 0.718 (0.898) data 0.000 (0.179) loss 1.4834 (1.0037) acc 62.5000 (76.2500) lr 1.2639e-03 eta 0:53:46 +epoch [85/200] batch [10/31] time 0.708 (0.804) data 0.000 (0.090) loss 0.7627 (1.0327) acc 75.0000 (76.5625) lr 1.2639e-03 eta 0:48:04 +epoch [85/200] batch [15/31] time 0.710 (0.774) data 0.000 (0.060) loss 0.8364 (1.0074) acc 78.1250 (76.2500) lr 1.2639e-03 eta 0:46:11 +epoch [85/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.045) loss 0.7021 (1.0507) acc 71.8750 (74.5312) lr 1.2639e-03 eta 0:45:09 +epoch [85/200] batch [25/31] time 0.732 (0.749) data 0.000 (0.036) loss 1.2998 (1.0954) acc 75.0000 (73.7500) lr 1.2639e-03 eta 0:44:36 +epoch 
[85/200] batch [30/31] time 0.702 (0.742) data 0.000 (0.030) loss 0.7368 (1.0640) acc 81.2500 (74.6875) lr 1.2639e-03 eta 0:44:07 +epoch [86/200] batch [5/31] time 0.719 (0.892) data 0.000 (0.166) loss 0.8862 (1.1224) acc 71.8750 (76.2500) lr 1.2487e-03 eta 0:52:56 +epoch [86/200] batch [10/31] time 0.709 (0.800) data 0.000 (0.083) loss 0.6299 (0.9767) acc 87.5000 (76.8750) lr 1.2487e-03 eta 0:47:22 +epoch [86/200] batch [15/31] time 0.707 (0.770) data 0.000 (0.056) loss 1.1377 (1.0217) acc 75.0000 (75.2083) lr 1.2487e-03 eta 0:45:35 +epoch [86/200] batch [20/31] time 0.707 (0.757) data 0.000 (0.042) loss 1.2598 (1.0165) acc 71.8750 (75.4688) lr 1.2487e-03 eta 0:44:42 +epoch [86/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.034) loss 0.4802 (1.0595) acc 84.3750 (74.5000) lr 1.2487e-03 eta 0:44:04 +epoch [86/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.028) loss 1.2344 (1.0799) acc 75.0000 (74.2708) lr 1.2487e-03 eta 0:43:39 +epoch [87/200] batch [5/31] time 0.713 (0.889) data 0.000 (0.163) loss 1.3779 (1.1402) acc 75.0000 (75.0000) lr 1.2334e-03 eta 0:52:15 +epoch [87/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.082) loss 0.9810 (1.0836) acc 68.7500 (74.6875) lr 1.2334e-03 eta 0:46:57 +epoch [87/200] batch [15/31] time 0.717 (0.772) data 0.000 (0.054) loss 1.6104 (1.0912) acc 71.8750 (76.0417) lr 1.2334e-03 eta 0:45:17 +epoch [87/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.041) loss 0.9512 (1.1314) acc 81.2500 (75.6250) lr 1.2334e-03 eta 0:44:20 +epoch [87/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.033) loss 0.7163 (1.0836) acc 84.3750 (76.0000) lr 1.2334e-03 eta 0:43:47 +epoch [87/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.027) loss 1.0186 (1.0531) acc 81.2500 (76.4583) lr 1.2334e-03 eta 0:43:22 +epoch [88/200] batch [5/31] time 0.706 (0.888) data 0.000 (0.170) loss 1.0244 (1.0691) acc 81.2500 (76.2500) lr 1.2181e-03 eta 0:51:44 +epoch [88/200] batch [10/31] time 0.705 (0.815) data 0.000 (0.085) loss 0.6899 (1.0060) 
acc 78.1250 (77.8125) lr 1.2181e-03 eta 0:47:25 +epoch [88/200] batch [15/31] time 0.705 (0.781) data 0.000 (0.057) loss 0.8945 (0.9544) acc 81.2500 (78.3333) lr 1.2181e-03 eta 0:45:23 +epoch [88/200] batch [20/31] time 0.710 (0.763) data 0.000 (0.043) loss 0.9609 (0.9768) acc 81.2500 (77.6562) lr 1.2181e-03 eta 0:44:18 +epoch [88/200] batch [25/31] time 0.706 (0.753) data 0.000 (0.034) loss 1.1533 (1.0046) acc 71.8750 (76.6250) lr 1.2181e-03 eta 0:43:39 +epoch [88/200] batch [30/31] time 0.706 (0.746) data 0.000 (0.029) loss 0.8296 (0.9946) acc 81.2500 (76.5625) lr 1.2181e-03 eta 0:43:11 +epoch [89/200] batch [5/31] time 0.706 (0.878) data 0.000 (0.158) loss 0.8457 (0.8825) acc 71.8750 (77.5000) lr 1.2028e-03 eta 0:50:45 +epoch [89/200] batch [10/31] time 0.712 (0.798) data 0.000 (0.079) loss 1.0889 (0.8913) acc 75.0000 (78.4375) lr 1.2028e-03 eta 0:46:01 +epoch [89/200] batch [15/31] time 0.724 (0.770) data 0.000 (0.053) loss 1.2529 (0.9262) acc 71.8750 (78.3333) lr 1.2028e-03 eta 0:44:22 +epoch [89/200] batch [20/31] time 0.704 (0.755) data 0.000 (0.040) loss 0.8438 (0.9710) acc 78.1250 (77.5000) lr 1.2028e-03 eta 0:43:26 +epoch [89/200] batch [25/31] time 0.726 (0.747) data 0.000 (0.032) loss 1.3184 (0.9646) acc 65.6250 (77.5000) lr 1.2028e-03 eta 0:42:55 +epoch [89/200] batch [30/31] time 0.721 (0.746) data 0.000 (0.027) loss 1.0225 (0.9604) acc 68.7500 (77.6042) lr 1.2028e-03 eta 0:42:48 +epoch [90/200] batch [5/31] time 0.710 (0.897) data 0.000 (0.176) loss 0.9995 (1.2467) acc 78.1250 (75.6250) lr 1.1874e-03 eta 0:51:22 +epoch [90/200] batch [10/31] time 0.707 (0.806) data 0.000 (0.088) loss 0.9619 (1.1160) acc 84.3750 (76.8750) lr 1.1874e-03 eta 0:46:06 +epoch [90/200] batch [15/31] time 0.724 (0.777) data 0.000 (0.059) loss 0.8652 (1.1157) acc 81.2500 (75.6250) lr 1.1874e-03 eta 0:44:20 +epoch [90/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.044) loss 0.9243 (1.0543) acc 81.2500 (77.0312) lr 1.1874e-03 eta 0:43:17 +epoch [90/200] batch [25/31] time 
0.710 (0.750) data 0.000 (0.035) loss 1.0039 (1.0223) acc 78.1250 (77.2500) lr 1.1874e-03 eta 0:42:42 +epoch [90/200] batch [30/31] time 0.722 (0.745) data 0.000 (0.030) loss 1.3213 (1.0286) acc 71.8750 (76.5625) lr 1.1874e-03 eta 0:42:20 +epoch [91/200] batch [5/31] time 0.715 (0.880) data 0.000 (0.157) loss 1.0352 (1.0076) acc 75.0000 (73.1250) lr 1.1719e-03 eta 0:49:55 +epoch [91/200] batch [10/31] time 0.716 (0.798) data 0.000 (0.079) loss 1.0273 (1.0225) acc 75.0000 (76.2500) lr 1.1719e-03 eta 0:45:12 +epoch [91/200] batch [15/31] time 0.710 (0.769) data 0.000 (0.053) loss 1.2070 (1.0931) acc 71.8750 (75.2083) lr 1.1719e-03 eta 0:43:29 +epoch [91/200] batch [20/31] time 0.710 (0.754) data 0.000 (0.040) loss 1.1328 (1.0558) acc 81.2500 (75.6250) lr 1.1719e-03 eta 0:42:35 +epoch [91/200] batch [25/31] time 0.704 (0.745) data 0.000 (0.032) loss 0.7314 (1.0626) acc 75.0000 (74.8750) lr 1.1719e-03 eta 0:42:01 +epoch [91/200] batch [30/31] time 0.714 (0.744) data 0.000 (0.026) loss 1.0635 (1.0646) acc 78.1250 (75.4167) lr 1.1719e-03 eta 0:41:54 +epoch [92/200] batch [5/31] time 0.705 (0.885) data 0.000 (0.163) loss 0.8999 (1.0622) acc 78.1250 (77.5000) lr 1.1564e-03 eta 0:49:46 +epoch [92/200] batch [10/31] time 0.711 (0.799) data 0.000 (0.082) loss 0.6831 (0.9900) acc 87.5000 (79.0625) lr 1.1564e-03 eta 0:44:50 +epoch [92/200] batch [15/31] time 0.712 (0.769) data 0.000 (0.054) loss 1.3232 (1.0821) acc 68.7500 (75.8333) lr 1.1564e-03 eta 0:43:08 +epoch [92/200] batch [20/31] time 0.715 (0.755) data 0.000 (0.041) loss 1.0986 (1.0250) acc 81.2500 (78.1250) lr 1.1564e-03 eta 0:42:14 +epoch [92/200] batch [25/31] time 0.710 (0.745) data 0.000 (0.033) loss 0.8013 (1.0146) acc 68.7500 (76.8750) lr 1.1564e-03 eta 0:41:39 +epoch [92/200] batch [30/31] time 0.719 (0.740) data 0.000 (0.027) loss 0.9424 (0.9893) acc 75.0000 (77.5000) lr 1.1564e-03 eta 0:41:18 +epoch [93/200] batch [5/31] time 0.713 (0.899) data 0.000 (0.178) loss 0.8159 (1.0604) acc 75.0000 (77.5000) lr 
1.1409e-03 eta 0:50:04 +epoch [93/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.089) loss 0.9321 (1.0227) acc 78.1250 (77.1875) lr 1.1409e-03 eta 0:44:46 +epoch [93/200] batch [15/31] time 0.709 (0.773) data 0.000 (0.059) loss 0.8765 (1.0240) acc 81.2500 (76.8750) lr 1.1409e-03 eta 0:42:57 +epoch [93/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.045) loss 1.1162 (0.9896) acc 78.1250 (76.7188) lr 1.1409e-03 eta 0:42:02 +epoch [93/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.036) loss 1.4238 (1.0084) acc 62.5000 (76.3750) lr 1.1409e-03 eta 0:41:26 +epoch [93/200] batch [30/31] time 0.706 (0.742) data 0.000 (0.030) loss 1.4033 (0.9911) acc 75.0000 (76.3542) lr 1.1409e-03 eta 0:41:02 +epoch [94/200] batch [5/31] time 0.714 (0.889) data 0.000 (0.168) loss 1.2158 (1.0956) acc 68.7500 (75.6250) lr 1.1253e-03 eta 0:49:03 +epoch [94/200] batch [10/31] time 0.709 (0.801) data 0.000 (0.084) loss 0.6123 (0.9396) acc 84.3750 (79.3750) lr 1.1253e-03 eta 0:44:10 +epoch [94/200] batch [15/31] time 0.706 (0.773) data 0.000 (0.056) loss 1.0176 (0.9739) acc 75.0000 (77.7083) lr 1.1253e-03 eta 0:42:31 +epoch [94/200] batch [20/31] time 0.718 (0.758) data 0.000 (0.042) loss 1.1094 (0.9429) acc 75.0000 (77.9688) lr 1.1253e-03 eta 0:41:37 +epoch [94/200] batch [25/31] time 0.706 (0.748) data 0.000 (0.034) loss 0.6641 (0.9665) acc 84.3750 (77.2500) lr 1.1253e-03 eta 0:41:01 +epoch [94/200] batch [30/31] time 0.704 (0.742) data 0.000 (0.028) loss 1.7930 (1.0403) acc 65.6250 (76.3542) lr 1.1253e-03 eta 0:40:39 +epoch [95/200] batch [5/31] time 0.718 (0.895) data 0.000 (0.173) loss 0.7598 (1.0541) acc 75.0000 (74.3750) lr 1.1097e-03 eta 0:48:55 +epoch [95/200] batch [10/31] time 0.709 (0.803) data 0.000 (0.087) loss 1.3711 (1.0140) acc 75.0000 (75.3125) lr 1.1097e-03 eta 0:43:49 +epoch [95/200] batch [15/31] time 0.717 (0.783) data 0.000 (0.058) loss 0.8496 (1.0377) acc 78.1250 (75.4167) lr 1.1097e-03 eta 0:42:40 +epoch [95/200] batch [20/31] time 0.704 (0.766) data 0.000 
(0.043) loss 0.9761 (1.0054) acc 81.2500 (77.0312) lr 1.1097e-03 eta 0:41:42 +epoch [95/200] batch [25/31] time 0.710 (0.756) data 0.001 (0.035) loss 0.6299 (0.9529) acc 84.3750 (78.5000) lr 1.1097e-03 eta 0:41:05 +epoch [95/200] batch [30/31] time 0.712 (0.748) data 0.000 (0.029) loss 0.6226 (0.9509) acc 87.5000 (78.0208) lr 1.1097e-03 eta 0:40:36 +epoch [96/200] batch [5/31] time 0.724 (0.905) data 0.000 (0.174) loss 0.6836 (0.9341) acc 78.1250 (79.3750) lr 1.0941e-03 eta 0:49:00 +epoch [96/200] batch [10/31] time 0.708 (0.810) data 0.000 (0.087) loss 0.8633 (0.8844) acc 90.6250 (82.5000) lr 1.0941e-03 eta 0:43:48 +epoch [96/200] batch [15/31] time 0.724 (0.780) data 0.000 (0.058) loss 0.6909 (0.8602) acc 75.0000 (80.2083) lr 1.0941e-03 eta 0:42:07 +epoch [96/200] batch [20/31] time 0.710 (0.764) data 0.000 (0.044) loss 0.5728 (0.8174) acc 90.6250 (81.0938) lr 1.0941e-03 eta 0:41:10 +epoch [96/200] batch [25/31] time 0.710 (0.753) data 0.000 (0.035) loss 1.3340 (0.8402) acc 68.7500 (80.1250) lr 1.0941e-03 eta 0:40:33 +epoch [96/200] batch [30/31] time 0.721 (0.746) data 0.000 (0.029) loss 1.1406 (0.8577) acc 71.8750 (79.2708) lr 1.0941e-03 eta 0:40:06 +epoch [97/200] batch [5/31] time 0.719 (0.887) data 0.000 (0.162) loss 1.4727 (0.8685) acc 75.0000 (81.2500) lr 1.0785e-03 eta 0:47:35 +epoch [97/200] batch [10/31] time 0.715 (0.798) data 0.000 (0.081) loss 1.1709 (0.8337) acc 65.6250 (79.0625) lr 1.0785e-03 eta 0:42:45 +epoch [97/200] batch [15/31] time 0.703 (0.769) data 0.000 (0.054) loss 1.2490 (0.8951) acc 75.0000 (78.9583) lr 1.0785e-03 eta 0:41:08 +epoch [97/200] batch [20/31] time 0.702 (0.754) data 0.000 (0.041) loss 1.0391 (0.9098) acc 78.1250 (79.5312) lr 1.0785e-03 eta 0:40:15 +epoch [97/200] batch [25/31] time 0.711 (0.745) data 0.000 (0.033) loss 1.3135 (0.9194) acc 65.6250 (78.8750) lr 1.0785e-03 eta 0:39:44 +epoch [97/200] batch [30/31] time 0.703 (0.740) data 0.000 (0.027) loss 1.0518 (0.9394) acc 75.0000 (78.0208) lr 1.0785e-03 eta 0:39:22 +epoch 
[98/200] batch [5/31] time 0.706 (0.909) data 0.000 (0.161) loss 0.8062 (0.8481) acc 84.3750 (82.5000) lr 1.0628e-03 eta 0:48:16 +epoch [98/200] batch [10/31] time 0.709 (0.810) data 0.000 (0.081) loss 1.3662 (0.9354) acc 68.7500 (79.0625) lr 1.0628e-03 eta 0:42:56 +epoch [98/200] batch [15/31] time 0.734 (0.778) data 0.000 (0.054) loss 1.3945 (0.9134) acc 62.5000 (78.3333) lr 1.0628e-03 eta 0:41:12 +epoch [98/200] batch [20/31] time 0.710 (0.761) data 0.000 (0.041) loss 0.9951 (0.9264) acc 81.2500 (78.4375) lr 1.0628e-03 eta 0:40:14 +epoch [98/200] batch [25/31] time 0.713 (0.750) data 0.000 (0.033) loss 0.7012 (0.9440) acc 71.8750 (78.0000) lr 1.0628e-03 eta 0:39:37 +epoch [98/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.027) loss 1.3506 (0.9633) acc 71.8750 (76.9792) lr 1.0628e-03 eta 0:39:10 +epoch [99/200] batch [5/31] time 0.711 (0.875) data 0.000 (0.155) loss 0.7026 (0.9981) acc 84.3750 (76.8750) lr 1.0471e-03 eta 0:46:02 +epoch [99/200] batch [10/31] time 0.706 (0.794) data 0.000 (0.078) loss 1.0420 (0.9859) acc 78.1250 (78.7500) lr 1.0471e-03 eta 0:41:42 +epoch [99/200] batch [15/31] time 0.706 (0.765) data 0.000 (0.052) loss 0.5303 (0.9925) acc 87.5000 (78.5417) lr 1.0471e-03 eta 0:40:07 +epoch [99/200] batch [20/31] time 0.709 (0.751) data 0.000 (0.039) loss 1.7812 (0.9719) acc 59.3750 (78.4375) lr 1.0471e-03 eta 0:39:19 +epoch [99/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.031) loss 0.9849 (0.9627) acc 78.1250 (78.2500) lr 1.0471e-03 eta 0:39:04 +epoch [99/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.026) loss 1.0957 (0.9336) acc 71.8750 (78.9583) lr 1.0471e-03 eta 0:38:38 +epoch [100/200] batch [5/31] time 0.722 (0.896) data 0.000 (0.167) loss 0.5776 (0.7513) acc 87.5000 (83.1250) lr 1.0314e-03 eta 0:46:39 +epoch [100/200] batch [10/31] time 0.715 (0.804) data 0.000 (0.084) loss 1.0400 (0.8494) acc 75.0000 (81.5625) lr 1.0314e-03 eta 0:41:50 +epoch [100/200] batch [15/31] time 0.716 (0.774) data 0.000 (0.056) loss 0.8066 (0.9346) 
acc 68.7500 (79.3750) lr 1.0314e-03 eta 0:40:12 +epoch [100/200] batch [20/31] time 0.716 (0.758) data 0.000 (0.042) loss 1.1777 (0.9171) acc 75.0000 (79.3750) lr 1.0314e-03 eta 0:39:17 +epoch [100/200] batch [25/31] time 0.705 (0.748) data 0.000 (0.034) loss 1.4463 (0.9202) acc 68.7500 (79.1250) lr 1.0314e-03 eta 0:38:44 +epoch [100/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.028) loss 0.8457 (0.9607) acc 71.8750 (78.2292) lr 1.0314e-03 eta 0:38:22 +epoch [101/200] batch [5/31] time 0.711 (0.907) data 0.000 (0.183) loss 0.9839 (0.9618) acc 81.2500 (78.1250) lr 1.0157e-03 eta 0:46:45 +epoch [101/200] batch [10/31] time 0.715 (0.811) data 0.000 (0.092) loss 0.3662 (0.7287) acc 87.5000 (83.7500) lr 1.0157e-03 eta 0:41:45 +epoch [101/200] batch [15/31] time 0.706 (0.780) data 0.000 (0.061) loss 1.1758 (0.7495) acc 81.2500 (83.3333) lr 1.0157e-03 eta 0:40:04 +epoch [101/200] batch [20/31] time 0.707 (0.770) data 0.000 (0.046) loss 1.0137 (0.8191) acc 75.0000 (82.5000) lr 1.0157e-03 eta 0:39:30 +epoch [101/200] batch [25/31] time 0.718 (0.758) data 0.000 (0.037) loss 0.4802 (0.8205) acc 93.7500 (82.0000) lr 1.0157e-03 eta 0:38:50 +epoch [101/200] batch [30/31] time 0.710 (0.750) data 0.000 (0.031) loss 0.7212 (0.8374) acc 81.2500 (81.4583) lr 1.0157e-03 eta 0:38:22 +epoch [102/200] batch [5/31] time 0.712 (0.895) data 0.000 (0.173) loss 0.5903 (0.6872) acc 84.3750 (81.8750) lr 1.0000e-03 eta 0:45:43 +epoch [102/200] batch [10/31] time 0.713 (0.806) data 0.000 (0.087) loss 1.0547 (0.8875) acc 71.8750 (78.7500) lr 1.0000e-03 eta 0:41:06 +epoch [102/200] batch [15/31] time 0.713 (0.775) data 0.000 (0.058) loss 0.7075 (0.9023) acc 90.6250 (78.3333) lr 1.0000e-03 eta 0:39:28 +epoch [102/200] batch [20/31] time 0.724 (0.760) data 0.000 (0.044) loss 1.4121 (0.9504) acc 71.8750 (77.9688) lr 1.0000e-03 eta 0:38:36 +epoch [102/200] batch [25/31] time 0.708 (0.750) data 0.000 (0.035) loss 0.9619 (0.9352) acc 84.3750 (78.3750) lr 1.0000e-03 eta 0:38:03 +epoch [102/200] 
batch [30/31] time 0.705 (0.745) data 0.000 (0.029) loss 0.7593 (0.9216) acc 75.0000 (78.5417) lr 1.0000e-03 eta 0:37:44 +epoch [103/200] batch [5/31] time 0.706 (0.890) data 0.000 (0.167) loss 0.6494 (1.1119) acc 81.2500 (76.8750) lr 9.8429e-04 eta 0:44:58 +epoch [103/200] batch [10/31] time 0.717 (0.800) data 0.000 (0.084) loss 0.6636 (0.9971) acc 81.2500 (77.8125) lr 9.8429e-04 eta 0:40:23 +epoch [103/200] batch [15/31] time 0.705 (0.772) data 0.000 (0.056) loss 0.7939 (1.0315) acc 78.1250 (77.2917) lr 9.8429e-04 eta 0:38:53 +epoch [103/200] batch [20/31] time 0.709 (0.757) data 0.000 (0.042) loss 0.7744 (0.9570) acc 87.5000 (78.9062) lr 9.8429e-04 eta 0:38:03 +epoch [103/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.034) loss 0.8379 (0.9197) acc 78.1250 (79.0000) lr 9.8429e-04 eta 0:37:30 +epoch [103/200] batch [30/31] time 0.711 (0.741) data 0.000 (0.028) loss 0.6582 (0.9529) acc 84.3750 (78.4375) lr 9.8429e-04 eta 0:37:07 +epoch [104/200] batch [5/31] time 0.710 (0.891) data 0.000 (0.165) loss 0.7725 (0.7976) acc 87.5000 (80.6250) lr 9.6859e-04 eta 0:44:34 +epoch [104/200] batch [10/31] time 0.720 (0.806) data 0.001 (0.083) loss 1.3652 (0.9497) acc 62.5000 (76.2500) lr 9.6859e-04 eta 0:40:14 +epoch [104/200] batch [15/31] time 0.728 (0.786) data 0.000 (0.055) loss 0.8926 (0.9360) acc 68.7500 (75.8333) lr 9.6859e-04 eta 0:39:11 +epoch [104/200] batch [20/31] time 0.710 (0.769) data 0.000 (0.042) loss 0.9595 (0.9387) acc 84.3750 (76.5625) lr 9.6859e-04 eta 0:38:16 +epoch [104/200] batch [25/31] time 0.711 (0.757) data 0.000 (0.033) loss 0.6221 (0.9108) acc 75.0000 (76.7500) lr 9.6859e-04 eta 0:37:38 +epoch [104/200] batch [30/31] time 0.708 (0.750) data 0.000 (0.028) loss 1.1309 (0.9168) acc 68.7500 (76.6667) lr 9.6859e-04 eta 0:37:13 +epoch [105/200] batch [5/31] time 0.710 (0.889) data 0.000 (0.167) loss 0.4558 (0.6226) acc 93.7500 (88.1250) lr 9.5289e-04 eta 0:44:00 +epoch [105/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.084) loss 0.5654 
(0.6357) acc 81.2500 (86.2500) lr 9.5289e-04 eta 0:39:38 +epoch [105/200] batch [15/31] time 0.707 (0.772) data 0.000 (0.056) loss 1.2451 (0.7085) acc 71.8750 (83.7500) lr 9.5289e-04 eta 0:38:05 +epoch [105/200] batch [20/31] time 0.708 (0.757) data 0.000 (0.042) loss 0.9595 (0.7290) acc 71.8750 (82.9688) lr 9.5289e-04 eta 0:37:18 +epoch [105/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.034) loss 0.7910 (0.7594) acc 81.2500 (82.5000) lr 9.5289e-04 eta 0:36:45 +epoch [105/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.028) loss 1.0430 (0.8093) acc 68.7500 (80.8333) lr 9.5289e-04 eta 0:36:22 +epoch [106/200] batch [5/31] time 0.710 (0.894) data 0.000 (0.174) loss 0.7314 (0.8500) acc 84.3750 (85.0000) lr 9.3721e-04 eta 0:43:47 +epoch [106/200] batch [10/31] time 0.716 (0.804) data 0.000 (0.087) loss 0.7681 (0.8214) acc 84.3750 (84.6875) lr 9.3721e-04 eta 0:39:19 +epoch [106/200] batch [15/31] time 0.708 (0.774) data 0.000 (0.058) loss 0.9263 (0.8055) acc 84.3750 (84.5833) lr 9.3721e-04 eta 0:37:46 +epoch [106/200] batch [20/31] time 0.712 (0.758) data 0.000 (0.044) loss 0.7012 (0.8279) acc 90.6250 (83.2812) lr 9.3721e-04 eta 0:36:56 +epoch [106/200] batch [25/31] time 0.709 (0.749) data 0.000 (0.035) loss 1.0713 (0.8395) acc 71.8750 (81.8750) lr 9.3721e-04 eta 0:36:26 +epoch [106/200] batch [30/31] time 0.713 (0.742) data 0.000 (0.029) loss 0.9072 (0.8590) acc 75.0000 (81.1458) lr 9.3721e-04 eta 0:36:03 +epoch [107/200] batch [5/31] time 0.711 (0.901) data 0.000 (0.175) loss 0.6274 (0.8429) acc 87.5000 (80.6250) lr 9.2154e-04 eta 0:43:42 +epoch [107/200] batch [10/31] time 0.740 (0.809) data 0.001 (0.088) loss 1.1055 (0.8428) acc 71.8750 (80.6250) lr 9.2154e-04 eta 0:39:08 +epoch [107/200] batch [15/31] time 0.712 (0.776) data 0.001 (0.059) loss 0.9102 (0.8718) acc 75.0000 (79.7917) lr 9.2154e-04 eta 0:37:30 +epoch [107/200] batch [20/31] time 0.715 (0.761) data 0.000 (0.044) loss 1.0928 (0.9303) acc 75.0000 (78.7500) lr 9.2154e-04 eta 0:36:40 +epoch 
[107/200] batch [25/31] time 0.712 (0.750) data 0.000 (0.035) loss 0.6206 (0.9263) acc 87.5000 (78.6250) lr 9.2154e-04 eta 0:36:08 +epoch [107/200] batch [30/31] time 0.714 (0.744) data 0.000 (0.029) loss 1.0791 (0.9177) acc 71.8750 (78.7500) lr 9.2154e-04 eta 0:35:44 +epoch [108/200] batch [5/31] time 0.732 (0.890) data 0.001 (0.167) loss 0.6616 (0.9625) acc 84.3750 (78.1250) lr 9.0589e-04 eta 0:42:41 +epoch [108/200] batch [10/31] time 0.711 (0.801) data 0.000 (0.084) loss 0.9688 (0.9071) acc 75.0000 (77.8125) lr 9.0589e-04 eta 0:38:22 +epoch [108/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.056) loss 1.0605 (0.9065) acc 75.0000 (77.7083) lr 9.0589e-04 eta 0:36:53 +epoch [108/200] batch [20/31] time 0.710 (0.758) data 0.000 (0.042) loss 1.4658 (0.9759) acc 78.1250 (76.4062) lr 9.0589e-04 eta 0:36:09 +epoch [108/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.034) loss 1.2002 (0.9389) acc 71.8750 (77.2500) lr 9.0589e-04 eta 0:35:38 +epoch [108/200] batch [30/31] time 0.713 (0.742) data 0.000 (0.028) loss 0.5420 (0.9139) acc 90.6250 (77.7083) lr 9.0589e-04 eta 0:35:16 +epoch [109/200] batch [5/31] time 0.709 (0.895) data 0.000 (0.171) loss 1.1270 (0.9878) acc 81.2500 (78.1250) lr 8.9027e-04 eta 0:42:28 +epoch [109/200] batch [10/31] time 0.709 (0.803) data 0.000 (0.086) loss 0.8423 (0.9792) acc 71.8750 (75.9375) lr 8.9027e-04 eta 0:38:01 +epoch [109/200] batch [15/31] time 0.708 (0.781) data 0.000 (0.057) loss 0.7568 (0.9564) acc 81.2500 (77.7083) lr 8.9027e-04 eta 0:36:56 +epoch [109/200] batch [20/31] time 0.719 (0.763) data 0.000 (0.043) loss 0.7622 (0.9144) acc 87.5000 (79.0625) lr 8.9027e-04 eta 0:35:59 +epoch [109/200] batch [25/31] time 0.704 (0.753) data 0.000 (0.035) loss 0.7524 (0.9126) acc 81.2500 (79.3750) lr 8.9027e-04 eta 0:35:27 +epoch [109/200] batch [30/31] time 0.709 (0.745) data 0.000 (0.029) loss 0.8145 (0.9435) acc 81.2500 (78.3333) lr 8.9027e-04 eta 0:35:03 +epoch [110/200] batch [5/31] time 0.710 (0.883) data 0.000 (0.162) loss 
0.7969 (0.8738) acc 84.3750 (82.5000) lr 8.7467e-04 eta 0:41:26 +epoch [110/200] batch [10/31] time 0.713 (0.797) data 0.000 (0.081) loss 1.8223 (1.0432) acc 59.3750 (78.4375) lr 8.7467e-04 eta 0:37:20 +epoch [110/200] batch [15/31] time 0.711 (0.771) data 0.000 (0.054) loss 0.9946 (1.0512) acc 71.8750 (76.6667) lr 8.7467e-04 eta 0:36:02 +epoch [110/200] batch [20/31] time 0.714 (0.755) data 0.000 (0.041) loss 0.9575 (0.9717) acc 81.2500 (77.8125) lr 8.7467e-04 eta 0:35:14 +epoch [110/200] batch [25/31] time 0.701 (0.744) data 0.000 (0.033) loss 0.7671 (0.9321) acc 81.2500 (78.7500) lr 8.7467e-04 eta 0:34:41 +epoch [110/200] batch [30/31] time 0.706 (0.738) data 0.000 (0.027) loss 0.9092 (0.9413) acc 71.8750 (78.0208) lr 8.7467e-04 eta 0:34:20 +epoch [111/200] batch [5/31] time 0.707 (0.917) data 0.000 (0.172) loss 0.9351 (0.7711) acc 71.8750 (80.0000) lr 8.5910e-04 eta 0:42:33 +epoch [111/200] batch [10/31] time 0.714 (0.814) data 0.000 (0.086) loss 0.8540 (0.7712) acc 81.2500 (80.6250) lr 8.5910e-04 eta 0:37:42 +epoch [111/200] batch [15/31] time 0.710 (0.779) data 0.000 (0.058) loss 1.0566 (0.8011) acc 65.6250 (78.7500) lr 8.5910e-04 eta 0:36:02 +epoch [111/200] batch [20/31] time 0.722 (0.763) data 0.000 (0.043) loss 0.7695 (0.8416) acc 78.1250 (78.4375) lr 8.5910e-04 eta 0:35:14 +epoch [111/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.035) loss 0.7642 (0.8715) acc 81.2500 (78.7500) lr 8.5910e-04 eta 0:34:41 +epoch [111/200] batch [30/31] time 0.708 (0.745) data 0.000 (0.029) loss 1.3604 (0.8914) acc 81.2500 (78.7500) lr 8.5910e-04 eta 0:34:17 +epoch [112/200] batch [5/31] time 0.716 (0.875) data 0.000 (0.153) loss 0.8413 (0.8303) acc 75.0000 (81.8750) lr 8.4357e-04 eta 0:40:11 +epoch [112/200] batch [10/31] time 0.715 (0.795) data 0.000 (0.077) loss 0.9580 (0.9747) acc 84.3750 (79.6875) lr 8.4357e-04 eta 0:36:24 +epoch [112/200] batch [15/31] time 0.727 (0.767) data 0.000 (0.051) loss 0.6494 (0.9777) acc 78.1250 (78.7500) lr 8.4357e-04 eta 0:35:05 
+epoch [112/200] batch [20/31] time 0.705 (0.753) data 0.000 (0.039) loss 0.6372 (0.9282) acc 84.3750 (79.2188) lr 8.4357e-04 eta 0:34:21 +epoch [112/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.031) loss 0.9297 (0.8905) acc 75.0000 (79.5000) lr 8.4357e-04 eta 0:33:54 +epoch [112/200] batch [30/31] time 0.713 (0.739) data 0.000 (0.026) loss 0.7246 (0.8880) acc 78.1250 (78.8542) lr 8.4357e-04 eta 0:33:35 +epoch [113/200] batch [5/31] time 0.712 (0.879) data 0.000 (0.159) loss 0.9575 (0.7682) acc 78.1250 (84.3750) lr 8.2807e-04 eta 0:39:52 +epoch [113/200] batch [10/31] time 0.713 (0.814) data 0.000 (0.080) loss 1.0381 (0.8754) acc 71.8750 (80.3125) lr 8.2807e-04 eta 0:36:51 +epoch [113/200] batch [15/31] time 0.724 (0.781) data 0.000 (0.053) loss 0.7686 (0.8738) acc 84.3750 (80.2083) lr 8.2807e-04 eta 0:35:18 +epoch [113/200] batch [20/31] time 0.709 (0.764) data 0.000 (0.040) loss 0.6299 (0.8692) acc 81.2500 (80.4688) lr 8.2807e-04 eta 0:34:28 +epoch [113/200] batch [25/31] time 0.713 (0.753) data 0.000 (0.032) loss 1.1152 (0.8665) acc 75.0000 (80.6250) lr 8.2807e-04 eta 0:33:56 +epoch [113/200] batch [30/31] time 0.709 (0.746) data 0.000 (0.027) loss 0.8423 (0.8706) acc 75.0000 (79.8958) lr 8.2807e-04 eta 0:33:32 +epoch [114/200] batch [5/31] time 0.726 (0.894) data 0.000 (0.172) loss 0.7783 (0.8693) acc 78.1250 (80.6250) lr 8.1262e-04 eta 0:40:06 +epoch [114/200] batch [10/31] time 0.707 (0.802) data 0.000 (0.086) loss 1.1641 (0.9164) acc 81.2500 (80.0000) lr 8.1262e-04 eta 0:35:55 +epoch [114/200] batch [15/31] time 0.722 (0.773) data 0.000 (0.058) loss 1.2129 (0.9332) acc 78.1250 (79.7917) lr 8.1262e-04 eta 0:34:32 +epoch [114/200] batch [20/31] time 0.710 (0.757) data 0.000 (0.043) loss 0.7817 (0.8959) acc 78.1250 (80.1562) lr 8.1262e-04 eta 0:33:46 +epoch [114/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.035) loss 0.6763 (0.9242) acc 84.3750 (78.6250) lr 8.1262e-04 eta 0:33:17 +epoch [114/200] batch [30/31] time 0.703 (0.741) data 0.000 (0.029) 
loss 0.6772 (0.9220) acc 81.2500 (78.2292) lr 8.1262e-04 eta 0:32:56 +epoch [115/200] batch [5/31] time 0.709 (0.881) data 0.000 (0.167) loss 0.6284 (0.7193) acc 84.3750 (83.1250) lr 7.9721e-04 eta 0:39:03 +epoch [115/200] batch [10/31] time 0.710 (0.796) data 0.000 (0.084) loss 0.9248 (0.8601) acc 68.7500 (77.8125) lr 7.9721e-04 eta 0:35:14 +epoch [115/200] batch [15/31] time 0.709 (0.767) data 0.000 (0.056) loss 1.3223 (0.8961) acc 75.0000 (78.1250) lr 7.9721e-04 eta 0:33:52 +epoch [115/200] batch [20/31] time 0.707 (0.752) data 0.000 (0.042) loss 0.9648 (0.8533) acc 84.3750 (80.0000) lr 7.9721e-04 eta 0:33:09 +epoch [115/200] batch [25/31] time 0.708 (0.743) data 0.000 (0.034) loss 1.0479 (0.8654) acc 78.1250 (80.2500) lr 7.9721e-04 eta 0:32:43 +epoch [115/200] batch [30/31] time 0.706 (0.738) data 0.000 (0.028) loss 0.8076 (0.8599) acc 81.2500 (80.1042) lr 7.9721e-04 eta 0:32:25 +epoch [116/200] batch [5/31] time 0.703 (0.874) data 0.000 (0.151) loss 0.9023 (0.8695) acc 81.2500 (80.0000) lr 7.8186e-04 eta 0:38:19 +epoch [116/200] batch [10/31] time 0.709 (0.794) data 0.000 (0.076) loss 0.8579 (0.9929) acc 84.3750 (79.3750) lr 7.8186e-04 eta 0:34:43 +epoch [116/200] batch [15/31] time 0.723 (0.767) data 0.000 (0.051) loss 1.3428 (0.9562) acc 62.5000 (78.3333) lr 7.8186e-04 eta 0:33:29 +epoch [116/200] batch [20/31] time 0.702 (0.752) data 0.000 (0.038) loss 1.3164 (0.9420) acc 84.3750 (79.8438) lr 7.8186e-04 eta 0:32:47 +epoch [116/200] batch [25/31] time 0.711 (0.744) data 0.000 (0.031) loss 0.8262 (0.9315) acc 87.5000 (80.2500) lr 7.8186e-04 eta 0:32:22 +epoch [116/200] batch [30/31] time 0.706 (0.738) data 0.000 (0.025) loss 1.1455 (0.9465) acc 71.8750 (79.4792) lr 7.8186e-04 eta 0:32:03 +epoch [117/200] batch [5/31] time 0.709 (0.895) data 0.000 (0.170) loss 0.7466 (0.6870) acc 78.1250 (83.1250) lr 7.6655e-04 eta 0:38:46 +epoch [117/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.085) loss 0.5986 (0.7199) acc 90.6250 (82.5000) lr 7.6655e-04 eta 0:34:44 
+epoch [117/200] batch [15/31] time 0.709 (0.773) data 0.000 (0.057) loss 1.3799 (0.8590) acc 65.6250 (79.1667) lr 7.6655e-04 eta 0:33:21 +epoch [117/200] batch [20/31] time 0.708 (0.757) data 0.000 (0.043) loss 0.8579 (0.8393) acc 75.0000 (79.6875) lr 7.6655e-04 eta 0:32:35 +epoch [117/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.034) loss 0.7261 (0.8255) acc 81.2500 (80.5000) lr 7.6655e-04 eta 0:32:06 +epoch [117/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.029) loss 1.2998 (0.8647) acc 78.1250 (79.7917) lr 7.6655e-04 eta 0:31:47 +epoch [118/200] batch [5/31] time 0.706 (0.885) data 0.000 (0.161) loss 0.5264 (0.8800) acc 90.6250 (82.5000) lr 7.5131e-04 eta 0:37:51 +epoch [118/200] batch [10/31] time 0.712 (0.801) data 0.000 (0.081) loss 0.4246 (0.9792) acc 90.6250 (79.3750) lr 7.5131e-04 eta 0:34:12 +epoch [118/200] batch [15/31] time 0.729 (0.772) data 0.000 (0.054) loss 1.1465 (0.9652) acc 78.1250 (80.4167) lr 7.5131e-04 eta 0:32:55 +epoch [118/200] batch [20/31] time 0.707 (0.756) data 0.000 (0.040) loss 1.0039 (0.9398) acc 78.1250 (79.8438) lr 7.5131e-04 eta 0:32:09 +epoch [118/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.032) loss 0.8340 (0.9293) acc 81.2500 (79.8750) lr 7.5131e-04 eta 0:31:43 +epoch [118/200] batch [30/31] time 0.721 (0.741) data 0.000 (0.027) loss 0.9048 (0.8971) acc 71.8750 (80.1042) lr 7.5131e-04 eta 0:31:24 +epoch [119/200] batch [5/31] time 0.712 (0.879) data 0.000 (0.158) loss 1.4102 (0.9029) acc 71.8750 (80.0000) lr 7.3613e-04 eta 0:37:10 +epoch [119/200] batch [10/31] time 0.709 (0.810) data 0.000 (0.079) loss 1.1475 (0.9095) acc 68.7500 (80.0000) lr 7.3613e-04 eta 0:34:09 +epoch [119/200] batch [15/31] time 0.717 (0.778) data 0.000 (0.053) loss 1.7012 (1.0045) acc 68.7500 (77.7083) lr 7.3613e-04 eta 0:32:46 +epoch [119/200] batch [20/31] time 0.710 (0.762) data 0.000 (0.040) loss 0.7158 (0.9371) acc 81.2500 (78.9062) lr 7.3613e-04 eta 0:32:00 +epoch [119/200] batch [25/31] time 0.719 (0.751) data 0.000 (0.032) 
loss 0.8110 (0.9117) acc 71.8750 (79.1250) lr 7.3613e-04 eta 0:31:30 +epoch [119/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.027) loss 0.7002 (0.9244) acc 90.6250 (79.0625) lr 7.3613e-04 eta 0:31:08 +epoch [120/200] batch [5/31] time 0.707 (0.879) data 0.001 (0.159) loss 0.7524 (0.7249) acc 81.2500 (83.1250) lr 7.2101e-04 eta 0:36:41 +epoch [120/200] batch [10/31] time 0.724 (0.796) data 0.000 (0.080) loss 0.7656 (0.7152) acc 75.0000 (81.8750) lr 7.2101e-04 eta 0:33:11 +epoch [120/200] batch [15/31] time 0.708 (0.766) data 0.000 (0.053) loss 1.3818 (0.7984) acc 65.6250 (79.7917) lr 7.2101e-04 eta 0:31:52 +epoch [120/200] batch [20/31] time 0.701 (0.751) data 0.000 (0.040) loss 0.5659 (0.7917) acc 81.2500 (80.3125) lr 7.2101e-04 eta 0:31:11 +epoch [120/200] batch [25/31] time 0.710 (0.742) data 0.000 (0.032) loss 0.6953 (0.8117) acc 81.2500 (80.5000) lr 7.2101e-04 eta 0:30:45 +epoch [120/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 0.8037 (0.8270) acc 84.3750 (80.3125) lr 7.2101e-04 eta 0:30:37 +epoch [121/200] batch [5/31] time 0.718 (0.887) data 0.000 (0.162) loss 1.7168 (1.0878) acc 71.8750 (77.5000) lr 7.0596e-04 eta 0:36:34 +epoch [121/200] batch [10/31] time 0.716 (0.799) data 0.000 (0.081) loss 0.9751 (0.8816) acc 78.1250 (81.2500) lr 7.0596e-04 eta 0:32:53 +epoch [121/200] batch [15/31] time 0.715 (0.771) data 0.000 (0.054) loss 0.6060 (0.8621) acc 87.5000 (81.0417) lr 7.0596e-04 eta 0:31:39 +epoch [121/200] batch [20/31] time 0.724 (0.756) data 0.000 (0.041) loss 0.7046 (0.8823) acc 78.1250 (79.8438) lr 7.0596e-04 eta 0:31:00 +epoch [121/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.033) loss 0.4033 (0.8285) acc 93.7500 (80.5000) lr 7.0596e-04 eta 0:30:35 +epoch [121/200] batch [30/31] time 0.704 (0.741) data 0.000 (0.027) loss 0.8760 (0.8657) acc 78.1250 (79.6875) lr 7.0596e-04 eta 0:30:16 +epoch [122/200] batch [5/31] time 0.709 (0.886) data 0.000 (0.159) loss 1.0605 (0.9888) acc 78.1250 (78.7500) lr 6.9098e-04 eta 0:36:04 
+epoch [122/200] batch [10/31] time 0.709 (0.801) data 0.000 (0.080) loss 0.5269 (0.8597) acc 87.5000 (82.1875) lr 6.9098e-04 eta 0:32:33 +epoch [122/200] batch [15/31] time 0.713 (0.773) data 0.000 (0.053) loss 0.5337 (0.9237) acc 81.2500 (80.0000) lr 6.9098e-04 eta 0:31:22 +epoch [122/200] batch [20/31] time 0.717 (0.758) data 0.000 (0.040) loss 1.2129 (0.9461) acc 75.0000 (80.1562) lr 6.9098e-04 eta 0:30:40 +epoch [122/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.032) loss 0.8936 (0.9245) acc 75.0000 (79.6250) lr 6.9098e-04 eta 0:30:15 +epoch [122/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.027) loss 0.7773 (0.9151) acc 84.3750 (79.4792) lr 6.9098e-04 eta 0:30:08 +epoch [123/200] batch [5/31] time 0.709 (0.941) data 0.000 (0.223) loss 1.3311 (0.8557) acc 71.8750 (81.2500) lr 6.7608e-04 eta 0:37:50 +epoch [123/200] batch [10/31] time 0.711 (0.829) data 0.000 (0.112) loss 0.8301 (0.7892) acc 81.2500 (82.8125) lr 6.7608e-04 eta 0:33:16 +epoch [123/200] batch [15/31] time 0.710 (0.790) data 0.000 (0.075) loss 0.5889 (0.8648) acc 81.2500 (80.4167) lr 6.7608e-04 eta 0:31:38 +epoch [123/200] batch [20/31] time 0.704 (0.770) data 0.000 (0.056) loss 0.5029 (0.8203) acc 90.6250 (80.6250) lr 6.7608e-04 eta 0:30:45 +epoch [123/200] batch [25/31] time 0.708 (0.758) data 0.000 (0.045) loss 1.0449 (0.8515) acc 84.3750 (80.2500) lr 6.7608e-04 eta 0:30:14 +epoch [123/200] batch [30/31] time 0.710 (0.750) data 0.000 (0.037) loss 1.0742 (0.8531) acc 81.2500 (80.4167) lr 6.7608e-04 eta 0:29:51 +epoch [124/200] batch [5/31] time 0.712 (0.885) data 0.000 (0.155) loss 0.8442 (0.8339) acc 78.1250 (78.7500) lr 6.6126e-04 eta 0:35:08 +epoch [124/200] batch [10/31] time 0.723 (0.805) data 0.000 (0.078) loss 0.9033 (0.7936) acc 78.1250 (80.6250) lr 6.6126e-04 eta 0:31:52 +epoch [124/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.052) loss 0.4939 (0.7576) acc 87.5000 (82.5000) lr 6.6126e-04 eta 0:30:34 +epoch [124/200] batch [20/31] time 0.713 (0.757) data 0.000 (0.039) 
loss 0.5376 (0.7611) acc 84.3750 (82.1875) lr 6.6126e-04 eta 0:29:52 +epoch [124/200] batch [25/31] time 0.710 (0.748) data 0.000 (0.031) loss 0.8569 (0.7822) acc 81.2500 (82.1250) lr 6.6126e-04 eta 0:29:27 +epoch [124/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.026) loss 0.8169 (0.8008) acc 87.5000 (81.9792) lr 6.6126e-04 eta 0:29:11 +epoch [125/200] batch [5/31] time 0.707 (0.884) data 0.000 (0.169) loss 1.1934 (1.0067) acc 75.0000 (79.3750) lr 6.4653e-04 eta 0:34:38 +epoch [125/200] batch [10/31] time 0.727 (0.801) data 0.000 (0.085) loss 0.6904 (0.9094) acc 87.5000 (80.9375) lr 6.4653e-04 eta 0:31:18 +epoch [125/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.057) loss 0.6475 (0.9250) acc 81.2500 (79.7917) lr 6.4653e-04 eta 0:30:07 +epoch [125/200] batch [20/31] time 0.711 (0.755) data 0.000 (0.043) loss 0.8418 (0.9101) acc 78.1250 (80.3125) lr 6.4653e-04 eta 0:29:24 +epoch [125/200] batch [25/31] time 0.725 (0.746) data 0.000 (0.034) loss 1.0293 (0.8867) acc 71.8750 (80.7500) lr 6.4653e-04 eta 0:28:58 +epoch [125/200] batch [30/31] time 0.711 (0.740) data 0.000 (0.028) loss 0.8784 (0.8893) acc 87.5000 (81.1458) lr 6.4653e-04 eta 0:28:40 +epoch [126/200] batch [5/31] time 0.709 (0.893) data 0.000 (0.165) loss 0.4905 (0.7195) acc 87.5000 (85.0000) lr 6.3188e-04 eta 0:34:31 +epoch [126/200] batch [10/31] time 0.708 (0.803) data 0.000 (0.083) loss 0.6636 (0.7906) acc 78.1250 (83.4375) lr 6.3188e-04 eta 0:30:57 +epoch [126/200] batch [15/31] time 0.713 (0.782) data 0.000 (0.055) loss 0.7407 (0.8089) acc 78.1250 (82.0833) lr 6.3188e-04 eta 0:30:06 +epoch [126/200] batch [20/31] time 0.710 (0.764) data 0.000 (0.042) loss 0.5312 (0.8436) acc 90.6250 (81.8750) lr 6.3188e-04 eta 0:29:21 +epoch [126/200] batch [25/31] time 0.708 (0.753) data 0.000 (0.033) loss 0.5410 (0.8129) acc 87.5000 (82.2500) lr 6.3188e-04 eta 0:28:52 +epoch [126/200] batch [30/31] time 0.709 (0.746) data 0.000 (0.028) loss 0.4597 (0.8301) acc 93.7500 (81.9792) lr 6.3188e-04 eta 0:28:31 
+epoch [127/200] batch [5/31] time 0.717 (0.893) data 0.000 (0.170) loss 0.4519 (0.8018) acc 93.7500 (84.3750) lr 6.1732e-04 eta 0:34:03 +epoch [127/200] batch [10/31] time 0.723 (0.806) data 0.000 (0.085) loss 0.3691 (0.7308) acc 90.6250 (83.4375) lr 6.1732e-04 eta 0:30:41 +epoch [127/200] batch [15/31] time 0.707 (0.776) data 0.000 (0.057) loss 1.3340 (0.8141) acc 78.1250 (81.0417) lr 6.1732e-04 eta 0:29:29 +epoch [127/200] batch [20/31] time 0.709 (0.761) data 0.000 (0.043) loss 1.4033 (0.8925) acc 75.0000 (79.3750) lr 6.1732e-04 eta 0:28:50 +epoch [127/200] batch [25/31] time 0.720 (0.751) data 0.000 (0.034) loss 0.5269 (0.8787) acc 90.6250 (80.2500) lr 6.1732e-04 eta 0:28:24 +epoch [127/200] batch [30/31] time 0.706 (0.744) data 0.000 (0.029) loss 0.6987 (0.8982) acc 81.2500 (79.3750) lr 6.1732e-04 eta 0:28:05 +epoch [128/200] batch [5/31] time 0.714 (0.896) data 0.001 (0.177) loss 1.3340 (0.7607) acc 78.1250 (85.6250) lr 6.0285e-04 eta 0:33:42 +epoch [128/200] batch [10/31] time 0.729 (0.804) data 0.000 (0.089) loss 0.9683 (0.7669) acc 78.1250 (83.1250) lr 6.0285e-04 eta 0:30:12 +epoch [128/200] batch [15/31] time 0.708 (0.772) data 0.001 (0.059) loss 0.4866 (0.7080) acc 90.6250 (84.7917) lr 6.0285e-04 eta 0:28:56 +epoch [128/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.045) loss 0.4321 (0.7763) acc 93.7500 (83.5938) lr 6.0285e-04 eta 0:28:18 +epoch [128/200] batch [25/31] time 0.706 (0.748) data 0.000 (0.036) loss 1.0332 (0.7507) acc 65.6250 (83.7500) lr 6.0285e-04 eta 0:27:53 +epoch [128/200] batch [30/31] time 0.710 (0.742) data 0.000 (0.030) loss 0.9712 (0.7680) acc 75.0000 (83.2292) lr 6.0285e-04 eta 0:27:37 +epoch [129/200] batch [5/31] time 0.705 (0.919) data 0.000 (0.169) loss 0.7197 (0.7680) acc 90.6250 (85.0000) lr 5.8849e-04 eta 0:34:06 +epoch [129/200] batch [10/31] time 0.709 (0.815) data 0.000 (0.085) loss 0.8311 (0.8151) acc 78.1250 (81.8750) lr 5.8849e-04 eta 0:30:09 +epoch [129/200] batch [15/31] time 0.711 (0.779) data 0.000 (0.057) 
loss 0.3257 (0.8188) acc 93.7500 (81.8750) lr 5.8849e-04 eta 0:28:47 +epoch [129/200] batch [20/31] time 0.722 (0.763) data 0.000 (0.043) loss 0.4265 (0.7872) acc 96.8750 (82.8125) lr 5.8849e-04 eta 0:28:07 +epoch [129/200] batch [25/31] time 0.719 (0.753) data 0.000 (0.034) loss 1.0850 (0.8208) acc 75.0000 (81.2500) lr 5.8849e-04 eta 0:27:40 +epoch [129/200] batch [30/31] time 0.708 (0.745) data 0.000 (0.028) loss 0.7002 (0.8042) acc 84.3750 (81.9792) lr 5.8849e-04 eta 0:27:21 +epoch [130/200] batch [5/31] time 0.713 (0.886) data 0.000 (0.163) loss 0.6802 (0.7865) acc 78.1250 (78.7500) lr 5.7422e-04 eta 0:32:25 +epoch [130/200] batch [10/31] time 0.710 (0.798) data 0.000 (0.081) loss 0.4912 (0.6768) acc 87.5000 (81.8750) lr 5.7422e-04 eta 0:29:08 +epoch [130/200] batch [15/31] time 0.717 (0.770) data 0.001 (0.054) loss 0.3135 (0.6513) acc 90.6250 (83.1250) lr 5.7422e-04 eta 0:28:02 +epoch [130/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.041) loss 1.2588 (0.7352) acc 81.2500 (82.8125) lr 5.7422e-04 eta 0:27:27 +epoch [130/200] batch [25/31] time 0.706 (0.753) data 0.000 (0.033) loss 0.6157 (0.7232) acc 90.6250 (83.2500) lr 5.7422e-04 eta 0:27:17 +epoch [130/200] batch [30/31] time 0.709 (0.746) data 0.000 (0.027) loss 0.7041 (0.7305) acc 78.1250 (83.0208) lr 5.7422e-04 eta 0:26:59 +epoch [131/200] batch [5/31] time 0.707 (0.888) data 0.000 (0.167) loss 0.6045 (0.5813) acc 87.5000 (85.0000) lr 5.6006e-04 eta 0:32:02 +epoch [131/200] batch [10/31] time 0.713 (0.800) data 0.000 (0.084) loss 0.9224 (0.6878) acc 75.0000 (83.4375) lr 5.6006e-04 eta 0:28:47 +epoch [131/200] batch [15/31] time 0.721 (0.771) data 0.000 (0.056) loss 0.6582 (0.7081) acc 78.1250 (83.1250) lr 5.6006e-04 eta 0:27:42 +epoch [131/200] batch [20/31] time 0.713 (0.755) data 0.000 (0.042) loss 0.5791 (0.7404) acc 90.6250 (82.3438) lr 5.6006e-04 eta 0:27:04 +epoch [131/200] batch [25/31] time 0.706 (0.746) data 0.000 (0.034) loss 1.1211 (0.7427) acc 75.0000 (82.2500) lr 5.6006e-04 eta 0:26:39 
+epoch [131/200] batch [30/31] time 0.709 (0.739) data 0.000 (0.028) loss 0.7515 (0.7653) acc 84.3750 (82.0833) lr 5.6006e-04 eta 0:26:22 +epoch [132/200] batch [5/31] time 0.710 (0.885) data 0.000 (0.165) loss 0.7451 (0.8187) acc 71.8750 (78.7500) lr 5.4601e-04 eta 0:31:29 +epoch [132/200] batch [10/31] time 0.729 (0.800) data 0.000 (0.082) loss 0.4861 (0.6950) acc 87.5000 (83.4375) lr 5.4601e-04 eta 0:28:23 +epoch [132/200] batch [15/31] time 0.709 (0.769) data 0.000 (0.055) loss 0.8745 (0.7558) acc 81.2500 (82.0833) lr 5.4601e-04 eta 0:27:14 +epoch [132/200] batch [20/31] time 0.708 (0.762) data 0.000 (0.041) loss 1.1172 (0.7498) acc 75.0000 (82.5000) lr 5.4601e-04 eta 0:26:54 +epoch [132/200] batch [25/31] time 0.705 (0.752) data 0.000 (0.033) loss 1.0264 (0.7554) acc 78.1250 (83.0000) lr 5.4601e-04 eta 0:26:28 +epoch [132/200] batch [30/31] time 0.715 (0.745) data 0.000 (0.028) loss 0.8638 (0.7589) acc 75.0000 (82.2917) lr 5.4601e-04 eta 0:26:10 +epoch [133/200] batch [5/31] time 0.711 (0.896) data 0.000 (0.176) loss 0.7612 (0.7155) acc 87.5000 (83.7500) lr 5.3207e-04 eta 0:31:24 +epoch [133/200] batch [10/31] time 0.708 (0.805) data 0.000 (0.088) loss 0.6738 (0.8229) acc 81.2500 (80.6250) lr 5.3207e-04 eta 0:28:07 +epoch [133/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.059) loss 0.7886 (0.7805) acc 81.2500 (81.6667) lr 5.3207e-04 eta 0:26:57 +epoch [133/200] batch [20/31] time 0.706 (0.757) data 0.000 (0.044) loss 0.6084 (0.7483) acc 78.1250 (82.0312) lr 5.3207e-04 eta 0:26:19 +epoch [133/200] batch [25/31] time 0.717 (0.748) data 0.000 (0.035) loss 0.5889 (0.7329) acc 81.2500 (82.7500) lr 5.3207e-04 eta 0:25:57 +epoch [133/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.030) loss 0.5645 (0.6990) acc 90.6250 (84.1667) lr 5.3207e-04 eta 0:25:39 +epoch [134/200] batch [5/31] time 0.717 (0.888) data 0.000 (0.160) loss 0.6597 (1.0743) acc 87.5000 (76.8750) lr 5.1825e-04 eta 0:30:40 +epoch [134/200] batch [10/31] time 0.708 (0.802) data 0.000 (0.080) 
loss 0.5659 (0.9038) acc 84.3750 (80.0000) lr 5.1825e-04 eta 0:27:38 +epoch [134/200] batch [15/31] time 0.709 (0.771) data 0.000 (0.054) loss 0.9692 (0.9455) acc 75.0000 (78.9583) lr 5.1825e-04 eta 0:26:29 +epoch [134/200] batch [20/31] time 0.714 (0.755) data 0.000 (0.040) loss 0.6812 (0.9193) acc 87.5000 (80.1562) lr 5.1825e-04 eta 0:25:52 +epoch [134/200] batch [25/31] time 0.721 (0.746) data 0.000 (0.032) loss 0.9790 (0.8904) acc 75.0000 (80.6250) lr 5.1825e-04 eta 0:25:31 +epoch [134/200] batch [30/31] time 0.721 (0.740) data 0.000 (0.027) loss 1.0742 (0.8595) acc 78.1250 (81.3542) lr 5.1825e-04 eta 0:25:15 +epoch [135/200] batch [5/31] time 0.713 (0.886) data 0.000 (0.163) loss 0.8076 (0.6327) acc 81.2500 (86.8750) lr 5.0454e-04 eta 0:30:08 +epoch [135/200] batch [10/31] time 0.712 (0.801) data 0.000 (0.082) loss 0.6172 (0.6496) acc 84.3750 (85.9375) lr 5.0454e-04 eta 0:27:11 +epoch [135/200] batch [15/31] time 0.720 (0.781) data 0.000 (0.055) loss 0.9360 (0.6737) acc 75.0000 (85.2083) lr 5.0454e-04 eta 0:26:27 +epoch [135/200] batch [20/31] time 0.721 (0.767) data 0.000 (0.041) loss 0.6260 (0.6850) acc 87.5000 (84.8438) lr 5.0454e-04 eta 0:25:53 +epoch [135/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.033) loss 0.5498 (0.7128) acc 90.6250 (84.2500) lr 5.0454e-04 eta 0:25:26 +epoch [135/200] batch [30/31] time 0.707 (0.747) data 0.000 (0.027) loss 0.4429 (0.7244) acc 87.5000 (83.7500) lr 5.0454e-04 eta 0:25:06 +epoch [136/200] batch [5/31] time 0.710 (0.892) data 0.000 (0.166) loss 0.6274 (0.6294) acc 90.6250 (85.6250) lr 4.9096e-04 eta 0:29:53 +epoch [136/200] batch [10/31] time 0.722 (0.803) data 0.001 (0.083) loss 0.5039 (0.7121) acc 87.5000 (85.6250) lr 4.9096e-04 eta 0:26:50 +epoch [136/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.056) loss 0.6265 (0.8088) acc 81.2500 (82.7083) lr 4.9096e-04 eta 0:25:45 +epoch [136/200] batch [20/31] time 0.709 (0.757) data 0.000 (0.042) loss 0.6060 (0.7548) acc 90.6250 (83.5938) lr 4.9096e-04 eta 0:25:10 
+epoch [136/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.033) loss 0.8379 (0.7523) acc 81.2500 (83.1250) lr 4.9096e-04 eta 0:24:47 +epoch [136/200] batch [30/31] time 0.716 (0.741) data 0.000 (0.028) loss 0.9170 (0.7462) acc 78.1250 (83.2292) lr 4.9096e-04 eta 0:24:31 +epoch [137/200] batch [5/31] time 0.724 (0.875) data 0.000 (0.154) loss 0.9336 (1.0090) acc 68.7500 (76.2500) lr 4.7750e-04 eta 0:28:52 +epoch [137/200] batch [10/31] time 0.712 (0.793) data 0.000 (0.077) loss 0.7373 (0.9229) acc 81.2500 (78.4375) lr 4.7750e-04 eta 0:26:05 +epoch [137/200] batch [15/31] time 0.720 (0.768) data 0.000 (0.052) loss 0.6304 (0.8812) acc 81.2500 (78.9583) lr 4.7750e-04 eta 0:25:12 +epoch [137/200] batch [20/31] time 0.705 (0.753) data 0.000 (0.039) loss 0.7139 (0.8245) acc 78.1250 (79.8438) lr 4.7750e-04 eta 0:24:37 +epoch [137/200] batch [25/31] time 0.708 (0.744) data 0.000 (0.031) loss 0.6255 (0.7917) acc 87.5000 (80.8750) lr 4.7750e-04 eta 0:24:17 +epoch [137/200] batch [30/31] time 0.721 (0.739) data 0.000 (0.026) loss 0.5977 (0.7807) acc 81.2500 (81.2500) lr 4.7750e-04 eta 0:24:04 +epoch [138/200] batch [5/31] time 0.710 (0.885) data 0.000 (0.161) loss 0.7207 (0.7713) acc 81.2500 (83.1250) lr 4.6417e-04 eta 0:28:43 +epoch [138/200] batch [10/31] time 0.718 (0.803) data 0.000 (0.081) loss 0.7368 (0.7942) acc 78.1250 (81.2500) lr 4.6417e-04 eta 0:26:00 +epoch [138/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.054) loss 0.7593 (0.7611) acc 84.3750 (82.7083) lr 4.6417e-04 eta 0:24:58 +epoch [138/200] batch [20/31] time 0.719 (0.759) data 0.000 (0.040) loss 0.7456 (0.7111) acc 81.2500 (82.9688) lr 4.6417e-04 eta 0:24:26 +epoch [138/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.032) loss 0.5342 (0.7263) acc 90.6250 (83.1250) lr 4.6417e-04 eta 0:24:04 +epoch [138/200] batch [30/31] time 0.718 (0.744) data 0.000 (0.027) loss 0.7881 (0.7357) acc 81.2500 (82.9167) lr 4.6417e-04 eta 0:23:49 +epoch [139/200] batch [5/31] time 0.709 (0.889) data 0.000 (0.168) 
loss 0.8774 (0.7444) acc 78.1250 (84.3750) lr 4.5098e-04 eta 0:28:24 +epoch [139/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.084) loss 0.7817 (0.7551) acc 84.3750 (84.6875) lr 4.5098e-04 eta 0:25:32 +epoch [139/200] batch [15/31] time 0.708 (0.772) data 0.000 (0.056) loss 0.7358 (0.7448) acc 90.6250 (85.0000) lr 4.5098e-04 eta 0:24:31 +epoch [139/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.042) loss 0.6406 (0.7686) acc 87.5000 (83.7500) lr 4.5098e-04 eta 0:23:58 +epoch [139/200] batch [25/31] time 0.704 (0.747) data 0.000 (0.034) loss 1.0176 (0.7654) acc 75.0000 (83.6250) lr 4.5098e-04 eta 0:23:36 +epoch [139/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.028) loss 0.9102 (0.7875) acc 81.2500 (83.4375) lr 4.5098e-04 eta 0:23:21 +epoch [140/200] batch [5/31] time 0.708 (0.967) data 0.000 (0.250) loss 0.5303 (0.6862) acc 90.6250 (83.7500) lr 4.3792e-04 eta 0:30:23 +epoch [140/200] batch [10/31] time 0.723 (0.841) data 0.001 (0.125) loss 0.6069 (0.7175) acc 81.2500 (81.8750) lr 4.3792e-04 eta 0:26:22 +epoch [140/200] batch [15/31] time 0.718 (0.809) data 0.000 (0.084) loss 0.7222 (0.7306) acc 81.2500 (81.4583) lr 4.3792e-04 eta 0:25:17 +epoch [140/200] batch [20/31] time 0.711 (0.784) data 0.000 (0.063) loss 0.7563 (0.7185) acc 78.1250 (82.1875) lr 4.3792e-04 eta 0:24:27 +epoch [140/200] batch [25/31] time 0.708 (0.769) data 0.000 (0.050) loss 0.4158 (0.7073) acc 87.5000 (82.1250) lr 4.3792e-04 eta 0:23:54 +epoch [140/200] batch [30/31] time 0.708 (0.759) data 0.000 (0.042) loss 0.8799 (0.7245) acc 87.5000 (82.2917) lr 4.3792e-04 eta 0:23:32 +epoch [141/200] batch [5/31] time 0.714 (0.882) data 0.000 (0.160) loss 0.4824 (0.6285) acc 90.6250 (87.5000) lr 4.2499e-04 eta 0:27:16 +epoch [141/200] batch [10/31] time 0.715 (0.797) data 0.000 (0.080) loss 1.0283 (0.6881) acc 84.3750 (86.8750) lr 4.2499e-04 eta 0:24:34 +epoch [141/200] batch [15/31] time 0.717 (0.768) data 0.000 (0.053) loss 0.4399 (0.7278) acc 90.6250 (85.8333) lr 4.2499e-04 eta 0:23:37 
+epoch [141/200] batch [20/31] time 0.706 (0.755) data 0.000 (0.040) loss 0.8813 (0.7822) acc 81.2500 (83.5938) lr 4.2499e-04 eta 0:23:09 +epoch [141/200] batch [25/31] time 0.712 (0.746) data 0.000 (0.032) loss 0.9580 (0.7745) acc 84.3750 (84.0000) lr 4.2499e-04 eta 0:22:48 +epoch [141/200] batch [30/31] time 0.714 (0.740) data 0.000 (0.027) loss 0.5210 (0.7843) acc 90.6250 (83.9583) lr 4.2499e-04 eta 0:22:34 +epoch [142/200] batch [5/31] time 0.713 (0.919) data 0.001 (0.171) loss 0.9365 (0.9032) acc 78.1250 (78.7500) lr 4.1221e-04 eta 0:27:55 +epoch [142/200] batch [10/31] time 0.716 (0.817) data 0.000 (0.085) loss 0.8550 (0.8764) acc 81.2500 (78.1250) lr 4.1221e-04 eta 0:24:45 +epoch [142/200] batch [15/31] time 0.717 (0.783) data 0.000 (0.057) loss 0.5098 (0.8957) acc 81.2500 (77.0833) lr 4.1221e-04 eta 0:23:40 +epoch [142/200] batch [20/31] time 0.712 (0.765) data 0.000 (0.043) loss 0.8535 (0.8964) acc 81.2500 (77.3438) lr 4.1221e-04 eta 0:23:04 +epoch [142/200] batch [25/31] time 0.704 (0.753) data 0.000 (0.034) loss 0.6929 (0.8832) acc 78.1250 (77.8750) lr 4.1221e-04 eta 0:22:38 +epoch [142/200] batch [30/31] time 0.727 (0.746) data 0.000 (0.029) loss 1.2158 (0.8832) acc 78.1250 (77.9167) lr 4.1221e-04 eta 0:22:22 +epoch [143/200] batch [5/31] time 0.704 (0.913) data 0.000 (0.187) loss 0.6523 (0.8952) acc 87.5000 (81.2500) lr 3.9958e-04 eta 0:27:16 +epoch [143/200] batch [10/31] time 0.714 (0.814) data 0.000 (0.094) loss 0.6851 (0.8061) acc 90.6250 (83.1250) lr 3.9958e-04 eta 0:24:16 +epoch [143/200] batch [15/31] time 0.707 (0.780) data 0.000 (0.063) loss 0.9956 (0.8056) acc 78.1250 (83.5417) lr 3.9958e-04 eta 0:23:09 +epoch [143/200] batch [20/31] time 0.707 (0.762) data 0.000 (0.047) loss 0.7646 (0.8212) acc 81.2500 (82.8125) lr 3.9958e-04 eta 0:22:35 +epoch [143/200] batch [25/31] time 0.708 (0.752) data 0.000 (0.038) loss 0.8325 (0.7981) acc 81.2500 (82.7500) lr 3.9958e-04 eta 0:22:12 +epoch [143/200] batch [30/31] time 0.706 (0.745) data 0.000 (0.031) 
loss 0.3728 (0.7487) acc 87.5000 (83.3333) lr 3.9958e-04 eta 0:21:57 +epoch [144/200] batch [5/31] time 0.706 (0.898) data 0.000 (0.179) loss 0.9883 (0.7090) acc 78.1250 (81.8750) lr 3.8709e-04 eta 0:26:22 +epoch [144/200] batch [10/31] time 0.721 (0.822) data 0.000 (0.090) loss 0.5962 (0.7476) acc 81.2500 (82.1875) lr 3.8709e-04 eta 0:24:04 +epoch [144/200] batch [15/31] time 0.710 (0.785) data 0.000 (0.060) loss 0.5571 (0.7532) acc 90.6250 (82.5000) lr 3.8709e-04 eta 0:22:55 +epoch [144/200] batch [20/31] time 0.716 (0.766) data 0.000 (0.045) loss 0.8379 (0.7497) acc 81.2500 (82.6562) lr 3.8709e-04 eta 0:22:18 +epoch [144/200] batch [25/31] time 0.714 (0.755) data 0.000 (0.036) loss 0.9331 (0.7851) acc 75.0000 (82.1250) lr 3.8709e-04 eta 0:21:55 +epoch [144/200] batch [30/31] time 0.708 (0.748) data 0.000 (0.030) loss 0.7402 (0.8093) acc 84.3750 (81.2500) lr 3.8709e-04 eta 0:21:38 +epoch [145/200] batch [5/31] time 0.708 (0.888) data 0.000 (0.171) loss 0.5063 (0.7838) acc 84.3750 (82.5000) lr 3.7476e-04 eta 0:25:36 +epoch [145/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.086) loss 0.6602 (0.7728) acc 84.3750 (81.8750) lr 3.7476e-04 eta 0:23:05 +epoch [145/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.057) loss 0.7842 (0.8065) acc 81.2500 (82.0833) lr 3.7476e-04 eta 0:22:12 +epoch [145/200] batch [20/31] time 0.712 (0.759) data 0.000 (0.043) loss 1.1182 (0.8445) acc 81.2500 (81.4062) lr 3.7476e-04 eta 0:21:41 +epoch [145/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.034) loss 1.0869 (0.8354) acc 78.1250 (81.8750) lr 3.7476e-04 eta 0:21:21 +epoch [145/200] batch [30/31] time 0.720 (0.743) data 0.000 (0.029) loss 0.8506 (0.8260) acc 81.2500 (81.6667) lr 3.7476e-04 eta 0:21:08 +epoch [146/200] batch [5/31] time 0.705 (0.881) data 0.000 (0.164) loss 0.4995 (0.7398) acc 90.6250 (84.3750) lr 3.6258e-04 eta 0:24:56 +epoch [146/200] batch [10/31] time 0.709 (0.794) data 0.000 (0.082) loss 0.8110 (0.7793) acc 81.2500 (83.7500) lr 3.6258e-04 eta 0:22:26 
+epoch [146/200] batch [15/31] time 0.713 (0.766) data 0.000 (0.055) loss 0.8813 (0.8173) acc 81.2500 (83.5417) lr 3.6258e-04 eta 0:21:33 +epoch [146/200] batch [20/31] time 0.703 (0.751) data 0.000 (0.041) loss 0.3318 (0.7570) acc 96.8750 (84.6875) lr 3.6258e-04 eta 0:21:06 +epoch [146/200] batch [25/31] time 0.718 (0.744) data 0.000 (0.033) loss 1.2432 (0.7576) acc 75.0000 (84.2500) lr 3.6258e-04 eta 0:20:49 +epoch [146/200] batch [30/31] time 0.705 (0.738) data 0.000 (0.028) loss 1.0596 (0.7518) acc 87.5000 (84.5833) lr 3.6258e-04 eta 0:20:35 +epoch [147/200] batch [5/31] time 0.712 (0.896) data 0.000 (0.172) loss 0.9902 (0.8390) acc 84.3750 (83.7500) lr 3.5055e-04 eta 0:24:56 +epoch [147/200] batch [10/31] time 0.707 (0.805) data 0.000 (0.086) loss 0.8306 (0.8851) acc 71.8750 (80.6250) lr 3.5055e-04 eta 0:22:18 +epoch [147/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.058) loss 0.3115 (0.8207) acc 93.7500 (82.0833) lr 3.5055e-04 eta 0:21:24 +epoch [147/200] batch [20/31] time 0.713 (0.760) data 0.000 (0.043) loss 0.6611 (0.8154) acc 84.3750 (81.8750) lr 3.5055e-04 eta 0:20:56 +epoch [147/200] batch [25/31] time 0.720 (0.751) data 0.000 (0.035) loss 0.9800 (0.8296) acc 75.0000 (81.5000) lr 3.5055e-04 eta 0:20:37 +epoch [147/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.029) loss 1.0469 (0.8306) acc 71.8750 (80.9375) lr 3.5055e-04 eta 0:20:23 +epoch [148/200] batch [5/31] time 0.709 (0.893) data 0.000 (0.175) loss 0.6470 (0.9816) acc 87.5000 (80.0000) lr 3.3869e-04 eta 0:24:22 +epoch [148/200] batch [10/31] time 0.719 (0.805) data 0.000 (0.088) loss 0.7012 (0.8833) acc 84.3750 (80.9375) lr 3.3869e-04 eta 0:21:54 +epoch [148/200] batch [15/31] time 0.722 (0.775) data 0.000 (0.058) loss 0.7339 (0.7904) acc 87.5000 (82.5000) lr 3.3869e-04 eta 0:21:01 +epoch [148/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.044) loss 1.1650 (0.8040) acc 68.7500 (82.1875) lr 3.3869e-04 eta 0:20:30 +epoch [148/200] batch [25/31] time 0.703 (0.748) data 0.000 (0.035) 
loss 0.8145 (0.7799) acc 84.3750 (82.6250) lr 3.3869e-04 eta 0:20:10 +epoch [148/200] batch [30/31] time 0.718 (0.742) data 0.000 (0.029) loss 0.7642 (0.8014) acc 87.5000 (82.7083) lr 3.3869e-04 eta 0:19:56 +epoch [149/200] batch [5/31] time 0.706 (0.891) data 0.000 (0.169) loss 0.4456 (0.7868) acc 84.3750 (83.1250) lr 3.2699e-04 eta 0:23:51 +epoch [149/200] batch [10/31] time 0.710 (0.804) data 0.000 (0.084) loss 0.4470 (0.7217) acc 90.6250 (83.4375) lr 3.2699e-04 eta 0:21:28 +epoch [149/200] batch [15/31] time 0.713 (0.775) data 0.000 (0.056) loss 0.7969 (0.7517) acc 90.6250 (84.5833) lr 3.2699e-04 eta 0:20:36 +epoch [149/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.042) loss 1.1094 (0.8001) acc 75.0000 (82.8125) lr 3.2699e-04 eta 0:20:07 +epoch [149/200] batch [25/31] time 0.708 (0.749) data 0.000 (0.034) loss 0.8276 (0.7867) acc 87.5000 (82.8750) lr 3.2699e-04 eta 0:19:48 +epoch [149/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.028) loss 0.2795 (0.8010) acc 96.8750 (83.0208) lr 3.2699e-04 eta 0:19:33 +epoch [150/200] batch [5/31] time 0.707 (0.893) data 0.000 (0.177) loss 0.9199 (0.8400) acc 81.2500 (82.5000) lr 3.1545e-04 eta 0:23:27 +epoch [150/200] batch [10/31] time 0.710 (0.817) data 0.000 (0.089) loss 0.5200 (0.7820) acc 87.5000 (81.5625) lr 3.1545e-04 eta 0:21:23 +epoch [150/200] batch [15/31] time 0.709 (0.782) data 0.000 (0.059) loss 0.3137 (0.7540) acc 93.7500 (82.7083) lr 3.1545e-04 eta 0:20:24 +epoch [150/200] batch [20/31] time 0.718 (0.764) data 0.000 (0.044) loss 0.7573 (0.7288) acc 81.2500 (83.4375) lr 3.1545e-04 eta 0:19:52 +epoch [150/200] batch [25/31] time 0.704 (0.753) data 0.000 (0.036) loss 0.5415 (0.7392) acc 87.5000 (83.2500) lr 3.1545e-04 eta 0:19:31 +epoch [150/200] batch [30/31] time 0.712 (0.746) data 0.000 (0.030) loss 0.6733 (0.7642) acc 87.5000 (82.8125) lr 3.1545e-04 eta 0:19:16 +epoch [151/200] batch [5/31] time 0.713 (0.886) data 0.000 (0.164) loss 1.3779 (0.9138) acc 71.8750 (80.0000) lr 3.0409e-04 eta 0:22:48 
+epoch [151/200] batch [10/31] time 0.707 (0.799) data 0.000 (0.082) loss 0.5625 (0.8505) acc 90.6250 (82.8125) lr 3.0409e-04 eta 0:20:30 +epoch [151/200] batch [15/31] time 0.715 (0.770) data 0.000 (0.055) loss 1.1816 (0.8875) acc 78.1250 (81.2500) lr 3.0409e-04 eta 0:19:41 +epoch [151/200] batch [20/31] time 0.704 (0.755) data 0.000 (0.041) loss 0.7256 (0.8461) acc 84.3750 (82.5000) lr 3.0409e-04 eta 0:19:15 +epoch [151/200] batch [25/31] time 0.717 (0.747) data 0.000 (0.033) loss 1.1748 (0.8231) acc 71.8750 (82.8750) lr 3.0409e-04 eta 0:18:59 +epoch [151/200] batch [30/31] time 0.723 (0.746) data 0.000 (0.028) loss 0.7568 (0.8003) acc 81.2500 (83.0208) lr 3.0409e-04 eta 0:18:54 +epoch [152/200] batch [5/31] time 0.720 (0.896) data 0.000 (0.172) loss 0.5308 (0.6998) acc 87.5000 (85.6250) lr 2.9289e-04 eta 0:22:37 +epoch [152/200] batch [10/31] time 0.712 (0.807) data 0.000 (0.086) loss 0.4775 (0.6575) acc 84.3750 (85.9375) lr 2.9289e-04 eta 0:20:17 +epoch [152/200] batch [15/31] time 0.713 (0.775) data 0.000 (0.058) loss 1.0615 (0.7639) acc 71.8750 (83.7500) lr 2.9289e-04 eta 0:19:25 +epoch [152/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.043) loss 0.8311 (0.7937) acc 87.5000 (84.0625) lr 2.9289e-04 eta 0:18:56 +epoch [152/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.035) loss 0.7720 (0.7923) acc 78.1250 (83.8750) lr 2.9289e-04 eta 0:18:37 +epoch [152/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 0.8188 (0.8179) acc 87.5000 (83.4375) lr 2.9289e-04 eta 0:18:24 +epoch [153/200] batch [5/31] time 0.709 (0.910) data 0.000 (0.190) loss 0.7334 (0.6516) acc 87.5000 (86.8750) lr 2.8187e-04 eta 0:22:29 +epoch [153/200] batch [10/31] time 0.711 (0.812) data 0.000 (0.095) loss 0.3291 (0.6452) acc 90.6250 (85.0000) lr 2.8187e-04 eta 0:19:59 +epoch [153/200] batch [15/31] time 0.710 (0.778) data 0.000 (0.064) loss 1.0508 (0.6558) acc 78.1250 (84.7917) lr 2.8187e-04 eta 0:19:05 +epoch [153/200] batch [20/31] time 0.713 (0.761) data 0.000 (0.048) 
loss 1.2666 (0.7268) acc 75.0000 (84.2188) lr 2.8187e-04 eta 0:18:37 +epoch [153/200] batch [25/31] time 0.709 (0.752) data 0.000 (0.038) loss 0.7324 (0.7676) acc 84.3750 (83.3750) lr 2.8187e-04 eta 0:18:19 +epoch [153/200] batch [30/31] time 0.707 (0.749) data 0.000 (0.032) loss 0.4900 (0.7633) acc 90.6250 (83.2292) lr 2.8187e-04 eta 0:18:12 +epoch [154/200] batch [5/31] time 0.708 (0.890) data 0.000 (0.170) loss 0.5723 (0.7344) acc 78.1250 (80.0000) lr 2.7103e-04 eta 0:21:32 +epoch [154/200] batch [10/31] time 0.726 (0.803) data 0.000 (0.085) loss 0.9170 (0.7400) acc 78.1250 (81.8750) lr 2.7103e-04 eta 0:19:21 +epoch [154/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.057) loss 1.2354 (0.7440) acc 71.8750 (82.5000) lr 2.7103e-04 eta 0:18:34 +epoch [154/200] batch [20/31] time 0.705 (0.757) data 0.000 (0.043) loss 0.3677 (0.7395) acc 87.5000 (82.0312) lr 2.7103e-04 eta 0:18:07 +epoch [154/200] batch [25/31] time 0.705 (0.747) data 0.000 (0.034) loss 1.0889 (0.7503) acc 75.0000 (81.8750) lr 2.7103e-04 eta 0:17:49 +epoch [154/200] batch [30/31] time 0.726 (0.742) data 0.000 (0.029) loss 0.9521 (0.7635) acc 78.1250 (81.9792) lr 2.7103e-04 eta 0:17:38 +epoch [155/200] batch [5/31] time 0.718 (0.880) data 0.000 (0.161) loss 0.4561 (0.6539) acc 90.6250 (86.8750) lr 2.6037e-04 eta 0:20:50 +epoch [155/200] batch [10/31] time 0.710 (0.795) data 0.000 (0.080) loss 0.7222 (0.7280) acc 87.5000 (85.0000) lr 2.6037e-04 eta 0:18:45 +epoch [155/200] batch [15/31] time 0.715 (0.768) data 0.000 (0.054) loss 0.8403 (0.6949) acc 87.5000 (85.2083) lr 2.6037e-04 eta 0:18:04 +epoch [155/200] batch [20/31] time 0.713 (0.755) data 0.000 (0.040) loss 0.5664 (0.6637) acc 87.5000 (85.6250) lr 2.6037e-04 eta 0:17:41 +epoch [155/200] batch [25/31] time 0.711 (0.746) data 0.000 (0.032) loss 0.5522 (0.6971) acc 81.2500 (83.8750) lr 2.6037e-04 eta 0:17:25 +epoch [155/200] batch [30/31] time 0.707 (0.741) data 0.000 (0.027) loss 0.2332 (0.7008) acc 93.7500 (83.9583) lr 2.6037e-04 eta 0:17:13 
+epoch [156/200] batch [5/31] time 0.720 (0.890) data 0.000 (0.162) loss 0.5552 (0.7265) acc 87.5000 (85.6250) lr 2.4989e-04 eta 0:20:36 +epoch [156/200] batch [10/31] time 0.710 (0.800) data 0.000 (0.081) loss 0.3604 (0.7428) acc 93.7500 (84.6875) lr 2.4989e-04 eta 0:18:27 +epoch [156/200] batch [15/31] time 0.712 (0.770) data 0.001 (0.054) loss 0.6655 (0.7286) acc 87.5000 (84.7917) lr 2.4989e-04 eta 0:17:42 +epoch [156/200] batch [20/31] time 0.710 (0.755) data 0.000 (0.041) loss 1.3955 (0.7239) acc 71.8750 (84.2188) lr 2.4989e-04 eta 0:17:17 +epoch [156/200] batch [25/31] time 0.736 (0.747) data 0.000 (0.033) loss 0.9209 (0.7382) acc 87.5000 (84.5000) lr 2.4989e-04 eta 0:17:03 +epoch [156/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 0.6807 (0.7498) acc 84.3750 (83.7500) lr 2.4989e-04 eta 0:16:51 +epoch [157/200] batch [5/31] time 0.709 (0.887) data 0.000 (0.165) loss 0.7002 (0.7662) acc 84.3750 (80.0000) lr 2.3959e-04 eta 0:20:04 +epoch [157/200] batch [10/31] time 0.721 (0.804) data 0.000 (0.083) loss 0.9302 (0.7529) acc 81.2500 (81.5625) lr 2.3959e-04 eta 0:18:07 +epoch [157/200] batch [15/31] time 0.711 (0.782) data 0.000 (0.055) loss 0.6323 (0.7382) acc 84.3750 (83.1250) lr 2.3959e-04 eta 0:17:34 +epoch [157/200] batch [20/31] time 0.709 (0.764) data 0.000 (0.041) loss 0.3779 (0.7118) acc 81.2500 (83.4375) lr 2.3959e-04 eta 0:17:06 +epoch [157/200] batch [25/31] time 0.708 (0.754) data 0.000 (0.033) loss 0.9956 (0.7238) acc 68.7500 (83.5000) lr 2.3959e-04 eta 0:16:49 +epoch [157/200] batch [30/31] time 0.702 (0.746) data 0.000 (0.028) loss 0.7524 (0.7172) acc 87.5000 (83.9583) lr 2.3959e-04 eta 0:16:34 +epoch [158/200] batch [5/31] time 0.702 (0.902) data 0.000 (0.181) loss 0.7183 (0.6450) acc 84.3750 (86.2500) lr 2.2949e-04 eta 0:19:57 +epoch [158/200] batch [10/31] time 0.710 (0.809) data 0.000 (0.091) loss 0.8096 (0.8016) acc 81.2500 (82.5000) lr 2.2949e-04 eta 0:17:49 +epoch [158/200] batch [15/31] time 0.714 (0.778) data 0.000 (0.061) 
loss 0.5322 (0.7897) acc 84.3750 (82.7083) lr 2.2949e-04 eta 0:17:05 +epoch [158/200] batch [20/31] time 0.723 (0.762) data 0.000 (0.046) loss 0.3035 (0.7638) acc 96.8750 (82.9688) lr 2.2949e-04 eta 0:16:40 +epoch [158/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.037) loss 0.8306 (0.7514) acc 81.2500 (83.0000) lr 2.2949e-04 eta 0:16:25 +epoch [158/200] batch [30/31] time 0.728 (0.747) data 0.000 (0.031) loss 0.7417 (0.7451) acc 84.3750 (83.2292) lr 2.2949e-04 eta 0:16:13 +epoch [159/200] batch [5/31] time 0.712 (0.888) data 0.000 (0.166) loss 0.6099 (0.8009) acc 87.5000 (79.3750) lr 2.1957e-04 eta 0:19:11 +epoch [159/200] batch [10/31] time 0.707 (0.801) data 0.000 (0.083) loss 0.9351 (0.9036) acc 78.1250 (78.4375) lr 2.1957e-04 eta 0:17:15 +epoch [159/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.056) loss 1.0234 (0.8938) acc 84.3750 (80.2083) lr 2.1957e-04 eta 0:16:33 +epoch [159/200] batch [20/31] time 0.714 (0.757) data 0.000 (0.042) loss 0.6763 (0.8399) acc 81.2500 (81.0938) lr 2.1957e-04 eta 0:16:10 +epoch [159/200] batch [25/31] time 0.718 (0.748) data 0.000 (0.033) loss 1.1406 (0.8321) acc 71.8750 (81.7500) lr 2.1957e-04 eta 0:15:55 +epoch [159/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.028) loss 0.4480 (0.8365) acc 93.7500 (81.7708) lr 2.1957e-04 eta 0:15:45 +epoch [160/200] batch [5/31] time 0.721 (0.974) data 0.000 (0.230) loss 1.1064 (0.9741) acc 81.2500 (78.7500) lr 2.0984e-04 eta 0:20:32 +epoch [160/200] batch [10/31] time 0.709 (0.844) data 0.000 (0.115) loss 0.3464 (0.8076) acc 93.7500 (81.5625) lr 2.0984e-04 eta 0:17:44 +epoch [160/200] batch [15/31] time 0.712 (0.800) data 0.000 (0.077) loss 0.7217 (0.7915) acc 81.2500 (82.0833) lr 2.0984e-04 eta 0:16:44 +epoch [160/200] batch [20/31] time 0.723 (0.779) data 0.000 (0.058) loss 0.6758 (0.7290) acc 84.3750 (83.7500) lr 2.0984e-04 eta 0:16:14 +epoch [160/200] batch [25/31] time 0.714 (0.766) data 0.000 (0.046) loss 1.0752 (0.7449) acc 75.0000 (83.5000) lr 2.0984e-04 eta 0:15:54 
+epoch [160/200] batch [30/31] time 0.706 (0.757) data 0.000 (0.039) loss 0.3545 (0.7223) acc 90.6250 (83.6458) lr 2.0984e-04 eta 0:15:39 +epoch [161/200] batch [5/31] time 0.712 (0.889) data 0.000 (0.169) loss 0.7754 (0.7020) acc 81.2500 (83.7500) lr 2.0032e-04 eta 0:18:17 +epoch [161/200] batch [10/31] time 0.726 (0.803) data 0.000 (0.085) loss 0.7056 (0.6918) acc 84.3750 (84.3750) lr 2.0032e-04 eta 0:16:28 +epoch [161/200] batch [15/31] time 0.720 (0.773) data 0.000 (0.057) loss 0.9883 (0.7706) acc 71.8750 (81.6667) lr 2.0032e-04 eta 0:15:46 +epoch [161/200] batch [20/31] time 0.714 (0.757) data 0.000 (0.043) loss 0.3914 (0.7628) acc 90.6250 (82.8125) lr 2.0032e-04 eta 0:15:23 +epoch [161/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.034) loss 0.9048 (0.7916) acc 75.0000 (82.0000) lr 2.0032e-04 eta 0:15:16 +epoch [161/200] batch [30/31] time 0.712 (0.747) data 0.000 (0.028) loss 1.0518 (0.7997) acc 81.2500 (82.1875) lr 2.0032e-04 eta 0:15:03 +epoch [162/200] batch [5/31] time 0.709 (0.880) data 0.000 (0.160) loss 0.7173 (0.6419) acc 75.0000 (83.1250) lr 1.9098e-04 eta 0:17:39 +epoch [162/200] batch [10/31] time 0.714 (0.798) data 0.000 (0.080) loss 0.6074 (0.6458) acc 90.6250 (85.6250) lr 1.9098e-04 eta 0:15:56 +epoch [162/200] batch [15/31] time 0.712 (0.769) data 0.000 (0.054) loss 0.7480 (0.6962) acc 84.3750 (84.3750) lr 1.9098e-04 eta 0:15:18 +epoch [162/200] batch [20/31] time 0.711 (0.756) data 0.000 (0.040) loss 0.2284 (0.6782) acc 100.0000 (85.3125) lr 1.9098e-04 eta 0:14:58 +epoch [162/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.032) loss 0.5322 (0.6896) acc 90.6250 (85.3750) lr 1.9098e-04 eta 0:14:44 +epoch [162/200] batch [30/31] time 0.716 (0.741) data 0.000 (0.027) loss 0.5117 (0.6956) acc 87.5000 (84.8958) lr 1.9098e-04 eta 0:14:33 +epoch [163/200] batch [5/31] time 0.721 (0.907) data 0.000 (0.181) loss 0.5151 (0.6531) acc 87.5000 (85.0000) lr 1.8185e-04 eta 0:17:44 +epoch [163/200] batch [10/31] time 0.706 (0.810) data 0.000 (0.091) 
loss 0.3574 (0.6857) acc 87.5000 (84.3750) lr 1.8185e-04 eta 0:15:45 +epoch [163/200] batch [15/31] time 0.713 (0.778) data 0.000 (0.061) loss 0.3323 (0.6344) acc 90.6250 (86.4583) lr 1.8185e-04 eta 0:15:04 +epoch [163/200] batch [20/31] time 0.722 (0.769) data 0.000 (0.046) loss 0.2250 (0.6518) acc 96.8750 (85.9375) lr 1.8185e-04 eta 0:14:50 +epoch [163/200] batch [25/31] time 0.721 (0.759) data 0.000 (0.037) loss 1.1719 (0.7060) acc 71.8750 (84.2500) lr 1.8185e-04 eta 0:14:35 +epoch [163/200] batch [30/31] time 0.716 (0.751) data 0.000 (0.030) loss 1.0342 (0.7695) acc 75.0000 (82.9167) lr 1.8185e-04 eta 0:14:22 +epoch [164/200] batch [5/31] time 0.710 (0.889) data 0.000 (0.163) loss 0.9893 (0.6941) acc 71.8750 (83.1250) lr 1.7292e-04 eta 0:16:55 +epoch [164/200] batch [10/31] time 0.723 (0.804) data 0.000 (0.082) loss 0.3174 (0.6652) acc 96.8750 (84.0625) lr 1.7292e-04 eta 0:15:13 +epoch [164/200] batch [15/31] time 0.710 (0.774) data 0.000 (0.055) loss 0.8340 (0.7143) acc 87.5000 (85.0000) lr 1.7292e-04 eta 0:14:35 +epoch [164/200] batch [20/31] time 0.712 (0.758) data 0.000 (0.041) loss 0.9321 (0.7298) acc 75.0000 (84.2188) lr 1.7292e-04 eta 0:14:13 +epoch [164/200] batch [25/31] time 0.713 (0.749) data 0.000 (0.033) loss 0.9868 (0.7311) acc 71.8750 (84.0000) lr 1.7292e-04 eta 0:13:59 +epoch [164/200] batch [30/31] time 0.711 (0.742) data 0.000 (0.027) loss 1.0029 (0.7180) acc 75.0000 (83.6458) lr 1.7292e-04 eta 0:13:48 +epoch [165/200] batch [5/31] time 0.723 (0.885) data 0.000 (0.159) loss 1.1270 (0.5989) acc 78.1250 (85.6250) lr 1.6419e-04 eta 0:16:23 +epoch [165/200] batch [10/31] time 0.707 (0.797) data 0.000 (0.079) loss 0.3904 (0.6206) acc 90.6250 (86.2500) lr 1.6419e-04 eta 0:14:42 +epoch [165/200] batch [15/31] time 0.707 (0.769) data 0.000 (0.053) loss 0.5381 (0.6114) acc 84.3750 (86.0417) lr 1.6419e-04 eta 0:14:06 +epoch [165/200] batch [20/31] time 0.706 (0.754) data 0.000 (0.040) loss 0.5264 (0.6195) acc 90.6250 (85.7812) lr 1.6419e-04 eta 0:13:45 
+epoch [165/200] batch [25/31] time 0.708 (0.746) data 0.000 (0.032) loss 0.9541 (0.6577) acc 87.5000 (85.8750) lr 1.6419e-04 eta 0:13:33 +epoch [165/200] batch [30/31] time 0.711 (0.740) data 0.000 (0.027) loss 0.8999 (0.6627) acc 78.1250 (85.2083) lr 1.6419e-04 eta 0:13:23 +epoch [166/200] batch [5/31] time 0.727 (0.908) data 0.000 (0.178) loss 0.6431 (0.6479) acc 87.5000 (85.6250) lr 1.5567e-04 eta 0:16:20 +epoch [166/200] batch [10/31] time 0.720 (0.813) data 0.000 (0.089) loss 0.6597 (0.7063) acc 81.2500 (84.6875) lr 1.5567e-04 eta 0:14:34 +epoch [166/200] batch [15/31] time 0.721 (0.790) data 0.000 (0.059) loss 0.9824 (0.7189) acc 75.0000 (83.9583) lr 1.5567e-04 eta 0:14:05 +epoch [166/200] batch [20/31] time 0.713 (0.772) data 0.000 (0.045) loss 0.2350 (0.7274) acc 96.8750 (84.6875) lr 1.5567e-04 eta 0:13:42 +epoch [166/200] batch [25/31] time 0.714 (0.761) data 0.000 (0.036) loss 0.9185 (0.7424) acc 75.0000 (83.7500) lr 1.5567e-04 eta 0:13:27 +epoch [166/200] batch [30/31] time 0.710 (0.754) data 0.000 (0.030) loss 0.8872 (0.7580) acc 78.1250 (83.3333) lr 1.5567e-04 eta 0:13:15 +epoch [167/200] batch [5/31] time 0.727 (0.892) data 0.000 (0.165) loss 1.1631 (0.7402) acc 75.0000 (83.1250) lr 1.4736e-04 eta 0:15:35 +epoch [167/200] batch [10/31] time 0.712 (0.801) data 0.000 (0.083) loss 0.7285 (0.7742) acc 87.5000 (82.1875) lr 1.4736e-04 eta 0:13:56 +epoch [167/200] batch [15/31] time 0.716 (0.771) data 0.000 (0.055) loss 0.9111 (0.8280) acc 78.1250 (81.8750) lr 1.4736e-04 eta 0:13:21 +epoch [167/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.041) loss 0.7861 (0.8332) acc 78.1250 (81.5625) lr 1.4736e-04 eta 0:13:02 +epoch [167/200] batch [25/31] time 0.718 (0.748) data 0.000 (0.033) loss 1.0820 (0.8323) acc 81.2500 (82.2500) lr 1.4736e-04 eta 0:12:49 +epoch [167/200] batch [30/31] time 0.723 (0.742) data 0.000 (0.028) loss 0.3718 (0.8166) acc 93.7500 (82.6042) lr 1.4736e-04 eta 0:12:40 +epoch [168/200] batch [5/31] time 0.712 (0.881) data 0.000 (0.159) 
loss 0.7549 (0.7868) acc 84.3750 (80.6250) lr 1.3926e-04 eta 0:14:57 +epoch [168/200] batch [10/31] time 0.727 (0.801) data 0.000 (0.080) loss 0.4021 (0.7368) acc 90.6250 (82.5000) lr 1.3926e-04 eta 0:13:31 +epoch [168/200] batch [15/31] time 0.721 (0.772) data 0.000 (0.053) loss 0.7031 (0.7374) acc 81.2500 (82.2917) lr 1.3926e-04 eta 0:12:58 +epoch [168/200] batch [20/31] time 0.721 (0.758) data 0.000 (0.040) loss 0.8193 (0.7505) acc 87.5000 (82.3438) lr 1.3926e-04 eta 0:12:40 +epoch [168/200] batch [25/31] time 0.706 (0.748) data 0.000 (0.032) loss 1.0947 (0.7849) acc 78.1250 (81.8750) lr 1.3926e-04 eta 0:12:26 +epoch [168/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.027) loss 1.2314 (0.7875) acc 71.8750 (82.0833) lr 1.3926e-04 eta 0:12:16 +epoch [169/200] batch [5/31] time 0.709 (0.896) data 0.000 (0.172) loss 0.7622 (0.9365) acc 87.5000 (83.7500) lr 1.3137e-04 eta 0:14:44 +epoch [169/200] batch [10/31] time 0.725 (0.809) data 0.000 (0.086) loss 0.5522 (0.8743) acc 84.3750 (83.7500) lr 1.3137e-04 eta 0:13:14 +epoch [169/200] batch [15/31] time 0.712 (0.778) data 0.000 (0.058) loss 0.8970 (0.8509) acc 84.3750 (83.1250) lr 1.3137e-04 eta 0:12:40 +epoch [169/200] batch [20/31] time 0.712 (0.762) data 0.000 (0.043) loss 1.1074 (0.8220) acc 78.1250 (83.2812) lr 1.3137e-04 eta 0:12:20 +epoch [169/200] batch [25/31] time 0.707 (0.751) data 0.000 (0.035) loss 0.9678 (0.8263) acc 78.1250 (82.7500) lr 1.3137e-04 eta 0:12:06 +epoch [169/200] batch [30/31] time 0.711 (0.745) data 0.000 (0.029) loss 0.6836 (0.8025) acc 81.2500 (83.1250) lr 1.3137e-04 eta 0:11:57 +epoch [170/200] batch [5/31] time 0.720 (0.914) data 0.000 (0.190) loss 0.9355 (0.6228) acc 75.0000 (83.7500) lr 1.2369e-04 eta 0:14:34 +epoch [170/200] batch [10/31] time 0.708 (0.813) data 0.000 (0.095) loss 1.0625 (0.6800) acc 75.0000 (83.7500) lr 1.2369e-04 eta 0:12:53 +epoch [170/200] batch [15/31] time 0.712 (0.781) data 0.000 (0.064) loss 0.5527 (0.6917) acc 90.6250 (83.9583) lr 1.2369e-04 eta 0:12:18 
+epoch [170/200] batch [20/31] time 0.705 (0.763) data 0.000 (0.048) loss 0.6792 (0.6840) acc 84.3750 (84.0625) lr 1.2369e-04 eta 0:11:58 +epoch [170/200] batch [25/31] time 0.707 (0.752) data 0.000 (0.038) loss 0.6089 (0.6727) acc 90.6250 (84.2500) lr 1.2369e-04 eta 0:11:44 +epoch [170/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.032) loss 0.8750 (0.6848) acc 81.2500 (84.2708) lr 1.2369e-04 eta 0:11:33 +epoch [171/200] batch [5/31] time 0.707 (0.951) data 0.000 (0.233) loss 0.8682 (0.7141) acc 78.1250 (83.1250) lr 1.1623e-04 eta 0:14:39 +epoch [171/200] batch [10/31] time 0.713 (0.830) data 0.000 (0.117) loss 0.9106 (0.7387) acc 84.3750 (82.8125) lr 1.1623e-04 eta 0:12:43 +epoch [171/200] batch [15/31] time 0.709 (0.805) data 0.000 (0.078) loss 0.4377 (0.7098) acc 90.6250 (84.3750) lr 1.1623e-04 eta 0:12:16 +epoch [171/200] batch [20/31] time 0.714 (0.782) data 0.000 (0.059) loss 0.7397 (0.7179) acc 84.3750 (84.6875) lr 1.1623e-04 eta 0:11:51 +epoch [171/200] batch [25/31] time 0.706 (0.767) data 0.000 (0.047) loss 0.8989 (0.7356) acc 75.0000 (84.2500) lr 1.1623e-04 eta 0:11:34 +epoch [171/200] batch [30/31] time 0.706 (0.757) data 0.000 (0.039) loss 0.6738 (0.7535) acc 81.2500 (83.8542) lr 1.1623e-04 eta 0:11:21 +epoch [172/200] batch [5/31] time 0.707 (1.095) data 0.000 (0.366) loss 0.7764 (0.6868) acc 81.2500 (85.6250) lr 1.0899e-04 eta 0:16:19 +epoch [172/200] batch [10/31] time 0.710 (0.905) data 0.000 (0.183) loss 0.9141 (0.8467) acc 81.2500 (82.8125) lr 1.0899e-04 eta 0:13:24 +epoch [172/200] batch [15/31] time 0.710 (0.840) data 0.000 (0.122) loss 0.5669 (0.8248) acc 90.6250 (82.9167) lr 1.0899e-04 eta 0:12:22 +epoch [172/200] batch [20/31] time 0.704 (0.807) data 0.000 (0.092) loss 0.6890 (0.7838) acc 84.3750 (83.7500) lr 1.0899e-04 eta 0:11:49 +epoch [172/200] batch [25/31] time 0.708 (0.787) data 0.000 (0.073) loss 0.4180 (0.7978) acc 87.5000 (83.2500) lr 1.0899e-04 eta 0:11:27 +epoch [172/200] batch [30/31] time 0.706 (0.774) data 0.000 (0.061) 
loss 0.5747 (0.7525) acc 84.3750 (84.3750) lr 1.0899e-04 eta 0:11:12 +epoch [173/200] batch [5/31] time 0.726 (0.915) data 0.000 (0.164) loss 0.9473 (0.6766) acc 84.3750 (88.7500) lr 1.0197e-04 eta 0:13:09 +epoch [173/200] batch [10/31] time 0.704 (0.813) data 0.000 (0.082) loss 0.4839 (0.6791) acc 93.7500 (87.8125) lr 1.0197e-04 eta 0:11:37 +epoch [173/200] batch [15/31] time 0.708 (0.781) data 0.000 (0.055) loss 0.6240 (0.6329) acc 87.5000 (87.0833) lr 1.0197e-04 eta 0:11:06 +epoch [173/200] batch [20/31] time 0.701 (0.763) data 0.000 (0.041) loss 0.8071 (0.6929) acc 90.6250 (86.0938) lr 1.0197e-04 eta 0:10:46 +epoch [173/200] batch [25/31] time 0.711 (0.752) data 0.000 (0.033) loss 0.8525 (0.7220) acc 84.3750 (85.5000) lr 1.0197e-04 eta 0:10:33 +epoch [173/200] batch [30/31] time 0.707 (0.744) data 0.000 (0.027) loss 0.6992 (0.7455) acc 87.5000 (85.2083) lr 1.0197e-04 eta 0:10:23 +epoch [174/200] batch [5/31] time 0.713 (0.904) data 0.000 (0.177) loss 0.5889 (0.5899) acc 84.3750 (86.2500) lr 9.5173e-05 eta 0:12:31 +epoch [174/200] batch [10/31] time 0.718 (0.809) data 0.000 (0.089) loss 1.0439 (0.6407) acc 71.8750 (84.6875) lr 9.5173e-05 eta 0:11:09 +epoch [174/200] batch [15/31] time 0.710 (0.778) data 0.000 (0.059) loss 0.6797 (0.6547) acc 81.2500 (83.5417) lr 9.5173e-05 eta 0:10:39 +epoch [174/200] batch [20/31] time 0.706 (0.761) data 0.000 (0.045) loss 0.5308 (0.7214) acc 90.6250 (82.9688) lr 9.5173e-05 eta 0:10:21 +epoch [174/200] batch [25/31] time 0.705 (0.750) data 0.000 (0.036) loss 0.9131 (0.7112) acc 78.1250 (83.2500) lr 9.5173e-05 eta 0:10:09 +epoch [174/200] batch [30/31] time 0.703 (0.742) data 0.000 (0.030) loss 0.6318 (0.6944) acc 78.1250 (83.4375) lr 9.5173e-05 eta 0:09:59 +epoch [175/200] batch [5/31] time 0.711 (0.893) data 0.000 (0.168) loss 1.3535 (0.8475) acc 75.0000 (83.1250) lr 8.8597e-05 eta 0:11:55 +epoch [175/200] batch [10/31] time 0.723 (0.819) data 0.000 (0.084) loss 0.8730 (0.8052) acc 84.3750 (83.7500) lr 8.8597e-05 eta 0:10:51 
+epoch [175/200] batch [15/31] time 0.709 (0.784) data 0.000 (0.056) loss 1.3203 (0.7945) acc 78.1250 (84.1667) lr 8.8597e-05 eta 0:10:19 +epoch [175/200] batch [20/31] time 0.711 (0.766) data 0.000 (0.042) loss 0.5601 (0.7716) acc 81.2500 (83.9062) lr 8.8597e-05 eta 0:10:01 +epoch [175/200] batch [25/31] time 0.707 (0.754) data 0.000 (0.034) loss 0.2041 (0.7307) acc 100.0000 (84.6250) lr 8.8597e-05 eta 0:09:48 +epoch [175/200] batch [30/31] time 0.710 (0.746) data 0.000 (0.028) loss 0.5229 (0.7209) acc 84.3750 (84.7917) lr 8.8597e-05 eta 0:09:39 +epoch [176/200] batch [5/31] time 0.711 (0.892) data 0.000 (0.171) loss 0.5933 (0.6273) acc 81.2500 (83.1250) lr 8.2245e-05 eta 0:11:26 +epoch [176/200] batch [10/31] time 0.706 (0.803) data 0.000 (0.085) loss 0.8521 (0.7238) acc 84.3750 (83.7500) lr 8.2245e-05 eta 0:10:13 +epoch [176/200] batch [15/31] time 0.711 (0.772) data 0.000 (0.057) loss 0.5288 (0.7037) acc 87.5000 (83.9583) lr 8.2245e-05 eta 0:09:46 +epoch [176/200] batch [20/31] time 0.707 (0.757) data 0.000 (0.043) loss 1.0830 (0.7051) acc 68.7500 (83.1250) lr 8.2245e-05 eta 0:09:31 +epoch [176/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.034) loss 0.5859 (0.6891) acc 87.5000 (83.6250) lr 8.2245e-05 eta 0:09:20 +epoch [176/200] batch [30/31] time 0.705 (0.741) data 0.000 (0.029) loss 0.9746 (0.7306) acc 71.8750 (82.5000) lr 8.2245e-05 eta 0:09:11 +epoch [177/200] batch [5/31] time 0.704 (0.948) data 0.000 (0.230) loss 0.6235 (0.7865) acc 78.1250 (79.3750) lr 7.6120e-05 eta 0:11:40 +epoch [177/200] batch [10/31] time 0.726 (0.832) data 0.000 (0.115) loss 1.0576 (0.9145) acc 78.1250 (80.3125) lr 7.6120e-05 eta 0:10:10 +epoch [177/200] batch [15/31] time 0.706 (0.791) data 0.000 (0.077) loss 0.5371 (0.9074) acc 81.2500 (80.2083) lr 7.6120e-05 eta 0:09:36 +epoch [177/200] batch [20/31] time 0.706 (0.771) data 0.000 (0.058) loss 1.2861 (0.8944) acc 81.2500 (81.5625) lr 7.6120e-05 eta 0:09:18 +epoch [177/200] batch [25/31] time 0.714 (0.758) data 0.000 (0.046) 
loss 0.2400 (0.8582) acc 100.0000 (82.3750) lr 7.6120e-05 eta 0:09:05 +epoch [177/200] batch [30/31] time 0.709 (0.750) data 0.000 (0.038) loss 1.0859 (0.8640) acc 78.1250 (82.2917) lr 7.6120e-05 eta 0:08:55 +epoch [178/200] batch [5/31] time 0.708 (0.884) data 0.000 (0.162) loss 0.6978 (0.7887) acc 87.5000 (84.3750) lr 7.0224e-05 eta 0:10:25 +epoch [178/200] batch [10/31] time 0.708 (0.798) data 0.000 (0.081) loss 0.9556 (0.7823) acc 75.0000 (80.9375) lr 7.0224e-05 eta 0:09:20 +epoch [178/200] batch [15/31] time 0.726 (0.769) data 0.000 (0.054) loss 1.5508 (0.7649) acc 75.0000 (82.9167) lr 7.0224e-05 eta 0:08:56 +epoch [178/200] batch [20/31] time 0.704 (0.754) data 0.000 (0.041) loss 0.9370 (0.7779) acc 78.1250 (82.1875) lr 7.0224e-05 eta 0:08:42 +epoch [178/200] batch [25/31] time 0.723 (0.744) data 0.000 (0.033) loss 0.4270 (0.7633) acc 90.6250 (82.3750) lr 7.0224e-05 eta 0:08:32 +epoch [178/200] batch [30/31] time 0.703 (0.739) data 0.000 (0.027) loss 0.4819 (0.7459) acc 84.3750 (82.5000) lr 7.0224e-05 eta 0:08:24 +epoch [179/200] batch [5/31] time 0.715 (0.891) data 0.000 (0.169) loss 1.0449 (0.6051) acc 71.8750 (86.8750) lr 6.4556e-05 eta 0:10:03 +epoch [179/200] batch [10/31] time 0.707 (0.802) data 0.000 (0.084) loss 1.0186 (0.6132) acc 75.0000 (87.1875) lr 6.4556e-05 eta 0:08:58 +epoch [179/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.056) loss 0.5142 (0.5865) acc 87.5000 (88.3333) lr 6.4556e-05 eta 0:08:34 +epoch [179/200] batch [20/31] time 0.705 (0.756) data 0.000 (0.042) loss 0.3127 (0.5819) acc 90.6250 (87.6562) lr 6.4556e-05 eta 0:08:20 +epoch [179/200] batch [25/31] time 0.705 (0.747) data 0.000 (0.034) loss 0.4829 (0.6154) acc 87.5000 (86.7500) lr 6.4556e-05 eta 0:08:10 +epoch [179/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.028) loss 0.5454 (0.6125) acc 87.5000 (86.9792) lr 6.4556e-05 eta 0:08:02 +epoch [180/200] batch [5/31] time 0.709 (0.882) data 0.000 (0.162) loss 0.8843 (0.7483) acc 84.3750 (84.3750) lr 5.9119e-05 eta 0:09:29 
+epoch [180/200] batch [10/31] time 0.707 (0.797) data 0.000 (0.081) loss 1.2412 (0.8760) acc 71.8750 (81.5625) lr 5.9119e-05 eta 0:08:30 +epoch [180/200] batch [15/31] time 0.710 (0.769) data 0.000 (0.054) loss 0.4678 (0.7663) acc 87.5000 (83.5417) lr 5.9119e-05 eta 0:08:09 +epoch [180/200] batch [20/31] time 0.709 (0.754) data 0.000 (0.041) loss 0.7690 (0.7433) acc 84.3750 (84.3750) lr 5.9119e-05 eta 0:07:55 +epoch [180/200] batch [25/31] time 0.709 (0.745) data 0.000 (0.033) loss 0.9785 (0.7228) acc 75.0000 (84.2500) lr 5.9119e-05 eta 0:07:46 +epoch [180/200] batch [30/31] time 0.713 (0.740) data 0.000 (0.027) loss 1.1162 (0.7600) acc 75.0000 (83.4375) lr 5.9119e-05 eta 0:07:39 +epoch [181/200] batch [5/31] time 0.704 (0.878) data 0.000 (0.165) loss 0.3230 (0.7510) acc 90.6250 (81.8750) lr 5.3915e-05 eta 0:09:00 +epoch [181/200] batch [10/31] time 0.711 (0.811) data 0.000 (0.083) loss 0.9717 (0.8182) acc 81.2500 (81.2500) lr 5.3915e-05 eta 0:08:14 +epoch [181/200] batch [15/31] time 0.710 (0.779) data 0.000 (0.055) loss 1.0273 (0.7685) acc 75.0000 (82.5000) lr 5.3915e-05 eta 0:07:51 +epoch [181/200] batch [20/31] time 0.704 (0.761) data 0.000 (0.041) loss 0.6636 (0.7521) acc 81.2500 (82.6562) lr 5.3915e-05 eta 0:07:36 +epoch [181/200] batch [25/31] time 0.711 (0.751) data 0.000 (0.033) loss 0.4961 (0.7790) acc 87.5000 (82.3750) lr 5.3915e-05 eta 0:07:26 +epoch [181/200] batch [30/31] time 0.705 (0.745) data 0.000 (0.028) loss 0.6982 (0.7715) acc 93.7500 (82.6042) lr 5.3915e-05 eta 0:07:19 +epoch [182/200] batch [5/31] time 0.714 (0.903) data 0.000 (0.176) loss 0.6030 (0.8054) acc 87.5000 (83.1250) lr 4.8943e-05 eta 0:08:47 +epoch [182/200] batch [10/31] time 0.722 (0.807) data 0.000 (0.088) loss 0.8579 (0.7450) acc 78.1250 (83.4375) lr 4.8943e-05 eta 0:07:47 +epoch [182/200] batch [15/31] time 0.708 (0.774) data 0.000 (0.059) loss 0.8613 (0.7418) acc 84.3750 (84.1667) lr 4.8943e-05 eta 0:07:24 +epoch [182/200] batch [20/31] time 0.709 (0.759) data 0.000 (0.044) 
loss 0.7881 (0.7205) acc 84.3750 (84.5312) lr 4.8943e-05 eta 0:07:12 +epoch [182/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.036) loss 0.5376 (0.6979) acc 84.3750 (84.8750) lr 4.8943e-05 eta 0:07:03 +epoch [182/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.030) loss 0.7100 (0.7050) acc 84.3750 (85.0000) lr 4.8943e-05 eta 0:06:58 +epoch [183/200] batch [5/31] time 0.723 (0.883) data 0.000 (0.160) loss 0.6924 (0.7314) acc 84.3750 (82.5000) lr 4.4207e-05 eta 0:08:08 +epoch [183/200] batch [10/31] time 0.713 (0.798) data 0.000 (0.080) loss 1.0693 (0.7697) acc 78.1250 (81.8750) lr 4.4207e-05 eta 0:07:17 +epoch [183/200] batch [15/31] time 0.709 (0.772) data 0.000 (0.054) loss 0.8525 (0.7725) acc 81.2500 (82.2917) lr 4.4207e-05 eta 0:06:59 +epoch [183/200] batch [20/31] time 0.712 (0.757) data 0.000 (0.040) loss 0.7964 (0.7530) acc 81.2500 (82.1875) lr 4.4207e-05 eta 0:06:47 +epoch [183/200] batch [25/31] time 0.715 (0.748) data 0.000 (0.032) loss 1.0820 (0.7428) acc 78.1250 (82.8750) lr 4.4207e-05 eta 0:06:38 +epoch [183/200] batch [30/31] time 0.712 (0.742) data 0.000 (0.027) loss 1.0068 (0.7723) acc 75.0000 (82.6042) lr 4.4207e-05 eta 0:06:31 +epoch [184/200] batch [5/31] time 0.715 (0.894) data 0.000 (0.168) loss 0.5234 (0.8359) acc 87.5000 (85.0000) lr 3.9706e-05 eta 0:07:46 +epoch [184/200] batch [10/31] time 0.714 (0.805) data 0.000 (0.084) loss 0.5420 (0.7664) acc 87.5000 (85.0000) lr 3.9706e-05 eta 0:06:56 +epoch [184/200] batch [15/31] time 0.717 (0.775) data 0.000 (0.056) loss 0.8696 (0.7473) acc 84.3750 (84.3750) lr 3.9706e-05 eta 0:06:36 +epoch [184/200] batch [20/31] time 0.717 (0.759) data 0.000 (0.042) loss 0.3762 (0.7119) acc 90.6250 (85.3125) lr 3.9706e-05 eta 0:06:24 +epoch [184/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.034) loss 0.3215 (0.6861) acc 87.5000 (85.7500) lr 3.9706e-05 eta 0:06:15 +epoch [184/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.028) loss 0.5781 (0.6771) acc 90.6250 (85.7292) lr 3.9706e-05 eta 0:06:11 
+epoch [185/200] batch [5/31] time 0.716 (0.892) data 0.000 (0.171) loss 0.5586 (0.5991) acc 90.6250 (86.8750) lr 3.5443e-05 eta 0:07:18 +epoch [185/200] batch [10/31] time 0.714 (0.802) data 0.000 (0.086) loss 0.3252 (0.5902) acc 93.7500 (86.5625) lr 3.5443e-05 eta 0:06:29 +epoch [185/200] batch [15/31] time 0.715 (0.772) data 0.000 (0.057) loss 0.7676 (0.6615) acc 78.1250 (85.6250) lr 3.5443e-05 eta 0:06:11 +epoch [185/200] batch [20/31] time 0.712 (0.759) data 0.000 (0.043) loss 0.6948 (0.7239) acc 84.3750 (84.5312) lr 3.5443e-05 eta 0:06:01 +epoch [185/200] batch [25/31] time 0.732 (0.750) data 0.000 (0.035) loss 0.8535 (0.7604) acc 78.1250 (83.7500) lr 3.5443e-05 eta 0:05:53 +epoch [185/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.029) loss 0.2104 (0.7236) acc 93.7500 (83.9583) lr 3.5443e-05 eta 0:05:46 +epoch [186/200] batch [5/31] time 0.715 (0.902) data 0.001 (0.172) loss 0.4229 (0.8721) acc 90.6250 (81.8750) lr 3.1417e-05 eta 0:06:54 +epoch [186/200] batch [10/31] time 0.722 (0.809) data 0.000 (0.086) loss 0.6729 (0.7452) acc 84.3750 (85.0000) lr 3.1417e-05 eta 0:06:07 +epoch [186/200] batch [15/31] time 0.718 (0.777) data 0.001 (0.058) loss 1.0605 (0.7484) acc 81.2500 (85.2083) lr 3.1417e-05 eta 0:05:49 +epoch [186/200] batch [20/31] time 0.711 (0.761) data 0.000 (0.043) loss 0.6313 (0.7623) acc 84.3750 (84.3750) lr 3.1417e-05 eta 0:05:38 +epoch [186/200] batch [25/31] time 0.718 (0.751) data 0.000 (0.035) loss 0.7129 (0.7467) acc 84.3750 (84.2500) lr 3.1417e-05 eta 0:05:30 +epoch [186/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.029) loss 0.7969 (0.7419) acc 78.1250 (83.8542) lr 3.1417e-05 eta 0:05:23 +epoch [187/200] batch [5/31] time 0.705 (0.890) data 0.000 (0.164) loss 0.8354 (0.7680) acc 87.5000 (86.8750) lr 2.7630e-05 eta 0:06:21 +epoch [187/200] batch [10/31] time 0.715 (0.800) data 0.000 (0.082) loss 0.5273 (0.6995) acc 90.6250 (87.1875) lr 2.7630e-05 eta 0:05:39 +epoch [187/200] batch [15/31] time 0.711 (0.772) data 0.000 (0.055) 
loss 0.8091 (0.7177) acc 75.0000 (86.0417) lr 2.7630e-05 eta 0:05:23 +epoch [187/200] batch [20/31] time 0.706 (0.757) data 0.000 (0.041) loss 0.4780 (0.6612) acc 90.6250 (86.5625) lr 2.7630e-05 eta 0:05:13 +epoch [187/200] batch [25/31] time 0.705 (0.747) data 0.000 (0.033) loss 0.7876 (0.6743) acc 81.2500 (86.5000) lr 2.7630e-05 eta 0:05:05 +epoch [187/200] batch [30/31] time 0.726 (0.741) data 0.000 (0.028) loss 0.6514 (0.6885) acc 84.3750 (86.1458) lr 2.7630e-05 eta 0:04:59 +epoch [188/200] batch [5/31] time 0.713 (0.894) data 0.000 (0.172) loss 0.3982 (0.6818) acc 93.7500 (84.3750) lr 2.4083e-05 eta 0:05:55 +epoch [188/200] batch [10/31] time 0.711 (0.803) data 0.000 (0.086) loss 0.7197 (0.7517) acc 84.3750 (82.5000) lr 2.4083e-05 eta 0:05:15 +epoch [188/200] batch [15/31] time 0.705 (0.783) data 0.000 (0.057) loss 0.7969 (0.7696) acc 81.2500 (82.5000) lr 2.4083e-05 eta 0:05:03 +epoch [188/200] batch [20/31] time 0.704 (0.764) data 0.000 (0.043) loss 0.5186 (0.8018) acc 93.7500 (81.7188) lr 2.4083e-05 eta 0:04:52 +epoch [188/200] batch [25/31] time 0.724 (0.753) data 0.000 (0.035) loss 1.2070 (0.7971) acc 71.8750 (82.1250) lr 2.4083e-05 eta 0:04:44 +epoch [188/200] batch [30/31] time 0.706 (0.746) data 0.000 (0.029) loss 0.9873 (0.8035) acc 78.1250 (81.6667) lr 2.4083e-05 eta 0:04:38 +epoch [189/200] batch [5/31] time 0.708 (0.897) data 0.000 (0.172) loss 0.2439 (0.6713) acc 93.7500 (84.3750) lr 2.0777e-05 eta 0:05:29 +epoch [189/200] batch [10/31] time 0.728 (0.811) data 0.000 (0.086) loss 0.3848 (0.7087) acc 93.7500 (84.3750) lr 2.0777e-05 eta 0:04:53 +epoch [189/200] batch [15/31] time 0.709 (0.778) data 0.000 (0.058) loss 0.4995 (0.6674) acc 87.5000 (85.6250) lr 2.0777e-05 eta 0:04:37 +epoch [189/200] batch [20/31] time 0.715 (0.762) data 0.000 (0.043) loss 0.5601 (0.6456) acc 81.2500 (85.4688) lr 2.0777e-05 eta 0:04:28 +epoch [189/200] batch [25/31] time 0.708 (0.752) data 0.000 (0.035) loss 0.3650 (0.6524) acc 96.8750 (85.8750) lr 2.0777e-05 eta 0:04:20 
+epoch [189/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.029) loss 0.5166 (0.6895) acc 87.5000 (84.8958) lr 2.0777e-05 eta 0:04:14 +epoch [190/200] batch [5/31] time 0.712 (0.884) data 0.000 (0.169) loss 0.4917 (0.5958) acc 90.6250 (88.1250) lr 1.7713e-05 eta 0:04:57 +epoch [190/200] batch [10/31] time 0.737 (0.801) data 0.000 (0.084) loss 0.5303 (0.6551) acc 90.6250 (86.2500) lr 1.7713e-05 eta 0:04:25 +epoch [190/200] batch [15/31] time 0.706 (0.770) data 0.000 (0.056) loss 1.0166 (0.7169) acc 78.1250 (85.0000) lr 1.7713e-05 eta 0:04:11 +epoch [190/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.042) loss 0.6606 (0.6733) acc 90.6250 (86.2500) lr 1.7713e-05 eta 0:04:02 +epoch [190/200] batch [25/31] time 0.705 (0.747) data 0.000 (0.034) loss 0.8394 (0.6960) acc 81.2500 (85.2500) lr 1.7713e-05 eta 0:03:55 +epoch [190/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.028) loss 1.1250 (0.6848) acc 75.0000 (85.5208) lr 1.7713e-05 eta 0:03:50 +epoch [191/200] batch [5/31] time 0.710 (0.909) data 0.000 (0.164) loss 0.5679 (0.6293) acc 78.1250 (85.6250) lr 1.4891e-05 eta 0:04:37 +epoch [191/200] batch [10/31] time 0.708 (0.811) data 0.000 (0.082) loss 0.6665 (0.6602) acc 84.3750 (85.6250) lr 1.4891e-05 eta 0:04:03 +epoch [191/200] batch [15/31] time 0.724 (0.780) data 0.000 (0.055) loss 0.4470 (0.6358) acc 87.5000 (86.0417) lr 1.4891e-05 eta 0:03:50 +epoch [191/200] batch [20/31] time 0.704 (0.763) data 0.000 (0.041) loss 0.7495 (0.6833) acc 87.5000 (84.8438) lr 1.4891e-05 eta 0:03:41 +epoch [191/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.033) loss 0.7207 (0.7123) acc 84.3750 (85.1250) lr 1.4891e-05 eta 0:03:34 +epoch [191/200] batch [30/31] time 0.713 (0.746) data 0.000 (0.028) loss 0.8784 (0.7039) acc 87.5000 (85.0000) lr 1.4891e-05 eta 0:03:28 +epoch [192/200] batch [5/31] time 0.712 (0.887) data 0.000 (0.162) loss 0.5439 (0.7986) acc 84.3750 (81.2500) lr 1.2312e-05 eta 0:04:03 +epoch [192/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.081) 
loss 0.4316 (0.7144) acc 87.5000 (82.8125) lr 1.2312e-05 eta 0:03:35 +epoch [192/200] batch [15/31] time 0.715 (0.772) data 0.000 (0.054) loss 1.3730 (0.7334) acc 71.8750 (82.7083) lr 1.2312e-05 eta 0:03:23 +epoch [192/200] batch [20/31] time 0.712 (0.757) data 0.000 (0.041) loss 0.6909 (0.7417) acc 81.2500 (82.9688) lr 1.2312e-05 eta 0:03:15 +epoch [192/200] batch [25/31] time 0.723 (0.754) data 0.000 (0.033) loss 0.9912 (0.7260) acc 84.3750 (83.6250) lr 1.2312e-05 eta 0:03:11 +epoch [192/200] batch [30/31] time 0.706 (0.747) data 0.000 (0.027) loss 0.7197 (0.7483) acc 90.6250 (83.1250) lr 1.2312e-05 eta 0:03:06 +epoch [193/200] batch [5/31] time 0.725 (0.884) data 0.000 (0.159) loss 0.8877 (0.5646) acc 78.1250 (86.8750) lr 9.9763e-06 eta 0:03:34 +epoch [193/200] batch [10/31] time 0.710 (0.800) data 0.000 (0.080) loss 0.8521 (0.6180) acc 81.2500 (85.3125) lr 9.9763e-06 eta 0:03:10 +epoch [193/200] batch [15/31] time 0.730 (0.771) data 0.000 (0.053) loss 0.9541 (0.6455) acc 84.3750 (85.4167) lr 9.9763e-06 eta 0:02:59 +epoch [193/200] batch [20/31] time 0.712 (0.757) data 0.000 (0.040) loss 0.6719 (0.7032) acc 90.6250 (84.2188) lr 9.9763e-06 eta 0:02:52 +epoch [193/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.032) loss 0.8574 (0.7170) acc 81.2500 (83.8750) lr 9.9763e-06 eta 0:02:46 +epoch [193/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 1.1191 (0.7226) acc 81.2500 (83.8542) lr 9.9763e-06 eta 0:02:41 +epoch [194/200] batch [5/31] time 0.728 (0.890) data 0.001 (0.161) loss 0.7114 (0.8904) acc 84.3750 (81.8750) lr 7.8853e-06 eta 0:03:08 +epoch [194/200] batch [10/31] time 0.715 (0.801) data 0.000 (0.081) loss 0.3550 (0.7301) acc 90.6250 (83.1250) lr 7.8853e-06 eta 0:02:45 +epoch [194/200] batch [15/31] time 0.709 (0.771) data 0.000 (0.054) loss 0.3108 (0.6939) acc 93.7500 (83.5417) lr 7.8853e-06 eta 0:02:35 +epoch [194/200] batch [20/31] time 0.708 (0.763) data 0.000 (0.041) loss 0.6396 (0.6879) acc 84.3750 (83.5938) lr 7.8853e-06 eta 0:02:30 
+epoch [194/200] batch [25/31] time 0.701 (0.753) data 0.000 (0.033) loss 0.8364 (0.6926) acc 81.2500 (83.5000) lr 7.8853e-06 eta 0:02:24 +epoch [194/200] batch [30/31] time 0.706 (0.745) data 0.000 (0.027) loss 0.6538 (0.7017) acc 87.5000 (83.3333) lr 7.8853e-06 eta 0:02:19 +epoch [195/200] batch [5/31] time 0.707 (0.899) data 0.000 (0.178) loss 0.3264 (0.6654) acc 96.8750 (82.5000) lr 6.0390e-06 eta 0:02:42 +epoch [195/200] batch [10/31] time 0.712 (0.806) data 0.000 (0.089) loss 0.6553 (0.6235) acc 90.6250 (85.6250) lr 6.0390e-06 eta 0:02:21 +epoch [195/200] batch [15/31] time 0.713 (0.778) data 0.001 (0.059) loss 0.4456 (0.6377) acc 84.3750 (85.6250) lr 6.0390e-06 eta 0:02:13 +epoch [195/200] batch [20/31] time 0.711 (0.761) data 0.000 (0.045) loss 1.2979 (0.6597) acc 87.5000 (85.9375) lr 6.0390e-06 eta 0:02:06 +epoch [195/200] batch [25/31] time 0.713 (0.752) data 0.000 (0.036) loss 1.0176 (0.6794) acc 78.1250 (85.5000) lr 6.0390e-06 eta 0:02:01 +epoch [195/200] batch [30/31] time 0.725 (0.746) data 0.000 (0.030) loss 1.0342 (0.7042) acc 71.8750 (84.4792) lr 6.0390e-06 eta 0:01:56 +epoch [196/200] batch [5/31] time 0.715 (0.886) data 0.000 (0.167) loss 0.6230 (0.7889) acc 81.2500 (82.5000) lr 4.4380e-06 eta 0:02:12 +epoch [196/200] batch [10/31] time 0.711 (0.798) data 0.000 (0.084) loss 0.6069 (0.7774) acc 81.2500 (82.5000) lr 4.4380e-06 eta 0:01:55 +epoch [196/200] batch [15/31] time 0.709 (0.769) data 0.000 (0.056) loss 1.0127 (0.7665) acc 87.5000 (83.7500) lr 4.4380e-06 eta 0:01:47 +epoch [196/200] batch [20/31] time 0.702 (0.754) data 0.000 (0.042) loss 0.5303 (0.7511) acc 84.3750 (82.8125) lr 4.4380e-06 eta 0:01:41 +epoch [196/200] batch [25/31] time 0.711 (0.745) data 0.000 (0.034) loss 0.5986 (0.7430) acc 78.1250 (83.0000) lr 4.4380e-06 eta 0:01:36 +epoch [196/200] batch [30/31] time 0.705 (0.738) data 0.000 (0.028) loss 0.6211 (0.7341) acc 81.2500 (83.2292) lr 4.4380e-06 eta 0:01:32 +epoch [197/200] batch [5/31] time 0.708 (0.893) data 0.000 (0.172) 
loss 0.5566 (0.6500) acc 84.3750 (87.5000) lr 3.0827e-06 eta 0:01:46 +epoch [197/200] batch [10/31] time 0.711 (0.804) data 0.000 (0.086) loss 1.0439 (0.6165) acc 78.1250 (87.5000) lr 3.0827e-06 eta 0:01:31 +epoch [197/200] batch [15/31] time 0.705 (0.783) data 0.000 (0.057) loss 0.3193 (0.6039) acc 93.7500 (87.5000) lr 3.0827e-06 eta 0:01:25 +epoch [197/200] batch [20/31] time 0.714 (0.765) data 0.000 (0.043) loss 0.4773 (0.6542) acc 84.3750 (86.0938) lr 3.0827e-06 eta 0:01:19 +epoch [197/200] batch [25/31] time 0.722 (0.753) data 0.000 (0.035) loss 0.6729 (0.6991) acc 87.5000 (85.7500) lr 3.0827e-06 eta 0:01:14 +epoch [197/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.029) loss 0.5718 (0.6882) acc 87.5000 (85.5208) lr 3.0827e-06 eta 0:01:10 +epoch [198/200] batch [5/31] time 0.715 (0.889) data 0.000 (0.166) loss 1.0898 (0.6305) acc 84.3750 (86.8750) lr 1.9733e-06 eta 0:01:18 +epoch [198/200] batch [10/31] time 0.715 (0.800) data 0.000 (0.083) loss 0.6802 (0.6902) acc 84.3750 (85.0000) lr 1.9733e-06 eta 0:01:06 +epoch [198/200] batch [15/31] time 0.712 (0.771) data 0.000 (0.056) loss 0.7754 (0.7208) acc 87.5000 (84.7917) lr 1.9733e-06 eta 0:01:00 +epoch [198/200] batch [20/31] time 0.715 (0.756) data 0.000 (0.042) loss 0.9370 (0.7311) acc 84.3750 (85.6250) lr 1.9733e-06 eta 0:00:55 +epoch [198/200] batch [25/31] time 0.722 (0.748) data 0.000 (0.033) loss 0.6304 (0.7143) acc 87.5000 (85.2500) lr 1.9733e-06 eta 0:00:50 +epoch [198/200] batch [30/31] time 0.715 (0.742) data 0.000 (0.028) loss 0.8320 (0.6965) acc 90.6250 (85.4167) lr 1.9733e-06 eta 0:00:46 +epoch [199/200] batch [5/31] time 0.724 (0.894) data 0.000 (0.175) loss 0.5249 (0.6249) acc 90.6250 (86.2500) lr 1.1101e-06 eta 0:00:50 +epoch [199/200] batch [10/31] time 0.707 (0.801) data 0.000 (0.088) loss 0.5200 (0.6340) acc 78.1250 (85.0000) lr 1.1101e-06 eta 0:00:41 +epoch [199/200] batch [15/31] time 0.721 (0.772) data 0.000 (0.059) loss 1.0742 (0.7109) acc 78.1250 (83.5417) lr 1.1101e-06 eta 0:00:36 
+epoch [199/200] batch [20/31] time 0.708 (0.757) data 0.000 (0.044) loss 0.7495 (0.6770) acc 78.1250 (83.7500) lr 1.1101e-06 eta 0:00:31 +epoch [199/200] batch [25/31] time 0.713 (0.747) data 0.000 (0.035) loss 0.6880 (0.7011) acc 90.6250 (83.6250) lr 1.1101e-06 eta 0:00:27 +epoch [199/200] batch [30/31] time 0.710 (0.742) data 0.000 (0.029) loss 0.5269 (0.6959) acc 81.2500 (84.2708) lr 1.1101e-06 eta 0:00:23 +epoch [200/200] batch [5/31] time 0.712 (0.896) data 0.000 (0.174) loss 0.5693 (0.7124) acc 78.1250 (83.7500) lr 4.9344e-07 eta 0:00:23 +epoch [200/200] batch [10/31] time 0.717 (0.803) data 0.000 (0.087) loss 1.0322 (0.7587) acc 71.8750 (83.7500) lr 4.9344e-07 eta 0:00:16 +epoch [200/200] batch [15/31] time 0.712 (0.772) data 0.000 (0.058) loss 0.4985 (0.7351) acc 87.5000 (83.5417) lr 4.9344e-07 eta 0:00:12 +epoch [200/200] batch [20/31] time 0.707 (0.756) data 0.000 (0.044) loss 0.4678 (0.7178) acc 87.5000 (83.7500) lr 4.9344e-07 eta 0:00:08 +epoch [200/200] batch [25/31] time 0.717 (0.747) data 0.000 (0.035) loss 0.7026 (0.7015) acc 81.2500 (83.8750) lr 4.9344e-07 eta 0:00:04 +epoch [200/200] batch [30/31] time 0.704 (0.741) data 0.000 (0.029) loss 0.7373 (0.6828) acc 81.2500 (84.1667) lr 4.9344e-07 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-200 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 26,715 +* accuracy: 53.4% +* error: 46.6% +* macro_f1: 51.9% +Elapsed: 1:20:05 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..b2929f7c --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ 
+model.pth.tar-200 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-200 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-200 new file mode 100644 index 00000000..fe2bf7b5 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-200 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698072893.ckb-gpu-lambda.1310895.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698072893.ckb-gpu-lambda.1310895.0 new file mode 100644 index 00000000..b400e30a Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698072893.ckb-gpu-lambda.1310895.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..3e5e85c3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,1539 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None 
+trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 
5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.083 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/200] batch [5/31] time 0.710 (1.596) data 0.000 (0.178) loss 3.5605 (3.5969) acc 25.0000 (32.5000) lr 1.0000e-05 eta 2:44:44 +epoch [1/200] batch [10/31] time 0.716 (1.153) data 0.000 (0.089) loss 3.8535 (3.4861) acc 18.7500 (34.0625) lr 1.0000e-05 eta 1:58:54 +epoch [1/200] batch [15/31] time 0.707 (1.003) data 0.000 (0.060) loss 3.4395 (3.3546) acc 43.7500 (35.6250) lr 1.0000e-05 eta 1:43:26 +epoch [1/200] batch [20/31] time 0.707 (0.929) data 0.000 (0.045) loss 3.0215 (3.2204) acc 34.3750 (37.3438) lr 1.0000e-05 eta 1:35:43 +epoch [1/200] batch [25/31] time 0.706 (0.885) data 0.000 (0.036) loss 2.7930 (3.1371) acc 40.6250 (37.3750) lr 1.0000e-05 eta 1:31:04 +epoch [1/200] batch [30/31] time 0.705 (0.855) data 0.000 (0.030) loss 2.6113 (3.0720) acc 43.7500 (37.7083) lr 1.0000e-05 eta 1:27:53 +epoch [2/200] batch [5/31] time 0.721 (0.892) data 0.000 (0.168) loss 2.0879 (2.5738) acc 56.2500 (46.8750) lr 2.0000e-03 eta 1:31:35 +epoch [2/200] batch [10/31] time 0.705 (0.803) data 0.000 (0.084) loss 1.0957 (2.2741) acc 59.3750 (50.0000) lr 2.0000e-03 eta 1:22:24 +epoch [2/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.056) loss 2.2617 (2.2068) acc 46.8750 (51.8750) lr 2.0000e-03 eta 1:19:15 +epoch [2/200] batch [20/31] time 0.707 (0.756) data 0.000 (0.042) loss 1.9941 (2.1865) acc 56.2500 (52.1875) lr 2.0000e-03 eta 1:17:31 +epoch [2/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.034) loss 2.1230 (2.1198) acc 46.8750 (52.6250) lr 2.0000e-03 eta 1:16:27 +epoch [2/200] batch [30/31] time 0.701 (0.740) data 0.000 (0.028) loss 1.9346 (2.0827) acc 53.1250 (52.7083) lr 2.0000e-03 eta 1:15:44 +epoch [3/200] batch [5/31] time 0.706 (0.907) data 0.000 (0.187) loss 2.2754 (2.2189) acc 46.8750 (50.6250) lr 1.9999e-03 eta 1:32:41 +epoch [3/200] batch [10/31] time 0.721 (0.811) 
data 0.000 (0.094) loss 1.9531 (2.2127) acc 59.3750 (52.1875) lr 1.9999e-03 eta 1:22:47 +epoch [3/200] batch [15/31] time 0.708 (0.778) data 0.000 (0.063) loss 1.6689 (2.1538) acc 59.3750 (53.9583) lr 1.9999e-03 eta 1:19:26 +epoch [3/200] batch [20/31] time 0.718 (0.762) data 0.000 (0.047) loss 1.6582 (2.0384) acc 65.6250 (56.2500) lr 1.9999e-03 eta 1:17:41 +epoch [3/200] batch [25/31] time 0.707 (0.752) data 0.000 (0.038) loss 1.4814 (1.9633) acc 68.7500 (57.0000) lr 1.9999e-03 eta 1:16:34 +epoch [3/200] batch [30/31] time 0.703 (0.744) data 0.000 (0.031) loss 1.9043 (1.9651) acc 46.8750 (56.2500) lr 1.9999e-03 eta 1:15:46 +epoch [4/200] batch [5/31] time 0.704 (0.889) data 0.000 (0.170) loss 1.8135 (1.7633) acc 53.1250 (55.0000) lr 1.9995e-03 eta 1:30:21 +epoch [4/200] batch [10/31] time 0.707 (0.801) data 0.000 (0.085) loss 1.9248 (1.8852) acc 68.7500 (55.3125) lr 1.9995e-03 eta 1:21:22 +epoch [4/200] batch [15/31] time 0.705 (0.769) data 0.000 (0.057) loss 2.2285 (1.8395) acc 53.1250 (56.6667) lr 1.9995e-03 eta 1:18:06 +epoch [4/200] batch [20/31] time 0.700 (0.753) data 0.000 (0.043) loss 1.3516 (1.8691) acc 68.7500 (56.2500) lr 1.9995e-03 eta 1:16:26 +epoch [4/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.034) loss 1.8662 (1.8464) acc 53.1250 (56.8750) lr 1.9995e-03 eta 1:15:25 +epoch [4/200] batch [30/31] time 0.705 (0.739) data 0.000 (0.029) loss 1.8809 (1.8522) acc 62.5000 (55.9375) lr 1.9995e-03 eta 1:14:49 +epoch [5/200] batch [5/31] time 0.708 (0.886) data 0.000 (0.164) loss 1.9033 (1.9619) acc 59.3750 (56.8750) lr 1.9989e-03 eta 1:29:38 +epoch [5/200] batch [10/31] time 0.724 (0.799) data 0.000 (0.082) loss 2.3125 (1.8031) acc 46.8750 (59.6875) lr 1.9989e-03 eta 1:20:48 +epoch [5/200] batch [15/31] time 0.710 (0.770) data 0.000 (0.055) loss 1.7676 (1.8131) acc 56.2500 (58.5417) lr 1.9989e-03 eta 1:17:48 +epoch [5/200] batch [20/31] time 0.716 (0.755) data 0.000 (0.041) loss 1.7295 (1.7872) acc 62.5000 (59.0625) lr 1.9989e-03 eta 1:16:15 +epoch 
[5/200] batch [25/31] time 0.717 (0.747) data 0.000 (0.033) loss 1.5283 (1.7675) acc 59.3750 (57.6250) lr 1.9989e-03 eta 1:15:19 +epoch [5/200] batch [30/31] time 0.711 (0.742) data 0.000 (0.028) loss 1.9746 (1.8165) acc 53.1250 (56.8750) lr 1.9989e-03 eta 1:14:46 +epoch [6/200] batch [5/31] time 0.711 (0.901) data 0.000 (0.183) loss 2.3320 (1.8479) acc 50.0000 (55.6250) lr 1.9980e-03 eta 1:30:44 +epoch [6/200] batch [10/31] time 0.711 (0.807) data 0.000 (0.092) loss 2.0547 (1.6919) acc 50.0000 (57.8125) lr 1.9980e-03 eta 1:21:12 +epoch [6/200] batch [15/31] time 0.724 (0.777) data 0.000 (0.061) loss 1.4570 (1.6798) acc 65.6250 (58.3333) lr 1.9980e-03 eta 1:18:03 +epoch [6/200] batch [20/31] time 0.717 (0.761) data 0.000 (0.046) loss 1.9863 (1.6690) acc 56.2500 (58.2812) lr 1.9980e-03 eta 1:16:24 +epoch [6/200] batch [25/31] time 0.716 (0.751) data 0.000 (0.037) loss 2.4844 (1.7117) acc 50.0000 (58.8750) lr 1.9980e-03 eta 1:15:23 +epoch [6/200] batch [30/31] time 0.710 (0.745) data 0.000 (0.031) loss 1.7656 (1.7181) acc 50.0000 (58.0208) lr 1.9980e-03 eta 1:14:38 +epoch [7/200] batch [5/31] time 0.712 (0.906) data 0.000 (0.179) loss 1.3223 (1.4590) acc 68.7500 (65.0000) lr 1.9969e-03 eta 1:30:45 +epoch [7/200] batch [10/31] time 0.726 (0.810) data 0.000 (0.090) loss 1.0371 (1.5126) acc 75.0000 (61.5625) lr 1.9969e-03 eta 1:21:01 +epoch [7/200] batch [15/31] time 0.710 (0.779) data 0.000 (0.060) loss 1.6631 (1.5686) acc 59.3750 (62.0833) lr 1.9969e-03 eta 1:17:56 +epoch [7/200] batch [20/31] time 0.715 (0.762) data 0.000 (0.045) loss 1.7842 (1.6277) acc 50.0000 (59.6875) lr 1.9969e-03 eta 1:16:09 +epoch [7/200] batch [25/31] time 0.722 (0.752) data 0.000 (0.036) loss 3.2754 (1.7041) acc 53.1250 (59.8750) lr 1.9969e-03 eta 1:15:05 +epoch [7/200] batch [30/31] time 0.712 (0.745) data 0.000 (0.030) loss 2.0488 (1.7237) acc 56.2500 (59.8958) lr 1.9969e-03 eta 1:14:18 +epoch [8/200] batch [5/31] time 0.710 (0.921) data 0.000 (0.197) loss 1.7734 (1.6432) acc 62.5000 
(61.8750) lr 1.9956e-03 eta 1:31:48 +epoch [8/200] batch [10/31] time 0.719 (0.820) data 0.000 (0.099) loss 1.6279 (1.7293) acc 65.6250 (57.8125) lr 1.9956e-03 eta 1:21:40 +epoch [8/200] batch [15/31] time 0.711 (0.793) data 0.000 (0.066) loss 1.3457 (1.7110) acc 68.7500 (59.5833) lr 1.9956e-03 eta 1:18:50 +epoch [8/200] batch [20/31] time 0.719 (0.772) data 0.000 (0.049) loss 1.1836 (1.6800) acc 75.0000 (59.8438) lr 1.9956e-03 eta 1:16:43 +epoch [8/200] batch [25/31] time 0.712 (0.760) data 0.000 (0.040) loss 1.3223 (1.6485) acc 56.2500 (59.7500) lr 1.9956e-03 eta 1:15:26 +epoch [8/200] batch [30/31] time 0.706 (0.751) data 0.000 (0.033) loss 1.5898 (1.6585) acc 68.7500 (59.5833) lr 1.9956e-03 eta 1:14:31 +epoch [9/200] batch [5/31] time 0.714 (0.898) data 0.000 (0.175) loss 1.8975 (2.0045) acc 50.0000 (53.1250) lr 1.9940e-03 eta 1:29:00 +epoch [9/200] batch [10/31] time 0.710 (0.807) data 0.000 (0.088) loss 1.8945 (1.7647) acc 50.0000 (56.8750) lr 1.9940e-03 eta 1:19:53 +epoch [9/200] batch [15/31] time 0.716 (0.775) data 0.000 (0.059) loss 1.2324 (1.7289) acc 65.6250 (58.3333) lr 1.9940e-03 eta 1:16:42 +epoch [9/200] batch [20/31] time 0.706 (0.759) data 0.000 (0.044) loss 1.9004 (1.7571) acc 56.2500 (57.5000) lr 1.9940e-03 eta 1:15:03 +epoch [9/200] batch [25/31] time 0.714 (0.749) data 0.000 (0.035) loss 1.0039 (1.7193) acc 65.6250 (58.0000) lr 1.9940e-03 eta 1:14:00 +epoch [9/200] batch [30/31] time 0.719 (0.743) data 0.000 (0.029) loss 1.0703 (1.6933) acc 78.1250 (59.1667) lr 1.9940e-03 eta 1:13:21 +epoch [10/200] batch [5/31] time 0.710 (0.911) data 0.000 (0.162) loss 0.9082 (1.4324) acc 78.1250 (61.8750) lr 1.9921e-03 eta 1:29:47 +epoch [10/200] batch [10/31] time 0.734 (0.813) data 0.000 (0.081) loss 1.6406 (1.6057) acc 59.3750 (58.1250) lr 1.9921e-03 eta 1:20:03 +epoch [10/200] batch [15/31] time 0.710 (0.778) data 0.000 (0.054) loss 2.0156 (1.6736) acc 56.2500 (56.8750) lr 1.9921e-03 eta 1:16:36 +epoch [10/200] batch [20/31] time 0.714 (0.762) data 
0.000 (0.041) loss 1.3242 (1.6848) acc 65.6250 (58.2812) lr 1.9921e-03 eta 1:14:53 +epoch [10/200] batch [25/31] time 0.714 (0.751) data 0.000 (0.033) loss 2.5039 (1.7126) acc 53.1250 (58.3750) lr 1.9921e-03 eta 1:13:48 +epoch [10/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.027) loss 1.7520 (1.7195) acc 62.5000 (58.8542) lr 1.9921e-03 eta 1:13:03 +epoch [11/200] batch [5/31] time 0.710 (0.901) data 0.000 (0.179) loss 1.7119 (1.4965) acc 65.6250 (63.7500) lr 1.9900e-03 eta 1:28:24 +epoch [11/200] batch [10/31] time 0.714 (0.807) data 0.000 (0.090) loss 2.3203 (1.6101) acc 59.3750 (64.6875) lr 1.9900e-03 eta 1:19:02 +epoch [11/200] batch [15/31] time 0.710 (0.775) data 0.000 (0.060) loss 1.4805 (1.6162) acc 65.6250 (63.5417) lr 1.9900e-03 eta 1:15:53 +epoch [11/200] batch [20/31] time 0.726 (0.760) data 0.000 (0.045) loss 1.6592 (1.6488) acc 71.8750 (62.8125) lr 1.9900e-03 eta 1:14:18 +epoch [11/200] batch [25/31] time 0.717 (0.750) data 0.000 (0.036) loss 1.6016 (1.6448) acc 68.7500 (61.5000) lr 1.9900e-03 eta 1:13:17 +epoch [11/200] batch [30/31] time 0.715 (0.743) data 0.000 (0.030) loss 1.4756 (1.6655) acc 62.5000 (61.6667) lr 1.9900e-03 eta 1:12:36 +epoch [12/200] batch [5/31] time 0.716 (0.896) data 0.000 (0.169) loss 1.2451 (1.3867) acc 65.6250 (66.2500) lr 1.9877e-03 eta 1:27:23 +epoch [12/200] batch [10/31] time 0.706 (0.818) data 0.000 (0.085) loss 1.3574 (1.5429) acc 68.7500 (63.1250) lr 1.9877e-03 eta 1:19:46 +epoch [12/200] batch [15/31] time 0.708 (0.784) data 0.000 (0.057) loss 2.2227 (1.5992) acc 56.2500 (62.2917) lr 1.9877e-03 eta 1:16:23 +epoch [12/200] batch [20/31] time 0.705 (0.766) data 0.000 (0.043) loss 1.6787 (1.5937) acc 65.6250 (62.9688) lr 1.9877e-03 eta 1:14:34 +epoch [12/200] batch [25/31] time 0.704 (0.756) data 0.000 (0.034) loss 2.2539 (1.5611) acc 53.1250 (64.0000) lr 1.9877e-03 eta 1:13:28 +epoch [12/200] batch [30/31] time 0.705 (0.748) data 0.000 (0.028) loss 2.2129 (1.5934) acc 46.8750 (62.9167) lr 1.9877e-03 eta 1:12:38 
+epoch [13/200] batch [5/31] time 0.705 (0.886) data 0.000 (0.166) loss 1.2119 (1.4874) acc 75.0000 (65.6250) lr 1.9851e-03 eta 1:25:56 +epoch [13/200] batch [10/31] time 0.710 (0.797) data 0.000 (0.083) loss 1.9912 (1.5226) acc 53.1250 (64.6875) lr 1.9851e-03 eta 1:17:19 +epoch [13/200] batch [15/31] time 0.707 (0.767) data 0.000 (0.056) loss 1.3291 (1.5239) acc 50.0000 (63.9583) lr 1.9851e-03 eta 1:14:19 +epoch [13/200] batch [20/31] time 0.718 (0.753) data 0.000 (0.042) loss 1.3105 (1.5052) acc 75.0000 (64.5312) lr 1.9851e-03 eta 1:12:55 +epoch [13/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.033) loss 2.4004 (1.6205) acc 59.3750 (62.2500) lr 1.9851e-03 eta 1:11:57 +epoch [13/200] batch [30/31] time 0.719 (0.738) data 0.000 (0.028) loss 1.7314 (1.6473) acc 62.5000 (61.5625) lr 1.9851e-03 eta 1:11:18 +epoch [14/200] batch [5/31] time 0.710 (0.911) data 0.000 (0.183) loss 1.0049 (1.3799) acc 68.7500 (66.2500) lr 1.9823e-03 eta 1:27:54 +epoch [14/200] batch [10/31] time 0.717 (0.813) data 0.000 (0.091) loss 1.6992 (1.5213) acc 59.3750 (62.1875) lr 1.9823e-03 eta 1:18:23 +epoch [14/200] batch [15/31] time 0.723 (0.779) data 0.000 (0.061) loss 1.2461 (1.5607) acc 71.8750 (62.5000) lr 1.9823e-03 eta 1:15:06 +epoch [14/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.046) loss 1.2842 (1.5458) acc 56.2500 (61.4062) lr 1.9823e-03 eta 1:13:16 +epoch [14/200] batch [25/31] time 0.706 (0.751) data 0.000 (0.037) loss 1.8486 (1.6007) acc 65.6250 (60.7500) lr 1.9823e-03 eta 1:12:13 +epoch [14/200] batch [30/31] time 0.707 (0.744) data 0.000 (0.031) loss 1.4971 (1.5874) acc 59.3750 (61.2500) lr 1.9823e-03 eta 1:11:28 +epoch [15/200] batch [5/31] time 0.708 (0.901) data 0.000 (0.174) loss 2.0059 (1.7086) acc 53.1250 (57.5000) lr 1.9792e-03 eta 1:26:32 +epoch [15/200] batch [10/31] time 0.710 (0.807) data 0.000 (0.087) loss 1.2607 (1.5692) acc 71.8750 (60.9375) lr 1.9792e-03 eta 1:17:26 +epoch [15/200] batch [15/31] time 0.706 (0.776) data 0.000 (0.058) loss 1.7539 
(1.5779) acc 50.0000 (61.0417) lr 1.9792e-03 eta 1:14:20 +epoch [15/200] batch [20/31] time 0.705 (0.759) data 0.000 (0.044) loss 1.2344 (1.4814) acc 59.3750 (62.5000) lr 1.9792e-03 eta 1:12:43 +epoch [15/200] batch [25/31] time 0.719 (0.750) data 0.000 (0.035) loss 1.2197 (1.4660) acc 75.0000 (63.3750) lr 1.9792e-03 eta 1:11:45 +epoch [15/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.029) loss 1.6533 (1.5096) acc 65.6250 (63.3333) lr 1.9792e-03 eta 1:11:02 +epoch [16/200] batch [5/31] time 0.704 (0.894) data 0.000 (0.169) loss 2.0254 (1.6373) acc 50.0000 (56.2500) lr 1.9759e-03 eta 1:25:24 +epoch [16/200] batch [10/31] time 0.707 (0.803) data 0.000 (0.085) loss 1.4082 (1.4854) acc 65.6250 (61.8750) lr 1.9759e-03 eta 1:16:37 +epoch [16/200] batch [15/31] time 0.708 (0.772) data 0.000 (0.056) loss 1.4795 (1.5250) acc 68.7500 (62.2917) lr 1.9759e-03 eta 1:13:36 +epoch [16/200] batch [20/31] time 0.706 (0.756) data 0.000 (0.042) loss 1.1113 (1.5537) acc 75.0000 (62.6562) lr 1.9759e-03 eta 1:12:00 +epoch [16/200] batch [25/31] time 0.707 (0.747) data 0.000 (0.034) loss 1.3115 (1.5492) acc 71.8750 (63.7500) lr 1.9759e-03 eta 1:11:05 +epoch [16/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.028) loss 1.6768 (1.6050) acc 62.5000 (63.1250) lr 1.9759e-03 eta 1:10:25 +epoch [17/200] batch [5/31] time 0.707 (0.888) data 0.000 (0.166) loss 1.9883 (1.3701) acc 62.5000 (70.0000) lr 1.9724e-03 eta 1:24:18 +epoch [17/200] batch [10/31] time 0.714 (0.800) data 0.000 (0.083) loss 2.2266 (1.5816) acc 40.6250 (62.5000) lr 1.9724e-03 eta 1:15:57 +epoch [17/200] batch [15/31] time 0.715 (0.771) data 0.000 (0.056) loss 1.4238 (1.5874) acc 71.8750 (63.1250) lr 1.9724e-03 eta 1:13:06 +epoch [17/200] batch [20/31] time 0.704 (0.756) data 0.000 (0.042) loss 1.4277 (1.6436) acc 53.1250 (62.0312) lr 1.9724e-03 eta 1:11:35 +epoch [17/200] batch [25/31] time 0.711 (0.746) data 0.000 (0.033) loss 1.2168 (1.5995) acc 68.7500 (62.6250) lr 1.9724e-03 eta 1:10:39 +epoch [17/200] batch 
[30/31] time 0.705 (0.740) data 0.000 (0.028) loss 1.7480 (1.5471) acc 62.5000 (62.9167) lr 1.9724e-03 eta 1:09:59 +epoch [18/200] batch [5/31] time 0.722 (0.887) data 0.000 (0.163) loss 0.9150 (1.2980) acc 75.0000 (65.6250) lr 1.9686e-03 eta 1:23:48 +epoch [18/200] batch [10/31] time 0.706 (0.812) data 0.000 (0.082) loss 1.3486 (1.4133) acc 65.6250 (63.1250) lr 1.9686e-03 eta 1:16:39 +epoch [18/200] batch [15/31] time 0.711 (0.779) data 0.000 (0.054) loss 1.4473 (1.4678) acc 65.6250 (63.9583) lr 1.9686e-03 eta 1:13:24 +epoch [18/200] batch [20/31] time 0.706 (0.761) data 0.000 (0.041) loss 2.5938 (1.5574) acc 46.8750 (62.9688) lr 1.9686e-03 eta 1:11:40 +epoch [18/200] batch [25/31] time 0.710 (0.750) data 0.000 (0.033) loss 0.9106 (1.4862) acc 68.7500 (64.3750) lr 1.9686e-03 eta 1:10:36 +epoch [18/200] batch [30/31] time 0.718 (0.743) data 0.000 (0.027) loss 1.1240 (1.5014) acc 65.6250 (63.7500) lr 1.9686e-03 eta 1:09:55 +epoch [19/200] batch [5/31] time 0.713 (0.900) data 0.000 (0.176) loss 1.3574 (1.2799) acc 62.5000 (68.1250) lr 1.9646e-03 eta 1:24:35 +epoch [19/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.088) loss 0.9297 (1.2675) acc 81.2500 (69.3750) lr 1.9646e-03 eta 1:15:35 +epoch [19/200] batch [15/31] time 0.715 (0.775) data 0.000 (0.059) loss 1.3994 (1.3542) acc 68.7500 (69.1667) lr 1.9646e-03 eta 1:12:40 +epoch [19/200] batch [20/31] time 0.724 (0.759) data 0.000 (0.044) loss 1.4502 (1.4104) acc 65.6250 (66.7188) lr 1.9646e-03 eta 1:11:04 +epoch [19/200] batch [25/31] time 0.713 (0.749) data 0.000 (0.035) loss 1.3457 (1.4309) acc 65.6250 (66.2500) lr 1.9646e-03 eta 1:10:09 +epoch [19/200] batch [30/31] time 0.711 (0.747) data 0.000 (0.030) loss 1.2256 (1.4542) acc 56.2500 (65.5208) lr 1.9646e-03 eta 1:09:51 +epoch [20/200] batch [5/31] time 0.712 (0.898) data 0.000 (0.171) loss 1.5264 (1.3443) acc 59.3750 (66.8750) lr 1.9603e-03 eta 1:23:56 +epoch [20/200] batch [10/31] time 0.711 (0.807) data 0.000 (0.086) loss 1.5547 (1.5037) acc 59.3750 
(61.2500) lr 1.9603e-03 eta 1:15:17 +epoch [20/200] batch [15/31] time 0.715 (0.776) data 0.000 (0.057) loss 1.3594 (1.6210) acc 53.1250 (60.4167) lr 1.9603e-03 eta 1:12:20 +epoch [20/200] batch [20/31] time 0.711 (0.759) data 0.000 (0.043) loss 1.8477 (1.6438) acc 59.3750 (60.9375) lr 1.9603e-03 eta 1:10:43 +epoch [20/200] batch [25/31] time 0.706 (0.750) data 0.000 (0.035) loss 1.3623 (1.6299) acc 65.6250 (61.1250) lr 1.9603e-03 eta 1:09:47 +epoch [20/200] batch [30/31] time 0.715 (0.743) data 0.000 (0.029) loss 1.7422 (1.5922) acc 65.6250 (61.8750) lr 1.9603e-03 eta 1:09:04 +epoch [21/200] batch [5/31] time 0.713 (0.906) data 0.000 (0.180) loss 1.5723 (1.5600) acc 53.1250 (61.8750) lr 1.9558e-03 eta 1:24:09 +epoch [21/200] batch [10/31] time 0.720 (0.810) data 0.000 (0.090) loss 1.0439 (1.5809) acc 71.8750 (61.2500) lr 1.9558e-03 eta 1:15:10 +epoch [21/200] batch [15/31] time 0.706 (0.776) data 0.000 (0.060) loss 1.0967 (1.5252) acc 78.1250 (62.5000) lr 1.9558e-03 eta 1:11:59 +epoch [21/200] batch [20/31] time 0.710 (0.759) data 0.000 (0.045) loss 2.6113 (1.5333) acc 40.6250 (62.9688) lr 1.9558e-03 eta 1:10:19 +epoch [21/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.036) loss 1.6475 (1.5275) acc 75.0000 (62.8750) lr 1.9558e-03 eta 1:09:20 +epoch [21/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.030) loss 1.2607 (1.5149) acc 75.0000 (63.3333) lr 1.9558e-03 eta 1:09:05 +epoch [22/200] batch [5/31] time 0.710 (0.890) data 0.000 (0.163) loss 1.0918 (1.4898) acc 78.1250 (67.5000) lr 1.9511e-03 eta 1:22:14 +epoch [22/200] batch [10/31] time 0.717 (0.802) data 0.000 (0.082) loss 1.7939 (1.5938) acc 59.3750 (64.3750) lr 1.9511e-03 eta 1:14:04 +epoch [22/200] batch [15/31] time 0.726 (0.775) data 0.000 (0.055) loss 0.9863 (1.4718) acc 78.1250 (66.2500) lr 1.9511e-03 eta 1:11:28 +epoch [22/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.041) loss 1.7197 (1.4912) acc 62.5000 (65.7812) lr 1.9511e-03 eta 1:09:51 +epoch [22/200] batch [25/31] time 0.704 
(0.749) data 0.000 (0.033) loss 1.1885 (1.4946) acc 68.7500 (65.8750) lr 1.9511e-03 eta 1:08:55 +epoch [22/200] batch [30/31] time 0.706 (0.742) data 0.000 (0.027) loss 0.9688 (1.4779) acc 78.1250 (65.7292) lr 1.9511e-03 eta 1:08:15 +epoch [23/200] batch [5/31] time 0.709 (0.881) data 0.000 (0.160) loss 1.3223 (1.3904) acc 71.8750 (68.7500) lr 1.9461e-03 eta 1:20:56 +epoch [23/200] batch [10/31] time 0.709 (0.797) data 0.000 (0.080) loss 1.2295 (1.3744) acc 75.0000 (67.5000) lr 1.9461e-03 eta 1:13:08 +epoch [23/200] batch [15/31] time 0.705 (0.769) data 0.000 (0.054) loss 1.7686 (1.4312) acc 68.7500 (66.8750) lr 1.9461e-03 eta 1:10:30 +epoch [23/200] batch [20/31] time 0.706 (0.754) data 0.000 (0.040) loss 1.8906 (1.4900) acc 56.2500 (65.9375) lr 1.9461e-03 eta 1:09:07 +epoch [23/200] batch [25/31] time 0.722 (0.745) data 0.000 (0.032) loss 1.1631 (1.4517) acc 78.1250 (66.2500) lr 1.9461e-03 eta 1:08:14 +epoch [23/200] batch [30/31] time 0.715 (0.739) data 0.000 (0.027) loss 1.3252 (1.4356) acc 62.5000 (65.4167) lr 1.9461e-03 eta 1:07:37 +epoch [24/200] batch [5/31] time 0.704 (0.879) data 0.000 (0.160) loss 1.2061 (1.3590) acc 71.8750 (69.3750) lr 1.9409e-03 eta 1:20:19 +epoch [24/200] batch [10/31] time 0.705 (0.794) data 0.000 (0.080) loss 1.4326 (1.4139) acc 56.2500 (67.1875) lr 1.9409e-03 eta 1:12:31 +epoch [24/200] batch [15/31] time 0.708 (0.767) data 0.000 (0.054) loss 0.7871 (1.3886) acc 81.2500 (66.6667) lr 1.9409e-03 eta 1:09:59 +epoch [24/200] batch [20/31] time 0.708 (0.754) data 0.000 (0.040) loss 0.9004 (1.4382) acc 81.2500 (64.8438) lr 1.9409e-03 eta 1:08:41 +epoch [24/200] batch [25/31] time 0.700 (0.744) data 0.000 (0.032) loss 1.7500 (1.4537) acc 62.5000 (65.1250) lr 1.9409e-03 eta 1:07:44 +epoch [24/200] batch [30/31] time 0.707 (0.738) data 0.000 (0.027) loss 1.0254 (1.4352) acc 68.7500 (65.0000) lr 1.9409e-03 eta 1:07:09 +epoch [25/200] batch [5/31] time 0.716 (0.901) data 0.000 (0.177) loss 2.1543 (1.3973) acc 50.0000 (67.5000) lr 1.9354e-03 
eta 1:21:52 +epoch [25/200] batch [10/31] time 0.707 (0.806) data 0.000 (0.089) loss 0.9854 (1.3256) acc 78.1250 (68.1250) lr 1.9354e-03 eta 1:13:10 +epoch [25/200] batch [15/31] time 0.711 (0.784) data 0.000 (0.059) loss 1.5303 (1.4061) acc 68.7500 (66.4583) lr 1.9354e-03 eta 1:11:06 +epoch [25/200] batch [20/31] time 0.717 (0.766) data 0.000 (0.045) loss 0.9492 (1.3573) acc 81.2500 (66.4062) lr 1.9354e-03 eta 1:09:24 +epoch [25/200] batch [25/31] time 0.721 (0.756) data 0.000 (0.036) loss 0.7319 (1.3530) acc 81.2500 (66.2500) lr 1.9354e-03 eta 1:08:24 +epoch [25/200] batch [30/31] time 0.705 (0.748) data 0.000 (0.030) loss 1.7617 (1.4227) acc 43.7500 (64.3750) lr 1.9354e-03 eta 1:07:38 +epoch [26/200] batch [5/31] time 0.721 (0.890) data 0.000 (0.165) loss 1.4189 (1.3033) acc 65.6250 (68.1250) lr 1.9298e-03 eta 1:20:23 +epoch [26/200] batch [10/31] time 0.714 (0.800) data 0.000 (0.083) loss 1.8096 (1.3895) acc 56.2500 (66.5625) lr 1.9298e-03 eta 1:12:14 +epoch [26/200] batch [15/31] time 0.712 (0.770) data 0.000 (0.055) loss 1.6670 (1.4081) acc 68.7500 (66.2500) lr 1.9298e-03 eta 1:09:28 +epoch [26/200] batch [20/31] time 0.710 (0.755) data 0.000 (0.041) loss 1.2959 (1.3476) acc 65.6250 (67.5000) lr 1.9298e-03 eta 1:08:02 +epoch [26/200] batch [25/31] time 0.709 (0.746) data 0.000 (0.033) loss 1.2637 (1.3820) acc 75.0000 (66.7500) lr 1.9298e-03 eta 1:07:08 +epoch [26/200] batch [30/31] time 0.710 (0.740) data 0.000 (0.028) loss 1.3682 (1.3798) acc 56.2500 (66.2500) lr 1.9298e-03 eta 1:06:30 +epoch [27/200] batch [5/31] time 0.713 (0.890) data 0.000 (0.164) loss 1.5430 (1.2675) acc 65.6250 (68.1250) lr 1.9239e-03 eta 1:19:54 +epoch [27/200] batch [10/31] time 0.708 (0.800) data 0.000 (0.082) loss 1.2900 (1.2529) acc 75.0000 (69.6875) lr 1.9239e-03 eta 1:11:45 +epoch [27/200] batch [15/31] time 0.706 (0.770) data 0.000 (0.055) loss 1.5684 (1.2194) acc 59.3750 (70.8333) lr 1.9239e-03 eta 1:09:00 +epoch [27/200] batch [20/31] time 0.708 (0.755) data 0.000 (0.041) 
loss 1.4053 (1.1948) acc 71.8750 (71.2500) lr 1.9239e-03 eta 1:07:38 +epoch [27/200] batch [25/31] time 0.703 (0.746) data 0.000 (0.033) loss 1.3311 (1.2259) acc 71.8750 (71.1250) lr 1.9239e-03 eta 1:06:46 +epoch [27/200] batch [30/31] time 0.705 (0.740) data 0.000 (0.028) loss 1.0303 (1.2319) acc 71.8750 (70.8333) lr 1.9239e-03 eta 1:06:11 +epoch [28/200] batch [5/31] time 0.720 (0.917) data 0.001 (0.172) loss 1.1875 (1.3803) acc 78.1250 (68.7500) lr 1.9178e-03 eta 1:21:53 +epoch [28/200] batch [10/31] time 0.705 (0.814) data 0.000 (0.086) loss 0.9546 (1.2801) acc 78.1250 (71.2500) lr 1.9178e-03 eta 1:12:36 +epoch [28/200] batch [15/31] time 0.716 (0.780) data 0.000 (0.058) loss 1.2383 (1.2547) acc 68.7500 (70.6250) lr 1.9178e-03 eta 1:09:30 +epoch [28/200] batch [20/31] time 0.713 (0.763) data 0.000 (0.043) loss 1.5020 (1.3092) acc 68.7500 (68.5938) lr 1.9178e-03 eta 1:07:58 +epoch [28/200] batch [25/31] time 0.721 (0.753) data 0.000 (0.035) loss 1.2754 (1.3385) acc 68.7500 (68.0000) lr 1.9178e-03 eta 1:07:00 +epoch [28/200] batch [30/31] time 0.721 (0.746) data 0.000 (0.029) loss 1.4902 (1.3263) acc 71.8750 (68.7500) lr 1.9178e-03 eta 1:06:18 +epoch [29/200] batch [5/31] time 0.721 (0.887) data 0.000 (0.164) loss 1.2480 (1.5316) acc 75.0000 (65.0000) lr 1.9114e-03 eta 1:18:47 +epoch [29/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.082) loss 1.2734 (1.4518) acc 78.1250 (67.5000) lr 1.9114e-03 eta 1:11:05 +epoch [29/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.055) loss 1.8232 (1.5140) acc 53.1250 (65.8333) lr 1.9114e-03 eta 1:08:22 +epoch [29/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.041) loss 1.2920 (1.5168) acc 65.6250 (66.4062) lr 1.9114e-03 eta 1:06:57 +epoch [29/200] batch [25/31] time 0.714 (0.753) data 0.000 (0.033) loss 1.2158 (1.4847) acc 71.8750 (67.1250) lr 1.9114e-03 eta 1:06:36 +epoch [29/200] batch [30/31] time 0.701 (0.745) data 0.000 (0.028) loss 1.5205 (1.4481) acc 75.0000 (67.6042) lr 1.9114e-03 eta 1:05:52 +epoch 
[30/200] batch [5/31] time 0.712 (0.888) data 0.000 (0.163) loss 1.6006 (1.3410) acc 59.3750 (64.3750) lr 1.9048e-03 eta 1:18:20 +epoch [30/200] batch [10/31] time 0.711 (0.800) data 0.000 (0.082) loss 1.2734 (1.3031) acc 65.6250 (65.9375) lr 1.9048e-03 eta 1:10:34 +epoch [30/200] batch [15/31] time 0.716 (0.772) data 0.000 (0.055) loss 1.0107 (1.2144) acc 65.6250 (67.9167) lr 1.9048e-03 eta 1:08:02 +epoch [30/200] batch [20/31] time 0.716 (0.757) data 0.000 (0.041) loss 1.2002 (1.2260) acc 75.0000 (68.5938) lr 1.9048e-03 eta 1:06:38 +epoch [30/200] batch [25/31] time 0.700 (0.748) data 0.000 (0.033) loss 2.0273 (1.2600) acc 65.6250 (68.1250) lr 1.9048e-03 eta 1:05:47 +epoch [30/200] batch [30/31] time 0.703 (0.741) data 0.000 (0.027) loss 0.9941 (1.2480) acc 78.1250 (68.3333) lr 1.9048e-03 eta 1:05:07 +epoch [31/200] batch [5/31] time 0.707 (0.889) data 0.000 (0.172) loss 1.3799 (1.2212) acc 71.8750 (71.8750) lr 1.8980e-03 eta 1:18:03 +epoch [31/200] batch [10/31] time 0.711 (0.800) data 0.000 (0.086) loss 0.8843 (1.1234) acc 78.1250 (72.8125) lr 1.8980e-03 eta 1:10:06 +epoch [31/200] batch [15/31] time 0.725 (0.773) data 0.000 (0.057) loss 1.6025 (1.2293) acc 56.2500 (70.4167) lr 1.8980e-03 eta 1:07:40 +epoch [31/200] batch [20/31] time 0.702 (0.763) data 0.000 (0.043) loss 1.0381 (1.2475) acc 75.0000 (69.5312) lr 1.8980e-03 eta 1:06:44 +epoch [31/200] batch [25/31] time 0.704 (0.752) data 0.000 (0.035) loss 1.0439 (1.2499) acc 81.2500 (69.0000) lr 1.8980e-03 eta 1:05:43 +epoch [31/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.029) loss 0.9580 (1.2721) acc 78.1250 (68.3333) lr 1.8980e-03 eta 1:05:02 +epoch [32/200] batch [5/31] time 0.712 (0.893) data 0.000 (0.170) loss 2.1367 (1.5207) acc 65.6250 (65.0000) lr 1.8910e-03 eta 1:17:54 +epoch [32/200] batch [10/31] time 0.711 (0.805) data 0.000 (0.085) loss 0.7417 (1.3432) acc 75.0000 (66.5625) lr 1.8910e-03 eta 1:10:10 +epoch [32/200] batch [15/31] time 0.707 (0.774) data 0.000 (0.057) loss 1.5762 (1.3950) 
acc 65.6250 (67.2917) lr 1.8910e-03 eta 1:07:22 +epoch [32/200] batch [20/31] time 0.710 (0.759) data 0.000 (0.043) loss 1.2568 (1.3318) acc 62.5000 (68.2812) lr 1.8910e-03 eta 1:06:01 +epoch [32/200] batch [25/31] time 0.703 (0.750) data 0.000 (0.034) loss 0.9727 (1.3208) acc 75.0000 (67.8750) lr 1.8910e-03 eta 1:05:08 +epoch [32/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.029) loss 1.4053 (1.3628) acc 56.2500 (66.7708) lr 1.8910e-03 eta 1:04:32 +epoch [33/200] batch [5/31] time 0.708 (0.895) data 0.000 (0.172) loss 1.8398 (1.4444) acc 65.6250 (63.1250) lr 1.8838e-03 eta 1:17:34 +epoch [33/200] batch [10/31] time 0.709 (0.806) data 0.000 (0.086) loss 1.0244 (1.3591) acc 71.8750 (66.5625) lr 1.8838e-03 eta 1:09:47 +epoch [33/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.058) loss 0.9995 (1.3831) acc 78.1250 (66.8750) lr 1.8838e-03 eta 1:07:03 +epoch [33/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.043) loss 1.4229 (1.4116) acc 62.5000 (66.5625) lr 1.8838e-03 eta 1:05:32 +epoch [33/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.035) loss 2.3926 (1.4704) acc 50.0000 (65.6250) lr 1.8838e-03 eta 1:04:37 +epoch [33/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 1.4141 (1.4561) acc 53.1250 (65.3125) lr 1.8838e-03 eta 1:04:02 +epoch [34/200] batch [5/31] time 0.716 (0.893) data 0.000 (0.172) loss 1.3047 (1.2647) acc 68.7500 (74.3750) lr 1.8763e-03 eta 1:16:57 +epoch [34/200] batch [10/31] time 0.706 (0.817) data 0.000 (0.086) loss 1.5098 (1.2665) acc 62.5000 (71.5625) lr 1.8763e-03 eta 1:10:23 +epoch [34/200] batch [15/31] time 0.721 (0.783) data 0.000 (0.057) loss 0.9292 (1.2187) acc 84.3750 (72.5000) lr 1.8763e-03 eta 1:07:23 +epoch [34/200] batch [20/31] time 0.722 (0.765) data 0.000 (0.043) loss 1.0488 (1.1543) acc 81.2500 (73.5938) lr 1.8763e-03 eta 1:05:44 +epoch [34/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.035) loss 0.7725 (1.1695) acc 78.1250 (72.3750) lr 1.8763e-03 eta 1:04:47 +epoch [34/200] batch [30/31] time 
0.708 (0.747) data 0.000 (0.029) loss 0.9634 (1.1605) acc 75.0000 (72.5000) lr 1.8763e-03 eta 1:04:03 +epoch [35/200] batch [5/31] time 0.706 (0.891) data 0.000 (0.169) loss 1.0283 (1.1815) acc 71.8750 (72.5000) lr 1.8686e-03 eta 1:16:18 +epoch [35/200] batch [10/31] time 0.711 (0.802) data 0.000 (0.085) loss 1.0088 (1.0931) acc 81.2500 (75.9375) lr 1.8686e-03 eta 1:08:38 +epoch [35/200] batch [15/31] time 0.707 (0.773) data 0.000 (0.056) loss 1.0254 (1.1422) acc 75.0000 (73.5417) lr 1.8686e-03 eta 1:06:07 +epoch [35/200] batch [20/31] time 0.714 (0.759) data 0.000 (0.042) loss 1.8867 (1.2304) acc 65.6250 (71.5625) lr 1.8686e-03 eta 1:04:52 +epoch [35/200] batch [25/31] time 0.712 (0.750) data 0.000 (0.034) loss 1.2285 (1.2264) acc 71.8750 (71.2500) lr 1.8686e-03 eta 1:04:00 +epoch [35/200] batch [30/31] time 0.706 (0.744) data 0.000 (0.028) loss 1.0723 (1.2102) acc 75.0000 (71.4583) lr 1.8686e-03 eta 1:03:23 +epoch [36/200] batch [5/31] time 0.706 (0.893) data 0.000 (0.174) loss 1.0859 (1.3477) acc 68.7500 (63.7500) lr 1.8607e-03 eta 1:16:03 +epoch [36/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.087) loss 0.8882 (1.2473) acc 75.0000 (67.1875) lr 1.8607e-03 eta 1:08:13 +epoch [36/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.058) loss 1.1289 (1.1653) acc 68.7500 (69.1667) lr 1.8607e-03 eta 1:05:37 +epoch [36/200] batch [20/31] time 0.717 (0.758) data 0.000 (0.044) loss 1.6602 (1.1740) acc 62.5000 (69.5312) lr 1.8607e-03 eta 1:04:23 +epoch [36/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.035) loss 1.8232 (1.2296) acc 59.3750 (69.2500) lr 1.8607e-03 eta 1:03:29 +epoch [36/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.029) loss 1.0879 (1.2484) acc 71.8750 (68.5417) lr 1.8607e-03 eta 1:02:50 +epoch [37/200] batch [5/31] time 0.723 (0.883) data 0.000 (0.155) loss 1.1553 (1.1924) acc 62.5000 (70.6250) lr 1.8526e-03 eta 1:14:46 +epoch [37/200] batch [10/31] time 0.712 (0.798) data 0.000 (0.078) loss 0.6147 (1.1826) acc 81.2500 (71.8750) lr 
1.8526e-03 eta 1:07:30 +epoch [37/200] batch [15/31] time 0.715 (0.772) data 0.000 (0.052) loss 0.5962 (1.1624) acc 84.3750 (72.9167) lr 1.8526e-03 eta 1:05:12 +epoch [37/200] batch [20/31] time 0.716 (0.757) data 0.000 (0.039) loss 1.0146 (1.1400) acc 75.0000 (73.2812) lr 1.8526e-03 eta 1:03:54 +epoch [37/200] batch [25/31] time 0.713 (0.749) data 0.000 (0.031) loss 1.4541 (1.1564) acc 65.6250 (71.6250) lr 1.8526e-03 eta 1:03:10 +epoch [37/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.026) loss 1.3281 (1.1611) acc 68.7500 (72.0833) lr 1.8526e-03 eta 1:02:38 +epoch [38/200] batch [5/31] time 0.710 (0.906) data 0.000 (0.181) loss 0.9365 (1.1433) acc 71.8750 (70.0000) lr 1.8443e-03 eta 1:16:15 +epoch [38/200] batch [10/31] time 0.720 (0.812) data 0.000 (0.091) loss 1.9795 (1.3288) acc 62.5000 (68.1250) lr 1.8443e-03 eta 1:08:13 +epoch [38/200] batch [15/31] time 0.713 (0.780) data 0.000 (0.061) loss 0.8506 (1.3156) acc 78.1250 (69.1667) lr 1.8443e-03 eta 1:05:27 +epoch [38/200] batch [20/31] time 0.709 (0.763) data 0.000 (0.046) loss 0.9331 (1.2685) acc 75.0000 (70.4688) lr 1.8443e-03 eta 1:03:59 +epoch [38/200] batch [25/31] time 0.712 (0.753) data 0.000 (0.037) loss 1.5879 (1.2992) acc 68.7500 (69.5000) lr 1.8443e-03 eta 1:03:03 +epoch [38/200] batch [30/31] time 0.710 (0.746) data 0.000 (0.031) loss 1.6182 (1.3041) acc 68.7500 (69.7917) lr 1.8443e-03 eta 1:02:27 +epoch [39/200] batch [5/31] time 0.706 (0.889) data 0.000 (0.163) loss 0.9160 (1.0393) acc 81.2500 (76.8750) lr 1.8358e-03 eta 1:14:20 +epoch [39/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.082) loss 0.7837 (1.1090) acc 81.2500 (74.0625) lr 1.8358e-03 eta 1:06:51 +epoch [39/200] batch [15/31] time 0.711 (0.779) data 0.000 (0.054) loss 0.9272 (1.1308) acc 81.2500 (73.3333) lr 1.8358e-03 eta 1:05:01 +epoch [39/200] batch [20/31] time 0.709 (0.763) data 0.000 (0.041) loss 1.2441 (1.1119) acc 71.8750 (74.2188) lr 1.8358e-03 eta 1:03:35 +epoch [39/200] batch [25/31] time 0.711 (0.752) data 0.000 
(0.033) loss 1.1064 (1.1846) acc 71.8750 (72.8750) lr 1.8358e-03 eta 1:02:38 +epoch [39/200] batch [30/31] time 0.710 (0.746) data 0.000 (0.027) loss 0.9985 (1.1548) acc 78.1250 (73.8542) lr 1.8358e-03 eta 1:02:02 +epoch [40/200] batch [5/31] time 0.726 (0.892) data 0.000 (0.169) loss 1.2832 (1.2320) acc 65.6250 (71.2500) lr 1.8271e-03 eta 1:14:06 +epoch [40/200] batch [10/31] time 0.710 (0.802) data 0.000 (0.085) loss 1.3906 (1.0862) acc 75.0000 (74.6875) lr 1.8271e-03 eta 1:06:36 +epoch [40/200] batch [15/31] time 0.714 (0.773) data 0.000 (0.057) loss 1.4746 (1.1654) acc 68.7500 (73.5417) lr 1.8271e-03 eta 1:04:07 +epoch [40/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.042) loss 1.3389 (1.1772) acc 71.8750 (73.5938) lr 1.8271e-03 eta 1:02:45 +epoch [40/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.034) loss 1.1064 (1.1383) acc 65.6250 (73.8750) lr 1.8271e-03 eta 1:01:55 +epoch [40/200] batch [30/31] time 0.711 (0.742) data 0.000 (0.028) loss 1.0986 (1.1665) acc 75.0000 (73.2292) lr 1.8271e-03 eta 1:01:20 +epoch [41/200] batch [5/31] time 0.711 (0.934) data 0.000 (0.179) loss 0.8877 (1.0958) acc 81.2500 (74.3750) lr 1.8181e-03 eta 1:17:06 +epoch [41/200] batch [10/31] time 0.724 (0.824) data 0.000 (0.090) loss 1.0898 (1.0850) acc 65.6250 (75.0000) lr 1.8181e-03 eta 1:07:57 +epoch [41/200] batch [15/31] time 0.711 (0.788) data 0.000 (0.060) loss 0.6719 (1.0832) acc 84.3750 (75.4167) lr 1.8181e-03 eta 1:04:56 +epoch [41/200] batch [20/31] time 0.709 (0.769) data 0.000 (0.045) loss 1.6367 (1.1223) acc 62.5000 (75.0000) lr 1.8181e-03 eta 1:03:17 +epoch [41/200] batch [25/31] time 0.708 (0.756) data 0.000 (0.036) loss 1.2480 (1.1169) acc 71.8750 (74.5000) lr 1.8181e-03 eta 1:02:10 +epoch [41/200] batch [30/31] time 0.705 (0.749) data 0.000 (0.030) loss 1.6943 (1.1543) acc 56.2500 (73.6458) lr 1.8181e-03 eta 1:01:30 +epoch [42/200] batch [5/31] time 0.710 (0.871) data 0.000 (0.150) loss 0.9985 (1.2604) acc 78.1250 (72.5000) lr 1.8090e-03 eta 1:11:29 +epoch 
[42/200] batch [10/31] time 0.705 (0.790) data 0.000 (0.075) loss 0.8203 (1.2399) acc 75.0000 (72.8125) lr 1.8090e-03 eta 1:04:45 +epoch [42/200] batch [15/31] time 0.710 (0.762) data 0.000 (0.050) loss 1.4307 (1.2690) acc 71.8750 (73.1250) lr 1.8090e-03 eta 1:02:24 +epoch [42/200] batch [20/31] time 0.705 (0.749) data 0.000 (0.038) loss 1.0146 (1.1871) acc 78.1250 (75.1562) lr 1.8090e-03 eta 1:01:15 +epoch [42/200] batch [25/31] time 0.717 (0.740) data 0.000 (0.030) loss 1.1631 (1.1557) acc 75.0000 (75.5000) lr 1.8090e-03 eta 1:00:30 +epoch [42/200] batch [30/31] time 0.704 (0.734) data 0.000 (0.025) loss 1.2607 (1.1341) acc 75.0000 (75.3125) lr 1.8090e-03 eta 0:59:56 +epoch [43/200] batch [5/31] time 0.709 (0.898) data 0.000 (0.177) loss 0.6685 (1.2877) acc 84.3750 (73.1250) lr 1.7997e-03 eta 1:13:12 +epoch [43/200] batch [10/31] time 0.708 (0.821) data 0.000 (0.089) loss 1.1162 (1.2531) acc 65.6250 (71.2500) lr 1.7997e-03 eta 1:06:51 +epoch [43/200] batch [15/31] time 0.717 (0.784) data 0.000 (0.059) loss 1.0967 (1.1652) acc 75.0000 (72.2917) lr 1.7997e-03 eta 1:03:46 +epoch [43/200] batch [20/31] time 0.709 (0.765) data 0.000 (0.044) loss 1.0127 (1.1812) acc 75.0000 (72.1875) lr 1.7997e-03 eta 1:02:11 +epoch [43/200] batch [25/31] time 0.703 (0.753) data 0.000 (0.036) loss 1.0566 (1.2244) acc 75.0000 (71.5000) lr 1.7997e-03 eta 1:01:11 +epoch [43/200] batch [30/31] time 0.719 (0.747) data 0.000 (0.030) loss 1.2412 (1.2240) acc 75.0000 (72.1875) lr 1.7997e-03 eta 1:00:36 +epoch [44/200] batch [5/31] time 0.723 (0.903) data 0.000 (0.177) loss 0.9160 (1.0473) acc 87.5000 (79.3750) lr 1.7902e-03 eta 1:13:11 +epoch [44/200] batch [10/31] time 0.713 (0.807) data 0.000 (0.088) loss 0.8853 (1.0206) acc 81.2500 (76.5625) lr 1.7902e-03 eta 1:05:20 +epoch [44/200] batch [15/31] time 0.726 (0.778) data 0.001 (0.059) loss 0.8242 (1.0256) acc 78.1250 (76.2500) lr 1.7902e-03 eta 1:02:55 +epoch [44/200] batch [20/31] time 0.709 (0.761) data 0.000 (0.044) loss 0.9126 (1.0890) 
acc 75.0000 (74.3750) lr 1.7902e-03 eta 1:01:29 +epoch [44/200] batch [25/31] time 0.728 (0.752) data 0.000 (0.036) loss 2.5371 (1.1311) acc 53.1250 (73.8750) lr 1.7902e-03 eta 1:00:40 +epoch [44/200] batch [30/31] time 0.719 (0.746) data 0.000 (0.030) loss 1.1895 (1.1464) acc 84.3750 (74.1667) lr 1.7902e-03 eta 1:00:06 +epoch [45/200] batch [5/31] time 0.721 (0.897) data 0.000 (0.173) loss 0.6030 (0.8919) acc 90.6250 (82.5000) lr 1.7804e-03 eta 1:12:13 +epoch [45/200] batch [10/31] time 0.710 (0.804) data 0.000 (0.087) loss 1.8184 (0.9588) acc 56.2500 (78.4375) lr 1.7804e-03 eta 1:04:39 +epoch [45/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.058) loss 0.7993 (0.9408) acc 81.2500 (78.3333) lr 1.7804e-03 eta 1:02:08 +epoch [45/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.043) loss 0.9927 (1.0252) acc 71.8750 (76.0938) lr 1.7804e-03 eta 1:00:51 +epoch [45/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.035) loss 1.8213 (1.0654) acc 65.6250 (75.6250) lr 1.7804e-03 eta 1:00:05 +epoch [45/200] batch [30/31] time 0.704 (0.743) data 0.000 (0.029) loss 1.3604 (1.0978) acc 68.7500 (75.2083) lr 1.7804e-03 eta 0:59:29 +epoch [46/200] batch [5/31] time 0.713 (0.901) data 0.000 (0.176) loss 1.5088 (1.2048) acc 68.7500 (73.1250) lr 1.7705e-03 eta 1:12:03 +epoch [46/200] batch [10/31] time 0.719 (0.809) data 0.000 (0.088) loss 0.5786 (1.0357) acc 84.3750 (76.2500) lr 1.7705e-03 eta 1:04:40 +epoch [46/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.059) loss 0.8809 (0.9519) acc 75.0000 (77.7083) lr 1.7705e-03 eta 1:02:03 +epoch [46/200] batch [20/31] time 0.712 (0.761) data 0.000 (0.044) loss 0.8184 (0.9405) acc 78.1250 (76.8750) lr 1.7705e-03 eta 1:00:41 +epoch [46/200] batch [25/31] time 0.706 (0.750) data 0.000 (0.036) loss 1.0254 (1.0038) acc 84.3750 (76.0000) lr 1.7705e-03 eta 0:59:45 +epoch [46/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.030) loss 1.7998 (1.0583) acc 59.3750 (75.0000) lr 1.7705e-03 eta 0:59:11 +epoch [47/200] batch [5/31] time 
0.708 (0.891) data 0.000 (0.169) loss 0.8628 (1.0028) acc 81.2500 (74.3750) lr 1.7604e-03 eta 1:10:47 +epoch [47/200] batch [10/31] time 0.723 (0.803) data 0.001 (0.085) loss 2.5332 (1.1530) acc 59.3750 (73.1250) lr 1.7604e-03 eta 1:03:44 +epoch [47/200] batch [15/31] time 0.709 (0.772) data 0.000 (0.057) loss 1.2178 (1.0997) acc 62.5000 (73.3333) lr 1.7604e-03 eta 1:01:15 +epoch [47/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.043) loss 0.6997 (1.1233) acc 81.2500 (74.3750) lr 1.7604e-03 eta 0:59:55 +epoch [47/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.034) loss 0.8164 (1.1001) acc 68.7500 (73.5000) lr 1.7604e-03 eta 0:59:06 +epoch [47/200] batch [30/31] time 0.713 (0.740) data 0.000 (0.028) loss 1.2080 (1.0831) acc 75.0000 (74.0625) lr 1.7604e-03 eta 0:58:32 +epoch [48/200] batch [5/31] time 0.712 (0.885) data 0.000 (0.165) loss 0.6836 (1.1936) acc 78.1250 (70.0000) lr 1.7501e-03 eta 1:09:53 +epoch [48/200] batch [10/31] time 0.719 (0.802) data 0.001 (0.083) loss 0.7603 (1.0762) acc 78.1250 (72.8125) lr 1.7501e-03 eta 1:03:14 +epoch [48/200] batch [15/31] time 0.719 (0.772) data 0.000 (0.055) loss 0.9565 (1.0506) acc 71.8750 (74.3750) lr 1.7501e-03 eta 1:00:51 +epoch [48/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.042) loss 1.0205 (1.0575) acc 81.2500 (74.3750) lr 1.7501e-03 eta 0:59:35 +epoch [48/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.033) loss 0.7373 (1.0900) acc 81.2500 (73.6250) lr 1.7501e-03 eta 0:58:52 +epoch [48/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.028) loss 1.1631 (1.0859) acc 78.1250 (73.6458) lr 1.7501e-03 eta 0:58:23 +epoch [49/200] batch [5/31] time 0.707 (0.896) data 0.000 (0.171) loss 0.4526 (0.9847) acc 93.7500 (77.5000) lr 1.7396e-03 eta 1:10:18 +epoch [49/200] batch [10/31] time 0.706 (0.822) data 0.000 (0.086) loss 1.0635 (1.0084) acc 75.0000 (76.5625) lr 1.7396e-03 eta 1:04:22 +epoch [49/200] batch [15/31] time 0.708 (0.786) data 0.000 (0.057) loss 0.7266 (1.0304) acc 87.5000 (76.2500) lr 
1.7396e-03 eta 1:01:30 +epoch [49/200] batch [20/31] time 0.705 (0.767) data 0.000 (0.043) loss 1.0605 (1.0943) acc 81.2500 (75.1562) lr 1.7396e-03 eta 0:59:58 +epoch [49/200] batch [25/31] time 0.715 (0.756) data 0.000 (0.035) loss 1.0879 (1.1188) acc 68.7500 (73.8750) lr 1.7396e-03 eta 0:59:04 +epoch [49/200] batch [30/31] time 0.708 (0.748) data 0.000 (0.029) loss 0.9570 (1.1014) acc 71.8750 (74.4792) lr 1.7396e-03 eta 0:58:24 +epoch [50/200] batch [5/31] time 0.705 (0.890) data 0.000 (0.168) loss 1.4326 (1.2431) acc 65.6250 (69.3750) lr 1.7290e-03 eta 1:09:21 +epoch [50/200] batch [10/31] time 0.712 (0.802) data 0.000 (0.084) loss 0.7861 (1.0663) acc 84.3750 (74.3750) lr 1.7290e-03 eta 1:02:28 +epoch [50/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.056) loss 1.2090 (1.0489) acc 81.2500 (74.1667) lr 1.7290e-03 eta 1:00:02 +epoch [50/200] batch [20/31] time 0.713 (0.758) data 0.000 (0.042) loss 1.4902 (1.0822) acc 75.0000 (73.7500) lr 1.7290e-03 eta 0:58:52 +epoch [50/200] batch [25/31] time 0.723 (0.749) data 0.000 (0.034) loss 1.4834 (1.1054) acc 68.7500 (73.6250) lr 1.7290e-03 eta 0:58:05 +epoch [50/200] batch [30/31] time 0.840 (0.747) data 0.000 (0.028) loss 1.4248 (1.1148) acc 68.7500 (73.6458) lr 1.7290e-03 eta 0:57:54 +epoch [51/200] batch [5/31] time 0.722 (0.900) data 0.000 (0.171) loss 1.0635 (1.0951) acc 75.0000 (75.0000) lr 1.7181e-03 eta 1:09:39 +epoch [51/200] batch [10/31] time 0.712 (0.809) data 0.000 (0.086) loss 1.4082 (1.1963) acc 75.0000 (73.4375) lr 1.7181e-03 eta 1:02:32 +epoch [51/200] batch [15/31] time 0.712 (0.779) data 0.000 (0.057) loss 0.9053 (1.1396) acc 87.5000 (75.2083) lr 1.7181e-03 eta 1:00:08 +epoch [51/200] batch [20/31] time 0.707 (0.761) data 0.000 (0.043) loss 1.1113 (1.1453) acc 81.2500 (74.8438) lr 1.7181e-03 eta 0:58:44 +epoch [51/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.034) loss 0.8486 (1.1332) acc 75.0000 (74.3750) lr 1.7181e-03 eta 0:57:52 +epoch [51/200] batch [30/31] time 0.710 (0.744) data 0.000 
(0.029) loss 1.1318 (1.1206) acc 68.7500 (74.0625) lr 1.7181e-03 eta 0:57:19 +epoch [52/200] batch [5/31] time 0.726 (0.895) data 0.000 (0.170) loss 1.1240 (1.1956) acc 68.7500 (68.7500) lr 1.7071e-03 eta 1:08:50 +epoch [52/200] batch [10/31] time 0.724 (0.806) data 0.000 (0.085) loss 1.4053 (1.1259) acc 59.3750 (70.6250) lr 1.7071e-03 eta 1:01:53 +epoch [52/200] batch [15/31] time 0.727 (0.776) data 0.000 (0.057) loss 1.2422 (1.0687) acc 68.7500 (72.7083) lr 1.7071e-03 eta 0:59:32 +epoch [52/200] batch [20/31] time 0.705 (0.759) data 0.000 (0.043) loss 0.8721 (1.0360) acc 78.1250 (73.7500) lr 1.7071e-03 eta 0:58:12 +epoch [52/200] batch [25/31] time 0.718 (0.750) data 0.000 (0.034) loss 0.8799 (1.0253) acc 84.3750 (74.7500) lr 1.7071e-03 eta 0:57:26 +epoch [52/200] batch [30/31] time 0.706 (0.745) data 0.000 (0.029) loss 0.8442 (1.0355) acc 75.0000 (74.3750) lr 1.7071e-03 eta 0:56:57 +epoch [53/200] batch [5/31] time 0.710 (0.921) data 0.000 (0.165) loss 0.7524 (0.8082) acc 90.6250 (83.7500) lr 1.6959e-03 eta 1:10:23 +epoch [53/200] batch [10/31] time 0.716 (0.818) data 0.000 (0.083) loss 1.2070 (0.8647) acc 84.3750 (81.8750) lr 1.6959e-03 eta 1:02:26 +epoch [53/200] batch [15/31] time 0.715 (0.784) data 0.000 (0.055) loss 0.6113 (0.9235) acc 84.3750 (81.2500) lr 1.6959e-03 eta 0:59:45 +epoch [53/200] batch [20/31] time 0.713 (0.767) data 0.000 (0.042) loss 0.7329 (0.9233) acc 87.5000 (81.0938) lr 1.6959e-03 eta 0:58:24 +epoch [53/200] batch [25/31] time 0.710 (0.756) data 0.000 (0.033) loss 1.5957 (0.9681) acc 62.5000 (80.0000) lr 1.6959e-03 eta 0:57:30 +epoch [53/200] batch [30/31] time 0.722 (0.749) data 0.000 (0.028) loss 1.3496 (1.0048) acc 68.7500 (78.0208) lr 1.6959e-03 eta 0:56:53 +epoch [54/200] batch [5/31] time 0.710 (0.898) data 0.000 (0.177) loss 1.9609 (1.1019) acc 62.5000 (76.2500) lr 1.6845e-03 eta 1:08:06 +epoch [54/200] batch [10/31] time 0.706 (0.808) data 0.000 (0.089) loss 1.5225 (1.1220) acc 65.6250 (74.3750) lr 1.6845e-03 eta 1:01:14 +epoch 
[54/200] batch [15/31] time 0.710 (0.777) data 0.000 (0.059) loss 1.1875 (1.1311) acc 75.0000 (73.5417) lr 1.6845e-03 eta 0:58:49 +epoch [54/200] batch [20/31] time 0.707 (0.761) data 0.000 (0.045) loss 0.7700 (1.0845) acc 78.1250 (74.5312) lr 1.6845e-03 eta 0:57:30 +epoch [54/200] batch [25/31] time 0.703 (0.750) data 0.000 (0.036) loss 0.4976 (1.0392) acc 84.3750 (75.5000) lr 1.6845e-03 eta 0:56:38 +epoch [54/200] batch [30/31] time 0.705 (0.743) data 0.000 (0.030) loss 1.3193 (1.1216) acc 59.3750 (73.7500) lr 1.6845e-03 eta 0:56:04 +epoch [55/200] batch [5/31] time 0.710 (0.882) data 0.000 (0.159) loss 0.9082 (1.0521) acc 84.3750 (76.2500) lr 1.6730e-03 eta 1:06:25 +epoch [55/200] batch [10/31] time 0.706 (0.798) data 0.001 (0.080) loss 1.2344 (1.1098) acc 71.8750 (73.7500) lr 1.6730e-03 eta 1:00:04 +epoch [55/200] batch [15/31] time 0.711 (0.770) data 0.000 (0.053) loss 1.3779 (1.1625) acc 81.2500 (73.7500) lr 1.6730e-03 eta 0:57:52 +epoch [55/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.040) loss 0.5571 (1.1065) acc 90.6250 (75.4688) lr 1.6730e-03 eta 0:56:44 +epoch [55/200] batch [25/31] time 0.718 (0.747) data 0.000 (0.032) loss 0.9819 (1.0998) acc 75.0000 (75.6250) lr 1.6730e-03 eta 0:56:03 +epoch [55/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 0.4954 (1.0830) acc 87.5000 (75.9375) lr 1.6730e-03 eta 0:55:30 +epoch [56/200] batch [5/31] time 0.721 (0.891) data 0.000 (0.166) loss 1.3252 (1.1895) acc 65.6250 (71.2500) lr 1.6613e-03 eta 1:06:38 +epoch [56/200] batch [10/31] time 0.721 (0.806) data 0.000 (0.083) loss 0.7676 (0.9908) acc 75.0000 (75.3125) lr 1.6613e-03 eta 1:00:15 +epoch [56/200] batch [15/31] time 0.716 (0.775) data 0.000 (0.056) loss 0.5391 (1.0216) acc 87.5000 (76.0417) lr 1.6613e-03 eta 0:57:53 +epoch [56/200] batch [20/31] time 0.707 (0.760) data 0.000 (0.042) loss 1.2891 (1.0437) acc 62.5000 (75.7812) lr 1.6613e-03 eta 0:56:39 +epoch [56/200] batch [25/31] time 0.712 (0.750) data 0.000 (0.033) loss 1.0254 (1.0630) 
acc 65.6250 (75.1250) lr 1.6613e-03 eta 0:55:51 +epoch [56/200] batch [30/31] time 0.852 (0.749) data 0.000 (0.028) loss 1.2197 (1.0309) acc 68.7500 (75.3125) lr 1.6613e-03 eta 0:55:42 +epoch [57/200] batch [5/31] time 0.710 (0.900) data 0.000 (0.176) loss 0.7041 (0.7594) acc 90.6250 (83.1250) lr 1.6494e-03 eta 1:06:52 +epoch [57/200] batch [10/31] time 0.711 (0.807) data 0.000 (0.088) loss 0.7534 (0.7976) acc 81.2500 (81.2500) lr 1.6494e-03 eta 0:59:54 +epoch [57/200] batch [15/31] time 0.712 (0.776) data 0.000 (0.059) loss 1.5420 (0.8455) acc 65.6250 (79.1667) lr 1.6494e-03 eta 0:57:30 +epoch [57/200] batch [20/31] time 0.711 (0.760) data 0.000 (0.044) loss 1.4033 (0.9597) acc 62.5000 (76.5625) lr 1.6494e-03 eta 0:56:15 +epoch [57/200] batch [25/31] time 0.712 (0.750) data 0.000 (0.035) loss 0.6714 (0.9473) acc 78.1250 (76.2500) lr 1.6494e-03 eta 0:55:31 +epoch [57/200] batch [30/31] time 0.711 (0.744) data 0.000 (0.030) loss 0.5039 (0.9238) acc 87.5000 (77.0833) lr 1.6494e-03 eta 0:54:57 +epoch [58/200] batch [5/31] time 0.710 (0.886) data 0.000 (0.162) loss 0.7266 (0.9266) acc 78.1250 (74.3750) lr 1.6374e-03 eta 1:05:22 +epoch [58/200] batch [10/31] time 0.714 (0.799) data 0.000 (0.081) loss 0.7090 (1.0394) acc 78.1250 (74.3750) lr 1.6374e-03 eta 0:58:54 +epoch [58/200] batch [15/31] time 0.716 (0.771) data 0.001 (0.054) loss 0.7139 (1.0514) acc 90.6250 (75.4167) lr 1.6374e-03 eta 0:56:44 +epoch [58/200] batch [20/31] time 0.720 (0.757) data 0.000 (0.041) loss 0.6196 (1.0288) acc 81.2500 (76.2500) lr 1.6374e-03 eta 0:55:41 +epoch [58/200] batch [25/31] time 0.719 (0.749) data 0.000 (0.033) loss 0.9854 (1.0444) acc 81.2500 (75.7500) lr 1.6374e-03 eta 0:54:59 +epoch [58/200] batch [30/31] time 0.712 (0.742) data 0.000 (0.027) loss 1.0713 (1.0431) acc 71.8750 (75.8333) lr 1.6374e-03 eta 0:54:28 +epoch [59/200] batch [5/31] time 0.708 (0.907) data 0.000 (0.179) loss 1.3828 (1.0278) acc 71.8750 (75.6250) lr 1.6252e-03 eta 1:06:28 +epoch [59/200] batch [10/31] time 
0.709 (0.825) data 0.000 (0.090) loss 1.1240 (1.0417) acc 78.1250 (75.0000) lr 1.6252e-03 eta 1:00:23 +epoch [59/200] batch [15/31] time 0.709 (0.787) data 0.000 (0.060) loss 1.0332 (0.9418) acc 81.2500 (76.8750) lr 1.6252e-03 eta 0:57:34 +epoch [59/200] batch [20/31] time 0.711 (0.768) data 0.000 (0.045) loss 0.5459 (0.9188) acc 90.6250 (77.9688) lr 1.6252e-03 eta 0:56:05 +epoch [59/200] batch [25/31] time 0.728 (0.757) data 0.000 (0.036) loss 1.0117 (0.9641) acc 75.0000 (76.3750) lr 1.6252e-03 eta 0:55:14 +epoch [59/200] batch [30/31] time 0.709 (0.750) data 0.000 (0.030) loss 1.2822 (0.9803) acc 71.8750 (76.1458) lr 1.6252e-03 eta 0:54:39 +epoch [60/200] batch [5/31] time 0.715 (0.889) data 0.000 (0.166) loss 1.0312 (1.0624) acc 68.7500 (74.3750) lr 1.6129e-03 eta 1:04:40 +epoch [60/200] batch [10/31] time 0.714 (0.800) data 0.000 (0.083) loss 1.0234 (1.0222) acc 78.1250 (77.1875) lr 1.6129e-03 eta 0:58:08 +epoch [60/200] batch [15/31] time 0.715 (0.771) data 0.000 (0.055) loss 0.7734 (0.9901) acc 78.1250 (77.2917) lr 1.6129e-03 eta 0:55:57 +epoch [60/200] batch [20/31] time 0.718 (0.756) data 0.000 (0.042) loss 1.2744 (0.9822) acc 75.0000 (77.0312) lr 1.6129e-03 eta 0:54:49 +epoch [60/200] batch [25/31] time 0.849 (0.753) data 0.000 (0.033) loss 1.0693 (0.9440) acc 68.7500 (78.1250) lr 1.6129e-03 eta 0:54:32 +epoch [60/200] batch [30/31] time 0.716 (0.747) data 0.000 (0.028) loss 1.3838 (0.9666) acc 68.7500 (77.3958) lr 1.6129e-03 eta 0:54:00 +epoch [61/200] batch [5/31] time 0.715 (0.893) data 0.000 (0.166) loss 0.7178 (0.7832) acc 90.6250 (83.7500) lr 1.6004e-03 eta 1:04:28 +epoch [61/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.083) loss 0.6035 (0.8480) acc 78.1250 (80.3125) lr 1.6004e-03 eta 0:58:06 +epoch [61/200] batch [15/31] time 0.710 (0.774) data 0.000 (0.056) loss 1.3828 (0.8871) acc 78.1250 (80.6250) lr 1.6004e-03 eta 0:55:46 +epoch [61/200] batch [20/31] time 0.720 (0.759) data 0.000 (0.042) loss 0.6963 (0.8404) acc 87.5000 (81.0938) lr 
1.6004e-03 eta 0:54:40 +epoch [61/200] batch [25/31] time 0.715 (0.749) data 0.000 (0.033) loss 1.1572 (0.9154) acc 75.0000 (79.5000) lr 1.6004e-03 eta 0:53:54 +epoch [61/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.028) loss 1.2549 (0.8978) acc 62.5000 (79.1667) lr 1.6004e-03 eta 0:53:23 +epoch [62/200] batch [5/31] time 0.720 (0.892) data 0.000 (0.167) loss 1.1104 (1.0601) acc 75.0000 (74.3750) lr 1.5878e-03 eta 1:03:58 +epoch [62/200] batch [10/31] time 0.714 (0.803) data 0.000 (0.084) loss 0.4602 (1.0689) acc 90.6250 (75.6250) lr 1.5878e-03 eta 0:57:30 +epoch [62/200] batch [15/31] time 0.730 (0.774) data 0.000 (0.056) loss 1.4258 (0.9628) acc 65.6250 (78.1250) lr 1.5878e-03 eta 0:55:24 +epoch [62/200] batch [20/31] time 0.710 (0.759) data 0.000 (0.042) loss 1.0850 (1.0055) acc 68.7500 (77.1875) lr 1.5878e-03 eta 0:54:15 +epoch [62/200] batch [25/31] time 0.708 (0.755) data 0.000 (0.034) loss 0.7529 (1.0256) acc 81.2500 (76.1250) lr 1.5878e-03 eta 0:53:54 +epoch [62/200] batch [30/31] time 0.710 (0.748) data 0.000 (0.028) loss 0.7607 (1.0239) acc 81.2500 (76.1458) lr 1.5878e-03 eta 0:53:19 +epoch [63/200] batch [5/31] time 0.719 (0.883) data 0.000 (0.159) loss 1.0059 (0.9597) acc 78.1250 (78.7500) lr 1.5750e-03 eta 1:02:54 +epoch [63/200] batch [10/31] time 0.717 (0.799) data 0.000 (0.080) loss 0.8374 (0.8254) acc 81.2500 (81.8750) lr 1.5750e-03 eta 0:56:49 +epoch [63/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.053) loss 0.9912 (0.8359) acc 81.2500 (82.9167) lr 1.5750e-03 eta 0:54:41 +epoch [63/200] batch [20/31] time 0.708 (0.755) data 0.000 (0.040) loss 0.8721 (0.8729) acc 81.2500 (81.0938) lr 1.5750e-03 eta 0:53:32 +epoch [63/200] batch [25/31] time 0.709 (0.745) data 0.000 (0.032) loss 0.8525 (0.9247) acc 84.3750 (80.0000) lr 1.5750e-03 eta 0:52:49 +epoch [63/200] batch [30/31] time 0.718 (0.740) data 0.000 (0.027) loss 1.1064 (0.9317) acc 71.8750 (79.7917) lr 1.5750e-03 eta 0:52:22 +epoch [64/200] batch [5/31] time 0.712 (0.898) data 0.000 
(0.171) loss 0.8232 (1.2529) acc 71.8750 (70.0000) lr 1.5621e-03 eta 1:03:27 +epoch [64/200] batch [10/31] time 0.723 (0.808) data 0.000 (0.086) loss 1.0518 (1.1038) acc 81.2500 (76.8750) lr 1.5621e-03 eta 0:57:02 +epoch [64/200] batch [15/31] time 0.720 (0.777) data 0.000 (0.057) loss 0.6973 (1.0217) acc 90.6250 (78.7500) lr 1.5621e-03 eta 0:54:46 +epoch [64/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.043) loss 0.6304 (0.9911) acc 81.2500 (78.7500) lr 1.5621e-03 eta 0:53:34 +epoch [64/200] batch [25/31] time 0.726 (0.751) data 0.000 (0.034) loss 0.9058 (0.9596) acc 75.0000 (79.0000) lr 1.5621e-03 eta 0:52:49 +epoch [64/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.029) loss 0.8174 (0.9919) acc 81.2500 (78.1250) lr 1.5621e-03 eta 0:52:15 +epoch [65/200] batch [5/31] time 0.728 (0.902) data 0.000 (0.173) loss 0.9858 (1.0256) acc 84.3750 (77.5000) lr 1.5490e-03 eta 1:03:19 +epoch [65/200] batch [10/31] time 0.721 (0.808) data 0.000 (0.086) loss 0.9629 (1.0090) acc 75.0000 (78.7500) lr 1.5490e-03 eta 0:56:37 +epoch [65/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.058) loss 0.6411 (0.9461) acc 84.3750 (80.2083) lr 1.5490e-03 eta 0:54:26 +epoch [65/200] batch [20/31] time 0.706 (0.760) data 0.000 (0.043) loss 1.4492 (0.9861) acc 71.8750 (78.7500) lr 1.5490e-03 eta 0:53:10 +epoch [65/200] batch [25/31] time 0.706 (0.750) data 0.000 (0.035) loss 1.1797 (0.9816) acc 71.8750 (78.5000) lr 1.5490e-03 eta 0:52:22 +epoch [65/200] batch [30/31] time 0.861 (0.748) data 0.000 (0.029) loss 1.2197 (1.0189) acc 65.6250 (77.3958) lr 1.5490e-03 eta 0:52:12 +epoch [66/200] batch [5/31] time 0.705 (0.889) data 0.000 (0.164) loss 0.5791 (0.7976) acc 90.6250 (85.6250) lr 1.5358e-03 eta 1:01:56 +epoch [66/200] batch [10/31] time 0.727 (0.803) data 0.000 (0.082) loss 1.2861 (0.8313) acc 78.1250 (81.2500) lr 1.5358e-03 eta 0:55:52 +epoch [66/200] batch [15/31] time 0.715 (0.773) data 0.000 (0.055) loss 0.5522 (0.8999) acc 84.3750 (79.5833) lr 1.5358e-03 eta 0:53:42 +epoch 
[66/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.041) loss 1.2100 (0.9085) acc 75.0000 (79.0625) lr 1.5358e-03 eta 0:52:31 +epoch [66/200] batch [25/31] time 0.721 (0.747) data 0.000 (0.033) loss 0.9263 (0.9140) acc 75.0000 (78.1250) lr 1.5358e-03 eta 0:51:47 +epoch [66/200] batch [30/31] time 0.710 (0.740) data 0.000 (0.028) loss 0.5288 (0.9043) acc 90.6250 (78.6458) lr 1.5358e-03 eta 0:51:16 +epoch [67/200] batch [5/31] time 0.715 (0.883) data 0.000 (0.160) loss 0.7603 (0.8209) acc 75.0000 (81.2500) lr 1.5225e-03 eta 1:01:02 +epoch [67/200] batch [10/31] time 0.712 (0.797) data 0.000 (0.080) loss 0.6484 (0.8672) acc 81.2500 (79.6875) lr 1.5225e-03 eta 0:55:02 +epoch [67/200] batch [15/31] time 0.708 (0.769) data 0.000 (0.054) loss 0.9644 (0.9350) acc 75.0000 (78.5417) lr 1.5225e-03 eta 0:53:03 +epoch [67/200] batch [20/31] time 0.712 (0.755) data 0.000 (0.040) loss 1.0049 (0.9524) acc 81.2500 (79.3750) lr 1.5225e-03 eta 0:52:00 +epoch [67/200] batch [25/31] time 0.726 (0.748) data 0.000 (0.032) loss 1.6074 (0.9457) acc 68.7500 (79.3750) lr 1.5225e-03 eta 0:51:29 +epoch [67/200] batch [30/31] time 0.714 (0.742) data 0.000 (0.027) loss 0.8101 (0.9312) acc 78.1250 (78.8542) lr 1.5225e-03 eta 0:51:01 +epoch [68/200] batch [5/31] time 0.713 (0.890) data 0.000 (0.170) loss 0.6206 (0.7244) acc 90.6250 (85.0000) lr 1.5090e-03 eta 1:01:03 +epoch [68/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.085) loss 1.0908 (0.7890) acc 65.6250 (82.8125) lr 1.5090e-03 eta 0:54:54 +epoch [68/200] batch [15/31] time 0.708 (0.771) data 0.000 (0.057) loss 1.1592 (0.8509) acc 65.6250 (80.8333) lr 1.5090e-03 eta 0:52:48 +epoch [68/200] batch [20/31] time 0.705 (0.756) data 0.000 (0.043) loss 0.7456 (0.8364) acc 90.6250 (81.8750) lr 1.5090e-03 eta 0:51:43 +epoch [68/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.034) loss 1.1299 (0.8798) acc 65.6250 (80.7500) lr 1.5090e-03 eta 0:51:07 +epoch [68/200] batch [30/31] time 0.710 (0.742) data 0.000 (0.029) loss 0.8804 (0.9078) 
acc 78.1250 (80.0000) lr 1.5090e-03 eta 0:50:35 +epoch [69/200] batch [5/31] time 0.719 (0.889) data 0.000 (0.167) loss 0.6973 (0.7882) acc 78.1250 (80.6250) lr 1.4955e-03 eta 1:00:33 +epoch [69/200] batch [10/31] time 0.722 (0.804) data 0.000 (0.084) loss 0.5811 (0.9014) acc 87.5000 (79.6875) lr 1.4955e-03 eta 0:54:40 +epoch [69/200] batch [15/31] time 0.716 (0.774) data 0.000 (0.056) loss 1.6191 (0.9311) acc 65.6250 (77.7083) lr 1.4955e-03 eta 0:52:36 +epoch [69/200] batch [20/31] time 0.705 (0.758) data 0.000 (0.042) loss 0.8428 (0.9284) acc 71.8750 (78.1250) lr 1.4955e-03 eta 0:51:26 +epoch [69/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.034) loss 0.7212 (0.9232) acc 81.2500 (78.5000) lr 1.4955e-03 eta 0:50:43 +epoch [69/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.028) loss 1.0479 (0.9732) acc 78.1250 (78.0208) lr 1.4955e-03 eta 0:50:12 +epoch [70/200] batch [5/31] time 0.711 (0.883) data 0.000 (0.161) loss 0.6401 (1.1542) acc 84.3750 (75.6250) lr 1.4818e-03 eta 0:59:41 +epoch [70/200] batch [10/31] time 0.710 (0.800) data 0.000 (0.081) loss 0.8779 (1.0454) acc 78.1250 (74.6875) lr 1.4818e-03 eta 0:53:59 +epoch [70/200] batch [15/31] time 0.715 (0.770) data 0.000 (0.054) loss 0.5259 (0.9753) acc 87.5000 (76.8750) lr 1.4818e-03 eta 0:51:56 +epoch [70/200] batch [20/31] time 0.714 (0.761) data 0.000 (0.040) loss 1.1035 (0.9797) acc 75.0000 (76.8750) lr 1.4818e-03 eta 0:51:17 +epoch [70/200] batch [25/31] time 0.702 (0.750) data 0.000 (0.032) loss 0.4739 (0.9609) acc 84.3750 (77.1250) lr 1.4818e-03 eta 0:50:28 +epoch [70/200] batch [30/31] time 0.706 (0.744) data 0.000 (0.027) loss 0.4932 (0.9514) acc 87.5000 (77.2917) lr 1.4818e-03 eta 0:49:59 +epoch [71/200] batch [5/31] time 0.708 (0.879) data 0.000 (0.156) loss 0.9189 (0.8467) acc 78.1250 (79.3750) lr 1.4679e-03 eta 0:58:59 +epoch [71/200] batch [10/31] time 0.715 (0.796) data 0.000 (0.078) loss 0.8745 (0.8266) acc 81.2500 (81.8750) lr 1.4679e-03 eta 0:53:19 +epoch [71/200] batch [15/31] time 
0.709 (0.767) data 0.000 (0.052) loss 0.7720 (0.7848) acc 75.0000 (81.6667) lr 1.4679e-03 eta 0:51:21 +epoch [71/200] batch [20/31] time 0.710 (0.753) data 0.000 (0.039) loss 0.5811 (0.7727) acc 87.5000 (81.8750) lr 1.4679e-03 eta 0:50:19 +epoch [71/200] batch [25/31] time 0.706 (0.745) data 0.000 (0.032) loss 1.1074 (0.8282) acc 65.6250 (80.1250) lr 1.4679e-03 eta 0:49:42 +epoch [71/200] batch [30/31] time 0.723 (0.739) data 0.000 (0.026) loss 1.0820 (0.8457) acc 75.0000 (79.6875) lr 1.4679e-03 eta 0:49:16 +epoch [72/200] batch [5/31] time 0.703 (0.889) data 0.001 (0.166) loss 0.4941 (0.7712) acc 87.5000 (85.0000) lr 1.4540e-03 eta 0:59:09 +epoch [72/200] batch [10/31] time 0.724 (0.817) data 0.000 (0.083) loss 0.8223 (0.8019) acc 78.1250 (81.5625) lr 1.4540e-03 eta 0:54:19 +epoch [72/200] batch [15/31] time 0.710 (0.781) data 0.000 (0.056) loss 1.0547 (0.8304) acc 68.7500 (80.4167) lr 1.4540e-03 eta 0:51:51 +epoch [72/200] batch [20/31] time 0.708 (0.764) data 0.000 (0.042) loss 1.0850 (0.9077) acc 81.2500 (78.7500) lr 1.4540e-03 eta 0:50:40 +epoch [72/200] batch [25/31] time 0.707 (0.753) data 0.000 (0.033) loss 0.5273 (0.8805) acc 90.6250 (79.2500) lr 1.4540e-03 eta 0:49:51 +epoch [72/200] batch [30/31] time 0.705 (0.745) data 0.000 (0.028) loss 0.8213 (0.9376) acc 78.1250 (78.3333) lr 1.4540e-03 eta 0:49:18 +epoch [73/200] batch [5/31] time 0.710 (0.897) data 0.000 (0.176) loss 0.5063 (0.8502) acc 81.2500 (81.8750) lr 1.4399e-03 eta 0:59:15 +epoch [73/200] batch [10/31] time 0.706 (0.804) data 0.000 (0.088) loss 0.3916 (0.8189) acc 87.5000 (81.8750) lr 1.4399e-03 eta 0:53:02 +epoch [73/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.059) loss 0.9917 (0.8601) acc 65.6250 (79.5833) lr 1.4399e-03 eta 0:50:55 +epoch [73/200] batch [20/31] time 0.708 (0.756) data 0.000 (0.044) loss 1.2168 (0.8396) acc 75.0000 (80.3125) lr 1.4399e-03 eta 0:49:46 +epoch [73/200] batch [25/31] time 0.707 (0.747) data 0.000 (0.035) loss 1.0107 (0.8575) acc 78.1250 (80.3750) lr 
1.4399e-03 eta 0:49:04 +epoch [73/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.030) loss 1.0605 (0.9146) acc 78.1250 (79.2708) lr 1.4399e-03 eta 0:48:35 +epoch [74/200] batch [5/31] time 0.728 (0.893) data 0.000 (0.168) loss 0.5923 (0.6671) acc 84.3750 (86.2500) lr 1.4258e-03 eta 0:58:30 +epoch [74/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.084) loss 0.8691 (0.7609) acc 78.1250 (84.0625) lr 1.4258e-03 eta 0:52:34 +epoch [74/200] batch [15/31] time 0.715 (0.773) data 0.000 (0.056) loss 1.3584 (0.8624) acc 75.0000 (81.4583) lr 1.4258e-03 eta 0:50:31 +epoch [74/200] batch [20/31] time 0.722 (0.765) data 0.000 (0.042) loss 1.6230 (0.8567) acc 65.6250 (80.9375) lr 1.4258e-03 eta 0:49:57 +epoch [74/200] batch [25/31] time 0.708 (0.754) data 0.000 (0.034) loss 0.6802 (0.8528) acc 84.3750 (80.5000) lr 1.4258e-03 eta 0:49:09 +epoch [74/200] batch [30/31] time 0.707 (0.746) data 0.000 (0.028) loss 0.8398 (0.8445) acc 81.2500 (80.3125) lr 1.4258e-03 eta 0:48:35 +epoch [75/200] batch [5/31] time 0.706 (0.883) data 0.000 (0.160) loss 1.1055 (0.7630) acc 78.1250 (78.1250) lr 1.4115e-03 eta 0:57:23 +epoch [75/200] batch [10/31] time 0.711 (0.797) data 0.000 (0.080) loss 0.8833 (1.0153) acc 78.1250 (77.8125) lr 1.4115e-03 eta 0:51:45 +epoch [75/200] batch [15/31] time 0.713 (0.771) data 0.000 (0.054) loss 0.5679 (0.9113) acc 81.2500 (79.3750) lr 1.4115e-03 eta 0:49:58 +epoch [75/200] batch [20/31] time 0.707 (0.755) data 0.000 (0.040) loss 0.5317 (0.9466) acc 93.7500 (79.5312) lr 1.4115e-03 eta 0:48:53 +epoch [75/200] batch [25/31] time 0.707 (0.745) data 0.000 (0.032) loss 0.8008 (0.9095) acc 87.5000 (80.0000) lr 1.4115e-03 eta 0:48:13 +epoch [75/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.027) loss 1.1689 (0.9016) acc 68.7500 (79.2708) lr 1.4115e-03 eta 0:47:47 +epoch [76/200] batch [5/31] time 0.706 (0.887) data 0.000 (0.165) loss 0.7949 (0.8105) acc 81.2500 (80.0000) lr 1.3971e-03 eta 0:57:11 +epoch [76/200] batch [10/31] time 0.719 (0.799) data 0.000 
(0.083) loss 0.6680 (0.8132) acc 90.6250 (80.3125) lr 1.3971e-03 eta 0:51:26 +epoch [76/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.055) loss 0.5493 (0.8544) acc 87.5000 (78.5417) lr 1.3971e-03 eta 0:49:30 +epoch [76/200] batch [20/31] time 0.703 (0.754) data 0.000 (0.041) loss 0.7290 (0.8453) acc 84.3750 (78.7500) lr 1.3971e-03 eta 0:48:27 +epoch [76/200] batch [25/31] time 0.705 (0.745) data 0.000 (0.033) loss 1.5742 (0.9375) acc 71.8750 (77.7500) lr 1.3971e-03 eta 0:47:49 +epoch [76/200] batch [30/31] time 0.708 (0.740) data 0.000 (0.028) loss 0.7588 (0.9327) acc 75.0000 (78.0208) lr 1.3971e-03 eta 0:47:24 +epoch [77/200] batch [5/31] time 0.710 (0.875) data 0.001 (0.155) loss 0.7109 (0.5509) acc 81.2500 (88.1250) lr 1.3827e-03 eta 0:56:00 +epoch [77/200] batch [10/31] time 0.708 (0.793) data 0.000 (0.077) loss 1.1865 (0.7139) acc 81.2500 (83.4375) lr 1.3827e-03 eta 0:50:41 +epoch [77/200] batch [15/31] time 0.707 (0.767) data 0.000 (0.052) loss 1.0225 (0.7619) acc 81.2500 (82.0833) lr 1.3827e-03 eta 0:48:57 +epoch [77/200] batch [20/31] time 0.708 (0.752) data 0.000 (0.039) loss 0.5645 (0.8112) acc 87.5000 (82.1875) lr 1.3827e-03 eta 0:47:56 +epoch [77/200] batch [25/31] time 0.705 (0.743) data 0.000 (0.031) loss 1.2939 (0.8216) acc 75.0000 (81.7500) lr 1.3827e-03 eta 0:47:19 +epoch [77/200] batch [30/31] time 0.720 (0.738) data 0.000 (0.026) loss 0.4624 (0.8363) acc 90.6250 (80.9375) lr 1.3827e-03 eta 0:46:54 +epoch [78/200] batch [5/31] time 0.711 (0.885) data 0.000 (0.164) loss 0.6284 (0.7621) acc 84.3750 (80.0000) lr 1.3681e-03 eta 0:56:08 +epoch [78/200] batch [10/31] time 0.713 (0.799) data 0.000 (0.082) loss 0.7329 (0.8227) acc 81.2500 (79.6875) lr 1.3681e-03 eta 0:50:38 +epoch [78/200] batch [15/31] time 0.707 (0.769) data 0.000 (0.055) loss 0.7578 (0.8381) acc 81.2500 (79.7917) lr 1.3681e-03 eta 0:48:39 +epoch [78/200] batch [20/31] time 0.707 (0.753) data 0.000 (0.041) loss 0.8188 (0.8820) acc 81.2500 (79.5312) lr 1.3681e-03 eta 0:47:37 +epoch 
[78/200] batch [25/31] time 0.707 (0.745) data 0.000 (0.033) loss 0.9692 (0.9130) acc 84.3750 (79.7500) lr 1.3681e-03 eta 0:47:01 +epoch [78/200] batch [30/31] time 0.707 (0.739) data 0.000 (0.028) loss 0.6133 (0.8738) acc 90.6250 (80.7292) lr 1.3681e-03 eta 0:46:36 +epoch [79/200] batch [5/31] time 0.709 (0.891) data 0.000 (0.167) loss 0.4802 (0.8384) acc 84.3750 (76.2500) lr 1.3535e-03 eta 0:56:06 +epoch [79/200] batch [10/31] time 0.712 (0.802) data 0.000 (0.084) loss 0.7974 (0.8196) acc 87.5000 (78.1250) lr 1.3535e-03 eta 0:50:25 +epoch [79/200] batch [15/31] time 0.706 (0.772) data 0.000 (0.056) loss 0.9824 (0.8865) acc 81.2500 (78.3333) lr 1.3535e-03 eta 0:48:27 +epoch [79/200] batch [20/31] time 0.710 (0.757) data 0.000 (0.042) loss 1.0068 (0.8809) acc 75.0000 (79.0625) lr 1.3535e-03 eta 0:47:27 +epoch [79/200] batch [25/31] time 0.719 (0.748) data 0.000 (0.034) loss 0.6846 (0.8885) acc 87.5000 (79.8750) lr 1.3535e-03 eta 0:46:49 +epoch [79/200] batch [30/31] time 0.707 (0.741) data 0.000 (0.028) loss 0.8545 (0.8712) acc 81.2500 (80.1042) lr 1.3535e-03 eta 0:46:21 +epoch [80/200] batch [5/31] time 0.709 (0.880) data 0.000 (0.156) loss 1.2979 (1.0133) acc 75.0000 (78.7500) lr 1.3387e-03 eta 0:54:56 +epoch [80/200] batch [10/31] time 0.710 (0.795) data 0.000 (0.078) loss 0.6499 (0.9366) acc 84.3750 (79.6875) lr 1.3387e-03 eta 0:49:34 +epoch [80/200] batch [15/31] time 0.709 (0.775) data 0.000 (0.052) loss 0.4370 (0.8756) acc 87.5000 (81.4583) lr 1.3387e-03 eta 0:48:17 +epoch [80/200] batch [20/31] time 0.713 (0.759) data 0.000 (0.039) loss 0.8081 (0.8782) acc 81.2500 (81.4062) lr 1.3387e-03 eta 0:47:10 +epoch [80/200] batch [25/31] time 0.705 (0.748) data 0.000 (0.031) loss 0.7842 (0.8666) acc 81.2500 (80.6250) lr 1.3387e-03 eta 0:46:28 +epoch [80/200] batch [30/31] time 0.714 (0.742) data 0.000 (0.026) loss 0.5264 (0.8731) acc 87.5000 (80.3125) lr 1.3387e-03 eta 0:46:02 +epoch [81/200] batch [5/31] time 0.712 (0.885) data 0.001 (0.159) loss 1.1543 (0.7513) 
acc 71.8750 (81.2500) lr 1.3239e-03 eta 0:54:47 +epoch [81/200] batch [10/31] time 0.719 (0.798) data 0.000 (0.080) loss 0.4946 (0.7209) acc 90.6250 (83.7500) lr 1.3239e-03 eta 0:49:21 +epoch [81/200] batch [15/31] time 0.706 (0.767) data 0.000 (0.053) loss 0.5767 (0.7378) acc 90.6250 (83.1250) lr 1.3239e-03 eta 0:47:23 +epoch [81/200] batch [20/31] time 0.727 (0.753) data 0.000 (0.040) loss 0.3379 (0.7729) acc 100.0000 (81.7188) lr 1.3239e-03 eta 0:46:24 +epoch [81/200] batch [25/31] time 0.707 (0.744) data 0.000 (0.032) loss 0.6597 (0.8046) acc 84.3750 (81.6250) lr 1.3239e-03 eta 0:45:49 +epoch [81/200] batch [30/31] time 0.713 (0.738) data 0.000 (0.027) loss 0.6543 (0.7959) acc 90.6250 (81.7708) lr 1.3239e-03 eta 0:45:23 +epoch [82/200] batch [5/31] time 0.708 (0.926) data 0.000 (0.170) loss 0.7847 (0.6763) acc 81.2500 (86.2500) lr 1.3090e-03 eta 0:56:52 +epoch [82/200] batch [10/31] time 0.720 (0.821) data 0.000 (0.085) loss 0.4282 (0.7077) acc 87.5000 (84.3750) lr 1.3090e-03 eta 0:50:20 +epoch [82/200] batch [15/31] time 0.712 (0.786) data 0.000 (0.057) loss 0.6157 (0.7077) acc 90.6250 (84.3750) lr 1.3090e-03 eta 0:48:06 +epoch [82/200] batch [20/31] time 0.707 (0.766) data 0.000 (0.043) loss 1.2080 (0.7701) acc 62.5000 (82.5000) lr 1.3090e-03 eta 0:46:50 +epoch [82/200] batch [25/31] time 0.712 (0.755) data 0.000 (0.034) loss 1.0830 (0.8105) acc 75.0000 (81.7500) lr 1.3090e-03 eta 0:46:05 +epoch [82/200] batch [30/31] time 0.719 (0.748) data 0.000 (0.029) loss 1.0811 (0.8283) acc 68.7500 (81.2500) lr 1.3090e-03 eta 0:45:38 +epoch [83/200] batch [5/31] time 0.723 (0.895) data 0.001 (0.169) loss 0.9951 (0.7367) acc 75.0000 (83.1250) lr 1.2940e-03 eta 0:54:29 +epoch [83/200] batch [10/31] time 0.722 (0.808) data 0.000 (0.085) loss 0.8140 (0.8472) acc 78.1250 (80.6250) lr 1.2940e-03 eta 0:49:07 +epoch [83/200] batch [15/31] time 0.714 (0.777) data 0.000 (0.057) loss 0.5376 (0.8013) acc 87.5000 (81.4583) lr 1.2940e-03 eta 0:47:12 +epoch [83/200] batch [20/31] time 
0.706 (0.762) data 0.000 (0.043) loss 1.0771 (0.8867) acc 78.1250 (80.0000) lr 1.2940e-03 eta 0:46:10 +epoch [83/200] batch [25/31] time 0.707 (0.752) data 0.000 (0.034) loss 0.7944 (0.8869) acc 87.5000 (80.6250) lr 1.2940e-03 eta 0:45:30 +epoch [83/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.029) loss 0.6377 (0.8476) acc 84.3750 (81.4583) lr 1.2940e-03 eta 0:45:01 +epoch [84/200] batch [5/31] time 0.902 (0.921) data 0.000 (0.164) loss 1.4287 (0.9998) acc 75.0000 (79.3750) lr 1.2790e-03 eta 0:55:34 +epoch [84/200] batch [10/31] time 0.716 (0.817) data 0.000 (0.082) loss 1.1738 (0.8972) acc 68.7500 (80.6250) lr 1.2790e-03 eta 0:49:14 +epoch [84/200] batch [15/31] time 0.702 (0.781) data 0.000 (0.055) loss 0.8184 (0.8897) acc 78.1250 (78.9583) lr 1.2790e-03 eta 0:47:01 +epoch [84/200] batch [20/31] time 0.711 (0.763) data 0.000 (0.041) loss 0.6602 (0.9229) acc 87.5000 (79.0625) lr 1.2790e-03 eta 0:45:52 +epoch [84/200] batch [25/31] time 0.710 (0.752) data 0.000 (0.033) loss 0.9971 (0.9379) acc 75.0000 (78.8750) lr 1.2790e-03 eta 0:45:08 +epoch [84/200] batch [30/31] time 0.713 (0.745) data 0.000 (0.028) loss 0.3396 (0.9575) acc 93.7500 (78.5417) lr 1.2790e-03 eta 0:44:39 +epoch [85/200] batch [5/31] time 0.719 (0.886) data 0.001 (0.160) loss 0.8276 (0.8039) acc 87.5000 (83.1250) lr 1.2639e-03 eta 0:53:02 +epoch [85/200] batch [10/31] time 0.712 (0.801) data 0.000 (0.080) loss 0.4414 (0.7759) acc 90.6250 (82.5000) lr 1.2639e-03 eta 0:47:51 +epoch [85/200] batch [15/31] time 0.716 (0.771) data 0.000 (0.053) loss 0.7002 (0.8440) acc 84.3750 (81.6667) lr 1.2639e-03 eta 0:46:01 +epoch [85/200] batch [20/31] time 0.714 (0.756) data 0.000 (0.040) loss 0.8525 (0.8449) acc 81.2500 (82.1875) lr 1.2639e-03 eta 0:45:03 +epoch [85/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.032) loss 1.1328 (0.8323) acc 78.1250 (82.6250) lr 1.2639e-03 eta 0:44:28 +epoch [85/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 0.6670 (0.8265) acc 87.5000 (82.5000) lr 
1.2639e-03 eta 0:44:02 +epoch [86/200] batch [5/31] time 0.707 (0.894) data 0.000 (0.169) loss 0.9590 (0.8114) acc 87.5000 (83.1250) lr 1.2487e-03 eta 0:53:03 +epoch [86/200] batch [10/31] time 0.719 (0.803) data 0.000 (0.085) loss 1.0713 (0.8371) acc 75.0000 (80.9375) lr 1.2487e-03 eta 0:47:35 +epoch [86/200] batch [15/31] time 0.712 (0.772) data 0.000 (0.057) loss 1.0029 (0.8273) acc 75.0000 (80.2083) lr 1.2487e-03 eta 0:45:41 +epoch [86/200] batch [20/31] time 0.706 (0.757) data 0.000 (0.043) loss 0.6475 (0.7872) acc 81.2500 (81.0938) lr 1.2487e-03 eta 0:44:41 +epoch [86/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.034) loss 1.1006 (0.8451) acc 71.8750 (80.1250) lr 1.2487e-03 eta 0:44:08 +epoch [86/200] batch [30/31] time 0.731 (0.743) data 0.000 (0.028) loss 1.1318 (0.8924) acc 75.0000 (79.3750) lr 1.2487e-03 eta 0:43:46 +epoch [87/200] batch [5/31] time 0.706 (0.874) data 0.000 (0.158) loss 0.5684 (0.7517) acc 87.5000 (81.8750) lr 1.2334e-03 eta 0:51:24 +epoch [87/200] batch [10/31] time 0.711 (0.792) data 0.000 (0.079) loss 0.9614 (0.8340) acc 84.3750 (80.6250) lr 1.2334e-03 eta 0:46:30 +epoch [87/200] batch [15/31] time 0.707 (0.764) data 0.000 (0.053) loss 0.5044 (0.7661) acc 90.6250 (81.8750) lr 1.2334e-03 eta 0:44:49 +epoch [87/200] batch [20/31] time 0.703 (0.750) data 0.000 (0.040) loss 1.1260 (0.8178) acc 78.1250 (81.4062) lr 1.2334e-03 eta 0:43:56 +epoch [87/200] batch [25/31] time 0.719 (0.742) data 0.000 (0.032) loss 1.2754 (0.8326) acc 75.0000 (80.7500) lr 1.2334e-03 eta 0:43:23 +epoch [87/200] batch [30/31] time 0.701 (0.736) data 0.000 (0.027) loss 0.9658 (0.8508) acc 84.3750 (80.5208) lr 1.2334e-03 eta 0:42:57 +epoch [88/200] batch [5/31] time 0.712 (0.891) data 0.000 (0.166) loss 0.5103 (0.5886) acc 90.6250 (88.1250) lr 1.2181e-03 eta 0:51:57 +epoch [88/200] batch [10/31] time 0.707 (0.801) data 0.000 (0.083) loss 0.5103 (0.5898) acc 87.5000 (87.5000) lr 1.2181e-03 eta 0:46:36 +epoch [88/200] batch [15/31] time 0.723 (0.771) data 0.000 
(0.056) loss 0.7539 (0.6264) acc 71.8750 (85.8333) lr 1.2181e-03 eta 0:44:48 +epoch [88/200] batch [20/31] time 0.727 (0.757) data 0.000 (0.042) loss 0.5410 (0.7120) acc 84.3750 (84.3750) lr 1.2181e-03 eta 0:43:56 +epoch [88/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.033) loss 1.0312 (0.7183) acc 71.8750 (83.7500) lr 1.2181e-03 eta 0:43:19 +epoch [88/200] batch [30/31] time 0.706 (0.742) data 0.000 (0.028) loss 0.8125 (0.7401) acc 87.5000 (83.6458) lr 1.2181e-03 eta 0:42:55 +epoch [89/200] batch [5/31] time 0.705 (0.880) data 0.000 (0.164) loss 0.6797 (0.6598) acc 87.5000 (84.3750) lr 1.2028e-03 eta 0:50:51 +epoch [89/200] batch [10/31] time 0.712 (0.795) data 0.001 (0.082) loss 0.7632 (0.7780) acc 78.1250 (82.1875) lr 1.2028e-03 eta 0:45:53 +epoch [89/200] batch [15/31] time 0.705 (0.766) data 0.000 (0.055) loss 1.0059 (0.7219) acc 84.3750 (83.9583) lr 1.2028e-03 eta 0:44:09 +epoch [89/200] batch [20/31] time 0.726 (0.753) data 0.000 (0.041) loss 0.8633 (0.7620) acc 78.1250 (82.9688) lr 1.2028e-03 eta 0:43:18 +epoch [89/200] batch [25/31] time 0.714 (0.745) data 0.000 (0.033) loss 1.0469 (0.7931) acc 81.2500 (82.6250) lr 1.2028e-03 eta 0:42:46 +epoch [89/200] batch [30/31] time 0.715 (0.739) data 0.000 (0.028) loss 0.7866 (0.8230) acc 81.2500 (81.6667) lr 1.2028e-03 eta 0:42:24 +epoch [90/200] batch [5/31] time 0.724 (0.886) data 0.000 (0.165) loss 0.5620 (0.9082) acc 81.2500 (76.8750) lr 1.1874e-03 eta 0:50:44 +epoch [90/200] batch [10/31] time 0.714 (0.811) data 0.000 (0.083) loss 0.6270 (0.8752) acc 87.5000 (79.6875) lr 1.1874e-03 eta 0:46:22 +epoch [90/200] batch [15/31] time 0.713 (0.778) data 0.000 (0.055) loss 0.9473 (0.8470) acc 81.2500 (79.5833) lr 1.1874e-03 eta 0:44:26 +epoch [90/200] batch [20/31] time 0.709 (0.761) data 0.000 (0.041) loss 0.6992 (0.8094) acc 81.2500 (80.6250) lr 1.1874e-03 eta 0:43:24 +epoch [90/200] batch [25/31] time 0.713 (0.751) data 0.000 (0.033) loss 0.5083 (0.8268) acc 81.2500 (81.0000) lr 1.1874e-03 eta 0:42:45 +epoch 
[90/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.028) loss 1.1484 (0.8179) acc 81.2500 (81.2500) lr 1.1874e-03 eta 0:42:17 +epoch [91/200] batch [5/31] time 0.711 (0.879) data 0.000 (0.155) loss 1.1641 (0.9163) acc 65.6250 (78.1250) lr 1.1719e-03 eta 0:49:51 +epoch [91/200] batch [10/31] time 0.711 (0.796) data 0.000 (0.078) loss 0.7734 (0.8656) acc 75.0000 (79.0625) lr 1.1719e-03 eta 0:45:05 +epoch [91/200] batch [15/31] time 0.716 (0.769) data 0.000 (0.052) loss 0.8169 (0.7919) acc 84.3750 (81.6667) lr 1.1719e-03 eta 0:43:31 +epoch [91/200] batch [20/31] time 0.707 (0.755) data 0.000 (0.039) loss 0.9937 (0.8031) acc 81.2500 (82.1875) lr 1.1719e-03 eta 0:42:39 +epoch [91/200] batch [25/31] time 0.712 (0.746) data 0.000 (0.031) loss 0.7134 (0.7911) acc 78.1250 (82.1250) lr 1.1719e-03 eta 0:42:05 +epoch [91/200] batch [30/31] time 0.709 (0.745) data 0.000 (0.026) loss 1.7715 (0.8309) acc 59.3750 (80.9375) lr 1.1719e-03 eta 0:41:58 +epoch [92/200] batch [5/31] time 0.721 (0.899) data 0.000 (0.172) loss 1.3262 (0.8471) acc 75.0000 (81.8750) lr 1.1564e-03 eta 0:50:32 +epoch [92/200] batch [10/31] time 0.730 (0.807) data 0.000 (0.086) loss 0.9780 (0.8137) acc 78.1250 (82.1875) lr 1.1564e-03 eta 0:45:18 +epoch [92/200] batch [15/31] time 0.706 (0.777) data 0.000 (0.058) loss 0.9062 (0.7156) acc 84.3750 (85.0000) lr 1.1564e-03 eta 0:43:33 +epoch [92/200] batch [20/31] time 0.707 (0.761) data 0.000 (0.043) loss 0.9106 (0.7775) acc 78.1250 (83.4375) lr 1.1564e-03 eta 0:42:35 +epoch [92/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.035) loss 1.3193 (0.7832) acc 81.2500 (83.2500) lr 1.1564e-03 eta 0:41:58 +epoch [92/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.029) loss 1.1758 (0.7947) acc 68.7500 (82.7083) lr 1.1564e-03 eta 0:41:32 +epoch [93/200] batch [5/31] time 0.708 (0.893) data 0.000 (0.168) loss 0.6377 (0.8100) acc 87.5000 (80.0000) lr 1.1409e-03 eta 0:49:44 +epoch [93/200] batch [10/31] time 0.729 (0.803) data 0.000 (0.084) loss 0.9795 (0.8256) 
acc 75.0000 (80.9375) lr 1.1409e-03 eta 0:44:41 +epoch [93/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.056) loss 0.6919 (0.8050) acc 81.2500 (81.2500) lr 1.1409e-03 eta 0:42:53 +epoch [93/200] batch [20/31] time 0.712 (0.756) data 0.000 (0.042) loss 0.9995 (0.8288) acc 71.8750 (80.9375) lr 1.1409e-03 eta 0:41:54 +epoch [93/200] batch [25/31] time 0.704 (0.746) data 0.000 (0.034) loss 0.9961 (0.8159) acc 78.1250 (81.1250) lr 1.1409e-03 eta 0:41:17 +epoch [93/200] batch [30/31] time 0.701 (0.744) data 0.000 (0.028) loss 0.5293 (0.7956) acc 81.2500 (81.1458) lr 1.1409e-03 eta 0:41:07 +epoch [94/200] batch [5/31] time 0.712 (0.880) data 0.000 (0.155) loss 0.3064 (0.8126) acc 96.8750 (82.5000) lr 1.1253e-03 eta 0:48:32 +epoch [94/200] batch [10/31] time 0.713 (0.796) data 0.000 (0.078) loss 0.4961 (0.6713) acc 90.6250 (85.3125) lr 1.1253e-03 eta 0:43:53 +epoch [94/200] batch [15/31] time 0.710 (0.768) data 0.000 (0.052) loss 0.6099 (0.6668) acc 84.3750 (84.7917) lr 1.1253e-03 eta 0:42:14 +epoch [94/200] batch [20/31] time 0.707 (0.753) data 0.000 (0.039) loss 0.4404 (0.6496) acc 93.7500 (85.4688) lr 1.1253e-03 eta 0:41:22 +epoch [94/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.031) loss 0.5220 (0.6529) acc 84.3750 (84.8750) lr 1.1253e-03 eta 0:40:49 +epoch [94/200] batch [30/31] time 0.706 (0.738) data 0.000 (0.026) loss 0.9424 (0.7055) acc 81.2500 (83.6458) lr 1.1253e-03 eta 0:40:25 +epoch [95/200] batch [5/31] time 0.715 (0.898) data 0.000 (0.178) loss 0.9258 (0.8713) acc 81.2500 (78.7500) lr 1.1097e-03 eta 0:49:07 +epoch [95/200] batch [10/31] time 0.725 (0.809) data 0.000 (0.089) loss 0.7886 (0.7984) acc 84.3750 (81.2500) lr 1.1097e-03 eta 0:44:11 +epoch [95/200] batch [15/31] time 0.711 (0.777) data 0.000 (0.060) loss 0.5903 (0.8172) acc 90.6250 (81.8750) lr 1.1097e-03 eta 0:42:21 +epoch [95/200] batch [20/31] time 0.708 (0.760) data 0.000 (0.045) loss 0.9209 (0.8198) acc 81.2500 (82.3438) lr 1.1097e-03 eta 0:41:23 +epoch [95/200] batch [25/31] time 
0.714 (0.750) data 0.000 (0.036) loss 0.3567 (0.8107) acc 90.6250 (82.3750) lr 1.1097e-03 eta 0:40:45 +epoch [95/200] batch [30/31] time 0.706 (0.743) data 0.000 (0.030) loss 0.8428 (0.7802) acc 81.2500 (82.6042) lr 1.1097e-03 eta 0:40:20 +epoch [96/200] batch [5/31] time 0.706 (0.884) data 0.000 (0.161) loss 1.5703 (0.9208) acc 75.0000 (80.0000) lr 1.0941e-03 eta 0:47:54 +epoch [96/200] batch [10/31] time 0.711 (0.799) data 0.000 (0.081) loss 0.7563 (0.7917) acc 84.3750 (83.1250) lr 1.0941e-03 eta 0:43:13 +epoch [96/200] batch [15/31] time 0.708 (0.772) data 0.000 (0.054) loss 0.7944 (0.8032) acc 75.0000 (82.5000) lr 1.0941e-03 eta 0:41:40 +epoch [96/200] batch [20/31] time 0.719 (0.757) data 0.000 (0.041) loss 0.6924 (0.8642) acc 87.5000 (82.5000) lr 1.0941e-03 eta 0:40:48 +epoch [96/200] batch [25/31] time 0.717 (0.748) data 0.000 (0.032) loss 0.6436 (0.8639) acc 84.3750 (83.0000) lr 1.0941e-03 eta 0:40:15 +epoch [96/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.027) loss 0.6958 (0.8383) acc 78.1250 (82.7083) lr 1.0941e-03 eta 0:39:52 +epoch [97/200] batch [5/31] time 0.711 (0.896) data 0.000 (0.174) loss 1.1719 (0.8977) acc 84.3750 (85.6250) lr 1.0785e-03 eta 0:48:05 +epoch [97/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.087) loss 0.6743 (0.8142) acc 87.5000 (85.9375) lr 1.0785e-03 eta 0:43:01 +epoch [97/200] batch [15/31] time 0.710 (0.783) data 0.000 (0.058) loss 0.5151 (0.7905) acc 90.6250 (85.0000) lr 1.0785e-03 eta 0:41:54 +epoch [97/200] batch [20/31] time 0.723 (0.765) data 0.000 (0.044) loss 0.8535 (0.8069) acc 81.2500 (83.7500) lr 1.0785e-03 eta 0:40:50 +epoch [97/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.035) loss 0.8750 (0.8143) acc 71.8750 (82.8750) lr 1.0785e-03 eta 0:40:10 +epoch [97/200] batch [30/31] time 0.721 (0.746) data 0.000 (0.029) loss 1.3467 (0.8290) acc 75.0000 (82.3958) lr 1.0785e-03 eta 0:39:43 +epoch [98/200] batch [5/31] time 0.709 (0.882) data 0.000 (0.159) loss 0.6279 (0.5445) acc 84.3750 (86.8750) lr 
1.0628e-03 eta 0:46:51 +epoch [98/200] batch [10/31] time 0.721 (0.799) data 0.000 (0.080) loss 0.5142 (0.6745) acc 87.5000 (83.1250) lr 1.0628e-03 eta 0:42:22 +epoch [98/200] batch [15/31] time 0.705 (0.769) data 0.000 (0.053) loss 1.3018 (0.7201) acc 65.6250 (81.4583) lr 1.0628e-03 eta 0:40:45 +epoch [98/200] batch [20/31] time 0.709 (0.755) data 0.000 (0.040) loss 0.8970 (0.7736) acc 81.2500 (80.7812) lr 1.0628e-03 eta 0:39:54 +epoch [98/200] batch [25/31] time 0.717 (0.746) data 0.000 (0.032) loss 0.7314 (0.7609) acc 84.3750 (81.2500) lr 1.0628e-03 eta 0:39:23 +epoch [98/200] batch [30/31] time 0.705 (0.739) data 0.000 (0.027) loss 0.8555 (0.7518) acc 84.3750 (81.6667) lr 1.0628e-03 eta 0:38:58 +epoch [99/200] batch [5/31] time 0.714 (0.890) data 0.000 (0.170) loss 1.0938 (0.7581) acc 81.2500 (85.0000) lr 1.0471e-03 eta 0:46:50 +epoch [99/200] batch [10/31] time 0.713 (0.802) data 0.000 (0.085) loss 0.8423 (0.7312) acc 81.2500 (84.0625) lr 1.0471e-03 eta 0:42:09 +epoch [99/200] batch [15/31] time 0.714 (0.773) data 0.001 (0.057) loss 0.6484 (0.7358) acc 84.3750 (83.3333) lr 1.0471e-03 eta 0:40:31 +epoch [99/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.043) loss 0.7935 (0.7290) acc 84.3750 (84.5312) lr 1.0471e-03 eta 0:39:38 +epoch [99/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.034) loss 1.0098 (0.7517) acc 84.3750 (84.1250) lr 1.0471e-03 eta 0:39:05 +epoch [99/200] batch [30/31] time 0.717 (0.742) data 0.000 (0.029) loss 1.0439 (0.7496) acc 81.2500 (84.0625) lr 1.0471e-03 eta 0:38:44 +epoch [100/200] batch [5/31] time 0.712 (0.920) data 0.000 (0.174) loss 0.8164 (0.8050) acc 90.6250 (84.3750) lr 1.0314e-03 eta 0:47:54 +epoch [100/200] batch [10/31] time 0.715 (0.818) data 0.000 (0.087) loss 1.2178 (0.8594) acc 75.0000 (81.8750) lr 1.0314e-03 eta 0:42:33 +epoch [100/200] batch [15/31] time 0.710 (0.784) data 0.000 (0.058) loss 0.9863 (0.8124) acc 71.8750 (81.8750) lr 1.0314e-03 eta 0:40:41 +epoch [100/200] batch [20/31] time 0.711 (0.766) data 
0.000 (0.044) loss 0.5752 (0.7938) acc 84.3750 (82.1875) lr 1.0314e-03 eta 0:39:42 +epoch [100/200] batch [25/31] time 0.711 (0.755) data 0.000 (0.035) loss 0.8042 (0.7901) acc 75.0000 (82.3750) lr 1.0314e-03 eta 0:39:04 +epoch [100/200] batch [30/31] time 0.711 (0.747) data 0.000 (0.029) loss 0.5239 (0.7892) acc 90.6250 (82.3958) lr 1.0314e-03 eta 0:38:37 +epoch [101/200] batch [5/31] time 0.715 (0.902) data 0.000 (0.178) loss 0.5806 (0.8465) acc 93.7500 (84.3750) lr 1.0157e-03 eta 0:46:32 +epoch [101/200] batch [10/31] time 0.726 (0.810) data 0.000 (0.089) loss 0.9204 (0.8231) acc 71.8750 (82.1875) lr 1.0157e-03 eta 0:41:42 +epoch [101/200] batch [15/31] time 0.710 (0.777) data 0.000 (0.060) loss 1.3184 (0.8108) acc 78.1250 (83.3333) lr 1.0157e-03 eta 0:39:57 +epoch [101/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.045) loss 0.4844 (0.7786) acc 93.7500 (84.3750) lr 1.0157e-03 eta 0:39:05 +epoch [101/200] batch [25/31] time 0.707 (0.756) data 0.000 (0.036) loss 0.8467 (0.7880) acc 81.2500 (84.0000) lr 1.0157e-03 eta 0:38:44 +epoch [101/200] batch [30/31] time 0.707 (0.748) data 0.000 (0.030) loss 0.7539 (0.7704) acc 81.2500 (84.2708) lr 1.0157e-03 eta 0:38:16 +epoch [102/200] batch [5/31] time 0.705 (0.889) data 0.000 (0.166) loss 0.6841 (0.8191) acc 84.3750 (83.7500) lr 1.0000e-03 eta 0:45:25 +epoch [102/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.083) loss 0.3867 (0.7238) acc 87.5000 (84.0625) lr 1.0000e-03 eta 0:40:49 +epoch [102/200] batch [15/31] time 0.708 (0.771) data 0.000 (0.056) loss 0.6396 (0.7064) acc 90.6250 (83.5417) lr 1.0000e-03 eta 0:39:14 +epoch [102/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.042) loss 0.9658 (0.7464) acc 78.1250 (83.1250) lr 1.0000e-03 eta 0:38:23 +epoch [102/200] batch [25/31] time 0.713 (0.747) data 0.000 (0.033) loss 0.4150 (0.7564) acc 90.6250 (83.0000) lr 1.0000e-03 eta 0:37:52 +epoch [102/200] batch [30/31] time 0.707 (0.741) data 0.000 (0.028) loss 0.5332 (0.7441) acc 90.6250 (82.7083) lr 
1.0000e-03 eta 0:37:30 +epoch [103/200] batch [5/31] time 0.717 (0.900) data 0.000 (0.169) loss 0.6333 (0.7164) acc 87.5000 (83.7500) lr 9.8429e-04 eta 0:45:28 +epoch [103/200] batch [10/31] time 0.711 (0.806) data 0.000 (0.085) loss 0.4998 (0.6178) acc 81.2500 (87.1875) lr 9.8429e-04 eta 0:40:41 +epoch [103/200] batch [15/31] time 0.716 (0.778) data 0.000 (0.057) loss 0.3438 (0.6015) acc 87.5000 (87.0833) lr 9.8429e-04 eta 0:39:11 +epoch [103/200] batch [20/31] time 0.718 (0.769) data 0.000 (0.043) loss 0.7793 (0.6536) acc 81.2500 (85.3125) lr 9.8429e-04 eta 0:38:41 +epoch [103/200] batch [25/31] time 0.719 (0.759) data 0.000 (0.034) loss 0.8379 (0.6712) acc 71.8750 (85.1250) lr 9.8429e-04 eta 0:38:06 +epoch [103/200] batch [30/31] time 0.712 (0.751) data 0.000 (0.029) loss 0.6768 (0.7085) acc 87.5000 (83.6458) lr 9.8429e-04 eta 0:37:39 +epoch [104/200] batch [5/31] time 0.706 (0.895) data 0.001 (0.172) loss 0.2930 (0.7681) acc 100.0000 (86.8750) lr 9.6859e-04 eta 0:44:46 +epoch [104/200] batch [10/31] time 0.710 (0.802) data 0.000 (0.086) loss 0.9556 (0.7092) acc 81.2500 (86.8750) lr 9.6859e-04 eta 0:40:05 +epoch [104/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.057) loss 0.7549 (0.6933) acc 84.3750 (87.2917) lr 9.6859e-04 eta 0:38:32 +epoch [104/200] batch [20/31] time 0.713 (0.759) data 0.000 (0.043) loss 0.4128 (0.6734) acc 90.6250 (86.8750) lr 9.6859e-04 eta 0:37:47 +epoch [104/200] batch [25/31] time 0.726 (0.750) data 0.000 (0.035) loss 0.7456 (0.6818) acc 81.2500 (86.0000) lr 9.6859e-04 eta 0:37:16 +epoch [104/200] batch [30/31] time 0.705 (0.743) data 0.000 (0.029) loss 0.4998 (0.6902) acc 87.5000 (85.7292) lr 9.6859e-04 eta 0:36:51 +epoch [105/200] batch [5/31] time 0.713 (0.898) data 0.000 (0.179) loss 0.7822 (0.5012) acc 78.1250 (88.1250) lr 9.5289e-04 eta 0:44:27 +epoch [105/200] batch [10/31] time 0.719 (0.807) data 0.000 (0.090) loss 0.7466 (0.6132) acc 78.1250 (85.0000) lr 9.5289e-04 eta 0:39:54 +epoch [105/200] batch [15/31] time 0.706 
(0.775) data 0.000 (0.060) loss 0.2971 (0.6733) acc 96.8750 (84.3750) lr 9.5289e-04 eta 0:38:13 +epoch [105/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.045) loss 0.2888 (0.6700) acc 96.8750 (85.0000) lr 9.5289e-04 eta 0:37:19 +epoch [105/200] batch [25/31] time 0.708 (0.748) data 0.000 (0.036) loss 0.5645 (0.6707) acc 87.5000 (85.0000) lr 9.5289e-04 eta 0:36:47 +epoch [105/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.030) loss 0.6519 (0.6759) acc 75.0000 (84.3750) lr 9.5289e-04 eta 0:36:23 +epoch [106/200] batch [5/31] time 0.711 (0.902) data 0.000 (0.172) loss 0.4487 (0.7596) acc 90.6250 (86.8750) lr 9.3721e-04 eta 0:44:12 +epoch [106/200] batch [10/31] time 0.712 (0.808) data 0.000 (0.086) loss 0.8169 (0.6975) acc 75.0000 (87.5000) lr 9.3721e-04 eta 0:39:32 +epoch [106/200] batch [15/31] time 0.717 (0.786) data 0.000 (0.057) loss 1.0889 (0.7181) acc 81.2500 (86.8750) lr 9.3721e-04 eta 0:38:24 +epoch [106/200] batch [20/31] time 0.711 (0.769) data 0.000 (0.043) loss 1.3662 (0.7292) acc 71.8750 (85.3125) lr 9.3721e-04 eta 0:37:28 +epoch [106/200] batch [25/31] time 0.714 (0.757) data 0.000 (0.035) loss 0.7891 (0.7796) acc 84.3750 (84.1250) lr 9.3721e-04 eta 0:36:50 +epoch [106/200] batch [30/31] time 0.709 (0.749) data 0.000 (0.029) loss 1.2119 (0.8049) acc 71.8750 (83.1250) lr 9.3721e-04 eta 0:36:23 +epoch [107/200] batch [5/31] time 0.706 (0.913) data 0.000 (0.191) loss 0.4941 (0.8385) acc 93.7500 (81.2500) lr 9.2154e-04 eta 0:44:16 +epoch [107/200] batch [10/31] time 0.711 (0.814) data 0.000 (0.096) loss 0.8145 (0.7314) acc 84.3750 (84.3750) lr 9.2154e-04 eta 0:39:22 +epoch [107/200] batch [15/31] time 0.720 (0.781) data 0.000 (0.064) loss 0.5625 (0.7884) acc 84.3750 (84.3750) lr 9.2154e-04 eta 0:37:43 +epoch [107/200] batch [20/31] time 0.707 (0.762) data 0.000 (0.048) loss 0.9868 (0.8178) acc 81.2500 (83.5938) lr 9.2154e-04 eta 0:36:46 +epoch [107/200] batch [25/31] time 0.710 (0.752) data 0.000 (0.038) loss 0.3171 (0.7644) acc 90.6250 (84.1250) 
lr 9.2154e-04 eta 0:36:13 +epoch [107/200] batch [30/31] time 0.708 (0.745) data 0.000 (0.032) loss 0.9580 (0.7515) acc 78.1250 (83.8542) lr 9.2154e-04 eta 0:35:49 +epoch [108/200] batch [5/31] time 0.711 (0.890) data 0.000 (0.163) loss 0.7979 (0.7418) acc 84.3750 (81.8750) lr 9.0589e-04 eta 0:42:41 +epoch [108/200] batch [10/31] time 0.709 (0.805) data 0.000 (0.082) loss 0.3096 (0.6960) acc 96.8750 (84.3750) lr 9.0589e-04 eta 0:38:34 +epoch [108/200] batch [15/31] time 0.706 (0.774) data 0.000 (0.054) loss 0.5488 (0.6375) acc 84.3750 (84.7917) lr 9.0589e-04 eta 0:36:59 +epoch [108/200] batch [20/31] time 0.714 (0.759) data 0.000 (0.041) loss 0.9663 (0.6664) acc 84.3750 (84.5312) lr 9.0589e-04 eta 0:36:12 +epoch [108/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.033) loss 1.0596 (0.6717) acc 81.2500 (84.6250) lr 9.0589e-04 eta 0:35:39 +epoch [108/200] batch [30/31] time 0.703 (0.742) data 0.000 (0.027) loss 0.7192 (0.6796) acc 87.5000 (84.0625) lr 9.0589e-04 eta 0:35:16 +epoch [109/200] batch [5/31] time 0.710 (0.894) data 0.000 (0.169) loss 0.2905 (0.6389) acc 96.8750 (88.1250) lr 8.9027e-04 eta 0:42:25 +epoch [109/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.085) loss 0.6831 (0.6036) acc 81.2500 (87.8125) lr 8.9027e-04 eta 0:38:03 +epoch [109/200] batch [15/31] time 0.707 (0.773) data 0.000 (0.056) loss 0.7939 (0.6999) acc 78.1250 (85.4167) lr 8.9027e-04 eta 0:36:33 +epoch [109/200] batch [20/31] time 0.710 (0.757) data 0.000 (0.042) loss 0.4861 (0.6798) acc 90.6250 (85.4688) lr 8.9027e-04 eta 0:35:43 +epoch [109/200] batch [25/31] time 0.703 (0.746) data 0.000 (0.034) loss 0.8262 (0.6925) acc 87.5000 (85.8750) lr 8.9027e-04 eta 0:35:09 +epoch [109/200] batch [30/31] time 0.702 (0.740) data 0.000 (0.028) loss 0.9077 (0.6741) acc 78.1250 (85.5208) lr 8.9027e-04 eta 0:34:48 +epoch [110/200] batch [5/31] time 0.709 (0.895) data 0.000 (0.175) loss 1.2305 (0.9114) acc 65.6250 (79.3750) lr 8.7467e-04 eta 0:42:00 +epoch [110/200] batch [10/31] time 0.718 
(0.806) data 0.000 (0.088) loss 0.5029 (0.8653) acc 93.7500 (81.2500) lr 8.7467e-04 eta 0:37:45 +epoch [110/200] batch [15/31] time 0.722 (0.775) data 0.000 (0.059) loss 0.9414 (0.8041) acc 75.0000 (81.8750) lr 8.7467e-04 eta 0:36:15 +epoch [110/200] batch [20/31] time 0.722 (0.759) data 0.000 (0.044) loss 0.4568 (0.7395) acc 90.6250 (83.4375) lr 8.7467e-04 eta 0:35:25 +epoch [110/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.035) loss 0.6147 (0.7820) acc 78.1250 (82.7500) lr 8.7467e-04 eta 0:34:55 +epoch [110/200] batch [30/31] time 0.711 (0.743) data 0.000 (0.029) loss 0.8818 (0.7861) acc 87.5000 (83.1250) lr 8.7467e-04 eta 0:34:32 +epoch [111/200] batch [5/31] time 0.713 (0.899) data 0.000 (0.175) loss 0.9351 (0.7889) acc 81.2500 (83.7500) lr 8.5910e-04 eta 0:41:44 +epoch [111/200] batch [10/31] time 0.710 (0.810) data 0.000 (0.088) loss 0.7461 (0.6846) acc 84.3750 (85.6250) lr 8.5910e-04 eta 0:37:30 +epoch [111/200] batch [15/31] time 0.720 (0.787) data 0.000 (0.059) loss 0.9155 (0.7327) acc 81.2500 (84.5833) lr 8.5910e-04 eta 0:36:24 +epoch [111/200] batch [20/31] time 0.711 (0.768) data 0.000 (0.044) loss 0.7627 (0.6980) acc 87.5000 (85.4688) lr 8.5910e-04 eta 0:35:27 +epoch [111/200] batch [25/31] time 0.729 (0.758) data 0.000 (0.035) loss 0.8667 (0.7219) acc 84.3750 (85.0000) lr 8.5910e-04 eta 0:34:54 +epoch [111/200] batch [30/31] time 0.708 (0.750) data 0.000 (0.029) loss 1.1787 (0.7682) acc 81.2500 (84.1667) lr 8.5910e-04 eta 0:34:29 +epoch [112/200] batch [5/31] time 0.710 (0.887) data 0.000 (0.161) loss 0.3428 (0.8071) acc 93.7500 (85.6250) lr 8.4357e-04 eta 0:40:43 +epoch [112/200] batch [10/31] time 0.721 (0.800) data 0.000 (0.081) loss 0.8716 (0.8726) acc 78.1250 (81.5625) lr 8.4357e-04 eta 0:36:38 +epoch [112/200] batch [15/31] time 0.711 (0.770) data 0.001 (0.054) loss 0.4443 (0.8624) acc 90.6250 (82.0833) lr 8.4357e-04 eta 0:35:12 +epoch [112/200] batch [20/31] time 0.704 (0.754) data 0.000 (0.041) loss 0.7100 (0.8155) acc 87.5000 (82.5000) 
lr 8.4357e-04 eta 0:34:25 +epoch [112/200] batch [25/31] time 0.723 (0.745) data 0.000 (0.032) loss 0.8511 (0.7924) acc 81.2500 (82.8750) lr 8.4357e-04 eta 0:33:55 +epoch [112/200] batch [30/31] time 0.709 (0.739) data 0.000 (0.027) loss 0.7432 (0.7886) acc 90.6250 (83.2292) lr 8.4357e-04 eta 0:33:37 +epoch [113/200] batch [5/31] time 0.710 (0.921) data 0.000 (0.165) loss 1.1396 (0.9380) acc 75.0000 (76.8750) lr 8.2807e-04 eta 0:41:47 +epoch [113/200] batch [10/31] time 0.709 (0.816) data 0.000 (0.083) loss 0.5635 (0.8429) acc 81.2500 (77.5000) lr 8.2807e-04 eta 0:36:58 +epoch [113/200] batch [15/31] time 0.715 (0.782) data 0.000 (0.055) loss 0.8022 (0.7937) acc 84.3750 (80.8333) lr 8.2807e-04 eta 0:35:21 +epoch [113/200] batch [20/31] time 0.705 (0.763) data 0.000 (0.042) loss 0.9653 (0.7752) acc 71.8750 (80.3125) lr 8.2807e-04 eta 0:34:27 +epoch [113/200] batch [25/31] time 0.708 (0.753) data 0.000 (0.033) loss 0.7402 (0.7587) acc 81.2500 (81.1250) lr 8.2807e-04 eta 0:33:56 +epoch [113/200] batch [30/31] time 0.707 (0.746) data 0.000 (0.028) loss 0.5034 (0.7515) acc 84.3750 (81.3542) lr 8.2807e-04 eta 0:33:32 +epoch [114/200] batch [5/31] time 0.725 (0.899) data 0.000 (0.171) loss 0.5845 (0.6365) acc 84.3750 (84.3750) lr 8.1262e-04 eta 0:40:20 +epoch [114/200] batch [10/31] time 0.711 (0.807) data 0.000 (0.085) loss 0.8574 (0.7103) acc 75.0000 (83.1250) lr 8.1262e-04 eta 0:36:07 +epoch [114/200] batch [15/31] time 0.705 (0.779) data 0.000 (0.057) loss 0.7456 (0.6753) acc 84.3750 (84.5833) lr 8.1262e-04 eta 0:34:48 +epoch [114/200] batch [20/31] time 0.708 (0.763) data 0.000 (0.043) loss 0.7197 (0.6454) acc 81.2500 (85.1562) lr 8.1262e-04 eta 0:34:02 +epoch [114/200] batch [25/31] time 0.732 (0.754) data 0.000 (0.034) loss 0.7607 (0.6672) acc 87.5000 (84.8750) lr 8.1262e-04 eta 0:33:33 +epoch [114/200] batch [30/31] time 0.708 (0.747) data 0.000 (0.029) loss 0.9297 (0.6720) acc 78.1250 (84.8958) lr 8.1262e-04 eta 0:33:11 +epoch [115/200] batch [5/31] time 0.710 
(0.897) data 0.000 (0.173) loss 0.8096 (0.6687) acc 81.2500 (86.2500) lr 7.9721e-04 eta 0:39:45 +epoch [115/200] batch [10/31] time 0.727 (0.821) data 0.000 (0.087) loss 0.6079 (0.7329) acc 84.3750 (85.0000) lr 7.9721e-04 eta 0:36:19 +epoch [115/200] batch [15/31] time 0.711 (0.785) data 0.000 (0.058) loss 0.5127 (0.6564) acc 81.2500 (85.8333) lr 7.9721e-04 eta 0:34:40 +epoch [115/200] batch [20/31] time 0.715 (0.768) data 0.000 (0.044) loss 0.9829 (0.6881) acc 84.3750 (85.6250) lr 7.9721e-04 eta 0:33:52 +epoch [115/200] batch [25/31] time 0.706 (0.757) data 0.000 (0.035) loss 0.7251 (0.7016) acc 87.5000 (85.7500) lr 7.9721e-04 eta 0:33:19 +epoch [115/200] batch [30/31] time 0.710 (0.749) data 0.000 (0.029) loss 0.8154 (0.7290) acc 84.3750 (85.1042) lr 7.9721e-04 eta 0:32:55 +epoch [116/200] batch [5/31] time 0.714 (0.899) data 0.000 (0.177) loss 0.8984 (0.7073) acc 81.2500 (84.3750) lr 7.8186e-04 eta 0:39:24 +epoch [116/200] batch [10/31] time 0.708 (0.808) data 0.000 (0.089) loss 0.8257 (0.7413) acc 81.2500 (84.0625) lr 7.8186e-04 eta 0:35:20 +epoch [116/200] batch [15/31] time 0.709 (0.775) data 0.000 (0.059) loss 0.5781 (0.7194) acc 84.3750 (84.3750) lr 7.8186e-04 eta 0:33:50 +epoch [116/200] batch [20/31] time 0.707 (0.758) data 0.000 (0.044) loss 0.5122 (0.6742) acc 84.3750 (84.2188) lr 7.8186e-04 eta 0:33:03 +epoch [116/200] batch [25/31] time 0.710 (0.748) data 0.000 (0.036) loss 0.5591 (0.6804) acc 87.5000 (83.8750) lr 7.8186e-04 eta 0:32:33 +epoch [116/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.030) loss 0.5327 (0.6731) acc 87.5000 (84.1667) lr 7.8186e-04 eta 0:32:14 +epoch [117/200] batch [5/31] time 0.725 (0.898) data 0.000 (0.174) loss 0.3118 (0.5791) acc 93.7500 (91.2500) lr 7.6655e-04 eta 0:38:54 +epoch [117/200] batch [10/31] time 0.710 (0.804) data 0.000 (0.087) loss 0.2957 (0.5522) acc 90.6250 (89.6875) lr 7.6655e-04 eta 0:34:46 +epoch [117/200] batch [15/31] time 0.714 (0.774) data 0.000 (0.058) loss 0.5444 (0.5906) acc 81.2500 (87.7083) 
lr 7.6655e-04 eta 0:33:23 +epoch [117/200] batch [20/31] time 0.722 (0.758) data 0.000 (0.044) loss 1.1797 (0.6336) acc 71.8750 (86.0938) lr 7.6655e-04 eta 0:32:39 +epoch [117/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.035) loss 0.4211 (0.6438) acc 96.8750 (85.8750) lr 7.6655e-04 eta 0:32:09 +epoch [117/200] batch [30/31] time 0.709 (0.741) data 0.000 (0.029) loss 0.6484 (0.6464) acc 81.2500 (85.8333) lr 7.6655e-04 eta 0:31:48 +epoch [118/200] batch [5/31] time 0.712 (0.881) data 0.000 (0.165) loss 0.4048 (0.7296) acc 96.8750 (83.1250) lr 7.5131e-04 eta 0:37:42 +epoch [118/200] batch [10/31] time 0.724 (0.798) data 0.000 (0.083) loss 0.5771 (0.7812) acc 87.5000 (82.1875) lr 7.5131e-04 eta 0:34:04 +epoch [118/200] batch [15/31] time 0.717 (0.768) data 0.000 (0.055) loss 0.4548 (0.7834) acc 90.6250 (83.3333) lr 7.5131e-04 eta 0:32:45 +epoch [118/200] batch [20/31] time 0.704 (0.754) data 0.000 (0.041) loss 0.5464 (0.7527) acc 87.5000 (83.7500) lr 7.5131e-04 eta 0:32:03 +epoch [118/200] batch [25/31] time 0.713 (0.746) data 0.000 (0.033) loss 1.1191 (0.7772) acc 75.0000 (83.2500) lr 7.5131e-04 eta 0:31:41 +epoch [118/200] batch [30/31] time 0.709 (0.740) data 0.000 (0.028) loss 0.9902 (0.7876) acc 71.8750 (83.0208) lr 7.5131e-04 eta 0:31:22 +epoch [119/200] batch [5/31] time 0.707 (0.886) data 0.000 (0.164) loss 0.4695 (0.5883) acc 90.6250 (83.7500) lr 7.3613e-04 eta 0:37:26 +epoch [119/200] batch [10/31] time 0.715 (0.799) data 0.000 (0.082) loss 0.6982 (0.6667) acc 84.3750 (83.4375) lr 7.3613e-04 eta 0:33:44 +epoch [119/200] batch [15/31] time 0.714 (0.771) data 0.000 (0.055) loss 1.0898 (0.7079) acc 78.1250 (82.2917) lr 7.3613e-04 eta 0:32:29 +epoch [119/200] batch [20/31] time 0.723 (0.756) data 0.000 (0.041) loss 0.5728 (0.6720) acc 78.1250 (83.5938) lr 7.3613e-04 eta 0:31:47 +epoch [119/200] batch [25/31] time 0.704 (0.746) data 0.000 (0.033) loss 0.5693 (0.6404) acc 87.5000 (84.5000) lr 7.3613e-04 eta 0:31:18 +epoch [119/200] batch [30/31] time 0.726 
(0.741) data 0.000 (0.028) loss 0.5635 (0.6384) acc 84.3750 (84.3750) lr 7.3613e-04 eta 0:31:00 +epoch [120/200] batch [5/31] time 0.710 (0.880) data 0.000 (0.161) loss 0.5825 (0.7280) acc 87.5000 (81.2500) lr 7.2101e-04 eta 0:36:45 +epoch [120/200] batch [10/31] time 0.712 (0.797) data 0.000 (0.080) loss 0.8818 (0.7444) acc 78.1250 (81.5625) lr 7.2101e-04 eta 0:33:13 +epoch [120/200] batch [15/31] time 0.722 (0.770) data 0.000 (0.054) loss 1.0635 (0.7345) acc 71.8750 (81.8750) lr 7.2101e-04 eta 0:32:02 +epoch [120/200] batch [20/31] time 0.714 (0.755) data 0.000 (0.040) loss 0.2299 (0.7125) acc 93.7500 (82.5000) lr 7.2101e-04 eta 0:31:19 +epoch [120/200] batch [25/31] time 0.710 (0.746) data 0.000 (0.032) loss 0.4961 (0.7338) acc 93.7500 (82.6250) lr 7.2101e-04 eta 0:30:54 +epoch [120/200] batch [30/31] time 0.708 (0.740) data 0.000 (0.027) loss 0.6182 (0.7173) acc 87.5000 (83.1250) lr 7.2101e-04 eta 0:30:36 +epoch [121/200] batch [5/31] time 0.706 (0.887) data 0.000 (0.161) loss 0.6279 (0.7965) acc 87.5000 (80.0000) lr 7.0596e-04 eta 0:36:34 +epoch [121/200] batch [10/31] time 0.707 (0.814) data 0.000 (0.081) loss 0.8936 (0.7013) acc 78.1250 (83.1250) lr 7.0596e-04 eta 0:33:30 +epoch [121/200] batch [15/31] time 0.712 (0.780) data 0.000 (0.054) loss 0.4597 (0.6797) acc 81.2500 (83.3333) lr 7.0596e-04 eta 0:32:02 +epoch [121/200] batch [20/31] time 0.710 (0.763) data 0.000 (0.040) loss 0.8320 (0.6628) acc 81.2500 (83.7500) lr 7.0596e-04 eta 0:31:17 +epoch [121/200] batch [25/31] time 0.714 (0.753) data 0.000 (0.032) loss 1.0840 (0.6641) acc 75.0000 (84.0000) lr 7.0596e-04 eta 0:30:49 +epoch [121/200] batch [30/31] time 0.714 (0.746) data 0.000 (0.027) loss 0.5825 (0.6835) acc 81.2500 (83.8542) lr 7.0596e-04 eta 0:30:27 +epoch [122/200] batch [5/31] time 0.706 (0.894) data 0.000 (0.173) loss 0.6455 (0.6196) acc 87.5000 (86.8750) lr 6.9098e-04 eta 0:36:24 +epoch [122/200] batch [10/31] time 0.715 (0.804) data 0.000 (0.087) loss 0.3645 (0.6125) acc 93.7500 (85.9375) 
lr 6.9098e-04 eta 0:32:39 +epoch [122/200] batch [15/31] time 0.722 (0.774) data 0.000 (0.058) loss 1.0664 (0.7111) acc 78.1250 (84.3750) lr 6.9098e-04 eta 0:31:22 +epoch [122/200] batch [20/31] time 0.725 (0.758) data 0.000 (0.043) loss 0.4360 (0.6724) acc 90.6250 (84.5312) lr 6.9098e-04 eta 0:30:42 +epoch [122/200] batch [25/31] time 0.714 (0.749) data 0.000 (0.035) loss 1.0107 (0.6805) acc 78.1250 (84.2500) lr 6.9098e-04 eta 0:30:15 +epoch [122/200] batch [30/31] time 0.709 (0.747) data 0.000 (0.029) loss 0.6104 (0.6555) acc 87.5000 (84.5833) lr 6.9098e-04 eta 0:30:07 +epoch [123/200] batch [5/31] time 0.705 (0.884) data 0.000 (0.168) loss 0.5659 (0.6336) acc 87.5000 (85.6250) lr 6.7608e-04 eta 0:35:32 +epoch [123/200] batch [10/31] time 0.722 (0.802) data 0.000 (0.084) loss 0.6689 (0.6721) acc 84.3750 (85.6250) lr 6.7608e-04 eta 0:32:10 +epoch [123/200] batch [15/31] time 0.711 (0.772) data 0.000 (0.056) loss 0.3123 (0.6438) acc 93.7500 (85.8333) lr 6.7608e-04 eta 0:30:54 +epoch [123/200] batch [20/31] time 0.713 (0.756) data 0.000 (0.042) loss 0.7197 (0.6540) acc 84.3750 (85.6250) lr 6.7608e-04 eta 0:30:13 +epoch [123/200] batch [25/31] time 0.711 (0.747) data 0.000 (0.034) loss 0.5688 (0.6775) acc 87.5000 (84.8750) lr 6.7608e-04 eta 0:29:48 +epoch [123/200] batch [30/31] time 0.707 (0.741) data 0.000 (0.028) loss 0.4668 (0.6749) acc 87.5000 (84.2708) lr 6.7608e-04 eta 0:29:29 +epoch [124/200] batch [5/31] time 0.716 (0.900) data 0.000 (0.178) loss 0.3452 (0.6195) acc 84.3750 (82.5000) lr 6.6126e-04 eta 0:35:44 +epoch [124/200] batch [10/31] time 0.725 (0.808) data 0.000 (0.089) loss 0.5249 (0.7508) acc 90.6250 (82.1875) lr 6.6126e-04 eta 0:32:00 +epoch [124/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.060) loss 0.9199 (0.7828) acc 78.1250 (82.7083) lr 6.6126e-04 eta 0:30:37 +epoch [124/200] batch [20/31] time 0.707 (0.758) data 0.000 (0.045) loss 0.9375 (0.7331) acc 75.0000 (83.1250) lr 6.6126e-04 eta 0:29:54 +epoch [124/200] batch [25/31] time 0.711 
(0.749) data 0.000 (0.036) loss 0.8540 (0.7296) acc 78.1250 (83.3750) lr 6.6126e-04 eta 0:29:28 +epoch [124/200] batch [30/31] time 0.713 (0.747) data 0.000 (0.030) loss 0.2974 (0.6961) acc 96.8750 (84.4792) lr 6.6126e-04 eta 0:29:20 +epoch [125/200] batch [5/31] time 0.723 (0.901) data 0.000 (0.176) loss 0.7266 (0.6770) acc 87.5000 (85.6250) lr 6.4653e-04 eta 0:35:18 +epoch [125/200] batch [10/31] time 0.707 (0.806) data 0.000 (0.088) loss 0.6660 (0.6041) acc 90.6250 (87.5000) lr 6.4653e-04 eta 0:31:30 +epoch [125/200] batch [15/31] time 0.718 (0.776) data 0.000 (0.059) loss 0.8066 (0.6241) acc 84.3750 (86.8750) lr 6.4653e-04 eta 0:30:15 +epoch [125/200] batch [20/31] time 0.703 (0.759) data 0.000 (0.044) loss 0.7266 (0.6792) acc 78.1250 (85.9375) lr 6.4653e-04 eta 0:29:33 +epoch [125/200] batch [25/31] time 0.701 (0.750) data 0.000 (0.035) loss 0.7568 (0.6730) acc 87.5000 (86.0000) lr 6.4653e-04 eta 0:29:08 +epoch [125/200] batch [30/31] time 0.705 (0.743) data 0.000 (0.029) loss 1.1201 (0.6712) acc 78.1250 (85.9375) lr 6.4653e-04 eta 0:28:48 +epoch [126/200] batch [5/31] time 0.714 (0.899) data 0.000 (0.173) loss 1.3174 (0.9265) acc 78.1250 (79.3750) lr 6.3188e-04 eta 0:34:46 +epoch [126/200] batch [10/31] time 0.715 (0.807) data 0.000 (0.087) loss 0.5796 (0.7894) acc 87.5000 (82.1875) lr 6.3188e-04 eta 0:31:08 +epoch [126/200] batch [15/31] time 0.718 (0.776) data 0.000 (0.058) loss 0.8594 (0.7746) acc 87.5000 (83.3333) lr 6.3188e-04 eta 0:29:53 +epoch [126/200] batch [20/31] time 0.706 (0.761) data 0.000 (0.044) loss 0.8301 (0.7647) acc 87.5000 (83.5938) lr 6.3188e-04 eta 0:29:13 +epoch [126/200] batch [25/31] time 0.712 (0.751) data 0.000 (0.035) loss 1.0801 (0.7479) acc 75.0000 (83.7500) lr 6.3188e-04 eta 0:28:46 +epoch [126/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.029) loss 1.2812 (0.7419) acc 68.7500 (83.4375) lr 6.3188e-04 eta 0:28:27 +epoch [127/200] batch [5/31] time 0.719 (0.892) data 0.000 (0.165) loss 0.3616 (0.6596) acc 90.6250 (85.6250) 
lr 6.1732e-04 eta 0:34:02 +epoch [127/200] batch [10/31] time 0.705 (0.804) data 0.000 (0.083) loss 0.6475 (0.7219) acc 87.5000 (84.6875) lr 6.1732e-04 eta 0:30:35 +epoch [127/200] batch [15/31] time 0.713 (0.775) data 0.000 (0.055) loss 0.5635 (0.7374) acc 87.5000 (84.1667) lr 6.1732e-04 eta 0:29:26 +epoch [127/200] batch [20/31] time 0.710 (0.759) data 0.000 (0.042) loss 0.4934 (0.7306) acc 90.6250 (85.0000) lr 6.1732e-04 eta 0:28:45 +epoch [127/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.033) loss 0.6909 (0.7501) acc 87.5000 (85.3750) lr 6.1732e-04 eta 0:28:19 +epoch [127/200] batch [30/31] time 0.714 (0.743) data 0.000 (0.028) loss 0.7935 (0.7242) acc 84.3750 (85.5208) lr 6.1732e-04 eta 0:28:01 +epoch [128/200] batch [5/31] time 0.706 (0.899) data 0.000 (0.176) loss 0.5591 (0.5838) acc 84.3750 (89.3750) lr 6.0285e-04 eta 0:33:49 +epoch [128/200] batch [10/31] time 0.710 (0.806) data 0.000 (0.088) loss 0.6802 (0.6523) acc 75.0000 (86.2500) lr 6.0285e-04 eta 0:30:16 +epoch [128/200] batch [15/31] time 0.707 (0.783) data 0.000 (0.059) loss 0.6709 (0.7304) acc 84.3750 (84.3750) lr 6.0285e-04 eta 0:29:19 +epoch [128/200] batch [20/31] time 0.710 (0.765) data 0.000 (0.044) loss 0.4597 (0.6777) acc 87.5000 (85.1562) lr 6.0285e-04 eta 0:28:36 +epoch [128/200] batch [25/31] time 0.726 (0.755) data 0.000 (0.035) loss 0.5879 (0.6810) acc 87.5000 (84.7500) lr 6.0285e-04 eta 0:28:09 +epoch [128/200] batch [30/31] time 0.713 (0.749) data 0.000 (0.030) loss 0.6875 (0.6812) acc 87.5000 (84.6875) lr 6.0285e-04 eta 0:27:51 +epoch [129/200] batch [5/31] time 0.726 (0.910) data 0.000 (0.179) loss 0.4146 (0.7052) acc 87.5000 (85.0000) lr 5.8849e-04 eta 0:33:47 +epoch [129/200] batch [10/31] time 0.711 (0.812) data 0.000 (0.090) loss 0.9009 (0.7036) acc 78.1250 (85.0000) lr 5.8849e-04 eta 0:30:03 +epoch [129/200] batch [15/31] time 0.713 (0.778) data 0.000 (0.060) loss 0.6812 (0.7677) acc 87.5000 (83.7500) lr 5.8849e-04 eta 0:28:45 +epoch [129/200] batch [20/31] time 0.723 
(0.762) data 0.000 (0.045) loss 0.5889 (0.7003) acc 87.5000 (85.3125) lr 5.8849e-04 eta 0:28:06 +epoch [129/200] batch [25/31] time 0.721 (0.752) data 0.000 (0.036) loss 0.5488 (0.6994) acc 84.3750 (84.5000) lr 5.8849e-04 eta 0:27:40 +epoch [129/200] batch [30/31] time 0.709 (0.746) data 0.000 (0.030) loss 0.5889 (0.6845) acc 84.3750 (84.6875) lr 5.8849e-04 eta 0:27:22 +epoch [130/200] batch [5/31] time 0.705 (0.896) data 0.000 (0.173) loss 1.2754 (0.7520) acc 78.1250 (86.8750) lr 5.7422e-04 eta 0:32:47 +epoch [130/200] batch [10/31] time 0.709 (0.805) data 0.000 (0.087) loss 0.6929 (0.7220) acc 84.3750 (84.0625) lr 5.7422e-04 eta 0:29:24 +epoch [130/200] batch [15/31] time 0.708 (0.773) data 0.000 (0.058) loss 0.7065 (0.7381) acc 81.2500 (83.7500) lr 5.7422e-04 eta 0:28:10 +epoch [130/200] batch [20/31] time 0.705 (0.758) data 0.000 (0.044) loss 0.7075 (0.7195) acc 81.2500 (84.3750) lr 5.7422e-04 eta 0:27:33 +epoch [130/200] batch [25/31] time 0.708 (0.748) data 0.000 (0.035) loss 0.5337 (0.6997) acc 90.6250 (84.8750) lr 5.7422e-04 eta 0:27:08 +epoch [130/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.029) loss 0.6597 (0.6917) acc 93.7500 (85.6250) lr 5.7422e-04 eta 0:26:50 +epoch [131/200] batch [5/31] time 0.728 (0.932) data 0.000 (0.179) loss 0.6714 (0.4930) acc 90.6250 (90.6250) lr 5.6006e-04 eta 0:33:38 +epoch [131/200] batch [10/31] time 0.713 (0.826) data 0.000 (0.090) loss 0.6675 (0.6665) acc 87.5000 (86.2500) lr 5.6006e-04 eta 0:29:43 +epoch [131/200] batch [15/31] time 0.714 (0.788) data 0.001 (0.060) loss 0.6528 (0.6150) acc 87.5000 (87.5000) lr 5.6006e-04 eta 0:28:17 +epoch [131/200] batch [20/31] time 0.732 (0.771) data 0.000 (0.045) loss 0.4783 (0.6280) acc 81.2500 (86.8750) lr 5.6006e-04 eta 0:27:38 +epoch [131/200] batch [25/31] time 0.722 (0.760) data 0.000 (0.036) loss 0.9692 (0.6780) acc 87.5000 (86.3750) lr 5.6006e-04 eta 0:27:11 +epoch [131/200] batch [30/31] time 0.709 (0.753) data 0.000 (0.030) loss 0.9585 (0.6767) acc 75.0000 (86.0417) 
lr 5.6006e-04 eta 0:26:50 +epoch [132/200] batch [5/31] time 0.725 (0.894) data 0.001 (0.170) loss 0.6011 (0.7045) acc 87.5000 (85.0000) lr 5.4601e-04 eta 0:31:47 +epoch [132/200] batch [10/31] time 0.707 (0.804) data 0.000 (0.085) loss 0.8955 (0.6593) acc 75.0000 (85.0000) lr 5.4601e-04 eta 0:28:30 +epoch [132/200] batch [15/31] time 0.718 (0.774) data 0.000 (0.057) loss 0.5474 (0.6061) acc 84.3750 (86.4583) lr 5.4601e-04 eta 0:27:23 +epoch [132/200] batch [20/31] time 0.707 (0.758) data 0.000 (0.043) loss 0.6650 (0.6033) acc 87.5000 (86.8750) lr 5.4601e-04 eta 0:26:46 +epoch [132/200] batch [25/31] time 0.712 (0.754) data 0.000 (0.034) loss 0.4524 (0.6034) acc 90.6250 (87.2500) lr 5.4601e-04 eta 0:26:34 +epoch [132/200] batch [30/31] time 0.715 (0.746) data 0.000 (0.029) loss 0.6821 (0.6394) acc 84.3750 (86.4583) lr 5.4601e-04 eta 0:26:14 +epoch [133/200] batch [5/31] time 0.708 (0.909) data 0.000 (0.184) loss 0.5801 (0.5788) acc 87.5000 (88.1250) lr 5.3207e-04 eta 0:31:51 +epoch [133/200] batch [10/31] time 0.709 (0.812) data 0.000 (0.092) loss 0.4358 (0.5819) acc 87.5000 (85.9375) lr 5.3207e-04 eta 0:28:23 +epoch [133/200] batch [15/31] time 0.709 (0.779) data 0.000 (0.061) loss 0.7095 (0.6293) acc 81.2500 (83.9583) lr 5.3207e-04 eta 0:27:11 +epoch [133/200] batch [20/31] time 0.702 (0.762) data 0.000 (0.046) loss 0.5752 (0.6397) acc 87.5000 (84.8438) lr 5.3207e-04 eta 0:26:30 +epoch [133/200] batch [25/31] time 0.715 (0.753) data 0.000 (0.037) loss 0.7871 (0.6689) acc 78.1250 (84.8750) lr 5.3207e-04 eta 0:26:07 +epoch [133/200] batch [30/31] time 0.714 (0.746) data 0.000 (0.031) loss 0.5278 (0.6389) acc 87.5000 (85.7292) lr 5.3207e-04 eta 0:25:50 +epoch [134/200] batch [5/31] time 0.716 (0.903) data 0.000 (0.176) loss 0.7969 (0.9018) acc 84.3750 (84.3750) lr 5.1825e-04 eta 0:31:10 +epoch [134/200] batch [10/31] time 0.709 (0.808) data 0.000 (0.088) loss 0.5098 (0.7353) acc 87.5000 (84.6875) lr 5.1825e-04 eta 0:27:51 +epoch [134/200] batch [15/31] time 0.714 
(0.778) data 0.000 (0.059) loss 0.1979 (0.6519) acc 96.8750 (85.8333) lr 5.1825e-04 eta 0:26:44 +epoch [134/200] batch [20/31] time 0.710 (0.768) data 0.000 (0.044) loss 0.6411 (0.6316) acc 90.6250 (86.5625) lr 5.1825e-04 eta 0:26:18 +epoch [134/200] batch [25/31] time 0.705 (0.756) data 0.000 (0.036) loss 0.5049 (0.6418) acc 90.6250 (86.7500) lr 5.1825e-04 eta 0:25:50 +epoch [134/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.030) loss 0.5376 (0.6237) acc 93.7500 (86.9792) lr 5.1825e-04 eta 0:25:31 +epoch [135/200] batch [5/31] time 0.704 (0.900) data 0.000 (0.179) loss 0.4106 (0.6683) acc 90.6250 (83.1250) lr 5.0454e-04 eta 0:30:36 +epoch [135/200] batch [10/31] time 0.707 (0.805) data 0.000 (0.090) loss 0.4893 (0.6570) acc 96.8750 (85.6250) lr 5.0454e-04 eta 0:27:19 +epoch [135/200] batch [15/31] time 0.711 (0.774) data 0.000 (0.060) loss 1.0967 (0.6805) acc 81.2500 (85.6250) lr 5.0454e-04 eta 0:26:11 +epoch [135/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.045) loss 0.3682 (0.6677) acc 96.8750 (86.0938) lr 5.0454e-04 eta 0:25:36 +epoch [135/200] batch [25/31] time 0.708 (0.749) data 0.000 (0.036) loss 0.5752 (0.6220) acc 81.2500 (86.7500) lr 5.0454e-04 eta 0:25:13 +epoch [135/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.030) loss 0.7246 (0.6084) acc 84.3750 (87.3958) lr 5.0454e-04 eta 0:24:58 +epoch [136/200] batch [5/31] time 0.709 (0.891) data 0.001 (0.169) loss 0.9165 (0.7196) acc 78.1250 (83.1250) lr 4.9096e-04 eta 0:29:51 +epoch [136/200] batch [10/31] time 0.717 (0.801) data 0.000 (0.085) loss 0.5649 (0.6750) acc 87.5000 (84.6875) lr 4.9096e-04 eta 0:26:46 +epoch [136/200] batch [15/31] time 0.726 (0.774) data 0.000 (0.056) loss 0.3616 (0.6325) acc 90.6250 (86.0417) lr 4.9096e-04 eta 0:25:47 +epoch [136/200] batch [20/31] time 0.720 (0.758) data 0.000 (0.042) loss 0.5615 (0.6464) acc 84.3750 (86.0938) lr 4.9096e-04 eta 0:25:12 +epoch [136/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.034) loss 0.6807 (0.6445) acc 81.2500 (85.7500) 
lr 4.9096e-04 eta 0:24:50 +epoch [136/200] batch [30/31] time 0.716 (0.743) data 0.000 (0.028) loss 0.4465 (0.6255) acc 87.5000 (85.8333) lr 4.9096e-04 eta 0:24:34 +epoch [137/200] batch [5/31] time 0.711 (0.890) data 0.000 (0.168) loss 1.0010 (0.7956) acc 87.5000 (84.3750) lr 4.7750e-04 eta 0:29:20 +epoch [137/200] batch [10/31] time 0.716 (0.801) data 0.000 (0.084) loss 0.5801 (0.8358) acc 87.5000 (81.8750) lr 4.7750e-04 eta 0:26:20 +epoch [137/200] batch [15/31] time 0.725 (0.781) data 0.000 (0.056) loss 0.9844 (0.8363) acc 81.2500 (82.2917) lr 4.7750e-04 eta 0:25:38 +epoch [137/200] batch [20/31] time 0.711 (0.765) data 0.000 (0.042) loss 0.6802 (0.8111) acc 81.2500 (82.3438) lr 4.7750e-04 eta 0:25:02 +epoch [137/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.034) loss 0.6519 (0.7539) acc 90.6250 (83.8750) lr 4.7750e-04 eta 0:24:38 +epoch [137/200] batch [30/31] time 0.715 (0.747) data 0.000 (0.028) loss 0.6738 (0.7347) acc 78.1250 (84.0625) lr 4.7750e-04 eta 0:24:20 +epoch [138/200] batch [5/31] time 0.713 (0.884) data 0.000 (0.163) loss 0.3853 (0.7988) acc 84.3750 (81.2500) lr 4.6417e-04 eta 0:28:41 +epoch [138/200] batch [10/31] time 0.723 (0.799) data 0.001 (0.082) loss 0.8062 (0.7479) acc 81.2500 (82.8125) lr 4.6417e-04 eta 0:25:53 +epoch [138/200] batch [15/31] time 0.710 (0.770) data 0.000 (0.055) loss 0.9497 (0.7739) acc 65.6250 (81.0417) lr 4.6417e-04 eta 0:24:52 +epoch [138/200] batch [20/31] time 0.703 (0.754) data 0.000 (0.041) loss 1.0293 (0.8196) acc 81.2500 (81.4062) lr 4.6417e-04 eta 0:24:17 +epoch [138/200] batch [25/31] time 0.705 (0.745) data 0.000 (0.033) loss 0.5977 (0.7954) acc 84.3750 (81.8750) lr 4.6417e-04 eta 0:23:57 +epoch [138/200] batch [30/31] time 0.704 (0.739) data 0.000 (0.027) loss 0.5410 (0.7500) acc 87.5000 (82.8125) lr 4.6417e-04 eta 0:23:40 +epoch [139/200] batch [5/31] time 0.713 (0.896) data 0.000 (0.171) loss 0.6392 (0.6563) acc 84.3750 (86.2500) lr 4.5098e-04 eta 0:28:37 +epoch [139/200] batch [10/31] time 0.724 
(0.804) data 0.000 (0.086) loss 0.8271 (0.6775) acc 84.3750 (85.3125) lr 4.5098e-04 eta 0:25:37 +epoch [139/200] batch [15/31] time 0.716 (0.773) data 0.000 (0.057) loss 0.6763 (0.6284) acc 81.2500 (86.8750) lr 4.5098e-04 eta 0:24:33 +epoch [139/200] batch [20/31] time 0.712 (0.757) data 0.000 (0.043) loss 0.8828 (0.6283) acc 84.3750 (87.6562) lr 4.5098e-04 eta 0:23:59 +epoch [139/200] batch [25/31] time 0.723 (0.748) data 0.000 (0.034) loss 1.1270 (0.6456) acc 78.1250 (87.1250) lr 4.5098e-04 eta 0:23:38 +epoch [139/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 0.9678 (0.6524) acc 78.1250 (86.6667) lr 4.5098e-04 eta 0:23:23 +epoch [140/200] batch [5/31] time 0.728 (0.896) data 0.000 (0.175) loss 0.5928 (0.6452) acc 84.3750 (84.3750) lr 4.3792e-04 eta 0:28:09 +epoch [140/200] batch [10/31] time 0.713 (0.807) data 0.000 (0.088) loss 0.3777 (0.5865) acc 93.7500 (86.2500) lr 4.3792e-04 eta 0:25:17 +epoch [140/200] batch [15/31] time 0.714 (0.776) data 0.000 (0.059) loss 0.4175 (0.6655) acc 87.5000 (85.4167) lr 4.3792e-04 eta 0:24:15 +epoch [140/200] batch [20/31] time 0.722 (0.761) data 0.000 (0.044) loss 0.5166 (0.6768) acc 96.8750 (85.6250) lr 4.3792e-04 eta 0:23:43 +epoch [140/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.035) loss 0.5942 (0.6444) acc 81.2500 (86.1250) lr 4.3792e-04 eta 0:23:20 +epoch [140/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.029) loss 1.1318 (0.6682) acc 62.5000 (85.3125) lr 4.3792e-04 eta 0:23:04 +epoch [141/200] batch [5/31] time 0.717 (0.908) data 0.000 (0.186) loss 0.5396 (0.5665) acc 87.5000 (84.3750) lr 4.2499e-04 eta 0:28:03 +epoch [141/200] batch [10/31] time 0.724 (0.814) data 0.001 (0.093) loss 0.7949 (0.5938) acc 84.3750 (85.3125) lr 4.2499e-04 eta 0:25:06 +epoch [141/200] batch [15/31] time 0.710 (0.784) data 0.000 (0.062) loss 0.9321 (0.5960) acc 81.2500 (86.0417) lr 4.2499e-04 eta 0:24:05 +epoch [141/200] batch [20/31] time 0.708 (0.766) data 0.000 (0.047) loss 0.8262 (0.6281) acc 81.2500 (85.1562) 
lr 4.2499e-04 eta 0:23:29 +epoch [141/200] batch [25/31] time 0.709 (0.755) data 0.000 (0.038) loss 1.0059 (0.6371) acc 78.1250 (85.5000) lr 4.2499e-04 eta 0:23:04 +epoch [141/200] batch [30/31] time 0.724 (0.747) data 0.000 (0.031) loss 0.4702 (0.6271) acc 90.6250 (86.0417) lr 4.2499e-04 eta 0:22:47 +epoch [142/200] batch [5/31] time 0.712 (0.891) data 0.000 (0.168) loss 0.4453 (0.7059) acc 93.7500 (83.7500) lr 4.1221e-04 eta 0:27:04 +epoch [142/200] batch [10/31] time 0.709 (0.802) data 0.000 (0.084) loss 0.7607 (0.6442) acc 71.8750 (84.0625) lr 4.1221e-04 eta 0:24:17 +epoch [142/200] batch [15/31] time 0.708 (0.782) data 0.000 (0.056) loss 0.6836 (0.6324) acc 81.2500 (84.5833) lr 4.1221e-04 eta 0:23:37 +epoch [142/200] batch [20/31] time 0.708 (0.764) data 0.000 (0.042) loss 0.6582 (0.6587) acc 87.5000 (84.8438) lr 4.1221e-04 eta 0:23:01 +epoch [142/200] batch [25/31] time 0.736 (0.754) data 0.000 (0.034) loss 0.7612 (0.6609) acc 84.3750 (84.3750) lr 4.1221e-04 eta 0:22:39 +epoch [142/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.028) loss 0.3398 (0.6256) acc 90.6250 (85.3125) lr 4.1221e-04 eta 0:22:23 +epoch [143/200] batch [5/31] time 0.709 (0.905) data 0.000 (0.182) loss 0.3564 (0.5954) acc 90.6250 (86.8750) lr 3.9958e-04 eta 0:27:02 +epoch [143/200] batch [10/31] time 0.715 (0.808) data 0.000 (0.091) loss 0.6953 (0.6324) acc 84.3750 (86.8750) lr 3.9958e-04 eta 0:24:05 +epoch [143/200] batch [15/31] time 0.718 (0.777) data 0.000 (0.061) loss 0.8403 (0.6026) acc 78.1250 (87.0833) lr 3.9958e-04 eta 0:23:05 +epoch [143/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.046) loss 0.3179 (0.6276) acc 90.6250 (86.2500) lr 3.9958e-04 eta 0:22:32 +epoch [143/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.037) loss 0.6064 (0.6402) acc 87.5000 (86.0000) lr 3.9958e-04 eta 0:22:09 +epoch [143/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.031) loss 0.4617 (0.6644) acc 84.3750 (85.7292) lr 3.9958e-04 eta 0:21:54 +epoch [144/200] batch [5/31] time 0.711 
(0.920) data 0.000 (0.171) loss 0.7915 (0.6670) acc 81.2500 (86.8750) lr 3.8709e-04 eta 0:27:01 +epoch [144/200] batch [10/31] time 0.714 (0.819) data 0.000 (0.086) loss 0.5840 (0.6104) acc 90.6250 (86.8750) lr 3.8709e-04 eta 0:23:58 +epoch [144/200] batch [15/31] time 0.722 (0.785) data 0.000 (0.057) loss 0.6216 (0.5720) acc 84.3750 (88.1250) lr 3.8709e-04 eta 0:22:55 +epoch [144/200] batch [20/31] time 0.709 (0.767) data 0.000 (0.043) loss 0.6348 (0.5987) acc 84.3750 (87.1875) lr 3.8709e-04 eta 0:22:20 +epoch [144/200] batch [25/31] time 0.729 (0.757) data 0.000 (0.034) loss 0.4617 (0.6308) acc 84.3750 (86.2500) lr 3.8709e-04 eta 0:21:58 +epoch [144/200] batch [30/31] time 0.708 (0.749) data 0.000 (0.029) loss 0.7393 (0.6637) acc 84.3750 (85.4167) lr 3.8709e-04 eta 0:21:40 +epoch [145/200] batch [5/31] time 0.706 (0.892) data 0.000 (0.171) loss 0.4536 (0.6332) acc 87.5000 (85.6250) lr 3.7476e-04 eta 0:25:43 +epoch [145/200] batch [10/31] time 0.716 (0.801) data 0.000 (0.086) loss 0.4553 (0.5728) acc 87.5000 (86.5625) lr 3.7476e-04 eta 0:23:02 +epoch [145/200] batch [15/31] time 0.706 (0.771) data 0.000 (0.057) loss 0.5000 (0.6059) acc 90.6250 (86.2500) lr 3.7476e-04 eta 0:22:06 +epoch [145/200] batch [20/31] time 0.723 (0.756) data 0.000 (0.043) loss 0.5845 (0.6077) acc 93.7500 (86.4062) lr 3.7476e-04 eta 0:21:37 +epoch [145/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.035) loss 0.7993 (0.6071) acc 81.2500 (86.5000) lr 3.7476e-04 eta 0:21:18 +epoch [145/200] batch [30/31] time 0.719 (0.742) data 0.000 (0.029) loss 0.4250 (0.5837) acc 87.5000 (86.6667) lr 3.7476e-04 eta 0:21:05 +epoch [146/200] batch [5/31] time 0.717 (0.895) data 0.000 (0.173) loss 1.0918 (0.8884) acc 84.3750 (83.7500) lr 3.6258e-04 eta 0:25:22 +epoch [146/200] batch [10/31] time 0.709 (0.821) data 0.000 (0.087) loss 0.3525 (0.7287) acc 93.7500 (85.6250) lr 3.6258e-04 eta 0:23:10 +epoch [146/200] batch [15/31] time 0.706 (0.785) data 0.000 (0.058) loss 0.9805 (0.7206) acc 78.1250 (85.6250) 
lr 3.6258e-04 eta 0:22:07 +epoch [146/200] batch [20/31] time 0.707 (0.767) data 0.000 (0.043) loss 0.5869 (0.6577) acc 84.3750 (86.0938) lr 3.6258e-04 eta 0:21:31 +epoch [146/200] batch [25/31] time 0.716 (0.755) data 0.000 (0.035) loss 0.6045 (0.6298) acc 84.3750 (86.0000) lr 3.6258e-04 eta 0:21:09 +epoch [146/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.029) loss 0.3364 (0.6265) acc 96.8750 (86.4583) lr 3.6258e-04 eta 0:20:52 +epoch [147/200] batch [5/31] time 0.707 (0.886) data 0.000 (0.167) loss 0.2532 (0.4211) acc 96.8750 (93.1250) lr 3.5055e-04 eta 0:24:38 +epoch [147/200] batch [10/31] time 0.710 (0.800) data 0.000 (0.084) loss 0.7358 (0.5034) acc 81.2500 (90.0000) lr 3.5055e-04 eta 0:22:11 +epoch [147/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.056) loss 0.7935 (0.5558) acc 84.3750 (88.3333) lr 3.5055e-04 eta 0:21:19 +epoch [147/200] batch [20/31] time 0.727 (0.756) data 0.000 (0.042) loss 0.6880 (0.5456) acc 81.2500 (88.4375) lr 3.5055e-04 eta 0:20:50 +epoch [147/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.034) loss 0.2759 (0.5423) acc 96.8750 (88.6250) lr 3.5055e-04 eta 0:20:31 +epoch [147/200] batch [30/31] time 0.707 (0.740) data 0.000 (0.028) loss 0.4087 (0.5698) acc 90.6250 (88.5417) lr 3.5055e-04 eta 0:20:17 +epoch [148/200] batch [5/31] time 0.713 (0.898) data 0.000 (0.172) loss 0.3728 (0.5808) acc 90.6250 (86.8750) lr 3.3869e-04 eta 0:24:31 +epoch [148/200] batch [10/31] time 0.709 (0.805) data 0.000 (0.086) loss 0.6987 (0.6266) acc 84.3750 (85.9375) lr 3.3869e-04 eta 0:21:54 +epoch [148/200] batch [15/31] time 0.709 (0.774) data 0.000 (0.058) loss 0.5601 (0.6166) acc 87.5000 (86.2500) lr 3.3869e-04 eta 0:21:00 +epoch [148/200] batch [20/31] time 0.704 (0.757) data 0.000 (0.043) loss 0.5151 (0.6076) acc 90.6250 (86.5625) lr 3.3869e-04 eta 0:20:28 +epoch [148/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.035) loss 0.5107 (0.6098) acc 81.2500 (85.8750) lr 3.3869e-04 eta 0:20:09 +epoch [148/200] batch [30/31] time 0.710 
(0.741) data 0.000 (0.029) loss 0.9053 (0.6418) acc 87.5000 (85.5208) lr 3.3869e-04 eta 0:19:55 +epoch [149/200] batch [5/31] time 0.715 (0.896) data 0.000 (0.176) loss 0.5269 (0.5782) acc 84.3750 (88.7500) lr 3.2699e-04 eta 0:24:00 +epoch [149/200] batch [10/31] time 0.726 (0.803) data 0.000 (0.088) loss 0.9292 (0.6535) acc 78.1250 (86.5625) lr 3.2699e-04 eta 0:21:27 +epoch [149/200] batch [15/31] time 0.716 (0.775) data 0.000 (0.059) loss 0.5503 (0.5849) acc 87.5000 (87.2917) lr 3.2699e-04 eta 0:20:37 +epoch [149/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.044) loss 0.9883 (0.6010) acc 84.3750 (87.0312) lr 3.2699e-04 eta 0:20:06 +epoch [149/200] batch [25/31] time 0.723 (0.749) data 0.000 (0.035) loss 0.8438 (0.6242) acc 75.0000 (85.8750) lr 3.2699e-04 eta 0:19:48 +epoch [149/200] batch [30/31] time 0.711 (0.743) data 0.000 (0.030) loss 0.4019 (0.6084) acc 90.6250 (86.3542) lr 3.2699e-04 eta 0:19:35 +epoch [150/200] batch [5/31] time 0.710 (0.889) data 0.000 (0.167) loss 0.2690 (0.5608) acc 90.6250 (86.8750) lr 3.1545e-04 eta 0:23:21 +epoch [150/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.084) loss 0.4067 (0.5371) acc 90.6250 (89.3750) lr 3.1545e-04 eta 0:20:58 +epoch [150/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.056) loss 0.8271 (0.5619) acc 78.1250 (87.7083) lr 3.1545e-04 eta 0:20:07 +epoch [150/200] batch [20/31] time 0.718 (0.757) data 0.000 (0.042) loss 0.7739 (0.5902) acc 81.2500 (87.6562) lr 3.1545e-04 eta 0:19:41 +epoch [150/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.034) loss 0.3962 (0.5723) acc 87.5000 (87.6250) lr 3.1545e-04 eta 0:19:22 +epoch [150/200] batch [30/31] time 0.707 (0.741) data 0.000 (0.028) loss 0.7549 (0.5782) acc 75.0000 (87.0833) lr 3.1545e-04 eta 0:19:08 +epoch [151/200] batch [5/31] time 0.735 (0.909) data 0.000 (0.179) loss 0.8257 (0.6170) acc 81.2500 (86.8750) lr 3.0409e-04 eta 0:23:23 +epoch [151/200] batch [10/31] time 0.710 (0.811) data 0.000 (0.089) loss 0.0870 (0.6670) acc 100.0000 (85.6250) 
lr 3.0409e-04 eta 0:20:48 +epoch [151/200] batch [15/31] time 0.709 (0.779) data 0.000 (0.060) loss 0.9072 (0.7244) acc 81.2500 (84.1667) lr 3.0409e-04 eta 0:19:55 +epoch [151/200] batch [20/31] time 0.709 (0.762) data 0.000 (0.045) loss 0.6001 (0.6889) acc 87.5000 (85.0000) lr 3.0409e-04 eta 0:19:25 +epoch [151/200] batch [25/31] time 0.708 (0.752) data 0.000 (0.036) loss 0.9160 (0.6844) acc 75.0000 (85.3750) lr 3.0409e-04 eta 0:19:07 +epoch [151/200] batch [30/31] time 0.709 (0.745) data 0.000 (0.030) loss 0.5020 (0.6662) acc 87.5000 (85.9375) lr 3.0409e-04 eta 0:18:52 +epoch [152/200] batch [5/31] time 0.704 (0.878) data 0.000 (0.154) loss 0.8975 (0.7658) acc 78.1250 (83.7500) lr 2.9289e-04 eta 0:22:08 +epoch [152/200] batch [10/31] time 0.714 (0.808) data 0.000 (0.077) loss 0.5713 (0.6397) acc 84.3750 (85.6250) lr 2.9289e-04 eta 0:20:19 +epoch [152/200] batch [15/31] time 0.707 (0.775) data 0.000 (0.052) loss 0.5137 (0.6382) acc 90.6250 (86.4583) lr 2.9289e-04 eta 0:19:25 +epoch [152/200] batch [20/31] time 0.714 (0.758) data 0.000 (0.039) loss 0.8105 (0.6134) acc 84.3750 (86.5625) lr 2.9289e-04 eta 0:18:55 +epoch [152/200] batch [25/31] time 0.704 (0.747) data 0.000 (0.031) loss 0.7773 (0.6141) acc 81.2500 (85.8750) lr 2.9289e-04 eta 0:18:36 +epoch [152/200] batch [30/31] time 0.718 (0.741) data 0.000 (0.026) loss 1.0176 (0.6254) acc 84.3750 (86.1458) lr 2.9289e-04 eta 0:18:22 +epoch [153/200] batch [5/31] time 0.711 (0.894) data 0.000 (0.173) loss 0.6289 (0.5830) acc 84.3750 (85.0000) lr 2.8187e-04 eta 0:22:06 +epoch [153/200] batch [10/31] time 0.711 (0.804) data 0.000 (0.087) loss 0.5771 (0.7253) acc 87.5000 (85.3125) lr 2.8187e-04 eta 0:19:47 +epoch [153/200] batch [15/31] time 0.708 (0.774) data 0.000 (0.058) loss 0.8384 (0.6783) acc 87.5000 (86.2500) lr 2.8187e-04 eta 0:19:00 +epoch [153/200] batch [20/31] time 0.714 (0.758) data 0.000 (0.044) loss 0.8862 (0.6664) acc 78.1250 (86.2500) lr 2.8187e-04 eta 0:18:33 +epoch [153/200] batch [25/31] time 0.711 
(0.750) data 0.000 (0.035) loss 0.2074 (0.6378) acc 93.7500 (86.3750) lr 2.8187e-04 eta 0:18:16 +epoch [153/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.029) loss 0.3928 (0.6135) acc 90.6250 (86.9792) lr 2.8187e-04 eta 0:18:09 +epoch [154/200] batch [5/31] time 0.709 (0.890) data 0.000 (0.167) loss 0.5190 (0.5680) acc 87.5000 (86.8750) lr 2.7103e-04 eta 0:21:31 +epoch [154/200] batch [10/31] time 0.722 (0.803) data 0.000 (0.084) loss 0.5728 (0.5514) acc 90.6250 (87.5000) lr 2.7103e-04 eta 0:19:21 +epoch [154/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.056) loss 0.8726 (0.5437) acc 87.5000 (87.7083) lr 2.7103e-04 eta 0:18:32 +epoch [154/200] batch [20/31] time 0.718 (0.757) data 0.000 (0.042) loss 0.2747 (0.5560) acc 96.8750 (87.0312) lr 2.7103e-04 eta 0:18:07 +epoch [154/200] batch [25/31] time 0.710 (0.748) data 0.000 (0.034) loss 0.7480 (0.5549) acc 81.2500 (87.1250) lr 2.7103e-04 eta 0:17:51 +epoch [154/200] batch [30/31] time 0.717 (0.742) data 0.000 (0.028) loss 1.0322 (0.5624) acc 84.3750 (87.2917) lr 2.7103e-04 eta 0:17:38 +epoch [155/200] batch [5/31] time 0.710 (0.902) data 0.000 (0.177) loss 0.5518 (0.6205) acc 87.5000 (84.3750) lr 2.6037e-04 eta 0:21:21 +epoch [155/200] batch [10/31] time 0.720 (0.808) data 0.000 (0.089) loss 0.2581 (0.5613) acc 96.8750 (85.9375) lr 2.6037e-04 eta 0:19:04 +epoch [155/200] batch [15/31] time 0.719 (0.776) data 0.000 (0.059) loss 0.4783 (0.5926) acc 90.6250 (85.6250) lr 2.6037e-04 eta 0:18:15 +epoch [155/200] batch [20/31] time 0.704 (0.760) data 0.000 (0.045) loss 0.6704 (0.6146) acc 84.3750 (85.4688) lr 2.6037e-04 eta 0:17:49 +epoch [155/200] batch [25/31] time 0.720 (0.752) data 0.000 (0.036) loss 0.5303 (0.6145) acc 93.7500 (85.7500) lr 2.6037e-04 eta 0:17:33 +epoch [155/200] batch [30/31] time 0.705 (0.749) data 0.000 (0.030) loss 0.4419 (0.6318) acc 87.5000 (85.4167) lr 2.6037e-04 eta 0:17:25 +epoch [156/200] batch [5/31] time 0.708 (0.880) data 0.000 (0.163) loss 0.4004 (0.4869) acc 96.8750 (88.7500) 
lr 2.4989e-04 eta 0:20:23 +epoch [156/200] batch [10/31] time 0.706 (0.799) data 0.000 (0.082) loss 0.7969 (0.5705) acc 81.2500 (88.1250) lr 2.4989e-04 eta 0:18:26 +epoch [156/200] batch [15/31] time 0.709 (0.770) data 0.000 (0.055) loss 0.2947 (0.5327) acc 90.6250 (89.3750) lr 2.4989e-04 eta 0:17:42 +epoch [156/200] batch [20/31] time 0.704 (0.755) data 0.000 (0.041) loss 0.5923 (0.5550) acc 87.5000 (88.4375) lr 2.4989e-04 eta 0:17:17 +epoch [156/200] batch [25/31] time 0.707 (0.746) data 0.000 (0.033) loss 0.5522 (0.5750) acc 93.7500 (88.1250) lr 2.4989e-04 eta 0:17:01 +epoch [156/200] batch [30/31] time 0.715 (0.740) data 0.000 (0.027) loss 0.6489 (0.5643) acc 81.2500 (87.9167) lr 2.4989e-04 eta 0:16:49 +epoch [157/200] batch [5/31] time 0.706 (0.875) data 0.000 (0.156) loss 0.5923 (0.5037) acc 90.6250 (86.8750) lr 2.3959e-04 eta 0:19:49 +epoch [157/200] batch [10/31] time 0.723 (0.795) data 0.000 (0.078) loss 0.7231 (0.5888) acc 81.2500 (85.9375) lr 2.3959e-04 eta 0:17:56 +epoch [157/200] batch [15/31] time 0.711 (0.767) data 0.000 (0.052) loss 0.3684 (0.5603) acc 87.5000 (87.2917) lr 2.3959e-04 eta 0:17:14 +epoch [157/200] batch [20/31] time 0.710 (0.754) data 0.000 (0.039) loss 0.5342 (0.6102) acc 87.5000 (85.9375) lr 2.3959e-04 eta 0:16:53 +epoch [157/200] batch [25/31] time 0.710 (0.745) data 0.000 (0.032) loss 0.5986 (0.6168) acc 87.5000 (86.3750) lr 2.3959e-04 eta 0:16:37 +epoch [157/200] batch [30/31] time 0.705 (0.739) data 0.000 (0.026) loss 0.3064 (0.5991) acc 93.7500 (86.7708) lr 2.3959e-04 eta 0:16:25 +epoch [158/200] batch [5/31] time 0.729 (0.891) data 0.000 (0.165) loss 0.4019 (0.6582) acc 90.6250 (86.8750) lr 2.2949e-04 eta 0:19:43 +epoch [158/200] batch [10/31] time 0.714 (0.804) data 0.000 (0.083) loss 0.2920 (0.6787) acc 90.6250 (86.5625) lr 2.2949e-04 eta 0:17:43 +epoch [158/200] batch [15/31] time 0.715 (0.774) data 0.000 (0.055) loss 0.7119 (0.6890) acc 84.3750 (86.4583) lr 2.2949e-04 eta 0:17:00 +epoch [158/200] batch [20/31] time 0.728 
(0.758) data 0.000 (0.041) loss 1.0273 (0.6800) acc 78.1250 (86.4062) lr 2.2949e-04 eta 0:16:35 +epoch [158/200] batch [25/31] time 0.705 (0.749) data 0.000 (0.033) loss 0.7729 (0.7150) acc 81.2500 (85.5000) lr 2.2949e-04 eta 0:16:19 +epoch [158/200] batch [30/31] time 0.702 (0.742) data 0.000 (0.028) loss 0.7168 (0.6946) acc 87.5000 (86.1458) lr 2.2949e-04 eta 0:16:06 +epoch [159/200] batch [5/31] time 0.710 (0.887) data 0.000 (0.164) loss 0.7554 (0.7813) acc 87.5000 (83.1250) lr 2.1957e-04 eta 0:19:10 +epoch [159/200] batch [10/31] time 0.713 (0.799) data 0.000 (0.082) loss 0.8198 (0.6836) acc 87.5000 (85.6250) lr 2.1957e-04 eta 0:17:12 +epoch [159/200] batch [15/31] time 0.722 (0.780) data 0.000 (0.055) loss 0.4812 (0.6116) acc 87.5000 (87.0833) lr 2.1957e-04 eta 0:16:44 +epoch [159/200] batch [20/31] time 0.707 (0.763) data 0.000 (0.041) loss 0.2477 (0.6017) acc 90.6250 (87.1875) lr 2.1957e-04 eta 0:16:17 +epoch [159/200] batch [25/31] time 0.702 (0.752) data 0.000 (0.033) loss 0.7842 (0.6017) acc 84.3750 (87.0000) lr 2.1957e-04 eta 0:15:59 +epoch [159/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.028) loss 0.6260 (0.6139) acc 78.1250 (86.3542) lr 2.1957e-04 eta 0:15:46 +epoch [160/200] batch [5/31] time 0.711 (0.897) data 0.000 (0.174) loss 0.6704 (0.5728) acc 81.2500 (86.2500) lr 2.0984e-04 eta 0:18:55 +epoch [160/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.087) loss 0.5332 (0.6738) acc 90.6250 (84.6875) lr 2.0984e-04 eta 0:16:55 +epoch [160/200] batch [15/31] time 0.710 (0.774) data 0.000 (0.058) loss 0.3306 (0.6359) acc 93.7500 (86.0417) lr 2.0984e-04 eta 0:16:12 +epoch [160/200] batch [20/31] time 0.703 (0.757) data 0.000 (0.044) loss 0.8135 (0.6231) acc 84.3750 (87.0312) lr 2.0984e-04 eta 0:15:47 +epoch [160/200] batch [25/31] time 0.707 (0.747) data 0.000 (0.035) loss 0.6484 (0.6360) acc 90.6250 (87.1250) lr 2.0984e-04 eta 0:15:30 +epoch [160/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 0.5381 (0.5997) acc 93.7500 (87.9167) 
lr 2.0984e-04 eta 0:15:20 +epoch [161/200] batch [5/31] time 0.706 (0.882) data 0.000 (0.163) loss 0.8613 (0.6719) acc 81.2500 (86.2500) lr 2.0032e-04 eta 0:18:09 +epoch [161/200] batch [10/31] time 0.709 (0.796) data 0.000 (0.082) loss 0.2759 (0.6449) acc 93.7500 (85.6250) lr 2.0032e-04 eta 0:16:19 +epoch [161/200] batch [15/31] time 0.722 (0.769) data 0.000 (0.055) loss 0.8457 (0.6152) acc 84.3750 (86.4583) lr 2.0032e-04 eta 0:15:42 +epoch [161/200] batch [20/31] time 0.701 (0.754) data 0.000 (0.041) loss 0.7549 (0.6393) acc 78.1250 (86.0938) lr 2.0032e-04 eta 0:15:19 +epoch [161/200] batch [25/31] time 0.704 (0.746) data 0.000 (0.033) loss 0.4641 (0.6096) acc 93.7500 (86.8750) lr 2.0032e-04 eta 0:15:06 +epoch [161/200] batch [30/31] time 0.700 (0.739) data 0.000 (0.027) loss 0.3362 (0.5869) acc 90.6250 (87.1875) lr 2.0032e-04 eta 0:14:54 +epoch [162/200] batch [5/31] time 0.712 (0.918) data 0.001 (0.163) loss 0.3196 (0.4371) acc 93.7500 (90.0000) lr 1.9098e-04 eta 0:18:25 +epoch [162/200] batch [10/31] time 0.723 (0.816) data 0.000 (0.082) loss 0.5859 (0.5002) acc 84.3750 (88.7500) lr 1.9098e-04 eta 0:16:17 +epoch [162/200] batch [15/31] time 0.724 (0.781) data 0.000 (0.055) loss 0.5337 (0.5190) acc 87.5000 (88.1250) lr 1.9098e-04 eta 0:15:33 +epoch [162/200] batch [20/31] time 0.706 (0.764) data 0.000 (0.041) loss 0.5171 (0.5498) acc 87.5000 (87.9688) lr 1.9098e-04 eta 0:15:08 +epoch [162/200] batch [25/31] time 0.716 (0.754) data 0.000 (0.033) loss 0.8384 (0.5715) acc 84.3750 (87.5000) lr 1.9098e-04 eta 0:14:52 +epoch [162/200] batch [30/31] time 0.719 (0.747) data 0.000 (0.027) loss 0.8228 (0.5833) acc 84.3750 (87.3958) lr 1.9098e-04 eta 0:14:40 +epoch [163/200] batch [5/31] time 0.730 (0.884) data 0.001 (0.159) loss 0.9390 (0.8017) acc 78.1250 (81.8750) lr 1.8185e-04 eta 0:17:16 +epoch [163/200] batch [10/31] time 0.711 (0.796) data 0.000 (0.080) loss 0.4282 (0.5674) acc 93.7500 (88.4375) lr 1.8185e-04 eta 0:15:29 +epoch [163/200] batch [15/31] time 0.711 
(0.767) data 0.000 (0.053) loss 0.3855 (0.5517) acc 93.7500 (89.7917) lr 1.8185e-04 eta 0:14:51 +epoch [163/200] batch [20/31] time 0.714 (0.753) data 0.000 (0.040) loss 1.2422 (0.6478) acc 75.0000 (88.2812) lr 1.8185e-04 eta 0:14:31 +epoch [163/200] batch [25/31] time 0.711 (0.750) data 0.000 (0.032) loss 0.5166 (0.6135) acc 87.5000 (88.0000) lr 1.8185e-04 eta 0:14:24 +epoch [163/200] batch [30/31] time 0.706 (0.743) data 0.000 (0.027) loss 0.7314 (0.6141) acc 84.3750 (87.6042) lr 1.8185e-04 eta 0:14:13 +epoch [164/200] batch [5/31] time 0.712 (0.908) data 0.000 (0.183) loss 0.3894 (0.5526) acc 84.3750 (87.5000) lr 1.7292e-04 eta 0:17:16 +epoch [164/200] batch [10/31] time 0.722 (0.812) data 0.000 (0.092) loss 1.4824 (0.6670) acc 62.5000 (84.0625) lr 1.7292e-04 eta 0:15:23 +epoch [164/200] batch [15/31] time 0.713 (0.779) data 0.000 (0.061) loss 0.9170 (0.6671) acc 81.2500 (84.7917) lr 1.7292e-04 eta 0:14:42 +epoch [164/200] batch [20/31] time 0.705 (0.763) data 0.000 (0.046) loss 0.5771 (0.6388) acc 87.5000 (85.1562) lr 1.7292e-04 eta 0:14:19 +epoch [164/200] batch [25/31] time 0.710 (0.753) data 0.000 (0.037) loss 0.9893 (0.6546) acc 78.1250 (84.8750) lr 1.7292e-04 eta 0:14:04 +epoch [164/200] batch [30/31] time 0.722 (0.746) data 0.000 (0.031) loss 0.5933 (0.6376) acc 87.5000 (85.0000) lr 1.7292e-04 eta 0:13:53 +epoch [165/200] batch [5/31] time 0.711 (0.895) data 0.000 (0.168) loss 0.3267 (0.4342) acc 93.7500 (90.6250) lr 1.6419e-04 eta 0:16:33 +epoch [165/200] batch [10/31] time 0.722 (0.805) data 0.001 (0.084) loss 0.7607 (0.5198) acc 87.5000 (89.3750) lr 1.6419e-04 eta 0:14:50 +epoch [165/200] batch [15/31] time 0.709 (0.774) data 0.000 (0.056) loss 0.4583 (0.5473) acc 87.5000 (88.5417) lr 1.6419e-04 eta 0:14:12 +epoch [165/200] batch [20/31] time 0.710 (0.765) data 0.000 (0.042) loss 0.3782 (0.5748) acc 90.6250 (88.2812) lr 1.6419e-04 eta 0:13:58 +epoch [165/200] batch [25/31] time 0.708 (0.754) data 0.000 (0.034) loss 0.6074 (0.5923) acc 81.2500 (87.7500) 
lr 1.6419e-04 eta 0:13:42 +epoch [165/200] batch [30/31] time 0.712 (0.747) data 0.000 (0.028) loss 0.7729 (0.6036) acc 84.3750 (87.7083) lr 1.6419e-04 eta 0:13:30 +epoch [166/200] batch [5/31] time 0.711 (0.882) data 0.000 (0.159) loss 1.0332 (0.7159) acc 81.2500 (86.8750) lr 1.5567e-04 eta 0:15:52 +epoch [166/200] batch [10/31] time 0.709 (0.797) data 0.000 (0.080) loss 0.6460 (0.6417) acc 87.5000 (87.8125) lr 1.5567e-04 eta 0:14:16 +epoch [166/200] batch [15/31] time 0.710 (0.767) data 0.000 (0.053) loss 0.6445 (0.6633) acc 81.2500 (85.2083) lr 1.5567e-04 eta 0:13:41 +epoch [166/200] batch [20/31] time 0.711 (0.752) data 0.000 (0.040) loss 1.1123 (0.7416) acc 81.2500 (84.0625) lr 1.5567e-04 eta 0:13:21 +epoch [166/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.032) loss 0.6182 (0.7048) acc 84.3750 (85.1250) lr 1.5567e-04 eta 0:13:08 +epoch [166/200] batch [30/31] time 0.715 (0.739) data 0.000 (0.027) loss 0.5161 (0.6744) acc 84.3750 (85.3125) lr 1.5567e-04 eta 0:12:59 +epoch [167/200] batch [5/31] time 0.709 (0.890) data 0.000 (0.166) loss 0.6016 (0.4863) acc 87.5000 (91.8750) lr 1.4736e-04 eta 0:15:33 +epoch [167/200] batch [10/31] time 0.709 (0.804) data 0.000 (0.083) loss 0.4407 (0.5702) acc 87.5000 (89.3750) lr 1.4736e-04 eta 0:13:59 +epoch [167/200] batch [15/31] time 0.705 (0.773) data 0.000 (0.056) loss 0.3997 (0.5775) acc 87.5000 (88.5417) lr 1.4736e-04 eta 0:13:23 +epoch [167/200] batch [20/31] time 0.705 (0.758) data 0.000 (0.042) loss 0.6338 (0.5898) acc 87.5000 (87.9688) lr 1.4736e-04 eta 0:13:03 +epoch [167/200] batch [25/31] time 0.704 (0.749) data 0.000 (0.034) loss 0.9062 (0.6261) acc 87.5000 (87.1250) lr 1.4736e-04 eta 0:12:50 +epoch [167/200] batch [30/31] time 0.706 (0.742) data 0.000 (0.028) loss 0.8608 (0.6122) acc 81.2500 (87.3958) lr 1.4736e-04 eta 0:12:39 +epoch [168/200] batch [5/31] time 0.712 (0.891) data 0.000 (0.172) loss 0.6113 (0.5404) acc 90.6250 (85.6250) lr 1.3926e-04 eta 0:15:07 +epoch [168/200] batch [10/31] time 0.711 
(0.803) data 0.000 (0.086) loss 1.1162 (0.5629) acc 81.2500 (87.5000) lr 1.3926e-04 eta 0:13:33 +epoch [168/200] batch [15/31] time 0.710 (0.784) data 0.000 (0.058) loss 0.4529 (0.6087) acc 90.6250 (86.4583) lr 1.3926e-04 eta 0:13:09 +epoch [168/200] batch [20/31] time 0.712 (0.766) data 0.000 (0.043) loss 0.3025 (0.5364) acc 90.6250 (87.9688) lr 1.3926e-04 eta 0:12:47 +epoch [168/200] batch [25/31] time 0.720 (0.756) data 0.000 (0.035) loss 0.6670 (0.5490) acc 84.3750 (87.6250) lr 1.3926e-04 eta 0:12:33 +epoch [168/200] batch [30/31] time 0.711 (0.749) data 0.000 (0.029) loss 0.5557 (0.5592) acc 87.5000 (87.5000) lr 1.3926e-04 eta 0:12:23 +epoch [169/200] batch [5/31] time 0.718 (0.892) data 0.000 (0.163) loss 0.6353 (0.5063) acc 93.7500 (91.2500) lr 1.3137e-04 eta 0:14:39 +epoch [169/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.082) loss 0.6206 (0.5847) acc 87.5000 (90.3125) lr 1.3137e-04 eta 0:13:08 +epoch [169/200] batch [15/31] time 0.704 (0.773) data 0.000 (0.055) loss 0.3787 (0.5529) acc 93.7500 (90.0000) lr 1.3137e-04 eta 0:12:34 +epoch [169/200] batch [20/31] time 0.708 (0.757) data 0.000 (0.041) loss 1.1494 (0.6131) acc 68.7500 (87.5000) lr 1.3137e-04 eta 0:12:15 +epoch [169/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.033) loss 0.5435 (0.5994) acc 87.5000 (87.6250) lr 1.3137e-04 eta 0:12:02 +epoch [169/200] batch [30/31] time 0.709 (0.741) data 0.000 (0.027) loss 0.8975 (0.5986) acc 75.0000 (87.6042) lr 1.3137e-04 eta 0:11:53 +epoch [170/200] batch [5/31] time 0.703 (0.892) data 0.000 (0.169) loss 0.4976 (0.6230) acc 93.7500 (86.2500) lr 1.2369e-04 eta 0:14:13 +epoch [170/200] batch [10/31] time 0.708 (0.803) data 0.000 (0.084) loss 0.7153 (0.6108) acc 81.2500 (86.2500) lr 1.2369e-04 eta 0:12:43 +epoch [170/200] batch [15/31] time 0.708 (0.772) data 0.000 (0.056) loss 0.7681 (0.6029) acc 84.3750 (86.0417) lr 1.2369e-04 eta 0:12:10 +epoch [170/200] batch [20/31] time 0.717 (0.756) data 0.000 (0.042) loss 0.6191 (0.6357) acc 87.5000 (85.7812) 
lr 1.2369e-04 eta 0:11:51 +epoch [170/200] batch [25/31] time 0.710 (0.747) data 0.000 (0.034) loss 0.3696 (0.6274) acc 87.5000 (86.2500) lr 1.2369e-04 eta 0:11:38 +epoch [170/200] batch [30/31] time 0.710 (0.740) data 0.000 (0.028) loss 0.4866 (0.6197) acc 84.3750 (85.9375) lr 1.2369e-04 eta 0:11:29 +epoch [171/200] batch [5/31] time 0.709 (0.903) data 0.000 (0.175) loss 0.5269 (0.5320) acc 87.5000 (88.7500) lr 1.1623e-04 eta 0:13:54 +epoch [171/200] batch [10/31] time 0.709 (0.810) data 0.000 (0.088) loss 0.5591 (0.6229) acc 90.6250 (87.5000) lr 1.1623e-04 eta 0:12:25 +epoch [171/200] batch [15/31] time 0.705 (0.777) data 0.000 (0.059) loss 0.4385 (0.6176) acc 87.5000 (86.8750) lr 1.1623e-04 eta 0:11:51 +epoch [171/200] batch [20/31] time 0.712 (0.760) data 0.000 (0.044) loss 0.5352 (0.6187) acc 87.5000 (86.4062) lr 1.1623e-04 eta 0:11:31 +epoch [171/200] batch [25/31] time 0.702 (0.749) data 0.000 (0.035) loss 0.3828 (0.6264) acc 93.7500 (86.7500) lr 1.1623e-04 eta 0:11:18 +epoch [171/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.029) loss 0.4360 (0.6250) acc 93.7500 (86.4583) lr 1.1623e-04 eta 0:11:08 +epoch [172/200] batch [5/31] time 0.711 (0.901) data 0.000 (0.173) loss 0.7773 (0.7014) acc 84.3750 (85.6250) lr 1.0899e-04 eta 0:13:25 +epoch [172/200] batch [10/31] time 0.721 (0.807) data 0.000 (0.087) loss 0.6460 (0.7230) acc 87.5000 (85.6250) lr 1.0899e-04 eta 0:11:57 +epoch [172/200] batch [15/31] time 0.728 (0.777) data 0.000 (0.058) loss 0.7168 (0.6794) acc 93.7500 (87.0833) lr 1.0899e-04 eta 0:11:27 +epoch [172/200] batch [20/31] time 0.713 (0.761) data 0.000 (0.044) loss 0.4043 (0.6261) acc 90.6250 (87.8125) lr 1.0899e-04 eta 0:11:08 +epoch [172/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.035) loss 0.5200 (0.6174) acc 87.5000 (87.6250) lr 1.0899e-04 eta 0:10:55 +epoch [172/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.029) loss 0.4929 (0.5926) acc 87.5000 (87.7083) lr 1.0899e-04 eta 0:10:46 +epoch [173/200] batch [5/31] time 0.726 
(0.887) data 0.001 (0.165) loss 0.3508 (0.4687) acc 93.7500 (87.5000) lr 1.0197e-04 eta 0:12:45 +epoch [173/200] batch [10/31] time 0.723 (0.801) data 0.001 (0.083) loss 0.6450 (0.6043) acc 84.3750 (84.6875) lr 1.0197e-04 eta 0:11:27 +epoch [173/200] batch [15/31] time 0.712 (0.781) data 0.000 (0.055) loss 0.6763 (0.6402) acc 81.2500 (83.7500) lr 1.0197e-04 eta 0:11:06 +epoch [173/200] batch [20/31] time 0.705 (0.763) data 0.000 (0.042) loss 0.8271 (0.6426) acc 75.0000 (84.2188) lr 1.0197e-04 eta 0:10:47 +epoch [173/200] batch [25/31] time 0.722 (0.752) data 0.000 (0.033) loss 0.5010 (0.6314) acc 84.3750 (85.0000) lr 1.0197e-04 eta 0:10:33 +epoch [173/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.028) loss 0.5508 (0.6279) acc 93.7500 (85.7292) lr 1.0197e-04 eta 0:10:23 +epoch [174/200] batch [5/31] time 0.710 (0.901) data 0.000 (0.173) loss 0.7266 (0.5912) acc 87.5000 (88.7500) lr 9.5173e-05 eta 0:12:29 +epoch [174/200] batch [10/31] time 0.708 (0.806) data 0.000 (0.087) loss 0.7427 (0.6382) acc 87.5000 (87.1875) lr 9.5173e-05 eta 0:11:06 +epoch [174/200] batch [15/31] time 0.711 (0.776) data 0.000 (0.058) loss 0.4963 (0.5954) acc 84.3750 (87.5000) lr 9.5173e-05 eta 0:10:37 +epoch [174/200] batch [20/31] time 0.707 (0.766) data 0.000 (0.044) loss 0.2418 (0.5434) acc 96.8750 (88.7500) lr 9.5173e-05 eta 0:10:25 +epoch [174/200] batch [25/31] time 0.715 (0.755) data 0.000 (0.035) loss 0.5269 (0.5440) acc 87.5000 (88.5000) lr 9.5173e-05 eta 0:10:12 +epoch [174/200] batch [30/31] time 0.711 (0.747) data 0.000 (0.029) loss 1.1562 (0.5602) acc 78.1250 (88.5417) lr 9.5173e-05 eta 0:10:02 +epoch [175/200] batch [5/31] time 0.703 (0.914) data 0.000 (0.160) loss 0.8169 (0.5844) acc 78.1250 (86.8750) lr 8.8597e-05 eta 0:12:12 +epoch [175/200] batch [10/31] time 0.717 (0.813) data 0.001 (0.080) loss 0.5620 (0.5424) acc 90.6250 (88.4375) lr 8.8597e-05 eta 0:10:47 +epoch [175/200] batch [15/31] time 0.712 (0.778) data 0.000 (0.053) loss 0.2869 (0.4883) acc 93.7500 (89.7917) 
lr 8.8597e-05 eta 0:10:15 +epoch [175/200] batch [20/31] time 0.707 (0.761) data 0.000 (0.040) loss 0.4363 (0.5203) acc 81.2500 (87.9688) lr 8.8597e-05 eta 0:09:57 +epoch [175/200] batch [25/31] time 0.705 (0.751) data 0.000 (0.032) loss 0.5015 (0.5214) acc 81.2500 (87.8750) lr 8.8597e-05 eta 0:09:46 +epoch [175/200] batch [30/31] time 0.705 (0.744) data 0.000 (0.027) loss 0.4695 (0.5430) acc 93.7500 (87.8125) lr 8.8597e-05 eta 0:09:37 +epoch [176/200] batch [5/31] time 0.722 (0.899) data 0.000 (0.173) loss 0.1683 (0.3217) acc 96.8750 (93.7500) lr 8.2245e-05 eta 0:11:31 +epoch [176/200] batch [10/31] time 0.716 (0.806) data 0.000 (0.087) loss 1.0186 (0.5385) acc 81.2500 (89.0625) lr 8.2245e-05 eta 0:10:16 +epoch [176/200] batch [15/31] time 0.713 (0.775) data 0.000 (0.058) loss 0.5796 (0.5643) acc 90.6250 (88.1250) lr 8.2245e-05 eta 0:09:49 +epoch [176/200] batch [20/31] time 0.715 (0.759) data 0.001 (0.044) loss 0.6602 (0.5910) acc 81.2500 (87.0312) lr 8.2245e-05 eta 0:09:32 +epoch [176/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.035) loss 0.3362 (0.6069) acc 96.8750 (86.7500) lr 8.2245e-05 eta 0:09:21 +epoch [176/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.029) loss 0.6313 (0.6124) acc 87.5000 (86.7708) lr 8.2245e-05 eta 0:09:13 +epoch [177/200] batch [5/31] time 0.709 (0.886) data 0.000 (0.167) loss 0.5581 (0.5735) acc 87.5000 (88.7500) lr 7.6120e-05 eta 0:10:54 +epoch [177/200] batch [10/31] time 0.709 (0.813) data 0.000 (0.084) loss 0.5322 (0.5829) acc 90.6250 (87.5000) lr 7.6120e-05 eta 0:09:56 +epoch [177/200] batch [15/31] time 0.711 (0.779) data 0.000 (0.056) loss 0.4473 (0.5855) acc 87.5000 (86.2500) lr 7.6120e-05 eta 0:09:27 +epoch [177/200] batch [20/31] time 0.709 (0.762) data 0.000 (0.042) loss 0.5884 (0.5730) acc 84.3750 (86.5625) lr 7.6120e-05 eta 0:09:11 +epoch [177/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.034) loss 0.7061 (0.5702) acc 81.2500 (86.3750) lr 7.6120e-05 eta 0:08:59 +epoch [177/200] batch [30/31] time 0.705 
(0.743) data 0.000 (0.028) loss 0.6611 (0.5674) acc 87.5000 (86.8750) lr 7.6120e-05 eta 0:08:50 +epoch [178/200] batch [5/31] time 0.710 (0.890) data 0.000 (0.162) loss 0.3616 (0.4978) acc 90.6250 (90.0000) lr 7.0224e-05 eta 0:10:30 +epoch [178/200] batch [10/31] time 0.706 (0.800) data 0.000 (0.081) loss 0.5820 (0.6231) acc 93.7500 (88.4375) lr 7.0224e-05 eta 0:09:22 +epoch [178/200] batch [15/31] time 0.724 (0.771) data 0.000 (0.054) loss 0.6938 (0.6008) acc 87.5000 (88.9583) lr 7.0224e-05 eta 0:08:58 +epoch [178/200] batch [20/31] time 0.709 (0.755) data 0.000 (0.041) loss 0.8560 (0.6145) acc 81.2500 (87.8125) lr 7.0224e-05 eta 0:08:43 +epoch [178/200] batch [25/31] time 0.709 (0.746) data 0.000 (0.033) loss 0.1896 (0.6040) acc 100.0000 (87.7500) lr 7.0224e-05 eta 0:08:33 +epoch [178/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 0.6572 (0.5992) acc 87.5000 (88.2292) lr 7.0224e-05 eta 0:08:25 +epoch [179/200] batch [5/31] time 0.713 (0.892) data 0.000 (0.166) loss 0.8350 (0.5417) acc 90.6250 (88.7500) lr 6.4556e-05 eta 0:10:04 +epoch [179/200] batch [10/31] time 0.713 (0.803) data 0.000 (0.083) loss 1.1553 (0.5717) acc 71.8750 (87.5000) lr 6.4556e-05 eta 0:08:59 +epoch [179/200] batch [15/31] time 0.716 (0.772) data 0.000 (0.056) loss 0.5703 (0.5405) acc 84.3750 (87.5000) lr 6.4556e-05 eta 0:08:35 +epoch [179/200] batch [20/31] time 0.723 (0.757) data 0.000 (0.042) loss 0.8901 (0.5437) acc 81.2500 (87.6562) lr 6.4556e-05 eta 0:08:21 +epoch [179/200] batch [25/31] time 0.722 (0.748) data 0.000 (0.033) loss 0.5913 (0.5209) acc 78.1250 (87.7500) lr 6.4556e-05 eta 0:08:11 +epoch [179/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.028) loss 0.4658 (0.5516) acc 84.3750 (87.6042) lr 6.4556e-05 eta 0:08:03 +epoch [180/200] batch [5/31] time 0.715 (0.893) data 0.000 (0.168) loss 0.4045 (0.5302) acc 84.3750 (84.3750) lr 5.9119e-05 eta 0:09:36 +epoch [180/200] batch [10/31] time 0.736 (0.808) data 0.001 (0.084) loss 0.5542 (0.5750) acc 84.3750 (84.6875) 
lr 5.9119e-05 eta 0:08:37 +epoch [180/200] batch [15/31] time 0.714 (0.778) data 0.000 (0.056) loss 0.5498 (0.5834) acc 90.6250 (85.0000) lr 5.9119e-05 eta 0:08:14 +epoch [180/200] batch [20/31] time 0.722 (0.762) data 0.000 (0.042) loss 0.5371 (0.6483) acc 81.2500 (83.4375) lr 5.9119e-05 eta 0:08:00 +epoch [180/200] batch [25/31] time 0.708 (0.751) data 0.000 (0.034) loss 0.6025 (0.6429) acc 84.3750 (84.2500) lr 5.9119e-05 eta 0:07:50 +epoch [180/200] batch [30/31] time 0.724 (0.746) data 0.000 (0.028) loss 0.7544 (0.6465) acc 84.3750 (84.4792) lr 5.9119e-05 eta 0:07:42 +epoch [181/200] batch [5/31] time 0.720 (0.891) data 0.000 (0.165) loss 0.7476 (0.5499) acc 84.3750 (85.0000) lr 5.3915e-05 eta 0:09:07 +epoch [181/200] batch [10/31] time 0.711 (0.801) data 0.000 (0.083) loss 0.6519 (0.5311) acc 87.5000 (87.5000) lr 5.3915e-05 eta 0:08:08 +epoch [181/200] batch [15/31] time 0.724 (0.773) data 0.000 (0.055) loss 0.6196 (0.5980) acc 78.1250 (85.4167) lr 5.3915e-05 eta 0:07:47 +epoch [181/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.042) loss 0.5967 (0.5751) acc 90.6250 (86.5625) lr 5.3915e-05 eta 0:07:34 +epoch [181/200] batch [25/31] time 0.725 (0.748) data 0.000 (0.033) loss 0.5063 (0.5695) acc 90.6250 (86.8750) lr 5.3915e-05 eta 0:07:25 +epoch [181/200] batch [30/31] time 0.705 (0.742) data 0.000 (0.028) loss 0.5635 (0.5775) acc 93.7500 (86.8750) lr 5.3915e-05 eta 0:07:17 +epoch [182/200] batch [5/31] time 0.706 (0.879) data 0.000 (0.157) loss 0.2404 (0.5932) acc 100.0000 (88.1250) lr 4.8943e-05 eta 0:08:33 +epoch [182/200] batch [10/31] time 0.720 (0.798) data 0.000 (0.079) loss 0.9395 (0.5359) acc 71.8750 (88.7500) lr 4.8943e-05 eta 0:07:42 +epoch [182/200] batch [15/31] time 0.712 (0.771) data 0.000 (0.053) loss 1.0830 (0.5897) acc 87.5000 (88.7500) lr 4.8943e-05 eta 0:07:22 +epoch [182/200] batch [20/31] time 0.714 (0.756) data 0.000 (0.040) loss 0.3093 (0.5795) acc 93.7500 (89.2188) lr 4.8943e-05 eta 0:07:10 +epoch [182/200] batch [25/31] time 0.709 
(0.747) data 0.000 (0.032) loss 0.9180 (0.6252) acc 75.0000 (87.3750) lr 4.8943e-05 eta 0:07:01 +epoch [182/200] batch [30/31] time 0.714 (0.741) data 0.001 (0.026) loss 0.5176 (0.6280) acc 87.5000 (87.5000) lr 4.8943e-05 eta 0:06:54 +epoch [183/200] batch [5/31] time 0.713 (0.891) data 0.000 (0.171) loss 0.5386 (0.5809) acc 93.7500 (90.0000) lr 4.4207e-05 eta 0:08:12 +epoch [183/200] batch [10/31] time 0.705 (0.814) data 0.000 (0.086) loss 0.4717 (0.5368) acc 87.5000 (89.3750) lr 4.4207e-05 eta 0:07:26 +epoch [183/200] batch [15/31] time 0.717 (0.781) data 0.000 (0.057) loss 0.5303 (0.4941) acc 84.3750 (89.7917) lr 4.4207e-05 eta 0:07:04 +epoch [183/200] batch [20/31] time 0.702 (0.764) data 0.000 (0.043) loss 0.4265 (0.5296) acc 81.2500 (87.9688) lr 4.4207e-05 eta 0:06:50 +epoch [183/200] batch [25/31] time 0.724 (0.754) data 0.000 (0.035) loss 0.7822 (0.5236) acc 87.5000 (88.3750) lr 4.4207e-05 eta 0:06:41 +epoch [183/200] batch [30/31] time 0.706 (0.747) data 0.000 (0.029) loss 0.4143 (0.5198) acc 90.6250 (88.3333) lr 4.4207e-05 eta 0:06:34 +epoch [184/200] batch [5/31] time 0.707 (0.882) data 0.000 (0.161) loss 0.5049 (0.6072) acc 84.3750 (85.6250) lr 3.9706e-05 eta 0:07:40 +epoch [184/200] batch [10/31] time 0.711 (0.800) data 0.000 (0.081) loss 0.7520 (0.5722) acc 81.2500 (86.5625) lr 3.9706e-05 eta 0:06:53 +epoch [184/200] batch [15/31] time 0.712 (0.773) data 0.000 (0.054) loss 0.4946 (0.5528) acc 87.5000 (87.7083) lr 3.9706e-05 eta 0:06:35 +epoch [184/200] batch [20/31] time 0.710 (0.758) data 0.000 (0.041) loss 0.7236 (0.5680) acc 81.2500 (87.0312) lr 3.9706e-05 eta 0:06:24 +epoch [184/200] batch [25/31] time 0.719 (0.748) data 0.000 (0.033) loss 1.1182 (0.5801) acc 75.0000 (86.5000) lr 3.9706e-05 eta 0:06:15 +epoch [184/200] batch [30/31] time 0.712 (0.745) data 0.000 (0.027) loss 0.3198 (0.5521) acc 93.7500 (87.6042) lr 3.9706e-05 eta 0:06:10 +epoch [185/200] batch [5/31] time 0.713 (0.874) data 0.000 (0.153) loss 0.6382 (0.6290) acc 87.5000 (86.2500) 
lr 3.5443e-05 eta 0:07:08 +epoch [185/200] batch [10/31] time 0.713 (0.793) data 0.000 (0.077) loss 0.3572 (0.5741) acc 87.5000 (86.8750) lr 3.5443e-05 eta 0:06:25 +epoch [185/200] batch [15/31] time 0.715 (0.765) data 0.000 (0.051) loss 0.8857 (0.5881) acc 81.2500 (86.8750) lr 3.5443e-05 eta 0:06:07 +epoch [185/200] batch [20/31] time 0.713 (0.752) data 0.000 (0.039) loss 0.8726 (0.5935) acc 78.1250 (86.8750) lr 3.5443e-05 eta 0:05:57 +epoch [185/200] batch [25/31] time 0.712 (0.744) data 0.000 (0.031) loss 0.3704 (0.5776) acc 90.6250 (87.2500) lr 3.5443e-05 eta 0:05:50 +epoch [185/200] batch [30/31] time 0.715 (0.738) data 0.000 (0.026) loss 0.8257 (0.5900) acc 81.2500 (87.0833) lr 3.5443e-05 eta 0:05:44 +epoch [186/200] batch [5/31] time 0.741 (0.922) data 0.001 (0.192) loss 0.3872 (0.4430) acc 93.7500 (91.8750) lr 3.1417e-05 eta 0:07:04 +epoch [186/200] batch [10/31] time 0.712 (0.820) data 0.000 (0.096) loss 0.7388 (0.4554) acc 81.2500 (91.2500) lr 3.1417e-05 eta 0:06:13 +epoch [186/200] batch [15/31] time 0.707 (0.785) data 0.000 (0.064) loss 0.6182 (0.5277) acc 81.2500 (88.9583) lr 3.1417e-05 eta 0:05:53 +epoch [186/200] batch [20/31] time 0.709 (0.767) data 0.000 (0.048) loss 0.2395 (0.5238) acc 93.7500 (89.5312) lr 3.1417e-05 eta 0:05:41 +epoch [186/200] batch [25/31] time 0.715 (0.756) data 0.000 (0.039) loss 0.8022 (0.5392) acc 81.2500 (89.1250) lr 3.1417e-05 eta 0:05:32 +epoch [186/200] batch [30/31] time 0.709 (0.754) data 0.000 (0.032) loss 0.5327 (0.5387) acc 93.7500 (89.4792) lr 3.1417e-05 eta 0:05:27 +epoch [187/200] batch [5/31] time 0.705 (0.895) data 0.000 (0.175) loss 0.2761 (0.6041) acc 93.7500 (86.8750) lr 2.7630e-05 eta 0:06:23 +epoch [187/200] batch [10/31] time 0.726 (0.805) data 0.001 (0.088) loss 0.6479 (0.5062) acc 90.6250 (88.7500) lr 2.7630e-05 eta 0:05:41 +epoch [187/200] batch [15/31] time 0.705 (0.775) data 0.000 (0.059) loss 0.6367 (0.5393) acc 84.3750 (87.2917) lr 2.7630e-05 eta 0:05:24 +epoch [187/200] batch [20/31] time 0.711 
(0.759) data 0.000 (0.044) loss 0.7871 (0.5439) acc 87.5000 (87.9688) lr 2.7630e-05 eta 0:05:14 +epoch [187/200] batch [25/31] time 0.724 (0.750) data 0.000 (0.035) loss 0.6035 (0.6067) acc 90.6250 (86.7500) lr 2.7630e-05 eta 0:05:06 +epoch [187/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.030) loss 0.6079 (0.6341) acc 75.0000 (85.6250) lr 2.7630e-05 eta 0:05:00 +epoch [188/200] batch [5/31] time 0.712 (0.913) data 0.000 (0.185) loss 0.6992 (0.7520) acc 84.3750 (85.0000) lr 2.4083e-05 eta 0:06:03 +epoch [188/200] batch [10/31] time 0.727 (0.816) data 0.000 (0.093) loss 0.4399 (0.6442) acc 90.6250 (87.1875) lr 2.4083e-05 eta 0:05:20 +epoch [188/200] batch [15/31] time 0.711 (0.781) data 0.000 (0.062) loss 0.3320 (0.5493) acc 96.8750 (89.3750) lr 2.4083e-05 eta 0:05:03 +epoch [188/200] batch [20/31] time 0.715 (0.765) data 0.000 (0.047) loss 0.6611 (0.5645) acc 84.3750 (88.9062) lr 2.4083e-05 eta 0:04:52 +epoch [188/200] batch [25/31] time 0.705 (0.754) data 0.000 (0.037) loss 0.7095 (0.5775) acc 90.6250 (89.0000) lr 2.4083e-05 eta 0:04:45 +epoch [188/200] batch [30/31] time 0.716 (0.746) data 0.000 (0.031) loss 0.2886 (0.5689) acc 93.7500 (88.6458) lr 2.4083e-05 eta 0:04:38 +epoch [189/200] batch [5/31] time 0.711 (0.887) data 0.000 (0.162) loss 0.3789 (0.6108) acc 90.6250 (86.8750) lr 2.0777e-05 eta 0:05:25 +epoch [189/200] batch [10/31] time 0.709 (0.799) data 0.000 (0.081) loss 0.5737 (0.6161) acc 87.5000 (87.5000) lr 2.0777e-05 eta 0:04:49 +epoch [189/200] batch [15/31] time 0.710 (0.770) data 0.000 (0.054) loss 0.5146 (0.6103) acc 90.6250 (88.5417) lr 2.0777e-05 eta 0:04:34 +epoch [189/200] batch [20/31] time 0.721 (0.755) data 0.000 (0.041) loss 0.7349 (0.5909) acc 84.3750 (87.8125) lr 2.0777e-05 eta 0:04:25 +epoch [189/200] batch [25/31] time 0.702 (0.747) data 0.000 (0.033) loss 0.9917 (0.5817) acc 81.2500 (87.5000) lr 2.0777e-05 eta 0:04:19 +epoch [189/200] batch [30/31] time 0.709 (0.740) data 0.000 (0.027) loss 0.9937 (0.5987) acc 81.2500 (87.1875) 
lr 2.0777e-05 eta 0:04:13 +epoch [190/200] batch [5/31] time 0.718 (0.899) data 0.000 (0.175) loss 0.7642 (0.6048) acc 84.3750 (86.2500) lr 1.7713e-05 eta 0:05:02 +epoch [190/200] batch [10/31] time 0.705 (0.806) data 0.000 (0.088) loss 0.5015 (0.5355) acc 90.6250 (89.6875) lr 1.7713e-05 eta 0:04:26 +epoch [190/200] batch [15/31] time 0.714 (0.786) data 0.000 (0.058) loss 0.8770 (0.5124) acc 78.1250 (88.9583) lr 1.7713e-05 eta 0:04:16 +epoch [190/200] batch [20/31] time 0.710 (0.768) data 0.000 (0.044) loss 0.6099 (0.5617) acc 81.2500 (88.5938) lr 1.7713e-05 eta 0:04:06 +epoch [190/200] batch [25/31] time 0.704 (0.757) data 0.000 (0.035) loss 0.4763 (0.5519) acc 90.6250 (88.7500) lr 1.7713e-05 eta 0:03:59 +epoch [190/200] batch [30/31] time 0.712 (0.749) data 0.000 (0.029) loss 0.7725 (0.5718) acc 87.5000 (88.4375) lr 1.7713e-05 eta 0:03:53 +epoch [191/200] batch [5/31] time 0.708 (0.890) data 0.000 (0.172) loss 0.5405 (0.4627) acc 87.5000 (86.2500) lr 1.4891e-05 eta 0:04:31 +epoch [191/200] batch [10/31] time 0.723 (0.803) data 0.000 (0.086) loss 0.7803 (0.5803) acc 81.2500 (85.3125) lr 1.4891e-05 eta 0:04:00 +epoch [191/200] batch [15/31] time 0.706 (0.773) data 0.000 (0.058) loss 0.7739 (0.5909) acc 87.5000 (86.6667) lr 1.4891e-05 eta 0:03:47 +epoch [191/200] batch [20/31] time 0.714 (0.757) data 0.000 (0.043) loss 0.3147 (0.5663) acc 90.6250 (87.0312) lr 1.4891e-05 eta 0:03:39 +epoch [191/200] batch [25/31] time 0.709 (0.747) data 0.000 (0.035) loss 0.5723 (0.5360) acc 81.2500 (87.5000) lr 1.4891e-05 eta 0:03:32 +epoch [191/200] batch [30/31] time 0.714 (0.741) data 0.000 (0.029) loss 0.4197 (0.5261) acc 87.5000 (87.8125) lr 1.4891e-05 eta 0:03:27 +epoch [192/200] batch [5/31] time 0.713 (0.889) data 0.000 (0.172) loss 0.5356 (0.5300) acc 87.5000 (87.5000) lr 1.2312e-05 eta 0:04:03 +epoch [192/200] batch [10/31] time 0.708 (0.804) data 0.000 (0.086) loss 0.6499 (0.5417) acc 84.3750 (87.1875) lr 1.2312e-05 eta 0:03:36 +epoch [192/200] batch [15/31] time 0.702 
(0.774) data 0.000 (0.058) loss 0.3933 (0.5610) acc 87.5000 (87.2917) lr 1.2312e-05 eta 0:03:24 +epoch [192/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.043) loss 1.1787 (0.6332) acc 78.1250 (86.4062) lr 1.2312e-05 eta 0:03:16 +epoch [192/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.035) loss 0.2935 (0.6070) acc 90.6250 (86.5000) lr 1.2312e-05 eta 0:03:10 +epoch [192/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.029) loss 1.1133 (0.5975) acc 84.3750 (87.2917) lr 1.2312e-05 eta 0:03:04 +epoch [193/200] batch [5/31] time 0.713 (0.909) data 0.000 (0.155) loss 0.4404 (0.4575) acc 87.5000 (90.6250) lr 9.9763e-06 eta 0:03:40 +epoch [193/200] batch [10/31] time 0.709 (0.810) data 0.000 (0.078) loss 0.3950 (0.4993) acc 93.7500 (90.0000) lr 9.9763e-06 eta 0:03:12 +epoch [193/200] batch [15/31] time 0.712 (0.779) data 0.000 (0.052) loss 0.4463 (0.5383) acc 90.6250 (88.5417) lr 9.9763e-06 eta 0:03:01 +epoch [193/200] batch [20/31] time 0.706 (0.762) data 0.000 (0.039) loss 0.4087 (0.5476) acc 90.6250 (88.2812) lr 9.9763e-06 eta 0:02:53 +epoch [193/200] batch [25/31] time 0.720 (0.752) data 0.000 (0.031) loss 0.4824 (0.5555) acc 87.5000 (87.8750) lr 9.9763e-06 eta 0:02:47 +epoch [193/200] batch [30/31] time 0.717 (0.745) data 0.000 (0.026) loss 0.5425 (0.5620) acc 90.6250 (88.1250) lr 9.9763e-06 eta 0:02:42 +epoch [194/200] batch [5/31] time 0.712 (0.888) data 0.000 (0.166) loss 0.5293 (0.4829) acc 87.5000 (89.3750) lr 7.8853e-06 eta 0:03:08 +epoch [194/200] batch [10/31] time 0.723 (0.802) data 0.000 (0.083) loss 0.5112 (0.5417) acc 90.6250 (86.8750) lr 7.8853e-06 eta 0:02:46 +epoch [194/200] batch [15/31] time 0.708 (0.774) data 0.000 (0.055) loss 0.6260 (0.5466) acc 84.3750 (87.5000) lr 7.8853e-06 eta 0:02:36 +epoch [194/200] batch [20/31] time 0.727 (0.759) data 0.000 (0.042) loss 0.5225 (0.5578) acc 90.6250 (87.6562) lr 7.8853e-06 eta 0:02:29 +epoch [194/200] batch [25/31] time 0.706 (0.754) data 0.000 (0.033) loss 0.1947 (0.5088) acc 96.8750 (89.0000) 
lr 7.8853e-06 eta 0:02:24 +epoch [194/200] batch [30/31] time 0.709 (0.747) data 0.000 (0.028) loss 0.8818 (0.5127) acc 78.1250 (88.6458) lr 7.8853e-06 eta 0:02:19 +epoch [195/200] batch [5/31] time 0.713 (0.878) data 0.000 (0.157) loss 0.4041 (0.4655) acc 90.6250 (88.1250) lr 6.0390e-06 eta 0:02:38 +epoch [195/200] batch [10/31] time 0.715 (0.794) data 0.000 (0.079) loss 0.7734 (0.5737) acc 81.2500 (85.9375) lr 6.0390e-06 eta 0:02:19 +epoch [195/200] batch [15/31] time 0.720 (0.768) data 0.001 (0.052) loss 0.8638 (0.6191) acc 84.3750 (85.8333) lr 6.0390e-06 eta 0:02:11 +epoch [195/200] batch [20/31] time 0.706 (0.753) data 0.000 (0.039) loss 0.4734 (0.5753) acc 90.6250 (86.5625) lr 6.0390e-06 eta 0:02:04 +epoch [195/200] batch [25/31] time 0.707 (0.743) data 0.000 (0.032) loss 0.4275 (0.5761) acc 87.5000 (86.5000) lr 6.0390e-06 eta 0:01:59 +epoch [195/200] batch [30/31] time 0.729 (0.738) data 0.000 (0.026) loss 0.7178 (0.5929) acc 81.2500 (85.7292) lr 6.0390e-06 eta 0:01:55 +epoch [196/200] batch [5/31] time 0.711 (0.890) data 0.000 (0.170) loss 0.7964 (0.6464) acc 84.3750 (86.8750) lr 4.4380e-06 eta 0:02:13 +epoch [196/200] batch [10/31] time 0.731 (0.803) data 0.000 (0.085) loss 0.3560 (0.5503) acc 93.7500 (88.4375) lr 4.4380e-06 eta 0:01:56 +epoch [196/200] batch [15/31] time 0.716 (0.772) data 0.000 (0.057) loss 0.9194 (0.5799) acc 87.5000 (88.3333) lr 4.4380e-06 eta 0:01:48 +epoch [196/200] batch [20/31] time 0.710 (0.764) data 0.000 (0.043) loss 0.3303 (0.5499) acc 96.8750 (89.0625) lr 4.4380e-06 eta 0:01:43 +epoch [196/200] batch [25/31] time 0.725 (0.755) data 0.000 (0.034) loss 0.3518 (0.5533) acc 96.8750 (89.0000) lr 4.4380e-06 eta 0:01:38 +epoch [196/200] batch [30/31] time 0.707 (0.747) data 0.000 (0.029) loss 0.6284 (0.5708) acc 90.6250 (88.6458) lr 4.4380e-06 eta 0:01:33 +epoch [197/200] batch [5/31] time 0.718 (0.888) data 0.000 (0.168) loss 0.7339 (0.5548) acc 81.2500 (87.5000) lr 3.0827e-06 eta 0:01:45 +epoch [197/200] batch [10/31] time 0.733 
(0.801) data 0.000 (0.084) loss 0.6167 (0.5221) acc 81.2500 (88.1250) lr 3.0827e-06 eta 0:01:31 +epoch [197/200] batch [15/31] time 0.727 (0.772) data 0.000 (0.056) loss 0.4768 (0.5226) acc 84.3750 (88.3333) lr 3.0827e-06 eta 0:01:24 +epoch [197/200] batch [20/31] time 0.713 (0.756) data 0.000 (0.042) loss 0.8550 (0.5702) acc 84.3750 (87.1875) lr 3.0827e-06 eta 0:01:18 +epoch [197/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.034) loss 0.5371 (0.5292) acc 87.5000 (88.6250) lr 3.0827e-06 eta 0:01:13 +epoch [197/200] batch [30/31] time 0.723 (0.742) data 0.000 (0.028) loss 0.5938 (0.5348) acc 90.6250 (88.6458) lr 3.0827e-06 eta 0:01:09 +epoch [198/200] batch [5/31] time 0.718 (0.886) data 0.000 (0.163) loss 0.6973 (0.5996) acc 78.1250 (86.8750) lr 1.9733e-06 eta 0:01:17 +epoch [198/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.082) loss 0.8091 (0.6579) acc 84.3750 (86.2500) lr 1.9733e-06 eta 0:01:06 +epoch [198/200] batch [15/31] time 0.710 (0.770) data 0.000 (0.055) loss 0.5361 (0.6286) acc 87.5000 (87.0833) lr 1.9733e-06 eta 0:01:00 +epoch [198/200] batch [20/31] time 0.723 (0.758) data 0.000 (0.041) loss 0.3972 (0.5813) acc 90.6250 (87.9688) lr 1.9733e-06 eta 0:00:55 +epoch [198/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.033) loss 0.6602 (0.6342) acc 81.2500 (86.8750) lr 1.9733e-06 eta 0:00:50 +epoch [198/200] batch [30/31] time 0.714 (0.743) data 0.000 (0.027) loss 0.5698 (0.6117) acc 84.3750 (86.8750) lr 1.9733e-06 eta 0:00:46 +epoch [199/200] batch [5/31] time 0.717 (0.892) data 0.000 (0.163) loss 0.1622 (0.4615) acc 100.0000 (90.6250) lr 1.1101e-06 eta 0:00:50 +epoch [199/200] batch [10/31] time 0.706 (0.802) data 0.000 (0.082) loss 0.8735 (0.5222) acc 81.2500 (89.3750) lr 1.1101e-06 eta 0:00:41 +epoch [199/200] batch [15/31] time 0.715 (0.781) data 0.000 (0.055) loss 0.8413 (0.5243) acc 87.5000 (89.7917) lr 1.1101e-06 eta 0:00:36 +epoch [199/200] batch [20/31] time 0.701 (0.763) data 0.000 (0.041) loss 0.4297 (0.5435) acc 96.8750 
(89.2188) lr 1.1101e-06 eta 0:00:32 +epoch [199/200] batch [25/31] time 0.706 (0.752) data 0.000 (0.033) loss 0.6406 (0.5718) acc 90.6250 (88.5000) lr 1.1101e-06 eta 0:00:27 +epoch [199/200] batch [30/31] time 0.706 (0.746) data 0.000 (0.027) loss 0.1531 (0.5661) acc 100.0000 (88.4375) lr 1.1101e-06 eta 0:00:23 +epoch [200/200] batch [5/31] time 0.709 (0.896) data 0.000 (0.176) loss 0.4402 (0.4069) acc 96.8750 (91.8750) lr 4.9344e-07 eta 0:00:23 +epoch [200/200] batch [10/31] time 0.709 (0.805) data 0.000 (0.088) loss 0.3171 (0.4749) acc 96.8750 (90.9375) lr 4.9344e-07 eta 0:00:16 +epoch [200/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.059) loss 0.5254 (0.5168) acc 90.6250 (90.2083) lr 4.9344e-07 eta 0:00:12 +epoch [200/200] batch [20/31] time 0.714 (0.759) data 0.000 (0.044) loss 0.5879 (0.5126) acc 90.6250 (90.0000) lr 4.9344e-07 eta 0:00:08 +epoch [200/200] batch [25/31] time 0.711 (0.750) data 0.000 (0.035) loss 0.8896 (0.5643) acc 78.1250 (88.2500) lr 4.9344e-07 eta 0:00:04 +epoch [200/200] batch [30/31] time 0.711 (0.743) data 0.000 (0.030) loss 0.8833 (0.5708) acc 84.3750 (88.1250) lr 4.9344e-07 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-200 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 25,605 +* accuracy: 51.2% +* error: 48.8% +* macro_f1: 50.0% +Elapsed: 1:20:00 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..b2929f7c --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-200 diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-200 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-200 new file mode 100644 index 00000000..fee77e54 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-200 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698077718.ckb-gpu-lambda.1396520.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698077718.ckb-gpu-lambda.1396520.0 new file mode 100644 index 00000000..42a83a3f Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698077718.ckb-gpu-lambda.1396520.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..5689f2c6 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,1539 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None +trainer: CoOp +transforms: 
None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + 
CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1590.721 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/200] batch [5/31] time 0.709 (1.654) data 0.000 (0.209) loss 3.8047 (3.6379) acc 34.3750 (34.3750) lr 1.0000e-05 eta 2:50:44 +epoch [1/200] batch [10/31] time 0.711 (1.184) data 0.000 (0.105) loss 2.9688 (3.6266) acc 53.1250 (35.6250) lr 1.0000e-05 eta 2:02:09 +epoch [1/200] batch [15/31] time 0.729 (1.028) data 0.001 (0.070) loss 2.9219 (3.4673) acc 43.7500 (37.9167) lr 1.0000e-05 eta 1:45:57 +epoch [1/200] batch [20/31] time 0.716 (0.951) data 0.000 (0.053) loss 2.0703 (3.3063) acc 59.3750 (39.8438) lr 1.0000e-05 eta 1:37:55 +epoch [1/200] batch [25/31] time 0.712 (0.903) data 0.000 (0.042) loss 2.7500 (3.2285) acc 31.2500 (38.1250) lr 1.0000e-05 eta 1:32:54 +epoch [1/200] batch [30/31] time 0.716 (0.871) data 0.000 (0.035) loss 2.1719 (3.1133) acc 43.7500 (39.7917) lr 1.0000e-05 eta 1:29:33 +epoch [2/200] batch [5/31] time 0.707 (0.893) data 0.000 (0.169) loss 2.0625 (2.5371) acc 59.3750 (48.1250) lr 2.0000e-03 eta 1:31:44 +epoch [2/200] batch [10/31] time 0.724 (0.805) data 0.001 (0.085) loss 1.9395 (2.3554) acc 53.1250 (50.6250) lr 2.0000e-03 eta 1:22:36 +epoch [2/200] batch [15/31] time 0.716 (0.773) data 0.000 (0.057) loss 1.4062 (2.2290) acc 65.6250 (51.4583) lr 2.0000e-03 eta 1:19:19 +epoch [2/200] batch [20/31] time 0.719 (0.758) data 0.000 (0.043) loss 1.9258 (2.2071) acc 53.1250 (50.7812) lr 2.0000e-03 eta 1:17:43 +epoch [2/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.034) loss 2.2695 (2.1543) acc 53.1250 (52.2500) lr 2.0000e-03 eta 1:16:45 +epoch [2/200] batch [30/31] time 0.715 (0.744) data 0.000 (0.028) loss 2.0996 (2.1356) acc 43.7500 (51.3542) lr 2.0000e-03 eta 1:16:05 +epoch [3/200] batch [5/31] time 0.711 (0.890) data 0.000 (0.165) loss 2.0098 (2.1277) acc 50.0000 (53.1250) lr 1.9999e-03 eta 1:30:56 +epoch [3/200] batch [10/31] time 0.727 (0.804) 
data 0.001 (0.083) loss 2.2578 (2.0598) acc 46.8750 (54.6875) lr 1.9999e-03 eta 1:22:06 +epoch [3/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.055) loss 1.7539 (2.0201) acc 56.2500 (55.0000) lr 1.9999e-03 eta 1:18:58 +epoch [3/200] batch [20/31] time 0.705 (0.758) data 0.000 (0.041) loss 1.1543 (1.9505) acc 68.7500 (55.7812) lr 1.9999e-03 eta 1:17:17 +epoch [3/200] batch [25/31] time 0.725 (0.749) data 0.000 (0.033) loss 1.8516 (1.9237) acc 53.1250 (56.5000) lr 1.9999e-03 eta 1:16:16 +epoch [3/200] batch [30/31] time 0.726 (0.743) data 0.000 (0.028) loss 1.8340 (1.9159) acc 53.1250 (55.7292) lr 1.9999e-03 eta 1:15:37 +epoch [4/200] batch [5/31] time 0.709 (0.896) data 0.001 (0.176) loss 1.6562 (1.5807) acc 53.1250 (61.2500) lr 1.9995e-03 eta 1:31:06 +epoch [4/200] batch [10/31] time 0.706 (0.804) data 0.000 (0.088) loss 1.5430 (1.7313) acc 65.6250 (58.7500) lr 1.9995e-03 eta 1:21:40 +epoch [4/200] batch [15/31] time 0.709 (0.773) data 0.000 (0.059) loss 1.2949 (1.7861) acc 65.6250 (55.8333) lr 1.9995e-03 eta 1:18:28 +epoch [4/200] batch [20/31] time 0.717 (0.757) data 0.000 (0.044) loss 1.5830 (1.7467) acc 56.2500 (56.0938) lr 1.9995e-03 eta 1:16:47 +epoch [4/200] batch [25/31] time 0.726 (0.748) data 0.000 (0.036) loss 2.2402 (1.8125) acc 56.2500 (55.3750) lr 1.9995e-03 eta 1:15:50 +epoch [4/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.030) loss 1.4385 (1.7637) acc 78.1250 (56.3542) lr 1.9995e-03 eta 1:15:15 +epoch [5/200] batch [5/31] time 0.723 (0.897) data 0.000 (0.179) loss 1.4658 (1.7893) acc 62.5000 (61.8750) lr 1.9989e-03 eta 1:30:45 +epoch [5/200] batch [10/31] time 0.713 (0.804) data 0.000 (0.090) loss 1.7021 (1.7466) acc 56.2500 (60.3125) lr 1.9989e-03 eta 1:21:15 +epoch [5/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.060) loss 1.8281 (1.8020) acc 50.0000 (57.7083) lr 1.9989e-03 eta 1:18:05 +epoch [5/200] batch [20/31] time 0.712 (0.758) data 0.000 (0.045) loss 1.1445 (1.8159) acc 78.1250 (57.3438) lr 1.9989e-03 eta 1:16:28 +epoch 
[5/200] batch [25/31] time 0.703 (0.748) data 0.000 (0.036) loss 1.5664 (1.8088) acc 53.1250 (56.2500) lr 1.9989e-03 eta 1:15:24 +epoch [5/200] batch [30/31] time 0.727 (0.742) data 0.000 (0.030) loss 1.5967 (1.7736) acc 65.6250 (57.0833) lr 1.9989e-03 eta 1:14:45 +epoch [6/200] batch [5/31] time 0.719 (0.905) data 0.000 (0.173) loss 2.1777 (1.8941) acc 53.1250 (54.3750) lr 1.9980e-03 eta 1:31:03 +epoch [6/200] batch [10/31] time 0.709 (0.809) data 0.000 (0.087) loss 1.5176 (1.8242) acc 46.8750 (53.4375) lr 1.9980e-03 eta 1:21:25 +epoch [6/200] batch [15/31] time 0.705 (0.777) data 0.000 (0.058) loss 2.5938 (1.8923) acc 50.0000 (53.9583) lr 1.9980e-03 eta 1:18:03 +epoch [6/200] batch [20/31] time 0.703 (0.759) data 0.000 (0.044) loss 1.7578 (1.8480) acc 62.5000 (55.0000) lr 1.9980e-03 eta 1:16:13 +epoch [6/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.035) loss 1.7266 (1.7841) acc 65.6250 (56.7500) lr 1.9980e-03 eta 1:15:16 +epoch [6/200] batch [30/31] time 0.704 (0.743) data 0.000 (0.029) loss 1.8564 (1.7594) acc 53.1250 (57.3958) lr 1.9980e-03 eta 1:14:30 +epoch [7/200] batch [5/31] time 0.728 (0.887) data 0.000 (0.159) loss 1.8535 (1.7396) acc 56.2500 (61.2500) lr 1.9969e-03 eta 1:28:49 +epoch [7/200] batch [10/31] time 0.725 (0.800) data 0.001 (0.080) loss 2.1523 (1.7740) acc 46.8750 (56.2500) lr 1.9969e-03 eta 1:20:04 +epoch [7/200] batch [15/31] time 0.716 (0.771) data 0.000 (0.053) loss 1.6943 (1.8325) acc 50.0000 (54.7917) lr 1.9969e-03 eta 1:17:03 +epoch [7/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.040) loss 0.9897 (1.8052) acc 71.8750 (55.3125) lr 1.9969e-03 eta 1:15:30 +epoch [7/200] batch [25/31] time 0.712 (0.747) data 0.000 (0.032) loss 1.7158 (1.8285) acc 50.0000 (55.2500) lr 1.9969e-03 eta 1:14:34 +epoch [7/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.027) loss 2.7559 (1.8856) acc 43.7500 (55.0000) lr 1.9969e-03 eta 1:13:55 +epoch [8/200] batch [5/31] time 0.712 (0.892) data 0.000 (0.167) loss 2.5781 (2.0729) acc 43.7500 
(51.2500) lr 1.9956e-03 eta 1:28:52 +epoch [8/200] batch [10/31] time 0.718 (0.805) data 0.000 (0.084) loss 1.4668 (1.8531) acc 62.5000 (55.3125) lr 1.9956e-03 eta 1:20:09 +epoch [8/200] batch [15/31] time 0.719 (0.775) data 0.001 (0.056) loss 1.5166 (1.8248) acc 65.6250 (55.2083) lr 1.9956e-03 eta 1:17:08 +epoch [8/200] batch [20/31] time 0.723 (0.760) data 0.000 (0.042) loss 1.5000 (1.8031) acc 68.7500 (56.4062) lr 1.9956e-03 eta 1:15:32 +epoch [8/200] batch [25/31] time 0.708 (0.750) data 0.000 (0.034) loss 1.2197 (1.7323) acc 71.8750 (57.1250) lr 1.9956e-03 eta 1:14:28 +epoch [8/200] batch [30/31] time 0.724 (0.750) data 0.000 (0.028) loss 1.7861 (1.6822) acc 65.6250 (58.6458) lr 1.9956e-03 eta 1:14:23 +epoch [9/200] batch [5/31] time 0.726 (0.902) data 0.000 (0.176) loss 1.8379 (1.9916) acc 50.0000 (53.1250) lr 1.9940e-03 eta 1:29:26 +epoch [9/200] batch [10/31] time 0.711 (0.808) data 0.000 (0.088) loss 1.4307 (1.8373) acc 56.2500 (56.2500) lr 1.9940e-03 eta 1:20:00 +epoch [9/200] batch [15/31] time 0.715 (0.775) data 0.000 (0.059) loss 2.0996 (1.8068) acc 40.6250 (54.5833) lr 1.9940e-03 eta 1:16:43 +epoch [9/200] batch [20/31] time 0.705 (0.759) data 0.000 (0.044) loss 2.5938 (1.7920) acc 43.7500 (55.4688) lr 1.9940e-03 eta 1:15:01 +epoch [9/200] batch [25/31] time 0.709 (0.749) data 0.000 (0.035) loss 1.5518 (1.7948) acc 62.5000 (56.2500) lr 1.9940e-03 eta 1:13:58 +epoch [9/200] batch [30/31] time 0.711 (0.743) data 0.000 (0.030) loss 1.2998 (1.7383) acc 65.6250 (57.1875) lr 1.9940e-03 eta 1:13:22 +epoch [10/200] batch [5/31] time 0.711 (0.914) data 0.000 (0.188) loss 1.5527 (1.7641) acc 59.3750 (58.1250) lr 1.9921e-03 eta 1:30:08 +epoch [10/200] batch [10/31] time 0.714 (0.814) data 0.000 (0.094) loss 1.8535 (1.8920) acc 56.2500 (56.5625) lr 1.9921e-03 eta 1:20:14 +epoch [10/200] batch [15/31] time 0.716 (0.780) data 0.000 (0.063) loss 1.5098 (1.8217) acc 59.3750 (56.4583) lr 1.9921e-03 eta 1:16:48 +epoch [10/200] batch [20/31] time 0.713 (0.763) data 
0.000 (0.047) loss 1.4785 (1.8319) acc 59.3750 (56.0938) lr 1.9921e-03 eta 1:15:01 +epoch [10/200] batch [25/31] time 0.710 (0.752) data 0.000 (0.038) loss 2.0527 (1.7996) acc 56.2500 (56.3750) lr 1.9921e-03 eta 1:13:52 +epoch [10/200] batch [30/31] time 0.725 (0.746) data 0.000 (0.032) loss 1.8027 (1.7868) acc 53.1250 (55.9375) lr 1.9921e-03 eta 1:13:13 +epoch [11/200] batch [5/31] time 0.714 (0.902) data 0.000 (0.171) loss 1.4980 (1.8707) acc 62.5000 (56.2500) lr 1.9900e-03 eta 1:28:25 +epoch [11/200] batch [10/31] time 0.714 (0.807) data 0.000 (0.086) loss 1.5693 (1.7632) acc 65.6250 (59.3750) lr 1.9900e-03 eta 1:19:02 +epoch [11/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.057) loss 1.3018 (1.6317) acc 59.3750 (61.2500) lr 1.9900e-03 eta 1:15:51 +epoch [11/200] batch [20/31] time 0.721 (0.759) data 0.000 (0.043) loss 1.5176 (1.6302) acc 62.5000 (60.3125) lr 1.9900e-03 eta 1:14:13 +epoch [11/200] batch [25/31] time 0.709 (0.749) data 0.000 (0.034) loss 1.1523 (1.6337) acc 75.0000 (61.0000) lr 1.9900e-03 eta 1:13:14 +epoch [11/200] batch [30/31] time 0.704 (0.742) data 0.000 (0.029) loss 2.3574 (1.6860) acc 43.7500 (59.2708) lr 1.9900e-03 eta 1:12:28 +epoch [12/200] batch [5/31] time 0.709 (0.910) data 0.000 (0.179) loss 1.6855 (1.6955) acc 50.0000 (56.8750) lr 1.9877e-03 eta 1:28:44 +epoch [12/200] batch [10/31] time 0.726 (0.827) data 0.000 (0.090) loss 1.3057 (1.5716) acc 65.6250 (59.0625) lr 1.9877e-03 eta 1:20:36 +epoch [12/200] batch [15/31] time 0.716 (0.790) data 0.000 (0.060) loss 1.2666 (1.5922) acc 71.8750 (59.5833) lr 1.9877e-03 eta 1:16:56 +epoch [12/200] batch [20/31] time 0.710 (0.771) data 0.000 (0.045) loss 1.4961 (1.6080) acc 53.1250 (59.2188) lr 1.9877e-03 eta 1:14:59 +epoch [12/200] batch [25/31] time 0.713 (0.760) data 0.000 (0.036) loss 2.2773 (1.6615) acc 56.2500 (58.0000) lr 1.9877e-03 eta 1:13:51 +epoch [12/200] batch [30/31] time 0.727 (0.753) data 0.001 (0.030) loss 2.1875 (1.6562) acc 50.0000 (57.8125) lr 1.9877e-03 eta 1:13:06 
+epoch [13/200] batch [5/31] time 0.710 (0.891) data 0.000 (0.175) loss 1.3691 (1.6475) acc 53.1250 (55.6250) lr 1.9851e-03 eta 1:26:28 +epoch [13/200] batch [10/31] time 0.706 (0.800) data 0.000 (0.088) loss 1.7178 (1.6712) acc 56.2500 (58.4375) lr 1.9851e-03 eta 1:17:35 +epoch [13/200] batch [15/31] time 0.716 (0.771) data 0.000 (0.059) loss 2.4395 (1.7760) acc 53.1250 (57.0833) lr 1.9851e-03 eta 1:14:41 +epoch [13/200] batch [20/31] time 0.707 (0.756) data 0.000 (0.044) loss 1.6436 (1.6654) acc 53.1250 (59.5312) lr 1.9851e-03 eta 1:13:09 +epoch [13/200] batch [25/31] time 0.725 (0.747) data 0.000 (0.035) loss 1.9404 (1.6903) acc 59.3750 (59.2500) lr 1.9851e-03 eta 1:12:17 +epoch [13/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.030) loss 1.2393 (1.7023) acc 71.8750 (58.3333) lr 1.9851e-03 eta 1:11:35 +epoch [14/200] batch [5/31] time 0.708 (0.879) data 0.000 (0.158) loss 1.2393 (1.4477) acc 68.7500 (63.7500) lr 1.9823e-03 eta 1:24:53 +epoch [14/200] batch [10/31] time 0.706 (0.796) data 0.000 (0.079) loss 1.8350 (1.6563) acc 59.3750 (59.6875) lr 1.9823e-03 eta 1:16:48 +epoch [14/200] batch [15/31] time 0.709 (0.768) data 0.001 (0.053) loss 1.0703 (1.6360) acc 75.0000 (60.2083) lr 1.9823e-03 eta 1:13:59 +epoch [14/200] batch [20/31] time 0.710 (0.753) data 0.000 (0.040) loss 1.4248 (1.6474) acc 65.6250 (60.4688) lr 1.9823e-03 eta 1:12:32 +epoch [14/200] batch [25/31] time 0.710 (0.745) data 0.000 (0.032) loss 1.2930 (1.5828) acc 62.5000 (62.0000) lr 1.9823e-03 eta 1:11:41 +epoch [14/200] batch [30/31] time 0.706 (0.739) data 0.000 (0.027) loss 1.3760 (1.6078) acc 68.7500 (61.5625) lr 1.9823e-03 eta 1:11:02 +epoch [15/200] batch [5/31] time 0.708 (0.919) data 0.000 (0.171) loss 1.9395 (1.6592) acc 43.7500 (54.3750) lr 1.9792e-03 eta 1:28:13 +epoch [15/200] batch [10/31] time 0.710 (0.814) data 0.000 (0.086) loss 1.2656 (1.6342) acc 62.5000 (56.8750) lr 1.9792e-03 eta 1:18:04 +epoch [15/200] batch [15/31] time 0.719 (0.780) data 0.000 (0.057) loss 1.3818 
(1.5405) acc 75.0000 (60.2083) lr 1.9792e-03 eta 1:14:45 +epoch [15/200] batch [20/31] time 0.709 (0.762) data 0.000 (0.043) loss 1.7295 (1.5274) acc 59.3750 (61.4062) lr 1.9792e-03 eta 1:12:57 +epoch [15/200] batch [25/31] time 0.704 (0.751) data 0.000 (0.034) loss 1.7002 (1.5720) acc 68.7500 (60.7500) lr 1.9792e-03 eta 1:11:49 +epoch [15/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.029) loss 2.2090 (1.5871) acc 46.8750 (60.3125) lr 1.9792e-03 eta 1:11:04 +epoch [16/200] batch [5/31] time 0.717 (0.905) data 0.000 (0.182) loss 1.4619 (1.6145) acc 65.6250 (60.6250) lr 1.9759e-03 eta 1:26:25 +epoch [16/200] batch [10/31] time 0.718 (0.810) data 0.000 (0.091) loss 1.4346 (1.6315) acc 59.3750 (60.9375) lr 1.9759e-03 eta 1:17:15 +epoch [16/200] batch [15/31] time 0.702 (0.779) data 0.000 (0.061) loss 1.6279 (1.5653) acc 59.3750 (60.8333) lr 1.9759e-03 eta 1:14:13 +epoch [16/200] batch [20/31] time 0.727 (0.762) data 0.000 (0.046) loss 1.4170 (1.5965) acc 62.5000 (59.5312) lr 1.9759e-03 eta 1:12:36 +epoch [16/200] batch [25/31] time 0.709 (0.758) data 0.000 (0.037) loss 1.4219 (1.5759) acc 71.8750 (60.6250) lr 1.9759e-03 eta 1:12:07 +epoch [16/200] batch [30/31] time 0.708 (0.750) data 0.000 (0.031) loss 1.7422 (1.6062) acc 59.3750 (60.5208) lr 1.9759e-03 eta 1:11:21 +epoch [17/200] batch [5/31] time 0.714 (0.899) data 0.000 (0.176) loss 1.1543 (1.6408) acc 68.7500 (60.6250) lr 1.9724e-03 eta 1:25:23 +epoch [17/200] batch [10/31] time 0.712 (0.807) data 0.000 (0.088) loss 1.3730 (1.6330) acc 65.6250 (63.1250) lr 1.9724e-03 eta 1:16:33 +epoch [17/200] batch [15/31] time 0.707 (0.775) data 0.000 (0.059) loss 1.3711 (1.6711) acc 59.3750 (60.6250) lr 1.9724e-03 eta 1:13:28 +epoch [17/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.044) loss 1.4805 (1.6729) acc 71.8750 (61.4062) lr 1.9724e-03 eta 1:11:54 +epoch [17/200] batch [25/31] time 0.707 (0.749) data 0.000 (0.035) loss 1.8496 (1.6331) acc 65.6250 (61.6250) lr 1.9724e-03 eta 1:10:53 +epoch [17/200] batch 
[30/31] time 0.709 (0.743) data 0.000 (0.030) loss 1.1377 (1.6218) acc 75.0000 (61.6667) lr 1.9724e-03 eta 1:10:15 +epoch [18/200] batch [5/31] time 0.715 (0.938) data 0.000 (0.214) loss 1.3115 (1.6750) acc 59.3750 (56.8750) lr 1.9686e-03 eta 1:28:35 +epoch [18/200] batch [10/31] time 0.711 (0.826) data 0.000 (0.107) loss 1.0293 (1.5705) acc 71.8750 (59.6875) lr 1.9686e-03 eta 1:17:55 +epoch [18/200] batch [15/31] time 0.715 (0.789) data 0.000 (0.072) loss 1.7881 (1.6779) acc 50.0000 (58.3333) lr 1.9686e-03 eta 1:14:22 +epoch [18/200] batch [20/31] time 0.707 (0.775) data 0.000 (0.054) loss 1.1055 (1.5758) acc 71.8750 (60.4688) lr 1.9686e-03 eta 1:13:03 +epoch [18/200] batch [25/31] time 0.718 (0.763) data 0.000 (0.043) loss 1.8184 (1.5340) acc 46.8750 (61.6250) lr 1.9686e-03 eta 1:11:48 +epoch [18/200] batch [30/31] time 0.706 (0.754) data 0.000 (0.036) loss 1.5215 (1.5260) acc 62.5000 (62.1875) lr 1.9686e-03 eta 1:10:54 +epoch [19/200] batch [5/31] time 0.711 (0.898) data 0.000 (0.172) loss 1.6133 (1.5410) acc 56.2500 (64.3750) lr 1.9646e-03 eta 1:24:19 +epoch [19/200] batch [10/31] time 0.712 (0.806) data 0.000 (0.086) loss 2.0273 (1.6455) acc 53.1250 (59.6875) lr 1.9646e-03 eta 1:15:37 +epoch [19/200] batch [15/31] time 0.712 (0.775) data 0.000 (0.058) loss 1.8350 (1.6132) acc 56.2500 (60.2083) lr 1.9646e-03 eta 1:12:42 +epoch [19/200] batch [20/31] time 0.709 (0.760) data 0.000 (0.043) loss 1.4590 (1.6078) acc 65.6250 (60.7812) lr 1.9646e-03 eta 1:11:12 +epoch [19/200] batch [25/31] time 0.714 (0.750) data 0.000 (0.035) loss 1.3887 (1.6261) acc 62.5000 (60.1250) lr 1.9646e-03 eta 1:10:14 +epoch [19/200] batch [30/31] time 0.719 (0.744) data 0.000 (0.029) loss 1.6357 (1.5813) acc 59.3750 (61.7708) lr 1.9646e-03 eta 1:09:36 +epoch [20/200] batch [5/31] time 0.723 (0.908) data 0.000 (0.184) loss 1.4756 (1.6254) acc 65.6250 (60.6250) lr 1.9603e-03 eta 1:24:52 +epoch [20/200] batch [10/31] time 0.709 (0.811) data 0.000 (0.092) loss 1.3955 (1.6717) acc 62.5000 
(59.3750) lr 1.9603e-03 eta 1:15:44 +epoch [20/200] batch [15/31] time 0.716 (0.779) data 0.000 (0.062) loss 1.4043 (1.6029) acc 65.6250 (60.6250) lr 1.9603e-03 eta 1:12:39 +epoch [20/200] batch [20/31] time 0.722 (0.762) data 0.000 (0.046) loss 1.6367 (1.6331) acc 62.5000 (59.6875) lr 1.9603e-03 eta 1:11:02 +epoch [20/200] batch [25/31] time 0.711 (0.753) data 0.000 (0.037) loss 2.2227 (1.6453) acc 43.7500 (59.3750) lr 1.9603e-03 eta 1:10:05 +epoch [20/200] batch [30/31] time 0.707 (0.746) data 0.000 (0.031) loss 1.9463 (1.6697) acc 62.5000 (59.2708) lr 1.9603e-03 eta 1:09:25 +epoch [21/200] batch [5/31] time 0.709 (0.898) data 0.000 (0.176) loss 2.6191 (1.6785) acc 50.0000 (58.7500) lr 1.9558e-03 eta 1:23:28 +epoch [21/200] batch [10/31] time 0.713 (0.822) data 0.000 (0.088) loss 1.1572 (1.7097) acc 68.7500 (59.0625) lr 1.9558e-03 eta 1:16:16 +epoch [21/200] batch [15/31] time 0.724 (0.786) data 0.000 (0.059) loss 1.6855 (1.6930) acc 62.5000 (58.7500) lr 1.9558e-03 eta 1:12:53 +epoch [21/200] batch [20/31] time 0.709 (0.767) data 0.000 (0.044) loss 1.7627 (1.6282) acc 56.2500 (60.3125) lr 1.9558e-03 eta 1:11:06 +epoch [21/200] batch [25/31] time 0.708 (0.756) data 0.000 (0.036) loss 1.4521 (1.6083) acc 68.7500 (61.3750) lr 1.9558e-03 eta 1:09:58 +epoch [21/200] batch [30/31] time 0.708 (0.749) data 0.000 (0.030) loss 1.5371 (1.6189) acc 59.3750 (60.4167) lr 1.9558e-03 eta 1:09:15 +epoch [22/200] batch [5/31] time 0.726 (0.930) data 0.000 (0.204) loss 1.7803 (1.8082) acc 56.2500 (58.7500) lr 1.9511e-03 eta 1:25:58 +epoch [22/200] batch [10/31] time 0.708 (0.821) data 0.000 (0.102) loss 2.0586 (1.6085) acc 46.8750 (60.3125) lr 1.9511e-03 eta 1:15:45 +epoch [22/200] batch [15/31] time 0.708 (0.786) data 0.000 (0.068) loss 1.1543 (1.5619) acc 65.6250 (61.0417) lr 1.9511e-03 eta 1:12:28 +epoch [22/200] batch [20/31] time 0.720 (0.767) data 0.000 (0.051) loss 1.7148 (1.5820) acc 53.1250 (61.2500) lr 1.9511e-03 eta 1:10:43 +epoch [22/200] batch [25/31] time 0.710 
(0.756) data 0.000 (0.041) loss 2.0762 (1.6579) acc 56.2500 (59.7500) lr 1.9511e-03 eta 1:09:37 +epoch [22/200] batch [30/31] time 0.730 (0.749) data 0.000 (0.034) loss 1.8047 (1.6624) acc 68.7500 (60.1042) lr 1.9511e-03 eta 1:08:54 +epoch [23/200] batch [5/31] time 0.721 (0.891) data 0.000 (0.165) loss 1.1797 (1.4531) acc 65.6250 (65.0000) lr 1.9461e-03 eta 1:21:52 +epoch [23/200] batch [10/31] time 0.702 (0.802) data 0.000 (0.083) loss 1.8398 (1.5933) acc 68.7500 (60.9375) lr 1.9461e-03 eta 1:13:37 +epoch [23/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.055) loss 1.5977 (1.5777) acc 59.3750 (61.8750) lr 1.9461e-03 eta 1:10:54 +epoch [23/200] batch [20/31] time 0.701 (0.757) data 0.000 (0.041) loss 1.5234 (1.5854) acc 59.3750 (61.0938) lr 1.9461e-03 eta 1:09:19 +epoch [23/200] batch [25/31] time 0.708 (0.747) data 0.000 (0.033) loss 1.0410 (1.5160) acc 62.5000 (62.2500) lr 1.9461e-03 eta 1:08:25 +epoch [23/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.028) loss 1.7539 (1.5126) acc 56.2500 (62.2917) lr 1.9461e-03 eta 1:07:50 +epoch [24/200] batch [5/31] time 0.715 (0.896) data 0.001 (0.171) loss 1.2979 (1.8410) acc 75.0000 (56.8750) lr 1.9409e-03 eta 1:21:52 +epoch [24/200] batch [10/31] time 0.710 (0.806) data 0.000 (0.086) loss 0.8516 (1.6808) acc 71.8750 (60.0000) lr 1.9409e-03 eta 1:13:36 +epoch [24/200] batch [15/31] time 0.712 (0.776) data 0.000 (0.057) loss 1.9893 (1.6862) acc 53.1250 (60.6250) lr 1.9409e-03 eta 1:10:47 +epoch [24/200] batch [20/31] time 0.719 (0.760) data 0.000 (0.043) loss 1.5117 (1.6150) acc 68.7500 (61.5625) lr 1.9409e-03 eta 1:09:14 +epoch [24/200] batch [25/31] time 0.720 (0.750) data 0.000 (0.034) loss 2.4199 (1.6154) acc 43.7500 (61.5000) lr 1.9409e-03 eta 1:08:18 +epoch [24/200] batch [30/31] time 0.711 (0.745) data 0.000 (0.029) loss 1.4199 (1.6281) acc 56.2500 (61.6667) lr 1.9409e-03 eta 1:07:43 +epoch [25/200] batch [5/31] time 0.725 (0.910) data 0.000 (0.185) loss 2.1660 (1.6475) acc 59.3750 (61.8750) lr 1.9354e-03 
eta 1:22:41 +epoch [25/200] batch [10/31] time 0.714 (0.812) data 0.001 (0.093) loss 1.2109 (1.4724) acc 65.6250 (63.7500) lr 1.9354e-03 eta 1:13:41 +epoch [25/200] batch [15/31] time 0.718 (0.780) data 0.001 (0.062) loss 1.3203 (1.4885) acc 62.5000 (63.3333) lr 1.9354e-03 eta 1:10:43 +epoch [25/200] batch [20/31] time 0.712 (0.763) data 0.000 (0.047) loss 1.0986 (1.4538) acc 75.0000 (65.0000) lr 1.9354e-03 eta 1:09:07 +epoch [25/200] batch [25/31] time 0.706 (0.752) data 0.000 (0.037) loss 1.3779 (1.4501) acc 59.3750 (64.7500) lr 1.9354e-03 eta 1:08:05 +epoch [25/200] batch [30/31] time 0.714 (0.745) data 0.000 (0.031) loss 1.9180 (1.4219) acc 62.5000 (65.5208) lr 1.9354e-03 eta 1:07:22 +epoch [26/200] batch [5/31] time 0.727 (0.908) data 0.001 (0.176) loss 1.8496 (1.5141) acc 53.1250 (62.5000) lr 1.9298e-03 eta 1:22:00 +epoch [26/200] batch [10/31] time 0.711 (0.812) data 0.000 (0.088) loss 1.2754 (1.4520) acc 65.6250 (64.6875) lr 1.9298e-03 eta 1:13:16 +epoch [26/200] batch [15/31] time 0.714 (0.787) data 0.000 (0.059) loss 1.3545 (1.4771) acc 75.0000 (65.4167) lr 1.9298e-03 eta 1:10:59 +epoch [26/200] batch [20/31] time 0.707 (0.768) data 0.001 (0.044) loss 1.1719 (1.4102) acc 68.7500 (66.5625) lr 1.9298e-03 eta 1:09:12 +epoch [26/200] batch [25/31] time 0.703 (0.756) data 0.000 (0.035) loss 1.7588 (1.4304) acc 62.5000 (66.1250) lr 1.9298e-03 eta 1:08:04 +epoch [26/200] batch [30/31] time 0.711 (0.749) data 0.000 (0.030) loss 1.7705 (1.4262) acc 62.5000 (66.0417) lr 1.9298e-03 eta 1:07:18 +epoch [27/200] batch [5/31] time 0.711 (0.899) data 0.000 (0.170) loss 1.8711 (1.4176) acc 53.1250 (63.7500) lr 1.9239e-03 eta 1:20:45 +epoch [27/200] batch [10/31] time 0.712 (0.805) data 0.000 (0.085) loss 1.9883 (1.5915) acc 62.5000 (62.5000) lr 1.9239e-03 eta 1:12:14 +epoch [27/200] batch [15/31] time 0.713 (0.776) data 0.000 (0.057) loss 1.5889 (1.5369) acc 62.5000 (63.7500) lr 1.9239e-03 eta 1:09:33 +epoch [27/200] batch [20/31] time 0.711 (0.760) data 0.000 (0.043) 
loss 1.1221 (1.5687) acc 68.7500 (62.3438) lr 1.9239e-03 eta 1:08:03 +epoch [27/200] batch [25/31] time 0.710 (0.750) data 0.000 (0.034) loss 1.4814 (1.5453) acc 59.3750 (63.1250) lr 1.9239e-03 eta 1:07:07 +epoch [27/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.029) loss 2.0332 (1.5292) acc 56.2500 (64.0625) lr 1.9239e-03 eta 1:06:30 +epoch [28/200] batch [5/31] time 0.707 (0.923) data 0.000 (0.167) loss 0.8174 (1.3941) acc 78.1250 (57.5000) lr 1.9178e-03 eta 1:22:24 +epoch [28/200] batch [10/31] time 0.720 (0.816) data 0.000 (0.084) loss 0.9771 (1.3794) acc 71.8750 (62.5000) lr 1.9178e-03 eta 1:12:48 +epoch [28/200] batch [15/31] time 0.703 (0.780) data 0.000 (0.056) loss 1.5088 (1.4621) acc 68.7500 (62.9167) lr 1.9178e-03 eta 1:09:30 +epoch [28/200] batch [20/31] time 0.700 (0.761) data 0.000 (0.042) loss 1.5986 (1.4273) acc 53.1250 (63.5938) lr 1.9178e-03 eta 1:07:45 +epoch [28/200] batch [25/31] time 0.704 (0.750) data 0.000 (0.034) loss 1.6172 (1.4335) acc 53.1250 (63.5000) lr 1.9178e-03 eta 1:06:43 +epoch [28/200] batch [30/31] time 0.706 (0.743) data 0.000 (0.028) loss 1.2891 (1.4090) acc 68.7500 (63.9583) lr 1.9178e-03 eta 1:06:02 +epoch [29/200] batch [5/31] time 0.710 (0.899) data 0.000 (0.176) loss 1.5146 (1.4457) acc 68.7500 (67.5000) lr 1.9114e-03 eta 1:19:50 +epoch [29/200] batch [10/31] time 0.735 (0.807) data 0.000 (0.088) loss 1.2363 (1.4146) acc 56.2500 (67.1875) lr 1.9114e-03 eta 1:11:37 +epoch [29/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.059) loss 1.4961 (1.4114) acc 75.0000 (66.4583) lr 1.9114e-03 eta 1:08:48 +epoch [29/200] batch [20/31] time 0.714 (0.761) data 0.000 (0.044) loss 2.0039 (1.4705) acc 56.2500 (65.3125) lr 1.9114e-03 eta 1:07:19 +epoch [29/200] batch [25/31] time 0.708 (0.751) data 0.000 (0.036) loss 2.1445 (1.4887) acc 56.2500 (65.0000) lr 1.9114e-03 eta 1:06:26 +epoch [29/200] batch [30/31] time 0.714 (0.744) data 0.000 (0.030) loss 1.1572 (1.4825) acc 78.1250 (64.6875) lr 1.9114e-03 eta 1:05:47 +epoch 
[30/200] batch [5/31] time 0.703 (0.895) data 0.000 (0.175) loss 1.1729 (1.4143) acc 59.3750 (64.3750) lr 1.9048e-03 eta 1:18:59 +epoch [30/200] batch [10/31] time 0.714 (0.822) data 0.000 (0.088) loss 1.8906 (1.5108) acc 53.1250 (62.1875) lr 1.9048e-03 eta 1:12:26 +epoch [30/200] batch [15/31] time 0.717 (0.786) data 0.000 (0.058) loss 1.5479 (1.5159) acc 56.2500 (63.9583) lr 1.9048e-03 eta 1:09:12 +epoch [30/200] batch [20/31] time 0.710 (0.767) data 0.000 (0.044) loss 1.5361 (1.4953) acc 59.3750 (63.2812) lr 1.9048e-03 eta 1:07:30 +epoch [30/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.035) loss 1.4824 (1.4379) acc 65.6250 (65.0000) lr 1.9048e-03 eta 1:06:22 +epoch [30/200] batch [30/31] time 0.708 (0.747) data 0.000 (0.029) loss 1.2314 (1.4636) acc 62.5000 (63.7500) lr 1.9048e-03 eta 1:05:35 +epoch [31/200] batch [5/31] time 0.715 (0.891) data 0.000 (0.169) loss 1.1211 (1.3624) acc 62.5000 (61.2500) lr 1.8980e-03 eta 1:18:12 +epoch [31/200] batch [10/31] time 0.711 (0.801) data 0.000 (0.084) loss 1.3213 (1.2927) acc 65.6250 (65.6250) lr 1.8980e-03 eta 1:10:15 +epoch [31/200] batch [15/31] time 0.720 (0.773) data 0.000 (0.056) loss 1.2451 (1.4194) acc 62.5000 (63.7500) lr 1.8980e-03 eta 1:07:40 +epoch [31/200] batch [20/31] time 0.710 (0.758) data 0.000 (0.042) loss 1.3242 (1.4593) acc 65.6250 (62.8125) lr 1.8980e-03 eta 1:06:18 +epoch [31/200] batch [25/31] time 0.710 (0.748) data 0.000 (0.034) loss 1.2988 (1.4593) acc 65.6250 (63.1250) lr 1.8980e-03 eta 1:05:24 +epoch [31/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.028) loss 1.5781 (1.4691) acc 62.5000 (63.2292) lr 1.8980e-03 eta 1:04:50 +epoch [32/200] batch [5/31] time 0.709 (0.922) data 0.000 (0.199) loss 1.6514 (1.4010) acc 59.3750 (66.8750) lr 1.8910e-03 eta 1:20:23 +epoch [32/200] batch [10/31] time 0.708 (0.817) data 0.000 (0.099) loss 1.0176 (1.4223) acc 75.0000 (65.0000) lr 1.8910e-03 eta 1:11:11 +epoch [32/200] batch [15/31] time 0.704 (0.782) data 0.000 (0.066) loss 1.8574 (1.3962) 
acc 59.3750 (66.6667) lr 1.8910e-03 eta 1:08:04 +epoch [32/200] batch [20/31] time 0.725 (0.765) data 0.000 (0.050) loss 1.6641 (1.4007) acc 59.3750 (66.7188) lr 1.8910e-03 eta 1:06:32 +epoch [32/200] batch [25/31] time 0.712 (0.755) data 0.000 (0.040) loss 1.2793 (1.3878) acc 71.8750 (66.6250) lr 1.8910e-03 eta 1:05:38 +epoch [32/200] batch [30/31] time 0.723 (0.748) data 0.000 (0.033) loss 1.3955 (1.3495) acc 62.5000 (67.5000) lr 1.8910e-03 eta 1:04:58 +epoch [33/200] batch [5/31] time 0.731 (0.891) data 0.000 (0.169) loss 1.6602 (1.3835) acc 62.5000 (66.8750) lr 1.8838e-03 eta 1:17:17 +epoch [33/200] batch [10/31] time 0.713 (0.803) data 0.000 (0.085) loss 0.9902 (1.3822) acc 78.1250 (68.1250) lr 1.8838e-03 eta 1:09:36 +epoch [33/200] batch [15/31] time 0.712 (0.776) data 0.000 (0.057) loss 1.3467 (1.4030) acc 59.3750 (66.6667) lr 1.8838e-03 eta 1:07:09 +epoch [33/200] batch [20/31] time 0.707 (0.759) data 0.000 (0.043) loss 1.2441 (1.3847) acc 71.8750 (66.0938) lr 1.8838e-03 eta 1:05:35 +epoch [33/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.034) loss 1.3086 (1.3956) acc 56.2500 (65.7500) lr 1.8838e-03 eta 1:04:44 +epoch [33/200] batch [30/31] time 0.728 (0.744) data 0.000 (0.028) loss 1.7373 (1.4295) acc 59.3750 (65.2083) lr 1.8838e-03 eta 1:04:14 +epoch [34/200] batch [5/31] time 0.709 (0.913) data 0.000 (0.184) loss 1.1162 (1.1192) acc 78.1250 (70.6250) lr 1.8763e-03 eta 1:18:40 +epoch [34/200] batch [10/31] time 0.737 (0.817) data 0.000 (0.092) loss 0.9731 (1.1569) acc 71.8750 (69.3750) lr 1.8763e-03 eta 1:10:19 +epoch [34/200] batch [15/31] time 0.715 (0.782) data 0.000 (0.062) loss 1.3623 (1.1947) acc 71.8750 (67.0833) lr 1.8763e-03 eta 1:07:16 +epoch [34/200] batch [20/31] time 0.719 (0.766) data 0.000 (0.046) loss 1.5068 (1.2394) acc 53.1250 (66.7188) lr 1.8763e-03 eta 1:05:50 +epoch [34/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.037) loss 1.3535 (1.3258) acc 59.3750 (66.2500) lr 1.8763e-03 eta 1:04:50 +epoch [34/200] batch [30/31] time 
0.713 (0.748) data 0.000 (0.031) loss 0.9297 (1.3132) acc 75.0000 (65.9375) lr 1.8763e-03 eta 1:04:10 +epoch [35/200] batch [5/31] time 0.713 (0.896) data 0.000 (0.169) loss 1.8164 (1.4322) acc 59.3750 (67.5000) lr 1.8686e-03 eta 1:16:44 +epoch [35/200] batch [10/31] time 0.707 (0.804) data 0.000 (0.085) loss 1.1211 (1.3814) acc 71.8750 (66.2500) lr 1.8686e-03 eta 1:08:50 +epoch [35/200] batch [15/31] time 0.724 (0.775) data 0.000 (0.057) loss 1.9600 (1.4862) acc 50.0000 (63.7500) lr 1.8686e-03 eta 1:06:14 +epoch [35/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.043) loss 1.5479 (1.4174) acc 59.3750 (64.6875) lr 1.8686e-03 eta 1:04:44 +epoch [35/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.034) loss 1.4697 (1.4525) acc 65.6250 (64.0000) lr 1.8686e-03 eta 1:03:51 +epoch [35/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.028) loss 1.1992 (1.4119) acc 59.3750 (65.0000) lr 1.8686e-03 eta 1:03:16 +epoch [36/200] batch [5/31] time 0.717 (0.882) data 0.000 (0.162) loss 1.5312 (1.2885) acc 59.3750 (64.3750) lr 1.8607e-03 eta 1:15:05 +epoch [36/200] batch [10/31] time 0.704 (0.812) data 0.000 (0.081) loss 1.4375 (1.3363) acc 56.2500 (66.2500) lr 1.8607e-03 eta 1:09:05 +epoch [36/200] batch [15/31] time 0.712 (0.779) data 0.000 (0.054) loss 1.2988 (1.3466) acc 65.6250 (66.4583) lr 1.8607e-03 eta 1:06:10 +epoch [36/200] batch [20/31] time 0.703 (0.761) data 0.000 (0.041) loss 1.5303 (1.3474) acc 71.8750 (67.3438) lr 1.8607e-03 eta 1:04:38 +epoch [36/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.033) loss 1.1025 (1.3688) acc 71.8750 (67.0000) lr 1.8607e-03 eta 1:03:43 +epoch [36/200] batch [30/31] time 0.704 (0.744) data 0.000 (0.027) loss 1.6143 (1.3615) acc 53.1250 (66.6667) lr 1.8607e-03 eta 1:03:02 +epoch [37/200] batch [5/31] time 0.713 (0.899) data 0.000 (0.178) loss 1.2412 (1.3699) acc 68.7500 (68.7500) lr 1.8526e-03 eta 1:16:06 +epoch [37/200] batch [10/31] time 0.720 (0.808) data 0.000 (0.089) loss 1.1436 (1.3761) acc 87.5000 (68.1250) lr 
1.8526e-03 eta 1:08:19 +epoch [37/200] batch [15/31] time 0.712 (0.775) data 0.000 (0.060) loss 1.8086 (1.3442) acc 59.3750 (68.7500) lr 1.8526e-03 eta 1:05:29 +epoch [37/200] batch [20/31] time 0.713 (0.759) data 0.000 (0.045) loss 0.7715 (1.3500) acc 81.2500 (67.9688) lr 1.8526e-03 eta 1:04:04 +epoch [37/200] batch [25/31] time 0.719 (0.750) data 0.000 (0.036) loss 1.5449 (1.3515) acc 56.2500 (67.3750) lr 1.8526e-03 eta 1:03:16 +epoch [37/200] batch [30/31] time 0.708 (0.748) data 0.000 (0.030) loss 1.7646 (1.3754) acc 53.1250 (66.1458) lr 1.8526e-03 eta 1:03:02 +epoch [38/200] batch [5/31] time 0.710 (0.886) data 0.000 (0.165) loss 1.4717 (1.3408) acc 59.3750 (63.1250) lr 1.8443e-03 eta 1:14:33 +epoch [38/200] batch [10/31] time 0.708 (0.802) data 0.000 (0.083) loss 1.8008 (1.3038) acc 53.1250 (65.0000) lr 1.8443e-03 eta 1:07:23 +epoch [38/200] batch [15/31] time 0.719 (0.775) data 0.000 (0.055) loss 1.0771 (1.2716) acc 78.1250 (67.7083) lr 1.8443e-03 eta 1:05:02 +epoch [38/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.042) loss 0.9546 (1.2540) acc 71.8750 (68.5938) lr 1.8443e-03 eta 1:03:33 +epoch [38/200] batch [25/31] time 0.722 (0.748) data 0.000 (0.033) loss 1.5156 (1.2609) acc 75.0000 (68.8750) lr 1.8443e-03 eta 1:02:39 +epoch [38/200] batch [30/31] time 0.705 (0.741) data 0.000 (0.028) loss 1.2930 (1.2801) acc 65.6250 (68.7500) lr 1.8443e-03 eta 1:02:01 +epoch [39/200] batch [5/31] time 0.729 (0.902) data 0.000 (0.173) loss 1.0117 (1.0393) acc 75.0000 (71.2500) lr 1.8358e-03 eta 1:15:24 +epoch [39/200] batch [10/31] time 0.712 (0.810) data 0.000 (0.087) loss 1.0859 (1.2317) acc 75.0000 (69.6875) lr 1.8358e-03 eta 1:07:39 +epoch [39/200] batch [15/31] time 0.711 (0.778) data 0.000 (0.058) loss 0.7188 (1.2293) acc 75.0000 (69.7917) lr 1.8358e-03 eta 1:04:53 +epoch [39/200] batch [20/31] time 0.709 (0.761) data 0.000 (0.044) loss 1.2529 (1.2355) acc 78.1250 (70.3125) lr 1.8358e-03 eta 1:03:24 +epoch [39/200] batch [25/31] time 0.710 (0.751) data 0.000 
(0.035) loss 1.1523 (1.2279) acc 78.1250 (70.6250) lr 1.8358e-03 eta 1:02:31 +epoch [39/200] batch [30/31] time 0.727 (0.749) data 0.000 (0.029) loss 1.4209 (1.2868) acc 68.7500 (70.0000) lr 1.8358e-03 eta 1:02:21 +epoch [40/200] batch [5/31] time 0.723 (0.899) data 0.000 (0.177) loss 1.2715 (1.1658) acc 71.8750 (69.3750) lr 1.8271e-03 eta 1:14:43 +epoch [40/200] batch [10/31] time 0.709 (0.806) data 0.000 (0.089) loss 1.1924 (1.2437) acc 75.0000 (70.3125) lr 1.8271e-03 eta 1:06:52 +epoch [40/200] batch [15/31] time 0.711 (0.774) data 0.000 (0.059) loss 1.0713 (1.2579) acc 71.8750 (69.7917) lr 1.8271e-03 eta 1:04:09 +epoch [40/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.044) loss 1.3164 (1.3072) acc 68.7500 (68.9062) lr 1.8271e-03 eta 1:02:48 +epoch [40/200] batch [25/31] time 0.710 (0.748) data 0.000 (0.036) loss 1.4365 (1.3027) acc 68.7500 (68.0000) lr 1.8271e-03 eta 1:01:57 +epoch [40/200] batch [30/31] time 0.705 (0.742) data 0.000 (0.030) loss 1.6543 (1.2883) acc 50.0000 (68.0208) lr 1.8271e-03 eta 1:01:18 +epoch [41/200] batch [5/31] time 0.727 (0.884) data 0.000 (0.161) loss 1.2734 (1.5713) acc 68.7500 (61.2500) lr 1.8181e-03 eta 1:13:01 +epoch [41/200] batch [10/31] time 0.719 (0.798) data 0.000 (0.081) loss 1.4287 (1.4781) acc 68.7500 (66.2500) lr 1.8181e-03 eta 1:05:50 +epoch [41/200] batch [15/31] time 0.715 (0.770) data 0.000 (0.054) loss 1.2686 (1.4055) acc 65.6250 (66.4583) lr 1.8181e-03 eta 1:03:27 +epoch [41/200] batch [20/31] time 0.721 (0.756) data 0.000 (0.040) loss 1.1758 (1.3556) acc 75.0000 (68.2812) lr 1.8181e-03 eta 1:02:12 +epoch [41/200] batch [25/31] time 0.714 (0.746) data 0.000 (0.032) loss 1.4512 (1.3166) acc 62.5000 (69.6250) lr 1.8181e-03 eta 1:01:20 +epoch [41/200] batch [30/31] time 0.709 (0.740) data 0.000 (0.027) loss 1.1357 (1.3166) acc 65.6250 (69.2708) lr 1.8181e-03 eta 1:00:46 +epoch [42/200] batch [5/31] time 0.713 (0.887) data 0.000 (0.162) loss 1.7129 (1.4745) acc 59.3750 (65.6250) lr 1.8090e-03 eta 1:12:46 +epoch 
[42/200] batch [10/31] time 0.724 (0.802) data 0.000 (0.081) loss 1.3965 (1.4895) acc 62.5000 (63.4375) lr 1.8090e-03 eta 1:05:46 +epoch [42/200] batch [15/31] time 0.722 (0.775) data 0.000 (0.054) loss 0.9536 (1.3758) acc 75.0000 (66.8750) lr 1.8090e-03 eta 1:03:26 +epoch [42/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.041) loss 0.9302 (1.2970) acc 78.1250 (68.5938) lr 1.8090e-03 eta 1:02:02 +epoch [42/200] batch [25/31] time 0.715 (0.750) data 0.000 (0.033) loss 1.2734 (1.2943) acc 68.7500 (69.3750) lr 1.8090e-03 eta 1:01:18 +epoch [42/200] batch [30/31] time 0.722 (0.745) data 0.000 (0.027) loss 1.2842 (1.3381) acc 71.8750 (69.2708) lr 1.8090e-03 eta 1:00:51 +epoch [43/200] batch [5/31] time 0.707 (0.882) data 0.000 (0.158) loss 1.1064 (1.3065) acc 75.0000 (68.7500) lr 1.7997e-03 eta 1:11:55 +epoch [43/200] batch [10/31] time 0.705 (0.794) data 0.000 (0.079) loss 1.6416 (1.2849) acc 65.6250 (70.0000) lr 1.7997e-03 eta 1:04:43 +epoch [43/200] batch [15/31] time 0.722 (0.775) data 0.000 (0.053) loss 1.1885 (1.2095) acc 75.0000 (70.4167) lr 1.7997e-03 eta 1:03:06 +epoch [43/200] batch [20/31] time 0.714 (0.760) data 0.000 (0.040) loss 1.4854 (1.2602) acc 46.8750 (68.2812) lr 1.7997e-03 eta 1:01:47 +epoch [43/200] batch [25/31] time 0.710 (0.750) data 0.000 (0.032) loss 1.3408 (1.2542) acc 62.5000 (68.0000) lr 1.7997e-03 eta 1:00:55 +epoch [43/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.027) loss 1.5146 (1.2656) acc 56.2500 (68.0208) lr 1.7997e-03 eta 1:00:18 +epoch [44/200] batch [5/31] time 0.716 (0.885) data 0.000 (0.165) loss 1.4043 (1.1279) acc 65.6250 (71.8750) lr 1.7902e-03 eta 1:11:43 +epoch [44/200] batch [10/31] time 0.711 (0.799) data 0.000 (0.083) loss 1.2324 (1.2136) acc 71.8750 (72.1875) lr 1.7902e-03 eta 1:04:38 +epoch [44/200] batch [15/31] time 0.711 (0.769) data 0.000 (0.055) loss 0.9185 (1.2636) acc 71.8750 (71.0417) lr 1.7902e-03 eta 1:02:10 +epoch [44/200] batch [20/31] time 0.709 (0.754) data 0.000 (0.041) loss 1.5479 (1.3493) 
acc 59.3750 (67.8125) lr 1.7902e-03 eta 1:00:54 +epoch [44/200] batch [25/31] time 0.701 (0.744) data 0.000 (0.033) loss 1.8086 (1.3352) acc 62.5000 (68.5000) lr 1.7902e-03 eta 1:00:02 +epoch [44/200] batch [30/31] time 0.703 (0.739) data 0.000 (0.028) loss 1.1133 (1.3279) acc 75.0000 (68.6458) lr 1.7902e-03 eta 0:59:34 +epoch [45/200] batch [5/31] time 0.710 (0.882) data 0.000 (0.158) loss 1.0693 (0.9290) acc 68.7500 (78.1250) lr 1.7804e-03 eta 1:11:01 +epoch [45/200] batch [10/31] time 0.706 (0.797) data 0.000 (0.079) loss 1.7324 (1.1303) acc 62.5000 (73.4375) lr 1.7804e-03 eta 1:04:06 +epoch [45/200] batch [15/31] time 0.709 (0.768) data 0.000 (0.053) loss 1.1670 (1.1578) acc 68.7500 (71.8750) lr 1.7804e-03 eta 1:01:43 +epoch [45/200] batch [20/31] time 0.707 (0.753) data 0.000 (0.040) loss 1.1768 (1.1464) acc 71.8750 (72.3438) lr 1.7804e-03 eta 1:00:27 +epoch [45/200] batch [25/31] time 0.707 (0.745) data 0.000 (0.032) loss 1.3809 (1.2009) acc 71.8750 (71.3750) lr 1.7804e-03 eta 0:59:41 +epoch [45/200] batch [30/31] time 0.712 (0.739) data 0.000 (0.027) loss 1.7080 (1.2651) acc 53.1250 (70.3125) lr 1.7804e-03 eta 0:59:09 +epoch [46/200] batch [5/31] time 0.728 (0.914) data 0.000 (0.169) loss 1.2617 (1.1603) acc 68.7500 (71.2500) lr 1.7705e-03 eta 1:13:08 +epoch [46/200] batch [10/31] time 0.713 (0.815) data 0.000 (0.085) loss 1.3369 (1.0712) acc 81.2500 (73.7500) lr 1.7705e-03 eta 1:05:06 +epoch [46/200] batch [15/31] time 0.710 (0.781) data 0.000 (0.056) loss 1.1787 (1.0844) acc 71.8750 (73.1250) lr 1.7705e-03 eta 1:02:19 +epoch [46/200] batch [20/31] time 0.712 (0.765) data 0.000 (0.042) loss 1.6631 (1.1601) acc 65.6250 (71.2500) lr 1.7705e-03 eta 1:01:02 +epoch [46/200] batch [25/31] time 0.713 (0.754) data 0.000 (0.034) loss 0.9531 (1.2054) acc 75.0000 (70.1250) lr 1.7705e-03 eta 1:00:05 +epoch [46/200] batch [30/31] time 0.707 (0.747) data 0.000 (0.028) loss 1.2295 (1.1798) acc 75.0000 (70.7292) lr 1.7705e-03 eta 0:59:28 +epoch [47/200] batch [5/31] time 
0.718 (0.908) data 0.000 (0.187) loss 1.3066 (0.8886) acc 71.8750 (79.3750) lr 1.7604e-03 eta 1:12:09 +epoch [47/200] batch [10/31] time 0.713 (0.811) data 0.000 (0.093) loss 1.2852 (1.1177) acc 62.5000 (74.6875) lr 1.7604e-03 eta 1:04:22 +epoch [47/200] batch [15/31] time 0.714 (0.778) data 0.000 (0.062) loss 1.2979 (1.1367) acc 75.0000 (74.3750) lr 1.7604e-03 eta 1:01:41 +epoch [47/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.047) loss 1.2451 (1.1269) acc 75.0000 (74.6875) lr 1.7604e-03 eta 1:00:17 +epoch [47/200] batch [25/31] time 0.709 (0.756) data 0.000 (0.038) loss 1.2412 (1.1353) acc 62.5000 (73.7500) lr 1.7604e-03 eta 0:59:50 +epoch [47/200] batch [30/31] time 0.704 (0.748) data 0.000 (0.031) loss 1.5371 (1.1969) acc 59.3750 (72.2917) lr 1.7604e-03 eta 0:59:10 +epoch [48/200] batch [5/31] time 0.706 (0.903) data 0.000 (0.182) loss 1.0967 (1.3510) acc 68.7500 (68.7500) lr 1.7501e-03 eta 1:11:20 +epoch [48/200] batch [10/31] time 0.714 (0.808) data 0.000 (0.091) loss 0.7759 (1.2142) acc 90.6250 (72.1875) lr 1.7501e-03 eta 1:03:42 +epoch [48/200] batch [15/31] time 0.710 (0.777) data 0.000 (0.061) loss 1.1953 (1.1903) acc 56.2500 (71.6667) lr 1.7501e-03 eta 1:01:14 +epoch [48/200] batch [20/31] time 0.731 (0.762) data 0.000 (0.046) loss 1.6914 (1.2390) acc 56.2500 (69.6875) lr 1.7501e-03 eta 0:59:58 +epoch [48/200] batch [25/31] time 0.715 (0.751) data 0.000 (0.037) loss 0.8862 (1.1944) acc 65.6250 (71.2500) lr 1.7501e-03 eta 0:59:05 +epoch [48/200] batch [30/31] time 0.717 (0.745) data 0.000 (0.031) loss 1.2988 (1.2449) acc 75.0000 (70.6250) lr 1.7501e-03 eta 0:58:30 +epoch [49/200] batch [5/31] time 0.705 (0.902) data 0.000 (0.178) loss 1.2969 (1.0531) acc 68.7500 (72.5000) lr 1.7396e-03 eta 1:10:46 +epoch [49/200] batch [10/31] time 0.708 (0.808) data 0.000 (0.089) loss 0.9067 (1.1353) acc 78.1250 (71.8750) lr 1.7396e-03 eta 1:03:17 +epoch [49/200] batch [15/31] time 0.720 (0.780) data 0.000 (0.060) loss 1.8965 (1.1436) acc 65.6250 (73.1250) lr 
1.7396e-03 eta 1:01:03 +epoch [49/200] batch [20/31] time 0.721 (0.770) data 0.000 (0.045) loss 1.7764 (1.2355) acc 56.2500 (70.9375) lr 1.7396e-03 eta 1:00:13 +epoch [49/200] batch [25/31] time 0.712 (0.759) data 0.000 (0.036) loss 0.7139 (1.2477) acc 87.5000 (71.6250) lr 1.7396e-03 eta 0:59:15 +epoch [49/200] batch [30/31] time 0.710 (0.751) data 0.000 (0.030) loss 2.3301 (1.2515) acc 62.5000 (71.1458) lr 1.7396e-03 eta 0:58:35 +epoch [50/200] batch [5/31] time 0.710 (0.912) data 0.000 (0.186) loss 1.3027 (1.1680) acc 68.7500 (70.6250) lr 1.7290e-03 eta 1:11:04 +epoch [50/200] batch [10/31] time 0.718 (0.816) data 0.000 (0.093) loss 1.0889 (1.1694) acc 75.0000 (70.6250) lr 1.7290e-03 eta 1:03:31 +epoch [50/200] batch [15/31] time 0.709 (0.782) data 0.000 (0.062) loss 1.6338 (1.1758) acc 56.2500 (70.2083) lr 1.7290e-03 eta 1:00:49 +epoch [50/200] batch [20/31] time 0.708 (0.764) data 0.000 (0.047) loss 1.0967 (1.2041) acc 68.7500 (70.0000) lr 1.7290e-03 eta 0:59:20 +epoch [50/200] batch [25/31] time 0.707 (0.753) data 0.000 (0.038) loss 1.0029 (1.2025) acc 75.0000 (70.2500) lr 1.7290e-03 eta 0:58:26 +epoch [50/200] batch [30/31] time 0.711 (0.747) data 0.000 (0.031) loss 0.9634 (1.1763) acc 81.2500 (71.2500) lr 1.7290e-03 eta 0:57:54 +epoch [51/200] batch [5/31] time 0.712 (0.940) data 0.000 (0.217) loss 1.7549 (1.5416) acc 65.6250 (71.2500) lr 1.7181e-03 eta 1:12:44 +epoch [51/200] batch [10/31] time 0.711 (0.826) data 0.000 (0.109) loss 1.0410 (1.2755) acc 71.8750 (73.7500) lr 1.7181e-03 eta 1:03:53 +epoch [51/200] batch [15/31] time 0.714 (0.789) data 0.000 (0.072) loss 0.8477 (1.2250) acc 81.2500 (73.1250) lr 1.7181e-03 eta 1:00:59 +epoch [51/200] batch [20/31] time 0.707 (0.770) data 0.000 (0.054) loss 1.3467 (1.2293) acc 62.5000 (72.3438) lr 1.7181e-03 eta 0:59:22 +epoch [51/200] batch [25/31] time 0.707 (0.757) data 0.000 (0.044) loss 1.6406 (1.2446) acc 65.6250 (72.0000) lr 1.7181e-03 eta 0:58:22 +epoch [51/200] batch [30/31] time 0.701 (0.749) data 0.000 
(0.036) loss 0.9868 (1.2307) acc 78.1250 (72.1875) lr 1.7181e-03 eta 0:57:40 +epoch [52/200] batch [5/31] time 0.714 (0.894) data 0.000 (0.170) loss 1.1963 (1.0649) acc 68.7500 (74.3750) lr 1.7071e-03 eta 1:08:43 +epoch [52/200] batch [10/31] time 0.714 (0.819) data 0.000 (0.085) loss 1.0010 (1.1237) acc 75.0000 (72.5000) lr 1.7071e-03 eta 1:02:52 +epoch [52/200] batch [15/31] time 0.711 (0.784) data 0.000 (0.057) loss 1.0693 (1.2225) acc 75.0000 (70.4167) lr 1.7071e-03 eta 1:00:11 +epoch [52/200] batch [20/31] time 0.711 (0.767) data 0.000 (0.043) loss 1.3672 (1.1717) acc 68.7500 (71.4062) lr 1.7071e-03 eta 0:58:46 +epoch [52/200] batch [25/31] time 0.707 (0.756) data 0.000 (0.034) loss 1.6240 (1.1925) acc 65.6250 (71.3750) lr 1.7071e-03 eta 0:57:51 +epoch [52/200] batch [30/31] time 0.713 (0.748) data 0.000 (0.028) loss 1.5186 (1.1984) acc 53.1250 (71.0417) lr 1.7071e-03 eta 0:57:13 +epoch [53/200] batch [5/31] time 0.712 (0.893) data 0.000 (0.169) loss 0.7017 (1.0003) acc 81.2500 (76.2500) lr 1.6959e-03 eta 1:08:14 +epoch [53/200] batch [10/31] time 0.722 (0.805) data 0.000 (0.085) loss 1.4688 (0.9739) acc 68.7500 (76.2500) lr 1.6959e-03 eta 1:01:23 +epoch [53/200] batch [15/31] time 0.715 (0.777) data 0.000 (0.057) loss 1.2480 (1.0915) acc 71.8750 (73.1250) lr 1.6959e-03 eta 0:59:12 +epoch [53/200] batch [20/31] time 0.710 (0.761) data 0.000 (0.043) loss 1.7725 (1.1736) acc 68.7500 (72.0312) lr 1.6959e-03 eta 0:57:57 +epoch [53/200] batch [25/31] time 0.704 (0.751) data 0.000 (0.034) loss 0.7700 (1.1618) acc 84.3750 (72.5000) lr 1.6959e-03 eta 0:57:05 +epoch [53/200] batch [30/31] time 0.711 (0.743) data 0.000 (0.028) loss 0.9438 (1.1467) acc 75.0000 (72.5000) lr 1.6959e-03 eta 0:56:28 +epoch [54/200] batch [5/31] time 0.709 (0.900) data 0.000 (0.181) loss 1.1396 (1.1856) acc 65.6250 (72.5000) lr 1.6845e-03 eta 1:08:15 +epoch [54/200] batch [10/31] time 0.710 (0.806) data 0.000 (0.091) loss 0.4087 (1.1499) acc 93.7500 (73.1250) lr 1.6845e-03 eta 1:01:06 +epoch 
[54/200] batch [15/31] time 0.720 (0.775) data 0.000 (0.061) loss 1.3984 (1.1411) acc 68.7500 (73.3333) lr 1.6845e-03 eta 0:58:41 +epoch [54/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.045) loss 1.2402 (1.1667) acc 68.7500 (72.3438) lr 1.6845e-03 eta 0:57:30 +epoch [54/200] batch [25/31] time 0.708 (0.750) data 0.000 (0.036) loss 0.6621 (1.1452) acc 84.3750 (72.7500) lr 1.6845e-03 eta 0:56:40 +epoch [54/200] batch [30/31] time 0.713 (0.744) data 0.000 (0.030) loss 1.2256 (1.1801) acc 62.5000 (72.2917) lr 1.6845e-03 eta 0:56:08 +epoch [55/200] batch [5/31] time 0.720 (0.888) data 0.000 (0.170) loss 0.6631 (1.0503) acc 81.2500 (71.2500) lr 1.6730e-03 eta 1:06:54 +epoch [55/200] batch [10/31] time 0.713 (0.798) data 0.000 (0.085) loss 2.1562 (1.2543) acc 53.1250 (69.3750) lr 1.6730e-03 eta 1:00:05 +epoch [55/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.057) loss 1.2119 (1.1994) acc 81.2500 (72.2917) lr 1.6730e-03 eta 0:57:52 +epoch [55/200] batch [20/31] time 0.729 (0.755) data 0.000 (0.043) loss 1.7461 (1.2020) acc 59.3750 (71.7188) lr 1.6730e-03 eta 0:56:40 +epoch [55/200] batch [25/31] time 0.729 (0.746) data 0.000 (0.034) loss 1.3145 (1.2005) acc 62.5000 (71.1250) lr 1.6730e-03 eta 0:55:58 +epoch [55/200] batch [30/31] time 0.719 (0.740) data 0.000 (0.029) loss 1.2812 (1.1889) acc 78.1250 (71.5625) lr 1.6730e-03 eta 0:55:28 +epoch [56/200] batch [5/31] time 0.726 (0.895) data 0.001 (0.164) loss 1.2031 (1.1549) acc 71.8750 (73.1250) lr 1.6613e-03 eta 1:06:59 +epoch [56/200] batch [10/31] time 0.707 (0.804) data 0.000 (0.082) loss 0.9624 (1.0627) acc 78.1250 (75.0000) lr 1.6613e-03 eta 1:00:06 +epoch [56/200] batch [15/31] time 0.724 (0.776) data 0.000 (0.055) loss 1.6553 (1.1162) acc 62.5000 (74.1667) lr 1.6613e-03 eta 0:57:55 +epoch [56/200] batch [20/31] time 0.717 (0.761) data 0.000 (0.041) loss 0.8447 (1.1027) acc 75.0000 (74.0625) lr 1.6613e-03 eta 0:56:44 +epoch [56/200] batch [25/31] time 0.720 (0.752) data 0.000 (0.033) loss 1.3037 (1.1314) 
acc 78.1250 (74.6250) lr 1.6613e-03 eta 0:56:00 +epoch [56/200] batch [30/31] time 0.720 (0.745) data 0.000 (0.028) loss 1.7578 (1.1753) acc 71.8750 (74.3750) lr 1.6613e-03 eta 0:55:26 +epoch [57/200] batch [5/31] time 0.711 (0.894) data 0.000 (0.166) loss 0.9209 (0.8169) acc 78.1250 (76.8750) lr 1.6494e-03 eta 1:06:26 +epoch [57/200] batch [10/31] time 0.726 (0.805) data 0.000 (0.083) loss 0.8623 (0.8554) acc 78.1250 (77.1875) lr 1.6494e-03 eta 0:59:44 +epoch [57/200] batch [15/31] time 0.722 (0.784) data 0.000 (0.056) loss 1.7354 (0.9829) acc 68.7500 (76.4583) lr 1.6494e-03 eta 0:58:06 +epoch [57/200] batch [20/31] time 0.710 (0.765) data 0.000 (0.042) loss 1.3174 (0.9870) acc 75.0000 (75.7812) lr 1.6494e-03 eta 0:56:40 +epoch [57/200] batch [25/31] time 0.715 (0.755) data 0.000 (0.034) loss 1.6562 (1.0325) acc 71.8750 (75.3750) lr 1.6494e-03 eta 0:55:49 +epoch [57/200] batch [30/31] time 0.708 (0.747) data 0.000 (0.028) loss 1.0566 (1.0582) acc 84.3750 (74.8958) lr 1.6494e-03 eta 0:55:14 +epoch [58/200] batch [5/31] time 0.716 (0.889) data 0.000 (0.168) loss 0.6855 (0.8999) acc 81.2500 (76.2500) lr 1.6374e-03 eta 1:05:35 +epoch [58/200] batch [10/31] time 0.713 (0.801) data 0.000 (0.084) loss 0.7056 (0.9570) acc 84.3750 (77.1875) lr 1.6374e-03 eta 0:59:00 +epoch [58/200] batch [15/31] time 0.712 (0.771) data 0.000 (0.056) loss 0.8940 (0.9972) acc 75.0000 (75.6250) lr 1.6374e-03 eta 0:56:44 +epoch [58/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.042) loss 1.3926 (1.0493) acc 68.7500 (75.3125) lr 1.6374e-03 eta 0:55:37 +epoch [58/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.034) loss 0.7783 (1.0657) acc 81.2500 (74.5000) lr 1.6374e-03 eta 0:54:55 +epoch [58/200] batch [30/31] time 0.713 (0.741) data 0.000 (0.028) loss 1.0732 (1.1136) acc 75.0000 (73.5417) lr 1.6374e-03 eta 0:54:21 +epoch [59/200] batch [5/31] time 0.725 (0.927) data 0.000 (0.167) loss 1.2168 (1.2180) acc 68.7500 (70.6250) lr 1.6252e-03 eta 1:07:56 +epoch [59/200] batch [10/31] time 
0.714 (0.821) data 0.000 (0.083) loss 1.0342 (1.1566) acc 68.7500 (73.7500) lr 1.6252e-03 eta 1:00:04 +epoch [59/200] batch [15/31] time 0.711 (0.784) data 0.000 (0.056) loss 1.5195 (1.1260) acc 62.5000 (74.5833) lr 1.6252e-03 eta 0:57:17 +epoch [59/200] batch [20/31] time 0.707 (0.765) data 0.000 (0.042) loss 0.7905 (1.0862) acc 75.0000 (74.6875) lr 1.6252e-03 eta 0:55:53 +epoch [59/200] batch [25/31] time 0.708 (0.754) data 0.000 (0.034) loss 0.7407 (1.0607) acc 78.1250 (75.0000) lr 1.6252e-03 eta 0:54:58 +epoch [59/200] batch [30/31] time 0.714 (0.746) data 0.000 (0.028) loss 0.9995 (1.0828) acc 78.1250 (74.4792) lr 1.6252e-03 eta 0:54:21 +epoch [60/200] batch [5/31] time 0.715 (0.887) data 0.000 (0.165) loss 0.9663 (0.9911) acc 71.8750 (73.7500) lr 1.6129e-03 eta 1:04:31 +epoch [60/200] batch [10/31] time 0.710 (0.802) data 0.000 (0.083) loss 1.2686 (1.0214) acc 68.7500 (74.3750) lr 1.6129e-03 eta 0:58:17 +epoch [60/200] batch [15/31] time 0.715 (0.772) data 0.000 (0.055) loss 1.2441 (1.0629) acc 75.0000 (74.1667) lr 1.6129e-03 eta 0:56:04 +epoch [60/200] batch [20/31] time 0.712 (0.757) data 0.000 (0.042) loss 1.2520 (1.0678) acc 75.0000 (74.8438) lr 1.6129e-03 eta 0:54:53 +epoch [60/200] batch [25/31] time 0.724 (0.748) data 0.000 (0.033) loss 1.4785 (1.1482) acc 65.6250 (74.2500) lr 1.6129e-03 eta 0:54:12 +epoch [60/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.028) loss 0.8003 (1.1317) acc 81.2500 (73.8542) lr 1.6129e-03 eta 0:53:41 +epoch [61/200] batch [5/31] time 0.710 (0.891) data 0.001 (0.171) loss 1.1094 (1.1159) acc 71.8750 (75.6250) lr 1.6004e-03 eta 1:04:22 +epoch [61/200] batch [10/31] time 0.712 (0.816) data 0.000 (0.086) loss 1.3799 (1.0962) acc 59.3750 (74.3750) lr 1.6004e-03 eta 0:58:54 +epoch [61/200] batch [15/31] time 0.711 (0.782) data 0.000 (0.057) loss 1.3008 (1.1191) acc 62.5000 (72.7083) lr 1.6004e-03 eta 0:56:22 +epoch [61/200] batch [20/31] time 0.709 (0.764) data 0.000 (0.043) loss 0.7617 (1.1344) acc 78.1250 (71.5625) lr 
1.6004e-03 eta 0:55:02 +epoch [61/200] batch [25/31] time 0.705 (0.754) data 0.000 (0.034) loss 1.0410 (1.1126) acc 78.1250 (72.5000) lr 1.6004e-03 eta 0:54:15 +epoch [61/200] batch [30/31] time 0.712 (0.748) data 0.000 (0.029) loss 1.1230 (1.1420) acc 75.0000 (71.6667) lr 1.6004e-03 eta 0:53:44 +epoch [62/200] batch [5/31] time 0.710 (0.889) data 0.000 (0.167) loss 0.7866 (0.9048) acc 81.2500 (80.0000) lr 1.5878e-03 eta 1:03:45 +epoch [62/200] batch [10/31] time 0.710 (0.801) data 0.001 (0.084) loss 1.0947 (0.9982) acc 65.6250 (76.8750) lr 1.5878e-03 eta 0:57:21 +epoch [62/200] batch [15/31] time 0.714 (0.771) data 0.000 (0.056) loss 0.8613 (1.0666) acc 84.3750 (75.8333) lr 1.5878e-03 eta 0:55:12 +epoch [62/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.042) loss 1.1562 (1.0688) acc 75.0000 (75.3125) lr 1.5878e-03 eta 0:54:03 +epoch [62/200] batch [25/31] time 0.722 (0.748) data 0.000 (0.034) loss 0.6260 (1.1745) acc 87.5000 (73.3750) lr 1.5878e-03 eta 0:53:23 +epoch [62/200] batch [30/31] time 0.707 (0.742) data 0.000 (0.028) loss 1.6309 (1.1712) acc 59.3750 (73.1250) lr 1.5878e-03 eta 0:52:53 +epoch [63/200] batch [5/31] time 0.727 (0.901) data 0.000 (0.182) loss 1.5312 (1.0208) acc 53.1250 (76.2500) lr 1.5750e-03 eta 1:04:07 +epoch [63/200] batch [10/31] time 0.707 (0.806) data 0.000 (0.091) loss 0.9971 (1.0391) acc 78.1250 (75.6250) lr 1.5750e-03 eta 0:57:17 +epoch [63/200] batch [15/31] time 0.726 (0.776) data 0.000 (0.061) loss 1.4766 (1.1292) acc 59.3750 (72.5000) lr 1.5750e-03 eta 0:55:09 +epoch [63/200] batch [20/31] time 0.711 (0.760) data 0.000 (0.046) loss 1.4814 (1.1495) acc 62.5000 (71.4062) lr 1.5750e-03 eta 0:53:57 +epoch [63/200] batch [25/31] time 0.716 (0.750) data 0.000 (0.037) loss 1.0928 (1.1230) acc 68.7500 (72.2500) lr 1.5750e-03 eta 0:53:09 +epoch [63/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.031) loss 1.3730 (1.1068) acc 68.7500 (72.5000) lr 1.5750e-03 eta 0:52:37 +epoch [64/200] batch [5/31] time 0.713 (0.901) data 0.000 
(0.178) loss 1.4590 (0.9732) acc 75.0000 (73.7500) lr 1.5621e-03 eta 1:03:41 +epoch [64/200] batch [10/31] time 0.709 (0.808) data 0.000 (0.089) loss 0.8057 (0.9215) acc 78.1250 (77.1875) lr 1.5621e-03 eta 0:57:04 +epoch [64/200] batch [15/31] time 0.711 (0.778) data 0.000 (0.060) loss 0.7251 (0.9933) acc 81.2500 (77.5000) lr 1.5621e-03 eta 0:54:52 +epoch [64/200] batch [20/31] time 0.709 (0.761) data 0.000 (0.045) loss 1.1680 (1.0136) acc 78.1250 (76.0938) lr 1.5621e-03 eta 0:53:37 +epoch [64/200] batch [25/31] time 0.710 (0.751) data 0.000 (0.036) loss 1.0928 (1.1105) acc 75.0000 (74.8750) lr 1.5621e-03 eta 0:52:50 +epoch [64/200] batch [30/31] time 0.712 (0.745) data 0.000 (0.030) loss 1.4121 (1.0991) acc 59.3750 (74.1667) lr 1.5621e-03 eta 0:52:20 +epoch [65/200] batch [5/31] time 0.711 (0.892) data 0.001 (0.167) loss 0.7510 (0.9054) acc 75.0000 (78.1250) lr 1.5490e-03 eta 1:02:34 +epoch [65/200] batch [10/31] time 0.708 (0.802) data 0.000 (0.084) loss 1.5117 (0.9949) acc 62.5000 (77.1875) lr 1.5490e-03 eta 0:56:11 +epoch [65/200] batch [15/31] time 0.708 (0.771) data 0.000 (0.056) loss 1.1494 (1.0370) acc 68.7500 (74.7917) lr 1.5490e-03 eta 0:53:59 +epoch [65/200] batch [20/31] time 0.708 (0.756) data 0.000 (0.042) loss 2.3262 (1.1996) acc 50.0000 (71.7188) lr 1.5490e-03 eta 0:52:50 +epoch [65/200] batch [25/31] time 0.706 (0.746) data 0.000 (0.034) loss 0.9126 (1.1746) acc 81.2500 (72.5000) lr 1.5490e-03 eta 0:52:07 +epoch [65/200] batch [30/31] time 0.711 (0.740) data 0.000 (0.028) loss 1.0625 (1.1690) acc 65.6250 (72.5000) lr 1.5490e-03 eta 0:51:38 +epoch [66/200] batch [5/31] time 0.719 (0.894) data 0.000 (0.176) loss 1.2783 (0.9384) acc 81.2500 (79.3750) lr 1.5358e-03 eta 1:02:16 +epoch [66/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.088) loss 1.0312 (1.0421) acc 78.1250 (75.3125) lr 1.5358e-03 eta 0:55:54 +epoch [66/200] batch [15/31] time 0.724 (0.775) data 0.000 (0.059) loss 1.4199 (1.1219) acc 71.8750 (74.1667) lr 1.5358e-03 eta 0:53:50 +epoch 
[66/200] batch [20/31] time 0.717 (0.759) data 0.000 (0.044) loss 1.5850 (1.1445) acc 68.7500 (72.8125) lr 1.5358e-03 eta 0:52:40 +epoch [66/200] batch [25/31] time 0.709 (0.749) data 0.000 (0.035) loss 1.3330 (1.1725) acc 75.0000 (72.6250) lr 1.5358e-03 eta 0:51:55 +epoch [66/200] batch [30/31] time 0.716 (0.743) data 0.000 (0.030) loss 1.0068 (1.1335) acc 75.0000 (73.6458) lr 1.5358e-03 eta 0:51:26 +epoch [67/200] batch [5/31] time 0.712 (0.892) data 0.000 (0.170) loss 0.9419 (1.1756) acc 75.0000 (73.7500) lr 1.5225e-03 eta 1:01:38 +epoch [67/200] batch [10/31] time 0.716 (0.820) data 0.000 (0.085) loss 1.2373 (1.2099) acc 68.7500 (71.8750) lr 1.5225e-03 eta 0:56:38 +epoch [67/200] batch [15/31] time 0.713 (0.786) data 0.000 (0.057) loss 1.2881 (1.2462) acc 65.6250 (71.0417) lr 1.5225e-03 eta 0:54:13 +epoch [67/200] batch [20/31] time 0.712 (0.767) data 0.000 (0.043) loss 0.8418 (1.1891) acc 78.1250 (71.7188) lr 1.5225e-03 eta 0:52:51 +epoch [67/200] batch [25/31] time 0.714 (0.756) data 0.001 (0.034) loss 0.8999 (1.1186) acc 78.1250 (73.2500) lr 1.5225e-03 eta 0:52:00 +epoch [67/200] batch [30/31] time 0.725 (0.749) data 0.000 (0.029) loss 1.1201 (1.1057) acc 71.8750 (73.3333) lr 1.5225e-03 eta 0:51:29 +epoch [68/200] batch [5/31] time 0.711 (0.886) data 0.000 (0.163) loss 0.7808 (0.9430) acc 81.2500 (76.8750) lr 1.5090e-03 eta 1:00:48 +epoch [68/200] batch [10/31] time 0.714 (0.800) data 0.000 (0.082) loss 0.9180 (0.9782) acc 78.1250 (77.1875) lr 1.5090e-03 eta 0:54:49 +epoch [68/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.055) loss 0.9023 (1.0845) acc 81.2500 (74.7917) lr 1.5090e-03 eta 0:52:47 +epoch [68/200] batch [20/31] time 0.708 (0.756) data 0.000 (0.041) loss 0.4165 (1.0443) acc 90.6250 (75.3125) lr 1.5090e-03 eta 0:51:40 +epoch [68/200] batch [25/31] time 0.716 (0.748) data 0.000 (0.033) loss 1.4404 (1.0478) acc 68.7500 (74.7500) lr 1.5090e-03 eta 0:51:04 +epoch [68/200] batch [30/31] time 0.706 (0.746) data 0.000 (0.027) loss 0.7397 (1.0145) 
acc 81.2500 (76.0417) lr 1.5090e-03 eta 0:50:55 +epoch [69/200] batch [5/31] time 0.719 (0.909) data 0.000 (0.179) loss 0.7339 (0.8790) acc 75.0000 (75.6250) lr 1.4955e-03 eta 1:01:54 +epoch [69/200] batch [10/31] time 0.721 (0.812) data 0.000 (0.090) loss 1.3691 (0.9016) acc 65.6250 (76.5625) lr 1.4955e-03 eta 0:55:14 +epoch [69/200] batch [15/31] time 0.709 (0.780) data 0.000 (0.060) loss 1.3682 (1.0440) acc 71.8750 (75.4167) lr 1.4955e-03 eta 0:53:00 +epoch [69/200] batch [20/31] time 0.714 (0.763) data 0.000 (0.045) loss 1.7139 (1.0796) acc 56.2500 (75.3125) lr 1.4955e-03 eta 0:51:45 +epoch [69/200] batch [25/31] time 0.711 (0.753) data 0.000 (0.036) loss 1.2451 (1.0564) acc 68.7500 (75.1250) lr 1.4955e-03 eta 0:51:04 +epoch [69/200] batch [30/31] time 0.705 (0.746) data 0.000 (0.030) loss 1.2002 (1.0285) acc 75.0000 (75.9375) lr 1.4955e-03 eta 0:50:30 +epoch [70/200] batch [5/31] time 0.708 (0.934) data 0.000 (0.213) loss 1.2041 (1.0688) acc 71.8750 (75.6250) lr 1.4818e-03 eta 1:03:09 +epoch [70/200] batch [10/31] time 0.707 (0.824) data 0.000 (0.107) loss 1.2305 (1.1247) acc 71.8750 (74.6875) lr 1.4818e-03 eta 0:55:37 +epoch [70/200] batch [15/31] time 0.711 (0.787) data 0.000 (0.071) loss 0.5601 (1.0799) acc 93.7500 (75.8333) lr 1.4818e-03 eta 0:53:02 +epoch [70/200] batch [20/31] time 0.707 (0.767) data 0.000 (0.053) loss 1.2295 (1.0663) acc 68.7500 (76.2500) lr 1.4818e-03 eta 0:51:38 +epoch [70/200] batch [25/31] time 0.704 (0.755) data 0.000 (0.043) loss 1.0059 (1.0719) acc 68.7500 (76.5000) lr 1.4818e-03 eta 0:50:45 +epoch [70/200] batch [30/31] time 0.704 (0.751) data 0.000 (0.036) loss 0.7075 (1.0361) acc 84.3750 (77.3958) lr 1.4818e-03 eta 0:50:26 +epoch [71/200] batch [5/31] time 0.727 (0.899) data 0.000 (0.170) loss 0.9917 (1.1644) acc 78.1250 (71.8750) lr 1.4679e-03 eta 1:00:20 +epoch [71/200] batch [10/31] time 0.710 (0.809) data 0.000 (0.085) loss 0.6958 (1.0361) acc 84.3750 (75.3125) lr 1.4679e-03 eta 0:54:10 +epoch [71/200] batch [15/31] time 
0.725 (0.778) data 0.000 (0.057) loss 0.8511 (1.0410) acc 78.1250 (76.2500) lr 1.4679e-03 eta 0:52:03 +epoch [71/200] batch [20/31] time 0.717 (0.762) data 0.000 (0.043) loss 0.6606 (1.0802) acc 78.1250 (75.1562) lr 1.4679e-03 eta 0:50:55 +epoch [71/200] batch [25/31] time 0.713 (0.751) data 0.000 (0.034) loss 1.1943 (1.0499) acc 75.0000 (75.7500) lr 1.4679e-03 eta 0:50:07 +epoch [71/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.029) loss 1.0645 (1.0571) acc 71.8750 (75.5208) lr 1.4679e-03 eta 0:49:32 +epoch [72/200] batch [5/31] time 0.710 (0.895) data 0.000 (0.170) loss 0.6763 (1.0054) acc 87.5000 (79.3750) lr 1.4540e-03 eta 0:59:34 +epoch [72/200] batch [10/31] time 0.710 (0.806) data 0.000 (0.085) loss 0.9707 (0.9912) acc 68.7500 (77.1875) lr 1.4540e-03 eta 0:53:36 +epoch [72/200] batch [15/31] time 0.709 (0.776) data 0.000 (0.057) loss 0.9126 (1.0075) acc 78.1250 (76.4583) lr 1.4540e-03 eta 0:51:31 +epoch [72/200] batch [20/31] time 0.707 (0.759) data 0.000 (0.043) loss 1.1650 (1.0493) acc 78.1250 (76.0938) lr 1.4540e-03 eta 0:50:20 +epoch [72/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.034) loss 0.8066 (1.0406) acc 68.7500 (75.1250) lr 1.4540e-03 eta 0:49:41 +epoch [72/200] batch [30/31] time 0.707 (0.743) data 0.000 (0.029) loss 1.3496 (1.0534) acc 68.7500 (74.3750) lr 1.4540e-03 eta 0:49:10 +epoch [73/200] batch [5/31] time 0.720 (0.900) data 0.000 (0.176) loss 0.5161 (0.8323) acc 93.7500 (85.0000) lr 1.4399e-03 eta 0:59:24 +epoch [73/200] batch [10/31] time 0.727 (0.807) data 0.000 (0.088) loss 0.7227 (0.8146) acc 71.8750 (81.8750) lr 1.4399e-03 eta 0:53:13 +epoch [73/200] batch [15/31] time 0.702 (0.777) data 0.000 (0.059) loss 0.9736 (0.9338) acc 81.2500 (79.7917) lr 1.4399e-03 eta 0:51:11 +epoch [73/200] batch [20/31] time 0.712 (0.761) data 0.000 (0.044) loss 1.4531 (0.9623) acc 75.0000 (79.2188) lr 1.4399e-03 eta 0:50:06 +epoch [73/200] batch [25/31] time 0.704 (0.751) data 0.000 (0.035) loss 0.7583 (0.9761) acc 78.1250 (78.7500) lr 
1.4399e-03 eta 0:49:19 +epoch [73/200] batch [30/31] time 0.713 (0.744) data 0.000 (0.030) loss 0.6978 (0.9634) acc 75.0000 (78.5417) lr 1.4399e-03 eta 0:48:51 +epoch [74/200] batch [5/31] time 0.713 (0.897) data 0.000 (0.170) loss 1.2549 (1.1246) acc 68.7500 (73.7500) lr 1.4258e-03 eta 0:58:46 +epoch [74/200] batch [10/31] time 0.707 (0.807) data 0.000 (0.085) loss 0.2329 (1.0755) acc 100.0000 (75.3125) lr 1.4258e-03 eta 0:52:49 +epoch [74/200] batch [15/31] time 0.714 (0.786) data 0.000 (0.057) loss 0.5825 (1.0024) acc 84.3750 (76.6667) lr 1.4258e-03 eta 0:51:22 +epoch [74/200] batch [20/31] time 0.720 (0.767) data 0.000 (0.043) loss 1.3125 (1.0258) acc 78.1250 (76.5625) lr 1.4258e-03 eta 0:50:05 +epoch [74/200] batch [25/31] time 0.708 (0.756) data 0.000 (0.034) loss 1.0244 (1.0354) acc 75.0000 (76.7500) lr 1.4258e-03 eta 0:49:16 +epoch [74/200] batch [30/31] time 0.727 (0.749) data 0.000 (0.029) loss 1.3291 (1.0099) acc 59.3750 (76.7708) lr 1.4258e-03 eta 0:48:48 +epoch [75/200] batch [5/31] time 0.705 (0.874) data 0.000 (0.156) loss 0.6196 (0.8568) acc 78.1250 (77.5000) lr 1.4115e-03 eta 0:56:48 +epoch [75/200] batch [10/31] time 0.720 (0.795) data 0.000 (0.078) loss 0.6465 (0.8503) acc 84.3750 (79.6875) lr 1.4115e-03 eta 0:51:36 +epoch [75/200] batch [15/31] time 0.721 (0.769) data 0.000 (0.052) loss 1.1543 (0.9968) acc 81.2500 (77.2917) lr 1.4115e-03 eta 0:49:51 +epoch [75/200] batch [20/31] time 0.721 (0.755) data 0.000 (0.039) loss 0.7700 (1.0176) acc 81.2500 (76.7188) lr 1.4115e-03 eta 0:48:54 +epoch [75/200] batch [25/31] time 0.713 (0.747) data 0.000 (0.032) loss 1.1064 (1.0460) acc 81.2500 (76.0000) lr 1.4115e-03 eta 0:48:19 +epoch [75/200] batch [30/31] time 0.712 (0.741) data 0.000 (0.026) loss 0.8418 (1.0910) acc 71.8750 (74.5833) lr 1.4115e-03 eta 0:47:52 +epoch [76/200] batch [5/31] time 0.708 (0.892) data 0.000 (0.173) loss 0.7876 (1.0881) acc 78.1250 (75.0000) lr 1.3971e-03 eta 0:57:31 +epoch [76/200] batch [10/31] time 0.708 (0.800) data 0.000 
(0.087) loss 1.3311 (1.0258) acc 68.7500 (75.9375) lr 1.3971e-03 eta 0:51:33 +epoch [76/200] batch [15/31] time 0.709 (0.771) data 0.000 (0.058) loss 1.1396 (0.9766) acc 78.1250 (77.9167) lr 1.3971e-03 eta 0:49:34 +epoch [76/200] batch [20/31] time 0.715 (0.755) data 0.000 (0.044) loss 0.7510 (0.9655) acc 87.5000 (77.9688) lr 1.3971e-03 eta 0:48:31 +epoch [76/200] batch [25/31] time 0.719 (0.747) data 0.000 (0.035) loss 1.0371 (0.9688) acc 71.8750 (77.3750) lr 1.3971e-03 eta 0:47:55 +epoch [76/200] batch [30/31] time 0.708 (0.741) data 0.000 (0.029) loss 1.0518 (0.9868) acc 78.1250 (77.1875) lr 1.3971e-03 eta 0:47:28 +epoch [77/200] batch [5/31] time 0.715 (0.942) data 0.000 (0.191) loss 1.0508 (0.7029) acc 71.8750 (81.8750) lr 1.3827e-03 eta 1:00:17 +epoch [77/200] batch [10/31] time 0.706 (0.827) data 0.000 (0.096) loss 0.7036 (0.8145) acc 90.6250 (80.3125) lr 1.3827e-03 eta 0:52:51 +epoch [77/200] batch [15/31] time 0.713 (0.790) data 0.000 (0.064) loss 1.0791 (0.8166) acc 81.2500 (80.0000) lr 1.3827e-03 eta 0:50:24 +epoch [77/200] batch [20/31] time 0.714 (0.771) data 0.000 (0.048) loss 1.1123 (0.8587) acc 68.7500 (78.4375) lr 1.3827e-03 eta 0:49:06 +epoch [77/200] batch [25/31] time 0.713 (0.759) data 0.000 (0.038) loss 1.4727 (0.9244) acc 65.6250 (77.6250) lr 1.3827e-03 eta 0:48:19 +epoch [77/200] batch [30/31] time 0.708 (0.751) data 0.000 (0.032) loss 1.3916 (0.9429) acc 62.5000 (77.3958) lr 1.3827e-03 eta 0:47:45 +epoch [78/200] batch [5/31] time 0.706 (0.894) data 0.000 (0.174) loss 0.7583 (0.9369) acc 81.2500 (78.1250) lr 1.3681e-03 eta 0:56:45 +epoch [78/200] batch [10/31] time 0.714 (0.804) data 0.000 (0.087) loss 1.3145 (1.0224) acc 68.7500 (75.6250) lr 1.3681e-03 eta 0:50:55 +epoch [78/200] batch [15/31] time 0.726 (0.773) data 0.000 (0.058) loss 0.8462 (0.9478) acc 81.2500 (76.6667) lr 1.3681e-03 eta 0:48:57 +epoch [78/200] batch [20/31] time 0.705 (0.757) data 0.000 (0.044) loss 1.1592 (0.9911) acc 78.1250 (76.2500) lr 1.3681e-03 eta 0:47:50 +epoch 
[78/200] batch [25/31] time 0.710 (0.753) data 0.000 (0.035) loss 1.3291 (1.0328) acc 78.1250 (76.0000) lr 1.3681e-03 eta 0:47:32 +epoch [78/200] batch [30/31] time 0.720 (0.747) data 0.000 (0.029) loss 0.9976 (1.0157) acc 71.8750 (76.3542) lr 1.3681e-03 eta 0:47:05 +epoch [79/200] batch [5/31] time 0.708 (0.888) data 0.000 (0.165) loss 1.0400 (1.1393) acc 78.1250 (71.8750) lr 1.3535e-03 eta 0:55:53 +epoch [79/200] batch [10/31] time 0.708 (0.800) data 0.000 (0.083) loss 1.0215 (1.0544) acc 78.1250 (75.3125) lr 1.3535e-03 eta 0:50:19 +epoch [79/200] batch [15/31] time 0.718 (0.772) data 0.000 (0.055) loss 0.7002 (1.0129) acc 90.6250 (77.2917) lr 1.3535e-03 eta 0:48:29 +epoch [79/200] batch [20/31] time 0.706 (0.756) data 0.000 (0.042) loss 0.5142 (0.9471) acc 87.5000 (78.7500) lr 1.3535e-03 eta 0:47:25 +epoch [79/200] batch [25/31] time 0.714 (0.747) data 0.000 (0.033) loss 1.4814 (0.9611) acc 59.3750 (78.6250) lr 1.3535e-03 eta 0:46:47 +epoch [79/200] batch [30/31] time 0.723 (0.741) data 0.000 (0.028) loss 0.7173 (0.9604) acc 81.2500 (78.8542) lr 1.3535e-03 eta 0:46:21 +epoch [80/200] batch [5/31] time 0.717 (0.895) data 0.000 (0.174) loss 0.7104 (1.0014) acc 78.1250 (80.0000) lr 1.3387e-03 eta 0:55:51 +epoch [80/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.087) loss 0.7173 (1.0364) acc 81.2500 (79.3750) lr 1.3387e-03 eta 0:50:04 +epoch [80/200] batch [15/31] time 0.711 (0.772) data 0.000 (0.058) loss 0.8887 (1.0513) acc 87.5000 (78.3333) lr 1.3387e-03 eta 0:48:04 +epoch [80/200] batch [20/31] time 0.704 (0.763) data 0.000 (0.044) loss 0.9673 (1.0645) acc 81.2500 (77.9688) lr 1.3387e-03 eta 0:47:27 +epoch [80/200] batch [25/31] time 0.723 (0.753) data 0.000 (0.035) loss 1.6162 (1.0713) acc 65.6250 (77.5000) lr 1.3387e-03 eta 0:46:44 +epoch [80/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.029) loss 0.9268 (1.0544) acc 75.0000 (77.0833) lr 1.3387e-03 eta 0:46:14 +epoch [81/200] batch [5/31] time 0.706 (0.899) data 0.000 (0.182) loss 1.1182 (0.9557) 
acc 75.0000 (79.3750) lr 1.3239e-03 eta 0:55:41 +epoch [81/200] batch [10/31] time 0.706 (0.804) data 0.000 (0.091) loss 0.7979 (0.9204) acc 75.0000 (77.8125) lr 1.3239e-03 eta 0:49:44 +epoch [81/200] batch [15/31] time 0.707 (0.774) data 0.000 (0.061) loss 0.7969 (0.9347) acc 81.2500 (77.7083) lr 1.3239e-03 eta 0:47:45 +epoch [81/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.046) loss 0.9155 (0.9375) acc 78.1250 (78.2812) lr 1.3239e-03 eta 0:46:45 +epoch [81/200] batch [25/31] time 0.709 (0.751) data 0.001 (0.037) loss 0.8384 (0.9549) acc 84.3750 (78.1250) lr 1.3239e-03 eta 0:46:14 +epoch [81/200] batch [30/31] time 0.722 (0.745) data 0.000 (0.031) loss 0.9546 (0.9289) acc 68.7500 (78.6458) lr 1.3239e-03 eta 0:45:47 +epoch [82/200] batch [5/31] time 0.715 (0.898) data 0.000 (0.173) loss 1.1553 (0.9832) acc 65.6250 (77.5000) lr 1.3090e-03 eta 0:55:08 +epoch [82/200] batch [10/31] time 0.712 (0.808) data 0.000 (0.087) loss 0.8877 (0.9383) acc 75.0000 (78.1250) lr 1.3090e-03 eta 0:49:34 +epoch [82/200] batch [15/31] time 0.728 (0.778) data 0.000 (0.058) loss 0.6562 (0.9240) acc 75.0000 (77.9167) lr 1.3090e-03 eta 0:47:37 +epoch [82/200] batch [20/31] time 0.711 (0.761) data 0.000 (0.044) loss 0.6377 (0.9449) acc 78.1250 (77.6562) lr 1.3090e-03 eta 0:46:33 +epoch [82/200] batch [25/31] time 0.721 (0.751) data 0.000 (0.035) loss 1.4561 (0.9799) acc 78.1250 (77.0000) lr 1.3090e-03 eta 0:45:53 +epoch [82/200] batch [30/31] time 0.738 (0.746) data 0.000 (0.029) loss 1.0020 (0.9614) acc 71.8750 (76.9792) lr 1.3090e-03 eta 0:45:28 +epoch [83/200] batch [5/31] time 0.721 (0.891) data 0.000 (0.170) loss 0.9346 (0.8575) acc 78.1250 (78.1250) lr 1.2940e-03 eta 0:54:13 +epoch [83/200] batch [10/31] time 0.713 (0.818) data 0.000 (0.085) loss 0.8198 (0.9346) acc 84.3750 (77.5000) lr 1.2940e-03 eta 0:49:43 +epoch [83/200] batch [15/31] time 0.707 (0.782) data 0.001 (0.057) loss 0.6382 (0.9565) acc 84.3750 (78.1250) lr 1.2940e-03 eta 0:47:29 +epoch [83/200] batch [20/31] time 
0.706 (0.765) data 0.000 (0.043) loss 0.7314 (0.9486) acc 75.0000 (77.9688) lr 1.2940e-03 eta 0:46:21 +epoch [83/200] batch [25/31] time 0.722 (0.754) data 0.000 (0.034) loss 1.3633 (1.0016) acc 71.8750 (77.7500) lr 1.2940e-03 eta 0:45:38 +epoch [83/200] batch [30/31] time 0.703 (0.746) data 0.000 (0.029) loss 1.1787 (1.0103) acc 78.1250 (77.0833) lr 1.2940e-03 eta 0:45:07 +epoch [84/200] batch [5/31] time 0.711 (0.912) data 0.000 (0.186) loss 0.6895 (0.8202) acc 87.5000 (83.7500) lr 1.2790e-03 eta 0:55:02 +epoch [84/200] batch [10/31] time 0.731 (0.816) data 0.000 (0.093) loss 1.5078 (0.9271) acc 62.5000 (79.6875) lr 1.2790e-03 eta 0:49:10 +epoch [84/200] batch [15/31] time 0.712 (0.782) data 0.000 (0.062) loss 0.6953 (0.9381) acc 75.0000 (77.9167) lr 1.2790e-03 eta 0:47:04 +epoch [84/200] batch [20/31] time 0.707 (0.766) data 0.000 (0.047) loss 1.1250 (0.9377) acc 78.1250 (78.2812) lr 1.2790e-03 eta 0:46:01 +epoch [84/200] batch [25/31] time 0.710 (0.754) data 0.000 (0.037) loss 0.9502 (0.9089) acc 81.2500 (79.0000) lr 1.2790e-03 eta 0:45:16 +epoch [84/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.031) loss 0.9175 (0.9530) acc 81.2500 (78.2292) lr 1.2790e-03 eta 0:44:44 +epoch [85/200] batch [5/31] time 0.709 (0.883) data 0.000 (0.163) loss 0.7129 (1.0091) acc 81.2500 (75.0000) lr 1.2639e-03 eta 0:52:52 +epoch [85/200] batch [10/31] time 0.710 (0.796) data 0.000 (0.082) loss 0.5991 (0.8801) acc 87.5000 (78.1250) lr 1.2639e-03 eta 0:47:36 +epoch [85/200] batch [15/31] time 0.716 (0.769) data 0.000 (0.055) loss 0.8472 (0.8798) acc 75.0000 (77.5000) lr 1.2639e-03 eta 0:45:52 +epoch [85/200] batch [20/31] time 0.710 (0.753) data 0.000 (0.041) loss 0.6431 (0.8880) acc 78.1250 (78.4375) lr 1.2639e-03 eta 0:44:54 +epoch [85/200] batch [25/31] time 0.710 (0.744) data 0.000 (0.033) loss 0.8203 (0.9190) acc 75.0000 (77.8750) lr 1.2639e-03 eta 0:44:17 +epoch [85/200] batch [30/31] time 0.705 (0.739) data 0.000 (0.027) loss 1.4297 (0.9353) acc 65.6250 (77.8125) lr 
1.2639e-03 eta 0:43:54 +epoch [86/200] batch [5/31] time 0.711 (0.888) data 0.000 (0.161) loss 0.7808 (0.7039) acc 81.2500 (83.7500) lr 1.2487e-03 eta 0:52:40 +epoch [86/200] batch [10/31] time 0.710 (0.799) data 0.000 (0.081) loss 0.9614 (0.8322) acc 71.8750 (80.0000) lr 1.2487e-03 eta 0:47:21 +epoch [86/200] batch [15/31] time 0.706 (0.770) data 0.000 (0.054) loss 0.4751 (0.8955) acc 87.5000 (78.7500) lr 1.2487e-03 eta 0:45:32 +epoch [86/200] batch [20/31] time 0.703 (0.755) data 0.000 (0.041) loss 1.7432 (0.9620) acc 65.6250 (77.8125) lr 1.2487e-03 eta 0:44:37 +epoch [86/200] batch [25/31] time 0.706 (0.746) data 0.000 (0.032) loss 1.0117 (0.9439) acc 78.1250 (78.7500) lr 1.2487e-03 eta 0:44:00 +epoch [86/200] batch [30/31] time 0.708 (0.740) data 0.000 (0.027) loss 1.2422 (0.9446) acc 75.0000 (78.7500) lr 1.2487e-03 eta 0:43:36 +epoch [87/200] batch [5/31] time 0.707 (0.888) data 0.000 (0.169) loss 1.2178 (0.9286) acc 65.6250 (75.0000) lr 1.2334e-03 eta 0:52:14 +epoch [87/200] batch [10/31] time 0.715 (0.802) data 0.000 (0.085) loss 1.0234 (0.9604) acc 62.5000 (73.7500) lr 1.2334e-03 eta 0:47:06 +epoch [87/200] batch [15/31] time 0.707 (0.771) data 0.000 (0.056) loss 0.5845 (0.9503) acc 84.3750 (75.4167) lr 1.2334e-03 eta 0:45:12 +epoch [87/200] batch [20/31] time 0.701 (0.755) data 0.000 (0.042) loss 1.0576 (0.9519) acc 75.0000 (75.7812) lr 1.2334e-03 eta 0:44:13 +epoch [87/200] batch [25/31] time 0.712 (0.745) data 0.000 (0.034) loss 0.5737 (0.9331) acc 87.5000 (76.3750) lr 1.2334e-03 eta 0:43:35 +epoch [87/200] batch [30/31] time 0.710 (0.740) data 0.000 (0.028) loss 1.2324 (0.9372) acc 71.8750 (76.7708) lr 1.2334e-03 eta 0:43:14 +epoch [88/200] batch [5/31] time 0.722 (0.886) data 0.000 (0.167) loss 0.7339 (0.9051) acc 87.5000 (80.6250) lr 1.2181e-03 eta 0:51:39 +epoch [88/200] batch [10/31] time 0.710 (0.799) data 0.000 (0.084) loss 0.5371 (0.8134) acc 90.6250 (82.5000) lr 1.2181e-03 eta 0:46:31 +epoch [88/200] batch [15/31] time 0.707 (0.779) data 0.000 
(0.056) loss 1.2568 (0.9527) acc 68.7500 (79.1667) lr 1.2181e-03 eta 0:45:18 +epoch [88/200] batch [20/31] time 0.727 (0.764) data 0.000 (0.042) loss 0.7666 (0.9248) acc 90.6250 (80.4688) lr 1.2181e-03 eta 0:44:20 +epoch [88/200] batch [25/31] time 0.718 (0.754) data 0.000 (0.034) loss 0.7183 (0.8892) acc 84.3750 (80.7500) lr 1.2181e-03 eta 0:43:41 +epoch [88/200] batch [30/31] time 0.704 (0.746) data 0.000 (0.028) loss 0.7173 (0.9128) acc 75.0000 (79.6875) lr 1.2181e-03 eta 0:43:11 +epoch [89/200] batch [5/31] time 0.706 (0.898) data 0.000 (0.178) loss 0.9590 (1.0636) acc 78.1250 (75.6250) lr 1.2028e-03 eta 0:51:52 +epoch [89/200] batch [10/31] time 0.707 (0.806) data 0.000 (0.089) loss 0.5825 (0.9108) acc 81.2500 (79.0625) lr 1.2028e-03 eta 0:46:31 +epoch [89/200] batch [15/31] time 0.719 (0.775) data 0.001 (0.060) loss 1.0771 (0.8991) acc 78.1250 (79.1667) lr 1.2028e-03 eta 0:44:38 +epoch [89/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.045) loss 1.6660 (0.9724) acc 75.0000 (78.1250) lr 1.2028e-03 eta 0:43:36 +epoch [89/200] batch [25/31] time 0.724 (0.748) data 0.000 (0.036) loss 1.0518 (0.9665) acc 78.1250 (77.8750) lr 1.2028e-03 eta 0:42:58 +epoch [89/200] batch [30/31] time 0.711 (0.741) data 0.000 (0.030) loss 0.7246 (0.9435) acc 78.1250 (78.2292) lr 1.2028e-03 eta 0:42:31 +epoch [90/200] batch [5/31] time 0.707 (0.916) data 0.000 (0.168) loss 1.0723 (0.9240) acc 84.3750 (83.1250) lr 1.1874e-03 eta 0:52:26 +epoch [90/200] batch [10/31] time 0.709 (0.815) data 0.000 (0.084) loss 0.6973 (0.7908) acc 71.8750 (83.1250) lr 1.1874e-03 eta 0:46:36 +epoch [90/200] batch [15/31] time 0.717 (0.784) data 0.000 (0.056) loss 1.0332 (0.7796) acc 81.2500 (83.3333) lr 1.1874e-03 eta 0:44:45 +epoch [90/200] batch [20/31] time 0.708 (0.765) data 0.000 (0.042) loss 0.7681 (0.7881) acc 78.1250 (82.3438) lr 1.1874e-03 eta 0:43:38 +epoch [90/200] batch [25/31] time 0.708 (0.754) data 0.000 (0.034) loss 0.9888 (0.8063) acc 84.3750 (81.6250) lr 1.1874e-03 eta 0:42:55 +epoch 
[90/200] batch [30/31] time 0.707 (0.747) data 0.000 (0.028) loss 1.4307 (0.8565) acc 71.8750 (80.2083) lr 1.1874e-03 eta 0:42:27 +epoch [91/200] batch [5/31] time 0.710 (0.929) data 0.000 (0.210) loss 1.4600 (1.0439) acc 71.8750 (78.1250) lr 1.1719e-03 eta 0:52:41 +epoch [91/200] batch [10/31] time 0.710 (0.820) data 0.000 (0.105) loss 1.0332 (0.9629) acc 78.1250 (78.4375) lr 1.1719e-03 eta 0:46:26 +epoch [91/200] batch [15/31] time 0.716 (0.785) data 0.000 (0.070) loss 0.7954 (0.9773) acc 71.8750 (76.2500) lr 1.1719e-03 eta 0:44:23 +epoch [91/200] batch [20/31] time 0.712 (0.766) data 0.000 (0.053) loss 0.9023 (0.9696) acc 78.1250 (76.4062) lr 1.1719e-03 eta 0:43:15 +epoch [91/200] batch [25/31] time 0.712 (0.754) data 0.000 (0.042) loss 1.0752 (0.9702) acc 78.1250 (77.0000) lr 1.1719e-03 eta 0:42:33 +epoch [91/200] batch [30/31] time 0.710 (0.747) data 0.000 (0.035) loss 0.6777 (0.9331) acc 78.1250 (77.5000) lr 1.1719e-03 eta 0:42:04 +epoch [92/200] batch [5/31] time 0.722 (0.898) data 0.000 (0.167) loss 0.8394 (0.6870) acc 87.5000 (83.7500) lr 1.1564e-03 eta 0:50:30 +epoch [92/200] batch [10/31] time 0.703 (0.824) data 0.000 (0.083) loss 0.6050 (0.7031) acc 84.3750 (84.0625) lr 1.1564e-03 eta 0:46:16 +epoch [92/200] batch [15/31] time 0.704 (0.787) data 0.000 (0.056) loss 0.6040 (0.7618) acc 87.5000 (83.5417) lr 1.1564e-03 eta 0:44:07 +epoch [92/200] batch [20/31] time 0.710 (0.768) data 0.000 (0.042) loss 0.4604 (0.7861) acc 87.5000 (81.4062) lr 1.1564e-03 eta 0:42:59 +epoch [92/200] batch [25/31] time 0.721 (0.758) data 0.000 (0.034) loss 0.5938 (0.7812) acc 84.3750 (81.2500) lr 1.1564e-03 eta 0:42:21 +epoch [92/200] batch [30/31] time 0.705 (0.749) data 0.000 (0.028) loss 0.7612 (0.7706) acc 84.3750 (81.7708) lr 1.1564e-03 eta 0:41:48 +epoch [93/200] batch [5/31] time 0.719 (0.906) data 0.000 (0.175) loss 0.8506 (0.9299) acc 84.3750 (76.8750) lr 1.1409e-03 eta 0:50:28 +epoch [93/200] batch [10/31] time 0.709 (0.809) data 0.000 (0.088) loss 1.3887 (0.9624) 
acc 75.0000 (77.1875) lr 1.1409e-03 eta 0:44:58 +epoch [93/200] batch [15/31] time 0.709 (0.777) data 0.000 (0.058) loss 0.6226 (0.9381) acc 81.2500 (76.0417) lr 1.1409e-03 eta 0:43:09 +epoch [93/200] batch [20/31] time 0.706 (0.760) data 0.000 (0.044) loss 0.9365 (0.8738) acc 90.6250 (79.0625) lr 1.1409e-03 eta 0:42:08 +epoch [93/200] batch [25/31] time 0.708 (0.750) data 0.000 (0.035) loss 0.9863 (0.8418) acc 84.3750 (80.0000) lr 1.1409e-03 eta 0:41:33 +epoch [93/200] batch [30/31] time 0.711 (0.744) data 0.000 (0.029) loss 0.8394 (0.8673) acc 68.7500 (79.6875) lr 1.1409e-03 eta 0:41:08 +epoch [94/200] batch [5/31] time 0.714 (0.916) data 0.000 (0.187) loss 0.9609 (0.9039) acc 81.2500 (81.8750) lr 1.1253e-03 eta 0:50:34 +epoch [94/200] batch [10/31] time 0.713 (0.816) data 0.000 (0.094) loss 0.8853 (0.8604) acc 75.0000 (79.6875) lr 1.1253e-03 eta 0:44:59 +epoch [94/200] batch [15/31] time 0.710 (0.783) data 0.000 (0.062) loss 1.2764 (0.9278) acc 68.7500 (79.3750) lr 1.1253e-03 eta 0:43:04 +epoch [94/200] batch [20/31] time 0.711 (0.765) data 0.000 (0.047) loss 0.5786 (0.8905) acc 87.5000 (79.6875) lr 1.1253e-03 eta 0:42:01 +epoch [94/200] batch [25/31] time 0.711 (0.755) data 0.000 (0.038) loss 0.8364 (0.8548) acc 81.2500 (80.3750) lr 1.1253e-03 eta 0:41:26 +epoch [94/200] batch [30/31] time 0.709 (0.748) data 0.000 (0.031) loss 0.8833 (0.8811) acc 87.5000 (80.2083) lr 1.1253e-03 eta 0:40:57 +epoch [95/200] batch [5/31] time 0.733 (0.900) data 0.000 (0.166) loss 1.1836 (0.8109) acc 75.0000 (81.8750) lr 1.1097e-03 eta 0:49:12 +epoch [95/200] batch [10/31] time 0.724 (0.809) data 0.000 (0.083) loss 0.6396 (0.9524) acc 81.2500 (79.3750) lr 1.1097e-03 eta 0:44:08 +epoch [95/200] batch [15/31] time 0.714 (0.777) data 0.000 (0.056) loss 1.0820 (0.9198) acc 87.5000 (79.3750) lr 1.1097e-03 eta 0:42:21 +epoch [95/200] batch [20/31] time 0.717 (0.760) data 0.000 (0.042) loss 0.5801 (0.8782) acc 87.5000 (80.4688) lr 1.1097e-03 eta 0:41:22 +epoch [95/200] batch [25/31] time 
0.716 (0.750) data 0.000 (0.034) loss 0.8643 (0.9190) acc 84.3750 (79.7500) lr 1.1097e-03 eta 0:40:46 +epoch [95/200] batch [30/31] time 0.710 (0.743) data 0.000 (0.028) loss 1.0146 (0.9143) acc 75.0000 (79.3750) lr 1.1097e-03 eta 0:40:20 +epoch [96/200] batch [5/31] time 0.707 (0.929) data 0.000 (0.214) loss 0.9648 (0.9265) acc 84.3750 (80.0000) lr 1.0941e-03 eta 0:50:18 +epoch [96/200] batch [10/31] time 0.711 (0.820) data 0.000 (0.107) loss 0.7383 (0.8647) acc 84.3750 (79.3750) lr 1.0941e-03 eta 0:44:19 +epoch [96/200] batch [15/31] time 0.705 (0.782) data 0.000 (0.072) loss 0.4597 (0.8006) acc 90.6250 (81.4583) lr 1.0941e-03 eta 0:42:13 +epoch [96/200] batch [20/31] time 0.725 (0.764) data 0.000 (0.054) loss 1.3975 (0.8767) acc 71.8750 (79.6875) lr 1.0941e-03 eta 0:41:11 +epoch [96/200] batch [25/31] time 0.708 (0.753) data 0.000 (0.043) loss 1.5430 (0.9144) acc 68.7500 (79.8750) lr 1.0941e-03 eta 0:40:31 +epoch [96/200] batch [30/31] time 0.706 (0.746) data 0.001 (0.036) loss 1.7412 (0.9225) acc 65.6250 (79.6875) lr 1.0941e-03 eta 0:40:05 +epoch [97/200] batch [5/31] time 0.714 (0.900) data 0.000 (0.181) loss 1.0186 (0.6266) acc 87.5000 (86.2500) lr 1.0785e-03 eta 0:48:16 +epoch [97/200] batch [10/31] time 0.724 (0.808) data 0.000 (0.091) loss 0.9199 (0.6778) acc 81.2500 (84.3750) lr 1.0785e-03 eta 0:43:15 +epoch [97/200] batch [15/31] time 0.713 (0.776) data 0.000 (0.061) loss 0.9219 (0.7371) acc 84.3750 (83.3333) lr 1.0785e-03 eta 0:41:31 +epoch [97/200] batch [20/31] time 0.720 (0.760) data 0.000 (0.045) loss 1.0400 (0.8320) acc 65.6250 (80.6250) lr 1.0785e-03 eta 0:40:36 +epoch [97/200] batch [25/31] time 0.707 (0.751) data 0.000 (0.036) loss 1.1992 (0.8542) acc 81.2500 (80.3750) lr 1.0785e-03 eta 0:40:01 +epoch [97/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.030) loss 0.6211 (0.8543) acc 81.2500 (80.2083) lr 1.0785e-03 eta 0:39:35 +epoch [98/200] batch [5/31] time 0.717 (0.903) data 0.000 (0.182) loss 0.6309 (0.7431) acc 87.5000 (81.8750) lr 
1.0628e-03 eta 0:47:59 +epoch [98/200] batch [10/31] time 0.703 (0.825) data 0.000 (0.091) loss 0.6821 (0.8510) acc 84.3750 (80.6250) lr 1.0628e-03 eta 0:43:44 +epoch [98/200] batch [15/31] time 0.712 (0.788) data 0.000 (0.061) loss 0.7188 (0.8307) acc 84.3750 (81.4583) lr 1.0628e-03 eta 0:41:45 +epoch [98/200] batch [20/31] time 0.708 (0.769) data 0.000 (0.046) loss 0.5820 (0.8222) acc 87.5000 (80.9375) lr 1.0628e-03 eta 0:40:40 +epoch [98/200] batch [25/31] time 0.715 (0.758) data 0.000 (0.037) loss 1.2939 (0.8435) acc 81.2500 (81.0000) lr 1.0628e-03 eta 0:40:02 +epoch [98/200] batch [30/31] time 0.707 (0.751) data 0.000 (0.031) loss 0.6421 (0.8497) acc 84.3750 (80.7292) lr 1.0628e-03 eta 0:39:34 +epoch [99/200] batch [5/31] time 0.717 (0.895) data 0.000 (0.169) loss 0.8521 (0.8138) acc 78.1250 (83.1250) lr 1.0471e-03 eta 0:47:04 +epoch [99/200] batch [10/31] time 0.711 (0.804) data 0.000 (0.085) loss 0.7637 (0.8609) acc 78.1250 (81.8750) lr 1.0471e-03 eta 0:42:15 +epoch [99/200] batch [15/31] time 0.728 (0.775) data 0.000 (0.057) loss 1.0791 (0.9205) acc 65.6250 (79.5833) lr 1.0471e-03 eta 0:40:37 +epoch [99/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.043) loss 0.7505 (0.9364) acc 75.0000 (79.2188) lr 1.0471e-03 eta 0:39:40 +epoch [99/200] batch [25/31] time 0.706 (0.747) data 0.000 (0.034) loss 1.5332 (0.9791) acc 62.5000 (77.7500) lr 1.0471e-03 eta 0:39:04 +epoch [99/200] batch [30/31] time 0.849 (0.745) data 0.000 (0.029) loss 0.9111 (0.9329) acc 78.1250 (78.8542) lr 1.0471e-03 eta 0:38:54 +epoch [100/200] batch [5/31] time 0.706 (0.914) data 0.000 (0.186) loss 1.0117 (0.7892) acc 78.1250 (81.8750) lr 1.0314e-03 eta 0:47:37 +epoch [100/200] batch [10/31] time 0.709 (0.812) data 0.000 (0.093) loss 1.1885 (0.7762) acc 78.1250 (81.2500) lr 1.0314e-03 eta 0:42:14 +epoch [100/200] batch [15/31] time 0.725 (0.780) data 0.001 (0.062) loss 1.1357 (0.7544) acc 78.1250 (81.6667) lr 1.0314e-03 eta 0:40:29 +epoch [100/200] batch [20/31] time 0.709 (0.763) data 
0.000 (0.047) loss 0.4072 (0.7890) acc 87.5000 (80.0000) lr 1.0314e-03 eta 0:39:33 +epoch [100/200] batch [25/31] time 0.709 (0.753) data 0.001 (0.038) loss 1.3223 (0.8183) acc 71.8750 (80.0000) lr 1.0314e-03 eta 0:39:00 +epoch [100/200] batch [30/31] time 0.704 (0.747) data 0.000 (0.031) loss 1.0410 (0.8448) acc 75.0000 (79.2708) lr 1.0314e-03 eta 0:38:35 +epoch [101/200] batch [5/31] time 0.719 (0.878) data 0.000 (0.154) loss 0.7573 (0.8078) acc 84.3750 (83.1250) lr 1.0157e-03 eta 0:45:16 +epoch [101/200] batch [10/31] time 0.715 (0.795) data 0.001 (0.077) loss 0.4285 (0.7540) acc 93.7500 (83.4375) lr 1.0157e-03 eta 0:40:57 +epoch [101/200] batch [15/31] time 0.710 (0.768) data 0.000 (0.052) loss 1.0088 (0.8063) acc 81.2500 (81.6667) lr 1.0157e-03 eta 0:39:29 +epoch [101/200] batch [20/31] time 0.723 (0.754) data 0.000 (0.039) loss 1.5850 (0.8775) acc 65.6250 (79.6875) lr 1.0157e-03 eta 0:38:42 +epoch [101/200] batch [25/31] time 0.710 (0.746) data 0.000 (0.031) loss 1.5967 (0.8653) acc 65.6250 (80.3750) lr 1.0157e-03 eta 0:38:13 +epoch [101/200] batch [30/31] time 0.713 (0.741) data 0.000 (0.026) loss 0.6963 (0.8566) acc 84.3750 (80.5208) lr 1.0157e-03 eta 0:37:54 +epoch [102/200] batch [5/31] time 0.711 (0.927) data 0.001 (0.175) loss 0.7778 (0.9541) acc 90.6250 (80.6250) lr 1.0000e-03 eta 0:47:20 +epoch [102/200] batch [10/31] time 0.709 (0.821) data 0.000 (0.088) loss 0.8091 (0.9443) acc 75.0000 (78.7500) lr 1.0000e-03 eta 0:41:52 +epoch [102/200] batch [15/31] time 0.710 (0.785) data 0.000 (0.059) loss 0.9014 (0.9318) acc 78.1250 (78.9583) lr 1.0000e-03 eta 0:39:56 +epoch [102/200] batch [20/31] time 0.713 (0.767) data 0.000 (0.044) loss 1.1113 (0.9218) acc 65.6250 (78.2812) lr 1.0000e-03 eta 0:38:57 +epoch [102/200] batch [25/31] time 0.716 (0.757) data 0.000 (0.035) loss 0.6196 (0.8860) acc 84.3750 (79.0000) lr 1.0000e-03 eta 0:38:22 +epoch [102/200] batch [30/31] time 0.720 (0.750) data 0.000 (0.029) loss 1.2041 (0.8855) acc 75.0000 (79.0625) lr 
1.0000e-03 eta 0:37:57 +epoch [103/200] batch [5/31] time 0.713 (0.891) data 0.000 (0.169) loss 0.5688 (0.7270) acc 87.5000 (85.6250) lr 9.8429e-04 eta 0:45:01 +epoch [103/200] batch [10/31] time 0.711 (0.803) data 0.001 (0.085) loss 0.6382 (0.7063) acc 81.2500 (85.0000) lr 9.8429e-04 eta 0:40:31 +epoch [103/200] batch [15/31] time 0.735 (0.776) data 0.000 (0.057) loss 0.7388 (0.7374) acc 84.3750 (83.3333) lr 9.8429e-04 eta 0:39:06 +epoch [103/200] batch [20/31] time 0.707 (0.761) data 0.000 (0.043) loss 0.8345 (0.7488) acc 81.2500 (82.8125) lr 9.8429e-04 eta 0:38:15 +epoch [103/200] batch [25/31] time 0.714 (0.751) data 0.000 (0.034) loss 1.1162 (0.8013) acc 75.0000 (81.5000) lr 9.8429e-04 eta 0:37:43 +epoch [103/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.028) loss 0.5581 (0.7797) acc 87.5000 (82.0833) lr 9.8429e-04 eta 0:37:21 +epoch [104/200] batch [5/31] time 0.720 (0.885) data 0.000 (0.166) loss 0.7500 (0.6031) acc 87.5000 (88.1250) lr 9.6859e-04 eta 0:44:17 +epoch [104/200] batch [10/31] time 0.705 (0.799) data 0.000 (0.083) loss 0.8369 (0.7243) acc 81.2500 (84.6875) lr 9.6859e-04 eta 0:39:55 +epoch [104/200] batch [15/31] time 0.703 (0.769) data 0.000 (0.056) loss 1.0293 (0.7636) acc 84.3750 (83.9583) lr 9.6859e-04 eta 0:38:20 +epoch [104/200] batch [20/31] time 0.716 (0.753) data 0.000 (0.042) loss 0.7529 (0.8257) acc 78.1250 (82.8125) lr 9.6859e-04 eta 0:37:30 +epoch [104/200] batch [25/31] time 0.705 (0.745) data 0.000 (0.033) loss 0.5264 (0.8260) acc 87.5000 (82.5000) lr 9.6859e-04 eta 0:37:01 +epoch [104/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.028) loss 1.0137 (0.8344) acc 81.2500 (82.3958) lr 9.6859e-04 eta 0:36:41 +epoch [105/200] batch [5/31] time 0.710 (0.897) data 0.000 (0.174) loss 0.7500 (0.7664) acc 81.2500 (81.2500) lr 9.5289e-04 eta 0:44:24 +epoch [105/200] batch [10/31] time 0.709 (0.805) data 0.000 (0.087) loss 0.8638 (0.9265) acc 81.2500 (78.7500) lr 9.5289e-04 eta 0:39:48 +epoch [105/200] batch [15/31] time 0.709 
(0.774) data 0.000 (0.058) loss 0.6133 (0.9053) acc 84.3750 (79.1667) lr 9.5289e-04 eta 0:38:13 +epoch [105/200] batch [20/31] time 0.724 (0.761) data 0.000 (0.044) loss 0.5645 (0.8427) acc 87.5000 (80.7812) lr 9.5289e-04 eta 0:37:29 +epoch [105/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.035) loss 0.9907 (0.8776) acc 78.1250 (79.5000) lr 9.5289e-04 eta 0:36:55 +epoch [105/200] batch [30/31] time 0.857 (0.750) data 0.000 (0.029) loss 0.5088 (0.8850) acc 90.6250 (79.8958) lr 9.5289e-04 eta 0:36:48 +epoch [106/200] batch [5/31] time 0.707 (0.900) data 0.000 (0.180) loss 0.5957 (0.6646) acc 84.3750 (82.5000) lr 9.3721e-04 eta 0:44:07 +epoch [106/200] batch [10/31] time 0.714 (0.809) data 0.000 (0.090) loss 0.4470 (0.6770) acc 90.6250 (84.6875) lr 9.3721e-04 eta 0:39:34 +epoch [106/200] batch [15/31] time 0.713 (0.779) data 0.000 (0.060) loss 0.5581 (0.7548) acc 87.5000 (83.1250) lr 9.3721e-04 eta 0:38:02 +epoch [106/200] batch [20/31] time 0.711 (0.763) data 0.000 (0.045) loss 0.8584 (0.7351) acc 81.2500 (83.5938) lr 9.3721e-04 eta 0:37:10 +epoch [106/200] batch [25/31] time 0.720 (0.753) data 0.000 (0.036) loss 0.6255 (0.7467) acc 81.2500 (83.6250) lr 9.3721e-04 eta 0:36:38 +epoch [106/200] batch [30/31] time 0.709 (0.746) data 0.000 (0.030) loss 0.4785 (0.7534) acc 87.5000 (83.1250) lr 9.3721e-04 eta 0:36:15 +epoch [107/200] batch [5/31] time 0.711 (0.881) data 0.000 (0.156) loss 0.7920 (1.0671) acc 78.1250 (73.7500) lr 9.2154e-04 eta 0:42:42 +epoch [107/200] batch [10/31] time 0.724 (0.798) data 0.000 (0.078) loss 0.5337 (0.9378) acc 90.6250 (76.5625) lr 9.2154e-04 eta 0:38:36 +epoch [107/200] batch [15/31] time 0.713 (0.769) data 0.000 (0.052) loss 1.0117 (0.9000) acc 68.7500 (77.2917) lr 9.2154e-04 eta 0:37:10 +epoch [107/200] batch [20/31] time 0.717 (0.755) data 0.000 (0.039) loss 1.2285 (0.9104) acc 78.1250 (77.9688) lr 9.2154e-04 eta 0:36:23 +epoch [107/200] batch [25/31] time 0.706 (0.746) data 0.000 (0.031) loss 0.8921 (0.9059) acc 81.2500 (78.3750) 
lr 9.2154e-04 eta 0:35:55 +epoch [107/200] batch [30/31] time 0.712 (0.741) data 0.000 (0.026) loss 0.6104 (0.8805) acc 84.3750 (79.2708) lr 9.2154e-04 eta 0:35:36 +epoch [108/200] batch [5/31] time 0.706 (0.895) data 0.000 (0.175) loss 0.8398 (0.7985) acc 81.2500 (85.0000) lr 9.0589e-04 eta 0:42:54 +epoch [108/200] batch [10/31] time 0.729 (0.820) data 0.000 (0.087) loss 0.4614 (0.8425) acc 87.5000 (82.5000) lr 9.0589e-04 eta 0:39:15 +epoch [108/200] batch [15/31] time 0.712 (0.785) data 0.000 (0.058) loss 1.1621 (0.8852) acc 71.8750 (81.4583) lr 9.0589e-04 eta 0:37:30 +epoch [108/200] batch [20/31] time 0.708 (0.766) data 0.000 (0.044) loss 0.4790 (0.8121) acc 81.2500 (81.8750) lr 9.0589e-04 eta 0:36:32 +epoch [108/200] batch [25/31] time 0.715 (0.755) data 0.000 (0.035) loss 1.0029 (0.8454) acc 68.7500 (81.0000) lr 9.0589e-04 eta 0:35:58 +epoch [108/200] batch [30/31] time 0.706 (0.748) data 0.000 (0.029) loss 0.9121 (0.8445) acc 78.1250 (80.8333) lr 9.0589e-04 eta 0:35:33 +epoch [109/200] batch [5/31] time 0.714 (0.889) data 0.000 (0.169) loss 0.8359 (0.7940) acc 78.1250 (81.8750) lr 8.9027e-04 eta 0:42:10 +epoch [109/200] batch [10/31] time 0.714 (0.801) data 0.000 (0.085) loss 0.5591 (0.8286) acc 90.6250 (82.8125) lr 8.9027e-04 eta 0:37:56 +epoch [109/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.057) loss 0.6304 (0.8539) acc 87.5000 (80.8333) lr 8.9027e-04 eta 0:36:32 +epoch [109/200] batch [20/31] time 0.710 (0.757) data 0.000 (0.043) loss 0.4434 (0.8129) acc 87.5000 (81.0938) lr 8.9027e-04 eta 0:35:43 +epoch [109/200] batch [25/31] time 0.868 (0.755) data 0.000 (0.034) loss 1.2432 (0.8491) acc 75.0000 (81.0000) lr 8.9027e-04 eta 0:35:33 +epoch [109/200] batch [30/31] time 0.723 (0.748) data 0.000 (0.029) loss 0.9131 (0.8680) acc 78.1250 (80.6250) lr 8.9027e-04 eta 0:35:09 +epoch [110/200] batch [5/31] time 0.732 (0.895) data 0.000 (0.166) loss 1.2520 (1.0128) acc 78.1250 (82.5000) lr 8.7467e-04 eta 0:42:01 +epoch [110/200] batch [10/31] time 0.720 
(0.805) data 0.000 (0.083) loss 0.6538 (0.9006) acc 84.3750 (81.8750) lr 8.7467e-04 eta 0:37:42 +epoch [110/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.056) loss 1.3350 (0.8833) acc 75.0000 (81.6667) lr 8.7467e-04 eta 0:36:12 +epoch [110/200] batch [20/31] time 0.728 (0.759) data 0.000 (0.042) loss 0.6885 (0.8304) acc 78.1250 (82.1875) lr 8.7467e-04 eta 0:35:25 +epoch [110/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.034) loss 0.6450 (0.8072) acc 81.2500 (82.2500) lr 8.7467e-04 eta 0:34:53 +epoch [110/200] batch [30/31] time 0.723 (0.743) data 0.000 (0.028) loss 1.0479 (0.8516) acc 71.8750 (81.2500) lr 8.7467e-04 eta 0:34:33 +epoch [111/200] batch [5/31] time 0.712 (0.908) data 0.000 (0.184) loss 0.9009 (0.8575) acc 78.1250 (78.1250) lr 8.5910e-04 eta 0:42:07 +epoch [111/200] batch [10/31] time 0.724 (0.811) data 0.000 (0.092) loss 0.7207 (0.7584) acc 81.2500 (79.6875) lr 8.5910e-04 eta 0:37:35 +epoch [111/200] batch [15/31] time 0.725 (0.780) data 0.001 (0.062) loss 1.5488 (0.8923) acc 75.0000 (77.2917) lr 8.5910e-04 eta 0:36:05 +epoch [111/200] batch [20/31] time 0.717 (0.764) data 0.000 (0.046) loss 0.9902 (0.8354) acc 71.8750 (79.2188) lr 8.5910e-04 eta 0:35:17 +epoch [111/200] batch [25/31] time 0.710 (0.759) data 0.000 (0.037) loss 0.7402 (0.8310) acc 81.2500 (79.7500) lr 8.5910e-04 eta 0:34:59 +epoch [111/200] batch [30/31] time 0.706 (0.751) data 0.000 (0.031) loss 1.3857 (0.8158) acc 62.5000 (79.8958) lr 8.5910e-04 eta 0:34:33 +epoch [112/200] batch [5/31] time 0.726 (0.929) data 0.000 (0.196) loss 0.7612 (0.6398) acc 81.2500 (85.0000) lr 8.4357e-04 eta 0:42:37 +epoch [112/200] batch [10/31] time 0.711 (0.824) data 0.000 (0.098) loss 0.9175 (0.7201) acc 87.5000 (83.7500) lr 8.4357e-04 eta 0:37:45 +epoch [112/200] batch [15/31] time 0.710 (0.788) data 0.000 (0.066) loss 0.9453 (0.7194) acc 75.0000 (83.5417) lr 8.4357e-04 eta 0:36:01 +epoch [112/200] batch [20/31] time 0.738 (0.771) data 0.000 (0.049) loss 0.8857 (0.7227) acc 78.1250 (83.2812) 
lr 8.4357e-04 eta 0:35:10 +epoch [112/200] batch [25/31] time 0.718 (0.758) data 0.000 (0.039) loss 1.0596 (0.7664) acc 81.2500 (82.1250) lr 8.4357e-04 eta 0:34:33 +epoch [112/200] batch [30/31] time 0.726 (0.751) data 0.000 (0.033) loss 0.8184 (0.7536) acc 78.1250 (82.7083) lr 8.4357e-04 eta 0:34:10 +epoch [113/200] batch [5/31] time 0.720 (0.890) data 0.000 (0.166) loss 1.0068 (0.7497) acc 87.5000 (86.2500) lr 8.2807e-04 eta 0:40:24 +epoch [113/200] batch [10/31] time 0.714 (0.801) data 0.000 (0.083) loss 0.8311 (0.7739) acc 87.5000 (84.6875) lr 8.2807e-04 eta 0:36:16 +epoch [113/200] batch [15/31] time 0.721 (0.772) data 0.000 (0.056) loss 0.9863 (0.8234) acc 68.7500 (82.2917) lr 8.2807e-04 eta 0:34:53 +epoch [113/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.042) loss 1.0713 (0.8447) acc 78.1250 (82.0312) lr 8.2807e-04 eta 0:34:09 +epoch [113/200] batch [25/31] time 0.727 (0.749) data 0.000 (0.034) loss 0.9229 (0.8248) acc 78.1250 (82.0000) lr 8.2807e-04 eta 0:33:43 +epoch [113/200] batch [30/31] time 0.716 (0.744) data 0.000 (0.028) loss 0.8398 (0.8150) acc 81.2500 (82.1875) lr 8.2807e-04 eta 0:33:26 +epoch [114/200] batch [5/31] time 0.713 (0.911) data 0.000 (0.191) loss 0.6914 (0.7789) acc 84.3750 (80.6250) lr 8.1262e-04 eta 0:40:52 +epoch [114/200] batch [10/31] time 0.713 (0.813) data 0.000 (0.096) loss 1.3564 (0.8196) acc 71.8750 (80.0000) lr 8.1262e-04 eta 0:36:23 +epoch [114/200] batch [15/31] time 0.710 (0.780) data 0.001 (0.064) loss 0.6318 (0.8402) acc 84.3750 (80.0000) lr 8.1262e-04 eta 0:34:51 +epoch [114/200] batch [20/31] time 0.702 (0.762) data 0.000 (0.048) loss 1.0830 (0.8711) acc 71.8750 (78.7500) lr 8.1262e-04 eta 0:34:00 +epoch [114/200] batch [25/31] time 0.706 (0.752) data 0.000 (0.039) loss 0.7749 (0.8458) acc 90.6250 (80.2500) lr 8.1262e-04 eta 0:33:28 +epoch [114/200] batch [30/31] time 0.845 (0.749) data 0.000 (0.032) loss 0.9097 (0.8438) acc 81.2500 (80.4167) lr 8.1262e-04 eta 0:33:16 +epoch [115/200] batch [5/31] time 0.708 
(0.897) data 0.000 (0.174) loss 0.7422 (0.9867) acc 93.7500 (83.7500) lr 7.9721e-04 eta 0:39:47 +epoch [115/200] batch [10/31] time 0.712 (0.808) data 0.000 (0.087) loss 0.6470 (0.8804) acc 84.3750 (81.5625) lr 7.9721e-04 eta 0:35:45 +epoch [115/200] batch [15/31] time 0.706 (0.775) data 0.000 (0.058) loss 1.1787 (0.9255) acc 81.2500 (81.2500) lr 7.9721e-04 eta 0:34:13 +epoch [115/200] batch [20/31] time 0.707 (0.758) data 0.000 (0.044) loss 1.1895 (0.9040) acc 75.0000 (81.0938) lr 7.9721e-04 eta 0:33:26 +epoch [115/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.035) loss 1.3281 (0.9146) acc 75.0000 (81.3750) lr 7.9721e-04 eta 0:32:58 +epoch [115/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.029) loss 0.4326 (0.8649) acc 93.7500 (82.0833) lr 7.9721e-04 eta 0:32:38 +epoch [116/200] batch [5/31] time 0.711 (0.883) data 0.000 (0.166) loss 1.0830 (0.9023) acc 71.8750 (78.7500) lr 7.8186e-04 eta 0:38:43 +epoch [116/200] batch [10/31] time 0.724 (0.802) data 0.001 (0.083) loss 0.7168 (0.7979) acc 81.2500 (81.8750) lr 7.8186e-04 eta 0:35:05 +epoch [116/200] batch [15/31] time 0.723 (0.775) data 0.000 (0.056) loss 0.6050 (0.7674) acc 84.3750 (83.5417) lr 7.8186e-04 eta 0:33:51 +epoch [116/200] batch [20/31] time 0.712 (0.759) data 0.000 (0.042) loss 1.4072 (0.7690) acc 65.6250 (82.5000) lr 7.8186e-04 eta 0:33:03 +epoch [116/200] batch [25/31] time 0.715 (0.750) data 0.000 (0.034) loss 0.5771 (0.7290) acc 87.5000 (83.6250) lr 7.8186e-04 eta 0:32:36 +epoch [116/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.028) loss 0.7808 (0.7400) acc 84.3750 (83.4375) lr 7.8186e-04 eta 0:32:16 +epoch [117/200] batch [5/31] time 0.735 (0.920) data 0.000 (0.187) loss 0.9834 (0.9380) acc 62.5000 (75.0000) lr 7.6655e-04 eta 0:39:51 +epoch [117/200] batch [10/31] time 0.735 (0.820) data 0.000 (0.094) loss 0.9922 (0.7979) acc 78.1250 (82.1875) lr 7.6655e-04 eta 0:35:26 +epoch [117/200] batch [15/31] time 0.709 (0.784) data 0.000 (0.063) loss 0.6851 (0.7731) acc 84.3750 (82.2917) 
lr 7.6655e-04 eta 0:33:51 +epoch [117/200] batch [20/31] time 0.726 (0.767) data 0.000 (0.047) loss 0.9033 (0.7638) acc 84.3750 (82.8125) lr 7.6655e-04 eta 0:33:02 +epoch [117/200] batch [25/31] time 0.709 (0.756) data 0.000 (0.038) loss 1.0020 (0.7729) acc 71.8750 (82.0000) lr 7.6655e-04 eta 0:32:28 +epoch [117/200] batch [30/31] time 0.715 (0.748) data 0.000 (0.031) loss 0.4275 (0.7453) acc 87.5000 (82.7083) lr 7.6655e-04 eta 0:32:05 +epoch [118/200] batch [5/31] time 0.706 (0.888) data 0.000 (0.168) loss 0.9048 (0.7375) acc 78.1250 (85.0000) lr 7.5131e-04 eta 0:37:59 +epoch [118/200] batch [10/31] time 0.722 (0.800) data 0.000 (0.084) loss 0.5435 (0.8278) acc 87.5000 (81.5625) lr 7.5131e-04 eta 0:34:11 +epoch [118/200] batch [15/31] time 0.713 (0.770) data 0.000 (0.056) loss 0.4907 (0.7801) acc 87.5000 (82.9167) lr 7.5131e-04 eta 0:32:50 +epoch [118/200] batch [20/31] time 0.712 (0.755) data 0.000 (0.042) loss 0.9004 (0.8366) acc 81.2500 (81.7188) lr 7.5131e-04 eta 0:32:07 +epoch [118/200] batch [25/31] time 0.712 (0.746) data 0.000 (0.034) loss 0.5596 (0.8451) acc 90.6250 (81.5000) lr 7.5131e-04 eta 0:31:41 +epoch [118/200] batch [30/31] time 0.713 (0.740) data 0.000 (0.028) loss 0.6523 (0.8284) acc 81.2500 (81.9792) lr 7.5131e-04 eta 0:31:21 +epoch [119/200] batch [5/31] time 0.710 (0.881) data 0.000 (0.159) loss 0.8843 (0.8646) acc 78.1250 (80.0000) lr 7.3613e-04 eta 0:37:14 +epoch [119/200] batch [10/31] time 0.714 (0.796) data 0.000 (0.079) loss 0.8228 (0.7897) acc 81.2500 (82.5000) lr 7.3613e-04 eta 0:33:35 +epoch [119/200] batch [15/31] time 0.707 (0.767) data 0.000 (0.053) loss 0.5093 (0.7333) acc 90.6250 (83.5417) lr 7.3613e-04 eta 0:32:18 +epoch [119/200] batch [20/31] time 0.705 (0.760) data 0.000 (0.040) loss 0.3118 (0.7398) acc 93.7500 (83.4375) lr 7.3613e-04 eta 0:31:57 +epoch [119/200] batch [25/31] time 0.707 (0.750) data 0.000 (0.032) loss 0.6978 (0.7494) acc 84.3750 (83.5000) lr 7.3613e-04 eta 0:31:26 +epoch [119/200] batch [30/31] time 0.705 
(0.743) data 0.000 (0.027) loss 0.7236 (0.7318) acc 78.1250 (83.9583) lr 7.3613e-04 eta 0:31:06 +epoch [120/200] batch [5/31] time 0.709 (0.891) data 0.000 (0.170) loss 1.4482 (0.8655) acc 78.1250 (81.2500) lr 7.2101e-04 eta 0:37:12 +epoch [120/200] batch [10/31] time 0.707 (0.800) data 0.000 (0.085) loss 0.9268 (0.7482) acc 87.5000 (84.0625) lr 7.2101e-04 eta 0:33:20 +epoch [120/200] batch [15/31] time 0.710 (0.772) data 0.000 (0.057) loss 0.9775 (0.7325) acc 78.1250 (83.5417) lr 7.2101e-04 eta 0:32:07 +epoch [120/200] batch [20/31] time 0.725 (0.759) data 0.000 (0.043) loss 0.4963 (0.7098) acc 81.2500 (83.1250) lr 7.2101e-04 eta 0:31:30 +epoch [120/200] batch [25/31] time 0.725 (0.751) data 0.000 (0.034) loss 0.5957 (0.7186) acc 84.3750 (82.2500) lr 7.2101e-04 eta 0:31:07 +epoch [120/200] batch [30/31] time 0.703 (0.744) data 0.000 (0.029) loss 0.4524 (0.7386) acc 93.7500 (82.1875) lr 7.2101e-04 eta 0:30:45 +epoch [121/200] batch [5/31] time 0.716 (0.876) data 0.000 (0.160) loss 0.7788 (0.9491) acc 78.1250 (79.3750) lr 7.0596e-04 eta 0:36:08 +epoch [121/200] batch [10/31] time 0.709 (0.807) data 0.001 (0.080) loss 1.4512 (0.8794) acc 78.1250 (81.2500) lr 7.0596e-04 eta 0:33:13 +epoch [121/200] batch [15/31] time 0.720 (0.777) data 0.000 (0.054) loss 0.4817 (0.8631) acc 93.7500 (81.8750) lr 7.0596e-04 eta 0:31:56 +epoch [121/200] batch [20/31] time 0.725 (0.762) data 0.000 (0.040) loss 1.1719 (0.8239) acc 78.1250 (83.4375) lr 7.0596e-04 eta 0:31:15 +epoch [121/200] batch [25/31] time 0.717 (0.753) data 0.000 (0.032) loss 0.6357 (0.7724) acc 90.6250 (84.3750) lr 7.0596e-04 eta 0:30:47 +epoch [121/200] batch [30/31] time 0.704 (0.745) data 0.000 (0.027) loss 0.5391 (0.7556) acc 93.7500 (84.1667) lr 7.0596e-04 eta 0:30:25 +epoch [122/200] batch [5/31] time 0.708 (0.893) data 0.000 (0.169) loss 1.0977 (0.7866) acc 75.0000 (81.2500) lr 6.9098e-04 eta 0:36:23 +epoch [122/200] batch [10/31] time 0.716 (0.803) data 0.000 (0.085) loss 0.8530 (0.7686) acc 84.3750 (80.3125) 
lr 6.9098e-04 eta 0:32:39 +epoch [122/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.056) loss 0.6245 (0.7472) acc 87.5000 (82.2917) lr 6.9098e-04 eta 0:31:20 +epoch [122/200] batch [20/31] time 0.709 (0.757) data 0.000 (0.042) loss 0.6621 (0.7584) acc 87.5000 (83.1250) lr 6.9098e-04 eta 0:30:39 +epoch [122/200] batch [25/31] time 0.716 (0.748) data 0.000 (0.034) loss 1.4912 (0.7919) acc 71.8750 (82.5000) lr 6.9098e-04 eta 0:30:12 +epoch [122/200] batch [30/31] time 0.727 (0.742) data 0.000 (0.028) loss 0.9263 (0.8228) acc 71.8750 (81.9792) lr 6.9098e-04 eta 0:29:54 +epoch [123/200] batch [5/31] time 0.712 (0.935) data 0.000 (0.209) loss 0.5410 (0.7052) acc 90.6250 (83.1250) lr 6.7608e-04 eta 0:37:35 +epoch [123/200] batch [10/31] time 0.714 (0.823) data 0.000 (0.104) loss 1.2588 (0.7707) acc 78.1250 (83.1250) lr 6.7608e-04 eta 0:33:02 +epoch [123/200] batch [15/31] time 0.711 (0.786) data 0.000 (0.070) loss 0.5913 (0.7029) acc 81.2500 (84.1667) lr 6.7608e-04 eta 0:31:27 +epoch [123/200] batch [20/31] time 0.708 (0.774) data 0.000 (0.052) loss 0.5137 (0.7338) acc 90.6250 (83.2812) lr 6.7608e-04 eta 0:30:55 +epoch [123/200] batch [25/31] time 0.710 (0.761) data 0.000 (0.042) loss 0.7231 (0.7422) acc 84.3750 (83.0000) lr 6.7608e-04 eta 0:30:20 +epoch [123/200] batch [30/31] time 0.707 (0.753) data 0.000 (0.035) loss 0.4417 (0.7367) acc 90.6250 (82.5000) lr 6.7608e-04 eta 0:29:58 +epoch [124/200] batch [5/31] time 0.712 (0.903) data 0.000 (0.183) loss 0.8652 (0.9706) acc 87.5000 (79.3750) lr 6.6126e-04 eta 0:35:50 +epoch [124/200] batch [10/31] time 0.717 (0.812) data 0.000 (0.092) loss 1.2100 (0.8911) acc 65.6250 (80.9375) lr 6.6126e-04 eta 0:32:09 +epoch [124/200] batch [15/31] time 0.711 (0.778) data 0.000 (0.061) loss 0.8354 (0.8628) acc 78.1250 (80.8333) lr 6.6126e-04 eta 0:30:45 +epoch [124/200] batch [20/31] time 0.704 (0.761) data 0.000 (0.046) loss 0.7715 (0.8344) acc 78.1250 (81.2500) lr 6.6126e-04 eta 0:30:00 +epoch [124/200] batch [25/31] time 0.707 
(0.751) data 0.000 (0.037) loss 0.4336 (0.8056) acc 90.6250 (82.0000) lr 6.6126e-04 eta 0:29:33 +epoch [124/200] batch [30/31] time 0.703 (0.744) data 0.000 (0.031) loss 0.7690 (0.7854) acc 87.5000 (82.7083) lr 6.6126e-04 eta 0:29:14 +epoch [125/200] batch [5/31] time 0.712 (0.907) data 0.000 (0.182) loss 0.9292 (0.7033) acc 81.2500 (85.0000) lr 6.4653e-04 eta 0:35:32 +epoch [125/200] batch [10/31] time 0.707 (0.809) data 0.000 (0.091) loss 0.5786 (0.7348) acc 87.5000 (83.4375) lr 6.4653e-04 eta 0:31:36 +epoch [125/200] batch [15/31] time 0.708 (0.777) data 0.000 (0.061) loss 1.2090 (0.7282) acc 75.0000 (83.3333) lr 6.4653e-04 eta 0:30:19 +epoch [125/200] batch [20/31] time 0.725 (0.762) data 0.000 (0.046) loss 1.0479 (0.7523) acc 78.1250 (82.9688) lr 6.4653e-04 eta 0:29:39 +epoch [125/200] batch [25/31] time 0.719 (0.751) data 0.000 (0.037) loss 0.7915 (0.7399) acc 75.0000 (82.7500) lr 6.4653e-04 eta 0:29:11 +epoch [125/200] batch [30/31] time 0.712 (0.744) data 0.000 (0.031) loss 1.0391 (0.7399) acc 75.0000 (82.6042) lr 6.4653e-04 eta 0:28:51 +epoch [126/200] batch [5/31] time 0.708 (0.889) data 0.000 (0.168) loss 1.2002 (0.8356) acc 78.1250 (82.5000) lr 6.3188e-04 eta 0:34:22 +epoch [126/200] batch [10/31] time 0.710 (0.801) data 0.000 (0.084) loss 0.9688 (0.7999) acc 78.1250 (81.8750) lr 6.3188e-04 eta 0:30:54 +epoch [126/200] batch [15/31] time 0.707 (0.771) data 0.000 (0.056) loss 0.8638 (0.7464) acc 84.3750 (82.5000) lr 6.3188e-04 eta 0:29:41 +epoch [126/200] batch [20/31] time 0.706 (0.755) data 0.000 (0.042) loss 0.4023 (0.7809) acc 90.6250 (82.6562) lr 6.3188e-04 eta 0:29:00 +epoch [126/200] batch [25/31] time 0.716 (0.746) data 0.000 (0.034) loss 0.7695 (0.8054) acc 87.5000 (82.3750) lr 6.3188e-04 eta 0:28:36 +epoch [126/200] batch [30/31] time 0.706 (0.740) data 0.000 (0.028) loss 1.3057 (0.8181) acc 78.1250 (82.0833) lr 6.3188e-04 eta 0:28:19 +epoch [127/200] batch [5/31] time 0.710 (0.892) data 0.000 (0.169) loss 0.6240 (0.6752) acc 81.2500 (84.3750) 
lr 6.1732e-04 eta 0:34:02 +epoch [127/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.085) loss 0.8140 (0.7100) acc 84.3750 (84.0625) lr 6.1732e-04 eta 0:30:33 +epoch [127/200] batch [15/31] time 0.708 (0.773) data 0.000 (0.056) loss 0.8203 (0.7807) acc 81.2500 (83.1250) lr 6.1732e-04 eta 0:29:21 +epoch [127/200] batch [20/31] time 0.725 (0.759) data 0.000 (0.042) loss 0.4470 (0.8436) acc 84.3750 (80.7812) lr 6.1732e-04 eta 0:28:44 +epoch [127/200] batch [25/31] time 0.710 (0.749) data 0.000 (0.034) loss 1.3574 (0.8541) acc 75.0000 (80.8750) lr 6.1732e-04 eta 0:28:20 +epoch [127/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.028) loss 0.9302 (0.8573) acc 81.2500 (81.0417) lr 6.1732e-04 eta 0:28:02 +epoch [128/200] batch [5/31] time 0.710 (0.908) data 0.000 (0.181) loss 1.0322 (0.6433) acc 78.1250 (85.6250) lr 6.0285e-04 eta 0:34:10 +epoch [128/200] batch [10/31] time 0.710 (0.810) data 0.000 (0.091) loss 0.6885 (0.7055) acc 75.0000 (83.7500) lr 6.0285e-04 eta 0:30:25 +epoch [128/200] batch [15/31] time 0.707 (0.777) data 0.000 (0.061) loss 1.0273 (0.7538) acc 81.2500 (82.9167) lr 6.0285e-04 eta 0:29:07 +epoch [128/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.045) loss 0.3284 (0.7235) acc 90.6250 (84.0625) lr 6.0285e-04 eta 0:28:26 +epoch [128/200] batch [25/31] time 0.711 (0.751) data 0.000 (0.036) loss 0.8867 (0.7486) acc 75.0000 (83.1250) lr 6.0285e-04 eta 0:28:01 +epoch [128/200] batch [30/31] time 0.728 (0.746) data 0.000 (0.030) loss 0.5059 (0.7655) acc 84.3750 (82.6042) lr 6.0285e-04 eta 0:27:45 +epoch [129/200] batch [5/31] time 0.713 (0.901) data 0.000 (0.174) loss 1.1885 (0.7460) acc 75.0000 (85.0000) lr 5.8849e-04 eta 0:33:27 +epoch [129/200] batch [10/31] time 0.709 (0.809) data 0.001 (0.087) loss 0.2817 (0.7352) acc 93.7500 (84.0625) lr 5.8849e-04 eta 0:29:58 +epoch [129/200] batch [15/31] time 0.705 (0.787) data 0.000 (0.058) loss 1.0527 (0.7930) acc 68.7500 (83.1250) lr 5.8849e-04 eta 0:29:04 +epoch [129/200] batch [20/31] time 0.707 
(0.767) data 0.000 (0.044) loss 0.5933 (0.7853) acc 87.5000 (83.2812) lr 5.8849e-04 eta 0:28:17 +epoch [129/200] batch [25/31] time 0.703 (0.755) data 0.000 (0.035) loss 0.5791 (0.7794) acc 81.2500 (83.1250) lr 5.8849e-04 eta 0:27:46 +epoch [129/200] batch [30/31] time 0.708 (0.747) data 0.000 (0.029) loss 0.6733 (0.7715) acc 87.5000 (83.4375) lr 5.8849e-04 eta 0:27:25 +epoch [130/200] batch [5/31] time 0.718 (0.902) data 0.000 (0.177) loss 0.6182 (0.7546) acc 87.5000 (81.8750) lr 5.7422e-04 eta 0:33:00 +epoch [130/200] batch [10/31] time 0.724 (0.811) data 0.000 (0.089) loss 0.8550 (0.7959) acc 78.1250 (80.6250) lr 5.7422e-04 eta 0:29:35 +epoch [130/200] batch [15/31] time 0.712 (0.779) data 0.000 (0.059) loss 0.4897 (0.7379) acc 87.5000 (82.0833) lr 5.7422e-04 eta 0:28:21 +epoch [130/200] batch [20/31] time 0.721 (0.763) data 0.000 (0.045) loss 0.3220 (0.7460) acc 93.7500 (82.9688) lr 5.7422e-04 eta 0:27:43 +epoch [130/200] batch [25/31] time 0.710 (0.753) data 0.000 (0.036) loss 0.3157 (0.7178) acc 90.6250 (83.6250) lr 5.7422e-04 eta 0:27:18 +epoch [130/200] batch [30/31] time 0.710 (0.746) data 0.000 (0.030) loss 1.1064 (0.7283) acc 75.0000 (83.7500) lr 5.7422e-04 eta 0:27:00 +epoch [131/200] batch [5/31] time 0.715 (0.922) data 0.000 (0.170) loss 1.1338 (0.8454) acc 71.8750 (81.2500) lr 5.6006e-04 eta 0:33:17 +epoch [131/200] batch [10/31] time 0.707 (0.817) data 0.000 (0.085) loss 0.7217 (0.7819) acc 81.2500 (83.1250) lr 5.6006e-04 eta 0:29:24 +epoch [131/200] batch [15/31] time 0.727 (0.783) data 0.000 (0.057) loss 0.9624 (0.7433) acc 75.0000 (84.1667) lr 5.6006e-04 eta 0:28:06 +epoch [131/200] batch [20/31] time 0.723 (0.766) data 0.000 (0.043) loss 0.6895 (0.7559) acc 84.3750 (84.0625) lr 5.6006e-04 eta 0:27:27 +epoch [131/200] batch [25/31] time 0.716 (0.756) data 0.000 (0.034) loss 0.6338 (0.7414) acc 78.1250 (83.7500) lr 5.6006e-04 eta 0:27:01 +epoch [131/200] batch [30/31] time 0.711 (0.748) data 0.000 (0.029) loss 0.5547 (0.7703) acc 90.6250 (83.8542) 
lr 5.6006e-04 eta 0:26:39 +epoch [132/200] batch [5/31] time 0.709 (0.885) data 0.001 (0.166) loss 0.9360 (0.8242) acc 81.2500 (83.1250) lr 5.4601e-04 eta 0:31:29 +epoch [132/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.083) loss 0.7119 (0.8773) acc 81.2500 (80.6250) lr 5.4601e-04 eta 0:28:22 +epoch [132/200] batch [15/31] time 0.711 (0.770) data 0.000 (0.056) loss 0.2815 (0.7727) acc 93.7500 (82.9167) lr 5.4601e-04 eta 0:27:15 +epoch [132/200] batch [20/31] time 0.709 (0.756) data 0.000 (0.042) loss 0.4502 (0.7978) acc 84.3750 (82.5000) lr 5.4601e-04 eta 0:26:42 +epoch [132/200] batch [25/31] time 0.713 (0.748) data 0.000 (0.033) loss 1.0127 (0.8006) acc 81.2500 (82.1250) lr 5.4601e-04 eta 0:26:21 +epoch [132/200] batch [30/31] time 0.713 (0.742) data 0.000 (0.028) loss 1.0850 (0.8000) acc 75.0000 (82.2917) lr 5.4601e-04 eta 0:26:04 +epoch [133/200] batch [5/31] time 0.856 (0.931) data 0.000 (0.175) loss 0.6328 (0.7206) acc 84.3750 (86.8750) lr 5.3207e-04 eta 0:32:38 +epoch [133/200] batch [10/31] time 0.713 (0.826) data 0.000 (0.088) loss 0.7993 (0.7467) acc 75.0000 (83.4375) lr 5.3207e-04 eta 0:28:52 +epoch [133/200] batch [15/31] time 0.711 (0.788) data 0.000 (0.059) loss 0.9375 (0.7334) acc 84.3750 (83.1250) lr 5.3207e-04 eta 0:27:28 +epoch [133/200] batch [20/31] time 0.703 (0.771) data 0.000 (0.044) loss 1.4814 (0.7571) acc 84.3750 (83.5938) lr 5.3207e-04 eta 0:26:49 +epoch [133/200] batch [25/31] time 0.712 (0.760) data 0.000 (0.035) loss 0.5806 (0.7408) acc 90.6250 (84.0000) lr 5.3207e-04 eta 0:26:23 +epoch [133/200] batch [30/31] time 0.708 (0.752) data 0.000 (0.030) loss 0.5366 (0.7320) acc 87.5000 (84.0625) lr 5.3207e-04 eta 0:26:02 +epoch [134/200] batch [5/31] time 0.711 (0.896) data 0.000 (0.179) loss 0.7378 (0.7357) acc 81.2500 (81.2500) lr 5.1825e-04 eta 0:30:56 +epoch [134/200] batch [10/31] time 0.708 (0.805) data 0.000 (0.090) loss 0.6279 (0.7605) acc 87.5000 (82.1875) lr 5.1825e-04 eta 0:27:44 +epoch [134/200] batch [15/31] time 0.712 
(0.775) data 0.000 (0.060) loss 0.4297 (0.6672) acc 90.6250 (83.9583) lr 5.1825e-04 eta 0:26:38 +epoch [134/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.045) loss 0.6855 (0.7039) acc 78.1250 (83.2812) lr 5.1825e-04 eta 0:26:00 +epoch [134/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.036) loss 0.8394 (0.7857) acc 75.0000 (81.1250) lr 5.1825e-04 eta 0:25:37 +epoch [134/200] batch [30/31] time 0.701 (0.742) data 0.000 (0.030) loss 1.1084 (0.8070) acc 84.3750 (80.9375) lr 5.1825e-04 eta 0:25:19 +epoch [135/200] batch [5/31] time 0.729 (0.883) data 0.000 (0.161) loss 0.9463 (0.7812) acc 81.2500 (80.6250) lr 5.0454e-04 eta 0:30:02 +epoch [135/200] batch [10/31] time 0.711 (0.798) data 0.000 (0.081) loss 0.7466 (0.7115) acc 75.0000 (82.5000) lr 5.0454e-04 eta 0:27:05 +epoch [135/200] batch [15/31] time 0.707 (0.772) data 0.000 (0.054) loss 0.2485 (0.6654) acc 96.8750 (83.7500) lr 5.0454e-04 eta 0:26:07 +epoch [135/200] batch [20/31] time 0.721 (0.757) data 0.001 (0.040) loss 0.8511 (0.6640) acc 78.1250 (83.5938) lr 5.0454e-04 eta 0:25:33 +epoch [135/200] batch [25/31] time 0.716 (0.749) data 0.000 (0.032) loss 1.5244 (0.7048) acc 75.0000 (83.7500) lr 5.0454e-04 eta 0:25:13 +epoch [135/200] batch [30/31] time 0.709 (0.743) data 0.000 (0.027) loss 1.0352 (0.7145) acc 75.0000 (83.3333) lr 5.0454e-04 eta 0:24:58 +epoch [136/200] batch [5/31] time 0.727 (0.890) data 0.000 (0.158) loss 0.4873 (0.6913) acc 87.5000 (85.0000) lr 4.9096e-04 eta 0:29:49 +epoch [136/200] batch [10/31] time 0.719 (0.802) data 0.000 (0.079) loss 0.9814 (0.6610) acc 81.2500 (86.5625) lr 4.9096e-04 eta 0:26:47 +epoch [136/200] batch [15/31] time 0.712 (0.775) data 0.000 (0.053) loss 1.1133 (0.6313) acc 78.1250 (86.6667) lr 4.9096e-04 eta 0:25:49 +epoch [136/200] batch [20/31] time 0.712 (0.759) data 0.000 (0.040) loss 0.5981 (0.6691) acc 90.6250 (85.6250) lr 4.9096e-04 eta 0:25:14 +epoch [136/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.032) loss 0.5923 (0.7312) acc 87.5000 (84.3750) 
lr 4.9096e-04 eta 0:24:50 +epoch [136/200] batch [30/31] time 0.720 (0.743) data 0.000 (0.027) loss 0.7217 (0.7360) acc 81.2500 (84.2708) lr 4.9096e-04 eta 0:24:35 +epoch [137/200] batch [5/31] time 0.704 (0.885) data 0.000 (0.164) loss 1.0781 (0.8096) acc 81.2500 (83.1250) lr 4.7750e-04 eta 0:29:11 +epoch [137/200] batch [10/31] time 0.726 (0.801) data 0.000 (0.082) loss 1.3711 (0.8601) acc 81.2500 (83.4375) lr 4.7750e-04 eta 0:26:20 +epoch [137/200] batch [15/31] time 0.712 (0.773) data 0.000 (0.055) loss 0.9194 (0.8508) acc 78.1250 (82.7083) lr 4.7750e-04 eta 0:25:21 +epoch [137/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.041) loss 0.5938 (0.8146) acc 87.5000 (82.5000) lr 4.7750e-04 eta 0:24:48 +epoch [137/200] batch [25/31] time 0.706 (0.748) data 0.000 (0.033) loss 0.6587 (0.8264) acc 84.3750 (82.1250) lr 4.7750e-04 eta 0:24:25 +epoch [137/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.028) loss 0.7095 (0.8252) acc 81.2500 (82.0833) lr 4.7750e-04 eta 0:24:08 +epoch [138/200] batch [5/31] time 0.719 (0.893) data 0.000 (0.171) loss 1.4395 (0.7536) acc 71.8750 (83.7500) lr 4.6417e-04 eta 0:28:59 +epoch [138/200] batch [10/31] time 0.722 (0.803) data 0.001 (0.086) loss 0.5146 (0.7638) acc 87.5000 (84.3750) lr 4.6417e-04 eta 0:26:01 +epoch [138/200] batch [15/31] time 0.716 (0.775) data 0.000 (0.057) loss 0.7935 (0.7960) acc 84.3750 (83.5417) lr 4.6417e-04 eta 0:25:02 +epoch [138/200] batch [20/31] time 0.720 (0.762) data 0.000 (0.043) loss 0.8281 (0.7724) acc 81.2500 (84.5312) lr 4.6417e-04 eta 0:24:32 +epoch [138/200] batch [25/31] time 0.718 (0.753) data 0.000 (0.034) loss 0.7612 (0.7640) acc 90.6250 (85.0000) lr 4.6417e-04 eta 0:24:10 +epoch [138/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.029) loss 1.1221 (0.7465) acc 87.5000 (85.6250) lr 4.6417e-04 eta 0:23:54 +epoch [139/200] batch [5/31] time 0.709 (0.899) data 0.000 (0.178) loss 0.5850 (0.4964) acc 90.6250 (90.6250) lr 4.5098e-04 eta 0:28:43 +epoch [139/200] batch [10/31] time 0.711 
(0.821) data 0.000 (0.089) loss 0.4666 (0.6672) acc 87.5000 (85.0000) lr 4.5098e-04 eta 0:26:09 +epoch [139/200] batch [15/31] time 0.709 (0.784) data 0.000 (0.060) loss 1.0098 (0.6556) acc 75.0000 (84.1667) lr 4.5098e-04 eta 0:24:55 +epoch [139/200] batch [20/31] time 0.707 (0.765) data 0.000 (0.045) loss 1.0576 (0.7324) acc 71.8750 (82.1875) lr 4.5098e-04 eta 0:24:15 +epoch [139/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.036) loss 0.7666 (0.6964) acc 90.6250 (83.8750) lr 4.5098e-04 eta 0:23:51 +epoch [139/200] batch [30/31] time 0.707 (0.748) data 0.000 (0.030) loss 0.7202 (0.7370) acc 90.6250 (83.4375) lr 4.5098e-04 eta 0:23:34 +epoch [140/200] batch [5/31] time 0.711 (0.901) data 0.000 (0.176) loss 0.7827 (0.6321) acc 90.6250 (90.0000) lr 4.3792e-04 eta 0:28:18 +epoch [140/200] batch [10/31] time 0.707 (0.807) data 0.000 (0.088) loss 0.6860 (0.6706) acc 84.3750 (87.5000) lr 4.3792e-04 eta 0:25:17 +epoch [140/200] batch [15/31] time 0.721 (0.775) data 0.000 (0.059) loss 0.7036 (0.6729) acc 78.1250 (86.4583) lr 4.3792e-04 eta 0:24:13 +epoch [140/200] batch [20/31] time 0.706 (0.758) data 0.000 (0.044) loss 0.8950 (0.7165) acc 81.2500 (85.1562) lr 4.3792e-04 eta 0:23:38 +epoch [140/200] batch [25/31] time 0.717 (0.749) data 0.000 (0.035) loss 0.8818 (0.7328) acc 75.0000 (84.1250) lr 4.3792e-04 eta 0:23:17 +epoch [140/200] batch [30/31] time 0.706 (0.747) data 0.000 (0.030) loss 1.0957 (0.7428) acc 87.5000 (84.5833) lr 4.3792e-04 eta 0:23:10 +epoch [141/200] batch [5/31] time 0.711 (0.880) data 0.000 (0.154) loss 0.9312 (0.9029) acc 90.6250 (78.1250) lr 4.2499e-04 eta 0:27:12 +epoch [141/200] batch [10/31] time 0.713 (0.797) data 0.000 (0.077) loss 0.3555 (0.7220) acc 90.6250 (81.5625) lr 4.2499e-04 eta 0:24:34 +epoch [141/200] batch [15/31] time 0.711 (0.770) data 0.000 (0.052) loss 0.8257 (0.7484) acc 81.2500 (82.5000) lr 4.2499e-04 eta 0:23:41 +epoch [141/200] batch [20/31] time 0.709 (0.755) data 0.000 (0.039) loss 0.3413 (0.7461) acc 96.8750 (82.5000) 
lr 4.2499e-04 eta 0:23:09 +epoch [141/200] batch [25/31] time 0.716 (0.746) data 0.000 (0.031) loss 0.9912 (0.7191) acc 84.3750 (83.8750) lr 4.2499e-04 eta 0:22:48 +epoch [141/200] batch [30/31] time 0.709 (0.740) data 0.000 (0.026) loss 0.5376 (0.7086) acc 81.2500 (84.0625) lr 4.2499e-04 eta 0:22:34 +epoch [142/200] batch [5/31] time 0.710 (0.905) data 0.000 (0.184) loss 0.9141 (0.6373) acc 75.0000 (87.5000) lr 4.1221e-04 eta 0:27:30 +epoch [142/200] batch [10/31] time 0.709 (0.808) data 0.000 (0.092) loss 0.3110 (0.6365) acc 96.8750 (86.5625) lr 4.1221e-04 eta 0:24:30 +epoch [142/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.061) loss 0.3027 (0.6709) acc 93.7500 (86.2500) lr 4.1221e-04 eta 0:23:29 +epoch [142/200] batch [20/31] time 0.712 (0.760) data 0.000 (0.046) loss 0.4976 (0.6823) acc 75.0000 (85.3125) lr 4.1221e-04 eta 0:22:55 +epoch [142/200] batch [25/31] time 0.707 (0.751) data 0.000 (0.037) loss 0.7061 (0.6680) acc 84.3750 (85.5000) lr 4.1221e-04 eta 0:22:34 +epoch [142/200] batch [30/31] time 0.709 (0.749) data 0.000 (0.031) loss 0.8530 (0.6625) acc 90.6250 (85.6250) lr 4.1221e-04 eta 0:22:27 +epoch [143/200] batch [5/31] time 0.732 (0.898) data 0.000 (0.173) loss 0.8716 (0.6062) acc 81.2500 (87.5000) lr 3.9958e-04 eta 0:26:50 +epoch [143/200] batch [10/31] time 0.724 (0.808) data 0.000 (0.087) loss 0.5615 (0.6853) acc 93.7500 (85.6250) lr 3.9958e-04 eta 0:24:04 +epoch [143/200] batch [15/31] time 0.721 (0.776) data 0.000 (0.058) loss 1.0625 (0.7225) acc 81.2500 (84.1667) lr 3.9958e-04 eta 0:23:03 +epoch [143/200] batch [20/31] time 0.701 (0.759) data 0.000 (0.044) loss 0.7275 (0.6877) acc 78.1250 (85.0000) lr 3.9958e-04 eta 0:22:30 +epoch [143/200] batch [25/31] time 0.705 (0.749) data 0.000 (0.035) loss 0.5566 (0.7314) acc 84.3750 (83.8750) lr 3.9958e-04 eta 0:22:07 +epoch [143/200] batch [30/31] time 0.708 (0.743) data 0.000 (0.029) loss 0.5439 (0.7415) acc 87.5000 (83.0208) lr 3.9958e-04 eta 0:21:52 +epoch [144/200] batch [5/31] time 0.709 
(0.884) data 0.000 (0.159) loss 0.4448 (0.5592) acc 84.3750 (85.0000) lr 3.8709e-04 eta 0:25:58 +epoch [144/200] batch [10/31] time 0.706 (0.798) data 0.000 (0.080) loss 1.7002 (0.6476) acc 62.5000 (85.3125) lr 3.8709e-04 eta 0:23:21 +epoch [144/200] batch [15/31] time 0.709 (0.768) data 0.000 (0.053) loss 0.5718 (0.6559) acc 90.6250 (85.8333) lr 3.8709e-04 eta 0:22:26 +epoch [144/200] batch [20/31] time 0.705 (0.753) data 0.000 (0.040) loss 0.5854 (0.6995) acc 87.5000 (84.8438) lr 3.8709e-04 eta 0:21:55 +epoch [144/200] batch [25/31] time 0.705 (0.744) data 0.000 (0.032) loss 0.4463 (0.7412) acc 87.5000 (84.0000) lr 3.8709e-04 eta 0:21:35 +epoch [144/200] batch [30/31] time 0.714 (0.738) data 0.000 (0.027) loss 0.5752 (0.7512) acc 93.7500 (84.2708) lr 3.8709e-04 eta 0:21:22 +epoch [145/200] batch [5/31] time 0.729 (0.895) data 0.001 (0.165) loss 0.8320 (0.7407) acc 84.3750 (85.0000) lr 3.7476e-04 eta 0:25:50 +epoch [145/200] batch [10/31] time 0.714 (0.804) data 0.000 (0.083) loss 0.8486 (0.7048) acc 78.1250 (85.3125) lr 3.7476e-04 eta 0:23:07 +epoch [145/200] batch [15/31] time 0.715 (0.775) data 0.000 (0.055) loss 0.9609 (0.7637) acc 75.0000 (83.7500) lr 3.7476e-04 eta 0:22:13 +epoch [145/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.041) loss 0.5566 (0.7424) acc 84.3750 (84.2188) lr 3.7476e-04 eta 0:21:42 +epoch [145/200] batch [25/31] time 0.705 (0.750) data 0.000 (0.033) loss 0.7490 (0.7376) acc 81.2500 (84.2500) lr 3.7476e-04 eta 0:21:22 +epoch [145/200] batch [30/31] time 0.712 (0.744) data 0.000 (0.028) loss 0.7769 (0.7366) acc 87.5000 (83.8542) lr 3.7476e-04 eta 0:21:09 +epoch [146/200] batch [5/31] time 0.712 (0.904) data 0.000 (0.177) loss 0.2810 (0.6997) acc 96.8750 (85.6250) lr 3.6258e-04 eta 0:25:37 +epoch [146/200] batch [10/31] time 0.708 (0.809) data 0.000 (0.089) loss 0.5947 (0.8019) acc 87.5000 (83.7500) lr 3.6258e-04 eta 0:22:51 +epoch [146/200] batch [15/31] time 0.722 (0.787) data 0.000 (0.059) loss 0.5640 (0.7861) acc 84.3750 (84.1667) 
lr 3.6258e-04 eta 0:22:09 +epoch [146/200] batch [20/31] time 0.722 (0.769) data 0.000 (0.045) loss 0.5830 (0.7936) acc 87.5000 (83.1250) lr 3.6258e-04 eta 0:21:36 +epoch [146/200] batch [25/31] time 0.701 (0.758) data 0.000 (0.036) loss 0.3743 (0.7562) acc 90.6250 (83.6250) lr 3.6258e-04 eta 0:21:13 +epoch [146/200] batch [30/31] time 0.702 (0.749) data 0.000 (0.030) loss 0.7388 (0.7800) acc 90.6250 (83.3333) lr 3.6258e-04 eta 0:20:54 +epoch [147/200] batch [5/31] time 0.705 (0.889) data 0.000 (0.163) loss 0.2150 (0.5499) acc 96.8750 (86.2500) lr 3.5055e-04 eta 0:24:43 +epoch [147/200] batch [10/31] time 0.729 (0.802) data 0.000 (0.082) loss 0.8496 (0.6507) acc 87.5000 (87.1875) lr 3.5055e-04 eta 0:22:15 +epoch [147/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.054) loss 0.5488 (0.6634) acc 93.7500 (86.4583) lr 3.5055e-04 eta 0:21:20 +epoch [147/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.041) loss 0.5469 (0.6473) acc 87.5000 (86.8750) lr 3.5055e-04 eta 0:20:51 +epoch [147/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.033) loss 0.4622 (0.6180) acc 87.5000 (87.1250) lr 3.5055e-04 eta 0:20:33 +epoch [147/200] batch [30/31] time 0.714 (0.743) data 0.000 (0.027) loss 0.7363 (0.6261) acc 78.1250 (86.2500) lr 3.5055e-04 eta 0:20:21 +epoch [148/200] batch [5/31] time 0.705 (0.897) data 0.000 (0.176) loss 0.3967 (0.8936) acc 93.7500 (82.5000) lr 3.3869e-04 eta 0:24:30 +epoch [148/200] batch [10/31] time 0.711 (0.805) data 0.000 (0.088) loss 0.5415 (0.7740) acc 90.6250 (84.6875) lr 3.3869e-04 eta 0:21:53 +epoch [148/200] batch [15/31] time 0.711 (0.773) data 0.000 (0.059) loss 0.7808 (0.7332) acc 84.3750 (84.5833) lr 3.3869e-04 eta 0:20:57 +epoch [148/200] batch [20/31] time 0.709 (0.755) data 0.000 (0.044) loss 0.5967 (0.7339) acc 78.1250 (84.6875) lr 3.3869e-04 eta 0:20:26 +epoch [148/200] batch [25/31] time 0.705 (0.745) data 0.000 (0.036) loss 0.3650 (0.7156) acc 90.6250 (84.8750) lr 3.3869e-04 eta 0:20:04 +epoch [148/200] batch [30/31] time 0.707 
(0.739) data 0.000 (0.030) loss 1.1299 (0.7389) acc 81.2500 (84.5833) lr 3.3869e-04 eta 0:19:51 +epoch [149/200] batch [5/31] time 0.713 (0.934) data 0.000 (0.187) loss 0.4236 (0.8028) acc 87.5000 (83.1250) lr 3.2699e-04 eta 0:25:00 +epoch [149/200] batch [10/31] time 0.713 (0.822) data 0.000 (0.094) loss 1.0947 (0.8184) acc 84.3750 (82.8125) lr 3.2699e-04 eta 0:21:56 +epoch [149/200] batch [15/31] time 0.715 (0.785) data 0.000 (0.063) loss 0.1643 (0.6932) acc 100.0000 (86.4583) lr 3.2699e-04 eta 0:20:53 +epoch [149/200] batch [20/31] time 0.708 (0.765) data 0.000 (0.047) loss 0.7964 (0.6815) acc 84.3750 (86.7188) lr 3.2699e-04 eta 0:20:18 +epoch [149/200] batch [25/31] time 0.710 (0.755) data 0.000 (0.038) loss 0.9307 (0.6816) acc 84.3750 (86.8750) lr 3.2699e-04 eta 0:19:57 +epoch [149/200] batch [30/31] time 0.711 (0.747) data 0.000 (0.031) loss 0.5571 (0.6859) acc 87.5000 (86.3542) lr 3.2699e-04 eta 0:19:41 +epoch [150/200] batch [5/31] time 0.712 (0.896) data 0.000 (0.172) loss 0.6626 (0.7502) acc 90.6250 (86.2500) lr 3.1545e-04 eta 0:23:31 +epoch [150/200] batch [10/31] time 0.711 (0.804) data 0.000 (0.086) loss 1.2734 (0.7649) acc 75.0000 (85.3125) lr 3.1545e-04 eta 0:21:03 +epoch [150/200] batch [15/31] time 0.714 (0.773) data 0.000 (0.058) loss 0.4756 (0.7596) acc 87.5000 (84.1667) lr 3.1545e-04 eta 0:20:11 +epoch [150/200] batch [20/31] time 0.709 (0.758) data 0.000 (0.043) loss 0.4211 (0.7427) acc 93.7500 (85.0000) lr 3.1545e-04 eta 0:19:42 +epoch [150/200] batch [25/31] time 0.709 (0.753) data 0.000 (0.035) loss 0.6812 (0.7458) acc 78.1250 (84.6250) lr 3.1545e-04 eta 0:19:31 +epoch [150/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.029) loss 0.5786 (0.7638) acc 87.5000 (83.8542) lr 3.1545e-04 eta 0:19:15 +epoch [151/200] batch [5/31] time 0.708 (0.875) data 0.000 (0.152) loss 0.9541 (0.6565) acc 81.2500 (85.0000) lr 3.0409e-04 eta 0:22:31 +epoch [151/200] batch [10/31] time 0.711 (0.793) data 0.000 (0.076) loss 0.9409 (0.6537) acc 84.3750 (85.9375) 
lr 3.0409e-04 eta 0:20:20 +epoch [151/200] batch [15/31] time 0.708 (0.767) data 0.001 (0.051) loss 0.4805 (0.6798) acc 87.5000 (84.7917) lr 3.0409e-04 eta 0:19:36 +epoch [151/200] batch [20/31] time 0.711 (0.751) data 0.000 (0.038) loss 0.8164 (0.7201) acc 75.0000 (84.0625) lr 3.0409e-04 eta 0:19:09 +epoch [151/200] batch [25/31] time 0.718 (0.743) data 0.000 (0.031) loss 0.2722 (0.6773) acc 96.8750 (85.1250) lr 3.0409e-04 eta 0:18:52 +epoch [151/200] batch [30/31] time 0.709 (0.737) data 0.000 (0.026) loss 0.3628 (0.6857) acc 93.7500 (84.7917) lr 3.0409e-04 eta 0:18:40 +epoch [152/200] batch [5/31] time 0.705 (0.886) data 0.000 (0.159) loss 0.4431 (0.5921) acc 90.6250 (86.2500) lr 2.9289e-04 eta 0:22:21 +epoch [152/200] batch [10/31] time 0.709 (0.798) data 0.000 (0.080) loss 0.8438 (0.6692) acc 71.8750 (85.3125) lr 2.9289e-04 eta 0:20:03 +epoch [152/200] batch [15/31] time 0.714 (0.771) data 0.000 (0.053) loss 1.2705 (0.7264) acc 78.1250 (84.7917) lr 2.9289e-04 eta 0:19:18 +epoch [152/200] batch [20/31] time 0.712 (0.763) data 0.000 (0.040) loss 0.6875 (0.7017) acc 84.3750 (85.3125) lr 2.9289e-04 eta 0:19:03 +epoch [152/200] batch [25/31] time 0.719 (0.754) data 0.000 (0.032) loss 0.4570 (0.7021) acc 87.5000 (85.6250) lr 2.9289e-04 eta 0:18:45 +epoch [152/200] batch [30/31] time 0.721 (0.747) data 0.000 (0.027) loss 0.8408 (0.7319) acc 84.3750 (85.1042) lr 2.9289e-04 eta 0:18:31 +epoch [153/200] batch [5/31] time 0.722 (0.890) data 0.000 (0.163) loss 0.8394 (0.7726) acc 75.0000 (81.8750) lr 2.8187e-04 eta 0:21:59 +epoch [153/200] batch [10/31] time 0.708 (0.801) data 0.000 (0.081) loss 0.4211 (0.6391) acc 87.5000 (84.0625) lr 2.8187e-04 eta 0:19:44 +epoch [153/200] batch [15/31] time 0.708 (0.772) data 0.000 (0.054) loss 0.5903 (0.6551) acc 87.5000 (83.9583) lr 2.8187e-04 eta 0:18:56 +epoch [153/200] batch [20/31] time 0.705 (0.757) data 0.000 (0.041) loss 0.4929 (0.6409) acc 87.5000 (84.5312) lr 2.8187e-04 eta 0:18:31 +epoch [153/200] batch [25/31] time 0.715 
(0.748) data 0.000 (0.033) loss 0.4724 (0.6298) acc 90.6250 (84.8750) lr 2.8187e-04 eta 0:18:14 +epoch [153/200] batch [30/31] time 0.704 (0.741) data 0.000 (0.027) loss 0.6626 (0.6387) acc 84.3750 (84.5833) lr 2.8187e-04 eta 0:18:00 +epoch [154/200] batch [5/31] time 0.713 (0.896) data 0.000 (0.173) loss 0.2435 (0.5099) acc 96.8750 (91.2500) lr 2.7103e-04 eta 0:21:40 +epoch [154/200] batch [10/31] time 0.710 (0.805) data 0.000 (0.087) loss 0.3542 (0.6247) acc 96.8750 (88.4375) lr 2.7103e-04 eta 0:19:24 +epoch [154/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.058) loss 0.7520 (0.6525) acc 84.3750 (87.2917) lr 2.7103e-04 eta 0:18:37 +epoch [154/200] batch [20/31] time 0.704 (0.759) data 0.000 (0.044) loss 0.4846 (0.6601) acc 90.6250 (87.0312) lr 2.7103e-04 eta 0:18:10 +epoch [154/200] batch [25/31] time 0.709 (0.750) data 0.000 (0.035) loss 0.6221 (0.6786) acc 87.5000 (86.3750) lr 2.7103e-04 eta 0:17:53 +epoch [154/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.029) loss 0.3914 (0.6717) acc 87.5000 (86.5625) lr 2.7103e-04 eta 0:17:40 +epoch [155/200] batch [5/31] time 0.710 (0.884) data 0.001 (0.157) loss 0.6548 (0.6990) acc 78.1250 (83.7500) lr 2.6037e-04 eta 0:20:55 +epoch [155/200] batch [10/31] time 0.715 (0.795) data 0.000 (0.079) loss 0.9482 (0.7057) acc 71.8750 (82.5000) lr 2.6037e-04 eta 0:18:46 +epoch [155/200] batch [15/31] time 0.711 (0.776) data 0.000 (0.053) loss 0.9561 (0.7281) acc 84.3750 (82.7083) lr 2.6037e-04 eta 0:18:15 +epoch [155/200] batch [20/31] time 0.716 (0.760) data 0.000 (0.039) loss 0.7617 (0.7086) acc 81.2500 (83.4375) lr 2.6037e-04 eta 0:17:49 +epoch [155/200] batch [25/31] time 0.706 (0.750) data 0.000 (0.032) loss 1.1992 (0.6931) acc 78.1250 (84.1250) lr 2.6037e-04 eta 0:17:30 +epoch [155/200] batch [30/31] time 0.713 (0.743) data 0.000 (0.026) loss 0.7432 (0.6996) acc 87.5000 (84.3750) lr 2.6037e-04 eta 0:17:16 +epoch [156/200] batch [5/31] time 0.726 (0.905) data 0.000 (0.176) loss 0.4189 (0.6431) acc 90.6250 (86.2500) 
lr 2.4989e-04 eta 0:20:57 +epoch [156/200] batch [10/31] time 0.703 (0.808) data 0.000 (0.088) loss 0.7451 (0.5950) acc 84.3750 (86.5625) lr 2.4989e-04 eta 0:18:39 +epoch [156/200] batch [15/31] time 0.710 (0.775) data 0.000 (0.059) loss 0.8354 (0.6104) acc 71.8750 (85.2083) lr 2.4989e-04 eta 0:17:49 +epoch [156/200] batch [20/31] time 0.707 (0.759) data 0.000 (0.044) loss 0.6416 (0.6323) acc 87.5000 (84.8438) lr 2.4989e-04 eta 0:17:23 +epoch [156/200] batch [25/31] time 0.704 (0.748) data 0.000 (0.036) loss 0.5835 (0.7085) acc 87.5000 (83.3750) lr 2.4989e-04 eta 0:17:04 +epoch [156/200] batch [30/31] time 0.723 (0.741) data 0.000 (0.030) loss 0.6758 (0.7070) acc 90.6250 (83.8542) lr 2.4989e-04 eta 0:16:51 +epoch [157/200] batch [5/31] time 0.704 (0.887) data 0.000 (0.167) loss 0.2971 (0.5287) acc 93.7500 (86.2500) lr 2.3959e-04 eta 0:20:05 +epoch [157/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.084) loss 0.4448 (0.4873) acc 87.5000 (88.4375) lr 2.3959e-04 eta 0:18:07 +epoch [157/200] batch [15/31] time 0.722 (0.773) data 0.000 (0.056) loss 0.4836 (0.5950) acc 96.8750 (87.9167) lr 2.3959e-04 eta 0:17:23 +epoch [157/200] batch [20/31] time 0.704 (0.757) data 0.000 (0.042) loss 0.4873 (0.6606) acc 93.7500 (86.7188) lr 2.3959e-04 eta 0:16:57 +epoch [157/200] batch [25/31] time 0.707 (0.748) data 0.000 (0.034) loss 0.7642 (0.7235) acc 81.2500 (85.6250) lr 2.3959e-04 eta 0:16:41 +epoch [157/200] batch [30/31] time 0.708 (0.742) data 0.000 (0.028) loss 0.8354 (0.6960) acc 81.2500 (85.7292) lr 2.3959e-04 eta 0:16:29 +epoch [158/200] batch [5/31] time 0.708 (0.879) data 0.000 (0.160) loss 0.9360 (0.6445) acc 78.1250 (85.6250) lr 2.2949e-04 eta 0:19:27 +epoch [158/200] batch [10/31] time 0.708 (0.797) data 0.000 (0.080) loss 0.2815 (0.6993) acc 93.7500 (85.0000) lr 2.2949e-04 eta 0:17:34 +epoch [158/200] batch [15/31] time 0.713 (0.768) data 0.000 (0.054) loss 0.6309 (0.6699) acc 93.7500 (85.6250) lr 2.2949e-04 eta 0:16:52 +epoch [158/200] batch [20/31] time 0.703 
(0.753) data 0.000 (0.040) loss 0.3511 (0.6507) acc 96.8750 (86.4062) lr 2.2949e-04 eta 0:16:28 +epoch [158/200] batch [25/31] time 0.709 (0.744) data 0.000 (0.032) loss 0.6987 (0.6708) acc 87.5000 (85.8750) lr 2.2949e-04 eta 0:16:13 +epoch [158/200] batch [30/31] time 0.710 (0.738) data 0.000 (0.027) loss 0.6816 (0.6883) acc 81.2500 (84.8958) lr 2.2949e-04 eta 0:16:01 +epoch [159/200] batch [5/31] time 0.713 (0.928) data 0.000 (0.208) loss 0.2998 (0.6145) acc 90.6250 (88.1250) lr 2.1957e-04 eta 0:20:03 +epoch [159/200] batch [10/31] time 0.706 (0.818) data 0.000 (0.104) loss 0.6914 (0.7420) acc 90.6250 (85.9375) lr 2.1957e-04 eta 0:17:37 +epoch [159/200] batch [15/31] time 0.711 (0.781) data 0.000 (0.070) loss 0.8867 (0.7384) acc 84.3750 (85.4167) lr 2.1957e-04 eta 0:16:45 +epoch [159/200] batch [20/31] time 0.707 (0.762) data 0.000 (0.052) loss 0.4592 (0.6701) acc 90.6250 (86.8750) lr 2.1957e-04 eta 0:16:17 +epoch [159/200] batch [25/31] time 0.698 (0.752) data 0.000 (0.042) loss 1.4707 (0.7378) acc 71.8750 (85.7500) lr 2.1957e-04 eta 0:15:59 +epoch [159/200] batch [30/31] time 0.705 (0.744) data 0.000 (0.035) loss 0.4453 (0.7678) acc 84.3750 (84.6875) lr 2.1957e-04 eta 0:15:46 +epoch [160/200] batch [5/31] time 0.725 (0.923) data 0.000 (0.193) loss 0.4790 (0.6323) acc 90.6250 (86.8750) lr 2.0984e-04 eta 0:19:28 +epoch [160/200] batch [10/31] time 0.711 (0.818) data 0.000 (0.097) loss 0.9028 (0.6385) acc 71.8750 (84.3750) lr 2.0984e-04 eta 0:17:11 +epoch [160/200] batch [15/31] time 0.709 (0.793) data 0.001 (0.065) loss 0.5591 (0.6107) acc 90.6250 (85.2083) lr 2.0984e-04 eta 0:16:36 +epoch [160/200] batch [20/31] time 0.730 (0.775) data 0.001 (0.049) loss 0.5337 (0.6602) acc 90.6250 (84.6875) lr 2.0984e-04 eta 0:16:09 +epoch [160/200] batch [25/31] time 0.724 (0.763) data 0.000 (0.039) loss 0.7930 (0.6846) acc 87.5000 (85.3750) lr 2.0984e-04 eta 0:15:50 +epoch [160/200] batch [30/31] time 0.718 (0.755) data 0.001 (0.033) loss 0.9648 (0.6722) acc 84.3750 (86.0417) 
lr 2.0984e-04 eta 0:15:37 +epoch [161/200] batch [5/31] time 0.719 (0.906) data 0.000 (0.173) loss 0.5596 (0.6274) acc 93.7500 (88.7500) lr 2.0032e-04 eta 0:18:39 +epoch [161/200] batch [10/31] time 0.718 (0.809) data 0.000 (0.087) loss 0.7432 (0.6267) acc 75.0000 (85.0000) lr 2.0032e-04 eta 0:16:35 +epoch [161/200] batch [15/31] time 0.714 (0.777) data 0.000 (0.058) loss 1.2812 (0.7228) acc 65.6250 (83.3333) lr 2.0032e-04 eta 0:15:51 +epoch [161/200] batch [20/31] time 0.711 (0.761) data 0.000 (0.044) loss 0.6318 (0.6892) acc 84.3750 (83.5938) lr 2.0032e-04 eta 0:15:28 +epoch [161/200] batch [25/31] time 0.717 (0.751) data 0.000 (0.035) loss 1.0244 (0.6882) acc 81.2500 (84.5000) lr 2.0032e-04 eta 0:15:12 +epoch [161/200] batch [30/31] time 0.727 (0.745) data 0.000 (0.029) loss 0.4099 (0.7120) acc 84.3750 (84.2708) lr 2.0032e-04 eta 0:15:01 +epoch [162/200] batch [5/31] time 0.710 (0.942) data 0.000 (0.168) loss 0.6299 (0.6133) acc 84.3750 (88.1250) lr 1.9098e-04 eta 0:18:54 +epoch [162/200] batch [10/31] time 0.708 (0.828) data 0.000 (0.084) loss 0.5439 (0.6222) acc 90.6250 (88.1250) lr 1.9098e-04 eta 0:16:33 +epoch [162/200] batch [15/31] time 0.709 (0.789) data 0.000 (0.056) loss 0.7251 (0.6362) acc 84.3750 (86.8750) lr 1.9098e-04 eta 0:15:42 +epoch [162/200] batch [20/31] time 0.710 (0.770) data 0.000 (0.042) loss 0.4238 (0.5994) acc 90.6250 (87.1875) lr 1.9098e-04 eta 0:15:15 +epoch [162/200] batch [25/31] time 0.707 (0.758) data 0.000 (0.034) loss 0.9482 (0.6372) acc 78.1250 (86.7500) lr 1.9098e-04 eta 0:14:57 +epoch [162/200] batch [30/31] time 0.709 (0.750) data 0.000 (0.028) loss 0.5562 (0.6302) acc 87.5000 (86.8750) lr 1.9098e-04 eta 0:14:44 +epoch [163/200] batch [5/31] time 0.712 (0.902) data 0.000 (0.176) loss 0.5122 (0.5862) acc 90.6250 (84.3750) lr 1.8185e-04 eta 0:17:37 +epoch [163/200] batch [10/31] time 0.714 (0.810) data 0.000 (0.088) loss 0.3005 (0.6182) acc 93.7500 (85.3125) lr 1.8185e-04 eta 0:15:45 +epoch [163/200] batch [15/31] time 0.728 
(0.779) data 0.000 (0.059) loss 0.2734 (0.6376) acc 96.8750 (86.0417) lr 1.8185e-04 eta 0:15:06 +epoch [163/200] batch [20/31] time 0.704 (0.762) data 0.000 (0.044) loss 0.6265 (0.6557) acc 90.6250 (86.2500) lr 1.8185e-04 eta 0:14:42 +epoch [163/200] batch [25/31] time 0.710 (0.752) data 0.000 (0.035) loss 0.7168 (0.6387) acc 81.2500 (86.2500) lr 1.8185e-04 eta 0:14:26 +epoch [163/200] batch [30/31] time 0.705 (0.744) data 0.000 (0.030) loss 0.3740 (0.6034) acc 93.7500 (87.0833) lr 1.8185e-04 eta 0:14:14 +epoch [164/200] batch [5/31] time 0.710 (0.890) data 0.000 (0.169) loss 0.3730 (0.7549) acc 90.6250 (84.3750) lr 1.7292e-04 eta 0:16:56 +epoch [164/200] batch [10/31] time 0.707 (0.815) data 0.000 (0.085) loss 0.8145 (0.8112) acc 87.5000 (82.8125) lr 1.7292e-04 eta 0:15:26 +epoch [164/200] batch [15/31] time 0.711 (0.780) data 0.000 (0.056) loss 1.4639 (0.8004) acc 59.3750 (81.6667) lr 1.7292e-04 eta 0:14:42 +epoch [164/200] batch [20/31] time 0.711 (0.762) data 0.000 (0.042) loss 1.4492 (0.7864) acc 68.7500 (81.8750) lr 1.7292e-04 eta 0:14:18 +epoch [164/200] batch [25/31] time 0.707 (0.751) data 0.000 (0.034) loss 0.5679 (0.7883) acc 90.6250 (81.8750) lr 1.7292e-04 eta 0:14:02 +epoch [164/200] batch [30/31] time 0.703 (0.745) data 0.000 (0.028) loss 0.8188 (0.7674) acc 78.1250 (82.3958) lr 1.7292e-04 eta 0:13:51 +epoch [165/200] batch [5/31] time 0.711 (0.930) data 0.000 (0.203) loss 0.3792 (0.6162) acc 96.8750 (86.8750) lr 1.6419e-04 eta 0:17:13 +epoch [165/200] batch [10/31] time 0.706 (0.824) data 0.000 (0.102) loss 0.3521 (0.6748) acc 90.6250 (85.9375) lr 1.6419e-04 eta 0:15:11 +epoch [165/200] batch [15/31] time 0.714 (0.788) data 0.000 (0.068) loss 0.5825 (0.6077) acc 87.5000 (87.5000) lr 1.6419e-04 eta 0:14:27 +epoch [165/200] batch [20/31] time 0.708 (0.769) data 0.000 (0.051) loss 0.4653 (0.6193) acc 87.5000 (86.5625) lr 1.6419e-04 eta 0:14:02 +epoch [165/200] batch [25/31] time 0.710 (0.757) data 0.000 (0.041) loss 0.7666 (0.6590) acc 78.1250 (85.2500) 
lr 1.6419e-04 eta 0:13:46 +epoch [165/200] batch [30/31] time 0.711 (0.749) data 0.000 (0.034) loss 1.1875 (0.6759) acc 75.0000 (84.8958) lr 1.6419e-04 eta 0:13:33 +epoch [166/200] batch [5/31] time 0.713 (0.888) data 0.000 (0.168) loss 0.7974 (0.7339) acc 84.3750 (87.5000) lr 1.5567e-04 eta 0:15:59 +epoch [166/200] batch [10/31] time 0.719 (0.802) data 0.000 (0.084) loss 0.6118 (0.6850) acc 90.6250 (86.8750) lr 1.5567e-04 eta 0:14:21 +epoch [166/200] batch [15/31] time 0.715 (0.773) data 0.000 (0.056) loss 0.3389 (0.6431) acc 96.8750 (88.7500) lr 1.5567e-04 eta 0:13:47 +epoch [166/200] batch [20/31] time 0.720 (0.759) data 0.000 (0.042) loss 1.0205 (0.6763) acc 81.2500 (87.0312) lr 1.5567e-04 eta 0:13:28 +epoch [166/200] batch [25/31] time 0.716 (0.750) data 0.000 (0.034) loss 0.4880 (0.6383) acc 87.5000 (87.6250) lr 1.5567e-04 eta 0:13:15 +epoch [166/200] batch [30/31] time 0.712 (0.744) data 0.000 (0.028) loss 0.6636 (0.6481) acc 87.5000 (86.6667) lr 1.5567e-04 eta 0:13:04 +epoch [167/200] batch [5/31] time 0.709 (0.929) data 0.000 (0.205) loss 0.4873 (0.6091) acc 87.5000 (86.2500) lr 1.4736e-04 eta 0:16:14 +epoch [167/200] batch [10/31] time 0.710 (0.820) data 0.000 (0.103) loss 0.7358 (0.7475) acc 78.1250 (83.7500) lr 1.4736e-04 eta 0:14:16 +epoch [167/200] batch [15/31] time 0.710 (0.784) data 0.000 (0.069) loss 0.8535 (0.7927) acc 81.2500 (82.5000) lr 1.4736e-04 eta 0:13:34 +epoch [167/200] batch [20/31] time 0.706 (0.765) data 0.000 (0.052) loss 0.6758 (0.7336) acc 84.3750 (83.9062) lr 1.4736e-04 eta 0:13:10 +epoch [167/200] batch [25/31] time 0.712 (0.754) data 0.000 (0.041) loss 0.8398 (0.7554) acc 87.5000 (84.1250) lr 1.4736e-04 eta 0:12:56 +epoch [167/200] batch [30/31] time 0.713 (0.747) data 0.000 (0.034) loss 0.7031 (0.7403) acc 78.1250 (83.9583) lr 1.4736e-04 eta 0:12:44 +epoch [168/200] batch [5/31] time 0.710 (0.888) data 0.000 (0.164) loss 0.5166 (0.7529) acc 93.7500 (86.8750) lr 1.3926e-04 eta 0:15:04 +epoch [168/200] batch [10/31] time 0.709 
(0.800) data 0.000 (0.082) loss 0.6050 (0.7598) acc 81.2500 (83.7500) lr 1.3926e-04 eta 0:13:30 +epoch [168/200] batch [15/31] time 0.710 (0.771) data 0.000 (0.055) loss 0.6416 (0.6633) acc 87.5000 (86.0417) lr 1.3926e-04 eta 0:12:57 +epoch [168/200] batch [20/31] time 0.710 (0.756) data 0.000 (0.041) loss 0.5771 (0.6265) acc 87.5000 (86.5625) lr 1.3926e-04 eta 0:12:37 +epoch [168/200] batch [25/31] time 0.713 (0.747) data 0.000 (0.033) loss 0.6699 (0.6243) acc 84.3750 (87.0000) lr 1.3926e-04 eta 0:12:25 +epoch [168/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.028) loss 0.5859 (0.6404) acc 90.6250 (86.9792) lr 1.3926e-04 eta 0:12:16 +epoch [169/200] batch [5/31] time 0.707 (0.906) data 0.000 (0.177) loss 0.4294 (0.6848) acc 87.5000 (83.7500) lr 1.3137e-04 eta 0:14:53 +epoch [169/200] batch [10/31] time 0.708 (0.807) data 0.000 (0.089) loss 0.6475 (0.5643) acc 78.1250 (87.1875) lr 1.3137e-04 eta 0:13:12 +epoch [169/200] batch [15/31] time 0.714 (0.777) data 0.000 (0.059) loss 0.5840 (0.6436) acc 90.6250 (86.4583) lr 1.3137e-04 eta 0:12:38 +epoch [169/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.045) loss 0.4026 (0.6284) acc 84.3750 (85.3125) lr 1.3137e-04 eta 0:12:18 +epoch [169/200] batch [25/31] time 0.714 (0.750) data 0.000 (0.036) loss 0.7021 (0.6541) acc 84.3750 (84.8750) lr 1.3137e-04 eta 0:12:05 +epoch [169/200] batch [30/31] time 0.711 (0.744) data 0.000 (0.030) loss 1.1055 (0.6932) acc 81.2500 (84.8958) lr 1.3137e-04 eta 0:11:55 +epoch [170/200] batch [5/31] time 0.715 (0.907) data 0.000 (0.184) loss 0.2930 (0.4368) acc 96.8750 (90.0000) lr 1.2369e-04 eta 0:14:27 +epoch [170/200] batch [10/31] time 0.709 (0.824) data 0.000 (0.092) loss 0.9937 (0.5843) acc 81.2500 (87.1875) lr 1.2369e-04 eta 0:13:03 +epoch [170/200] batch [15/31] time 0.714 (0.787) data 0.000 (0.062) loss 0.4583 (0.6393) acc 90.6250 (86.4583) lr 1.2369e-04 eta 0:12:24 +epoch [170/200] batch [20/31] time 0.711 (0.768) data 0.000 (0.046) loss 1.0732 (0.6404) acc 75.0000 (86.0938) 
lr 1.2369e-04 eta 0:12:03 +epoch [170/200] batch [25/31] time 0.703 (0.756) data 0.000 (0.037) loss 0.6636 (0.6827) acc 81.2500 (85.0000) lr 1.2369e-04 eta 0:11:47 +epoch [170/200] batch [30/31] time 0.702 (0.748) data 0.000 (0.031) loss 0.9033 (0.6728) acc 78.1250 (84.7917) lr 1.2369e-04 eta 0:11:36 +epoch [171/200] batch [5/31] time 0.707 (0.893) data 0.000 (0.173) loss 0.4802 (0.6397) acc 90.6250 (86.2500) lr 1.1623e-04 eta 0:13:46 +epoch [171/200] batch [10/31] time 0.717 (0.803) data 0.000 (0.087) loss 0.3997 (0.5491) acc 90.6250 (87.5000) lr 1.1623e-04 eta 0:12:19 +epoch [171/200] batch [15/31] time 0.705 (0.771) data 0.000 (0.058) loss 0.9956 (0.6216) acc 71.8750 (84.7917) lr 1.1623e-04 eta 0:11:45 +epoch [171/200] batch [20/31] time 0.708 (0.755) data 0.000 (0.044) loss 1.3213 (0.6443) acc 78.1250 (84.3750) lr 1.1623e-04 eta 0:11:27 +epoch [171/200] batch [25/31] time 0.703 (0.745) data 0.000 (0.035) loss 0.4651 (0.6307) acc 90.6250 (84.3750) lr 1.1623e-04 eta 0:11:14 +epoch [171/200] batch [30/31] time 0.707 (0.744) data 0.001 (0.029) loss 0.6899 (0.6304) acc 81.2500 (84.6875) lr 1.1623e-04 eta 0:11:09 +epoch [172/200] batch [5/31] time 0.713 (0.889) data 0.000 (0.168) loss 0.7578 (0.5959) acc 90.6250 (90.0000) lr 1.0899e-04 eta 0:13:14 +epoch [172/200] batch [10/31] time 0.716 (0.800) data 0.000 (0.084) loss 0.6353 (0.6232) acc 78.1250 (87.1875) lr 1.0899e-04 eta 0:11:51 +epoch [172/200] batch [15/31] time 0.711 (0.770) data 0.000 (0.056) loss 0.9399 (0.6540) acc 81.2500 (86.4583) lr 1.0899e-04 eta 0:11:21 +epoch [172/200] batch [20/31] time 0.708 (0.755) data 0.000 (0.042) loss 0.7544 (0.6998) acc 84.3750 (85.9375) lr 1.0899e-04 eta 0:11:03 +epoch [172/200] batch [25/31] time 0.715 (0.747) data 0.000 (0.034) loss 0.6289 (0.6891) acc 84.3750 (85.3750) lr 1.0899e-04 eta 0:10:52 +epoch [172/200] batch [30/31] time 0.705 (0.741) data 0.000 (0.028) loss 0.9282 (0.7276) acc 78.1250 (85.0000) lr 1.0899e-04 eta 0:10:44 +epoch [173/200] batch [5/31] time 0.711 
(0.884) data 0.000 (0.164) loss 0.6660 (0.6570) acc 84.3750 (86.2500) lr 1.0197e-04 eta 0:12:42 +epoch [173/200] batch [10/31] time 0.714 (0.798) data 0.000 (0.082) loss 0.9692 (0.6400) acc 78.1250 (84.6875) lr 1.0197e-04 eta 0:11:24 +epoch [173/200] batch [15/31] time 0.723 (0.770) data 0.000 (0.055) loss 0.4922 (0.6721) acc 84.3750 (83.3333) lr 1.0197e-04 eta 0:10:56 +epoch [173/200] batch [20/31] time 0.711 (0.755) data 0.000 (0.041) loss 0.4392 (0.6596) acc 84.3750 (83.5938) lr 1.0197e-04 eta 0:10:40 +epoch [173/200] batch [25/31] time 0.714 (0.747) data 0.000 (0.033) loss 0.7324 (0.6637) acc 87.5000 (83.8750) lr 1.0197e-04 eta 0:10:29 +epoch [173/200] batch [30/31] time 0.726 (0.747) data 0.000 (0.028) loss 0.9985 (0.6677) acc 75.0000 (83.5417) lr 1.0197e-04 eta 0:10:25 +epoch [174/200] batch [5/31] time 0.712 (0.896) data 0.000 (0.178) loss 0.3994 (0.4251) acc 90.6250 (91.8750) lr 9.5173e-05 eta 0:12:25 +epoch [174/200] batch [10/31] time 0.705 (0.806) data 0.001 (0.089) loss 0.6938 (0.5560) acc 84.3750 (88.1250) lr 9.5173e-05 eta 0:11:06 +epoch [174/200] batch [15/31] time 0.716 (0.775) data 0.000 (0.060) loss 0.9023 (0.6509) acc 81.2500 (85.8333) lr 9.5173e-05 eta 0:10:37 +epoch [174/200] batch [20/31] time 0.721 (0.760) data 0.000 (0.045) loss 1.1455 (0.7061) acc 84.3750 (85.6250) lr 9.5173e-05 eta 0:10:20 +epoch [174/200] batch [25/31] time 0.711 (0.750) data 0.000 (0.036) loss 1.3252 (0.7469) acc 71.8750 (84.6250) lr 9.5173e-05 eta 0:10:08 +epoch [174/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.030) loss 0.5757 (0.7656) acc 78.1250 (84.2708) lr 9.5173e-05 eta 0:10:00 +epoch [175/200] batch [5/31] time 0.710 (0.890) data 0.000 (0.170) loss 0.6318 (0.5857) acc 90.6250 (89.3750) lr 8.8597e-05 eta 0:11:52 +epoch [175/200] batch [10/31] time 0.710 (0.803) data 0.000 (0.085) loss 0.3733 (0.5734) acc 90.6250 (87.8125) lr 8.8597e-05 eta 0:10:39 +epoch [175/200] batch [15/31] time 0.712 (0.773) data 0.000 (0.057) loss 0.8779 (0.6261) acc 78.1250 (86.0417) 
lr 8.8597e-05 eta 0:10:11 +epoch [175/200] batch [20/31] time 0.711 (0.757) data 0.000 (0.043) loss 0.3232 (0.6384) acc 93.7500 (86.7188) lr 8.8597e-05 eta 0:09:55 +epoch [175/200] batch [25/31] time 0.709 (0.748) data 0.000 (0.034) loss 0.5239 (0.6465) acc 93.7500 (87.2500) lr 8.8597e-05 eta 0:09:44 +epoch [175/200] batch [30/31] time 0.709 (0.742) data 0.000 (0.029) loss 1.0283 (0.6576) acc 78.1250 (86.6667) lr 8.8597e-05 eta 0:09:35 +epoch [176/200] batch [5/31] time 0.719 (0.898) data 0.000 (0.166) loss 0.8130 (0.8509) acc 90.6250 (88.1250) lr 8.2245e-05 eta 0:11:31 +epoch [176/200] batch [10/31] time 0.714 (0.807) data 0.000 (0.083) loss 0.7573 (0.8090) acc 84.3750 (86.5625) lr 8.2245e-05 eta 0:10:16 +epoch [176/200] batch [15/31] time 0.718 (0.778) data 0.000 (0.056) loss 0.6792 (0.7566) acc 87.5000 (86.0417) lr 8.2245e-05 eta 0:09:51 +epoch [176/200] batch [20/31] time 0.715 (0.762) data 0.000 (0.042) loss 0.4673 (0.6981) acc 93.7500 (86.8750) lr 8.2245e-05 eta 0:09:35 +epoch [176/200] batch [25/31] time 0.708 (0.751) data 0.000 (0.034) loss 0.6738 (0.6716) acc 81.2500 (86.7500) lr 8.2245e-05 eta 0:09:23 +epoch [176/200] batch [30/31] time 0.710 (0.744) data 0.000 (0.028) loss 0.5596 (0.6852) acc 84.3750 (86.9792) lr 8.2245e-05 eta 0:09:14 +epoch [177/200] batch [5/31] time 0.704 (0.888) data 0.000 (0.165) loss 0.5679 (0.3647) acc 87.5000 (92.5000) lr 7.6120e-05 eta 0:10:56 +epoch [177/200] batch [10/31] time 0.719 (0.800) data 0.000 (0.082) loss 0.4319 (0.5216) acc 84.3750 (88.1250) lr 7.6120e-05 eta 0:09:47 +epoch [177/200] batch [15/31] time 0.709 (0.781) data 0.000 (0.055) loss 1.1074 (0.6225) acc 71.8750 (86.2500) lr 7.6120e-05 eta 0:09:29 +epoch [177/200] batch [20/31] time 0.704 (0.764) data 0.000 (0.041) loss 0.4250 (0.6190) acc 87.5000 (85.9375) lr 7.6120e-05 eta 0:09:13 +epoch [177/200] batch [25/31] time 0.709 (0.754) data 0.000 (0.033) loss 0.3643 (0.6161) acc 90.6250 (85.8750) lr 7.6120e-05 eta 0:09:02 +epoch [177/200] batch [30/31] time 0.709 
(0.747) data 0.000 (0.028) loss 0.7661 (0.6791) acc 84.3750 (84.7917) lr 7.6120e-05 eta 0:08:53 +epoch [178/200] batch [5/31] time 0.714 (0.898) data 0.000 (0.173) loss 0.4297 (0.4943) acc 93.7500 (89.3750) lr 7.0224e-05 eta 0:10:35 +epoch [178/200] batch [10/31] time 0.720 (0.808) data 0.000 (0.087) loss 0.4028 (0.5487) acc 84.3750 (86.5625) lr 7.0224e-05 eta 0:09:28 +epoch [178/200] batch [15/31] time 0.717 (0.775) data 0.000 (0.058) loss 0.8262 (0.6323) acc 81.2500 (85.4167) lr 7.0224e-05 eta 0:09:01 +epoch [178/200] batch [20/31] time 0.708 (0.758) data 0.000 (0.044) loss 0.8018 (0.6249) acc 81.2500 (85.4688) lr 7.0224e-05 eta 0:08:45 +epoch [178/200] batch [25/31] time 0.706 (0.749) data 0.000 (0.035) loss 0.6255 (0.6318) acc 87.5000 (85.6250) lr 7.0224e-05 eta 0:08:35 +epoch [178/200] batch [30/31] time 0.701 (0.741) data 0.000 (0.029) loss 0.7002 (0.6759) acc 90.6250 (85.1042) lr 7.0224e-05 eta 0:08:26 +epoch [179/200] batch [5/31] time 0.719 (0.896) data 0.000 (0.178) loss 0.7759 (0.5881) acc 84.3750 (83.1250) lr 6.4556e-05 eta 0:10:06 +epoch [179/200] batch [10/31] time 0.721 (0.806) data 0.000 (0.089) loss 0.7969 (0.6392) acc 71.8750 (83.1250) lr 6.4556e-05 eta 0:09:01 +epoch [179/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.059) loss 0.5005 (0.5920) acc 87.5000 (85.2083) lr 6.4556e-05 eta 0:08:35 +epoch [179/200] batch [20/31] time 0.716 (0.757) data 0.000 (0.045) loss 0.3679 (0.5902) acc 93.7500 (85.3125) lr 6.4556e-05 eta 0:08:21 +epoch [179/200] batch [25/31] time 0.711 (0.748) data 0.000 (0.036) loss 0.9702 (0.6084) acc 84.3750 (85.8750) lr 6.4556e-05 eta 0:08:11 +epoch [179/200] batch [30/31] time 0.706 (0.741) data 0.000 (0.030) loss 0.6157 (0.6021) acc 90.6250 (86.6667) lr 6.4556e-05 eta 0:08:02 +epoch [180/200] batch [5/31] time 0.712 (0.936) data 0.000 (0.185) loss 0.5298 (0.7079) acc 90.6250 (85.0000) lr 5.9119e-05 eta 0:10:04 +epoch [180/200] batch [10/31] time 0.716 (0.825) data 0.000 (0.093) loss 0.8716 (0.7177) acc 78.1250 (84.6875) 
lr 5.9119e-05 eta 0:08:48 +epoch [180/200] batch [15/31] time 0.716 (0.787) data 0.000 (0.062) loss 0.5957 (0.7344) acc 84.3750 (83.5417) lr 5.9119e-05 eta 0:08:20 +epoch [180/200] batch [20/31] time 0.710 (0.768) data 0.000 (0.047) loss 1.3848 (0.7639) acc 71.8750 (82.8125) lr 5.9119e-05 eta 0:08:04 +epoch [180/200] batch [25/31] time 0.714 (0.757) data 0.000 (0.037) loss 1.1162 (0.7545) acc 81.2500 (83.3750) lr 5.9119e-05 eta 0:07:53 +epoch [180/200] batch [30/31] time 0.724 (0.750) data 0.000 (0.031) loss 1.2852 (0.7733) acc 71.8750 (83.3333) lr 5.9119e-05 eta 0:07:45 +epoch [181/200] batch [5/31] time 0.710 (0.892) data 0.000 (0.170) loss 0.3772 (0.5146) acc 90.6250 (86.8750) lr 5.3915e-05 eta 0:09:08 +epoch [181/200] batch [10/31] time 0.712 (0.803) data 0.000 (0.085) loss 0.7012 (0.5408) acc 81.2500 (88.4375) lr 5.3915e-05 eta 0:08:09 +epoch [181/200] batch [15/31] time 0.710 (0.773) data 0.000 (0.057) loss 0.6172 (0.5488) acc 87.5000 (89.1667) lr 5.3915e-05 eta 0:07:47 +epoch [181/200] batch [20/31] time 0.707 (0.757) data 0.000 (0.043) loss 1.2744 (0.6085) acc 62.5000 (87.8125) lr 5.3915e-05 eta 0:07:34 +epoch [181/200] batch [25/31] time 0.714 (0.753) data 0.000 (0.034) loss 0.8687 (0.6332) acc 81.2500 (87.0000) lr 5.3915e-05 eta 0:07:28 +epoch [181/200] batch [30/31] time 0.707 (0.746) data 0.000 (0.029) loss 0.6021 (0.6495) acc 81.2500 (86.3542) lr 5.3915e-05 eta 0:07:20 +epoch [182/200] batch [5/31] time 0.721 (0.903) data 0.000 (0.174) loss 0.5732 (0.5974) acc 90.6250 (87.5000) lr 4.8943e-05 eta 0:08:47 +epoch [182/200] batch [10/31] time 0.726 (0.810) data 0.000 (0.087) loss 0.3003 (0.5614) acc 96.8750 (88.1250) lr 4.8943e-05 eta 0:07:48 +epoch [182/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.058) loss 1.0107 (0.5753) acc 78.1250 (88.7500) lr 4.8943e-05 eta 0:07:25 +epoch [182/200] batch [20/31] time 0.711 (0.760) data 0.000 (0.044) loss 0.5225 (0.6283) acc 87.5000 (87.5000) lr 4.8943e-05 eta 0:07:12 +epoch [182/200] batch [25/31] time 0.717 
(0.751) data 0.000 (0.035) loss 0.7144 (0.6505) acc 75.0000 (86.1250) lr 4.8943e-05 eta 0:07:03 +epoch [182/200] batch [30/31] time 0.708 (0.744) data 0.000 (0.029) loss 0.6084 (0.6432) acc 87.5000 (86.5625) lr 4.8943e-05 eta 0:06:55 +epoch [183/200] batch [5/31] time 0.711 (0.892) data 0.000 (0.168) loss 0.4658 (0.5925) acc 84.3750 (86.8750) lr 4.4207e-05 eta 0:08:13 +epoch [183/200] batch [10/31] time 0.710 (0.802) data 0.000 (0.084) loss 1.4199 (0.7399) acc 71.8750 (84.6875) lr 4.4207e-05 eta 0:07:19 +epoch [183/200] batch [15/31] time 0.707 (0.773) data 0.000 (0.056) loss 0.4534 (0.6860) acc 93.7500 (86.8750) lr 4.4207e-05 eta 0:06:59 +epoch [183/200] batch [20/31] time 0.710 (0.764) data 0.000 (0.042) loss 0.8491 (0.7261) acc 87.5000 (85.7812) lr 4.4207e-05 eta 0:06:51 +epoch [183/200] batch [25/31] time 0.712 (0.753) data 0.000 (0.034) loss 0.6665 (0.7130) acc 90.6250 (85.8750) lr 4.4207e-05 eta 0:06:41 +epoch [183/200] batch [30/31] time 0.723 (0.747) data 0.000 (0.028) loss 0.5283 (0.6728) acc 87.5000 (86.4583) lr 4.4207e-05 eta 0:06:34 +epoch [184/200] batch [5/31] time 0.712 (0.895) data 0.000 (0.177) loss 0.6602 (0.5805) acc 81.2500 (88.1250) lr 3.9706e-05 eta 0:07:47 +epoch [184/200] batch [10/31] time 0.713 (0.804) data 0.001 (0.089) loss 0.7476 (0.6057) acc 78.1250 (86.8750) lr 3.9706e-05 eta 0:06:55 +epoch [184/200] batch [15/31] time 0.711 (0.775) data 0.000 (0.059) loss 0.5488 (0.5748) acc 87.5000 (87.7083) lr 3.9706e-05 eta 0:06:36 +epoch [184/200] batch [20/31] time 0.703 (0.759) data 0.000 (0.045) loss 0.5874 (0.5592) acc 84.3750 (87.8125) lr 3.9706e-05 eta 0:06:24 +epoch [184/200] batch [25/31] time 0.701 (0.749) data 0.000 (0.036) loss 1.0752 (0.5816) acc 81.2500 (87.6250) lr 3.9706e-05 eta 0:06:16 +epoch [184/200] batch [30/31] time 0.700 (0.742) data 0.000 (0.030) loss 0.6538 (0.5848) acc 84.3750 (87.1875) lr 3.9706e-05 eta 0:06:08 +epoch [185/200] batch [5/31] time 0.713 (0.906) data 0.000 (0.184) loss 0.8413 (0.5884) acc 78.1250 (86.8750) 
lr 3.5443e-05 eta 0:07:24 +epoch [185/200] batch [10/31] time 0.723 (0.811) data 0.000 (0.092) loss 0.7236 (0.6815) acc 84.3750 (86.8750) lr 3.5443e-05 eta 0:06:34 +epoch [185/200] batch [15/31] time 0.714 (0.777) data 0.000 (0.061) loss 0.9189 (0.6439) acc 84.3750 (87.5000) lr 3.5443e-05 eta 0:06:13 +epoch [185/200] batch [20/31] time 0.703 (0.759) data 0.000 (0.046) loss 0.9146 (0.6683) acc 84.3750 (86.7188) lr 3.5443e-05 eta 0:06:01 +epoch [185/200] batch [25/31] time 0.709 (0.751) data 0.000 (0.037) loss 0.3428 (0.6644) acc 93.7500 (86.8750) lr 3.5443e-05 eta 0:05:53 +epoch [185/200] batch [30/31] time 0.711 (0.744) data 0.000 (0.031) loss 0.5264 (0.6572) acc 90.6250 (86.4583) lr 3.5443e-05 eta 0:05:46 +epoch [186/200] batch [5/31] time 0.707 (0.889) data 0.001 (0.169) loss 0.7720 (0.6139) acc 87.5000 (86.2500) lr 3.1417e-05 eta 0:06:49 +epoch [186/200] batch [10/31] time 0.707 (0.798) data 0.000 (0.085) loss 0.8677 (0.7433) acc 81.2500 (85.6250) lr 3.1417e-05 eta 0:06:03 +epoch [186/200] batch [15/31] time 0.710 (0.778) data 0.000 (0.057) loss 0.7949 (0.7443) acc 87.5000 (86.0417) lr 3.1417e-05 eta 0:05:50 +epoch [186/200] batch [20/31] time 0.710 (0.761) data 0.000 (0.042) loss 0.6719 (0.7586) acc 84.3750 (85.6250) lr 3.1417e-05 eta 0:05:38 +epoch [186/200] batch [25/31] time 0.707 (0.751) data 0.000 (0.034) loss 0.6069 (0.7162) acc 87.5000 (86.0000) lr 3.1417e-05 eta 0:05:30 +epoch [186/200] batch [30/31] time 0.707 (0.744) data 0.000 (0.028) loss 0.5635 (0.7247) acc 84.3750 (85.5208) lr 3.1417e-05 eta 0:05:23 +epoch [187/200] batch [5/31] time 0.708 (0.893) data 0.000 (0.169) loss 0.8159 (0.7086) acc 84.3750 (81.2500) lr 2.7630e-05 eta 0:06:23 +epoch [187/200] batch [10/31] time 0.728 (0.806) data 0.000 (0.085) loss 0.9912 (0.7404) acc 84.3750 (82.8125) lr 2.7630e-05 eta 0:05:41 +epoch [187/200] batch [15/31] time 0.726 (0.777) data 0.000 (0.057) loss 0.6504 (0.7490) acc 87.5000 (82.9167) lr 2.7630e-05 eta 0:05:25 +epoch [187/200] batch [20/31] time 0.714 
(0.761) data 0.000 (0.043) loss 0.7251 (0.6924) acc 87.5000 (85.0000) lr 2.7630e-05 eta 0:05:15 +epoch [187/200] batch [25/31] time 0.706 (0.751) data 0.000 (0.034) loss 0.6221 (0.7142) acc 81.2500 (84.5000) lr 2.7630e-05 eta 0:05:07 +epoch [187/200] batch [30/31] time 0.721 (0.745) data 0.000 (0.028) loss 0.5679 (0.6977) acc 87.5000 (85.2083) lr 2.7630e-05 eta 0:05:00 +epoch [188/200] batch [5/31] time 0.727 (0.892) data 0.000 (0.169) loss 0.6826 (0.6745) acc 81.2500 (81.2500) lr 2.4083e-05 eta 0:05:55 +epoch [188/200] batch [10/31] time 0.710 (0.804) data 0.000 (0.085) loss 0.4944 (0.6420) acc 81.2500 (84.0625) lr 2.4083e-05 eta 0:05:15 +epoch [188/200] batch [15/31] time 0.717 (0.774) data 0.000 (0.057) loss 0.6055 (0.6365) acc 90.6250 (85.0000) lr 2.4083e-05 eta 0:05:00 +epoch [188/200] batch [20/31] time 0.704 (0.758) data 0.000 (0.042) loss 0.5825 (0.6405) acc 93.7500 (85.9375) lr 2.4083e-05 eta 0:04:50 +epoch [188/200] batch [25/31] time 0.715 (0.748) data 0.000 (0.034) loss 0.6860 (0.6309) acc 84.3750 (85.7500) lr 2.4083e-05 eta 0:04:42 +epoch [188/200] batch [30/31] time 0.710 (0.741) data 0.000 (0.028) loss 0.7393 (0.6189) acc 81.2500 (86.0417) lr 2.4083e-05 eta 0:04:36 +epoch [189/200] batch [5/31] time 0.720 (0.895) data 0.001 (0.167) loss 1.0312 (0.9122) acc 81.2500 (80.6250) lr 2.0777e-05 eta 0:05:28 +epoch [189/200] batch [10/31] time 0.714 (0.806) data 0.000 (0.084) loss 0.4434 (0.7745) acc 96.8750 (85.0000) lr 2.0777e-05 eta 0:04:51 +epoch [189/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.056) loss 0.7349 (0.7498) acc 87.5000 (84.7917) lr 2.0777e-05 eta 0:04:37 +epoch [189/200] batch [20/31] time 0.718 (0.761) data 0.000 (0.042) loss 0.8589 (0.7114) acc 84.3750 (86.0938) lr 2.0777e-05 eta 0:04:27 +epoch [189/200] batch [25/31] time 0.711 (0.752) data 0.000 (0.034) loss 0.9478 (0.6881) acc 84.3750 (86.5000) lr 2.0777e-05 eta 0:04:20 +epoch [189/200] batch [30/31] time 0.715 (0.746) data 0.000 (0.028) loss 0.1755 (0.6711) acc 96.8750 (86.9792) 
lr 2.0777e-05 eta 0:04:15 +epoch [190/200] batch [5/31] time 0.726 (0.902) data 0.001 (0.174) loss 0.4841 (0.6260) acc 87.5000 (86.2500) lr 1.7713e-05 eta 0:05:03 +epoch [190/200] batch [10/31] time 0.713 (0.808) data 0.000 (0.087) loss 0.8765 (0.6309) acc 75.0000 (85.6250) lr 1.7713e-05 eta 0:04:27 +epoch [190/200] batch [15/31] time 0.710 (0.776) data 0.000 (0.058) loss 1.0938 (0.6958) acc 78.1250 (83.7500) lr 1.7713e-05 eta 0:04:13 +epoch [190/200] batch [20/31] time 0.708 (0.761) data 0.000 (0.044) loss 0.6836 (0.6598) acc 81.2500 (84.8438) lr 1.7713e-05 eta 0:04:04 +epoch [190/200] batch [25/31] time 0.722 (0.751) data 0.000 (0.035) loss 0.3699 (0.6178) acc 96.8750 (85.6250) lr 1.7713e-05 eta 0:03:57 +epoch [190/200] batch [30/31] time 0.705 (0.744) data 0.000 (0.029) loss 0.5562 (0.6327) acc 93.7500 (85.7292) lr 1.7713e-05 eta 0:03:51 +epoch [191/200] batch [5/31] time 0.712 (0.923) data 0.000 (0.201) loss 0.2014 (0.6209) acc 100.0000 (88.1250) lr 1.4891e-05 eta 0:04:41 +epoch [191/200] batch [10/31] time 0.707 (0.817) data 0.000 (0.101) loss 0.7505 (0.6515) acc 84.3750 (88.1250) lr 1.4891e-05 eta 0:04:05 +epoch [191/200] batch [15/31] time 0.711 (0.794) data 0.000 (0.067) loss 0.9512 (0.7110) acc 81.2500 (86.6667) lr 1.4891e-05 eta 0:03:54 +epoch [191/200] batch [20/31] time 0.712 (0.773) data 0.001 (0.050) loss 0.4939 (0.6956) acc 90.6250 (86.2500) lr 1.4891e-05 eta 0:03:44 +epoch [191/200] batch [25/31] time 0.726 (0.762) data 0.000 (0.040) loss 0.5498 (0.6643) acc 90.6250 (87.0000) lr 1.4891e-05 eta 0:03:37 +epoch [191/200] batch [30/31] time 0.717 (0.754) data 0.000 (0.034) loss 0.6626 (0.6783) acc 78.1250 (86.1458) lr 1.4891e-05 eta 0:03:31 +epoch [192/200] batch [5/31] time 0.730 (0.904) data 0.001 (0.177) loss 0.5664 (0.9031) acc 84.3750 (81.2500) lr 1.2312e-05 eta 0:04:07 +epoch [192/200] batch [10/31] time 0.723 (0.812) data 0.000 (0.089) loss 1.0381 (0.7831) acc 71.8750 (82.5000) lr 1.2312e-05 eta 0:03:38 +epoch [192/200] batch [15/31] time 0.718 
(0.781) data 0.000 (0.059) loss 0.4893 (0.6967) acc 87.5000 (83.7500) lr 1.2312e-05 eta 0:03:26 +epoch [192/200] batch [20/31] time 0.712 (0.764) data 0.000 (0.045) loss 0.4832 (0.6835) acc 87.5000 (84.6875) lr 1.2312e-05 eta 0:03:17 +epoch [192/200] batch [25/31] time 0.717 (0.753) data 0.000 (0.036) loss 0.6318 (0.7058) acc 84.3750 (84.3750) lr 1.2312e-05 eta 0:03:11 +epoch [192/200] batch [30/31] time 0.708 (0.746) data 0.000 (0.030) loss 0.7627 (0.7082) acc 84.3750 (84.5833) lr 1.2312e-05 eta 0:03:05 +epoch [193/200] batch [5/31] time 0.715 (0.936) data 0.000 (0.177) loss 0.0958 (0.4741) acc 100.0000 (90.0000) lr 9.9763e-06 eta 0:03:47 +epoch [193/200] batch [10/31] time 0.718 (0.825) data 0.001 (0.089) loss 0.4426 (0.5230) acc 87.5000 (87.8125) lr 9.9763e-06 eta 0:03:16 +epoch [193/200] batch [15/31] time 0.723 (0.789) data 0.000 (0.059) loss 0.3096 (0.5790) acc 93.7500 (86.4583) lr 9.9763e-06 eta 0:03:03 +epoch [193/200] batch [20/31] time 0.712 (0.769) data 0.000 (0.045) loss 0.7676 (0.5706) acc 84.3750 (86.5625) lr 9.9763e-06 eta 0:02:55 +epoch [193/200] batch [25/31] time 0.708 (0.757) data 0.000 (0.036) loss 0.5488 (0.5847) acc 81.2500 (86.7500) lr 9.9763e-06 eta 0:02:48 +epoch [193/200] batch [30/31] time 0.727 (0.750) data 0.000 (0.030) loss 0.6963 (0.5852) acc 87.5000 (86.8750) lr 9.9763e-06 eta 0:02:43 +epoch [194/200] batch [5/31] time 0.706 (0.895) data 0.000 (0.174) loss 0.7852 (0.6838) acc 87.5000 (88.1250) lr 7.8853e-06 eta 0:03:09 +epoch [194/200] batch [10/31] time 0.719 (0.806) data 0.000 (0.087) loss 0.8394 (0.7120) acc 84.3750 (87.8125) lr 7.8853e-06 eta 0:02:46 +epoch [194/200] batch [15/31] time 0.712 (0.777) data 0.000 (0.058) loss 1.0830 (0.6889) acc 81.2500 (87.5000) lr 7.8853e-06 eta 0:02:36 +epoch [194/200] batch [20/31] time 0.719 (0.761) data 0.000 (0.044) loss 0.9922 (0.7138) acc 87.5000 (87.6562) lr 7.8853e-06 eta 0:02:29 +epoch [194/200] batch [25/31] time 0.713 (0.753) data 0.000 (0.035) loss 1.4463 (0.7070) acc 68.7500 
(87.2500) lr 7.8853e-06 eta 0:02:24 +epoch [194/200] batch [30/31] time 0.730 (0.747) data 0.000 (0.029) loss 1.0371 (0.7096) acc 78.1250 (86.8750) lr 7.8853e-06 eta 0:02:19 +epoch [195/200] batch [5/31] time 0.715 (0.896) data 0.000 (0.171) loss 0.8481 (0.8990) acc 71.8750 (76.2500) lr 6.0390e-06 eta 0:02:42 +epoch [195/200] batch [10/31] time 0.722 (0.820) data 0.000 (0.086) loss 0.8618 (0.7670) acc 84.3750 (80.9375) lr 6.0390e-06 eta 0:02:24 +epoch [195/200] batch [15/31] time 0.709 (0.784) data 0.000 (0.057) loss 0.7461 (0.7317) acc 87.5000 (83.3333) lr 6.0390e-06 eta 0:02:14 +epoch [195/200] batch [20/31] time 0.712 (0.766) data 0.000 (0.043) loss 0.7207 (0.7292) acc 84.3750 (83.7500) lr 6.0390e-06 eta 0:02:07 +epoch [195/200] batch [25/31] time 0.708 (0.755) data 0.000 (0.035) loss 0.5674 (0.7401) acc 84.3750 (84.1250) lr 6.0390e-06 eta 0:02:01 +epoch [195/200] batch [30/31] time 0.707 (0.747) data 0.000 (0.029) loss 0.5161 (0.7308) acc 87.5000 (84.8958) lr 6.0390e-06 eta 0:01:56 +epoch [196/200] batch [5/31] time 0.714 (0.888) data 0.000 (0.167) loss 0.8311 (0.7694) acc 84.3750 (83.1250) lr 4.4380e-06 eta 0:02:13 +epoch [196/200] batch [10/31] time 0.714 (0.803) data 0.000 (0.084) loss 0.6411 (0.7203) acc 90.6250 (85.9375) lr 4.4380e-06 eta 0:01:56 +epoch [196/200] batch [15/31] time 0.713 (0.774) data 0.000 (0.056) loss 0.6572 (0.6844) acc 87.5000 (86.0417) lr 4.4380e-06 eta 0:01:48 +epoch [196/200] batch [20/31] time 0.708 (0.759) data 0.000 (0.042) loss 0.5820 (0.6601) acc 93.7500 (86.4062) lr 4.4380e-06 eta 0:01:42 +epoch [196/200] batch [25/31] time 0.711 (0.750) data 0.000 (0.034) loss 0.8052 (0.6524) acc 87.5000 (86.6250) lr 4.4380e-06 eta 0:01:37 +epoch [196/200] batch [30/31] time 0.709 (0.744) data 0.000 (0.028) loss 0.3945 (0.6587) acc 87.5000 (86.2500) lr 4.4380e-06 eta 0:01:32 +epoch [197/200] batch [5/31] time 0.713 (0.888) data 0.000 (0.162) loss 0.2585 (0.7083) acc 93.7500 (84.3750) lr 3.0827e-06 eta 0:01:45 +epoch [197/200] batch [10/31] 
time 0.713 (0.800) data 0.003 (0.081) loss 0.7280 (0.6910) acc 84.3750 (85.3125) lr 3.0827e-06 eta 0:01:31 +epoch [197/200] batch [15/31] time 0.720 (0.774) data 0.000 (0.054) loss 0.4385 (0.6618) acc 87.5000 (84.7917) lr 3.0827e-06 eta 0:01:24 +epoch [197/200] batch [20/31] time 0.710 (0.758) data 0.000 (0.041) loss 0.7534 (0.6927) acc 84.3750 (84.3750) lr 3.0827e-06 eta 0:01:18 +epoch [197/200] batch [25/31] time 0.711 (0.749) data 0.000 (0.033) loss 0.7476 (0.6908) acc 75.0000 (83.8750) lr 3.0827e-06 eta 0:01:14 +epoch [197/200] batch [30/31] time 0.710 (0.742) data 0.000 (0.027) loss 0.5811 (0.6908) acc 78.1250 (83.7500) lr 3.0827e-06 eta 0:01:09 +epoch [198/200] batch [5/31] time 0.710 (0.926) data 0.000 (0.206) loss 0.9126 (0.6579) acc 84.3750 (87.5000) lr 1.9733e-06 eta 0:01:21 +epoch [198/200] batch [10/31] time 0.712 (0.821) data 0.000 (0.103) loss 0.3291 (0.6278) acc 90.6250 (87.5000) lr 1.9733e-06 eta 0:01:08 +epoch [198/200] batch [15/31] time 0.711 (0.785) data 0.000 (0.069) loss 0.8237 (0.5913) acc 84.3750 (88.1250) lr 1.9733e-06 eta 0:01:01 +epoch [198/200] batch [20/31] time 0.714 (0.766) data 0.000 (0.052) loss 1.0020 (0.6194) acc 87.5000 (87.8125) lr 1.9733e-06 eta 0:00:55 +epoch [198/200] batch [25/31] time 0.711 (0.756) data 0.000 (0.041) loss 0.6719 (0.6353) acc 81.2500 (86.8750) lr 1.9733e-06 eta 0:00:51 +epoch [198/200] batch [30/31] time 0.718 (0.749) data 0.000 (0.035) loss 0.5854 (0.6469) acc 81.2500 (86.5625) lr 1.9733e-06 eta 0:00:47 +epoch [199/200] batch [5/31] time 0.727 (0.905) data 0.000 (0.177) loss 0.7383 (0.6876) acc 87.5000 (86.2500) lr 1.1101e-06 eta 0:00:51 +epoch [199/200] batch [10/31] time 0.726 (0.812) data 0.000 (0.089) loss 0.2791 (0.6614) acc 93.7500 (86.2500) lr 1.1101e-06 eta 0:00:42 +epoch [199/200] batch [15/31] time 0.730 (0.780) data 0.000 (0.059) loss 0.6978 (0.6577) acc 81.2500 (85.6250) lr 1.1101e-06 eta 0:00:36 +epoch [199/200] batch [20/31] time 0.711 (0.763) data 0.000 (0.045) loss 1.0977 (0.7065) acc 
81.2500 (84.8438) lr 1.1101e-06 eta 0:00:32 +epoch [199/200] batch [25/31] time 0.714 (0.752) data 0.000 (0.036) loss 1.0938 (0.7501) acc 71.8750 (84.0000) lr 1.1101e-06 eta 0:00:27 +epoch [199/200] batch [30/31] time 0.707 (0.745) data 0.000 (0.030) loss 0.7710 (0.7527) acc 84.3750 (84.1667) lr 1.1101e-06 eta 0:00:23 +epoch [200/200] batch [5/31] time 0.718 (0.889) data 0.000 (0.161) loss 0.8462 (0.6117) acc 84.3750 (85.0000) lr 4.9344e-07 eta 0:00:23 +epoch [200/200] batch [10/31] time 0.712 (0.800) data 0.000 (0.081) loss 0.9668 (0.6253) acc 90.6250 (86.5625) lr 4.9344e-07 eta 0:00:16 +epoch [200/200] batch [15/31] time 0.713 (0.772) data 0.000 (0.054) loss 0.3579 (0.6491) acc 90.6250 (86.2500) lr 4.9344e-07 eta 0:00:12 +epoch [200/200] batch [20/31] time 0.707 (0.757) data 0.000 (0.041) loss 0.6587 (0.6775) acc 90.6250 (85.9375) lr 4.9344e-07 eta 0:00:08 +epoch [200/200] batch [25/31] time 0.708 (0.748) data 0.000 (0.033) loss 0.6250 (0.6682) acc 78.1250 (85.5000) lr 4.9344e-07 eta 0:00:04 +epoch [200/200] batch [30/31] time 0.712 (0.743) data 0.000 (0.027) loss 0.6016 (0.6683) acc 84.3750 (85.5208) lr 4.9344e-07 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-200 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 26,204 +* accuracy: 52.4% +* error: 47.6% +* macro_f1: 51.0% +Elapsed: 1:20:09 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 00000000..b2929f7c --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-200 diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-200 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-200 new file mode 100644 index 00000000..d68ba04a Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-200 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698082537.ckb-gpu-lambda.1480462.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698082537.ckb-gpu-lambda.1480462.0 new file mode 100644 index 00000000..1ee73a1b Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698082537.ckb-gpu-lambda.1480462.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..1767c7e2 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None 
+trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + 
PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.126 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/31] time 0.736 (1.619) data 0.000 (0.155) loss 4.3398 (4.6867) acc 25.0000 (18.1250) lr 1.0000e-05 eta 0:41:40 +epoch [1/50] batch [10/31] time 0.722 (1.171) data 0.000 (0.078) loss 3.7773 (4.3557) acc 21.8750 (21.8750) lr 1.0000e-05 eta 0:30:04 +epoch [1/50] batch [15/31] time 0.721 (1.022) data 0.000 (0.052) loss 3.6875 (4.2160) acc 28.1250 (23.5417) lr 1.0000e-05 eta 0:26:09 +epoch [1/50] batch [20/31] time 0.734 (0.952) data 0.000 (0.039) loss 3.5723 (4.0763) acc 25.0000 (24.5312) lr 1.0000e-05 eta 0:24:17 +epoch [1/50] batch [25/31] time 0.725 (0.909) data 0.000 (0.031) loss 2.6855 (3.8645) acc 43.7500 (27.5000) lr 1.0000e-05 eta 0:23:06 +epoch [1/50] batch [30/31] time 0.712 (0.880) data 0.000 (0.026) loss 3.4629 (3.7541) acc 31.2500 (28.7500) lr 1.0000e-05 eta 0:22:17 +epoch [2/50] batch [5/31] time 0.733 (0.897) data 0.000 (0.154) loss 1.8252 (2.5494) acc 59.3750 (42.5000) lr 2.0000e-03 eta 0:22:38 +epoch [2/50] batch [10/31] time 0.723 (0.809) data 0.000 (0.077) loss 2.0430 (2.4092) acc 50.0000 (46.5625) lr 2.0000e-03 eta 0:20:21 +epoch [2/50] batch [15/31] time 0.733 (0.783) data 0.000 (0.052) loss 2.0762 (2.2622) acc 43.7500 (48.9583) lr 2.0000e-03 eta 0:19:37 +epoch [2/50] batch [20/31] time 0.733 (0.769) data 0.000 (0.039) loss 1.6934 (2.2380) acc 56.2500 (50.1562) lr 2.0000e-03 eta 0:19:13 +epoch [2/50] batch [25/31] time 0.766 (0.762) data 0.000 (0.031) loss 2.3320 (2.1624) acc 40.6250 (51.7500) lr 2.0000e-03 eta 0:18:58 +epoch [2/50] batch [30/31] time 0.723 (0.756) data 0.000 (0.026) loss 2.1250 (2.1692) acc 40.6250 (50.3125) lr 2.0000e-03 eta 0:18:45 +epoch [3/50] batch [5/31] time 0.756 (0.903) data 0.000 (0.151) loss 1.3076 (1.8510) acc 68.7500 (55.0000) lr 1.9980e-03 eta 0:22:18 +epoch [3/50] batch [10/31] time 0.741 (0.817) data 
0.000 (0.076) loss 2.2324 (1.9201) acc 59.3750 (55.0000) lr 1.9980e-03 eta 0:20:07 +epoch [3/50] batch [15/31] time 0.727 (0.792) data 0.000 (0.051) loss 2.0391 (1.9473) acc 56.2500 (54.1667) lr 1.9980e-03 eta 0:19:26 +epoch [3/50] batch [20/31] time 0.736 (0.778) data 0.000 (0.038) loss 1.6973 (1.8804) acc 56.2500 (55.1562) lr 1.9980e-03 eta 0:19:01 +epoch [3/50] batch [25/31] time 0.725 (0.768) data 0.000 (0.031) loss 1.8252 (1.9014) acc 68.7500 (55.0000) lr 1.9980e-03 eta 0:18:44 +epoch [3/50] batch [30/31] time 0.727 (0.762) data 0.000 (0.026) loss 2.3770 (1.8992) acc 53.1250 (54.8958) lr 1.9980e-03 eta 0:18:31 +epoch [4/50] batch [5/31] time 0.749 (0.879) data 0.000 (0.140) loss 1.6064 (1.9088) acc 53.1250 (48.1250) lr 1.9921e-03 eta 0:21:16 +epoch [4/50] batch [10/31] time 0.720 (0.805) data 0.000 (0.070) loss 1.7705 (1.7486) acc 59.3750 (54.3750) lr 1.9921e-03 eta 0:19:25 +epoch [4/50] batch [15/31] time 0.722 (0.781) data 0.000 (0.047) loss 1.9619 (1.7015) acc 59.3750 (56.8750) lr 1.9921e-03 eta 0:18:46 +epoch [4/50] batch [20/31] time 0.761 (0.769) data 0.000 (0.035) loss 1.8125 (1.7617) acc 62.5000 (57.8125) lr 1.9921e-03 eta 0:18:25 +epoch [4/50] batch [25/31] time 0.738 (0.763) data 0.000 (0.028) loss 2.4980 (1.7508) acc 62.5000 (57.8750) lr 1.9921e-03 eta 0:18:12 +epoch [4/50] batch [30/31] time 0.744 (0.757) data 0.000 (0.024) loss 2.0391 (1.7604) acc 53.1250 (58.2292) lr 1.9921e-03 eta 0:17:59 +epoch [5/50] batch [5/31] time 0.726 (0.882) data 0.000 (0.146) loss 1.6543 (1.8254) acc 59.3750 (60.0000) lr 1.9823e-03 eta 0:20:53 +epoch [5/50] batch [10/31] time 0.720 (0.806) data 0.000 (0.073) loss 1.2393 (1.7511) acc 75.0000 (59.6875) lr 1.9823e-03 eta 0:19:00 +epoch [5/50] batch [15/31] time 0.733 (0.780) data 0.000 (0.049) loss 2.6543 (1.8711) acc 34.3750 (57.2917) lr 1.9823e-03 eta 0:18:20 +epoch [5/50] batch [20/31] time 0.756 (0.769) data 0.000 (0.037) loss 1.0488 (1.7469) acc 65.6250 (58.2812) lr 1.9823e-03 eta 0:18:01 +epoch [5/50] batch [25/31] 
time 0.738 (0.762) data 0.000 (0.030) loss 1.4512 (1.7606) acc 62.5000 (57.8750) lr 1.9823e-03 eta 0:17:47 +epoch [5/50] batch [30/31] time 0.741 (0.759) data 0.000 (0.025) loss 2.1270 (1.7999) acc 43.7500 (56.9792) lr 1.9823e-03 eta 0:17:38 +epoch [6/50] batch [5/31] time 0.725 (0.893) data 0.000 (0.150) loss 1.5938 (1.8076) acc 59.3750 (55.6250) lr 1.9686e-03 eta 0:20:41 +epoch [6/50] batch [10/31] time 0.720 (0.810) data 0.001 (0.075) loss 1.8115 (1.9620) acc 56.2500 (55.9375) lr 1.9686e-03 eta 0:18:41 +epoch [6/50] batch [15/31] time 0.730 (0.781) data 0.000 (0.050) loss 1.9209 (1.8523) acc 62.5000 (58.7500) lr 1.9686e-03 eta 0:17:57 +epoch [6/50] batch [20/31] time 0.754 (0.770) data 0.000 (0.038) loss 1.6855 (1.8605) acc 65.6250 (58.2812) lr 1.9686e-03 eta 0:17:38 +epoch [6/50] batch [25/31] time 0.724 (0.763) data 0.000 (0.030) loss 1.9580 (1.8629) acc 46.8750 (57.7500) lr 1.9686e-03 eta 0:17:25 +epoch [6/50] batch [30/31] time 0.727 (0.758) data 0.000 (0.025) loss 2.0312 (1.8680) acc 56.2500 (57.7083) lr 1.9686e-03 eta 0:17:14 +epoch [7/50] batch [5/31] time 0.727 (0.871) data 0.000 (0.139) loss 1.4785 (1.7729) acc 59.3750 (56.2500) lr 1.9511e-03 eta 0:19:44 +epoch [7/50] batch [10/31] time 0.745 (0.806) data 0.000 (0.070) loss 1.9053 (1.7051) acc 65.6250 (59.0625) lr 1.9511e-03 eta 0:18:11 +epoch [7/50] batch [15/31] time 0.730 (0.783) data 0.000 (0.047) loss 1.6953 (1.7691) acc 65.6250 (58.5417) lr 1.9511e-03 eta 0:17:35 +epoch [7/50] batch [20/31] time 0.748 (0.770) data 0.000 (0.035) loss 2.5527 (1.7861) acc 43.7500 (58.7500) lr 1.9511e-03 eta 0:17:14 +epoch [7/50] batch [25/31] time 0.734 (0.763) data 0.000 (0.028) loss 1.5830 (1.7526) acc 65.6250 (59.3750) lr 1.9511e-03 eta 0:17:01 +epoch [7/50] batch [30/31] time 0.738 (0.759) data 0.000 (0.023) loss 2.1797 (1.7645) acc 53.1250 (58.9583) lr 1.9511e-03 eta 0:16:53 +epoch [8/50] batch [5/31] time 0.754 (0.888) data 0.000 (0.145) loss 1.4297 (1.6352) acc 62.5000 (56.8750) lr 1.9298e-03 eta 0:19:39 
+epoch [8/50] batch [10/31] time 0.734 (0.811) data 0.000 (0.073) loss 1.6348 (1.7119) acc 68.7500 (58.1250) lr 1.9298e-03 eta 0:17:53 +epoch [8/50] batch [15/31] time 0.724 (0.793) data 0.000 (0.049) loss 1.1348 (1.6554) acc 71.8750 (59.7917) lr 1.9298e-03 eta 0:17:25 +epoch [8/50] batch [20/31] time 0.748 (0.778) data 0.000 (0.037) loss 1.4248 (1.6853) acc 50.0000 (58.9062) lr 1.9298e-03 eta 0:17:01 +epoch [8/50] batch [25/31] time 0.744 (0.770) data 0.000 (0.029) loss 2.0312 (1.7454) acc 46.8750 (57.3750) lr 1.9298e-03 eta 0:16:46 +epoch [8/50] batch [30/31] time 0.725 (0.763) data 0.000 (0.024) loss 2.1055 (1.7775) acc 59.3750 (57.5000) lr 1.9298e-03 eta 0:16:34 +epoch [9/50] batch [5/31] time 0.739 (0.877) data 0.000 (0.137) loss 1.6436 (1.8209) acc 62.5000 (58.1250) lr 1.9048e-03 eta 0:18:57 +epoch [9/50] batch [10/31] time 0.724 (0.805) data 0.000 (0.069) loss 1.8115 (1.7768) acc 59.3750 (60.9375) lr 1.9048e-03 eta 0:17:20 +epoch [9/50] batch [15/31] time 0.745 (0.784) data 0.001 (0.046) loss 0.9819 (1.7993) acc 75.0000 (59.7917) lr 1.9048e-03 eta 0:16:48 +epoch [9/50] batch [20/31] time 0.743 (0.771) data 0.000 (0.034) loss 1.8369 (1.8025) acc 62.5000 (60.4688) lr 1.9048e-03 eta 0:16:28 +epoch [9/50] batch [25/31] time 0.735 (0.764) data 0.000 (0.028) loss 1.7471 (1.7800) acc 65.6250 (61.0000) lr 1.9048e-03 eta 0:16:15 +epoch [9/50] batch [30/31] time 0.725 (0.760) data 0.000 (0.023) loss 1.3857 (1.7530) acc 68.7500 (61.3542) lr 1.9048e-03 eta 0:16:06 +epoch [10/50] batch [5/31] time 0.740 (0.908) data 0.001 (0.134) loss 1.7559 (1.9877) acc 59.3750 (55.0000) lr 1.8763e-03 eta 0:19:09 +epoch [10/50] batch [10/31] time 0.755 (0.823) data 0.000 (0.067) loss 1.6289 (1.8027) acc 59.3750 (59.0625) lr 1.8763e-03 eta 0:17:17 +epoch [10/50] batch [15/31] time 0.722 (0.792) data 0.001 (0.045) loss 1.2383 (1.7423) acc 59.3750 (56.8750) lr 1.8763e-03 eta 0:16:34 +epoch [10/50] batch [20/31] time 0.721 (0.777) data 0.000 (0.034) loss 1.7744 (1.7202) acc 62.5000 
(58.1250) lr 1.8763e-03 eta 0:16:12 +epoch [10/50] batch [25/31] time 0.771 (0.771) data 0.000 (0.027) loss 2.2695 (1.7412) acc 43.7500 (57.8750) lr 1.8763e-03 eta 0:16:00 +epoch [10/50] batch [30/31] time 0.746 (0.765) data 0.000 (0.023) loss 1.8574 (1.7384) acc 50.0000 (57.0833) lr 1.8763e-03 eta 0:15:49 +epoch [11/50] batch [5/31] time 0.756 (0.892) data 0.000 (0.150) loss 1.4922 (1.5584) acc 65.6250 (59.3750) lr 1.8443e-03 eta 0:18:21 +epoch [11/50] batch [10/31] time 0.718 (0.815) data 0.001 (0.075) loss 2.1152 (1.7287) acc 53.1250 (56.8750) lr 1.8443e-03 eta 0:16:42 +epoch [11/50] batch [15/31] time 0.733 (0.789) data 0.000 (0.050) loss 2.0254 (1.6794) acc 59.3750 (59.3750) lr 1.8443e-03 eta 0:16:06 +epoch [11/50] batch [20/31] time 0.747 (0.776) data 0.000 (0.038) loss 1.7148 (1.6687) acc 62.5000 (59.6875) lr 1.8443e-03 eta 0:15:46 +epoch [11/50] batch [25/31] time 0.746 (0.770) data 0.000 (0.030) loss 1.3906 (1.6923) acc 68.7500 (59.7500) lr 1.8443e-03 eta 0:15:35 +epoch [11/50] batch [30/31] time 0.732 (0.766) data 0.000 (0.025) loss 1.8145 (1.7097) acc 59.3750 (59.6875) lr 1.8443e-03 eta 0:15:26 +epoch [12/50] batch [5/31] time 0.740 (0.888) data 0.001 (0.144) loss 1.3662 (1.6918) acc 68.7500 (61.8750) lr 1.8090e-03 eta 0:17:49 +epoch [12/50] batch [10/31] time 0.736 (0.827) data 0.000 (0.072) loss 2.1953 (1.7519) acc 53.1250 (58.7500) lr 1.8090e-03 eta 0:16:31 +epoch [12/50] batch [15/31] time 0.750 (0.797) data 0.001 (0.048) loss 1.2852 (1.7443) acc 62.5000 (57.5000) lr 1.8090e-03 eta 0:15:51 +epoch [12/50] batch [20/31] time 0.729 (0.781) data 0.000 (0.036) loss 2.0586 (1.7310) acc 50.0000 (58.4375) lr 1.8090e-03 eta 0:15:28 +epoch [12/50] batch [25/31] time 0.740 (0.771) data 0.000 (0.029) loss 2.0312 (1.7029) acc 59.3750 (59.0000) lr 1.8090e-03 eta 0:15:12 +epoch [12/50] batch [30/31] time 0.732 (0.766) data 0.000 (0.024) loss 1.8018 (1.7203) acc 46.8750 (58.8542) lr 1.8090e-03 eta 0:15:03 +epoch [13/50] batch [5/31] time 0.745 (0.890) data 0.000 
(0.139) loss 1.9727 (1.7277) acc 56.2500 (60.6250) lr 1.7705e-03 eta 0:17:23 +epoch [13/50] batch [10/31] time 0.762 (0.815) data 0.001 (0.070) loss 1.7256 (1.6715) acc 59.3750 (60.0000) lr 1.7705e-03 eta 0:15:52 +epoch [13/50] batch [15/31] time 0.724 (0.787) data 0.001 (0.047) loss 1.8789 (1.6636) acc 50.0000 (59.3750) lr 1.7705e-03 eta 0:15:15 +epoch [13/50] batch [20/31] time 0.730 (0.771) data 0.000 (0.035) loss 1.7012 (1.6916) acc 56.2500 (59.2188) lr 1.7705e-03 eta 0:14:52 +epoch [13/50] batch [25/31] time 0.748 (0.765) data 0.000 (0.028) loss 1.8350 (1.7223) acc 59.3750 (58.8750) lr 1.7705e-03 eta 0:14:41 +epoch [13/50] batch [30/31] time 0.727 (0.760) data 0.000 (0.024) loss 1.6152 (1.7320) acc 59.3750 (58.7500) lr 1.7705e-03 eta 0:14:32 +epoch [14/50] batch [5/31] time 0.746 (0.913) data 0.000 (0.170) loss 1.3281 (1.5967) acc 68.7500 (58.7500) lr 1.7290e-03 eta 0:17:22 +epoch [14/50] batch [10/31] time 0.739 (0.826) data 0.000 (0.085) loss 1.9014 (1.5854) acc 56.2500 (60.0000) lr 1.7290e-03 eta 0:15:38 +epoch [14/50] batch [15/31] time 0.745 (0.797) data 0.000 (0.057) loss 1.9902 (1.6548) acc 53.1250 (58.1250) lr 1.7290e-03 eta 0:15:01 +epoch [14/50] batch [20/31] time 0.730 (0.780) data 0.000 (0.043) loss 1.7793 (1.7182) acc 75.0000 (59.2188) lr 1.7290e-03 eta 0:14:39 +epoch [14/50] batch [25/31] time 0.720 (0.770) data 0.000 (0.034) loss 1.5332 (1.7276) acc 68.7500 (59.0000) lr 1.7290e-03 eta 0:14:23 +epoch [14/50] batch [30/31] time 0.726 (0.763) data 0.000 (0.029) loss 1.5850 (1.7054) acc 62.5000 (59.5833) lr 1.7290e-03 eta 0:14:11 +epoch [15/50] batch [5/31] time 0.723 (0.888) data 0.000 (0.145) loss 2.1777 (1.7504) acc 46.8750 (54.3750) lr 1.6845e-03 eta 0:16:26 +epoch [15/50] batch [10/31] time 0.747 (0.814) data 0.000 (0.073) loss 1.5898 (1.7521) acc 65.6250 (56.5625) lr 1.6845e-03 eta 0:15:00 +epoch [15/50] batch [15/31] time 0.736 (0.787) data 0.000 (0.049) loss 1.8877 (1.7319) acc 46.8750 (56.2500) lr 1.6845e-03 eta 0:14:26 +epoch [15/50] batch 
[20/31] time 0.728 (0.773) data 0.000 (0.037) loss 2.1719 (1.6682) acc 59.3750 (58.1250) lr 1.6845e-03 eta 0:14:06 +epoch [15/50] batch [25/31] time 0.746 (0.765) data 0.000 (0.029) loss 2.2051 (1.6623) acc 50.0000 (57.7500) lr 1.6845e-03 eta 0:13:55 +epoch [15/50] batch [30/31] time 0.749 (0.760) data 0.000 (0.025) loss 2.4785 (1.6850) acc 43.7500 (57.6042) lr 1.6845e-03 eta 0:13:45 +epoch [16/50] batch [5/31] time 0.720 (0.890) data 0.000 (0.154) loss 1.9727 (1.5664) acc 59.3750 (61.8750) lr 1.6374e-03 eta 0:16:01 +epoch [16/50] batch [10/31] time 0.718 (0.811) data 0.000 (0.077) loss 1.4150 (1.5817) acc 65.6250 (61.8750) lr 1.6374e-03 eta 0:14:31 +epoch [16/50] batch [15/31] time 0.745 (0.787) data 0.001 (0.052) loss 1.7256 (1.5762) acc 56.2500 (62.9167) lr 1.6374e-03 eta 0:14:02 +epoch [16/50] batch [20/31] time 0.742 (0.773) data 0.000 (0.039) loss 1.5596 (1.6434) acc 56.2500 (61.4062) lr 1.6374e-03 eta 0:13:42 +epoch [16/50] batch [25/31] time 0.744 (0.766) data 0.000 (0.031) loss 1.6895 (1.6447) acc 62.5000 (61.0000) lr 1.6374e-03 eta 0:13:31 +epoch [16/50] batch [30/31] time 0.729 (0.760) data 0.000 (0.026) loss 1.3711 (1.6557) acc 59.3750 (60.5208) lr 1.6374e-03 eta 0:13:21 +epoch [17/50] batch [5/31] time 0.730 (0.881) data 0.000 (0.145) loss 1.4092 (1.4502) acc 43.7500 (60.6250) lr 1.5878e-03 eta 0:15:24 +epoch [17/50] batch [10/31] time 0.744 (0.805) data 0.000 (0.073) loss 1.8389 (1.5779) acc 62.5000 (60.9375) lr 1.5878e-03 eta 0:14:00 +epoch [17/50] batch [15/31] time 0.734 (0.780) data 0.001 (0.049) loss 1.8838 (1.6576) acc 59.3750 (61.0417) lr 1.5878e-03 eta 0:13:30 +epoch [17/50] batch [20/31] time 0.731 (0.769) data 0.000 (0.037) loss 2.4473 (1.6905) acc 50.0000 (61.0938) lr 1.5878e-03 eta 0:13:14 +epoch [17/50] batch [25/31] time 0.758 (0.761) data 0.000 (0.029) loss 1.4961 (1.6848) acc 56.2500 (60.2500) lr 1.5878e-03 eta 0:13:03 +epoch [17/50] batch [30/31] time 0.728 (0.757) data 0.000 (0.024) loss 1.9092 (1.6771) acc 62.5000 (60.7292) lr 
1.5878e-03 eta 0:12:54 +epoch [18/50] batch [5/31] time 0.735 (0.902) data 0.000 (0.157) loss 1.2969 (1.4010) acc 71.8750 (64.3750) lr 1.5358e-03 eta 0:15:18 +epoch [18/50] batch [10/31] time 0.721 (0.828) data 0.000 (0.079) loss 2.5000 (1.5623) acc 34.3750 (61.2500) lr 1.5358e-03 eta 0:13:59 +epoch [18/50] batch [15/31] time 0.732 (0.799) data 0.000 (0.053) loss 1.5244 (1.5946) acc 62.5000 (61.2500) lr 1.5358e-03 eta 0:13:25 +epoch [18/50] batch [20/31] time 0.739 (0.783) data 0.000 (0.039) loss 1.3408 (1.6483) acc 62.5000 (60.1562) lr 1.5358e-03 eta 0:13:05 +epoch [18/50] batch [25/31] time 0.739 (0.773) data 0.000 (0.032) loss 1.7939 (1.6445) acc 59.3750 (60.5000) lr 1.5358e-03 eta 0:12:51 +epoch [18/50] batch [30/31] time 0.723 (0.765) data 0.000 (0.026) loss 1.8184 (1.6314) acc 53.1250 (60.6250) lr 1.5358e-03 eta 0:12:39 +epoch [19/50] batch [5/31] time 0.735 (0.881) data 0.001 (0.144) loss 1.6729 (1.5230) acc 62.5000 (61.2500) lr 1.4818e-03 eta 0:14:29 +epoch [19/50] batch [10/31] time 0.726 (0.810) data 0.000 (0.072) loss 1.2090 (1.5302) acc 81.2500 (65.0000) lr 1.4818e-03 eta 0:13:15 +epoch [19/50] batch [15/31] time 0.738 (0.785) data 0.001 (0.048) loss 1.8447 (1.5719) acc 56.2500 (63.9583) lr 1.4818e-03 eta 0:12:47 +epoch [19/50] batch [20/31] time 0.722 (0.771) data 0.000 (0.036) loss 2.0156 (1.5807) acc 50.0000 (63.4375) lr 1.4818e-03 eta 0:12:29 +epoch [19/50] batch [25/31] time 0.719 (0.761) data 0.000 (0.029) loss 1.5244 (1.5637) acc 62.5000 (63.5000) lr 1.4818e-03 eta 0:12:15 +epoch [19/50] batch [30/31] time 0.729 (0.762) data 0.000 (0.024) loss 2.3477 (1.6421) acc 50.0000 (61.7708) lr 1.4818e-03 eta 0:12:12 +epoch [20/50] batch [5/31] time 0.746 (0.886) data 0.000 (0.138) loss 1.7305 (1.8320) acc 50.0000 (56.2500) lr 1.4258e-03 eta 0:14:07 +epoch [20/50] batch [10/31] time 0.727 (0.807) data 0.000 (0.069) loss 1.2080 (1.8609) acc 71.8750 (55.6250) lr 1.4258e-03 eta 0:12:47 +epoch [20/50] batch [15/31] time 0.746 (0.786) data 0.000 (0.046) loss 
0.9761 (1.8072) acc 78.1250 (58.5417) lr 1.4258e-03 eta 0:12:23 +epoch [20/50] batch [20/31] time 0.729 (0.774) data 0.000 (0.035) loss 1.1426 (1.7070) acc 68.7500 (60.4688) lr 1.4258e-03 eta 0:12:08 +epoch [20/50] batch [25/31] time 0.718 (0.764) data 0.000 (0.028) loss 1.5830 (1.6740) acc 62.5000 (60.6250) lr 1.4258e-03 eta 0:11:55 +epoch [20/50] batch [30/31] time 0.743 (0.761) data 0.000 (0.023) loss 1.6953 (1.6580) acc 56.2500 (61.4583) lr 1.4258e-03 eta 0:11:48 +epoch [21/50] batch [5/31] time 0.732 (0.881) data 0.000 (0.143) loss 1.1982 (1.4502) acc 71.8750 (66.8750) lr 1.3681e-03 eta 0:13:34 +epoch [21/50] batch [10/31] time 0.720 (0.806) data 0.000 (0.072) loss 2.1406 (1.6883) acc 53.1250 (61.8750) lr 1.3681e-03 eta 0:12:21 +epoch [21/50] batch [15/31] time 0.740 (0.782) data 0.001 (0.048) loss 1.6357 (1.6260) acc 56.2500 (62.2917) lr 1.3681e-03 eta 0:11:55 +epoch [21/50] batch [20/31] time 0.734 (0.769) data 0.000 (0.036) loss 1.0674 (1.6138) acc 71.8750 (62.9688) lr 1.3681e-03 eta 0:11:40 +epoch [21/50] batch [25/31] time 0.726 (0.762) data 0.000 (0.029) loss 1.9014 (1.6193) acc 59.3750 (63.3750) lr 1.3681e-03 eta 0:11:29 +epoch [21/50] batch [30/31] time 0.735 (0.763) data 0.000 (0.024) loss 1.5947 (1.6149) acc 59.3750 (63.0208) lr 1.3681e-03 eta 0:11:27 +epoch [22/50] batch [5/31] time 0.763 (0.892) data 0.000 (0.144) loss 2.1543 (1.7273) acc 46.8750 (59.3750) lr 1.3090e-03 eta 0:13:17 +epoch [22/50] batch [10/31] time 0.725 (0.810) data 0.000 (0.072) loss 2.0352 (1.6087) acc 50.0000 (61.2500) lr 1.3090e-03 eta 0:11:59 +epoch [22/50] batch [15/31] time 0.731 (0.786) data 0.001 (0.048) loss 1.2002 (1.5510) acc 62.5000 (61.8750) lr 1.3090e-03 eta 0:11:34 +epoch [22/50] batch [20/31] time 0.738 (0.772) data 0.000 (0.036) loss 1.5771 (1.5228) acc 56.2500 (62.0312) lr 1.3090e-03 eta 0:11:18 +epoch [22/50] batch [25/31] time 0.757 (0.764) data 0.000 (0.029) loss 1.6143 (1.5188) acc 65.6250 (62.8750) lr 1.3090e-03 eta 0:11:07 +epoch [22/50] batch [30/31] time 
0.733 (0.759) data 0.000 (0.024) loss 0.9448 (1.5481) acc 71.8750 (62.3958) lr 1.3090e-03 eta 0:10:59 +epoch [23/50] batch [5/31] time 0.717 (0.938) data 0.000 (0.202) loss 1.3350 (1.5055) acc 75.0000 (60.0000) lr 1.2487e-03 eta 0:13:29 +epoch [23/50] batch [10/31] time 0.739 (0.840) data 0.000 (0.101) loss 2.1582 (1.4918) acc 43.7500 (62.8125) lr 1.2487e-03 eta 0:12:01 +epoch [23/50] batch [15/31] time 0.719 (0.803) data 0.000 (0.068) loss 1.8223 (1.5666) acc 59.3750 (62.5000) lr 1.2487e-03 eta 0:11:25 +epoch [23/50] batch [20/31] time 0.726 (0.788) data 0.000 (0.051) loss 1.0068 (1.5585) acc 68.7500 (61.2500) lr 1.2487e-03 eta 0:11:08 +epoch [23/50] batch [25/31] time 0.728 (0.777) data 0.000 (0.041) loss 1.5420 (1.5714) acc 56.2500 (61.2500) lr 1.2487e-03 eta 0:10:54 +epoch [23/50] batch [30/31] time 0.733 (0.769) data 0.000 (0.034) loss 0.8979 (1.5799) acc 75.0000 (61.3542) lr 1.2487e-03 eta 0:10:44 +epoch [24/50] batch [5/31] time 0.732 (0.898) data 0.000 (0.156) loss 1.1865 (1.4369) acc 65.6250 (63.7500) lr 1.1874e-03 eta 0:12:26 +epoch [24/50] batch [10/31] time 0.739 (0.817) data 0.001 (0.078) loss 1.8867 (1.5288) acc 50.0000 (61.2500) lr 1.1874e-03 eta 0:11:15 +epoch [24/50] batch [15/31] time 0.725 (0.789) data 0.000 (0.052) loss 2.1621 (1.6379) acc 53.1250 (60.6250) lr 1.1874e-03 eta 0:10:48 +epoch [24/50] batch [20/31] time 0.729 (0.773) data 0.000 (0.039) loss 1.7520 (1.6146) acc 62.5000 (62.3438) lr 1.1874e-03 eta 0:10:31 +epoch [24/50] batch [25/31] time 0.724 (0.764) data 0.000 (0.031) loss 0.7534 (1.5823) acc 81.2500 (63.2500) lr 1.1874e-03 eta 0:10:20 +epoch [24/50] batch [30/31] time 0.724 (0.758) data 0.000 (0.026) loss 1.6348 (1.5503) acc 59.3750 (64.0625) lr 1.1874e-03 eta 0:10:11 +epoch [25/50] batch [5/31] time 0.736 (0.888) data 0.000 (0.148) loss 1.2324 (1.4379) acc 68.7500 (65.0000) lr 1.1253e-03 eta 0:11:51 +epoch [25/50] batch [10/31] time 0.722 (0.808) data 0.000 (0.074) loss 1.8721 (1.6155) acc 53.1250 (61.5625) lr 1.1253e-03 eta 
0:10:43 +epoch [25/50] batch [15/31] time 0.721 (0.789) data 0.000 (0.050) loss 1.6895 (1.5495) acc 65.6250 (63.1250) lr 1.1253e-03 eta 0:10:24 +epoch [25/50] batch [20/31] time 0.722 (0.774) data 0.000 (0.037) loss 1.2129 (1.5580) acc 75.0000 (63.1250) lr 1.1253e-03 eta 0:10:08 +epoch [25/50] batch [25/31] time 0.725 (0.766) data 0.000 (0.030) loss 2.6094 (1.5547) acc 50.0000 (63.7500) lr 1.1253e-03 eta 0:09:58 +epoch [25/50] batch [30/31] time 0.725 (0.759) data 0.000 (0.025) loss 1.8545 (1.5331) acc 62.5000 (64.1667) lr 1.1253e-03 eta 0:09:49 +epoch [26/50] batch [5/31] time 0.736 (0.889) data 0.000 (0.147) loss 1.4707 (1.5316) acc 75.0000 (61.8750) lr 1.0628e-03 eta 0:11:24 +epoch [26/50] batch [10/31] time 0.742 (0.812) data 0.000 (0.074) loss 1.8535 (1.4827) acc 59.3750 (61.8750) lr 1.0628e-03 eta 0:10:21 +epoch [26/50] batch [15/31] time 0.734 (0.787) data 0.000 (0.049) loss 1.5156 (1.4580) acc 59.3750 (62.9167) lr 1.0628e-03 eta 0:09:58 +epoch [26/50] batch [20/31] time 0.725 (0.772) data 0.000 (0.037) loss 2.0332 (1.5119) acc 56.2500 (62.1875) lr 1.0628e-03 eta 0:09:43 +epoch [26/50] batch [25/31] time 0.742 (0.766) data 0.000 (0.030) loss 1.5781 (1.4614) acc 53.1250 (63.2500) lr 1.0628e-03 eta 0:09:34 +epoch [26/50] batch [30/31] time 0.726 (0.759) data 0.000 (0.025) loss 1.5654 (1.4663) acc 65.6250 (64.0625) lr 1.0628e-03 eta 0:09:25 +epoch [27/50] batch [5/31] time 0.732 (0.882) data 0.000 (0.143) loss 1.0176 (1.4246) acc 75.0000 (67.5000) lr 1.0000e-03 eta 0:10:51 +epoch [27/50] batch [10/31] time 0.730 (0.809) data 0.000 (0.072) loss 1.8594 (1.4396) acc 59.3750 (65.9375) lr 1.0000e-03 eta 0:09:53 +epoch [27/50] batch [15/31] time 0.734 (0.784) data 0.000 (0.048) loss 1.3369 (1.4265) acc 56.2500 (65.4167) lr 1.0000e-03 eta 0:09:31 +epoch [27/50] batch [20/31] time 0.740 (0.770) data 0.000 (0.036) loss 1.6982 (1.4659) acc 62.5000 (64.8438) lr 1.0000e-03 eta 0:09:17 +epoch [27/50] batch [25/31] time 0.735 (0.762) data 0.000 (0.029) loss 1.7129 (1.4941) 
acc 46.8750 (63.5000) lr 1.0000e-03 eta 0:09:07 +epoch [27/50] batch [30/31] time 0.740 (0.758) data 0.000 (0.024) loss 1.3457 (1.4701) acc 68.7500 (64.0625) lr 1.0000e-03 eta 0:09:01 +epoch [28/50] batch [5/31] time 0.746 (0.910) data 0.000 (0.152) loss 1.6133 (1.5568) acc 62.5000 (62.5000) lr 9.3721e-04 eta 0:10:44 +epoch [28/50] batch [10/31] time 0.735 (0.823) data 0.000 (0.076) loss 1.7070 (1.5758) acc 62.5000 (62.1875) lr 9.3721e-04 eta 0:09:38 +epoch [28/50] batch [15/31] time 0.735 (0.794) data 0.000 (0.051) loss 1.8818 (1.5878) acc 53.1250 (61.6667) lr 9.3721e-04 eta 0:09:14 +epoch [28/50] batch [20/31] time 0.718 (0.777) data 0.000 (0.038) loss 1.9355 (1.5509) acc 50.0000 (62.8125) lr 9.3721e-04 eta 0:08:58 +epoch [28/50] batch [25/31] time 0.740 (0.768) data 0.000 (0.031) loss 2.4551 (1.5624) acc 53.1250 (62.3750) lr 9.3721e-04 eta 0:08:48 +epoch [28/50] batch [30/31] time 0.736 (0.761) data 0.000 (0.026) loss 1.4561 (1.5544) acc 65.6250 (62.6042) lr 9.3721e-04 eta 0:08:39 +epoch [29/50] batch [5/31] time 0.741 (0.889) data 0.000 (0.144) loss 1.1436 (1.3592) acc 71.8750 (65.0000) lr 8.7467e-04 eta 0:10:01 +epoch [29/50] batch [10/31] time 0.724 (0.807) data 0.000 (0.072) loss 1.1865 (1.2720) acc 68.7500 (66.5625) lr 8.7467e-04 eta 0:09:02 +epoch [29/50] batch [15/31] time 0.760 (0.781) data 0.000 (0.048) loss 1.7480 (1.3794) acc 56.2500 (64.7917) lr 8.7467e-04 eta 0:08:41 +epoch [29/50] batch [20/31] time 0.733 (0.768) data 0.000 (0.036) loss 1.5254 (1.4202) acc 62.5000 (64.3750) lr 8.7467e-04 eta 0:08:28 +epoch [29/50] batch [25/31] time 0.726 (0.768) data 0.000 (0.029) loss 1.3525 (1.4789) acc 65.6250 (63.5000) lr 8.7467e-04 eta 0:08:24 +epoch [29/50] batch [30/31] time 0.751 (0.763) data 0.000 (0.024) loss 0.8174 (1.4750) acc 75.0000 (64.3750) lr 8.7467e-04 eta 0:08:17 +epoch [30/50] batch [5/31] time 0.745 (0.895) data 0.000 (0.154) loss 1.4033 (1.3996) acc 68.7500 (66.2500) lr 8.1262e-04 eta 0:09:37 +epoch [30/50] batch [10/31] time 0.760 (0.817) 
data 0.000 (0.077) loss 1.7598 (1.5672) acc 68.7500 (65.3125) lr 8.1262e-04 eta 0:08:43 +epoch [30/50] batch [15/31] time 0.757 (0.792) data 0.000 (0.052) loss 1.0088 (1.4802) acc 68.7500 (65.2083) lr 8.1262e-04 eta 0:08:23 +epoch [30/50] batch [20/31] time 0.717 (0.774) data 0.000 (0.039) loss 1.2334 (1.4234) acc 78.1250 (66.4062) lr 8.1262e-04 eta 0:08:08 +epoch [30/50] batch [25/31] time 0.753 (0.769) data 0.000 (0.031) loss 1.3711 (1.4809) acc 71.8750 (65.1250) lr 8.1262e-04 eta 0:08:01 +epoch [30/50] batch [30/31] time 0.723 (0.762) data 0.000 (0.026) loss 1.3398 (1.4823) acc 59.3750 (64.0625) lr 8.1262e-04 eta 0:07:53 +epoch [31/50] batch [5/31] time 0.736 (0.879) data 0.001 (0.142) loss 1.4531 (1.4578) acc 68.7500 (63.7500) lr 7.5131e-04 eta 0:09:00 +epoch [31/50] batch [10/31] time 0.745 (0.807) data 0.000 (0.071) loss 1.8291 (1.3807) acc 68.7500 (67.1875) lr 7.5131e-04 eta 0:08:12 +epoch [31/50] batch [15/31] time 0.736 (0.784) data 0.000 (0.048) loss 2.0156 (1.5144) acc 53.1250 (63.7500) lr 7.5131e-04 eta 0:07:54 +epoch [31/50] batch [20/31] time 0.724 (0.776) data 0.000 (0.036) loss 2.5430 (1.5689) acc 62.5000 (63.9062) lr 7.5131e-04 eta 0:07:45 +epoch [31/50] batch [25/31] time 0.735 (0.767) data 0.000 (0.029) loss 1.4395 (1.5296) acc 59.3750 (64.1250) lr 7.5131e-04 eta 0:07:36 +epoch [31/50] batch [30/31] time 0.712 (0.760) data 0.000 (0.024) loss 1.6348 (1.5438) acc 46.8750 (63.0208) lr 7.5131e-04 eta 0:07:28 +epoch [32/50] batch [5/31] time 0.730 (0.884) data 0.000 (0.146) loss 1.6387 (1.8109) acc 53.1250 (60.6250) lr 6.9098e-04 eta 0:08:36 +epoch [32/50] batch [10/31] time 0.742 (0.810) data 0.000 (0.073) loss 1.2168 (1.4845) acc 68.7500 (65.9375) lr 6.9098e-04 eta 0:07:48 +epoch [32/50] batch [15/31] time 0.745 (0.787) data 0.000 (0.049) loss 1.7666 (1.5186) acc 50.0000 (65.8333) lr 6.9098e-04 eta 0:07:31 +epoch [32/50] batch [20/31] time 0.747 (0.776) data 0.000 (0.037) loss 1.4668 (1.4894) acc 62.5000 (65.7812) lr 6.9098e-04 eta 0:07:21 +epoch 
[32/50] batch [25/31] time 0.747 (0.768) data 0.000 (0.029) loss 1.2725 (1.4864) acc 75.0000 (65.6250) lr 6.9098e-04 eta 0:07:12 +epoch [32/50] batch [30/31] time 0.725 (0.762) data 0.000 (0.025) loss 0.8726 (1.4883) acc 68.7500 (64.8958) lr 6.9098e-04 eta 0:07:06 +epoch [33/50] batch [5/31] time 0.737 (0.873) data 0.000 (0.136) loss 0.9014 (1.4457) acc 75.0000 (66.2500) lr 6.3188e-04 eta 0:08:02 +epoch [33/50] batch [10/31] time 0.727 (0.805) data 0.000 (0.068) loss 1.5703 (1.4857) acc 56.2500 (61.5625) lr 6.3188e-04 eta 0:07:21 +epoch [33/50] batch [15/31] time 0.714 (0.781) data 0.000 (0.046) loss 1.5156 (1.4461) acc 62.5000 (62.9167) lr 6.3188e-04 eta 0:07:03 +epoch [33/50] batch [20/31] time 0.722 (0.766) data 0.000 (0.034) loss 1.8516 (1.4938) acc 59.3750 (62.9688) lr 6.3188e-04 eta 0:06:52 +epoch [33/50] batch [25/31] time 0.720 (0.758) data 0.000 (0.027) loss 0.9272 (1.4638) acc 71.8750 (64.2500) lr 6.3188e-04 eta 0:06:43 +epoch [33/50] batch [30/31] time 0.712 (0.753) data 0.000 (0.023) loss 1.2852 (1.4507) acc 68.7500 (64.6875) lr 6.3188e-04 eta 0:06:37 +epoch [34/50] batch [5/31] time 0.728 (0.890) data 0.000 (0.152) loss 2.1230 (1.3791) acc 50.0000 (66.8750) lr 5.7422e-04 eta 0:07:44 +epoch [34/50] batch [10/31] time 0.735 (0.830) data 0.000 (0.076) loss 1.1631 (1.3747) acc 71.8750 (66.5625) lr 5.7422e-04 eta 0:07:08 +epoch [34/50] batch [15/31] time 0.744 (0.797) data 0.000 (0.051) loss 1.4443 (1.3058) acc 75.0000 (68.9583) lr 5.7422e-04 eta 0:06:48 +epoch [34/50] batch [20/31] time 0.729 (0.781) data 0.000 (0.038) loss 1.4844 (1.3288) acc 68.7500 (68.1250) lr 5.7422e-04 eta 0:06:35 +epoch [34/50] batch [25/31] time 0.745 (0.771) data 0.000 (0.031) loss 1.7080 (1.3581) acc 62.5000 (66.7500) lr 5.7422e-04 eta 0:06:27 +epoch [34/50] batch [30/31] time 0.745 (0.766) data 0.000 (0.026) loss 1.6045 (1.3681) acc 62.5000 (66.2500) lr 5.7422e-04 eta 0:06:20 +epoch [35/50] batch [5/31] time 0.723 (0.893) data 0.000 (0.153) loss 1.4668 (1.7008) acc 59.3750 
(60.6250) lr 5.1825e-04 eta 0:07:18 +epoch [35/50] batch [10/31] time 0.721 (0.811) data 0.000 (0.077) loss 1.2549 (1.4933) acc 68.7500 (65.0000) lr 5.1825e-04 eta 0:06:34 +epoch [35/50] batch [15/31] time 0.728 (0.786) data 0.000 (0.051) loss 1.2539 (1.4637) acc 81.2500 (65.4167) lr 5.1825e-04 eta 0:06:18 +epoch [35/50] batch [20/31] time 0.723 (0.771) data 0.000 (0.039) loss 1.7793 (1.4917) acc 59.3750 (64.8438) lr 5.1825e-04 eta 0:06:06 +epoch [35/50] batch [25/31] time 0.740 (0.764) data 0.000 (0.031) loss 1.1631 (1.4573) acc 62.5000 (65.1250) lr 5.1825e-04 eta 0:05:59 +epoch [35/50] batch [30/31] time 0.736 (0.759) data 0.000 (0.026) loss 1.3154 (1.4377) acc 56.2500 (64.7917) lr 5.1825e-04 eta 0:05:53 +epoch [36/50] batch [5/31] time 0.734 (0.882) data 0.000 (0.143) loss 1.3711 (1.3111) acc 78.1250 (69.3750) lr 4.6417e-04 eta 0:06:45 +epoch [36/50] batch [10/31] time 0.739 (0.807) data 0.000 (0.072) loss 1.4648 (1.4651) acc 62.5000 (65.6250) lr 4.6417e-04 eta 0:06:07 +epoch [36/50] batch [15/31] time 0.743 (0.782) data 0.000 (0.048) loss 1.8096 (1.4738) acc 53.1250 (64.5833) lr 4.6417e-04 eta 0:05:51 +epoch [36/50] batch [20/31] time 0.722 (0.768) data 0.000 (0.036) loss 0.8081 (1.3980) acc 78.1250 (65.7812) lr 4.6417e-04 eta 0:05:41 +epoch [36/50] batch [25/31] time 0.742 (0.762) data 0.000 (0.029) loss 1.4375 (1.3789) acc 65.6250 (66.1250) lr 4.6417e-04 eta 0:05:35 +epoch [36/50] batch [30/31] time 0.754 (0.757) data 0.000 (0.024) loss 1.3398 (1.3990) acc 56.2500 (65.1042) lr 4.6417e-04 eta 0:05:29 +epoch [37/50] batch [5/31] time 0.735 (0.879) data 0.001 (0.142) loss 1.8750 (1.5375) acc 56.2500 (63.7500) lr 4.1221e-04 eta 0:06:17 +epoch [37/50] batch [10/31] time 0.714 (0.806) data 0.000 (0.071) loss 1.7305 (1.5406) acc 53.1250 (63.7500) lr 4.1221e-04 eta 0:05:41 +epoch [37/50] batch [15/31] time 0.735 (0.781) data 0.001 (0.048) loss 1.6299 (1.4645) acc 65.6250 (65.0000) lr 4.1221e-04 eta 0:05:27 +epoch [37/50] batch [20/31] time 0.717 (0.767) data 0.000 
(0.036) loss 1.6982 (1.4024) acc 53.1250 (65.9375) lr 4.1221e-04 eta 0:05:17 +epoch [37/50] batch [25/31] time 0.753 (0.761) data 0.000 (0.029) loss 1.9219 (1.4370) acc 53.1250 (65.2500) lr 4.1221e-04 eta 0:05:11 +epoch [37/50] batch [30/31] time 0.733 (0.757) data 0.000 (0.024) loss 1.3818 (1.4612) acc 68.7500 (65.0000) lr 4.1221e-04 eta 0:05:05 +epoch [38/50] batch [5/31] time 0.730 (0.892) data 0.000 (0.143) loss 1.4287 (1.7783) acc 65.6250 (56.8750) lr 3.6258e-04 eta 0:05:54 +epoch [38/50] batch [10/31] time 0.719 (0.809) data 0.001 (0.072) loss 1.4697 (1.5634) acc 65.6250 (62.1875) lr 3.6258e-04 eta 0:05:18 +epoch [38/50] batch [15/31] time 0.728 (0.783) data 0.000 (0.048) loss 1.5156 (1.4761) acc 59.3750 (62.5000) lr 3.6258e-04 eta 0:05:03 +epoch [38/50] batch [20/31] time 0.746 (0.772) data 0.000 (0.036) loss 1.3760 (1.4346) acc 53.1250 (63.4375) lr 3.6258e-04 eta 0:04:55 +epoch [38/50] batch [25/31] time 0.763 (0.766) data 0.000 (0.029) loss 1.6680 (1.4425) acc 62.5000 (62.8750) lr 3.6258e-04 eta 0:04:49 +epoch [38/50] batch [30/31] time 0.733 (0.760) data 0.000 (0.024) loss 1.4688 (1.4051) acc 62.5000 (64.3750) lr 3.6258e-04 eta 0:04:43 +epoch [39/50] batch [5/31] time 0.727 (0.898) data 0.001 (0.153) loss 1.2275 (1.1623) acc 65.6250 (68.7500) lr 3.1545e-04 eta 0:05:29 +epoch [39/50] batch [10/31] time 0.730 (0.810) data 0.000 (0.077) loss 1.5723 (1.3682) acc 59.3750 (64.6875) lr 3.1545e-04 eta 0:04:53 +epoch [39/50] batch [15/31] time 0.729 (0.792) data 0.000 (0.051) loss 1.0186 (1.3645) acc 81.2500 (64.5833) lr 3.1545e-04 eta 0:04:42 +epoch [39/50] batch [20/31] time 0.724 (0.776) data 0.000 (0.039) loss 0.9980 (1.3365) acc 71.8750 (65.4688) lr 3.1545e-04 eta 0:04:33 +epoch [39/50] batch [25/31] time 0.727 (0.766) data 0.000 (0.031) loss 1.6758 (1.3121) acc 62.5000 (66.6250) lr 3.1545e-04 eta 0:04:25 +epoch [39/50] batch [30/31] time 0.726 (0.759) data 0.000 (0.026) loss 1.3896 (1.3250) acc 62.5000 (66.3542) lr 3.1545e-04 eta 0:04:19 +epoch [40/50] batch 
[5/31] time 0.722 (0.884) data 0.000 (0.138) loss 1.0020 (1.3381) acc 81.2500 (65.0000) lr 2.7103e-04 eta 0:04:56 +epoch [40/50] batch [10/31] time 0.736 (0.805) data 0.000 (0.069) loss 1.6445 (1.4061) acc 71.8750 (65.3125) lr 2.7103e-04 eta 0:04:26 +epoch [40/50] batch [15/31] time 0.760 (0.781) data 0.000 (0.046) loss 0.9849 (1.3230) acc 65.6250 (66.6667) lr 2.7103e-04 eta 0:04:14 +epoch [40/50] batch [20/31] time 0.734 (0.769) data 0.000 (0.035) loss 1.5459 (1.3896) acc 71.8750 (65.7812) lr 2.7103e-04 eta 0:04:06 +epoch [40/50] batch [25/31] time 0.741 (0.762) data 0.000 (0.028) loss 1.1797 (1.3902) acc 65.6250 (64.7500) lr 2.7103e-04 eta 0:04:00 +epoch [40/50] batch [30/31] time 0.736 (0.758) data 0.000 (0.023) loss 1.4375 (1.4228) acc 68.7500 (65.5208) lr 2.7103e-04 eta 0:03:55 +epoch [41/50] batch [5/31] time 0.729 (0.937) data 0.000 (0.162) loss 1.8984 (1.2104) acc 62.5000 (73.1250) lr 2.2949e-04 eta 0:04:45 +epoch [41/50] batch [10/31] time 0.758 (0.838) data 0.000 (0.081) loss 1.3291 (1.2372) acc 59.3750 (71.2500) lr 2.2949e-04 eta 0:04:11 +epoch [41/50] batch [15/31] time 0.724 (0.802) data 0.000 (0.054) loss 1.1924 (1.2162) acc 68.7500 (70.2083) lr 2.2949e-04 eta 0:03:56 +epoch [41/50] batch [20/31] time 0.727 (0.784) data 0.000 (0.041) loss 1.9209 (1.2359) acc 53.1250 (70.0000) lr 2.2949e-04 eta 0:03:47 +epoch [41/50] batch [25/31] time 0.725 (0.773) data 0.000 (0.033) loss 1.4922 (1.2981) acc 71.8750 (69.0000) lr 2.2949e-04 eta 0:03:40 +epoch [41/50] batch [30/31] time 0.736 (0.767) data 0.000 (0.027) loss 1.3428 (1.3367) acc 71.8750 (67.7083) lr 2.2949e-04 eta 0:03:34 +epoch [42/50] batch [5/31] time 0.764 (0.890) data 0.000 (0.145) loss 1.4131 (1.2460) acc 62.5000 (68.1250) lr 1.9098e-04 eta 0:04:03 +epoch [42/50] batch [10/31] time 0.728 (0.812) data 0.000 (0.073) loss 1.6162 (1.3337) acc 59.3750 (64.6875) lr 1.9098e-04 eta 0:03:38 +epoch [42/50] batch [15/31] time 0.742 (0.787) data 0.000 (0.048) loss 1.4580 (1.3781) acc 62.5000 (64.5833) lr 
1.9098e-04 eta 0:03:27 +epoch [42/50] batch [20/31] time 0.725 (0.773) data 0.000 (0.036) loss 1.5098 (1.3544) acc 65.6250 (65.4688) lr 1.9098e-04 eta 0:03:20 +epoch [42/50] batch [25/31] time 0.718 (0.765) data 0.000 (0.029) loss 1.0391 (1.3333) acc 81.2500 (66.7500) lr 1.9098e-04 eta 0:03:14 +epoch [42/50] batch [30/31] time 0.745 (0.762) data 0.000 (0.024) loss 1.5986 (1.3479) acc 59.3750 (66.3542) lr 1.9098e-04 eta 0:03:09 +epoch [43/50] batch [5/31] time 0.760 (0.888) data 0.000 (0.143) loss 1.6934 (1.2563) acc 65.6250 (68.1250) lr 1.5567e-04 eta 0:03:35 +epoch [43/50] batch [10/31] time 0.728 (0.823) data 0.001 (0.072) loss 1.7900 (1.3118) acc 50.0000 (64.3750) lr 1.5567e-04 eta 0:03:15 +epoch [43/50] batch [15/31] time 0.753 (0.795) data 0.000 (0.048) loss 1.7500 (1.4312) acc 59.3750 (62.7083) lr 1.5567e-04 eta 0:03:05 +epoch [43/50] batch [20/31] time 0.729 (0.777) data 0.000 (0.036) loss 1.4404 (1.4677) acc 65.6250 (62.9688) lr 1.5567e-04 eta 0:02:57 +epoch [43/50] batch [25/31] time 0.741 (0.770) data 0.000 (0.029) loss 1.1084 (1.4239) acc 62.5000 (64.2500) lr 1.5567e-04 eta 0:02:51 +epoch [43/50] batch [30/31] time 0.736 (0.764) data 0.000 (0.024) loss 2.2480 (1.4388) acc 50.0000 (63.5417) lr 1.5567e-04 eta 0:02:46 +epoch [44/50] batch [5/31] time 0.726 (0.877) data 0.001 (0.138) loss 1.2227 (1.5900) acc 71.8750 (65.6250) lr 1.2369e-04 eta 0:03:05 +epoch [44/50] batch [10/31] time 0.726 (0.807) data 0.000 (0.069) loss 1.8057 (1.4959) acc 65.6250 (65.0000) lr 1.2369e-04 eta 0:02:46 +epoch [44/50] batch [15/31] time 0.730 (0.784) data 0.001 (0.046) loss 0.9814 (1.4033) acc 75.0000 (66.2500) lr 1.2369e-04 eta 0:02:38 +epoch [44/50] batch [20/31] time 0.736 (0.773) data 0.000 (0.035) loss 2.0352 (1.4160) acc 59.3750 (66.4062) lr 1.2369e-04 eta 0:02:32 +epoch [44/50] batch [25/31] time 0.733 (0.765) data 0.000 (0.028) loss 1.2451 (1.3965) acc 78.1250 (67.1250) lr 1.2369e-04 eta 0:02:26 +epoch [44/50] batch [30/31] time 0.726 (0.759) data 0.000 (0.023) loss 
1.4102 (1.4282) acc 65.6250 (66.9792) lr 1.2369e-04 eta 0:02:21 +epoch [45/50] batch [5/31] time 0.733 (0.890) data 0.000 (0.150) loss 1.6064 (1.3188) acc 56.2500 (69.3750) lr 9.5173e-05 eta 0:02:41 +epoch [45/50] batch [10/31] time 0.724 (0.806) data 0.000 (0.075) loss 1.5996 (1.2697) acc 59.3750 (67.1875) lr 9.5173e-05 eta 0:02:21 +epoch [45/50] batch [15/31] time 0.719 (0.783) data 0.000 (0.050) loss 1.6396 (1.3646) acc 62.5000 (67.0833) lr 9.5173e-05 eta 0:02:13 +epoch [45/50] batch [20/31] time 0.720 (0.766) data 0.000 (0.038) loss 1.5332 (1.3892) acc 65.6250 (65.9375) lr 9.5173e-05 eta 0:02:07 +epoch [45/50] batch [25/31] time 0.725 (0.757) data 0.000 (0.030) loss 1.6221 (1.4027) acc 50.0000 (65.1250) lr 9.5173e-05 eta 0:02:01 +epoch [45/50] batch [30/31] time 0.741 (0.752) data 0.000 (0.025) loss 1.2979 (1.3923) acc 62.5000 (65.8333) lr 9.5173e-05 eta 0:01:57 +epoch [46/50] batch [5/31] time 0.729 (0.874) data 0.000 (0.142) loss 1.0176 (1.2252) acc 71.8750 (72.5000) lr 7.0224e-05 eta 0:02:11 +epoch [46/50] batch [10/31] time 0.736 (0.799) data 0.000 (0.071) loss 1.2402 (1.2223) acc 75.0000 (69.6875) lr 7.0224e-05 eta 0:01:55 +epoch [46/50] batch [15/31] time 0.728 (0.777) data 0.000 (0.048) loss 0.9253 (1.2210) acc 78.1250 (70.6250) lr 7.0224e-05 eta 0:01:48 +epoch [46/50] batch [20/31] time 0.718 (0.763) data 0.000 (0.036) loss 1.3721 (1.2589) acc 62.5000 (70.0000) lr 7.0224e-05 eta 0:01:43 +epoch [46/50] batch [25/31] time 0.745 (0.760) data 0.000 (0.029) loss 2.1504 (1.3328) acc 50.0000 (67.5000) lr 7.0224e-05 eta 0:01:38 +epoch [46/50] batch [30/31] time 0.734 (0.756) data 0.000 (0.024) loss 0.9854 (1.3487) acc 68.7500 (67.1875) lr 7.0224e-05 eta 0:01:34 +epoch [47/50] batch [5/31] time 0.747 (0.892) data 0.000 (0.142) loss 1.4736 (1.4928) acc 62.5000 (61.2500) lr 4.8943e-05 eta 0:01:46 +epoch [47/50] batch [10/31] time 0.734 (0.817) data 0.000 (0.071) loss 1.4746 (1.4506) acc 56.2500 (63.7500) lr 4.8943e-05 eta 0:01:33 +epoch [47/50] batch [15/31] time 
0.717 (0.788) data 0.000 (0.048) loss 1.4756 (1.4164) acc 62.5000 (65.6250) lr 4.8943e-05 eta 0:01:25 +epoch [47/50] batch [20/31] time 0.732 (0.775) data 0.000 (0.036) loss 1.2275 (1.4059) acc 75.0000 (66.8750) lr 4.8943e-05 eta 0:01:20 +epoch [47/50] batch [25/31] time 0.722 (0.766) data 0.001 (0.029) loss 0.5596 (1.3811) acc 84.3750 (67.7500) lr 4.8943e-05 eta 0:01:15 +epoch [47/50] batch [30/31] time 0.732 (0.760) data 0.000 (0.024) loss 1.0039 (1.3337) acc 62.5000 (68.0208) lr 4.8943e-05 eta 0:01:11 +epoch [48/50] batch [5/31] time 0.725 (0.896) data 0.000 (0.148) loss 1.4316 (1.3472) acc 62.5000 (65.6250) lr 3.1417e-05 eta 0:01:18 +epoch [48/50] batch [10/31] time 0.730 (0.813) data 0.000 (0.074) loss 1.2852 (1.5045) acc 81.2500 (65.9375) lr 3.1417e-05 eta 0:01:07 +epoch [48/50] batch [15/31] time 0.735 (0.789) data 0.001 (0.050) loss 1.2705 (1.4290) acc 62.5000 (66.8750) lr 3.1417e-05 eta 0:01:01 +epoch [48/50] batch [20/31] time 0.741 (0.775) data 0.000 (0.037) loss 1.4619 (1.4861) acc 65.6250 (65.1562) lr 3.1417e-05 eta 0:00:56 +epoch [48/50] batch [25/31] time 0.720 (0.766) data 0.000 (0.030) loss 1.3750 (1.5050) acc 75.0000 (65.1250) lr 3.1417e-05 eta 0:00:52 +epoch [48/50] batch [30/31] time 0.730 (0.761) data 0.000 (0.025) loss 1.6787 (1.4763) acc 68.7500 (66.2500) lr 3.1417e-05 eta 0:00:47 +epoch [49/50] batch [5/31] time 0.741 (0.892) data 0.000 (0.143) loss 1.1025 (1.4008) acc 68.7500 (67.5000) lr 1.7713e-05 eta 0:00:50 +epoch [49/50] batch [10/31] time 0.741 (0.826) data 0.000 (0.072) loss 1.4150 (1.3645) acc 62.5000 (67.5000) lr 1.7713e-05 eta 0:00:42 +epoch [49/50] batch [15/31] time 0.730 (0.795) data 0.000 (0.048) loss 1.5713 (1.4048) acc 56.2500 (66.6667) lr 1.7713e-05 eta 0:00:37 +epoch [49/50] batch [20/31] time 0.750 (0.780) data 0.000 (0.036) loss 1.2402 (1.5054) acc 68.7500 (65.7812) lr 1.7713e-05 eta 0:00:32 +epoch [49/50] batch [25/31] time 0.727 (0.771) data 0.000 (0.029) loss 0.8521 (1.4586) acc 78.1250 (66.2500) lr 1.7713e-05 eta 
0:00:28 +epoch [49/50] batch [30/31] time 0.735 (0.764) data 0.000 (0.024) loss 1.4336 (1.4798) acc 62.5000 (65.5208) lr 1.7713e-05 eta 0:00:24 +epoch [50/50] batch [5/31] time 0.716 (0.890) data 0.000 (0.156) loss 1.8701 (1.5762) acc 50.0000 (60.0000) lr 7.8853e-06 eta 0:00:23 +epoch [50/50] batch [10/31] time 0.728 (0.810) data 0.000 (0.078) loss 1.3701 (1.4328) acc 71.8750 (63.7500) lr 7.8853e-06 eta 0:00:17 +epoch [50/50] batch [15/31] time 0.723 (0.782) data 0.000 (0.052) loss 1.6875 (1.6109) acc 68.7500 (61.2500) lr 7.8853e-06 eta 0:00:12 +epoch [50/50] batch [20/31] time 0.718 (0.770) data 0.000 (0.039) loss 1.1250 (1.5353) acc 65.6250 (62.1875) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [25/31] time 0.729 (0.762) data 0.000 (0.032) loss 1.4551 (1.4917) acc 59.3750 (62.8750) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [30/31] time 0.722 (0.760) data 0.000 (0.026) loss 1.2148 (1.4424) acc 75.0000 (64.7917) lr 7.8853e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 30,777 +* accuracy: 61.6% +* error: 38.4% +* macro_f1: 60.5% +Elapsed: 0:22:36 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..a34d1f44 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1697829628.ckb-gpu-lambda.2064170.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1697829628.ckb-gpu-lambda.2064170.0 new file mode 100644 index 00000000..210145b3 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1697829628.ckb-gpu-lambda.2064170.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..60032aa3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + 
NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + 
STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 
+/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.103 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp 
ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading 
CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! +Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/31] time 0.744 (1.596) data 0.000 (0.145) loss 3.5605 (3.5969) acc 25.0000 (32.5000) lr 1.0000e-05 eta 0:41:05 +epoch [1/50] batch [10/31] time 0.728 (1.160) data 0.000 (0.073) loss 3.8535 (3.4861) acc 18.7500 (34.0625) lr 1.0000e-05 eta 0:29:45 +epoch [1/50] batch [15/31] time 0.729 (1.015) data 0.001 (0.049) loss 3.4395 (3.3546) acc 43.7500 (35.6250) lr 1.0000e-05 eta 0:25:57 +epoch [1/50] batch [20/31] time 0.743 (0.943) data 0.000 (0.037) loss 3.0215 (3.2204) acc 34.3750 (37.3438) lr 1.0000e-05 eta 0:24:02 +epoch [1/50] batch [25/31] time 0.726 (0.898) data 0.000 (0.029) loss 2.7930 (3.1371) acc 40.6250 (37.3750) lr 1.0000e-05 eta 0:22:49 +epoch [1/50] batch [30/31] time 0.711 (0.867) data 0.000 (0.024) loss 2.6113 (3.0720) acc 43.7500 (37.7083) lr 1.0000e-05 eta 0:21:58 +epoch [2/50] batch [5/31] time 0.745 (0.879) data 0.000 (0.141) loss 2.0879 (2.5738) acc 56.2500 (46.8750) lr 2.0000e-03 eta 0:22:11 +epoch [2/50] batch [10/31] time 0.752 (0.804) data 0.000 (0.071) loss 1.0957 (2.2741) acc 59.3750 (50.0000) lr 2.0000e-03 eta 0:20:13 +epoch [2/50] batch [15/31] time 0.737 (0.779) data 0.000 (0.047) loss 2.2617 (2.2068) acc 46.8750 (51.8750) lr 2.0000e-03 eta 0:19:31 +epoch [2/50] batch [20/31] time 0.720 (0.764) data 0.000 (0.036) loss 1.9941 (2.1865) acc 56.2500 (52.1875) lr 2.0000e-03 eta 0:19:05 +epoch [2/50] batch [25/31] time 0.713 (0.758) data 0.000 (0.029) loss 2.1230 (2.1198) acc 46.8750 (52.6250) lr 2.0000e-03 eta 0:18:52 +epoch [2/50] batch [30/31] time 0.719 
(0.752) data 0.000 (0.024) loss 1.9346 (2.0827) acc 53.1250 (52.7083) lr 2.0000e-03 eta 0:18:39 +epoch [3/50] batch [5/31] time 0.727 (0.888) data 0.000 (0.148) loss 2.2754 (2.2193) acc 46.8750 (50.6250) lr 1.9980e-03 eta 0:21:56 +epoch [3/50] batch [10/31] time 0.718 (0.806) data 0.000 (0.074) loss 1.9541 (2.2126) acc 59.3750 (52.1875) lr 1.9980e-03 eta 0:19:51 +epoch [3/50] batch [15/31] time 0.722 (0.780) data 0.000 (0.050) loss 1.6699 (2.1538) acc 59.3750 (53.9583) lr 1.9980e-03 eta 0:19:09 +epoch [3/50] batch [20/31] time 0.701 (0.766) data 0.000 (0.037) loss 1.6592 (2.0384) acc 65.6250 (56.0938) lr 1.9980e-03 eta 0:18:45 +epoch [3/50] batch [25/31] time 0.727 (0.759) data 0.000 (0.030) loss 1.4814 (1.9636) acc 68.7500 (56.8750) lr 1.9980e-03 eta 0:18:29 +epoch [3/50] batch [30/31] time 0.710 (0.753) data 0.000 (0.025) loss 1.9062 (1.9651) acc 46.8750 (56.1458) lr 1.9980e-03 eta 0:18:17 +epoch [4/50] batch [5/31] time 0.718 (0.876) data 0.001 (0.132) loss 1.8096 (1.7613) acc 53.1250 (55.0000) lr 1.9921e-03 eta 0:21:11 +epoch [4/50] batch [10/31] time 0.732 (0.802) data 0.000 (0.066) loss 1.9268 (1.8851) acc 68.7500 (55.6250) lr 1.9921e-03 eta 0:19:20 +epoch [4/50] batch [15/31] time 0.749 (0.775) data 0.000 (0.044) loss 2.2266 (1.8395) acc 53.1250 (56.8750) lr 1.9921e-03 eta 0:18:37 +epoch [4/50] batch [20/31] time 0.727 (0.762) data 0.000 (0.033) loss 1.3496 (1.8689) acc 68.7500 (56.4062) lr 1.9921e-03 eta 0:18:15 +epoch [4/50] batch [25/31] time 0.716 (0.754) data 0.000 (0.027) loss 1.8672 (1.8460) acc 53.1250 (57.0000) lr 1.9921e-03 eta 0:17:59 +epoch [4/50] batch [30/31] time 0.718 (0.748) data 0.000 (0.022) loss 1.8730 (1.8520) acc 59.3750 (55.8333) lr 1.9921e-03 eta 0:17:48 +epoch [5/50] batch [5/31] time 0.722 (0.849) data 0.000 (0.127) loss 1.9121 (1.9563) acc 59.3750 (56.2500) lr 1.9823e-03 eta 0:20:06 +epoch [5/50] batch [10/31] time 0.731 (0.796) data 0.000 (0.064) loss 2.3223 (1.7925) acc 46.8750 (59.0625) lr 1.9823e-03 eta 0:18:46 +epoch [5/50] 
batch [15/31] time 0.731 (0.772) data 0.000 (0.042) loss 1.8213 (1.8116) acc 53.1250 (57.9167) lr 1.9823e-03 eta 0:18:09 +epoch [5/50] batch [20/31] time 0.722 (0.759) data 0.000 (0.032) loss 1.7676 (1.7863) acc 62.5000 (58.4375) lr 1.9823e-03 eta 0:17:47 +epoch [5/50] batch [25/31] time 0.735 (0.753) data 0.000 (0.026) loss 1.5215 (1.7683) acc 62.5000 (57.7500) lr 1.9823e-03 eta 0:17:34 +epoch [5/50] batch [30/31] time 0.723 (0.747) data 0.000 (0.021) loss 1.9707 (1.8178) acc 53.1250 (56.8750) lr 1.9823e-03 eta 0:17:22 +epoch [6/50] batch [5/31] time 0.734 (0.877) data 0.000 (0.141) loss 2.3691 (1.8631) acc 53.1250 (56.8750) lr 1.9686e-03 eta 0:20:19 +epoch [6/50] batch [10/31] time 0.734 (0.804) data 0.000 (0.072) loss 2.0332 (1.7026) acc 43.7500 (58.7500) lr 1.9686e-03 eta 0:18:33 +epoch [6/50] batch [15/31] time 0.715 (0.778) data 0.000 (0.048) loss 1.4961 (1.6917) acc 62.5000 (57.9167) lr 1.9686e-03 eta 0:17:54 +epoch [6/50] batch [20/31] time 0.713 (0.765) data 0.000 (0.036) loss 2.0117 (1.6800) acc 53.1250 (57.6562) lr 1.9686e-03 eta 0:17:32 +epoch [6/50] batch [25/31] time 0.721 (0.757) data 0.000 (0.029) loss 2.4688 (1.7189) acc 50.0000 (58.2500) lr 1.9686e-03 eta 0:17:16 +epoch [6/50] batch [30/31] time 0.733 (0.751) data 0.000 (0.024) loss 1.7646 (1.7213) acc 50.0000 (57.9167) lr 1.9686e-03 eta 0:17:04 +epoch [7/50] batch [5/31] time 0.725 (0.886) data 0.000 (0.142) loss 1.3223 (1.4674) acc 68.7500 (65.6250) lr 1.9511e-03 eta 0:20:03 +epoch [7/50] batch [10/31] time 0.733 (0.803) data 0.001 (0.071) loss 1.0420 (1.5184) acc 75.0000 (62.5000) lr 1.9511e-03 eta 0:18:06 +epoch [7/50] batch [15/31] time 0.718 (0.779) data 0.000 (0.048) loss 1.6641 (1.5729) acc 53.1250 (62.9167) lr 1.9511e-03 eta 0:17:30 +epoch [7/50] batch [20/31] time 0.718 (0.765) data 0.000 (0.036) loss 1.7910 (1.6291) acc 50.0000 (60.0000) lr 1.9511e-03 eta 0:17:08 +epoch [7/50] batch [25/31] time 0.707 (0.756) data 0.000 (0.029) loss 3.2559 (1.7070) acc 53.1250 (59.7500) lr 1.9511e-03 
eta 0:16:52 +epoch [7/50] batch [30/31] time 0.718 (0.750) data 0.000 (0.024) loss 1.9922 (1.7209) acc 56.2500 (59.7917) lr 1.9511e-03 eta 0:16:40 +epoch [8/50] batch [5/31] time 0.756 (0.897) data 0.000 (0.156) loss 1.7803 (1.6650) acc 62.5000 (61.8750) lr 1.9298e-03 eta 0:19:50 +epoch [8/50] batch [10/31] time 0.731 (0.814) data 0.000 (0.078) loss 1.6670 (1.7481) acc 68.7500 (58.4375) lr 1.9298e-03 eta 0:17:56 +epoch [8/50] batch [15/31] time 0.727 (0.792) data 0.000 (0.052) loss 1.3877 (1.7196) acc 68.7500 (60.4167) lr 1.9298e-03 eta 0:17:23 +epoch [8/50] batch [20/31] time 0.748 (0.780) data 0.001 (0.039) loss 1.1709 (1.6853) acc 75.0000 (60.3125) lr 1.9298e-03 eta 0:17:04 +epoch [8/50] batch [25/31] time 0.716 (0.768) data 0.000 (0.031) loss 1.3311 (1.6531) acc 62.5000 (60.5000) lr 1.9298e-03 eta 0:16:44 +epoch [8/50] batch [30/31] time 0.721 (0.762) data 0.000 (0.026) loss 1.6582 (1.6616) acc 68.7500 (60.5208) lr 1.9298e-03 eta 0:16:32 +epoch [9/50] batch [5/31] time 0.744 (0.912) data 0.002 (0.169) loss 1.8535 (2.0027) acc 53.1250 (51.8750) lr 1.9048e-03 eta 0:19:43 +epoch [9/50] batch [10/31] time 0.731 (0.823) data 0.001 (0.085) loss 1.9072 (1.7606) acc 50.0000 (57.1875) lr 1.9048e-03 eta 0:17:43 +epoch [9/50] batch [15/31] time 0.716 (0.789) data 0.000 (0.057) loss 1.1660 (1.7206) acc 71.8750 (58.5417) lr 1.9048e-03 eta 0:16:55 +epoch [9/50] batch [20/31] time 0.748 (0.777) data 0.000 (0.043) loss 1.8662 (1.7536) acc 53.1250 (57.9688) lr 1.9048e-03 eta 0:16:35 +epoch [9/50] batch [25/31] time 0.725 (0.768) data 0.000 (0.034) loss 0.9844 (1.7092) acc 68.7500 (58.6250) lr 1.9048e-03 eta 0:16:20 +epoch [9/50] batch [30/31] time 0.723 (0.762) data 0.000 (0.029) loss 1.0918 (1.6839) acc 75.0000 (59.5833) lr 1.9048e-03 eta 0:16:09 +epoch [10/50] batch [5/31] time 0.714 (0.896) data 0.000 (0.141) loss 0.8774 (1.4388) acc 78.1250 (63.1250) lr 1.8763e-03 eta 0:18:54 +epoch [10/50] batch [10/31] time 0.708 (0.807) data 0.001 (0.071) loss 1.6494 (1.6143) acc 56.2500 
(57.1875) lr 1.8763e-03 eta 0:16:57 +epoch [10/50] batch [15/31] time 0.755 (0.782) data 0.000 (0.047) loss 1.9668 (1.6659) acc 56.2500 (57.0833) lr 1.8763e-03 eta 0:16:21 +epoch [10/50] batch [20/31] time 0.714 (0.766) data 0.000 (0.036) loss 1.2910 (1.6744) acc 62.5000 (58.4375) lr 1.8763e-03 eta 0:15:57 +epoch [10/50] batch [25/31] time 0.713 (0.756) data 0.000 (0.029) loss 2.4766 (1.7025) acc 53.1250 (58.5000) lr 1.8763e-03 eta 0:15:42 +epoch [10/50] batch [30/31] time 0.719 (0.751) data 0.000 (0.024) loss 1.7588 (1.7114) acc 65.6250 (58.8542) lr 1.8763e-03 eta 0:15:31 +epoch [11/50] batch [5/31] time 0.725 (0.867) data 0.000 (0.133) loss 1.7334 (1.5055) acc 68.7500 (65.0000) lr 1.8443e-03 eta 0:17:50 +epoch [11/50] batch [10/31] time 0.717 (0.800) data 0.000 (0.067) loss 2.3652 (1.6203) acc 56.2500 (65.6250) lr 1.8443e-03 eta 0:16:23 +epoch [11/50] batch [15/31] time 0.722 (0.775) data 0.000 (0.045) loss 1.5303 (1.6292) acc 65.6250 (63.5417) lr 1.8443e-03 eta 0:15:49 +epoch [11/50] batch [20/31] time 0.723 (0.763) data 0.000 (0.034) loss 1.6963 (1.6538) acc 71.8750 (63.1250) lr 1.8443e-03 eta 0:15:30 +epoch [11/50] batch [25/31] time 0.753 (0.759) data 0.000 (0.027) loss 1.6133 (1.6482) acc 68.7500 (62.1250) lr 1.8443e-03 eta 0:15:22 +epoch [11/50] batch [30/31] time 0.718 (0.753) data 0.000 (0.023) loss 1.4883 (1.6693) acc 59.3750 (61.8750) lr 1.8443e-03 eta 0:15:11 +epoch [12/50] batch [5/31] time 0.714 (0.878) data 0.001 (0.145) loss 1.3467 (1.4020) acc 62.5000 (66.2500) lr 1.8090e-03 eta 0:17:37 +epoch [12/50] batch [10/31] time 0.728 (0.816) data 0.000 (0.073) loss 1.2979 (1.5370) acc 65.6250 (63.7500) lr 1.8090e-03 eta 0:16:18 +epoch [12/50] batch [15/31] time 0.764 (0.787) data 0.000 (0.049) loss 2.2559 (1.5985) acc 53.1250 (62.2917) lr 1.8090e-03 eta 0:15:40 +epoch [12/50] batch [20/31] time 0.729 (0.771) data 0.000 (0.037) loss 1.6514 (1.5958) acc 62.5000 (62.6562) lr 1.8090e-03 eta 0:15:16 +epoch [12/50] batch [25/31] time 0.717 (0.762) data 0.000 
(0.029) loss 2.3496 (1.5637) acc 53.1250 (63.5000) lr 1.8090e-03 eta 0:15:01 +epoch [12/50] batch [30/31] time 0.715 (0.755) data 0.001 (0.025) loss 2.1875 (1.5930) acc 53.1250 (62.6042) lr 1.8090e-03 eta 0:14:49 +epoch [13/50] batch [5/31] time 0.725 (0.868) data 0.000 (0.138) loss 1.2891 (1.5173) acc 75.0000 (67.5000) lr 1.7705e-03 eta 0:16:58 +epoch [13/50] batch [10/31] time 0.735 (0.808) data 0.001 (0.070) loss 2.0215 (1.5420) acc 50.0000 (65.6250) lr 1.7705e-03 eta 0:15:43 +epoch [13/50] batch [15/31] time 0.721 (0.783) data 0.000 (0.047) loss 1.2812 (1.5355) acc 46.8750 (63.3333) lr 1.7705e-03 eta 0:15:10 +epoch [13/50] batch [20/31] time 0.735 (0.770) data 0.000 (0.035) loss 1.3457 (1.5146) acc 71.8750 (63.4375) lr 1.7705e-03 eta 0:14:51 +epoch [13/50] batch [25/31] time 0.706 (0.761) data 0.000 (0.028) loss 2.4258 (1.6273) acc 56.2500 (61.7500) lr 1.7705e-03 eta 0:14:37 +epoch [13/50] batch [30/31] time 0.721 (0.756) data 0.000 (0.023) loss 1.7217 (1.6521) acc 62.5000 (61.5625) lr 1.7705e-03 eta 0:14:28 +epoch [14/50] batch [5/31] time 0.713 (0.876) data 0.000 (0.146) loss 1.0156 (1.3631) acc 75.0000 (68.1250) lr 1.7290e-03 eta 0:16:40 +epoch [14/50] batch [10/31] time 0.715 (0.802) data 0.000 (0.073) loss 1.6777 (1.4973) acc 62.5000 (65.9375) lr 1.7290e-03 eta 0:15:11 +epoch [14/50] batch [15/31] time 0.717 (0.777) data 0.000 (0.049) loss 1.2773 (1.5543) acc 78.1250 (64.1667) lr 1.7290e-03 eta 0:14:40 +epoch [14/50] batch [20/31] time 0.734 (0.765) data 0.000 (0.037) loss 1.2764 (1.5383) acc 53.1250 (63.2812) lr 1.7290e-03 eta 0:14:22 +epoch [14/50] batch [25/31] time 0.720 (0.756) data 0.000 (0.030) loss 1.8125 (1.5907) acc 65.6250 (62.2500) lr 1.7290e-03 eta 0:14:08 +epoch [14/50] batch [30/31] time 0.724 (0.751) data 0.000 (0.025) loss 1.4014 (1.5722) acc 65.6250 (62.8125) lr 1.7290e-03 eta 0:13:58 +epoch [15/50] batch [5/31] time 0.742 (0.872) data 0.000 (0.133) loss 2.0273 (1.6936) acc 59.3750 (58.7500) lr 1.6845e-03 eta 0:16:08 +epoch [15/50] batch 
[10/31] time 0.714 (0.798) data 0.000 (0.067) loss 1.2549 (1.5649) acc 68.7500 (62.1875) lr 1.6845e-03 eta 0:14:42 +epoch [15/50] batch [15/31] time 0.705 (0.774) data 0.000 (0.045) loss 1.7324 (1.5818) acc 56.2500 (61.4583) lr 1.6845e-03 eta 0:14:11 +epoch [15/50] batch [20/31] time 0.759 (0.763) data 0.000 (0.034) loss 1.2412 (1.4862) acc 59.3750 (62.3438) lr 1.6845e-03 eta 0:13:56 +epoch [15/50] batch [25/31] time 0.741 (0.758) data 0.000 (0.027) loss 1.2305 (1.4680) acc 71.8750 (63.0000) lr 1.6845e-03 eta 0:13:47 +epoch [15/50] batch [30/31] time 0.722 (0.754) data 0.000 (0.022) loss 1.6230 (1.5143) acc 65.6250 (62.8125) lr 1.6845e-03 eta 0:13:38 +epoch [16/50] batch [5/31] time 0.721 (0.858) data 0.000 (0.128) loss 1.9316 (1.6373) acc 56.2500 (57.5000) lr 1.6374e-03 eta 0:15:26 +epoch [16/50] batch [10/31] time 0.739 (0.789) data 0.000 (0.064) loss 1.4121 (1.4872) acc 65.6250 (61.8750) lr 1.6374e-03 eta 0:14:08 +epoch [16/50] batch [15/31] time 0.718 (0.767) data 0.000 (0.043) loss 1.4209 (1.5231) acc 62.5000 (61.4583) lr 1.6374e-03 eta 0:13:40 +epoch [16/50] batch [20/31] time 0.718 (0.757) data 0.000 (0.032) loss 0.9971 (1.5416) acc 81.2500 (62.1875) lr 1.6374e-03 eta 0:13:25 +epoch [16/50] batch [25/31] time 0.759 (0.753) data 0.000 (0.026) loss 1.3711 (1.5413) acc 65.6250 (63.1250) lr 1.6374e-03 eta 0:13:18 +epoch [16/50] batch [30/31] time 0.733 (0.751) data 0.001 (0.022) loss 1.6826 (1.5984) acc 68.7500 (62.9167) lr 1.6374e-03 eta 0:13:12 +epoch [17/50] batch [5/31] time 0.741 (0.906) data 0.001 (0.162) loss 2.0703 (1.3939) acc 62.5000 (68.1250) lr 1.5878e-03 eta 0:15:50 +epoch [17/50] batch [10/31] time 0.720 (0.821) data 0.001 (0.081) loss 2.1934 (1.5841) acc 46.8750 (63.1250) lr 1.5878e-03 eta 0:14:17 +epoch [17/50] batch [15/31] time 0.720 (0.788) data 0.000 (0.054) loss 1.3506 (1.5797) acc 68.7500 (62.9167) lr 1.5878e-03 eta 0:13:39 +epoch [17/50] batch [20/31] time 0.726 (0.773) data 0.000 (0.041) loss 1.4629 (1.6422) acc 59.3750 (62.5000) lr 
1.5878e-03 eta 0:13:19 +epoch [17/50] batch [25/31] time 0.751 (0.766) data 0.000 (0.033) loss 1.2607 (1.6036) acc 65.6250 (63.2500) lr 1.5878e-03 eta 0:13:07 +epoch [17/50] batch [30/31] time 0.711 (0.758) data 0.000 (0.027) loss 1.6611 (1.5488) acc 62.5000 (63.4375) lr 1.5878e-03 eta 0:12:56 +epoch [18/50] batch [5/31] time 0.720 (0.876) data 0.001 (0.142) loss 0.8940 (1.2897) acc 75.0000 (66.8750) lr 1.5358e-03 eta 0:14:52 +epoch [18/50] batch [10/31] time 0.710 (0.818) data 0.000 (0.071) loss 1.3711 (1.4337) acc 68.7500 (64.0625) lr 1.5358e-03 eta 0:13:48 +epoch [18/50] batch [15/31] time 0.716 (0.791) data 0.001 (0.047) loss 1.4160 (1.4799) acc 62.5000 (64.5833) lr 1.5358e-03 eta 0:13:17 +epoch [18/50] batch [20/31] time 0.717 (0.774) data 0.000 (0.036) loss 2.6055 (1.5504) acc 46.8750 (64.0625) lr 1.5358e-03 eta 0:12:56 +epoch [18/50] batch [25/31] time 0.725 (0.762) data 0.000 (0.029) loss 0.8623 (1.4794) acc 75.0000 (65.3750) lr 1.5358e-03 eta 0:12:40 +epoch [18/50] batch [30/31] time 0.717 (0.756) data 0.000 (0.024) loss 1.0527 (1.4837) acc 71.8750 (65.3125) lr 1.5358e-03 eta 0:12:30 +epoch [19/50] batch [5/31] time 0.708 (0.901) data 0.000 (0.171) loss 1.3105 (1.2227) acc 62.5000 (68.1250) lr 1.4818e-03 eta 0:14:49 +epoch [19/50] batch [10/31] time 0.736 (0.816) data 0.000 (0.086) loss 0.9180 (1.2324) acc 68.7500 (67.5000) lr 1.4818e-03 eta 0:13:21 +epoch [19/50] batch [15/31] time 0.716 (0.784) data 0.000 (0.057) loss 1.4756 (1.2888) acc 71.8750 (68.5417) lr 1.4818e-03 eta 0:12:46 +epoch [19/50] batch [20/31] time 0.752 (0.769) data 0.000 (0.043) loss 1.3301 (1.3453) acc 62.5000 (66.5625) lr 1.4818e-03 eta 0:12:27 +epoch [19/50] batch [25/31] time 0.725 (0.760) data 0.000 (0.035) loss 1.3750 (1.3700) acc 62.5000 (66.2500) lr 1.4818e-03 eta 0:12:15 +epoch [19/50] batch [30/31] time 0.712 (0.758) data 0.000 (0.029) loss 1.0889 (1.3960) acc 68.7500 (65.9375) lr 1.4818e-03 eta 0:12:09 +epoch [20/50] batch [5/31] time 0.733 (0.884) data 0.000 (0.147) loss 
1.4160 (1.3246) acc 62.5000 (68.7500) lr 1.4258e-03 eta 0:14:04 +epoch [20/50] batch [10/31] time 0.753 (0.808) data 0.000 (0.074) loss 1.6260 (1.4948) acc 56.2500 (62.8125) lr 1.4258e-03 eta 0:12:48 +epoch [20/50] batch [15/31] time 0.734 (0.783) data 0.000 (0.049) loss 1.3691 (1.6217) acc 62.5000 (62.7083) lr 1.4258e-03 eta 0:12:21 +epoch [20/50] batch [20/31] time 0.729 (0.769) data 0.000 (0.037) loss 1.8330 (1.6414) acc 56.2500 (62.3438) lr 1.4258e-03 eta 0:12:03 +epoch [20/50] batch [25/31] time 0.740 (0.761) data 0.000 (0.030) loss 1.3975 (1.6248) acc 65.6250 (62.1250) lr 1.4258e-03 eta 0:11:52 +epoch [20/50] batch [30/31] time 0.727 (0.755) data 0.000 (0.025) loss 1.7715 (1.5932) acc 62.5000 (62.5000) lr 1.4258e-03 eta 0:11:43 +epoch [21/50] batch [5/31] time 0.720 (0.889) data 0.001 (0.154) loss 1.5527 (1.5123) acc 56.2500 (65.6250) lr 1.3681e-03 eta 0:13:42 +epoch [21/50] batch [10/31] time 0.728 (0.809) data 0.000 (0.077) loss 1.0088 (1.5328) acc 81.2500 (65.3125) lr 1.3681e-03 eta 0:12:23 +epoch [21/50] batch [15/31] time 0.725 (0.780) data 0.000 (0.051) loss 1.2188 (1.4622) acc 71.8750 (67.0833) lr 1.3681e-03 eta 0:11:53 +epoch [21/50] batch [20/31] time 0.746 (0.770) data 0.000 (0.039) loss 2.4316 (1.4705) acc 40.6250 (67.0312) lr 1.3681e-03 eta 0:11:40 +epoch [21/50] batch [25/31] time 0.726 (0.763) data 0.000 (0.031) loss 1.7148 (1.4802) acc 71.8750 (66.3750) lr 1.3681e-03 eta 0:11:30 +epoch [21/50] batch [30/31] time 0.734 (0.764) data 0.000 (0.026) loss 1.1084 (1.4741) acc 75.0000 (66.1458) lr 1.3681e-03 eta 0:11:27 +epoch [22/50] batch [5/31] time 0.726 (0.879) data 0.000 (0.147) loss 1.0625 (1.4839) acc 75.0000 (68.7500) lr 1.3090e-03 eta 0:13:06 +epoch [22/50] batch [10/31] time 0.725 (0.802) data 0.000 (0.074) loss 1.7617 (1.5793) acc 62.5000 (66.8750) lr 1.3090e-03 eta 0:11:52 +epoch [22/50] batch [15/31] time 0.753 (0.778) data 0.001 (0.049) loss 0.9688 (1.4642) acc 78.1250 (67.9167) lr 1.3090e-03 eta 0:11:27 +epoch [22/50] batch [20/31] time 
0.729 (0.764) data 0.000 (0.037) loss 1.6582 (1.4519) acc 59.3750 (67.3438) lr 1.3090e-03 eta 0:11:11 +epoch [22/50] batch [25/31] time 0.739 (0.756) data 0.000 (0.030) loss 1.1689 (1.4597) acc 71.8750 (66.8750) lr 1.3090e-03 eta 0:11:00 +epoch [22/50] batch [30/31] time 0.713 (0.751) data 0.000 (0.025) loss 1.0127 (1.4449) acc 81.2500 (67.0833) lr 1.3090e-03 eta 0:10:52 +epoch [23/50] batch [5/31] time 0.724 (0.892) data 0.001 (0.143) loss 1.2236 (1.3875) acc 71.8750 (68.7500) lr 1.2487e-03 eta 0:12:49 +epoch [23/50] batch [10/31] time 0.722 (0.809) data 0.000 (0.072) loss 1.2529 (1.3554) acc 75.0000 (68.4375) lr 1.2487e-03 eta 0:11:34 +epoch [23/50] batch [15/31] time 0.720 (0.783) data 0.004 (0.048) loss 1.7168 (1.4024) acc 62.5000 (67.5000) lr 1.2487e-03 eta 0:11:07 +epoch [23/50] batch [20/31] time 0.718 (0.768) data 0.000 (0.036) loss 1.8262 (1.4531) acc 65.6250 (67.1875) lr 1.2487e-03 eta 0:10:50 +epoch [23/50] batch [25/31] time 0.763 (0.764) data 0.001 (0.029) loss 1.0850 (1.4063) acc 78.1250 (67.5000) lr 1.2487e-03 eta 0:10:44 +epoch [23/50] batch [30/31] time 0.735 (0.759) data 0.001 (0.024) loss 1.3652 (1.3923) acc 65.6250 (66.6667) lr 1.2487e-03 eta 0:10:36 +epoch [24/50] batch [5/31] time 0.732 (0.899) data 0.001 (0.147) loss 1.2998 (1.3289) acc 62.5000 (68.1250) lr 1.1874e-03 eta 0:12:27 +epoch [24/50] batch [10/31] time 0.713 (0.817) data 0.000 (0.074) loss 1.3398 (1.3853) acc 62.5000 (66.2500) lr 1.1874e-03 eta 0:11:15 +epoch [24/50] batch [15/31] time 0.734 (0.790) data 0.001 (0.049) loss 0.7861 (1.3559) acc 84.3750 (67.7083) lr 1.1874e-03 eta 0:10:49 +epoch [24/50] batch [20/31] time 0.721 (0.774) data 0.000 (0.037) loss 0.9482 (1.3940) acc 68.7500 (65.9375) lr 1.1874e-03 eta 0:10:32 +epoch [24/50] batch [25/31] time 0.753 (0.766) data 0.000 (0.030) loss 1.8340 (1.4175) acc 65.6250 (66.6250) lr 1.1874e-03 eta 0:10:21 +epoch [24/50] batch [30/31] time 0.727 (0.760) data 0.000 (0.025) loss 0.9624 (1.3974) acc 68.7500 (66.2500) lr 1.1874e-03 eta 
0:10:13 +epoch [25/50] batch [5/31] time 0.710 (0.901) data 0.000 (0.146) loss 2.2207 (1.4187) acc 46.8750 (64.3750) lr 1.1253e-03 eta 0:12:01 +epoch [25/50] batch [10/31] time 0.752 (0.818) data 0.000 (0.073) loss 0.9526 (1.2893) acc 81.2500 (68.4375) lr 1.1253e-03 eta 0:10:51 +epoch [25/50] batch [15/31] time 0.728 (0.801) data 0.001 (0.049) loss 1.6387 (1.3745) acc 68.7500 (66.6667) lr 1.1253e-03 eta 0:10:33 +epoch [25/50] batch [20/31] time 0.718 (0.780) data 0.000 (0.037) loss 0.9888 (1.3451) acc 84.3750 (67.0312) lr 1.1253e-03 eta 0:10:13 +epoch [25/50] batch [25/31] time 0.722 (0.769) data 0.000 (0.030) loss 0.8237 (1.3395) acc 71.8750 (66.6250) lr 1.1253e-03 eta 0:10:00 +epoch [25/50] batch [30/31] time 0.750 (0.761) data 0.000 (0.025) loss 1.8779 (1.4132) acc 53.1250 (65.1042) lr 1.1253e-03 eta 0:09:50 +epoch [26/50] batch [5/31] time 0.746 (0.881) data 0.000 (0.131) loss 1.5215 (1.2847) acc 65.6250 (71.8750) lr 1.0628e-03 eta 0:11:18 +epoch [26/50] batch [10/31] time 0.715 (0.802) data 0.000 (0.066) loss 1.9609 (1.4096) acc 46.8750 (66.8750) lr 1.0628e-03 eta 0:10:13 +epoch [26/50] batch [15/31] time 0.714 (0.773) data 0.000 (0.044) loss 1.6836 (1.4118) acc 71.8750 (66.6667) lr 1.0628e-03 eta 0:09:47 +epoch [26/50] batch [20/31] time 0.719 (0.763) data 0.001 (0.033) loss 1.2324 (1.3352) acc 59.3750 (67.6562) lr 1.0628e-03 eta 0:09:36 +epoch [26/50] batch [25/31] time 0.718 (0.756) data 0.000 (0.026) loss 1.2383 (1.3669) acc 71.8750 (67.3750) lr 1.0628e-03 eta 0:09:27 +epoch [26/50] batch [30/31] time 0.717 (0.752) data 0.000 (0.022) loss 1.5107 (1.3652) acc 53.1250 (67.2917) lr 1.0628e-03 eta 0:09:20 +epoch [27/50] batch [5/31] time 0.724 (0.865) data 0.001 (0.134) loss 1.4102 (1.2437) acc 59.3750 (68.1250) lr 1.0000e-03 eta 0:10:39 +epoch [27/50] batch [10/31] time 0.738 (0.795) data 0.001 (0.067) loss 1.0693 (1.1996) acc 78.1250 (70.9375) lr 1.0000e-03 eta 0:09:43 +epoch [27/50] batch [15/31] time 0.715 (0.772) data 0.001 (0.045) loss 1.5586 (1.1910) 
acc 53.1250 (70.6250) lr 1.0000e-03 eta 0:09:22 +epoch [27/50] batch [20/31] time 0.704 (0.761) data 0.000 (0.034) loss 1.1953 (1.1537) acc 75.0000 (71.7188) lr 1.0000e-03 eta 0:09:10 +epoch [27/50] batch [25/31] time 0.730 (0.753) data 0.000 (0.027) loss 1.2725 (1.1979) acc 62.5000 (70.7500) lr 1.0000e-03 eta 0:09:01 +epoch [27/50] batch [30/31] time 0.741 (0.751) data 0.000 (0.023) loss 1.0117 (1.2102) acc 75.0000 (70.9375) lr 1.0000e-03 eta 0:08:56 +epoch [28/50] batch [5/31] time 0.733 (0.939) data 0.000 (0.172) loss 1.2959 (1.3891) acc 71.8750 (64.3750) lr 9.3721e-04 eta 0:11:04 +epoch [28/50] batch [10/31] time 0.737 (0.839) data 0.001 (0.087) loss 0.8647 (1.2662) acc 78.1250 (69.0625) lr 9.3721e-04 eta 0:09:49 +epoch [28/50] batch [15/31] time 0.794 (0.807) data 0.000 (0.058) loss 1.1553 (1.2290) acc 71.8750 (70.0000) lr 9.3721e-04 eta 0:09:23 +epoch [28/50] batch [20/31] time 0.754 (0.790) data 0.001 (0.044) loss 1.5342 (1.2783) acc 62.5000 (68.5938) lr 9.3721e-04 eta 0:09:07 +epoch [28/50] batch [25/31] time 0.723 (0.778) data 0.000 (0.035) loss 1.3438 (1.3143) acc 59.3750 (67.7500) lr 9.3721e-04 eta 0:08:55 +epoch [28/50] batch [30/31] time 0.703 (0.768) data 0.000 (0.029) loss 1.4434 (1.2931) acc 71.8750 (68.9583) lr 9.3721e-04 eta 0:08:44 +epoch [29/50] batch [5/31] time 0.724 (0.866) data 0.000 (0.132) loss 1.2393 (1.4936) acc 78.1250 (66.2500) lr 8.7467e-04 eta 0:09:46 +epoch [29/50] batch [10/31] time 0.722 (0.793) data 0.001 (0.067) loss 1.2705 (1.4078) acc 68.7500 (67.8125) lr 8.7467e-04 eta 0:08:52 +epoch [29/50] batch [15/31] time 0.723 (0.768) data 0.000 (0.045) loss 2.0625 (1.4778) acc 62.5000 (66.6667) lr 8.7467e-04 eta 0:08:32 +epoch [29/50] batch [20/31] time 0.728 (0.757) data 0.000 (0.033) loss 1.3076 (1.4886) acc 59.3750 (67.5000) lr 8.7467e-04 eta 0:08:21 +epoch [29/50] batch [25/31] time 0.717 (0.755) data 0.000 (0.027) loss 1.2295 (1.4563) acc 68.7500 (68.1250) lr 8.7467e-04 eta 0:08:16 +epoch [29/50] batch [30/31] time 0.748 (0.751) 
data 0.000 (0.022) loss 1.4727 (1.4241) acc 68.7500 (68.2292) lr 8.7467e-04 eta 0:08:09 +epoch [30/50] batch [5/31] time 0.757 (0.903) data 0.001 (0.166) loss 1.5537 (1.3184) acc 56.2500 (66.8750) lr 8.1262e-04 eta 0:09:43 +epoch [30/50] batch [10/31] time 0.730 (0.815) data 0.000 (0.083) loss 1.1963 (1.2679) acc 71.8750 (66.8750) lr 8.1262e-04 eta 0:08:42 +epoch [30/50] batch [15/31] time 0.721 (0.785) data 0.001 (0.056) loss 1.1396 (1.1899) acc 68.7500 (68.9583) lr 8.1262e-04 eta 0:08:19 +epoch [30/50] batch [20/31] time 0.732 (0.770) data 0.000 (0.042) loss 1.2275 (1.2145) acc 75.0000 (69.0625) lr 8.1262e-04 eta 0:08:06 +epoch [30/50] batch [25/31] time 0.741 (0.761) data 0.001 (0.034) loss 2.1582 (1.2415) acc 62.5000 (68.7500) lr 8.1262e-04 eta 0:07:56 +epoch [30/50] batch [30/31] time 0.715 (0.755) data 0.000 (0.028) loss 0.9077 (1.2238) acc 78.1250 (68.9583) lr 8.1262e-04 eta 0:07:49 +epoch [31/50] batch [5/31] time 0.722 (0.882) data 0.000 (0.139) loss 1.2256 (1.1341) acc 75.0000 (74.3750) lr 7.5131e-04 eta 0:09:02 +epoch [31/50] batch [10/31] time 0.731 (0.805) data 0.000 (0.070) loss 0.8032 (1.0799) acc 84.3750 (74.0625) lr 7.5131e-04 eta 0:08:10 +epoch [31/50] batch [15/31] time 0.741 (0.779) data 0.001 (0.047) loss 1.4561 (1.1935) acc 62.5000 (71.8750) lr 7.5131e-04 eta 0:07:51 +epoch [31/50] batch [20/31] time 0.738 (0.772) data 0.000 (0.035) loss 0.9302 (1.2114) acc 78.1250 (70.9375) lr 7.5131e-04 eta 0:07:42 +epoch [31/50] batch [25/31] time 0.722 (0.762) data 0.000 (0.028) loss 0.9761 (1.2057) acc 81.2500 (71.6250) lr 7.5131e-04 eta 0:07:33 +epoch [31/50] batch [30/31] time 0.729 (0.755) data 0.000 (0.023) loss 0.8345 (1.2181) acc 84.3750 (71.5625) lr 7.5131e-04 eta 0:07:25 +epoch [32/50] batch [5/31] time 0.742 (0.888) data 0.001 (0.142) loss 2.3008 (1.5537) acc 71.8750 (68.1250) lr 6.9098e-04 eta 0:08:38 +epoch [32/50] batch [10/31] time 0.730 (0.806) data 0.001 (0.072) loss 0.7983 (1.3414) acc 81.2500 (69.0625) lr 6.9098e-04 eta 0:07:46 +epoch 
[32/50] batch [15/31] time 0.715 (0.778) data 0.000 (0.048) loss 1.5469 (1.3699) acc 65.6250 (68.9583) lr 6.9098e-04 eta 0:07:26 +epoch [32/50] batch [20/31] time 0.727 (0.766) data 0.000 (0.036) loss 1.2832 (1.3158) acc 68.7500 (70.0000) lr 6.9098e-04 eta 0:07:15 +epoch [32/50] batch [25/31] time 0.723 (0.759) data 0.001 (0.029) loss 0.8823 (1.2877) acc 75.0000 (70.1250) lr 6.9098e-04 eta 0:07:07 +epoch [32/50] batch [30/31] time 0.718 (0.752) data 0.000 (0.024) loss 1.4033 (1.3222) acc 56.2500 (68.6458) lr 6.9098e-04 eta 0:07:00 +epoch [33/50] batch [5/31] time 0.732 (0.874) data 0.000 (0.135) loss 1.6221 (1.3443) acc 62.5000 (68.7500) lr 6.3188e-04 eta 0:08:03 +epoch [33/50] batch [10/31] time 0.743 (0.800) data 0.001 (0.068) loss 1.0078 (1.2753) acc 71.8750 (69.0625) lr 6.3188e-04 eta 0:07:18 +epoch [33/50] batch [15/31] time 0.713 (0.773) data 0.000 (0.045) loss 0.9331 (1.2911) acc 78.1250 (69.1667) lr 6.3188e-04 eta 0:06:59 +epoch [33/50] batch [20/31] time 0.721 (0.760) data 0.000 (0.034) loss 1.2334 (1.3088) acc 68.7500 (69.8438) lr 6.3188e-04 eta 0:06:48 +epoch [33/50] batch [25/31] time 0.732 (0.754) data 0.002 (0.027) loss 2.3867 (1.3855) acc 50.0000 (68.5000) lr 6.3188e-04 eta 0:06:42 +epoch [33/50] batch [30/31] time 0.743 (0.749) data 0.000 (0.023) loss 1.3281 (1.3713) acc 62.5000 (68.3333) lr 6.3188e-04 eta 0:06:35 +epoch [34/50] batch [5/31] time 0.725 (0.873) data 0.000 (0.133) loss 1.2852 (1.1925) acc 75.0000 (76.8750) lr 5.7422e-04 eta 0:07:35 +epoch [34/50] batch [10/31] time 0.738 (0.816) data 0.000 (0.067) loss 1.3896 (1.2432) acc 71.8750 (71.8750) lr 5.7422e-04 eta 0:07:01 +epoch [34/50] batch [15/31] time 0.718 (0.786) data 0.000 (0.045) loss 0.8438 (1.2144) acc 87.5000 (72.9167) lr 5.7422e-04 eta 0:06:42 +epoch [34/50] batch [20/31] time 0.716 (0.768) data 0.000 (0.034) loss 1.0449 (1.1285) acc 78.1250 (75.1562) lr 5.7422e-04 eta 0:06:29 +epoch [34/50] batch [25/31] time 0.719 (0.760) data 0.000 (0.027) loss 0.7979 (1.1430) acc 78.1250 
(74.1250) lr 5.7422e-04 eta 0:06:21 +epoch [34/50] batch [30/31] time 0.718 (0.755) data 0.000 (0.022) loss 0.9004 (1.1304) acc 68.7500 (73.5417) lr 5.7422e-04 eta 0:06:15 +epoch [35/50] batch [5/31] time 0.730 (0.888) data 0.001 (0.147) loss 1.0664 (1.1676) acc 71.8750 (70.6250) lr 5.1825e-04 eta 0:07:15 +epoch [35/50] batch [10/31] time 0.737 (0.809) data 0.000 (0.074) loss 1.0811 (1.1312) acc 75.0000 (72.8125) lr 5.1825e-04 eta 0:06:33 +epoch [35/50] batch [15/31] time 0.727 (0.779) data 0.000 (0.049) loss 1.0439 (1.1299) acc 68.7500 (72.5000) lr 5.1825e-04 eta 0:06:14 +epoch [35/50] batch [20/31] time 0.735 (0.767) data 0.000 (0.037) loss 2.1270 (1.2122) acc 65.6250 (70.6250) lr 5.1825e-04 eta 0:06:05 +epoch [35/50] batch [25/31] time 0.715 (0.757) data 0.000 (0.030) loss 1.1777 (1.2107) acc 71.8750 (70.3750) lr 5.1825e-04 eta 0:05:56 +epoch [35/50] batch [30/31] time 0.707 (0.751) data 0.000 (0.025) loss 0.8657 (1.1820) acc 84.3750 (71.1458) lr 5.1825e-04 eta 0:05:49 +epoch [36/50] batch [5/31] time 0.716 (0.879) data 0.000 (0.140) loss 1.0488 (1.2559) acc 75.0000 (68.1250) lr 4.6417e-04 eta 0:06:44 +epoch [36/50] batch [10/31] time 0.726 (0.802) data 0.000 (0.070) loss 0.8877 (1.1361) acc 68.7500 (70.6250) lr 4.6417e-04 eta 0:06:04 +epoch [36/50] batch [15/31] time 0.747 (0.777) data 0.000 (0.047) loss 1.0771 (1.1012) acc 75.0000 (71.8750) lr 4.6417e-04 eta 0:05:49 +epoch [36/50] batch [20/31] time 0.735 (0.771) data 0.000 (0.035) loss 1.5693 (1.1261) acc 62.5000 (70.9375) lr 4.6417e-04 eta 0:05:43 +epoch [36/50] batch [25/31] time 0.727 (0.762) data 0.000 (0.028) loss 1.8057 (1.1817) acc 59.3750 (71.0000) lr 4.6417e-04 eta 0:05:35 +epoch [36/50] batch [30/31] time 0.722 (0.756) data 0.000 (0.024) loss 1.0264 (1.2024) acc 78.1250 (70.4167) lr 4.6417e-04 eta 0:05:28 +epoch [37/50] batch [5/31] time 0.729 (0.884) data 0.002 (0.149) loss 1.0107 (1.1337) acc 65.6250 (69.3750) lr 4.1221e-04 eta 0:06:19 +epoch [37/50] batch [10/31] time 0.718 (0.803) data 0.001 
(0.075) loss 0.4521 (1.0836) acc 96.8750 (74.3750) lr 4.1221e-04 eta 0:05:40 +epoch [37/50] batch [15/31] time 0.739 (0.779) data 0.000 (0.050) loss 0.4690 (1.0958) acc 87.5000 (74.5833) lr 4.1221e-04 eta 0:05:26 +epoch [37/50] batch [20/31] time 0.704 (0.766) data 0.000 (0.038) loss 1.0850 (1.0876) acc 78.1250 (75.0000) lr 4.1221e-04 eta 0:05:17 +epoch [37/50] batch [25/31] time 0.720 (0.756) data 0.000 (0.030) loss 1.5088 (1.1106) acc 65.6250 (73.5000) lr 4.1221e-04 eta 0:05:09 +epoch [37/50] batch [30/31] time 0.727 (0.750) data 0.000 (0.025) loss 1.3252 (1.1237) acc 65.6250 (73.2292) lr 4.1221e-04 eta 0:05:03 +epoch [38/50] batch [5/31] time 0.777 (0.884) data 0.001 (0.130) loss 1.0127 (1.2063) acc 75.0000 (72.5000) lr 3.6258e-04 eta 0:05:51 +epoch [38/50] batch [10/31] time 0.732 (0.807) data 0.001 (0.065) loss 1.9443 (1.3479) acc 68.7500 (70.9375) lr 3.6258e-04 eta 0:05:17 +epoch [38/50] batch [15/31] time 0.739 (0.781) data 0.001 (0.044) loss 1.1787 (1.3493) acc 68.7500 (71.2500) lr 3.6258e-04 eta 0:05:03 +epoch [38/50] batch [20/31] time 0.711 (0.769) data 0.000 (0.033) loss 1.0098 (1.2838) acc 81.2500 (72.3438) lr 3.6258e-04 eta 0:04:54 +epoch [38/50] batch [25/31] time 0.715 (0.760) data 0.000 (0.026) loss 1.5742 (1.2915) acc 71.8750 (72.2500) lr 3.6258e-04 eta 0:04:47 +epoch [38/50] batch [30/31] time 0.718 (0.753) data 0.000 (0.022) loss 1.7168 (1.2972) acc 62.5000 (72.1875) lr 3.6258e-04 eta 0:04:40 +epoch [39/50] batch [5/31] time 0.735 (0.902) data 0.001 (0.148) loss 0.9028 (0.9735) acc 87.5000 (80.0000) lr 3.1545e-04 eta 0:05:31 +epoch [39/50] batch [10/31] time 0.730 (0.818) data 0.000 (0.074) loss 0.9497 (1.1544) acc 75.0000 (74.3750) lr 3.1545e-04 eta 0:04:56 +epoch [39/50] batch [15/31] time 0.753 (0.805) data 0.001 (0.050) loss 0.9351 (1.1617) acc 81.2500 (73.7500) lr 3.1545e-04 eta 0:04:47 +epoch [39/50] batch [20/31] time 0.725 (0.784) data 0.000 (0.037) loss 1.2432 (1.1104) acc 75.0000 (74.8438) lr 3.1545e-04 eta 0:04:36 +epoch [39/50] batch 
[25/31] time 0.727 (0.772) data 0.000 (0.030) loss 1.0293 (1.1575) acc 75.0000 (74.3750) lr 3.1545e-04 eta 0:04:27 +epoch [39/50] batch [30/31] time 0.726 (0.764) data 0.000 (0.025) loss 0.9058 (1.1349) acc 75.0000 (74.4792) lr 3.1545e-04 eta 0:04:21 +epoch [40/50] batch [5/31] time 0.738 (0.896) data 0.000 (0.152) loss 1.1230 (1.2922) acc 75.0000 (71.8750) lr 2.7103e-04 eta 0:05:01 +epoch [40/50] batch [10/31] time 0.751 (0.816) data 0.000 (0.076) loss 1.3340 (1.0914) acc 81.2500 (75.3125) lr 2.7103e-04 eta 0:04:30 +epoch [40/50] batch [15/31] time 0.739 (0.790) data 0.000 (0.051) loss 1.4502 (1.1591) acc 71.8750 (73.3333) lr 2.7103e-04 eta 0:04:17 +epoch [40/50] batch [20/31] time 0.717 (0.773) data 0.000 (0.038) loss 1.1797 (1.1588) acc 75.0000 (73.5938) lr 2.7103e-04 eta 0:04:08 +epoch [40/50] batch [25/31] time 0.782 (0.769) data 0.000 (0.031) loss 1.0732 (1.1318) acc 65.6250 (73.3750) lr 2.7103e-04 eta 0:04:02 +epoch [40/50] batch [30/31] time 0.733 (0.762) data 0.000 (0.026) loss 0.9785 (1.1510) acc 78.1250 (72.5000) lr 2.7103e-04 eta 0:03:56 +epoch [41/50] batch [5/31] time 0.722 (0.929) data 0.001 (0.154) loss 0.9302 (1.0750) acc 81.2500 (76.2500) lr 2.2949e-04 eta 0:04:43 +epoch [41/50] batch [10/31] time 0.732 (0.834) data 0.001 (0.077) loss 0.9922 (1.0893) acc 71.8750 (75.3125) lr 2.2949e-04 eta 0:04:10 +epoch [41/50] batch [15/31] time 0.723 (0.797) data 0.000 (0.052) loss 0.6318 (1.0942) acc 81.2500 (75.0000) lr 2.2949e-04 eta 0:03:55 +epoch [41/50] batch [20/31] time 0.723 (0.779) data 0.000 (0.039) loss 1.5518 (1.1214) acc 56.2500 (74.3750) lr 2.2949e-04 eta 0:03:45 +epoch [41/50] batch [25/31] time 0.725 (0.767) data 0.000 (0.031) loss 1.2510 (1.1181) acc 75.0000 (74.6250) lr 2.2949e-04 eta 0:03:38 +epoch [41/50] batch [30/31] time 0.712 (0.758) data 0.000 (0.026) loss 1.6875 (1.1575) acc 56.2500 (74.0625) lr 2.2949e-04 eta 0:03:32 +epoch [42/50] batch [5/31] time 0.741 (0.878) data 0.000 (0.142) loss 0.9155 (1.2512) acc 81.2500 (73.1250) lr 
1.9098e-04 eta 0:04:00 +epoch [42/50] batch [10/31] time 0.739 (0.808) data 0.000 (0.071) loss 0.8833 (1.2255) acc 75.0000 (72.5000) lr 1.9098e-04 eta 0:03:37 +epoch [42/50] batch [15/31] time 0.709 (0.780) data 0.000 (0.047) loss 1.3262 (1.2412) acc 71.8750 (72.5000) lr 1.9098e-04 eta 0:03:25 +epoch [42/50] batch [20/31] time 0.728 (0.767) data 0.000 (0.036) loss 1.0449 (1.1684) acc 78.1250 (73.7500) lr 1.9098e-04 eta 0:03:18 +epoch [42/50] batch [25/31] time 0.715 (0.756) data 0.000 (0.029) loss 1.1914 (1.1492) acc 68.7500 (73.8750) lr 1.9098e-04 eta 0:03:12 +epoch [42/50] batch [30/31] time 0.734 (0.752) data 0.000 (0.024) loss 1.3770 (1.1182) acc 65.6250 (74.1667) lr 1.9098e-04 eta 0:03:07 +epoch [43/50] batch [5/31] time 0.716 (0.877) data 0.000 (0.144) loss 0.6709 (1.2892) acc 84.3750 (70.0000) lr 1.5567e-04 eta 0:03:33 +epoch [43/50] batch [10/31] time 0.720 (0.815) data 0.000 (0.072) loss 0.9087 (1.2473) acc 78.1250 (70.3125) lr 1.5567e-04 eta 0:03:13 +epoch [43/50] batch [15/31] time 0.710 (0.784) data 0.000 (0.048) loss 0.9614 (1.1530) acc 78.1250 (72.2917) lr 1.5567e-04 eta 0:03:02 +epoch [43/50] batch [20/31] time 0.725 (0.767) data 0.000 (0.036) loss 0.8843 (1.1657) acc 68.7500 (71.7188) lr 1.5567e-04 eta 0:02:54 +epoch [43/50] batch [25/31] time 0.715 (0.757) data 0.000 (0.029) loss 1.2881 (1.2128) acc 81.2500 (70.3750) lr 1.5567e-04 eta 0:02:48 +epoch [43/50] batch [30/31] time 0.713 (0.751) data 0.000 (0.024) loss 1.0928 (1.2049) acc 84.3750 (71.5625) lr 1.5567e-04 eta 0:02:43 +epoch [44/50] batch [5/31] time 0.758 (0.868) data 0.000 (0.130) loss 0.9248 (1.1054) acc 75.0000 (75.0000) lr 1.2369e-04 eta 0:03:03 +epoch [44/50] batch [10/31] time 0.725 (0.799) data 0.001 (0.065) loss 0.9395 (1.0227) acc 81.2500 (76.5625) lr 1.2369e-04 eta 0:02:45 +epoch [44/50] batch [15/31] time 0.716 (0.775) data 0.000 (0.044) loss 0.9077 (1.0438) acc 78.1250 (76.0417) lr 1.2369e-04 eta 0:02:36 +epoch [44/50] batch [20/31] time 0.712 (0.761) data 0.000 (0.033) loss 
0.9248 (1.0976) acc 71.8750 (74.5312) lr 1.2369e-04 eta 0:02:29 +epoch [44/50] batch [25/31] time 0.709 (0.752) data 0.000 (0.026) loss 2.4297 (1.1417) acc 53.1250 (73.5000) lr 1.2369e-04 eta 0:02:24 +epoch [44/50] batch [30/31] time 0.712 (0.747) data 0.000 (0.022) loss 1.1543 (1.1745) acc 71.8750 (72.6042) lr 1.2369e-04 eta 0:02:19 +epoch [45/50] batch [5/31] time 0.709 (0.858) data 0.000 (0.131) loss 0.8179 (0.9809) acc 75.0000 (75.6250) lr 9.5173e-05 eta 0:02:35 +epoch [45/50] batch [10/31] time 0.728 (0.797) data 0.000 (0.066) loss 1.5811 (1.0124) acc 65.6250 (74.0625) lr 9.5173e-05 eta 0:02:20 +epoch [45/50] batch [15/31] time 0.740 (0.773) data 0.000 (0.044) loss 0.8193 (0.9762) acc 75.0000 (75.6250) lr 9.5173e-05 eta 0:02:12 +epoch [45/50] batch [20/31] time 0.732 (0.761) data 0.000 (0.033) loss 1.0312 (1.0317) acc 75.0000 (75.4688) lr 9.5173e-05 eta 0:02:06 +epoch [45/50] batch [25/31] time 0.705 (0.752) data 0.000 (0.026) loss 1.7295 (1.0725) acc 68.7500 (75.3750) lr 9.5173e-05 eta 0:02:01 +epoch [45/50] batch [30/31] time 0.706 (0.747) data 0.000 (0.022) loss 1.3398 (1.1198) acc 75.0000 (75.4167) lr 9.5173e-05 eta 0:01:56 +epoch [46/50] batch [5/31] time 0.724 (0.872) data 0.000 (0.138) loss 1.5244 (1.2507) acc 62.5000 (68.7500) lr 7.0224e-05 eta 0:02:10 +epoch [46/50] batch [10/31] time 0.733 (0.798) data 0.001 (0.069) loss 0.6855 (1.0826) acc 84.3750 (73.4375) lr 7.0224e-05 eta 0:01:55 +epoch [46/50] batch [15/31] time 0.723 (0.773) data 0.000 (0.046) loss 0.8613 (1.0256) acc 78.1250 (74.7917) lr 7.0224e-05 eta 0:01:48 +epoch [46/50] batch [20/31] time 0.751 (0.761) data 0.000 (0.035) loss 0.8389 (0.9915) acc 81.2500 (75.3125) lr 7.0224e-05 eta 0:01:42 +epoch [46/50] batch [25/31] time 0.721 (0.751) data 0.000 (0.028) loss 1.0332 (1.0185) acc 78.1250 (75.2500) lr 7.0224e-05 eta 0:01:37 +epoch [46/50] batch [30/31] time 0.721 (0.747) data 0.000 (0.023) loss 1.5195 (1.0605) acc 62.5000 (74.6875) lr 7.0224e-05 eta 0:01:33 +epoch [47/50] batch [5/31] time 
0.729 (0.910) data 0.001 (0.172) loss 0.7959 (1.1036) acc 81.2500 (73.7500) lr 4.8943e-05 eta 0:01:48 +epoch [47/50] batch [10/31] time 0.716 (0.818) data 0.000 (0.086) loss 2.3203 (1.1669) acc 65.6250 (74.0625) lr 4.8943e-05 eta 0:01:33 +epoch [47/50] batch [15/31] time 0.763 (0.788) data 0.001 (0.058) loss 1.0830 (1.1063) acc 65.6250 (73.7500) lr 4.8943e-05 eta 0:01:25 +epoch [47/50] batch [20/31] time 0.726 (0.774) data 0.000 (0.043) loss 0.7446 (1.1035) acc 78.1250 (74.8438) lr 4.8943e-05 eta 0:01:20 +epoch [47/50] batch [25/31] time 0.720 (0.764) data 0.000 (0.035) loss 0.7915 (1.0645) acc 71.8750 (74.8750) lr 4.8943e-05 eta 0:01:15 +epoch [47/50] batch [30/31] time 0.749 (0.759) data 0.000 (0.029) loss 1.1504 (1.0510) acc 75.0000 (75.3125) lr 4.8943e-05 eta 0:01:11 +epoch [48/50] batch [5/31] time 0.716 (0.865) data 0.000 (0.132) loss 0.7554 (1.2302) acc 81.2500 (71.8750) lr 3.1417e-05 eta 0:01:16 +epoch [48/50] batch [10/31] time 0.719 (0.793) data 0.000 (0.066) loss 0.7261 (1.0709) acc 75.0000 (75.9375) lr 3.1417e-05 eta 0:01:05 +epoch [48/50] batch [15/31] time 0.720 (0.770) data 0.000 (0.044) loss 0.8545 (1.0399) acc 81.2500 (76.4583) lr 3.1417e-05 eta 0:01:00 +epoch [48/50] batch [20/31] time 0.725 (0.758) data 0.000 (0.033) loss 1.1396 (1.0664) acc 81.2500 (76.2500) lr 3.1417e-05 eta 0:00:55 +epoch [48/50] batch [25/31] time 0.740 (0.751) data 0.000 (0.027) loss 0.7456 (1.0921) acc 84.3750 (76.0000) lr 3.1417e-05 eta 0:00:51 +epoch [48/50] batch [30/31] time 0.716 (0.748) data 0.000 (0.022) loss 1.1191 (1.0939) acc 75.0000 (75.7292) lr 3.1417e-05 eta 0:00:47 +epoch [49/50] batch [5/31] time 0.727 (0.886) data 0.000 (0.153) loss 0.5942 (1.0597) acc 90.6250 (74.3750) lr 1.7713e-05 eta 0:00:50 +epoch [49/50] batch [10/31] time 0.715 (0.819) data 0.000 (0.076) loss 1.3730 (1.0991) acc 71.8750 (72.8125) lr 1.7713e-05 eta 0:00:42 +epoch [49/50] batch [15/31] time 0.716 (0.793) data 0.000 (0.051) loss 0.9316 (1.1173) acc 81.2500 (72.7083) lr 1.7713e-05 eta 
0:00:37 +epoch [49/50] batch [20/31] time 0.728 (0.776) data 0.000 (0.038) loss 1.1768 (1.1434) acc 81.2500 (73.4375) lr 1.7713e-05 eta 0:00:32 +epoch [49/50] batch [25/31] time 0.745 (0.768) data 0.000 (0.031) loss 1.1123 (1.1660) acc 59.3750 (72.1250) lr 1.7713e-05 eta 0:00:28 +epoch [49/50] batch [30/31] time 0.717 (0.761) data 0.000 (0.026) loss 0.8979 (1.1324) acc 75.0000 (72.9167) lr 1.7713e-05 eta 0:00:24 +epoch [50/50] batch [5/31] time 0.735 (0.872) data 0.000 (0.134) loss 1.5400 (1.2837) acc 68.7500 (71.2500) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [10/31] time 0.738 (0.802) data 0.000 (0.067) loss 0.8896 (1.1411) acc 87.5000 (74.6875) lr 7.8853e-06 eta 0:00:16 +epoch [50/50] batch [15/31] time 0.726 (0.778) data 0.000 (0.045) loss 1.1006 (1.1269) acc 75.0000 (73.7500) lr 7.8853e-06 eta 0:00:12 +epoch [50/50] batch [20/31] time 0.771 (0.767) data 0.000 (0.034) loss 1.2803 (1.1313) acc 81.2500 (73.9062) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [25/31] time 0.727 (0.758) data 0.000 (0.027) loss 1.4160 (1.1408) acc 75.0000 (74.2500) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [30/31] time 0.842 (0.756) data 0.000 (0.023) loss 1.6436 (1.1501) acc 53.1250 (73.9583) lr 7.8853e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 29,372 +* accuracy: 58.7% +* error: 41.3% +* macro_f1: 57.8% +Elapsed: 0:22:25 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ 
-0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..9450c6f5 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1697831001.ckb-gpu-lambda.2099160.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1697831001.ckb-gpu-lambda.2099160.0 new file mode 100644 index 00000000..1192c925 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1697831001.ckb-gpu-lambda.2099160.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..e0173f50 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_b32_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: 
/ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/32 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False 
+ SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1199.999 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-B/32) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/50] batch [5/31] time 0.724 (1.638) data 0.001 (0.182) loss 3.8047 (3.6379) acc 34.3750 (34.3750) lr 1.0000e-05 eta 0:42:10 +epoch [1/50] batch [10/31] time 0.739 (1.186) data 0.000 (0.091) loss 2.9688 (3.6266) acc 53.1250 (35.6250) lr 1.0000e-05 eta 0:30:26 +epoch [1/50] batch [15/31] time 0.741 (1.037) data 0.000 (0.061) loss 2.9219 (3.4673) acc 43.7500 (37.9167) lr 1.0000e-05 eta 0:26:31 +epoch [1/50] batch [20/31] time 0.741 (0.958) data 0.000 (0.046) loss 2.0703 (3.3063) acc 59.3750 (39.8438) lr 1.0000e-05 eta 0:24:26 +epoch [1/50] batch [25/31] time 0.742 (0.914) data 0.000 (0.037) loss 2.7500 (3.2285) acc 31.2500 (38.1250) lr 1.0000e-05 eta 0:23:14 +epoch [1/50] batch [30/31] time 0.741 (0.885) data 0.000 (0.031) loss 2.1719 (3.1133) acc 43.7500 (39.7917) lr 1.0000e-05 eta 0:22:25 +epoch [2/50] batch [5/31] time 0.725 (0.885) data 0.000 (0.154) loss 2.0625 (2.5371) acc 59.3750 (48.1250) lr 2.0000e-03 eta 0:22:19 +epoch [2/50] batch [10/31] time 0.746 (0.808) data 0.000 (0.077) loss 1.9395 (2.3554) acc 53.1250 (50.6250) lr 2.0000e-03 eta 0:20:18 +epoch [2/50] batch [15/31] time 0.720 (0.780) data 0.000 (0.051) loss 1.4062 (2.2290) acc 65.6250 (51.4583) lr 2.0000e-03 eta 0:19:33 +epoch [2/50] batch [20/31] time 0.720 (0.766) data 0.000 (0.039) loss 1.9258 (2.2071) acc 53.1250 (50.7812) lr 2.0000e-03 eta 0:19:08 +epoch [2/50] batch [25/31] time 0.725 (0.756) data 0.000 (0.031) loss 2.2695 (2.1543) acc 53.1250 (52.2500) lr 2.0000e-03 eta 0:18:49 +epoch [2/50] batch [30/31] time 0.739 (0.750) data 0.000 (0.026) loss 2.0996 (2.1356) acc 43.7500 (51.3542) lr 2.0000e-03 eta 0:18:37 +epoch [3/50] batch [5/31] time 0.711 (0.870) data 0.000 (0.142) loss 2.0098 (2.1279) acc 50.0000 (53.1250) lr 1.9980e-03 eta 0:21:29 +epoch [3/50] batch [10/31] time 0.745 (0.798) data 
0.001 (0.071) loss 2.2578 (2.0594) acc 46.8750 (54.6875) lr 1.9980e-03 eta 0:19:38 +epoch [3/50] batch [15/31] time 0.718 (0.772) data 0.000 (0.048) loss 1.7539 (2.0196) acc 56.2500 (55.0000) lr 1.9980e-03 eta 0:18:56 +epoch [3/50] batch [20/31] time 0.714 (0.761) data 0.001 (0.036) loss 1.1533 (1.9502) acc 68.7500 (55.7812) lr 1.9980e-03 eta 0:18:36 +epoch [3/50] batch [25/31] time 0.719 (0.754) data 0.000 (0.029) loss 1.8496 (1.9234) acc 53.1250 (56.5000) lr 1.9980e-03 eta 0:18:22 +epoch [3/50] batch [30/31] time 0.731 (0.748) data 0.000 (0.024) loss 1.8340 (1.9156) acc 53.1250 (55.7292) lr 1.9980e-03 eta 0:18:11 +epoch [4/50] batch [5/31] time 0.720 (0.899) data 0.001 (0.149) loss 1.6562 (1.5801) acc 53.1250 (60.6250) lr 1.9921e-03 eta 0:21:45 +epoch [4/50] batch [10/31] time 0.736 (0.815) data 0.000 (0.075) loss 1.5469 (1.7316) acc 65.6250 (58.4375) lr 1.9921e-03 eta 0:19:38 +epoch [4/50] batch [15/31] time 0.730 (0.785) data 0.000 (0.050) loss 1.2969 (1.7867) acc 65.6250 (55.6250) lr 1.9921e-03 eta 0:18:51 +epoch [4/50] batch [20/31] time 0.722 (0.769) data 0.000 (0.038) loss 1.5811 (1.7468) acc 56.2500 (55.9375) lr 1.9921e-03 eta 0:18:25 +epoch [4/50] batch [25/31] time 0.730 (0.760) data 0.000 (0.030) loss 2.2422 (1.8128) acc 56.2500 (55.2500) lr 1.9921e-03 eta 0:18:07 +epoch [4/50] batch [30/31] time 0.736 (0.754) data 0.000 (0.025) loss 1.4375 (1.7640) acc 78.1250 (56.2500) lr 1.9921e-03 eta 0:17:55 +epoch [5/50] batch [5/31] time 0.728 (0.905) data 0.000 (0.166) loss 1.4658 (1.7898) acc 62.5000 (61.2500) lr 1.9823e-03 eta 0:21:26 +epoch [5/50] batch [10/31] time 0.741 (0.815) data 0.000 (0.083) loss 1.6992 (1.7464) acc 56.2500 (60.3125) lr 1.9823e-03 eta 0:19:13 +epoch [5/50] batch [15/31] time 0.711 (0.786) data 0.000 (0.055) loss 1.8320 (1.8018) acc 50.0000 (57.7083) lr 1.9823e-03 eta 0:18:29 +epoch [5/50] batch [20/31] time 0.711 (0.770) data 0.000 (0.042) loss 1.1455 (1.8159) acc 78.1250 (57.3438) lr 1.9823e-03 eta 0:18:03 +epoch [5/50] batch [25/31] 
time 0.716 (0.759) data 0.000 (0.033) loss 1.5684 (1.8089) acc 50.0000 (56.2500) lr 1.9823e-03 eta 0:17:43 +epoch [5/50] batch [30/31] time 0.723 (0.753) data 0.000 (0.028) loss 1.5957 (1.7737) acc 65.6250 (57.0833) lr 1.9823e-03 eta 0:17:31 +epoch [6/50] batch [5/31] time 0.722 (0.910) data 0.000 (0.173) loss 2.1797 (1.8949) acc 50.0000 (53.7500) lr 1.9686e-03 eta 0:21:04 +epoch [6/50] batch [10/31] time 0.715 (0.820) data 0.000 (0.087) loss 1.5176 (1.8247) acc 46.8750 (53.4375) lr 1.9686e-03 eta 0:18:55 +epoch [6/50] batch [15/31] time 0.737 (0.788) data 0.001 (0.058) loss 2.5957 (1.8934) acc 50.0000 (53.9583) lr 1.9686e-03 eta 0:18:07 +epoch [6/50] batch [20/31] time 0.720 (0.771) data 0.000 (0.044) loss 1.7568 (1.8487) acc 65.6250 (55.3125) lr 1.9686e-03 eta 0:17:40 +epoch [6/50] batch [25/31] time 0.713 (0.762) data 0.000 (0.035) loss 1.7275 (1.7846) acc 65.6250 (57.0000) lr 1.9686e-03 eta 0:17:24 +epoch [6/50] batch [30/31] time 0.712 (0.756) data 0.000 (0.029) loss 1.8555 (1.7599) acc 53.1250 (57.6042) lr 1.9686e-03 eta 0:17:11 +epoch [7/50] batch [5/31] time 0.718 (0.883) data 0.000 (0.143) loss 1.8506 (1.7391) acc 56.2500 (61.2500) lr 1.9511e-03 eta 0:20:00 +epoch [7/50] batch [10/31] time 0.736 (0.803) data 0.000 (0.072) loss 2.1523 (1.7735) acc 46.8750 (56.2500) lr 1.9511e-03 eta 0:18:07 +epoch [7/50] batch [15/31] time 0.723 (0.777) data 0.000 (0.048) loss 1.6973 (1.8325) acc 50.0000 (54.7917) lr 1.9511e-03 eta 0:17:27 +epoch [7/50] batch [20/31] time 0.708 (0.763) data 0.000 (0.036) loss 0.9883 (1.8048) acc 71.8750 (55.3125) lr 1.9511e-03 eta 0:17:05 +epoch [7/50] batch [25/31] time 0.739 (0.757) data 0.000 (0.029) loss 1.7148 (1.8280) acc 50.0000 (55.2500) lr 1.9511e-03 eta 0:16:54 +epoch [7/50] batch [30/31] time 0.738 (0.756) data 0.000 (0.024) loss 2.7559 (1.8851) acc 43.7500 (55.0000) lr 1.9511e-03 eta 0:16:47 +epoch [8/50] batch [5/31] time 0.719 (0.874) data 0.001 (0.135) loss 2.5820 (2.0729) acc 43.7500 (51.2500) lr 1.9298e-03 eta 0:19:20 
+epoch [8/50] batch [10/31] time 0.718 (0.794) data 0.000 (0.068) loss 1.4648 (1.8527) acc 62.5000 (55.6250) lr 1.9298e-03 eta 0:17:30 +epoch [8/50] batch [15/31] time 0.779 (0.783) data 0.001 (0.045) loss 1.5137 (1.8245) acc 65.6250 (55.2083) lr 1.9298e-03 eta 0:17:11 +epoch [8/50] batch [20/31] time 0.721 (0.767) data 0.000 (0.034) loss 1.5020 (1.8030) acc 68.7500 (56.4062) lr 1.9298e-03 eta 0:16:46 +epoch [8/50] batch [25/31] time 0.710 (0.757) data 0.000 (0.027) loss 1.2197 (1.7323) acc 71.8750 (57.3750) lr 1.9298e-03 eta 0:16:29 +epoch [8/50] batch [30/31] time 0.710 (0.751) data 0.000 (0.023) loss 1.7822 (1.6820) acc 65.6250 (58.8542) lr 1.9298e-03 eta 0:16:18 +epoch [9/50] batch [5/31] time 0.714 (0.871) data 0.000 (0.142) loss 1.8418 (1.9926) acc 50.0000 (53.1250) lr 1.9048e-03 eta 0:18:49 +epoch [9/50] batch [10/31] time 0.722 (0.795) data 0.000 (0.071) loss 1.4336 (1.8394) acc 56.2500 (56.2500) lr 1.9048e-03 eta 0:17:07 +epoch [9/50] batch [15/31] time 0.725 (0.771) data 0.000 (0.048) loss 2.0957 (1.8074) acc 40.6250 (54.5833) lr 1.9048e-03 eta 0:16:32 +epoch [9/50] batch [20/31] time 0.717 (0.760) data 0.000 (0.036) loss 2.5938 (1.7923) acc 40.6250 (55.3125) lr 1.9048e-03 eta 0:16:14 +epoch [9/50] batch [25/31] time 0.702 (0.752) data 0.000 (0.029) loss 1.5557 (1.7954) acc 59.3750 (56.1250) lr 1.9048e-03 eta 0:16:00 +epoch [9/50] batch [30/31] time 0.716 (0.748) data 0.000 (0.024) loss 1.3018 (1.7386) acc 65.6250 (57.0833) lr 1.9048e-03 eta 0:15:50 +epoch [10/50] batch [5/31] time 0.726 (0.906) data 0.000 (0.147) loss 1.5527 (1.7664) acc 59.3750 (58.1250) lr 1.8763e-03 eta 0:19:07 +epoch [10/50] batch [10/31] time 0.717 (0.812) data 0.000 (0.074) loss 1.8555 (1.8933) acc 56.2500 (56.5625) lr 1.8763e-03 eta 0:17:03 +epoch [10/50] batch [15/31] time 0.726 (0.785) data 0.000 (0.049) loss 1.5098 (1.8223) acc 59.3750 (56.4583) lr 1.8763e-03 eta 0:16:25 +epoch [10/50] batch [20/31] time 0.715 (0.767) data 0.000 (0.037) loss 1.4756 (1.8323) acc 59.3750 
(56.0938) lr 1.8763e-03 eta 0:15:59 +epoch [10/50] batch [25/31] time 0.739 (0.758) data 0.000 (0.030) loss 2.0547 (1.8002) acc 56.2500 (56.3750) lr 1.8763e-03 eta 0:15:44 +epoch [10/50] batch [30/31] time 0.711 (0.750) data 0.000 (0.025) loss 1.8076 (1.7872) acc 53.1250 (55.9375) lr 1.8763e-03 eta 0:15:30 +epoch [11/50] batch [5/31] time 0.727 (0.877) data 0.000 (0.137) loss 1.5000 (1.8805) acc 62.5000 (56.2500) lr 1.8443e-03 eta 0:18:03 +epoch [11/50] batch [10/31] time 0.719 (0.801) data 0.000 (0.069) loss 1.5703 (1.7690) acc 65.6250 (59.3750) lr 1.8443e-03 eta 0:16:24 +epoch [11/50] batch [15/31] time 0.728 (0.777) data 0.000 (0.046) loss 1.3018 (1.6354) acc 59.3750 (61.2500) lr 1.8443e-03 eta 0:15:52 +epoch [11/50] batch [20/31] time 0.714 (0.764) data 0.000 (0.035) loss 1.5186 (1.6331) acc 59.3750 (60.0000) lr 1.8443e-03 eta 0:15:32 +epoch [11/50] batch [25/31] time 0.715 (0.755) data 0.000 (0.028) loss 1.1611 (1.6357) acc 75.0000 (60.7500) lr 1.8443e-03 eta 0:15:17 +epoch [11/50] batch [30/31] time 0.740 (0.749) data 0.000 (0.023) loss 2.3574 (1.6878) acc 43.7500 (58.7500) lr 1.8443e-03 eta 0:15:06 +epoch [12/50] batch [5/31] time 0.738 (0.879) data 0.000 (0.137) loss 1.6895 (1.6963) acc 50.0000 (57.5000) lr 1.8090e-03 eta 0:17:37 +epoch [12/50] batch [10/31] time 0.735 (0.819) data 0.000 (0.069) loss 1.3086 (1.5735) acc 65.6250 (59.3750) lr 1.8090e-03 eta 0:16:22 +epoch [12/50] batch [15/31] time 0.725 (0.790) data 0.000 (0.046) loss 1.2666 (1.5943) acc 71.8750 (60.0000) lr 1.8090e-03 eta 0:15:43 +epoch [12/50] batch [20/31] time 0.724 (0.776) data 0.000 (0.035) loss 1.5020 (1.6099) acc 53.1250 (59.6875) lr 1.8090e-03 eta 0:15:22 +epoch [12/50] batch [25/31] time 0.710 (0.765) data 0.000 (0.028) loss 2.2715 (1.6628) acc 56.2500 (58.3750) lr 1.8090e-03 eta 0:15:05 +epoch [12/50] batch [30/31] time 0.732 (0.759) data 0.000 (0.023) loss 2.1836 (1.6575) acc 50.0000 (58.0208) lr 1.8090e-03 eta 0:14:55 +epoch [13/50] batch [5/31] time 0.725 (0.883) data 0.000 
(0.145) loss 1.3701 (1.6525) acc 53.1250 (55.6250) lr 1.7705e-03 eta 0:17:15 +epoch [13/50] batch [10/31] time 0.716 (0.804) data 0.000 (0.073) loss 1.7207 (1.6739) acc 56.2500 (58.1250) lr 1.7705e-03 eta 0:15:39 +epoch [13/50] batch [15/31] time 0.716 (0.778) data 0.000 (0.049) loss 2.4395 (1.7790) acc 53.1250 (56.8750) lr 1.7705e-03 eta 0:15:04 +epoch [13/50] batch [20/31] time 0.711 (0.761) data 0.000 (0.037) loss 1.6455 (1.6674) acc 53.1250 (59.5312) lr 1.7705e-03 eta 0:14:41 +epoch [13/50] batch [25/31] time 0.741 (0.753) data 0.000 (0.029) loss 1.9355 (1.6917) acc 59.3750 (59.2500) lr 1.7705e-03 eta 0:14:28 +epoch [13/50] batch [30/31] time 0.716 (0.748) data 0.000 (0.025) loss 1.2373 (1.7036) acc 71.8750 (58.3333) lr 1.7705e-03 eta 0:14:19 +epoch [14/50] batch [5/31] time 0.734 (0.874) data 0.001 (0.133) loss 1.2441 (1.4500) acc 68.7500 (63.7500) lr 1.7290e-03 eta 0:16:38 +epoch [14/50] batch [10/31] time 0.726 (0.800) data 0.000 (0.067) loss 1.8330 (1.6583) acc 62.5000 (60.6250) lr 1.7290e-03 eta 0:15:09 +epoch [14/50] batch [15/31] time 0.720 (0.776) data 0.000 (0.045) loss 1.0723 (1.6376) acc 71.8750 (60.2083) lr 1.7290e-03 eta 0:14:38 +epoch [14/50] batch [20/31] time 0.725 (0.764) data 0.000 (0.034) loss 1.4287 (1.6495) acc 65.6250 (60.4688) lr 1.7290e-03 eta 0:14:20 +epoch [14/50] batch [25/31] time 0.704 (0.755) data 0.000 (0.027) loss 1.3027 (1.5847) acc 62.5000 (62.0000) lr 1.7290e-03 eta 0:14:07 +epoch [14/50] batch [30/31] time 0.764 (0.751) data 0.000 (0.022) loss 1.3789 (1.6108) acc 68.7500 (61.4583) lr 1.7290e-03 eta 0:13:59 +epoch [15/50] batch [5/31] time 0.713 (0.868) data 0.000 (0.141) loss 1.9492 (1.6588) acc 43.7500 (55.6250) lr 1.6845e-03 eta 0:16:04 +epoch [15/50] batch [10/31] time 0.709 (0.797) data 0.000 (0.071) loss 1.2656 (1.6359) acc 65.6250 (57.8125) lr 1.6845e-03 eta 0:14:41 +epoch [15/50] batch [15/31] time 0.733 (0.772) data 0.000 (0.047) loss 1.3467 (1.5402) acc 75.0000 (60.8333) lr 1.6845e-03 eta 0:14:10 +epoch [15/50] batch 
[20/31] time 0.712 (0.757) data 0.000 (0.036) loss 1.7383 (1.5269) acc 59.3750 (61.7188) lr 1.6845e-03 eta 0:13:50 +epoch [15/50] batch [25/31] time 0.731 (0.752) data 0.000 (0.029) loss 1.6924 (1.5721) acc 68.7500 (61.1250) lr 1.6845e-03 eta 0:13:40 +epoch [15/50] batch [30/31] time 0.713 (0.746) data 0.000 (0.024) loss 2.2070 (1.5871) acc 46.8750 (60.6250) lr 1.6845e-03 eta 0:13:29 +epoch [16/50] batch [5/31] time 0.740 (0.874) data 0.000 (0.144) loss 1.4570 (1.6166) acc 65.6250 (60.0000) lr 1.6374e-03 eta 0:15:43 +epoch [16/50] batch [10/31] time 0.713 (0.795) data 0.000 (0.072) loss 1.4395 (1.6307) acc 59.3750 (60.9375) lr 1.6374e-03 eta 0:14:15 +epoch [16/50] batch [15/31] time 0.700 (0.770) data 0.000 (0.048) loss 1.6211 (1.5674) acc 59.3750 (61.0417) lr 1.6374e-03 eta 0:13:43 +epoch [16/50] batch [20/31] time 0.730 (0.757) data 0.000 (0.036) loss 1.4297 (1.5983) acc 62.5000 (59.8438) lr 1.6374e-03 eta 0:13:26 +epoch [16/50] batch [25/31] time 0.737 (0.749) data 0.000 (0.029) loss 1.4316 (1.5785) acc 71.8750 (60.8750) lr 1.6374e-03 eta 0:13:13 +epoch [16/50] batch [30/31] time 0.724 (0.743) data 0.000 (0.024) loss 1.7451 (1.6084) acc 59.3750 (60.8333) lr 1.6374e-03 eta 0:13:04 +epoch [17/50] batch [5/31] time 0.718 (0.886) data 0.000 (0.150) loss 1.1748 (1.6424) acc 68.7500 (60.6250) lr 1.5878e-03 eta 0:15:29 +epoch [17/50] batch [10/31] time 0.716 (0.803) data 0.000 (0.075) loss 1.3838 (1.6428) acc 62.5000 (62.5000) lr 1.5878e-03 eta 0:13:58 +epoch [17/50] batch [15/31] time 0.725 (0.776) data 0.000 (0.050) loss 1.3467 (1.6752) acc 59.3750 (60.0000) lr 1.5878e-03 eta 0:13:26 +epoch [17/50] batch [20/31] time 0.721 (0.764) data 0.000 (0.038) loss 1.4775 (1.6777) acc 71.8750 (60.6250) lr 1.5878e-03 eta 0:13:09 +epoch [17/50] batch [25/31] time 0.715 (0.754) data 0.000 (0.030) loss 1.8760 (1.6391) acc 65.6250 (61.0000) lr 1.5878e-03 eta 0:12:55 +epoch [17/50] batch [30/31] time 0.750 (0.749) data 0.000 (0.025) loss 1.1328 (1.6281) acc 78.1250 (61.2500) lr 
1.5878e-03 eta 0:12:46 +epoch [18/50] batch [5/31] time 0.723 (0.911) data 0.000 (0.181) loss 1.3408 (1.6861) acc 59.3750 (56.2500) lr 1.5358e-03 eta 0:15:27 +epoch [18/50] batch [10/31] time 0.701 (0.828) data 0.001 (0.091) loss 1.0293 (1.5789) acc 71.8750 (60.0000) lr 1.5358e-03 eta 0:13:59 +epoch [18/50] batch [15/31] time 0.705 (0.790) data 0.000 (0.060) loss 1.7305 (1.6817) acc 53.1250 (58.5417) lr 1.5358e-03 eta 0:13:16 +epoch [18/50] batch [20/31] time 0.708 (0.774) data 0.000 (0.045) loss 1.1211 (1.5775) acc 71.8750 (60.7812) lr 1.5358e-03 eta 0:12:56 +epoch [18/50] batch [25/31] time 0.704 (0.763) data 0.000 (0.036) loss 1.8330 (1.5348) acc 46.8750 (61.8750) lr 1.5358e-03 eta 0:12:41 +epoch [18/50] batch [30/31] time 0.749 (0.755) data 0.000 (0.030) loss 1.5410 (1.5273) acc 62.5000 (62.1875) lr 1.5358e-03 eta 0:12:30 +epoch [19/50] batch [5/31] time 0.730 (0.898) data 0.000 (0.159) loss 1.6406 (1.5437) acc 56.2500 (63.7500) lr 1.4818e-03 eta 0:14:46 +epoch [19/50] batch [10/31] time 0.768 (0.815) data 0.000 (0.081) loss 2.0156 (1.6523) acc 53.1250 (60.0000) lr 1.4818e-03 eta 0:13:20 +epoch [19/50] batch [15/31] time 0.712 (0.783) data 0.000 (0.054) loss 1.8066 (1.6204) acc 56.2500 (60.4167) lr 1.4818e-03 eta 0:12:44 +epoch [19/50] batch [20/31] time 0.722 (0.767) data 0.000 (0.041) loss 1.4404 (1.6136) acc 65.6250 (60.9375) lr 1.4818e-03 eta 0:12:25 +epoch [19/50] batch [25/31] time 0.742 (0.761) data 0.001 (0.033) loss 1.3809 (1.6314) acc 62.5000 (60.1250) lr 1.4818e-03 eta 0:12:15 +epoch [19/50] batch [30/31] time 0.870 (0.761) data 0.000 (0.027) loss 1.6455 (1.5865) acc 59.3750 (61.3542) lr 1.4818e-03 eta 0:12:11 +epoch [20/50] batch [5/31] time 0.724 (0.905) data 0.001 (0.169) loss 1.5205 (1.6414) acc 65.6250 (60.0000) lr 1.4258e-03 eta 0:14:25 +epoch [20/50] batch [10/31] time 0.720 (0.816) data 0.000 (0.085) loss 1.3984 (1.6748) acc 62.5000 (59.0625) lr 1.4258e-03 eta 0:12:56 +epoch [20/50] batch [15/31] time 0.722 (0.788) data 0.000 (0.056) loss 
1.4043 (1.6089) acc 62.5000 (60.2083) lr 1.4258e-03 eta 0:12:25 +epoch [20/50] batch [20/31] time 0.724 (0.772) data 0.000 (0.042) loss 1.6650 (1.6379) acc 62.5000 (59.3750) lr 1.4258e-03 eta 0:12:06 +epoch [20/50] batch [25/31] time 0.726 (0.761) data 0.000 (0.034) loss 2.2012 (1.6491) acc 40.6250 (59.0000) lr 1.4258e-03 eta 0:11:52 +epoch [20/50] batch [30/31] time 0.713 (0.756) data 0.000 (0.028) loss 1.9180 (1.6705) acc 62.5000 (59.3750) lr 1.4258e-03 eta 0:11:43 +epoch [21/50] batch [5/31] time 0.738 (0.884) data 0.000 (0.148) loss 2.6113 (1.6762) acc 53.1250 (60.6250) lr 1.3681e-03 eta 0:13:37 +epoch [21/50] batch [10/31] time 0.725 (0.804) data 0.000 (0.074) loss 1.1562 (1.6992) acc 62.5000 (59.3750) lr 1.3681e-03 eta 0:12:19 +epoch [21/50] batch [15/31] time 0.712 (0.779) data 0.000 (0.050) loss 1.6758 (1.6873) acc 59.3750 (58.7500) lr 1.3681e-03 eta 0:11:52 +epoch [21/50] batch [20/31] time 0.719 (0.765) data 0.000 (0.037) loss 1.7744 (1.6264) acc 56.2500 (60.3125) lr 1.3681e-03 eta 0:11:35 +epoch [21/50] batch [25/31] time 0.719 (0.755) data 0.000 (0.030) loss 1.4736 (1.6107) acc 62.5000 (60.8750) lr 1.3681e-03 eta 0:11:23 +epoch [21/50] batch [30/31] time 0.721 (0.753) data 0.000 (0.025) loss 1.5078 (1.6218) acc 65.6250 (60.1042) lr 1.3681e-03 eta 0:11:17 +epoch [22/50] batch [5/31] time 0.715 (0.924) data 0.001 (0.166) loss 1.8291 (1.8307) acc 50.0000 (56.8750) lr 1.3090e-03 eta 0:13:45 +epoch [22/50] batch [10/31] time 0.723 (0.826) data 0.000 (0.083) loss 2.1035 (1.6246) acc 46.8750 (60.0000) lr 1.3090e-03 eta 0:12:14 +epoch [22/50] batch [15/31] time 0.728 (0.793) data 0.000 (0.055) loss 1.1338 (1.5773) acc 68.7500 (61.2500) lr 1.3090e-03 eta 0:11:40 +epoch [22/50] batch [20/31] time 0.744 (0.775) data 0.000 (0.042) loss 1.7188 (1.5961) acc 56.2500 (60.9375) lr 1.3090e-03 eta 0:11:21 +epoch [22/50] batch [25/31] time 0.720 (0.764) data 0.000 (0.033) loss 2.1113 (1.6708) acc 56.2500 (59.6250) lr 1.3090e-03 eta 0:11:07 +epoch [22/50] batch [30/31] time 
0.716 (0.757) data 0.000 (0.028) loss 1.7900 (1.6758) acc 68.7500 (59.7917) lr 1.3090e-03 eta 0:10:57 +epoch [23/50] batch [5/31] time 0.712 (0.863) data 0.000 (0.131) loss 1.2100 (1.4666) acc 62.5000 (65.0000) lr 1.2487e-03 eta 0:12:24 +epoch [23/50] batch [10/31] time 0.725 (0.797) data 0.000 (0.066) loss 1.8398 (1.6086) acc 65.6250 (60.9375) lr 1.2487e-03 eta 0:11:23 +epoch [23/50] batch [15/31] time 0.726 (0.774) data 0.000 (0.044) loss 1.6787 (1.5918) acc 56.2500 (61.0417) lr 1.2487e-03 eta 0:11:00 +epoch [23/50] batch [20/31] time 0.746 (0.763) data 0.001 (0.033) loss 1.5420 (1.6033) acc 59.3750 (60.3125) lr 1.2487e-03 eta 0:10:46 +epoch [23/50] batch [25/31] time 0.715 (0.756) data 0.000 (0.026) loss 1.0625 (1.5332) acc 62.5000 (61.3750) lr 1.2487e-03 eta 0:10:37 +epoch [23/50] batch [30/31] time 0.725 (0.751) data 0.000 (0.022) loss 1.7617 (1.5220) acc 59.3750 (62.0833) lr 1.2487e-03 eta 0:10:29 +epoch [24/50] batch [5/31] time 0.757 (0.883) data 0.000 (0.137) loss 1.3057 (1.8410) acc 75.0000 (59.3750) lr 1.1874e-03 eta 0:12:15 +epoch [24/50] batch [10/31] time 0.720 (0.809) data 0.000 (0.069) loss 0.8594 (1.6861) acc 75.0000 (61.2500) lr 1.1874e-03 eta 0:11:08 +epoch [24/50] batch [15/31] time 0.718 (0.780) data 0.000 (0.046) loss 1.9756 (1.6884) acc 53.1250 (61.4583) lr 1.1874e-03 eta 0:10:41 +epoch [24/50] batch [20/31] time 0.714 (0.764) data 0.000 (0.035) loss 1.5332 (1.6208) acc 65.6250 (62.3438) lr 1.1874e-03 eta 0:10:24 +epoch [24/50] batch [25/31] time 0.749 (0.757) data 0.000 (0.028) loss 2.4199 (1.6206) acc 46.8750 (62.5000) lr 1.1874e-03 eta 0:10:15 +epoch [24/50] batch [30/31] time 0.729 (0.754) data 0.000 (0.023) loss 1.3809 (1.6350) acc 56.2500 (62.1875) lr 1.1874e-03 eta 0:10:08 +epoch [25/50] batch [5/31] time 0.723 (0.880) data 0.001 (0.137) loss 2.1895 (1.6566) acc 59.3750 (61.8750) lr 1.1253e-03 eta 0:11:44 +epoch [25/50] batch [10/31] time 0.723 (0.805) data 0.000 (0.069) loss 1.2793 (1.4903) acc 65.6250 (63.1250) lr 1.1253e-03 eta 
0:10:40 +epoch [25/50] batch [15/31] time 0.721 (0.781) data 0.000 (0.046) loss 1.3672 (1.5076) acc 62.5000 (63.3333) lr 1.1253e-03 eta 0:10:17 +epoch [25/50] batch [20/31] time 0.722 (0.767) data 0.000 (0.035) loss 1.0762 (1.4690) acc 75.0000 (65.0000) lr 1.1253e-03 eta 0:10:02 +epoch [25/50] batch [25/31] time 0.726 (0.758) data 0.000 (0.028) loss 1.4141 (1.4648) acc 59.3750 (64.3750) lr 1.1253e-03 eta 0:09:52 +epoch [25/50] batch [30/31] time 0.875 (0.757) data 0.000 (0.023) loss 1.9170 (1.4361) acc 56.2500 (64.8958) lr 1.1253e-03 eta 0:09:47 +epoch [26/50] batch [5/31] time 0.725 (0.868) data 0.000 (0.134) loss 1.9121 (1.5459) acc 53.1250 (62.5000) lr 1.0628e-03 eta 0:11:08 +epoch [26/50] batch [10/31] time 0.712 (0.794) data 0.000 (0.067) loss 1.2988 (1.4783) acc 68.7500 (64.6875) lr 1.0628e-03 eta 0:10:07 +epoch [26/50] batch [15/31] time 0.732 (0.769) data 0.000 (0.045) loss 1.3418 (1.4947) acc 81.2500 (65.8333) lr 1.0628e-03 eta 0:09:44 +epoch [26/50] batch [20/31] time 0.710 (0.761) data 0.000 (0.034) loss 1.1885 (1.4260) acc 65.6250 (66.7188) lr 1.0628e-03 eta 0:09:34 +epoch [26/50] batch [25/31] time 0.710 (0.752) data 0.000 (0.027) loss 1.8135 (1.4514) acc 62.5000 (66.5000) lr 1.0628e-03 eta 0:09:23 +epoch [26/50] batch [30/31] time 0.719 (0.746) data 0.000 (0.023) loss 1.8066 (1.4454) acc 56.2500 (66.4583) lr 1.0628e-03 eta 0:09:15 +epoch [27/50] batch [5/31] time 0.725 (0.909) data 0.002 (0.168) loss 1.8154 (1.4193) acc 56.2500 (63.7500) lr 1.0000e-03 eta 0:11:11 +epoch [27/50] batch [10/31] time 0.713 (0.814) data 0.001 (0.084) loss 2.0566 (1.6021) acc 59.3750 (62.1875) lr 1.0000e-03 eta 0:09:57 +epoch [27/50] batch [15/31] time 0.724 (0.784) data 0.001 (0.056) loss 1.6982 (1.5643) acc 62.5000 (62.5000) lr 1.0000e-03 eta 0:09:31 +epoch [27/50] batch [20/31] time 0.737 (0.770) data 0.000 (0.042) loss 1.0889 (1.5823) acc 68.7500 (61.5625) lr 1.0000e-03 eta 0:09:17 +epoch [27/50] batch [25/31] time 0.703 (0.761) data 0.000 (0.034) loss 1.5410 (1.5590) 
acc 56.2500 (62.3750) lr 1.0000e-03 eta 0:09:07 +epoch [27/50] batch [30/31] time 0.722 (0.754) data 0.001 (0.028) loss 1.9775 (1.5411) acc 62.5000 (63.4375) lr 1.0000e-03 eta 0:08:58 +epoch [28/50] batch [5/31] time 0.720 (0.866) data 0.000 (0.133) loss 0.9126 (1.4517) acc 75.0000 (58.7500) lr 9.3721e-04 eta 0:10:13 +epoch [28/50] batch [10/31] time 0.723 (0.804) data 0.000 (0.067) loss 1.0508 (1.4057) acc 68.7500 (63.1250) lr 9.3721e-04 eta 0:09:25 +epoch [28/50] batch [15/31] time 0.724 (0.777) data 0.000 (0.044) loss 1.5957 (1.5011) acc 68.7500 (62.7083) lr 9.3721e-04 eta 0:09:02 +epoch [28/50] batch [20/31] time 0.714 (0.764) data 0.000 (0.033) loss 1.6650 (1.4596) acc 59.3750 (63.9062) lr 9.3721e-04 eta 0:08:49 +epoch [28/50] batch [25/31] time 0.713 (0.755) data 0.000 (0.027) loss 1.6338 (1.4669) acc 53.1250 (63.7500) lr 9.3721e-04 eta 0:08:39 +epoch [28/50] batch [30/31] time 0.746 (0.749) data 0.000 (0.022) loss 1.2529 (1.4387) acc 68.7500 (63.9583) lr 9.3721e-04 eta 0:08:31 +epoch [29/50] batch [5/31] time 0.745 (0.894) data 0.001 (0.148) loss 1.5420 (1.4729) acc 65.6250 (65.0000) lr 8.7467e-04 eta 0:10:05 +epoch [29/50] batch [10/31] time 0.745 (0.810) data 0.000 (0.074) loss 1.3057 (1.4470) acc 56.2500 (66.5625) lr 8.7467e-04 eta 0:09:04 +epoch [29/50] batch [15/31] time 0.725 (0.779) data 0.001 (0.050) loss 1.5684 (1.4383) acc 65.6250 (65.6250) lr 8.7467e-04 eta 0:08:39 +epoch [29/50] batch [20/31] time 0.720 (0.763) data 0.000 (0.037) loss 2.0684 (1.4949) acc 53.1250 (64.5312) lr 8.7467e-04 eta 0:08:25 +epoch [29/50] batch [25/31] time 0.857 (0.761) data 0.000 (0.030) loss 2.0684 (1.5099) acc 50.0000 (64.2500) lr 8.7467e-04 eta 0:08:20 +epoch [29/50] batch [30/31] time 0.726 (0.756) data 0.000 (0.025) loss 1.1582 (1.5017) acc 78.1250 (64.3750) lr 8.7467e-04 eta 0:08:12 +epoch [30/50] batch [5/31] time 0.776 (0.899) data 0.000 (0.152) loss 1.2041 (1.4691) acc 59.3750 (65.0000) lr 8.1262e-04 eta 0:09:40 +epoch [30/50] batch [10/31] time 0.746 (0.814) 
data 0.000 (0.076) loss 1.9883 (1.5527) acc 56.2500 (62.8125) lr 8.1262e-04 eta 0:08:41 +epoch [30/50] batch [15/31] time 0.724 (0.783) data 0.000 (0.051) loss 1.5293 (1.5508) acc 62.5000 (64.1667) lr 8.1262e-04 eta 0:08:18 +epoch [30/50] batch [20/31] time 0.708 (0.767) data 0.000 (0.038) loss 1.4541 (1.5155) acc 56.2500 (64.0625) lr 8.1262e-04 eta 0:08:04 +epoch [30/50] batch [25/31] time 0.733 (0.760) data 0.000 (0.031) loss 1.5254 (1.4594) acc 62.5000 (66.0000) lr 8.1262e-04 eta 0:07:55 +epoch [30/50] batch [30/31] time 0.737 (0.754) data 0.001 (0.026) loss 1.1855 (1.4769) acc 62.5000 (64.4792) lr 8.1262e-04 eta 0:07:48 +epoch [31/50] batch [5/31] time 0.719 (0.899) data 0.001 (0.147) loss 1.1045 (1.3919) acc 62.5000 (61.2500) lr 7.5131e-04 eta 0:09:12 +epoch [31/50] batch [10/31] time 0.744 (0.812) data 0.001 (0.074) loss 1.3643 (1.3112) acc 71.8750 (65.0000) lr 7.5131e-04 eta 0:08:15 +epoch [31/50] batch [15/31] time 0.738 (0.784) data 0.000 (0.049) loss 1.2393 (1.4371) acc 65.6250 (63.5417) lr 7.5131e-04 eta 0:07:54 +epoch [31/50] batch [20/31] time 0.714 (0.769) data 0.000 (0.037) loss 1.2852 (1.4774) acc 71.8750 (62.8125) lr 7.5131e-04 eta 0:07:41 +epoch [31/50] batch [25/31] time 0.732 (0.767) data 0.000 (0.030) loss 1.3105 (1.4743) acc 65.6250 (62.8750) lr 7.5131e-04 eta 0:07:36 +epoch [31/50] batch [30/31] time 0.741 (0.760) data 0.001 (0.025) loss 1.6172 (1.4851) acc 62.5000 (62.8125) lr 7.5131e-04 eta 0:07:28 +epoch [32/50] batch [5/31] time 0.714 (0.904) data 0.000 (0.172) loss 1.6904 (1.4561) acc 59.3750 (65.0000) lr 6.9098e-04 eta 0:08:48 +epoch [32/50] batch [10/31] time 0.725 (0.816) data 0.000 (0.086) loss 1.0518 (1.4640) acc 71.8750 (64.6875) lr 6.9098e-04 eta 0:07:52 +epoch [32/50] batch [15/31] time 0.728 (0.785) data 0.000 (0.058) loss 1.9023 (1.4438) acc 56.2500 (66.2500) lr 6.9098e-04 eta 0:07:30 +epoch [32/50] batch [20/31] time 0.714 (0.770) data 0.000 (0.043) loss 1.7021 (1.4398) acc 71.8750 (67.0312) lr 6.9098e-04 eta 0:07:18 +epoch 
[32/50] batch [25/31] time 0.746 (0.761) data 0.000 (0.035) loss 1.2002 (1.4163) acc 81.2500 (67.3750) lr 6.9098e-04 eta 0:07:09 +epoch [32/50] batch [30/31] time 0.719 (0.755) data 0.000 (0.029) loss 1.3203 (1.3800) acc 65.6250 (67.8125) lr 6.9098e-04 eta 0:07:02 +epoch [33/50] batch [5/31] time 0.711 (0.866) data 0.000 (0.138) loss 1.6182 (1.3902) acc 62.5000 (66.8750) lr 6.3188e-04 eta 0:07:58 +epoch [33/50] batch [10/31] time 0.728 (0.796) data 0.000 (0.069) loss 1.0527 (1.4154) acc 75.0000 (65.9375) lr 6.3188e-04 eta 0:07:16 +epoch [33/50] batch [15/31] time 0.719 (0.776) data 0.000 (0.046) loss 1.4229 (1.4328) acc 62.5000 (67.0833) lr 6.3188e-04 eta 0:07:01 +epoch [33/50] batch [20/31] time 0.703 (0.766) data 0.000 (0.035) loss 1.2246 (1.4191) acc 78.1250 (67.1875) lr 6.3188e-04 eta 0:06:52 +epoch [33/50] batch [25/31] time 0.731 (0.757) data 0.000 (0.028) loss 1.2871 (1.4287) acc 59.3750 (66.1250) lr 6.3188e-04 eta 0:06:43 +epoch [33/50] batch [30/31] time 0.719 (0.751) data 0.000 (0.023) loss 1.7266 (1.4555) acc 62.5000 (65.2083) lr 6.3188e-04 eta 0:06:36 +epoch [34/50] batch [5/31] time 0.725 (0.921) data 0.000 (0.185) loss 1.1504 (1.1736) acc 81.2500 (69.3750) lr 5.7422e-04 eta 0:08:00 +epoch [34/50] batch [10/31] time 0.704 (0.820) data 0.000 (0.093) loss 0.9429 (1.1939) acc 68.7500 (68.4375) lr 5.7422e-04 eta 0:07:03 +epoch [34/50] batch [15/31] time 0.714 (0.785) data 0.000 (0.062) loss 1.3975 (1.2274) acc 71.8750 (67.0833) lr 5.7422e-04 eta 0:06:41 +epoch [34/50] batch [20/31] time 0.707 (0.771) data 0.000 (0.047) loss 1.6504 (1.2851) acc 56.2500 (66.8750) lr 5.7422e-04 eta 0:06:30 +epoch [34/50] batch [25/31] time 0.743 (0.760) data 0.000 (0.037) loss 1.3154 (1.3616) acc 59.3750 (65.8750) lr 5.7422e-04 eta 0:06:21 +epoch [34/50] batch [30/31] time 0.857 (0.759) data 0.000 (0.031) loss 0.9224 (1.3451) acc 75.0000 (66.0417) lr 5.7422e-04 eta 0:06:17 +epoch [35/50] batch [5/31] time 0.718 (0.884) data 0.000 (0.148) loss 1.8779 (1.4701) acc 65.6250 
(65.6250) lr 5.1825e-04 eta 0:07:14 +epoch [35/50] batch [10/31] time 0.717 (0.804) data 0.000 (0.074) loss 1.1260 (1.4158) acc 71.8750 (65.0000) lr 5.1825e-04 eta 0:06:30 +epoch [35/50] batch [15/31] time 0.723 (0.778) data 0.000 (0.050) loss 1.9346 (1.5205) acc 50.0000 (62.7083) lr 5.1825e-04 eta 0:06:14 +epoch [35/50] batch [20/31] time 0.721 (0.765) data 0.000 (0.037) loss 1.6484 (1.4582) acc 62.5000 (63.7500) lr 5.1825e-04 eta 0:06:04 +epoch [35/50] batch [25/31] time 0.719 (0.756) data 0.000 (0.030) loss 1.4844 (1.4939) acc 65.6250 (63.0000) lr 5.1825e-04 eta 0:05:56 +epoch [35/50] batch [30/31] time 0.722 (0.750) data 0.000 (0.025) loss 1.1279 (1.4517) acc 59.3750 (64.2708) lr 5.1825e-04 eta 0:05:49 +epoch [36/50] batch [5/31] time 0.728 (0.870) data 0.000 (0.135) loss 1.6270 (1.3496) acc 56.2500 (62.5000) lr 4.6417e-04 eta 0:06:40 +epoch [36/50] batch [10/31] time 0.737 (0.799) data 0.000 (0.068) loss 1.4375 (1.3771) acc 62.5000 (64.6875) lr 4.6417e-04 eta 0:06:03 +epoch [36/50] batch [15/31] time 0.718 (0.777) data 0.000 (0.045) loss 1.2412 (1.3666) acc 65.6250 (65.8333) lr 4.6417e-04 eta 0:05:49 +epoch [36/50] batch [20/31] time 0.727 (0.762) data 0.000 (0.034) loss 1.4902 (1.3723) acc 68.7500 (66.0938) lr 4.6417e-04 eta 0:05:39 +epoch [36/50] batch [25/31] time 0.731 (0.753) data 0.000 (0.027) loss 1.2881 (1.3960) acc 75.0000 (66.3750) lr 4.6417e-04 eta 0:05:31 +epoch [36/50] batch [30/31] time 0.728 (0.748) data 0.000 (0.023) loss 1.6660 (1.3882) acc 53.1250 (65.9375) lr 4.6417e-04 eta 0:05:25 +epoch [37/50] batch [5/31] time 0.747 (0.922) data 0.001 (0.177) loss 1.2607 (1.4236) acc 68.7500 (66.8750) lr 4.1221e-04 eta 0:06:35 +epoch [37/50] batch [10/31] time 0.717 (0.823) data 0.000 (0.089) loss 1.1602 (1.4248) acc 84.3750 (66.8750) lr 4.1221e-04 eta 0:05:48 +epoch [37/50] batch [15/31] time 0.731 (0.793) data 0.000 (0.059) loss 1.8906 (1.3835) acc 56.2500 (67.7083) lr 4.1221e-04 eta 0:05:32 +epoch [37/50] batch [20/31] time 0.717 (0.775) data 0.000 
(0.045) loss 0.7783 (1.3795) acc 84.3750 (67.1875) lr 4.1221e-04 eta 0:05:20 +epoch [37/50] batch [25/31] time 0.714 (0.763) data 0.000 (0.036) loss 1.4668 (1.3808) acc 59.3750 (67.3750) lr 4.1221e-04 eta 0:05:12 +epoch [37/50] batch [30/31] time 0.747 (0.757) data 0.000 (0.030) loss 1.8076 (1.4085) acc 56.2500 (66.4583) lr 4.1221e-04 eta 0:05:06 +epoch [38/50] batch [5/31] time 0.733 (0.898) data 0.000 (0.151) loss 1.6738 (1.4242) acc 50.0000 (61.2500) lr 3.6258e-04 eta 0:05:57 +epoch [38/50] batch [10/31] time 0.753 (0.816) data 0.000 (0.076) loss 1.8301 (1.3517) acc 53.1250 (64.0625) lr 3.6258e-04 eta 0:05:20 +epoch [38/50] batch [15/31] time 0.735 (0.788) data 0.000 (0.051) loss 1.2090 (1.3229) acc 68.7500 (66.4583) lr 3.6258e-04 eta 0:05:05 +epoch [38/50] batch [20/31] time 0.734 (0.771) data 0.000 (0.038) loss 0.9663 (1.2924) acc 75.0000 (67.1875) lr 3.6258e-04 eta 0:04:55 +epoch [38/50] batch [25/31] time 0.734 (0.762) data 0.000 (0.031) loss 1.5713 (1.3054) acc 68.7500 (67.6250) lr 3.6258e-04 eta 0:04:47 +epoch [38/50] batch [30/31] time 0.749 (0.757) data 0.001 (0.026) loss 1.2852 (1.3180) acc 71.8750 (67.8125) lr 3.6258e-04 eta 0:04:42 +epoch [39/50] batch [5/31] time 0.726 (0.888) data 0.000 (0.147) loss 1.0586 (1.0465) acc 71.8750 (72.5000) lr 3.1545e-04 eta 0:05:25 +epoch [39/50] batch [10/31] time 0.776 (0.811) data 0.000 (0.074) loss 1.2227 (1.2725) acc 75.0000 (70.0000) lr 3.1545e-04 eta 0:04:53 +epoch [39/50] batch [15/31] time 0.732 (0.788) data 0.000 (0.049) loss 0.6836 (1.2611) acc 84.3750 (70.0000) lr 3.1545e-04 eta 0:04:41 +epoch [39/50] batch [20/31] time 0.721 (0.782) data 0.000 (0.037) loss 1.2510 (1.2656) acc 75.0000 (70.6250) lr 3.1545e-04 eta 0:04:35 +epoch [39/50] batch [25/31] time 0.736 (0.772) data 0.000 (0.030) loss 1.1494 (1.2512) acc 81.2500 (71.1250) lr 3.1545e-04 eta 0:04:27 +epoch [39/50] batch [30/31] time 0.718 (0.764) data 0.000 (0.025) loss 1.4346 (1.3066) acc 65.6250 (70.4167) lr 3.1545e-04 eta 0:04:21 +epoch [40/50] batch 
[5/31] time 0.723 (0.877) data 0.001 (0.144) loss 1.3164 (1.2336) acc 71.8750 (72.5000) lr 2.7103e-04 eta 0:04:54 +epoch [40/50] batch [10/31] time 0.721 (0.801) data 0.000 (0.072) loss 1.2744 (1.3070) acc 71.8750 (69.0625) lr 2.7103e-04 eta 0:04:24 +epoch [40/50] batch [15/31] time 0.724 (0.775) data 0.000 (0.048) loss 1.1885 (1.3245) acc 71.8750 (67.7083) lr 2.7103e-04 eta 0:04:12 +epoch [40/50] batch [20/31] time 0.726 (0.763) data 0.000 (0.036) loss 1.3594 (1.3786) acc 68.7500 (65.9375) lr 2.7103e-04 eta 0:04:05 +epoch [40/50] batch [25/31] time 0.749 (0.757) data 0.000 (0.029) loss 1.4561 (1.3621) acc 71.8750 (66.1250) lr 2.7103e-04 eta 0:03:59 +epoch [40/50] batch [30/31] time 0.754 (0.752) data 0.000 (0.024) loss 1.6895 (1.3484) acc 56.2500 (66.4583) lr 2.7103e-04 eta 0:03:53 +epoch [41/50] batch [5/31] time 0.726 (0.874) data 0.000 (0.144) loss 1.3389 (1.6195) acc 71.8750 (62.5000) lr 2.2949e-04 eta 0:04:26 +epoch [41/50] batch [10/31] time 0.731 (0.815) data 0.000 (0.072) loss 1.4863 (1.5360) acc 71.8750 (63.7500) lr 2.2949e-04 eta 0:04:04 +epoch [41/50] batch [15/31] time 0.720 (0.783) data 0.000 (0.048) loss 1.2617 (1.4608) acc 75.0000 (65.8333) lr 2.2949e-04 eta 0:03:51 +epoch [41/50] batch [20/31] time 0.715 (0.769) data 0.001 (0.036) loss 1.2236 (1.4058) acc 68.7500 (67.0312) lr 2.2949e-04 eta 0:03:43 +epoch [41/50] batch [25/31] time 0.742 (0.763) data 0.000 (0.029) loss 1.5654 (1.3729) acc 59.3750 (67.8750) lr 2.2949e-04 eta 0:03:37 +epoch [41/50] batch [30/31] time 0.719 (0.757) data 0.000 (0.024) loss 1.1562 (1.3673) acc 68.7500 (68.0208) lr 2.2949e-04 eta 0:03:31 +epoch [42/50] batch [5/31] time 0.717 (0.876) data 0.000 (0.136) loss 1.8105 (1.5624) acc 62.5000 (63.7500) lr 1.9098e-04 eta 0:03:59 +epoch [42/50] batch [10/31] time 0.723 (0.800) data 0.000 (0.068) loss 1.4424 (1.5753) acc 68.7500 (62.5000) lr 1.9098e-04 eta 0:03:35 +epoch [42/50] batch [15/31] time 0.721 (0.773) data 0.000 (0.046) loss 0.9888 (1.4544) acc 78.1250 (65.2083) lr 
1.9098e-04 eta 0:03:24 +epoch [42/50] batch [20/31] time 0.731 (0.761) data 0.000 (0.034) loss 0.9478 (1.3591) acc 78.1250 (67.3438) lr 1.9098e-04 eta 0:03:17 +epoch [42/50] batch [25/31] time 0.736 (0.754) data 0.000 (0.027) loss 1.2979 (1.3502) acc 62.5000 (68.3750) lr 1.9098e-04 eta 0:03:11 +epoch [42/50] batch [30/31] time 0.731 (0.749) data 0.000 (0.023) loss 1.3369 (1.3863) acc 68.7500 (68.4375) lr 1.9098e-04 eta 0:03:06 +epoch [43/50] batch [5/31] time 0.712 (0.862) data 0.000 (0.132) loss 1.1885 (1.3705) acc 68.7500 (68.1250) lr 1.5567e-04 eta 0:03:29 +epoch [43/50] batch [10/31] time 0.754 (0.803) data 0.000 (0.066) loss 1.6816 (1.3585) acc 62.5000 (66.8750) lr 1.5567e-04 eta 0:03:11 +epoch [43/50] batch [15/31] time 0.730 (0.776) data 0.001 (0.044) loss 1.2686 (1.2755) acc 71.8750 (68.3333) lr 1.5567e-04 eta 0:03:00 +epoch [43/50] batch [20/31] time 0.732 (0.770) data 0.001 (0.033) loss 1.5957 (1.3249) acc 53.1250 (66.5625) lr 1.5567e-04 eta 0:02:55 +epoch [43/50] batch [25/31] time 0.721 (0.760) data 0.000 (0.027) loss 1.5312 (1.3196) acc 59.3750 (66.7500) lr 1.5567e-04 eta 0:02:49 +epoch [43/50] batch [30/31] time 0.737 (0.754) data 0.000 (0.022) loss 1.5537 (1.3343) acc 59.3750 (66.3542) lr 1.5567e-04 eta 0:02:44 +epoch [44/50] batch [5/31] time 0.726 (0.888) data 0.000 (0.149) loss 1.4229 (1.2300) acc 68.7500 (70.0000) lr 1.2369e-04 eta 0:03:08 +epoch [44/50] batch [10/31] time 0.722 (0.807) data 0.000 (0.075) loss 1.2061 (1.2895) acc 75.0000 (70.9375) lr 1.2369e-04 eta 0:02:46 +epoch [44/50] batch [15/31] time 0.733 (0.783) data 0.001 (0.050) loss 0.8281 (1.3459) acc 81.2500 (69.3750) lr 1.2369e-04 eta 0:02:38 +epoch [44/50] batch [20/31] time 0.730 (0.769) data 0.000 (0.038) loss 1.6328 (1.4417) acc 65.6250 (66.7188) lr 1.2369e-04 eta 0:02:31 +epoch [44/50] batch [25/31] time 0.729 (0.761) data 0.000 (0.030) loss 1.9424 (1.4235) acc 62.5000 (66.7500) lr 1.2369e-04 eta 0:02:26 +epoch [44/50] batch [30/31] time 0.745 (0.755) data 0.000 (0.025) loss 
1.3076 (1.4137) acc 68.7500 (67.0833) lr 1.2369e-04 eta 0:02:21 +epoch [45/50] batch [5/31] time 0.719 (0.877) data 0.000 (0.136) loss 1.3789 (1.1049) acc 62.5000 (75.6250) lr 9.5173e-05 eta 0:02:38 +epoch [45/50] batch [10/31] time 0.733 (0.800) data 0.000 (0.068) loss 1.9229 (1.2540) acc 53.1250 (71.5625) lr 9.5173e-05 eta 0:02:20 +epoch [45/50] batch [15/31] time 0.739 (0.775) data 0.001 (0.045) loss 1.3770 (1.2667) acc 59.3750 (68.9583) lr 9.5173e-05 eta 0:02:12 +epoch [45/50] batch [20/31] time 0.714 (0.764) data 0.000 (0.034) loss 1.2451 (1.2534) acc 68.7500 (68.7500) lr 9.5173e-05 eta 0:02:06 +epoch [45/50] batch [25/31] time 0.775 (0.757) data 0.000 (0.027) loss 1.4756 (1.3145) acc 65.6250 (67.3750) lr 9.5173e-05 eta 0:02:01 +epoch [45/50] batch [30/31] time 0.726 (0.751) data 0.000 (0.023) loss 1.9219 (1.3613) acc 53.1250 (66.4583) lr 9.5173e-05 eta 0:01:57 +epoch [46/50] batch [5/31] time 0.715 (0.863) data 0.000 (0.135) loss 1.4121 (1.3058) acc 65.6250 (65.0000) lr 7.0224e-05 eta 0:02:09 +epoch [46/50] batch [10/31] time 0.723 (0.794) data 0.000 (0.068) loss 1.3779 (1.1782) acc 75.0000 (68.7500) lr 7.0224e-05 eta 0:01:55 +epoch [46/50] batch [15/31] time 0.731 (0.771) data 0.000 (0.045) loss 1.3369 (1.1998) acc 68.7500 (68.7500) lr 7.0224e-05 eta 0:01:47 +epoch [46/50] batch [20/31] time 0.719 (0.759) data 0.000 (0.034) loss 1.7373 (1.2597) acc 62.5000 (67.0312) lr 7.0224e-05 eta 0:01:42 +epoch [46/50] batch [25/31] time 0.723 (0.752) data 0.000 (0.027) loss 1.0625 (1.2968) acc 62.5000 (65.5000) lr 7.0224e-05 eta 0:01:37 +epoch [46/50] batch [30/31] time 0.737 (0.748) data 0.000 (0.023) loss 1.3330 (1.2746) acc 68.7500 (66.0417) lr 7.0224e-05 eta 0:01:33 +epoch [47/50] batch [5/31] time 0.722 (0.904) data 0.000 (0.165) loss 1.3525 (1.0114) acc 71.8750 (73.1250) lr 4.8943e-05 eta 0:01:47 +epoch [47/50] batch [10/31] time 0.734 (0.814) data 0.001 (0.082) loss 1.4824 (1.2396) acc 56.2500 (68.4375) lr 4.8943e-05 eta 0:01:32 +epoch [47/50] batch [15/31] time 
0.714 (0.785) data 0.000 (0.055) loss 1.4648 (1.2509) acc 71.8750 (69.5833) lr 4.8943e-05 eta 0:01:25 +epoch [47/50] batch [20/31] time 0.721 (0.771) data 0.000 (0.041) loss 1.3281 (1.2389) acc 65.6250 (69.6875) lr 4.8943e-05 eta 0:01:20 +epoch [47/50] batch [25/31] time 0.722 (0.762) data 0.001 (0.033) loss 1.5146 (1.2389) acc 59.3750 (69.2500) lr 4.8943e-05 eta 0:01:15 +epoch [47/50] batch [30/31] time 0.730 (0.755) data 0.000 (0.028) loss 1.6680 (1.3007) acc 62.5000 (68.1250) lr 4.8943e-05 eta 0:01:11 +epoch [48/50] batch [5/31] time 0.747 (0.891) data 0.000 (0.155) loss 1.4346 (1.5248) acc 62.5000 (65.0000) lr 3.1417e-05 eta 0:01:18 +epoch [48/50] batch [10/31] time 0.718 (0.806) data 0.000 (0.078) loss 0.9692 (1.3942) acc 78.1250 (67.8125) lr 3.1417e-05 eta 0:01:06 +epoch [48/50] batch [15/31] time 0.713 (0.780) data 0.000 (0.052) loss 1.2432 (1.3733) acc 62.5000 (66.0417) lr 3.1417e-05 eta 0:01:00 +epoch [48/50] batch [20/31] time 0.710 (0.764) data 0.000 (0.039) loss 1.7451 (1.3773) acc 56.2500 (66.0938) lr 3.1417e-05 eta 0:00:55 +epoch [48/50] batch [25/31] time 0.718 (0.755) data 0.000 (0.031) loss 0.8774 (1.3236) acc 78.1250 (68.5000) lr 3.1417e-05 eta 0:00:51 +epoch [48/50] batch [30/31] time 0.724 (0.749) data 0.000 (0.026) loss 1.2100 (1.3621) acc 75.0000 (68.0208) lr 3.1417e-05 eta 0:00:47 +epoch [49/50] batch [5/31] time 0.725 (0.889) data 0.000 (0.150) loss 1.5684 (1.2110) acc 68.7500 (70.6250) lr 1.7713e-05 eta 0:00:50 +epoch [49/50] batch [10/31] time 0.727 (0.804) data 0.000 (0.075) loss 0.9038 (1.2394) acc 87.5000 (70.3125) lr 1.7713e-05 eta 0:00:41 +epoch [49/50] batch [15/31] time 0.717 (0.782) data 0.000 (0.050) loss 1.8975 (1.2426) acc 62.5000 (72.2917) lr 1.7713e-05 eta 0:00:36 +epoch [49/50] batch [20/31] time 0.733 (0.768) data 0.000 (0.038) loss 1.8271 (1.3422) acc 53.1250 (68.4375) lr 1.7713e-05 eta 0:00:32 +epoch [49/50] batch [25/31] time 0.734 (0.760) data 0.000 (0.030) loss 0.9790 (1.3547) acc 68.7500 (68.2500) lr 1.7713e-05 eta 
0:00:28 +epoch [49/50] batch [30/31] time 0.718 (0.754) data 0.000 (0.025) loss 2.4453 (1.3583) acc 53.1250 (67.6042) lr 1.7713e-05 eta 0:00:24 +epoch [50/50] batch [5/31] time 0.747 (0.910) data 0.000 (0.156) loss 1.4131 (1.3474) acc 65.6250 (67.5000) lr 7.8853e-06 eta 0:00:23 +epoch [50/50] batch [10/31] time 0.733 (0.823) data 0.000 (0.078) loss 1.1445 (1.3498) acc 71.8750 (67.1875) lr 7.8853e-06 eta 0:00:17 +epoch [50/50] batch [15/31] time 0.713 (0.796) data 0.001 (0.052) loss 1.5879 (1.3459) acc 71.8750 (67.7083) lr 7.8853e-06 eta 0:00:12 +epoch [50/50] batch [20/31] time 0.724 (0.777) data 0.000 (0.039) loss 1.2510 (1.3591) acc 65.6250 (67.3438) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [25/31] time 0.726 (0.768) data 0.000 (0.032) loss 1.1650 (1.3461) acc 59.3750 (67.1250) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [30/31] time 0.739 (0.762) data 0.000 (0.026) loss 1.1816 (1.3125) acc 78.1250 (67.9167) lr 7.8853e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 30,681 +* accuracy: 61.4% +* error: 38.6% +* macro_f1: 60.3% +Elapsed: 0:22:24 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..1ca4bb2e Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1697832363.ckb-gpu-lambda.2133735.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1697832363.ckb-gpu-lambda.2133735.0 new file mode 100644 index 00000000..ce8ade94 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_b32_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1697832363.ckb-gpu-lambda.2133735.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..cdb1be0c --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,582 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** 
+************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + 
P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.077 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 
+Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch 
+[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/500] time 1.548 (2.654) data 0.000 (0.196) loss 2.5957 (3.2949) acc 37.5000 (35.6250) lr 1.0000e-05 eta 18:25:26 +epoch [1/50] batch [10/500] time 1.552 (2.102) data 0.000 (0.098) loss 2.7363 (3.0840) acc 43.7500 (40.0000) lr 1.0000e-05 eta 14:35:25 +epoch [1/50] batch [15/500] time 1.516 (1.918) data 0.000 (0.066) loss 2.2109 (2.7854) acc 50.0000 (44.5833) lr 1.0000e-05 eta 13:18:34 +epoch [1/50] batch [20/500] time 1.534 (1.824) data 0.001 (0.049) loss 2.5312 (2.6285) acc 50.0000 (48.4375) lr 1.0000e-05 eta 12:39:17 +epoch [1/50] batch [25/500] time 1.548 (1.767) data 0.000 (0.040) loss 1.8896 (2.5264) acc 56.2500 (49.1250) lr 1.0000e-05 eta 12:15:39 +epoch [1/50] batch [30/500] time 1.540 (1.730) data 0.000 (0.033) loss 1.5879 (2.4069) acc 65.6250 (51.1458) lr 1.0000e-05 eta 12:00:09 +epoch [1/50] batch [35/500] time 1.575 (1.706) data 0.000 (0.028) loss 1.3916 (2.3061) acc 59.3750 (51.7857) lr 1.0000e-05 eta 11:49:50 +epoch [1/50] batch [40/500] time 1.542 (1.687) data 0.001 (0.025) loss 2.0996 (2.2664) acc 56.2500 (52.4219) lr 1.0000e-05 eta 11:41:43 +epoch [1/50] batch [45/500] time 1.544 (1.673) data 0.000 (0.022) loss 2.7988 (2.2370) acc 43.7500 (52.7778) lr 1.0000e-05 eta 11:35:46 +epoch [1/50] batch [50/500] time 1.534 (1.662) data 0.000 (0.020) loss 1.8828 (2.1898) acc 59.3750 (53.0625) lr 1.0000e-05 eta 11:30:56 +epoch [1/50] batch [55/500] time 1.554 (1.652) data 0.000 (0.018) loss 1.3057 (2.1259) acc 78.1250 (54.3182) lr 1.0000e-05 eta 11:26:50 +epoch [1/50] batch [60/500] time 1.550 (1.643) data 0.000 (0.017) loss 1.0781 (2.0803) acc 68.7500 (54.7396) lr 1.0000e-05 eta 11:23:05 +epoch [1/50] batch [65/500] time 1.566 (1.637) data 0.000 (0.015) loss 1.3174 (2.0519) acc 59.3750 (54.9519) lr 1.0000e-05 eta 11:20:14 +epoch [1/50] 
batch [70/500] time 1.551 (1.632) data 0.000 (0.014) loss 2.0801 (2.0319) acc 62.5000 (55.1786) lr 1.0000e-05 eta 11:17:54 +epoch [1/50] batch [75/500] time 1.552 (1.626) data 0.000 (0.013) loss 1.3330 (1.9988) acc 65.6250 (55.7083) lr 1.0000e-05 eta 11:15:39 +epoch [1/50] batch [80/500] time 1.559 (1.623) data 0.000 (0.013) loss 1.0049 (1.9743) acc 68.7500 (56.4062) lr 1.0000e-05 eta 11:13:56 +epoch [1/50] batch [85/500] time 1.563 (1.619) data 0.000 (0.012) loss 1.5674 (1.9595) acc 71.8750 (56.8015) lr 1.0000e-05 eta 11:12:27 +epoch [1/50] batch [90/500] time 1.582 (1.617) data 0.000 (0.011) loss 2.5020 (1.9502) acc 50.0000 (56.7708) lr 1.0000e-05 eta 11:11:08 +epoch [1/50] batch [95/500] time 1.569 (1.614) data 0.000 (0.011) loss 1.5625 (1.9372) acc 65.6250 (57.2368) lr 1.0000e-05 eta 11:10:04 +epoch [1/50] batch [100/500] time 1.546 (1.612) data 0.001 (0.010) loss 1.6943 (1.9139) acc 62.5000 (57.6562) lr 1.0000e-05 eta 11:08:51 +epoch [1/50] batch [105/500] time 1.563 (1.609) data 0.000 (0.010) loss 1.8271 (1.9145) acc 68.7500 (57.5595) lr 1.0000e-05 eta 11:07:46 +epoch [1/50] batch [110/500] time 1.546 (1.607) data 0.000 (0.009) loss 1.5078 (1.8877) acc 68.7500 (58.3239) lr 1.0000e-05 eta 11:06:35 +epoch [1/50] batch [115/500] time 1.555 (1.605) data 0.000 (0.009) loss 1.9453 (1.8722) acc 53.1250 (58.5598) lr 1.0000e-05 eta 11:05:32 +epoch [1/50] batch [120/500] time 1.548 (1.603) data 0.000 (0.009) loss 2.4883 (1.8671) acc 40.6250 (58.5677) lr 1.0000e-05 eta 11:04:33 +epoch [1/50] batch [125/500] time 1.566 (1.601) data 0.000 (0.008) loss 2.1230 (1.8599) acc 46.8750 (58.7000) lr 1.0000e-05 eta 11:03:43 +epoch [1/50] batch [130/500] time 1.553 (1.599) data 0.000 (0.008) loss 1.8857 (1.8599) acc 62.5000 (58.6298) lr 1.0000e-05 eta 11:02:56 +epoch [1/50] batch [135/500] time 1.557 (1.598) data 0.000 (0.008) loss 1.8574 (1.8511) acc 56.2500 (58.7037) lr 1.0000e-05 eta 11:02:14 +epoch [1/50] batch [140/500] time 1.552 (1.597) data 0.000 (0.007) loss 1.6787 
(1.8529) acc 56.2500 (58.5268) lr 1.0000e-05 eta 11:01:34 +epoch [1/50] batch [145/500] time 1.561 (1.595) data 0.000 (0.007) loss 1.5439 (1.8515) acc 62.5000 (58.4052) lr 1.0000e-05 eta 11:00:50 +epoch [1/50] batch [150/500] time 1.597 (1.594) data 0.000 (0.007) loss 2.3809 (1.8468) acc 43.7500 (58.5625) lr 1.0000e-05 eta 11:00:08 +epoch [1/50] batch [155/500] time 1.551 (1.593) data 0.000 (0.007) loss 1.3672 (1.8343) acc 59.3750 (58.6492) lr 1.0000e-05 eta 10:59:34 +epoch [1/50] batch [160/500] time 1.545 (1.593) data 0.000 (0.007) loss 0.8252 (1.8258) acc 81.2500 (58.7305) lr 1.0000e-05 eta 10:59:22 +epoch [1/50] batch [165/500] time 1.558 (1.591) data 0.000 (0.006) loss 1.6670 (1.8149) acc 53.1250 (58.8826) lr 1.0000e-05 eta 10:58:39 +epoch [1/50] batch [170/500] time 1.559 (1.590) data 0.000 (0.006) loss 1.0439 (1.8017) acc 71.8750 (59.1360) lr 1.0000e-05 eta 10:58:05 +epoch [1/50] batch [175/500] time 1.574 (1.589) data 0.000 (0.006) loss 1.9434 (1.7958) acc 59.3750 (59.1786) lr 1.0000e-05 eta 10:57:35 +epoch [1/50] batch [180/500] time 1.557 (1.589) data 0.001 (0.006) loss 1.8701 (1.7892) acc 59.3750 (59.1840) lr 1.0000e-05 eta 10:57:12 +epoch [1/50] batch [185/500] time 1.540 (1.588) data 0.000 (0.006) loss 1.3232 (1.7843) acc 65.6250 (59.2568) lr 1.0000e-05 eta 10:56:47 +epoch [1/50] batch [190/500] time 1.560 (1.587) data 0.000 (0.006) loss 1.3926 (1.7733) acc 68.7500 (59.5230) lr 1.0000e-05 eta 10:56:19 +epoch [1/50] batch [195/500] time 1.550 (1.586) data 0.001 (0.005) loss 1.6104 (1.7688) acc 59.3750 (59.6314) lr 1.0000e-05 eta 10:55:52 +epoch [1/50] batch [200/500] time 1.574 (1.586) data 0.000 (0.005) loss 1.1621 (1.7606) acc 68.7500 (59.7656) lr 1.0000e-05 eta 10:55:24 +epoch [1/50] batch [205/500] time 1.554 (1.586) data 0.000 (0.005) loss 1.0078 (1.7479) acc 68.7500 (59.9390) lr 1.0000e-05 eta 10:55:17 +epoch [1/50] batch [210/500] time 1.561 (1.585) data 0.001 (0.005) loss 1.6758 (1.7415) acc 68.7500 (60.1190) lr 1.0000e-05 eta 10:54:56 +epoch 
[1/50] batch [215/500] time 1.560 (1.585) data 0.001 (0.005) loss 1.3613 (1.7380) acc 75.0000 (60.2471) lr 1.0000e-05 eta 10:54:36 +epoch [1/50] batch [220/500] time 1.572 (1.584) data 0.000 (0.005) loss 1.2627 (1.7310) acc 65.6250 (60.4261) lr 1.0000e-05 eta 10:54:18 +epoch [1/50] batch [225/500] time 1.576 (1.584) data 0.000 (0.005) loss 2.0684 (1.7332) acc 59.3750 (60.4167) lr 1.0000e-05 eta 10:54:02 +epoch [1/50] batch [230/500] time 1.539 (1.583) data 0.000 (0.005) loss 1.6084 (1.7276) acc 56.2500 (60.4348) lr 1.0000e-05 eta 10:53:42 +epoch [1/50] batch [235/500] time 1.555 (1.583) data 0.000 (0.005) loss 1.5430 (1.7261) acc 62.5000 (60.4654) lr 1.0000e-05 eta 10:53:20 +epoch [1/50] batch [240/500] time 1.565 (1.582) data 0.000 (0.005) loss 1.5811 (1.7237) acc 65.6250 (60.5859) lr 1.0000e-05 eta 10:52:55 +epoch [1/50] batch [245/500] time 1.566 (1.582) data 0.000 (0.004) loss 1.4033 (1.7219) acc 68.7500 (60.6378) lr 1.0000e-05 eta 10:52:36 +epoch [1/50] batch [250/500] time 1.541 (1.581) data 0.000 (0.004) loss 1.4551 (1.7234) acc 68.7500 (60.6125) lr 1.0000e-05 eta 10:52:14 +epoch [1/50] batch [255/500] time 1.581 (1.581) data 0.000 (0.004) loss 1.5400 (1.7234) acc 59.3750 (60.6618) lr 1.0000e-05 eta 10:51:58 +epoch [1/50] batch [260/500] time 1.560 (1.581) data 0.000 (0.004) loss 2.3906 (1.7175) acc 53.1250 (60.7933) lr 1.0000e-05 eta 10:51:41 +epoch [1/50] batch [265/500] time 1.531 (1.580) data 0.000 (0.004) loss 1.7559 (1.7127) acc 65.6250 (60.9552) lr 1.0000e-05 eta 10:51:24 +epoch [1/50] batch [270/500] time 1.551 (1.579) data 0.000 (0.004) loss 1.3486 (1.7093) acc 65.6250 (61.0301) lr 1.0000e-05 eta 10:51:00 +epoch [1/50] batch [275/500] time 1.583 (1.579) data 0.000 (0.004) loss 1.8271 (1.7078) acc 71.8750 (61.0682) lr 1.0000e-05 eta 10:50:47 +epoch [1/50] batch [280/500] time 1.556 (1.579) data 0.000 (0.004) loss 2.8652 (1.7116) acc 46.8750 (61.0491) lr 1.0000e-05 eta 10:50:32 +epoch [1/50] batch [285/500] time 1.598 (1.579) data 0.000 (0.004) loss 
1.1494 (1.7097) acc 75.0000 (61.1294) lr 1.0000e-05 eta 10:50:14 +epoch [1/50] batch [290/500] time 1.685 (1.580) data 0.000 (0.004) loss 2.1191 (1.7120) acc 59.3750 (61.1422) lr 1.0000e-05 eta 10:50:37 +epoch [1/50] batch [295/500] time 1.569 (1.580) data 0.000 (0.004) loss 2.1055 (1.7135) acc 46.8750 (61.1017) lr 1.0000e-05 eta 10:50:36 +epoch [1/50] batch [300/500] time 1.565 (1.580) data 0.000 (0.004) loss 1.2188 (1.7096) acc 68.7500 (61.1354) lr 1.0000e-05 eta 10:50:20 +epoch [1/50] batch [305/500] time 1.555 (1.580) data 0.000 (0.004) loss 1.2275 (1.7028) acc 68.7500 (61.2500) lr 1.0000e-05 eta 10:50:11 +epoch [1/50] batch [310/500] time 1.556 (1.580) data 0.000 (0.004) loss 1.5947 (1.6971) acc 65.6250 (61.3105) lr 1.0000e-05 eta 10:49:58 +epoch [1/50] batch [315/500] time 1.566 (1.579) data 0.000 (0.004) loss 2.0039 (1.6965) acc 59.3750 (61.3591) lr 1.0000e-05 eta 10:49:41 +epoch [1/50] batch [320/500] time 1.541 (1.579) data 0.000 (0.003) loss 1.1045 (1.6944) acc 78.1250 (61.4453) lr 1.0000e-05 eta 10:49:19 +epoch [1/50] batch [325/500] time 1.542 (1.578) data 0.000 (0.003) loss 1.3682 (1.6912) acc 71.8750 (61.5192) lr 1.0000e-05 eta 10:48:59 +epoch [1/50] batch [330/500] time 1.562 (1.578) data 0.000 (0.003) loss 2.4453 (1.6857) acc 46.8750 (61.6383) lr 1.0000e-05 eta 10:48:44 +epoch [1/50] batch [335/500] time 1.552 (1.577) data 0.000 (0.003) loss 1.8232 (1.6861) acc 65.6250 (61.6698) lr 1.0000e-05 eta 10:48:28 +epoch [1/50] batch [340/500] time 1.561 (1.577) data 0.000 (0.003) loss 0.9780 (1.6801) acc 78.1250 (61.7647) lr 1.0000e-05 eta 10:48:15 +epoch [1/50] batch [345/500] time 1.581 (1.577) data 0.000 (0.003) loss 1.6279 (1.6789) acc 59.3750 (61.7663) lr 1.0000e-05 eta 10:48:05 +epoch [1/50] batch [350/500] time 1.534 (1.577) data 0.000 (0.003) loss 1.7861 (1.6800) acc 65.6250 (61.8036) lr 1.0000e-05 eta 10:47:55 +epoch [1/50] batch [355/500] time 1.567 (1.577) data 0.000 (0.003) loss 1.4678 (1.6768) acc 62.5000 (61.8310) lr 1.0000e-05 eta 10:47:39 
+epoch [1/50] batch [360/500] time 1.546 (1.577) data 0.000 (0.003) loss 1.8447 (1.6742) acc 56.2500 (61.8576) lr 1.0000e-05 eta 10:47:25 +epoch [1/50] batch [365/500] time 1.539 (1.576) data 0.000 (0.003) loss 1.0889 (1.6704) acc 68.7500 (61.8921) lr 1.0000e-05 eta 10:47:08 +epoch [1/50] batch [370/500] time 1.558 (1.576) data 0.000 (0.003) loss 1.4863 (1.6713) acc 59.3750 (61.8666) lr 1.0000e-05 eta 10:46:53 +epoch [1/50] batch [375/500] time 1.555 (1.576) data 0.000 (0.003) loss 1.3115 (1.6675) acc 65.6250 (61.9250) lr 1.0000e-05 eta 10:46:42 +epoch [1/50] batch [380/500] time 1.542 (1.575) data 0.000 (0.003) loss 1.2100 (1.6659) acc 71.8750 (61.8914) lr 1.0000e-05 eta 10:46:26 +epoch [1/50] batch [385/500] time 1.555 (1.575) data 0.000 (0.003) loss 1.1992 (1.6654) acc 62.5000 (61.9075) lr 1.0000e-05 eta 10:46:11 +epoch [1/50] batch [390/500] time 1.550 (1.575) data 0.000 (0.003) loss 2.3633 (1.6621) acc 53.1250 (62.0353) lr 1.0000e-05 eta 10:45:58 +epoch [1/50] batch [395/500] time 1.565 (1.575) data 0.000 (0.003) loss 0.9912 (1.6581) acc 75.0000 (62.1282) lr 1.0000e-05 eta 10:45:44 +epoch [1/50] batch [400/500] time 1.584 (1.575) data 0.000 (0.003) loss 1.9746 (1.6596) acc 46.8750 (62.0938) lr 1.0000e-05 eta 10:45:36 +epoch [1/50] batch [405/500] time 1.594 (1.575) data 0.000 (0.003) loss 1.0908 (1.6587) acc 68.7500 (62.0988) lr 1.0000e-05 eta 10:45:26 +epoch [1/50] batch [410/500] time 1.564 (1.574) data 0.000 (0.003) loss 1.1006 (1.6530) acc 75.0000 (62.2332) lr 1.0000e-05 eta 10:45:12 +epoch [1/50] batch [415/500] time 1.539 (1.574) data 0.000 (0.003) loss 1.3232 (1.6466) acc 65.6250 (62.3494) lr 1.0000e-05 eta 10:45:01 +epoch [1/50] batch [420/500] time 1.545 (1.574) data 0.000 (0.003) loss 2.1602 (1.6463) acc 56.2500 (62.3438) lr 1.0000e-05 eta 10:44:47 +epoch [1/50] batch [425/500] time 1.567 (1.574) data 0.000 (0.003) loss 1.2725 (1.6437) acc 75.0000 (62.3897) lr 1.0000e-05 eta 10:44:40 +epoch [1/50] batch [430/500] time 1.576 (1.574) data 0.000 (0.003) 
loss 1.1377 (1.6406) acc 53.1250 (62.4273) lr 1.0000e-05 eta 10:44:32 +epoch [1/50] batch [435/500] time 1.564 (1.574) data 0.000 (0.003) loss 2.1758 (1.6393) acc 59.3750 (62.4641) lr 1.0000e-05 eta 10:44:21 +epoch [1/50] batch [440/500] time 1.560 (1.574) data 0.000 (0.003) loss 1.5996 (1.6367) acc 71.8750 (62.5213) lr 1.0000e-05 eta 10:44:10 +epoch [1/50] batch [445/500] time 1.673 (1.574) data 0.000 (0.003) loss 1.3076 (1.6349) acc 71.8750 (62.5351) lr 1.0000e-05 eta 10:44:04 +epoch [1/50] batch [450/500] time 1.563 (1.574) data 0.000 (0.003) loss 2.0469 (1.6356) acc 68.7500 (62.5556) lr 1.0000e-05 eta 10:43:54 +epoch [1/50] batch [455/500] time 1.576 (1.574) data 0.000 (0.003) loss 2.0859 (1.6344) acc 56.2500 (62.5618) lr 1.0000e-05 eta 10:43:43 +epoch [1/50] batch [460/500] time 1.601 (1.574) data 0.000 (0.003) loss 1.6621 (1.6325) acc 71.8750 (62.6155) lr 1.0000e-05 eta 10:43:34 +epoch [1/50] batch [465/500] time 1.566 (1.573) data 0.000 (0.003) loss 1.7051 (1.6333) acc 65.6250 (62.6142) lr 1.0000e-05 eta 10:43:25 +epoch [1/50] batch [470/500] time 1.538 (1.573) data 0.000 (0.002) loss 1.7656 (1.6313) acc 71.8750 (62.6662) lr 1.0000e-05 eta 10:43:15 +epoch [1/50] batch [475/500] time 1.555 (1.573) data 0.001 (0.002) loss 1.2695 (1.6325) acc 62.5000 (62.6447) lr 1.0000e-05 eta 10:43:05 +epoch [1/50] batch [480/500] time 1.583 (1.573) data 0.000 (0.002) loss 1.6318 (1.6317) acc 62.5000 (62.6432) lr 1.0000e-05 eta 10:42:56 +epoch [1/50] batch [485/500] time 1.550 (1.573) data 0.001 (0.002) loss 1.5332 (1.6318) acc 53.1250 (62.6289) lr 1.0000e-05 eta 10:42:45 +epoch [1/50] batch [490/500] time 1.597 (1.573) data 0.000 (0.002) loss 1.7109 (1.6316) acc 62.5000 (62.6722) lr 1.0000e-05 eta 10:42:41 +epoch [1/50] batch [495/500] time 1.581 (1.573) data 0.000 (0.002) loss 1.1582 (1.6314) acc 75.0000 (62.6957) lr 1.0000e-05 eta 10:42:31 +epoch [1/50] batch [500/500] time 1.538 (1.573) data 0.000 (0.002) loss 1.7373 (1.6320) acc 59.3750 (62.6812) lr 2.0000e-03 eta 
10:42:18 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,378 +* accuracy: 72.8% +* error: 27.2% +* macro_f1: 71.6% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/500] time 1.541 (1.700) data 0.000 (0.170) loss 1.3867 (1.4484) acc 68.7500 (68.1250) lr 2.0000e-03 eta 11:33:58 +epoch [2/50] batch [10/500] time 1.553 (1.625) data 0.001 (0.085) loss 1.6123 (1.6553) acc 75.0000 (65.3125) lr 2.0000e-03 eta 11:03:19 +epoch [2/50] batch [15/500] time 1.557 (1.605) data 0.001 (0.057) loss 1.6123 (1.6129) acc 59.3750 (63.9583) lr 2.0000e-03 eta 10:54:57 +epoch [2/50] batch [20/500] time 1.559 (1.594) data 0.001 (0.043) loss 1.9463 (1.5927) acc 50.0000 (63.1250) lr 2.0000e-03 eta 10:50:21 +epoch [2/50] batch [25/500] time 1.569 (1.588) data 0.001 (0.034) loss 1.4678 (1.5658) acc 59.3750 (62.8750) lr 2.0000e-03 eta 10:47:40 +epoch [2/50] batch [30/500] time 1.562 (1.584) data 0.000 (0.029) loss 1.5146 (1.5066) acc 62.5000 (63.7500) lr 2.0000e-03 eta 10:45:59 +epoch [2/50] batch [35/500] time 1.559 (1.581) data 0.000 (0.025) loss 1.6230 (1.4516) acc 65.6250 (65.0000) lr 2.0000e-03 eta 10:44:30 +epoch [2/50] batch [40/500] time 1.568 (1.578) data 0.000 (0.022) loss 1.9307 (1.4441) acc 62.5000 (65.4688) lr 2.0000e-03 eta 10:43:06 +epoch [2/50] batch [45/500] time 1.550 (1.575) data 0.000 (0.019) loss 1.2168 (1.4396) acc 71.8750 (65.1389) lr 2.0000e-03 eta 10:42:00 +epoch [2/50] batch [50/500] time 1.560 (1.573) data 0.000 (0.017) loss 1.5312 (1.4308) acc 65.6250 (65.3750) lr 2.0000e-03 eta 10:40:56 +epoch [2/50] batch [55/500] time 1.576 (1.572) data 0.001 (0.016) loss 1.3662 (1.4116) acc 59.3750 (65.6250) lr 2.0000e-03 eta 10:40:33 +epoch [2/50] batch [60/500] time 1.572 (1.571) data 0.000 (0.015) loss 1.2666 (1.3945) acc 65.6250 (65.8854) lr 2.0000e-03 eta 10:39:55 +epoch [2/50] batch [65/500] time 1.544 (1.569) data 0.000 (0.013) loss 1.3574 
(1.4251) acc 65.6250 (65.1923) lr 2.0000e-03 eta 10:39:02 +epoch [2/50] batch [70/500] time 1.529 (1.568) data 0.000 (0.013) loss 1.7812 (1.4317) acc 62.5000 (65.3571) lr 2.0000e-03 eta 10:38:22 +epoch [2/50] batch [75/500] time 1.534 (1.567) data 0.001 (0.012) loss 1.1631 (1.4245) acc 65.6250 (65.5833) lr 2.0000e-03 eta 10:37:59 +epoch [2/50] batch [80/500] time 1.577 (1.566) data 0.000 (0.011) loss 1.2891 (1.4228) acc 78.1250 (65.9766) lr 2.0000e-03 eta 10:37:33 +epoch [2/50] batch [85/500] time 1.570 (1.566) data 0.000 (0.010) loss 0.7935 (1.4097) acc 84.3750 (66.0662) lr 2.0000e-03 eta 10:37:12 +epoch [2/50] batch [90/500] time 1.564 (1.565) data 0.001 (0.010) loss 1.0361 (1.4035) acc 75.0000 (66.1806) lr 2.0000e-03 eta 10:36:50 +epoch [2/50] batch [95/500] time 1.574 (1.565) data 0.000 (0.009) loss 1.7119 (1.4033) acc 59.3750 (66.0197) lr 2.0000e-03 eta 10:36:26 +epoch [2/50] batch [100/500] time 1.572 (1.565) data 0.001 (0.009) loss 0.5449 (1.3834) acc 90.6250 (66.4062) lr 2.0000e-03 eta 10:36:37 +epoch [2/50] batch [105/500] time 1.553 (1.565) data 0.001 (0.009) loss 0.6543 (1.3699) acc 75.0000 (66.5179) lr 2.0000e-03 eta 10:36:15 +epoch [2/50] batch [110/500] time 1.559 (1.565) data 0.000 (0.008) loss 2.1133 (1.3812) acc 53.1250 (66.5341) lr 2.0000e-03 eta 10:36:01 +epoch [2/50] batch [115/500] time 1.552 (1.564) data 0.000 (0.008) loss 1.4795 (1.3909) acc 71.8750 (66.4402) lr 2.0000e-03 eta 10:35:43 +epoch [2/50] batch [120/500] time 1.550 (1.563) data 0.000 (0.007) loss 1.0488 (1.3895) acc 75.0000 (66.4583) lr 2.0000e-03 eta 10:35:17 +epoch [2/50] batch [125/500] time 1.558 (1.563) data 0.000 (0.007) loss 1.5322 (1.3804) acc 59.3750 (66.5500) lr 2.0000e-03 eta 10:34:58 +epoch [2/50] batch [130/500] time 1.536 (1.563) data 0.000 (0.007) loss 1.3447 (1.3729) acc 68.7500 (66.6587) lr 2.0000e-03 eta 10:34:47 +epoch [2/50] batch [135/500] time 1.552 (1.563) data 0.000 (0.007) loss 1.1836 (1.3687) acc 65.6250 (66.6898) lr 2.0000e-03 eta 10:34:30 +epoch [2/50] 
batch [140/500] time 1.548 (1.562) data 0.000 (0.006) loss 1.6953 (1.3809) acc 59.3750 (66.5402) lr 2.0000e-03 eta 10:34:13 +epoch [2/50] batch [145/500] time 1.555 (1.563) data 0.001 (0.006) loss 1.3867 (1.3854) acc 59.3750 (66.3793) lr 2.0000e-03 eta 10:34:18 +epoch [2/50] batch [150/500] time 1.561 (1.562) data 0.000 (0.006) loss 1.5205 (1.3895) acc 56.2500 (66.2917) lr 2.0000e-03 eta 10:34:01 +epoch [2/50] batch [155/500] time 1.563 (1.562) data 0.000 (0.006) loss 1.0938 (1.3817) acc 62.5000 (66.4315) lr 2.0000e-03 eta 10:33:44 +epoch [2/50] batch [160/500] time 1.575 (1.562) data 0.000 (0.006) loss 1.6445 (1.3812) acc 59.3750 (66.4062) lr 2.0000e-03 eta 10:33:32 +epoch [2/50] batch [165/500] time 1.550 (1.562) data 0.000 (0.006) loss 1.8525 (1.3759) acc 59.3750 (66.6477) lr 2.0000e-03 eta 10:33:25 +epoch [2/50] batch [170/500] time 1.552 (1.561) data 0.001 (0.005) loss 1.5947 (1.3772) acc 56.2500 (66.6912) lr 2.0000e-03 eta 10:33:09 +epoch [2/50] batch [175/500] time 1.554 (1.561) data 0.000 (0.005) loss 1.7363 (1.3792) acc 62.5000 (66.5357) lr 2.0000e-03 eta 10:32:59 +epoch [2/50] batch [180/500] time 1.588 (1.562) data 0.001 (0.005) loss 1.5449 (1.3739) acc 71.8750 (66.7361) lr 2.0000e-03 eta 10:32:59 +epoch [2/50] batch [185/500] time 1.556 (1.561) data 0.000 (0.005) loss 1.0918 (1.3670) acc 71.8750 (66.7905) lr 2.0000e-03 eta 10:32:40 +epoch [2/50] batch [190/500] time 1.568 (1.561) data 0.000 (0.005) loss 1.3027 (1.3661) acc 78.1250 (66.7763) lr 2.0000e-03 eta 10:32:36 +epoch [2/50] batch [195/500] time 1.553 (1.561) data 0.000 (0.005) loss 1.3584 (1.3647) acc 68.7500 (66.7308) lr 2.0000e-03 eta 10:32:25 +epoch [2/50] batch [200/500] time 1.540 (1.561) data 0.000 (0.005) loss 2.5137 (1.3639) acc 50.0000 (66.7500) lr 2.0000e-03 eta 10:32:10 +epoch [2/50] batch [205/500] time 1.536 (1.561) data 0.000 (0.005) loss 1.8623 (1.3661) acc 50.0000 (66.7073) lr 2.0000e-03 eta 10:31:55 +epoch [2/50] batch [210/500] time 1.566 (1.561) data 0.000 (0.004) loss 1.2871 
(1.3612) acc 65.6250 (66.7262) lr 2.0000e-03 eta 10:31:51 +epoch [2/50] batch [215/500] time 1.533 (1.561) data 0.000 (0.004) loss 0.6880 (1.3559) acc 87.5000 (66.8605) lr 2.0000e-03 eta 10:31:43 +epoch [2/50] batch [220/500] time 1.555 (1.560) data 0.000 (0.004) loss 1.2783 (1.3520) acc 62.5000 (66.9602) lr 2.0000e-03 eta 10:31:26 +epoch [2/50] batch [225/500] time 1.542 (1.561) data 0.000 (0.004) loss 1.0176 (1.3480) acc 71.8750 (67.0139) lr 2.0000e-03 eta 10:31:21 +epoch [2/50] batch [230/500] time 1.545 (1.560) data 0.000 (0.004) loss 1.1494 (1.3513) acc 71.8750 (66.9837) lr 2.0000e-03 eta 10:31:01 +epoch [2/50] batch [235/500] time 1.574 (1.560) data 0.000 (0.004) loss 1.3701 (1.3494) acc 71.8750 (67.1011) lr 2.0000e-03 eta 10:30:55 +epoch [2/50] batch [240/500] time 1.545 (1.560) data 0.000 (0.004) loss 1.2910 (1.3506) acc 62.5000 (66.9922) lr 2.0000e-03 eta 10:30:54 +epoch [2/50] batch [245/500] time 1.560 (1.561) data 0.001 (0.004) loss 2.0156 (1.3481) acc 43.7500 (67.0153) lr 2.0000e-03 eta 10:30:56 +epoch [2/50] batch [250/500] time 1.544 (1.561) data 0.000 (0.004) loss 0.8706 (1.3490) acc 81.2500 (66.9500) lr 2.0000e-03 eta 10:30:42 +epoch [2/50] batch [255/500] time 1.548 (1.560) data 0.000 (0.004) loss 1.1543 (1.3473) acc 65.6250 (66.9730) lr 2.0000e-03 eta 10:30:23 +epoch [2/50] batch [260/500] time 1.567 (1.560) data 0.000 (0.004) loss 1.2666 (1.3454) acc 65.6250 (66.9231) lr 2.0000e-03 eta 10:30:15 +epoch [2/50] batch [265/500] time 1.562 (1.560) data 0.000 (0.004) loss 1.6523 (1.3472) acc 59.3750 (66.8868) lr 2.0000e-03 eta 10:30:04 +epoch [2/50] batch [270/500] time 1.555 (1.560) data 0.000 (0.004) loss 1.4375 (1.3450) acc 59.3750 (66.9213) lr 2.0000e-03 eta 10:29:55 +epoch [2/50] batch [275/500] time 1.568 (1.560) data 0.000 (0.003) loss 1.0410 (1.3423) acc 78.1250 (67.0227) lr 2.0000e-03 eta 10:29:45 +epoch [2/50] batch [280/500] time 1.553 (1.560) data 0.000 (0.003) loss 1.3076 (1.3415) acc 68.7500 (66.9866) lr 2.0000e-03 eta 10:29:40 +epoch 
[2/50] batch [285/500] time 1.548 (1.560) data 0.000 (0.003) loss 1.1865 (1.3402) acc 71.8750 (67.0395) lr 2.0000e-03 eta 10:29:28 +epoch [2/50] batch [290/500] time 1.549 (1.560) data 0.000 (0.003) loss 0.7808 (1.3359) acc 81.2500 (67.1336) lr 2.0000e-03 eta 10:29:20 +epoch [2/50] batch [295/500] time 1.552 (1.560) data 0.000 (0.003) loss 1.2275 (1.3338) acc 71.8750 (67.1928) lr 2.0000e-03 eta 10:29:08 +epoch [2/50] batch [300/500] time 1.574 (1.559) data 0.000 (0.003) loss 1.1143 (1.3328) acc 65.6250 (67.1667) lr 2.0000e-03 eta 10:28:56 +epoch [2/50] batch [305/500] time 1.534 (1.559) data 0.001 (0.003) loss 1.2148 (1.3365) acc 75.0000 (67.1721) lr 2.0000e-03 eta 10:28:44 +epoch [2/50] batch [310/500] time 1.527 (1.559) data 0.000 (0.003) loss 1.4824 (1.3344) acc 65.6250 (67.2581) lr 2.0000e-03 eta 10:28:32 +epoch [2/50] batch [315/500] time 1.548 (1.559) data 0.000 (0.003) loss 1.0566 (1.3325) acc 65.6250 (67.2718) lr 2.0000e-03 eta 10:28:23 +epoch [2/50] batch [320/500] time 1.589 (1.559) data 0.001 (0.003) loss 1.0850 (1.3319) acc 71.8750 (67.2754) lr 2.0000e-03 eta 10:28:16 +epoch [2/50] batch [325/500] time 1.572 (1.559) data 0.000 (0.003) loss 2.1191 (1.3308) acc 56.2500 (67.3365) lr 2.0000e-03 eta 10:28:07 +epoch [2/50] batch [330/500] time 1.578 (1.559) data 0.000 (0.003) loss 1.5439 (1.3299) acc 71.8750 (67.3864) lr 2.0000e-03 eta 10:27:58 +epoch [2/50] batch [335/500] time 1.548 (1.559) data 0.000 (0.003) loss 1.3525 (1.3271) acc 78.1250 (67.4627) lr 2.0000e-03 eta 10:27:49 +epoch [2/50] batch [340/500] time 1.560 (1.559) data 0.000 (0.003) loss 0.9536 (1.3258) acc 81.2500 (67.5368) lr 2.0000e-03 eta 10:27:44 +epoch [2/50] batch [345/500] time 1.562 (1.559) data 0.000 (0.003) loss 1.1748 (1.3237) acc 71.8750 (67.5815) lr 2.0000e-03 eta 10:27:37 +epoch [2/50] batch [350/500] time 1.572 (1.559) data 0.000 (0.003) loss 1.5605 (1.3274) acc 62.5000 (67.5536) lr 2.0000e-03 eta 10:27:30 +epoch [2/50] batch [355/500] time 1.547 (1.559) data 0.001 (0.003) loss 
1.6318 (1.3277) acc 65.6250 (67.5880) lr 2.0000e-03 eta 10:27:24 +epoch [2/50] batch [360/500] time 1.544 (1.559) data 0.000 (0.003) loss 0.8418 (1.3256) acc 81.2500 (67.6562) lr 2.0000e-03 eta 10:27:11 +epoch [2/50] batch [365/500] time 1.566 (1.559) data 0.000 (0.003) loss 1.0469 (1.3255) acc 78.1250 (67.6969) lr 2.0000e-03 eta 10:27:05 +epoch [2/50] batch [370/500] time 1.556 (1.559) data 0.000 (0.003) loss 1.4307 (1.3218) acc 71.8750 (67.8463) lr 2.0000e-03 eta 10:26:54 +epoch [2/50] batch [375/500] time 1.573 (1.559) data 0.000 (0.003) loss 0.7222 (1.3212) acc 81.2500 (67.8667) lr 2.0000e-03 eta 10:26:46 +epoch [2/50] batch [380/500] time 1.606 (1.559) data 0.001 (0.003) loss 0.8188 (1.3172) acc 81.2500 (67.9770) lr 2.0000e-03 eta 10:26:39 +epoch [2/50] batch [385/500] time 1.988 (1.562) data 0.001 (0.003) loss 1.2891 (1.3153) acc 68.7500 (68.0276) lr 2.0000e-03 eta 10:27:49 +epoch [2/50] batch [390/500] time 1.712 (1.564) data 0.001 (0.003) loss 1.3574 (1.3195) acc 68.7500 (67.9567) lr 2.0000e-03 eta 10:28:39 +epoch [2/50] batch [395/500] time 1.775 (1.566) data 0.000 (0.003) loss 1.4707 (1.3208) acc 62.5000 (67.9193) lr 2.0000e-03 eta 10:29:17 +epoch [2/50] batch [400/500] time 1.768 (1.569) data 0.002 (0.003) loss 1.4375 (1.3192) acc 59.3750 (67.8984) lr 2.0000e-03 eta 10:30:07 +epoch [2/50] batch [405/500] time 1.782 (1.571) data 0.001 (0.003) loss 0.9619 (1.3185) acc 81.2500 (67.8935) lr 2.0000e-03 eta 10:30:46 +epoch [2/50] batch [410/500] time 1.741 (1.573) data 0.000 (0.003) loss 1.0957 (1.3163) acc 75.0000 (67.9345) lr 2.0000e-03 eta 10:31:28 +epoch [2/50] batch [415/500] time 1.827 (1.575) data 0.000 (0.003) loss 1.0439 (1.3161) acc 75.0000 (67.9066) lr 2.0000e-03 eta 10:32:10 +epoch [2/50] batch [420/500] time 1.888 (1.577) data 0.001 (0.002) loss 1.0957 (1.3136) acc 71.8750 (67.9762) lr 2.0000e-03 eta 10:33:00 +epoch [2/50] batch [425/500] time 1.736 (1.579) data 0.002 (0.002) loss 0.9883 (1.3155) acc 84.3750 (67.9706) lr 2.0000e-03 eta 10:33:34 
+epoch [2/50] batch [430/500] time 1.678 (1.581) data 0.001 (0.002) loss 0.6963 (1.3140) acc 81.2500 (68.0451) lr 2.0000e-03 eta 10:34:21 +epoch [2/50] batch [435/500] time 1.574 (1.583) data 0.001 (0.002) loss 0.8965 (1.3135) acc 84.3750 (68.0675) lr 2.0000e-03 eta 10:34:49 +epoch [2/50] batch [440/500] time 1.757 (1.585) data 0.001 (0.002) loss 1.2578 (1.3147) acc 71.8750 (68.0398) lr 2.0000e-03 eta 10:35:24 +epoch [2/50] batch [445/500] time 1.778 (1.586) data 0.001 (0.002) loss 1.0576 (1.3137) acc 68.7500 (68.0548) lr 2.0000e-03 eta 10:36:01 +epoch [2/50] batch [450/500] time 1.738 (1.588) data 0.001 (0.002) loss 1.2119 (1.3132) acc 78.1250 (68.0764) lr 2.0000e-03 eta 10:36:43 +epoch [2/50] batch [455/500] time 1.719 (1.590) data 0.001 (0.002) loss 1.1318 (1.3164) acc 71.8750 (68.0563) lr 2.0000e-03 eta 10:37:10 +epoch [2/50] batch [460/500] time 1.766 (1.592) data 0.001 (0.002) loss 1.4941 (1.3176) acc 68.7500 (67.9688) lr 2.0000e-03 eta 10:37:46 +epoch [2/50] batch [465/500] time 1.814 (1.593) data 0.001 (0.002) loss 0.7646 (1.3186) acc 78.1250 (67.9167) lr 2.0000e-03 eta 10:38:14 +epoch [2/50] batch [470/500] time 1.616 (1.594) data 0.015 (0.002) loss 1.0859 (1.3178) acc 65.6250 (67.9521) lr 2.0000e-03 eta 10:38:29 +epoch [2/50] batch [475/500] time 1.674 (1.596) data 0.001 (0.002) loss 1.3477 (1.3176) acc 59.3750 (67.9276) lr 2.0000e-03 eta 10:38:55 +epoch [2/50] batch [480/500] time 1.605 (1.597) data 0.001 (0.002) loss 1.2148 (1.3185) acc 59.3750 (67.8971) lr 2.0000e-03 eta 10:39:12 +epoch [2/50] batch [485/500] time 1.741 (1.598) data 0.002 (0.002) loss 1.1455 (1.3175) acc 62.5000 (67.8479) lr 2.0000e-03 eta 10:39:36 +epoch [2/50] batch [490/500] time 1.776 (1.599) data 0.000 (0.002) loss 1.1348 (1.3155) acc 68.7500 (67.9018) lr 2.0000e-03 eta 10:39:49 +epoch [2/50] batch [495/500] time 1.683 (1.600) data 0.001 (0.002) loss 1.6367 (1.3167) acc 65.6250 (67.8598) lr 2.0000e-03 eta 10:40:00 +epoch [2/50] batch [500/500] time 1.616 (1.601) data 0.001 (0.002) 
loss 1.0479 (1.3164) acc 71.8750 (67.8812) lr 1.9980e-03 eta 10:40:13 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,397 +* accuracy: 76.8% +* error: 23.2% +* macro_f1: 76.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/500] time 1.573 (1.680) data 0.000 (0.162) loss 0.9067 (1.1451) acc 71.8750 (71.2500) lr 1.9980e-03 eta 11:11:39 +epoch [3/50] batch [10/500] time 1.551 (1.617) data 0.001 (0.081) loss 1.4072 (1.1780) acc 56.2500 (69.0625) lr 1.9980e-03 eta 10:46:36 +epoch [3/50] batch [15/500] time 1.554 (1.594) data 0.000 (0.054) loss 1.2217 (1.1547) acc 71.8750 (70.2083) lr 1.9980e-03 eta 10:37:22 +epoch [3/50] batch [20/500] time 1.544 (1.590) data 0.000 (0.041) loss 1.1904 (1.1639) acc 68.7500 (70.0000) lr 1.9980e-03 eta 10:35:17 +epoch [3/50] batch [25/500] time 1.562 (1.585) data 0.001 (0.033) loss 0.8750 (1.1539) acc 81.2500 (70.7500) lr 1.9980e-03 eta 10:33:24 +epoch [3/50] batch [30/500] time 1.564 (1.584) data 0.001 (0.027) loss 1.3848 (1.1704) acc 62.5000 (70.3125) lr 1.9980e-03 eta 10:32:44 +epoch [3/50] batch [35/500] time 1.574 (1.586) data 0.001 (0.024) loss 1.4150 (1.1980) acc 62.5000 (69.8214) lr 1.9980e-03 eta 10:33:34 +epoch [3/50] batch [40/500] time 1.577 (1.584) data 0.001 (0.021) loss 1.6670 (1.2277) acc 71.8750 (69.2969) lr 1.9980e-03 eta 10:32:41 +epoch [3/50] batch [45/500] time 1.548 (1.581) data 0.000 (0.018) loss 1.1328 (1.2451) acc 75.0000 (69.1667) lr 1.9980e-03 eta 10:31:13 +epoch [3/50] batch [50/500] time 1.538 (1.579) data 0.001 (0.017) loss 1.4072 (1.2453) acc 59.3750 (68.4375) lr 1.9980e-03 eta 10:30:22 +epoch [3/50] batch [55/500] time 1.556 (1.577) data 0.000 (0.015) loss 1.3545 (1.2553) acc 68.7500 (68.3523) lr 1.9980e-03 eta 10:29:20 +epoch [3/50] batch [60/500] time 1.541 (1.575) data 0.000 (0.014) loss 1.4473 (1.2722) acc 59.3750 (68.3854) lr 1.9980e-03 eta 10:28:28 +epoch [3/50] batch 
[65/500] time 1.580 (1.574) data 0.001 (0.013) loss 1.4658 (1.2867) acc 65.6250 (68.1731) lr 1.9980e-03 eta 10:27:55 +epoch [3/50] batch [70/500] time 1.526 (1.573) data 0.000 (0.012) loss 1.1016 (1.2943) acc 75.0000 (68.3036) lr 1.9980e-03 eta 10:27:21 +epoch [3/50] batch [75/500] time 1.569 (1.572) data 0.001 (0.011) loss 1.6680 (1.3023) acc 50.0000 (68.0833) lr 1.9980e-03 eta 10:26:41 +epoch [3/50] batch [80/500] time 1.558 (1.572) data 0.000 (0.011) loss 1.0234 (1.3105) acc 71.8750 (68.1250) lr 1.9980e-03 eta 10:26:30 +epoch [3/50] batch [85/500] time 1.534 (1.571) data 0.000 (0.010) loss 1.0527 (1.3199) acc 62.5000 (67.9412) lr 1.9980e-03 eta 10:26:01 +epoch [3/50] batch [90/500] time 1.580 (1.571) data 0.000 (0.009) loss 0.6431 (1.3157) acc 75.0000 (68.0903) lr 1.9980e-03 eta 10:25:59 +epoch [3/50] batch [95/500] time 1.591 (1.571) data 0.000 (0.009) loss 1.6377 (1.3152) acc 59.3750 (68.0592) lr 1.9980e-03 eta 10:26:00 +epoch [3/50] batch [100/500] time 1.554 (1.571) data 0.000 (0.009) loss 1.7949 (1.3186) acc 62.5000 (68.0625) lr 1.9980e-03 eta 10:25:37 +epoch [3/50] batch [105/500] time 1.572 (1.570) data 0.000 (0.008) loss 1.1875 (1.3059) acc 68.7500 (68.4226) lr 1.9980e-03 eta 10:25:17 +epoch [3/50] batch [110/500] time 1.568 (1.570) data 0.000 (0.008) loss 0.9106 (1.2969) acc 75.0000 (68.6648) lr 1.9980e-03 eta 10:25:06 +epoch [3/50] batch [115/500] time 1.560 (1.570) data 0.000 (0.007) loss 1.0967 (1.2977) acc 78.1250 (68.6413) lr 1.9980e-03 eta 10:24:56 +epoch [3/50] batch [120/500] time 1.529 (1.569) data 0.000 (0.007) loss 0.8037 (1.2939) acc 78.1250 (68.7500) lr 1.9980e-03 eta 10:24:29 +epoch [3/50] batch [125/500] time 1.563 (1.569) data 0.000 (0.007) loss 0.9609 (1.3017) acc 68.7500 (68.5000) lr 1.9980e-03 eta 10:24:18 +epoch [3/50] batch [130/500] time 1.707 (1.573) data 0.000 (0.007) loss 0.7139 (1.2872) acc 87.5000 (68.8702) lr 1.9980e-03 eta 10:25:41 +epoch [3/50] batch [135/500] time 1.583 (1.574) data 0.001 (0.006) loss 1.1924 (1.2873) acc 
78.1250 (68.9352) lr 1.9980e-03 eta 10:26:08 +epoch [3/50] batch [140/500] time 1.563 (1.574) data 0.001 (0.006) loss 1.3086 (1.2778) acc 68.7500 (69.1518) lr 1.9980e-03 eta 10:25:47 +epoch [3/50] batch [145/500] time 1.571 (1.573) data 0.000 (0.006) loss 1.1006 (1.2739) acc 62.5000 (69.1595) lr 1.9980e-03 eta 10:25:33 +epoch [3/50] batch [150/500] time 1.564 (1.573) data 0.001 (0.006) loss 1.4229 (1.2761) acc 59.3750 (69.0417) lr 1.9980e-03 eta 10:25:25 +epoch [3/50] batch [155/500] time 1.569 (1.573) data 0.000 (0.006) loss 0.9771 (1.2671) acc 68.7500 (69.2137) lr 1.9980e-03 eta 10:25:09 +epoch [3/50] batch [160/500] time 1.569 (1.573) data 0.001 (0.006) loss 0.6021 (1.2571) acc 71.8750 (69.3555) lr 1.9980e-03 eta 10:24:57 +epoch [3/50] batch [165/500] time 1.593 (1.573) data 0.000 (0.005) loss 0.8345 (1.2536) acc 81.2500 (69.5265) lr 1.9980e-03 eta 10:24:44 +epoch [3/50] batch [170/500] time 1.562 (1.572) data 0.000 (0.005) loss 1.3096 (1.2554) acc 65.6250 (69.4301) lr 1.9980e-03 eta 10:24:32 +epoch [3/50] batch [175/500] time 1.572 (1.573) data 0.000 (0.005) loss 1.0371 (1.2549) acc 78.1250 (69.3571) lr 1.9980e-03 eta 10:24:32 +epoch [3/50] batch [180/500] time 1.559 (1.572) data 0.000 (0.005) loss 1.2119 (1.2604) acc 78.1250 (69.2361) lr 1.9980e-03 eta 10:24:13 +epoch [3/50] batch [185/500] time 1.551 (1.572) data 0.001 (0.005) loss 1.6846 (1.2541) acc 56.2500 (69.3412) lr 1.9980e-03 eta 10:24:06 +epoch [3/50] batch [190/500] time 1.579 (1.572) data 0.000 (0.005) loss 0.9927 (1.2481) acc 84.3750 (69.4572) lr 1.9980e-03 eta 10:23:52 +epoch [3/50] batch [195/500] time 1.555 (1.572) data 0.001 (0.005) loss 0.8354 (1.2516) acc 75.0000 (69.3590) lr 1.9980e-03 eta 10:23:33 +epoch [3/50] batch [200/500] time 1.550 (1.572) data 0.000 (0.005) loss 1.9736 (1.2583) acc 43.7500 (69.1875) lr 1.9980e-03 eta 10:23:22 +epoch [3/50] batch [205/500] time 1.553 (1.571) data 0.000 (0.004) loss 0.6851 (1.2489) acc 90.6250 (69.4970) lr 1.9980e-03 eta 10:23:03 +epoch [3/50] batch 
[210/500] time 1.547 (1.571) data 0.001 (0.004) loss 1.2510 (1.2436) acc 71.8750 (69.7470) lr 1.9980e-03 eta 10:22:47 +epoch [3/50] batch [215/500] time 1.581 (1.570) data 0.000 (0.004) loss 0.9111 (1.2438) acc 78.1250 (69.7820) lr 1.9980e-03 eta 10:22:33 +epoch [3/50] batch [220/500] time 1.607 (1.571) data 0.000 (0.004) loss 1.3145 (1.2434) acc 75.0000 (69.8438) lr 1.9980e-03 eta 10:22:38 +epoch [3/50] batch [225/500] time 1.629 (1.573) data 0.001 (0.004) loss 1.7656 (1.2480) acc 53.1250 (69.7083) lr 1.9980e-03 eta 10:23:13 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..22cb2ffb --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model-best.pth.tar diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar new file mode 100644 index 00000000..acd01576 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699474665.ckb-gpu-v.mitre.org.3629295.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699474665.ckb-gpu-v.mitre.org.3629295.0 new file mode 100644 index 00000000..d18a0451 Binary files /dev/null and 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699474665.ckb-gpu-v.mitre.org.3629295.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..f1ae6541 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,5697 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + 
GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 
4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.579 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: 
Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 
py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/500] time 1.526 (2.729) data 0.000 (0.198) loss 2.6680 (3.0004) acc 46.8750 (43.1250) lr 1.0000e-05 eta 18:56:52 +epoch [1/50] batch [10/500] time 1.572 (2.149) data 0.001 (0.099) loss 2.1602 (2.6768) acc 53.1250 (46.8750) lr 1.0000e-05 eta 14:54:53 +epoch [1/50] batch [15/500] time 1.655 (1.964) data 0.000 (0.066) loss 2.3926 (2.6326) acc 53.1250 (47.0833) lr 1.0000e-05 eta 13:37:47 +epoch [1/50] batch [20/500] time 1.600 (1.880) data 0.000 (0.050) loss 2.6055 (2.5407) acc 40.6250 (48.1250) lr 1.0000e-05 eta 13:02:32 +epoch [1/50] batch [25/500] time 1.661 (1.826) data 0.000 (0.040) loss 2.2500 (2.4505) acc 46.8750 (49.3750) lr 1.0000e-05 eta 12:39:59 +epoch [1/50] batch [30/500] time 1.566 (1.781) data 0.000 (0.033) loss 1.9404 (2.4199) acc 56.2500 (49.6875) lr 1.0000e-05 eta 12:21:15 +epoch [1/50] batch [35/500] time 1.553 (1.749) data 0.000 (0.029) loss 1.7510 (2.3313) acc 56.2500 (51.0714) lr 1.0000e-05 eta 12:07:55 +epoch [1/50] batch [40/500] time 1.572 (1.727) data 0.000 (0.025) loss 2.2148 (2.2764) acc 50.0000 (51.7188) lr 1.0000e-05 eta 11:58:27 +epoch [1/50] batch [45/500] time 1.547 (1.710) data 0.000 (0.023) loss 1.5869 (2.2606) acc 59.3750 (52.1528) lr 1.0000e-05 eta 11:51:04 +epoch [1/50] batch [50/500] time 1.555 (1.695) data 0.000 (0.020) loss 1.3623 (2.2209) acc 71.8750 (53.0000) lr 1.0000e-05 eta 11:44:40 +epoch [1/50] batch [55/500] time 1.582 (1.683) data 0.001 (0.019) loss 2.5352 (2.1800) acc 40.6250 (53.6932) lr 1.0000e-05 eta 11:39:34 +epoch [1/50] batch [60/500] time 1.559 (1.673) data 0.001 (0.017) loss 1.6846 (2.1436) acc 59.3750 (54.3750) lr 1.0000e-05 eta 11:35:16 +epoch [1/50] batch [65/500] time 1.546 (1.665) data 0.001 (0.016) loss 1.3691 (2.1060) acc 68.7500 (55.0962) lr 1.0000e-05 eta 11:31:46 +epoch [1/50] 
batch [70/500] time 1.592 (1.658) data 0.000 (0.015) loss 1.3213 (2.0912) acc 68.7500 (55.4464) lr 1.0000e-05 eta 11:28:50 +epoch [1/50] batch [75/500] time 1.555 (1.651) data 0.000 (0.014) loss 1.7529 (2.0628) acc 56.2500 (55.6667) lr 1.0000e-05 eta 11:25:57 +epoch [1/50] batch [80/500] time 1.640 (1.648) data 0.000 (0.013) loss 1.4932 (2.0367) acc 68.7500 (56.3281) lr 1.0000e-05 eta 11:24:32 +epoch [1/50] batch [85/500] time 1.695 (1.651) data 0.000 (0.012) loss 2.0371 (2.0133) acc 40.6250 (56.4706) lr 1.0000e-05 eta 11:25:24 +epoch [1/50] batch [90/500] time 1.617 (1.653) data 0.001 (0.011) loss 1.4316 (2.0048) acc 59.3750 (56.3194) lr 1.0000e-05 eta 11:26:11 +epoch [1/50] batch [95/500] time 1.571 (1.649) data 0.000 (0.011) loss 0.9624 (1.9725) acc 75.0000 (56.8092) lr 1.0000e-05 eta 11:24:25 +epoch [1/50] batch [100/500] time 1.569 (1.645) data 0.000 (0.010) loss 1.8203 (1.9715) acc 59.3750 (56.9688) lr 1.0000e-05 eta 11:22:40 +epoch [1/50] batch [105/500] time 1.574 (1.641) data 0.000 (0.010) loss 1.8516 (1.9651) acc 59.3750 (57.3214) lr 1.0000e-05 eta 11:20:56 +epoch [1/50] batch [110/500] time 1.551 (1.638) data 0.000 (0.009) loss 1.3242 (1.9543) acc 75.0000 (57.5000) lr 1.0000e-05 eta 11:19:20 +epoch [1/50] batch [115/500] time 1.568 (1.634) data 0.000 (0.009) loss 2.3379 (1.9418) acc 56.2500 (57.6902) lr 1.0000e-05 eta 11:17:36 +epoch [1/50] batch [120/500] time 1.537 (1.631) data 0.000 (0.009) loss 2.0215 (1.9298) acc 46.8750 (57.9688) lr 1.0000e-05 eta 11:16:11 +epoch [1/50] batch [125/500] time 1.532 (1.628) data 0.000 (0.008) loss 1.2373 (1.9107) acc 71.8750 (58.3500) lr 1.0000e-05 eta 11:14:47 +epoch [1/50] batch [130/500] time 1.547 (1.625) data 0.001 (0.008) loss 1.9961 (1.9038) acc 59.3750 (58.5096) lr 1.0000e-05 eta 11:13:33 +epoch [1/50] batch [135/500] time 1.559 (1.623) data 0.001 (0.008) loss 1.7256 (1.8903) acc 56.2500 (58.7269) lr 1.0000e-05 eta 11:12:38 +epoch [1/50] batch [140/500] time 1.551 (1.621) data 0.001 (0.008) loss 2.3574 
(1.8775) acc 53.1250 (58.8839) lr 1.0000e-05 eta 11:11:33 +epoch [1/50] batch [145/500] time 1.556 (1.619) data 0.001 (0.007) loss 1.8555 (1.8715) acc 56.2500 (58.8362) lr 1.0000e-05 eta 11:10:35 +epoch [1/50] batch [150/500] time 1.564 (1.617) data 0.001 (0.007) loss 1.9990 (1.8628) acc 53.1250 (59.1458) lr 1.0000e-05 eta 11:09:39 +epoch [1/50] batch [155/500] time 1.595 (1.615) data 0.001 (0.007) loss 2.0996 (1.8558) acc 59.3750 (59.3347) lr 1.0000e-05 eta 11:08:50 +epoch [1/50] batch [160/500] time 1.559 (1.613) data 0.001 (0.007) loss 1.4707 (1.8541) acc 56.2500 (59.1797) lr 1.0000e-05 eta 11:07:56 +epoch [1/50] batch [165/500] time 1.552 (1.611) data 0.001 (0.006) loss 1.7148 (1.8492) acc 53.1250 (59.1856) lr 1.0000e-05 eta 11:06:58 +epoch [1/50] batch [170/500] time 1.591 (1.611) data 0.000 (0.006) loss 1.7578 (1.8375) acc 62.5000 (59.3566) lr 1.0000e-05 eta 11:06:35 +epoch [1/50] batch [175/500] time 1.562 (1.610) data 0.000 (0.006) loss 2.0742 (1.8386) acc 59.3750 (59.4107) lr 1.0000e-05 eta 11:05:59 +epoch [1/50] batch [180/500] time 1.557 (1.608) data 0.001 (0.006) loss 0.9971 (1.8301) acc 75.0000 (59.5486) lr 1.0000e-05 eta 11:05:20 +epoch [1/50] batch [185/500] time 1.573 (1.607) data 0.000 (0.006) loss 1.1113 (1.8254) acc 65.6250 (59.6284) lr 1.0000e-05 eta 11:04:44 +epoch [1/50] batch [190/500] time 1.547 (1.606) data 0.000 (0.006) loss 1.1191 (1.8157) acc 71.8750 (59.7862) lr 1.0000e-05 eta 11:04:05 +epoch [1/50] batch [195/500] time 1.544 (1.605) data 0.001 (0.006) loss 2.0312 (1.8152) acc 62.5000 (59.8397) lr 1.0000e-05 eta 11:03:26 +epoch [1/50] batch [200/500] time 1.565 (1.604) data 0.001 (0.005) loss 1.7812 (1.8054) acc 65.6250 (60.0156) lr 1.0000e-05 eta 11:02:59 +epoch [1/50] batch [205/500] time 1.580 (1.603) data 0.000 (0.005) loss 1.7959 (1.7951) acc 62.5000 (60.1067) lr 1.0000e-05 eta 11:02:24 +epoch [1/50] batch [210/500] time 1.563 (1.602) data 0.000 (0.005) loss 1.6230 (1.7956) acc 62.5000 (60.0744) lr 1.0000e-05 eta 11:01:59 +epoch 
[1/50] batch [215/500] time 1.546 (1.601) data 0.000 (0.005) loss 1.2314 (1.7844) acc 65.6250 (60.2907) lr 1.0000e-05 eta 11:01:21 +epoch [1/50] batch [220/500] time 1.555 (1.600) data 0.000 (0.005) loss 2.3105 (1.7787) acc 56.2500 (60.4545) lr 1.0000e-05 eta 11:00:47 +epoch [1/50] batch [225/500] time 1.553 (1.599) data 0.000 (0.005) loss 1.5068 (1.7711) acc 65.6250 (60.5694) lr 1.0000e-05 eta 11:00:11 +epoch [1/50] batch [230/500] time 1.524 (1.598) data 0.000 (0.005) loss 1.1777 (1.7646) acc 75.0000 (60.7473) lr 1.0000e-05 eta 10:59:37 +epoch [1/50] batch [235/500] time 1.553 (1.597) data 0.001 (0.005) loss 1.4883 (1.7569) acc 62.5000 (60.8245) lr 1.0000e-05 eta 10:59:07 +epoch [1/50] batch [240/500] time 1.540 (1.596) data 0.000 (0.005) loss 1.3818 (1.7569) acc 71.8750 (60.9245) lr 1.0000e-05 eta 10:58:37 +epoch [1/50] batch [245/500] time 1.538 (1.595) data 0.000 (0.005) loss 1.0498 (1.7544) acc 75.0000 (60.9566) lr 1.0000e-05 eta 10:58:04 +epoch [1/50] batch [250/500] time 1.549 (1.594) data 0.000 (0.004) loss 1.2861 (1.7467) acc 59.3750 (61.0375) lr 1.0000e-05 eta 10:57:42 +epoch [1/50] batch [255/500] time 1.573 (1.594) data 0.000 (0.004) loss 1.6689 (1.7407) acc 68.7500 (61.1275) lr 1.0000e-05 eta 10:57:19 +epoch [1/50] batch [260/500] time 1.569 (1.593) data 0.000 (0.004) loss 1.8477 (1.7376) acc 68.7500 (61.2620) lr 1.0000e-05 eta 10:56:49 +epoch [1/50] batch [265/500] time 1.545 (1.592) data 0.000 (0.004) loss 2.2305 (1.7313) acc 46.8750 (61.4033) lr 1.0000e-05 eta 10:56:23 +epoch [1/50] batch [270/500] time 1.543 (1.592) data 0.000 (0.004) loss 1.3428 (1.7195) acc 71.8750 (61.6551) lr 1.0000e-05 eta 10:56:01 +epoch [1/50] batch [275/500] time 1.559 (1.591) data 0.000 (0.004) loss 1.4639 (1.7159) acc 68.7500 (61.6932) lr 1.0000e-05 eta 10:55:36 +epoch [1/50] batch [280/500] time 1.572 (1.590) data 0.000 (0.004) loss 1.2197 (1.7089) acc 65.6250 (61.8304) lr 1.0000e-05 eta 10:55:15 +epoch [1/50] batch [285/500] time 1.575 (1.590) data 0.000 (0.004) loss 
1.1084 (1.7031) acc 71.8750 (61.9737) lr 1.0000e-05 eta 10:55:02 +epoch [1/50] batch [290/500] time 1.579 (1.590) data 0.000 (0.004) loss 1.0654 (1.6992) acc 68.7500 (62.0043) lr 1.0000e-05 eta 10:54:45 +epoch [1/50] batch [295/500] time 1.587 (1.589) data 0.000 (0.004) loss 1.1455 (1.6939) acc 65.6250 (62.0763) lr 1.0000e-05 eta 10:54:23 +epoch [1/50] batch [300/500] time 1.576 (1.589) data 0.000 (0.004) loss 1.6162 (1.6871) acc 56.2500 (62.0625) lr 1.0000e-05 eta 10:54:01 +epoch [1/50] batch [305/500] time 1.549 (1.588) data 0.000 (0.004) loss 1.3760 (1.6856) acc 71.8750 (62.0902) lr 1.0000e-05 eta 10:53:41 +epoch [1/50] batch [310/500] time 1.559 (1.588) data 0.000 (0.004) loss 0.8809 (1.6797) acc 78.1250 (62.2177) lr 1.0000e-05 eta 10:53:25 +epoch [1/50] batch [315/500] time 1.567 (1.587) data 0.001 (0.004) loss 1.0000 (1.6795) acc 65.6250 (62.2123) lr 1.0000e-05 eta 10:53:06 +epoch [1/50] batch [320/500] time 1.531 (1.587) data 0.000 (0.004) loss 1.0859 (1.6703) acc 65.6250 (62.3633) lr 1.0000e-05 eta 10:52:46 +epoch [1/50] batch [325/500] time 1.572 (1.587) data 0.000 (0.004) loss 1.5293 (1.6671) acc 65.6250 (62.4327) lr 1.0000e-05 eta 10:52:32 +epoch [1/50] batch [330/500] time 1.541 (1.586) data 0.001 (0.003) loss 1.6631 (1.6666) acc 59.3750 (62.4053) lr 1.0000e-05 eta 10:52:12 +epoch [1/50] batch [335/500] time 1.545 (1.586) data 0.000 (0.003) loss 1.8867 (1.6611) acc 65.6250 (62.4720) lr 1.0000e-05 eta 10:51:54 +epoch [1/50] batch [340/500] time 1.552 (1.585) data 0.000 (0.003) loss 0.6108 (1.6590) acc 87.5000 (62.5551) lr 1.0000e-05 eta 10:51:35 +epoch [1/50] batch [345/500] time 1.592 (1.585) data 0.000 (0.003) loss 1.0752 (1.6564) acc 71.8750 (62.5996) lr 1.0000e-05 eta 10:51:19 +epoch [1/50] batch [350/500] time 1.683 (1.585) data 0.000 (0.003) loss 1.5283 (1.6548) acc 65.6250 (62.6250) lr 1.0000e-05 eta 10:51:12 +epoch [1/50] batch [355/500] time 1.585 (1.585) data 0.000 (0.003) loss 1.1992 (1.6517) acc 71.8750 (62.6673) lr 1.0000e-05 eta 10:51:03 
+epoch [1/50] batch [360/500] time 1.573 (1.585) data 0.000 (0.003) loss 1.1680 (1.6477) acc 71.8750 (62.7083) lr 1.0000e-05 eta 10:50:50 +epoch [1/50] batch [365/500] time 1.557 (1.584) data 0.000 (0.003) loss 1.4873 (1.6436) acc 59.3750 (62.7140) lr 1.0000e-05 eta 10:50:33 +epoch [1/50] batch [370/500] time 1.554 (1.584) data 0.000 (0.003) loss 1.4668 (1.6414) acc 59.3750 (62.7703) lr 1.0000e-05 eta 10:50:14 +epoch [1/50] batch [375/500] time 1.532 (1.584) data 0.000 (0.003) loss 2.1230 (1.6374) acc 53.1250 (62.8000) lr 1.0000e-05 eta 10:49:55 +epoch [1/50] batch [380/500] time 1.540 (1.583) data 0.000 (0.003) loss 1.3418 (1.6347) acc 75.0000 (62.8125) lr 1.0000e-05 eta 10:49:39 +epoch [1/50] batch [385/500] time 1.573 (1.583) data 0.001 (0.003) loss 1.7363 (1.6326) acc 62.5000 (62.8571) lr 1.0000e-05 eta 10:49:27 +epoch [1/50] batch [390/500] time 1.541 (1.583) data 0.000 (0.003) loss 1.1152 (1.6298) acc 71.8750 (62.9808) lr 1.0000e-05 eta 10:49:13 +epoch [1/50] batch [395/500] time 1.555 (1.583) data 0.000 (0.003) loss 0.9424 (1.6234) acc 71.8750 (63.1013) lr 1.0000e-05 eta 10:48:57 +epoch [1/50] batch [400/500] time 1.572 (1.582) data 0.000 (0.003) loss 1.6758 (1.6199) acc 59.3750 (63.2188) lr 1.0000e-05 eta 10:48:42 +epoch [1/50] batch [405/500] time 1.572 (1.582) data 0.000 (0.003) loss 0.9937 (1.6166) acc 59.3750 (63.2330) lr 1.0000e-05 eta 10:48:24 +epoch [1/50] batch [410/500] time 1.567 (1.582) data 0.000 (0.003) loss 1.7041 (1.6145) acc 62.5000 (63.2470) lr 1.0000e-05 eta 10:48:09 +epoch [1/50] batch [415/500] time 1.549 (1.581) data 0.000 (0.003) loss 1.7549 (1.6136) acc 62.5000 (63.3133) lr 1.0000e-05 eta 10:47:55 +epoch [1/50] batch [420/500] time 1.602 (1.581) data 0.000 (0.003) loss 1.8330 (1.6125) acc 53.1250 (63.3185) lr 1.0000e-05 eta 10:47:49 +epoch [1/50] batch [425/500] time 1.530 (1.581) data 0.000 (0.003) loss 1.9277 (1.6128) acc 62.5000 (63.3235) lr 1.0000e-05 eta 10:47:35 +epoch [1/50] batch [430/500] time 1.557 (1.581) data 0.000 (0.003) 
loss 1.2998 (1.6113) acc 75.0000 (63.3648) lr 1.0000e-05 eta 10:47:23 +epoch [1/50] batch [435/500] time 1.534 (1.581) data 0.000 (0.003) loss 1.1543 (1.6075) acc 71.8750 (63.4483) lr 1.0000e-05 eta 10:47:06 +epoch [1/50] batch [440/500] time 1.597 (1.580) data 0.001 (0.003) loss 1.1318 (1.6046) acc 71.8750 (63.4943) lr 1.0000e-05 eta 10:46:54 +epoch [1/50] batch [445/500] time 1.570 (1.580) data 0.000 (0.003) loss 1.1436 (1.6021) acc 75.0000 (63.5604) lr 1.0000e-05 eta 10:46:41 +epoch [1/50] batch [450/500] time 1.548 (1.580) data 0.000 (0.003) loss 2.3691 (1.6010) acc 53.1250 (63.5556) lr 1.0000e-05 eta 10:46:31 +epoch [1/50] batch [455/500] time 1.570 (1.580) data 0.000 (0.003) loss 1.4902 (1.5982) acc 56.2500 (63.6195) lr 1.0000e-05 eta 10:46:18 +epoch [1/50] batch [460/500] time 1.554 (1.580) data 0.000 (0.003) loss 1.8545 (1.5993) acc 56.2500 (63.6209) lr 1.0000e-05 eta 10:46:03 +epoch [1/50] batch [465/500] time 1.578 (1.579) data 0.000 (0.003) loss 0.8286 (1.5967) acc 75.0000 (63.6492) lr 1.0000e-05 eta 10:45:49 +epoch [1/50] batch [470/500] time 1.534 (1.579) data 0.000 (0.003) loss 1.4180 (1.5934) acc 71.8750 (63.6968) lr 1.0000e-05 eta 10:45:32 +epoch [1/50] batch [475/500] time 1.553 (1.579) data 0.000 (0.003) loss 1.5449 (1.5898) acc 71.8750 (63.7763) lr 1.0000e-05 eta 10:45:18 +epoch [1/50] batch [480/500] time 1.562 (1.579) data 0.000 (0.002) loss 2.2305 (1.5881) acc 56.2500 (63.8021) lr 1.0000e-05 eta 10:45:05 +epoch [1/50] batch [485/500] time 1.562 (1.578) data 0.001 (0.002) loss 1.3672 (1.5830) acc 78.1250 (63.8982) lr 1.0000e-05 eta 10:44:52 +epoch [1/50] batch [490/500] time 1.550 (1.578) data 0.000 (0.002) loss 1.2090 (1.5812) acc 68.7500 (63.9158) lr 1.0000e-05 eta 10:44:36 +epoch [1/50] batch [495/500] time 1.549 (1.578) data 0.000 (0.002) loss 1.5117 (1.5812) acc 65.6250 (63.8699) lr 1.0000e-05 eta 10:44:29 +epoch [1/50] batch [500/500] time 1.578 (1.578) data 0.000 (0.002) loss 1.5713 (1.5795) acc 65.6250 (63.8937) lr 2.0000e-03 eta 
10:44:14 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,807 +* accuracy: 73.6% +* error: 26.4% +* macro_f1: 72.6% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/500] time 1.574 (1.689) data 0.000 (0.159) loss 0.8926 (1.5412) acc 78.1250 (65.6250) lr 2.0000e-03 eta 11:29:43 +epoch [2/50] batch [10/500] time 1.558 (1.625) data 0.000 (0.080) loss 1.4668 (1.4212) acc 62.5000 (65.9375) lr 2.0000e-03 eta 11:03:21 +epoch [2/50] batch [15/500] time 1.583 (1.605) data 0.000 (0.053) loss 1.8105 (1.3924) acc 59.3750 (66.4583) lr 2.0000e-03 eta 10:54:59 +epoch [2/50] batch [20/500] time 1.561 (1.598) data 0.000 (0.040) loss 1.8594 (1.4065) acc 56.2500 (66.7188) lr 2.0000e-03 eta 10:52:10 +epoch [2/50] batch [25/500] time 1.547 (1.591) data 0.000 (0.032) loss 1.5322 (1.4153) acc 68.7500 (65.7500) lr 2.0000e-03 eta 10:48:51 +epoch [2/50] batch [30/500] time 1.571 (1.585) data 0.000 (0.027) loss 1.1768 (1.4120) acc 62.5000 (66.2500) lr 2.0000e-03 eta 10:46:30 +epoch [2/50] batch [35/500] time 1.567 (1.582) data 0.000 (0.023) loss 0.9868 (1.3767) acc 78.1250 (67.3214) lr 2.0000e-03 eta 10:44:57 +epoch [2/50] batch [40/500] time 1.547 (1.579) data 0.000 (0.020) loss 1.2715 (1.3468) acc 65.6250 (67.5000) lr 2.0000e-03 eta 10:43:31 +epoch [2/50] batch [45/500] time 1.542 (1.576) data 0.000 (0.018) loss 1.1768 (1.3403) acc 71.8750 (67.6389) lr 2.0000e-03 eta 10:42:22 +epoch [2/50] batch [50/500] time 1.566 (1.574) data 0.000 (0.016) loss 0.7129 (1.3346) acc 78.1250 (67.4375) lr 2.0000e-03 eta 10:41:24 +epoch [2/50] batch [55/500] time 1.543 (1.573) data 0.000 (0.015) loss 1.4150 (1.3184) acc 68.7500 (68.0114) lr 2.0000e-03 eta 10:40:57 +epoch [2/50] batch [60/500] time 1.581 (1.572) data 0.001 (0.014) loss 1.5693 (1.3319) acc 53.1250 (67.7083) lr 2.0000e-03 eta 10:40:08 +epoch [2/50] batch [65/500] time 1.554 (1.570) data 0.000 (0.013) loss 1.4238 
(1.3278) acc 71.8750 (67.5962) lr 2.0000e-03 eta 10:39:22 +epoch [2/50] batch [70/500] time 1.567 (1.570) data 0.001 (0.012) loss 1.4346 (1.3550) acc 65.6250 (67.1875) lr 2.0000e-03 eta 10:39:05 +epoch [2/50] batch [75/500] time 1.576 (1.569) data 0.001 (0.011) loss 1.2705 (1.3523) acc 81.2500 (67.5000) lr 2.0000e-03 eta 10:38:40 +epoch [2/50] batch [80/500] time 1.551 (1.569) data 0.000 (0.010) loss 1.0664 (1.3410) acc 71.8750 (67.6953) lr 2.0000e-03 eta 10:38:22 +epoch [2/50] batch [85/500] time 1.550 (1.568) data 0.000 (0.010) loss 0.9814 (1.3386) acc 78.1250 (67.7206) lr 2.0000e-03 eta 10:37:58 +epoch [2/50] batch [90/500] time 1.525 (1.567) data 0.000 (0.009) loss 1.0654 (1.3369) acc 75.0000 (67.8819) lr 2.0000e-03 eta 10:37:33 +epoch [2/50] batch [95/500] time 1.561 (1.567) data 0.000 (0.009) loss 1.2188 (1.3364) acc 65.6250 (67.7961) lr 2.0000e-03 eta 10:37:24 +epoch [2/50] batch [100/500] time 1.540 (1.566) data 0.000 (0.008) loss 1.6260 (1.3385) acc 65.6250 (67.7188) lr 2.0000e-03 eta 10:36:57 +epoch [2/50] batch [105/500] time 1.562 (1.567) data 0.000 (0.008) loss 0.7935 (1.3249) acc 81.2500 (67.8274) lr 2.0000e-03 eta 10:37:08 +epoch [2/50] batch [110/500] time 1.569 (1.567) data 0.000 (0.008) loss 1.5908 (1.3312) acc 59.3750 (67.5568) lr 2.0000e-03 eta 10:36:56 +epoch [2/50] batch [115/500] time 1.574 (1.567) data 0.000 (0.007) loss 0.9521 (1.3371) acc 68.7500 (67.4185) lr 2.0000e-03 eta 10:36:56 +epoch [2/50] batch [120/500] time 1.572 (1.567) data 0.000 (0.007) loss 1.3838 (1.3426) acc 59.3750 (67.3177) lr 2.0000e-03 eta 10:36:46 +epoch [2/50] batch [125/500] time 1.583 (1.567) data 0.000 (0.007) loss 1.0908 (1.3338) acc 71.8750 (67.4500) lr 2.0000e-03 eta 10:36:41 +epoch [2/50] batch [130/500] time 1.557 (1.567) data 0.001 (0.007) loss 1.5820 (1.3318) acc 62.5000 (67.4760) lr 2.0000e-03 eta 10:36:29 +epoch [2/50] batch [135/500] time 1.563 (1.567) data 0.000 (0.006) loss 1.2168 (1.3331) acc 71.8750 (67.3380) lr 2.0000e-03 eta 10:36:19 +epoch [2/50] 
batch [140/500] time 1.570 (1.567) data 0.000 (0.006) loss 1.4570 (1.3298) acc 59.3750 (67.3884) lr 2.0000e-03 eta 10:36:10 +epoch [2/50] batch [145/500] time 1.560 (1.567) data 0.000 (0.006) loss 1.3965 (1.3301) acc 62.5000 (67.4569) lr 2.0000e-03 eta 10:36:12 +epoch [2/50] batch [150/500] time 1.547 (1.568) data 0.000 (0.006) loss 0.8350 (1.3281) acc 78.1250 (67.4792) lr 2.0000e-03 eta 10:36:13 +epoch [2/50] batch [155/500] time 1.553 (1.567) data 0.001 (0.006) loss 1.4609 (1.3193) acc 62.5000 (67.5806) lr 2.0000e-03 eta 10:35:55 +epoch [2/50] batch [160/500] time 1.542 (1.567) data 0.000 (0.005) loss 0.9316 (1.3172) acc 78.1250 (67.6172) lr 2.0000e-03 eta 10:35:38 +epoch [2/50] batch [165/500] time 1.568 (1.567) data 0.000 (0.005) loss 1.2754 (1.3211) acc 71.8750 (67.6326) lr 2.0000e-03 eta 10:35:31 +epoch [2/50] batch [170/500] time 1.555 (1.567) data 0.000 (0.005) loss 1.0801 (1.3219) acc 71.8750 (67.6103) lr 2.0000e-03 eta 10:35:17 +epoch [2/50] batch [175/500] time 1.552 (1.566) data 0.001 (0.005) loss 1.7803 (1.3283) acc 59.3750 (67.5357) lr 2.0000e-03 eta 10:35:01 +epoch [2/50] batch [180/500] time 1.577 (1.566) data 0.001 (0.005) loss 0.9062 (1.3284) acc 78.1250 (67.6389) lr 2.0000e-03 eta 10:34:49 +epoch [2/50] batch [185/500] time 1.536 (1.566) data 0.001 (0.005) loss 1.2637 (1.3290) acc 62.5000 (67.5845) lr 2.0000e-03 eta 10:34:39 +epoch [2/50] batch [190/500] time 1.541 (1.566) data 0.000 (0.005) loss 1.8135 (1.3286) acc 59.3750 (67.6316) lr 2.0000e-03 eta 10:34:20 +epoch [2/50] batch [195/500] time 1.537 (1.565) data 0.000 (0.004) loss 1.4922 (1.3350) acc 56.2500 (67.4840) lr 2.0000e-03 eta 10:34:02 +epoch [2/50] batch [200/500] time 1.557 (1.565) data 0.000 (0.004) loss 0.6753 (1.3341) acc 81.2500 (67.5625) lr 2.0000e-03 eta 10:33:51 +epoch [2/50] batch [205/500] time 1.590 (1.565) data 0.000 (0.004) loss 1.2021 (1.3319) acc 68.7500 (67.5762) lr 2.0000e-03 eta 10:33:45 +epoch [2/50] batch [210/500] time 1.562 (1.565) data 0.000 (0.004) loss 1.6230 
(1.3349) acc 53.1250 (67.5298) lr 2.0000e-03 eta 10:33:36 +epoch [2/50] batch [215/500] time 1.557 (1.565) data 0.000 (0.004) loss 0.9067 (1.3347) acc 75.0000 (67.6308) lr 2.0000e-03 eta 10:33:27 +epoch [2/50] batch [220/500] time 1.570 (1.565) data 0.000 (0.004) loss 1.2646 (1.3329) acc 65.6250 (67.6705) lr 2.0000e-03 eta 10:33:24 +epoch [2/50] batch [225/500] time 1.553 (1.565) data 0.000 (0.004) loss 1.1807 (1.3324) acc 71.8750 (67.7222) lr 2.0000e-03 eta 10:33:13 +epoch [2/50] batch [230/500] time 1.587 (1.565) data 0.000 (0.004) loss 0.6294 (1.3270) acc 81.2500 (67.8397) lr 2.0000e-03 eta 10:33:04 +epoch [2/50] batch [235/500] time 1.569 (1.565) data 0.001 (0.004) loss 1.7725 (1.3291) acc 56.2500 (67.7926) lr 2.0000e-03 eta 10:32:54 +epoch [2/50] batch [240/500] time 1.566 (1.565) data 0.001 (0.004) loss 2.5840 (1.3297) acc 46.8750 (67.7734) lr 2.0000e-03 eta 10:32:44 +epoch [2/50] batch [245/500] time 1.571 (1.565) data 0.000 (0.004) loss 1.8184 (1.3283) acc 53.1250 (67.7806) lr 2.0000e-03 eta 10:32:39 +epoch [2/50] batch [250/500] time 1.568 (1.565) data 0.001 (0.004) loss 1.5049 (1.3301) acc 62.5000 (67.7375) lr 2.0000e-03 eta 10:32:35 +epoch [2/50] batch [255/500] time 1.554 (1.565) data 0.001 (0.004) loss 1.8916 (1.3319) acc 56.2500 (67.6716) lr 2.0000e-03 eta 10:32:25 +epoch [2/50] batch [260/500] time 1.600 (1.565) data 0.000 (0.003) loss 1.5449 (1.3284) acc 71.8750 (67.8125) lr 2.0000e-03 eta 10:32:21 +epoch [2/50] batch [265/500] time 1.565 (1.565) data 0.000 (0.003) loss 1.6748 (1.3271) acc 59.3750 (67.8656) lr 2.0000e-03 eta 10:32:16 +epoch [2/50] batch [270/500] time 1.574 (1.565) data 0.000 (0.003) loss 1.1953 (1.3214) acc 62.5000 (67.9167) lr 2.0000e-03 eta 10:32:04 +epoch [2/50] batch [275/500] time 1.529 (1.565) data 0.000 (0.003) loss 0.9761 (1.3169) acc 75.0000 (68.0114) lr 2.0000e-03 eta 10:31:53 +epoch [2/50] batch [280/500] time 1.537 (1.565) data 0.000 (0.003) loss 1.4150 (1.3160) acc 62.5000 (68.0246) lr 2.0000e-03 eta 10:31:40 +epoch 
[2/50] batch [285/500] time 1.543 (1.565) data 0.001 (0.003) loss 1.2051 (1.3106) acc 68.7500 (68.1250) lr 2.0000e-03 eta 10:31:28 +epoch [2/50] batch [290/500] time 1.688 (1.565) data 0.001 (0.003) loss 1.1045 (1.3092) acc 68.7500 (68.1250) lr 2.0000e-03 eta 10:31:33 +epoch [2/50] batch [295/500] time 1.547 (1.565) data 0.000 (0.003) loss 1.3945 (1.3094) acc 56.2500 (68.1462) lr 2.0000e-03 eta 10:31:24 +epoch [2/50] batch [300/500] time 1.540 (1.565) data 0.000 (0.003) loss 2.0840 (1.3116) acc 53.1250 (68.0938) lr 2.0000e-03 eta 10:31:12 +epoch [2/50] batch [305/500] time 1.560 (1.565) data 0.000 (0.003) loss 0.6509 (1.3058) acc 75.0000 (68.1762) lr 2.0000e-03 eta 10:31:00 +epoch [2/50] batch [310/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.8008 (1.3080) acc 65.6250 (68.1048) lr 2.0000e-03 eta 10:30:51 +epoch [2/50] batch [315/500] time 1.555 (1.565) data 0.000 (0.003) loss 1.7734 (1.3092) acc 62.5000 (68.0556) lr 2.0000e-03 eta 10:30:43 +epoch [2/50] batch [320/500] time 1.559 (1.565) data 0.001 (0.003) loss 1.4893 (1.3052) acc 75.0000 (68.2031) lr 2.0000e-03 eta 10:30:36 +epoch [2/50] batch [325/500] time 1.562 (1.565) data 0.000 (0.003) loss 0.9800 (1.3026) acc 75.0000 (68.2212) lr 2.0000e-03 eta 10:30:27 +epoch [2/50] batch [330/500] time 1.544 (1.565) data 0.000 (0.003) loss 1.5420 (1.2990) acc 65.6250 (68.2955) lr 2.0000e-03 eta 10:30:16 +epoch [2/50] batch [335/500] time 1.557 (1.564) data 0.000 (0.003) loss 1.4570 (1.2995) acc 62.5000 (68.3396) lr 2.0000e-03 eta 10:30:05 +epoch [2/50] batch [340/500] time 1.572 (1.565) data 0.000 (0.003) loss 1.2188 (1.3039) acc 75.0000 (68.2996) lr 2.0000e-03 eta 10:29:58 +epoch [2/50] batch [345/500] time 1.573 (1.564) data 0.000 (0.003) loss 1.4551 (1.2997) acc 71.8750 (68.3514) lr 2.0000e-03 eta 10:29:49 +epoch [2/50] batch [350/500] time 1.550 (1.564) data 0.000 (0.003) loss 0.7661 (1.3018) acc 78.1250 (68.2679) lr 2.0000e-03 eta 10:29:42 +epoch [2/50] batch [355/500] time 1.534 (1.564) data 0.000 (0.003) loss 
1.2217 (1.3073) acc 59.3750 (68.1250) lr 2.0000e-03 eta 10:29:31 +epoch [2/50] batch [360/500] time 1.581 (1.564) data 0.000 (0.003) loss 1.4404 (1.3069) acc 65.6250 (68.1250) lr 2.0000e-03 eta 10:29:26 +epoch [2/50] batch [365/500] time 1.555 (1.564) data 0.000 (0.003) loss 1.6787 (1.3076) acc 71.8750 (68.1849) lr 2.0000e-03 eta 10:29:15 +epoch [2/50] batch [370/500] time 1.559 (1.564) data 0.000 (0.003) loss 0.7705 (1.3095) acc 81.2500 (68.1672) lr 2.0000e-03 eta 10:29:05 +epoch [2/50] batch [375/500] time 1.660 (1.565) data 0.000 (0.003) loss 1.0947 (1.3086) acc 81.2500 (68.1750) lr 2.0000e-03 eta 10:29:12 +epoch [2/50] batch [380/500] time 1.578 (1.566) data 0.001 (0.003) loss 1.3682 (1.3086) acc 75.0000 (68.2484) lr 2.0000e-03 eta 10:29:34 +epoch [2/50] batch [385/500] time 1.548 (1.566) data 0.000 (0.002) loss 1.6162 (1.3093) acc 65.6250 (68.2386) lr 2.0000e-03 eta 10:29:28 +epoch [2/50] batch [390/500] time 1.560 (1.566) data 0.001 (0.002) loss 0.5020 (1.3070) acc 90.6250 (68.3093) lr 2.0000e-03 eta 10:29:23 +epoch [2/50] batch [395/500] time 1.547 (1.566) data 0.000 (0.002) loss 2.4316 (1.3069) acc 43.7500 (68.3070) lr 2.0000e-03 eta 10:29:12 +epoch [2/50] batch [400/500] time 1.552 (1.566) data 0.000 (0.002) loss 1.1543 (1.3074) acc 75.0000 (68.3672) lr 2.0000e-03 eta 10:29:01 +epoch [2/50] batch [405/500] time 1.529 (1.566) data 0.001 (0.002) loss 1.0859 (1.3060) acc 78.1250 (68.4259) lr 2.0000e-03 eta 10:28:50 +epoch [2/50] batch [410/500] time 1.559 (1.566) data 0.001 (0.002) loss 1.0137 (1.3050) acc 71.8750 (68.4299) lr 2.0000e-03 eta 10:28:41 +epoch [2/50] batch [415/500] time 1.537 (1.566) data 0.000 (0.002) loss 1.0488 (1.3045) acc 75.0000 (68.4337) lr 2.0000e-03 eta 10:28:29 +epoch [2/50] batch [420/500] time 1.560 (1.566) data 0.001 (0.002) loss 1.2471 (1.3037) acc 68.7500 (68.4747) lr 2.0000e-03 eta 10:28:19 +epoch [2/50] batch [425/500] time 1.576 (1.566) data 0.001 (0.002) loss 0.7466 (1.3027) acc 78.1250 (68.4485) lr 2.0000e-03 eta 10:28:11 
+epoch [2/50] batch [430/500] time 1.555 (1.566) data 0.000 (0.002) loss 1.0918 (1.3041) acc 84.3750 (68.4375) lr 2.0000e-03 eta 10:28:02 +epoch [2/50] batch [435/500] time 1.578 (1.566) data 0.000 (0.002) loss 1.7021 (1.3054) acc 59.3750 (68.4195) lr 2.0000e-03 eta 10:28:02 +epoch [2/50] batch [440/500] time 1.531 (1.566) data 0.000 (0.002) loss 1.5176 (1.3074) acc 65.6250 (68.3523) lr 2.0000e-03 eta 10:27:50 +epoch [2/50] batch [445/500] time 1.551 (1.566) data 0.000 (0.002) loss 1.3896 (1.3077) acc 65.6250 (68.3497) lr 2.0000e-03 eta 10:27:41 +epoch [2/50] batch [450/500] time 1.539 (1.566) data 0.000 (0.002) loss 1.3564 (1.3112) acc 65.6250 (68.2708) lr 2.0000e-03 eta 10:27:31 +epoch [2/50] batch [455/500] time 1.545 (1.565) data 0.000 (0.002) loss 1.4131 (1.3092) acc 59.3750 (68.2967) lr 2.0000e-03 eta 10:27:18 +epoch [2/50] batch [460/500] time 1.569 (1.565) data 0.000 (0.002) loss 0.8896 (1.3068) acc 65.6250 (68.3016) lr 2.0000e-03 eta 10:27:08 +epoch [2/50] batch [465/500] time 1.555 (1.565) data 0.000 (0.002) loss 0.8228 (1.3071) acc 75.0000 (68.2796) lr 2.0000e-03 eta 10:27:00 +epoch [2/50] batch [470/500] time 1.622 (1.566) data 0.000 (0.002) loss 1.7109 (1.3087) acc 59.3750 (68.2846) lr 2.0000e-03 eta 10:27:01 +epoch [2/50] batch [475/500] time 1.693 (1.566) data 0.000 (0.002) loss 1.0137 (1.3076) acc 68.7500 (68.2566) lr 2.0000e-03 eta 10:27:14 +epoch [2/50] batch [480/500] time 1.582 (1.567) data 0.001 (0.002) loss 1.3545 (1.3066) acc 75.0000 (68.3333) lr 2.0000e-03 eta 10:27:15 +epoch [2/50] batch [485/500] time 1.586 (1.567) data 0.001 (0.002) loss 1.2471 (1.3057) acc 68.7500 (68.3247) lr 2.0000e-03 eta 10:27:10 +epoch [2/50] batch [490/500] time 1.562 (1.567) data 0.000 (0.002) loss 1.6396 (1.3040) acc 59.3750 (68.3610) lr 2.0000e-03 eta 10:26:59 +epoch [2/50] batch [495/500] time 1.576 (1.567) data 0.001 (0.002) loss 1.1729 (1.3034) acc 71.8750 (68.3649) lr 2.0000e-03 eta 10:26:52 +epoch [2/50] batch [500/500] time 1.575 (1.567) data 0.000 (0.002) 
loss 0.9146 (1.3015) acc 68.7500 (68.4000) lr 1.9980e-03 eta 10:26:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,278 +* accuracy: 76.6% +* error: 23.4% +* macro_f1: 75.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/500] time 1.555 (1.655) data 0.000 (0.161) loss 2.0117 (1.6623) acc 46.8750 (58.7500) lr 1.9980e-03 eta 11:01:49 +epoch [3/50] batch [10/500] time 1.555 (1.606) data 0.000 (0.081) loss 1.0332 (1.4162) acc 78.1250 (63.4375) lr 1.9980e-03 eta 10:42:01 +epoch [3/50] batch [15/500] time 1.558 (1.589) data 0.000 (0.054) loss 1.4023 (1.3310) acc 68.7500 (65.0000) lr 1.9980e-03 eta 10:35:06 +epoch [3/50] batch [20/500] time 1.567 (1.584) data 0.000 (0.041) loss 1.1230 (1.3204) acc 75.0000 (66.8750) lr 1.9980e-03 eta 10:32:53 +epoch [3/50] batch [25/500] time 1.550 (1.578) data 0.000 (0.033) loss 1.0381 (1.2748) acc 65.6250 (67.5000) lr 1.9980e-03 eta 10:30:24 +epoch [3/50] batch [30/500] time 1.561 (1.574) data 0.000 (0.027) loss 1.9795 (1.2768) acc 59.3750 (67.8125) lr 1.9980e-03 eta 10:28:49 +epoch [3/50] batch [35/500] time 1.696 (1.576) data 0.001 (0.023) loss 0.8716 (1.2351) acc 84.3750 (68.6607) lr 1.9980e-03 eta 10:29:20 +epoch [3/50] batch [40/500] time 1.605 (1.578) data 0.001 (0.021) loss 1.5859 (1.2416) acc 53.1250 (68.1250) lr 1.9980e-03 eta 10:30:02 +epoch [3/50] batch [45/500] time 1.687 (1.588) data 0.001 (0.018) loss 1.1816 (1.2484) acc 75.0000 (68.4028) lr 1.9980e-03 eta 10:33:58 +epoch [3/50] batch [50/500] time 1.564 (1.586) data 0.000 (0.017) loss 1.1445 (1.2576) acc 75.0000 (68.4375) lr 1.9980e-03 eta 10:32:59 +epoch [3/50] batch [55/500] time 1.565 (1.583) data 0.001 (0.015) loss 1.5146 (1.2569) acc 59.3750 (68.1818) lr 1.9980e-03 eta 10:31:50 +epoch [3/50] batch [60/500] time 1.550 (1.582) data 0.000 (0.014) loss 1.1328 (1.2719) acc 75.0000 (68.1771) lr 1.9980e-03 eta 10:31:04 +epoch [3/50] batch 
[65/500] time 1.634 (1.583) data 0.000 (0.013) loss 1.7070 (1.2751) acc 59.3750 (68.0769) lr 1.9980e-03 eta 10:31:21 +epoch [3/50] batch [70/500] time 1.582 (1.585) data 0.000 (0.012) loss 1.3145 (1.2746) acc 59.3750 (67.8571) lr 1.9980e-03 eta 10:32:09 +epoch [3/50] batch [75/500] time 1.585 (1.588) data 0.000 (0.011) loss 1.1006 (1.2799) acc 75.0000 (68.0417) lr 1.9980e-03 eta 10:33:10 +epoch [3/50] batch [80/500] time 1.576 (1.589) data 0.001 (0.011) loss 1.2227 (1.2814) acc 62.5000 (68.1250) lr 1.9980e-03 eta 10:33:34 +epoch [3/50] batch [85/500] time 1.544 (1.587) data 0.000 (0.010) loss 1.3154 (1.2786) acc 71.8750 (68.3824) lr 1.9980e-03 eta 10:32:42 +epoch [3/50] batch [90/500] time 1.589 (1.586) data 0.000 (0.009) loss 1.3203 (1.2642) acc 71.8750 (68.8542) lr 1.9980e-03 eta 10:32:09 +epoch [3/50] batch [95/500] time 1.535 (1.584) data 0.000 (0.009) loss 1.2959 (1.2522) acc 71.8750 (69.1118) lr 1.9980e-03 eta 10:31:14 +epoch [3/50] batch [100/500] time 1.564 (1.584) data 0.001 (0.009) loss 1.3018 (1.2459) acc 59.3750 (69.2188) lr 1.9980e-03 eta 10:30:47 +epoch [3/50] batch [105/500] time 1.545 (1.582) data 0.000 (0.008) loss 1.2100 (1.2501) acc 71.8750 (69.4048) lr 1.9980e-03 eta 10:30:01 +epoch [3/50] batch [110/500] time 1.558 (1.581) data 0.000 (0.008) loss 0.8018 (1.2406) acc 78.1250 (69.5739) lr 1.9980e-03 eta 10:29:21 +epoch [3/50] batch [115/500] time 1.562 (1.580) data 0.000 (0.008) loss 1.1914 (1.2411) acc 62.5000 (69.5924) lr 1.9980e-03 eta 10:28:53 +epoch [3/50] batch [120/500] time 1.552 (1.579) data 0.001 (0.007) loss 1.0977 (1.2373) acc 75.0000 (69.6094) lr 1.9980e-03 eta 10:28:27 +epoch [3/50] batch [125/500] time 1.612 (1.580) data 0.000 (0.007) loss 1.7559 (1.2450) acc 65.6250 (69.5500) lr 1.9980e-03 eta 10:28:32 +epoch [3/50] batch [130/500] time 1.657 (1.582) data 0.000 (0.007) loss 1.1387 (1.2410) acc 75.0000 (69.6635) lr 1.9980e-03 eta 10:29:18 +epoch [3/50] batch [135/500] time 1.560 (1.583) data 0.000 (0.006) loss 1.3643 (1.2332) acc 
68.7500 (69.8611) lr 1.9980e-03 eta 10:29:27 +epoch [3/50] batch [140/500] time 1.581 (1.582) data 0.001 (0.006) loss 0.9819 (1.2339) acc 71.8750 (69.8884) lr 1.9980e-03 eta 10:29:07 +epoch [3/50] batch [145/500] time 1.566 (1.582) data 0.001 (0.006) loss 0.9292 (1.2302) acc 84.3750 (70.0431) lr 1.9980e-03 eta 10:28:57 +epoch [3/50] batch [150/500] time 1.576 (1.581) data 0.000 (0.006) loss 1.8154 (1.2310) acc 65.6250 (70.2500) lr 1.9980e-03 eta 10:28:29 +epoch [3/50] batch [155/500] time 1.550 (1.580) data 0.000 (0.006) loss 1.3721 (1.2265) acc 65.6250 (70.2419) lr 1.9980e-03 eta 10:28:02 +epoch [3/50] batch [160/500] time 1.540 (1.579) data 0.000 (0.006) loss 0.9526 (1.2252) acc 75.0000 (70.1953) lr 1.9980e-03 eta 10:27:34 +epoch [3/50] batch [165/500] time 1.558 (1.579) data 0.001 (0.005) loss 1.3564 (1.2263) acc 59.3750 (69.9432) lr 1.9980e-03 eta 10:27:14 +epoch [3/50] batch [170/500] time 1.573 (1.578) data 0.000 (0.005) loss 1.9707 (1.2291) acc 56.2500 (69.9081) lr 1.9980e-03 eta 10:26:54 +epoch [3/50] batch [175/500] time 1.560 (1.578) data 0.001 (0.005) loss 1.3447 (1.2270) acc 59.3750 (69.9286) lr 1.9980e-03 eta 10:26:35 +epoch [3/50] batch [180/500] time 1.553 (1.578) data 0.000 (0.005) loss 0.7598 (1.2340) acc 81.2500 (69.8611) lr 1.9980e-03 eta 10:26:35 +epoch [3/50] batch [185/500] time 1.567 (1.578) data 0.000 (0.005) loss 1.7090 (1.2367) acc 65.6250 (69.8649) lr 1.9980e-03 eta 10:26:13 +epoch [3/50] batch [190/500] time 1.558 (1.577) data 0.000 (0.005) loss 1.0781 (1.2362) acc 71.8750 (69.8520) lr 1.9980e-03 eta 10:25:54 +epoch [3/50] batch [195/500] time 1.570 (1.577) data 0.001 (0.005) loss 1.3027 (1.2314) acc 75.0000 (70.0000) lr 1.9980e-03 eta 10:25:39 +epoch [3/50] batch [200/500] time 1.547 (1.577) data 0.000 (0.005) loss 1.3936 (1.2267) acc 71.8750 (70.0938) lr 1.9980e-03 eta 10:25:21 +epoch [3/50] batch [205/500] time 1.607 (1.577) data 0.000 (0.004) loss 1.0244 (1.2241) acc 75.0000 (70.1220) lr 1.9980e-03 eta 10:25:22 +epoch [3/50] batch 
[210/500] time 1.606 (1.579) data 0.000 (0.004) loss 1.4238 (1.2288) acc 65.6250 (70.1339) lr 1.9980e-03 eta 10:26:03 +epoch [3/50] batch [215/500] time 1.524 (1.580) data 0.000 (0.004) loss 1.2236 (1.2328) acc 68.7500 (70.1308) lr 1.9980e-03 eta 10:26:13 +epoch [3/50] batch [220/500] time 1.550 (1.579) data 0.000 (0.004) loss 1.3301 (1.2364) acc 71.8750 (69.9858) lr 1.9980e-03 eta 10:25:57 +epoch [3/50] batch [225/500] time 1.580 (1.579) data 0.000 (0.004) loss 1.2734 (1.2434) acc 62.5000 (69.8750) lr 1.9980e-03 eta 10:25:42 +epoch [3/50] batch [230/500] time 1.579 (1.579) data 0.000 (0.004) loss 1.8096 (1.2460) acc 62.5000 (69.7826) lr 1.9980e-03 eta 10:25:32 +epoch [3/50] batch [235/500] time 1.588 (1.579) data 0.000 (0.004) loss 1.3721 (1.2463) acc 56.2500 (69.7606) lr 1.9980e-03 eta 10:25:20 +epoch [3/50] batch [240/500] time 1.582 (1.578) data 0.000 (0.004) loss 1.4688 (1.2462) acc 65.6250 (69.7266) lr 1.9980e-03 eta 10:25:03 +epoch [3/50] batch [245/500] time 1.561 (1.578) data 0.000 (0.004) loss 1.4209 (1.2476) acc 62.5000 (69.6556) lr 1.9980e-03 eta 10:24:51 +epoch [3/50] batch [250/500] time 1.617 (1.579) data 0.000 (0.004) loss 0.8022 (1.2458) acc 81.2500 (69.7125) lr 1.9980e-03 eta 10:24:50 +epoch [3/50] batch [255/500] time 1.619 (1.580) data 0.000 (0.004) loss 1.2803 (1.2434) acc 68.7500 (69.7059) lr 1.9980e-03 eta 10:25:06 +epoch [3/50] batch [260/500] time 1.559 (1.580) data 0.000 (0.004) loss 0.9209 (1.2409) acc 81.2500 (69.6995) lr 1.9980e-03 eta 10:25:11 +epoch [3/50] batch [265/500] time 1.563 (1.580) data 0.000 (0.003) loss 1.1641 (1.2419) acc 62.5000 (69.6344) lr 1.9980e-03 eta 10:24:54 +epoch [3/50] batch [270/500] time 1.560 (1.579) data 0.000 (0.003) loss 1.5391 (1.2430) acc 65.6250 (69.5833) lr 1.9980e-03 eta 10:24:39 +epoch [3/50] batch [275/500] time 1.533 (1.579) data 0.000 (0.003) loss 1.3955 (1.2429) acc 65.6250 (69.5568) lr 1.9980e-03 eta 10:24:19 +epoch [3/50] batch [280/500] time 1.547 (1.579) data 0.000 (0.003) loss 0.5273 
(1.2387) acc 90.6250 (69.6652) lr 1.9980e-03 eta 10:24:09 +epoch [3/50] batch [285/500] time 1.645 (1.579) data 0.000 (0.003) loss 1.4336 (1.2400) acc 59.3750 (69.6601) lr 1.9980e-03 eta 10:24:11 +epoch [3/50] batch [290/500] time 1.673 (1.581) data 0.000 (0.003) loss 1.1748 (1.2422) acc 68.7500 (69.5690) lr 1.9980e-03 eta 10:24:39 +epoch [3/50] batch [295/500] time 1.571 (1.581) data 0.000 (0.003) loss 1.1289 (1.2415) acc 71.8750 (69.5339) lr 1.9980e-03 eta 10:24:34 +epoch [3/50] batch [300/500] time 1.551 (1.580) data 0.000 (0.003) loss 0.9497 (1.2412) acc 71.8750 (69.5208) lr 1.9980e-03 eta 10:24:16 +epoch [3/50] batch [305/500] time 1.542 (1.580) data 0.000 (0.003) loss 1.5264 (1.2457) acc 62.5000 (69.4160) lr 1.9980e-03 eta 10:24:02 +epoch [3/50] batch [310/500] time 1.586 (1.580) data 0.001 (0.003) loss 0.6758 (1.2386) acc 78.1250 (69.5968) lr 1.9980e-03 eta 10:23:51 +epoch [3/50] batch [315/500] time 1.579 (1.580) data 0.001 (0.003) loss 1.9258 (1.2429) acc 50.0000 (69.5139) lr 1.9980e-03 eta 10:23:40 +epoch [3/50] batch [320/500] time 1.572 (1.580) data 0.000 (0.003) loss 1.3369 (1.2450) acc 68.7500 (69.4824) lr 1.9980e-03 eta 10:23:26 +epoch [3/50] batch [325/500] time 1.568 (1.580) data 0.001 (0.003) loss 1.1367 (1.2425) acc 75.0000 (69.5673) lr 1.9980e-03 eta 10:23:19 +epoch [3/50] batch [330/500] time 1.567 (1.579) data 0.001 (0.003) loss 1.8594 (1.2457) acc 56.2500 (69.4792) lr 1.9980e-03 eta 10:23:01 +epoch [3/50] batch [335/500] time 1.564 (1.579) data 0.000 (0.003) loss 0.7114 (1.2422) acc 78.1250 (69.5149) lr 1.9980e-03 eta 10:22:46 +epoch [3/50] batch [340/500] time 1.536 (1.579) data 0.000 (0.003) loss 0.7476 (1.2405) acc 84.3750 (69.5864) lr 1.9980e-03 eta 10:22:28 +epoch [3/50] batch [345/500] time 1.590 (1.578) data 0.000 (0.003) loss 1.3877 (1.2426) acc 68.7500 (69.5290) lr 1.9980e-03 eta 10:22:14 +epoch [3/50] batch [350/500] time 1.564 (1.578) data 0.000 (0.003) loss 1.6250 (1.2422) acc 59.3750 (69.5625) lr 1.9980e-03 eta 10:21:59 +epoch 
[3/50] batch [355/500] time 1.551 (1.578) data 0.001 (0.003) loss 1.1211 (1.2442) acc 59.3750 (69.4806) lr 1.9980e-03 eta 10:21:46 +epoch [3/50] batch [360/500] time 1.550 (1.578) data 0.000 (0.003) loss 1.6533 (1.2463) acc 65.6250 (69.3750) lr 1.9980e-03 eta 10:21:32 +epoch [3/50] batch [365/500] time 1.564 (1.577) data 0.000 (0.003) loss 1.1748 (1.2465) acc 68.7500 (69.3921) lr 1.9980e-03 eta 10:21:21 +epoch [3/50] batch [370/500] time 1.578 (1.577) data 0.000 (0.003) loss 1.2334 (1.2454) acc 78.1250 (69.3750) lr 1.9980e-03 eta 10:21:09 +epoch [3/50] batch [375/500] time 1.561 (1.577) data 0.000 (0.003) loss 1.1953 (1.2447) acc 68.7500 (69.4083) lr 1.9980e-03 eta 10:20:55 +epoch [3/50] batch [380/500] time 1.569 (1.577) data 0.000 (0.003) loss 1.4150 (1.2461) acc 75.0000 (69.3832) lr 1.9980e-03 eta 10:20:44 +epoch [3/50] batch [385/500] time 1.580 (1.577) data 0.000 (0.003) loss 0.7090 (1.2436) acc 81.2500 (69.4075) lr 1.9980e-03 eta 10:20:34 +epoch [3/50] batch [390/500] time 1.574 (1.577) data 0.000 (0.003) loss 1.4736 (1.2430) acc 65.6250 (69.4631) lr 1.9980e-03 eta 10:20:22 +epoch [3/50] batch [395/500] time 1.582 (1.576) data 0.000 (0.002) loss 1.4951 (1.2438) acc 75.0000 (69.5095) lr 1.9980e-03 eta 10:20:12 +epoch [3/50] batch [400/500] time 1.567 (1.577) data 0.000 (0.002) loss 1.3945 (1.2459) acc 65.6250 (69.4766) lr 1.9980e-03 eta 10:20:05 +epoch [3/50] batch [405/500] time 1.559 (1.576) data 0.000 (0.002) loss 0.8706 (1.2433) acc 78.1250 (69.5293) lr 1.9980e-03 eta 10:19:57 +epoch [3/50] batch [410/500] time 1.569 (1.576) data 0.001 (0.002) loss 1.4961 (1.2434) acc 65.6250 (69.5808) lr 1.9980e-03 eta 10:19:42 +epoch [3/50] batch [415/500] time 1.577 (1.576) data 0.000 (0.002) loss 2.0527 (1.2469) acc 53.1250 (69.4880) lr 1.9980e-03 eta 10:19:30 +epoch [3/50] batch [420/500] time 1.675 (1.576) data 0.001 (0.002) loss 1.3770 (1.2465) acc 65.6250 (69.4345) lr 1.9980e-03 eta 10:19:24 +epoch [3/50] batch [425/500] time 1.584 (1.576) data 0.000 (0.002) loss 
1.4131 (1.2470) acc 68.7500 (69.4191) lr 1.9980e-03 eta 10:19:10 +epoch [3/50] batch [430/500] time 1.555 (1.576) data 0.001 (0.002) loss 1.7168 (1.2459) acc 62.5000 (69.4913) lr 1.9980e-03 eta 10:18:59 +epoch [3/50] batch [435/500] time 1.558 (1.576) data 0.000 (0.002) loss 1.4170 (1.2459) acc 56.2500 (69.4684) lr 1.9980e-03 eta 10:18:48 +epoch [3/50] batch [440/500] time 1.530 (1.575) data 0.000 (0.002) loss 1.1709 (1.2458) acc 68.7500 (69.4318) lr 1.9980e-03 eta 10:18:36 +epoch [3/50] batch [445/500] time 1.563 (1.575) data 0.001 (0.002) loss 0.8955 (1.2464) acc 78.1250 (69.4312) lr 1.9980e-03 eta 10:18:24 +epoch [3/50] batch [450/500] time 1.551 (1.575) data 0.000 (0.002) loss 0.9526 (1.2455) acc 81.2500 (69.4583) lr 1.9980e-03 eta 10:18:10 +epoch [3/50] batch [455/500] time 1.560 (1.575) data 0.000 (0.002) loss 0.9551 (1.2462) acc 75.0000 (69.4780) lr 1.9980e-03 eta 10:17:58 +epoch [3/50] batch [460/500] time 1.551 (1.575) data 0.001 (0.002) loss 0.9155 (1.2448) acc 78.1250 (69.5041) lr 1.9980e-03 eta 10:17:47 +epoch [3/50] batch [465/500] time 1.585 (1.575) data 0.001 (0.002) loss 1.2549 (1.2459) acc 68.7500 (69.5027) lr 1.9980e-03 eta 10:17:43 +epoch [3/50] batch [470/500] time 1.547 (1.575) data 0.000 (0.002) loss 1.1553 (1.2429) acc 71.8750 (69.5479) lr 1.9980e-03 eta 10:17:31 +epoch [3/50] batch [475/500] time 1.546 (1.575) data 0.001 (0.002) loss 1.4355 (1.2431) acc 68.7500 (69.5724) lr 1.9980e-03 eta 10:17:22 +epoch [3/50] batch [480/500] time 1.568 (1.574) data 0.000 (0.002) loss 1.4873 (1.2438) acc 62.5000 (69.5768) lr 1.9980e-03 eta 10:17:11 +epoch [3/50] batch [485/500] time 1.559 (1.574) data 0.001 (0.002) loss 1.0420 (1.2426) acc 71.8750 (69.5747) lr 1.9980e-03 eta 10:16:59 +epoch [3/50] batch [490/500] time 1.565 (1.574) data 0.000 (0.002) loss 1.1377 (1.2422) acc 71.8750 (69.5855) lr 1.9980e-03 eta 10:16:46 +epoch [3/50] batch [495/500] time 1.576 (1.574) data 0.000 (0.002) loss 1.2266 (1.2433) acc 65.6250 (69.5265) lr 1.9980e-03 eta 10:16:36 
+epoch [3/50] batch [500/500] time 1.551 (1.574) data 0.000 (0.002) loss 1.4248 (1.2423) acc 68.7500 (69.5438) lr 1.9921e-03 eta 10:16:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,505 +* accuracy: 77.0% +* error: 23.0% +* macro_f1: 76.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [4/50] batch [5/500] time 1.579 (1.704) data 0.000 (0.190) loss 1.0117 (1.0842) acc 68.7500 (70.0000) lr 1.9921e-03 eta 11:07:12 +epoch [4/50] batch [10/500] time 1.570 (1.634) data 0.001 (0.095) loss 1.2666 (1.0937) acc 59.3750 (70.6250) lr 1.9921e-03 eta 10:39:46 +epoch [4/50] batch [15/500] time 1.574 (1.609) data 0.000 (0.064) loss 1.4385 (1.1573) acc 65.6250 (70.0000) lr 1.9921e-03 eta 10:29:56 +epoch [4/50] batch [20/500] time 1.568 (1.596) data 0.000 (0.048) loss 1.1191 (1.1961) acc 75.0000 (69.3750) lr 1.9921e-03 eta 10:24:22 +epoch [4/50] batch [25/500] time 1.565 (1.590) data 0.001 (0.038) loss 0.9971 (1.1626) acc 65.6250 (69.8750) lr 1.9921e-03 eta 10:22:04 +epoch [4/50] batch [30/500] time 1.555 (1.587) data 0.000 (0.032) loss 1.2852 (1.1376) acc 68.7500 (70.4167) lr 1.9921e-03 eta 10:20:44 +epoch [4/50] batch [35/500] time 1.544 (1.582) data 0.001 (0.027) loss 1.3730 (1.1701) acc 65.6250 (69.8214) lr 1.9921e-03 eta 10:18:33 +epoch [4/50] batch [40/500] time 1.566 (1.580) data 0.001 (0.024) loss 1.6885 (1.1932) acc 75.0000 (69.8438) lr 1.9921e-03 eta 10:17:58 +epoch [4/50] batch [45/500] time 1.568 (1.579) data 0.000 (0.021) loss 1.0000 (1.1524) acc 78.1250 (70.9722) lr 1.9921e-03 eta 10:17:11 +epoch [4/50] batch [50/500] time 1.561 (1.577) data 0.000 (0.019) loss 1.0430 (1.1627) acc 71.8750 (70.3125) lr 1.9921e-03 eta 10:16:25 +epoch [4/50] batch [55/500] time 1.550 (1.575) data 0.000 (0.018) loss 1.0234 (1.1496) acc 78.1250 (70.8523) lr 1.9921e-03 eta 10:15:29 +epoch [4/50] batch [60/500] time 1.564 (1.574) data 0.001 (0.016) loss 1.4580 (1.1664) acc 
71.8750 (70.6771) lr 1.9921e-03 eta 10:14:57 +epoch [4/50] batch [65/500] time 1.557 (1.573) data 0.000 (0.015) loss 1.1709 (1.1770) acc 75.0000 (70.5288) lr 1.9921e-03 eta 10:14:22 +epoch [4/50] batch [70/500] time 1.571 (1.571) data 0.001 (0.014) loss 1.4639 (1.1951) acc 56.2500 (70.2232) lr 1.9921e-03 eta 10:13:38 +epoch [4/50] batch [75/500] time 1.561 (1.571) data 0.001 (0.013) loss 1.4580 (1.2051) acc 68.7500 (70.0833) lr 1.9921e-03 eta 10:13:30 +epoch [4/50] batch [80/500] time 1.565 (1.571) data 0.000 (0.012) loss 1.2051 (1.2118) acc 75.0000 (69.9609) lr 1.9921e-03 eta 10:13:05 +epoch [4/50] batch [85/500] time 1.552 (1.570) data 0.000 (0.012) loss 1.1973 (1.2102) acc 71.8750 (69.9632) lr 1.9921e-03 eta 10:12:42 +epoch [4/50] batch [90/500] time 1.565 (1.570) data 0.000 (0.011) loss 1.6445 (1.2122) acc 71.8750 (70.1042) lr 1.9921e-03 eta 10:12:32 +epoch [4/50] batch [95/500] time 1.545 (1.570) data 0.000 (0.010) loss 1.9326 (1.2112) acc 62.5000 (70.0329) lr 1.9921e-03 eta 10:12:24 +epoch [4/50] batch [100/500] time 1.535 (1.569) data 0.000 (0.010) loss 1.1172 (1.2098) acc 78.1250 (69.9688) lr 1.9921e-03 eta 10:12:00 +epoch [4/50] batch [105/500] time 1.545 (1.569) data 0.000 (0.009) loss 1.0703 (1.2214) acc 75.0000 (69.7619) lr 1.9921e-03 eta 10:11:41 +epoch [4/50] batch [110/500] time 1.562 (1.568) data 0.000 (0.009) loss 1.5508 (1.2382) acc 65.6250 (69.3750) lr 1.9921e-03 eta 10:11:19 +epoch [4/50] batch [115/500] time 1.562 (1.568) data 0.000 (0.009) loss 1.5566 (1.2333) acc 59.3750 (69.4293) lr 1.9921e-03 eta 10:11:01 +epoch [4/50] batch [120/500] time 1.556 (1.567) data 0.001 (0.008) loss 1.4326 (1.2462) acc 68.7500 (69.3490) lr 1.9921e-03 eta 10:10:45 +epoch [4/50] batch [125/500] time 1.569 (1.567) data 0.000 (0.008) loss 1.2109 (1.2422) acc 71.8750 (69.3500) lr 1.9921e-03 eta 10:10:38 +epoch [4/50] batch [130/500] time 1.566 (1.567) data 0.000 (0.008) loss 0.9985 (1.2418) acc 81.2500 (69.5673) lr 1.9921e-03 eta 10:10:22 +epoch [4/50] batch [135/500] 
time 1.549 (1.567) data 0.000 (0.007) loss 0.7749 (1.2329) acc 84.3750 (69.7222) lr 1.9921e-03 eta 10:10:04 +epoch [4/50] batch [140/500] time 1.560 (1.567) data 0.000 (0.007) loss 0.9810 (1.2294) acc 71.8750 (69.7991) lr 1.9921e-03 eta 10:09:54 +epoch [4/50] batch [145/500] time 1.558 (1.566) data 0.000 (0.007) loss 0.9033 (1.2265) acc 75.0000 (69.6767) lr 1.9921e-03 eta 10:09:37 +epoch [4/50] batch [150/500] time 1.595 (1.566) data 0.000 (0.007) loss 1.0654 (1.2325) acc 68.7500 (69.5208) lr 1.9921e-03 eta 10:09:29 +epoch [4/50] batch [155/500] time 1.561 (1.566) data 0.000 (0.007) loss 1.7422 (1.2398) acc 65.6250 (69.3952) lr 1.9921e-03 eta 10:09:16 +epoch [4/50] batch [160/500] time 1.572 (1.566) data 0.000 (0.006) loss 1.3193 (1.2409) acc 71.8750 (69.3555) lr 1.9921e-03 eta 10:09:09 +epoch [4/50] batch [165/500] time 1.588 (1.566) data 0.001 (0.006) loss 1.0908 (1.2354) acc 65.6250 (69.4129) lr 1.9921e-03 eta 10:09:05 +epoch [4/50] batch [170/500] time 1.577 (1.566) data 0.001 (0.006) loss 1.6221 (1.2378) acc 62.5000 (69.4853) lr 1.9921e-03 eta 10:08:54 +epoch [4/50] batch [175/500] time 1.587 (1.566) data 0.000 (0.006) loss 0.6177 (1.2329) acc 81.2500 (69.5000) lr 1.9921e-03 eta 10:08:52 +epoch [4/50] batch [180/500] time 1.551 (1.566) data 0.000 (0.006) loss 1.5752 (1.2312) acc 65.6250 (69.4618) lr 1.9921e-03 eta 10:08:46 +epoch [4/50] batch [185/500] time 1.584 (1.567) data 0.000 (0.006) loss 1.7217 (1.2269) acc 62.5000 (69.6284) lr 1.9921e-03 eta 10:08:45 +epoch [4/50] batch [190/500] time 1.597 (1.567) data 0.000 (0.005) loss 1.2354 (1.2381) acc 62.5000 (69.5066) lr 1.9921e-03 eta 10:08:38 +epoch [4/50] batch [195/500] time 1.561 (1.567) data 0.001 (0.005) loss 1.0664 (1.2369) acc 78.1250 (69.5994) lr 1.9921e-03 eta 10:08:44 +epoch [4/50] batch [200/500] time 1.562 (1.567) data 0.000 (0.005) loss 0.9712 (1.2392) acc 65.6250 (69.5625) lr 1.9921e-03 eta 10:08:33 +epoch [4/50] batch [205/500] time 1.562 (1.567) data 0.000 (0.005) loss 1.1055 (1.2396) acc 
65.6250 (69.6189) lr 1.9921e-03 eta 10:08:14 +epoch [4/50] batch [210/500] time 1.581 (1.566) data 0.000 (0.005) loss 1.1250 (1.2449) acc 68.7500 (69.3750) lr 1.9921e-03 eta 10:07:59 +epoch [4/50] batch [215/500] time 1.568 (1.566) data 0.000 (0.005) loss 1.5010 (1.2452) acc 59.3750 (69.3750) lr 1.9921e-03 eta 10:07:49 +epoch [4/50] batch [220/500] time 1.567 (1.566) data 0.000 (0.005) loss 1.8496 (1.2496) acc 50.0000 (69.2045) lr 1.9921e-03 eta 10:07:41 +epoch [4/50] batch [225/500] time 1.564 (1.566) data 0.000 (0.005) loss 0.8960 (1.2456) acc 71.8750 (69.1667) lr 1.9921e-03 eta 10:07:37 +epoch [4/50] batch [230/500] time 1.570 (1.566) data 0.001 (0.005) loss 1.9600 (1.2480) acc 62.5000 (69.1712) lr 1.9921e-03 eta 10:07:23 +epoch [4/50] batch [235/500] time 1.566 (1.566) data 0.000 (0.004) loss 1.4814 (1.2479) acc 62.5000 (69.1755) lr 1.9921e-03 eta 10:07:05 +epoch [4/50] batch [240/500] time 1.566 (1.566) data 0.000 (0.004) loss 1.2100 (1.2486) acc 68.7500 (69.1536) lr 1.9921e-03 eta 10:06:59 +epoch [4/50] batch [245/500] time 1.532 (1.565) data 0.000 (0.004) loss 1.2988 (1.2456) acc 71.8750 (69.2857) lr 1.9921e-03 eta 10:06:42 +epoch [4/50] batch [250/500] time 1.567 (1.565) data 0.000 (0.004) loss 1.3262 (1.2420) acc 65.6250 (69.3375) lr 1.9921e-03 eta 10:06:31 +epoch [4/50] batch [255/500] time 1.547 (1.565) data 0.000 (0.004) loss 1.1543 (1.2448) acc 78.1250 (69.3137) lr 1.9921e-03 eta 10:06:21 +epoch [4/50] batch [260/500] time 1.550 (1.565) data 0.000 (0.004) loss 1.4512 (1.2440) acc 68.7500 (69.3269) lr 1.9921e-03 eta 10:06:07 +epoch [4/50] batch [265/500] time 1.565 (1.565) data 0.000 (0.004) loss 2.0156 (1.2491) acc 65.6250 (69.2453) lr 1.9921e-03 eta 10:05:55 +epoch [4/50] batch [270/500] time 1.564 (1.565) data 0.000 (0.004) loss 0.8750 (1.2496) acc 65.6250 (69.1551) lr 1.9921e-03 eta 10:05:46 +epoch [4/50] batch [275/500] time 1.563 (1.565) data 0.000 (0.004) loss 1.8525 (1.2486) acc 46.8750 (69.0909) lr 1.9921e-03 eta 10:05:40 +epoch [4/50] batch 
[280/500] time 1.556 (1.565) data 0.000 (0.004) loss 1.4209 (1.2503) acc 59.3750 (69.1071) lr 1.9921e-03 eta 10:05:32 +epoch [4/50] batch [285/500] time 1.539 (1.565) data 0.000 (0.004) loss 1.7041 (1.2529) acc 62.5000 (69.0570) lr 1.9921e-03 eta 10:05:20 +epoch [4/50] batch [290/500] time 1.562 (1.565) data 0.000 (0.004) loss 1.1855 (1.2579) acc 68.7500 (69.0517) lr 1.9921e-03 eta 10:05:14 +epoch [4/50] batch [295/500] time 1.559 (1.565) data 0.001 (0.004) loss 1.2637 (1.2580) acc 68.7500 (69.0360) lr 1.9921e-03 eta 10:05:07 +epoch [4/50] batch [300/500] time 1.548 (1.564) data 0.000 (0.004) loss 1.1660 (1.2598) acc 71.8750 (69.0208) lr 1.9921e-03 eta 10:04:55 +epoch [4/50] batch [305/500] time 1.542 (1.564) data 0.000 (0.004) loss 1.2051 (1.2588) acc 68.7500 (69.0369) lr 1.9921e-03 eta 10:04:39 +epoch [4/50] batch [310/500] time 1.558 (1.564) data 0.000 (0.003) loss 1.7178 (1.2602) acc 65.6250 (69.0524) lr 1.9921e-03 eta 10:04:27 +epoch [4/50] batch [315/500] time 1.563 (1.564) data 0.000 (0.003) loss 1.4180 (1.2618) acc 62.5000 (69.0079) lr 1.9921e-03 eta 10:04:17 +epoch [4/50] batch [320/500] time 1.580 (1.564) data 0.000 (0.003) loss 0.6138 (1.2596) acc 84.3750 (69.0625) lr 1.9921e-03 eta 10:04:09 +epoch [4/50] batch [325/500] time 1.534 (1.564) data 0.000 (0.003) loss 0.8281 (1.2554) acc 78.1250 (69.0865) lr 1.9921e-03 eta 10:03:58 +epoch [4/50] batch [330/500] time 1.536 (1.563) data 0.000 (0.003) loss 1.7598 (1.2550) acc 68.7500 (69.1098) lr 1.9921e-03 eta 10:03:46 +epoch [4/50] batch [335/500] time 1.571 (1.563) data 0.000 (0.003) loss 1.1611 (1.2548) acc 68.7500 (69.0951) lr 1.9921e-03 eta 10:03:37 +epoch [4/50] batch [340/500] time 1.554 (1.564) data 0.000 (0.003) loss 1.7158 (1.2602) acc 65.6250 (69.0257) lr 1.9921e-03 eta 10:03:31 +epoch [4/50] batch [345/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.6377 (1.2625) acc 50.0000 (68.9130) lr 1.9921e-03 eta 10:03:21 +epoch [4/50] batch [350/500] time 1.561 (1.563) data 0.000 (0.003) loss 1.1768 
(1.2657) acc 68.7500 (68.8304) lr 1.9921e-03 eta 10:03:13 +epoch [4/50] batch [355/500] time 1.573 (1.564) data 0.000 (0.003) loss 1.1328 (1.2642) acc 71.8750 (68.8644) lr 1.9921e-03 eta 10:03:07 +epoch [4/50] batch [360/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.5342 (1.2629) acc 68.7500 (68.8628) lr 1.9921e-03 eta 10:03:02 +epoch [4/50] batch [365/500] time 1.557 (1.564) data 0.000 (0.003) loss 0.9419 (1.2591) acc 75.0000 (68.9298) lr 1.9921e-03 eta 10:02:53 +epoch [4/50] batch [370/500] time 1.571 (1.564) data 0.000 (0.003) loss 1.5430 (1.2614) acc 68.7500 (68.9527) lr 1.9921e-03 eta 10:02:45 +epoch [4/50] batch [375/500] time 1.559 (1.564) data 0.000 (0.003) loss 1.5049 (1.2603) acc 68.7500 (68.9917) lr 1.9921e-03 eta 10:02:36 +epoch [4/50] batch [380/500] time 1.658 (1.564) data 0.000 (0.003) loss 1.0547 (1.2558) acc 78.1250 (69.1447) lr 1.9921e-03 eta 10:02:36 +epoch [4/50] batch [385/500] time 1.567 (1.564) data 0.000 (0.003) loss 1.1650 (1.2533) acc 71.8750 (69.2127) lr 1.9921e-03 eta 10:02:28 +epoch [4/50] batch [390/500] time 1.575 (1.564) data 0.000 (0.003) loss 0.7842 (1.2510) acc 81.2500 (69.2548) lr 1.9921e-03 eta 10:02:21 +epoch [4/50] batch [395/500] time 1.542 (1.564) data 0.000 (0.003) loss 0.6729 (1.2513) acc 84.3750 (69.2722) lr 1.9921e-03 eta 10:02:12 +epoch [4/50] batch [400/500] time 1.549 (1.564) data 0.000 (0.003) loss 1.5723 (1.2535) acc 65.6250 (69.2266) lr 1.9921e-03 eta 10:02:01 +epoch [4/50] batch [405/500] time 1.556 (1.564) data 0.001 (0.003) loss 1.0869 (1.2527) acc 78.1250 (69.2284) lr 1.9921e-03 eta 10:01:55 +epoch [4/50] batch [410/500] time 1.545 (1.564) data 0.001 (0.003) loss 0.9116 (1.2521) acc 75.0000 (69.1845) lr 1.9921e-03 eta 10:01:44 +epoch [4/50] batch [415/500] time 1.569 (1.564) data 0.000 (0.003) loss 1.0850 (1.2500) acc 71.8750 (69.2470) lr 1.9921e-03 eta 10:01:37 +epoch [4/50] batch [420/500] time 1.566 (1.564) data 0.000 (0.003) loss 1.1279 (1.2513) acc 65.6250 (69.2113) lr 1.9921e-03 eta 10:01:29 +epoch 
[4/50] batch [425/500] time 1.578 (1.564) data 0.000 (0.003) loss 1.4443 (1.2499) acc 62.5000 (69.2647) lr 1.9921e-03 eta 10:01:21 +epoch [4/50] batch [430/500] time 1.553 (1.564) data 0.000 (0.003) loss 1.2422 (1.2486) acc 78.1250 (69.2805) lr 1.9921e-03 eta 10:01:12 +epoch [4/50] batch [435/500] time 1.552 (1.563) data 0.001 (0.003) loss 1.2695 (1.2494) acc 62.5000 (69.2601) lr 1.9921e-03 eta 10:01:01 +epoch [4/50] batch [440/500] time 1.526 (1.563) data 0.000 (0.003) loss 1.3135 (1.2484) acc 71.8750 (69.2756) lr 1.9921e-03 eta 10:00:47 +epoch [4/50] batch [445/500] time 1.542 (1.563) data 0.000 (0.003) loss 1.0078 (1.2468) acc 62.5000 (69.3048) lr 1.9921e-03 eta 10:00:39 +epoch [4/50] batch [450/500] time 1.571 (1.563) data 0.000 (0.003) loss 1.3154 (1.2476) acc 62.5000 (69.2917) lr 1.9921e-03 eta 10:00:31 +epoch [4/50] batch [455/500] time 1.545 (1.563) data 0.000 (0.002) loss 1.8447 (1.2491) acc 65.6250 (69.2514) lr 1.9921e-03 eta 10:00:24 +epoch [4/50] batch [460/500] time 1.571 (1.563) data 0.000 (0.002) loss 2.0859 (1.2496) acc 59.3750 (69.2323) lr 1.9921e-03 eta 10:00:15 +epoch [4/50] batch [465/500] time 1.544 (1.563) data 0.000 (0.002) loss 1.3457 (1.2475) acc 62.5000 (69.2608) lr 1.9921e-03 eta 10:00:05 +epoch [4/50] batch [470/500] time 1.591 (1.563) data 0.000 (0.002) loss 1.3936 (1.2486) acc 62.5000 (69.2553) lr 1.9921e-03 eta 9:59:59 +epoch [4/50] batch [475/500] time 1.576 (1.563) data 0.000 (0.002) loss 0.9556 (1.2460) acc 75.0000 (69.3026) lr 1.9921e-03 eta 9:59:51 +epoch [4/50] batch [480/500] time 1.602 (1.563) data 0.000 (0.002) loss 0.5884 (1.2435) acc 78.1250 (69.3490) lr 1.9921e-03 eta 9:59:50 +epoch [4/50] batch [485/500] time 1.554 (1.564) data 0.001 (0.002) loss 0.9624 (1.2406) acc 75.0000 (69.4137) lr 1.9921e-03 eta 9:59:44 +epoch [4/50] batch [490/500] time 1.567 (1.563) data 0.000 (0.002) loss 0.9971 (1.2381) acc 81.2500 (69.4770) lr 1.9921e-03 eta 9:59:35 +epoch [4/50] batch [495/500] time 1.544 (1.563) data 0.001 (0.002) loss 1.2695 
(1.2374) acc 71.8750 (69.4760) lr 1.9921e-03 eta 9:59:23 +epoch [4/50] batch [500/500] time 1.541 (1.563) data 0.000 (0.002) loss 0.9971 (1.2377) acc 71.8750 (69.4813) lr 1.9823e-03 eta 9:59:13 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,742 +* accuracy: 77.5% +* error: 22.5% +* macro_f1: 76.8% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [5/50] batch [5/500] time 1.559 (1.647) data 0.001 (0.148) loss 0.9780 (1.2434) acc 78.1250 (68.1250) lr 1.9823e-03 eta 10:31:12 +epoch [5/50] batch [10/500] time 1.575 (1.606) data 0.001 (0.074) loss 1.3682 (1.2698) acc 68.7500 (67.8125) lr 1.9823e-03 eta 10:15:25 +epoch [5/50] batch [15/500] time 1.562 (1.592) data 0.000 (0.050) loss 1.2891 (1.2788) acc 71.8750 (68.5417) lr 1.9823e-03 eta 10:10:01 +epoch [5/50] batch [20/500] time 1.548 (1.586) data 0.000 (0.037) loss 1.2607 (1.2253) acc 71.8750 (69.2188) lr 1.9823e-03 eta 10:07:18 +epoch [5/50] batch [25/500] time 1.557 (1.581) data 0.001 (0.030) loss 0.7227 (1.1727) acc 81.2500 (70.5000) lr 1.9823e-03 eta 10:05:29 +epoch [5/50] batch [30/500] time 1.561 (1.582) data 0.000 (0.025) loss 0.6631 (1.1491) acc 84.3750 (71.4583) lr 1.9823e-03 eta 10:05:47 +epoch [5/50] batch [35/500] time 1.547 (1.578) data 0.000 (0.022) loss 1.0410 (1.1458) acc 75.0000 (71.3393) lr 1.9823e-03 eta 10:04:04 +epoch [5/50] batch [40/500] time 1.574 (1.577) data 0.001 (0.019) loss 2.0527 (1.1837) acc 68.7500 (71.0938) lr 1.9823e-03 eta 10:03:22 +epoch [5/50] batch [45/500] time 1.542 (1.575) data 0.000 (0.017) loss 1.1201 (1.1862) acc 81.2500 (71.0417) lr 1.9823e-03 eta 10:02:23 +epoch [5/50] batch [50/500] time 1.560 (1.573) data 0.000 (0.015) loss 1.5674 (1.1940) acc 68.7500 (70.9375) lr 1.9823e-03 eta 10:01:36 +epoch [5/50] batch [55/500] time 1.553 (1.571) data 0.000 (0.014) loss 1.1074 (1.2061) acc 78.1250 (70.3977) lr 1.9823e-03 eta 10:00:45 +epoch [5/50] batch [60/500] time 
1.565 (1.570) data 0.000 (0.013) loss 1.1904 (1.2185) acc 65.6250 (69.8438) lr 1.9823e-03 eta 10:00:23 +epoch [5/50] batch [65/500] time 1.548 (1.569) data 0.000 (0.012) loss 1.4883 (1.2387) acc 62.5000 (69.4712) lr 1.9823e-03 eta 9:59:51 +epoch [5/50] batch [70/500] time 1.556 (1.568) data 0.000 (0.011) loss 1.1182 (1.2517) acc 50.0000 (68.6607) lr 1.9823e-03 eta 9:59:08 +epoch [5/50] batch [75/500] time 1.574 (1.568) data 0.001 (0.010) loss 1.4619 (1.2601) acc 68.7500 (68.6250) lr 1.9823e-03 eta 9:59:10 +epoch [5/50] batch [80/500] time 1.576 (1.568) data 0.001 (0.010) loss 0.7012 (1.2414) acc 84.3750 (69.0234) lr 1.9823e-03 eta 9:59:07 +epoch [5/50] batch [85/500] time 1.544 (1.567) data 0.000 (0.009) loss 1.0088 (1.2397) acc 68.7500 (69.0441) lr 1.9823e-03 eta 9:58:34 +epoch [5/50] batch [90/500] time 1.558 (1.567) data 0.000 (0.009) loss 1.6299 (1.2374) acc 59.3750 (69.1319) lr 1.9823e-03 eta 9:58:26 +epoch [5/50] batch [95/500] time 1.543 (1.567) data 0.000 (0.008) loss 1.2256 (1.2449) acc 78.1250 (69.2763) lr 1.9823e-03 eta 9:58:17 +epoch [5/50] batch [100/500] time 1.582 (1.567) data 0.000 (0.008) loss 1.4834 (1.2443) acc 65.6250 (69.4062) lr 1.9823e-03 eta 9:58:12 +epoch [5/50] batch [105/500] time 1.560 (1.567) data 0.000 (0.007) loss 1.1270 (1.2385) acc 71.8750 (69.4940) lr 1.9823e-03 eta 9:57:55 +epoch [5/50] batch [110/500] time 1.543 (1.566) data 0.000 (0.007) loss 1.3105 (1.2316) acc 62.5000 (69.6591) lr 1.9823e-03 eta 9:57:33 +epoch [5/50] batch [115/500] time 1.574 (1.566) data 0.000 (0.007) loss 0.8174 (1.2217) acc 78.1250 (70.0815) lr 1.9823e-03 eta 9:57:17 +epoch [5/50] batch [120/500] time 1.573 (1.566) data 0.000 (0.007) loss 1.5967 (1.2267) acc 65.6250 (69.9479) lr 1.9823e-03 eta 9:57:11 +epoch [5/50] batch [125/500] time 1.659 (1.567) data 0.000 (0.006) loss 0.5195 (1.2201) acc 84.3750 (69.9750) lr 1.9823e-03 eta 9:57:24 +epoch [5/50] batch [130/500] time 1.562 (1.567) data 0.000 (0.006) loss 1.9971 (1.2283) acc 53.1250 (69.6875) lr 
1.9823e-03 eta 9:57:10 +epoch [5/50] batch [135/500] time 1.578 (1.567) data 0.000 (0.006) loss 0.8267 (1.2271) acc 81.2500 (69.7454) lr 1.9823e-03 eta 9:57:08 +epoch [5/50] batch [140/500] time 1.573 (1.567) data 0.000 (0.006) loss 1.1572 (1.2247) acc 71.8750 (69.7991) lr 1.9823e-03 eta 9:57:04 +epoch [5/50] batch [145/500] time 1.574 (1.567) data 0.000 (0.006) loss 0.8613 (1.2155) acc 71.8750 (69.9569) lr 1.9823e-03 eta 9:57:04 +epoch [5/50] batch [150/500] time 1.562 (1.567) data 0.000 (0.005) loss 0.5791 (1.2093) acc 87.5000 (70.0208) lr 1.9823e-03 eta 9:56:56 +epoch [5/50] batch [155/500] time 1.558 (1.567) data 0.000 (0.005) loss 1.7275 (1.2122) acc 56.2500 (70.0806) lr 1.9823e-03 eta 9:56:41 +epoch [5/50] batch [160/500] time 1.579 (1.567) data 0.000 (0.005) loss 1.1113 (1.2098) acc 62.5000 (70.0000) lr 1.9823e-03 eta 9:56:26 +epoch [5/50] batch [165/500] time 1.582 (1.567) data 0.000 (0.005) loss 1.0322 (1.2088) acc 78.1250 (70.0758) lr 1.9823e-03 eta 9:56:23 +epoch [5/50] batch [170/500] time 1.570 (1.568) data 0.000 (0.005) loss 1.1416 (1.2049) acc 59.3750 (70.0735) lr 1.9823e-03 eta 9:56:29 +epoch [5/50] batch [175/500] time 1.579 (1.567) data 0.000 (0.005) loss 1.4033 (1.2039) acc 65.6250 (70.0536) lr 1.9823e-03 eta 9:56:17 +epoch [5/50] batch [180/500] time 1.564 (1.567) data 0.000 (0.005) loss 1.1465 (1.2006) acc 75.0000 (70.0694) lr 1.9823e-03 eta 9:56:05 +epoch [5/50] batch [185/500] time 1.567 (1.567) data 0.000 (0.004) loss 1.0615 (1.1945) acc 75.0000 (70.2534) lr 1.9823e-03 eta 9:55:56 +epoch [5/50] batch [190/500] time 1.556 (1.567) data 0.000 (0.004) loss 1.2832 (1.2018) acc 71.8750 (70.0658) lr 1.9823e-03 eta 9:55:45 +epoch [5/50] batch [195/500] time 1.557 (1.567) data 0.000 (0.004) loss 0.9360 (1.2083) acc 81.2500 (70.1122) lr 1.9823e-03 eta 9:55:38 +epoch [5/50] batch [200/500] time 1.549 (1.567) data 0.000 (0.004) loss 1.1768 (1.2101) acc 75.0000 (70.0469) lr 1.9823e-03 eta 9:55:25 +epoch [5/50] batch [205/500] time 1.557 (1.567) data 
0.000 (0.004) loss 2.2168 (1.2166) acc 56.2500 (69.9390) lr 1.9823e-03 eta 9:55:13 +epoch [5/50] batch [210/500] time 1.560 (1.566) data 0.000 (0.004) loss 1.2607 (1.2198) acc 65.6250 (69.8363) lr 1.9823e-03 eta 9:54:58 +epoch [5/50] batch [215/500] time 1.570 (1.567) data 0.000 (0.004) loss 0.6343 (1.2169) acc 84.3750 (69.9419) lr 1.9823e-03 eta 9:54:55 +epoch [5/50] batch [220/500] time 1.561 (1.567) data 0.000 (0.004) loss 1.3848 (1.2217) acc 78.1250 (69.8864) lr 1.9823e-03 eta 9:54:47 +epoch [5/50] batch [225/500] time 1.590 (1.567) data 0.001 (0.004) loss 0.8379 (1.2153) acc 81.2500 (70.0278) lr 1.9823e-03 eta 9:54:41 +epoch [5/50] batch [230/500] time 1.563 (1.566) data 0.000 (0.004) loss 0.9448 (1.2136) acc 78.1250 (70.1223) lr 1.9823e-03 eta 9:54:28 +epoch [5/50] batch [235/500] time 1.555 (1.566) data 0.000 (0.004) loss 1.3779 (1.2107) acc 65.6250 (70.1064) lr 1.9823e-03 eta 9:54:11 +epoch [5/50] batch [240/500] time 1.602 (1.566) data 0.000 (0.004) loss 1.7461 (1.2081) acc 62.5000 (70.1172) lr 1.9823e-03 eta 9:54:02 +epoch [5/50] batch [245/500] time 1.558 (1.566) data 0.000 (0.003) loss 1.2070 (1.2149) acc 71.8750 (70.0510) lr 1.9823e-03 eta 9:53:53 +epoch [5/50] batch [250/500] time 1.590 (1.566) data 0.000 (0.003) loss 1.2881 (1.2182) acc 62.5000 (69.9750) lr 1.9823e-03 eta 9:53:50 +epoch [5/50] batch [255/500] time 1.560 (1.566) data 0.000 (0.003) loss 1.0889 (1.2152) acc 71.8750 (70.0000) lr 1.9823e-03 eta 9:53:41 +epoch [5/50] batch [260/500] time 1.575 (1.566) data 0.000 (0.003) loss 1.3086 (1.2186) acc 71.8750 (70.0120) lr 1.9823e-03 eta 9:53:30 +epoch [5/50] batch [265/500] time 1.558 (1.566) data 0.001 (0.003) loss 0.8726 (1.2148) acc 81.2500 (70.1061) lr 1.9823e-03 eta 9:53:24 +epoch [5/50] batch [270/500] time 1.556 (1.566) data 0.000 (0.003) loss 1.2109 (1.2180) acc 68.7500 (70.1389) lr 1.9823e-03 eta 9:53:21 +epoch [5/50] batch [275/500] time 1.552 (1.566) data 0.000 (0.003) loss 1.2227 (1.2146) acc 62.5000 (70.1932) lr 1.9823e-03 eta 
9:53:11 +epoch [5/50] batch [280/500] time 1.549 (1.566) data 0.000 (0.003) loss 1.1543 (1.2136) acc 65.6250 (70.1786) lr 1.9823e-03 eta 9:52:58 +epoch [5/50] batch [285/500] time 1.557 (1.566) data 0.000 (0.003) loss 0.9629 (1.2156) acc 65.6250 (70.1535) lr 1.9823e-03 eta 9:52:51 +epoch [5/50] batch [290/500] time 1.544 (1.566) data 0.000 (0.003) loss 1.2158 (1.2116) acc 71.8750 (70.1832) lr 1.9823e-03 eta 9:52:38 +epoch [5/50] batch [295/500] time 1.562 (1.566) data 0.000 (0.003) loss 0.7954 (1.2078) acc 84.3750 (70.2860) lr 1.9823e-03 eta 9:52:26 +epoch [5/50] batch [300/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.0654 (1.2073) acc 68.7500 (70.3125) lr 1.9823e-03 eta 9:52:16 +epoch [5/50] batch [305/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.1387 (1.2023) acc 71.8750 (70.4098) lr 1.9823e-03 eta 9:52:04 +epoch [5/50] batch [310/500] time 1.551 (1.565) data 0.000 (0.003) loss 1.2041 (1.2025) acc 71.8750 (70.3528) lr 1.9823e-03 eta 9:51:53 +epoch [5/50] batch [315/500] time 1.534 (1.566) data 0.001 (0.003) loss 0.8281 (1.1986) acc 78.1250 (70.4365) lr 1.9823e-03 eta 9:51:54 +epoch [5/50] batch [320/500] time 1.546 (1.565) data 0.000 (0.003) loss 0.7910 (1.1967) acc 81.2500 (70.4883) lr 1.9823e-03 eta 9:51:43 +epoch [5/50] batch [325/500] time 1.540 (1.565) data 0.000 (0.003) loss 1.7041 (1.1961) acc 53.1250 (70.5000) lr 1.9823e-03 eta 9:51:29 +epoch [5/50] batch [330/500] time 1.572 (1.565) data 0.000 (0.003) loss 1.5176 (1.1947) acc 68.7500 (70.5492) lr 1.9823e-03 eta 9:51:25 +epoch [5/50] batch [335/500] time 1.551 (1.565) data 0.000 (0.003) loss 1.9004 (1.1977) acc 59.3750 (70.5224) lr 1.9823e-03 eta 9:51:17 +epoch [5/50] batch [340/500] time 1.559 (1.565) data 0.001 (0.003) loss 1.3467 (1.2001) acc 65.6250 (70.4228) lr 1.9823e-03 eta 9:51:06 +epoch [5/50] batch [345/500] time 1.563 (1.565) data 0.000 (0.003) loss 1.0195 (1.2019) acc 81.2500 (70.4076) lr 1.9823e-03 eta 9:50:59 +epoch [5/50] batch [350/500] time 1.549 (1.565) data 0.000 (0.003) loss 
1.4463 (1.1999) acc 75.0000 (70.4821) lr 1.9823e-03 eta 9:50:48 +epoch [5/50] batch [355/500] time 1.588 (1.565) data 0.000 (0.002) loss 0.9028 (1.1983) acc 71.8750 (70.4754) lr 1.9823e-03 eta 9:50:43 +epoch [5/50] batch [360/500] time 1.577 (1.565) data 0.001 (0.002) loss 1.8008 (1.2005) acc 53.1250 (70.3906) lr 1.9823e-03 eta 9:50:37 +epoch [5/50] batch [365/500] time 1.572 (1.565) data 0.000 (0.002) loss 0.9731 (1.1967) acc 75.0000 (70.4966) lr 1.9823e-03 eta 9:50:32 +epoch [5/50] batch [370/500] time 1.559 (1.565) data 0.000 (0.002) loss 0.9214 (1.1966) acc 81.2500 (70.4983) lr 1.9823e-03 eta 9:50:25 +epoch [5/50] batch [375/500] time 1.558 (1.565) data 0.001 (0.002) loss 1.2393 (1.1972) acc 68.7500 (70.5250) lr 1.9823e-03 eta 9:50:16 +epoch [5/50] batch [380/500] time 1.585 (1.565) data 0.000 (0.002) loss 1.2891 (1.2005) acc 56.2500 (70.4112) lr 1.9823e-03 eta 9:50:09 +epoch [5/50] batch [385/500] time 1.595 (1.566) data 0.000 (0.002) loss 1.0703 (1.2003) acc 78.1250 (70.4302) lr 1.9823e-03 eta 9:50:04 +epoch [5/50] batch [390/500] time 1.566 (1.566) data 0.001 (0.002) loss 1.1191 (1.2011) acc 62.5000 (70.4247) lr 1.9823e-03 eta 9:49:59 +epoch [5/50] batch [395/500] time 1.580 (1.566) data 0.000 (0.002) loss 0.5508 (1.1987) acc 84.3750 (70.4589) lr 1.9823e-03 eta 9:49:58 +epoch [5/50] batch [400/500] time 1.548 (1.566) data 0.000 (0.002) loss 0.8955 (1.1995) acc 75.0000 (70.3906) lr 1.9823e-03 eta 9:49:46 +epoch [5/50] batch [405/500] time 1.569 (1.566) data 0.001 (0.002) loss 1.6846 (1.2019) acc 59.3750 (70.3318) lr 1.9823e-03 eta 9:49:37 +epoch [5/50] batch [410/500] time 1.561 (1.566) data 0.000 (0.002) loss 0.8418 (1.2021) acc 78.1250 (70.3582) lr 1.9823e-03 eta 9:49:26 +epoch [5/50] batch [415/500] time 1.554 (1.566) data 0.000 (0.002) loss 0.7163 (1.2005) acc 75.0000 (70.3916) lr 1.9823e-03 eta 9:49:29 +epoch [5/50] batch [420/500] time 1.542 (1.566) data 0.000 (0.002) loss 1.3398 (1.2012) acc 59.3750 (70.3795) lr 1.9823e-03 eta 9:49:17 +epoch [5/50] 
batch [425/500] time 1.573 (1.566) data 0.000 (0.002) loss 0.7524 (1.2015) acc 65.6250 (70.3603) lr 1.9823e-03 eta 9:49:09 +epoch [5/50] batch [430/500] time 1.548 (1.566) data 0.000 (0.002) loss 1.2852 (1.2044) acc 62.5000 (70.2834) lr 1.9823e-03 eta 9:49:02 +epoch [5/50] batch [435/500] time 1.562 (1.566) data 0.000 (0.002) loss 0.8398 (1.2028) acc 78.1250 (70.3017) lr 1.9823e-03 eta 9:48:50 +epoch [5/50] batch [440/500] time 1.568 (1.566) data 0.000 (0.002) loss 1.2236 (1.2037) acc 68.7500 (70.2770) lr 1.9823e-03 eta 9:48:43 +epoch [5/50] batch [445/500] time 1.581 (1.566) data 0.001 (0.002) loss 0.9819 (1.2003) acc 78.1250 (70.3581) lr 1.9823e-03 eta 9:48:35 +epoch [5/50] batch [450/500] time 1.557 (1.566) data 0.000 (0.002) loss 1.1445 (1.1989) acc 68.7500 (70.3750) lr 1.9823e-03 eta 9:48:28 +epoch [5/50] batch [455/500] time 1.662 (1.566) data 0.001 (0.002) loss 1.3564 (1.1982) acc 65.6250 (70.4190) lr 1.9823e-03 eta 9:48:25 +epoch [5/50] batch [460/500] time 1.559 (1.566) data 0.000 (0.002) loss 0.5566 (1.1991) acc 84.3750 (70.4008) lr 1.9823e-03 eta 9:48:17 +epoch [5/50] batch [465/500] time 1.558 (1.566) data 0.001 (0.002) loss 1.0459 (1.1997) acc 78.1250 (70.3965) lr 1.9823e-03 eta 9:48:09 +epoch [5/50] batch [470/500] time 1.576 (1.566) data 0.000 (0.002) loss 0.9663 (1.1989) acc 71.8750 (70.3923) lr 1.9823e-03 eta 9:48:02 +epoch [5/50] batch [475/500] time 1.555 (1.566) data 0.000 (0.002) loss 1.6660 (1.2013) acc 59.3750 (70.3355) lr 1.9823e-03 eta 9:47:52 +epoch [5/50] batch [480/500] time 1.551 (1.566) data 0.000 (0.002) loss 0.8594 (1.2000) acc 75.0000 (70.3255) lr 1.9823e-03 eta 9:47:42 +epoch [5/50] batch [485/500] time 1.585 (1.566) data 0.001 (0.002) loss 1.4746 (1.1987) acc 62.5000 (70.3415) lr 1.9823e-03 eta 9:47:36 +epoch [5/50] batch [490/500] time 1.548 (1.566) data 0.000 (0.002) loss 1.5742 (1.1972) acc 62.5000 (70.3890) lr 1.9823e-03 eta 9:47:27 +epoch [5/50] batch [495/500] time 1.564 (1.566) data 0.000 (0.002) loss 1.3584 (1.1981) acc 
59.3750 (70.3409) lr 1.9823e-03 eta 9:47:20 +epoch [5/50] batch [500/500] time 1.570 (1.566) data 0.000 (0.002) loss 0.4722 (1.1967) acc 93.7500 (70.3937) lr 1.9686e-03 eta 9:47:09 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,806 +* accuracy: 77.6% +* error: 22.4% +* macro_f1: 77.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [6/50] batch [5/500] time 1.554 (1.704) data 0.001 (0.164) loss 0.9009 (1.1271) acc 75.0000 (70.0000) lr 1.9686e-03 eta 10:38:55 +epoch [6/50] batch [10/500] time 1.578 (1.627) data 0.000 (0.082) loss 0.9814 (1.1522) acc 65.6250 (70.3125) lr 1.9686e-03 eta 10:10:01 +epoch [6/50] batch [15/500] time 1.562 (1.606) data 0.001 (0.055) loss 1.1875 (1.1706) acc 59.3750 (70.0000) lr 1.9686e-03 eta 10:01:59 +epoch [6/50] batch [20/500] time 1.550 (1.595) data 0.001 (0.041) loss 0.6875 (1.1511) acc 81.2500 (70.3125) lr 1.9686e-03 eta 9:57:35 +epoch [6/50] batch [25/500] time 1.554 (1.590) data 0.001 (0.033) loss 1.4238 (1.1739) acc 71.8750 (70.1250) lr 1.9686e-03 eta 9:55:43 +epoch [6/50] batch [30/500] time 1.579 (1.587) data 0.000 (0.028) loss 1.7061 (1.1952) acc 56.2500 (69.8958) lr 1.9686e-03 eta 9:54:30 +epoch [6/50] batch [35/500] time 1.564 (1.584) data 0.000 (0.024) loss 1.0088 (1.2089) acc 71.8750 (69.5536) lr 1.9686e-03 eta 9:53:05 +epoch [6/50] batch [40/500] time 1.548 (1.581) data 0.000 (0.021) loss 1.6221 (1.2377) acc 62.5000 (69.3750) lr 1.9686e-03 eta 9:51:55 +epoch [6/50] batch [45/500] time 1.555 (1.579) data 0.000 (0.019) loss 1.2324 (1.2621) acc 62.5000 (68.8194) lr 1.9686e-03 eta 9:50:54 +epoch [6/50] batch [50/500] time 1.558 (1.577) data 0.000 (0.017) loss 1.2383 (1.2481) acc 68.7500 (68.8125) lr 1.9686e-03 eta 9:49:58 +epoch [6/50] batch [55/500] time 1.572 (1.577) data 0.000 (0.015) loss 0.9521 (1.2302) acc 78.1250 (69.4318) lr 1.9686e-03 eta 9:49:46 +epoch [6/50] batch [60/500] time 1.544 (1.576) data 0.000 
(0.014) loss 1.5342 (1.2341) acc 59.3750 (69.3229) lr 1.9686e-03 eta 9:49:17 +epoch [6/50] batch [65/500] time 1.576 (1.577) data 0.001 (0.013) loss 1.1934 (1.2054) acc 68.7500 (70.0000) lr 1.9686e-03 eta 9:49:31 +epoch [6/50] batch [70/500] time 1.586 (1.576) data 0.000 (0.012) loss 1.1270 (1.1953) acc 71.8750 (70.0000) lr 1.9686e-03 eta 9:49:08 +epoch [6/50] batch [75/500] time 1.569 (1.575) data 0.000 (0.011) loss 1.5439 (1.2123) acc 65.6250 (70.0417) lr 1.9686e-03 eta 9:48:28 +epoch [6/50] batch [80/500] time 1.559 (1.574) data 0.000 (0.011) loss 1.2109 (1.2147) acc 68.7500 (70.1172) lr 1.9686e-03 eta 9:48:15 +epoch [6/50] batch [85/500] time 1.553 (1.573) data 0.000 (0.010) loss 1.7070 (1.2116) acc 59.3750 (70.1103) lr 1.9686e-03 eta 9:47:44 +epoch [6/50] batch [90/500] time 1.549 (1.572) data 0.000 (0.010) loss 0.7505 (1.2095) acc 78.1250 (70.0347) lr 1.9686e-03 eta 9:47:13 +epoch [6/50] batch [95/500] time 1.550 (1.571) data 0.001 (0.009) loss 1.4316 (1.2142) acc 62.5000 (69.8684) lr 1.9686e-03 eta 9:46:43 +epoch [6/50] batch [100/500] time 1.565 (1.571) data 0.001 (0.009) loss 1.4668 (1.2135) acc 62.5000 (69.9688) lr 1.9686e-03 eta 9:46:28 +epoch [6/50] batch [105/500] time 1.661 (1.571) data 0.000 (0.008) loss 0.9951 (1.2194) acc 71.8750 (69.8512) lr 1.9686e-03 eta 9:46:29 +epoch [6/50] batch [110/500] time 1.566 (1.571) data 0.001 (0.008) loss 0.8232 (1.2216) acc 71.8750 (69.6591) lr 1.9686e-03 eta 9:46:07 +epoch [6/50] batch [115/500] time 1.530 (1.570) data 0.000 (0.008) loss 1.2314 (1.2171) acc 75.0000 (69.7826) lr 1.9686e-03 eta 9:45:34 +epoch [6/50] batch [120/500] time 1.549 (1.569) data 0.001 (0.007) loss 1.3457 (1.2147) acc 62.5000 (69.8698) lr 1.9686e-03 eta 9:45:15 +epoch [6/50] batch [125/500] time 1.583 (1.569) data 0.000 (0.007) loss 1.2744 (1.2120) acc 71.8750 (69.9250) lr 1.9686e-03 eta 9:45:01 +epoch [6/50] batch [130/500] time 1.584 (1.569) data 0.001 (0.007) loss 1.0781 (1.2074) acc 68.7500 (69.9519) lr 1.9686e-03 eta 9:44:55 +epoch 
[6/50] batch [135/500] time 1.572 (1.569) data 0.000 (0.007) loss 1.0859 (1.2076) acc 75.0000 (70.0231) lr 1.9686e-03 eta 9:44:45 +epoch [6/50] batch [140/500] time 1.550 (1.568) data 0.000 (0.006) loss 1.8311 (1.2064) acc 53.1250 (70.0670) lr 1.9686e-03 eta 9:44:24 +epoch [6/50] batch [145/500] time 1.548 (1.567) data 0.001 (0.006) loss 1.2236 (1.2043) acc 75.0000 (70.0647) lr 1.9686e-03 eta 9:43:57 +epoch [6/50] batch [150/500] time 1.581 (1.568) data 0.000 (0.006) loss 1.0088 (1.2103) acc 65.6250 (69.7917) lr 1.9686e-03 eta 9:44:04 +epoch [6/50] batch [155/500] time 1.567 (1.568) data 0.001 (0.006) loss 1.6396 (1.2161) acc 62.5000 (69.7379) lr 1.9686e-03 eta 9:43:49 +epoch [6/50] batch [160/500] time 1.575 (1.568) data 0.000 (0.006) loss 1.4385 (1.2115) acc 62.5000 (69.8438) lr 1.9686e-03 eta 9:43:39 +epoch [6/50] batch [165/500] time 1.562 (1.568) data 0.000 (0.005) loss 0.9961 (1.2092) acc 81.2500 (69.9242) lr 1.9686e-03 eta 9:43:31 +epoch [6/50] batch [170/500] time 1.579 (1.568) data 0.000 (0.005) loss 1.0068 (1.2049) acc 78.1250 (69.9816) lr 1.9686e-03 eta 9:43:27 +epoch [6/50] batch [175/500] time 1.533 (1.567) data 0.001 (0.005) loss 0.8379 (1.2021) acc 81.2500 (70.1250) lr 1.9686e-03 eta 9:43:06 +epoch [6/50] batch [180/500] time 1.549 (1.567) data 0.000 (0.005) loss 1.6650 (1.2061) acc 65.6250 (70.0694) lr 1.9686e-03 eta 9:43:00 +epoch [6/50] batch [185/500] time 1.575 (1.567) data 0.001 (0.005) loss 1.3867 (1.2053) acc 65.6250 (70.1014) lr 1.9686e-03 eta 9:42:46 +epoch [6/50] batch [190/500] time 1.568 (1.567) data 0.000 (0.005) loss 1.4990 (1.2038) acc 71.8750 (70.1809) lr 1.9686e-03 eta 9:42:36 +epoch [6/50] batch [195/500] time 1.586 (1.567) data 0.000 (0.005) loss 1.4102 (1.2067) acc 56.2500 (70.0000) lr 1.9686e-03 eta 9:42:29 +epoch [6/50] batch [200/500] time 1.579 (1.567) data 0.000 (0.005) loss 1.5850 (1.2094) acc 59.3750 (70.0000) lr 1.9686e-03 eta 9:42:23 +epoch [6/50] batch [205/500] time 1.580 (1.568) data 0.000 (0.004) loss 1.8174 (1.2129) 
acc 59.3750 (69.9848) lr 1.9686e-03 eta 9:42:32 +epoch [6/50] batch [210/500] time 1.570 (1.568) data 0.001 (0.004) loss 1.7471 (1.2147) acc 50.0000 (69.8512) lr 1.9686e-03 eta 9:42:26 +epoch [6/50] batch [215/500] time 1.562 (1.568) data 0.000 (0.004) loss 1.4385 (1.2133) acc 59.3750 (69.9128) lr 1.9686e-03 eta 9:42:18 +epoch [6/50] batch [220/500] time 1.565 (1.567) data 0.000 (0.004) loss 1.2598 (1.2104) acc 68.7500 (70.0000) lr 1.9686e-03 eta 9:42:01 +epoch [6/50] batch [225/500] time 1.554 (1.567) data 0.000 (0.004) loss 1.3613 (1.2145) acc 78.1250 (69.9722) lr 1.9686e-03 eta 9:41:54 +epoch [6/50] batch [230/500] time 1.533 (1.567) data 0.001 (0.004) loss 1.7021 (1.2082) acc 62.5000 (70.1223) lr 1.9686e-03 eta 9:41:43 +epoch [6/50] batch [235/500] time 1.530 (1.567) data 0.000 (0.004) loss 1.0986 (1.2037) acc 68.7500 (70.1729) lr 1.9686e-03 eta 9:41:30 +epoch [6/50] batch [240/500] time 1.580 (1.567) data 0.000 (0.004) loss 1.1553 (1.2039) acc 59.3750 (70.1302) lr 1.9686e-03 eta 9:41:19 +epoch [6/50] batch [245/500] time 1.565 (1.567) data 0.000 (0.004) loss 1.0684 (1.1996) acc 71.8750 (70.2296) lr 1.9686e-03 eta 9:41:10 +epoch [6/50] batch [250/500] time 1.560 (1.567) data 0.000 (0.004) loss 1.4941 (1.1962) acc 65.6250 (70.3250) lr 1.9686e-03 eta 9:41:05 +epoch [6/50] batch [255/500] time 1.551 (1.567) data 0.000 (0.004) loss 0.9658 (1.1918) acc 71.8750 (70.3554) lr 1.9686e-03 eta 9:40:52 +epoch [6/50] batch [260/500] time 1.573 (1.567) data 0.000 (0.004) loss 1.6387 (1.1952) acc 59.3750 (70.2764) lr 1.9686e-03 eta 9:40:45 +epoch [6/50] batch [265/500] time 1.575 (1.567) data 0.000 (0.004) loss 1.4268 (1.1949) acc 71.8750 (70.3184) lr 1.9686e-03 eta 9:40:37 +epoch [6/50] batch [270/500] time 1.570 (1.567) data 0.000 (0.003) loss 1.0752 (1.1959) acc 59.3750 (70.2431) lr 1.9686e-03 eta 9:40:26 +epoch [6/50] batch [275/500] time 1.531 (1.566) data 0.000 (0.003) loss 1.0078 (1.1941) acc 78.1250 (70.3409) lr 1.9686e-03 eta 9:40:10 +epoch [6/50] batch [280/500] 
time 1.557 (1.566) data 0.000 (0.003) loss 1.1855 (1.1906) acc 68.7500 (70.4129) lr 1.9686e-03 eta 9:39:58 +epoch [6/50] batch [285/500] time 1.557 (1.566) data 0.000 (0.003) loss 1.6602 (1.1910) acc 62.5000 (70.3947) lr 1.9686e-03 eta 9:39:44 +epoch [6/50] batch [290/500] time 1.558 (1.566) data 0.000 (0.003) loss 1.4316 (1.1923) acc 65.6250 (70.3233) lr 1.9686e-03 eta 9:39:34 +epoch [6/50] batch [295/500] time 1.550 (1.566) data 0.001 (0.003) loss 1.0049 (1.1888) acc 78.1250 (70.4025) lr 1.9686e-03 eta 9:39:26 +epoch [6/50] batch [300/500] time 1.572 (1.566) data 0.000 (0.003) loss 1.4717 (1.1858) acc 68.7500 (70.4688) lr 1.9686e-03 eta 9:39:15 +epoch [6/50] batch [305/500] time 1.555 (1.566) data 0.000 (0.003) loss 1.2871 (1.1859) acc 71.8750 (70.5430) lr 1.9686e-03 eta 9:39:09 +epoch [6/50] batch [310/500] time 1.585 (1.566) data 0.001 (0.003) loss 1.5361 (1.1877) acc 65.6250 (70.5544) lr 1.9686e-03 eta 9:39:01 +epoch [6/50] batch [315/500] time 1.550 (1.565) data 0.000 (0.003) loss 1.4736 (1.1871) acc 59.3750 (70.5456) lr 1.9686e-03 eta 9:38:50 +epoch [6/50] batch [320/500] time 1.572 (1.565) data 0.000 (0.003) loss 1.3867 (1.1892) acc 78.1250 (70.5762) lr 1.9686e-03 eta 9:38:38 +epoch [6/50] batch [325/500] time 1.609 (1.566) data 0.000 (0.003) loss 1.0498 (1.1928) acc 87.5000 (70.5481) lr 1.9686e-03 eta 9:38:35 +epoch [6/50] batch [330/500] time 1.560 (1.566) data 0.000 (0.003) loss 0.8828 (1.1924) acc 84.3750 (70.5777) lr 1.9686e-03 eta 9:38:29 +epoch [6/50] batch [335/500] time 1.568 (1.566) data 0.000 (0.003) loss 1.4990 (1.1936) acc 59.3750 (70.5597) lr 1.9686e-03 eta 9:38:21 +epoch [6/50] batch [340/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.7637 (1.1959) acc 53.1250 (70.4871) lr 1.9686e-03 eta 9:38:10 +epoch [6/50] batch [345/500] time 1.554 (1.565) data 0.001 (0.003) loss 1.5225 (1.1986) acc 62.5000 (70.4348) lr 1.9686e-03 eta 9:37:59 +epoch [6/50] batch [350/500] time 1.547 (1.565) data 0.000 (0.003) loss 1.2051 (1.1948) acc 78.1250 (70.5089) 
lr 1.9686e-03 eta 9:37:51 +epoch [6/50] batch [355/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.1807 (1.1955) acc 71.8750 (70.5018) lr 1.9686e-03 eta 9:37:42 +epoch [6/50] batch [360/500] time 1.555 (1.565) data 0.001 (0.003) loss 0.8398 (1.1962) acc 87.5000 (70.4948) lr 1.9686e-03 eta 9:37:33 +epoch [6/50] batch [365/500] time 1.572 (1.565) data 0.000 (0.003) loss 1.8545 (1.1973) acc 62.5000 (70.4709) lr 1.9686e-03 eta 9:37:25 +epoch [6/50] batch [370/500] time 1.576 (1.565) data 0.000 (0.003) loss 1.1240 (1.1951) acc 71.8750 (70.5405) lr 1.9686e-03 eta 9:37:17 +epoch [6/50] batch [375/500] time 1.555 (1.565) data 0.000 (0.003) loss 0.9224 (1.1920) acc 78.1250 (70.5667) lr 1.9686e-03 eta 9:37:06 +epoch [6/50] batch [380/500] time 1.564 (1.565) data 0.001 (0.003) loss 0.7495 (1.1894) acc 90.6250 (70.6414) lr 1.9686e-03 eta 9:36:56 +epoch [6/50] batch [385/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.2881 (1.1881) acc 75.0000 (70.7143) lr 1.9686e-03 eta 9:36:47 +epoch [6/50] batch [390/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.7227 (1.1938) acc 50.0000 (70.6170) lr 1.9686e-03 eta 9:36:34 +epoch [6/50] batch [395/500] time 1.547 (1.565) data 0.000 (0.003) loss 1.2656 (1.1940) acc 65.6250 (70.6013) lr 1.9686e-03 eta 9:36:27 +epoch [6/50] batch [400/500] time 1.550 (1.565) data 0.000 (0.002) loss 1.3955 (1.1961) acc 62.5000 (70.5391) lr 1.9686e-03 eta 9:36:16 +epoch [6/50] batch [405/500] time 1.558 (1.564) data 0.000 (0.002) loss 1.0078 (1.1956) acc 75.0000 (70.5633) lr 1.9686e-03 eta 9:36:07 +epoch [6/50] batch [410/500] time 1.579 (1.564) data 0.001 (0.002) loss 1.2900 (1.1946) acc 71.8750 (70.5869) lr 1.9686e-03 eta 9:35:58 +epoch [6/50] batch [415/500] time 1.583 (1.565) data 0.001 (0.002) loss 0.8096 (1.1952) acc 75.0000 (70.5045) lr 1.9686e-03 eta 9:35:53 +epoch [6/50] batch [420/500] time 1.562 (1.565) data 0.001 (0.002) loss 1.5107 (1.1957) acc 62.5000 (70.5134) lr 1.9686e-03 eta 9:35:44 +epoch [6/50] batch [425/500] time 1.570 (1.564) data 
0.000 (0.002) loss 0.9902 (1.1938) acc 71.8750 (70.5368) lr 1.9686e-03 eta 9:35:34 +epoch [6/50] batch [430/500] time 1.532 (1.564) data 0.000 (0.002) loss 1.2705 (1.1970) acc 71.8750 (70.4288) lr 1.9686e-03 eta 9:35:24 +epoch [6/50] batch [435/500] time 1.559 (1.564) data 0.000 (0.002) loss 1.2627 (1.2017) acc 68.7500 (70.3592) lr 1.9686e-03 eta 9:35:11 +epoch [6/50] batch [440/500] time 1.552 (1.564) data 0.001 (0.002) loss 1.5059 (1.2017) acc 56.2500 (70.3196) lr 1.9686e-03 eta 9:35:00 +epoch [6/50] batch [445/500] time 1.541 (1.564) data 0.000 (0.002) loss 0.9238 (1.2011) acc 71.8750 (70.3301) lr 1.9686e-03 eta 9:34:49 +epoch [6/50] batch [450/500] time 1.554 (1.564) data 0.000 (0.002) loss 0.6357 (1.2007) acc 75.0000 (70.3125) lr 1.9686e-03 eta 9:34:39 +epoch [6/50] batch [455/500] time 1.559 (1.564) data 0.000 (0.002) loss 0.8726 (1.2018) acc 84.3750 (70.3297) lr 1.9686e-03 eta 9:34:31 +epoch [6/50] batch [460/500] time 1.554 (1.564) data 0.000 (0.002) loss 0.8228 (1.2001) acc 84.3750 (70.3533) lr 1.9686e-03 eta 9:34:21 +epoch [6/50] batch [465/500] time 1.546 (1.564) data 0.000 (0.002) loss 1.4023 (1.2019) acc 62.5000 (70.3024) lr 1.9686e-03 eta 9:34:13 +epoch [6/50] batch [470/500] time 1.577 (1.564) data 0.000 (0.002) loss 0.4924 (1.2006) acc 87.5000 (70.3059) lr 1.9686e-03 eta 9:34:05 +epoch [6/50] batch [475/500] time 1.546 (1.564) data 0.000 (0.002) loss 0.6514 (1.1981) acc 81.2500 (70.3355) lr 1.9686e-03 eta 9:33:57 +epoch [6/50] batch [480/500] time 1.562 (1.563) data 0.000 (0.002) loss 1.1143 (1.1954) acc 75.0000 (70.3776) lr 1.9686e-03 eta 9:33:46 +epoch [6/50] batch [485/500] time 1.545 (1.563) data 0.001 (0.002) loss 1.0840 (1.1966) acc 71.8750 (70.3737) lr 1.9686e-03 eta 9:33:34 +epoch [6/50] batch [490/500] time 1.643 (1.563) data 0.000 (0.002) loss 1.2070 (1.1966) acc 78.1250 (70.4018) lr 1.9686e-03 eta 9:33:29 +epoch [6/50] batch [495/500] time 1.545 (1.563) data 0.000 (0.002) loss 1.1699 (1.1969) acc 71.8750 (70.4040) lr 1.9686e-03 eta 
9:33:19 +epoch [6/50] batch [500/500] time 1.542 (1.563) data 0.000 (0.002) loss 1.1074 (1.1961) acc 62.5000 (70.3937) lr 1.9511e-03 eta 9:33:08 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,751 +* accuracy: 77.5% +* error: 22.5% +* macro_f1: 76.9% +epoch [7/50] batch [5/500] time 1.535 (1.678) data 0.000 (0.188) loss 0.7231 (0.8938) acc 81.2500 (78.7500) lr 1.9511e-03 eta 10:15:02 +epoch [7/50] batch [10/500] time 1.570 (1.630) data 0.000 (0.094) loss 1.1719 (0.9581) acc 75.0000 (76.2500) lr 1.9511e-03 eta 9:57:18 +epoch [7/50] batch [15/500] time 1.535 (1.605) data 0.000 (0.063) loss 1.1143 (0.9807) acc 68.7500 (75.2083) lr 1.9511e-03 eta 9:47:58 +epoch [7/50] batch [20/500] time 1.539 (1.593) data 0.001 (0.047) loss 0.9385 (0.9950) acc 78.1250 (74.2188) lr 1.9511e-03 eta 9:43:43 +epoch [7/50] batch [25/500] time 1.566 (1.588) data 0.000 (0.038) loss 0.7515 (1.0485) acc 84.3750 (72.6250) lr 1.9511e-03 eta 9:41:26 +epoch [7/50] batch [30/500] time 1.605 (1.585) data 0.001 (0.032) loss 0.8882 (1.0608) acc 78.1250 (72.2917) lr 1.9511e-03 eta 9:40:17 +epoch [7/50] batch [35/500] time 1.565 (1.581) data 0.000 (0.027) loss 1.3145 (1.0521) acc 59.3750 (71.9643) lr 1.9511e-03 eta 9:38:42 +epoch [7/50] batch [40/500] time 1.571 (1.579) data 0.000 (0.024) loss 0.9487 (1.0401) acc 75.0000 (72.4219) lr 1.9511e-03 eta 9:38:05 +epoch [7/50] batch [45/500] time 1.559 (1.576) data 0.000 (0.021) loss 1.3281 (1.0617) acc 65.6250 (71.8056) lr 1.9511e-03 eta 9:36:44 +epoch [7/50] batch [50/500] time 1.583 (1.575) data 0.002 (0.019) loss 1.2070 (1.0854) acc 68.7500 (71.0625) lr 1.9511e-03 eta 9:36:01 +epoch [7/50] batch [55/500] time 1.551 (1.573) data 0.000 (0.017) loss 1.1943 (1.1038) acc 68.7500 (70.7955) lr 1.9511e-03 eta 9:35:12 +epoch [7/50] batch [60/500] time 1.555 (1.571) data 0.000 (0.016) loss 0.9805 (1.1114) acc 71.8750 (70.7812) lr 1.9511e-03 eta 9:34:27 +epoch [7/50] batch [65/500] time 1.564 (1.571) data 0.000 (0.015) loss 1.5898 (1.1351) acc 
62.5000 (70.5769) lr 1.9511e-03 eta 9:34:09 +epoch [7/50] batch [70/500] time 1.570 (1.571) data 0.000 (0.014) loss 1.5986 (1.1523) acc 62.5000 (70.3125) lr 1.9511e-03 eta 9:34:03 +epoch [7/50] batch [75/500] time 1.553 (1.569) data 0.000 (0.013) loss 1.0342 (1.1526) acc 65.6250 (70.2917) lr 1.9511e-03 eta 9:33:26 +epoch [7/50] batch [80/500] time 1.564 (1.568) data 0.000 (0.012) loss 1.1963 (1.1814) acc 65.6250 (69.9219) lr 1.9511e-03 eta 9:32:58 +epoch [7/50] batch [85/500] time 1.543 (1.567) data 0.000 (0.011) loss 1.7773 (1.1878) acc 62.5000 (69.7426) lr 1.9511e-03 eta 9:32:28 +epoch [7/50] batch [90/500] time 1.567 (1.567) data 0.000 (0.011) loss 1.5322 (1.1906) acc 62.5000 (69.5833) lr 1.9511e-03 eta 9:32:06 +epoch [7/50] batch [95/500] time 1.571 (1.566) data 0.000 (0.010) loss 1.0957 (1.1931) acc 75.0000 (69.6382) lr 1.9511e-03 eta 9:31:52 +epoch [7/50] batch [100/500] time 1.557 (1.567) data 0.000 (0.010) loss 1.3877 (1.1932) acc 65.6250 (69.5625) lr 1.9511e-03 eta 9:31:46 +epoch [7/50] batch [105/500] time 1.552 (1.567) data 0.001 (0.009) loss 1.5986 (1.1966) acc 53.1250 (69.4048) lr 1.9511e-03 eta 9:31:49 +epoch [7/50] batch [110/500] time 1.569 (1.567) data 0.001 (0.009) loss 0.8413 (1.1818) acc 81.2500 (69.7727) lr 1.9511e-03 eta 9:31:36 +epoch [7/50] batch [115/500] time 1.525 (1.566) data 0.000 (0.009) loss 1.1846 (1.1809) acc 68.7500 (69.9728) lr 1.9511e-03 eta 9:31:21 +epoch [7/50] batch [120/500] time 1.548 (1.566) data 0.000 (0.008) loss 0.7422 (1.1729) acc 78.1250 (70.1562) lr 1.9511e-03 eta 9:31:03 +epoch [7/50] batch [125/500] time 1.550 (1.566) data 0.000 (0.008) loss 1.5508 (1.1830) acc 78.1250 (70.0750) lr 1.9511e-03 eta 9:30:57 +epoch [7/50] batch [130/500] time 1.550 (1.566) data 0.000 (0.008) loss 0.9263 (1.1801) acc 71.8750 (70.1923) lr 1.9511e-03 eta 9:30:42 +epoch [7/50] batch [135/500] time 1.558 (1.565) data 0.000 (0.007) loss 1.2852 (1.1787) acc 71.8750 (70.2546) lr 1.9511e-03 eta 9:30:27 +epoch [7/50] batch [140/500] time 1.549 
(1.565) data 0.000 (0.007) loss 2.0469 (1.1876) acc 59.3750 (70.1786) lr 1.9511e-03 eta 9:30:10 +epoch [7/50] batch [145/500] time 1.552 (1.565) data 0.001 (0.007) loss 1.1992 (1.1824) acc 68.7500 (70.2155) lr 1.9511e-03 eta 9:30:00 +epoch [7/50] batch [150/500] time 1.556 (1.565) data 0.000 (0.007) loss 0.7559 (1.1873) acc 78.1250 (70.0417) lr 1.9511e-03 eta 9:30:02 +epoch [7/50] batch [155/500] time 1.558 (1.565) data 0.001 (0.006) loss 0.9155 (1.1755) acc 75.0000 (70.4032) lr 1.9511e-03 eta 9:29:46 +epoch [7/50] batch [160/500] time 1.555 (1.564) data 0.000 (0.006) loss 0.8452 (1.1766) acc 78.1250 (70.5273) lr 1.9511e-03 eta 9:29:27 +epoch [7/50] batch [165/500] time 1.559 (1.564) data 0.000 (0.006) loss 1.5098 (1.1841) acc 68.7500 (70.4545) lr 1.9511e-03 eta 9:29:19 +epoch [7/50] batch [170/500] time 1.559 (1.564) data 0.000 (0.006) loss 1.3408 (1.1795) acc 62.5000 (70.5515) lr 1.9511e-03 eta 9:29:07 +epoch [7/50] batch [175/500] time 1.536 (1.564) data 0.000 (0.006) loss 1.3516 (1.1813) acc 59.3750 (70.4643) lr 1.9511e-03 eta 9:28:50 +epoch [7/50] batch [180/500] time 1.568 (1.564) data 0.000 (0.006) loss 1.3594 (1.1789) acc 65.6250 (70.5035) lr 1.9511e-03 eta 9:28:36 +epoch [7/50] batch [185/500] time 1.555 (1.563) data 0.000 (0.005) loss 1.3486 (1.1811) acc 75.0000 (70.5405) lr 1.9511e-03 eta 9:28:23 +epoch [7/50] batch [190/500] time 1.543 (1.563) data 0.001 (0.005) loss 1.3262 (1.1846) acc 75.0000 (70.5757) lr 1.9511e-03 eta 9:28:10 +epoch [7/50] batch [195/500] time 1.562 (1.563) data 0.000 (0.005) loss 1.3633 (1.1874) acc 59.3750 (70.4327) lr 1.9511e-03 eta 9:27:56 +epoch [7/50] batch [200/500] time 1.556 (1.563) data 0.001 (0.005) loss 0.8330 (1.1868) acc 78.1250 (70.5156) lr 1.9511e-03 eta 9:27:45 +epoch [7/50] batch [205/500] time 1.578 (1.562) data 0.000 (0.005) loss 1.0850 (1.1840) acc 65.6250 (70.4878) lr 1.9511e-03 eta 9:27:34 +epoch [7/50] batch [210/500] time 1.540 (1.562) data 0.001 (0.005) loss 1.7578 (1.1869) acc 62.5000 (70.5208) lr 
1.9511e-03 eta 9:27:20 +epoch [7/50] batch [215/500] time 1.571 (1.562) data 0.000 (0.005) loss 1.4404 (1.1933) acc 53.1250 (70.4070) lr 1.9511e-03 eta 9:27:13 +epoch [7/50] batch [220/500] time 1.547 (1.562) data 0.000 (0.005) loss 0.8252 (1.1933) acc 71.8750 (70.2983) lr 1.9511e-03 eta 9:26:58 +epoch [7/50] batch [225/500] time 1.581 (1.562) data 0.000 (0.005) loss 1.3223 (1.1942) acc 71.8750 (70.3333) lr 1.9511e-03 eta 9:26:51 +epoch [7/50] batch [230/500] time 1.538 (1.562) data 0.000 (0.004) loss 1.1719 (1.1930) acc 68.7500 (70.2174) lr 1.9511e-03 eta 9:26:38 +epoch [7/50] batch [235/500] time 1.548 (1.562) data 0.000 (0.004) loss 0.7725 (1.1954) acc 81.2500 (70.2128) lr 1.9511e-03 eta 9:26:30 +epoch [7/50] batch [240/500] time 1.543 (1.561) data 0.000 (0.004) loss 1.2705 (1.1988) acc 78.1250 (70.1432) lr 1.9511e-03 eta 9:26:16 +epoch [7/50] batch [245/500] time 1.570 (1.561) data 0.001 (0.004) loss 1.9219 (1.2065) acc 53.1250 (70.0128) lr 1.9511e-03 eta 9:26:02 +epoch [7/50] batch [250/500] time 1.561 (1.561) data 0.000 (0.004) loss 0.4814 (1.2043) acc 84.3750 (70.0625) lr 1.9511e-03 eta 9:25:58 +epoch [7/50] batch [255/500] time 1.569 (1.561) data 0.000 (0.004) loss 1.4385 (1.2021) acc 56.2500 (70.0735) lr 1.9511e-03 eta 9:25:50 +epoch [7/50] batch [260/500] time 1.558 (1.561) data 0.000 (0.004) loss 1.0811 (1.1993) acc 75.0000 (70.1442) lr 1.9511e-03 eta 9:25:37 +epoch [7/50] batch [265/500] time 1.567 (1.561) data 0.000 (0.004) loss 1.4561 (1.2013) acc 59.3750 (70.0236) lr 1.9511e-03 eta 9:25:27 +epoch [7/50] batch [270/500] time 1.574 (1.561) data 0.000 (0.004) loss 1.0713 (1.1971) acc 71.8750 (70.1273) lr 1.9511e-03 eta 9:25:19 +epoch [7/50] batch [275/500] time 1.579 (1.561) data 0.001 (0.004) loss 1.1533 (1.2000) acc 68.7500 (70.1023) lr 1.9511e-03 eta 9:25:13 +epoch [7/50] batch [280/500] time 1.552 (1.561) data 0.001 (0.004) loss 0.7900 (1.1978) acc 75.0000 (70.0893) lr 1.9511e-03 eta 9:25:06 +epoch [7/50] batch [285/500] time 1.578 (1.561) data 
0.000 (0.004) loss 1.1543 (1.1964) acc 65.6250 (70.1096) lr 1.9511e-03 eta 9:25:00 +epoch [7/50] batch [290/500] time 1.561 (1.561) data 0.000 (0.004) loss 1.1289 (1.1954) acc 71.8750 (70.1401) lr 1.9511e-03 eta 9:24:55 +epoch [7/50] batch [295/500] time 1.561 (1.562) data 0.000 (0.004) loss 0.9463 (1.1939) acc 78.1250 (70.1695) lr 1.9511e-03 eta 9:24:55 +epoch [7/50] batch [300/500] time 1.568 (1.562) data 0.000 (0.004) loss 0.5400 (1.1919) acc 84.3750 (70.2083) lr 1.9511e-03 eta 9:24:49 +epoch [7/50] batch [305/500] time 1.567 (1.562) data 0.000 (0.003) loss 1.1562 (1.1926) acc 78.1250 (70.2561) lr 1.9511e-03 eta 9:24:44 +epoch [7/50] batch [310/500] time 1.567 (1.562) data 0.001 (0.003) loss 1.1660 (1.1921) acc 68.7500 (70.2319) lr 1.9511e-03 eta 9:24:39 +epoch [7/50] batch [315/500] time 1.568 (1.562) data 0.000 (0.003) loss 1.8740 (1.1968) acc 59.3750 (70.1786) lr 1.9511e-03 eta 9:24:28 +epoch [7/50] batch [320/500] time 1.556 (1.562) data 0.000 (0.003) loss 1.0518 (1.1929) acc 75.0000 (70.2930) lr 1.9511e-03 eta 9:24:20 +epoch [7/50] batch [325/500] time 1.572 (1.562) data 0.001 (0.003) loss 0.6011 (1.1913) acc 84.3750 (70.3462) lr 1.9511e-03 eta 9:24:16 +epoch [7/50] batch [330/500] time 1.551 (1.562) data 0.001 (0.003) loss 0.7905 (1.1870) acc 78.1250 (70.4356) lr 1.9511e-03 eta 9:24:08 +epoch [7/50] batch [335/500] time 1.557 (1.562) data 0.000 (0.003) loss 0.9556 (1.1858) acc 71.8750 (70.5037) lr 1.9511e-03 eta 9:24:02 +epoch [7/50] batch [340/500] time 1.573 (1.562) data 0.000 (0.003) loss 0.9961 (1.1869) acc 75.0000 (70.4136) lr 1.9511e-03 eta 9:24:00 +epoch [7/50] batch [345/500] time 1.582 (1.563) data 0.000 (0.003) loss 1.5830 (1.1872) acc 65.6250 (70.4529) lr 1.9511e-03 eta 9:23:56 +epoch [7/50] batch [350/500] time 1.593 (1.563) data 0.000 (0.003) loss 1.6982 (1.1880) acc 59.3750 (70.4375) lr 1.9511e-03 eta 9:23:49 +epoch [7/50] batch [355/500] time 1.545 (1.562) data 0.000 (0.003) loss 0.8428 (1.1865) acc 78.1250 (70.4577) lr 1.9511e-03 eta 
9:23:40 +epoch [7/50] batch [360/500] time 1.549 (1.563) data 0.000 (0.003) loss 1.1895 (1.1875) acc 71.8750 (70.3906) lr 1.9511e-03 eta 9:23:32 +epoch [7/50] batch [365/500] time 1.563 (1.562) data 0.000 (0.003) loss 0.9121 (1.1897) acc 71.8750 (70.3425) lr 1.9511e-03 eta 9:23:24 +epoch [7/50] batch [370/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.6772 (1.1882) acc 84.3750 (70.3716) lr 1.9511e-03 eta 9:23:19 +epoch [7/50] batch [375/500] time 1.553 (1.562) data 0.000 (0.003) loss 0.7891 (1.1880) acc 75.0000 (70.3417) lr 1.9511e-03 eta 9:23:08 +epoch [7/50] batch [380/500] time 1.529 (1.562) data 0.000 (0.003) loss 1.7412 (1.1900) acc 65.6250 (70.3289) lr 1.9511e-03 eta 9:22:59 +epoch [7/50] batch [385/500] time 1.563 (1.562) data 0.000 (0.003) loss 1.1777 (1.1893) acc 62.5000 (70.3571) lr 1.9511e-03 eta 9:22:51 +epoch [7/50] batch [390/500] time 1.691 (1.563) data 0.000 (0.003) loss 0.4980 (1.1920) acc 87.5000 (70.2804) lr 1.9511e-03 eta 9:22:53 +epoch [7/50] batch [395/500] time 1.579 (1.563) data 0.000 (0.003) loss 1.2520 (1.1898) acc 71.8750 (70.3481) lr 1.9511e-03 eta 9:22:46 +epoch [7/50] batch [400/500] time 1.569 (1.563) data 0.000 (0.003) loss 1.4287 (1.1891) acc 68.7500 (70.3203) lr 1.9511e-03 eta 9:22:40 +epoch [7/50] batch [405/500] time 1.565 (1.563) data 0.000 (0.003) loss 0.9854 (1.1858) acc 71.8750 (70.4090) lr 1.9511e-03 eta 9:22:33 +epoch [7/50] batch [410/500] time 1.560 (1.563) data 0.000 (0.003) loss 0.9419 (1.1850) acc 78.1250 (70.4345) lr 1.9511e-03 eta 9:22:26 +epoch [7/50] batch [415/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.1680 (1.1831) acc 78.1250 (70.4970) lr 1.9511e-03 eta 9:22:15 +epoch [7/50] batch [420/500] time 1.579 (1.563) data 0.000 (0.003) loss 0.8491 (1.1864) acc 81.2500 (70.4464) lr 1.9511e-03 eta 9:22:08 +epoch [7/50] batch [425/500] time 1.580 (1.563) data 0.000 (0.003) loss 0.8730 (1.1854) acc 78.1250 (70.4706) lr 1.9511e-03 eta 9:22:02 +epoch [7/50] batch [430/500] time 1.559 (1.563) data 0.000 (0.003) loss 
0.6416 (1.1842) acc 81.2500 (70.5523) lr 1.9511e-03 eta 9:21:53 +epoch [7/50] batch [435/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.1348 (1.1835) acc 68.7500 (70.5460) lr 1.9511e-03 eta 9:21:50 +epoch [7/50] batch [440/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.1055 (1.1829) acc 75.0000 (70.6037) lr 1.9511e-03 eta 9:21:41 +epoch [7/50] batch [445/500] time 1.569 (1.563) data 0.000 (0.003) loss 0.7974 (1.1805) acc 78.1250 (70.6531) lr 1.9511e-03 eta 9:21:36 +epoch [7/50] batch [450/500] time 1.608 (1.563) data 0.000 (0.002) loss 1.2334 (1.1812) acc 65.6250 (70.5972) lr 1.9511e-03 eta 9:21:26 +epoch [7/50] batch [455/500] time 1.551 (1.563) data 0.000 (0.002) loss 1.2725 (1.1807) acc 75.0000 (70.6662) lr 1.9511e-03 eta 9:21:15 +epoch [7/50] batch [460/500] time 1.568 (1.563) data 0.000 (0.002) loss 0.5776 (1.1802) acc 81.2500 (70.7201) lr 1.9511e-03 eta 9:21:06 +epoch [7/50] batch [465/500] time 1.548 (1.563) data 0.000 (0.002) loss 0.7881 (1.1798) acc 78.1250 (70.7460) lr 1.9511e-03 eta 9:20:55 +epoch [7/50] batch [470/500] time 1.563 (1.563) data 0.000 (0.002) loss 0.9546 (1.1800) acc 75.0000 (70.7447) lr 1.9511e-03 eta 9:20:47 +epoch [7/50] batch [475/500] time 1.527 (1.563) data 0.000 (0.002) loss 1.5938 (1.1822) acc 65.6250 (70.7303) lr 1.9511e-03 eta 9:20:36 +epoch [7/50] batch [480/500] time 1.560 (1.563) data 0.000 (0.002) loss 0.6245 (1.1799) acc 90.6250 (70.7943) lr 1.9511e-03 eta 9:20:28 +epoch [7/50] batch [485/500] time 1.544 (1.563) data 0.001 (0.002) loss 1.1689 (1.1804) acc 68.7500 (70.7539) lr 1.9511e-03 eta 9:20:18 +epoch [7/50] batch [490/500] time 1.553 (1.562) data 0.000 (0.002) loss 1.3359 (1.1825) acc 56.2500 (70.7143) lr 1.9511e-03 eta 9:20:08 +epoch [7/50] batch [495/500] time 1.548 (1.562) data 0.000 (0.002) loss 1.1377 (1.1800) acc 71.8750 (70.7513) lr 1.9511e-03 eta 9:19:57 +epoch [7/50] batch [500/500] time 1.542 (1.562) data 0.000 (0.002) loss 0.9746 (1.1807) acc 75.0000 (70.7812) lr 1.9298e-03 eta 9:19:46 +Evaluate on the 
*val* set +=> result +* total: 50,000 +* correct: 38,847 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [8/50] batch [5/500] time 1.543 (1.651) data 0.001 (0.158) loss 2.2266 (1.4221) acc 46.8750 (66.2500) lr 1.9298e-03 eta 9:51:36 +epoch [8/50] batch [10/500] time 1.564 (1.604) data 0.001 (0.079) loss 1.5420 (1.3735) acc 62.5000 (68.4375) lr 1.9298e-03 eta 9:34:20 +epoch [8/50] batch [15/500] time 1.530 (1.584) data 0.000 (0.053) loss 1.4883 (1.3611) acc 53.1250 (65.8333) lr 1.9298e-03 eta 9:27:20 +epoch [8/50] batch [20/500] time 1.588 (1.577) data 0.000 (0.040) loss 1.1045 (1.2959) acc 75.0000 (67.6562) lr 1.9298e-03 eta 9:24:39 +epoch [8/50] batch [25/500] time 1.540 (1.572) data 0.000 (0.032) loss 1.8818 (1.3143) acc 50.0000 (68.1250) lr 1.9298e-03 eta 9:22:49 +epoch [8/50] batch [30/500] time 1.558 (1.575) data 0.001 (0.027) loss 1.0225 (1.2820) acc 75.0000 (68.9583) lr 1.9298e-03 eta 9:23:42 +epoch [8/50] batch [35/500] time 1.563 (1.572) data 0.000 (0.023) loss 0.7915 (1.2578) acc 75.0000 (69.5536) lr 1.9298e-03 eta 9:22:19 +epoch [8/50] batch [40/500] time 1.551 (1.569) data 0.000 (0.020) loss 1.4990 (1.2798) acc 62.5000 (69.2969) lr 1.9298e-03 eta 9:21:13 +epoch [8/50] batch [45/500] time 1.558 (1.567) data 0.000 (0.018) loss 1.7832 (1.2660) acc 65.6250 (69.7917) lr 1.9298e-03 eta 9:20:29 +epoch [8/50] batch [50/500] time 1.565 (1.567) data 0.000 (0.016) loss 1.1807 (1.2503) acc 65.6250 (70.2500) lr 1.9298e-03 eta 9:20:14 +epoch [8/50] batch [55/500] time 1.576 (1.566) data 0.000 (0.015) loss 1.3438 (1.2310) acc 62.5000 (70.3977) lr 1.9298e-03 eta 9:19:43 +epoch [8/50] batch [60/500] time 1.569 (1.566) data 0.000 (0.014) loss 0.9614 (1.2343) acc 75.0000 (70.6250) lr 1.9298e-03 eta 9:19:29 +epoch [8/50] batch [65/500] time 1.596 (1.565) data 0.001 (0.013) loss 1.3096 (1.2598) acc 65.6250 (70.0962) lr 1.9298e-03 
eta 9:19:10 +epoch [8/50] batch [70/500] time 1.641 (1.566) data 0.000 (0.012) loss 0.7632 (1.2497) acc 81.2500 (70.1786) lr 1.9298e-03 eta 9:19:21 +epoch [8/50] batch [75/500] time 1.560 (1.566) data 0.000 (0.011) loss 1.3164 (1.2444) acc 65.6250 (70.1250) lr 1.9298e-03 eta 9:19:14 +epoch [8/50] batch [80/500] time 1.545 (1.565) data 0.000 (0.010) loss 1.1807 (1.2406) acc 65.6250 (70.2734) lr 1.9298e-03 eta 9:18:44 +epoch [8/50] batch [85/500] time 1.557 (1.564) data 0.000 (0.010) loss 0.7422 (1.2327) acc 75.0000 (70.2941) lr 1.9298e-03 eta 9:18:13 +epoch [8/50] batch [90/500] time 1.542 (1.563) data 0.000 (0.009) loss 1.7041 (1.2344) acc 59.3750 (70.0694) lr 1.9298e-03 eta 9:17:41 +epoch [8/50] batch [95/500] time 1.564 (1.563) data 0.000 (0.009) loss 1.4297 (1.2399) acc 71.8750 (70.0658) lr 1.9298e-03 eta 9:17:27 +epoch [8/50] batch [100/500] time 1.572 (1.563) data 0.000 (0.008) loss 1.6309 (1.2533) acc 59.3750 (69.8125) lr 1.9298e-03 eta 9:17:21 +epoch [8/50] batch [105/500] time 1.538 (1.563) data 0.000 (0.008) loss 0.9141 (1.2481) acc 78.1250 (69.7917) lr 1.9298e-03 eta 9:17:11 +epoch [8/50] batch [110/500] time 1.553 (1.562) data 0.000 (0.008) loss 1.0918 (1.2418) acc 71.8750 (70.0284) lr 1.9298e-03 eta 9:17:00 +epoch [8/50] batch [115/500] time 1.543 (1.562) data 0.000 (0.007) loss 1.4209 (1.2312) acc 59.3750 (69.9185) lr 1.9298e-03 eta 9:16:46 +epoch [8/50] batch [120/500] time 1.554 (1.562) data 0.000 (0.007) loss 1.8340 (1.2367) acc 56.2500 (69.7656) lr 1.9298e-03 eta 9:16:39 +epoch [8/50] batch [125/500] time 1.555 (1.562) data 0.000 (0.007) loss 1.1924 (1.2402) acc 75.0000 (69.8250) lr 1.9298e-03 eta 9:16:22 +epoch [8/50] batch [130/500] time 1.529 (1.561) data 0.000 (0.006) loss 0.8999 (1.2332) acc 87.5000 (70.0962) lr 1.9298e-03 eta 9:16:06 +epoch [8/50] batch [135/500] time 1.562 (1.561) data 0.000 (0.006) loss 1.0596 (1.2274) acc 71.8750 (70.1157) lr 1.9298e-03 eta 9:15:52 +epoch [8/50] batch [140/500] time 1.562 (1.561) data 0.001 (0.006) loss 
0.8955 (1.2265) acc 71.8750 (70.0223) lr 1.9298e-03 eta 9:15:40 +epoch [8/50] batch [145/500] time 1.555 (1.561) data 0.001 (0.006) loss 1.0254 (1.2169) acc 71.8750 (70.1078) lr 1.9298e-03 eta 9:15:31 +epoch [8/50] batch [150/500] time 1.597 (1.561) data 0.001 (0.006) loss 1.2100 (1.2129) acc 65.6250 (70.1875) lr 1.9298e-03 eta 9:15:35 +epoch [8/50] batch [155/500] time 1.554 (1.561) data 0.001 (0.006) loss 0.8013 (1.2071) acc 78.1250 (70.2218) lr 1.9298e-03 eta 9:15:26 +epoch [8/50] batch [160/500] time 1.541 (1.561) data 0.000 (0.005) loss 0.8101 (1.2048) acc 71.8750 (70.2539) lr 1.9298e-03 eta 9:15:11 +epoch [8/50] batch [165/500] time 1.546 (1.561) data 0.000 (0.005) loss 1.6738 (1.2066) acc 65.6250 (70.2652) lr 1.9298e-03 eta 9:14:53 +epoch [8/50] batch [170/500] time 1.535 (1.561) data 0.000 (0.005) loss 1.3477 (1.2123) acc 62.5000 (70.1654) lr 1.9298e-03 eta 9:14:49 +epoch [8/50] batch [175/500] time 1.554 (1.561) data 0.001 (0.005) loss 0.5005 (1.2161) acc 84.3750 (70.0536) lr 1.9298e-03 eta 9:14:40 +epoch [8/50] batch [180/500] time 1.561 (1.560) data 0.000 (0.005) loss 1.0176 (1.2108) acc 84.3750 (70.2083) lr 1.9298e-03 eta 9:14:26 +epoch [8/50] batch [185/500] time 1.554 (1.560) data 0.000 (0.005) loss 1.1191 (1.2091) acc 75.0000 (70.2534) lr 1.9298e-03 eta 9:14:19 +epoch [8/50] batch [190/500] time 1.544 (1.560) data 0.000 (0.005) loss 0.8467 (1.2112) acc 78.1250 (70.2961) lr 1.9298e-03 eta 9:14:07 +epoch [8/50] batch [195/500] time 1.537 (1.560) data 0.000 (0.004) loss 1.0986 (1.2127) acc 68.7500 (70.1923) lr 1.9298e-03 eta 9:14:02 +epoch [8/50] batch [200/500] time 1.564 (1.560) data 0.000 (0.004) loss 1.9697 (1.2166) acc 62.5000 (70.1875) lr 1.9298e-03 eta 9:13:56 +epoch [8/50] batch [205/500] time 1.546 (1.560) data 0.000 (0.004) loss 1.1855 (1.2166) acc 71.8750 (70.1982) lr 1.9298e-03 eta 9:13:48 +epoch [8/50] batch [210/500] time 1.538 (1.560) data 0.000 (0.004) loss 1.2207 (1.2134) acc 65.6250 (70.2083) lr 1.9298e-03 eta 9:13:38 +epoch [8/50] 
batch [215/500] time 1.569 (1.561) data 0.000 (0.004) loss 1.4951 (1.2167) acc 68.7500 (70.1163) lr 1.9298e-03 eta 9:13:38 +epoch [8/50] batch [220/500] time 1.553 (1.561) data 0.000 (0.004) loss 1.2080 (1.2180) acc 71.8750 (70.0142) lr 1.9298e-03 eta 9:13:33 +epoch [8/50] batch [225/500] time 1.553 (1.561) data 0.000 (0.004) loss 0.8125 (1.2135) acc 75.0000 (70.0833) lr 1.9298e-03 eta 9:13:23 +epoch [8/50] batch [230/500] time 1.544 (1.560) data 0.000 (0.004) loss 0.4666 (1.2170) acc 87.5000 (70.0408) lr 1.9298e-03 eta 9:13:11 +epoch [8/50] batch [235/500] time 1.576 (1.560) data 0.000 (0.004) loss 1.3047 (1.2156) acc 62.5000 (70.0133) lr 1.9298e-03 eta 9:13:03 +epoch [8/50] batch [240/500] time 1.546 (1.560) data 0.000 (0.004) loss 1.0908 (1.2121) acc 81.2500 (70.1302) lr 1.9298e-03 eta 9:12:51 +epoch [8/50] batch [245/500] time 1.556 (1.560) data 0.000 (0.004) loss 0.9551 (1.2156) acc 78.1250 (70.1148) lr 1.9298e-03 eta 9:12:39 +epoch [8/50] batch [250/500] time 1.549 (1.560) data 0.000 (0.004) loss 0.6680 (1.2141) acc 84.3750 (70.2250) lr 1.9298e-03 eta 9:12:29 +epoch [8/50] batch [255/500] time 1.578 (1.560) data 0.000 (0.004) loss 1.4229 (1.2117) acc 65.6250 (70.1838) lr 1.9298e-03 eta 9:12:21 +epoch [8/50] batch [260/500] time 1.565 (1.560) data 0.000 (0.003) loss 1.4824 (1.2138) acc 68.7500 (70.1322) lr 1.9298e-03 eta 9:12:17 +epoch [8/50] batch [265/500] time 1.568 (1.560) data 0.000 (0.003) loss 1.2031 (1.2129) acc 78.1250 (70.1415) lr 1.9298e-03 eta 9:12:11 +epoch [8/50] batch [270/500] time 1.563 (1.560) data 0.000 (0.003) loss 1.5635 (1.2168) acc 56.2500 (70.0810) lr 1.9298e-03 eta 9:12:08 +epoch [8/50] batch [275/500] time 1.587 (1.560) data 0.000 (0.003) loss 1.4590 (1.2176) acc 62.5000 (70.0455) lr 1.9298e-03 eta 9:12:00 +epoch [8/50] batch [280/500] time 1.552 (1.560) data 0.000 (0.003) loss 1.2012 (1.2144) acc 78.1250 (70.1228) lr 1.9298e-03 eta 9:11:49 +epoch [8/50] batch [285/500] time 1.593 (1.560) data 0.000 (0.003) loss 0.7852 (1.2094) acc 
75.0000 (70.1974) lr 1.9298e-03 eta 9:11:43 +epoch [8/50] batch [290/500] time 1.561 (1.560) data 0.000 (0.003) loss 0.7939 (1.2087) acc 78.1250 (70.2263) lr 1.9298e-03 eta 9:11:32 +epoch [8/50] batch [295/500] time 1.557 (1.560) data 0.000 (0.003) loss 0.9507 (1.2081) acc 81.2500 (70.2225) lr 1.9298e-03 eta 9:11:24 +epoch [8/50] batch [300/500] time 1.573 (1.560) data 0.000 (0.003) loss 1.0801 (1.2065) acc 75.0000 (70.2708) lr 1.9298e-03 eta 9:11:17 +epoch [8/50] batch [305/500] time 1.571 (1.560) data 0.000 (0.003) loss 1.0254 (1.2062) acc 75.0000 (70.2357) lr 1.9298e-03 eta 9:11:12 +epoch [8/50] batch [310/500] time 1.652 (1.561) data 0.000 (0.003) loss 1.0879 (1.2016) acc 75.0000 (70.3629) lr 1.9298e-03 eta 9:11:10 +epoch [8/50] batch [315/500] time 1.568 (1.561) data 0.000 (0.003) loss 1.2666 (1.2020) acc 68.7500 (70.3770) lr 1.9298e-03 eta 9:11:04 +epoch [8/50] batch [320/500] time 1.558 (1.561) data 0.000 (0.003) loss 1.0137 (1.2018) acc 78.1250 (70.4102) lr 1.9298e-03 eta 9:10:53 +epoch [8/50] batch [325/500] time 1.576 (1.561) data 0.000 (0.003) loss 1.1201 (1.2044) acc 78.1250 (70.3750) lr 1.9298e-03 eta 9:10:47 +epoch [8/50] batch [330/500] time 1.575 (1.561) data 0.000 (0.003) loss 1.6084 (1.2064) acc 59.3750 (70.3220) lr 1.9298e-03 eta 9:10:40 +epoch [8/50] batch [335/500] time 1.553 (1.561) data 0.000 (0.003) loss 0.9233 (1.2057) acc 75.0000 (70.2985) lr 1.9298e-03 eta 9:10:35 +epoch [8/50] batch [340/500] time 1.575 (1.561) data 0.000 (0.003) loss 1.0215 (1.2030) acc 71.8750 (70.3493) lr 1.9298e-03 eta 9:10:30 +epoch [8/50] batch [345/500] time 1.556 (1.561) data 0.000 (0.003) loss 0.9780 (1.1994) acc 87.5000 (70.4348) lr 1.9298e-03 eta 9:10:21 +epoch [8/50] batch [350/500] time 1.567 (1.561) data 0.000 (0.003) loss 1.3682 (1.1960) acc 65.6250 (70.5179) lr 1.9298e-03 eta 9:10:13 +epoch [8/50] batch [355/500] time 1.560 (1.561) data 0.000 (0.003) loss 0.8555 (1.1957) acc 81.2500 (70.5282) lr 1.9298e-03 eta 9:10:11 +epoch [8/50] batch [360/500] time 
1.537 (1.561) data 0.000 (0.003) loss 1.3398 (1.1979) acc 68.7500 (70.4774) lr 1.9298e-03 eta 9:10:02 +epoch [8/50] batch [365/500] time 1.545 (1.561) data 0.000 (0.003) loss 0.9971 (1.1983) acc 78.1250 (70.5137) lr 1.9298e-03 eta 9:09:52 +epoch [8/50] batch [370/500] time 1.558 (1.561) data 0.000 (0.003) loss 1.7100 (1.2007) acc 65.6250 (70.4814) lr 1.9298e-03 eta 9:09:42 +epoch [8/50] batch [375/500] time 1.552 (1.561) data 0.000 (0.003) loss 0.9482 (1.2007) acc 65.6250 (70.4833) lr 1.9298e-03 eta 9:09:36 +epoch [8/50] batch [380/500] time 1.563 (1.561) data 0.000 (0.002) loss 1.3936 (1.2008) acc 68.7500 (70.4441) lr 1.9298e-03 eta 9:09:29 +epoch [8/50] batch [385/500] time 1.564 (1.561) data 0.000 (0.002) loss 1.5234 (1.1980) acc 75.0000 (70.4870) lr 1.9298e-03 eta 9:09:21 +epoch [8/50] batch [390/500] time 1.567 (1.561) data 0.000 (0.002) loss 1.1934 (1.1948) acc 62.5000 (70.5208) lr 1.9298e-03 eta 9:09:14 +epoch [8/50] batch [395/500] time 1.537 (1.561) data 0.000 (0.002) loss 1.1562 (1.1931) acc 75.0000 (70.5696) lr 1.9298e-03 eta 9:09:05 +epoch [8/50] batch [400/500] time 1.556 (1.561) data 0.000 (0.002) loss 1.4746 (1.1894) acc 65.6250 (70.6562) lr 1.9298e-03 eta 9:08:57 +epoch [8/50] batch [405/500] time 1.547 (1.561) data 0.000 (0.002) loss 1.2236 (1.1901) acc 75.0000 (70.6636) lr 1.9298e-03 eta 9:08:46 +epoch [8/50] batch [410/500] time 1.573 (1.561) data 0.000 (0.002) loss 1.7041 (1.1959) acc 53.1250 (70.5640) lr 1.9298e-03 eta 9:08:38 +epoch [8/50] batch [415/500] time 1.562 (1.561) data 0.000 (0.002) loss 0.7910 (1.1942) acc 87.5000 (70.5798) lr 1.9298e-03 eta 9:08:29 +epoch [8/50] batch [420/500] time 1.536 (1.561) data 0.000 (0.002) loss 0.5635 (1.1951) acc 81.2500 (70.6027) lr 1.9298e-03 eta 9:08:18 +epoch [8/50] batch [425/500] time 1.540 (1.561) data 0.001 (0.002) loss 1.3027 (1.1957) acc 75.0000 (70.6029) lr 1.9298e-03 eta 9:08:10 +epoch [8/50] batch [430/500] time 1.567 (1.561) data 0.000 (0.002) loss 1.7803 (1.1978) acc 65.6250 (70.5596) lr 
1.9298e-03 eta 9:08:01 +epoch [8/50] batch [435/500] time 1.554 (1.560) data 0.000 (0.002) loss 0.8750 (1.1979) acc 75.0000 (70.5675) lr 1.9298e-03 eta 9:07:51 +epoch [8/50] batch [440/500] time 1.529 (1.560) data 0.000 (0.002) loss 0.6616 (1.1963) acc 81.2500 (70.6392) lr 1.9298e-03 eta 9:07:40 +epoch [8/50] batch [445/500] time 1.557 (1.560) data 0.000 (0.002) loss 1.2090 (1.1966) acc 65.6250 (70.6039) lr 1.9298e-03 eta 9:07:31 +epoch [8/50] batch [450/500] time 1.525 (1.560) data 0.000 (0.002) loss 1.5986 (1.1985) acc 62.5000 (70.5833) lr 1.9298e-03 eta 9:07:21 +epoch [8/50] batch [455/500] time 1.546 (1.560) data 0.000 (0.002) loss 1.0439 (1.1963) acc 81.2500 (70.6387) lr 1.9298e-03 eta 9:07:18 +epoch [8/50] batch [460/500] time 1.543 (1.560) data 0.000 (0.002) loss 0.9673 (1.1950) acc 81.2500 (70.6861) lr 1.9298e-03 eta 9:07:11 +epoch [8/50] batch [465/500] time 1.557 (1.560) data 0.000 (0.002) loss 0.8887 (1.1916) acc 75.0000 (70.7392) lr 1.9298e-03 eta 9:07:02 +epoch [8/50] batch [470/500] time 1.557 (1.560) data 0.000 (0.002) loss 0.6357 (1.1906) acc 84.3750 (70.7447) lr 1.9298e-03 eta 9:06:54 +epoch [8/50] batch [475/500] time 1.553 (1.560) data 0.000 (0.002) loss 1.0703 (1.1878) acc 81.2500 (70.8224) lr 1.9298e-03 eta 9:06:44 +epoch [8/50] batch [480/500] time 1.573 (1.560) data 0.000 (0.002) loss 0.9653 (1.1861) acc 71.8750 (70.8268) lr 1.9298e-03 eta 9:06:38 +epoch [8/50] batch [485/500] time 1.547 (1.560) data 0.001 (0.002) loss 0.4692 (1.1858) acc 90.6250 (70.8376) lr 1.9298e-03 eta 9:06:29 +epoch [8/50] batch [490/500] time 1.578 (1.560) data 0.000 (0.002) loss 1.3926 (1.1879) acc 59.3750 (70.7781) lr 1.9298e-03 eta 9:06:22 +epoch [8/50] batch [495/500] time 1.548 (1.560) data 0.000 (0.002) loss 1.9434 (1.1875) acc 40.6250 (70.7765) lr 1.9298e-03 eta 9:06:14 +epoch [8/50] batch [500/500] time 1.571 (1.560) data 0.000 (0.002) loss 0.6724 (1.1865) acc 78.1250 (70.7625) lr 1.9048e-03 eta 9:06:09 +Evaluate on the *val* set +=> result +* total: 50,000 +* 
correct: 38,946 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [9/50] batch [5/500] time 1.549 (1.663) data 0.000 (0.161) loss 1.1377 (1.3512) acc 71.8750 (65.6250) lr 1.9048e-03 eta 9:42:00 +epoch [9/50] batch [10/500] time 1.560 (1.624) data 0.000 (0.081) loss 1.7568 (1.2298) acc 53.1250 (67.5000) lr 1.9048e-03 eta 9:28:12 +epoch [9/50] batch [15/500] time 1.552 (1.604) data 0.000 (0.054) loss 1.2002 (1.2139) acc 75.0000 (68.5417) lr 1.9048e-03 eta 9:21:08 +epoch [9/50] batch [20/500] time 1.563 (1.595) data 0.001 (0.041) loss 1.4658 (1.2248) acc 65.6250 (69.2188) lr 1.9048e-03 eta 9:17:35 +epoch [9/50] batch [25/500] time 1.567 (1.587) data 0.000 (0.033) loss 1.2207 (1.2206) acc 68.7500 (69.5000) lr 1.9048e-03 eta 9:14:50 +epoch [9/50] batch [30/500] time 1.536 (1.580) data 0.000 (0.027) loss 1.0703 (1.2153) acc 84.3750 (69.7917) lr 1.9048e-03 eta 9:12:18 +epoch [9/50] batch [35/500] time 1.563 (1.577) data 0.000 (0.023) loss 1.6113 (1.2279) acc 62.5000 (69.4643) lr 1.9048e-03 eta 9:11:00 +epoch [9/50] batch [40/500] time 1.560 (1.575) data 0.000 (0.021) loss 1.2568 (1.2345) acc 75.0000 (69.6094) lr 1.9048e-03 eta 9:10:08 +epoch [9/50] batch [45/500] time 1.542 (1.572) data 0.000 (0.018) loss 0.8345 (1.1850) acc 75.0000 (70.4861) lr 1.9048e-03 eta 9:09:01 +epoch [9/50] batch [50/500] time 1.569 (1.571) data 0.000 (0.016) loss 0.5737 (1.1827) acc 84.3750 (70.6875) lr 1.9048e-03 eta 9:08:25 +epoch [9/50] batch [55/500] time 1.567 (1.570) data 0.001 (0.015) loss 0.9893 (1.1588) acc 68.7500 (70.9091) lr 1.9048e-03 eta 9:08:10 +epoch [9/50] batch [60/500] time 1.577 (1.570) data 0.001 (0.014) loss 1.3350 (1.1596) acc 62.5000 (70.7812) lr 1.9048e-03 eta 9:07:56 +epoch [9/50] batch [65/500] time 1.570 (1.569) data 0.000 (0.013) loss 0.8618 (1.1329) acc 75.0000 (71.0577) lr 1.9048e-03 eta 9:07:29 +epoch [9/50] batch [70/500] 
time 1.562 (1.568) data 0.000 (0.012) loss 1.3535 (1.1303) acc 71.8750 (71.2500) lr 1.9048e-03 eta 9:07:06 +epoch [9/50] batch [75/500] time 1.544 (1.567) data 0.000 (0.011) loss 1.2773 (1.1289) acc 68.7500 (71.4583) lr 1.9048e-03 eta 9:06:38 +epoch [9/50] batch [80/500] time 1.561 (1.567) data 0.000 (0.010) loss 1.4863 (1.1261) acc 65.6250 (71.5234) lr 1.9048e-03 eta 9:06:26 +epoch [9/50] batch [85/500] time 1.561 (1.567) data 0.000 (0.010) loss 0.8701 (1.1199) acc 78.1250 (71.5441) lr 1.9048e-03 eta 9:06:03 +epoch [9/50] batch [90/500] time 1.561 (1.566) data 0.000 (0.009) loss 0.8413 (1.1322) acc 75.0000 (71.1806) lr 1.9048e-03 eta 9:05:37 +epoch [9/50] batch [95/500] time 1.568 (1.565) data 0.001 (0.009) loss 0.5962 (1.1317) acc 78.1250 (71.2829) lr 1.9048e-03 eta 9:05:21 +epoch [9/50] batch [100/500] time 1.561 (1.565) data 0.000 (0.008) loss 1.1885 (1.1350) acc 65.6250 (71.3750) lr 1.9048e-03 eta 9:05:05 +epoch [9/50] batch [105/500] time 1.538 (1.564) data 0.000 (0.008) loss 0.9712 (1.1460) acc 71.8750 (71.2500) lr 1.9048e-03 eta 9:04:38 +epoch [9/50] batch [110/500] time 1.535 (1.564) data 0.000 (0.008) loss 1.2939 (1.1504) acc 65.6250 (71.1364) lr 1.9048e-03 eta 9:04:35 +epoch [9/50] batch [115/500] time 1.541 (1.564) data 0.000 (0.007) loss 0.8149 (1.1451) acc 68.7500 (71.2500) lr 1.9048e-03 eta 9:04:18 +epoch [9/50] batch [120/500] time 1.552 (1.563) data 0.001 (0.007) loss 0.9014 (1.1482) acc 68.7500 (71.0938) lr 1.9048e-03 eta 9:03:57 +epoch [9/50] batch [125/500] time 1.544 (1.562) data 0.000 (0.007) loss 0.8237 (1.1399) acc 78.1250 (71.2500) lr 1.9048e-03 eta 9:03:33 +epoch [9/50] batch [130/500] time 1.554 (1.562) data 0.000 (0.007) loss 1.4062 (1.1468) acc 59.3750 (71.0096) lr 1.9048e-03 eta 9:03:19 +epoch [9/50] batch [135/500] time 1.550 (1.562) data 0.000 (0.006) loss 1.9814 (1.1602) acc 65.6250 (70.9028) lr 1.9048e-03 eta 9:03:10 +epoch [9/50] batch [140/500] time 1.536 (1.562) data 0.000 (0.006) loss 0.7036 (1.1502) acc 81.2500 (71.0491) lr 
1.9048e-03 eta 9:02:52 +epoch [9/50] batch [145/500] time 1.552 (1.562) data 0.000 (0.006) loss 1.1582 (1.1436) acc 56.2500 (70.9914) lr 1.9048e-03 eta 9:02:47 +epoch [9/50] batch [150/500] time 1.544 (1.562) data 0.001 (0.006) loss 0.8896 (1.1484) acc 75.0000 (70.9167) lr 1.9048e-03 eta 9:02:38 +epoch [9/50] batch [155/500] time 1.575 (1.562) data 0.000 (0.006) loss 1.2734 (1.1392) acc 65.6250 (71.0685) lr 1.9048e-03 eta 9:02:43 +epoch [9/50] batch [160/500] time 1.560 (1.562) data 0.000 (0.005) loss 1.1143 (1.1496) acc 78.1250 (70.8789) lr 1.9048e-03 eta 9:02:30 +epoch [9/50] batch [165/500] time 1.561 (1.562) data 0.000 (0.005) loss 1.6855 (1.1468) acc 68.7500 (70.9848) lr 1.9048e-03 eta 9:02:17 +epoch [9/50] batch [170/500] time 1.564 (1.561) data 0.000 (0.005) loss 1.5117 (1.1470) acc 65.6250 (70.9559) lr 1.9048e-03 eta 9:02:05 +epoch [9/50] batch [175/500] time 1.556 (1.561) data 0.000 (0.005) loss 1.5176 (1.1449) acc 68.7500 (71.0357) lr 1.9048e-03 eta 9:01:54 +epoch [9/50] batch [180/500] time 1.545 (1.561) data 0.000 (0.005) loss 1.7783 (1.1403) acc 59.3750 (71.1458) lr 1.9048e-03 eta 9:01:43 +epoch [9/50] batch [185/500] time 1.565 (1.561) data 0.001 (0.005) loss 0.8037 (1.1424) acc 84.3750 (71.1824) lr 1.9048e-03 eta 9:01:38 +epoch [9/50] batch [190/500] time 1.548 (1.561) data 0.000 (0.005) loss 1.4609 (1.1444) acc 56.2500 (71.0526) lr 1.9048e-03 eta 9:01:30 +epoch [9/50] batch [195/500] time 1.576 (1.561) data 0.000 (0.005) loss 1.0107 (1.1371) acc 78.1250 (71.2179) lr 1.9048e-03 eta 9:01:26 +epoch [9/50] batch [200/500] time 1.554 (1.562) data 0.000 (0.004) loss 0.8711 (1.1419) acc 75.0000 (71.1406) lr 1.9048e-03 eta 9:01:19 +epoch [9/50] batch [205/500] time 1.575 (1.562) data 0.000 (0.004) loss 0.5874 (1.1429) acc 84.3750 (71.1738) lr 1.9048e-03 eta 9:01:18 +epoch [9/50] batch [210/500] time 1.552 (1.562) data 0.001 (0.004) loss 0.8794 (1.1361) acc 75.0000 (71.3095) lr 1.9048e-03 eta 9:01:12 +epoch [9/50] batch [215/500] time 1.585 (1.562) data 
0.000 (0.004) loss 0.5527 (1.1298) acc 81.2500 (71.4535) lr 1.9048e-03 eta 9:01:05 +epoch [9/50] batch [220/500] time 1.575 (1.562) data 0.000 (0.004) loss 1.0010 (1.1315) acc 78.1250 (71.5057) lr 1.9048e-03 eta 9:00:56 +epoch [9/50] batch [225/500] time 1.550 (1.562) data 0.000 (0.004) loss 1.8730 (1.1356) acc 56.2500 (71.4722) lr 1.9048e-03 eta 9:00:50 +epoch [9/50] batch [230/500] time 1.587 (1.562) data 0.000 (0.004) loss 1.7617 (1.1391) acc 68.7500 (71.3859) lr 1.9048e-03 eta 9:00:46 +epoch [9/50] batch [235/500] time 1.558 (1.562) data 0.000 (0.004) loss 1.0361 (1.1381) acc 78.1250 (71.4495) lr 1.9048e-03 eta 9:00:40 +epoch [9/50] batch [240/500] time 1.539 (1.562) data 0.000 (0.004) loss 1.1875 (1.1379) acc 68.7500 (71.4453) lr 1.9048e-03 eta 9:00:31 +epoch [9/50] batch [245/500] time 1.549 (1.562) data 0.000 (0.004) loss 0.9189 (1.1408) acc 78.1250 (71.4158) lr 1.9048e-03 eta 9:00:17 +epoch [9/50] batch [250/500] time 1.656 (1.562) data 0.000 (0.004) loss 0.9956 (1.1431) acc 75.0000 (71.3750) lr 1.9048e-03 eta 9:00:15 +epoch [9/50] batch [255/500] time 1.554 (1.562) data 0.000 (0.004) loss 0.4658 (1.1427) acc 87.5000 (71.3113) lr 1.9048e-03 eta 9:00:06 +epoch [9/50] batch [260/500] time 1.548 (1.562) data 0.000 (0.004) loss 0.8447 (1.1423) acc 68.7500 (71.2260) lr 1.9048e-03 eta 8:59:50 +epoch [9/50] batch [265/500] time 1.584 (1.562) data 0.001 (0.003) loss 1.0293 (1.1420) acc 68.7500 (71.2618) lr 1.9048e-03 eta 8:59:43 +epoch [9/50] batch [270/500] time 1.535 (1.561) data 0.000 (0.003) loss 1.3867 (1.1441) acc 68.7500 (71.2384) lr 1.9048e-03 eta 8:59:26 +epoch [9/50] batch [275/500] time 1.549 (1.561) data 0.000 (0.003) loss 1.5010 (1.1458) acc 71.8750 (71.2955) lr 1.9048e-03 eta 8:59:15 +epoch [9/50] batch [280/500] time 1.562 (1.561) data 0.000 (0.003) loss 1.1416 (1.1415) acc 78.1250 (71.4174) lr 1.9048e-03 eta 8:59:06 +epoch [9/50] batch [285/500] time 1.542 (1.561) data 0.000 (0.003) loss 2.1230 (1.1449) acc 53.1250 (71.3816) lr 1.9048e-03 eta 
8:58:56 +epoch [9/50] batch [290/500] time 1.545 (1.561) data 0.000 (0.003) loss 1.0547 (1.1455) acc 71.8750 (71.3254) lr 1.9048e-03 eta 8:58:45 +epoch [9/50] batch [295/500] time 1.527 (1.561) data 0.000 (0.003) loss 0.9248 (1.1445) acc 78.1250 (71.3877) lr 1.9048e-03 eta 8:58:42 +epoch [9/50] batch [300/500] time 1.544 (1.561) data 0.000 (0.003) loss 0.7822 (1.1447) acc 78.1250 (71.3229) lr 1.9048e-03 eta 8:58:33 +epoch [9/50] batch [305/500] time 1.553 (1.561) data 0.001 (0.003) loss 1.2197 (1.1478) acc 68.7500 (71.2705) lr 1.9048e-03 eta 8:58:25 +epoch [9/50] batch [310/500] time 1.558 (1.561) data 0.001 (0.003) loss 0.9517 (1.1474) acc 75.0000 (71.1996) lr 1.9048e-03 eta 8:58:19 +epoch [9/50] batch [315/500] time 1.556 (1.561) data 0.000 (0.003) loss 1.1572 (1.1513) acc 62.5000 (71.0813) lr 1.9048e-03 eta 8:58:12 +epoch [9/50] batch [320/500] time 1.583 (1.562) data 0.000 (0.003) loss 0.5933 (1.1535) acc 90.6250 (71.0449) lr 1.9048e-03 eta 8:58:12 +epoch [9/50] batch [325/500] time 1.551 (1.562) data 0.000 (0.003) loss 0.6792 (1.1532) acc 78.1250 (71.0673) lr 1.9048e-03 eta 8:58:04 +epoch [9/50] batch [330/500] time 1.543 (1.562) data 0.000 (0.003) loss 1.5371 (1.1538) acc 68.7500 (71.1080) lr 1.9048e-03 eta 8:57:57 +epoch [9/50] batch [335/500] time 1.540 (1.562) data 0.000 (0.003) loss 1.4668 (1.1543) acc 68.7500 (71.0634) lr 1.9048e-03 eta 8:57:50 +epoch [9/50] batch [340/500] time 1.567 (1.562) data 0.000 (0.003) loss 0.9434 (1.1553) acc 68.7500 (71.0478) lr 1.9048e-03 eta 8:57:43 +epoch [9/50] batch [345/500] time 1.574 (1.562) data 0.000 (0.003) loss 1.3730 (1.1580) acc 68.7500 (71.0236) lr 1.9048e-03 eta 8:57:36 +epoch [9/50] batch [350/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.1572 (1.1591) acc 68.7500 (70.9643) lr 1.9048e-03 eta 8:57:30 +epoch [9/50] batch [355/500] time 1.548 (1.562) data 0.000 (0.003) loss 1.4199 (1.1567) acc 68.7500 (71.0123) lr 1.9048e-03 eta 8:57:23 +epoch [9/50] batch [360/500] time 1.562 (1.562) data 0.000 (0.003) loss 
0.9326 (1.1570) acc 84.3750 (71.0851) lr 1.9048e-03 eta 8:57:17 +epoch [9/50] batch [365/500] time 1.574 (1.562) data 0.000 (0.003) loss 1.3096 (1.1574) acc 65.6250 (71.0017) lr 1.9048e-03 eta 8:57:08 +epoch [9/50] batch [370/500] time 1.549 (1.562) data 0.000 (0.003) loss 1.5723 (1.1604) acc 62.5000 (70.9544) lr 1.9048e-03 eta 8:56:59 +epoch [9/50] batch [375/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.1836 (1.1619) acc 59.3750 (70.9000) lr 1.9048e-03 eta 8:56:48 +epoch [9/50] batch [380/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.3457 (1.1633) acc 65.6250 (70.8964) lr 1.9048e-03 eta 8:56:39 +epoch [9/50] batch [385/500] time 1.531 (1.561) data 0.000 (0.003) loss 1.7959 (1.1638) acc 59.3750 (70.8442) lr 1.9048e-03 eta 8:56:27 +epoch [9/50] batch [390/500] time 1.559 (1.561) data 0.000 (0.002) loss 1.0449 (1.1628) acc 71.8750 (70.8253) lr 1.9048e-03 eta 8:56:19 +epoch [9/50] batch [395/500] time 1.540 (1.562) data 0.000 (0.002) loss 1.3037 (1.1641) acc 71.8750 (70.8386) lr 1.9048e-03 eta 8:56:15 +epoch [9/50] batch [400/500] time 1.564 (1.562) data 0.000 (0.002) loss 1.0195 (1.1623) acc 84.3750 (70.9141) lr 1.9048e-03 eta 8:56:06 +epoch [9/50] batch [405/500] time 1.566 (1.561) data 0.000 (0.002) loss 0.8501 (1.1641) acc 81.2500 (70.8951) lr 1.9048e-03 eta 8:55:57 +epoch [9/50] batch [410/500] time 1.558 (1.561) data 0.000 (0.002) loss 0.6714 (1.1614) acc 84.3750 (70.9756) lr 1.9048e-03 eta 8:55:48 +epoch [9/50] batch [415/500] time 1.573 (1.561) data 0.000 (0.002) loss 1.0186 (1.1597) acc 75.0000 (71.0241) lr 1.9048e-03 eta 8:55:39 +epoch [9/50] batch [420/500] time 1.563 (1.561) data 0.000 (0.002) loss 1.0693 (1.1587) acc 78.1250 (71.0268) lr 1.9048e-03 eta 8:55:30 +epoch [9/50] batch [425/500] time 1.585 (1.561) data 0.000 (0.002) loss 0.6245 (1.1567) acc 87.5000 (71.0515) lr 1.9048e-03 eta 8:55:26 +epoch [9/50] batch [430/500] time 1.583 (1.562) data 0.000 (0.002) loss 1.0762 (1.1565) acc 71.8750 (71.0828) lr 1.9048e-03 eta 8:55:22 +epoch [9/50] 
batch [435/500] time 1.547 (1.562) data 0.000 (0.002) loss 1.0000 (1.1590) acc 71.8750 (71.0417) lr 1.9048e-03 eta 8:55:16 +epoch [9/50] batch [440/500] time 1.568 (1.562) data 0.000 (0.002) loss 0.7715 (1.1574) acc 75.0000 (71.0440) lr 1.9048e-03 eta 8:55:14 +epoch [9/50] batch [445/500] time 1.532 (1.562) data 0.001 (0.002) loss 1.4971 (1.1580) acc 65.6250 (70.9761) lr 1.9048e-03 eta 8:55:03 +epoch [9/50] batch [450/500] time 1.540 (1.562) data 0.000 (0.002) loss 0.9263 (1.1563) acc 81.2500 (71.0069) lr 1.9048e-03 eta 8:54:53 +epoch [9/50] batch [455/500] time 1.579 (1.562) data 0.000 (0.002) loss 1.1631 (1.1550) acc 75.0000 (71.0234) lr 1.9048e-03 eta 8:54:44 +epoch [9/50] batch [460/500] time 1.558 (1.562) data 0.000 (0.002) loss 0.9458 (1.1555) acc 68.7500 (71.0122) lr 1.9048e-03 eta 8:54:36 +epoch [9/50] batch [465/500] time 1.568 (1.562) data 0.000 (0.002) loss 1.4121 (1.1548) acc 71.8750 (71.0685) lr 1.9048e-03 eta 8:54:29 +epoch [9/50] batch [470/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.3652 (1.1539) acc 59.3750 (71.0904) lr 1.9048e-03 eta 8:54:21 +epoch [9/50] batch [475/500] time 1.548 (1.562) data 0.000 (0.002) loss 0.9604 (1.1538) acc 75.0000 (71.1118) lr 1.9048e-03 eta 8:54:12 +epoch [9/50] batch [480/500] time 1.554 (1.562) data 0.000 (0.002) loss 1.2822 (1.1539) acc 75.0000 (71.1654) lr 1.9048e-03 eta 8:54:03 +epoch [9/50] batch [485/500] time 1.534 (1.561) data 0.001 (0.002) loss 1.0576 (1.1559) acc 84.3750 (71.1856) lr 1.9048e-03 eta 8:53:51 +epoch [9/50] batch [490/500] time 1.546 (1.561) data 0.000 (0.002) loss 1.9336 (1.1585) acc 56.2500 (71.1288) lr 1.9048e-03 eta 8:53:41 +epoch [9/50] batch [495/500] time 1.552 (1.561) data 0.000 (0.002) loss 1.3652 (1.1586) acc 71.8750 (71.1301) lr 1.9048e-03 eta 8:53:31 +epoch [9/50] batch [500/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.6602 (1.1582) acc 87.5000 (71.1562) lr 1.8763e-03 eta 8:53:20 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,876 +* accuracy: 77.8% +* 
error: 22.2% +* macro_f1: 77.2% +epoch [10/50] batch [5/500] time 1.557 (1.684) data 0.000 (0.181) loss 1.0049 (1.0425) acc 78.1250 (71.8750) lr 1.8763e-03 eta 9:35:21 +epoch [10/50] batch [10/500] time 1.570 (1.629) data 0.000 (0.091) loss 1.6309 (1.2458) acc 62.5000 (67.1875) lr 1.8763e-03 eta 9:16:12 +epoch [10/50] batch [15/500] time 1.544 (1.608) data 0.000 (0.061) loss 0.9360 (1.1974) acc 78.1250 (68.9583) lr 1.8763e-03 eta 9:08:56 +epoch [10/50] batch [20/500] time 1.585 (1.598) data 0.000 (0.046) loss 1.6816 (1.1740) acc 65.6250 (70.3125) lr 1.8763e-03 eta 9:05:30 +epoch [10/50] batch [25/500] time 1.567 (1.591) data 0.000 (0.037) loss 1.2861 (1.1744) acc 68.7500 (70.8750) lr 1.8763e-03 eta 9:03:01 +epoch [10/50] batch [30/500] time 1.579 (1.585) data 0.000 (0.031) loss 1.1572 (1.1629) acc 75.0000 (71.2500) lr 1.8763e-03 eta 9:00:38 +epoch [10/50] batch [35/500] time 1.572 (1.581) data 0.000 (0.026) loss 0.3308 (1.1419) acc 90.6250 (71.6071) lr 1.8763e-03 eta 8:59:08 +epoch [10/50] batch [40/500] time 1.564 (1.582) data 0.000 (0.023) loss 0.8428 (1.1288) acc 81.2500 (71.7969) lr 1.8763e-03 eta 8:59:21 +epoch [10/50] batch [45/500] time 1.564 (1.579) data 0.000 (0.021) loss 1.0547 (1.1230) acc 75.0000 (71.6667) lr 1.8763e-03 eta 8:58:27 +epoch [10/50] batch [50/500] time 1.555 (1.578) data 0.000 (0.019) loss 1.3477 (1.1156) acc 62.5000 (71.5000) lr 1.8763e-03 eta 8:57:40 +epoch [10/50] batch [55/500] time 1.548 (1.576) data 0.000 (0.017) loss 0.8716 (1.1069) acc 68.7500 (71.0795) lr 1.8763e-03 eta 8:57:03 +epoch [10/50] batch [60/500] time 1.563 (1.575) data 0.000 (0.015) loss 1.6787 (1.1198) acc 62.5000 (70.6771) lr 1.8763e-03 eta 8:56:35 +epoch [10/50] batch [65/500] time 1.575 (1.575) data 0.001 (0.014) loss 1.1924 (1.1250) acc 71.8750 (70.5769) lr 1.8763e-03 eta 8:56:28 +epoch [10/50] batch [70/500] time 1.590 (1.574) data 0.001 (0.013) loss 0.6768 (1.1176) acc 81.2500 (70.9375) lr 1.8763e-03 eta 8:56:01 +epoch [10/50] batch [75/500] time 1.584 (1.574) 
data 0.000 (0.013) loss 1.1641 (1.1304) acc 78.1250 (70.7917) lr 1.8763e-03 eta 8:55:43 +epoch [10/50] batch [80/500] time 1.585 (1.575) data 0.000 (0.012) loss 0.7695 (1.1330) acc 87.5000 (70.7812) lr 1.8763e-03 eta 8:55:58 +epoch [10/50] batch [85/500] time 1.559 (1.574) data 0.002 (0.011) loss 1.2852 (1.1293) acc 65.6250 (70.9559) lr 1.8763e-03 eta 8:55:28 +epoch [10/50] batch [90/500] time 1.579 (1.574) data 0.001 (0.011) loss 0.7041 (1.1323) acc 78.1250 (70.5903) lr 1.8763e-03 eta 8:55:15 +epoch [10/50] batch [95/500] time 1.582 (1.574) data 0.000 (0.010) loss 1.2041 (1.1376) acc 62.5000 (70.4605) lr 1.8763e-03 eta 8:55:09 +epoch [10/50] batch [100/500] time 1.569 (1.573) data 0.000 (0.009) loss 1.0645 (1.1353) acc 81.2500 (70.7812) lr 1.8763e-03 eta 8:54:50 +epoch [10/50] batch [105/500] time 1.559 (1.572) data 0.001 (0.009) loss 1.6094 (1.1412) acc 59.3750 (70.7143) lr 1.8763e-03 eta 8:54:29 +epoch [10/50] batch [110/500] time 1.546 (1.571) data 0.000 (0.009) loss 1.1250 (1.1405) acc 56.2500 (70.5966) lr 1.8763e-03 eta 8:53:56 +epoch [10/50] batch [115/500] time 1.519 (1.571) data 0.000 (0.008) loss 1.0283 (1.1280) acc 68.7500 (70.8152) lr 1.8763e-03 eta 8:53:38 +epoch [10/50] batch [120/500] time 1.566 (1.570) data 0.000 (0.008) loss 1.0215 (1.1176) acc 71.8750 (70.9375) lr 1.8763e-03 eta 8:53:23 +epoch [10/50] batch [125/500] time 1.585 (1.570) data 0.001 (0.008) loss 1.0771 (1.1138) acc 78.1250 (71.0750) lr 1.8763e-03 eta 8:53:13 +epoch [10/50] batch [130/500] time 1.560 (1.570) data 0.001 (0.007) loss 0.6831 (1.1105) acc 81.2500 (71.1058) lr 1.8763e-03 eta 8:52:51 +epoch [10/50] batch [135/500] time 1.539 (1.569) data 0.000 (0.007) loss 0.8530 (1.1056) acc 68.7500 (71.1111) lr 1.8763e-03 eta 8:52:29 +epoch [10/50] batch [140/500] time 1.566 (1.569) data 0.000 (0.007) loss 1.4141 (1.1181) acc 59.3750 (70.9375) lr 1.8763e-03 eta 8:52:21 +epoch [10/50] batch [145/500] time 1.570 (1.568) data 0.000 (0.007) loss 1.4209 (1.1213) acc 75.0000 (70.9267) lr 
1.8763e-03 eta 8:52:06 +epoch [10/50] batch [150/500] time 1.554 (1.568) data 0.000 (0.006) loss 1.3789 (1.1205) acc 65.6250 (70.8750) lr 1.8763e-03 eta 8:51:57 +epoch [10/50] batch [155/500] time 1.570 (1.568) data 0.000 (0.006) loss 1.2061 (1.1309) acc 75.0000 (70.8468) lr 1.8763e-03 eta 8:51:47 +epoch [10/50] batch [160/500] time 1.544 (1.568) data 0.000 (0.006) loss 0.5586 (1.1204) acc 87.5000 (71.0938) lr 1.8763e-03 eta 8:51:28 +epoch [10/50] batch [165/500] time 1.555 (1.567) data 0.001 (0.006) loss 1.5303 (1.1269) acc 71.8750 (71.0038) lr 1.8763e-03 eta 8:51:10 +epoch [10/50] batch [170/500] time 1.562 (1.567) data 0.000 (0.006) loss 1.5195 (1.1215) acc 62.5000 (71.0478) lr 1.8763e-03 eta 8:51:02 +epoch [10/50] batch [175/500] time 1.560 (1.567) data 0.000 (0.006) loss 0.9473 (1.1193) acc 81.2500 (71.1429) lr 1.8763e-03 eta 8:50:48 +epoch [10/50] batch [180/500] time 1.570 (1.566) data 0.000 (0.005) loss 0.4827 (1.1165) acc 78.1250 (71.1285) lr 1.8763e-03 eta 8:50:29 +epoch [10/50] batch [185/500] time 1.556 (1.567) data 0.000 (0.005) loss 1.2627 (1.1200) acc 62.5000 (71.0304) lr 1.8763e-03 eta 8:50:30 +epoch [10/50] batch [190/500] time 1.572 (1.566) data 0.001 (0.005) loss 1.4248 (1.1211) acc 65.6250 (71.0855) lr 1.8763e-03 eta 8:50:14 +epoch [10/50] batch [195/500] time 1.552 (1.566) data 0.000 (0.005) loss 1.3945 (1.1253) acc 59.3750 (70.9936) lr 1.8763e-03 eta 8:49:59 +epoch [10/50] batch [200/500] time 1.555 (1.566) data 0.000 (0.005) loss 0.9141 (1.1228) acc 81.2500 (71.0625) lr 1.8763e-03 eta 8:49:46 +epoch [10/50] batch [205/500] time 1.543 (1.566) data 0.000 (0.005) loss 0.7109 (1.1251) acc 84.3750 (71.0518) lr 1.8763e-03 eta 8:49:35 +epoch [10/50] batch [210/500] time 1.550 (1.565) data 0.001 (0.005) loss 0.9575 (1.1293) acc 71.8750 (70.9970) lr 1.8763e-03 eta 8:49:21 +epoch [10/50] batch [215/500] time 1.545 (1.565) data 0.001 (0.005) loss 0.7744 (1.1306) acc 81.2500 (71.0465) lr 1.8763e-03 eta 8:49:09 +epoch [10/50] batch [220/500] time 1.571 
(1.565) data 0.000 (0.005) loss 1.0547 (1.1350) acc 71.8750 (70.9375) lr 1.8763e-03 eta 8:48:56 +epoch [10/50] batch [225/500] time 1.550 (1.565) data 0.000 (0.004) loss 1.7314 (1.1400) acc 68.7500 (70.9306) lr 1.8763e-03 eta 8:48:45 +epoch [10/50] batch [230/500] time 1.579 (1.565) data 0.000 (0.004) loss 1.5264 (1.1436) acc 62.5000 (70.7880) lr 1.8763e-03 eta 8:48:36 +epoch [10/50] batch [235/500] time 1.565 (1.565) data 0.000 (0.004) loss 1.2129 (1.1457) acc 71.8750 (70.8378) lr 1.8763e-03 eta 8:48:28 +epoch [10/50] batch [240/500] time 1.563 (1.565) data 0.000 (0.004) loss 0.6650 (1.1447) acc 78.1250 (70.8203) lr 1.8763e-03 eta 8:48:23 +epoch [10/50] batch [245/500] time 1.554 (1.565) data 0.000 (0.004) loss 1.0811 (1.1415) acc 75.0000 (71.0204) lr 1.8763e-03 eta 8:48:18 +epoch [10/50] batch [250/500] time 1.541 (1.565) data 0.000 (0.004) loss 1.4297 (1.1425) acc 62.5000 (70.9750) lr 1.8763e-03 eta 8:48:02 +epoch [10/50] batch [255/500] time 1.542 (1.564) data 0.000 (0.004) loss 1.4922 (1.1385) acc 59.3750 (71.0417) lr 1.8763e-03 eta 8:47:49 +epoch [10/50] batch [260/500] time 1.565 (1.564) data 0.001 (0.004) loss 0.7651 (1.1375) acc 81.2500 (71.0216) lr 1.8763e-03 eta 8:47:38 +epoch [10/50] batch [265/500] time 1.544 (1.564) data 0.000 (0.004) loss 1.2041 (1.1400) acc 75.0000 (71.0613) lr 1.8763e-03 eta 8:47:22 +epoch [10/50] batch [270/500] time 1.554 (1.564) data 0.000 (0.004) loss 1.0859 (1.1392) acc 65.6250 (71.0532) lr 1.8763e-03 eta 8:47:13 +epoch [10/50] batch [275/500] time 1.556 (1.564) data 0.000 (0.004) loss 1.6455 (1.1438) acc 56.2500 (70.9886) lr 1.8763e-03 eta 8:47:03 +epoch [10/50] batch [280/500] time 1.567 (1.564) data 0.000 (0.004) loss 0.8105 (1.1408) acc 75.0000 (71.0268) lr 1.8763e-03 eta 8:47:00 +epoch [10/50] batch [285/500] time 1.574 (1.564) data 0.000 (0.004) loss 1.6260 (1.1432) acc 62.5000 (70.9759) lr 1.8763e-03 eta 8:46:59 +epoch [10/50] batch [290/500] time 1.548 (1.564) data 0.000 (0.004) loss 1.2197 (1.1416) acc 65.6250 
(70.9483) lr 1.8763e-03 eta 8:46:48 +epoch [10/50] batch [295/500] time 1.544 (1.564) data 0.000 (0.004) loss 1.3027 (1.1451) acc 65.6250 (70.8792) lr 1.8763e-03 eta 8:46:40 +epoch [10/50] batch [300/500] time 1.563 (1.564) data 0.000 (0.003) loss 1.4209 (1.1437) acc 62.5000 (70.9375) lr 1.8763e-03 eta 8:46:28 +epoch [10/50] batch [305/500] time 1.549 (1.564) data 0.000 (0.003) loss 1.0234 (1.1433) acc 62.5000 (70.9016) lr 1.8763e-03 eta 8:46:19 +epoch [10/50] batch [310/500] time 1.574 (1.564) data 0.000 (0.003) loss 1.8398 (1.1447) acc 62.5000 (70.9274) lr 1.8763e-03 eta 8:46:11 +epoch [10/50] batch [315/500] time 1.547 (1.564) data 0.001 (0.003) loss 1.6357 (1.1455) acc 59.3750 (70.9325) lr 1.8763e-03 eta 8:46:02 +epoch [10/50] batch [320/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.2510 (1.1473) acc 71.8750 (70.8887) lr 1.8763e-03 eta 8:45:50 +epoch [10/50] batch [325/500] time 1.673 (1.564) data 0.000 (0.003) loss 1.2510 (1.1454) acc 68.7500 (70.8750) lr 1.8763e-03 eta 8:45:47 +epoch [10/50] batch [330/500] time 1.550 (1.564) data 0.000 (0.003) loss 1.0820 (1.1475) acc 65.6250 (70.8523) lr 1.8763e-03 eta 8:45:40 +epoch [10/50] batch [335/500] time 1.564 (1.564) data 0.000 (0.003) loss 1.0146 (1.1486) acc 71.8750 (70.8955) lr 1.8763e-03 eta 8:45:34 +epoch [10/50] batch [340/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.1777 (1.1474) acc 68.7500 (70.9007) lr 1.8763e-03 eta 8:45:27 +epoch [10/50] batch [345/500] time 1.558 (1.564) data 0.000 (0.003) loss 1.2754 (1.1459) acc 65.6250 (70.9058) lr 1.8763e-03 eta 8:45:17 +epoch [10/50] batch [350/500] time 1.576 (1.564) data 0.000 (0.003) loss 1.1865 (1.1429) acc 68.7500 (70.9821) lr 1.8763e-03 eta 8:45:09 +epoch [10/50] batch [355/500] time 1.533 (1.564) data 0.000 (0.003) loss 1.0391 (1.1420) acc 71.8750 (70.9947) lr 1.8763e-03 eta 8:44:59 +epoch [10/50] batch [360/500] time 1.578 (1.564) data 0.000 (0.003) loss 1.2490 (1.1393) acc 68.7500 (71.0764) lr 1.8763e-03 eta 8:44:49 +epoch [10/50] batch [365/500] 
time 1.577 (1.564) data 0.000 (0.003) loss 1.6650 (1.1420) acc 62.5000 (71.0531) lr 1.8763e-03 eta 8:44:41 +epoch [10/50] batch [370/500] time 1.571 (1.563) data 0.000 (0.003) loss 0.7930 (1.1422) acc 81.2500 (71.0389) lr 1.8763e-03 eta 8:44:33 +epoch [10/50] batch [375/500] time 1.565 (1.563) data 0.000 (0.003) loss 1.2852 (1.1423) acc 75.0000 (71.0917) lr 1.8763e-03 eta 8:44:22 +epoch [10/50] batch [380/500] time 1.553 (1.563) data 0.000 (0.003) loss 1.3105 (1.1431) acc 68.7500 (71.0855) lr 1.8763e-03 eta 8:44:12 +epoch [10/50] batch [385/500] time 1.538 (1.563) data 0.000 (0.003) loss 1.4707 (1.1444) acc 62.5000 (71.0714) lr 1.8763e-03 eta 8:44:01 +epoch [10/50] batch [390/500] time 1.567 (1.563) data 0.000 (0.003) loss 1.0254 (1.1441) acc 78.1250 (71.1218) lr 1.8763e-03 eta 8:43:52 +epoch [10/50] batch [395/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.1582 (1.1450) acc 56.2500 (71.1076) lr 1.8763e-03 eta 8:43:44 +epoch [10/50] batch [400/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.8784 (1.1465) acc 71.8750 (71.0938) lr 1.8763e-03 eta 8:43:35 +epoch [10/50] batch [405/500] time 1.574 (1.563) data 0.000 (0.003) loss 1.3936 (1.1483) acc 59.3750 (71.0340) lr 1.8763e-03 eta 8:43:28 +epoch [10/50] batch [410/500] time 1.560 (1.563) data 0.001 (0.003) loss 1.3457 (1.1470) acc 75.0000 (71.1052) lr 1.8763e-03 eta 8:43:20 +epoch [10/50] batch [415/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.0801 (1.1462) acc 78.1250 (71.1446) lr 1.8763e-03 eta 8:43:14 +epoch [10/50] batch [420/500] time 1.581 (1.563) data 0.000 (0.003) loss 1.4336 (1.1445) acc 65.6250 (71.1682) lr 1.8763e-03 eta 8:43:07 +epoch [10/50] batch [425/500] time 1.595 (1.563) data 0.000 (0.003) loss 1.2969 (1.1455) acc 75.0000 (71.1250) lr 1.8763e-03 eta 8:43:04 +epoch [10/50] batch [430/500] time 1.559 (1.563) data 0.000 (0.003) loss 1.0811 (1.1441) acc 71.8750 (71.1628) lr 1.8763e-03 eta 8:42:56 +epoch [10/50] batch [435/500] time 1.556 (1.563) data 0.000 (0.002) loss 0.8174 (1.1449) acc 
65.6250 (71.1279) lr 1.8763e-03 eta 8:42:47 +epoch [10/50] batch [440/500] time 1.592 (1.563) data 0.000 (0.002) loss 1.3262 (1.1458) acc 65.6250 (71.1080) lr 1.8763e-03 eta 8:42:42 +epoch [10/50] batch [445/500] time 1.563 (1.563) data 0.000 (0.002) loss 1.8271 (1.1494) acc 59.3750 (71.0253) lr 1.8763e-03 eta 8:42:32 +epoch [10/50] batch [450/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.5723 (1.1518) acc 62.5000 (71.0069) lr 1.8763e-03 eta 8:42:26 +epoch [10/50] batch [455/500] time 1.560 (1.563) data 0.001 (0.002) loss 1.7559 (1.1510) acc 68.7500 (71.0234) lr 1.8763e-03 eta 8:42:18 +epoch [10/50] batch [460/500] time 1.554 (1.563) data 0.000 (0.002) loss 1.1807 (1.1509) acc 65.6250 (70.9783) lr 1.8763e-03 eta 8:42:08 +epoch [10/50] batch [465/500] time 1.586 (1.563) data 0.000 (0.002) loss 2.2109 (1.1515) acc 56.2500 (70.9812) lr 1.8763e-03 eta 8:42:00 +epoch [10/50] batch [470/500] time 1.549 (1.563) data 0.000 (0.002) loss 1.1523 (1.1481) acc 62.5000 (71.0439) lr 1.8763e-03 eta 8:41:56 +epoch [10/50] batch [475/500] time 1.564 (1.563) data 0.001 (0.002) loss 1.9941 (1.1490) acc 56.2500 (71.0263) lr 1.8763e-03 eta 8:41:46 +epoch [10/50] batch [480/500] time 1.544 (1.563) data 0.000 (0.002) loss 0.9736 (1.1499) acc 78.1250 (71.0286) lr 1.8763e-03 eta 8:41:37 +epoch [10/50] batch [485/500] time 1.560 (1.563) data 0.001 (0.002) loss 1.6484 (1.1500) acc 68.7500 (71.0631) lr 1.8763e-03 eta 8:41:29 +epoch [10/50] batch [490/500] time 1.569 (1.563) data 0.000 (0.002) loss 0.8760 (1.1499) acc 81.2500 (71.0651) lr 1.8763e-03 eta 8:41:20 +epoch [10/50] batch [495/500] time 1.526 (1.563) data 0.000 (0.002) loss 0.7622 (1.1482) acc 78.1250 (71.0922) lr 1.8763e-03 eta 8:41:10 +epoch [10/50] batch [500/500] time 1.551 (1.563) data 0.000 (0.002) loss 1.5322 (1.1499) acc 71.8750 (71.0875) lr 1.8443e-03 eta 8:41:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,866 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.2% +epoch [11/50] batch [5/500] time 
1.531 (1.647) data 0.000 (0.158) loss 1.2617 (1.3807) acc 65.6250 (66.8750) lr 1.8443e-03 eta 9:08:44 +epoch [11/50] batch [10/500] time 1.581 (1.605) data 0.000 (0.079) loss 1.1992 (1.2677) acc 62.5000 (67.5000) lr 1.8443e-03 eta 8:54:44 +epoch [11/50] batch [15/500] time 1.597 (1.596) data 0.000 (0.053) loss 1.4199 (1.2301) acc 71.8750 (68.7500) lr 1.8443e-03 eta 8:51:41 +epoch [11/50] batch [20/500] time 1.555 (1.589) data 0.001 (0.040) loss 1.3447 (1.2435) acc 59.3750 (69.5312) lr 1.8443e-03 eta 8:49:03 +epoch [11/50] batch [25/500] time 1.552 (1.583) data 0.000 (0.032) loss 1.5156 (1.2126) acc 68.7500 (69.6250) lr 1.8443e-03 eta 8:46:53 +epoch [11/50] batch [30/500] time 1.569 (1.580) data 0.000 (0.027) loss 0.6602 (1.1862) acc 75.0000 (69.0625) lr 1.8443e-03 eta 8:45:52 +epoch [11/50] batch [35/500] time 1.568 (1.580) data 0.001 (0.023) loss 0.9351 (1.1939) acc 68.7500 (69.1071) lr 1.8443e-03 eta 8:45:37 +epoch [11/50] batch [40/500] time 1.567 (1.577) data 0.000 (0.020) loss 0.6821 (1.1574) acc 75.0000 (69.4531) lr 1.8443e-03 eta 8:44:42 +epoch [11/50] batch [45/500] time 1.559 (1.576) data 0.000 (0.018) loss 0.9819 (1.1488) acc 71.8750 (69.7222) lr 1.8443e-03 eta 8:43:59 +epoch [11/50] batch [50/500] time 1.553 (1.575) data 0.001 (0.016) loss 0.7866 (1.1512) acc 75.0000 (70.0000) lr 1.8443e-03 eta 8:43:36 +epoch [11/50] batch [55/500] time 1.557 (1.573) data 0.001 (0.015) loss 1.8008 (1.1491) acc 62.5000 (70.2841) lr 1.8443e-03 eta 8:42:58 +epoch [11/50] batch [60/500] time 1.563 (1.575) data 0.000 (0.014) loss 1.3223 (1.1579) acc 68.7500 (70.1042) lr 1.8443e-03 eta 8:43:15 +epoch [11/50] batch [65/500] time 1.535 (1.573) data 0.000 (0.013) loss 1.1377 (1.1519) acc 65.6250 (70.3365) lr 1.8443e-03 eta 8:42:35 +epoch [11/50] batch [70/500] time 1.565 (1.572) data 0.000 (0.012) loss 0.8003 (1.1460) acc 90.6250 (70.8036) lr 1.8443e-03 eta 8:42:13 +epoch [11/50] batch [75/500] time 1.551 (1.571) data 0.000 (0.011) loss 0.7349 (1.1259) acc 81.2500 (71.3750) lr 
1.8443e-03 eta 8:41:33 +epoch [11/50] batch [80/500] time 1.543 (1.570) data 0.000 (0.010) loss 1.2314 (1.1241) acc 75.0000 (71.4844) lr 1.8443e-03 eta 8:41:16 +epoch [11/50] batch [85/500] time 1.556 (1.569) data 0.000 (0.010) loss 1.2002 (1.1190) acc 71.8750 (71.4706) lr 1.8443e-03 eta 8:40:45 +epoch [11/50] batch [90/500] time 1.565 (1.568) data 0.001 (0.009) loss 1.6182 (1.1236) acc 56.2500 (71.3194) lr 1.8443e-03 eta 8:40:25 +epoch [11/50] batch [95/500] time 1.575 (1.567) data 0.000 (0.009) loss 0.7324 (1.1265) acc 84.3750 (71.3816) lr 1.8443e-03 eta 8:39:47 +epoch [11/50] batch [100/500] time 1.555 (1.566) data 0.000 (0.008) loss 1.3447 (1.1229) acc 65.6250 (71.3750) lr 1.8443e-03 eta 8:39:30 +epoch [11/50] batch [105/500] time 1.526 (1.567) data 0.000 (0.008) loss 1.0332 (1.1300) acc 75.0000 (71.3690) lr 1.8443e-03 eta 8:39:26 +epoch [11/50] batch [110/500] time 1.561 (1.566) data 0.000 (0.008) loss 1.0742 (1.1309) acc 71.8750 (71.4205) lr 1.8443e-03 eta 8:39:07 +epoch [11/50] batch [115/500] time 1.548 (1.565) data 0.000 (0.007) loss 1.0352 (1.1322) acc 71.8750 (71.1957) lr 1.8443e-03 eta 8:38:43 +epoch [11/50] batch [120/500] time 1.538 (1.565) data 0.000 (0.007) loss 0.8257 (1.1227) acc 78.1250 (71.4583) lr 1.8443e-03 eta 8:38:30 +epoch [11/50] batch [125/500] time 1.569 (1.565) data 0.001 (0.007) loss 1.3213 (1.1266) acc 71.8750 (71.4250) lr 1.8443e-03 eta 8:38:18 +epoch [11/50] batch [130/500] time 1.554 (1.564) data 0.000 (0.007) loss 1.2891 (1.1349) acc 65.6250 (71.3702) lr 1.8443e-03 eta 8:38:01 +epoch [11/50] batch [135/500] time 1.556 (1.564) data 0.000 (0.006) loss 0.6377 (1.1321) acc 84.3750 (71.3657) lr 1.8443e-03 eta 8:37:54 +epoch [11/50] batch [140/500] time 1.566 (1.564) data 0.001 (0.006) loss 0.9077 (1.1371) acc 75.0000 (71.2054) lr 1.8443e-03 eta 8:37:42 +epoch [11/50] batch [145/500] time 1.553 (1.564) data 0.000 (0.006) loss 1.2998 (1.1425) acc 71.8750 (71.2069) lr 1.8443e-03 eta 8:37:29 +epoch [11/50] batch [150/500] time 1.561 
(1.564) data 0.000 (0.006) loss 1.2471 (1.1436) acc 65.6250 (71.0833) lr 1.8443e-03 eta 8:37:21 +epoch [11/50] batch [155/500] time 1.572 (1.564) data 0.001 (0.006) loss 1.6406 (1.1470) acc 65.6250 (71.0282) lr 1.8443e-03 eta 8:37:10 +epoch [11/50] batch [160/500] time 1.572 (1.563) data 0.001 (0.005) loss 1.4229 (1.1530) acc 59.3750 (70.9375) lr 1.8443e-03 eta 8:36:57 +epoch [11/50] batch [165/500] time 1.551 (1.563) data 0.000 (0.005) loss 1.4922 (1.1609) acc 62.5000 (70.9470) lr 1.8443e-03 eta 8:36:47 +epoch [11/50] batch [170/500] time 1.551 (1.563) data 0.001 (0.005) loss 1.3623 (1.1635) acc 56.2500 (70.8272) lr 1.8443e-03 eta 8:36:36 +epoch [11/50] batch [175/500] time 1.562 (1.563) data 0.001 (0.005) loss 1.3711 (1.1649) acc 65.6250 (70.7500) lr 1.8443e-03 eta 8:36:31 +epoch [11/50] batch [180/500] time 1.552 (1.563) data 0.001 (0.005) loss 0.8203 (1.1635) acc 71.8750 (70.7986) lr 1.8443e-03 eta 8:36:21 +epoch [11/50] batch [185/500] time 1.528 (1.563) data 0.000 (0.005) loss 0.9575 (1.1640) acc 71.8750 (70.6588) lr 1.8443e-03 eta 8:36:03 +epoch [11/50] batch [190/500] time 1.578 (1.562) data 0.000 (0.005) loss 0.7744 (1.1597) acc 75.0000 (70.6743) lr 1.8443e-03 eta 8:35:52 +epoch [11/50] batch [195/500] time 1.565 (1.563) data 0.000 (0.005) loss 1.0449 (1.1630) acc 68.7500 (70.5288) lr 1.8443e-03 eta 8:35:47 +epoch [11/50] batch [200/500] time 1.547 (1.562) data 0.001 (0.004) loss 1.3193 (1.1620) acc 71.8750 (70.4531) lr 1.8443e-03 eta 8:35:36 +epoch [11/50] batch [205/500] time 1.547 (1.563) data 0.000 (0.004) loss 1.0752 (1.1636) acc 68.7500 (70.4268) lr 1.8443e-03 eta 8:35:35 +epoch [11/50] batch [210/500] time 1.537 (1.562) data 0.001 (0.004) loss 1.3789 (1.1654) acc 59.3750 (70.3869) lr 1.8443e-03 eta 8:35:17 +epoch [11/50] batch [215/500] time 1.558 (1.562) data 0.000 (0.004) loss 0.8921 (1.1662) acc 65.6250 (70.2471) lr 1.8443e-03 eta 8:35:06 +epoch [11/50] batch [220/500] time 1.546 (1.562) data 0.000 (0.004) loss 1.0693 (1.1676) acc 65.6250 
(70.2131) lr 1.8443e-03 eta 8:34:49 +epoch [11/50] batch [225/500] time 1.556 (1.562) data 0.000 (0.004) loss 0.8726 (1.1621) acc 75.0000 (70.2917) lr 1.8443e-03 eta 8:34:40 +epoch [11/50] batch [230/500] time 1.587 (1.562) data 0.001 (0.004) loss 0.8315 (1.1570) acc 81.2500 (70.4484) lr 1.8443e-03 eta 8:34:31 +epoch [11/50] batch [235/500] time 1.559 (1.562) data 0.000 (0.004) loss 1.1816 (1.1625) acc 75.0000 (70.3590) lr 1.8443e-03 eta 8:34:24 +epoch [11/50] batch [240/500] time 1.554 (1.561) data 0.000 (0.004) loss 1.6484 (1.1648) acc 62.5000 (70.3776) lr 1.8443e-03 eta 8:34:14 +epoch [11/50] batch [245/500] time 1.689 (1.562) data 0.000 (0.004) loss 1.4531 (1.1658) acc 68.7500 (70.3827) lr 1.8443e-03 eta 8:34:15 +epoch [11/50] batch [250/500] time 1.531 (1.562) data 0.001 (0.004) loss 0.8540 (1.1625) acc 81.2500 (70.4750) lr 1.8443e-03 eta 8:34:04 +epoch [11/50] batch [255/500] time 1.546 (1.562) data 0.000 (0.004) loss 0.9536 (1.1660) acc 81.2500 (70.4657) lr 1.8443e-03 eta 8:33:51 +epoch [11/50] batch [260/500] time 1.577 (1.561) data 0.000 (0.004) loss 0.8813 (1.1653) acc 68.7500 (70.4447) lr 1.8443e-03 eta 8:33:39 +epoch [11/50] batch [265/500] time 1.565 (1.561) data 0.000 (0.003) loss 1.2578 (1.1668) acc 71.8750 (70.4245) lr 1.8443e-03 eta 8:33:29 +epoch [11/50] batch [270/500] time 1.581 (1.561) data 0.000 (0.003) loss 0.6055 (1.1627) acc 84.3750 (70.5208) lr 1.8443e-03 eta 8:33:24 +epoch [11/50] batch [275/500] time 1.567 (1.561) data 0.000 (0.003) loss 0.9272 (1.1644) acc 81.2500 (70.5114) lr 1.8443e-03 eta 8:33:17 +epoch [11/50] batch [280/500] time 1.567 (1.561) data 0.000 (0.003) loss 1.4541 (1.1646) acc 68.7500 (70.4799) lr 1.8443e-03 eta 8:33:10 +epoch [11/50] batch [285/500] time 1.559 (1.561) data 0.000 (0.003) loss 1.2666 (1.1636) acc 75.0000 (70.5263) lr 1.8443e-03 eta 8:32:59 +epoch [11/50] batch [290/500] time 1.555 (1.561) data 0.000 (0.003) loss 1.1943 (1.1639) acc 71.8750 (70.4957) lr 1.8443e-03 eta 8:32:49 +epoch [11/50] batch [295/500] 
time 1.574 (1.561) data 0.000 (0.003) loss 1.4609 (1.1615) acc 65.6250 (70.5403) lr 1.8443e-03 eta 8:32:45 +epoch [11/50] batch [300/500] time 1.552 (1.561) data 0.000 (0.003) loss 1.4658 (1.1605) acc 59.3750 (70.5417) lr 1.8443e-03 eta 8:32:35 +epoch [11/50] batch [305/500] time 1.536 (1.561) data 0.001 (0.003) loss 0.8521 (1.1572) acc 71.8750 (70.6045) lr 1.8443e-03 eta 8:32:25 +epoch [11/50] batch [310/500] time 1.565 (1.561) data 0.000 (0.003) loss 1.5068 (1.1601) acc 71.8750 (70.5847) lr 1.8443e-03 eta 8:32:16 +epoch [11/50] batch [315/500] time 1.568 (1.561) data 0.000 (0.003) loss 1.0537 (1.1616) acc 75.0000 (70.5952) lr 1.8443e-03 eta 8:32:08 +epoch [11/50] batch [320/500] time 1.551 (1.561) data 0.000 (0.003) loss 1.7627 (1.1573) acc 59.3750 (70.6836) lr 1.8443e-03 eta 8:31:59 +epoch [11/50] batch [325/500] time 1.556 (1.561) data 0.001 (0.003) loss 0.4792 (1.1563) acc 87.5000 (70.7212) lr 1.8443e-03 eta 8:31:48 +epoch [11/50] batch [330/500] time 1.577 (1.561) data 0.000 (0.003) loss 0.9131 (1.1561) acc 68.7500 (70.6723) lr 1.8443e-03 eta 8:31:41 +epoch [11/50] batch [335/500] time 1.571 (1.561) data 0.000 (0.003) loss 0.8164 (1.1572) acc 87.5000 (70.6810) lr 1.8443e-03 eta 8:31:33 +epoch [11/50] batch [340/500] time 1.567 (1.561) data 0.000 (0.003) loss 1.7617 (1.1577) acc 65.6250 (70.6985) lr 1.8443e-03 eta 8:31:25 +epoch [11/50] batch [345/500] time 1.555 (1.561) data 0.001 (0.003) loss 1.2119 (1.1565) acc 78.1250 (70.7065) lr 1.8443e-03 eta 8:31:20 +epoch [11/50] batch [350/500] time 1.566 (1.561) data 0.000 (0.003) loss 1.6035 (1.1567) acc 65.6250 (70.7232) lr 1.8443e-03 eta 8:31:12 +epoch [11/50] batch [355/500] time 1.545 (1.561) data 0.000 (0.003) loss 1.8604 (1.1579) acc 53.1250 (70.6778) lr 1.8443e-03 eta 8:31:06 +epoch [11/50] batch [360/500] time 1.552 (1.561) data 0.000 (0.003) loss 1.2119 (1.1584) acc 62.5000 (70.6424) lr 1.8443e-03 eta 8:30:58 +epoch [11/50] batch [365/500] time 1.570 (1.561) data 0.000 (0.003) loss 0.9155 (1.1586) acc 
84.3750 (70.7192) lr 1.8443e-03 eta 8:30:49 +epoch [11/50] batch [370/500] time 1.561 (1.561) data 0.000 (0.003) loss 0.9204 (1.1571) acc 68.7500 (70.6926) lr 1.8443e-03 eta 8:30:41 +epoch [11/50] batch [375/500] time 1.561 (1.561) data 0.000 (0.003) loss 1.0439 (1.1581) acc 71.8750 (70.6750) lr 1.8443e-03 eta 8:30:32 +epoch [11/50] batch [380/500] time 1.554 (1.561) data 0.000 (0.003) loss 0.8442 (1.1553) acc 81.2500 (70.7648) lr 1.8443e-03 eta 8:30:24 +epoch [11/50] batch [385/500] time 1.563 (1.561) data 0.000 (0.003) loss 0.9883 (1.1561) acc 78.1250 (70.7955) lr 1.8443e-03 eta 8:30:11 +epoch [11/50] batch [390/500] time 1.563 (1.561) data 0.000 (0.002) loss 0.6997 (1.1573) acc 84.3750 (70.8494) lr 1.8443e-03 eta 8:30:10 +epoch [11/50] batch [395/500] time 1.547 (1.561) data 0.001 (0.002) loss 1.0479 (1.1578) acc 68.7500 (70.8307) lr 1.8443e-03 eta 8:30:01 +epoch [11/50] batch [400/500] time 1.554 (1.561) data 0.000 (0.002) loss 0.9951 (1.1569) acc 65.6250 (70.8438) lr 1.8443e-03 eta 8:29:50 +epoch [11/50] batch [405/500] time 1.598 (1.561) data 0.000 (0.002) loss 1.6904 (1.1578) acc 68.7500 (70.8488) lr 1.8443e-03 eta 8:29:44 +epoch [11/50] batch [410/500] time 1.555 (1.561) data 0.001 (0.002) loss 1.4199 (1.1590) acc 65.6250 (70.8155) lr 1.8443e-03 eta 8:29:38 +epoch [11/50] batch [415/500] time 1.549 (1.561) data 0.000 (0.002) loss 0.7964 (1.1576) acc 78.1250 (70.8057) lr 1.8443e-03 eta 8:29:31 +epoch [11/50] batch [420/500] time 1.565 (1.561) data 0.000 (0.002) loss 1.2646 (1.1583) acc 71.8750 (70.8408) lr 1.8443e-03 eta 8:29:22 +epoch [11/50] batch [425/500] time 1.588 (1.561) data 0.001 (0.002) loss 0.9937 (1.1547) acc 68.7500 (70.9191) lr 1.8443e-03 eta 8:29:15 +epoch [11/50] batch [430/500] time 1.563 (1.561) data 0.000 (0.002) loss 0.9995 (1.1529) acc 75.0000 (70.9448) lr 1.8443e-03 eta 8:29:06 +epoch [11/50] batch [435/500] time 1.564 (1.561) data 0.000 (0.002) loss 0.7612 (1.1532) acc 71.8750 (70.9052) lr 1.8443e-03 eta 8:28:56 +epoch [11/50] batch 
[440/500] time 1.565 (1.561) data 0.000 (0.002) loss 1.4268 (1.1543) acc 65.6250 (70.8665) lr 1.8443e-03 eta 8:28:49 +epoch [11/50] batch [445/500] time 1.556 (1.561) data 0.001 (0.002) loss 0.7598 (1.1544) acc 81.2500 (70.9129) lr 1.8443e-03 eta 8:28:43 +epoch [11/50] batch [450/500] time 1.557 (1.561) data 0.000 (0.002) loss 1.3574 (1.1532) acc 78.1250 (70.9722) lr 1.8443e-03 eta 8:28:35 +epoch [11/50] batch [455/500] time 1.551 (1.561) data 0.000 (0.002) loss 1.1406 (1.1536) acc 71.8750 (70.9890) lr 1.8443e-03 eta 8:28:26 +epoch [11/50] batch [460/500] time 1.555 (1.561) data 0.000 (0.002) loss 2.0723 (1.1575) acc 46.8750 (70.9103) lr 1.8443e-03 eta 8:28:16 +epoch [11/50] batch [465/500] time 1.556 (1.561) data 0.001 (0.002) loss 0.8853 (1.1568) acc 78.1250 (70.9341) lr 1.8443e-03 eta 8:28:08 +epoch [11/50] batch [470/500] time 1.542 (1.561) data 0.000 (0.002) loss 1.4092 (1.1590) acc 59.3750 (70.8777) lr 1.8443e-03 eta 8:28:01 +epoch [11/50] batch [475/500] time 1.542 (1.561) data 0.000 (0.002) loss 1.8789 (1.1594) acc 53.1250 (70.8684) lr 1.8443e-03 eta 8:27:51 +epoch [11/50] batch [480/500] time 1.564 (1.560) data 0.000 (0.002) loss 1.1367 (1.1612) acc 71.8750 (70.8464) lr 1.8443e-03 eta 8:27:40 +epoch [11/50] batch [485/500] time 1.542 (1.560) data 0.001 (0.002) loss 1.2646 (1.1610) acc 75.0000 (70.8634) lr 1.8443e-03 eta 8:27:31 +epoch [11/50] batch [490/500] time 1.539 (1.561) data 0.000 (0.002) loss 1.1279 (1.1592) acc 65.6250 (70.8865) lr 1.8443e-03 eta 8:27:26 +epoch [11/50] batch [495/500] time 1.546 (1.560) data 0.000 (0.002) loss 1.1406 (1.1573) acc 78.1250 (70.9280) lr 1.8443e-03 eta 8:27:17 +epoch [11/50] batch [500/500] time 1.564 (1.560) data 0.000 (0.002) loss 1.6260 (1.1565) acc 65.6250 (70.9437) lr 1.8090e-03 eta 8:27:09 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,840 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.1% +epoch [12/50] batch [5/500] time 1.541 (1.671) data 0.000 (0.162) loss 1.3418 (1.1914) acc 
75.0000 (70.0000) lr 1.8090e-03 eta 9:02:59 +epoch [12/50] batch [10/500] time 1.575 (1.614) data 0.001 (0.081) loss 0.9624 (1.1471) acc 81.2500 (71.5625) lr 1.8090e-03 eta 8:44:11 +epoch [12/50] batch [15/500] time 1.566 (1.595) data 0.001 (0.054) loss 0.6597 (1.1317) acc 75.0000 (71.2500) lr 1.8090e-03 eta 8:37:55 +epoch [12/50] batch [20/500] time 1.570 (1.588) data 0.000 (0.041) loss 1.0889 (1.0639) acc 65.6250 (72.3438) lr 1.8090e-03 eta 8:35:27 +epoch [12/50] batch [25/500] time 1.565 (1.584) data 0.001 (0.033) loss 1.1943 (1.1051) acc 71.8750 (71.6250) lr 1.8090e-03 eta 8:34:08 +epoch [12/50] batch [30/500] time 1.556 (1.580) data 0.000 (0.027) loss 1.3223 (1.1496) acc 59.3750 (70.6250) lr 1.8090e-03 eta 8:32:38 +epoch [12/50] batch [35/500] time 1.542 (1.581) data 0.001 (0.024) loss 1.2314 (1.1492) acc 78.1250 (70.8929) lr 1.8090e-03 eta 8:32:49 +epoch [12/50] batch [40/500] time 1.533 (1.579) data 0.001 (0.021) loss 1.3164 (1.1487) acc 75.0000 (71.0156) lr 1.8090e-03 eta 8:32:08 +epoch [12/50] batch [45/500] time 1.561 (1.577) data 0.000 (0.018) loss 1.5527 (1.1423) acc 62.5000 (70.9722) lr 1.8090e-03 eta 8:31:20 +epoch [12/50] batch [50/500] time 1.558 (1.574) data 0.001 (0.017) loss 0.5581 (1.1039) acc 84.3750 (71.8750) lr 1.8090e-03 eta 8:30:06 +epoch [12/50] batch [55/500] time 1.534 (1.572) data 0.001 (0.015) loss 1.3262 (1.1008) acc 68.7500 (72.0455) lr 1.8090e-03 eta 8:29:18 +epoch [12/50] batch [60/500] time 1.566 (1.571) data 0.001 (0.014) loss 0.4045 (1.0862) acc 90.6250 (72.6042) lr 1.8090e-03 eta 8:29:07 +epoch [12/50] batch [65/500] time 1.554 (1.570) data 0.000 (0.013) loss 1.0020 (1.0842) acc 78.1250 (72.8365) lr 1.8090e-03 eta 8:28:42 +epoch [12/50] batch [70/500] time 1.566 (1.570) data 0.000 (0.012) loss 1.4414 (1.0980) acc 62.5000 (72.4554) lr 1.8090e-03 eta 8:28:17 +epoch [12/50] batch [75/500] time 1.562 (1.569) data 0.001 (0.011) loss 1.4023 (1.1102) acc 62.5000 (72.0000) lr 1.8090e-03 eta 8:27:58 +epoch [12/50] batch [80/500] time 
1.551 (1.568) data 0.001 (0.011) loss 0.6592 (1.1086) acc 81.2500 (71.9531) lr 1.8090e-03 eta 8:27:27 +epoch [12/50] batch [85/500] time 1.555 (1.568) data 0.000 (0.010) loss 1.3672 (1.1111) acc 65.6250 (72.0956) lr 1.8090e-03 eta 8:27:13 +epoch [12/50] batch [90/500] time 1.561 (1.568) data 0.000 (0.009) loss 1.2559 (1.0972) acc 71.8750 (72.3264) lr 1.8090e-03 eta 8:27:10 +epoch [12/50] batch [95/500] time 1.577 (1.568) data 0.000 (0.009) loss 0.9419 (1.0888) acc 68.7500 (72.4342) lr 1.8090e-03 eta 8:27:05 +epoch [12/50] batch [100/500] time 1.541 (1.567) data 0.000 (0.009) loss 1.8037 (1.0977) acc 68.7500 (72.5938) lr 1.8090e-03 eta 8:26:41 +epoch [12/50] batch [105/500] time 1.580 (1.567) data 0.000 (0.008) loss 1.0518 (1.0928) acc 75.0000 (72.6488) lr 1.8090e-03 eta 8:26:40 +epoch [12/50] batch [110/500] time 1.553 (1.568) data 0.000 (0.008) loss 1.3008 (1.0982) acc 75.0000 (72.6420) lr 1.8090e-03 eta 8:26:33 +epoch [12/50] batch [115/500] time 1.539 (1.567) data 0.000 (0.007) loss 1.3467 (1.0960) acc 62.5000 (72.7717) lr 1.8090e-03 eta 8:26:10 +epoch [12/50] batch [120/500] time 1.564 (1.567) data 0.001 (0.007) loss 1.0801 (1.0898) acc 68.7500 (72.7083) lr 1.8090e-03 eta 8:25:59 +epoch [12/50] batch [125/500] time 1.577 (1.566) data 0.000 (0.007) loss 1.5293 (1.0967) acc 62.5000 (72.5750) lr 1.8090e-03 eta 8:25:46 +epoch [12/50] batch [130/500] time 1.569 (1.566) data 0.000 (0.007) loss 0.9038 (1.0962) acc 71.8750 (72.5721) lr 1.8090e-03 eta 8:25:35 +epoch [12/50] batch [135/500] time 1.562 (1.567) data 0.001 (0.006) loss 1.8877 (1.0997) acc 62.5000 (72.4769) lr 1.8090e-03 eta 8:25:41 +epoch [12/50] batch [140/500] time 1.560 (1.567) data 0.000 (0.006) loss 0.5303 (1.0928) acc 87.5000 (72.8795) lr 1.8090e-03 eta 8:25:31 +epoch [12/50] batch [145/500] time 1.607 (1.567) data 0.000 (0.006) loss 0.6987 (1.0887) acc 75.0000 (72.8448) lr 1.8090e-03 eta 8:25:21 +epoch [12/50] batch [150/500] time 1.564 (1.566) data 0.000 (0.006) loss 0.9683 (1.0933) acc 75.0000 
(72.7292) lr 1.8090e-03 eta 8:25:09 +epoch [12/50] batch [155/500] time 1.535 (1.566) data 0.000 (0.006) loss 1.0684 (1.0999) acc 75.0000 (72.6210) lr 1.8090e-03 eta 8:24:55 +epoch [12/50] batch [160/500] time 1.604 (1.566) data 0.000 (0.006) loss 0.6919 (1.0971) acc 81.2500 (72.5781) lr 1.8090e-03 eta 8:24:40 +epoch [12/50] batch [165/500] time 1.569 (1.566) data 0.000 (0.005) loss 0.9839 (1.0922) acc 78.1250 (72.6515) lr 1.8090e-03 eta 8:24:29 +epoch [12/50] batch [170/500] time 1.578 (1.565) data 0.000 (0.005) loss 0.8433 (1.0848) acc 71.8750 (72.7941) lr 1.8090e-03 eta 8:24:20 +epoch [12/50] batch [175/500] time 1.535 (1.565) data 0.000 (0.005) loss 1.3418 (1.0881) acc 65.6250 (72.6786) lr 1.8090e-03 eta 8:24:09 +epoch [12/50] batch [180/500] time 1.555 (1.566) data 0.000 (0.005) loss 0.9819 (1.0870) acc 65.6250 (72.6042) lr 1.8090e-03 eta 8:24:08 +epoch [12/50] batch [185/500] time 1.558 (1.566) data 0.000 (0.005) loss 0.6860 (1.0868) acc 78.1250 (72.6520) lr 1.8090e-03 eta 8:23:58 +epoch [12/50] batch [190/500] time 1.574 (1.566) data 0.000 (0.005) loss 1.2197 (1.0922) acc 71.8750 (72.5329) lr 1.8090e-03 eta 8:23:50 +epoch [12/50] batch [195/500] time 1.532 (1.565) data 0.000 (0.005) loss 1.1260 (1.0931) acc 75.0000 (72.4679) lr 1.8090e-03 eta 8:23:28 +epoch [12/50] batch [200/500] time 1.551 (1.564) data 0.000 (0.004) loss 0.8516 (1.0932) acc 71.8750 (72.4688) lr 1.8090e-03 eta 8:23:13 +epoch [12/50] batch [205/500] time 1.542 (1.564) data 0.000 (0.004) loss 1.6348 (1.0978) acc 65.6250 (72.3933) lr 1.8090e-03 eta 8:23:04 +epoch [12/50] batch [210/500] time 1.548 (1.564) data 0.000 (0.004) loss 1.3682 (1.0981) acc 71.8750 (72.4107) lr 1.8090e-03 eta 8:22:50 +epoch [12/50] batch [215/500] time 1.558 (1.564) data 0.000 (0.004) loss 1.0234 (1.0945) acc 62.5000 (72.3692) lr 1.8090e-03 eta 8:22:35 +epoch [12/50] batch [220/500] time 1.572 (1.564) data 0.000 (0.004) loss 1.2090 (1.0962) acc 75.0000 (72.3438) lr 1.8090e-03 eta 8:22:28 +epoch [12/50] batch [225/500] 
time 1.553 (1.564) data 0.000 (0.004) loss 1.3418 (1.1024) acc 68.7500 (72.2222) lr 1.8090e-03 eta 8:22:23 +epoch [12/50] batch [230/500] time 1.561 (1.564) data 0.000 (0.004) loss 1.3428 (1.1030) acc 62.5000 (72.2283) lr 1.8090e-03 eta 8:22:16 +epoch [12/50] batch [235/500] time 1.563 (1.564) data 0.000 (0.004) loss 1.3438 (1.1051) acc 56.2500 (72.1543) lr 1.8090e-03 eta 8:22:07 +epoch [12/50] batch [240/500] time 1.544 (1.564) data 0.000 (0.004) loss 1.4297 (1.1079) acc 56.2500 (72.0182) lr 1.8090e-03 eta 8:21:57 +epoch [12/50] batch [245/500] time 1.532 (1.563) data 0.000 (0.004) loss 1.0449 (1.1064) acc 75.0000 (72.0026) lr 1.8090e-03 eta 8:21:44 +epoch [12/50] batch [250/500] time 1.582 (1.564) data 0.000 (0.004) loss 1.6553 (1.1069) acc 62.5000 (72.0125) lr 1.8090e-03 eta 8:21:42 +epoch [12/50] batch [255/500] time 1.555 (1.564) data 0.000 (0.004) loss 0.9102 (1.1123) acc 71.8750 (71.8505) lr 1.8090e-03 eta 8:21:30 +epoch [12/50] batch [260/500] time 1.540 (1.563) data 0.000 (0.004) loss 1.7080 (1.1159) acc 56.2500 (71.7668) lr 1.8090e-03 eta 8:21:18 +epoch [12/50] batch [265/500] time 1.556 (1.563) data 0.000 (0.003) loss 1.0078 (1.1131) acc 75.0000 (71.8514) lr 1.8090e-03 eta 8:21:07 +epoch [12/50] batch [270/500] time 1.556 (1.563) data 0.001 (0.003) loss 0.9292 (1.1119) acc 68.7500 (71.7940) lr 1.8090e-03 eta 8:20:59 +epoch [12/50] batch [275/500] time 1.573 (1.563) data 0.001 (0.003) loss 1.5391 (1.1136) acc 71.8750 (71.7727) lr 1.8090e-03 eta 8:20:52 +epoch [12/50] batch [280/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.2041 (1.1121) acc 71.8750 (71.7969) lr 1.8090e-03 eta 8:20:54 +epoch [12/50] batch [285/500] time 1.538 (1.564) data 0.000 (0.003) loss 1.1875 (1.1149) acc 71.8750 (71.7325) lr 1.8090e-03 eta 8:20:45 +epoch [12/50] batch [290/500] time 1.564 (1.564) data 0.001 (0.003) loss 1.4727 (1.1174) acc 68.7500 (71.6595) lr 1.8090e-03 eta 8:20:39 +epoch [12/50] batch [295/500] time 1.555 (1.564) data 0.001 (0.003) loss 1.2803 (1.1197) acc 
59.3750 (71.6314) lr 1.8090e-03 eta 8:20:32 +epoch [12/50] batch [300/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.6328 (1.1216) acc 59.3750 (71.5833) lr 1.8090e-03 eta 8:20:20 +epoch [12/50] batch [305/500] time 1.551 (1.563) data 0.000 (0.003) loss 1.1992 (1.1221) acc 68.7500 (71.5984) lr 1.8090e-03 eta 8:20:10 +epoch [12/50] batch [310/500] time 1.536 (1.563) data 0.001 (0.003) loss 1.1035 (1.1265) acc 68.7500 (71.5020) lr 1.8090e-03 eta 8:19:59 +epoch [12/50] batch [315/500] time 1.559 (1.563) data 0.000 (0.003) loss 1.6729 (1.1268) acc 62.5000 (71.5179) lr 1.8090e-03 eta 8:19:51 +epoch [12/50] batch [320/500] time 1.660 (1.564) data 0.000 (0.003) loss 1.1211 (1.1274) acc 68.7500 (71.5039) lr 1.8090e-03 eta 8:19:48 +epoch [12/50] batch [325/500] time 1.579 (1.564) data 0.000 (0.003) loss 0.8096 (1.1264) acc 78.1250 (71.5096) lr 1.8090e-03 eta 8:19:40 +epoch [12/50] batch [330/500] time 1.575 (1.564) data 0.001 (0.003) loss 1.1035 (1.1250) acc 65.6250 (71.4678) lr 1.8090e-03 eta 8:19:34 +epoch [12/50] batch [335/500] time 1.567 (1.564) data 0.000 (0.003) loss 1.7539 (1.1289) acc 59.3750 (71.3713) lr 1.8090e-03 eta 8:19:27 +epoch [12/50] batch [340/500] time 1.572 (1.564) data 0.000 (0.003) loss 0.9707 (1.1304) acc 75.0000 (71.3511) lr 1.8090e-03 eta 8:19:22 +epoch [12/50] batch [345/500] time 1.573 (1.564) data 0.000 (0.003) loss 1.7676 (1.1313) acc 56.2500 (71.3134) lr 1.8090e-03 eta 8:19:13 +epoch [12/50] batch [350/500] time 1.551 (1.564) data 0.001 (0.003) loss 1.4014 (1.1317) acc 65.6250 (71.3304) lr 1.8090e-03 eta 8:19:03 +epoch [12/50] batch [355/500] time 1.573 (1.564) data 0.001 (0.003) loss 1.4932 (1.1336) acc 68.7500 (71.3468) lr 1.8090e-03 eta 8:18:54 +epoch [12/50] batch [360/500] time 1.538 (1.563) data 0.000 (0.003) loss 1.0156 (1.1338) acc 68.7500 (71.3368) lr 1.8090e-03 eta 8:18:42 +epoch [12/50] batch [365/500] time 1.570 (1.563) data 0.000 (0.003) loss 0.6875 (1.1317) acc 81.2500 (71.3613) lr 1.8090e-03 eta 8:18:31 +epoch [12/50] batch 
[370/500] time 1.540 (1.563) data 0.001 (0.003) loss 1.2656 (1.1332) acc 68.7500 (71.3176) lr 1.8090e-03 eta 8:18:19 +epoch [12/50] batch [375/500] time 1.547 (1.563) data 0.001 (0.003) loss 0.7466 (1.1319) acc 75.0000 (71.3583) lr 1.8090e-03 eta 8:18:08 +epoch [12/50] batch [380/500] time 1.572 (1.563) data 0.000 (0.003) loss 0.4326 (1.1283) acc 90.6250 (71.4638) lr 1.8090e-03 eta 8:17:59 +epoch [12/50] batch [385/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.3477 (1.1282) acc 71.8750 (71.4692) lr 1.8090e-03 eta 8:17:51 +epoch [12/50] batch [390/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.2705 (1.1296) acc 65.6250 (71.4423) lr 1.8090e-03 eta 8:17:41 +epoch [12/50] batch [395/500] time 1.543 (1.563) data 0.000 (0.002) loss 0.9048 (1.1305) acc 65.6250 (71.3924) lr 1.8090e-03 eta 8:17:34 +epoch [12/50] batch [400/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.2520 (1.1331) acc 68.7500 (71.3828) lr 1.8090e-03 eta 8:17:23 +epoch [12/50] batch [405/500] time 1.543 (1.562) data 0.000 (0.002) loss 1.6963 (1.1332) acc 56.2500 (71.3812) lr 1.8090e-03 eta 8:17:12 +epoch [12/50] batch [410/500] time 1.574 (1.562) data 0.001 (0.002) loss 0.7632 (1.1300) acc 84.3750 (71.4253) lr 1.8090e-03 eta 8:17:04 +epoch [12/50] batch [415/500] time 1.570 (1.562) data 0.000 (0.002) loss 1.3242 (1.1322) acc 71.8750 (71.3780) lr 1.8090e-03 eta 8:16:53 +epoch [12/50] batch [420/500] time 1.573 (1.562) data 0.000 (0.002) loss 0.9707 (1.1307) acc 71.8750 (71.4360) lr 1.8090e-03 eta 8:16:49 +epoch [12/50] batch [425/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.6396 (1.1308) acc 62.5000 (71.4485) lr 1.8090e-03 eta 8:16:40 +epoch [12/50] batch [430/500] time 1.537 (1.562) data 0.000 (0.002) loss 1.7334 (1.1340) acc 65.6250 (71.4099) lr 1.8090e-03 eta 8:16:32 +epoch [12/50] batch [435/500] time 1.552 (1.562) data 0.000 (0.002) loss 1.3271 (1.1349) acc 71.8750 (71.4152) lr 1.8090e-03 eta 8:16:26 +epoch [12/50] batch [440/500] time 1.566 (1.562) data 0.000 (0.002) loss 1.1855 
(1.1356) acc 68.7500 (71.3423) lr 1.8090e-03 eta 8:16:18 +epoch [12/50] batch [445/500] time 1.569 (1.562) data 0.000 (0.002) loss 1.3721 (1.1370) acc 62.5000 (71.3062) lr 1.8090e-03 eta 8:16:12 +epoch [12/50] batch [450/500] time 1.577 (1.563) data 0.000 (0.002) loss 1.0215 (1.1355) acc 75.0000 (71.3403) lr 1.8090e-03 eta 8:16:07 +epoch [12/50] batch [455/500] time 1.541 (1.563) data 0.000 (0.002) loss 1.2568 (1.1374) acc 62.5000 (71.2912) lr 1.8090e-03 eta 8:16:00 +epoch [12/50] batch [460/500] time 1.548 (1.563) data 0.000 (0.002) loss 1.6602 (1.1427) acc 56.2500 (71.2092) lr 1.8090e-03 eta 8:15:54 +epoch [12/50] batch [465/500] time 1.569 (1.563) data 0.000 (0.002) loss 1.1523 (1.1436) acc 65.6250 (71.1694) lr 1.8090e-03 eta 8:15:52 +epoch [12/50] batch [470/500] time 1.569 (1.563) data 0.000 (0.002) loss 1.0234 (1.1427) acc 65.6250 (71.1569) lr 1.8090e-03 eta 8:15:42 +epoch [12/50] batch [475/500] time 1.570 (1.563) data 0.000 (0.002) loss 0.5332 (1.1422) acc 84.3750 (71.1842) lr 1.8090e-03 eta 8:15:33 +epoch [12/50] batch [480/500] time 1.566 (1.563) data 0.000 (0.002) loss 0.9316 (1.1387) acc 75.0000 (71.2760) lr 1.8090e-03 eta 8:15:27 +epoch [12/50] batch [485/500] time 1.574 (1.563) data 0.001 (0.002) loss 1.4268 (1.1375) acc 62.5000 (71.2822) lr 1.8090e-03 eta 8:15:19 +epoch [12/50] batch [490/500] time 1.543 (1.563) data 0.000 (0.002) loss 1.3701 (1.1388) acc 75.0000 (71.2755) lr 1.8090e-03 eta 8:15:08 +epoch [12/50] batch [495/500] time 1.554 (1.563) data 0.000 (0.002) loss 0.7710 (1.1378) acc 78.1250 (71.2689) lr 1.8090e-03 eta 8:14:59 +epoch [12/50] batch [500/500] time 1.544 (1.563) data 0.000 (0.002) loss 0.8726 (1.1376) acc 65.6250 (71.2250) lr 1.7705e-03 eta 8:14:48 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,836 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.2% +epoch [13/50] batch [5/500] time 1.540 (1.642) data 0.000 (0.146) loss 1.5537 (1.2555) acc 65.6250 (68.1250) lr 1.7705e-03 eta 8:39:41 +epoch [13/50] batch 
[10/500] time 1.558 (1.597) data 0.000 (0.073) loss 0.7593 (1.1264) acc 75.0000 (71.8750) lr 1.7705e-03 eta 8:25:28 +epoch [13/50] batch [15/500] time 1.541 (1.584) data 0.000 (0.049) loss 0.8159 (1.1257) acc 81.2500 (71.8750) lr 1.7705e-03 eta 8:21:08 +epoch [13/50] batch [20/500] time 1.543 (1.576) data 0.000 (0.037) loss 1.3799 (1.1493) acc 65.6250 (71.8750) lr 1.7705e-03 eta 8:18:41 +epoch [13/50] batch [25/500] time 1.555 (1.579) data 0.001 (0.029) loss 1.4561 (1.1832) acc 68.7500 (71.0000) lr 1.7705e-03 eta 8:19:29 +epoch [13/50] batch [30/500] time 1.574 (1.577) data 0.000 (0.025) loss 0.7671 (1.1530) acc 78.1250 (71.9792) lr 1.7705e-03 eta 8:18:42 +epoch [13/50] batch [35/500] time 1.586 (1.576) data 0.001 (0.021) loss 0.9263 (1.1301) acc 78.1250 (72.4107) lr 1.7705e-03 eta 8:18:10 +epoch [13/50] batch [40/500] time 1.569 (1.574) data 0.000 (0.019) loss 1.6221 (1.1521) acc 65.6250 (72.2656) lr 1.7705e-03 eta 8:17:29 +epoch [13/50] batch [45/500] time 1.573 (1.573) data 0.000 (0.017) loss 0.4153 (1.1509) acc 81.2500 (72.2222) lr 1.7705e-03 eta 8:16:50 +epoch [13/50] batch [50/500] time 1.572 (1.572) data 0.000 (0.015) loss 1.1602 (1.1336) acc 71.8750 (72.6250) lr 1.7705e-03 eta 8:16:36 +epoch [13/50] batch [55/500] time 1.559 (1.572) data 0.000 (0.014) loss 0.8091 (1.1304) acc 71.8750 (72.4432) lr 1.7705e-03 eta 8:16:22 +epoch [13/50] batch [60/500] time 1.555 (1.571) data 0.001 (0.013) loss 0.9800 (1.1136) acc 75.0000 (72.6042) lr 1.7705e-03 eta 8:15:46 +epoch [13/50] batch [65/500] time 1.577 (1.571) data 0.000 (0.012) loss 0.4883 (1.1077) acc 84.3750 (72.6442) lr 1.7705e-03 eta 8:15:42 +epoch [13/50] batch [70/500] time 1.559 (1.570) data 0.000 (0.011) loss 0.6631 (1.0919) acc 90.6250 (73.3482) lr 1.7705e-03 eta 8:15:22 +epoch [13/50] batch [75/500] time 1.560 (1.570) data 0.001 (0.010) loss 1.1436 (1.0845) acc 65.6250 (73.2917) lr 1.7705e-03 eta 8:15:08 +epoch [13/50] batch [80/500] time 1.571 (1.570) data 0.001 (0.010) loss 1.2705 (1.0937) acc 78.1250 
(73.1641) lr 1.7705e-03 eta 8:14:56 +epoch [13/50] batch [85/500] time 1.565 (1.570) data 0.001 (0.009) loss 1.0986 (1.0867) acc 84.3750 (73.4191) lr 1.7705e-03 eta 8:15:00 +epoch [13/50] batch [90/500] time 1.551 (1.569) data 0.001 (0.009) loss 1.0684 (1.0911) acc 78.1250 (73.1944) lr 1.7705e-03 eta 8:14:35 +epoch [13/50] batch [95/500] time 1.581 (1.569) data 0.001 (0.008) loss 0.8657 (1.0864) acc 71.8750 (73.2237) lr 1.7705e-03 eta 8:14:24 +epoch [13/50] batch [100/500] time 1.529 (1.568) data 0.000 (0.008) loss 1.0186 (1.0914) acc 81.2500 (73.1250) lr 1.7705e-03 eta 8:13:52 +epoch [13/50] batch [105/500] time 1.545 (1.568) data 0.000 (0.007) loss 1.1445 (1.0985) acc 68.7500 (72.8869) lr 1.7705e-03 eta 8:13:40 +epoch [13/50] batch [110/500] time 1.544 (1.567) data 0.000 (0.007) loss 1.2588 (1.0995) acc 68.7500 (72.8125) lr 1.7705e-03 eta 8:13:25 +epoch [13/50] batch [115/500] time 1.548 (1.566) data 0.000 (0.007) loss 1.5605 (1.0987) acc 62.5000 (72.8804) lr 1.7705e-03 eta 8:13:01 +epoch [13/50] batch [120/500] time 1.560 (1.566) data 0.000 (0.007) loss 1.3242 (1.1039) acc 75.0000 (72.8646) lr 1.7705e-03 eta 8:12:50 +epoch [13/50] batch [125/500] time 1.654 (1.567) data 0.001 (0.006) loss 0.9854 (1.1118) acc 75.0000 (72.6500) lr 1.7705e-03 eta 8:12:48 +epoch [13/50] batch [130/500] time 1.573 (1.566) data 0.001 (0.006) loss 1.2266 (1.1030) acc 68.7500 (72.7644) lr 1.7705e-03 eta 8:12:33 +epoch [13/50] batch [135/500] time 1.535 (1.565) data 0.001 (0.006) loss 0.8745 (1.0895) acc 81.2500 (73.0093) lr 1.7705e-03 eta 8:12:12 +epoch [13/50] batch [140/500] time 1.594 (1.566) data 0.000 (0.006) loss 1.2764 (1.0909) acc 65.6250 (72.9911) lr 1.7705e-03 eta 8:12:05 +epoch [13/50] batch [145/500] time 1.570 (1.565) data 0.000 (0.005) loss 1.4238 (1.0850) acc 59.3750 (73.0172) lr 1.7705e-03 eta 8:11:48 +epoch [13/50] batch [150/500] time 1.565 (1.565) data 0.000 (0.005) loss 1.3564 (1.0839) acc 62.5000 (72.9375) lr 1.7705e-03 eta 8:11:36 +epoch [13/50] batch [155/500] 
time 1.565 (1.565) data 0.000 (0.005) loss 0.8750 (1.0816) acc 68.7500 (72.9839) lr 1.7705e-03 eta 8:11:29 +epoch [13/50] batch [160/500] time 1.525 (1.564) data 0.000 (0.005) loss 0.8755 (1.0856) acc 78.1250 (72.9297) lr 1.7705e-03 eta 8:11:10 +epoch [13/50] batch [165/500] time 1.571 (1.564) data 0.000 (0.005) loss 0.8662 (1.0844) acc 81.2500 (72.8409) lr 1.7705e-03 eta 8:10:54 +epoch [13/50] batch [170/500] time 1.561 (1.563) data 0.001 (0.005) loss 0.7271 (1.0843) acc 75.0000 (72.7757) lr 1.7705e-03 eta 8:10:39 +epoch [13/50] batch [175/500] time 1.553 (1.563) data 0.000 (0.005) loss 0.5771 (1.0837) acc 78.1250 (72.7679) lr 1.7705e-03 eta 8:10:28 +epoch [13/50] batch [180/500] time 1.543 (1.563) data 0.000 (0.004) loss 1.4434 (1.0892) acc 59.3750 (72.6042) lr 1.7705e-03 eta 8:10:18 +epoch [13/50] batch [185/500] time 1.562 (1.563) data 0.001 (0.004) loss 1.0332 (1.0947) acc 71.8750 (72.4662) lr 1.7705e-03 eta 8:10:11 +epoch [13/50] batch [190/500] time 1.549 (1.563) data 0.000 (0.004) loss 1.5420 (1.0976) acc 62.5000 (72.5000) lr 1.7705e-03 eta 8:10:01 +epoch [13/50] batch [195/500] time 1.540 (1.563) data 0.000 (0.004) loss 1.1221 (1.0908) acc 75.0000 (72.6923) lr 1.7705e-03 eta 8:09:48 +epoch [13/50] batch [200/500] time 1.570 (1.563) data 0.000 (0.004) loss 0.9048 (1.0872) acc 75.0000 (72.7500) lr 1.7705e-03 eta 8:09:42 +epoch [13/50] batch [205/500] time 1.555 (1.563) data 0.000 (0.004) loss 1.0068 (1.0876) acc 81.2500 (72.8811) lr 1.7705e-03 eta 8:09:33 +epoch [13/50] batch [210/500] time 1.551 (1.562) data 0.000 (0.004) loss 1.1211 (1.0859) acc 75.0000 (72.9315) lr 1.7705e-03 eta 8:09:19 +epoch [13/50] batch [215/500] time 1.574 (1.562) data 0.000 (0.004) loss 1.6572 (1.0901) acc 65.6250 (72.8779) lr 1.7705e-03 eta 8:09:09 +epoch [13/50] batch [220/500] time 1.551 (1.562) data 0.000 (0.004) loss 1.1748 (1.0900) acc 75.0000 (72.8977) lr 1.7705e-03 eta 8:08:58 +epoch [13/50] batch [225/500] time 1.538 (1.562) data 0.000 (0.004) loss 1.2568 (1.0903) acc 
68.7500 (72.8611) lr 1.7705e-03 eta 8:08:55 +epoch [13/50] batch [230/500] time 1.547 (1.562) data 0.001 (0.004) loss 0.7739 (1.0970) acc 78.1250 (72.7310) lr 1.7705e-03 eta 8:08:40 +epoch [13/50] batch [235/500] time 1.562 (1.562) data 0.000 (0.004) loss 1.1377 (1.0965) acc 62.5000 (72.6463) lr 1.7705e-03 eta 8:08:29 +epoch [13/50] batch [240/500] time 1.552 (1.562) data 0.001 (0.003) loss 1.1260 (1.0983) acc 68.7500 (72.5911) lr 1.7705e-03 eta 8:08:20 +epoch [13/50] batch [245/500] time 1.591 (1.562) data 0.000 (0.003) loss 1.4814 (1.0991) acc 56.2500 (72.6403) lr 1.7705e-03 eta 8:08:11 +epoch [13/50] batch [250/500] time 1.566 (1.562) data 0.001 (0.003) loss 1.2178 (1.0943) acc 62.5000 (72.6500) lr 1.7705e-03 eta 8:08:03 +epoch [13/50] batch [255/500] time 1.536 (1.562) data 0.000 (0.003) loss 1.7275 (1.0971) acc 56.2500 (72.5858) lr 1.7705e-03 eta 8:07:53 +epoch [13/50] batch [260/500] time 1.547 (1.562) data 0.000 (0.003) loss 1.2412 (1.0975) acc 75.0000 (72.6683) lr 1.7705e-03 eta 8:07:45 +epoch [13/50] batch [265/500] time 1.582 (1.562) data 0.000 (0.003) loss 1.0586 (1.0999) acc 65.6250 (72.5590) lr 1.7705e-03 eta 8:07:40 +epoch [13/50] batch [270/500] time 1.579 (1.562) data 0.000 (0.003) loss 1.1670 (1.0975) acc 78.1250 (72.6389) lr 1.7705e-03 eta 8:07:44 +epoch [13/50] batch [275/500] time 1.552 (1.562) data 0.001 (0.003) loss 0.5547 (1.1052) acc 84.3750 (72.4545) lr 1.7705e-03 eta 8:07:36 +epoch [13/50] batch [280/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.5234 (1.1085) acc 62.5000 (72.4330) lr 1.7705e-03 eta 8:07:26 +epoch [13/50] batch [285/500] time 1.552 (1.562) data 0.001 (0.003) loss 0.7832 (1.1053) acc 71.8750 (72.4232) lr 1.7705e-03 eta 8:07:16 +epoch [13/50] batch [290/500] time 1.546 (1.562) data 0.000 (0.003) loss 1.1387 (1.1059) acc 71.8750 (72.4569) lr 1.7705e-03 eta 8:07:06 +epoch [13/50] batch [295/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.4219 (1.1059) acc 62.5000 (72.4894) lr 1.7705e-03 eta 8:06:54 +epoch [13/50] batch 
[300/500] time 1.572 (1.562) data 0.001 (0.003) loss 1.7227 (1.1076) acc 50.0000 (72.4375) lr 1.7705e-03 eta 8:06:50 +epoch [13/50] batch [305/500] time 1.538 (1.562) data 0.000 (0.003) loss 1.3721 (1.1066) acc 68.7500 (72.4078) lr 1.7705e-03 eta 8:06:38 +epoch [13/50] batch [310/500] time 1.563 (1.562) data 0.001 (0.003) loss 1.2559 (1.1107) acc 65.6250 (72.4093) lr 1.7705e-03 eta 8:06:31 +epoch [13/50] batch [315/500] time 1.555 (1.562) data 0.000 (0.003) loss 1.4131 (1.1111) acc 68.7500 (72.4206) lr 1.7705e-03 eta 8:06:19 +epoch [13/50] batch [320/500] time 1.562 (1.562) data 0.000 (0.003) loss 1.1396 (1.1132) acc 71.8750 (72.3730) lr 1.7705e-03 eta 8:06:11 +epoch [13/50] batch [325/500] time 1.565 (1.562) data 0.001 (0.003) loss 1.3232 (1.1164) acc 59.3750 (72.3077) lr 1.7705e-03 eta 8:06:04 +epoch [13/50] batch [330/500] time 1.575 (1.562) data 0.000 (0.003) loss 1.1914 (1.1178) acc 68.7500 (72.2538) lr 1.7705e-03 eta 8:05:58 +epoch [13/50] batch [335/500] time 1.534 (1.562) data 0.000 (0.003) loss 1.2148 (1.1207) acc 71.8750 (72.1642) lr 1.7705e-03 eta 8:05:53 +epoch [13/50] batch [340/500] time 1.598 (1.562) data 0.001 (0.003) loss 1.0801 (1.1233) acc 75.0000 (72.1140) lr 1.7705e-03 eta 8:05:46 +epoch [13/50] batch [345/500] time 1.585 (1.562) data 0.000 (0.003) loss 1.4824 (1.1238) acc 68.7500 (72.1467) lr 1.7705e-03 eta 8:05:42 +epoch [13/50] batch [350/500] time 1.546 (1.562) data 0.001 (0.003) loss 0.8257 (1.1229) acc 78.1250 (72.1786) lr 1.7705e-03 eta 8:05:34 +epoch [13/50] batch [355/500] time 1.533 (1.562) data 0.000 (0.002) loss 1.5898 (1.1232) acc 71.8750 (72.1831) lr 1.7705e-03 eta 8:05:25 +epoch [13/50] batch [360/500] time 1.600 (1.562) data 0.000 (0.002) loss 0.9292 (1.1233) acc 78.1250 (72.1788) lr 1.7705e-03 eta 8:05:22 +epoch [13/50] batch [365/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.8848 (1.1243) acc 81.2500 (72.1747) lr 1.7705e-03 eta 8:05:14 +epoch [13/50] batch [370/500] time 1.538 (1.563) data 0.001 (0.002) loss 1.3818 
(1.1243) acc 68.7500 (72.1622) lr 1.7705e-03 eta 8:05:10 +epoch [13/50] batch [375/500] time 1.566 (1.563) data 0.001 (0.002) loss 1.2715 (1.1224) acc 71.8750 (72.2417) lr 1.7705e-03 eta 8:05:03 +epoch [13/50] batch [380/500] time 1.580 (1.563) data 0.000 (0.002) loss 1.6631 (1.1241) acc 65.6250 (72.2039) lr 1.7705e-03 eta 8:04:55 +epoch [13/50] batch [385/500] time 1.574 (1.563) data 0.000 (0.002) loss 0.8389 (1.1235) acc 78.1250 (72.1347) lr 1.7705e-03 eta 8:04:48 +epoch [13/50] batch [390/500] time 1.554 (1.563) data 0.000 (0.002) loss 1.0771 (1.1274) acc 71.8750 (72.0833) lr 1.7705e-03 eta 8:04:38 +epoch [13/50] batch [395/500] time 1.547 (1.562) data 0.000 (0.002) loss 0.7031 (1.1263) acc 78.1250 (72.1361) lr 1.7705e-03 eta 8:04:29 +epoch [13/50] batch [400/500] time 1.560 (1.562) data 0.000 (0.002) loss 1.1973 (1.1262) acc 65.6250 (72.1328) lr 1.7705e-03 eta 8:04:22 +epoch [13/50] batch [405/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.8442 (1.1312) acc 71.8750 (72.0602) lr 1.7705e-03 eta 8:04:14 +epoch [13/50] batch [410/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.3027 (1.1318) acc 68.7500 (72.0198) lr 1.7705e-03 eta 8:04:05 +epoch [13/50] batch [415/500] time 1.526 (1.563) data 0.000 (0.002) loss 1.3945 (1.1329) acc 62.5000 (72.0181) lr 1.7705e-03 eta 8:03:59 +epoch [13/50] batch [420/500] time 1.550 (1.563) data 0.000 (0.002) loss 0.9829 (1.1339) acc 75.0000 (72.0610) lr 1.7705e-03 eta 8:03:51 +epoch [13/50] batch [425/500] time 1.552 (1.562) data 0.000 (0.002) loss 1.5527 (1.1348) acc 62.5000 (72.0147) lr 1.7705e-03 eta 8:03:40 +epoch [13/50] batch [430/500] time 1.549 (1.562) data 0.000 (0.002) loss 1.1191 (1.1371) acc 78.1250 (72.0131) lr 1.7705e-03 eta 8:03:33 +epoch [13/50] batch [435/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.7090 (1.1364) acc 75.0000 (71.9971) lr 1.7705e-03 eta 8:03:25 +epoch [13/50] batch [440/500] time 1.533 (1.562) data 0.000 (0.002) loss 1.9336 (1.1376) acc 59.3750 (71.9531) lr 1.7705e-03 eta 8:03:14 +epoch 
[13/50] batch [445/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.2471 (1.1393) acc 68.7500 (71.9031) lr 1.7705e-03 eta 8:03:04 +epoch [13/50] batch [450/500] time 1.549 (1.562) data 0.001 (0.002) loss 1.2383 (1.1396) acc 68.7500 (71.9028) lr 1.7705e-03 eta 8:02:59 +epoch [13/50] batch [455/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.0674 (1.1375) acc 71.8750 (71.9368) lr 1.7705e-03 eta 8:02:51 +epoch [13/50] batch [460/500] time 1.561 (1.562) data 0.002 (0.002) loss 1.1426 (1.1378) acc 68.7500 (71.9226) lr 1.7705e-03 eta 8:02:44 +epoch [13/50] batch [465/500] time 1.571 (1.562) data 0.000 (0.002) loss 1.3672 (1.1391) acc 71.8750 (71.9019) lr 1.7705e-03 eta 8:02:37 +epoch [13/50] batch [470/500] time 1.550 (1.562) data 0.002 (0.002) loss 0.9819 (1.1401) acc 71.8750 (71.8617) lr 1.7705e-03 eta 8:02:28 +epoch [13/50] batch [475/500] time 1.573 (1.562) data 0.000 (0.002) loss 1.0547 (1.1379) acc 71.8750 (71.8816) lr 1.7705e-03 eta 8:02:19 +epoch [13/50] batch [480/500] time 1.560 (1.562) data 0.000 (0.002) loss 1.3242 (1.1381) acc 68.7500 (71.8555) lr 1.7705e-03 eta 8:02:10 +epoch [13/50] batch [485/500] time 1.582 (1.562) data 0.001 (0.002) loss 1.1387 (1.1390) acc 71.8750 (71.8686) lr 1.7705e-03 eta 8:02:02 +epoch [13/50] batch [490/500] time 1.594 (1.562) data 0.000 (0.002) loss 1.0713 (1.1391) acc 78.1250 (71.8431) lr 1.7705e-03 eta 8:01:56 +epoch [13/50] batch [495/500] time 1.575 (1.562) data 0.000 (0.002) loss 0.8530 (1.1376) acc 71.8750 (71.8371) lr 1.7705e-03 eta 8:01:49 +epoch [13/50] batch [500/500] time 1.572 (1.562) data 0.000 (0.002) loss 1.2891 (1.1379) acc 65.6250 (71.8187) lr 1.7290e-03 eta 8:01:42 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,978 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [14/50] batch [5/500] time 1.567 (1.662) data 0.000 (0.165) loss 0.7349 (1.1411) 
acc 84.3750 (78.1250) lr 1.7290e-03 eta 8:32:12 +epoch [14/50] batch [10/500] time 1.568 (1.613) data 0.000 (0.083) loss 0.7437 (1.0822) acc 81.2500 (76.2500) lr 1.7290e-03 eta 8:16:56 +epoch [14/50] batch [15/500] time 1.552 (1.591) data 0.001 (0.055) loss 0.7139 (1.0884) acc 84.3750 (74.3750) lr 1.7290e-03 eta 8:10:16 +epoch [14/50] batch [20/500] time 1.572 (1.591) data 0.001 (0.041) loss 1.0918 (1.0852) acc 68.7500 (73.5938) lr 1.7290e-03 eta 8:10:00 +epoch [14/50] batch [25/500] time 1.554 (1.584) data 0.000 (0.033) loss 1.6953 (1.0643) acc 65.6250 (74.1250) lr 1.7290e-03 eta 8:07:41 +epoch [14/50] batch [30/500] time 1.551 (1.578) data 0.000 (0.028) loss 1.1006 (1.0622) acc 75.0000 (73.8542) lr 1.7290e-03 eta 8:05:49 +epoch [14/50] batch [35/500] time 1.565 (1.577) data 0.000 (0.024) loss 1.3223 (1.0786) acc 53.1250 (72.3214) lr 1.7290e-03 eta 8:05:15 +epoch [14/50] batch [40/500] time 1.566 (1.573) data 0.000 (0.021) loss 0.3613 (1.0661) acc 93.7500 (73.1250) lr 1.7290e-03 eta 8:04:04 +epoch [14/50] batch [45/500] time 1.550 (1.572) data 0.000 (0.019) loss 1.1299 (1.0500) acc 75.0000 (73.6111) lr 1.7290e-03 eta 8:03:23 +epoch [14/50] batch [50/500] time 1.552 (1.571) data 0.001 (0.017) loss 1.0117 (1.0533) acc 68.7500 (73.2500) lr 1.7290e-03 eta 8:03:00 +epoch [14/50] batch [55/500] time 1.567 (1.570) data 0.000 (0.015) loss 1.3643 (1.0546) acc 59.3750 (72.9545) lr 1.7290e-03 eta 8:02:46 +epoch [14/50] batch [60/500] time 1.551 (1.570) data 0.001 (0.014) loss 0.6152 (1.0450) acc 78.1250 (73.3854) lr 1.7290e-03 eta 8:02:27 +epoch [14/50] batch [65/500] time 1.537 (1.569) data 0.001 (0.013) loss 0.6206 (1.0586) acc 78.1250 (72.9808) lr 1.7290e-03 eta 8:02:07 +epoch [14/50] batch [70/500] time 1.557 (1.569) data 0.000 (0.012) loss 1.1885 (1.0617) acc 62.5000 (72.9911) lr 1.7290e-03 eta 8:01:51 +epoch [14/50] batch [75/500] time 1.545 (1.567) data 0.001 (0.011) loss 1.3408 (1.0715) acc 68.7500 (72.6667) lr 1.7290e-03 eta 8:01:18 +epoch [14/50] batch [80/500] 
time 1.571 (1.568) data 0.000 (0.011) loss 1.2910 (1.0736) acc 68.7500 (72.5781) lr 1.7290e-03 eta 8:01:16 +epoch [14/50] batch [85/500] time 1.575 (1.568) data 0.000 (0.010) loss 1.7168 (1.0776) acc 53.1250 (72.5368) lr 1.7290e-03 eta 8:01:07 +epoch [14/50] batch [90/500] time 1.601 (1.568) data 0.001 (0.010) loss 1.4199 (1.0813) acc 68.7500 (72.3958) lr 1.7290e-03 eta 8:01:01 +epoch [14/50] batch [95/500] time 1.537 (1.567) data 0.001 (0.009) loss 1.3232 (1.0883) acc 62.5000 (72.2368) lr 1.7290e-03 eta 8:00:36 +epoch [14/50] batch [100/500] time 1.558 (1.567) data 0.000 (0.009) loss 0.8955 (1.0882) acc 68.7500 (72.0625) lr 1.7290e-03 eta 8:00:27 +epoch [14/50] batch [105/500] time 1.548 (1.566) data 0.000 (0.008) loss 1.4023 (1.1037) acc 62.5000 (71.8750) lr 1.7290e-03 eta 8:00:14 +epoch [14/50] batch [110/500] time 1.546 (1.566) data 0.000 (0.008) loss 0.9868 (1.0950) acc 71.8750 (72.1591) lr 1.7290e-03 eta 7:59:55 +epoch [14/50] batch [115/500] time 1.579 (1.566) data 0.000 (0.008) loss 1.2939 (1.0842) acc 75.0000 (72.4728) lr 1.7290e-03 eta 7:59:47 +epoch [14/50] batch [120/500] time 1.556 (1.567) data 0.001 (0.007) loss 1.2998 (1.0829) acc 78.1250 (72.6562) lr 1.7290e-03 eta 7:59:55 +epoch [14/50] batch [125/500] time 1.562 (1.566) data 0.001 (0.007) loss 1.5791 (1.0852) acc 65.6250 (72.6500) lr 1.7290e-03 eta 7:59:38 +epoch [14/50] batch [130/500] time 1.533 (1.566) data 0.000 (0.007) loss 1.1338 (1.0946) acc 68.7500 (72.4519) lr 1.7290e-03 eta 7:59:24 +epoch [14/50] batch [135/500] time 1.517 (1.565) data 0.000 (0.007) loss 1.4326 (1.0959) acc 62.5000 (72.2454) lr 1.7290e-03 eta 7:59:03 +epoch [14/50] batch [140/500] time 1.551 (1.565) data 0.000 (0.006) loss 0.6421 (1.0882) acc 87.5000 (72.4777) lr 1.7290e-03 eta 7:58:53 +epoch [14/50] batch [145/500] time 1.571 (1.565) data 0.000 (0.006) loss 1.2783 (1.0920) acc 65.6250 (72.3060) lr 1.7290e-03 eta 7:58:41 +epoch [14/50] batch [150/500] time 1.569 (1.564) data 0.000 (0.006) loss 1.3711 (1.1045) acc 75.0000 
(72.2083) lr 1.7290e-03 eta 7:58:27 +epoch [14/50] batch [155/500] time 1.594 (1.564) data 0.000 (0.006) loss 0.8457 (1.1106) acc 75.0000 (72.0766) lr 1.7290e-03 eta 7:58:15 +epoch [14/50] batch [160/500] time 1.661 (1.564) data 0.000 (0.006) loss 0.9048 (1.1085) acc 75.0000 (72.1289) lr 1.7290e-03 eta 7:58:12 +epoch [14/50] batch [165/500] time 1.542 (1.564) data 0.000 (0.005) loss 0.9966 (1.1102) acc 78.1250 (71.9697) lr 1.7290e-03 eta 7:57:54 +epoch [14/50] batch [170/500] time 1.562 (1.564) data 0.000 (0.005) loss 0.8462 (1.1124) acc 71.8750 (71.9853) lr 1.7290e-03 eta 7:57:40 +epoch [14/50] batch [175/500] time 1.572 (1.564) data 0.000 (0.005) loss 1.1055 (1.1122) acc 65.6250 (71.9821) lr 1.7290e-03 eta 7:57:33 +epoch [14/50] batch [180/500] time 1.569 (1.563) data 0.000 (0.005) loss 0.9175 (1.1138) acc 71.8750 (71.8403) lr 1.7290e-03 eta 7:57:22 +epoch [14/50] batch [185/500] time 1.567 (1.564) data 0.000 (0.005) loss 1.2969 (1.1105) acc 71.8750 (71.9932) lr 1.7290e-03 eta 7:57:19 +epoch [14/50] batch [190/500] time 1.548 (1.564) data 0.000 (0.005) loss 0.9512 (1.1118) acc 81.2500 (72.0888) lr 1.7290e-03 eta 7:57:09 +epoch [14/50] batch [195/500] time 1.592 (1.564) data 0.000 (0.005) loss 1.0791 (1.1154) acc 71.8750 (72.0032) lr 1.7290e-03 eta 7:57:06 +epoch [14/50] batch [200/500] time 1.569 (1.564) data 0.000 (0.005) loss 1.2520 (1.1123) acc 65.6250 (72.1094) lr 1.7290e-03 eta 7:56:57 +epoch [14/50] batch [205/500] time 1.576 (1.564) data 0.000 (0.004) loss 0.6758 (1.1105) acc 81.2500 (72.1951) lr 1.7290e-03 eta 7:56:55 +epoch [14/50] batch [210/500] time 1.563 (1.564) data 0.000 (0.004) loss 1.2646 (1.1112) acc 68.7500 (72.2024) lr 1.7290e-03 eta 7:56:46 +epoch [14/50] batch [215/500] time 1.551 (1.564) data 0.001 (0.004) loss 0.8462 (1.1122) acc 87.5000 (72.2965) lr 1.7290e-03 eta 7:56:37 +epoch [14/50] batch [220/500] time 1.571 (1.564) data 0.000 (0.004) loss 0.5454 (1.1087) acc 84.3750 (72.4148) lr 1.7290e-03 eta 7:56:27 +epoch [14/50] batch [225/500] 
time 1.568 (1.564) data 0.000 (0.004) loss 1.2998 (1.1045) acc 56.2500 (72.4306) lr 1.7290e-03 eta 7:56:21 +epoch [14/50] batch [230/500] time 1.574 (1.564) data 0.000 (0.004) loss 1.0830 (1.1076) acc 71.8750 (72.3370) lr 1.7290e-03 eta 7:56:19 +epoch [14/50] batch [235/500] time 1.536 (1.564) data 0.001 (0.004) loss 1.8691 (1.1110) acc 62.5000 (72.3271) lr 1.7290e-03 eta 7:56:10 +epoch [14/50] batch [240/500] time 1.556 (1.564) data 0.000 (0.004) loss 1.7012 (1.1167) acc 59.3750 (72.2656) lr 1.7290e-03 eta 7:56:02 +epoch [14/50] batch [245/500] time 1.564 (1.564) data 0.000 (0.004) loss 1.6855 (1.1179) acc 50.0000 (72.2577) lr 1.7290e-03 eta 7:55:51 +epoch [14/50] batch [250/500] time 1.556 (1.564) data 0.000 (0.004) loss 1.3047 (1.1157) acc 62.5000 (72.3000) lr 1.7290e-03 eta 7:55:39 +epoch [14/50] batch [255/500] time 1.535 (1.563) data 0.000 (0.004) loss 1.1104 (1.1152) acc 71.8750 (72.3162) lr 1.7290e-03 eta 7:55:25 +epoch [14/50] batch [260/500] time 1.526 (1.564) data 0.000 (0.004) loss 1.1084 (1.1144) acc 81.2500 (72.3558) lr 1.7290e-03 eta 7:55:19 +epoch [14/50] batch [265/500] time 1.536 (1.563) data 0.000 (0.004) loss 1.1719 (1.1151) acc 59.3750 (72.2642) lr 1.7290e-03 eta 7:55:06 +epoch [14/50] batch [270/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.7646 (1.1139) acc 81.2500 (72.3380) lr 1.7290e-03 eta 7:54:49 +epoch [14/50] batch [275/500] time 1.562 (1.562) data 0.000 (0.003) loss 0.6343 (1.1177) acc 87.5000 (72.2727) lr 1.7290e-03 eta 7:54:35 +epoch [14/50] batch [280/500] time 1.557 (1.562) data 0.000 (0.003) loss 0.8662 (1.1155) acc 84.3750 (72.3549) lr 1.7290e-03 eta 7:54:27 +epoch [14/50] batch [285/500] time 1.572 (1.562) data 0.000 (0.003) loss 1.1670 (1.1137) acc 75.0000 (72.4232) lr 1.7290e-03 eta 7:54:19 +epoch [14/50] batch [290/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.4121 (1.1131) acc 59.3750 (72.4138) lr 1.7290e-03 eta 7:54:04 +epoch [14/50] batch [295/500] time 1.554 (1.562) data 0.000 (0.003) loss 1.0459 (1.1105) acc 
75.0000 (72.4470) lr 1.7290e-03 eta 7:53:58 +epoch [14/50] batch [300/500] time 1.568 (1.562) data 0.001 (0.003) loss 1.6309 (1.1089) acc 75.0000 (72.4792) lr 1.7290e-03 eta 7:53:48 +epoch [14/50] batch [305/500] time 1.580 (1.562) data 0.001 (0.003) loss 0.5347 (1.1058) acc 78.1250 (72.5307) lr 1.7290e-03 eta 7:53:47 +epoch [14/50] batch [310/500] time 1.567 (1.562) data 0.000 (0.003) loss 1.0732 (1.1056) acc 78.1250 (72.6008) lr 1.7290e-03 eta 7:53:37 +epoch [14/50] batch [315/500] time 1.593 (1.562) data 0.001 (0.003) loss 0.5425 (1.1020) acc 81.2500 (72.6587) lr 1.7290e-03 eta 7:53:27 +epoch [14/50] batch [320/500] time 1.558 (1.562) data 0.000 (0.003) loss 1.1543 (1.1060) acc 68.7500 (72.5879) lr 1.7290e-03 eta 7:53:16 +epoch [14/50] batch [325/500] time 1.570 (1.562) data 0.000 (0.003) loss 2.1387 (1.1138) acc 68.7500 (72.4808) lr 1.7290e-03 eta 7:53:08 +epoch [14/50] batch [330/500] time 1.552 (1.562) data 0.000 (0.003) loss 0.6982 (1.1122) acc 81.2500 (72.4716) lr 1.7290e-03 eta 7:52:59 +epoch [14/50] batch [335/500] time 1.560 (1.562) data 0.000 (0.003) loss 1.5850 (1.1134) acc 62.5000 (72.4067) lr 1.7290e-03 eta 7:52:51 +epoch [14/50] batch [340/500] time 1.547 (1.562) data 0.000 (0.003) loss 0.6851 (1.1129) acc 90.6250 (72.4265) lr 1.7290e-03 eta 7:52:40 +epoch [14/50] batch [345/500] time 1.532 (1.561) data 0.000 (0.003) loss 1.1143 (1.1161) acc 75.0000 (72.4004) lr 1.7290e-03 eta 7:52:28 +epoch [14/50] batch [350/500] time 1.554 (1.561) data 0.000 (0.003) loss 0.9282 (1.1156) acc 65.6250 (72.4196) lr 1.7290e-03 eta 7:52:16 +epoch [14/50] batch [355/500] time 1.570 (1.561) data 0.000 (0.003) loss 1.0547 (1.1186) acc 71.8750 (72.3592) lr 1.7290e-03 eta 7:52:08 +epoch [14/50] batch [360/500] time 1.563 (1.561) data 0.000 (0.003) loss 1.0771 (1.1186) acc 68.7500 (72.2917) lr 1.7290e-03 eta 7:52:04 +epoch [14/50] batch [365/500] time 1.580 (1.561) data 0.000 (0.003) loss 0.7915 (1.1195) acc 81.2500 (72.3288) lr 1.7290e-03 eta 7:51:57 +epoch [14/50] batch 
[370/500] time 1.544 (1.562) data 0.000 (0.003) loss 1.0977 (1.1222) acc 68.7500 (72.2213) lr 1.7290e-03 eta 7:51:50 +epoch [14/50] batch [375/500] time 1.544 (1.561) data 0.000 (0.003) loss 1.3711 (1.1248) acc 78.1250 (72.1500) lr 1.7290e-03 eta 7:51:40 +epoch [14/50] batch [380/500] time 1.560 (1.561) data 0.001 (0.003) loss 0.9546 (1.1230) acc 75.0000 (72.1628) lr 1.7290e-03 eta 7:51:30 +epoch [14/50] batch [385/500] time 1.561 (1.561) data 0.000 (0.003) loss 0.7144 (1.1239) acc 87.5000 (72.1834) lr 1.7290e-03 eta 7:51:20 +epoch [14/50] batch [390/500] time 1.566 (1.561) data 0.001 (0.003) loss 1.0850 (1.1235) acc 71.8750 (72.1715) lr 1.7290e-03 eta 7:51:13 +epoch [14/50] batch [395/500] time 1.559 (1.561) data 0.000 (0.002) loss 0.5503 (1.1208) acc 87.5000 (72.2706) lr 1.7290e-03 eta 7:51:05 +epoch [14/50] batch [400/500] time 1.559 (1.561) data 0.000 (0.002) loss 0.8125 (1.1209) acc 75.0000 (72.2500) lr 1.7290e-03 eta 7:50:57 +epoch [14/50] batch [405/500] time 1.567 (1.562) data 0.000 (0.002) loss 1.4502 (1.1231) acc 65.6250 (72.2068) lr 1.7290e-03 eta 7:50:56 +epoch [14/50] batch [410/500] time 1.555 (1.561) data 0.000 (0.002) loss 1.4658 (1.1263) acc 56.2500 (72.1341) lr 1.7290e-03 eta 7:50:47 +epoch [14/50] batch [415/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.8579 (1.1257) acc 81.2500 (72.1687) lr 1.7290e-03 eta 7:50:38 +epoch [14/50] batch [420/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.6748 (1.1257) acc 81.2500 (72.1652) lr 1.7290e-03 eta 7:50:30 +epoch [14/50] batch [425/500] time 1.567 (1.561) data 0.000 (0.002) loss 1.7080 (1.1265) acc 68.7500 (72.1691) lr 1.7290e-03 eta 7:50:21 +epoch [14/50] batch [430/500] time 1.569 (1.561) data 0.000 (0.002) loss 0.7896 (1.1273) acc 78.1250 (72.2020) lr 1.7290e-03 eta 7:50:16 +epoch [14/50] batch [435/500] time 1.563 (1.561) data 0.000 (0.002) loss 1.2305 (1.1269) acc 65.6250 (72.1911) lr 1.7290e-03 eta 7:50:07 +epoch [14/50] batch [440/500] time 1.537 (1.561) data 0.000 (0.002) loss 0.9150 
(1.1276) acc 78.1250 (72.1804) lr 1.7290e-03 eta 7:50:00 +epoch [14/50] batch [445/500] time 1.604 (1.562) data 0.000 (0.002) loss 1.2773 (1.1301) acc 71.8750 (72.1489) lr 1.7290e-03 eta 7:49:53 +epoch [14/50] batch [450/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.7402 (1.1326) acc 53.1250 (72.0764) lr 1.7290e-03 eta 7:49:47 +epoch [14/50] batch [455/500] time 1.555 (1.562) data 0.000 (0.002) loss 1.7109 (1.1326) acc 56.2500 (72.0398) lr 1.7290e-03 eta 7:49:38 +epoch [14/50] batch [460/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.2100 (1.1342) acc 75.0000 (72.0380) lr 1.7290e-03 eta 7:49:30 +epoch [14/50] batch [465/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.8213 (1.1362) acc 56.2500 (72.0094) lr 1.7290e-03 eta 7:49:22 +epoch [14/50] batch [470/500] time 1.570 (1.562) data 0.000 (0.002) loss 1.8418 (1.1380) acc 65.6250 (71.9681) lr 1.7290e-03 eta 7:49:16 +epoch [14/50] batch [475/500] time 1.554 (1.562) data 0.001 (0.002) loss 0.8364 (1.1391) acc 78.1250 (71.9211) lr 1.7290e-03 eta 7:49:07 +epoch [14/50] batch [480/500] time 1.553 (1.562) data 0.001 (0.002) loss 1.0469 (1.1390) acc 71.8750 (71.8750) lr 1.7290e-03 eta 7:48:58 +epoch [14/50] batch [485/500] time 1.527 (1.561) data 0.001 (0.002) loss 1.1172 (1.1396) acc 71.8750 (71.8621) lr 1.7290e-03 eta 7:48:48 +epoch [14/50] batch [490/500] time 1.550 (1.561) data 0.000 (0.002) loss 0.5771 (1.1390) acc 84.3750 (71.8814) lr 1.7290e-03 eta 7:48:40 +epoch [14/50] batch [495/500] time 1.540 (1.561) data 0.000 (0.002) loss 1.2256 (1.1373) acc 65.6250 (71.8750) lr 1.7290e-03 eta 7:48:31 +epoch [14/50] batch [500/500] time 1.562 (1.561) data 0.000 (0.002) loss 1.2100 (1.1384) acc 68.7500 (71.8563) lr 1.6845e-03 eta 7:48:21 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,917 +* accuracy: 77.8% +* error: 22.2% +* macro_f1: 77.3% +epoch [15/50] batch [5/500] time 1.529 (1.674) data 0.000 (0.174) loss 1.6348 (1.2069) acc 59.3750 (70.0000) lr 1.6845e-03 eta 8:22:01 +epoch [15/50] batch 
[10/500] time 1.536 (1.613) data 0.000 (0.087) loss 0.6860 (1.1134) acc 81.2500 (70.9375) lr 1.6845e-03 eta 8:03:32 +epoch [15/50] batch [15/500] time 1.564 (1.598) data 0.001 (0.058) loss 1.1777 (1.0796) acc 75.0000 (72.0833) lr 1.6845e-03 eta 7:58:58 +epoch [15/50] batch [20/500] time 1.539 (1.586) data 0.001 (0.044) loss 1.1162 (1.0834) acc 75.0000 (72.3438) lr 1.6845e-03 eta 7:55:16 +epoch [15/50] batch [25/500] time 1.541 (1.580) data 0.000 (0.035) loss 1.4609 (1.0973) acc 62.5000 (71.1250) lr 1.6845e-03 eta 7:53:19 +epoch [15/50] batch [30/500] time 1.574 (1.579) data 0.000 (0.029) loss 1.0068 (1.0550) acc 75.0000 (72.1875) lr 1.6845e-03 eta 7:52:49 +epoch [15/50] batch [35/500] time 1.564 (1.576) data 0.000 (0.025) loss 1.2783 (1.0491) acc 71.8750 (72.6786) lr 1.6845e-03 eta 7:51:49 +epoch [15/50] batch [40/500] time 1.567 (1.578) data 0.000 (0.022) loss 1.2725 (1.0589) acc 62.5000 (72.3438) lr 1.6845e-03 eta 7:52:12 +epoch [15/50] batch [45/500] time 1.566 (1.576) data 0.000 (0.020) loss 1.3291 (1.0554) acc 65.6250 (72.5000) lr 1.6845e-03 eta 7:51:32 +epoch [15/50] batch [50/500] time 1.547 (1.573) data 0.000 (0.018) loss 0.7280 (1.0515) acc 78.1250 (72.6250) lr 1.6845e-03 eta 7:50:36 +epoch [15/50] batch [55/500] time 1.532 (1.572) data 0.000 (0.016) loss 0.9946 (1.0502) acc 68.7500 (72.3295) lr 1.6845e-03 eta 7:50:09 +epoch [15/50] batch [60/500] time 1.540 (1.570) data 0.000 (0.015) loss 0.7212 (1.0447) acc 78.1250 (72.5521) lr 1.6845e-03 eta 7:49:31 +epoch [15/50] batch [65/500] time 1.541 (1.568) data 0.000 (0.014) loss 0.9312 (1.0487) acc 81.2500 (72.7885) lr 1.6845e-03 eta 7:48:49 +epoch [15/50] batch [70/500] time 1.569 (1.568) data 0.000 (0.013) loss 0.9375 (1.0565) acc 75.0000 (72.7679) lr 1.6845e-03 eta 7:48:36 +epoch [15/50] batch [75/500] time 1.541 (1.567) data 0.001 (0.012) loss 1.3750 (1.0538) acc 65.6250 (72.9167) lr 1.6845e-03 eta 7:48:01 +epoch [15/50] batch [80/500] time 1.625 (1.566) data 0.000 (0.011) loss 1.3027 (1.0658) acc 68.7500 
(72.6953) lr 1.6845e-03 eta 7:47:48 +epoch [15/50] batch [85/500] time 1.576 (1.565) data 0.000 (0.011) loss 1.0635 (1.0664) acc 68.7500 (72.7206) lr 1.6845e-03 eta 7:47:25 +epoch [15/50] batch [90/500] time 1.563 (1.564) data 0.000 (0.010) loss 0.7568 (1.0632) acc 78.1250 (72.7778) lr 1.6845e-03 eta 7:46:57 +epoch [15/50] batch [95/500] time 1.539 (1.564) data 0.000 (0.010) loss 1.0439 (1.0534) acc 75.0000 (72.9605) lr 1.6845e-03 eta 7:46:39 +epoch [15/50] batch [100/500] time 1.548 (1.563) data 0.000 (0.009) loss 1.0469 (1.0661) acc 71.8750 (72.6562) lr 1.6845e-03 eta 7:46:21 +epoch [15/50] batch [105/500] time 1.524 (1.562) data 0.000 (0.009) loss 0.7856 (1.0722) acc 81.2500 (72.6190) lr 1.6845e-03 eta 7:45:46 +epoch [15/50] batch [110/500] time 1.562 (1.562) data 0.001 (0.008) loss 0.9976 (1.0772) acc 84.3750 (72.6705) lr 1.6845e-03 eta 7:45:46 +epoch [15/50] batch [115/500] time 1.571 (1.562) data 0.000 (0.008) loss 0.7456 (1.0753) acc 71.8750 (72.6902) lr 1.6845e-03 eta 7:45:35 +epoch [15/50] batch [120/500] time 1.568 (1.562) data 0.000 (0.008) loss 0.7759 (1.0767) acc 81.2500 (72.7604) lr 1.6845e-03 eta 7:45:24 +epoch [15/50] batch [125/500] time 1.548 (1.562) data 0.000 (0.007) loss 0.9468 (1.0753) acc 81.2500 (72.8250) lr 1.6845e-03 eta 7:45:13 +epoch [15/50] batch [130/500] time 1.569 (1.561) data 0.000 (0.007) loss 0.9575 (1.0758) acc 75.0000 (72.8846) lr 1.6845e-03 eta 7:45:02 +epoch [15/50] batch [135/500] time 1.564 (1.562) data 0.000 (0.007) loss 1.5117 (1.0814) acc 71.8750 (72.8472) lr 1.6845e-03 eta 7:44:56 +epoch [15/50] batch [140/500] time 1.540 (1.561) data 0.001 (0.007) loss 1.9619 (1.0895) acc 46.8750 (72.6116) lr 1.6845e-03 eta 7:44:44 +epoch [15/50] batch [145/500] time 1.549 (1.561) data 0.001 (0.006) loss 0.8462 (1.0905) acc 68.7500 (72.3922) lr 1.6845e-03 eta 7:44:30 +epoch [15/50] batch [150/500] time 1.579 (1.561) data 0.000 (0.006) loss 1.2266 (1.0859) acc 65.6250 (72.4375) lr 1.6845e-03 eta 7:44:21 +epoch [15/50] batch [155/500] 
time 1.551 (1.561) data 0.000 (0.006) loss 1.1289 (1.0856) acc 62.5000 (72.3589) lr 1.6845e-03 eta 7:44:09 +epoch [15/50] batch [160/500] time 1.560 (1.561) data 0.001 (0.006) loss 1.0820 (1.0857) acc 71.8750 (72.3047) lr 1.6845e-03 eta 7:44:00 +epoch [15/50] batch [165/500] time 1.555 (1.560) data 0.000 (0.006) loss 1.2451 (1.0835) acc 65.6250 (72.3485) lr 1.6845e-03 eta 7:43:49 +epoch [15/50] batch [170/500] time 1.551 (1.560) data 0.001 (0.006) loss 1.1064 (1.0901) acc 68.7500 (72.1691) lr 1.6845e-03 eta 7:43:40 +epoch [15/50] batch [175/500] time 1.593 (1.560) data 0.000 (0.005) loss 0.5601 (1.0815) acc 81.2500 (72.3929) lr 1.6845e-03 eta 7:43:35 +epoch [15/50] batch [180/500] time 1.533 (1.560) data 0.000 (0.005) loss 0.9272 (1.0775) acc 71.8750 (72.5347) lr 1.6845e-03 eta 7:43:27 +epoch [15/50] batch [185/500] time 1.536 (1.560) data 0.000 (0.005) loss 1.0039 (1.0770) acc 81.2500 (72.5507) lr 1.6845e-03 eta 7:43:19 +epoch [15/50] batch [190/500] time 1.541 (1.560) data 0.000 (0.005) loss 1.2686 (1.0786) acc 75.0000 (72.4836) lr 1.6845e-03 eta 7:43:07 +epoch [15/50] batch [195/500] time 1.525 (1.560) data 0.001 (0.005) loss 1.7139 (1.0883) acc 59.3750 (72.3237) lr 1.6845e-03 eta 7:42:52 +epoch [15/50] batch [200/500] time 1.571 (1.560) data 0.001 (0.005) loss 0.9814 (1.0857) acc 71.8750 (72.3438) lr 1.6845e-03 eta 7:42:46 +epoch [15/50] batch [205/500] time 1.585 (1.560) data 0.000 (0.005) loss 1.5420 (1.0843) acc 59.3750 (72.4085) lr 1.6845e-03 eta 7:42:44 +epoch [15/50] batch [210/500] time 1.557 (1.560) data 0.000 (0.005) loss 1.4268 (1.0886) acc 62.5000 (72.2470) lr 1.6845e-03 eta 7:42:37 +epoch [15/50] batch [215/500] time 1.545 (1.560) data 0.001 (0.004) loss 1.0811 (1.0839) acc 68.7500 (72.4128) lr 1.6845e-03 eta 7:42:25 +epoch [15/50] batch [220/500] time 1.552 (1.560) data 0.001 (0.004) loss 0.9849 (1.0912) acc 75.0000 (72.3438) lr 1.6845e-03 eta 7:42:13 +epoch [15/50] batch [225/500] time 1.548 (1.560) data 0.001 (0.004) loss 1.2510 (1.0957) acc 
62.5000 (72.2500) lr 1.6845e-03 eta 7:42:11 +epoch [15/50] batch [230/500] time 1.536 (1.560) data 0.001 (0.004) loss 1.2920 (1.0973) acc 75.0000 (72.2826) lr 1.6845e-03 eta 7:41:59 +epoch [15/50] batch [235/500] time 1.546 (1.560) data 0.001 (0.004) loss 0.7256 (1.0927) acc 81.2500 (72.3803) lr 1.6845e-03 eta 7:41:45 +epoch [15/50] batch [240/500] time 1.539 (1.559) data 0.000 (0.004) loss 0.8672 (1.0924) acc 71.8750 (72.3438) lr 1.6845e-03 eta 7:41:35 +epoch [15/50] batch [245/500] time 1.555 (1.559) data 0.000 (0.004) loss 1.5586 (1.0928) acc 65.6250 (72.3597) lr 1.6845e-03 eta 7:41:25 +epoch [15/50] batch [250/500] time 1.550 (1.559) data 0.000 (0.004) loss 1.5684 (1.0961) acc 62.5000 (72.2875) lr 1.6845e-03 eta 7:41:11 +epoch [15/50] batch [255/500] time 1.555 (1.559) data 0.000 (0.004) loss 1.1387 (1.0960) acc 81.2500 (72.3407) lr 1.6845e-03 eta 7:41:02 +epoch [15/50] batch [260/500] time 1.538 (1.559) data 0.000 (0.004) loss 1.1865 (1.0961) acc 71.8750 (72.2957) lr 1.6845e-03 eta 7:40:52 +epoch [15/50] batch [265/500] time 1.572 (1.559) data 0.000 (0.004) loss 0.6162 (1.0939) acc 78.1250 (72.2995) lr 1.6845e-03 eta 7:40:46 +epoch [15/50] batch [270/500] time 1.571 (1.559) data 0.000 (0.004) loss 1.1807 (1.0949) acc 78.1250 (72.2801) lr 1.6845e-03 eta 7:40:36 +epoch [15/50] batch [275/500] time 1.563 (1.559) data 0.000 (0.004) loss 1.1064 (1.0955) acc 65.6250 (72.2955) lr 1.6845e-03 eta 7:40:29 +epoch [15/50] batch [280/500] time 1.573 (1.559) data 0.000 (0.004) loss 0.9585 (1.0933) acc 81.2500 (72.3661) lr 1.6845e-03 eta 7:40:22 +epoch [15/50] batch [285/500] time 1.567 (1.559) data 0.000 (0.003) loss 1.4072 (1.0948) acc 59.3750 (72.3575) lr 1.6845e-03 eta 7:40:14 +epoch [15/50] batch [290/500] time 1.554 (1.559) data 0.000 (0.003) loss 1.3887 (1.0985) acc 65.6250 (72.3599) lr 1.6845e-03 eta 7:40:05 +epoch [15/50] batch [295/500] time 1.573 (1.559) data 0.000 (0.003) loss 1.2656 (1.0993) acc 78.1250 (72.3411) lr 1.6845e-03 eta 7:39:56 +epoch [15/50] batch 
[300/500] time 1.577 (1.559) data 0.001 (0.003) loss 0.8843 (1.0976) acc 81.2500 (72.3542) lr 1.6845e-03 eta 7:39:52 +epoch [15/50] batch [305/500] time 1.560 (1.559) data 0.000 (0.003) loss 0.5117 (1.0934) acc 84.3750 (72.4180) lr 1.6845e-03 eta 7:39:46 +epoch [15/50] batch [310/500] time 1.561 (1.559) data 0.000 (0.003) loss 0.9146 (1.0990) acc 84.3750 (72.3690) lr 1.6845e-03 eta 7:39:39 +epoch [15/50] batch [315/500] time 1.560 (1.559) data 0.000 (0.003) loss 1.4961 (1.0997) acc 65.6250 (72.3115) lr 1.6845e-03 eta 7:39:34 +epoch [15/50] batch [320/500] time 1.558 (1.559) data 0.000 (0.003) loss 1.1289 (1.1006) acc 71.8750 (72.2754) lr 1.6845e-03 eta 7:39:26 +epoch [15/50] batch [325/500] time 1.534 (1.559) data 0.000 (0.003) loss 1.0117 (1.0999) acc 68.7500 (72.3462) lr 1.6845e-03 eta 7:39:22 +epoch [15/50] batch [330/500] time 1.552 (1.559) data 0.000 (0.003) loss 0.9619 (1.0986) acc 78.1250 (72.3580) lr 1.6845e-03 eta 7:39:14 +epoch [15/50] batch [335/500] time 1.559 (1.559) data 0.000 (0.003) loss 1.1523 (1.0981) acc 68.7500 (72.3321) lr 1.6845e-03 eta 7:39:06 +epoch [15/50] batch [340/500] time 1.544 (1.559) data 0.000 (0.003) loss 0.8740 (1.0978) acc 81.2500 (72.3162) lr 1.6845e-03 eta 7:38:57 +epoch [15/50] batch [345/500] time 1.560 (1.559) data 0.001 (0.003) loss 1.1445 (1.0985) acc 71.8750 (72.3098) lr 1.6845e-03 eta 7:38:49 +epoch [15/50] batch [350/500] time 1.573 (1.559) data 0.000 (0.003) loss 0.8530 (1.0972) acc 81.2500 (72.3214) lr 1.6845e-03 eta 7:38:43 +epoch [15/50] batch [355/500] time 1.561 (1.559) data 0.000 (0.003) loss 1.2207 (1.0970) acc 75.0000 (72.3239) lr 1.6845e-03 eta 7:38:34 +epoch [15/50] batch [360/500] time 1.561 (1.559) data 0.000 (0.003) loss 1.0391 (1.1004) acc 71.8750 (72.2656) lr 1.6845e-03 eta 7:38:26 +epoch [15/50] batch [365/500] time 1.579 (1.559) data 0.000 (0.003) loss 0.9058 (1.1028) acc 65.6250 (72.1832) lr 1.6845e-03 eta 7:38:18 +epoch [15/50] batch [370/500] time 1.582 (1.560) data 0.000 (0.003) loss 1.5664 
(1.1023) acc 59.3750 (72.1706) lr 1.6845e-03 eta 7:38:18 +epoch [15/50] batch [375/500] time 1.567 (1.560) data 0.000 (0.003) loss 0.6729 (1.1006) acc 78.1250 (72.2250) lr 1.6845e-03 eta 7:38:10 +epoch [15/50] batch [380/500] time 1.561 (1.560) data 0.000 (0.003) loss 1.2178 (1.1002) acc 65.6250 (72.2039) lr 1.6845e-03 eta 7:38:02 +epoch [15/50] batch [385/500] time 1.557 (1.560) data 0.001 (0.003) loss 1.4326 (1.1024) acc 65.6250 (72.1916) lr 1.6845e-03 eta 7:37:54 +epoch [15/50] batch [390/500] time 1.558 (1.560) data 0.000 (0.003) loss 1.3047 (1.1022) acc 71.8750 (72.1875) lr 1.6845e-03 eta 7:37:46 +epoch [15/50] batch [395/500] time 1.558 (1.560) data 0.000 (0.003) loss 1.2295 (1.1010) acc 65.6250 (72.1835) lr 1.6845e-03 eta 7:37:39 +epoch [15/50] batch [400/500] time 1.557 (1.560) data 0.000 (0.003) loss 0.8838 (1.0990) acc 78.1250 (72.2344) lr 1.6845e-03 eta 7:37:32 +epoch [15/50] batch [405/500] time 1.549 (1.560) data 0.000 (0.003) loss 0.6621 (1.0982) acc 90.6250 (72.2685) lr 1.6845e-03 eta 7:37:24 +epoch [15/50] batch [410/500] time 1.561 (1.560) data 0.000 (0.003) loss 1.2500 (1.0998) acc 59.3750 (72.1951) lr 1.6845e-03 eta 7:37:17 +epoch [15/50] batch [415/500] time 1.560 (1.560) data 0.000 (0.003) loss 1.4619 (1.1005) acc 62.5000 (72.1913) lr 1.6845e-03 eta 7:37:10 +epoch [15/50] batch [420/500] time 1.567 (1.560) data 0.000 (0.003) loss 1.3906 (1.1024) acc 71.8750 (72.1205) lr 1.6845e-03 eta 7:37:04 +epoch [15/50] batch [425/500] time 1.569 (1.560) data 0.000 (0.002) loss 1.2275 (1.1053) acc 65.6250 (72.0735) lr 1.6845e-03 eta 7:36:58 +epoch [15/50] batch [430/500] time 1.573 (1.560) data 0.000 (0.002) loss 1.0605 (1.1055) acc 68.7500 (72.0640) lr 1.6845e-03 eta 7:36:50 +epoch [15/50] batch [435/500] time 1.551 (1.560) data 0.000 (0.002) loss 0.9507 (1.1063) acc 78.1250 (72.0618) lr 1.6845e-03 eta 7:36:43 +epoch [15/50] batch [440/500] time 1.559 (1.560) data 0.000 (0.002) loss 0.9360 (1.1041) acc 78.1250 (72.1378) lr 1.6845e-03 eta 7:36:35 +epoch 
[15/50] batch [445/500] time 1.560 (1.560) data 0.000 (0.002) loss 1.4473 (1.1058) acc 78.1250 (72.1348) lr 1.6845e-03 eta 7:36:29 +epoch [15/50] batch [450/500] time 1.578 (1.560) data 0.000 (0.002) loss 1.2764 (1.1048) acc 65.6250 (72.1111) lr 1.6845e-03 eta 7:36:21 +epoch [15/50] batch [455/500] time 1.547 (1.560) data 0.000 (0.002) loss 0.8813 (1.1070) acc 71.8750 (72.0398) lr 1.6845e-03 eta 7:36:12 +epoch [15/50] batch [460/500] time 1.568 (1.560) data 0.000 (0.002) loss 1.3047 (1.1081) acc 68.7500 (72.0109) lr 1.6845e-03 eta 7:36:03 +epoch [15/50] batch [465/500] time 1.645 (1.560) data 0.000 (0.002) loss 1.3184 (1.1098) acc 65.6250 (71.9960) lr 1.6845e-03 eta 7:35:59 +epoch [15/50] batch [470/500] time 1.539 (1.560) data 0.000 (0.002) loss 1.0498 (1.1099) acc 75.0000 (71.9814) lr 1.6845e-03 eta 7:35:49 +epoch [15/50] batch [475/500] time 1.552 (1.560) data 0.000 (0.002) loss 1.7334 (1.1110) acc 59.3750 (71.9145) lr 1.6845e-03 eta 7:35:41 +epoch [15/50] batch [480/500] time 1.565 (1.560) data 0.000 (0.002) loss 1.3320 (1.1106) acc 65.6250 (71.9141) lr 1.6845e-03 eta 7:35:32 +epoch [15/50] batch [485/500] time 1.549 (1.560) data 0.001 (0.002) loss 1.0244 (1.1116) acc 78.1250 (71.9072) lr 1.6845e-03 eta 7:35:24 +epoch [15/50] batch [490/500] time 1.555 (1.560) data 0.000 (0.002) loss 1.7363 (1.1167) acc 65.6250 (71.8367) lr 1.6845e-03 eta 7:35:15 +epoch [15/50] batch [495/500] time 1.537 (1.560) data 0.000 (0.002) loss 1.5918 (1.1186) acc 56.2500 (71.7677) lr 1.6845e-03 eta 7:35:06 +epoch [15/50] batch [500/500] time 1.529 (1.560) data 0.000 (0.002) loss 1.7139 (1.1202) acc 56.2500 (71.7313) lr 1.6374e-03 eta 7:34:56 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,882 +* accuracy: 77.8% +* error: 22.2% +* macro_f1: 77.2% +epoch [16/50] batch [5/500] time 1.540 (1.717) data 0.000 (0.212) loss 0.9033 (1.3578) acc 71.8750 (64.3750) lr 1.6374e-03 eta 8:20:36 +epoch [16/50] batch [10/500] time 1.564 (1.637) data 0.000 (0.106) loss 1.6211 
(1.2714) acc 68.7500 (65.3125) lr 1.6374e-03 eta 7:57:10 +epoch [16/50] batch [15/500] time 1.570 (1.616) data 0.001 (0.071) loss 1.3447 (1.1962) acc 62.5000 (66.8750) lr 1.6374e-03 eta 7:50:47 +epoch [16/50] batch [20/500] time 1.562 (1.607) data 0.000 (0.053) loss 0.6968 (1.1239) acc 84.3750 (68.5938) lr 1.6374e-03 eta 7:48:14 +epoch [16/50] batch [25/500] time 1.556 (1.598) data 0.000 (0.043) loss 1.1416 (1.1286) acc 78.1250 (69.3750) lr 1.6374e-03 eta 7:45:25 +epoch [16/50] batch [30/500] time 1.558 (1.592) data 0.000 (0.036) loss 1.5693 (1.1378) acc 68.7500 (70.1042) lr 1.6374e-03 eta 7:43:28 +epoch [16/50] batch [35/500] time 1.548 (1.587) data 0.000 (0.031) loss 0.6387 (1.0902) acc 75.0000 (70.8929) lr 1.6374e-03 eta 7:42:04 +epoch [16/50] batch [40/500] time 1.549 (1.585) data 0.000 (0.027) loss 1.1973 (1.0668) acc 65.6250 (71.3281) lr 1.6374e-03 eta 7:41:06 +epoch [16/50] batch [45/500] time 1.571 (1.584) data 0.000 (0.024) loss 1.7637 (1.0641) acc 56.2500 (71.5278) lr 1.6374e-03 eta 7:40:41 +epoch [16/50] batch [50/500] time 1.544 (1.581) data 0.001 (0.022) loss 0.9302 (1.0646) acc 81.2500 (71.8125) lr 1.6374e-03 eta 7:39:55 +epoch [16/50] batch [55/500] time 1.574 (1.579) data 0.000 (0.020) loss 0.5962 (1.0631) acc 81.2500 (71.9886) lr 1.6374e-03 eta 7:39:11 +epoch [16/50] batch [60/500] time 1.569 (1.578) data 0.001 (0.018) loss 1.3516 (1.0617) acc 65.6250 (72.2396) lr 1.6374e-03 eta 7:38:39 +epoch [16/50] batch [65/500] time 1.544 (1.577) data 0.000 (0.017) loss 1.1289 (1.0570) acc 75.0000 (72.2596) lr 1.6374e-03 eta 7:38:12 +epoch [16/50] batch [70/500] time 1.563 (1.575) data 0.000 (0.016) loss 0.9043 (1.0553) acc 81.2500 (72.3661) lr 1.6374e-03 eta 7:37:36 +epoch [16/50] batch [75/500] time 1.565 (1.575) data 0.001 (0.015) loss 0.9272 (1.0562) acc 75.0000 (72.3333) lr 1.6374e-03 eta 7:37:17 +epoch [16/50] batch [80/500] time 1.553 (1.573) data 0.000 (0.014) loss 0.7446 (1.0548) acc 81.2500 (72.2656) lr 1.6374e-03 eta 7:36:44 +epoch [16/50] batch 
[85/500] time 1.543 (1.572) data 0.000 (0.013) loss 1.4072 (1.0629) acc 62.5000 (72.2426) lr 1.6374e-03 eta 7:36:10 +epoch [16/50] batch [90/500] time 1.538 (1.570) data 0.000 (0.012) loss 0.9653 (1.0577) acc 78.1250 (72.2917) lr 1.6374e-03 eta 7:35:38 +epoch [16/50] batch [95/500] time 1.565 (1.570) data 0.000 (0.012) loss 1.0869 (1.0638) acc 71.8750 (72.3684) lr 1.6374e-03 eta 7:35:19 +epoch [16/50] batch [100/500] time 1.574 (1.570) data 0.000 (0.011) loss 0.8623 (1.0670) acc 81.2500 (72.4688) lr 1.6374e-03 eta 7:35:10 +epoch [16/50] batch [105/500] time 1.576 (1.569) data 0.000 (0.011) loss 1.3105 (1.0685) acc 62.5000 (72.4107) lr 1.6374e-03 eta 7:34:56 +epoch [16/50] batch [110/500] time 1.570 (1.569) data 0.000 (0.010) loss 0.9619 (1.0740) acc 75.0000 (72.1023) lr 1.6374e-03 eta 7:34:50 +epoch [16/50] batch [115/500] time 1.697 (1.570) data 0.000 (0.010) loss 0.7505 (1.0695) acc 78.1250 (72.3098) lr 1.6374e-03 eta 7:34:51 +epoch [16/50] batch [120/500] time 1.580 (1.569) data 0.001 (0.009) loss 1.3750 (1.0750) acc 71.8750 (72.3177) lr 1.6374e-03 eta 7:34:33 +epoch [16/50] batch [125/500] time 1.550 (1.569) data 0.000 (0.009) loss 0.6514 (1.0805) acc 78.1250 (72.2000) lr 1.6374e-03 eta 7:34:27 +epoch [16/50] batch [130/500] time 1.556 (1.569) data 0.000 (0.009) loss 0.8989 (1.0768) acc 75.0000 (72.2596) lr 1.6374e-03 eta 7:34:13 +epoch [16/50] batch [135/500] time 1.550 (1.568) data 0.000 (0.008) loss 0.9595 (1.0807) acc 71.8750 (72.0139) lr 1.6374e-03 eta 7:33:53 +epoch [16/50] batch [140/500] time 1.561 (1.568) data 0.000 (0.008) loss 1.6035 (1.0832) acc 68.7500 (72.0536) lr 1.6374e-03 eta 7:33:46 +epoch [16/50] batch [145/500] time 1.572 (1.568) data 0.000 (0.008) loss 1.0117 (1.0872) acc 75.0000 (72.0474) lr 1.6374e-03 eta 7:33:36 +epoch [16/50] batch [150/500] time 1.580 (1.569) data 0.000 (0.007) loss 1.5391 (1.0875) acc 65.6250 (72.1042) lr 1.6374e-03 eta 7:33:33 +epoch [16/50] batch [155/500] time 1.561 (1.568) data 0.000 (0.007) loss 1.3154 (1.0848) 
acc 71.8750 (72.1774) lr 1.6374e-03 eta 7:33:25 +epoch [16/50] batch [160/500] time 1.581 (1.569) data 0.001 (0.007) loss 1.1260 (1.0883) acc 75.0000 (72.1289) lr 1.6374e-03 eta 7:33:28 +epoch [16/50] batch [165/500] time 1.557 (1.569) data 0.000 (0.007) loss 2.1406 (1.0976) acc 53.1250 (72.0265) lr 1.6374e-03 eta 7:33:21 +epoch [16/50] batch [170/500] time 1.570 (1.569) data 0.001 (0.007) loss 0.9556 (1.0953) acc 68.7500 (72.0037) lr 1.6374e-03 eta 7:33:13 +epoch [16/50] batch [175/500] time 1.546 (1.569) data 0.000 (0.006) loss 0.8828 (1.0883) acc 75.0000 (72.1071) lr 1.6374e-03 eta 7:33:00 +epoch [16/50] batch [180/500] time 1.561 (1.569) data 0.000 (0.006) loss 0.9404 (1.0860) acc 78.1250 (72.2049) lr 1.6374e-03 eta 7:32:53 +epoch [16/50] batch [185/500] time 1.559 (1.569) data 0.001 (0.006) loss 0.9678 (1.0860) acc 75.0000 (72.1959) lr 1.6374e-03 eta 7:32:44 +epoch [16/50] batch [190/500] time 1.568 (1.569) data 0.001 (0.006) loss 0.8794 (1.0898) acc 71.8750 (72.1546) lr 1.6374e-03 eta 7:32:38 +epoch [16/50] batch [195/500] time 1.569 (1.569) data 0.000 (0.006) loss 0.9053 (1.0872) acc 71.8750 (72.1474) lr 1.6374e-03 eta 7:32:29 +epoch [16/50] batch [200/500] time 1.569 (1.569) data 0.000 (0.006) loss 0.4844 (1.0851) acc 87.5000 (72.2031) lr 1.6374e-03 eta 7:32:17 +epoch [16/50] batch [205/500] time 1.556 (1.568) data 0.000 (0.006) loss 1.0664 (1.0873) acc 75.0000 (72.2256) lr 1.6374e-03 eta 7:32:05 +epoch [16/50] batch [210/500] time 1.544 (1.568) data 0.000 (0.005) loss 1.3223 (1.0893) acc 59.3750 (72.1726) lr 1.6374e-03 eta 7:31:50 +epoch [16/50] batch [215/500] time 1.540 (1.568) data 0.000 (0.005) loss 1.9189 (1.0947) acc 59.3750 (72.0785) lr 1.6374e-03 eta 7:31:36 +epoch [16/50] batch [220/500] time 1.563 (1.567) data 0.000 (0.005) loss 1.2031 (1.1005) acc 62.5000 (72.0028) lr 1.6374e-03 eta 7:31:20 +epoch [16/50] batch [225/500] time 1.563 (1.567) data 0.000 (0.005) loss 1.5049 (1.1018) acc 62.5000 (71.9861) lr 1.6374e-03 eta 7:31:10 +epoch [16/50] 
batch [230/500] time 1.580 (1.567) data 0.000 (0.005) loss 0.5078 (1.0993) acc 87.5000 (72.0516) lr 1.6374e-03 eta 7:30:57 +epoch [16/50] batch [235/500] time 1.556 (1.567) data 0.000 (0.005) loss 1.2363 (1.0990) acc 75.0000 (72.1144) lr 1.6374e-03 eta 7:30:46 +epoch [16/50] batch [240/500] time 1.538 (1.566) data 0.000 (0.005) loss 0.5117 (1.0949) acc 87.5000 (72.1745) lr 1.6374e-03 eta 7:30:31 +epoch [16/50] batch [245/500] time 1.554 (1.566) data 0.001 (0.005) loss 0.9775 (1.0946) acc 75.0000 (72.2577) lr 1.6374e-03 eta 7:30:19 +epoch [16/50] batch [250/500] time 1.555 (1.566) data 0.000 (0.005) loss 0.9458 (1.0931) acc 71.8750 (72.2625) lr 1.6374e-03 eta 7:30:07 +epoch [16/50] batch [255/500] time 1.543 (1.565) data 0.000 (0.005) loss 1.0723 (1.0974) acc 65.6250 (72.1078) lr 1.6374e-03 eta 7:29:55 +epoch [16/50] batch [260/500] time 1.563 (1.566) data 0.000 (0.004) loss 0.5669 (1.0952) acc 84.3750 (72.1274) lr 1.6374e-03 eta 7:29:53 +epoch [16/50] batch [265/500] time 1.582 (1.566) data 0.000 (0.004) loss 1.4102 (1.0957) acc 65.6250 (72.0991) lr 1.6374e-03 eta 7:29:47 +epoch [16/50] batch [270/500] time 1.561 (1.566) data 0.000 (0.004) loss 0.8208 (1.0963) acc 84.3750 (72.1412) lr 1.6374e-03 eta 7:29:34 +epoch [16/50] batch [275/500] time 1.569 (1.565) data 0.000 (0.004) loss 0.9370 (1.0963) acc 78.1250 (72.1932) lr 1.6374e-03 eta 7:29:22 +epoch [16/50] batch [280/500] time 1.556 (1.565) data 0.000 (0.004) loss 1.2266 (1.0947) acc 68.7500 (72.2321) lr 1.6374e-03 eta 7:29:12 +epoch [16/50] batch [285/500] time 1.565 (1.565) data 0.000 (0.004) loss 1.4961 (1.0975) acc 68.7500 (72.1162) lr 1.6374e-03 eta 7:29:02 +epoch [16/50] batch [290/500] time 1.543 (1.565) data 0.000 (0.004) loss 0.9873 (1.0978) acc 75.0000 (72.1228) lr 1.6374e-03 eta 7:28:49 +epoch [16/50] batch [295/500] time 1.550 (1.565) data 0.000 (0.004) loss 1.1924 (1.0966) acc 71.8750 (72.1822) lr 1.6374e-03 eta 7:28:40 +epoch [16/50] batch [300/500] time 1.544 (1.565) data 0.000 (0.004) loss 1.1787 
(1.0974) acc 68.7500 (72.2083) lr 1.6374e-03 eta 7:28:30 +epoch [16/50] batch [305/500] time 1.558 (1.565) data 0.000 (0.004) loss 1.0361 (1.1004) acc 75.0000 (72.2131) lr 1.6374e-03 eta 7:28:23 +epoch [16/50] batch [310/500] time 1.543 (1.565) data 0.000 (0.004) loss 0.7246 (1.1005) acc 78.1250 (72.2077) lr 1.6374e-03 eta 7:28:14 +epoch [16/50] batch [315/500] time 1.562 (1.564) data 0.000 (0.004) loss 1.5303 (1.1054) acc 68.7500 (72.1726) lr 1.6374e-03 eta 7:28:04 +epoch [16/50] batch [320/500] time 1.558 (1.564) data 0.000 (0.004) loss 1.5078 (1.1085) acc 71.8750 (72.1191) lr 1.6374e-03 eta 7:27:54 +epoch [16/50] batch [325/500] time 1.580 (1.565) data 0.001 (0.004) loss 1.7061 (1.1144) acc 62.5000 (71.9904) lr 1.6374e-03 eta 7:27:50 +epoch [16/50] batch [330/500] time 1.579 (1.565) data 0.000 (0.004) loss 0.7954 (1.1178) acc 81.2500 (71.8939) lr 1.6374e-03 eta 7:27:42 +epoch [16/50] batch [335/500] time 1.566 (1.565) data 0.000 (0.004) loss 0.9502 (1.1188) acc 71.8750 (71.8843) lr 1.6374e-03 eta 7:27:35 +epoch [16/50] batch [340/500] time 1.576 (1.565) data 0.000 (0.004) loss 1.5127 (1.1190) acc 59.3750 (71.8842) lr 1.6374e-03 eta 7:27:29 +epoch [16/50] batch [345/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.1855 (1.1210) acc 68.7500 (71.8297) lr 1.6374e-03 eta 7:27:19 +epoch [16/50] batch [350/500] time 1.579 (1.565) data 0.000 (0.003) loss 0.6055 (1.1202) acc 75.0000 (71.7857) lr 1.6374e-03 eta 7:27:12 +epoch [16/50] batch [355/500] time 1.571 (1.564) data 0.000 (0.003) loss 0.9023 (1.1245) acc 78.1250 (71.6989) lr 1.6374e-03 eta 7:27:02 +epoch [16/50] batch [360/500] time 1.546 (1.564) data 0.000 (0.003) loss 1.3740 (1.1243) acc 71.8750 (71.7188) lr 1.6374e-03 eta 7:26:49 +epoch [16/50] batch [365/500] time 1.546 (1.564) data 0.000 (0.003) loss 1.0352 (1.1215) acc 65.6250 (71.7723) lr 1.6374e-03 eta 7:26:38 +epoch [16/50] batch [370/500] time 1.574 (1.564) data 0.000 (0.003) loss 1.5225 (1.1246) acc 59.3750 (71.7399) lr 1.6374e-03 eta 7:26:30 +epoch 
[16/50] batch [375/500] time 1.573 (1.564) data 0.000 (0.003) loss 0.5283 (1.1247) acc 87.5000 (71.7667) lr 1.6374e-03 eta 7:26:19 +epoch [16/50] batch [380/500] time 1.550 (1.564) data 0.000 (0.003) loss 1.1484 (1.1243) acc 75.0000 (71.7516) lr 1.6374e-03 eta 7:26:09 +epoch [16/50] batch [385/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.0664 (1.1231) acc 84.3750 (71.8263) lr 1.6374e-03 eta 7:26:01 +epoch [16/50] batch [390/500] time 1.556 (1.563) data 0.000 (0.003) loss 0.9863 (1.1230) acc 68.7500 (71.8510) lr 1.6374e-03 eta 7:25:51 +epoch [16/50] batch [395/500] time 1.560 (1.564) data 0.000 (0.003) loss 1.9600 (1.1240) acc 62.5000 (71.8196) lr 1.6374e-03 eta 7:25:44 +epoch [16/50] batch [400/500] time 1.575 (1.564) data 0.000 (0.003) loss 0.8433 (1.1238) acc 78.1250 (71.8750) lr 1.6374e-03 eta 7:25:36 +epoch [16/50] batch [405/500] time 1.541 (1.564) data 0.001 (0.003) loss 1.6895 (1.1256) acc 59.3750 (71.8441) lr 1.6374e-03 eta 7:25:32 +epoch [16/50] batch [410/500] time 1.563 (1.564) data 0.000 (0.003) loss 0.9702 (1.1242) acc 81.2500 (71.8674) lr 1.6374e-03 eta 7:25:24 +epoch [16/50] batch [415/500] time 1.576 (1.564) data 0.000 (0.003) loss 0.8809 (1.1221) acc 68.7500 (71.8976) lr 1.6374e-03 eta 7:25:16 +epoch [16/50] batch [420/500] time 1.562 (1.564) data 0.000 (0.003) loss 1.0693 (1.1187) acc 75.0000 (71.9717) lr 1.6374e-03 eta 7:25:07 +epoch [16/50] batch [425/500] time 1.542 (1.564) data 0.000 (0.003) loss 0.7485 (1.1177) acc 84.3750 (72.0441) lr 1.6374e-03 eta 7:24:58 +epoch [16/50] batch [430/500] time 1.550 (1.563) data 0.000 (0.003) loss 1.6943 (1.1215) acc 53.1250 (71.9622) lr 1.6374e-03 eta 7:24:48 +epoch [16/50] batch [435/500] time 1.542 (1.563) data 0.000 (0.003) loss 0.9194 (1.1201) acc 81.2500 (72.0043) lr 1.6374e-03 eta 7:24:39 +epoch [16/50] batch [440/500] time 1.543 (1.563) data 0.000 (0.003) loss 0.7651 (1.1195) acc 81.2500 (71.9957) lr 1.6374e-03 eta 7:24:29 +epoch [16/50] batch [445/500] time 1.653 (1.564) data 0.000 (0.003) loss 
0.9902 (1.1178) acc 75.0000 (72.0014) lr 1.6374e-03 eta 7:24:25 +epoch [16/50] batch [450/500] time 1.542 (1.563) data 0.000 (0.003) loss 1.2334 (1.1194) acc 65.6250 (71.9444) lr 1.6374e-03 eta 7:24:16 +epoch [16/50] batch [455/500] time 1.556 (1.563) data 0.000 (0.003) loss 1.1250 (1.1202) acc 65.6250 (71.9093) lr 1.6374e-03 eta 7:24:08 +epoch [16/50] batch [460/500] time 1.558 (1.563) data 0.000 (0.003) loss 0.9736 (1.1204) acc 78.1250 (71.9565) lr 1.6374e-03 eta 7:23:57 +epoch [16/50] batch [465/500] time 1.542 (1.563) data 0.000 (0.003) loss 1.1348 (1.1200) acc 62.5000 (71.9153) lr 1.6374e-03 eta 7:23:49 +epoch [16/50] batch [470/500] time 1.544 (1.563) data 0.000 (0.003) loss 1.3398 (1.1232) acc 62.5000 (71.8551) lr 1.6374e-03 eta 7:23:38 +epoch [16/50] batch [475/500] time 1.565 (1.563) data 0.000 (0.003) loss 1.1152 (1.1218) acc 78.1250 (71.8816) lr 1.6374e-03 eta 7:23:30 +epoch [16/50] batch [480/500] time 1.538 (1.563) data 0.000 (0.003) loss 0.9019 (1.1187) acc 78.1250 (71.9271) lr 1.6374e-03 eta 7:23:20 +epoch [16/50] batch [485/500] time 1.531 (1.563) data 0.001 (0.003) loss 1.1387 (1.1191) acc 75.0000 (71.9008) lr 1.6374e-03 eta 7:23:10 +epoch [16/50] batch [490/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.1377 (1.1186) acc 71.8750 (71.9069) lr 1.6374e-03 eta 7:23:00 +epoch [16/50] batch [495/500] time 1.539 (1.563) data 0.000 (0.003) loss 1.8232 (1.1190) acc 68.7500 (71.9192) lr 1.6374e-03 eta 7:22:51 +epoch [16/50] batch [500/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.5898 (1.1208) acc 65.6250 (71.8875) lr 1.5878e-03 eta 7:22:42 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,859 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.2% +epoch [17/50] batch [5/500] time 1.544 (1.733) data 0.001 (0.192) loss 1.1123 (1.2357) acc 62.5000 (68.1250) lr 1.5878e-03 eta 8:10:44 +epoch [17/50] batch [10/500] time 1.554 (1.643) data 0.000 (0.096) loss 1.7197 (1.1653) acc 68.7500 (70.6250) lr 1.5878e-03 eta 7:45:09 +epoch [17/50] 
batch [15/500] time 1.573 (1.616) data 0.000 (0.064) loss 1.1719 (1.2168) acc 71.8750 (69.7917) lr 1.5878e-03 eta 7:37:21 +epoch [17/50] batch [20/500] time 1.557 (1.601) data 0.000 (0.048) loss 0.9111 (1.1754) acc 78.1250 (70.0000) lr 1.5878e-03 eta 7:33:00 +epoch [17/50] batch [25/500] time 1.542 (1.591) data 0.000 (0.039) loss 0.9136 (1.1717) acc 81.2500 (70.3750) lr 1.5878e-03 eta 7:30:00 +epoch [17/50] batch [30/500] time 1.524 (1.583) data 0.000 (0.032) loss 0.6201 (1.1398) acc 84.3750 (71.1458) lr 1.5878e-03 eta 7:27:46 +epoch [17/50] batch [35/500] time 1.563 (1.579) data 0.000 (0.028) loss 1.0059 (1.1173) acc 78.1250 (71.6071) lr 1.5878e-03 eta 7:26:24 +epoch [17/50] batch [40/500] time 1.561 (1.576) data 0.000 (0.024) loss 1.4502 (1.1210) acc 65.6250 (71.7188) lr 1.5878e-03 eta 7:25:25 +epoch [17/50] batch [45/500] time 1.544 (1.575) data 0.001 (0.022) loss 1.6621 (1.1355) acc 65.6250 (71.7361) lr 1.5878e-03 eta 7:25:05 +epoch [17/50] batch [50/500] time 1.568 (1.574) data 0.000 (0.020) loss 1.2109 (1.1292) acc 71.8750 (72.1250) lr 1.5878e-03 eta 7:24:34 +epoch [17/50] batch [55/500] time 1.554 (1.572) data 0.000 (0.018) loss 1.3984 (1.1258) acc 56.2500 (71.9886) lr 1.5878e-03 eta 7:24:05 +epoch [17/50] batch [60/500] time 1.584 (1.572) data 0.000 (0.016) loss 1.0068 (1.1165) acc 68.7500 (72.2917) lr 1.5878e-03 eta 7:23:45 +epoch [17/50] batch [65/500] time 1.530 (1.570) data 0.000 (0.015) loss 1.7275 (1.1152) acc 71.8750 (72.3558) lr 1.5878e-03 eta 7:23:10 +epoch [17/50] batch [70/500] time 1.554 (1.569) data 0.000 (0.014) loss 1.3398 (1.1167) acc 62.5000 (72.0982) lr 1.5878e-03 eta 7:22:47 +epoch [17/50] batch [75/500] time 1.547 (1.568) data 0.001 (0.013) loss 0.9204 (1.1118) acc 81.2500 (72.0417) lr 1.5878e-03 eta 7:22:21 +epoch [17/50] batch [80/500] time 1.557 (1.567) data 0.000 (0.012) loss 1.1064 (1.1244) acc 65.6250 (71.6797) lr 1.5878e-03 eta 7:21:57 +epoch [17/50] batch [85/500] time 1.569 (1.567) data 0.000 (0.012) loss 1.2607 (1.1267) acc 
71.8750 (71.6176) lr 1.5878e-03 eta 7:21:48 +epoch [17/50] batch [90/500] time 1.545 (1.567) data 0.000 (0.011) loss 1.2461 (1.1293) acc 71.8750 (71.5278) lr 1.5878e-03 eta 7:21:31 +epoch [17/50] batch [95/500] time 1.575 (1.567) data 0.000 (0.011) loss 1.1406 (1.1324) acc 68.7500 (71.4145) lr 1.5878e-03 eta 7:21:25 +epoch [17/50] batch [100/500] time 1.570 (1.567) data 0.000 (0.010) loss 1.2227 (1.1288) acc 68.7500 (71.5000) lr 1.5878e-03 eta 7:21:22 +epoch [17/50] batch [105/500] time 1.542 (1.566) data 0.000 (0.010) loss 1.1670 (1.1206) acc 68.7500 (71.5476) lr 1.5878e-03 eta 7:21:04 +epoch [17/50] batch [110/500] time 1.569 (1.566) data 0.001 (0.009) loss 0.9272 (1.1106) acc 84.3750 (71.9034) lr 1.5878e-03 eta 7:20:56 +epoch [17/50] batch [115/500] time 1.558 (1.566) data 0.000 (0.009) loss 1.8252 (1.1152) acc 53.1250 (71.9565) lr 1.5878e-03 eta 7:20:42 +epoch [17/50] batch [120/500] time 1.533 (1.565) data 0.000 (0.008) loss 1.1406 (1.1248) acc 65.6250 (71.6927) lr 1.5878e-03 eta 7:20:21 +epoch [17/50] batch [125/500] time 1.565 (1.565) data 0.000 (0.008) loss 1.2119 (1.1197) acc 71.8750 (71.8500) lr 1.5878e-03 eta 7:20:12 +epoch [17/50] batch [130/500] time 1.550 (1.565) data 0.001 (0.008) loss 0.9868 (1.1302) acc 78.1250 (71.7548) lr 1.5878e-03 eta 7:20:07 +epoch [17/50] batch [135/500] time 1.568 (1.566) data 0.000 (0.008) loss 0.8403 (1.1246) acc 78.1250 (71.9907) lr 1.5878e-03 eta 7:20:08 +epoch [17/50] batch [140/500] time 1.587 (1.566) data 0.000 (0.007) loss 1.1055 (1.1216) acc 68.7500 (71.9866) lr 1.5878e-03 eta 7:19:57 +epoch [17/50] batch [145/500] time 1.564 (1.566) data 0.000 (0.007) loss 0.7563 (1.1170) acc 75.0000 (72.1552) lr 1.5878e-03 eta 7:19:56 +epoch [17/50] batch [150/500] time 1.569 (1.566) data 0.000 (0.007) loss 1.3066 (1.1177) acc 68.7500 (72.0625) lr 1.5878e-03 eta 7:19:42 +epoch [17/50] batch [155/500] time 1.531 (1.565) data 0.000 (0.007) loss 0.6372 (1.1183) acc 87.5000 (72.1371) lr 1.5878e-03 eta 7:19:29 +epoch [17/50] batch 
[160/500] time 1.570 (1.565) data 0.000 (0.006) loss 1.7969 (1.1288) acc 53.1250 (71.8359) lr 1.5878e-03 eta 7:19:16 +epoch [17/50] batch [165/500] time 1.550 (1.565) data 0.000 (0.006) loss 0.8662 (1.1253) acc 78.1250 (71.8939) lr 1.5878e-03 eta 7:19:01 +epoch [17/50] batch [170/500] time 1.557 (1.564) data 0.000 (0.006) loss 1.1045 (1.1296) acc 71.8750 (71.8566) lr 1.5878e-03 eta 7:18:50 +epoch [17/50] batch [175/500] time 1.555 (1.564) data 0.000 (0.006) loss 0.7544 (1.1281) acc 81.2500 (71.8571) lr 1.5878e-03 eta 7:18:36 +epoch [17/50] batch [180/500] time 1.580 (1.565) data 0.000 (0.006) loss 1.0381 (1.1265) acc 75.0000 (71.9965) lr 1.5878e-03 eta 7:18:36 +epoch [17/50] batch [185/500] time 1.571 (1.564) data 0.000 (0.006) loss 0.9146 (1.1334) acc 78.1250 (71.9257) lr 1.5878e-03 eta 7:18:26 +epoch [17/50] batch [190/500] time 1.573 (1.565) data 0.000 (0.005) loss 0.9702 (1.1397) acc 75.0000 (71.8586) lr 1.5878e-03 eta 7:18:21 +epoch [17/50] batch [195/500] time 1.558 (1.564) data 0.000 (0.005) loss 0.8682 (1.1349) acc 68.7500 (71.9551) lr 1.5878e-03 eta 7:18:10 +epoch [17/50] batch [200/500] time 1.548 (1.564) data 0.000 (0.005) loss 1.0977 (1.1351) acc 71.8750 (72.0156) lr 1.5878e-03 eta 7:18:02 +epoch [17/50] batch [205/500] time 1.540 (1.564) data 0.001 (0.005) loss 1.0439 (1.1344) acc 71.8750 (72.0274) lr 1.5878e-03 eta 7:17:53 +epoch [17/50] batch [210/500] time 1.578 (1.564) data 0.000 (0.005) loss 1.1953 (1.1329) acc 71.8750 (71.9940) lr 1.5878e-03 eta 7:17:45 +epoch [17/50] batch [215/500] time 1.546 (1.564) data 0.000 (0.005) loss 0.9517 (1.1281) acc 65.6250 (72.1512) lr 1.5878e-03 eta 7:17:35 +epoch [17/50] batch [220/500] time 1.566 (1.564) data 0.000 (0.005) loss 0.9927 (1.1236) acc 78.1250 (72.2443) lr 1.5878e-03 eta 7:17:30 +epoch [17/50] batch [225/500] time 1.547 (1.564) data 0.000 (0.005) loss 1.2334 (1.1233) acc 59.3750 (72.2222) lr 1.5878e-03 eta 7:17:17 +epoch [17/50] batch [230/500] time 1.560 (1.564) data 0.000 (0.005) loss 1.0195 
(1.1284) acc 75.0000 (72.1467) lr 1.5878e-03 eta 7:17:08 +epoch [17/50] batch [235/500] time 1.562 (1.564) data 0.001 (0.004) loss 1.2598 (1.1346) acc 71.8750 (72.0080) lr 1.5878e-03 eta 7:16:56 +epoch [17/50] batch [240/500] time 1.535 (1.563) data 0.000 (0.004) loss 0.7441 (1.1351) acc 65.6250 (71.8750) lr 1.5878e-03 eta 7:16:43 +epoch [17/50] batch [245/500] time 1.558 (1.563) data 0.000 (0.004) loss 1.1221 (1.1351) acc 68.7500 (71.8495) lr 1.5878e-03 eta 7:16:33 +epoch [17/50] batch [250/500] time 1.586 (1.563) data 0.000 (0.004) loss 1.0596 (1.1320) acc 78.1250 (71.9000) lr 1.5878e-03 eta 7:16:26 +epoch [17/50] batch [255/500] time 1.569 (1.563) data 0.000 (0.004) loss 1.2725 (1.1330) acc 75.0000 (71.9363) lr 1.5878e-03 eta 7:16:18 +epoch [17/50] batch [260/500] time 1.567 (1.563) data 0.000 (0.004) loss 1.5303 (1.1384) acc 71.8750 (71.9111) lr 1.5878e-03 eta 7:16:08 +epoch [17/50] batch [265/500] time 1.577 (1.563) data 0.000 (0.004) loss 0.9297 (1.1344) acc 68.7500 (71.9929) lr 1.5878e-03 eta 7:16:04 +epoch [17/50] batch [270/500] time 1.544 (1.563) data 0.000 (0.004) loss 1.2773 (1.1366) acc 78.1250 (71.9907) lr 1.5878e-03 eta 7:15:49 +epoch [17/50] batch [275/500] time 1.550 (1.563) data 0.000 (0.004) loss 0.9521 (1.1364) acc 78.1250 (72.0341) lr 1.5878e-03 eta 7:15:39 +epoch [17/50] batch [280/500] time 1.572 (1.563) data 0.001 (0.004) loss 1.0566 (1.1393) acc 71.8750 (71.9754) lr 1.5878e-03 eta 7:15:33 +epoch [17/50] batch [285/500] time 1.635 (1.563) data 0.000 (0.004) loss 1.8955 (1.1430) acc 43.7500 (71.8421) lr 1.5878e-03 eta 7:15:28 +epoch [17/50] batch [290/500] time 1.546 (1.563) data 0.000 (0.004) loss 0.7144 (1.1415) acc 75.0000 (71.8534) lr 1.5878e-03 eta 7:15:19 +epoch [17/50] batch [295/500] time 1.553 (1.563) data 0.000 (0.004) loss 0.7998 (1.1409) acc 78.1250 (71.8644) lr 1.5878e-03 eta 7:15:11 +epoch [17/50] batch [300/500] time 1.543 (1.563) data 0.000 (0.004) loss 0.8379 (1.1393) acc 81.2500 (71.9271) lr 1.5878e-03 eta 7:15:00 +epoch 
[17/50] batch [305/500] time 1.589 (1.563) data 0.000 (0.004) loss 0.7285 (1.1406) acc 75.0000 (71.8648) lr 1.5878e-03 eta 7:14:54 +epoch [17/50] batch [310/500] time 1.565 (1.563) data 0.001 (0.003) loss 0.8237 (1.1378) acc 75.0000 (71.9355) lr 1.5878e-03 eta 7:14:44 +epoch [17/50] batch [315/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.8687 (1.1364) acc 87.5000 (72.0139) lr 1.5878e-03 eta 7:14:35 +epoch [17/50] batch [320/500] time 1.557 (1.563) data 0.000 (0.003) loss 0.5806 (1.1320) acc 87.5000 (72.1387) lr 1.5878e-03 eta 7:14:25 +epoch [17/50] batch [325/500] time 1.548 (1.563) data 0.000 (0.003) loss 0.8325 (1.1284) acc 81.2500 (72.1635) lr 1.5878e-03 eta 7:14:17 +epoch [17/50] batch [330/500] time 1.555 (1.563) data 0.000 (0.003) loss 0.8423 (1.1284) acc 81.2500 (72.1780) lr 1.5878e-03 eta 7:14:09 +epoch [17/50] batch [335/500] time 1.581 (1.563) data 0.000 (0.003) loss 1.3281 (1.1275) acc 75.0000 (72.2015) lr 1.5878e-03 eta 7:13:59 +epoch [17/50] batch [340/500] time 1.575 (1.563) data 0.001 (0.003) loss 1.0938 (1.1254) acc 75.0000 (72.2610) lr 1.5878e-03 eta 7:13:53 +epoch [17/50] batch [345/500] time 1.555 (1.563) data 0.000 (0.003) loss 0.8584 (1.1241) acc 81.2500 (72.2917) lr 1.5878e-03 eta 7:13:45 +epoch [17/50] batch [350/500] time 1.594 (1.563) data 0.000 (0.003) loss 0.8657 (1.1244) acc 75.0000 (72.3036) lr 1.5878e-03 eta 7:13:36 +epoch [17/50] batch [355/500] time 1.571 (1.563) data 0.000 (0.003) loss 0.9023 (1.1243) acc 75.0000 (72.3063) lr 1.5878e-03 eta 7:13:29 +epoch [17/50] batch [360/500] time 1.556 (1.563) data 0.000 (0.003) loss 1.0703 (1.1250) acc 68.7500 (72.2743) lr 1.5878e-03 eta 7:13:24 +epoch [17/50] batch [365/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.1094 (1.1268) acc 68.7500 (72.1747) lr 1.5878e-03 eta 7:13:17 +epoch [17/50] batch [370/500] time 1.545 (1.563) data 0.000 (0.003) loss 0.8804 (1.1290) acc 75.0000 (72.1368) lr 1.5878e-03 eta 7:13:10 +epoch [17/50] batch [375/500] time 1.570 (1.563) data 0.001 (0.003) loss 
0.7354 (1.1269) acc 78.1250 (72.2000) lr 1.5878e-03 eta 7:13:05 +epoch [17/50] batch [380/500] time 1.570 (1.563) data 0.001 (0.003) loss 1.3564 (1.1255) acc 71.8750 (72.2451) lr 1.5878e-03 eta 7:12:57 +epoch [17/50] batch [385/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.2881 (1.1275) acc 68.7500 (72.2646) lr 1.5878e-03 eta 7:12:50 +epoch [17/50] batch [390/500] time 1.586 (1.563) data 0.001 (0.003) loss 0.9795 (1.1278) acc 68.7500 (72.2276) lr 1.5878e-03 eta 7:12:44 +epoch [17/50] batch [395/500] time 1.551 (1.563) data 0.000 (0.003) loss 1.5674 (1.1293) acc 59.3750 (72.1915) lr 1.5878e-03 eta 7:12:35 +epoch [17/50] batch [400/500] time 1.536 (1.563) data 0.000 (0.003) loss 0.9404 (1.1278) acc 84.3750 (72.2500) lr 1.5878e-03 eta 7:12:26 +epoch [17/50] batch [405/500] time 1.548 (1.563) data 0.000 (0.003) loss 0.6694 (1.1265) acc 90.6250 (72.2762) lr 1.5878e-03 eta 7:12:19 +epoch [17/50] batch [410/500] time 1.564 (1.563) data 0.001 (0.003) loss 1.2285 (1.1259) acc 71.8750 (72.3018) lr 1.5878e-03 eta 7:12:09 +epoch [17/50] batch [415/500] time 1.568 (1.563) data 0.000 (0.003) loss 1.4893 (1.1234) acc 71.8750 (72.3343) lr 1.5878e-03 eta 7:12:00 +epoch [17/50] batch [420/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.5908 (1.1237) acc 62.5000 (72.2768) lr 1.5878e-03 eta 7:11:54 +epoch [17/50] batch [425/500] time 1.569 (1.563) data 0.000 (0.003) loss 0.6689 (1.1239) acc 81.2500 (72.2794) lr 1.5878e-03 eta 7:11:49 +epoch [17/50] batch [430/500] time 1.572 (1.563) data 0.000 (0.003) loss 0.5356 (1.1238) acc 87.5000 (72.2602) lr 1.5878e-03 eta 7:11:45 +epoch [17/50] batch [435/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.2881 (1.1252) acc 71.8750 (72.2486) lr 1.5878e-03 eta 7:11:35 +epoch [17/50] batch [440/500] time 1.563 (1.563) data 0.000 (0.003) loss 1.1377 (1.1240) acc 62.5000 (72.2159) lr 1.5878e-03 eta 7:11:27 +epoch [17/50] batch [445/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.9409 (1.1229) acc 71.8750 (72.1980) lr 1.5878e-03 eta 7:11:21 
+epoch [17/50] batch [450/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.4697 (1.1252) acc 62.5000 (72.1042) lr 1.5878e-03 eta 7:11:12 +epoch [17/50] batch [455/500] time 1.557 (1.563) data 0.001 (0.003) loss 0.8491 (1.1236) acc 75.0000 (72.1016) lr 1.5878e-03 eta 7:11:04 +epoch [17/50] batch [460/500] time 1.552 (1.563) data 0.001 (0.002) loss 1.2451 (1.1249) acc 59.3750 (72.0788) lr 1.5878e-03 eta 7:10:55 +epoch [17/50] batch [465/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.3848 (1.1236) acc 62.5000 (72.0632) lr 1.5878e-03 eta 7:10:46 +epoch [17/50] batch [470/500] time 1.560 (1.563) data 0.000 (0.002) loss 0.9834 (1.1214) acc 71.8750 (72.1210) lr 1.5878e-03 eta 7:10:37 +epoch [17/50] batch [475/500] time 1.572 (1.563) data 0.001 (0.002) loss 0.9624 (1.1236) acc 81.2500 (72.1118) lr 1.5878e-03 eta 7:10:32 +epoch [17/50] batch [480/500] time 1.596 (1.563) data 0.000 (0.002) loss 0.9639 (1.1259) acc 78.1250 (72.0703) lr 1.5878e-03 eta 7:10:25 +epoch [17/50] batch [485/500] time 1.554 (1.563) data 0.001 (0.002) loss 1.0957 (1.1263) acc 78.1250 (72.0490) lr 1.5878e-03 eta 7:10:16 +epoch [17/50] batch [490/500] time 1.577 (1.563) data 0.000 (0.002) loss 1.3652 (1.1263) acc 65.6250 (72.0344) lr 1.5878e-03 eta 7:10:10 +epoch [17/50] batch [495/500] time 1.578 (1.563) data 0.000 (0.002) loss 0.6069 (1.1236) acc 84.3750 (72.0960) lr 1.5878e-03 eta 7:10:03 +epoch [17/50] batch [500/500] time 1.587 (1.563) data 0.000 (0.002) loss 0.7563 (1.1229) acc 84.3750 (72.1562) lr 1.5358e-03 eta 7:09:55 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,968 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +epoch [18/50] batch [5/500] time 1.547 (1.651) data 0.000 (0.158) loss 0.6758 (0.9303) acc 75.0000 (72.5000) lr 1.5358e-03 eta 7:33:57 +epoch [18/50] batch [10/500] time 1.556 (1.600) data 0.000 (0.079) loss 1.1191 (1.0221) acc 68.7500 (71.8750) lr 1.5358e-03 eta 7:19:35 +epoch [18/50] batch [15/500] time 1.563 (1.585) data 0.001 (0.053) loss 0.9932 
(1.0561) acc 81.2500 (72.9167) lr 1.5358e-03 eta 7:15:25 +epoch [18/50] batch [20/500] time 1.559 (1.580) data 0.001 (0.040) loss 0.8589 (1.0651) acc 75.0000 (72.6562) lr 1.5358e-03 eta 7:13:56 +epoch [18/50] batch [25/500] time 1.566 (1.574) data 0.000 (0.032) loss 1.3525 (1.1199) acc 78.1250 (72.0000) lr 1.5358e-03 eta 7:12:07 +epoch [18/50] batch [30/500] time 1.562 (1.571) data 0.000 (0.027) loss 1.3955 (1.1266) acc 68.7500 (71.8750) lr 1.5358e-03 eta 7:11:14 +epoch [18/50] batch [35/500] time 1.556 (1.570) data 0.001 (0.023) loss 0.6484 (1.1234) acc 75.0000 (71.9643) lr 1.5358e-03 eta 7:10:52 +epoch [18/50] batch [40/500] time 1.570 (1.569) data 0.000 (0.020) loss 0.8438 (1.1683) acc 78.1250 (71.3281) lr 1.5358e-03 eta 7:10:21 +epoch [18/50] batch [45/500] time 1.580 (1.569) data 0.001 (0.018) loss 0.9971 (1.1718) acc 78.1250 (71.0417) lr 1.5358e-03 eta 7:10:25 +epoch [18/50] batch [50/500] time 1.592 (1.569) data 0.000 (0.016) loss 1.2627 (1.1853) acc 68.7500 (70.6875) lr 1.5358e-03 eta 7:10:13 +epoch [18/50] batch [55/500] time 1.550 (1.568) data 0.000 (0.015) loss 1.1338 (1.1894) acc 62.5000 (70.5114) lr 1.5358e-03 eta 7:09:39 +epoch [18/50] batch [60/500] time 1.529 (1.566) data 0.000 (0.014) loss 1.1016 (1.1746) acc 68.7500 (71.0938) lr 1.5358e-03 eta 7:09:07 +epoch [18/50] batch [65/500] time 1.548 (1.568) data 0.000 (0.013) loss 1.3057 (1.1602) acc 68.7500 (71.2500) lr 1.5358e-03 eta 7:09:26 +epoch [18/50] batch [70/500] time 1.570 (1.568) data 0.000 (0.012) loss 1.0801 (1.1502) acc 78.1250 (71.6964) lr 1.5358e-03 eta 7:09:15 +epoch [18/50] batch [75/500] time 1.570 (1.568) data 0.000 (0.011) loss 0.8594 (1.1398) acc 68.7500 (71.7917) lr 1.5358e-03 eta 7:09:11 +epoch [18/50] batch [80/500] time 1.569 (1.568) data 0.000 (0.010) loss 1.7754 (1.1322) acc 56.2500 (71.9141) lr 1.5358e-03 eta 7:09:09 +epoch [18/50] batch [85/500] time 1.558 (1.568) data 0.001 (0.010) loss 0.6167 (1.1264) acc 84.3750 (71.9118) lr 1.5358e-03 eta 7:08:58 +epoch [18/50] batch 
[90/500] time 1.553 (1.568) data 0.000 (0.009) loss 1.2471 (1.1302) acc 68.7500 (71.9792) lr 1.5358e-03 eta 7:08:45 +epoch [18/50] batch [95/500] time 1.569 (1.568) data 0.000 (0.009) loss 0.8018 (1.1337) acc 84.3750 (71.8421) lr 1.5358e-03 eta 7:08:37 +epoch [18/50] batch [100/500] time 1.587 (1.567) data 0.000 (0.008) loss 1.0459 (1.1367) acc 75.0000 (71.8438) lr 1.5358e-03 eta 7:08:26 +epoch [18/50] batch [105/500] time 1.551 (1.567) data 0.001 (0.008) loss 0.6592 (1.1358) acc 84.3750 (71.7262) lr 1.5358e-03 eta 7:08:11 +epoch [18/50] batch [110/500] time 1.552 (1.568) data 0.000 (0.008) loss 0.6543 (1.1379) acc 78.1250 (71.6761) lr 1.5358e-03 eta 7:08:20 +epoch [18/50] batch [115/500] time 1.583 (1.568) data 0.000 (0.007) loss 1.1182 (1.1408) acc 78.1250 (71.7935) lr 1.5358e-03 eta 7:08:11 +epoch [18/50] batch [120/500] time 1.596 (1.568) data 0.001 (0.007) loss 1.1035 (1.1326) acc 68.7500 (71.8750) lr 1.5358e-03 eta 7:08:02 +epoch [18/50] batch [125/500] time 1.554 (1.567) data 0.001 (0.007) loss 1.0840 (1.1358) acc 78.1250 (71.9250) lr 1.5358e-03 eta 7:07:41 +epoch [18/50] batch [130/500] time 1.564 (1.567) data 0.000 (0.007) loss 1.2461 (1.1296) acc 62.5000 (71.9471) lr 1.5358e-03 eta 7:07:26 +epoch [18/50] batch [135/500] time 1.578 (1.567) data 0.000 (0.006) loss 0.8096 (1.1299) acc 81.2500 (72.0602) lr 1.5358e-03 eta 7:07:16 +epoch [18/50] batch [140/500] time 1.572 (1.566) data 0.000 (0.006) loss 1.1689 (1.1251) acc 68.7500 (72.0312) lr 1.5358e-03 eta 7:07:02 +epoch [18/50] batch [145/500] time 1.557 (1.566) data 0.000 (0.006) loss 1.4229 (1.1316) acc 65.6250 (71.8319) lr 1.5358e-03 eta 7:06:51 +epoch [18/50] batch [150/500] time 1.578 (1.566) data 0.001 (0.006) loss 1.2432 (1.1309) acc 75.0000 (71.8125) lr 1.5358e-03 eta 7:06:40 +epoch [18/50] batch [155/500] time 1.560 (1.565) data 0.001 (0.006) loss 1.5107 (1.1307) acc 65.6250 (71.8952) lr 1.5358e-03 eta 7:06:26 +epoch [18/50] batch [160/500] time 1.541 (1.565) data 0.000 (0.005) loss 0.8667 (1.1341) 
acc 68.7500 (71.7383) lr 1.5358e-03 eta 7:06:11 +epoch [18/50] batch [165/500] time 1.554 (1.565) data 0.000 (0.005) loss 1.0361 (1.1356) acc 68.7500 (71.7992) lr 1.5358e-03 eta 7:06:03 +epoch [18/50] batch [170/500] time 1.572 (1.565) data 0.000 (0.005) loss 0.9390 (1.1297) acc 81.2500 (71.9301) lr 1.5358e-03 eta 7:05:52 +epoch [18/50] batch [175/500] time 1.556 (1.565) data 0.001 (0.005) loss 0.6533 (1.1252) acc 78.1250 (72.0536) lr 1.5358e-03 eta 7:05:43 +epoch [18/50] batch [180/500] time 1.576 (1.565) data 0.000 (0.005) loss 0.5225 (1.1149) acc 90.6250 (72.2917) lr 1.5358e-03 eta 7:05:36 +epoch [18/50] batch [185/500] time 1.557 (1.565) data 0.000 (0.005) loss 0.8311 (1.1143) acc 78.1250 (72.2297) lr 1.5358e-03 eta 7:05:27 +epoch [18/50] batch [190/500] time 1.569 (1.565) data 0.000 (0.005) loss 0.6631 (1.1110) acc 81.2500 (72.2697) lr 1.5358e-03 eta 7:05:21 +epoch [18/50] batch [195/500] time 1.561 (1.565) data 0.000 (0.004) loss 1.5469 (1.1123) acc 68.7500 (72.2115) lr 1.5358e-03 eta 7:05:12 +epoch [18/50] batch [200/500] time 1.552 (1.564) data 0.001 (0.004) loss 0.9775 (1.1118) acc 71.8750 (72.1562) lr 1.5358e-03 eta 7:04:59 +epoch [18/50] batch [205/500] time 1.649 (1.565) data 0.001 (0.004) loss 1.1572 (1.1110) acc 65.6250 (72.1646) lr 1.5358e-03 eta 7:04:57 +epoch [18/50] batch [210/500] time 1.572 (1.565) data 0.000 (0.004) loss 0.7817 (1.1065) acc 75.0000 (72.2470) lr 1.5358e-03 eta 7:04:48 +epoch [18/50] batch [215/500] time 1.573 (1.565) data 0.000 (0.004) loss 0.9507 (1.1046) acc 78.1250 (72.3256) lr 1.5358e-03 eta 7:04:41 +epoch [18/50] batch [220/500] time 1.549 (1.565) data 0.000 (0.004) loss 0.7520 (1.1036) acc 78.1250 (72.3011) lr 1.5358e-03 eta 7:04:30 +epoch [18/50] batch [225/500] time 1.551 (1.564) data 0.000 (0.004) loss 1.8828 (1.1080) acc 56.2500 (72.2083) lr 1.5358e-03 eta 7:04:20 +epoch [18/50] batch [230/500] time 1.571 (1.564) data 0.000 (0.004) loss 0.9883 (1.1075) acc 71.8750 (72.1603) lr 1.5358e-03 eta 7:04:11 +epoch [18/50] 
batch [235/500] time 1.566 (1.564) data 0.000 (0.004) loss 1.0605 (1.1085) acc 68.7500 (72.2074) lr 1.5358e-03 eta 7:04:00 +epoch [18/50] batch [240/500] time 1.550 (1.564) data 0.000 (0.004) loss 1.0352 (1.1079) acc 75.0000 (72.2917) lr 1.5358e-03 eta 7:03:49 +epoch [18/50] batch [245/500] time 1.547 (1.564) data 0.000 (0.004) loss 0.9736 (1.1062) acc 87.5000 (72.4235) lr 1.5358e-03 eta 7:03:39 +epoch [18/50] batch [250/500] time 1.535 (1.564) data 0.000 (0.004) loss 0.6528 (1.1046) acc 78.1250 (72.4250) lr 1.5358e-03 eta 7:03:31 +epoch [18/50] batch [255/500] time 1.556 (1.564) data 0.000 (0.004) loss 0.6055 (1.0996) acc 78.1250 (72.4755) lr 1.5358e-03 eta 7:03:21 +epoch [18/50] batch [260/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.6045 (1.1048) acc 62.5000 (72.3918) lr 1.5358e-03 eta 7:03:08 +epoch [18/50] batch [265/500] time 1.532 (1.563) data 0.000 (0.003) loss 1.0830 (1.1032) acc 78.1250 (72.4528) lr 1.5358e-03 eta 7:02:59 +epoch [18/50] batch [270/500] time 1.542 (1.563) data 0.000 (0.003) loss 0.7207 (1.1011) acc 84.3750 (72.5116) lr 1.5358e-03 eta 7:02:48 +epoch [18/50] batch [275/500] time 1.571 (1.563) data 0.001 (0.003) loss 1.4658 (1.1024) acc 71.8750 (72.4545) lr 1.5358e-03 eta 7:02:40 +epoch [18/50] batch [280/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.6650 (1.1065) acc 75.0000 (72.4219) lr 1.5358e-03 eta 7:02:33 +epoch [18/50] batch [285/500] time 1.554 (1.563) data 0.000 (0.003) loss 1.2178 (1.1071) acc 62.5000 (72.4232) lr 1.5358e-03 eta 7:02:24 +epoch [18/50] batch [290/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.1123 (1.1080) acc 75.0000 (72.3599) lr 1.5358e-03 eta 7:02:14 +epoch [18/50] batch [295/500] time 1.537 (1.563) data 0.000 (0.003) loss 0.7446 (1.1048) acc 81.2500 (72.3941) lr 1.5358e-03 eta 7:02:05 +epoch [18/50] batch [300/500] time 1.574 (1.563) data 0.000 (0.003) loss 0.7012 (1.1001) acc 81.2500 (72.4583) lr 1.5358e-03 eta 7:01:55 +epoch [18/50] batch [305/500] time 1.565 (1.563) data 0.000 (0.003) loss 1.0566 
(1.1036) acc 78.1250 (72.4488) lr 1.5358e-03 eta 7:01:46 +epoch [18/50] batch [310/500] time 1.560 (1.563) data 0.000 (0.003) loss 1.3438 (1.1029) acc 65.6250 (72.4395) lr 1.5358e-03 eta 7:01:38 +epoch [18/50] batch [315/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.0918 (1.1065) acc 75.0000 (72.3710) lr 1.5358e-03 eta 7:01:29 +epoch [18/50] batch [320/500] time 1.555 (1.562) data 0.000 (0.003) loss 0.7876 (1.1097) acc 81.2500 (72.2949) lr 1.5358e-03 eta 7:01:20 +epoch [18/50] batch [325/500] time 1.560 (1.562) data 0.000 (0.003) loss 0.9990 (1.1129) acc 81.2500 (72.2404) lr 1.5358e-03 eta 7:01:13 +epoch [18/50] batch [330/500] time 1.574 (1.562) data 0.000 (0.003) loss 1.5518 (1.1154) acc 71.8750 (72.2064) lr 1.5358e-03 eta 7:01:05 +epoch [18/50] batch [335/500] time 1.571 (1.562) data 0.000 (0.003) loss 1.8096 (1.1189) acc 50.0000 (72.1642) lr 1.5358e-03 eta 7:00:56 +epoch [18/50] batch [340/500] time 1.541 (1.562) data 0.000 (0.003) loss 1.0840 (1.1199) acc 71.8750 (72.1324) lr 1.5358e-03 eta 7:00:49 +epoch [18/50] batch [345/500] time 1.568 (1.562) data 0.000 (0.003) loss 0.9561 (1.1200) acc 68.7500 (72.0833) lr 1.5358e-03 eta 7:00:41 +epoch [18/50] batch [350/500] time 1.568 (1.563) data 0.000 (0.003) loss 0.8853 (1.1211) acc 81.2500 (72.0536) lr 1.5358e-03 eta 7:00:36 +epoch [18/50] batch [355/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.0371 (1.1201) acc 75.0000 (72.0687) lr 1.5358e-03 eta 7:00:29 +epoch [18/50] batch [360/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.0361 (1.1190) acc 71.8750 (72.1354) lr 1.5358e-03 eta 7:00:20 +epoch [18/50] batch [365/500] time 1.566 (1.563) data 0.000 (0.003) loss 0.9805 (1.1175) acc 71.8750 (72.1575) lr 1.5358e-03 eta 7:00:11 +epoch [18/50] batch [370/500] time 1.562 (1.562) data 0.000 (0.003) loss 0.9810 (1.1225) acc 75.0000 (72.0355) lr 1.5358e-03 eta 7:00:02 +epoch [18/50] batch [375/500] time 1.586 (1.563) data 0.000 (0.003) loss 0.7661 (1.1202) acc 87.5000 (72.0917) lr 1.5358e-03 eta 6:59:55 +epoch 
[18/50] batch [380/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.6211 (1.1189) acc 81.2500 (72.0888) lr 1.5358e-03 eta 6:59:47 +epoch [18/50] batch [385/500] time 1.571 (1.563) data 0.000 (0.002) loss 0.9189 (1.1193) acc 71.8750 (72.0211) lr 1.5358e-03 eta 6:59:40 +epoch [18/50] batch [390/500] time 1.578 (1.563) data 0.000 (0.002) loss 1.0967 (1.1191) acc 78.1250 (72.0673) lr 1.5358e-03 eta 6:59:34 +epoch [18/50] batch [395/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.0439 (1.1188) acc 75.0000 (72.0886) lr 1.5358e-03 eta 6:59:29 +epoch [18/50] batch [400/500] time 1.592 (1.563) data 0.001 (0.002) loss 1.4072 (1.1181) acc 71.8750 (72.1250) lr 1.5358e-03 eta 6:59:20 +epoch [18/50] batch [405/500] time 1.545 (1.563) data 0.000 (0.002) loss 0.9575 (1.1205) acc 71.8750 (72.0910) lr 1.5358e-03 eta 6:59:13 +epoch [18/50] batch [410/500] time 1.600 (1.563) data 0.001 (0.002) loss 1.1729 (1.1201) acc 75.0000 (72.1037) lr 1.5358e-03 eta 6:59:05 +epoch [18/50] batch [415/500] time 1.566 (1.563) data 0.001 (0.002) loss 0.7344 (1.1188) acc 87.5000 (72.1461) lr 1.5358e-03 eta 6:58:58 +epoch [18/50] batch [420/500] time 1.579 (1.563) data 0.000 (0.002) loss 1.1670 (1.1183) acc 71.8750 (72.1280) lr 1.5358e-03 eta 6:58:51 +epoch [18/50] batch [425/500] time 1.568 (1.563) data 0.000 (0.002) loss 0.9346 (1.1212) acc 71.8750 (72.0515) lr 1.5358e-03 eta 6:58:46 +epoch [18/50] batch [430/500] time 1.571 (1.563) data 0.001 (0.002) loss 0.7925 (1.1179) acc 81.2500 (72.1294) lr 1.5358e-03 eta 6:58:37 +epoch [18/50] batch [435/500] time 1.575 (1.563) data 0.000 (0.002) loss 1.2793 (1.1184) acc 68.7500 (72.1408) lr 1.5358e-03 eta 6:58:30 +epoch [18/50] batch [440/500] time 1.577 (1.563) data 0.000 (0.002) loss 0.6792 (1.1161) acc 81.2500 (72.1733) lr 1.5358e-03 eta 6:58:23 +epoch [18/50] batch [445/500] time 1.586 (1.563) data 0.000 (0.002) loss 1.2334 (1.1145) acc 68.7500 (72.2121) lr 1.5358e-03 eta 6:58:15 +epoch [18/50] batch [450/500] time 1.577 (1.563) data 0.000 (0.002) loss 
1.1475 (1.1140) acc 71.8750 (72.2153) lr 1.5358e-03 eta 6:58:05 +epoch [18/50] batch [455/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.1064 (1.1163) acc 68.7500 (72.1566) lr 1.5358e-03 eta 6:57:57 +epoch [18/50] batch [460/500] time 1.571 (1.563) data 0.001 (0.002) loss 0.8760 (1.1167) acc 81.2500 (72.2079) lr 1.5358e-03 eta 6:57:46 +epoch [18/50] batch [465/500] time 1.572 (1.563) data 0.001 (0.002) loss 1.0615 (1.1188) acc 71.8750 (72.1371) lr 1.5358e-03 eta 6:57:38 +epoch [18/50] batch [470/500] time 1.549 (1.563) data 0.000 (0.002) loss 0.6748 (1.1186) acc 81.2500 (72.1809) lr 1.5358e-03 eta 6:57:28 +epoch [18/50] batch [475/500] time 1.571 (1.563) data 0.000 (0.002) loss 1.4893 (1.1175) acc 65.6250 (72.2171) lr 1.5358e-03 eta 6:57:19 +epoch [18/50] batch [480/500] time 1.557 (1.562) data 0.000 (0.002) loss 1.1875 (1.1168) acc 75.0000 (72.2266) lr 1.5358e-03 eta 6:57:09 +epoch [18/50] batch [485/500] time 1.534 (1.562) data 0.001 (0.002) loss 1.3408 (1.1166) acc 71.8750 (72.2165) lr 1.5358e-03 eta 6:56:59 +epoch [18/50] batch [490/500] time 1.528 (1.562) data 0.000 (0.002) loss 1.2812 (1.1177) acc 65.6250 (72.1811) lr 1.5358e-03 eta 6:56:49 +epoch [18/50] batch [495/500] time 1.526 (1.562) data 0.000 (0.002) loss 1.0635 (1.1159) acc 84.3750 (72.2601) lr 1.5358e-03 eta 6:56:41 +epoch [18/50] batch [500/500] time 1.544 (1.562) data 0.000 (0.002) loss 0.9624 (1.1152) acc 78.1250 (72.2313) lr 1.4818e-03 eta 6:56:31 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,987 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [19/50] batch [5/500] time 1.549 (1.671) data 0.000 (0.168) loss 0.8496 (1.2457) acc 71.8750 (65.6250) lr 1.4818e-03 eta 7:25:28 +epoch [19/50] batch [10/500] time 1.556 (1.618) data 0.000 (0.084) loss 0.7856 (1.1403) acc 81.2500 (68.7500) lr 1.4818e-03 eta 7:11:05 +epoch [19/50] batch 
[15/500] time 1.554 (1.596) data 0.000 (0.056) loss 1.3994 (1.1925) acc 68.7500 (69.5833) lr 1.4818e-03 eta 7:05:06 +epoch [19/50] batch [20/500] time 1.566 (1.588) data 0.000 (0.042) loss 0.9438 (1.1386) acc 75.0000 (70.7812) lr 1.4818e-03 eta 7:02:48 +epoch [19/50] batch [25/500] time 1.567 (1.582) data 0.000 (0.034) loss 1.0029 (1.1531) acc 78.1250 (70.2500) lr 1.4818e-03 eta 7:01:11 +epoch [19/50] batch [30/500] time 1.587 (1.581) data 0.000 (0.028) loss 0.6016 (1.1156) acc 78.1250 (71.0417) lr 1.4818e-03 eta 7:00:44 +epoch [19/50] batch [35/500] time 1.574 (1.580) data 0.000 (0.024) loss 0.3547 (1.0906) acc 93.7500 (72.0536) lr 1.4818e-03 eta 7:00:27 +epoch [19/50] batch [40/500] time 1.573 (1.583) data 0.000 (0.021) loss 1.7275 (1.1554) acc 68.7500 (71.1719) lr 1.4818e-03 eta 7:00:57 +epoch [19/50] batch [45/500] time 1.578 (1.581) data 0.001 (0.019) loss 1.1250 (1.1459) acc 71.8750 (71.7361) lr 1.4818e-03 eta 7:00:29 +epoch [19/50] batch [50/500] time 1.588 (1.582) data 0.000 (0.017) loss 0.9639 (1.1094) acc 75.0000 (72.6250) lr 1.4818e-03 eta 7:00:30 +epoch [19/50] batch [55/500] time 1.579 (1.581) data 0.001 (0.016) loss 1.0820 (1.0930) acc 68.7500 (72.7841) lr 1.4818e-03 eta 7:00:09 +epoch [19/50] batch [60/500] time 1.563 (1.580) data 0.000 (0.014) loss 1.3232 (1.1039) acc 62.5000 (72.2917) lr 1.4818e-03 eta 6:59:46 +epoch [19/50] batch [65/500] time 1.577 (1.580) data 0.000 (0.013) loss 1.0547 (1.1083) acc 75.0000 (72.2596) lr 1.4818e-03 eta 6:59:32 +epoch [19/50] batch [70/500] time 1.545 (1.578) data 0.001 (0.012) loss 0.7002 (1.1006) acc 78.1250 (72.4107) lr 1.4818e-03 eta 6:59:01 +epoch [19/50] batch [75/500] time 1.541 (1.577) data 0.000 (0.012) loss 0.6987 (1.0867) acc 78.1250 (72.6667) lr 1.4818e-03 eta 6:58:30 +epoch [19/50] batch [80/500] time 1.542 (1.577) data 0.000 (0.011) loss 0.8994 (1.0936) acc 75.0000 (72.6172) lr 1.4818e-03 eta 6:58:24 +epoch [19/50] batch [85/500] time 1.579 (1.577) data 0.000 (0.010) loss 1.3145 (1.0985) acc 68.7500 
(72.2794) lr 1.4818e-03 eta 6:58:10 +epoch [19/50] batch [90/500] time 1.552 (1.575) data 0.000 (0.010) loss 1.6055 (1.1117) acc 65.6250 (72.0139) lr 1.4818e-03 eta 6:57:39 +epoch [19/50] batch [95/500] time 1.558 (1.575) data 0.000 (0.009) loss 0.9209 (1.1063) acc 81.2500 (72.1382) lr 1.4818e-03 eta 6:57:27 +epoch [19/50] batch [100/500] time 1.544 (1.573) data 0.001 (0.009) loss 1.1953 (1.1101) acc 71.8750 (72.0312) lr 1.4818e-03 eta 6:56:58 +epoch [19/50] batch [105/500] time 1.557 (1.573) data 0.001 (0.008) loss 0.9639 (1.1070) acc 78.1250 (72.1726) lr 1.4818e-03 eta 6:56:37 +epoch [19/50] batch [110/500] time 1.528 (1.571) data 0.000 (0.008) loss 1.2803 (1.1113) acc 71.8750 (72.3580) lr 1.4818e-03 eta 6:56:05 +epoch [19/50] batch [115/500] time 1.548 (1.570) data 0.000 (0.008) loss 0.7803 (1.1065) acc 68.7500 (72.3641) lr 1.4818e-03 eta 6:55:43 +epoch [19/50] batch [120/500] time 1.548 (1.570) data 0.001 (0.007) loss 1.0469 (1.1110) acc 78.1250 (72.2656) lr 1.4818e-03 eta 6:55:29 +epoch [19/50] batch [125/500] time 1.584 (1.570) data 0.001 (0.007) loss 0.9849 (1.1106) acc 78.1250 (72.4000) lr 1.4818e-03 eta 6:55:16 +epoch [19/50] batch [130/500] time 1.580 (1.570) data 0.000 (0.007) loss 0.9487 (1.1088) acc 68.7500 (72.3558) lr 1.4818e-03 eta 6:55:09 +epoch [19/50] batch [135/500] time 1.584 (1.570) data 0.000 (0.007) loss 1.2842 (1.1042) acc 68.7500 (72.4769) lr 1.4818e-03 eta 6:55:01 +epoch [19/50] batch [140/500] time 1.564 (1.570) data 0.000 (0.006) loss 0.6572 (1.1003) acc 78.1250 (72.5446) lr 1.4818e-03 eta 6:54:59 +epoch [19/50] batch [145/500] time 1.565 (1.569) data 0.000 (0.006) loss 0.5039 (1.1007) acc 84.3750 (72.6078) lr 1.4818e-03 eta 6:54:43 +epoch [19/50] batch [150/500] time 1.560 (1.569) data 0.000 (0.006) loss 0.9131 (1.0931) acc 81.2500 (72.7083) lr 1.4818e-03 eta 6:54:31 +epoch [19/50] batch [155/500] time 1.563 (1.569) data 0.001 (0.006) loss 2.4180 (1.1057) acc 56.2500 (72.4395) lr 1.4818e-03 eta 6:54:22 +epoch [19/50] batch [160/500] 
time 1.553 (1.569) data 0.000 (0.006) loss 0.8838 (1.1019) acc 78.1250 (72.4023) lr 1.4818e-03 eta 6:54:08 +epoch [19/50] batch [165/500] time 1.534 (1.568) data 0.000 (0.006) loss 1.7236 (1.1076) acc 65.6250 (72.4621) lr 1.4818e-03 eta 6:53:56 +epoch [19/50] batch [170/500] time 1.576 (1.568) data 0.001 (0.005) loss 0.5205 (1.1041) acc 84.3750 (72.4265) lr 1.4818e-03 eta 6:53:45 +epoch [19/50] batch [175/500] time 1.554 (1.568) data 0.000 (0.005) loss 0.7949 (1.1009) acc 71.8750 (72.4107) lr 1.4818e-03 eta 6:53:30 +epoch [19/50] batch [180/500] time 1.549 (1.568) data 0.000 (0.005) loss 0.6836 (1.0990) acc 84.3750 (72.5521) lr 1.4818e-03 eta 6:53:19 +epoch [19/50] batch [185/500] time 1.565 (1.568) data 0.001 (0.005) loss 0.8906 (1.0945) acc 78.1250 (72.6014) lr 1.4818e-03 eta 6:53:15 +epoch [19/50] batch [190/500] time 1.553 (1.568) data 0.001 (0.005) loss 1.2305 (1.0947) acc 71.8750 (72.6151) lr 1.4818e-03 eta 6:53:06 +epoch [19/50] batch [195/500] time 1.553 (1.568) data 0.000 (0.005) loss 1.1709 (1.0898) acc 62.5000 (72.6923) lr 1.4818e-03 eta 6:52:56 +epoch [19/50] batch [200/500] time 1.552 (1.568) data 0.000 (0.005) loss 0.7783 (1.0883) acc 71.8750 (72.7188) lr 1.4818e-03 eta 6:52:46 +epoch [19/50] batch [205/500] time 1.572 (1.567) data 0.000 (0.005) loss 0.8237 (1.0859) acc 75.0000 (72.8354) lr 1.4818e-03 eta 6:52:33 +epoch [19/50] batch [210/500] time 1.552 (1.567) data 0.000 (0.004) loss 1.4609 (1.0970) acc 68.7500 (72.6637) lr 1.4818e-03 eta 6:52:22 +epoch [19/50] batch [215/500] time 1.569 (1.567) data 0.001 (0.004) loss 1.2031 (1.0988) acc 78.1250 (72.6744) lr 1.4818e-03 eta 6:52:11 +epoch [19/50] batch [220/500] time 1.543 (1.567) data 0.000 (0.004) loss 0.6113 (1.0973) acc 84.3750 (72.5994) lr 1.4818e-03 eta 6:52:00 +epoch [19/50] batch [225/500] time 1.556 (1.566) data 0.000 (0.004) loss 1.0596 (1.0997) acc 68.7500 (72.5000) lr 1.4818e-03 eta 6:51:49 +epoch [19/50] batch [230/500] time 1.574 (1.566) data 0.000 (0.004) loss 0.8120 (1.1007) acc 
75.0000 (72.4457) lr 1.4818e-03 eta 6:51:41 +epoch [19/50] batch [235/500] time 1.554 (1.566) data 0.000 (0.004) loss 0.9028 (1.1015) acc 78.1250 (72.4734) lr 1.4818e-03 eta 6:51:31 +epoch [19/50] batch [240/500] time 1.557 (1.566) data 0.000 (0.004) loss 0.7842 (1.0998) acc 78.1250 (72.4609) lr 1.4818e-03 eta 6:51:21 +epoch [19/50] batch [245/500] time 1.558 (1.566) data 0.001 (0.004) loss 0.7471 (1.0970) acc 84.3750 (72.5128) lr 1.4818e-03 eta 6:51:09 +epoch [19/50] batch [250/500] time 1.564 (1.566) data 0.000 (0.004) loss 1.4043 (1.0945) acc 65.6250 (72.5750) lr 1.4818e-03 eta 6:51:00 +epoch [19/50] batch [255/500] time 1.551 (1.566) data 0.000 (0.004) loss 1.4199 (1.0977) acc 62.5000 (72.5490) lr 1.4818e-03 eta 6:50:54 +epoch [19/50] batch [260/500] time 1.541 (1.566) data 0.000 (0.004) loss 0.7236 (1.0955) acc 81.2500 (72.5481) lr 1.4818e-03 eta 6:50:43 +epoch [19/50] batch [265/500] time 1.563 (1.566) data 0.000 (0.004) loss 0.9692 (1.0955) acc 71.8750 (72.5118) lr 1.4818e-03 eta 6:50:34 +epoch [19/50] batch [270/500] time 1.568 (1.566) data 0.001 (0.004) loss 1.2295 (1.0948) acc 62.5000 (72.4537) lr 1.4818e-03 eta 6:50:25 +epoch [19/50] batch [275/500] time 1.545 (1.565) data 0.000 (0.003) loss 1.1973 (1.0943) acc 75.0000 (72.4773) lr 1.4818e-03 eta 6:50:13 +epoch [19/50] batch [280/500] time 1.638 (1.565) data 0.000 (0.003) loss 0.9780 (1.0936) acc 71.8750 (72.5000) lr 1.4818e-03 eta 6:50:05 +epoch [19/50] batch [285/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.4961 (1.0957) acc 68.7500 (72.4781) lr 1.4818e-03 eta 6:49:55 +epoch [19/50] batch [290/500] time 1.567 (1.565) data 0.000 (0.003) loss 2.1523 (1.1009) acc 59.3750 (72.4461) lr 1.4818e-03 eta 6:49:48 +epoch [19/50] batch [295/500] time 1.573 (1.565) data 0.001 (0.003) loss 1.0791 (1.1008) acc 78.1250 (72.4682) lr 1.4818e-03 eta 6:49:38 +epoch [19/50] batch [300/500] time 1.552 (1.565) data 0.001 (0.003) loss 1.5049 (1.1021) acc 62.5000 (72.4479) lr 1.4818e-03 eta 6:49:28 +epoch [19/50] batch 
[305/500] time 1.550 (1.565) data 0.000 (0.003) loss 1.9014 (1.1036) acc 62.5000 (72.4590) lr 1.4818e-03 eta 6:49:18 +epoch [19/50] batch [310/500] time 1.561 (1.565) data 0.000 (0.003) loss 1.2178 (1.1055) acc 71.8750 (72.3790) lr 1.4818e-03 eta 6:49:09 +epoch [19/50] batch [315/500] time 1.554 (1.564) data 0.000 (0.003) loss 1.5459 (1.1086) acc 62.5000 (72.3413) lr 1.4818e-03 eta 6:48:57 +epoch [19/50] batch [320/500] time 1.522 (1.564) data 0.001 (0.003) loss 1.2500 (1.1109) acc 71.8750 (72.2852) lr 1.4818e-03 eta 6:48:46 +epoch [19/50] batch [325/500] time 1.563 (1.564) data 0.000 (0.003) loss 1.2812 (1.1129) acc 65.6250 (72.2788) lr 1.4818e-03 eta 6:48:41 +epoch [19/50] batch [330/500] time 1.562 (1.564) data 0.000 (0.003) loss 1.2822 (1.1132) acc 68.7500 (72.2633) lr 1.4818e-03 eta 6:48:32 +epoch [19/50] batch [335/500] time 1.560 (1.564) data 0.000 (0.003) loss 0.8135 (1.1155) acc 81.2500 (72.2481) lr 1.4818e-03 eta 6:48:21 +epoch [19/50] batch [340/500] time 1.561 (1.564) data 0.000 (0.003) loss 0.8706 (1.1155) acc 75.0000 (72.2518) lr 1.4818e-03 eta 6:48:10 +epoch [19/50] batch [345/500] time 1.535 (1.564) data 0.000 (0.003) loss 1.0215 (1.1142) acc 62.5000 (72.2192) lr 1.4818e-03 eta 6:48:00 +epoch [19/50] batch [350/500] time 1.564 (1.564) data 0.000 (0.003) loss 1.6641 (1.1143) acc 56.2500 (72.2143) lr 1.4818e-03 eta 6:47:51 +epoch [19/50] batch [355/500] time 1.527 (1.563) data 0.000 (0.003) loss 1.4824 (1.1178) acc 62.5000 (72.1039) lr 1.4818e-03 eta 6:47:40 +epoch [19/50] batch [360/500] time 1.541 (1.563) data 0.000 (0.003) loss 2.2109 (1.1204) acc 50.0000 (72.0660) lr 1.4818e-03 eta 6:47:28 +epoch [19/50] batch [365/500] time 1.571 (1.563) data 0.001 (0.003) loss 1.2852 (1.1188) acc 78.1250 (72.1832) lr 1.4818e-03 eta 6:47:20 +epoch [19/50] batch [370/500] time 1.573 (1.563) data 0.000 (0.003) loss 1.5049 (1.1197) acc 65.6250 (72.1791) lr 1.4818e-03 eta 6:47:11 +epoch [19/50] batch [375/500] time 1.574 (1.563) data 0.000 (0.003) loss 0.7837 
(1.1167) acc 84.3750 (72.2417) lr 1.4818e-03 eta 6:47:03 +epoch [19/50] batch [380/500] time 1.577 (1.563) data 0.000 (0.003) loss 0.9136 (1.1162) acc 78.1250 (72.2615) lr 1.4818e-03 eta 6:46:54 +epoch [19/50] batch [385/500] time 1.560 (1.563) data 0.000 (0.003) loss 1.3428 (1.1158) acc 71.8750 (72.3295) lr 1.4818e-03 eta 6:46:48 +epoch [19/50] batch [390/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.3721 (1.1202) acc 65.6250 (72.2356) lr 1.4818e-03 eta 6:46:40 +epoch [19/50] batch [395/500] time 1.548 (1.563) data 0.000 (0.003) loss 0.8911 (1.1232) acc 75.0000 (72.2231) lr 1.4818e-03 eta 6:46:29 +epoch [19/50] batch [400/500] time 1.563 (1.563) data 0.000 (0.003) loss 0.9229 (1.1261) acc 68.7500 (72.1797) lr 1.4818e-03 eta 6:46:22 +epoch [19/50] batch [405/500] time 1.555 (1.563) data 0.000 (0.002) loss 0.8955 (1.1239) acc 71.8750 (72.1836) lr 1.4818e-03 eta 6:46:13 +epoch [19/50] batch [410/500] time 1.566 (1.563) data 0.000 (0.002) loss 0.8145 (1.1212) acc 71.8750 (72.2027) lr 1.4818e-03 eta 6:46:06 +epoch [19/50] batch [415/500] time 1.577 (1.563) data 0.000 (0.002) loss 0.8091 (1.1184) acc 75.0000 (72.2364) lr 1.4818e-03 eta 6:46:00 +epoch [19/50] batch [420/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.0479 (1.1178) acc 62.5000 (72.2024) lr 1.4818e-03 eta 6:45:51 +epoch [19/50] batch [425/500] time 1.548 (1.563) data 0.000 (0.002) loss 0.9819 (1.1188) acc 71.8750 (72.1838) lr 1.4818e-03 eta 6:45:45 +epoch [19/50] batch [430/500] time 1.576 (1.563) data 0.001 (0.002) loss 2.0977 (1.1213) acc 59.3750 (72.1148) lr 1.4818e-03 eta 6:45:36 +epoch [19/50] batch [435/500] time 1.546 (1.563) data 0.000 (0.002) loss 0.8560 (1.1217) acc 75.0000 (72.0905) lr 1.4818e-03 eta 6:45:28 +epoch [19/50] batch [440/500] time 1.557 (1.563) data 0.000 (0.002) loss 0.7354 (1.1197) acc 78.1250 (72.1307) lr 1.4818e-03 eta 6:45:20 +epoch [19/50] batch [445/500] time 1.567 (1.563) data 0.000 (0.002) loss 1.1729 (1.1215) acc 78.1250 (72.0927) lr 1.4818e-03 eta 6:45:13 +epoch 
[19/50] batch [450/500] time 1.578 (1.563) data 0.000 (0.002) loss 0.8994 (1.1183) acc 78.1250 (72.1667) lr 1.4818e-03 eta 6:45:04 +epoch [19/50] batch [455/500] time 1.572 (1.563) data 0.000 (0.002) loss 1.2236 (1.1189) acc 68.7500 (72.1223) lr 1.4818e-03 eta 6:44:57 +epoch [19/50] batch [460/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.2568 (1.1201) acc 68.7500 (72.0720) lr 1.4818e-03 eta 6:44:49 +epoch [19/50] batch [465/500] time 1.529 (1.563) data 0.000 (0.002) loss 1.5146 (1.1201) acc 65.6250 (72.0833) lr 1.4818e-03 eta 6:44:41 +epoch [19/50] batch [470/500] time 1.550 (1.563) data 0.001 (0.002) loss 1.4785 (1.1199) acc 75.0000 (72.1410) lr 1.4818e-03 eta 6:44:35 +epoch [19/50] batch [475/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.7231 (1.1181) acc 84.3750 (72.1908) lr 1.4818e-03 eta 6:44:28 +epoch [19/50] batch [480/500] time 1.542 (1.563) data 0.000 (0.002) loss 0.6470 (1.1151) acc 78.1250 (72.2201) lr 1.4818e-03 eta 6:44:18 +epoch [19/50] batch [485/500] time 1.541 (1.563) data 0.001 (0.002) loss 1.0820 (1.1145) acc 75.0000 (72.2487) lr 1.4818e-03 eta 6:44:11 +epoch [19/50] batch [490/500] time 1.565 (1.563) data 0.000 (0.002) loss 1.0576 (1.1133) acc 75.0000 (72.2513) lr 1.4818e-03 eta 6:44:01 +epoch [19/50] batch [495/500] time 1.573 (1.563) data 0.000 (0.002) loss 0.7896 (1.1120) acc 87.5000 (72.2854) lr 1.4818e-03 eta 6:43:53 +epoch [19/50] batch [500/500] time 1.575 (1.563) data 0.000 (0.002) loss 1.5312 (1.1123) acc 59.3750 (72.3000) lr 1.4258e-03 eta 6:43:45 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,984 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +epoch [20/50] batch [5/500] time 1.536 (1.714) data 0.000 (0.214) loss 1.3018 (1.2977) acc 65.6250 (68.1250) lr 1.4258e-03 eta 7:22:36 +epoch [20/50] batch [10/500] time 1.551 (1.633) data 0.000 (0.107) loss 1.5371 (1.1150) acc 68.7500 (72.1875) lr 1.4258e-03 eta 7:01:35 +epoch [20/50] batch [15/500] time 1.567 (1.611) data 0.000 (0.072) loss 1.2793 
(1.0517) acc 71.8750 (73.5417) lr 1.4258e-03 eta 6:55:49 +epoch [20/50] batch [20/500] time 1.554 (1.595) data 0.001 (0.054) loss 1.0234 (1.0387) acc 75.0000 (74.2188) lr 1.4258e-03 eta 6:51:33 +epoch [20/50] batch [25/500] time 1.723 (1.594) data 0.000 (0.043) loss 1.1572 (1.0395) acc 62.5000 (73.5000) lr 1.4258e-03 eta 6:51:07 +epoch [20/50] batch [30/500] time 1.582 (1.590) data 0.001 (0.036) loss 0.7954 (1.0005) acc 75.0000 (74.2708) lr 1.4258e-03 eta 6:50:03 +epoch [20/50] batch [35/500] time 1.546 (1.586) data 0.000 (0.031) loss 0.9839 (1.0185) acc 81.2500 (74.1071) lr 1.4258e-03 eta 6:48:45 +epoch [20/50] batch [40/500] time 1.561 (1.583) data 0.001 (0.027) loss 1.0439 (1.0485) acc 84.3750 (73.8281) lr 1.4258e-03 eta 6:47:55 +epoch [20/50] batch [45/500] time 1.572 (1.581) data 0.000 (0.024) loss 0.7373 (1.0373) acc 81.2500 (74.2361) lr 1.4258e-03 eta 6:47:20 +epoch [20/50] batch [50/500] time 1.543 (1.579) data 0.001 (0.022) loss 1.3682 (1.0553) acc 65.6250 (73.5625) lr 1.4258e-03 eta 6:46:33 +epoch [20/50] batch [55/500] time 1.560 (1.577) data 0.001 (0.020) loss 1.5107 (1.0697) acc 75.0000 (73.4659) lr 1.4258e-03 eta 6:46:03 +epoch [20/50] batch [60/500] time 1.546 (1.576) data 0.000 (0.018) loss 1.5703 (1.0924) acc 71.8750 (73.1771) lr 1.4258e-03 eta 6:45:37 +epoch [20/50] batch [65/500] time 1.561 (1.575) data 0.001 (0.017) loss 1.0400 (1.1057) acc 78.1250 (73.2212) lr 1.4258e-03 eta 6:45:12 +epoch [20/50] batch [70/500] time 1.568 (1.574) data 0.001 (0.016) loss 1.9287 (1.1217) acc 62.5000 (72.8571) lr 1.4258e-03 eta 6:44:45 +epoch [20/50] batch [75/500] time 1.560 (1.573) data 0.000 (0.015) loss 1.1143 (1.1335) acc 71.8750 (72.7500) lr 1.4258e-03 eta 6:44:29 +epoch [20/50] batch [80/500] time 1.556 (1.572) data 0.000 (0.014) loss 1.2949 (1.1210) acc 71.8750 (72.9688) lr 1.4258e-03 eta 6:44:07 +epoch [20/50] batch [85/500] time 1.633 (1.573) data 0.000 (0.013) loss 0.5850 (1.1186) acc 87.5000 (73.0147) lr 1.4258e-03 eta 6:44:06 +epoch [20/50] batch 
[90/500] time 1.568 (1.572) data 0.000 (0.012) loss 1.0684 (1.1252) acc 68.7500 (72.8472) lr 1.4258e-03 eta 6:43:48 +epoch [20/50] batch [95/500] time 1.610 (1.572) data 0.000 (0.012) loss 0.7832 (1.1276) acc 81.2500 (72.6974) lr 1.4258e-03 eta 6:43:42 +epoch [20/50] batch [100/500] time 1.572 (1.572) data 0.000 (0.011) loss 0.8672 (1.1314) acc 84.3750 (72.6875) lr 1.4258e-03 eta 6:43:32 +epoch [20/50] batch [105/500] time 1.589 (1.572) data 0.000 (0.011) loss 1.9316 (1.1272) acc 62.5000 (72.7381) lr 1.4258e-03 eta 6:43:28 +epoch [20/50] batch [110/500] time 1.547 (1.572) data 0.000 (0.010) loss 0.7661 (1.1156) acc 75.0000 (73.0114) lr 1.4258e-03 eta 6:43:12 +epoch [20/50] batch [115/500] time 1.555 (1.571) data 0.001 (0.010) loss 0.7012 (1.1099) acc 78.1250 (73.0978) lr 1.4258e-03 eta 6:42:57 +epoch [20/50] batch [120/500] time 1.593 (1.571) data 0.000 (0.009) loss 0.6943 (1.1009) acc 84.3750 (73.1510) lr 1.4258e-03 eta 6:42:49 +epoch [20/50] batch [125/500] time 1.544 (1.571) data 0.000 (0.009) loss 1.4639 (1.1005) acc 53.1250 (73.0250) lr 1.4258e-03 eta 6:42:31 +epoch [20/50] batch [130/500] time 1.540 (1.571) data 0.000 (0.009) loss 0.9121 (1.0979) acc 71.8750 (73.1250) lr 1.4258e-03 eta 6:42:22 +epoch [20/50] batch [135/500] time 1.549 (1.571) data 0.000 (0.008) loss 0.6685 (1.0951) acc 75.0000 (72.9630) lr 1.4258e-03 eta 6:42:10 +epoch [20/50] batch [140/500] time 1.577 (1.570) data 0.000 (0.008) loss 1.6113 (1.0983) acc 62.5000 (72.8125) lr 1.4258e-03 eta 6:42:01 +epoch [20/50] batch [145/500] time 1.554 (1.570) data 0.001 (0.008) loss 1.2461 (1.1000) acc 68.7500 (72.7371) lr 1.4258e-03 eta 6:41:46 +epoch [20/50] batch [150/500] time 1.540 (1.570) data 0.000 (0.008) loss 0.9897 (1.0979) acc 68.7500 (72.7917) lr 1.4258e-03 eta 6:41:32 +epoch [20/50] batch [155/500] time 1.534 (1.569) data 0.000 (0.007) loss 1.0049 (1.1001) acc 68.7500 (72.7419) lr 1.4258e-03 eta 6:41:19 +epoch [20/50] batch [160/500] time 1.538 (1.568) data 0.000 (0.007) loss 1.0342 (1.0965) 
acc 71.8750 (72.8906) lr 1.4258e-03 eta 6:40:59 +epoch [20/50] batch [165/500] time 1.551 (1.568) data 0.000 (0.007) loss 0.9453 (1.0920) acc 71.8750 (72.9924) lr 1.4258e-03 eta 6:40:43 +epoch [20/50] batch [170/500] time 1.549 (1.568) data 0.000 (0.007) loss 1.4482 (1.0966) acc 65.6250 (72.9963) lr 1.4258e-03 eta 6:40:30 +epoch [20/50] batch [175/500] time 1.550 (1.567) data 0.000 (0.007) loss 1.3193 (1.0997) acc 68.7500 (72.9286) lr 1.4258e-03 eta 6:40:15 +epoch [20/50] batch [180/500] time 1.565 (1.567) data 0.000 (0.006) loss 0.8901 (1.0987) acc 78.1250 (72.8993) lr 1.4258e-03 eta 6:40:07 +epoch [20/50] batch [185/500] time 1.566 (1.567) data 0.000 (0.006) loss 1.3066 (1.1008) acc 68.7500 (72.8378) lr 1.4258e-03 eta 6:39:59 +epoch [20/50] batch [190/500] time 1.541 (1.567) data 0.000 (0.006) loss 0.7046 (1.0982) acc 81.2500 (72.9276) lr 1.4258e-03 eta 6:39:48 +epoch [20/50] batch [195/500] time 1.576 (1.567) data 0.000 (0.006) loss 0.8584 (1.0989) acc 68.7500 (72.9167) lr 1.4258e-03 eta 6:39:40 +epoch [20/50] batch [200/500] time 1.557 (1.566) data 0.000 (0.006) loss 1.0908 (1.0984) acc 62.5000 (72.8438) lr 1.4258e-03 eta 6:39:25 +epoch [20/50] batch [205/500] time 1.532 (1.566) data 0.000 (0.006) loss 0.7866 (1.0943) acc 78.1250 (72.8963) lr 1.4258e-03 eta 6:39:11 +epoch [20/50] batch [210/500] time 1.549 (1.566) data 0.000 (0.006) loss 0.7007 (1.0949) acc 71.8750 (72.9167) lr 1.4258e-03 eta 6:39:00 +epoch [20/50] batch [215/500] time 1.568 (1.566) data 0.000 (0.005) loss 1.0469 (1.0901) acc 71.8750 (73.0378) lr 1.4258e-03 eta 6:38:51 +epoch [20/50] batch [220/500] time 1.558 (1.566) data 0.000 (0.005) loss 1.0225 (1.0905) acc 75.0000 (73.0256) lr 1.4258e-03 eta 6:38:42 +epoch [20/50] batch [225/500] time 1.558 (1.566) data 0.000 (0.005) loss 1.0820 (1.0950) acc 65.6250 (72.8889) lr 1.4258e-03 eta 6:38:34 +epoch [20/50] batch [230/500] time 1.574 (1.566) data 0.000 (0.005) loss 0.5239 (1.0973) acc 78.1250 (72.7853) lr 1.4258e-03 eta 6:38:32 +epoch [20/50] 
batch [235/500] time 1.553 (1.566) data 0.001 (0.005) loss 1.5703 (1.0975) acc 65.6250 (72.7926) lr 1.4258e-03 eta 6:38:22 +epoch [20/50] batch [240/500] time 1.579 (1.566) data 0.000 (0.005) loss 1.5107 (1.1020) acc 71.8750 (72.7604) lr 1.4258e-03 eta 6:38:13 +epoch [20/50] batch [245/500] time 1.599 (1.566) data 0.000 (0.005) loss 0.8867 (1.1033) acc 75.0000 (72.7041) lr 1.4258e-03 eta 6:38:08 +epoch [20/50] batch [250/500] time 1.591 (1.566) data 0.000 (0.005) loss 1.3369 (1.1071) acc 78.1250 (72.6375) lr 1.4258e-03 eta 6:38:01 +epoch [20/50] batch [255/500] time 1.568 (1.566) data 0.000 (0.005) loss 1.6455 (1.1094) acc 65.6250 (72.6471) lr 1.4258e-03 eta 6:37:51 +epoch [20/50] batch [260/500] time 1.553 (1.566) data 0.000 (0.005) loss 0.9546 (1.1075) acc 71.8750 (72.5841) lr 1.4258e-03 eta 6:37:38 +epoch [20/50] batch [265/500] time 1.557 (1.565) data 0.000 (0.004) loss 0.9829 (1.1083) acc 71.8750 (72.5472) lr 1.4258e-03 eta 6:37:30 +epoch [20/50] batch [270/500] time 1.575 (1.565) data 0.000 (0.004) loss 0.8560 (1.1070) acc 65.6250 (72.5116) lr 1.4258e-03 eta 6:37:21 +epoch [20/50] batch [275/500] time 1.589 (1.566) data 0.000 (0.004) loss 0.9268 (1.1089) acc 65.6250 (72.4205) lr 1.4258e-03 eta 6:37:18 +epoch [20/50] batch [280/500] time 1.566 (1.566) data 0.000 (0.004) loss 0.8169 (1.1098) acc 75.0000 (72.4107) lr 1.4258e-03 eta 6:37:12 +epoch [20/50] batch [285/500] time 1.581 (1.566) data 0.000 (0.004) loss 1.1953 (1.1136) acc 62.5000 (72.2807) lr 1.4258e-03 eta 6:37:05 +epoch [20/50] batch [290/500] time 1.565 (1.566) data 0.001 (0.004) loss 1.7520 (1.1169) acc 65.6250 (72.2414) lr 1.4258e-03 eta 6:36:56 +epoch [20/50] batch [295/500] time 1.542 (1.566) data 0.000 (0.004) loss 0.4651 (1.1155) acc 87.5000 (72.3093) lr 1.4258e-03 eta 6:36:47 +epoch [20/50] batch [300/500] time 1.555 (1.566) data 0.000 (0.004) loss 0.4658 (1.1126) acc 84.3750 (72.3333) lr 1.4258e-03 eta 6:36:40 +epoch [20/50] batch [305/500] time 1.550 (1.566) data 0.000 (0.004) loss 1.2900 
(1.1165) acc 65.6250 (72.2541) lr 1.4258e-03 eta 6:36:30 +epoch [20/50] batch [310/500] time 1.536 (1.565) data 0.000 (0.004) loss 1.1104 (1.1181) acc 75.0000 (72.2480) lr 1.4258e-03 eta 6:36:19 +epoch [20/50] batch [315/500] time 1.585 (1.565) data 0.001 (0.004) loss 1.2529 (1.1155) acc 68.7500 (72.3016) lr 1.4258e-03 eta 6:36:10 +epoch [20/50] batch [320/500] time 1.541 (1.565) data 0.001 (0.004) loss 0.8091 (1.1122) acc 75.0000 (72.4023) lr 1.4258e-03 eta 6:36:02 +epoch [20/50] batch [325/500] time 1.563 (1.565) data 0.000 (0.004) loss 0.9248 (1.1097) acc 81.2500 (72.4231) lr 1.4258e-03 eta 6:35:54 +epoch [20/50] batch [330/500] time 1.555 (1.565) data 0.001 (0.004) loss 1.5225 (1.1126) acc 65.6250 (72.3958) lr 1.4258e-03 eta 6:35:48 +epoch [20/50] batch [335/500] time 1.562 (1.566) data 0.001 (0.004) loss 1.4600 (1.1140) acc 65.6250 (72.3507) lr 1.4258e-03 eta 6:35:41 +epoch [20/50] batch [340/500] time 1.537 (1.565) data 0.000 (0.004) loss 0.6689 (1.1121) acc 78.1250 (72.3529) lr 1.4258e-03 eta 6:35:31 +epoch [20/50] batch [345/500] time 1.554 (1.565) data 0.000 (0.004) loss 0.9985 (1.1114) acc 71.8750 (72.3551) lr 1.4258e-03 eta 6:35:20 +epoch [20/50] batch [350/500] time 1.559 (1.565) data 0.000 (0.004) loss 0.8374 (1.1094) acc 78.1250 (72.3661) lr 1.4258e-03 eta 6:35:10 +epoch [20/50] batch [355/500] time 1.580 (1.565) data 0.001 (0.003) loss 0.9453 (1.1075) acc 75.0000 (72.4032) lr 1.4258e-03 eta 6:35:02 +epoch [20/50] batch [360/500] time 1.568 (1.565) data 0.000 (0.003) loss 1.3506 (1.1074) acc 75.0000 (72.4392) lr 1.4258e-03 eta 6:34:52 +epoch [20/50] batch [365/500] time 1.535 (1.565) data 0.000 (0.003) loss 1.2266 (1.1053) acc 71.8750 (72.4572) lr 1.4258e-03 eta 6:34:42 +epoch [20/50] batch [370/500] time 1.575 (1.565) data 0.000 (0.003) loss 0.9653 (1.1043) acc 71.8750 (72.4662) lr 1.4258e-03 eta 6:34:33 +epoch [20/50] batch [375/500] time 1.541 (1.565) data 0.000 (0.003) loss 1.0469 (1.1051) acc 71.8750 (72.4583) lr 1.4258e-03 eta 6:34:27 +epoch 
[20/50] batch [380/500] time 1.550 (1.565) data 0.001 (0.003) loss 0.7026 (1.1043) acc 84.3750 (72.5329) lr 1.4258e-03 eta 6:34:18 +epoch [20/50] batch [385/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.1641 (1.1054) acc 71.8750 (72.4675) lr 1.4258e-03 eta 6:34:09 +epoch [20/50] batch [390/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.6787 (1.1046) acc 68.7500 (72.5000) lr 1.4258e-03 eta 6:34:03 +epoch [20/50] batch [395/500] time 1.554 (1.565) data 0.000 (0.003) loss 1.1201 (1.1036) acc 68.7500 (72.4842) lr 1.4258e-03 eta 6:33:53 +epoch [20/50] batch [400/500] time 1.554 (1.565) data 0.000 (0.003) loss 0.7944 (1.1035) acc 71.8750 (72.4062) lr 1.4258e-03 eta 6:33:45 +epoch [20/50] batch [405/500] time 1.549 (1.565) data 0.000 (0.003) loss 1.0703 (1.1024) acc 75.0000 (72.4769) lr 1.4258e-03 eta 6:33:36 +epoch [20/50] batch [410/500] time 1.575 (1.565) data 0.000 (0.003) loss 1.5410 (1.1023) acc 65.6250 (72.4695) lr 1.4258e-03 eta 6:33:29 +epoch [20/50] batch [415/500] time 1.664 (1.565) data 0.000 (0.003) loss 0.8945 (1.1021) acc 78.1250 (72.5075) lr 1.4258e-03 eta 6:33:24 +epoch [20/50] batch [420/500] time 1.542 (1.565) data 0.000 (0.003) loss 1.1807 (1.1018) acc 68.7500 (72.5446) lr 1.4258e-03 eta 6:33:16 +epoch [20/50] batch [425/500] time 1.541 (1.565) data 0.001 (0.003) loss 1.1641 (1.1016) acc 68.7500 (72.5221) lr 1.4258e-03 eta 6:33:06 +epoch [20/50] batch [430/500] time 1.567 (1.565) data 0.001 (0.003) loss 0.5742 (1.0988) acc 87.5000 (72.5945) lr 1.4258e-03 eta 6:32:57 +epoch [20/50] batch [435/500] time 1.565 (1.565) data 0.000 (0.003) loss 0.8242 (1.0956) acc 84.3750 (72.6580) lr 1.4258e-03 eta 6:32:51 +epoch [20/50] batch [440/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.2334 (1.0988) acc 75.0000 (72.5710) lr 1.4258e-03 eta 6:32:42 +epoch [20/50] batch [445/500] time 1.556 (1.565) data 0.001 (0.003) loss 1.0908 (1.1006) acc 75.0000 (72.5351) lr 1.4258e-03 eta 6:32:34 +epoch [20/50] batch [450/500] time 1.585 (1.565) data 0.000 (0.003) loss 
0.9580 (1.1007) acc 78.1250 (72.5556) lr 1.4258e-03 eta 6:32:29 +epoch [20/50] batch [455/500] time 1.587 (1.565) data 0.000 (0.003) loss 1.7207 (1.1016) acc 65.6250 (72.5549) lr 1.4258e-03 eta 6:32:23 +epoch [20/50] batch [460/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.1318 (1.1028) acc 68.7500 (72.5204) lr 1.4258e-03 eta 6:32:13 +epoch [20/50] batch [465/500] time 1.570 (1.565) data 0.000 (0.003) loss 1.4512 (1.1059) acc 71.8750 (72.5000) lr 1.4258e-03 eta 6:32:06 +epoch [20/50] batch [470/500] time 1.567 (1.565) data 0.000 (0.003) loss 0.6133 (1.1047) acc 84.3750 (72.4867) lr 1.4258e-03 eta 6:32:00 +epoch [20/50] batch [475/500] time 1.541 (1.565) data 0.001 (0.003) loss 0.8716 (1.1042) acc 87.5000 (72.5658) lr 1.4258e-03 eta 6:31:49 +epoch [20/50] batch [480/500] time 1.569 (1.565) data 0.000 (0.003) loss 0.9834 (1.1029) acc 71.8750 (72.5716) lr 1.4258e-03 eta 6:31:41 +epoch [20/50] batch [485/500] time 1.552 (1.565) data 0.001 (0.003) loss 1.8643 (1.1069) acc 62.5000 (72.4936) lr 1.4258e-03 eta 6:31:33 +epoch [20/50] batch [490/500] time 1.562 (1.565) data 0.000 (0.003) loss 0.9976 (1.1054) acc 68.7500 (72.5255) lr 1.4258e-03 eta 6:31:23 +epoch [20/50] batch [495/500] time 1.536 (1.564) data 0.000 (0.003) loss 1.6113 (1.1082) acc 56.2500 (72.4684) lr 1.4258e-03 eta 6:31:13 +epoch [20/50] batch [500/500] time 1.558 (1.564) data 0.000 (0.003) loss 0.6240 (1.1078) acc 87.5000 (72.4875) lr 1.3681e-03 eta 6:31:03 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,041 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [21/50] batch [5/500] time 1.547 (1.644) data 0.000 (0.152) loss 1.0645 (1.1066) acc 71.8750 (70.0000) lr 1.3681e-03 eta 6:50:59 +epoch [21/50] batch [10/500] time 1.565 (1.607) data 0.000 (0.076) loss 0.9150 (1.0889) acc 75.0000 (70.6250) lr 1.3681e-03 eta 6:41:29 +epoch [21/50] batch 
[15/500] time 1.562 (1.596) data 0.000 (0.051) loss 0.7744 (1.1385) acc 84.3750 (71.6667) lr 1.3681e-03 eta 6:38:28 +epoch [21/50] batch [20/500] time 1.571 (1.587) data 0.000 (0.038) loss 1.5928 (1.1478) acc 68.7500 (71.2500) lr 1.3681e-03 eta 6:36:15 +epoch [21/50] batch [25/500] time 1.563 (1.588) data 0.000 (0.031) loss 1.8770 (1.1002) acc 65.6250 (73.0000) lr 1.3681e-03 eta 6:36:24 +epoch [21/50] batch [30/500] time 1.572 (1.585) data 0.001 (0.026) loss 1.3857 (1.1252) acc 71.8750 (71.9792) lr 1.3681e-03 eta 6:35:25 +epoch [21/50] batch [35/500] time 1.561 (1.581) data 0.000 (0.022) loss 1.0742 (1.1556) acc 71.8750 (71.4286) lr 1.3681e-03 eta 6:34:21 +epoch [21/50] batch [40/500] time 1.573 (1.578) data 0.001 (0.019) loss 1.0166 (1.1547) acc 68.7500 (71.2500) lr 1.3681e-03 eta 6:33:24 +epoch [21/50] batch [45/500] time 1.578 (1.577) data 0.000 (0.017) loss 1.7529 (1.1788) acc 62.5000 (70.8333) lr 1.3681e-03 eta 6:33:08 +epoch [21/50] batch [50/500] time 1.559 (1.576) data 0.000 (0.016) loss 1.3145 (1.1869) acc 65.6250 (70.5625) lr 1.3681e-03 eta 6:32:35 +epoch [21/50] batch [55/500] time 1.539 (1.573) data 0.000 (0.014) loss 1.1504 (1.1719) acc 68.7500 (70.5114) lr 1.3681e-03 eta 6:31:52 +epoch [21/50] batch [60/500] time 1.570 (1.573) data 0.000 (0.013) loss 0.7568 (1.1468) acc 81.2500 (71.1979) lr 1.3681e-03 eta 6:31:33 +epoch [21/50] batch [65/500] time 1.553 (1.572) data 0.000 (0.012) loss 1.2139 (1.1395) acc 65.6250 (71.2981) lr 1.3681e-03 eta 6:31:21 +epoch [21/50] batch [70/500] time 1.567 (1.571) data 0.001 (0.011) loss 0.6182 (1.1292) acc 78.1250 (71.4732) lr 1.3681e-03 eta 6:30:56 +epoch [21/50] batch [75/500] time 1.609 (1.571) data 0.000 (0.011) loss 1.4199 (1.1312) acc 59.3750 (71.5000) lr 1.3681e-03 eta 6:30:50 +epoch [21/50] batch [80/500] time 1.548 (1.571) data 0.000 (0.010) loss 0.7578 (1.1174) acc 75.0000 (71.5234) lr 1.3681e-03 eta 6:30:34 +epoch [21/50] batch [85/500] time 1.540 (1.570) data 0.000 (0.009) loss 0.9639 (1.1153) acc 78.1250 
(71.8382) lr 1.3681e-03 eta 6:30:15 +epoch [21/50] batch [90/500] time 1.572 (1.570) data 0.000 (0.009) loss 1.0605 (1.1166) acc 78.1250 (71.8750) lr 1.3681e-03 eta 6:30:05 +epoch [21/50] batch [95/500] time 1.536 (1.569) data 0.001 (0.008) loss 0.9185 (1.1244) acc 71.8750 (71.7763) lr 1.3681e-03 eta 6:29:45 +epoch [21/50] batch [100/500] time 1.552 (1.569) data 0.001 (0.008) loss 1.6543 (1.1190) acc 59.3750 (71.8750) lr 1.3681e-03 eta 6:29:37 +epoch [21/50] batch [105/500] time 1.551 (1.569) data 0.000 (0.008) loss 0.9473 (1.1096) acc 78.1250 (72.0833) lr 1.3681e-03 eta 6:29:24 +epoch [21/50] batch [110/500] time 1.553 (1.568) data 0.001 (0.007) loss 1.1777 (1.1022) acc 71.8750 (72.2443) lr 1.3681e-03 eta 6:29:10 +epoch [21/50] batch [115/500] time 1.554 (1.568) data 0.000 (0.007) loss 1.0713 (1.0979) acc 62.5000 (72.2283) lr 1.3681e-03 eta 6:28:56 +epoch [21/50] batch [120/500] time 1.640 (1.569) data 0.000 (0.007) loss 1.4824 (1.1016) acc 75.0000 (72.1615) lr 1.3681e-03 eta 6:29:05 +epoch [21/50] batch [125/500] time 1.546 (1.569) data 0.000 (0.007) loss 1.1709 (1.1115) acc 78.1250 (72.1000) lr 1.3681e-03 eta 6:28:52 +epoch [21/50] batch [130/500] time 1.553 (1.568) data 0.000 (0.006) loss 1.0811 (1.1095) acc 68.7500 (71.9712) lr 1.3681e-03 eta 6:28:35 +epoch [21/50] batch [135/500] time 1.534 (1.567) data 0.000 (0.006) loss 1.1123 (1.1102) acc 65.6250 (71.9444) lr 1.3681e-03 eta 6:28:17 +epoch [21/50] batch [140/500] time 1.542 (1.566) data 0.000 (0.006) loss 0.7632 (1.1126) acc 87.5000 (72.1205) lr 1.3681e-03 eta 6:27:57 +epoch [21/50] batch [145/500] time 1.568 (1.566) data 0.000 (0.006) loss 1.3154 (1.1065) acc 62.5000 (72.2629) lr 1.3681e-03 eta 6:27:39 +epoch [21/50] batch [150/500] time 1.541 (1.565) data 0.000 (0.006) loss 1.0049 (1.1086) acc 68.7500 (72.2292) lr 1.3681e-03 eta 6:27:22 +epoch [21/50] batch [155/500] time 1.535 (1.565) data 0.000 (0.005) loss 1.1230 (1.1113) acc 81.2500 (72.2581) lr 1.3681e-03 eta 6:27:11 +epoch [21/50] batch [160/500] 
time 1.556 (1.565) data 0.000 (0.005) loss 1.6973 (1.1108) acc 78.1250 (72.3242) lr 1.3681e-03 eta 6:26:59 +epoch [21/50] batch [165/500] time 1.556 (1.565) data 0.000 (0.005) loss 1.1455 (1.1126) acc 75.0000 (72.2917) lr 1.3681e-03 eta 6:26:54 +epoch [21/50] batch [170/500] time 1.557 (1.565) data 0.000 (0.005) loss 1.3281 (1.1072) acc 68.7500 (72.3897) lr 1.3681e-03 eta 6:26:43 +epoch [21/50] batch [175/500] time 1.566 (1.564) data 0.001 (0.005) loss 1.3027 (1.1090) acc 71.8750 (72.4286) lr 1.3681e-03 eta 6:26:33 +epoch [21/50] batch [180/500] time 1.575 (1.564) data 0.000 (0.005) loss 0.6489 (1.1106) acc 84.3750 (72.4653) lr 1.3681e-03 eta 6:26:23 +epoch [21/50] batch [185/500] time 1.544 (1.564) data 0.000 (0.005) loss 1.2480 (1.1071) acc 71.8750 (72.5169) lr 1.3681e-03 eta 6:26:05 +epoch [21/50] batch [190/500] time 1.562 (1.563) data 0.000 (0.004) loss 0.9507 (1.1057) acc 78.1250 (72.5987) lr 1.3681e-03 eta 6:25:50 +epoch [21/50] batch [195/500] time 1.581 (1.563) data 0.000 (0.004) loss 0.8091 (1.0995) acc 78.1250 (72.7083) lr 1.3681e-03 eta 6:25:42 +epoch [21/50] batch [200/500] time 1.541 (1.563) data 0.000 (0.004) loss 0.6416 (1.0999) acc 84.3750 (72.7031) lr 1.3681e-03 eta 6:25:33 +epoch [21/50] batch [205/500] time 1.559 (1.563) data 0.000 (0.004) loss 1.2598 (1.0997) acc 68.7500 (72.7134) lr 1.3681e-03 eta 6:25:26 +epoch [21/50] batch [210/500] time 1.575 (1.563) data 0.000 (0.004) loss 0.8882 (1.0956) acc 84.3750 (72.8125) lr 1.3681e-03 eta 6:25:16 +epoch [21/50] batch [215/500] time 1.564 (1.563) data 0.000 (0.004) loss 0.4175 (1.0950) acc 93.7500 (72.8924) lr 1.3681e-03 eta 6:25:13 +epoch [21/50] batch [220/500] time 1.558 (1.563) data 0.000 (0.004) loss 1.2949 (1.0947) acc 68.7500 (72.9119) lr 1.3681e-03 eta 6:25:02 +epoch [21/50] batch [225/500] time 1.547 (1.563) data 0.000 (0.004) loss 0.9907 (1.0928) acc 71.8750 (72.9444) lr 1.3681e-03 eta 6:24:48 +epoch [21/50] batch [230/500] time 1.571 (1.563) data 0.000 (0.004) loss 0.6875 (1.0932) acc 
78.1250 (72.9212) lr 1.3681e-03 eta 6:24:41 +epoch [21/50] batch [235/500] time 1.574 (1.563) data 0.000 (0.004) loss 1.2119 (1.0947) acc 78.1250 (72.9787) lr 1.3681e-03 eta 6:24:31 +epoch [21/50] batch [240/500] time 1.601 (1.563) data 0.001 (0.004) loss 1.0957 (1.0963) acc 75.0000 (72.9948) lr 1.3681e-03 eta 6:24:25 +epoch [21/50] batch [245/500] time 1.559 (1.563) data 0.000 (0.004) loss 1.0654 (1.0962) acc 65.6250 (72.9719) lr 1.3681e-03 eta 6:24:19 +epoch [21/50] batch [250/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.5713 (1.0963) acc 68.7500 (72.9375) lr 1.3681e-03 eta 6:24:11 +epoch [21/50] batch [255/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.3701 (1.0976) acc 62.5000 (72.9167) lr 1.3681e-03 eta 6:24:02 +epoch [21/50] batch [260/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.6182 (1.0954) acc 71.8750 (72.9808) lr 1.3681e-03 eta 6:23:50 +epoch [21/50] batch [265/500] time 1.561 (1.563) data 0.000 (0.003) loss 1.0527 (1.0965) acc 65.6250 (72.8892) lr 1.3681e-03 eta 6:23:46 +epoch [21/50] batch [270/500] time 1.541 (1.562) data 0.000 (0.003) loss 0.8481 (1.0943) acc 78.1250 (72.8819) lr 1.3681e-03 eta 6:23:32 +epoch [21/50] batch [275/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.3867 (1.0938) acc 62.5000 (72.7841) lr 1.3681e-03 eta 6:23:25 +epoch [21/50] batch [280/500] time 1.572 (1.562) data 0.001 (0.003) loss 1.0918 (1.0900) acc 75.0000 (72.9353) lr 1.3681e-03 eta 6:23:16 +epoch [21/50] batch [285/500] time 1.573 (1.562) data 0.001 (0.003) loss 1.7207 (1.0945) acc 65.6250 (72.8289) lr 1.3681e-03 eta 6:23:09 +epoch [21/50] batch [290/500] time 1.581 (1.562) data 0.000 (0.003) loss 0.9648 (1.0902) acc 68.7500 (72.8987) lr 1.3681e-03 eta 6:23:03 +epoch [21/50] batch [295/500] time 1.548 (1.562) data 0.000 (0.003) loss 0.6851 (1.0910) acc 87.5000 (72.8708) lr 1.3681e-03 eta 6:22:55 +epoch [21/50] batch [300/500] time 1.551 (1.562) data 0.000 (0.003) loss 0.8018 (1.0911) acc 78.1250 (72.8125) lr 1.3681e-03 eta 6:22:45 +epoch [21/50] batch 
[305/500] time 1.554 (1.562) data 0.000 (0.003) loss 0.8052 (1.0940) acc 84.3750 (72.7254) lr 1.3681e-03 eta 6:22:37 +epoch [21/50] batch [310/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.3691 (1.0941) acc 68.7500 (72.7621) lr 1.3681e-03 eta 6:22:34 +epoch [21/50] batch [315/500] time 1.551 (1.562) data 0.000 (0.003) loss 0.7363 (1.0939) acc 84.3750 (72.7778) lr 1.3681e-03 eta 6:22:24 +epoch [21/50] batch [320/500] time 1.572 (1.563) data 0.000 (0.003) loss 0.5889 (1.0941) acc 87.5000 (72.7930) lr 1.3681e-03 eta 6:22:18 +epoch [21/50] batch [325/500] time 1.556 (1.563) data 0.000 (0.003) loss 0.7197 (1.0937) acc 81.2500 (72.7692) lr 1.3681e-03 eta 6:22:11 +epoch [21/50] batch [330/500] time 1.567 (1.563) data 0.000 (0.003) loss 1.2432 (1.0956) acc 65.6250 (72.7083) lr 1.3681e-03 eta 6:22:04 +epoch [21/50] batch [335/500] time 1.539 (1.563) data 0.000 (0.003) loss 1.4326 (1.0972) acc 62.5000 (72.6772) lr 1.3681e-03 eta 6:21:55 +epoch [21/50] batch [340/500] time 1.563 (1.562) data 0.000 (0.003) loss 1.1680 (1.0978) acc 71.8750 (72.5827) lr 1.3681e-03 eta 6:21:46 +epoch [21/50] batch [345/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.2783 (1.1012) acc 62.5000 (72.4728) lr 1.3681e-03 eta 6:21:37 +epoch [21/50] batch [350/500] time 1.570 (1.562) data 0.000 (0.003) loss 0.9468 (1.1011) acc 71.8750 (72.4286) lr 1.3681e-03 eta 6:21:27 +epoch [21/50] batch [355/500] time 1.572 (1.562) data 0.000 (0.003) loss 1.4971 (1.1021) acc 65.6250 (72.3944) lr 1.3681e-03 eta 6:21:21 +epoch [21/50] batch [360/500] time 1.551 (1.562) data 0.000 (0.003) loss 0.4651 (1.0997) acc 87.5000 (72.4132) lr 1.3681e-03 eta 6:21:13 +epoch [21/50] batch [365/500] time 1.583 (1.562) data 0.000 (0.002) loss 0.9751 (1.1015) acc 65.6250 (72.3716) lr 1.3681e-03 eta 6:21:06 +epoch [21/50] batch [370/500] time 1.582 (1.562) data 0.001 (0.002) loss 1.3057 (1.0994) acc 68.7500 (72.4155) lr 1.3681e-03 eta 6:20:57 +epoch [21/50] batch [375/500] time 1.557 (1.562) data 0.000 (0.002) loss 1.0938 
(1.0983) acc 65.6250 (72.3750) lr 1.3681e-03 eta 6:20:47 +epoch [21/50] batch [380/500] time 1.548 (1.562) data 0.000 (0.002) loss 1.6816 (1.0999) acc 62.5000 (72.3684) lr 1.3681e-03 eta 6:20:37 +epoch [21/50] batch [385/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.1953 (1.0994) acc 78.1250 (72.3620) lr 1.3681e-03 eta 6:20:27 +epoch [21/50] batch [390/500] time 1.550 (1.562) data 0.000 (0.002) loss 1.5527 (1.1009) acc 62.5000 (72.3558) lr 1.3681e-03 eta 6:20:20 +epoch [21/50] batch [395/500] time 1.553 (1.562) data 0.001 (0.002) loss 1.3330 (1.1005) acc 65.6250 (72.3418) lr 1.3681e-03 eta 6:20:14 +epoch [21/50] batch [400/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.1084 (1.1011) acc 71.8750 (72.3516) lr 1.3681e-03 eta 6:20:04 +epoch [21/50] batch [405/500] time 1.552 (1.562) data 0.000 (0.002) loss 0.6821 (1.0982) acc 90.6250 (72.4460) lr 1.3681e-03 eta 6:19:55 +epoch [21/50] batch [410/500] time 1.551 (1.562) data 0.000 (0.002) loss 1.0303 (1.0972) acc 68.7500 (72.4695) lr 1.3681e-03 eta 6:19:51 +epoch [21/50] batch [415/500] time 1.577 (1.562) data 0.000 (0.002) loss 0.8965 (1.0953) acc 81.2500 (72.5226) lr 1.3681e-03 eta 6:19:43 +epoch [21/50] batch [420/500] time 1.541 (1.562) data 0.001 (0.002) loss 1.3857 (1.0957) acc 65.6250 (72.5149) lr 1.3681e-03 eta 6:19:34 +epoch [21/50] batch [425/500] time 1.562 (1.562) data 0.000 (0.002) loss 0.8965 (1.0976) acc 78.1250 (72.4853) lr 1.3681e-03 eta 6:19:25 +epoch [21/50] batch [430/500] time 1.563 (1.562) data 0.000 (0.002) loss 0.8716 (1.0973) acc 78.1250 (72.5000) lr 1.3681e-03 eta 6:19:16 +epoch [21/50] batch [435/500] time 1.568 (1.562) data 0.000 (0.002) loss 1.3145 (1.0984) acc 65.6250 (72.4425) lr 1.3681e-03 eta 6:19:07 +epoch [21/50] batch [440/500] time 1.592 (1.562) data 0.000 (0.002) loss 1.1816 (1.0974) acc 78.1250 (72.4432) lr 1.3681e-03 eta 6:18:59 +epoch [21/50] batch [445/500] time 1.547 (1.562) data 0.000 (0.002) loss 1.4023 (1.0964) acc 53.1250 (72.4438) lr 1.3681e-03 eta 6:18:51 +epoch 
[21/50] batch [450/500] time 1.663 (1.562) data 0.000 (0.002) loss 1.4189 (1.0958) acc 62.5000 (72.4653) lr 1.3681e-03 eta 6:18:46 +epoch [21/50] batch [455/500] time 1.554 (1.562) data 0.000 (0.002) loss 0.7065 (1.0961) acc 81.2500 (72.4519) lr 1.3681e-03 eta 6:18:37 +epoch [21/50] batch [460/500] time 1.573 (1.562) data 0.000 (0.002) loss 0.7012 (1.0947) acc 81.2500 (72.5272) lr 1.3681e-03 eta 6:18:29 +epoch [21/50] batch [465/500] time 1.569 (1.562) data 0.000 (0.002) loss 1.2734 (1.0934) acc 62.5000 (72.5538) lr 1.3681e-03 eta 6:18:22 +epoch [21/50] batch [470/500] time 1.582 (1.562) data 0.001 (0.002) loss 0.8501 (1.0909) acc 78.1250 (72.6263) lr 1.3681e-03 eta 6:18:15 +epoch [21/50] batch [475/500] time 1.578 (1.562) data 0.001 (0.002) loss 1.2275 (1.0912) acc 65.6250 (72.6382) lr 1.3681e-03 eta 6:18:06 +epoch [21/50] batch [480/500] time 1.564 (1.562) data 0.000 (0.002) loss 1.0254 (1.0924) acc 71.8750 (72.6172) lr 1.3681e-03 eta 6:17:59 +epoch [21/50] batch [485/500] time 1.542 (1.562) data 0.001 (0.002) loss 1.0898 (1.0913) acc 75.0000 (72.6353) lr 1.3681e-03 eta 6:17:50 +epoch [21/50] batch [490/500] time 1.555 (1.562) data 0.000 (0.002) loss 1.0439 (1.0935) acc 71.8750 (72.6148) lr 1.3681e-03 eta 6:17:41 +epoch [21/50] batch [495/500] time 1.541 (1.562) data 0.000 (0.002) loss 1.5928 (1.0948) acc 65.6250 (72.5568) lr 1.3681e-03 eta 6:17:31 +epoch [21/50] batch [500/500] time 1.566 (1.562) data 0.000 (0.002) loss 1.4590 (1.0961) acc 62.5000 (72.5438) lr 1.3090e-03 eta 6:17:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,982 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +epoch [22/50] batch [5/500] time 1.550 (1.659) data 0.000 (0.151) loss 0.8403 (0.9158) acc 81.2500 (76.8750) lr 1.3090e-03 eta 6:40:46 +epoch [22/50] batch [10/500] time 1.543 (1.604) data 0.000 (0.076) loss 0.7544 (0.9626) acc 84.3750 (74.3750) lr 1.3090e-03 eta 6:27:22 +epoch [22/50] batch [15/500] time 1.546 (1.589) data 0.000 (0.051) loss 0.6270 
(0.9099) acc 81.2500 (75.0000) lr 1.3090e-03 eta 6:23:43 +epoch [22/50] batch [20/500] time 1.522 (1.576) data 0.000 (0.038) loss 0.6055 (0.9514) acc 84.3750 (74.0625) lr 1.3090e-03 eta 6:20:20 +epoch [22/50] batch [25/500] time 1.542 (1.568) data 0.000 (0.031) loss 1.3516 (1.0503) acc 59.3750 (72.1250) lr 1.3090e-03 eta 6:18:19 +epoch [22/50] batch [30/500] time 1.582 (1.565) data 0.001 (0.026) loss 1.1299 (1.0113) acc 62.5000 (73.0208) lr 1.3090e-03 eta 6:17:31 +epoch [22/50] batch [35/500] time 1.549 (1.565) data 0.001 (0.022) loss 1.7559 (0.9967) acc 56.2500 (73.7500) lr 1.3090e-03 eta 6:17:12 +epoch [22/50] batch [40/500] time 1.562 (1.568) data 0.001 (0.019) loss 0.7158 (0.9986) acc 81.2500 (74.1406) lr 1.3090e-03 eta 6:17:53 +epoch [22/50] batch [45/500] time 1.547 (1.567) data 0.001 (0.017) loss 1.1699 (0.9893) acc 62.5000 (73.8194) lr 1.3090e-03 eta 6:17:28 +epoch [22/50] batch [50/500] time 1.571 (1.565) data 0.000 (0.016) loss 0.9746 (0.9798) acc 68.7500 (73.8750) lr 1.3090e-03 eta 6:17:00 +epoch [22/50] batch [55/500] time 1.566 (1.565) data 0.000 (0.014) loss 1.3486 (0.9824) acc 75.0000 (74.1477) lr 1.3090e-03 eta 6:16:47 +epoch [22/50] batch [60/500] time 1.557 (1.565) data 0.001 (0.013) loss 0.8633 (0.9895) acc 68.7500 (74.0104) lr 1.3090e-03 eta 6:16:41 +epoch [22/50] batch [65/500] time 1.551 (1.565) data 0.001 (0.012) loss 1.4961 (1.0174) acc 50.0000 (73.4135) lr 1.3090e-03 eta 6:16:23 +epoch [22/50] batch [70/500] time 1.566 (1.565) data 0.001 (0.011) loss 1.6973 (1.0408) acc 65.6250 (72.9911) lr 1.3090e-03 eta 6:16:23 +epoch [22/50] batch [75/500] time 1.579 (1.565) data 0.000 (0.011) loss 0.8369 (1.0308) acc 75.0000 (73.1667) lr 1.3090e-03 eta 6:16:22 +epoch [22/50] batch [80/500] time 1.561 (1.565) data 0.000 (0.010) loss 0.7969 (1.0267) acc 84.3750 (73.3984) lr 1.3090e-03 eta 6:16:03 +epoch [22/50] batch [85/500] time 1.575 (1.566) data 0.000 (0.009) loss 1.0400 (1.0150) acc 75.0000 (73.5294) lr 1.3090e-03 eta 6:16:09 +epoch [22/50] batch 
[90/500] time 1.554 (1.565) data 0.001 (0.009) loss 1.7744 (1.0332) acc 65.6250 (73.2986) lr 1.3090e-03 eta 6:15:52 +epoch [22/50] batch [95/500] time 1.562 (1.565) data 0.000 (0.008) loss 1.2529 (1.0286) acc 81.2500 (73.5526) lr 1.3090e-03 eta 6:15:42 +epoch [22/50] batch [100/500] time 1.581 (1.565) data 0.000 (0.008) loss 0.8403 (1.0366) acc 84.3750 (73.5625) lr 1.3090e-03 eta 6:15:32 +epoch [22/50] batch [105/500] time 1.564 (1.565) data 0.000 (0.008) loss 0.9219 (1.0533) acc 71.8750 (73.5417) lr 1.3090e-03 eta 6:15:25 +epoch [22/50] batch [110/500] time 1.561 (1.565) data 0.000 (0.007) loss 1.8096 (1.0674) acc 53.1250 (73.3523) lr 1.3090e-03 eta 6:15:16 +epoch [22/50] batch [115/500] time 1.577 (1.565) data 0.000 (0.007) loss 1.3633 (1.0842) acc 68.7500 (72.9620) lr 1.3090e-03 eta 6:15:10 +epoch [22/50] batch [120/500] time 1.579 (1.565) data 0.000 (0.007) loss 0.8433 (1.0818) acc 78.1250 (72.9427) lr 1.3090e-03 eta 6:15:04 +epoch [22/50] batch [125/500] time 1.560 (1.565) data 0.000 (0.006) loss 1.5117 (1.0846) acc 68.7500 (72.9500) lr 1.3090e-03 eta 6:15:02 +epoch [22/50] batch [130/500] time 1.578 (1.566) data 0.001 (0.006) loss 1.0146 (1.0792) acc 68.7500 (72.8606) lr 1.3090e-03 eta 6:14:57 +epoch [22/50] batch [135/500] time 1.571 (1.565) data 0.000 (0.006) loss 0.8369 (1.0792) acc 75.0000 (72.8704) lr 1.3090e-03 eta 6:14:43 +epoch [22/50] batch [140/500] time 1.564 (1.565) data 0.000 (0.006) loss 0.5479 (1.0677) acc 87.5000 (73.1250) lr 1.3090e-03 eta 6:14:31 +epoch [22/50] batch [145/500] time 1.572 (1.565) data 0.000 (0.006) loss 1.1826 (1.0698) acc 71.8750 (73.1681) lr 1.3090e-03 eta 6:14:25 +epoch [22/50] batch [150/500] time 1.542 (1.565) data 0.000 (0.005) loss 1.0029 (1.0656) acc 65.6250 (73.3125) lr 1.3090e-03 eta 6:14:16 +epoch [22/50] batch [155/500] time 1.573 (1.565) data 0.000 (0.005) loss 1.1523 (1.0704) acc 65.6250 (73.1048) lr 1.3090e-03 eta 6:14:08 +epoch [22/50] batch [160/500] time 1.564 (1.565) data 0.000 (0.005) loss 1.5615 (1.0731) 
acc 75.0000 (73.1641) lr 1.3090e-03 eta 6:14:00 +epoch [22/50] batch [165/500] time 1.564 (1.565) data 0.000 (0.005) loss 1.3066 (1.0710) acc 56.2500 (73.1061) lr 1.3090e-03 eta 6:13:51 +epoch [22/50] batch [170/500] time 1.547 (1.565) data 0.001 (0.005) loss 0.8018 (1.0698) acc 78.1250 (73.0515) lr 1.3090e-03 eta 6:13:39 +epoch [22/50] batch [175/500] time 1.561 (1.564) data 0.000 (0.005) loss 1.0068 (1.0747) acc 71.8750 (73.0357) lr 1.3090e-03 eta 6:13:29 +epoch [22/50] batch [180/500] time 1.555 (1.565) data 0.000 (0.005) loss 1.2666 (1.0778) acc 62.5000 (72.9340) lr 1.3090e-03 eta 6:13:26 +epoch [22/50] batch [185/500] time 1.538 (1.564) data 0.000 (0.005) loss 2.1895 (1.0862) acc 56.2500 (72.7534) lr 1.3090e-03 eta 6:13:14 +epoch [22/50] batch [190/500] time 1.559 (1.564) data 0.000 (0.004) loss 1.1494 (1.0901) acc 68.7500 (72.6316) lr 1.3090e-03 eta 6:13:04 +epoch [22/50] batch [195/500] time 1.599 (1.565) data 0.000 (0.004) loss 1.5303 (1.0884) acc 56.2500 (72.6442) lr 1.3090e-03 eta 6:13:02 +epoch [22/50] batch [200/500] time 1.566 (1.565) data 0.000 (0.004) loss 1.5859 (1.0937) acc 59.3750 (72.5000) lr 1.3090e-03 eta 6:12:55 +epoch [22/50] batch [205/500] time 1.547 (1.565) data 0.000 (0.004) loss 1.4199 (1.0950) acc 68.7500 (72.5152) lr 1.3090e-03 eta 6:12:45 +epoch [22/50] batch [210/500] time 1.548 (1.564) data 0.000 (0.004) loss 1.2783 (1.0955) acc 71.8750 (72.5298) lr 1.3090e-03 eta 6:12:34 +epoch [22/50] batch [215/500] time 1.577 (1.564) data 0.000 (0.004) loss 1.1055 (1.0987) acc 75.0000 (72.4855) lr 1.3090e-03 eta 6:12:27 +epoch [22/50] batch [220/500] time 1.550 (1.564) data 0.001 (0.004) loss 1.6416 (1.1038) acc 53.1250 (72.4006) lr 1.3090e-03 eta 6:12:14 +epoch [22/50] batch [225/500] time 1.569 (1.564) data 0.000 (0.004) loss 0.8882 (1.1019) acc 78.1250 (72.3750) lr 1.3090e-03 eta 6:12:09 +epoch [22/50] batch [230/500] time 1.568 (1.564) data 0.000 (0.004) loss 0.8579 (1.1075) acc 81.2500 (72.2418) lr 1.3090e-03 eta 6:12:02 +epoch [22/50] 
batch [235/500] time 1.541 (1.564) data 0.001 (0.004) loss 0.7988 (1.1080) acc 75.0000 (72.2739) lr 1.3090e-03 eta 6:11:53 +epoch [22/50] batch [240/500] time 1.544 (1.564) data 0.001 (0.004) loss 1.5879 (1.1085) acc 62.5000 (72.2656) lr 1.3090e-03 eta 6:11:42 +epoch [22/50] batch [245/500] time 1.571 (1.564) data 0.000 (0.004) loss 1.5742 (1.1082) acc 68.7500 (72.2832) lr 1.3090e-03 eta 6:11:31 +epoch [22/50] batch [250/500] time 1.561 (1.564) data 0.000 (0.003) loss 0.8379 (1.1064) acc 81.2500 (72.3500) lr 1.3090e-03 eta 6:11:24 +epoch [22/50] batch [255/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.0625 (1.1067) acc 78.1250 (72.4142) lr 1.3090e-03 eta 6:11:15 +epoch [22/50] batch [260/500] time 1.565 (1.564) data 0.001 (0.003) loss 1.6328 (1.1121) acc 65.6250 (72.2115) lr 1.3090e-03 eta 6:11:05 +epoch [22/50] batch [265/500] time 1.553 (1.563) data 0.000 (0.003) loss 1.1348 (1.1108) acc 71.8750 (72.2406) lr 1.3090e-03 eta 6:10:54 +epoch [22/50] batch [270/500] time 1.561 (1.563) data 0.000 (0.003) loss 0.8896 (1.1095) acc 81.2500 (72.2454) lr 1.3090e-03 eta 6:10:43 +epoch [22/50] batch [275/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.9404 (1.1080) acc 75.0000 (72.2955) lr 1.3090e-03 eta 6:10:34 +epoch [22/50] batch [280/500] time 1.573 (1.563) data 0.001 (0.003) loss 0.6382 (1.1056) acc 78.1250 (72.3884) lr 1.3090e-03 eta 6:10:26 +epoch [22/50] batch [285/500] time 1.557 (1.563) data 0.000 (0.003) loss 0.7793 (1.1016) acc 81.2500 (72.4561) lr 1.3090e-03 eta 6:10:19 +epoch [22/50] batch [290/500] time 1.559 (1.563) data 0.000 (0.003) loss 1.2871 (1.1026) acc 81.2500 (72.4246) lr 1.3090e-03 eta 6:10:13 +epoch [22/50] batch [295/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.1572 (1.0995) acc 71.8750 (72.4894) lr 1.3090e-03 eta 6:10:06 +epoch [22/50] batch [300/500] time 1.546 (1.563) data 0.000 (0.003) loss 0.9819 (1.0993) acc 68.7500 (72.5000) lr 1.3090e-03 eta 6:09:59 +epoch [22/50] batch [305/500] time 1.564 (1.563) data 0.000 (0.003) loss 0.9351 
(1.0953) acc 81.2500 (72.5922) lr 1.3090e-03 eta 6:09:50 +epoch [22/50] batch [310/500] time 1.563 (1.563) data 0.001 (0.003) loss 0.5645 (1.0919) acc 90.6250 (72.6512) lr 1.3090e-03 eta 6:09:43 +epoch [22/50] batch [315/500] time 1.552 (1.563) data 0.000 (0.003) loss 1.0068 (1.0901) acc 68.7500 (72.6687) lr 1.3090e-03 eta 6:09:34 +epoch [22/50] batch [320/500] time 1.581 (1.563) data 0.000 (0.003) loss 0.9536 (1.0881) acc 75.0000 (72.7246) lr 1.3090e-03 eta 6:09:26 +epoch [22/50] batch [325/500] time 1.581 (1.564) data 0.000 (0.003) loss 0.8638 (1.0864) acc 75.0000 (72.7308) lr 1.3090e-03 eta 6:09:24 +epoch [22/50] batch [330/500] time 1.544 (1.564) data 0.000 (0.003) loss 1.0410 (1.0844) acc 75.0000 (72.7273) lr 1.3090e-03 eta 6:09:15 +epoch [22/50] batch [335/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.1240 (1.0876) acc 71.8750 (72.6399) lr 1.3090e-03 eta 6:09:04 +epoch [22/50] batch [340/500] time 1.541 (1.563) data 0.001 (0.003) loss 1.4648 (1.0900) acc 68.7500 (72.5919) lr 1.3090e-03 eta 6:08:55 +epoch [22/50] batch [345/500] time 1.549 (1.563) data 0.000 (0.003) loss 1.4072 (1.0937) acc 62.5000 (72.4547) lr 1.3090e-03 eta 6:08:46 +epoch [22/50] batch [350/500] time 1.575 (1.563) data 0.000 (0.003) loss 1.0049 (1.0921) acc 75.0000 (72.4732) lr 1.3090e-03 eta 6:08:39 +epoch [22/50] batch [355/500] time 1.559 (1.563) data 0.000 (0.003) loss 1.7021 (1.0903) acc 68.7500 (72.5352) lr 1.3090e-03 eta 6:08:30 +epoch [22/50] batch [360/500] time 1.571 (1.563) data 0.001 (0.003) loss 1.0205 (1.0876) acc 68.7500 (72.5694) lr 1.3090e-03 eta 6:08:21 +epoch [22/50] batch [365/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.2070 (1.0853) acc 75.0000 (72.6627) lr 1.3090e-03 eta 6:08:11 +epoch [22/50] batch [370/500] time 1.571 (1.563) data 0.000 (0.002) loss 1.6025 (1.0858) acc 68.7500 (72.6605) lr 1.3090e-03 eta 6:08:05 +epoch [22/50] batch [375/500] time 1.570 (1.563) data 0.000 (0.002) loss 1.1406 (1.0886) acc 68.7500 (72.5750) lr 1.3090e-03 eta 6:07:56 +epoch 
[22/50] batch [380/500] time 1.530 (1.563) data 0.000 (0.002) loss 0.8994 (1.0867) acc 75.0000 (72.5905) lr 1.3090e-03 eta 6:07:46 +epoch [22/50] batch [385/500] time 1.555 (1.563) data 0.001 (0.002) loss 1.1299 (1.0860) acc 62.5000 (72.6136) lr 1.3090e-03 eta 6:07:37 +epoch [22/50] batch [390/500] time 1.563 (1.563) data 0.000 (0.002) loss 1.5527 (1.0857) acc 68.7500 (72.6282) lr 1.3090e-03 eta 6:07:28 +epoch [22/50] batch [395/500] time 1.545 (1.563) data 0.001 (0.002) loss 0.7490 (1.0833) acc 78.1250 (72.6820) lr 1.3090e-03 eta 6:07:19 +epoch [22/50] batch [400/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.2070 (1.0839) acc 68.7500 (72.6797) lr 1.3090e-03 eta 6:07:09 +epoch [22/50] batch [405/500] time 1.551 (1.562) data 0.000 (0.002) loss 1.4277 (1.0860) acc 65.6250 (72.6698) lr 1.3090e-03 eta 6:07:00 +epoch [22/50] batch [410/500] time 1.572 (1.562) data 0.000 (0.002) loss 1.5479 (1.0886) acc 53.1250 (72.5610) lr 1.3090e-03 eta 6:06:52 +epoch [22/50] batch [415/500] time 1.554 (1.562) data 0.000 (0.002) loss 1.0947 (1.0892) acc 71.8750 (72.5527) lr 1.3090e-03 eta 6:06:44 +epoch [22/50] batch [420/500] time 1.564 (1.562) data 0.000 (0.002) loss 0.8589 (1.0921) acc 81.2500 (72.5074) lr 1.3090e-03 eta 6:06:36 +epoch [22/50] batch [425/500] time 1.550 (1.562) data 0.000 (0.002) loss 0.9839 (1.0917) acc 71.8750 (72.5294) lr 1.3090e-03 eta 6:06:28 +epoch [22/50] batch [430/500] time 1.551 (1.562) data 0.000 (0.002) loss 0.8066 (1.0896) acc 81.2500 (72.5509) lr 1.3090e-03 eta 6:06:19 +epoch [22/50] batch [435/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.0771 (1.0897) acc 62.5000 (72.5216) lr 1.3090e-03 eta 6:06:09 +epoch [22/50] batch [440/500] time 1.597 (1.562) data 0.001 (0.002) loss 0.9722 (1.0872) acc 71.8750 (72.5071) lr 1.3090e-03 eta 6:06:01 +epoch [22/50] batch [445/500] time 1.553 (1.562) data 0.000 (0.002) loss 0.9824 (1.0850) acc 84.3750 (72.5632) lr 1.3090e-03 eta 6:05:54 +epoch [22/50] batch [450/500] time 1.542 (1.562) data 0.000 (0.002) loss 
0.8828 (1.0846) acc 81.2500 (72.6111) lr 1.3090e-03 eta 6:05:46 +epoch [22/50] batch [455/500] time 1.544 (1.562) data 0.000 (0.002) loss 0.7896 (1.0828) acc 75.0000 (72.6305) lr 1.3090e-03 eta 6:05:36 +epoch [22/50] batch [460/500] time 1.594 (1.562) data 0.001 (0.002) loss 1.1934 (1.0818) acc 68.7500 (72.6630) lr 1.3090e-03 eta 6:05:30 +epoch [22/50] batch [465/500] time 1.646 (1.562) data 0.001 (0.002) loss 1.3174 (1.0811) acc 68.7500 (72.7083) lr 1.3090e-03 eta 6:05:23 +epoch [22/50] batch [470/500] time 1.550 (1.562) data 0.000 (0.002) loss 1.0986 (1.0807) acc 71.8750 (72.6862) lr 1.3090e-03 eta 6:05:16 +epoch [22/50] batch [475/500] time 1.550 (1.562) data 0.000 (0.002) loss 0.8862 (1.0777) acc 75.0000 (72.6974) lr 1.3090e-03 eta 6:05:07 +epoch [22/50] batch [480/500] time 1.561 (1.562) data 0.000 (0.002) loss 0.5645 (1.0768) acc 84.3750 (72.7344) lr 1.3090e-03 eta 6:04:59 +epoch [22/50] batch [485/500] time 1.541 (1.562) data 0.001 (0.002) loss 1.3789 (1.0788) acc 65.6250 (72.6933) lr 1.3090e-03 eta 6:04:51 +epoch [22/50] batch [490/500] time 1.578 (1.562) data 0.000 (0.002) loss 1.0098 (1.0777) acc 71.8750 (72.7232) lr 1.3090e-03 eta 6:04:43 +epoch [22/50] batch [495/500] time 1.554 (1.562) data 0.000 (0.002) loss 0.9741 (1.0766) acc 68.7500 (72.7462) lr 1.3090e-03 eta 6:04:35 +epoch [22/50] batch [500/500] time 1.555 (1.562) data 0.000 (0.002) loss 0.7769 (1.0764) acc 75.0000 (72.7562) lr 1.2487e-03 eta 6:04:27 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,053 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [23/50] batch [5/500] time 1.532 (1.664) data 0.000 (0.160) loss 1.1143 (1.0252) acc 75.0000 (75.0000) lr 1.2487e-03 eta 6:28:05 +epoch [23/50] batch [10/500] time 1.544 (1.614) data 0.000 (0.080) loss 0.7446 (1.0323) acc 84.3750 (74.0625) lr 1.2487e-03 eta 6:16:20 +epoch [23/50] batch 
[15/500] time 1.534 (1.593) data 0.001 (0.054) loss 0.9395 (1.0508) acc 68.7500 (73.3333) lr 1.2487e-03 eta 6:11:21 +epoch [23/50] batch [20/500] time 1.563 (1.590) data 0.001 (0.040) loss 1.3574 (1.0665) acc 75.0000 (73.4375) lr 1.2487e-03 eta 6:10:34 +epoch [23/50] batch [25/500] time 1.565 (1.584) data 0.000 (0.032) loss 0.7012 (1.0445) acc 78.1250 (74.0000) lr 1.2487e-03 eta 6:09:01 +epoch [23/50] batch [30/500] time 1.564 (1.580) data 0.001 (0.027) loss 1.3467 (1.0660) acc 59.3750 (73.2292) lr 1.2487e-03 eta 6:07:56 +epoch [23/50] batch [35/500] time 1.555 (1.576) data 0.001 (0.023) loss 1.1641 (1.0544) acc 59.3750 (72.8571) lr 1.2487e-03 eta 6:06:50 +epoch [23/50] batch [40/500] time 1.550 (1.574) data 0.000 (0.020) loss 1.2061 (1.0850) acc 68.7500 (72.2656) lr 1.2487e-03 eta 6:06:13 +epoch [23/50] batch [45/500] time 1.559 (1.573) data 0.001 (0.018) loss 0.8066 (1.1002) acc 78.1250 (71.9444) lr 1.2487e-03 eta 6:05:46 +epoch [23/50] batch [50/500] time 1.546 (1.570) data 0.000 (0.016) loss 1.6123 (1.1020) acc 62.5000 (72.3125) lr 1.2487e-03 eta 6:05:03 +epoch [23/50] batch [55/500] time 1.557 (1.569) data 0.000 (0.015) loss 0.5884 (1.0914) acc 81.2500 (72.2727) lr 1.2487e-03 eta 6:04:39 +epoch [23/50] batch [60/500] time 1.552 (1.568) data 0.000 (0.014) loss 1.6201 (1.1029) acc 59.3750 (72.0833) lr 1.2487e-03 eta 6:04:18 +epoch [23/50] batch [65/500] time 1.571 (1.568) data 0.000 (0.013) loss 1.2832 (1.1096) acc 75.0000 (72.1154) lr 1.2487e-03 eta 6:04:09 +epoch [23/50] batch [70/500] time 1.562 (1.567) data 0.000 (0.012) loss 0.7466 (1.1092) acc 81.2500 (72.2321) lr 1.2487e-03 eta 6:03:55 +epoch [23/50] batch [75/500] time 1.592 (1.567) data 0.000 (0.011) loss 0.9614 (1.1185) acc 78.1250 (72.1667) lr 1.2487e-03 eta 6:03:47 +epoch [23/50] batch [80/500] time 1.540 (1.567) data 0.000 (0.010) loss 0.8613 (1.1166) acc 68.7500 (72.0312) lr 1.2487e-03 eta 6:03:35 +epoch [23/50] batch [85/500] time 1.542 (1.567) data 0.001 (0.010) loss 1.0771 (1.1080) acc 75.0000 
(72.2426) lr 1.2487e-03 eta 6:03:18 +epoch [23/50] batch [90/500] time 1.567 (1.567) data 0.001 (0.009) loss 1.0186 (1.1063) acc 78.1250 (72.1181) lr 1.2487e-03 eta 6:03:10 +epoch [23/50] batch [95/500] time 1.553 (1.566) data 0.001 (0.009) loss 1.1621 (1.1059) acc 78.1250 (72.3026) lr 1.2487e-03 eta 6:02:57 +epoch [23/50] batch [100/500] time 1.544 (1.565) data 0.000 (0.008) loss 1.0137 (1.0988) acc 65.6250 (72.4688) lr 1.2487e-03 eta 6:02:39 +epoch [23/50] batch [105/500] time 1.569 (1.565) data 0.000 (0.008) loss 0.7051 (1.0943) acc 81.2500 (72.5000) lr 1.2487e-03 eta 6:02:28 +epoch [23/50] batch [110/500] time 1.565 (1.565) data 0.001 (0.008) loss 1.1250 (1.0938) acc 71.8750 (72.3864) lr 1.2487e-03 eta 6:02:22 +epoch [23/50] batch [115/500] time 1.649 (1.566) data 0.001 (0.007) loss 0.8867 (1.0896) acc 75.0000 (72.5000) lr 1.2487e-03 eta 6:02:19 +epoch [23/50] batch [120/500] time 1.542 (1.565) data 0.000 (0.007) loss 1.5537 (1.0938) acc 68.7500 (72.4479) lr 1.2487e-03 eta 6:02:04 +epoch [23/50] batch [125/500] time 1.538 (1.564) data 0.000 (0.007) loss 0.7017 (1.0900) acc 90.6250 (72.5750) lr 1.2487e-03 eta 6:01:43 +epoch [23/50] batch [130/500] time 1.551 (1.564) data 0.000 (0.007) loss 1.5527 (1.0929) acc 65.6250 (72.5721) lr 1.2487e-03 eta 6:01:33 +epoch [23/50] batch [135/500] time 1.536 (1.563) data 0.000 (0.006) loss 0.5537 (1.0834) acc 81.2500 (72.6620) lr 1.2487e-03 eta 6:01:16 +epoch [23/50] batch [140/500] time 1.569 (1.563) data 0.001 (0.006) loss 0.8579 (1.0883) acc 81.2500 (72.6562) lr 1.2487e-03 eta 6:01:02 +epoch [23/50] batch [145/500] time 1.542 (1.562) data 0.000 (0.006) loss 0.7998 (1.0867) acc 81.2500 (72.6724) lr 1.2487e-03 eta 6:00:46 +epoch [23/50] batch [150/500] time 1.601 (1.562) data 0.001 (0.006) loss 0.3425 (1.0773) acc 90.6250 (72.8958) lr 1.2487e-03 eta 6:00:35 +epoch [23/50] batch [155/500] time 1.555 (1.562) data 0.000 (0.006) loss 1.5127 (1.0804) acc 56.2500 (72.8831) lr 1.2487e-03 eta 6:00:29 +epoch [23/50] batch [160/500] 
time 1.550 (1.562) data 0.000 (0.005) loss 0.8413 (1.0738) acc 75.0000 (72.9688) lr 1.2487e-03 eta 6:00:21 +epoch [23/50] batch [165/500] time 1.544 (1.562) data 0.001 (0.005) loss 1.2432 (1.0695) acc 68.7500 (73.0303) lr 1.2487e-03 eta 6:00:12 +epoch [23/50] batch [170/500] time 1.556 (1.562) data 0.001 (0.005) loss 0.8301 (1.0720) acc 71.8750 (72.9228) lr 1.2487e-03 eta 5:59:58 +epoch [23/50] batch [175/500] time 1.556 (1.562) data 0.001 (0.005) loss 1.0898 (1.0728) acc 71.8750 (72.9286) lr 1.2487e-03 eta 5:59:48 +epoch [23/50] batch [180/500] time 1.562 (1.562) data 0.000 (0.005) loss 1.1074 (1.0716) acc 75.0000 (73.0382) lr 1.2487e-03 eta 5:59:41 +epoch [23/50] batch [185/500] time 1.570 (1.562) data 0.001 (0.005) loss 1.2549 (1.0717) acc 68.7500 (73.0743) lr 1.2487e-03 eta 5:59:32 +epoch [23/50] batch [190/500] time 1.544 (1.561) data 0.001 (0.005) loss 0.9932 (1.0738) acc 75.0000 (73.0428) lr 1.2487e-03 eta 5:59:21 +epoch [23/50] batch [195/500] time 1.570 (1.561) data 0.001 (0.005) loss 1.0303 (1.0709) acc 71.8750 (73.1250) lr 1.2487e-03 eta 5:59:13 +epoch [23/50] batch [200/500] time 1.544 (1.561) data 0.000 (0.004) loss 0.4429 (1.0684) acc 93.7500 (73.3125) lr 1.2487e-03 eta 5:59:03 +epoch [23/50] batch [205/500] time 1.569 (1.561) data 0.001 (0.004) loss 0.8330 (1.0656) acc 78.1250 (73.2470) lr 1.2487e-03 eta 5:58:54 +epoch [23/50] batch [210/500] time 1.575 (1.561) data 0.001 (0.004) loss 0.5850 (1.0613) acc 81.2500 (73.2887) lr 1.2487e-03 eta 5:58:46 +epoch [23/50] batch [215/500] time 1.583 (1.561) data 0.001 (0.004) loss 1.2510 (1.0663) acc 78.1250 (73.2413) lr 1.2487e-03 eta 5:58:37 +epoch [23/50] batch [220/500] time 1.564 (1.561) data 0.001 (0.004) loss 1.0801 (1.0676) acc 78.1250 (73.2812) lr 1.2487e-03 eta 5:58:25 +epoch [23/50] batch [225/500] time 1.554 (1.560) data 0.001 (0.004) loss 0.9468 (1.0709) acc 75.0000 (73.1944) lr 1.2487e-03 eta 5:58:12 +epoch [23/50] batch [230/500] time 1.563 (1.560) data 0.000 (0.004) loss 1.2188 (1.0746) acc 
75.0000 (73.1929) lr 1.2487e-03 eta 5:58:03 +epoch [23/50] batch [235/500] time 1.564 (1.560) data 0.000 (0.004) loss 0.6997 (1.0725) acc 81.2500 (73.2181) lr 1.2487e-03 eta 5:57:53 +epoch [23/50] batch [240/500] time 1.578 (1.560) data 0.000 (0.004) loss 0.8716 (1.0727) acc 68.7500 (73.1510) lr 1.2487e-03 eta 5:57:46 +epoch [23/50] batch [245/500] time 1.559 (1.560) data 0.001 (0.004) loss 1.3906 (1.0775) acc 71.8750 (72.9847) lr 1.2487e-03 eta 5:57:36 +epoch [23/50] batch [250/500] time 1.563 (1.560) data 0.000 (0.004) loss 1.0127 (1.0784) acc 78.1250 (73.0125) lr 1.2487e-03 eta 5:57:25 +epoch [23/50] batch [255/500] time 1.533 (1.559) data 0.001 (0.004) loss 0.8960 (1.0785) acc 84.3750 (73.0882) lr 1.2487e-03 eta 5:57:14 +epoch [23/50] batch [260/500] time 1.548 (1.560) data 0.000 (0.004) loss 1.1250 (1.0813) acc 62.5000 (73.0168) lr 1.2487e-03 eta 5:57:11 +epoch [23/50] batch [265/500] time 1.555 (1.560) data 0.000 (0.004) loss 0.6743 (1.0774) acc 84.3750 (73.1250) lr 1.2487e-03 eta 5:57:01 +epoch [23/50] batch [270/500] time 1.585 (1.560) data 0.000 (0.003) loss 1.1377 (1.0771) acc 75.0000 (73.1250) lr 1.2487e-03 eta 5:56:55 +epoch [23/50] batch [275/500] time 1.563 (1.560) data 0.000 (0.003) loss 1.3115 (1.0817) acc 71.8750 (73.0568) lr 1.2487e-03 eta 5:56:51 +epoch [23/50] batch [280/500] time 1.588 (1.560) data 0.001 (0.003) loss 1.1436 (1.0810) acc 71.8750 (73.0357) lr 1.2487e-03 eta 5:56:45 +epoch [23/50] batch [285/500] time 1.586 (1.560) data 0.001 (0.003) loss 0.9653 (1.0798) acc 84.3750 (73.0154) lr 1.2487e-03 eta 5:56:39 +epoch [23/50] batch [290/500] time 1.528 (1.560) data 0.001 (0.003) loss 1.4482 (1.0794) acc 65.6250 (73.0496) lr 1.2487e-03 eta 5:56:31 +epoch [23/50] batch [295/500] time 1.550 (1.560) data 0.000 (0.003) loss 1.4307 (1.0837) acc 56.2500 (72.9661) lr 1.2487e-03 eta 5:56:24 +epoch [23/50] batch [300/500] time 1.570 (1.560) data 0.000 (0.003) loss 0.7959 (1.0849) acc 81.2500 (72.9479) lr 1.2487e-03 eta 5:56:16 +epoch [23/50] batch 
[305/500] time 1.571 (1.561) data 0.000 (0.003) loss 0.9673 (1.0877) acc 75.0000 (72.8996) lr 1.2487e-03 eta 5:56:16 +epoch [23/50] batch [310/500] time 1.564 (1.561) data 0.000 (0.003) loss 0.9980 (1.0876) acc 68.7500 (72.9234) lr 1.2487e-03 eta 5:56:09 +epoch [23/50] batch [315/500] time 1.589 (1.561) data 0.000 (0.003) loss 0.6787 (1.0876) acc 81.2500 (72.9067) lr 1.2487e-03 eta 5:56:01 +epoch [23/50] batch [320/500] time 1.571 (1.561) data 0.000 (0.003) loss 1.5146 (1.0876) acc 62.5000 (72.8906) lr 1.2487e-03 eta 5:55:54 +epoch [23/50] batch [325/500] time 1.569 (1.561) data 0.000 (0.003) loss 1.0420 (1.0875) acc 65.6250 (72.8654) lr 1.2487e-03 eta 5:55:47 +epoch [23/50] batch [330/500] time 1.554 (1.561) data 0.001 (0.003) loss 0.8599 (1.0865) acc 71.8750 (72.9356) lr 1.2487e-03 eta 5:55:38 +epoch [23/50] batch [335/500] time 1.567 (1.561) data 0.000 (0.003) loss 1.1084 (1.0861) acc 71.8750 (72.9011) lr 1.2487e-03 eta 5:55:29 +epoch [23/50] batch [340/500] time 1.559 (1.561) data 0.000 (0.003) loss 0.7344 (1.0865) acc 78.1250 (72.8860) lr 1.2487e-03 eta 5:55:20 +epoch [23/50] batch [345/500] time 1.552 (1.561) data 0.000 (0.003) loss 1.4053 (1.0838) acc 71.8750 (72.9529) lr 1.2487e-03 eta 5:55:12 +epoch [23/50] batch [350/500] time 1.568 (1.561) data 0.000 (0.003) loss 0.8760 (1.0814) acc 71.8750 (72.9911) lr 1.2487e-03 eta 5:55:06 +epoch [23/50] batch [355/500] time 1.578 (1.561) data 0.000 (0.003) loss 0.9639 (1.0806) acc 81.2500 (73.0194) lr 1.2487e-03 eta 5:54:57 +epoch [23/50] batch [360/500] time 1.546 (1.561) data 0.000 (0.003) loss 2.0898 (1.0833) acc 59.3750 (72.9688) lr 1.2487e-03 eta 5:54:47 +epoch [23/50] batch [365/500] time 1.536 (1.561) data 0.000 (0.003) loss 0.5498 (1.0854) acc 84.3750 (72.9195) lr 1.2487e-03 eta 5:54:38 +epoch [23/50] batch [370/500] time 1.528 (1.561) data 0.000 (0.003) loss 0.8999 (1.0834) acc 71.8750 (72.9645) lr 1.2487e-03 eta 5:54:29 +epoch [23/50] batch [375/500] time 1.556 (1.560) data 0.000 (0.003) loss 1.0342 
(1.0850) acc 68.7500 (72.9167) lr 1.2487e-03 eta 5:54:19 +epoch [23/50] batch [380/500] time 1.562 (1.560) data 0.000 (0.003) loss 0.6108 (1.0845) acc 84.3750 (72.9276) lr 1.2487e-03 eta 5:54:11 +epoch [23/50] batch [385/500] time 1.546 (1.560) data 0.000 (0.003) loss 0.8779 (1.0846) acc 81.2500 (72.9383) lr 1.2487e-03 eta 5:53:59 +epoch [23/50] batch [390/500] time 1.564 (1.560) data 0.000 (0.003) loss 0.8145 (1.0847) acc 75.0000 (72.9247) lr 1.2487e-03 eta 5:53:52 +epoch [23/50] batch [395/500] time 1.588 (1.560) data 0.000 (0.002) loss 1.3320 (1.0850) acc 65.6250 (72.9430) lr 1.2487e-03 eta 5:53:45 +epoch [23/50] batch [400/500] time 1.559 (1.560) data 0.000 (0.002) loss 1.5283 (1.0860) acc 62.5000 (72.8906) lr 1.2487e-03 eta 5:53:36 +epoch [23/50] batch [405/500] time 1.549 (1.560) data 0.000 (0.002) loss 0.9307 (1.0848) acc 81.2500 (72.9552) lr 1.2487e-03 eta 5:53:31 +epoch [23/50] batch [410/500] time 1.571 (1.560) data 0.000 (0.002) loss 1.6309 (1.0848) acc 65.6250 (72.9649) lr 1.2487e-03 eta 5:53:24 +epoch [23/50] batch [415/500] time 1.570 (1.560) data 0.001 (0.002) loss 0.9111 (1.0826) acc 68.7500 (72.9669) lr 1.2487e-03 eta 5:53:15 +epoch [23/50] batch [420/500] time 1.545 (1.560) data 0.000 (0.002) loss 1.3818 (1.0837) acc 68.7500 (72.9911) lr 1.2487e-03 eta 5:53:07 +epoch [23/50] batch [425/500] time 1.539 (1.560) data 0.000 (0.002) loss 0.6890 (1.0829) acc 81.2500 (73.0294) lr 1.2487e-03 eta 5:52:56 +epoch [23/50] batch [430/500] time 1.572 (1.560) data 0.000 (0.002) loss 0.6982 (1.0832) acc 75.0000 (72.9942) lr 1.2487e-03 eta 5:52:47 +epoch [23/50] batch [435/500] time 1.551 (1.560) data 0.000 (0.002) loss 1.2354 (1.0825) acc 75.0000 (72.9957) lr 1.2487e-03 eta 5:52:39 +epoch [23/50] batch [440/500] time 1.554 (1.560) data 0.000 (0.002) loss 1.1992 (1.0855) acc 75.0000 (72.9830) lr 1.2487e-03 eta 5:52:31 +epoch [23/50] batch [445/500] time 1.649 (1.560) data 0.000 (0.002) loss 1.2939 (1.0889) acc 65.6250 (72.9073) lr 1.2487e-03 eta 5:52:28 +epoch 
[23/50] batch [450/500] time 1.596 (1.560) data 0.000 (0.002) loss 0.8813 (1.0926) acc 78.1250 (72.8472) lr 1.2487e-03 eta 5:52:22 +epoch [23/50] batch [455/500] time 1.599 (1.560) data 0.000 (0.002) loss 1.3623 (1.0955) acc 65.6250 (72.8091) lr 1.2487e-03 eta 5:52:15 +epoch [23/50] batch [460/500] time 1.544 (1.560) data 0.001 (0.002) loss 0.7642 (1.0947) acc 81.2500 (72.8601) lr 1.2487e-03 eta 5:52:06 +epoch [23/50] batch [465/500] time 1.588 (1.560) data 0.000 (0.002) loss 1.4287 (1.0955) acc 71.8750 (72.8360) lr 1.2487e-03 eta 5:51:59 +epoch [23/50] batch [470/500] time 1.564 (1.560) data 0.000 (0.002) loss 0.9688 (1.0947) acc 78.1250 (72.7926) lr 1.2487e-03 eta 5:51:51 +epoch [23/50] batch [475/500] time 1.548 (1.560) data 0.000 (0.002) loss 1.0518 (1.0963) acc 65.6250 (72.7566) lr 1.2487e-03 eta 5:51:41 +epoch [23/50] batch [480/500] time 1.562 (1.560) data 0.000 (0.002) loss 1.5547 (1.0946) acc 68.7500 (72.8190) lr 1.2487e-03 eta 5:51:31 +epoch [23/50] batch [485/500] time 1.552 (1.560) data 0.001 (0.002) loss 1.1592 (1.0935) acc 78.1250 (72.8673) lr 1.2487e-03 eta 5:51:22 +epoch [23/50] batch [490/500] time 1.557 (1.560) data 0.000 (0.002) loss 1.2598 (1.0930) acc 65.6250 (72.8763) lr 1.2487e-03 eta 5:51:14 +epoch [23/50] batch [495/500] time 1.546 (1.560) data 0.000 (0.002) loss 1.2002 (1.0925) acc 71.8750 (72.8788) lr 1.2487e-03 eta 5:51:05 +epoch [23/50] batch [500/500] time 1.535 (1.560) data 0.000 (0.002) loss 0.9116 (1.0927) acc 78.1250 (72.9062) lr 1.1874e-03 eta 5:50:55 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,947 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +epoch [24/50] batch [5/500] time 1.544 (1.728) data 0.000 (0.202) loss 1.2100 (1.2296) acc 71.8750 (69.3750) lr 1.1874e-03 eta 6:28:45 +epoch [24/50] batch [10/500] time 1.553 (1.643) data 0.001 (0.101) loss 0.9985 (1.1294) acc 75.0000 (71.2500) lr 1.1874e-03 eta 6:09:30 +epoch [24/50] batch [15/500] time 1.555 (1.617) data 0.001 (0.068) loss 0.4429 
(1.0226) acc 84.3750 (73.1250) lr 1.1874e-03 eta 6:03:21 +epoch [24/50] batch [20/500] time 1.535 (1.598) data 0.000 (0.051) loss 1.9072 (1.0740) acc 59.3750 (72.3438) lr 1.1874e-03 eta 5:59:06 +epoch [24/50] batch [25/500] time 1.557 (1.590) data 0.000 (0.041) loss 1.0566 (1.0590) acc 71.8750 (72.7500) lr 1.1874e-03 eta 5:57:03 +epoch [24/50] batch [30/500] time 1.570 (1.586) data 0.001 (0.034) loss 0.8350 (1.0584) acc 75.0000 (72.2917) lr 1.1874e-03 eta 5:56:04 +epoch [24/50] batch [35/500] time 1.571 (1.583) data 0.000 (0.029) loss 0.3411 (1.0336) acc 87.5000 (73.3036) lr 1.1874e-03 eta 5:55:11 +epoch [24/50] batch [40/500] time 1.533 (1.579) data 0.000 (0.026) loss 0.7666 (1.0215) acc 78.1250 (72.8906) lr 1.1874e-03 eta 5:54:16 +epoch [24/50] batch [45/500] time 1.568 (1.578) data 0.000 (0.023) loss 0.9771 (1.0156) acc 75.0000 (72.9167) lr 1.1874e-03 eta 5:53:54 +epoch [24/50] batch [50/500] time 1.572 (1.578) data 0.000 (0.021) loss 0.6255 (1.0208) acc 81.2500 (73.0625) lr 1.1874e-03 eta 5:53:41 +epoch [24/50] batch [55/500] time 1.573 (1.576) data 0.001 (0.019) loss 0.5908 (1.0201) acc 81.2500 (73.0114) lr 1.1874e-03 eta 5:53:15 +epoch [24/50] batch [60/500] time 1.579 (1.575) data 0.000 (0.017) loss 1.6836 (1.0230) acc 71.8750 (73.3854) lr 1.1874e-03 eta 5:52:54 +epoch [24/50] batch [65/500] time 1.564 (1.575) data 0.000 (0.016) loss 1.2842 (1.0404) acc 65.6250 (72.9327) lr 1.1874e-03 eta 5:52:37 +epoch [24/50] batch [70/500] time 1.567 (1.574) data 0.001 (0.015) loss 0.8384 (1.0432) acc 84.3750 (73.3482) lr 1.1874e-03 eta 5:52:14 +epoch [24/50] batch [75/500] time 1.559 (1.572) data 0.001 (0.014) loss 0.7246 (1.0391) acc 75.0000 (73.2500) lr 1.1874e-03 eta 5:51:46 +epoch [24/50] batch [80/500] time 1.522 (1.571) data 0.001 (0.013) loss 1.0234 (1.0380) acc 68.7500 (73.3203) lr 1.1874e-03 eta 5:51:19 +epoch [24/50] batch [85/500] time 1.566 (1.570) data 0.000 (0.012) loss 1.1045 (1.0506) acc 68.7500 (73.1985) lr 1.1874e-03 eta 5:50:58 +epoch [24/50] batch 
[90/500] time 1.564 (1.569) data 0.001 (0.012) loss 1.7607 (1.0592) acc 56.2500 (73.0208) lr 1.1874e-03 eta 5:50:43 +epoch [24/50] batch [95/500] time 1.570 (1.570) data 0.000 (0.011) loss 0.7896 (1.0500) acc 75.0000 (73.1579) lr 1.1874e-03 eta 5:50:44 +epoch [24/50] batch [100/500] time 1.575 (1.569) data 0.000 (0.011) loss 1.1562 (1.0550) acc 71.8750 (73.0312) lr 1.1874e-03 eta 5:50:28 +epoch [24/50] batch [105/500] time 1.549 (1.569) data 0.001 (0.010) loss 0.9624 (1.0584) acc 68.7500 (73.0060) lr 1.1874e-03 eta 5:50:19 +epoch [24/50] batch [110/500] time 1.560 (1.569) data 0.000 (0.010) loss 0.7817 (1.0481) acc 78.1250 (73.3239) lr 1.1874e-03 eta 5:50:02 +epoch [24/50] batch [115/500] time 1.587 (1.568) data 0.001 (0.009) loss 0.9980 (1.0561) acc 84.3750 (73.2337) lr 1.1874e-03 eta 5:49:53 +epoch [24/50] batch [120/500] time 1.537 (1.568) data 0.000 (0.009) loss 0.9878 (1.0581) acc 78.1250 (72.9948) lr 1.1874e-03 eta 5:49:38 +epoch [24/50] batch [125/500] time 1.544 (1.567) data 0.001 (0.009) loss 0.8569 (1.0571) acc 78.1250 (72.9250) lr 1.1874e-03 eta 5:49:22 +epoch [24/50] batch [130/500] time 1.556 (1.567) data 0.000 (0.008) loss 1.4844 (1.0651) acc 68.7500 (72.7163) lr 1.1874e-03 eta 5:49:14 +epoch [24/50] batch [135/500] time 1.562 (1.567) data 0.000 (0.008) loss 1.2764 (1.0702) acc 65.6250 (72.6620) lr 1.1874e-03 eta 5:48:58 +epoch [24/50] batch [140/500] time 1.547 (1.566) data 0.000 (0.008) loss 1.1875 (1.0732) acc 75.0000 (72.5000) lr 1.1874e-03 eta 5:48:44 +epoch [24/50] batch [145/500] time 1.560 (1.567) data 0.000 (0.007) loss 1.2266 (1.0694) acc 62.5000 (72.6078) lr 1.1874e-03 eta 5:48:48 +epoch [24/50] batch [150/500] time 1.584 (1.567) data 0.001 (0.007) loss 1.3584 (1.0733) acc 71.8750 (72.5833) lr 1.1874e-03 eta 5:48:44 +epoch [24/50] batch [155/500] time 1.567 (1.568) data 0.000 (0.007) loss 0.9009 (1.0726) acc 78.1250 (72.7218) lr 1.1874e-03 eta 5:48:42 +epoch [24/50] batch [160/500] time 1.578 (1.568) data 0.000 (0.007) loss 0.9609 (1.0724) 
acc 75.0000 (72.8125) lr 1.1874e-03 eta 5:48:36 +epoch [24/50] batch [165/500] time 1.565 (1.568) data 0.000 (0.007) loss 1.5791 (1.0724) acc 62.5000 (72.7273) lr 1.1874e-03 eta 5:48:25 +epoch [24/50] batch [170/500] time 1.570 (1.568) data 0.001 (0.006) loss 1.4102 (1.0785) acc 62.5000 (72.5184) lr 1.1874e-03 eta 5:48:16 +epoch [24/50] batch [175/500] time 1.557 (1.567) data 0.000 (0.006) loss 1.5527 (1.0797) acc 65.6250 (72.5000) lr 1.1874e-03 eta 5:48:03 +epoch [24/50] batch [180/500] time 1.574 (1.567) data 0.001 (0.006) loss 1.5312 (1.0788) acc 59.3750 (72.5000) lr 1.1874e-03 eta 5:47:52 +epoch [24/50] batch [185/500] time 1.552 (1.567) data 0.001 (0.006) loss 1.1816 (1.0868) acc 75.0000 (72.4493) lr 1.1874e-03 eta 5:47:39 +epoch [24/50] batch [190/500] time 1.562 (1.567) data 0.000 (0.006) loss 1.2656 (1.0878) acc 78.1250 (72.5329) lr 1.1874e-03 eta 5:47:33 +epoch [24/50] batch [195/500] time 1.576 (1.567) data 0.000 (0.006) loss 1.1309 (1.0827) acc 68.7500 (72.5481) lr 1.1874e-03 eta 5:47:27 +epoch [24/50] batch [200/500] time 1.572 (1.567) data 0.001 (0.006) loss 0.8765 (1.0846) acc 78.1250 (72.5625) lr 1.1874e-03 eta 5:47:16 +epoch [24/50] batch [205/500] time 1.585 (1.567) data 0.000 (0.005) loss 0.5068 (1.0841) acc 87.5000 (72.6372) lr 1.1874e-03 eta 5:47:11 +epoch [24/50] batch [210/500] time 1.558 (1.567) data 0.001 (0.005) loss 1.6826 (1.0885) acc 65.6250 (72.6190) lr 1.1874e-03 eta 5:47:03 +epoch [24/50] batch [215/500] time 1.574 (1.567) data 0.001 (0.005) loss 1.5303 (1.0913) acc 68.7500 (72.4855) lr 1.1874e-03 eta 5:46:53 +epoch [24/50] batch [220/500] time 1.554 (1.566) data 0.001 (0.005) loss 1.2178 (1.0906) acc 75.0000 (72.5284) lr 1.1874e-03 eta 5:46:42 +epoch [24/50] batch [225/500] time 1.551 (1.566) data 0.001 (0.005) loss 1.1348 (1.0965) acc 68.7500 (72.4583) lr 1.1874e-03 eta 5:46:30 +epoch [24/50] batch [230/500] time 1.569 (1.566) data 0.001 (0.005) loss 1.4346 (1.0942) acc 65.6250 (72.5815) lr 1.1874e-03 eta 5:46:22 +epoch [24/50] 
batch [235/500] time 1.572 (1.566) data 0.001 (0.005) loss 1.2100 (1.0920) acc 65.6250 (72.5000) lr 1.1874e-03 eta 5:46:14 +epoch [24/50] batch [240/500] time 1.550 (1.566) data 0.001 (0.005) loss 0.9404 (1.0942) acc 71.8750 (72.5130) lr 1.1874e-03 eta 5:46:06 +epoch [24/50] batch [245/500] time 1.592 (1.566) data 0.000 (0.005) loss 1.1270 (1.0939) acc 68.7500 (72.5000) lr 1.1874e-03 eta 5:46:00 +epoch [24/50] batch [250/500] time 1.551 (1.566) data 0.000 (0.005) loss 1.1416 (1.0940) acc 71.8750 (72.4875) lr 1.1874e-03 eta 5:45:51 +epoch [24/50] batch [255/500] time 1.574 (1.566) data 0.000 (0.004) loss 1.5908 (1.0977) acc 71.8750 (72.4632) lr 1.1874e-03 eta 5:45:42 +epoch [24/50] batch [260/500] time 1.567 (1.566) data 0.000 (0.004) loss 1.1562 (1.0994) acc 71.8750 (72.4519) lr 1.1874e-03 eta 5:45:32 +epoch [24/50] batch [265/500] time 1.560 (1.566) data 0.000 (0.004) loss 1.0967 (1.0971) acc 71.8750 (72.5118) lr 1.1874e-03 eta 5:45:24 +epoch [24/50] batch [270/500] time 1.559 (1.566) data 0.000 (0.004) loss 1.0439 (1.0969) acc 68.7500 (72.4537) lr 1.1874e-03 eta 5:45:12 +epoch [24/50] batch [275/500] time 1.569 (1.565) data 0.001 (0.004) loss 0.8862 (1.0971) acc 81.2500 (72.5227) lr 1.1874e-03 eta 5:45:03 +epoch [24/50] batch [280/500] time 1.558 (1.565) data 0.000 (0.004) loss 1.2168 (1.0980) acc 81.2500 (72.5446) lr 1.1874e-03 eta 5:44:54 +epoch [24/50] batch [285/500] time 1.682 (1.566) data 0.000 (0.004) loss 0.8110 (1.0966) acc 75.0000 (72.5329) lr 1.1874e-03 eta 5:44:48 +epoch [24/50] batch [290/500] time 1.569 (1.565) data 0.000 (0.004) loss 0.9463 (1.0942) acc 71.8750 (72.5754) lr 1.1874e-03 eta 5:44:38 +epoch [24/50] batch [295/500] time 1.566 (1.565) data 0.001 (0.004) loss 1.5771 (1.0987) acc 56.2500 (72.4682) lr 1.1874e-03 eta 5:44:32 +epoch [24/50] batch [300/500] time 1.551 (1.565) data 0.000 (0.004) loss 1.4062 (1.1015) acc 59.3750 (72.4062) lr 1.1874e-03 eta 5:44:20 +epoch [24/50] batch [305/500] time 1.554 (1.565) data 0.001 (0.004) loss 1.0420 
(1.1049) acc 68.7500 (72.3053) lr 1.1874e-03 eta 5:44:09 +epoch [24/50] batch [310/500] time 1.561 (1.565) data 0.000 (0.004) loss 0.6094 (1.1024) acc 84.3750 (72.3690) lr 1.1874e-03 eta 5:44:00 +epoch [24/50] batch [315/500] time 1.549 (1.565) data 0.000 (0.004) loss 0.8857 (1.1034) acc 81.2500 (72.3413) lr 1.1874e-03 eta 5:43:51 +epoch [24/50] batch [320/500] time 1.577 (1.565) data 0.000 (0.004) loss 0.8984 (1.1053) acc 78.1250 (72.3340) lr 1.1874e-03 eta 5:43:45 +epoch [24/50] batch [325/500] time 1.556 (1.565) data 0.000 (0.004) loss 1.2607 (1.1033) acc 78.1250 (72.4135) lr 1.1874e-03 eta 5:43:35 +epoch [24/50] batch [330/500] time 1.541 (1.565) data 0.000 (0.004) loss 0.9932 (1.1062) acc 78.1250 (72.3958) lr 1.1874e-03 eta 5:43:30 +epoch [24/50] batch [335/500] time 1.541 (1.565) data 0.000 (0.003) loss 1.0967 (1.1048) acc 71.8750 (72.4160) lr 1.1874e-03 eta 5:43:20 +epoch [24/50] batch [340/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.0166 (1.1038) acc 68.7500 (72.4632) lr 1.1874e-03 eta 5:43:10 +epoch [24/50] batch [345/500] time 1.589 (1.565) data 0.000 (0.003) loss 1.1289 (1.1042) acc 59.3750 (72.4275) lr 1.1874e-03 eta 5:43:03 +epoch [24/50] batch [350/500] time 1.583 (1.565) data 0.000 (0.003) loss 0.7114 (1.1042) acc 71.8750 (72.3750) lr 1.1874e-03 eta 5:42:56 +epoch [24/50] batch [355/500] time 1.559 (1.565) data 0.001 (0.003) loss 1.1465 (1.1031) acc 71.8750 (72.3415) lr 1.1874e-03 eta 5:42:47 +epoch [24/50] batch [360/500] time 1.558 (1.564) data 0.000 (0.003) loss 0.8066 (1.1020) acc 84.3750 (72.3785) lr 1.1874e-03 eta 5:42:37 +epoch [24/50] batch [365/500] time 1.578 (1.564) data 0.000 (0.003) loss 1.0400 (1.0984) acc 75.0000 (72.4743) lr 1.1874e-03 eta 5:42:28 +epoch [24/50] batch [370/500] time 1.552 (1.564) data 0.000 (0.003) loss 1.4678 (1.1006) acc 65.6250 (72.3986) lr 1.1874e-03 eta 5:42:19 +epoch [24/50] batch [375/500] time 1.558 (1.564) data 0.001 (0.003) loss 0.7036 (1.0990) acc 90.6250 (72.4917) lr 1.1874e-03 eta 5:42:09 +epoch 
[24/50] batch [380/500] time 1.571 (1.564) data 0.000 (0.003) loss 0.9243 (1.0975) acc 75.0000 (72.5411) lr 1.1874e-03 eta 5:42:00 +epoch [24/50] batch [385/500] time 1.595 (1.564) data 0.001 (0.003) loss 1.1152 (1.0976) acc 68.7500 (72.5649) lr 1.1874e-03 eta 5:41:53 +epoch [24/50] batch [390/500] time 1.581 (1.564) data 0.000 (0.003) loss 0.8145 (1.0962) acc 75.0000 (72.5641) lr 1.1874e-03 eta 5:41:44 +epoch [24/50] batch [395/500] time 1.575 (1.564) data 0.001 (0.003) loss 1.0283 (1.0935) acc 75.0000 (72.6661) lr 1.1874e-03 eta 5:41:38 +epoch [24/50] batch [400/500] time 1.556 (1.564) data 0.000 (0.003) loss 1.0430 (1.0980) acc 62.5000 (72.5625) lr 1.1874e-03 eta 5:41:30 +epoch [24/50] batch [405/500] time 1.550 (1.564) data 0.000 (0.003) loss 1.2012 (1.1008) acc 65.6250 (72.4923) lr 1.1874e-03 eta 5:41:19 +epoch [24/50] batch [410/500] time 1.533 (1.564) data 0.000 (0.003) loss 1.4316 (1.1022) acc 59.3750 (72.4085) lr 1.1874e-03 eta 5:41:11 +epoch [24/50] batch [415/500] time 1.534 (1.564) data 0.000 (0.003) loss 1.5088 (1.1027) acc 62.5000 (72.3870) lr 1.1874e-03 eta 5:41:01 +epoch [24/50] batch [420/500] time 1.528 (1.563) data 0.000 (0.003) loss 1.3428 (1.1011) acc 71.8750 (72.4479) lr 1.1874e-03 eta 5:40:49 +epoch [24/50] batch [425/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.1006 (1.0990) acc 71.8750 (72.5147) lr 1.1874e-03 eta 5:40:37 +epoch [24/50] batch [430/500] time 1.559 (1.563) data 0.000 (0.003) loss 0.8452 (1.0987) acc 84.3750 (72.5000) lr 1.1874e-03 eta 5:40:31 +epoch [24/50] batch [435/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.6357 (1.0983) acc 59.3750 (72.4856) lr 1.1874e-03 eta 5:40:20 +epoch [24/50] batch [440/500] time 1.534 (1.563) data 0.000 (0.003) loss 1.4268 (1.0993) acc 68.7500 (72.4716) lr 1.1874e-03 eta 5:40:10 +epoch [24/50] batch [445/500] time 1.546 (1.563) data 0.000 (0.003) loss 1.2930 (1.0999) acc 81.2500 (72.5140) lr 1.1874e-03 eta 5:40:00 +epoch [24/50] batch [450/500] time 1.558 (1.563) data 0.000 (0.003) loss 
1.1396 (1.1010) acc 71.8750 (72.4653) lr 1.1874e-03 eta 5:39:51 +epoch [24/50] batch [455/500] time 1.554 (1.563) data 0.000 (0.003) loss 0.9312 (1.0993) acc 78.1250 (72.4931) lr 1.1874e-03 eta 5:39:42 +epoch [24/50] batch [460/500] time 1.569 (1.562) data 0.000 (0.003) loss 1.1318 (1.1026) acc 62.5000 (72.4253) lr 1.1874e-03 eta 5:39:34 +epoch [24/50] batch [465/500] time 1.564 (1.563) data 0.001 (0.003) loss 0.6411 (1.1029) acc 78.1250 (72.3992) lr 1.1874e-03 eta 5:39:29 +epoch [24/50] batch [470/500] time 1.575 (1.563) data 0.000 (0.003) loss 0.4072 (1.0996) acc 87.5000 (72.4601) lr 1.1874e-03 eta 5:39:21 +epoch [24/50] batch [475/500] time 1.566 (1.563) data 0.000 (0.003) loss 0.5239 (1.0995) acc 90.6250 (72.4342) lr 1.1874e-03 eta 5:39:18 +epoch [24/50] batch [480/500] time 1.549 (1.563) data 0.000 (0.003) loss 0.8066 (1.0983) acc 81.2500 (72.4479) lr 1.1874e-03 eta 5:39:10 +epoch [24/50] batch [485/500] time 1.548 (1.563) data 0.001 (0.003) loss 1.0801 (1.0986) acc 68.7500 (72.4098) lr 1.1874e-03 eta 5:39:02 +epoch [24/50] batch [490/500] time 1.570 (1.563) data 0.000 (0.003) loss 1.1797 (1.1009) acc 65.6250 (72.3469) lr 1.1874e-03 eta 5:38:53 +epoch [24/50] batch [495/500] time 1.561 (1.563) data 0.000 (0.002) loss 1.4678 (1.1033) acc 56.2500 (72.2854) lr 1.1874e-03 eta 5:38:44 +epoch [24/50] batch [500/500] time 1.555 (1.563) data 0.000 (0.002) loss 0.6470 (1.1017) acc 75.0000 (72.3000) lr 1.1253e-03 eta 5:38:34 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,024 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +epoch [25/50] batch [5/500] time 1.556 (1.673) data 0.000 (0.177) loss 1.0693 (1.1984) acc 75.0000 (70.6250) lr 1.1253e-03 eta 6:02:26 +epoch [25/50] batch [10/500] time 1.558 (1.611) data 0.000 (0.089) loss 0.9302 (1.1119) acc 71.8750 (71.8750) lr 1.1253e-03 eta 5:48:51 +epoch [25/50] batch [15/500] time 1.569 (1.594) data 0.001 (0.059) loss 1.4326 (1.0871) acc 68.7500 (72.9167) lr 1.1253e-03 eta 5:45:04 +epoch [25/50] 
batch [20/500] time 1.561 (1.587) data 0.000 (0.045) loss 1.7627 (1.1357) acc 68.7500 (71.7188) lr 1.1253e-03 eta 5:43:16 +epoch [25/50] batch [25/500] time 1.567 (1.582) data 0.000 (0.036) loss 0.8867 (1.1252) acc 78.1250 (72.5000) lr 1.1253e-03 eta 5:42:11 +epoch [25/50] batch [30/500] time 1.576 (1.579) data 0.000 (0.030) loss 1.1396 (1.1418) acc 71.8750 (72.3958) lr 1.1253e-03 eta 5:41:19 +epoch [25/50] batch [35/500] time 1.556 (1.576) data 0.001 (0.026) loss 1.1201 (1.1179) acc 78.1250 (72.5893) lr 1.1253e-03 eta 5:40:37 +epoch [25/50] batch [40/500] time 1.554 (1.574) data 0.000 (0.023) loss 0.9365 (1.1119) acc 75.0000 (72.8906) lr 1.1253e-03 eta 5:40:04 +epoch [25/50] batch [45/500] time 1.585 (1.574) data 0.000 (0.020) loss 0.8379 (1.1117) acc 75.0000 (72.4306) lr 1.1253e-03 eta 5:39:48 +epoch [25/50] batch [50/500] time 1.571 (1.574) data 0.000 (0.018) loss 1.5078 (1.1305) acc 71.8750 (72.0000) lr 1.1253e-03 eta 5:39:38 +epoch [25/50] batch [55/500] time 1.578 (1.573) data 0.000 (0.016) loss 1.1611 (1.1360) acc 78.1250 (71.8182) lr 1.1253e-03 eta 5:39:17 +epoch [25/50] batch [60/500] time 1.566 (1.572) data 0.001 (0.015) loss 1.1270 (1.1387) acc 78.1250 (71.9271) lr 1.1253e-03 eta 5:39:00 +epoch [25/50] batch [65/500] time 1.552 (1.573) data 0.000 (0.014) loss 1.2500 (1.1389) acc 59.3750 (71.6346) lr 1.1253e-03 eta 5:39:04 +epoch [25/50] batch [70/500] time 1.563 (1.572) data 0.000 (0.013) loss 1.5352 (1.1370) acc 68.7500 (71.7411) lr 1.1253e-03 eta 5:38:50 +epoch [25/50] batch [75/500] time 1.560 (1.571) data 0.001 (0.012) loss 1.2158 (1.1337) acc 62.5000 (71.8333) lr 1.1253e-03 eta 5:38:28 +epoch [25/50] batch [80/500] time 1.572 (1.571) data 0.000 (0.011) loss 1.1885 (1.1254) acc 71.8750 (71.9141) lr 1.1253e-03 eta 5:38:20 +epoch [25/50] batch [85/500] time 1.547 (1.571) data 0.000 (0.011) loss 0.6738 (1.1228) acc 78.1250 (71.9485) lr 1.1253e-03 eta 5:38:06 +epoch [25/50] batch [90/500] time 1.597 (1.571) data 0.000 (0.010) loss 1.3604 (1.1229) acc 
59.3750 (71.8056) lr 1.1253e-03 eta 5:37:59 +epoch [25/50] batch [95/500] time 1.567 (1.570) data 0.000 (0.010) loss 1.3262 (1.1255) acc 68.7500 (71.8092) lr 1.1253e-03 eta 5:37:46 +epoch [25/50] batch [100/500] time 1.566 (1.570) data 0.000 (0.009) loss 0.8784 (1.1271) acc 62.5000 (71.6250) lr 1.1253e-03 eta 5:37:28 +epoch [25/50] batch [105/500] time 1.558 (1.569) data 0.000 (0.009) loss 1.0742 (1.1261) acc 71.8750 (71.7560) lr 1.1253e-03 eta 5:37:10 +epoch [25/50] batch [110/500] time 1.575 (1.570) data 0.000 (0.008) loss 1.2207 (1.1166) acc 68.7500 (72.0455) lr 1.1253e-03 eta 5:37:10 +epoch [25/50] batch [115/500] time 1.576 (1.570) data 0.000 (0.008) loss 1.8809 (1.1184) acc 62.5000 (72.0109) lr 1.1253e-03 eta 5:37:04 +epoch [25/50] batch [120/500] time 1.557 (1.570) data 0.000 (0.008) loss 1.3701 (1.1165) acc 78.1250 (72.1615) lr 1.1253e-03 eta 5:36:58 +epoch [25/50] batch [125/500] time 1.570 (1.570) data 0.000 (0.007) loss 1.4873 (1.1203) acc 59.3750 (72.0500) lr 1.1253e-03 eta 5:36:50 +epoch [25/50] batch [130/500] time 1.541 (1.569) data 0.000 (0.007) loss 1.3770 (1.1276) acc 59.3750 (71.9231) lr 1.1253e-03 eta 5:36:38 +epoch [25/50] batch [135/500] time 1.577 (1.569) data 0.000 (0.007) loss 1.3262 (1.1219) acc 59.3750 (72.0370) lr 1.1253e-03 eta 5:36:29 +epoch [25/50] batch [140/500] time 1.553 (1.569) data 0.000 (0.007) loss 1.0811 (1.1199) acc 68.7500 (72.1875) lr 1.1253e-03 eta 5:36:19 +epoch [25/50] batch [145/500] time 1.536 (1.569) data 0.001 (0.007) loss 1.7246 (1.1206) acc 53.1250 (72.0905) lr 1.1253e-03 eta 5:36:03 +epoch [25/50] batch [150/500] time 1.556 (1.568) data 0.000 (0.006) loss 0.8438 (1.1117) acc 81.2500 (72.1875) lr 1.1253e-03 eta 5:35:47 +epoch [25/50] batch [155/500] time 1.568 (1.568) data 0.001 (0.006) loss 1.2393 (1.1100) acc 68.7500 (72.1774) lr 1.1253e-03 eta 5:35:40 +epoch [25/50] batch [160/500] time 1.578 (1.568) data 0.000 (0.006) loss 1.2676 (1.1092) acc 56.2500 (72.0703) lr 1.1253e-03 eta 5:35:34 +epoch [25/50] batch 
[165/500] time 1.553 (1.568) data 0.000 (0.006) loss 1.6250 (1.1177) acc 62.5000 (71.8371) lr 1.1253e-03 eta 5:35:23 +epoch [25/50] batch [170/500] time 1.560 (1.568) data 0.000 (0.006) loss 1.0869 (1.1192) acc 71.8750 (71.8199) lr 1.1253e-03 eta 5:35:14 +epoch [25/50] batch [175/500] time 1.547 (1.568) data 0.000 (0.005) loss 0.5811 (1.1129) acc 84.3750 (71.8750) lr 1.1253e-03 eta 5:35:03 +epoch [25/50] batch [180/500] time 1.566 (1.568) data 0.000 (0.005) loss 0.6968 (1.1135) acc 81.2500 (71.9271) lr 1.1253e-03 eta 5:35:00 +epoch [25/50] batch [185/500] time 1.559 (1.568) data 0.000 (0.005) loss 1.1699 (1.1109) acc 68.7500 (71.9764) lr 1.1253e-03 eta 5:34:53 +epoch [25/50] batch [190/500] time 1.557 (1.568) data 0.000 (0.005) loss 1.6338 (1.1172) acc 75.0000 (71.9079) lr 1.1253e-03 eta 5:34:45 +epoch [25/50] batch [195/500] time 1.567 (1.568) data 0.000 (0.005) loss 1.4346 (1.1196) acc 68.7500 (71.8750) lr 1.1253e-03 eta 5:34:36 +epoch [25/50] batch [200/500] time 1.569 (1.568) data 0.000 (0.005) loss 1.0654 (1.1171) acc 75.0000 (71.9219) lr 1.1253e-03 eta 5:34:27 +epoch [25/50] batch [205/500] time 1.662 (1.568) data 0.000 (0.005) loss 1.8359 (1.1151) acc 65.6250 (72.0579) lr 1.1253e-03 eta 5:34:24 +epoch [25/50] batch [210/500] time 1.616 (1.568) data 0.000 (0.005) loss 1.2861 (1.1179) acc 68.7500 (72.0238) lr 1.1253e-03 eta 5:34:18 +epoch [25/50] batch [215/500] time 1.576 (1.568) data 0.000 (0.005) loss 1.2461 (1.1162) acc 75.0000 (72.1512) lr 1.1253e-03 eta 5:34:09 +epoch [25/50] batch [220/500] time 1.554 (1.568) data 0.000 (0.004) loss 0.7056 (1.1099) acc 84.3750 (72.3722) lr 1.1253e-03 eta 5:33:59 +epoch [25/50] batch [225/500] time 1.552 (1.568) data 0.001 (0.004) loss 1.2842 (1.1094) acc 78.1250 (72.4028) lr 1.1253e-03 eta 5:33:49 +epoch [25/50] batch [230/500] time 1.559 (1.568) data 0.000 (0.004) loss 1.0967 (1.1094) acc 68.7500 (72.3641) lr 1.1253e-03 eta 5:33:38 +epoch [25/50] batch [235/500] time 1.535 (1.567) data 0.000 (0.004) loss 1.6641 
(1.1077) acc 62.5000 (72.3670) lr 1.1253e-03 eta 5:33:27 +epoch [25/50] batch [240/500] time 1.547 (1.567) data 0.000 (0.004) loss 1.4053 (1.1053) acc 65.6250 (72.4479) lr 1.1253e-03 eta 5:33:16 +epoch [25/50] batch [245/500] time 1.567 (1.567) data 0.000 (0.004) loss 0.7690 (1.1039) acc 84.3750 (72.4490) lr 1.1253e-03 eta 5:33:08 +epoch [25/50] batch [250/500] time 1.532 (1.567) data 0.000 (0.004) loss 0.7998 (1.1000) acc 78.1250 (72.5000) lr 1.1253e-03 eta 5:33:04 +epoch [25/50] batch [255/500] time 1.557 (1.567) data 0.000 (0.004) loss 0.7871 (1.1008) acc 71.8750 (72.4877) lr 1.1253e-03 eta 5:32:52 +epoch [25/50] batch [260/500] time 1.550 (1.567) data 0.000 (0.004) loss 0.5142 (1.0984) acc 90.6250 (72.5240) lr 1.1253e-03 eta 5:32:42 +epoch [25/50] batch [265/500] time 1.547 (1.567) data 0.000 (0.004) loss 0.6714 (1.0972) acc 81.2500 (72.5236) lr 1.1253e-03 eta 5:32:30 +epoch [25/50] batch [270/500] time 1.553 (1.566) data 0.000 (0.004) loss 0.8218 (1.0967) acc 87.5000 (72.5926) lr 1.1253e-03 eta 5:32:20 +epoch [25/50] batch [275/500] time 1.562 (1.566) data 0.000 (0.004) loss 1.1426 (1.0985) acc 78.1250 (72.5909) lr 1.1253e-03 eta 5:32:10 +epoch [25/50] batch [280/500] time 1.542 (1.566) data 0.000 (0.004) loss 1.1270 (1.0965) acc 65.6250 (72.6562) lr 1.1253e-03 eta 5:32:02 +epoch [25/50] batch [285/500] time 1.556 (1.566) data 0.000 (0.004) loss 0.7373 (1.0946) acc 81.2500 (72.6754) lr 1.1253e-03 eta 5:31:51 +epoch [25/50] batch [290/500] time 1.560 (1.566) data 0.000 (0.003) loss 0.9673 (1.0971) acc 65.6250 (72.5539) lr 1.1253e-03 eta 5:31:42 +epoch [25/50] batch [295/500] time 1.561 (1.566) data 0.000 (0.003) loss 1.6191 (1.1017) acc 53.1250 (72.3835) lr 1.1253e-03 eta 5:31:31 +epoch [25/50] batch [300/500] time 1.548 (1.566) data 0.000 (0.003) loss 1.0381 (1.0988) acc 75.0000 (72.4792) lr 1.1253e-03 eta 5:31:22 +epoch [25/50] batch [305/500] time 1.549 (1.565) data 0.000 (0.003) loss 1.7236 (1.1001) acc 62.5000 (72.4590) lr 1.1253e-03 eta 5:31:10 +epoch 
[25/50] batch [310/500] time 1.562 (1.565) data 0.000 (0.003) loss 1.0654 (1.0989) acc 75.0000 (72.4395) lr 1.1253e-03 eta 5:30:58 +epoch [25/50] batch [315/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.4463 (1.1008) acc 78.1250 (72.4702) lr 1.1253e-03 eta 5:30:49 +epoch [25/50] batch [320/500] time 1.556 (1.565) data 0.000 (0.003) loss 0.6069 (1.0956) acc 90.6250 (72.6465) lr 1.1253e-03 eta 5:30:40 +epoch [25/50] batch [325/500] time 1.527 (1.565) data 0.000 (0.003) loss 1.1191 (1.0987) acc 71.8750 (72.6058) lr 1.1253e-03 eta 5:30:31 +epoch [25/50] batch [330/500] time 1.578 (1.565) data 0.000 (0.003) loss 1.1318 (1.0966) acc 78.1250 (72.6610) lr 1.1253e-03 eta 5:30:23 +epoch [25/50] batch [335/500] time 1.532 (1.564) data 0.000 (0.003) loss 0.8955 (1.0965) acc 71.8750 (72.6119) lr 1.1253e-03 eta 5:30:12 +epoch [25/50] batch [340/500] time 1.538 (1.564) data 0.000 (0.003) loss 1.3105 (1.0980) acc 78.1250 (72.5735) lr 1.1253e-03 eta 5:30:02 +epoch [25/50] batch [345/500] time 1.553 (1.564) data 0.000 (0.003) loss 0.9717 (1.0975) acc 65.6250 (72.5543) lr 1.1253e-03 eta 5:29:52 +epoch [25/50] batch [350/500] time 1.541 (1.564) data 0.000 (0.003) loss 1.3936 (1.0984) acc 68.7500 (72.5536) lr 1.1253e-03 eta 5:29:47 +epoch [25/50] batch [355/500] time 1.548 (1.564) data 0.000 (0.003) loss 0.8892 (1.0979) acc 75.0000 (72.5616) lr 1.1253e-03 eta 5:29:38 +epoch [25/50] batch [360/500] time 1.571 (1.564) data 0.000 (0.003) loss 1.2832 (1.1015) acc 68.7500 (72.4306) lr 1.1253e-03 eta 5:29:29 +epoch [25/50] batch [365/500] time 1.540 (1.564) data 0.000 (0.003) loss 0.7544 (1.0977) acc 87.5000 (72.5086) lr 1.1253e-03 eta 5:29:19 +epoch [25/50] batch [370/500] time 1.562 (1.564) data 0.000 (0.003) loss 0.4934 (1.0953) acc 84.3750 (72.5253) lr 1.1253e-03 eta 5:29:11 +epoch [25/50] batch [375/500] time 1.589 (1.564) data 0.000 (0.003) loss 1.0508 (1.0942) acc 71.8750 (72.5750) lr 1.1253e-03 eta 5:29:02 +epoch [25/50] batch [380/500] time 1.586 (1.564) data 0.000 (0.003) loss 
0.9336 (1.0946) acc 68.7500 (72.5658) lr 1.1253e-03 eta 5:28:56 +epoch [25/50] batch [385/500] time 1.532 (1.564) data 0.000 (0.003) loss 0.9385 (1.0942) acc 75.0000 (72.5325) lr 1.1253e-03 eta 5:28:45 +epoch [25/50] batch [390/500] time 1.568 (1.564) data 0.001 (0.003) loss 1.0635 (1.0973) acc 68.7500 (72.4599) lr 1.1253e-03 eta 5:28:36 +epoch [25/50] batch [395/500] time 1.529 (1.564) data 0.000 (0.003) loss 0.8564 (1.0949) acc 78.1250 (72.5158) lr 1.1253e-03 eta 5:28:29 +epoch [25/50] batch [400/500] time 1.560 (1.564) data 0.000 (0.003) loss 1.5420 (1.0938) acc 71.8750 (72.5859) lr 1.1253e-03 eta 5:28:22 +epoch [25/50] batch [405/500] time 1.576 (1.564) data 0.000 (0.003) loss 0.8716 (1.0928) acc 78.1250 (72.5926) lr 1.1253e-03 eta 5:28:14 +epoch [25/50] batch [410/500] time 1.564 (1.564) data 0.000 (0.003) loss 1.1572 (1.0942) acc 68.7500 (72.5762) lr 1.1253e-03 eta 5:28:07 +epoch [25/50] batch [415/500] time 1.542 (1.564) data 0.000 (0.003) loss 1.2881 (1.0940) acc 71.8750 (72.5828) lr 1.1253e-03 eta 5:28:00 +epoch [25/50] batch [420/500] time 1.577 (1.564) data 0.000 (0.003) loss 0.9233 (1.0943) acc 71.8750 (72.5670) lr 1.1253e-03 eta 5:27:51 +epoch [25/50] batch [425/500] time 1.544 (1.563) data 0.001 (0.002) loss 1.5449 (1.0958) acc 62.5000 (72.5441) lr 1.1253e-03 eta 5:27:40 +epoch [25/50] batch [430/500] time 1.572 (1.563) data 0.000 (0.002) loss 1.3105 (1.0943) acc 71.8750 (72.6017) lr 1.1253e-03 eta 5:27:32 +epoch [25/50] batch [435/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.9155 (1.0955) acc 68.7500 (72.5503) lr 1.1253e-03 eta 5:27:24 +epoch [25/50] batch [440/500] time 1.589 (1.563) data 0.000 (0.002) loss 1.4893 (1.0934) acc 53.1250 (72.5639) lr 1.1253e-03 eta 5:27:16 +epoch [25/50] batch [445/500] time 1.573 (1.563) data 0.000 (0.002) loss 1.2754 (1.0934) acc 71.8750 (72.5492) lr 1.1253e-03 eta 5:27:09 +epoch [25/50] batch [450/500] time 1.542 (1.563) data 0.000 (0.002) loss 0.8560 (1.0941) acc 78.1250 (72.5347) lr 1.1253e-03 eta 5:26:59 
+epoch [25/50] batch [455/500] time 1.572 (1.563) data 0.000 (0.002) loss 0.8901 (1.0951) acc 87.5000 (72.5755) lr 1.1253e-03 eta 5:26:51 +epoch [25/50] batch [460/500] time 1.577 (1.563) data 0.000 (0.002) loss 0.5586 (1.0959) acc 87.5000 (72.5883) lr 1.1253e-03 eta 5:26:43 +epoch [25/50] batch [465/500] time 1.556 (1.563) data 0.001 (0.002) loss 1.1406 (1.0972) acc 71.8750 (72.5941) lr 1.1253e-03 eta 5:26:35 +epoch [25/50] batch [470/500] time 1.572 (1.563) data 0.000 (0.002) loss 1.0215 (1.0966) acc 78.1250 (72.6064) lr 1.1253e-03 eta 5:26:26 +epoch [25/50] batch [475/500] time 1.547 (1.563) data 0.000 (0.002) loss 1.9531 (1.0978) acc 62.5000 (72.5789) lr 1.1253e-03 eta 5:26:18 +epoch [25/50] batch [480/500] time 1.561 (1.563) data 0.000 (0.002) loss 1.0020 (1.0957) acc 75.0000 (72.6107) lr 1.1253e-03 eta 5:26:08 +epoch [25/50] batch [485/500] time 1.550 (1.563) data 0.001 (0.002) loss 0.6602 (1.0950) acc 75.0000 (72.6353) lr 1.1253e-03 eta 5:25:59 +epoch [25/50] batch [490/500] time 1.570 (1.563) data 0.000 (0.002) loss 1.4121 (1.0940) acc 71.8750 (72.6786) lr 1.1253e-03 eta 5:25:52 +epoch [25/50] batch [495/500] time 1.547 (1.563) data 0.000 (0.002) loss 1.2188 (1.0951) acc 78.1250 (72.6705) lr 1.1253e-03 eta 5:25:46 +epoch [25/50] batch [500/500] time 1.550 (1.563) data 0.000 (0.002) loss 1.0244 (1.0960) acc 71.8750 (72.6562) lr 1.0628e-03 eta 5:25:36 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,964 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +epoch [26/50] batch [5/500] time 1.549 (1.653) data 0.000 (0.153) loss 1.3701 (0.9959) acc 75.0000 (75.6250) lr 1.0628e-03 eta 5:44:13 +epoch [26/50] batch [10/500] time 1.572 (1.608) data 0.000 (0.077) loss 0.9077 (1.0585) acc 81.2500 (74.0625) lr 1.0628e-03 eta 5:34:43 +epoch [26/50] batch [15/500] time 1.538 (1.588) data 0.000 (0.051) loss 1.1318 (1.1010) acc 75.0000 (72.9167) lr 1.0628e-03 eta 5:30:29 +epoch [26/50] batch [20/500] time 1.548 (1.580) data 0.000 (0.039) loss 1.3789 
(1.1268) acc 71.8750 (72.5000) lr 1.0628e-03 eta 5:28:44 +epoch [26/50] batch [25/500] time 1.557 (1.576) data 0.000 (0.031) loss 2.1582 (1.1294) acc 62.5000 (73.1250) lr 1.0628e-03 eta 5:27:41 +epoch [26/50] batch [30/500] time 1.556 (1.574) data 0.000 (0.026) loss 1.0947 (1.1174) acc 75.0000 (72.6042) lr 1.0628e-03 eta 5:27:11 +epoch [26/50] batch [35/500] time 1.568 (1.574) data 0.001 (0.022) loss 1.5146 (1.1124) acc 65.6250 (72.5893) lr 1.0628e-03 eta 5:26:55 +epoch [26/50] batch [40/500] time 1.533 (1.575) data 0.001 (0.020) loss 1.6357 (1.1326) acc 65.6250 (72.0312) lr 1.0628e-03 eta 5:26:59 +epoch [26/50] batch [45/500] time 1.556 (1.573) data 0.001 (0.018) loss 1.2002 (1.1271) acc 65.6250 (71.8056) lr 1.0628e-03 eta 5:26:35 +epoch [26/50] batch [50/500] time 1.567 (1.573) data 0.000 (0.016) loss 0.4143 (1.1057) acc 87.5000 (72.4375) lr 1.0628e-03 eta 5:26:24 +epoch [26/50] batch [55/500] time 1.569 (1.573) data 0.000 (0.014) loss 1.4277 (1.1251) acc 65.6250 (72.1591) lr 1.0628e-03 eta 5:26:10 +epoch [26/50] batch [60/500] time 1.558 (1.571) data 0.001 (0.013) loss 1.0283 (1.1172) acc 78.1250 (72.3438) lr 1.0628e-03 eta 5:25:48 +epoch [26/50] batch [65/500] time 1.578 (1.571) data 0.000 (0.012) loss 0.5723 (1.1105) acc 90.6250 (72.5481) lr 1.0628e-03 eta 5:25:32 +epoch [26/50] batch [70/500] time 1.536 (1.570) data 0.000 (0.011) loss 1.1338 (1.1216) acc 75.0000 (72.6339) lr 1.0628e-03 eta 5:25:12 +epoch [26/50] batch [75/500] time 1.552 (1.569) data 0.001 (0.011) loss 1.1182 (1.0982) acc 75.0000 (73.1667) lr 1.0628e-03 eta 5:24:58 +epoch [26/50] batch [80/500] time 1.571 (1.570) data 0.000 (0.010) loss 1.0010 (1.0987) acc 78.1250 (73.1250) lr 1.0628e-03 eta 5:25:02 +epoch [26/50] batch [85/500] time 1.556 (1.569) data 0.000 (0.009) loss 1.3457 (1.1040) acc 62.5000 (72.9412) lr 1.0628e-03 eta 5:24:42 +epoch [26/50] batch [90/500] time 1.527 (1.568) data 0.000 (0.009) loss 1.3525 (1.1057) acc 75.0000 (73.0556) lr 1.0628e-03 eta 5:24:19 +epoch [26/50] batch 
[95/500] time 1.546 (1.568) data 0.000 (0.009) loss 0.9971 (1.1002) acc 65.6250 (73.1908) lr 1.0628e-03 eta 5:24:07 +epoch [26/50] batch [100/500] time 1.591 (1.568) data 0.001 (0.008) loss 1.7256 (1.1002) acc 65.6250 (73.3125) lr 1.0628e-03 eta 5:23:59 +epoch [26/50] batch [105/500] time 1.556 (1.568) data 0.000 (0.008) loss 1.8398 (1.1129) acc 65.6250 (73.1548) lr 1.0628e-03 eta 5:23:53 +epoch [26/50] batch [110/500] time 1.571 (1.567) data 0.000 (0.007) loss 1.4961 (1.1269) acc 68.7500 (72.8409) lr 1.0628e-03 eta 5:23:37 +epoch [26/50] batch [115/500] time 1.539 (1.567) data 0.001 (0.007) loss 0.8740 (1.1201) acc 78.1250 (72.9348) lr 1.0628e-03 eta 5:23:21 +epoch [26/50] batch [120/500] time 1.558 (1.566) data 0.000 (0.007) loss 0.8540 (1.1103) acc 75.0000 (72.9948) lr 1.0628e-03 eta 5:23:08 +epoch [26/50] batch [125/500] time 1.548 (1.566) data 0.000 (0.007) loss 1.0967 (1.1159) acc 68.7500 (72.7750) lr 1.0628e-03 eta 5:22:58 +epoch [26/50] batch [130/500] time 1.565 (1.566) data 0.000 (0.006) loss 0.6572 (1.1064) acc 81.2500 (72.9087) lr 1.0628e-03 eta 5:22:49 +epoch [26/50] batch [135/500] time 1.584 (1.566) data 0.000 (0.006) loss 1.0117 (1.1094) acc 81.2500 (72.9630) lr 1.0628e-03 eta 5:22:39 +epoch [26/50] batch [140/500] time 1.557 (1.566) data 0.001 (0.006) loss 1.0010 (1.1156) acc 65.6250 (72.7232) lr 1.0628e-03 eta 5:22:40 +epoch [26/50] batch [145/500] time 1.553 (1.566) data 0.001 (0.006) loss 1.3086 (1.1217) acc 68.7500 (72.5431) lr 1.0628e-03 eta 5:22:29 +epoch [26/50] batch [150/500] time 1.548 (1.566) data 0.001 (0.006) loss 1.0674 (1.1142) acc 78.1250 (72.7292) lr 1.0628e-03 eta 5:22:20 +epoch [26/50] batch [155/500] time 1.548 (1.565) data 0.000 (0.005) loss 0.9580 (1.1158) acc 87.5000 (72.7419) lr 1.0628e-03 eta 5:22:05 +epoch [26/50] batch [160/500] time 1.555 (1.565) data 0.000 (0.005) loss 0.6685 (1.1097) acc 84.3750 (72.9297) lr 1.0628e-03 eta 5:21:52 +epoch [26/50] batch [165/500] time 1.558 (1.565) data 0.000 (0.005) loss 0.9746 (1.1088) 
acc 78.1250 (72.8788) lr 1.0628e-03 eta 5:21:40 +epoch [26/50] batch [170/500] time 1.542 (1.564) data 0.001 (0.005) loss 1.4688 (1.1103) acc 62.5000 (72.8125) lr 1.0628e-03 eta 5:21:27 +epoch [26/50] batch [175/500] time 1.547 (1.564) data 0.001 (0.005) loss 1.5928 (1.1095) acc 56.2500 (72.7321) lr 1.0628e-03 eta 5:21:16 +epoch [26/50] batch [180/500] time 1.569 (1.564) data 0.001 (0.005) loss 1.2178 (1.1095) acc 65.6250 (72.7778) lr 1.0628e-03 eta 5:21:06 +epoch [26/50] batch [185/500] time 1.564 (1.564) data 0.000 (0.005) loss 0.9980 (1.1088) acc 81.2500 (72.8547) lr 1.0628e-03 eta 5:21:02 +epoch [26/50] batch [190/500] time 1.574 (1.564) data 0.000 (0.005) loss 1.0615 (1.1012) acc 78.1250 (73.0099) lr 1.0628e-03 eta 5:20:51 +epoch [26/50] batch [195/500] time 1.566 (1.564) data 0.001 (0.004) loss 1.1895 (1.0998) acc 65.6250 (72.9647) lr 1.0628e-03 eta 5:20:43 +epoch [26/50] batch [200/500] time 1.564 (1.564) data 0.000 (0.004) loss 1.0371 (1.0983) acc 75.0000 (72.9688) lr 1.0628e-03 eta 5:20:36 +epoch [26/50] batch [205/500] time 1.537 (1.564) data 0.000 (0.004) loss 1.5068 (1.1024) acc 71.8750 (72.9116) lr 1.0628e-03 eta 5:20:27 +epoch [26/50] batch [210/500] time 1.565 (1.564) data 0.001 (0.004) loss 1.8047 (1.1052) acc 62.5000 (72.9464) lr 1.0628e-03 eta 5:20:18 +epoch [26/50] batch [215/500] time 1.555 (1.563) data 0.001 (0.004) loss 1.5049 (1.1077) acc 75.0000 (72.9651) lr 1.0628e-03 eta 5:20:06 +epoch [26/50] batch [220/500] time 1.546 (1.563) data 0.000 (0.004) loss 0.8472 (1.1088) acc 65.6250 (72.8693) lr 1.0628e-03 eta 5:19:57 +epoch [26/50] batch [225/500] time 1.565 (1.563) data 0.000 (0.004) loss 0.7358 (1.1098) acc 75.0000 (72.7639) lr 1.0628e-03 eta 5:19:49 +epoch [26/50] batch [230/500] time 1.561 (1.563) data 0.000 (0.004) loss 0.4685 (1.1105) acc 93.7500 (72.7446) lr 1.0628e-03 eta 5:19:40 +epoch [26/50] batch [235/500] time 1.543 (1.563) data 0.001 (0.004) loss 1.1367 (1.1057) acc 71.8750 (72.9122) lr 1.0628e-03 eta 5:19:28 +epoch [26/50] 
batch [240/500] time 1.553 (1.563) data 0.001 (0.004) loss 0.8716 (1.1038) acc 81.2500 (72.9818) lr 1.0628e-03 eta 5:19:19 +epoch [26/50] batch [245/500] time 1.567 (1.562) data 0.001 (0.004) loss 1.0547 (1.0992) acc 81.2500 (73.1378) lr 1.0628e-03 eta 5:19:08 +epoch [26/50] batch [250/500] time 1.556 (1.562) data 0.000 (0.004) loss 0.9502 (1.1002) acc 84.3750 (73.2625) lr 1.0628e-03 eta 5:18:59 +epoch [26/50] batch [255/500] time 1.556 (1.562) data 0.001 (0.003) loss 1.1797 (1.0998) acc 65.6250 (73.1863) lr 1.0628e-03 eta 5:18:49 +epoch [26/50] batch [260/500] time 1.587 (1.562) data 0.000 (0.003) loss 0.9487 (1.1007) acc 78.1250 (73.1851) lr 1.0628e-03 eta 5:18:40 +epoch [26/50] batch [265/500] time 1.574 (1.562) data 0.001 (0.003) loss 1.0576 (1.0988) acc 75.0000 (73.2429) lr 1.0628e-03 eta 5:18:34 +epoch [26/50] batch [270/500] time 1.579 (1.562) data 0.000 (0.003) loss 1.3535 (1.1005) acc 68.7500 (73.2060) lr 1.0628e-03 eta 5:18:27 +epoch [26/50] batch [275/500] time 1.574 (1.563) data 0.000 (0.003) loss 1.3916 (1.0990) acc 62.5000 (73.1705) lr 1.0628e-03 eta 5:18:21 +epoch [26/50] batch [280/500] time 1.641 (1.563) data 0.000 (0.003) loss 0.9424 (1.0951) acc 68.7500 (73.2254) lr 1.0628e-03 eta 5:18:17 +epoch [26/50] batch [285/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.1914 (1.0936) acc 75.0000 (73.2456) lr 1.0628e-03 eta 5:18:09 +epoch [26/50] batch [290/500] time 1.575 (1.563) data 0.000 (0.003) loss 1.2256 (1.0914) acc 68.7500 (73.3190) lr 1.0628e-03 eta 5:18:02 +epoch [26/50] batch [295/500] time 1.567 (1.563) data 0.000 (0.003) loss 1.1084 (1.0880) acc 65.6250 (73.3369) lr 1.0628e-03 eta 5:17:53 +epoch [26/50] batch [300/500] time 1.559 (1.563) data 0.000 (0.003) loss 1.3623 (1.0880) acc 71.8750 (73.3646) lr 1.0628e-03 eta 5:17:43 +epoch [26/50] batch [305/500] time 1.554 (1.563) data 0.000 (0.003) loss 1.1143 (1.0868) acc 78.1250 (73.4631) lr 1.0628e-03 eta 5:17:37 +epoch [26/50] batch [310/500] time 1.568 (1.563) data 0.000 (0.003) loss 1.5439 
(1.0897) acc 75.0000 (73.4879) lr 1.0628e-03 eta 5:17:28 +epoch [26/50] batch [315/500] time 1.566 (1.563) data 0.001 (0.003) loss 0.9438 (1.0835) acc 75.0000 (73.6012) lr 1.0628e-03 eta 5:17:20 +epoch [26/50] batch [320/500] time 1.584 (1.563) data 0.000 (0.003) loss 1.1387 (1.0896) acc 81.2500 (73.5156) lr 1.0628e-03 eta 5:17:13 +epoch [26/50] batch [325/500] time 1.543 (1.563) data 0.000 (0.003) loss 1.0234 (1.0921) acc 71.8750 (73.4615) lr 1.0628e-03 eta 5:17:07 +epoch [26/50] batch [330/500] time 1.560 (1.563) data 0.001 (0.003) loss 1.1396 (1.0925) acc 68.7500 (73.4564) lr 1.0628e-03 eta 5:16:58 +epoch [26/50] batch [335/500] time 1.558 (1.563) data 0.000 (0.003) loss 0.9204 (1.0922) acc 81.2500 (73.4515) lr 1.0628e-03 eta 5:16:50 +epoch [26/50] batch [340/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.2207 (1.0905) acc 68.7500 (73.4926) lr 1.0628e-03 eta 5:16:41 +epoch [26/50] batch [345/500] time 1.537 (1.562) data 0.000 (0.003) loss 1.2197 (1.0887) acc 71.8750 (73.5145) lr 1.0628e-03 eta 5:16:32 +epoch [26/50] batch [350/500] time 1.555 (1.562) data 0.000 (0.003) loss 0.9106 (1.0926) acc 75.0000 (73.3839) lr 1.0628e-03 eta 5:16:23 +epoch [26/50] batch [355/500] time 1.539 (1.562) data 0.000 (0.003) loss 0.9336 (1.0937) acc 71.8750 (73.3451) lr 1.0628e-03 eta 5:16:11 +epoch [26/50] batch [360/500] time 1.552 (1.562) data 0.000 (0.003) loss 0.8413 (1.0900) acc 71.8750 (73.3854) lr 1.0628e-03 eta 5:16:02 +epoch [26/50] batch [365/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.3672 (1.0918) acc 71.8750 (73.3904) lr 1.0628e-03 eta 5:15:53 +epoch [26/50] batch [370/500] time 1.540 (1.562) data 0.000 (0.003) loss 0.8906 (1.0911) acc 75.0000 (73.4037) lr 1.0628e-03 eta 5:15:44 +epoch [26/50] batch [375/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.2910 (1.0921) acc 71.8750 (73.3833) lr 1.0628e-03 eta 5:15:34 +epoch [26/50] batch [380/500] time 1.558 (1.562) data 0.000 (0.002) loss 0.9106 (1.0916) acc 81.2500 (73.3799) lr 1.0628e-03 eta 5:15:27 +epoch 
[26/50] batch [385/500] time 1.577 (1.562) data 0.001 (0.002) loss 0.9512 (1.0913) acc 75.0000 (73.3766) lr 1.0628e-03 eta 5:15:18 +epoch [26/50] batch [390/500] time 1.573 (1.562) data 0.000 (0.002) loss 0.7480 (1.0906) acc 84.3750 (73.4295) lr 1.0628e-03 eta 5:15:11 +epoch [26/50] batch [395/500] time 1.545 (1.562) data 0.001 (0.002) loss 1.1055 (1.0921) acc 65.6250 (73.4256) lr 1.0628e-03 eta 5:15:02 +epoch [26/50] batch [400/500] time 1.554 (1.561) data 0.000 (0.002) loss 1.0361 (1.0930) acc 75.0000 (73.3672) lr 1.0628e-03 eta 5:14:53 +epoch [26/50] batch [405/500] time 1.547 (1.561) data 0.000 (0.002) loss 0.9609 (1.0931) acc 78.1250 (73.3488) lr 1.0628e-03 eta 5:14:44 +epoch [26/50] batch [410/500] time 1.542 (1.561) data 0.000 (0.002) loss 1.1934 (1.0910) acc 65.6250 (73.3765) lr 1.0628e-03 eta 5:14:36 +epoch [26/50] batch [415/500] time 1.543 (1.561) data 0.000 (0.002) loss 1.0732 (1.0922) acc 68.7500 (73.3509) lr 1.0628e-03 eta 5:14:26 +epoch [26/50] batch [420/500] time 1.545 (1.561) data 0.000 (0.002) loss 1.2822 (1.0945) acc 68.7500 (73.2292) lr 1.0628e-03 eta 5:14:17 +epoch [26/50] batch [425/500] time 1.573 (1.561) data 0.000 (0.002) loss 1.0078 (1.0931) acc 81.2500 (73.2279) lr 1.0628e-03 eta 5:14:11 +epoch [26/50] batch [430/500] time 1.543 (1.561) data 0.000 (0.002) loss 0.7070 (1.0914) acc 84.3750 (73.2776) lr 1.0628e-03 eta 5:14:01 +epoch [26/50] batch [435/500] time 1.571 (1.561) data 0.000 (0.002) loss 1.1348 (1.0929) acc 81.2500 (73.2543) lr 1.0628e-03 eta 5:13:52 +epoch [26/50] batch [440/500] time 1.572 (1.561) data 0.000 (0.002) loss 0.8853 (1.0933) acc 75.0000 (73.2031) lr 1.0628e-03 eta 5:13:44 +epoch [26/50] batch [445/500] time 1.555 (1.561) data 0.000 (0.002) loss 0.6899 (1.0912) acc 84.3750 (73.2163) lr 1.0628e-03 eta 5:13:35 +epoch [26/50] batch [450/500] time 1.563 (1.561) data 0.001 (0.002) loss 1.0166 (1.0925) acc 71.8750 (73.1458) lr 1.0628e-03 eta 5:13:29 +epoch [26/50] batch [455/500] time 1.558 (1.561) data 0.000 (0.002) loss 
0.9355 (1.0928) acc 71.8750 (73.1319) lr 1.0628e-03 eta 5:13:19 +epoch [26/50] batch [460/500] time 1.539 (1.561) data 0.001 (0.002) loss 1.1533 (1.0943) acc 65.6250 (73.1046) lr 1.0628e-03 eta 5:13:10 +epoch [26/50] batch [465/500] time 1.556 (1.560) data 0.000 (0.002) loss 1.4102 (1.0944) acc 65.6250 (73.0645) lr 1.0628e-03 eta 5:13:00 +epoch [26/50] batch [470/500] time 1.538 (1.561) data 0.000 (0.002) loss 1.7607 (1.0954) acc 62.5000 (73.0319) lr 1.0628e-03 eta 5:12:53 +epoch [26/50] batch [475/500] time 1.531 (1.560) data 0.000 (0.002) loss 0.6455 (1.0946) acc 84.3750 (73.0592) lr 1.0628e-03 eta 5:12:43 +epoch [26/50] batch [480/500] time 1.559 (1.560) data 0.000 (0.002) loss 1.0361 (1.0931) acc 71.8750 (73.0794) lr 1.0628e-03 eta 5:12:35 +epoch [26/50] batch [485/500] time 1.544 (1.560) data 0.001 (0.002) loss 1.1562 (1.0941) acc 78.1250 (73.0477) lr 1.0628e-03 eta 5:12:26 +epoch [26/50] batch [490/500] time 1.526 (1.560) data 0.000 (0.002) loss 1.3232 (1.0943) acc 65.6250 (73.0357) lr 1.0628e-03 eta 5:12:16 +epoch [26/50] batch [495/500] time 1.550 (1.560) data 0.000 (0.002) loss 1.8838 (1.0958) acc 59.3750 (72.9861) lr 1.0628e-03 eta 5:12:07 +epoch [26/50] batch [500/500] time 1.539 (1.560) data 0.000 (0.002) loss 0.6860 (1.0946) acc 78.1250 (73.0062) lr 1.0000e-03 eta 5:11:59 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,042 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [27/50] batch [5/500] time 1.564 (1.651) data 0.000 (0.148) loss 1.0791 (1.1414) acc 68.7500 (68.7500) lr 1.0000e-03 eta 5:30:05 +epoch [27/50] batch [10/500] time 1.570 (1.608) data 0.000 (0.074) loss 0.7207 (1.0937) acc 87.5000 (71.5625) lr 1.0000e-03 eta 5:21:21 +epoch [27/50] batch [15/500] time 1.553 (1.590) data 0.000 (0.050) loss 1.0498 (1.1743) acc 71.8750 (70.8333) lr 1.0000e-03 eta 5:17:32 +epoch [27/50] batch [20/500] time 1.548 (1.583) data 0.001 (0.037) loss 1.1299 (1.1269) acc 75.0000 (72.0312) lr 1.0000e-03 eta 5:16:03 +epoch [27/50] 
batch [25/500] time 1.710 (1.585) data 0.000 (0.030) loss 1.0566 (1.1730) acc 71.8750 (71.2500) lr 1.0000e-03 eta 5:16:18 +epoch [27/50] batch [30/500] time 1.551 (1.582) data 0.001 (0.025) loss 1.3955 (1.1415) acc 68.7500 (72.6042) lr 1.0000e-03 eta 5:15:35 +epoch [27/50] batch [35/500] time 1.585 (1.578) data 0.001 (0.022) loss 1.5830 (1.1942) acc 56.2500 (71.9643) lr 1.0000e-03 eta 5:14:35 +epoch [27/50] batch [40/500] time 1.569 (1.576) data 0.000 (0.019) loss 1.3379 (1.1788) acc 68.7500 (71.8750) lr 1.0000e-03 eta 5:14:04 +epoch [27/50] batch [45/500] time 1.523 (1.572) data 0.001 (0.017) loss 1.4990 (1.1833) acc 71.8750 (71.6667) lr 1.0000e-03 eta 5:13:17 +epoch [27/50] batch [50/500] time 1.558 (1.570) data 0.000 (0.015) loss 0.6191 (1.1708) acc 81.2500 (71.3125) lr 1.0000e-03 eta 5:12:35 +epoch [27/50] batch [55/500] time 1.563 (1.569) data 0.000 (0.014) loss 1.0332 (1.1554) acc 71.8750 (71.3068) lr 1.0000e-03 eta 5:12:17 +epoch [27/50] batch [60/500] time 1.555 (1.567) data 0.000 (0.013) loss 0.9819 (1.1535) acc 65.6250 (71.0417) lr 1.0000e-03 eta 5:11:53 +epoch [27/50] batch [65/500] time 1.551 (1.567) data 0.000 (0.012) loss 1.4248 (1.1520) acc 59.3750 (70.7212) lr 1.0000e-03 eta 5:11:41 +epoch [27/50] batch [70/500] time 1.552 (1.566) data 0.000 (0.011) loss 0.9473 (1.1460) acc 75.0000 (70.7143) lr 1.0000e-03 eta 5:11:26 +epoch [27/50] batch [75/500] time 1.525 (1.565) data 0.000 (0.010) loss 0.9741 (1.1313) acc 65.6250 (70.7917) lr 1.0000e-03 eta 5:11:03 +epoch [27/50] batch [80/500] time 1.542 (1.564) data 0.000 (0.010) loss 0.7505 (1.1245) acc 78.1250 (70.7812) lr 1.0000e-03 eta 5:10:46 +epoch [27/50] batch [85/500] time 1.662 (1.564) data 0.001 (0.009) loss 1.0215 (1.1229) acc 65.6250 (70.5882) lr 1.0000e-03 eta 5:10:40 +epoch [27/50] batch [90/500] time 1.559 (1.564) data 0.000 (0.009) loss 0.8540 (1.1281) acc 78.1250 (70.6250) lr 1.0000e-03 eta 5:10:30 +epoch [27/50] batch [95/500] time 1.527 (1.564) data 0.001 (0.008) loss 0.6553 (1.1178) acc 
84.3750 (70.8224) lr 1.0000e-03 eta 5:10:18 +epoch [27/50] batch [100/500] time 1.570 (1.564) data 0.000 (0.008) loss 0.7451 (1.1134) acc 71.8750 (70.8125) lr 1.0000e-03 eta 5:10:14 +epoch [27/50] batch [105/500] time 1.564 (1.564) data 0.000 (0.007) loss 0.9482 (1.1107) acc 78.1250 (71.0714) lr 1.0000e-03 eta 5:10:06 +epoch [27/50] batch [110/500] time 1.565 (1.565) data 0.000 (0.007) loss 1.1475 (1.1040) acc 71.8750 (71.0227) lr 1.0000e-03 eta 5:10:03 +epoch [27/50] batch [115/500] time 1.548 (1.564) data 0.000 (0.007) loss 1.1875 (1.1020) acc 68.7500 (71.1413) lr 1.0000e-03 eta 5:09:49 +epoch [27/50] batch [120/500] time 1.582 (1.564) data 0.000 (0.007) loss 1.0361 (1.0992) acc 75.0000 (71.2240) lr 1.0000e-03 eta 5:09:40 +epoch [27/50] batch [125/500] time 1.565 (1.564) data 0.001 (0.006) loss 0.9761 (1.1006) acc 78.1250 (71.2250) lr 1.0000e-03 eta 5:09:38 +epoch [27/50] batch [130/500] time 1.572 (1.565) data 0.001 (0.006) loss 0.8975 (1.1075) acc 84.3750 (71.3702) lr 1.0000e-03 eta 5:09:40 +epoch [27/50] batch [135/500] time 1.574 (1.565) data 0.000 (0.006) loss 0.8052 (1.1081) acc 75.0000 (71.3657) lr 1.0000e-03 eta 5:09:32 +epoch [27/50] batch [140/500] time 1.575 (1.565) data 0.000 (0.006) loss 0.8560 (1.1066) acc 84.3750 (71.4062) lr 1.0000e-03 eta 5:09:20 +epoch [27/50] batch [145/500] time 1.559 (1.565) data 0.001 (0.006) loss 1.3701 (1.1110) acc 68.7500 (71.4224) lr 1.0000e-03 eta 5:09:07 +epoch [27/50] batch [150/500] time 1.580 (1.565) data 0.001 (0.005) loss 1.4326 (1.1122) acc 62.5000 (71.3333) lr 1.0000e-03 eta 5:09:03 +epoch [27/50] batch [155/500] time 1.544 (1.565) data 0.000 (0.005) loss 1.4590 (1.1195) acc 59.3750 (71.1895) lr 1.0000e-03 eta 5:08:52 +epoch [27/50] batch [160/500] time 1.555 (1.565) data 0.001 (0.005) loss 0.9565 (1.1178) acc 78.1250 (71.3086) lr 1.0000e-03 eta 5:08:43 +epoch [27/50] batch [165/500] time 1.577 (1.564) data 0.000 (0.005) loss 1.0781 (1.1136) acc 68.7500 (71.4205) lr 1.0000e-03 eta 5:08:33 +epoch [27/50] batch 
[170/500] time 1.567 (1.564) data 0.000 (0.005) loss 0.4539 (1.1062) acc 84.3750 (71.5993) lr 1.0000e-03 eta 5:08:24 +epoch [27/50] batch [175/500] time 1.548 (1.564) data 0.000 (0.005) loss 1.2852 (1.1041) acc 78.1250 (71.7857) lr 1.0000e-03 eta 5:08:18 +epoch [27/50] batch [180/500] time 1.606 (1.565) data 0.000 (0.005) loss 1.1270 (1.1093) acc 71.8750 (71.6493) lr 1.0000e-03 eta 5:08:13 +epoch [27/50] batch [185/500] time 1.558 (1.565) data 0.000 (0.004) loss 1.3525 (1.1046) acc 78.1250 (71.9088) lr 1.0000e-03 eta 5:08:10 +epoch [27/50] batch [190/500] time 1.561 (1.565) data 0.000 (0.004) loss 1.3330 (1.1033) acc 71.8750 (71.8586) lr 1.0000e-03 eta 5:08:03 +epoch [27/50] batch [195/500] time 1.571 (1.565) data 0.000 (0.004) loss 0.8633 (1.1052) acc 81.2500 (71.8910) lr 1.0000e-03 eta 5:07:53 +epoch [27/50] batch [200/500] time 1.547 (1.565) data 0.000 (0.004) loss 1.4062 (1.1114) acc 62.5000 (71.8125) lr 1.0000e-03 eta 5:07:44 +epoch [27/50] batch [205/500] time 1.573 (1.565) data 0.000 (0.004) loss 0.4807 (1.1112) acc 87.5000 (71.8293) lr 1.0000e-03 eta 5:07:34 +epoch [27/50] batch [210/500] time 1.580 (1.564) data 0.000 (0.004) loss 1.0996 (1.1168) acc 78.1250 (71.8006) lr 1.0000e-03 eta 5:07:24 +epoch [27/50] batch [215/500] time 1.580 (1.564) data 0.000 (0.004) loss 1.0430 (1.1111) acc 81.2500 (71.8750) lr 1.0000e-03 eta 5:07:16 +epoch [27/50] batch [220/500] time 1.552 (1.564) data 0.000 (0.004) loss 1.5293 (1.1143) acc 53.1250 (71.8040) lr 1.0000e-03 eta 5:07:05 +epoch [27/50] batch [225/500] time 1.544 (1.564) data 0.000 (0.004) loss 1.6230 (1.1206) acc 62.5000 (71.7361) lr 1.0000e-03 eta 5:06:55 +epoch [27/50] batch [230/500] time 1.551 (1.564) data 0.001 (0.004) loss 0.7124 (1.1141) acc 81.2500 (71.8478) lr 1.0000e-03 eta 5:06:50 +epoch [27/50] batch [235/500] time 1.588 (1.564) data 0.001 (0.004) loss 0.6279 (1.1119) acc 84.3750 (71.9415) lr 1.0000e-03 eta 5:06:43 +epoch [27/50] batch [240/500] time 1.559 (1.564) data 0.000 (0.004) loss 1.5312 
(1.1157) acc 62.5000 (71.8750) lr 1.0000e-03 eta 5:06:34 +epoch [27/50] batch [245/500] time 1.574 (1.564) data 0.000 (0.003) loss 0.8374 (1.1163) acc 81.2500 (71.7857) lr 1.0000e-03 eta 5:06:23 +epoch [27/50] batch [250/500] time 1.535 (1.564) data 0.000 (0.003) loss 0.3015 (1.1100) acc 90.6250 (71.9000) lr 1.0000e-03 eta 5:06:14 +epoch [27/50] batch [255/500] time 1.569 (1.564) data 0.000 (0.003) loss 0.8013 (1.1042) acc 68.7500 (71.9853) lr 1.0000e-03 eta 5:06:04 +epoch [27/50] batch [260/500] time 1.540 (1.563) data 0.000 (0.003) loss 1.4766 (1.1068) acc 75.0000 (72.0433) lr 1.0000e-03 eta 5:05:55 +epoch [27/50] batch [265/500] time 1.541 (1.563) data 0.000 (0.003) loss 0.7612 (1.1046) acc 81.2500 (72.1462) lr 1.0000e-03 eta 5:05:47 +epoch [27/50] batch [270/500] time 1.523 (1.563) data 0.000 (0.003) loss 0.6729 (1.1021) acc 84.3750 (72.1875) lr 1.0000e-03 eta 5:05:37 +epoch [27/50] batch [275/500] time 1.567 (1.564) data 0.000 (0.003) loss 1.5820 (1.1018) acc 65.6250 (72.2045) lr 1.0000e-03 eta 5:05:32 +epoch [27/50] batch [280/500] time 1.536 (1.563) data 0.000 (0.003) loss 1.0020 (1.1053) acc 78.1250 (72.1540) lr 1.0000e-03 eta 5:05:22 +epoch [27/50] batch [285/500] time 1.530 (1.563) data 0.000 (0.003) loss 0.8218 (1.1057) acc 68.7500 (72.1711) lr 1.0000e-03 eta 5:05:12 +epoch [27/50] batch [290/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.1650 (1.1056) acc 62.5000 (72.1659) lr 1.0000e-03 eta 5:05:03 +epoch [27/50] batch [295/500] time 1.592 (1.563) data 0.000 (0.003) loss 0.8511 (1.1062) acc 81.2500 (72.1292) lr 1.0000e-03 eta 5:04:57 +epoch [27/50] batch [300/500] time 1.565 (1.563) data 0.000 (0.003) loss 0.5869 (1.1040) acc 87.5000 (72.2500) lr 1.0000e-03 eta 5:04:48 +epoch [27/50] batch [305/500] time 1.571 (1.563) data 0.001 (0.003) loss 0.6025 (1.1025) acc 84.3750 (72.2439) lr 1.0000e-03 eta 5:04:40 +epoch [27/50] batch [310/500] time 1.549 (1.563) data 0.000 (0.003) loss 2.0762 (1.1054) acc 62.5000 (72.1976) lr 1.0000e-03 eta 5:04:34 +epoch 
[27/50] batch [315/500] time 1.556 (1.563) data 0.000 (0.003) loss 1.1621 (1.1035) acc 81.2500 (72.2619) lr 1.0000e-03 eta 5:04:26 +epoch [27/50] batch [320/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.0176 (1.1021) acc 68.7500 (72.2461) lr 1.0000e-03 eta 5:04:17 +epoch [27/50] batch [325/500] time 1.554 (1.563) data 0.000 (0.003) loss 1.2988 (1.1017) acc 56.2500 (72.1538) lr 1.0000e-03 eta 5:04:07 +epoch [27/50] batch [330/500] time 1.542 (1.563) data 0.001 (0.003) loss 1.5654 (1.1032) acc 68.7500 (72.1402) lr 1.0000e-03 eta 5:03:58 +epoch [27/50] batch [335/500] time 1.568 (1.563) data 0.000 (0.003) loss 1.1123 (1.1023) acc 68.7500 (72.1828) lr 1.0000e-03 eta 5:03:49 +epoch [27/50] batch [340/500] time 1.557 (1.563) data 0.001 (0.003) loss 1.3770 (1.1002) acc 56.2500 (72.2059) lr 1.0000e-03 eta 5:03:42 +epoch [27/50] batch [345/500] time 1.549 (1.563) data 0.001 (0.003) loss 1.6006 (1.1020) acc 59.3750 (72.1467) lr 1.0000e-03 eta 5:03:33 +epoch [27/50] batch [350/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.7197 (1.1006) acc 78.1250 (72.2143) lr 1.0000e-03 eta 5:03:23 +epoch [27/50] batch [355/500] time 1.528 (1.562) data 0.000 (0.003) loss 0.5913 (1.1003) acc 81.2500 (72.2535) lr 1.0000e-03 eta 5:03:13 +epoch [27/50] batch [360/500] time 1.549 (1.562) data 0.000 (0.002) loss 1.3242 (1.0998) acc 68.7500 (72.2917) lr 1.0000e-03 eta 5:03:04 +epoch [27/50] batch [365/500] time 1.530 (1.562) data 0.000 (0.002) loss 0.9141 (1.1004) acc 78.1250 (72.3031) lr 1.0000e-03 eta 5:02:54 +epoch [27/50] batch [370/500] time 1.572 (1.562) data 0.000 (0.002) loss 1.4600 (1.1021) acc 71.8750 (72.3311) lr 1.0000e-03 eta 5:02:47 +epoch [27/50] batch [375/500] time 1.556 (1.562) data 0.000 (0.002) loss 0.5405 (1.0992) acc 81.2500 (72.4000) lr 1.0000e-03 eta 5:02:41 +epoch [27/50] batch [380/500] time 1.549 (1.562) data 0.000 (0.002) loss 1.4277 (1.1013) acc 62.5000 (72.3438) lr 1.0000e-03 eta 5:02:34 +epoch [27/50] batch [385/500] time 1.561 (1.562) data 0.000 (0.002) loss 
0.6938 (1.0982) acc 84.3750 (72.3945) lr 1.0000e-03 eta 5:02:26 +epoch [27/50] batch [390/500] time 1.552 (1.562) data 0.001 (0.002) loss 1.0713 (1.0984) acc 68.7500 (72.3958) lr 1.0000e-03 eta 5:02:18 +epoch [27/50] batch [395/500] time 1.568 (1.562) data 0.000 (0.002) loss 1.4844 (1.1023) acc 59.3750 (72.3022) lr 1.0000e-03 eta 5:02:10 +epoch [27/50] batch [400/500] time 1.580 (1.562) data 0.001 (0.002) loss 0.6489 (1.1039) acc 81.2500 (72.2734) lr 1.0000e-03 eta 5:02:02 +epoch [27/50] batch [405/500] time 1.564 (1.562) data 0.000 (0.002) loss 1.1406 (1.1045) acc 75.0000 (72.2377) lr 1.0000e-03 eta 5:01:55 +epoch [27/50] batch [410/500] time 1.576 (1.562) data 0.000 (0.002) loss 1.1914 (1.1076) acc 68.7500 (72.1570) lr 1.0000e-03 eta 5:01:47 +epoch [27/50] batch [415/500] time 1.624 (1.562) data 0.000 (0.002) loss 0.3599 (1.1036) acc 90.6250 (72.2967) lr 1.0000e-03 eta 5:01:40 +epoch [27/50] batch [420/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.2090 (1.1038) acc 56.2500 (72.2396) lr 1.0000e-03 eta 5:01:31 +epoch [27/50] batch [425/500] time 1.542 (1.562) data 0.000 (0.002) loss 1.0801 (1.1006) acc 71.8750 (72.3382) lr 1.0000e-03 eta 5:01:22 +epoch [27/50] batch [430/500] time 1.580 (1.562) data 0.000 (0.002) loss 1.6543 (1.1015) acc 59.3750 (72.3256) lr 1.0000e-03 eta 5:01:14 +epoch [27/50] batch [435/500] time 1.570 (1.562) data 0.000 (0.002) loss 1.3379 (1.1008) acc 59.3750 (72.2773) lr 1.0000e-03 eta 5:01:06 +epoch [27/50] batch [440/500] time 1.560 (1.562) data 0.001 (0.002) loss 0.8091 (1.1003) acc 84.3750 (72.3509) lr 1.0000e-03 eta 5:00:58 +epoch [27/50] batch [445/500] time 1.536 (1.562) data 0.000 (0.002) loss 0.8506 (1.1003) acc 81.2500 (72.3876) lr 1.0000e-03 eta 5:00:49 +epoch [27/50] batch [450/500] time 1.600 (1.562) data 0.000 (0.002) loss 0.8428 (1.0981) acc 71.8750 (72.4236) lr 1.0000e-03 eta 5:00:43 +epoch [27/50] batch [455/500] time 1.563 (1.562) data 0.000 (0.002) loss 0.7319 (1.0971) acc 75.0000 (72.4107) lr 1.0000e-03 eta 5:00:35 
+epoch [27/50] batch [460/500] time 1.534 (1.562) data 0.000 (0.002) loss 0.9067 (1.0995) acc 84.3750 (72.3913) lr 1.0000e-03 eta 5:00:26 +epoch [27/50] batch [465/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.0762 (1.0981) acc 81.2500 (72.4261) lr 1.0000e-03 eta 5:00:17 +epoch [27/50] batch [470/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.3975 (1.0979) acc 68.7500 (72.4069) lr 1.0000e-03 eta 5:00:09 +epoch [27/50] batch [475/500] time 1.545 (1.562) data 0.000 (0.002) loss 1.0381 (1.0969) acc 75.0000 (72.4408) lr 1.0000e-03 eta 5:00:01 +epoch [27/50] batch [480/500] time 1.546 (1.562) data 0.001 (0.002) loss 0.9902 (1.0969) acc 75.0000 (72.4544) lr 1.0000e-03 eta 4:59:51 +epoch [27/50] batch [485/500] time 1.551 (1.562) data 0.001 (0.002) loss 0.8315 (1.0951) acc 78.1250 (72.5000) lr 1.0000e-03 eta 4:59:42 +epoch [27/50] batch [490/500] time 1.580 (1.562) data 0.000 (0.002) loss 0.7510 (1.0946) acc 81.2500 (72.5128) lr 1.0000e-03 eta 4:59:33 +epoch [27/50] batch [495/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.8794 (1.0932) acc 81.2500 (72.5189) lr 1.0000e-03 eta 4:59:25 +epoch [27/50] batch [500/500] time 1.540 (1.561) data 0.000 (0.002) loss 0.8550 (1.0947) acc 75.0000 (72.4750) lr 9.3721e-04 eta 4:59:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,018 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +epoch [28/50] batch [5/500] time 1.532 (1.689) data 0.001 (0.173) loss 0.5068 (1.0148) acc 87.5000 (71.8750) lr 9.3721e-04 eta 5:23:39 +epoch [28/50] batch [10/500] time 1.566 (1.621) data 0.000 (0.087) loss 0.7607 (1.0102) acc 75.0000 (74.0625) lr 9.3721e-04 eta 5:10:19 +epoch [28/50] batch [15/500] time 1.552 (1.602) data 0.000 (0.058) loss 0.7119 (1.0230) acc 84.3750 (74.5833) lr 9.3721e-04 eta 5:06:42 +epoch [28/50] batch [20/500] time 1.556 (1.589) data 0.000 (0.043) loss 0.9072 (1.0121) acc 71.8750 (74.2188) lr 9.3721e-04 eta 5:04:05 +epoch [28/50] batch [25/500] time 1.572 (1.589) data 0.000 (0.035) loss 1.3262 
(1.0288) acc 78.1250 (73.8750) lr 9.3721e-04 eta 5:03:51 +epoch [28/50] batch [30/500] time 1.531 (1.585) data 0.000 (0.029) loss 0.7266 (1.0227) acc 78.1250 (74.1667) lr 9.3721e-04 eta 5:03:01 +epoch [28/50] batch [35/500] time 1.550 (1.582) data 0.000 (0.025) loss 0.8218 (1.0672) acc 78.1250 (73.4821) lr 9.3721e-04 eta 5:02:19 +epoch [28/50] batch [40/500] time 1.578 (1.581) data 0.000 (0.022) loss 0.8916 (1.0374) acc 81.2500 (74.0625) lr 9.3721e-04 eta 5:01:56 +epoch [28/50] batch [45/500] time 1.576 (1.580) data 0.000 (0.020) loss 1.2324 (1.0525) acc 71.8750 (73.9583) lr 9.3721e-04 eta 5:01:35 +epoch [28/50] batch [50/500] time 1.558 (1.577) data 0.000 (0.018) loss 0.8364 (1.0494) acc 75.0000 (73.9375) lr 9.3721e-04 eta 5:00:55 +epoch [28/50] batch [55/500] time 1.565 (1.576) data 0.000 (0.016) loss 1.0576 (1.0489) acc 71.8750 (74.0341) lr 9.3721e-04 eta 5:00:32 +epoch [28/50] batch [60/500] time 1.577 (1.574) data 0.001 (0.015) loss 1.1562 (1.0524) acc 68.7500 (73.3333) lr 9.3721e-04 eta 5:00:08 +epoch [28/50] batch [65/500] time 1.576 (1.574) data 0.000 (0.014) loss 1.5078 (1.0401) acc 56.2500 (73.6058) lr 9.3721e-04 eta 4:59:54 +epoch [28/50] batch [70/500] time 1.567 (1.573) data 0.000 (0.013) loss 1.5215 (1.0388) acc 71.8750 (73.6161) lr 9.3721e-04 eta 4:59:41 +epoch [28/50] batch [75/500] time 1.570 (1.573) data 0.001 (0.012) loss 0.6753 (1.0355) acc 81.2500 (73.4583) lr 9.3721e-04 eta 4:59:34 +epoch [28/50] batch [80/500] time 1.571 (1.572) data 0.000 (0.011) loss 1.1533 (1.0587) acc 71.8750 (73.0859) lr 9.3721e-04 eta 4:59:17 +epoch [28/50] batch [85/500] time 1.582 (1.572) data 0.000 (0.011) loss 1.0713 (1.0505) acc 75.0000 (73.1985) lr 9.3721e-04 eta 4:59:07 +epoch [28/50] batch [90/500] time 1.550 (1.571) data 0.001 (0.010) loss 0.8667 (1.0432) acc 75.0000 (73.3681) lr 9.3721e-04 eta 4:58:45 +epoch [28/50] batch [95/500] time 1.552 (1.570) data 0.000 (0.009) loss 1.1514 (1.0288) acc 71.8750 (73.5526) lr 9.3721e-04 eta 4:58:27 +epoch [28/50] batch 
[100/500] time 1.587 (1.570) data 0.000 (0.009) loss 1.0391 (1.0271) acc 65.6250 (73.3438) lr 9.3721e-04 eta 4:58:14 +epoch [28/50] batch [105/500] time 1.536 (1.569) data 0.000 (0.009) loss 0.9619 (1.0309) acc 75.0000 (73.2738) lr 9.3721e-04 eta 4:58:02 +epoch [28/50] batch [110/500] time 1.560 (1.569) data 0.000 (0.008) loss 0.9268 (1.0374) acc 75.0000 (73.1818) lr 9.3721e-04 eta 4:57:45 +epoch [28/50] batch [115/500] time 1.562 (1.569) data 0.000 (0.008) loss 1.3828 (1.0399) acc 71.8750 (73.3696) lr 9.3721e-04 eta 4:57:37 +epoch [28/50] batch [120/500] time 1.673 (1.569) data 0.000 (0.008) loss 1.7168 (1.0454) acc 65.6250 (73.3854) lr 9.3721e-04 eta 4:57:39 +epoch [28/50] batch [125/500] time 1.553 (1.569) data 0.000 (0.007) loss 1.0381 (1.0457) acc 75.0000 (73.5000) lr 9.3721e-04 eta 4:57:25 +epoch [28/50] batch [130/500] time 1.558 (1.568) data 0.000 (0.007) loss 0.5400 (1.0559) acc 90.6250 (73.5096) lr 9.3721e-04 eta 4:57:12 +epoch [28/50] batch [135/500] time 1.572 (1.568) data 0.000 (0.007) loss 1.3887 (1.0544) acc 65.6250 (73.5417) lr 9.3721e-04 eta 4:56:59 +epoch [28/50] batch [140/500] time 1.559 (1.568) data 0.000 (0.007) loss 1.2129 (1.0509) acc 68.7500 (73.5938) lr 9.3721e-04 eta 4:56:48 +epoch [28/50] batch [145/500] time 1.557 (1.567) data 0.001 (0.006) loss 0.8921 (1.0502) acc 71.8750 (73.6207) lr 9.3721e-04 eta 4:56:35 +epoch [28/50] batch [150/500] time 1.560 (1.567) data 0.000 (0.006) loss 1.2002 (1.0542) acc 71.8750 (73.5417) lr 9.3721e-04 eta 4:56:23 +epoch [28/50] batch [155/500] time 1.570 (1.567) data 0.000 (0.006) loss 1.1719 (1.0570) acc 75.0000 (73.5484) lr 9.3721e-04 eta 4:56:15 +epoch [28/50] batch [160/500] time 1.559 (1.567) data 0.001 (0.006) loss 0.9536 (1.0582) acc 68.7500 (73.5156) lr 9.3721e-04 eta 4:56:09 +epoch [28/50] batch [165/500] time 1.580 (1.568) data 0.000 (0.006) loss 0.7793 (1.0533) acc 71.8750 (73.4848) lr 9.3721e-04 eta 4:56:16 +epoch [28/50] batch [170/500] time 1.569 (1.568) data 0.000 (0.006) loss 0.6084 
(1.0541) acc 81.2500 (73.4375) lr 9.3721e-04 eta 4:56:09 +epoch [28/50] batch [175/500] time 1.573 (1.568) data 0.000 (0.005) loss 1.2188 (1.0611) acc 75.0000 (73.4107) lr 9.3721e-04 eta 4:56:02 +epoch [28/50] batch [180/500] time 1.545 (1.568) data 0.000 (0.005) loss 1.0273 (1.0612) acc 68.7500 (73.3681) lr 9.3721e-04 eta 4:55:50 +epoch [28/50] batch [185/500] time 1.536 (1.568) data 0.000 (0.005) loss 1.1855 (1.0627) acc 75.0000 (73.3615) lr 9.3721e-04 eta 4:55:39 +epoch [28/50] batch [190/500] time 1.548 (1.568) data 0.001 (0.005) loss 0.8501 (1.0557) acc 81.2500 (73.4868) lr 9.3721e-04 eta 4:55:30 +epoch [28/50] batch [195/500] time 1.575 (1.568) data 0.000 (0.005) loss 1.0840 (1.0556) acc 78.1250 (73.6058) lr 9.3721e-04 eta 4:55:24 +epoch [28/50] batch [200/500] time 1.539 (1.568) data 0.000 (0.005) loss 0.7510 (1.0587) acc 75.0000 (73.4219) lr 9.3721e-04 eta 4:55:12 +epoch [28/50] batch [205/500] time 1.566 (1.567) data 0.000 (0.005) loss 0.8970 (1.0628) acc 75.0000 (73.3384) lr 9.3721e-04 eta 4:55:04 +epoch [28/50] batch [210/500] time 1.541 (1.567) data 0.000 (0.005) loss 0.6240 (1.0534) acc 75.0000 (73.5268) lr 9.3721e-04 eta 4:54:56 +epoch [28/50] batch [215/500] time 1.540 (1.567) data 0.000 (0.004) loss 1.1465 (1.0577) acc 68.7500 (73.4738) lr 9.3721e-04 eta 4:54:47 +epoch [28/50] batch [220/500] time 1.542 (1.567) data 0.000 (0.004) loss 1.6914 (1.0579) acc 59.3750 (73.4659) lr 9.3721e-04 eta 4:54:35 +epoch [28/50] batch [225/500] time 1.547 (1.567) data 0.000 (0.004) loss 0.8418 (1.0580) acc 81.2500 (73.4028) lr 9.3721e-04 eta 4:54:22 +epoch [28/50] batch [230/500] time 1.537 (1.566) data 0.000 (0.004) loss 0.7915 (1.0575) acc 81.2500 (73.4783) lr 9.3721e-04 eta 4:54:11 +epoch [28/50] batch [235/500] time 1.539 (1.566) data 0.000 (0.004) loss 1.3018 (1.0620) acc 59.3750 (73.3910) lr 9.3721e-04 eta 4:53:59 +epoch [28/50] batch [240/500] time 1.534 (1.566) data 0.000 (0.004) loss 1.2627 (1.0655) acc 71.8750 (73.2943) lr 9.3721e-04 eta 4:53:49 +epoch 
[28/50] batch [245/500] time 1.552 (1.565) data 0.000 (0.004) loss 1.4844 (1.0660) acc 71.8750 (73.3291) lr 9.3721e-04 eta 4:53:39 +epoch [28/50] batch [250/500] time 1.578 (1.565) data 0.000 (0.004) loss 0.8926 (1.0668) acc 71.8750 (73.3500) lr 9.3721e-04 eta 4:53:30 +epoch [28/50] batch [255/500] time 1.570 (1.565) data 0.001 (0.004) loss 0.6333 (1.0647) acc 87.5000 (73.3824) lr 9.3721e-04 eta 4:53:23 +epoch [28/50] batch [260/500] time 1.578 (1.565) data 0.000 (0.004) loss 1.5586 (1.0649) acc 62.5000 (73.3293) lr 9.3721e-04 eta 4:53:14 +epoch [28/50] batch [265/500] time 1.559 (1.565) data 0.000 (0.004) loss 1.5879 (1.0696) acc 62.5000 (73.2783) lr 9.3721e-04 eta 4:53:06 +epoch [28/50] batch [270/500] time 1.541 (1.565) data 0.000 (0.004) loss 1.4678 (1.0721) acc 59.3750 (73.2639) lr 9.3721e-04 eta 4:52:55 +epoch [28/50] batch [275/500] time 1.568 (1.565) data 0.000 (0.004) loss 0.8516 (1.0737) acc 78.1250 (73.2386) lr 9.3721e-04 eta 4:52:45 +epoch [28/50] batch [280/500] time 1.551 (1.565) data 0.000 (0.004) loss 0.2900 (1.0752) acc 87.5000 (73.2366) lr 9.3721e-04 eta 4:52:33 +epoch [28/50] batch [285/500] time 1.553 (1.564) data 0.000 (0.003) loss 0.9893 (1.0754) acc 81.2500 (73.2456) lr 9.3721e-04 eta 4:52:25 +epoch [28/50] batch [290/500] time 1.582 (1.564) data 0.000 (0.003) loss 1.1807 (1.0748) acc 59.3750 (73.2543) lr 9.3721e-04 eta 4:52:16 +epoch [28/50] batch [295/500] time 1.551 (1.564) data 0.000 (0.003) loss 1.0996 (1.0762) acc 71.8750 (73.2203) lr 9.3721e-04 eta 4:52:09 +epoch [28/50] batch [300/500] time 1.573 (1.564) data 0.000 (0.003) loss 1.1270 (1.0794) acc 68.7500 (73.2396) lr 9.3721e-04 eta 4:52:00 +epoch [28/50] batch [305/500] time 1.539 (1.564) data 0.000 (0.003) loss 0.9995 (1.0825) acc 75.0000 (73.1967) lr 9.3721e-04 eta 4:51:49 +epoch [28/50] batch [310/500] time 1.562 (1.564) data 0.000 (0.003) loss 1.2373 (1.0858) acc 65.6250 (73.1250) lr 9.3721e-04 eta 4:51:45 +epoch [28/50] batch [315/500] time 1.544 (1.564) data 0.000 (0.003) loss 
1.5283 (1.0863) acc 56.2500 (73.1845) lr 9.3721e-04 eta 4:51:35 +epoch [28/50] batch [320/500] time 1.568 (1.564) data 0.000 (0.003) loss 0.6084 (1.0849) acc 78.1250 (73.2227) lr 9.3721e-04 eta 4:51:27 +epoch [28/50] batch [325/500] time 1.558 (1.564) data 0.001 (0.003) loss 0.6104 (1.0826) acc 87.5000 (73.2692) lr 9.3721e-04 eta 4:51:18 +epoch [28/50] batch [330/500] time 1.561 (1.564) data 0.001 (0.003) loss 0.7949 (1.0834) acc 78.1250 (73.1534) lr 9.3721e-04 eta 4:51:11 +epoch [28/50] batch [335/500] time 1.554 (1.564) data 0.001 (0.003) loss 0.8672 (1.0857) acc 81.2500 (73.0877) lr 9.3721e-04 eta 4:51:01 +epoch [28/50] batch [340/500] time 1.550 (1.564) data 0.001 (0.003) loss 1.0400 (1.0849) acc 71.8750 (73.0790) lr 9.3721e-04 eta 4:50:50 +epoch [28/50] batch [345/500] time 1.554 (1.563) data 0.001 (0.003) loss 0.8135 (1.0859) acc 81.2500 (73.0978) lr 9.3721e-04 eta 4:50:39 +epoch [28/50] batch [350/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.3125 (1.0839) acc 62.5000 (73.0893) lr 9.3721e-04 eta 4:50:28 +epoch [28/50] batch [355/500] time 1.566 (1.563) data 0.000 (0.003) loss 0.8091 (1.0827) acc 75.0000 (73.0810) lr 9.3721e-04 eta 4:50:18 +epoch [28/50] batch [360/500] time 1.563 (1.563) data 0.000 (0.003) loss 1.5605 (1.0816) acc 65.6250 (73.0903) lr 9.3721e-04 eta 4:50:10 +epoch [28/50] batch [365/500] time 1.535 (1.563) data 0.000 (0.003) loss 1.0117 (1.0833) acc 68.7500 (73.0479) lr 9.3721e-04 eta 4:50:00 +epoch [28/50] batch [370/500] time 1.563 (1.562) data 0.000 (0.003) loss 0.9131 (1.0841) acc 75.0000 (73.0068) lr 9.3721e-04 eta 4:49:50 +epoch [28/50] batch [375/500] time 1.548 (1.562) data 0.000 (0.003) loss 1.3447 (1.0837) acc 75.0000 (72.9750) lr 9.3721e-04 eta 4:49:39 +epoch [28/50] batch [380/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.2383 (1.0866) acc 62.5000 (72.9030) lr 9.3721e-04 eta 4:49:31 +epoch [28/50] batch [385/500] time 1.559 (1.562) data 0.001 (0.003) loss 0.8418 (1.0891) acc 68.7500 (72.8490) lr 9.3721e-04 eta 4:49:23 
+epoch [28/50] batch [390/500] time 1.554 (1.562) data 0.000 (0.003) loss 0.9062 (1.0889) acc 71.8750 (72.8446) lr 9.3721e-04 eta 4:49:15 +epoch [28/50] batch [395/500] time 1.559 (1.562) data 0.000 (0.003) loss 1.2334 (1.0899) acc 68.7500 (72.8323) lr 9.3721e-04 eta 4:49:06 +epoch [28/50] batch [400/500] time 1.530 (1.562) data 0.000 (0.003) loss 0.5264 (1.0875) acc 90.6250 (72.8750) lr 9.3721e-04 eta 4:48:56 +epoch [28/50] batch [405/500] time 1.533 (1.562) data 0.001 (0.003) loss 1.2139 (1.0888) acc 75.0000 (72.8704) lr 9.3721e-04 eta 4:48:46 +epoch [28/50] batch [410/500] time 1.543 (1.562) data 0.000 (0.003) loss 1.0664 (1.0877) acc 65.6250 (72.8659) lr 9.3721e-04 eta 4:48:39 +epoch [28/50] batch [415/500] time 1.551 (1.562) data 0.000 (0.002) loss 1.3789 (1.0890) acc 75.0000 (72.7937) lr 9.3721e-04 eta 4:48:30 +epoch [28/50] batch [420/500] time 1.526 (1.561) data 0.000 (0.002) loss 1.0449 (1.0869) acc 75.0000 (72.8125) lr 9.3721e-04 eta 4:48:20 +epoch [28/50] batch [425/500] time 1.556 (1.561) data 0.000 (0.002) loss 1.3965 (1.0856) acc 65.6250 (72.8529) lr 9.3721e-04 eta 4:48:13 +epoch [28/50] batch [430/500] time 1.566 (1.561) data 0.001 (0.002) loss 1.0381 (1.0859) acc 81.2500 (72.8997) lr 9.3721e-04 eta 4:48:03 +epoch [28/50] batch [435/500] time 1.565 (1.561) data 0.000 (0.002) loss 1.3867 (1.0868) acc 59.3750 (72.8233) lr 9.3721e-04 eta 4:47:55 +epoch [28/50] batch [440/500] time 1.551 (1.561) data 0.000 (0.002) loss 0.8457 (1.0856) acc 78.1250 (72.8764) lr 9.3721e-04 eta 4:47:47 +epoch [28/50] batch [445/500] time 1.545 (1.561) data 0.000 (0.002) loss 0.9385 (1.0849) acc 78.1250 (72.8722) lr 9.3721e-04 eta 4:47:37 +epoch [28/50] batch [450/500] time 1.650 (1.561) data 0.000 (0.002) loss 0.6875 (1.0834) acc 90.6250 (72.8958) lr 9.3721e-04 eta 4:47:30 +epoch [28/50] batch [455/500] time 1.565 (1.561) data 0.000 (0.002) loss 0.4670 (1.0820) acc 87.5000 (72.9052) lr 9.3721e-04 eta 4:47:21 +epoch [28/50] batch [460/500] time 1.566 (1.561) data 0.000 
(0.002) loss 1.9707 (1.0841) acc 50.0000 (72.8804) lr 9.3721e-04 eta 4:47:14 +epoch [28/50] batch [465/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.9614 (1.0848) acc 75.0000 (72.8898) lr 9.3721e-04 eta 4:47:06 +epoch [28/50] batch [470/500] time 1.530 (1.561) data 0.000 (0.002) loss 1.1855 (1.0852) acc 65.6250 (72.8457) lr 9.3721e-04 eta 4:46:55 +epoch [28/50] batch [475/500] time 1.544 (1.561) data 0.000 (0.002) loss 0.9258 (1.0861) acc 75.0000 (72.8487) lr 9.3721e-04 eta 4:46:46 +epoch [28/50] batch [480/500] time 1.548 (1.561) data 0.000 (0.002) loss 1.1416 (1.0880) acc 71.8750 (72.7539) lr 9.3721e-04 eta 4:46:37 +epoch [28/50] batch [485/500] time 1.552 (1.561) data 0.001 (0.002) loss 0.8257 (1.0863) acc 75.0000 (72.7577) lr 9.3721e-04 eta 4:46:30 +epoch [28/50] batch [490/500] time 1.538 (1.560) data 0.000 (0.002) loss 0.9048 (1.0861) acc 71.8750 (72.7615) lr 9.3721e-04 eta 4:46:19 +epoch [28/50] batch [495/500] time 1.534 (1.560) data 0.000 (0.002) loss 1.6582 (1.0858) acc 68.7500 (72.7525) lr 9.3721e-04 eta 4:46:09 +epoch [28/50] batch [500/500] time 1.556 (1.560) data 0.000 (0.002) loss 1.1943 (1.0857) acc 71.8750 (72.7812) lr 8.7467e-04 eta 4:46:01 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,065 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [29/50] batch [5/500] time 1.554 (1.661) data 0.000 (0.159) loss 1.0869 (0.9994) acc 78.1250 (71.8750) lr 8.7467e-04 eta 5:04:21 +epoch [29/50] batch [10/500] time 1.550 (1.608) data 0.000 (0.080) loss 1.7656 (1.1471) acc 50.0000 (68.1250) lr 8.7467e-04 eta 4:54:32 +epoch [29/50] batch [15/500] time 1.545 (1.588) data 0.000 (0.053) loss 0.6743 (1.1446) acc 84.3750 (70.6250) lr 8.7467e-04 eta 4:50:46 +epoch [29/50] batch [20/500] time 1.560 (1.580) data 0.000 (0.040) loss 0.6372 (1.1028) acc 81.2500 (70.6250) lr 8.7467e-04 eta 4:49:08 +epoch 
[29/50] batch [25/500] time 1.591 (1.578) data 0.001 (0.032) loss 1.1729 (1.0950) acc 68.7500 (71.0000) lr 8.7467e-04 eta 4:48:33 +epoch [29/50] batch [30/500] time 1.545 (1.574) data 0.001 (0.027) loss 0.9458 (1.0865) acc 65.6250 (71.4583) lr 8.7467e-04 eta 4:47:51 +epoch [29/50] batch [35/500] time 1.567 (1.573) data 0.000 (0.023) loss 0.9517 (1.0908) acc 75.0000 (72.1429) lr 8.7467e-04 eta 4:47:32 +epoch [29/50] batch [40/500] time 1.541 (1.574) data 0.001 (0.020) loss 1.1016 (1.0963) acc 71.8750 (72.3438) lr 8.7467e-04 eta 4:47:34 +epoch [29/50] batch [45/500] time 1.556 (1.573) data 0.000 (0.018) loss 0.6533 (1.1119) acc 87.5000 (72.2917) lr 8.7467e-04 eta 4:47:09 +epoch [29/50] batch [50/500] time 1.544 (1.571) data 0.000 (0.016) loss 1.0225 (1.0912) acc 71.8750 (72.5000) lr 8.7467e-04 eta 4:46:41 +epoch [29/50] batch [55/500] time 1.566 (1.570) data 0.000 (0.015) loss 1.5498 (1.0941) acc 78.1250 (72.8409) lr 8.7467e-04 eta 4:46:19 +epoch [29/50] batch [60/500] time 1.576 (1.569) data 0.001 (0.014) loss 1.2109 (1.0733) acc 68.7500 (73.3854) lr 8.7467e-04 eta 4:46:09 +epoch [29/50] batch [65/500] time 1.537 (1.568) data 0.001 (0.013) loss 1.2900 (1.0818) acc 65.6250 (72.9327) lr 8.7467e-04 eta 4:45:50 +epoch [29/50] batch [70/500] time 1.550 (1.567) data 0.000 (0.012) loss 0.9736 (1.0714) acc 78.1250 (73.1250) lr 8.7467e-04 eta 4:45:27 +epoch [29/50] batch [75/500] time 1.560 (1.566) data 0.000 (0.011) loss 0.9272 (1.0684) acc 68.7500 (72.9583) lr 8.7467e-04 eta 4:45:12 +epoch [29/50] batch [80/500] time 1.555 (1.566) data 0.000 (0.010) loss 1.3418 (1.0721) acc 68.7500 (72.6562) lr 8.7467e-04 eta 4:45:01 +epoch [29/50] batch [85/500] time 1.569 (1.567) data 0.000 (0.010) loss 0.8042 (1.0763) acc 78.1250 (72.4265) lr 8.7467e-04 eta 4:45:01 +epoch [29/50] batch [90/500] time 1.545 (1.566) data 0.000 (0.009) loss 1.7451 (1.0897) acc 59.3750 (72.0139) lr 8.7467e-04 eta 4:44:40 +epoch [29/50] batch [95/500] time 1.573 (1.566) data 0.000 (0.009) loss 1.1826 (1.0837) 
acc 78.1250 (72.3355) lr 8.7467e-04 eta 4:44:34 +epoch [29/50] batch [100/500] time 1.580 (1.565) data 0.000 (0.008) loss 0.4702 (1.0696) acc 84.3750 (72.7188) lr 8.7467e-04 eta 4:44:19 +epoch [29/50] batch [105/500] time 1.586 (1.565) data 0.001 (0.008) loss 0.8184 (1.0643) acc 84.3750 (72.8571) lr 8.7467e-04 eta 4:44:15 +epoch [29/50] batch [110/500] time 1.554 (1.565) data 0.000 (0.008) loss 1.4902 (1.0733) acc 68.7500 (72.7273) lr 8.7467e-04 eta 4:44:05 +epoch [29/50] batch [115/500] time 1.571 (1.565) data 0.001 (0.007) loss 0.7109 (1.0692) acc 75.0000 (72.6630) lr 8.7467e-04 eta 4:43:58 +epoch [29/50] batch [120/500] time 1.580 (1.566) data 0.000 (0.007) loss 1.3818 (1.0751) acc 71.8750 (72.7083) lr 8.7467e-04 eta 4:43:56 +epoch [29/50] batch [125/500] time 1.589 (1.566) data 0.000 (0.007) loss 1.0664 (1.0785) acc 68.7500 (72.6000) lr 8.7467e-04 eta 4:43:49 +epoch [29/50] batch [130/500] time 1.584 (1.566) data 0.000 (0.007) loss 0.5098 (1.0727) acc 84.3750 (72.7644) lr 8.7467e-04 eta 4:43:45 +epoch [29/50] batch [135/500] time 1.569 (1.566) data 0.000 (0.006) loss 1.1514 (1.0710) acc 68.7500 (72.8472) lr 8.7467e-04 eta 4:43:38 +epoch [29/50] batch [140/500] time 1.559 (1.566) data 0.000 (0.006) loss 0.8120 (1.0730) acc 78.1250 (72.7232) lr 8.7467e-04 eta 4:43:31 +epoch [29/50] batch [145/500] time 1.560 (1.566) data 0.000 (0.006) loss 1.1729 (1.0711) acc 71.8750 (72.7802) lr 8.7467e-04 eta 4:43:23 +epoch [29/50] batch [150/500] time 1.562 (1.566) data 0.000 (0.006) loss 0.7808 (1.0709) acc 75.0000 (72.8542) lr 8.7467e-04 eta 4:43:14 +epoch [29/50] batch [155/500] time 1.551 (1.566) data 0.000 (0.006) loss 1.1631 (1.0747) acc 78.1250 (72.8024) lr 8.7467e-04 eta 4:43:00 +epoch [29/50] batch [160/500] time 1.556 (1.566) data 0.001 (0.005) loss 0.9575 (1.0713) acc 78.1250 (72.8125) lr 8.7467e-04 eta 4:42:52 +epoch [29/50] batch [165/500] time 1.569 (1.566) data 0.000 (0.005) loss 0.8135 (1.0739) acc 84.3750 (72.7273) lr 8.7467e-04 eta 4:42:45 +epoch [29/50] 
batch [170/500] time 1.570 (1.566) data 0.001 (0.005) loss 0.5005 (1.0682) acc 81.2500 (72.6838) lr 8.7467e-04 eta 4:42:35 +epoch [29/50] batch [175/500] time 1.574 (1.565) data 0.001 (0.005) loss 1.1523 (1.0764) acc 75.0000 (72.7321) lr 8.7467e-04 eta 4:42:25 +epoch [29/50] batch [180/500] time 1.559 (1.566) data 0.000 (0.005) loss 1.0439 (1.0745) acc 71.8750 (72.7604) lr 8.7467e-04 eta 4:42:21 +epoch [29/50] batch [185/500] time 1.556 (1.566) data 0.001 (0.005) loss 0.8589 (1.0762) acc 71.8750 (72.6520) lr 8.7467e-04 eta 4:42:11 +epoch [29/50] batch [190/500] time 1.585 (1.566) data 0.000 (0.005) loss 1.1279 (1.0829) acc 68.7500 (72.5658) lr 8.7467e-04 eta 4:42:04 +epoch [29/50] batch [195/500] time 1.547 (1.566) data 0.000 (0.004) loss 1.4434 (1.0858) acc 65.6250 (72.5481) lr 8.7467e-04 eta 4:41:57 +epoch [29/50] batch [200/500] time 1.554 (1.566) data 0.000 (0.004) loss 1.2891 (1.0858) acc 65.6250 (72.5156) lr 8.7467e-04 eta 4:41:48 +epoch [29/50] batch [205/500] time 1.562 (1.565) data 0.000 (0.004) loss 0.8394 (1.0821) acc 78.1250 (72.5915) lr 8.7467e-04 eta 4:41:37 +epoch [29/50] batch [210/500] time 1.552 (1.565) data 0.000 (0.004) loss 0.9321 (1.0773) acc 75.0000 (72.6935) lr 8.7467e-04 eta 4:41:27 +epoch [29/50] batch [215/500] time 1.555 (1.565) data 0.000 (0.004) loss 1.4707 (1.0785) acc 65.6250 (72.6890) lr 8.7467e-04 eta 4:41:16 +epoch [29/50] batch [220/500] time 1.562 (1.565) data 0.001 (0.004) loss 1.2383 (1.0780) acc 62.5000 (72.5852) lr 8.7467e-04 eta 4:41:06 +epoch [29/50] batch [225/500] time 1.573 (1.565) data 0.000 (0.004) loss 1.3242 (1.0784) acc 68.7500 (72.5556) lr 8.7467e-04 eta 4:41:03 +epoch [29/50] batch [230/500] time 1.567 (1.565) data 0.000 (0.004) loss 0.7915 (1.0764) acc 78.1250 (72.5951) lr 8.7467e-04 eta 4:40:52 +epoch [29/50] batch [235/500] time 1.601 (1.565) data 0.000 (0.004) loss 1.1357 (1.0751) acc 65.6250 (72.5399) lr 8.7467e-04 eta 4:40:46 +epoch [29/50] batch [240/500] time 1.573 (1.565) data 0.001 (0.004) loss 1.0635 
(1.0726) acc 68.7500 (72.5521) lr 8.7467e-04 eta 4:40:39 +epoch [29/50] batch [245/500] time 1.545 (1.565) data 0.000 (0.004) loss 0.9487 (1.0717) acc 68.7500 (72.5765) lr 8.7467e-04 eta 4:40:29 +epoch [29/50] batch [250/500] time 1.546 (1.565) data 0.000 (0.004) loss 1.0449 (1.0722) acc 75.0000 (72.5375) lr 8.7467e-04 eta 4:40:22 +epoch [29/50] batch [255/500] time 1.548 (1.565) data 0.000 (0.004) loss 0.8853 (1.0734) acc 68.7500 (72.4632) lr 8.7467e-04 eta 4:40:13 +epoch [29/50] batch [260/500] time 1.596 (1.565) data 0.000 (0.003) loss 1.0977 (1.0733) acc 68.7500 (72.4519) lr 8.7467e-04 eta 4:40:05 +epoch [29/50] batch [265/500] time 1.591 (1.565) data 0.001 (0.003) loss 1.5137 (1.0728) acc 65.6250 (72.5236) lr 8.7467e-04 eta 4:39:57 +epoch [29/50] batch [270/500] time 1.575 (1.565) data 0.000 (0.003) loss 0.7812 (1.0716) acc 81.2500 (72.5463) lr 8.7467e-04 eta 4:39:49 +epoch [29/50] batch [275/500] time 1.578 (1.565) data 0.000 (0.003) loss 1.5254 (1.0723) acc 65.6250 (72.5227) lr 8.7467e-04 eta 4:39:40 +epoch [29/50] batch [280/500] time 1.542 (1.565) data 0.000 (0.003) loss 0.7803 (1.0757) acc 71.8750 (72.4107) lr 8.7467e-04 eta 4:39:32 +epoch [29/50] batch [285/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.1836 (1.0778) acc 65.6250 (72.3904) lr 8.7467e-04 eta 4:39:28 +epoch [29/50] batch [290/500] time 1.585 (1.565) data 0.001 (0.003) loss 1.4229 (1.0789) acc 65.6250 (72.3384) lr 8.7467e-04 eta 4:39:22 +epoch [29/50] batch [295/500] time 1.573 (1.565) data 0.000 (0.003) loss 0.8442 (1.0777) acc 75.0000 (72.3517) lr 8.7467e-04 eta 4:39:14 +epoch [29/50] batch [300/500] time 1.561 (1.565) data 0.000 (0.003) loss 0.6836 (1.0765) acc 75.0000 (72.3229) lr 8.7467e-04 eta 4:39:07 +epoch [29/50] batch [305/500] time 1.526 (1.565) data 0.000 (0.003) loss 2.1660 (1.0844) acc 62.5000 (72.2643) lr 8.7467e-04 eta 4:38:57 +epoch [29/50] batch [310/500] time 1.547 (1.565) data 0.000 (0.003) loss 1.3135 (1.0825) acc 65.6250 (72.3185) lr 8.7467e-04 eta 4:38:47 +epoch 
[29/50] batch [315/500] time 1.538 (1.564) data 0.000 (0.003) loss 1.3721 (1.0843) acc 75.0000 (72.3115) lr 8.7467e-04 eta 4:38:36 +epoch [29/50] batch [320/500] time 1.550 (1.564) data 0.000 (0.003) loss 0.8877 (1.0830) acc 71.8750 (72.3535) lr 8.7467e-04 eta 4:38:27 +epoch [29/50] batch [325/500] time 1.549 (1.565) data 0.000 (0.003) loss 0.8931 (1.0799) acc 81.2500 (72.4423) lr 8.7467e-04 eta 4:38:22 +epoch [29/50] batch [330/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.5361 (1.0809) acc 68.7500 (72.4148) lr 8.7467e-04 eta 4:38:14 +epoch [29/50] batch [335/500] time 1.564 (1.565) data 0.000 (0.003) loss 1.0000 (1.0793) acc 78.1250 (72.5000) lr 8.7467e-04 eta 4:38:05 +epoch [29/50] batch [340/500] time 1.576 (1.564) data 0.000 (0.003) loss 0.9521 (1.0804) acc 75.0000 (72.4449) lr 8.7467e-04 eta 4:37:57 +epoch [29/50] batch [345/500] time 1.587 (1.564) data 0.000 (0.003) loss 1.1553 (1.0793) acc 75.0000 (72.4819) lr 8.7467e-04 eta 4:37:49 +epoch [29/50] batch [350/500] time 1.583 (1.565) data 0.001 (0.003) loss 2.1387 (1.0819) acc 62.5000 (72.5000) lr 8.7467e-04 eta 4:37:42 +epoch [29/50] batch [355/500] time 1.554 (1.565) data 0.001 (0.003) loss 1.3486 (1.0828) acc 75.0000 (72.5000) lr 8.7467e-04 eta 4:37:34 +epoch [29/50] batch [360/500] time 1.549 (1.565) data 0.000 (0.003) loss 1.3320 (1.0850) acc 71.8750 (72.4653) lr 8.7467e-04 eta 4:37:27 +epoch [29/50] batch [365/500] time 1.556 (1.565) data 0.000 (0.003) loss 0.6582 (1.0855) acc 71.8750 (72.4229) lr 8.7467e-04 eta 4:37:19 +epoch [29/50] batch [370/500] time 1.566 (1.565) data 0.001 (0.003) loss 1.1611 (1.0868) acc 62.5000 (72.3226) lr 8.7467e-04 eta 4:37:16 +epoch [29/50] batch [375/500] time 1.592 (1.565) data 0.000 (0.003) loss 1.0596 (1.0859) acc 81.2500 (72.3833) lr 8.7467e-04 eta 4:37:08 +epoch [29/50] batch [380/500] time 1.560 (1.565) data 0.000 (0.002) loss 0.9458 (1.0866) acc 71.8750 (72.4013) lr 8.7467e-04 eta 4:37:00 +epoch [29/50] batch [385/500] time 1.553 (1.565) data 0.001 (0.002) loss 
1.1074 (1.0868) acc 71.8750 (72.4026) lr 8.7467e-04 eta 4:36:49 +epoch [29/50] batch [390/500] time 1.548 (1.565) data 0.001 (0.002) loss 0.9038 (1.0880) acc 78.1250 (72.3638) lr 8.7467e-04 eta 4:36:39 +epoch [29/50] batch [395/500] time 1.545 (1.564) data 0.001 (0.002) loss 0.6099 (1.0858) acc 81.2500 (72.4367) lr 8.7467e-04 eta 4:36:30 +epoch [29/50] batch [400/500] time 1.558 (1.564) data 0.000 (0.002) loss 1.0586 (1.0852) acc 78.1250 (72.4688) lr 8.7467e-04 eta 4:36:21 +epoch [29/50] batch [405/500] time 1.562 (1.564) data 0.001 (0.002) loss 1.3232 (1.0843) acc 78.1250 (72.5000) lr 8.7467e-04 eta 4:36:12 +epoch [29/50] batch [410/500] time 1.569 (1.564) data 0.000 (0.002) loss 0.7681 (1.0824) acc 84.3750 (72.5762) lr 8.7467e-04 eta 4:36:03 +epoch [29/50] batch [415/500] time 1.558 (1.564) data 0.000 (0.002) loss 0.8213 (1.0832) acc 75.0000 (72.5979) lr 8.7467e-04 eta 4:35:56 +epoch [29/50] batch [420/500] time 1.575 (1.564) data 0.000 (0.002) loss 1.5830 (1.0839) acc 62.5000 (72.5818) lr 8.7467e-04 eta 4:35:47 +epoch [29/50] batch [425/500] time 1.554 (1.564) data 0.000 (0.002) loss 1.3418 (1.0840) acc 65.6250 (72.5588) lr 8.7467e-04 eta 4:35:37 +epoch [29/50] batch [430/500] time 1.526 (1.564) data 0.000 (0.002) loss 0.6816 (1.0816) acc 78.1250 (72.5654) lr 8.7467e-04 eta 4:35:28 +epoch [29/50] batch [435/500] time 1.566 (1.564) data 0.000 (0.002) loss 1.0381 (1.0804) acc 75.0000 (72.6365) lr 8.7467e-04 eta 4:35:19 +epoch [29/50] batch [440/500] time 1.545 (1.564) data 0.001 (0.002) loss 1.1348 (1.0832) acc 71.8750 (72.5710) lr 8.7467e-04 eta 4:35:11 +epoch [29/50] batch [445/500] time 1.541 (1.564) data 0.001 (0.002) loss 1.7549 (1.0885) acc 62.5000 (72.5000) lr 8.7467e-04 eta 4:35:02 +epoch [29/50] batch [450/500] time 1.568 (1.564) data 0.000 (0.002) loss 0.7939 (1.0895) acc 81.2500 (72.5000) lr 8.7467e-04 eta 4:34:55 +epoch [29/50] batch [455/500] time 1.552 (1.563) data 0.000 (0.002) loss 1.2109 (1.0877) acc 68.7500 (72.5137) lr 8.7467e-04 eta 4:34:45 
+epoch [29/50] batch [460/500] time 1.549 (1.563) data 0.000 (0.002) loss 1.2148 (1.0885) acc 75.0000 (72.4864) lr 8.7467e-04 eta 4:34:37 +epoch [29/50] batch [465/500] time 1.653 (1.563) data 0.000 (0.002) loss 1.2744 (1.0861) acc 71.8750 (72.4933) lr 8.7467e-04 eta 4:34:30 +epoch [29/50] batch [470/500] time 1.579 (1.563) data 0.000 (0.002) loss 1.4268 (1.0861) acc 75.0000 (72.5000) lr 8.7467e-04 eta 4:34:22 +epoch [29/50] batch [475/500] time 1.555 (1.563) data 0.000 (0.002) loss 1.1621 (1.0867) acc 62.5000 (72.4934) lr 8.7467e-04 eta 4:34:13 +epoch [29/50] batch [480/500] time 1.541 (1.563) data 0.000 (0.002) loss 0.8716 (1.0873) acc 75.0000 (72.4740) lr 8.7467e-04 eta 4:34:04 +epoch [29/50] batch [485/500] time 1.537 (1.563) data 0.001 (0.002) loss 1.1348 (1.0846) acc 71.8750 (72.5580) lr 8.7467e-04 eta 4:33:55 +epoch [29/50] batch [490/500] time 1.555 (1.563) data 0.000 (0.002) loss 1.2275 (1.0847) acc 71.8750 (72.5702) lr 8.7467e-04 eta 4:33:48 +epoch [29/50] batch [495/500] time 1.549 (1.563) data 0.000 (0.002) loss 1.0039 (1.0855) acc 71.8750 (72.4937) lr 8.7467e-04 eta 4:33:39 +epoch [29/50] batch [500/500] time 1.533 (1.563) data 0.000 (0.002) loss 0.9341 (1.0856) acc 68.7500 (72.4562) lr 8.1262e-04 eta 4:33:30 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,062 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.7% +epoch [30/50] batch [5/500] time 1.551 (1.654) data 0.001 (0.161) loss 0.7310 (1.0721) acc 78.1250 (75.0000) lr 8.1262e-04 eta 4:49:15 +epoch [30/50] batch [10/500] time 1.574 (1.611) data 0.001 (0.081) loss 0.8491 (1.0624) acc 71.8750 (72.5000) lr 8.1262e-04 eta 4:41:37 +epoch [30/50] batch [15/500] time 1.578 (1.596) data 0.000 (0.054) loss 0.9766 (1.0934) acc 75.0000 (71.4583) lr 8.1262e-04 eta 4:38:57 +epoch [30/50] batch [20/500] time 1.561 (1.594) data 0.000 (0.041) loss 0.9316 (1.0319) acc 75.0000 (73.2812) lr 8.1262e-04 eta 4:38:26 +epoch [30/50] batch [25/500] time 1.582 (1.590) data 0.000 (0.033) loss 0.8945 
(1.0216) acc 75.0000 (73.1250) lr 8.1262e-04 eta 4:37:39 +epoch [30/50] batch [30/500] time 1.584 (1.587) data 0.001 (0.027) loss 1.0635 (1.0608) acc 65.6250 (72.8125) lr 8.1262e-04 eta 4:36:54 +epoch [30/50] batch [35/500] time 1.555 (1.582) data 0.000 (0.023) loss 0.7773 (1.0572) acc 71.8750 (72.9464) lr 8.1262e-04 eta 4:35:59 +epoch [30/50] batch [40/500] time 1.567 (1.579) data 0.000 (0.021) loss 1.1738 (1.0431) acc 65.6250 (73.3594) lr 8.1262e-04 eta 4:35:18 +epoch [30/50] batch [45/500] time 1.555 (1.576) data 0.000 (0.018) loss 1.3389 (1.0544) acc 59.3750 (73.3333) lr 8.1262e-04 eta 4:34:38 +epoch [30/50] batch [50/500] time 1.571 (1.574) data 0.000 (0.017) loss 1.0977 (1.0535) acc 78.1250 (73.3125) lr 8.1262e-04 eta 4:34:11 +epoch [30/50] batch [55/500] time 1.558 (1.573) data 0.000 (0.015) loss 0.8735 (1.0553) acc 75.0000 (73.2955) lr 8.1262e-04 eta 4:33:46 +epoch [30/50] batch [60/500] time 1.586 (1.571) data 0.000 (0.014) loss 1.1523 (1.0429) acc 71.8750 (73.6458) lr 8.1262e-04 eta 4:33:26 +epoch [30/50] batch [65/500] time 1.565 (1.571) data 0.000 (0.013) loss 1.7676 (1.0556) acc 59.3750 (73.1250) lr 8.1262e-04 eta 4:33:11 +epoch [30/50] batch [70/500] time 1.558 (1.570) data 0.001 (0.012) loss 0.9297 (1.0540) acc 71.8750 (73.0804) lr 8.1262e-04 eta 4:32:51 +epoch [30/50] batch [75/500] time 1.553 (1.568) data 0.001 (0.011) loss 1.2627 (1.0613) acc 75.0000 (72.6667) lr 8.1262e-04 eta 4:32:29 +epoch [30/50] batch [80/500] time 1.542 (1.567) data 0.000 (0.010) loss 1.1963 (1.0637) acc 68.7500 (72.7344) lr 8.1262e-04 eta 4:32:10 +epoch [30/50] batch [85/500] time 1.571 (1.567) data 0.000 (0.010) loss 0.8706 (1.0585) acc 65.6250 (72.9412) lr 8.1262e-04 eta 4:32:01 +epoch [30/50] batch [90/500] time 1.561 (1.567) data 0.001 (0.009) loss 1.2500 (1.0680) acc 68.7500 (72.8472) lr 8.1262e-04 eta 4:31:50 +epoch [30/50] batch [95/500] time 1.565 (1.567) data 0.000 (0.009) loss 0.8013 (1.0660) acc 81.2500 (73.0921) lr 8.1262e-04 eta 4:31:41 +epoch [30/50] batch 
[100/500] time 1.570 (1.566) data 0.000 (0.008) loss 1.3779 (1.0650) acc 75.0000 (73.3125) lr 8.1262e-04 eta 4:31:30 +epoch [30/50] batch [105/500] time 1.579 (1.566) data 0.000 (0.008) loss 1.1172 (1.0599) acc 75.0000 (73.2738) lr 8.1262e-04 eta 4:31:19 +epoch [30/50] batch [110/500] time 1.559 (1.566) data 0.000 (0.008) loss 1.2402 (1.0688) acc 62.5000 (72.9261) lr 8.1262e-04 eta 4:31:11 +epoch [30/50] batch [115/500] time 1.661 (1.566) data 0.001 (0.007) loss 1.3193 (1.0770) acc 62.5000 (72.7446) lr 8.1262e-04 eta 4:31:04 +epoch [30/50] batch [120/500] time 1.564 (1.566) data 0.001 (0.007) loss 1.0605 (1.0809) acc 78.1250 (72.6042) lr 8.1262e-04 eta 4:30:58 +epoch [30/50] batch [125/500] time 1.556 (1.566) data 0.000 (0.007) loss 1.1543 (1.0831) acc 68.7500 (72.4000) lr 8.1262e-04 eta 4:30:46 +epoch [30/50] batch [130/500] time 1.552 (1.566) data 0.000 (0.007) loss 0.7612 (1.0777) acc 78.1250 (72.5721) lr 8.1262e-04 eta 4:30:39 +epoch [30/50] batch [135/500] time 1.536 (1.566) data 0.000 (0.006) loss 1.7158 (1.0809) acc 65.6250 (72.5000) lr 8.1262e-04 eta 4:30:30 +epoch [30/50] batch [140/500] time 1.553 (1.566) data 0.000 (0.006) loss 0.9443 (1.0796) acc 78.1250 (72.5670) lr 8.1262e-04 eta 4:30:18 +epoch [30/50] batch [145/500] time 1.543 (1.565) data 0.000 (0.006) loss 0.8057 (1.0777) acc 71.8750 (72.5216) lr 8.1262e-04 eta 4:30:09 +epoch [30/50] batch [150/500] time 1.581 (1.566) data 0.000 (0.006) loss 1.2832 (1.0722) acc 65.6250 (72.5833) lr 8.1262e-04 eta 4:30:07 +epoch [30/50] batch [155/500] time 1.563 (1.566) data 0.000 (0.006) loss 1.2881 (1.0756) acc 71.8750 (72.5605) lr 8.1262e-04 eta 4:29:57 +epoch [30/50] batch [160/500] time 1.558 (1.566) data 0.000 (0.005) loss 1.2822 (1.0774) acc 75.0000 (72.5781) lr 8.1262e-04 eta 4:29:52 +epoch [30/50] batch [165/500] time 1.536 (1.565) data 0.000 (0.005) loss 1.3760 (1.0769) acc 68.7500 (72.6326) lr 8.1262e-04 eta 4:29:36 +epoch [30/50] batch [170/500] time 1.540 (1.565) data 0.000 (0.005) loss 1.0244 
(1.0756) acc 75.0000 (72.5735) lr 8.1262e-04 eta 4:29:25 +epoch [30/50] batch [175/500] time 1.557 (1.564) data 0.000 (0.005) loss 0.9644 (1.0756) acc 71.8750 (72.5893) lr 8.1262e-04 eta 4:29:13 +epoch [30/50] batch [180/500] time 1.544 (1.564) data 0.000 (0.005) loss 1.2588 (1.0742) acc 68.7500 (72.5694) lr 8.1262e-04 eta 4:29:02 +epoch [30/50] batch [185/500] time 1.554 (1.564) data 0.000 (0.005) loss 0.8804 (1.0782) acc 78.1250 (72.4493) lr 8.1262e-04 eta 4:28:53 +epoch [30/50] batch [190/500] time 1.567 (1.564) data 0.000 (0.005) loss 0.8491 (1.0747) acc 78.1250 (72.5493) lr 8.1262e-04 eta 4:28:42 +epoch [30/50] batch [195/500] time 1.550 (1.564) data 0.000 (0.005) loss 1.6152 (1.0798) acc 68.7500 (72.4519) lr 8.1262e-04 eta 4:28:35 +epoch [30/50] batch [200/500] time 1.579 (1.564) data 0.001 (0.004) loss 0.7485 (1.0736) acc 75.0000 (72.5625) lr 8.1262e-04 eta 4:28:28 +epoch [30/50] batch [205/500] time 1.567 (1.564) data 0.001 (0.004) loss 1.0791 (1.0750) acc 68.7500 (72.5610) lr 8.1262e-04 eta 4:28:22 +epoch [30/50] batch [210/500] time 1.544 (1.564) data 0.000 (0.004) loss 1.0684 (1.0772) acc 71.8750 (72.4851) lr 8.1262e-04 eta 4:28:13 +epoch [30/50] batch [215/500] time 1.570 (1.564) data 0.000 (0.004) loss 1.4434 (1.0761) acc 62.5000 (72.5000) lr 8.1262e-04 eta 4:28:06 +epoch [30/50] batch [220/500] time 1.542 (1.564) data 0.001 (0.004) loss 1.8086 (1.0809) acc 50.0000 (72.3295) lr 8.1262e-04 eta 4:27:55 +epoch [30/50] batch [225/500] time 1.569 (1.564) data 0.001 (0.004) loss 0.8931 (1.0759) acc 81.2500 (72.4722) lr 8.1262e-04 eta 4:27:46 +epoch [30/50] batch [230/500] time 1.578 (1.564) data 0.001 (0.004) loss 0.7769 (1.0722) acc 87.5000 (72.5951) lr 8.1262e-04 eta 4:27:38 +epoch [30/50] batch [235/500] time 1.549 (1.563) data 0.000 (0.004) loss 1.1367 (1.0697) acc 71.8750 (72.6596) lr 8.1262e-04 eta 4:27:27 +epoch [30/50] batch [240/500] time 1.561 (1.563) data 0.000 (0.004) loss 1.0127 (1.0728) acc 71.8750 (72.5651) lr 8.1262e-04 eta 4:27:19 +epoch 
[30/50] batch [245/500] time 1.563 (1.563) data 0.000 (0.004) loss 0.5098 (1.0716) acc 87.5000 (72.6276) lr 8.1262e-04 eta 4:27:12 +epoch [30/50] batch [250/500] time 1.545 (1.563) data 0.000 (0.004) loss 1.2080 (1.0696) acc 71.8750 (72.7000) lr 8.1262e-04 eta 4:27:03 +epoch [30/50] batch [255/500] time 1.557 (1.563) data 0.000 (0.004) loss 0.7935 (1.0706) acc 65.6250 (72.5980) lr 8.1262e-04 eta 4:26:54 +epoch [30/50] batch [260/500] time 1.554 (1.563) data 0.000 (0.004) loss 0.7715 (1.0691) acc 87.5000 (72.7163) lr 8.1262e-04 eta 4:26:49 +epoch [30/50] batch [265/500] time 1.578 (1.563) data 0.000 (0.003) loss 1.3457 (1.0734) acc 59.3750 (72.6297) lr 8.1262e-04 eta 4:26:41 +epoch [30/50] batch [270/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.6855 (1.0744) acc 71.8750 (72.6505) lr 8.1262e-04 eta 4:26:34 +epoch [30/50] batch [275/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.2246 (1.0750) acc 75.0000 (72.6136) lr 8.1262e-04 eta 4:26:26 +epoch [30/50] batch [280/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.0381 (1.0760) acc 75.0000 (72.5781) lr 8.1262e-04 eta 4:26:16 +epoch [30/50] batch [285/500] time 1.531 (1.563) data 0.000 (0.003) loss 1.2539 (1.0751) acc 68.7500 (72.5768) lr 8.1262e-04 eta 4:26:05 +epoch [30/50] batch [290/500] time 1.569 (1.563) data 0.000 (0.003) loss 1.0576 (1.0775) acc 68.7500 (72.5647) lr 8.1262e-04 eta 4:25:57 +epoch [30/50] batch [295/500] time 1.538 (1.563) data 0.000 (0.003) loss 1.9863 (1.0828) acc 59.3750 (72.5318) lr 8.1262e-04 eta 4:25:47 +epoch [30/50] batch [300/500] time 1.590 (1.563) data 0.000 (0.003) loss 1.1123 (1.0845) acc 56.2500 (72.3854) lr 8.1262e-04 eta 4:25:41 +epoch [30/50] batch [305/500] time 1.554 (1.563) data 0.000 (0.003) loss 0.9424 (1.0828) acc 78.1250 (72.4283) lr 8.1262e-04 eta 4:25:34 +epoch [30/50] batch [310/500] time 1.601 (1.563) data 0.001 (0.003) loss 0.9609 (1.0825) acc 78.1250 (72.4395) lr 8.1262e-04 eta 4:25:28 +epoch [30/50] batch [315/500] time 1.560 (1.563) data 0.000 (0.003) loss 
1.5283 (1.0851) acc 59.3750 (72.4107) lr 8.1262e-04 eta 4:25:20 +epoch [30/50] batch [320/500] time 1.592 (1.563) data 0.001 (0.003) loss 1.0488 (1.0839) acc 75.0000 (72.4805) lr 8.1262e-04 eta 4:25:13 +epoch [30/50] batch [325/500] time 1.567 (1.563) data 0.000 (0.003) loss 0.4131 (1.0837) acc 87.5000 (72.4808) lr 8.1262e-04 eta 4:25:07 +epoch [30/50] batch [330/500] time 1.566 (1.563) data 0.001 (0.003) loss 1.1152 (1.0869) acc 75.0000 (72.4242) lr 8.1262e-04 eta 4:25:00 +epoch [30/50] batch [335/500] time 1.543 (1.563) data 0.000 (0.003) loss 1.0615 (1.0880) acc 75.0000 (72.4534) lr 8.1262e-04 eta 4:24:51 +epoch [30/50] batch [340/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.0615 (1.0886) acc 68.7500 (72.4265) lr 8.1262e-04 eta 4:24:41 +epoch [30/50] batch [345/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.7632 (1.0873) acc 78.1250 (72.5000) lr 8.1262e-04 eta 4:24:34 +epoch [30/50] batch [350/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.0723 (1.0850) acc 78.1250 (72.5536) lr 8.1262e-04 eta 4:24:24 +epoch [30/50] batch [355/500] time 1.581 (1.563) data 0.001 (0.003) loss 1.4307 (1.0854) acc 59.3750 (72.5440) lr 8.1262e-04 eta 4:24:17 +epoch [30/50] batch [360/500] time 1.548 (1.563) data 0.001 (0.003) loss 0.8296 (1.0850) acc 84.3750 (72.5955) lr 8.1262e-04 eta 4:24:08 +epoch [30/50] batch [365/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.8916 (1.0875) acc 71.8750 (72.5342) lr 8.1262e-04 eta 4:23:59 +epoch [30/50] batch [370/500] time 1.549 (1.563) data 0.000 (0.003) loss 0.7930 (1.0832) acc 75.0000 (72.6182) lr 8.1262e-04 eta 4:23:49 +epoch [30/50] batch [375/500] time 1.551 (1.562) data 0.000 (0.003) loss 1.3467 (1.0853) acc 68.7500 (72.6000) lr 8.1262e-04 eta 4:23:39 +epoch [30/50] batch [380/500] time 1.560 (1.562) data 0.001 (0.003) loss 1.0781 (1.0852) acc 65.6250 (72.5822) lr 8.1262e-04 eta 4:23:30 +epoch [30/50] batch [385/500] time 1.560 (1.562) data 0.000 (0.003) loss 0.8867 (1.0827) acc 75.0000 (72.5893) lr 8.1262e-04 eta 4:23:23 
+epoch [30/50] batch [390/500] time 1.558 (1.562) data 0.000 (0.002) loss 1.3682 (1.0818) acc 62.5000 (72.6122) lr 8.1262e-04 eta 4:23:15 +epoch [30/50] batch [395/500] time 1.528 (1.562) data 0.000 (0.002) loss 1.0605 (1.0823) acc 68.7500 (72.6187) lr 8.1262e-04 eta 4:23:06 +epoch [30/50] batch [400/500] time 1.578 (1.562) data 0.000 (0.002) loss 0.9365 (1.0790) acc 75.0000 (72.6797) lr 8.1262e-04 eta 4:22:58 +epoch [30/50] batch [405/500] time 1.540 (1.562) data 0.000 (0.002) loss 1.5039 (1.0790) acc 68.7500 (72.6389) lr 8.1262e-04 eta 4:22:53 +epoch [30/50] batch [410/500] time 1.596 (1.563) data 0.000 (0.002) loss 0.9634 (1.0787) acc 75.0000 (72.6220) lr 8.1262e-04 eta 4:22:47 +epoch [30/50] batch [415/500] time 1.555 (1.563) data 0.000 (0.002) loss 1.1318 (1.0793) acc 68.7500 (72.6054) lr 8.1262e-04 eta 4:22:39 +epoch [30/50] batch [420/500] time 1.571 (1.563) data 0.000 (0.002) loss 1.0127 (1.0801) acc 81.2500 (72.6339) lr 8.1262e-04 eta 4:22:32 +epoch [30/50] batch [425/500] time 1.559 (1.563) data 0.000 (0.002) loss 0.8848 (1.0789) acc 78.1250 (72.6765) lr 8.1262e-04 eta 4:22:24 +epoch [30/50] batch [430/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.2461 (1.0808) acc 65.6250 (72.6453) lr 8.1262e-04 eta 4:22:18 +epoch [30/50] batch [435/500] time 1.534 (1.563) data 0.001 (0.002) loss 1.1621 (1.0794) acc 75.0000 (72.6796) lr 8.1262e-04 eta 4:22:07 +epoch [30/50] batch [440/500] time 1.555 (1.563) data 0.000 (0.002) loss 2.1270 (1.0797) acc 46.8750 (72.6420) lr 8.1262e-04 eta 4:21:59 +epoch [30/50] batch [445/500] time 1.683 (1.563) data 0.000 (0.002) loss 1.0283 (1.0778) acc 81.2500 (72.7247) lr 8.1262e-04 eta 4:21:54 +epoch [30/50] batch [450/500] time 1.582 (1.563) data 0.000 (0.002) loss 1.2617 (1.0794) acc 71.8750 (72.6944) lr 8.1262e-04 eta 4:21:47 +epoch [30/50] batch [455/500] time 1.565 (1.563) data 0.000 (0.002) loss 0.9663 (1.0770) acc 68.7500 (72.7610) lr 8.1262e-04 eta 4:21:40 +epoch [30/50] batch [460/500] time 1.567 (1.563) data 0.000 
(0.002) loss 1.0615 (1.0781) acc 68.7500 (72.7174) lr 8.1262e-04 eta 4:21:33 +epoch [30/50] batch [465/500] time 1.573 (1.563) data 0.001 (0.002) loss 1.3408 (1.0790) acc 65.6250 (72.6680) lr 8.1262e-04 eta 4:21:27 +epoch [30/50] batch [470/500] time 1.550 (1.563) data 0.000 (0.002) loss 0.9590 (1.0787) acc 68.7500 (72.6928) lr 8.1262e-04 eta 4:21:19 +epoch [30/50] batch [475/500] time 1.546 (1.563) data 0.000 (0.002) loss 1.3135 (1.0788) acc 68.7500 (72.6711) lr 8.1262e-04 eta 4:21:10 +epoch [30/50] batch [480/500] time 1.566 (1.563) data 0.000 (0.002) loss 0.3689 (1.0796) acc 90.6250 (72.6823) lr 8.1262e-04 eta 4:21:03 +epoch [30/50] batch [485/500] time 1.559 (1.563) data 0.000 (0.002) loss 1.1143 (1.0798) acc 65.6250 (72.6546) lr 8.1262e-04 eta 4:20:54 +epoch [30/50] batch [490/500] time 1.578 (1.563) data 0.000 (0.002) loss 1.7549 (1.0829) acc 68.7500 (72.6467) lr 8.1262e-04 eta 4:20:47 +epoch [30/50] batch [495/500] time 1.539 (1.563) data 0.000 (0.002) loss 1.4170 (1.0837) acc 68.7500 (72.6263) lr 8.1262e-04 eta 4:20:38 +epoch [30/50] batch [500/500] time 1.546 (1.563) data 0.000 (0.002) loss 1.1084 (1.0840) acc 65.6250 (72.6125) lr 7.5131e-04 eta 4:20:30 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,949 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +epoch [31/50] batch [5/500] time 1.534 (1.717) data 0.000 (0.199) loss 1.3662 (1.2356) acc 68.7500 (74.3750) lr 7.5131e-04 eta 4:46:05 +epoch [31/50] batch [10/500] time 1.568 (1.639) data 0.000 (0.100) loss 0.9072 (1.0597) acc 68.7500 (73.1250) lr 7.5131e-04 eta 4:32:52 +epoch [31/50] batch [15/500] time 1.591 (1.616) data 0.000 (0.067) loss 0.9956 (1.0345) acc 68.7500 (73.1250) lr 7.5131e-04 eta 4:28:58 +epoch [31/50] batch [20/500] time 1.559 (1.602) data 0.001 (0.050) loss 1.2285 (1.0872) acc 65.6250 (73.1250) lr 7.5131e-04 eta 4:26:28 +epoch [31/50] batch [25/500] time 1.564 (1.595) data 0.000 (0.040) loss 0.9561 (1.0935) acc 81.2500 (72.5000) lr 7.5131e-04 eta 4:25:09 +epoch 
[31/50] batch [30/500] time 1.570 (1.590) data 0.001 (0.034) loss 1.2725 (1.0880) acc 75.0000 (72.3958) lr 7.5131e-04 eta 4:24:13 +epoch [31/50] batch [35/500] time 1.581 (1.587) data 0.001 (0.029) loss 1.2676 (1.0960) acc 65.6250 (72.2321) lr 7.5131e-04 eta 4:23:36 +epoch [31/50] batch [40/500] time 1.543 (1.583) data 0.000 (0.025) loss 0.9839 (1.0733) acc 65.6250 (72.1094) lr 7.5131e-04 eta 4:22:50 +epoch [31/50] batch [45/500] time 1.567 (1.583) data 0.000 (0.023) loss 0.7832 (1.0665) acc 84.3750 (72.8472) lr 7.5131e-04 eta 4:22:40 +epoch [31/50] batch [50/500] time 1.538 (1.580) data 0.000 (0.020) loss 0.7363 (1.0648) acc 84.3750 (72.9375) lr 7.5131e-04 eta 4:22:02 +epoch [31/50] batch [55/500] time 1.544 (1.578) data 0.001 (0.019) loss 1.1377 (1.0712) acc 68.7500 (72.5000) lr 7.5131e-04 eta 4:21:29 +epoch [31/50] batch [60/500] time 1.536 (1.575) data 0.001 (0.017) loss 0.9365 (1.0653) acc 81.2500 (72.5521) lr 7.5131e-04 eta 4:20:57 +epoch [31/50] batch [65/500] time 1.573 (1.574) data 0.000 (0.016) loss 1.2891 (1.0667) acc 71.8750 (72.5962) lr 7.5131e-04 eta 4:20:41 +epoch [31/50] batch [70/500] time 1.545 (1.573) data 0.000 (0.015) loss 1.4375 (1.0733) acc 68.7500 (72.3661) lr 7.5131e-04 eta 4:20:20 +epoch [31/50] batch [75/500] time 1.576 (1.572) data 0.000 (0.014) loss 0.9917 (1.0786) acc 78.1250 (72.3750) lr 7.5131e-04 eta 4:20:03 +epoch [31/50] batch [80/500] time 1.568 (1.571) data 0.001 (0.013) loss 0.7886 (1.0711) acc 71.8750 (72.3828) lr 7.5131e-04 eta 4:19:49 +epoch [31/50] batch [85/500] time 1.569 (1.572) data 0.000 (0.012) loss 0.7642 (1.0756) acc 78.1250 (72.3529) lr 7.5131e-04 eta 4:19:43 +epoch [31/50] batch [90/500] time 1.575 (1.571) data 0.000 (0.011) loss 1.1602 (1.0634) acc 68.7500 (72.5694) lr 7.5131e-04 eta 4:19:32 +epoch [31/50] batch [95/500] time 1.573 (1.572) data 0.000 (0.011) loss 1.1172 (1.0556) acc 75.0000 (72.8618) lr 7.5131e-04 eta 4:19:26 +epoch [31/50] batch [100/500] time 1.568 (1.572) data 0.000 (0.010) loss 0.9990 
(1.0641) acc 75.0000 (72.9375) lr 7.5131e-04 eta 4:19:17 +epoch [31/50] batch [105/500] time 1.551 (1.571) data 0.001 (0.010) loss 1.1445 (1.0563) acc 71.8750 (73.0655) lr 7.5131e-04 eta 4:19:06 +epoch [31/50] batch [110/500] time 1.538 (1.571) data 0.000 (0.009) loss 0.8965 (1.0567) acc 71.8750 (73.0114) lr 7.5131e-04 eta 4:18:52 +epoch [31/50] batch [115/500] time 1.565 (1.571) data 0.001 (0.009) loss 0.6372 (1.0582) acc 78.1250 (72.9620) lr 7.5131e-04 eta 4:18:45 +epoch [31/50] batch [120/500] time 1.565 (1.571) data 0.001 (0.009) loss 1.0859 (1.0606) acc 78.1250 (73.0208) lr 7.5131e-04 eta 4:18:37 +epoch [31/50] batch [125/500] time 1.567 (1.570) data 0.000 (0.008) loss 1.0029 (1.0623) acc 71.8750 (72.8750) lr 7.5131e-04 eta 4:18:27 +epoch [31/50] batch [130/500] time 1.567 (1.570) data 0.001 (0.008) loss 0.5430 (1.0616) acc 81.2500 (73.0529) lr 7.5131e-04 eta 4:18:16 +epoch [31/50] batch [135/500] time 1.581 (1.570) data 0.000 (0.008) loss 1.2949 (1.0609) acc 62.5000 (73.0324) lr 7.5131e-04 eta 4:18:06 +epoch [31/50] batch [140/500] time 1.563 (1.570) data 0.001 (0.008) loss 0.6826 (1.0573) acc 75.0000 (73.0357) lr 7.5131e-04 eta 4:17:59 +epoch [31/50] batch [145/500] time 1.594 (1.571) data 0.000 (0.007) loss 0.9526 (1.0540) acc 75.0000 (73.1034) lr 7.5131e-04 eta 4:17:57 +epoch [31/50] batch [150/500] time 1.562 (1.571) data 0.000 (0.007) loss 0.8545 (1.0538) acc 84.3750 (73.1042) lr 7.5131e-04 eta 4:17:51 +epoch [31/50] batch [155/500] time 1.543 (1.570) data 0.000 (0.007) loss 1.1816 (1.0517) acc 68.7500 (73.1250) lr 7.5131e-04 eta 4:17:38 +epoch [31/50] batch [160/500] time 1.566 (1.570) data 0.000 (0.007) loss 0.5420 (1.0494) acc 87.5000 (73.3594) lr 7.5131e-04 eta 4:17:25 +epoch [31/50] batch [165/500] time 1.555 (1.569) data 0.000 (0.006) loss 1.3574 (1.0465) acc 68.7500 (73.4470) lr 7.5131e-04 eta 4:17:12 +epoch [31/50] batch [170/500] time 1.560 (1.569) data 0.001 (0.006) loss 1.2178 (1.0415) acc 68.7500 (73.4926) lr 7.5131e-04 eta 4:17:01 +epoch 
[31/50] batch [175/500] time 1.530 (1.568) data 0.000 (0.006) loss 1.2197 (1.0455) acc 71.8750 (73.4821) lr 7.5131e-04 eta 4:16:46 +epoch [31/50] batch [180/500] time 1.555 (1.568) data 0.000 (0.006) loss 0.7422 (1.0465) acc 81.2500 (73.5069) lr 7.5131e-04 eta 4:16:34 +epoch [31/50] batch [185/500] time 1.541 (1.567) data 0.001 (0.006) loss 1.0361 (1.0460) acc 75.0000 (73.4628) lr 7.5131e-04 eta 4:16:22 +epoch [31/50] batch [190/500] time 1.533 (1.567) data 0.000 (0.006) loss 0.7202 (1.0479) acc 78.1250 (73.4211) lr 7.5131e-04 eta 4:16:16 +epoch [31/50] batch [195/500] time 1.563 (1.567) data 0.000 (0.006) loss 1.4141 (1.0529) acc 62.5000 (73.2692) lr 7.5131e-04 eta 4:16:07 +epoch [31/50] batch [200/500] time 1.563 (1.567) data 0.001 (0.005) loss 0.8257 (1.0480) acc 81.2500 (73.3906) lr 7.5131e-04 eta 4:15:55 +epoch [31/50] batch [205/500] time 1.569 (1.567) data 0.001 (0.005) loss 0.9805 (1.0520) acc 71.8750 (73.2622) lr 7.5131e-04 eta 4:15:47 +epoch [31/50] batch [210/500] time 1.590 (1.567) data 0.000 (0.005) loss 0.8857 (1.0484) acc 81.2500 (73.4375) lr 7.5131e-04 eta 4:15:40 +epoch [31/50] batch [215/500] time 1.561 (1.567) data 0.001 (0.005) loss 1.8506 (1.0525) acc 65.6250 (73.4012) lr 7.5131e-04 eta 4:15:31 +epoch [31/50] batch [220/500] time 1.562 (1.567) data 0.000 (0.005) loss 0.8125 (1.0529) acc 81.2500 (73.3239) lr 7.5131e-04 eta 4:15:22 +epoch [31/50] batch [225/500] time 1.560 (1.567) data 0.000 (0.005) loss 0.5845 (1.0534) acc 84.3750 (73.3194) lr 7.5131e-04 eta 4:15:13 +epoch [31/50] batch [230/500] time 1.567 (1.566) data 0.000 (0.005) loss 1.0322 (1.0535) acc 65.6250 (73.2065) lr 7.5131e-04 eta 4:15:03 +epoch [31/50] batch [235/500] time 1.556 (1.566) data 0.000 (0.005) loss 1.3350 (1.0613) acc 68.7500 (73.0053) lr 7.5131e-04 eta 4:14:55 +epoch [31/50] batch [240/500] time 1.554 (1.566) data 0.001 (0.005) loss 1.1133 (1.0620) acc 71.8750 (72.9818) lr 7.5131e-04 eta 4:14:47 +epoch [31/50] batch [245/500] time 1.569 (1.566) data 0.001 (0.005) loss 
1.3506 (1.0640) acc 71.8750 (73.0612) lr 7.5131e-04 eta 4:14:39 +epoch [31/50] batch [250/500] time 1.556 (1.566) data 0.000 (0.004) loss 1.3164 (1.0668) acc 59.3750 (72.9750) lr 7.5131e-04 eta 4:14:30 +epoch [31/50] batch [255/500] time 1.550 (1.566) data 0.000 (0.004) loss 1.1865 (1.0705) acc 68.7500 (72.9167) lr 7.5131e-04 eta 4:14:20 +epoch [31/50] batch [260/500] time 1.567 (1.566) data 0.000 (0.004) loss 1.2021 (1.0733) acc 68.7500 (72.8726) lr 7.5131e-04 eta 4:14:12 +epoch [31/50] batch [265/500] time 1.585 (1.566) data 0.001 (0.004) loss 0.7690 (1.0709) acc 81.2500 (72.9363) lr 7.5131e-04 eta 4:14:03 +epoch [31/50] batch [270/500] time 1.584 (1.566) data 0.001 (0.004) loss 1.6240 (1.0740) acc 62.5000 (72.8819) lr 7.5131e-04 eta 4:13:57 +epoch [31/50] batch [275/500] time 1.550 (1.566) data 0.000 (0.004) loss 1.1562 (1.0733) acc 71.8750 (72.9432) lr 7.5131e-04 eta 4:13:48 +epoch [31/50] batch [280/500] time 1.542 (1.566) data 0.000 (0.004) loss 0.8774 (1.0729) acc 71.8750 (72.9241) lr 7.5131e-04 eta 4:13:40 +epoch [31/50] batch [285/500] time 1.666 (1.566) data 0.000 (0.004) loss 2.0078 (1.0756) acc 56.2500 (72.8180) lr 7.5131e-04 eta 4:13:36 +epoch [31/50] batch [290/500] time 1.561 (1.566) data 0.001 (0.004) loss 2.0020 (1.0797) acc 62.5000 (72.7694) lr 7.5131e-04 eta 4:13:28 +epoch [31/50] batch [295/500] time 1.541 (1.566) data 0.001 (0.004) loss 1.0049 (1.0769) acc 87.5000 (72.8496) lr 7.5131e-04 eta 4:13:19 +epoch [31/50] batch [300/500] time 1.564 (1.566) data 0.000 (0.004) loss 1.0449 (1.0744) acc 71.8750 (72.9167) lr 7.5131e-04 eta 4:13:09 +epoch [31/50] batch [305/500] time 1.539 (1.566) data 0.000 (0.004) loss 1.3564 (1.0790) acc 59.3750 (72.7766) lr 7.5131e-04 eta 4:13:03 +epoch [31/50] batch [310/500] time 1.581 (1.566) data 0.001 (0.004) loss 0.9863 (1.0797) acc 81.2500 (72.7823) lr 7.5131e-04 eta 4:12:54 +epoch [31/50] batch [315/500] time 1.539 (1.566) data 0.000 (0.004) loss 1.7324 (1.0864) acc 62.5000 (72.6786) lr 7.5131e-04 eta 4:12:44 
+epoch [31/50] batch [320/500] time 1.578 (1.566) data 0.000 (0.004) loss 1.0010 (1.0844) acc 78.1250 (72.7051) lr 7.5131e-04 eta 4:12:35 +epoch [31/50] batch [325/500] time 1.558 (1.566) data 0.000 (0.004) loss 0.7427 (1.0852) acc 75.0000 (72.7019) lr 7.5131e-04 eta 4:12:26 +epoch [31/50] batch [330/500] time 1.558 (1.566) data 0.000 (0.003) loss 0.5664 (1.0840) acc 84.3750 (72.7746) lr 7.5131e-04 eta 4:12:21 +epoch [31/50] batch [335/500] time 1.558 (1.566) data 0.001 (0.003) loss 1.0371 (1.0838) acc 68.7500 (72.7425) lr 7.5131e-04 eta 4:12:14 +epoch [31/50] batch [340/500] time 1.528 (1.566) data 0.000 (0.003) loss 1.4902 (1.0861) acc 62.5000 (72.6930) lr 7.5131e-04 eta 4:12:04 +epoch [31/50] batch [345/500] time 1.552 (1.565) data 0.001 (0.003) loss 2.1699 (1.0867) acc 53.1250 (72.6902) lr 7.5131e-04 eta 4:11:54 +epoch [31/50] batch [350/500] time 1.554 (1.565) data 0.000 (0.003) loss 1.0479 (1.0870) acc 75.0000 (72.7232) lr 7.5131e-04 eta 4:11:45 +epoch [31/50] batch [355/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.6484 (1.0848) acc 50.0000 (72.7377) lr 7.5131e-04 eta 4:11:37 +epoch [31/50] batch [360/500] time 1.587 (1.566) data 0.000 (0.003) loss 0.9194 (1.0827) acc 71.8750 (72.7604) lr 7.5131e-04 eta 4:11:32 +epoch [31/50] batch [365/500] time 1.569 (1.566) data 0.000 (0.003) loss 1.6650 (1.0864) acc 68.7500 (72.6969) lr 7.5131e-04 eta 4:11:24 +epoch [31/50] batch [370/500] time 1.566 (1.566) data 0.000 (0.003) loss 1.0010 (1.0858) acc 78.1250 (72.7196) lr 7.5131e-04 eta 4:11:16 +epoch [31/50] batch [375/500] time 1.577 (1.566) data 0.000 (0.003) loss 1.4561 (1.0881) acc 62.5000 (72.7250) lr 7.5131e-04 eta 4:11:09 +epoch [31/50] batch [380/500] time 1.571 (1.566) data 0.000 (0.003) loss 1.0811 (1.0896) acc 65.6250 (72.6562) lr 7.5131e-04 eta 4:11:02 +epoch [31/50] batch [385/500] time 1.565 (1.566) data 0.000 (0.003) loss 1.1455 (1.0888) acc 75.0000 (72.6948) lr 7.5131e-04 eta 4:10:54 +epoch [31/50] batch [390/500] time 1.558 (1.566) data 0.001 
(0.003) loss 1.5352 (1.0919) acc 65.6250 (72.6042) lr 7.5131e-04 eta 4:10:45 +epoch [31/50] batch [395/500] time 1.555 (1.565) data 0.000 (0.003) loss 0.6509 (1.0895) acc 78.1250 (72.6661) lr 7.5131e-04 eta 4:10:35 +epoch [31/50] batch [400/500] time 1.568 (1.565) data 0.000 (0.003) loss 0.8467 (1.0906) acc 78.1250 (72.5938) lr 7.5131e-04 eta 4:10:26 +epoch [31/50] batch [405/500] time 1.550 (1.565) data 0.000 (0.003) loss 1.1855 (1.0904) acc 68.7500 (72.6080) lr 7.5131e-04 eta 4:10:16 +epoch [31/50] batch [410/500] time 1.546 (1.565) data 0.000 (0.003) loss 0.9297 (1.0928) acc 78.1250 (72.5838) lr 7.5131e-04 eta 4:10:08 +epoch [31/50] batch [415/500] time 1.553 (1.565) data 0.000 (0.003) loss 0.8501 (1.0936) acc 71.8750 (72.5678) lr 7.5131e-04 eta 4:09:58 +epoch [31/50] batch [420/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.2217 (1.0933) acc 65.6250 (72.5893) lr 7.5131e-04 eta 4:09:51 +epoch [31/50] batch [425/500] time 1.583 (1.565) data 0.000 (0.003) loss 1.2686 (1.0956) acc 75.0000 (72.5809) lr 7.5131e-04 eta 4:09:44 +epoch [31/50] batch [430/500] time 1.571 (1.565) data 0.000 (0.003) loss 1.1582 (1.0955) acc 65.6250 (72.6163) lr 7.5131e-04 eta 4:09:39 +epoch [31/50] batch [435/500] time 1.526 (1.565) data 0.000 (0.003) loss 0.8076 (1.0943) acc 78.1250 (72.6365) lr 7.5131e-04 eta 4:09:30 +epoch [31/50] batch [440/500] time 1.560 (1.565) data 0.001 (0.003) loss 1.4521 (1.0968) acc 59.3750 (72.5639) lr 7.5131e-04 eta 4:09:21 +epoch [31/50] batch [445/500] time 1.573 (1.565) data 0.000 (0.003) loss 1.1904 (1.0962) acc 68.7500 (72.5281) lr 7.5131e-04 eta 4:09:11 +epoch [31/50] batch [450/500] time 1.545 (1.565) data 0.000 (0.003) loss 0.9077 (1.0948) acc 78.1250 (72.5556) lr 7.5131e-04 eta 4:09:03 +epoch [31/50] batch [455/500] time 1.539 (1.565) data 0.000 (0.003) loss 0.8770 (1.0952) acc 75.0000 (72.5412) lr 7.5131e-04 eta 4:08:54 +epoch [31/50] batch [460/500] time 1.563 (1.564) data 0.000 (0.003) loss 1.2051 (1.0955) acc 71.8750 (72.5272) lr 7.5131e-04 
eta 4:08:45 +epoch [31/50] batch [465/500] time 1.561 (1.564) data 0.000 (0.003) loss 0.8052 (1.0957) acc 87.5000 (72.5470) lr 7.5131e-04 eta 4:08:36 +epoch [31/50] batch [470/500] time 1.598 (1.565) data 0.000 (0.003) loss 0.5508 (1.0940) acc 84.3750 (72.6330) lr 7.5131e-04 eta 4:08:29 +epoch [31/50] batch [475/500] time 1.566 (1.565) data 0.000 (0.003) loss 0.6470 (1.0922) acc 78.1250 (72.6842) lr 7.5131e-04 eta 4:08:24 +epoch [31/50] batch [480/500] time 1.555 (1.565) data 0.000 (0.003) loss 1.4639 (1.0942) acc 59.3750 (72.6432) lr 7.5131e-04 eta 4:08:15 +epoch [31/50] batch [485/500] time 1.579 (1.565) data 0.001 (0.002) loss 0.7729 (1.0940) acc 81.2500 (72.6482) lr 7.5131e-04 eta 4:08:08 +epoch [31/50] batch [490/500] time 1.554 (1.565) data 0.000 (0.002) loss 0.8037 (1.0935) acc 68.7500 (72.6403) lr 7.5131e-04 eta 4:07:59 +epoch [31/50] batch [495/500] time 1.540 (1.565) data 0.000 (0.002) loss 1.4971 (1.0957) acc 50.0000 (72.5253) lr 7.5131e-04 eta 4:07:50 +epoch [31/50] batch [500/500] time 1.566 (1.564) data 0.000 (0.002) loss 1.0469 (1.0947) acc 81.2500 (72.5625) lr 6.9098e-04 eta 4:07:41 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,010 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [32/50] batch [5/500] time 1.564 (1.670) data 0.000 (0.167) loss 1.1035 (1.2072) acc 68.7500 (70.0000) lr 6.9098e-04 eta 4:24:16 +epoch [32/50] batch [10/500] time 1.539 (1.609) data 0.000 (0.084) loss 1.0869 (1.1386) acc 62.5000 (69.6875) lr 6.9098e-04 eta 4:14:32 +epoch [32/50] batch [15/500] time 1.554 (1.592) data 0.001 (0.056) loss 0.5576 (1.1084) acc 78.1250 (71.0417) lr 6.9098e-04 eta 4:11:35 +epoch [32/50] batch [20/500] time 1.562 (1.585) data 0.001 (0.042) loss 1.1738 (1.0792) acc 78.1250 (72.3438) lr 6.9098e-04 eta 4:10:30 +epoch [32/50] batch [25/500] time 1.545 (1.579) data 0.001 (0.034) loss 1.0342 (1.0576) acc 68.7500 (72.0000) lr 6.9098e-04 eta 4:09:16 +epoch [32/50] batch [30/500] time 1.594 (1.577) data 0.000 (0.028) 
loss 1.1230 (1.0566) acc 68.7500 (72.1875) lr 6.9098e-04 eta 4:08:52 +epoch [32/50] batch [35/500] time 1.563 (1.574) data 0.001 (0.024) loss 0.9331 (1.0694) acc 78.1250 (72.4107) lr 6.9098e-04 eta 4:08:22 +epoch [32/50] batch [40/500] time 1.571 (1.574) data 0.000 (0.021) loss 0.9502 (1.0842) acc 68.7500 (72.1875) lr 6.9098e-04 eta 4:08:13 +epoch [32/50] batch [45/500] time 1.532 (1.573) data 0.001 (0.019) loss 1.2002 (1.0832) acc 71.8750 (72.4306) lr 6.9098e-04 eta 4:07:53 +epoch [32/50] batch [50/500] time 1.532 (1.571) data 0.001 (0.017) loss 1.2539 (1.0746) acc 68.7500 (72.5625) lr 6.9098e-04 eta 4:07:29 +epoch [32/50] batch [55/500] time 1.540 (1.570) data 0.000 (0.016) loss 0.4421 (1.0478) acc 87.5000 (73.0682) lr 6.9098e-04 eta 4:07:06 +epoch [32/50] batch [60/500] time 1.574 (1.568) data 0.001 (0.014) loss 0.7520 (1.0745) acc 81.2500 (72.5521) lr 6.9098e-04 eta 4:06:45 +epoch [32/50] batch [65/500] time 1.537 (1.569) data 0.001 (0.013) loss 1.1152 (1.0725) acc 65.6250 (72.4519) lr 6.9098e-04 eta 4:06:41 +epoch [32/50] batch [70/500] time 1.562 (1.568) data 0.000 (0.012) loss 0.7729 (1.0601) acc 78.1250 (72.7232) lr 6.9098e-04 eta 4:06:25 +epoch [32/50] batch [75/500] time 1.560 (1.567) data 0.000 (0.012) loss 1.4795 (1.0519) acc 68.7500 (73.0833) lr 6.9098e-04 eta 4:06:08 +epoch [32/50] batch [80/500] time 1.540 (1.566) data 0.000 (0.011) loss 0.6650 (1.0481) acc 78.1250 (73.0078) lr 6.9098e-04 eta 4:05:53 +epoch [32/50] batch [85/500] time 1.559 (1.565) data 0.000 (0.010) loss 0.8198 (1.0471) acc 75.0000 (72.8676) lr 6.9098e-04 eta 4:05:36 +epoch [32/50] batch [90/500] time 1.575 (1.565) data 0.000 (0.010) loss 0.9888 (1.0423) acc 78.1250 (73.0903) lr 6.9098e-04 eta 4:05:26 +epoch [32/50] batch [95/500] time 1.550 (1.565) data 0.001 (0.009) loss 0.6250 (1.0483) acc 87.5000 (73.0592) lr 6.9098e-04 eta 4:05:14 +epoch [32/50] batch [100/500] time 1.599 (1.565) data 0.000 (0.009) loss 1.2715 (1.0483) acc 78.1250 (73.1562) lr 6.9098e-04 eta 4:05:11 +epoch 
[32/50] batch [105/500] time 1.574 (1.565) data 0.000 (0.008) loss 0.9346 (1.0507) acc 71.8750 (73.0655) lr 6.9098e-04 eta 4:05:05 +epoch [32/50] batch [110/500] time 1.524 (1.566) data 0.000 (0.008) loss 0.4937 (1.0434) acc 87.5000 (73.2955) lr 6.9098e-04 eta 4:05:00 +epoch [32/50] batch [115/500] time 1.571 (1.565) data 0.000 (0.008) loss 1.4893 (1.0494) acc 68.7500 (73.1793) lr 6.9098e-04 eta 4:04:47 +epoch [32/50] batch [120/500] time 1.534 (1.565) data 0.001 (0.007) loss 1.2080 (1.0535) acc 68.7500 (73.0990) lr 6.9098e-04 eta 4:04:36 +epoch [32/50] batch [125/500] time 1.555 (1.565) data 0.000 (0.007) loss 1.2969 (1.0482) acc 68.7500 (73.3000) lr 6.9098e-04 eta 4:04:27 +epoch [32/50] batch [130/500] time 1.551 (1.565) data 0.000 (0.007) loss 0.8281 (1.0387) acc 78.1250 (73.5337) lr 6.9098e-04 eta 4:04:20 +epoch [32/50] batch [135/500] time 1.552 (1.564) data 0.000 (0.007) loss 1.1426 (1.0393) acc 65.6250 (73.4491) lr 6.9098e-04 eta 4:04:10 +epoch [32/50] batch [140/500] time 1.560 (1.564) data 0.000 (0.006) loss 1.1064 (1.0462) acc 75.0000 (73.2812) lr 6.9098e-04 eta 4:03:56 +epoch [32/50] batch [145/500] time 1.578 (1.564) data 0.001 (0.006) loss 1.5098 (1.0499) acc 62.5000 (73.2328) lr 6.9098e-04 eta 4:03:52 +epoch [32/50] batch [150/500] time 1.543 (1.564) data 0.000 (0.006) loss 0.5737 (1.0487) acc 78.1250 (73.2083) lr 6.9098e-04 eta 4:03:43 +epoch [32/50] batch [155/500] time 1.569 (1.564) data 0.000 (0.006) loss 0.8960 (1.0470) acc 84.3750 (73.3065) lr 6.9098e-04 eta 4:03:34 +epoch [32/50] batch [160/500] time 1.573 (1.564) data 0.000 (0.006) loss 1.5518 (1.0488) acc 68.7500 (73.2812) lr 6.9098e-04 eta 4:03:26 +epoch [32/50] batch [165/500] time 1.550 (1.564) data 0.001 (0.006) loss 1.0605 (1.0435) acc 62.5000 (73.3523) lr 6.9098e-04 eta 4:03:16 +epoch [32/50] batch [170/500] time 1.541 (1.563) data 0.001 (0.005) loss 1.6152 (1.0475) acc 59.3750 (73.2904) lr 6.9098e-04 eta 4:03:03 +epoch [32/50] batch [175/500] time 1.555 (1.563) data 0.000 (0.005) loss 
1.4658 (1.0546) acc 59.3750 (73.0357) lr 6.9098e-04 eta 4:02:50 +epoch [32/50] batch [180/500] time 1.578 (1.563) data 0.000 (0.005) loss 1.2754 (1.0541) acc 62.5000 (72.8646) lr 6.9098e-04 eta 4:02:43 +epoch [32/50] batch [185/500] time 1.569 (1.563) data 0.000 (0.005) loss 1.4736 (1.0549) acc 68.7500 (72.8885) lr 6.9098e-04 eta 4:02:34 +epoch [32/50] batch [190/500] time 1.597 (1.563) data 0.000 (0.005) loss 0.9092 (1.0509) acc 71.8750 (72.9770) lr 6.9098e-04 eta 4:02:27 +epoch [32/50] batch [195/500] time 1.563 (1.563) data 0.000 (0.005) loss 0.7148 (1.0499) acc 84.3750 (72.9487) lr 6.9098e-04 eta 4:02:20 +epoch [32/50] batch [200/500] time 1.545 (1.562) data 0.000 (0.005) loss 1.3418 (1.0598) acc 75.0000 (72.8438) lr 6.9098e-04 eta 4:02:11 +epoch [32/50] batch [205/500] time 1.622 (1.563) data 0.000 (0.005) loss 1.4316 (1.0641) acc 68.7500 (72.8506) lr 6.9098e-04 eta 4:02:05 +epoch [32/50] batch [210/500] time 1.567 (1.563) data 0.000 (0.004) loss 1.3291 (1.0692) acc 62.5000 (72.7381) lr 6.9098e-04 eta 4:01:59 +epoch [32/50] batch [215/500] time 1.587 (1.563) data 0.000 (0.004) loss 0.6045 (1.0670) acc 84.3750 (72.8052) lr 6.9098e-04 eta 4:01:54 +epoch [32/50] batch [220/500] time 1.560 (1.563) data 0.000 (0.004) loss 0.6895 (1.0644) acc 81.2500 (72.7841) lr 6.9098e-04 eta 4:01:47 +epoch [32/50] batch [225/500] time 1.558 (1.563) data 0.000 (0.004) loss 1.2783 (1.0674) acc 71.8750 (72.7639) lr 6.9098e-04 eta 4:01:40 +epoch [32/50] batch [230/500] time 1.586 (1.564) data 0.001 (0.004) loss 0.9053 (1.0655) acc 68.7500 (72.7989) lr 6.9098e-04 eta 4:01:34 +epoch [32/50] batch [235/500] time 1.566 (1.564) data 0.000 (0.004) loss 0.8257 (1.0656) acc 75.0000 (72.6995) lr 6.9098e-04 eta 4:01:26 +epoch [32/50] batch [240/500] time 1.565 (1.563) data 0.000 (0.004) loss 1.0273 (1.0618) acc 78.1250 (72.8516) lr 6.9098e-04 eta 4:01:15 +epoch [32/50] batch [245/500] time 1.583 (1.564) data 0.000 (0.004) loss 1.3994 (1.0677) acc 68.7500 (72.7679) lr 6.9098e-04 eta 4:01:10 
+epoch [32/50] batch [250/500] time 1.586 (1.564) data 0.000 (0.004) loss 1.0703 (1.0664) acc 71.8750 (72.8500) lr 6.9098e-04 eta 4:01:06 +epoch [32/50] batch [255/500] time 1.573 (1.564) data 0.000 (0.004) loss 1.1455 (1.0663) acc 68.7500 (72.7819) lr 6.9098e-04 eta 4:00:57 +epoch [32/50] batch [260/500] time 1.583 (1.564) data 0.000 (0.004) loss 0.7144 (1.0699) acc 75.0000 (72.7524) lr 6.9098e-04 eta 4:00:51 +epoch [32/50] batch [265/500] time 1.555 (1.564) data 0.000 (0.004) loss 1.0947 (1.0694) acc 78.1250 (72.8420) lr 6.9098e-04 eta 4:00:43 +epoch [32/50] batch [270/500] time 1.571 (1.564) data 0.000 (0.004) loss 1.3057 (1.0713) acc 68.7500 (72.8009) lr 6.9098e-04 eta 4:00:35 +epoch [32/50] batch [275/500] time 1.567 (1.564) data 0.000 (0.003) loss 1.1611 (1.0705) acc 62.5000 (72.7159) lr 6.9098e-04 eta 4:00:29 +epoch [32/50] batch [280/500] time 1.560 (1.564) data 0.000 (0.003) loss 1.1748 (1.0684) acc 75.0000 (72.7567) lr 6.9098e-04 eta 4:00:20 +epoch [32/50] batch [285/500] time 1.561 (1.564) data 0.000 (0.003) loss 0.6763 (1.0661) acc 87.5000 (72.7961) lr 6.9098e-04 eta 4:00:12 +epoch [32/50] batch [290/500] time 1.564 (1.564) data 0.000 (0.003) loss 0.7354 (1.0667) acc 78.1250 (72.7478) lr 6.9098e-04 eta 4:00:03 +epoch [32/50] batch [295/500] time 1.539 (1.564) data 0.000 (0.003) loss 1.2578 (1.0684) acc 65.6250 (72.6589) lr 6.9098e-04 eta 3:59:52 +epoch [32/50] batch [300/500] time 1.562 (1.564) data 0.000 (0.003) loss 1.3301 (1.0715) acc 62.5000 (72.6458) lr 6.9098e-04 eta 3:59:44 +epoch [32/50] batch [305/500] time 1.570 (1.563) data 0.000 (0.003) loss 0.7007 (1.0747) acc 81.2500 (72.6127) lr 6.9098e-04 eta 3:59:34 +epoch [32/50] batch [310/500] time 1.562 (1.563) data 0.000 (0.003) loss 1.1445 (1.0741) acc 65.6250 (72.6210) lr 6.9098e-04 eta 3:59:26 +epoch [32/50] batch [315/500] time 1.561 (1.563) data 0.000 (0.003) loss 1.3711 (1.0753) acc 75.0000 (72.6786) lr 6.9098e-04 eta 3:59:19 +epoch [32/50] batch [320/500] time 1.554 (1.563) data 0.000 
(0.003) loss 1.5781 (1.0751) acc 65.6250 (72.6758) lr 6.9098e-04 eta 3:59:09 +epoch [32/50] batch [325/500] time 1.575 (1.563) data 0.000 (0.003) loss 1.0322 (1.0779) acc 78.1250 (72.6250) lr 6.9098e-04 eta 3:59:01 +epoch [32/50] batch [330/500] time 1.561 (1.563) data 0.000 (0.003) loss 1.1045 (1.0764) acc 75.0000 (72.6799) lr 6.9098e-04 eta 3:58:54 +epoch [32/50] batch [335/500] time 1.560 (1.563) data 0.000 (0.003) loss 0.7959 (1.0759) acc 78.1250 (72.6772) lr 6.9098e-04 eta 3:58:45 +epoch [32/50] batch [340/500] time 1.575 (1.563) data 0.000 (0.003) loss 1.1387 (1.0778) acc 75.0000 (72.6379) lr 6.9098e-04 eta 3:58:37 +epoch [32/50] batch [345/500] time 1.558 (1.563) data 0.000 (0.003) loss 1.7734 (1.0790) acc 59.3750 (72.6178) lr 6.9098e-04 eta 3:58:28 +epoch [32/50] batch [350/500] time 1.574 (1.563) data 0.000 (0.003) loss 0.8022 (1.0796) acc 87.5000 (72.6786) lr 6.9098e-04 eta 3:58:23 +epoch [32/50] batch [355/500] time 1.578 (1.563) data 0.000 (0.003) loss 0.9707 (1.0813) acc 81.2500 (72.7113) lr 6.9098e-04 eta 3:58:17 +epoch [32/50] batch [360/500] time 1.577 (1.563) data 0.000 (0.003) loss 1.2305 (1.0774) acc 68.7500 (72.7517) lr 6.9098e-04 eta 3:58:09 +epoch [32/50] batch [365/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.0488 (1.0805) acc 75.0000 (72.6798) lr 6.9098e-04 eta 3:58:00 +epoch [32/50] batch [370/500] time 1.531 (1.563) data 0.001 (0.003) loss 1.0488 (1.0805) acc 71.8750 (72.6943) lr 6.9098e-04 eta 3:57:52 +epoch [32/50] batch [375/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.2422 (1.0800) acc 65.6250 (72.7083) lr 6.9098e-04 eta 3:57:44 +epoch [32/50] batch [380/500] time 1.546 (1.563) data 0.000 (0.003) loss 0.9365 (1.0782) acc 81.2500 (72.7467) lr 6.9098e-04 eta 3:57:36 +epoch [32/50] batch [385/500] time 1.556 (1.563) data 0.000 (0.003) loss 1.5156 (1.0803) acc 59.3750 (72.6867) lr 6.9098e-04 eta 3:57:28 +epoch [32/50] batch [390/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.2129 (1.0818) acc 71.8750 (72.6603) lr 6.9098e-04 
eta 3:57:19 +epoch [32/50] batch [395/500] time 1.562 (1.563) data 0.000 (0.002) loss 0.6182 (1.0808) acc 84.3750 (72.7215) lr 6.9098e-04 eta 3:57:14 +epoch [32/50] batch [400/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.8521 (1.0789) acc 81.2500 (72.7656) lr 6.9098e-04 eta 3:57:04 +epoch [32/50] batch [405/500] time 1.548 (1.563) data 0.000 (0.002) loss 0.6426 (1.0781) acc 75.0000 (72.7855) lr 6.9098e-04 eta 3:56:57 +epoch [32/50] batch [410/500] time 1.546 (1.563) data 0.000 (0.002) loss 0.9868 (1.0751) acc 71.8750 (72.8277) lr 6.9098e-04 eta 3:56:49 +epoch [32/50] batch [415/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.7832 (1.0726) acc 75.0000 (72.8690) lr 6.9098e-04 eta 3:56:41 +epoch [32/50] batch [420/500] time 1.557 (1.563) data 0.000 (0.002) loss 0.7656 (1.0703) acc 84.3750 (72.9241) lr 6.9098e-04 eta 3:56:33 +epoch [32/50] batch [425/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.8447 (1.0710) acc 59.3750 (72.8971) lr 6.9098e-04 eta 3:56:23 +epoch [32/50] batch [430/500] time 1.559 (1.563) data 0.000 (0.002) loss 1.2119 (1.0722) acc 71.8750 (72.8488) lr 6.9098e-04 eta 3:56:15 +epoch [32/50] batch [435/500] time 1.545 (1.563) data 0.000 (0.002) loss 1.4707 (1.0744) acc 68.7500 (72.8161) lr 6.9098e-04 eta 3:56:07 +epoch [32/50] batch [440/500] time 1.558 (1.563) data 0.000 (0.002) loss 0.6177 (1.0754) acc 81.2500 (72.7912) lr 6.9098e-04 eta 3:55:59 +epoch [32/50] batch [445/500] time 1.591 (1.563) data 0.000 (0.002) loss 1.4678 (1.0776) acc 71.8750 (72.7177) lr 6.9098e-04 eta 3:55:50 +epoch [32/50] batch [450/500] time 1.554 (1.563) data 0.000 (0.002) loss 1.2158 (1.0769) acc 71.8750 (72.7639) lr 6.9098e-04 eta 3:55:43 +epoch [32/50] batch [455/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.4434 (1.0775) acc 65.6250 (72.7473) lr 6.9098e-04 eta 3:55:35 +epoch [32/50] batch [460/500] time 1.566 (1.563) data 0.000 (0.002) loss 1.6025 (1.0790) acc 50.0000 (72.6902) lr 6.9098e-04 eta 3:55:27 +epoch [32/50] batch [465/500] time 1.568 (1.563) data 
0.000 (0.002) loss 1.0557 (1.0765) acc 75.0000 (72.7755) lr 6.9098e-04 eta 3:55:19 +epoch [32/50] batch [470/500] time 1.555 (1.563) data 0.000 (0.002) loss 1.2363 (1.0790) acc 68.7500 (72.7527) lr 6.9098e-04 eta 3:55:11 +epoch [32/50] batch [475/500] time 1.557 (1.563) data 0.000 (0.002) loss 0.9082 (1.0814) acc 75.0000 (72.7237) lr 6.9098e-04 eta 3:55:03 +epoch [32/50] batch [480/500] time 1.586 (1.563) data 0.000 (0.002) loss 1.6289 (1.0806) acc 78.1250 (72.7734) lr 6.9098e-04 eta 3:54:55 +epoch [32/50] batch [485/500] time 1.544 (1.563) data 0.001 (0.002) loss 0.8623 (1.0790) acc 75.0000 (72.8028) lr 6.9098e-04 eta 3:54:45 +epoch [32/50] batch [490/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.8257 (1.0785) acc 78.1250 (72.8125) lr 6.9098e-04 eta 3:54:37 +epoch [32/50] batch [495/500] time 1.586 (1.563) data 0.000 (0.002) loss 1.0889 (1.0788) acc 71.8750 (72.7967) lr 6.9098e-04 eta 3:54:31 +epoch [32/50] batch [500/500] time 1.557 (1.563) data 0.000 (0.002) loss 1.4375 (1.0793) acc 71.8750 (72.7812) lr 6.3188e-04 eta 3:54:22 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,083 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [33/50] batch [5/500] time 1.539 (1.643) data 0.000 (0.156) loss 0.9487 (1.1862) acc 78.1250 (70.0000) lr 6.3188e-04 eta 4:06:20 +epoch [33/50] batch [10/500] time 1.562 (1.600) data 0.001 (0.078) loss 0.8979 (1.1542) acc 81.2500 (71.5625) lr 6.3188e-04 eta 3:59:39 +epoch [33/50] batch [15/500] time 1.564 (1.586) data 0.000 (0.052) loss 1.0078 (1.0964) acc 68.7500 (72.5000) lr 6.3188e-04 eta 3:57:27 +epoch [33/50] batch [20/500] time 1.562 (1.581) data 0.000 (0.039) loss 1.4004 (1.1304) acc 56.2500 (71.2500) lr 6.3188e-04 eta 3:56:40 +epoch [33/50] batch [25/500] time 1.570 (1.579) data 0.000 (0.032) loss 0.5640 (1.0452) acc 84.3750 (72.8750) lr 6.3188e-04 eta 3:56:11 
+epoch [33/50] batch [30/500] time 1.583 (1.577) data 0.000 (0.026) loss 1.0566 (1.0753) acc 78.1250 (72.1875) lr 6.3188e-04 eta 3:55:47 +epoch [33/50] batch [35/500] time 1.551 (1.575) data 0.000 (0.023) loss 1.5713 (1.1019) acc 56.2500 (72.1429) lr 6.3188e-04 eta 3:55:22 +epoch [33/50] batch [40/500] time 1.559 (1.577) data 0.000 (0.020) loss 0.9424 (1.0596) acc 75.0000 (73.2031) lr 6.3188e-04 eta 3:55:32 +epoch [33/50] batch [45/500] time 1.575 (1.577) data 0.000 (0.018) loss 0.5483 (1.0471) acc 87.5000 (73.2639) lr 6.3188e-04 eta 3:55:18 +epoch [33/50] batch [50/500] time 1.541 (1.575) data 0.000 (0.016) loss 1.0830 (1.0400) acc 65.6250 (73.5000) lr 6.3188e-04 eta 3:54:55 +epoch [33/50] batch [55/500] time 1.543 (1.573) data 0.001 (0.015) loss 1.0986 (1.0487) acc 78.1250 (73.3523) lr 6.3188e-04 eta 3:54:27 +epoch [33/50] batch [60/500] time 1.573 (1.571) data 0.000 (0.013) loss 0.8994 (1.0457) acc 75.0000 (73.3333) lr 6.3188e-04 eta 3:54:08 +epoch [33/50] batch [65/500] time 1.551 (1.570) data 0.000 (0.012) loss 0.8994 (1.0542) acc 78.1250 (73.2692) lr 6.3188e-04 eta 3:53:50 +epoch [33/50] batch [70/500] time 1.557 (1.569) data 0.001 (0.012) loss 1.2129 (1.0565) acc 68.7500 (73.1696) lr 6.3188e-04 eta 3:53:33 +epoch [33/50] batch [75/500] time 1.563 (1.569) data 0.000 (0.011) loss 0.9077 (1.0394) acc 84.3750 (73.8333) lr 6.3188e-04 eta 3:53:25 +epoch [33/50] batch [80/500] time 1.585 (1.570) data 0.000 (0.010) loss 1.4443 (1.0334) acc 68.7500 (73.8672) lr 6.3188e-04 eta 3:53:26 +epoch [33/50] batch [85/500] time 1.554 (1.570) data 0.001 (0.010) loss 0.6646 (1.0334) acc 81.2500 (73.8603) lr 6.3188e-04 eta 3:53:12 +epoch [33/50] batch [90/500] time 1.565 (1.569) data 0.001 (0.009) loss 1.2695 (1.0331) acc 65.6250 (73.8542) lr 6.3188e-04 eta 3:53:02 +epoch [33/50] batch [95/500] time 1.560 (1.569) data 0.001 (0.009) loss 1.6455 (1.0386) acc 71.8750 (73.8816) lr 6.3188e-04 eta 3:52:52 +epoch [33/50] batch [100/500] time 1.572 (1.569) data 0.000 (0.008) loss 0.8813 
(1.0413) acc 75.0000 (73.6875) lr 6.3188e-04 eta 3:52:44 +epoch [33/50] batch [105/500] time 1.549 (1.569) data 0.000 (0.008) loss 1.6006 (1.0385) acc 56.2500 (73.7798) lr 6.3188e-04 eta 3:52:32 +epoch [33/50] batch [110/500] time 1.554 (1.568) data 0.000 (0.008) loss 1.2148 (1.0453) acc 71.8750 (73.7216) lr 6.3188e-04 eta 3:52:21 +epoch [33/50] batch [115/500] time 1.569 (1.568) data 0.000 (0.007) loss 0.9287 (1.0461) acc 75.0000 (73.8315) lr 6.3188e-04 eta 3:52:11 +epoch [33/50] batch [120/500] time 1.561 (1.568) data 0.001 (0.007) loss 0.9839 (1.0469) acc 71.8750 (73.8281) lr 6.3188e-04 eta 3:52:01 +epoch [33/50] batch [125/500] time 1.535 (1.567) data 0.001 (0.007) loss 1.3623 (1.0452) acc 68.7500 (73.9000) lr 6.3188e-04 eta 3:51:48 +epoch [33/50] batch [130/500] time 1.570 (1.567) data 0.001 (0.006) loss 1.1924 (1.0522) acc 81.2500 (73.8462) lr 6.3188e-04 eta 3:51:41 +epoch [33/50] batch [135/500] time 1.560 (1.567) data 0.000 (0.006) loss 0.5439 (1.0400) acc 81.2500 (74.0046) lr 6.3188e-04 eta 3:51:30 +epoch [33/50] batch [140/500] time 1.573 (1.568) data 0.000 (0.006) loss 0.8921 (1.0310) acc 75.0000 (74.1964) lr 6.3188e-04 eta 3:51:28 +epoch [33/50] batch [145/500] time 1.564 (1.567) data 0.001 (0.006) loss 0.6802 (1.0276) acc 87.5000 (74.3750) lr 6.3188e-04 eta 3:51:16 +epoch [33/50] batch [150/500] time 1.581 (1.567) data 0.000 (0.006) loss 0.2839 (1.0245) acc 93.7500 (74.5208) lr 6.3188e-04 eta 3:51:05 +epoch [33/50] batch [155/500] time 1.560 (1.567) data 0.000 (0.005) loss 1.1416 (1.0210) acc 62.5000 (74.6573) lr 6.3188e-04 eta 3:50:56 +epoch [33/50] batch [160/500] time 1.567 (1.567) data 0.001 (0.005) loss 1.2266 (1.0200) acc 65.6250 (74.6484) lr 6.3188e-04 eta 3:50:50 +epoch [33/50] batch [165/500] time 1.558 (1.567) data 0.000 (0.005) loss 0.9702 (1.0224) acc 78.1250 (74.6023) lr 6.3188e-04 eta 3:50:40 +epoch [33/50] batch [170/500] time 1.562 (1.567) data 0.000 (0.005) loss 1.2959 (1.0236) acc 59.3750 (74.6140) lr 6.3188e-04 eta 3:50:33 +epoch 
[33/50] batch [175/500] time 1.546 (1.566) data 0.001 (0.005) loss 1.1787 (1.0282) acc 71.8750 (74.4643) lr 6.3188e-04 eta 3:50:24 +epoch [33/50] batch [180/500] time 1.565 (1.566) data 0.000 (0.005) loss 1.3896 (1.0333) acc 68.7500 (74.3056) lr 6.3188e-04 eta 3:50:13 +epoch [33/50] batch [185/500] time 1.539 (1.566) data 0.001 (0.005) loss 1.0332 (1.0326) acc 75.0000 (74.3412) lr 6.3188e-04 eta 3:50:04 +epoch [33/50] batch [190/500] time 1.573 (1.566) data 0.001 (0.005) loss 2.2070 (1.0370) acc 46.8750 (74.1941) lr 6.3188e-04 eta 3:49:55 +epoch [33/50] batch [195/500] time 1.554 (1.566) data 0.001 (0.004) loss 1.1318 (1.0412) acc 68.7500 (74.0865) lr 6.3188e-04 eta 3:49:47 +epoch [33/50] batch [200/500] time 1.573 (1.566) data 0.000 (0.004) loss 0.9995 (1.0411) acc 71.8750 (74.0000) lr 6.3188e-04 eta 3:49:39 +epoch [33/50] batch [205/500] time 1.551 (1.566) data 0.001 (0.004) loss 0.7979 (1.0408) acc 75.0000 (73.9177) lr 6.3188e-04 eta 3:49:31 +epoch [33/50] batch [210/500] time 1.557 (1.566) data 0.000 (0.004) loss 1.0195 (1.0434) acc 78.1250 (73.8988) lr 6.3188e-04 eta 3:49:21 +epoch [33/50] batch [215/500] time 1.561 (1.566) data 0.000 (0.004) loss 1.1533 (1.0487) acc 71.8750 (73.7064) lr 6.3188e-04 eta 3:49:13 +epoch [33/50] batch [220/500] time 1.556 (1.566) data 0.000 (0.004) loss 0.9238 (1.0497) acc 71.8750 (73.7358) lr 6.3188e-04 eta 3:49:05 +epoch [33/50] batch [225/500] time 1.551 (1.566) data 0.000 (0.004) loss 1.1680 (1.0490) acc 65.6250 (73.7639) lr 6.3188e-04 eta 3:48:57 +epoch [33/50] batch [230/500] time 1.548 (1.565) data 0.000 (0.004) loss 1.3447 (1.0479) acc 62.5000 (73.7772) lr 6.3188e-04 eta 3:48:47 +epoch [33/50] batch [235/500] time 1.547 (1.565) data 0.000 (0.004) loss 1.0615 (1.0492) acc 78.1250 (73.7633) lr 6.3188e-04 eta 3:48:37 +epoch [33/50] batch [240/500] time 1.550 (1.565) data 0.000 (0.004) loss 1.1250 (1.0457) acc 75.0000 (73.8802) lr 6.3188e-04 eta 3:48:27 +epoch [33/50] batch [245/500] time 1.560 (1.565) data 0.000 (0.004) loss 
1.1631 (1.0455) acc 59.3750 (73.8648) lr 6.3188e-04 eta 3:48:17 +epoch [33/50] batch [250/500] time 1.581 (1.564) data 0.000 (0.004) loss 0.5503 (1.0472) acc 71.8750 (73.8375) lr 6.3188e-04 eta 3:48:08 +epoch [33/50] batch [255/500] time 1.578 (1.564) data 0.000 (0.004) loss 1.1250 (1.0471) acc 71.8750 (73.8725) lr 6.3188e-04 eta 3:48:00 +epoch [33/50] batch [260/500] time 1.584 (1.564) data 0.000 (0.003) loss 1.2559 (1.0480) acc 65.6250 (73.8822) lr 6.3188e-04 eta 3:47:53 +epoch [33/50] batch [265/500] time 1.568 (1.564) data 0.001 (0.003) loss 1.0762 (1.0450) acc 62.5000 (73.8443) lr 6.3188e-04 eta 3:47:45 +epoch [33/50] batch [270/500] time 1.556 (1.564) data 0.000 (0.003) loss 1.1191 (1.0468) acc 81.2500 (73.7847) lr 6.3188e-04 eta 3:47:36 +epoch [33/50] batch [275/500] time 1.551 (1.564) data 0.001 (0.003) loss 1.2666 (1.0486) acc 78.1250 (73.8182) lr 6.3188e-04 eta 3:47:28 +epoch [33/50] batch [280/500] time 1.729 (1.565) data 0.000 (0.003) loss 0.8574 (1.0498) acc 81.2500 (73.7835) lr 6.3188e-04 eta 3:47:26 +epoch [33/50] batch [285/500] time 1.575 (1.565) data 0.000 (0.003) loss 1.3857 (1.0524) acc 71.8750 (73.7829) lr 6.3188e-04 eta 3:47:18 +epoch [33/50] batch [290/500] time 1.561 (1.565) data 0.001 (0.003) loss 0.9912 (1.0517) acc 78.1250 (73.8470) lr 6.3188e-04 eta 3:47:11 +epoch [33/50] batch [295/500] time 1.592 (1.565) data 0.001 (0.003) loss 0.7354 (1.0502) acc 84.3750 (73.8983) lr 6.3188e-04 eta 3:47:02 +epoch [33/50] batch [300/500] time 1.561 (1.565) data 0.000 (0.003) loss 1.4404 (1.0515) acc 68.7500 (73.9167) lr 6.3188e-04 eta 3:46:53 +epoch [33/50] batch [305/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.4131 (1.0534) acc 68.7500 (73.8934) lr 6.3188e-04 eta 3:46:44 +epoch [33/50] batch [310/500] time 1.532 (1.565) data 0.000 (0.003) loss 0.8159 (1.0513) acc 71.8750 (73.8911) lr 6.3188e-04 eta 3:46:35 +epoch [33/50] batch [315/500] time 1.579 (1.564) data 0.000 (0.003) loss 0.8784 (1.0495) acc 81.2500 (73.9286) lr 6.3188e-04 eta 3:46:27 
+epoch [33/50] batch [320/500] time 1.560 (1.564) data 0.000 (0.003) loss 1.3525 (1.0481) acc 65.6250 (73.9062) lr 6.3188e-04 eta 3:46:18 +epoch [33/50] batch [325/500] time 1.564 (1.565) data 0.001 (0.003) loss 0.3372 (1.0466) acc 93.7500 (73.9038) lr 6.3188e-04 eta 3:46:15 +epoch [33/50] batch [330/500] time 1.557 (1.565) data 0.001 (0.003) loss 0.6543 (1.0464) acc 78.1250 (73.9394) lr 6.3188e-04 eta 3:46:05 +epoch [33/50] batch [335/500] time 1.578 (1.565) data 0.000 (0.003) loss 0.8633 (1.0448) acc 78.1250 (73.9832) lr 6.3188e-04 eta 3:45:59 +epoch [33/50] batch [340/500] time 1.576 (1.565) data 0.000 (0.003) loss 0.5996 (1.0425) acc 81.2500 (73.9982) lr 6.3188e-04 eta 3:45:51 +epoch [33/50] batch [345/500] time 1.552 (1.565) data 0.000 (0.003) loss 1.0469 (1.0466) acc 68.7500 (73.9221) lr 6.3188e-04 eta 3:45:42 +epoch [33/50] batch [350/500] time 1.559 (1.565) data 0.000 (0.003) loss 0.6206 (1.0464) acc 78.1250 (73.8750) lr 6.3188e-04 eta 3:45:34 +epoch [33/50] batch [355/500] time 1.576 (1.564) data 0.000 (0.003) loss 0.9937 (1.0500) acc 75.0000 (73.7764) lr 6.3188e-04 eta 3:45:24 +epoch [33/50] batch [360/500] time 1.541 (1.564) data 0.000 (0.003) loss 1.0225 (1.0523) acc 68.7500 (73.6979) lr 6.3188e-04 eta 3:45:16 +epoch [33/50] batch [365/500] time 1.574 (1.565) data 0.000 (0.003) loss 1.0283 (1.0485) acc 71.8750 (73.8014) lr 6.3188e-04 eta 3:45:09 +epoch [33/50] batch [370/500] time 1.563 (1.565) data 0.000 (0.003) loss 1.2324 (1.0521) acc 65.6250 (73.6993) lr 6.3188e-04 eta 3:45:01 +epoch [33/50] batch [375/500] time 1.561 (1.564) data 0.000 (0.003) loss 0.4458 (1.0524) acc 84.3750 (73.6667) lr 6.3188e-04 eta 3:44:52 +epoch [33/50] batch [380/500] time 1.556 (1.564) data 0.000 (0.003) loss 1.1914 (1.0515) acc 71.8750 (73.6760) lr 6.3188e-04 eta 3:44:43 +epoch [33/50] batch [385/500] time 1.597 (1.564) data 0.000 (0.002) loss 0.8804 (1.0489) acc 81.2500 (73.7175) lr 6.3188e-04 eta 3:44:35 +epoch [33/50] batch [390/500] time 1.562 (1.564) data 0.000 
(0.002) loss 1.2354 (1.0479) acc 68.7500 (73.7019) lr 6.3188e-04 eta 3:44:27 +epoch [33/50] batch [395/500] time 1.558 (1.564) data 0.001 (0.002) loss 0.8027 (1.0468) acc 78.1250 (73.6946) lr 6.3188e-04 eta 3:44:19 +epoch [33/50] batch [400/500] time 1.556 (1.564) data 0.000 (0.002) loss 0.9331 (1.0479) acc 75.0000 (73.6719) lr 6.3188e-04 eta 3:44:10 +epoch [33/50] batch [405/500] time 1.551 (1.564) data 0.000 (0.002) loss 1.2744 (1.0511) acc 71.8750 (73.6034) lr 6.3188e-04 eta 3:44:02 +epoch [33/50] batch [410/500] time 1.567 (1.564) data 0.001 (0.002) loss 0.9155 (1.0511) acc 78.1250 (73.6280) lr 6.3188e-04 eta 3:43:53 +epoch [33/50] batch [415/500] time 1.544 (1.564) data 0.000 (0.002) loss 1.1963 (1.0517) acc 75.0000 (73.5994) lr 6.3188e-04 eta 3:43:45 +epoch [33/50] batch [420/500] time 1.570 (1.564) data 0.000 (0.002) loss 0.8618 (1.0514) acc 75.0000 (73.5789) lr 6.3188e-04 eta 3:43:36 +epoch [33/50] batch [425/500] time 1.550 (1.564) data 0.000 (0.002) loss 0.9150 (1.0503) acc 84.3750 (73.5956) lr 6.3188e-04 eta 3:43:30 +epoch [33/50] batch [430/500] time 1.554 (1.564) data 0.000 (0.002) loss 1.0527 (1.0479) acc 78.1250 (73.6773) lr 6.3188e-04 eta 3:43:21 +epoch [33/50] batch [435/500] time 1.559 (1.564) data 0.001 (0.002) loss 1.2070 (1.0480) acc 75.0000 (73.6997) lr 6.3188e-04 eta 3:43:13 +epoch [33/50] batch [440/500] time 1.564 (1.564) data 0.000 (0.002) loss 0.9976 (1.0494) acc 78.1250 (73.6932) lr 6.3188e-04 eta 3:43:04 +epoch [33/50] batch [445/500] time 1.547 (1.564) data 0.000 (0.002) loss 1.7061 (1.0515) acc 59.3750 (73.6587) lr 6.3188e-04 eta 3:42:55 +epoch [33/50] batch [450/500] time 1.563 (1.564) data 0.000 (0.002) loss 1.2041 (1.0509) acc 75.0000 (73.6736) lr 6.3188e-04 eta 3:42:48 +epoch [33/50] batch [455/500] time 1.557 (1.564) data 0.001 (0.002) loss 0.9946 (1.0522) acc 75.0000 (73.6058) lr 6.3188e-04 eta 3:42:41 +epoch [33/50] batch [460/500] time 1.555 (1.564) data 0.000 (0.002) loss 1.5566 (1.0532) acc 75.0000 (73.5870) lr 6.3188e-04 
eta 3:42:32 +epoch [33/50] batch [465/500] time 1.590 (1.564) data 0.000 (0.002) loss 1.2793 (1.0556) acc 71.8750 (73.5484) lr 6.3188e-04 eta 3:42:25 +epoch [33/50] batch [470/500] time 1.532 (1.564) data 0.000 (0.002) loss 1.0703 (1.0550) acc 71.8750 (73.5505) lr 6.3188e-04 eta 3:42:18 +epoch [33/50] batch [475/500] time 1.555 (1.564) data 0.000 (0.002) loss 1.3086 (1.0534) acc 62.5000 (73.5855) lr 6.3188e-04 eta 3:42:10 +epoch [33/50] batch [480/500] time 1.584 (1.564) data 0.000 (0.002) loss 0.8750 (1.0515) acc 75.0000 (73.6393) lr 6.3188e-04 eta 3:42:03 +epoch [33/50] batch [485/500] time 1.563 (1.564) data 0.001 (0.002) loss 0.9619 (1.0518) acc 68.7500 (73.6340) lr 6.3188e-04 eta 3:41:55 +epoch [33/50] batch [490/500] time 1.569 (1.564) data 0.000 (0.002) loss 1.2100 (1.0537) acc 84.3750 (73.5842) lr 6.3188e-04 eta 3:41:46 +epoch [33/50] batch [495/500] time 1.561 (1.564) data 0.000 (0.002) loss 1.3584 (1.0525) acc 71.8750 (73.6048) lr 6.3188e-04 eta 3:41:38 +epoch [33/50] batch [500/500] time 1.548 (1.564) data 0.000 (0.002) loss 1.2656 (1.0543) acc 62.5000 (73.5687) lr 5.7422e-04 eta 3:41:29 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,057 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [34/50] batch [5/500] time 1.541 (1.668) data 0.001 (0.162) loss 1.5850 (1.0038) acc 65.6250 (72.5000) lr 5.7422e-04 eta 3:56:07 +epoch [34/50] batch [10/500] time 1.543 (1.614) data 0.000 (0.081) loss 0.7798 (0.9412) acc 75.0000 (74.0625) lr 5.7422e-04 eta 3:48:20 +epoch [34/50] batch [15/500] time 1.558 (1.596) data 0.000 (0.054) loss 0.7817 (0.9425) acc 81.2500 (74.5833) lr 5.7422e-04 eta 3:45:42 +epoch [34/50] batch [20/500] time 1.562 (1.590) data 0.000 (0.041) loss 0.9648 (0.9363) acc 71.8750 (74.5312) lr 5.7422e-04 eta 3:44:44 +epoch [34/50] batch [25/500] time 1.690 (1.590) data 0.000 (0.033) loss 0.9634 (0.9486) acc 75.0000 (74.2500) lr 5.7422e-04 eta 3:44:31 +epoch [34/50] batch [30/500] time 1.568 (1.585) data 0.000 (0.027) 
loss 1.4287 (0.9438) acc 62.5000 (74.7917) lr 5.7422e-04 eta 3:43:44 +epoch [34/50] batch [35/500] time 1.564 (1.582) data 0.001 (0.023) loss 1.1309 (0.9537) acc 78.1250 (74.6429) lr 5.7422e-04 eta 3:43:13 +epoch [34/50] batch [40/500] time 1.558 (1.579) data 0.001 (0.021) loss 1.3174 (0.9618) acc 68.7500 (74.6094) lr 5.7422e-04 eta 3:42:34 +epoch [34/50] batch [45/500] time 1.568 (1.577) data 0.001 (0.018) loss 0.9731 (0.9613) acc 81.2500 (74.3056) lr 5.7422e-04 eta 3:42:13 +epoch [34/50] batch [50/500] time 1.569 (1.576) data 0.000 (0.017) loss 1.4121 (0.9673) acc 68.7500 (74.5625) lr 5.7422e-04 eta 3:42:00 +epoch [34/50] batch [55/500] time 1.581 (1.576) data 0.000 (0.015) loss 1.0010 (0.9683) acc 75.0000 (74.8864) lr 5.7422e-04 eta 3:41:45 +epoch [34/50] batch [60/500] time 1.554 (1.574) data 0.000 (0.014) loss 0.7666 (0.9507) acc 81.2500 (75.1042) lr 5.7422e-04 eta 3:41:21 +epoch [34/50] batch [65/500] time 1.567 (1.573) data 0.000 (0.013) loss 1.4307 (0.9726) acc 71.8750 (74.9519) lr 5.7422e-04 eta 3:41:08 +epoch [34/50] batch [70/500] time 1.591 (1.573) data 0.000 (0.012) loss 0.7568 (0.9812) acc 78.1250 (74.9554) lr 5.7422e-04 eta 3:41:00 +epoch [34/50] batch [75/500] time 1.552 (1.572) data 0.000 (0.011) loss 1.2461 (0.9835) acc 71.8750 (74.9583) lr 5.7422e-04 eta 3:40:43 +epoch [34/50] batch [80/500] time 1.572 (1.571) data 0.000 (0.011) loss 1.0996 (0.9970) acc 68.7500 (74.7656) lr 5.7422e-04 eta 3:40:27 +epoch [34/50] batch [85/500] time 1.648 (1.572) data 0.000 (0.010) loss 0.8271 (0.9864) acc 71.8750 (74.8897) lr 5.7422e-04 eta 3:40:24 +epoch [34/50] batch [90/500] time 1.565 (1.571) data 0.000 (0.009) loss 1.3682 (0.9983) acc 65.6250 (74.5833) lr 5.7422e-04 eta 3:40:14 +epoch [34/50] batch [95/500] time 1.573 (1.571) data 0.000 (0.009) loss 1.3525 (0.9971) acc 68.7500 (74.7697) lr 5.7422e-04 eta 3:40:00 +epoch [34/50] batch [100/500] time 1.566 (1.570) data 0.000 (0.008) loss 0.9541 (1.0034) acc 71.8750 (74.5000) lr 5.7422e-04 eta 3:39:50 +epoch 
[34/50] batch [105/500] time 1.580 (1.570) data 0.000 (0.008) loss 1.0303 (1.0050) acc 78.1250 (74.4643) lr 5.7422e-04 eta 3:39:39 +epoch [34/50] batch [110/500] time 1.559 (1.570) data 0.000 (0.008) loss 0.8721 (0.9973) acc 81.2500 (74.6307) lr 5.7422e-04 eta 3:39:28 +epoch [34/50] batch [115/500] time 1.558 (1.569) data 0.001 (0.007) loss 0.5776 (0.9946) acc 81.2500 (74.4565) lr 5.7422e-04 eta 3:39:18 +epoch [34/50] batch [120/500] time 1.580 (1.569) data 0.001 (0.007) loss 0.7573 (0.9862) acc 81.2500 (74.5573) lr 5.7422e-04 eta 3:39:08 +epoch [34/50] batch [125/500] time 1.556 (1.569) data 0.000 (0.007) loss 0.7935 (0.9947) acc 84.3750 (74.3500) lr 5.7422e-04 eta 3:39:01 +epoch [34/50] batch [130/500] time 1.573 (1.570) data 0.000 (0.007) loss 1.0225 (0.9894) acc 78.1250 (74.5673) lr 5.7422e-04 eta 3:38:56 +epoch [34/50] batch [135/500] time 1.569 (1.570) data 0.000 (0.006) loss 1.4160 (0.9864) acc 68.7500 (74.5370) lr 5.7422e-04 eta 3:38:49 +epoch [34/50] batch [140/500] time 1.568 (1.569) data 0.000 (0.006) loss 1.5947 (0.9847) acc 62.5000 (74.5089) lr 5.7422e-04 eta 3:38:40 +epoch [34/50] batch [145/500] time 1.548 (1.570) data 0.000 (0.006) loss 0.6997 (0.9943) acc 81.2500 (74.2241) lr 5.7422e-04 eta 3:38:33 +epoch [34/50] batch [150/500] time 1.544 (1.569) data 0.000 (0.006) loss 0.7456 (0.9873) acc 71.8750 (74.4167) lr 5.7422e-04 eta 3:38:21 +epoch [34/50] batch [155/500] time 1.566 (1.569) data 0.000 (0.006) loss 1.3721 (0.9862) acc 59.3750 (74.3750) lr 5.7422e-04 eta 3:38:10 +epoch [34/50] batch [160/500] time 1.564 (1.568) data 0.001 (0.005) loss 0.7637 (0.9846) acc 84.3750 (74.3750) lr 5.7422e-04 eta 3:37:58 +epoch [34/50] batch [165/500] time 1.541 (1.568) data 0.001 (0.005) loss 0.9067 (0.9815) acc 81.2500 (74.4508) lr 5.7422e-04 eta 3:37:48 +epoch [34/50] batch [170/500] time 1.553 (1.568) data 0.000 (0.005) loss 1.1514 (0.9872) acc 71.8750 (74.3934) lr 5.7422e-04 eta 3:37:38 +epoch [34/50] batch [175/500] time 1.530 (1.567) data 0.000 (0.005) loss 
1.1865 (0.9924) acc 62.5000 (74.4286) lr 5.7422e-04 eta 3:37:26 +epoch [34/50] batch [180/500] time 1.585 (1.567) data 0.000 (0.005) loss 1.0088 (0.9917) acc 78.1250 (74.4271) lr 5.7422e-04 eta 3:37:15 +epoch [34/50] batch [185/500] time 1.557 (1.566) data 0.001 (0.005) loss 1.3369 (0.9905) acc 62.5000 (74.4426) lr 5.7422e-04 eta 3:37:04 +epoch [34/50] batch [190/500] time 1.537 (1.566) data 0.000 (0.005) loss 1.2314 (0.9959) acc 71.8750 (74.3586) lr 5.7422e-04 eta 3:36:51 +epoch [34/50] batch [195/500] time 1.586 (1.566) data 0.001 (0.005) loss 1.0371 (0.9961) acc 71.8750 (74.2949) lr 5.7422e-04 eta 3:36:42 +epoch [34/50] batch [200/500] time 1.553 (1.565) data 0.000 (0.004) loss 1.1143 (0.9954) acc 71.8750 (74.3125) lr 5.7422e-04 eta 3:36:32 +epoch [34/50] batch [205/500] time 1.548 (1.565) data 0.000 (0.004) loss 0.7285 (0.9950) acc 81.2500 (74.3445) lr 5.7422e-04 eta 3:36:21 +epoch [34/50] batch [210/500] time 1.549 (1.565) data 0.001 (0.004) loss 1.2002 (0.9962) acc 65.6250 (74.2411) lr 5.7422e-04 eta 3:36:11 +epoch [34/50] batch [215/500] time 1.560 (1.565) data 0.000 (0.004) loss 0.6938 (0.9958) acc 78.1250 (74.3169) lr 5.7422e-04 eta 3:36:05 +epoch [34/50] batch [220/500] time 1.578 (1.565) data 0.001 (0.004) loss 1.2598 (0.9966) acc 68.7500 (74.3040) lr 5.7422e-04 eta 3:35:57 +epoch [34/50] batch [225/500] time 1.549 (1.565) data 0.000 (0.004) loss 1.5156 (0.9979) acc 65.6250 (74.2500) lr 5.7422e-04 eta 3:35:49 +epoch [34/50] batch [230/500] time 1.558 (1.565) data 0.000 (0.004) loss 0.9336 (1.0017) acc 71.8750 (74.1033) lr 5.7422e-04 eta 3:35:43 +epoch [34/50] batch [235/500] time 1.599 (1.565) data 0.001 (0.004) loss 1.3496 (1.0041) acc 65.6250 (74.0957) lr 5.7422e-04 eta 3:35:35 +epoch [34/50] batch [240/500] time 1.552 (1.565) data 0.000 (0.004) loss 0.9805 (1.0011) acc 65.6250 (74.1276) lr 5.7422e-04 eta 3:35:28 +epoch [34/50] batch [245/500] time 1.575 (1.565) data 0.000 (0.004) loss 1.0693 (1.0021) acc 68.7500 (74.0306) lr 5.7422e-04 eta 3:35:20 
+epoch [34/50] batch [250/500] time 1.572 (1.565) data 0.000 (0.004) loss 0.6890 (1.0003) acc 90.6250 (74.1250) lr 5.7422e-04 eta 3:35:11 +epoch [34/50] batch [255/500] time 1.554 (1.565) data 0.001 (0.004) loss 1.1299 (0.9998) acc 75.0000 (74.0931) lr 5.7422e-04 eta 3:35:02 +epoch [34/50] batch [260/500] time 1.545 (1.565) data 0.001 (0.004) loss 1.1484 (0.9991) acc 78.1250 (74.1106) lr 5.7422e-04 eta 3:34:52 +epoch [34/50] batch [265/500] time 1.542 (1.565) data 0.000 (0.003) loss 0.8208 (0.9988) acc 78.1250 (74.1274) lr 5.7422e-04 eta 3:34:46 +epoch [34/50] batch [270/500] time 1.569 (1.565) data 0.001 (0.003) loss 1.4736 (1.0026) acc 65.6250 (74.0856) lr 5.7422e-04 eta 3:34:39 +epoch [34/50] batch [275/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.7598 (1.0103) acc 56.2500 (73.9659) lr 5.7422e-04 eta 3:34:33 +epoch [34/50] batch [280/500] time 1.545 (1.565) data 0.000 (0.003) loss 1.0283 (1.0118) acc 75.0000 (73.9174) lr 5.7422e-04 eta 3:34:23 +epoch [34/50] batch [285/500] time 1.562 (1.565) data 0.000 (0.003) loss 0.5034 (1.0087) acc 81.2500 (73.9693) lr 5.7422e-04 eta 3:34:17 +epoch [34/50] batch [290/500] time 1.565 (1.565) data 0.000 (0.003) loss 0.6670 (1.0111) acc 71.8750 (73.9440) lr 5.7422e-04 eta 3:34:10 +epoch [34/50] batch [295/500] time 1.549 (1.565) data 0.000 (0.003) loss 0.6831 (1.0108) acc 81.2500 (73.9301) lr 5.7422e-04 eta 3:34:01 +epoch [34/50] batch [300/500] time 1.599 (1.565) data 0.000 (0.003) loss 1.5264 (1.0114) acc 65.6250 (73.9062) lr 5.7422e-04 eta 3:33:54 +epoch [34/50] batch [305/500] time 1.558 (1.565) data 0.000 (0.003) loss 1.8564 (1.0141) acc 56.2500 (73.8320) lr 5.7422e-04 eta 3:33:47 +epoch [34/50] batch [310/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.0977 (1.0168) acc 75.0000 (73.7500) lr 5.7422e-04 eta 3:33:39 +epoch [34/50] batch [315/500] time 1.588 (1.565) data 0.000 (0.003) loss 1.1660 (1.0182) acc 62.5000 (73.7302) lr 5.7422e-04 eta 3:33:32 +epoch [34/50] batch [320/500] time 1.567 (1.565) data 0.001 
(0.003) loss 0.7905 (1.0196) acc 71.8750 (73.7500) lr 5.7422e-04 eta 3:33:23 +epoch [34/50] batch [325/500] time 1.553 (1.565) data 0.001 (0.003) loss 1.2383 (1.0203) acc 68.7500 (73.7404) lr 5.7422e-04 eta 3:33:14 +epoch [34/50] batch [330/500] time 1.555 (1.565) data 0.000 (0.003) loss 1.5781 (1.0244) acc 50.0000 (73.5795) lr 5.7422e-04 eta 3:33:04 +epoch [34/50] batch [335/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.2891 (1.0282) acc 65.6250 (73.4981) lr 5.7422e-04 eta 3:32:56 +epoch [34/50] batch [340/500] time 1.540 (1.565) data 0.000 (0.003) loss 1.5332 (1.0282) acc 68.7500 (73.5294) lr 5.7422e-04 eta 3:32:47 +epoch [34/50] batch [345/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.1475 (1.0267) acc 65.6250 (73.5779) lr 5.7422e-04 eta 3:32:38 +epoch [34/50] batch [350/500] time 1.557 (1.564) data 0.000 (0.003) loss 0.6973 (1.0286) acc 71.8750 (73.5357) lr 5.7422e-04 eta 3:32:30 +epoch [34/50] batch [355/500] time 1.575 (1.564) data 0.000 (0.003) loss 0.7944 (1.0277) acc 75.0000 (73.5475) lr 5.7422e-04 eta 3:32:22 +epoch [34/50] batch [360/500] time 1.557 (1.564) data 0.000 (0.003) loss 1.3174 (1.0276) acc 68.7500 (73.5590) lr 5.7422e-04 eta 3:32:13 +epoch [34/50] batch [365/500] time 1.549 (1.564) data 0.000 (0.003) loss 1.3994 (1.0298) acc 75.0000 (73.5188) lr 5.7422e-04 eta 3:32:04 +epoch [34/50] batch [370/500] time 1.540 (1.564) data 0.000 (0.003) loss 0.6704 (1.0280) acc 75.0000 (73.6064) lr 5.7422e-04 eta 3:31:55 +epoch [34/50] batch [375/500] time 1.570 (1.564) data 0.000 (0.003) loss 1.5107 (1.0310) acc 71.8750 (73.5417) lr 5.7422e-04 eta 3:31:50 +epoch [34/50] batch [380/500] time 1.546 (1.564) data 0.000 (0.003) loss 0.8677 (1.0333) acc 78.1250 (73.4868) lr 5.7422e-04 eta 3:31:39 +epoch [34/50] batch [385/500] time 1.570 (1.564) data 0.000 (0.003) loss 0.7627 (1.0353) acc 81.2500 (73.4821) lr 5.7422e-04 eta 3:31:30 +epoch [34/50] batch [390/500] time 1.549 (1.564) data 0.000 (0.002) loss 0.9668 (1.0331) acc 78.1250 (73.5096) lr 5.7422e-04 
eta 3:31:20 +epoch [34/50] batch [395/500] time 1.557 (1.564) data 0.000 (0.002) loss 1.7881 (1.0335) acc 53.1250 (73.4573) lr 5.7422e-04 eta 3:31:12 +epoch [34/50] batch [400/500] time 1.572 (1.564) data 0.001 (0.002) loss 0.9873 (1.0342) acc 81.2500 (73.4453) lr 5.7422e-04 eta 3:31:05 +epoch [34/50] batch [405/500] time 1.551 (1.564) data 0.000 (0.002) loss 0.8076 (1.0345) acc 71.8750 (73.4336) lr 5.7422e-04 eta 3:30:56 +epoch [34/50] batch [410/500] time 1.545 (1.563) data 0.000 (0.002) loss 1.2783 (1.0354) acc 68.7500 (73.4223) lr 5.7422e-04 eta 3:30:48 +epoch [34/50] batch [415/500] time 1.647 (1.564) data 0.000 (0.002) loss 1.3984 (1.0339) acc 65.6250 (73.4639) lr 5.7422e-04 eta 3:30:42 +epoch [34/50] batch [420/500] time 1.581 (1.564) data 0.000 (0.002) loss 0.7407 (1.0365) acc 75.0000 (73.4003) lr 5.7422e-04 eta 3:30:34 +epoch [34/50] batch [425/500] time 1.558 (1.564) data 0.000 (0.002) loss 0.4021 (1.0339) acc 84.3750 (73.4412) lr 5.7422e-04 eta 3:30:26 +epoch [34/50] batch [430/500] time 1.545 (1.564) data 0.000 (0.002) loss 1.3135 (1.0350) acc 62.5000 (73.4230) lr 5.7422e-04 eta 3:30:17 +epoch [34/50] batch [435/500] time 1.587 (1.564) data 0.000 (0.002) loss 0.8818 (1.0337) acc 84.3750 (73.4770) lr 5.7422e-04 eta 3:30:10 +epoch [34/50] batch [440/500] time 1.580 (1.564) data 0.000 (0.002) loss 0.8262 (1.0336) acc 71.8750 (73.4659) lr 5.7422e-04 eta 3:30:03 +epoch [34/50] batch [445/500] time 1.552 (1.564) data 0.000 (0.002) loss 0.9028 (1.0339) acc 75.0000 (73.4410) lr 5.7422e-04 eta 3:29:54 +epoch [34/50] batch [450/500] time 1.545 (1.564) data 0.000 (0.002) loss 0.8291 (1.0354) acc 78.1250 (73.4028) lr 5.7422e-04 eta 3:29:46 +epoch [34/50] batch [455/500] time 1.567 (1.563) data 0.000 (0.002) loss 0.9312 (1.0379) acc 78.1250 (73.3791) lr 5.7422e-04 eta 3:29:37 +epoch [34/50] batch [460/500] time 1.569 (1.563) data 0.000 (0.002) loss 1.3232 (1.0388) acc 75.0000 (73.4035) lr 5.7422e-04 eta 3:29:29 +epoch [34/50] batch [465/500] time 1.579 (1.563) data 
0.000 (0.002) loss 1.2939 (1.0433) acc 56.2500 (73.2930) lr 5.7422e-04 eta 3:29:21 +epoch [34/50] batch [470/500] time 1.553 (1.563) data 0.000 (0.002) loss 1.1084 (1.0433) acc 65.6250 (73.3112) lr 5.7422e-04 eta 3:29:13 +epoch [34/50] batch [475/500] time 1.568 (1.563) data 0.000 (0.002) loss 0.6870 (1.0425) acc 78.1250 (73.3421) lr 5.7422e-04 eta 3:29:05 +epoch [34/50] batch [480/500] time 1.579 (1.563) data 0.000 (0.002) loss 1.0801 (1.0454) acc 71.8750 (73.3138) lr 5.7422e-04 eta 3:28:56 +epoch [34/50] batch [485/500] time 1.580 (1.563) data 0.001 (0.002) loss 0.9585 (1.0455) acc 62.5000 (73.2796) lr 5.7422e-04 eta 3:28:49 +epoch [34/50] batch [490/500] time 1.557 (1.563) data 0.000 (0.002) loss 1.0469 (1.0477) acc 78.1250 (73.2781) lr 5.7422e-04 eta 3:28:40 +epoch [34/50] batch [495/500] time 1.554 (1.563) data 0.000 (0.002) loss 1.3867 (1.0490) acc 75.0000 (73.2449) lr 5.7422e-04 eta 3:28:31 +epoch [34/50] batch [500/500] time 1.579 (1.563) data 0.000 (0.002) loss 1.3604 (1.0513) acc 78.1250 (73.2313) lr 5.1825e-04 eta 3:28:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,972 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.5% +epoch [35/50] batch [5/500] time 1.552 (1.691) data 0.000 (0.174) loss 0.8682 (0.9563) acc 81.2500 (78.7500) lr 5.1825e-04 eta 3:45:17 +epoch [35/50] batch [10/500] time 1.538 (1.625) data 0.000 (0.087) loss 1.3291 (0.9780) acc 62.5000 (75.0000) lr 5.1825e-04 eta 3:36:22 +epoch [35/50] batch [15/500] time 1.547 (1.604) data 0.000 (0.058) loss 1.3350 (1.0960) acc 71.8750 (72.0833) lr 5.1825e-04 eta 3:33:25 +epoch [35/50] batch [20/500] time 1.573 (1.593) data 0.001 (0.044) loss 0.6309 (1.0545) acc 75.0000 (73.5938) lr 5.1825e-04 eta 3:31:54 +epoch [35/50] batch [25/500] time 1.571 (1.596) data 0.002 (0.035) loss 1.4561 (1.1026) acc 68.7500 (73.3750) lr 5.1825e-04 eta 3:32:08 +epoch [35/50] batch [30/500] time 1.547 (1.589) data 0.001 (0.030) loss 0.7998 (1.0922) acc 81.2500 (73.4375) lr 5.1825e-04 eta 3:31:04 
+epoch [35/50] batch [35/500] time 1.582 (1.585) data 0.001 (0.025) loss 1.0322 (1.0660) acc 75.0000 (73.8393) lr 5.1825e-04 eta 3:30:23 +epoch [35/50] batch [40/500] time 1.562 (1.583) data 0.001 (0.022) loss 1.5498 (1.0858) acc 71.8750 (73.5156) lr 5.1825e-04 eta 3:29:56 +epoch [35/50] batch [45/500] time 1.578 (1.581) data 0.000 (0.020) loss 1.0508 (1.0599) acc 68.7500 (74.0972) lr 5.1825e-04 eta 3:29:40 +epoch [35/50] batch [50/500] time 1.568 (1.580) data 0.001 (0.018) loss 1.1475 (1.0573) acc 78.1250 (74.1875) lr 5.1825e-04 eta 3:29:23 +epoch [35/50] batch [55/500] time 1.603 (1.580) data 0.001 (0.016) loss 1.0869 (1.0521) acc 75.0000 (74.0909) lr 5.1825e-04 eta 3:29:15 +epoch [35/50] batch [60/500] time 1.575 (1.578) data 0.001 (0.015) loss 0.7627 (1.0245) acc 75.0000 (74.5312) lr 5.1825e-04 eta 3:28:52 +epoch [35/50] batch [65/500] time 1.556 (1.577) data 0.001 (0.014) loss 1.4023 (1.0335) acc 59.3750 (74.6154) lr 5.1825e-04 eta 3:28:35 +epoch [35/50] batch [70/500] time 1.589 (1.577) data 0.001 (0.013) loss 0.8633 (1.0128) acc 78.1250 (74.8214) lr 5.1825e-04 eta 3:28:27 +epoch [35/50] batch [75/500] time 1.561 (1.577) data 0.001 (0.012) loss 1.2568 (1.0044) acc 68.7500 (74.7917) lr 5.1825e-04 eta 3:28:15 +epoch [35/50] batch [80/500] time 1.590 (1.577) data 0.001 (0.011) loss 0.4397 (0.9982) acc 84.3750 (74.7266) lr 5.1825e-04 eta 3:28:10 +epoch [35/50] batch [85/500] time 1.559 (1.576) data 0.000 (0.011) loss 1.0996 (1.0041) acc 75.0000 (74.8897) lr 5.1825e-04 eta 3:27:57 +epoch [35/50] batch [90/500] time 1.582 (1.576) data 0.001 (0.010) loss 0.7383 (1.0026) acc 81.2500 (74.7917) lr 5.1825e-04 eta 3:27:43 +epoch [35/50] batch [95/500] time 1.561 (1.575) data 0.000 (0.010) loss 1.1074 (1.0076) acc 68.7500 (74.3750) lr 5.1825e-04 eta 3:27:31 +epoch [35/50] batch [100/500] time 1.572 (1.575) data 0.001 (0.009) loss 1.1592 (1.0192) acc 71.8750 (74.0938) lr 5.1825e-04 eta 3:27:20 +epoch [35/50] batch [105/500] time 1.568 (1.575) data 0.001 (0.009) loss 1.5146 
(1.0186) acc 62.5000 (74.2560) lr 5.1825e-04 eta 3:27:12 +epoch [35/50] batch [110/500] time 1.561 (1.574) data 0.001 (0.008) loss 0.6895 (1.0144) acc 75.0000 (74.2330) lr 5.1825e-04 eta 3:27:01 +epoch [35/50] batch [115/500] time 1.564 (1.574) data 0.001 (0.008) loss 1.1836 (1.0167) acc 65.6250 (73.9946) lr 5.1825e-04 eta 3:26:52 +epoch [35/50] batch [120/500] time 1.695 (1.575) data 0.000 (0.008) loss 0.3455 (1.0085) acc 87.5000 (74.1146) lr 5.1825e-04 eta 3:26:51 +epoch [35/50] batch [125/500] time 1.549 (1.574) data 0.000 (0.008) loss 0.7480 (1.0147) acc 78.1250 (73.9500) lr 5.1825e-04 eta 3:26:32 +epoch [35/50] batch [130/500] time 1.561 (1.573) data 0.001 (0.007) loss 0.9004 (1.0096) acc 81.2500 (74.0865) lr 5.1825e-04 eta 3:26:20 +epoch [35/50] batch [135/500] time 1.575 (1.573) data 0.000 (0.007) loss 0.9492 (1.0050) acc 71.8750 (74.2130) lr 5.1825e-04 eta 3:26:08 +epoch [35/50] batch [140/500] time 1.574 (1.572) data 0.000 (0.007) loss 1.6455 (1.0032) acc 71.8750 (74.3080) lr 5.1825e-04 eta 3:25:56 +epoch [35/50] batch [145/500] time 1.573 (1.572) data 0.000 (0.007) loss 0.3672 (0.9965) acc 81.2500 (74.4181) lr 5.1825e-04 eta 3:25:48 +epoch [35/50] batch [150/500] time 1.535 (1.572) data 0.000 (0.006) loss 1.5879 (0.9945) acc 62.5000 (74.4375) lr 5.1825e-04 eta 3:25:37 +epoch [35/50] batch [155/500] time 1.563 (1.571) data 0.000 (0.006) loss 1.6973 (0.9995) acc 68.7500 (74.4960) lr 5.1825e-04 eta 3:25:26 +epoch [35/50] batch [160/500] time 1.544 (1.571) data 0.000 (0.006) loss 1.1348 (0.9950) acc 75.0000 (74.6094) lr 5.1825e-04 eta 3:25:14 +epoch [35/50] batch [165/500] time 1.570 (1.571) data 0.000 (0.006) loss 1.1348 (0.9969) acc 62.5000 (74.5644) lr 5.1825e-04 eta 3:25:08 +epoch [35/50] batch [170/500] time 1.559 (1.570) data 0.000 (0.006) loss 0.7627 (1.0003) acc 78.1250 (74.5404) lr 5.1825e-04 eta 3:24:54 +epoch [35/50] batch [175/500] time 1.560 (1.570) data 0.000 (0.006) loss 0.8506 (1.0040) acc 78.1250 (74.5893) lr 5.1825e-04 eta 3:24:44 +epoch 
[35/50] batch [180/500] time 1.547 (1.570) data 0.000 (0.005) loss 1.1611 (1.0042) acc 84.3750 (74.6007) lr 5.1825e-04 eta 3:24:33 +epoch [35/50] batch [185/500] time 1.534 (1.569) data 0.000 (0.005) loss 0.8828 (1.0097) acc 78.1250 (74.4932) lr 5.1825e-04 eta 3:24:21 +epoch [35/50] batch [190/500] time 1.548 (1.569) data 0.000 (0.005) loss 1.0107 (1.0106) acc 75.0000 (74.4079) lr 5.1825e-04 eta 3:24:10 +epoch [35/50] batch [195/500] time 1.559 (1.568) data 0.000 (0.005) loss 0.7554 (1.0111) acc 75.0000 (74.3750) lr 5.1825e-04 eta 3:23:59 +epoch [35/50] batch [200/500] time 1.548 (1.568) data 0.000 (0.005) loss 0.7749 (1.0106) acc 84.3750 (74.4688) lr 5.1825e-04 eta 3:23:49 +epoch [35/50] batch [205/500] time 1.534 (1.567) data 0.000 (0.005) loss 1.2598 (1.0069) acc 78.1250 (74.6494) lr 5.1825e-04 eta 3:23:38 +epoch [35/50] batch [210/500] time 1.557 (1.567) data 0.000 (0.005) loss 1.2510 (1.0097) acc 68.7500 (74.5536) lr 5.1825e-04 eta 3:23:26 +epoch [35/50] batch [215/500] time 1.547 (1.567) data 0.002 (0.005) loss 0.9170 (1.0121) acc 78.1250 (74.5203) lr 5.1825e-04 eta 3:23:17 +epoch [35/50] batch [220/500] time 1.536 (1.566) data 0.000 (0.004) loss 1.6133 (1.0152) acc 59.3750 (74.4176) lr 5.1825e-04 eta 3:23:06 +epoch [35/50] batch [225/500] time 1.545 (1.566) data 0.000 (0.004) loss 1.5879 (1.0200) acc 71.8750 (74.4306) lr 5.1825e-04 eta 3:22:55 +epoch [35/50] batch [230/500] time 1.548 (1.566) data 0.000 (0.004) loss 1.6846 (1.0232) acc 71.8750 (74.4022) lr 5.1825e-04 eta 3:22:47 +epoch [35/50] batch [235/500] time 1.556 (1.566) data 0.000 (0.004) loss 0.8711 (1.0230) acc 78.1250 (74.4149) lr 5.1825e-04 eta 3:22:38 +epoch [35/50] batch [240/500] time 1.541 (1.565) data 0.000 (0.004) loss 1.1113 (1.0289) acc 75.0000 (74.2448) lr 5.1825e-04 eta 3:22:27 +epoch [35/50] batch [245/500] time 1.554 (1.565) data 0.000 (0.004) loss 0.5581 (1.0249) acc 87.5000 (74.3878) lr 5.1825e-04 eta 3:22:18 +epoch [35/50] batch [250/500] time 1.557 (1.565) data 0.000 (0.004) loss 
0.7476 (1.0238) acc 87.5000 (74.4000) lr 5.1825e-04 eta 3:22:09 +epoch [35/50] batch [255/500] time 1.547 (1.565) data 0.000 (0.004) loss 0.8374 (1.0246) acc 81.2500 (74.3505) lr 5.1825e-04 eta 3:22:00 +epoch [35/50] batch [260/500] time 1.574 (1.565) data 0.000 (0.004) loss 1.2500 (1.0282) acc 75.0000 (74.2668) lr 5.1825e-04 eta 3:21:52 +epoch [35/50] batch [265/500] time 1.564 (1.565) data 0.001 (0.004) loss 0.8555 (1.0269) acc 78.1250 (74.3160) lr 5.1825e-04 eta 3:21:46 +epoch [35/50] batch [270/500] time 1.546 (1.565) data 0.001 (0.004) loss 0.8857 (1.0264) acc 90.6250 (74.3403) lr 5.1825e-04 eta 3:21:36 +epoch [35/50] batch [275/500] time 1.536 (1.565) data 0.000 (0.004) loss 1.1387 (1.0283) acc 78.1250 (74.2727) lr 5.1825e-04 eta 3:21:26 +epoch [35/50] batch [280/500] time 1.530 (1.564) data 0.001 (0.004) loss 1.5479 (1.0288) acc 62.5000 (74.2634) lr 5.1825e-04 eta 3:21:15 +epoch [35/50] batch [285/500] time 1.571 (1.564) data 0.001 (0.004) loss 1.3242 (1.0343) acc 68.7500 (74.2105) lr 5.1825e-04 eta 3:21:05 +epoch [35/50] batch [290/500] time 1.546 (1.564) data 0.000 (0.003) loss 0.7144 (1.0358) acc 78.1250 (74.1918) lr 5.1825e-04 eta 3:20:55 +epoch [35/50] batch [295/500] time 1.578 (1.564) data 0.000 (0.003) loss 1.2637 (1.0375) acc 71.8750 (74.1843) lr 5.1825e-04 eta 3:20:47 +epoch [35/50] batch [300/500] time 1.550 (1.563) data 0.000 (0.003) loss 1.2402 (1.0406) acc 65.6250 (74.0729) lr 5.1825e-04 eta 3:20:38 +epoch [35/50] batch [305/500] time 1.545 (1.563) data 0.000 (0.003) loss 1.0889 (1.0423) acc 71.8750 (74.0676) lr 5.1825e-04 eta 3:20:28 +epoch [35/50] batch [310/500] time 1.554 (1.564) data 0.000 (0.003) loss 1.1807 (1.0464) acc 71.8750 (73.9617) lr 5.1825e-04 eta 3:20:23 +epoch [35/50] batch [315/500] time 1.544 (1.563) data 0.000 (0.003) loss 0.5723 (1.0452) acc 87.5000 (74.0079) lr 5.1825e-04 eta 3:20:15 +epoch [35/50] batch [320/500] time 1.532 (1.563) data 0.000 (0.003) loss 1.1396 (1.0438) acc 71.8750 (74.0332) lr 5.1825e-04 eta 3:20:05 
+epoch [35/50] batch [325/500] time 1.548 (1.563) data 0.000 (0.003) loss 0.6875 (1.0415) acc 78.1250 (74.0385) lr 5.1825e-04 eta 3:19:56 +epoch [35/50] batch [330/500] time 1.532 (1.563) data 0.001 (0.003) loss 1.5391 (1.0422) acc 62.5000 (74.0057) lr 5.1825e-04 eta 3:19:47 +epoch [35/50] batch [335/500] time 1.611 (1.563) data 0.001 (0.003) loss 1.3584 (1.0474) acc 59.3750 (73.8806) lr 5.1825e-04 eta 3:19:40 +epoch [35/50] batch [340/500] time 1.551 (1.563) data 0.001 (0.003) loss 0.8667 (1.0465) acc 71.8750 (73.8235) lr 5.1825e-04 eta 3:19:32 +epoch [35/50] batch [345/500] time 1.561 (1.563) data 0.001 (0.003) loss 0.4517 (1.0454) acc 90.6250 (73.8949) lr 5.1825e-04 eta 3:19:24 +epoch [35/50] batch [350/500] time 1.540 (1.563) data 0.001 (0.003) loss 2.1211 (1.0502) acc 62.5000 (73.8482) lr 5.1825e-04 eta 3:19:14 +epoch [35/50] batch [355/500] time 1.537 (1.562) data 0.001 (0.003) loss 0.7725 (1.0519) acc 71.8750 (73.7940) lr 5.1825e-04 eta 3:19:04 +epoch [35/50] batch [360/500] time 1.568 (1.563) data 0.001 (0.003) loss 0.8877 (1.0489) acc 78.1250 (73.8542) lr 5.1825e-04 eta 3:18:57 +epoch [35/50] batch [365/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.1943 (1.0477) acc 71.8750 (73.8955) lr 5.1825e-04 eta 3:18:49 +epoch [35/50] batch [370/500] time 1.539 (1.562) data 0.001 (0.003) loss 0.8999 (1.0461) acc 71.8750 (73.9020) lr 5.1825e-04 eta 3:18:39 +epoch [35/50] batch [375/500] time 1.540 (1.562) data 0.001 (0.003) loss 0.9731 (1.0472) acc 62.5000 (73.8083) lr 5.1825e-04 eta 3:18:29 +epoch [35/50] batch [380/500] time 1.564 (1.562) data 0.000 (0.003) loss 0.9946 (1.0486) acc 75.0000 (73.8322) lr 5.1825e-04 eta 3:18:20 +epoch [35/50] batch [385/500] time 1.569 (1.562) data 0.000 (0.003) loss 0.9102 (1.0476) acc 71.8750 (73.8231) lr 5.1825e-04 eta 3:18:12 +epoch [35/50] batch [390/500] time 1.567 (1.562) data 0.000 (0.003) loss 1.0840 (1.0482) acc 68.7500 (73.8381) lr 5.1825e-04 eta 3:18:04 +epoch [35/50] batch [395/500] time 1.565 (1.562) data 0.001 
(0.003) loss 1.4238 (1.0505) acc 68.7500 (73.8133) lr 5.1825e-04 eta 3:17:55 +epoch [35/50] batch [400/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.2520 (1.0544) acc 71.8750 (73.7578) lr 5.1825e-04 eta 3:17:47 +epoch [35/50] batch [405/500] time 1.561 (1.561) data 0.000 (0.003) loss 0.8726 (1.0572) acc 78.1250 (73.6497) lr 5.1825e-04 eta 3:17:38 +epoch [35/50] batch [410/500] time 1.533 (1.561) data 0.001 (0.003) loss 1.2021 (1.0590) acc 62.5000 (73.6128) lr 5.1825e-04 eta 3:17:30 +epoch [35/50] batch [415/500] time 1.576 (1.561) data 0.000 (0.003) loss 1.2607 (1.0597) acc 68.7500 (73.6069) lr 5.1825e-04 eta 3:17:21 +epoch [35/50] batch [420/500] time 1.577 (1.561) data 0.000 (0.003) loss 1.2734 (1.0614) acc 68.7500 (73.5193) lr 5.1825e-04 eta 3:17:14 +epoch [35/50] batch [425/500] time 1.557 (1.561) data 0.000 (0.003) loss 1.2119 (1.0617) acc 68.7500 (73.5515) lr 5.1825e-04 eta 3:17:05 +epoch [35/50] batch [430/500] time 1.573 (1.561) data 0.000 (0.003) loss 0.6143 (1.0615) acc 81.2500 (73.5610) lr 5.1825e-04 eta 3:16:56 +epoch [35/50] batch [435/500] time 1.547 (1.561) data 0.000 (0.002) loss 0.7769 (1.0630) acc 75.0000 (73.5345) lr 5.1825e-04 eta 3:16:47 +epoch [35/50] batch [440/500] time 1.547 (1.561) data 0.001 (0.002) loss 0.9062 (1.0609) acc 71.8750 (73.5298) lr 5.1825e-04 eta 3:16:39 +epoch [35/50] batch [445/500] time 1.578 (1.561) data 0.000 (0.002) loss 1.5283 (1.0605) acc 68.7500 (73.5674) lr 5.1825e-04 eta 3:16:30 +epoch [35/50] batch [450/500] time 1.653 (1.561) data 0.000 (0.002) loss 0.8232 (1.0607) acc 81.2500 (73.5694) lr 5.1825e-04 eta 3:16:23 +epoch [35/50] batch [455/500] time 1.552 (1.561) data 0.000 (0.002) loss 1.0166 (1.0599) acc 71.8750 (73.6264) lr 5.1825e-04 eta 3:16:14 +epoch [35/50] batch [460/500] time 1.560 (1.560) data 0.000 (0.002) loss 0.8159 (1.0574) acc 78.1250 (73.6413) lr 5.1825e-04 eta 3:16:06 +epoch [35/50] batch [465/500] time 1.565 (1.560) data 0.000 (0.002) loss 0.8955 (1.0566) acc 75.0000 (73.6358) lr 5.1825e-04 
eta 3:15:56 +epoch [35/50] batch [470/500] time 1.568 (1.560) data 0.001 (0.002) loss 1.4355 (1.0578) acc 71.8750 (73.6503) lr 5.1825e-04 eta 3:15:48 +epoch [35/50] batch [475/500] time 1.542 (1.560) data 0.000 (0.002) loss 1.6367 (1.0580) acc 56.2500 (73.5987) lr 5.1825e-04 eta 3:15:39 +epoch [35/50] batch [480/500] time 1.562 (1.560) data 0.000 (0.002) loss 0.9146 (1.0592) acc 75.0000 (73.5547) lr 5.1825e-04 eta 3:15:31 +epoch [35/50] batch [485/500] time 1.581 (1.560) data 0.001 (0.002) loss 1.0898 (1.0591) acc 71.8750 (73.5438) lr 5.1825e-04 eta 3:15:23 +epoch [35/50] batch [490/500] time 1.545 (1.560) data 0.000 (0.002) loss 1.1475 (1.0609) acc 68.7500 (73.4885) lr 5.1825e-04 eta 3:15:15 +epoch [35/50] batch [495/500] time 1.549 (1.560) data 0.000 (0.002) loss 1.0068 (1.0614) acc 81.2500 (73.5101) lr 5.1825e-04 eta 3:15:07 +epoch [35/50] batch [500/500] time 1.573 (1.560) data 0.000 (0.002) loss 1.2979 (1.0624) acc 71.8750 (73.4875) lr 4.6417e-04 eta 3:15:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,027 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [36/50] batch [5/500] time 1.545 (1.664) data 0.000 (0.167) loss 0.4915 (0.8998) acc 84.3750 (76.2500) lr 4.6417e-04 eta 3:27:54 +epoch [36/50] batch [10/500] time 1.565 (1.611) data 0.000 (0.084) loss 1.4502 (1.0271) acc 65.6250 (72.5000) lr 4.6417e-04 eta 3:21:09 +epoch [36/50] batch [15/500] time 1.565 (1.592) data 0.000 (0.056) loss 0.6650 (1.0835) acc 84.3750 (71.4583) lr 4.6417e-04 eta 3:18:36 +epoch [36/50] batch [20/500] time 1.579 (1.585) data 0.000 (0.042) loss 0.7930 (1.0290) acc 81.2500 (73.5938) lr 4.6417e-04 eta 3:17:37 +epoch [36/50] batch [25/500] time 1.555 (1.580) data 0.000 (0.034) loss 1.0596 (1.0367) acc 68.7500 (73.0000) lr 4.6417e-04 eta 3:16:51 +epoch [36/50] batch [30/500] time 1.556 (1.577) data 0.000 (0.028) loss 1.4150 (1.0754) acc 65.6250 (72.3958) lr 4.6417e-04 eta 3:16:23 +epoch [36/50] batch [35/500] time 1.550 (1.575) data 0.000 (0.024) loss 
1.1846 (1.0789) acc 75.0000 (72.2321) lr 4.6417e-04 eta 3:15:58 +epoch [36/50] batch [40/500] time 1.567 (1.576) data 0.000 (0.021) loss 0.5703 (1.0789) acc 78.1250 (71.8750) lr 4.6417e-04 eta 3:15:57 +epoch [36/50] batch [45/500] time 1.553 (1.573) data 0.000 (0.019) loss 0.9365 (1.0667) acc 68.7500 (72.0139) lr 4.6417e-04 eta 3:15:26 +epoch [36/50] batch [50/500] time 1.583 (1.572) data 0.000 (0.017) loss 1.0166 (1.0746) acc 65.6250 (71.3125) lr 4.6417e-04 eta 3:15:08 +epoch [36/50] batch [55/500] time 1.556 (1.570) data 0.000 (0.016) loss 0.9780 (1.0687) acc 71.8750 (71.4773) lr 4.6417e-04 eta 3:14:49 +epoch [36/50] batch [60/500] time 1.543 (1.570) data 0.000 (0.014) loss 1.1211 (1.0731) acc 75.0000 (71.4062) lr 4.6417e-04 eta 3:14:39 +epoch [36/50] batch [65/500] time 1.563 (1.570) data 0.000 (0.013) loss 1.6104 (1.0755) acc 59.3750 (71.2981) lr 4.6417e-04 eta 3:14:30 +epoch [36/50] batch [70/500] time 1.546 (1.568) data 0.000 (0.012) loss 1.0244 (1.0859) acc 75.0000 (71.3839) lr 4.6417e-04 eta 3:14:12 +epoch [36/50] batch [75/500] time 1.563 (1.569) data 0.000 (0.011) loss 0.7842 (1.0832) acc 81.2500 (71.5417) lr 4.6417e-04 eta 3:14:08 +epoch [36/50] batch [80/500] time 1.541 (1.568) data 0.000 (0.011) loss 1.0889 (1.0879) acc 84.3750 (71.6016) lr 4.6417e-04 eta 3:13:52 +epoch [36/50] batch [85/500] time 1.546 (1.568) data 0.000 (0.010) loss 0.7676 (1.0649) acc 78.1250 (72.2426) lr 4.6417e-04 eta 3:13:49 +epoch [36/50] batch [90/500] time 1.552 (1.568) data 0.000 (0.010) loss 1.0039 (1.0771) acc 87.5000 (72.1875) lr 4.6417e-04 eta 3:13:36 +epoch [36/50] batch [95/500] time 1.546 (1.567) data 0.000 (0.009) loss 1.1318 (1.0803) acc 78.1250 (72.2368) lr 4.6417e-04 eta 3:13:24 +epoch [36/50] batch [100/500] time 1.540 (1.566) data 0.000 (0.009) loss 0.6396 (1.0768) acc 78.1250 (72.2500) lr 4.6417e-04 eta 3:13:10 +epoch [36/50] batch [105/500] time 1.574 (1.566) data 0.001 (0.008) loss 0.3474 (1.0697) acc 93.7500 (72.5595) lr 4.6417e-04 eta 3:13:01 +epoch [36/50] 
batch [110/500] time 1.555 (1.565) data 0.000 (0.008) loss 0.8833 (1.0757) acc 78.1250 (72.4148) lr 4.6417e-04 eta 3:12:47 +epoch [36/50] batch [115/500] time 1.550 (1.565) data 0.000 (0.008) loss 1.0820 (1.0765) acc 62.5000 (72.3641) lr 4.6417e-04 eta 3:12:35 +epoch [36/50] batch [120/500] time 1.581 (1.565) data 0.001 (0.007) loss 1.2227 (1.0727) acc 65.6250 (72.3177) lr 4.6417e-04 eta 3:12:27 +epoch [36/50] batch [125/500] time 1.549 (1.565) data 0.000 (0.007) loss 1.9248 (1.0823) acc 65.6250 (72.4000) lr 4.6417e-04 eta 3:12:19 +epoch [36/50] batch [130/500] time 1.554 (1.564) data 0.001 (0.007) loss 1.0537 (1.0793) acc 75.0000 (72.4519) lr 4.6417e-04 eta 3:12:09 +epoch [36/50] batch [135/500] time 1.552 (1.564) data 0.000 (0.007) loss 1.0439 (1.0799) acc 68.7500 (72.4537) lr 4.6417e-04 eta 3:12:01 +epoch [36/50] batch [140/500] time 1.584 (1.565) data 0.000 (0.006) loss 1.2646 (1.0812) acc 71.8750 (72.6339) lr 4.6417e-04 eta 3:11:54 +epoch [36/50] batch [145/500] time 1.562 (1.564) data 0.000 (0.006) loss 1.1602 (1.0842) acc 71.8750 (72.5216) lr 4.6417e-04 eta 3:11:46 +epoch [36/50] batch [150/500] time 1.553 (1.564) data 0.000 (0.006) loss 1.0664 (1.0809) acc 78.1250 (72.6667) lr 4.6417e-04 eta 3:11:36 +epoch [36/50] batch [155/500] time 1.567 (1.564) data 0.000 (0.006) loss 0.4971 (1.0829) acc 84.3750 (72.6411) lr 4.6417e-04 eta 3:11:25 +epoch [36/50] batch [160/500] time 1.571 (1.564) data 0.001 (0.006) loss 1.1133 (1.0871) acc 78.1250 (72.6367) lr 4.6417e-04 eta 3:11:16 +epoch [36/50] batch [165/500] time 1.554 (1.564) data 0.000 (0.005) loss 0.7197 (1.0803) acc 84.3750 (72.6705) lr 4.6417e-04 eta 3:11:08 +epoch [36/50] batch [170/500] time 1.536 (1.563) data 0.000 (0.005) loss 1.0186 (1.0761) acc 78.1250 (72.7022) lr 4.6417e-04 eta 3:10:57 +epoch [36/50] batch [175/500] time 1.537 (1.563) data 0.000 (0.005) loss 0.9590 (1.0738) acc 68.7500 (72.6786) lr 4.6417e-04 eta 3:10:46 +epoch [36/50] batch [180/500] time 1.564 (1.564) data 0.000 (0.005) loss 1.2432 
(1.0726) acc 71.8750 (72.6736) lr 4.6417e-04 eta 3:10:45 +epoch [36/50] batch [185/500] time 1.528 (1.563) data 0.000 (0.005) loss 1.6074 (1.0823) acc 62.5000 (72.5000) lr 4.6417e-04 eta 3:10:33 +epoch [36/50] batch [190/500] time 1.550 (1.563) data 0.000 (0.005) loss 0.7446 (1.0747) acc 78.1250 (72.6809) lr 4.6417e-04 eta 3:10:25 +epoch [36/50] batch [195/500] time 1.555 (1.563) data 0.000 (0.005) loss 1.2520 (1.0777) acc 75.0000 (72.6763) lr 4.6417e-04 eta 3:10:16 +epoch [36/50] batch [200/500] time 1.544 (1.563) data 0.000 (0.005) loss 1.0674 (1.0785) acc 78.1250 (72.7344) lr 4.6417e-04 eta 3:10:06 +epoch [36/50] batch [205/500] time 1.538 (1.563) data 0.000 (0.004) loss 0.4856 (1.0720) acc 84.3750 (72.9573) lr 4.6417e-04 eta 3:09:59 +epoch [36/50] batch [210/500] time 1.578 (1.563) data 0.000 (0.004) loss 1.1211 (1.0682) acc 62.5000 (73.0060) lr 4.6417e-04 eta 3:09:50 +epoch [36/50] batch [215/500] time 1.565 (1.563) data 0.000 (0.004) loss 1.4600 (1.0698) acc 59.3750 (72.9360) lr 4.6417e-04 eta 3:09:45 +epoch [36/50] batch [220/500] time 1.560 (1.563) data 0.000 (0.004) loss 1.0615 (1.0689) acc 71.8750 (72.9403) lr 4.6417e-04 eta 3:09:37 +epoch [36/50] batch [225/500] time 1.548 (1.563) data 0.000 (0.004) loss 1.0107 (1.0723) acc 68.7500 (72.8611) lr 4.6417e-04 eta 3:09:33 +epoch [36/50] batch [230/500] time 1.545 (1.563) data 0.000 (0.004) loss 0.7905 (1.0664) acc 78.1250 (72.9484) lr 4.6417e-04 eta 3:09:25 +epoch [36/50] batch [235/500] time 1.568 (1.563) data 0.000 (0.004) loss 1.2051 (1.0627) acc 75.0000 (73.0718) lr 4.6417e-04 eta 3:09:17 +epoch [36/50] batch [240/500] time 1.541 (1.563) data 0.000 (0.004) loss 1.5771 (1.0644) acc 65.6250 (73.0859) lr 4.6417e-04 eta 3:09:08 +epoch [36/50] batch [245/500] time 1.563 (1.563) data 0.000 (0.004) loss 0.9302 (1.0636) acc 81.2500 (73.1633) lr 4.6417e-04 eta 3:09:00 +epoch [36/50] batch [250/500] time 1.563 (1.563) data 0.000 (0.004) loss 0.9414 (1.0646) acc 68.7500 (73.1125) lr 4.6417e-04 eta 3:08:50 +epoch 
[36/50] batch [255/500] time 1.544 (1.563) data 0.000 (0.004) loss 1.3447 (1.0660) acc 68.7500 (73.1618) lr 4.6417e-04 eta 3:08:41 +epoch [36/50] batch [260/500] time 1.575 (1.563) data 0.000 (0.004) loss 0.9292 (1.0649) acc 71.8750 (73.1490) lr 4.6417e-04 eta 3:08:32 +epoch [36/50] batch [265/500] time 1.554 (1.563) data 0.000 (0.004) loss 0.6509 (1.0633) acc 81.2500 (73.1486) lr 4.6417e-04 eta 3:08:25 +epoch [36/50] batch [270/500] time 1.556 (1.562) data 0.001 (0.003) loss 1.0762 (1.0642) acc 68.7500 (73.1481) lr 4.6417e-04 eta 3:08:15 +epoch [36/50] batch [275/500] time 1.563 (1.562) data 0.001 (0.003) loss 1.4697 (1.0663) acc 68.7500 (73.1591) lr 4.6417e-04 eta 3:08:07 +epoch [36/50] batch [280/500] time 1.556 (1.562) data 0.001 (0.003) loss 1.0557 (1.0660) acc 68.7500 (73.1696) lr 4.6417e-04 eta 3:07:58 +epoch [36/50] batch [285/500] time 1.554 (1.562) data 0.000 (0.003) loss 1.0830 (1.0667) acc 75.0000 (73.1360) lr 4.6417e-04 eta 3:07:50 +epoch [36/50] batch [290/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.0928 (1.0681) acc 65.6250 (73.1573) lr 4.6417e-04 eta 3:07:42 +epoch [36/50] batch [295/500] time 1.588 (1.562) data 0.000 (0.003) loss 1.0918 (1.0648) acc 75.0000 (73.2309) lr 4.6417e-04 eta 3:07:34 +epoch [36/50] batch [300/500] time 1.534 (1.562) data 0.000 (0.003) loss 1.0195 (1.0626) acc 81.2500 (73.2917) lr 4.6417e-04 eta 3:07:27 +epoch [36/50] batch [305/500] time 1.550 (1.562) data 0.000 (0.003) loss 0.5649 (1.0653) acc 78.1250 (73.2582) lr 4.6417e-04 eta 3:07:19 +epoch [36/50] batch [310/500] time 1.578 (1.562) data 0.000 (0.003) loss 1.0762 (1.0651) acc 78.1250 (73.2661) lr 4.6417e-04 eta 3:07:09 +epoch [36/50] batch [315/500] time 1.557 (1.562) data 0.000 (0.003) loss 1.2139 (1.0670) acc 53.1250 (73.2639) lr 4.6417e-04 eta 3:07:01 +epoch [36/50] batch [320/500] time 1.553 (1.562) data 0.000 (0.003) loss 1.1094 (1.0638) acc 59.3750 (73.3398) lr 4.6417e-04 eta 3:06:54 +epoch [36/50] batch [325/500] time 1.567 (1.562) data 0.000 (0.003) loss 
1.0850 (1.0666) acc 78.1250 (73.2692) lr 4.6417e-04 eta 3:06:48 +epoch [36/50] batch [330/500] time 1.535 (1.562) data 0.000 (0.003) loss 0.9062 (1.0630) acc 75.0000 (73.3523) lr 4.6417e-04 eta 3:06:40 +epoch [36/50] batch [335/500] time 1.544 (1.562) data 0.000 (0.003) loss 0.6934 (1.0602) acc 84.3750 (73.3862) lr 4.6417e-04 eta 3:06:31 +epoch [36/50] batch [340/500] time 1.548 (1.562) data 0.000 (0.003) loss 0.9473 (1.0627) acc 78.1250 (73.3732) lr 4.6417e-04 eta 3:06:22 +epoch [36/50] batch [345/500] time 1.558 (1.562) data 0.000 (0.003) loss 1.5459 (1.0659) acc 68.7500 (73.2880) lr 4.6417e-04 eta 3:06:14 +epoch [36/50] batch [350/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.0820 (1.0662) acc 75.0000 (73.2589) lr 4.6417e-04 eta 3:06:06 +epoch [36/50] batch [355/500] time 1.563 (1.562) data 0.000 (0.003) loss 0.8989 (1.0620) acc 84.3750 (73.3275) lr 4.6417e-04 eta 3:05:58 +epoch [36/50] batch [360/500] time 1.555 (1.562) data 0.000 (0.003) loss 1.2607 (1.0614) acc 78.1250 (73.3420) lr 4.6417e-04 eta 3:05:50 +epoch [36/50] batch [365/500] time 1.556 (1.562) data 0.001 (0.003) loss 0.9663 (1.0609) acc 71.8750 (73.3134) lr 4.6417e-04 eta 3:05:42 +epoch [36/50] batch [370/500] time 1.565 (1.562) data 0.000 (0.003) loss 1.7070 (1.0608) acc 65.6250 (73.3108) lr 4.6417e-04 eta 3:05:36 +epoch [36/50] batch [375/500] time 1.540 (1.562) data 0.000 (0.003) loss 1.6289 (1.0605) acc 56.2500 (73.2917) lr 4.6417e-04 eta 3:05:29 +epoch [36/50] batch [380/500] time 1.552 (1.562) data 0.000 (0.003) loss 0.8726 (1.0637) acc 75.0000 (73.1990) lr 4.6417e-04 eta 3:05:20 +epoch [36/50] batch [385/500] time 1.593 (1.562) data 0.000 (0.003) loss 0.7339 (1.0618) acc 84.3750 (73.2224) lr 4.6417e-04 eta 3:05:12 +epoch [36/50] batch [390/500] time 1.573 (1.562) data 0.000 (0.003) loss 0.9937 (1.0619) acc 78.1250 (73.2212) lr 4.6417e-04 eta 3:05:05 +epoch [36/50] batch [395/500] time 1.577 (1.562) data 0.000 (0.002) loss 1.4277 (1.0626) acc 62.5000 (73.1883) lr 4.6417e-04 eta 3:04:58 
+epoch [36/50] batch [400/500] time 1.560 (1.562) data 0.000 (0.002) loss 0.8774 (1.0613) acc 75.0000 (73.2031) lr 4.6417e-04 eta 3:04:51 +epoch [36/50] batch [405/500] time 1.540 (1.562) data 0.000 (0.002) loss 0.9429 (1.0612) acc 75.0000 (73.1559) lr 4.6417e-04 eta 3:04:42 +epoch [36/50] batch [410/500] time 1.568 (1.562) data 0.000 (0.002) loss 1.2461 (1.0610) acc 75.0000 (73.1479) lr 4.6417e-04 eta 3:04:35 +epoch [36/50] batch [415/500] time 1.567 (1.562) data 0.001 (0.002) loss 0.3999 (1.0565) acc 87.5000 (73.2530) lr 4.6417e-04 eta 3:04:26 +epoch [36/50] batch [420/500] time 1.581 (1.562) data 0.001 (0.002) loss 0.6108 (1.0561) acc 78.1250 (73.2738) lr 4.6417e-04 eta 3:04:19 +epoch [36/50] batch [425/500] time 1.568 (1.562) data 0.000 (0.002) loss 0.6543 (1.0563) acc 81.2500 (73.2794) lr 4.6417e-04 eta 3:04:11 +epoch [36/50] batch [430/500] time 1.546 (1.562) data 0.000 (0.002) loss 1.0977 (1.0587) acc 71.8750 (73.2485) lr 4.6417e-04 eta 3:04:02 +epoch [36/50] batch [435/500] time 1.546 (1.562) data 0.000 (0.002) loss 1.1250 (1.0570) acc 68.7500 (73.2830) lr 4.6417e-04 eta 3:03:53 +epoch [36/50] batch [440/500] time 1.550 (1.562) data 0.000 (0.002) loss 1.1816 (1.0584) acc 71.8750 (73.2670) lr 4.6417e-04 eta 3:03:45 +epoch [36/50] batch [445/500] time 1.560 (1.561) data 0.000 (0.002) loss 1.4160 (1.0637) acc 65.6250 (73.1531) lr 4.6417e-04 eta 3:03:36 +epoch [36/50] batch [450/500] time 1.571 (1.562) data 0.000 (0.002) loss 0.6079 (1.0623) acc 81.2500 (73.1875) lr 4.6417e-04 eta 3:03:28 +epoch [36/50] batch [455/500] time 1.540 (1.562) data 0.000 (0.002) loss 1.3086 (1.0623) acc 65.6250 (73.1593) lr 4.6417e-04 eta 3:03:21 +epoch [36/50] batch [460/500] time 1.541 (1.562) data 0.000 (0.002) loss 0.9561 (1.0616) acc 71.8750 (73.1454) lr 4.6417e-04 eta 3:03:13 +epoch [36/50] batch [465/500] time 1.653 (1.562) data 0.000 (0.002) loss 0.7847 (1.0636) acc 71.8750 (73.1183) lr 4.6417e-04 eta 3:03:07 +epoch [36/50] batch [470/500] time 1.605 (1.562) data 0.000 
(0.002) loss 0.5542 (1.0623) acc 87.5000 (73.1782) lr 4.6417e-04 eta 3:03:01 +epoch [36/50] batch [475/500] time 1.582 (1.562) data 0.000 (0.002) loss 0.6221 (1.0628) acc 81.2500 (73.1842) lr 4.6417e-04 eta 3:02:54 +epoch [36/50] batch [480/500] time 1.564 (1.562) data 0.000 (0.002) loss 0.7305 (1.0614) acc 84.3750 (73.2422) lr 4.6417e-04 eta 3:02:47 +epoch [36/50] batch [485/500] time 1.535 (1.562) data 0.001 (0.002) loss 0.9097 (1.0614) acc 78.1250 (73.2281) lr 4.6417e-04 eta 3:02:39 +epoch [36/50] batch [490/500] time 1.564 (1.562) data 0.000 (0.002) loss 1.0547 (1.0617) acc 78.1250 (73.2462) lr 4.6417e-04 eta 3:02:31 +epoch [36/50] batch [495/500] time 1.552 (1.562) data 0.000 (0.002) loss 1.0273 (1.0627) acc 78.1250 (73.2576) lr 4.6417e-04 eta 3:02:23 +epoch [36/50] batch [500/500] time 1.567 (1.562) data 0.000 (0.002) loss 1.0732 (1.0617) acc 71.8750 (73.2750) lr 4.1221e-04 eta 3:02:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,033 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [37/50] batch [5/500] time 1.577 (1.663) data 0.001 (0.145) loss 0.8506 (1.0982) acc 78.1250 (73.7500) lr 4.1221e-04 eta 3:13:50 +epoch [37/50] batch [10/500] time 1.532 (1.608) data 0.001 (0.073) loss 0.5952 (1.0958) acc 93.7500 (74.0625) lr 4.1221e-04 eta 3:07:18 +epoch [37/50] batch [15/500] time 1.540 (1.590) data 0.000 (0.049) loss 0.7021 (1.0501) acc 75.0000 (74.3750) lr 4.1221e-04 eta 3:05:08 +epoch [37/50] batch [20/500] time 1.530 (1.589) data 0.001 (0.037) loss 0.7085 (0.9700) acc 87.5000 (76.2500) lr 4.1221e-04 eta 3:04:52 +epoch [37/50] batch [25/500] time 1.554 (1.585) data 0.000 (0.029) loss 1.0176 (0.9620) acc 78.1250 (76.5000) lr 4.1221e-04 eta 3:04:14 +epoch [37/50] batch [30/500] time 1.576 (1.581) data 0.001 (0.025) loss 1.1113 (0.9409) acc 68.7500 (76.4583) lr 4.1221e-04 eta 3:03:40 +epoch [37/50] batch [35/500] time 1.569 (1.578) data 0.000 (0.021) loss 0.5474 (0.9629) acc 78.1250 (75.8036) lr 4.1221e-04 eta 3:03:07 +epoch 
[37/50] batch [40/500] time 1.555 (1.574) data 0.000 (0.019) loss 1.5645 (1.0165) acc 65.6250 (74.7656) lr 4.1221e-04 eta 3:02:36 +epoch [37/50] batch [45/500] time 1.564 (1.574) data 0.000 (0.017) loss 0.8950 (1.0550) acc 81.2500 (74.2361) lr 4.1221e-04 eta 3:02:25 +epoch [37/50] batch [50/500] time 1.543 (1.572) data 0.000 (0.015) loss 1.0732 (1.0440) acc 71.8750 (74.1875) lr 4.1221e-04 eta 3:02:04 +epoch [37/50] batch [55/500] time 1.566 (1.570) data 0.000 (0.014) loss 1.2275 (1.0543) acc 65.6250 (74.0909) lr 4.1221e-04 eta 3:01:42 +epoch [37/50] batch [60/500] time 1.580 (1.569) data 0.001 (0.013) loss 1.0049 (1.0434) acc 68.7500 (74.2708) lr 4.1221e-04 eta 3:01:31 +epoch [37/50] batch [65/500] time 1.561 (1.568) data 0.000 (0.012) loss 1.2168 (1.0500) acc 65.6250 (73.6538) lr 4.1221e-04 eta 3:01:16 +epoch [37/50] batch [70/500] time 1.571 (1.568) data 0.000 (0.011) loss 0.6606 (1.0266) acc 84.3750 (74.2857) lr 4.1221e-04 eta 3:01:08 +epoch [37/50] batch [75/500] time 1.559 (1.568) data 0.000 (0.010) loss 0.7202 (1.0226) acc 81.2500 (74.3750) lr 4.1221e-04 eta 3:00:55 +epoch [37/50] batch [80/500] time 1.553 (1.567) data 0.000 (0.009) loss 0.4463 (1.0093) acc 93.7500 (74.5312) lr 4.1221e-04 eta 3:00:43 +epoch [37/50] batch [85/500] time 1.550 (1.566) data 0.000 (0.009) loss 1.3213 (1.0182) acc 71.8750 (74.1912) lr 4.1221e-04 eta 3:00:31 +epoch [37/50] batch [90/500] time 1.547 (1.565) data 0.000 (0.008) loss 1.3311 (1.0166) acc 68.7500 (74.2014) lr 4.1221e-04 eta 3:00:13 +epoch [37/50] batch [95/500] time 1.551 (1.564) data 0.000 (0.008) loss 1.2256 (1.0219) acc 75.0000 (74.1447) lr 4.1221e-04 eta 2:59:59 +epoch [37/50] batch [100/500] time 1.578 (1.564) data 0.000 (0.008) loss 1.5107 (1.0212) acc 68.7500 (74.0000) lr 4.1221e-04 eta 2:59:54 +epoch [37/50] batch [105/500] time 1.576 (1.564) data 0.000 (0.007) loss 1.1279 (1.0285) acc 71.8750 (73.8393) lr 4.1221e-04 eta 2:59:47 +epoch [37/50] batch [110/500] time 1.571 (1.564) data 0.000 (0.007) loss 0.9307 
(1.0250) acc 71.8750 (73.8068) lr 4.1221e-04 eta 2:59:37 +epoch [37/50] batch [115/500] time 1.665 (1.566) data 0.000 (0.007) loss 0.7739 (1.0207) acc 81.2500 (74.1033) lr 4.1221e-04 eta 2:59:39 +epoch [37/50] batch [120/500] time 1.549 (1.565) data 0.000 (0.006) loss 1.3574 (1.0306) acc 75.0000 (73.8281) lr 4.1221e-04 eta 2:59:26 +epoch [37/50] batch [125/500] time 1.559 (1.564) data 0.000 (0.006) loss 0.7617 (1.0216) acc 78.1250 (73.9500) lr 4.1221e-04 eta 2:59:13 +epoch [37/50] batch [130/500] time 1.554 (1.564) data 0.000 (0.006) loss 1.2979 (1.0296) acc 68.7500 (73.8221) lr 4.1221e-04 eta 2:59:05 +epoch [37/50] batch [135/500] time 1.582 (1.564) data 0.000 (0.006) loss 0.9102 (1.0244) acc 75.0000 (73.9815) lr 4.1221e-04 eta 2:58:58 +epoch [37/50] batch [140/500] time 1.557 (1.564) data 0.001 (0.006) loss 1.6572 (1.0267) acc 56.2500 (73.8839) lr 4.1221e-04 eta 2:58:51 +epoch [37/50] batch [145/500] time 1.537 (1.564) data 0.000 (0.005) loss 0.5703 (1.0255) acc 81.2500 (73.9655) lr 4.1221e-04 eta 2:58:39 +epoch [37/50] batch [150/500] time 1.548 (1.563) data 0.000 (0.005) loss 1.2119 (1.0404) acc 71.8750 (73.7917) lr 4.1221e-04 eta 2:58:27 +epoch [37/50] batch [155/500] time 1.556 (1.563) data 0.000 (0.005) loss 1.4072 (1.0467) acc 68.7500 (73.8306) lr 4.1221e-04 eta 2:58:17 +epoch [37/50] batch [160/500] time 1.561 (1.563) data 0.000 (0.005) loss 0.7109 (1.0441) acc 75.0000 (73.8477) lr 4.1221e-04 eta 2:58:13 +epoch [37/50] batch [165/500] time 1.551 (1.563) data 0.000 (0.005) loss 0.8042 (1.0395) acc 78.1250 (73.8447) lr 4.1221e-04 eta 2:58:04 +epoch [37/50] batch [170/500] time 1.552 (1.563) data 0.000 (0.005) loss 1.0020 (1.0354) acc 75.0000 (73.9706) lr 4.1221e-04 eta 2:57:57 +epoch [37/50] batch [175/500] time 1.525 (1.563) data 0.000 (0.005) loss 0.7856 (1.0331) acc 81.2500 (74.0179) lr 4.1221e-04 eta 2:57:46 +epoch [37/50] batch [180/500] time 1.561 (1.563) data 0.000 (0.004) loss 0.5566 (1.0284) acc 81.2500 (74.1493) lr 4.1221e-04 eta 2:57:37 +epoch 
[37/50] batch [185/500] time 1.570 (1.563) data 0.000 (0.004) loss 1.1777 (1.0315) acc 68.7500 (74.0709) lr 4.1221e-04 eta 2:57:30 +epoch [37/50] batch [190/500] time 1.578 (1.563) data 0.000 (0.004) loss 1.0371 (1.0363) acc 65.6250 (73.8980) lr 4.1221e-04 eta 2:57:21 +epoch [37/50] batch [195/500] time 1.539 (1.562) data 0.000 (0.004) loss 1.1426 (1.0340) acc 78.1250 (74.0385) lr 4.1221e-04 eta 2:57:12 +epoch [37/50] batch [200/500] time 1.577 (1.562) data 0.000 (0.004) loss 0.9321 (1.0339) acc 75.0000 (74.0156) lr 4.1221e-04 eta 2:57:04 +epoch [37/50] batch [205/500] time 1.549 (1.562) data 0.000 (0.004) loss 1.2959 (1.0257) acc 75.0000 (74.1921) lr 4.1221e-04 eta 2:56:55 +epoch [37/50] batch [210/500] time 1.545 (1.562) data 0.000 (0.004) loss 0.8442 (1.0226) acc 68.7500 (74.2411) lr 4.1221e-04 eta 2:56:44 +epoch [37/50] batch [215/500] time 1.552 (1.562) data 0.000 (0.004) loss 0.9956 (1.0221) acc 81.2500 (74.2442) lr 4.1221e-04 eta 2:56:37 +epoch [37/50] batch [220/500] time 1.537 (1.562) data 0.000 (0.004) loss 1.0361 (1.0191) acc 59.3750 (74.2330) lr 4.1221e-04 eta 2:56:27 +epoch [37/50] batch [225/500] time 1.551 (1.562) data 0.000 (0.004) loss 1.0635 (1.0205) acc 78.1250 (74.1528) lr 4.1221e-04 eta 2:56:20 +epoch [37/50] batch [230/500] time 1.538 (1.561) data 0.000 (0.004) loss 1.2012 (1.0169) acc 75.0000 (74.1576) lr 4.1221e-04 eta 2:56:11 +epoch [37/50] batch [235/500] time 1.552 (1.561) data 0.000 (0.003) loss 1.0596 (1.0162) acc 78.1250 (74.1622) lr 4.1221e-04 eta 2:56:02 +epoch [37/50] batch [240/500] time 1.592 (1.561) data 0.000 (0.003) loss 0.7168 (1.0164) acc 78.1250 (74.1536) lr 4.1221e-04 eta 2:55:55 +epoch [37/50] batch [245/500] time 1.548 (1.561) data 0.000 (0.003) loss 0.8716 (1.0211) acc 75.0000 (74.1071) lr 4.1221e-04 eta 2:55:46 +epoch [37/50] batch [250/500] time 1.543 (1.561) data 0.001 (0.003) loss 0.6089 (1.0196) acc 78.1250 (74.1000) lr 4.1221e-04 eta 2:55:37 +epoch [37/50] batch [255/500] time 1.546 (1.561) data 0.000 (0.003) loss 
1.1816 (1.0225) acc 62.5000 (74.0074) lr 4.1221e-04 eta 2:55:28 +epoch [37/50] batch [260/500] time 1.582 (1.561) data 0.000 (0.003) loss 0.9028 (1.0231) acc 71.8750 (73.9784) lr 4.1221e-04 eta 2:55:22 +epoch [37/50] batch [265/500] time 1.545 (1.561) data 0.000 (0.003) loss 0.5254 (1.0264) acc 78.1250 (73.9151) lr 4.1221e-04 eta 2:55:13 +epoch [37/50] batch [270/500] time 1.547 (1.561) data 0.000 (0.003) loss 1.4043 (1.0267) acc 71.8750 (73.9005) lr 4.1221e-04 eta 2:55:03 +epoch [37/50] batch [275/500] time 1.567 (1.561) data 0.000 (0.003) loss 1.2480 (1.0267) acc 75.0000 (73.9205) lr 4.1221e-04 eta 2:54:55 +epoch [37/50] batch [280/500] time 1.530 (1.560) data 0.000 (0.003) loss 1.1660 (1.0289) acc 68.7500 (73.8839) lr 4.1221e-04 eta 2:54:45 +epoch [37/50] batch [285/500] time 1.547 (1.560) data 0.000 (0.003) loss 0.4819 (1.0250) acc 81.2500 (73.9803) lr 4.1221e-04 eta 2:54:36 +epoch [37/50] batch [290/500] time 1.554 (1.560) data 0.000 (0.003) loss 0.7485 (1.0253) acc 75.0000 (73.9763) lr 4.1221e-04 eta 2:54:29 +epoch [37/50] batch [295/500] time 1.559 (1.560) data 0.000 (0.003) loss 0.7061 (1.0248) acc 87.5000 (74.0678) lr 4.1221e-04 eta 2:54:20 +epoch [37/50] batch [300/500] time 1.566 (1.560) data 0.001 (0.003) loss 2.3750 (1.0288) acc 59.3750 (74.0417) lr 4.1221e-04 eta 2:54:11 +epoch [37/50] batch [305/500] time 1.564 (1.560) data 0.000 (0.003) loss 1.0742 (1.0286) acc 75.0000 (74.0061) lr 4.1221e-04 eta 2:54:06 +epoch [37/50] batch [310/500] time 1.539 (1.560) data 0.000 (0.003) loss 0.5698 (1.0319) acc 87.5000 (73.9819) lr 4.1221e-04 eta 2:53:57 +epoch [37/50] batch [315/500] time 1.554 (1.560) data 0.000 (0.003) loss 0.7192 (1.0307) acc 84.3750 (74.0774) lr 4.1221e-04 eta 2:53:48 +epoch [37/50] batch [320/500] time 1.537 (1.560) data 0.000 (0.003) loss 1.5078 (1.0304) acc 65.6250 (74.0234) lr 4.1221e-04 eta 2:53:39 +epoch [37/50] batch [325/500] time 1.537 (1.560) data 0.000 (0.003) loss 0.9604 (1.0305) acc 75.0000 (73.9904) lr 4.1221e-04 eta 2:53:30 
+epoch [37/50] batch [330/500] time 1.529 (1.559) data 0.000 (0.003) loss 0.6689 (1.0288) acc 81.2500 (74.0246) lr 4.1221e-04 eta 2:53:21 +epoch [37/50] batch [335/500] time 1.570 (1.559) data 0.000 (0.003) loss 1.1387 (1.0275) acc 68.7500 (74.0672) lr 4.1221e-04 eta 2:53:14 +epoch [37/50] batch [340/500] time 1.555 (1.559) data 0.000 (0.003) loss 1.2734 (1.0272) acc 78.1250 (74.0717) lr 4.1221e-04 eta 2:53:04 +epoch [37/50] batch [345/500] time 1.571 (1.559) data 0.000 (0.002) loss 1.0752 (1.0276) acc 78.1250 (74.0489) lr 4.1221e-04 eta 2:52:57 +epoch [37/50] batch [350/500] time 1.542 (1.559) data 0.000 (0.002) loss 1.9346 (1.0287) acc 53.1250 (73.9821) lr 4.1221e-04 eta 2:52:48 +epoch [37/50] batch [355/500] time 1.575 (1.559) data 0.000 (0.002) loss 1.3115 (1.0290) acc 75.0000 (73.9789) lr 4.1221e-04 eta 2:52:41 +epoch [37/50] batch [360/500] time 1.553 (1.559) data 0.000 (0.002) loss 1.0771 (1.0291) acc 68.7500 (73.9931) lr 4.1221e-04 eta 2:52:33 +epoch [37/50] batch [365/500] time 1.559 (1.559) data 0.000 (0.002) loss 1.4209 (1.0322) acc 62.5000 (73.9726) lr 4.1221e-04 eta 2:52:25 +epoch [37/50] batch [370/500] time 1.570 (1.559) data 0.000 (0.002) loss 1.2773 (1.0344) acc 71.8750 (73.9189) lr 4.1221e-04 eta 2:52:18 +epoch [37/50] batch [375/500] time 1.587 (1.560) data 0.000 (0.002) loss 0.9966 (1.0374) acc 71.8750 (73.8667) lr 4.1221e-04 eta 2:52:11 +epoch [37/50] batch [380/500] time 1.568 (1.560) data 0.001 (0.002) loss 0.6099 (1.0350) acc 78.1250 (73.8980) lr 4.1221e-04 eta 2:52:05 +epoch [37/50] batch [385/500] time 1.548 (1.559) data 0.000 (0.002) loss 1.0908 (1.0336) acc 62.5000 (73.8555) lr 4.1221e-04 eta 2:51:56 +epoch [37/50] batch [390/500] time 1.551 (1.559) data 0.000 (0.002) loss 1.2803 (1.0361) acc 71.8750 (73.8462) lr 4.1221e-04 eta 2:51:48 +epoch [37/50] batch [395/500] time 1.561 (1.559) data 0.000 (0.002) loss 0.9570 (1.0334) acc 84.3750 (73.9241) lr 4.1221e-04 eta 2:51:40 +epoch [37/50] batch [400/500] time 1.557 (1.559) data 0.001 
(0.002) loss 0.8491 (1.0342) acc 68.7500 (73.8672) lr 4.1221e-04 eta 2:51:32 +epoch [37/50] batch [405/500] time 1.562 (1.560) data 0.000 (0.002) loss 0.8130 (1.0334) acc 68.7500 (73.8426) lr 4.1221e-04 eta 2:51:26 +epoch [37/50] batch [410/500] time 1.559 (1.560) data 0.000 (0.002) loss 1.2646 (1.0320) acc 62.5000 (73.8415) lr 4.1221e-04 eta 2:51:17 +epoch [37/50] batch [415/500] time 1.563 (1.560) data 0.001 (0.002) loss 2.3145 (1.0327) acc 50.0000 (73.8178) lr 4.1221e-04 eta 2:51:10 +epoch [37/50] batch [420/500] time 1.571 (1.560) data 0.001 (0.002) loss 0.7598 (1.0355) acc 78.1250 (73.7500) lr 4.1221e-04 eta 2:51:03 +epoch [37/50] batch [425/500] time 1.563 (1.560) data 0.000 (0.002) loss 1.2451 (1.0356) acc 68.7500 (73.7500) lr 4.1221e-04 eta 2:50:54 +epoch [37/50] batch [430/500] time 1.565 (1.560) data 0.001 (0.002) loss 0.7271 (1.0351) acc 75.0000 (73.7573) lr 4.1221e-04 eta 2:50:45 +epoch [37/50] batch [435/500] time 1.577 (1.560) data 0.000 (0.002) loss 1.0371 (1.0330) acc 65.6250 (73.7787) lr 4.1221e-04 eta 2:50:38 +epoch [37/50] batch [440/500] time 1.567 (1.560) data 0.000 (0.002) loss 1.3848 (1.0346) acc 71.8750 (73.7571) lr 4.1221e-04 eta 2:50:31 +epoch [37/50] batch [445/500] time 1.648 (1.560) data 0.001 (0.002) loss 1.1445 (1.0342) acc 71.8750 (73.7570) lr 4.1221e-04 eta 2:50:25 +epoch [37/50] batch [450/500] time 1.582 (1.560) data 0.001 (0.002) loss 1.1406 (1.0341) acc 68.7500 (73.7222) lr 4.1221e-04 eta 2:50:17 +epoch [37/50] batch [455/500] time 1.555 (1.560) data 0.000 (0.002) loss 0.6816 (1.0334) acc 78.1250 (73.7431) lr 4.1221e-04 eta 2:50:09 +epoch [37/50] batch [460/500] time 1.553 (1.560) data 0.000 (0.002) loss 1.2227 (1.0328) acc 62.5000 (73.7228) lr 4.1221e-04 eta 2:50:02 +epoch [37/50] batch [465/500] time 1.580 (1.560) data 0.001 (0.002) loss 0.9370 (1.0316) acc 68.7500 (73.7097) lr 4.1221e-04 eta 2:49:54 +epoch [37/50] batch [470/500] time 1.538 (1.560) data 0.001 (0.002) loss 1.0742 (1.0319) acc 78.1250 (73.7434) lr 4.1221e-04 
eta 2:49:46 +epoch [37/50] batch [475/500] time 1.544 (1.560) data 0.000 (0.002) loss 1.3301 (1.0319) acc 71.8750 (73.7961) lr 4.1221e-04 eta 2:49:38 +epoch [37/50] batch [480/500] time 1.552 (1.560) data 0.000 (0.002) loss 1.0527 (1.0318) acc 81.2500 (73.8021) lr 4.1221e-04 eta 2:49:30 +epoch [37/50] batch [485/500] time 1.552 (1.560) data 0.001 (0.002) loss 2.1230 (1.0366) acc 59.3750 (73.7242) lr 4.1221e-04 eta 2:49:21 +epoch [37/50] batch [490/500] time 1.523 (1.560) data 0.000 (0.002) loss 0.8955 (1.0367) acc 71.8750 (73.6798) lr 4.1221e-04 eta 2:49:12 +epoch [37/50] batch [495/500] time 1.542 (1.559) data 0.000 (0.002) loss 0.6729 (1.0350) acc 75.0000 (73.6869) lr 4.1221e-04 eta 2:49:03 +epoch [37/50] batch [500/500] time 1.546 (1.559) data 0.000 (0.002) loss 1.1387 (1.0377) acc 65.6250 (73.6188) lr 3.6258e-04 eta 2:48:55 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,020 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [38/50] batch [5/500] time 1.531 (1.719) data 0.000 (0.200) loss 0.8770 (0.8992) acc 81.2500 (80.0000) lr 3.6258e-04 eta 3:06:05 +epoch [38/50] batch [10/500] time 1.543 (1.637) data 0.000 (0.100) loss 1.2969 (0.9837) acc 68.7500 (77.1875) lr 3.6258e-04 eta 2:57:06 +epoch [38/50] batch [15/500] time 1.556 (1.608) data 0.000 (0.067) loss 0.9521 (0.9674) acc 75.0000 (76.6667) lr 3.6258e-04 eta 2:53:46 +epoch [38/50] batch [20/500] time 1.542 (1.595) data 0.000 (0.050) loss 0.6992 (1.0143) acc 78.1250 (75.6250) lr 3.6258e-04 eta 2:52:13 +epoch [38/50] batch [25/500] time 1.564 (1.587) data 0.001 (0.040) loss 1.0713 (1.0393) acc 71.8750 (74.8750) lr 3.6258e-04 eta 2:51:15 +epoch [38/50] batch [30/500] time 1.548 (1.580) data 0.000 (0.034) loss 1.3330 (1.0667) acc 65.6250 (73.7500) lr 3.6258e-04 eta 2:50:24 +epoch [38/50] batch [35/500] time 1.555 (1.578) data 0.000 (0.029) loss 1.2881 (1.0721) acc 71.8750 (73.7500) lr 3.6258e-04 eta 2:49:59 +epoch [38/50] batch [40/500] time 1.529 (1.574) data 0.001 (0.025) loss 
0.9824 (1.0729) acc 81.2500 (73.5156) lr 3.6258e-04 eta 2:49:27 +epoch [38/50] batch [45/500] time 1.553 (1.573) data 0.000 (0.023) loss 0.7388 (1.0479) acc 75.0000 (73.8194) lr 3.6258e-04 eta 2:49:16 +epoch [38/50] batch [50/500] time 1.565 (1.573) data 0.000 (0.020) loss 1.0791 (1.0578) acc 75.0000 (74.1250) lr 3.6258e-04 eta 2:49:03 +epoch [38/50] batch [55/500] time 1.577 (1.572) data 0.000 (0.019) loss 1.1719 (1.0575) acc 78.1250 (74.4886) lr 3.6258e-04 eta 2:48:54 +epoch [38/50] batch [60/500] time 1.557 (1.571) data 0.000 (0.017) loss 1.1328 (1.0440) acc 62.5000 (74.7396) lr 3.6258e-04 eta 2:48:35 +epoch [38/50] batch [65/500] time 1.564 (1.569) data 0.000 (0.016) loss 1.6172 (1.0464) acc 68.7500 (74.8077) lr 3.6258e-04 eta 2:48:18 +epoch [38/50] batch [70/500] time 1.575 (1.570) data 0.000 (0.015) loss 0.4858 (1.0433) acc 90.6250 (74.7768) lr 3.6258e-04 eta 2:48:11 +epoch [38/50] batch [75/500] time 1.570 (1.569) data 0.000 (0.014) loss 1.0098 (1.0312) acc 71.8750 (74.9167) lr 3.6258e-04 eta 2:48:01 +epoch [38/50] batch [80/500] time 1.555 (1.568) data 0.000 (0.013) loss 1.7510 (1.0457) acc 53.1250 (74.5703) lr 3.6258e-04 eta 2:47:46 +epoch [38/50] batch [85/500] time 1.541 (1.567) data 0.000 (0.012) loss 0.9053 (1.0418) acc 71.8750 (74.4485) lr 3.6258e-04 eta 2:47:32 +epoch [38/50] batch [90/500] time 1.552 (1.566) data 0.000 (0.012) loss 0.9819 (1.0479) acc 71.8750 (74.3750) lr 3.6258e-04 eta 2:47:18 +epoch [38/50] batch [95/500] time 1.568 (1.566) data 0.000 (0.011) loss 1.0986 (1.0496) acc 68.7500 (74.1447) lr 3.6258e-04 eta 2:47:10 +epoch [38/50] batch [100/500] time 1.559 (1.566) data 0.000 (0.010) loss 0.9526 (1.0606) acc 84.3750 (74.0000) lr 3.6258e-04 eta 2:47:00 +epoch [38/50] batch [105/500] time 1.565 (1.565) data 0.001 (0.010) loss 1.1016 (1.0674) acc 68.7500 (73.7798) lr 3.6258e-04 eta 2:46:51 +epoch [38/50] batch [110/500] time 1.543 (1.565) data 0.000 (0.010) loss 0.7969 (1.0592) acc 71.8750 (73.8352) lr 3.6258e-04 eta 2:46:42 +epoch [38/50] 
batch [115/500] time 1.554 (1.565) data 0.001 (0.009) loss 1.0898 (1.0508) acc 75.0000 (74.1304) lr 3.6258e-04 eta 2:46:31 +epoch [38/50] batch [120/500] time 1.556 (1.565) data 0.000 (0.009) loss 0.4893 (1.0385) acc 81.2500 (74.1927) lr 3.6258e-04 eta 2:46:22 +epoch [38/50] batch [125/500] time 1.556 (1.564) data 0.000 (0.008) loss 0.9507 (1.0504) acc 81.2500 (74.1000) lr 3.6258e-04 eta 2:46:11 +epoch [38/50] batch [130/500] time 1.553 (1.564) data 0.000 (0.008) loss 1.4746 (1.0550) acc 65.6250 (74.0865) lr 3.6258e-04 eta 2:46:00 +epoch [38/50] batch [135/500] time 1.554 (1.563) data 0.000 (0.008) loss 1.2705 (1.0615) acc 53.1250 (73.9120) lr 3.6258e-04 eta 2:45:51 +epoch [38/50] batch [140/500] time 1.560 (1.563) data 0.001 (0.008) loss 1.3281 (1.0610) acc 71.8750 (73.9286) lr 3.6258e-04 eta 2:45:43 +epoch [38/50] batch [145/500] time 1.561 (1.564) data 0.000 (0.007) loss 1.1543 (1.0555) acc 65.6250 (73.9009) lr 3.6258e-04 eta 2:45:40 +epoch [38/50] batch [150/500] time 1.560 (1.564) data 0.000 (0.007) loss 1.1973 (1.0536) acc 78.1250 (73.8958) lr 3.6258e-04 eta 2:45:30 +epoch [38/50] batch [155/500] time 1.568 (1.564) data 0.001 (0.007) loss 0.9268 (1.0565) acc 75.0000 (73.8306) lr 3.6258e-04 eta 2:45:21 +epoch [38/50] batch [160/500] time 1.554 (1.564) data 0.000 (0.007) loss 0.4839 (1.0504) acc 93.7500 (73.9453) lr 3.6258e-04 eta 2:45:14 +epoch [38/50] batch [165/500] time 1.578 (1.564) data 0.001 (0.007) loss 0.9185 (1.0481) acc 78.1250 (74.0720) lr 3.6258e-04 eta 2:45:07 +epoch [38/50] batch [170/500] time 1.584 (1.564) data 0.001 (0.006) loss 1.4873 (1.0543) acc 71.8750 (73.7684) lr 3.6258e-04 eta 2:45:00 +epoch [38/50] batch [175/500] time 1.570 (1.564) data 0.001 (0.006) loss 1.3154 (1.0500) acc 71.8750 (73.7500) lr 3.6258e-04 eta 2:44:53 +epoch [38/50] batch [180/500] time 1.569 (1.564) data 0.000 (0.006) loss 1.0869 (1.0506) acc 78.1250 (73.6458) lr 3.6258e-04 eta 2:44:44 +epoch [38/50] batch [185/500] time 1.543 (1.564) data 0.000 (0.006) loss 1.1904 
(1.0546) acc 65.6250 (73.5811) lr 3.6258e-04 eta 2:44:34 +epoch [38/50] batch [190/500] time 1.554 (1.564) data 0.000 (0.006) loss 1.2236 (1.0562) acc 68.7500 (73.6020) lr 3.6258e-04 eta 2:44:29 +epoch [38/50] batch [195/500] time 1.569 (1.564) data 0.000 (0.006) loss 1.0381 (1.0570) acc 71.8750 (73.6378) lr 3.6258e-04 eta 2:44:19 +epoch [38/50] batch [200/500] time 1.552 (1.564) data 0.000 (0.005) loss 1.1035 (1.0581) acc 81.2500 (73.6875) lr 3.6258e-04 eta 2:44:10 +epoch [38/50] batch [205/500] time 1.545 (1.563) data 0.000 (0.005) loss 0.8579 (1.0567) acc 78.1250 (73.6890) lr 3.6258e-04 eta 2:44:01 +epoch [38/50] batch [210/500] time 1.551 (1.563) data 0.001 (0.005) loss 1.4102 (1.0564) acc 59.3750 (73.6607) lr 3.6258e-04 eta 2:43:50 +epoch [38/50] batch [215/500] time 1.557 (1.563) data 0.000 (0.005) loss 0.3906 (1.0502) acc 90.6250 (73.8227) lr 3.6258e-04 eta 2:43:42 +epoch [38/50] batch [220/500] time 1.563 (1.563) data 0.000 (0.005) loss 1.4297 (1.0550) acc 68.7500 (73.8210) lr 3.6258e-04 eta 2:43:33 +epoch [38/50] batch [225/500] time 1.570 (1.562) data 0.000 (0.005) loss 1.1523 (1.0581) acc 78.1250 (73.8333) lr 3.6258e-04 eta 2:43:23 +epoch [38/50] batch [230/500] time 1.548 (1.562) data 0.000 (0.005) loss 0.8179 (1.0531) acc 78.1250 (73.9266) lr 3.6258e-04 eta 2:43:14 +epoch [38/50] batch [235/500] time 1.581 (1.562) data 0.001 (0.005) loss 1.2793 (1.0568) acc 53.1250 (73.8165) lr 3.6258e-04 eta 2:43:08 +epoch [38/50] batch [240/500] time 1.554 (1.562) data 0.000 (0.005) loss 1.1445 (1.0595) acc 75.0000 (73.7760) lr 3.6258e-04 eta 2:42:59 +epoch [38/50] batch [245/500] time 1.545 (1.562) data 0.000 (0.005) loss 1.1387 (1.0589) acc 81.2500 (73.8903) lr 3.6258e-04 eta 2:42:51 +epoch [38/50] batch [250/500] time 1.546 (1.562) data 0.001 (0.004) loss 1.2793 (1.0605) acc 78.1250 (73.9250) lr 3.6258e-04 eta 2:42:42 +epoch [38/50] batch [255/500] time 1.560 (1.562) data 0.000 (0.004) loss 1.4551 (1.0611) acc 71.8750 (73.9461) lr 3.6258e-04 eta 2:42:33 +epoch 
[38/50] batch [260/500] time 1.574 (1.562) data 0.000 (0.004) loss 0.9248 (1.0612) acc 71.8750 (73.8702) lr 3.6258e-04 eta 2:42:25 +epoch [38/50] batch [265/500] time 1.552 (1.562) data 0.000 (0.004) loss 0.9106 (1.0611) acc 75.0000 (73.8797) lr 3.6258e-04 eta 2:42:17 +epoch [38/50] batch [270/500] time 1.553 (1.562) data 0.000 (0.004) loss 0.9634 (1.0606) acc 65.6250 (73.8310) lr 3.6258e-04 eta 2:42:09 +epoch [38/50] batch [275/500] time 1.572 (1.562) data 0.000 (0.004) loss 0.8711 (1.0611) acc 84.3750 (73.8864) lr 3.6258e-04 eta 2:42:01 +epoch [38/50] batch [280/500] time 1.556 (1.561) data 0.000 (0.004) loss 0.7520 (1.0607) acc 78.1250 (73.9174) lr 3.6258e-04 eta 2:41:52 +epoch [38/50] batch [285/500] time 1.674 (1.562) data 0.000 (0.004) loss 1.1982 (1.0587) acc 75.0000 (73.9693) lr 3.6258e-04 eta 2:41:47 +epoch [38/50] batch [290/500] time 1.549 (1.562) data 0.001 (0.004) loss 0.9844 (1.0610) acc 78.1250 (73.9763) lr 3.6258e-04 eta 2:41:38 +epoch [38/50] batch [295/500] time 1.564 (1.562) data 0.000 (0.004) loss 1.1494 (1.0615) acc 75.0000 (73.9936) lr 3.6258e-04 eta 2:41:30 +epoch [38/50] batch [300/500] time 1.550 (1.561) data 0.000 (0.004) loss 1.0020 (1.0592) acc 75.0000 (74.0729) lr 3.6258e-04 eta 2:41:20 +epoch [38/50] batch [305/500] time 1.573 (1.561) data 0.000 (0.004) loss 0.9541 (1.0573) acc 75.0000 (74.1086) lr 3.6258e-04 eta 2:41:13 +epoch [38/50] batch [310/500] time 1.568 (1.561) data 0.001 (0.004) loss 1.2334 (1.0622) acc 62.5000 (74.0121) lr 3.6258e-04 eta 2:41:05 +epoch [38/50] batch [315/500] time 1.538 (1.561) data 0.000 (0.004) loss 0.9292 (1.0610) acc 75.0000 (73.9782) lr 3.6258e-04 eta 2:40:56 +epoch [38/50] batch [320/500] time 1.561 (1.561) data 0.000 (0.004) loss 1.0186 (1.0610) acc 75.0000 (73.9453) lr 3.6258e-04 eta 2:40:48 +epoch [38/50] batch [325/500] time 1.543 (1.561) data 0.000 (0.004) loss 0.6670 (1.0573) acc 84.3750 (74.0192) lr 3.6258e-04 eta 2:40:39 +epoch [38/50] batch [330/500] time 1.563 (1.561) data 0.000 (0.003) loss 
0.9927 (1.0556) acc 71.8750 (74.0246) lr 3.6258e-04 eta 2:40:32 +epoch [38/50] batch [335/500] time 1.538 (1.561) data 0.000 (0.003) loss 0.7129 (1.0538) acc 78.1250 (74.0112) lr 3.6258e-04 eta 2:40:24 +epoch [38/50] batch [340/500] time 1.550 (1.561) data 0.000 (0.003) loss 0.6914 (1.0540) acc 78.1250 (73.9982) lr 3.6258e-04 eta 2:40:16 +epoch [38/50] batch [345/500] time 1.558 (1.561) data 0.000 (0.003) loss 0.8975 (1.0525) acc 78.1250 (74.0308) lr 3.6258e-04 eta 2:40:08 +epoch [38/50] batch [350/500] time 1.575 (1.561) data 0.000 (0.003) loss 0.9321 (1.0533) acc 75.0000 (74.0268) lr 3.6258e-04 eta 2:40:00 +epoch [38/50] batch [355/500] time 1.543 (1.561) data 0.000 (0.003) loss 1.2441 (1.0551) acc 81.2500 (74.0493) lr 3.6258e-04 eta 2:39:52 +epoch [38/50] batch [360/500] time 1.559 (1.561) data 0.000 (0.003) loss 0.9917 (1.0562) acc 75.0000 (74.0451) lr 3.6258e-04 eta 2:39:44 +epoch [38/50] batch [365/500] time 1.546 (1.561) data 0.001 (0.003) loss 1.2363 (1.0545) acc 62.5000 (74.0668) lr 3.6258e-04 eta 2:39:37 +epoch [38/50] batch [370/500] time 1.559 (1.561) data 0.000 (0.003) loss 0.7881 (1.0535) acc 81.2500 (74.1216) lr 3.6258e-04 eta 2:39:29 +epoch [38/50] batch [375/500] time 1.556 (1.561) data 0.000 (0.003) loss 0.7910 (1.0514) acc 81.2500 (74.2167) lr 3.6258e-04 eta 2:39:21 +epoch [38/50] batch [380/500] time 1.540 (1.561) data 0.000 (0.003) loss 1.4541 (1.0511) acc 62.5000 (74.2352) lr 3.6258e-04 eta 2:39:13 +epoch [38/50] batch [385/500] time 1.533 (1.561) data 0.000 (0.003) loss 1.2930 (1.0490) acc 62.5000 (74.2614) lr 3.6258e-04 eta 2:39:04 +epoch [38/50] batch [390/500] time 1.587 (1.561) data 0.000 (0.003) loss 1.1592 (1.0522) acc 68.7500 (74.2228) lr 3.6258e-04 eta 2:38:57 +epoch [38/50] batch [395/500] time 1.546 (1.561) data 0.000 (0.003) loss 1.3955 (1.0519) acc 75.0000 (74.2484) lr 3.6258e-04 eta 2:38:48 +epoch [38/50] batch [400/500] time 1.548 (1.561) data 0.001 (0.003) loss 0.8086 (1.0501) acc 71.8750 (74.2500) lr 3.6258e-04 eta 2:38:40 
+epoch [38/50] batch [405/500] time 1.566 (1.561) data 0.000 (0.003) loss 0.9429 (1.0473) acc 78.1250 (74.2747) lr 3.6258e-04 eta 2:38:32 +epoch [38/50] batch [410/500] time 1.564 (1.561) data 0.000 (0.003) loss 1.1162 (1.0487) acc 75.0000 (74.2302) lr 3.6258e-04 eta 2:38:23 +epoch [38/50] batch [415/500] time 1.561 (1.561) data 0.000 (0.003) loss 0.9019 (1.0486) acc 75.0000 (74.2018) lr 3.6258e-04 eta 2:38:15 +epoch [38/50] batch [420/500] time 1.568 (1.561) data 0.000 (0.003) loss 1.3486 (1.0495) acc 75.0000 (74.1964) lr 3.6258e-04 eta 2:38:08 +epoch [38/50] batch [425/500] time 1.566 (1.561) data 0.000 (0.003) loss 0.9590 (1.0487) acc 71.8750 (74.1691) lr 3.6258e-04 eta 2:38:00 +epoch [38/50] batch [430/500] time 1.557 (1.561) data 0.000 (0.003) loss 1.1396 (1.0499) acc 68.7500 (74.1134) lr 3.6258e-04 eta 2:37:54 +epoch [38/50] batch [435/500] time 1.574 (1.561) data 0.000 (0.003) loss 1.2432 (1.0507) acc 81.2500 (74.1236) lr 3.6258e-04 eta 2:37:46 +epoch [38/50] batch [440/500] time 1.541 (1.561) data 0.000 (0.003) loss 1.6016 (1.0520) acc 71.8750 (74.0767) lr 3.6258e-04 eta 2:37:38 +epoch [38/50] batch [445/500] time 1.560 (1.561) data 0.001 (0.003) loss 1.3652 (1.0514) acc 56.2500 (74.0379) lr 3.6258e-04 eta 2:37:29 +epoch [38/50] batch [450/500] time 1.547 (1.560) data 0.001 (0.003) loss 0.9365 (1.0534) acc 71.8750 (73.9931) lr 3.6258e-04 eta 2:37:20 +epoch [38/50] batch [455/500] time 1.561 (1.561) data 0.000 (0.003) loss 1.0020 (1.0525) acc 75.0000 (73.9973) lr 3.6258e-04 eta 2:37:13 +epoch [38/50] batch [460/500] time 1.577 (1.560) data 0.000 (0.003) loss 0.9756 (1.0521) acc 75.0000 (74.0149) lr 3.6258e-04 eta 2:37:05 +epoch [38/50] batch [465/500] time 1.546 (1.560) data 0.000 (0.003) loss 1.4795 (1.0524) acc 62.5000 (74.0390) lr 3.6258e-04 eta 2:36:56 +epoch [38/50] batch [470/500] time 1.537 (1.560) data 0.000 (0.003) loss 1.0732 (1.0536) acc 78.1250 (74.0426) lr 3.6258e-04 eta 2:36:48 +epoch [38/50] batch [475/500] time 1.588 (1.560) data 0.000 
(0.003) loss 0.4607 (1.0527) acc 87.5000 (74.0395) lr 3.6258e-04 eta 2:36:41 +epoch [38/50] batch [480/500] time 1.547 (1.561) data 0.000 (0.003) loss 0.5444 (1.0514) acc 81.2500 (74.0690) lr 3.6258e-04 eta 2:36:34 +epoch [38/50] batch [485/500] time 1.560 (1.561) data 0.001 (0.002) loss 1.1992 (1.0508) acc 75.0000 (74.0786) lr 3.6258e-04 eta 2:36:27 +epoch [38/50] batch [490/500] time 1.547 (1.561) data 0.000 (0.002) loss 1.2129 (1.0513) acc 62.5000 (74.0434) lr 3.6258e-04 eta 2:36:18 +epoch [38/50] batch [495/500] time 1.563 (1.561) data 0.000 (0.002) loss 0.7827 (1.0510) acc 87.5000 (74.0657) lr 3.6258e-04 eta 2:36:11 +epoch [38/50] batch [500/500] time 1.572 (1.561) data 0.000 (0.002) loss 1.3350 (1.0502) acc 75.0000 (74.0875) lr 3.1545e-04 eta 2:36:03 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,086 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [39/50] batch [5/500] time 1.522 (1.650) data 0.001 (0.149) loss 1.3105 (1.1438) acc 71.8750 (75.6250) lr 3.1545e-04 eta 2:44:54 +epoch [39/50] batch [10/500] time 1.571 (1.604) data 0.000 (0.074) loss 0.8379 (1.0696) acc 78.1250 (76.5625) lr 3.1545e-04 eta 2:40:10 +epoch [39/50] batch [15/500] time 1.541 (1.589) data 0.000 (0.050) loss 1.0420 (1.0927) acc 81.2500 (75.2083) lr 3.1545e-04 eta 2:38:27 +epoch [39/50] batch [20/500] time 1.554 (1.581) data 0.001 (0.037) loss 0.6392 (0.9711) acc 84.3750 (77.3438) lr 3.1545e-04 eta 2:37:33 +epoch [39/50] batch [25/500] time 1.571 (1.577) data 0.000 (0.030) loss 1.3584 (0.9746) acc 68.7500 (76.7500) lr 3.1545e-04 eta 2:37:01 +epoch [39/50] batch [30/500] time 1.528 (1.573) data 0.000 (0.025) loss 0.6646 (0.9853) acc 84.3750 (76.2500) lr 3.1545e-04 eta 2:36:32 +epoch [39/50] batch [35/500] time 1.536 (1.571) data 0.000 (0.022) loss 1.0088 (0.9841) acc 71.8750 (75.5357) lr 3.1545e-04 eta 2:36:09 +epoch 
[39/50] batch [40/500] time 1.564 (1.569) data 0.000 (0.019) loss 0.9160 (0.9867) acc 71.8750 (75.4688) lr 3.1545e-04 eta 2:35:51 +epoch [39/50] batch [45/500] time 1.569 (1.568) data 0.000 (0.017) loss 0.7554 (0.9916) acc 78.1250 (75.2083) lr 3.1545e-04 eta 2:35:39 +epoch [39/50] batch [50/500] time 1.579 (1.569) data 0.000 (0.015) loss 1.2559 (0.9983) acc 75.0000 (75.0625) lr 3.1545e-04 eta 2:35:34 +epoch [39/50] batch [55/500] time 1.567 (1.567) data 0.000 (0.014) loss 0.6782 (0.9873) acc 75.0000 (75.0568) lr 3.1545e-04 eta 2:35:18 +epoch [39/50] batch [60/500] time 1.572 (1.568) data 0.001 (0.013) loss 1.0459 (0.9960) acc 65.6250 (74.7917) lr 3.1545e-04 eta 2:35:13 +epoch [39/50] batch [65/500] time 1.556 (1.569) data 0.001 (0.012) loss 1.5693 (1.0217) acc 53.1250 (74.0865) lr 3.1545e-04 eta 2:35:13 +epoch [39/50] batch [70/500] time 1.552 (1.568) data 0.000 (0.011) loss 1.8213 (1.0373) acc 56.2500 (73.6161) lr 3.1545e-04 eta 2:34:58 +epoch [39/50] batch [75/500] time 1.549 (1.567) data 0.000 (0.010) loss 0.4688 (1.0244) acc 81.2500 (73.7083) lr 3.1545e-04 eta 2:34:44 +epoch [39/50] batch [80/500] time 1.540 (1.567) data 0.000 (0.010) loss 1.0068 (1.0203) acc 78.1250 (73.7500) lr 3.1545e-04 eta 2:34:34 +epoch [39/50] batch [85/500] time 1.540 (1.566) data 0.000 (0.009) loss 1.1318 (1.0242) acc 75.0000 (73.6765) lr 3.1545e-04 eta 2:34:23 +epoch [39/50] batch [90/500] time 1.555 (1.566) data 0.001 (0.009) loss 0.8521 (1.0138) acc 78.1250 (73.8542) lr 3.1545e-04 eta 2:34:16 +epoch [39/50] batch [95/500] time 1.544 (1.566) data 0.000 (0.008) loss 1.2627 (1.0154) acc 62.5000 (73.6184) lr 3.1545e-04 eta 2:34:05 +epoch [39/50] batch [100/500] time 1.558 (1.565) data 0.000 (0.008) loss 1.5820 (1.0267) acc 62.5000 (73.3750) lr 3.1545e-04 eta 2:33:55 +epoch [39/50] batch [105/500] time 1.552 (1.565) data 0.000 (0.007) loss 1.1426 (1.0327) acc 75.0000 (73.3036) lr 3.1545e-04 eta 2:33:44 +epoch [39/50] batch [110/500] time 1.568 (1.565) data 0.001 (0.007) loss 0.8066 
(1.0292) acc 71.8750 (73.3523) lr 3.1545e-04 eta 2:33:40 +epoch [39/50] batch [115/500] time 1.600 (1.565) data 0.001 (0.007) loss 0.9214 (1.0410) acc 78.1250 (73.1793) lr 3.1545e-04 eta 2:33:31 +epoch [39/50] batch [120/500] time 1.559 (1.565) data 0.001 (0.007) loss 0.6567 (1.0355) acc 84.3750 (73.4115) lr 3.1545e-04 eta 2:33:20 +epoch [39/50] batch [125/500] time 1.561 (1.565) data 0.001 (0.006) loss 0.7900 (1.0331) acc 75.0000 (73.4750) lr 3.1545e-04 eta 2:33:11 +epoch [39/50] batch [130/500] time 1.557 (1.564) data 0.000 (0.006) loss 1.0244 (1.0381) acc 68.7500 (73.2692) lr 3.1545e-04 eta 2:33:03 +epoch [39/50] batch [135/500] time 1.564 (1.564) data 0.001 (0.006) loss 0.6240 (1.0298) acc 84.3750 (73.5185) lr 3.1545e-04 eta 2:32:54 +epoch [39/50] batch [140/500] time 1.557 (1.564) data 0.001 (0.006) loss 0.9048 (1.0261) acc 81.2500 (73.7054) lr 3.1545e-04 eta 2:32:46 +epoch [39/50] batch [145/500] time 1.559 (1.564) data 0.000 (0.006) loss 0.7998 (1.0251) acc 78.1250 (73.6853) lr 3.1545e-04 eta 2:32:37 +epoch [39/50] batch [150/500] time 1.561 (1.564) data 0.000 (0.005) loss 1.4873 (1.0232) acc 65.6250 (73.8333) lr 3.1545e-04 eta 2:32:29 +epoch [39/50] batch [155/500] time 1.556 (1.564) data 0.001 (0.005) loss 0.6240 (1.0230) acc 78.1250 (73.8508) lr 3.1545e-04 eta 2:32:19 +epoch [39/50] batch [160/500] time 1.556 (1.563) data 0.000 (0.005) loss 0.9395 (1.0263) acc 75.0000 (73.7305) lr 3.1545e-04 eta 2:32:09 +epoch [39/50] batch [165/500] time 1.574 (1.563) data 0.000 (0.005) loss 1.1270 (1.0334) acc 71.8750 (73.7121) lr 3.1545e-04 eta 2:32:00 +epoch [39/50] batch [170/500] time 1.566 (1.563) data 0.000 (0.005) loss 0.6548 (1.0305) acc 75.0000 (73.7868) lr 3.1545e-04 eta 2:31:52 +epoch [39/50] batch [175/500] time 1.579 (1.563) data 0.001 (0.005) loss 1.1611 (1.0348) acc 65.6250 (73.7857) lr 3.1545e-04 eta 2:31:45 +epoch [39/50] batch [180/500] time 1.556 (1.563) data 0.001 (0.005) loss 0.7388 (1.0376) acc 75.0000 (73.6285) lr 3.1545e-04 eta 2:31:36 +epoch 
[39/50] batch [185/500] time 1.554 (1.563) data 0.000 (0.004) loss 0.9580 (1.0391) acc 81.2500 (73.6318) lr 3.1545e-04 eta 2:31:27 +epoch [39/50] batch [190/500] time 1.533 (1.563) data 0.000 (0.004) loss 1.1758 (1.0417) acc 71.8750 (73.5033) lr 3.1545e-04 eta 2:31:18 +epoch [39/50] batch [195/500] time 1.566 (1.563) data 0.001 (0.004) loss 1.0029 (1.0411) acc 71.8750 (73.5737) lr 3.1545e-04 eta 2:31:11 +epoch [39/50] batch [200/500] time 1.569 (1.563) data 0.000 (0.004) loss 1.0615 (1.0504) acc 71.8750 (73.3438) lr 3.1545e-04 eta 2:31:03 +epoch [39/50] batch [205/500] time 1.653 (1.563) data 0.000 (0.004) loss 0.5767 (1.0462) acc 90.6250 (73.5061) lr 3.1545e-04 eta 2:30:56 +epoch [39/50] batch [210/500] time 1.567 (1.563) data 0.000 (0.004) loss 1.0967 (1.0482) acc 81.2500 (73.4970) lr 3.1545e-04 eta 2:30:49 +epoch [39/50] batch [215/500] time 1.549 (1.563) data 0.000 (0.004) loss 0.9639 (1.0505) acc 71.8750 (73.4593) lr 3.1545e-04 eta 2:30:40 +epoch [39/50] batch [220/500] time 1.580 (1.562) data 0.000 (0.004) loss 0.8223 (1.0489) acc 71.8750 (73.4233) lr 3.1545e-04 eta 2:30:31 +epoch [39/50] batch [225/500] time 1.560 (1.562) data 0.000 (0.004) loss 1.1621 (1.0532) acc 71.8750 (73.2917) lr 3.1545e-04 eta 2:30:23 +epoch [39/50] batch [230/500] time 1.554 (1.562) data 0.000 (0.004) loss 1.3105 (1.0538) acc 62.5000 (73.2473) lr 3.1545e-04 eta 2:30:15 +epoch [39/50] batch [235/500] time 1.550 (1.562) data 0.000 (0.004) loss 1.2148 (1.0553) acc 65.6250 (73.2181) lr 3.1545e-04 eta 2:30:06 +epoch [39/50] batch [240/500] time 1.541 (1.562) data 0.000 (0.004) loss 0.8564 (1.0572) acc 84.3750 (73.2292) lr 3.1545e-04 eta 2:29:57 +epoch [39/50] batch [245/500] time 1.538 (1.562) data 0.000 (0.003) loss 0.5718 (1.0588) acc 87.5000 (73.2270) lr 3.1545e-04 eta 2:29:47 +epoch [39/50] batch [250/500] time 1.530 (1.562) data 0.000 (0.003) loss 0.5024 (1.0590) acc 81.2500 (73.2250) lr 3.1545e-04 eta 2:29:40 +epoch [39/50] batch [255/500] time 1.572 (1.562) data 0.000 (0.003) loss 
0.8887 (1.0601) acc 75.0000 (73.1740) lr 3.1545e-04 eta 2:29:31 +epoch [39/50] batch [260/500] time 1.539 (1.562) data 0.000 (0.003) loss 1.1553 (1.0592) acc 68.7500 (73.2091) lr 3.1545e-04 eta 2:29:23 +epoch [39/50] batch [265/500] time 1.559 (1.562) data 0.000 (0.003) loss 1.1074 (1.0604) acc 78.1250 (73.1486) lr 3.1545e-04 eta 2:29:15 +epoch [39/50] batch [270/500] time 1.557 (1.562) data 0.000 (0.003) loss 0.9800 (1.0621) acc 68.7500 (73.0440) lr 3.1545e-04 eta 2:29:07 +epoch [39/50] batch [275/500] time 1.566 (1.562) data 0.000 (0.003) loss 1.1172 (1.0657) acc 68.7500 (72.9432) lr 3.1545e-04 eta 2:29:01 +epoch [39/50] batch [280/500] time 1.551 (1.562) data 0.001 (0.003) loss 1.3213 (1.0674) acc 62.5000 (72.8460) lr 3.1545e-04 eta 2:28:52 +epoch [39/50] batch [285/500] time 1.559 (1.562) data 0.000 (0.003) loss 1.0029 (1.0664) acc 71.8750 (72.8399) lr 3.1545e-04 eta 2:28:44 +epoch [39/50] batch [290/500] time 1.573 (1.562) data 0.001 (0.003) loss 0.5952 (1.0654) acc 81.2500 (72.8125) lr 3.1545e-04 eta 2:28:36 +epoch [39/50] batch [295/500] time 1.554 (1.562) data 0.000 (0.003) loss 1.1348 (1.0659) acc 68.7500 (72.7013) lr 3.1545e-04 eta 2:28:28 +epoch [39/50] batch [300/500] time 1.543 (1.561) data 0.000 (0.003) loss 1.4814 (1.0658) acc 75.0000 (72.7396) lr 3.1545e-04 eta 2:28:20 +epoch [39/50] batch [305/500] time 1.559 (1.561) data 0.001 (0.003) loss 0.6592 (1.0655) acc 71.8750 (72.6947) lr 3.1545e-04 eta 2:28:12 +epoch [39/50] batch [310/500] time 1.546 (1.562) data 0.000 (0.003) loss 0.6494 (1.0634) acc 78.1250 (72.7117) lr 3.1545e-04 eta 2:28:05 +epoch [39/50] batch [315/500] time 1.572 (1.562) data 0.000 (0.003) loss 1.2773 (1.0656) acc 68.7500 (72.7083) lr 3.1545e-04 eta 2:27:57 +epoch [39/50] batch [320/500] time 1.557 (1.561) data 0.000 (0.003) loss 1.2539 (1.0679) acc 65.6250 (72.6855) lr 3.1545e-04 eta 2:27:49 +epoch [39/50] batch [325/500] time 1.579 (1.562) data 0.000 (0.003) loss 1.4990 (1.0676) acc 65.6250 (72.7500) lr 3.1545e-04 eta 2:27:41 
+epoch [39/50] batch [330/500] time 1.545 (1.561) data 0.000 (0.003) loss 1.4609 (1.0755) acc 62.5000 (72.6042) lr 3.1545e-04 eta 2:27:33 +epoch [39/50] batch [335/500] time 1.583 (1.561) data 0.000 (0.003) loss 1.1699 (1.0757) acc 65.6250 (72.5746) lr 3.1545e-04 eta 2:27:25 +epoch [39/50] batch [340/500] time 1.567 (1.562) data 0.000 (0.003) loss 0.5293 (1.0763) acc 84.3750 (72.6379) lr 3.1545e-04 eta 2:27:18 +epoch [39/50] batch [345/500] time 1.569 (1.562) data 0.000 (0.003) loss 0.8750 (1.0738) acc 81.2500 (72.7174) lr 3.1545e-04 eta 2:27:10 +epoch [39/50] batch [350/500] time 1.587 (1.562) data 0.000 (0.003) loss 1.1748 (1.0721) acc 71.8750 (72.7589) lr 3.1545e-04 eta 2:27:06 +epoch [39/50] batch [355/500] time 1.571 (1.562) data 0.000 (0.002) loss 1.1230 (1.0743) acc 78.1250 (72.6937) lr 3.1545e-04 eta 2:26:58 +epoch [39/50] batch [360/500] time 1.550 (1.562) data 0.000 (0.002) loss 0.8682 (1.0711) acc 68.7500 (72.7431) lr 3.1545e-04 eta 2:26:50 +epoch [39/50] batch [365/500] time 1.570 (1.562) data 0.000 (0.002) loss 1.2178 (1.0705) acc 78.1250 (72.7825) lr 3.1545e-04 eta 2:26:43 +epoch [39/50] batch [370/500] time 1.552 (1.562) data 0.000 (0.002) loss 1.0537 (1.0688) acc 65.6250 (72.8041) lr 3.1545e-04 eta 2:26:35 +epoch [39/50] batch [375/500] time 1.571 (1.562) data 0.000 (0.002) loss 0.8608 (1.0682) acc 87.5000 (72.8583) lr 3.1545e-04 eta 2:26:27 +epoch [39/50] batch [380/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.0596 (1.0682) acc 62.5000 (72.8536) lr 3.1545e-04 eta 2:26:20 +epoch [39/50] batch [385/500] time 1.564 (1.562) data 0.000 (0.002) loss 0.6797 (1.0656) acc 81.2500 (72.8977) lr 3.1545e-04 eta 2:26:11 +epoch [39/50] batch [390/500] time 1.557 (1.562) data 0.000 (0.002) loss 1.2188 (1.0645) acc 71.8750 (72.9327) lr 3.1545e-04 eta 2:26:04 +epoch [39/50] batch [395/500] time 1.546 (1.563) data 0.000 (0.002) loss 0.8887 (1.0636) acc 78.1250 (72.9905) lr 3.1545e-04 eta 2:25:58 +epoch [39/50] batch [400/500] time 1.585 (1.563) data 0.000 
(0.002) loss 0.9790 (1.0614) acc 71.8750 (73.0156) lr 3.1545e-04 eta 2:25:50 +epoch [39/50] batch [405/500] time 1.576 (1.563) data 0.000 (0.002) loss 0.4990 (1.0583) acc 81.2500 (73.0864) lr 3.1545e-04 eta 2:25:42 +epoch [39/50] batch [410/500] time 1.553 (1.563) data 0.000 (0.002) loss 0.9131 (1.0557) acc 81.2500 (73.1402) lr 3.1545e-04 eta 2:25:34 +epoch [39/50] batch [415/500] time 1.552 (1.562) data 0.001 (0.002) loss 1.2578 (1.0605) acc 62.5000 (73.0723) lr 3.1545e-04 eta 2:25:26 +epoch [39/50] batch [420/500] time 1.542 (1.562) data 0.000 (0.002) loss 1.1016 (1.0606) acc 68.7500 (73.0580) lr 3.1545e-04 eta 2:25:17 +epoch [39/50] batch [425/500] time 1.551 (1.562) data 0.000 (0.002) loss 0.8545 (1.0586) acc 71.8750 (73.0956) lr 3.1545e-04 eta 2:25:10 +epoch [39/50] batch [430/500] time 1.565 (1.562) data 0.000 (0.002) loss 0.7280 (1.0586) acc 81.2500 (73.1323) lr 3.1545e-04 eta 2:25:02 +epoch [39/50] batch [435/500] time 1.578 (1.562) data 0.000 (0.002) loss 0.6582 (1.0547) acc 75.0000 (73.1537) lr 3.1545e-04 eta 2:24:55 +epoch [39/50] batch [440/500] time 1.553 (1.562) data 0.000 (0.002) loss 1.6885 (1.0566) acc 59.3750 (73.0895) lr 3.1545e-04 eta 2:24:47 +epoch [39/50] batch [445/500] time 1.558 (1.562) data 0.000 (0.002) loss 0.5952 (1.0552) acc 84.3750 (73.1039) lr 3.1545e-04 eta 2:24:38 +epoch [39/50] batch [450/500] time 1.533 (1.562) data 0.000 (0.002) loss 1.5596 (1.0556) acc 56.2500 (73.0903) lr 3.1545e-04 eta 2:24:30 +epoch [39/50] batch [455/500] time 1.567 (1.562) data 0.001 (0.002) loss 0.7710 (1.0533) acc 81.2500 (73.1593) lr 3.1545e-04 eta 2:24:22 +epoch [39/50] batch [460/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.3984 (1.0551) acc 68.7500 (73.1658) lr 3.1545e-04 eta 2:24:15 +epoch [39/50] batch [465/500] time 1.556 (1.562) data 0.000 (0.002) loss 0.6240 (1.0536) acc 90.6250 (73.2124) lr 3.1545e-04 eta 2:24:06 +epoch [39/50] batch [470/500] time 1.546 (1.562) data 0.000 (0.002) loss 1.2119 (1.0543) acc 59.3750 (73.1981) lr 3.1545e-04 
eta 2:23:57 +epoch [39/50] batch [475/500] time 1.532 (1.562) data 0.000 (0.002) loss 0.9194 (1.0544) acc 75.0000 (73.2039) lr 3.1545e-04 eta 2:23:49 +epoch [39/50] batch [480/500] time 1.556 (1.562) data 0.000 (0.002) loss 0.5894 (1.0521) acc 78.1250 (73.2227) lr 3.1545e-04 eta 2:23:41 +epoch [39/50] batch [485/500] time 1.563 (1.562) data 0.001 (0.002) loss 1.4668 (1.0519) acc 65.6250 (73.2281) lr 3.1545e-04 eta 2:23:33 +epoch [39/50] batch [490/500] time 1.579 (1.562) data 0.000 (0.002) loss 1.4082 (1.0539) acc 75.0000 (73.1633) lr 3.1545e-04 eta 2:23:25 +epoch [39/50] batch [495/500] time 1.554 (1.562) data 0.000 (0.002) loss 1.0713 (1.0528) acc 68.7500 (73.2008) lr 3.1545e-04 eta 2:23:19 +epoch [39/50] batch [500/500] time 1.552 (1.562) data 0.000 (0.002) loss 0.7778 (1.0533) acc 75.0000 (73.2062) lr 2.7103e-04 eta 2:23:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,002 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +epoch [40/50] batch [5/500] time 1.552 (1.687) data 0.001 (0.186) loss 1.0938 (1.0329) acc 75.0000 (76.2500) lr 2.7103e-04 eta 2:34:31 +epoch [40/50] batch [10/500] time 1.555 (1.619) data 0.001 (0.093) loss 1.2939 (1.0709) acc 62.5000 (72.1875) lr 2.7103e-04 eta 2:28:08 +epoch [40/50] batch [15/500] time 1.598 (1.606) data 0.000 (0.062) loss 1.2393 (1.0038) acc 75.0000 (74.5833) lr 2.7103e-04 eta 2:26:51 +epoch [40/50] batch [20/500] time 1.549 (1.597) data 0.001 (0.047) loss 1.1543 (1.0953) acc 78.1250 (73.1250) lr 2.7103e-04 eta 2:25:48 +epoch [40/50] batch [25/500] time 1.563 (1.588) data 0.000 (0.038) loss 0.7290 (1.0694) acc 71.8750 (73.3750) lr 2.7103e-04 eta 2:24:54 +epoch [40/50] batch [30/500] time 1.583 (1.583) data 0.000 (0.031) loss 0.8276 (1.0680) acc 71.8750 (73.0208) lr 2.7103e-04 eta 2:24:21 +epoch [40/50] batch [35/500] time 1.549 (1.580) data 0.001 (0.027) loss 0.6924 (1.0583) acc 78.1250 (73.3036) lr 2.7103e-04 eta 2:23:52 +epoch [40/50] batch [40/500] time 1.569 (1.581) data 0.000 (0.024) loss 
1.6201 (1.0481) acc 65.6250 (73.4375) lr 2.7103e-04 eta 2:23:52 +epoch [40/50] batch [45/500] time 1.557 (1.579) data 0.000 (0.021) loss 0.5327 (1.0620) acc 81.2500 (73.1250) lr 2.7103e-04 eta 2:23:33 +epoch [40/50] batch [50/500] time 1.547 (1.576) data 0.000 (0.019) loss 0.6729 (1.0469) acc 87.5000 (73.7500) lr 2.7103e-04 eta 2:23:11 +epoch [40/50] batch [55/500] time 1.556 (1.574) data 0.000 (0.017) loss 1.2236 (1.0453) acc 71.8750 (73.5227) lr 2.7103e-04 eta 2:22:51 +epoch [40/50] batch [60/500] time 1.544 (1.572) data 0.000 (0.016) loss 1.3232 (1.0483) acc 68.7500 (73.5938) lr 2.7103e-04 eta 2:22:31 +epoch [40/50] batch [65/500] time 1.552 (1.571) data 0.000 (0.015) loss 0.9966 (1.0453) acc 71.8750 (73.4135) lr 2.7103e-04 eta 2:22:16 +epoch [40/50] batch [70/500] time 1.553 (1.570) data 0.001 (0.014) loss 0.9028 (1.0462) acc 71.8750 (73.2589) lr 2.7103e-04 eta 2:22:02 +epoch [40/50] batch [75/500] time 1.530 (1.567) data 0.000 (0.013) loss 0.8921 (1.0544) acc 71.8750 (73.0833) lr 2.7103e-04 eta 2:21:42 +epoch [40/50] batch [80/500] time 1.558 (1.568) data 0.000 (0.012) loss 0.7983 (1.0540) acc 78.1250 (72.9297) lr 2.7103e-04 eta 2:21:39 +epoch [40/50] batch [85/500] time 1.539 (1.567) data 0.000 (0.011) loss 0.7451 (1.0410) acc 78.1250 (73.4559) lr 2.7103e-04 eta 2:21:26 +epoch [40/50] batch [90/500] time 1.546 (1.566) data 0.000 (0.011) loss 1.0195 (1.0443) acc 78.1250 (73.4722) lr 2.7103e-04 eta 2:21:14 +epoch [40/50] batch [95/500] time 1.572 (1.566) data 0.001 (0.010) loss 1.3125 (1.0401) acc 62.5000 (73.3882) lr 2.7103e-04 eta 2:21:05 +epoch [40/50] batch [100/500] time 1.559 (1.566) data 0.000 (0.010) loss 1.1406 (1.0468) acc 75.0000 (73.2188) lr 2.7103e-04 eta 2:20:55 +epoch [40/50] batch [105/500] time 1.580 (1.566) data 0.000 (0.009) loss 1.1084 (1.0438) acc 81.2500 (73.4226) lr 2.7103e-04 eta 2:20:47 +epoch [40/50] batch [110/500] time 1.560 (1.566) data 0.000 (0.009) loss 0.9814 (1.0423) acc 75.0000 (73.3239) lr 2.7103e-04 eta 2:20:39 +epoch [40/50] 
batch [115/500] time 1.560 (1.566) data 0.001 (0.009) loss 1.0723 (1.0495) acc 65.6250 (73.0435) lr 2.7103e-04 eta 2:20:31 +epoch [40/50] batch [120/500] time 1.581 (1.566) data 0.001 (0.008) loss 1.5508 (1.0522) acc 62.5000 (73.0208) lr 2.7103e-04 eta 2:20:22 +epoch [40/50] batch [125/500] time 1.543 (1.565) data 0.000 (0.008) loss 1.2588 (1.0597) acc 71.8750 (72.9000) lr 2.7103e-04 eta 2:20:10 +epoch [40/50] batch [130/500] time 1.567 (1.565) data 0.000 (0.008) loss 1.4561 (1.0590) acc 71.8750 (72.9087) lr 2.7103e-04 eta 2:20:02 +epoch [40/50] batch [135/500] time 1.565 (1.565) data 0.000 (0.007) loss 0.9873 (1.0583) acc 81.2500 (72.9630) lr 2.7103e-04 eta 2:19:55 +epoch [40/50] batch [140/500] time 1.565 (1.565) data 0.000 (0.007) loss 0.8301 (1.0609) acc 68.7500 (72.9464) lr 2.7103e-04 eta 2:19:49 +epoch [40/50] batch [145/500] time 1.535 (1.565) data 0.001 (0.007) loss 1.4297 (1.0657) acc 65.6250 (72.9095) lr 2.7103e-04 eta 2:19:38 +epoch [40/50] batch [150/500] time 1.574 (1.565) data 0.000 (0.007) loss 1.0078 (1.0702) acc 71.8750 (72.7708) lr 2.7103e-04 eta 2:19:30 +epoch [40/50] batch [155/500] time 1.570 (1.565) data 0.001 (0.006) loss 1.2168 (1.0696) acc 75.0000 (72.8024) lr 2.7103e-04 eta 2:19:23 +epoch [40/50] batch [160/500] time 1.558 (1.565) data 0.000 (0.006) loss 0.9048 (1.0731) acc 75.0000 (72.7344) lr 2.7103e-04 eta 2:19:14 +epoch [40/50] batch [165/500] time 1.563 (1.564) data 0.000 (0.006) loss 0.7866 (1.0781) acc 78.1250 (72.6136) lr 2.7103e-04 eta 2:19:04 +epoch [40/50] batch [170/500] time 1.565 (1.564) data 0.000 (0.006) loss 0.7734 (1.0731) acc 78.1250 (72.7206) lr 2.7103e-04 eta 2:18:53 +epoch [40/50] batch [175/500] time 1.550 (1.563) data 0.000 (0.006) loss 1.1201 (1.0700) acc 78.1250 (72.8750) lr 2.7103e-04 eta 2:18:44 +epoch [40/50] batch [180/500] time 1.614 (1.564) data 0.001 (0.006) loss 1.2549 (1.0729) acc 65.6250 (72.8125) lr 2.7103e-04 eta 2:18:38 +epoch [40/50] batch [185/500] time 1.564 (1.564) data 0.000 (0.005) loss 0.5723 
(1.0655) acc 90.6250 (73.0743) lr 2.7103e-04 eta 2:18:34 +epoch [40/50] batch [190/500] time 1.543 (1.564) data 0.000 (0.005) loss 0.9058 (1.0668) acc 75.0000 (72.9770) lr 2.7103e-04 eta 2:18:25 +epoch [40/50] batch [195/500] time 1.582 (1.564) data 0.000 (0.005) loss 1.1504 (1.0666) acc 68.7500 (72.9167) lr 2.7103e-04 eta 2:18:17 +epoch [40/50] batch [200/500] time 1.578 (1.564) data 0.001 (0.005) loss 0.9736 (1.0674) acc 78.1250 (72.9844) lr 2.7103e-04 eta 2:18:08 +epoch [40/50] batch [205/500] time 1.579 (1.564) data 0.000 (0.005) loss 1.5518 (1.0701) acc 62.5000 (73.0488) lr 2.7103e-04 eta 2:18:00 +epoch [40/50] batch [210/500] time 1.577 (1.564) data 0.000 (0.005) loss 1.4189 (1.0706) acc 75.0000 (73.1696) lr 2.7103e-04 eta 2:17:51 +epoch [40/50] batch [215/500] time 1.571 (1.564) data 0.001 (0.005) loss 1.3789 (1.0688) acc 59.3750 (73.0814) lr 2.7103e-04 eta 2:17:43 +epoch [40/50] batch [220/500] time 1.556 (1.564) data 0.000 (0.005) loss 0.6826 (1.0642) acc 87.5000 (73.1960) lr 2.7103e-04 eta 2:17:36 +epoch [40/50] batch [225/500] time 1.565 (1.564) data 0.000 (0.005) loss 1.0459 (1.0642) acc 75.0000 (73.2500) lr 2.7103e-04 eta 2:17:28 +epoch [40/50] batch [230/500] time 1.574 (1.563) data 0.000 (0.004) loss 1.1855 (1.0665) acc 68.7500 (73.1522) lr 2.7103e-04 eta 2:17:19 +epoch [40/50] batch [235/500] time 1.542 (1.563) data 0.000 (0.004) loss 0.7812 (1.0638) acc 75.0000 (73.2979) lr 2.7103e-04 eta 2:17:10 +epoch [40/50] batch [240/500] time 1.552 (1.563) data 0.000 (0.004) loss 1.2324 (1.0644) acc 68.7500 (73.2812) lr 2.7103e-04 eta 2:17:03 +epoch [40/50] batch [245/500] time 1.541 (1.563) data 0.000 (0.004) loss 0.8291 (1.0626) acc 87.5000 (73.3546) lr 2.7103e-04 eta 2:16:55 +epoch [40/50] batch [250/500] time 1.533 (1.563) data 0.000 (0.004) loss 0.7871 (1.0643) acc 71.8750 (73.2500) lr 2.7103e-04 eta 2:16:45 +epoch [40/50] batch [255/500] time 1.547 (1.563) data 0.000 (0.004) loss 0.7603 (1.0613) acc 84.3750 (73.3456) lr 2.7103e-04 eta 2:16:37 +epoch 
[40/50] batch [260/500] time 1.558 (1.563) data 0.000 (0.004) loss 0.9829 (1.0596) acc 65.6250 (73.3293) lr 2.7103e-04 eta 2:16:29 +epoch [40/50] batch [265/500] time 1.537 (1.563) data 0.001 (0.004) loss 0.7856 (1.0594) acc 81.2500 (73.3373) lr 2.7103e-04 eta 2:16:20 +epoch [40/50] batch [270/500] time 1.559 (1.563) data 0.000 (0.004) loss 0.6699 (1.0581) acc 84.3750 (73.3912) lr 2.7103e-04 eta 2:16:11 +epoch [40/50] batch [275/500] time 1.591 (1.562) data 0.001 (0.004) loss 0.8418 (1.0558) acc 71.8750 (73.4318) lr 2.7103e-04 eta 2:16:03 +epoch [40/50] batch [280/500] time 1.668 (1.563) data 0.000 (0.004) loss 1.2490 (1.0568) acc 75.0000 (73.4821) lr 2.7103e-04 eta 2:15:57 +epoch [40/50] batch [285/500] time 1.568 (1.563) data 0.000 (0.004) loss 0.4954 (1.0545) acc 93.7500 (73.5307) lr 2.7103e-04 eta 2:15:49 +epoch [40/50] batch [290/500] time 1.540 (1.563) data 0.000 (0.004) loss 1.4043 (1.0559) acc 71.8750 (73.5453) lr 2.7103e-04 eta 2:15:41 +epoch [40/50] batch [295/500] time 1.556 (1.563) data 0.000 (0.004) loss 0.9082 (1.0561) acc 71.8750 (73.4852) lr 2.7103e-04 eta 2:15:33 +epoch [40/50] batch [300/500] time 1.531 (1.563) data 0.000 (0.004) loss 0.5693 (1.0543) acc 87.5000 (73.5000) lr 2.7103e-04 eta 2:15:25 +epoch [40/50] batch [305/500] time 1.567 (1.562) data 0.000 (0.003) loss 0.9600 (1.0534) acc 71.8750 (73.5143) lr 2.7103e-04 eta 2:15:16 +epoch [40/50] batch [310/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.0137 (1.0513) acc 71.8750 (73.5383) lr 2.7103e-04 eta 2:15:07 +epoch [40/50] batch [315/500] time 1.569 (1.562) data 0.000 (0.003) loss 1.0400 (1.0572) acc 68.7500 (73.4325) lr 2.7103e-04 eta 2:15:00 +epoch [40/50] batch [320/500] time 1.557 (1.562) data 0.000 (0.003) loss 0.5830 (1.0533) acc 81.2500 (73.4863) lr 2.7103e-04 eta 2:14:51 +epoch [40/50] batch [325/500] time 1.561 (1.562) data 0.000 (0.003) loss 0.8550 (1.0534) acc 75.0000 (73.5000) lr 2.7103e-04 eta 2:14:45 +epoch [40/50] batch [330/500] time 1.554 (1.562) data 0.001 (0.003) loss 
0.8262 (1.0526) acc 75.0000 (73.5322) lr 2.7103e-04 eta 2:14:36 +epoch [40/50] batch [335/500] time 1.577 (1.562) data 0.000 (0.003) loss 1.0889 (1.0537) acc 75.0000 (73.5168) lr 2.7103e-04 eta 2:14:28 +epoch [40/50] batch [340/500] time 1.553 (1.562) data 0.000 (0.003) loss 1.0908 (1.0590) acc 71.8750 (73.4099) lr 2.7103e-04 eta 2:14:20 +epoch [40/50] batch [345/500] time 1.572 (1.562) data 0.000 (0.003) loss 1.0361 (1.0575) acc 75.0000 (73.4239) lr 2.7103e-04 eta 2:14:11 +epoch [40/50] batch [350/500] time 1.548 (1.562) data 0.000 (0.003) loss 1.0186 (1.0551) acc 75.0000 (73.4643) lr 2.7103e-04 eta 2:14:03 +epoch [40/50] batch [355/500] time 1.562 (1.562) data 0.000 (0.003) loss 1.4961 (1.0540) acc 62.5000 (73.4859) lr 2.7103e-04 eta 2:13:54 +epoch [40/50] batch [360/500] time 1.589 (1.562) data 0.001 (0.003) loss 1.2051 (1.0559) acc 68.7500 (73.4115) lr 2.7103e-04 eta 2:13:47 +epoch [40/50] batch [365/500] time 1.535 (1.562) data 0.000 (0.003) loss 0.4663 (1.0532) acc 84.3750 (73.4932) lr 2.7103e-04 eta 2:13:38 +epoch [40/50] batch [370/500] time 1.534 (1.561) data 0.000 (0.003) loss 1.0078 (1.0538) acc 78.1250 (73.4713) lr 2.7103e-04 eta 2:13:30 +epoch [40/50] batch [375/500] time 1.562 (1.561) data 0.000 (0.003) loss 1.2451 (1.0554) acc 65.6250 (73.4500) lr 2.7103e-04 eta 2:13:22 +epoch [40/50] batch [380/500] time 1.556 (1.562) data 0.000 (0.003) loss 1.0303 (1.0512) acc 81.2500 (73.5115) lr 2.7103e-04 eta 2:13:14 +epoch [40/50] batch [385/500] time 1.542 (1.561) data 0.000 (0.003) loss 1.0918 (1.0554) acc 75.0000 (73.4091) lr 2.7103e-04 eta 2:13:06 +epoch [40/50] batch [390/500] time 1.539 (1.561) data 0.000 (0.003) loss 1.1270 (1.0558) acc 71.8750 (73.4135) lr 2.7103e-04 eta 2:12:57 +epoch [40/50] batch [395/500] time 1.568 (1.561) data 0.000 (0.003) loss 1.4707 (1.0576) acc 65.6250 (73.3703) lr 2.7103e-04 eta 2:12:49 +epoch [40/50] batch [400/500] time 1.563 (1.561) data 0.000 (0.003) loss 1.0322 (1.0573) acc 65.6250 (73.3672) lr 2.7103e-04 eta 2:12:43 
+epoch [40/50] batch [405/500] time 1.550 (1.561) data 0.000 (0.003) loss 1.3613 (1.0574) acc 62.5000 (73.3796) lr 2.7103e-04 eta 2:12:35 +epoch [40/50] batch [410/500] time 1.571 (1.561) data 0.000 (0.003) loss 0.9072 (1.0572) acc 62.5000 (73.3841) lr 2.7103e-04 eta 2:12:27 +epoch [40/50] batch [415/500] time 1.578 (1.561) data 0.000 (0.003) loss 1.1289 (1.0555) acc 71.8750 (73.4413) lr 2.7103e-04 eta 2:12:19 +epoch [40/50] batch [420/500] time 1.557 (1.561) data 0.000 (0.003) loss 1.0537 (1.0556) acc 78.1250 (73.4524) lr 2.7103e-04 eta 2:12:11 +epoch [40/50] batch [425/500] time 1.562 (1.561) data 0.000 (0.003) loss 1.1465 (1.0585) acc 71.8750 (73.3750) lr 2.7103e-04 eta 2:12:04 +epoch [40/50] batch [430/500] time 1.548 (1.561) data 0.000 (0.003) loss 0.8467 (1.0579) acc 71.8750 (73.3794) lr 2.7103e-04 eta 2:11:56 +epoch [40/50] batch [435/500] time 1.562 (1.561) data 0.000 (0.003) loss 0.7124 (1.0534) acc 87.5000 (73.4986) lr 2.7103e-04 eta 2:11:48 +epoch [40/50] batch [440/500] time 1.571 (1.561) data 0.000 (0.003) loss 0.6558 (1.0510) acc 87.5000 (73.5511) lr 2.7103e-04 eta 2:11:40 +epoch [40/50] batch [445/500] time 1.554 (1.561) data 0.001 (0.002) loss 1.1787 (1.0513) acc 75.0000 (73.5815) lr 2.7103e-04 eta 2:11:32 +epoch [40/50] batch [450/500] time 1.545 (1.561) data 0.000 (0.002) loss 0.9678 (1.0503) acc 78.1250 (73.6250) lr 2.7103e-04 eta 2:11:24 +epoch [40/50] batch [455/500] time 1.589 (1.561) data 0.000 (0.002) loss 0.9961 (1.0475) acc 81.2500 (73.6813) lr 2.7103e-04 eta 2:11:16 +epoch [40/50] batch [460/500] time 1.545 (1.561) data 0.000 (0.002) loss 1.4258 (1.0467) acc 71.8750 (73.6617) lr 2.7103e-04 eta 2:11:08 +epoch [40/50] batch [465/500] time 1.574 (1.561) data 0.000 (0.002) loss 1.3975 (1.0479) acc 78.1250 (73.6156) lr 2.7103e-04 eta 2:11:00 +epoch [40/50] batch [470/500] time 1.532 (1.561) data 0.000 (0.002) loss 1.2861 (1.0501) acc 53.1250 (73.5372) lr 2.7103e-04 eta 2:10:53 +epoch [40/50] batch [475/500] time 1.588 (1.561) data 0.000 
(0.002) loss 0.3511 (1.0474) acc 87.5000 (73.6053) lr 2.7103e-04 eta 2:10:46 +epoch [40/50] batch [480/500] time 1.553 (1.561) data 0.000 (0.002) loss 0.9512 (1.0464) acc 68.7500 (73.6133) lr 2.7103e-04 eta 2:10:38 +epoch [40/50] batch [485/500] time 1.550 (1.561) data 0.001 (0.002) loss 0.2661 (1.0445) acc 93.7500 (73.6469) lr 2.7103e-04 eta 2:10:30 +epoch [40/50] batch [490/500] time 1.544 (1.561) data 0.000 (0.002) loss 1.2256 (1.0438) acc 71.8750 (73.6798) lr 2.7103e-04 eta 2:10:21 +epoch [40/50] batch [495/500] time 1.532 (1.561) data 0.000 (0.002) loss 1.1465 (1.0454) acc 68.7500 (73.6237) lr 2.7103e-04 eta 2:10:13 +epoch [40/50] batch [500/500] time 1.561 (1.561) data 0.000 (0.002) loss 0.7192 (1.0445) acc 78.1250 (73.6375) lr 2.2949e-04 eta 2:10:05 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,029 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [41/50] batch [5/500] time 1.560 (1.677) data 0.000 (0.174) loss 1.2871 (1.0147) acc 68.7500 (74.3750) lr 2.2949e-04 eta 2:19:34 +epoch [41/50] batch [10/500] time 1.551 (1.622) data 0.000 (0.087) loss 0.6543 (0.8895) acc 90.6250 (78.4375) lr 2.2949e-04 eta 2:14:51 +epoch [41/50] batch [15/500] time 1.554 (1.600) data 0.001 (0.058) loss 0.5225 (0.9221) acc 90.6250 (76.6667) lr 2.2949e-04 eta 2:12:57 +epoch [41/50] batch [20/500] time 1.563 (1.590) data 0.001 (0.044) loss 1.1514 (0.9344) acc 68.7500 (76.2500) lr 2.2949e-04 eta 2:11:58 +epoch [41/50] batch [25/500] time 1.648 (1.585) data 0.000 (0.035) loss 1.1104 (0.9791) acc 75.0000 (75.3750) lr 2.2949e-04 eta 2:11:26 +epoch [41/50] batch [30/500] time 1.534 (1.580) data 0.000 (0.029) loss 0.6982 (0.9972) acc 84.3750 (75.1042) lr 2.2949e-04 eta 2:10:52 +epoch [41/50] batch [35/500] time 1.552 (1.576) data 0.000 (0.025) loss 0.7124 (0.9657) acc 71.8750 (75.0000) lr 2.2949e-04 eta 2:10:27 +epoch [41/50] batch [40/500] time 1.562 (1.574) data 0.001 (0.022) loss 0.7407 (0.9631) acc 81.2500 (75.2344) lr 2.2949e-04 eta 2:10:08 +epoch 
[41/50] batch [45/500] time 1.548 (1.572) data 0.000 (0.020) loss 1.4189 (0.9959) acc 71.8750 (74.5833) lr 2.2949e-04 eta 2:09:50 +epoch [41/50] batch [50/500] time 1.562 (1.570) data 0.000 (0.018) loss 1.0342 (0.9960) acc 71.8750 (74.3750) lr 2.2949e-04 eta 2:09:32 +epoch [41/50] batch [55/500] time 1.558 (1.569) data 0.000 (0.016) loss 1.0381 (1.0057) acc 65.6250 (74.0341) lr 2.2949e-04 eta 2:09:20 +epoch [41/50] batch [60/500] time 1.545 (1.569) data 0.000 (0.015) loss 0.9604 (1.0037) acc 68.7500 (74.1146) lr 2.2949e-04 eta 2:09:08 +epoch [41/50] batch [65/500] time 1.555 (1.567) data 0.000 (0.014) loss 0.7935 (1.0209) acc 75.0000 (73.6538) lr 2.2949e-04 eta 2:08:53 +epoch [41/50] batch [70/500] time 1.561 (1.566) data 0.001 (0.013) loss 1.6553 (1.0341) acc 65.6250 (73.3929) lr 2.2949e-04 eta 2:08:42 +epoch [41/50] batch [75/500] time 1.557 (1.566) data 0.000 (0.012) loss 0.7173 (1.0254) acc 78.1250 (73.5417) lr 2.2949e-04 eta 2:08:30 +epoch [41/50] batch [80/500] time 1.556 (1.566) data 0.000 (0.011) loss 1.7207 (1.0433) acc 56.2500 (72.9297) lr 2.2949e-04 eta 2:08:22 +epoch [41/50] batch [85/500] time 1.663 (1.566) data 0.000 (0.011) loss 1.3672 (1.0469) acc 81.2500 (73.0515) lr 2.2949e-04 eta 2:08:17 +epoch [41/50] batch [90/500] time 1.581 (1.566) data 0.001 (0.010) loss 1.1689 (1.0505) acc 65.6250 (72.8472) lr 2.2949e-04 eta 2:08:09 +epoch [41/50] batch [95/500] time 1.559 (1.566) data 0.000 (0.010) loss 0.6523 (1.0602) acc 84.3750 (72.8947) lr 2.2949e-04 eta 2:07:58 +epoch [41/50] batch [100/500] time 1.545 (1.565) data 0.000 (0.009) loss 0.9526 (1.0618) acc 71.8750 (72.7812) lr 2.2949e-04 eta 2:07:49 +epoch [41/50] batch [105/500] time 1.562 (1.564) data 0.000 (0.009) loss 1.1426 (1.0722) acc 68.7500 (72.5595) lr 2.2949e-04 eta 2:07:36 +epoch [41/50] batch [110/500] time 1.539 (1.564) data 0.000 (0.008) loss 1.4248 (1.0728) acc 56.2500 (72.5568) lr 2.2949e-04 eta 2:07:26 +epoch [41/50] batch [115/500] time 1.561 (1.563) data 0.000 (0.008) loss 0.6235 
(1.0725) acc 75.0000 (72.3370) lr 2.2949e-04 eta 2:07:16 +epoch [41/50] batch [120/500] time 1.561 (1.563) data 0.000 (0.008) loss 1.1270 (1.0678) acc 62.5000 (72.2656) lr 2.2949e-04 eta 2:07:09 +epoch [41/50] batch [125/500] time 1.552 (1.563) data 0.001 (0.007) loss 1.0107 (1.0729) acc 71.8750 (72.1000) lr 2.2949e-04 eta 2:06:59 +epoch [41/50] batch [130/500] time 1.595 (1.564) data 0.000 (0.007) loss 1.1250 (1.0752) acc 71.8750 (72.0192) lr 2.2949e-04 eta 2:06:57 +epoch [41/50] batch [135/500] time 1.585 (1.564) data 0.001 (0.007) loss 1.0049 (1.0693) acc 68.7500 (72.1759) lr 2.2949e-04 eta 2:06:49 +epoch [41/50] batch [140/500] time 1.543 (1.564) data 0.000 (0.007) loss 1.4854 (1.0727) acc 71.8750 (72.1875) lr 2.2949e-04 eta 2:06:38 +epoch [41/50] batch [145/500] time 1.555 (1.563) data 0.000 (0.006) loss 0.8540 (1.0711) acc 78.1250 (72.1767) lr 2.2949e-04 eta 2:06:30 +epoch [41/50] batch [150/500] time 1.555 (1.563) data 0.001 (0.006) loss 1.1006 (1.0734) acc 75.0000 (72.0833) lr 2.2949e-04 eta 2:06:22 +epoch [41/50] batch [155/500] time 1.543 (1.563) data 0.000 (0.006) loss 0.9458 (1.0750) acc 68.7500 (72.0565) lr 2.2949e-04 eta 2:06:13 +epoch [41/50] batch [160/500] time 1.562 (1.563) data 0.000 (0.006) loss 1.0225 (1.0783) acc 78.1250 (72.1094) lr 2.2949e-04 eta 2:06:04 +epoch [41/50] batch [165/500] time 1.553 (1.563) data 0.000 (0.006) loss 1.1484 (1.0788) acc 71.8750 (72.1970) lr 2.2949e-04 eta 2:05:56 +epoch [41/50] batch [170/500] time 1.583 (1.563) data 0.000 (0.006) loss 1.0020 (1.0755) acc 81.2500 (72.4081) lr 2.2949e-04 eta 2:05:48 +epoch [41/50] batch [175/500] time 1.579 (1.563) data 0.000 (0.005) loss 1.0352 (1.0732) acc 68.7500 (72.3929) lr 2.2949e-04 eta 2:05:41 +epoch [41/50] batch [180/500] time 1.549 (1.563) data 0.000 (0.005) loss 1.1143 (1.0696) acc 81.2500 (72.5000) lr 2.2949e-04 eta 2:05:32 +epoch [41/50] batch [185/500] time 1.564 (1.563) data 0.000 (0.005) loss 1.1260 (1.0725) acc 62.5000 (72.4155) lr 2.2949e-04 eta 2:05:23 +epoch 
[41/50] batch [190/500] time 1.572 (1.562) data 0.000 (0.005) loss 0.9868 (1.0684) acc 81.2500 (72.6151) lr 2.2949e-04 eta 2:05:15 +epoch [41/50] batch [195/500] time 1.556 (1.563) data 0.000 (0.005) loss 1.3232 (1.0734) acc 59.3750 (72.5641) lr 2.2949e-04 eta 2:05:07 +epoch [41/50] batch [200/500] time 1.594 (1.563) data 0.000 (0.005) loss 0.9927 (1.0761) acc 87.5000 (72.6406) lr 2.2949e-04 eta 2:05:01 +epoch [41/50] batch [205/500] time 1.554 (1.563) data 0.000 (0.005) loss 0.7852 (1.0742) acc 84.3750 (72.6829) lr 2.2949e-04 eta 2:04:52 +epoch [41/50] batch [210/500] time 1.557 (1.563) data 0.000 (0.005) loss 1.5303 (1.0824) acc 59.3750 (72.5893) lr 2.2949e-04 eta 2:04:46 +epoch [41/50] batch [215/500] time 1.545 (1.563) data 0.000 (0.004) loss 0.5220 (1.0823) acc 87.5000 (72.5872) lr 2.2949e-04 eta 2:04:37 +epoch [41/50] batch [220/500] time 1.547 (1.562) data 0.000 (0.004) loss 1.0381 (1.0802) acc 87.5000 (72.6705) lr 2.2949e-04 eta 2:04:28 +epoch [41/50] batch [225/500] time 1.585 (1.562) data 0.000 (0.004) loss 0.5005 (1.0813) acc 90.6250 (72.6528) lr 2.2949e-04 eta 2:04:19 +epoch [41/50] batch [230/500] time 1.543 (1.562) data 0.000 (0.004) loss 1.5898 (1.0816) acc 65.6250 (72.6902) lr 2.2949e-04 eta 2:04:12 +epoch [41/50] batch [235/500] time 1.568 (1.562) data 0.000 (0.004) loss 1.0605 (1.0853) acc 78.1250 (72.6463) lr 2.2949e-04 eta 2:04:04 +epoch [41/50] batch [240/500] time 1.560 (1.562) data 0.000 (0.004) loss 1.0576 (1.0851) acc 75.0000 (72.6042) lr 2.2949e-04 eta 2:03:55 +epoch [41/50] batch [245/500] time 1.568 (1.562) data 0.000 (0.004) loss 0.6665 (1.0801) acc 87.5000 (72.7296) lr 2.2949e-04 eta 2:03:47 +epoch [41/50] batch [250/500] time 1.547 (1.562) data 0.000 (0.004) loss 0.9683 (1.0799) acc 75.0000 (72.7875) lr 2.2949e-04 eta 2:03:40 +epoch [41/50] batch [255/500] time 1.562 (1.562) data 0.000 (0.004) loss 1.3057 (1.0794) acc 62.5000 (72.8309) lr 2.2949e-04 eta 2:03:31 +epoch [41/50] batch [260/500] time 1.551 (1.562) data 0.000 (0.004) loss 
1.1855 (1.0768) acc 81.2500 (72.9207) lr 2.2949e-04 eta 2:03:23 +epoch [41/50] batch [265/500] time 1.584 (1.562) data 0.000 (0.004) loss 1.0098 (1.0767) acc 81.2500 (72.9481) lr 2.2949e-04 eta 2:03:16 +epoch [41/50] batch [270/500] time 1.571 (1.562) data 0.001 (0.004) loss 1.0254 (1.0770) acc 68.7500 (72.8935) lr 2.2949e-04 eta 2:03:08 +epoch [41/50] batch [275/500] time 1.580 (1.562) data 0.000 (0.004) loss 0.9233 (1.0739) acc 71.8750 (72.8523) lr 2.2949e-04 eta 2:03:02 +epoch [41/50] batch [280/500] time 1.546 (1.563) data 0.001 (0.004) loss 1.1221 (1.0748) acc 68.7500 (72.8125) lr 2.2949e-04 eta 2:02:55 +epoch [41/50] batch [285/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.1309 (1.0757) acc 59.3750 (72.8289) lr 2.2949e-04 eta 2:02:46 +epoch [41/50] batch [290/500] time 1.577 (1.563) data 0.000 (0.003) loss 0.5171 (1.0697) acc 84.3750 (73.0172) lr 2.2949e-04 eta 2:02:39 +epoch [41/50] batch [295/500] time 1.571 (1.563) data 0.000 (0.003) loss 0.6455 (1.0655) acc 81.2500 (73.0720) lr 2.2949e-04 eta 2:02:31 +epoch [41/50] batch [300/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.0674 (1.0649) acc 71.8750 (73.0729) lr 2.2949e-04 eta 2:02:23 +epoch [41/50] batch [305/500] time 1.571 (1.563) data 0.000 (0.003) loss 0.9307 (1.0645) acc 84.3750 (73.1762) lr 2.2949e-04 eta 2:02:16 +epoch [41/50] batch [310/500] time 1.524 (1.562) data 0.000 (0.003) loss 0.7959 (1.0603) acc 81.2500 (73.2560) lr 2.2949e-04 eta 2:02:07 +epoch [41/50] batch [315/500] time 1.556 (1.562) data 0.001 (0.003) loss 1.2803 (1.0586) acc 68.7500 (73.2837) lr 2.2949e-04 eta 2:01:59 +epoch [41/50] batch [320/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.1445 (1.0572) acc 68.7500 (73.3203) lr 2.2949e-04 eta 2:01:51 +epoch [41/50] batch [325/500] time 1.574 (1.562) data 0.000 (0.003) loss 1.2188 (1.0576) acc 75.0000 (73.3077) lr 2.2949e-04 eta 2:01:43 +epoch [41/50] batch [330/500] time 1.563 (1.562) data 0.000 (0.003) loss 1.2549 (1.0610) acc 75.0000 (73.2670) lr 2.2949e-04 eta 2:01:35 
+epoch [41/50] batch [335/500] time 1.551 (1.562) data 0.000 (0.003) loss 1.2090 (1.0594) acc 62.5000 (73.2836) lr 2.2949e-04 eta 2:01:27 +epoch [41/50] batch [340/500] time 1.601 (1.562) data 0.000 (0.003) loss 0.6685 (1.0552) acc 87.5000 (73.3915) lr 2.2949e-04 eta 2:01:20 +epoch [41/50] batch [345/500] time 1.560 (1.562) data 0.000 (0.003) loss 1.8613 (1.0556) acc 56.2500 (73.3696) lr 2.2949e-04 eta 2:01:12 +epoch [41/50] batch [350/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.3447 (1.0559) acc 68.7500 (73.3750) lr 2.2949e-04 eta 2:01:04 +epoch [41/50] batch [355/500] time 1.552 (1.562) data 0.000 (0.003) loss 1.3438 (1.0573) acc 68.7500 (73.3275) lr 2.2949e-04 eta 2:00:56 +epoch [41/50] batch [360/500] time 1.575 (1.562) data 0.000 (0.003) loss 1.4609 (1.0563) acc 65.6250 (73.2899) lr 2.2949e-04 eta 2:00:48 +epoch [41/50] batch [365/500] time 1.557 (1.562) data 0.001 (0.003) loss 1.4023 (1.0583) acc 71.8750 (73.2791) lr 2.2949e-04 eta 2:00:41 +epoch [41/50] batch [370/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.0410 (1.0586) acc 68.7500 (73.3277) lr 2.2949e-04 eta 2:00:34 +epoch [41/50] batch [375/500] time 1.537 (1.563) data 0.000 (0.003) loss 1.0801 (1.0601) acc 75.0000 (73.2667) lr 2.2949e-04 eta 2:00:27 +epoch [41/50] batch [380/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.2900 (1.0599) acc 78.1250 (73.3306) lr 2.2949e-04 eta 2:00:19 +epoch [41/50] batch [385/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.2656 (1.0577) acc 71.8750 (73.3604) lr 2.2949e-04 eta 2:00:11 +epoch [41/50] batch [390/500] time 1.539 (1.562) data 0.000 (0.003) loss 1.5391 (1.0578) acc 68.7500 (73.3494) lr 2.2949e-04 eta 2:00:02 +epoch [41/50] batch [395/500] time 1.553 (1.562) data 0.000 (0.003) loss 1.4277 (1.0581) acc 65.6250 (73.3465) lr 2.2949e-04 eta 1:59:54 +epoch [41/50] batch [400/500] time 1.560 (1.562) data 0.000 (0.003) loss 0.8184 (1.0566) acc 81.2500 (73.3750) lr 2.2949e-04 eta 1:59:47 +epoch [41/50] batch [405/500] time 1.565 (1.562) data 0.000 
(0.003) loss 0.6611 (1.0534) acc 78.1250 (73.4028) lr 2.2949e-04 eta 1:59:38 +epoch [41/50] batch [410/500] time 1.587 (1.562) data 0.000 (0.003) loss 0.9424 (1.0547) acc 75.0000 (73.3460) lr 2.2949e-04 eta 1:59:31 +epoch [41/50] batch [415/500] time 1.671 (1.563) data 0.000 (0.003) loss 0.8184 (1.0527) acc 84.3750 (73.4337) lr 2.2949e-04 eta 1:59:25 +epoch [41/50] batch [420/500] time 1.552 (1.563) data 0.000 (0.002) loss 1.2920 (1.0547) acc 62.5000 (73.3780) lr 2.2949e-04 eta 1:59:17 +epoch [41/50] batch [425/500] time 1.582 (1.563) data 0.000 (0.002) loss 0.8960 (1.0556) acc 71.8750 (73.3824) lr 2.2949e-04 eta 1:59:09 +epoch [41/50] batch [430/500] time 1.571 (1.563) data 0.000 (0.002) loss 0.5317 (1.0529) acc 87.5000 (73.4302) lr 2.2949e-04 eta 1:59:01 +epoch [41/50] batch [435/500] time 1.554 (1.563) data 0.000 (0.002) loss 0.8877 (1.0529) acc 75.0000 (73.4339) lr 2.2949e-04 eta 1:58:54 +epoch [41/50] batch [440/500] time 1.561 (1.563) data 0.000 (0.002) loss 1.1611 (1.0536) acc 68.7500 (73.4517) lr 2.2949e-04 eta 1:58:46 +epoch [41/50] batch [445/500] time 1.554 (1.563) data 0.000 (0.002) loss 0.6479 (1.0529) acc 84.3750 (73.4480) lr 2.2949e-04 eta 1:58:38 +epoch [41/50] batch [450/500] time 1.561 (1.563) data 0.000 (0.002) loss 1.5498 (1.0535) acc 62.5000 (73.4653) lr 2.2949e-04 eta 1:58:29 +epoch [41/50] batch [455/500] time 1.550 (1.562) data 0.000 (0.002) loss 0.9985 (1.0520) acc 75.0000 (73.4890) lr 2.2949e-04 eta 1:58:21 +epoch [41/50] batch [460/500] time 1.558 (1.562) data 0.000 (0.002) loss 0.5708 (1.0506) acc 84.3750 (73.5258) lr 2.2949e-04 eta 1:58:13 +epoch [41/50] batch [465/500] time 1.568 (1.563) data 0.000 (0.002) loss 1.1484 (1.0496) acc 75.0000 (73.5349) lr 2.2949e-04 eta 1:58:06 +epoch [41/50] batch [470/500] time 1.562 (1.563) data 0.000 (0.002) loss 0.9126 (1.0486) acc 68.7500 (73.5306) lr 2.2949e-04 eta 1:57:58 +epoch [41/50] batch [475/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.1309 (1.0495) acc 75.0000 (73.5132) lr 2.2949e-04 
eta 1:57:50 +epoch [41/50] batch [480/500] time 1.541 (1.562) data 0.000 (0.002) loss 1.2197 (1.0487) acc 62.5000 (73.5091) lr 2.2949e-04 eta 1:57:42 +epoch [41/50] batch [485/500] time 1.556 (1.562) data 0.001 (0.002) loss 0.8721 (1.0487) acc 84.3750 (73.5438) lr 2.2949e-04 eta 1:57:34 +epoch [41/50] batch [490/500] time 1.550 (1.562) data 0.000 (0.002) loss 1.0850 (1.0488) acc 75.0000 (73.5268) lr 2.2949e-04 eta 1:57:26 +epoch [41/50] batch [495/500] time 1.559 (1.562) data 0.000 (0.002) loss 0.7217 (1.0496) acc 81.2500 (73.5290) lr 2.2949e-04 eta 1:57:18 +epoch [41/50] batch [500/500] time 1.544 (1.562) data 0.000 (0.002) loss 0.8071 (1.0500) acc 87.5000 (73.5125) lr 1.9098e-04 eta 1:57:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,041 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [42/50] batch [5/500] time 1.521 (1.646) data 0.000 (0.148) loss 0.7954 (0.7874) acc 78.1250 (81.2500) lr 1.9098e-04 eta 2:03:18 +epoch [42/50] batch [10/500] time 1.534 (1.594) data 0.001 (0.074) loss 1.1807 (0.8896) acc 71.8750 (77.8125) lr 1.9098e-04 eta 1:59:16 +epoch [42/50] batch [15/500] time 1.551 (1.583) data 0.001 (0.050) loss 1.8350 (1.0346) acc 56.2500 (74.3750) lr 1.9098e-04 eta 1:58:20 +epoch [42/50] batch [20/500] time 1.562 (1.575) data 0.001 (0.037) loss 1.5752 (1.0465) acc 56.2500 (73.4375) lr 1.9098e-04 eta 1:57:37 +epoch [42/50] batch [25/500] time 1.574 (1.578) data 0.001 (0.030) loss 0.9531 (1.0641) acc 71.8750 (73.6250) lr 1.9098e-04 eta 1:57:43 +epoch [42/50] batch [30/500] time 1.558 (1.576) data 0.000 (0.025) loss 1.3652 (1.0668) acc 62.5000 (73.8542) lr 1.9098e-04 eta 1:57:25 +epoch [42/50] batch [35/500] time 1.559 (1.575) data 0.000 (0.021) loss 0.7163 (1.0637) acc 71.8750 (73.6607) lr 1.9098e-04 eta 1:57:10 +epoch [42/50] batch [40/500] time 1.576 (1.573) data 0.000 (0.019) loss 1.1064 (1.0460) acc 78.1250 (74.0625) lr 1.9098e-04 eta 1:56:55 +epoch [42/50] batch [45/500] time 1.567 (1.573) data 0.000 (0.017) loss 
1.3262 (1.0453) acc 71.8750 (73.6111) lr 1.9098e-04 eta 1:56:46 +epoch [42/50] batch [50/500] time 1.568 (1.572) data 0.000 (0.015) loss 1.1572 (1.0423) acc 65.6250 (73.1875) lr 1.9098e-04 eta 1:56:33 +epoch [42/50] batch [55/500] time 1.567 (1.572) data 0.000 (0.014) loss 1.6631 (1.0258) acc 65.6250 (73.4091) lr 1.9098e-04 eta 1:56:25 +epoch [42/50] batch [60/500] time 1.550 (1.571) data 0.001 (0.013) loss 1.7891 (1.0470) acc 68.7500 (73.2812) lr 1.9098e-04 eta 1:56:13 +epoch [42/50] batch [65/500] time 1.554 (1.569) data 0.000 (0.012) loss 1.0410 (1.0307) acc 65.6250 (73.5577) lr 1.9098e-04 eta 1:55:59 +epoch [42/50] batch [70/500] time 1.600 (1.569) data 0.000 (0.011) loss 0.5620 (1.0181) acc 87.5000 (73.9732) lr 1.9098e-04 eta 1:55:51 +epoch [42/50] batch [75/500] time 1.539 (1.568) data 0.000 (0.010) loss 1.6357 (1.0288) acc 68.7500 (73.8750) lr 1.9098e-04 eta 1:55:38 +epoch [42/50] batch [80/500] time 1.571 (1.568) data 0.000 (0.010) loss 0.9644 (1.0215) acc 84.3750 (74.3359) lr 1.9098e-04 eta 1:55:28 +epoch [42/50] batch [85/500] time 1.558 (1.567) data 0.000 (0.009) loss 0.4973 (1.0165) acc 81.2500 (74.5221) lr 1.9098e-04 eta 1:55:18 +epoch [42/50] batch [90/500] time 1.559 (1.566) data 0.000 (0.009) loss 1.1162 (1.0172) acc 68.7500 (74.3750) lr 1.9098e-04 eta 1:55:07 +epoch [42/50] batch [95/500] time 1.525 (1.566) data 0.000 (0.008) loss 0.9351 (1.0063) acc 81.2500 (74.6053) lr 1.9098e-04 eta 1:54:56 +epoch [42/50] batch [100/500] time 1.550 (1.565) data 0.000 (0.008) loss 1.1836 (1.0007) acc 71.8750 (74.5938) lr 1.9098e-04 eta 1:54:45 +epoch [42/50] batch [105/500] time 1.553 (1.565) data 0.000 (0.007) loss 1.1387 (1.0143) acc 81.2500 (74.7321) lr 1.9098e-04 eta 1:54:37 +epoch [42/50] batch [110/500] time 1.571 (1.564) data 0.000 (0.007) loss 1.0645 (1.0169) acc 78.1250 (74.7443) lr 1.9098e-04 eta 1:54:25 +epoch [42/50] batch [115/500] time 1.542 (1.563) data 0.000 (0.007) loss 1.2490 (1.0195) acc 65.6250 (74.5924) lr 1.9098e-04 eta 1:54:14 +epoch 
[42/50] batch [120/500] time 1.659 (1.563) data 0.000 (0.007) loss 1.5244 (1.0261) acc 59.3750 (74.3229) lr 1.9098e-04 eta 1:54:07 +epoch [42/50] batch [125/500] time 1.534 (1.563) data 0.001 (0.006) loss 0.6064 (1.0275) acc 84.3750 (74.2250) lr 1.9098e-04 eta 1:53:57 +epoch [42/50] batch [130/500] time 1.577 (1.563) data 0.000 (0.006) loss 0.9551 (1.0314) acc 75.0000 (74.1346) lr 1.9098e-04 eta 1:53:48 +epoch [42/50] batch [135/500] time 1.540 (1.562) data 0.000 (0.006) loss 1.3516 (1.0326) acc 71.8750 (74.1204) lr 1.9098e-04 eta 1:53:39 +epoch [42/50] batch [140/500] time 1.560 (1.562) data 0.000 (0.006) loss 1.2021 (1.0379) acc 65.6250 (73.9732) lr 1.9098e-04 eta 1:53:32 +epoch [42/50] batch [145/500] time 1.559 (1.562) data 0.000 (0.005) loss 0.8696 (1.0424) acc 75.0000 (73.9009) lr 1.9098e-04 eta 1:53:24 +epoch [42/50] batch [150/500] time 1.573 (1.562) data 0.000 (0.005) loss 0.6162 (1.0403) acc 71.8750 (73.9167) lr 1.9098e-04 eta 1:53:16 +epoch [42/50] batch [155/500] time 1.538 (1.563) data 0.000 (0.005) loss 0.9575 (1.0420) acc 78.1250 (73.9113) lr 1.9098e-04 eta 1:53:09 +epoch [42/50] batch [160/500] time 1.557 (1.562) data 0.000 (0.005) loss 0.7407 (1.0460) acc 81.2500 (73.9453) lr 1.9098e-04 eta 1:53:01 +epoch [42/50] batch [165/500] time 1.570 (1.563) data 0.000 (0.005) loss 1.0977 (1.0468) acc 71.8750 (73.9205) lr 1.9098e-04 eta 1:52:56 +epoch [42/50] batch [170/500] time 1.563 (1.564) data 0.000 (0.005) loss 1.6914 (1.0507) acc 65.6250 (73.8419) lr 1.9098e-04 eta 1:52:50 +epoch [42/50] batch [175/500] time 1.561 (1.563) data 0.000 (0.005) loss 0.9873 (1.0470) acc 81.2500 (73.9821) lr 1.9098e-04 eta 1:52:41 +epoch [42/50] batch [180/500] time 1.563 (1.563) data 0.000 (0.004) loss 1.3477 (1.0470) acc 65.6250 (73.9236) lr 1.9098e-04 eta 1:52:33 +epoch [42/50] batch [185/500] time 1.544 (1.563) data 0.001 (0.004) loss 1.2773 (1.0516) acc 68.7500 (73.8345) lr 1.9098e-04 eta 1:52:24 +epoch [42/50] batch [190/500] time 1.561 (1.563) data 0.000 (0.004) loss 
1.2559 (1.0493) acc 68.7500 (73.8651) lr 1.9098e-04 eta 1:52:16 +epoch [42/50] batch [195/500] time 1.562 (1.563) data 0.000 (0.004) loss 1.0498 (1.0529) acc 71.8750 (73.8462) lr 1.9098e-04 eta 1:52:09 +epoch [42/50] batch [200/500] time 1.588 (1.563) data 0.000 (0.004) loss 0.8003 (1.0484) acc 71.8750 (73.8594) lr 1.9098e-04 eta 1:52:02 +epoch [42/50] batch [205/500] time 1.574 (1.563) data 0.000 (0.004) loss 1.5352 (1.0466) acc 68.7500 (73.9329) lr 1.9098e-04 eta 1:51:53 +epoch [42/50] batch [210/500] time 1.557 (1.563) data 0.000 (0.004) loss 0.7178 (1.0431) acc 71.8750 (74.0179) lr 1.9098e-04 eta 1:51:45 +epoch [42/50] batch [215/500] time 1.549 (1.563) data 0.000 (0.004) loss 0.8237 (1.0434) acc 78.1250 (73.9971) lr 1.9098e-04 eta 1:51:37 +epoch [42/50] batch [220/500] time 1.564 (1.563) data 0.000 (0.004) loss 1.5166 (1.0464) acc 75.0000 (74.0483) lr 1.9098e-04 eta 1:51:30 +epoch [42/50] batch [225/500] time 1.569 (1.564) data 0.000 (0.004) loss 1.7969 (1.0489) acc 65.6250 (74.0417) lr 1.9098e-04 eta 1:51:24 +epoch [42/50] batch [230/500] time 1.572 (1.564) data 0.000 (0.004) loss 0.7393 (1.0449) acc 75.0000 (74.0761) lr 1.9098e-04 eta 1:51:17 +epoch [42/50] batch [235/500] time 1.572 (1.564) data 0.000 (0.004) loss 1.2910 (1.0471) acc 62.5000 (74.0426) lr 1.9098e-04 eta 1:51:09 +epoch [42/50] batch [240/500] time 1.551 (1.564) data 0.000 (0.003) loss 1.0762 (1.0445) acc 78.1250 (74.2057) lr 1.9098e-04 eta 1:51:00 +epoch [42/50] batch [245/500] time 1.542 (1.563) data 0.000 (0.003) loss 0.9175 (1.0426) acc 78.1250 (74.2730) lr 1.9098e-04 eta 1:50:52 +epoch [42/50] batch [250/500] time 1.579 (1.563) data 0.000 (0.003) loss 0.6436 (1.0453) acc 81.2500 (74.1750) lr 1.9098e-04 eta 1:50:44 +epoch [42/50] batch [255/500] time 1.565 (1.563) data 0.000 (0.003) loss 0.8516 (1.0441) acc 75.0000 (74.2157) lr 1.9098e-04 eta 1:50:36 +epoch [42/50] batch [260/500] time 1.537 (1.563) data 0.000 (0.003) loss 0.7969 (1.0450) acc 78.1250 (74.1947) lr 1.9098e-04 eta 1:50:27 
+epoch [42/50] batch [265/500] time 1.535 (1.563) data 0.001 (0.003) loss 0.9351 (1.0446) acc 75.0000 (74.2335) lr 1.9098e-04 eta 1:50:20 +epoch [42/50] batch [270/500] time 1.560 (1.563) data 0.000 (0.003) loss 1.0127 (1.0480) acc 75.0000 (74.3171) lr 1.9098e-04 eta 1:50:12 +epoch [42/50] batch [275/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.4385 (1.0508) acc 65.6250 (74.2045) lr 1.9098e-04 eta 1:50:03 +epoch [42/50] batch [280/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.5000 (1.0545) acc 65.6250 (74.1183) lr 1.9098e-04 eta 1:49:54 +epoch [42/50] batch [285/500] time 1.555 (1.562) data 0.001 (0.003) loss 1.4043 (1.0541) acc 65.6250 (74.1009) lr 1.9098e-04 eta 1:49:45 +epoch [42/50] batch [290/500] time 1.548 (1.562) data 0.000 (0.003) loss 1.4707 (1.0520) acc 71.8750 (74.2026) lr 1.9098e-04 eta 1:49:36 +epoch [42/50] batch [295/500] time 1.557 (1.562) data 0.000 (0.003) loss 1.2266 (1.0497) acc 68.7500 (74.2267) lr 1.9098e-04 eta 1:49:28 +epoch [42/50] batch [300/500] time 1.546 (1.562) data 0.000 (0.003) loss 0.7500 (1.0475) acc 78.1250 (74.2188) lr 1.9098e-04 eta 1:49:20 +epoch [42/50] batch [305/500] time 1.602 (1.562) data 0.000 (0.003) loss 0.9521 (1.0462) acc 68.7500 (74.2316) lr 1.9098e-04 eta 1:49:14 +epoch [42/50] batch [310/500] time 1.575 (1.563) data 0.000 (0.003) loss 1.1152 (1.0487) acc 75.0000 (74.2641) lr 1.9098e-04 eta 1:49:07 +epoch [42/50] batch [315/500] time 1.555 (1.563) data 0.000 (0.003) loss 0.8198 (1.0475) acc 81.2500 (74.2956) lr 1.9098e-04 eta 1:49:00 +epoch [42/50] batch [320/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.7529 (1.0509) acc 46.8750 (74.1113) lr 1.9098e-04 eta 1:48:52 +epoch [42/50] batch [325/500] time 1.529 (1.563) data 0.000 (0.003) loss 1.1045 (1.0539) acc 71.8750 (74.0577) lr 1.9098e-04 eta 1:48:44 +epoch [42/50] batch [330/500] time 1.545 (1.563) data 0.000 (0.003) loss 1.4111 (1.0565) acc 56.2500 (73.9773) lr 1.9098e-04 eta 1:48:36 +epoch [42/50] batch [335/500] time 1.531 (1.562) data 0.000 
(0.003) loss 1.2842 (1.0563) acc 65.6250 (73.9739) lr 1.9098e-04 eta 1:48:27 +epoch [42/50] batch [340/500] time 1.569 (1.562) data 0.000 (0.003) loss 1.0107 (1.0536) acc 71.8750 (73.9798) lr 1.9098e-04 eta 1:48:19 +epoch [42/50] batch [345/500] time 1.553 (1.563) data 0.001 (0.003) loss 1.1484 (1.0521) acc 65.6250 (73.9312) lr 1.9098e-04 eta 1:48:12 +epoch [42/50] batch [350/500] time 1.542 (1.563) data 0.000 (0.002) loss 1.5225 (1.0545) acc 68.7500 (73.8571) lr 1.9098e-04 eta 1:48:04 +epoch [42/50] batch [355/500] time 1.588 (1.563) data 0.000 (0.002) loss 1.2188 (1.0534) acc 68.7500 (73.8820) lr 1.9098e-04 eta 1:47:57 +epoch [42/50] batch [360/500] time 1.567 (1.563) data 0.000 (0.002) loss 1.7471 (1.0533) acc 56.2500 (73.8802) lr 1.9098e-04 eta 1:47:49 +epoch [42/50] batch [365/500] time 1.574 (1.563) data 0.000 (0.002) loss 1.3457 (1.0575) acc 68.7500 (73.8784) lr 1.9098e-04 eta 1:47:41 +epoch [42/50] batch [370/500] time 1.560 (1.563) data 0.000 (0.002) loss 0.8789 (1.0556) acc 75.0000 (73.9443) lr 1.9098e-04 eta 1:47:33 +epoch [42/50] batch [375/500] time 1.534 (1.563) data 0.000 (0.002) loss 0.6938 (1.0548) acc 71.8750 (73.9667) lr 1.9098e-04 eta 1:47:25 +epoch [42/50] batch [380/500] time 1.558 (1.563) data 0.001 (0.002) loss 1.2031 (1.0541) acc 65.6250 (73.9145) lr 1.9098e-04 eta 1:47:17 +epoch [42/50] batch [385/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.9487 (1.0551) acc 81.2500 (73.9529) lr 1.9098e-04 eta 1:47:09 +epoch [42/50] batch [390/500] time 1.568 (1.563) data 0.000 (0.002) loss 1.0312 (1.0574) acc 75.0000 (73.8782) lr 1.9098e-04 eta 1:47:01 +epoch [42/50] batch [395/500] time 1.541 (1.562) data 0.001 (0.002) loss 1.3457 (1.0568) acc 75.0000 (73.9241) lr 1.9098e-04 eta 1:46:53 +epoch [42/50] batch [400/500] time 1.547 (1.562) data 0.000 (0.002) loss 1.4404 (1.0578) acc 65.6250 (73.8438) lr 1.9098e-04 eta 1:46:45 +epoch [42/50] batch [405/500] time 1.561 (1.562) data 0.000 (0.002) loss 0.8135 (1.0554) acc 75.0000 (73.8812) lr 1.9098e-04 
eta 1:46:37 +epoch [42/50] batch [410/500] time 1.563 (1.563) data 0.000 (0.002) loss 1.0459 (1.0538) acc 75.0000 (73.8796) lr 1.9098e-04 eta 1:46:30 +epoch [42/50] batch [415/500] time 1.560 (1.563) data 0.001 (0.002) loss 0.7300 (1.0506) acc 84.3750 (73.9458) lr 1.9098e-04 eta 1:46:23 +epoch [42/50] batch [420/500] time 1.577 (1.563) data 0.000 (0.002) loss 0.9160 (1.0489) acc 68.7500 (73.9658) lr 1.9098e-04 eta 1:46:15 +epoch [42/50] batch [425/500] time 1.543 (1.562) data 0.000 (0.002) loss 1.1289 (1.0505) acc 75.0000 (73.9632) lr 1.9098e-04 eta 1:46:06 +epoch [42/50] batch [430/500] time 1.571 (1.562) data 0.000 (0.002) loss 1.2363 (1.0506) acc 71.8750 (73.9680) lr 1.9098e-04 eta 1:45:59 +epoch [42/50] batch [435/500] time 1.542 (1.562) data 0.000 (0.002) loss 0.8892 (1.0503) acc 75.0000 (73.9871) lr 1.9098e-04 eta 1:45:50 +epoch [42/50] batch [440/500] time 1.552 (1.562) data 0.000 (0.002) loss 1.3555 (1.0501) acc 71.8750 (73.9702) lr 1.9098e-04 eta 1:45:43 +epoch [42/50] batch [445/500] time 1.593 (1.562) data 0.000 (0.002) loss 1.4619 (1.0515) acc 59.3750 (73.9045) lr 1.9098e-04 eta 1:45:34 +epoch [42/50] batch [450/500] time 1.666 (1.562) data 0.000 (0.002) loss 1.6006 (1.0534) acc 65.6250 (73.8958) lr 1.9098e-04 eta 1:45:27 +epoch [42/50] batch [455/500] time 1.567 (1.563) data 0.000 (0.002) loss 0.6885 (1.0544) acc 87.5000 (73.8599) lr 1.9098e-04 eta 1:45:20 +epoch [42/50] batch [460/500] time 1.585 (1.563) data 0.000 (0.002) loss 0.4204 (1.0518) acc 87.5000 (73.9198) lr 1.9098e-04 eta 1:45:13 +epoch [42/50] batch [465/500] time 1.543 (1.563) data 0.001 (0.002) loss 0.7202 (1.0524) acc 84.3750 (73.8777) lr 1.9098e-04 eta 1:45:05 +epoch [42/50] batch [470/500] time 1.564 (1.563) data 0.000 (0.002) loss 1.0703 (1.0509) acc 71.8750 (73.9029) lr 1.9098e-04 eta 1:44:57 +epoch [42/50] batch [475/500] time 1.551 (1.563) data 0.000 (0.002) loss 1.5908 (1.0504) acc 68.7500 (73.9276) lr 1.9098e-04 eta 1:44:49 +epoch [42/50] batch [480/500] time 1.549 (1.562) data 
0.000 (0.002) loss 1.4131 (1.0512) acc 65.6250 (73.9258) lr 1.9098e-04 eta 1:44:41 +epoch [42/50] batch [485/500] time 1.571 (1.563) data 0.001 (0.002) loss 0.7515 (1.0503) acc 75.0000 (73.8982) lr 1.9098e-04 eta 1:44:33 +epoch [42/50] batch [490/500] time 1.550 (1.563) data 0.000 (0.002) loss 1.2021 (1.0516) acc 75.0000 (73.8393) lr 1.9098e-04 eta 1:44:25 +epoch [42/50] batch [495/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.4443 (1.0524) acc 71.8750 (73.8068) lr 1.9098e-04 eta 1:44:17 +epoch [42/50] batch [500/500] time 1.577 (1.562) data 0.000 (0.002) loss 1.6924 (1.0522) acc 59.3750 (73.8187) lr 1.5567e-04 eta 1:44:09 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,035 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [43/50] batch [5/500] time 1.547 (1.660) data 0.000 (0.162) loss 0.9980 (1.0148) acc 71.8750 (77.5000) lr 1.5567e-04 eta 1:50:31 +epoch [43/50] batch [10/500] time 1.535 (1.607) data 0.000 (0.081) loss 0.6855 (0.9552) acc 78.1250 (77.5000) lr 1.5567e-04 eta 1:46:50 +epoch [43/50] batch [15/500] time 1.567 (1.590) data 0.000 (0.054) loss 0.7153 (0.9557) acc 84.3750 (76.8750) lr 1.5567e-04 eta 1:45:36 +epoch [43/50] batch [20/500] time 1.559 (1.582) data 0.000 (0.041) loss 0.8379 (0.9000) acc 78.1250 (78.7500) lr 1.5567e-04 eta 1:44:54 +epoch [43/50] batch [25/500] time 1.581 (1.577) data 0.000 (0.033) loss 1.6670 (0.9772) acc 59.3750 (77.2500) lr 1.5567e-04 eta 1:44:27 +epoch [43/50] batch [30/500] time 1.563 (1.572) data 0.000 (0.027) loss 1.1309 (1.0091) acc 78.1250 (76.2500) lr 1.5567e-04 eta 1:44:00 +epoch [43/50] batch [35/500] time 1.556 (1.571) data 0.000 (0.024) loss 1.3359 (1.0544) acc 78.1250 (75.7143) lr 1.5567e-04 eta 1:43:47 +epoch [43/50] batch [40/500] time 1.546 (1.571) data 0.000 (0.021) loss 1.1270 (1.0584) acc 71.8750 (75.6250) lr 1.5567e-04 eta 1:43:42 +epoch [43/50] batch [45/500] time 1.576 (1.571) data 0.000 (0.018) loss 0.9409 (1.0556) acc 75.0000 (75.2778) lr 1.5567e-04 eta 1:43:31 
+epoch [43/50] batch [50/500] time 1.569 (1.571) data 0.000 (0.017) loss 1.0234 (1.0614) acc 68.7500 (74.7500) lr 1.5567e-04 eta 1:43:24 +epoch [43/50] batch [55/500] time 1.587 (1.571) data 0.000 (0.015) loss 1.1025 (1.0585) acc 71.8750 (75.0000) lr 1.5567e-04 eta 1:43:15 +epoch [43/50] batch [60/500] time 1.556 (1.571) data 0.001 (0.014) loss 1.1357 (1.0590) acc 75.0000 (75.0000) lr 1.5567e-04 eta 1:43:08 +epoch [43/50] batch [65/500] time 1.579 (1.570) data 0.000 (0.013) loss 0.9082 (1.0415) acc 68.7500 (75.0000) lr 1.5567e-04 eta 1:42:57 +epoch [43/50] batch [70/500] time 1.555 (1.569) data 0.000 (0.012) loss 1.0527 (1.0512) acc 71.8750 (74.7321) lr 1.5567e-04 eta 1:42:46 +epoch [43/50] batch [75/500] time 1.561 (1.568) data 0.000 (0.011) loss 0.7080 (1.0444) acc 78.1250 (74.7500) lr 1.5567e-04 eta 1:42:36 +epoch [43/50] batch [80/500] time 1.556 (1.568) data 0.000 (0.011) loss 1.2803 (1.0430) acc 65.6250 (74.7266) lr 1.5567e-04 eta 1:42:25 +epoch [43/50] batch [85/500] time 1.558 (1.568) data 0.000 (0.010) loss 1.2871 (1.0405) acc 71.8750 (74.7059) lr 1.5567e-04 eta 1:42:20 +epoch [43/50] batch [90/500] time 1.567 (1.568) data 0.001 (0.009) loss 1.1846 (1.0465) acc 65.6250 (74.4097) lr 1.5567e-04 eta 1:42:11 +epoch [43/50] batch [95/500] time 1.552 (1.567) data 0.000 (0.009) loss 0.9512 (1.0534) acc 75.0000 (74.2434) lr 1.5567e-04 eta 1:41:58 +epoch [43/50] batch [100/500] time 1.548 (1.566) data 0.000 (0.009) loss 0.9985 (1.0586) acc 71.8750 (74.0312) lr 1.5567e-04 eta 1:41:48 +epoch [43/50] batch [105/500] time 1.549 (1.566) data 0.000 (0.008) loss 0.5879 (1.0473) acc 87.5000 (74.1964) lr 1.5567e-04 eta 1:41:38 +epoch [43/50] batch [110/500] time 1.568 (1.565) data 0.000 (0.008) loss 0.7866 (1.0462) acc 78.1250 (74.3466) lr 1.5567e-04 eta 1:41:29 +epoch [43/50] batch [115/500] time 1.542 (1.565) data 0.000 (0.007) loss 0.8345 (1.0399) acc 81.2500 (74.4837) lr 1.5567e-04 eta 1:41:19 +epoch [43/50] batch [120/500] time 1.572 (1.565) data 0.000 (0.007) loss 
0.5059 (1.0319) acc 87.5000 (74.7656) lr 1.5567e-04 eta 1:41:10 +epoch [43/50] batch [125/500] time 1.547 (1.565) data 0.000 (0.007) loss 0.9585 (1.0417) acc 75.0000 (74.5750) lr 1.5567e-04 eta 1:41:03 +epoch [43/50] batch [130/500] time 1.582 (1.565) data 0.000 (0.007) loss 0.4150 (1.0362) acc 90.6250 (74.6875) lr 1.5567e-04 eta 1:40:55 +epoch [43/50] batch [135/500] time 1.562 (1.565) data 0.000 (0.006) loss 1.6680 (1.0462) acc 62.5000 (74.4676) lr 1.5567e-04 eta 1:40:47 +epoch [43/50] batch [140/500] time 1.544 (1.564) data 0.000 (0.006) loss 0.7261 (1.0417) acc 71.8750 (74.4196) lr 1.5567e-04 eta 1:40:37 +epoch [43/50] batch [145/500] time 1.543 (1.564) data 0.000 (0.006) loss 0.9927 (1.0415) acc 71.8750 (74.4828) lr 1.5567e-04 eta 1:40:28 +epoch [43/50] batch [150/500] time 1.561 (1.564) data 0.000 (0.006) loss 0.8799 (1.0397) acc 78.1250 (74.3958) lr 1.5567e-04 eta 1:40:21 +epoch [43/50] batch [155/500] time 1.581 (1.564) data 0.000 (0.006) loss 0.6802 (1.0400) acc 84.3750 (74.4758) lr 1.5567e-04 eta 1:40:13 +epoch [43/50] batch [160/500] time 1.563 (1.564) data 0.001 (0.005) loss 1.0586 (1.0460) acc 75.0000 (74.3359) lr 1.5567e-04 eta 1:40:05 +epoch [43/50] batch [165/500] time 1.557 (1.564) data 0.001 (0.005) loss 1.0928 (1.0421) acc 81.2500 (74.3939) lr 1.5567e-04 eta 1:39:57 +epoch [43/50] batch [170/500] time 1.578 (1.564) data 0.000 (0.005) loss 1.2676 (1.0448) acc 68.7500 (74.2831) lr 1.5567e-04 eta 1:39:49 +epoch [43/50] batch [175/500] time 1.577 (1.564) data 0.000 (0.005) loss 1.3770 (1.0527) acc 62.5000 (74.0179) lr 1.5567e-04 eta 1:39:41 +epoch [43/50] batch [180/500] time 1.582 (1.565) data 0.000 (0.005) loss 0.7812 (1.0552) acc 75.0000 (73.9236) lr 1.5567e-04 eta 1:39:37 +epoch [43/50] batch [185/500] time 1.550 (1.565) data 0.001 (0.005) loss 1.5107 (1.0543) acc 65.6250 (73.9696) lr 1.5567e-04 eta 1:39:29 +epoch [43/50] batch [190/500] time 1.580 (1.565) data 0.000 (0.005) loss 0.9351 (1.0525) acc 71.8750 (73.9967) lr 1.5567e-04 eta 1:39:21 
+epoch [43/50] batch [195/500] time 1.549 (1.565) data 0.000 (0.005) loss 0.7158 (1.0492) acc 75.0000 (74.1346) lr 1.5567e-04 eta 1:39:13 +epoch [43/50] batch [200/500] time 1.559 (1.564) data 0.000 (0.004) loss 1.0176 (1.0495) acc 68.7500 (74.0469) lr 1.5567e-04 eta 1:39:04 +epoch [43/50] batch [205/500] time 1.528 (1.564) data 0.000 (0.004) loss 1.1953 (1.0493) acc 78.1250 (74.0091) lr 1.5567e-04 eta 1:38:55 +epoch [43/50] batch [210/500] time 1.560 (1.564) data 0.000 (0.004) loss 0.7920 (1.0510) acc 75.0000 (73.9435) lr 1.5567e-04 eta 1:38:47 +epoch [43/50] batch [215/500] time 1.550 (1.564) data 0.000 (0.004) loss 1.1279 (1.0568) acc 68.7500 (73.8517) lr 1.5567e-04 eta 1:38:39 +epoch [43/50] batch [220/500] time 1.548 (1.564) data 0.000 (0.004) loss 1.3643 (1.0594) acc 65.6250 (73.6364) lr 1.5567e-04 eta 1:38:31 +epoch [43/50] batch [225/500] time 1.558 (1.564) data 0.000 (0.004) loss 0.6294 (1.0569) acc 71.8750 (73.6528) lr 1.5567e-04 eta 1:38:24 +epoch [43/50] batch [230/500] time 1.546 (1.564) data 0.000 (0.004) loss 0.7998 (1.0547) acc 81.2500 (73.6005) lr 1.5567e-04 eta 1:38:15 +epoch [43/50] batch [235/500] time 1.578 (1.564) data 0.001 (0.004) loss 0.6719 (1.0511) acc 81.2500 (73.6702) lr 1.5567e-04 eta 1:38:07 +epoch [43/50] batch [240/500] time 1.564 (1.564) data 0.000 (0.004) loss 1.2939 (1.0504) acc 75.0000 (73.7500) lr 1.5567e-04 eta 1:37:59 +epoch [43/50] batch [245/500] time 1.572 (1.564) data 0.001 (0.004) loss 1.0508 (1.0501) acc 84.3750 (73.8520) lr 1.5567e-04 eta 1:37:51 +epoch [43/50] batch [250/500] time 1.547 (1.564) data 0.000 (0.004) loss 1.1016 (1.0502) acc 71.8750 (73.8000) lr 1.5567e-04 eta 1:37:43 +epoch [43/50] batch [255/500] time 1.550 (1.563) data 0.000 (0.004) loss 0.9761 (1.0493) acc 75.0000 (73.7255) lr 1.5567e-04 eta 1:37:34 +epoch [43/50] batch [260/500] time 1.554 (1.563) data 0.000 (0.004) loss 0.7969 (1.0497) acc 81.2500 (73.6899) lr 1.5567e-04 eta 1:37:26 +epoch [43/50] batch [265/500] time 1.561 (1.563) data 0.000 
(0.003) loss 0.9780 (1.0514) acc 68.7500 (73.5495) lr 1.5567e-04 eta 1:37:18 +epoch [43/50] batch [270/500] time 1.534 (1.563) data 0.000 (0.003) loss 1.0039 (1.0498) acc 68.7500 (73.5069) lr 1.5567e-04 eta 1:37:09 +epoch [43/50] batch [275/500] time 1.549 (1.563) data 0.001 (0.003) loss 2.0254 (1.0532) acc 56.2500 (73.4545) lr 1.5567e-04 eta 1:37:01 +epoch [43/50] batch [280/500] time 1.541 (1.563) data 0.001 (0.003) loss 0.6587 (1.0522) acc 78.1250 (73.4598) lr 1.5567e-04 eta 1:36:52 +epoch [43/50] batch [285/500] time 1.536 (1.562) data 0.000 (0.003) loss 0.5752 (1.0501) acc 78.1250 (73.4868) lr 1.5567e-04 eta 1:36:43 +epoch [43/50] batch [290/500] time 1.557 (1.562) data 0.000 (0.003) loss 1.5605 (1.0511) acc 68.7500 (73.4806) lr 1.5567e-04 eta 1:36:35 +epoch [43/50] batch [295/500] time 1.568 (1.562) data 0.000 (0.003) loss 0.8135 (1.0501) acc 87.5000 (73.5169) lr 1.5567e-04 eta 1:36:28 +epoch [43/50] batch [300/500] time 1.563 (1.562) data 0.000 (0.003) loss 0.4248 (1.0469) acc 84.3750 (73.5938) lr 1.5567e-04 eta 1:36:20 +epoch [43/50] batch [305/500] time 1.560 (1.562) data 0.000 (0.003) loss 0.8530 (1.0444) acc 81.2500 (73.6783) lr 1.5567e-04 eta 1:36:12 +epoch [43/50] batch [310/500] time 1.567 (1.562) data 0.000 (0.003) loss 0.6768 (1.0418) acc 81.2500 (73.7298) lr 1.5567e-04 eta 1:36:04 +epoch [43/50] batch [315/500] time 1.584 (1.562) data 0.000 (0.003) loss 1.6641 (1.0420) acc 65.6250 (73.6806) lr 1.5567e-04 eta 1:35:57 +epoch [43/50] batch [320/500] time 1.579 (1.562) data 0.000 (0.003) loss 1.3516 (1.0394) acc 62.5000 (73.7305) lr 1.5567e-04 eta 1:35:49 +epoch [43/50] batch [325/500] time 1.576 (1.563) data 0.000 (0.003) loss 0.8330 (1.0372) acc 81.2500 (73.7885) lr 1.5567e-04 eta 1:35:44 +epoch [43/50] batch [330/500] time 1.602 (1.563) data 0.000 (0.003) loss 1.1533 (1.0387) acc 75.0000 (73.7879) lr 1.5567e-04 eta 1:35:36 +epoch [43/50] batch [335/500] time 1.540 (1.563) data 0.001 (0.003) loss 1.9443 (1.0420) acc 53.1250 (73.7034) lr 1.5567e-04 
eta 1:35:29 +epoch [43/50] batch [340/500] time 1.537 (1.563) data 0.000 (0.003) loss 1.1631 (1.0425) acc 75.0000 (73.6673) lr 1.5567e-04 eta 1:35:20 +epoch [43/50] batch [345/500] time 1.563 (1.563) data 0.000 (0.003) loss 0.9062 (1.0419) acc 78.1250 (73.6866) lr 1.5567e-04 eta 1:35:12 +epoch [43/50] batch [350/500] time 1.569 (1.563) data 0.001 (0.003) loss 0.5122 (1.0404) acc 87.5000 (73.7589) lr 1.5567e-04 eta 1:35:05 +epoch [43/50] batch [355/500] time 1.568 (1.563) data 0.000 (0.003) loss 0.6123 (1.0365) acc 84.3750 (73.8644) lr 1.5567e-04 eta 1:34:58 +epoch [43/50] batch [360/500] time 1.556 (1.563) data 0.000 (0.003) loss 0.7314 (1.0349) acc 78.1250 (73.8628) lr 1.5567e-04 eta 1:34:49 +epoch [43/50] batch [365/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.2891 (1.0351) acc 75.0000 (73.8870) lr 1.5567e-04 eta 1:34:41 +epoch [43/50] batch [370/500] time 1.551 (1.563) data 0.001 (0.003) loss 1.0967 (1.0371) acc 81.2500 (73.8345) lr 1.5567e-04 eta 1:34:34 +epoch [43/50] batch [375/500] time 1.587 (1.563) data 0.000 (0.003) loss 1.6904 (1.0360) acc 56.2500 (73.8583) lr 1.5567e-04 eta 1:34:26 +epoch [43/50] batch [380/500] time 1.562 (1.563) data 0.000 (0.003) loss 1.6084 (1.0386) acc 62.5000 (73.7993) lr 1.5567e-04 eta 1:34:18 +epoch [43/50] batch [385/500] time 1.551 (1.563) data 0.000 (0.003) loss 0.8530 (1.0420) acc 75.0000 (73.7581) lr 1.5567e-04 eta 1:34:10 +epoch [43/50] batch [390/500] time 1.568 (1.563) data 0.000 (0.002) loss 0.8599 (1.0413) acc 71.8750 (73.7660) lr 1.5567e-04 eta 1:34:02 +epoch [43/50] batch [395/500] time 1.555 (1.563) data 0.000 (0.002) loss 1.1025 (1.0406) acc 71.8750 (73.8212) lr 1.5567e-04 eta 1:33:54 +epoch [43/50] batch [400/500] time 1.573 (1.563) data 0.000 (0.002) loss 0.9902 (1.0416) acc 75.0000 (73.8672) lr 1.5567e-04 eta 1:33:46 +epoch [43/50] batch [405/500] time 1.543 (1.563) data 0.000 (0.002) loss 0.7646 (1.0401) acc 84.3750 (73.9660) lr 1.5567e-04 eta 1:33:37 +epoch [43/50] batch [410/500] time 1.570 (1.563) data 
0.001 (0.002) loss 1.1289 (1.0398) acc 71.8750 (73.9787) lr 1.5567e-04 eta 1:33:30 +epoch [43/50] batch [415/500] time 1.571 (1.563) data 0.001 (0.002) loss 0.9150 (1.0420) acc 75.0000 (73.9307) lr 1.5567e-04 eta 1:33:22 +epoch [43/50] batch [420/500] time 1.545 (1.563) data 0.000 (0.002) loss 0.6929 (1.0400) acc 81.2500 (73.9732) lr 1.5567e-04 eta 1:33:14 +epoch [43/50] batch [425/500] time 1.545 (1.563) data 0.000 (0.002) loss 1.1641 (1.0422) acc 65.6250 (73.9044) lr 1.5567e-04 eta 1:33:06 +epoch [43/50] batch [430/500] time 1.552 (1.563) data 0.000 (0.002) loss 1.0400 (1.0430) acc 65.6250 (73.8445) lr 1.5567e-04 eta 1:32:58 +epoch [43/50] batch [435/500] time 1.554 (1.562) data 0.000 (0.002) loss 1.0410 (1.0426) acc 84.3750 (73.9009) lr 1.5567e-04 eta 1:32:50 +epoch [43/50] batch [440/500] time 1.546 (1.563) data 0.000 (0.002) loss 1.1387 (1.0419) acc 75.0000 (73.9205) lr 1.5567e-04 eta 1:32:42 +epoch [43/50] batch [445/500] time 1.581 (1.563) data 0.000 (0.002) loss 0.5620 (1.0404) acc 84.3750 (73.9466) lr 1.5567e-04 eta 1:32:35 +epoch [43/50] batch [450/500] time 1.580 (1.563) data 0.000 (0.002) loss 1.2910 (1.0390) acc 59.3750 (73.9444) lr 1.5567e-04 eta 1:32:28 +epoch [43/50] batch [455/500] time 1.560 (1.563) data 0.000 (0.002) loss 0.9893 (1.0398) acc 75.0000 (73.8805) lr 1.5567e-04 eta 1:32:20 +epoch [43/50] batch [460/500] time 1.564 (1.563) data 0.000 (0.002) loss 0.7778 (1.0397) acc 75.0000 (73.8587) lr 1.5567e-04 eta 1:32:12 +epoch [43/50] batch [465/500] time 1.668 (1.563) data 0.000 (0.002) loss 1.1914 (1.0405) acc 68.7500 (73.8710) lr 1.5567e-04 eta 1:32:04 +epoch [43/50] batch [470/500] time 1.570 (1.563) data 0.000 (0.002) loss 0.7417 (1.0390) acc 78.1250 (73.8830) lr 1.5567e-04 eta 1:31:56 +epoch [43/50] batch [475/500] time 1.569 (1.563) data 0.001 (0.002) loss 1.1816 (1.0396) acc 68.7500 (73.8487) lr 1.5567e-04 eta 1:31:48 +epoch [43/50] batch [480/500] time 1.553 (1.563) data 0.000 (0.002) loss 0.8169 (1.0376) acc 71.8750 (73.8542) lr 
1.5567e-04 eta 1:31:40 +epoch [43/50] batch [485/500] time 1.551 (1.563) data 0.001 (0.002) loss 0.4236 (1.0359) acc 90.6250 (73.8724) lr 1.5567e-04 eta 1:31:32 +epoch [43/50] batch [490/500] time 1.525 (1.563) data 0.000 (0.002) loss 0.8315 (1.0367) acc 78.1250 (73.8712) lr 1.5567e-04 eta 1:31:24 +epoch [43/50] batch [495/500] time 1.569 (1.563) data 0.000 (0.002) loss 0.6655 (1.0355) acc 71.8750 (73.8510) lr 1.5567e-04 eta 1:31:16 +epoch [43/50] batch [500/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.8647 (1.0343) acc 78.1250 (73.8563) lr 1.2369e-04 eta 1:31:08 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,061 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.7% +epoch [44/50] batch [5/500] time 1.538 (1.667) data 0.000 (0.168) loss 0.6187 (0.8603) acc 84.3750 (77.5000) lr 1.2369e-04 eta 1:37:04 +epoch [44/50] batch [10/500] time 1.561 (1.606) data 0.000 (0.084) loss 0.8901 (0.9103) acc 75.0000 (76.2500) lr 1.2369e-04 eta 1:33:25 +epoch [44/50] batch [15/500] time 1.560 (1.596) data 0.000 (0.056) loss 1.0420 (0.9664) acc 71.8750 (75.4167) lr 1.2369e-04 eta 1:32:40 +epoch [44/50] batch [20/500] time 1.576 (1.595) data 0.000 (0.042) loss 1.2334 (1.0615) acc 62.5000 (73.9062) lr 1.2369e-04 eta 1:32:31 +epoch [44/50] batch [25/500] time 1.543 (1.587) data 0.000 (0.034) loss 1.3252 (1.0790) acc 68.7500 (74.0000) lr 1.2369e-04 eta 1:31:55 +epoch [44/50] batch [30/500] time 1.531 (1.582) data 0.000 (0.028) loss 0.8945 (1.0950) acc 81.2500 (73.6458) lr 1.2369e-04 eta 1:31:28 +epoch [44/50] batch [35/500] time 1.568 (1.579) data 0.000 (0.024) loss 1.3760 (1.0898) acc 68.7500 (73.6607) lr 1.2369e-04 eta 1:31:09 +epoch [44/50] batch [40/500] time 1.573 (1.576) data 0.000 (0.021) loss 1.0811 (1.0944) acc 62.5000 (73.2812) lr 1.2369e-04 eta 1:30:52 +epoch [44/50] batch [45/500] time 1.561 (1.573) data 0.001 (0.019) loss 1.3916 (1.0985) acc 71.8750 (73.4722) lr 1.2369e-04 eta 1:30:35 +epoch [44/50] batch [50/500] time 1.576 (1.572) data 0.000 
(0.017) loss 1.1953 (1.0813) acc 68.7500 (73.5000) lr 1.2369e-04 eta 1:30:22 +epoch [44/50] batch [55/500] time 1.577 (1.571) data 0.000 (0.016) loss 1.0908 (1.0693) acc 81.2500 (74.1477) lr 1.2369e-04 eta 1:30:13 +epoch [44/50] batch [60/500] time 1.566 (1.571) data 0.000 (0.014) loss 1.2578 (1.0711) acc 62.5000 (74.0625) lr 1.2369e-04 eta 1:30:03 +epoch [44/50] batch [65/500] time 1.553 (1.570) data 0.000 (0.013) loss 1.4619 (1.0815) acc 59.3750 (73.7500) lr 1.2369e-04 eta 1:29:53 +epoch [44/50] batch [70/500] time 1.555 (1.569) data 0.000 (0.012) loss 0.5898 (1.0654) acc 78.1250 (73.8393) lr 1.2369e-04 eta 1:29:41 +epoch [44/50] batch [75/500] time 1.566 (1.569) data 0.000 (0.012) loss 0.7563 (1.0602) acc 78.1250 (73.9583) lr 1.2369e-04 eta 1:29:32 +epoch [44/50] batch [80/500] time 1.565 (1.569) data 0.000 (0.011) loss 1.5322 (1.0549) acc 59.3750 (73.9844) lr 1.2369e-04 eta 1:29:25 +epoch [44/50] batch [85/500] time 1.570 (1.569) data 0.000 (0.010) loss 1.4580 (1.0524) acc 62.5000 (74.0441) lr 1.2369e-04 eta 1:29:18 +epoch [44/50] batch [90/500] time 1.563 (1.568) data 0.000 (0.010) loss 1.2998 (1.0563) acc 71.8750 (73.9236) lr 1.2369e-04 eta 1:29:08 +epoch [44/50] batch [95/500] time 1.564 (1.568) data 0.000 (0.009) loss 0.9536 (1.0594) acc 75.0000 (73.6842) lr 1.2369e-04 eta 1:29:00 +epoch [44/50] batch [100/500] time 1.552 (1.568) data 0.000 (0.009) loss 0.6338 (1.0429) acc 84.3750 (73.7812) lr 1.2369e-04 eta 1:28:51 +epoch [44/50] batch [105/500] time 1.570 (1.568) data 0.000 (0.008) loss 1.0850 (1.0607) acc 75.0000 (73.4524) lr 1.2369e-04 eta 1:28:43 +epoch [44/50] batch [110/500] time 1.559 (1.568) data 0.000 (0.008) loss 0.9517 (1.0535) acc 78.1250 (73.5795) lr 1.2369e-04 eta 1:28:34 +epoch [44/50] batch [115/500] time 1.662 (1.568) data 0.000 (0.008) loss 1.1787 (1.0644) acc 59.3750 (73.1793) lr 1.2369e-04 eta 1:28:28 +epoch [44/50] batch [120/500] time 1.550 (1.568) data 0.000 (0.007) loss 0.6069 (1.0648) acc 81.2500 (73.2292) lr 1.2369e-04 eta 1:28:18 
+epoch [44/50] batch [125/500] time 1.569 (1.567) data 0.000 (0.007) loss 0.8735 (1.0647) acc 75.0000 (73.2750) lr 1.2369e-04 eta 1:28:09 +epoch [44/50] batch [130/500] time 1.556 (1.567) data 0.000 (0.007) loss 0.7563 (1.0554) acc 81.2500 (73.5577) lr 1.2369e-04 eta 1:28:00 +epoch [44/50] batch [135/500] time 1.569 (1.567) data 0.000 (0.007) loss 0.8340 (1.0585) acc 68.7500 (73.3796) lr 1.2369e-04 eta 1:27:52 +epoch [44/50] batch [140/500] time 1.559 (1.567) data 0.000 (0.006) loss 1.4375 (1.0585) acc 71.8750 (73.3482) lr 1.2369e-04 eta 1:27:45 +epoch [44/50] batch [145/500] time 1.561 (1.567) data 0.001 (0.006) loss 1.1348 (1.0577) acc 71.8750 (73.3405) lr 1.2369e-04 eta 1:27:38 +epoch [44/50] batch [150/500] time 1.572 (1.567) data 0.001 (0.006) loss 1.6592 (1.0603) acc 56.2500 (73.3958) lr 1.2369e-04 eta 1:27:29 +epoch [44/50] batch [155/500] time 1.548 (1.567) data 0.000 (0.006) loss 1.1680 (1.0560) acc 65.6250 (73.3669) lr 1.2369e-04 eta 1:27:21 +epoch [44/50] batch [160/500] time 1.570 (1.568) data 0.000 (0.006) loss 1.7422 (1.0559) acc 68.7500 (73.4180) lr 1.2369e-04 eta 1:27:15 +epoch [44/50] batch [165/500] time 1.560 (1.567) data 0.000 (0.005) loss 1.4434 (1.0615) acc 68.7500 (73.3333) lr 1.2369e-04 eta 1:27:07 +epoch [44/50] batch [170/500] time 1.581 (1.568) data 0.000 (0.005) loss 0.9321 (1.0608) acc 78.1250 (73.2904) lr 1.2369e-04 eta 1:26:59 +epoch [44/50] batch [175/500] time 1.541 (1.567) data 0.000 (0.005) loss 1.0430 (1.0650) acc 65.6250 (73.1607) lr 1.2369e-04 eta 1:26:51 +epoch [44/50] batch [180/500] time 1.532 (1.567) data 0.000 (0.005) loss 0.7695 (1.0608) acc 78.1250 (73.1771) lr 1.2369e-04 eta 1:26:42 +epoch [44/50] batch [185/500] time 1.564 (1.567) data 0.000 (0.005) loss 1.5352 (1.0679) acc 68.7500 (73.0743) lr 1.2369e-04 eta 1:26:34 +epoch [44/50] batch [190/500] time 1.553 (1.566) data 0.001 (0.005) loss 1.1348 (1.0691) acc 78.1250 (73.0099) lr 1.2369e-04 eta 1:26:24 +epoch [44/50] batch [195/500] time 1.569 (1.566) data 0.001 
(0.005) loss 0.9136 (1.0696) acc 71.8750 (72.9808) lr 1.2369e-04 eta 1:26:16 +epoch [44/50] batch [200/500] time 1.550 (1.566) data 0.000 (0.005) loss 1.4297 (1.0718) acc 75.0000 (72.9844) lr 1.2369e-04 eta 1:26:09 +epoch [44/50] batch [205/500] time 1.573 (1.566) data 0.001 (0.004) loss 0.4404 (1.0711) acc 87.5000 (73.0488) lr 1.2369e-04 eta 1:26:00 +epoch [44/50] batch [210/500] time 1.554 (1.566) data 0.000 (0.004) loss 1.1475 (1.0724) acc 71.8750 (72.9762) lr 1.2369e-04 eta 1:25:52 +epoch [44/50] batch [215/500] time 1.557 (1.566) data 0.000 (0.004) loss 0.4087 (1.0671) acc 87.5000 (73.0523) lr 1.2369e-04 eta 1:25:44 +epoch [44/50] batch [220/500] time 1.555 (1.566) data 0.000 (0.004) loss 1.1338 (1.0689) acc 71.8750 (73.0114) lr 1.2369e-04 eta 1:25:35 +epoch [44/50] batch [225/500] time 1.550 (1.566) data 0.000 (0.004) loss 0.7480 (1.0703) acc 87.5000 (73.0000) lr 1.2369e-04 eta 1:25:27 +epoch [44/50] batch [230/500] time 1.587 (1.566) data 0.000 (0.004) loss 1.1680 (1.0714) acc 71.8750 (72.9620) lr 1.2369e-04 eta 1:25:19 +epoch [44/50] batch [235/500] time 1.564 (1.565) data 0.000 (0.004) loss 1.0283 (1.0724) acc 78.1250 (72.9654) lr 1.2369e-04 eta 1:25:11 +epoch [44/50] batch [240/500] time 1.551 (1.565) data 0.000 (0.004) loss 0.7354 (1.0703) acc 71.8750 (72.9948) lr 1.2369e-04 eta 1:25:03 +epoch [44/50] batch [245/500] time 1.527 (1.565) data 0.000 (0.004) loss 1.2441 (1.0648) acc 75.0000 (73.0740) lr 1.2369e-04 eta 1:24:54 +epoch [44/50] batch [250/500] time 1.568 (1.565) data 0.000 (0.004) loss 1.9082 (1.0688) acc 56.2500 (72.9000) lr 1.2369e-04 eta 1:24:46 +epoch [44/50] batch [255/500] time 1.559 (1.565) data 0.000 (0.004) loss 1.4023 (1.0668) acc 62.5000 (72.9289) lr 1.2369e-04 eta 1:24:37 +epoch [44/50] batch [260/500] time 1.571 (1.565) data 0.001 (0.004) loss 1.3262 (1.0655) acc 71.8750 (72.9567) lr 1.2369e-04 eta 1:24:31 +epoch [44/50] batch [265/500] time 1.537 (1.565) data 0.000 (0.004) loss 1.0576 (1.0654) acc 71.8750 (73.0071) lr 1.2369e-04 
eta 1:24:22 +epoch [44/50] batch [270/500] time 1.548 (1.565) data 0.000 (0.004) loss 0.6523 (1.0635) acc 75.0000 (73.0093) lr 1.2369e-04 eta 1:24:13 +epoch [44/50] batch [275/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.5332 (1.0676) acc 65.6250 (72.9773) lr 1.2369e-04 eta 1:24:05 +epoch [44/50] batch [280/500] time 1.557 (1.564) data 0.000 (0.003) loss 0.9429 (1.0729) acc 71.8750 (72.8125) lr 1.2369e-04 eta 1:23:57 +epoch [44/50] batch [285/500] time 1.553 (1.564) data 0.000 (0.003) loss 0.7998 (1.0732) acc 75.0000 (72.8180) lr 1.2369e-04 eta 1:23:49 +epoch [44/50] batch [290/500] time 1.556 (1.564) data 0.000 (0.003) loss 0.9912 (1.0745) acc 68.7500 (72.7802) lr 1.2369e-04 eta 1:23:41 +epoch [44/50] batch [295/500] time 1.572 (1.564) data 0.001 (0.003) loss 1.0264 (1.0705) acc 75.0000 (72.8708) lr 1.2369e-04 eta 1:23:33 +epoch [44/50] batch [300/500] time 1.559 (1.564) data 0.001 (0.003) loss 0.8931 (1.0690) acc 71.8750 (72.8229) lr 1.2369e-04 eta 1:23:25 +epoch [44/50] batch [305/500] time 1.530 (1.564) data 0.000 (0.003) loss 0.8530 (1.0694) acc 65.6250 (72.8381) lr 1.2369e-04 eta 1:23:17 +epoch [44/50] batch [310/500] time 1.588 (1.564) data 0.000 (0.003) loss 1.4004 (1.0698) acc 65.6250 (72.8226) lr 1.2369e-04 eta 1:23:09 +epoch [44/50] batch [315/500] time 1.566 (1.564) data 0.000 (0.003) loss 0.4517 (1.0684) acc 93.7500 (72.8671) lr 1.2369e-04 eta 1:23:01 +epoch [44/50] batch [320/500] time 1.555 (1.564) data 0.000 (0.003) loss 0.9155 (1.0649) acc 75.0000 (72.9883) lr 1.2369e-04 eta 1:22:53 +epoch [44/50] batch [325/500] time 1.549 (1.564) data 0.000 (0.003) loss 0.6670 (1.0602) acc 81.2500 (73.0962) lr 1.2369e-04 eta 1:22:45 +epoch [44/50] batch [330/500] time 1.562 (1.564) data 0.001 (0.003) loss 0.4294 (1.0558) acc 84.3750 (73.2008) lr 1.2369e-04 eta 1:22:37 +epoch [44/50] batch [335/500] time 1.562 (1.564) data 0.000 (0.003) loss 1.0137 (1.0538) acc 75.0000 (73.2183) lr 1.2369e-04 eta 1:22:29 +epoch [44/50] batch [340/500] time 1.562 (1.564) data 
0.000 (0.003) loss 1.0488 (1.0532) acc 78.1250 (73.2445) lr 1.2369e-04 eta 1:22:21 +epoch [44/50] batch [345/500] time 1.548 (1.564) data 0.001 (0.003) loss 0.9629 (1.0534) acc 65.6250 (73.2246) lr 1.2369e-04 eta 1:22:13 +epoch [44/50] batch [350/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.2217 (1.0552) acc 56.2500 (73.1696) lr 1.2369e-04 eta 1:22:05 +epoch [44/50] batch [355/500] time 1.568 (1.564) data 0.000 (0.003) loss 0.8789 (1.0544) acc 75.0000 (73.1602) lr 1.2369e-04 eta 1:21:58 +epoch [44/50] batch [360/500] time 1.568 (1.564) data 0.000 (0.003) loss 0.9707 (1.0566) acc 75.0000 (73.1163) lr 1.2369e-04 eta 1:21:49 +epoch [44/50] batch [365/500] time 1.553 (1.564) data 0.000 (0.003) loss 0.8286 (1.0564) acc 78.1250 (73.0908) lr 1.2369e-04 eta 1:21:42 +epoch [44/50] batch [370/500] time 1.574 (1.564) data 0.000 (0.003) loss 1.1143 (1.0585) acc 78.1250 (73.0659) lr 1.2369e-04 eta 1:21:33 +epoch [44/50] batch [375/500] time 1.565 (1.564) data 0.000 (0.003) loss 0.8164 (1.0572) acc 71.8750 (73.1000) lr 1.2369e-04 eta 1:21:26 +epoch [44/50] batch [380/500] time 1.526 (1.564) data 0.000 (0.003) loss 0.9736 (1.0565) acc 81.2500 (73.1003) lr 1.2369e-04 eta 1:21:18 +epoch [44/50] batch [385/500] time 1.556 (1.564) data 0.000 (0.003) loss 0.8662 (1.0531) acc 78.1250 (73.1818) lr 1.2369e-04 eta 1:21:10 +epoch [44/50] batch [390/500] time 1.541 (1.563) data 0.000 (0.003) loss 0.7534 (1.0501) acc 84.3750 (73.2452) lr 1.2369e-04 eta 1:21:02 +epoch [44/50] batch [395/500] time 1.569 (1.563) data 0.000 (0.003) loss 1.0586 (1.0508) acc 68.7500 (73.2358) lr 1.2369e-04 eta 1:20:54 +epoch [44/50] batch [400/500] time 1.577 (1.563) data 0.000 (0.003) loss 0.6006 (1.0493) acc 87.5000 (73.2969) lr 1.2369e-04 eta 1:20:46 +epoch [44/50] batch [405/500] time 1.551 (1.563) data 0.000 (0.002) loss 0.9414 (1.0490) acc 78.1250 (73.3333) lr 1.2369e-04 eta 1:20:38 +epoch [44/50] batch [410/500] time 1.552 (1.563) data 0.001 (0.002) loss 0.9292 (1.0470) acc 87.5000 (73.4070) lr 
1.2369e-04 eta 1:20:30 +epoch [44/50] batch [415/500] time 1.582 (1.563) data 0.000 (0.002) loss 1.3955 (1.0472) acc 62.5000 (73.4036) lr 1.2369e-04 eta 1:20:22 +epoch [44/50] batch [420/500] time 1.564 (1.563) data 0.000 (0.002) loss 0.9463 (1.0457) acc 75.0000 (73.4301) lr 1.2369e-04 eta 1:20:15 +epoch [44/50] batch [425/500] time 1.568 (1.563) data 0.001 (0.002) loss 0.6797 (1.0455) acc 78.1250 (73.4265) lr 1.2369e-04 eta 1:20:07 +epoch [44/50] batch [430/500] time 1.553 (1.563) data 0.000 (0.002) loss 1.1670 (1.0457) acc 65.6250 (73.4302) lr 1.2369e-04 eta 1:19:59 +epoch [44/50] batch [435/500] time 1.548 (1.563) data 0.000 (0.002) loss 0.7100 (1.0447) acc 78.1250 (73.4195) lr 1.2369e-04 eta 1:19:51 +epoch [44/50] batch [440/500] time 1.534 (1.563) data 0.000 (0.002) loss 1.6191 (1.0455) acc 65.6250 (73.4446) lr 1.2369e-04 eta 1:19:42 +epoch [44/50] batch [445/500] time 1.646 (1.563) data 0.000 (0.002) loss 1.2100 (1.0451) acc 71.8750 (73.4621) lr 1.2369e-04 eta 1:19:35 +epoch [44/50] batch [450/500] time 1.542 (1.563) data 0.000 (0.002) loss 0.9019 (1.0455) acc 78.1250 (73.4583) lr 1.2369e-04 eta 1:19:26 +epoch [44/50] batch [455/500] time 1.550 (1.563) data 0.000 (0.002) loss 0.7070 (1.0450) acc 81.2500 (73.4753) lr 1.2369e-04 eta 1:19:18 +epoch [44/50] batch [460/500] time 1.560 (1.563) data 0.000 (0.002) loss 1.2363 (1.0428) acc 68.7500 (73.4986) lr 1.2369e-04 eta 1:19:10 +epoch [44/50] batch [465/500] time 1.574 (1.563) data 0.000 (0.002) loss 0.9150 (1.0409) acc 71.8750 (73.5349) lr 1.2369e-04 eta 1:19:02 +epoch [44/50] batch [470/500] time 1.544 (1.563) data 0.000 (0.002) loss 2.2402 (1.0424) acc 56.2500 (73.5372) lr 1.2369e-04 eta 1:18:54 +epoch [44/50] batch [475/500] time 1.562 (1.563) data 0.000 (0.002) loss 0.8018 (1.0426) acc 78.1250 (73.5197) lr 1.2369e-04 eta 1:18:46 +epoch [44/50] batch [480/500] time 1.556 (1.563) data 0.000 (0.002) loss 0.4802 (1.0414) acc 87.5000 (73.5612) lr 1.2369e-04 eta 1:18:38 +epoch [44/50] batch [485/500] time 1.552 
(1.563) data 0.001 (0.002) loss 0.8110 (1.0408) acc 78.1250 (73.5696) lr 1.2369e-04 eta 1:18:31 +epoch [44/50] batch [490/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.3740 (1.0429) acc 68.7500 (73.5140) lr 1.2369e-04 eta 1:18:23 +epoch [44/50] batch [495/500] time 1.536 (1.562) data 0.000 (0.002) loss 0.7944 (1.0419) acc 78.1250 (73.5354) lr 1.2369e-04 eta 1:18:15 +epoch [44/50] batch [500/500] time 1.544 (1.562) data 0.000 (0.002) loss 0.7769 (1.0402) acc 81.2500 (73.5750) lr 9.5173e-05 eta 1:18:06 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,021 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [45/50] batch [5/500] time 1.573 (1.725) data 0.000 (0.195) loss 1.1318 (1.0828) acc 75.0000 (75.6250) lr 9.5173e-05 eta 1:26:06 +epoch [45/50] batch [10/500] time 1.554 (1.639) data 0.001 (0.098) loss 1.0059 (1.0111) acc 75.0000 (75.9375) lr 9.5173e-05 eta 1:21:39 +epoch [45/50] batch [15/500] time 1.567 (1.613) data 0.001 (0.065) loss 0.9712 (0.9877) acc 71.8750 (74.3750) lr 9.5173e-05 eta 1:20:16 +epoch [45/50] batch [20/500] time 1.565 (1.600) data 0.001 (0.049) loss 1.4951 (1.0625) acc 62.5000 (72.8125) lr 9.5173e-05 eta 1:19:29 +epoch [45/50] batch [25/500] time 1.574 (1.593) data 0.000 (0.039) loss 1.0293 (1.0461) acc 68.7500 (73.6250) lr 9.5173e-05 eta 1:18:59 +epoch [45/50] batch [30/500] time 1.576 (1.588) data 0.000 (0.033) loss 1.3330 (1.0647) acc 62.5000 (72.9167) lr 9.5173e-05 eta 1:18:35 +epoch [45/50] batch [35/500] time 1.577 (1.586) data 0.000 (0.028) loss 0.8804 (1.0584) acc 78.1250 (73.3036) lr 9.5173e-05 eta 1:18:22 +epoch [45/50] batch [40/500] time 1.559 (1.583) data 0.000 (0.025) loss 0.5581 (1.0216) acc 81.2500 (74.4531) lr 9.5173e-05 eta 1:18:05 +epoch [45/50] batch [45/500] time 1.538 (1.581) data 0.000 (0.022) loss 1.4463 (1.0339) acc 68.7500 (74.0278) lr 9.5173e-05 eta 1:17:51 +epoch [45/50] batch [50/500] time 1.560 (1.578) data 0.001 (0.020) loss 0.4873 (1.0188) acc 87.5000 (74.5000) lr 9.5173e-05 eta 
1:17:35 +epoch [45/50] batch [55/500] time 1.535 (1.575) data 0.000 (0.018) loss 1.3330 (1.0214) acc 71.8750 (74.4886) lr 9.5173e-05 eta 1:17:19 +epoch [45/50] batch [60/500] time 1.599 (1.575) data 0.001 (0.017) loss 0.8525 (1.0247) acc 84.3750 (74.7396) lr 9.5173e-05 eta 1:17:09 +epoch [45/50] batch [65/500] time 1.537 (1.573) data 0.000 (0.015) loss 0.9634 (1.0236) acc 75.0000 (74.8077) lr 9.5173e-05 eta 1:16:57 +epoch [45/50] batch [70/500] time 1.598 (1.573) data 0.000 (0.014) loss 1.2227 (1.0426) acc 68.7500 (74.4196) lr 9.5173e-05 eta 1:16:49 +epoch [45/50] batch [75/500] time 1.560 (1.573) data 0.001 (0.013) loss 1.0059 (1.0431) acc 65.6250 (74.2500) lr 9.5173e-05 eta 1:16:40 +epoch [45/50] batch [80/500] time 1.570 (1.573) data 0.000 (0.013) loss 0.7627 (1.0333) acc 87.5000 (74.6094) lr 9.5173e-05 eta 1:16:32 +epoch [45/50] batch [85/500] time 1.553 (1.572) data 0.000 (0.012) loss 0.5371 (1.0304) acc 75.0000 (74.4853) lr 9.5173e-05 eta 1:16:21 +epoch [45/50] batch [90/500] time 1.587 (1.572) data 0.000 (0.011) loss 0.9863 (1.0231) acc 84.3750 (74.6528) lr 9.5173e-05 eta 1:16:13 +epoch [45/50] batch [95/500] time 1.553 (1.571) data 0.000 (0.011) loss 0.4387 (1.0111) acc 90.6250 (74.9342) lr 9.5173e-05 eta 1:16:03 +epoch [45/50] batch [100/500] time 1.553 (1.571) data 0.001 (0.010) loss 0.6675 (1.0016) acc 81.2500 (75.0312) lr 9.5173e-05 eta 1:15:54 +epoch [45/50] batch [105/500] time 1.540 (1.570) data 0.000 (0.010) loss 1.1133 (1.0028) acc 75.0000 (74.9107) lr 9.5173e-05 eta 1:15:44 +epoch [45/50] batch [110/500] time 1.553 (1.569) data 0.000 (0.009) loss 0.6460 (1.0020) acc 87.5000 (74.9432) lr 9.5173e-05 eta 1:15:35 +epoch [45/50] batch [115/500] time 1.578 (1.569) data 0.000 (0.009) loss 1.1299 (1.0051) acc 65.6250 (74.7826) lr 9.5173e-05 eta 1:15:25 +epoch [45/50] batch [120/500] time 1.558 (1.568) data 0.000 (0.009) loss 1.2480 (1.0041) acc 65.6250 (74.7656) lr 9.5173e-05 eta 1:15:17 +epoch [45/50] batch [125/500] time 1.548 (1.568) data 0.000 (0.008) 
loss 0.7393 (0.9920) acc 65.6250 (74.9750) lr 9.5173e-05 eta 1:15:06 +epoch [45/50] batch [130/500] time 1.542 (1.567) data 0.000 (0.008) loss 1.1514 (0.9889) acc 75.0000 (75.2163) lr 9.5173e-05 eta 1:14:57 +epoch [45/50] batch [135/500] time 1.550 (1.567) data 0.001 (0.008) loss 0.9365 (0.9854) acc 78.1250 (75.2083) lr 9.5173e-05 eta 1:14:48 +epoch [45/50] batch [140/500] time 1.568 (1.566) data 0.000 (0.007) loss 0.7456 (0.9857) acc 75.0000 (75.1786) lr 9.5173e-05 eta 1:14:39 +epoch [45/50] batch [145/500] time 1.554 (1.567) data 0.000 (0.007) loss 1.1465 (0.9889) acc 68.7500 (75.0647) lr 9.5173e-05 eta 1:14:32 +epoch [45/50] batch [150/500] time 1.566 (1.566) data 0.000 (0.007) loss 1.0430 (0.9940) acc 68.7500 (74.9583) lr 9.5173e-05 eta 1:14:23 +epoch [45/50] batch [155/500] time 1.569 (1.566) data 0.000 (0.007) loss 0.9438 (0.9906) acc 65.6250 (74.9395) lr 9.5173e-05 eta 1:14:16 +epoch [45/50] batch [160/500] time 1.555 (1.566) data 0.000 (0.007) loss 1.0801 (0.9879) acc 68.7500 (75.0000) lr 9.5173e-05 eta 1:14:07 +epoch [45/50] batch [165/500] time 1.572 (1.566) data 0.000 (0.006) loss 1.3857 (0.9938) acc 75.0000 (75.0189) lr 9.5173e-05 eta 1:14:00 +epoch [45/50] batch [170/500] time 1.545 (1.566) data 0.000 (0.006) loss 1.1328 (0.9956) acc 78.1250 (74.8162) lr 9.5173e-05 eta 1:13:52 +epoch [45/50] batch [175/500] time 1.553 (1.566) data 0.000 (0.006) loss 0.9946 (0.9954) acc 71.8750 (74.7679) lr 9.5173e-05 eta 1:13:43 +epoch [45/50] batch [180/500] time 1.540 (1.566) data 0.000 (0.006) loss 0.9292 (0.9976) acc 81.2500 (74.7396) lr 9.5173e-05 eta 1:13:35 +epoch [45/50] batch [185/500] time 1.579 (1.566) data 0.001 (0.006) loss 0.9819 (0.9978) acc 75.0000 (74.7804) lr 9.5173e-05 eta 1:13:26 +epoch [45/50] batch [190/500] time 1.546 (1.566) data 0.000 (0.006) loss 0.8618 (0.9970) acc 75.0000 (74.7697) lr 9.5173e-05 eta 1:13:19 +epoch [45/50] batch [195/500] time 1.565 (1.566) data 0.000 (0.005) loss 0.7412 (0.9964) acc 84.3750 (74.8397) lr 9.5173e-05 eta 
1:13:11 +epoch [45/50] batch [200/500] time 1.552 (1.566) data 0.000 (0.005) loss 1.0156 (0.9951) acc 71.8750 (74.8750) lr 9.5173e-05 eta 1:13:03 +epoch [45/50] batch [205/500] time 1.556 (1.566) data 0.000 (0.005) loss 0.7681 (0.9935) acc 78.1250 (74.8476) lr 9.5173e-05 eta 1:12:55 +epoch [45/50] batch [210/500] time 1.553 (1.565) data 0.000 (0.005) loss 0.7998 (0.9920) acc 84.3750 (74.9256) lr 9.5173e-05 eta 1:12:47 +epoch [45/50] batch [215/500] time 1.552 (1.565) data 0.000 (0.005) loss 0.9639 (0.9909) acc 78.1250 (75.0000) lr 9.5173e-05 eta 1:12:39 +epoch [45/50] batch [220/500] time 1.565 (1.566) data 0.000 (0.005) loss 1.1416 (0.9943) acc 78.1250 (74.8864) lr 9.5173e-05 eta 1:12:32 +epoch [45/50] batch [225/500] time 1.553 (1.565) data 0.001 (0.005) loss 1.3975 (0.9980) acc 59.3750 (74.8194) lr 9.5173e-05 eta 1:12:23 +epoch [45/50] batch [230/500] time 1.563 (1.565) data 0.000 (0.005) loss 1.1484 (0.9973) acc 68.7500 (74.8505) lr 9.5173e-05 eta 1:12:16 +epoch [45/50] batch [235/500] time 1.561 (1.565) data 0.000 (0.005) loss 0.6523 (0.9984) acc 81.2500 (74.7872) lr 9.5173e-05 eta 1:12:08 +epoch [45/50] batch [240/500] time 1.527 (1.565) data 0.000 (0.004) loss 1.2246 (0.9989) acc 71.8750 (74.7917) lr 9.5173e-05 eta 1:11:59 +epoch [45/50] batch [245/500] time 1.561 (1.565) data 0.000 (0.004) loss 0.7837 (0.9955) acc 84.3750 (74.8852) lr 9.5173e-05 eta 1:11:50 +epoch [45/50] batch [250/500] time 1.559 (1.564) data 0.000 (0.004) loss 0.4531 (0.9920) acc 84.3750 (74.9625) lr 9.5173e-05 eta 1:11:42 +epoch [45/50] batch [255/500] time 1.553 (1.564) data 0.000 (0.004) loss 0.9888 (0.9931) acc 71.8750 (74.9755) lr 9.5173e-05 eta 1:11:33 +epoch [45/50] batch [260/500] time 1.558 (1.564) data 0.000 (0.004) loss 0.7910 (0.9932) acc 81.2500 (75.0000) lr 9.5173e-05 eta 1:11:25 +epoch [45/50] batch [265/500] time 1.564 (1.564) data 0.000 (0.004) loss 0.8813 (0.9948) acc 75.0000 (75.0236) lr 9.5173e-05 eta 1:11:17 +epoch [45/50] batch [270/500] time 1.527 (1.564) data 
0.000 (0.004) loss 0.7725 (0.9937) acc 75.0000 (75.0231) lr 9.5173e-05 eta 1:11:08 +epoch [45/50] batch [275/500] time 1.550 (1.563) data 0.000 (0.004) loss 1.2041 (0.9923) acc 75.0000 (75.0568) lr 9.5173e-05 eta 1:11:00 +epoch [45/50] batch [280/500] time 1.566 (1.564) data 0.000 (0.004) loss 0.5557 (0.9923) acc 84.3750 (75.0223) lr 9.5173e-05 eta 1:10:52 +epoch [45/50] batch [285/500] time 1.700 (1.564) data 0.000 (0.004) loss 1.1484 (0.9931) acc 68.7500 (74.9781) lr 9.5173e-05 eta 1:10:46 +epoch [45/50] batch [290/500] time 1.583 (1.564) data 0.000 (0.004) loss 1.1396 (0.9966) acc 65.6250 (74.8491) lr 9.5173e-05 eta 1:10:39 +epoch [45/50] batch [295/500] time 1.590 (1.564) data 0.001 (0.004) loss 0.7207 (0.9965) acc 78.1250 (74.8623) lr 9.5173e-05 eta 1:10:31 +epoch [45/50] batch [300/500] time 1.565 (1.564) data 0.000 (0.004) loss 1.3086 (0.9958) acc 68.7500 (74.8542) lr 9.5173e-05 eta 1:10:24 +epoch [45/50] batch [305/500] time 1.568 (1.565) data 0.000 (0.004) loss 0.9478 (0.9931) acc 68.7500 (74.8361) lr 9.5173e-05 eta 1:10:17 +epoch [45/50] batch [310/500] time 1.569 (1.565) data 0.001 (0.004) loss 0.9619 (0.9953) acc 78.1250 (74.8690) lr 9.5173e-05 eta 1:10:09 +epoch [45/50] batch [315/500] time 1.550 (1.565) data 0.000 (0.004) loss 1.6113 (0.9981) acc 68.7500 (74.8512) lr 9.5173e-05 eta 1:10:02 +epoch [45/50] batch [320/500] time 1.558 (1.565) data 0.000 (0.003) loss 0.9419 (0.9971) acc 75.0000 (74.8438) lr 9.5173e-05 eta 1:09:54 +epoch [45/50] batch [325/500] time 1.571 (1.565) data 0.000 (0.003) loss 1.1338 (0.9974) acc 81.2500 (74.8173) lr 9.5173e-05 eta 1:09:46 +epoch [45/50] batch [330/500] time 1.568 (1.565) data 0.000 (0.003) loss 0.6597 (0.9965) acc 84.3750 (74.8485) lr 9.5173e-05 eta 1:09:39 +epoch [45/50] batch [335/500] time 1.551 (1.565) data 0.000 (0.003) loss 1.3066 (0.9974) acc 65.6250 (74.8694) lr 9.5173e-05 eta 1:09:31 +epoch [45/50] batch [340/500] time 1.584 (1.565) data 0.000 (0.003) loss 1.3535 (0.9983) acc 75.0000 (74.8989) lr 
9.5173e-05 eta 1:09:23 +epoch [45/50] batch [345/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.3447 (1.0042) acc 62.5000 (74.7917) lr 9.5173e-05 eta 1:09:15 +epoch [45/50] batch [350/500] time 1.576 (1.565) data 0.000 (0.003) loss 0.7725 (1.0063) acc 81.2500 (74.7857) lr 9.5173e-05 eta 1:09:07 +epoch [45/50] batch [355/500] time 1.580 (1.565) data 0.000 (0.003) loss 1.1045 (1.0085) acc 68.7500 (74.7095) lr 9.5173e-05 eta 1:09:00 +epoch [45/50] batch [360/500] time 1.577 (1.565) data 0.001 (0.003) loss 0.8955 (1.0094) acc 75.0000 (74.7049) lr 9.5173e-05 eta 1:08:52 +epoch [45/50] batch [365/500] time 1.544 (1.565) data 0.000 (0.003) loss 1.0508 (1.0088) acc 78.1250 (74.6661) lr 9.5173e-05 eta 1:08:44 +epoch [45/50] batch [370/500] time 1.528 (1.565) data 0.000 (0.003) loss 1.1836 (1.0104) acc 65.6250 (74.6284) lr 9.5173e-05 eta 1:08:36 +epoch [45/50] batch [375/500] time 1.553 (1.565) data 0.000 (0.003) loss 0.5913 (1.0071) acc 84.3750 (74.7083) lr 9.5173e-05 eta 1:08:27 +epoch [45/50] batch [380/500] time 1.537 (1.565) data 0.000 (0.003) loss 1.1182 (1.0091) acc 68.7500 (74.6382) lr 9.5173e-05 eta 1:08:19 +epoch [45/50] batch [385/500] time 1.568 (1.565) data 0.000 (0.003) loss 0.8037 (1.0107) acc 78.1250 (74.6510) lr 9.5173e-05 eta 1:08:11 +epoch [45/50] batch [390/500] time 1.581 (1.564) data 0.000 (0.003) loss 1.1611 (1.0119) acc 62.5000 (74.5833) lr 9.5173e-05 eta 1:08:03 +epoch [45/50] batch [395/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.2256 (1.0132) acc 75.0000 (74.5728) lr 9.5173e-05 eta 1:07:55 +epoch [45/50] batch [400/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.5664 (1.0137) acc 56.2500 (74.5391) lr 9.5173e-05 eta 1:07:48 +epoch [45/50] batch [405/500] time 1.573 (1.565) data 0.000 (0.003) loss 0.9370 (1.0135) acc 78.1250 (74.5525) lr 9.5173e-05 eta 1:07:40 +epoch [45/50] batch [410/500] time 1.579 (1.565) data 0.001 (0.003) loss 0.6333 (1.0143) acc 81.2500 (74.5503) lr 9.5173e-05 eta 1:07:32 +epoch [45/50] batch [415/500] time 1.561 
(1.565) data 0.000 (0.003) loss 0.5518 (1.0118) acc 87.5000 (74.5482) lr 9.5173e-05 eta 1:07:25 +epoch [45/50] batch [420/500] time 1.572 (1.565) data 0.000 (0.003) loss 2.0508 (1.0142) acc 59.3750 (74.5164) lr 9.5173e-05 eta 1:07:17 +epoch [45/50] batch [425/500] time 1.535 (1.565) data 0.000 (0.003) loss 0.8545 (1.0155) acc 78.1250 (74.5147) lr 9.5173e-05 eta 1:07:09 +epoch [45/50] batch [430/500] time 1.539 (1.565) data 0.000 (0.003) loss 1.0361 (1.0152) acc 56.2500 (74.4404) lr 9.5173e-05 eta 1:07:01 +epoch [45/50] batch [435/500] time 1.551 (1.565) data 0.000 (0.003) loss 0.9995 (1.0160) acc 68.7500 (74.4181) lr 9.5173e-05 eta 1:06:53 +epoch [45/50] batch [440/500] time 1.562 (1.565) data 0.000 (0.003) loss 0.7173 (1.0171) acc 75.0000 (74.3892) lr 9.5173e-05 eta 1:06:45 +epoch [45/50] batch [445/500] time 1.530 (1.565) data 0.000 (0.003) loss 0.6011 (1.0148) acc 90.6250 (74.5084) lr 9.5173e-05 eta 1:06:37 +epoch [45/50] batch [450/500] time 1.555 (1.565) data 0.000 (0.003) loss 0.4243 (1.0123) acc 84.3750 (74.5486) lr 9.5173e-05 eta 1:06:29 +epoch [45/50] batch [455/500] time 1.546 (1.564) data 0.000 (0.003) loss 0.9375 (1.0155) acc 78.1250 (74.4986) lr 9.5173e-05 eta 1:06:21 +epoch [45/50] batch [460/500] time 1.570 (1.564) data 0.001 (0.003) loss 0.9351 (1.0162) acc 78.1250 (74.5177) lr 9.5173e-05 eta 1:06:13 +epoch [45/50] batch [465/500] time 1.575 (1.564) data 0.000 (0.003) loss 0.5449 (1.0145) acc 84.3750 (74.5430) lr 9.5173e-05 eta 1:06:05 +epoch [45/50] batch [470/500] time 1.557 (1.564) data 0.000 (0.002) loss 1.3135 (1.0149) acc 75.0000 (74.5878) lr 9.5173e-05 eta 1:05:57 +epoch [45/50] batch [475/500] time 1.546 (1.565) data 0.001 (0.002) loss 1.3330 (1.0159) acc 62.5000 (74.5395) lr 9.5173e-05 eta 1:05:50 +epoch [45/50] batch [480/500] time 1.549 (1.565) data 0.001 (0.002) loss 0.8979 (1.0169) acc 71.8750 (74.5182) lr 9.5173e-05 eta 1:05:42 +epoch [45/50] batch [485/500] time 1.579 (1.565) data 0.001 (0.002) loss 1.4746 (1.0180) acc 59.3750 
(74.4845) lr 9.5173e-05 eta 1:05:34 +epoch [45/50] batch [490/500] time 1.562 (1.564) data 0.000 (0.002) loss 0.7666 (1.0160) acc 81.2500 (74.4898) lr 9.5173e-05 eta 1:05:26 +epoch [45/50] batch [495/500] time 1.558 (1.564) data 0.000 (0.002) loss 1.1797 (1.0185) acc 68.7500 (74.4318) lr 9.5173e-05 eta 1:05:18 +epoch [45/50] batch [500/500] time 1.550 (1.564) data 0.000 (0.002) loss 1.3389 (1.0190) acc 65.6250 (74.4313) lr 7.0224e-05 eta 1:05:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,016 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [46/50] batch [5/500] time 1.539 (1.694) data 0.000 (0.195) loss 1.0010 (1.0370) acc 75.0000 (75.0000) lr 7.0224e-05 eta 1:10:25 +epoch [46/50] batch [10/500] time 1.572 (1.628) data 0.000 (0.098) loss 0.8457 (1.1188) acc 81.2500 (73.7500) lr 7.0224e-05 eta 1:07:33 +epoch [46/50] batch [15/500] time 1.556 (1.606) data 0.001 (0.065) loss 0.9360 (1.0685) acc 75.0000 (73.9583) lr 7.0224e-05 eta 1:06:31 +epoch [46/50] batch [20/500] time 1.558 (1.592) data 0.000 (0.049) loss 0.6157 (1.0729) acc 87.5000 (74.6875) lr 7.0224e-05 eta 1:05:47 +epoch [46/50] batch [25/500] time 1.556 (1.584) data 0.000 (0.039) loss 1.1035 (1.0613) acc 78.1250 (74.8750) lr 7.0224e-05 eta 1:05:19 +epoch [46/50] batch [30/500] time 1.546 (1.581) data 0.000 (0.033) loss 1.1543 (1.0311) acc 68.7500 (75.1042) lr 7.0224e-05 eta 1:05:04 +epoch [46/50] batch [35/500] time 1.540 (1.577) data 0.000 (0.028) loss 1.0752 (1.0381) acc 75.0000 (75.0000) lr 7.0224e-05 eta 1:04:47 +epoch [46/50] batch [40/500] time 1.569 (1.576) data 0.000 (0.025) loss 1.2197 (1.0262) acc 71.8750 (75.1562) lr 7.0224e-05 eta 1:04:36 +epoch [46/50] batch [45/500] time 1.556 (1.575) data 0.000 (0.022) loss 1.0938 (1.0482) acc 78.1250 (74.1667) lr 7.0224e-05 eta 1:04:25 +epoch [46/50] batch [50/500] time 1.564 (1.573) data 0.000 (0.020) loss 0.9077 (1.0425) acc 71.8750 (74.5000) lr 7.0224e-05 eta 1:04:15 +epoch [46/50] batch [55/500] time 1.555 (1.571) 
data 0.000 (0.018) loss 1.3428 (1.0604) acc 71.8750 (74.2045) lr 7.0224e-05 eta 1:04:01 +epoch [46/50] batch [60/500] time 1.563 (1.571) data 0.000 (0.017) loss 0.8960 (1.0456) acc 68.7500 (74.4792) lr 7.0224e-05 eta 1:03:54 +epoch [46/50] batch [65/500] time 1.566 (1.573) data 0.000 (0.015) loss 0.6289 (1.0320) acc 84.3750 (74.5192) lr 7.0224e-05 eta 1:03:49 +epoch [46/50] batch [70/500] time 1.556 (1.572) data 0.000 (0.014) loss 1.0254 (1.0270) acc 84.3750 (74.5982) lr 7.0224e-05 eta 1:03:39 +epoch [46/50] batch [75/500] time 1.547 (1.570) data 0.000 (0.013) loss 1.1270 (1.0203) acc 71.8750 (74.5833) lr 7.0224e-05 eta 1:03:28 +epoch [46/50] batch [80/500] time 1.549 (1.571) data 0.000 (0.013) loss 0.8521 (1.0103) acc 75.0000 (74.9609) lr 7.0224e-05 eta 1:03:21 +epoch [46/50] batch [85/500] time 1.544 (1.569) data 0.001 (0.012) loss 0.9214 (1.0223) acc 71.8750 (74.7426) lr 7.0224e-05 eta 1:03:09 +epoch [46/50] batch [90/500] time 1.558 (1.569) data 0.000 (0.011) loss 1.0518 (1.0262) acc 75.0000 (74.4792) lr 7.0224e-05 eta 1:03:00 +epoch [46/50] batch [95/500] time 1.561 (1.568) data 0.000 (0.011) loss 1.1494 (1.0256) acc 71.8750 (74.4408) lr 7.0224e-05 eta 1:02:50 +epoch [46/50] batch [100/500] time 1.556 (1.567) data 0.000 (0.010) loss 0.8696 (1.0228) acc 84.3750 (74.3750) lr 7.0224e-05 eta 1:02:40 +epoch [46/50] batch [105/500] time 1.547 (1.566) data 0.000 (0.010) loss 0.7026 (1.0318) acc 84.3750 (74.4048) lr 7.0224e-05 eta 1:02:31 +epoch [46/50] batch [110/500] time 1.542 (1.566) data 0.000 (0.009) loss 0.7793 (1.0241) acc 84.3750 (74.6307) lr 7.0224e-05 eta 1:02:23 +epoch [46/50] batch [115/500] time 1.530 (1.565) data 0.000 (0.009) loss 1.0879 (1.0290) acc 78.1250 (74.6467) lr 7.0224e-05 eta 1:02:13 +epoch [46/50] batch [120/500] time 1.551 (1.565) data 0.000 (0.009) loss 1.2178 (1.0281) acc 62.5000 (74.5833) lr 7.0224e-05 eta 1:02:03 +epoch [46/50] batch [125/500] time 1.546 (1.564) data 0.000 (0.008) loss 1.0762 (1.0207) acc 81.2500 (74.8000) lr 7.0224e-05 
eta 1:01:55 +epoch [46/50] batch [130/500] time 1.562 (1.564) data 0.000 (0.008) loss 0.4348 (1.0174) acc 87.5000 (74.8317) lr 7.0224e-05 eta 1:01:46 +epoch [46/50] batch [135/500] time 1.558 (1.564) data 0.001 (0.008) loss 0.8784 (1.0166) acc 75.0000 (74.8148) lr 7.0224e-05 eta 1:01:37 +epoch [46/50] batch [140/500] time 1.565 (1.564) data 0.000 (0.007) loss 0.7759 (1.0145) acc 81.2500 (74.7545) lr 7.0224e-05 eta 1:01:30 +epoch [46/50] batch [145/500] time 1.557 (1.564) data 0.000 (0.007) loss 1.7256 (1.0285) acc 65.6250 (74.6121) lr 7.0224e-05 eta 1:01:22 +epoch [46/50] batch [150/500] time 1.553 (1.563) data 0.001 (0.007) loss 1.6729 (1.0356) acc 65.6250 (74.3958) lr 7.0224e-05 eta 1:01:13 +epoch [46/50] batch [155/500] time 1.571 (1.563) data 0.001 (0.007) loss 0.9258 (1.0323) acc 78.1250 (74.3347) lr 7.0224e-05 eta 1:01:05 +epoch [46/50] batch [160/500] time 1.562 (1.563) data 0.001 (0.006) loss 1.7324 (1.0316) acc 65.6250 (74.4141) lr 7.0224e-05 eta 1:00:57 +epoch [46/50] batch [165/500] time 1.532 (1.563) data 0.000 (0.006) loss 0.6865 (1.0346) acc 84.3750 (74.3750) lr 7.0224e-05 eta 1:00:49 +epoch [46/50] batch [170/500] time 1.562 (1.563) data 0.000 (0.006) loss 0.6763 (1.0293) acc 81.2500 (74.5037) lr 7.0224e-05 eta 1:00:41 +epoch [46/50] batch [175/500] time 1.550 (1.563) data 0.000 (0.006) loss 1.5176 (1.0380) acc 68.7500 (74.4464) lr 7.0224e-05 eta 1:00:32 +epoch [46/50] batch [180/500] time 1.529 (1.562) data 0.000 (0.006) loss 0.7207 (1.0351) acc 84.3750 (74.4444) lr 7.0224e-05 eta 1:00:23 +epoch [46/50] batch [185/500] time 1.541 (1.562) data 0.000 (0.006) loss 1.0674 (1.0331) acc 68.7500 (74.4088) lr 7.0224e-05 eta 1:00:15 +epoch [46/50] batch [190/500] time 1.544 (1.561) data 0.000 (0.006) loss 0.8525 (1.0292) acc 68.7500 (74.2599) lr 7.0224e-05 eta 1:00:06 +epoch [46/50] batch [195/500] time 1.563 (1.561) data 0.000 (0.005) loss 1.6768 (1.0346) acc 56.2500 (74.0705) lr 7.0224e-05 eta 0:59:58 +epoch [46/50] batch [200/500] time 1.558 (1.561) data 
0.000 (0.005) loss 1.2256 (1.0336) acc 71.8750 (74.1406) lr 7.0224e-05 eta 0:59:50 +epoch [46/50] batch [205/500] time 1.655 (1.561) data 0.000 (0.005) loss 0.9414 (1.0407) acc 81.2500 (74.0396) lr 7.0224e-05 eta 0:59:42 +epoch [46/50] batch [210/500] time 1.570 (1.561) data 0.000 (0.005) loss 0.9214 (1.0359) acc 78.1250 (74.1518) lr 7.0224e-05 eta 0:59:34 +epoch [46/50] batch [215/500] time 1.533 (1.561) data 0.000 (0.005) loss 1.1504 (1.0391) acc 68.7500 (74.0698) lr 7.0224e-05 eta 0:59:26 +epoch [46/50] batch [220/500] time 1.555 (1.561) data 0.000 (0.005) loss 1.3027 (1.0423) acc 65.6250 (74.0625) lr 7.0224e-05 eta 0:59:18 +epoch [46/50] batch [225/500] time 1.557 (1.561) data 0.000 (0.005) loss 1.2559 (1.0443) acc 65.6250 (74.0556) lr 7.0224e-05 eta 0:59:10 +epoch [46/50] batch [230/500] time 1.532 (1.560) data 0.001 (0.005) loss 1.7764 (1.0438) acc 59.3750 (74.0217) lr 7.0224e-05 eta 0:59:02 +epoch [46/50] batch [235/500] time 1.550 (1.561) data 0.000 (0.005) loss 1.0996 (1.0408) acc 78.1250 (74.1090) lr 7.0224e-05 eta 0:58:54 +epoch [46/50] batch [240/500] time 1.550 (1.560) data 0.000 (0.004) loss 0.6206 (1.0393) acc 84.3750 (74.2057) lr 7.0224e-05 eta 0:58:46 +epoch [46/50] batch [245/500] time 1.545 (1.560) data 0.000 (0.004) loss 2.0020 (1.0450) acc 62.5000 (74.1582) lr 7.0224e-05 eta 0:58:38 +epoch [46/50] batch [250/500] time 1.556 (1.561) data 0.000 (0.004) loss 1.5801 (1.0468) acc 75.0000 (74.1750) lr 7.0224e-05 eta 0:58:32 +epoch [46/50] batch [255/500] time 1.542 (1.561) data 0.000 (0.004) loss 0.7358 (1.0509) acc 84.3750 (74.1176) lr 7.0224e-05 eta 0:58:23 +epoch [46/50] batch [260/500] time 1.541 (1.560) data 0.000 (0.004) loss 1.6768 (1.0512) acc 65.6250 (74.1346) lr 7.0224e-05 eta 0:58:15 +epoch [46/50] batch [265/500] time 1.549 (1.560) data 0.000 (0.004) loss 1.0342 (1.0487) acc 68.7500 (74.1863) lr 7.0224e-05 eta 0:58:07 +epoch [46/50] batch [270/500] time 1.532 (1.560) data 0.000 (0.004) loss 1.2217 (1.0450) acc 68.7500 (74.1782) lr 
7.0224e-05 eta 0:57:59 +epoch [46/50] batch [275/500] time 1.545 (1.560) data 0.000 (0.004) loss 1.5195 (1.0525) acc 68.7500 (74.0682) lr 7.0224e-05 eta 0:57:51 +epoch [46/50] batch [280/500] time 1.554 (1.560) data 0.000 (0.004) loss 0.7700 (1.0493) acc 81.2500 (74.1629) lr 7.0224e-05 eta 0:57:43 +epoch [46/50] batch [285/500] time 1.577 (1.560) data 0.000 (0.004) loss 1.3545 (1.0486) acc 62.5000 (74.1557) lr 7.0224e-05 eta 0:57:35 +epoch [46/50] batch [290/500] time 1.573 (1.560) data 0.000 (0.004) loss 1.0654 (1.0473) acc 75.0000 (74.1810) lr 7.0224e-05 eta 0:57:27 +epoch [46/50] batch [295/500] time 1.546 (1.560) data 0.000 (0.004) loss 1.1846 (1.0484) acc 71.8750 (74.0890) lr 7.0224e-05 eta 0:57:20 +epoch [46/50] batch [300/500] time 1.530 (1.560) data 0.000 (0.004) loss 1.4219 (1.0515) acc 68.7500 (74.0938) lr 7.0224e-05 eta 0:57:11 +epoch [46/50] batch [305/500] time 1.552 (1.560) data 0.001 (0.004) loss 0.9717 (1.0509) acc 65.6250 (74.0164) lr 7.0224e-05 eta 0:57:03 +epoch [46/50] batch [310/500] time 1.562 (1.560) data 0.001 (0.004) loss 1.1807 (1.0490) acc 71.8750 (74.1028) lr 7.0224e-05 eta 0:56:55 +epoch [46/50] batch [315/500] time 1.553 (1.560) data 0.000 (0.003) loss 0.5562 (1.0478) acc 84.3750 (74.1567) lr 7.0224e-05 eta 0:56:47 +epoch [46/50] batch [320/500] time 1.559 (1.560) data 0.000 (0.003) loss 1.2275 (1.0481) acc 65.6250 (74.1309) lr 7.0224e-05 eta 0:56:39 +epoch [46/50] batch [325/500] time 1.551 (1.559) data 0.000 (0.003) loss 0.5918 (1.0511) acc 78.1250 (74.0673) lr 7.0224e-05 eta 0:56:31 +epoch [46/50] batch [330/500] time 1.578 (1.560) data 0.000 (0.003) loss 0.7969 (1.0510) acc 81.2500 (74.0436) lr 7.0224e-05 eta 0:56:24 +epoch [46/50] batch [335/500] time 1.539 (1.560) data 0.000 (0.003) loss 1.0625 (1.0502) acc 75.0000 (74.0765) lr 7.0224e-05 eta 0:56:16 +epoch [46/50] batch [340/500] time 1.566 (1.559) data 0.000 (0.003) loss 0.7480 (1.0494) acc 71.8750 (74.1176) lr 7.0224e-05 eta 0:56:08 +epoch [46/50] batch [345/500] time 1.553 
(1.560) data 0.000 (0.003) loss 1.3984 (1.0478) acc 68.7500 (74.1304) lr 7.0224e-05 eta 0:56:00 +epoch [46/50] batch [350/500] time 1.559 (1.560) data 0.000 (0.003) loss 1.2930 (1.0442) acc 68.7500 (74.2321) lr 7.0224e-05 eta 0:55:53 +epoch [46/50] batch [355/500] time 1.571 (1.560) data 0.001 (0.003) loss 1.0498 (1.0444) acc 75.0000 (74.2165) lr 7.0224e-05 eta 0:55:45 +epoch [46/50] batch [360/500] time 1.560 (1.560) data 0.000 (0.003) loss 1.3682 (1.0454) acc 68.7500 (74.2274) lr 7.0224e-05 eta 0:55:37 +epoch [46/50] batch [365/500] time 1.555 (1.560) data 0.000 (0.003) loss 0.8267 (1.0427) acc 71.8750 (74.2637) lr 7.0224e-05 eta 0:55:29 +epoch [46/50] batch [370/500] time 1.554 (1.560) data 0.000 (0.003) loss 1.1631 (1.0418) acc 71.8750 (74.2821) lr 7.0224e-05 eta 0:55:21 +epoch [46/50] batch [375/500] time 1.556 (1.560) data 0.000 (0.003) loss 0.8403 (1.0433) acc 65.6250 (74.2000) lr 7.0224e-05 eta 0:55:14 +epoch [46/50] batch [380/500] time 1.551 (1.559) data 0.000 (0.003) loss 1.5029 (1.0440) acc 59.3750 (74.1118) lr 7.0224e-05 eta 0:55:06 +epoch [46/50] batch [385/500] time 1.565 (1.560) data 0.000 (0.003) loss 0.9326 (1.0426) acc 71.8750 (74.1153) lr 7.0224e-05 eta 0:54:58 +epoch [46/50] batch [390/500] time 1.555 (1.559) data 0.000 (0.003) loss 0.8994 (1.0442) acc 71.8750 (74.0785) lr 7.0224e-05 eta 0:54:50 +epoch [46/50] batch [395/500] time 1.543 (1.560) data 0.000 (0.003) loss 1.0811 (1.0438) acc 75.0000 (74.1218) lr 7.0224e-05 eta 0:54:43 +epoch [46/50] batch [400/500] time 1.554 (1.560) data 0.000 (0.003) loss 1.0596 (1.0419) acc 68.7500 (74.1328) lr 7.0224e-05 eta 0:54:35 +epoch [46/50] batch [405/500] time 1.597 (1.560) data 0.000 (0.003) loss 1.0264 (1.0402) acc 78.1250 (74.1512) lr 7.0224e-05 eta 0:54:27 +epoch [46/50] batch [410/500] time 1.552 (1.560) data 0.000 (0.003) loss 0.5103 (1.0393) acc 84.3750 (74.1616) lr 7.0224e-05 eta 0:54:19 +epoch [46/50] batch [415/500] time 1.565 (1.560) data 0.000 (0.003) loss 0.5376 (1.0393) acc 93.7500 
(74.1416) lr 7.0224e-05 eta 0:54:11 +epoch [46/50] batch [420/500] time 1.570 (1.559) data 0.000 (0.003) loss 0.9238 (1.0363) acc 75.0000 (74.1964) lr 7.0224e-05 eta 0:54:03 +epoch [46/50] batch [425/500] time 1.558 (1.560) data 0.000 (0.003) loss 0.8896 (1.0329) acc 81.2500 (74.2868) lr 7.0224e-05 eta 0:53:55 +epoch [46/50] batch [430/500] time 1.535 (1.559) data 0.000 (0.003) loss 0.4663 (1.0306) acc 75.0000 (74.2733) lr 7.0224e-05 eta 0:53:48 +epoch [46/50] batch [435/500] time 1.543 (1.559) data 0.000 (0.003) loss 0.9663 (1.0284) acc 62.5000 (74.2457) lr 7.0224e-05 eta 0:53:40 +epoch [46/50] batch [440/500] time 1.535 (1.559) data 0.000 (0.003) loss 0.7329 (1.0287) acc 81.2500 (74.2472) lr 7.0224e-05 eta 0:53:32 +epoch [46/50] batch [445/500] time 1.565 (1.559) data 0.001 (0.003) loss 1.2490 (1.0270) acc 68.7500 (74.2978) lr 7.0224e-05 eta 0:53:24 +epoch [46/50] batch [450/500] time 1.535 (1.559) data 0.001 (0.003) loss 1.2627 (1.0280) acc 75.0000 (74.2847) lr 7.0224e-05 eta 0:53:16 +epoch [46/50] batch [455/500] time 1.567 (1.559) data 0.000 (0.003) loss 1.2773 (1.0301) acc 71.8750 (74.2788) lr 7.0224e-05 eta 0:53:09 +epoch [46/50] batch [460/500] time 1.528 (1.559) data 0.001 (0.003) loss 0.3718 (1.0291) acc 90.6250 (74.2935) lr 7.0224e-05 eta 0:53:01 +epoch [46/50] batch [465/500] time 1.543 (1.559) data 0.000 (0.003) loss 0.8735 (1.0286) acc 68.7500 (74.2876) lr 7.0224e-05 eta 0:52:53 +epoch [46/50] batch [470/500] time 1.554 (1.559) data 0.000 (0.002) loss 1.0654 (1.0305) acc 68.7500 (74.2354) lr 7.0224e-05 eta 0:52:45 +epoch [46/50] batch [475/500] time 1.544 (1.559) data 0.000 (0.002) loss 0.4465 (1.0304) acc 84.3750 (74.2171) lr 7.0224e-05 eta 0:52:37 +epoch [46/50] batch [480/500] time 1.556 (1.559) data 0.000 (0.002) loss 1.0264 (1.0297) acc 71.8750 (74.2448) lr 7.0224e-05 eta 0:52:29 +epoch [46/50] batch [485/500] time 1.568 (1.559) data 0.001 (0.002) loss 1.0869 (1.0292) acc 75.0000 (74.2268) lr 7.0224e-05 eta 0:52:21 +epoch [46/50] batch [490/500] 
time 1.549 (1.559) data 0.000 (0.002) loss 0.7939 (1.0290) acc 75.0000 (74.1773) lr 7.0224e-05 eta 0:52:13 +epoch [46/50] batch [495/500] time 1.563 (1.559) data 0.000 (0.002) loss 1.3457 (1.0295) acc 68.7500 (74.1667) lr 7.0224e-05 eta 0:52:06 +epoch [46/50] batch [500/500] time 1.569 (1.559) data 0.000 (0.002) loss 0.8887 (1.0275) acc 81.2500 (74.2125) lr 4.8943e-05 eta 0:51:58 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,020 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [47/50] batch [5/500] time 1.540 (1.656) data 0.000 (0.161) loss 0.9849 (0.9564) acc 71.8750 (75.0000) lr 4.8943e-05 eta 0:55:04 +epoch [47/50] batch [10/500] time 1.581 (1.609) data 0.000 (0.081) loss 1.4580 (1.0336) acc 65.6250 (74.0625) lr 4.8943e-05 eta 0:53:22 +epoch [47/50] batch [15/500] time 1.557 (1.589) data 0.000 (0.054) loss 1.3105 (1.0239) acc 62.5000 (73.9583) lr 4.8943e-05 eta 0:52:33 +epoch [47/50] batch [20/500] time 1.553 (1.580) data 0.000 (0.041) loss 1.2715 (1.0800) acc 56.2500 (73.4375) lr 4.8943e-05 eta 0:52:07 +epoch [47/50] batch [25/500] time 1.573 (1.577) data 0.001 (0.033) loss 1.5186 (1.0676) acc 59.3750 (73.3750) lr 4.8943e-05 eta 0:51:55 +epoch [47/50] batch [30/500] time 1.556 (1.574) data 0.000 (0.027) loss 1.0732 (1.0557) acc 84.3750 (74.1667) lr 4.8943e-05 eta 0:51:41 +epoch [47/50] batch [35/500] time 1.557 (1.572) data 0.000 (0.023) loss 0.5747 (1.0511) acc 78.1250 (73.9286) lr 4.8943e-05 eta 0:51:28 +epoch [47/50] batch [40/500] time 1.559 (1.572) data 0.000 (0.021) loss 1.2666 (1.0726) acc 59.3750 (73.0469) lr 4.8943e-05 eta 0:51:21 +epoch [47/50] batch [45/500] time 1.575 (1.571) data 0.000 (0.018) loss 0.6787 (1.0451) acc 78.1250 (73.6806) lr 4.8943e-05 eta 0:51:10 +epoch [47/50] batch [50/500] time 1.547 (1.569) data 0.000 (0.017) loss 1.5146 (1.0637) acc 68.7500 (73.1875) lr 4.8943e-05 eta 0:50:59 +epoch [47/50] batch [55/500] time 1.576 (1.569) data 0.000 (0.015) loss 0.8569 (1.0563) acc 75.0000 (73.5227) lr 
4.8943e-05 eta 0:50:52 +epoch [47/50] batch [60/500] time 1.561 (1.569) data 0.001 (0.014) loss 0.5273 (1.0334) acc 90.6250 (74.3229) lr 4.8943e-05 eta 0:50:44 +epoch [47/50] batch [65/500] time 1.528 (1.567) data 0.001 (0.013) loss 1.0527 (1.0460) acc 65.6250 (73.9423) lr 4.8943e-05 eta 0:50:32 +epoch [47/50] batch [70/500] time 1.557 (1.566) data 0.001 (0.012) loss 1.0547 (1.0331) acc 71.8750 (74.0625) lr 4.8943e-05 eta 0:50:21 +epoch [47/50] batch [75/500] time 1.595 (1.566) data 0.001 (0.011) loss 1.0430 (1.0425) acc 65.6250 (73.7083) lr 4.8943e-05 eta 0:50:14 +epoch [47/50] batch [80/500] time 1.573 (1.567) data 0.000 (0.011) loss 1.0664 (1.0439) acc 75.0000 (73.9453) lr 4.8943e-05 eta 0:50:08 +epoch [47/50] batch [85/500] time 1.558 (1.566) data 0.000 (0.010) loss 0.8818 (1.0397) acc 71.8750 (73.8603) lr 4.8943e-05 eta 0:49:58 +epoch [47/50] batch [90/500] time 1.555 (1.566) data 0.000 (0.009) loss 0.9932 (1.0235) acc 65.6250 (74.2361) lr 4.8943e-05 eta 0:49:50 +epoch [47/50] batch [95/500] time 1.541 (1.565) data 0.000 (0.009) loss 0.7124 (1.0318) acc 81.2500 (73.8816) lr 4.8943e-05 eta 0:49:41 +epoch [47/50] batch [100/500] time 1.549 (1.564) data 0.000 (0.009) loss 0.9116 (1.0259) acc 75.0000 (74.0625) lr 4.8943e-05 eta 0:49:32 +epoch [47/50] batch [105/500] time 1.562 (1.564) data 0.000 (0.008) loss 0.6050 (1.0407) acc 93.7500 (73.8095) lr 4.8943e-05 eta 0:49:24 +epoch [47/50] batch [110/500] time 1.559 (1.564) data 0.000 (0.008) loss 1.3955 (1.0471) acc 75.0000 (73.7216) lr 4.8943e-05 eta 0:49:15 +epoch [47/50] batch [115/500] time 1.568 (1.564) data 0.001 (0.007) loss 1.2510 (1.0540) acc 71.8750 (73.5326) lr 4.8943e-05 eta 0:49:08 +epoch [47/50] batch [120/500] time 1.597 (1.565) data 0.000 (0.007) loss 1.2656 (1.0503) acc 71.8750 (73.7760) lr 4.8943e-05 eta 0:49:02 +epoch [47/50] batch [125/500] time 1.560 (1.565) data 0.000 (0.007) loss 1.0801 (1.0582) acc 71.8750 (73.7750) lr 4.8943e-05 eta 0:48:54 +epoch [47/50] batch [130/500] time 1.572 (1.565) 
data 0.000 (0.007) loss 1.2920 (1.0594) acc 65.6250 (73.7500) lr 4.8943e-05 eta 0:48:46 +epoch [47/50] batch [135/500] time 1.565 (1.564) data 0.000 (0.006) loss 1.2080 (1.0630) acc 68.7500 (73.6806) lr 4.8943e-05 eta 0:48:37 +epoch [47/50] batch [140/500] time 1.559 (1.565) data 0.000 (0.006) loss 1.4727 (1.0603) acc 68.7500 (73.7277) lr 4.8943e-05 eta 0:48:30 +epoch [47/50] batch [145/500] time 1.542 (1.565) data 0.000 (0.006) loss 1.1387 (1.0635) acc 65.6250 (73.6207) lr 4.8943e-05 eta 0:48:22 +epoch [47/50] batch [150/500] time 1.530 (1.564) data 0.000 (0.006) loss 1.1279 (1.0631) acc 71.8750 (73.5417) lr 4.8943e-05 eta 0:48:13 +epoch [47/50] batch [155/500] time 1.545 (1.564) data 0.000 (0.006) loss 0.6382 (1.0577) acc 81.2500 (73.5081) lr 4.8943e-05 eta 0:48:04 +epoch [47/50] batch [160/500] time 1.558 (1.564) data 0.000 (0.005) loss 1.1123 (1.0566) acc 75.0000 (73.5938) lr 4.8943e-05 eta 0:47:57 +epoch [47/50] batch [165/500] time 1.558 (1.564) data 0.000 (0.005) loss 1.0137 (1.0585) acc 75.0000 (73.6553) lr 4.8943e-05 eta 0:47:50 +epoch [47/50] batch [170/500] time 1.581 (1.564) data 0.000 (0.005) loss 1.4102 (1.0576) acc 75.0000 (73.6949) lr 4.8943e-05 eta 0:47:42 +epoch [47/50] batch [175/500] time 1.553 (1.564) data 0.000 (0.005) loss 0.8066 (1.0533) acc 81.2500 (73.8036) lr 4.8943e-05 eta 0:47:34 +epoch [47/50] batch [180/500] time 1.563 (1.564) data 0.000 (0.005) loss 1.5566 (1.0546) acc 65.6250 (73.7153) lr 4.8943e-05 eta 0:47:26 +epoch [47/50] batch [185/500] time 1.569 (1.564) data 0.000 (0.005) loss 0.6045 (1.0485) acc 78.1250 (73.8007) lr 4.8943e-05 eta 0:47:18 +epoch [47/50] batch [190/500] time 1.554 (1.564) data 0.000 (0.005) loss 1.1436 (1.0578) acc 78.1250 (73.7171) lr 4.8943e-05 eta 0:47:10 +epoch [47/50] batch [195/500] time 1.584 (1.563) data 0.000 (0.005) loss 0.6978 (1.0551) acc 84.3750 (73.8141) lr 4.8943e-05 eta 0:47:02 +epoch [47/50] batch [200/500] time 1.562 (1.563) data 0.000 (0.004) loss 1.3213 (1.0561) acc 65.6250 (73.8281) lr 
4.8943e-05 eta 0:46:53 +epoch [47/50] batch [205/500] time 1.548 (1.563) data 0.000 (0.004) loss 1.2930 (1.0610) acc 59.3750 (73.7043) lr 4.8943e-05 eta 0:46:45 +epoch [47/50] batch [210/500] time 1.563 (1.563) data 0.000 (0.004) loss 0.8477 (1.0555) acc 75.0000 (73.8393) lr 4.8943e-05 eta 0:46:37 +epoch [47/50] batch [215/500] time 1.559 (1.563) data 0.000 (0.004) loss 0.9019 (1.0516) acc 78.1250 (73.8953) lr 4.8943e-05 eta 0:46:29 +epoch [47/50] batch [220/500] time 1.580 (1.563) data 0.000 (0.004) loss 1.0635 (1.0503) acc 78.1250 (73.9347) lr 4.8943e-05 eta 0:46:21 +epoch [47/50] batch [225/500] time 1.589 (1.563) data 0.000 (0.004) loss 1.2119 (1.0545) acc 65.6250 (73.8750) lr 4.8943e-05 eta 0:46:13 +epoch [47/50] batch [230/500] time 1.547 (1.563) data 0.000 (0.004) loss 1.0752 (1.0560) acc 78.1250 (73.8995) lr 4.8943e-05 eta 0:46:05 +epoch [47/50] batch [235/500] time 1.542 (1.562) data 0.000 (0.004) loss 0.9624 (1.0567) acc 75.0000 (73.8963) lr 4.8943e-05 eta 0:45:57 +epoch [47/50] batch [240/500] time 1.541 (1.562) data 0.000 (0.004) loss 0.9478 (1.0581) acc 71.8750 (73.8802) lr 4.8943e-05 eta 0:45:49 +epoch [47/50] batch [245/500] time 1.559 (1.562) data 0.000 (0.004) loss 1.3926 (1.0593) acc 71.8750 (73.9158) lr 4.8943e-05 eta 0:45:41 +epoch [47/50] batch [250/500] time 1.550 (1.562) data 0.000 (0.004) loss 0.7192 (1.0601) acc 71.8750 (73.8125) lr 4.8943e-05 eta 0:45:33 +epoch [47/50] batch [255/500] time 1.574 (1.562) data 0.000 (0.004) loss 0.7505 (1.0607) acc 84.3750 (73.8113) lr 4.8943e-05 eta 0:45:25 +epoch [47/50] batch [260/500] time 1.565 (1.562) data 0.000 (0.003) loss 1.2871 (1.0632) acc 59.3750 (73.7620) lr 4.8943e-05 eta 0:45:17 +epoch [47/50] batch [265/500] time 1.573 (1.562) data 0.000 (0.003) loss 0.9927 (1.0623) acc 71.8750 (73.7146) lr 4.8943e-05 eta 0:45:10 +epoch [47/50] batch [270/500] time 1.560 (1.562) data 0.000 (0.003) loss 1.2715 (1.0649) acc 71.8750 (73.7037) lr 4.8943e-05 eta 0:45:02 +epoch [47/50] batch [275/500] time 1.580 
(1.562) data 0.000 (0.003) loss 1.3994 (1.0670) acc 59.3750 (73.6591) lr 4.8943e-05 eta 0:44:54 +epoch [47/50] batch [280/500] time 1.661 (1.562) data 0.000 (0.003) loss 1.0889 (1.0683) acc 84.3750 (73.6496) lr 4.8943e-05 eta 0:44:47 +epoch [47/50] batch [285/500] time 1.557 (1.562) data 0.000 (0.003) loss 1.0205 (1.0691) acc 78.1250 (73.6184) lr 4.8943e-05 eta 0:44:39 +epoch [47/50] batch [290/500] time 1.560 (1.562) data 0.000 (0.003) loss 0.8013 (1.0714) acc 84.3750 (73.5560) lr 4.8943e-05 eta 0:44:31 +epoch [47/50] batch [295/500] time 1.559 (1.562) data 0.000 (0.003) loss 0.7505 (1.0678) acc 84.3750 (73.6547) lr 4.8943e-05 eta 0:44:23 +epoch [47/50] batch [300/500] time 1.568 (1.562) data 0.000 (0.003) loss 0.7812 (1.0713) acc 84.3750 (73.6042) lr 4.8943e-05 eta 0:44:15 +epoch [47/50] batch [305/500] time 1.551 (1.562) data 0.000 (0.003) loss 0.9370 (1.0698) acc 78.1250 (73.5758) lr 4.8943e-05 eta 0:44:07 +epoch [47/50] batch [310/500] time 1.568 (1.562) data 0.000 (0.003) loss 0.7661 (1.0671) acc 81.2500 (73.6794) lr 4.8943e-05 eta 0:43:59 +epoch [47/50] batch [315/500] time 1.559 (1.562) data 0.000 (0.003) loss 1.1230 (1.0662) acc 68.7500 (73.7103) lr 4.8943e-05 eta 0:43:52 +epoch [47/50] batch [320/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.1406 (1.0674) acc 68.7500 (73.6621) lr 4.8943e-05 eta 0:43:44 +epoch [47/50] batch [325/500] time 1.554 (1.562) data 0.000 (0.003) loss 0.8193 (1.0629) acc 71.8750 (73.7308) lr 4.8943e-05 eta 0:43:36 +epoch [47/50] batch [330/500] time 1.579 (1.562) data 0.000 (0.003) loss 0.6743 (1.0589) acc 81.2500 (73.7879) lr 4.8943e-05 eta 0:43:28 +epoch [47/50] batch [335/500] time 1.543 (1.562) data 0.000 (0.003) loss 1.1436 (1.0584) acc 68.7500 (73.7873) lr 4.8943e-05 eta 0:43:20 +epoch [47/50] batch [340/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.4170 (1.0574) acc 65.6250 (73.7684) lr 4.8943e-05 eta 0:43:12 +epoch [47/50] batch [345/500] time 1.554 (1.562) data 0.001 (0.003) loss 0.8525 (1.0541) acc 81.2500 
(73.8315) lr 4.8943e-05 eta 0:43:04 +epoch [47/50] batch [350/500] time 1.543 (1.562) data 0.000 (0.003) loss 0.6016 (1.0541) acc 87.5000 (73.8571) lr 4.8943e-05 eta 0:42:57 +epoch [47/50] batch [355/500] time 1.543 (1.562) data 0.000 (0.003) loss 0.7339 (1.0547) acc 78.1250 (73.8380) lr 4.8943e-05 eta 0:42:48 +epoch [47/50] batch [360/500] time 1.554 (1.562) data 0.000 (0.003) loss 1.0342 (1.0530) acc 78.1250 (73.9149) lr 4.8943e-05 eta 0:42:41 +epoch [47/50] batch [365/500] time 1.572 (1.562) data 0.000 (0.003) loss 1.3262 (1.0514) acc 68.7500 (73.8699) lr 4.8943e-05 eta 0:42:33 +epoch [47/50] batch [370/500] time 1.587 (1.562) data 0.000 (0.003) loss 0.7432 (1.0483) acc 78.1250 (73.9358) lr 4.8943e-05 eta 0:42:25 +epoch [47/50] batch [375/500] time 1.545 (1.562) data 0.000 (0.002) loss 0.7964 (1.0473) acc 71.8750 (73.9167) lr 4.8943e-05 eta 0:42:17 +epoch [47/50] batch [380/500] time 1.552 (1.562) data 0.000 (0.002) loss 0.4971 (1.0445) acc 87.5000 (73.9474) lr 4.8943e-05 eta 0:42:09 +epoch [47/50] batch [385/500] time 1.561 (1.562) data 0.000 (0.002) loss 0.5488 (1.0470) acc 75.0000 (73.8880) lr 4.8943e-05 eta 0:42:01 +epoch [47/50] batch [390/500] time 1.541 (1.561) data 0.000 (0.002) loss 1.6504 (1.0503) acc 68.7500 (73.8381) lr 4.8943e-05 eta 0:41:53 +epoch [47/50] batch [395/500] time 1.562 (1.562) data 0.000 (0.002) loss 0.7808 (1.0481) acc 90.6250 (73.9320) lr 4.8943e-05 eta 0:41:46 +epoch [47/50] batch [400/500] time 1.565 (1.562) data 0.001 (0.002) loss 2.0859 (1.0493) acc 50.0000 (73.8984) lr 4.8943e-05 eta 0:41:38 +epoch [47/50] batch [405/500] time 1.560 (1.561) data 0.000 (0.002) loss 0.9985 (1.0496) acc 71.8750 (73.8889) lr 4.8943e-05 eta 0:41:30 +epoch [47/50] batch [410/500] time 1.565 (1.561) data 0.000 (0.002) loss 0.7505 (1.0470) acc 71.8750 (73.8948) lr 4.8943e-05 eta 0:41:22 +epoch [47/50] batch [415/500] time 1.546 (1.561) data 0.000 (0.002) loss 0.6152 (1.0479) acc 87.5000 (73.8780) lr 4.8943e-05 eta 0:41:14 +epoch [47/50] batch [420/500] 
time 1.588 (1.561) data 0.001 (0.002) loss 1.3125 (1.0454) acc 78.1250 (73.9509) lr 4.8943e-05 eta 0:41:07 +epoch [47/50] batch [425/500] time 1.577 (1.562) data 0.000 (0.002) loss 1.5059 (1.0446) acc 59.3750 (73.9559) lr 4.8943e-05 eta 0:40:59 +epoch [47/50] batch [430/500] time 1.545 (1.562) data 0.000 (0.002) loss 0.9985 (1.0462) acc 75.0000 (73.8953) lr 4.8943e-05 eta 0:40:52 +epoch [47/50] batch [435/500] time 1.557 (1.562) data 0.000 (0.002) loss 0.7993 (1.0446) acc 84.3750 (73.9440) lr 4.8943e-05 eta 0:40:44 +epoch [47/50] batch [440/500] time 1.560 (1.562) data 0.001 (0.002) loss 1.1240 (1.0461) acc 71.8750 (73.9560) lr 4.8943e-05 eta 0:40:36 +epoch [47/50] batch [445/500] time 1.551 (1.562) data 0.000 (0.002) loss 0.8901 (1.0446) acc 81.2500 (73.9537) lr 4.8943e-05 eta 0:40:28 +epoch [47/50] batch [450/500] time 1.545 (1.562) data 0.000 (0.002) loss 0.9292 (1.0425) acc 81.2500 (74.0069) lr 4.8943e-05 eta 0:40:20 +epoch [47/50] batch [455/500] time 1.548 (1.562) data 0.000 (0.002) loss 1.0635 (1.0433) acc 71.8750 (73.9835) lr 4.8943e-05 eta 0:40:12 +epoch [47/50] batch [460/500] time 1.562 (1.562) data 0.000 (0.002) loss 1.1992 (1.0440) acc 62.5000 (73.9810) lr 4.8943e-05 eta 0:40:04 +epoch [47/50] batch [465/500] time 1.575 (1.562) data 0.000 (0.002) loss 1.2402 (1.0427) acc 71.8750 (74.0457) lr 4.8943e-05 eta 0:39:57 +epoch [47/50] batch [470/500] time 1.533 (1.562) data 0.000 (0.002) loss 1.0928 (1.0434) acc 81.2500 (74.0359) lr 4.8943e-05 eta 0:39:49 +epoch [47/50] batch [475/500] time 1.581 (1.562) data 0.000 (0.002) loss 0.7490 (1.0397) acc 87.5000 (74.1250) lr 4.8943e-05 eta 0:39:41 +epoch [47/50] batch [480/500] time 1.567 (1.562) data 0.000 (0.002) loss 0.6436 (1.0371) acc 81.2500 (74.1927) lr 4.8943e-05 eta 0:39:34 +epoch [47/50] batch [485/500] time 1.565 (1.562) data 0.001 (0.002) loss 1.1885 (1.0370) acc 71.8750 (74.1624) lr 4.8943e-05 eta 0:39:26 +epoch [47/50] batch [490/500] time 1.552 (1.562) data 0.000 (0.002) loss 0.9917 (1.0365) acc 
68.7500 (74.1327) lr 4.8943e-05 eta 0:39:18 +epoch [47/50] batch [495/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.1523 (1.0368) acc 78.1250 (74.1288) lr 4.8943e-05 eta 0:39:10 +epoch [47/50] batch [500/500] time 1.542 (1.562) data 0.000 (0.002) loss 0.7925 (1.0365) acc 78.1250 (74.1250) lr 3.1417e-05 eta 0:39:02 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,022 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +epoch [48/50] batch [5/500] time 1.541 (1.639) data 0.000 (0.143) loss 0.8838 (1.0408) acc 84.3750 (76.8750) lr 3.1417e-05 eta 0:40:50 +epoch [48/50] batch [10/500] time 1.571 (1.601) data 0.000 (0.072) loss 1.3477 (0.9808) acc 71.8750 (77.8125) lr 3.1417e-05 eta 0:39:45 +epoch [48/50] batch [15/500] time 1.573 (1.588) data 0.000 (0.048) loss 1.4424 (1.0341) acc 59.3750 (75.2083) lr 3.1417e-05 eta 0:39:17 +epoch [48/50] batch [20/500] time 1.544 (1.580) data 0.001 (0.036) loss 0.9644 (1.1244) acc 65.6250 (72.6562) lr 3.1417e-05 eta 0:38:59 +epoch [48/50] batch [25/500] time 1.693 (1.581) data 0.000 (0.029) loss 1.3691 (1.1977) acc 62.5000 (71.1250) lr 3.1417e-05 eta 0:38:51 +epoch [48/50] batch [30/500] time 1.546 (1.577) data 0.000 (0.024) loss 1.3125 (1.1949) acc 68.7500 (71.0417) lr 3.1417e-05 eta 0:38:38 +epoch [48/50] batch [35/500] time 1.553 (1.574) data 0.001 (0.021) loss 1.2178 (1.1658) acc 68.7500 (71.2500) lr 3.1417e-05 eta 0:38:26 +epoch [48/50] batch [40/500] time 1.569 (1.573) data 0.000 (0.018) loss 1.0322 (1.1922) acc 71.8750 (70.8594) lr 3.1417e-05 eta 0:38:17 +epoch [48/50] batch [45/500] time 1.568 (1.572) data 0.000 (0.016) loss 0.6929 (1.1606) acc 75.0000 (71.0417) lr 3.1417e-05 eta 0:38:07 +epoch [48/50] batch [50/500] time 1.529 (1.570) data 0.000 (0.015) loss 0.4404 (1.1335) acc 81.2500 (71.6875) lr 3.1417e-05 eta 0:37:57 +epoch [48/50] batch [55/500] time 1.559 (1.569) data 0.000 (0.013) loss 1.3506 (1.1356) acc 65.6250 (71.6477) lr 3.1417e-05 eta 0:37:47 +epoch [48/50] batch [60/500] time 1.565 
(1.569) data 0.001 (0.012) loss 1.4102 (1.1266) acc 68.7500 (71.7188) lr 3.1417e-05 eta 0:37:38 +epoch [48/50] batch [65/500] time 1.542 (1.569) data 0.001 (0.011) loss 0.4600 (1.0938) acc 84.3750 (72.2596) lr 3.1417e-05 eta 0:37:30 +epoch [48/50] batch [70/500] time 1.537 (1.567) data 0.000 (0.011) loss 1.1504 (1.1035) acc 68.7500 (72.0089) lr 3.1417e-05 eta 0:37:21 +epoch [48/50] batch [75/500] time 1.550 (1.566) data 0.000 (0.010) loss 0.7847 (1.1001) acc 84.3750 (71.8750) lr 3.1417e-05 eta 0:37:11 +epoch [48/50] batch [80/500] time 1.556 (1.566) data 0.000 (0.009) loss 1.7236 (1.1119) acc 53.1250 (71.7188) lr 3.1417e-05 eta 0:37:03 +epoch [48/50] batch [85/500] time 1.664 (1.567) data 0.000 (0.009) loss 0.6113 (1.1156) acc 84.3750 (71.8382) lr 3.1417e-05 eta 0:36:56 +epoch [48/50] batch [90/500] time 1.542 (1.566) data 0.000 (0.008) loss 0.7515 (1.0921) acc 81.2500 (72.4306) lr 3.1417e-05 eta 0:36:48 +epoch [48/50] batch [95/500] time 1.532 (1.565) data 0.000 (0.008) loss 0.5693 (1.0890) acc 81.2500 (72.4342) lr 3.1417e-05 eta 0:36:38 +epoch [48/50] batch [100/500] time 1.548 (1.564) data 0.001 (0.008) loss 0.9985 (1.0887) acc 78.1250 (72.7188) lr 3.1417e-05 eta 0:36:30 +epoch [48/50] batch [105/500] time 1.555 (1.563) data 0.000 (0.007) loss 1.4863 (1.0857) acc 75.0000 (72.8274) lr 3.1417e-05 eta 0:36:20 +epoch [48/50] batch [110/500] time 1.539 (1.563) data 0.000 (0.007) loss 1.3193 (1.0966) acc 75.0000 (72.6420) lr 3.1417e-05 eta 0:36:12 +epoch [48/50] batch [115/500] time 1.578 (1.563) data 0.000 (0.007) loss 0.7642 (1.0870) acc 71.8750 (72.8261) lr 3.1417e-05 eta 0:36:04 +epoch [48/50] batch [120/500] time 1.562 (1.562) data 0.000 (0.006) loss 1.5625 (1.0872) acc 65.6250 (72.7344) lr 3.1417e-05 eta 0:35:55 +epoch [48/50] batch [125/500] time 1.553 (1.562) data 0.000 (0.006) loss 0.9785 (1.0825) acc 81.2500 (72.8750) lr 3.1417e-05 eta 0:35:47 +epoch [48/50] batch [130/500] time 1.554 (1.562) data 0.000 (0.006) loss 0.7578 (1.0781) acc 78.1250 (73.0769) lr 
3.1417e-05 eta 0:35:39 +epoch [48/50] batch [135/500] time 1.583 (1.562) data 0.001 (0.006) loss 1.1904 (1.0797) acc 68.7500 (73.0787) lr 3.1417e-05 eta 0:35:32 +epoch [48/50] batch [140/500] time 1.555 (1.562) data 0.000 (0.006) loss 1.0332 (1.0827) acc 75.0000 (73.0804) lr 3.1417e-05 eta 0:35:24 +epoch [48/50] batch [145/500] time 1.582 (1.562) data 0.000 (0.005) loss 1.8887 (1.0807) acc 62.5000 (73.1681) lr 3.1417e-05 eta 0:35:16 +epoch [48/50] batch [150/500] time 1.559 (1.562) data 0.000 (0.005) loss 0.5601 (1.0861) acc 84.3750 (73.2083) lr 3.1417e-05 eta 0:35:08 +epoch [48/50] batch [155/500] time 1.577 (1.562) data 0.000 (0.005) loss 0.5435 (1.0868) acc 87.5000 (73.1855) lr 3.1417e-05 eta 0:35:00 +epoch [48/50] batch [160/500] time 1.581 (1.562) data 0.000 (0.005) loss 0.5952 (1.0813) acc 84.3750 (73.3008) lr 3.1417e-05 eta 0:34:53 +epoch [48/50] batch [165/500] time 1.572 (1.562) data 0.000 (0.005) loss 0.9336 (1.0806) acc 81.2500 (73.3902) lr 3.1417e-05 eta 0:34:45 +epoch [48/50] batch [170/500] time 1.561 (1.562) data 0.000 (0.005) loss 0.6602 (1.0737) acc 90.6250 (73.6029) lr 3.1417e-05 eta 0:34:37 +epoch [48/50] batch [175/500] time 1.554 (1.562) data 0.000 (0.005) loss 1.0322 (1.0748) acc 71.8750 (73.5357) lr 3.1417e-05 eta 0:34:29 +epoch [48/50] batch [180/500] time 1.572 (1.562) data 0.000 (0.004) loss 0.8149 (1.0767) acc 87.5000 (73.6111) lr 3.1417e-05 eta 0:34:22 +epoch [48/50] batch [185/500] time 1.576 (1.563) data 0.000 (0.004) loss 1.1006 (1.0769) acc 71.8750 (73.6824) lr 3.1417e-05 eta 0:34:15 +epoch [48/50] batch [190/500] time 1.568 (1.563) data 0.001 (0.004) loss 0.8501 (1.0747) acc 78.1250 (73.7829) lr 3.1417e-05 eta 0:34:08 +epoch [48/50] batch [195/500] time 1.557 (1.563) data 0.000 (0.004) loss 0.5889 (1.0700) acc 81.2500 (73.8942) lr 3.1417e-05 eta 0:34:00 +epoch [48/50] batch [200/500] time 1.560 (1.564) data 0.001 (0.004) loss 1.2676 (1.0677) acc 68.7500 (73.9375) lr 3.1417e-05 eta 0:33:52 +epoch [48/50] batch [205/500] time 1.567 
(1.563) data 0.000 (0.004) loss 1.2461 (1.0686) acc 68.7500 (73.8872) lr 3.1417e-05 eta 0:33:44 +epoch [48/50] batch [210/500] time 1.545 (1.563) data 0.000 (0.004) loss 0.9170 (1.0689) acc 81.2500 (73.9583) lr 3.1417e-05 eta 0:33:36 +epoch [48/50] batch [215/500] time 1.564 (1.563) data 0.000 (0.004) loss 1.2793 (1.0710) acc 71.8750 (73.9099) lr 3.1417e-05 eta 0:33:28 +epoch [48/50] batch [220/500] time 1.543 (1.562) data 0.000 (0.004) loss 0.5332 (1.0685) acc 90.6250 (73.9489) lr 3.1417e-05 eta 0:33:19 +epoch [48/50] batch [225/500] time 1.567 (1.562) data 0.001 (0.004) loss 0.9863 (1.0646) acc 78.1250 (74.0556) lr 3.1417e-05 eta 0:33:12 +epoch [48/50] batch [230/500] time 1.548 (1.563) data 0.000 (0.004) loss 1.0850 (1.0603) acc 71.8750 (74.1168) lr 3.1417e-05 eta 0:33:04 +epoch [48/50] batch [235/500] time 1.568 (1.563) data 0.000 (0.003) loss 2.2520 (1.0630) acc 56.2500 (74.0957) lr 3.1417e-05 eta 0:32:57 +epoch [48/50] batch [240/500] time 1.549 (1.562) data 0.000 (0.003) loss 0.8628 (1.0615) acc 81.2500 (74.1667) lr 3.1417e-05 eta 0:32:48 +epoch [48/50] batch [245/500] time 1.558 (1.562) data 0.000 (0.003) loss 0.4929 (1.0636) acc 90.6250 (74.1454) lr 3.1417e-05 eta 0:32:40 +epoch [48/50] batch [250/500] time 1.578 (1.562) data 0.000 (0.003) loss 1.1689 (1.0625) acc 71.8750 (74.1250) lr 3.1417e-05 eta 0:32:32 +epoch [48/50] batch [255/500] time 1.558 (1.562) data 0.000 (0.003) loss 0.7124 (1.0607) acc 81.2500 (74.1054) lr 3.1417e-05 eta 0:32:24 +epoch [48/50] batch [260/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.2939 (1.0591) acc 68.7500 (74.0986) lr 3.1417e-05 eta 0:32:16 +epoch [48/50] batch [265/500] time 1.545 (1.562) data 0.000 (0.003) loss 1.2744 (1.0631) acc 68.7500 (74.0212) lr 3.1417e-05 eta 0:32:08 +epoch [48/50] batch [270/500] time 1.573 (1.562) data 0.001 (0.003) loss 1.1445 (1.0633) acc 71.8750 (74.0162) lr 3.1417e-05 eta 0:32:00 +epoch [48/50] batch [275/500] time 1.574 (1.562) data 0.000 (0.003) loss 1.0869 (1.0607) acc 75.0000 
(74.1023) lr 3.1417e-05 eta 0:31:53 +epoch [48/50] batch [280/500] time 1.540 (1.562) data 0.000 (0.003) loss 0.7617 (1.0606) acc 75.0000 (74.0513) lr 3.1417e-05 eta 0:31:45 +epoch [48/50] batch [285/500] time 1.552 (1.562) data 0.000 (0.003) loss 0.7529 (1.0593) acc 65.6250 (74.0351) lr 3.1417e-05 eta 0:31:37 +epoch [48/50] batch [290/500] time 1.568 (1.562) data 0.001 (0.003) loss 1.4365 (1.0590) acc 65.6250 (74.0302) lr 3.1417e-05 eta 0:31:29 +epoch [48/50] batch [295/500] time 1.537 (1.562) data 0.000 (0.003) loss 1.3467 (1.0578) acc 56.2500 (73.9619) lr 3.1417e-05 eta 0:31:22 +epoch [48/50] batch [300/500] time 1.564 (1.562) data 0.000 (0.003) loss 1.6162 (1.0588) acc 68.7500 (73.9583) lr 3.1417e-05 eta 0:31:14 +epoch [48/50] batch [305/500] time 1.540 (1.562) data 0.001 (0.003) loss 0.7583 (1.0550) acc 75.0000 (73.9754) lr 3.1417e-05 eta 0:31:06 +epoch [48/50] batch [310/500] time 1.568 (1.562) data 0.000 (0.003) loss 1.0166 (1.0553) acc 65.6250 (73.9113) lr 3.1417e-05 eta 0:30:58 +epoch [48/50] batch [315/500] time 1.569 (1.562) data 0.000 (0.003) loss 0.7715 (1.0542) acc 84.3750 (73.9187) lr 3.1417e-05 eta 0:30:50 +epoch [48/50] batch [320/500] time 1.530 (1.561) data 0.000 (0.003) loss 0.7700 (1.0553) acc 71.8750 (73.8770) lr 3.1417e-05 eta 0:30:42 +epoch [48/50] batch [325/500] time 1.567 (1.562) data 0.000 (0.003) loss 1.1416 (1.0552) acc 65.6250 (73.8558) lr 3.1417e-05 eta 0:30:34 +epoch [48/50] batch [330/500] time 1.568 (1.562) data 0.000 (0.003) loss 1.7715 (1.0549) acc 62.5000 (73.9110) lr 3.1417e-05 eta 0:30:27 +epoch [48/50] batch [335/500] time 1.539 (1.561) data 0.000 (0.003) loss 0.5146 (1.0514) acc 87.5000 (73.9459) lr 3.1417e-05 eta 0:30:18 +epoch [48/50] batch [340/500] time 1.553 (1.561) data 0.000 (0.003) loss 1.7432 (1.0530) acc 62.5000 (73.9246) lr 3.1417e-05 eta 0:30:10 +epoch [48/50] batch [345/500] time 1.555 (1.561) data 0.000 (0.002) loss 1.4922 (1.0529) acc 78.1250 (73.9493) lr 3.1417e-05 eta 0:30:02 +epoch [48/50] batch [350/500] 
time 1.577 (1.561) data 0.000 (0.002) loss 0.6909 (1.0514) acc 81.2500 (73.9554) lr 3.1417e-05 eta 0:29:55 +epoch [48/50] batch [355/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.8335 (1.0503) acc 78.1250 (73.9701) lr 3.1417e-05 eta 0:29:47 +epoch [48/50] batch [360/500] time 1.526 (1.561) data 0.000 (0.002) loss 1.2627 (1.0506) acc 68.7500 (73.9670) lr 3.1417e-05 eta 0:29:39 +epoch [48/50] batch [365/500] time 1.563 (1.561) data 0.000 (0.002) loss 0.7983 (1.0503) acc 75.0000 (73.9812) lr 3.1417e-05 eta 0:29:31 +epoch [48/50] batch [370/500] time 1.546 (1.561) data 0.000 (0.002) loss 0.7339 (1.0501) acc 87.5000 (74.0118) lr 3.1417e-05 eta 0:29:23 +epoch [48/50] batch [375/500] time 1.563 (1.561) data 0.001 (0.002) loss 0.9204 (1.0526) acc 68.7500 (73.9833) lr 3.1417e-05 eta 0:29:15 +epoch [48/50] batch [380/500] time 1.552 (1.561) data 0.000 (0.002) loss 0.9771 (1.0509) acc 68.7500 (73.9720) lr 3.1417e-05 eta 0:29:08 +epoch [48/50] batch [385/500] time 1.570 (1.561) data 0.000 (0.002) loss 1.3916 (1.0541) acc 71.8750 (73.8555) lr 3.1417e-05 eta 0:29:00 +epoch [48/50] batch [390/500] time 1.550 (1.561) data 0.000 (0.002) loss 0.8984 (1.0524) acc 75.0000 (73.8702) lr 3.1417e-05 eta 0:28:52 +epoch [48/50] batch [395/500] time 1.554 (1.561) data 0.000 (0.002) loss 1.2578 (1.0534) acc 71.8750 (73.8528) lr 3.1417e-05 eta 0:28:44 +epoch [48/50] batch [400/500] time 1.582 (1.561) data 0.000 (0.002) loss 0.6709 (1.0508) acc 75.0000 (73.8516) lr 3.1417e-05 eta 0:28:37 +epoch [48/50] batch [405/500] time 1.571 (1.561) data 0.000 (0.002) loss 0.9565 (1.0501) acc 81.2500 (73.9352) lr 3.1417e-05 eta 0:28:29 +epoch [48/50] batch [410/500] time 1.569 (1.561) data 0.001 (0.002) loss 1.0938 (1.0478) acc 71.8750 (73.9710) lr 3.1417e-05 eta 0:28:21 +epoch [48/50] batch [415/500] time 1.650 (1.561) data 0.000 (0.002) loss 0.8398 (1.0461) acc 78.1250 (74.0136) lr 3.1417e-05 eta 0:28:14 +epoch [48/50] batch [420/500] time 1.566 (1.561) data 0.000 (0.002) loss 1.1621 (1.0452) acc 
71.8750 (74.0327) lr 3.1417e-05 eta 0:28:06 +epoch [48/50] batch [425/500] time 1.572 (1.562) data 0.000 (0.002) loss 2.2266 (1.0497) acc 53.1250 (73.9118) lr 3.1417e-05 eta 0:27:58 +epoch [48/50] batch [430/500] time 1.558 (1.562) data 0.000 (0.002) loss 0.6973 (1.0490) acc 87.5000 (73.9898) lr 3.1417e-05 eta 0:27:50 +epoch [48/50] batch [435/500] time 1.564 (1.562) data 0.000 (0.002) loss 0.8799 (1.0479) acc 75.0000 (74.0086) lr 3.1417e-05 eta 0:27:43 +epoch [48/50] batch [440/500] time 1.538 (1.562) data 0.000 (0.002) loss 0.7324 (1.0473) acc 81.2500 (74.0341) lr 3.1417e-05 eta 0:27:35 +epoch [48/50] batch [445/500] time 1.557 (1.562) data 0.000 (0.002) loss 1.2256 (1.0474) acc 65.6250 (74.0449) lr 3.1417e-05 eta 0:27:27 +epoch [48/50] batch [450/500] time 1.556 (1.562) data 0.000 (0.002) loss 0.9419 (1.0470) acc 71.8750 (74.0347) lr 3.1417e-05 eta 0:27:19 +epoch [48/50] batch [455/500] time 1.556 (1.561) data 0.000 (0.002) loss 0.9688 (1.0462) acc 71.8750 (74.0247) lr 3.1417e-05 eta 0:27:11 +epoch [48/50] batch [460/500] time 1.552 (1.562) data 0.000 (0.002) loss 0.7812 (1.0454) acc 78.1250 (74.0285) lr 3.1417e-05 eta 0:27:04 +epoch [48/50] batch [465/500] time 1.586 (1.562) data 0.000 (0.002) loss 0.9917 (1.0464) acc 75.0000 (74.0457) lr 3.1417e-05 eta 0:26:56 +epoch [48/50] batch [470/500] time 1.568 (1.562) data 0.000 (0.002) loss 1.4873 (1.0485) acc 62.5000 (74.0027) lr 3.1417e-05 eta 0:26:48 +epoch [48/50] batch [475/500] time 1.568 (1.562) data 0.001 (0.002) loss 1.5146 (1.0478) acc 59.3750 (74.0066) lr 3.1417e-05 eta 0:26:40 +epoch [48/50] batch [480/500] time 1.569 (1.562) data 0.001 (0.002) loss 1.1689 (1.0453) acc 68.7500 (74.0430) lr 3.1417e-05 eta 0:26:33 +epoch [48/50] batch [485/500] time 1.545 (1.562) data 0.001 (0.002) loss 1.5010 (1.0455) acc 71.8750 (74.0464) lr 3.1417e-05 eta 0:26:25 +epoch [48/50] batch [490/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.0195 (1.0455) acc 81.2500 (74.0625) lr 3.1417e-05 eta 0:26:17 +epoch [48/50] batch 
[495/500] time 1.543 (1.562) data 0.000 (0.002) loss 1.1318 (1.0480) acc 71.8750 (73.9836) lr 3.1417e-05 eta 0:26:09 +epoch [48/50] batch [500/500] time 1.548 (1.562) data 0.000 (0.002) loss 1.0195 (1.0471) acc 71.8750 (73.9875) lr 1.7713e-05 eta 0:26:01 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,031 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [49/50] batch [5/500] time 1.545 (1.663) data 0.000 (0.168) loss 0.4819 (1.1761) acc 93.7500 (75.0000) lr 1.7713e-05 eta 0:27:34 +epoch [49/50] batch [10/500] time 1.542 (1.606) data 0.001 (0.085) loss 0.7598 (1.0524) acc 81.2500 (76.5625) lr 1.7713e-05 eta 0:26:29 +epoch [49/50] batch [15/500] time 1.569 (1.585) data 0.001 (0.057) loss 1.3066 (1.0935) acc 68.7500 (75.0000) lr 1.7713e-05 eta 0:26:01 +epoch [49/50] batch [20/500] time 1.560 (1.579) data 0.000 (0.043) loss 1.4961 (1.1596) acc 68.7500 (72.6562) lr 1.7713e-05 eta 0:25:47 +epoch [49/50] batch [25/500] time 1.571 (1.581) data 0.000 (0.034) loss 0.8633 (1.1758) acc 71.8750 (71.6250) lr 1.7713e-05 eta 0:25:41 +epoch [49/50] batch [30/500] time 1.545 (1.576) data 0.000 (0.028) loss 1.4980 (1.1558) acc 68.7500 (71.7708) lr 1.7713e-05 eta 0:25:28 +epoch [49/50] batch [35/500] time 1.533 (1.571) data 0.000 (0.024) loss 1.0742 (1.1338) acc 75.0000 (71.9643) lr 1.7713e-05 eta 0:25:16 +epoch [49/50] batch [40/500] time 1.547 (1.569) data 0.001 (0.022) loss 1.4629 (1.1532) acc 65.6250 (71.4844) lr 1.7713e-05 eta 0:25:05 +epoch [49/50] batch [45/500] time 1.554 (1.567) data 0.000 (0.019) loss 0.7026 (1.1334) acc 84.3750 (71.5972) lr 1.7713e-05 eta 0:24:56 +epoch [49/50] batch [50/500] time 1.546 (1.567) data 0.000 (0.017) loss 1.1035 (1.1040) acc 78.1250 (72.3750) lr 1.7713e-05 eta 0:24:48 +epoch [49/50] batch [55/500] time 1.572 (1.566) data 0.000 (0.016) loss 1.4004 (1.0991) acc 59.3750 (72.3864) lr 1.7713e-05 eta 0:24:40 +epoch [49/50] batch [60/500] time 1.577 (1.566) data 0.000 (0.014) loss 0.7686 (1.0859) acc 75.0000 (72.7604) 
lr 1.7713e-05 eta 0:24:32 +epoch [49/50] batch [65/500] time 1.553 (1.566) data 0.001 (0.013) loss 0.9966 (1.0773) acc 81.2500 (73.2212) lr 1.7713e-05 eta 0:24:24 +epoch [49/50] batch [70/500] time 1.557 (1.565) data 0.001 (0.012) loss 0.7637 (1.0769) acc 81.2500 (73.3482) lr 1.7713e-05 eta 0:24:15 +epoch [49/50] batch [75/500] time 1.557 (1.565) data 0.001 (0.012) loss 0.9321 (1.0704) acc 71.8750 (73.3333) lr 1.7713e-05 eta 0:24:07 +epoch [49/50] batch [80/500] time 1.553 (1.565) data 0.001 (0.011) loss 1.3545 (1.0726) acc 68.7500 (73.1641) lr 1.7713e-05 eta 0:23:59 +epoch [49/50] batch [85/500] time 1.550 (1.564) data 0.000 (0.010) loss 0.7905 (1.0729) acc 81.2500 (73.0882) lr 1.7713e-05 eta 0:23:51 +epoch [49/50] batch [90/500] time 1.562 (1.563) data 0.000 (0.010) loss 1.0254 (1.0621) acc 81.2500 (73.3681) lr 1.7713e-05 eta 0:23:42 +epoch [49/50] batch [95/500] time 1.559 (1.563) data 0.000 (0.009) loss 1.4336 (1.0650) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:23:34 +epoch [49/50] batch [100/500] time 1.568 (1.563) data 0.001 (0.009) loss 0.9600 (1.0654) acc 78.1250 (73.1250) lr 1.7713e-05 eta 0:23:26 +epoch [49/50] batch [105/500] time 1.549 (1.563) data 0.001 (0.008) loss 1.2061 (1.0607) acc 75.0000 (73.2143) lr 1.7713e-05 eta 0:23:18 +epoch [49/50] batch [110/500] time 1.556 (1.562) data 0.000 (0.008) loss 1.2559 (1.0591) acc 65.6250 (73.2670) lr 1.7713e-05 eta 0:23:10 +epoch [49/50] batch [115/500] time 1.573 (1.562) data 0.000 (0.008) loss 0.7300 (1.0565) acc 81.2500 (73.3152) lr 1.7713e-05 eta 0:23:02 +epoch [49/50] batch [120/500] time 1.635 (1.563) data 0.001 (0.007) loss 0.5713 (1.0629) acc 87.5000 (73.2292) lr 1.7713e-05 eta 0:22:55 +epoch [49/50] batch [125/500] time 1.592 (1.563) data 0.001 (0.007) loss 0.7227 (1.0572) acc 90.6250 (73.4250) lr 1.7713e-05 eta 0:22:47 +epoch [49/50] batch [130/500] time 1.559 (1.562) data 0.000 (0.007) loss 1.2949 (1.0554) acc 68.7500 (73.5096) lr 1.7713e-05 eta 0:22:39 +epoch [49/50] batch [135/500] time 1.546 
(1.563) data 0.000 (0.007) loss 1.2334 (1.0562) acc 71.8750 (73.4491) lr 1.7713e-05 eta 0:22:31 +epoch [49/50] batch [140/500] time 1.551 (1.563) data 0.000 (0.006) loss 0.6538 (1.0453) acc 81.2500 (73.7500) lr 1.7713e-05 eta 0:22:24 +epoch [49/50] batch [145/500] time 1.561 (1.563) data 0.000 (0.006) loss 1.6602 (1.0453) acc 62.5000 (73.7284) lr 1.7713e-05 eta 0:22:16 +epoch [49/50] batch [150/500] time 1.537 (1.563) data 0.000 (0.006) loss 0.8169 (1.0514) acc 84.3750 (73.6042) lr 1.7713e-05 eta 0:22:08 +epoch [49/50] batch [155/500] time 1.576 (1.563) data 0.000 (0.006) loss 0.6353 (1.0425) acc 87.5000 (73.7500) lr 1.7713e-05 eta 0:22:00 +epoch [49/50] batch [160/500] time 1.553 (1.563) data 0.000 (0.006) loss 1.0293 (1.0414) acc 71.8750 (73.6719) lr 1.7713e-05 eta 0:21:52 +epoch [49/50] batch [165/500] time 1.541 (1.563) data 0.000 (0.006) loss 0.7354 (1.0448) acc 78.1250 (73.5795) lr 1.7713e-05 eta 0:21:45 +epoch [49/50] batch [170/500] time 1.571 (1.564) data 0.000 (0.005) loss 1.0654 (1.0416) acc 78.1250 (73.6949) lr 1.7713e-05 eta 0:21:37 +epoch [49/50] batch [175/500] time 1.539 (1.563) data 0.000 (0.005) loss 1.0342 (1.0396) acc 71.8750 (73.5714) lr 1.7713e-05 eta 0:21:29 +epoch [49/50] batch [180/500] time 1.570 (1.563) data 0.000 (0.005) loss 1.6963 (1.0355) acc 65.6250 (73.7153) lr 1.7713e-05 eta 0:21:21 +epoch [49/50] batch [185/500] time 1.573 (1.563) data 0.000 (0.005) loss 1.7930 (1.0437) acc 62.5000 (73.5135) lr 1.7713e-05 eta 0:21:13 +epoch [49/50] batch [190/500] time 1.551 (1.563) data 0.000 (0.005) loss 0.6748 (1.0364) acc 84.3750 (73.6184) lr 1.7713e-05 eta 0:21:05 +epoch [49/50] batch [195/500] time 1.561 (1.563) data 0.000 (0.005) loss 1.0127 (1.0451) acc 75.0000 (73.4936) lr 1.7713e-05 eta 0:20:58 +epoch [49/50] batch [200/500] time 1.559 (1.563) data 0.000 (0.005) loss 0.8911 (1.0401) acc 78.1250 (73.6094) lr 1.7713e-05 eta 0:20:50 +epoch [49/50] batch [205/500] time 1.565 (1.563) data 0.000 (0.005) loss 1.0488 (1.0399) acc 71.8750 
(73.6433) lr 1.7713e-05 eta 0:20:42 +epoch [49/50] batch [210/500] time 1.565 (1.563) data 0.000 (0.004) loss 0.7842 (1.0359) acc 75.0000 (73.6756) lr 1.7713e-05 eta 0:20:34 +epoch [49/50] batch [215/500] time 1.553 (1.563) data 0.000 (0.004) loss 0.9653 (1.0372) acc 78.1250 (73.6337) lr 1.7713e-05 eta 0:20:27 +epoch [49/50] batch [220/500] time 1.565 (1.563) data 0.000 (0.004) loss 1.2412 (1.0314) acc 68.7500 (73.7358) lr 1.7713e-05 eta 0:20:19 +epoch [49/50] batch [225/500] time 1.563 (1.564) data 0.000 (0.004) loss 1.0195 (1.0265) acc 78.1250 (73.8611) lr 1.7713e-05 eta 0:20:11 +epoch [49/50] batch [230/500] time 1.579 (1.564) data 0.000 (0.004) loss 1.3242 (1.0316) acc 68.7500 (73.8043) lr 1.7713e-05 eta 0:20:04 +epoch [49/50] batch [235/500] time 1.559 (1.564) data 0.001 (0.004) loss 0.9668 (1.0307) acc 84.3750 (73.8298) lr 1.7713e-05 eta 0:19:56 +epoch [49/50] batch [240/500] time 1.557 (1.564) data 0.001 (0.004) loss 1.0186 (1.0278) acc 75.0000 (73.8802) lr 1.7713e-05 eta 0:19:48 +epoch [49/50] batch [245/500] time 1.559 (1.564) data 0.000 (0.004) loss 0.9395 (1.0281) acc 68.7500 (73.7883) lr 1.7713e-05 eta 0:19:40 +epoch [49/50] batch [250/500] time 1.574 (1.564) data 0.000 (0.004) loss 1.6133 (1.0325) acc 68.7500 (73.7625) lr 1.7713e-05 eta 0:19:32 +epoch [49/50] batch [255/500] time 1.572 (1.564) data 0.000 (0.004) loss 1.1621 (1.0358) acc 62.5000 (73.6887) lr 1.7713e-05 eta 0:19:25 +epoch [49/50] batch [260/500] time 1.594 (1.564) data 0.000 (0.004) loss 1.6807 (1.0357) acc 59.3750 (73.7139) lr 1.7713e-05 eta 0:19:17 +epoch [49/50] batch [265/500] time 1.543 (1.564) data 0.001 (0.004) loss 0.8193 (1.0360) acc 78.1250 (73.6321) lr 1.7713e-05 eta 0:19:09 +epoch [49/50] batch [270/500] time 1.555 (1.564) data 0.000 (0.004) loss 1.1035 (1.0329) acc 68.7500 (73.7153) lr 1.7713e-05 eta 0:19:01 +epoch [49/50] batch [275/500] time 1.581 (1.564) data 0.000 (0.003) loss 0.6528 (1.0275) acc 84.3750 (73.8409) lr 1.7713e-05 eta 0:18:53 +epoch [49/50] batch [280/500] 
time 1.539 (1.564) data 0.000 (0.003) loss 0.9067 (1.0248) acc 78.1250 (73.9174) lr 1.7713e-05 eta 0:18:46 +epoch [49/50] batch [285/500] time 1.553 (1.564) data 0.000 (0.003) loss 1.1846 (1.0289) acc 71.8750 (73.8377) lr 1.7713e-05 eta 0:18:38 +epoch [49/50] batch [290/500] time 1.547 (1.564) data 0.000 (0.003) loss 1.1826 (1.0299) acc 81.2500 (73.8685) lr 1.7713e-05 eta 0:18:30 +epoch [49/50] batch [295/500] time 1.573 (1.564) data 0.000 (0.003) loss 0.7100 (1.0306) acc 87.5000 (73.8665) lr 1.7713e-05 eta 0:18:22 +epoch [49/50] batch [300/500] time 1.541 (1.564) data 0.000 (0.003) loss 1.2402 (1.0289) acc 71.8750 (73.9167) lr 1.7713e-05 eta 0:18:14 +epoch [49/50] batch [305/500] time 1.585 (1.564) data 0.000 (0.003) loss 1.0732 (1.0287) acc 71.8750 (73.9037) lr 1.7713e-05 eta 0:18:06 +epoch [49/50] batch [310/500] time 1.565 (1.564) data 0.000 (0.003) loss 1.2119 (1.0292) acc 62.5000 (73.8306) lr 1.7713e-05 eta 0:17:59 +epoch [49/50] batch [315/500] time 1.550 (1.564) data 0.000 (0.003) loss 1.1934 (1.0308) acc 75.0000 (73.8393) lr 1.7713e-05 eta 0:17:51 +epoch [49/50] batch [320/500] time 1.546 (1.564) data 0.000 (0.003) loss 1.6826 (1.0321) acc 53.1250 (73.7988) lr 1.7713e-05 eta 0:17:43 +epoch [49/50] batch [325/500] time 1.560 (1.564) data 0.000 (0.003) loss 1.1182 (1.0301) acc 75.0000 (73.8462) lr 1.7713e-05 eta 0:17:35 +epoch [49/50] batch [330/500] time 1.565 (1.564) data 0.000 (0.003) loss 1.1377 (1.0304) acc 68.7500 (73.8447) lr 1.7713e-05 eta 0:17:27 +epoch [49/50] batch [335/500] time 1.556 (1.564) data 0.000 (0.003) loss 0.8667 (1.0291) acc 81.2500 (73.8526) lr 1.7713e-05 eta 0:17:19 +epoch [49/50] batch [340/500] time 1.553 (1.564) data 0.000 (0.003) loss 1.0137 (1.0293) acc 75.0000 (73.8327) lr 1.7713e-05 eta 0:17:12 +epoch [49/50] batch [345/500] time 1.572 (1.564) data 0.000 (0.003) loss 0.6060 (1.0271) acc 84.3750 (73.8949) lr 1.7713e-05 eta 0:17:04 +epoch [49/50] batch [350/500] time 1.555 (1.564) data 0.000 (0.003) loss 1.0400 (1.0247) acc 
65.6250 (73.9375) lr 1.7713e-05 eta 0:16:56 +epoch [49/50] batch [355/500] time 1.548 (1.564) data 0.000 (0.003) loss 1.0381 (1.0249) acc 68.7500 (73.9613) lr 1.7713e-05 eta 0:16:48 +epoch [49/50] batch [360/500] time 1.569 (1.564) data 0.000 (0.003) loss 1.0986 (1.0242) acc 68.7500 (73.9583) lr 1.7713e-05 eta 0:16:40 +epoch [49/50] batch [365/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.0137 (1.0244) acc 78.1250 (73.9640) lr 1.7713e-05 eta 0:16:32 +epoch [49/50] batch [370/500] time 1.572 (1.563) data 0.000 (0.003) loss 1.0459 (1.0225) acc 68.7500 (73.9696) lr 1.7713e-05 eta 0:16:24 +epoch [49/50] batch [375/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.2012 (1.0237) acc 71.8750 (73.9583) lr 1.7713e-05 eta 0:16:17 +epoch [49/50] batch [380/500] time 1.550 (1.563) data 0.000 (0.003) loss 0.8125 (1.0250) acc 75.0000 (73.8405) lr 1.7713e-05 eta 0:16:09 +epoch [49/50] batch [385/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.6299 (1.0259) acc 50.0000 (73.7662) lr 1.7713e-05 eta 0:16:01 +epoch [49/50] batch [390/500] time 1.558 (1.563) data 0.000 (0.003) loss 0.8726 (1.0241) acc 84.3750 (73.8301) lr 1.7713e-05 eta 0:15:53 +epoch [49/50] batch [395/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.2520 (1.0242) acc 71.8750 (73.8370) lr 1.7713e-05 eta 0:15:45 +epoch [49/50] batch [400/500] time 1.571 (1.563) data 0.000 (0.003) loss 1.1699 (1.0238) acc 65.6250 (73.8281) lr 1.7713e-05 eta 0:15:38 +epoch [49/50] batch [405/500] time 1.563 (1.563) data 0.000 (0.002) loss 1.2109 (1.0240) acc 65.6250 (73.8194) lr 1.7713e-05 eta 0:15:30 +epoch [49/50] batch [410/500] time 1.567 (1.564) data 0.000 (0.002) loss 0.7573 (1.0225) acc 68.7500 (73.8262) lr 1.7713e-05 eta 0:15:22 +epoch [49/50] batch [415/500] time 1.552 (1.564) data 0.001 (0.002) loss 1.0430 (1.0231) acc 81.2500 (73.8554) lr 1.7713e-05 eta 0:15:14 +epoch [49/50] batch [420/500] time 1.543 (1.563) data 0.000 (0.002) loss 0.5078 (1.0212) acc 78.1250 (73.8542) lr 1.7713e-05 eta 0:15:06 +epoch [49/50] batch 
[425/500] time 1.534 (1.563) data 0.000 (0.002) loss 0.9487 (1.0230) acc 75.0000 (73.8382) lr 1.7713e-05 eta 0:14:58 +epoch [49/50] batch [430/500] time 1.547 (1.563) data 0.000 (0.002) loss 0.6167 (1.0235) acc 81.2500 (73.7863) lr 1.7713e-05 eta 0:14:51 +epoch [49/50] batch [435/500] time 1.581 (1.563) data 0.000 (0.002) loss 0.6318 (1.0236) acc 84.3750 (73.8003) lr 1.7713e-05 eta 0:14:43 +epoch [49/50] batch [440/500] time 1.575 (1.563) data 0.000 (0.002) loss 0.6172 (1.0243) acc 81.2500 (73.7713) lr 1.7713e-05 eta 0:14:35 +epoch [49/50] batch [445/500] time 1.552 (1.563) data 0.001 (0.002) loss 0.4946 (1.0261) acc 84.3750 (73.7008) lr 1.7713e-05 eta 0:14:27 +epoch [49/50] batch [450/500] time 1.672 (1.563) data 0.000 (0.002) loss 1.2393 (1.0258) acc 68.7500 (73.7292) lr 1.7713e-05 eta 0:14:19 +epoch [49/50] batch [455/500] time 1.557 (1.563) data 0.000 (0.002) loss 1.5742 (1.0279) acc 65.6250 (73.7225) lr 1.7713e-05 eta 0:14:11 +epoch [49/50] batch [460/500] time 1.567 (1.563) data 0.000 (0.002) loss 0.5708 (1.0282) acc 87.5000 (73.7092) lr 1.7713e-05 eta 0:14:04 +epoch [49/50] batch [465/500] time 1.556 (1.563) data 0.000 (0.002) loss 1.5352 (1.0293) acc 56.2500 (73.6492) lr 1.7713e-05 eta 0:13:56 +epoch [49/50] batch [470/500] time 1.586 (1.563) data 0.000 (0.002) loss 0.9819 (1.0270) acc 71.8750 (73.7035) lr 1.7713e-05 eta 0:13:48 +epoch [49/50] batch [475/500] time 1.579 (1.563) data 0.000 (0.002) loss 1.4443 (1.0268) acc 65.6250 (73.7105) lr 1.7713e-05 eta 0:13:40 +epoch [49/50] batch [480/500] time 1.569 (1.564) data 0.000 (0.002) loss 0.7148 (1.0285) acc 84.3750 (73.6914) lr 1.7713e-05 eta 0:13:33 +epoch [49/50] batch [485/500] time 1.536 (1.563) data 0.001 (0.002) loss 1.0166 (1.0291) acc 75.0000 (73.7178) lr 1.7713e-05 eta 0:13:25 +epoch [49/50] batch [490/500] time 1.543 (1.563) data 0.000 (0.002) loss 0.5801 (1.0305) acc 75.0000 (73.6926) lr 1.7713e-05 eta 0:13:17 +epoch [49/50] batch [495/500] time 1.568 (1.563) data 0.000 (0.002) loss 1.0146 
(1.0299) acc 71.8750 (73.6995) lr 1.7713e-05 eta 0:13:09 +epoch [49/50] batch [500/500] time 1.568 (1.563) data 0.000 (0.002) loss 1.2500 (1.0298) acc 71.8750 (73.7125) lr 7.8853e-06 eta 0:13:01 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,031 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [50/50] batch [5/500] time 1.553 (1.675) data 0.000 (0.165) loss 1.2383 (1.0951) acc 68.7500 (72.5000) lr 7.8853e-06 eta 0:13:49 +epoch [50/50] batch [10/500] time 1.578 (1.613) data 0.000 (0.083) loss 1.2822 (1.0132) acc 62.5000 (72.5000) lr 7.8853e-06 eta 0:13:10 +epoch [50/50] batch [15/500] time 1.541 (1.590) data 0.000 (0.055) loss 0.8442 (1.0407) acc 75.0000 (71.6667) lr 7.8853e-06 eta 0:12:50 +epoch [50/50] batch [20/500] time 1.580 (1.585) data 0.000 (0.042) loss 1.6572 (1.0825) acc 62.5000 (72.1875) lr 7.8853e-06 eta 0:12:40 +epoch [50/50] batch [25/500] time 1.557 (1.581) data 0.001 (0.033) loss 0.8125 (1.0586) acc 81.2500 (73.3750) lr 7.8853e-06 eta 0:12:30 +epoch [50/50] batch [30/500] time 1.569 (1.576) data 0.000 (0.028) loss 0.8975 (1.0560) acc 65.6250 (73.4375) lr 7.8853e-06 eta 0:12:20 +epoch [50/50] batch [35/500] time 1.561 (1.575) data 0.000 (0.024) loss 1.0029 (1.0495) acc 78.1250 (73.6607) lr 7.8853e-06 eta 0:12:12 +epoch [50/50] batch [40/500] time 1.573 (1.577) data 0.000 (0.021) loss 0.7979 (1.0479) acc 81.2500 (73.8281) lr 7.8853e-06 eta 0:12:05 +epoch [50/50] batch [45/500] time 1.539 (1.575) data 0.000 (0.019) loss 1.0254 (1.0513) acc 78.1250 (73.8194) lr 7.8853e-06 eta 0:11:56 +epoch [50/50] batch [50/500] time 1.552 (1.574) data 0.000 (0.017) loss 0.5010 (1.0366) acc 87.5000 (74.4375) lr 7.8853e-06 eta 0:11:48 +epoch [50/50] batch [55/500] time 1.561 (1.571) data 0.001 (0.015) loss 1.1396 (1.0600) acc 71.8750 (74.0909) lr 7.8853e-06 eta 0:11:39 +epoch [50/50] batch [60/500] time 1.543 (1.570) data 0.000 (0.014) loss 0.6738 (1.0489) acc 81.2500 (74.2188) lr 7.8853e-06 eta 0:11:30 +epoch [50/50] batch [65/500] 
time 1.552 (1.568) data 0.000 (0.013) loss 0.8530 (1.0330) acc 84.3750 (74.4712) lr 7.8853e-06 eta 0:11:22 +epoch [50/50] batch [70/500] time 1.563 (1.568) data 0.000 (0.012) loss 0.6328 (1.0183) acc 87.5000 (74.7768) lr 7.8853e-06 eta 0:11:14 +epoch [50/50] batch [75/500] time 1.572 (1.567) data 0.000 (0.011) loss 0.9751 (1.0213) acc 75.0000 (74.4583) lr 7.8853e-06 eta 0:11:05 +epoch [50/50] batch [80/500] time 1.559 (1.567) data 0.001 (0.011) loss 1.5928 (1.0395) acc 65.6250 (74.0234) lr 7.8853e-06 eta 0:10:58 +epoch [50/50] batch [85/500] time 1.553 (1.567) data 0.001 (0.010) loss 0.4905 (1.0326) acc 84.3750 (74.0074) lr 7.8853e-06 eta 0:10:50 +epoch [50/50] batch [90/500] time 1.557 (1.567) data 0.000 (0.010) loss 1.0879 (1.0269) acc 65.6250 (74.2014) lr 7.8853e-06 eta 0:10:42 +epoch [50/50] batch [95/500] time 1.560 (1.567) data 0.000 (0.009) loss 1.1426 (1.0302) acc 68.7500 (73.9145) lr 7.8853e-06 eta 0:10:34 +epoch [50/50] batch [100/500] time 1.557 (1.567) data 0.000 (0.009) loss 0.9683 (1.0373) acc 78.1250 (73.9062) lr 7.8853e-06 eta 0:10:26 +epoch [50/50] batch [105/500] time 1.557 (1.567) data 0.000 (0.008) loss 0.9785 (1.0457) acc 75.0000 (73.6905) lr 7.8853e-06 eta 0:10:19 +epoch [50/50] batch [110/500] time 1.586 (1.567) data 0.000 (0.008) loss 0.6924 (1.0581) acc 78.1250 (73.2670) lr 7.8853e-06 eta 0:10:11 +epoch [50/50] batch [115/500] time 1.557 (1.567) data 0.000 (0.008) loss 1.7217 (1.0579) acc 56.2500 (73.1522) lr 7.8853e-06 eta 0:10:03 +epoch [50/50] batch [120/500] time 1.532 (1.566) data 0.000 (0.007) loss 1.3594 (1.0624) acc 68.7500 (72.9948) lr 7.8853e-06 eta 0:09:55 +epoch [50/50] batch [125/500] time 1.560 (1.566) data 0.001 (0.007) loss 1.1084 (1.0736) acc 78.1250 (72.8750) lr 7.8853e-06 eta 0:09:47 +epoch [50/50] batch [130/500] time 1.547 (1.565) data 0.000 (0.007) loss 0.6816 (1.0569) acc 78.1250 (73.2692) lr 7.8853e-06 eta 0:09:39 +epoch [50/50] batch [135/500] time 1.553 (1.565) data 0.000 (0.007) loss 0.5181 (1.0461) acc 84.3750 
(73.5185) lr 7.8853e-06 eta 0:09:31 +epoch [50/50] batch [140/500] time 1.560 (1.565) data 0.000 (0.006) loss 1.2969 (1.0470) acc 75.0000 (73.4821) lr 7.8853e-06 eta 0:09:23 +epoch [50/50] batch [145/500] time 1.578 (1.564) data 0.000 (0.006) loss 0.7729 (1.0434) acc 68.7500 (73.3836) lr 7.8853e-06 eta 0:09:15 +epoch [50/50] batch [150/500] time 1.563 (1.564) data 0.001 (0.006) loss 0.7690 (1.0359) acc 71.8750 (73.3750) lr 7.8853e-06 eta 0:09:07 +epoch [50/50] batch [155/500] time 1.573 (1.564) data 0.000 (0.006) loss 1.3350 (1.0330) acc 68.7500 (73.5081) lr 7.8853e-06 eta 0:08:59 +epoch [50/50] batch [160/500] time 1.526 (1.564) data 0.000 (0.006) loss 1.1377 (1.0289) acc 71.8750 (73.6328) lr 7.8853e-06 eta 0:08:51 +epoch [50/50] batch [165/500] time 1.527 (1.564) data 0.001 (0.005) loss 1.3301 (1.0328) acc 65.6250 (73.5985) lr 7.8853e-06 eta 0:08:43 +epoch [50/50] batch [170/500] time 1.575 (1.563) data 0.000 (0.005) loss 2.2051 (1.0365) acc 62.5000 (73.5662) lr 7.8853e-06 eta 0:08:35 +epoch [50/50] batch [175/500] time 1.572 (1.563) data 0.000 (0.005) loss 0.9712 (1.0398) acc 81.2500 (73.5536) lr 7.8853e-06 eta 0:08:28 +epoch [50/50] batch [180/500] time 1.555 (1.564) data 0.000 (0.005) loss 0.6914 (1.0347) acc 87.5000 (73.6979) lr 7.8853e-06 eta 0:08:20 +epoch [50/50] batch [185/500] time 1.540 (1.564) data 0.000 (0.005) loss 1.0586 (1.0335) acc 78.1250 (73.7838) lr 7.8853e-06 eta 0:08:12 +epoch [50/50] batch [190/500] time 1.548 (1.563) data 0.000 (0.005) loss 1.7656 (1.0403) acc 62.5000 (73.6678) lr 7.8853e-06 eta 0:08:04 +epoch [50/50] batch [195/500] time 1.537 (1.563) data 0.000 (0.005) loss 0.8960 (1.0379) acc 71.8750 (73.7179) lr 7.8853e-06 eta 0:07:56 +epoch [50/50] batch [200/500] time 1.551 (1.563) data 0.000 (0.005) loss 0.9351 (1.0351) acc 78.1250 (73.8594) lr 7.8853e-06 eta 0:07:48 +epoch [50/50] batch [205/500] time 1.561 (1.562) data 0.000 (0.004) loss 1.0850 (1.0343) acc 71.8750 (73.8872) lr 7.8853e-06 eta 0:07:40 +epoch [50/50] batch [210/500] 
time 1.539 (1.562) data 0.000 (0.004) loss 1.3506 (1.0338) acc 65.6250 (73.8244) lr 7.8853e-06 eta 0:07:32 +epoch [50/50] batch [215/500] time 1.554 (1.562) data 0.000 (0.004) loss 0.7168 (1.0271) acc 78.1250 (73.9826) lr 7.8853e-06 eta 0:07:25 +epoch [50/50] batch [220/500] time 1.565 (1.562) data 0.000 (0.004) loss 0.9805 (1.0235) acc 75.0000 (73.9631) lr 7.8853e-06 eta 0:07:17 +epoch [50/50] batch [225/500] time 1.561 (1.562) data 0.000 (0.004) loss 0.5205 (1.0199) acc 81.2500 (74.0417) lr 7.8853e-06 eta 0:07:09 +epoch [50/50] batch [230/500] time 1.574 (1.562) data 0.000 (0.004) loss 0.5200 (1.0205) acc 84.3750 (74.0489) lr 7.8853e-06 eta 0:07:01 +epoch [50/50] batch [235/500] time 1.547 (1.562) data 0.000 (0.004) loss 0.9917 (1.0198) acc 78.1250 (74.0426) lr 7.8853e-06 eta 0:06:54 +epoch [50/50] batch [240/500] time 1.562 (1.563) data 0.000 (0.004) loss 0.7383 (1.0222) acc 81.2500 (73.9974) lr 7.8853e-06 eta 0:06:46 +epoch [50/50] batch [245/500] time 1.567 (1.563) data 0.000 (0.004) loss 0.6909 (1.0202) acc 81.2500 (74.0306) lr 7.8853e-06 eta 0:06:38 +epoch [50/50] batch [250/500] time 1.584 (1.563) data 0.001 (0.004) loss 0.8047 (1.0163) acc 81.2500 (74.0875) lr 7.8853e-06 eta 0:06:30 +epoch [50/50] batch [255/500] time 1.555 (1.563) data 0.000 (0.004) loss 1.7402 (1.0221) acc 65.6250 (73.9583) lr 7.8853e-06 eta 0:06:22 +epoch [50/50] batch [260/500] time 1.562 (1.563) data 0.000 (0.004) loss 1.6025 (1.0227) acc 65.6250 (73.8942) lr 7.8853e-06 eta 0:06:15 +epoch [50/50] batch [265/500] time 1.563 (1.563) data 0.000 (0.004) loss 0.9736 (1.0194) acc 75.0000 (73.9387) lr 7.8853e-06 eta 0:06:07 +epoch [50/50] batch [270/500] time 1.550 (1.563) data 0.000 (0.003) loss 0.6562 (1.0215) acc 87.5000 (73.9468) lr 7.8853e-06 eta 0:05:59 +epoch [50/50] batch [275/500] time 1.548 (1.563) data 0.000 (0.003) loss 0.7607 (1.0205) acc 78.1250 (73.9659) lr 7.8853e-06 eta 0:05:51 +epoch [50/50] batch [280/500] time 1.572 (1.563) data 0.000 (0.003) loss 0.9507 (1.0221) acc 
81.2500 (73.9062) lr 7.8853e-06 eta 0:05:43 +epoch [50/50] batch [285/500] time 1.565 (1.563) data 0.000 (0.003) loss 1.0654 (1.0267) acc 62.5000 (73.7939) lr 7.8853e-06 eta 0:05:36 +epoch [50/50] batch [290/500] time 1.567 (1.563) data 0.001 (0.003) loss 0.5005 (1.0262) acc 90.6250 (73.8362) lr 7.8853e-06 eta 0:05:28 +epoch [50/50] batch [295/500] time 1.536 (1.563) data 0.000 (0.003) loss 1.4512 (1.0295) acc 81.2500 (73.8347) lr 7.8853e-06 eta 0:05:20 +epoch [50/50] batch [300/500] time 1.572 (1.563) data 0.000 (0.003) loss 0.9019 (1.0282) acc 71.8750 (73.8229) lr 7.8853e-06 eta 0:05:12 +epoch [50/50] batch [305/500] time 1.573 (1.563) data 0.000 (0.003) loss 1.4365 (1.0293) acc 71.8750 (73.9037) lr 7.8853e-06 eta 0:05:04 +epoch [50/50] batch [310/500] time 1.555 (1.563) data 0.000 (0.003) loss 0.9390 (1.0288) acc 71.8750 (73.9113) lr 7.8853e-06 eta 0:04:57 +epoch [50/50] batch [315/500] time 1.566 (1.564) data 0.000 (0.003) loss 1.3096 (1.0273) acc 65.6250 (73.9187) lr 7.8853e-06 eta 0:04:49 +epoch [50/50] batch [320/500] time 1.564 (1.563) data 0.000 (0.003) loss 1.5840 (1.0283) acc 71.8750 (73.9258) lr 7.8853e-06 eta 0:04:41 +epoch [50/50] batch [325/500] time 1.583 (1.564) data 0.000 (0.003) loss 1.1504 (1.0291) acc 75.0000 (73.9423) lr 7.8853e-06 eta 0:04:33 +epoch [50/50] batch [330/500] time 1.595 (1.564) data 0.000 (0.003) loss 0.6060 (1.0279) acc 81.2500 (73.9299) lr 7.8853e-06 eta 0:04:25 +epoch [50/50] batch [335/500] time 1.574 (1.564) data 0.000 (0.003) loss 1.0781 (1.0287) acc 71.8750 (73.9552) lr 7.8853e-06 eta 0:04:18 +epoch [50/50] batch [340/500] time 1.579 (1.564) data 0.000 (0.003) loss 0.3896 (1.0296) acc 81.2500 (73.9062) lr 7.8853e-06 eta 0:04:10 +epoch [50/50] batch [345/500] time 1.561 (1.564) data 0.000 (0.003) loss 1.4238 (1.0289) acc 71.8750 (73.9402) lr 7.8853e-06 eta 0:04:02 +epoch [50/50] batch [350/500] time 1.559 (1.564) data 0.000 (0.003) loss 0.8379 (1.0294) acc 81.2500 (73.9554) lr 7.8853e-06 eta 0:03:54 +epoch [50/50] batch 
[355/500] time 1.543 (1.564) data 0.000 (0.003) loss 1.5156 (1.0299) acc 68.7500 (74.0053) lr 7.8853e-06 eta 0:03:46 +epoch [50/50] batch [360/500] time 1.564 (1.564) data 0.000 (0.003) loss 1.1367 (1.0311) acc 65.6250 (73.9323) lr 7.8853e-06 eta 0:03:38 +epoch [50/50] batch [365/500] time 1.543 (1.564) data 0.001 (0.003) loss 0.7285 (1.0282) acc 78.1250 (73.9897) lr 7.8853e-06 eta 0:03:31 +epoch [50/50] batch [370/500] time 1.569 (1.564) data 0.000 (0.003) loss 0.9312 (1.0302) acc 75.0000 (73.9527) lr 7.8853e-06 eta 0:03:23 +epoch [50/50] batch [375/500] time 1.548 (1.564) data 0.000 (0.003) loss 0.4570 (1.0258) acc 90.6250 (74.0750) lr 7.8853e-06 eta 0:03:15 +epoch [50/50] batch [380/500] time 1.557 (1.564) data 0.000 (0.003) loss 0.5576 (1.0245) acc 84.3750 (74.1530) lr 7.8853e-06 eta 0:03:07 +epoch [50/50] batch [385/500] time 1.551 (1.564) data 0.000 (0.003) loss 0.9648 (1.0257) acc 71.8750 (74.1153) lr 7.8853e-06 eta 0:02:59 +epoch [50/50] batch [390/500] time 1.571 (1.564) data 0.000 (0.003) loss 0.9028 (1.0231) acc 75.0000 (74.1827) lr 7.8853e-06 eta 0:02:52 +epoch [50/50] batch [395/500] time 1.553 (1.564) data 0.000 (0.002) loss 1.7705 (1.0244) acc 65.6250 (74.1851) lr 7.8853e-06 eta 0:02:44 +epoch [50/50] batch [400/500] time 1.546 (1.564) data 0.000 (0.002) loss 1.2012 (1.0251) acc 62.5000 (74.0703) lr 7.8853e-06 eta 0:02:36 +epoch [50/50] batch [405/500] time 1.550 (1.563) data 0.000 (0.002) loss 1.3545 (1.0265) acc 65.6250 (74.0201) lr 7.8853e-06 eta 0:02:28 +epoch [50/50] batch [410/500] time 1.553 (1.563) data 0.000 (0.002) loss 0.6792 (1.0256) acc 84.3750 (74.0320) lr 7.8853e-06 eta 0:02:20 +epoch [50/50] batch [415/500] time 1.566 (1.563) data 0.000 (0.002) loss 1.4648 (1.0251) acc 62.5000 (74.0361) lr 7.8853e-06 eta 0:02:12 +epoch [50/50] batch [420/500] time 1.545 (1.563) data 0.000 (0.002) loss 0.9683 (1.0251) acc 78.1250 (74.0625) lr 7.8853e-06 eta 0:02:05 +epoch [50/50] batch [425/500] time 1.573 (1.563) data 0.000 (0.002) loss 1.4932 
(1.0272) acc 75.0000 (74.0368) lr 7.8853e-06 eta 0:01:57 +epoch [50/50] batch [430/500] time 1.548 (1.563) data 0.000 (0.002) loss 0.8813 (1.0283) acc 78.1250 (74.0044) lr 7.8853e-06 eta 0:01:49 +epoch [50/50] batch [435/500] time 1.544 (1.563) data 0.000 (0.002) loss 0.7407 (1.0273) acc 75.0000 (74.0158) lr 7.8853e-06 eta 0:01:41 +epoch [50/50] batch [440/500] time 1.538 (1.563) data 0.000 (0.002) loss 1.3008 (1.0274) acc 62.5000 (73.9702) lr 7.8853e-06 eta 0:01:33 +epoch [50/50] batch [445/500] time 1.558 (1.563) data 0.000 (0.002) loss 0.4863 (1.0238) acc 84.3750 (74.0520) lr 7.8853e-06 eta 0:01:25 +epoch [50/50] batch [450/500] time 1.555 (1.562) data 0.000 (0.002) loss 1.1406 (1.0241) acc 65.6250 (74.0000) lr 7.8853e-06 eta 0:01:18 +epoch [50/50] batch [455/500] time 1.577 (1.563) data 0.000 (0.002) loss 0.9609 (1.0224) acc 81.2500 (74.0591) lr 7.8853e-06 eta 0:01:10 +epoch [50/50] batch [460/500] time 1.550 (1.563) data 0.000 (0.002) loss 1.4707 (1.0233) acc 71.8750 (74.0761) lr 7.8853e-06 eta 0:01:02 +epoch [50/50] batch [465/500] time 1.635 (1.563) data 0.000 (0.002) loss 0.9512 (1.0223) acc 75.0000 (74.0524) lr 7.8853e-06 eta 0:00:54 +epoch [50/50] batch [470/500] time 1.606 (1.563) data 0.000 (0.002) loss 0.4978 (1.0214) acc 78.1250 (74.0824) lr 7.8853e-06 eta 0:00:46 +epoch [50/50] batch [475/500] time 1.546 (1.563) data 0.001 (0.002) loss 0.6152 (1.0225) acc 81.2500 (74.0526) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [480/500] time 1.558 (1.563) data 0.000 (0.002) loss 1.6885 (1.0217) acc 59.3750 (74.0885) lr 7.8853e-06 eta 0:00:31 +epoch [50/50] batch [485/500] time 1.557 (1.563) data 0.001 (0.002) loss 0.7749 (1.0223) acc 75.0000 (74.0528) lr 7.8853e-06 eta 0:00:23 +epoch [50/50] batch [490/500] time 1.533 (1.562) data 0.000 (0.002) loss 1.2568 (1.0236) acc 71.8750 (74.0370) lr 7.8853e-06 eta 0:00:15 +epoch [50/50] batch [495/500] time 1.561 (1.562) data 0.001 (0.002) loss 0.9731 (1.0228) acc 78.1250 (74.0593) lr 7.8853e-06 eta 0:00:07 +epoch 
[50/50] batch [500/500] time 1.539 (1.562) data 0.000 (0.002) loss 1.6846 (1.0230) acc 65.6250 (74.0187) lr 1.9733e-06 eta 0:00:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,023 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.6% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 +Finish training +Deploy the model with the best val performance +Loading weights to prompt_learner from "output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar" (epoch = 38) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 39,086 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +Elapsed: 16:11:34 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar new file mode 100644 index 00000000..12f4da90 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..a1ddc6d0 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699477394.ckb-gpu-v.mitre.org.3662742.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699477394.ckb-gpu-v.mitre.org.3662742.0 new file mode 100644 index 00000000..2c58f866 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699477394.ckb-gpu-v.mitre.org.3662742.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..1a121be0 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,1822 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None 
+trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + 
PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.113 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 
+Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch 
+[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/50] batch [5/500] time 1.547 (2.691) data 0.000 (0.239) loss 2.3516 (2.9645) acc 50.0000 (42.5000) lr 1.0000e-05 eta 18:41:07 +epoch [1/50] batch [10/500] time 1.566 (2.126) data 0.001 (0.120) loss 2.1309 (2.6680) acc 59.3750 (47.8125) lr 1.0000e-05 eta 14:45:35 +epoch [1/50] batch [15/500] time 1.535 (1.935) data 0.000 (0.080) loss 1.8623 (2.4137) acc 46.8750 (50.0000) lr 1.0000e-05 eta 13:25:39 +epoch [1/50] batch [20/500] time 1.561 (1.841) data 0.000 (0.060) loss 2.0859 (2.3499) acc 46.8750 (50.0000) lr 1.0000e-05 eta 12:46:36 +epoch [1/50] batch [25/500] time 1.551 (1.784) data 0.000 (0.048) loss 1.4434 (2.2323) acc 71.8750 (52.0000) lr 1.0000e-05 eta 12:22:36 +epoch [1/50] batch [30/500] time 1.542 (1.746) data 0.000 (0.040) loss 1.3477 (2.1673) acc 65.6250 (53.1250) lr 1.0000e-05 eta 12:06:38 +epoch [1/50] batch [35/500] time 1.559 (1.720) data 0.001 (0.035) loss 1.2822 (2.1095) acc 68.7500 (53.8393) lr 1.0000e-05 eta 11:55:37 +epoch [1/50] batch [40/500] time 1.556 (1.700) data 0.000 (0.030) loss 2.2305 (2.0646) acc 50.0000 (54.5312) lr 1.0000e-05 eta 11:47:23 +epoch [1/50] batch [45/500] time 1.565 (1.685) data 0.000 (0.027) loss 1.4141 (1.9933) acc 53.1250 (55.7639) lr 1.0000e-05 eta 11:40:37 +epoch [1/50] batch [50/500] time 1.575 (1.673) data 0.000 (0.024) loss 1.7217 (1.9862) acc 56.2500 (55.5625) lr 1.0000e-05 eta 11:35:36 +epoch [1/50] batch [55/500] time 1.555 (1.664) data 0.000 (0.022) loss 2.0098 (1.9550) acc 53.1250 (56.0795) lr 1.0000e-05 eta 11:31:42 +epoch [1/50] batch [60/500] time 1.570 (1.655) data 0.001 (0.020) loss 1.7812 (1.9369) acc 62.5000 (56.5104) lr 1.0000e-05 eta 11:28:01 +epoch [1/50] batch [65/500] time 1.588 (1.649) data 0.001 (0.019) loss 0.9331 (1.8873) acc 71.8750 (57.3558) lr 1.0000e-05 eta 11:25:18 +epoch [1/50] 
batch [70/500] time 1.556 (1.642) data 0.000 (0.017) loss 1.4219 (1.8519) acc 65.6250 (57.8571) lr 1.0000e-05 eta 11:22:18 +epoch [1/50] batch [75/500] time 1.559 (1.637) data 0.000 (0.016) loss 1.8936 (1.8255) acc 59.3750 (58.5833) lr 1.0000e-05 eta 11:19:51 +epoch [1/50] batch [80/500] time 1.573 (1.632) data 0.000 (0.015) loss 1.1973 (1.8048) acc 56.2500 (58.9453) lr 1.0000e-05 eta 11:17:41 +epoch [1/50] batch [85/500] time 1.564 (1.627) data 0.000 (0.014) loss 1.7314 (1.8046) acc 59.3750 (58.6029) lr 1.0000e-05 eta 11:15:35 +epoch [1/50] batch [90/500] time 1.569 (1.623) data 0.000 (0.014) loss 1.2559 (1.7868) acc 56.2500 (58.8194) lr 1.0000e-05 eta 11:13:53 +epoch [1/50] batch [95/500] time 1.566 (1.620) data 0.000 (0.013) loss 1.3594 (1.7709) acc 71.8750 (59.1447) lr 1.0000e-05 eta 11:12:21 +epoch [1/50] batch [100/500] time 1.540 (1.616) data 0.000 (0.012) loss 1.1426 (1.7555) acc 68.7500 (59.4375) lr 1.0000e-05 eta 11:10:44 +epoch [1/50] batch [105/500] time 1.579 (1.613) data 0.000 (0.012) loss 1.5840 (1.7365) acc 68.7500 (59.8512) lr 1.0000e-05 eta 11:09:14 +epoch [1/50] batch [110/500] time 1.562 (1.611) data 0.000 (0.011) loss 1.3252 (1.7170) acc 78.1250 (60.3693) lr 1.0000e-05 eta 11:08:11 +epoch [1/50] batch [115/500] time 1.543 (1.609) data 0.000 (0.011) loss 1.1426 (1.7074) acc 71.8750 (60.6522) lr 1.0000e-05 eta 11:07:10 +epoch [1/50] batch [120/500] time 1.550 (1.606) data 0.000 (0.010) loss 1.3359 (1.6981) acc 59.3750 (60.8333) lr 1.0000e-05 eta 11:06:07 +epoch [1/50] batch [125/500] time 1.557 (1.604) data 0.000 (0.010) loss 1.1455 (1.6868) acc 68.7500 (61.1250) lr 1.0000e-05 eta 11:05:06 +epoch [1/50] batch [130/500] time 1.558 (1.603) data 0.000 (0.010) loss 1.6211 (1.6871) acc 62.5000 (61.2019) lr 1.0000e-05 eta 11:04:16 +epoch [1/50] batch [135/500] time 1.549 (1.601) data 0.000 (0.009) loss 1.7881 (1.6815) acc 56.2500 (61.4352) lr 1.0000e-05 eta 11:03:26 +epoch [1/50] batch [140/500] time 1.572 (1.600) data 0.001 (0.009) loss 1.6777 
(1.6734) acc 59.3750 (61.5179) lr 1.0000e-05 eta 11:02:48 +epoch [1/50] batch [145/500] time 1.533 (1.598) data 0.001 (0.009) loss 1.7451 (1.6702) acc 62.5000 (61.5733) lr 1.0000e-05 eta 11:01:51 +epoch [1/50] batch [150/500] time 1.544 (1.596) data 0.000 (0.008) loss 1.4453 (1.6645) acc 62.5000 (61.7708) lr 1.0000e-05 eta 11:01:11 +epoch [1/50] batch [155/500] time 1.541 (1.595) data 0.000 (0.008) loss 1.2041 (1.6575) acc 68.7500 (61.8145) lr 1.0000e-05 eta 11:00:28 +epoch [1/50] batch [160/500] time 1.564 (1.594) data 0.001 (0.008) loss 1.4932 (1.6524) acc 68.7500 (61.9531) lr 1.0000e-05 eta 10:59:57 +epoch [1/50] batch [165/500] time 1.590 (1.593) data 0.000 (0.008) loss 1.2861 (1.6442) acc 50.0000 (61.9886) lr 1.0000e-05 eta 10:59:31 +epoch [1/50] batch [170/500] time 1.567 (1.594) data 0.001 (0.007) loss 1.9189 (1.6443) acc 62.5000 (62.0037) lr 1.0000e-05 eta 10:59:34 +epoch [1/50] batch [175/500] time 1.556 (1.593) data 0.001 (0.007) loss 0.9751 (1.6303) acc 75.0000 (62.1429) lr 1.0000e-05 eta 10:58:56 +epoch [1/50] batch [180/500] time 1.550 (1.592) data 0.000 (0.007) loss 1.6406 (1.6306) acc 62.5000 (62.3438) lr 1.0000e-05 eta 10:58:24 +epoch [1/50] batch [185/500] time 1.575 (1.591) data 0.001 (0.007) loss 1.8311 (1.6264) acc 62.5000 (62.5338) lr 1.0000e-05 eta 10:57:59 +epoch [1/50] batch [190/500] time 1.597 (1.590) data 0.000 (0.007) loss 1.7725 (1.6253) acc 71.8750 (62.6480) lr 1.0000e-05 eta 10:57:29 +epoch [1/50] batch [195/500] time 1.595 (1.589) data 0.000 (0.007) loss 1.0312 (1.6205) acc 71.8750 (62.7404) lr 1.0000e-05 eta 10:57:06 +epoch [1/50] batch [200/500] time 1.569 (1.589) data 0.001 (0.006) loss 2.3223 (1.6221) acc 50.0000 (62.7344) lr 1.0000e-05 eta 10:56:40 +epoch [1/50] batch [205/500] time 1.550 (1.588) data 0.000 (0.006) loss 0.9180 (1.6103) acc 71.8750 (62.9268) lr 1.0000e-05 eta 10:56:23 +epoch [1/50] batch [210/500] time 1.575 (1.588) data 0.001 (0.006) loss 0.9785 (1.6064) acc 75.0000 (62.9613) lr 1.0000e-05 eta 10:56:02 +epoch 
[1/50] batch [215/500] time 1.564 (1.588) data 0.000 (0.006) loss 1.7549 (1.6034) acc 59.3750 (62.9651) lr 1.0000e-05 eta 10:55:49 +epoch [1/50] batch [220/500] time 1.569 (1.587) data 0.000 (0.006) loss 1.6797 (1.5979) acc 56.2500 (63.0682) lr 1.0000e-05 eta 10:55:34 +epoch [1/50] batch [225/500] time 1.555 (1.587) data 0.000 (0.006) loss 1.4434 (1.5980) acc 59.3750 (63.0278) lr 1.0000e-05 eta 10:55:13 +epoch [1/50] batch [230/500] time 1.563 (1.587) data 0.000 (0.006) loss 1.0625 (1.5912) acc 75.0000 (63.1793) lr 1.0000e-05 eta 10:54:58 +epoch [1/50] batch [235/500] time 1.562 (1.586) data 0.001 (0.006) loss 1.8760 (1.5891) acc 56.2500 (63.1649) lr 1.0000e-05 eta 10:54:39 +epoch [1/50] batch [240/500] time 1.556 (1.586) data 0.000 (0.005) loss 2.0391 (1.5904) acc 53.1250 (63.2031) lr 1.0000e-05 eta 10:54:17 +epoch [1/50] batch [245/500] time 1.541 (1.585) data 0.000 (0.005) loss 1.5566 (1.5902) acc 71.8750 (63.2781) lr 1.0000e-05 eta 10:53:57 +epoch [1/50] batch [250/500] time 1.549 (1.584) data 0.001 (0.005) loss 1.2197 (1.5844) acc 71.8750 (63.4625) lr 1.0000e-05 eta 10:53:31 +epoch [1/50] batch [255/500] time 1.544 (1.584) data 0.000 (0.005) loss 0.9180 (1.5802) acc 75.0000 (63.5294) lr 1.0000e-05 eta 10:53:10 +epoch [1/50] batch [260/500] time 1.558 (1.583) data 0.000 (0.005) loss 1.3281 (1.5795) acc 71.8750 (63.5457) lr 1.0000e-05 eta 10:52:54 +epoch [1/50] batch [265/500] time 1.555 (1.583) data 0.000 (0.005) loss 1.2812 (1.5703) acc 53.1250 (63.6557) lr 1.0000e-05 eta 10:52:37 +epoch [1/50] batch [270/500] time 1.558 (1.583) data 0.000 (0.005) loss 1.4941 (1.5716) acc 62.5000 (63.5880) lr 1.0000e-05 eta 10:52:21 +epoch [1/50] batch [275/500] time 1.558 (1.582) data 0.000 (0.005) loss 1.4463 (1.5643) acc 71.8750 (63.7386) lr 1.0000e-05 eta 10:52:00 +epoch [1/50] batch [280/500] time 1.543 (1.582) data 0.001 (0.005) loss 1.3633 (1.5627) acc 56.2500 (63.6830) lr 1.0000e-05 eta 10:51:43 +epoch [1/50] batch [285/500] time 1.565 (1.582) data 0.000 (0.005) loss 
1.5410 (1.5625) acc 56.2500 (63.6294) lr 1.0000e-05 eta 10:51:27 +epoch [1/50] batch [290/500] time 1.565 (1.581) data 0.000 (0.005) loss 1.6826 (1.5611) acc 68.7500 (63.7177) lr 1.0000e-05 eta 10:51:11 +epoch [1/50] batch [295/500] time 1.573 (1.581) data 0.000 (0.005) loss 1.7686 (1.5635) acc 53.1250 (63.6653) lr 1.0000e-05 eta 10:50:54 +epoch [1/50] batch [300/500] time 1.560 (1.581) data 0.000 (0.004) loss 2.2227 (1.5659) acc 59.3750 (63.5938) lr 1.0000e-05 eta 10:50:43 +epoch [1/50] batch [305/500] time 1.565 (1.580) data 0.000 (0.004) loss 1.2344 (1.5631) acc 68.7500 (63.6578) lr 1.0000e-05 eta 10:50:27 +epoch [1/50] batch [310/500] time 1.671 (1.580) data 0.000 (0.004) loss 1.2109 (1.5629) acc 68.7500 (63.6391) lr 1.0000e-05 eta 10:50:20 +epoch [1/50] batch [315/500] time 1.563 (1.580) data 0.001 (0.004) loss 1.5137 (1.5651) acc 59.3750 (63.5417) lr 1.0000e-05 eta 10:50:12 +epoch [1/50] batch [320/500] time 1.566 (1.580) data 0.001 (0.004) loss 1.7041 (1.5659) acc 56.2500 (63.5156) lr 1.0000e-05 eta 10:49:58 +epoch [1/50] batch [325/500] time 1.554 (1.580) data 0.000 (0.004) loss 0.9395 (1.5644) acc 71.8750 (63.5769) lr 1.0000e-05 eta 10:49:44 +epoch [1/50] batch [330/500] time 1.543 (1.580) data 0.000 (0.004) loss 1.0830 (1.5617) acc 68.7500 (63.5985) lr 1.0000e-05 eta 10:49:26 +epoch [1/50] batch [335/500] time 1.594 (1.580) data 0.000 (0.004) loss 1.2061 (1.5593) acc 68.7500 (63.6474) lr 1.0000e-05 eta 10:49:19 +epoch [1/50] batch [340/500] time 1.580 (1.579) data 0.000 (0.004) loss 1.9912 (1.5579) acc 50.0000 (63.6305) lr 1.0000e-05 eta 10:49:10 +epoch [1/50] batch [345/500] time 1.570 (1.579) data 0.000 (0.004) loss 1.8516 (1.5545) acc 56.2500 (63.7319) lr 1.0000e-05 eta 10:48:59 +epoch [1/50] batch [350/500] time 1.597 (1.579) data 0.000 (0.004) loss 1.3428 (1.5545) acc 62.5000 (63.7589) lr 1.0000e-05 eta 10:48:49 +epoch [1/50] batch [355/500] time 1.588 (1.579) data 0.000 (0.004) loss 1.2578 (1.5474) acc 65.6250 (63.9173) lr 1.0000e-05 eta 10:48:44 
+epoch [1/50] batch [360/500] time 1.564 (1.579) data 0.000 (0.004) loss 0.7729 (1.5438) acc 81.2500 (63.9931) lr 1.0000e-05 eta 10:48:32 +epoch [1/50] batch [365/500] time 1.561 (1.579) data 0.000 (0.004) loss 1.3926 (1.5444) acc 71.8750 (63.9897) lr 1.0000e-05 eta 10:48:17 +epoch [1/50] batch [370/500] time 1.536 (1.579) data 0.001 (0.004) loss 1.4385 (1.5431) acc 59.3750 (64.0034) lr 1.0000e-05 eta 10:47:58 +epoch [1/50] batch [375/500] time 1.567 (1.578) data 0.000 (0.004) loss 1.2969 (1.5439) acc 65.6250 (63.9333) lr 1.0000e-05 eta 10:47:48 +epoch [1/50] batch [380/500] time 1.552 (1.578) data 0.000 (0.004) loss 0.9595 (1.5426) acc 78.1250 (63.9309) lr 1.0000e-05 eta 10:47:35 +epoch [1/50] batch [385/500] time 1.585 (1.578) data 0.001 (0.004) loss 1.4238 (1.5422) acc 65.6250 (63.9042) lr 1.0000e-05 eta 10:47:21 +epoch [1/50] batch [390/500] time 1.575 (1.578) data 0.000 (0.003) loss 1.2520 (1.5395) acc 78.1250 (63.9744) lr 1.0000e-05 eta 10:47:09 +epoch [1/50] batch [395/500] time 1.583 (1.578) data 0.001 (0.003) loss 1.2812 (1.5387) acc 75.0000 (63.9873) lr 1.0000e-05 eta 10:47:02 +epoch [1/50] batch [400/500] time 1.559 (1.578) data 0.000 (0.003) loss 1.4199 (1.5356) acc 71.8750 (64.0547) lr 1.0000e-05 eta 10:46:48 +epoch [1/50] batch [405/500] time 1.577 (1.577) data 0.000 (0.003) loss 1.0479 (1.5369) acc 68.7500 (64.0123) lr 1.0000e-05 eta 10:46:38 +epoch [1/50] batch [410/500] time 1.579 (1.577) data 0.000 (0.003) loss 0.8203 (1.5333) acc 81.2500 (64.1159) lr 1.0000e-05 eta 10:46:27 +epoch [1/50] batch [415/500] time 1.574 (1.577) data 0.000 (0.003) loss 0.7832 (1.5305) acc 78.1250 (64.1792) lr 1.0000e-05 eta 10:46:15 +epoch [1/50] batch [420/500] time 1.576 (1.577) data 0.001 (0.003) loss 1.0283 (1.5273) acc 71.8750 (64.2113) lr 1.0000e-05 eta 10:46:04 +epoch [1/50] batch [425/500] time 1.552 (1.577) data 0.000 (0.003) loss 1.6855 (1.5247) acc 59.3750 (64.2206) lr 1.0000e-05 eta 10:45:53 +epoch [1/50] batch [430/500] time 1.564 (1.577) data 0.000 (0.003) 
loss 0.9365 (1.5220) acc 81.2500 (64.2951) lr 1.0000e-05 eta 10:45:40 +epoch [1/50] batch [435/500] time 1.572 (1.576) data 0.001 (0.003) loss 0.8037 (1.5216) acc 90.6250 (64.3750) lr 1.0000e-05 eta 10:45:26 +epoch [1/50] batch [440/500] time 1.572 (1.576) data 0.000 (0.003) loss 0.9634 (1.5231) acc 71.8750 (64.3608) lr 1.0000e-05 eta 10:45:17 +epoch [1/50] batch [445/500] time 1.559 (1.576) data 0.001 (0.003) loss 1.5225 (1.5218) acc 59.3750 (64.3258) lr 1.0000e-05 eta 10:45:06 +epoch [1/50] batch [450/500] time 1.605 (1.576) data 0.000 (0.003) loss 1.6738 (1.5194) acc 62.5000 (64.3681) lr 1.0000e-05 eta 10:45:01 +epoch [1/50] batch [455/500] time 1.538 (1.576) data 0.000 (0.003) loss 1.3281 (1.5197) acc 62.5000 (64.3544) lr 1.0000e-05 eta 10:44:54 +epoch [1/50] batch [460/500] time 1.571 (1.576) data 0.000 (0.003) loss 1.2383 (1.5183) acc 68.7500 (64.3410) lr 1.0000e-05 eta 10:44:43 +epoch [1/50] batch [465/500] time 1.547 (1.576) data 0.000 (0.003) loss 1.4102 (1.5167) acc 68.7500 (64.3952) lr 1.0000e-05 eta 10:44:30 +epoch [1/50] batch [470/500] time 1.554 (1.576) data 0.001 (0.003) loss 0.8804 (1.5152) acc 78.1250 (64.4082) lr 1.0000e-05 eta 10:44:19 +epoch [1/50] batch [475/500] time 1.555 (1.576) data 0.000 (0.003) loss 1.4150 (1.5133) acc 68.7500 (64.4342) lr 1.0000e-05 eta 10:44:09 +epoch [1/50] batch [480/500] time 1.561 (1.576) data 0.000 (0.003) loss 1.2441 (1.5096) acc 65.6250 (64.4596) lr 1.0000e-05 eta 10:43:58 +epoch [1/50] batch [485/500] time 1.551 (1.576) data 0.001 (0.003) loss 1.3711 (1.5074) acc 68.7500 (64.5232) lr 1.0000e-05 eta 10:43:46 +epoch [1/50] batch [490/500] time 1.574 (1.576) data 0.000 (0.003) loss 1.4854 (1.5057) acc 46.8750 (64.5281) lr 1.0000e-05 eta 10:43:36 +epoch [1/50] batch [495/500] time 1.557 (1.575) data 0.000 (0.003) loss 1.3203 (1.5032) acc 62.5000 (64.5581) lr 1.0000e-05 eta 10:43:24 +epoch [1/50] batch [500/500] time 1.542 (1.575) data 0.000 (0.003) loss 1.1758 (1.4989) acc 75.0000 (64.6562) lr 2.0000e-03 eta 
10:43:16 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,022 +* accuracy: 74.0% +* error: 26.0% +* macro_f1: 73.2% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/500] time 1.548 (1.711) data 0.001 (0.209) loss 1.7939 (1.8871) acc 53.1250 (59.3750) lr 2.0000e-03 eta 11:38:36 +epoch [2/50] batch [10/500] time 1.556 (1.649) data 0.001 (0.105) loss 1.0879 (1.6939) acc 75.0000 (61.5625) lr 2.0000e-03 eta 11:13:10 +epoch [2/50] batch [15/500] time 1.556 (1.620) data 0.000 (0.070) loss 1.5400 (1.5844) acc 71.8750 (64.5833) lr 2.0000e-03 eta 11:01:10 +epoch [2/50] batch [20/500] time 1.558 (1.606) data 0.000 (0.053) loss 1.2637 (1.5213) acc 65.6250 (65.3125) lr 2.0000e-03 eta 10:55:17 +epoch [2/50] batch [25/500] time 1.581 (1.598) data 0.000 (0.042) loss 1.7734 (1.5128) acc 68.7500 (65.6250) lr 2.0000e-03 eta 10:52:00 +epoch [2/50] batch [30/500] time 1.570 (1.593) data 0.000 (0.035) loss 0.7622 (1.4763) acc 65.6250 (65.7292) lr 2.0000e-03 eta 10:49:46 +epoch [2/50] batch [35/500] time 1.567 (1.588) data 0.000 (0.030) loss 1.2793 (1.4962) acc 68.7500 (65.2679) lr 2.0000e-03 eta 10:47:25 +epoch [2/50] batch [40/500] time 1.568 (1.585) data 0.000 (0.026) loss 0.9712 (1.5117) acc 71.8750 (64.5312) lr 2.0000e-03 eta 10:46:08 +epoch [2/50] batch [45/500] time 1.571 (1.582) data 0.000 (0.024) loss 1.3662 (1.5049) acc 62.5000 (64.2361) lr 2.0000e-03 eta 10:44:41 +epoch [2/50] batch [50/500] time 1.557 (1.580) data 0.000 (0.021) loss 1.0000 (1.5088) acc 81.2500 (64.6250) lr 2.0000e-03 eta 10:43:49 +epoch [2/50] batch [55/500] time 1.557 (1.578) data 0.000 (0.019) loss 1.4316 (1.4831) acc 65.6250 (64.9432) lr 2.0000e-03 eta 10:42:56 +epoch [2/50] batch [60/500] time 1.552 (1.577) data 0.000 (0.018) loss 0.7456 (1.4687) acc 84.3750 (65.1042) lr 2.0000e-03 eta 10:42:11 +epoch [2/50] batch [65/500] time 1.555 (1.575) data 0.000 (0.016) loss 1.4736 
(1.4392) acc 68.7500 (65.5769) lr 2.0000e-03 eta 10:41:33 +epoch [2/50] batch [70/500] time 1.563 (1.574) data 0.001 (0.015) loss 1.2520 (1.4131) acc 78.1250 (66.2946) lr 2.0000e-03 eta 10:41:00 +epoch [2/50] batch [75/500] time 1.553 (1.573) data 0.000 (0.014) loss 0.6807 (1.3990) acc 71.8750 (66.4583) lr 2.0000e-03 eta 10:40:30 +epoch [2/50] batch [80/500] time 1.571 (1.573) data 0.001 (0.013) loss 1.1221 (1.3959) acc 71.8750 (66.6797) lr 2.0000e-03 eta 10:40:16 +epoch [2/50] batch [85/500] time 1.572 (1.573) data 0.000 (0.013) loss 1.0098 (1.3837) acc 81.2500 (66.8015) lr 2.0000e-03 eta 10:39:57 +epoch [2/50] batch [90/500] time 1.558 (1.572) data 0.001 (0.012) loss 1.1465 (1.3791) acc 78.1250 (66.9792) lr 2.0000e-03 eta 10:39:33 +epoch [2/50] batch [95/500] time 1.567 (1.571) data 0.001 (0.011) loss 1.8760 (1.3786) acc 65.6250 (66.9079) lr 2.0000e-03 eta 10:39:09 +epoch [2/50] batch [100/500] time 1.557 (1.571) data 0.000 (0.011) loss 0.9229 (1.3626) acc 78.1250 (67.0312) lr 2.0000e-03 eta 10:38:44 +epoch [2/50] batch [105/500] time 1.573 (1.571) data 0.000 (0.010) loss 1.8467 (1.3641) acc 62.5000 (66.9643) lr 2.0000e-03 eta 10:38:37 +epoch [2/50] batch [110/500] time 1.570 (1.571) data 0.000 (0.010) loss 0.9014 (1.3510) acc 81.2500 (67.2443) lr 2.0000e-03 eta 10:38:47 +epoch [2/50] batch [115/500] time 1.564 (1.571) data 0.001 (0.009) loss 1.0127 (1.3445) acc 68.7500 (67.3641) lr 2.0000e-03 eta 10:38:36 +epoch [2/50] batch [120/500] time 1.569 (1.571) data 0.000 (0.009) loss 1.4131 (1.3457) acc 65.6250 (67.2656) lr 2.0000e-03 eta 10:38:24 +epoch [2/50] batch [125/500] time 1.563 (1.571) data 0.000 (0.009) loss 1.3750 (1.3428) acc 56.2500 (67.3250) lr 2.0000e-03 eta 10:38:06 +epoch [2/50] batch [130/500] time 1.557 (1.570) data 0.000 (0.008) loss 1.4502 (1.3461) acc 50.0000 (67.2837) lr 2.0000e-03 eta 10:37:46 +epoch [2/50] batch [135/500] time 1.583 (1.570) data 0.000 (0.008) loss 1.7471 (1.3460) acc 62.5000 (67.3611) lr 2.0000e-03 eta 10:37:31 +epoch [2/50] 
batch [140/500] time 1.551 (1.570) data 0.000 (0.008) loss 1.7100 (1.3483) acc 62.5000 (67.3884) lr 2.0000e-03 eta 10:37:13 +epoch [2/50] batch [145/500] time 1.565 (1.570) data 0.000 (0.008) loss 1.0879 (1.3424) acc 68.7500 (67.4784) lr 2.0000e-03 eta 10:37:05 +epoch [2/50] batch [150/500] time 1.565 (1.569) data 0.000 (0.007) loss 1.4873 (1.3415) acc 62.5000 (67.5208) lr 2.0000e-03 eta 10:36:52 +epoch [2/50] batch [155/500] time 1.556 (1.570) data 0.000 (0.007) loss 1.4609 (1.3347) acc 62.5000 (67.6613) lr 2.0000e-03 eta 10:36:50 +epoch [2/50] batch [160/500] time 1.588 (1.570) data 0.000 (0.007) loss 0.7773 (1.3264) acc 78.1250 (67.7734) lr 2.0000e-03 eta 10:36:51 +epoch [2/50] batch [165/500] time 1.532 (1.570) data 0.000 (0.007) loss 1.0420 (1.3270) acc 62.5000 (67.7462) lr 2.0000e-03 eta 10:36:41 +epoch [2/50] batch [170/500] time 1.585 (1.570) data 0.000 (0.007) loss 1.0264 (1.3241) acc 84.3750 (67.9044) lr 2.0000e-03 eta 10:36:32 +epoch [2/50] batch [175/500] time 1.552 (1.569) data 0.000 (0.006) loss 0.9590 (1.3183) acc 75.0000 (67.9821) lr 2.0000e-03 eta 10:36:16 +epoch [2/50] batch [180/500] time 1.566 (1.570) data 0.000 (0.006) loss 2.2129 (1.3207) acc 59.3750 (67.9340) lr 2.0000e-03 eta 10:36:10 +epoch [2/50] batch [185/500] time 1.552 (1.569) data 0.000 (0.006) loss 1.5068 (1.3188) acc 65.6250 (68.1419) lr 2.0000e-03 eta 10:35:52 +epoch [2/50] batch [190/500] time 1.561 (1.569) data 0.000 (0.006) loss 0.9888 (1.3158) acc 65.6250 (68.0592) lr 2.0000e-03 eta 10:35:37 +epoch [2/50] batch [195/500] time 1.573 (1.569) data 0.000 (0.006) loss 1.2607 (1.3266) acc 78.1250 (67.9487) lr 2.0000e-03 eta 10:35:24 +epoch [2/50] batch [200/500] time 1.582 (1.569) data 0.000 (0.006) loss 2.0859 (1.3289) acc 53.1250 (67.9219) lr 2.0000e-03 eta 10:35:15 +epoch [2/50] batch [205/500] time 1.557 (1.568) data 0.000 (0.006) loss 1.3711 (1.3270) acc 68.7500 (67.9268) lr 2.0000e-03 eta 10:34:58 +epoch [2/50] batch [210/500] time 1.563 (1.568) data 0.000 (0.005) loss 0.7485 
(1.3210) acc 81.2500 (68.0804) lr 2.0000e-03 eta 10:34:52 +epoch [2/50] batch [215/500] time 1.542 (1.568) data 0.000 (0.005) loss 1.3516 (1.3200) acc 62.5000 (68.1250) lr 2.0000e-03 eta 10:34:38 +epoch [2/50] batch [220/500] time 1.562 (1.568) data 0.000 (0.005) loss 1.6396 (1.3234) acc 65.6250 (68.1250) lr 2.0000e-03 eta 10:34:23 +epoch [2/50] batch [225/500] time 1.563 (1.567) data 0.000 (0.005) loss 0.7334 (1.3216) acc 71.8750 (68.2083) lr 2.0000e-03 eta 10:34:08 +epoch [2/50] batch [230/500] time 1.555 (1.567) data 0.000 (0.005) loss 1.2139 (1.3230) acc 62.5000 (68.1386) lr 2.0000e-03 eta 10:33:52 +epoch [2/50] batch [235/500] time 1.570 (1.567) data 0.000 (0.005) loss 0.6670 (1.3188) acc 87.5000 (68.2314) lr 2.0000e-03 eta 10:33:43 +epoch [2/50] batch [240/500] time 1.550 (1.567) data 0.000 (0.005) loss 1.0303 (1.3153) acc 71.8750 (68.2552) lr 2.0000e-03 eta 10:33:29 +epoch [2/50] batch [245/500] time 1.580 (1.566) data 0.000 (0.005) loss 1.5928 (1.3170) acc 65.6250 (68.2270) lr 2.0000e-03 eta 10:33:15 +epoch [2/50] batch [250/500] time 1.644 (1.567) data 0.000 (0.005) loss 1.3203 (1.3213) acc 75.0000 (68.2500) lr 2.0000e-03 eta 10:33:15 +epoch [2/50] batch [255/500] time 1.586 (1.567) data 0.000 (0.005) loss 1.1250 (1.3208) acc 65.6250 (68.2108) lr 2.0000e-03 eta 10:33:08 +epoch [2/50] batch [260/500] time 1.566 (1.567) data 0.000 (0.004) loss 1.5342 (1.3188) acc 65.6250 (68.1971) lr 2.0000e-03 eta 10:33:03 +epoch [2/50] batch [265/500] time 1.576 (1.567) data 0.000 (0.004) loss 1.5488 (1.3206) acc 62.5000 (68.1014) lr 2.0000e-03 eta 10:33:00 +epoch [2/50] batch [270/500] time 1.560 (1.567) data 0.000 (0.004) loss 1.0879 (1.3167) acc 78.1250 (68.1481) lr 2.0000e-03 eta 10:32:47 +epoch [2/50] batch [275/500] time 1.545 (1.567) data 0.000 (0.004) loss 1.3262 (1.3158) acc 62.5000 (68.1364) lr 2.0000e-03 eta 10:32:32 +epoch [2/50] batch [280/500] time 1.562 (1.567) data 0.000 (0.004) loss 1.2900 (1.3166) acc 68.7500 (68.1473) lr 2.0000e-03 eta 10:32:25 +epoch 
[2/50] batch [285/500] time 1.583 (1.567) data 0.000 (0.004) loss 1.7959 (1.3190) acc 65.6250 (68.0702) lr 2.0000e-03 eta 10:32:19 +epoch [2/50] batch [290/500] time 1.565 (1.567) data 0.000 (0.004) loss 1.1172 (1.3139) acc 71.8750 (68.1681) lr 2.0000e-03 eta 10:32:10 +epoch [2/50] batch [295/500] time 1.543 (1.567) data 0.001 (0.004) loss 1.0518 (1.3119) acc 71.8750 (68.1886) lr 2.0000e-03 eta 10:32:03 +epoch [2/50] batch [300/500] time 1.564 (1.567) data 0.000 (0.004) loss 0.8979 (1.3131) acc 78.1250 (68.1979) lr 2.0000e-03 eta 10:31:59 +epoch [2/50] batch [305/500] time 1.582 (1.567) data 0.000 (0.004) loss 1.4443 (1.3099) acc 65.6250 (68.2070) lr 2.0000e-03 eta 10:31:48 +epoch [2/50] batch [310/500] time 1.551 (1.567) data 0.000 (0.004) loss 1.2939 (1.3123) acc 71.8750 (68.1552) lr 2.0000e-03 eta 10:31:33 +epoch [2/50] batch [315/500] time 1.551 (1.566) data 0.001 (0.004) loss 1.5166 (1.3121) acc 65.6250 (68.2044) lr 2.0000e-03 eta 10:31:22 +epoch [2/50] batch [320/500] time 1.553 (1.566) data 0.000 (0.004) loss 1.7461 (1.3087) acc 59.3750 (68.2715) lr 2.0000e-03 eta 10:31:11 +epoch [2/50] batch [325/500] time 1.554 (1.566) data 0.000 (0.004) loss 1.4365 (1.3084) acc 71.8750 (68.2788) lr 2.0000e-03 eta 10:31:01 +epoch [2/50] batch [330/500] time 1.569 (1.566) data 0.000 (0.004) loss 1.8252 (1.3108) acc 65.6250 (68.3144) lr 2.0000e-03 eta 10:30:49 +epoch [2/50] batch [335/500] time 1.536 (1.566) data 0.000 (0.004) loss 0.6475 (1.3070) acc 78.1250 (68.3675) lr 2.0000e-03 eta 10:30:36 +epoch [2/50] batch [340/500] time 1.547 (1.566) data 0.000 (0.003) loss 1.6201 (1.3070) acc 68.7500 (68.4007) lr 2.0000e-03 eta 10:30:25 +epoch [2/50] batch [345/500] time 1.562 (1.566) data 0.000 (0.003) loss 1.3291 (1.3085) acc 65.6250 (68.3696) lr 2.0000e-03 eta 10:30:15 +epoch [2/50] batch [350/500] time 1.563 (1.565) data 0.000 (0.003) loss 0.6616 (1.3104) acc 81.2500 (68.3482) lr 2.0000e-03 eta 10:30:04 +epoch [2/50] batch [355/500] time 1.559 (1.565) data 0.000 (0.003) loss 
1.5146 (1.3092) acc 65.6250 (68.3979) lr 2.0000e-03 eta 10:29:55 +epoch [2/50] batch [360/500] time 1.541 (1.565) data 0.000 (0.003) loss 1.0664 (1.3108) acc 62.5000 (68.3594) lr 2.0000e-03 eta 10:29:48 +epoch [2/50] batch [365/500] time 1.571 (1.565) data 0.000 (0.003) loss 1.3340 (1.3116) acc 71.8750 (68.3562) lr 2.0000e-03 eta 10:29:41 +epoch [2/50] batch [370/500] time 1.586 (1.565) data 0.000 (0.003) loss 0.8262 (1.3089) acc 71.8750 (68.3530) lr 2.0000e-03 eta 10:29:33 +epoch [2/50] batch [375/500] time 1.577 (1.565) data 0.000 (0.003) loss 1.0449 (1.3128) acc 78.1250 (68.2917) lr 2.0000e-03 eta 10:29:24 +epoch [2/50] batch [380/500] time 1.555 (1.565) data 0.000 (0.003) loss 0.6396 (1.3089) acc 75.0000 (68.3388) lr 2.0000e-03 eta 10:29:15 +epoch [2/50] batch [385/500] time 1.565 (1.565) data 0.000 (0.003) loss 1.8672 (1.3083) acc 59.3750 (68.3766) lr 2.0000e-03 eta 10:29:01 +epoch [2/50] batch [390/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.3525 (1.3082) acc 65.6250 (68.3413) lr 2.0000e-03 eta 10:28:54 +epoch [2/50] batch [395/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.6152 (1.3099) acc 56.2500 (68.3386) lr 2.0000e-03 eta 10:28:51 +epoch [2/50] batch [400/500] time 1.555 (1.565) data 0.001 (0.003) loss 1.0342 (1.3123) acc 81.2500 (68.3047) lr 2.0000e-03 eta 10:28:41 +epoch [2/50] batch [405/500] time 1.569 (1.565) data 0.000 (0.003) loss 0.9194 (1.3083) acc 75.0000 (68.3719) lr 2.0000e-03 eta 10:28:32 +epoch [2/50] batch [410/500] time 1.540 (1.565) data 0.000 (0.003) loss 1.5625 (1.3089) acc 62.5000 (68.3841) lr 2.0000e-03 eta 10:28:21 +epoch [2/50] batch [415/500] time 1.563 (1.565) data 0.000 (0.003) loss 0.5527 (1.3054) acc 90.6250 (68.5166) lr 2.0000e-03 eta 10:28:11 +epoch [2/50] batch [420/500] time 1.567 (1.565) data 0.000 (0.003) loss 1.9014 (1.3074) acc 59.3750 (68.5045) lr 2.0000e-03 eta 10:28:01 +epoch [2/50] batch [425/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.6377 (1.3074) acc 62.5000 (68.5221) lr 2.0000e-03 eta 10:27:50 
+epoch [2/50] batch [430/500] time 1.539 (1.565) data 0.000 (0.003) loss 1.3330 (1.3064) acc 62.5000 (68.4811) lr 2.0000e-03 eta 10:27:43 +epoch [2/50] batch [435/500] time 1.591 (1.565) data 0.000 (0.003) loss 0.7407 (1.3072) acc 78.1250 (68.4555) lr 2.0000e-03 eta 10:27:38 +epoch [2/50] batch [440/500] time 1.583 (1.565) data 0.000 (0.003) loss 1.1201 (1.3039) acc 71.8750 (68.5085) lr 2.0000e-03 eta 10:27:34 +epoch [2/50] batch [445/500] time 1.574 (1.565) data 0.000 (0.003) loss 1.0986 (1.3051) acc 78.1250 (68.5183) lr 2.0000e-03 eta 10:27:25 +epoch [2/50] batch [450/500] time 1.552 (1.565) data 0.000 (0.003) loss 0.9971 (1.3030) acc 75.0000 (68.5556) lr 2.0000e-03 eta 10:27:19 +epoch [2/50] batch [455/500] time 1.568 (1.565) data 0.000 (0.003) loss 2.0117 (1.3023) acc 56.2500 (68.5852) lr 2.0000e-03 eta 10:27:11 +epoch [2/50] batch [460/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.5977 (1.3031) acc 62.5000 (68.5666) lr 2.0000e-03 eta 10:27:02 +epoch [2/50] batch [465/500] time 1.564 (1.565) data 0.000 (0.003) loss 0.9819 (1.3011) acc 68.7500 (68.5282) lr 2.0000e-03 eta 10:26:53 +epoch [2/50] batch [470/500] time 1.538 (1.565) data 0.000 (0.003) loss 0.7437 (1.2982) acc 78.1250 (68.5572) lr 2.0000e-03 eta 10:26:41 +epoch [2/50] batch [475/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.2246 (1.2996) acc 75.0000 (68.5592) lr 2.0000e-03 eta 10:26:30 +epoch [2/50] batch [480/500] time 1.531 (1.565) data 0.000 (0.003) loss 0.8628 (1.2967) acc 71.8750 (68.6198) lr 2.0000e-03 eta 10:26:19 +epoch [2/50] batch [485/500] time 1.559 (1.564) data 0.001 (0.003) loss 1.0752 (1.2947) acc 81.2500 (68.6534) lr 2.0000e-03 eta 10:26:09 +epoch [2/50] batch [490/500] time 1.513 (1.564) data 0.000 (0.003) loss 1.5518 (1.2930) acc 56.2500 (68.6543) lr 2.0000e-03 eta 10:25:56 +epoch [2/50] batch [495/500] time 1.555 (1.564) data 0.000 (0.003) loss 1.0498 (1.2895) acc 71.8750 (68.7311) lr 2.0000e-03 eta 10:25:45 +epoch [2/50] batch [500/500] time 1.536 (1.564) data 0.000 (0.002) 
loss 1.1924 (1.2871) acc 62.5000 (68.7625) lr 1.9980e-03 eta 10:25:32 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,474 +* accuracy: 76.9% +* error: 23.1% +* macro_f1: 76.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/500] time 1.530 (1.701) data 0.001 (0.196) loss 1.0645 (1.0272) acc 75.0000 (72.5000) lr 1.9980e-03 eta 11:20:16 +epoch [3/50] batch [10/500] time 1.526 (1.625) data 0.001 (0.098) loss 1.5176 (1.0540) acc 62.5000 (73.7500) lr 1.9980e-03 eta 10:49:48 +epoch [3/50] batch [15/500] time 1.550 (1.598) data 0.000 (0.066) loss 1.0430 (1.0806) acc 68.7500 (72.0833) lr 1.9980e-03 eta 10:38:47 +epoch [3/50] batch [20/500] time 1.556 (1.590) data 0.000 (0.049) loss 1.0537 (1.1208) acc 75.0000 (72.3438) lr 1.9980e-03 eta 10:35:23 +epoch [3/50] batch [25/500] time 1.566 (1.585) data 0.001 (0.040) loss 0.8604 (1.1368) acc 81.2500 (72.1250) lr 1.9980e-03 eta 10:33:13 +epoch [3/50] batch [30/500] time 1.577 (1.580) data 0.000 (0.033) loss 1.3623 (1.1972) acc 78.1250 (71.3542) lr 1.9980e-03 eta 10:31:17 +epoch [3/50] batch [35/500] time 1.562 (1.576) data 0.001 (0.028) loss 0.5063 (1.1668) acc 87.5000 (72.0536) lr 1.9980e-03 eta 10:29:39 +epoch [3/50] batch [40/500] time 1.592 (1.579) data 0.000 (0.025) loss 1.0732 (1.1641) acc 65.6250 (72.1875) lr 1.9980e-03 eta 10:30:27 +epoch [3/50] batch [45/500] time 1.558 (1.578) data 0.000 (0.022) loss 1.3545 (1.1634) acc 65.6250 (72.0139) lr 1.9980e-03 eta 10:29:49 +epoch [3/50] batch [50/500] time 1.553 (1.576) data 0.001 (0.020) loss 0.7183 (1.1433) acc 81.2500 (72.5000) lr 1.9980e-03 eta 10:28:58 +epoch [3/50] batch [55/500] time 1.560 (1.575) data 0.000 (0.018) loss 1.0586 (1.1695) acc 75.0000 (71.8182) lr 1.9980e-03 eta 10:28:25 +epoch [3/50] batch [60/500] time 1.552 (1.574) data 0.001 (0.017) loss 1.0137 (1.1949) acc 65.6250 (71.1458) lr 1.9980e-03 eta 10:27:54 +epoch [3/50] batch 
[65/500] time 1.576 (1.572) data 0.001 (0.016) loss 0.9312 (1.1818) acc 71.8750 (71.4423) lr 1.9980e-03 eta 10:27:14 +epoch [3/50] batch [70/500] time 1.570 (1.572) data 0.000 (0.014) loss 1.0010 (1.1879) acc 78.1250 (71.3839) lr 1.9980e-03 eta 10:26:46 +epoch [3/50] batch [75/500] time 1.570 (1.571) data 0.000 (0.014) loss 0.7905 (1.1924) acc 78.1250 (71.1667) lr 1.9980e-03 eta 10:26:35 +epoch [3/50] batch [80/500] time 1.574 (1.572) data 0.000 (0.013) loss 1.0312 (1.1987) acc 78.1250 (70.9766) lr 1.9980e-03 eta 10:26:32 +epoch [3/50] batch [85/500] time 1.558 (1.571) data 0.000 (0.012) loss 1.4355 (1.2010) acc 68.7500 (70.7721) lr 1.9980e-03 eta 10:26:10 +epoch [3/50] batch [90/500] time 1.573 (1.571) data 0.000 (0.011) loss 1.2314 (1.1905) acc 62.5000 (70.7986) lr 1.9980e-03 eta 10:26:03 +epoch [3/50] batch [95/500] time 1.580 (1.571) data 0.000 (0.011) loss 1.8574 (1.1919) acc 65.6250 (70.8224) lr 1.9980e-03 eta 10:25:46 +epoch [3/50] batch [100/500] time 1.552 (1.570) data 0.000 (0.010) loss 0.8765 (1.1760) acc 84.3750 (71.2812) lr 1.9980e-03 eta 10:25:28 +epoch [3/50] batch [105/500] time 1.572 (1.570) data 0.000 (0.010) loss 1.5996 (1.1816) acc 62.5000 (70.9821) lr 1.9980e-03 eta 10:25:13 +epoch [3/50] batch [110/500] time 1.540 (1.569) data 0.001 (0.009) loss 1.5449 (1.1886) acc 65.6250 (70.6818) lr 1.9980e-03 eta 10:24:50 +epoch [3/50] batch [115/500] time 1.569 (1.569) data 0.000 (0.009) loss 1.2197 (1.1928) acc 65.6250 (70.5978) lr 1.9980e-03 eta 10:24:37 +epoch [3/50] batch [120/500] time 1.572 (1.569) data 0.000 (0.009) loss 0.7881 (1.1859) acc 81.2500 (70.5469) lr 1.9980e-03 eta 10:24:20 +epoch [3/50] batch [125/500] time 1.548 (1.568) data 0.000 (0.008) loss 1.2715 (1.2017) acc 65.6250 (70.2750) lr 1.9980e-03 eta 10:24:03 +epoch [3/50] batch [130/500] time 1.557 (1.568) data 0.001 (0.008) loss 0.9473 (1.1980) acc 68.7500 (70.2885) lr 1.9980e-03 eta 10:23:53 +epoch [3/50] batch [135/500] time 1.573 (1.568) data 0.000 (0.008) loss 1.0381 (1.1989) acc 
65.6250 (70.1620) lr 1.9980e-03 eta 10:23:34 +epoch [3/50] batch [140/500] time 1.553 (1.568) data 0.000 (0.007) loss 1.4629 (1.2097) acc 65.6250 (69.9777) lr 1.9980e-03 eta 10:23:31 +epoch [3/50] batch [145/500] time 1.526 (1.568) data 0.000 (0.007) loss 1.0244 (1.2150) acc 75.0000 (69.8060) lr 1.9980e-03 eta 10:23:14 +epoch [3/50] batch [150/500] time 1.531 (1.567) data 0.000 (0.007) loss 1.0342 (1.2182) acc 78.1250 (69.9167) lr 1.9980e-03 eta 10:22:53 +epoch [3/50] batch [155/500] time 1.546 (1.567) data 0.001 (0.007) loss 1.1094 (1.2276) acc 75.0000 (69.7379) lr 1.9980e-03 eta 10:22:39 +epoch [3/50] batch [160/500] time 1.609 (1.567) data 0.000 (0.007) loss 1.1230 (1.2221) acc 71.8750 (69.8047) lr 1.9980e-03 eta 10:22:44 +epoch [3/50] batch [165/500] time 1.607 (1.570) data 0.000 (0.006) loss 1.5088 (1.2231) acc 56.2500 (69.7348) lr 1.9980e-03 eta 10:23:29 +epoch [3/50] batch [170/500] time 1.552 (1.571) data 0.000 (0.006) loss 1.3027 (1.2230) acc 68.7500 (69.6140) lr 1.9980e-03 eta 10:23:54 +epoch [3/50] batch [175/500] time 1.559 (1.570) data 0.000 (0.006) loss 1.1973 (1.2240) acc 62.5000 (69.5179) lr 1.9980e-03 eta 10:23:33 +epoch [3/50] batch [180/500] time 1.558 (1.570) data 0.000 (0.006) loss 1.1006 (1.2252) acc 62.5000 (69.5139) lr 1.9980e-03 eta 10:23:14 +epoch [3/50] batch [185/500] time 1.558 (1.570) data 0.000 (0.006) loss 1.5400 (1.2219) acc 65.6250 (69.6115) lr 1.9980e-03 eta 10:23:12 +epoch [3/50] batch [190/500] time 1.553 (1.570) data 0.000 (0.006) loss 1.4570 (1.2230) acc 59.3750 (69.5230) lr 1.9980e-03 eta 10:22:53 +epoch [3/50] batch [195/500] time 1.578 (1.569) data 0.000 (0.005) loss 1.5811 (1.2252) acc 62.5000 (69.5032) lr 1.9980e-03 eta 10:22:39 +epoch [3/50] batch [200/500] time 1.555 (1.569) data 0.000 (0.005) loss 0.8022 (1.2273) acc 71.8750 (69.3906) lr 1.9980e-03 eta 10:22:28 +epoch [3/50] batch [205/500] time 1.579 (1.569) data 0.001 (0.005) loss 1.2549 (1.2286) acc 75.0000 (69.3598) lr 1.9980e-03 eta 10:22:19 +epoch [3/50] batch 
[210/500] time 1.574 (1.569) data 0.000 (0.005) loss 1.0605 (1.2263) acc 71.8750 (69.4494) lr 1.9980e-03 eta 10:22:09 +epoch [3/50] batch [215/500] time 1.570 (1.569) data 0.000 (0.005) loss 0.9790 (1.2264) acc 71.8750 (69.5640) lr 1.9980e-03 eta 10:21:56 +epoch [3/50] batch [220/500] time 1.575 (1.569) data 0.000 (0.005) loss 1.0342 (1.2281) acc 81.2500 (69.5739) lr 1.9980e-03 eta 10:21:41 +epoch [3/50] batch [225/500] time 1.569 (1.568) data 0.000 (0.005) loss 1.4854 (1.2287) acc 68.7500 (69.4861) lr 1.9980e-03 eta 10:21:30 +epoch [3/50] batch [230/500] time 1.599 (1.569) data 0.001 (0.005) loss 1.4150 (1.2337) acc 68.7500 (69.4022) lr 1.9980e-03 eta 10:21:30 +epoch [3/50] batch [235/500] time 1.575 (1.569) data 0.001 (0.005) loss 0.9829 (1.2346) acc 75.0000 (69.4016) lr 1.9980e-03 eta 10:21:19 +epoch [3/50] batch [240/500] time 1.552 (1.569) data 0.000 (0.005) loss 1.9053 (1.2344) acc 56.2500 (69.4792) lr 1.9980e-03 eta 10:21:07 +epoch [3/50] batch [245/500] time 1.572 (1.569) data 0.000 (0.004) loss 1.4160 (1.2380) acc 56.2500 (69.4005) lr 1.9980e-03 eta 10:21:02 +epoch [3/50] batch [250/500] time 1.543 (1.568) data 0.000 (0.004) loss 1.0020 (1.2367) acc 81.2500 (69.5000) lr 1.9980e-03 eta 10:20:50 +epoch [3/50] batch [255/500] time 1.556 (1.568) data 0.000 (0.004) loss 1.5078 (1.2353) acc 68.7500 (69.4975) lr 1.9980e-03 eta 10:20:35 +epoch [3/50] batch [260/500] time 1.565 (1.568) data 0.000 (0.004) loss 1.2783 (1.2385) acc 71.8750 (69.4231) lr 1.9980e-03 eta 10:20:27 +epoch [3/50] batch [265/500] time 1.586 (1.568) data 0.001 (0.004) loss 1.1611 (1.2440) acc 71.8750 (69.4340) lr 1.9980e-03 eta 10:20:18 +epoch [3/50] batch [270/500] time 1.571 (1.568) data 0.000 (0.004) loss 1.1523 (1.2425) acc 78.1250 (69.5139) lr 1.9980e-03 eta 10:20:07 +epoch [3/50] batch [275/500] time 1.565 (1.568) data 0.000 (0.004) loss 1.9248 (1.2448) acc 62.5000 (69.5682) lr 1.9980e-03 eta 10:19:59 +epoch [3/50] batch [280/500] time 1.565 (1.568) data 0.000 (0.004) loss 0.7241 
(1.2434) acc 75.0000 (69.6094) lr 1.9980e-03 eta 10:19:54 +epoch [3/50] batch [285/500] time 1.539 (1.568) data 0.000 (0.004) loss 1.0303 (1.2407) acc 71.8750 (69.6820) lr 1.9980e-03 eta 10:19:51 +epoch [3/50] batch [290/500] time 1.556 (1.568) data 0.000 (0.004) loss 1.5234 (1.2406) acc 62.5000 (69.6444) lr 1.9980e-03 eta 10:19:42 +epoch [3/50] batch [295/500] time 1.564 (1.568) data 0.000 (0.004) loss 1.1660 (1.2391) acc 71.8750 (69.6716) lr 1.9980e-03 eta 10:19:33 +epoch [3/50] batch [300/500] time 1.568 (1.568) data 0.000 (0.004) loss 1.6709 (1.2360) acc 59.3750 (69.6979) lr 1.9980e-03 eta 10:19:22 +epoch [3/50] batch [305/500] time 1.565 (1.568) data 0.000 (0.004) loss 0.6401 (1.2350) acc 81.2500 (69.6926) lr 1.9980e-03 eta 10:19:14 +epoch [3/50] batch [310/500] time 1.550 (1.568) data 0.000 (0.004) loss 1.7422 (1.2362) acc 59.3750 (69.7077) lr 1.9980e-03 eta 10:19:04 +epoch [3/50] batch [315/500] time 1.576 (1.568) data 0.000 (0.004) loss 1.2539 (1.2363) acc 65.6250 (69.7123) lr 1.9980e-03 eta 10:18:58 +epoch [3/50] batch [320/500] time 1.565 (1.568) data 0.000 (0.003) loss 1.3418 (1.2380) acc 71.8750 (69.6875) lr 1.9980e-03 eta 10:18:43 +epoch [3/50] batch [325/500] time 1.665 (1.568) data 0.001 (0.003) loss 0.8140 (1.2384) acc 78.1250 (69.7212) lr 1.9980e-03 eta 10:18:38 +epoch [3/50] batch [330/500] time 1.565 (1.568) data 0.000 (0.003) loss 0.9102 (1.2371) acc 78.1250 (69.6875) lr 1.9980e-03 eta 10:18:27 +epoch [3/50] batch [335/500] time 1.562 (1.568) data 0.000 (0.003) loss 1.2266 (1.2398) acc 65.6250 (69.6455) lr 1.9980e-03 eta 10:18:16 +epoch [3/50] batch [340/500] time 1.552 (1.567) data 0.000 (0.003) loss 1.9355 (1.2436) acc 56.2500 (69.5956) lr 1.9980e-03 eta 10:18:06 +epoch [3/50] batch [345/500] time 1.570 (1.567) data 0.000 (0.003) loss 1.6348 (1.2445) acc 62.5000 (69.6014) lr 1.9980e-03 eta 10:17:56 +epoch [3/50] batch [350/500] time 1.585 (1.567) data 0.000 (0.003) loss 0.7671 (1.2416) acc 84.3750 (69.6696) lr 1.9980e-03 eta 10:17:48 +epoch 
[3/50] batch [355/500] time 1.544 (1.567) data 0.000 (0.003) loss 0.8955 (1.2361) acc 65.6250 (69.7711) lr 1.9980e-03 eta 10:17:39 +epoch [3/50] batch [360/500] time 1.576 (1.567) data 0.000 (0.003) loss 0.9863 (1.2368) acc 68.7500 (69.7569) lr 1.9980e-03 eta 10:17:32 +epoch [3/50] batch [365/500] time 1.572 (1.567) data 0.000 (0.003) loss 1.3438 (1.2349) acc 65.6250 (69.7774) lr 1.9980e-03 eta 10:17:25 +epoch [3/50] batch [370/500] time 1.574 (1.568) data 0.000 (0.003) loss 1.1377 (1.2334) acc 65.6250 (69.7720) lr 1.9980e-03 eta 10:17:21 +epoch [3/50] batch [375/500] time 1.557 (1.568) data 0.001 (0.003) loss 1.0742 (1.2304) acc 71.8750 (69.8333) lr 1.9980e-03 eta 10:17:12 +epoch [3/50] batch [380/500] time 1.555 (1.567) data 0.000 (0.003) loss 1.3584 (1.2329) acc 62.5000 (69.7697) lr 1.9980e-03 eta 10:17:03 +epoch [3/50] batch [385/500] time 1.540 (1.567) data 0.000 (0.003) loss 1.0459 (1.2316) acc 78.1250 (69.7646) lr 1.9980e-03 eta 10:16:52 +epoch [3/50] batch [390/500] time 1.551 (1.567) data 0.000 (0.003) loss 1.1270 (1.2315) acc 65.6250 (69.7276) lr 1.9980e-03 eta 10:16:46 +epoch [3/50] batch [395/500] time 1.567 (1.567) data 0.000 (0.003) loss 1.7861 (1.2344) acc 68.7500 (69.7389) lr 1.9980e-03 eta 10:16:37 +epoch [3/50] batch [400/500] time 1.543 (1.567) data 0.000 (0.003) loss 1.8174 (1.2361) acc 56.2500 (69.7109) lr 1.9980e-03 eta 10:16:24 +epoch [3/50] batch [405/500] time 1.545 (1.567) data 0.000 (0.003) loss 0.9321 (1.2343) acc 75.0000 (69.7299) lr 1.9980e-03 eta 10:16:15 +epoch [3/50] batch [410/500] time 1.565 (1.567) data 0.001 (0.003) loss 1.3125 (1.2341) acc 75.0000 (69.7332) lr 1.9980e-03 eta 10:16:06 +epoch [3/50] batch [415/500] time 1.564 (1.567) data 0.001 (0.003) loss 1.3633 (1.2309) acc 56.2500 (69.8042) lr 1.9980e-03 eta 10:15:59 +epoch [3/50] batch [420/500] time 1.565 (1.567) data 0.001 (0.003) loss 0.7520 (1.2326) acc 81.2500 (69.7470) lr 1.9980e-03 eta 10:15:50 +epoch [3/50] batch [425/500] time 1.553 (1.567) data 0.000 (0.003) loss 
1.4951 (1.2344) acc 62.5000 (69.6912) lr 1.9980e-03 eta 10:15:46 +epoch [3/50] batch [430/500] time 1.537 (1.567) data 0.000 (0.003) loss 1.8086 (1.2370) acc 59.3750 (69.6366) lr 1.9980e-03 eta 10:15:35 +epoch [3/50] batch [435/500] time 1.550 (1.567) data 0.000 (0.003) loss 1.6162 (1.2400) acc 71.8750 (69.6049) lr 1.9980e-03 eta 10:15:25 +epoch [3/50] batch [440/500] time 1.565 (1.567) data 0.000 (0.003) loss 0.7725 (1.2396) acc 78.1250 (69.5739) lr 1.9980e-03 eta 10:15:14 +epoch [3/50] batch [445/500] time 1.568 (1.567) data 0.000 (0.003) loss 1.2998 (1.2393) acc 59.3750 (69.5154) lr 1.9980e-03 eta 10:15:05 +epoch [3/50] batch [450/500] time 1.544 (1.567) data 0.000 (0.003) loss 1.7070 (1.2427) acc 65.6250 (69.4583) lr 1.9980e-03 eta 10:14:54 +epoch [3/50] batch [455/500] time 1.567 (1.567) data 0.000 (0.003) loss 0.5923 (1.2406) acc 84.3750 (69.5055) lr 1.9980e-03 eta 10:14:45 +epoch [3/50] batch [460/500] time 1.561 (1.567) data 0.000 (0.003) loss 1.3623 (1.2407) acc 71.8750 (69.5041) lr 1.9980e-03 eta 10:14:38 +epoch [3/50] batch [465/500] time 1.565 (1.566) data 0.000 (0.003) loss 0.9004 (1.2406) acc 81.2500 (69.5296) lr 1.9980e-03 eta 10:14:27 +epoch [3/50] batch [470/500] time 1.527 (1.567) data 0.000 (0.002) loss 0.8750 (1.2387) acc 75.0000 (69.5545) lr 1.9980e-03 eta 10:14:20 +epoch [3/50] batch [475/500] time 1.560 (1.566) data 0.000 (0.002) loss 1.7803 (1.2396) acc 56.2500 (69.5789) lr 1.9980e-03 eta 10:14:11 +epoch [3/50] batch [480/500] time 1.550 (1.566) data 0.000 (0.002) loss 1.2930 (1.2380) acc 68.7500 (69.6224) lr 1.9980e-03 eta 10:14:01 +epoch [3/50] batch [485/500] time 1.571 (1.566) data 0.001 (0.002) loss 1.5693 (1.2375) acc 65.6250 (69.6521) lr 1.9980e-03 eta 10:13:54 +epoch [3/50] batch [490/500] time 1.569 (1.566) data 0.000 (0.002) loss 1.1201 (1.2382) acc 68.7500 (69.6429) lr 1.9980e-03 eta 10:13:44 +epoch [3/50] batch [495/500] time 1.543 (1.566) data 0.000 (0.002) loss 1.4541 (1.2395) acc 62.5000 (69.6275) lr 1.9980e-03 eta 10:13:33 
+epoch [3/50] batch [500/500] time 1.536 (1.566) data 0.000 (0.002) loss 1.4707 (1.2421) acc 78.1250 (69.6188) lr 1.9921e-03 eta 10:13:22 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,594 +* accuracy: 77.2% +* error: 22.8% +* macro_f1: 76.5% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [4/50] batch [5/500] time 1.551 (1.688) data 0.000 (0.178) loss 1.4854 (1.4664) acc 71.8750 (63.1250) lr 1.9921e-03 eta 11:00:51 +epoch [4/50] batch [10/500] time 1.536 (1.626) data 0.000 (0.089) loss 1.9824 (1.4054) acc 62.5000 (69.0625) lr 1.9921e-03 eta 10:36:26 +epoch [4/50] batch [15/500] time 1.544 (1.605) data 0.000 (0.060) loss 1.4111 (1.3109) acc 62.5000 (71.0417) lr 1.9921e-03 eta 10:28:11 +epoch [4/50] batch [20/500] time 1.579 (1.598) data 0.000 (0.045) loss 0.9487 (1.3033) acc 75.0000 (72.1875) lr 1.9921e-03 eta 10:25:15 +epoch [4/50] batch [25/500] time 1.551 (1.590) data 0.000 (0.036) loss 1.4209 (1.2379) acc 65.6250 (72.3750) lr 1.9921e-03 eta 10:22:09 +epoch [4/50] batch [30/500] time 1.576 (1.586) data 0.000 (0.030) loss 1.2549 (1.2384) acc 78.1250 (71.9792) lr 1.9921e-03 eta 10:20:18 +epoch [4/50] batch [35/500] time 1.562 (1.582) data 0.000 (0.026) loss 0.5991 (1.2099) acc 90.6250 (72.7679) lr 1.9921e-03 eta 10:18:41 +epoch [4/50] batch [40/500] time 1.571 (1.580) data 0.000 (0.023) loss 1.5186 (1.2139) acc 68.7500 (72.1094) lr 1.9921e-03 eta 10:17:37 +epoch [4/50] batch [45/500] time 1.584 (1.577) data 0.000 (0.020) loss 1.0625 (1.2405) acc 78.1250 (71.3194) lr 1.9921e-03 eta 10:16:39 +epoch [4/50] batch [50/500] time 1.555 (1.576) data 0.000 (0.018) loss 1.1904 (1.2415) acc 71.8750 (70.7500) lr 1.9921e-03 eta 10:16:04 +epoch [4/50] batch [55/500] time 1.562 (1.574) data 0.000 (0.017) loss 2.2988 (1.2680) acc 53.1250 (70.2273) lr 1.9921e-03 eta 10:15:07 +epoch [4/50] batch [60/500] time 1.542 (1.574) data 0.000 (0.015) loss 1.0059 (1.2726) acc 
68.7500 (69.9479) lr 1.9921e-03 eta 10:15:06 +epoch [4/50] batch [65/500] time 1.557 (1.573) data 0.000 (0.014) loss 1.7998 (1.2636) acc 59.3750 (70.0962) lr 1.9921e-03 eta 10:14:20 +epoch [4/50] batch [70/500] time 1.557 (1.572) data 0.001 (0.013) loss 1.2725 (1.2744) acc 71.8750 (69.9107) lr 1.9921e-03 eta 10:13:48 +epoch [4/50] batch [75/500] time 1.565 (1.571) data 0.000 (0.012) loss 1.5010 (1.2858) acc 59.3750 (69.5417) lr 1.9921e-03 eta 10:13:20 +epoch [4/50] batch [80/500] time 1.563 (1.570) data 0.000 (0.012) loss 0.4321 (1.2884) acc 87.5000 (69.5312) lr 1.9921e-03 eta 10:12:58 +epoch [4/50] batch [85/500] time 1.586 (1.570) data 0.000 (0.011) loss 1.2236 (1.2861) acc 75.0000 (69.6691) lr 1.9921e-03 eta 10:12:40 +epoch [4/50] batch [90/500] time 1.563 (1.569) data 0.000 (0.010) loss 1.2373 (1.2912) acc 75.0000 (69.7569) lr 1.9921e-03 eta 10:12:13 +epoch [4/50] batch [95/500] time 1.557 (1.569) data 0.000 (0.010) loss 1.2510 (1.3018) acc 71.8750 (69.4079) lr 1.9921e-03 eta 10:11:56 +epoch [4/50] batch [100/500] time 1.541 (1.568) data 0.000 (0.009) loss 1.3379 (1.2919) acc 68.7500 (69.6562) lr 1.9921e-03 eta 10:11:32 +epoch [4/50] batch [105/500] time 1.553 (1.568) data 0.001 (0.009) loss 1.3486 (1.2944) acc 62.5000 (69.5536) lr 1.9921e-03 eta 10:11:34 +epoch [4/50] batch [110/500] time 1.554 (1.568) data 0.000 (0.008) loss 1.0234 (1.2869) acc 75.0000 (69.8580) lr 1.9921e-03 eta 10:11:14 +epoch [4/50] batch [115/500] time 1.560 (1.567) data 0.000 (0.008) loss 0.7393 (1.2789) acc 81.2500 (70.1630) lr 1.9921e-03 eta 10:10:49 +epoch [4/50] batch [120/500] time 1.554 (1.567) data 0.000 (0.008) loss 1.6777 (1.2811) acc 59.3750 (69.8958) lr 1.9921e-03 eta 10:10:37 +epoch [4/50] batch [125/500] time 1.547 (1.567) data 0.000 (0.007) loss 1.5283 (1.2723) acc 68.7500 (70.0750) lr 1.9921e-03 eta 10:10:19 +epoch [4/50] batch [130/500] time 1.561 (1.566) data 0.000 (0.007) loss 1.3047 (1.2737) acc 59.3750 (70.0721) lr 1.9921e-03 eta 10:10:00 +epoch [4/50] batch [135/500] 
time 1.533 (1.566) data 0.001 (0.007) loss 0.7705 (1.2627) acc 81.2500 (70.2778) lr 1.9921e-03 eta 10:09:40 +epoch [4/50] batch [140/500] time 1.543 (1.565) data 0.000 (0.007) loss 0.9863 (1.2619) acc 71.8750 (70.2902) lr 1.9921e-03 eta 10:09:17 +epoch [4/50] batch [145/500] time 1.565 (1.564) data 0.000 (0.007) loss 1.6250 (1.2640) acc 62.5000 (70.1940) lr 1.9921e-03 eta 10:08:57 +epoch [4/50] batch [150/500] time 1.557 (1.564) data 0.000 (0.006) loss 1.6123 (1.2636) acc 62.5000 (69.9167) lr 1.9921e-03 eta 10:08:46 +epoch [4/50] batch [155/500] time 1.563 (1.564) data 0.000 (0.006) loss 1.1582 (1.2632) acc 84.3750 (70.0000) lr 1.9921e-03 eta 10:08:33 +epoch [4/50] batch [160/500] time 1.550 (1.563) data 0.001 (0.006) loss 1.4717 (1.2603) acc 68.7500 (69.8828) lr 1.9921e-03 eta 10:08:11 +epoch [4/50] batch [165/500] time 1.552 (1.563) data 0.000 (0.006) loss 0.9897 (1.2518) acc 81.2500 (70.1705) lr 1.9921e-03 eta 10:07:57 +epoch [4/50] batch [170/500] time 1.562 (1.563) data 0.000 (0.006) loss 1.1895 (1.2512) acc 68.7500 (70.2390) lr 1.9921e-03 eta 10:07:47 +epoch [4/50] batch [175/500] time 1.573 (1.563) data 0.000 (0.005) loss 1.2402 (1.2497) acc 65.6250 (70.1607) lr 1.9921e-03 eta 10:07:38 +epoch [4/50] batch [180/500] time 1.566 (1.563) data 0.000 (0.005) loss 1.3711 (1.2498) acc 59.3750 (70.1215) lr 1.9921e-03 eta 10:07:29 +epoch [4/50] batch [185/500] time 1.562 (1.563) data 0.000 (0.005) loss 0.4265 (1.2436) acc 87.5000 (70.2365) lr 1.9921e-03 eta 10:07:16 +epoch [4/50] batch [190/500] time 1.538 (1.563) data 0.000 (0.005) loss 0.8657 (1.2375) acc 68.7500 (70.3783) lr 1.9921e-03 eta 10:07:04 +epoch [4/50] batch [195/500] time 1.571 (1.563) data 0.000 (0.005) loss 1.5332 (1.2430) acc 62.5000 (70.3365) lr 1.9921e-03 eta 10:06:59 +epoch [4/50] batch [200/500] time 1.560 (1.563) data 0.001 (0.005) loss 0.8960 (1.2429) acc 81.2500 (70.3438) lr 1.9921e-03 eta 10:06:48 +epoch [4/50] batch [205/500] time 1.534 (1.563) data 0.000 (0.005) loss 1.0225 (1.2448) acc 
71.8750 (70.2134) lr 1.9921e-03 eta 10:06:43 +epoch [4/50] batch [210/500] time 1.558 (1.563) data 0.000 (0.005) loss 1.3252 (1.2440) acc 71.8750 (70.1637) lr 1.9921e-03 eta 10:06:33 +epoch [4/50] batch [215/500] time 1.542 (1.562) data 0.000 (0.005) loss 1.5381 (1.2456) acc 62.5000 (70.1308) lr 1.9921e-03 eta 10:06:19 +epoch [4/50] batch [220/500] time 1.551 (1.562) data 0.000 (0.004) loss 1.2031 (1.2503) acc 62.5000 (69.9858) lr 1.9921e-03 eta 10:06:06 +epoch [4/50] batch [225/500] time 1.580 (1.562) data 0.000 (0.004) loss 0.7485 (1.2470) acc 78.1250 (70.0417) lr 1.9921e-03 eta 10:05:55 +epoch [4/50] batch [230/500] time 1.551 (1.562) data 0.000 (0.004) loss 1.1836 (1.2465) acc 68.7500 (70.0951) lr 1.9921e-03 eta 10:05:40 +epoch [4/50] batch [235/500] time 1.529 (1.561) data 0.001 (0.004) loss 0.6914 (1.2456) acc 81.2500 (70.0798) lr 1.9921e-03 eta 10:05:26 +epoch [4/50] batch [240/500] time 1.558 (1.561) data 0.000 (0.004) loss 0.7607 (1.2449) acc 81.2500 (70.0391) lr 1.9921e-03 eta 10:05:16 +epoch [4/50] batch [245/500] time 1.685 (1.562) data 0.000 (0.004) loss 1.5811 (1.2481) acc 71.8750 (69.9617) lr 1.9921e-03 eta 10:05:20 +epoch [4/50] batch [250/500] time 1.556 (1.562) data 0.000 (0.004) loss 0.7466 (1.2466) acc 71.8750 (70.0875) lr 1.9921e-03 eta 10:05:13 +epoch [4/50] batch [255/500] time 1.572 (1.562) data 0.000 (0.004) loss 1.1992 (1.2482) acc 75.0000 (70.0368) lr 1.9921e-03 eta 10:05:01 +epoch [4/50] batch [260/500] time 1.571 (1.562) data 0.000 (0.004) loss 0.7485 (1.2445) acc 75.0000 (70.1322) lr 1.9921e-03 eta 10:04:55 +epoch [4/50] batch [265/500] time 1.570 (1.562) data 0.000 (0.004) loss 1.2227 (1.2446) acc 68.7500 (70.0708) lr 1.9921e-03 eta 10:04:51 +epoch [4/50] batch [270/500] time 1.533 (1.562) data 0.001 (0.004) loss 1.2002 (1.2475) acc 68.7500 (70.0116) lr 1.9921e-03 eta 10:04:40 +epoch [4/50] batch [275/500] time 1.571 (1.562) data 0.000 (0.004) loss 1.7070 (1.2478) acc 65.6250 (69.9886) lr 1.9921e-03 eta 10:04:36 +epoch [4/50] batch 
[280/500] time 1.587 (1.562) data 0.000 (0.004) loss 1.3047 (1.2524) acc 59.3750 (69.8884) lr 1.9921e-03 eta 10:04:30 +epoch [4/50] batch [285/500] time 1.568 (1.562) data 0.000 (0.004) loss 0.7754 (1.2501) acc 90.6250 (69.9452) lr 1.9921e-03 eta 10:04:26 +epoch [4/50] batch [290/500] time 1.571 (1.562) data 0.000 (0.003) loss 1.4648 (1.2549) acc 71.8750 (69.8276) lr 1.9921e-03 eta 10:04:19 +epoch [4/50] batch [295/500] time 1.542 (1.562) data 0.000 (0.003) loss 1.4092 (1.2571) acc 71.8750 (69.8093) lr 1.9921e-03 eta 10:04:10 +epoch [4/50] batch [300/500] time 1.581 (1.562) data 0.000 (0.003) loss 0.9795 (1.2527) acc 81.2500 (69.9167) lr 1.9921e-03 eta 10:04:08 +epoch [4/50] batch [305/500] time 1.578 (1.563) data 0.000 (0.003) loss 1.3301 (1.2516) acc 71.8750 (69.9488) lr 1.9921e-03 eta 10:04:03 +epoch [4/50] batch [310/500] time 1.569 (1.563) data 0.000 (0.003) loss 0.9902 (1.2508) acc 75.0000 (69.9395) lr 1.9921e-03 eta 10:04:00 +epoch [4/50] batch [315/500] time 1.580 (1.563) data 0.000 (0.003) loss 1.4834 (1.2507) acc 65.6250 (69.9008) lr 1.9921e-03 eta 10:03:53 +epoch [4/50] batch [320/500] time 1.563 (1.563) data 0.000 (0.003) loss 1.9580 (1.2538) acc 50.0000 (69.7461) lr 1.9921e-03 eta 10:03:40 +epoch [4/50] batch [325/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.6006 (1.2550) acc 68.7500 (69.7308) lr 1.9921e-03 eta 10:03:31 +epoch [4/50] batch [330/500] time 1.573 (1.563) data 0.001 (0.003) loss 1.9521 (1.2583) acc 59.3750 (69.6970) lr 1.9921e-03 eta 10:03:25 +epoch [4/50] batch [335/500] time 1.578 (1.563) data 0.000 (0.003) loss 1.0889 (1.2595) acc 65.6250 (69.5896) lr 1.9921e-03 eta 10:03:20 +epoch [4/50] batch [340/500] time 1.577 (1.563) data 0.000 (0.003) loss 1.0762 (1.2574) acc 71.8750 (69.6415) lr 1.9921e-03 eta 10:03:13 +epoch [4/50] batch [345/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.2998 (1.2561) acc 62.5000 (69.6739) lr 1.9921e-03 eta 10:03:13 +epoch [4/50] batch [350/500] time 1.544 (1.563) data 0.000 (0.003) loss 1.3037 
(1.2575) acc 68.7500 (69.6607) lr 1.9921e-03 eta 10:03:04 +epoch [4/50] batch [355/500] time 1.569 (1.563) data 0.000 (0.003) loss 0.7642 (1.2559) acc 75.0000 (69.7007) lr 1.9921e-03 eta 10:02:58 +epoch [4/50] batch [360/500] time 1.578 (1.563) data 0.000 (0.003) loss 1.5850 (1.2542) acc 56.2500 (69.7049) lr 1.9921e-03 eta 10:02:54 +epoch [4/50] batch [365/500] time 1.563 (1.563) data 0.000 (0.003) loss 0.9937 (1.2577) acc 68.7500 (69.6404) lr 1.9921e-03 eta 10:02:49 +epoch [4/50] batch [370/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.3604 (1.2569) acc 78.1250 (69.6706) lr 1.9921e-03 eta 10:02:40 +epoch [4/50] batch [375/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.3193 (1.2552) acc 62.5000 (69.6417) lr 1.9921e-03 eta 10:02:32 +epoch [4/50] batch [380/500] time 1.569 (1.563) data 0.000 (0.003) loss 1.3369 (1.2575) acc 75.0000 (69.5970) lr 1.9921e-03 eta 10:02:22 +epoch [4/50] batch [385/500] time 1.533 (1.563) data 0.000 (0.003) loss 0.9590 (1.2563) acc 84.3750 (69.6429) lr 1.9921e-03 eta 10:02:09 +epoch [4/50] batch [390/500] time 1.533 (1.563) data 0.000 (0.003) loss 1.0547 (1.2590) acc 71.8750 (69.5753) lr 1.9921e-03 eta 10:02:02 +epoch [4/50] batch [395/500] time 1.534 (1.563) data 0.000 (0.003) loss 0.9268 (1.2602) acc 75.0000 (69.5570) lr 1.9921e-03 eta 10:01:51 +epoch [4/50] batch [400/500] time 1.563 (1.563) data 0.000 (0.003) loss 1.1670 (1.2616) acc 71.8750 (69.5234) lr 1.9921e-03 eta 10:01:42 +epoch [4/50] batch [405/500] time 1.543 (1.563) data 0.000 (0.003) loss 0.8950 (1.2584) acc 78.1250 (69.6065) lr 1.9921e-03 eta 10:01:31 +epoch [4/50] batch [410/500] time 1.566 (1.563) data 0.000 (0.003) loss 1.0928 (1.2593) acc 68.7500 (69.5351) lr 1.9921e-03 eta 10:01:22 +epoch [4/50] batch [415/500] time 1.566 (1.563) data 0.001 (0.003) loss 1.2178 (1.2572) acc 71.8750 (69.5256) lr 1.9921e-03 eta 10:01:15 +epoch [4/50] batch [420/500] time 1.557 (1.563) data 0.000 (0.003) loss 0.8784 (1.2548) acc 81.2500 (69.5461) lr 1.9921e-03 eta 10:01:06 +epoch 
[4/50] batch [425/500] time 1.562 (1.563) data 0.000 (0.002) loss 1.3984 (1.2535) acc 65.6250 (69.5588) lr 1.9921e-03 eta 10:00:59 +epoch [4/50] batch [430/500] time 1.562 (1.563) data 0.000 (0.002) loss 1.5410 (1.2536) acc 59.3750 (69.5567) lr 1.9921e-03 eta 10:00:51 +epoch [4/50] batch [435/500] time 1.554 (1.563) data 0.001 (0.002) loss 1.7158 (1.2558) acc 62.5000 (69.5115) lr 1.9921e-03 eta 10:00:39 +epoch [4/50] batch [440/500] time 1.561 (1.563) data 0.000 (0.002) loss 0.8828 (1.2529) acc 78.1250 (69.5170) lr 1.9921e-03 eta 10:00:32 +epoch [4/50] batch [445/500] time 1.566 (1.563) data 0.000 (0.002) loss 1.6270 (1.2529) acc 65.6250 (69.4874) lr 1.9921e-03 eta 10:00:24 +epoch [4/50] batch [450/500] time 1.584 (1.563) data 0.000 (0.002) loss 1.5215 (1.2522) acc 65.6250 (69.5347) lr 1.9921e-03 eta 10:00:18 +epoch [4/50] batch [455/500] time 1.553 (1.563) data 0.001 (0.002) loss 1.4844 (1.2540) acc 65.6250 (69.5536) lr 1.9921e-03 eta 10:00:08 +epoch [4/50] batch [460/500] time 1.535 (1.562) data 0.000 (0.002) loss 1.8447 (1.2557) acc 59.3750 (69.5177) lr 1.9921e-03 eta 9:59:58 +epoch [4/50] batch [465/500] time 1.549 (1.562) data 0.000 (0.002) loss 1.7285 (1.2559) acc 59.3750 (69.4892) lr 1.9921e-03 eta 9:59:49 +epoch [4/50] batch [470/500] time 1.556 (1.562) data 0.000 (0.002) loss 1.7480 (1.2564) acc 62.5000 (69.4814) lr 1.9921e-03 eta 9:59:40 +epoch [4/50] batch [475/500] time 1.573 (1.562) data 0.000 (0.002) loss 1.0996 (1.2597) acc 71.8750 (69.4737) lr 1.9921e-03 eta 9:59:32 +epoch [4/50] batch [480/500] time 1.543 (1.562) data 0.000 (0.002) loss 0.7256 (1.2569) acc 87.5000 (69.5638) lr 1.9921e-03 eta 9:59:23 +epoch [4/50] batch [485/500] time 1.540 (1.562) data 0.001 (0.002) loss 1.5234 (1.2582) acc 53.1250 (69.4910) lr 1.9921e-03 eta 9:59:14 +epoch [4/50] batch [490/500] time 1.583 (1.562) data 0.000 (0.002) loss 1.1230 (1.2597) acc 68.7500 (69.4515) lr 1.9921e-03 eta 9:59:10 +epoch [4/50] batch [495/500] time 1.549 (1.562) data 0.000 (0.002) loss 1.0020 
(1.2562) acc 65.6250 (69.5013) lr 1.9921e-03 eta 9:59:04 +epoch [4/50] batch [500/500] time 1.547 (1.562) data 0.000 (0.002) loss 1.8877 (1.2575) acc 62.5000 (69.5062) lr 1.9823e-03 eta 9:58:55 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,735 +* accuracy: 77.5% +* error: 22.5% +* macro_f1: 76.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [5/50] batch [5/500] time 1.551 (1.681) data 0.000 (0.176) loss 1.0010 (1.0877) acc 75.0000 (73.1250) lr 1.9823e-03 eta 10:44:04 +epoch [5/50] batch [10/500] time 1.567 (1.621) data 0.001 (0.088) loss 1.3730 (1.1820) acc 65.6250 (71.2500) lr 1.9823e-03 eta 10:21:01 +epoch [5/50] batch [15/500] time 1.590 (1.601) data 0.001 (0.059) loss 1.5391 (1.1960) acc 59.3750 (70.2083) lr 1.9823e-03 eta 10:13:24 +epoch [5/50] batch [20/500] time 1.563 (1.593) data 0.000 (0.044) loss 1.1064 (1.1754) acc 65.6250 (69.0625) lr 1.9823e-03 eta 10:09:57 +epoch [5/50] batch [25/500] time 1.556 (1.588) data 0.001 (0.036) loss 1.2754 (1.1703) acc 75.0000 (69.2500) lr 1.9823e-03 eta 10:08:11 +epoch [5/50] batch [30/500] time 1.573 (1.586) data 0.000 (0.030) loss 1.3799 (1.1621) acc 68.7500 (69.8958) lr 1.9823e-03 eta 10:07:15 +epoch [5/50] batch [35/500] time 1.561 (1.587) data 0.001 (0.026) loss 0.9971 (1.2037) acc 75.0000 (69.2857) lr 1.9823e-03 eta 10:07:34 +epoch [5/50] batch [40/500] time 1.586 (1.585) data 0.001 (0.022) loss 0.8579 (1.2088) acc 78.1250 (69.3750) lr 1.9823e-03 eta 10:06:40 +epoch [5/50] batch [45/500] time 1.556 (1.583) data 0.001 (0.020) loss 1.2354 (1.2125) acc 75.0000 (69.6528) lr 1.9823e-03 eta 10:05:31 +epoch [5/50] batch [50/500] time 1.567 (1.580) data 0.001 (0.018) loss 0.9668 (1.1911) acc 81.2500 (70.8125) lr 1.9823e-03 eta 10:04:28 +epoch [5/50] batch [55/500] time 1.553 (1.579) data 0.001 (0.016) loss 1.0176 (1.1832) acc 75.0000 (71.1932) lr 1.9823e-03 eta 10:03:38 +epoch [5/50] batch [60/500] time 
1.567 (1.578) data 0.001 (0.015) loss 1.1230 (1.1713) acc 65.6250 (71.3021) lr 1.9823e-03 eta 10:03:18 +epoch [5/50] batch [65/500] time 1.559 (1.577) data 0.001 (0.014) loss 1.5273 (1.1772) acc 62.5000 (71.2019) lr 1.9823e-03 eta 10:02:46 +epoch [5/50] batch [70/500] time 1.581 (1.576) data 0.001 (0.013) loss 0.8960 (1.1949) acc 78.1250 (71.0714) lr 1.9823e-03 eta 10:02:26 +epoch [5/50] batch [75/500] time 1.553 (1.575) data 0.000 (0.012) loss 0.9331 (1.1868) acc 68.7500 (71.0833) lr 1.9823e-03 eta 10:01:56 +epoch [5/50] batch [80/500] time 1.553 (1.574) data 0.000 (0.011) loss 1.4219 (1.1849) acc 59.3750 (71.2109) lr 1.9823e-03 eta 10:01:23 +epoch [5/50] batch [85/500] time 1.550 (1.573) data 0.000 (0.011) loss 0.6548 (1.1794) acc 81.2500 (71.0662) lr 1.9823e-03 eta 10:00:54 +epoch [5/50] batch [90/500] time 1.588 (1.573) data 0.000 (0.010) loss 1.1406 (1.1973) acc 71.8750 (70.7986) lr 1.9823e-03 eta 10:00:36 +epoch [5/50] batch [95/500] time 1.541 (1.572) data 0.000 (0.010) loss 0.9512 (1.1940) acc 68.7500 (70.7566) lr 1.9823e-03 eta 10:00:13 +epoch [5/50] batch [100/500] time 1.567 (1.572) data 0.000 (0.009) loss 1.3926 (1.2064) acc 68.7500 (70.5938) lr 1.9823e-03 eta 10:00:00 +epoch [5/50] batch [105/500] time 1.582 (1.572) data 0.000 (0.009) loss 1.7236 (1.2110) acc 65.6250 (70.4464) lr 1.9823e-03 eta 9:59:51 +epoch [5/50] batch [110/500] time 1.556 (1.572) data 0.000 (0.008) loss 1.5322 (1.2205) acc 65.6250 (70.3125) lr 1.9823e-03 eta 9:59:38 +epoch [5/50] batch [115/500] time 1.569 (1.571) data 0.000 (0.008) loss 1.1963 (1.2167) acc 65.6250 (70.3261) lr 1.9823e-03 eta 9:59:12 +epoch [5/50] batch [120/500] time 1.557 (1.571) data 0.000 (0.008) loss 0.8960 (1.2204) acc 81.2500 (70.1823) lr 1.9823e-03 eta 9:58:57 +epoch [5/50] batch [125/500] time 1.565 (1.570) data 0.000 (0.008) loss 1.7852 (1.2206) acc 53.1250 (70.1250) lr 1.9823e-03 eta 9:58:44 +epoch [5/50] batch [130/500] time 1.578 (1.570) data 0.000 (0.007) loss 1.0840 (1.2239) acc 71.8750 (70.1442) lr 
1.9823e-03 eta 9:58:32 +epoch [5/50] batch [135/500] time 1.568 (1.571) data 0.000 (0.007) loss 1.3672 (1.2217) acc 71.8750 (70.2083) lr 1.9823e-03 eta 9:58:41 +epoch [5/50] batch [140/500] time 1.542 (1.571) data 0.000 (0.007) loss 1.3037 (1.2144) acc 59.3750 (70.2902) lr 1.9823e-03 eta 9:58:27 +epoch [5/50] batch [145/500] time 1.587 (1.570) data 0.000 (0.007) loss 1.0342 (1.2074) acc 78.1250 (70.3879) lr 1.9823e-03 eta 9:58:13 +epoch [5/50] batch [150/500] time 1.568 (1.570) data 0.000 (0.006) loss 1.0010 (1.2076) acc 71.8750 (70.3125) lr 1.9823e-03 eta 9:57:58 +epoch [5/50] batch [155/500] time 1.563 (1.570) data 0.000 (0.006) loss 1.2734 (1.2099) acc 62.5000 (70.1815) lr 1.9823e-03 eta 9:57:54 +epoch [5/50] batch [160/500] time 1.616 (1.570) data 0.000 (0.006) loss 0.8662 (1.2055) acc 84.3750 (70.3125) lr 1.9823e-03 eta 9:57:49 +epoch [5/50] batch [165/500] time 1.555 (1.571) data 0.000 (0.006) loss 0.9346 (1.2044) acc 71.8750 (70.2083) lr 1.9823e-03 eta 9:57:42 +epoch [5/50] batch [170/500] time 1.582 (1.571) data 0.000 (0.006) loss 1.0439 (1.2024) acc 81.2500 (70.2757) lr 1.9823e-03 eta 9:57:36 +epoch [5/50] batch [175/500] time 1.545 (1.570) data 0.000 (0.005) loss 1.3516 (1.2051) acc 62.5000 (70.1607) lr 1.9823e-03 eta 9:57:22 +epoch [5/50] batch [180/500] time 1.553 (1.571) data 0.000 (0.005) loss 0.8096 (1.1951) acc 78.1250 (70.3993) lr 1.9823e-03 eta 9:57:26 +epoch [5/50] batch [185/500] time 1.546 (1.570) data 0.000 (0.005) loss 1.0596 (1.2006) acc 68.7500 (70.2534) lr 1.9823e-03 eta 9:57:08 +epoch [5/50] batch [190/500] time 1.557 (1.570) data 0.000 (0.005) loss 0.6582 (1.1927) acc 78.1250 (70.3783) lr 1.9823e-03 eta 9:56:51 +epoch [5/50] batch [195/500] time 1.566 (1.570) data 0.000 (0.005) loss 1.2197 (1.1925) acc 71.8750 (70.4006) lr 1.9823e-03 eta 9:56:43 +epoch [5/50] batch [200/500] time 1.579 (1.570) data 0.000 (0.005) loss 1.1699 (1.1967) acc 71.8750 (70.4062) lr 1.9823e-03 eta 9:56:29 +epoch [5/50] batch [205/500] time 1.571 (1.570) data 
0.000 (0.005) loss 1.2510 (1.1994) acc 71.8750 (70.3354) lr 1.9823e-03 eta 9:56:20 +epoch [5/50] batch [210/500] time 1.571 (1.570) data 0.000 (0.005) loss 1.2100 (1.1984) acc 78.1250 (70.4613) lr 1.9823e-03 eta 9:56:09 +epoch [5/50] batch [215/500] time 1.558 (1.569) data 0.000 (0.005) loss 1.3760 (1.2022) acc 62.5000 (70.3488) lr 1.9823e-03 eta 9:55:54 +epoch [5/50] batch [220/500] time 1.578 (1.569) data 0.000 (0.004) loss 1.0244 (1.1984) acc 71.8750 (70.3551) lr 1.9823e-03 eta 9:55:45 +epoch [5/50] batch [225/500] time 1.561 (1.569) data 0.000 (0.004) loss 0.9741 (1.2029) acc 68.7500 (70.1389) lr 1.9823e-03 eta 9:55:34 +epoch [5/50] batch [230/500] time 1.575 (1.569) data 0.000 (0.004) loss 0.7915 (1.1984) acc 81.2500 (70.2038) lr 1.9823e-03 eta 9:55:28 +epoch [5/50] batch [235/500] time 1.552 (1.569) data 0.000 (0.004) loss 1.1729 (1.2004) acc 71.8750 (70.1330) lr 1.9823e-03 eta 9:55:21 +epoch [5/50] batch [240/500] time 1.562 (1.569) data 0.000 (0.004) loss 1.9922 (1.2053) acc 59.3750 (70.0521) lr 1.9823e-03 eta 9:55:06 +epoch [5/50] batch [245/500] time 1.555 (1.569) data 0.000 (0.004) loss 0.7271 (1.2032) acc 75.0000 (70.1148) lr 1.9823e-03 eta 9:54:54 +epoch [5/50] batch [250/500] time 1.555 (1.568) data 0.000 (0.004) loss 1.1826 (1.2047) acc 78.1250 (70.1000) lr 1.9823e-03 eta 9:54:39 +epoch [5/50] batch [255/500] time 1.530 (1.568) data 0.000 (0.004) loss 0.8115 (1.2035) acc 78.1250 (70.1348) lr 1.9823e-03 eta 9:54:23 +epoch [5/50] batch [260/500] time 1.552 (1.568) data 0.000 (0.004) loss 1.4453 (1.2031) acc 56.2500 (70.0721) lr 1.9823e-03 eta 9:54:10 +epoch [5/50] batch [265/500] time 1.572 (1.568) data 0.000 (0.004) loss 1.3975 (1.2001) acc 59.3750 (70.1061) lr 1.9823e-03 eta 9:54:04 +epoch [5/50] batch [270/500] time 1.567 (1.568) data 0.001 (0.004) loss 1.2773 (1.2050) acc 71.8750 (70.1157) lr 1.9823e-03 eta 9:53:57 +epoch [5/50] batch [275/500] time 1.558 (1.568) data 0.001 (0.004) loss 1.0312 (1.2068) acc 75.0000 (70.1932) lr 1.9823e-03 eta 
9:53:46 +epoch [5/50] batch [280/500] time 1.558 (1.568) data 0.000 (0.004) loss 1.3037 (1.2089) acc 68.7500 (70.0893) lr 1.9823e-03 eta 9:53:48 +epoch [5/50] batch [285/500] time 1.546 (1.568) data 0.000 (0.004) loss 1.6865 (1.2087) acc 62.5000 (70.1425) lr 1.9823e-03 eta 9:53:39 +epoch [5/50] batch [290/500] time 1.567 (1.568) data 0.000 (0.003) loss 0.9023 (1.2038) acc 75.0000 (70.2478) lr 1.9823e-03 eta 9:53:30 +epoch [5/50] batch [295/500] time 1.571 (1.568) data 0.000 (0.003) loss 1.4609 (1.2048) acc 62.5000 (70.2225) lr 1.9823e-03 eta 9:53:19 +epoch [5/50] batch [300/500] time 1.558 (1.568) data 0.000 (0.003) loss 1.5303 (1.2054) acc 62.5000 (70.2188) lr 1.9823e-03 eta 9:53:09 +epoch [5/50] batch [305/500] time 1.549 (1.568) data 0.000 (0.003) loss 1.1875 (1.2048) acc 65.6250 (70.2254) lr 1.9823e-03 eta 9:52:57 +epoch [5/50] batch [310/500] time 1.608 (1.568) data 0.000 (0.003) loss 1.2314 (1.2056) acc 71.8750 (70.2117) lr 1.9823e-03 eta 9:52:55 +epoch [5/50] batch [315/500] time 1.561 (1.568) data 0.000 (0.003) loss 0.7339 (1.2074) acc 78.1250 (70.1984) lr 1.9823e-03 eta 9:52:47 +epoch [5/50] batch [320/500] time 1.641 (1.568) data 0.000 (0.003) loss 1.1846 (1.2042) acc 53.1250 (70.2441) lr 1.9823e-03 eta 9:52:44 +epoch [5/50] batch [325/500] time 1.584 (1.568) data 0.000 (0.003) loss 1.8145 (1.2046) acc 62.5000 (70.2788) lr 1.9823e-03 eta 9:52:35 +epoch [5/50] batch [330/500] time 1.564 (1.568) data 0.000 (0.003) loss 0.9209 (1.2039) acc 71.8750 (70.2367) lr 1.9823e-03 eta 9:52:26 +epoch [5/50] batch [335/500] time 1.557 (1.568) data 0.000 (0.003) loss 1.5352 (1.2077) acc 65.6250 (70.2239) lr 1.9823e-03 eta 9:52:17 +epoch [5/50] batch [340/500] time 1.542 (1.568) data 0.000 (0.003) loss 1.5225 (1.2088) acc 59.3750 (70.2114) lr 1.9823e-03 eta 9:52:05 +epoch [5/50] batch [345/500] time 1.563 (1.568) data 0.001 (0.003) loss 1.3096 (1.2111) acc 62.5000 (70.1449) lr 1.9823e-03 eta 9:51:52 +epoch [5/50] batch [350/500] time 1.583 (1.568) data 0.000 (0.003) loss 
1.1318 (1.2105) acc 75.0000 (70.1875) lr 1.9823e-03 eta 9:51:44 +epoch [5/50] batch [355/500] time 1.561 (1.568) data 0.000 (0.003) loss 1.3594 (1.2107) acc 68.7500 (70.2113) lr 1.9823e-03 eta 9:51:36 +epoch [5/50] batch [360/500] time 1.547 (1.567) data 0.000 (0.003) loss 1.2178 (1.2097) acc 68.7500 (70.1736) lr 1.9823e-03 eta 9:51:20 +epoch [5/50] batch [365/500] time 1.577 (1.567) data 0.000 (0.003) loss 1.7188 (1.2124) acc 50.0000 (70.0942) lr 1.9823e-03 eta 9:51:11 +epoch [5/50] batch [370/500] time 1.546 (1.567) data 0.000 (0.003) loss 1.3115 (1.2143) acc 71.8750 (70.0253) lr 1.9823e-03 eta 9:50:57 +epoch [5/50] batch [375/500] time 1.555 (1.567) data 0.000 (0.003) loss 1.2236 (1.2164) acc 78.1250 (70.0333) lr 1.9823e-03 eta 9:50:48 +epoch [5/50] batch [380/500] time 1.570 (1.567) data 0.000 (0.003) loss 1.1631 (1.2152) acc 75.0000 (70.0493) lr 1.9823e-03 eta 9:50:36 +epoch [5/50] batch [385/500] time 1.554 (1.567) data 0.000 (0.003) loss 1.4688 (1.2162) acc 68.7500 (70.0000) lr 1.9823e-03 eta 9:50:28 +epoch [5/50] batch [390/500] time 1.567 (1.567) data 0.000 (0.003) loss 1.1084 (1.2133) acc 68.7500 (70.0160) lr 1.9823e-03 eta 9:50:20 +epoch [5/50] batch [395/500] time 1.574 (1.567) data 0.000 (0.003) loss 1.2852 (1.2104) acc 75.0000 (70.0712) lr 1.9823e-03 eta 9:50:12 +epoch [5/50] batch [400/500] time 1.569 (1.566) data 0.001 (0.003) loss 1.4170 (1.2170) acc 71.8750 (69.9609) lr 1.9823e-03 eta 9:50:00 +epoch [5/50] batch [405/500] time 1.547 (1.566) data 0.001 (0.003) loss 1.4238 (1.2173) acc 65.6250 (69.9383) lr 1.9823e-03 eta 9:49:52 +epoch [5/50] batch [410/500] time 1.563 (1.566) data 0.001 (0.003) loss 0.8584 (1.2185) acc 81.2500 (69.9009) lr 1.9823e-03 eta 9:49:44 +epoch [5/50] batch [415/500] time 1.538 (1.566) data 0.000 (0.003) loss 1.1221 (1.2180) acc 68.7500 (69.8870) lr 1.9823e-03 eta 9:49:33 +epoch [5/50] batch [420/500] time 1.545 (1.566) data 0.000 (0.003) loss 0.9976 (1.2175) acc 68.7500 (69.8884) lr 1.9823e-03 eta 9:49:27 +epoch [5/50] 
batch [425/500] time 1.556 (1.566) data 0.000 (0.003) loss 0.9551 (1.2185) acc 78.1250 (69.8603) lr 1.9823e-03 eta 9:49:19 +epoch [5/50] batch [430/500] time 1.562 (1.566) data 0.000 (0.002) loss 1.4551 (1.2194) acc 56.2500 (69.8474) lr 1.9823e-03 eta 9:49:06 +epoch [5/50] batch [435/500] time 1.572 (1.566) data 0.000 (0.002) loss 0.9526 (1.2174) acc 71.8750 (69.8707) lr 1.9823e-03 eta 9:48:59 +epoch [5/50] batch [440/500] time 1.552 (1.566) data 0.000 (0.002) loss 1.3916 (1.2171) acc 62.5000 (69.8722) lr 1.9823e-03 eta 9:48:50 +epoch [5/50] batch [445/500] time 1.566 (1.566) data 0.001 (0.002) loss 0.6177 (1.2179) acc 87.5000 (69.9228) lr 1.9823e-03 eta 9:48:43 +epoch [5/50] batch [450/500] time 1.560 (1.566) data 0.000 (0.002) loss 0.8750 (1.2154) acc 68.7500 (69.9583) lr 1.9823e-03 eta 9:48:35 +epoch [5/50] batch [455/500] time 1.574 (1.566) data 0.000 (0.002) loss 1.2910 (1.2153) acc 68.7500 (69.9725) lr 1.9823e-03 eta 9:48:27 +epoch [5/50] batch [460/500] time 1.564 (1.566) data 0.000 (0.002) loss 1.9346 (1.2191) acc 53.1250 (69.8505) lr 1.9823e-03 eta 9:48:20 +epoch [5/50] batch [465/500] time 1.558 (1.566) data 0.000 (0.002) loss 1.4932 (1.2189) acc 71.8750 (69.8253) lr 1.9823e-03 eta 9:48:16 +epoch [5/50] batch [470/500] time 1.560 (1.566) data 0.000 (0.002) loss 1.4326 (1.2173) acc 65.6250 (69.8537) lr 1.9823e-03 eta 9:48:07 +epoch [5/50] batch [475/500] time 1.562 (1.566) data 0.000 (0.002) loss 1.2256 (1.2175) acc 75.0000 (69.8684) lr 1.9823e-03 eta 9:48:00 +epoch [5/50] batch [480/500] time 1.561 (1.566) data 0.001 (0.002) loss 1.1875 (1.2172) acc 75.0000 (69.8763) lr 1.9823e-03 eta 9:47:51 +epoch [5/50] batch [485/500] time 1.556 (1.566) data 0.001 (0.002) loss 1.1514 (1.2186) acc 68.7500 (69.8582) lr 1.9823e-03 eta 9:47:43 +epoch [5/50] batch [490/500] time 1.551 (1.566) data 0.001 (0.002) loss 1.5000 (1.2175) acc 62.5000 (69.8852) lr 1.9823e-03 eta 9:47:36 +epoch [5/50] batch [495/500] time 1.559 (1.566) data 0.000 (0.002) loss 1.1895 (1.2171) acc 
78.1250 (69.9242) lr 1.9823e-03 eta 9:47:27 +epoch [5/50] batch [500/500] time 1.564 (1.566) data 0.000 (0.002) loss 1.0947 (1.2176) acc 75.0000 (69.9688) lr 1.9686e-03 eta 9:47:16 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,697 +* accuracy: 77.4% +* error: 22.6% +* macro_f1: 76.7% +epoch [6/50] batch [5/500] time 1.557 (1.688) data 0.002 (0.176) loss 1.8281 (1.2459) acc 59.3750 (70.0000) lr 1.9686e-03 eta 10:32:58 +epoch [6/50] batch [10/500] time 1.567 (1.626) data 0.000 (0.088) loss 1.2871 (1.1916) acc 75.0000 (73.4375) lr 1.9686e-03 eta 10:09:21 +epoch [6/50] batch [15/500] time 1.570 (1.607) data 0.001 (0.059) loss 1.2324 (1.1583) acc 78.1250 (72.5000) lr 1.9686e-03 eta 10:02:09 +epoch [6/50] batch [20/500] time 1.545 (1.595) data 0.001 (0.044) loss 1.5693 (1.1527) acc 68.7500 (72.3438) lr 1.9686e-03 eta 9:57:45 +epoch [6/50] batch [25/500] time 1.577 (1.596) data 0.000 (0.036) loss 0.4331 (1.1209) acc 84.3750 (73.5000) lr 1.9686e-03 eta 9:57:47 +epoch [6/50] batch [30/500] time 1.553 (1.589) data 0.000 (0.030) loss 1.2061 (1.1113) acc 68.7500 (73.2292) lr 1.9686e-03 eta 9:54:56 +epoch [6/50] batch [35/500] time 1.563 (1.586) data 0.000 (0.026) loss 1.1143 (1.1018) acc 68.7500 (73.5714) lr 1.9686e-03 eta 9:53:52 +epoch [6/50] batch [40/500] time 1.549 (1.585) data 0.000 (0.022) loss 1.0439 (1.1211) acc 75.0000 (72.9688) lr 1.9686e-03 eta 9:53:09 +epoch [6/50] batch [45/500] time 1.552 (1.582) data 0.001 (0.020) loss 1.5391 (1.1262) acc 65.6250 (72.4306) lr 1.9686e-03 eta 9:51:57 +epoch [6/50] batch [50/500] time 1.576 (1.579) data 0.000 (0.018) loss 1.0947 (1.1372) acc 75.0000 (72.2500) lr 1.9686e-03 eta 9:50:59 +epoch [6/50] batch [55/500] time 1.563 (1.577) data 0.001 (0.016) loss 1.3379 (1.1523) acc 68.7500 (72.1591) lr 1.9686e-03 eta 9:49:56 +epoch [6/50] batch [60/500] time 1.582 (1.576) data 0.000 (0.015) loss 1.0762 (1.1540) acc 71.8750 (71.6146) lr 1.9686e-03 eta 9:49:28 +epoch [6/50] batch [65/500] time 1.544 (1.575) data 
0.000 (0.014) loss 1.0361 (1.1594) acc 75.0000 (71.6346) lr 1.9686e-03 eta 9:48:47 +epoch [6/50] batch [70/500] time 1.553 (1.574) data 0.000 (0.013) loss 1.3203 (1.1534) acc 75.0000 (72.0089) lr 1.9686e-03 eta 9:48:27 +epoch [6/50] batch [75/500] time 1.566 (1.574) data 0.000 (0.012) loss 1.0879 (1.1565) acc 71.8750 (71.7917) lr 1.9686e-03 eta 9:48:20 +epoch [6/50] batch [80/500] time 1.580 (1.573) data 0.000 (0.011) loss 0.9722 (1.1620) acc 65.6250 (71.6016) lr 1.9686e-03 eta 9:47:54 +epoch [6/50] batch [85/500] time 1.542 (1.574) data 0.000 (0.011) loss 0.9673 (1.1599) acc 71.8750 (71.6544) lr 1.9686e-03 eta 9:48:03 +epoch [6/50] batch [90/500] time 1.571 (1.573) data 0.000 (0.010) loss 1.4521 (1.1624) acc 62.5000 (71.7014) lr 1.9686e-03 eta 9:47:41 +epoch [6/50] batch [95/500] time 1.542 (1.573) data 0.000 (0.010) loss 0.8809 (1.1584) acc 75.0000 (71.7105) lr 1.9686e-03 eta 9:47:12 +epoch [6/50] batch [100/500] time 1.568 (1.572) data 0.001 (0.009) loss 0.7290 (1.1685) acc 75.0000 (71.4688) lr 1.9686e-03 eta 9:47:01 +epoch [6/50] batch [105/500] time 1.565 (1.572) data 0.000 (0.009) loss 1.2158 (1.1656) acc 68.7500 (71.3988) lr 1.9686e-03 eta 9:46:43 +epoch [6/50] batch [110/500] time 1.537 (1.571) data 0.000 (0.008) loss 1.6680 (1.1606) acc 71.8750 (71.5909) lr 1.9686e-03 eta 9:46:24 +epoch [6/50] batch [115/500] time 1.543 (1.571) data 0.000 (0.008) loss 1.7314 (1.1581) acc 59.3750 (71.6304) lr 1.9686e-03 eta 9:45:56 +epoch [6/50] batch [120/500] time 1.553 (1.570) data 0.000 (0.008) loss 1.0605 (1.1586) acc 62.5000 (71.4323) lr 1.9686e-03 eta 9:45:38 +epoch [6/50] batch [125/500] time 1.663 (1.571) data 0.000 (0.007) loss 0.9990 (1.1596) acc 71.8750 (71.5500) lr 1.9686e-03 eta 9:45:51 +epoch [6/50] batch [130/500] time 1.540 (1.570) data 0.000 (0.007) loss 1.4756 (1.1589) acc 65.6250 (71.4183) lr 1.9686e-03 eta 9:45:31 +epoch [6/50] batch [135/500] time 1.555 (1.570) data 0.000 (0.007) loss 1.0273 (1.1620) acc 71.8750 (71.3194) lr 1.9686e-03 eta 9:45:06 
+epoch [6/50] batch [140/500] time 1.571 (1.569) data 0.000 (0.007) loss 1.1377 (1.1572) acc 68.7500 (71.4286) lr 1.9686e-03 eta 9:44:49 +epoch [6/50] batch [145/500] time 1.558 (1.569) data 0.000 (0.007) loss 1.1777 (1.1577) acc 56.2500 (71.3578) lr 1.9686e-03 eta 9:44:39 +epoch [6/50] batch [150/500] time 1.563 (1.568) data 0.000 (0.006) loss 1.3125 (1.1705) acc 75.0000 (71.2083) lr 1.9686e-03 eta 9:44:15 +epoch [6/50] batch [155/500] time 1.547 (1.568) data 0.000 (0.006) loss 0.9741 (1.1727) acc 75.0000 (71.2298) lr 1.9686e-03 eta 9:43:56 +epoch [6/50] batch [160/500] time 1.559 (1.568) data 0.000 (0.006) loss 1.2080 (1.1769) acc 71.8750 (71.1719) lr 1.9686e-03 eta 9:43:44 +epoch [6/50] batch [165/500] time 1.557 (1.568) data 0.001 (0.006) loss 1.8232 (1.1788) acc 59.3750 (71.0417) lr 1.9686e-03 eta 9:43:30 +epoch [6/50] batch [170/500] time 1.565 (1.567) data 0.000 (0.006) loss 1.2539 (1.1847) acc 78.1250 (71.0110) lr 1.9686e-03 eta 9:43:18 +epoch [6/50] batch [175/500] time 1.591 (1.567) data 0.000 (0.005) loss 1.2324 (1.1863) acc 65.6250 (71.0179) lr 1.9686e-03 eta 9:43:04 +epoch [6/50] batch [180/500] time 1.568 (1.567) data 0.000 (0.005) loss 1.6201 (1.1966) acc 59.3750 (70.8854) lr 1.9686e-03 eta 9:42:57 +epoch [6/50] batch [185/500] time 1.558 (1.567) data 0.000 (0.005) loss 1.1748 (1.2042) acc 65.6250 (70.6757) lr 1.9686e-03 eta 9:42:46 +epoch [6/50] batch [190/500] time 1.569 (1.567) data 0.000 (0.005) loss 0.6348 (1.2003) acc 84.3750 (70.8059) lr 1.9686e-03 eta 9:42:38 +epoch [6/50] batch [195/500] time 1.559 (1.567) data 0.000 (0.005) loss 1.3623 (1.2018) acc 62.5000 (70.6571) lr 1.9686e-03 eta 9:42:31 +epoch [6/50] batch [200/500] time 1.553 (1.567) data 0.001 (0.005) loss 0.8955 (1.1979) acc 71.8750 (70.5781) lr 1.9686e-03 eta 9:42:27 +epoch [6/50] batch [205/500] time 1.555 (1.567) data 0.000 (0.005) loss 1.1729 (1.2006) acc 68.7500 (70.4726) lr 1.9686e-03 eta 9:42:14 +epoch [6/50] batch [210/500] time 1.580 (1.567) data 0.000 (0.005) loss 0.7197 
(1.1999) acc 87.5000 (70.5506) lr 1.9686e-03 eta 9:42:08 +epoch [6/50] batch [215/500] time 1.589 (1.567) data 0.000 (0.005) loss 0.8008 (1.1992) acc 75.0000 (70.5087) lr 1.9686e-03 eta 9:42:04 +epoch [6/50] batch [220/500] time 1.575 (1.567) data 0.000 (0.004) loss 1.2529 (1.2004) acc 68.7500 (70.4830) lr 1.9686e-03 eta 9:41:48 +epoch [6/50] batch [225/500] time 1.550 (1.567) data 0.001 (0.004) loss 1.2041 (1.1989) acc 68.7500 (70.5139) lr 1.9686e-03 eta 9:41:50 +epoch [6/50] batch [230/500] time 1.576 (1.567) data 0.001 (0.004) loss 0.5713 (1.1947) acc 78.1250 (70.5707) lr 1.9686e-03 eta 9:41:44 +epoch [6/50] batch [235/500] time 1.558 (1.567) data 0.000 (0.004) loss 1.2441 (1.1908) acc 62.5000 (70.5984) lr 1.9686e-03 eta 9:41:39 +epoch [6/50] batch [240/500] time 1.560 (1.567) data 0.000 (0.004) loss 1.2539 (1.1898) acc 56.2500 (70.6250) lr 1.9686e-03 eta 9:41:30 +epoch [6/50] batch [245/500] time 1.555 (1.567) data 0.000 (0.004) loss 0.6279 (1.1888) acc 84.3750 (70.7015) lr 1.9686e-03 eta 9:41:22 +epoch [6/50] batch [250/500] time 1.557 (1.567) data 0.000 (0.004) loss 1.3262 (1.1876) acc 68.7500 (70.7250) lr 1.9686e-03 eta 9:41:16 +epoch [6/50] batch [255/500] time 1.552 (1.567) data 0.000 (0.004) loss 1.0420 (1.1879) acc 81.2500 (70.7353) lr 1.9686e-03 eta 9:41:04 +epoch [6/50] batch [260/500] time 1.566 (1.567) data 0.000 (0.004) loss 1.2842 (1.1905) acc 62.5000 (70.6611) lr 1.9686e-03 eta 9:40:58 +epoch [6/50] batch [265/500] time 1.559 (1.567) data 0.000 (0.004) loss 1.0518 (1.1943) acc 65.6250 (70.5425) lr 1.9686e-03 eta 9:40:49 +epoch [6/50] batch [270/500] time 1.529 (1.568) data 0.000 (0.004) loss 1.8232 (1.1968) acc 71.8750 (70.5556) lr 1.9686e-03 eta 9:40:47 +epoch [6/50] batch [275/500] time 1.572 (1.567) data 0.000 (0.004) loss 1.4824 (1.1992) acc 56.2500 (70.5000) lr 1.9686e-03 eta 9:40:37 +epoch [6/50] batch [280/500] time 1.560 (1.567) data 0.000 (0.004) loss 1.2783 (1.2011) acc 71.8750 (70.5134) lr 1.9686e-03 eta 9:40:29 +epoch [6/50] batch 
[285/500] time 1.563 (1.567) data 0.000 (0.004) loss 0.5815 (1.1957) acc 78.1250 (70.6140) lr 1.9686e-03 eta 9:40:17 +epoch [6/50] batch [290/500] time 1.591 (1.567) data 0.000 (0.003) loss 1.6221 (1.1990) acc 53.1250 (70.5603) lr 1.9686e-03 eta 9:40:11 +epoch [6/50] batch [295/500] time 1.550 (1.567) data 0.001 (0.003) loss 0.7642 (1.2013) acc 78.1250 (70.4767) lr 1.9686e-03 eta 9:39:57 +epoch [6/50] batch [300/500] time 1.555 (1.567) data 0.000 (0.003) loss 1.5137 (1.1984) acc 68.7500 (70.5833) lr 1.9686e-03 eta 9:39:44 +epoch [6/50] batch [305/500] time 1.553 (1.567) data 0.000 (0.003) loss 0.8574 (1.1957) acc 78.1250 (70.6352) lr 1.9686e-03 eta 9:39:32 +epoch [6/50] batch [310/500] time 1.535 (1.567) data 0.000 (0.003) loss 0.9521 (1.1930) acc 78.1250 (70.6956) lr 1.9686e-03 eta 9:39:22 +epoch [6/50] batch [315/500] time 1.559 (1.567) data 0.001 (0.003) loss 1.2051 (1.1937) acc 75.0000 (70.6944) lr 1.9686e-03 eta 9:39:13 +epoch [6/50] batch [320/500] time 1.557 (1.566) data 0.000 (0.003) loss 1.5342 (1.1952) acc 65.6250 (70.6348) lr 1.9686e-03 eta 9:39:02 +epoch [6/50] batch [325/500] time 1.583 (1.566) data 0.000 (0.003) loss 0.9585 (1.1975) acc 68.7500 (70.5769) lr 1.9686e-03 eta 9:38:56 +epoch [6/50] batch [330/500] time 1.550 (1.566) data 0.001 (0.003) loss 1.1240 (1.1975) acc 62.5000 (70.5682) lr 1.9686e-03 eta 9:38:45 +epoch [6/50] batch [335/500] time 1.574 (1.566) data 0.000 (0.003) loss 0.9370 (1.1963) acc 75.0000 (70.5690) lr 1.9686e-03 eta 9:38:34 +epoch [6/50] batch [340/500] time 1.554 (1.566) data 0.001 (0.003) loss 1.4531 (1.1969) acc 75.0000 (70.5699) lr 1.9686e-03 eta 9:38:22 +epoch [6/50] batch [345/500] time 1.559 (1.566) data 0.000 (0.003) loss 0.9253 (1.1963) acc 78.1250 (70.6341) lr 1.9686e-03 eta 9:38:10 +epoch [6/50] batch [350/500] time 1.562 (1.566) data 0.000 (0.003) loss 0.8413 (1.1975) acc 65.6250 (70.6429) lr 1.9686e-03 eta 9:37:58 +epoch [6/50] batch [355/500] time 1.577 (1.566) data 0.000 (0.003) loss 1.1992 (1.1934) acc 59.3750 
(70.6690) lr 1.9686e-03 eta 9:37:48 +epoch [6/50] batch [360/500] time 1.563 (1.566) data 0.000 (0.003) loss 1.5605 (1.1980) acc 71.8750 (70.7031) lr 1.9686e-03 eta 9:37:40 +epoch [6/50] batch [365/500] time 1.545 (1.565) data 0.000 (0.003) loss 0.9805 (1.1989) acc 78.1250 (70.7192) lr 1.9686e-03 eta 9:37:29 +epoch [6/50] batch [370/500] time 1.539 (1.566) data 0.000 (0.003) loss 1.3242 (1.1992) acc 65.6250 (70.7601) lr 1.9686e-03 eta 9:37:25 +epoch [6/50] batch [375/500] time 1.558 (1.565) data 0.000 (0.003) loss 1.9551 (1.2024) acc 56.2500 (70.6750) lr 1.9686e-03 eta 9:37:14 +epoch [6/50] batch [380/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.0566 (1.2016) acc 71.8750 (70.6743) lr 1.9686e-03 eta 9:37:02 +epoch [6/50] batch [385/500] time 1.595 (1.565) data 0.000 (0.003) loss 1.3223 (1.2020) acc 71.8750 (70.6899) lr 1.9686e-03 eta 9:36:56 +epoch [6/50] batch [390/500] time 1.584 (1.565) data 0.000 (0.003) loss 2.0117 (1.2051) acc 65.6250 (70.6731) lr 1.9686e-03 eta 9:36:50 +epoch [6/50] batch [395/500] time 1.560 (1.565) data 0.000 (0.003) loss 1.4551 (1.2061) acc 68.7500 (70.6566) lr 1.9686e-03 eta 9:36:42 +epoch [6/50] batch [400/500] time 1.561 (1.565) data 0.000 (0.003) loss 0.8291 (1.2050) acc 78.1250 (70.5859) lr 1.9686e-03 eta 9:36:31 +epoch [6/50] batch [405/500] time 1.573 (1.565) data 0.000 (0.003) loss 1.8779 (1.2063) acc 46.8750 (70.5015) lr 1.9686e-03 eta 9:36:25 +epoch [6/50] batch [410/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.4502 (1.2055) acc 62.5000 (70.4878) lr 1.9686e-03 eta 9:36:16 +epoch [6/50] batch [415/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.1035 (1.2072) acc 68.7500 (70.4066) lr 1.9686e-03 eta 9:36:12 +epoch [6/50] batch [420/500] time 1.552 (1.565) data 0.000 (0.002) loss 1.1240 (1.2051) acc 71.8750 (70.4613) lr 1.9686e-03 eta 9:36:03 +epoch [6/50] batch [425/500] time 1.579 (1.565) data 0.000 (0.002) loss 0.6382 (1.2062) acc 90.6250 (70.4559) lr 1.9686e-03 eta 9:35:56 +epoch [6/50] batch [430/500] time 1.578 
(1.565) data 0.000 (0.002) loss 1.3662 (1.2063) acc 65.6250 (70.4433) lr 1.9686e-03 eta 9:35:48 +epoch [6/50] batch [435/500] time 1.550 (1.565) data 0.000 (0.002) loss 0.8423 (1.2028) acc 84.3750 (70.5532) lr 1.9686e-03 eta 9:35:42 +epoch [6/50] batch [440/500] time 1.536 (1.565) data 0.000 (0.002) loss 0.4758 (1.2024) acc 93.7500 (70.5611) lr 1.9686e-03 eta 9:35:31 +epoch [6/50] batch [445/500] time 1.559 (1.565) data 0.000 (0.002) loss 0.8774 (1.2031) acc 75.0000 (70.5267) lr 1.9686e-03 eta 9:35:23 +epoch [6/50] batch [450/500] time 1.537 (1.565) data 0.000 (0.002) loss 0.7856 (1.2005) acc 78.1250 (70.5833) lr 1.9686e-03 eta 9:35:10 +epoch [6/50] batch [455/500] time 1.560 (1.565) data 0.000 (0.002) loss 0.9824 (1.1960) acc 75.0000 (70.6868) lr 1.9686e-03 eta 9:35:02 +epoch [6/50] batch [460/500] time 1.559 (1.565) data 0.000 (0.002) loss 1.1875 (1.1993) acc 71.8750 (70.6793) lr 1.9686e-03 eta 9:34:53 +epoch [6/50] batch [465/500] time 1.560 (1.565) data 0.000 (0.002) loss 1.4033 (1.2004) acc 71.8750 (70.6922) lr 1.9686e-03 eta 9:34:41 +epoch [6/50] batch [470/500] time 1.599 (1.565) data 0.000 (0.002) loss 0.6172 (1.1989) acc 87.5000 (70.7048) lr 1.9686e-03 eta 9:34:36 +epoch [6/50] batch [475/500] time 1.558 (1.565) data 0.000 (0.002) loss 1.3066 (1.1990) acc 78.1250 (70.7303) lr 1.9686e-03 eta 9:34:26 +epoch [6/50] batch [480/500] time 1.538 (1.565) data 0.000 (0.002) loss 0.6997 (1.1967) acc 87.5000 (70.7943) lr 1.9686e-03 eta 9:34:14 +epoch [6/50] batch [485/500] time 1.556 (1.565) data 0.001 (0.002) loss 1.1553 (1.1961) acc 62.5000 (70.7990) lr 1.9686e-03 eta 9:34:04 +epoch [6/50] batch [490/500] time 1.551 (1.564) data 0.000 (0.002) loss 1.7998 (1.1961) acc 68.7500 (70.8099) lr 1.9686e-03 eta 9:33:54 +epoch [6/50] batch [495/500] time 1.551 (1.564) data 0.000 (0.002) loss 1.6709 (1.1961) acc 56.2500 (70.7955) lr 1.9686e-03 eta 9:33:44 +epoch [6/50] batch [500/500] time 1.570 (1.564) data 0.000 (0.002) loss 1.6064 (1.1961) acc 59.3750 (70.7625) lr 
1.9511e-03 eta 9:33:36 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,799 +* accuracy: 77.6% +* error: 22.4% +* macro_f1: 76.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [7/50] batch [5/500] time 1.558 (1.737) data 0.001 (0.231) loss 2.4043 (1.4098) acc 43.7500 (65.6250) lr 1.9511e-03 eta 10:36:55 +epoch [7/50] batch [10/500] time 1.556 (1.644) data 0.000 (0.116) loss 1.1553 (1.2192) acc 75.0000 (70.3125) lr 1.9511e-03 eta 10:02:26 +epoch [7/50] batch [15/500] time 1.568 (1.614) data 0.000 (0.077) loss 1.0078 (1.1534) acc 68.7500 (70.2083) lr 1.9511e-03 eta 9:51:18 +epoch [7/50] batch [20/500] time 1.551 (1.608) data 0.000 (0.058) loss 1.0938 (1.1602) acc 62.5000 (70.0000) lr 1.9511e-03 eta 9:49:09 +epoch [7/50] batch [25/500] time 1.560 (1.599) data 0.001 (0.047) loss 1.3926 (1.1676) acc 68.7500 (70.7500) lr 1.9511e-03 eta 9:45:31 +epoch [7/50] batch [30/500] time 1.570 (1.592) data 0.000 (0.039) loss 0.9141 (1.1679) acc 68.7500 (70.4167) lr 1.9511e-03 eta 9:42:58 +epoch [7/50] batch [35/500] time 1.575 (1.589) data 0.000 (0.033) loss 1.8281 (1.1529) acc 56.2500 (71.2500) lr 1.9511e-03 eta 9:41:38 +epoch [7/50] batch [40/500] time 1.559 (1.587) data 0.000 (0.029) loss 1.1143 (1.1699) acc 71.8750 (71.4062) lr 1.9511e-03 eta 9:40:44 +epoch [7/50] batch [45/500] time 1.559 (1.585) data 0.000 (0.026) loss 0.3442 (1.1581) acc 90.6250 (71.7361) lr 1.9511e-03 eta 9:39:58 +epoch [7/50] batch [50/500] time 1.591 (1.583) data 0.000 (0.024) loss 0.5449 (1.1335) acc 87.5000 (72.3125) lr 1.9511e-03 eta 9:39:00 +epoch [7/50] batch [55/500] time 1.579 (1.582) data 0.001 (0.021) loss 0.8608 (1.1116) acc 87.5000 (72.8409) lr 1.9511e-03 eta 9:38:32 +epoch [7/50] batch [60/500] time 1.549 (1.580) data 0.001 (0.020) loss 0.9121 (1.1193) acc 81.2500 (72.7604) lr 1.9511e-03 eta 9:37:40 +epoch [7/50] batch [65/500] time 1.558 (1.579) data 0.000 (0.018) loss 0.9839 
(1.1143) acc 75.0000 (72.7885) lr 1.9511e-03 eta 9:37:13 +epoch [7/50] batch [70/500] time 1.569 (1.578) data 0.000 (0.017) loss 1.3096 (1.1421) acc 71.8750 (72.3214) lr 1.9511e-03 eta 9:36:44 +epoch [7/50] batch [75/500] time 1.565 (1.577) data 0.000 (0.016) loss 0.9912 (1.1328) acc 68.7500 (72.3750) lr 1.9511e-03 eta 9:36:17 +epoch [7/50] batch [80/500] time 1.574 (1.576) data 0.000 (0.015) loss 0.9380 (1.1307) acc 78.1250 (72.1484) lr 1.9511e-03 eta 9:35:50 +epoch [7/50] batch [85/500] time 1.546 (1.575) data 0.000 (0.014) loss 1.3643 (1.1200) acc 65.6250 (72.2059) lr 1.9511e-03 eta 9:35:16 +epoch [7/50] batch [90/500] time 1.550 (1.574) data 0.000 (0.013) loss 1.3750 (1.1256) acc 75.0000 (72.0833) lr 1.9511e-03 eta 9:34:46 +epoch [7/50] batch [95/500] time 1.573 (1.573) data 0.001 (0.013) loss 0.7822 (1.1168) acc 84.3750 (72.2368) lr 1.9511e-03 eta 9:34:15 +epoch [7/50] batch [100/500] time 1.548 (1.572) data 0.000 (0.012) loss 1.3125 (1.1222) acc 62.5000 (72.2500) lr 1.9511e-03 eta 9:33:54 +epoch [7/50] batch [105/500] time 1.540 (1.572) data 0.000 (0.011) loss 0.8945 (1.1329) acc 84.3750 (72.4107) lr 1.9511e-03 eta 9:33:38 +epoch [7/50] batch [110/500] time 1.548 (1.571) data 0.000 (0.011) loss 0.9712 (1.1281) acc 71.8750 (72.4432) lr 1.9511e-03 eta 9:33:18 +epoch [7/50] batch [115/500] time 1.578 (1.571) data 0.000 (0.010) loss 1.0850 (1.1322) acc 71.8750 (72.3913) lr 1.9511e-03 eta 9:32:59 +epoch [7/50] batch [120/500] time 1.577 (1.572) data 0.001 (0.010) loss 0.9897 (1.1331) acc 81.2500 (72.3698) lr 1.9511e-03 eta 9:33:13 +epoch [7/50] batch [125/500] time 1.584 (1.572) data 0.000 (0.010) loss 1.4434 (1.1382) acc 71.8750 (72.0500) lr 1.9511e-03 eta 9:33:05 +epoch [7/50] batch [130/500] time 1.567 (1.571) data 0.000 (0.009) loss 1.4004 (1.1393) acc 68.7500 (71.9471) lr 1.9511e-03 eta 9:32:46 +epoch [7/50] batch [135/500] time 1.540 (1.571) data 0.000 (0.009) loss 0.8687 (1.1376) acc 78.1250 (72.0833) lr 1.9511e-03 eta 9:32:32 +epoch [7/50] batch [140/500] 
time 1.583 (1.571) data 0.000 (0.009) loss 1.0713 (1.1407) acc 62.5000 (71.8750) lr 1.9511e-03 eta 9:32:18 +epoch [7/50] batch [145/500] time 1.554 (1.570) data 0.000 (0.008) loss 1.5703 (1.1357) acc 65.6250 (71.9181) lr 1.9511e-03 eta 9:31:53 +epoch [7/50] batch [150/500] time 1.549 (1.570) data 0.000 (0.008) loss 1.0771 (1.1373) acc 71.8750 (71.8542) lr 1.9511e-03 eta 9:31:33 +epoch [7/50] batch [155/500] time 1.561 (1.569) data 0.000 (0.008) loss 1.3379 (1.1387) acc 62.5000 (71.7137) lr 1.9511e-03 eta 9:31:13 +epoch [7/50] batch [160/500] time 1.640 (1.569) data 0.001 (0.008) loss 1.0508 (1.1360) acc 75.0000 (71.7773) lr 1.9511e-03 eta 9:31:11 +epoch [7/50] batch [165/500] time 1.555 (1.569) data 0.000 (0.007) loss 2.4277 (1.1428) acc 56.2500 (71.7235) lr 1.9511e-03 eta 9:31:01 +epoch [7/50] batch [170/500] time 1.551 (1.568) data 0.000 (0.007) loss 1.0889 (1.1431) acc 62.5000 (71.5809) lr 1.9511e-03 eta 9:30:39 +epoch [7/50] batch [175/500] time 1.545 (1.568) data 0.000 (0.007) loss 1.5029 (1.1512) acc 68.7500 (71.4107) lr 1.9511e-03 eta 9:30:28 +epoch [7/50] batch [180/500] time 1.547 (1.568) data 0.000 (0.007) loss 0.5542 (1.1507) acc 78.1250 (71.4236) lr 1.9511e-03 eta 9:30:16 +epoch [7/50] batch [185/500] time 1.563 (1.568) data 0.000 (0.007) loss 0.9175 (1.1510) acc 78.1250 (71.4020) lr 1.9511e-03 eta 9:29:57 +epoch [7/50] batch [190/500] time 1.559 (1.568) data 0.000 (0.007) loss 0.7002 (1.1579) acc 87.5000 (71.3651) lr 1.9511e-03 eta 9:29:48 +epoch [7/50] batch [195/500] time 1.577 (1.567) data 0.000 (0.006) loss 1.9375 (1.1621) acc 56.2500 (71.2981) lr 1.9511e-03 eta 9:29:37 +epoch [7/50] batch [200/500] time 1.554 (1.567) data 0.001 (0.006) loss 0.6860 (1.1589) acc 81.2500 (71.3125) lr 1.9511e-03 eta 9:29:24 +epoch [7/50] batch [205/500] time 1.553 (1.567) data 0.000 (0.006) loss 1.0732 (1.1592) acc 68.7500 (71.2805) lr 1.9511e-03 eta 9:29:14 +epoch [7/50] batch [210/500] time 1.567 (1.567) data 0.001 (0.006) loss 1.5439 (1.1645) acc 65.6250 (71.1458) 
lr 1.9511e-03 eta 9:29:05 +epoch [7/50] batch [215/500] time 1.557 (1.567) data 0.000 (0.006) loss 1.0215 (1.1634) acc 68.7500 (71.1773) lr 1.9511e-03 eta 9:28:52 +epoch [7/50] batch [220/500] time 1.539 (1.566) data 0.000 (0.006) loss 1.2803 (1.1654) acc 78.1250 (71.1648) lr 1.9511e-03 eta 9:28:35 +epoch [7/50] batch [225/500] time 1.557 (1.566) data 0.000 (0.006) loss 1.9346 (1.1641) acc 59.3750 (71.1806) lr 1.9511e-03 eta 9:28:28 +epoch [7/50] batch [230/500] time 1.560 (1.566) data 0.000 (0.005) loss 1.0586 (1.1603) acc 75.0000 (71.1821) lr 1.9511e-03 eta 9:28:22 +epoch [7/50] batch [235/500] time 1.539 (1.566) data 0.000 (0.005) loss 1.3076 (1.1615) acc 78.1250 (71.2367) lr 1.9511e-03 eta 9:28:07 +epoch [7/50] batch [240/500] time 1.569 (1.566) data 0.000 (0.005) loss 1.4639 (1.1639) acc 71.8750 (71.2760) lr 1.9511e-03 eta 9:27:58 +epoch [7/50] batch [245/500] time 1.568 (1.566) data 0.000 (0.005) loss 1.3467 (1.1685) acc 62.5000 (71.1607) lr 1.9511e-03 eta 9:27:53 +epoch [7/50] batch [250/500] time 1.552 (1.566) data 0.000 (0.005) loss 1.1982 (1.1674) acc 78.1250 (71.2375) lr 1.9511e-03 eta 9:27:41 +epoch [7/50] batch [255/500] time 1.549 (1.566) data 0.000 (0.005) loss 1.9785 (1.1704) acc 56.2500 (71.2255) lr 1.9511e-03 eta 9:27:25 +epoch [7/50] batch [260/500] time 1.572 (1.566) data 0.000 (0.005) loss 0.9160 (1.1686) acc 78.1250 (71.2260) lr 1.9511e-03 eta 9:27:24 +epoch [7/50] batch [265/500] time 1.565 (1.566) data 0.000 (0.005) loss 1.0078 (1.1707) acc 75.0000 (71.1910) lr 1.9511e-03 eta 9:27:14 +epoch [7/50] batch [270/500] time 1.560 (1.566) data 0.000 (0.005) loss 1.0859 (1.1718) acc 62.5000 (71.1343) lr 1.9511e-03 eta 9:27:03 +epoch [7/50] batch [275/500] time 1.553 (1.566) data 0.000 (0.005) loss 0.6470 (1.1655) acc 84.3750 (71.2841) lr 1.9511e-03 eta 9:26:55 +epoch [7/50] batch [280/500] time 1.532 (1.566) data 0.000 (0.005) loss 0.8511 (1.1662) acc 75.0000 (71.3281) lr 1.9511e-03 eta 9:26:43 +epoch [7/50] batch [285/500] time 1.564 (1.565) data 
0.000 (0.004) loss 1.1738 (1.1653) acc 59.3750 (71.3158) lr 1.9511e-03 eta 9:26:31 +epoch [7/50] batch [290/500] time 1.568 (1.565) data 0.000 (0.004) loss 1.2051 (1.1670) acc 65.6250 (71.3039) lr 1.9511e-03 eta 9:26:22 +epoch [7/50] batch [295/500] time 1.556 (1.565) data 0.001 (0.004) loss 0.8135 (1.1635) acc 75.0000 (71.3665) lr 1.9511e-03 eta 9:26:14 +epoch [7/50] batch [300/500] time 1.552 (1.565) data 0.000 (0.004) loss 1.3037 (1.1643) acc 68.7500 (71.3542) lr 1.9511e-03 eta 9:26:02 +epoch [7/50] batch [305/500] time 1.559 (1.565) data 0.000 (0.004) loss 1.2734 (1.1626) acc 75.0000 (71.3525) lr 1.9511e-03 eta 9:25:59 +epoch [7/50] batch [310/500] time 1.571 (1.566) data 0.000 (0.004) loss 1.4062 (1.1605) acc 68.7500 (71.3810) lr 1.9511e-03 eta 9:25:55 +epoch [7/50] batch [315/500] time 1.543 (1.565) data 0.001 (0.004) loss 1.3789 (1.1580) acc 78.1250 (71.5079) lr 1.9511e-03 eta 9:25:46 +epoch [7/50] batch [320/500] time 1.600 (1.566) data 0.000 (0.004) loss 1.4951 (1.1602) acc 56.2500 (71.4746) lr 1.9511e-03 eta 9:25:42 +epoch [7/50] batch [325/500] time 1.586 (1.566) data 0.000 (0.004) loss 2.3008 (1.1672) acc 53.1250 (71.4231) lr 1.9511e-03 eta 9:25:38 +epoch [7/50] batch [330/500] time 1.568 (1.566) data 0.000 (0.004) loss 0.9717 (1.1681) acc 81.2500 (71.4299) lr 1.9511e-03 eta 9:25:27 +epoch [7/50] batch [335/500] time 1.577 (1.566) data 0.000 (0.004) loss 1.5625 (1.1699) acc 65.6250 (71.3993) lr 1.9511e-03 eta 9:25:20 +epoch [7/50] batch [340/500] time 1.568 (1.566) data 0.000 (0.004) loss 1.5840 (1.1699) acc 53.1250 (71.3511) lr 1.9511e-03 eta 9:25:10 +epoch [7/50] batch [345/500] time 1.555 (1.566) data 0.000 (0.004) loss 0.9321 (1.1716) acc 75.0000 (71.3134) lr 1.9511e-03 eta 9:25:03 +epoch [7/50] batch [350/500] time 1.580 (1.566) data 0.000 (0.004) loss 1.6113 (1.1704) acc 59.3750 (71.3393) lr 1.9511e-03 eta 9:24:58 +epoch [7/50] batch [355/500] time 1.565 (1.566) data 0.000 (0.004) loss 2.0781 (1.1736) acc 59.3750 (71.3116) lr 1.9511e-03 eta 
9:24:48 +epoch [7/50] batch [360/500] time 1.553 (1.566) data 0.000 (0.004) loss 0.9224 (1.1727) acc 75.0000 (71.3542) lr 1.9511e-03 eta 9:24:38 +epoch [7/50] batch [365/500] time 1.563 (1.565) data 0.000 (0.004) loss 0.7915 (1.1750) acc 87.5000 (71.3442) lr 1.9511e-03 eta 9:24:28 +epoch [7/50] batch [370/500] time 1.554 (1.565) data 0.000 (0.004) loss 1.0762 (1.1785) acc 68.7500 (71.2416) lr 1.9511e-03 eta 9:24:18 +epoch [7/50] batch [375/500] time 1.553 (1.565) data 0.000 (0.003) loss 1.1240 (1.1781) acc 71.8750 (71.2167) lr 1.9511e-03 eta 9:24:05 +epoch [7/50] batch [380/500] time 1.550 (1.565) data 0.000 (0.003) loss 0.9219 (1.1760) acc 78.1250 (71.2664) lr 1.9511e-03 eta 9:23:56 +epoch [7/50] batch [385/500] time 1.558 (1.565) data 0.000 (0.003) loss 1.1943 (1.1757) acc 81.2500 (71.2987) lr 1.9511e-03 eta 9:23:49 +epoch [7/50] batch [390/500] time 1.545 (1.565) data 0.000 (0.003) loss 1.3691 (1.1749) acc 68.7500 (71.3462) lr 1.9511e-03 eta 9:23:41 +epoch [7/50] batch [395/500] time 1.557 (1.565) data 0.000 (0.003) loss 0.7363 (1.1738) acc 71.8750 (71.2975) lr 1.9511e-03 eta 9:23:36 +epoch [7/50] batch [400/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.4453 (1.1718) acc 68.7500 (71.3125) lr 1.9511e-03 eta 9:23:29 +epoch [7/50] batch [405/500] time 1.577 (1.565) data 0.000 (0.003) loss 1.2471 (1.1709) acc 71.8750 (71.3040) lr 1.9511e-03 eta 9:23:26 +epoch [7/50] batch [410/500] time 1.559 (1.565) data 0.001 (0.003) loss 1.1338 (1.1712) acc 65.6250 (71.2805) lr 1.9511e-03 eta 9:23:15 +epoch [7/50] batch [415/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.0537 (1.1723) acc 81.2500 (71.2877) lr 1.9511e-03 eta 9:23:05 +epoch [7/50] batch [420/500] time 1.559 (1.565) data 0.000 (0.003) loss 1.0732 (1.1726) acc 71.8750 (71.2946) lr 1.9511e-03 eta 9:22:56 +epoch [7/50] batch [425/500] time 1.559 (1.565) data 0.000 (0.003) loss 0.7041 (1.1686) acc 81.2500 (71.3456) lr 1.9511e-03 eta 9:22:48 +epoch [7/50] batch [430/500] time 1.555 (1.565) data 0.000 (0.003) loss 
1.2383 (1.1696) acc 65.6250 (71.3227) lr 1.9511e-03 eta 9:22:42 +epoch [7/50] batch [435/500] time 1.554 (1.565) data 0.000 (0.003) loss 0.9473 (1.1721) acc 75.0000 (71.2859) lr 1.9511e-03 eta 9:22:36 +epoch [7/50] batch [440/500] time 1.567 (1.565) data 0.000 (0.003) loss 1.0352 (1.1711) acc 71.8750 (71.2997) lr 1.9511e-03 eta 9:22:28 +epoch [7/50] batch [445/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.2549 (1.1720) acc 62.5000 (71.2430) lr 1.9511e-03 eta 9:22:17 +epoch [7/50] batch [450/500] time 1.539 (1.565) data 0.000 (0.003) loss 1.6553 (1.1732) acc 53.1250 (71.1806) lr 1.9511e-03 eta 9:22:12 +epoch [7/50] batch [455/500] time 1.545 (1.565) data 0.000 (0.003) loss 1.1357 (1.1742) acc 71.8750 (71.1470) lr 1.9511e-03 eta 9:22:02 +epoch [7/50] batch [460/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.3252 (1.1762) acc 68.7500 (71.1345) lr 1.9511e-03 eta 9:21:51 +epoch [7/50] batch [465/500] time 1.554 (1.565) data 0.000 (0.003) loss 1.1543 (1.1769) acc 68.7500 (71.1156) lr 1.9511e-03 eta 9:21:43 +epoch [7/50] batch [470/500] time 1.562 (1.565) data 0.000 (0.003) loss 0.9370 (1.1789) acc 71.8750 (71.0505) lr 1.9511e-03 eta 9:21:34 +epoch [7/50] batch [475/500] time 1.553 (1.565) data 0.001 (0.003) loss 0.4363 (1.1786) acc 87.5000 (71.1118) lr 1.9511e-03 eta 9:21:23 +epoch [7/50] batch [480/500] time 1.567 (1.565) data 0.000 (0.003) loss 1.1260 (1.1761) acc 71.8750 (71.1654) lr 1.9511e-03 eta 9:21:16 +epoch [7/50] batch [485/500] time 1.553 (1.565) data 0.001 (0.003) loss 1.3135 (1.1774) acc 56.2500 (71.1018) lr 1.9511e-03 eta 9:21:07 +epoch [7/50] batch [490/500] time 1.515 (1.565) data 0.000 (0.003) loss 0.5913 (1.1759) acc 81.2500 (71.1352) lr 1.9511e-03 eta 9:20:55 +epoch [7/50] batch [495/500] time 1.568 (1.564) data 0.000 (0.003) loss 1.6543 (1.1759) acc 50.0000 (71.1237) lr 1.9511e-03 eta 9:20:42 +epoch [7/50] batch [500/500] time 1.544 (1.564) data 0.000 (0.003) loss 0.3960 (1.1761) acc 87.5000 (71.1375) lr 1.9298e-03 eta 9:20:35 +Evaluate on the 
*val* set +=> result +* total: 50,000 +* correct: 38,828 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 77.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [8/50] batch [5/500] time 1.563 (1.702) data 0.000 (0.189) loss 0.7524 (1.0770) acc 81.2500 (71.8750) lr 1.9298e-03 eta 10:09:35 +epoch [8/50] batch [10/500] time 1.553 (1.630) data 0.000 (0.095) loss 0.9478 (1.0654) acc 81.2500 (74.0625) lr 1.9298e-03 eta 9:43:42 +epoch [8/50] batch [15/500] time 1.547 (1.609) data 0.000 (0.063) loss 1.4238 (1.0653) acc 59.3750 (72.5000) lr 1.9298e-03 eta 9:36:04 +epoch [8/50] batch [20/500] time 1.545 (1.595) data 0.000 (0.048) loss 1.0713 (1.1403) acc 71.8750 (70.4688) lr 1.9298e-03 eta 9:31:00 +epoch [8/50] batch [25/500] time 1.546 (1.587) data 0.000 (0.038) loss 1.1670 (1.1177) acc 71.8750 (71.5000) lr 1.9298e-03 eta 9:28:08 +epoch [8/50] batch [30/500] time 1.570 (1.583) data 0.000 (0.032) loss 1.4697 (1.1174) acc 65.6250 (71.6667) lr 1.9298e-03 eta 9:26:21 +epoch [8/50] batch [35/500] time 1.556 (1.579) data 0.001 (0.027) loss 0.9971 (1.1403) acc 71.8750 (71.1607) lr 1.9298e-03 eta 9:24:54 +epoch [8/50] batch [40/500] time 1.558 (1.580) data 0.000 (0.024) loss 0.7466 (1.1037) acc 81.2500 (72.1875) lr 1.9298e-03 eta 9:25:06 +epoch [8/50] batch [45/500] time 1.563 (1.578) data 0.000 (0.021) loss 1.1885 (1.1326) acc 75.0000 (71.8056) lr 1.9298e-03 eta 9:24:22 +epoch [8/50] batch [50/500] time 1.575 (1.577) data 0.000 (0.019) loss 1.0410 (1.1266) acc 75.0000 (72.0625) lr 1.9298e-03 eta 9:23:47 +epoch [8/50] batch [55/500] time 1.551 (1.576) data 0.000 (0.018) loss 1.0908 (1.1355) acc 65.6250 (71.4773) lr 1.9298e-03 eta 9:23:08 +epoch [8/50] batch [60/500] time 1.545 (1.574) data 0.000 (0.016) loss 1.3848 (1.1690) acc 71.8750 (70.9375) lr 1.9298e-03 eta 9:22:18 +epoch [8/50] batch [65/500] time 1.575 (1.573) data 0.000 (0.015) loss 1.9619 (1.2071) acc 53.1250 (70.1923) lr 
1.9298e-03 eta 9:21:48 +epoch [8/50] batch [70/500] time 1.566 (1.571) data 0.000 (0.014) loss 1.3809 (1.2192) acc 75.0000 (70.1339) lr 1.9298e-03 eta 9:21:04 +epoch [8/50] batch [75/500] time 1.573 (1.570) data 0.001 (0.013) loss 0.5806 (1.2171) acc 78.1250 (70.1250) lr 1.9298e-03 eta 9:20:40 +epoch [8/50] batch [80/500] time 1.651 (1.571) data 0.000 (0.012) loss 1.3105 (1.2201) acc 68.7500 (70.1562) lr 1.9298e-03 eta 9:20:45 +epoch [8/50] batch [85/500] time 1.582 (1.570) data 0.000 (0.011) loss 0.9370 (1.2327) acc 75.0000 (69.8897) lr 1.9298e-03 eta 9:20:29 +epoch [8/50] batch [90/500] time 1.554 (1.570) data 0.000 (0.011) loss 1.1250 (1.2395) acc 71.8750 (69.5139) lr 1.9298e-03 eta 9:20:17 +epoch [8/50] batch [95/500] time 1.554 (1.570) data 0.001 (0.010) loss 0.6030 (1.2273) acc 81.2500 (69.9013) lr 1.9298e-03 eta 9:19:57 +epoch [8/50] batch [100/500] time 1.554 (1.569) data 0.000 (0.010) loss 0.7993 (1.2177) acc 78.1250 (70.1875) lr 1.9298e-03 eta 9:19:36 +epoch [8/50] batch [105/500] time 1.565 (1.568) data 0.000 (0.009) loss 1.0996 (1.2257) acc 71.8750 (69.9405) lr 1.9298e-03 eta 9:19:13 +epoch [8/50] batch [110/500] time 1.570 (1.568) data 0.000 (0.009) loss 1.2949 (1.2342) acc 68.7500 (69.8011) lr 1.9298e-03 eta 9:18:57 +epoch [8/50] batch [115/500] time 1.573 (1.567) data 0.000 (0.009) loss 0.7393 (1.2226) acc 68.7500 (69.9728) lr 1.9298e-03 eta 9:18:39 +epoch [8/50] batch [120/500] time 1.563 (1.567) data 0.000 (0.008) loss 1.7617 (1.2252) acc 62.5000 (70.0260) lr 1.9298e-03 eta 9:18:22 +epoch [8/50] batch [125/500] time 1.540 (1.567) data 0.000 (0.008) loss 0.5918 (1.2212) acc 78.1250 (70.1250) lr 1.9298e-03 eta 9:18:08 +epoch [8/50] batch [130/500] time 1.545 (1.566) data 0.000 (0.008) loss 2.0039 (1.2276) acc 56.2500 (70.0481) lr 1.9298e-03 eta 9:17:52 +epoch [8/50] batch [135/500] time 1.564 (1.566) data 0.000 (0.007) loss 1.7773 (1.2302) acc 62.5000 (70.0463) lr 1.9298e-03 eta 9:17:42 +epoch [8/50] batch [140/500] time 1.571 (1.566) data 0.000 
(0.007) loss 1.9014 (1.2322) acc 62.5000 (70.0000) lr 1.9298e-03 eta 9:17:28 +epoch [8/50] batch [145/500] time 1.555 (1.566) data 0.000 (0.007) loss 1.4521 (1.2334) acc 68.7500 (70.1078) lr 1.9298e-03 eta 9:17:22 +epoch [8/50] batch [150/500] time 1.560 (1.566) data 0.001 (0.007) loss 0.9214 (1.2291) acc 68.7500 (70.1458) lr 1.9298e-03 eta 9:17:14 +epoch [8/50] batch [155/500] time 1.559 (1.566) data 0.001 (0.006) loss 1.0049 (1.2255) acc 71.8750 (70.3427) lr 1.9298e-03 eta 9:17:02 +epoch [8/50] batch [160/500] time 1.580 (1.566) data 0.001 (0.006) loss 1.1504 (1.2222) acc 68.7500 (70.3906) lr 1.9298e-03 eta 9:16:52 +epoch [8/50] batch [165/500] time 1.559 (1.566) data 0.000 (0.006) loss 0.7178 (1.2128) acc 81.2500 (70.4924) lr 1.9298e-03 eta 9:16:40 +epoch [8/50] batch [170/500] time 1.554 (1.565) data 0.000 (0.006) loss 0.9839 (1.2043) acc 75.0000 (70.7537) lr 1.9298e-03 eta 9:16:25 +epoch [8/50] batch [175/500] time 1.551 (1.565) data 0.000 (0.006) loss 1.2637 (1.2036) acc 65.6250 (70.8214) lr 1.9298e-03 eta 9:16:10 +epoch [8/50] batch [180/500] time 1.575 (1.565) data 0.001 (0.006) loss 0.9614 (1.2064) acc 78.1250 (70.8854) lr 1.9298e-03 eta 9:16:14 +epoch [8/50] batch [185/500] time 1.579 (1.565) data 0.000 (0.006) loss 1.1865 (1.2023) acc 75.0000 (71.0135) lr 1.9298e-03 eta 9:16:05 +epoch [8/50] batch [190/500] time 1.576 (1.566) data 0.000 (0.005) loss 0.7559 (1.2024) acc 71.8750 (70.9539) lr 1.9298e-03 eta 9:16:02 +epoch [8/50] batch [195/500] time 1.573 (1.566) data 0.000 (0.005) loss 1.4541 (1.1992) acc 71.8750 (70.9615) lr 1.9298e-03 eta 9:15:55 +epoch [8/50] batch [200/500] time 1.558 (1.565) data 0.000 (0.005) loss 0.5215 (1.1949) acc 90.6250 (71.0469) lr 1.9298e-03 eta 9:15:42 +epoch [8/50] batch [205/500] time 1.559 (1.565) data 0.000 (0.005) loss 0.6499 (1.1881) acc 84.3750 (71.2043) lr 1.9298e-03 eta 9:15:28 +epoch [8/50] batch [210/500] time 1.539 (1.565) data 0.000 (0.005) loss 1.1289 (1.1844) acc 75.0000 (71.2054) lr 1.9298e-03 eta 9:15:13 
+epoch [8/50] batch [215/500] time 1.566 (1.565) data 0.000 (0.005) loss 0.7539 (1.1842) acc 78.1250 (71.2064) lr 1.9298e-03 eta 9:15:02 +epoch [8/50] batch [220/500] time 1.582 (1.565) data 0.000 (0.005) loss 1.6895 (1.1882) acc 59.3750 (71.1932) lr 1.9298e-03 eta 9:14:53 +epoch [8/50] batch [225/500] time 1.545 (1.565) data 0.000 (0.005) loss 0.7246 (1.1883) acc 84.3750 (71.1667) lr 1.9298e-03 eta 9:14:54 +epoch [8/50] batch [230/500] time 1.601 (1.565) data 0.000 (0.005) loss 1.0938 (1.1833) acc 75.0000 (71.2636) lr 1.9298e-03 eta 9:14:44 +epoch [8/50] batch [235/500] time 1.563 (1.565) data 0.000 (0.004) loss 1.6494 (1.1836) acc 59.3750 (71.1968) lr 1.9298e-03 eta 9:14:37 +epoch [8/50] batch [240/500] time 1.565 (1.565) data 0.001 (0.004) loss 0.6821 (1.1798) acc 71.8750 (71.2500) lr 1.9298e-03 eta 9:14:32 +epoch [8/50] batch [245/500] time 1.544 (1.565) data 0.001 (0.004) loss 0.8320 (1.1770) acc 84.3750 (71.3138) lr 1.9298e-03 eta 9:14:21 +epoch [8/50] batch [250/500] time 1.540 (1.565) data 0.000 (0.004) loss 0.7310 (1.1778) acc 78.1250 (71.3000) lr 1.9298e-03 eta 9:14:10 +epoch [8/50] batch [255/500] time 1.562 (1.564) data 0.000 (0.004) loss 1.2051 (1.1801) acc 65.6250 (71.2377) lr 1.9298e-03 eta 9:13:57 +epoch [8/50] batch [260/500] time 1.533 (1.564) data 0.000 (0.004) loss 0.9683 (1.1792) acc 75.0000 (71.3101) lr 1.9298e-03 eta 9:13:44 +epoch [8/50] batch [265/500] time 1.551 (1.564) data 0.001 (0.004) loss 1.8086 (1.1847) acc 62.5000 (71.2264) lr 1.9298e-03 eta 9:13:30 +epoch [8/50] batch [270/500] time 1.537 (1.564) data 0.000 (0.004) loss 1.4033 (1.1837) acc 59.3750 (71.1921) lr 1.9298e-03 eta 9:13:23 +epoch [8/50] batch [275/500] time 1.569 (1.564) data 0.000 (0.004) loss 1.7490 (1.1836) acc 62.5000 (71.0795) lr 1.9298e-03 eta 9:13:14 +epoch [8/50] batch [280/500] time 1.568 (1.564) data 0.000 (0.004) loss 0.6963 (1.1831) acc 81.2500 (71.0826) lr 1.9298e-03 eta 9:13:05 +epoch [8/50] batch [285/500] time 1.575 (1.564) data 0.000 (0.004) loss 1.2100 
(1.1833) acc 75.0000 (71.0965) lr 1.9298e-03 eta 9:12:55 +epoch [8/50] batch [290/500] time 1.589 (1.564) data 0.000 (0.004) loss 1.7949 (1.1897) acc 56.2500 (71.0237) lr 1.9298e-03 eta 9:12:49 +epoch [8/50] batch [295/500] time 1.572 (1.564) data 0.000 (0.004) loss 0.6899 (1.1905) acc 87.5000 (71.0699) lr 1.9298e-03 eta 9:12:38 +epoch [8/50] batch [300/500] time 1.586 (1.564) data 0.000 (0.004) loss 1.5723 (1.1939) acc 68.7500 (71.0417) lr 1.9298e-03 eta 9:12:33 +epoch [8/50] batch [305/500] time 1.563 (1.564) data 0.000 (0.003) loss 1.4189 (1.1928) acc 68.7500 (71.0348) lr 1.9298e-03 eta 9:12:25 +epoch [8/50] batch [310/500] time 1.568 (1.564) data 0.000 (0.003) loss 1.8105 (1.2001) acc 59.3750 (70.9476) lr 1.9298e-03 eta 9:12:17 +epoch [8/50] batch [315/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.1689 (1.1980) acc 75.0000 (70.9821) lr 1.9298e-03 eta 9:12:15 +epoch [8/50] batch [320/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.4561 (1.1967) acc 65.6250 (70.9766) lr 1.9298e-03 eta 9:12:10 +epoch [8/50] batch [325/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.2559 (1.1962) acc 71.8750 (71.0096) lr 1.9298e-03 eta 9:12:10 +epoch [8/50] batch [330/500] time 1.571 (1.565) data 0.000 (0.003) loss 0.7998 (1.1989) acc 78.1250 (71.0038) lr 1.9298e-03 eta 9:12:01 +epoch [8/50] batch [335/500] time 1.548 (1.564) data 0.000 (0.003) loss 1.3633 (1.1959) acc 65.6250 (71.0541) lr 1.9298e-03 eta 9:11:50 +epoch [8/50] batch [340/500] time 1.566 (1.564) data 0.000 (0.003) loss 1.8486 (1.1997) acc 62.5000 (71.0110) lr 1.9298e-03 eta 9:11:40 +epoch [8/50] batch [345/500] time 1.576 (1.564) data 0.000 (0.003) loss 1.0576 (1.1980) acc 59.3750 (70.9692) lr 1.9298e-03 eta 9:11:32 +epoch [8/50] batch [350/500] time 1.557 (1.564) data 0.000 (0.003) loss 1.0439 (1.1965) acc 71.8750 (70.9732) lr 1.9298e-03 eta 9:11:24 +epoch [8/50] batch [355/500] time 1.586 (1.564) data 0.000 (0.003) loss 1.0645 (1.1935) acc 78.1250 (71.0211) lr 1.9298e-03 eta 9:11:19 +epoch [8/50] batch 
[360/500] time 1.567 (1.564) data 0.000 (0.003) loss 1.0586 (1.1972) acc 65.6250 (70.9549) lr 1.9298e-03 eta 9:11:11 +epoch [8/50] batch [365/500] time 1.571 (1.564) data 0.000 (0.003) loss 1.0049 (1.1974) acc 71.8750 (70.9675) lr 1.9298e-03 eta 9:11:04 +epoch [8/50] batch [370/500] time 1.556 (1.565) data 0.000 (0.003) loss 1.3047 (1.1991) acc 65.6250 (70.9037) lr 1.9298e-03 eta 9:11:03 +epoch [8/50] batch [375/500] time 1.554 (1.565) data 0.000 (0.003) loss 0.8311 (1.1999) acc 78.1250 (70.8917) lr 1.9298e-03 eta 9:10:54 +epoch [8/50] batch [380/500] time 1.580 (1.565) data 0.000 (0.003) loss 0.7817 (1.1969) acc 75.0000 (70.9128) lr 1.9298e-03 eta 9:10:49 +epoch [8/50] batch [385/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.2969 (1.1981) acc 68.7500 (70.8685) lr 1.9298e-03 eta 9:10:41 +epoch [8/50] batch [390/500] time 1.560 (1.565) data 0.000 (0.003) loss 1.2148 (1.2012) acc 81.2500 (70.8974) lr 1.9298e-03 eta 9:10:34 +epoch [8/50] batch [395/500] time 1.549 (1.565) data 0.001 (0.003) loss 0.7646 (1.2016) acc 78.1250 (70.9177) lr 1.9298e-03 eta 9:10:26 +epoch [8/50] batch [400/500] time 1.565 (1.565) data 0.000 (0.003) loss 1.4414 (1.2009) acc 65.6250 (70.9297) lr 1.9298e-03 eta 9:10:17 +epoch [8/50] batch [405/500] time 1.571 (1.565) data 0.000 (0.003) loss 1.1855 (1.2006) acc 71.8750 (70.8951) lr 1.9298e-03 eta 9:10:09 +epoch [8/50] batch [410/500] time 1.533 (1.565) data 0.000 (0.003) loss 1.3271 (1.1999) acc 81.2500 (70.9451) lr 1.9298e-03 eta 9:09:59 +epoch [8/50] batch [415/500] time 1.568 (1.565) data 0.000 (0.003) loss 0.8320 (1.1972) acc 81.2500 (70.9864) lr 1.9298e-03 eta 9:09:51 +epoch [8/50] batch [420/500] time 1.569 (1.565) data 0.000 (0.003) loss 0.7397 (1.1941) acc 75.0000 (71.0491) lr 1.9298e-03 eta 9:09:43 +epoch [8/50] batch [425/500] time 1.570 (1.565) data 0.001 (0.003) loss 0.8159 (1.1948) acc 75.0000 (71.0074) lr 1.9298e-03 eta 9:09:34 +epoch [8/50] batch [430/500] time 1.556 (1.565) data 0.001 (0.003) loss 0.7563 (1.1928) acc 81.2500 
(71.0102) lr 1.9298e-03 eta 9:09:26 +epoch [8/50] batch [435/500] time 1.566 (1.565) data 0.000 (0.003) loss 1.5391 (1.1948) acc 62.5000 (70.9267) lr 1.9298e-03 eta 9:09:17 +epoch [8/50] batch [440/500] time 1.548 (1.565) data 0.000 (0.003) loss 1.1885 (1.1935) acc 71.8750 (70.9517) lr 1.9298e-03 eta 9:09:09 +epoch [8/50] batch [445/500] time 1.569 (1.565) data 0.000 (0.003) loss 1.1377 (1.1954) acc 75.0000 (70.9270) lr 1.9298e-03 eta 9:09:02 +epoch [8/50] batch [450/500] time 1.540 (1.565) data 0.000 (0.002) loss 1.0869 (1.1951) acc 68.7500 (70.8611) lr 1.9298e-03 eta 9:08:53 +epoch [8/50] batch [455/500] time 1.545 (1.564) data 0.000 (0.002) loss 0.9941 (1.1937) acc 78.1250 (70.8860) lr 1.9298e-03 eta 9:08:42 +epoch [8/50] batch [460/500] time 1.552 (1.564) data 0.000 (0.002) loss 0.8564 (1.1916) acc 90.6250 (70.9375) lr 1.9298e-03 eta 9:08:32 +epoch [8/50] batch [465/500] time 1.674 (1.564) data 0.000 (0.002) loss 0.8735 (1.1909) acc 78.1250 (70.9879) lr 1.9298e-03 eta 9:08:28 +epoch [8/50] batch [470/500] time 1.578 (1.564) data 0.000 (0.002) loss 1.2373 (1.1917) acc 68.7500 (70.9508) lr 1.9298e-03 eta 9:08:21 +epoch [8/50] batch [475/500] time 1.555 (1.565) data 0.000 (0.002) loss 1.1953 (1.1905) acc 65.6250 (70.9539) lr 1.9298e-03 eta 9:08:14 +epoch [8/50] batch [480/500] time 1.557 (1.565) data 0.000 (0.002) loss 0.9517 (1.1887) acc 68.7500 (70.9701) lr 1.9298e-03 eta 9:08:07 +epoch [8/50] batch [485/500] time 1.553 (1.564) data 0.001 (0.002) loss 0.9443 (1.1867) acc 68.7500 (71.0180) lr 1.9298e-03 eta 9:07:57 +epoch [8/50] batch [490/500] time 1.559 (1.564) data 0.000 (0.002) loss 1.0664 (1.1869) acc 71.8750 (70.9885) lr 1.9298e-03 eta 9:07:48 +epoch [8/50] batch [495/500] time 1.528 (1.564) data 0.000 (0.002) loss 1.3633 (1.1889) acc 68.7500 (70.9533) lr 1.9298e-03 eta 9:07:38 +epoch [8/50] batch [500/500] time 1.554 (1.564) data 0.000 (0.002) loss 1.0586 (1.1891) acc 78.1250 (71.0000) lr 1.9048e-03 eta 9:07:25 +Evaluate on the *val* set +=> result +* 
total: 50,000 +* correct: 38,811 +* accuracy: 77.6% +* error: 22.4% +* macro_f1: 77.0% +epoch [9/50] batch [5/500] time 1.551 (1.708) data 0.000 (0.202) loss 0.7822 (1.0520) acc 81.2500 (75.0000) lr 1.9048e-03 eta 9:57:30 +epoch [9/50] batch [10/500] time 1.567 (1.639) data 0.000 (0.101) loss 1.8652 (1.0984) acc 59.3750 (74.3750) lr 1.9048e-03 eta 9:33:27 +epoch [9/50] batch [15/500] time 1.576 (1.611) data 0.000 (0.067) loss 1.6367 (1.1623) acc 65.6250 (72.7083) lr 1.9048e-03 eta 9:23:37 +epoch [9/50] batch [20/500] time 1.556 (1.605) data 0.001 (0.051) loss 0.5752 (1.1429) acc 84.3750 (73.5938) lr 1.9048e-03 eta 9:21:14 +epoch [9/50] batch [25/500] time 1.570 (1.600) data 0.001 (0.041) loss 0.7690 (1.1541) acc 78.1250 (73.0000) lr 1.9048e-03 eta 9:19:18 +epoch [9/50] batch [30/500] time 1.548 (1.593) data 0.000 (0.034) loss 0.6641 (1.1245) acc 75.0000 (73.2292) lr 1.9048e-03 eta 9:16:39 +epoch [9/50] batch [35/500] time 1.573 (1.589) data 0.000 (0.029) loss 0.9414 (1.1154) acc 75.0000 (73.3036) lr 1.9048e-03 eta 9:15:21 +epoch [9/50] batch [40/500] time 1.555 (1.586) data 0.001 (0.026) loss 0.9082 (1.1416) acc 65.6250 (72.5000) lr 1.9048e-03 eta 9:14:05 +epoch [9/50] batch [45/500] time 1.538 (1.583) data 0.000 (0.023) loss 1.3398 (1.1239) acc 65.6250 (72.5694) lr 1.9048e-03 eta 9:12:55 +epoch [9/50] batch [50/500] time 1.545 (1.582) data 0.000 (0.021) loss 0.7910 (1.1142) acc 71.8750 (72.6250) lr 1.9048e-03 eta 9:12:20 +epoch [9/50] batch [55/500] time 1.546 (1.579) data 0.000 (0.019) loss 1.6738 (1.1506) acc 50.0000 (71.5341) lr 1.9048e-03 eta 9:11:04 +epoch [9/50] batch [60/500] time 1.555 (1.578) data 0.001 (0.017) loss 0.9268 (1.1317) acc 81.2500 (71.6146) lr 1.9048e-03 eta 9:10:34 +epoch [9/50] batch [65/500] time 1.587 (1.577) data 0.000 (0.016) loss 0.7383 (1.1220) acc 78.1250 (71.7308) lr 1.9048e-03 eta 9:10:10 +epoch [9/50] batch [70/500] time 1.559 (1.575) data 0.000 (0.015) loss 0.9043 (1.1256) acc 75.0000 (71.5625) lr 1.9048e-03 eta 9:09:30 +epoch 
[9/50] batch [75/500] time 1.564 (1.574) data 0.000 (0.014) loss 0.9248 (1.1316) acc 75.0000 (71.5000) lr 1.9048e-03 eta 9:08:53 +epoch [9/50] batch [80/500] time 1.556 (1.573) data 0.000 (0.013) loss 1.4492 (1.1352) acc 68.7500 (71.6016) lr 1.9048e-03 eta 9:08:24 +epoch [9/50] batch [85/500] time 1.560 (1.572) data 0.001 (0.012) loss 1.1650 (1.1352) acc 75.0000 (71.5809) lr 1.9048e-03 eta 9:08:00 +epoch [9/50] batch [90/500] time 1.553 (1.571) data 0.000 (0.012) loss 0.7622 (1.1262) acc 75.0000 (71.7361) lr 1.9048e-03 eta 9:07:31 +epoch [9/50] batch [95/500] time 1.579 (1.571) data 0.000 (0.011) loss 0.7930 (1.1277) acc 78.1250 (71.8421) lr 1.9048e-03 eta 9:07:17 +epoch [9/50] batch [100/500] time 1.541 (1.570) data 0.001 (0.010) loss 1.7178 (1.1345) acc 59.3750 (71.6250) lr 1.9048e-03 eta 9:06:53 +epoch [9/50] batch [105/500] time 1.533 (1.569) data 0.001 (0.010) loss 0.6895 (1.1327) acc 84.3750 (71.6667) lr 1.9048e-03 eta 9:06:24 +epoch [9/50] batch [110/500] time 1.547 (1.568) data 0.000 (0.010) loss 1.2783 (1.1291) acc 71.8750 (71.7045) lr 1.9048e-03 eta 9:05:57 +epoch [9/50] batch [115/500] time 1.642 (1.568) data 0.001 (0.009) loss 0.8467 (1.1356) acc 75.0000 (71.6848) lr 1.9048e-03 eta 9:05:54 +epoch [9/50] batch [120/500] time 1.569 (1.568) data 0.000 (0.009) loss 1.1768 (1.1419) acc 78.1250 (71.5365) lr 1.9048e-03 eta 9:05:44 +epoch [9/50] batch [125/500] time 1.541 (1.567) data 0.000 (0.008) loss 0.9048 (1.1438) acc 81.2500 (71.5000) lr 1.9048e-03 eta 9:05:16 +epoch [9/50] batch [130/500] time 1.562 (1.567) data 0.000 (0.008) loss 1.1484 (1.1465) acc 78.1250 (71.5865) lr 1.9048e-03 eta 9:04:59 +epoch [9/50] batch [135/500] time 1.567 (1.567) data 0.000 (0.008) loss 0.7612 (1.1449) acc 78.1250 (71.7130) lr 1.9048e-03 eta 9:04:57 +epoch [9/50] batch [140/500] time 1.577 (1.567) data 0.000 (0.008) loss 1.0049 (1.1482) acc 78.1250 (71.8080) lr 1.9048e-03 eta 9:04:52 +epoch [9/50] batch [145/500] time 1.549 (1.567) data 0.000 (0.007) loss 1.2188 (1.1426) acc 
78.1250 (72.0259) lr 1.9048e-03 eta 9:04:39 +epoch [9/50] batch [150/500] time 1.559 (1.567) data 0.000 (0.007) loss 1.3838 (1.1418) acc 68.7500 (72.1250) lr 1.9048e-03 eta 9:04:23 +epoch [9/50] batch [155/500] time 1.555 (1.566) data 0.000 (0.007) loss 1.8936 (1.1437) acc 62.5000 (72.0565) lr 1.9048e-03 eta 9:04:09 +epoch [9/50] batch [160/500] time 1.561 (1.567) data 0.001 (0.007) loss 1.1025 (1.1467) acc 78.1250 (72.0898) lr 1.9048e-03 eta 9:04:09 +epoch [9/50] batch [165/500] time 1.543 (1.566) data 0.000 (0.007) loss 0.8389 (1.1407) acc 78.1250 (72.0076) lr 1.9048e-03 eta 9:03:57 +epoch [9/50] batch [170/500] time 1.557 (1.566) data 0.000 (0.006) loss 1.1455 (1.1398) acc 62.5000 (71.9485) lr 1.9048e-03 eta 9:03:46 +epoch [9/50] batch [175/500] time 1.570 (1.566) data 0.001 (0.006) loss 1.1631 (1.1447) acc 75.0000 (71.8750) lr 1.9048e-03 eta 9:03:37 +epoch [9/50] batch [180/500] time 1.581 (1.567) data 0.000 (0.006) loss 1.4160 (1.1421) acc 75.0000 (71.9618) lr 1.9048e-03 eta 9:03:35 +epoch [9/50] batch [185/500] time 1.549 (1.566) data 0.000 (0.006) loss 1.5918 (1.1504) acc 65.6250 (71.7905) lr 1.9048e-03 eta 9:03:18 +epoch [9/50] batch [190/500] time 1.577 (1.566) data 0.000 (0.006) loss 0.9946 (1.1470) acc 71.8750 (71.9243) lr 1.9048e-03 eta 9:03:13 +epoch [9/50] batch [195/500] time 1.579 (1.566) data 0.000 (0.006) loss 0.9565 (1.1486) acc 71.8750 (71.8910) lr 1.9048e-03 eta 9:03:02 +epoch [9/50] batch [200/500] time 1.540 (1.566) data 0.000 (0.005) loss 1.3125 (1.1494) acc 65.6250 (71.9688) lr 1.9048e-03 eta 9:02:53 +epoch [9/50] batch [205/500] time 1.547 (1.566) data 0.000 (0.005) loss 1.6035 (1.1503) acc 62.5000 (71.9665) lr 1.9048e-03 eta 9:02:37 +epoch [9/50] batch [210/500] time 1.565 (1.565) data 0.000 (0.005) loss 1.1182 (1.1496) acc 75.0000 (71.9494) lr 1.9048e-03 eta 9:02:24 +epoch [9/50] batch [215/500] time 1.545 (1.565) data 0.000 (0.005) loss 1.2568 (1.1514) acc 62.5000 (71.9767) lr 1.9048e-03 eta 9:02:11 +epoch [9/50] batch [220/500] time 
1.552 (1.565) data 0.000 (0.005) loss 1.1377 (1.1503) acc 75.0000 (72.0028) lr 1.9048e-03 eta 9:01:56 +epoch [9/50] batch [225/500] time 1.567 (1.565) data 0.000 (0.005) loss 1.3193 (1.1580) acc 71.8750 (71.7500) lr 1.9048e-03 eta 9:01:43 +epoch [9/50] batch [230/500] time 1.544 (1.564) data 0.000 (0.005) loss 1.0244 (1.1550) acc 71.8750 (71.7120) lr 1.9048e-03 eta 9:01:29 +epoch [9/50] batch [235/500] time 1.556 (1.564) data 0.000 (0.005) loss 0.9292 (1.1532) acc 81.2500 (71.7819) lr 1.9048e-03 eta 9:01:20 +epoch [9/50] batch [240/500] time 1.563 (1.564) data 0.000 (0.005) loss 1.2900 (1.1539) acc 75.0000 (71.8099) lr 1.9048e-03 eta 9:01:11 +epoch [9/50] batch [245/500] time 1.571 (1.564) data 0.000 (0.005) loss 1.1680 (1.1526) acc 68.7500 (71.7092) lr 1.9048e-03 eta 9:01:00 +epoch [9/50] batch [250/500] time 1.553 (1.564) data 0.001 (0.004) loss 1.1094 (1.1586) acc 78.1250 (71.6125) lr 1.9048e-03 eta 9:00:45 +epoch [9/50] batch [255/500] time 1.547 (1.563) data 0.000 (0.004) loss 0.7847 (1.1537) acc 87.5000 (71.7647) lr 1.9048e-03 eta 9:00:34 +epoch [9/50] batch [260/500] time 1.568 (1.564) data 0.000 (0.004) loss 1.0264 (1.1525) acc 84.3750 (71.7548) lr 1.9048e-03 eta 9:00:36 +epoch [9/50] batch [265/500] time 1.557 (1.564) data 0.000 (0.004) loss 1.0986 (1.1495) acc 68.7500 (71.8042) lr 1.9048e-03 eta 9:00:28 +epoch [9/50] batch [270/500] time 1.552 (1.564) data 0.000 (0.004) loss 1.0176 (1.1426) acc 81.2500 (71.9329) lr 1.9048e-03 eta 9:00:17 +epoch [9/50] batch [275/500] time 1.551 (1.563) data 0.000 (0.004) loss 1.8242 (1.1486) acc 65.6250 (71.7273) lr 1.9048e-03 eta 9:00:00 +epoch [9/50] batch [280/500] time 1.557 (1.563) data 0.000 (0.004) loss 1.3086 (1.1532) acc 71.8750 (71.6071) lr 1.9048e-03 eta 8:59:45 +epoch [9/50] batch [285/500] time 1.559 (1.563) data 0.000 (0.004) loss 0.7144 (1.1545) acc 78.1250 (71.5789) lr 1.9048e-03 eta 8:59:31 +epoch [9/50] batch [290/500] time 1.550 (1.563) data 0.000 (0.004) loss 1.2188 (1.1573) acc 68.7500 (71.5517) lr 
1.9048e-03 eta 8:59:19 +epoch [9/50] batch [295/500] time 1.573 (1.563) data 0.001 (0.004) loss 1.1094 (1.1594) acc 71.8750 (71.5784) lr 1.9048e-03 eta 8:59:13 +epoch [9/50] batch [300/500] time 1.552 (1.563) data 0.000 (0.004) loss 1.5059 (1.1607) acc 71.8750 (71.5833) lr 1.9048e-03 eta 8:59:05 +epoch [9/50] batch [305/500] time 1.584 (1.563) data 0.001 (0.004) loss 1.3730 (1.1599) acc 71.8750 (71.6189) lr 1.9048e-03 eta 8:59:06 +epoch [9/50] batch [310/500] time 1.573 (1.563) data 0.000 (0.004) loss 1.7822 (1.1617) acc 59.3750 (71.5222) lr 1.9048e-03 eta 8:59:01 +epoch [9/50] batch [315/500] time 1.569 (1.563) data 0.000 (0.004) loss 1.0391 (1.1632) acc 75.0000 (71.5179) lr 1.9048e-03 eta 8:58:51 +epoch [9/50] batch [320/500] time 1.550 (1.563) data 0.000 (0.004) loss 1.0322 (1.1622) acc 81.2500 (71.5234) lr 1.9048e-03 eta 8:58:39 +epoch [9/50] batch [325/500] time 1.546 (1.563) data 0.000 (0.004) loss 1.5439 (1.1592) acc 68.7500 (71.6154) lr 1.9048e-03 eta 8:58:27 +epoch [9/50] batch [330/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.9053 (1.1556) acc 81.2500 (71.6667) lr 1.9048e-03 eta 8:58:23 +epoch [9/50] batch [335/500] time 1.553 (1.563) data 0.000 (0.003) loss 1.3018 (1.1581) acc 62.5000 (71.5858) lr 1.9048e-03 eta 8:58:21 +epoch [9/50] batch [340/500] time 1.580 (1.563) data 0.000 (0.003) loss 0.8647 (1.1570) acc 78.1250 (71.6085) lr 1.9048e-03 eta 8:58:15 +epoch [9/50] batch [345/500] time 1.562 (1.563) data 0.000 (0.003) loss 1.2354 (1.1557) acc 65.6250 (71.6304) lr 1.9048e-03 eta 8:58:06 +epoch [9/50] batch [350/500] time 1.549 (1.563) data 0.000 (0.003) loss 0.8540 (1.1568) acc 78.1250 (71.6339) lr 1.9048e-03 eta 8:57:55 +epoch [9/50] batch [355/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.4648 (1.1586) acc 62.5000 (71.5581) lr 1.9048e-03 eta 8:57:43 +epoch [9/50] batch [360/500] time 1.539 (1.563) data 0.000 (0.003) loss 0.7832 (1.1597) acc 75.0000 (71.5538) lr 1.9048e-03 eta 8:57:32 +epoch [9/50] batch [365/500] time 1.560 (1.562) data 
0.000 (0.003) loss 1.1279 (1.1598) acc 75.0000 (71.5753) lr 1.9048e-03 eta 8:57:20 +epoch [9/50] batch [370/500] time 1.576 (1.563) data 0.000 (0.003) loss 1.0566 (1.1573) acc 75.0000 (71.6301) lr 1.9048e-03 eta 8:57:15 +epoch [9/50] batch [375/500] time 1.561 (1.562) data 0.000 (0.003) loss 1.4053 (1.1580) acc 62.5000 (71.6083) lr 1.9048e-03 eta 8:57:05 +epoch [9/50] batch [380/500] time 1.552 (1.562) data 0.001 (0.003) loss 1.2500 (1.1547) acc 71.8750 (71.6941) lr 1.9048e-03 eta 8:56:56 +epoch [9/50] batch [385/500] time 1.541 (1.562) data 0.000 (0.003) loss 0.8213 (1.1558) acc 81.2500 (71.6477) lr 1.9048e-03 eta 8:56:47 +epoch [9/50] batch [390/500] time 1.559 (1.562) data 0.000 (0.003) loss 1.6611 (1.1590) acc 71.8750 (71.6346) lr 1.9048e-03 eta 8:56:37 +epoch [9/50] batch [395/500] time 1.557 (1.562) data 0.000 (0.003) loss 1.3398 (1.1594) acc 65.6250 (71.6060) lr 1.9048e-03 eta 8:56:25 +epoch [9/50] batch [400/500] time 1.546 (1.562) data 0.000 (0.003) loss 0.9731 (1.1585) acc 75.0000 (71.6094) lr 1.9048e-03 eta 8:56:16 +epoch [9/50] batch [405/500] time 1.575 (1.562) data 0.000 (0.003) loss 1.4766 (1.1598) acc 65.6250 (71.5818) lr 1.9048e-03 eta 8:56:16 +epoch [9/50] batch [410/500] time 1.585 (1.562) data 0.000 (0.003) loss 1.2041 (1.1595) acc 71.8750 (71.5244) lr 1.9048e-03 eta 8:56:08 +epoch [9/50] batch [415/500] time 1.574 (1.562) data 0.000 (0.003) loss 0.9561 (1.1593) acc 78.1250 (71.5286) lr 1.9048e-03 eta 8:56:00 +epoch [9/50] batch [420/500] time 1.562 (1.562) data 0.000 (0.003) loss 1.1680 (1.1618) acc 71.8750 (71.4658) lr 1.9048e-03 eta 8:55:54 +epoch [9/50] batch [425/500] time 1.569 (1.563) data 0.000 (0.003) loss 1.5596 (1.1597) acc 71.8750 (71.5221) lr 1.9048e-03 eta 8:55:49 +epoch [9/50] batch [430/500] time 1.541 (1.563) data 0.000 (0.003) loss 1.1152 (1.1608) acc 65.6250 (71.4971) lr 1.9048e-03 eta 8:55:40 +epoch [9/50] batch [435/500] time 1.555 (1.562) data 0.000 (0.003) loss 0.9956 (1.1657) acc 81.2500 (71.3649) lr 1.9048e-03 eta 
8:55:31 +epoch [9/50] batch [440/500] time 1.569 (1.562) data 0.000 (0.003) loss 0.7002 (1.1642) acc 75.0000 (71.3849) lr 1.9048e-03 eta 8:55:20 +epoch [9/50] batch [445/500] time 1.686 (1.562) data 0.000 (0.003) loss 0.9902 (1.1648) acc 71.8750 (71.3764) lr 1.9048e-03 eta 8:55:16 +epoch [9/50] batch [450/500] time 1.578 (1.563) data 0.000 (0.003) loss 1.3906 (1.1668) acc 65.6250 (71.3056) lr 1.9048e-03 eta 8:55:09 +epoch [9/50] batch [455/500] time 1.542 (1.562) data 0.000 (0.003) loss 1.0557 (1.1651) acc 78.1250 (71.3255) lr 1.9048e-03 eta 8:54:58 +epoch [9/50] batch [460/500] time 1.558 (1.562) data 0.000 (0.003) loss 1.8730 (1.1658) acc 59.3750 (71.2840) lr 1.9048e-03 eta 8:54:48 +epoch [9/50] batch [465/500] time 1.569 (1.562) data 0.000 (0.003) loss 0.7700 (1.1655) acc 84.3750 (71.2970) lr 1.9048e-03 eta 8:54:40 +epoch [9/50] batch [470/500] time 1.562 (1.562) data 0.000 (0.003) loss 1.4893 (1.1674) acc 68.7500 (71.2633) lr 1.9048e-03 eta 8:54:33 +epoch [9/50] batch [475/500] time 1.555 (1.562) data 0.000 (0.003) loss 1.2871 (1.1712) acc 65.6250 (71.1645) lr 1.9048e-03 eta 8:54:24 +epoch [9/50] batch [480/500] time 1.543 (1.562) data 0.000 (0.003) loss 1.5801 (1.1717) acc 59.3750 (71.1328) lr 1.9048e-03 eta 8:54:13 +epoch [9/50] batch [485/500] time 1.576 (1.562) data 0.001 (0.002) loss 1.4551 (1.1719) acc 65.6250 (71.1405) lr 1.9048e-03 eta 8:54:05 +epoch [9/50] batch [490/500] time 1.565 (1.562) data 0.000 (0.002) loss 1.1699 (1.1725) acc 62.5000 (71.1288) lr 1.9048e-03 eta 8:53:55 +epoch [9/50] batch [495/500] time 1.561 (1.562) data 0.000 (0.002) loss 1.0703 (1.1743) acc 71.8750 (71.0795) lr 1.9048e-03 eta 8:53:43 +epoch [9/50] batch [500/500] time 1.571 (1.562) data 0.000 (0.002) loss 1.0186 (1.1746) acc 78.1250 (71.0625) lr 1.8763e-03 eta 8:53:34 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,779 +* accuracy: 77.6% +* error: 22.4% +* macro_f1: 77.0% +epoch [10/50] batch [5/500] time 1.551 (1.769) data 0.001 (0.232) loss 1.0039 
(0.9812) acc 71.8750 (73.1250) lr 1.8763e-03 eta 10:04:21 +epoch [10/50] batch [10/500] time 1.558 (1.672) data 0.001 (0.117) loss 1.2129 (1.0738) acc 68.7500 (70.9375) lr 1.8763e-03 eta 9:30:53 +epoch [10/50] batch [15/500] time 1.540 (1.633) data 0.000 (0.078) loss 1.5156 (1.1214) acc 56.2500 (69.5833) lr 1.8763e-03 eta 9:17:32 +epoch [10/50] batch [20/500] time 1.577 (1.614) data 0.001 (0.059) loss 0.7798 (1.1111) acc 81.2500 (70.6250) lr 1.8763e-03 eta 9:10:45 +epoch [10/50] batch [25/500] time 1.556 (1.604) data 0.000 (0.047) loss 0.9282 (1.1374) acc 75.0000 (70.3750) lr 1.8763e-03 eta 9:07:18 +epoch [10/50] batch [30/500] time 1.560 (1.596) data 0.000 (0.039) loss 0.9141 (1.1261) acc 71.8750 (70.8333) lr 1.8763e-03 eta 9:04:35 +epoch [10/50] batch [35/500] time 1.556 (1.590) data 0.000 (0.034) loss 1.1270 (1.1166) acc 71.8750 (70.8036) lr 1.8763e-03 eta 9:02:11 +epoch [10/50] batch [40/500] time 1.567 (1.587) data 0.000 (0.030) loss 1.2900 (1.1407) acc 68.7500 (70.4688) lr 1.8763e-03 eta 9:01:02 +epoch [10/50] batch [45/500] time 1.554 (1.587) data 0.000 (0.026) loss 1.0674 (1.1339) acc 78.1250 (71.0417) lr 1.8763e-03 eta 9:01:01 +epoch [10/50] batch [50/500] time 1.571 (1.584) data 0.000 (0.024) loss 0.9277 (1.1429) acc 78.1250 (70.9375) lr 1.8763e-03 eta 9:00:02 +epoch [10/50] batch [55/500] time 1.554 (1.582) data 0.000 (0.022) loss 0.9624 (1.1496) acc 68.7500 (70.6818) lr 1.8763e-03 eta 8:59:10 +epoch [10/50] batch [60/500] time 1.587 (1.581) data 0.000 (0.020) loss 1.6348 (1.1575) acc 59.3750 (70.6250) lr 1.8763e-03 eta 8:58:41 +epoch [10/50] batch [65/500] time 1.564 (1.579) data 0.000 (0.018) loss 0.8862 (1.1464) acc 78.1250 (70.7692) lr 1.8763e-03 eta 8:57:50 +epoch [10/50] batch [70/500] time 1.568 (1.578) data 0.000 (0.017) loss 0.9116 (1.1407) acc 75.0000 (70.8036) lr 1.8763e-03 eta 8:57:13 +epoch [10/50] batch [75/500] time 1.536 (1.576) data 0.000 (0.016) loss 1.7314 (1.1503) acc 65.6250 (70.5833) lr 1.8763e-03 eta 8:56:35 +epoch [10/50] batch 
[80/500] time 1.573 (1.575) data 0.001 (0.015) loss 1.2764 (1.1559) acc 75.0000 (70.5859) lr 1.8763e-03 eta 8:56:07 +epoch [10/50] batch [85/500] time 1.576 (1.574) data 0.000 (0.014) loss 0.9458 (1.1453) acc 65.6250 (70.7353) lr 1.8763e-03 eta 8:55:33 +epoch [10/50] batch [90/500] time 1.558 (1.573) data 0.000 (0.013) loss 0.7070 (1.1415) acc 81.2500 (70.9722) lr 1.8763e-03 eta 8:55:03 +epoch [10/50] batch [95/500] time 1.545 (1.572) data 0.000 (0.013) loss 0.9941 (1.1448) acc 68.7500 (70.6579) lr 1.8763e-03 eta 8:54:46 +epoch [10/50] batch [100/500] time 1.545 (1.572) data 0.000 (0.012) loss 1.1865 (1.1470) acc 68.7500 (70.6562) lr 1.8763e-03 eta 8:54:21 +epoch [10/50] batch [105/500] time 1.578 (1.571) data 0.001 (0.012) loss 1.2324 (1.1569) acc 71.8750 (70.4762) lr 1.8763e-03 eta 8:54:04 +epoch [10/50] batch [110/500] time 1.551 (1.571) data 0.000 (0.011) loss 0.6099 (1.1539) acc 87.5000 (70.4545) lr 1.8763e-03 eta 8:53:48 +epoch [10/50] batch [115/500] time 1.565 (1.570) data 0.000 (0.011) loss 1.0195 (1.1510) acc 81.2500 (70.7065) lr 1.8763e-03 eta 8:53:25 +epoch [10/50] batch [120/500] time 1.551 (1.569) data 0.000 (0.010) loss 0.7988 (1.1558) acc 78.1250 (70.5990) lr 1.8763e-03 eta 8:52:59 +epoch [10/50] batch [125/500] time 1.586 (1.569) data 0.001 (0.010) loss 2.0039 (1.1703) acc 53.1250 (70.4250) lr 1.8763e-03 eta 8:52:49 +epoch [10/50] batch [130/500] time 1.545 (1.569) data 0.001 (0.009) loss 1.5967 (1.1684) acc 59.3750 (70.3125) lr 1.8763e-03 eta 8:52:30 +epoch [10/50] batch [135/500] time 1.566 (1.568) data 0.000 (0.009) loss 1.4365 (1.1700) acc 59.3750 (70.3472) lr 1.8763e-03 eta 8:52:16 +epoch [10/50] batch [140/500] time 1.540 (1.568) data 0.001 (0.009) loss 1.3047 (1.1659) acc 71.8750 (70.5580) lr 1.8763e-03 eta 8:52:08 +epoch [10/50] batch [145/500] time 1.566 (1.569) data 0.001 (0.008) loss 1.2373 (1.1687) acc 68.7500 (70.5603) lr 1.8763e-03 eta 8:52:13 +epoch [10/50] batch [150/500] time 1.557 (1.568) data 0.001 (0.008) loss 0.8237 (1.1665) 
acc 78.1250 (70.5625) lr 1.8763e-03 eta 8:51:56 +epoch [10/50] batch [155/500] time 1.562 (1.568) data 0.000 (0.008) loss 0.9688 (1.1647) acc 68.7500 (70.6452) lr 1.8763e-03 eta 8:51:46 +epoch [10/50] batch [160/500] time 1.561 (1.568) data 0.000 (0.008) loss 0.5903 (1.1568) acc 78.1250 (70.7812) lr 1.8763e-03 eta 8:51:31 +epoch [10/50] batch [165/500] time 1.563 (1.568) data 0.000 (0.007) loss 1.0039 (1.1524) acc 78.1250 (70.7955) lr 1.8763e-03 eta 8:51:20 +epoch [10/50] batch [170/500] time 1.547 (1.567) data 0.000 (0.007) loss 1.7822 (1.1549) acc 53.1250 (70.6985) lr 1.8763e-03 eta 8:51:07 +epoch [10/50] batch [175/500] time 1.532 (1.567) data 0.000 (0.007) loss 1.3857 (1.1604) acc 65.6250 (70.6071) lr 1.8763e-03 eta 8:50:50 +epoch [10/50] batch [180/500] time 1.548 (1.567) data 0.001 (0.007) loss 1.4980 (1.1591) acc 59.3750 (70.5208) lr 1.8763e-03 eta 8:50:38 +epoch [10/50] batch [185/500] time 1.550 (1.567) data 0.000 (0.007) loss 1.1445 (1.1549) acc 65.6250 (70.6081) lr 1.8763e-03 eta 8:50:25 +epoch [10/50] batch [190/500] time 1.541 (1.567) data 0.000 (0.007) loss 0.7451 (1.1547) acc 81.2500 (70.6086) lr 1.8763e-03 eta 8:50:22 +epoch [10/50] batch [195/500] time 1.555 (1.567) data 0.000 (0.006) loss 0.7422 (1.1498) acc 87.5000 (70.8013) lr 1.8763e-03 eta 8:50:10 +epoch [10/50] batch [200/500] time 1.567 (1.567) data 0.000 (0.006) loss 1.4385 (1.1557) acc 75.0000 (70.7344) lr 1.8763e-03 eta 8:50:01 +epoch [10/50] batch [205/500] time 1.564 (1.567) data 0.000 (0.006) loss 1.4756 (1.1559) acc 65.6250 (70.6707) lr 1.8763e-03 eta 8:49:56 +epoch [10/50] batch [210/500] time 1.539 (1.567) data 0.000 (0.006) loss 0.9521 (1.1569) acc 65.6250 (70.6250) lr 1.8763e-03 eta 8:49:44 +epoch [10/50] batch [215/500] time 1.548 (1.566) data 0.000 (0.006) loss 1.4004 (1.1563) acc 68.7500 (70.7267) lr 1.8763e-03 eta 8:49:29 +epoch [10/50] batch [220/500] time 1.553 (1.566) data 0.000 (0.006) loss 1.7031 (1.1541) acc 62.5000 (70.7955) lr 1.8763e-03 eta 8:49:11 +epoch [10/50] 
batch [225/500] time 1.567 (1.566) data 0.000 (0.006) loss 1.3291 (1.1542) acc 71.8750 (70.7778) lr 1.8763e-03 eta 8:49:02 +epoch [10/50] batch [230/500] time 1.555 (1.565) data 0.000 (0.005) loss 1.1758 (1.1561) acc 71.8750 (70.8016) lr 1.8763e-03 eta 8:48:50 +epoch [10/50] batch [235/500] time 1.558 (1.565) data 0.000 (0.005) loss 1.5420 (1.1580) acc 62.5000 (70.7979) lr 1.8763e-03 eta 8:48:40 +epoch [10/50] batch [240/500] time 1.553 (1.565) data 0.000 (0.005) loss 1.2900 (1.1563) acc 75.0000 (70.8594) lr 1.8763e-03 eta 8:48:28 +epoch [10/50] batch [245/500] time 1.562 (1.565) data 0.000 (0.005) loss 1.2617 (1.1607) acc 71.8750 (70.7781) lr 1.8763e-03 eta 8:48:18 +epoch [10/50] batch [250/500] time 1.586 (1.565) data 0.000 (0.005) loss 1.3076 (1.1588) acc 68.7500 (70.7875) lr 1.8763e-03 eta 8:48:11 +epoch [10/50] batch [255/500] time 1.558 (1.565) data 0.000 (0.005) loss 1.2402 (1.1590) acc 78.1250 (70.8333) lr 1.8763e-03 eta 8:48:02 +epoch [10/50] batch [260/500] time 1.562 (1.565) data 0.000 (0.005) loss 1.4160 (1.1601) acc 68.7500 (70.8413) lr 1.8763e-03 eta 8:47:58 +epoch [10/50] batch [265/500] time 1.545 (1.565) data 0.001 (0.005) loss 1.8174 (1.1618) acc 56.2500 (70.8373) lr 1.8763e-03 eta 8:47:50 +epoch [10/50] batch [270/500] time 1.573 (1.565) data 0.000 (0.005) loss 1.2178 (1.1669) acc 65.6250 (70.7407) lr 1.8763e-03 eta 8:47:43 +epoch [10/50] batch [275/500] time 1.562 (1.565) data 0.001 (0.005) loss 1.4863 (1.1727) acc 68.7500 (70.6818) lr 1.8763e-03 eta 8:47:35 +epoch [10/50] batch [280/500] time 1.545 (1.565) data 0.000 (0.005) loss 1.5234 (1.1773) acc 62.5000 (70.5580) lr 1.8763e-03 eta 8:47:23 +epoch [10/50] batch [285/500] time 1.675 (1.565) data 0.000 (0.005) loss 1.1846 (1.1766) acc 71.8750 (70.5921) lr 1.8763e-03 eta 8:47:23 +epoch [10/50] batch [290/500] time 1.568 (1.565) data 0.000 (0.004) loss 1.3945 (1.1796) acc 71.8750 (70.6034) lr 1.8763e-03 eta 8:47:13 +epoch [10/50] batch [295/500] time 1.573 (1.565) data 0.000 (0.004) loss 1.7666 
(1.1827) acc 68.7500 (70.5614) lr 1.8763e-03 eta 8:47:06 +epoch [10/50] batch [300/500] time 1.563 (1.565) data 0.000 (0.004) loss 1.7207 (1.1844) acc 56.2500 (70.5625) lr 1.8763e-03 eta 8:46:55 +epoch [10/50] batch [305/500] time 1.549 (1.565) data 0.001 (0.004) loss 1.0107 (1.1858) acc 78.1250 (70.5328) lr 1.8763e-03 eta 8:46:46 +epoch [10/50] batch [310/500] time 1.532 (1.565) data 0.000 (0.004) loss 1.2197 (1.1863) acc 68.7500 (70.5645) lr 1.8763e-03 eta 8:46:35 +epoch [10/50] batch [315/500] time 1.537 (1.565) data 0.000 (0.004) loss 1.3369 (1.1854) acc 71.8750 (70.5952) lr 1.8763e-03 eta 8:46:24 +epoch [10/50] batch [320/500] time 1.567 (1.565) data 0.000 (0.004) loss 1.4736 (1.1877) acc 59.3750 (70.5469) lr 1.8763e-03 eta 8:46:13 +epoch [10/50] batch [325/500] time 1.571 (1.565) data 0.000 (0.004) loss 1.1416 (1.1917) acc 65.6250 (70.4423) lr 1.8763e-03 eta 8:46:05 +epoch [10/50] batch [330/500] time 1.544 (1.565) data 0.001 (0.004) loss 0.9180 (1.1903) acc 78.1250 (70.5019) lr 1.8763e-03 eta 8:46:01 +epoch [10/50] batch [335/500] time 1.549 (1.565) data 0.000 (0.004) loss 1.5908 (1.1892) acc 65.6250 (70.5410) lr 1.8763e-03 eta 8:45:49 +epoch [10/50] batch [340/500] time 1.554 (1.564) data 0.000 (0.004) loss 0.9663 (1.1892) acc 71.8750 (70.5055) lr 1.8763e-03 eta 8:45:37 +epoch [10/50] batch [345/500] time 1.536 (1.564) data 0.001 (0.004) loss 0.5547 (1.1848) acc 87.5000 (70.6159) lr 1.8763e-03 eta 8:45:26 +epoch [10/50] batch [350/500] time 1.533 (1.564) data 0.000 (0.004) loss 0.8555 (1.1867) acc 81.2500 (70.6161) lr 1.8763e-03 eta 8:45:13 +epoch [10/50] batch [355/500] time 1.563 (1.564) data 0.000 (0.004) loss 1.6279 (1.1881) acc 62.5000 (70.6250) lr 1.8763e-03 eta 8:45:05 +epoch [10/50] batch [360/500] time 1.570 (1.564) data 0.000 (0.004) loss 1.4971 (1.1864) acc 62.5000 (70.6510) lr 1.8763e-03 eta 8:44:57 +epoch [10/50] batch [365/500] time 1.552 (1.564) data 0.000 (0.004) loss 1.2715 (1.1855) acc 78.1250 (70.6507) lr 1.8763e-03 eta 8:44:50 +epoch 
[10/50] batch [370/500] time 1.560 (1.564) data 0.000 (0.004) loss 0.7446 (1.1842) acc 84.3750 (70.7348) lr 1.8763e-03 eta 8:44:42 +epoch [10/50] batch [375/500] time 1.535 (1.564) data 0.000 (0.004) loss 1.2520 (1.1812) acc 75.0000 (70.8167) lr 1.8763e-03 eta 8:44:31 +epoch [10/50] batch [380/500] time 1.558 (1.564) data 0.000 (0.003) loss 1.5176 (1.1811) acc 65.6250 (70.8224) lr 1.8763e-03 eta 8:44:19 +epoch [10/50] batch [385/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.5205 (1.1777) acc 65.6250 (70.8523) lr 1.8763e-03 eta 8:44:09 +epoch [10/50] batch [390/500] time 1.555 (1.563) data 0.001 (0.003) loss 1.6895 (1.1775) acc 65.6250 (70.9215) lr 1.8763e-03 eta 8:43:59 +epoch [10/50] batch [395/500] time 1.571 (1.563) data 0.001 (0.003) loss 1.3252 (1.1811) acc 71.8750 (70.8861) lr 1.8763e-03 eta 8:43:50 +epoch [10/50] batch [400/500] time 1.548 (1.563) data 0.000 (0.003) loss 1.0547 (1.1802) acc 68.7500 (70.8594) lr 1.8763e-03 eta 8:43:42 +epoch [10/50] batch [405/500] time 1.549 (1.563) data 0.000 (0.003) loss 1.0869 (1.1824) acc 71.8750 (70.8719) lr 1.8763e-03 eta 8:43:34 +epoch [10/50] batch [410/500] time 1.562 (1.563) data 0.000 (0.003) loss 0.7329 (1.1810) acc 84.3750 (70.9146) lr 1.8763e-03 eta 8:43:25 +epoch [10/50] batch [415/500] time 1.581 (1.563) data 0.000 (0.003) loss 1.1523 (1.1786) acc 75.0000 (70.9714) lr 1.8763e-03 eta 8:43:20 +epoch [10/50] batch [420/500] time 1.545 (1.563) data 0.000 (0.003) loss 1.1123 (1.1778) acc 75.0000 (71.0268) lr 1.8763e-03 eta 8:43:11 +epoch [10/50] batch [425/500] time 1.552 (1.563) data 0.000 (0.003) loss 0.6802 (1.1792) acc 81.2500 (71.0294) lr 1.8763e-03 eta 8:43:01 +epoch [10/50] batch [430/500] time 1.583 (1.564) data 0.000 (0.003) loss 1.3740 (1.1803) acc 62.5000 (70.9956) lr 1.8763e-03 eta 8:43:01 +epoch [10/50] batch [435/500] time 1.570 (1.564) data 0.001 (0.003) loss 1.6846 (1.1826) acc 59.3750 (70.9411) lr 1.8763e-03 eta 8:42:52 +epoch [10/50] batch [440/500] time 1.553 (1.563) data 0.000 (0.003) loss 
1.0508 (1.1796) acc 78.1250 (70.9588) lr 1.8763e-03 eta 8:42:42 +epoch [10/50] batch [445/500] time 1.553 (1.563) data 0.001 (0.003) loss 1.8975 (1.1800) acc 53.1250 (70.9480) lr 1.8763e-03 eta 8:42:34 +epoch [10/50] batch [450/500] time 1.547 (1.563) data 0.000 (0.003) loss 1.1689 (1.1783) acc 68.7500 (70.9722) lr 1.8763e-03 eta 8:42:24 +epoch [10/50] batch [455/500] time 1.571 (1.563) data 0.000 (0.003) loss 0.7993 (1.1781) acc 78.1250 (70.9547) lr 1.8763e-03 eta 8:42:16 +epoch [10/50] batch [460/500] time 1.572 (1.563) data 0.000 (0.003) loss 1.4561 (1.1784) acc 65.6250 (71.0054) lr 1.8763e-03 eta 8:42:07 +epoch [10/50] batch [465/500] time 1.555 (1.563) data 0.000 (0.003) loss 1.2178 (1.1773) acc 62.5000 (71.0282) lr 1.8763e-03 eta 8:42:00 +epoch [10/50] batch [470/500] time 1.559 (1.563) data 0.001 (0.003) loss 1.2539 (1.1777) acc 75.0000 (71.0239) lr 1.8763e-03 eta 8:41:55 +epoch [10/50] batch [475/500] time 1.574 (1.564) data 0.000 (0.003) loss 0.7480 (1.1753) acc 78.1250 (71.0658) lr 1.8763e-03 eta 8:41:53 +epoch [10/50] batch [480/500] time 1.532 (1.564) data 0.000 (0.003) loss 1.5908 (1.1773) acc 56.2500 (71.0156) lr 1.8763e-03 eta 8:41:43 +epoch [10/50] batch [485/500] time 1.548 (1.563) data 0.001 (0.003) loss 1.4111 (1.1774) acc 71.8750 (71.0180) lr 1.8763e-03 eta 8:41:31 +epoch [10/50] batch [490/500] time 1.560 (1.563) data 0.000 (0.003) loss 1.1367 (1.1763) acc 75.0000 (71.0842) lr 1.8763e-03 eta 8:41:22 +epoch [10/50] batch [495/500] time 1.577 (1.563) data 0.000 (0.003) loss 0.7261 (1.1751) acc 84.3750 (71.1111) lr 1.8763e-03 eta 8:41:15 +epoch [10/50] batch [500/500] time 1.586 (1.563) data 0.000 (0.003) loss 1.0049 (1.1736) acc 75.0000 (71.1625) lr 1.8443e-03 eta 8:41:07 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,919 +* accuracy: 77.8% +* error: 22.2% +* macro_f1: 77.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [11/50] 
batch [5/500] time 1.552 (1.700) data 0.001 (0.203) loss 1.6348 (1.1372) acc 65.6250 (73.1250) lr 1.8443e-03 eta 9:26:26 +epoch [11/50] batch [10/500] time 1.554 (1.629) data 0.002 (0.102) loss 1.9365 (1.3328) acc 59.3750 (70.6250) lr 1.8443e-03 eta 9:02:48 +epoch [11/50] batch [15/500] time 1.578 (1.606) data 0.000 (0.068) loss 1.1143 (1.3206) acc 65.6250 (69.7917) lr 1.8443e-03 eta 8:54:54 +epoch [11/50] batch [20/500] time 1.564 (1.595) data 0.000 (0.051) loss 0.9004 (1.2604) acc 68.7500 (70.4688) lr 1.8443e-03 eta 8:51:13 +epoch [11/50] batch [25/500] time 1.553 (1.589) data 0.000 (0.041) loss 0.8188 (1.2182) acc 81.2500 (71.5000) lr 1.8443e-03 eta 8:49:06 +epoch [11/50] batch [30/500] time 1.577 (1.587) data 0.000 (0.034) loss 1.1445 (1.2121) acc 62.5000 (71.4583) lr 1.8443e-03 eta 8:48:07 +epoch [11/50] batch [35/500] time 1.582 (1.582) data 0.001 (0.029) loss 1.6211 (1.2106) acc 62.5000 (71.8750) lr 1.8443e-03 eta 8:46:31 +epoch [11/50] batch [40/500] time 1.565 (1.581) data 0.001 (0.026) loss 0.9165 (1.2310) acc 75.0000 (71.3281) lr 1.8443e-03 eta 8:45:54 +epoch [11/50] batch [45/500] time 1.560 (1.579) data 0.001 (0.023) loss 1.7549 (1.2278) acc 46.8750 (70.7639) lr 1.8443e-03 eta 8:44:59 +epoch [11/50] batch [50/500] time 1.570 (1.578) data 0.000 (0.021) loss 0.8984 (1.2158) acc 71.8750 (70.8750) lr 1.8443e-03 eta 8:44:32 +epoch [11/50] batch [55/500] time 1.572 (1.576) data 0.000 (0.019) loss 1.0977 (1.2091) acc 68.7500 (70.7955) lr 1.8443e-03 eta 8:43:45 +epoch [11/50] batch [60/500] time 1.550 (1.575) data 0.001 (0.017) loss 1.2129 (1.1896) acc 68.7500 (70.8854) lr 1.8443e-03 eta 8:43:17 +epoch [11/50] batch [65/500] time 1.542 (1.574) data 0.000 (0.016) loss 1.7783 (1.2130) acc 68.7500 (70.6250) lr 1.8443e-03 eta 8:43:06 +epoch [11/50] batch [70/500] time 1.590 (1.573) data 0.000 (0.015) loss 0.5117 (1.1888) acc 87.5000 (71.0714) lr 1.8443e-03 eta 8:42:24 +epoch [11/50] batch [75/500] time 1.559 (1.572) data 0.000 (0.014) loss 0.6987 (1.1746) acc 
81.2500 (71.3750) lr 1.8443e-03 eta 8:42:04 +epoch [11/50] batch [80/500] time 1.552 (1.571) data 0.000 (0.013) loss 1.3652 (1.1732) acc 71.8750 (71.5234) lr 1.8443e-03 eta 8:41:25 +epoch [11/50] batch [85/500] time 1.569 (1.570) data 0.001 (0.012) loss 1.1133 (1.1785) acc 65.6250 (71.4706) lr 1.8443e-03 eta 8:41:08 +epoch [11/50] batch [90/500] time 1.554 (1.569) data 0.000 (0.012) loss 1.8604 (1.1933) acc 59.3750 (71.2153) lr 1.8443e-03 eta 8:40:40 +epoch [11/50] batch [95/500] time 1.556 (1.569) data 0.000 (0.011) loss 1.6904 (1.1812) acc 59.3750 (71.4474) lr 1.8443e-03 eta 8:40:30 +epoch [11/50] batch [100/500] time 1.529 (1.568) data 0.000 (0.011) loss 1.0928 (1.1765) acc 75.0000 (71.3750) lr 1.8443e-03 eta 8:40:02 +epoch [11/50] batch [105/500] time 1.571 (1.567) data 0.000 (0.010) loss 1.5029 (1.1778) acc 56.2500 (71.3095) lr 1.8443e-03 eta 8:39:43 +epoch [11/50] batch [110/500] time 1.548 (1.568) data 0.000 (0.010) loss 1.6885 (1.1803) acc 71.8750 (71.4489) lr 1.8443e-03 eta 8:39:51 +epoch [11/50] batch [115/500] time 1.552 (1.568) data 0.000 (0.009) loss 0.8628 (1.1806) acc 78.1250 (71.3043) lr 1.8443e-03 eta 8:39:31 +epoch [11/50] batch [120/500] time 1.548 (1.567) data 0.001 (0.009) loss 1.0205 (1.1833) acc 68.7500 (71.2240) lr 1.8443e-03 eta 8:39:18 +epoch [11/50] batch [125/500] time 1.539 (1.567) data 0.000 (0.009) loss 1.2764 (1.1962) acc 62.5000 (71.0250) lr 1.8443e-03 eta 8:38:57 +epoch [11/50] batch [130/500] time 1.552 (1.566) data 0.000 (0.008) loss 1.1514 (1.1906) acc 75.0000 (71.0337) lr 1.8443e-03 eta 8:38:38 +epoch [11/50] batch [135/500] time 1.565 (1.566) data 0.000 (0.008) loss 1.3672 (1.1966) acc 62.5000 (70.8796) lr 1.8443e-03 eta 8:38:26 +epoch [11/50] batch [140/500] time 1.552 (1.566) data 0.001 (0.008) loss 1.8848 (1.1988) acc 56.2500 (70.8705) lr 1.8443e-03 eta 8:38:14 +epoch [11/50] batch [145/500] time 1.560 (1.565) data 0.001 (0.007) loss 1.2080 (1.1981) acc 62.5000 (70.7974) lr 1.8443e-03 eta 8:38:02 +epoch [11/50] batch 
[150/500] time 1.576 (1.566) data 0.000 (0.007) loss 0.7163 (1.1864) acc 81.2500 (71.0208) lr 1.8443e-03 eta 8:37:58 +epoch [11/50] batch [155/500] time 1.567 (1.566) data 0.000 (0.007) loss 1.1836 (1.1868) acc 71.8750 (70.8871) lr 1.8443e-03 eta 8:37:55 +epoch [11/50] batch [160/500] time 1.538 (1.566) data 0.001 (0.007) loss 1.0059 (1.1851) acc 78.1250 (70.9570) lr 1.8443e-03 eta 8:37:39 +epoch [11/50] batch [165/500] time 1.567 (1.566) data 0.000 (0.007) loss 1.0654 (1.1871) acc 71.8750 (70.9470) lr 1.8443e-03 eta 8:37:33 +epoch [11/50] batch [170/500] time 1.557 (1.565) data 0.001 (0.006) loss 1.5830 (1.1887) acc 53.1250 (70.8088) lr 1.8443e-03 eta 8:37:21 +epoch [11/50] batch [175/500] time 1.575 (1.565) data 0.000 (0.006) loss 1.0791 (1.1858) acc 68.7500 (70.9286) lr 1.8443e-03 eta 8:37:11 +epoch [11/50] batch [180/500] time 1.541 (1.565) data 0.000 (0.006) loss 1.3545 (1.1906) acc 56.2500 (70.7986) lr 1.8443e-03 eta 8:37:02 +epoch [11/50] batch [185/500] time 1.545 (1.565) data 0.000 (0.006) loss 0.5903 (1.1894) acc 78.1250 (70.8277) lr 1.8443e-03 eta 8:36:47 +epoch [11/50] batch [190/500] time 1.556 (1.565) data 0.000 (0.006) loss 1.2607 (1.1866) acc 75.0000 (70.9539) lr 1.8443e-03 eta 8:36:39 +epoch [11/50] batch [195/500] time 1.577 (1.565) data 0.000 (0.006) loss 1.2188 (1.1865) acc 75.0000 (70.9776) lr 1.8443e-03 eta 8:36:32 +epoch [11/50] batch [200/500] time 1.579 (1.565) data 0.001 (0.006) loss 1.4561 (1.1857) acc 62.5000 (70.9844) lr 1.8443e-03 eta 8:36:24 +epoch [11/50] batch [205/500] time 1.669 (1.566) data 0.000 (0.005) loss 1.0645 (1.1847) acc 78.1250 (70.9604) lr 1.8443e-03 eta 8:36:29 +epoch [11/50] batch [210/500] time 1.564 (1.566) data 0.001 (0.005) loss 1.2363 (1.1822) acc 75.0000 (71.0268) lr 1.8443e-03 eta 8:36:28 +epoch [11/50] batch [215/500] time 1.575 (1.566) data 0.000 (0.005) loss 1.6797 (1.1928) acc 59.3750 (70.9012) lr 1.8443e-03 eta 8:36:22 +epoch [11/50] batch [220/500] time 1.556 (1.566) data 0.000 (0.005) loss 1.1338 
(1.1922) acc 71.8750 (70.8807) lr 1.8443e-03 eta 8:36:15 +epoch [11/50] batch [225/500] time 1.572 (1.566) data 0.000 (0.005) loss 1.1582 (1.1890) acc 65.6250 (70.9583) lr 1.8443e-03 eta 8:36:05 +epoch [11/50] batch [230/500] time 1.559 (1.566) data 0.000 (0.005) loss 0.7822 (1.1803) acc 84.3750 (71.1821) lr 1.8443e-03 eta 8:36:02 +epoch [11/50] batch [235/500] time 1.599 (1.566) data 0.000 (0.005) loss 1.3291 (1.1764) acc 62.5000 (71.2101) lr 1.8443e-03 eta 8:35:55 +epoch [11/50] batch [240/500] time 1.574 (1.566) data 0.000 (0.005) loss 1.2490 (1.1717) acc 71.8750 (71.2500) lr 1.8443e-03 eta 8:35:53 +epoch [11/50] batch [245/500] time 1.553 (1.566) data 0.000 (0.005) loss 1.5010 (1.1759) acc 59.3750 (71.1990) lr 1.8443e-03 eta 8:35:41 +epoch [11/50] batch [250/500] time 1.580 (1.567) data 0.000 (0.005) loss 1.1445 (1.1834) acc 68.7500 (71.0125) lr 1.8443e-03 eta 8:35:45 +epoch [11/50] batch [255/500] time 1.588 (1.567) data 0.000 (0.004) loss 1.0176 (1.1821) acc 62.5000 (70.9926) lr 1.8443e-03 eta 8:35:39 +epoch [11/50] batch [260/500] time 1.555 (1.567) data 0.000 (0.004) loss 1.0117 (1.1784) acc 78.1250 (71.0938) lr 1.8443e-03 eta 8:35:27 +epoch [11/50] batch [265/500] time 1.545 (1.566) data 0.000 (0.004) loss 0.7905 (1.1748) acc 78.1250 (71.2028) lr 1.8443e-03 eta 8:35:13 +epoch [11/50] batch [270/500] time 1.542 (1.566) data 0.000 (0.004) loss 1.0195 (1.1767) acc 71.8750 (71.1574) lr 1.8443e-03 eta 8:35:03 +epoch [11/50] batch [275/500] time 1.596 (1.566) data 0.001 (0.004) loss 0.7827 (1.1756) acc 81.2500 (71.1932) lr 1.8443e-03 eta 8:34:57 +epoch [11/50] batch [280/500] time 1.555 (1.567) data 0.000 (0.004) loss 1.6123 (1.1738) acc 59.3750 (71.1384) lr 1.8443e-03 eta 8:34:52 +epoch [11/50] batch [285/500] time 1.576 (1.567) data 0.000 (0.004) loss 1.7490 (1.1770) acc 62.5000 (71.0636) lr 1.8443e-03 eta 8:34:45 +epoch [11/50] batch [290/500] time 1.591 (1.567) data 0.001 (0.004) loss 1.8047 (1.1846) acc 43.7500 (70.8190) lr 1.8443e-03 eta 8:34:39 +epoch 
[11/50] batch [295/500] time 1.558 (1.566) data 0.001 (0.004) loss 0.8125 (1.1837) acc 78.1250 (70.8686) lr 1.8443e-03 eta 8:34:27 +epoch [11/50] batch [300/500] time 1.560 (1.567) data 0.000 (0.004) loss 1.3574 (1.1838) acc 84.3750 (70.8750) lr 1.8443e-03 eta 8:34:21 +epoch [11/50] batch [305/500] time 1.559 (1.567) data 0.000 (0.004) loss 0.9663 (1.1809) acc 71.8750 (70.9016) lr 1.8443e-03 eta 8:34:13 +epoch [11/50] batch [310/500] time 1.557 (1.567) data 0.000 (0.004) loss 1.4219 (1.1802) acc 71.8750 (70.9375) lr 1.8443e-03 eta 8:34:04 +epoch [11/50] batch [315/500] time 1.580 (1.567) data 0.001 (0.004) loss 0.7939 (1.1786) acc 68.7500 (70.9325) lr 1.8443e-03 eta 8:33:57 +epoch [11/50] batch [320/500] time 1.564 (1.567) data 0.000 (0.004) loss 0.7617 (1.1747) acc 71.8750 (70.9082) lr 1.8443e-03 eta 8:33:51 +epoch [11/50] batch [325/500] time 1.552 (1.567) data 0.001 (0.004) loss 1.1982 (1.1766) acc 78.1250 (70.8365) lr 1.8443e-03 eta 8:33:42 +epoch [11/50] batch [330/500] time 1.557 (1.566) data 0.000 (0.004) loss 1.3574 (1.1737) acc 65.6250 (70.8996) lr 1.8443e-03 eta 8:33:32 +epoch [11/50] batch [335/500] time 1.567 (1.566) data 0.000 (0.003) loss 1.7627 (1.1750) acc 59.3750 (70.9235) lr 1.8443e-03 eta 8:33:22 +epoch [11/50] batch [340/500] time 1.563 (1.566) data 0.000 (0.003) loss 1.4482 (1.1782) acc 59.3750 (70.9007) lr 1.8443e-03 eta 8:33:09 +epoch [11/50] batch [345/500] time 1.569 (1.566) data 0.000 (0.003) loss 1.4023 (1.1802) acc 68.7500 (70.8424) lr 1.8443e-03 eta 8:33:00 +epoch [11/50] batch [350/500] time 1.554 (1.566) data 0.000 (0.003) loss 0.9805 (1.1792) acc 84.3750 (70.8929) lr 1.8443e-03 eta 8:32:56 +epoch [11/50] batch [355/500] time 1.584 (1.566) data 0.000 (0.003) loss 0.8525 (1.1759) acc 75.0000 (70.9507) lr 1.8443e-03 eta 8:32:49 +epoch [11/50] batch [360/500] time 1.559 (1.566) data 0.000 (0.003) loss 1.2041 (1.1757) acc 75.0000 (70.9722) lr 1.8443e-03 eta 8:32:38 +epoch [11/50] batch [365/500] time 1.544 (1.566) data 0.000 (0.003) loss 
0.6016 (1.1771) acc 81.2500 (70.9418) lr 1.8443e-03 eta 8:32:28 +epoch [11/50] batch [370/500] time 1.536 (1.566) data 0.001 (0.003) loss 0.8530 (1.1770) acc 87.5000 (70.9291) lr 1.8443e-03 eta 8:32:17 +epoch [11/50] batch [375/500] time 1.581 (1.566) data 0.000 (0.003) loss 0.7690 (1.1773) acc 75.0000 (70.9000) lr 1.8443e-03 eta 8:32:09 +epoch [11/50] batch [380/500] time 1.543 (1.566) data 0.000 (0.003) loss 1.6562 (1.1792) acc 71.8750 (70.9293) lr 1.8443e-03 eta 8:31:58 +epoch [11/50] batch [385/500] time 1.538 (1.566) data 0.000 (0.003) loss 1.1982 (1.1777) acc 78.1250 (70.9253) lr 1.8443e-03 eta 8:31:48 +epoch [11/50] batch [390/500] time 1.539 (1.565) data 0.000 (0.003) loss 1.0576 (1.1788) acc 78.1250 (70.9295) lr 1.8443e-03 eta 8:31:35 +epoch [11/50] batch [395/500] time 1.551 (1.565) data 0.000 (0.003) loss 1.2197 (1.1810) acc 62.5000 (70.8623) lr 1.8443e-03 eta 8:31:29 +epoch [11/50] batch [400/500] time 1.557 (1.565) data 0.000 (0.003) loss 1.7646 (1.1824) acc 56.2500 (70.8438) lr 1.8443e-03 eta 8:31:18 +epoch [11/50] batch [405/500] time 1.565 (1.565) data 0.000 (0.003) loss 1.5879 (1.1835) acc 62.5000 (70.8410) lr 1.8443e-03 eta 8:31:07 +epoch [11/50] batch [410/500] time 1.561 (1.565) data 0.000 (0.003) loss 1.2461 (1.1821) acc 68.7500 (70.8384) lr 1.8443e-03 eta 8:30:58 +epoch [11/50] batch [415/500] time 1.564 (1.565) data 0.000 (0.003) loss 0.9663 (1.1816) acc 78.1250 (70.8434) lr 1.8443e-03 eta 8:30:48 +epoch [11/50] batch [420/500] time 1.550 (1.565) data 0.000 (0.003) loss 0.7061 (1.1819) acc 87.5000 (70.8408) lr 1.8443e-03 eta 8:30:35 +epoch [11/50] batch [425/500] time 1.552 (1.564) data 0.000 (0.003) loss 1.3281 (1.1841) acc 75.0000 (70.8382) lr 1.8443e-03 eta 8:30:25 +epoch [11/50] batch [430/500] time 1.558 (1.564) data 0.000 (0.003) loss 0.5757 (1.1829) acc 87.5000 (70.8648) lr 1.8443e-03 eta 8:30:17 +epoch [11/50] batch [435/500] time 1.548 (1.564) data 0.000 (0.003) loss 0.9360 (1.1838) acc 68.7500 (70.8405) lr 1.8443e-03 eta 8:30:09 
+epoch [11/50] batch [440/500] time 1.543 (1.564) data 0.000 (0.003) loss 1.2754 (1.1834) acc 68.7500 (70.8523) lr 1.8443e-03 eta 8:29:58 +epoch [11/50] batch [445/500] time 1.584 (1.564) data 0.000 (0.003) loss 1.3096 (1.1835) acc 68.7500 (70.8427) lr 1.8443e-03 eta 8:29:48 +epoch [11/50] batch [450/500] time 1.540 (1.564) data 0.000 (0.003) loss 1.1982 (1.1852) acc 71.8750 (70.8194) lr 1.8443e-03 eta 8:29:37 +epoch [11/50] batch [455/500] time 1.550 (1.564) data 0.001 (0.003) loss 1.3457 (1.1833) acc 68.7500 (70.8448) lr 1.8443e-03 eta 8:29:27 +epoch [11/50] batch [460/500] time 1.566 (1.564) data 0.000 (0.003) loss 1.7275 (1.1811) acc 59.3750 (70.9103) lr 1.8443e-03 eta 8:29:17 +epoch [11/50] batch [465/500] time 1.583 (1.564) data 0.000 (0.003) loss 1.7842 (1.1807) acc 62.5000 (70.9140) lr 1.8443e-03 eta 8:29:06 +epoch [11/50] batch [470/500] time 1.572 (1.564) data 0.000 (0.003) loss 1.5596 (1.1797) acc 68.7500 (70.9574) lr 1.8443e-03 eta 8:28:58 +epoch [11/50] batch [475/500] time 1.559 (1.564) data 0.000 (0.003) loss 0.7241 (1.1784) acc 81.2500 (71.0395) lr 1.8443e-03 eta 8:28:48 +epoch [11/50] batch [480/500] time 1.551 (1.564) data 0.001 (0.003) loss 0.8628 (1.1774) acc 75.0000 (71.0417) lr 1.8443e-03 eta 8:28:40 +epoch [11/50] batch [485/500] time 1.528 (1.563) data 0.001 (0.003) loss 1.5547 (1.1784) acc 65.6250 (71.0116) lr 1.8443e-03 eta 8:28:28 +epoch [11/50] batch [490/500] time 1.557 (1.563) data 0.000 (0.003) loss 1.4346 (1.1784) acc 59.3750 (70.9821) lr 1.8443e-03 eta 8:28:17 +epoch [11/50] batch [495/500] time 1.540 (1.563) data 0.000 (0.002) loss 0.9971 (1.1787) acc 75.0000 (71.0101) lr 1.8443e-03 eta 8:28:09 +epoch [11/50] batch [500/500] time 1.553 (1.563) data 0.000 (0.002) loss 0.8047 (1.1765) acc 78.1250 (71.0563) lr 1.8090e-03 eta 8:28:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,934 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.3% +Checkpoint saved to 
output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [12/50] batch [5/500] time 1.546 (1.694) data 0.000 (0.189) loss 0.8926 (1.1154) acc 78.1250 (70.0000) lr 1.8090e-03 eta 9:10:31 +epoch [12/50] batch [10/500] time 1.569 (1.626) data 0.001 (0.095) loss 1.2637 (1.2128) acc 71.8750 (69.0625) lr 1.8090e-03 eta 8:48:12 +epoch [12/50] batch [15/500] time 1.555 (1.605) data 0.000 (0.063) loss 1.5762 (1.2310) acc 62.5000 (68.5417) lr 1.8090e-03 eta 8:41:09 +epoch [12/50] batch [20/500] time 1.553 (1.596) data 0.000 (0.048) loss 1.3389 (1.2262) acc 78.1250 (69.3750) lr 1.8090e-03 eta 8:38:02 +epoch [12/50] batch [25/500] time 1.563 (1.588) data 0.001 (0.038) loss 0.8535 (1.1769) acc 78.1250 (70.5000) lr 1.8090e-03 eta 8:35:28 +epoch [12/50] batch [30/500] time 1.567 (1.584) data 0.000 (0.032) loss 0.9189 (1.1612) acc 68.7500 (70.3125) lr 1.8090e-03 eta 8:34:04 +epoch [12/50] batch [35/500] time 1.537 (1.581) data 0.000 (0.027) loss 1.5654 (1.1825) acc 62.5000 (70.3571) lr 1.8090e-03 eta 8:32:51 +epoch [12/50] batch [40/500] time 1.579 (1.583) data 0.001 (0.024) loss 1.4766 (1.1952) acc 62.5000 (69.9219) lr 1.8090e-03 eta 8:33:19 +epoch [12/50] batch [45/500] time 1.567 (1.580) data 0.000 (0.021) loss 1.4297 (1.1991) acc 56.2500 (69.8611) lr 1.8090e-03 eta 8:32:24 +epoch [12/50] batch [50/500] time 1.557 (1.578) data 0.000 (0.019) loss 1.2666 (1.2147) acc 62.5000 (69.5000) lr 1.8090e-03 eta 8:31:38 +epoch [12/50] batch [55/500] time 1.559 (1.576) data 0.000 (0.018) loss 1.2061 (1.2171) acc 78.1250 (69.6023) lr 1.8090e-03 eta 8:30:53 +epoch [12/50] batch [60/500] time 1.563 (1.575) data 0.000 (0.016) loss 1.1270 (1.2117) acc 75.0000 (69.8958) lr 1.8090e-03 eta 8:30:09 +epoch [12/50] batch [65/500] time 1.545 (1.574) data 0.000 (0.015) loss 1.2852 (1.2209) acc 68.7500 (70.0481) lr 1.8090e-03 eta 8:29:51 +epoch [12/50] batch [70/500] time 1.553 (1.573) data 0.001 (0.014) loss 0.7285 (1.2169) acc 81.2500 
(70.0446) lr 1.8090e-03 eta 8:29:31 +epoch [12/50] batch [75/500] time 1.562 (1.572) data 0.000 (0.013) loss 1.0830 (1.2053) acc 75.0000 (70.4167) lr 1.8090e-03 eta 8:29:03 +epoch [12/50] batch [80/500] time 1.597 (1.573) data 0.000 (0.012) loss 1.0762 (1.2024) acc 75.0000 (70.3125) lr 1.8090e-03 eta 8:29:14 +epoch [12/50] batch [85/500] time 1.559 (1.573) data 0.000 (0.012) loss 0.6001 (1.2048) acc 87.5000 (70.3309) lr 1.8090e-03 eta 8:29:07 +epoch [12/50] batch [90/500] time 1.576 (1.572) data 0.000 (0.011) loss 1.1084 (1.2092) acc 75.0000 (70.3819) lr 1.8090e-03 eta 8:28:40 +epoch [12/50] batch [95/500] time 1.549 (1.572) data 0.000 (0.010) loss 1.1221 (1.2007) acc 71.8750 (70.1645) lr 1.8090e-03 eta 8:28:16 +epoch [12/50] batch [100/500] time 1.574 (1.571) data 0.000 (0.010) loss 0.9258 (1.2018) acc 75.0000 (70.0625) lr 1.8090e-03 eta 8:28:00 +epoch [12/50] batch [105/500] time 1.554 (1.570) data 0.001 (0.009) loss 0.8188 (1.2078) acc 84.3750 (70.0893) lr 1.8090e-03 eta 8:27:35 +epoch [12/50] batch [110/500] time 1.547 (1.570) data 0.000 (0.009) loss 1.0850 (1.2049) acc 75.0000 (70.1705) lr 1.8090e-03 eta 8:27:13 +epoch [12/50] batch [115/500] time 1.566 (1.569) data 0.000 (0.009) loss 1.2549 (1.2039) acc 75.0000 (70.2717) lr 1.8090e-03 eta 8:27:02 +epoch [12/50] batch [120/500] time 1.573 (1.569) data 0.000 (0.008) loss 1.7363 (1.1987) acc 50.0000 (70.4688) lr 1.8090e-03 eta 8:26:48 +epoch [12/50] batch [125/500] time 1.558 (1.569) data 0.000 (0.008) loss 1.2305 (1.1959) acc 65.6250 (70.5500) lr 1.8090e-03 eta 8:26:32 +epoch [12/50] batch [130/500] time 1.572 (1.568) data 0.000 (0.008) loss 1.0977 (1.2039) acc 62.5000 (70.4327) lr 1.8090e-03 eta 8:26:14 +epoch [12/50] batch [135/500] time 1.555 (1.568) data 0.000 (0.007) loss 0.9087 (1.2027) acc 75.0000 (70.3009) lr 1.8090e-03 eta 8:26:02 +epoch [12/50] batch [140/500] time 1.578 (1.569) data 0.000 (0.007) loss 1.3174 (1.2011) acc 65.6250 (70.3348) lr 1.8090e-03 eta 8:26:13 +epoch [12/50] batch [145/500] time 
1.537 (1.569) data 0.000 (0.007) loss 1.5566 (1.1958) acc 65.6250 (70.3017) lr 1.8090e-03 eta 8:26:02 +epoch [12/50] batch [150/500] time 1.555 (1.569) data 0.000 (0.007) loss 1.2139 (1.1942) acc 71.8750 (70.3125) lr 1.8090e-03 eta 8:25:53 +epoch [12/50] batch [155/500] time 1.575 (1.569) data 0.000 (0.007) loss 0.8223 (1.1941) acc 75.0000 (70.3226) lr 1.8090e-03 eta 8:25:45 +epoch [12/50] batch [160/500] time 1.547 (1.569) data 0.000 (0.006) loss 1.1387 (1.1911) acc 75.0000 (70.4297) lr 1.8090e-03 eta 8:25:36 +epoch [12/50] batch [165/500] time 1.557 (1.568) data 0.000 (0.006) loss 0.9839 (1.1908) acc 71.8750 (70.3977) lr 1.8090e-03 eta 8:25:24 +epoch [12/50] batch [170/500] time 1.547 (1.568) data 0.001 (0.006) loss 1.5781 (1.1919) acc 59.3750 (70.3676) lr 1.8090e-03 eta 8:25:16 +epoch [12/50] batch [175/500] time 1.582 (1.568) data 0.000 (0.006) loss 1.0400 (1.1901) acc 75.0000 (70.3571) lr 1.8090e-03 eta 8:25:08 +epoch [12/50] batch [180/500] time 1.553 (1.568) data 0.000 (0.006) loss 1.3564 (1.1916) acc 59.3750 (70.2604) lr 1.8090e-03 eta 8:24:56 +epoch [12/50] batch [185/500] time 1.564 (1.568) data 0.000 (0.006) loss 0.9814 (1.1912) acc 78.1250 (70.2534) lr 1.8090e-03 eta 8:24:53 +epoch [12/50] batch [190/500] time 1.562 (1.568) data 0.000 (0.005) loss 1.4033 (1.1890) acc 68.7500 (70.3618) lr 1.8090e-03 eta 8:24:39 +epoch [12/50] batch [195/500] time 1.562 (1.568) data 0.000 (0.005) loss 0.7153 (1.1881) acc 78.1250 (70.3526) lr 1.8090e-03 eta 8:24:30 +epoch [12/50] batch [200/500] time 1.565 (1.568) data 0.000 (0.005) loss 1.5850 (1.1791) acc 59.3750 (70.5156) lr 1.8090e-03 eta 8:24:16 +epoch [12/50] batch [205/500] time 1.555 (1.568) data 0.000 (0.005) loss 1.4150 (1.1761) acc 78.1250 (70.5640) lr 1.8090e-03 eta 8:24:05 +epoch [12/50] batch [210/500] time 1.552 (1.567) data 0.000 (0.005) loss 0.5503 (1.1714) acc 87.5000 (70.6548) lr 1.8090e-03 eta 8:23:54 +epoch [12/50] batch [215/500] time 1.555 (1.567) data 0.000 (0.005) loss 0.7905 (1.1666) acc 81.2500 
(70.7267) lr 1.8090e-03 eta 8:23:42 +epoch [12/50] batch [220/500] time 1.572 (1.567) data 0.000 (0.005) loss 0.7305 (1.1752) acc 78.1250 (70.6534) lr 1.8090e-03 eta 8:23:36 +epoch [12/50] batch [225/500] time 1.582 (1.567) data 0.000 (0.005) loss 1.4805 (1.1731) acc 62.5000 (70.7083) lr 1.8090e-03 eta 8:23:28 +epoch [12/50] batch [230/500] time 1.573 (1.567) data 0.000 (0.005) loss 0.7515 (1.1713) acc 71.8750 (70.7473) lr 1.8090e-03 eta 8:23:19 +epoch [12/50] batch [235/500] time 1.600 (1.567) data 0.000 (0.004) loss 1.2842 (1.1707) acc 62.5000 (70.7713) lr 1.8090e-03 eta 8:23:12 +epoch [12/50] batch [240/500] time 1.544 (1.567) data 0.001 (0.004) loss 0.9966 (1.1666) acc 81.2500 (70.8724) lr 1.8090e-03 eta 8:23:02 +epoch [12/50] batch [245/500] time 1.555 (1.567) data 0.000 (0.004) loss 0.7661 (1.1645) acc 78.1250 (70.9439) lr 1.8090e-03 eta 8:22:51 +epoch [12/50] batch [250/500] time 1.569 (1.567) data 0.000 (0.004) loss 1.8760 (1.1677) acc 56.2500 (70.8750) lr 1.8090e-03 eta 8:22:43 +epoch [12/50] batch [255/500] time 1.577 (1.567) data 0.000 (0.004) loss 0.7339 (1.1677) acc 81.2500 (70.9314) lr 1.8090e-03 eta 8:22:39 +epoch [12/50] batch [260/500] time 1.537 (1.567) data 0.000 (0.004) loss 1.4473 (1.1667) acc 68.7500 (70.9255) lr 1.8090e-03 eta 8:22:29 +epoch [12/50] batch [265/500] time 1.549 (1.567) data 0.001 (0.004) loss 1.6934 (1.1687) acc 56.2500 (70.8255) lr 1.8090e-03 eta 8:22:17 +epoch [12/50] batch [270/500] time 1.576 (1.567) data 0.000 (0.004) loss 1.2783 (1.1669) acc 68.7500 (70.8681) lr 1.8090e-03 eta 8:22:09 +epoch [12/50] batch [275/500] time 1.569 (1.567) data 0.001 (0.004) loss 1.6289 (1.1704) acc 62.5000 (70.7955) lr 1.8090e-03 eta 8:22:00 +epoch [12/50] batch [280/500] time 1.658 (1.567) data 0.000 (0.004) loss 1.3418 (1.1770) acc 71.8750 (70.7031) lr 1.8090e-03 eta 8:22:00 +epoch [12/50] batch [285/500] time 1.547 (1.567) data 0.000 (0.004) loss 2.0605 (1.1802) acc 53.1250 (70.5811) lr 1.8090e-03 eta 8:21:51 +epoch [12/50] batch [290/500] 
time 1.551 (1.567) data 0.000 (0.004) loss 0.6538 (1.1825) acc 87.5000 (70.5172) lr 1.8090e-03 eta 8:21:42 +epoch [12/50] batch [295/500] time 1.573 (1.567) data 0.000 (0.004) loss 1.3711 (1.1838) acc 71.8750 (70.5191) lr 1.8090e-03 eta 8:21:34 +epoch [12/50] batch [300/500] time 1.555 (1.567) data 0.000 (0.004) loss 1.2754 (1.1871) acc 71.8750 (70.4792) lr 1.8090e-03 eta 8:21:25 +epoch [12/50] batch [305/500] time 1.558 (1.567) data 0.000 (0.004) loss 1.1562 (1.1868) acc 75.0000 (70.5020) lr 1.8090e-03 eta 8:21:13 +epoch [12/50] batch [310/500] time 1.565 (1.567) data 0.000 (0.003) loss 1.3174 (1.1853) acc 71.8750 (70.5141) lr 1.8090e-03 eta 8:21:02 +epoch [12/50] batch [315/500] time 1.556 (1.566) data 0.001 (0.003) loss 1.6133 (1.1886) acc 62.5000 (70.4365) lr 1.8090e-03 eta 8:20:48 +epoch [12/50] batch [320/500] time 1.559 (1.566) data 0.000 (0.003) loss 1.4473 (1.1905) acc 65.6250 (70.3613) lr 1.8090e-03 eta 8:20:37 +epoch [12/50] batch [325/500] time 1.562 (1.566) data 0.001 (0.003) loss 1.4521 (1.1937) acc 59.3750 (70.2981) lr 1.8090e-03 eta 8:20:32 +epoch [12/50] batch [330/500] time 1.545 (1.566) data 0.000 (0.003) loss 1.0820 (1.1933) acc 71.8750 (70.2936) lr 1.8090e-03 eta 8:20:21 +epoch [12/50] batch [335/500] time 1.547 (1.566) data 0.000 (0.003) loss 0.6738 (1.1920) acc 78.1250 (70.2892) lr 1.8090e-03 eta 8:20:09 +epoch [12/50] batch [340/500] time 1.541 (1.565) data 0.000 (0.003) loss 1.4531 (1.1929) acc 68.7500 (70.3033) lr 1.8090e-03 eta 8:19:54 +epoch [12/50] batch [345/500] time 1.566 (1.565) data 0.000 (0.003) loss 0.6870 (1.1876) acc 81.2500 (70.3714) lr 1.8090e-03 eta 8:19:46 +epoch [12/50] batch [350/500] time 1.535 (1.565) data 0.001 (0.003) loss 0.9863 (1.1842) acc 65.6250 (70.4196) lr 1.8090e-03 eta 8:19:35 +epoch [12/50] batch [355/500] time 1.564 (1.565) data 0.000 (0.003) loss 1.3555 (1.1835) acc 62.5000 (70.4049) lr 1.8090e-03 eta 8:19:25 +epoch [12/50] batch [360/500] time 1.552 (1.565) data 0.000 (0.003) loss 1.4238 (1.1857) acc 
68.7500 (70.3906) lr 1.8090e-03 eta 8:19:16 +epoch [12/50] batch [365/500] time 1.577 (1.565) data 0.000 (0.003) loss 1.1621 (1.1821) acc 75.0000 (70.4452) lr 1.8090e-03 eta 8:19:08 +epoch [12/50] batch [370/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.1455 (1.1824) acc 59.3750 (70.4392) lr 1.8090e-03 eta 8:18:58 +epoch [12/50] batch [375/500] time 1.543 (1.565) data 0.000 (0.003) loss 1.2979 (1.1823) acc 65.6250 (70.4417) lr 1.8090e-03 eta 8:18:47 +epoch [12/50] batch [380/500] time 1.558 (1.565) data 0.000 (0.003) loss 1.1689 (1.1809) acc 71.8750 (70.4934) lr 1.8090e-03 eta 8:18:36 +epoch [12/50] batch [385/500] time 1.553 (1.564) data 0.000 (0.003) loss 1.4023 (1.1786) acc 62.5000 (70.5357) lr 1.8090e-03 eta 8:18:25 +epoch [12/50] batch [390/500] time 1.544 (1.564) data 0.000 (0.003) loss 0.8701 (1.1778) acc 81.2500 (70.5288) lr 1.8090e-03 eta 8:18:17 +epoch [12/50] batch [395/500] time 1.576 (1.565) data 0.000 (0.003) loss 1.2139 (1.1758) acc 65.6250 (70.5301) lr 1.8090e-03 eta 8:18:12 +epoch [12/50] batch [400/500] time 1.585 (1.565) data 0.001 (0.003) loss 1.1094 (1.1796) acc 71.8750 (70.5000) lr 1.8090e-03 eta 8:18:06 +epoch [12/50] batch [405/500] time 1.551 (1.565) data 0.000 (0.003) loss 1.7725 (1.1790) acc 68.7500 (70.5478) lr 1.8090e-03 eta 8:17:55 +epoch [12/50] batch [410/500] time 1.545 (1.565) data 0.000 (0.003) loss 0.6118 (1.1805) acc 84.3750 (70.5412) lr 1.8090e-03 eta 8:17:46 +epoch [12/50] batch [415/500] time 1.584 (1.564) data 0.000 (0.003) loss 1.2158 (1.1813) acc 75.0000 (70.5045) lr 1.8090e-03 eta 8:17:38 +epoch [12/50] batch [420/500] time 1.562 (1.564) data 0.000 (0.003) loss 0.5122 (1.1801) acc 84.3750 (70.4985) lr 1.8090e-03 eta 8:17:29 +epoch [12/50] batch [425/500] time 1.542 (1.565) data 0.001 (0.003) loss 0.7734 (1.1783) acc 81.2500 (70.5735) lr 1.8090e-03 eta 8:17:25 +epoch [12/50] batch [430/500] time 1.542 (1.565) data 0.000 (0.003) loss 0.6489 (1.1748) acc 71.8750 (70.6105) lr 1.8090e-03 eta 8:17:17 +epoch [12/50] batch 
[435/500] time 1.553 (1.565) data 0.000 (0.003) loss 0.7476 (1.1742) acc 78.1250 (70.5963) lr 1.8090e-03 eta 8:17:09 +epoch [12/50] batch [440/500] time 1.579 (1.565) data 0.000 (0.003) loss 0.5874 (1.1753) acc 81.2500 (70.5824) lr 1.8090e-03 eta 8:17:00 +epoch [12/50] batch [445/500] time 1.567 (1.565) data 0.000 (0.003) loss 1.1055 (1.1737) acc 78.1250 (70.6180) lr 1.8090e-03 eta 8:16:54 +epoch [12/50] batch [450/500] time 1.554 (1.565) data 0.000 (0.003) loss 1.2178 (1.1742) acc 71.8750 (70.6111) lr 1.8090e-03 eta 8:16:45 +epoch [12/50] batch [455/500] time 1.563 (1.565) data 0.000 (0.002) loss 0.7354 (1.1741) acc 84.3750 (70.6250) lr 1.8090e-03 eta 8:16:37 +epoch [12/50] batch [460/500] time 1.577 (1.565) data 0.000 (0.002) loss 1.6885 (1.1756) acc 62.5000 (70.5842) lr 1.8090e-03 eta 8:16:29 +epoch [12/50] batch [465/500] time 1.578 (1.565) data 0.000 (0.002) loss 1.1895 (1.1760) acc 62.5000 (70.5712) lr 1.8090e-03 eta 8:16:21 +epoch [12/50] batch [470/500] time 1.571 (1.565) data 0.000 (0.002) loss 1.9795 (1.1775) acc 59.3750 (70.5585) lr 1.8090e-03 eta 8:16:16 +epoch [12/50] batch [475/500] time 1.547 (1.565) data 0.000 (0.002) loss 1.7754 (1.1770) acc 56.2500 (70.5789) lr 1.8090e-03 eta 8:16:07 +epoch [12/50] batch [480/500] time 1.560 (1.565) data 0.000 (0.002) loss 0.8237 (1.1773) acc 75.0000 (70.5469) lr 1.8090e-03 eta 8:15:59 +epoch [12/50] batch [485/500] time 1.580 (1.565) data 0.001 (0.002) loss 1.0703 (1.1771) acc 78.1250 (70.5412) lr 1.8090e-03 eta 8:15:52 +epoch [12/50] batch [490/500] time 1.547 (1.565) data 0.000 (0.002) loss 1.0371 (1.1770) acc 78.1250 (70.5357) lr 1.8090e-03 eta 8:15:44 +epoch [12/50] batch [495/500] time 1.573 (1.565) data 0.000 (0.002) loss 1.1650 (1.1791) acc 75.0000 (70.5177) lr 1.8090e-03 eta 8:15:37 +epoch [12/50] batch [500/500] time 1.538 (1.565) data 0.000 (0.002) loss 1.0879 (1.1775) acc 78.1250 (70.5250) lr 1.7705e-03 eta 8:15:26 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,939 +* accuracy: 
77.9% +* error: 22.1% +* macro_f1: 77.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [13/50] batch [5/500] time 1.568 (1.692) data 0.000 (0.187) loss 1.4023 (1.2686) acc 59.3750 (67.5000) lr 1.7705e-03 eta 8:55:34 +epoch [13/50] batch [10/500] time 1.552 (1.629) data 0.000 (0.094) loss 0.8232 (1.1961) acc 75.0000 (69.6875) lr 1.7705e-03 eta 8:35:25 +epoch [13/50] batch [15/500] time 1.550 (1.608) data 0.001 (0.063) loss 1.1309 (1.1639) acc 59.3750 (68.9583) lr 1.7705e-03 eta 8:28:49 +epoch [13/50] batch [20/500] time 1.586 (1.596) data 0.000 (0.047) loss 1.6973 (1.1297) acc 65.6250 (70.7812) lr 1.7705e-03 eta 8:24:56 +epoch [13/50] batch [25/500] time 1.701 (1.594) data 0.000 (0.038) loss 1.0449 (1.1209) acc 75.0000 (71.0000) lr 1.7705e-03 eta 8:24:15 +epoch [13/50] batch [30/500] time 1.556 (1.588) data 0.001 (0.032) loss 1.5361 (1.2021) acc 71.8750 (70.4167) lr 1.7705e-03 eta 8:22:06 +epoch [13/50] batch [35/500] time 1.552 (1.584) data 0.000 (0.027) loss 1.1465 (1.1791) acc 71.8750 (70.8036) lr 1.7705e-03 eta 8:20:44 +epoch [13/50] batch [40/500] time 1.578 (1.582) data 0.000 (0.024) loss 1.1162 (1.1576) acc 65.6250 (71.2500) lr 1.7705e-03 eta 8:19:47 +epoch [13/50] batch [45/500] time 1.562 (1.579) data 0.000 (0.021) loss 1.0107 (1.1777) acc 68.7500 (70.8333) lr 1.7705e-03 eta 8:18:40 +epoch [13/50] batch [50/500] time 1.561 (1.577) data 0.000 (0.019) loss 1.8555 (1.2033) acc 53.1250 (70.2500) lr 1.7705e-03 eta 8:17:56 +epoch [13/50] batch [55/500] time 1.578 (1.575) data 0.000 (0.017) loss 1.4756 (1.1866) acc 65.6250 (70.5682) lr 1.7705e-03 eta 8:17:27 +epoch [13/50] batch [60/500] time 1.579 (1.576) data 0.000 (0.016) loss 1.2119 (1.1614) acc 68.7500 (71.0938) lr 1.7705e-03 eta 8:17:21 +epoch [13/50] batch [65/500] time 1.571 (1.574) data 0.000 (0.015) loss 1.5049 (1.1661) acc 68.7500 (71.2500) lr 1.7705e-03 eta 8:16:39 +epoch [13/50] batch [70/500] time 1.553 
(1.573) data 0.000 (0.014) loss 1.3320 (1.1660) acc 62.5000 (71.4286) lr 1.7705e-03 eta 8:16:18 +epoch [13/50] batch [75/500] time 1.555 (1.572) data 0.000 (0.013) loss 1.8154 (1.1879) acc 53.1250 (70.8333) lr 1.7705e-03 eta 8:15:54 +epoch [13/50] batch [80/500] time 1.595 (1.573) data 0.001 (0.012) loss 0.9209 (1.1928) acc 81.2500 (70.9375) lr 1.7705e-03 eta 8:16:06 +epoch [13/50] batch [85/500] time 1.691 (1.574) data 0.001 (0.011) loss 1.3037 (1.1867) acc 75.0000 (71.2500) lr 1.7705e-03 eta 8:16:20 +epoch [13/50] batch [90/500] time 1.555 (1.574) data 0.000 (0.011) loss 0.5176 (1.1733) acc 84.3750 (71.4931) lr 1.7705e-03 eta 8:16:02 +epoch [13/50] batch [95/500] time 1.577 (1.573) data 0.000 (0.010) loss 0.4529 (1.1680) acc 90.6250 (71.7434) lr 1.7705e-03 eta 8:15:44 +epoch [13/50] batch [100/500] time 1.588 (1.573) data 0.000 (0.010) loss 0.5903 (1.1570) acc 78.1250 (71.5625) lr 1.7705e-03 eta 8:15:33 +epoch [13/50] batch [105/500] time 1.590 (1.573) data 0.000 (0.009) loss 0.9419 (1.1532) acc 75.0000 (71.8155) lr 1.7705e-03 eta 8:15:28 +epoch [13/50] batch [110/500] time 1.567 (1.573) data 0.001 (0.009) loss 0.6147 (1.1540) acc 81.2500 (71.8182) lr 1.7705e-03 eta 8:15:05 +epoch [13/50] batch [115/500] time 1.574 (1.572) data 0.000 (0.009) loss 1.5176 (1.1554) acc 68.7500 (71.8207) lr 1.7705e-03 eta 8:14:53 +epoch [13/50] batch [120/500] time 1.561 (1.572) data 0.000 (0.008) loss 1.3301 (1.1462) acc 65.6250 (71.8750) lr 1.7705e-03 eta 8:14:39 +epoch [13/50] batch [125/500] time 1.561 (1.572) data 0.000 (0.008) loss 1.0723 (1.1410) acc 71.8750 (72.0250) lr 1.7705e-03 eta 8:14:31 +epoch [13/50] batch [130/500] time 1.580 (1.573) data 0.000 (0.008) loss 1.4229 (1.1417) acc 62.5000 (71.9471) lr 1.7705e-03 eta 8:14:35 +epoch [13/50] batch [135/500] time 1.564 (1.573) data 0.001 (0.007) loss 1.2168 (1.1393) acc 65.6250 (71.9444) lr 1.7705e-03 eta 8:14:25 +epoch [13/50] batch [140/500] time 1.583 (1.572) data 0.001 (0.007) loss 0.8091 (1.1311) acc 78.1250 (72.0759) lr 
1.7705e-03 eta 8:14:12 +epoch [13/50] batch [145/500] time 1.553 (1.572) data 0.000 (0.007) loss 1.1699 (1.1277) acc 62.5000 (72.0043) lr 1.7705e-03 eta 8:14:04 +epoch [13/50] batch [150/500] time 1.562 (1.573) data 0.001 (0.007) loss 0.6748 (1.1260) acc 75.0000 (72.0417) lr 1.7705e-03 eta 8:14:02 +epoch [13/50] batch [155/500] time 1.577 (1.573) data 0.000 (0.006) loss 1.3350 (1.1251) acc 68.7500 (72.1976) lr 1.7705e-03 eta 8:13:56 +epoch [13/50] batch [160/500] time 1.582 (1.573) data 0.000 (0.006) loss 0.8755 (1.1190) acc 75.0000 (72.4023) lr 1.7705e-03 eta 8:13:48 +epoch [13/50] batch [165/500] time 1.578 (1.573) data 0.001 (0.006) loss 1.2627 (1.1192) acc 68.7500 (72.3106) lr 1.7705e-03 eta 8:13:40 +epoch [13/50] batch [170/500] time 1.560 (1.572) data 0.001 (0.006) loss 1.3291 (1.1260) acc 59.3750 (72.0588) lr 1.7705e-03 eta 8:13:27 +epoch [13/50] batch [175/500] time 1.561 (1.572) data 0.000 (0.006) loss 1.5518 (1.1285) acc 65.6250 (72.1071) lr 1.7705e-03 eta 8:13:10 +epoch [13/50] batch [180/500] time 1.568 (1.572) data 0.000 (0.006) loss 0.8076 (1.1311) acc 81.2500 (72.0660) lr 1.7705e-03 eta 8:12:59 +epoch [13/50] batch [185/500] time 1.560 (1.571) data 0.000 (0.005) loss 0.9351 (1.1271) acc 78.1250 (72.0946) lr 1.7705e-03 eta 8:12:42 +epoch [13/50] batch [190/500] time 1.565 (1.571) data 0.000 (0.005) loss 0.7373 (1.1244) acc 87.5000 (72.0559) lr 1.7705e-03 eta 8:12:27 +epoch [13/50] batch [195/500] time 1.566 (1.570) data 0.000 (0.005) loss 1.0586 (1.1259) acc 71.8750 (72.0513) lr 1.7705e-03 eta 8:12:11 +epoch [13/50] batch [200/500] time 1.568 (1.570) data 0.000 (0.005) loss 1.5088 (1.1201) acc 65.6250 (72.2188) lr 1.7705e-03 eta 8:12:02 +epoch [13/50] batch [205/500] time 1.555 (1.570) data 0.000 (0.005) loss 1.0010 (1.1171) acc 78.1250 (72.3323) lr 1.7705e-03 eta 8:11:48 +epoch [13/50] batch [210/500] time 1.569 (1.570) data 0.000 (0.005) loss 0.8477 (1.1195) acc 71.8750 (72.2470) lr 1.7705e-03 eta 8:11:37 +epoch [13/50] batch [215/500] time 1.577 
(1.570) data 0.000 (0.005) loss 1.5264 (1.1201) acc 65.6250 (72.2965) lr 1.7705e-03 eta 8:11:23 +epoch [13/50] batch [220/500] time 1.559 (1.569) data 0.000 (0.005) loss 1.0977 (1.1203) acc 84.3750 (72.2443) lr 1.7705e-03 eta 8:11:11 +epoch [13/50] batch [225/500] time 1.564 (1.569) data 0.001 (0.005) loss 1.1953 (1.1209) acc 75.0000 (72.2778) lr 1.7705e-03 eta 8:11:02 +epoch [13/50] batch [230/500] time 1.572 (1.570) data 0.000 (0.005) loss 0.9565 (1.1234) acc 71.8750 (72.1875) lr 1.7705e-03 eta 8:11:02 +epoch [13/50] batch [235/500] time 1.567 (1.569) data 0.000 (0.004) loss 0.9839 (1.1254) acc 78.1250 (72.1809) lr 1.7705e-03 eta 8:10:51 +epoch [13/50] batch [240/500] time 1.567 (1.569) data 0.000 (0.004) loss 1.9648 (1.1306) acc 71.8750 (72.2135) lr 1.7705e-03 eta 8:10:41 +epoch [13/50] batch [245/500] time 1.561 (1.569) data 0.000 (0.004) loss 0.9800 (1.1281) acc 71.8750 (72.2321) lr 1.7705e-03 eta 8:10:34 +epoch [13/50] batch [250/500] time 1.559 (1.569) data 0.000 (0.004) loss 0.9834 (1.1265) acc 75.0000 (72.2250) lr 1.7705e-03 eta 8:10:22 +epoch [13/50] batch [255/500] time 1.554 (1.569) data 0.000 (0.004) loss 0.9395 (1.1275) acc 75.0000 (72.2059) lr 1.7705e-03 eta 8:10:11 +epoch [13/50] batch [260/500] time 1.546 (1.569) data 0.000 (0.004) loss 0.9473 (1.1275) acc 78.1250 (72.1995) lr 1.7705e-03 eta 8:09:58 +epoch [13/50] batch [265/500] time 1.556 (1.568) data 0.000 (0.004) loss 0.9873 (1.1286) acc 81.2500 (72.2642) lr 1.7705e-03 eta 8:09:44 +epoch [13/50] batch [270/500] time 1.570 (1.568) data 0.000 (0.004) loss 1.0049 (1.1263) acc 84.3750 (72.3264) lr 1.7705e-03 eta 8:09:34 +epoch [13/50] batch [275/500] time 1.574 (1.569) data 0.000 (0.004) loss 1.8086 (1.1285) acc 56.2500 (72.2386) lr 1.7705e-03 eta 8:09:31 +epoch [13/50] batch [280/500] time 1.545 (1.568) data 0.000 (0.004) loss 1.5117 (1.1308) acc 65.6250 (72.1540) lr 1.7705e-03 eta 8:09:18 +epoch [13/50] batch [285/500] time 1.537 (1.568) data 0.000 (0.004) loss 1.8682 (1.1321) acc 65.6250 
(72.2259) lr 1.7705e-03 eta 8:09:05 +epoch [13/50] batch [290/500] time 1.529 (1.568) data 0.000 (0.004) loss 0.5522 (1.1331) acc 87.5000 (72.2091) lr 1.7705e-03 eta 8:08:50 +epoch [13/50] batch [295/500] time 1.569 (1.568) data 0.000 (0.004) loss 0.7471 (1.1324) acc 81.2500 (72.2352) lr 1.7705e-03 eta 8:08:42 +epoch [13/50] batch [300/500] time 1.550 (1.567) data 0.001 (0.004) loss 0.5283 (1.1274) acc 84.3750 (72.2708) lr 1.7705e-03 eta 8:08:30 +epoch [13/50] batch [305/500] time 1.594 (1.567) data 0.000 (0.004) loss 0.5962 (1.1242) acc 81.2500 (72.2848) lr 1.7705e-03 eta 8:08:22 +epoch [13/50] batch [310/500] time 1.574 (1.567) data 0.000 (0.003) loss 1.0801 (1.1238) acc 78.1250 (72.3690) lr 1.7705e-03 eta 8:08:16 +epoch [13/50] batch [315/500] time 1.541 (1.567) data 0.001 (0.003) loss 0.9150 (1.1225) acc 65.6250 (72.3214) lr 1.7705e-03 eta 8:08:03 +epoch [13/50] batch [320/500] time 1.572 (1.567) data 0.000 (0.003) loss 0.7803 (1.1245) acc 87.5000 (72.3145) lr 1.7705e-03 eta 8:07:53 +epoch [13/50] batch [325/500] time 1.562 (1.567) data 0.000 (0.003) loss 1.4629 (1.1248) acc 65.6250 (72.2885) lr 1.7705e-03 eta 8:07:43 +epoch [13/50] batch [330/500] time 1.546 (1.567) data 0.000 (0.003) loss 1.2988 (1.1229) acc 59.3750 (72.2538) lr 1.7705e-03 eta 8:07:31 +epoch [13/50] batch [335/500] time 1.546 (1.567) data 0.001 (0.003) loss 1.5420 (1.1256) acc 62.5000 (72.1642) lr 1.7705e-03 eta 8:07:22 +epoch [13/50] batch [340/500] time 1.571 (1.567) data 0.001 (0.003) loss 1.1426 (1.1232) acc 62.5000 (72.1691) lr 1.7705e-03 eta 8:07:13 +epoch [13/50] batch [345/500] time 1.565 (1.566) data 0.000 (0.003) loss 1.1016 (1.1244) acc 75.0000 (72.1286) lr 1.7705e-03 eta 8:07:02 +epoch [13/50] batch [350/500] time 1.566 (1.566) data 0.000 (0.003) loss 1.5518 (1.1274) acc 68.7500 (72.0714) lr 1.7705e-03 eta 8:06:52 +epoch [13/50] batch [355/500] time 1.558 (1.566) data 0.000 (0.003) loss 1.2783 (1.1256) acc 65.6250 (72.1127) lr 1.7705e-03 eta 8:06:45 +epoch [13/50] batch [360/500] 
time 1.555 (1.566) data 0.000 (0.003) loss 1.4121 (1.1252) acc 65.6250 (72.1267) lr 1.7705e-03 eta 8:06:36 +epoch [13/50] batch [365/500] time 1.556 (1.566) data 0.000 (0.003) loss 0.7231 (1.1231) acc 87.5000 (72.1918) lr 1.7705e-03 eta 8:06:26 +epoch [13/50] batch [370/500] time 1.523 (1.566) data 0.000 (0.003) loss 1.3047 (1.1244) acc 75.0000 (72.1875) lr 1.7705e-03 eta 8:06:13 +epoch [13/50] batch [375/500] time 1.549 (1.566) data 0.000 (0.003) loss 1.2764 (1.1259) acc 62.5000 (72.1750) lr 1.7705e-03 eta 8:06:07 +epoch [13/50] batch [380/500] time 1.539 (1.566) data 0.000 (0.003) loss 0.9976 (1.1266) acc 78.1250 (72.1957) lr 1.7705e-03 eta 8:05:58 +epoch [13/50] batch [385/500] time 1.544 (1.566) data 0.000 (0.003) loss 2.0781 (1.1280) acc 59.3750 (72.1591) lr 1.7705e-03 eta 8:05:46 +epoch [13/50] batch [390/500] time 1.554 (1.566) data 0.000 (0.003) loss 1.3223 (1.1292) acc 62.5000 (72.0913) lr 1.7705e-03 eta 8:05:35 +epoch [13/50] batch [395/500] time 1.560 (1.565) data 0.000 (0.003) loss 0.9292 (1.1271) acc 68.7500 (72.1203) lr 1.7705e-03 eta 8:05:24 +epoch [13/50] batch [400/500] time 1.560 (1.565) data 0.000 (0.003) loss 0.6021 (1.1250) acc 87.5000 (72.1797) lr 1.7705e-03 eta 8:05:15 +epoch [13/50] batch [405/500] time 1.576 (1.565) data 0.000 (0.003) loss 1.6250 (1.1287) acc 59.3750 (72.0833) lr 1.7705e-03 eta 8:05:08 +epoch [13/50] batch [410/500] time 1.556 (1.565) data 0.000 (0.003) loss 1.3545 (1.1300) acc 65.6250 (72.0884) lr 1.7705e-03 eta 8:05:00 +epoch [13/50] batch [415/500] time 1.647 (1.566) data 0.000 (0.003) loss 1.0127 (1.1300) acc 78.1250 (72.1084) lr 1.7705e-03 eta 8:04:55 +epoch [13/50] batch [420/500] time 1.571 (1.566) data 0.000 (0.003) loss 1.1689 (1.1304) acc 68.7500 (72.0908) lr 1.7705e-03 eta 8:04:48 +epoch [13/50] batch [425/500] time 1.558 (1.565) data 0.000 (0.003) loss 0.9985 (1.1306) acc 84.3750 (72.0956) lr 1.7705e-03 eta 8:04:37 +epoch [13/50] batch [430/500] time 1.562 (1.565) data 0.000 (0.003) loss 1.7705 (1.1331) acc 
68.7500 (72.1076) lr 1.7705e-03 eta 8:04:28 +epoch [13/50] batch [435/500] time 1.558 (1.565) data 0.000 (0.003) loss 0.7461 (1.1353) acc 78.1250 (72.0402) lr 1.7705e-03 eta 8:04:21 +epoch [13/50] batch [440/500] time 1.568 (1.566) data 0.000 (0.003) loss 1.3799 (1.1353) acc 68.7500 (72.0241) lr 1.7705e-03 eta 8:04:16 +epoch [13/50] batch [445/500] time 1.558 (1.565) data 0.001 (0.003) loss 0.9341 (1.1374) acc 71.8750 (71.9733) lr 1.7705e-03 eta 8:04:06 +epoch [13/50] batch [450/500] time 1.558 (1.565) data 0.000 (0.002) loss 1.2920 (1.1366) acc 68.7500 (71.9722) lr 1.7705e-03 eta 8:03:58 +epoch [13/50] batch [455/500] time 1.569 (1.565) data 0.000 (0.002) loss 1.2930 (1.1373) acc 62.5000 (71.9231) lr 1.7705e-03 eta 8:03:50 +epoch [13/50] batch [460/500] time 1.562 (1.565) data 0.000 (0.002) loss 1.1250 (1.1389) acc 71.8750 (71.9293) lr 1.7705e-03 eta 8:03:42 +epoch [13/50] batch [465/500] time 1.579 (1.565) data 0.000 (0.002) loss 1.1777 (1.1400) acc 71.8750 (71.9288) lr 1.7705e-03 eta 8:03:35 +epoch [13/50] batch [470/500] time 1.558 (1.565) data 0.000 (0.002) loss 1.5703 (1.1433) acc 68.7500 (71.8949) lr 1.7705e-03 eta 8:03:26 +epoch [13/50] batch [475/500] time 1.566 (1.565) data 0.000 (0.002) loss 0.8569 (1.1456) acc 75.0000 (71.8289) lr 1.7705e-03 eta 8:03:16 +epoch [13/50] batch [480/500] time 1.577 (1.565) data 0.000 (0.002) loss 1.1826 (1.1474) acc 71.8750 (71.7708) lr 1.7705e-03 eta 8:03:07 +epoch [13/50] batch [485/500] time 1.573 (1.565) data 0.001 (0.002) loss 1.4863 (1.1479) acc 62.5000 (71.7719) lr 1.7705e-03 eta 8:02:58 +epoch [13/50] batch [490/500] time 1.552 (1.565) data 0.000 (0.002) loss 1.3623 (1.1499) acc 65.6250 (71.7538) lr 1.7705e-03 eta 8:02:49 +epoch [13/50] batch [495/500] time 1.588 (1.565) data 0.000 (0.002) loss 1.4883 (1.1489) acc 62.5000 (71.7740) lr 1.7705e-03 eta 8:02:41 +epoch [13/50] batch [500/500] time 1.571 (1.565) data 0.000 (0.002) loss 1.1387 (1.1500) acc 65.6250 (71.7625) lr 1.7290e-03 eta 8:02:32 +Evaluate on the *val* 
set +=> result +* total: 50,000 +* correct: 38,882 +* accuracy: 77.8% +* error: 22.2% +* macro_f1: 77.2% +epoch [14/50] batch [5/500] time 1.558 (1.685) data 0.000 (0.182) loss 1.6699 (1.1060) acc 68.7500 (75.6250) lr 1.7290e-03 eta 8:39:20 +epoch [14/50] batch [10/500] time 1.549 (1.626) data 0.001 (0.091) loss 1.6719 (1.1472) acc 59.3750 (71.8750) lr 1.7290e-03 eta 8:21:07 +epoch [14/50] batch [15/500] time 1.561 (1.600) data 0.001 (0.061) loss 1.1865 (1.1371) acc 71.8750 (72.7083) lr 1.7290e-03 eta 8:12:55 +epoch [14/50] batch [20/500] time 1.552 (1.591) data 0.000 (0.046) loss 0.9512 (1.1165) acc 78.1250 (73.1250) lr 1.7290e-03 eta 8:10:09 +epoch [14/50] batch [25/500] time 1.566 (1.591) data 0.001 (0.037) loss 1.4453 (1.1604) acc 65.6250 (72.3750) lr 1.7290e-03 eta 8:10:02 +epoch [14/50] batch [30/500] time 1.556 (1.586) data 0.000 (0.031) loss 0.6357 (1.1209) acc 84.3750 (73.0208) lr 1.7290e-03 eta 8:08:18 +epoch [14/50] batch [35/500] time 1.547 (1.582) data 0.001 (0.026) loss 0.7749 (1.1366) acc 71.8750 (72.1429) lr 1.7290e-03 eta 8:06:57 +epoch [14/50] batch [40/500] time 1.577 (1.580) data 0.000 (0.023) loss 0.6245 (1.1208) acc 81.2500 (71.7969) lr 1.7290e-03 eta 8:06:06 +epoch [14/50] batch [45/500] time 1.556 (1.578) data 0.000 (0.021) loss 1.3994 (1.1218) acc 62.5000 (71.3889) lr 1.7290e-03 eta 8:05:24 +epoch [14/50] batch [50/500] time 1.561 (1.576) data 0.000 (0.019) loss 0.8740 (1.1170) acc 75.0000 (71.1250) lr 1.7290e-03 eta 8:04:35 +epoch [14/50] batch [55/500] time 1.562 (1.575) data 0.000 (0.017) loss 1.0195 (1.1128) acc 78.1250 (71.3636) lr 1.7290e-03 eta 8:04:18 +epoch [14/50] batch [60/500] time 1.567 (1.575) data 0.002 (0.016) loss 0.8916 (1.1099) acc 75.0000 (71.2500) lr 1.7290e-03 eta 8:04:01 +epoch [14/50] batch [65/500] time 1.573 (1.574) data 0.001 (0.014) loss 0.6753 (1.0995) acc 87.5000 (71.5865) lr 1.7290e-03 eta 8:03:35 +epoch [14/50] batch [70/500] time 1.580 (1.574) data 0.000 (0.013) loss 0.8735 (1.0890) acc 71.8750 (71.6518) lr 
1.7290e-03 eta 8:03:27 +epoch [14/50] batch [75/500] time 1.556 (1.573) data 0.000 (0.013) loss 1.5195 (1.0758) acc 56.2500 (71.9583) lr 1.7290e-03 eta 8:03:03 +epoch [14/50] batch [80/500] time 1.560 (1.572) data 0.001 (0.012) loss 0.8354 (1.0669) acc 71.8750 (72.1875) lr 1.7290e-03 eta 8:02:37 +epoch [14/50] batch [85/500] time 1.546 (1.572) data 0.000 (0.011) loss 0.8843 (1.0622) acc 68.7500 (71.9485) lr 1.7290e-03 eta 8:02:19 +epoch [14/50] batch [90/500] time 1.572 (1.571) data 0.001 (0.011) loss 0.9424 (1.0546) acc 75.0000 (72.2917) lr 1.7290e-03 eta 8:02:00 +epoch [14/50] batch [95/500] time 1.555 (1.571) data 0.001 (0.010) loss 1.4521 (1.0513) acc 56.2500 (72.5000) lr 1.7290e-03 eta 8:01:50 +epoch [14/50] batch [100/500] time 1.563 (1.570) data 0.000 (0.010) loss 0.7183 (1.0506) acc 78.1250 (72.5625) lr 1.7290e-03 eta 8:01:32 +epoch [14/50] batch [105/500] time 1.547 (1.570) data 0.000 (0.009) loss 0.7397 (1.0436) acc 71.8750 (72.7679) lr 1.7290e-03 eta 8:01:21 +epoch [14/50] batch [110/500] time 1.571 (1.570) data 0.000 (0.009) loss 1.0020 (1.0365) acc 81.2500 (72.9830) lr 1.7290e-03 eta 8:01:12 +epoch [14/50] batch [115/500] time 1.564 (1.570) data 0.000 (0.008) loss 0.9219 (1.0401) acc 71.8750 (72.8804) lr 1.7290e-03 eta 8:00:57 +epoch [14/50] batch [120/500] time 1.666 (1.571) data 0.000 (0.008) loss 0.5801 (1.0370) acc 81.2500 (72.9427) lr 1.7290e-03 eta 8:01:07 +epoch [14/50] batch [125/500] time 1.576 (1.570) data 0.000 (0.008) loss 1.3545 (1.0403) acc 65.6250 (72.8750) lr 1.7290e-03 eta 8:00:53 +epoch [14/50] batch [130/500] time 1.562 (1.570) data 0.001 (0.007) loss 0.8560 (1.0361) acc 78.1250 (72.9808) lr 1.7290e-03 eta 8:00:36 +epoch [14/50] batch [135/500] time 1.566 (1.569) data 0.000 (0.007) loss 1.3047 (1.0431) acc 59.3750 (72.6620) lr 1.7290e-03 eta 8:00:23 +epoch [14/50] batch [140/500] time 1.551 (1.569) data 0.001 (0.007) loss 0.8604 (1.0465) acc 71.8750 (72.5446) lr 1.7290e-03 eta 8:00:04 +epoch [14/50] batch [145/500] time 1.582 (1.569) 
data 0.000 (0.007) loss 1.1221 (1.0496) acc 75.0000 (72.5431) lr 1.7290e-03 eta 7:59:54 +epoch [14/50] batch [150/500] time 1.576 (1.569) data 0.000 (0.007) loss 0.8403 (1.0588) acc 81.2500 (72.4792) lr 1.7290e-03 eta 7:59:44 +epoch [14/50] batch [155/500] time 1.568 (1.568) data 0.000 (0.006) loss 1.2393 (1.0669) acc 75.0000 (72.4395) lr 1.7290e-03 eta 7:59:32 +epoch [14/50] batch [160/500] time 1.573 (1.569) data 0.000 (0.006) loss 1.3604 (1.0757) acc 68.7500 (72.3047) lr 1.7290e-03 eta 7:59:28 +epoch [14/50] batch [165/500] time 1.555 (1.569) data 0.001 (0.006) loss 1.1025 (1.0767) acc 78.1250 (72.2538) lr 1.7290e-03 eta 7:59:29 +epoch [14/50] batch [170/500] time 1.565 (1.569) data 0.000 (0.006) loss 0.7393 (1.0732) acc 81.2500 (72.3713) lr 1.7290e-03 eta 7:59:15 +epoch [14/50] batch [175/500] time 1.552 (1.569) data 0.000 (0.006) loss 0.8130 (1.0724) acc 81.2500 (72.3393) lr 1.7290e-03 eta 7:59:08 +epoch [14/50] batch [180/500] time 1.555 (1.569) data 0.000 (0.006) loss 0.6216 (1.0717) acc 81.2500 (72.4132) lr 1.7290e-03 eta 7:58:55 +epoch [14/50] batch [185/500] time 1.572 (1.569) data 0.000 (0.005) loss 1.4141 (1.0723) acc 71.8750 (72.4493) lr 1.7290e-03 eta 7:58:47 +epoch [14/50] batch [190/500] time 1.546 (1.568) data 0.000 (0.005) loss 0.9702 (1.0734) acc 75.0000 (72.4671) lr 1.7290e-03 eta 7:58:33 +epoch [14/50] batch [195/500] time 1.537 (1.567) data 0.000 (0.005) loss 1.5400 (1.0808) acc 65.6250 (72.3077) lr 1.7290e-03 eta 7:58:12 +epoch [14/50] batch [200/500] time 1.569 (1.567) data 0.000 (0.005) loss 1.1094 (1.0913) acc 78.1250 (72.1875) lr 1.7290e-03 eta 7:58:01 +epoch [14/50] batch [205/500] time 1.543 (1.567) data 0.000 (0.005) loss 1.0117 (1.0900) acc 75.0000 (72.3018) lr 1.7290e-03 eta 7:57:43 +epoch [14/50] batch [210/500] time 1.556 (1.567) data 0.001 (0.005) loss 1.4043 (1.0909) acc 65.6250 (72.2768) lr 1.7290e-03 eta 7:57:32 +epoch [14/50] batch [215/500] time 1.555 (1.566) data 0.000 (0.005) loss 1.1328 (1.0943) acc 62.5000 (72.0494) lr 
1.7290e-03 eta 7:57:20 +epoch [14/50] batch [220/500] time 1.548 (1.566) data 0.000 (0.005) loss 0.5225 (1.0927) acc 87.5000 (72.0881) lr 1.7290e-03 eta 7:57:06 +epoch [14/50] batch [225/500] time 1.547 (1.566) data 0.000 (0.004) loss 0.9956 (1.0937) acc 75.0000 (71.9861) lr 1.7290e-03 eta 7:56:58 +epoch [14/50] batch [230/500] time 1.561 (1.566) data 0.000 (0.004) loss 1.1094 (1.0941) acc 68.7500 (71.9429) lr 1.7290e-03 eta 7:56:46 +epoch [14/50] batch [235/500] time 1.549 (1.566) data 0.000 (0.004) loss 0.9902 (1.0977) acc 71.8750 (71.8351) lr 1.7290e-03 eta 7:56:37 +epoch [14/50] batch [240/500] time 1.557 (1.566) data 0.000 (0.004) loss 1.1924 (1.0996) acc 71.8750 (71.7839) lr 1.7290e-03 eta 7:56:31 +epoch [14/50] batch [245/500] time 1.554 (1.566) data 0.000 (0.004) loss 1.4561 (1.1069) acc 62.5000 (71.6327) lr 1.7290e-03 eta 7:56:21 +epoch [14/50] batch [250/500] time 1.562 (1.566) data 0.000 (0.004) loss 1.4111 (1.1066) acc 65.6250 (71.6125) lr 1.7290e-03 eta 7:56:11 +epoch [14/50] batch [255/500] time 1.553 (1.566) data 0.000 (0.004) loss 1.5078 (1.1100) acc 65.6250 (71.5564) lr 1.7290e-03 eta 7:56:02 +epoch [14/50] batch [260/500] time 1.587 (1.566) data 0.000 (0.004) loss 1.4082 (1.1104) acc 71.8750 (71.5865) lr 1.7290e-03 eta 7:55:55 +epoch [14/50] batch [265/500] time 1.557 (1.566) data 0.000 (0.004) loss 0.8193 (1.1070) acc 75.0000 (71.6509) lr 1.7290e-03 eta 7:55:54 +epoch [14/50] batch [270/500] time 1.566 (1.566) data 0.000 (0.004) loss 0.8232 (1.1043) acc 75.0000 (71.6898) lr 1.7290e-03 eta 7:55:50 +epoch [14/50] batch [275/500] time 1.566 (1.566) data 0.000 (0.004) loss 0.9526 (1.1048) acc 65.6250 (71.6591) lr 1.7290e-03 eta 7:55:39 +epoch [14/50] batch [280/500] time 1.564 (1.566) data 0.000 (0.004) loss 1.1846 (1.1045) acc 75.0000 (71.6406) lr 1.7290e-03 eta 7:55:33 +epoch [14/50] batch [285/500] time 1.559 (1.566) data 0.000 (0.004) loss 1.0244 (1.1045) acc 71.8750 (71.6886) lr 1.7290e-03 eta 7:55:27 +epoch [14/50] batch [290/500] time 1.580 
(1.566) data 0.000 (0.004) loss 1.8477 (1.1068) acc 62.5000 (71.6595) lr 1.7290e-03 eta 7:55:20 +epoch [14/50] batch [295/500] time 1.585 (1.566) data 0.000 (0.004) loss 0.9258 (1.1015) acc 75.0000 (71.7797) lr 1.7290e-03 eta 7:55:16 +epoch [14/50] batch [300/500] time 1.547 (1.566) data 0.000 (0.003) loss 1.4131 (1.1039) acc 59.3750 (71.7188) lr 1.7290e-03 eta 7:55:07 +epoch [14/50] batch [305/500] time 1.558 (1.566) data 0.000 (0.003) loss 1.0605 (1.1087) acc 71.8750 (71.6701) lr 1.7290e-03 eta 7:55:02 +epoch [14/50] batch [310/500] time 1.574 (1.567) data 0.001 (0.003) loss 0.6431 (1.1064) acc 81.2500 (71.7641) lr 1.7290e-03 eta 7:55:01 +epoch [14/50] batch [315/500] time 1.591 (1.567) data 0.001 (0.003) loss 1.3613 (1.1075) acc 71.8750 (71.7758) lr 1.7290e-03 eta 7:54:56 +epoch [14/50] batch [320/500] time 1.567 (1.567) data 0.001 (0.003) loss 1.9014 (1.1126) acc 46.8750 (71.6699) lr 1.7290e-03 eta 7:54:49 +epoch [14/50] batch [325/500] time 1.567 (1.567) data 0.000 (0.003) loss 1.1914 (1.1149) acc 78.1250 (71.6923) lr 1.7290e-03 eta 7:54:41 +epoch [14/50] batch [330/500] time 1.538 (1.567) data 0.001 (0.003) loss 0.6123 (1.1158) acc 78.1250 (71.6288) lr 1.7290e-03 eta 7:54:31 +epoch [14/50] batch [335/500] time 1.551 (1.567) data 0.000 (0.003) loss 0.9512 (1.1173) acc 78.1250 (71.6045) lr 1.7290e-03 eta 7:54:21 +epoch [14/50] batch [340/500] time 1.554 (1.567) data 0.000 (0.003) loss 1.4336 (1.1180) acc 68.7500 (71.6544) lr 1.7290e-03 eta 7:54:11 +epoch [14/50] batch [345/500] time 1.585 (1.567) data 0.000 (0.003) loss 1.2080 (1.1183) acc 59.3750 (71.6123) lr 1.7290e-03 eta 7:54:04 +epoch [14/50] batch [350/500] time 1.567 (1.567) data 0.000 (0.003) loss 1.3291 (1.1218) acc 59.3750 (71.5268) lr 1.7290e-03 eta 7:53:56 +epoch [14/50] batch [355/500] time 1.554 (1.567) data 0.000 (0.003) loss 0.8003 (1.1192) acc 71.8750 (71.5757) lr 1.7290e-03 eta 7:53:48 +epoch [14/50] batch [360/500] time 1.585 (1.567) data 0.000 (0.003) loss 1.6455 (1.1184) acc 53.1250 
(71.5712) lr 1.7290e-03 eta 7:53:41 +epoch [14/50] batch [365/500] time 1.571 (1.567) data 0.000 (0.003) loss 0.9199 (1.1169) acc 68.7500 (71.5668) lr 1.7290e-03 eta 7:53:33 +epoch [14/50] batch [370/500] time 1.579 (1.567) data 0.000 (0.003) loss 1.4580 (1.1217) acc 75.0000 (71.4949) lr 1.7290e-03 eta 7:53:26 +epoch [14/50] batch [375/500] time 1.573 (1.567) data 0.000 (0.003) loss 0.8262 (1.1192) acc 78.1250 (71.5583) lr 1.7290e-03 eta 7:53:19 +epoch [14/50] batch [380/500] time 1.555 (1.567) data 0.000 (0.003) loss 1.1621 (1.1189) acc 78.1250 (71.5461) lr 1.7290e-03 eta 7:53:11 +epoch [14/50] batch [385/500] time 1.566 (1.567) data 0.000 (0.003) loss 0.9863 (1.1157) acc 78.1250 (71.6153) lr 1.7290e-03 eta 7:52:59 +epoch [14/50] batch [390/500] time 1.577 (1.567) data 0.000 (0.003) loss 1.4541 (1.1214) acc 62.5000 (71.5304) lr 1.7290e-03 eta 7:52:51 +epoch [14/50] batch [395/500] time 1.562 (1.567) data 0.000 (0.003) loss 1.0439 (1.1226) acc 78.1250 (71.4953) lr 1.7290e-03 eta 7:52:42 +epoch [14/50] batch [400/500] time 1.546 (1.566) data 0.000 (0.003) loss 1.2949 (1.1248) acc 71.8750 (71.4688) lr 1.7290e-03 eta 7:52:33 +epoch [14/50] batch [405/500] time 1.561 (1.566) data 0.001 (0.003) loss 1.3506 (1.1241) acc 68.7500 (71.4892) lr 1.7290e-03 eta 7:52:25 +epoch [14/50] batch [410/500] time 1.573 (1.567) data 0.001 (0.003) loss 1.2119 (1.1233) acc 68.7500 (71.4634) lr 1.7290e-03 eta 7:52:20 +epoch [14/50] batch [415/500] time 1.587 (1.567) data 0.000 (0.003) loss 1.3340 (1.1250) acc 71.8750 (71.4533) lr 1.7290e-03 eta 7:52:12 +epoch [14/50] batch [420/500] time 1.580 (1.567) data 0.000 (0.003) loss 0.6797 (1.1209) acc 84.3750 (71.5476) lr 1.7290e-03 eta 7:52:04 +epoch [14/50] batch [425/500] time 1.579 (1.567) data 0.000 (0.003) loss 1.3740 (1.1213) acc 59.3750 (71.5588) lr 1.7290e-03 eta 7:51:58 +epoch [14/50] batch [430/500] time 1.556 (1.567) data 0.000 (0.003) loss 1.3076 (1.1222) acc 68.7500 (71.5625) lr 1.7290e-03 eta 7:51:50 +epoch [14/50] batch [435/500] 
time 1.551 (1.567) data 0.000 (0.003) loss 0.5659 (1.1217) acc 87.5000 (71.5805) lr 1.7290e-03 eta 7:51:41 +epoch [14/50] batch [440/500] time 1.550 (1.567) data 0.000 (0.002) loss 1.4209 (1.1247) acc 68.7500 (71.5412) lr 1.7290e-03 eta 7:51:32 +epoch [14/50] batch [445/500] time 1.561 (1.567) data 0.000 (0.002) loss 1.3652 (1.1255) acc 62.5000 (71.5028) lr 1.7290e-03 eta 7:51:25 +epoch [14/50] batch [450/500] time 1.685 (1.567) data 0.001 (0.002) loss 1.5645 (1.1264) acc 65.6250 (71.4792) lr 1.7290e-03 eta 7:51:21 +epoch [14/50] batch [455/500] time 1.553 (1.567) data 0.000 (0.002) loss 1.5098 (1.1280) acc 75.0000 (71.4698) lr 1.7290e-03 eta 7:51:08 +epoch [14/50] batch [460/500] time 1.577 (1.567) data 0.000 (0.002) loss 1.1562 (1.1282) acc 78.1250 (71.4674) lr 1.7290e-03 eta 7:51:01 +epoch [14/50] batch [465/500] time 1.558 (1.567) data 0.000 (0.002) loss 2.1562 (1.1322) acc 50.0000 (71.4113) lr 1.7290e-03 eta 7:50:52 +epoch [14/50] batch [470/500] time 1.547 (1.567) data 0.000 (0.002) loss 1.4219 (1.1335) acc 62.5000 (71.3630) lr 1.7290e-03 eta 7:50:45 +epoch [14/50] batch [475/500] time 1.575 (1.567) data 0.000 (0.002) loss 1.1953 (1.1350) acc 68.7500 (71.3092) lr 1.7290e-03 eta 7:50:39 +epoch [14/50] batch [480/500] time 1.545 (1.567) data 0.001 (0.002) loss 1.0039 (1.1373) acc 71.8750 (71.2695) lr 1.7290e-03 eta 7:50:31 +epoch [14/50] batch [485/500] time 1.579 (1.567) data 0.001 (0.002) loss 0.8340 (1.1384) acc 84.3750 (71.3080) lr 1.7290e-03 eta 7:50:25 +epoch [14/50] batch [490/500] time 1.549 (1.567) data 0.000 (0.002) loss 1.3145 (1.1374) acc 68.7500 (71.3074) lr 1.7290e-03 eta 7:50:16 +epoch [14/50] batch [495/500] time 1.553 (1.567) data 0.000 (0.002) loss 1.3906 (1.1374) acc 75.0000 (71.3321) lr 1.7290e-03 eta 7:50:08 +epoch [14/50] batch [500/500] time 1.572 (1.567) data 0.000 (0.002) loss 1.2373 (1.1379) acc 65.6250 (71.3000) lr 1.6845e-03 eta 7:50:01 +Evaluate on the *val* set diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 00000000..22cb2ffb --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model-best.pth.tar diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar new file mode 100644 index 00000000..674afaca Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699535713.ckb-gpu-v.mitre.org.78974.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699535713.ckb-gpu-v.mitre.org.78974.0 new file mode 100644 index 00000000..7af1c463 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699535713.ckb-gpu-v.mitre.org.78974.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..9afb9ed4 --- /dev/null +++ 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,999 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + 
ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: NVIDIA A100-SXM4-40GB +GPU 1: NVIDIA A100-SXM4-40GB +GPU 2: NVIDIA A100-SXM4-40GB +GPU 3: NVIDIA A100-SXM4-40GB + +Nvidia driver version: 525.125.06 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 43 bits physical, 48 bits virtual +CPU(s): 256 +On-line CPU(s) list: 0-255 +Thread(s) per core: 2 +Core(s) per socket: 64 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: AuthenticAMD +CPU family: 23 +Model: 49 +Model name: AMD EPYC 7H12 64-Core Processor +Stepping: 0 +Frequency boost: enabled +CPU MHz: 1430.454 +CPU max MHz: 2600.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5200.20 +Virtualization: AMD-V +L1d cache: 4 MiB +L1i cache: 4 MiB +L2 cache: 64 MiB +L3 cache: 512 MiB +NUMA node0 CPU(s): 0-63,128-191 +NUMA node1 CPU(s): 64-127,192-255 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected 
+Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Vulnerable +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca sme sev sev_es + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 
+[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/31] time 0.848 (1.791) data 0.000 (0.242) loss 3.2520 (3.2883) acc 34.3750 (34.3750) lr 1.0000e-05 eta 0:46:07 +epoch [1/50] batch [10/31] time 0.899 (1.338) data 0.000 (0.121) loss 3.3301 (3.0773) acc 34.3750 (36.2500) lr 1.0000e-05 eta 0:34:20 +epoch [1/50] batch [15/31] time 0.869 (1.184) data 0.000 (0.081) loss 2.5020 (2.8862) acc 46.8750 (40.0000) lr 1.0000e-05 eta 0:30:17 +epoch [1/50] batch [20/31] time 0.871 (1.106) data 0.000 (0.061) loss 3.4648 (2.8348) acc 34.3750 (42.0312) lr 1.0000e-05 eta 0:28:11 +epoch [1/50] batch [25/31] time 0.895 (1.063) data 0.000 (0.049) loss 2.0664 (2.6439) acc 65.6250 (45.2500) lr 1.0000e-05 eta 0:27:01 +epoch [1/50] batch [30/31] time 0.931 (1.036) data 0.000 (0.041) loss 2.5938 (2.5394) acc 43.7500 (46.7708) lr 1.0000e-05 eta 0:26:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 32,755 +* accuracy: 65.5% +* error: 34.5% +* macro_f1: 63.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/31] time 0.885 (0.972) data 0.000 (0.146) loss 0.9092 (1.4906) acc 75.0000 (59.3750) lr 2.0000e-03 eta 0:24:31 +epoch [2/50] batch [10/31] time 0.866 (0.927) data 0.000 (0.073) loss 1.9473 (1.6865) acc 56.2500 (57.8125) lr 2.0000e-03 eta 0:23:18 +epoch [2/50] batch [15/31] time 0.885 (0.911) data 0.000 (0.049) loss 1.6846 (1.6270) acc 59.3750 (61.4583) lr 2.0000e-03 eta 0:22:49 +epoch [2/50] batch [20/31] time 0.875 (0.904) data 0.000 (0.037) loss 1.9307 (1.5381) acc 46.8750 (62.8125) lr 2.0000e-03 eta 0:22:34 +epoch [2/50] batch [25/31] time 0.899 (0.901) data 0.000 (0.029) loss 1.3008 (1.5084) acc 62.5000 (63.2500) lr 2.0000e-03 eta 0:22:25 +epoch [2/50] batch [30/31] time 0.886 (0.903) data 
0.000 (0.024) loss 1.5566 (1.4461) acc 65.6250 (64.3750) lr 2.0000e-03 eta 0:22:25 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,297 +* accuracy: 74.6% +* error: 25.4% +* macro_f1: 73.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/31] time 0.896 (0.991) data 0.000 (0.167) loss 0.8872 (1.3119) acc 75.0000 (69.3750) lr 1.9980e-03 eta 0:24:29 +epoch [3/50] batch [10/31] time 0.882 (0.938) data 0.000 (0.084) loss 0.8140 (1.2232) acc 71.8750 (70.0000) lr 1.9980e-03 eta 0:23:06 +epoch [3/50] batch [15/31] time 0.885 (0.921) data 0.000 (0.056) loss 0.9478 (1.2718) acc 81.2500 (70.2083) lr 1.9980e-03 eta 0:22:36 +epoch [3/50] batch [20/31] time 0.880 (0.910) data 0.000 (0.042) loss 1.2627 (1.2977) acc 71.8750 (69.5312) lr 1.9980e-03 eta 0:22:16 +epoch [3/50] batch [25/31] time 0.883 (0.907) data 0.000 (0.034) loss 1.1738 (1.3329) acc 68.7500 (68.0000) lr 1.9980e-03 eta 0:22:07 +epoch [3/50] batch [30/31] time 0.926 (0.910) data 0.000 (0.028) loss 1.1367 (1.2945) acc 75.0000 (68.3333) lr 1.9980e-03 eta 0:22:06 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,754 +* accuracy: 75.5% +* error: 24.5% +* macro_f1: 74.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [4/50] batch [5/31] time 0.877 (0.994) data 0.000 (0.167) loss 1.6201 (1.1210) acc 62.5000 (71.8750) lr 1.9921e-03 eta 0:24:02 +epoch [4/50] batch [10/31] time 0.890 (0.934) data 0.000 (0.083) loss 1.0127 (1.1780) acc 68.7500 (70.3125) lr 1.9921e-03 eta 0:22:32 +epoch [4/50] batch [15/31] time 0.862 (0.916) data 0.000 (0.056) loss 1.3008 (1.2424) acc 71.8750 (69.5833) lr 1.9921e-03 eta 0:22:00 +epoch [4/50] batch [20/31] time 0.893 (0.908) data 0.000 (0.042) loss 2.1914 (1.2863) acc 53.1250 (69.3750) lr 1.9921e-03 eta 0:21:44 +epoch [4/50] batch [25/31] time 0.871 (0.903) 
data 0.000 (0.034) loss 0.7344 (1.2553) acc 78.1250 (69.7500) lr 1.9921e-03 eta 0:21:32 +epoch [4/50] batch [30/31] time 0.897 (0.905) data 0.000 (0.028) loss 1.7480 (1.2998) acc 59.3750 (68.4375) lr 1.9921e-03 eta 0:21:31 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,892 +* accuracy: 75.8% +* error: 24.2% +* macro_f1: 75.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [5/50] batch [5/31] time 0.913 (0.977) data 0.000 (0.148) loss 1.3018 (1.4043) acc 75.0000 (65.6250) lr 1.9823e-03 eta 0:23:08 +epoch [5/50] batch [10/31] time 0.901 (0.931) data 0.000 (0.074) loss 0.8940 (1.2365) acc 68.7500 (69.0625) lr 1.9823e-03 eta 0:21:58 +epoch [5/50] batch [15/31] time 0.883 (0.913) data 0.000 (0.050) loss 0.9497 (1.2506) acc 68.7500 (69.3750) lr 1.9823e-03 eta 0:21:27 +epoch [5/50] batch [20/31] time 0.898 (0.908) data 0.000 (0.037) loss 1.3467 (1.2136) acc 71.8750 (70.4688) lr 1.9823e-03 eta 0:21:16 +epoch [5/50] batch [25/31] time 0.891 (0.903) data 0.000 (0.030) loss 1.3174 (1.2354) acc 68.7500 (69.5000) lr 1.9823e-03 eta 0:21:04 +epoch [5/50] batch [30/31] time 0.896 (0.900) data 0.000 (0.025) loss 1.9980 (1.2541) acc 59.3750 (69.4792) lr 1.9823e-03 eta 0:20:56 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,790 +* accuracy: 75.6% +* error: 24.4% +* macro_f1: 74.8% +epoch [6/50] batch [5/31] time 0.864 (1.052) data 0.000 (0.238) loss 1.2295 (1.1853) acc 65.6250 (72.5000) lr 1.9686e-03 eta 0:24:21 +epoch [6/50] batch [10/31] time 0.901 (0.966) data 0.000 (0.119) loss 1.2041 (1.2354) acc 75.0000 (72.1875) lr 1.9686e-03 eta 0:22:17 +epoch [6/50] batch [15/31] time 0.899 (0.935) data 0.000 (0.080) loss 1.1943 (1.2196) acc 68.7500 (71.4583) lr 1.9686e-03 eta 0:21:30 +epoch [6/50] batch [20/31] time 0.900 (0.924) data 0.000 (0.060) loss 1.2080 (1.2304) acc 59.3750 (70.4688) lr 1.9686e-03 eta 0:21:10 +epoch [6/50] batch [25/31] time 0.867 
(0.921) data 0.000 (0.048) loss 1.1660 (1.2649) acc 68.7500 (69.0000) lr 1.9686e-03 eta 0:21:02 +epoch [6/50] batch [30/31] time 0.875 (0.915) data 0.000 (0.040) loss 1.3740 (1.2754) acc 62.5000 (68.6458) lr 1.9686e-03 eta 0:20:49 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,897 +* accuracy: 75.8% +* error: 24.2% +* macro_f1: 75.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [7/50] batch [5/31] time 0.901 (1.000) data 0.000 (0.178) loss 0.9160 (1.1259) acc 78.1250 (70.6250) lr 1.9511e-03 eta 0:22:38 +epoch [7/50] batch [10/31] time 0.887 (0.940) data 0.000 (0.089) loss 1.5439 (1.1247) acc 62.5000 (71.8750) lr 1.9511e-03 eta 0:21:12 +epoch [7/50] batch [15/31] time 0.865 (0.917) data 0.000 (0.059) loss 1.6309 (1.1862) acc 65.6250 (71.2500) lr 1.9511e-03 eta 0:20:36 +epoch [7/50] batch [20/31] time 0.872 (0.907) data 0.000 (0.045) loss 1.1133 (1.1856) acc 75.0000 (71.2500) lr 1.9511e-03 eta 0:20:18 +epoch [7/50] batch [25/31] time 0.891 (0.900) data 0.000 (0.036) loss 1.1533 (1.1518) acc 75.0000 (72.2500) lr 1.9511e-03 eta 0:20:05 +epoch [7/50] batch [30/31] time 0.899 (0.899) data 0.000 (0.030) loss 1.0312 (1.1234) acc 75.0000 (71.5625) lr 1.9511e-03 eta 0:19:58 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,737 +* accuracy: 75.5% +* error: 24.5% +* macro_f1: 74.7% +epoch [8/50] batch [5/31] time 0.901 (0.991) data 0.000 (0.171) loss 0.8101 (1.0462) acc 75.0000 (70.0000) lr 1.9298e-03 eta 0:21:55 +epoch [8/50] batch [10/31] time 0.863 (0.934) data 0.000 (0.086) loss 1.9209 (1.1958) acc 59.3750 (67.8125) lr 1.9298e-03 eta 0:20:35 +epoch [8/50] batch [15/31] time 0.903 (0.927) data 0.000 (0.057) loss 1.1016 (1.1840) acc 71.8750 (69.5833) lr 1.9298e-03 eta 0:20:22 +epoch [8/50] batch [20/31] time 0.874 (0.916) data 0.000 (0.043) loss 1.0625 (1.1754) acc 75.0000 (70.9375) lr 1.9298e-03 eta 0:20:02 +epoch [8/50] batch [25/31] 
time 0.900 (0.913) data 0.000 (0.035) loss 0.8574 (1.2035) acc 81.2500 (70.7500) lr 1.9298e-03 eta 0:19:53 +epoch [8/50] batch [30/31] time 0.872 (0.908) data 0.000 (0.029) loss 1.0049 (1.1626) acc 78.1250 (72.0833) lr 1.9298e-03 eta 0:19:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,818 +* accuracy: 75.6% +* error: 24.4% +* macro_f1: 74.9% +epoch [9/50] batch [5/31] time 0.899 (0.992) data 0.000 (0.172) loss 1.2334 (1.2978) acc 68.7500 (68.7500) lr 1.9048e-03 eta 0:21:26 +epoch [9/50] batch [10/31] time 0.887 (0.934) data 0.000 (0.086) loss 1.2051 (1.1667) acc 65.6250 (70.3125) lr 1.9048e-03 eta 0:20:06 +epoch [9/50] batch [15/31] time 0.878 (0.928) data 0.000 (0.058) loss 0.9771 (1.1819) acc 71.8750 (70.2083) lr 1.9048e-03 eta 0:19:54 +epoch [9/50] batch [20/31] time 0.890 (0.917) data 0.000 (0.043) loss 1.0518 (1.1873) acc 68.7500 (69.6875) lr 1.9048e-03 eta 0:19:35 +epoch [9/50] batch [25/31] time 0.896 (0.915) data 0.000 (0.035) loss 2.2754 (1.2481) acc 59.3750 (69.5000) lr 1.9048e-03 eta 0:19:28 +epoch [9/50] batch [30/31] time 0.896 (0.911) data 0.000 (0.029) loss 0.8281 (1.2225) acc 81.2500 (69.8958) lr 1.9048e-03 eta 0:19:18 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,954 +* accuracy: 75.9% +* error: 24.1% +* macro_f1: 75.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar +epoch [10/50] batch [5/31] time 0.890 (0.996) data 0.000 (0.162) loss 1.6455 (1.1373) acc 65.6250 (75.0000) lr 1.8763e-03 eta 0:21:00 +epoch [10/50] batch [10/31] time 0.907 (0.938) data 0.000 (0.081) loss 1.0127 (1.0890) acc 71.8750 (71.5625) lr 1.8763e-03 eta 0:19:43 +epoch [10/50] batch [15/31] time 0.884 (0.919) data 0.000 (0.054) loss 0.8979 (1.0939) acc 81.2500 (71.6667) lr 1.8763e-03 eta 0:19:14 +epoch [10/50] batch [20/31] time 0.903 (0.917) data 0.000 (0.041) loss 1.3516 (1.1165) acc 65.6250 (70.9375) lr 1.8763e-03 eta 0:19:07 +epoch [10/50] 
batch [25/31] time 0.901 (0.910) data 0.000 (0.033) loss 1.0420 (1.1205) acc 71.8750 (70.7500) lr 1.8763e-03 eta 0:18:53 +epoch [10/50] batch [30/31] time 0.881 (0.905) data 0.000 (0.027) loss 1.1924 (1.1153) acc 78.1250 (71.2500) lr 1.8763e-03 eta 0:18:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,602 +* accuracy: 75.2% +* error: 24.8% +* macro_f1: 74.4% +epoch [11/50] batch [5/31] time 1.061 (1.024) data 0.001 (0.165) loss 1.1670 (0.9580) acc 65.6250 (73.7500) lr 1.8443e-03 eta 0:21:04 +epoch [11/50] batch [10/31] time 0.862 (0.947) data 0.000 (0.083) loss 1.1631 (1.0304) acc 71.8750 (72.5000) lr 1.8443e-03 eta 0:19:24 +epoch [11/50] batch [15/31] time 0.876 (0.925) data 0.000 (0.055) loss 1.4395 (1.0790) acc 68.7500 (72.0833) lr 1.8443e-03 eta 0:18:53 +epoch [11/50] batch [20/31] time 0.878 (0.915) data 0.000 (0.042) loss 1.1328 (1.0555) acc 65.6250 (72.9688) lr 1.8443e-03 eta 0:18:36 +epoch [11/50] batch [25/31] time 0.849 (0.909) data 0.001 (0.033) loss 0.9546 (1.0706) acc 75.0000 (72.6250) lr 1.8443e-03 eta 0:18:24 +epoch [11/50] batch [30/31] time 0.867 (0.903) data 0.000 (0.028) loss 0.7925 (1.0717) acc 78.1250 (72.9167) lr 1.8443e-03 eta 0:18:12 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,461 +* accuracy: 74.9% +* error: 25.1% +* macro_f1: 74.1% +epoch [12/50] batch [5/31] time 0.886 (0.980) data 0.000 (0.159) loss 1.3281 (1.0457) acc 71.8750 (73.1250) lr 1.8090e-03 eta 0:19:40 +epoch [12/50] batch [10/31] time 0.887 (0.928) data 0.000 (0.079) loss 1.0742 (1.2715) acc 71.8750 (67.5000) lr 1.8090e-03 eta 0:18:32 +epoch [12/50] batch [15/31] time 0.885 (0.914) data 0.000 (0.053) loss 1.4307 (1.2462) acc 68.7500 (69.1667) lr 1.8090e-03 eta 0:18:10 +epoch [12/50] batch [20/31] time 0.870 (0.903) data 0.000 (0.040) loss 0.8076 (1.1340) acc 75.0000 (71.8750) lr 1.8090e-03 eta 0:17:53 +epoch [12/50] batch [25/31] time 0.865 (0.902) data 0.000 (0.032) loss 0.6460 (1.0852) acc 84.3750 (73.2500) lr 1.8090e-03 eta 
0:17:48 +epoch [12/50] batch [30/31] time 0.903 (0.900) data 0.000 (0.027) loss 1.6943 (1.0802) acc 65.6250 (73.2292) lr 1.8090e-03 eta 0:17:40 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,538 +* accuracy: 75.1% +* error: 24.9% +* macro_f1: 74.3% +epoch [13/50] batch [5/31] time 0.919 (0.991) data 0.001 (0.156) loss 1.1338 (1.1184) acc 68.7500 (71.2500) lr 1.7705e-03 eta 0:19:21 +epoch [13/50] batch [10/31] time 0.889 (0.936) data 0.000 (0.078) loss 0.6470 (1.0664) acc 81.2500 (71.2500) lr 1.7705e-03 eta 0:18:13 +epoch [13/50] batch [15/31] time 0.889 (0.923) data 0.000 (0.052) loss 1.1631 (1.0973) acc 71.8750 (71.6667) lr 1.7705e-03 eta 0:17:53 +epoch [13/50] batch [20/31] time 0.893 (0.913) data 0.000 (0.039) loss 0.8389 (1.0665) acc 78.1250 (72.5000) lr 1.7705e-03 eta 0:17:36 +epoch [13/50] batch [25/31] time 0.883 (0.911) data 0.000 (0.031) loss 0.9072 (1.0519) acc 78.1250 (72.8750) lr 1.7705e-03 eta 0:17:30 +epoch [13/50] batch [30/31] time 0.885 (0.905) data 0.000 (0.026) loss 1.1982 (1.0227) acc 71.8750 (73.4375) lr 1.7705e-03 eta 0:17:18 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,587 +* accuracy: 75.2% +* error: 24.8% +* macro_f1: 74.3% +epoch [14/50] batch [5/31] time 0.861 (0.977) data 0.000 (0.163) loss 0.7578 (1.1274) acc 84.3750 (70.6250) lr 1.7290e-03 eta 0:18:35 +epoch [14/50] batch [10/31] time 0.874 (0.931) data 0.000 (0.082) loss 1.5488 (1.0801) acc 75.0000 (75.0000) lr 1.7290e-03 eta 0:17:38 +epoch [14/50] batch [15/31] time 0.887 (0.918) data 0.000 (0.055) loss 0.5249 (1.1114) acc 78.1250 (73.9583) lr 1.7290e-03 eta 0:17:18 +epoch [14/50] batch [20/31] time 0.882 (0.914) data 0.000 (0.041) loss 1.3955 (1.0959) acc 71.8750 (74.8438) lr 1.7290e-03 eta 0:17:09 +epoch [14/50] batch [25/31] time 0.883 (0.908) data 0.000 (0.033) loss 0.8618 (1.0610) acc 71.8750 (74.7500) lr 1.7290e-03 eta 0:16:59 +epoch [14/50] batch [30/31] time 0.912 (0.905) data 0.000 (0.028) loss 0.7759 (1.0758) acc 84.3750 
(74.4792) lr 1.7290e-03 eta 0:16:50 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,475 +* accuracy: 75.0% +* error: 25.0% +* macro_f1: 74.1% +epoch [15/50] batch [5/31] time 0.915 (1.000) data 0.000 (0.164) loss 0.7251 (0.8655) acc 81.2500 (77.5000) lr 1.6845e-03 eta 0:18:30 +epoch [15/50] batch [10/31] time 0.893 (0.945) data 0.001 (0.082) loss 0.9185 (0.9269) acc 68.7500 (75.0000) lr 1.6845e-03 eta 0:17:25 +epoch [15/50] batch [15/31] time 0.875 (0.922) data 0.000 (0.055) loss 0.9561 (0.9604) acc 75.0000 (74.5833) lr 1.6845e-03 eta 0:16:55 +epoch [15/50] batch [20/31] time 0.890 (0.912) data 0.000 (0.041) loss 0.9443 (0.9170) acc 75.0000 (76.2500) lr 1.6845e-03 eta 0:16:39 +epoch [15/50] batch [25/31] time 0.886 (0.913) data 0.000 (0.033) loss 1.3262 (0.9984) acc 62.5000 (74.2500) lr 1.6845e-03 eta 0:16:35 +epoch [15/50] batch [30/31] time 0.869 (0.906) data 0.000 (0.028) loss 1.2676 (0.9855) acc 62.5000 (74.4792) lr 1.6845e-03 eta 0:16:24 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,534 +* accuracy: 75.1% +* error: 24.9% +* macro_f1: 74.2% +epoch [16/50] batch [5/31] time 0.991 (1.001) data 0.000 (0.161) loss 0.5278 (1.1270) acc 81.2500 (71.2500) lr 1.6374e-03 eta 0:18:01 +epoch [16/50] batch [10/31] time 0.863 (0.939) data 0.000 (0.081) loss 1.4980 (1.1190) acc 68.7500 (72.1875) lr 1.6374e-03 eta 0:16:49 +epoch [16/50] batch [15/31] time 0.864 (0.916) data 0.001 (0.054) loss 1.8232 (1.1531) acc 62.5000 (71.2500) lr 1.6374e-03 eta 0:16:20 +epoch [16/50] batch [20/31] time 0.871 (0.906) data 0.000 (0.040) loss 1.6152 (1.1071) acc 65.6250 (72.1875) lr 1.6374e-03 eta 0:16:04 +epoch [16/50] batch [25/31] time 0.874 (0.903) data 0.000 (0.032) loss 0.5996 (1.1028) acc 93.7500 (73.5000) lr 1.6374e-03 eta 0:15:57 +epoch [16/50] batch [30/31] time 0.882 (0.901) data 0.000 (0.027) loss 1.6787 (1.1273) acc 53.1250 (73.1250) lr 1.6374e-03 eta 0:15:50 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,346 +* 
accuracy: 74.7% +* error: 25.3% +* macro_f1: 73.8% +epoch [17/50] batch [5/31] time 0.895 (0.979) data 0.000 (0.158) loss 0.8472 (0.7923) acc 81.2500 (80.6250) lr 1.5878e-03 eta 0:17:07 +epoch [17/50] batch [10/31] time 0.898 (0.928) data 0.001 (0.079) loss 1.0752 (0.8329) acc 75.0000 (78.7500) lr 1.5878e-03 eta 0:16:08 +epoch [17/50] batch [15/31] time 0.870 (0.908) data 0.000 (0.053) loss 0.5806 (0.9451) acc 84.3750 (75.8333) lr 1.5878e-03 eta 0:15:43 +epoch [17/50] batch [20/31] time 0.863 (0.898) data 0.000 (0.040) loss 1.5801 (1.0469) acc 65.6250 (74.2188) lr 1.5878e-03 eta 0:15:28 +epoch [17/50] batch [25/31] time 0.882 (0.896) data 0.000 (0.032) loss 1.4326 (1.0514) acc 62.5000 (73.5000) lr 1.5878e-03 eta 0:15:21 +epoch [17/50] batch [30/31] time 0.906 (0.895) data 0.000 (0.027) loss 1.1055 (1.0325) acc 75.0000 (74.4792) lr 1.5878e-03 eta 0:15:16 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,314 +* accuracy: 74.6% +* error: 25.4% +* macro_f1: 73.7% +epoch [18/50] batch [5/31] time 0.900 (0.974) data 0.000 (0.153) loss 0.9189 (0.9608) acc 75.0000 (77.5000) lr 1.5358e-03 eta 0:16:31 +epoch [18/50] batch [10/31] time 0.887 (0.928) data 0.000 (0.077) loss 0.8589 (0.9020) acc 75.0000 (76.2500) lr 1.5358e-03 eta 0:15:39 +epoch [18/50] batch [15/31] time 0.871 (0.916) data 0.000 (0.051) loss 0.7500 (1.0002) acc 84.3750 (76.0417) lr 1.5358e-03 eta 0:15:23 +epoch [18/50] batch [20/31] time 0.864 (0.910) data 0.000 (0.039) loss 0.7793 (0.9611) acc 84.3750 (76.5625) lr 1.5358e-03 eta 0:15:12 +epoch [18/50] batch [25/31] time 0.878 (0.904) data 0.000 (0.031) loss 0.7368 (0.9603) acc 84.3750 (76.3750) lr 1.5358e-03 eta 0:15:01 +epoch [18/50] batch [30/31] time 0.905 (0.906) data 0.000 (0.026) loss 0.4136 (0.9436) acc 90.6250 (76.7708) lr 1.5358e-03 eta 0:15:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,238 +* accuracy: 74.5% +* error: 25.5% +* macro_f1: 73.6% +epoch [19/50] batch [5/31] time 0.874 (0.984) data 0.000 
(0.165) loss 0.4106 (0.6513) acc 84.3750 (81.8750) lr 1.4818e-03 eta 0:16:10 +epoch [19/50] batch [10/31] time 0.855 (0.931) data 0.000 (0.083) loss 0.6143 (0.8867) acc 87.5000 (79.0625) lr 1.4818e-03 eta 0:15:14 +epoch [19/50] batch [15/31] time 0.915 (0.917) data 0.000 (0.055) loss 0.8555 (0.8581) acc 81.2500 (79.3750) lr 1.4818e-03 eta 0:14:55 +epoch [19/50] batch [20/31] time 0.902 (0.907) data 0.000 (0.042) loss 0.9316 (0.8530) acc 78.1250 (79.6875) lr 1.4818e-03 eta 0:14:41 +epoch [19/50] batch [25/31] time 0.903 (0.901) data 0.000 (0.033) loss 1.5664 (0.8866) acc 71.8750 (79.1250) lr 1.4818e-03 eta 0:14:31 +epoch [19/50] batch [30/31] time 0.907 (0.904) data 0.000 (0.028) loss 1.3672 (0.9513) acc 71.8750 (77.6042) lr 1.4818e-03 eta 0:14:29 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,016 +* accuracy: 74.0% +* error: 26.0% +* macro_f1: 73.1% +epoch [20/50] batch [5/31] time 0.878 (0.993) data 0.000 (0.169) loss 1.0312 (0.7780) acc 78.1250 (83.1250) lr 1.4258e-03 eta 0:15:49 +epoch [20/50] batch [10/31] time 0.888 (0.944) data 0.000 (0.085) loss 0.7783 (0.8637) acc 81.2500 (79.0625) lr 1.4258e-03 eta 0:14:57 +epoch [20/50] batch [15/31] time 0.870 (0.922) data 0.000 (0.057) loss 1.1758 (0.9426) acc 71.8750 (77.9167) lr 1.4258e-03 eta 0:14:31 +epoch [20/50] batch [20/31] time 0.895 (0.911) data 0.000 (0.042) loss 1.1299 (0.9577) acc 78.1250 (76.7188) lr 1.4258e-03 eta 0:14:17 +epoch [20/50] batch [25/31] time 0.896 (0.910) data 0.000 (0.034) loss 1.1758 (0.9182) acc 78.1250 (76.5000) lr 1.4258e-03 eta 0:14:11 +epoch [20/50] batch [30/31] time 0.895 (0.905) data 0.000 (0.028) loss 1.8887 (0.9336) acc 65.6250 (76.9792) lr 1.4258e-03 eta 0:14:02 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,267 +* accuracy: 74.5% +* error: 25.5% +* macro_f1: 73.7% +epoch [21/50] batch [5/31] time 0.857 (0.975) data 0.000 (0.161) loss 0.9673 (0.7929) acc 78.1250 (81.2500) lr 1.3681e-03 eta 0:15:02 +epoch [21/50] batch [10/31] time 
0.892 (0.924) data 0.000 (0.081) loss 0.9800 (0.9207) acc 81.2500 (78.1250) lr 1.3681e-03 eta 0:14:09 +epoch [21/50] batch [15/31] time 0.859 (0.906) data 0.000 (0.054) loss 0.5186 (0.9623) acc 81.2500 (75.8333) lr 1.3681e-03 eta 0:13:49 +epoch [21/50] batch [20/31] time 0.903 (0.899) data 0.000 (0.040) loss 1.1680 (0.9311) acc 75.0000 (76.5625) lr 1.3681e-03 eta 0:13:38 +epoch [21/50] batch [25/31] time 0.876 (0.895) data 0.000 (0.032) loss 0.6650 (0.9474) acc 81.2500 (76.5000) lr 1.3681e-03 eta 0:13:30 +epoch [21/50] batch [30/31] time 0.901 (0.893) data 0.000 (0.027) loss 0.9966 (0.9209) acc 81.2500 (76.8750) lr 1.3681e-03 eta 0:13:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,230 +* accuracy: 74.5% +* error: 25.5% +* macro_f1: 73.6% +epoch [22/50] batch [5/31] time 0.876 (0.973) data 0.000 (0.139) loss 1.0537 (1.1194) acc 78.1250 (73.7500) lr 1.3090e-03 eta 0:14:29 +epoch [22/50] batch [10/31] time 0.904 (0.929) data 0.000 (0.070) loss 0.5181 (0.9966) acc 87.5000 (76.2500) lr 1.3090e-03 eta 0:13:45 +epoch [22/50] batch [15/31] time 0.871 (0.910) data 0.000 (0.047) loss 0.9980 (0.9783) acc 71.8750 (76.2500) lr 1.3090e-03 eta 0:13:23 +epoch [22/50] batch [20/31] time 0.868 (0.905) data 0.000 (0.035) loss 0.8818 (0.9543) acc 75.0000 (76.0938) lr 1.3090e-03 eta 0:13:15 +epoch [22/50] batch [25/31] time 0.925 (0.901) data 0.000 (0.028) loss 0.5767 (0.9456) acc 75.0000 (75.8750) lr 1.3090e-03 eta 0:13:07 +epoch [22/50] batch [30/31] time 0.876 (0.902) data 0.000 (0.023) loss 0.6934 (0.9264) acc 84.3750 (76.3542) lr 1.3090e-03 eta 0:13:04 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,829 +* accuracy: 73.7% +* error: 26.3% +* macro_f1: 72.7% +epoch [23/50] batch [5/31] time 0.884 (0.959) data 0.000 (0.148) loss 0.7070 (0.7402) acc 81.2500 (80.6250) lr 1.2487e-03 eta 0:13:47 +epoch [23/50] batch [10/31] time 0.893 (0.921) data 0.000 (0.074) loss 1.0254 (0.7157) acc 68.7500 (80.6250) lr 1.2487e-03 eta 0:13:10 +epoch 
[23/50] batch [15/31] time 0.885 (0.911) data 0.000 (0.049) loss 0.6724 (0.7559) acc 84.3750 (80.0000) lr 1.2487e-03 eta 0:12:57 +epoch [23/50] batch [20/31] time 0.854 (0.905) data 0.000 (0.037) loss 1.5605 (0.8691) acc 71.8750 (78.7500) lr 1.2487e-03 eta 0:12:47 +epoch [23/50] batch [25/31] time 0.899 (0.899) data 0.000 (0.030) loss 0.5332 (0.8755) acc 90.6250 (79.3750) lr 1.2487e-03 eta 0:12:38 +epoch [23/50] batch [30/31] time 0.890 (0.896) data 0.000 (0.025) loss 0.8896 (0.8682) acc 78.1250 (79.2708) lr 1.2487e-03 eta 0:12:31 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,940 +* accuracy: 73.9% +* error: 26.1% +* macro_f1: 73.0% +epoch [24/50] batch [5/31] time 0.887 (0.964) data 0.000 (0.142) loss 1.0850 (0.9168) acc 71.8750 (76.2500) lr 1.1874e-03 eta 0:13:22 +epoch [24/50] batch [10/31] time 0.890 (0.921) data 0.000 (0.071) loss 1.2119 (0.9048) acc 78.1250 (78.1250) lr 1.1874e-03 eta 0:12:41 +epoch [24/50] batch [15/31] time 0.899 (0.923) data 0.000 (0.047) loss 0.9634 (0.9310) acc 75.0000 (78.5417) lr 1.1874e-03 eta 0:12:38 +epoch [24/50] batch [20/31] time 0.892 (0.911) data 0.000 (0.036) loss 1.0273 (0.9138) acc 68.7500 (78.4375) lr 1.1874e-03 eta 0:12:23 +epoch [24/50] batch [25/31] time 0.862 (0.902) data 0.000 (0.029) loss 0.6558 (0.8948) acc 84.3750 (79.2500) lr 1.1874e-03 eta 0:12:12 +epoch [24/50] batch [30/31] time 0.861 (0.897) data 0.000 (0.024) loss 1.0312 (0.8881) acc 71.8750 (79.2708) lr 1.1874e-03 eta 0:12:03 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,574 +* accuracy: 73.1% +* error: 26.9% +* macro_f1: 72.3% +epoch [25/50] batch [5/31] time 0.899 (0.947) data 0.000 (0.127) loss 0.9048 (0.7045) acc 75.0000 (83.7500) lr 1.1253e-03 eta 0:12:38 +epoch [25/50] batch [10/31] time 0.899 (0.917) data 0.000 (0.063) loss 0.9634 (0.7408) acc 87.5000 (85.3125) lr 1.1253e-03 eta 0:12:09 +epoch [25/50] batch [15/31] time 0.890 (0.909) data 0.000 (0.042) loss 0.3577 (0.7461) acc 93.7500 (83.5417) lr 
1.1253e-03 eta 0:11:58 +epoch [25/50] batch [20/31] time 0.914 (0.908) data 0.000 (0.032) loss 1.5703 (0.8438) acc 65.6250 (80.7812) lr 1.1253e-03 eta 0:11:53 +epoch [25/50] batch [25/31] time 0.880 (0.903) data 0.000 (0.026) loss 0.6733 (0.8183) acc 81.2500 (80.8750) lr 1.1253e-03 eta 0:11:45 +epoch [25/50] batch [30/31] time 0.888 (0.899) data 0.000 (0.021) loss 0.7671 (0.8108) acc 84.3750 (80.7292) lr 1.1253e-03 eta 0:11:37 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,642 +* accuracy: 73.3% +* error: 26.7% +* macro_f1: 72.4% +epoch [26/50] batch [5/31] time 0.852 (0.968) data 0.000 (0.153) loss 0.7314 (0.7089) acc 84.3750 (82.5000) lr 1.0628e-03 eta 0:12:25 +epoch [26/50] batch [10/31] time 0.890 (0.923) data 0.000 (0.077) loss 1.0029 (0.7572) acc 81.2500 (81.5625) lr 1.0628e-03 eta 0:11:46 +epoch [26/50] batch [15/31] time 0.913 (0.912) data 0.000 (0.051) loss 1.1104 (0.8136) acc 71.8750 (80.8333) lr 1.0628e-03 eta 0:11:32 +epoch [26/50] batch [20/31] time 0.878 (0.905) data 0.000 (0.038) loss 1.1436 (0.8682) acc 71.8750 (79.5312) lr 1.0628e-03 eta 0:11:23 +epoch [26/50] batch [25/31] time 0.890 (0.901) data 0.000 (0.031) loss 0.5210 (0.8599) acc 90.6250 (79.8750) lr 1.0628e-03 eta 0:11:15 +epoch [26/50] batch [30/31] time 0.891 (0.900) data 0.000 (0.026) loss 0.9912 (0.8883) acc 87.5000 (79.3750) lr 1.0628e-03 eta 0:11:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,661 +* accuracy: 73.3% +* error: 26.7% +* macro_f1: 72.4% +epoch [27/50] batch [5/31] time 0.866 (0.964) data 0.000 (0.148) loss 0.8613 (0.8125) acc 87.5000 (80.6250) lr 1.0000e-03 eta 0:11:52 +epoch [27/50] batch [10/31] time 0.892 (0.927) data 0.000 (0.074) loss 1.7266 (1.0006) acc 71.8750 (78.1250) lr 1.0000e-03 eta 0:11:20 +epoch [27/50] batch [15/31] time 0.894 (0.912) data 0.000 (0.050) loss 0.8267 (0.9043) acc 78.1250 (79.1667) lr 1.0000e-03 eta 0:11:04 +epoch [27/50] batch [20/31] time 0.869 (0.905) data 0.000 (0.037) loss 0.6045 (0.8684) 
acc 84.3750 (78.9062) lr 1.0000e-03 eta 0:10:55 +epoch [27/50] batch [25/31] time 0.887 (0.902) data 0.000 (0.030) loss 0.7900 (0.8614) acc 78.1250 (79.2500) lr 1.0000e-03 eta 0:10:48 +epoch [27/50] batch [30/31] time 0.878 (0.901) data 0.000 (0.025) loss 1.1357 (0.8957) acc 75.0000 (78.5417) lr 1.0000e-03 eta 0:10:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,591 +* accuracy: 73.2% +* error: 26.8% +* macro_f1: 72.2% +epoch [28/50] batch [5/31] time 0.853 (0.937) data 0.000 (0.126) loss 0.4980 (0.7694) acc 90.6250 (83.7500) lr 9.3721e-04 eta 0:11:03 +epoch [28/50] batch [10/31] time 0.897 (0.911) data 0.000 (0.063) loss 0.4756 (0.6876) acc 87.5000 (84.3750) lr 9.3721e-04 eta 0:10:40 +epoch [28/50] batch [15/31] time 0.874 (0.903) data 0.000 (0.042) loss 0.5723 (0.7086) acc 87.5000 (84.3750) lr 9.3721e-04 eta 0:10:30 +epoch [28/50] batch [20/31] time 0.995 (0.908) data 0.000 (0.032) loss 0.7671 (0.7512) acc 81.2500 (83.4375) lr 9.3721e-04 eta 0:10:29 +epoch [28/50] batch [25/31] time 0.882 (0.903) data 0.000 (0.025) loss 0.6748 (0.7650) acc 81.2500 (83.1250) lr 9.3721e-04 eta 0:10:21 +epoch [28/50] batch [30/31] time 0.884 (0.902) data 0.000 (0.021) loss 1.1475 (0.7948) acc 75.0000 (81.7708) lr 9.3721e-04 eta 0:10:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,734 +* accuracy: 73.5% +* error: 26.5% +* macro_f1: 72.5% +epoch [29/50] batch [5/31] time 0.874 (0.943) data 0.000 (0.124) loss 0.9292 (0.6433) acc 84.3750 (85.0000) lr 8.7467e-04 eta 0:10:38 +epoch [29/50] batch [10/31] time 0.874 (0.909) data 0.000 (0.062) loss 1.0020 (0.6859) acc 78.1250 (82.8125) lr 8.7467e-04 eta 0:10:10 +epoch [29/50] batch [15/31] time 0.864 (0.899) data 0.000 (0.042) loss 0.6758 (0.6638) acc 87.5000 (83.7500) lr 8.7467e-04 eta 0:09:59 +epoch [29/50] batch [20/31] time 1.019 (0.900) data 0.000 (0.031) loss 0.5283 (0.6947) acc 87.5000 (82.5000) lr 8.7467e-04 eta 0:09:55 +epoch [29/50] batch [25/31] time 0.859 (0.897) data 0.000 
(0.025) loss 1.1602 (0.7583) acc 78.1250 (81.6250) lr 8.7467e-04 eta 0:09:49 +epoch [29/50] batch [30/31] time 0.888 (0.896) data 0.000 (0.021) loss 1.0537 (0.8103) acc 71.8750 (79.7917) lr 8.7467e-04 eta 0:09:44 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,891 +* accuracy: 73.8% +* error: 26.2% +* macro_f1: 72.9% +epoch [30/50] batch [5/31] time 0.872 (0.933) data 0.000 (0.125) loss 0.8848 (0.7889) acc 78.1250 (80.0000) lr 8.1262e-04 eta 0:10:02 +epoch [30/50] batch [10/31] time 0.883 (0.913) data 0.000 (0.062) loss 0.4763 (0.7708) acc 84.3750 (81.2500) lr 8.1262e-04 eta 0:09:45 +epoch [30/50] batch [15/31] time 0.874 (0.906) data 0.000 (0.042) loss 0.6533 (0.7874) acc 84.3750 (81.2500) lr 8.1262e-04 eta 0:09:36 +epoch [30/50] batch [20/31] time 0.888 (0.900) data 0.000 (0.031) loss 0.7061 (0.8078) acc 81.2500 (80.3125) lr 8.1262e-04 eta 0:09:27 +epoch [30/50] batch [25/31] time 0.897 (0.901) data 0.000 (0.025) loss 0.6118 (0.8065) acc 78.1250 (79.8750) lr 8.1262e-04 eta 0:09:23 +epoch [30/50] batch [30/31] time 0.894 (0.898) data 0.000 (0.021) loss 1.1455 (0.7940) acc 71.8750 (80.5208) lr 8.1262e-04 eta 0:09:17 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,480 +* accuracy: 73.0% +* error: 27.0% +* macro_f1: 72.0% +epoch [31/50] batch [5/31] time 0.903 (0.951) data 0.000 (0.129) loss 0.8081 (0.8092) acc 81.2500 (80.0000) lr 7.5131e-04 eta 0:09:44 +epoch [31/50] batch [10/31] time 0.838 (0.912) data 0.000 (0.065) loss 0.6001 (0.6951) acc 81.2500 (82.5000) lr 7.5131e-04 eta 0:09:16 +epoch [31/50] batch [15/31] time 0.851 (0.896) data 0.000 (0.043) loss 0.5635 (0.7180) acc 90.6250 (82.5000) lr 7.5131e-04 eta 0:09:02 +epoch [31/50] batch [20/31] time 0.916 (0.892) data 0.000 (0.033) loss 1.0459 (0.8187) acc 71.8750 (80.3125) lr 7.5131e-04 eta 0:08:55 +epoch [31/50] batch [25/31] time 0.877 (0.889) data 0.000 (0.026) loss 1.0596 (0.8350) acc 81.2500 (80.1250) lr 7.5131e-04 eta 0:08:49 +epoch [31/50] batch [30/31] time 
0.897 (0.889) data 0.000 (0.022) loss 1.1309 (0.8236) acc 71.8750 (80.1042) lr 7.5131e-04 eta 0:08:44 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,526 +* accuracy: 73.1% +* error: 26.9% +* macro_f1: 72.1% +epoch [32/50] batch [5/31] time 0.865 (0.941) data 0.000 (0.125) loss 0.9263 (0.8586) acc 81.2500 (81.2500) lr 6.9098e-04 eta 0:09:09 +epoch [32/50] batch [10/31] time 0.866 (0.905) data 0.000 (0.063) loss 0.6821 (0.7995) acc 87.5000 (80.6250) lr 6.9098e-04 eta 0:08:43 +epoch [32/50] batch [15/31] time 0.881 (0.898) data 0.000 (0.042) loss 0.8745 (0.7245) acc 78.1250 (81.4583) lr 6.9098e-04 eta 0:08:35 +epoch [32/50] batch [20/31] time 0.889 (0.893) data 0.000 (0.031) loss 1.3037 (0.7803) acc 78.1250 (80.9375) lr 6.9098e-04 eta 0:08:28 +epoch [32/50] batch [25/31] time 0.879 (0.891) data 0.000 (0.025) loss 1.2012 (0.7804) acc 78.1250 (80.8750) lr 6.9098e-04 eta 0:08:22 +epoch [32/50] batch [30/31] time 0.896 (0.891) data 0.000 (0.021) loss 0.3354 (0.7622) acc 90.6250 (81.5625) lr 6.9098e-04 eta 0:08:18 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,195 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.4% +epoch [33/50] batch [5/31] time 0.875 (0.951) data 0.000 (0.133) loss 0.7554 (0.8025) acc 90.6250 (82.5000) lr 6.3188e-04 eta 0:08:45 +epoch [33/50] batch [10/31] time 0.876 (0.917) data 0.000 (0.067) loss 1.0215 (0.7839) acc 78.1250 (82.1875) lr 6.3188e-04 eta 0:08:22 +epoch [33/50] batch [15/31] time 0.897 (0.908) data 0.000 (0.045) loss 0.4202 (0.7574) acc 93.7500 (82.5000) lr 6.3188e-04 eta 0:08:13 +epoch [33/50] batch [20/31] time 0.891 (0.902) data 0.000 (0.033) loss 0.7334 (0.7677) acc 81.2500 (82.5000) lr 6.3188e-04 eta 0:08:05 +epoch [33/50] batch [25/31] time 0.883 (0.900) data 0.000 (0.027) loss 0.6753 (0.7981) acc 78.1250 (81.8750) lr 6.3188e-04 eta 0:07:59 +epoch [33/50] batch [30/31] time 0.922 (0.899) data 0.000 (0.022) loss 0.3496 (0.7874) acc 93.7500 (82.1875) lr 6.3188e-04 eta 0:07:54 +Evaluate 
on the *val* set +=> result +* total: 50,000 +* correct: 36,442 +* accuracy: 72.9% +* error: 27.1% +* macro_f1: 71.9% +epoch [34/50] batch [5/31] time 0.909 (0.952) data 0.000 (0.126) loss 0.8091 (0.7973) acc 78.1250 (81.2500) lr 5.7422e-04 eta 0:08:16 +epoch [34/50] batch [10/31] time 0.865 (0.914) data 0.001 (0.063) loss 0.3713 (0.7604) acc 93.7500 (84.0625) lr 5.7422e-04 eta 0:07:52 +epoch [34/50] batch [15/31] time 0.883 (0.903) data 0.000 (0.042) loss 0.5874 (0.7395) acc 90.6250 (85.0000) lr 5.7422e-04 eta 0:07:42 +epoch [34/50] batch [20/31] time 0.873 (0.895) data 0.000 (0.032) loss 0.9878 (0.7862) acc 68.7500 (81.8750) lr 5.7422e-04 eta 0:07:33 +epoch [34/50] batch [25/31] time 0.867 (0.892) data 0.000 (0.025) loss 0.9385 (0.8404) acc 78.1250 (80.7500) lr 5.7422e-04 eta 0:07:27 +epoch [34/50] batch [30/31] time 0.896 (0.891) data 0.000 (0.021) loss 0.3950 (0.8458) acc 90.6250 (80.9375) lr 5.7422e-04 eta 0:07:22 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,081 +* accuracy: 72.2% +* error: 27.8% +* macro_f1: 71.1% +epoch [35/50] batch [5/31] time 0.884 (0.945) data 0.000 (0.125) loss 0.8594 (0.7630) acc 71.8750 (79.3750) lr 5.1825e-04 eta 0:07:44 +epoch [35/50] batch [10/31] time 0.879 (0.915) data 0.000 (0.063) loss 1.2666 (0.8583) acc 75.0000 (79.0625) lr 5.1825e-04 eta 0:07:24 +epoch [35/50] batch [15/31] time 0.876 (0.906) data 0.000 (0.042) loss 0.8184 (0.8342) acc 68.7500 (79.1667) lr 5.1825e-04 eta 0:07:15 +epoch [35/50] batch [20/31] time 0.868 (0.902) data 0.000 (0.031) loss 0.8721 (0.8096) acc 75.0000 (79.6875) lr 5.1825e-04 eta 0:07:09 +epoch [35/50] batch [25/31] time 0.879 (0.895) data 0.000 (0.025) loss 0.7129 (0.8309) acc 81.2500 (79.7500) lr 5.1825e-04 eta 0:07:01 +epoch [35/50] batch [30/31] time 0.890 (0.897) data 0.000 (0.021) loss 0.6582 (0.7817) acc 78.1250 (80.8333) lr 5.1825e-04 eta 0:06:57 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,549 +* accuracy: 73.1% +* error: 26.9% +* macro_f1: 
72.2% +epoch [36/50] batch [5/31] time 0.899 (0.953) data 0.000 (0.126) loss 0.9160 (0.8711) acc 81.2500 (81.2500) lr 4.6417e-04 eta 0:07:18 +epoch [36/50] batch [10/31] time 0.887 (0.917) data 0.000 (0.063) loss 0.4429 (0.7086) acc 87.5000 (83.7500) lr 4.6417e-04 eta 0:06:57 +epoch [36/50] batch [15/31] time 0.899 (0.909) data 0.001 (0.042) loss 0.9619 (0.7028) acc 81.2500 (84.1667) lr 4.6417e-04 eta 0:06:48 +epoch [36/50] batch [20/31] time 0.867 (0.901) data 0.000 (0.032) loss 0.7266 (0.7189) acc 90.6250 (84.5312) lr 4.6417e-04 eta 0:06:40 +epoch [36/50] batch [25/31] time 0.869 (0.896) data 0.000 (0.025) loss 0.6006 (0.7236) acc 84.3750 (84.5000) lr 4.6417e-04 eta 0:06:34 +epoch [36/50] batch [30/31] time 0.870 (0.895) data 0.000 (0.021) loss 0.9199 (0.7367) acc 87.5000 (84.1667) lr 4.6417e-04 eta 0:06:29 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,246 +* accuracy: 72.5% +* error: 27.5% +* macro_f1: 71.6% +epoch [37/50] batch [5/31] time 0.890 (0.945) data 0.000 (0.128) loss 1.0371 (0.7881) acc 75.0000 (85.0000) lr 4.1221e-04 eta 0:06:45 +epoch [37/50] batch [10/31] time 0.884 (0.912) data 0.000 (0.064) loss 0.7739 (0.8287) acc 90.6250 (83.4375) lr 4.1221e-04 eta 0:06:26 +epoch [37/50] batch [15/31] time 0.886 (0.900) data 0.000 (0.043) loss 0.4636 (0.7892) acc 84.3750 (82.0833) lr 4.1221e-04 eta 0:06:17 +epoch [37/50] batch [20/31] time 0.878 (0.899) data 0.000 (0.032) loss 0.4536 (0.7667) acc 87.5000 (82.6562) lr 4.1221e-04 eta 0:06:12 +epoch [37/50] batch [25/31] time 0.886 (0.896) data 0.000 (0.026) loss 1.1055 (0.7564) acc 71.8750 (82.2500) lr 4.1221e-04 eta 0:06:06 +epoch [37/50] batch [30/31] time 0.888 (0.893) data 0.000 (0.021) loss 0.7217 (0.8151) acc 84.3750 (80.9375) lr 4.1221e-04 eta 0:06:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,454 +* accuracy: 72.9% +* error: 27.1% +* macro_f1: 71.9% +epoch [38/50] batch [5/31] time 0.879 (0.934) data 0.000 (0.124) loss 0.6040 (0.7356) acc 84.3750 
(80.0000) lr 3.6258e-04 eta 0:06:11 +epoch [38/50] batch [10/31] time 0.872 (0.902) data 0.000 (0.062) loss 1.0137 (0.7932) acc 81.2500 (80.9375) lr 3.6258e-04 eta 0:05:54 +epoch [38/50] batch [15/31] time 0.881 (0.892) data 0.000 (0.041) loss 0.9155 (0.7434) acc 75.0000 (82.0833) lr 3.6258e-04 eta 0:05:46 +epoch [38/50] batch [20/31] time 0.887 (0.891) data 0.000 (0.031) loss 0.6274 (0.7214) acc 78.1250 (82.0312) lr 3.6258e-04 eta 0:05:41 +epoch [38/50] batch [25/31] time 0.884 (0.892) data 0.000 (0.025) loss 0.5835 (0.7103) acc 87.5000 (82.1250) lr 3.6258e-04 eta 0:05:37 +epoch [38/50] batch [30/31] time 0.897 (0.890) data 0.000 (0.021) loss 0.5225 (0.7035) acc 81.2500 (82.2917) lr 3.6258e-04 eta 0:05:31 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,287 +* accuracy: 72.6% +* error: 27.4% +* macro_f1: 71.6% +epoch [39/50] batch [5/31] time 0.895 (0.956) data 0.000 (0.130) loss 1.1689 (0.9846) acc 75.0000 (80.0000) lr 3.1545e-04 eta 0:05:50 +epoch [39/50] batch [10/31] time 0.862 (0.920) data 0.000 (0.065) loss 0.4626 (0.8553) acc 84.3750 (80.6250) lr 3.1545e-04 eta 0:05:33 +epoch [39/50] batch [15/31] time 0.876 (0.906) data 0.000 (0.044) loss 0.7241 (0.8632) acc 81.2500 (80.0000) lr 3.1545e-04 eta 0:05:23 +epoch [39/50] batch [20/31] time 0.872 (0.900) data 0.000 (0.033) loss 0.7754 (0.8133) acc 78.1250 (81.0938) lr 3.1545e-04 eta 0:05:16 +epoch [39/50] batch [25/31] time 0.861 (0.894) data 0.000 (0.026) loss 0.5146 (0.7546) acc 84.3750 (82.2500) lr 3.1545e-04 eta 0:05:10 +epoch [39/50] batch [30/31] time 0.881 (0.890) data 0.000 (0.022) loss 0.6841 (0.7965) acc 84.3750 (82.0833) lr 3.1545e-04 eta 0:05:04 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,233 +* accuracy: 72.5% +* error: 27.5% +* macro_f1: 71.5% +epoch [40/50] batch [5/31] time 0.884 (0.961) data 0.000 (0.137) loss 0.4541 (0.9224) acc 90.6250 (82.5000) lr 2.7103e-04 eta 0:05:22 +epoch [40/50] batch [10/31] time 0.824 (0.914) data 0.000 (0.069) loss 
1.0537 (0.8170) acc 71.8750 (82.5000) lr 2.7103e-04 eta 0:05:02 +epoch [40/50] batch [15/31] time 0.887 (0.915) data 0.000 (0.046) loss 1.0127 (0.8457) acc 87.5000 (81.6667) lr 2.7103e-04 eta 0:04:58 +epoch [40/50] batch [20/31] time 0.853 (0.907) data 0.000 (0.034) loss 0.5474 (0.8309) acc 81.2500 (81.8750) lr 2.7103e-04 eta 0:04:51 +epoch [40/50] batch [25/31] time 0.892 (0.901) data 0.000 (0.028) loss 0.9248 (0.8136) acc 75.0000 (81.8750) lr 2.7103e-04 eta 0:04:44 +epoch [40/50] batch [30/31] time 0.902 (0.898) data 0.000 (0.023) loss 0.5820 (0.8336) acc 81.2500 (81.5625) lr 2.7103e-04 eta 0:04:39 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,138 +* accuracy: 72.3% +* error: 27.7% +* macro_f1: 71.3% +epoch [41/50] batch [5/31] time 0.895 (0.957) data 0.000 (0.138) loss 0.4866 (0.7594) acc 75.0000 (80.0000) lr 2.2949e-04 eta 0:04:51 +epoch [41/50] batch [10/31] time 0.911 (0.921) data 0.000 (0.069) loss 0.7866 (0.7962) acc 78.1250 (79.3750) lr 2.2949e-04 eta 0:04:36 +epoch [41/50] batch [15/31] time 1.021 (0.918) data 0.000 (0.046) loss 0.4351 (0.7328) acc 87.5000 (81.2500) lr 2.2949e-04 eta 0:04:30 +epoch [41/50] batch [20/31] time 0.868 (0.907) data 0.000 (0.035) loss 0.4490 (0.7153) acc 84.3750 (81.7188) lr 2.2949e-04 eta 0:04:23 +epoch [41/50] batch [25/31] time 0.863 (0.899) data 0.000 (0.028) loss 1.2910 (0.7311) acc 75.0000 (82.0000) lr 2.2949e-04 eta 0:04:16 +epoch [41/50] batch [30/31] time 0.884 (0.896) data 0.000 (0.023) loss 0.5405 (0.7297) acc 78.1250 (81.7708) lr 2.2949e-04 eta 0:04:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,407 +* accuracy: 72.8% +* error: 27.2% +* macro_f1: 71.9% +epoch [42/50] batch [5/31] time 0.889 (0.956) data 0.000 (0.134) loss 1.4990 (0.6764) acc 78.1250 (85.6250) lr 1.9098e-04 eta 0:04:21 +epoch [42/50] batch [10/31] time 0.892 (0.918) data 0.000 (0.067) loss 1.0273 (0.7518) acc 71.8750 (83.1250) lr 1.9098e-04 eta 0:04:06 +epoch [42/50] batch [15/31] time 0.855 (0.901) 
data 0.000 (0.045) loss 0.7070 (0.7337) acc 84.3750 (83.9583) lr 1.9098e-04 eta 0:03:57 +epoch [42/50] batch [20/31] time 0.903 (0.906) data 0.000 (0.034) loss 0.6357 (0.7522) acc 81.2500 (83.4375) lr 1.9098e-04 eta 0:03:54 +epoch [42/50] batch [25/31] time 0.900 (0.903) data 0.000 (0.027) loss 0.8120 (0.7550) acc 84.3750 (83.0000) lr 1.9098e-04 eta 0:03:49 +epoch [42/50] batch [30/31] time 0.863 (0.897) data 0.000 (0.023) loss 0.9238 (0.7382) acc 75.0000 (83.0208) lr 1.9098e-04 eta 0:03:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,267 +* accuracy: 72.5% +* error: 27.5% +* macro_f1: 71.6% +epoch [43/50] batch [5/31] time 0.877 (0.956) data 0.000 (0.135) loss 0.5815 (0.6160) acc 78.1250 (85.6250) lr 1.5567e-04 eta 0:03:52 +epoch [43/50] batch [10/31] time 0.891 (0.916) data 0.000 (0.068) loss 0.4097 (0.7043) acc 93.7500 (84.6875) lr 1.5567e-04 eta 0:03:38 +epoch [43/50] batch [15/31] time 0.871 (0.902) data 0.000 (0.045) loss 0.7144 (0.7178) acc 78.1250 (84.7917) lr 1.5567e-04 eta 0:03:30 +epoch [43/50] batch [20/31] time 0.909 (0.898) data 0.000 (0.034) loss 1.0322 (0.7757) acc 71.8750 (83.1250) lr 1.5567e-04 eta 0:03:24 +epoch [43/50] batch [25/31] time 0.873 (0.895) data 0.000 (0.027) loss 0.8765 (0.7872) acc 81.2500 (82.2500) lr 1.5567e-04 eta 0:03:19 +epoch [43/50] batch [30/31] time 0.899 (0.896) data 0.000 (0.023) loss 0.6226 (0.7641) acc 84.3750 (82.7083) lr 1.5567e-04 eta 0:03:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,346 +* accuracy: 72.7% +* error: 27.3% +* macro_f1: 71.7% +epoch [44/50] batch [5/31] time 0.850 (0.937) data 0.000 (0.132) loss 0.9893 (0.7952) acc 78.1250 (84.3750) lr 1.2369e-04 eta 0:03:18 +epoch [44/50] batch [10/31] time 0.884 (0.911) data 0.000 (0.066) loss 0.7588 (0.7323) acc 84.3750 (84.6875) lr 1.2369e-04 eta 0:03:08 +epoch [44/50] batch [15/31] time 0.860 (0.902) data 0.000 (0.044) loss 0.4165 (0.6868) acc 90.6250 (84.7917) lr 1.2369e-04 eta 0:03:02 +epoch [44/50] batch 
[20/31] time 0.883 (0.894) data 0.000 (0.033) loss 0.5518 (0.6486) acc 87.5000 (85.9375) lr 1.2369e-04 eta 0:02:56 +epoch [44/50] batch [25/31] time 0.887 (0.898) data 0.000 (0.027) loss 0.7373 (0.6835) acc 81.2500 (84.8750) lr 1.2369e-04 eta 0:02:52 +epoch [44/50] batch [30/31] time 0.863 (0.894) data 0.000 (0.022) loss 0.6978 (0.7043) acc 84.3750 (84.4792) lr 1.2369e-04 eta 0:02:47 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,243 +* accuracy: 72.5% +* error: 27.5% +* macro_f1: 71.5% +epoch [45/50] batch [5/31] time 0.890 (0.953) data 0.000 (0.129) loss 0.4839 (0.6955) acc 84.3750 (83.1250) lr 9.5173e-05 eta 0:02:52 +epoch [45/50] batch [10/31] time 0.897 (0.917) data 0.000 (0.065) loss 0.5420 (0.6375) acc 87.5000 (84.6875) lr 9.5173e-05 eta 0:02:41 +epoch [45/50] batch [15/31] time 0.896 (0.912) data 0.000 (0.043) loss 1.1738 (0.6360) acc 81.2500 (85.6250) lr 9.5173e-05 eta 0:02:35 +epoch [45/50] batch [20/31] time 0.891 (0.906) data 0.000 (0.032) loss 1.3369 (0.7331) acc 71.8750 (84.0625) lr 9.5173e-05 eta 0:02:30 +epoch [45/50] batch [25/31] time 0.874 (0.906) data 0.000 (0.026) loss 0.7939 (0.7171) acc 84.3750 (84.3750) lr 9.5173e-05 eta 0:02:25 +epoch [45/50] batch [30/31] time 0.897 (0.902) data 0.000 (0.022) loss 0.7041 (0.7388) acc 75.0000 (83.8542) lr 9.5173e-05 eta 0:02:20 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,191 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.4% +epoch [46/50] batch [5/31] time 0.886 (0.942) data 0.000 (0.125) loss 0.7671 (0.7390) acc 78.1250 (82.5000) lr 7.0224e-05 eta 0:02:21 +epoch [46/50] batch [10/31] time 0.845 (0.908) data 0.000 (0.062) loss 0.7671 (0.6531) acc 87.5000 (85.3125) lr 7.0224e-05 eta 0:02:11 +epoch [46/50] batch [15/31] time 0.904 (0.904) data 0.000 (0.042) loss 0.4719 (0.6451) acc 87.5000 (85.4167) lr 7.0224e-05 eta 0:02:06 +epoch [46/50] batch [20/31] time 0.915 (0.903) data 0.000 (0.031) loss 0.6128 (0.7030) acc 84.3750 (84.3750) lr 7.0224e-05 eta 
0:02:01 +epoch [46/50] batch [25/31] time 0.888 (0.904) data 0.000 (0.025) loss 0.6978 (0.6804) acc 81.2500 (85.1250) lr 7.0224e-05 eta 0:01:57 +epoch [46/50] batch [30/31] time 0.904 (0.901) data 0.000 (0.021) loss 0.7441 (0.7001) acc 81.2500 (84.4792) lr 7.0224e-05 eta 0:01:52 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,173 +* accuracy: 72.3% +* error: 27.7% +* macro_f1: 71.4% +epoch [47/50] batch [5/31] time 0.890 (0.946) data 0.000 (0.120) loss 0.9536 (0.7832) acc 71.8750 (81.2500) lr 4.8943e-05 eta 0:01:52 +epoch [47/50] batch [10/31] time 0.871 (0.915) data 0.000 (0.060) loss 0.7979 (0.6747) acc 75.0000 (83.1250) lr 4.8943e-05 eta 0:01:44 +epoch [47/50] batch [15/31] time 0.893 (0.903) data 0.000 (0.040) loss 1.1982 (0.6806) acc 84.3750 (84.3750) lr 4.8943e-05 eta 0:01:38 +epoch [47/50] batch [20/31] time 0.865 (0.894) data 0.000 (0.030) loss 0.4045 (0.6634) acc 81.2500 (84.2188) lr 4.8943e-05 eta 0:01:32 +epoch [47/50] batch [25/31] time 0.915 (0.892) data 0.000 (0.024) loss 0.4897 (0.6641) acc 93.7500 (84.2500) lr 4.8943e-05 eta 0:01:28 +epoch [47/50] batch [30/31] time 0.852 (0.888) data 0.000 (0.020) loss 0.5640 (0.6782) acc 93.7500 (84.4792) lr 4.8943e-05 eta 0:01:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,200 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.5% +epoch [48/50] batch [5/31] time 0.871 (0.949) data 0.001 (0.133) loss 0.6396 (0.7861) acc 93.7500 (86.2500) lr 3.1417e-05 eta 0:01:23 +epoch [48/50] batch [10/31] time 0.887 (0.922) data 0.000 (0.067) loss 1.0273 (0.7704) acc 75.0000 (85.0000) lr 3.1417e-05 eta 0:01:16 +epoch [48/50] batch [15/31] time 0.939 (0.915) data 0.000 (0.045) loss 0.4082 (0.7391) acc 96.8750 (85.0000) lr 3.1417e-05 eta 0:01:11 +epoch [48/50] batch [20/31] time 0.892 (0.905) data 0.000 (0.033) loss 0.4800 (0.6923) acc 90.6250 (85.0000) lr 3.1417e-05 eta 0:01:06 +epoch [48/50] batch [25/31] time 0.899 (0.901) data 0.000 (0.027) loss 0.4441 (0.6702) acc 90.6250 
(85.3750) lr 3.1417e-05 eta 0:01:01 +epoch [48/50] batch [30/31] time 0.889 (0.902) data 0.000 (0.022) loss 0.9199 (0.7315) acc 75.0000 (83.5417) lr 3.1417e-05 eta 0:00:56 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,195 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.4% +epoch [49/50] batch [5/31] time 0.851 (0.930) data 0.000 (0.123) loss 0.8584 (0.6711) acc 78.1250 (76.8750) lr 1.7713e-05 eta 0:00:53 +epoch [49/50] batch [10/31] time 0.888 (0.921) data 0.000 (0.062) loss 0.3171 (0.6027) acc 87.5000 (80.9375) lr 1.7713e-05 eta 0:00:47 +epoch [49/50] batch [15/31] time 0.892 (0.908) data 0.000 (0.041) loss 0.9678 (0.6439) acc 84.3750 (81.6667) lr 1.7713e-05 eta 0:00:42 +epoch [49/50] batch [20/31] time 0.864 (0.903) data 0.000 (0.031) loss 0.7812 (0.6873) acc 84.3750 (82.3438) lr 1.7713e-05 eta 0:00:37 +epoch [49/50] batch [25/31] time 0.930 (0.902) data 0.000 (0.025) loss 0.4753 (0.6559) acc 93.7500 (83.0000) lr 1.7713e-05 eta 0:00:33 +epoch [49/50] batch [30/31] time 0.886 (0.899) data 0.000 (0.021) loss 0.5386 (0.6223) acc 84.3750 (83.6458) lr 1.7713e-05 eta 0:00:28 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,191 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.4% +epoch [50/50] batch [5/31] time 0.860 (0.936) data 0.000 (0.126) loss 0.7983 (0.7974) acc 78.1250 (81.2500) lr 7.8853e-06 eta 0:00:24 +epoch [50/50] batch [10/31] time 0.884 (0.905) data 0.000 (0.063) loss 0.4873 (0.6901) acc 93.7500 (84.0625) lr 7.8853e-06 eta 0:00:19 +epoch [50/50] batch [15/31] time 0.878 (0.896) data 0.000 (0.042) loss 0.4387 (0.6778) acc 87.5000 (85.2083) lr 7.8853e-06 eta 0:00:14 +epoch [50/50] batch [20/31] time 0.875 (0.894) data 0.000 (0.032) loss 0.4661 (0.7057) acc 87.5000 (84.8438) lr 7.8853e-06 eta 0:00:09 +epoch [50/50] batch [25/31] time 0.892 (0.893) data 0.000 (0.025) loss 1.0576 (0.7488) acc 75.0000 (84.0000) lr 7.8853e-06 eta 0:00:05 +epoch [50/50] batch [30/31] time 0.884 (0.892) data 0.000 (0.021) loss 
0.8081 (0.7657) acc 78.1250 (83.5417) lr 7.8853e-06 eta 0:00:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,194 +* accuracy: 72.4% +* error: 27.6% +* macro_f1: 71.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 +Finish training +Deploy the model with the best val performance +Loading weights to prompt_learner from "output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar" (epoch = 9) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 37,953 +* accuracy: 75.9% +* error: 24.1% +* macro_f1: 75.1% +Elapsed: 2:51:12 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar new file mode 100644 index 00000000..bf5e4d6f Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..6d35265f Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699016832.ckb-gpu-a.2018357.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699016832.ckb-gpu-a.2018357.0 new file mode 100644 index 00000000..3e816437 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699016832.ckb-gpu-a.2018357.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..4565c873 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,366 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None 
+************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + 
CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: NVIDIA A100-SXM4-40GB +GPU 1: NVIDIA A100-SXM4-40GB +GPU 2: NVIDIA A100-SXM4-40GB +GPU 3: NVIDIA A100-SXM4-40GB + +Nvidia driver version: 525.125.06 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 43 bits physical, 48 bits virtual +CPU(s): 256 +On-line CPU(s) list: 0-255 +Thread(s) per core: 2 +Core(s) per socket: 64 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: AuthenticAMD +CPU family: 23 +Model: 49 +Model name: AMD EPYC 7H12 64-Core Processor +Stepping: 0 +Frequency boost: enabled +CPU MHz: 2889.976 +CPU max MHz: 2600.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5200.20 +Virtualization: AMD-V +L1d cache: 4 MiB +L1i cache: 4 MiB +L2 cache: 64 MiB +L3 cache: 512 MiB +NUMA node0 CPU(s): 0-63,128-191 +NUMA node1 CPU(s): 64-127,192-255 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected 
+Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Vulnerable +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca sme sev sev_es + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 
+[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/31] time 0.884 (1.769) data 0.000 (0.226) loss 2.6309 (3.1812) acc 50.0000 (38.1250) lr 1.0000e-05 eta 0:45:32 +epoch [1/50] batch [10/31] time 0.879 (1.321) data 0.000 (0.113) loss 2.1055 (2.9594) acc 53.1250 (41.8750) lr 1.0000e-05 eta 0:33:54 +epoch [1/50] batch [15/31] time 0.888 (1.179) data 0.000 (0.076) loss 2.1406 (2.7664) acc 68.7500 (45.2083) lr 1.0000e-05 eta 0:30:09 +epoch [1/50] batch [20/31] time 0.884 (1.103) data 0.000 (0.057) loss 1.6699 (2.6191) acc 50.0000 (46.4062) lr 1.0000e-05 eta 0:28:07 +epoch [1/50] batch [25/31] time 0.862 (1.059) data 0.000 (0.045) loss 2.6172 (2.5647) acc 50.0000 (47.8750) lr 1.0000e-05 eta 0:26:54 +epoch [1/50] batch [30/31] time 0.900 (1.030) data 0.000 (0.038) loss 1.8232 (2.4437) acc 59.3750 (49.8958) lr 1.0000e-05 eta 0:26:04 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 31,890 +* accuracy: 63.8% +* error: 36.2% +* macro_f1: 60.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/31] time 0.877 (0.943) data 0.000 (0.123) loss 1.2910 (1.8162) acc 78.1250 (63.7500) lr 2.0000e-03 eta 0:23:47 +epoch [2/50] batch [10/31] time 0.874 (0.905) data 0.000 (0.062) loss 1.1309 (1.6651) acc 68.7500 (64.3750) lr 2.0000e-03 eta 0:22:45 +epoch [2/50] batch [15/31] time 0.861 (0.896) data 0.000 (0.041) loss 1.0498 (1.5736) acc 81.2500 (64.7917) lr 2.0000e-03 eta 0:22:27 +epoch [2/50] batch [20/31] time 0.873 (0.893) data 0.000 (0.031) loss 1.8838 (1.5561) acc 62.5000 (65.9375) lr 2.0000e-03 eta 0:22:18 +epoch [2/50] batch [25/31] time 0.889 (0.892) data 0.000 (0.025) loss 1.1074 (1.5011) acc 71.8750 (66.5000) lr 2.0000e-03 eta 0:22:12 +epoch [2/50] batch [30/31] time 0.914 (0.895) data 
0.000 (0.021) loss 1.1963 (1.4579) acc 68.7500 (66.6667) lr 2.0000e-03 eta 0:22:12 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 36,962 +* accuracy: 73.9% +* error: 26.1% +* macro_f1: 73.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/31] time 0.868 (0.934) data 0.000 (0.115) loss 1.2383 (1.4051) acc 62.5000 (61.8750) lr 1.9980e-03 eta 0:23:04 +epoch [3/50] batch [10/31] time 0.890 (0.905) data 0.000 (0.058) loss 1.6523 (1.3682) acc 62.5000 (65.0000) lr 1.9980e-03 eta 0:22:17 +epoch [3/50] batch [15/31] time 0.888 (0.897) data 0.000 (0.039) loss 1.5488 (1.2870) acc 65.6250 (67.0833) lr 1.9980e-03 eta 0:22:01 +epoch [3/50] batch [20/31] time 0.895 (0.893) data 0.000 (0.029) loss 1.2393 (1.2433) acc 56.2500 (67.1875) lr 1.9980e-03 eta 0:21:50 +epoch [3/50] batch [25/31] time 0.867 (0.892) data 0.000 (0.023) loss 1.1943 (1.3245) acc 62.5000 (65.6250) lr 1.9980e-03 eta 0:21:45 +epoch [3/50] batch [30/31] time 0.873 (0.889) data 0.000 (0.019) loss 1.1924 (1.3167) acc 62.5000 (66.3542) lr 1.9980e-03 eta 0:21:36 +Evaluate on the *val* set diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..22cb2ffb --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ -0,0 +1 @@ +model-best.pth.tar diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar new file mode 100644 
index 00000000..c25ec070 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699027126.ckb-gpu-a.2173477.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699027126.ckb-gpu-a.2173477.0 new file mode 100644 index 00000000..b4768715 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699027126.ckb-gpu-a.2173477.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..f070b51d --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '32'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: 
SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 32 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 
16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA 
node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.167 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt 
xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Creating a 32-shot dataset +Saving preprocessed few-shot data to /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_32-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 32,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the 
image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! +Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/1000] time 1.550 (2.862) data 0.000 (0.361) loss 2.7012 (3.1289) acc 34.3750 (37.5000) lr 1.0000e-05 eta 1 day, 15:45:01 +epoch [1/50] batch [10/1000] time 1.559 (2.210) data 0.000 (0.181) loss 2.9297 (3.1416) acc 37.5000 (37.1875) lr 1.0000e-05 eta 1 day, 6:40:59 +epoch [1/50] batch [15/1000] time 1.556 (1.992) data 0.000 (0.121) loss 2.2539 (2.8736) acc 56.2500 (41.0417) lr 1.0000e-05 eta 1 day, 3:39:47 +epoch [1/50] batch [20/1000] time 1.570 (1.885) data 0.000 (0.091) loss 2.3027 (2.6997) acc 50.0000 (44.3750) lr 1.0000e-05 eta 1 day, 2:10:15 +epoch [1/50] batch [25/1000] time 1.654 (1.828) data 0.001 (0.073) loss 1.8682 (2.5346) acc 62.5000 (46.8750) lr 1.0000e-05 eta 1 day, 1:22:15 +epoch [1/50] batch [30/1000] time 1.588 (1.796) data 0.000 (0.061) loss 2.0176 (2.4302) acc 46.8750 (48.3333) lr 1.0000e-05 eta 1 day, 0:55:49 +epoch [1/50] batch [35/1000] time 1.666 (1.769) data 0.000 (0.052) loss 2.1113 (2.3558) acc 43.7500 (49.1964) lr 1.0000e-05 eta 1 day, 0:33:33 +epoch [1/50] batch [40/1000] time 1.564 (1.743) data 0.000 (0.046) loss 1.4189 (2.2630) acc 62.5000 (51.0156) lr 1.0000e-05 eta 1 day, 0:11:17 +epoch [1/50] batch [45/1000] time 1.541 (1.722) data 0.000 (0.041) loss 1.5010 (2.2084) acc 62.5000 (51.9444) lr 1.0000e-05 eta 23:53:53 +epoch [1/50] batch [50/1000] time 1.558 (1.706) data 0.001 (0.037) loss 1.0537 (2.1655) acc 75.0000 (53.1875) lr 1.0000e-05 eta 23:40:00 +epoch [1/50] batch [55/1000] time 1.571 (1.692) data 0.000 (0.033) loss 1.7764 (2.1291) acc 59.3750 (53.8068) lr 1.0000e-05 eta 23:28:49 +epoch [1/50] batch [60/1000] time 1.581 (1.682) data 0.001 (0.031) loss 2.2754 (2.0985) acc 59.3750 (54.5312) lr 1.0000e-05 eta 23:20:05 +epoch [1/50] 
batch [65/1000] time 1.554 (1.673) data 0.001 (0.028) loss 2.0254 (2.0795) acc 53.1250 (55.2404) lr 1.0000e-05 eta 23:11:59 +epoch [1/50] batch [70/1000] time 1.675 (1.672) data 0.001 (0.026) loss 2.1621 (2.0580) acc 50.0000 (55.4464) lr 1.0000e-05 eta 23:11:12 +epoch [1/50] batch [75/1000] time 1.640 (1.667) data 0.001 (0.025) loss 1.5146 (2.0385) acc 62.5000 (55.7500) lr 1.0000e-05 eta 23:07:20 +epoch [1/50] batch [80/1000] time 1.629 (1.664) data 0.001 (0.023) loss 1.2988 (2.0008) acc 68.7500 (56.4062) lr 1.0000e-05 eta 23:04:22 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699551792.ckb-gpu-v.mitre.org.246734.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699551792.ckb-gpu-v.mitre.org.246734.0 new file mode 100644 index 00000000..ce7a7825 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1699551792.ckb-gpu-v.mitre.org.246734.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..80df5131 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,497 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '32'] 
+output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 32 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: 
+ COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.458 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 
+Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch 
+[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Creating a 32-shot dataset +Saving preprocessed few-shot data to /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_32-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 32,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/1000] time 1.552 (2.862) data 0.001 (0.331) loss 3.2129 (3.3297) acc 31.2500 (33.1250) lr 1.0000e-05 eta 1 day, 15:44:43 +epoch [1/50] batch [10/1000] time 1.577 (2.216) data 0.000 (0.166) loss 2.3770 (2.8386) acc 46.8750 (41.2500) lr 1.0000e-05 eta 1 day, 6:45:59 +epoch [1/50] batch [15/1000] time 1.538 (1.995) data 0.000 (0.111) loss 2.6895 (2.7644) acc 37.5000 (42.2917) lr 1.0000e-05 eta 1 day, 3:41:57 +epoch [1/50] batch [20/1000] time 1.542 (1.884) data 0.001 (0.083) loss 1.8750 (2.6117) acc 62.5000 (45.1562) lr 1.0000e-05 eta 1 day, 2:09:33 +epoch [1/50] batch [25/1000] time 1.543 (1.818) data 0.000 (0.067) loss 2.3906 (2.4859) acc 37.5000 (46.6250) lr 1.0000e-05 eta 1 day, 1:14:08 +epoch [1/50] batch [30/1000] time 1.568 (1.775) data 0.001 (0.056) loss 1.8633 (2.4151) acc 59.3750 (48.0208) lr 1.0000e-05 eta 1 day, 0:37:55 +epoch [1/50] batch [35/1000] time 1.540 (1.743) data 0.000 (0.048) loss 1.5547 (2.3304) acc 65.6250 (49.4643) lr 1.0000e-05 eta 1 day, 0:11:31 +epoch [1/50] batch [40/1000] time 1.581 (1.721) data 0.000 (0.042) loss 2.7773 (2.3015) acc 43.7500 (50.3906) lr 1.0000e-05 eta 23:53:04 +epoch [1/50] batch [45/1000] time 1.539 (1.704) data 0.000 (0.037) loss 2.4551 (2.2919) acc 50.0000 (50.9722) lr 1.0000e-05 eta 23:38:21 +epoch [1/50] batch [50/1000] time 1.550 (1.688) data 0.000 (0.033) loss 2.3320 (2.2593) acc 50.0000 (51.4375) lr 1.0000e-05 eta 23:25:35 +epoch [1/50] batch [55/1000] time 1.539 (1.676) data 0.001 (0.030) loss 1.6865 (2.2085) acc 50.0000 (52.2727) lr 1.0000e-05 eta 23:15:05 +epoch [1/50] batch [60/1000] time 1.548 (1.665) data 0.001 (0.028) loss 1.9121 (2.1947) acc 59.3750 (52.6042) lr 1.0000e-05 eta 23:06:14 +epoch [1/50] batch [65/1000] time 1.578 (1.658) data 0.001 (0.026) loss 1.8125 (2.1728) acc 59.3750 
(52.9327) lr 1.0000e-05 eta 23:00:02 +epoch [1/50] batch [70/1000] time 1.579 (1.652) data 0.000 (0.024) loss 1.4854 (2.1328) acc 62.5000 (53.4821) lr 1.0000e-05 eta 22:54:34 +epoch [1/50] batch [75/1000] time 1.586 (1.646) data 0.001 (0.023) loss 1.6553 (2.1092) acc 56.2500 (53.7083) lr 1.0000e-05 eta 22:49:52 +epoch [1/50] batch [80/1000] time 1.546 (1.640) data 0.000 (0.021) loss 1.9111 (2.0955) acc 56.2500 (54.0234) lr 1.0000e-05 eta 22:44:49 +epoch [1/50] batch [85/1000] time 1.554 (1.635) data 0.001 (0.020) loss 1.5283 (2.0585) acc 68.7500 (54.6324) lr 1.0000e-05 eta 22:40:25 +epoch [1/50] batch [90/1000] time 1.561 (1.631) data 0.000 (0.019) loss 1.3135 (2.0386) acc 62.5000 (55.0694) lr 1.0000e-05 eta 22:36:51 +epoch [1/50] batch [95/1000] time 1.569 (1.627) data 0.001 (0.018) loss 1.4170 (2.0144) acc 68.7500 (55.7237) lr 1.0000e-05 eta 22:33:31 +epoch [1/50] batch [100/1000] time 1.577 (1.624) data 0.000 (0.017) loss 2.1562 (2.0048) acc 56.2500 (56.0938) lr 1.0000e-05 eta 22:30:28 +epoch [1/50] batch [105/1000] time 1.602 (1.622) data 0.000 (0.016) loss 1.7061 (1.9873) acc 53.1250 (56.3690) lr 1.0000e-05 eta 22:28:47 +epoch [1/50] batch [110/1000] time 1.578 (1.620) data 0.000 (0.015) loss 2.2207 (1.9801) acc 56.2500 (56.5341) lr 1.0000e-05 eta 22:26:37 +epoch [1/50] batch [115/1000] time 1.555 (1.617) data 0.000 (0.015) loss 1.6680 (1.9705) acc 62.5000 (56.6848) lr 1.0000e-05 eta 22:24:26 +epoch [1/50] batch [120/1000] time 1.547 (1.615) data 0.000 (0.014) loss 2.1875 (1.9721) acc 59.3750 (56.7188) lr 1.0000e-05 eta 22:22:11 +epoch [1/50] batch [125/1000] time 1.550 (1.615) data 0.000 (0.014) loss 1.1270 (1.9525) acc 71.8750 (57.1750) lr 1.0000e-05 eta 22:22:23 +epoch [1/50] batch [130/1000] time 1.555 (1.613) data 0.000 (0.013) loss 1.7090 (1.9327) acc 59.3750 (57.5962) lr 1.0000e-05 eta 22:20:35 +epoch [1/50] batch [135/1000] time 1.563 (1.611) data 0.001 (0.013) loss 2.1484 (1.9290) acc 50.0000 (57.5926) lr 1.0000e-05 eta 22:18:41 +epoch [1/50] batch 
[140/1000] time 1.561 (1.609) data 0.000 (0.012) loss 1.3672 (1.9224) acc 65.6250 (57.7902) lr 1.0000e-05 eta 22:17:07 +epoch [1/50] batch [145/1000] time 1.542 (1.607) data 0.001 (0.012) loss 1.0801 (1.9010) acc 71.8750 (58.1681) lr 1.0000e-05 eta 22:15:38 +epoch [1/50] batch [150/1000] time 1.580 (1.606) data 0.000 (0.011) loss 1.8027 (1.8871) acc 50.0000 (58.3333) lr 1.0000e-05 eta 22:14:19 +epoch [1/50] batch [155/1000] time 1.542 (1.605) data 0.000 (0.011) loss 2.1543 (1.8814) acc 56.2500 (58.5081) lr 1.0000e-05 eta 22:12:57 +epoch [1/50] batch [160/1000] time 1.561 (1.603) data 0.000 (0.011) loss 1.5566 (1.8838) acc 68.7500 (58.5352) lr 1.0000e-05 eta 22:11:45 +epoch [1/50] batch [165/1000] time 1.564 (1.602) data 0.000 (0.010) loss 2.5508 (1.8829) acc 56.2500 (58.6364) lr 1.0000e-05 eta 22:10:26 +epoch [1/50] batch [170/1000] time 1.550 (1.600) data 0.000 (0.010) loss 1.3594 (1.8768) acc 65.6250 (58.7132) lr 1.0000e-05 eta 22:09:05 +epoch [1/50] batch [175/1000] time 1.553 (1.599) data 0.001 (0.010) loss 1.7432 (1.8719) acc 59.3750 (58.8750) lr 1.0000e-05 eta 22:08:02 +epoch [1/50] batch [180/1000] time 1.541 (1.598) data 0.000 (0.010) loss 1.3662 (1.8629) acc 59.3750 (58.9931) lr 1.0000e-05 eta 22:06:44 +epoch [1/50] batch [185/1000] time 1.550 (1.597) data 0.000 (0.009) loss 1.6924 (1.8545) acc 56.2500 (59.0541) lr 1.0000e-05 eta 22:05:52 +epoch [1/50] batch [190/1000] time 1.601 (1.596) data 0.001 (0.009) loss 1.4004 (1.8490) acc 75.0000 (59.1941) lr 1.0000e-05 eta 22:05:03 +epoch [1/50] batch [195/1000] time 1.532 (1.595) data 0.000 (0.009) loss 1.7549 (1.8426) acc 56.2500 (59.3109) lr 1.0000e-05 eta 22:04:00 +epoch [1/50] batch [200/1000] time 1.541 (1.594) data 0.001 (0.009) loss 1.6318 (1.8367) acc 59.3750 (59.3281) lr 1.0000e-05 eta 22:03:09 +epoch [1/50] batch [205/1000] time 1.558 (1.593) data 0.000 (0.009) loss 1.2705 (1.8298) acc 68.7500 (59.4512) lr 1.0000e-05 eta 22:02:10 +epoch [1/50] batch [210/1000] time 1.571 (1.592) data 0.000 (0.008) loss 
1.4170 (1.8193) acc 68.7500 (59.6726) lr 1.0000e-05 eta 22:01:26 +epoch [1/50] batch [215/1000] time 1.565 (1.592) data 0.000 (0.008) loss 1.5020 (1.8125) acc 65.6250 (59.8256) lr 1.0000e-05 eta 22:00:50 +epoch [1/50] batch [220/1000] time 1.552 (1.591) data 0.001 (0.008) loss 1.7783 (1.8046) acc 50.0000 (59.9148) lr 1.0000e-05 eta 21:59:52 +epoch [1/50] batch [225/1000] time 1.554 (1.590) data 0.000 (0.008) loss 1.2324 (1.7945) acc 65.6250 (60.0972) lr 1.0000e-05 eta 21:59:23 +epoch [1/50] batch [230/1000] time 1.547 (1.590) data 0.000 (0.008) loss 2.1328 (1.7924) acc 68.7500 (60.2717) lr 1.0000e-05 eta 21:59:13 +epoch [1/50] batch [235/1000] time 1.563 (1.590) data 0.001 (0.007) loss 1.2344 (1.7844) acc 68.7500 (60.3191) lr 1.0000e-05 eta 21:58:25 +epoch [1/50] batch [240/1000] time 1.554 (1.589) data 0.000 (0.007) loss 1.3184 (1.7810) acc 65.6250 (60.2995) lr 1.0000e-05 eta 21:57:51 +epoch [1/50] batch [245/1000] time 1.554 (1.588) data 0.000 (0.007) loss 1.6240 (1.7714) acc 65.6250 (60.4592) lr 1.0000e-05 eta 21:57:10 +epoch [1/50] batch [250/1000] time 1.592 (1.588) data 0.000 (0.007) loss 1.5693 (1.7690) acc 56.2500 (60.5000) lr 1.0000e-05 eta 21:56:42 +epoch [1/50] batch [255/1000] time 1.552 (1.588) data 0.000 (0.007) loss 1.1299 (1.7669) acc 75.0000 (60.5637) lr 1.0000e-05 eta 21:56:16 +epoch [1/50] batch [260/1000] time 1.564 (1.587) data 0.000 (0.007) loss 1.7207 (1.7641) acc 59.3750 (60.5769) lr 1.0000e-05 eta 21:55:44 +epoch [1/50] batch [265/1000] time 1.565 (1.587) data 0.000 (0.007) loss 1.6064 (1.7601) acc 62.5000 (60.7075) lr 1.0000e-05 eta 21:55:14 +epoch [1/50] batch [270/1000] time 1.546 (1.586) data 0.000 (0.007) loss 1.7627 (1.7612) acc 71.8750 (60.7523) lr 1.0000e-05 eta 21:54:37 +epoch [1/50] batch [275/1000] time 1.538 (1.586) data 0.000 (0.006) loss 1.8115 (1.7601) acc 65.6250 (60.7955) lr 1.0000e-05 eta 21:54:21 +epoch [1/50] batch [280/1000] time 1.562 (1.585) data 0.000 (0.006) loss 1.1152 (1.7534) acc 59.3750 (60.8705) lr 1.0000e-05 
eta 21:53:46 +epoch [1/50] batch [285/1000] time 1.570 (1.585) data 0.000 (0.006) loss 1.2490 (1.7483) acc 71.8750 (60.8991) lr 1.0000e-05 eta 21:53:14 +epoch [1/50] batch [290/1000] time 1.535 (1.584) data 0.000 (0.006) loss 1.1660 (1.7450) acc 71.8750 (60.9698) lr 1.0000e-05 eta 21:52:37 +epoch [1/50] batch [295/1000] time 1.565 (1.584) data 0.000 (0.006) loss 2.1406 (1.7394) acc 59.3750 (61.1017) lr 1.0000e-05 eta 21:52:03 +epoch [1/50] batch [300/1000] time 1.546 (1.583) data 0.001 (0.006) loss 1.2344 (1.7353) acc 65.6250 (61.1979) lr 1.0000e-05 eta 21:51:33 +epoch [1/50] batch [305/1000] time 1.558 (1.583) data 0.001 (0.006) loss 1.6748 (1.7296) acc 56.2500 (61.3320) lr 1.0000e-05 eta 21:51:05 +epoch [1/50] batch [310/1000] time 1.580 (1.583) data 0.001 (0.006) loss 1.8223 (1.7235) acc 56.2500 (61.3609) lr 1.0000e-05 eta 21:50:42 +epoch [1/50] batch [315/1000] time 1.711 (1.583) data 0.000 (0.006) loss 1.2598 (1.7246) acc 75.0000 (61.3591) lr 1.0000e-05 eta 21:50:33 +epoch [1/50] batch [320/1000] time 1.565 (1.582) data 0.000 (0.006) loss 1.4336 (1.7220) acc 75.0000 (61.4062) lr 1.0000e-05 eta 21:50:14 +epoch [1/50] batch [325/1000] time 1.560 (1.582) data 0.000 (0.006) loss 1.5820 (1.7235) acc 71.8750 (61.3942) lr 1.0000e-05 eta 21:49:53 +epoch [1/50] batch [330/1000] time 1.559 (1.582) data 0.001 (0.005) loss 1.0791 (1.7210) acc 78.1250 (61.4489) lr 1.0000e-05 eta 21:49:21 +epoch [1/50] batch [335/1000] time 1.557 (1.581) data 0.000 (0.005) loss 1.3477 (1.7146) acc 59.3750 (61.5205) lr 1.0000e-05 eta 21:48:57 +epoch [1/50] batch [340/1000] time 1.539 (1.581) data 0.000 (0.005) loss 1.6904 (1.7100) acc 62.5000 (61.5993) lr 1.0000e-05 eta 21:48:24 +epoch [1/50] batch [345/1000] time 1.550 (1.580) data 0.000 (0.005) loss 1.2930 (1.7045) acc 68.7500 (61.7391) lr 1.0000e-05 eta 21:47:46 +epoch [1/50] batch [350/1000] time 1.544 (1.580) data 0.000 (0.005) loss 1.4277 (1.7008) acc 65.6250 (61.7857) lr 1.0000e-05 eta 21:47:11 +epoch [1/50] batch [355/1000] time 
1.549 (1.579) data 0.000 (0.005) loss 1.7012 (1.6969) acc 46.8750 (61.7958) lr 1.0000e-05 eta 21:46:46 +epoch [1/50] batch [360/1000] time 1.574 (1.579) data 0.000 (0.005) loss 1.2656 (1.6946) acc 71.8750 (61.8056) lr 1.0000e-05 eta 21:46:24 +epoch [1/50] batch [365/1000] time 1.575 (1.579) data 0.000 (0.005) loss 1.6045 (1.6914) acc 50.0000 (61.8236) lr 1.0000e-05 eta 21:46:07 +epoch [1/50] batch [370/1000] time 1.540 (1.579) data 0.001 (0.005) loss 1.3740 (1.6903) acc 78.1250 (61.8497) lr 1.0000e-05 eta 21:45:44 +epoch [1/50] batch [375/1000] time 1.552 (1.578) data 0.000 (0.005) loss 1.1006 (1.6865) acc 71.8750 (61.9250) lr 1.0000e-05 eta 21:45:25 +epoch [1/50] batch [380/1000] time 1.561 (1.578) data 0.000 (0.005) loss 2.2461 (1.6861) acc 43.7500 (61.8997) lr 1.0000e-05 eta 21:45:24 +epoch [1/50] batch [385/1000] time 1.536 (1.578) data 0.000 (0.005) loss 2.0078 (1.6851) acc 59.3750 (61.8506) lr 1.0000e-05 eta 21:44:57 +epoch [1/50] batch [390/1000] time 1.542 (1.578) data 0.000 (0.005) loss 1.7256 (1.6822) acc 56.2500 (61.9151) lr 1.0000e-05 eta 21:44:32 +epoch [1/50] batch [395/1000] time 1.565 (1.578) data 0.000 (0.005) loss 1.2822 (1.6798) acc 71.8750 (61.9778) lr 1.0000e-05 eta 21:44:15 +epoch [1/50] batch [400/1000] time 1.576 (1.577) data 0.000 (0.005) loss 1.3887 (1.6733) acc 78.1250 (62.1094) lr 1.0000e-05 eta 21:43:57 +epoch [1/50] batch [405/1000] time 1.567 (1.577) data 0.001 (0.005) loss 1.0195 (1.6718) acc 65.6250 (62.1065) lr 1.0000e-05 eta 21:43:40 +epoch [1/50] batch [410/1000] time 1.569 (1.577) data 0.001 (0.004) loss 0.8125 (1.6669) acc 75.0000 (62.1951) lr 1.0000e-05 eta 21:43:24 +epoch [1/50] batch [415/1000] time 1.572 (1.577) data 0.000 (0.004) loss 1.6006 (1.6648) acc 59.3750 (62.2289) lr 1.0000e-05 eta 21:43:05 +epoch [1/50] batch [420/1000] time 1.588 (1.577) data 0.000 (0.004) loss 1.3242 (1.6677) acc 65.6250 (62.1875) lr 1.0000e-05 eta 21:42:48 +epoch [1/50] batch [425/1000] time 1.548 (1.577) data 0.001 (0.004) loss 2.1562 (1.6683) 
acc 59.3750 (62.1765) lr 1.0000e-05 eta 21:42:56 +epoch [1/50] batch [430/1000] time 1.573 (1.577) data 0.000 (0.004) loss 1.2793 (1.6651) acc 65.6250 (62.2238) lr 1.0000e-05 eta 21:42:42 +epoch [1/50] batch [435/1000] time 1.578 (1.577) data 0.000 (0.004) loss 1.6211 (1.6635) acc 62.5000 (62.2126) lr 1.0000e-05 eta 21:42:19 +epoch [1/50] batch [440/1000] time 1.559 (1.576) data 0.000 (0.004) loss 1.8457 (1.6627) acc 62.5000 (62.2301) lr 1.0000e-05 eta 21:42:02 +epoch [1/50] batch [445/1000] time 1.554 (1.576) data 0.000 (0.004) loss 1.1553 (1.6603) acc 68.7500 (62.2612) lr 1.0000e-05 eta 21:41:43 +epoch [1/50] batch [450/1000] time 1.545 (1.576) data 0.000 (0.004) loss 1.3271 (1.6583) acc 59.3750 (62.2917) lr 1.0000e-05 eta 21:41:27 +epoch [1/50] batch [455/1000] time 1.540 (1.576) data 0.000 (0.004) loss 1.7930 (1.6536) acc 62.5000 (62.4038) lr 1.0000e-05 eta 21:41:09 +epoch [1/50] batch [460/1000] time 1.580 (1.576) data 0.000 (0.004) loss 1.6719 (1.6530) acc 62.5000 (62.3981) lr 1.0000e-05 eta 21:40:55 +epoch [1/50] batch [465/1000] time 1.539 (1.575) data 0.000 (0.004) loss 1.1094 (1.6495) acc 78.1250 (62.4597) lr 1.0000e-05 eta 21:40:38 +epoch [1/50] batch [470/1000] time 1.558 (1.575) data 0.000 (0.004) loss 1.1875 (1.6469) acc 75.0000 (62.5266) lr 1.0000e-05 eta 21:40:34 +epoch [1/50] batch [475/1000] time 1.553 (1.575) data 0.001 (0.004) loss 1.7500 (1.6480) acc 65.6250 (62.5066) lr 1.0000e-05 eta 21:40:21 +epoch [1/50] batch [480/1000] time 1.525 (1.575) data 0.000 (0.004) loss 0.8691 (1.6454) acc 75.0000 (62.5456) lr 1.0000e-05 eta 21:40:05 +epoch [1/50] batch [485/1000] time 1.581 (1.575) data 0.000 (0.004) loss 0.9414 (1.6411) acc 68.7500 (62.5966) lr 1.0000e-05 eta 21:39:52 +epoch [1/50] batch [490/1000] time 1.567 (1.575) data 0.000 (0.004) loss 1.5732 (1.6404) acc 59.3750 (62.6212) lr 1.0000e-05 eta 21:39:40 +epoch [1/50] batch [495/1000] time 1.540 (1.575) data 0.000 (0.004) loss 1.2236 (1.6378) acc 71.8750 (62.6831) lr 1.0000e-05 eta 21:39:24 
+epoch [1/50] batch [500/1000] time 1.540 (1.575) data 0.000 (0.004) loss 1.6729 (1.6359) acc 65.6250 (62.7375) lr 1.0000e-05 eta 21:39:11 +epoch [1/50] batch [505/1000] time 1.582 (1.575) data 0.000 (0.004) loss 1.1504 (1.6365) acc 75.0000 (62.7351) lr 1.0000e-05 eta 21:38:59 +epoch [1/50] batch [510/1000] time 1.570 (1.575) data 0.000 (0.004) loss 1.2139 (1.6324) acc 68.7500 (62.8125) lr 1.0000e-05 eta 21:38:48 +epoch [1/50] batch [515/1000] time 1.543 (1.574) data 0.001 (0.004) loss 1.9238 (1.6321) acc 62.5000 (62.7973) lr 1.0000e-05 eta 21:38:33 +epoch [1/50] batch [520/1000] time 1.557 (1.574) data 0.000 (0.004) loss 0.9771 (1.6294) acc 68.7500 (62.8185) lr 1.0000e-05 eta 21:38:20 +epoch [1/50] batch [525/1000] time 1.592 (1.574) data 0.000 (0.004) loss 1.4082 (1.6278) acc 65.6250 (62.8393) lr 1.0000e-05 eta 21:38:07 +epoch [1/50] batch [530/1000] time 1.590 (1.575) data 0.000 (0.004) loss 1.3096 (1.6234) acc 53.1250 (62.8950) lr 1.0000e-05 eta 21:38:11 +epoch [1/50] batch [535/1000] time 1.570 (1.574) data 0.000 (0.004) loss 0.9595 (1.6194) acc 75.0000 (62.9614) lr 1.0000e-05 eta 21:38:00 +epoch [1/50] batch [540/1000] time 1.559 (1.574) data 0.000 (0.003) loss 1.7139 (1.6180) acc 68.7500 (62.9745) lr 1.0000e-05 eta 21:37:42 +epoch [1/50] batch [545/1000] time 1.559 (1.574) data 0.001 (0.003) loss 2.2793 (1.6201) acc 50.0000 (62.9415) lr 1.0000e-05 eta 21:37:23 +epoch [1/50] batch [550/1000] time 1.555 (1.574) data 0.001 (0.003) loss 1.7939 (1.6202) acc 56.2500 (62.9432) lr 1.0000e-05 eta 21:37:10 +epoch [1/50] batch [555/1000] time 1.558 (1.574) data 0.000 (0.003) loss 1.1484 (1.6173) acc 81.2500 (63.0462) lr 1.0000e-05 eta 21:36:56 +epoch [1/50] batch [560/1000] time 1.566 (1.574) data 0.000 (0.003) loss 1.7490 (1.6158) acc 65.6250 (63.0804) lr 1.0000e-05 eta 21:36:42 +epoch [1/50] batch [565/1000] time 1.550 (1.574) data 0.000 (0.003) loss 1.4502 (1.6132) acc 68.7500 (63.1471) lr 1.0000e-05 eta 21:36:26 +epoch [1/50] batch [570/1000] time 1.555 (1.573) 
data 0.000 (0.003) loss 1.3955 (1.6107) acc 65.6250 (63.2072) lr 1.0000e-05 eta 21:36:12 +epoch [1/50] batch [575/1000] time 1.538 (1.573) data 0.000 (0.003) loss 1.5273 (1.6103) acc 65.6250 (63.2228) lr 1.0000e-05 eta 21:36:05 +epoch [1/50] batch [580/1000] time 1.559 (1.573) data 0.000 (0.003) loss 0.7612 (1.6076) acc 78.1250 (63.2651) lr 1.0000e-05 eta 21:35:51 +epoch [1/50] batch [585/1000] time 1.559 (1.573) data 0.000 (0.003) loss 1.6611 (1.6056) acc 56.2500 (63.2585) lr 1.0000e-05 eta 21:35:33 +epoch [1/50] batch [590/1000] time 1.538 (1.573) data 0.001 (0.003) loss 1.0967 (1.6036) acc 71.8750 (63.2998) lr 1.0000e-05 eta 21:35:18 +epoch [1/50] batch [595/1000] time 1.579 (1.573) data 0.000 (0.003) loss 1.7236 (1.6007) acc 46.8750 (63.2931) lr 1.0000e-05 eta 21:35:05 +epoch [1/50] batch [600/1000] time 1.535 (1.573) data 0.001 (0.003) loss 1.2207 (1.5982) acc 75.0000 (63.3438) lr 1.0000e-05 eta 21:34:50 +epoch [1/50] batch [605/1000] time 1.563 (1.573) data 0.001 (0.003) loss 2.3047 (1.5964) acc 53.1250 (63.4039) lr 1.0000e-05 eta 21:34:40 +epoch [1/50] batch [610/1000] time 1.579 (1.573) data 0.000 (0.003) loss 1.3408 (1.5956) acc 59.3750 (63.4068) lr 1.0000e-05 eta 21:34:30 +epoch [1/50] batch [615/1000] time 1.578 (1.573) data 0.000 (0.003) loss 1.3525 (1.5943) acc 65.6250 (63.4451) lr 1.0000e-05 eta 21:34:22 +epoch [1/50] batch [620/1000] time 1.582 (1.573) data 0.000 (0.003) loss 1.8574 (1.5921) acc 68.7500 (63.4929) lr 1.0000e-05 eta 21:34:28 +epoch [1/50] batch [625/1000] time 1.537 (1.573) data 0.000 (0.003) loss 1.6221 (1.5899) acc 59.3750 (63.5400) lr 1.0000e-05 eta 21:34:16 +epoch [1/50] batch [630/1000] time 1.570 (1.573) data 0.000 (0.003) loss 1.2871 (1.5869) acc 75.0000 (63.6210) lr 1.0000e-05 eta 21:34:02 +epoch [1/50] batch [635/1000] time 1.570 (1.573) data 0.000 (0.003) loss 1.5420 (1.5871) acc 68.7500 (63.6220) lr 1.0000e-05 eta 21:33:51 +epoch [1/50] batch [640/1000] time 1.575 (1.573) data 0.000 (0.003) loss 1.2715 (1.5861) acc 65.6250 
(63.6279) lr 1.0000e-05 eta 21:33:40 +epoch [1/50] batch [645/1000] time 1.570 (1.572) data 0.001 (0.003) loss 1.5107 (1.5851) acc 59.3750 (63.6386) lr 1.0000e-05 eta 21:33:30 +epoch [1/50] batch [650/1000] time 1.570 (1.573) data 0.000 (0.003) loss 1.0576 (1.5833) acc 71.8750 (63.6779) lr 1.0000e-05 eta 21:33:25 +epoch [1/50] batch [655/1000] time 1.595 (1.573) data 0.000 (0.003) loss 1.9365 (1.5841) acc 53.1250 (63.6832) lr 1.0000e-05 eta 21:33:17 +epoch [1/50] batch [660/1000] time 1.557 (1.572) data 0.000 (0.003) loss 1.7334 (1.5816) acc 62.5000 (63.7121) lr 1.0000e-05 eta 21:33:05 +epoch [1/50] batch [665/1000] time 1.547 (1.573) data 0.000 (0.003) loss 1.3271 (1.5806) acc 68.7500 (63.7547) lr 1.0000e-05 eta 21:32:59 +epoch [1/50] batch [670/1000] time 1.552 (1.572) data 0.001 (0.003) loss 1.7510 (1.5797) acc 56.2500 (63.7313) lr 1.0000e-05 eta 21:32:44 +epoch [1/50] batch [675/1000] time 1.540 (1.572) data 0.000 (0.003) loss 1.5967 (1.5775) acc 59.3750 (63.7685) lr 1.0000e-05 eta 21:32:30 +epoch [1/50] batch [680/1000] time 1.697 (1.572) data 0.001 (0.003) loss 1.6094 (1.5784) acc 56.2500 (63.7270) lr 1.0000e-05 eta 21:32:28 +epoch [1/50] batch [685/1000] time 1.541 (1.572) data 0.000 (0.003) loss 0.8765 (1.5756) acc 78.1250 (63.7682) lr 1.0000e-05 eta 21:32:13 +epoch [1/50] batch [690/1000] time 1.571 (1.572) data 0.001 (0.003) loss 1.2480 (1.5753) acc 65.6250 (63.7636) lr 1.0000e-05 eta 21:32:01 +epoch [1/50] batch [695/1000] time 1.566 (1.572) data 0.000 (0.003) loss 0.9355 (1.5732) acc 68.7500 (63.7860) lr 1.0000e-05 eta 21:31:45 +epoch [1/50] batch [700/1000] time 1.563 (1.572) data 0.000 (0.003) loss 0.5439 (1.5704) acc 81.2500 (63.8348) lr 1.0000e-05 eta 21:31:34 +epoch [1/50] batch [705/1000] time 1.557 (1.572) data 0.001 (0.003) loss 1.0576 (1.5678) acc 59.3750 (63.8697) lr 1.0000e-05 eta 21:31:23 +epoch [1/50] batch [710/1000] time 1.598 (1.572) data 0.001 (0.003) loss 0.5308 (1.5680) acc 81.2500 (63.8512) lr 1.0000e-05 eta 21:31:13 +epoch [1/50] 
batch [715/1000] time 1.550 (1.572) data 0.001 (0.003) loss 1.7471 (1.5666) acc 59.3750 (63.8855) lr 1.0000e-05 eta 21:31:04 +epoch [1/50] batch [720/1000] time 1.570 (1.572) data 0.000 (0.003) loss 1.5400 (1.5664) acc 53.1250 (63.8889) lr 1.0000e-05 eta 21:30:54 +epoch [1/50] batch [725/1000] time 1.706 (1.572) data 0.001 (0.003) loss 0.9927 (1.5660) acc 84.3750 (63.8793) lr 1.0000e-05 eta 21:30:52 +epoch [1/50] batch [730/1000] time 1.572 (1.572) data 0.000 (0.003) loss 1.2324 (1.5633) acc 65.6250 (63.9170) lr 1.0000e-05 eta 21:30:40 +epoch [1/50] batch [735/1000] time 1.550 (1.572) data 0.000 (0.003) loss 0.9204 (1.5604) acc 78.1250 (63.9668) lr 1.0000e-05 eta 21:30:26 +epoch [1/50] batch [740/1000] time 1.575 (1.572) data 0.000 (0.003) loss 1.3184 (1.5587) acc 68.7500 (63.9907) lr 1.0000e-05 eta 21:30:15 +epoch [1/50] batch [745/1000] time 1.540 (1.571) data 0.000 (0.003) loss 1.5137 (1.5569) acc 62.5000 (64.0101) lr 1.0000e-05 eta 21:30:01 +epoch [1/50] batch [750/1000] time 1.577 (1.571) data 0.000 (0.003) loss 2.4492 (1.5558) acc 56.2500 (64.0167) lr 1.0000e-05 eta 21:29:46 +epoch [1/50] batch [755/1000] time 1.552 (1.571) data 0.000 (0.003) loss 1.0889 (1.5553) acc 75.0000 (64.0066) lr 1.0000e-05 eta 21:29:35 +epoch [1/50] batch [760/1000] time 1.552 (1.571) data 0.000 (0.003) loss 1.1006 (1.5521) acc 75.0000 (64.0666) lr 1.0000e-05 eta 21:29:28 +epoch [1/50] batch [765/1000] time 1.562 (1.571) data 0.000 (0.003) loss 1.4385 (1.5503) acc 71.8750 (64.0972) lr 1.0000e-05 eta 21:29:19 +epoch [1/50] batch [770/1000] time 1.543 (1.571) data 0.000 (0.003) loss 1.2725 (1.5511) acc 71.8750 (64.0950) lr 1.0000e-05 eta 21:29:17 +epoch [1/50] batch [775/1000] time 1.559 (1.571) data 0.000 (0.003) loss 1.0547 (1.5510) acc 75.0000 (64.1048) lr 1.0000e-05 eta 21:29:05 +epoch [1/50] batch [780/1000] time 1.550 (1.571) data 0.000 (0.003) loss 1.7061 (1.5497) acc 56.2500 (64.1146) lr 1.0000e-05 eta 21:28:55 +epoch [1/50] batch [785/1000] time 1.588 (1.571) data 0.000 
(0.003) loss 1.2412 (1.5472) acc 68.7500 (64.1441) lr 1.0000e-05 eta 21:28:46 +epoch [1/50] batch [790/1000] time 1.572 (1.571) data 0.000 (0.003) loss 1.1396 (1.5455) acc 56.2500 (64.1337) lr 1.0000e-05 eta 21:28:36 +epoch [1/50] batch [795/1000] time 1.560 (1.571) data 0.001 (0.003) loss 1.2070 (1.5448) acc 68.7500 (64.1627) lr 1.0000e-05 eta 21:28:25 +epoch [1/50] batch [800/1000] time 1.535 (1.571) data 0.000 (0.002) loss 0.9888 (1.5421) acc 75.0000 (64.1797) lr 1.0000e-05 eta 21:28:12 +epoch [1/50] batch [805/1000] time 1.560 (1.571) data 0.000 (0.002) loss 0.9526 (1.5397) acc 68.7500 (64.2314) lr 1.0000e-05 eta 21:28:00 +epoch [1/50] batch [810/1000] time 1.529 (1.571) data 0.000 (0.002) loss 1.0791 (1.5384) acc 75.0000 (64.2515) lr 1.0000e-05 eta 21:27:48 +epoch [1/50] batch [815/1000] time 1.555 (1.571) data 0.001 (0.002) loss 1.8682 (1.5382) acc 53.1250 (64.2638) lr 1.0000e-05 eta 21:27:37 +epoch [1/50] batch [820/1000] time 1.550 (1.571) data 0.000 (0.002) loss 1.2422 (1.5381) acc 71.8750 (64.2607) lr 1.0000e-05 eta 21:27:26 +epoch [1/50] batch [825/1000] time 1.549 (1.571) data 0.000 (0.002) loss 0.9912 (1.5367) acc 68.7500 (64.2803) lr 1.0000e-05 eta 21:27:14 +epoch [1/50] batch [830/1000] time 1.543 (1.570) data 0.000 (0.002) loss 1.5879 (1.5368) acc 65.6250 (64.2771) lr 1.0000e-05 eta 21:27:00 +epoch [1/50] batch [835/1000] time 1.592 (1.571) data 0.001 (0.002) loss 1.0732 (1.5357) acc 71.8750 (64.2814) lr 1.0000e-05 eta 21:27:01 +epoch [1/50] batch [840/1000] time 1.546 (1.571) data 0.001 (0.002) loss 1.4932 (1.5344) acc 68.7500 (64.2932) lr 1.0000e-05 eta 21:26:49 +epoch [1/50] batch [845/1000] time 1.561 (1.570) data 0.000 (0.002) loss 1.3154 (1.5347) acc 71.8750 (64.2825) lr 1.0000e-05 eta 21:26:37 +epoch [1/50] batch [850/1000] time 1.561 (1.571) data 0.000 (0.002) loss 1.0137 (1.5339) acc 81.2500 (64.2941) lr 1.0000e-05 eta 21:26:30 +epoch [1/50] batch [855/1000] time 1.559 (1.571) data 0.000 (0.002) loss 0.8994 (1.5327) acc 71.8750 (64.3311) lr 
1.0000e-05 eta 21:26:24 +epoch [1/50] batch [860/1000] time 1.571 (1.571) data 0.001 (0.002) loss 0.9482 (1.5314) acc 75.0000 (64.3423) lr 1.0000e-05 eta 21:26:16 +epoch [1/50] batch [865/1000] time 1.621 (1.571) data 0.001 (0.002) loss 1.2021 (1.5308) acc 71.8750 (64.3497) lr 1.0000e-05 eta 21:26:26 +epoch [1/50] batch [870/1000] time 1.594 (1.571) data 0.000 (0.002) loss 1.2627 (1.5292) acc 68.7500 (64.3966) lr 1.0000e-05 eta 21:26:31 +epoch [1/50] batch [875/1000] time 1.598 (1.571) data 0.001 (0.002) loss 1.3223 (1.5280) acc 71.8750 (64.4179) lr 1.0000e-05 eta 21:26:28 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699551960.ckb-gpu-v.mitre.org.249380.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699551960.ckb-gpu-v.mitre.org.249380.0 new file mode 100644 index 00000000..c0c75a5a Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1699551960.ckb-gpu-v.mitre.org.249380.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..b2c9fca3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,10707 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_bestval_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 
'end', 'DATASET.NUM_SHOTS', '32'] +output_dir: output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 32 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: Could not collect +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-PCIE-32GB +GPU 1: Tesla V100-PCIE-32GB + +Nvidia driver version: 470.223.02 +cuDNN version: /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 48 +On-line CPU(s) list: 0-47 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz +Stepping: 4 +CPU MHz: 800.084 +CPU max MHz: 3000.0000 +CPU min MHz: 800.0000 +BogoMIPS: 4200.00 +Virtualization: VT-x +L1d cache: 768 KiB +L1i cache: 768 KiB +L2 cache: 24 MiB +L3 cache: 33 MiB +NUMA node0 CPU(s): 0-11,24-35 +NUMA node1 CPU(s): 12-23,36-47 +Vulnerability Gather data sampling: Mitigation; Microcode +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable +Vulnerability Mds: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Meltdown: Mitigation; PTI +Vulnerability Mmio stale data: Mitigation; Clear CPU buffers; SMT vulnerable +Vulnerability Retbleed: Mitigation; IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 
+Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT vulnerable +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single pti intel_ppin ssbd mba ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch 
+[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Creating a 32-shot dataset +Saving preprocessed few-shot data to /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_32-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 32,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/50] batch [5/1000] time 1.551 (2.802) data 0.000 (0.306) loss 2.5234 (2.9820) acc 46.8750 (41.2500) lr 1.0000e-05 eta 1 day, 14:54:32 +epoch [1/50] batch [10/1000] time 1.697 (2.217) data 0.000 (0.153) loss 1.9727 (2.6852) acc 62.5000 (46.5625) lr 1.0000e-05 eta 1 day, 6:46:59 +epoch [1/50] batch [15/1000] time 1.594 (2.010) data 0.000 (0.103) loss 2.2012 (2.5629) acc 46.8750 (47.0833) lr 1.0000e-05 eta 1 day, 3:54:20 +epoch [1/50] batch [20/1000] time 1.654 (1.907) data 0.001 (0.077) loss 1.2891 (2.4267) acc 78.1250 (49.2188) lr 1.0000e-05 eta 1 day, 2:28:56 +epoch [1/50] batch [25/1000] time 1.554 (1.844) data 0.001 (0.062) loss 2.0820 (2.2772) acc 53.1250 (51.6250) lr 1.0000e-05 eta 1 day, 1:36:07 +epoch [1/50] batch [30/1000] time 1.533 (1.797) data 0.000 (0.052) loss 1.6045 (2.2089) acc 65.6250 (52.9167) lr 1.0000e-05 eta 1 day, 0:56:30 +epoch [1/50] batch [35/1000] time 1.566 (1.764) data 0.001 (0.044) loss 1.2236 (2.1169) acc 68.7500 (54.5536) lr 1.0000e-05 eta 1 day, 0:28:43 +epoch [1/50] batch [40/1000] time 1.574 (1.738) data 0.000 (0.039) loss 2.5605 (2.0809) acc 50.0000 (55.5469) lr 1.0000e-05 eta 1 day, 0:06:48 +epoch [1/50] batch [45/1000] time 1.569 (1.718) data 0.000 (0.035) loss 1.2598 (2.0213) acc 71.8750 (56.3889) lr 1.0000e-05 eta 23:50:27 +epoch [1/50] batch [50/1000] time 1.579 (1.703) data 0.000 (0.031) loss 1.6699 (1.9927) acc 62.5000 (56.8125) lr 1.0000e-05 eta 23:37:22 +epoch [1/50] batch [55/1000] time 1.575 (1.690) data 0.001 (0.028) loss 2.2207 (1.9767) acc 59.3750 (57.3295) lr 1.0000e-05 eta 23:27:05 +epoch [1/50] batch [60/1000] time 1.556 (1.680) data 0.001 (0.026) loss 1.3506 (1.9538) acc 71.8750 (57.8125) lr 1.0000e-05 eta 23:18:40 +epoch [1/50] batch [65/1000] time 1.565 (1.671) data 0.001 (0.024) loss 1.4521 (1.9284) acc 
71.8750 (58.1731) lr 1.0000e-05 eta 23:10:33 +epoch [1/50] batch [70/1000] time 1.581 (1.663) data 0.000 (0.022) loss 1.4893 (1.8995) acc 68.7500 (58.7500) lr 1.0000e-05 eta 23:03:37 +epoch [1/50] batch [75/1000] time 1.576 (1.656) data 0.000 (0.021) loss 1.1084 (1.8495) acc 62.5000 (59.5000) lr 1.0000e-05 eta 22:57:51 +epoch [1/50] batch [80/1000] time 1.584 (1.651) data 0.001 (0.020) loss 2.1992 (1.8455) acc 53.1250 (59.1797) lr 1.0000e-05 eta 22:53:33 +epoch [1/50] batch [85/1000] time 1.575 (1.646) data 0.000 (0.019) loss 1.3047 (1.8133) acc 68.7500 (59.7426) lr 1.0000e-05 eta 22:49:23 +epoch [1/50] batch [90/1000] time 1.590 (1.641) data 0.000 (0.018) loss 2.2207 (1.8245) acc 56.2500 (59.6528) lr 1.0000e-05 eta 22:45:20 +epoch [1/50] batch [95/1000] time 1.562 (1.638) data 0.000 (0.017) loss 1.2715 (1.8125) acc 81.2500 (59.8684) lr 1.0000e-05 eta 22:42:14 +epoch [1/50] batch [100/1000] time 1.568 (1.634) data 0.001 (0.016) loss 1.7676 (1.7934) acc 65.6250 (60.2500) lr 1.0000e-05 eta 22:38:59 +epoch [1/50] batch [105/1000] time 1.549 (1.631) data 0.000 (0.015) loss 1.5566 (1.7797) acc 62.5000 (60.6845) lr 1.0000e-05 eta 22:36:36 +epoch [1/50] batch [110/1000] time 1.571 (1.628) data 0.001 (0.014) loss 1.8145 (1.7798) acc 56.2500 (60.4830) lr 1.0000e-05 eta 22:33:58 +epoch [1/50] batch [115/1000] time 1.568 (1.625) data 0.002 (0.014) loss 1.4355 (1.7596) acc 56.2500 (60.7337) lr 1.0000e-05 eta 22:31:23 +epoch [1/50] batch [120/1000] time 1.555 (1.623) data 0.000 (0.013) loss 1.1152 (1.7482) acc 81.2500 (60.9635) lr 1.0000e-05 eta 22:29:19 +epoch [1/50] batch [125/1000] time 1.554 (1.623) data 0.001 (0.013) loss 1.8975 (1.7416) acc 59.3750 (61.0250) lr 1.0000e-05 eta 22:29:09 +epoch [1/50] batch [130/1000] time 1.566 (1.621) data 0.001 (0.012) loss 0.8594 (1.7171) acc 78.1250 (61.4663) lr 1.0000e-05 eta 22:26:59 +epoch [1/50] batch [135/1000] time 1.569 (1.619) data 0.000 (0.012) loss 1.8232 (1.7211) acc 68.7500 (61.3657) lr 1.0000e-05 eta 22:25:08 +epoch [1/50] 
batch [140/1000] time 1.569 (1.617) data 0.001 (0.012) loss 1.5400 (1.7107) acc 68.7500 (61.6518) lr 1.0000e-05 eta 22:23:30 +epoch [1/50] batch [145/1000] time 1.546 (1.614) data 0.002 (0.011) loss 1.4785 (1.6953) acc 59.3750 (61.7241) lr 1.0000e-05 eta 22:21:26 +epoch [1/50] batch [150/1000] time 1.537 (1.612) data 0.000 (0.011) loss 1.3145 (1.6830) acc 68.7500 (61.9167) lr 1.0000e-05 eta 22:19:39 +epoch [1/50] batch [155/1000] time 1.558 (1.611) data 0.000 (0.010) loss 1.5166 (1.6687) acc 62.5000 (62.0565) lr 1.0000e-05 eta 22:18:32 +epoch [1/50] batch [160/1000] time 1.562 (1.610) data 0.001 (0.010) loss 1.1992 (1.6632) acc 68.7500 (62.0703) lr 1.0000e-05 eta 22:17:05 +epoch [1/50] batch [165/1000] time 1.565 (1.608) data 0.001 (0.010) loss 1.0918 (1.6549) acc 65.6250 (62.2538) lr 1.0000e-05 eta 22:15:50 +epoch [1/50] batch [170/1000] time 1.567 (1.607) data 0.000 (0.010) loss 0.8086 (1.6541) acc 87.5000 (62.2243) lr 1.0000e-05 eta 22:14:37 +epoch [1/50] batch [175/1000] time 1.585 (1.606) data 0.000 (0.009) loss 1.2354 (1.6569) acc 65.6250 (62.0714) lr 1.0000e-05 eta 22:13:28 +epoch [1/50] batch [180/1000] time 1.565 (1.605) data 0.001 (0.009) loss 1.0898 (1.6505) acc 71.8750 (62.1181) lr 1.0000e-05 eta 22:12:23 +epoch [1/50] batch [185/1000] time 1.535 (1.604) data 0.001 (0.009) loss 0.8770 (1.6433) acc 75.0000 (62.2973) lr 1.0000e-05 eta 22:11:24 +epoch [1/50] batch [190/1000] time 1.573 (1.603) data 0.000 (0.009) loss 1.2021 (1.6438) acc 59.3750 (62.1875) lr 1.0000e-05 eta 22:10:34 +epoch [1/50] batch [195/1000] time 1.560 (1.602) data 0.000 (0.008) loss 1.9932 (1.6383) acc 56.2500 (62.3558) lr 1.0000e-05 eta 22:09:26 +epoch [1/50] batch [200/1000] time 1.553 (1.601) data 0.001 (0.008) loss 1.0859 (1.6334) acc 75.0000 (62.5000) lr 1.0000e-05 eta 22:08:36 +epoch [1/50] batch [205/1000] time 1.543 (1.600) data 0.001 (0.008) loss 1.2969 (1.6313) acc 68.7500 (62.5610) lr 1.0000e-05 eta 22:07:36 +epoch [1/50] batch [210/1000] time 1.561 (1.599) data 0.001 
(0.008) loss 1.4824 (1.6289) acc 59.3750 (62.5000) lr 1.0000e-05 eta 22:06:43 +epoch [1/50] batch [215/1000] time 1.566 (1.598) data 0.000 (0.008) loss 1.1113 (1.6224) acc 81.2500 (62.7180) lr 1.0000e-05 eta 22:05:48 +epoch [1/50] batch [220/1000] time 1.584 (1.597) data 0.001 (0.008) loss 1.8818 (1.6194) acc 65.6250 (62.8693) lr 1.0000e-05 eta 22:05:05 +epoch [1/50] batch [225/1000] time 1.535 (1.596) data 0.000 (0.007) loss 1.1611 (1.6131) acc 65.6250 (62.9028) lr 1.0000e-05 eta 22:04:11 +epoch [1/50] batch [230/1000] time 1.564 (1.596) data 0.000 (0.007) loss 1.4658 (1.6085) acc 46.8750 (62.9891) lr 1.0000e-05 eta 22:03:51 +epoch [1/50] batch [235/1000] time 1.551 (1.595) data 0.000 (0.007) loss 1.4189 (1.6005) acc 65.6250 (63.1383) lr 1.0000e-05 eta 22:03:13 +epoch [1/50] batch [240/1000] time 1.552 (1.595) data 0.001 (0.007) loss 1.1719 (1.5945) acc 71.8750 (63.2292) lr 1.0000e-05 eta 22:02:29 +epoch [1/50] batch [245/1000] time 1.554 (1.594) data 0.001 (0.007) loss 1.1279 (1.5920) acc 59.3750 (63.2653) lr 1.0000e-05 eta 22:01:38 +epoch [1/50] batch [250/1000] time 1.562 (1.593) data 0.000 (0.007) loss 1.6709 (1.5978) acc 62.5000 (63.1750) lr 1.0000e-05 eta 22:00:50 +epoch [1/50] batch [255/1000] time 1.572 (1.592) data 0.000 (0.007) loss 0.6953 (1.5883) acc 87.5000 (63.4191) lr 1.0000e-05 eta 22:00:14 +epoch [1/50] batch [260/1000] time 1.572 (1.592) data 0.000 (0.006) loss 1.2334 (1.5858) acc 68.7500 (63.4135) lr 1.0000e-05 eta 21:59:57 +epoch [1/50] batch [265/1000] time 1.547 (1.592) data 0.000 (0.006) loss 1.7041 (1.5844) acc 62.5000 (63.3726) lr 1.0000e-05 eta 21:59:27 +epoch [1/50] batch [270/1000] time 1.552 (1.591) data 0.000 (0.006) loss 0.7261 (1.5818) acc 84.3750 (63.4028) lr 1.0000e-05 eta 21:58:37 +epoch [1/50] batch [275/1000] time 1.550 (1.591) data 0.000 (0.006) loss 1.0898 (1.5803) acc 68.7500 (63.4318) lr 1.0000e-05 eta 21:58:32 +epoch [1/50] batch [280/1000] time 1.542 (1.590) data 0.000 (0.006) loss 1.7979 (1.5800) acc 56.2500 (63.4375) lr 
1.0000e-05 eta 21:57:57 +epoch [1/50] batch [285/1000] time 1.563 (1.590) data 0.001 (0.006) loss 2.4004 (1.5814) acc 50.0000 (63.4101) lr 1.0000e-05 eta 21:57:20 +epoch [1/50] batch [290/1000] time 1.550 (1.589) data 0.000 (0.006) loss 1.1191 (1.5781) acc 71.8750 (63.5129) lr 1.0000e-05 eta 21:56:44 +epoch [1/50] batch [295/1000] time 1.539 (1.589) data 0.000 (0.006) loss 1.5195 (1.5700) acc 56.2500 (63.6653) lr 1.0000e-05 eta 21:56:07 +epoch [1/50] batch [300/1000] time 1.548 (1.588) data 0.001 (0.006) loss 0.7812 (1.5675) acc 75.0000 (63.7396) lr 1.0000e-05 eta 21:55:31 +epoch [1/50] batch [305/1000] time 1.568 (1.588) data 0.000 (0.006) loss 1.4668 (1.5673) acc 75.0000 (63.7807) lr 1.0000e-05 eta 21:55:04 +epoch [1/50] batch [310/1000] time 1.575 (1.587) data 0.000 (0.005) loss 2.0508 (1.5710) acc 59.3750 (63.7500) lr 1.0000e-05 eta 21:54:36 +epoch [1/50] batch [315/1000] time 1.689 (1.587) data 0.000 (0.005) loss 0.7744 (1.5643) acc 87.5000 (63.8690) lr 1.0000e-05 eta 21:54:22 +epoch [1/50] batch [320/1000] time 1.554 (1.587) data 0.000 (0.005) loss 1.5303 (1.5605) acc 59.3750 (63.9258) lr 1.0000e-05 eta 21:53:55 +epoch [1/50] batch [325/1000] time 1.561 (1.586) data 0.000 (0.005) loss 1.5205 (1.5618) acc 59.3750 (63.9808) lr 1.0000e-05 eta 21:53:27 +epoch [1/50] batch [330/1000] time 1.562 (1.586) data 0.000 (0.005) loss 1.6885 (1.5594) acc 53.1250 (63.9583) lr 1.0000e-05 eta 21:52:59 +epoch [1/50] batch [335/1000] time 1.580 (1.586) data 0.000 (0.005) loss 1.6455 (1.5617) acc 56.2500 (63.9272) lr 1.0000e-05 eta 21:52:29 +epoch [1/50] batch [340/1000] time 1.556 (1.585) data 0.000 (0.005) loss 1.7256 (1.5612) acc 65.6250 (63.9982) lr 1.0000e-05 eta 21:51:58 +epoch [1/50] batch [345/1000] time 1.546 (1.585) data 0.000 (0.005) loss 1.5938 (1.5604) acc 62.5000 (63.9493) lr 1.0000e-05 eta 21:51:24 +epoch [1/50] batch [350/1000] time 1.578 (1.584) data 0.000 (0.005) loss 1.0664 (1.5553) acc 62.5000 (64.0268) lr 1.0000e-05 eta 21:51:03 +epoch [1/50] batch 
[355/1000] time 1.539 (1.584) data 0.000 (0.005) loss 1.3877 (1.5528) acc 65.6250 (64.0053) lr 1.0000e-05 eta 21:50:31 +epoch [1/50] batch [360/1000] time 1.549 (1.583) data 0.000 (0.005) loss 1.6504 (1.5523) acc 62.5000 (64.0191) lr 1.0000e-05 eta 21:49:58 +epoch [1/50] batch [365/1000] time 1.524 (1.583) data 0.000 (0.005) loss 0.9712 (1.5496) acc 78.1250 (64.1182) lr 1.0000e-05 eta 21:49:23 +epoch [1/50] batch [370/1000] time 1.565 (1.583) data 0.000 (0.005) loss 1.7666 (1.5500) acc 59.3750 (64.1047) lr 1.0000e-05 eta 21:49:03 +epoch [1/50] batch [375/1000] time 1.566 (1.582) data 0.000 (0.005) loss 1.1855 (1.5492) acc 62.5000 (64.0500) lr 1.0000e-05 eta 21:48:38 +epoch [1/50] batch [380/1000] time 1.546 (1.582) data 0.000 (0.005) loss 0.8394 (1.5459) acc 78.1250 (64.0789) lr 1.0000e-05 eta 21:48:30 +epoch [1/50] batch [385/1000] time 1.544 (1.582) data 0.000 (0.004) loss 1.0176 (1.5387) acc 78.1250 (64.2370) lr 1.0000e-05 eta 21:48:00 +epoch [1/50] batch [390/1000] time 1.554 (1.581) data 0.000 (0.004) loss 1.5127 (1.5394) acc 62.5000 (64.2228) lr 1.0000e-05 eta 21:47:33 +epoch [1/50] batch [395/1000] time 1.550 (1.581) data 0.001 (0.004) loss 0.5669 (1.5362) acc 84.3750 (64.2959) lr 1.0000e-05 eta 21:47:06 +epoch [1/50] batch [400/1000] time 1.581 (1.581) data 0.000 (0.004) loss 0.7549 (1.5294) acc 71.8750 (64.4219) lr 1.0000e-05 eta 21:46:45 +epoch [1/50] batch [405/1000] time 1.574 (1.581) data 0.000 (0.004) loss 0.8672 (1.5269) acc 75.0000 (64.4676) lr 1.0000e-05 eta 21:46:26 +epoch [1/50] batch [410/1000] time 1.554 (1.580) data 0.000 (0.004) loss 1.5781 (1.5269) acc 59.3750 (64.4741) lr 1.0000e-05 eta 21:46:03 +epoch [1/50] batch [415/1000] time 1.529 (1.580) data 0.000 (0.004) loss 1.1982 (1.5243) acc 62.5000 (64.4804) lr 1.0000e-05 eta 21:45:38 +epoch [1/50] batch [420/1000] time 1.585 (1.580) data 0.000 (0.004) loss 1.6797 (1.5227) acc 62.5000 (64.4643) lr 1.0000e-05 eta 21:45:24 +epoch [1/50] batch [425/1000] time 1.558 (1.580) data 0.000 (0.004) loss 
1.7910 (1.5237) acc 56.2500 (64.4559) lr 1.0000e-05 eta 21:45:22 +epoch [1/50] batch [430/1000] time 1.562 (1.580) data 0.000 (0.004) loss 1.8936 (1.5247) acc 59.3750 (64.4549) lr 1.0000e-05 eta 21:45:00 +epoch [1/50] batch [435/1000] time 1.557 (1.579) data 0.000 (0.004) loss 1.0430 (1.5243) acc 65.6250 (64.4756) lr 1.0000e-05 eta 21:44:42 +epoch [1/50] batch [440/1000] time 1.566 (1.579) data 0.000 (0.004) loss 0.6362 (1.5200) acc 81.2500 (64.5668) lr 1.0000e-05 eta 21:44:28 +epoch [1/50] batch [445/1000] time 1.586 (1.579) data 0.000 (0.004) loss 1.1914 (1.5181) acc 75.0000 (64.6348) lr 1.0000e-05 eta 21:44:16 +epoch [1/50] batch [450/1000] time 1.554 (1.579) data 0.000 (0.004) loss 1.6719 (1.5194) acc 59.3750 (64.6181) lr 1.0000e-05 eta 21:43:55 +epoch [1/50] batch [455/1000] time 1.559 (1.579) data 0.000 (0.004) loss 1.7607 (1.5206) acc 56.2500 (64.5948) lr 1.0000e-05 eta 21:43:34 +epoch [1/50] batch [460/1000] time 1.565 (1.578) data 0.000 (0.004) loss 1.4395 (1.5200) acc 62.5000 (64.6196) lr 1.0000e-05 eta 21:43:16 +epoch [1/50] batch [465/1000] time 1.563 (1.578) data 0.000 (0.004) loss 1.4160 (1.5194) acc 71.8750 (64.6304) lr 1.0000e-05 eta 21:42:58 +epoch [1/50] batch [470/1000] time 1.571 (1.578) data 0.000 (0.004) loss 1.1436 (1.5172) acc 56.2500 (64.6077) lr 1.0000e-05 eta 21:42:54 +epoch [1/50] batch [475/1000] time 1.598 (1.578) data 0.000 (0.004) loss 0.9092 (1.5154) acc 71.8750 (64.6382) lr 1.0000e-05 eta 21:42:43 +epoch [1/50] batch [480/1000] time 1.551 (1.578) data 0.001 (0.004) loss 1.6211 (1.5164) acc 56.2500 (64.5833) lr 1.0000e-05 eta 21:42:17 +epoch [1/50] batch [485/1000] time 1.556 (1.578) data 0.001 (0.004) loss 1.5244 (1.5177) acc 62.5000 (64.5812) lr 1.0000e-05 eta 21:41:56 +epoch [1/50] batch [490/1000] time 1.545 (1.577) data 0.000 (0.004) loss 2.0742 (1.5214) acc 50.0000 (64.5281) lr 1.0000e-05 eta 21:41:38 +epoch [1/50] batch [495/1000] time 1.534 (1.577) data 0.000 (0.004) loss 0.8174 (1.5195) acc 78.1250 (64.5644) lr 1.0000e-05 
eta 21:41:19 +epoch [1/50] batch [500/1000] time 1.549 (1.577) data 0.000 (0.004) loss 1.2617 (1.5176) acc 62.5000 (64.5625) lr 1.0000e-05 eta 21:41:02 +epoch [1/50] batch [505/1000] time 1.557 (1.577) data 0.000 (0.004) loss 1.1318 (1.5177) acc 65.6250 (64.5173) lr 1.0000e-05 eta 21:40:43 +epoch [1/50] batch [510/1000] time 1.600 (1.577) data 0.000 (0.003) loss 0.7847 (1.5152) acc 78.1250 (64.5527) lr 1.0000e-05 eta 21:40:33 +epoch [1/50] batch [515/1000] time 1.561 (1.577) data 0.000 (0.003) loss 1.5615 (1.5126) acc 62.5000 (64.6177) lr 1.0000e-05 eta 21:40:23 +epoch [1/50] batch [520/1000] time 1.565 (1.577) data 0.001 (0.003) loss 1.7812 (1.5138) acc 59.3750 (64.5793) lr 1.0000e-05 eta 21:40:09 +epoch [1/50] batch [525/1000] time 1.588 (1.577) data 0.000 (0.003) loss 1.3672 (1.5121) acc 65.6250 (64.6012) lr 1.0000e-05 eta 21:40:03 +epoch [1/50] batch [530/1000] time 1.561 (1.577) data 0.000 (0.003) loss 1.1797 (1.5101) acc 78.1250 (64.6462) lr 1.0000e-05 eta 21:40:01 +epoch [1/50] batch [535/1000] time 1.562 (1.577) data 0.001 (0.003) loss 1.6904 (1.5087) acc 65.6250 (64.6787) lr 1.0000e-05 eta 21:39:42 +epoch [1/50] batch [540/1000] time 1.578 (1.576) data 0.001 (0.003) loss 1.7266 (1.5084) acc 68.7500 (64.7049) lr 1.0000e-05 eta 21:39:27 +epoch [1/50] batch [545/1000] time 1.574 (1.576) data 0.002 (0.003) loss 1.6045 (1.5050) acc 50.0000 (64.7420) lr 1.0000e-05 eta 21:39:14 +epoch [1/50] batch [550/1000] time 1.559 (1.576) data 0.001 (0.003) loss 2.1738 (1.5094) acc 43.7500 (64.6420) lr 1.0000e-05 eta 21:38:56 +epoch [1/50] batch [555/1000] time 1.534 (1.576) data 0.000 (0.003) loss 1.0293 (1.5062) acc 71.8750 (64.6903) lr 1.0000e-05 eta 21:38:34 +epoch [1/50] batch [560/1000] time 1.555 (1.576) data 0.000 (0.003) loss 0.7114 (1.5049) acc 81.2500 (64.7321) lr 1.0000e-05 eta 21:38:18 +epoch [1/50] batch [565/1000] time 1.565 (1.575) data 0.000 (0.003) loss 2.0215 (1.5044) acc 46.8750 (64.7069) lr 1.0000e-05 eta 21:38:00 +epoch [1/50] batch [570/1000] time 
1.540 (1.575) data 0.001 (0.003) loss 1.5967 (1.5056) acc 62.5000 (64.6875) lr 1.0000e-05 eta 21:37:48 +epoch [1/50] batch [575/1000] time 1.567 (1.575) data 0.000 (0.003) loss 1.5684 (1.5076) acc 62.5000 (64.6793) lr 1.0000e-05 eta 21:37:46 +epoch [1/50] batch [580/1000] time 1.563 (1.575) data 0.000 (0.003) loss 0.7471 (1.5028) acc 78.1250 (64.7845) lr 1.0000e-05 eta 21:37:34 +epoch [1/50] batch [585/1000] time 1.553 (1.575) data 0.000 (0.003) loss 1.1348 (1.5022) acc 78.1250 (64.7970) lr 1.0000e-05 eta 21:37:15 +epoch [1/50] batch [590/1000] time 1.563 (1.575) data 0.000 (0.003) loss 1.2510 (1.5025) acc 59.3750 (64.7775) lr 1.0000e-05 eta 21:37:08 +epoch [1/50] batch [595/1000] time 1.549 (1.575) data 0.001 (0.003) loss 1.5010 (1.5009) acc 68.7500 (64.7952) lr 1.0000e-05 eta 21:36:50 +epoch [1/50] batch [600/1000] time 1.562 (1.575) data 0.001 (0.003) loss 1.7695 (1.4998) acc 65.6250 (64.7969) lr 1.0000e-05 eta 21:36:37 +epoch [1/50] batch [605/1000] time 1.575 (1.575) data 0.000 (0.003) loss 1.9014 (1.4988) acc 59.3750 (64.7882) lr 1.0000e-05 eta 21:36:20 +epoch [1/50] batch [610/1000] time 1.571 (1.575) data 0.000 (0.003) loss 1.1758 (1.4992) acc 65.6250 (64.8053) lr 1.0000e-05 eta 21:36:10 +epoch [1/50] batch [615/1000] time 1.563 (1.574) data 0.000 (0.003) loss 1.2959 (1.4994) acc 75.0000 (64.7663) lr 1.0000e-05 eta 21:35:55 +epoch [1/50] batch [620/1000] time 1.573 (1.575) data 0.000 (0.003) loss 1.1182 (1.4982) acc 81.2500 (64.8286) lr 1.0000e-05 eta 21:35:56 +epoch [1/50] batch [625/1000] time 1.558 (1.575) data 0.000 (0.003) loss 0.8838 (1.4975) acc 68.7500 (64.8200) lr 1.0000e-05 eta 21:35:41 +epoch [1/50] batch [630/1000] time 1.565 (1.574) data 0.000 (0.003) loss 1.0801 (1.4948) acc 71.8750 (64.8710) lr 1.0000e-05 eta 21:35:29 +epoch [1/50] batch [635/1000] time 1.557 (1.574) data 0.000 (0.003) loss 1.2725 (1.4943) acc 81.2500 (64.9016) lr 1.0000e-05 eta 21:35:23 +epoch [1/50] batch [640/1000] time 1.568 (1.574) data 0.000 (0.003) loss 1.1123 (1.4929) 
acc 65.6250 (64.9121) lr 1.0000e-05 eta 21:35:14 +epoch [1/50] batch [645/1000] time 1.562 (1.574) data 0.001 (0.003) loss 1.1426 (1.4908) acc 81.2500 (64.9467) lr 1.0000e-05 eta 21:35:04 +epoch [1/50] batch [650/1000] time 1.574 (1.574) data 0.000 (0.003) loss 1.9111 (1.4933) acc 56.2500 (64.8846) lr 1.0000e-05 eta 21:34:53 +epoch [1/50] batch [655/1000] time 1.560 (1.574) data 0.000 (0.003) loss 0.8447 (1.4934) acc 81.2500 (64.8664) lr 1.0000e-05 eta 21:34:46 +epoch [1/50] batch [660/1000] time 1.559 (1.574) data 0.000 (0.003) loss 0.8809 (1.4927) acc 75.0000 (64.8532) lr 1.0000e-05 eta 21:34:31 +epoch [1/50] batch [665/1000] time 1.589 (1.574) data 0.000 (0.003) loss 1.0518 (1.4907) acc 75.0000 (64.8825) lr 1.0000e-05 eta 21:34:26 +epoch [1/50] batch [670/1000] time 1.581 (1.574) data 0.000 (0.003) loss 1.0781 (1.4884) acc 84.3750 (64.9394) lr 1.0000e-05 eta 21:34:16 +epoch [1/50] batch [675/1000] time 1.584 (1.574) data 0.000 (0.003) loss 2.1172 (1.4867) acc 46.8750 (64.9352) lr 1.0000e-05 eta 21:34:07 +epoch [1/50] batch [680/1000] time 1.709 (1.574) data 0.000 (0.003) loss 1.5029 (1.4852) acc 71.8750 (64.9724) lr 1.0000e-05 eta 21:34:06 +epoch [1/50] batch [685/1000] time 1.565 (1.574) data 0.000 (0.003) loss 1.2832 (1.4835) acc 50.0000 (64.9589) lr 1.0000e-05 eta 21:33:55 +epoch [1/50] batch [690/1000] time 1.559 (1.574) data 0.000 (0.003) loss 1.5391 (1.4833) acc 65.6250 (64.9592) lr 1.0000e-05 eta 21:33:43 +epoch [1/50] batch [695/1000] time 1.562 (1.574) data 0.000 (0.003) loss 1.9209 (1.4846) acc 56.2500 (64.9281) lr 1.0000e-05 eta 21:33:32 +epoch [1/50] batch [700/1000] time 1.544 (1.574) data 0.000 (0.003) loss 1.2061 (1.4853) acc 71.8750 (64.9107) lr 1.0000e-05 eta 21:33:18 +epoch [1/50] batch [705/1000] time 1.580 (1.574) data 0.000 (0.003) loss 1.2275 (1.4853) acc 68.7500 (64.9113) lr 1.0000e-05 eta 21:33:07 +epoch [1/50] batch [710/1000] time 1.561 (1.574) data 0.000 (0.003) loss 1.3105 (1.4838) acc 68.7500 (64.9428) lr 1.0000e-05 eta 21:32:53 
+epoch [1/50] batch [715/1000] time 1.546 (1.574) data 0.000 (0.003) loss 1.2412 (1.4822) acc 75.0000 (64.9781) lr 1.0000e-05 eta 21:32:40 +epoch [1/50] batch [720/1000] time 1.560 (1.574) data 0.001 (0.003) loss 1.8809 (1.4835) acc 62.5000 (64.9306) lr 1.0000e-05 eta 21:32:27 +epoch [1/50] batch [725/1000] time 1.714 (1.574) data 0.000 (0.003) loss 1.0293 (1.4819) acc 81.2500 (64.9655) lr 1.0000e-05 eta 21:32:23 +epoch [1/50] batch [730/1000] time 1.565 (1.574) data 0.000 (0.003) loss 1.7285 (1.4810) acc 53.1250 (64.9572) lr 1.0000e-05 eta 21:32:11 +epoch [1/50] batch [735/1000] time 1.545 (1.573) data 0.001 (0.003) loss 1.6533 (1.4807) acc 53.1250 (64.9745) lr 1.0000e-05 eta 21:31:56 +epoch [1/50] batch [740/1000] time 1.555 (1.573) data 0.001 (0.003) loss 1.2637 (1.4785) acc 65.6250 (65.0042) lr 1.0000e-05 eta 21:31:45 +epoch [1/50] batch [745/1000] time 1.570 (1.573) data 0.000 (0.003) loss 1.4121 (1.4766) acc 68.7500 (65.0419) lr 1.0000e-05 eta 21:31:34 +epoch [1/50] batch [750/1000] time 1.561 (1.573) data 0.000 (0.003) loss 1.5996 (1.4770) acc 65.6250 (65.0375) lr 1.0000e-05 eta 21:31:17 +epoch [1/50] batch [755/1000] time 1.556 (1.573) data 0.000 (0.002) loss 1.5381 (1.4755) acc 62.5000 (65.0538) lr 1.0000e-05 eta 21:31:06 +epoch [1/50] batch [760/1000] time 1.567 (1.573) data 0.000 (0.002) loss 1.1650 (1.4741) acc 65.6250 (65.0576) lr 1.0000e-05 eta 21:30:56 +epoch [1/50] batch [765/1000] time 1.558 (1.573) data 0.000 (0.002) loss 1.6182 (1.4744) acc 65.6250 (65.0449) lr 1.0000e-05 eta 21:30:41 +epoch [1/50] batch [770/1000] time 1.554 (1.573) data 0.000 (0.002) loss 0.8687 (1.4726) acc 87.5000 (65.0974) lr 1.0000e-05 eta 21:30:37 +epoch [1/50] batch [775/1000] time 1.531 (1.573) data 0.001 (0.002) loss 1.3164 (1.4740) acc 65.6250 (65.0685) lr 1.0000e-05 eta 21:30:24 +epoch [1/50] batch [780/1000] time 1.549 (1.573) data 0.000 (0.002) loss 2.0938 (1.4742) acc 53.1250 (65.0280) lr 1.0000e-05 eta 21:30:15 +epoch [1/50] batch [785/1000] time 1.584 (1.573) 
data 0.000 (0.002) loss 0.8633 (1.4723) acc 78.1250 (65.0557) lr 1.0000e-05 eta 21:30:06 +epoch [1/50] batch [790/1000] time 1.570 (1.573) data 0.000 (0.002) loss 1.7656 (1.4727) acc 65.6250 (65.0791) lr 1.0000e-05 eta 21:29:53 +epoch [1/50] batch [795/1000] time 1.557 (1.573) data 0.000 (0.002) loss 1.0527 (1.4725) acc 81.2500 (65.0943) lr 1.0000e-05 eta 21:29:46 +epoch [1/50] batch [800/1000] time 1.561 (1.573) data 0.000 (0.002) loss 1.2998 (1.4726) acc 65.6250 (65.0547) lr 1.0000e-05 eta 21:29:33 +epoch [1/50] batch [805/1000] time 1.559 (1.573) data 0.000 (0.002) loss 1.4141 (1.4728) acc 65.6250 (65.0349) lr 1.0000e-05 eta 21:29:23 +epoch [1/50] batch [810/1000] time 1.556 (1.572) data 0.000 (0.002) loss 1.8848 (1.4729) acc 56.2500 (65.0231) lr 1.0000e-05 eta 21:29:10 +epoch [1/50] batch [815/1000] time 1.550 (1.572) data 0.000 (0.002) loss 2.1309 (1.4737) acc 59.3750 (65.0307) lr 1.0000e-05 eta 21:28:57 +epoch [1/50] batch [820/1000] time 1.551 (1.572) data 0.000 (0.002) loss 1.7998 (1.4732) acc 59.3750 (65.0457) lr 1.0000e-05 eta 21:28:48 +epoch [1/50] batch [825/1000] time 1.575 (1.572) data 0.000 (0.002) loss 1.6855 (1.4714) acc 68.7500 (65.0909) lr 1.0000e-05 eta 21:28:37 +epoch [1/50] batch [830/1000] time 1.556 (1.572) data 0.000 (0.002) loss 1.5244 (1.4701) acc 56.2500 (65.0866) lr 1.0000e-05 eta 21:28:26 +epoch [1/50] batch [835/1000] time 1.579 (1.572) data 0.000 (0.002) loss 1.5947 (1.4679) acc 68.7500 (65.1534) lr 1.0000e-05 eta 21:28:25 +epoch [1/50] batch [840/1000] time 1.548 (1.572) data 0.000 (0.002) loss 1.3008 (1.4663) acc 71.8750 (65.1860) lr 1.0000e-05 eta 21:28:13 +epoch [1/50] batch [845/1000] time 1.557 (1.572) data 0.000 (0.002) loss 2.3809 (1.4681) acc 46.8750 (65.1627) lr 1.0000e-05 eta 21:28:01 +epoch [1/50] batch [850/1000] time 1.560 (1.572) data 0.001 (0.002) loss 1.3242 (1.4669) acc 68.7500 (65.1801) lr 1.0000e-05 eta 21:27:49 +epoch [1/50] batch [855/1000] time 1.568 (1.572) data 0.001 (0.002) loss 1.1865 (1.4651) acc 71.8750 
(65.2230) lr 1.0000e-05 eta 21:27:37 +epoch [1/50] batch [860/1000] time 1.552 (1.572) data 0.000 (0.002) loss 0.8887 (1.4652) acc 68.7500 (65.1817) lr 1.0000e-05 eta 21:27:23 +epoch [1/50] batch [865/1000] time 1.588 (1.572) data 0.001 (0.002) loss 1.3867 (1.4652) acc 59.3750 (65.2059) lr 1.0000e-05 eta 21:27:17 +epoch [1/50] batch [870/1000] time 1.552 (1.572) data 0.000 (0.002) loss 1.5283 (1.4657) acc 56.2500 (65.1868) lr 1.0000e-05 eta 21:27:07 +epoch [1/50] batch [875/1000] time 1.567 (1.572) data 0.001 (0.002) loss 1.3828 (1.4633) acc 68.7500 (65.2321) lr 1.0000e-05 eta 21:27:00 +epoch [1/50] batch [880/1000] time 1.570 (1.572) data 0.000 (0.002) loss 1.9609 (1.4632) acc 59.3750 (65.2450) lr 1.0000e-05 eta 21:26:56 +epoch [1/50] batch [885/1000] time 1.562 (1.572) data 0.000 (0.002) loss 1.3057 (1.4635) acc 68.7500 (65.2401) lr 1.0000e-05 eta 21:26:45 +epoch [1/50] batch [890/1000] time 1.554 (1.572) data 0.000 (0.002) loss 0.8892 (1.4619) acc 78.1250 (65.2949) lr 1.0000e-05 eta 21:26:35 +epoch [1/50] batch [895/1000] time 1.584 (1.572) data 0.001 (0.002) loss 1.2314 (1.4613) acc 71.8750 (65.3038) lr 1.0000e-05 eta 21:26:27 +epoch [1/50] batch [900/1000] time 1.542 (1.572) data 0.001 (0.002) loss 1.4355 (1.4617) acc 65.6250 (65.3125) lr 1.0000e-05 eta 21:26:16 +epoch [1/50] batch [905/1000] time 1.568 (1.572) data 0.000 (0.002) loss 1.5869 (1.4608) acc 56.2500 (65.3315) lr 1.0000e-05 eta 21:26:03 +epoch [1/50] batch [910/1000] time 1.551 (1.572) data 0.000 (0.002) loss 1.2041 (1.4600) acc 65.6250 (65.3434) lr 1.0000e-05 eta 21:25:51 +epoch [1/50] batch [915/1000] time 1.555 (1.572) data 0.000 (0.002) loss 1.3105 (1.4607) acc 75.0000 (65.3484) lr 1.0000e-05 eta 21:25:40 +epoch [1/50] batch [920/1000] time 1.562 (1.572) data 0.001 (0.002) loss 1.2646 (1.4591) acc 65.6250 (65.3668) lr 1.0000e-05 eta 21:25:39 +epoch [1/50] batch [925/1000] time 1.552 (1.572) data 0.000 (0.002) loss 0.9512 (1.4590) acc 68.7500 (65.3682) lr 1.0000e-05 eta 21:25:27 +epoch [1/50] 
batch [930/1000] time 1.552 (1.572) data 0.000 (0.002) loss 0.9897 (1.4598) acc 65.6250 (65.3495) lr 1.0000e-05 eta 21:25:15 +epoch [1/50] batch [935/1000] time 1.564 (1.571) data 0.000 (0.002) loss 2.1953 (1.4623) acc 53.1250 (65.2941) lr 1.0000e-05 eta 21:25:03 +epoch [1/50] batch [940/1000] time 1.566 (1.571) data 0.000 (0.002) loss 1.3955 (1.4601) acc 68.7500 (65.3324) lr 1.0000e-05 eta 21:24:52 +epoch [1/50] batch [945/1000] time 1.553 (1.571) data 0.001 (0.002) loss 1.3311 (1.4622) acc 71.8750 (65.3009) lr 1.0000e-05 eta 21:24:39 +epoch [1/50] batch [950/1000] time 1.537 (1.571) data 0.000 (0.002) loss 1.5361 (1.4607) acc 68.7500 (65.3487) lr 1.0000e-05 eta 21:24:25 +epoch [1/50] batch [955/1000] time 1.560 (1.571) data 0.000 (0.002) loss 1.9131 (1.4605) acc 56.2500 (65.3632) lr 1.0000e-05 eta 21:24:14 +epoch [1/50] batch [960/1000] time 1.568 (1.571) data 0.000 (0.002) loss 1.1133 (1.4583) acc 75.0000 (65.4069) lr 1.0000e-05 eta 21:24:02 +epoch [1/50] batch [965/1000] time 1.551 (1.571) data 0.000 (0.002) loss 1.0977 (1.4570) acc 68.7500 (65.4404) lr 1.0000e-05 eta 21:23:46 +epoch [1/50] batch [970/1000] time 1.542 (1.571) data 0.000 (0.002) loss 1.4453 (1.4572) acc 68.7500 (65.4510) lr 1.0000e-05 eta 21:23:37 +epoch [1/50] batch [975/1000] time 1.567 (1.571) data 0.001 (0.002) loss 1.6934 (1.4581) acc 62.5000 (65.4359) lr 1.0000e-05 eta 21:23:27 +epoch [1/50] batch [980/1000] time 1.606 (1.571) data 0.001 (0.002) loss 1.2520 (1.4571) acc 81.2500 (65.4719) lr 1.0000e-05 eta 21:23:18 +epoch [1/50] batch [985/1000] time 1.560 (1.571) data 0.001 (0.002) loss 0.9692 (1.4557) acc 75.0000 (65.4791) lr 1.0000e-05 eta 21:23:14 +epoch [1/50] batch [990/1000] time 1.557 (1.571) data 0.000 (0.002) loss 1.6660 (1.4568) acc 59.3750 (65.4545) lr 1.0000e-05 eta 21:23:04 +epoch [1/50] batch [995/1000] time 1.567 (1.571) data 0.000 (0.002) loss 1.8740 (1.4564) acc 56.2500 (65.4617) lr 1.0000e-05 eta 21:22:54 +epoch [1/50] batch [1000/1000] time 1.560 (1.571) data 0.000 
(0.002) loss 0.9551 (1.4552) acc 71.8750 (65.4688) lr 2.0000e-03 eta 21:22:45 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 37,535 +* accuracy: 75.1% +* error: 24.9% +* macro_f1: 74.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [2/50] batch [5/1000] time 1.532 (1.724) data 0.000 (0.170) loss 1.1953 (1.4148) acc 65.6250 (66.2500) lr 2.0000e-03 eta 23:27:35 +epoch [2/50] batch [10/1000] time 1.562 (1.636) data 0.001 (0.085) loss 1.6836 (1.4440) acc 56.2500 (66.5625) lr 2.0000e-03 eta 22:16:03 +epoch [2/50] batch [15/1000] time 1.536 (1.609) data 0.000 (0.057) loss 2.1309 (1.4900) acc 62.5000 (66.6667) lr 2.0000e-03 eta 21:53:39 +epoch [2/50] batch [20/1000] time 1.555 (1.600) data 0.001 (0.043) loss 0.9731 (1.4653) acc 71.8750 (66.4062) lr 2.0000e-03 eta 21:45:52 +epoch [2/50] batch [25/1000] time 1.587 (1.594) data 0.000 (0.034) loss 0.9907 (1.4076) acc 65.6250 (66.8750) lr 2.0000e-03 eta 21:41:17 +epoch [2/50] batch [30/1000] time 1.553 (1.590) data 0.001 (0.029) loss 1.0518 (1.3659) acc 68.7500 (67.7083) lr 2.0000e-03 eta 21:37:20 +epoch [2/50] batch [35/1000] time 1.556 (1.585) data 0.001 (0.025) loss 0.7041 (1.3660) acc 81.2500 (67.5000) lr 2.0000e-03 eta 21:33:25 +epoch [2/50] batch [40/1000] time 1.557 (1.581) data 0.000 (0.022) loss 1.4395 (1.3722) acc 65.6250 (67.1875) lr 2.0000e-03 eta 21:29:53 +epoch [2/50] batch [45/1000] time 1.562 (1.578) data 0.001 (0.019) loss 1.9824 (1.3866) acc 56.2500 (66.8056) lr 2.0000e-03 eta 21:27:39 +epoch [2/50] batch [50/1000] time 1.584 (1.580) data 0.000 (0.018) loss 0.9009 (1.3722) acc 81.2500 (66.9375) lr 2.0000e-03 eta 21:29:16 +epoch [2/50] batch [55/1000] time 1.551 (1.577) data 0.000 (0.016) loss 1.3848 (1.3897) acc 62.5000 (66.7614) lr 2.0000e-03 eta 21:26:44 +epoch [2/50] batch [60/1000] time 1.564 (1.577) data 0.001 (0.015) loss 2.0254 (1.4114) acc 56.2500 (66.5625) lr 2.0000e-03 eta 21:26:20 
+epoch [2/50] batch [65/1000] time 1.551 (1.577) data 0.001 (0.014) loss 1.0742 (1.4002) acc 68.7500 (66.6827) lr 2.0000e-03 eta 21:25:47 +epoch [2/50] batch [70/1000] time 1.541 (1.576) data 0.000 (0.013) loss 1.4502 (1.3822) acc 62.5000 (66.9196) lr 2.0000e-03 eta 21:25:03 +epoch [2/50] batch [75/1000] time 1.580 (1.574) data 0.001 (0.012) loss 0.9331 (1.3586) acc 81.2500 (67.5417) lr 2.0000e-03 eta 21:23:47 +epoch [2/50] batch [80/1000] time 1.561 (1.573) data 0.000 (0.011) loss 0.8877 (1.3394) acc 75.0000 (67.8516) lr 2.0000e-03 eta 21:22:52 +epoch [2/50] batch [85/1000] time 1.553 (1.573) data 0.000 (0.011) loss 1.8271 (1.3421) acc 68.7500 (67.7574) lr 2.0000e-03 eta 21:22:07 +epoch [2/50] batch [90/1000] time 1.567 (1.572) data 0.000 (0.010) loss 1.3115 (1.3297) acc 65.6250 (67.9167) lr 2.0000e-03 eta 21:21:41 +epoch [2/50] batch [95/1000] time 1.557 (1.572) data 0.000 (0.009) loss 1.3271 (1.3263) acc 71.8750 (68.0921) lr 2.0000e-03 eta 21:20:58 +epoch [2/50] batch [100/1000] time 1.550 (1.571) data 0.001 (0.009) loss 1.4473 (1.3294) acc 59.3750 (67.9688) lr 2.0000e-03 eta 21:20:03 +epoch [2/50] batch [105/1000] time 1.550 (1.571) data 0.001 (0.009) loss 1.3545 (1.3259) acc 68.7500 (68.0357) lr 2.0000e-03 eta 21:19:53 +epoch [2/50] batch [110/1000] time 1.557 (1.570) data 0.001 (0.008) loss 1.0781 (1.3224) acc 75.0000 (68.1250) lr 2.0000e-03 eta 21:19:27 +epoch [2/50] batch [115/1000] time 1.564 (1.570) data 0.000 (0.008) loss 1.4492 (1.3230) acc 68.7500 (68.1522) lr 2.0000e-03 eta 21:18:50 +epoch [2/50] batch [120/1000] time 1.541 (1.569) data 0.000 (0.008) loss 1.4473 (1.3174) acc 62.5000 (68.2812) lr 2.0000e-03 eta 21:18:15 +epoch [2/50] batch [125/1000] time 1.536 (1.569) data 0.001 (0.007) loss 1.6924 (1.3073) acc 65.6250 (68.6000) lr 2.0000e-03 eta 21:17:51 +epoch [2/50] batch [130/1000] time 1.572 (1.569) data 0.000 (0.007) loss 1.6182 (1.3030) acc 71.8750 (68.7019) lr 2.0000e-03 eta 21:17:41 +epoch [2/50] batch [135/1000] time 1.552 (1.569) data 0.001 
(0.007) loss 1.4189 (1.3067) acc 65.6250 (68.5417) lr 2.0000e-03 eta 21:17:26 +epoch [2/50] batch [140/1000] time 1.558 (1.568) data 0.000 (0.007) loss 0.8872 (1.3032) acc 68.7500 (68.5268) lr 2.0000e-03 eta 21:17:06 +epoch [2/50] batch [145/1000] time 1.587 (1.568) data 0.000 (0.006) loss 1.4385 (1.3008) acc 56.2500 (68.3836) lr 2.0000e-03 eta 21:17:01 +epoch [2/50] batch [150/1000] time 1.552 (1.568) data 0.000 (0.006) loss 2.3809 (1.3067) acc 59.3750 (68.2917) lr 2.0000e-03 eta 21:16:43 +epoch [2/50] batch [155/1000] time 1.547 (1.569) data 0.001 (0.006) loss 1.3965 (1.3086) acc 62.5000 (68.2863) lr 2.0000e-03 eta 21:17:14 +epoch [2/50] batch [160/1000] time 1.574 (1.569) data 0.000 (0.006) loss 1.2803 (1.3059) acc 75.0000 (68.3398) lr 2.0000e-03 eta 21:17:03 +epoch [2/50] batch [165/1000] time 1.555 (1.569) data 0.000 (0.006) loss 1.3633 (1.3070) acc 65.6250 (68.2576) lr 2.0000e-03 eta 21:16:52 +epoch [2/50] batch [170/1000] time 1.584 (1.569) data 0.000 (0.005) loss 1.0693 (1.3085) acc 65.6250 (68.1434) lr 2.0000e-03 eta 21:16:34 +epoch [2/50] batch [175/1000] time 1.548 (1.568) data 0.000 (0.005) loss 1.3457 (1.3081) acc 59.3750 (68.1250) lr 2.0000e-03 eta 21:16:01 +epoch [2/50] batch [180/1000] time 1.550 (1.568) data 0.000 (0.005) loss 1.5205 (1.3052) acc 65.6250 (68.1424) lr 2.0000e-03 eta 21:15:44 +epoch [2/50] batch [185/1000] time 1.558 (1.568) data 0.000 (0.005) loss 0.9556 (1.3034) acc 75.0000 (68.2939) lr 2.0000e-03 eta 21:15:25 +epoch [2/50] batch [190/1000] time 1.561 (1.568) data 0.000 (0.005) loss 1.4160 (1.3059) acc 65.6250 (68.1743) lr 2.0000e-03 eta 21:15:15 +epoch [2/50] batch [195/1000] time 1.535 (1.567) data 0.001 (0.005) loss 1.1045 (1.3039) acc 71.8750 (68.2051) lr 2.0000e-03 eta 21:14:46 +epoch [2/50] batch [200/1000] time 1.536 (1.568) data 0.001 (0.005) loss 0.8379 (1.2964) acc 75.0000 (68.3438) lr 2.0000e-03 eta 21:14:59 +epoch [2/50] batch [205/1000] time 1.572 (1.568) data 0.000 (0.005) loss 1.5098 (1.3018) acc 56.2500 (68.2012) lr 
2.0000e-03 eta 21:14:50 +epoch [2/50] batch [210/1000] time 1.553 (1.568) data 0.000 (0.005) loss 0.6797 (1.2986) acc 75.0000 (68.2589) lr 2.0000e-03 eta 21:14:43 +epoch [2/50] batch [215/1000] time 1.555 (1.567) data 0.000 (0.004) loss 1.0830 (1.2987) acc 71.8750 (68.3430) lr 2.0000e-03 eta 21:14:17 +epoch [2/50] batch [220/1000] time 1.582 (1.567) data 0.000 (0.004) loss 1.4297 (1.2959) acc 59.3750 (68.3949) lr 2.0000e-03 eta 21:14:06 +epoch [2/50] batch [225/1000] time 1.529 (1.567) data 0.000 (0.004) loss 1.1836 (1.2889) acc 68.7500 (68.5417) lr 2.0000e-03 eta 21:13:44 +epoch [2/50] batch [230/1000] time 1.562 (1.567) data 0.000 (0.004) loss 1.6787 (1.2957) acc 62.5000 (68.3967) lr 2.0000e-03 eta 21:13:28 +epoch [2/50] batch [235/1000] time 1.569 (1.567) data 0.000 (0.004) loss 1.6807 (1.3023) acc 65.6250 (68.3644) lr 2.0000e-03 eta 21:13:20 +epoch [2/50] batch [240/1000] time 1.731 (1.567) data 0.000 (0.004) loss 1.5215 (1.3064) acc 56.2500 (68.2292) lr 2.0000e-03 eta 21:13:40 +epoch [2/50] batch [245/1000] time 1.556 (1.567) data 0.000 (0.004) loss 0.7769 (1.3038) acc 84.3750 (68.3163) lr 2.0000e-03 eta 21:13:27 +epoch [2/50] batch [250/1000] time 1.556 (1.567) data 0.000 (0.004) loss 1.1250 (1.3033) acc 81.2500 (68.2875) lr 2.0000e-03 eta 21:13:11 +epoch [2/50] batch [255/1000] time 1.565 (1.567) data 0.000 (0.004) loss 1.0771 (1.2987) acc 59.3750 (68.3578) lr 2.0000e-03 eta 21:13:08 +epoch [2/50] batch [260/1000] time 1.581 (1.567) data 0.001 (0.004) loss 0.5649 (1.2947) acc 81.2500 (68.4976) lr 2.0000e-03 eta 21:13:03 +epoch [2/50] batch [265/1000] time 1.555 (1.567) data 0.001 (0.004) loss 1.3984 (1.2940) acc 71.8750 (68.5613) lr 2.0000e-03 eta 21:12:49 +epoch [2/50] batch [270/1000] time 1.576 (1.567) data 0.001 (0.004) loss 1.1426 (1.2954) acc 68.7500 (68.5185) lr 2.0000e-03 eta 21:12:39 +epoch [2/50] batch [275/1000] time 1.542 (1.567) data 0.000 (0.004) loss 1.5625 (1.2972) acc 65.6250 (68.5227) lr 2.0000e-03 eta 21:12:22 +epoch [2/50] batch 
[280/1000] time 1.574 (1.567) data 0.000 (0.004) loss 1.2295 (1.2966) acc 71.8750 (68.5379) lr 2.0000e-03 eta 21:12:07 +epoch [2/50] batch [285/1000] time 1.554 (1.567) data 0.000 (0.003) loss 2.0664 (1.2966) acc 53.1250 (68.5746) lr 2.0000e-03 eta 21:11:54 +epoch [2/50] batch [290/1000] time 1.545 (1.566) data 0.000 (0.003) loss 1.1191 (1.2978) acc 68.7500 (68.4914) lr 2.0000e-03 eta 21:11:41 +epoch [2/50] batch [295/1000] time 1.570 (1.566) data 0.001 (0.003) loss 1.7344 (1.2992) acc 59.3750 (68.5064) lr 2.0000e-03 eta 21:11:27 +epoch [2/50] batch [300/1000] time 1.532 (1.566) data 0.001 (0.003) loss 1.5850 (1.2989) acc 71.8750 (68.5208) lr 2.0000e-03 eta 21:11:06 +epoch [2/50] batch [305/1000] time 1.550 (1.566) data 0.000 (0.003) loss 1.0107 (1.2953) acc 68.7500 (68.5246) lr 2.0000e-03 eta 21:11:17 +epoch [2/50] batch [310/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.3271 (1.2910) acc 68.7500 (68.5887) lr 2.0000e-03 eta 21:11:09 +epoch [2/50] batch [315/1000] time 1.553 (1.566) data 0.000 (0.003) loss 1.2666 (1.2900) acc 75.0000 (68.5714) lr 2.0000e-03 eta 21:10:56 +epoch [2/50] batch [320/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.6084 (1.2899) acc 62.5000 (68.5449) lr 2.0000e-03 eta 21:10:41 +epoch [2/50] batch [325/1000] time 1.552 (1.566) data 0.000 (0.003) loss 0.8291 (1.2890) acc 78.1250 (68.5192) lr 2.0000e-03 eta 21:10:21 +epoch [2/50] batch [330/1000] time 1.561 (1.566) data 0.001 (0.003) loss 1.0625 (1.2860) acc 71.8750 (68.5795) lr 2.0000e-03 eta 21:10:09 +epoch [2/50] batch [335/1000] time 1.573 (1.566) data 0.001 (0.003) loss 1.2461 (1.2860) acc 68.7500 (68.5448) lr 2.0000e-03 eta 21:09:57 +epoch [2/50] batch [340/1000] time 1.557 (1.566) data 0.000 (0.003) loss 1.6631 (1.2848) acc 65.6250 (68.5754) lr 2.0000e-03 eta 21:09:49 +epoch [2/50] batch [345/1000] time 1.558 (1.566) data 0.000 (0.003) loss 0.9927 (1.2817) acc 65.6250 (68.6775) lr 2.0000e-03 eta 21:09:41 +epoch [2/50] batch [350/1000] time 1.565 (1.566) data 0.000 (0.003) loss 
2.0410 (1.2858) acc 59.3750 (68.6250) lr 2.0000e-03 eta 21:09:50 +epoch [2/50] batch [355/1000] time 1.577 (1.566) data 0.001 (0.003) loss 1.5381 (1.2853) acc 68.7500 (68.6268) lr 2.0000e-03 eta 21:09:42 +epoch [2/50] batch [360/1000] time 1.564 (1.566) data 0.000 (0.003) loss 1.6533 (1.2850) acc 50.0000 (68.5938) lr 2.0000e-03 eta 21:09:24 +epoch [2/50] batch [365/1000] time 1.547 (1.566) data 0.000 (0.003) loss 1.1982 (1.2856) acc 75.0000 (68.5616) lr 2.0000e-03 eta 21:09:10 +epoch [2/50] batch [370/1000] time 1.550 (1.566) data 0.001 (0.003) loss 1.2461 (1.2843) acc 71.8750 (68.6233) lr 2.0000e-03 eta 21:09:06 +epoch [2/50] batch [375/1000] time 1.558 (1.566) data 0.001 (0.003) loss 1.0576 (1.2836) acc 71.8750 (68.6083) lr 2.0000e-03 eta 21:08:57 +epoch [2/50] batch [380/1000] time 1.605 (1.566) data 0.000 (0.003) loss 1.4717 (1.2823) acc 78.1250 (68.6184) lr 2.0000e-03 eta 21:08:53 +epoch [2/50] batch [385/1000] time 1.561 (1.566) data 0.000 (0.003) loss 0.5254 (1.2822) acc 84.3750 (68.6607) lr 2.0000e-03 eta 21:08:41 +epoch [2/50] batch [390/1000] time 1.588 (1.566) data 0.001 (0.003) loss 1.8691 (1.2811) acc 62.5000 (68.6779) lr 2.0000e-03 eta 21:08:37 +epoch [2/50] batch [395/1000] time 1.567 (1.566) data 0.000 (0.003) loss 0.9927 (1.2796) acc 75.0000 (68.7104) lr 2.0000e-03 eta 21:08:40 +epoch [2/50] batch [400/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.6689 (1.2796) acc 59.3750 (68.6953) lr 2.0000e-03 eta 21:08:30 +epoch [2/50] batch [405/1000] time 1.577 (1.566) data 0.000 (0.003) loss 1.3799 (1.2770) acc 68.7500 (68.7423) lr 2.0000e-03 eta 21:08:20 +epoch [2/50] batch [410/1000] time 1.554 (1.566) data 0.000 (0.003) loss 1.2793 (1.2797) acc 68.7500 (68.7043) lr 2.0000e-03 eta 21:08:09 +epoch [2/50] batch [415/1000] time 1.554 (1.566) data 0.001 (0.003) loss 0.6885 (1.2752) acc 78.1250 (68.7199) lr 2.0000e-03 eta 21:07:56 +epoch [2/50] batch [420/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.1582 (1.2754) acc 68.7500 (68.6979) lr 2.0000e-03 
eta 21:07:41 +epoch [2/50] batch [425/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.5205 (1.2753) acc 62.5000 (68.7132) lr 2.0000e-03 eta 21:07:29 +epoch [2/50] batch [430/1000] time 1.575 (1.566) data 0.001 (0.002) loss 1.5430 (1.2743) acc 53.1250 (68.6773) lr 2.0000e-03 eta 21:07:23 +epoch [2/50] batch [435/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.9072 (1.2720) acc 78.1250 (68.7069) lr 2.0000e-03 eta 21:07:11 +epoch [2/50] batch [440/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.6240 (1.2700) acc 65.6250 (68.7784) lr 2.0000e-03 eta 21:07:05 +epoch [2/50] batch [445/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.2363 (1.2703) acc 68.7500 (68.7711) lr 2.0000e-03 eta 21:06:55 +epoch [2/50] batch [450/1000] time 1.539 (1.566) data 0.000 (0.002) loss 0.8599 (1.2689) acc 81.2500 (68.7847) lr 2.0000e-03 eta 21:06:46 +epoch [2/50] batch [455/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.8809 (1.2695) acc 50.0000 (68.7363) lr 2.0000e-03 eta 21:06:54 +epoch [2/50] batch [460/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.9922 (1.2692) acc 75.0000 (68.7296) lr 2.0000e-03 eta 21:06:43 +epoch [2/50] batch [465/1000] time 1.531 (1.566) data 0.000 (0.002) loss 1.5596 (1.2668) acc 65.6250 (68.8105) lr 2.0000e-03 eta 21:06:24 +epoch [2/50] batch [470/1000] time 1.574 (1.565) data 0.000 (0.002) loss 1.3438 (1.2672) acc 59.3750 (68.7899) lr 2.0000e-03 eta 21:06:11 +epoch [2/50] batch [475/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.2559 (1.2675) acc 71.8750 (68.7829) lr 2.0000e-03 eta 21:05:54 +epoch [2/50] batch [480/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.0039 (1.2682) acc 81.2500 (68.7435) lr 2.0000e-03 eta 21:05:45 +epoch [2/50] batch [485/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.2080 (1.2688) acc 65.6250 (68.7049) lr 2.0000e-03 eta 21:05:39 +epoch [2/50] batch [490/1000] time 1.579 (1.565) data 0.000 (0.002) loss 0.9067 (1.2676) acc 78.1250 (68.7054) lr 2.0000e-03 eta 21:05:32 +epoch [2/50] batch [495/1000] time 
1.566 (1.565) data 0.000 (0.002) loss 1.2910 (1.2645) acc 65.6250 (68.7626) lr 2.0000e-03 eta 21:05:21 +epoch [2/50] batch [500/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.6670 (1.2645) acc 65.6250 (68.7812) lr 2.0000e-03 eta 21:05:26 +epoch [2/50] batch [505/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.3076 (1.2640) acc 68.7500 (68.7500) lr 2.0000e-03 eta 21:05:12 +epoch [2/50] batch [510/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.1943 (1.2646) acc 75.0000 (68.7745) lr 2.0000e-03 eta 21:05:02 +epoch [2/50] batch [515/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.6338 (1.2644) acc 65.6250 (68.8167) lr 2.0000e-03 eta 21:04:56 +epoch [2/50] batch [520/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.9346 (1.2644) acc 65.6250 (68.8401) lr 2.0000e-03 eta 21:04:45 +epoch [2/50] batch [525/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.6924 (1.2640) acc 62.5000 (68.8393) lr 2.0000e-03 eta 21:04:29 +epoch [2/50] batch [530/1000] time 1.564 (1.565) data 0.000 (0.002) loss 0.9521 (1.2637) acc 71.8750 (68.8384) lr 2.0000e-03 eta 21:04:21 +epoch [2/50] batch [535/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.0244 (1.2638) acc 75.0000 (68.8376) lr 2.0000e-03 eta 21:04:04 +epoch [2/50] batch [540/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.8086 (1.2644) acc 53.1250 (68.8079) lr 2.0000e-03 eta 21:03:59 +epoch [2/50] batch [545/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.4697 (1.2681) acc 68.7500 (68.7328) lr 2.0000e-03 eta 21:04:07 +epoch [2/50] batch [550/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.2793 (1.2694) acc 62.5000 (68.7102) lr 2.0000e-03 eta 21:03:56 +epoch [2/50] batch [555/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.9419 (1.2693) acc 68.7500 (68.6768) lr 2.0000e-03 eta 21:03:49 +epoch [2/50] batch [560/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.1924 (1.2693) acc 68.7500 (68.6663) lr 2.0000e-03 eta 21:03:41 +epoch [2/50] batch [565/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.4688 (1.2679) 
acc 59.3750 (68.6726) lr 2.0000e-03 eta 21:03:30 +epoch [2/50] batch [570/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.4863 (1.2689) acc 68.7500 (68.6732) lr 2.0000e-03 eta 21:03:19 +epoch [2/50] batch [575/1000] time 1.588 (1.565) data 0.000 (0.002) loss 1.5078 (1.2684) acc 68.7500 (68.7011) lr 2.0000e-03 eta 21:03:08 +epoch [2/50] batch [580/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.8613 (1.2677) acc 75.0000 (68.7015) lr 2.0000e-03 eta 21:03:01 +epoch [2/50] batch [585/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.8291 (1.2671) acc 84.3750 (68.6966) lr 2.0000e-03 eta 21:02:50 +epoch [2/50] batch [590/1000] time 1.582 (1.565) data 0.000 (0.002) loss 0.7720 (1.2649) acc 81.2500 (68.7500) lr 2.0000e-03 eta 21:02:39 +epoch [2/50] batch [595/1000] time 1.569 (1.565) data 0.001 (0.002) loss 0.7378 (1.2634) acc 84.3750 (68.7710) lr 2.0000e-03 eta 21:02:31 +epoch [2/50] batch [600/1000] time 1.523 (1.565) data 0.000 (0.002) loss 1.4678 (1.2618) acc 65.6250 (68.8125) lr 2.0000e-03 eta 21:02:20 +epoch [2/50] batch [605/1000] time 1.717 (1.565) data 0.000 (0.002) loss 0.8271 (1.2609) acc 71.8750 (68.8275) lr 2.0000e-03 eta 21:02:19 +epoch [2/50] batch [610/1000] time 1.583 (1.565) data 0.000 (0.002) loss 1.4277 (1.2600) acc 68.7500 (68.8268) lr 2.0000e-03 eta 21:02:12 +epoch [2/50] batch [615/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.5098 (1.2599) acc 62.5000 (68.8364) lr 2.0000e-03 eta 21:02:03 +epoch [2/50] batch [620/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.2979 (1.2611) acc 65.6250 (68.7853) lr 2.0000e-03 eta 21:01:59 +epoch [2/50] batch [625/1000] time 1.544 (1.565) data 0.000 (0.002) loss 1.0967 (1.2613) acc 75.0000 (68.8000) lr 2.0000e-03 eta 21:01:47 +epoch [2/50] batch [630/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.1602 (1.2620) acc 71.8750 (68.7996) lr 2.0000e-03 eta 21:01:42 +epoch [2/50] batch [635/1000] time 1.577 (1.565) data 0.001 (0.002) loss 0.9507 (1.2616) acc 81.2500 (68.7943) lr 2.0000e-03 eta 21:01:29 
+epoch [2/50] batch [640/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.7476 (1.2618) acc 81.2500 (68.7695) lr 2.0000e-03 eta 21:01:22 +epoch [2/50] batch [645/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.2236 (1.2608) acc 75.0000 (68.8178) lr 2.0000e-03 eta 21:01:13 +epoch [2/50] batch [650/1000] time 1.712 (1.565) data 0.000 (0.002) loss 1.2344 (1.2589) acc 68.7500 (68.8702) lr 2.0000e-03 eta 21:01:12 +epoch [2/50] batch [655/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.5146 (1.2582) acc 56.2500 (68.8788) lr 2.0000e-03 eta 21:01:02 +epoch [2/50] batch [660/1000] time 1.590 (1.565) data 0.000 (0.002) loss 0.5264 (1.2546) acc 87.5000 (68.9678) lr 2.0000e-03 eta 21:00:52 +epoch [2/50] batch [665/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.0312 (1.2539) acc 75.0000 (69.0038) lr 2.0000e-03 eta 21:00:44 +epoch [2/50] batch [670/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.4678 (1.2531) acc 68.7500 (69.0159) lr 2.0000e-03 eta 21:00:36 +epoch [2/50] batch [675/1000] time 1.552 (1.565) data 0.000 (0.002) loss 0.8774 (1.2538) acc 75.0000 (69.0093) lr 2.0000e-03 eta 21:00:24 +epoch [2/50] batch [680/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.4707 (1.2549) acc 65.6250 (68.9798) lr 2.0000e-03 eta 21:00:12 +epoch [2/50] batch [685/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.1484 (1.2543) acc 75.0000 (68.9827) lr 2.0000e-03 eta 21:00:01 +epoch [2/50] batch [690/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.4434 (1.2544) acc 65.6250 (69.0082) lr 2.0000e-03 eta 20:59:52 +epoch [2/50] batch [695/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0674 (1.2533) acc 71.8750 (69.0378) lr 2.0000e-03 eta 20:59:51 +epoch [2/50] batch [700/1000] time 1.535 (1.565) data 0.000 (0.002) loss 1.5693 (1.2535) acc 56.2500 (69.0402) lr 2.0000e-03 eta 20:59:42 +epoch [2/50] batch [705/1000] time 1.569 (1.565) data 0.001 (0.002) loss 1.1768 (1.2527) acc 81.2500 (69.0603) lr 2.0000e-03 eta 20:59:33 +epoch [2/50] batch [710/1000] time 1.545 (1.565) 
data 0.000 (0.002) loss 1.0801 (1.2510) acc 71.8750 (69.1109) lr 2.0000e-03 eta 20:59:22 +epoch [2/50] batch [715/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.0566 (1.2501) acc 81.2500 (69.1302) lr 2.0000e-03 eta 20:59:12 +epoch [2/50] batch [720/1000] time 1.574 (1.565) data 0.000 (0.002) loss 1.3848 (1.2518) acc 78.1250 (69.1233) lr 2.0000e-03 eta 20:59:06 +epoch [2/50] batch [725/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.2695 (1.2520) acc 71.8750 (69.1250) lr 2.0000e-03 eta 20:58:56 +epoch [2/50] batch [730/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.9736 (1.2531) acc 68.7500 (69.0796) lr 2.0000e-03 eta 20:58:47 +epoch [2/50] batch [735/1000] time 1.542 (1.565) data 0.001 (0.002) loss 0.8853 (1.2528) acc 75.0000 (69.0646) lr 2.0000e-03 eta 20:58:36 +epoch [2/50] batch [740/1000] time 1.583 (1.565) data 0.001 (0.002) loss 0.9858 (1.2519) acc 75.0000 (69.0752) lr 2.0000e-03 eta 20:58:34 +epoch [2/50] batch [745/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.0713 (1.2520) acc 65.6250 (69.0898) lr 2.0000e-03 eta 20:58:22 +epoch [2/50] batch [750/1000] time 1.542 (1.565) data 0.000 (0.002) loss 1.4189 (1.2514) acc 59.3750 (69.1042) lr 2.0000e-03 eta 20:58:08 +epoch [2/50] batch [755/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.3730 (1.2534) acc 62.5000 (69.0397) lr 2.0000e-03 eta 20:57:57 +epoch [2/50] batch [760/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.1035 (1.2519) acc 75.0000 (69.0666) lr 2.0000e-03 eta 20:57:57 +epoch [2/50] batch [765/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.8164 (1.2530) acc 59.3750 (69.0482) lr 2.0000e-03 eta 20:57:50 +epoch [2/50] batch [770/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.0205 (1.2539) acc 78.1250 (69.0463) lr 2.0000e-03 eta 20:57:40 +epoch [2/50] batch [775/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.1289 (1.2545) acc 65.6250 (69.0282) lr 2.0000e-03 eta 20:57:30 +epoch [2/50] batch [780/1000] time 1.546 (1.564) data 0.000 (0.002) loss 1.1748 (1.2540) acc 68.7500 
(69.0264) lr 2.0000e-03 eta 20:57:18 +epoch [2/50] batch [785/1000] time 1.537 (1.564) data 0.000 (0.002) loss 1.2148 (1.2542) acc 65.6250 (69.0088) lr 2.0000e-03 eta 20:57:07 +epoch [2/50] batch [790/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.1182 (1.2538) acc 75.0000 (69.0388) lr 2.0000e-03 eta 20:56:56 +epoch [2/50] batch [795/1000] time 1.568 (1.564) data 0.000 (0.002) loss 1.1855 (1.2537) acc 68.7500 (69.0684) lr 2.0000e-03 eta 20:56:47 +epoch [2/50] batch [800/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.1055 (1.2564) acc 68.7500 (69.0117) lr 2.0000e-03 eta 20:56:38 +epoch [2/50] batch [805/1000] time 1.559 (1.564) data 0.000 (0.002) loss 1.5264 (1.2587) acc 62.5000 (68.9868) lr 2.0000e-03 eta 20:56:39 +epoch [2/50] batch [810/1000] time 1.570 (1.564) data 0.000 (0.002) loss 0.8560 (1.2588) acc 71.8750 (68.9931) lr 2.0000e-03 eta 20:56:29 +epoch [2/50] batch [815/1000] time 1.556 (1.564) data 0.000 (0.001) loss 1.1172 (1.2587) acc 75.0000 (69.0069) lr 2.0000e-03 eta 20:56:17 +epoch [2/50] batch [820/1000] time 1.558 (1.564) data 0.000 (0.001) loss 1.4277 (1.2586) acc 65.6250 (69.0358) lr 2.0000e-03 eta 20:56:08 +epoch [2/50] batch [825/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.7646 (1.2599) acc 59.3750 (69.0152) lr 2.0000e-03 eta 20:55:59 +epoch [2/50] batch [830/1000] time 1.549 (1.564) data 0.000 (0.001) loss 1.4229 (1.2606) acc 71.8750 (68.9985) lr 2.0000e-03 eta 20:55:47 +epoch [2/50] batch [835/1000] time 1.569 (1.564) data 0.000 (0.001) loss 0.8101 (1.2616) acc 87.5000 (69.0120) lr 2.0000e-03 eta 20:55:40 +epoch [2/50] batch [840/1000] time 1.574 (1.564) data 0.000 (0.001) loss 1.1748 (1.2624) acc 68.7500 (69.0030) lr 2.0000e-03 eta 20:55:32 +epoch [2/50] batch [845/1000] time 1.561 (1.564) data 0.000 (0.001) loss 0.9927 (1.2605) acc 75.0000 (69.0274) lr 2.0000e-03 eta 20:55:31 +epoch [2/50] batch [850/1000] time 1.553 (1.564) data 0.001 (0.001) loss 1.6465 (1.2617) acc 62.5000 (69.0110) lr 2.0000e-03 eta 20:55:21 +epoch [2/50] 
batch [855/1000] time 1.551 (1.564) data 0.000 (0.001) loss 1.3613 (1.2644) acc 65.6250 (68.9766) lr 2.0000e-03 eta 20:55:13 +epoch [2/50] batch [860/1000] time 1.589 (1.564) data 0.000 (0.001) loss 1.0273 (1.2654) acc 71.8750 (68.9353) lr 2.0000e-03 eta 20:55:08 +epoch [2/50] batch [865/1000] time 1.549 (1.564) data 0.000 (0.001) loss 1.8184 (1.2656) acc 68.7500 (68.9306) lr 2.0000e-03 eta 20:54:58 +epoch [2/50] batch [870/1000] time 1.545 (1.564) data 0.001 (0.001) loss 1.0938 (1.2654) acc 75.0000 (68.9440) lr 2.0000e-03 eta 20:54:47 +epoch [2/50] batch [875/1000] time 1.556 (1.564) data 0.000 (0.001) loss 0.8296 (1.2638) acc 75.0000 (68.9536) lr 2.0000e-03 eta 20:54:37 +epoch [2/50] batch [880/1000] time 1.564 (1.564) data 0.000 (0.001) loss 1.0020 (1.2640) acc 75.0000 (68.9560) lr 2.0000e-03 eta 20:54:27 +epoch [2/50] batch [885/1000] time 1.566 (1.564) data 0.000 (0.001) loss 1.5752 (1.2641) acc 62.5000 (68.9548) lr 2.0000e-03 eta 20:54:19 +epoch [2/50] batch [890/1000] time 1.547 (1.564) data 0.000 (0.001) loss 1.1719 (1.2647) acc 68.7500 (68.9466) lr 2.0000e-03 eta 20:54:11 +epoch [2/50] batch [895/1000] time 1.559 (1.564) data 0.000 (0.001) loss 1.3828 (1.2664) acc 53.1250 (68.9071) lr 2.0000e-03 eta 20:54:02 +epoch [2/50] batch [900/1000] time 1.565 (1.564) data 0.000 (0.001) loss 1.8086 (1.2660) acc 65.6250 (68.9201) lr 2.0000e-03 eta 20:53:49 +epoch [2/50] batch [905/1000] time 1.576 (1.564) data 0.000 (0.001) loss 1.2441 (1.2664) acc 68.7500 (68.9157) lr 2.0000e-03 eta 20:53:43 +epoch [2/50] batch [910/1000] time 1.566 (1.564) data 0.000 (0.001) loss 1.3047 (1.2652) acc 71.8750 (68.9595) lr 2.0000e-03 eta 20:53:40 +epoch [2/50] batch [915/1000] time 1.576 (1.564) data 0.000 (0.001) loss 1.3701 (1.2659) acc 68.7500 (68.9515) lr 2.0000e-03 eta 20:53:32 +epoch [2/50] batch [920/1000] time 1.522 (1.564) data 0.001 (0.001) loss 1.5547 (1.2660) acc 71.8750 (68.9538) lr 2.0000e-03 eta 20:53:20 +epoch [2/50] batch [925/1000] time 1.542 (1.564) data 0.000 
(0.001) loss 1.3975 (1.2656) acc 59.3750 (68.9459) lr 2.0000e-03 eta 20:53:11 +epoch [2/50] batch [930/1000] time 1.561 (1.564) data 0.001 (0.001) loss 1.1045 (1.2654) acc 75.0000 (68.9247) lr 2.0000e-03 eta 20:53:04 +epoch [2/50] batch [935/1000] time 1.561 (1.564) data 0.000 (0.001) loss 1.2715 (1.2654) acc 65.6250 (68.9271) lr 2.0000e-03 eta 20:52:56 +epoch [2/50] batch [940/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.1768 (1.2655) acc 75.0000 (68.9162) lr 2.0000e-03 eta 20:52:47 +epoch [2/50] batch [945/1000] time 1.548 (1.564) data 0.000 (0.001) loss 1.5332 (1.2656) acc 71.8750 (68.9153) lr 2.0000e-03 eta 20:52:39 +epoch [2/50] batch [950/1000] time 1.569 (1.564) data 0.000 (0.001) loss 0.9360 (1.2653) acc 75.0000 (68.9211) lr 2.0000e-03 eta 20:52:34 +epoch [2/50] batch [955/1000] time 1.549 (1.564) data 0.000 (0.001) loss 0.8174 (1.2647) acc 78.1250 (68.9103) lr 2.0000e-03 eta 20:52:30 +epoch [2/50] batch [960/1000] time 1.593 (1.564) data 0.001 (0.001) loss 1.2842 (1.2643) acc 75.0000 (68.9225) lr 2.0000e-03 eta 20:52:23 +epoch [2/50] batch [965/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.5293 (1.2643) acc 71.8750 (68.9184) lr 2.0000e-03 eta 20:52:13 +epoch [2/50] batch [970/1000] time 1.567 (1.564) data 0.000 (0.001) loss 1.3418 (1.2649) acc 68.7500 (68.9143) lr 2.0000e-03 eta 20:52:03 +epoch [2/50] batch [975/1000] time 1.556 (1.564) data 0.000 (0.001) loss 0.8062 (1.2638) acc 84.3750 (68.9712) lr 2.0000e-03 eta 20:51:52 +epoch [2/50] batch [980/1000] time 1.552 (1.564) data 0.000 (0.001) loss 1.3604 (1.2644) acc 68.7500 (68.9541) lr 2.0000e-03 eta 20:51:43 +epoch [2/50] batch [985/1000] time 1.549 (1.564) data 0.001 (0.001) loss 1.2314 (1.2641) acc 71.8750 (68.9721) lr 2.0000e-03 eta 20:51:31 +epoch [2/50] batch [990/1000] time 1.567 (1.564) data 0.000 (0.001) loss 1.2051 (1.2642) acc 62.5000 (68.9615) lr 2.0000e-03 eta 20:51:21 +epoch [2/50] batch [995/1000] time 1.554 (1.564) data 0.001 (0.001) loss 1.1875 (1.2639) acc 84.3750 (68.9761) lr 
2.0000e-03 eta 20:51:13 +epoch [2/50] batch [1000/1000] time 1.558 (1.564) data 0.000 (0.001) loss 1.2979 (1.2654) acc 78.1250 (68.9656) lr 1.9980e-03 eta 20:51:04 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,575 +* accuracy: 77.2% +* error: 22.8% +* macro_f1: 76.5% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [3/50] batch [5/1000] time 1.527 (1.738) data 0.000 (0.188) loss 2.0801 (1.4611) acc 56.2500 (66.8750) lr 1.9980e-03 eta 23:10:29 +epoch [3/50] batch [10/1000] time 1.568 (1.651) data 0.000 (0.094) loss 0.9062 (1.3557) acc 75.0000 (67.1875) lr 1.9980e-03 eta 22:00:31 +epoch [3/50] batch [15/1000] time 1.585 (1.625) data 0.000 (0.063) loss 1.7275 (1.3215) acc 53.1250 (68.7500) lr 1.9980e-03 eta 21:39:25 +epoch [3/50] batch [20/1000] time 1.585 (1.609) data 0.001 (0.047) loss 1.7432 (1.3365) acc 59.3750 (69.2188) lr 1.9980e-03 eta 21:26:58 +epoch [3/50] batch [25/1000] time 1.543 (1.599) data 0.000 (0.038) loss 1.0498 (1.3341) acc 81.2500 (69.6250) lr 1.9980e-03 eta 21:18:30 +epoch [3/50] batch [30/1000] time 1.567 (1.592) data 0.001 (0.032) loss 0.7441 (1.2851) acc 78.1250 (70.0000) lr 1.9980e-03 eta 21:12:26 +epoch [3/50] batch [35/1000] time 1.573 (1.586) data 0.000 (0.027) loss 1.0879 (1.2679) acc 68.7500 (69.9107) lr 1.9980e-03 eta 21:08:05 +epoch [3/50] batch [40/1000] time 1.556 (1.585) data 0.000 (0.024) loss 0.8931 (1.2291) acc 71.8750 (70.1562) lr 1.9980e-03 eta 21:06:38 +epoch [3/50] batch [45/1000] time 1.535 (1.581) data 0.000 (0.021) loss 0.9663 (1.2088) acc 78.1250 (70.7639) lr 1.9980e-03 eta 21:03:25 +epoch [3/50] batch [50/1000] time 1.564 (1.578) data 0.000 (0.019) loss 0.9712 (1.1975) acc 78.1250 (70.6250) lr 1.9980e-03 eta 21:01:05 +epoch [3/50] batch [55/1000] time 1.552 (1.576) data 0.000 (0.018) loss 1.2061 (1.1926) acc 75.0000 (70.6818) lr 1.9980e-03 eta 20:59:16 +epoch [3/50] batch [60/1000] time 1.583 (1.575) data 
0.001 (0.016) loss 1.2471 (1.2048) acc 59.3750 (70.1562) lr 1.9980e-03 eta 20:58:13 +epoch [3/50] batch [65/1000] time 1.572 (1.576) data 0.001 (0.015) loss 1.1865 (1.1946) acc 68.7500 (70.2885) lr 1.9980e-03 eta 20:58:54 +epoch [3/50] batch [70/1000] time 1.550 (1.574) data 0.000 (0.014) loss 1.6357 (1.1975) acc 56.2500 (70.1786) lr 1.9980e-03 eta 20:57:33 +epoch [3/50] batch [75/1000] time 1.575 (1.573) data 0.001 (0.013) loss 1.0840 (1.1822) acc 71.8750 (70.2500) lr 1.9980e-03 eta 20:56:34 +epoch [3/50] batch [80/1000] time 1.566 (1.573) data 0.000 (0.012) loss 1.4004 (1.1767) acc 62.5000 (70.4688) lr 1.9980e-03 eta 20:56:02 +epoch [3/50] batch [85/1000] time 1.558 (1.572) data 0.000 (0.012) loss 0.9800 (1.1810) acc 87.5000 (70.6250) lr 1.9980e-03 eta 20:55:06 +epoch [3/50] batch [90/1000] time 1.540 (1.571) data 0.000 (0.011) loss 2.1387 (1.1919) acc 62.5000 (70.6250) lr 1.9980e-03 eta 20:54:08 +epoch [3/50] batch [95/1000] time 1.547 (1.570) data 0.000 (0.010) loss 1.5986 (1.1956) acc 56.2500 (70.5263) lr 1.9980e-03 eta 20:53:20 +epoch [3/50] batch [100/1000] time 1.551 (1.569) data 0.000 (0.010) loss 1.3389 (1.1964) acc 78.1250 (70.8438) lr 1.9980e-03 eta 20:52:36 +epoch [3/50] batch [105/1000] time 1.563 (1.568) data 0.000 (0.009) loss 1.4404 (1.1889) acc 56.2500 (71.0417) lr 1.9980e-03 eta 20:51:57 +epoch [3/50] batch [110/1000] time 1.566 (1.569) data 0.000 (0.009) loss 1.5049 (1.1785) acc 68.7500 (71.3352) lr 1.9980e-03 eta 20:52:22 +epoch [3/50] batch [115/1000] time 1.563 (1.569) data 0.000 (0.009) loss 0.8179 (1.1801) acc 75.0000 (71.3043) lr 1.9980e-03 eta 20:51:49 +epoch [3/50] batch [120/1000] time 1.582 (1.568) data 0.000 (0.008) loss 1.2100 (1.1816) acc 62.5000 (71.0156) lr 1.9980e-03 eta 20:51:19 +epoch [3/50] batch [125/1000] time 1.559 (1.568) data 0.001 (0.008) loss 1.2080 (1.1803) acc 62.5000 (71.0250) lr 1.9980e-03 eta 20:50:49 +epoch [3/50] batch [130/1000] time 1.549 (1.567) data 0.000 (0.008) loss 1.2686 (1.1775) acc 71.8750 (71.0577) lr 
1.9980e-03 eta 20:50:02 +epoch [3/50] batch [135/1000] time 1.561 (1.567) data 0.000 (0.007) loss 1.3770 (1.1807) acc 59.3750 (70.9954) lr 1.9980e-03 eta 20:49:40 +epoch [3/50] batch [140/1000] time 1.576 (1.566) data 0.000 (0.007) loss 0.9995 (1.1840) acc 68.7500 (70.9821) lr 1.9980e-03 eta 20:49:11 +epoch [3/50] batch [145/1000] time 1.545 (1.566) data 0.000 (0.007) loss 1.1289 (1.1900) acc 75.0000 (70.9052) lr 1.9980e-03 eta 20:48:41 +epoch [3/50] batch [150/1000] time 1.571 (1.565) data 0.000 (0.007) loss 1.3965 (1.1916) acc 62.5000 (70.7917) lr 1.9980e-03 eta 20:48:12 +epoch [3/50] batch [155/1000] time 1.532 (1.565) data 0.000 (0.006) loss 1.5488 (1.1900) acc 62.5000 (70.7056) lr 1.9980e-03 eta 20:47:35 +epoch [3/50] batch [160/1000] time 1.549 (1.564) data 0.000 (0.006) loss 1.4160 (1.1978) acc 68.7500 (70.6641) lr 1.9980e-03 eta 20:47:01 +epoch [3/50] batch [165/1000] time 1.581 (1.564) data 0.000 (0.006) loss 0.9897 (1.2016) acc 75.0000 (70.7008) lr 1.9980e-03 eta 20:46:50 +epoch [3/50] batch [170/1000] time 1.545 (1.563) data 0.000 (0.006) loss 0.9482 (1.2086) acc 78.1250 (70.6250) lr 1.9980e-03 eta 20:46:09 +epoch [3/50] batch [175/1000] time 1.541 (1.563) data 0.000 (0.006) loss 1.0332 (1.2121) acc 62.5000 (70.4821) lr 1.9980e-03 eta 20:45:30 +epoch [3/50] batch [180/1000] time 1.548 (1.562) data 0.000 (0.006) loss 1.2402 (1.2109) acc 75.0000 (70.5382) lr 1.9980e-03 eta 20:45:09 +epoch [3/50] batch [185/1000] time 1.562 (1.562) data 0.001 (0.005) loss 1.3623 (1.2062) acc 71.8750 (70.6926) lr 1.9980e-03 eta 20:45:03 +epoch [3/50] batch [190/1000] time 1.560 (1.562) data 0.000 (0.005) loss 1.9668 (1.2101) acc 59.3750 (70.5921) lr 1.9980e-03 eta 20:44:59 +epoch [3/50] batch [195/1000] time 1.557 (1.562) data 0.001 (0.005) loss 0.9326 (1.2093) acc 75.0000 (70.6250) lr 1.9980e-03 eta 20:44:40 +epoch [3/50] batch [200/1000] time 1.550 (1.562) data 0.000 (0.005) loss 1.6650 (1.2143) acc 62.5000 (70.4688) lr 1.9980e-03 eta 20:44:28 +epoch [3/50] batch 
[205/1000] time 1.558 (1.562) data 0.000 (0.005) loss 0.9419 (1.2156) acc 75.0000 (70.5030) lr 1.9980e-03 eta 20:44:21 +epoch [3/50] batch [210/1000] time 1.545 (1.562) data 0.000 (0.005) loss 1.1230 (1.2151) acc 68.7500 (70.5060) lr 1.9980e-03 eta 20:44:06 +epoch [3/50] batch [215/1000] time 1.548 (1.562) data 0.000 (0.005) loss 0.8740 (1.2119) acc 75.0000 (70.5669) lr 1.9980e-03 eta 20:44:12 +epoch [3/50] batch [220/1000] time 1.541 (1.562) data 0.000 (0.005) loss 1.0889 (1.2136) acc 81.2500 (70.5398) lr 1.9980e-03 eta 20:43:55 +epoch [3/50] batch [225/1000] time 1.553 (1.562) data 0.000 (0.005) loss 1.3203 (1.2175) acc 75.0000 (70.5278) lr 1.9980e-03 eta 20:43:33 +epoch [3/50] batch [230/1000] time 1.562 (1.562) data 0.000 (0.005) loss 0.8594 (1.2172) acc 75.0000 (70.5707) lr 1.9980e-03 eta 20:43:26 +epoch [3/50] batch [235/1000] time 1.578 (1.562) data 0.000 (0.004) loss 1.1738 (1.2161) acc 75.0000 (70.6649) lr 1.9980e-03 eta 20:43:27 +epoch [3/50] batch [240/1000] time 1.539 (1.562) data 0.000 (0.004) loss 1.6885 (1.2179) acc 53.1250 (70.6120) lr 1.9980e-03 eta 20:43:20 +epoch [3/50] batch [245/1000] time 1.543 (1.562) data 0.000 (0.004) loss 1.8232 (1.2200) acc 56.2500 (70.5357) lr 1.9980e-03 eta 20:43:09 +epoch [3/50] batch [250/1000] time 1.552 (1.562) data 0.000 (0.004) loss 1.1475 (1.2191) acc 71.8750 (70.5250) lr 1.9980e-03 eta 20:42:59 +epoch [3/50] batch [255/1000] time 1.539 (1.562) data 0.000 (0.004) loss 1.3174 (1.2138) acc 65.6250 (70.6127) lr 1.9980e-03 eta 20:42:56 +epoch [3/50] batch [260/1000] time 1.552 (1.562) data 0.000 (0.004) loss 0.9419 (1.2094) acc 68.7500 (70.6490) lr 1.9980e-03 eta 20:43:06 +epoch [3/50] batch [265/1000] time 1.557 (1.562) data 0.000 (0.004) loss 0.7778 (1.2116) acc 78.1250 (70.6840) lr 1.9980e-03 eta 20:42:45 +epoch [3/50] batch [270/1000] time 1.534 (1.562) data 0.000 (0.004) loss 0.9941 (1.2109) acc 65.6250 (70.6829) lr 1.9980e-03 eta 20:42:20 +epoch [3/50] batch [275/1000] time 1.600 (1.562) data 0.000 (0.004) loss 
0.7314 (1.2099) acc 75.0000 (70.6477) lr 1.9980e-03 eta 20:42:17 +epoch [3/50] batch [280/1000] time 1.567 (1.562) data 0.000 (0.004) loss 1.0693 (1.2073) acc 75.0000 (70.6808) lr 1.9980e-03 eta 20:42:08 +epoch [3/50] batch [285/1000] time 1.561 (1.562) data 0.000 (0.004) loss 1.3516 (1.2097) acc 65.6250 (70.5482) lr 1.9980e-03 eta 20:41:58 +epoch [3/50] batch [290/1000] time 1.564 (1.562) data 0.001 (0.004) loss 0.9893 (1.2115) acc 81.2500 (70.5496) lr 1.9980e-03 eta 20:41:50 +epoch [3/50] batch [295/1000] time 1.555 (1.562) data 0.000 (0.004) loss 1.5771 (1.2132) acc 59.3750 (70.4979) lr 1.9980e-03 eta 20:41:39 +epoch [3/50] batch [300/1000] time 1.722 (1.562) data 0.001 (0.004) loss 1.8408 (1.2164) acc 56.2500 (70.4167) lr 1.9980e-03 eta 20:41:59 +epoch [3/50] batch [305/1000] time 1.579 (1.562) data 0.000 (0.003) loss 1.0654 (1.2189) acc 75.0000 (70.3176) lr 1.9980e-03 eta 20:41:54 +epoch [3/50] batch [310/1000] time 1.553 (1.562) data 0.000 (0.003) loss 1.4365 (1.2228) acc 62.5000 (70.2520) lr 1.9980e-03 eta 20:41:47 +epoch [3/50] batch [315/1000] time 1.589 (1.562) data 0.000 (0.003) loss 1.1143 (1.2251) acc 81.2500 (70.1984) lr 1.9980e-03 eta 20:41:47 +epoch [3/50] batch [320/1000] time 1.566 (1.563) data 0.000 (0.003) loss 1.4023 (1.2255) acc 65.6250 (70.2051) lr 1.9980e-03 eta 20:41:41 +epoch [3/50] batch [325/1000] time 1.568 (1.562) data 0.000 (0.003) loss 1.5156 (1.2291) acc 50.0000 (70.1346) lr 1.9980e-03 eta 20:41:27 +epoch [3/50] batch [330/1000] time 1.545 (1.562) data 0.000 (0.003) loss 1.3203 (1.2298) acc 62.5000 (70.1610) lr 1.9980e-03 eta 20:41:19 +epoch [3/50] batch [335/1000] time 1.568 (1.562) data 0.000 (0.003) loss 1.5645 (1.2338) acc 71.8750 (70.1119) lr 1.9980e-03 eta 20:41:13 +epoch [3/50] batch [340/1000] time 1.576 (1.563) data 0.000 (0.003) loss 0.9443 (1.2341) acc 75.0000 (70.0827) lr 1.9980e-03 eta 20:41:09 +epoch [3/50] batch [345/1000] time 1.569 (1.562) data 0.000 (0.003) loss 0.4917 (1.2336) acc 87.5000 (70.0996) lr 1.9980e-03 
eta 20:41:00 +epoch [3/50] batch [350/1000] time 1.576 (1.563) data 0.001 (0.003) loss 1.6602 (1.2356) acc 65.6250 (70.0625) lr 1.9980e-03 eta 20:40:57 +epoch [3/50] batch [355/1000] time 1.584 (1.563) data 0.001 (0.003) loss 0.8398 (1.2354) acc 84.3750 (70.0704) lr 1.9980e-03 eta 20:40:48 +epoch [3/50] batch [360/1000] time 1.561 (1.563) data 0.000 (0.003) loss 0.7280 (1.2304) acc 78.1250 (70.1476) lr 1.9980e-03 eta 20:40:41 +epoch [3/50] batch [365/1000] time 1.557 (1.563) data 0.000 (0.003) loss 0.9565 (1.2289) acc 68.7500 (70.1627) lr 1.9980e-03 eta 20:40:50 +epoch [3/50] batch [370/1000] time 1.534 (1.563) data 0.000 (0.003) loss 1.2607 (1.2270) acc 71.8750 (70.2111) lr 1.9980e-03 eta 20:40:33 +epoch [3/50] batch [375/1000] time 1.549 (1.563) data 0.000 (0.003) loss 1.1582 (1.2266) acc 75.0000 (70.1833) lr 1.9980e-03 eta 20:40:24 +epoch [3/50] batch [380/1000] time 1.543 (1.563) data 0.001 (0.003) loss 1.5020 (1.2257) acc 62.5000 (70.1645) lr 1.9980e-03 eta 20:40:11 +epoch [3/50] batch [385/1000] time 1.535 (1.562) data 0.000 (0.003) loss 1.1377 (1.2272) acc 75.0000 (70.1218) lr 1.9980e-03 eta 20:39:54 +epoch [3/50] batch [390/1000] time 1.564 (1.562) data 0.000 (0.003) loss 1.6748 (1.2268) acc 71.8750 (70.1122) lr 1.9980e-03 eta 20:39:42 +epoch [3/50] batch [395/1000] time 1.551 (1.562) data 0.001 (0.003) loss 1.9619 (1.2288) acc 50.0000 (70.0396) lr 1.9980e-03 eta 20:39:36 +epoch [3/50] batch [400/1000] time 1.564 (1.562) data 0.000 (0.003) loss 2.0645 (1.2295) acc 65.6250 (70.0469) lr 1.9980e-03 eta 20:39:25 +epoch [3/50] batch [405/1000] time 1.551 (1.562) data 0.000 (0.003) loss 0.8145 (1.2292) acc 81.2500 (70.0540) lr 1.9980e-03 eta 20:39:15 +epoch [3/50] batch [410/1000] time 1.560 (1.563) data 0.000 (0.003) loss 1.0459 (1.2269) acc 78.1250 (70.1296) lr 1.9980e-03 eta 20:39:29 +epoch [3/50] batch [415/1000] time 1.564 (1.563) data 0.000 (0.003) loss 1.4795 (1.2276) acc 68.7500 (70.1054) lr 1.9980e-03 eta 20:39:23 +epoch [3/50] batch [420/1000] time 
1.553 (1.563) data 0.000 (0.003) loss 0.9253 (1.2287) acc 75.0000 (70.1116) lr 1.9980e-03 eta 20:39:19 +epoch [3/50] batch [425/1000] time 1.564 (1.563) data 0.000 (0.003) loss 1.6338 (1.2281) acc 68.7500 (70.0956) lr 1.9980e-03 eta 20:39:07 +epoch [3/50] batch [430/1000] time 1.548 (1.563) data 0.001 (0.003) loss 0.8496 (1.2281) acc 84.3750 (70.0945) lr 1.9980e-03 eta 20:39:00 +epoch [3/50] batch [435/1000] time 1.584 (1.563) data 0.000 (0.003) loss 1.1279 (1.2268) acc 65.6250 (70.1149) lr 1.9980e-03 eta 20:38:53 +epoch [3/50] batch [440/1000] time 1.556 (1.563) data 0.000 (0.003) loss 0.9297 (1.2293) acc 75.0000 (70.1065) lr 1.9980e-03 eta 20:38:39 +epoch [3/50] batch [445/1000] time 1.570 (1.563) data 0.000 (0.003) loss 1.9004 (1.2313) acc 56.2500 (70.0843) lr 1.9980e-03 eta 20:38:28 +epoch [3/50] batch [450/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.1670 (1.2302) acc 78.1250 (70.1250) lr 1.9980e-03 eta 20:38:19 +epoch [3/50] batch [455/1000] time 1.577 (1.563) data 0.000 (0.002) loss 1.2031 (1.2312) acc 68.7500 (70.0549) lr 1.9980e-03 eta 20:38:28 +epoch [3/50] batch [460/1000] time 1.605 (1.563) data 0.001 (0.002) loss 1.0186 (1.2324) acc 71.8750 (69.9932) lr 1.9980e-03 eta 20:38:30 +epoch [3/50] batch [465/1000] time 1.538 (1.563) data 0.000 (0.002) loss 1.0762 (1.2332) acc 71.8750 (69.9395) lr 1.9980e-03 eta 20:38:24 +epoch [3/50] batch [470/1000] time 1.567 (1.563) data 0.000 (0.002) loss 1.3936 (1.2320) acc 62.5000 (69.8803) lr 1.9980e-03 eta 20:38:15 +epoch [3/50] batch [475/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.3262 (1.2312) acc 71.8750 (69.9079) lr 1.9980e-03 eta 20:38:07 +epoch [3/50] batch [480/1000] time 1.553 (1.563) data 0.000 (0.002) loss 0.8618 (1.2286) acc 75.0000 (69.9349) lr 1.9980e-03 eta 20:37:55 +epoch [3/50] batch [485/1000] time 1.562 (1.563) data 0.000 (0.002) loss 0.5293 (1.2290) acc 78.1250 (69.9162) lr 1.9980e-03 eta 20:37:42 +epoch [3/50] batch [490/1000] time 1.553 (1.563) data 0.000 (0.002) loss 1.2637 (1.2277) 
acc 68.7500 (69.9362) lr 1.9980e-03 eta 20:37:26 +epoch [3/50] batch [495/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.3877 (1.2290) acc 68.7500 (69.9369) lr 1.9980e-03 eta 20:37:18 +epoch [3/50] batch [500/1000] time 1.578 (1.563) data 0.001 (0.002) loss 0.9663 (1.2319) acc 75.0000 (69.8937) lr 1.9980e-03 eta 20:37:05 +epoch [3/50] batch [505/1000] time 1.539 (1.562) data 0.000 (0.002) loss 0.8657 (1.2312) acc 78.1250 (69.9443) lr 1.9980e-03 eta 20:36:49 +epoch [3/50] batch [510/1000] time 1.540 (1.562) data 0.000 (0.002) loss 1.1895 (1.2306) acc 68.7500 (69.9449) lr 1.9980e-03 eta 20:36:39 +epoch [3/50] batch [515/1000] time 1.574 (1.563) data 0.000 (0.002) loss 0.9258 (1.2290) acc 68.7500 (69.9515) lr 1.9980e-03 eta 20:36:45 +epoch [3/50] batch [520/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.8320 (1.2284) acc 75.0000 (69.9279) lr 1.9980e-03 eta 20:36:36 +epoch [3/50] batch [525/1000] time 1.551 (1.563) data 0.000 (0.002) loss 1.4912 (1.2276) acc 68.7500 (69.9286) lr 1.9980e-03 eta 20:36:21 +epoch [3/50] batch [530/1000] time 1.568 (1.563) data 0.000 (0.002) loss 0.8857 (1.2264) acc 65.6250 (69.9175) lr 1.9980e-03 eta 20:36:15 +epoch [3/50] batch [535/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.0723 (1.2259) acc 68.7500 (69.9007) lr 1.9980e-03 eta 20:36:12 +epoch [3/50] batch [540/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.1211 (1.2257) acc 81.2500 (69.9306) lr 1.9980e-03 eta 20:36:05 +epoch [3/50] batch [545/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.3652 (1.2263) acc 59.3750 (69.9140) lr 1.9980e-03 eta 20:35:55 +epoch [3/50] batch [550/1000] time 1.540 (1.563) data 0.001 (0.002) loss 1.5713 (1.2282) acc 59.3750 (69.8864) lr 1.9980e-03 eta 20:35:40 +epoch [3/50] batch [555/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.5557 (1.2270) acc 62.5000 (69.8930) lr 1.9980e-03 eta 20:35:29 +epoch [3/50] batch [560/1000] time 1.582 (1.563) data 0.000 (0.002) loss 1.3662 (1.2257) acc 56.2500 (69.8717) lr 1.9980e-03 eta 20:35:29 
+epoch [3/50] batch [565/1000] time 1.529 (1.562) data 0.000 (0.002) loss 1.1113 (1.2237) acc 65.6250 (69.9060) lr 1.9980e-03 eta 20:35:10 +epoch [3/50] batch [570/1000] time 1.554 (1.562) data 0.000 (0.002) loss 1.0664 (1.2231) acc 68.7500 (69.8958) lr 1.9980e-03 eta 20:34:59 +epoch [3/50] batch [575/1000] time 1.532 (1.562) data 0.000 (0.002) loss 1.6367 (1.2240) acc 59.3750 (69.8424) lr 1.9980e-03 eta 20:34:50 +epoch [3/50] batch [580/1000] time 1.562 (1.562) data 0.001 (0.002) loss 1.0801 (1.2234) acc 71.8750 (69.8491) lr 1.9980e-03 eta 20:34:38 +epoch [3/50] batch [585/1000] time 1.549 (1.562) data 0.001 (0.002) loss 1.2168 (1.2237) acc 65.6250 (69.8024) lr 1.9980e-03 eta 20:34:33 +epoch [3/50] batch [590/1000] time 1.547 (1.562) data 0.000 (0.002) loss 1.3730 (1.2231) acc 53.1250 (69.8252) lr 1.9980e-03 eta 20:34:22 +epoch [3/50] batch [595/1000] time 1.568 (1.562) data 0.000 (0.002) loss 1.8623 (1.2236) acc 62.5000 (69.7899) lr 1.9980e-03 eta 20:34:11 +epoch [3/50] batch [600/1000] time 1.555 (1.562) data 0.001 (0.002) loss 0.9302 (1.2246) acc 75.0000 (69.7969) lr 1.9980e-03 eta 20:34:02 +epoch [3/50] batch [605/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.2988 (1.2243) acc 65.6250 (69.8037) lr 1.9980e-03 eta 20:34:07 +epoch [3/50] batch [610/1000] time 1.551 (1.562) data 0.001 (0.002) loss 0.8320 (1.2241) acc 78.1250 (69.8258) lr 1.9980e-03 eta 20:33:54 +epoch [3/50] batch [615/1000] time 1.549 (1.562) data 0.001 (0.002) loss 1.6465 (1.2260) acc 62.5000 (69.8120) lr 1.9980e-03 eta 20:33:48 +epoch [3/50] batch [620/1000] time 1.594 (1.562) data 0.000 (0.002) loss 1.3955 (1.2261) acc 56.2500 (69.7681) lr 1.9980e-03 eta 20:33:43 +epoch [3/50] batch [625/1000] time 1.562 (1.562) data 0.001 (0.002) loss 1.3975 (1.2266) acc 65.6250 (69.7400) lr 1.9980e-03 eta 20:33:38 +epoch [3/50] batch [630/1000] time 1.553 (1.562) data 0.000 (0.002) loss 1.0586 (1.2271) acc 71.8750 (69.7272) lr 1.9980e-03 eta 20:33:30 +epoch [3/50] batch [635/1000] time 1.552 (1.562) 
data 0.000 (0.002) loss 1.3789 (1.2267) acc 62.5000 (69.7195) lr 1.9980e-03 eta 20:33:18 +epoch [3/50] batch [640/1000] time 1.568 (1.562) data 0.000 (0.002) loss 0.8892 (1.2277) acc 78.1250 (69.6680) lr 1.9980e-03 eta 20:33:05 +epoch [3/50] batch [645/1000] time 1.567 (1.562) data 0.000 (0.002) loss 1.1670 (1.2273) acc 68.7500 (69.6512) lr 1.9980e-03 eta 20:32:57 +epoch [3/50] batch [650/1000] time 1.553 (1.562) data 0.001 (0.002) loss 1.1670 (1.2267) acc 62.5000 (69.6587) lr 1.9980e-03 eta 20:32:49 +epoch [3/50] batch [655/1000] time 1.565 (1.562) data 0.000 (0.002) loss 0.7646 (1.2279) acc 71.8750 (69.6422) lr 1.9980e-03 eta 20:32:45 +epoch [3/50] batch [660/1000] time 1.566 (1.562) data 0.000 (0.002) loss 1.0430 (1.2274) acc 81.2500 (69.6496) lr 1.9980e-03 eta 20:32:35 +epoch [3/50] batch [665/1000] time 1.712 (1.562) data 0.000 (0.002) loss 1.8486 (1.2289) acc 50.0000 (69.6147) lr 1.9980e-03 eta 20:32:37 +epoch [3/50] batch [670/1000] time 1.539 (1.562) data 0.000 (0.002) loss 1.3262 (1.2298) acc 71.8750 (69.6035) lr 1.9980e-03 eta 20:32:27 +epoch [3/50] batch [675/1000] time 1.555 (1.562) data 0.000 (0.002) loss 1.2451 (1.2287) acc 71.8750 (69.6296) lr 1.9980e-03 eta 20:32:18 +epoch [3/50] batch [680/1000] time 1.580 (1.562) data 0.000 (0.002) loss 1.5439 (1.2282) acc 65.6250 (69.6461) lr 1.9980e-03 eta 20:32:07 +epoch [3/50] batch [685/1000] time 1.549 (1.562) data 0.000 (0.002) loss 1.6982 (1.2285) acc 50.0000 (69.6533) lr 1.9980e-03 eta 20:31:58 +epoch [3/50] batch [690/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.4932 (1.2268) acc 56.2500 (69.6784) lr 1.9980e-03 eta 20:31:48 +epoch [3/50] batch [695/1000] time 1.541 (1.562) data 0.000 (0.002) loss 1.1064 (1.2250) acc 75.0000 (69.7347) lr 1.9980e-03 eta 20:31:36 +epoch [3/50] batch [700/1000] time 1.562 (1.562) data 0.000 (0.002) loss 1.3115 (1.2251) acc 75.0000 (69.7455) lr 1.9980e-03 eta 20:31:26 +epoch [3/50] batch [705/1000] time 1.557 (1.562) data 0.001 (0.002) loss 1.1670 (1.2254) acc 75.0000 
(69.7429) lr 1.9980e-03 eta 20:31:13 +epoch [3/50] batch [710/1000] time 1.738 (1.562) data 0.000 (0.002) loss 1.7256 (1.2280) acc 68.7500 (69.6875) lr 1.9980e-03 eta 20:31:16 +epoch [3/50] batch [715/1000] time 1.557 (1.562) data 0.001 (0.002) loss 0.7476 (1.2273) acc 81.2500 (69.7247) lr 1.9980e-03 eta 20:31:07 +epoch [3/50] batch [720/1000] time 1.544 (1.562) data 0.001 (0.002) loss 1.6035 (1.2280) acc 62.5000 (69.7049) lr 1.9980e-03 eta 20:30:58 +epoch [3/50] batch [725/1000] time 1.568 (1.562) data 0.000 (0.002) loss 1.1426 (1.2291) acc 71.8750 (69.7069) lr 1.9980e-03 eta 20:30:50 +epoch [3/50] batch [730/1000] time 1.569 (1.562) data 0.000 (0.002) loss 1.5127 (1.2310) acc 65.6250 (69.6875) lr 1.9980e-03 eta 20:30:44 +epoch [3/50] batch [735/1000] time 1.572 (1.562) data 0.001 (0.002) loss 0.7930 (1.2299) acc 81.2500 (69.7194) lr 1.9980e-03 eta 20:30:40 +epoch [3/50] batch [740/1000] time 1.585 (1.562) data 0.000 (0.002) loss 1.5439 (1.2291) acc 53.1250 (69.6959) lr 1.9980e-03 eta 20:30:35 +epoch [3/50] batch [745/1000] time 1.541 (1.562) data 0.000 (0.002) loss 1.1406 (1.2281) acc 71.8750 (69.7273) lr 1.9980e-03 eta 20:30:26 +epoch [3/50] batch [750/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.3232 (1.2281) acc 65.6250 (69.7375) lr 1.9980e-03 eta 20:30:21 +epoch [3/50] batch [755/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.1455 (1.2255) acc 65.6250 (69.7848) lr 1.9980e-03 eta 20:30:21 +epoch [3/50] batch [760/1000] time 1.556 (1.562) data 0.001 (0.002) loss 0.4861 (1.2245) acc 84.3750 (69.8191) lr 1.9980e-03 eta 20:30:12 +epoch [3/50] batch [765/1000] time 1.547 (1.562) data 0.000 (0.002) loss 1.7646 (1.2242) acc 59.3750 (69.8243) lr 1.9980e-03 eta 20:30:01 +epoch [3/50] batch [770/1000] time 1.575 (1.562) data 0.001 (0.002) loss 1.0605 (1.2247) acc 68.7500 (69.7971) lr 1.9980e-03 eta 20:29:52 +epoch [3/50] batch [775/1000] time 1.534 (1.562) data 0.000 (0.002) loss 1.5537 (1.2254) acc 59.3750 (69.7581) lr 1.9980e-03 eta 20:29:41 +epoch [3/50] 
batch [780/1000] time 1.524 (1.562) data 0.000 (0.002) loss 1.3105 (1.2265) acc 65.6250 (69.7436) lr 1.9980e-03 eta 20:29:28 +epoch [3/50] batch [785/1000] time 1.573 (1.562) data 0.000 (0.002) loss 0.9727 (1.2271) acc 71.8750 (69.7412) lr 1.9980e-03 eta 20:29:16 +epoch [3/50] batch [790/1000] time 1.571 (1.562) data 0.001 (0.002) loss 2.1387 (1.2265) acc 53.1250 (69.7429) lr 1.9980e-03 eta 20:29:07 +epoch [3/50] batch [795/1000] time 1.531 (1.562) data 0.001 (0.002) loss 0.8057 (1.2245) acc 78.1250 (69.7720) lr 1.9980e-03 eta 20:28:58 +epoch [3/50] batch [800/1000] time 1.556 (1.562) data 0.001 (0.002) loss 1.3457 (1.2233) acc 62.5000 (69.7773) lr 1.9980e-03 eta 20:28:49 +epoch [3/50] batch [805/1000] time 1.550 (1.562) data 0.000 (0.002) loss 2.1641 (1.2246) acc 56.2500 (69.7671) lr 1.9980e-03 eta 20:28:39 +epoch [3/50] batch [810/1000] time 1.560 (1.562) data 0.000 (0.002) loss 1.3311 (1.2255) acc 65.6250 (69.7338) lr 1.9980e-03 eta 20:28:31 +epoch [3/50] batch [815/1000] time 1.590 (1.562) data 0.001 (0.002) loss 1.0654 (1.2262) acc 65.6250 (69.7201) lr 1.9980e-03 eta 20:28:24 +epoch [3/50] batch [820/1000] time 1.549 (1.562) data 0.000 (0.002) loss 1.0029 (1.2264) acc 78.1250 (69.6875) lr 1.9980e-03 eta 20:28:27 +epoch [3/50] batch [825/1000] time 1.574 (1.562) data 0.000 (0.002) loss 1.3809 (1.2255) acc 71.8750 (69.6932) lr 1.9980e-03 eta 20:28:19 +epoch [3/50] batch [830/1000] time 1.552 (1.562) data 0.000 (0.002) loss 1.2441 (1.2260) acc 56.2500 (69.6687) lr 1.9980e-03 eta 20:28:11 +epoch [3/50] batch [835/1000] time 1.566 (1.562) data 0.000 (0.002) loss 1.2314 (1.2272) acc 65.6250 (69.6257) lr 1.9980e-03 eta 20:28:04 +epoch [3/50] batch [840/1000] time 1.558 (1.562) data 0.001 (0.002) loss 0.9380 (1.2274) acc 75.0000 (69.6280) lr 1.9980e-03 eta 20:27:53 +epoch [3/50] batch [845/1000] time 1.557 (1.562) data 0.000 (0.002) loss 1.1201 (1.2302) acc 65.6250 (69.6006) lr 1.9980e-03 eta 20:27:43 +epoch [3/50] batch [850/1000] time 1.563 (1.562) data 0.000 
(0.002) loss 1.2256 (1.2306) acc 71.8750 (69.6029) lr 1.9980e-03 eta 20:27:34 +epoch [3/50] batch [855/1000] time 1.564 (1.562) data 0.001 (0.002) loss 0.7588 (1.2297) acc 78.1250 (69.6235) lr 1.9980e-03 eta 20:27:26 +epoch [3/50] batch [860/1000] time 1.531 (1.562) data 0.000 (0.002) loss 1.3418 (1.2300) acc 71.8750 (69.6148) lr 1.9980e-03 eta 20:27:17 +epoch [3/50] batch [865/1000] time 1.586 (1.562) data 0.000 (0.002) loss 1.0068 (1.2302) acc 71.8750 (69.5990) lr 1.9980e-03 eta 20:27:19 +epoch [3/50] batch [870/1000] time 1.583 (1.562) data 0.001 (0.002) loss 1.3174 (1.2304) acc 68.7500 (69.6049) lr 1.9980e-03 eta 20:27:15 +epoch [3/50] batch [875/1000] time 1.578 (1.562) data 0.000 (0.002) loss 1.2217 (1.2293) acc 75.0000 (69.6286) lr 1.9980e-03 eta 20:27:10 +epoch [3/50] batch [880/1000] time 1.560 (1.562) data 0.000 (0.002) loss 1.0596 (1.2289) acc 75.0000 (69.6662) lr 1.9980e-03 eta 20:27:04 +epoch [3/50] batch [885/1000] time 1.560 (1.563) data 0.000 (0.001) loss 0.7783 (1.2277) acc 81.2500 (69.6857) lr 1.9980e-03 eta 20:26:57 +epoch [3/50] batch [890/1000] time 1.562 (1.563) data 0.000 (0.001) loss 0.8130 (1.2260) acc 71.8750 (69.7015) lr 1.9980e-03 eta 20:26:49 +epoch [3/50] batch [895/1000] time 1.549 (1.563) data 0.001 (0.001) loss 1.4287 (1.2270) acc 75.0000 (69.7067) lr 1.9980e-03 eta 20:26:42 +epoch [3/50] batch [900/1000] time 1.555 (1.563) data 0.000 (0.001) loss 1.6562 (1.2264) acc 59.3750 (69.7292) lr 1.9980e-03 eta 20:26:33 +epoch [3/50] batch [905/1000] time 1.547 (1.563) data 0.001 (0.001) loss 2.0762 (1.2272) acc 62.5000 (69.7134) lr 1.9980e-03 eta 20:26:31 +epoch [3/50] batch [910/1000] time 1.553 (1.563) data 0.001 (0.001) loss 1.4189 (1.2289) acc 68.7500 (69.6978) lr 1.9980e-03 eta 20:26:22 +epoch [3/50] batch [915/1000] time 1.567 (1.563) data 0.001 (0.001) loss 0.8867 (1.2288) acc 78.1250 (69.6995) lr 1.9980e-03 eta 20:26:11 +epoch [3/50] batch [920/1000] time 1.602 (1.563) data 0.000 (0.001) loss 0.5156 (1.2276) acc 90.6250 (69.7317) lr 
1.9980e-03 eta 20:26:02 +epoch [3/50] batch [925/1000] time 1.572 (1.562) data 0.001 (0.001) loss 1.5391 (1.2282) acc 59.3750 (69.7162) lr 1.9980e-03 eta 20:25:52 +epoch [3/50] batch [930/1000] time 1.556 (1.562) data 0.000 (0.001) loss 1.6650 (1.2274) acc 68.7500 (69.7413) lr 1.9980e-03 eta 20:25:44 +epoch [3/50] batch [935/1000] time 1.547 (1.562) data 0.001 (0.001) loss 1.5879 (1.2283) acc 59.3750 (69.7360) lr 1.9980e-03 eta 20:25:36 +epoch [3/50] batch [940/1000] time 1.542 (1.562) data 0.001 (0.001) loss 0.6431 (1.2275) acc 90.6250 (69.7507) lr 1.9980e-03 eta 20:25:29 +epoch [3/50] batch [945/1000] time 1.547 (1.562) data 0.000 (0.001) loss 1.6084 (1.2272) acc 53.1250 (69.7421) lr 1.9980e-03 eta 20:25:17 +epoch [3/50] batch [950/1000] time 1.565 (1.562) data 0.000 (0.001) loss 1.1270 (1.2275) acc 65.6250 (69.7237) lr 1.9980e-03 eta 20:25:08 +epoch [3/50] batch [955/1000] time 1.567 (1.562) data 0.001 (0.001) loss 1.0547 (1.2276) acc 68.7500 (69.7153) lr 1.9980e-03 eta 20:25:01 +epoch [3/50] batch [960/1000] time 1.578 (1.562) data 0.001 (0.001) loss 0.8994 (1.2273) acc 71.8750 (69.6973) lr 1.9980e-03 eta 20:24:53 +epoch [3/50] batch [965/1000] time 1.558 (1.562) data 0.000 (0.001) loss 0.6440 (1.2273) acc 81.2500 (69.7085) lr 1.9980e-03 eta 20:24:43 +epoch [3/50] batch [970/1000] time 1.560 (1.562) data 0.000 (0.001) loss 1.3125 (1.2278) acc 56.2500 (69.6843) lr 1.9980e-03 eta 20:24:41 +epoch [3/50] batch [975/1000] time 1.560 (1.562) data 0.000 (0.001) loss 0.8984 (1.2276) acc 75.0000 (69.6795) lr 1.9980e-03 eta 20:24:33 +epoch [3/50] batch [980/1000] time 1.549 (1.562) data 0.000 (0.001) loss 1.0098 (1.2275) acc 65.6250 (69.6588) lr 1.9980e-03 eta 20:24:24 +epoch [3/50] batch [985/1000] time 1.554 (1.562) data 0.001 (0.001) loss 1.4463 (1.2287) acc 62.5000 (69.6288) lr 1.9980e-03 eta 20:24:15 +epoch [3/50] batch [990/1000] time 1.567 (1.562) data 0.000 (0.001) loss 0.8389 (1.2266) acc 81.2500 (69.6843) lr 1.9980e-03 eta 20:24:06 +epoch [3/50] batch 
[995/1000] time 1.555 (1.562) data 0.000 (0.001) loss 1.5352 (1.2269) acc 71.8750 (69.6796) lr 1.9980e-03 eta 20:23:56 +epoch [3/50] batch [1000/1000] time 1.556 (1.562) data 0.000 (0.001) loss 0.6152 (1.2268) acc 84.3750 (69.6813) lr 1.9921e-03 eta 20:23:46 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,792 +* accuracy: 77.6% +* error: 22.4% +* macro_f1: 77.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [4/50] batch [5/1000] time 1.560 (1.673) data 0.000 (0.170) loss 1.1875 (1.2334) acc 81.2500 (70.0000) lr 1.9921e-03 eta 21:50:31 +epoch [4/50] batch [10/1000] time 1.554 (1.616) data 0.000 (0.085) loss 1.2275 (1.1227) acc 78.1250 (73.4375) lr 1.9921e-03 eta 21:05:13 +epoch [4/50] batch [15/1000] time 1.554 (1.602) data 0.000 (0.057) loss 0.9927 (1.1324) acc 71.8750 (72.9167) lr 1.9921e-03 eta 20:54:18 +epoch [4/50] batch [20/1000] time 1.551 (1.589) data 0.001 (0.043) loss 1.4629 (1.1391) acc 68.7500 (73.1250) lr 1.9921e-03 eta 20:44:26 +epoch [4/50] batch [25/1000] time 1.545 (1.582) data 0.000 (0.034) loss 1.6152 (1.1850) acc 53.1250 (71.7500) lr 1.9921e-03 eta 20:38:56 +epoch [4/50] batch [30/1000] time 1.555 (1.578) data 0.001 (0.029) loss 1.1260 (1.1680) acc 71.8750 (71.6667) lr 1.9921e-03 eta 20:35:28 +epoch [4/50] batch [35/1000] time 1.588 (1.577) data 0.001 (0.025) loss 0.6743 (1.1545) acc 78.1250 (71.2500) lr 1.9921e-03 eta 20:34:16 +epoch [4/50] batch [40/1000] time 1.590 (1.575) data 0.001 (0.022) loss 1.4893 (1.1863) acc 56.2500 (70.6250) lr 1.9921e-03 eta 20:32:58 +epoch [4/50] batch [45/1000] time 1.542 (1.575) data 0.001 (0.019) loss 0.8921 (1.1742) acc 81.2500 (71.0417) lr 1.9921e-03 eta 20:32:30 +epoch [4/50] batch [50/1000] time 1.547 (1.573) data 0.000 (0.018) loss 1.3066 (1.1821) acc 75.0000 (71.0625) lr 1.9921e-03 eta 20:30:53 +epoch [4/50] batch [55/1000] time 1.560 (1.573) data 0.001 (0.016) loss 0.9629 (1.1800) acc 
71.8750 (70.8523) lr 1.9921e-03 eta 20:30:23 +epoch [4/50] batch [60/1000] time 1.575 (1.572) data 0.001 (0.015) loss 0.8882 (1.1842) acc 78.1250 (70.6771) lr 1.9921e-03 eta 20:29:32 +epoch [4/50] batch [65/1000] time 1.551 (1.570) data 0.000 (0.014) loss 0.9751 (1.1960) acc 81.2500 (70.6731) lr 1.9921e-03 eta 20:28:12 +epoch [4/50] batch [70/1000] time 1.566 (1.569) data 0.001 (0.013) loss 1.0869 (1.2021) acc 75.0000 (70.5804) lr 1.9921e-03 eta 20:27:10 +epoch [4/50] batch [75/1000] time 1.551 (1.568) data 0.001 (0.012) loss 0.8628 (1.2080) acc 75.0000 (70.3333) lr 1.9921e-03 eta 20:26:25 +epoch [4/50] batch [80/1000] time 1.556 (1.568) data 0.001 (0.011) loss 1.3408 (1.2230) acc 62.5000 (69.9609) lr 1.9921e-03 eta 20:26:09 +epoch [4/50] batch [85/1000] time 1.564 (1.567) data 0.001 (0.011) loss 0.8154 (1.2225) acc 81.2500 (69.9632) lr 1.9921e-03 eta 20:25:36 +epoch [4/50] batch [90/1000] time 1.563 (1.567) data 0.001 (0.010) loss 1.1318 (1.2109) acc 81.2500 (70.2083) lr 1.9921e-03 eta 20:25:16 +epoch [4/50] batch [95/1000] time 1.557 (1.567) data 0.001 (0.009) loss 1.3408 (1.2138) acc 71.8750 (70.0658) lr 1.9921e-03 eta 20:24:45 +epoch [4/50] batch [100/1000] time 1.547 (1.566) data 0.001 (0.009) loss 1.3281 (1.2144) acc 68.7500 (70.0000) lr 1.9921e-03 eta 20:24:16 +epoch [4/50] batch [105/1000] time 1.561 (1.567) data 0.000 (0.009) loss 1.6582 (1.2097) acc 68.7500 (70.1488) lr 1.9921e-03 eta 20:24:41 +epoch [4/50] batch [110/1000] time 1.540 (1.566) data 0.000 (0.008) loss 0.8794 (1.2057) acc 78.1250 (70.2841) lr 1.9921e-03 eta 20:23:47 +epoch [4/50] batch [115/1000] time 1.568 (1.566) data 0.000 (0.008) loss 1.4365 (1.2129) acc 56.2500 (69.8641) lr 1.9921e-03 eta 20:23:46 +epoch [4/50] batch [120/1000] time 1.551 (1.566) data 0.000 (0.008) loss 1.3613 (1.2177) acc 71.8750 (69.6615) lr 1.9921e-03 eta 20:23:21 +epoch [4/50] batch [125/1000] time 1.551 (1.566) data 0.000 (0.007) loss 1.6865 (1.2244) acc 50.0000 (69.4250) lr 1.9921e-03 eta 20:23:15 +epoch [4/50] 
batch [130/1000] time 1.538 (1.565) data 0.000 (0.007) loss 1.3418 (1.2261) acc 78.1250 (69.6154) lr 1.9921e-03 eta 20:22:52 +epoch [4/50] batch [135/1000] time 1.542 (1.565) data 0.000 (0.007) loss 1.3643 (1.2285) acc 75.0000 (69.5370) lr 1.9921e-03 eta 20:22:47 +epoch [4/50] batch [140/1000] time 1.577 (1.565) data 0.000 (0.007) loss 1.2129 (1.2319) acc 65.6250 (69.3750) lr 1.9921e-03 eta 20:22:32 +epoch [4/50] batch [145/1000] time 1.553 (1.565) data 0.000 (0.006) loss 0.9980 (1.2345) acc 71.8750 (69.3750) lr 1.9921e-03 eta 20:22:13 +epoch [4/50] batch [150/1000] time 1.559 (1.566) data 0.000 (0.006) loss 1.1152 (1.2338) acc 62.5000 (69.2292) lr 1.9921e-03 eta 20:22:26 +epoch [4/50] batch [155/1000] time 1.555 (1.565) data 0.001 (0.006) loss 1.6016 (1.2347) acc 59.3750 (69.1532) lr 1.9921e-03 eta 20:22:13 +epoch [4/50] batch [160/1000] time 1.559 (1.565) data 0.000 (0.006) loss 0.8975 (1.2296) acc 68.7500 (69.2188) lr 1.9921e-03 eta 20:21:47 +epoch [4/50] batch [165/1000] time 1.559 (1.565) data 0.000 (0.006) loss 1.3535 (1.2349) acc 71.8750 (69.2045) lr 1.9921e-03 eta 20:21:39 +epoch [4/50] batch [170/1000] time 1.580 (1.565) data 0.000 (0.005) loss 1.3564 (1.2340) acc 71.8750 (69.2463) lr 1.9921e-03 eta 20:21:44 +epoch [4/50] batch [175/1000] time 1.561 (1.565) data 0.000 (0.005) loss 1.4453 (1.2321) acc 65.6250 (69.2679) lr 1.9921e-03 eta 20:21:36 +epoch [4/50] batch [180/1000] time 1.590 (1.566) data 0.000 (0.005) loss 2.0391 (1.2365) acc 53.1250 (69.1667) lr 1.9921e-03 eta 20:21:36 +epoch [4/50] batch [185/1000] time 1.564 (1.565) data 0.000 (0.005) loss 1.2266 (1.2368) acc 62.5000 (68.9696) lr 1.9921e-03 eta 20:21:25 +epoch [4/50] batch [190/1000] time 1.581 (1.566) data 0.001 (0.005) loss 1.5166 (1.2443) acc 71.8750 (68.8816) lr 1.9921e-03 eta 20:21:53 +epoch [4/50] batch [195/1000] time 1.547 (1.566) data 0.000 (0.005) loss 1.1377 (1.2533) acc 65.6250 (68.8141) lr 1.9921e-03 eta 20:21:31 +epoch [4/50] batch [200/1000] time 1.561 (1.566) data 0.001 
(0.005) loss 1.3223 (1.2569) acc 65.6250 (68.6875) lr 1.9921e-03 eta 20:21:07 +epoch [4/50] batch [205/1000] time 1.570 (1.565) data 0.000 (0.005) loss 0.9341 (1.2554) acc 68.7500 (68.6890) lr 1.9921e-03 eta 20:20:45 +epoch [4/50] batch [210/1000] time 1.571 (1.565) data 0.000 (0.005) loss 1.0234 (1.2526) acc 68.7500 (68.7202) lr 1.9921e-03 eta 20:20:33 +epoch [4/50] batch [215/1000] time 1.537 (1.565) data 0.001 (0.004) loss 0.5547 (1.2464) acc 78.1250 (68.7645) lr 1.9921e-03 eta 20:20:03 +epoch [4/50] batch [220/1000] time 1.565 (1.565) data 0.000 (0.004) loss 0.8506 (1.2473) acc 78.1250 (68.7216) lr 1.9921e-03 eta 20:19:49 +epoch [4/50] batch [225/1000] time 1.573 (1.565) data 0.001 (0.004) loss 1.4785 (1.2464) acc 65.6250 (68.6944) lr 1.9921e-03 eta 20:19:42 +epoch [4/50] batch [230/1000] time 1.573 (1.564) data 0.000 (0.004) loss 1.4141 (1.2460) acc 50.0000 (68.6821) lr 1.9921e-03 eta 20:19:21 +epoch [4/50] batch [235/1000] time 1.575 (1.564) data 0.001 (0.004) loss 1.1631 (1.2435) acc 65.6250 (68.7367) lr 1.9921e-03 eta 20:19:11 +epoch [4/50] batch [240/1000] time 1.549 (1.564) data 0.000 (0.004) loss 0.9551 (1.2413) acc 75.0000 (68.8021) lr 1.9921e-03 eta 20:18:51 +epoch [4/50] batch [245/1000] time 1.558 (1.564) data 0.001 (0.004) loss 0.9399 (1.2441) acc 78.1250 (68.7755) lr 1.9921e-03 eta 20:18:36 +epoch [4/50] batch [250/1000] time 1.552 (1.563) data 0.000 (0.004) loss 1.0352 (1.2443) acc 65.6250 (68.8125) lr 1.9921e-03 eta 20:18:13 +epoch [4/50] batch [255/1000] time 1.555 (1.564) data 0.001 (0.004) loss 1.8105 (1.2431) acc 62.5000 (68.7990) lr 1.9921e-03 eta 20:18:32 +epoch [4/50] batch [260/1000] time 1.549 (1.564) data 0.001 (0.004) loss 1.9961 (1.2432) acc 56.2500 (68.8101) lr 1.9921e-03 eta 20:18:16 +epoch [4/50] batch [265/1000] time 1.546 (1.564) data 0.001 (0.004) loss 0.7881 (1.2430) acc 71.8750 (68.8325) lr 1.9921e-03 eta 20:18:01 +epoch [4/50] batch [270/1000] time 1.581 (1.564) data 0.000 (0.004) loss 1.4424 (1.2420) acc 65.6250 (68.8426) lr 
1.9921e-03 eta 20:17:57 +epoch [4/50] batch [275/1000] time 1.574 (1.564) data 0.000 (0.004) loss 1.2188 (1.2389) acc 68.7500 (68.9091) lr 1.9921e-03 eta 20:17:46 +epoch [4/50] batch [280/1000] time 1.576 (1.564) data 0.000 (0.004) loss 1.9951 (1.2426) acc 50.0000 (68.8951) lr 1.9921e-03 eta 20:17:29 +epoch [4/50] batch [285/1000] time 1.600 (1.564) data 0.001 (0.003) loss 0.9814 (1.2407) acc 75.0000 (68.9254) lr 1.9921e-03 eta 20:17:24 +epoch [4/50] batch [290/1000] time 1.578 (1.563) data 0.000 (0.003) loss 1.1094 (1.2388) acc 62.5000 (68.9332) lr 1.9921e-03 eta 20:17:08 +epoch [4/50] batch [295/1000] time 1.551 (1.563) data 0.000 (0.003) loss 1.5557 (1.2391) acc 65.6250 (68.9513) lr 1.9921e-03 eta 20:16:53 +epoch [4/50] batch [300/1000] time 1.538 (1.564) data 0.000 (0.003) loss 1.3145 (1.2396) acc 65.6250 (68.9688) lr 1.9921e-03 eta 20:17:01 +epoch [4/50] batch [305/1000] time 1.569 (1.564) data 0.000 (0.003) loss 1.7676 (1.2402) acc 53.1250 (68.9447) lr 1.9921e-03 eta 20:17:00 +epoch [4/50] batch [310/1000] time 1.541 (1.564) data 0.000 (0.003) loss 1.4854 (1.2399) acc 68.7500 (68.9617) lr 1.9921e-03 eta 20:16:50 +epoch [4/50] batch [315/1000] time 1.571 (1.564) data 0.001 (0.003) loss 1.6592 (1.2419) acc 53.1250 (68.9087) lr 1.9921e-03 eta 20:16:51 +epoch [4/50] batch [320/1000] time 1.555 (1.564) data 0.000 (0.003) loss 2.1855 (1.2431) acc 62.5000 (68.9453) lr 1.9921e-03 eta 20:16:46 +epoch [4/50] batch [325/1000] time 1.561 (1.564) data 0.000 (0.003) loss 1.2031 (1.2365) acc 68.7500 (69.0865) lr 1.9921e-03 eta 20:16:38 +epoch [4/50] batch [330/1000] time 1.570 (1.564) data 0.000 (0.003) loss 1.0371 (1.2345) acc 75.0000 (69.1667) lr 1.9921e-03 eta 20:16:28 +epoch [4/50] batch [335/1000] time 1.540 (1.564) data 0.001 (0.003) loss 1.7129 (1.2400) acc 65.6250 (69.1045) lr 1.9921e-03 eta 20:16:16 +epoch [4/50] batch [340/1000] time 1.701 (1.564) data 0.000 (0.003) loss 0.9009 (1.2357) acc 78.1250 (69.2188) lr 1.9921e-03 eta 20:16:22 +epoch [4/50] batch 
[345/1000] time 1.574 (1.564) data 0.001 (0.003) loss 1.2480 (1.2333) acc 68.7500 (69.2301) lr 1.9921e-03 eta 20:16:08 +epoch [4/50] batch [350/1000] time 1.559 (1.564) data 0.001 (0.003) loss 1.3438 (1.2329) acc 65.6250 (69.2946) lr 1.9921e-03 eta 20:16:00 +epoch [4/50] batch [355/1000] time 1.557 (1.564) data 0.001 (0.003) loss 0.6743 (1.2278) acc 75.0000 (69.3662) lr 1.9921e-03 eta 20:15:50 +epoch [4/50] batch [360/1000] time 1.559 (1.564) data 0.000 (0.003) loss 1.3828 (1.2265) acc 68.7500 (69.4531) lr 1.9921e-03 eta 20:15:42 +epoch [4/50] batch [365/1000] time 1.567 (1.564) data 0.000 (0.003) loss 0.9463 (1.2253) acc 78.1250 (69.4692) lr 1.9921e-03 eta 20:15:42 +epoch [4/50] batch [370/1000] time 1.564 (1.564) data 0.001 (0.003) loss 1.0127 (1.2245) acc 68.7500 (69.4764) lr 1.9921e-03 eta 20:15:36 +epoch [4/50] batch [375/1000] time 1.559 (1.564) data 0.000 (0.003) loss 1.6650 (1.2260) acc 62.5000 (69.4417) lr 1.9921e-03 eta 20:15:25 +epoch [4/50] batch [380/1000] time 1.537 (1.564) data 0.000 (0.003) loss 1.8301 (1.2259) acc 59.3750 (69.4737) lr 1.9921e-03 eta 20:15:10 +epoch [4/50] batch [385/1000] time 1.575 (1.564) data 0.000 (0.003) loss 0.9619 (1.2245) acc 78.1250 (69.5049) lr 1.9921e-03 eta 20:15:01 +epoch [4/50] batch [390/1000] time 1.565 (1.564) data 0.000 (0.003) loss 1.4902 (1.2283) acc 65.6250 (69.4551) lr 1.9921e-03 eta 20:14:49 +epoch [4/50] batch [395/1000] time 1.575 (1.564) data 0.000 (0.003) loss 0.7676 (1.2297) acc 78.1250 (69.4541) lr 1.9921e-03 eta 20:14:42 +epoch [4/50] batch [400/1000] time 1.555 (1.564) data 0.000 (0.003) loss 1.2646 (1.2270) acc 71.8750 (69.4609) lr 1.9921e-03 eta 20:14:31 +epoch [4/50] batch [405/1000] time 1.555 (1.564) data 0.000 (0.003) loss 0.7559 (1.2269) acc 81.2500 (69.4599) lr 1.9921e-03 eta 20:14:39 +epoch [4/50] batch [410/1000] time 1.573 (1.564) data 0.000 (0.003) loss 0.8394 (1.2256) acc 75.0000 (69.4817) lr 1.9921e-03 eta 20:14:34 +epoch [4/50] batch [415/1000] time 1.558 (1.564) data 0.000 (0.003) loss 
1.1025 (1.2236) acc 65.6250 (69.5181) lr 1.9921e-03 eta 20:14:24 +epoch [4/50] batch [420/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.3311 (1.2259) acc 62.5000 (69.4345) lr 1.9921e-03 eta 20:14:18 +epoch [4/50] batch [425/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.8159 (1.2275) acc 71.8750 (69.4191) lr 1.9921e-03 eta 20:14:06 +epoch [4/50] batch [430/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.4072 (1.2257) acc 75.0000 (69.4985) lr 1.9921e-03 eta 20:13:58 +epoch [4/50] batch [435/1000] time 1.561 (1.564) data 0.000 (0.002) loss 2.1836 (1.2283) acc 50.0000 (69.4756) lr 1.9921e-03 eta 20:13:49 +epoch [4/50] batch [440/1000] time 1.551 (1.564) data 0.000 (0.002) loss 0.6895 (1.2256) acc 84.3750 (69.5241) lr 1.9921e-03 eta 20:13:32 +epoch [4/50] batch [445/1000] time 1.550 (1.564) data 0.001 (0.002) loss 1.1592 (1.2250) acc 78.1250 (69.5295) lr 1.9921e-03 eta 20:13:23 +epoch [4/50] batch [450/1000] time 1.567 (1.564) data 0.000 (0.002) loss 1.1377 (1.2208) acc 75.0000 (69.6181) lr 1.9921e-03 eta 20:13:25 +epoch [4/50] batch [455/1000] time 1.552 (1.564) data 0.001 (0.002) loss 1.6592 (1.2220) acc 56.2500 (69.5742) lr 1.9921e-03 eta 20:13:13 +epoch [4/50] batch [460/1000] time 1.546 (1.564) data 0.001 (0.002) loss 1.7559 (1.2222) acc 56.2500 (69.5788) lr 1.9921e-03 eta 20:12:58 +epoch [4/50] batch [465/1000] time 1.567 (1.564) data 0.001 (0.002) loss 1.0771 (1.2226) acc 65.6250 (69.5228) lr 1.9921e-03 eta 20:12:46 +epoch [4/50] batch [470/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.0283 (1.2203) acc 56.2500 (69.5080) lr 1.9921e-03 eta 20:12:43 +epoch [4/50] batch [475/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.3350 (1.2207) acc 65.6250 (69.4803) lr 1.9921e-03 eta 20:12:32 +epoch [4/50] batch [480/1000] time 1.536 (1.564) data 0.001 (0.002) loss 1.0693 (1.2227) acc 81.2500 (69.4857) lr 1.9921e-03 eta 20:12:21 +epoch [4/50] batch [485/1000] time 1.570 (1.564) data 0.000 (0.002) loss 0.8813 (1.2214) acc 81.2500 (69.5103) lr 1.9921e-03 
eta 20:12:15 +epoch [4/50] batch [490/1000] time 1.530 (1.564) data 0.000 (0.002) loss 1.4775 (1.2218) acc 65.6250 (69.5089) lr 1.9921e-03 eta 20:12:04 +epoch [4/50] batch [495/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.0840 (1.2223) acc 68.7500 (69.4634) lr 1.9921e-03 eta 20:12:07 +epoch [4/50] batch [500/1000] time 1.571 (1.564) data 0.000 (0.002) loss 1.3398 (1.2222) acc 53.1250 (69.4437) lr 1.9921e-03 eta 20:11:58 +epoch [4/50] batch [505/1000] time 1.553 (1.564) data 0.000 (0.002) loss 1.3242 (1.2238) acc 56.2500 (69.4059) lr 1.9921e-03 eta 20:11:47 +epoch [4/50] batch [510/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.7949 (1.2233) acc 71.8750 (69.4179) lr 1.9921e-03 eta 20:11:32 +epoch [4/50] batch [515/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.5996 (1.2235) acc 68.7500 (69.4478) lr 1.9921e-03 eta 20:11:25 +epoch [4/50] batch [520/1000] time 1.548 (1.563) data 0.000 (0.002) loss 1.0957 (1.2228) acc 75.0000 (69.4952) lr 1.9921e-03 eta 20:11:10 +epoch [4/50] batch [525/1000] time 1.573 (1.563) data 0.000 (0.002) loss 0.4546 (1.2191) acc 81.2500 (69.5655) lr 1.9921e-03 eta 20:10:59 +epoch [4/50] batch [530/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.9814 (1.2199) acc 46.8750 (69.5342) lr 1.9921e-03 eta 20:10:52 +epoch [4/50] batch [535/1000] time 1.574 (1.563) data 0.000 (0.002) loss 0.8062 (1.2175) acc 68.7500 (69.5561) lr 1.9921e-03 eta 20:10:43 +epoch [4/50] batch [540/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.1865 (1.2191) acc 75.0000 (69.5428) lr 1.9921e-03 eta 20:10:33 +epoch [4/50] batch [545/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.1719 (1.2193) acc 81.2500 (69.5757) lr 1.9921e-03 eta 20:10:29 +epoch [4/50] batch [550/1000] time 1.566 (1.563) data 0.000 (0.002) loss 1.9053 (1.2186) acc 68.7500 (69.6136) lr 1.9921e-03 eta 20:10:16 +epoch [4/50] batch [555/1000] time 1.562 (1.564) data 0.000 (0.002) loss 0.7822 (1.2173) acc 81.2500 (69.6453) lr 1.9921e-03 eta 20:10:19 +epoch [4/50] batch [560/1000] time 
1.571 (1.564) data 0.000 (0.002) loss 1.0566 (1.2173) acc 68.7500 (69.6373) lr 1.9921e-03 eta 20:10:11 +epoch [4/50] batch [565/1000] time 1.561 (1.564) data 0.001 (0.002) loss 0.9302 (1.2168) acc 78.1250 (69.6294) lr 1.9921e-03 eta 20:10:03 +epoch [4/50] batch [570/1000] time 1.541 (1.564) data 0.001 (0.002) loss 1.0410 (1.2152) acc 78.1250 (69.6711) lr 1.9921e-03 eta 20:09:54 +epoch [4/50] batch [575/1000] time 1.577 (1.563) data 0.001 (0.002) loss 1.2979 (1.2159) acc 75.0000 (69.6848) lr 1.9921e-03 eta 20:09:43 +epoch [4/50] batch [580/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.4932 (1.2167) acc 59.3750 (69.6228) lr 1.9921e-03 eta 20:09:34 +epoch [4/50] batch [585/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.2178 (1.2174) acc 75.0000 (69.6581) lr 1.9921e-03 eta 20:09:23 +epoch [4/50] batch [590/1000] time 1.554 (1.563) data 0.000 (0.002) loss 0.8691 (1.2180) acc 78.1250 (69.6928) lr 1.9921e-03 eta 20:09:10 +epoch [4/50] batch [595/1000] time 1.555 (1.563) data 0.001 (0.002) loss 1.3467 (1.2211) acc 71.8750 (69.6271) lr 1.9921e-03 eta 20:08:55 +epoch [4/50] batch [600/1000] time 1.563 (1.563) data 0.000 (0.002) loss 1.1270 (1.2208) acc 71.8750 (69.6354) lr 1.9921e-03 eta 20:08:56 +epoch [4/50] batch [605/1000] time 1.560 (1.563) data 0.000 (0.002) loss 1.0625 (1.2196) acc 59.3750 (69.6488) lr 1.9921e-03 eta 20:08:46 +epoch [4/50] batch [610/1000] time 1.569 (1.563) data 0.001 (0.002) loss 1.6348 (1.2203) acc 71.8750 (69.6107) lr 1.9921e-03 eta 20:08:39 +epoch [4/50] batch [615/1000] time 1.581 (1.563) data 0.000 (0.002) loss 0.8550 (1.2193) acc 78.1250 (69.6189) lr 1.9921e-03 eta 20:08:34 +epoch [4/50] batch [620/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.1631 (1.2167) acc 71.8750 (69.6724) lr 1.9921e-03 eta 20:08:26 +epoch [4/50] batch [625/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.1816 (1.2176) acc 65.6250 (69.6700) lr 1.9921e-03 eta 20:08:13 +epoch [4/50] batch [630/1000] time 1.558 (1.563) data 0.001 (0.002) loss 1.6123 (1.2203) 
acc 68.7500 (69.6478) lr 1.9921e-03 eta 20:08:03 +epoch [4/50] batch [635/1000] time 1.553 (1.563) data 0.001 (0.002) loss 0.8906 (1.2218) acc 68.7500 (69.5915) lr 1.9921e-03 eta 20:07:52 +epoch [4/50] batch [640/1000] time 1.534 (1.563) data 0.000 (0.002) loss 1.1758 (1.2222) acc 71.8750 (69.5703) lr 1.9921e-03 eta 20:07:37 +epoch [4/50] batch [645/1000] time 1.539 (1.563) data 0.001 (0.002) loss 1.5820 (1.2227) acc 59.3750 (69.5397) lr 1.9921e-03 eta 20:07:38 +epoch [4/50] batch [650/1000] time 1.563 (1.563) data 0.001 (0.002) loss 1.4580 (1.2235) acc 65.6250 (69.5288) lr 1.9921e-03 eta 20:07:25 +epoch [4/50] batch [655/1000] time 1.548 (1.563) data 0.001 (0.002) loss 1.1602 (1.2222) acc 71.8750 (69.5802) lr 1.9921e-03 eta 20:07:13 +epoch [4/50] batch [660/1000] time 1.572 (1.563) data 0.001 (0.002) loss 1.0518 (1.2213) acc 84.3750 (69.6117) lr 1.9921e-03 eta 20:07:03 +epoch [4/50] batch [665/1000] time 1.549 (1.563) data 0.000 (0.002) loss 2.0742 (1.2228) acc 50.0000 (69.6053) lr 1.9921e-03 eta 20:06:51 +epoch [4/50] batch [670/1000] time 1.574 (1.563) data 0.000 (0.002) loss 0.9512 (1.2210) acc 75.0000 (69.6362) lr 1.9921e-03 eta 20:06:40 +epoch [4/50] batch [675/1000] time 1.574 (1.563) data 0.000 (0.002) loss 2.0176 (1.2218) acc 59.3750 (69.6343) lr 1.9921e-03 eta 20:06:32 +epoch [4/50] batch [680/1000] time 1.568 (1.563) data 0.000 (0.002) loss 0.9976 (1.2210) acc 68.7500 (69.6140) lr 1.9921e-03 eta 20:06:26 +epoch [4/50] batch [685/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.2578 (1.2213) acc 75.0000 (69.6168) lr 1.9921e-03 eta 20:06:19 +epoch [4/50] batch [690/1000] time 1.568 (1.563) data 0.001 (0.002) loss 1.0742 (1.2202) acc 75.0000 (69.6422) lr 1.9921e-03 eta 20:06:07 +epoch [4/50] batch [695/1000] time 1.551 (1.563) data 0.000 (0.002) loss 1.5469 (1.2194) acc 62.5000 (69.6673) lr 1.9921e-03 eta 20:05:59 +epoch [4/50] batch [700/1000] time 1.562 (1.563) data 0.000 (0.002) loss 1.0498 (1.2176) acc 81.2500 (69.7054) lr 1.9921e-03 eta 20:05:50 
+epoch [4/50] batch [705/1000] time 1.702 (1.563) data 0.001 (0.002) loss 0.7285 (1.2157) acc 93.7500 (69.7518) lr 1.9921e-03 eta 20:05:49 +epoch [4/50] batch [710/1000] time 1.526 (1.563) data 0.000 (0.002) loss 1.2363 (1.2150) acc 78.1250 (69.7843) lr 1.9921e-03 eta 20:05:36 +epoch [4/50] batch [715/1000] time 1.580 (1.563) data 0.000 (0.002) loss 1.3164 (1.2140) acc 68.7500 (69.8033) lr 1.9921e-03 eta 20:05:29 +epoch [4/50] batch [720/1000] time 1.568 (1.563) data 0.000 (0.002) loss 0.8442 (1.2130) acc 75.0000 (69.8003) lr 1.9921e-03 eta 20:05:22 +epoch [4/50] batch [725/1000] time 1.560 (1.563) data 0.001 (0.002) loss 1.4277 (1.2133) acc 68.7500 (69.8017) lr 1.9921e-03 eta 20:05:14 +epoch [4/50] batch [730/1000] time 1.550 (1.563) data 0.000 (0.002) loss 0.7676 (1.2131) acc 78.1250 (69.8288) lr 1.9921e-03 eta 20:05:04 +epoch [4/50] batch [735/1000] time 1.544 (1.563) data 0.001 (0.002) loss 0.9214 (1.2137) acc 81.2500 (69.8299) lr 1.9921e-03 eta 20:04:51 +epoch [4/50] batch [740/1000] time 1.551 (1.563) data 0.001 (0.002) loss 1.6211 (1.2123) acc 65.6250 (69.8564) lr 1.9921e-03 eta 20:04:45 +epoch [4/50] batch [745/1000] time 1.541 (1.562) data 0.000 (0.002) loss 1.4023 (1.2133) acc 62.5000 (69.8532) lr 1.9921e-03 eta 20:04:31 +epoch [4/50] batch [750/1000] time 1.709 (1.563) data 0.001 (0.002) loss 2.4238 (1.2161) acc 62.5000 (69.8292) lr 1.9921e-03 eta 20:04:31 +epoch [4/50] batch [755/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.0195 (1.2162) acc 71.8750 (69.8593) lr 1.9921e-03 eta 20:04:19 +epoch [4/50] batch [760/1000] time 1.562 (1.563) data 0.000 (0.002) loss 0.9961 (1.2145) acc 68.7500 (69.9137) lr 1.9921e-03 eta 20:04:13 +epoch [4/50] batch [765/1000] time 1.553 (1.563) data 0.001 (0.002) loss 0.8110 (1.2133) acc 75.0000 (69.9306) lr 1.9921e-03 eta 20:04:04 +epoch [4/50] batch [770/1000] time 1.539 (1.563) data 0.000 (0.002) loss 0.5562 (1.2126) acc 84.3750 (69.9513) lr 1.9921e-03 eta 20:03:58 +epoch [4/50] batch [775/1000] time 1.552 (1.563) 
data 0.001 (0.002) loss 1.2412 (1.2139) acc 65.6250 (69.9476) lr 1.9921e-03 eta 20:03:47 +epoch [4/50] batch [780/1000] time 1.595 (1.563) data 0.000 (0.002) loss 0.7144 (1.2126) acc 81.2500 (69.9639) lr 1.9921e-03 eta 20:03:43 +epoch [4/50] batch [785/1000] time 1.582 (1.563) data 0.001 (0.002) loss 0.7021 (1.2115) acc 81.2500 (69.9841) lr 1.9921e-03 eta 20:03:36 +epoch [4/50] batch [790/1000] time 1.557 (1.563) data 0.001 (0.002) loss 1.6152 (1.2119) acc 65.6250 (70.0040) lr 1.9921e-03 eta 20:03:28 +epoch [4/50] batch [795/1000] time 1.548 (1.563) data 0.000 (0.002) loss 1.0791 (1.2125) acc 59.3750 (70.0000) lr 1.9921e-03 eta 20:03:24 +epoch [4/50] batch [800/1000] time 1.542 (1.563) data 0.000 (0.002) loss 1.5557 (1.2138) acc 65.6250 (69.9805) lr 1.9921e-03 eta 20:03:12 +epoch [4/50] batch [805/1000] time 1.564 (1.563) data 0.000 (0.002) loss 0.9116 (1.2133) acc 81.2500 (69.9806) lr 1.9921e-03 eta 20:03:00 +epoch [4/50] batch [810/1000] time 1.538 (1.562) data 0.000 (0.002) loss 0.7856 (1.2129) acc 78.1250 (70.0039) lr 1.9921e-03 eta 20:02:47 +epoch [4/50] batch [815/1000] time 1.527 (1.562) data 0.000 (0.002) loss 2.0430 (1.2151) acc 53.1250 (69.9578) lr 1.9921e-03 eta 20:02:36 +epoch [4/50] batch [820/1000] time 1.583 (1.562) data 0.000 (0.002) loss 1.7979 (1.2160) acc 59.3750 (69.9314) lr 1.9921e-03 eta 20:02:26 +epoch [4/50] batch [825/1000] time 1.562 (1.562) data 0.001 (0.002) loss 1.0576 (1.2156) acc 68.7500 (69.9242) lr 1.9921e-03 eta 20:02:17 +epoch [4/50] batch [830/1000] time 1.532 (1.562) data 0.001 (0.002) loss 1.4316 (1.2163) acc 68.7500 (69.8983) lr 1.9921e-03 eta 20:02:08 +epoch [4/50] batch [835/1000] time 1.571 (1.562) data 0.000 (0.001) loss 1.2852 (1.2162) acc 71.8750 (69.9027) lr 1.9921e-03 eta 20:02:02 +epoch [4/50] batch [840/1000] time 1.566 (1.562) data 0.000 (0.001) loss 1.5332 (1.2165) acc 62.5000 (69.8958) lr 1.9921e-03 eta 20:01:53 +epoch [4/50] batch [845/1000] time 1.583 (1.562) data 0.000 (0.001) loss 1.1846 (1.2171) acc 62.5000 
(69.8706) lr 1.9921e-03 eta 20:01:45 +epoch [4/50] batch [850/1000] time 1.550 (1.562) data 0.001 (0.001) loss 1.4746 (1.2170) acc 62.5000 (69.8640) lr 1.9921e-03 eta 20:01:35 +epoch [4/50] batch [855/1000] time 1.569 (1.562) data 0.000 (0.001) loss 0.9414 (1.2165) acc 78.1250 (69.8830) lr 1.9921e-03 eta 20:01:28 +epoch [4/50] batch [860/1000] time 1.567 (1.562) data 0.001 (0.001) loss 1.5918 (1.2166) acc 62.5000 (69.8765) lr 1.9921e-03 eta 20:01:29 +epoch [4/50] batch [865/1000] time 1.586 (1.562) data 0.000 (0.001) loss 0.9258 (1.2159) acc 81.2500 (69.9277) lr 1.9921e-03 eta 20:01:20 +epoch [4/50] batch [870/1000] time 1.551 (1.562) data 0.001 (0.001) loss 0.8770 (1.2145) acc 68.7500 (69.9425) lr 1.9921e-03 eta 20:01:08 +epoch [4/50] batch [875/1000] time 1.591 (1.562) data 0.000 (0.001) loss 1.2061 (1.2139) acc 71.8750 (69.9679) lr 1.9921e-03 eta 20:01:00 +epoch [4/50] batch [880/1000] time 1.591 (1.562) data 0.000 (0.001) loss 0.6748 (1.2132) acc 81.2500 (69.9751) lr 1.9921e-03 eta 20:00:53 +epoch [4/50] batch [885/1000] time 1.569 (1.562) data 0.001 (0.001) loss 1.7041 (1.2145) acc 62.5000 (69.9541) lr 1.9921e-03 eta 20:00:48 +epoch [4/50] batch [890/1000] time 1.544 (1.562) data 0.000 (0.001) loss 1.5244 (1.2154) acc 62.5000 (69.9438) lr 1.9921e-03 eta 20:00:37 +epoch [4/50] batch [895/1000] time 1.563 (1.562) data 0.000 (0.001) loss 1.1562 (1.2162) acc 65.6250 (69.9127) lr 1.9921e-03 eta 20:00:30 +epoch [4/50] batch [900/1000] time 1.575 (1.562) data 0.001 (0.001) loss 1.2158 (1.2167) acc 68.7500 (69.9132) lr 1.9921e-03 eta 20:00:22 +epoch [4/50] batch [905/1000] time 1.560 (1.562) data 0.000 (0.001) loss 1.3545 (1.2164) acc 71.8750 (69.9378) lr 1.9921e-03 eta 20:00:22 +epoch [4/50] batch [910/1000] time 1.591 (1.562) data 0.000 (0.001) loss 1.5303 (1.2175) acc 59.3750 (69.8970) lr 1.9921e-03 eta 20:00:14 +epoch [4/50] batch [915/1000] time 1.603 (1.562) data 0.000 (0.001) loss 0.7158 (1.2174) acc 78.1250 (69.9010) lr 1.9921e-03 eta 20:00:07 +epoch [4/50] 
batch [920/1000] time 1.548 (1.562) data 0.000 (0.001) loss 1.4971 (1.2168) acc 53.1250 (69.9219) lr 1.9921e-03 eta 19:59:59 +epoch [4/50] batch [925/1000] time 1.559 (1.563) data 0.000 (0.001) loss 1.5615 (1.2161) acc 56.2500 (69.9020) lr 1.9921e-03 eta 19:59:54 +epoch [4/50] batch [930/1000] time 1.569 (1.563) data 0.000 (0.001) loss 0.7949 (1.2159) acc 75.0000 (69.8992) lr 1.9921e-03 eta 19:59:47 +epoch [4/50] batch [935/1000] time 1.571 (1.563) data 0.000 (0.001) loss 1.5186 (1.2166) acc 62.5000 (69.9131) lr 1.9921e-03 eta 19:59:41 +epoch [4/50] batch [940/1000] time 1.572 (1.563) data 0.000 (0.001) loss 1.5430 (1.2164) acc 75.0000 (69.9368) lr 1.9921e-03 eta 19:59:33 +epoch [4/50] batch [945/1000] time 1.588 (1.563) data 0.000 (0.001) loss 1.3838 (1.2161) acc 62.5000 (69.9405) lr 1.9921e-03 eta 19:59:35 +epoch [4/50] batch [950/1000] time 1.595 (1.563) data 0.000 (0.001) loss 1.3711 (1.2165) acc 65.6250 (69.9408) lr 1.9921e-03 eta 19:59:31 +epoch [4/50] batch [955/1000] time 1.545 (1.563) data 0.000 (0.001) loss 1.1582 (1.2168) acc 65.6250 (69.9215) lr 1.9921e-03 eta 19:59:23 +epoch [4/50] batch [960/1000] time 1.572 (1.563) data 0.001 (0.001) loss 1.0674 (1.2158) acc 71.8750 (69.9382) lr 1.9921e-03 eta 19:59:14 +epoch [4/50] batch [965/1000] time 1.564 (1.563) data 0.001 (0.001) loss 0.9771 (1.2161) acc 78.1250 (69.9417) lr 1.9921e-03 eta 19:59:04 +epoch [4/50] batch [970/1000] time 1.564 (1.563) data 0.000 (0.001) loss 1.3711 (1.2154) acc 62.5000 (69.9646) lr 1.9921e-03 eta 19:58:54 +epoch [4/50] batch [975/1000] time 1.556 (1.563) data 0.000 (0.001) loss 1.2363 (1.2167) acc 75.0000 (69.9615) lr 1.9921e-03 eta 19:58:45 +epoch [4/50] batch [980/1000] time 1.547 (1.563) data 0.000 (0.001) loss 0.6323 (1.2160) acc 78.1250 (69.9809) lr 1.9921e-03 eta 19:58:35 +epoch [4/50] batch [985/1000] time 1.549 (1.563) data 0.001 (0.001) loss 0.9854 (1.2161) acc 81.2500 (69.9810) lr 1.9921e-03 eta 19:58:25 +epoch [4/50] batch [990/1000] time 1.547 (1.563) data 0.000 
(0.001) loss 1.1338 (1.2164) acc 65.6250 (69.9716) lr 1.9921e-03 eta 19:58:13 +epoch [4/50] batch [995/1000] time 1.553 (1.563) data 0.001 (0.001) loss 0.9897 (1.2158) acc 71.8750 (69.9937) lr 1.9921e-03 eta 19:58:04 +epoch [4/50] batch [1000/1000] time 1.589 (1.563) data 0.000 (0.001) loss 1.2998 (1.2156) acc 68.7500 (69.9906) lr 1.9823e-03 eta 19:57:59 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,946 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [5/50] batch [5/1000] time 1.557 (1.699) data 0.000 (0.194) loss 0.6040 (0.9898) acc 81.2500 (70.6250) lr 1.9823e-03 eta 21:42:03 +epoch [5/50] batch [10/1000] time 1.554 (1.624) data 0.000 (0.097) loss 1.4121 (1.2194) acc 53.1250 (66.5625) lr 1.9823e-03 eta 20:45:07 +epoch [5/50] batch [15/1000] time 1.570 (1.605) data 0.001 (0.065) loss 1.2227 (1.1979) acc 71.8750 (67.2917) lr 1.9823e-03 eta 20:29:53 +epoch [5/50] batch [20/1000] time 1.565 (1.595) data 0.001 (0.049) loss 1.2559 (1.1629) acc 71.8750 (68.5938) lr 1.9823e-03 eta 20:22:16 +epoch [5/50] batch [25/1000] time 1.572 (1.600) data 0.000 (0.039) loss 1.0498 (1.1458) acc 75.0000 (69.1250) lr 1.9823e-03 eta 20:25:37 +epoch [5/50] batch [30/1000] time 1.563 (1.593) data 0.001 (0.033) loss 1.5205 (1.1559) acc 56.2500 (68.9583) lr 1.9823e-03 eta 20:20:41 +epoch [5/50] batch [35/1000] time 1.548 (1.588) data 0.001 (0.028) loss 1.0312 (1.1335) acc 75.0000 (70.0000) lr 1.9823e-03 eta 20:16:52 +epoch [5/50] batch [40/1000] time 1.549 (1.585) data 0.001 (0.025) loss 1.1719 (1.1381) acc 75.0000 (70.6250) lr 1.9823e-03 eta 20:13:48 +epoch [5/50] batch [45/1000] time 1.545 (1.581) data 0.001 (0.022) loss 1.3418 (1.1384) acc 68.7500 (71.0417) lr 1.9823e-03 eta 20:10:54 +epoch [5/50] batch [50/1000] time 1.532 (1.579) data 0.000 (0.020) loss 1.4023 (1.1307) acc 56.2500 (71.3750) lr 1.9823e-03 eta 20:09:11 
+epoch [5/50] batch [55/1000] time 1.562 (1.578) data 0.001 (0.018) loss 1.1113 (1.1151) acc 81.2500 (71.9886) lr 1.9823e-03 eta 20:08:01 +epoch [5/50] batch [60/1000] time 1.558 (1.577) data 0.002 (0.017) loss 0.9883 (1.1247) acc 68.7500 (71.4062) lr 1.9823e-03 eta 20:07:04 +epoch [5/50] batch [65/1000] time 1.559 (1.575) data 0.001 (0.016) loss 1.3604 (1.1240) acc 65.6250 (71.2019) lr 1.9823e-03 eta 20:05:41 +epoch [5/50] batch [70/1000] time 1.545 (1.574) data 0.000 (0.015) loss 0.7715 (1.1191) acc 68.7500 (71.3393) lr 1.9823e-03 eta 20:04:32 +epoch [5/50] batch [75/1000] time 1.570 (1.573) data 0.001 (0.014) loss 1.5752 (1.1377) acc 59.3750 (71.0833) lr 1.9823e-03 eta 20:03:55 +epoch [5/50] batch [80/1000] time 1.543 (1.572) data 0.000 (0.013) loss 1.2354 (1.1532) acc 71.8750 (70.7422) lr 1.9823e-03 eta 20:02:58 +epoch [5/50] batch [85/1000] time 1.555 (1.571) data 0.001 (0.012) loss 1.3164 (1.1635) acc 68.7500 (70.5515) lr 1.9823e-03 eta 20:02:29 +epoch [5/50] batch [90/1000] time 1.549 (1.571) data 0.001 (0.011) loss 0.9980 (1.1645) acc 71.8750 (70.4167) lr 1.9823e-03 eta 20:01:57 +epoch [5/50] batch [95/1000] time 1.561 (1.570) data 0.001 (0.011) loss 1.4609 (1.1764) acc 62.5000 (70.2961) lr 1.9823e-03 eta 20:01:19 +epoch [5/50] batch [100/1000] time 1.563 (1.569) data 0.001 (0.010) loss 0.9033 (1.1730) acc 81.2500 (70.6250) lr 1.9823e-03 eta 20:00:37 +epoch [5/50] batch [105/1000] time 1.534 (1.569) data 0.000 (0.010) loss 0.7417 (1.1635) acc 81.2500 (70.9524) lr 1.9823e-03 eta 20:00:06 +epoch [5/50] batch [110/1000] time 1.574 (1.568) data 0.000 (0.009) loss 1.4814 (1.1853) acc 68.7500 (70.5398) lr 1.9823e-03 eta 19:59:37 +epoch [5/50] batch [115/1000] time 1.586 (1.568) data 0.001 (0.009) loss 0.8862 (1.1770) acc 75.0000 (70.6522) lr 1.9823e-03 eta 19:58:49 +epoch [5/50] batch [120/1000] time 1.550 (1.567) data 0.001 (0.009) loss 0.9678 (1.1824) acc 81.2500 (70.5208) lr 1.9823e-03 eta 19:58:04 +epoch [5/50] batch [125/1000] time 1.705 (1.568) data 0.001 
(0.008) loss 1.2979 (1.1837) acc 56.2500 (70.4750) lr 1.9823e-03 eta 19:58:30 +epoch [5/50] batch [130/1000] time 1.567 (1.567) data 0.000 (0.008) loss 1.0918 (1.1811) acc 68.7500 (70.5288) lr 1.9823e-03 eta 19:58:13 +epoch [5/50] batch [135/1000] time 1.558 (1.567) data 0.000 (0.008) loss 0.8901 (1.1755) acc 68.7500 (70.6250) lr 1.9823e-03 eta 19:58:00 +epoch [5/50] batch [140/1000] time 1.575 (1.567) data 0.001 (0.008) loss 1.4443 (1.1723) acc 65.6250 (70.5357) lr 1.9823e-03 eta 19:57:50 +epoch [5/50] batch [145/1000] time 1.565 (1.567) data 0.000 (0.007) loss 0.9268 (1.1757) acc 84.3750 (70.4095) lr 1.9823e-03 eta 19:57:32 +epoch [5/50] batch [150/1000] time 1.565 (1.567) data 0.001 (0.007) loss 1.5293 (1.1827) acc 62.5000 (70.3333) lr 1.9823e-03 eta 19:57:10 +epoch [5/50] batch [155/1000] time 1.544 (1.566) data 0.000 (0.007) loss 1.2861 (1.1910) acc 71.8750 (70.2016) lr 1.9823e-03 eta 19:56:41 +epoch [5/50] batch [160/1000] time 1.598 (1.567) data 0.001 (0.007) loss 1.2959 (1.1918) acc 75.0000 (70.3125) lr 1.9823e-03 eta 19:57:00 +epoch [5/50] batch [165/1000] time 1.568 (1.566) data 0.001 (0.006) loss 1.2119 (1.1924) acc 78.1250 (70.3220) lr 1.9823e-03 eta 19:56:39 +epoch [5/50] batch [170/1000] time 1.703 (1.567) data 0.000 (0.006) loss 1.0020 (1.1956) acc 78.1250 (70.3125) lr 1.9823e-03 eta 19:57:09 +epoch [5/50] batch [175/1000] time 1.552 (1.567) data 0.001 (0.006) loss 0.9478 (1.1901) acc 78.1250 (70.3750) lr 1.9823e-03 eta 19:56:48 +epoch [5/50] batch [180/1000] time 1.549 (1.567) data 0.000 (0.006) loss 1.2627 (1.1885) acc 75.0000 (70.4514) lr 1.9823e-03 eta 19:56:20 +epoch [5/50] batch [185/1000] time 1.540 (1.566) data 0.000 (0.006) loss 1.6172 (1.1965) acc 62.5000 (70.3716) lr 1.9823e-03 eta 19:55:59 +epoch [5/50] batch [190/1000] time 1.560 (1.566) data 0.000 (0.006) loss 1.3584 (1.2054) acc 71.8750 (70.1974) lr 1.9823e-03 eta 19:55:26 +epoch [5/50] batch [195/1000] time 1.553 (1.566) data 0.001 (0.006) loss 1.3877 (1.1982) acc 65.6250 (70.3686) lr 
1.9823e-03 eta 19:55:19 +epoch [5/50] batch [200/1000] time 1.540 (1.566) data 0.000 (0.005) loss 1.8018 (1.2037) acc 53.1250 (70.2500) lr 1.9823e-03 eta 19:55:09 +epoch [5/50] batch [205/1000] time 1.557 (1.566) data 0.000 (0.005) loss 1.0928 (1.2090) acc 62.5000 (70.0915) lr 1.9823e-03 eta 19:55:01 +epoch [5/50] batch [210/1000] time 1.600 (1.566) data 0.001 (0.005) loss 1.2266 (1.2056) acc 71.8750 (70.2679) lr 1.9823e-03 eta 19:54:53 +epoch [5/50] batch [215/1000] time 1.537 (1.566) data 0.000 (0.005) loss 1.3896 (1.2084) acc 68.7500 (70.1453) lr 1.9823e-03 eta 19:54:49 +epoch [5/50] batch [220/1000] time 1.561 (1.566) data 0.001 (0.005) loss 1.6221 (1.2018) acc 59.3750 (70.2557) lr 1.9823e-03 eta 19:54:30 +epoch [5/50] batch [225/1000] time 1.550 (1.565) data 0.000 (0.005) loss 1.0664 (1.1991) acc 75.0000 (70.2361) lr 1.9823e-03 eta 19:54:15 +epoch [5/50] batch [230/1000] time 1.555 (1.565) data 0.000 (0.005) loss 1.2178 (1.2023) acc 68.7500 (70.1087) lr 1.9823e-03 eta 19:54:01 +epoch [5/50] batch [235/1000] time 1.573 (1.565) data 0.001 (0.005) loss 1.6094 (1.2004) acc 59.3750 (70.1463) lr 1.9823e-03 eta 19:53:52 +epoch [5/50] batch [240/1000] time 1.570 (1.565) data 0.001 (0.005) loss 1.4121 (1.2005) acc 68.7500 (70.1432) lr 1.9823e-03 eta 19:53:40 +epoch [5/50] batch [245/1000] time 1.564 (1.565) data 0.001 (0.005) loss 0.7290 (1.1980) acc 84.3750 (70.1658) lr 1.9823e-03 eta 19:53:33 +epoch [5/50] batch [250/1000] time 1.562 (1.565) data 0.000 (0.004) loss 1.0801 (1.1939) acc 65.6250 (70.2125) lr 1.9823e-03 eta 19:53:20 +epoch [5/50] batch [255/1000] time 1.573 (1.565) data 0.001 (0.004) loss 1.1846 (1.1956) acc 59.3750 (70.1348) lr 1.9823e-03 eta 19:53:11 +epoch [5/50] batch [260/1000] time 1.544 (1.565) data 0.001 (0.004) loss 1.1758 (1.2021) acc 75.0000 (70.0361) lr 1.9823e-03 eta 19:53:02 +epoch [5/50] batch [265/1000] time 1.569 (1.565) data 0.001 (0.004) loss 1.0996 (1.1985) acc 75.0000 (70.1297) lr 1.9823e-03 eta 19:52:52 +epoch [5/50] batch 
[270/1000] time 1.571 (1.565) data 0.000 (0.004) loss 1.2646 (1.1992) acc 75.0000 (70.1852) lr 1.9823e-03 eta 19:52:38 +epoch [5/50] batch [275/1000] time 1.559 (1.565) data 0.000 (0.004) loss 1.1855 (1.2000) acc 68.7500 (70.1477) lr 1.9823e-03 eta 19:52:22 +epoch [5/50] batch [280/1000] time 1.559 (1.565) data 0.000 (0.004) loss 1.1689 (1.1986) acc 75.0000 (70.2344) lr 1.9823e-03 eta 19:52:35 +epoch [5/50] batch [285/1000] time 1.537 (1.565) data 0.001 (0.004) loss 1.3584 (1.1996) acc 56.2500 (70.1425) lr 1.9823e-03 eta 19:52:19 +epoch [5/50] batch [290/1000] time 1.561 (1.565) data 0.001 (0.004) loss 1.3975 (1.2010) acc 62.5000 (70.1185) lr 1.9823e-03 eta 19:52:03 +epoch [5/50] batch [295/1000] time 1.586 (1.565) data 0.000 (0.004) loss 0.8701 (1.2009) acc 78.1250 (70.1165) lr 1.9823e-03 eta 19:52:01 +epoch [5/50] batch [300/1000] time 1.570 (1.565) data 0.001 (0.004) loss 0.7012 (1.1986) acc 75.0000 (70.1562) lr 1.9823e-03 eta 19:51:46 +epoch [5/50] batch [305/1000] time 1.553 (1.565) data 0.000 (0.004) loss 1.2061 (1.2018) acc 59.3750 (70.0922) lr 1.9823e-03 eta 19:51:31 +epoch [5/50] batch [310/1000] time 1.562 (1.565) data 0.000 (0.004) loss 1.0791 (1.2006) acc 68.7500 (70.1210) lr 1.9823e-03 eta 19:51:24 +epoch [5/50] batch [315/1000] time 1.554 (1.565) data 0.001 (0.004) loss 1.3311 (1.2003) acc 62.5000 (70.1389) lr 1.9823e-03 eta 19:51:15 +epoch [5/50] batch [320/1000] time 1.572 (1.565) data 0.000 (0.004) loss 1.2725 (1.2018) acc 75.0000 (70.1074) lr 1.9823e-03 eta 19:51:09 +epoch [5/50] batch [325/1000] time 1.553 (1.565) data 0.000 (0.004) loss 1.7070 (1.2006) acc 53.1250 (70.0865) lr 1.9823e-03 eta 19:51:16 +epoch [5/50] batch [330/1000] time 1.559 (1.565) data 0.001 (0.003) loss 0.7671 (1.1983) acc 71.8750 (70.1705) lr 1.9823e-03 eta 19:51:04 +epoch [5/50] batch [335/1000] time 1.552 (1.565) data 0.000 (0.003) loss 1.1494 (1.1983) acc 75.0000 (70.1866) lr 1.9823e-03 eta 19:50:49 +epoch [5/50] batch [340/1000] time 1.567 (1.565) data 0.000 (0.003) loss 
1.3877 (1.2015) acc 68.7500 (70.1011) lr 1.9823e-03 eta 19:50:37 +epoch [5/50] batch [345/1000] time 1.566 (1.564) data 0.001 (0.003) loss 1.1299 (1.2016) acc 68.7500 (70.0362) lr 1.9823e-03 eta 19:50:26 +epoch [5/50] batch [350/1000] time 1.556 (1.565) data 0.000 (0.003) loss 1.3799 (1.2029) acc 65.6250 (69.9732) lr 1.9823e-03 eta 19:50:20 +epoch [5/50] batch [355/1000] time 1.558 (1.565) data 0.001 (0.003) loss 1.0566 (1.2045) acc 78.1250 (69.9472) lr 1.9823e-03 eta 19:50:13 +epoch [5/50] batch [360/1000] time 1.552 (1.564) data 0.001 (0.003) loss 0.9175 (1.2061) acc 78.1250 (69.9392) lr 1.9823e-03 eta 19:50:00 +epoch [5/50] batch [365/1000] time 1.553 (1.565) data 0.000 (0.003) loss 1.3574 (1.2077) acc 65.6250 (69.9401) lr 1.9823e-03 eta 19:50:09 +epoch [5/50] batch [370/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.0947 (1.2076) acc 65.6250 (69.9240) lr 1.9823e-03 eta 19:50:00 +epoch [5/50] batch [375/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.4111 (1.2089) acc 65.6250 (69.9333) lr 1.9823e-03 eta 19:49:49 +epoch [5/50] batch [380/1000] time 1.540 (1.564) data 0.000 (0.003) loss 1.0557 (1.2092) acc 75.0000 (69.9095) lr 1.9823e-03 eta 19:49:32 +epoch [5/50] batch [385/1000] time 1.545 (1.564) data 0.000 (0.003) loss 0.4702 (1.2081) acc 87.5000 (69.9351) lr 1.9823e-03 eta 19:49:15 +epoch [5/50] batch [390/1000] time 1.548 (1.564) data 0.000 (0.003) loss 1.3516 (1.2099) acc 65.6250 (69.8878) lr 1.9823e-03 eta 19:48:58 +epoch [5/50] batch [395/1000] time 1.558 (1.564) data 0.001 (0.003) loss 0.9326 (1.2102) acc 78.1250 (69.9130) lr 1.9823e-03 eta 19:48:43 +epoch [5/50] batch [400/1000] time 1.578 (1.564) data 0.001 (0.003) loss 1.3711 (1.2095) acc 68.7500 (69.9297) lr 1.9823e-03 eta 19:48:35 +epoch [5/50] batch [405/1000] time 1.568 (1.564) data 0.000 (0.003) loss 0.9922 (1.2107) acc 71.8750 (69.9614) lr 1.9823e-03 eta 19:48:24 +epoch [5/50] batch [410/1000] time 1.584 (1.564) data 0.001 (0.003) loss 1.4746 (1.2111) acc 62.5000 (69.9390) lr 1.9823e-03 
eta 19:48:16 +epoch [5/50] batch [415/1000] time 1.559 (1.564) data 0.000 (0.003) loss 0.8818 (1.2086) acc 81.2500 (70.0151) lr 1.9823e-03 eta 19:48:09 +epoch [5/50] batch [420/1000] time 1.554 (1.564) data 0.000 (0.003) loss 1.1523 (1.2121) acc 68.7500 (69.9777) lr 1.9823e-03 eta 19:47:52 +epoch [5/50] batch [425/1000] time 1.571 (1.564) data 0.000 (0.003) loss 1.0625 (1.2097) acc 78.1250 (70.0662) lr 1.9823e-03 eta 19:47:39 +epoch [5/50] batch [430/1000] time 1.569 (1.564) data 0.000 (0.003) loss 1.1484 (1.2081) acc 68.7500 (70.0654) lr 1.9823e-03 eta 19:47:44 +epoch [5/50] batch [435/1000] time 1.555 (1.564) data 0.000 (0.003) loss 0.6836 (1.2056) acc 84.3750 (70.1509) lr 1.9823e-03 eta 19:47:32 +epoch [5/50] batch [440/1000] time 1.539 (1.564) data 0.000 (0.003) loss 1.2578 (1.2058) acc 68.7500 (70.1989) lr 1.9823e-03 eta 19:47:16 +epoch [5/50] batch [445/1000] time 1.562 (1.564) data 0.001 (0.003) loss 1.1758 (1.2061) acc 62.5000 (70.1826) lr 1.9823e-03 eta 19:47:08 +epoch [5/50] batch [450/1000] time 1.561 (1.564) data 0.000 (0.003) loss 2.6504 (1.2107) acc 50.0000 (70.1042) lr 1.9823e-03 eta 19:46:58 +epoch [5/50] batch [455/1000] time 1.557 (1.563) data 0.000 (0.003) loss 0.6714 (1.2077) acc 81.2500 (70.1580) lr 1.9823e-03 eta 19:46:48 +epoch [5/50] batch [460/1000] time 1.556 (1.563) data 0.001 (0.003) loss 0.8813 (1.2059) acc 81.2500 (70.2174) lr 1.9823e-03 eta 19:46:39 +epoch [5/50] batch [465/1000] time 1.564 (1.563) data 0.000 (0.003) loss 1.1338 (1.2058) acc 65.6250 (70.2419) lr 1.9823e-03 eta 19:46:28 +epoch [5/50] batch [470/1000] time 1.588 (1.563) data 0.000 (0.003) loss 1.0625 (1.2059) acc 71.8750 (70.2128) lr 1.9823e-03 eta 19:46:24 +epoch [5/50] batch [475/1000] time 1.568 (1.564) data 0.000 (0.003) loss 1.1680 (1.2049) acc 68.7500 (70.2566) lr 1.9823e-03 eta 19:46:36 +epoch [5/50] batch [480/1000] time 1.594 (1.564) data 0.000 (0.003) loss 1.0820 (1.2052) acc 71.8750 (70.2474) lr 1.9823e-03 eta 19:46:36 +epoch [5/50] batch [485/1000] time 
1.564 (1.564) data 0.001 (0.003) loss 1.1572 (1.2071) acc 68.7500 (70.2320) lr 1.9823e-03 eta 19:46:31 +epoch [5/50] batch [490/1000] time 1.582 (1.564) data 0.000 (0.002) loss 0.8745 (1.2060) acc 78.1250 (70.2487) lr 1.9823e-03 eta 19:46:20 +epoch [5/50] batch [495/1000] time 1.553 (1.564) data 0.000 (0.002) loss 1.7920 (1.2079) acc 59.3750 (70.2273) lr 1.9823e-03 eta 19:46:08 +epoch [5/50] batch [500/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.5010 (1.2102) acc 59.3750 (70.1750) lr 1.9823e-03 eta 19:45:57 +epoch [5/50] batch [505/1000] time 1.576 (1.564) data 0.001 (0.002) loss 1.4229 (1.2098) acc 68.7500 (70.1918) lr 1.9823e-03 eta 19:45:46 +epoch [5/50] batch [510/1000] time 1.548 (1.564) data 0.001 (0.002) loss 1.7285 (1.2094) acc 53.1250 (70.1777) lr 1.9823e-03 eta 19:45:33 +epoch [5/50] batch [515/1000] time 1.707 (1.564) data 0.001 (0.002) loss 1.1348 (1.2094) acc 65.6250 (70.1699) lr 1.9823e-03 eta 19:45:34 +epoch [5/50] batch [520/1000] time 1.565 (1.564) data 0.000 (0.002) loss 1.4238 (1.2110) acc 65.6250 (70.1322) lr 1.9823e-03 eta 19:45:27 +epoch [5/50] batch [525/1000] time 1.565 (1.564) data 0.000 (0.002) loss 1.4785 (1.2120) acc 62.5000 (70.1190) lr 1.9823e-03 eta 19:45:16 +epoch [5/50] batch [530/1000] time 1.559 (1.564) data 0.000 (0.002) loss 1.0830 (1.2120) acc 75.0000 (70.1356) lr 1.9823e-03 eta 19:45:11 +epoch [5/50] batch [535/1000] time 1.544 (1.564) data 0.000 (0.002) loss 0.8564 (1.2105) acc 78.1250 (70.1636) lr 1.9823e-03 eta 19:45:03 +epoch [5/50] batch [540/1000] time 1.551 (1.564) data 0.001 (0.002) loss 1.0977 (1.2084) acc 75.0000 (70.2546) lr 1.9823e-03 eta 19:44:56 +epoch [5/50] batch [545/1000] time 1.577 (1.564) data 0.000 (0.002) loss 1.3311 (1.2069) acc 71.8750 (70.3211) lr 1.9823e-03 eta 19:44:44 +epoch [5/50] batch [550/1000] time 1.540 (1.564) data 0.001 (0.002) loss 2.6406 (1.2119) acc 59.3750 (70.2898) lr 1.9823e-03 eta 19:44:32 +epoch [5/50] batch [555/1000] time 1.552 (1.564) data 0.000 (0.002) loss 0.9775 (1.2122) 
acc 71.8750 (70.2590) lr 1.9823e-03 eta 19:44:21 +epoch [5/50] batch [560/1000] time 1.552 (1.564) data 0.000 (0.002) loss 1.2910 (1.2121) acc 75.0000 (70.2455) lr 1.9823e-03 eta 19:44:10 +epoch [5/50] batch [565/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.4482 (1.2114) acc 62.5000 (70.2378) lr 1.9823e-03 eta 19:43:58 +epoch [5/50] batch [570/1000] time 1.531 (1.563) data 0.000 (0.002) loss 1.2188 (1.2120) acc 62.5000 (70.2467) lr 1.9823e-03 eta 19:43:44 +epoch [5/50] batch [575/1000] time 1.548 (1.563) data 0.000 (0.002) loss 1.9727 (1.2122) acc 59.3750 (70.2283) lr 1.9823e-03 eta 19:43:35 +epoch [5/50] batch [580/1000] time 1.570 (1.563) data 0.000 (0.002) loss 1.3164 (1.2123) acc 62.5000 (70.1994) lr 1.9823e-03 eta 19:43:33 +epoch [5/50] batch [585/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.6748 (1.2138) acc 78.1250 (70.1923) lr 1.9823e-03 eta 19:43:22 +epoch [5/50] batch [590/1000] time 1.552 (1.563) data 0.000 (0.002) loss 1.4326 (1.2136) acc 59.3750 (70.1960) lr 1.9823e-03 eta 19:43:14 +epoch [5/50] batch [595/1000] time 1.569 (1.563) data 0.001 (0.002) loss 1.4814 (1.2134) acc 53.1250 (70.1838) lr 1.9823e-03 eta 19:43:06 +epoch [5/50] batch [600/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.6201 (1.2132) acc 65.6250 (70.1927) lr 1.9823e-03 eta 19:42:54 +epoch [5/50] batch [605/1000] time 1.574 (1.563) data 0.001 (0.002) loss 1.3535 (1.2121) acc 59.3750 (70.1550) lr 1.9823e-03 eta 19:42:46 +epoch [5/50] batch [610/1000] time 1.561 (1.563) data 0.001 (0.002) loss 0.7852 (1.2118) acc 84.3750 (70.1588) lr 1.9823e-03 eta 19:42:42 +epoch [5/50] batch [615/1000] time 1.537 (1.563) data 0.000 (0.002) loss 0.9985 (1.2101) acc 78.1250 (70.1778) lr 1.9823e-03 eta 19:42:28 +epoch [5/50] batch [620/1000] time 1.559 (1.563) data 0.001 (0.002) loss 1.1162 (1.2091) acc 65.6250 (70.1966) lr 1.9823e-03 eta 19:42:19 +epoch [5/50] batch [625/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.3994 (1.2106) acc 75.0000 (70.1900) lr 1.9823e-03 eta 19:42:20 
+epoch [5/50] batch [630/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.4053 (1.2114) acc 68.7500 (70.1687) lr 1.9823e-03 eta 19:42:09 +epoch [5/50] batch [635/1000] time 1.572 (1.563) data 0.000 (0.002) loss 1.2012 (1.2114) acc 71.8750 (70.1624) lr 1.9823e-03 eta 19:42:01 +epoch [5/50] batch [640/1000] time 1.579 (1.563) data 0.000 (0.002) loss 0.9624 (1.2091) acc 75.0000 (70.2197) lr 1.9823e-03 eta 19:41:51 +epoch [5/50] batch [645/1000] time 1.556 (1.563) data 0.001 (0.002) loss 1.2139 (1.2081) acc 71.8750 (70.2374) lr 1.9823e-03 eta 19:41:39 +epoch [5/50] batch [650/1000] time 1.551 (1.563) data 0.001 (0.002) loss 1.1797 (1.2072) acc 65.6250 (70.2212) lr 1.9823e-03 eta 19:41:32 +epoch [5/50] batch [655/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.0254 (1.2072) acc 78.1250 (70.2147) lr 1.9823e-03 eta 19:41:22 +epoch [5/50] batch [660/1000] time 1.584 (1.563) data 0.000 (0.002) loss 1.4092 (1.2078) acc 65.6250 (70.2036) lr 1.9823e-03 eta 19:41:16 +epoch [5/50] batch [665/1000] time 1.536 (1.563) data 0.001 (0.002) loss 1.5674 (1.2073) acc 68.7500 (70.2115) lr 1.9823e-03 eta 19:41:06 +epoch [5/50] batch [670/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.3145 (1.2067) acc 56.2500 (70.1866) lr 1.9823e-03 eta 19:41:03 +epoch [5/50] batch [675/1000] time 1.555 (1.563) data 0.001 (0.002) loss 1.3057 (1.2084) acc 68.7500 (70.1574) lr 1.9823e-03 eta 19:40:53 +epoch [5/50] batch [680/1000] time 1.573 (1.563) data 0.001 (0.002) loss 0.6504 (1.2074) acc 68.7500 (70.1149) lr 1.9823e-03 eta 19:40:44 +epoch [5/50] batch [685/1000] time 1.563 (1.563) data 0.000 (0.002) loss 1.0020 (1.2068) acc 71.8750 (70.0776) lr 1.9823e-03 eta 19:40:36 +epoch [5/50] batch [690/1000] time 1.560 (1.563) data 0.000 (0.002) loss 0.8271 (1.2060) acc 78.1250 (70.0906) lr 1.9823e-03 eta 19:40:23 +epoch [5/50] batch [695/1000] time 1.577 (1.563) data 0.000 (0.002) loss 1.2510 (1.2064) acc 59.3750 (70.0719) lr 1.9823e-03 eta 19:40:15 +epoch [5/50] batch [700/1000] time 1.569 (1.563) 
data 0.001 (0.002) loss 0.8853 (1.2043) acc 71.8750 (70.0982) lr 1.9823e-03 eta 19:40:10 +epoch [5/50] batch [705/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.7197 (1.2046) acc 59.3750 (70.0798) lr 1.9823e-03 eta 19:40:05 +epoch [5/50] batch [710/1000] time 1.562 (1.563) data 0.001 (0.002) loss 0.9326 (1.2026) acc 71.8750 (70.0836) lr 1.9823e-03 eta 19:39:59 +epoch [5/50] batch [715/1000] time 1.565 (1.563) data 0.000 (0.002) loss 1.5518 (1.2042) acc 59.3750 (70.0437) lr 1.9823e-03 eta 19:39:51 +epoch [5/50] batch [720/1000] time 1.579 (1.563) data 0.000 (0.002) loss 0.9624 (1.2031) acc 75.0000 (70.0651) lr 1.9823e-03 eta 19:39:43 +epoch [5/50] batch [725/1000] time 1.544 (1.563) data 0.000 (0.002) loss 1.2939 (1.2035) acc 71.8750 (70.0431) lr 1.9823e-03 eta 19:39:35 +epoch [5/50] batch [730/1000] time 1.573 (1.563) data 0.000 (0.002) loss 0.7285 (1.2021) acc 81.2500 (70.0771) lr 1.9823e-03 eta 19:39:35 +epoch [5/50] batch [735/1000] time 1.570 (1.563) data 0.001 (0.002) loss 1.7725 (1.2021) acc 68.7500 (70.1063) lr 1.9823e-03 eta 19:39:28 +epoch [5/50] batch [740/1000] time 1.569 (1.563) data 0.001 (0.002) loss 1.4121 (1.2033) acc 59.3750 (70.0845) lr 1.9823e-03 eta 19:39:18 +epoch [5/50] batch [745/1000] time 1.542 (1.563) data 0.001 (0.002) loss 0.8574 (1.2039) acc 71.8750 (70.0713) lr 1.9823e-03 eta 19:39:06 +epoch [5/50] batch [750/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.0479 (1.2042) acc 75.0000 (70.0875) lr 1.9823e-03 eta 19:38:58 +epoch [5/50] batch [755/1000] time 1.524 (1.563) data 0.000 (0.002) loss 0.9585 (1.2050) acc 71.8750 (70.0745) lr 1.9823e-03 eta 19:38:49 +epoch [5/50] batch [760/1000] time 1.552 (1.563) data 0.001 (0.002) loss 1.1045 (1.2062) acc 75.0000 (70.0822) lr 1.9823e-03 eta 19:38:39 +epoch [5/50] batch [765/1000] time 1.579 (1.563) data 0.000 (0.002) loss 1.8232 (1.2053) acc 46.8750 (70.0735) lr 1.9823e-03 eta 19:38:34 +epoch [5/50] batch [770/1000] time 1.564 (1.563) data 0.000 (0.002) loss 2.0801 (1.2059) acc 56.2500 
(70.0609) lr 1.9823e-03 eta 19:38:25 +epoch [5/50] batch [775/1000] time 1.547 (1.563) data 0.000 (0.002) loss 1.7686 (1.2061) acc 59.3750 (70.0565) lr 1.9823e-03 eta 19:38:22 +epoch [5/50] batch [780/1000] time 1.581 (1.563) data 0.001 (0.002) loss 1.0352 (1.2061) acc 75.0000 (70.0521) lr 1.9823e-03 eta 19:38:15 +epoch [5/50] batch [785/1000] time 1.571 (1.563) data 0.000 (0.002) loss 1.1240 (1.2069) acc 68.7500 (70.0398) lr 1.9823e-03 eta 19:38:07 +epoch [5/50] batch [790/1000] time 1.541 (1.563) data 0.000 (0.002) loss 0.6694 (1.2049) acc 81.2500 (70.0633) lr 1.9823e-03 eta 19:37:59 +epoch [5/50] batch [795/1000] time 1.551 (1.563) data 0.000 (0.002) loss 1.7080 (1.2057) acc 59.3750 (70.0393) lr 1.9823e-03 eta 19:37:47 +epoch [5/50] batch [800/1000] time 1.556 (1.563) data 0.001 (0.002) loss 0.7964 (1.2042) acc 78.1250 (70.0469) lr 1.9823e-03 eta 19:37:36 +epoch [5/50] batch [805/1000] time 1.560 (1.563) data 0.000 (0.002) loss 1.6211 (1.2062) acc 56.2500 (69.9884) lr 1.9823e-03 eta 19:37:27 +epoch [5/50] batch [810/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.1865 (1.2048) acc 71.8750 (70.0309) lr 1.9823e-03 eta 19:37:16 +epoch [5/50] batch [815/1000] time 1.567 (1.563) data 0.000 (0.002) loss 1.0156 (1.2043) acc 68.7500 (70.0307) lr 1.9823e-03 eta 19:37:04 +epoch [5/50] batch [820/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.1924 (1.2043) acc 75.0000 (70.0229) lr 1.9823e-03 eta 19:37:05 +epoch [5/50] batch [825/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.3760 (1.2045) acc 62.5000 (70.0038) lr 1.9823e-03 eta 19:36:55 +epoch [5/50] batch [830/1000] time 1.563 (1.563) data 0.000 (0.002) loss 1.4424 (1.2040) acc 56.2500 (70.0000) lr 1.9823e-03 eta 19:36:50 +epoch [5/50] batch [835/1000] time 1.570 (1.563) data 0.001 (0.002) loss 1.1836 (1.2040) acc 75.0000 (69.9738) lr 1.9823e-03 eta 19:36:40 +epoch [5/50] batch [840/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.4434 (1.2062) acc 62.5000 (69.9256) lr 1.9823e-03 eta 19:36:33 +epoch [5/50] 
batch [845/1000] time 1.538 (1.563) data 0.000 (0.002) loss 0.8804 (1.2067) acc 78.1250 (69.9001) lr 1.9823e-03 eta 19:36:24 +epoch [5/50] batch [850/1000] time 1.557 (1.563) data 0.001 (0.002) loss 1.7734 (1.2068) acc 56.2500 (69.8934) lr 1.9823e-03 eta 19:36:17 +epoch [5/50] batch [855/1000] time 1.577 (1.563) data 0.000 (0.002) loss 1.3350 (1.2067) acc 71.8750 (69.9050) lr 1.9823e-03 eta 19:36:10 +epoch [5/50] batch [860/1000] time 1.566 (1.563) data 0.000 (0.002) loss 1.0420 (1.2046) acc 71.8750 (69.9491) lr 1.9823e-03 eta 19:36:03 +epoch [5/50] batch [865/1000] time 1.544 (1.563) data 0.000 (0.002) loss 0.8911 (1.2027) acc 68.7500 (69.9711) lr 1.9823e-03 eta 19:35:54 +epoch [5/50] batch [870/1000] time 1.540 (1.563) data 0.001 (0.002) loss 1.1289 (1.2026) acc 81.2500 (69.9964) lr 1.9823e-03 eta 19:35:45 +epoch [5/50] batch [875/1000] time 1.557 (1.563) data 0.001 (0.002) loss 1.5596 (1.2034) acc 62.5000 (69.9714) lr 1.9823e-03 eta 19:35:35 +epoch [5/50] batch [880/1000] time 1.730 (1.563) data 0.000 (0.002) loss 1.0879 (1.2030) acc 68.7500 (69.9787) lr 1.9823e-03 eta 19:35:34 +epoch [5/50] batch [885/1000] time 1.572 (1.563) data 0.000 (0.002) loss 0.7153 (1.2024) acc 81.2500 (69.9894) lr 1.9823e-03 eta 19:35:24 +epoch [5/50] batch [890/1000] time 1.551 (1.563) data 0.001 (0.002) loss 1.1299 (1.2037) acc 75.0000 (69.9754) lr 1.9823e-03 eta 19:35:14 +epoch [5/50] batch [895/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.4854 (1.2036) acc 62.5000 (69.9756) lr 1.9823e-03 eta 19:35:04 +epoch [5/50] batch [900/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.6562 (1.2020) acc 84.3750 (69.9792) lr 1.9823e-03 eta 19:34:57 +epoch [5/50] batch [905/1000] time 1.562 (1.563) data 0.000 (0.002) loss 0.9214 (1.2014) acc 65.6250 (69.9793) lr 1.9823e-03 eta 19:34:46 +epoch [5/50] batch [910/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.8794 (1.2014) acc 84.3750 (69.9966) lr 1.9823e-03 eta 19:34:37 +epoch [5/50] batch [915/1000] time 1.543 (1.563) data 0.000 
(0.002) loss 1.1113 (1.2013) acc 59.3750 (69.9898) lr 1.9823e-03 eta 19:34:28 +epoch [5/50] batch [920/1000] time 1.534 (1.563) data 0.000 (0.002) loss 1.5742 (1.2019) acc 56.2500 (69.9660) lr 1.9823e-03 eta 19:34:17 +epoch [5/50] batch [925/1000] time 1.720 (1.563) data 0.000 (0.002) loss 1.4385 (1.2020) acc 62.5000 (69.9493) lr 1.9823e-03 eta 19:34:14 +epoch [5/50] batch [930/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.2168 (1.2014) acc 71.8750 (69.9664) lr 1.9823e-03 eta 19:34:06 +epoch [5/50] batch [935/1000] time 1.559 (1.563) data 0.001 (0.002) loss 1.2031 (1.2020) acc 62.5000 (69.9499) lr 1.9823e-03 eta 19:34:00 +epoch [5/50] batch [940/1000] time 1.569 (1.563) data 0.000 (0.002) loss 1.6406 (1.2023) acc 59.3750 (69.9202) lr 1.9823e-03 eta 19:33:51 +epoch [5/50] batch [945/1000] time 1.576 (1.563) data 0.000 (0.002) loss 1.5986 (1.2034) acc 62.5000 (69.9107) lr 1.9823e-03 eta 19:33:42 +epoch [5/50] batch [950/1000] time 1.565 (1.563) data 0.000 (0.002) loss 0.8931 (1.2038) acc 78.1250 (69.8980) lr 1.9823e-03 eta 19:33:30 +epoch [5/50] batch [955/1000] time 1.544 (1.563) data 0.000 (0.001) loss 1.4629 (1.2032) acc 71.8750 (69.9215) lr 1.9823e-03 eta 19:33:22 +epoch [5/50] batch [960/1000] time 1.564 (1.563) data 0.000 (0.001) loss 1.1201 (1.2029) acc 71.8750 (69.9251) lr 1.9823e-03 eta 19:33:14 +epoch [5/50] batch [965/1000] time 1.560 (1.563) data 0.000 (0.001) loss 1.6035 (1.2040) acc 65.6250 (69.9126) lr 1.9823e-03 eta 19:33:08 +epoch [5/50] batch [970/1000] time 1.557 (1.563) data 0.000 (0.001) loss 1.2646 (1.2047) acc 71.8750 (69.9195) lr 1.9823e-03 eta 19:33:07 +epoch [5/50] batch [975/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.2842 (1.2044) acc 65.6250 (69.9167) lr 1.9823e-03 eta 19:32:58 +epoch [5/50] batch [980/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.2070 (1.2042) acc 68.7500 (69.9107) lr 1.9823e-03 eta 19:32:51 +epoch [5/50] batch [985/1000] time 1.558 (1.563) data 0.001 (0.001) loss 1.4707 (1.2036) acc 71.8750 (69.9365) lr 
1.9823e-03 eta 19:32:43 +epoch [5/50] batch [990/1000] time 1.547 (1.563) data 0.000 (0.001) loss 1.5449 (1.2027) acc 68.7500 (69.9716) lr 1.9823e-03 eta 19:32:33 +epoch [5/50] batch [995/1000] time 1.570 (1.563) data 0.000 (0.001) loss 1.3369 (1.2030) acc 53.1250 (69.9623) lr 1.9823e-03 eta 19:32:23 +epoch [5/50] batch [1000/1000] time 1.548 (1.563) data 0.000 (0.001) loss 1.2070 (1.2019) acc 62.5000 (69.9562) lr 1.9686e-03 eta 19:32:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,005 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [6/50] batch [5/1000] time 1.547 (1.685) data 0.000 (0.186) loss 1.0752 (0.8844) acc 65.6250 (75.6250) lr 1.9686e-03 eta 21:03:53 +epoch [6/50] batch [10/1000] time 1.585 (1.629) data 0.001 (0.093) loss 1.0830 (1.0910) acc 75.0000 (73.7500) lr 1.9686e-03 eta 20:21:12 +epoch [6/50] batch [15/1000] time 1.551 (1.608) data 0.000 (0.062) loss 1.1768 (1.0743) acc 75.0000 (73.9583) lr 1.9686e-03 eta 20:05:44 +epoch [6/50] batch [20/1000] time 1.553 (1.598) data 0.001 (0.047) loss 1.2480 (1.1402) acc 59.3750 (73.2812) lr 1.9686e-03 eta 19:57:50 +epoch [6/50] batch [25/1000] time 1.560 (1.590) data 0.000 (0.038) loss 0.6343 (1.1473) acc 84.3750 (73.6250) lr 1.9686e-03 eta 19:52:11 +epoch [6/50] batch [30/1000] time 1.567 (1.586) data 0.001 (0.031) loss 0.7534 (1.0942) acc 81.2500 (74.3750) lr 1.9686e-03 eta 19:48:34 +epoch [6/50] batch [35/1000] time 1.551 (1.582) data 0.000 (0.027) loss 1.2500 (1.0941) acc 71.8750 (74.3750) lr 1.9686e-03 eta 19:45:18 +epoch [6/50] batch [40/1000] time 1.574 (1.586) data 0.001 (0.024) loss 1.2002 (1.1071) acc 68.7500 (73.7500) lr 1.9686e-03 eta 19:48:28 +epoch [6/50] batch [45/1000] time 1.573 (1.584) data 0.000 (0.021) loss 1.5791 (1.1430) acc 56.2500 (72.9861) lr 1.9686e-03 eta 19:46:28 +epoch [6/50] batch [50/1000] time 1.572 (1.582) data 
0.001 (0.019) loss 1.6260 (1.1559) acc 62.5000 (72.6250) lr 1.9686e-03 eta 19:45:12 +epoch [6/50] batch [55/1000] time 1.564 (1.581) data 0.000 (0.017) loss 0.9302 (1.1767) acc 78.1250 (72.0455) lr 1.9686e-03 eta 19:44:13 +epoch [6/50] batch [60/1000] time 1.572 (1.580) data 0.001 (0.016) loss 1.3330 (1.1907) acc 65.6250 (71.6667) lr 1.9686e-03 eta 19:43:28 +epoch [6/50] batch [65/1000] time 1.558 (1.579) data 0.000 (0.015) loss 1.1982 (1.2033) acc 71.8750 (71.3462) lr 1.9686e-03 eta 19:42:40 +epoch [6/50] batch [70/1000] time 1.567 (1.578) data 0.000 (0.014) loss 1.1104 (1.1951) acc 78.1250 (71.6518) lr 1.9686e-03 eta 19:42:01 +epoch [6/50] batch [75/1000] time 1.555 (1.578) data 0.001 (0.013) loss 0.9390 (1.2039) acc 75.0000 (71.4167) lr 1.9686e-03 eta 19:41:27 +epoch [6/50] batch [80/1000] time 1.570 (1.577) data 0.000 (0.012) loss 1.4697 (1.2128) acc 68.7500 (71.1328) lr 1.9686e-03 eta 19:40:37 +epoch [6/50] batch [85/1000] time 1.540 (1.578) data 0.000 (0.011) loss 1.2402 (1.2235) acc 71.8750 (70.9559) lr 1.9686e-03 eta 19:40:59 +epoch [6/50] batch [90/1000] time 1.560 (1.577) data 0.000 (0.011) loss 0.9917 (1.2042) acc 68.7500 (71.2500) lr 1.9686e-03 eta 19:40:34 +epoch [6/50] batch [95/1000] time 1.546 (1.576) data 0.001 (0.010) loss 0.9634 (1.2064) acc 81.2500 (71.0855) lr 1.9686e-03 eta 19:39:26 +epoch [6/50] batch [100/1000] time 1.542 (1.575) data 0.000 (0.010) loss 1.1299 (1.2007) acc 68.7500 (71.0000) lr 1.9686e-03 eta 19:38:40 +epoch [6/50] batch [105/1000] time 1.560 (1.574) data 0.000 (0.009) loss 1.6484 (1.2186) acc 68.7500 (70.8929) lr 1.9686e-03 eta 19:37:48 +epoch [6/50] batch [110/1000] time 1.584 (1.574) data 0.000 (0.009) loss 1.2705 (1.2340) acc 59.3750 (70.4830) lr 1.9686e-03 eta 19:37:39 +epoch [6/50] batch [115/1000] time 1.556 (1.574) data 0.000 (0.009) loss 1.5518 (1.2377) acc 62.5000 (70.3261) lr 1.9686e-03 eta 19:37:09 +epoch [6/50] batch [120/1000] time 1.536 (1.573) data 0.001 (0.008) loss 1.0371 (1.2431) acc 75.0000 (70.1042) lr 
1.9686e-03 eta 19:36:18 +epoch [6/50] batch [125/1000] time 1.544 (1.572) data 0.001 (0.008) loss 1.1602 (1.2390) acc 71.8750 (70.2750) lr 1.9686e-03 eta 19:35:49 +epoch [6/50] batch [130/1000] time 1.555 (1.572) data 0.000 (0.008) loss 0.6162 (1.2282) acc 87.5000 (70.4808) lr 1.9686e-03 eta 19:35:24 +epoch [6/50] batch [135/1000] time 1.586 (1.572) data 0.000 (0.007) loss 1.1016 (1.2137) acc 68.7500 (70.7870) lr 1.9686e-03 eta 19:35:20 +epoch [6/50] batch [140/1000] time 1.550 (1.572) data 0.000 (0.007) loss 1.2852 (1.2099) acc 56.2500 (70.5804) lr 1.9686e-03 eta 19:35:09 +epoch [6/50] batch [145/1000] time 1.725 (1.573) data 0.000 (0.007) loss 1.2393 (1.2138) acc 62.5000 (70.3448) lr 1.9686e-03 eta 19:35:44 +epoch [6/50] batch [150/1000] time 1.573 (1.573) data 0.001 (0.007) loss 0.8320 (1.2065) acc 75.0000 (70.3958) lr 1.9686e-03 eta 19:35:39 +epoch [6/50] batch [155/1000] time 1.561 (1.573) data 0.000 (0.007) loss 1.0195 (1.2086) acc 68.7500 (70.2218) lr 1.9686e-03 eta 19:35:21 +epoch [6/50] batch [160/1000] time 1.573 (1.572) data 0.000 (0.006) loss 0.9268 (1.2036) acc 71.8750 (70.3711) lr 1.9686e-03 eta 19:34:55 +epoch [6/50] batch [165/1000] time 1.571 (1.572) data 0.000 (0.006) loss 1.4482 (1.1977) acc 68.7500 (70.5303) lr 1.9686e-03 eta 19:34:45 +epoch [6/50] batch [170/1000] time 1.567 (1.572) data 0.001 (0.006) loss 1.0742 (1.1957) acc 75.0000 (70.5331) lr 1.9686e-03 eta 19:34:30 +epoch [6/50] batch [175/1000] time 1.584 (1.572) data 0.001 (0.006) loss 0.9712 (1.1895) acc 75.0000 (70.6607) lr 1.9686e-03 eta 19:34:32 +epoch [6/50] batch [180/1000] time 1.545 (1.572) data 0.000 (0.006) loss 1.2852 (1.1951) acc 68.7500 (70.6424) lr 1.9686e-03 eta 19:34:11 +epoch [6/50] batch [185/1000] time 1.550 (1.572) data 0.000 (0.006) loss 1.0225 (1.1919) acc 84.3750 (70.8108) lr 1.9686e-03 eta 19:33:50 +epoch [6/50] batch [190/1000] time 1.720 (1.572) data 0.001 (0.005) loss 1.8262 (1.1906) acc 62.5000 (70.9046) lr 1.9686e-03 eta 19:34:19 +epoch [6/50] batch 
[195/1000] time 1.568 (1.572) data 0.001 (0.005) loss 1.1357 (1.1871) acc 75.0000 (71.0577) lr 1.9686e-03 eta 19:33:54 +epoch [6/50] batch [200/1000] time 1.571 (1.572) data 0.000 (0.005) loss 0.8867 (1.1812) acc 78.1250 (71.2031) lr 1.9686e-03 eta 19:33:43 +epoch [6/50] batch [205/1000] time 1.553 (1.572) data 0.001 (0.005) loss 1.4434 (1.1872) acc 62.5000 (71.0671) lr 1.9686e-03 eta 19:33:24 +epoch [6/50] batch [210/1000] time 1.576 (1.572) data 0.000 (0.005) loss 0.5552 (1.1838) acc 84.3750 (71.1458) lr 1.9686e-03 eta 19:33:18 +epoch [6/50] batch [215/1000] time 1.553 (1.571) data 0.000 (0.005) loss 0.8525 (1.1814) acc 75.0000 (71.1628) lr 1.9686e-03 eta 19:32:56 +epoch [6/50] batch [220/1000] time 1.556 (1.571) data 0.001 (0.005) loss 1.4414 (1.1841) acc 62.5000 (71.0653) lr 1.9686e-03 eta 19:32:38 +epoch [6/50] batch [225/1000] time 1.532 (1.571) data 0.000 (0.005) loss 1.9385 (1.1883) acc 62.5000 (70.9583) lr 1.9686e-03 eta 19:32:24 +epoch [6/50] batch [230/1000] time 1.546 (1.571) data 0.001 (0.005) loss 1.2275 (1.1877) acc 75.0000 (70.8560) lr 1.9686e-03 eta 19:32:03 +epoch [6/50] batch [235/1000] time 1.573 (1.571) data 0.000 (0.004) loss 1.2803 (1.1871) acc 71.8750 (70.8378) lr 1.9686e-03 eta 19:32:19 +epoch [6/50] batch [240/1000] time 1.564 (1.571) data 0.000 (0.004) loss 0.7520 (1.1881) acc 71.8750 (70.7422) lr 1.9686e-03 eta 19:32:06 +epoch [6/50] batch [245/1000] time 1.567 (1.571) data 0.000 (0.004) loss 1.6641 (1.1918) acc 62.5000 (70.6505) lr 1.9686e-03 eta 19:31:53 +epoch [6/50] batch [250/1000] time 1.554 (1.571) data 0.000 (0.004) loss 1.5566 (1.1939) acc 59.3750 (70.6250) lr 1.9686e-03 eta 19:31:33 +epoch [6/50] batch [255/1000] time 1.556 (1.571) data 0.000 (0.004) loss 1.0928 (1.1954) acc 78.1250 (70.5882) lr 1.9686e-03 eta 19:31:12 +epoch [6/50] batch [260/1000] time 1.550 (1.570) data 0.000 (0.004) loss 0.9761 (1.1915) acc 75.0000 (70.6370) lr 1.9686e-03 eta 19:30:49 +epoch [6/50] batch [265/1000] time 1.593 (1.570) data 0.000 (0.004) loss 
0.9087 (1.1898) acc 78.1250 (70.6840) lr 1.9686e-03 eta 19:30:34 +epoch [6/50] batch [270/1000] time 1.560 (1.570) data 0.000 (0.004) loss 1.5127 (1.1922) acc 62.5000 (70.6019) lr 1.9686e-03 eta 19:30:18 +epoch [6/50] batch [275/1000] time 1.555 (1.570) data 0.000 (0.004) loss 1.3945 (1.1884) acc 65.6250 (70.7159) lr 1.9686e-03 eta 19:30:06 +epoch [6/50] batch [280/1000] time 1.555 (1.569) data 0.000 (0.004) loss 0.8345 (1.1890) acc 78.1250 (70.6585) lr 1.9686e-03 eta 19:29:47 +epoch [6/50] batch [285/1000] time 1.572 (1.569) data 0.001 (0.004) loss 1.0186 (1.1890) acc 75.0000 (70.7018) lr 1.9686e-03 eta 19:29:34 +epoch [6/50] batch [290/1000] time 1.587 (1.569) data 0.001 (0.004) loss 0.6738 (1.1873) acc 81.2500 (70.7220) lr 1.9686e-03 eta 19:29:17 +epoch [6/50] batch [295/1000] time 1.576 (1.569) data 0.000 (0.004) loss 1.0254 (1.1876) acc 65.6250 (70.6568) lr 1.9686e-03 eta 19:29:06 +epoch [6/50] batch [300/1000] time 1.568 (1.570) data 0.000 (0.004) loss 1.1045 (1.1877) acc 71.8750 (70.6771) lr 1.9686e-03 eta 19:29:18 +epoch [6/50] batch [305/1000] time 1.548 (1.569) data 0.000 (0.004) loss 1.4404 (1.1893) acc 75.0000 (70.6865) lr 1.9686e-03 eta 19:28:55 +epoch [6/50] batch [310/1000] time 1.542 (1.569) data 0.001 (0.004) loss 1.4502 (1.1885) acc 65.6250 (70.6552) lr 1.9686e-03 eta 19:28:37 +epoch [6/50] batch [315/1000] time 1.543 (1.569) data 0.000 (0.003) loss 0.6953 (1.1845) acc 71.8750 (70.7341) lr 1.9686e-03 eta 19:28:20 +epoch [6/50] batch [320/1000] time 1.549 (1.569) data 0.001 (0.003) loss 0.9009 (1.1805) acc 71.8750 (70.8008) lr 1.9686e-03 eta 19:28:05 +epoch [6/50] batch [325/1000] time 1.565 (1.568) data 0.000 (0.003) loss 1.1074 (1.1829) acc 68.7500 (70.7500) lr 1.9686e-03 eta 19:27:50 +epoch [6/50] batch [330/1000] time 1.577 (1.568) data 0.001 (0.003) loss 1.4375 (1.1836) acc 71.8750 (70.7576) lr 1.9686e-03 eta 19:27:40 +epoch [6/50] batch [335/1000] time 1.544 (1.568) data 0.000 (0.003) loss 0.7192 (1.1828) acc 78.1250 (70.8022) lr 1.9686e-03 
eta 19:27:25 +epoch [6/50] batch [340/1000] time 1.554 (1.568) data 0.000 (0.003) loss 1.6309 (1.1832) acc 65.6250 (70.7629) lr 1.9686e-03 eta 19:27:12 +epoch [6/50] batch [345/1000] time 1.569 (1.569) data 0.000 (0.003) loss 1.1699 (1.1811) acc 75.0000 (70.8424) lr 1.9686e-03 eta 19:27:29 +epoch [6/50] batch [350/1000] time 1.535 (1.568) data 0.001 (0.003) loss 1.0762 (1.1828) acc 78.1250 (70.7857) lr 1.9686e-03 eta 19:27:04 +epoch [6/50] batch [355/1000] time 1.570 (1.568) data 0.000 (0.003) loss 1.5811 (1.1835) acc 65.6250 (70.7746) lr 1.9686e-03 eta 19:26:51 +epoch [6/50] batch [360/1000] time 1.543 (1.568) data 0.000 (0.003) loss 1.8770 (1.1891) acc 62.5000 (70.6250) lr 1.9686e-03 eta 19:26:34 +epoch [6/50] batch [365/1000] time 1.568 (1.568) data 0.001 (0.003) loss 1.0762 (1.1908) acc 75.0000 (70.6592) lr 1.9686e-03 eta 19:26:28 +epoch [6/50] batch [370/1000] time 1.551 (1.568) data 0.001 (0.003) loss 1.2354 (1.1918) acc 65.6250 (70.6081) lr 1.9686e-03 eta 19:26:12 +epoch [6/50] batch [375/1000] time 1.555 (1.568) data 0.000 (0.003) loss 1.2822 (1.1912) acc 56.2500 (70.6583) lr 1.9686e-03 eta 19:26:02 +epoch [6/50] batch [380/1000] time 1.572 (1.568) data 0.000 (0.003) loss 1.3428 (1.1930) acc 71.8750 (70.6743) lr 1.9686e-03 eta 19:25:57 +epoch [6/50] batch [385/1000] time 1.570 (1.568) data 0.000 (0.003) loss 1.4521 (1.1930) acc 68.7500 (70.6737) lr 1.9686e-03 eta 19:26:04 +epoch [6/50] batch [390/1000] time 1.537 (1.568) data 0.000 (0.003) loss 0.8188 (1.1898) acc 78.1250 (70.7772) lr 1.9686e-03 eta 19:25:45 +epoch [6/50] batch [395/1000] time 1.560 (1.568) data 0.001 (0.003) loss 1.2139 (1.1893) acc 78.1250 (70.8070) lr 1.9686e-03 eta 19:25:28 +epoch [6/50] batch [400/1000] time 1.549 (1.568) data 0.001 (0.003) loss 1.2588 (1.1901) acc 68.7500 (70.7812) lr 1.9686e-03 eta 19:25:13 +epoch [6/50] batch [405/1000] time 1.562 (1.567) data 0.000 (0.003) loss 2.0781 (1.1922) acc 59.3750 (70.7562) lr 1.9686e-03 eta 19:24:57 +epoch [6/50] batch [410/1000] time 
1.556 (1.567) data 0.001 (0.003) loss 1.7988 (1.1916) acc 62.5000 (70.7698) lr 1.9686e-03 eta 19:24:48 +epoch [6/50] batch [415/1000] time 1.551 (1.567) data 0.001 (0.003) loss 1.3330 (1.1934) acc 59.3750 (70.6627) lr 1.9686e-03 eta 19:24:32 +epoch [6/50] batch [420/1000] time 1.545 (1.567) data 0.000 (0.003) loss 1.0273 (1.1914) acc 78.1250 (70.7143) lr 1.9686e-03 eta 19:24:24 +epoch [6/50] batch [425/1000] time 1.579 (1.567) data 0.000 (0.003) loss 1.2891 (1.1908) acc 62.5000 (70.7059) lr 1.9686e-03 eta 19:24:17 +epoch [6/50] batch [430/1000] time 1.568 (1.567) data 0.000 (0.003) loss 2.3066 (1.1952) acc 46.8750 (70.5451) lr 1.9686e-03 eta 19:24:15 +epoch [6/50] batch [435/1000] time 1.563 (1.567) data 0.000 (0.003) loss 1.5400 (1.1954) acc 65.6250 (70.5460) lr 1.9686e-03 eta 19:24:13 +epoch [6/50] batch [440/1000] time 1.570 (1.567) data 0.000 (0.003) loss 1.6611 (1.1962) acc 59.3750 (70.5540) lr 1.9686e-03 eta 19:24:05 +epoch [6/50] batch [445/1000] time 1.554 (1.568) data 0.000 (0.003) loss 1.4053 (1.1959) acc 68.7500 (70.5618) lr 1.9686e-03 eta 19:24:03 +epoch [6/50] batch [450/1000] time 1.579 (1.568) data 0.001 (0.003) loss 0.8281 (1.1946) acc 87.5000 (70.5903) lr 1.9686e-03 eta 19:24:13 +epoch [6/50] batch [455/1000] time 1.555 (1.568) data 0.001 (0.003) loss 0.8892 (1.1932) acc 75.0000 (70.6250) lr 1.9686e-03 eta 19:24:06 +epoch [6/50] batch [460/1000] time 1.533 (1.568) data 0.001 (0.003) loss 0.8652 (1.1945) acc 84.3750 (70.6454) lr 1.9686e-03 eta 19:23:54 +epoch [6/50] batch [465/1000] time 1.572 (1.568) data 0.000 (0.002) loss 1.3066 (1.1942) acc 53.1250 (70.6116) lr 1.9686e-03 eta 19:23:41 +epoch [6/50] batch [470/1000] time 1.534 (1.568) data 0.000 (0.002) loss 0.3748 (1.1943) acc 90.6250 (70.6250) lr 1.9686e-03 eta 19:23:27 +epoch [6/50] batch [475/1000] time 1.573 (1.567) data 0.000 (0.002) loss 1.3691 (1.1961) acc 71.8750 (70.5987) lr 1.9686e-03 eta 19:23:10 +epoch [6/50] batch [480/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.3838 (1.1992) 
acc 59.3750 (70.5339) lr 1.9686e-03 eta 19:22:56 +epoch [6/50] batch [485/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.9282 (1.1987) acc 71.8750 (70.5412) lr 1.9686e-03 eta 19:22:43 +epoch [6/50] batch [490/1000] time 1.579 (1.567) data 0.001 (0.002) loss 0.6055 (1.2000) acc 87.5000 (70.4974) lr 1.9686e-03 eta 19:22:34 +epoch [6/50] batch [495/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.1279 (1.2017) acc 65.6250 (70.4735) lr 1.9686e-03 eta 19:22:36 +epoch [6/50] batch [500/1000] time 1.570 (1.567) data 0.000 (0.002) loss 0.7324 (1.1990) acc 84.3750 (70.5375) lr 1.9686e-03 eta 19:22:21 +epoch [6/50] batch [505/1000] time 1.564 (1.567) data 0.000 (0.002) loss 1.4277 (1.2014) acc 68.7500 (70.5012) lr 1.9686e-03 eta 19:22:07 +epoch [6/50] batch [510/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.3906 (1.1994) acc 75.0000 (70.5576) lr 1.9686e-03 eta 19:21:59 +epoch [6/50] batch [515/1000] time 1.581 (1.567) data 0.000 (0.002) loss 2.0059 (1.2026) acc 56.2500 (70.5036) lr 1.9686e-03 eta 19:21:52 +epoch [6/50] batch [520/1000] time 1.544 (1.567) data 0.000 (0.002) loss 1.1152 (1.2013) acc 71.8750 (70.5409) lr 1.9686e-03 eta 19:21:41 +epoch [6/50] batch [525/1000] time 1.566 (1.567) data 0.001 (0.002) loss 1.4229 (1.2012) acc 68.7500 (70.5298) lr 1.9686e-03 eta 19:21:34 +epoch [6/50] batch [530/1000] time 1.541 (1.567) data 0.000 (0.002) loss 1.2432 (1.2027) acc 59.3750 (70.4894) lr 1.9686e-03 eta 19:21:19 +epoch [6/50] batch [535/1000] time 1.734 (1.567) data 0.000 (0.002) loss 0.9780 (1.2044) acc 71.8750 (70.4731) lr 1.9686e-03 eta 19:21:24 +epoch [6/50] batch [540/1000] time 1.591 (1.567) data 0.000 (0.002) loss 1.2119 (1.2054) acc 65.6250 (70.4282) lr 1.9686e-03 eta 19:21:15 +epoch [6/50] batch [545/1000] time 1.543 (1.567) data 0.000 (0.002) loss 1.7432 (1.2071) acc 56.2500 (70.3727) lr 1.9686e-03 eta 19:21:05 +epoch [6/50] batch [550/1000] time 1.546 (1.567) data 0.001 (0.002) loss 1.2129 (1.2081) acc 62.5000 (70.3523) lr 1.9686e-03 eta 19:20:56 
+epoch [6/50] batch [555/1000] time 1.582 (1.567) data 0.000 (0.002) loss 0.5322 (1.2073) acc 84.3750 (70.3773) lr 1.9686e-03 eta 19:20:47 +epoch [6/50] batch [560/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.8687 (1.2079) acc 78.1250 (70.3460) lr 1.9686e-03 eta 19:20:38 +epoch [6/50] batch [565/1000] time 1.543 (1.567) data 0.000 (0.002) loss 0.8745 (1.2055) acc 71.8750 (70.3816) lr 1.9686e-03 eta 19:20:24 +epoch [6/50] batch [570/1000] time 1.552 (1.567) data 0.000 (0.002) loss 0.7114 (1.2072) acc 81.2500 (70.3783) lr 1.9686e-03 eta 19:20:12 +epoch [6/50] batch [575/1000] time 1.561 (1.567) data 0.000 (0.002) loss 1.0977 (1.2062) acc 71.8750 (70.4130) lr 1.9686e-03 eta 19:19:58 +epoch [6/50] batch [580/1000] time 1.557 (1.567) data 0.000 (0.002) loss 0.7720 (1.2048) acc 68.7500 (70.4095) lr 1.9686e-03 eta 19:19:49 +epoch [6/50] batch [585/1000] time 1.552 (1.567) data 0.000 (0.002) loss 1.4199 (1.2054) acc 65.6250 (70.3846) lr 1.9686e-03 eta 19:19:37 +epoch [6/50] batch [590/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.1885 (1.2050) acc 75.0000 (70.3867) lr 1.9686e-03 eta 19:19:27 +epoch [6/50] batch [595/1000] time 1.557 (1.566) data 0.000 (0.002) loss 1.0371 (1.2047) acc 65.6250 (70.3834) lr 1.9686e-03 eta 19:19:16 +epoch [6/50] batch [600/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.1074 (1.2046) acc 68.7500 (70.3646) lr 1.9686e-03 eta 19:19:15 +epoch [6/50] batch [605/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.1006 (1.2031) acc 65.6250 (70.3926) lr 1.9686e-03 eta 19:19:04 +epoch [6/50] batch [610/1000] time 1.528 (1.566) data 0.001 (0.002) loss 1.0381 (1.2034) acc 71.8750 (70.3637) lr 1.9686e-03 eta 19:18:48 +epoch [6/50] batch [615/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.3672 (1.2039) acc 65.6250 (70.3659) lr 1.9686e-03 eta 19:18:37 +epoch [6/50] batch [620/1000] time 1.546 (1.566) data 0.000 (0.002) loss 1.3369 (1.2054) acc 65.6250 (70.3730) lr 1.9686e-03 eta 19:18:24 +epoch [6/50] batch [625/1000] time 1.553 (1.566) 
data 0.000 (0.002) loss 1.5410 (1.2067) acc 53.1250 (70.3550) lr 1.9686e-03 eta 19:18:14 +epoch [6/50] batch [630/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.0254 (1.2064) acc 68.7500 (70.3423) lr 1.9686e-03 eta 19:18:01 +epoch [6/50] batch [635/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.3906 (1.2070) acc 75.0000 (70.3297) lr 1.9686e-03 eta 19:17:46 +epoch [6/50] batch [640/1000] time 1.552 (1.566) data 0.001 (0.002) loss 1.6494 (1.2089) acc 65.6250 (70.2930) lr 1.9686e-03 eta 19:17:36 +epoch [6/50] batch [645/1000] time 1.591 (1.566) data 0.000 (0.002) loss 0.8477 (1.2079) acc 78.1250 (70.3004) lr 1.9686e-03 eta 19:17:42 +epoch [6/50] batch [650/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.2246 (1.2084) acc 65.6250 (70.2740) lr 1.9686e-03 eta 19:17:31 +epoch [6/50] batch [655/1000] time 1.586 (1.566) data 0.000 (0.002) loss 1.5859 (1.2077) acc 62.5000 (70.3197) lr 1.9686e-03 eta 19:17:25 +epoch [6/50] batch [660/1000] time 1.582 (1.566) data 0.000 (0.002) loss 0.9331 (1.2073) acc 68.7500 (70.3220) lr 1.9686e-03 eta 19:17:16 +epoch [6/50] batch [665/1000] time 1.602 (1.566) data 0.000 (0.002) loss 1.7520 (1.2077) acc 65.6250 (70.2820) lr 1.9686e-03 eta 19:17:11 +epoch [6/50] batch [670/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.1641 (1.2082) acc 71.8750 (70.2705) lr 1.9686e-03 eta 19:17:01 +epoch [6/50] batch [675/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.4873 (1.2069) acc 53.1250 (70.2917) lr 1.9686e-03 eta 19:16:49 +epoch [6/50] batch [680/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.5654 (1.2074) acc 65.6250 (70.3079) lr 1.9686e-03 eta 19:16:36 +epoch [6/50] batch [685/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.8623 (1.2058) acc 78.1250 (70.3330) lr 1.9686e-03 eta 19:16:26 +epoch [6/50] batch [690/1000] time 1.575 (1.566) data 0.000 (0.002) loss 1.0586 (1.2063) acc 75.0000 (70.3261) lr 1.9686e-03 eta 19:16:29 +epoch [6/50] batch [695/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.6689 (1.2057) acc 62.5000 
(70.3192) lr 1.9686e-03 eta 19:16:20 +epoch [6/50] batch [700/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.5967 (1.2035) acc 75.0000 (70.3571) lr 1.9686e-03 eta 19:16:11 +epoch [6/50] batch [705/1000] time 1.547 (1.566) data 0.000 (0.002) loss 0.7139 (1.2040) acc 81.2500 (70.3635) lr 1.9686e-03 eta 19:16:03 +epoch [6/50] batch [710/1000] time 1.570 (1.566) data 0.000 (0.002) loss 1.1455 (1.2032) acc 65.6250 (70.3873) lr 1.9686e-03 eta 19:15:56 +epoch [6/50] batch [715/1000] time 1.540 (1.566) data 0.000 (0.002) loss 0.6963 (1.2024) acc 87.5000 (70.4108) lr 1.9686e-03 eta 19:15:42 +epoch [6/50] batch [720/1000] time 1.543 (1.566) data 0.000 (0.002) loss 0.9189 (1.2024) acc 65.6250 (70.3819) lr 1.9686e-03 eta 19:15:29 +epoch [6/50] batch [725/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.8398 (1.2043) acc 68.7500 (70.3491) lr 1.9686e-03 eta 19:15:21 +epoch [6/50] batch [730/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.9155 (1.2037) acc 78.1250 (70.3767) lr 1.9686e-03 eta 19:15:12 +epoch [6/50] batch [735/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.3750 (1.2045) acc 62.5000 (70.3656) lr 1.9686e-03 eta 19:15:02 +epoch [6/50] batch [740/1000] time 1.549 (1.566) data 0.001 (0.002) loss 1.7578 (1.2046) acc 56.2500 (70.3632) lr 1.9686e-03 eta 19:14:51 +epoch [6/50] batch [745/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.3750 (1.2064) acc 59.3750 (70.3062) lr 1.9686e-03 eta 19:14:40 +epoch [6/50] batch [750/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.1963 (1.2055) acc 65.6250 (70.3375) lr 1.9686e-03 eta 19:14:36 +epoch [6/50] batch [755/1000] time 1.539 (1.565) data 0.000 (0.002) loss 1.4971 (1.2048) acc 75.0000 (70.3477) lr 1.9686e-03 eta 19:14:25 +epoch [6/50] batch [760/1000] time 1.544 (1.565) data 0.001 (0.002) loss 0.9683 (1.2032) acc 75.0000 (70.3701) lr 1.9686e-03 eta 19:14:12 +epoch [6/50] batch [765/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.1338 (1.2047) acc 75.0000 (70.3350) lr 1.9686e-03 eta 19:14:02 +epoch [6/50] 
batch [770/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.0654 (1.2047) acc 59.3750 (70.3328) lr 1.9686e-03 eta 19:13:49 +epoch [6/50] batch [775/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.4941 (1.2062) acc 62.5000 (70.3266) lr 1.9686e-03 eta 19:13:39 +epoch [6/50] batch [780/1000] time 1.546 (1.565) data 0.001 (0.002) loss 1.5381 (1.2068) acc 65.6250 (70.3125) lr 1.9686e-03 eta 19:13:31 +epoch [6/50] batch [785/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.5732 (1.2077) acc 68.7500 (70.3025) lr 1.9686e-03 eta 19:13:25 +epoch [6/50] batch [790/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.3262 (1.2072) acc 71.8750 (70.3204) lr 1.9686e-03 eta 19:13:19 +epoch [6/50] batch [795/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.3496 (1.2066) acc 62.5000 (70.3341) lr 1.9686e-03 eta 19:13:19 +epoch [6/50] batch [800/1000] time 1.564 (1.565) data 0.000 (0.002) loss 0.7148 (1.2062) acc 81.2500 (70.3398) lr 1.9686e-03 eta 19:13:10 +epoch [6/50] batch [805/1000] time 1.565 (1.565) data 0.001 (0.002) loss 1.2568 (1.2056) acc 65.6250 (70.3455) lr 1.9686e-03 eta 19:12:58 +epoch [6/50] batch [810/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.3760 (1.2060) acc 62.5000 (70.3511) lr 1.9686e-03 eta 19:12:47 +epoch [6/50] batch [815/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.7607 (1.2061) acc 71.8750 (70.3528) lr 1.9686e-03 eta 19:12:38 +epoch [6/50] batch [820/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.3535 (1.2063) acc 62.5000 (70.3620) lr 1.9686e-03 eta 19:12:31 +epoch [6/50] batch [825/1000] time 1.556 (1.565) data 0.000 (0.002) loss 0.9780 (1.2052) acc 81.2500 (70.3750) lr 1.9686e-03 eta 19:12:23 +epoch [6/50] batch [830/1000] time 1.582 (1.565) data 0.001 (0.002) loss 0.7896 (1.2045) acc 75.0000 (70.3765) lr 1.9686e-03 eta 19:12:14 +epoch [6/50] batch [835/1000] time 1.586 (1.565) data 0.000 (0.002) loss 0.9839 (1.2042) acc 78.1250 (70.3705) lr 1.9686e-03 eta 19:12:07 +epoch [6/50] batch [840/1000] time 1.590 (1.565) data 0.000 
(0.002) loss 1.7197 (1.2053) acc 62.5000 (70.3571) lr 1.9686e-03 eta 19:12:05 +epoch [6/50] batch [845/1000] time 1.530 (1.565) data 0.000 (0.002) loss 1.1279 (1.2060) acc 75.0000 (70.3439) lr 1.9686e-03 eta 19:11:59 +epoch [6/50] batch [850/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.2021 (1.2041) acc 71.8750 (70.3750) lr 1.9686e-03 eta 19:11:49 +epoch [6/50] batch [855/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.2900 (1.2045) acc 68.7500 (70.3618) lr 1.9686e-03 eta 19:11:42 +epoch [6/50] batch [860/1000] time 1.586 (1.565) data 0.000 (0.002) loss 1.8564 (1.2057) acc 59.3750 (70.3634) lr 1.9686e-03 eta 19:11:36 +epoch [6/50] batch [865/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.9492 (1.2048) acc 62.5000 (70.3577) lr 1.9686e-03 eta 19:11:29 +epoch [6/50] batch [870/1000] time 1.588 (1.565) data 0.001 (0.002) loss 1.7393 (1.2054) acc 62.5000 (70.3556) lr 1.9686e-03 eta 19:11:20 +epoch [6/50] batch [875/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.1934 (1.2052) acc 68.7500 (70.3571) lr 1.9686e-03 eta 19:11:11 +epoch [6/50] batch [880/1000] time 1.545 (1.565) data 0.001 (0.002) loss 1.6992 (1.2056) acc 65.6250 (70.3587) lr 1.9686e-03 eta 19:10:58 +epoch [6/50] batch [885/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.7852 (1.2054) acc 62.5000 (70.3672) lr 1.9686e-03 eta 19:10:51 +epoch [6/50] batch [890/1000] time 1.580 (1.565) data 0.000 (0.002) loss 1.1611 (1.2055) acc 75.0000 (70.3617) lr 1.9686e-03 eta 19:10:42 +epoch [6/50] batch [895/1000] time 1.542 (1.565) data 0.000 (0.002) loss 1.9619 (1.2046) acc 53.1250 (70.3666) lr 1.9686e-03 eta 19:10:33 +epoch [6/50] batch [900/1000] time 1.729 (1.565) data 0.000 (0.002) loss 1.4316 (1.2038) acc 65.6250 (70.3750) lr 1.9686e-03 eta 19:10:33 +epoch [6/50] batch [905/1000] time 1.538 (1.565) data 0.001 (0.002) loss 1.1826 (1.2031) acc 65.6250 (70.3936) lr 1.9686e-03 eta 19:10:26 +epoch [6/50] batch [910/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.9668 (1.2036) acc 81.2500 (70.3571) lr 
1.9686e-03 eta 19:10:16 +epoch [6/50] batch [915/1000] time 1.534 (1.565) data 0.001 (0.002) loss 0.4675 (1.2023) acc 87.5000 (70.3996) lr 1.9686e-03 eta 19:10:07 +epoch [6/50] batch [920/1000] time 1.585 (1.565) data 0.000 (0.001) loss 0.9097 (1.2004) acc 75.0000 (70.4212) lr 1.9686e-03 eta 19:10:00 +epoch [6/50] batch [925/1000] time 1.547 (1.565) data 0.001 (0.001) loss 1.0273 (1.1999) acc 65.6250 (70.4088) lr 1.9686e-03 eta 19:09:51 +epoch [6/50] batch [930/1000] time 1.540 (1.565) data 0.001 (0.001) loss 0.8335 (1.1988) acc 78.1250 (70.4167) lr 1.9686e-03 eta 19:09:42 +epoch [6/50] batch [935/1000] time 1.561 (1.565) data 0.000 (0.001) loss 1.0889 (1.1982) acc 71.8750 (70.4278) lr 1.9686e-03 eta 19:09:31 +epoch [6/50] batch [940/1000] time 1.559 (1.565) data 0.000 (0.001) loss 0.8789 (1.1979) acc 78.1250 (70.4289) lr 1.9686e-03 eta 19:09:20 +epoch [6/50] batch [945/1000] time 1.690 (1.565) data 0.001 (0.001) loss 0.9614 (1.1977) acc 62.5000 (70.4200) lr 1.9686e-03 eta 19:09:18 +epoch [6/50] batch [950/1000] time 1.550 (1.565) data 0.001 (0.001) loss 1.4756 (1.1983) acc 65.6250 (70.4046) lr 1.9686e-03 eta 19:09:08 +epoch [6/50] batch [955/1000] time 1.555 (1.565) data 0.000 (0.001) loss 1.0898 (1.1972) acc 78.1250 (70.4058) lr 1.9686e-03 eta 19:08:57 +epoch [6/50] batch [960/1000] time 1.545 (1.565) data 0.000 (0.001) loss 0.9009 (1.1961) acc 68.7500 (70.4004) lr 1.9686e-03 eta 19:08:46 +epoch [6/50] batch [965/1000] time 1.565 (1.565) data 0.001 (0.001) loss 1.1748 (1.1955) acc 68.7500 (70.4242) lr 1.9686e-03 eta 19:08:37 +epoch [6/50] batch [970/1000] time 1.566 (1.565) data 0.000 (0.001) loss 1.5146 (1.1958) acc 65.6250 (70.4253) lr 1.9686e-03 eta 19:08:29 +epoch [6/50] batch [975/1000] time 1.571 (1.565) data 0.000 (0.001) loss 1.2480 (1.1952) acc 68.7500 (70.4391) lr 1.9686e-03 eta 19:08:20 +epoch [6/50] batch [980/1000] time 1.562 (1.565) data 0.001 (0.001) loss 1.2617 (1.1954) acc 75.0000 (70.4528) lr 1.9686e-03 eta 19:08:10 +epoch [6/50] batch 
[985/1000] time 1.590 (1.565) data 0.001 (0.001) loss 0.7124 (1.1949) acc 81.2500 (70.4537) lr 1.9686e-03 eta 19:08:03 +epoch [6/50] batch [990/1000] time 1.561 (1.565) data 0.000 (0.001) loss 0.9800 (1.1950) acc 78.1250 (70.4640) lr 1.9686e-03 eta 19:07:59 +epoch [6/50] batch [995/1000] time 1.558 (1.565) data 0.000 (0.001) loss 1.5010 (1.1950) acc 50.0000 (70.4554) lr 1.9686e-03 eta 19:07:49 +epoch [6/50] batch [1000/1000] time 1.534 (1.565) data 0.000 (0.001) loss 1.0176 (1.1949) acc 68.7500 (70.4594) lr 1.9511e-03 eta 19:07:40 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,084 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [7/50] batch [5/1000] time 1.548 (1.697) data 0.000 (0.190) loss 0.9692 (1.0441) acc 75.0000 (73.1250) lr 1.9511e-03 eta 20:44:40 +epoch [7/50] batch [10/1000] time 1.555 (1.623) data 0.000 (0.095) loss 0.8799 (0.9870) acc 75.0000 (74.0625) lr 1.9511e-03 eta 19:50:13 +epoch [7/50] batch [15/1000] time 1.561 (1.603) data 0.000 (0.063) loss 0.6147 (0.9664) acc 78.1250 (74.7917) lr 1.9511e-03 eta 19:35:27 +epoch [7/50] batch [20/1000] time 1.588 (1.597) data 0.001 (0.048) loss 0.9727 (0.9669) acc 75.0000 (74.5312) lr 1.9511e-03 eta 19:30:17 +epoch [7/50] batch [25/1000] time 1.552 (1.593) data 0.000 (0.038) loss 1.1377 (1.0412) acc 78.1250 (73.3750) lr 1.9511e-03 eta 19:27:15 +epoch [7/50] batch [30/1000] time 1.554 (1.588) data 0.000 (0.032) loss 1.6064 (1.0652) acc 59.3750 (73.4375) lr 1.9511e-03 eta 19:24:05 +epoch [7/50] batch [35/1000] time 1.531 (1.584) data 0.000 (0.027) loss 2.1777 (1.0993) acc 46.8750 (72.6786) lr 1.9511e-03 eta 19:20:36 +epoch [7/50] batch [40/1000] time 1.550 (1.580) data 0.000 (0.024) loss 0.4983 (1.0875) acc 84.3750 (73.5156) lr 1.9511e-03 eta 19:17:32 +epoch [7/50] batch [45/1000] time 1.540 (1.577) data 0.000 (0.022) loss 1.5605 (1.1227) acc 
65.6250 (72.7083) lr 1.9511e-03 eta 19:15:00 +epoch [7/50] batch [50/1000] time 1.556 (1.574) data 0.000 (0.019) loss 1.0459 (1.1099) acc 68.7500 (72.6875) lr 1.9511e-03 eta 19:12:49 +epoch [7/50] batch [55/1000] time 1.549 (1.572) data 0.001 (0.018) loss 1.1670 (1.1114) acc 53.1250 (71.9318) lr 1.9511e-03 eta 19:11:31 +epoch [7/50] batch [60/1000] time 1.574 (1.575) data 0.000 (0.016) loss 0.8618 (1.1012) acc 71.8750 (71.8750) lr 1.9511e-03 eta 19:13:37 +epoch [7/50] batch [65/1000] time 1.541 (1.573) data 0.001 (0.015) loss 1.0957 (1.0955) acc 68.7500 (71.8269) lr 1.9511e-03 eta 19:12:10 +epoch [7/50] batch [70/1000] time 1.566 (1.572) data 0.000 (0.014) loss 1.4160 (1.1249) acc 62.5000 (71.2946) lr 1.9511e-03 eta 19:11:07 +epoch [7/50] batch [75/1000] time 1.555 (1.571) data 0.000 (0.013) loss 1.4004 (1.1199) acc 62.5000 (71.4167) lr 1.9511e-03 eta 19:09:56 +epoch [7/50] batch [80/1000] time 1.539 (1.570) data 0.001 (0.012) loss 1.2861 (1.1305) acc 65.6250 (71.3672) lr 1.9511e-03 eta 19:08:58 +epoch [7/50] batch [85/1000] time 1.581 (1.569) data 0.000 (0.012) loss 1.3203 (1.1437) acc 68.7500 (71.2132) lr 1.9511e-03 eta 19:08:33 +epoch [7/50] batch [90/1000] time 1.548 (1.568) data 0.000 (0.011) loss 1.1973 (1.1566) acc 75.0000 (70.9722) lr 1.9511e-03 eta 19:07:46 +epoch [7/50] batch [95/1000] time 1.563 (1.568) data 0.000 (0.010) loss 1.6113 (1.1702) acc 71.8750 (70.8882) lr 1.9511e-03 eta 19:07:25 +epoch [7/50] batch [100/1000] time 1.569 (1.568) data 0.000 (0.010) loss 1.0596 (1.1754) acc 75.0000 (70.9062) lr 1.9511e-03 eta 19:07:22 +epoch [7/50] batch [105/1000] time 1.557 (1.568) data 0.000 (0.010) loss 1.3809 (1.1795) acc 65.6250 (70.7738) lr 1.9511e-03 eta 19:07:14 +epoch [7/50] batch [110/1000] time 1.570 (1.569) data 0.000 (0.009) loss 1.0908 (1.1735) acc 62.5000 (70.8807) lr 1.9511e-03 eta 19:07:57 +epoch [7/50] batch [115/1000] time 1.562 (1.569) data 0.001 (0.009) loss 1.8652 (1.1810) acc 62.5000 (70.8152) lr 1.9511e-03 eta 19:07:22 +epoch [7/50] 
batch [120/1000] time 1.546 (1.568) data 0.000 (0.008) loss 1.1045 (1.1805) acc 65.6250 (70.5990) lr 1.9511e-03 eta 19:06:57 +epoch [7/50] batch [125/1000] time 1.561 (1.568) data 0.000 (0.008) loss 0.9482 (1.1757) acc 71.8750 (70.7000) lr 1.9511e-03 eta 19:06:45 +epoch [7/50] batch [130/1000] time 1.591 (1.568) data 0.001 (0.008) loss 1.3984 (1.1857) acc 68.7500 (70.5769) lr 1.9511e-03 eta 19:06:42 +epoch [7/50] batch [135/1000] time 1.575 (1.568) data 0.000 (0.008) loss 1.1777 (1.1868) acc 71.8750 (70.6944) lr 1.9511e-03 eta 19:06:14 +epoch [7/50] batch [140/1000] time 1.536 (1.567) data 0.000 (0.007) loss 1.5371 (1.1987) acc 68.7500 (70.6696) lr 1.9511e-03 eta 19:05:46 +epoch [7/50] batch [145/1000] time 1.556 (1.567) data 0.000 (0.007) loss 1.3828 (1.1953) acc 59.3750 (70.6681) lr 1.9511e-03 eta 19:05:26 +epoch [7/50] batch [150/1000] time 1.548 (1.567) data 0.001 (0.007) loss 1.2998 (1.1998) acc 68.7500 (70.4583) lr 1.9511e-03 eta 19:05:00 +epoch [7/50] batch [155/1000] time 1.535 (1.566) data 0.001 (0.007) loss 1.5752 (1.2090) acc 62.5000 (70.2218) lr 1.9511e-03 eta 19:04:35 +epoch [7/50] batch [160/1000] time 1.558 (1.566) data 0.001 (0.006) loss 1.0840 (1.2112) acc 78.1250 (70.2148) lr 1.9511e-03 eta 19:04:05 +epoch [7/50] batch [165/1000] time 1.557 (1.566) data 0.000 (0.006) loss 0.8696 (1.2086) acc 75.0000 (70.1515) lr 1.9511e-03 eta 19:04:00 +epoch [7/50] batch [170/1000] time 1.577 (1.566) data 0.000 (0.006) loss 1.3418 (1.2088) acc 65.6250 (70.0000) lr 1.9511e-03 eta 19:03:47 +epoch [7/50] batch [175/1000] time 1.551 (1.566) data 0.000 (0.006) loss 0.9565 (1.2091) acc 78.1250 (70.0893) lr 1.9511e-03 eta 19:03:32 +epoch [7/50] batch [180/1000] time 1.559 (1.566) data 0.000 (0.006) loss 1.3320 (1.2054) acc 65.6250 (70.1562) lr 1.9511e-03 eta 19:03:31 +epoch [7/50] batch [185/1000] time 1.561 (1.566) data 0.000 (0.006) loss 1.2412 (1.2071) acc 65.6250 (70.0845) lr 1.9511e-03 eta 19:03:25 +epoch [7/50] batch [190/1000] time 1.548 (1.565) data 0.000 
(0.005) loss 0.9854 (1.2037) acc 75.0000 (70.0987) lr 1.9511e-03 eta 19:03:04 +epoch [7/50] batch [195/1000] time 1.526 (1.565) data 0.001 (0.005) loss 1.7598 (1.2051) acc 56.2500 (70.0801) lr 1.9511e-03 eta 19:02:41 +epoch [7/50] batch [200/1000] time 1.579 (1.565) data 0.000 (0.005) loss 1.3926 (1.2025) acc 59.3750 (70.0938) lr 1.9511e-03 eta 19:02:26 +epoch [7/50] batch [205/1000] time 1.564 (1.565) data 0.000 (0.005) loss 1.3115 (1.1976) acc 68.7500 (70.1524) lr 1.9511e-03 eta 19:02:14 +epoch [7/50] batch [210/1000] time 1.545 (1.565) data 0.000 (0.005) loss 1.3398 (1.1981) acc 68.7500 (70.1637) lr 1.9511e-03 eta 19:02:32 +epoch [7/50] batch [215/1000] time 1.604 (1.565) data 0.000 (0.005) loss 1.2432 (1.1934) acc 62.5000 (70.1453) lr 1.9511e-03 eta 19:02:24 +epoch [7/50] batch [220/1000] time 1.548 (1.565) data 0.000 (0.005) loss 1.5049 (1.1915) acc 71.8750 (70.2131) lr 1.9511e-03 eta 19:02:10 +epoch [7/50] batch [225/1000] time 1.562 (1.565) data 0.000 (0.005) loss 1.3271 (1.1970) acc 65.6250 (70.1667) lr 1.9511e-03 eta 19:02:04 +epoch [7/50] batch [230/1000] time 1.572 (1.565) data 0.000 (0.005) loss 1.0566 (1.1953) acc 75.0000 (70.2446) lr 1.9511e-03 eta 19:01:45 +epoch [7/50] batch [235/1000] time 1.561 (1.565) data 0.001 (0.004) loss 1.5127 (1.1917) acc 65.6250 (70.2793) lr 1.9511e-03 eta 19:01:30 +epoch [7/50] batch [240/1000] time 1.541 (1.565) data 0.001 (0.004) loss 1.1270 (1.1968) acc 65.6250 (70.1823) lr 1.9511e-03 eta 19:01:16 +epoch [7/50] batch [245/1000] time 1.556 (1.565) data 0.000 (0.004) loss 1.0107 (1.1979) acc 71.8750 (70.2168) lr 1.9511e-03 eta 19:01:05 +epoch [7/50] batch [250/1000] time 1.547 (1.565) data 0.000 (0.004) loss 1.2754 (1.1943) acc 59.3750 (70.2375) lr 1.9511e-03 eta 19:01:03 +epoch [7/50] batch [255/1000] time 1.574 (1.565) data 0.000 (0.004) loss 1.8369 (1.1989) acc 68.7500 (70.2328) lr 1.9511e-03 eta 19:01:22 +epoch [7/50] batch [260/1000] time 1.564 (1.566) data 0.001 (0.004) loss 0.9570 (1.1962) acc 71.8750 (70.2885) lr 
1.9511e-03 eta 19:01:15 +epoch [7/50] batch [265/1000] time 1.561 (1.565) data 0.000 (0.004) loss 1.2012 (1.1956) acc 65.6250 (70.2476) lr 1.9511e-03 eta 19:00:57 +epoch [7/50] batch [270/1000] time 1.564 (1.565) data 0.000 (0.004) loss 1.6768 (1.1990) acc 65.6250 (70.3009) lr 1.9511e-03 eta 19:00:50 +epoch [7/50] batch [275/1000] time 1.553 (1.565) data 0.000 (0.004) loss 1.3027 (1.1989) acc 75.0000 (70.2955) lr 1.9511e-03 eta 19:00:50 +epoch [7/50] batch [280/1000] time 1.578 (1.565) data 0.001 (0.004) loss 1.0801 (1.1972) acc 71.8750 (70.2679) lr 1.9511e-03 eta 19:00:40 +epoch [7/50] batch [285/1000] time 1.577 (1.565) data 0.000 (0.004) loss 1.4365 (1.1958) acc 68.7500 (70.3509) lr 1.9511e-03 eta 19:00:34 +epoch [7/50] batch [290/1000] time 1.553 (1.565) data 0.000 (0.004) loss 1.5020 (1.1947) acc 68.7500 (70.4095) lr 1.9511e-03 eta 19:00:19 +epoch [7/50] batch [295/1000] time 1.551 (1.565) data 0.000 (0.004) loss 0.8403 (1.1937) acc 75.0000 (70.4131) lr 1.9511e-03 eta 19:00:03 +epoch [7/50] batch [300/1000] time 1.574 (1.566) data 0.000 (0.004) loss 1.4580 (1.1898) acc 65.6250 (70.5312) lr 1.9511e-03 eta 19:00:17 +epoch [7/50] batch [305/1000] time 1.572 (1.566) data 0.000 (0.004) loss 1.6553 (1.1938) acc 62.5000 (70.4406) lr 1.9511e-03 eta 19:00:16 +epoch [7/50] batch [310/1000] time 1.548 (1.566) data 0.000 (0.004) loss 0.5869 (1.1898) acc 90.6250 (70.4940) lr 1.9511e-03 eta 19:00:01 +epoch [7/50] batch [315/1000] time 1.551 (1.565) data 0.000 (0.003) loss 0.6787 (1.1863) acc 81.2500 (70.5456) lr 1.9511e-03 eta 18:59:41 +epoch [7/50] batch [320/1000] time 1.580 (1.565) data 0.000 (0.003) loss 1.3018 (1.1889) acc 71.8750 (70.5371) lr 1.9511e-03 eta 18:59:26 +epoch [7/50] batch [325/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.6123 (1.1934) acc 68.7500 (70.4808) lr 1.9511e-03 eta 18:59:12 +epoch [7/50] batch [330/1000] time 1.548 (1.565) data 0.001 (0.003) loss 1.1152 (1.1913) acc 65.6250 (70.5303) lr 1.9511e-03 eta 18:59:00 +epoch [7/50] batch 
[335/1000] time 1.556 (1.565) data 0.000 (0.003) loss 1.1865 (1.1917) acc 75.0000 (70.5970) lr 1.9511e-03 eta 18:58:45 +epoch [7/50] batch [340/1000] time 1.560 (1.565) data 0.000 (0.003) loss 0.8008 (1.1887) acc 81.2500 (70.6526) lr 1.9511e-03 eta 18:58:36 +epoch [7/50] batch [345/1000] time 1.569 (1.565) data 0.000 (0.003) loss 1.6084 (1.1894) acc 62.5000 (70.6522) lr 1.9511e-03 eta 18:58:31 +epoch [7/50] batch [350/1000] time 1.572 (1.565) data 0.000 (0.003) loss 0.9854 (1.1854) acc 71.8750 (70.6786) lr 1.9511e-03 eta 18:58:20 +epoch [7/50] batch [355/1000] time 1.542 (1.564) data 0.000 (0.003) loss 1.3076 (1.1832) acc 53.1250 (70.6602) lr 1.9511e-03 eta 18:58:02 +epoch [7/50] batch [360/1000] time 1.704 (1.565) data 0.000 (0.003) loss 0.7705 (1.1812) acc 78.1250 (70.6510) lr 1.9511e-03 eta 18:57:59 +epoch [7/50] batch [365/1000] time 1.587 (1.565) data 0.000 (0.003) loss 1.4375 (1.1821) acc 59.3750 (70.6164) lr 1.9511e-03 eta 18:57:49 +epoch [7/50] batch [370/1000] time 1.542 (1.564) data 0.000 (0.003) loss 1.3799 (1.1838) acc 68.7500 (70.5659) lr 1.9511e-03 eta 18:57:33 +epoch [7/50] batch [375/1000] time 1.540 (1.564) data 0.001 (0.003) loss 1.7393 (1.1885) acc 65.6250 (70.5167) lr 1.9511e-03 eta 18:57:20 +epoch [7/50] batch [380/1000] time 1.560 (1.564) data 0.001 (0.003) loss 0.7866 (1.1871) acc 71.8750 (70.5510) lr 1.9511e-03 eta 18:57:10 +epoch [7/50] batch [385/1000] time 1.560 (1.564) data 0.000 (0.003) loss 1.8701 (1.1899) acc 59.3750 (70.5519) lr 1.9511e-03 eta 18:57:04 +epoch [7/50] batch [390/1000] time 1.552 (1.564) data 0.000 (0.003) loss 1.3545 (1.1887) acc 68.7500 (70.6410) lr 1.9511e-03 eta 18:56:51 +epoch [7/50] batch [395/1000] time 1.553 (1.564) data 0.000 (0.003) loss 1.2686 (1.1874) acc 65.6250 (70.6962) lr 1.9511e-03 eta 18:56:38 +epoch [7/50] batch [400/1000] time 1.535 (1.564) data 0.001 (0.003) loss 0.9854 (1.1865) acc 75.0000 (70.7578) lr 1.9511e-03 eta 18:56:25 +epoch [7/50] batch [405/1000] time 1.724 (1.564) data 0.001 (0.003) loss 
1.4521 (1.1886) acc 59.3750 (70.6713) lr 1.9511e-03 eta 18:56:34 +epoch [7/50] batch [410/1000] time 1.550 (1.564) data 0.000 (0.003) loss 1.2256 (1.1858) acc 71.8750 (70.6707) lr 1.9511e-03 eta 18:56:22 +epoch [7/50] batch [415/1000] time 1.531 (1.564) data 0.000 (0.003) loss 1.5762 (1.1852) acc 65.6250 (70.7003) lr 1.9511e-03 eta 18:56:10 +epoch [7/50] batch [420/1000] time 1.552 (1.564) data 0.001 (0.003) loss 1.2861 (1.1891) acc 65.6250 (70.6548) lr 1.9511e-03 eta 18:55:54 +epoch [7/50] batch [425/1000] time 1.557 (1.564) data 0.001 (0.003) loss 1.0547 (1.1869) acc 75.0000 (70.6912) lr 1.9511e-03 eta 18:55:47 +epoch [7/50] batch [430/1000] time 1.542 (1.564) data 0.000 (0.003) loss 1.4053 (1.1876) acc 75.0000 (70.6759) lr 1.9511e-03 eta 18:55:39 +epoch [7/50] batch [435/1000] time 1.559 (1.564) data 0.001 (0.003) loss 0.7383 (1.1864) acc 84.3750 (70.6681) lr 1.9511e-03 eta 18:55:31 +epoch [7/50] batch [440/1000] time 1.578 (1.564) data 0.000 (0.003) loss 1.1855 (1.1873) acc 75.0000 (70.6321) lr 1.9511e-03 eta 18:55:27 +epoch [7/50] batch [445/1000] time 1.576 (1.564) data 0.000 (0.003) loss 1.0977 (1.1843) acc 75.0000 (70.6882) lr 1.9511e-03 eta 18:55:21 +epoch [7/50] batch [450/1000] time 1.580 (1.565) data 0.001 (0.003) loss 1.5000 (1.1840) acc 56.2500 (70.6944) lr 1.9511e-03 eta 18:55:34 +epoch [7/50] batch [455/1000] time 1.585 (1.565) data 0.001 (0.003) loss 1.2656 (1.1829) acc 65.6250 (70.7005) lr 1.9511e-03 eta 18:55:29 +epoch [7/50] batch [460/1000] time 1.563 (1.565) data 0.001 (0.003) loss 1.4102 (1.1813) acc 78.1250 (70.7677) lr 1.9511e-03 eta 18:55:20 +epoch [7/50] batch [465/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.2393 (1.1821) acc 62.5000 (70.7527) lr 1.9511e-03 eta 18:55:09 +epoch [7/50] batch [470/1000] time 1.562 (1.564) data 0.001 (0.002) loss 1.4531 (1.1856) acc 71.8750 (70.7247) lr 1.9511e-03 eta 18:54:57 +epoch [7/50] batch [475/1000] time 1.574 (1.564) data 0.000 (0.002) loss 1.3730 (1.1883) acc 65.6250 (70.6645) lr 1.9511e-03 
eta 18:54:51 +epoch [7/50] batch [480/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.9478 (1.1872) acc 78.1250 (70.6901) lr 1.9511e-03 eta 18:54:39 +epoch [7/50] batch [485/1000] time 1.554 (1.564) data 0.001 (0.002) loss 1.6260 (1.1870) acc 65.6250 (70.6959) lr 1.9511e-03 eta 18:54:28 +epoch [7/50] batch [490/1000] time 1.538 (1.564) data 0.001 (0.002) loss 1.3496 (1.1861) acc 68.7500 (70.7207) lr 1.9511e-03 eta 18:54:19 +epoch [7/50] batch [495/1000] time 1.566 (1.564) data 0.000 (0.002) loss 0.9058 (1.1852) acc 78.1250 (70.7639) lr 1.9511e-03 eta 18:54:11 +epoch [7/50] batch [500/1000] time 1.544 (1.564) data 0.001 (0.002) loss 0.8633 (1.1862) acc 75.0000 (70.7562) lr 1.9511e-03 eta 18:53:58 +epoch [7/50] batch [505/1000] time 1.568 (1.564) data 0.000 (0.002) loss 1.1553 (1.1856) acc 81.2500 (70.8045) lr 1.9511e-03 eta 18:53:51 +epoch [7/50] batch [510/1000] time 1.522 (1.564) data 0.001 (0.002) loss 1.2227 (1.1843) acc 62.5000 (70.8272) lr 1.9511e-03 eta 18:53:41 +epoch [7/50] batch [515/1000] time 1.559 (1.564) data 0.000 (0.002) loss 1.0459 (1.1830) acc 71.8750 (70.7828) lr 1.9511e-03 eta 18:53:40 +epoch [7/50] batch [520/1000] time 1.561 (1.564) data 0.000 (0.002) loss 0.8672 (1.1855) acc 75.0000 (70.7692) lr 1.9511e-03 eta 18:53:27 +epoch [7/50] batch [525/1000] time 1.565 (1.564) data 0.001 (0.002) loss 1.1094 (1.1854) acc 75.0000 (70.7798) lr 1.9511e-03 eta 18:53:18 +epoch [7/50] batch [530/1000] time 1.547 (1.564) data 0.000 (0.002) loss 0.9009 (1.1827) acc 75.0000 (70.8196) lr 1.9511e-03 eta 18:53:08 +epoch [7/50] batch [535/1000] time 1.587 (1.564) data 0.000 (0.002) loss 1.3750 (1.1813) acc 62.5000 (70.8411) lr 1.9511e-03 eta 18:53:00 +epoch [7/50] batch [540/1000] time 1.569 (1.564) data 0.000 (0.002) loss 1.0332 (1.1793) acc 75.0000 (70.8970) lr 1.9511e-03 eta 18:52:53 +epoch [7/50] batch [545/1000] time 1.583 (1.564) data 0.000 (0.002) loss 1.4980 (1.1802) acc 78.1250 (70.8658) lr 1.9511e-03 eta 18:52:46 +epoch [7/50] batch [550/1000] time 
1.563 (1.564) data 0.000 (0.002) loss 1.4863 (1.1816) acc 71.8750 (70.8352) lr 1.9511e-03 eta 18:52:41 +epoch [7/50] batch [555/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.3311 (1.1810) acc 68.7500 (70.8502) lr 1.9511e-03 eta 18:52:31 +epoch [7/50] batch [560/1000] time 1.551 (1.564) data 0.001 (0.002) loss 0.8184 (1.1807) acc 75.0000 (70.8482) lr 1.9511e-03 eta 18:52:38 +epoch [7/50] batch [565/1000] time 1.543 (1.564) data 0.001 (0.002) loss 1.3818 (1.1800) acc 68.7500 (70.8628) lr 1.9511e-03 eta 18:52:29 +epoch [7/50] batch [570/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.0781 (1.1789) acc 59.3750 (70.8553) lr 1.9511e-03 eta 18:52:16 +epoch [7/50] batch [575/1000] time 1.550 (1.564) data 0.000 (0.002) loss 1.0664 (1.1782) acc 75.0000 (70.8750) lr 1.9511e-03 eta 18:52:01 +epoch [7/50] batch [580/1000] time 1.579 (1.564) data 0.001 (0.002) loss 0.7822 (1.1761) acc 81.2500 (70.9267) lr 1.9511e-03 eta 18:51:56 +epoch [7/50] batch [585/1000] time 1.556 (1.564) data 0.001 (0.002) loss 1.9443 (1.1776) acc 56.2500 (70.8761) lr 1.9511e-03 eta 18:51:43 +epoch [7/50] batch [590/1000] time 1.553 (1.564) data 0.001 (0.002) loss 1.8809 (1.1768) acc 53.1250 (70.8951) lr 1.9511e-03 eta 18:51:38 +epoch [7/50] batch [595/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.4893 (1.1760) acc 68.7500 (70.9034) lr 1.9511e-03 eta 18:51:27 +epoch [7/50] batch [600/1000] time 1.566 (1.564) data 0.001 (0.002) loss 1.2861 (1.1766) acc 59.3750 (70.8698) lr 1.9511e-03 eta 18:51:26 +epoch [7/50] batch [605/1000] time 1.555 (1.564) data 0.001 (0.002) loss 0.6343 (1.1737) acc 78.1250 (70.9298) lr 1.9511e-03 eta 18:51:15 +epoch [7/50] batch [610/1000] time 1.570 (1.564) data 0.000 (0.002) loss 0.8105 (1.1732) acc 78.1250 (70.9221) lr 1.9511e-03 eta 18:51:08 +epoch [7/50] batch [615/1000] time 1.546 (1.564) data 0.001 (0.002) loss 1.0967 (1.1741) acc 78.1250 (70.9299) lr 1.9511e-03 eta 18:50:59 +epoch [7/50] batch [620/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.7446 (1.1729) 
acc 84.3750 (70.9627) lr 1.9511e-03 eta 18:50:49 +epoch [7/50] batch [625/1000] time 1.578 (1.564) data 0.000 (0.002) loss 1.2100 (1.1748) acc 68.7500 (70.9150) lr 1.9511e-03 eta 18:50:38 +epoch [7/50] batch [630/1000] time 1.574 (1.564) data 0.000 (0.002) loss 1.4639 (1.1765) acc 65.6250 (70.8780) lr 1.9511e-03 eta 18:50:32 +epoch [7/50] batch [635/1000] time 1.533 (1.564) data 0.000 (0.002) loss 1.3428 (1.1784) acc 75.0000 (70.8907) lr 1.9511e-03 eta 18:50:19 +epoch [7/50] batch [640/1000] time 1.552 (1.564) data 0.001 (0.002) loss 1.3516 (1.1820) acc 65.6250 (70.8301) lr 1.9511e-03 eta 18:50:09 +epoch [7/50] batch [645/1000] time 1.578 (1.564) data 0.001 (0.002) loss 0.9771 (1.1829) acc 84.3750 (70.8479) lr 1.9511e-03 eta 18:50:02 +epoch [7/50] batch [650/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.1670 (1.1815) acc 68.7500 (70.8942) lr 1.9511e-03 eta 18:49:52 +epoch [7/50] batch [655/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.5361 (1.1832) acc 62.5000 (70.8540) lr 1.9511e-03 eta 18:49:48 +epoch [7/50] batch [660/1000] time 1.565 (1.564) data 0.000 (0.002) loss 1.3594 (1.1823) acc 65.6250 (70.8902) lr 1.9511e-03 eta 18:49:41 +epoch [7/50] batch [665/1000] time 1.555 (1.564) data 0.001 (0.002) loss 1.2148 (1.1821) acc 65.6250 (70.8600) lr 1.9511e-03 eta 18:49:41 +epoch [7/50] batch [670/1000] time 1.544 (1.564) data 0.000 (0.002) loss 1.1279 (1.1822) acc 65.6250 (70.8535) lr 1.9511e-03 eta 18:49:36 +epoch [7/50] batch [675/1000] time 1.534 (1.564) data 0.000 (0.002) loss 1.9805 (1.1824) acc 43.7500 (70.8380) lr 1.9511e-03 eta 18:49:24 +epoch [7/50] batch [680/1000] time 1.554 (1.564) data 0.000 (0.002) loss 0.9136 (1.1833) acc 75.0000 (70.8318) lr 1.9511e-03 eta 18:49:12 +epoch [7/50] batch [685/1000] time 1.567 (1.564) data 0.000 (0.002) loss 0.8389 (1.1837) acc 75.0000 (70.8212) lr 1.9511e-03 eta 18:49:02 +epoch [7/50] batch [690/1000] time 1.544 (1.564) data 0.000 (0.002) loss 0.9146 (1.1830) acc 75.0000 (70.8379) lr 1.9511e-03 eta 18:48:55 
+epoch [7/50] batch [695/1000] time 1.554 (1.564) data 0.001 (0.002) loss 1.2676 (1.1865) acc 71.8750 (70.7779) lr 1.9511e-03 eta 18:48:45 +epoch [7/50] batch [700/1000] time 1.550 (1.564) data 0.000 (0.002) loss 0.8579 (1.1858) acc 75.0000 (70.7857) lr 1.9511e-03 eta 18:48:34 +epoch [7/50] batch [705/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.7070 (1.1861) acc 78.1250 (70.7846) lr 1.9511e-03 eta 18:48:25 +epoch [7/50] batch [710/1000] time 1.564 (1.564) data 0.001 (0.002) loss 1.1602 (1.1866) acc 68.7500 (70.7746) lr 1.9511e-03 eta 18:48:25 +epoch [7/50] batch [715/1000] time 1.562 (1.564) data 0.000 (0.002) loss 1.1592 (1.1883) acc 78.1250 (70.7212) lr 1.9511e-03 eta 18:48:19 +epoch [7/50] batch [720/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.6738 (1.1893) acc 65.6250 (70.6944) lr 1.9511e-03 eta 18:48:13 +epoch [7/50] batch [725/1000] time 1.567 (1.564) data 0.000 (0.002) loss 0.8730 (1.1883) acc 81.2500 (70.7241) lr 1.9511e-03 eta 18:48:03 +epoch [7/50] batch [730/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.8579 (1.1880) acc 75.0000 (70.7192) lr 1.9511e-03 eta 18:47:56 +epoch [7/50] batch [735/1000] time 1.564 (1.564) data 0.001 (0.002) loss 1.3994 (1.1883) acc 65.6250 (70.7185) lr 1.9511e-03 eta 18:47:51 +epoch [7/50] batch [740/1000] time 1.584 (1.564) data 0.000 (0.002) loss 1.5010 (1.1892) acc 65.6250 (70.6926) lr 1.9511e-03 eta 18:47:44 +epoch [7/50] batch [745/1000] time 1.589 (1.564) data 0.001 (0.002) loss 1.2627 (1.1896) acc 56.2500 (70.6460) lr 1.9511e-03 eta 18:47:38 +epoch [7/50] batch [750/1000] time 1.713 (1.564) data 0.001 (0.002) loss 1.3184 (1.1914) acc 56.2500 (70.6167) lr 1.9511e-03 eta 18:47:39 +epoch [7/50] batch [755/1000] time 1.571 (1.564) data 0.000 (0.002) loss 1.4424 (1.1916) acc 62.5000 (70.6209) lr 1.9511e-03 eta 18:47:31 +epoch [7/50] batch [760/1000] time 1.574 (1.564) data 0.000 (0.002) loss 1.1963 (1.1926) acc 62.5000 (70.5880) lr 1.9511e-03 eta 18:47:23 +epoch [7/50] batch [765/1000] time 1.569 (1.564) 
data 0.000 (0.002) loss 0.7441 (1.1919) acc 81.2500 (70.6127) lr 1.9511e-03 eta 18:47:14 +epoch [7/50] batch [770/1000] time 1.560 (1.564) data 0.001 (0.002) loss 1.2607 (1.1925) acc 81.2500 (70.6006) lr 1.9511e-03 eta 18:47:03 +epoch [7/50] batch [775/1000] time 1.537 (1.564) data 0.001 (0.002) loss 0.8291 (1.1915) acc 81.2500 (70.6169) lr 1.9511e-03 eta 18:46:55 +epoch [7/50] batch [780/1000] time 1.564 (1.564) data 0.001 (0.002) loss 0.8823 (1.1913) acc 71.8750 (70.6050) lr 1.9511e-03 eta 18:46:50 +epoch [7/50] batch [785/1000] time 1.562 (1.564) data 0.001 (0.002) loss 1.2178 (1.1917) acc 78.1250 (70.6131) lr 1.9511e-03 eta 18:46:41 +epoch [7/50] batch [790/1000] time 1.565 (1.564) data 0.001 (0.002) loss 0.9883 (1.1916) acc 65.6250 (70.5973) lr 1.9511e-03 eta 18:46:34 +epoch [7/50] batch [795/1000] time 1.543 (1.564) data 0.000 (0.002) loss 1.1758 (1.1914) acc 62.5000 (70.6093) lr 1.9511e-03 eta 18:46:23 +epoch [7/50] batch [800/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.0205 (1.1903) acc 68.7500 (70.6328) lr 1.9511e-03 eta 18:46:11 +epoch [7/50] batch [805/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.3682 (1.1903) acc 62.5000 (70.6444) lr 1.9511e-03 eta 18:46:00 +epoch [7/50] batch [810/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.1562 (1.1901) acc 71.8750 (70.6674) lr 1.9511e-03 eta 18:45:54 +epoch [7/50] batch [815/1000] time 1.576 (1.564) data 0.001 (0.002) loss 1.0469 (1.1889) acc 71.8750 (70.6902) lr 1.9511e-03 eta 18:45:54 +epoch [7/50] batch [820/1000] time 1.569 (1.564) data 0.000 (0.002) loss 1.5117 (1.1885) acc 68.7500 (70.7050) lr 1.9511e-03 eta 18:45:44 +epoch [7/50] batch [825/1000] time 1.533 (1.564) data 0.000 (0.002) loss 1.5986 (1.1889) acc 56.2500 (70.6894) lr 1.9511e-03 eta 18:45:34 +epoch [7/50] batch [830/1000] time 1.539 (1.564) data 0.001 (0.002) loss 1.0410 (1.1884) acc 71.8750 (70.7078) lr 1.9511e-03 eta 18:45:23 +epoch [7/50] batch [835/1000] time 1.578 (1.564) data 0.000 (0.002) loss 1.2178 (1.1885) acc 65.6250 
(70.6924) lr 1.9511e-03 eta 18:45:14 +epoch [7/50] batch [840/1000] time 1.555 (1.564) data 0.001 (0.002) loss 1.0811 (1.1888) acc 68.7500 (70.6734) lr 1.9511e-03 eta 18:45:02 +epoch [7/50] batch [845/1000] time 1.579 (1.564) data 0.000 (0.002) loss 1.3066 (1.1887) acc 62.5000 (70.6768) lr 1.9511e-03 eta 18:44:52 +epoch [7/50] batch [850/1000] time 1.575 (1.564) data 0.001 (0.002) loss 1.0732 (1.1892) acc 75.0000 (70.6654) lr 1.9511e-03 eta 18:44:44 +epoch [7/50] batch [855/1000] time 1.560 (1.564) data 0.000 (0.002) loss 1.0869 (1.1889) acc 71.8750 (70.6762) lr 1.9511e-03 eta 18:44:36 +epoch [7/50] batch [860/1000] time 1.542 (1.564) data 0.001 (0.002) loss 1.1230 (1.1882) acc 75.0000 (70.6868) lr 1.9511e-03 eta 18:44:36 +epoch [7/50] batch [865/1000] time 1.554 (1.564) data 0.000 (0.002) loss 0.9434 (1.1878) acc 71.8750 (70.6936) lr 1.9511e-03 eta 18:44:26 +epoch [7/50] batch [870/1000] time 1.584 (1.564) data 0.001 (0.002) loss 0.7319 (1.1870) acc 75.0000 (70.7004) lr 1.9511e-03 eta 18:44:21 +epoch [7/50] batch [875/1000] time 1.533 (1.564) data 0.000 (0.002) loss 0.5640 (1.1865) acc 84.3750 (70.7000) lr 1.9511e-03 eta 18:44:10 +epoch [7/50] batch [880/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.0879 (1.1858) acc 71.8750 (70.7102) lr 1.9511e-03 eta 18:43:58 +epoch [7/50] batch [885/1000] time 1.552 (1.564) data 0.000 (0.002) loss 1.8398 (1.1872) acc 62.5000 (70.6815) lr 1.9511e-03 eta 18:43:48 +epoch [7/50] batch [890/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.1758 (1.1890) acc 71.8750 (70.6390) lr 1.9511e-03 eta 18:43:37 +epoch [7/50] batch [895/1000] time 1.556 (1.564) data 0.001 (0.002) loss 0.9194 (1.1888) acc 78.1250 (70.6320) lr 1.9511e-03 eta 18:43:26 +epoch [7/50] batch [900/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.8374 (1.1879) acc 78.1250 (70.6285) lr 1.9511e-03 eta 18:43:15 +epoch [7/50] batch [905/1000] time 1.543 (1.564) data 0.000 (0.002) loss 0.6328 (1.1862) acc 87.5000 (70.6733) lr 1.9511e-03 eta 18:43:14 +epoch [7/50] 
batch [910/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.2803 (1.1851) acc 65.6250 (70.6696) lr 1.9511e-03 eta 18:43:06 +epoch [7/50] batch [915/1000] time 1.580 (1.564) data 0.001 (0.002) loss 0.9878 (1.1859) acc 68.7500 (70.6626) lr 1.9511e-03 eta 18:42:58 +epoch [7/50] batch [920/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.2061 (1.1853) acc 65.6250 (70.6658) lr 1.9511e-03 eta 18:42:49 +epoch [7/50] batch [925/1000] time 1.541 (1.564) data 0.000 (0.001) loss 0.9170 (1.1844) acc 78.1250 (70.6892) lr 1.9511e-03 eta 18:42:36 +epoch [7/50] batch [930/1000] time 1.554 (1.564) data 0.000 (0.001) loss 0.6084 (1.1840) acc 87.5000 (70.6754) lr 1.9511e-03 eta 18:42:28 +epoch [7/50] batch [935/1000] time 1.568 (1.564) data 0.000 (0.001) loss 1.3369 (1.1850) acc 71.8750 (70.6651) lr 1.9511e-03 eta 18:42:23 +epoch [7/50] batch [940/1000] time 1.559 (1.564) data 0.000 (0.001) loss 1.0391 (1.1857) acc 62.5000 (70.6616) lr 1.9511e-03 eta 18:42:12 +epoch [7/50] batch [945/1000] time 1.569 (1.564) data 0.000 (0.001) loss 1.5977 (1.1871) acc 78.1250 (70.6548) lr 1.9511e-03 eta 18:42:06 +epoch [7/50] batch [950/1000] time 1.584 (1.564) data 0.000 (0.001) loss 1.3701 (1.1882) acc 65.6250 (70.6316) lr 1.9511e-03 eta 18:42:00 +epoch [7/50] batch [955/1000] time 1.584 (1.564) data 0.001 (0.001) loss 1.1631 (1.1881) acc 65.6250 (70.6512) lr 1.9511e-03 eta 18:41:56 +epoch [7/50] batch [960/1000] time 1.559 (1.564) data 0.001 (0.001) loss 0.8843 (1.1875) acc 75.0000 (70.6413) lr 1.9511e-03 eta 18:41:47 +epoch [7/50] batch [965/1000] time 1.568 (1.564) data 0.000 (0.001) loss 0.9565 (1.1875) acc 78.1250 (70.6412) lr 1.9511e-03 eta 18:41:45 +epoch [7/50] batch [970/1000] time 1.550 (1.564) data 0.001 (0.001) loss 1.6143 (1.1880) acc 65.6250 (70.6347) lr 1.9511e-03 eta 18:41:36 +epoch [7/50] batch [975/1000] time 1.552 (1.564) data 0.000 (0.001) loss 1.1514 (1.1883) acc 65.6250 (70.6314) lr 1.9511e-03 eta 18:41:25 +epoch [7/50] batch [980/1000] time 1.560 (1.564) data 0.001 
(0.001) loss 1.5156 (1.1886) acc 62.5000 (70.6250) lr 1.9511e-03 eta 18:41:17 +epoch [7/50] batch [985/1000] time 1.542 (1.564) data 0.001 (0.001) loss 0.7861 (1.1882) acc 78.1250 (70.6345) lr 1.9511e-03 eta 18:41:06 +epoch [7/50] batch [990/1000] time 1.559 (1.564) data 0.000 (0.001) loss 1.1270 (1.1880) acc 59.3750 (70.6250) lr 1.9511e-03 eta 18:40:57 +epoch [7/50] batch [995/1000] time 1.546 (1.564) data 0.000 (0.001) loss 1.5557 (1.1890) acc 59.3750 (70.6093) lr 1.9511e-03 eta 18:40:47 +epoch [7/50] batch [1000/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.0996 (1.1891) acc 75.0000 (70.5938) lr 1.9298e-03 eta 18:40:38 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,042 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [8/50] batch [5/1000] time 1.544 (1.687) data 0.000 (0.188) loss 1.3320 (1.1812) acc 65.6250 (67.5000) lr 1.9298e-03 eta 20:09:06 +epoch [8/50] batch [10/1000] time 1.550 (1.623) data 0.000 (0.094) loss 0.7056 (1.0885) acc 81.2500 (71.2500) lr 1.9298e-03 eta 19:22:55 +epoch [8/50] batch [15/1000] time 1.537 (1.599) data 0.000 (0.063) loss 1.3496 (1.1596) acc 65.6250 (70.6250) lr 1.9298e-03 eta 19:05:44 +epoch [8/50] batch [20/1000] time 1.563 (1.590) data 0.001 (0.047) loss 1.1045 (1.1479) acc 62.5000 (70.0000) lr 1.9298e-03 eta 18:58:55 +epoch [8/50] batch [25/1000] time 1.557 (1.584) data 0.000 (0.038) loss 1.5352 (1.1564) acc 62.5000 (69.8750) lr 1.9298e-03 eta 18:54:34 +epoch [8/50] batch [30/1000] time 1.550 (1.581) data 0.000 (0.032) loss 1.2344 (1.1777) acc 75.0000 (70.2083) lr 1.9298e-03 eta 18:52:04 +epoch [8/50] batch [35/1000] time 1.548 (1.585) data 0.000 (0.027) loss 1.0156 (1.1605) acc 68.7500 (70.5357) lr 1.9298e-03 eta 18:55:13 +epoch [8/50] batch [40/1000] time 1.602 (1.583) data 0.000 (0.024) loss 1.3320 (1.1792) acc 68.7500 (70.3906) lr 1.9298e-03 eta 18:53:37 +epoch [8/50] batch [45/1000] time 1.553 (1.581) data 0.000 (0.021) loss 1.3877 (1.1714) acc 62.5000 (70.6250) lr 1.9298e-03 eta 
18:51:52 +epoch [8/50] batch [50/1000] time 1.565 (1.580) data 0.001 (0.019) loss 1.1885 (1.1823) acc 75.0000 (70.3750) lr 1.9298e-03 eta 18:51:09 +epoch [8/50] batch [55/1000] time 1.525 (1.578) data 0.000 (0.017) loss 1.1738 (1.1891) acc 75.0000 (70.2841) lr 1.9298e-03 eta 18:49:19 +epoch [8/50] batch [60/1000] time 1.569 (1.576) data 0.000 (0.016) loss 0.6143 (1.1886) acc 84.3750 (70.4167) lr 1.9298e-03 eta 18:47:41 +epoch [8/50] batch [65/1000] time 1.575 (1.575) data 0.001 (0.015) loss 0.7515 (1.2060) acc 71.8750 (70.1442) lr 1.9298e-03 eta 18:47:22 +epoch [8/50] batch [70/1000] time 1.544 (1.574) data 0.000 (0.014) loss 0.9399 (1.1957) acc 81.2500 (70.1786) lr 1.9298e-03 eta 18:46:13 +epoch [8/50] batch [75/1000] time 1.725 (1.575) data 0.001 (0.013) loss 0.9863 (1.1894) acc 75.0000 (70.2083) lr 1.9298e-03 eta 18:47:06 +epoch [8/50] batch [80/1000] time 1.559 (1.575) data 0.001 (0.012) loss 0.6470 (1.1803) acc 87.5000 (70.3516) lr 1.9298e-03 eta 18:46:32 +epoch [8/50] batch [85/1000] time 1.575 (1.574) data 0.000 (0.012) loss 1.3125 (1.1889) acc 71.8750 (70.2206) lr 1.9298e-03 eta 18:45:57 +epoch [8/50] batch [90/1000] time 1.562 (1.574) data 0.000 (0.011) loss 1.7363 (1.1957) acc 68.7500 (70.2083) lr 1.9298e-03 eta 18:45:48 +epoch [8/50] batch [95/1000] time 1.568 (1.574) data 0.000 (0.010) loss 1.0127 (1.1930) acc 75.0000 (70.2303) lr 1.9298e-03 eta 18:45:24 +epoch [8/50] batch [100/1000] time 1.573 (1.574) data 0.000 (0.010) loss 1.5576 (1.2130) acc 56.2500 (69.9062) lr 1.9298e-03 eta 18:45:05 +epoch [8/50] batch [105/1000] time 1.548 (1.572) data 0.001 (0.009) loss 1.3672 (1.2107) acc 68.7500 (70.0000) lr 1.9298e-03 eta 18:44:07 +epoch [8/50] batch [110/1000] time 1.538 (1.571) data 0.001 (0.009) loss 1.1992 (1.2132) acc 65.6250 (69.8295) lr 1.9298e-03 eta 18:43:09 +epoch [8/50] batch [115/1000] time 1.571 (1.571) data 0.000 (0.009) loss 0.9858 (1.2111) acc 65.6250 (69.8913) lr 1.9298e-03 eta 18:42:38 +epoch [8/50] batch [120/1000] time 1.529 (1.570) data 
0.000 (0.008) loss 1.4385 (1.2044) acc 71.8750 (70.0260) lr 1.9298e-03 eta 18:41:50 +epoch [8/50] batch [125/1000] time 1.551 (1.569) data 0.000 (0.008) loss 0.9673 (1.1979) acc 78.1250 (70.1250) lr 1.9298e-03 eta 18:41:09 +epoch [8/50] batch [130/1000] time 1.557 (1.569) data 0.000 (0.008) loss 1.6006 (1.2054) acc 62.5000 (69.9760) lr 1.9298e-03 eta 18:40:45 +epoch [8/50] batch [135/1000] time 1.534 (1.568) data 0.000 (0.007) loss 1.2275 (1.2070) acc 65.6250 (69.8611) lr 1.9298e-03 eta 18:40:02 +epoch [8/50] batch [140/1000] time 1.556 (1.568) data 0.000 (0.007) loss 1.6064 (1.2089) acc 62.5000 (69.7545) lr 1.9298e-03 eta 18:40:12 +epoch [8/50] batch [145/1000] time 1.552 (1.568) data 0.000 (0.007) loss 1.1514 (1.2109) acc 62.5000 (69.6121) lr 1.9298e-03 eta 18:39:43 +epoch [8/50] batch [150/1000] time 1.567 (1.568) data 0.001 (0.007) loss 1.2051 (1.2137) acc 62.5000 (69.6042) lr 1.9298e-03 eta 18:39:28 +epoch [8/50] batch [155/1000] time 1.533 (1.567) data 0.000 (0.007) loss 0.9434 (1.2140) acc 78.1250 (69.6976) lr 1.9298e-03 eta 18:39:11 +epoch [8/50] batch [160/1000] time 1.544 (1.567) data 0.000 (0.006) loss 1.7197 (1.2211) acc 59.3750 (69.6094) lr 1.9298e-03 eta 18:38:35 +epoch [8/50] batch [165/1000] time 1.554 (1.566) data 0.000 (0.006) loss 1.0303 (1.2163) acc 84.3750 (69.6591) lr 1.9298e-03 eta 18:38:09 +epoch [8/50] batch [170/1000] time 1.551 (1.566) data 0.000 (0.006) loss 1.3594 (1.2162) acc 59.3750 (69.4853) lr 1.9298e-03 eta 18:37:55 +epoch [8/50] batch [175/1000] time 1.544 (1.566) data 0.000 (0.006) loss 1.3428 (1.2152) acc 71.8750 (69.5536) lr 1.9298e-03 eta 18:37:34 +epoch [8/50] batch [180/1000] time 1.571 (1.566) data 0.000 (0.006) loss 1.8105 (1.2206) acc 59.3750 (69.5660) lr 1.9298e-03 eta 18:37:26 +epoch [8/50] batch [185/1000] time 1.544 (1.567) data 0.000 (0.006) loss 0.6704 (1.2117) acc 81.2500 (69.6959) lr 1.9298e-03 eta 18:37:49 +epoch [8/50] batch [190/1000] time 1.555 (1.566) data 0.001 (0.005) loss 1.2490 (1.2116) acc 59.3750 
(69.6217) lr 1.9298e-03 eta 18:37:30 +epoch [8/50] batch [195/1000] time 1.557 (1.566) data 0.000 (0.005) loss 0.9282 (1.2059) acc 71.8750 (69.7276) lr 1.9298e-03 eta 18:37:06 +epoch [8/50] batch [200/1000] time 1.556 (1.566) data 0.000 (0.005) loss 0.9761 (1.2060) acc 71.8750 (69.6406) lr 1.9298e-03 eta 18:36:47 +epoch [8/50] batch [205/1000] time 1.555 (1.565) data 0.001 (0.005) loss 0.7979 (1.2000) acc 75.0000 (69.6951) lr 1.9298e-03 eta 18:36:28 +epoch [8/50] batch [210/1000] time 1.526 (1.565) data 0.000 (0.005) loss 1.5371 (1.2027) acc 71.8750 (69.7024) lr 1.9298e-03 eta 18:35:56 +epoch [8/50] batch [215/1000] time 1.561 (1.565) data 0.000 (0.005) loss 0.6274 (1.2019) acc 81.2500 (69.6802) lr 1.9298e-03 eta 18:35:52 +epoch [8/50] batch [220/1000] time 1.527 (1.565) data 0.001 (0.005) loss 1.5830 (1.2088) acc 75.0000 (69.5739) lr 1.9298e-03 eta 18:35:46 +epoch [8/50] batch [225/1000] time 1.547 (1.565) data 0.001 (0.005) loss 0.9619 (1.2059) acc 68.7500 (69.5417) lr 1.9298e-03 eta 18:35:27 +epoch [8/50] batch [230/1000] time 1.537 (1.565) data 0.000 (0.005) loss 0.8447 (1.2052) acc 78.1250 (69.5788) lr 1.9298e-03 eta 18:35:32 +epoch [8/50] batch [235/1000] time 1.556 (1.565) data 0.001 (0.004) loss 0.9009 (1.2013) acc 71.8750 (69.6277) lr 1.9298e-03 eta 18:35:22 +epoch [8/50] batch [240/1000] time 1.548 (1.565) data 0.000 (0.004) loss 0.3743 (1.1943) acc 90.6250 (69.8177) lr 1.9298e-03 eta 18:35:11 +epoch [8/50] batch [245/1000] time 1.546 (1.565) data 0.001 (0.004) loss 1.2139 (1.2019) acc 71.8750 (69.7066) lr 1.9298e-03 eta 18:34:51 +epoch [8/50] batch [250/1000] time 1.561 (1.565) data 0.001 (0.004) loss 0.7314 (1.2024) acc 84.3750 (69.7250) lr 1.9298e-03 eta 18:34:44 +epoch [8/50] batch [255/1000] time 1.576 (1.564) data 0.000 (0.004) loss 1.0078 (1.2024) acc 71.8750 (69.7304) lr 1.9298e-03 eta 18:34:31 +epoch [8/50] batch [260/1000] time 1.536 (1.564) data 0.000 (0.004) loss 1.0625 (1.1982) acc 75.0000 (69.8197) lr 1.9298e-03 eta 18:34:12 +epoch [8/50] 
batch [265/1000] time 1.547 (1.564) data 0.000 (0.004) loss 1.7139 (1.1982) acc 56.2500 (69.7524) lr 1.9298e-03 eta 18:34:01 +epoch [8/50] batch [270/1000] time 1.554 (1.564) data 0.001 (0.004) loss 1.1543 (1.1962) acc 81.2500 (69.8148) lr 1.9298e-03 eta 18:33:48 +epoch [8/50] batch [275/1000] time 1.536 (1.564) data 0.001 (0.004) loss 0.8159 (1.1899) acc 84.3750 (69.9659) lr 1.9298e-03 eta 18:33:33 +epoch [8/50] batch [280/1000] time 1.538 (1.564) data 0.001 (0.004) loss 0.7812 (1.1846) acc 78.1250 (70.0223) lr 1.9298e-03 eta 18:33:25 +epoch [8/50] batch [285/1000] time 1.545 (1.564) data 0.001 (0.004) loss 1.8027 (1.1815) acc 71.8750 (70.1096) lr 1.9298e-03 eta 18:33:11 +epoch [8/50] batch [290/1000] time 1.558 (1.564) data 0.000 (0.004) loss 0.8892 (1.1831) acc 75.0000 (70.0754) lr 1.9298e-03 eta 18:33:18 +epoch [8/50] batch [295/1000] time 1.568 (1.564) data 0.000 (0.004) loss 1.4746 (1.1857) acc 65.6250 (70.0424) lr 1.9298e-03 eta 18:32:59 +epoch [8/50] batch [300/1000] time 1.554 (1.564) data 0.000 (0.004) loss 0.9312 (1.1820) acc 71.8750 (70.0729) lr 1.9298e-03 eta 18:32:51 +epoch [8/50] batch [305/1000] time 1.536 (1.563) data 0.001 (0.004) loss 1.4482 (1.1799) acc 65.6250 (70.1742) lr 1.9298e-03 eta 18:32:30 +epoch [8/50] batch [310/1000] time 1.561 (1.563) data 0.000 (0.003) loss 1.6562 (1.1853) acc 53.1250 (70.1008) lr 1.9298e-03 eta 18:32:16 +epoch [8/50] batch [315/1000] time 1.549 (1.563) data 0.000 (0.003) loss 1.3584 (1.1841) acc 75.0000 (70.0992) lr 1.9298e-03 eta 18:32:01 +epoch [8/50] batch [320/1000] time 1.526 (1.563) data 0.001 (0.003) loss 1.2568 (1.1848) acc 59.3750 (70.0781) lr 1.9298e-03 eta 18:31:44 +epoch [8/50] batch [325/1000] time 1.550 (1.563) data 0.000 (0.003) loss 1.1357 (1.1858) acc 75.0000 (70.0769) lr 1.9298e-03 eta 18:31:27 +epoch [8/50] batch [330/1000] time 1.558 (1.563) data 0.001 (0.003) loss 1.4736 (1.1852) acc 59.3750 (70.1042) lr 1.9298e-03 eta 18:31:23 +epoch [8/50] batch [335/1000] time 1.585 (1.563) data 0.000 
(0.003) loss 1.1191 (1.1844) acc 71.8750 (70.1026) lr 1.9298e-03 eta 18:31:33 +epoch [8/50] batch [340/1000] time 1.554 (1.563) data 0.001 (0.003) loss 0.7993 (1.1840) acc 78.1250 (70.1011) lr 1.9298e-03 eta 18:31:18 +epoch [8/50] batch [345/1000] time 1.539 (1.563) data 0.000 (0.003) loss 0.9453 (1.1858) acc 81.2500 (70.0906) lr 1.9298e-03 eta 18:31:07 +epoch [8/50] batch [350/1000] time 1.584 (1.563) data 0.000 (0.003) loss 1.1406 (1.1850) acc 65.6250 (70.1339) lr 1.9298e-03 eta 18:31:03 +epoch [8/50] batch [355/1000] time 1.563 (1.563) data 0.001 (0.003) loss 0.9067 (1.1832) acc 75.0000 (70.1232) lr 1.9298e-03 eta 18:30:55 +epoch [8/50] batch [360/1000] time 1.564 (1.563) data 0.001 (0.003) loss 1.6201 (1.1821) acc 68.7500 (70.1649) lr 1.9298e-03 eta 18:30:49 +epoch [8/50] batch [365/1000] time 1.541 (1.563) data 0.001 (0.003) loss 2.0195 (1.1841) acc 62.5000 (70.1541) lr 1.9298e-03 eta 18:30:32 +epoch [8/50] batch [370/1000] time 1.580 (1.563) data 0.000 (0.003) loss 0.9097 (1.1812) acc 71.8750 (70.2027) lr 1.9298e-03 eta 18:30:20 +epoch [8/50] batch [375/1000] time 1.562 (1.563) data 0.001 (0.003) loss 1.2568 (1.1799) acc 71.8750 (70.2250) lr 1.9298e-03 eta 18:30:10 +epoch [8/50] batch [380/1000] time 1.539 (1.563) data 0.000 (0.003) loss 1.3701 (1.1807) acc 65.6250 (70.2549) lr 1.9298e-03 eta 18:30:13 +epoch [8/50] batch [385/1000] time 1.567 (1.563) data 0.001 (0.003) loss 1.4170 (1.1806) acc 65.6250 (70.2354) lr 1.9298e-03 eta 18:30:03 +epoch [8/50] batch [390/1000] time 1.576 (1.563) data 0.001 (0.003) loss 1.3945 (1.1805) acc 68.7500 (70.1923) lr 1.9298e-03 eta 18:29:51 +epoch [8/50] batch [395/1000] time 1.576 (1.563) data 0.001 (0.003) loss 1.0020 (1.1780) acc 68.7500 (70.2611) lr 1.9298e-03 eta 18:29:45 +epoch [8/50] batch [400/1000] time 1.591 (1.563) data 0.001 (0.003) loss 1.1895 (1.1787) acc 65.6250 (70.2578) lr 1.9298e-03 eta 18:29:38 +epoch [8/50] batch [405/1000] time 1.560 (1.563) data 0.001 (0.003) loss 1.4971 (1.1801) acc 68.7500 (70.2469) lr 
1.9298e-03 eta 18:29:29 +epoch [8/50] batch [410/1000] time 1.552 (1.563) data 0.001 (0.003) loss 1.1230 (1.1788) acc 75.0000 (70.3201) lr 1.9298e-03 eta 18:29:15 +epoch [8/50] batch [415/1000] time 1.595 (1.563) data 0.001 (0.003) loss 1.0293 (1.1768) acc 71.8750 (70.3840) lr 1.9298e-03 eta 18:29:11 +epoch [8/50] batch [420/1000] time 1.564 (1.563) data 0.000 (0.003) loss 1.0889 (1.1778) acc 71.8750 (70.3795) lr 1.9298e-03 eta 18:29:09 +epoch [8/50] batch [425/1000] time 1.554 (1.563) data 0.001 (0.003) loss 0.9087 (1.1789) acc 78.1250 (70.3750) lr 1.9298e-03 eta 18:29:02 +epoch [8/50] batch [430/1000] time 1.569 (1.563) data 0.000 (0.003) loss 0.7441 (1.1776) acc 84.3750 (70.4433) lr 1.9298e-03 eta 18:28:57 +epoch [8/50] batch [435/1000] time 1.580 (1.563) data 0.000 (0.003) loss 1.2490 (1.1799) acc 56.2500 (70.4023) lr 1.9298e-03 eta 18:28:55 +epoch [8/50] batch [440/1000] time 1.707 (1.563) data 0.000 (0.003) loss 1.4414 (1.1808) acc 62.5000 (70.3977) lr 1.9298e-03 eta 18:28:59 +epoch [8/50] batch [445/1000] time 1.542 (1.563) data 0.001 (0.003) loss 0.9717 (1.1805) acc 75.0000 (70.3722) lr 1.9298e-03 eta 18:28:50 +epoch [8/50] batch [450/1000] time 1.587 (1.563) data 0.001 (0.003) loss 0.9312 (1.1843) acc 75.0000 (70.2917) lr 1.9298e-03 eta 18:28:42 +epoch [8/50] batch [455/1000] time 1.555 (1.563) data 0.000 (0.003) loss 0.9277 (1.1816) acc 78.1250 (70.3365) lr 1.9298e-03 eta 18:28:30 +epoch [8/50] batch [460/1000] time 1.549 (1.563) data 0.000 (0.003) loss 1.2441 (1.1785) acc 78.1250 (70.4416) lr 1.9298e-03 eta 18:28:18 +epoch [8/50] batch [465/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.0420 (1.1772) acc 68.7500 (70.4772) lr 1.9298e-03 eta 18:28:06 +epoch [8/50] batch [470/1000] time 1.584 (1.563) data 0.000 (0.002) loss 0.9277 (1.1758) acc 68.7500 (70.4787) lr 1.9298e-03 eta 18:27:57 +epoch [8/50] batch [475/1000] time 1.540 (1.563) data 0.000 (0.002) loss 0.8525 (1.1753) acc 71.8750 (70.4737) lr 1.9298e-03 eta 18:27:42 +epoch [8/50] batch 
[480/1000] time 1.547 (1.563) data 0.001 (0.002) loss 1.4775 (1.1778) acc 65.6250 (70.4232) lr 1.9298e-03 eta 18:27:28 +epoch [8/50] batch [485/1000] time 1.690 (1.563) data 0.000 (0.002) loss 1.5840 (1.1786) acc 65.6250 (70.4253) lr 1.9298e-03 eta 18:27:27 +epoch [8/50] batch [490/1000] time 1.575 (1.563) data 0.000 (0.002) loss 1.5527 (1.1806) acc 71.8750 (70.4082) lr 1.9298e-03 eta 18:27:22 +epoch [8/50] batch [495/1000] time 1.545 (1.563) data 0.000 (0.002) loss 1.2549 (1.1810) acc 75.0000 (70.4356) lr 1.9298e-03 eta 18:27:16 +epoch [8/50] batch [500/1000] time 1.547 (1.563) data 0.001 (0.002) loss 0.8652 (1.1816) acc 71.8750 (70.4250) lr 1.9298e-03 eta 18:27:04 +epoch [8/50] batch [505/1000] time 1.546 (1.563) data 0.001 (0.002) loss 0.7427 (1.1791) acc 84.3750 (70.4641) lr 1.9298e-03 eta 18:26:50 +epoch [8/50] batch [510/1000] time 1.549 (1.563) data 0.001 (0.002) loss 1.4814 (1.1808) acc 71.8750 (70.4412) lr 1.9298e-03 eta 18:26:37 +epoch [8/50] batch [515/1000] time 1.561 (1.563) data 0.001 (0.002) loss 0.9985 (1.1784) acc 62.5000 (70.4915) lr 1.9298e-03 eta 18:26:25 +epoch [8/50] batch [520/1000] time 1.576 (1.562) data 0.000 (0.002) loss 0.6968 (1.1771) acc 71.8750 (70.5108) lr 1.9298e-03 eta 18:26:14 +epoch [8/50] batch [525/1000] time 1.570 (1.562) data 0.000 (0.002) loss 1.2090 (1.1796) acc 62.5000 (70.4702) lr 1.9298e-03 eta 18:26:06 +epoch [8/50] batch [530/1000] time 1.595 (1.563) data 0.001 (0.002) loss 0.5049 (1.1777) acc 87.5000 (70.5307) lr 1.9298e-03 eta 18:26:12 +epoch [8/50] batch [535/1000] time 1.548 (1.563) data 0.001 (0.002) loss 1.1074 (1.1785) acc 62.5000 (70.4790) lr 1.9298e-03 eta 18:26:04 +epoch [8/50] batch [540/1000] time 1.540 (1.563) data 0.001 (0.002) loss 1.0586 (1.1801) acc 78.1250 (70.4572) lr 1.9298e-03 eta 18:25:51 +epoch [8/50] batch [545/1000] time 1.543 (1.563) data 0.000 (0.002) loss 0.4468 (1.1778) acc 81.2500 (70.4989) lr 1.9298e-03 eta 18:25:40 +epoch [8/50] batch [550/1000] time 1.544 (1.563) data 0.000 (0.002) loss 
1.0215 (1.1778) acc 75.0000 (70.4773) lr 1.9298e-03 eta 18:25:30 +epoch [8/50] batch [555/1000] time 1.557 (1.563) data 0.001 (0.002) loss 1.9268 (1.1795) acc 50.0000 (70.4279) lr 1.9298e-03 eta 18:25:21 +epoch [8/50] batch [560/1000] time 1.576 (1.563) data 0.000 (0.002) loss 1.0059 (1.1794) acc 81.2500 (70.4520) lr 1.9298e-03 eta 18:25:13 +epoch [8/50] batch [565/1000] time 1.594 (1.563) data 0.000 (0.002) loss 1.3047 (1.1799) acc 75.0000 (70.4369) lr 1.9298e-03 eta 18:25:12 +epoch [8/50] batch [570/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.9097 (1.1795) acc 75.0000 (70.4496) lr 1.9298e-03 eta 18:25:05 +epoch [8/50] batch [575/1000] time 1.550 (1.563) data 0.001 (0.002) loss 0.9487 (1.1794) acc 75.0000 (70.4511) lr 1.9298e-03 eta 18:24:55 +epoch [8/50] batch [580/1000] time 1.553 (1.563) data 0.000 (0.002) loss 0.8848 (1.1778) acc 81.2500 (70.4903) lr 1.9298e-03 eta 18:24:43 +epoch [8/50] batch [585/1000] time 1.544 (1.562) data 0.001 (0.002) loss 1.2666 (1.1788) acc 68.7500 (70.5021) lr 1.9298e-03 eta 18:24:30 +epoch [8/50] batch [590/1000] time 1.554 (1.562) data 0.000 (0.002) loss 1.1426 (1.1786) acc 75.0000 (70.4926) lr 1.9298e-03 eta 18:24:22 +epoch [8/50] batch [595/1000] time 1.564 (1.563) data 0.001 (0.002) loss 0.8823 (1.1797) acc 78.1250 (70.4727) lr 1.9298e-03 eta 18:24:24 +epoch [8/50] batch [600/1000] time 1.548 (1.563) data 0.000 (0.002) loss 0.9199 (1.1796) acc 71.8750 (70.4740) lr 1.9298e-03 eta 18:24:11 +epoch [8/50] batch [605/1000] time 1.561 (1.563) data 0.001 (0.002) loss 0.7759 (1.1774) acc 78.1250 (70.5165) lr 1.9298e-03 eta 18:24:03 +epoch [8/50] batch [610/1000] time 1.549 (1.562) data 0.000 (0.002) loss 0.9604 (1.1769) acc 71.8750 (70.4918) lr 1.9298e-03 eta 18:23:54 +epoch [8/50] batch [615/1000] time 1.535 (1.562) data 0.001 (0.002) loss 0.4668 (1.1758) acc 90.6250 (70.5386) lr 1.9298e-03 eta 18:23:42 +epoch [8/50] batch [620/1000] time 1.562 (1.562) data 0.000 (0.002) loss 1.5889 (1.1760) acc 59.3750 (70.5444) lr 1.9298e-03 
eta 18:23:30 +epoch [8/50] batch [625/1000] time 1.581 (1.562) data 0.000 (0.002) loss 1.1611 (1.1769) acc 75.0000 (70.5400) lr 1.9298e-03 eta 18:23:19 +epoch [8/50] batch [630/1000] time 1.571 (1.562) data 0.000 (0.002) loss 1.4180 (1.1765) acc 68.7500 (70.5556) lr 1.9298e-03 eta 18:23:15 +epoch [8/50] batch [635/1000] time 1.561 (1.562) data 0.000 (0.002) loss 0.9658 (1.1774) acc 78.1250 (70.5561) lr 1.9298e-03 eta 18:23:07 +epoch [8/50] batch [640/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.1387 (1.1776) acc 65.6250 (70.5566) lr 1.9298e-03 eta 18:23:09 +epoch [8/50] batch [645/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.1348 (1.1764) acc 71.8750 (70.5620) lr 1.9298e-03 eta 18:23:01 +epoch [8/50] batch [650/1000] time 1.550 (1.562) data 0.001 (0.002) loss 0.8032 (1.1758) acc 81.2500 (70.5817) lr 1.9298e-03 eta 18:22:50 +epoch [8/50] batch [655/1000] time 1.555 (1.562) data 0.000 (0.002) loss 0.9932 (1.1745) acc 75.0000 (70.5964) lr 1.9298e-03 eta 18:22:38 +epoch [8/50] batch [660/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.3652 (1.1754) acc 62.5000 (70.5634) lr 1.9298e-03 eta 18:22:25 +epoch [8/50] batch [665/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.5000 (1.1758) acc 65.6250 (70.5921) lr 1.9298e-03 eta 18:22:13 +epoch [8/50] batch [670/1000] time 1.531 (1.562) data 0.000 (0.002) loss 0.8672 (1.1751) acc 75.0000 (70.6017) lr 1.9298e-03 eta 18:22:02 +epoch [8/50] batch [675/1000] time 1.580 (1.562) data 0.001 (0.002) loss 0.5918 (1.1731) acc 75.0000 (70.6389) lr 1.9298e-03 eta 18:21:55 +epoch [8/50] batch [680/1000] time 1.537 (1.562) data 0.000 (0.002) loss 1.3682 (1.1740) acc 62.5000 (70.6158) lr 1.9298e-03 eta 18:21:53 +epoch [8/50] batch [685/1000] time 1.556 (1.562) data 0.000 (0.002) loss 1.2139 (1.1744) acc 68.7500 (70.6204) lr 1.9298e-03 eta 18:21:43 +epoch [8/50] batch [690/1000] time 1.530 (1.562) data 0.001 (0.002) loss 1.0342 (1.1743) acc 78.1250 (70.6295) lr 1.9298e-03 eta 18:21:36 +epoch [8/50] batch [695/1000] time 
1.574 (1.562) data 0.000 (0.002) loss 0.6772 (1.1740) acc 81.2500 (70.6250) lr 1.9298e-03 eta 18:21:29 +epoch [8/50] batch [700/1000] time 1.557 (1.562) data 0.000 (0.002) loss 0.7168 (1.1736) acc 71.8750 (70.6161) lr 1.9298e-03 eta 18:21:21 +epoch [8/50] batch [705/1000] time 1.536 (1.562) data 0.001 (0.002) loss 1.2861 (1.1734) acc 68.7500 (70.6117) lr 1.9298e-03 eta 18:21:11 +epoch [8/50] batch [710/1000] time 1.541 (1.562) data 0.000 (0.002) loss 0.9927 (1.1738) acc 71.8750 (70.5854) lr 1.9298e-03 eta 18:21:00 +epoch [8/50] batch [715/1000] time 1.559 (1.562) data 0.000 (0.002) loss 0.6807 (1.1727) acc 84.3750 (70.6425) lr 1.9298e-03 eta 18:20:52 +epoch [8/50] batch [720/1000] time 1.567 (1.562) data 0.000 (0.002) loss 1.7051 (1.1734) acc 59.3750 (70.6293) lr 1.9298e-03 eta 18:20:46 +epoch [8/50] batch [725/1000] time 1.557 (1.562) data 0.000 (0.002) loss 0.8389 (1.1744) acc 65.6250 (70.6164) lr 1.9298e-03 eta 18:20:37 +epoch [8/50] batch [730/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.5732 (1.1759) acc 68.7500 (70.5822) lr 1.9298e-03 eta 18:20:29 +epoch [8/50] batch [735/1000] time 1.589 (1.562) data 0.001 (0.002) loss 1.0723 (1.1764) acc 62.5000 (70.5400) lr 1.9298e-03 eta 18:20:23 +epoch [8/50] batch [740/1000] time 1.560 (1.562) data 0.001 (0.002) loss 0.9526 (1.1787) acc 75.0000 (70.4983) lr 1.9298e-03 eta 18:20:15 +epoch [8/50] batch [745/1000] time 1.561 (1.562) data 0.000 (0.002) loss 1.4121 (1.1789) acc 75.0000 (70.5159) lr 1.9298e-03 eta 18:20:16 +epoch [8/50] batch [750/1000] time 1.562 (1.562) data 0.000 (0.002) loss 0.9731 (1.1783) acc 68.7500 (70.5000) lr 1.9298e-03 eta 18:20:08 +epoch [8/50] batch [755/1000] time 1.566 (1.562) data 0.001 (0.002) loss 1.1123 (1.1798) acc 62.5000 (70.4594) lr 1.9298e-03 eta 18:20:00 +epoch [8/50] batch [760/1000] time 1.526 (1.562) data 0.000 (0.002) loss 1.0078 (1.1781) acc 65.6250 (70.4893) lr 1.9298e-03 eta 18:19:49 +epoch [8/50] batch [765/1000] time 1.554 (1.562) data 0.000 (0.002) loss 1.1484 (1.1782) 
acc 62.5000 (70.4616) lr 1.9298e-03 eta 18:19:40 +epoch [8/50] batch [770/1000] time 1.587 (1.562) data 0.000 (0.002) loss 0.9385 (1.1777) acc 75.0000 (70.4667) lr 1.9298e-03 eta 18:19:37 +epoch [8/50] batch [775/1000] time 1.587 (1.562) data 0.000 (0.002) loss 1.7920 (1.1772) acc 59.3750 (70.4839) lr 1.9298e-03 eta 18:19:35 +epoch [8/50] batch [780/1000] time 1.556 (1.563) data 0.001 (0.002) loss 1.1279 (1.1759) acc 59.3750 (70.4888) lr 1.9298e-03 eta 18:19:29 +epoch [8/50] batch [785/1000] time 1.541 (1.562) data 0.000 (0.002) loss 0.7905 (1.1754) acc 84.3750 (70.5135) lr 1.9298e-03 eta 18:19:17 +epoch [8/50] batch [790/1000] time 1.573 (1.563) data 0.000 (0.002) loss 1.4082 (1.1754) acc 65.6250 (70.5380) lr 1.9298e-03 eta 18:19:16 +epoch [8/50] batch [795/1000] time 1.551 (1.562) data 0.000 (0.002) loss 1.9385 (1.1765) acc 62.5000 (70.5346) lr 1.9298e-03 eta 18:19:04 +epoch [8/50] batch [800/1000] time 1.589 (1.563) data 0.000 (0.002) loss 1.2793 (1.1773) acc 68.7500 (70.5195) lr 1.9298e-03 eta 18:18:57 +epoch [8/50] batch [805/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.1104 (1.1773) acc 62.5000 (70.4930) lr 1.9298e-03 eta 18:18:47 +epoch [8/50] batch [810/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.2500 (1.1773) acc 68.7500 (70.4938) lr 1.9298e-03 eta 18:18:38 +epoch [8/50] batch [815/1000] time 1.542 (1.562) data 0.000 (0.002) loss 0.8198 (1.1761) acc 84.3750 (70.5215) lr 1.9298e-03 eta 18:18:30 +epoch [8/50] batch [820/1000] time 1.590 (1.562) data 0.000 (0.002) loss 1.2422 (1.1760) acc 71.8750 (70.5221) lr 1.9298e-03 eta 18:18:25 +epoch [8/50] batch [825/1000] time 1.534 (1.562) data 0.001 (0.002) loss 1.0010 (1.1749) acc 75.0000 (70.5492) lr 1.9298e-03 eta 18:18:16 +epoch [8/50] batch [830/1000] time 1.702 (1.563) data 0.000 (0.002) loss 0.5137 (1.1735) acc 87.5000 (70.5798) lr 1.9298e-03 eta 18:18:13 +epoch [8/50] batch [835/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.5693 (1.1757) acc 62.5000 (70.5539) lr 1.9298e-03 eta 18:18:07 
+epoch [8/50] batch [840/1000] time 1.560 (1.563) data 0.000 (0.002) loss 1.0469 (1.1753) acc 71.8750 (70.5766) lr 1.9298e-03 eta 18:17:58 +epoch [8/50] batch [845/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.5332 (1.1766) acc 62.5000 (70.5362) lr 1.9298e-03 eta 18:17:50 +epoch [8/50] batch [850/1000] time 1.565 (1.563) data 0.000 (0.002) loss 1.4209 (1.1760) acc 65.6250 (70.5625) lr 1.9298e-03 eta 18:17:42 +epoch [8/50] batch [855/1000] time 1.554 (1.563) data 0.000 (0.002) loss 0.8384 (1.1758) acc 87.5000 (70.5629) lr 1.9298e-03 eta 18:17:33 +epoch [8/50] batch [860/1000] time 1.534 (1.562) data 0.001 (0.002) loss 1.4697 (1.1762) acc 59.3750 (70.5378) lr 1.9298e-03 eta 18:17:23 +epoch [8/50] batch [865/1000] time 1.542 (1.562) data 0.000 (0.002) loss 0.5859 (1.1754) acc 71.8750 (70.5419) lr 1.9298e-03 eta 18:17:11 +epoch [8/50] batch [870/1000] time 1.565 (1.562) data 0.000 (0.002) loss 1.1973 (1.1750) acc 62.5000 (70.5352) lr 1.9298e-03 eta 18:17:01 +epoch [8/50] batch [875/1000] time 1.574 (1.562) data 0.000 (0.002) loss 1.2910 (1.1748) acc 62.5000 (70.5179) lr 1.9298e-03 eta 18:16:54 +epoch [8/50] batch [880/1000] time 1.536 (1.562) data 0.000 (0.002) loss 1.5078 (1.1755) acc 59.3750 (70.5043) lr 1.9298e-03 eta 18:16:46 +epoch [8/50] batch [885/1000] time 1.556 (1.562) data 0.000 (0.002) loss 1.2256 (1.1760) acc 68.7500 (70.4873) lr 1.9298e-03 eta 18:16:36 +epoch [8/50] batch [890/1000] time 1.550 (1.562) data 0.000 (0.002) loss 1.5068 (1.1756) acc 59.3750 (70.5056) lr 1.9298e-03 eta 18:16:28 +epoch [8/50] batch [895/1000] time 1.560 (1.563) data 0.001 (0.002) loss 1.4619 (1.1756) acc 75.0000 (70.5237) lr 1.9298e-03 eta 18:16:29 +epoch [8/50] batch [900/1000] time 1.542 (1.562) data 0.001 (0.002) loss 0.9448 (1.1761) acc 81.2500 (70.5347) lr 1.9298e-03 eta 18:16:18 +epoch [8/50] batch [905/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.4932 (1.1763) acc 62.5000 (70.5456) lr 1.9298e-03 eta 18:16:13 +epoch [8/50] batch [910/1000] time 1.551 (1.563) 
data 0.001 (0.001) loss 1.4150 (1.1762) acc 62.5000 (70.5632) lr 1.9298e-03 eta 18:16:06 +epoch [8/50] batch [915/1000] time 1.560 (1.563) data 0.000 (0.001) loss 1.4883 (1.1765) acc 68.7500 (70.5430) lr 1.9298e-03 eta 18:15:59 +epoch [8/50] batch [920/1000] time 1.540 (1.563) data 0.000 (0.001) loss 1.2500 (1.1765) acc 59.3750 (70.5265) lr 1.9298e-03 eta 18:15:50 +epoch [8/50] batch [925/1000] time 1.567 (1.562) data 0.000 (0.001) loss 1.0693 (1.1763) acc 71.8750 (70.5236) lr 1.9298e-03 eta 18:15:40 +epoch [8/50] batch [930/1000] time 1.562 (1.562) data 0.001 (0.001) loss 1.1328 (1.1776) acc 78.1250 (70.5309) lr 1.9298e-03 eta 18:15:32 +epoch [8/50] batch [935/1000] time 1.555 (1.562) data 0.001 (0.001) loss 2.1523 (1.1778) acc 53.1250 (70.5348) lr 1.9298e-03 eta 18:15:25 +epoch [8/50] batch [940/1000] time 1.578 (1.563) data 0.000 (0.001) loss 0.7563 (1.1769) acc 81.2500 (70.5452) lr 1.9298e-03 eta 18:15:25 +epoch [8/50] batch [945/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.1963 (1.1773) acc 75.0000 (70.5522) lr 1.9298e-03 eta 18:15:18 +epoch [8/50] batch [950/1000] time 1.573 (1.563) data 0.000 (0.001) loss 1.3721 (1.1781) acc 59.3750 (70.5428) lr 1.9298e-03 eta 18:15:08 +epoch [8/50] batch [955/1000] time 1.539 (1.563) data 0.000 (0.001) loss 1.4453 (1.1797) acc 71.8750 (70.5007) lr 1.9298e-03 eta 18:15:00 +epoch [8/50] batch [960/1000] time 1.559 (1.563) data 0.000 (0.001) loss 0.7344 (1.1786) acc 75.0000 (70.5176) lr 1.9298e-03 eta 18:14:52 +epoch [8/50] batch [965/1000] time 1.543 (1.563) data 0.000 (0.001) loss 1.4082 (1.1796) acc 62.5000 (70.4987) lr 1.9298e-03 eta 18:14:41 +epoch [8/50] batch [970/1000] time 1.570 (1.562) data 0.000 (0.001) loss 1.4395 (1.1805) acc 78.1250 (70.4800) lr 1.9298e-03 eta 18:14:31 +epoch [8/50] batch [975/1000] time 1.571 (1.562) data 0.000 (0.001) loss 1.4600 (1.1807) acc 62.5000 (70.4712) lr 1.9298e-03 eta 18:14:22 +epoch [8/50] batch [980/1000] time 1.559 (1.562) data 0.000 (0.001) loss 1.3486 (1.1810) acc 71.8750 
(70.4783) lr 1.9298e-03 eta 18:14:15 +epoch [8/50] batch [985/1000] time 1.587 (1.563) data 0.001 (0.001) loss 0.9312 (1.1815) acc 71.8750 (70.4854) lr 1.9298e-03 eta 18:14:16 +epoch [8/50] batch [990/1000] time 1.555 (1.563) data 0.000 (0.001) loss 1.5342 (1.1837) acc 53.1250 (70.4388) lr 1.9298e-03 eta 18:14:08 +epoch [8/50] batch [995/1000] time 1.574 (1.563) data 0.000 (0.001) loss 1.0576 (1.1838) acc 65.6250 (70.4366) lr 1.9298e-03 eta 18:13:56 +epoch [8/50] batch [1000/1000] time 1.568 (1.563) data 0.000 (0.001) loss 1.0010 (1.1849) acc 81.2500 (70.4281) lr 1.9048e-03 eta 18:13:48 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,054 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [9/50] batch [5/1000] time 1.551 (1.690) data 0.000 (0.181) loss 0.9941 (1.1134) acc 75.0000 (71.8750) lr 1.9048e-03 eta 19:42:50 +epoch [9/50] batch [10/1000] time 1.574 (1.631) data 0.001 (0.091) loss 0.9141 (1.0746) acc 71.8750 (70.9375) lr 1.9048e-03 eta 19:01:25 +epoch [9/50] batch [15/1000] time 1.555 (1.606) data 0.000 (0.061) loss 1.3936 (1.1519) acc 59.3750 (70.8333) lr 1.9048e-03 eta 18:44:06 +epoch [9/50] batch [20/1000] time 1.560 (1.595) data 0.001 (0.046) loss 1.5830 (1.1297) acc 59.3750 (71.2500) lr 1.9048e-03 eta 18:36:10 +epoch [9/50] batch [25/1000] time 1.570 (1.588) data 0.001 (0.037) loss 1.0400 (1.1402) acc 78.1250 (71.3750) lr 1.9048e-03 eta 18:31:17 +epoch [9/50] batch [30/1000] time 1.565 (1.584) data 0.000 (0.031) loss 0.8384 (1.1369) acc 78.1250 (71.0417) lr 1.9048e-03 eta 18:28:14 +epoch [9/50] batch [35/1000] time 1.553 (1.581) data 0.000 (0.026) loss 1.1533 (1.1346) acc 78.1250 (71.6964) lr 1.9048e-03 eta 18:25:47 +epoch [9/50] batch [40/1000] time 1.556 (1.579) data 0.000 (0.023) loss 1.6494 (1.1554) acc 65.6250 (71.8750) lr 1.9048e-03 eta 18:24:17 +epoch [9/50] batch [45/1000] time 1.578 (1.578) data 0.000 (0.021) loss 0.9497 (1.1538) acc 75.0000 (72.0833) lr 1.9048e-03 eta 18:23:24 +epoch [9/50] batch [50/1000] time 
1.558 (1.577) data 0.001 (0.019) loss 0.8877 (1.1616) acc 68.7500 (71.6875) lr 1.9048e-03 eta 18:22:14 +epoch [9/50] batch [55/1000] time 1.571 (1.579) data 0.001 (0.017) loss 1.7939 (1.1542) acc 59.3750 (71.7614) lr 1.9048e-03 eta 18:23:56 +epoch [9/50] batch [60/1000] time 1.593 (1.577) data 0.001 (0.016) loss 1.6680 (1.1743) acc 59.3750 (71.4583) lr 1.9048e-03 eta 18:22:31 +epoch [9/50] batch [65/1000] time 1.561 (1.576) data 0.000 (0.014) loss 1.1367 (1.2004) acc 68.7500 (70.8654) lr 1.9048e-03 eta 18:21:26 +epoch [9/50] batch [70/1000] time 1.552 (1.575) data 0.001 (0.013) loss 1.7451 (1.1936) acc 53.1250 (70.7143) lr 1.9048e-03 eta 18:20:29 +epoch [9/50] batch [75/1000] time 1.545 (1.573) data 0.000 (0.013) loss 0.7388 (1.2036) acc 81.2500 (70.6667) lr 1.9048e-03 eta 18:19:22 +epoch [9/50] batch [80/1000] time 1.554 (1.573) data 0.000 (0.012) loss 1.0996 (1.1969) acc 65.6250 (70.5859) lr 1.9048e-03 eta 18:18:41 +epoch [9/50] batch [85/1000] time 1.541 (1.571) data 0.000 (0.011) loss 0.9014 (1.1863) acc 71.8750 (70.8456) lr 1.9048e-03 eta 18:17:37 +epoch [9/50] batch [90/1000] time 1.570 (1.570) data 0.001 (0.011) loss 1.0635 (1.1898) acc 71.8750 (70.6944) lr 1.9048e-03 eta 18:16:52 +epoch [9/50] batch [95/1000] time 1.539 (1.570) data 0.000 (0.010) loss 0.7002 (1.1912) acc 81.2500 (70.6250) lr 1.9048e-03 eta 18:16:11 +epoch [9/50] batch [100/1000] time 1.546 (1.571) data 0.001 (0.010) loss 0.6084 (1.1854) acc 78.1250 (70.5938) lr 1.9048e-03 eta 18:16:47 +epoch [9/50] batch [105/1000] time 1.554 (1.570) data 0.000 (0.009) loss 1.9287 (1.1988) acc 50.0000 (70.3869) lr 1.9048e-03 eta 18:16:01 +epoch [9/50] batch [110/1000] time 1.572 (1.569) data 0.000 (0.009) loss 1.2715 (1.2087) acc 65.6250 (70.1705) lr 1.9048e-03 eta 18:15:27 +epoch [9/50] batch [115/1000] time 1.551 (1.569) data 0.000 (0.008) loss 1.1709 (1.2146) acc 71.8750 (70.1087) lr 1.9048e-03 eta 18:15:01 +epoch [9/50] batch [120/1000] time 1.564 (1.568) data 0.000 (0.008) loss 1.1650 (1.2071) acc 
78.1250 (70.3125) lr 1.9048e-03 eta 18:14:11 +epoch [9/50] batch [125/1000] time 1.550 (1.567) data 0.000 (0.008) loss 1.3770 (1.1993) acc 68.7500 (70.5750) lr 1.9048e-03 eta 18:13:51 +epoch [9/50] batch [130/1000] time 1.572 (1.567) data 0.000 (0.007) loss 1.0273 (1.1988) acc 78.1250 (70.5529) lr 1.9048e-03 eta 18:13:36 +epoch [9/50] batch [135/1000] time 1.548 (1.567) data 0.000 (0.007) loss 1.5215 (1.1988) acc 50.0000 (70.1852) lr 1.9048e-03 eta 18:13:03 +epoch [9/50] batch [140/1000] time 1.545 (1.567) data 0.000 (0.007) loss 0.7090 (1.1966) acc 78.1250 (70.1786) lr 1.9048e-03 eta 18:13:20 +epoch [9/50] batch [145/1000] time 1.543 (1.567) data 0.000 (0.007) loss 1.2695 (1.1923) acc 62.5000 (70.2371) lr 1.9048e-03 eta 18:12:54 +epoch [9/50] batch [150/1000] time 1.561 (1.566) data 0.001 (0.006) loss 1.0508 (1.1872) acc 81.2500 (70.3750) lr 1.9048e-03 eta 18:12:27 +epoch [9/50] batch [155/1000] time 1.544 (1.566) data 0.000 (0.006) loss 1.1426 (1.1855) acc 68.7500 (70.3629) lr 1.9048e-03 eta 18:12:08 +epoch [9/50] batch [160/1000] time 1.563 (1.566) data 0.000 (0.006) loss 0.7622 (1.1791) acc 84.3750 (70.5078) lr 1.9048e-03 eta 18:12:01 +epoch [9/50] batch [165/1000] time 1.555 (1.566) data 0.000 (0.006) loss 1.2910 (1.1831) acc 65.6250 (70.3030) lr 1.9048e-03 eta 18:11:56 +epoch [9/50] batch [170/1000] time 1.565 (1.566) data 0.000 (0.006) loss 1.7646 (1.1879) acc 56.2500 (70.2941) lr 1.9048e-03 eta 18:11:45 +epoch [9/50] batch [175/1000] time 1.549 (1.566) data 0.000 (0.006) loss 1.1475 (1.1877) acc 68.7500 (70.3571) lr 1.9048e-03 eta 18:11:26 +epoch [9/50] batch [180/1000] time 1.534 (1.565) data 0.000 (0.005) loss 1.3672 (1.1819) acc 75.0000 (70.5208) lr 1.9048e-03 eta 18:10:59 +epoch [9/50] batch [185/1000] time 1.551 (1.565) data 0.000 (0.005) loss 0.8066 (1.1827) acc 87.5000 (70.6250) lr 1.9048e-03 eta 18:10:44 +epoch [9/50] batch [190/1000] time 1.560 (1.565) data 0.000 (0.005) loss 1.1377 (1.1797) acc 59.3750 (70.5921) lr 1.9048e-03 eta 18:10:31 +epoch 
[9/50] batch [195/1000] time 1.531 (1.565) data 0.000 (0.005) loss 0.9980 (1.1812) acc 75.0000 (70.4647) lr 1.9048e-03 eta 18:10:16 +epoch [9/50] batch [200/1000] time 1.558 (1.565) data 0.000 (0.005) loss 1.2334 (1.1833) acc 68.7500 (70.4062) lr 1.9048e-03 eta 18:10:09 +epoch [9/50] batch [205/1000] time 1.537 (1.565) data 0.001 (0.005) loss 1.6865 (1.1850) acc 71.8750 (70.4421) lr 1.9048e-03 eta 18:10:22 +epoch [9/50] batch [210/1000] time 1.561 (1.565) data 0.000 (0.005) loss 1.1836 (1.1868) acc 71.8750 (70.4464) lr 1.9048e-03 eta 18:10:05 +epoch [9/50] batch [215/1000] time 1.564 (1.565) data 0.001 (0.005) loss 1.6943 (1.1908) acc 53.1250 (70.2471) lr 1.9048e-03 eta 18:09:45 +epoch [9/50] batch [220/1000] time 1.545 (1.565) data 0.001 (0.005) loss 1.2988 (1.1927) acc 68.7500 (70.1562) lr 1.9048e-03 eta 18:09:33 +epoch [9/50] batch [225/1000] time 1.575 (1.565) data 0.001 (0.004) loss 0.8462 (1.1878) acc 78.1250 (70.3056) lr 1.9048e-03 eta 18:09:22 +epoch [9/50] batch [230/1000] time 1.556 (1.565) data 0.000 (0.004) loss 1.1699 (1.1858) acc 68.7500 (70.3668) lr 1.9048e-03 eta 18:09:11 +epoch [9/50] batch [235/1000] time 1.578 (1.564) data 0.000 (0.004) loss 1.1855 (1.1848) acc 71.8750 (70.3989) lr 1.9048e-03 eta 18:08:53 +epoch [9/50] batch [240/1000] time 1.565 (1.564) data 0.000 (0.004) loss 2.0117 (1.1886) acc 62.5000 (70.4297) lr 1.9048e-03 eta 18:08:45 +epoch [9/50] batch [245/1000] time 1.562 (1.564) data 0.001 (0.004) loss 0.7988 (1.1875) acc 81.2500 (70.5485) lr 1.9048e-03 eta 18:08:39 +epoch [9/50] batch [250/1000] time 1.561 (1.565) data 0.000 (0.004) loss 1.1152 (1.1894) acc 65.6250 (70.4375) lr 1.9048e-03 eta 18:08:57 +epoch [9/50] batch [255/1000] time 1.566 (1.565) data 0.001 (0.004) loss 0.8188 (1.1850) acc 84.3750 (70.4902) lr 1.9048e-03 eta 18:08:49 +epoch [9/50] batch [260/1000] time 1.552 (1.565) data 0.000 (0.004) loss 1.2227 (1.1869) acc 75.0000 (70.4808) lr 1.9048e-03 eta 18:08:34 +epoch [9/50] batch [265/1000] time 1.547 (1.565) data 0.000 
(0.004) loss 1.0947 (1.1874) acc 68.7500 (70.5189) lr 1.9048e-03 eta 18:08:23 +epoch [9/50] batch [270/1000] time 1.549 (1.565) data 0.000 (0.004) loss 0.9961 (1.1873) acc 71.8750 (70.5093) lr 1.9048e-03 eta 18:08:18 +epoch [9/50] batch [275/1000] time 1.587 (1.565) data 0.000 (0.004) loss 0.8271 (1.1852) acc 87.5000 (70.6250) lr 1.9048e-03 eta 18:08:10 +epoch [9/50] batch [280/1000] time 1.537 (1.564) data 0.000 (0.004) loss 1.0645 (1.1849) acc 75.0000 (70.6138) lr 1.9048e-03 eta 18:07:45 +epoch [9/50] batch [285/1000] time 1.542 (1.564) data 0.001 (0.004) loss 1.6875 (1.1835) acc 59.3750 (70.6360) lr 1.9048e-03 eta 18:07:26 +epoch [9/50] batch [290/1000] time 1.674 (1.564) data 0.001 (0.004) loss 0.8091 (1.1824) acc 68.7500 (70.6142) lr 1.9048e-03 eta 18:07:23 +epoch [9/50] batch [295/1000] time 1.553 (1.564) data 0.000 (0.004) loss 0.8940 (1.1785) acc 81.2500 (70.7097) lr 1.9048e-03 eta 18:07:20 +epoch [9/50] batch [300/1000] time 1.565 (1.564) data 0.000 (0.003) loss 1.1436 (1.1755) acc 81.2500 (70.8021) lr 1.9048e-03 eta 18:07:10 +epoch [9/50] batch [305/1000] time 1.544 (1.564) data 0.000 (0.003) loss 1.3867 (1.1791) acc 59.3750 (70.7582) lr 1.9048e-03 eta 18:06:59 +epoch [9/50] batch [310/1000] time 1.577 (1.564) data 0.001 (0.003) loss 0.9414 (1.1778) acc 78.1250 (70.7762) lr 1.9048e-03 eta 18:06:53 +epoch [9/50] batch [315/1000] time 1.536 (1.564) data 0.000 (0.003) loss 1.2305 (1.1752) acc 65.6250 (70.8135) lr 1.9048e-03 eta 18:06:44 +epoch [9/50] batch [320/1000] time 1.528 (1.564) data 0.000 (0.003) loss 1.5166 (1.1754) acc 62.5000 (70.8691) lr 1.9048e-03 eta 18:06:21 +epoch [9/50] batch [325/1000] time 1.563 (1.564) data 0.000 (0.003) loss 1.7549 (1.1741) acc 65.6250 (70.9615) lr 1.9048e-03 eta 18:06:10 +epoch [9/50] batch [330/1000] time 1.550 (1.564) data 0.001 (0.003) loss 0.8120 (1.1725) acc 75.0000 (70.9754) lr 1.9048e-03 eta 18:05:55 +epoch [9/50] batch [335/1000] time 1.536 (1.563) data 0.001 (0.003) loss 1.0068 (1.1731) acc 68.7500 (70.8955) lr 
1.9048e-03 eta 18:05:35 +epoch [9/50] batch [340/1000] time 1.543 (1.563) data 0.000 (0.003) loss 1.4941 (1.1756) acc 53.1250 (70.8732) lr 1.9048e-03 eta 18:05:19 +epoch [9/50] batch [345/1000] time 1.528 (1.563) data 0.000 (0.003) loss 1.2939 (1.1736) acc 75.0000 (70.9239) lr 1.9048e-03 eta 18:05:06 +epoch [9/50] batch [350/1000] time 1.563 (1.563) data 0.000 (0.003) loss 1.1768 (1.1712) acc 75.0000 (70.9286) lr 1.9048e-03 eta 18:04:55 +epoch [9/50] batch [355/1000] time 1.554 (1.563) data 0.000 (0.003) loss 0.8403 (1.1710) acc 81.2500 (70.9331) lr 1.9048e-03 eta 18:05:02 +epoch [9/50] batch [360/1000] time 1.548 (1.563) data 0.000 (0.003) loss 1.3408 (1.1716) acc 62.5000 (70.9288) lr 1.9048e-03 eta 18:04:48 +epoch [9/50] batch [365/1000] time 1.555 (1.563) data 0.001 (0.003) loss 1.6289 (1.1705) acc 71.8750 (71.0017) lr 1.9048e-03 eta 18:04:33 +epoch [9/50] batch [370/1000] time 1.528 (1.563) data 0.001 (0.003) loss 1.3086 (1.1709) acc 68.7500 (70.9713) lr 1.9048e-03 eta 18:04:20 +epoch [9/50] batch [375/1000] time 1.544 (1.563) data 0.000 (0.003) loss 1.3008 (1.1749) acc 65.6250 (70.8250) lr 1.9048e-03 eta 18:04:03 +epoch [9/50] batch [380/1000] time 1.553 (1.562) data 0.001 (0.003) loss 1.0146 (1.1748) acc 65.6250 (70.7977) lr 1.9048e-03 eta 18:03:47 +epoch [9/50] batch [385/1000] time 1.569 (1.562) data 0.000 (0.003) loss 0.7183 (1.1734) acc 84.3750 (70.8360) lr 1.9048e-03 eta 18:03:41 +epoch [9/50] batch [390/1000] time 1.575 (1.562) data 0.000 (0.003) loss 0.9272 (1.1739) acc 71.8750 (70.8013) lr 1.9048e-03 eta 18:03:34 +epoch [9/50] batch [395/1000] time 1.556 (1.562) data 0.000 (0.003) loss 1.1270 (1.1751) acc 65.6250 (70.7595) lr 1.9048e-03 eta 18:03:21 +epoch [9/50] batch [400/1000] time 1.554 (1.563) data 0.001 (0.003) loss 1.3838 (1.1748) acc 62.5000 (70.7578) lr 1.9048e-03 eta 18:03:22 +epoch [9/50] batch [405/1000] time 1.540 (1.562) data 0.000 (0.003) loss 1.1855 (1.1768) acc 65.6250 (70.7330) lr 1.9048e-03 eta 18:03:09 +epoch [9/50] batch 
[410/1000] time 1.551 (1.562) data 0.001 (0.003) loss 0.6460 (1.1743) acc 78.1250 (70.7698) lr 1.9048e-03 eta 18:02:55 +epoch [9/50] batch [415/1000] time 1.554 (1.562) data 0.000 (0.003) loss 0.8374 (1.1739) acc 81.2500 (70.8133) lr 1.9048e-03 eta 18:02:44 +epoch [9/50] batch [420/1000] time 1.525 (1.562) data 0.000 (0.003) loss 1.2705 (1.1758) acc 65.6250 (70.7664) lr 1.9048e-03 eta 18:02:30 +epoch [9/50] batch [425/1000] time 1.544 (1.562) data 0.000 (0.003) loss 1.5654 (1.1781) acc 65.6250 (70.7132) lr 1.9048e-03 eta 18:02:14 +epoch [9/50] batch [430/1000] time 1.528 (1.562) data 0.000 (0.003) loss 1.4971 (1.1780) acc 56.2500 (70.7049) lr 1.9048e-03 eta 18:01:57 +epoch [9/50] batch [435/1000] time 1.540 (1.562) data 0.000 (0.003) loss 1.0410 (1.1771) acc 78.1250 (70.6968) lr 1.9048e-03 eta 18:01:45 +epoch [9/50] batch [440/1000] time 1.533 (1.561) data 0.000 (0.003) loss 0.8994 (1.1772) acc 75.0000 (70.7173) lr 1.9048e-03 eta 18:01:25 +epoch [9/50] batch [445/1000] time 1.538 (1.561) data 0.000 (0.002) loss 1.0459 (1.1790) acc 71.8750 (70.6601) lr 1.9048e-03 eta 18:01:22 +epoch [9/50] batch [450/1000] time 1.576 (1.561) data 0.001 (0.002) loss 1.1152 (1.1789) acc 68.7500 (70.6528) lr 1.9048e-03 eta 18:01:12 +epoch [9/50] batch [455/1000] time 1.549 (1.561) data 0.001 (0.002) loss 1.0410 (1.1811) acc 78.1250 (70.6250) lr 1.9048e-03 eta 18:01:04 +epoch [9/50] batch [460/1000] time 1.557 (1.561) data 0.000 (0.002) loss 1.1855 (1.1819) acc 68.7500 (70.6114) lr 1.9048e-03 eta 18:00:54 +epoch [9/50] batch [465/1000] time 1.576 (1.561) data 0.000 (0.002) loss 0.8325 (1.1800) acc 78.1250 (70.6317) lr 1.9048e-03 eta 18:00:47 +epoch [9/50] batch [470/1000] time 1.560 (1.561) data 0.000 (0.002) loss 1.0898 (1.1806) acc 65.6250 (70.5984) lr 1.9048e-03 eta 18:00:35 +epoch [9/50] batch [475/1000] time 1.572 (1.561) data 0.001 (0.002) loss 0.6704 (1.1788) acc 84.3750 (70.6645) lr 1.9048e-03 eta 18:00:28 +epoch [9/50] batch [480/1000] time 1.556 (1.561) data 0.001 (0.002) loss 
0.8564 (1.1796) acc 78.1250 (70.6641) lr 1.9048e-03 eta 18:00:23 +epoch [9/50] batch [485/1000] time 1.556 (1.561) data 0.000 (0.002) loss 1.2129 (1.1790) acc 62.5000 (70.6637) lr 1.9048e-03 eta 18:00:12 +epoch [9/50] batch [490/1000] time 1.551 (1.561) data 0.000 (0.002) loss 1.8760 (1.1793) acc 59.3750 (70.6952) lr 1.9048e-03 eta 17:59:59 +epoch [9/50] batch [495/1000] time 1.565 (1.561) data 0.000 (0.002) loss 1.2637 (1.1815) acc 68.7500 (70.6629) lr 1.9048e-03 eta 17:59:46 +epoch [9/50] batch [500/1000] time 1.537 (1.561) data 0.001 (0.002) loss 1.1152 (1.1820) acc 84.3750 (70.6937) lr 1.9048e-03 eta 17:59:30 +epoch [9/50] batch [505/1000] time 1.529 (1.561) data 0.001 (0.002) loss 1.5771 (1.1813) acc 59.3750 (70.6807) lr 1.9048e-03 eta 17:59:27 +epoch [9/50] batch [510/1000] time 1.554 (1.561) data 0.001 (0.002) loss 1.0039 (1.1822) acc 71.8750 (70.6434) lr 1.9048e-03 eta 17:59:11 +epoch [9/50] batch [515/1000] time 1.580 (1.561) data 0.000 (0.002) loss 0.8965 (1.1808) acc 78.1250 (70.7039) lr 1.9048e-03 eta 17:59:01 +epoch [9/50] batch [520/1000] time 1.554 (1.561) data 0.000 (0.002) loss 1.0381 (1.1804) acc 71.8750 (70.7031) lr 1.9048e-03 eta 17:58:53 +epoch [9/50] batch [525/1000] time 1.567 (1.561) data 0.001 (0.002) loss 1.2617 (1.1786) acc 71.8750 (70.7679) lr 1.9048e-03 eta 17:58:42 +epoch [9/50] batch [530/1000] time 1.558 (1.560) data 0.000 (0.002) loss 1.4785 (1.1783) acc 68.7500 (70.7842) lr 1.9048e-03 eta 17:58:33 +epoch [9/50] batch [535/1000] time 1.554 (1.560) data 0.001 (0.002) loss 0.7627 (1.1777) acc 84.3750 (70.8294) lr 1.9048e-03 eta 17:58:26 +epoch [9/50] batch [540/1000] time 1.568 (1.560) data 0.001 (0.002) loss 1.7285 (1.1790) acc 53.1250 (70.7986) lr 1.9048e-03 eta 17:58:15 +epoch [9/50] batch [545/1000] time 1.557 (1.560) data 0.000 (0.002) loss 1.7783 (1.1782) acc 50.0000 (70.7856) lr 1.9048e-03 eta 17:58:06 +epoch [9/50] batch [550/1000] time 1.577 (1.561) data 0.001 (0.002) loss 1.3467 (1.1774) acc 62.5000 (70.7955) lr 1.9048e-03 
eta 17:58:12 +epoch [9/50] batch [555/1000] time 1.550 (1.561) data 0.000 (0.002) loss 1.1182 (1.1772) acc 75.0000 (70.8108) lr 1.9048e-03 eta 17:58:02 +epoch [9/50] batch [560/1000] time 1.556 (1.561) data 0.000 (0.002) loss 1.5459 (1.1777) acc 68.7500 (70.8147) lr 1.9048e-03 eta 17:57:52 +epoch [9/50] batch [565/1000] time 1.546 (1.561) data 0.001 (0.002) loss 0.6201 (1.1788) acc 78.1250 (70.7799) lr 1.9048e-03 eta 17:57:43 +epoch [9/50] batch [570/1000] time 1.558 (1.561) data 0.000 (0.002) loss 1.2637 (1.1807) acc 78.1250 (70.7675) lr 1.9048e-03 eta 17:57:32 +epoch [9/50] batch [575/1000] time 1.569 (1.561) data 0.000 (0.002) loss 0.7979 (1.1795) acc 75.0000 (70.8043) lr 1.9048e-03 eta 17:57:24 +epoch [9/50] batch [580/1000] time 1.548 (1.560) data 0.000 (0.002) loss 0.8438 (1.1793) acc 78.1250 (70.8136) lr 1.9048e-03 eta 17:57:13 +epoch [9/50] batch [585/1000] time 1.571 (1.560) data 0.001 (0.002) loss 1.3770 (1.1785) acc 75.0000 (70.8280) lr 1.9048e-03 eta 17:57:07 +epoch [9/50] batch [590/1000] time 1.556 (1.560) data 0.000 (0.002) loss 1.0361 (1.1793) acc 71.8750 (70.8157) lr 1.9048e-03 eta 17:56:57 +epoch [9/50] batch [595/1000] time 1.577 (1.561) data 0.001 (0.002) loss 0.9746 (1.1787) acc 81.2500 (70.8298) lr 1.9048e-03 eta 17:57:05 +epoch [9/50] batch [600/1000] time 1.526 (1.561) data 0.001 (0.002) loss 0.7832 (1.1789) acc 68.7500 (70.8073) lr 1.9048e-03 eta 17:56:53 +epoch [9/50] batch [605/1000] time 1.565 (1.561) data 0.000 (0.002) loss 1.1250 (1.1772) acc 65.6250 (70.8006) lr 1.9048e-03 eta 17:56:44 +epoch [9/50] batch [610/1000] time 1.536 (1.561) data 0.000 (0.002) loss 1.2715 (1.1775) acc 68.7500 (70.7889) lr 1.9048e-03 eta 17:56:32 +epoch [9/50] batch [615/1000] time 1.543 (1.561) data 0.000 (0.002) loss 1.0527 (1.1770) acc 75.0000 (70.7978) lr 1.9048e-03 eta 17:56:22 +epoch [9/50] batch [620/1000] time 1.538 (1.560) data 0.000 (0.002) loss 1.6670 (1.1775) acc 68.7500 (70.8014) lr 1.9048e-03 eta 17:56:10 +epoch [9/50] batch [625/1000] time 
1.564 (1.560) data 0.000 (0.002) loss 1.5254 (1.1795) acc 71.8750 (70.7900) lr 1.9048e-03 eta 17:56:02 +epoch [9/50] batch [630/1000] time 1.546 (1.560) data 0.000 (0.002) loss 0.7051 (1.1786) acc 78.1250 (70.8234) lr 1.9048e-03 eta 17:55:52 +epoch [9/50] batch [635/1000] time 1.548 (1.560) data 0.000 (0.002) loss 0.9492 (1.1776) acc 78.1250 (70.8563) lr 1.9048e-03 eta 17:55:41 +epoch [9/50] batch [640/1000] time 1.558 (1.560) data 0.000 (0.002) loss 1.2637 (1.1779) acc 68.7500 (70.8350) lr 1.9048e-03 eta 17:55:31 +epoch [9/50] batch [645/1000] time 1.582 (1.560) data 0.001 (0.002) loss 0.8877 (1.1748) acc 68.7500 (70.8866) lr 1.9048e-03 eta 17:55:27 +epoch [9/50] batch [650/1000] time 1.526 (1.560) data 0.000 (0.002) loss 1.0977 (1.1740) acc 65.6250 (70.8846) lr 1.9048e-03 eta 17:55:17 +epoch [9/50] batch [655/1000] time 1.728 (1.560) data 0.001 (0.002) loss 0.7861 (1.1750) acc 78.1250 (70.8922) lr 1.9048e-03 eta 17:55:18 +epoch [9/50] batch [660/1000] time 1.546 (1.560) data 0.000 (0.002) loss 1.0439 (1.1767) acc 75.0000 (70.8570) lr 1.9048e-03 eta 17:55:08 +epoch [9/50] batch [665/1000] time 1.572 (1.560) data 0.000 (0.002) loss 1.0625 (1.1777) acc 68.7500 (70.8412) lr 1.9048e-03 eta 17:54:58 +epoch [9/50] batch [670/1000] time 1.522 (1.560) data 0.000 (0.002) loss 0.9180 (1.1764) acc 71.8750 (70.8722) lr 1.9048e-03 eta 17:54:41 +epoch [9/50] batch [675/1000] time 1.549 (1.560) data 0.000 (0.002) loss 1.4033 (1.1768) acc 65.6250 (70.8611) lr 1.9048e-03 eta 17:54:31 +epoch [9/50] batch [680/1000] time 1.580 (1.560) data 0.001 (0.002) loss 1.4258 (1.1758) acc 68.7500 (70.8869) lr 1.9048e-03 eta 17:54:25 +epoch [9/50] batch [685/1000] time 1.552 (1.560) data 0.001 (0.002) loss 1.7881 (1.1782) acc 46.8750 (70.8212) lr 1.9048e-03 eta 17:54:17 +epoch [9/50] batch [690/1000] time 1.567 (1.560) data 0.000 (0.002) loss 1.1562 (1.1773) acc 68.7500 (70.8107) lr 1.9048e-03 eta 17:54:11 +epoch [9/50] batch [695/1000] time 1.567 (1.560) data 0.001 (0.002) loss 0.5806 (1.1754) 
acc 87.5000 (70.8498) lr 1.9048e-03 eta 17:54:02 +epoch [9/50] batch [700/1000] time 1.701 (1.560) data 0.001 (0.002) loss 1.3047 (1.1750) acc 62.5000 (70.8482) lr 1.9048e-03 eta 17:54:01 +epoch [9/50] batch [705/1000] time 1.579 (1.560) data 0.001 (0.002) loss 1.2666 (1.1765) acc 75.0000 (70.8200) lr 1.9048e-03 eta 17:53:54 +epoch [9/50] batch [710/1000] time 1.555 (1.560) data 0.001 (0.002) loss 1.6191 (1.1774) acc 65.6250 (70.8143) lr 1.9048e-03 eta 17:53:44 +epoch [9/50] batch [715/1000] time 1.551 (1.560) data 0.000 (0.002) loss 0.8208 (1.1785) acc 78.1250 (70.8042) lr 1.9048e-03 eta 17:53:36 +epoch [9/50] batch [720/1000] time 1.551 (1.560) data 0.001 (0.002) loss 1.2295 (1.1785) acc 59.3750 (70.7899) lr 1.9048e-03 eta 17:53:28 +epoch [9/50] batch [725/1000] time 1.579 (1.560) data 0.001 (0.002) loss 1.0381 (1.1778) acc 71.8750 (70.7845) lr 1.9048e-03 eta 17:53:18 +epoch [9/50] batch [730/1000] time 1.551 (1.560) data 0.000 (0.002) loss 1.4170 (1.1790) acc 62.5000 (70.7449) lr 1.9048e-03 eta 17:53:10 +epoch [9/50] batch [735/1000] time 1.580 (1.560) data 0.001 (0.002) loss 1.2939 (1.1780) acc 68.7500 (70.7483) lr 1.9048e-03 eta 17:53:04 +epoch [9/50] batch [740/1000] time 1.566 (1.560) data 0.000 (0.002) loss 0.7725 (1.1783) acc 78.1250 (70.7475) lr 1.9048e-03 eta 17:52:56 +epoch [9/50] batch [745/1000] time 1.556 (1.560) data 0.001 (0.002) loss 0.7837 (1.1779) acc 78.1250 (70.7508) lr 1.9048e-03 eta 17:52:57 +epoch [9/50] batch [750/1000] time 1.569 (1.561) data 0.000 (0.002) loss 1.6455 (1.1787) acc 59.3750 (70.7583) lr 1.9048e-03 eta 17:52:50 +epoch [9/50] batch [755/1000] time 1.577 (1.560) data 0.001 (0.002) loss 1.0693 (1.1778) acc 75.0000 (70.7947) lr 1.9048e-03 eta 17:52:42 +epoch [9/50] batch [760/1000] time 1.544 (1.560) data 0.000 (0.002) loss 0.6201 (1.1769) acc 87.5000 (70.8183) lr 1.9048e-03 eta 17:52:34 +epoch [9/50] batch [765/1000] time 1.537 (1.560) data 0.000 (0.002) loss 1.1309 (1.1778) acc 81.2500 (70.8007) lr 1.9048e-03 eta 17:52:25 
+epoch [9/50] batch [770/1000] time 1.584 (1.561) data 0.001 (0.002) loss 1.2021 (1.1785) acc 68.7500 (70.7630) lr 1.9048e-03 eta 17:52:20 +epoch [9/50] batch [775/1000] time 1.566 (1.561) data 0.000 (0.002) loss 0.9458 (1.1782) acc 71.8750 (70.7742) lr 1.9048e-03 eta 17:52:12 +epoch [9/50] batch [780/1000] time 1.535 (1.560) data 0.000 (0.002) loss 0.7437 (1.1781) acc 68.7500 (70.7452) lr 1.9048e-03 eta 17:52:01 +epoch [9/50] batch [785/1000] time 1.565 (1.560) data 0.000 (0.002) loss 0.8906 (1.1782) acc 78.1250 (70.7643) lr 1.9048e-03 eta 17:51:53 +epoch [9/50] batch [790/1000] time 1.557 (1.560) data 0.000 (0.002) loss 1.7656 (1.1794) acc 68.7500 (70.7318) lr 1.9048e-03 eta 17:51:45 +epoch [9/50] batch [795/1000] time 1.572 (1.560) data 0.001 (0.002) loss 1.0078 (1.1785) acc 78.1250 (70.7429) lr 1.9048e-03 eta 17:51:38 +epoch [9/50] batch [800/1000] time 1.576 (1.560) data 0.000 (0.002) loss 0.8833 (1.1767) acc 78.1250 (70.7852) lr 1.9048e-03 eta 17:51:29 +epoch [9/50] batch [805/1000] time 1.558 (1.560) data 0.000 (0.002) loss 0.9009 (1.1757) acc 75.0000 (70.7764) lr 1.9048e-03 eta 17:51:24 +epoch [9/50] batch [810/1000] time 1.568 (1.561) data 0.001 (0.002) loss 1.1865 (1.1762) acc 65.6250 (70.7446) lr 1.9048e-03 eta 17:51:23 +epoch [9/50] batch [815/1000] time 1.561 (1.561) data 0.000 (0.002) loss 1.7734 (1.1777) acc 65.6250 (70.7285) lr 1.9048e-03 eta 17:51:17 +epoch [9/50] batch [820/1000] time 1.566 (1.561) data 0.001 (0.002) loss 1.9961 (1.1814) acc 62.5000 (70.6593) lr 1.9048e-03 eta 17:51:11 +epoch [9/50] batch [825/1000] time 1.537 (1.561) data 0.000 (0.002) loss 1.2480 (1.1819) acc 68.7500 (70.6439) lr 1.9048e-03 eta 17:51:01 +epoch [9/50] batch [830/1000] time 1.551 (1.561) data 0.000 (0.002) loss 0.9204 (1.1821) acc 71.8750 (70.6438) lr 1.9048e-03 eta 17:50:50 +epoch [9/50] batch [835/1000] time 1.563 (1.561) data 0.000 (0.002) loss 0.9478 (1.1818) acc 65.6250 (70.6325) lr 1.9048e-03 eta 17:50:40 +epoch [9/50] batch [840/1000] time 1.585 (1.561) 
data 0.000 (0.002) loss 1.1670 (1.1818) acc 65.6250 (70.6399) lr 1.9048e-03 eta 17:50:32 +epoch [9/50] batch [845/1000] time 1.537 (1.561) data 0.001 (0.002) loss 0.8477 (1.1822) acc 71.8750 (70.6361) lr 1.9048e-03 eta 17:50:24 +epoch [9/50] batch [850/1000] time 1.569 (1.561) data 0.000 (0.002) loss 0.9565 (1.1829) acc 68.7500 (70.5993) lr 1.9048e-03 eta 17:50:15 +epoch [9/50] batch [855/1000] time 1.549 (1.561) data 0.001 (0.002) loss 0.6621 (1.1813) acc 78.1250 (70.6177) lr 1.9048e-03 eta 17:50:14 +epoch [9/50] batch [860/1000] time 1.559 (1.561) data 0.000 (0.002) loss 1.2334 (1.1813) acc 68.7500 (70.6068) lr 1.9048e-03 eta 17:50:07 +epoch [9/50] batch [865/1000] time 1.551 (1.561) data 0.000 (0.002) loss 0.8867 (1.1805) acc 75.0000 (70.6214) lr 1.9048e-03 eta 17:49:57 +epoch [9/50] batch [870/1000] time 1.550 (1.561) data 0.001 (0.002) loss 1.2314 (1.1793) acc 65.6250 (70.6430) lr 1.9048e-03 eta 17:49:48 +epoch [9/50] batch [875/1000] time 1.549 (1.561) data 0.001 (0.002) loss 0.9541 (1.1775) acc 65.6250 (70.6750) lr 1.9048e-03 eta 17:49:39 +epoch [9/50] batch [880/1000] time 1.565 (1.561) data 0.000 (0.001) loss 1.2402 (1.1764) acc 78.1250 (70.7173) lr 1.9048e-03 eta 17:49:31 +epoch [9/50] batch [885/1000] time 1.532 (1.561) data 0.001 (0.001) loss 1.3945 (1.1757) acc 78.1250 (70.7486) lr 1.9048e-03 eta 17:49:20 +epoch [9/50] batch [890/1000] time 1.552 (1.560) data 0.000 (0.001) loss 1.0352 (1.1762) acc 62.5000 (70.7233) lr 1.9048e-03 eta 17:49:09 +epoch [9/50] batch [895/1000] time 1.553 (1.561) data 0.000 (0.001) loss 1.3096 (1.1757) acc 71.8750 (70.7332) lr 1.9048e-03 eta 17:49:05 +epoch [9/50] batch [900/1000] time 1.560 (1.561) data 0.000 (0.001) loss 1.3643 (1.1764) acc 68.7500 (70.7153) lr 1.9048e-03 eta 17:48:58 +epoch [9/50] batch [905/1000] time 1.572 (1.561) data 0.000 (0.001) loss 0.7100 (1.1758) acc 84.3750 (70.7148) lr 1.9048e-03 eta 17:48:51 +epoch [9/50] batch [910/1000] time 1.572 (1.561) data 0.000 (0.001) loss 1.1357 (1.1758) acc 81.2500 
(70.7040) lr 1.9048e-03 eta 17:48:45 +epoch [9/50] batch [915/1000] time 1.562 (1.561) data 0.000 (0.001) loss 0.9766 (1.1754) acc 68.7500 (70.6933) lr 1.9048e-03 eta 17:48:36 +epoch [9/50] batch [920/1000] time 1.559 (1.561) data 0.000 (0.001) loss 0.5757 (1.1745) acc 81.2500 (70.7099) lr 1.9048e-03 eta 17:48:27 +epoch [9/50] batch [925/1000] time 1.577 (1.561) data 0.000 (0.001) loss 1.0811 (1.1739) acc 81.2500 (70.7095) lr 1.9048e-03 eta 17:48:22 +epoch [9/50] batch [930/1000] time 1.541 (1.561) data 0.001 (0.001) loss 1.1211 (1.1722) acc 71.8750 (70.7527) lr 1.9048e-03 eta 17:48:12 +epoch [9/50] batch [935/1000] time 1.555 (1.561) data 0.000 (0.001) loss 1.0127 (1.1720) acc 71.8750 (70.7420) lr 1.9048e-03 eta 17:48:06 +epoch [9/50] batch [940/1000] time 1.550 (1.561) data 0.000 (0.001) loss 1.6709 (1.1729) acc 68.7500 (70.7447) lr 1.9048e-03 eta 17:47:55 +epoch [9/50] batch [945/1000] time 1.584 (1.561) data 0.001 (0.001) loss 0.8096 (1.1725) acc 84.3750 (70.7771) lr 1.9048e-03 eta 17:47:47 +epoch [9/50] batch [950/1000] time 1.558 (1.561) data 0.000 (0.001) loss 1.2021 (1.1738) acc 71.8750 (70.7599) lr 1.9048e-03 eta 17:47:39 +epoch [9/50] batch [955/1000] time 1.535 (1.560) data 0.000 (0.001) loss 0.9800 (1.1737) acc 81.2500 (70.7428) lr 1.9048e-03 eta 17:47:29 +epoch [9/50] batch [960/1000] time 1.555 (1.561) data 0.000 (0.001) loss 0.8271 (1.1733) acc 75.0000 (70.7487) lr 1.9048e-03 eta 17:47:28 +epoch [9/50] batch [965/1000] time 1.583 (1.561) data 0.000 (0.001) loss 0.9419 (1.1736) acc 71.8750 (70.7481) lr 1.9048e-03 eta 17:47:23 +epoch [9/50] batch [970/1000] time 1.565 (1.561) data 0.001 (0.001) loss 1.0342 (1.1720) acc 65.6250 (70.7700) lr 1.9048e-03 eta 17:47:16 +epoch [9/50] batch [975/1000] time 1.560 (1.561) data 0.000 (0.001) loss 0.9097 (1.1731) acc 84.3750 (70.7628) lr 1.9048e-03 eta 17:47:08 +epoch [9/50] batch [980/1000] time 1.544 (1.561) data 0.001 (0.001) loss 1.5059 (1.1719) acc 65.6250 (70.7812) lr 1.9048e-03 eta 17:47:01 +epoch [9/50] 
batch [985/1000] time 1.554 (1.561) data 0.001 (0.001) loss 1.2607 (1.1725) acc 65.6250 (70.7582) lr 1.9048e-03 eta 17:46:53 +epoch [9/50] batch [990/1000] time 1.583 (1.561) data 0.000 (0.001) loss 1.9268 (1.1740) acc 65.6250 (70.7449) lr 1.9048e-03 eta 17:46:49 +epoch [9/50] batch [995/1000] time 1.583 (1.561) data 0.000 (0.001) loss 1.5508 (1.1736) acc 62.5000 (70.7632) lr 1.9048e-03 eta 17:46:42 +epoch [9/50] batch [1000/1000] time 1.559 (1.561) data 0.000 (0.001) loss 1.0879 (1.1740) acc 62.5000 (70.7469) lr 1.8763e-03 eta 17:46:36 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 38,963 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +epoch [10/50] batch [5/1000] time 1.532 (1.655) data 0.000 (0.167) loss 0.7480 (0.9463) acc 75.0000 (71.8750) lr 1.8763e-03 eta 18:51:05 +epoch [10/50] batch [10/1000] time 1.571 (1.606) data 0.000 (0.084) loss 0.7959 (1.0112) acc 75.0000 (71.8750) lr 1.8763e-03 eta 18:17:30 +epoch [10/50] batch [15/1000] time 1.542 (1.589) data 0.000 (0.056) loss 1.1914 (1.0839) acc 75.0000 (71.4583) lr 1.8763e-03 eta 18:05:37 +epoch [10/50] batch [20/1000] time 1.557 (1.582) data 0.000 (0.042) loss 1.1250 (1.1332) acc 78.1250 (72.0312) lr 1.8763e-03 eta 18:00:15 +epoch [10/50] batch [25/1000] time 1.559 (1.580) data 0.000 (0.034) loss 1.3271 (1.1643) acc 68.7500 (71.8750) lr 1.8763e-03 eta 17:58:50 +epoch [10/50] batch [30/1000] time 1.562 (1.577) data 0.000 (0.028) loss 1.0176 (1.1418) acc 65.6250 (71.2500) lr 1.8763e-03 eta 17:56:35 +epoch [10/50] batch [35/1000] time 1.586 (1.581) data 0.000 (0.024) loss 2.0742 (1.1635) acc 59.3750 (71.4286) lr 1.8763e-03 eta 17:59:22 +epoch [10/50] batch [40/1000] time 1.576 (1.579) data 0.000 (0.021) loss 1.1816 (1.1547) acc 78.1250 (72.0312) lr 1.8763e-03 eta 17:57:50 +epoch [10/50] batch [45/1000] time 1.587 (1.578) data 0.001 (0.019) loss 0.5630 (1.1352) acc 87.5000 (72.3611) lr 1.8763e-03 eta 17:56:55 +epoch [10/50] batch [50/1000] time 1.564 (1.576) data 0.001 (0.017) loss 
1.3984 (1.1259) acc 65.6250 (72.5625) lr 1.8763e-03 eta 17:55:27 +epoch [10/50] batch [55/1000] time 1.585 (1.577) data 0.001 (0.016) loss 0.7456 (1.1396) acc 75.0000 (72.3864) lr 1.8763e-03 eta 17:55:53 +epoch [10/50] batch [60/1000] time 1.563 (1.575) data 0.001 (0.014) loss 0.6021 (1.1080) acc 81.2500 (72.8646) lr 1.8763e-03 eta 17:54:47 +epoch [10/50] batch [65/1000] time 1.570 (1.574) data 0.000 (0.013) loss 1.6787 (1.1366) acc 59.3750 (72.4519) lr 1.8763e-03 eta 17:54:00 +epoch [10/50] batch [70/1000] time 1.573 (1.574) data 0.001 (0.012) loss 1.1475 (1.1521) acc 71.8750 (72.1429) lr 1.8763e-03 eta 17:53:31 +epoch [10/50] batch [75/1000] time 1.561 (1.573) data 0.000 (0.012) loss 1.2373 (1.1595) acc 71.8750 (72.2500) lr 1.8763e-03 eta 17:52:43 +epoch [10/50] batch [80/1000] time 1.561 (1.572) data 0.001 (0.011) loss 0.8140 (1.1497) acc 78.1250 (72.4609) lr 1.8763e-03 eta 17:52:03 +epoch [10/50] batch [85/1000] time 1.565 (1.571) data 0.000 (0.010) loss 1.5781 (1.1626) acc 62.5000 (72.2426) lr 1.8763e-03 eta 17:51:15 +epoch [10/50] batch [90/1000] time 1.528 (1.570) data 0.000 (0.010) loss 1.1123 (1.1557) acc 65.6250 (72.1875) lr 1.8763e-03 eta 17:50:26 +epoch [10/50] batch [95/1000] time 1.545 (1.571) data 0.000 (0.009) loss 0.9429 (1.1544) acc 65.6250 (72.0066) lr 1.8763e-03 eta 17:51:20 +epoch [10/50] batch [100/1000] time 1.558 (1.571) data 0.000 (0.009) loss 1.0176 (1.1459) acc 78.1250 (71.9062) lr 1.8763e-03 eta 17:51:01 +epoch [10/50] batch [105/1000] time 1.571 (1.571) data 0.001 (0.008) loss 1.2412 (1.1441) acc 59.3750 (71.8155) lr 1.8763e-03 eta 17:50:45 +epoch [10/50] batch [110/1000] time 1.563 (1.571) data 0.000 (0.008) loss 1.0439 (1.1278) acc 71.8750 (71.8182) lr 1.8763e-03 eta 17:50:47 +epoch [10/50] batch [115/1000] time 1.569 (1.571) data 0.000 (0.008) loss 0.8394 (1.1285) acc 78.1250 (71.9022) lr 1.8763e-03 eta 17:50:33 +epoch [10/50] batch [120/1000] time 1.569 (1.570) data 0.000 (0.007) loss 0.7368 (1.1228) acc 78.1250 (72.0052) lr 
1.8763e-03 eta 17:49:59 +epoch [10/50] batch [125/1000] time 1.570 (1.570) data 0.000 (0.007) loss 1.0166 (1.1274) acc 75.0000 (71.8750) lr 1.8763e-03 eta 17:49:30 +epoch [10/50] batch [130/1000] time 1.590 (1.570) data 0.001 (0.007) loss 0.8604 (1.1271) acc 81.2500 (71.7067) lr 1.8763e-03 eta 17:49:26 +epoch [10/50] batch [135/1000] time 1.592 (1.570) data 0.001 (0.007) loss 1.1055 (1.1265) acc 75.0000 (71.6667) lr 1.8763e-03 eta 17:49:12 +epoch [10/50] batch [140/1000] time 1.560 (1.571) data 0.000 (0.006) loss 0.9702 (1.1150) acc 78.1250 (72.0089) lr 1.8763e-03 eta 17:49:34 +epoch [10/50] batch [145/1000] time 1.567 (1.570) data 0.001 (0.006) loss 1.5557 (1.1182) acc 75.0000 (72.1336) lr 1.8763e-03 eta 17:49:04 +epoch [10/50] batch [150/1000] time 1.563 (1.570) data 0.000 (0.006) loss 1.1914 (1.1111) acc 71.8750 (72.3542) lr 1.8763e-03 eta 17:48:54 +epoch [10/50] batch [155/1000] time 1.567 (1.570) data 0.001 (0.006) loss 1.5996 (1.1178) acc 75.0000 (72.2782) lr 1.8763e-03 eta 17:48:47 +epoch [10/50] batch [160/1000] time 1.559 (1.570) data 0.000 (0.006) loss 1.2080 (1.1214) acc 68.7500 (72.2266) lr 1.8763e-03 eta 17:48:25 +epoch [10/50] batch [165/1000] time 1.575 (1.570) data 0.000 (0.006) loss 1.7031 (1.1204) acc 62.5000 (72.1970) lr 1.8763e-03 eta 17:48:19 +epoch [10/50] batch [170/1000] time 1.552 (1.569) data 0.000 (0.005) loss 0.9375 (1.1251) acc 78.1250 (72.0404) lr 1.8763e-03 eta 17:47:57 +epoch [10/50] batch [175/1000] time 1.560 (1.569) data 0.001 (0.005) loss 0.9209 (1.1229) acc 68.7500 (71.8750) lr 1.8763e-03 eta 17:47:30 +epoch [10/50] batch [180/1000] time 1.558 (1.569) data 0.000 (0.005) loss 1.4570 (1.1286) acc 65.6250 (71.6840) lr 1.8763e-03 eta 17:47:15 +epoch [10/50] batch [185/1000] time 1.549 (1.569) data 0.000 (0.005) loss 1.5010 (1.1279) acc 68.7500 (71.6385) lr 1.8763e-03 eta 17:47:00 +epoch [10/50] batch [190/1000] time 1.580 (1.568) data 0.001 (0.005) loss 0.6079 (1.1272) acc 78.1250 (71.7270) lr 1.8763e-03 eta 17:46:40 +epoch [10/50] 
batch [195/1000] time 1.561 (1.568) data 0.001 (0.005) loss 1.4404 (1.1303) acc 71.8750 (71.6346) lr 1.8763e-03 eta 17:46:10 +epoch [10/50] batch [200/1000] time 1.552 (1.568) data 0.000 (0.005) loss 1.1572 (1.1319) acc 78.1250 (71.5781) lr 1.8763e-03 eta 17:45:55 +epoch [10/50] batch [205/1000] time 1.531 (1.567) data 0.001 (0.005) loss 0.9575 (1.1301) acc 78.1250 (71.6311) lr 1.8763e-03 eta 17:45:30 +epoch [10/50] batch [210/1000] time 1.541 (1.567) data 0.001 (0.004) loss 1.0703 (1.1316) acc 75.0000 (71.5625) lr 1.8763e-03 eta 17:45:05 +epoch [10/50] batch [215/1000] time 1.574 (1.567) data 0.001 (0.004) loss 1.1943 (1.1336) acc 68.7500 (71.4535) lr 1.8763e-03 eta 17:44:51 +epoch [10/50] batch [220/1000] time 1.555 (1.566) data 0.000 (0.004) loss 0.6079 (1.1316) acc 84.3750 (71.4205) lr 1.8763e-03 eta 17:44:38 +epoch [10/50] batch [225/1000] time 1.574 (1.566) data 0.000 (0.004) loss 0.6704 (1.1318) acc 84.3750 (71.4444) lr 1.8763e-03 eta 17:44:32 +epoch [10/50] batch [230/1000] time 1.583 (1.567) data 0.000 (0.004) loss 1.1465 (1.1324) acc 75.0000 (71.4810) lr 1.8763e-03 eta 17:44:31 +epoch [10/50] batch [235/1000] time 1.571 (1.567) data 0.000 (0.004) loss 0.8628 (1.1332) acc 75.0000 (71.4096) lr 1.8763e-03 eta 17:44:22 +epoch [10/50] batch [240/1000] time 1.559 (1.566) data 0.001 (0.004) loss 1.2178 (1.1317) acc 71.8750 (71.4062) lr 1.8763e-03 eta 17:43:49 +epoch [10/50] batch [245/1000] time 1.553 (1.566) data 0.000 (0.004) loss 1.1699 (1.1302) acc 68.7500 (71.3776) lr 1.8763e-03 eta 17:43:48 +epoch [10/50] batch [250/1000] time 1.559 (1.566) data 0.000 (0.004) loss 1.3887 (1.1295) acc 68.7500 (71.4125) lr 1.8763e-03 eta 17:43:22 +epoch [10/50] batch [255/1000] time 1.593 (1.566) data 0.000 (0.004) loss 0.9771 (1.1304) acc 81.2500 (71.3480) lr 1.8763e-03 eta 17:43:16 +epoch [10/50] batch [260/1000] time 1.556 (1.566) data 0.000 (0.004) loss 0.8979 (1.1262) acc 68.7500 (71.4303) lr 1.8763e-03 eta 17:43:05 +epoch [10/50] batch [265/1000] time 1.565 (1.565) 
data 0.001 (0.004) loss 0.7393 (1.1263) acc 81.2500 (71.4505) lr 1.8763e-03 eta 17:42:46 +epoch [10/50] batch [270/1000] time 1.539 (1.565) data 0.000 (0.004) loss 0.9214 (1.1260) acc 75.0000 (71.4815) lr 1.8763e-03 eta 17:42:30 +epoch [10/50] batch [275/1000] time 1.583 (1.565) data 0.000 (0.003) loss 1.2490 (1.1259) acc 62.5000 (71.4545) lr 1.8763e-03 eta 17:42:23 +epoch [10/50] batch [280/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.5459 (1.1292) acc 62.5000 (71.3951) lr 1.8763e-03 eta 17:42:18 +epoch [10/50] batch [285/1000] time 1.586 (1.565) data 0.000 (0.003) loss 1.7051 (1.1315) acc 56.2500 (71.3816) lr 1.8763e-03 eta 17:42:12 +epoch [10/50] batch [290/1000] time 1.562 (1.566) data 0.000 (0.003) loss 0.9409 (1.1319) acc 81.2500 (71.4116) lr 1.8763e-03 eta 17:42:13 +epoch [10/50] batch [295/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.5186 (1.1343) acc 59.3750 (71.3136) lr 1.8763e-03 eta 17:42:05 +epoch [10/50] batch [300/1000] time 1.566 (1.565) data 0.000 (0.003) loss 0.9595 (1.1345) acc 75.0000 (71.3125) lr 1.8763e-03 eta 17:41:54 +epoch [10/50] batch [305/1000] time 1.564 (1.565) data 0.001 (0.003) loss 0.9399 (1.1363) acc 78.1250 (71.2295) lr 1.8763e-03 eta 17:41:39 +epoch [10/50] batch [310/1000] time 1.555 (1.565) data 0.000 (0.003) loss 1.0029 (1.1380) acc 71.8750 (71.1895) lr 1.8763e-03 eta 17:41:35 +epoch [10/50] batch [315/1000] time 1.582 (1.565) data 0.000 (0.003) loss 2.1348 (1.1435) acc 46.8750 (71.0714) lr 1.8763e-03 eta 17:41:25 +epoch [10/50] batch [320/1000] time 1.549 (1.565) data 0.000 (0.003) loss 0.6519 (1.1423) acc 71.8750 (71.0547) lr 1.8763e-03 eta 17:41:03 +epoch [10/50] batch [325/1000] time 1.551 (1.565) data 0.000 (0.003) loss 0.7803 (1.1396) acc 78.1250 (71.1058) lr 1.8763e-03 eta 17:40:44 +epoch [10/50] batch [330/1000] time 1.635 (1.565) data 0.000 (0.003) loss 1.0312 (1.1416) acc 75.0000 (71.1080) lr 1.8763e-03 eta 17:40:45 +epoch [10/50] batch [335/1000] time 1.569 (1.565) data 0.000 (0.003) loss 1.3125 (1.1404) 
acc 59.3750 (71.0914) lr 1.8763e-03 eta 17:40:34 +epoch [10/50] batch [340/1000] time 1.559 (1.565) data 0.000 (0.003) loss 0.9536 (1.1414) acc 78.1250 (71.1213) lr 1.8763e-03 eta 17:40:24 +epoch [10/50] batch [345/1000] time 1.566 (1.565) data 0.000 (0.003) loss 0.8813 (1.1404) acc 78.1250 (71.1866) lr 1.8763e-03 eta 17:40:10 +epoch [10/50] batch [350/1000] time 1.569 (1.565) data 0.000 (0.003) loss 2.0156 (1.1423) acc 62.5000 (71.1786) lr 1.8763e-03 eta 17:40:02 +epoch [10/50] batch [355/1000] time 1.562 (1.565) data 0.000 (0.003) loss 1.2295 (1.1421) acc 78.1250 (71.2676) lr 1.8763e-03 eta 17:39:53 +epoch [10/50] batch [360/1000] time 1.534 (1.565) data 0.000 (0.003) loss 0.9390 (1.1399) acc 75.0000 (71.2934) lr 1.8763e-03 eta 17:39:41 +epoch [10/50] batch [365/1000] time 1.576 (1.564) data 0.000 (0.003) loss 1.4033 (1.1401) acc 62.5000 (71.3442) lr 1.8763e-03 eta 17:39:27 +epoch [10/50] batch [370/1000] time 1.570 (1.564) data 0.000 (0.003) loss 1.5967 (1.1386) acc 62.5000 (71.3682) lr 1.8763e-03 eta 17:39:18 +epoch [10/50] batch [375/1000] time 1.567 (1.564) data 0.001 (0.003) loss 0.7046 (1.1395) acc 81.2500 (71.3917) lr 1.8763e-03 eta 17:39:06 +epoch [10/50] batch [380/1000] time 1.546 (1.564) data 0.000 (0.003) loss 0.7886 (1.1402) acc 71.8750 (71.3405) lr 1.8763e-03 eta 17:38:56 +epoch [10/50] batch [385/1000] time 1.558 (1.564) data 0.000 (0.003) loss 1.0186 (1.1386) acc 71.8750 (71.3636) lr 1.8763e-03 eta 17:38:48 +epoch [10/50] batch [390/1000] time 1.578 (1.564) data 0.000 (0.003) loss 1.3096 (1.1380) acc 71.8750 (71.4022) lr 1.8763e-03 eta 17:38:42 +epoch [10/50] batch [395/1000] time 1.548 (1.565) data 0.000 (0.003) loss 1.4209 (1.1380) acc 59.3750 (71.4003) lr 1.8763e-03 eta 17:38:48 +epoch [10/50] batch [400/1000] time 1.565 (1.565) data 0.001 (0.003) loss 0.7114 (1.1375) acc 78.1250 (71.4062) lr 1.8763e-03 eta 17:38:42 +epoch [10/50] batch [405/1000] time 1.567 (1.565) data 0.000 (0.003) loss 1.4502 (1.1366) acc 75.0000 (71.4429) lr 1.8763e-03 eta 
17:38:35 +epoch [10/50] batch [410/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.4180 (1.1392) acc 68.7500 (71.4177) lr 1.8763e-03 eta 17:38:29 +epoch [10/50] batch [415/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.5703 (1.1415) acc 50.0000 (71.3630) lr 1.8763e-03 eta 17:38:20 +epoch [10/50] batch [420/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.4360 (1.1389) acc 84.3750 (71.4062) lr 1.8763e-03 eta 17:38:11 +epoch [10/50] batch [425/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.2568 (1.1409) acc 59.3750 (71.3382) lr 1.8763e-03 eta 17:38:00 +epoch [10/50] batch [430/1000] time 1.539 (1.564) data 0.000 (0.002) loss 1.4336 (1.1415) acc 59.3750 (71.3953) lr 1.8763e-03 eta 17:37:44 +epoch [10/50] batch [435/1000] time 1.570 (1.564) data 0.001 (0.002) loss 1.3701 (1.1430) acc 62.5000 (71.3649) lr 1.8763e-03 eta 17:37:42 +epoch [10/50] batch [440/1000] time 1.546 (1.565) data 0.001 (0.002) loss 1.0098 (1.1434) acc 81.2500 (71.3494) lr 1.8763e-03 eta 17:37:46 +epoch [10/50] batch [445/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.3506 (1.1438) acc 59.3750 (71.3202) lr 1.8763e-03 eta 17:37:34 +epoch [10/50] batch [450/1000] time 1.584 (1.565) data 0.000 (0.002) loss 1.0918 (1.1447) acc 68.7500 (71.3264) lr 1.8763e-03 eta 17:37:29 +epoch [10/50] batch [455/1000] time 1.577 (1.565) data 0.000 (0.002) loss 1.0967 (1.1439) acc 71.8750 (71.3462) lr 1.8763e-03 eta 17:37:23 +epoch [10/50] batch [460/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.9492 (1.1437) acc 71.8750 (71.3111) lr 1.8763e-03 eta 17:37:16 +epoch [10/50] batch [465/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.8584 (1.1431) acc 81.2500 (71.3306) lr 1.8763e-03 eta 17:37:09 +epoch [10/50] batch [470/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.8120 (1.1419) acc 71.8750 (71.3431) lr 1.8763e-03 eta 17:37:03 +epoch [10/50] batch [475/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7290 (1.1420) acc 78.1250 (71.3487) lr 1.8763e-03 eta 17:36:51 +epoch [10/50] batch 
[480/1000] time 1.571 (1.565) data 0.000 (0.002) loss 1.1406 (1.1430) acc 71.8750 (71.3607) lr 1.8763e-03 eta 17:36:40 +epoch [10/50] batch [485/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.7969 (1.1429) acc 62.5000 (71.3402) lr 1.8763e-03 eta 17:36:41 +epoch [10/50] batch [490/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.5508 (1.1421) acc 65.6250 (71.3520) lr 1.8763e-03 eta 17:36:31 +epoch [10/50] batch [495/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.0215 (1.1390) acc 68.7500 (71.4078) lr 1.8763e-03 eta 17:36:19 +epoch [10/50] batch [500/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.0039 (1.1389) acc 65.6250 (71.3875) lr 1.8763e-03 eta 17:36:13 +epoch [10/50] batch [505/1000] time 1.575 (1.565) data 0.001 (0.002) loss 0.5864 (1.1369) acc 84.3750 (71.4418) lr 1.8763e-03 eta 17:36:03 +epoch [10/50] batch [510/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.1670 (1.1386) acc 65.6250 (71.4216) lr 1.8763e-03 eta 17:35:49 +epoch [10/50] batch [515/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.0566 (1.1361) acc 81.2500 (71.4745) lr 1.8763e-03 eta 17:35:39 +epoch [10/50] batch [520/1000] time 1.554 (1.565) data 0.001 (0.002) loss 0.4524 (1.1366) acc 84.3750 (71.4543) lr 1.8763e-03 eta 17:35:31 +epoch [10/50] batch [525/1000] time 1.555 (1.564) data 0.001 (0.002) loss 1.2607 (1.1370) acc 71.8750 (71.4345) lr 1.8763e-03 eta 17:35:21 +epoch [10/50] batch [530/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.5420 (1.1373) acc 65.6250 (71.4446) lr 1.8763e-03 eta 17:35:17 +epoch [10/50] batch [535/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.0693 (1.1359) acc 75.0000 (71.4428) lr 1.8763e-03 eta 17:35:11 +epoch [10/50] batch [540/1000] time 1.527 (1.565) data 0.001 (0.002) loss 1.0850 (1.1367) acc 71.8750 (71.4410) lr 1.8763e-03 eta 17:35:03 +epoch [10/50] batch [545/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.1396 (1.1359) acc 71.8750 (71.4851) lr 1.8763e-03 eta 17:35:05 +epoch [10/50] batch [550/1000] time 1.566 (1.565) data 
0.000 (0.002) loss 1.0449 (1.1342) acc 71.8750 (71.4830) lr 1.8763e-03 eta 17:34:57 +epoch [10/50] batch [555/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.6221 (1.1354) acc 65.6250 (71.4809) lr 1.8763e-03 eta 17:34:50 +epoch [10/50] batch [560/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.9038 (1.1361) acc 65.6250 (71.4788) lr 1.8763e-03 eta 17:34:41 +epoch [10/50] batch [565/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.2871 (1.1366) acc 71.8750 (71.4823) lr 1.8763e-03 eta 17:34:28 +epoch [10/50] batch [570/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.2607 (1.1357) acc 75.0000 (71.4912) lr 1.8763e-03 eta 17:34:15 +epoch [10/50] batch [575/1000] time 1.568 (1.565) data 0.000 (0.002) loss 0.8223 (1.1359) acc 78.1250 (71.4783) lr 1.8763e-03 eta 17:34:05 +epoch [10/50] batch [580/1000] time 1.596 (1.565) data 0.000 (0.002) loss 1.0205 (1.1357) acc 78.1250 (71.4709) lr 1.8763e-03 eta 17:34:01 +epoch [10/50] batch [585/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.8701 (1.1352) acc 78.1250 (71.4583) lr 1.8763e-03 eta 17:33:51 +epoch [10/50] batch [590/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.8774 (1.1339) acc 78.1250 (71.5095) lr 1.8763e-03 eta 17:33:57 +epoch [10/50] batch [595/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.8184 (1.1336) acc 81.2500 (71.5074) lr 1.8763e-03 eta 17:33:49 +epoch [10/50] batch [600/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.2217 (1.1334) acc 75.0000 (71.5312) lr 1.8763e-03 eta 17:33:38 +epoch [10/50] batch [605/1000] time 1.571 (1.565) data 0.001 (0.002) loss 0.6431 (1.1344) acc 81.2500 (71.5083) lr 1.8763e-03 eta 17:33:31 +epoch [10/50] batch [610/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.2041 (1.1344) acc 75.0000 (71.5164) lr 1.8763e-03 eta 17:33:21 +epoch [10/50] batch [615/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.3516 (1.1352) acc 68.7500 (71.4939) lr 1.8763e-03 eta 17:33:10 +epoch [10/50] batch [620/1000] time 1.577 (1.565) data 0.001 (0.002) loss 1.2383 (1.1356) acc 
68.7500 (71.4869) lr 1.8763e-03 eta 17:33:03 +epoch [10/50] batch [625/1000] time 1.574 (1.565) data 0.000 (0.002) loss 2.1016 (1.1356) acc 56.2500 (71.4950) lr 1.8763e-03 eta 17:32:58 +epoch [10/50] batch [630/1000] time 1.550 (1.565) data 0.001 (0.002) loss 1.4492 (1.1343) acc 68.7500 (71.5030) lr 1.8763e-03 eta 17:32:47 +epoch [10/50] batch [635/1000] time 1.583 (1.565) data 0.000 (0.002) loss 1.6270 (1.1355) acc 62.5000 (71.4715) lr 1.8763e-03 eta 17:32:51 +epoch [10/50] batch [640/1000] time 1.588 (1.565) data 0.000 (0.002) loss 0.7422 (1.1364) acc 78.1250 (71.4404) lr 1.8763e-03 eta 17:32:50 +epoch [10/50] batch [645/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.3916 (1.1365) acc 59.3750 (71.4341) lr 1.8763e-03 eta 17:32:43 +epoch [10/50] batch [650/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.3516 (1.1366) acc 65.6250 (71.4087) lr 1.8763e-03 eta 17:32:30 +epoch [10/50] batch [655/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.8931 (1.1343) acc 78.1250 (71.4408) lr 1.8763e-03 eta 17:32:22 +epoch [10/50] batch [660/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.0400 (1.1334) acc 65.6250 (71.4441) lr 1.8763e-03 eta 17:32:12 +epoch [10/50] batch [665/1000] time 1.582 (1.565) data 0.000 (0.002) loss 0.8013 (1.1323) acc 78.1250 (71.4662) lr 1.8763e-03 eta 17:32:05 +epoch [10/50] batch [670/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.5518 (1.1337) acc 65.6250 (71.4412) lr 1.8763e-03 eta 17:31:53 +epoch [10/50] batch [675/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.0703 (1.1333) acc 75.0000 (71.4398) lr 1.8763e-03 eta 17:31:41 +epoch [10/50] batch [680/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.3135 (1.1357) acc 59.3750 (71.3787) lr 1.8763e-03 eta 17:31:31 +epoch [10/50] batch [685/1000] time 1.567 (1.565) data 0.000 (0.002) loss 0.5522 (1.1335) acc 87.5000 (71.4325) lr 1.8763e-03 eta 17:31:20 +epoch [10/50] batch [690/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.2686 (1.1339) acc 68.7500 (71.4402) lr 1.8763e-03 eta 
17:31:10 +epoch [10/50] batch [695/1000] time 1.717 (1.565) data 0.000 (0.002) loss 0.9414 (1.1343) acc 75.0000 (71.4119) lr 1.8763e-03 eta 17:31:08 +epoch [10/50] batch [700/1000] time 1.533 (1.565) data 0.000 (0.002) loss 0.7168 (1.1337) acc 78.1250 (71.4330) lr 1.8763e-03 eta 17:30:58 +epoch [10/50] batch [705/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.0586 (1.1341) acc 78.1250 (71.4450) lr 1.8763e-03 eta 17:30:47 +epoch [10/50] batch [710/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.8486 (1.1354) acc 50.0000 (71.3908) lr 1.8763e-03 eta 17:30:41 +epoch [10/50] batch [715/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.4160 (1.1350) acc 68.7500 (71.4030) lr 1.8763e-03 eta 17:30:33 +epoch [10/50] batch [720/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.3066 (1.1356) acc 65.6250 (71.4106) lr 1.8763e-03 eta 17:30:25 +epoch [10/50] batch [725/1000] time 1.590 (1.565) data 0.000 (0.002) loss 0.7129 (1.1357) acc 81.2500 (71.3750) lr 1.8763e-03 eta 17:30:16 +epoch [10/50] batch [730/1000] time 1.553 (1.565) data 0.001 (0.002) loss 0.7788 (1.1342) acc 81.2500 (71.3870) lr 1.8763e-03 eta 17:30:03 +epoch [10/50] batch [735/1000] time 1.541 (1.565) data 0.001 (0.002) loss 1.2607 (1.1354) acc 78.1250 (71.3818) lr 1.8763e-03 eta 17:29:55 +epoch [10/50] batch [740/1000] time 1.706 (1.565) data 0.001 (0.002) loss 1.1426 (1.1346) acc 62.5000 (71.3767) lr 1.8763e-03 eta 17:29:53 +epoch [10/50] batch [745/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.7676 (1.1377) acc 56.2500 (71.3297) lr 1.8763e-03 eta 17:29:43 +epoch [10/50] batch [750/1000] time 1.552 (1.565) data 0.000 (0.002) loss 0.9023 (1.1366) acc 78.1250 (71.3250) lr 1.8763e-03 eta 17:29:34 +epoch [10/50] batch [755/1000] time 1.564 (1.565) data 0.000 (0.002) loss 0.6499 (1.1368) acc 78.1250 (71.3204) lr 1.8763e-03 eta 17:29:27 +epoch [10/50] batch [760/1000] time 1.538 (1.564) data 0.000 (0.002) loss 1.0547 (1.1357) acc 68.7500 (71.3322) lr 1.8763e-03 eta 17:29:15 +epoch [10/50] batch 
[765/1000] time 1.553 (1.564) data 0.000 (0.002) loss 0.8594 (1.1360) acc 71.8750 (71.3154) lr 1.8763e-03 eta 17:29:01 +epoch [10/50] batch [770/1000] time 1.549 (1.564) data 0.000 (0.002) loss 2.0996 (1.1364) acc 62.5000 (71.3352) lr 1.8763e-03 eta 17:28:52 +epoch [10/50] batch [775/1000] time 1.563 (1.564) data 0.001 (0.002) loss 1.3477 (1.1358) acc 68.7500 (71.3185) lr 1.8763e-03 eta 17:28:43 +epoch [10/50] batch [780/1000] time 1.578 (1.564) data 0.001 (0.002) loss 0.8755 (1.1352) acc 71.8750 (71.3502) lr 1.8763e-03 eta 17:28:34 +epoch [10/50] batch [785/1000] time 1.559 (1.564) data 0.001 (0.002) loss 0.8989 (1.1335) acc 78.1250 (71.4172) lr 1.8763e-03 eta 17:28:32 +epoch [10/50] batch [790/1000] time 1.582 (1.564) data 0.000 (0.002) loss 1.2363 (1.1333) acc 59.3750 (71.4003) lr 1.8763e-03 eta 17:28:26 +epoch [10/50] batch [795/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.9873 (1.1343) acc 71.8750 (71.3640) lr 1.8763e-03 eta 17:28:14 +epoch [10/50] batch [800/1000] time 1.562 (1.564) data 0.000 (0.002) loss 0.9243 (1.1343) acc 75.0000 (71.3555) lr 1.8763e-03 eta 17:28:08 +epoch [10/50] batch [805/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.6982 (1.1368) acc 62.5000 (71.2927) lr 1.8763e-03 eta 17:28:00 +epoch [10/50] batch [810/1000] time 1.559 (1.564) data 0.000 (0.001) loss 0.6353 (1.1378) acc 84.3750 (71.2770) lr 1.8763e-03 eta 17:27:49 +epoch [10/50] batch [815/1000] time 1.608 (1.564) data 0.000 (0.001) loss 1.0312 (1.1383) acc 71.8750 (71.2615) lr 1.8763e-03 eta 17:27:46 +epoch [10/50] batch [820/1000] time 1.561 (1.564) data 0.001 (0.001) loss 1.0049 (1.1378) acc 84.3750 (71.2767) lr 1.8763e-03 eta 17:27:38 +epoch [10/50] batch [825/1000] time 1.585 (1.564) data 0.001 (0.001) loss 1.8838 (1.1386) acc 62.5000 (71.2652) lr 1.8763e-03 eta 17:27:32 +epoch [10/50] batch [830/1000] time 1.577 (1.565) data 0.000 (0.001) loss 1.1084 (1.1403) acc 75.0000 (71.2199) lr 1.8763e-03 eta 17:27:26 +epoch [10/50] batch [835/1000] time 1.601 (1.565) data 
0.000 (0.001) loss 1.6934 (1.1407) acc 56.2500 (71.1976) lr 1.8763e-03 eta 17:27:20 +epoch [10/50] batch [840/1000] time 1.565 (1.565) data 0.000 (0.001) loss 1.6660 (1.1395) acc 62.5000 (71.2202) lr 1.8763e-03 eta 17:27:13 +epoch [10/50] batch [845/1000] time 1.568 (1.565) data 0.000 (0.001) loss 0.9414 (1.1401) acc 68.7500 (71.1908) lr 1.8763e-03 eta 17:27:03 +epoch [10/50] batch [850/1000] time 1.557 (1.565) data 0.000 (0.001) loss 0.9507 (1.1403) acc 71.8750 (71.1912) lr 1.8763e-03 eta 17:27:02 +epoch [10/50] batch [855/1000] time 1.566 (1.565) data 0.000 (0.001) loss 1.3789 (1.1401) acc 65.6250 (71.1879) lr 1.8763e-03 eta 17:26:57 +epoch [10/50] batch [860/1000] time 1.566 (1.565) data 0.001 (0.001) loss 1.1572 (1.1405) acc 59.3750 (71.1737) lr 1.8763e-03 eta 17:26:48 +epoch [10/50] batch [865/1000] time 1.554 (1.565) data 0.000 (0.001) loss 0.3577 (1.1394) acc 90.6250 (71.1886) lr 1.8763e-03 eta 17:26:38 +epoch [10/50] batch [870/1000] time 1.548 (1.565) data 0.001 (0.001) loss 0.9727 (1.1398) acc 78.1250 (71.1961) lr 1.8763e-03 eta 17:26:29 +epoch [10/50] batch [875/1000] time 1.556 (1.565) data 0.000 (0.001) loss 2.2559 (1.1413) acc 40.6250 (71.1643) lr 1.8763e-03 eta 17:26:19 +epoch [10/50] batch [880/1000] time 1.554 (1.565) data 0.000 (0.001) loss 1.4658 (1.1428) acc 65.6250 (71.1328) lr 1.8763e-03 eta 17:26:13 +epoch [10/50] batch [885/1000] time 1.557 (1.565) data 0.000 (0.001) loss 0.7871 (1.1425) acc 75.0000 (71.1370) lr 1.8763e-03 eta 17:26:03 +epoch [10/50] batch [890/1000] time 1.563 (1.565) data 0.000 (0.001) loss 1.1328 (1.1417) acc 75.0000 (71.1692) lr 1.8763e-03 eta 17:25:54 +epoch [10/50] batch [895/1000] time 1.547 (1.565) data 0.001 (0.001) loss 1.6729 (1.1433) acc 65.6250 (71.1522) lr 1.8763e-03 eta 17:25:55 +epoch [10/50] batch [900/1000] time 1.582 (1.565) data 0.001 (0.001) loss 1.4717 (1.1431) acc 71.8750 (71.1528) lr 1.8763e-03 eta 17:25:47 +epoch [10/50] batch [905/1000] time 1.567 (1.565) data 0.001 (0.001) loss 1.4824 (1.1430) acc 
71.8750 (71.1602) lr 1.8763e-03 eta 17:25:39 +epoch [10/50] batch [910/1000] time 1.540 (1.565) data 0.000 (0.001) loss 0.9043 (1.1430) acc 71.8750 (71.1538) lr 1.8763e-03 eta 17:25:30 +epoch [10/50] batch [915/1000] time 1.556 (1.565) data 0.000 (0.001) loss 1.5977 (1.1435) acc 53.1250 (71.1270) lr 1.8763e-03 eta 17:25:21 +epoch [10/50] batch [920/1000] time 1.555 (1.565) data 0.000 (0.001) loss 1.0293 (1.1430) acc 68.7500 (71.1345) lr 1.8763e-03 eta 17:25:14 +epoch [10/50] batch [925/1000] time 1.529 (1.565) data 0.000 (0.001) loss 1.1426 (1.1435) acc 62.5000 (71.1149) lr 1.8763e-03 eta 17:25:06 +epoch [10/50] batch [930/1000] time 1.564 (1.565) data 0.000 (0.001) loss 1.5918 (1.1438) acc 59.3750 (71.0988) lr 1.8763e-03 eta 17:24:59 +epoch [10/50] batch [935/1000] time 1.556 (1.565) data 0.000 (0.001) loss 0.7583 (1.1431) acc 75.0000 (71.1130) lr 1.8763e-03 eta 17:24:56 +epoch [10/50] batch [940/1000] time 1.557 (1.565) data 0.000 (0.001) loss 1.5166 (1.1435) acc 59.3750 (71.1004) lr 1.8763e-03 eta 17:24:48 +epoch [10/50] batch [945/1000] time 1.524 (1.565) data 0.000 (0.001) loss 0.8218 (1.1440) acc 75.0000 (71.1045) lr 1.8763e-03 eta 17:24:37 +epoch [10/50] batch [950/1000] time 1.537 (1.565) data 0.000 (0.001) loss 1.9297 (1.1451) acc 59.3750 (71.0954) lr 1.8763e-03 eta 17:24:23 +epoch [10/50] batch [955/1000] time 1.546 (1.565) data 0.001 (0.001) loss 1.0068 (1.1459) acc 75.0000 (71.0798) lr 1.8763e-03 eta 17:24:13 +epoch [10/50] batch [960/1000] time 1.574 (1.565) data 0.001 (0.001) loss 1.7451 (1.1482) acc 56.2500 (71.0286) lr 1.8763e-03 eta 17:24:05 +epoch [10/50] batch [965/1000] time 1.539 (1.565) data 0.000 (0.001) loss 0.9658 (1.1478) acc 75.0000 (71.0233) lr 1.8763e-03 eta 17:23:56 +epoch [10/50] batch [970/1000] time 1.537 (1.564) data 0.000 (0.001) loss 1.1377 (1.1482) acc 65.6250 (71.0213) lr 1.8763e-03 eta 17:23:44 +epoch [10/50] batch [975/1000] time 1.561 (1.564) data 0.001 (0.001) loss 1.9053 (1.1495) acc 59.3750 (70.9968) lr 1.8763e-03 eta 
17:23:35 +epoch [10/50] batch [980/1000] time 1.542 (1.564) data 0.001 (0.001) loss 1.1963 (1.1492) acc 78.1250 (70.9981) lr 1.8763e-03 eta 17:23:27 +epoch [10/50] batch [985/1000] time 1.550 (1.564) data 0.001 (0.001) loss 1.2676 (1.1499) acc 65.6250 (70.9772) lr 1.8763e-03 eta 17:23:19 +epoch [10/50] batch [990/1000] time 1.552 (1.564) data 0.000 (0.001) loss 0.8218 (1.1497) acc 81.2500 (71.0006) lr 1.8763e-03 eta 17:23:11 +epoch [10/50] batch [995/1000] time 1.551 (1.564) data 0.000 (0.001) loss 1.1152 (1.1493) acc 71.8750 (71.0082) lr 1.8763e-03 eta 17:23:01 +epoch [10/50] batch [1000/1000] time 1.557 (1.564) data 0.000 (0.001) loss 1.5801 (1.1496) acc 65.6250 (71.0031) lr 1.8443e-03 eta 17:22:58 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,136 +* accuracy: 78.3% +* error: 21.7% +* macro_f1: 77.7% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [11/50] batch [5/1000] time 1.544 (1.690) data 0.000 (0.190) loss 1.4355 (1.0134) acc 65.6250 (73.7500) lr 1.8443e-03 eta 18:46:14 +epoch [11/50] batch [10/1000] time 1.565 (1.626) data 0.001 (0.095) loss 0.9097 (0.9962) acc 78.1250 (75.3125) lr 1.8443e-03 eta 18:03:45 +epoch [11/50] batch [15/1000] time 1.559 (1.603) data 0.000 (0.064) loss 0.5117 (1.0487) acc 84.3750 (73.9583) lr 1.8443e-03 eta 17:48:34 +epoch [11/50] batch [20/1000] time 1.566 (1.605) data 0.000 (0.048) loss 1.3994 (1.1441) acc 68.7500 (71.5625) lr 1.8443e-03 eta 17:49:28 +epoch [11/50] batch [25/1000] time 1.570 (1.599) data 0.000 (0.038) loss 0.8037 (1.1495) acc 84.3750 (72.3750) lr 1.8443e-03 eta 17:45:33 +epoch [11/50] batch [30/1000] time 1.561 (1.592) data 0.001 (0.032) loss 1.1074 (1.1664) acc 78.1250 (72.6042) lr 1.8443e-03 eta 17:40:35 +epoch [11/50] batch [35/1000] time 1.550 (1.586) data 0.000 (0.028) loss 1.1318 (1.1429) acc 59.3750 (72.1429) lr 1.8443e-03 eta 17:36:23 +epoch [11/50] batch [40/1000] time 1.552 (1.582) data 
0.000 (0.024) loss 1.0195 (1.1484) acc 75.0000 (71.9531) lr 1.8443e-03 eta 17:33:25 +epoch [11/50] batch [45/1000] time 1.581 (1.579) data 0.000 (0.022) loss 1.3232 (1.1437) acc 65.6250 (72.2917) lr 1.8443e-03 eta 17:31:38 +epoch [11/50] batch [50/1000] time 1.557 (1.577) data 0.001 (0.020) loss 1.2979 (1.1619) acc 71.8750 (71.9375) lr 1.8443e-03 eta 17:29:54 +epoch [11/50] batch [55/1000] time 1.565 (1.575) data 0.000 (0.018) loss 0.7002 (1.1377) acc 87.5000 (72.3864) lr 1.8443e-03 eta 17:28:21 +epoch [11/50] batch [60/1000] time 1.535 (1.575) data 0.000 (0.016) loss 1.2012 (1.1523) acc 75.0000 (71.9792) lr 1.8443e-03 eta 17:28:06 +epoch [11/50] batch [65/1000] time 1.557 (1.573) data 0.000 (0.015) loss 1.2617 (1.1775) acc 68.7500 (71.1058) lr 1.8443e-03 eta 17:26:42 +epoch [11/50] batch [70/1000] time 1.541 (1.571) data 0.000 (0.014) loss 0.9927 (1.1765) acc 78.1250 (71.1161) lr 1.8443e-03 eta 17:25:28 +epoch [11/50] batch [75/1000] time 1.541 (1.570) data 0.001 (0.013) loss 0.8696 (1.1645) acc 75.0000 (71.3333) lr 1.8443e-03 eta 17:24:34 +epoch [11/50] batch [80/1000] time 1.545 (1.568) data 0.001 (0.012) loss 1.5352 (1.1644) acc 68.7500 (71.4062) lr 1.8443e-03 eta 17:23:26 +epoch [11/50] batch [85/1000] time 1.545 (1.568) data 0.000 (0.012) loss 1.8213 (1.1773) acc 50.0000 (71.0294) lr 1.8443e-03 eta 17:22:59 +epoch [11/50] batch [90/1000] time 1.555 (1.567) data 0.000 (0.011) loss 0.8535 (1.1702) acc 78.1250 (71.1458) lr 1.8443e-03 eta 17:22:37 +epoch [11/50] batch [95/1000] time 1.535 (1.567) data 0.000 (0.011) loss 1.3701 (1.1779) acc 59.3750 (70.8553) lr 1.8443e-03 eta 17:22:18 +epoch [11/50] batch [100/1000] time 1.585 (1.568) data 0.001 (0.010) loss 1.3271 (1.1796) acc 65.6250 (70.9062) lr 1.8443e-03 eta 17:22:24 +epoch [11/50] batch [105/1000] time 1.554 (1.568) data 0.000 (0.010) loss 1.6250 (1.1791) acc 71.8750 (71.0119) lr 1.8443e-03 eta 17:22:17 +epoch [11/50] batch [110/1000] time 1.566 (1.567) data 0.001 (0.009) loss 1.0205 (1.1819) acc 78.1250 
(70.9943) lr 1.8443e-03 eta 17:21:52 +epoch [11/50] batch [115/1000] time 1.558 (1.567) data 0.001 (0.009) loss 0.8188 (1.1714) acc 81.2500 (71.1685) lr 1.8443e-03 eta 17:21:40 +epoch [11/50] batch [120/1000] time 1.592 (1.569) data 0.000 (0.008) loss 1.0869 (1.1717) acc 65.6250 (71.1979) lr 1.8443e-03 eta 17:22:36 +epoch [11/50] batch [125/1000] time 1.575 (1.568) data 0.000 (0.008) loss 0.9170 (1.1627) acc 78.1250 (71.3500) lr 1.8443e-03 eta 17:22:21 +epoch [11/50] batch [130/1000] time 1.599 (1.568) data 0.000 (0.008) loss 1.1641 (1.1625) acc 62.5000 (71.3462) lr 1.8443e-03 eta 17:22:08 +epoch [11/50] batch [135/1000] time 1.548 (1.568) data 0.000 (0.008) loss 1.4199 (1.1580) acc 68.7500 (71.2500) lr 1.8443e-03 eta 17:21:38 +epoch [11/50] batch [140/1000] time 1.555 (1.568) data 0.000 (0.007) loss 1.2471 (1.1574) acc 59.3750 (71.1384) lr 1.8443e-03 eta 17:21:25 +epoch [11/50] batch [145/1000] time 1.577 (1.567) data 0.000 (0.007) loss 1.2246 (1.1538) acc 81.2500 (71.2931) lr 1.8443e-03 eta 17:21:07 +epoch [11/50] batch [150/1000] time 1.577 (1.567) data 0.000 (0.007) loss 1.2402 (1.1522) acc 68.7500 (71.3750) lr 1.8443e-03 eta 17:20:56 +epoch [11/50] batch [155/1000] time 1.554 (1.567) data 0.001 (0.007) loss 1.2461 (1.1526) acc 68.7500 (71.4315) lr 1.8443e-03 eta 17:20:38 +epoch [11/50] batch [160/1000] time 1.568 (1.567) data 0.001 (0.006) loss 0.7764 (1.1499) acc 84.3750 (71.5234) lr 1.8443e-03 eta 17:20:37 +epoch [11/50] batch [165/1000] time 1.556 (1.568) data 0.000 (0.006) loss 1.2676 (1.1512) acc 65.6250 (71.3826) lr 1.8443e-03 eta 17:20:56 +epoch [11/50] batch [170/1000] time 1.572 (1.567) data 0.001 (0.006) loss 1.2734 (1.1506) acc 65.6250 (71.3051) lr 1.8443e-03 eta 17:20:29 +epoch [11/50] batch [175/1000] time 1.546 (1.567) data 0.000 (0.006) loss 0.9238 (1.1495) acc 78.1250 (71.3214) lr 1.8443e-03 eta 17:20:07 +epoch [11/50] batch [180/1000] time 1.561 (1.567) data 0.000 (0.006) loss 1.2764 (1.1451) acc 59.3750 (71.3194) lr 1.8443e-03 eta 17:19:46 
+epoch [11/50] batch [185/1000] time 1.556 (1.567) data 0.000 (0.006) loss 1.2305 (1.1421) acc 75.0000 (71.3851) lr 1.8443e-03 eta 17:19:31 +epoch [11/50] batch [190/1000] time 1.543 (1.566) data 0.000 (0.005) loss 0.5811 (1.1426) acc 90.6250 (71.4145) lr 1.8443e-03 eta 17:19:14 +epoch [11/50] batch [195/1000] time 1.581 (1.566) data 0.000 (0.005) loss 1.4873 (1.1441) acc 62.5000 (71.3462) lr 1.8443e-03 eta 17:19:02 +epoch [11/50] batch [200/1000] time 1.545 (1.567) data 0.000 (0.005) loss 1.3291 (1.1488) acc 71.8750 (71.2656) lr 1.8443e-03 eta 17:19:10 +epoch [11/50] batch [205/1000] time 1.566 (1.566) data 0.000 (0.005) loss 0.8906 (1.1442) acc 62.5000 (71.2805) lr 1.8443e-03 eta 17:18:58 +epoch [11/50] batch [210/1000] time 1.562 (1.567) data 0.000 (0.005) loss 1.5654 (1.1452) acc 56.2500 (71.2202) lr 1.8443e-03 eta 17:19:06 +epoch [11/50] batch [215/1000] time 1.558 (1.567) data 0.000 (0.005) loss 1.0195 (1.1356) acc 68.7500 (71.3372) lr 1.8443e-03 eta 17:18:46 +epoch [11/50] batch [220/1000] time 1.555 (1.567) data 0.000 (0.005) loss 2.0059 (1.1324) acc 62.5000 (71.4347) lr 1.8443e-03 eta 17:18:36 +epoch [11/50] batch [225/1000] time 1.563 (1.566) data 0.000 (0.005) loss 0.9912 (1.1333) acc 78.1250 (71.3611) lr 1.8443e-03 eta 17:18:23 +epoch [11/50] batch [230/1000] time 1.569 (1.567) data 0.000 (0.005) loss 1.0791 (1.1374) acc 71.8750 (71.2636) lr 1.8443e-03 eta 17:18:20 +epoch [11/50] batch [235/1000] time 1.563 (1.567) data 0.000 (0.005) loss 1.8018 (1.1362) acc 62.5000 (71.2766) lr 1.8443e-03 eta 17:18:19 +epoch [11/50] batch [240/1000] time 1.563 (1.567) data 0.000 (0.004) loss 1.0811 (1.1372) acc 68.7500 (71.2760) lr 1.8443e-03 eta 17:18:12 +epoch [11/50] batch [245/1000] time 1.572 (1.567) data 0.000 (0.004) loss 1.1924 (1.1367) acc 71.8750 (71.2628) lr 1.8443e-03 eta 17:18:01 +epoch [11/50] batch [250/1000] time 1.555 (1.567) data 0.000 (0.004) loss 1.7158 (1.1410) acc 65.6250 (71.1500) lr 1.8443e-03 eta 17:17:54 +epoch [11/50] batch [255/1000] time 
1.568 (1.567) data 0.000 (0.004) loss 0.7524 (1.1403) acc 81.2500 (71.1887) lr 1.8443e-03 eta 17:17:44 +epoch [11/50] batch [260/1000] time 1.558 (1.566) data 0.000 (0.004) loss 1.0840 (1.1404) acc 71.8750 (71.1538) lr 1.8443e-03 eta 17:17:29 +epoch [11/50] batch [265/1000] time 1.560 (1.566) data 0.000 (0.004) loss 1.0977 (1.1418) acc 78.1250 (71.1321) lr 1.8443e-03 eta 17:17:17 +epoch [11/50] batch [270/1000] time 1.711 (1.567) data 0.000 (0.004) loss 0.8916 (1.1392) acc 75.0000 (71.1690) lr 1.8443e-03 eta 17:17:31 +epoch [11/50] batch [275/1000] time 1.543 (1.567) data 0.000 (0.004) loss 1.1416 (1.1399) acc 68.7500 (71.1250) lr 1.8443e-03 eta 17:17:18 +epoch [11/50] batch [280/1000] time 1.573 (1.567) data 0.000 (0.004) loss 1.7588 (1.1412) acc 59.3750 (71.1384) lr 1.8443e-03 eta 17:17:03 +epoch [11/50] batch [285/1000] time 1.530 (1.567) data 0.000 (0.004) loss 1.5586 (1.1432) acc 59.3750 (71.1184) lr 1.8443e-03 eta 17:16:55 +epoch [11/50] batch [290/1000] time 1.569 (1.566) data 0.000 (0.004) loss 0.5366 (1.1371) acc 84.3750 (71.2823) lr 1.8443e-03 eta 17:16:40 +epoch [11/50] batch [295/1000] time 1.545 (1.566) data 0.000 (0.004) loss 1.3467 (1.1367) acc 62.5000 (71.2606) lr 1.8443e-03 eta 17:16:21 +epoch [11/50] batch [300/1000] time 1.580 (1.566) data 0.000 (0.004) loss 0.9468 (1.1395) acc 75.0000 (71.1875) lr 1.8443e-03 eta 17:16:05 +epoch [11/50] batch [305/1000] time 1.548 (1.566) data 0.001 (0.004) loss 1.0039 (1.1371) acc 75.0000 (71.2398) lr 1.8443e-03 eta 17:15:50 +epoch [11/50] batch [310/1000] time 1.538 (1.565) data 0.000 (0.004) loss 1.0596 (1.1389) acc 75.0000 (71.1996) lr 1.8443e-03 eta 17:15:33 +epoch [11/50] batch [315/1000] time 1.722 (1.566) data 0.000 (0.003) loss 1.1182 (1.1390) acc 71.8750 (71.1905) lr 1.8443e-03 eta 17:15:43 +epoch [11/50] batch [320/1000] time 1.556 (1.566) data 0.000 (0.003) loss 0.9180 (1.1418) acc 78.1250 (71.1523) lr 1.8443e-03 eta 17:15:33 +epoch [11/50] batch [325/1000] time 1.568 (1.566) data 0.000 (0.003) loss 
1.1328 (1.1447) acc 65.6250 (71.1058) lr 1.8443e-03 eta 17:15:20 +epoch [11/50] batch [330/1000] time 1.561 (1.566) data 0.000 (0.003) loss 0.9741 (1.1423) acc 68.7500 (71.1269) lr 1.8443e-03 eta 17:15:10 +epoch [11/50] batch [335/1000] time 1.549 (1.566) data 0.000 (0.003) loss 1.2471 (1.1441) acc 59.3750 (71.0821) lr 1.8443e-03 eta 17:14:56 +epoch [11/50] batch [340/1000] time 1.584 (1.566) data 0.001 (0.003) loss 1.1299 (1.1462) acc 62.5000 (70.9835) lr 1.8443e-03 eta 17:14:49 +epoch [11/50] batch [345/1000] time 1.549 (1.565) data 0.001 (0.003) loss 1.1094 (1.1435) acc 68.7500 (71.0417) lr 1.8443e-03 eta 17:14:36 +epoch [11/50] batch [350/1000] time 1.555 (1.565) data 0.000 (0.003) loss 0.6611 (1.1443) acc 78.1250 (71.0000) lr 1.8443e-03 eta 17:14:20 +epoch [11/50] batch [355/1000] time 1.570 (1.565) data 0.000 (0.003) loss 0.7480 (1.1428) acc 87.5000 (71.0651) lr 1.8443e-03 eta 17:14:12 +epoch [11/50] batch [360/1000] time 1.558 (1.565) data 0.000 (0.003) loss 1.0752 (1.1441) acc 65.6250 (71.0330) lr 1.8443e-03 eta 17:14:13 +epoch [11/50] batch [365/1000] time 1.564 (1.566) data 0.000 (0.003) loss 0.8833 (1.1434) acc 68.7500 (71.0702) lr 1.8443e-03 eta 17:14:09 +epoch [11/50] batch [370/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.2900 (1.1435) acc 65.6250 (71.0389) lr 1.8443e-03 eta 17:13:57 +epoch [11/50] batch [375/1000] time 1.544 (1.565) data 0.000 (0.003) loss 1.1807 (1.1421) acc 71.8750 (71.0750) lr 1.8443e-03 eta 17:13:45 +epoch [11/50] batch [380/1000] time 1.576 (1.565) data 0.000 (0.003) loss 1.4951 (1.1436) acc 62.5000 (71.0362) lr 1.8443e-03 eta 17:13:38 +epoch [11/50] batch [385/1000] time 1.550 (1.565) data 0.000 (0.003) loss 1.0732 (1.1443) acc 78.1250 (71.0308) lr 1.8443e-03 eta 17:13:26 +epoch [11/50] batch [390/1000] time 1.532 (1.565) data 0.000 (0.003) loss 1.4844 (1.1447) acc 62.5000 (71.0176) lr 1.8443e-03 eta 17:13:10 +epoch [11/50] batch [395/1000] time 1.545 (1.565) data 0.000 (0.003) loss 1.0459 (1.1452) acc 65.6250 (70.9810) 
lr 1.8443e-03 eta 17:12:56 +epoch [11/50] batch [400/1000] time 1.553 (1.565) data 0.000 (0.003) loss 1.0664 (1.1451) acc 75.0000 (71.0234) lr 1.8443e-03 eta 17:12:42 +epoch [11/50] batch [405/1000] time 1.549 (1.565) data 0.000 (0.003) loss 0.4478 (1.1400) acc 90.6250 (71.1651) lr 1.8443e-03 eta 17:12:28 +epoch [11/50] batch [410/1000] time 1.534 (1.564) data 0.000 (0.003) loss 1.0312 (1.1388) acc 68.7500 (71.1890) lr 1.8443e-03 eta 17:12:11 +epoch [11/50] batch [415/1000] time 1.578 (1.564) data 0.000 (0.003) loss 0.8281 (1.1405) acc 78.1250 (71.1822) lr 1.8443e-03 eta 17:12:03 +epoch [11/50] batch [420/1000] time 1.538 (1.564) data 0.001 (0.003) loss 0.8071 (1.1413) acc 78.1250 (71.2054) lr 1.8443e-03 eta 17:11:46 +epoch [11/50] batch [425/1000] time 1.563 (1.564) data 0.000 (0.003) loss 1.1670 (1.1420) acc 71.8750 (71.2059) lr 1.8443e-03 eta 17:11:49 +epoch [11/50] batch [430/1000] time 1.554 (1.564) data 0.000 (0.003) loss 0.9155 (1.1403) acc 84.3750 (71.2209) lr 1.8443e-03 eta 17:11:33 +epoch [11/50] batch [435/1000] time 1.523 (1.564) data 0.000 (0.003) loss 1.1963 (1.1387) acc 68.7500 (71.2500) lr 1.8443e-03 eta 17:11:16 +epoch [11/50] batch [440/1000] time 1.537 (1.564) data 0.000 (0.003) loss 1.3672 (1.1395) acc 65.6250 (71.1932) lr 1.8443e-03 eta 17:11:03 +epoch [11/50] batch [445/1000] time 1.537 (1.564) data 0.000 (0.003) loss 1.4883 (1.1400) acc 65.6250 (71.2008) lr 1.8443e-03 eta 17:10:49 +epoch [11/50] batch [450/1000] time 1.530 (1.564) data 0.000 (0.003) loss 1.3545 (1.1397) acc 68.7500 (71.2500) lr 1.8443e-03 eta 17:10:38 +epoch [11/50] batch [455/1000] time 1.559 (1.563) data 0.000 (0.003) loss 0.9395 (1.1389) acc 78.1250 (71.2843) lr 1.8443e-03 eta 17:10:27 +epoch [11/50] batch [460/1000] time 1.546 (1.563) data 0.001 (0.003) loss 1.0713 (1.1394) acc 78.1250 (71.2908) lr 1.8443e-03 eta 17:10:13 +epoch [11/50] batch [465/1000] time 1.569 (1.563) data 0.000 (0.002) loss 0.8252 (1.1394) acc 75.0000 (71.2903) lr 1.8443e-03 eta 17:10:05 +epoch 
[11/50] batch [470/1000] time 1.573 (1.564) data 0.000 (0.002) loss 0.9824 (1.1395) acc 71.8750 (71.3298) lr 1.8443e-03 eta 17:10:08 +epoch [11/50] batch [475/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.1582 (1.1405) acc 71.8750 (71.3224) lr 1.8443e-03 eta 17:10:00 +epoch [11/50] batch [480/1000] time 1.563 (1.563) data 0.001 (0.002) loss 1.0820 (1.1420) acc 68.7500 (71.3086) lr 1.8443e-03 eta 17:09:49 +epoch [11/50] batch [485/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.2139 (1.1422) acc 68.7500 (71.3015) lr 1.8443e-03 eta 17:09:38 +epoch [11/50] batch [490/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.2510 (1.1423) acc 71.8750 (71.3520) lr 1.8443e-03 eta 17:09:34 +epoch [11/50] batch [495/1000] time 1.530 (1.563) data 0.000 (0.002) loss 1.0918 (1.1427) acc 71.8750 (71.3321) lr 1.8443e-03 eta 17:09:23 +epoch [11/50] batch [500/1000] time 1.543 (1.563) data 0.001 (0.002) loss 0.9463 (1.1450) acc 68.7500 (71.2938) lr 1.8443e-03 eta 17:09:11 +epoch [11/50] batch [505/1000] time 1.553 (1.563) data 0.000 (0.002) loss 1.2090 (1.1446) acc 75.0000 (71.2809) lr 1.8443e-03 eta 17:08:58 +epoch [11/50] batch [510/1000] time 1.576 (1.563) data 0.000 (0.002) loss 1.1797 (1.1440) acc 62.5000 (71.2561) lr 1.8443e-03 eta 17:08:58 +epoch [11/50] batch [515/1000] time 1.546 (1.563) data 0.001 (0.002) loss 1.3760 (1.1447) acc 68.7500 (71.2439) lr 1.8443e-03 eta 17:08:45 +epoch [11/50] batch [520/1000] time 1.546 (1.563) data 0.000 (0.002) loss 1.2217 (1.1442) acc 75.0000 (71.2380) lr 1.8443e-03 eta 17:08:35 +epoch [11/50] batch [525/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.2100 (1.1448) acc 71.8750 (71.2321) lr 1.8443e-03 eta 17:08:23 +epoch [11/50] batch [530/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.0166 (1.1453) acc 75.0000 (71.2323) lr 1.8443e-03 eta 17:08:13 +epoch [11/50] batch [535/1000] time 1.576 (1.563) data 0.000 (0.002) loss 1.9297 (1.1454) acc 65.6250 (71.2383) lr 1.8443e-03 eta 17:08:03 +epoch [11/50] batch [540/1000] time 1.556 
(1.563) data 0.000 (0.002) loss 1.3154 (1.1468) acc 65.6250 (71.2095) lr 1.8443e-03 eta 17:07:49 +epoch [11/50] batch [545/1000] time 1.544 (1.563) data 0.000 (0.002) loss 0.9380 (1.1482) acc 78.1250 (71.1697) lr 1.8443e-03 eta 17:07:37 +epoch [11/50] batch [550/1000] time 1.532 (1.563) data 0.000 (0.002) loss 1.5020 (1.1468) acc 62.5000 (71.1932) lr 1.8443e-03 eta 17:07:22 +epoch [11/50] batch [555/1000] time 1.554 (1.562) data 0.001 (0.002) loss 0.7896 (1.1468) acc 87.5000 (71.2331) lr 1.8443e-03 eta 17:07:10 +epoch [11/50] batch [560/1000] time 1.555 (1.562) data 0.000 (0.002) loss 1.3662 (1.1470) acc 68.7500 (71.2333) lr 1.8443e-03 eta 17:07:02 +epoch [11/50] batch [565/1000] time 1.542 (1.562) data 0.000 (0.002) loss 0.5947 (1.1452) acc 84.3750 (71.2721) lr 1.8443e-03 eta 17:06:52 +epoch [11/50] batch [570/1000] time 1.552 (1.562) data 0.000 (0.002) loss 0.5273 (1.1468) acc 90.6250 (71.2281) lr 1.8443e-03 eta 17:06:43 +epoch [11/50] batch [575/1000] time 1.541 (1.563) data 0.001 (0.002) loss 1.0293 (1.1462) acc 78.1250 (71.2554) lr 1.8443e-03 eta 17:06:42 +epoch [11/50] batch [580/1000] time 1.550 (1.562) data 0.001 (0.002) loss 1.1338 (1.1486) acc 78.1250 (71.2231) lr 1.8443e-03 eta 17:06:30 +epoch [11/50] batch [585/1000] time 1.561 (1.562) data 0.001 (0.002) loss 0.9302 (1.1497) acc 71.8750 (71.1806) lr 1.8443e-03 eta 17:06:20 +epoch [11/50] batch [590/1000] time 1.561 (1.562) data 0.000 (0.002) loss 1.2959 (1.1493) acc 65.6250 (71.2076) lr 1.8443e-03 eta 17:06:11 +epoch [11/50] batch [595/1000] time 1.561 (1.562) data 0.000 (0.002) loss 0.8071 (1.1488) acc 78.1250 (71.1817) lr 1.8443e-03 eta 17:05:59 +epoch [11/50] batch [600/1000] time 1.567 (1.562) data 0.000 (0.002) loss 1.3877 (1.1488) acc 68.7500 (71.1823) lr 1.8443e-03 eta 17:05:52 +epoch [11/50] batch [605/1000] time 1.578 (1.562) data 0.000 (0.002) loss 1.1963 (1.1472) acc 65.6250 (71.2087) lr 1.8443e-03 eta 17:05:45 +epoch [11/50] batch [610/1000] time 1.564 (1.562) data 0.000 (0.002) loss 0.9204 
(1.1459) acc 75.0000 (71.2346) lr 1.8443e-03 eta 17:05:41 +epoch [11/50] batch [615/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.4316 (1.1476) acc 65.6250 (71.2144) lr 1.8443e-03 eta 17:05:31 +epoch [11/50] batch [620/1000] time 1.549 (1.562) data 0.000 (0.002) loss 1.3330 (1.1499) acc 65.6250 (71.1492) lr 1.8443e-03 eta 17:05:29 +epoch [11/50] batch [625/1000] time 1.565 (1.562) data 0.000 (0.002) loss 1.0684 (1.1498) acc 71.8750 (71.1550) lr 1.8443e-03 eta 17:05:21 +epoch [11/50] batch [630/1000] time 1.559 (1.562) data 0.000 (0.002) loss 0.5044 (1.1485) acc 81.2500 (71.1607) lr 1.8443e-03 eta 17:05:14 +epoch [11/50] batch [635/1000] time 1.556 (1.562) data 0.000 (0.002) loss 1.5479 (1.1506) acc 56.2500 (71.1220) lr 1.8443e-03 eta 17:05:07 +epoch [11/50] batch [640/1000] time 1.548 (1.562) data 0.000 (0.002) loss 1.0625 (1.1499) acc 65.6250 (71.1328) lr 1.8443e-03 eta 17:04:58 +epoch [11/50] batch [645/1000] time 1.561 (1.562) data 0.000 (0.002) loss 1.2822 (1.1506) acc 65.6250 (71.1192) lr 1.8443e-03 eta 17:04:51 +epoch [11/50] batch [650/1000] time 1.579 (1.562) data 0.001 (0.002) loss 1.9092 (1.1521) acc 68.7500 (71.1298) lr 1.8443e-03 eta 17:04:40 +epoch [11/50] batch [655/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.7246 (1.1542) acc 56.2500 (71.0973) lr 1.8443e-03 eta 17:04:32 +epoch [11/50] batch [660/1000] time 1.724 (1.563) data 0.000 (0.002) loss 1.4121 (1.1556) acc 59.3750 (71.0653) lr 1.8443e-03 eta 17:04:35 +epoch [11/50] batch [665/1000] time 1.580 (1.563) data 0.000 (0.002) loss 1.1895 (1.1540) acc 71.8750 (71.0761) lr 1.8443e-03 eta 17:04:31 +epoch [11/50] batch [670/1000] time 1.591 (1.563) data 0.000 (0.002) loss 1.7578 (1.1546) acc 65.6250 (71.0821) lr 1.8443e-03 eta 17:04:28 +epoch [11/50] batch [675/1000] time 1.565 (1.563) data 0.000 (0.002) loss 1.1807 (1.1553) acc 65.6250 (71.0741) lr 1.8443e-03 eta 17:04:18 +epoch [11/50] batch [680/1000] time 1.571 (1.563) data 0.000 (0.002) loss 1.2686 (1.1560) acc 71.8750 (71.0432) lr 
1.8443e-03 eta 17:04:10 +epoch [11/50] batch [685/1000] time 1.553 (1.563) data 0.000 (0.002) loss 0.8105 (1.1547) acc 81.2500 (71.0903) lr 1.8443e-03 eta 17:03:59 +epoch [11/50] batch [690/1000] time 1.549 (1.563) data 0.001 (0.002) loss 1.3408 (1.1564) acc 71.8750 (71.0507) lr 1.8443e-03 eta 17:03:52 +epoch [11/50] batch [695/1000] time 1.547 (1.563) data 0.000 (0.002) loss 1.2432 (1.1557) acc 68.7500 (71.0746) lr 1.8443e-03 eta 17:03:41 +epoch [11/50] batch [700/1000] time 1.534 (1.562) data 0.000 (0.002) loss 1.2021 (1.1561) acc 78.1250 (71.0759) lr 1.8443e-03 eta 17:03:24 +epoch [11/50] batch [705/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.6465 (1.1582) acc 65.6250 (71.0328) lr 1.8443e-03 eta 17:03:14 +epoch [11/50] batch [710/1000] time 1.571 (1.562) data 0.000 (0.002) loss 0.5337 (1.1590) acc 84.3750 (71.0299) lr 1.8443e-03 eta 17:03:08 +epoch [11/50] batch [715/1000] time 1.544 (1.562) data 0.000 (0.002) loss 1.4580 (1.1580) acc 71.8750 (71.0402) lr 1.8443e-03 eta 17:02:56 +epoch [11/50] batch [720/1000] time 1.552 (1.562) data 0.000 (0.002) loss 1.3105 (1.1575) acc 68.7500 (71.0634) lr 1.8443e-03 eta 17:02:50 +epoch [11/50] batch [725/1000] time 1.545 (1.563) data 0.000 (0.002) loss 1.0400 (1.1564) acc 81.2500 (71.1207) lr 1.8443e-03 eta 17:02:47 +epoch [11/50] batch [730/1000] time 1.560 (1.562) data 0.000 (0.002) loss 1.2256 (1.1568) acc 68.7500 (71.1130) lr 1.8443e-03 eta 17:02:36 +epoch [11/50] batch [735/1000] time 1.574 (1.562) data 0.001 (0.002) loss 1.0908 (1.1562) acc 65.6250 (71.1054) lr 1.8443e-03 eta 17:02:27 +epoch [11/50] batch [740/1000] time 1.546 (1.562) data 0.001 (0.002) loss 1.4404 (1.1561) acc 62.5000 (71.1106) lr 1.8443e-03 eta 17:02:16 +epoch [11/50] batch [745/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.1064 (1.1567) acc 68.7500 (71.0990) lr 1.8443e-03 eta 17:02:08 +epoch [11/50] batch [750/1000] time 1.562 (1.562) data 0.000 (0.002) loss 0.6587 (1.1556) acc 78.1250 (71.1333) lr 1.8443e-03 eta 17:02:02 +epoch [11/50] 
batch [755/1000] time 1.543 (1.562) data 0.000 (0.002) loss 0.7954 (1.1543) acc 78.1250 (71.1507) lr 1.8443e-03 eta 17:01:51 +epoch [11/50] batch [760/1000] time 1.540 (1.562) data 0.000 (0.002) loss 1.4189 (1.1558) acc 56.2500 (71.0896) lr 1.8443e-03 eta 17:01:43 +epoch [11/50] batch [765/1000] time 1.544 (1.562) data 0.000 (0.002) loss 0.7148 (1.1559) acc 78.1250 (71.0825) lr 1.8443e-03 eta 17:01:34 +epoch [11/50] batch [770/1000] time 1.545 (1.562) data 0.001 (0.002) loss 1.3350 (1.1550) acc 75.0000 (71.1080) lr 1.8443e-03 eta 17:01:33 +epoch [11/50] batch [775/1000] time 1.565 (1.562) data 0.000 (0.002) loss 1.9014 (1.1552) acc 71.8750 (71.1532) lr 1.8443e-03 eta 17:01:26 +epoch [11/50] batch [780/1000] time 1.538 (1.562) data 0.001 (0.002) loss 0.7598 (1.1561) acc 87.5000 (71.1498) lr 1.8443e-03 eta 17:01:16 +epoch [11/50] batch [785/1000] time 1.552 (1.562) data 0.001 (0.002) loss 0.8276 (1.1552) acc 75.0000 (71.1863) lr 1.8443e-03 eta 17:01:08 +epoch [11/50] batch [790/1000] time 1.550 (1.562) data 0.000 (0.002) loss 1.4932 (1.1557) acc 62.5000 (71.1748) lr 1.8443e-03 eta 17:01:01 +epoch [11/50] batch [795/1000] time 1.566 (1.562) data 0.000 (0.002) loss 1.3867 (1.1566) acc 65.6250 (71.1478) lr 1.8443e-03 eta 17:00:54 +epoch [11/50] batch [800/1000] time 1.579 (1.562) data 0.001 (0.002) loss 1.4814 (1.1578) acc 65.6250 (71.1094) lr 1.8443e-03 eta 17:00:47 +epoch [11/50] batch [805/1000] time 1.557 (1.562) data 0.001 (0.002) loss 1.2588 (1.1578) acc 59.3750 (71.0986) lr 1.8443e-03 eta 17:00:39 +epoch [11/50] batch [810/1000] time 1.565 (1.563) data 0.001 (0.002) loss 1.1846 (1.1584) acc 75.0000 (71.0880) lr 1.8443e-03 eta 17:00:34 +epoch [11/50] batch [815/1000] time 1.621 (1.563) data 0.000 (0.002) loss 0.9966 (1.1565) acc 75.0000 (71.1350) lr 1.8443e-03 eta 17:00:38 +epoch [11/50] batch [820/1000] time 1.557 (1.563) data 0.000 (0.002) loss 1.0781 (1.1569) acc 78.1250 (71.1395) lr 1.8443e-03 eta 17:00:29 +epoch [11/50] batch [825/1000] time 1.552 (1.563) 
data 0.001 (0.002) loss 0.9180 (1.1575) acc 71.8750 (71.1061) lr 1.8443e-03 eta 17:00:22 +epoch [11/50] batch [830/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.7998 (1.1579) acc 81.2500 (71.0919) lr 1.8443e-03 eta 17:00:15 +epoch [11/50] batch [835/1000] time 1.543 (1.563) data 0.000 (0.002) loss 1.0713 (1.1579) acc 75.0000 (71.0928) lr 1.8443e-03 eta 17:00:05 +epoch [11/50] batch [840/1000] time 1.558 (1.563) data 0.001 (0.002) loss 0.8892 (1.1580) acc 78.1250 (71.0789) lr 1.8443e-03 eta 16:59:55 +epoch [11/50] batch [845/1000] time 1.536 (1.563) data 0.001 (0.002) loss 0.9272 (1.1575) acc 75.0000 (71.0799) lr 1.8443e-03 eta 16:59:44 +epoch [11/50] batch [850/1000] time 1.585 (1.563) data 0.001 (0.002) loss 1.3086 (1.1569) acc 71.8750 (71.0919) lr 1.8443e-03 eta 16:59:36 +epoch [11/50] batch [855/1000] time 1.565 (1.563) data 0.001 (0.002) loss 1.7686 (1.1572) acc 68.7500 (71.1001) lr 1.8443e-03 eta 16:59:30 +epoch [11/50] batch [860/1000] time 1.559 (1.563) data 0.000 (0.002) loss 1.0176 (1.1573) acc 71.8750 (71.0901) lr 1.8443e-03 eta 16:59:22 +epoch [11/50] batch [865/1000] time 1.585 (1.563) data 0.000 (0.002) loss 0.7393 (1.1565) acc 81.2500 (71.1091) lr 1.8443e-03 eta 16:59:15 +epoch [11/50] batch [870/1000] time 1.573 (1.563) data 0.001 (0.002) loss 0.7197 (1.1563) acc 78.1250 (71.1315) lr 1.8443e-03 eta 16:59:10 +epoch [11/50] batch [875/1000] time 1.564 (1.563) data 0.001 (0.002) loss 1.7656 (1.1581) acc 62.5000 (71.1179) lr 1.8443e-03 eta 16:59:10 +epoch [11/50] batch [880/1000] time 1.580 (1.563) data 0.001 (0.002) loss 0.8604 (1.1579) acc 81.2500 (71.1399) lr 1.8443e-03 eta 16:59:04 +epoch [11/50] batch [885/1000] time 1.575 (1.563) data 0.000 (0.002) loss 0.9902 (1.1573) acc 81.2500 (71.1476) lr 1.8443e-03 eta 16:58:55 +epoch [11/50] batch [890/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.5859 (1.1578) acc 65.6250 (71.1376) lr 1.8443e-03 eta 16:58:47 +epoch [11/50] batch [895/1000] time 1.577 (1.563) data 0.000 (0.002) loss 1.2119 (1.1582) 
acc 65.6250 (71.1453) lr 1.8443e-03 eta 16:58:39 +epoch [11/50] batch [900/1000] time 1.563 (1.563) data 0.000 (0.001) loss 1.3984 (1.1587) acc 62.5000 (71.1076) lr 1.8443e-03 eta 16:58:30 +epoch [11/50] batch [905/1000] time 1.554 (1.563) data 0.000 (0.001) loss 1.0488 (1.1573) acc 71.8750 (71.1291) lr 1.8443e-03 eta 16:58:23 +epoch [11/50] batch [910/1000] time 1.552 (1.563) data 0.000 (0.001) loss 0.9355 (1.1567) acc 71.8750 (71.1435) lr 1.8443e-03 eta 16:58:14 +epoch [11/50] batch [915/1000] time 1.575 (1.563) data 0.000 (0.001) loss 0.7134 (1.1553) acc 84.3750 (71.1783) lr 1.8443e-03 eta 16:58:07 +epoch [11/50] batch [920/1000] time 1.536 (1.563) data 0.001 (0.001) loss 1.1416 (1.1571) acc 75.0000 (71.1583) lr 1.8443e-03 eta 16:58:03 +epoch [11/50] batch [925/1000] time 1.570 (1.563) data 0.000 (0.001) loss 1.0771 (1.1567) acc 75.0000 (71.1622) lr 1.8443e-03 eta 16:57:56 +epoch [11/50] batch [930/1000] time 1.554 (1.563) data 0.001 (0.001) loss 1.1104 (1.1554) acc 62.5000 (71.1660) lr 1.8443e-03 eta 16:57:48 +epoch [11/50] batch [935/1000] time 1.556 (1.563) data 0.000 (0.001) loss 1.3545 (1.1551) acc 68.7500 (71.1932) lr 1.8443e-03 eta 16:57:41 +epoch [11/50] batch [940/1000] time 1.563 (1.563) data 0.000 (0.001) loss 1.4580 (1.1540) acc 59.3750 (71.2134) lr 1.8443e-03 eta 16:57:32 +epoch [11/50] batch [945/1000] time 1.543 (1.563) data 0.000 (0.001) loss 1.2607 (1.1543) acc 81.2500 (71.2169) lr 1.8443e-03 eta 16:57:25 +epoch [11/50] batch [950/1000] time 1.562 (1.563) data 0.000 (0.001) loss 0.5688 (1.1542) acc 81.2500 (71.2204) lr 1.8443e-03 eta 16:57:17 +epoch [11/50] batch [955/1000] time 1.570 (1.563) data 0.000 (0.001) loss 1.1064 (1.1543) acc 68.7500 (71.2336) lr 1.8443e-03 eta 16:57:08 +epoch [11/50] batch [960/1000] time 1.564 (1.563) data 0.000 (0.001) loss 1.1943 (1.1544) acc 71.8750 (71.2305) lr 1.8443e-03 eta 16:56:57 +epoch [11/50] batch [965/1000] time 1.575 (1.563) data 0.000 (0.001) loss 0.9111 (1.1535) acc 78.1250 (71.2662) lr 1.8443e-03 eta 
16:56:58 +epoch [11/50] batch [970/1000] time 1.570 (1.563) data 0.000 (0.001) loss 1.4365 (1.1542) acc 65.6250 (71.2371) lr 1.8443e-03 eta 16:56:50 +epoch [11/50] batch [975/1000] time 1.555 (1.563) data 0.000 (0.001) loss 0.8789 (1.1553) acc 75.0000 (71.2276) lr 1.8443e-03 eta 16:56:38 +epoch [11/50] batch [980/1000] time 1.554 (1.563) data 0.000 (0.001) loss 1.4453 (1.1548) acc 62.5000 (71.2245) lr 1.8443e-03 eta 16:56:28 +epoch [11/50] batch [985/1000] time 1.552 (1.563) data 0.001 (0.001) loss 1.2314 (1.1554) acc 81.2500 (71.2341) lr 1.8443e-03 eta 16:56:18 +epoch [11/50] batch [990/1000] time 1.546 (1.563) data 0.000 (0.001) loss 1.4434 (1.1560) acc 62.5000 (71.2247) lr 1.8443e-03 eta 16:56:07 +epoch [11/50] batch [995/1000] time 1.569 (1.563) data 0.000 (0.001) loss 1.3125 (1.1558) acc 65.6250 (71.2217) lr 1.8443e-03 eta 16:55:57 +epoch [11/50] batch [1000/1000] time 1.565 (1.563) data 0.000 (0.001) loss 1.6523 (1.1561) acc 65.6250 (71.2281) lr 1.8090e-03 eta 16:55:49 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,078 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +epoch [12/50] batch [5/1000] time 1.542 (1.665) data 0.001 (0.170) loss 1.5059 (1.3250) acc 71.8750 (70.0000) lr 1.8090e-03 eta 18:02:23 +epoch [12/50] batch [10/1000] time 1.552 (1.609) data 0.001 (0.085) loss 1.1797 (1.1437) acc 75.0000 (74.3750) lr 1.8090e-03 eta 17:25:23 +epoch [12/50] batch [15/1000] time 1.539 (1.593) data 0.000 (0.057) loss 1.5498 (1.1576) acc 56.2500 (72.2917) lr 1.8090e-03 eta 17:14:48 +epoch [12/50] batch [20/1000] time 1.575 (1.586) data 0.000 (0.043) loss 0.6538 (1.0891) acc 81.2500 (73.1250) lr 1.8090e-03 eta 17:10:08 +epoch [12/50] batch [25/1000] time 1.578 (1.584) data 0.000 (0.034) loss 1.4482 (1.0965) acc 62.5000 (72.6250) lr 1.8090e-03 eta 17:08:46 +epoch [12/50] batch [30/1000] time 1.530 (1.578) data 0.001 (0.029) loss 0.9619 (1.0919) acc 78.1250 (72.7083) lr 1.8090e-03 eta 17:05:07 +epoch [12/50] batch [35/1000] time 1.563 
(1.584) data 0.000 (0.025) loss 0.7812 (1.1016) acc 78.1250 (72.7679) lr 1.8090e-03 eta 17:08:36 +epoch [12/50] batch [40/1000] time 1.589 (1.583) data 0.001 (0.022) loss 1.7988 (1.1424) acc 59.3750 (72.3438) lr 1.8090e-03 eta 17:07:38 +epoch [12/50] batch [45/1000] time 1.564 (1.581) data 0.000 (0.019) loss 1.5303 (1.1528) acc 43.7500 (71.5278) lr 1.8090e-03 eta 17:06:18 +epoch [12/50] batch [50/1000] time 1.534 (1.578) data 0.000 (0.017) loss 0.9399 (1.1544) acc 71.8750 (71.3125) lr 1.8090e-03 eta 17:04:26 +epoch [12/50] batch [55/1000] time 1.571 (1.576) data 0.000 (0.016) loss 0.9194 (1.1574) acc 75.0000 (71.4773) lr 1.8090e-03 eta 17:03:12 +epoch [12/50] batch [60/1000] time 1.581 (1.575) data 0.000 (0.015) loss 1.0166 (1.1518) acc 78.1250 (71.8750) lr 1.8090e-03 eta 17:02:27 +epoch [12/50] batch [65/1000] time 1.558 (1.575) data 0.000 (0.013) loss 1.5205 (1.1652) acc 59.3750 (71.6827) lr 1.8090e-03 eta 17:02:03 +epoch [12/50] batch [70/1000] time 1.587 (1.575) data 0.000 (0.013) loss 1.3105 (1.1688) acc 71.8750 (71.8304) lr 1.8090e-03 eta 17:01:36 +epoch [12/50] batch [75/1000] time 1.569 (1.574) data 0.001 (0.012) loss 1.5137 (1.1668) acc 71.8750 (71.9167) lr 1.8090e-03 eta 17:01:02 +epoch [12/50] batch [80/1000] time 1.587 (1.575) data 0.000 (0.011) loss 0.5469 (1.1545) acc 90.6250 (72.1484) lr 1.8090e-03 eta 17:01:52 +epoch [12/50] batch [85/1000] time 1.540 (1.575) data 0.000 (0.010) loss 0.8516 (1.1487) acc 78.1250 (72.2426) lr 1.8090e-03 eta 17:01:26 +epoch [12/50] batch [90/1000] time 1.551 (1.574) data 0.000 (0.010) loss 1.2402 (1.1471) acc 62.5000 (72.0486) lr 1.8090e-03 eta 17:00:56 +epoch [12/50] batch [95/1000] time 1.545 (1.573) data 0.000 (0.009) loss 0.9653 (1.1459) acc 62.5000 (71.7763) lr 1.8090e-03 eta 17:00:12 +epoch [12/50] batch [100/1000] time 1.555 (1.573) data 0.001 (0.009) loss 1.4932 (1.1508) acc 59.3750 (71.5000) lr 1.8090e-03 eta 16:59:47 +epoch [12/50] batch [105/1000] time 1.560 (1.572) data 0.000 (0.009) loss 1.2432 (1.1564) acc 
62.5000 (71.1310) lr 1.8090e-03 eta 16:59:13 +epoch [12/50] batch [110/1000] time 1.557 (1.572) data 0.000 (0.008) loss 1.1885 (1.1512) acc 71.8750 (71.2784) lr 1.8090e-03 eta 16:58:37 +epoch [12/50] batch [115/1000] time 1.574 (1.571) data 0.000 (0.008) loss 1.4746 (1.1525) acc 75.0000 (71.4402) lr 1.8090e-03 eta 16:58:21 +epoch [12/50] batch [120/1000] time 1.567 (1.571) data 0.001 (0.008) loss 1.0635 (1.1481) acc 71.8750 (71.5365) lr 1.8090e-03 eta 16:58:02 +epoch [12/50] batch [125/1000] time 1.539 (1.570) data 0.000 (0.007) loss 0.7842 (1.1488) acc 78.1250 (71.5500) lr 1.8090e-03 eta 16:57:23 +epoch [12/50] batch [130/1000] time 1.543 (1.569) data 0.000 (0.007) loss 0.9438 (1.1402) acc 71.8750 (71.6346) lr 1.8090e-03 eta 16:56:43 +epoch [12/50] batch [135/1000] time 1.585 (1.569) data 0.000 (0.007) loss 1.0654 (1.1395) acc 68.7500 (71.6435) lr 1.8090e-03 eta 16:56:26 +epoch [12/50] batch [140/1000] time 1.576 (1.570) data 0.000 (0.007) loss 1.1523 (1.1367) acc 65.6250 (71.6964) lr 1.8090e-03 eta 16:56:54 +epoch [12/50] batch [145/1000] time 1.561 (1.570) data 0.001 (0.006) loss 1.0107 (1.1339) acc 71.8750 (71.6595) lr 1.8090e-03 eta 16:56:40 +epoch [12/50] batch [150/1000] time 1.590 (1.571) data 0.000 (0.006) loss 1.5938 (1.1371) acc 62.5000 (71.6667) lr 1.8090e-03 eta 16:56:56 +epoch [12/50] batch [155/1000] time 1.552 (1.570) data 0.001 (0.006) loss 0.8013 (1.1362) acc 75.0000 (71.6935) lr 1.8090e-03 eta 16:56:22 +epoch [12/50] batch [160/1000] time 1.548 (1.569) data 0.000 (0.006) loss 0.8369 (1.1364) acc 84.3750 (71.6406) lr 1.8090e-03 eta 16:55:51 +epoch [12/50] batch [165/1000] time 1.543 (1.569) data 0.000 (0.006) loss 0.9067 (1.1392) acc 84.3750 (71.5720) lr 1.8090e-03 eta 16:55:33 +epoch [12/50] batch [170/1000] time 1.571 (1.569) data 0.001 (0.005) loss 1.2910 (1.1443) acc 65.6250 (71.3971) lr 1.8090e-03 eta 16:55:32 +epoch [12/50] batch [175/1000] time 1.569 (1.569) data 0.001 (0.005) loss 1.2520 (1.1425) acc 62.5000 (71.3929) lr 1.8090e-03 eta 
16:55:12 +epoch [12/50] batch [180/1000] time 1.573 (1.568) data 0.000 (0.005) loss 0.6978 (1.1481) acc 75.0000 (71.3368) lr 1.8090e-03 eta 16:54:44 +epoch [12/50] batch [185/1000] time 1.558 (1.569) data 0.001 (0.005) loss 1.0234 (1.1439) acc 78.1250 (71.3514) lr 1.8090e-03 eta 16:54:55 +epoch [12/50] batch [190/1000] time 1.537 (1.568) data 0.001 (0.005) loss 1.1543 (1.1436) acc 62.5000 (71.3487) lr 1.8090e-03 eta 16:54:25 +epoch [12/50] batch [195/1000] time 1.572 (1.568) data 0.000 (0.005) loss 1.1250 (1.1459) acc 78.1250 (71.3462) lr 1.8090e-03 eta 16:54:01 +epoch [12/50] batch [200/1000] time 1.542 (1.568) data 0.000 (0.005) loss 1.0137 (1.1495) acc 75.0000 (71.1719) lr 1.8090e-03 eta 16:53:41 +epoch [12/50] batch [205/1000] time 1.550 (1.567) data 0.000 (0.005) loss 0.2617 (1.1472) acc 96.8750 (71.2195) lr 1.8090e-03 eta 16:53:17 +epoch [12/50] batch [210/1000] time 1.548 (1.567) data 0.000 (0.005) loss 1.1504 (1.1515) acc 62.5000 (71.1161) lr 1.8090e-03 eta 16:52:59 +epoch [12/50] batch [215/1000] time 1.551 (1.567) data 0.000 (0.004) loss 1.2520 (1.1526) acc 78.1250 (71.1773) lr 1.8090e-03 eta 16:52:37 +epoch [12/50] batch [220/1000] time 1.552 (1.566) data 0.001 (0.004) loss 1.5098 (1.1566) acc 68.7500 (71.1222) lr 1.8090e-03 eta 16:52:21 +epoch [12/50] batch [225/1000] time 1.550 (1.566) data 0.001 (0.004) loss 0.9849 (1.1508) acc 68.7500 (71.1528) lr 1.8090e-03 eta 16:52:04 +epoch [12/50] batch [230/1000] time 1.542 (1.566) data 0.000 (0.004) loss 1.1504 (1.1480) acc 65.6250 (71.1957) lr 1.8090e-03 eta 16:52:11 +epoch [12/50] batch [235/1000] time 1.591 (1.566) data 0.000 (0.004) loss 1.3936 (1.1475) acc 68.7500 (71.1702) lr 1.8090e-03 eta 16:52:03 +epoch [12/50] batch [240/1000] time 1.552 (1.566) data 0.000 (0.004) loss 1.1904 (1.1480) acc 75.0000 (71.1719) lr 1.8090e-03 eta 16:51:49 +epoch [12/50] batch [245/1000] time 1.550 (1.566) data 0.000 (0.004) loss 1.4531 (1.1521) acc 68.7500 (71.0714) lr 1.8090e-03 eta 16:51:39 +epoch [12/50] batch 
[250/1000] time 1.551 (1.566) data 0.000 (0.004) loss 0.9590 (1.1484) acc 71.8750 (71.1625) lr 1.8090e-03 eta 16:51:28 +epoch [12/50] batch [255/1000] time 1.539 (1.566) data 0.000 (0.004) loss 0.6812 (1.1487) acc 81.2500 (71.1029) lr 1.8090e-03 eta 16:51:13 +epoch [12/50] batch [260/1000] time 1.544 (1.566) data 0.000 (0.004) loss 1.1113 (1.1490) acc 62.5000 (70.9976) lr 1.8090e-03 eta 16:50:55 +epoch [12/50] batch [265/1000] time 1.540 (1.566) data 0.001 (0.004) loss 1.3311 (1.1486) acc 68.7500 (71.0142) lr 1.8090e-03 eta 16:50:40 +epoch [12/50] batch [270/1000] time 1.554 (1.565) data 0.000 (0.004) loss 1.3174 (1.1508) acc 62.5000 (70.9838) lr 1.8090e-03 eta 16:50:19 +epoch [12/50] batch [275/1000] time 1.579 (1.565) data 0.000 (0.004) loss 1.3594 (1.1517) acc 59.3750 (70.8977) lr 1.8090e-03 eta 16:50:12 +epoch [12/50] batch [280/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.7607 (1.1511) acc 59.3750 (70.9040) lr 1.8090e-03 eta 16:50:04 +epoch [12/50] batch [285/1000] time 1.554 (1.565) data 0.001 (0.003) loss 1.5332 (1.1499) acc 65.6250 (70.9649) lr 1.8090e-03 eta 16:49:56 +epoch [12/50] batch [290/1000] time 1.688 (1.566) data 0.000 (0.003) loss 1.0186 (1.1548) acc 81.2500 (70.8190) lr 1.8090e-03 eta 16:50:01 +epoch [12/50] batch [295/1000] time 1.578 (1.566) data 0.000 (0.003) loss 1.0752 (1.1569) acc 75.0000 (70.8263) lr 1.8090e-03 eta 16:49:59 +epoch [12/50] batch [300/1000] time 1.549 (1.566) data 0.001 (0.003) loss 1.3525 (1.1580) acc 65.6250 (70.8438) lr 1.8090e-03 eta 16:49:49 +epoch [12/50] batch [305/1000] time 1.565 (1.565) data 0.001 (0.003) loss 1.4463 (1.1588) acc 65.6250 (70.8504) lr 1.8090e-03 eta 16:49:31 +epoch [12/50] batch [310/1000] time 1.577 (1.565) data 0.000 (0.003) loss 0.7983 (1.1559) acc 71.8750 (70.8468) lr 1.8090e-03 eta 16:49:26 +epoch [12/50] batch [315/1000] time 1.544 (1.565) data 0.000 (0.003) loss 1.7256 (1.1548) acc 56.2500 (70.8036) lr 1.8090e-03 eta 16:49:15 +epoch [12/50] batch [320/1000] time 1.549 (1.565) data 
0.000 (0.003) loss 1.1191 (1.1585) acc 71.8750 (70.7227) lr 1.8090e-03 eta 16:49:10 +epoch [12/50] batch [325/1000] time 1.540 (1.565) data 0.001 (0.003) loss 1.4932 (1.1576) acc 53.1250 (70.7308) lr 1.8090e-03 eta 16:48:48 +epoch [12/50] batch [330/1000] time 1.545 (1.565) data 0.001 (0.003) loss 1.1113 (1.1611) acc 71.8750 (70.6818) lr 1.8090e-03 eta 16:48:33 +epoch [12/50] batch [335/1000] time 1.739 (1.565) data 0.000 (0.003) loss 0.9971 (1.1603) acc 71.8750 (70.6996) lr 1.8090e-03 eta 16:48:40 +epoch [12/50] batch [340/1000] time 1.572 (1.565) data 0.000 (0.003) loss 1.2354 (1.1614) acc 75.0000 (70.7261) lr 1.8090e-03 eta 16:48:35 +epoch [12/50] batch [345/1000] time 1.606 (1.565) data 0.001 (0.003) loss 0.5591 (1.1613) acc 84.3750 (70.7065) lr 1.8090e-03 eta 16:48:32 +epoch [12/50] batch [350/1000] time 1.549 (1.565) data 0.001 (0.003) loss 0.8813 (1.1591) acc 75.0000 (70.7411) lr 1.8090e-03 eta 16:48:19 +epoch [12/50] batch [355/1000] time 1.519 (1.565) data 0.001 (0.003) loss 0.9062 (1.1562) acc 75.0000 (70.7923) lr 1.8090e-03 eta 16:48:04 +epoch [12/50] batch [360/1000] time 1.553 (1.565) data 0.000 (0.003) loss 1.9209 (1.1588) acc 62.5000 (70.8073) lr 1.8090e-03 eta 16:47:51 +epoch [12/50] batch [365/1000] time 1.547 (1.565) data 0.000 (0.003) loss 1.2285 (1.1597) acc 71.8750 (70.7705) lr 1.8090e-03 eta 16:47:39 +epoch [12/50] batch [370/1000] time 1.551 (1.565) data 0.000 (0.003) loss 0.9058 (1.1602) acc 71.8750 (70.8193) lr 1.8090e-03 eta 16:47:27 +epoch [12/50] batch [375/1000] time 1.544 (1.565) data 0.000 (0.003) loss 0.8306 (1.1595) acc 87.5000 (70.8250) lr 1.8090e-03 eta 16:47:13 +epoch [12/50] batch [380/1000] time 1.554 (1.565) data 0.000 (0.003) loss 0.5776 (1.1576) acc 84.3750 (70.8882) lr 1.8090e-03 eta 16:47:14 +epoch [12/50] batch [385/1000] time 1.562 (1.565) data 0.000 (0.003) loss 0.7349 (1.1552) acc 78.1250 (70.9497) lr 1.8090e-03 eta 16:47:01 +epoch [12/50] batch [390/1000] time 1.544 (1.565) data 0.001 (0.003) loss 0.5137 (1.1548) acc 
87.5000 (70.9856) lr 1.8090e-03 eta 16:46:50 +epoch [12/50] batch [395/1000] time 1.551 (1.565) data 0.001 (0.003) loss 1.0947 (1.1527) acc 75.0000 (71.0364) lr 1.8090e-03 eta 16:46:38 +epoch [12/50] batch [400/1000] time 1.569 (1.564) data 0.000 (0.003) loss 1.1553 (1.1524) acc 68.7500 (71.0234) lr 1.8090e-03 eta 16:46:24 +epoch [12/50] batch [405/1000] time 1.544 (1.564) data 0.001 (0.003) loss 1.0283 (1.1530) acc 68.7500 (71.0185) lr 1.8090e-03 eta 16:46:11 +epoch [12/50] batch [410/1000] time 1.543 (1.564) data 0.001 (0.003) loss 1.1143 (1.1512) acc 75.0000 (71.0976) lr 1.8090e-03 eta 16:45:56 +epoch [12/50] batch [415/1000] time 1.558 (1.564) data 0.000 (0.003) loss 1.2500 (1.1486) acc 75.0000 (71.1672) lr 1.8090e-03 eta 16:45:49 +epoch [12/50] batch [420/1000] time 1.576 (1.564) data 0.000 (0.002) loss 0.8887 (1.1464) acc 81.2500 (71.1979) lr 1.8090e-03 eta 16:45:42 +epoch [12/50] batch [425/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.3691 (1.1479) acc 78.1250 (71.1985) lr 1.8090e-03 eta 16:45:32 +epoch [12/50] batch [430/1000] time 1.568 (1.564) data 0.001 (0.002) loss 0.8511 (1.1466) acc 78.1250 (71.2209) lr 1.8090e-03 eta 16:45:22 +epoch [12/50] batch [435/1000] time 1.566 (1.564) data 0.001 (0.002) loss 0.8525 (1.1461) acc 81.2500 (71.2572) lr 1.8090e-03 eta 16:45:16 +epoch [12/50] batch [440/1000] time 1.547 (1.564) data 0.000 (0.002) loss 0.9858 (1.1472) acc 71.8750 (71.2216) lr 1.8090e-03 eta 16:45:02 +epoch [12/50] batch [445/1000] time 1.590 (1.564) data 0.000 (0.002) loss 0.9336 (1.1442) acc 71.8750 (71.2430) lr 1.8090e-03 eta 16:45:14 +epoch [12/50] batch [450/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.6885 (1.1455) acc 56.2500 (71.1875) lr 1.8090e-03 eta 16:45:06 +epoch [12/50] batch [455/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.0957 (1.1459) acc 75.0000 (71.1951) lr 1.8090e-03 eta 16:44:56 +epoch [12/50] batch [460/1000] time 1.549 (1.564) data 0.000 (0.002) loss 0.9912 (1.1451) acc 68.7500 (71.2228) lr 1.8090e-03 eta 
16:44:45 +epoch [12/50] batch [465/1000] time 1.544 (1.564) data 0.001 (0.002) loss 1.1084 (1.1436) acc 65.6250 (71.2164) lr 1.8090e-03 eta 16:44:34 +epoch [12/50] batch [470/1000] time 1.553 (1.564) data 0.001 (0.002) loss 1.8877 (1.1455) acc 65.6250 (71.2035) lr 1.8090e-03 eta 16:44:26 +epoch [12/50] batch [475/1000] time 1.556 (1.564) data 0.001 (0.002) loss 1.2949 (1.1453) acc 71.8750 (71.2434) lr 1.8090e-03 eta 16:44:18 +epoch [12/50] batch [480/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.8252 (1.1476) acc 56.2500 (71.1784) lr 1.8090e-03 eta 16:44:08 +epoch [12/50] batch [485/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.1367 (1.1461) acc 68.7500 (71.1791) lr 1.8090e-03 eta 16:44:01 +epoch [12/50] batch [490/1000] time 1.556 (1.564) data 0.000 (0.002) loss 0.9136 (1.1469) acc 84.3750 (71.1990) lr 1.8090e-03 eta 16:44:01 +epoch [12/50] batch [495/1000] time 1.548 (1.564) data 0.000 (0.002) loss 0.8350 (1.1446) acc 75.0000 (71.2247) lr 1.8090e-03 eta 16:43:53 +epoch [12/50] batch [500/1000] time 1.530 (1.564) data 0.001 (0.002) loss 0.8667 (1.1448) acc 81.2500 (71.2438) lr 1.8090e-03 eta 16:43:39 +epoch [12/50] batch [505/1000] time 1.551 (1.564) data 0.001 (0.002) loss 1.0898 (1.1468) acc 75.0000 (71.2067) lr 1.8090e-03 eta 16:43:29 +epoch [12/50] batch [510/1000] time 1.560 (1.564) data 0.000 (0.002) loss 1.1396 (1.1447) acc 81.2500 (71.2377) lr 1.8090e-03 eta 16:43:16 +epoch [12/50] batch [515/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.1133 (1.1433) acc 56.2500 (71.2015) lr 1.8090e-03 eta 16:43:07 +epoch [12/50] batch [520/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.6436 (1.1431) acc 62.5000 (71.1719) lr 1.8090e-03 eta 16:42:58 +epoch [12/50] batch [525/1000] time 1.559 (1.564) data 0.001 (0.002) loss 0.6978 (1.1431) acc 84.3750 (71.1905) lr 1.8090e-03 eta 16:42:49 +epoch [12/50] batch [530/1000] time 1.581 (1.564) data 0.001 (0.002) loss 1.0342 (1.1437) acc 78.1250 (71.1675) lr 1.8090e-03 eta 16:42:52 +epoch [12/50] batch 
[535/1000] time 1.571 (1.564) data 0.001 (0.002) loss 1.5234 (1.1440) acc 62.5000 (71.1799) lr 1.8090e-03 eta 16:42:47 +epoch [12/50] batch [540/1000] time 1.522 (1.564) data 0.000 (0.002) loss 1.3838 (1.1429) acc 62.5000 (71.1921) lr 1.8090e-03 eta 16:42:35 +epoch [12/50] batch [545/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.0791 (1.1417) acc 75.0000 (71.2156) lr 1.8090e-03 eta 16:42:25 +epoch [12/50] batch [550/1000] time 1.539 (1.564) data 0.000 (0.002) loss 1.2803 (1.1408) acc 65.6250 (71.2102) lr 1.8090e-03 eta 16:42:14 +epoch [12/50] batch [555/1000] time 1.565 (1.564) data 0.000 (0.002) loss 0.9946 (1.1412) acc 81.2500 (71.2162) lr 1.8090e-03 eta 16:42:03 +epoch [12/50] batch [560/1000] time 1.550 (1.564) data 0.000 (0.002) loss 0.9092 (1.1397) acc 78.1250 (71.2444) lr 1.8090e-03 eta 16:41:54 +epoch [12/50] batch [565/1000] time 1.554 (1.564) data 0.000 (0.002) loss 1.6396 (1.1417) acc 68.7500 (71.2500) lr 1.8090e-03 eta 16:41:45 +epoch [12/50] batch [570/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.0859 (1.1413) acc 75.0000 (71.2664) lr 1.8090e-03 eta 16:41:35 +epoch [12/50] batch [575/1000] time 1.594 (1.564) data 0.000 (0.002) loss 1.0195 (1.1415) acc 78.1250 (71.3043) lr 1.8090e-03 eta 16:41:27 +epoch [12/50] batch [580/1000] time 1.552 (1.564) data 0.000 (0.002) loss 0.7686 (1.1430) acc 78.1250 (71.2931) lr 1.8090e-03 eta 16:41:15 +epoch [12/50] batch [585/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.5439 (1.1445) acc 56.2500 (71.2607) lr 1.8090e-03 eta 16:41:06 +epoch [12/50] batch [590/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.2646 (1.1459) acc 68.7500 (71.2394) lr 1.8090e-03 eta 16:40:57 +epoch [12/50] batch [595/1000] time 1.567 (1.564) data 0.001 (0.002) loss 0.9097 (1.1453) acc 78.1250 (71.2395) lr 1.8090e-03 eta 16:41:03 +epoch [12/50] batch [600/1000] time 1.564 (1.564) data 0.001 (0.002) loss 0.7920 (1.1479) acc 78.1250 (71.1667) lr 1.8090e-03 eta 16:40:56 +epoch [12/50] batch [605/1000] time 1.551 (1.564) data 
0.000 (0.002) loss 0.6104 (1.1485) acc 84.3750 (71.1829) lr 1.8090e-03 eta 16:40:51 +epoch [12/50] batch [610/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.0576 (1.1479) acc 71.8750 (71.1783) lr 1.8090e-03 eta 16:40:43 +epoch [12/50] batch [615/1000] time 1.579 (1.564) data 0.000 (0.002) loss 0.9146 (1.1472) acc 68.7500 (71.1839) lr 1.8090e-03 eta 16:40:34 +epoch [12/50] batch [620/1000] time 1.554 (1.564) data 0.000 (0.002) loss 1.6504 (1.1475) acc 62.5000 (71.1845) lr 1.8090e-03 eta 16:40:26 +epoch [12/50] batch [625/1000] time 1.541 (1.564) data 0.001 (0.002) loss 1.0654 (1.1469) acc 75.0000 (71.2000) lr 1.8090e-03 eta 16:40:14 +epoch [12/50] batch [630/1000] time 1.562 (1.564) data 0.001 (0.002) loss 0.8667 (1.1471) acc 75.0000 (71.2153) lr 1.8090e-03 eta 16:40:01 +epoch [12/50] batch [635/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.3311 (1.1465) acc 56.2500 (71.2057) lr 1.8090e-03 eta 16:39:48 +epoch [12/50] batch [640/1000] time 1.561 (1.564) data 0.000 (0.002) loss 0.9448 (1.1461) acc 78.1250 (71.2109) lr 1.8090e-03 eta 16:39:50 +epoch [12/50] batch [645/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.0342 (1.1448) acc 71.8750 (71.2209) lr 1.8090e-03 eta 16:39:42 +epoch [12/50] batch [650/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.9258 (1.1441) acc 62.5000 (71.2404) lr 1.8090e-03 eta 16:39:32 +epoch [12/50] batch [655/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.1650 (1.1437) acc 71.8750 (71.2643) lr 1.8090e-03 eta 16:39:26 +epoch [12/50] batch [660/1000] time 1.567 (1.564) data 0.000 (0.002) loss 1.3672 (1.1427) acc 65.6250 (71.2879) lr 1.8090e-03 eta 16:39:16 +epoch [12/50] batch [665/1000] time 1.553 (1.564) data 0.001 (0.002) loss 1.1484 (1.1427) acc 65.6250 (71.2688) lr 1.8090e-03 eta 16:39:06 +epoch [12/50] batch [670/1000] time 1.569 (1.564) data 0.001 (0.002) loss 0.6738 (1.1455) acc 81.2500 (71.2080) lr 1.8090e-03 eta 16:39:00 +epoch [12/50] batch [675/1000] time 1.552 (1.564) data 0.000 (0.002) loss 0.7783 (1.1469) acc 
75.0000 (71.1620) lr 1.8090e-03 eta 16:38:47 +epoch [12/50] batch [680/1000] time 1.690 (1.564) data 0.000 (0.002) loss 0.9531 (1.1484) acc 81.2500 (71.1535) lr 1.8090e-03 eta 16:38:46 +epoch [12/50] batch [685/1000] time 1.547 (1.564) data 0.000 (0.002) loss 0.7227 (1.1481) acc 84.3750 (71.1542) lr 1.8090e-03 eta 16:38:38 +epoch [12/50] batch [690/1000] time 1.554 (1.564) data 0.001 (0.002) loss 1.2266 (1.1481) acc 71.8750 (71.1322) lr 1.8090e-03 eta 16:38:30 +epoch [12/50] batch [695/1000] time 1.583 (1.564) data 0.000 (0.002) loss 1.8408 (1.1484) acc 56.2500 (71.0971) lr 1.8090e-03 eta 16:38:22 +epoch [12/50] batch [700/1000] time 1.533 (1.564) data 0.000 (0.002) loss 1.0352 (1.1481) acc 71.8750 (71.1027) lr 1.8090e-03 eta 16:38:08 +epoch [12/50] batch [705/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.1074 (1.1459) acc 71.8750 (71.1303) lr 1.8090e-03 eta 16:38:01 +epoch [12/50] batch [710/1000] time 1.569 (1.564) data 0.000 (0.002) loss 1.2988 (1.1463) acc 75.0000 (71.1136) lr 1.8090e-03 eta 16:37:54 +epoch [12/50] batch [715/1000] time 1.532 (1.564) data 0.000 (0.002) loss 1.0742 (1.1469) acc 62.5000 (71.0839) lr 1.8090e-03 eta 16:37:40 +epoch [12/50] batch [720/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.0020 (1.1463) acc 68.7500 (71.0851) lr 1.8090e-03 eta 16:37:29 +epoch [12/50] batch [725/1000] time 1.584 (1.563) data 0.000 (0.002) loss 1.2529 (1.1462) acc 65.6250 (71.0517) lr 1.8090e-03 eta 16:37:22 +epoch [12/50] batch [730/1000] time 1.548 (1.563) data 0.001 (0.002) loss 0.9678 (1.1455) acc 68.7500 (71.0616) lr 1.8090e-03 eta 16:37:11 +epoch [12/50] batch [735/1000] time 1.547 (1.563) data 0.001 (0.002) loss 1.1768 (1.1445) acc 71.8750 (71.0884) lr 1.8090e-03 eta 16:37:01 +epoch [12/50] batch [740/1000] time 1.587 (1.563) data 0.000 (0.002) loss 1.6523 (1.1464) acc 65.6250 (71.0684) lr 1.8090e-03 eta 16:36:52 +epoch [12/50] batch [745/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.3672 (1.1454) acc 56.2500 (71.0738) lr 1.8090e-03 eta 
16:36:52 +epoch [12/50] batch [750/1000] time 1.557 (1.563) data 0.000 (0.002) loss 0.9277 (1.1461) acc 71.8750 (71.0750) lr 1.8090e-03 eta 16:36:42 +epoch [12/50] batch [755/1000] time 1.537 (1.563) data 0.000 (0.002) loss 2.0430 (1.1474) acc 62.5000 (71.0348) lr 1.8090e-03 eta 16:36:33 +epoch [12/50] batch [760/1000] time 1.557 (1.563) data 0.000 (0.002) loss 1.2441 (1.1462) acc 68.7500 (71.0567) lr 1.8090e-03 eta 16:36:23 +epoch [12/50] batch [765/1000] time 1.560 (1.563) data 0.000 (0.002) loss 1.2500 (1.1464) acc 71.8750 (71.0825) lr 1.8090e-03 eta 16:36:13 +epoch [12/50] batch [770/1000] time 1.552 (1.563) data 0.000 (0.002) loss 1.0439 (1.1461) acc 75.0000 (71.0917) lr 1.8090e-03 eta 16:36:02 +epoch [12/50] batch [775/1000] time 1.568 (1.563) data 0.000 (0.002) loss 0.3813 (1.1455) acc 87.5000 (71.1008) lr 1.8090e-03 eta 16:35:55 +epoch [12/50] batch [780/1000] time 1.572 (1.563) data 0.000 (0.002) loss 1.1787 (1.1467) acc 62.5000 (71.0737) lr 1.8090e-03 eta 16:35:44 +epoch [12/50] batch [785/1000] time 1.561 (1.563) data 0.000 (0.002) loss 0.8540 (1.1479) acc 65.6250 (71.0549) lr 1.8090e-03 eta 16:35:36 +epoch [12/50] batch [790/1000] time 1.546 (1.563) data 0.000 (0.002) loss 0.7944 (1.1480) acc 78.1250 (71.0522) lr 1.8090e-03 eta 16:35:35 +epoch [12/50] batch [795/1000] time 1.574 (1.563) data 0.000 (0.002) loss 0.6699 (1.1477) acc 84.3750 (71.0731) lr 1.8090e-03 eta 16:35:27 +epoch [12/50] batch [800/1000] time 1.553 (1.563) data 0.000 (0.002) loss 1.2295 (1.1489) acc 71.8750 (71.0352) lr 1.8090e-03 eta 16:35:17 +epoch [12/50] batch [805/1000] time 1.561 (1.563) data 0.000 (0.002) loss 1.1006 (1.1491) acc 84.3750 (71.0287) lr 1.8090e-03 eta 16:35:07 +epoch [12/50] batch [810/1000] time 1.553 (1.563) data 0.000 (0.002) loss 0.9941 (1.1493) acc 68.7500 (71.0301) lr 1.8090e-03 eta 16:34:57 +epoch [12/50] batch [815/1000] time 1.568 (1.563) data 0.000 (0.002) loss 0.9824 (1.1492) acc 75.0000 (71.0506) lr 1.8090e-03 eta 16:34:50 +epoch [12/50] batch 
[820/1000] time 1.580 (1.563) data 0.000 (0.001) loss 0.8350 (1.1504) acc 81.2500 (71.0442) lr 1.8090e-03 eta 16:34:41 +epoch [12/50] batch [825/1000] time 1.577 (1.563) data 0.000 (0.001) loss 0.5107 (1.1499) acc 87.5000 (71.0530) lr 1.8090e-03 eta 16:34:36 +epoch [12/50] batch [830/1000] time 1.546 (1.563) data 0.000 (0.001) loss 1.1455 (1.1502) acc 65.6250 (71.0354) lr 1.8090e-03 eta 16:34:27 +epoch [12/50] batch [835/1000] time 1.580 (1.563) data 0.001 (0.001) loss 1.4365 (1.1503) acc 71.8750 (71.0479) lr 1.8090e-03 eta 16:34:28 +epoch [12/50] batch [840/1000] time 1.580 (1.563) data 0.000 (0.001) loss 0.8628 (1.1500) acc 78.1250 (71.0491) lr 1.8090e-03 eta 16:34:20 +epoch [12/50] batch [845/1000] time 1.558 (1.563) data 0.000 (0.001) loss 0.9658 (1.1501) acc 68.7500 (71.0503) lr 1.8090e-03 eta 16:34:11 +epoch [12/50] batch [850/1000] time 1.595 (1.563) data 0.000 (0.001) loss 1.3271 (1.1499) acc 68.7500 (71.0735) lr 1.8090e-03 eta 16:34:04 +epoch [12/50] batch [855/1000] time 1.578 (1.563) data 0.000 (0.001) loss 0.4263 (1.1487) acc 87.5000 (71.1001) lr 1.8090e-03 eta 16:33:58 +epoch [12/50] batch [860/1000] time 1.570 (1.563) data 0.000 (0.001) loss 1.0918 (1.1486) acc 68.7500 (71.1083) lr 1.8090e-03 eta 16:33:47 +epoch [12/50] batch [865/1000] time 1.565 (1.563) data 0.000 (0.001) loss 1.0107 (1.1485) acc 75.0000 (71.1163) lr 1.8090e-03 eta 16:33:38 +epoch [12/50] batch [870/1000] time 1.533 (1.563) data 0.000 (0.001) loss 1.2764 (1.1500) acc 65.6250 (71.0848) lr 1.8090e-03 eta 16:33:27 +epoch [12/50] batch [875/1000] time 1.540 (1.563) data 0.000 (0.001) loss 0.9922 (1.1499) acc 71.8750 (71.1071) lr 1.8090e-03 eta 16:33:15 +epoch [12/50] batch [880/1000] time 1.574 (1.563) data 0.001 (0.001) loss 1.7539 (1.1523) acc 53.1250 (71.0618) lr 1.8090e-03 eta 16:33:06 +epoch [12/50] batch [885/1000] time 1.544 (1.563) data 0.000 (0.001) loss 0.7427 (1.1519) acc 71.8750 (71.0523) lr 1.8090e-03 eta 16:32:57 +epoch [12/50] batch [890/1000] time 1.534 (1.563) data 
0.000 (0.001) loss 1.3320 (1.1521) acc 75.0000 (71.0674) lr 1.8090e-03 eta 16:32:48 +epoch [12/50] batch [895/1000] time 1.549 (1.563) data 0.000 (0.001) loss 0.8062 (1.1514) acc 81.2500 (71.0824) lr 1.8090e-03 eta 16:32:43 +epoch [12/50] batch [900/1000] time 1.571 (1.563) data 0.000 (0.001) loss 0.6118 (1.1504) acc 84.3750 (71.1076) lr 1.8090e-03 eta 16:32:35 +epoch [12/50] batch [905/1000] time 1.587 (1.563) data 0.000 (0.001) loss 2.1855 (1.1514) acc 53.1250 (71.0946) lr 1.8090e-03 eta 16:32:27 +epoch [12/50] batch [910/1000] time 1.563 (1.563) data 0.001 (0.001) loss 1.1270 (1.1499) acc 65.6250 (71.1023) lr 1.8090e-03 eta 16:32:15 +epoch [12/50] batch [915/1000] time 1.574 (1.563) data 0.000 (0.001) loss 1.0342 (1.1496) acc 78.1250 (71.1031) lr 1.8090e-03 eta 16:32:07 +epoch [12/50] batch [920/1000] time 1.572 (1.563) data 0.001 (0.001) loss 1.4932 (1.1498) acc 68.7500 (71.1107) lr 1.8090e-03 eta 16:31:58 +epoch [12/50] batch [925/1000] time 1.559 (1.563) data 0.000 (0.001) loss 2.1035 (1.1513) acc 43.7500 (71.0878) lr 1.8090e-03 eta 16:31:49 +epoch [12/50] batch [930/1000] time 1.552 (1.563) data 0.001 (0.001) loss 1.9180 (1.1523) acc 62.5000 (71.0753) lr 1.8090e-03 eta 16:31:41 +epoch [12/50] batch [935/1000] time 1.569 (1.563) data 0.000 (0.001) loss 0.8896 (1.1517) acc 75.0000 (71.0896) lr 1.8090e-03 eta 16:31:31 +epoch [12/50] batch [940/1000] time 1.571 (1.563) data 0.000 (0.001) loss 1.0938 (1.1527) acc 71.8750 (71.0705) lr 1.8090e-03 eta 16:31:29 +epoch [12/50] batch [945/1000] time 1.539 (1.563) data 0.000 (0.001) loss 0.6963 (1.1531) acc 78.1250 (71.0681) lr 1.8090e-03 eta 16:31:19 +epoch [12/50] batch [950/1000] time 1.556 (1.563) data 0.000 (0.001) loss 1.0820 (1.1527) acc 68.7500 (71.0888) lr 1.8090e-03 eta 16:31:11 +epoch [12/50] batch [955/1000] time 1.532 (1.563) data 0.000 (0.001) loss 0.9351 (1.1526) acc 81.2500 (71.1060) lr 1.8090e-03 eta 16:31:02 +epoch [12/50] batch [960/1000] time 1.571 (1.563) data 0.000 (0.001) loss 1.0498 (1.1527) acc 
68.7500 (71.0905) lr 1.8090e-03 eta 16:30:53 +epoch [12/50] batch [965/1000] time 1.545 (1.563) data 0.000 (0.001) loss 1.2188 (1.1522) acc 65.6250 (71.0978) lr 1.8090e-03 eta 16:30:44 +epoch [12/50] batch [970/1000] time 1.575 (1.563) data 0.000 (0.001) loss 0.6641 (1.1519) acc 84.3750 (71.0986) lr 1.8090e-03 eta 16:30:34 +epoch [12/50] batch [975/1000] time 1.545 (1.563) data 0.000 (0.001) loss 0.7173 (1.1508) acc 81.2500 (71.1314) lr 1.8090e-03 eta 16:30:26 +epoch [12/50] batch [980/1000] time 1.563 (1.563) data 0.000 (0.001) loss 0.9443 (1.1503) acc 68.7500 (71.1320) lr 1.8090e-03 eta 16:30:17 +epoch [12/50] batch [985/1000] time 1.554 (1.563) data 0.001 (0.001) loss 0.9248 (1.1499) acc 78.1250 (71.1326) lr 1.8090e-03 eta 16:30:10 +epoch [12/50] batch [990/1000] time 1.552 (1.563) data 0.000 (0.001) loss 0.8105 (1.1497) acc 71.8750 (71.1364) lr 1.8090e-03 eta 16:30:00 +epoch [12/50] batch [995/1000] time 1.575 (1.563) data 0.000 (0.001) loss 1.7764 (1.1502) acc 65.6250 (71.1212) lr 1.8090e-03 eta 16:29:53 +epoch [12/50] batch [1000/1000] time 1.546 (1.563) data 0.000 (0.001) loss 1.9102 (1.1511) acc 62.5000 (71.1125) lr 1.7705e-03 eta 16:29:42 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,060 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.6% +epoch [13/50] batch [5/1000] time 1.556 (1.672) data 0.000 (0.179) loss 1.1758 (1.2706) acc 59.3750 (66.8750) lr 1.7705e-03 eta 17:38:46 +epoch [13/50] batch [10/1000] time 1.545 (1.616) data 0.001 (0.090) loss 1.1934 (1.1813) acc 81.2500 (70.9375) lr 1.7705e-03 eta 17:03:24 +epoch [13/50] batch [15/1000] time 1.560 (1.596) data 0.000 (0.060) loss 1.3535 (1.2141) acc 75.0000 (71.4583) lr 1.7705e-03 eta 16:50:13 +epoch [13/50] batch [20/1000] time 1.544 (1.582) data 0.000 (0.045) loss 0.6431 (1.1523) acc 71.8750 (71.8750) lr 1.7705e-03 eta 16:41:37 +epoch [13/50] batch [25/1000] time 1.573 (1.579) data 0.001 (0.036) loss 0.9165 (1.1435) acc 75.0000 (72.6250) lr 1.7705e-03 eta 16:39:35 +epoch 
[13/50] batch [30/1000] time 1.563 (1.576) data 0.000 (0.030) loss 1.3311 (1.1584) acc 68.7500 (72.0833) lr 1.7705e-03 eta 16:37:13 +epoch [13/50] batch [35/1000] time 1.599 (1.582) data 0.001 (0.026) loss 1.0977 (1.1639) acc 78.1250 (72.0536) lr 1.7705e-03 eta 16:40:57 +epoch [13/50] batch [40/1000] time 1.559 (1.579) data 0.000 (0.023) loss 1.8428 (1.1987) acc 46.8750 (71.0938) lr 1.7705e-03 eta 16:39:00 +epoch [13/50] batch [45/1000] time 1.533 (1.577) data 0.000 (0.020) loss 1.4238 (1.1940) acc 62.5000 (70.6250) lr 1.7705e-03 eta 16:37:45 +epoch [13/50] batch [50/1000] time 1.542 (1.575) data 0.000 (0.018) loss 0.8569 (1.1821) acc 75.0000 (71.0000) lr 1.7705e-03 eta 16:36:00 +epoch [13/50] batch [55/1000] time 1.547 (1.572) data 0.001 (0.017) loss 1.6562 (1.1714) acc 71.8750 (71.2500) lr 1.7705e-03 eta 16:34:06 +epoch [13/50] batch [60/1000] time 1.563 (1.571) data 0.000 (0.015) loss 1.9170 (1.1790) acc 62.5000 (71.1979) lr 1.7705e-03 eta 16:33:24 +epoch [13/50] batch [65/1000] time 1.573 (1.571) data 0.000 (0.014) loss 1.0596 (1.1747) acc 75.0000 (71.2981) lr 1.7705e-03 eta 16:32:58 +epoch [13/50] batch [70/1000] time 1.552 (1.569) data 0.001 (0.013) loss 0.9966 (1.1773) acc 78.1250 (71.1607) lr 1.7705e-03 eta 16:31:51 +epoch [13/50] batch [75/1000] time 1.550 (1.568) data 0.000 (0.012) loss 1.8125 (1.1852) acc 71.8750 (71.3333) lr 1.7705e-03 eta 16:31:20 +epoch [13/50] batch [80/1000] time 1.543 (1.569) data 0.000 (0.012) loss 1.4775 (1.1924) acc 62.5000 (71.0156) lr 1.7705e-03 eta 16:31:29 +epoch [13/50] batch [85/1000] time 1.536 (1.567) data 0.000 (0.011) loss 1.1699 (1.1927) acc 68.7500 (70.9926) lr 1.7705e-03 eta 16:30:21 +epoch [13/50] batch [90/1000] time 1.584 (1.567) data 0.000 (0.010) loss 0.8716 (1.1928) acc 78.1250 (71.2847) lr 1.7705e-03 eta 16:29:52 +epoch [13/50] batch [95/1000] time 1.565 (1.566) data 0.000 (0.010) loss 0.9990 (1.1843) acc 81.2500 (71.3487) lr 1.7705e-03 eta 16:29:29 +epoch [13/50] batch [100/1000] time 1.565 (1.566) data 
0.001 (0.009) loss 0.5591 (1.1821) acc 87.5000 (71.4375) lr 1.7705e-03 eta 16:29:01 +epoch [13/50] batch [105/1000] time 1.540 (1.565) data 0.000 (0.009) loss 0.7686 (1.1821) acc 81.2500 (71.3393) lr 1.7705e-03 eta 16:28:36 +epoch [13/50] batch [110/1000] time 1.532 (1.565) data 0.001 (0.009) loss 1.5615 (1.1844) acc 65.6250 (71.3068) lr 1.7705e-03 eta 16:28:09 +epoch [13/50] batch [115/1000] time 1.558 (1.564) data 0.000 (0.008) loss 1.2188 (1.1842) acc 65.6250 (71.3587) lr 1.7705e-03 eta 16:27:41 +epoch [13/50] batch [120/1000] time 1.734 (1.565) data 0.000 (0.008) loss 0.8877 (1.1723) acc 81.2500 (71.5885) lr 1.7705e-03 eta 16:28:12 +epoch [13/50] batch [125/1000] time 1.563 (1.565) data 0.000 (0.008) loss 0.4519 (1.1713) acc 84.3750 (71.5750) lr 1.7705e-03 eta 16:27:44 +epoch [13/50] batch [130/1000] time 1.547 (1.564) data 0.000 (0.007) loss 1.3594 (1.1743) acc 68.7500 (71.2981) lr 1.7705e-03 eta 16:27:27 +epoch [13/50] batch [135/1000] time 1.560 (1.565) data 0.001 (0.007) loss 1.0449 (1.1642) acc 71.8750 (71.5509) lr 1.7705e-03 eta 16:27:20 +epoch [13/50] batch [140/1000] time 1.540 (1.564) data 0.000 (0.007) loss 1.0869 (1.1723) acc 71.8750 (71.5179) lr 1.7705e-03 eta 16:26:55 +epoch [13/50] batch [145/1000] time 1.566 (1.564) data 0.000 (0.007) loss 0.5913 (1.1602) acc 78.1250 (71.6595) lr 1.7705e-03 eta 16:26:43 +epoch [13/50] batch [150/1000] time 1.546 (1.564) data 0.000 (0.006) loss 0.6382 (1.1607) acc 84.3750 (71.6875) lr 1.7705e-03 eta 16:26:28 +epoch [13/50] batch [155/1000] time 1.541 (1.564) data 0.000 (0.006) loss 1.2764 (1.1614) acc 65.6250 (71.6331) lr 1.7705e-03 eta 16:26:22 +epoch [13/50] batch [160/1000] time 1.576 (1.564) data 0.000 (0.006) loss 0.7559 (1.1662) acc 78.1250 (71.4453) lr 1.7705e-03 eta 16:26:18 +epoch [13/50] batch [165/1000] time 1.535 (1.564) data 0.000 (0.006) loss 0.6499 (1.1609) acc 81.2500 (71.5341) lr 1.7705e-03 eta 16:25:55 +epoch [13/50] batch [170/1000] time 1.546 (1.563) data 0.000 (0.006) loss 1.0791 (1.1634) acc 
65.6250 (71.4338) lr 1.7705e-03 eta 16:25:44 +epoch [13/50] batch [175/1000] time 1.564 (1.563) data 0.001 (0.006) loss 0.8242 (1.1627) acc 75.0000 (71.4821) lr 1.7705e-03 eta 16:25:24 +epoch [13/50] batch [180/1000] time 1.554 (1.563) data 0.000 (0.005) loss 1.3691 (1.1611) acc 62.5000 (71.4931) lr 1.7705e-03 eta 16:25:11 +epoch [13/50] batch [185/1000] time 1.564 (1.564) data 0.000 (0.005) loss 1.7041 (1.1614) acc 68.7500 (71.5372) lr 1.7705e-03 eta 16:25:54 +epoch [13/50] batch [190/1000] time 1.552 (1.564) data 0.001 (0.005) loss 1.0547 (1.1569) acc 75.0000 (71.7105) lr 1.7705e-03 eta 16:25:38 +epoch [13/50] batch [195/1000] time 1.534 (1.564) data 0.000 (0.005) loss 0.8120 (1.1589) acc 68.7500 (71.6827) lr 1.7705e-03 eta 16:25:21 +epoch [13/50] batch [200/1000] time 1.589 (1.564) data 0.000 (0.005) loss 1.1943 (1.1648) acc 68.7500 (71.4375) lr 1.7705e-03 eta 16:25:06 +epoch [13/50] batch [205/1000] time 1.567 (1.564) data 0.000 (0.005) loss 0.5151 (1.1582) acc 96.8750 (71.5396) lr 1.7705e-03 eta 16:24:59 +epoch [13/50] batch [210/1000] time 1.547 (1.563) data 0.000 (0.005) loss 0.8154 (1.1546) acc 81.2500 (71.5625) lr 1.7705e-03 eta 16:24:37 +epoch [13/50] batch [215/1000] time 1.532 (1.563) data 0.000 (0.005) loss 0.8916 (1.1476) acc 78.1250 (71.5988) lr 1.7705e-03 eta 16:24:17 +epoch [13/50] batch [220/1000] time 1.546 (1.563) data 0.000 (0.005) loss 0.8618 (1.1430) acc 81.2500 (71.7188) lr 1.7705e-03 eta 16:23:52 +epoch [13/50] batch [225/1000] time 1.566 (1.562) data 0.000 (0.004) loss 1.2021 (1.1509) acc 71.8750 (71.5833) lr 1.7705e-03 eta 16:23:35 +epoch [13/50] batch [230/1000] time 1.552 (1.563) data 0.000 (0.004) loss 2.0586 (1.1546) acc 68.7500 (71.5217) lr 1.7705e-03 eta 16:23:51 +epoch [13/50] batch [235/1000] time 1.574 (1.563) data 0.000 (0.004) loss 1.3604 (1.1532) acc 62.5000 (71.4229) lr 1.7705e-03 eta 16:23:44 +epoch [13/50] batch [240/1000] time 1.563 (1.563) data 0.000 (0.004) loss 1.2852 (1.1515) acc 65.6250 (71.4583) lr 1.7705e-03 eta 
16:23:40 +epoch [13/50] batch [245/1000] time 1.550 (1.563) data 0.001 (0.004) loss 1.6777 (1.1514) acc 59.3750 (71.5561) lr 1.7705e-03 eta 16:23:21 +epoch [13/50] batch [250/1000] time 1.560 (1.563) data 0.000 (0.004) loss 1.0469 (1.1540) acc 65.6250 (71.4625) lr 1.7705e-03 eta 16:23:06 +epoch [13/50] batch [255/1000] time 1.576 (1.563) data 0.000 (0.004) loss 1.1758 (1.1542) acc 75.0000 (71.4338) lr 1.7705e-03 eta 16:23:05 +epoch [13/50] batch [260/1000] time 1.538 (1.562) data 0.000 (0.004) loss 0.9990 (1.1549) acc 81.2500 (71.4543) lr 1.7705e-03 eta 16:22:47 +epoch [13/50] batch [265/1000] time 1.550 (1.562) data 0.000 (0.004) loss 1.2344 (1.1610) acc 71.8750 (71.3561) lr 1.7705e-03 eta 16:22:39 +epoch [13/50] batch [270/1000] time 1.565 (1.563) data 0.001 (0.004) loss 1.3604 (1.1621) acc 68.7500 (71.2384) lr 1.7705e-03 eta 16:22:35 +epoch [13/50] batch [275/1000] time 1.552 (1.563) data 0.000 (0.004) loss 1.4629 (1.1676) acc 65.6250 (71.0682) lr 1.7705e-03 eta 16:22:48 +epoch [13/50] batch [280/1000] time 1.545 (1.563) data 0.001 (0.004) loss 1.0625 (1.1654) acc 75.0000 (71.0714) lr 1.7705e-03 eta 16:22:32 +epoch [13/50] batch [285/1000] time 1.565 (1.563) data 0.000 (0.004) loss 0.7271 (1.1612) acc 84.3750 (71.1294) lr 1.7705e-03 eta 16:22:16 +epoch [13/50] batch [290/1000] time 1.550 (1.563) data 0.000 (0.003) loss 1.4463 (1.1637) acc 56.2500 (71.0991) lr 1.7705e-03 eta 16:22:07 +epoch [13/50] batch [295/1000] time 1.552 (1.563) data 0.000 (0.003) loss 0.9507 (1.1615) acc 78.1250 (71.1547) lr 1.7705e-03 eta 16:21:56 +epoch [13/50] batch [300/1000] time 1.563 (1.563) data 0.000 (0.003) loss 0.9165 (1.1622) acc 75.0000 (71.1458) lr 1.7705e-03 eta 16:21:48 +epoch [13/50] batch [305/1000] time 1.568 (1.563) data 0.000 (0.003) loss 1.1836 (1.1644) acc 75.0000 (71.1373) lr 1.7705e-03 eta 16:21:41 +epoch [13/50] batch [310/1000] time 1.552 (1.563) data 0.000 (0.003) loss 1.5459 (1.1661) acc 65.6250 (71.0585) lr 1.7705e-03 eta 16:21:33 +epoch [13/50] batch 
[315/1000] time 1.553 (1.562) data 0.001 (0.003) loss 1.0850 (1.1610) acc 75.0000 (71.1607) lr 1.7705e-03 eta 16:21:19 +epoch [13/50] batch [320/1000] time 1.564 (1.562) data 0.000 (0.003) loss 0.6333 (1.1600) acc 75.0000 (71.2109) lr 1.7705e-03 eta 16:21:07 +epoch [13/50] batch [325/1000] time 1.580 (1.562) data 0.000 (0.003) loss 1.3203 (1.1615) acc 68.7500 (71.1731) lr 1.7705e-03 eta 16:21:05 +epoch [13/50] batch [330/1000] time 1.565 (1.562) data 0.000 (0.003) loss 1.0879 (1.1588) acc 71.8750 (71.2121) lr 1.7705e-03 eta 16:20:50 +epoch [13/50] batch [335/1000] time 1.565 (1.563) data 0.000 (0.003) loss 0.3965 (1.1552) acc 87.5000 (71.2966) lr 1.7705e-03 eta 16:20:52 +epoch [13/50] batch [340/1000] time 1.572 (1.562) data 0.001 (0.003) loss 1.4189 (1.1544) acc 68.7500 (71.2868) lr 1.7705e-03 eta 16:20:40 +epoch [13/50] batch [345/1000] time 1.567 (1.562) data 0.000 (0.003) loss 0.8232 (1.1554) acc 71.8750 (71.2319) lr 1.7705e-03 eta 16:20:33 +epoch [13/50] batch [350/1000] time 1.568 (1.562) data 0.000 (0.003) loss 1.0840 (1.1593) acc 71.8750 (71.1339) lr 1.7705e-03 eta 16:20:27 +epoch [13/50] batch [355/1000] time 1.552 (1.562) data 0.000 (0.003) loss 1.5029 (1.1605) acc 75.0000 (71.1092) lr 1.7705e-03 eta 16:20:15 +epoch [13/50] batch [360/1000] time 1.562 (1.562) data 0.000 (0.003) loss 1.2529 (1.1606) acc 62.5000 (71.1024) lr 1.7705e-03 eta 16:20:06 +epoch [13/50] batch [365/1000] time 1.550 (1.562) data 0.001 (0.003) loss 0.9658 (1.1584) acc 78.1250 (71.1473) lr 1.7705e-03 eta 16:19:57 +epoch [13/50] batch [370/1000] time 1.575 (1.562) data 0.001 (0.003) loss 1.0801 (1.1565) acc 84.3750 (71.1655) lr 1.7705e-03 eta 16:19:39 +epoch [13/50] batch [375/1000] time 1.536 (1.562) data 0.000 (0.003) loss 0.8145 (1.1553) acc 78.1250 (71.2000) lr 1.7705e-03 eta 16:19:20 +epoch [13/50] batch [380/1000] time 1.571 (1.562) data 0.001 (0.003) loss 1.4434 (1.1560) acc 68.7500 (71.2007) lr 1.7705e-03 eta 16:19:26 +epoch [13/50] batch [385/1000] time 1.532 (1.562) data 
0.000 (0.003) loss 0.7988 (1.1559) acc 81.2500 (71.2338) lr 1.7705e-03 eta 16:19:11 +epoch [13/50] batch [390/1000] time 1.558 (1.562) data 0.000 (0.003) loss 0.7983 (1.1552) acc 75.0000 (71.1859) lr 1.7705e-03 eta 16:18:55 +epoch [13/50] batch [395/1000] time 1.545 (1.562) data 0.000 (0.003) loss 1.0264 (1.1538) acc 78.1250 (71.2342) lr 1.7705e-03 eta 16:18:45 +epoch [13/50] batch [400/1000] time 1.554 (1.562) data 0.000 (0.003) loss 1.2529 (1.1585) acc 65.6250 (71.1484) lr 1.7705e-03 eta 16:18:36 +epoch [13/50] batch [405/1000] time 1.560 (1.562) data 0.000 (0.003) loss 0.8525 (1.1577) acc 81.2500 (71.2114) lr 1.7705e-03 eta 16:18:32 +epoch [13/50] batch [410/1000] time 1.547 (1.562) data 0.000 (0.003) loss 0.8477 (1.1572) acc 68.7500 (71.1738) lr 1.7705e-03 eta 16:18:23 +epoch [13/50] batch [415/1000] time 1.565 (1.562) data 0.000 (0.003) loss 1.0547 (1.1583) acc 71.8750 (71.1747) lr 1.7705e-03 eta 16:18:14 +epoch [13/50] batch [420/1000] time 1.554 (1.562) data 0.000 (0.003) loss 0.9370 (1.1572) acc 75.0000 (71.2128) lr 1.7705e-03 eta 16:18:08 +epoch [13/50] batch [425/1000] time 1.566 (1.562) data 0.000 (0.003) loss 1.3330 (1.1574) acc 62.5000 (71.1912) lr 1.7705e-03 eta 16:18:15 +epoch [13/50] batch [430/1000] time 1.528 (1.562) data 0.000 (0.002) loss 0.9497 (1.1582) acc 81.2500 (71.1773) lr 1.7705e-03 eta 16:18:06 +epoch [13/50] batch [435/1000] time 1.547 (1.562) data 0.000 (0.002) loss 0.8120 (1.1573) acc 78.1250 (71.2213) lr 1.7705e-03 eta 16:18:01 +epoch [13/50] batch [440/1000] time 1.576 (1.562) data 0.001 (0.002) loss 1.4785 (1.1570) acc 71.8750 (71.2287) lr 1.7705e-03 eta 16:17:52 +epoch [13/50] batch [445/1000] time 1.563 (1.562) data 0.000 (0.002) loss 0.7153 (1.1549) acc 75.0000 (71.2851) lr 1.7705e-03 eta 16:17:40 +epoch [13/50] batch [450/1000] time 1.588 (1.562) data 0.000 (0.002) loss 1.3633 (1.1563) acc 78.1250 (71.2708) lr 1.7705e-03 eta 16:17:34 +epoch [13/50] batch [455/1000] time 1.561 (1.562) data 0.000 (0.002) loss 1.0576 (1.1576) acc 
71.8750 (71.2706) lr 1.7705e-03 eta 16:17:27 +epoch [13/50] batch [460/1000] time 1.583 (1.562) data 0.000 (0.002) loss 1.2529 (1.1576) acc 71.8750 (71.3383) lr 1.7705e-03 eta 16:17:20 +epoch [13/50] batch [465/1000] time 1.549 (1.562) data 0.001 (0.002) loss 1.4395 (1.1589) acc 68.7500 (71.3306) lr 1.7705e-03 eta 16:17:12 +epoch [13/50] batch [470/1000] time 1.580 (1.562) data 0.000 (0.002) loss 0.8301 (1.1607) acc 71.8750 (71.2766) lr 1.7705e-03 eta 16:17:05 +epoch [13/50] batch [475/1000] time 1.573 (1.562) data 0.000 (0.002) loss 0.6846 (1.1616) acc 84.3750 (71.2500) lr 1.7705e-03 eta 16:17:02 +epoch [13/50] batch [480/1000] time 1.564 (1.562) data 0.000 (0.002) loss 1.1680 (1.1604) acc 62.5000 (71.2500) lr 1.7705e-03 eta 16:16:57 +epoch [13/50] batch [485/1000] time 1.731 (1.563) data 0.001 (0.002) loss 0.9165 (1.1603) acc 71.8750 (71.2242) lr 1.7705e-03 eta 16:17:04 +epoch [13/50] batch [490/1000] time 1.566 (1.563) data 0.000 (0.002) loss 0.9868 (1.1592) acc 71.8750 (71.2117) lr 1.7705e-03 eta 16:16:57 +epoch [13/50] batch [495/1000] time 1.560 (1.563) data 0.001 (0.002) loss 1.0576 (1.1619) acc 65.6250 (71.1364) lr 1.7705e-03 eta 16:16:45 +epoch [13/50] batch [500/1000] time 1.572 (1.563) data 0.000 (0.002) loss 1.2900 (1.1630) acc 75.0000 (71.1063) lr 1.7705e-03 eta 16:16:41 +epoch [13/50] batch [505/1000] time 1.590 (1.563) data 0.000 (0.002) loss 0.8945 (1.1603) acc 75.0000 (71.1634) lr 1.7705e-03 eta 16:16:33 +epoch [13/50] batch [510/1000] time 1.541 (1.563) data 0.000 (0.002) loss 1.1953 (1.1598) acc 68.7500 (71.1949) lr 1.7705e-03 eta 16:16:24 +epoch [13/50] batch [515/1000] time 1.533 (1.563) data 0.000 (0.002) loss 0.9019 (1.1592) acc 75.0000 (71.2136) lr 1.7705e-03 eta 16:16:13 +epoch [13/50] batch [520/1000] time 1.549 (1.563) data 0.001 (0.002) loss 1.1279 (1.1580) acc 71.8750 (71.2500) lr 1.7705e-03 eta 16:16:06 +epoch [13/50] batch [525/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.8892 (1.1556) acc 75.0000 (71.2976) lr 1.7705e-03 eta 
16:15:55 +epoch [13/50] batch [530/1000] time 1.726 (1.563) data 0.001 (0.002) loss 0.4480 (1.1529) acc 78.1250 (71.3384) lr 1.7705e-03 eta 16:16:00 +epoch [13/50] batch [535/1000] time 1.553 (1.563) data 0.001 (0.002) loss 1.5293 (1.1524) acc 59.3750 (71.3376) lr 1.7705e-03 eta 16:15:50 +epoch [13/50] batch [540/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.0322 (1.1518) acc 68.7500 (71.3252) lr 1.7705e-03 eta 16:15:41 +epoch [13/50] batch [545/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.1572 (1.1516) acc 62.5000 (71.3245) lr 1.7705e-03 eta 16:15:26 +epoch [13/50] batch [550/1000] time 1.542 (1.562) data 0.000 (0.002) loss 1.3359 (1.1545) acc 62.5000 (71.2330) lr 1.7705e-03 eta 16:15:13 +epoch [13/50] batch [555/1000] time 1.600 (1.562) data 0.000 (0.002) loss 1.1914 (1.1555) acc 68.7500 (71.2331) lr 1.7705e-03 eta 16:15:03 +epoch [13/50] batch [560/1000] time 1.537 (1.562) data 0.001 (0.002) loss 0.9785 (1.1551) acc 68.7500 (71.2333) lr 1.7705e-03 eta 16:14:49 +epoch [13/50] batch [565/1000] time 1.549 (1.562) data 0.000 (0.002) loss 0.6260 (1.1542) acc 84.3750 (71.2389) lr 1.7705e-03 eta 16:14:39 +epoch [13/50] batch [570/1000] time 1.554 (1.562) data 0.000 (0.002) loss 0.7896 (1.1531) acc 81.2500 (71.2445) lr 1.7705e-03 eta 16:14:26 +epoch [13/50] batch [575/1000] time 1.548 (1.562) data 0.000 (0.002) loss 0.7373 (1.1532) acc 75.0000 (71.2391) lr 1.7705e-03 eta 16:14:23 +epoch [13/50] batch [580/1000] time 1.554 (1.562) data 0.000 (0.002) loss 1.6318 (1.1518) acc 75.0000 (71.2877) lr 1.7705e-03 eta 16:14:10 +epoch [13/50] batch [585/1000] time 1.561 (1.562) data 0.001 (0.002) loss 1.1445 (1.1502) acc 62.5000 (71.2821) lr 1.7705e-03 eta 16:13:58 +epoch [13/50] batch [590/1000] time 1.558 (1.562) data 0.000 (0.002) loss 0.9761 (1.1501) acc 75.0000 (71.2765) lr 1.7705e-03 eta 16:13:46 +epoch [13/50] batch [595/1000] time 1.557 (1.562) data 0.000 (0.002) loss 1.2998 (1.1498) acc 75.0000 (71.3288) lr 1.7705e-03 eta 16:13:36 +epoch [13/50] batch 
[600/1000] time 1.548 (1.562) data 0.000 (0.002) loss 1.4590 (1.1496) acc 62.5000 (71.3385) lr 1.7705e-03 eta 16:13:25 +epoch [13/50] batch [605/1000] time 1.572 (1.562) data 0.000 (0.002) loss 0.9556 (1.1493) acc 71.8750 (71.3533) lr 1.7705e-03 eta 16:13:20 +epoch [13/50] batch [610/1000] time 1.545 (1.562) data 0.000 (0.002) loss 0.5884 (1.1493) acc 81.2500 (71.3627) lr 1.7705e-03 eta 16:13:07 +epoch [13/50] batch [615/1000] time 1.570 (1.562) data 0.000 (0.002) loss 1.2227 (1.1488) acc 71.8750 (71.3821) lr 1.7705e-03 eta 16:13:00 +epoch [13/50] batch [620/1000] time 1.547 (1.562) data 0.000 (0.002) loss 1.3438 (1.1512) acc 59.3750 (71.3306) lr 1.7705e-03 eta 16:12:50 +epoch [13/50] batch [625/1000] time 1.582 (1.562) data 0.000 (0.002) loss 1.4609 (1.1511) acc 71.8750 (71.3350) lr 1.7705e-03 eta 16:12:44 +epoch [13/50] batch [630/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.4883 (1.1522) acc 65.6250 (71.3046) lr 1.7705e-03 eta 16:12:34 +epoch [13/50] batch [635/1000] time 1.566 (1.561) data 0.000 (0.002) loss 1.4209 (1.1512) acc 68.7500 (71.2943) lr 1.7705e-03 eta 16:12:25 +epoch [13/50] batch [640/1000] time 1.519 (1.562) data 0.001 (0.002) loss 1.1729 (1.1491) acc 68.7500 (71.3232) lr 1.7705e-03 eta 16:12:22 +epoch [13/50] batch [645/1000] time 1.559 (1.562) data 0.000 (0.002) loss 1.2158 (1.1492) acc 75.0000 (71.3275) lr 1.7705e-03 eta 16:12:12 +epoch [13/50] batch [650/1000] time 1.573 (1.562) data 0.000 (0.002) loss 1.1953 (1.1491) acc 65.6250 (71.3462) lr 1.7705e-03 eta 16:12:05 +epoch [13/50] batch [655/1000] time 1.583 (1.562) data 0.000 (0.002) loss 0.6401 (1.1479) acc 81.2500 (71.3836) lr 1.7705e-03 eta 16:11:58 +epoch [13/50] batch [660/1000] time 1.562 (1.562) data 0.000 (0.002) loss 1.0430 (1.1475) acc 75.0000 (71.3968) lr 1.7705e-03 eta 16:11:51 +epoch [13/50] batch [665/1000] time 1.538 (1.562) data 0.000 (0.002) loss 1.3408 (1.1487) acc 59.3750 (71.3440) lr 1.7705e-03 eta 16:11:39 +epoch [13/50] batch [670/1000] time 1.538 (1.561) data 
0.000 (0.002) loss 0.9927 (1.1493) acc 71.8750 (71.3200) lr 1.7705e-03 eta 16:11:29 +epoch [13/50] batch [675/1000] time 1.564 (1.561) data 0.000 (0.002) loss 1.7266 (1.1487) acc 56.2500 (71.3287) lr 1.7705e-03 eta 16:11:19 +epoch [13/50] batch [680/1000] time 1.555 (1.561) data 0.001 (0.002) loss 1.0801 (1.1476) acc 81.2500 (71.3879) lr 1.7705e-03 eta 16:11:10 +epoch [13/50] batch [685/1000] time 1.566 (1.562) data 0.000 (0.002) loss 0.9614 (1.1482) acc 75.0000 (71.3641) lr 1.7705e-03 eta 16:11:12 +epoch [13/50] batch [690/1000] time 1.534 (1.562) data 0.001 (0.002) loss 0.9668 (1.1478) acc 71.8750 (71.3587) lr 1.7705e-03 eta 16:11:03 +epoch [13/50] batch [695/1000] time 1.584 (1.562) data 0.000 (0.002) loss 0.9473 (1.1480) acc 75.0000 (71.3804) lr 1.7705e-03 eta 16:10:55 +epoch [13/50] batch [700/1000] time 1.545 (1.561) data 0.001 (0.002) loss 1.4941 (1.1501) acc 65.6250 (71.3348) lr 1.7705e-03 eta 16:10:43 +epoch [13/50] batch [705/1000] time 1.552 (1.561) data 0.000 (0.002) loss 0.9727 (1.1494) acc 68.7500 (71.3652) lr 1.7705e-03 eta 16:10:33 +epoch [13/50] batch [710/1000] time 1.561 (1.562) data 0.000 (0.002) loss 1.0391 (1.1498) acc 84.3750 (71.3468) lr 1.7705e-03 eta 16:10:28 +epoch [13/50] batch [715/1000] time 1.539 (1.561) data 0.000 (0.002) loss 1.0859 (1.1500) acc 78.1250 (71.3855) lr 1.7705e-03 eta 16:10:17 +epoch [13/50] batch [720/1000] time 1.547 (1.561) data 0.000 (0.002) loss 1.4131 (1.1500) acc 62.5000 (71.3759) lr 1.7705e-03 eta 16:10:06 +epoch [13/50] batch [725/1000] time 1.530 (1.562) data 0.000 (0.002) loss 1.4404 (1.1500) acc 65.6250 (71.3664) lr 1.7705e-03 eta 16:10:06 +epoch [13/50] batch [730/1000] time 1.538 (1.561) data 0.000 (0.002) loss 0.6240 (1.1484) acc 84.3750 (71.4127) lr 1.7705e-03 eta 16:09:56 +epoch [13/50] batch [735/1000] time 1.547 (1.561) data 0.001 (0.002) loss 1.4941 (1.1492) acc 75.0000 (71.4371) lr 1.7705e-03 eta 16:09:44 +epoch [13/50] batch [740/1000] time 1.544 (1.561) data 0.000 (0.002) loss 1.5469 (1.1496) acc 
68.7500 (71.4231) lr 1.7705e-03 eta 16:09:36 +epoch [13/50] batch [745/1000] time 1.540 (1.561) data 0.001 (0.002) loss 0.9854 (1.1486) acc 75.0000 (71.4136) lr 1.7705e-03 eta 16:09:28 +epoch [13/50] batch [750/1000] time 1.558 (1.561) data 0.000 (0.002) loss 0.9473 (1.1480) acc 81.2500 (71.4250) lr 1.7705e-03 eta 16:09:20 +epoch [13/50] batch [755/1000] time 1.564 (1.561) data 0.001 (0.002) loss 1.2441 (1.1482) acc 68.7500 (71.4280) lr 1.7705e-03 eta 16:09:14 +epoch [13/50] batch [760/1000] time 1.534 (1.561) data 0.001 (0.002) loss 0.9126 (1.1476) acc 78.1250 (71.4433) lr 1.7705e-03 eta 16:09:08 +epoch [13/50] batch [765/1000] time 1.614 (1.562) data 0.000 (0.002) loss 1.2803 (1.1461) acc 68.7500 (71.4747) lr 1.7705e-03 eta 16:09:03 +epoch [13/50] batch [770/1000] time 1.557 (1.562) data 0.000 (0.002) loss 1.7100 (1.1460) acc 56.2500 (71.4570) lr 1.7705e-03 eta 16:08:56 +epoch [13/50] batch [775/1000] time 1.572 (1.562) data 0.000 (0.002) loss 0.7065 (1.1446) acc 78.1250 (71.4879) lr 1.7705e-03 eta 16:08:47 +epoch [13/50] batch [780/1000] time 1.558 (1.561) data 0.000 (0.002) loss 0.7822 (1.1446) acc 87.5000 (71.5064) lr 1.7705e-03 eta 16:08:37 +epoch [13/50] batch [785/1000] time 1.575 (1.562) data 0.000 (0.002) loss 0.7422 (1.1446) acc 75.0000 (71.4769) lr 1.7705e-03 eta 16:08:32 +epoch [13/50] batch [790/1000] time 1.581 (1.562) data 0.000 (0.002) loss 0.8086 (1.1445) acc 75.0000 (71.4834) lr 1.7705e-03 eta 16:08:32 +epoch [13/50] batch [795/1000] time 1.579 (1.562) data 0.000 (0.002) loss 0.9785 (1.1439) acc 68.7500 (71.4937) lr 1.7705e-03 eta 16:08:24 +epoch [13/50] batch [800/1000] time 1.575 (1.562) data 0.000 (0.002) loss 1.3262 (1.1441) acc 71.8750 (71.4805) lr 1.7705e-03 eta 16:08:18 +epoch [13/50] batch [805/1000] time 1.557 (1.562) data 0.000 (0.002) loss 1.1240 (1.1446) acc 71.8750 (71.4713) lr 1.7705e-03 eta 16:08:09 +epoch [13/50] batch [810/1000] time 1.580 (1.562) data 0.000 (0.002) loss 1.2295 (1.1450) acc 65.6250 (71.4699) lr 1.7705e-03 eta 
16:08:02 +epoch [13/50] batch [815/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.1777 (1.1459) acc 68.7500 (71.4647) lr 1.7705e-03 eta 16:07:55 +epoch [13/50] batch [820/1000] time 1.547 (1.562) data 0.000 (0.002) loss 0.6709 (1.1474) acc 84.3750 (71.4177) lr 1.7705e-03 eta 16:07:46 +epoch [13/50] batch [825/1000] time 1.550 (1.562) data 0.001 (0.002) loss 0.9688 (1.1474) acc 81.2500 (71.4356) lr 1.7705e-03 eta 16:07:42 +epoch [13/50] batch [830/1000] time 1.559 (1.562) data 0.000 (0.001) loss 1.0547 (1.1475) acc 78.1250 (71.4194) lr 1.7705e-03 eta 16:07:35 +epoch [13/50] batch [835/1000] time 1.574 (1.562) data 0.000 (0.001) loss 1.3965 (1.1491) acc 68.7500 (71.3698) lr 1.7705e-03 eta 16:07:35 +epoch [13/50] batch [840/1000] time 1.552 (1.562) data 0.001 (0.001) loss 0.9365 (1.1487) acc 71.8750 (71.3876) lr 1.7705e-03 eta 16:07:24 +epoch [13/50] batch [845/1000] time 1.539 (1.562) data 0.000 (0.001) loss 1.4873 (1.1484) acc 62.5000 (71.3720) lr 1.7705e-03 eta 16:07:14 +epoch [13/50] batch [850/1000] time 1.558 (1.562) data 0.000 (0.001) loss 1.4482 (1.1477) acc 62.5000 (71.3713) lr 1.7705e-03 eta 16:07:05 +epoch [13/50] batch [855/1000] time 1.565 (1.562) data 0.001 (0.001) loss 1.4531 (1.1486) acc 62.5000 (71.3852) lr 1.7705e-03 eta 16:06:56 +epoch [13/50] batch [860/1000] time 1.539 (1.562) data 0.000 (0.001) loss 0.9126 (1.1481) acc 71.8750 (71.4062) lr 1.7705e-03 eta 16:06:47 +epoch [13/50] batch [865/1000] time 1.569 (1.562) data 0.000 (0.001) loss 1.2686 (1.1482) acc 75.0000 (71.4234) lr 1.7705e-03 eta 16:06:39 +epoch [13/50] batch [870/1000] time 1.542 (1.562) data 0.000 (0.001) loss 0.9014 (1.1479) acc 71.8750 (71.4296) lr 1.7705e-03 eta 16:06:30 +epoch [13/50] batch [875/1000] time 1.717 (1.562) data 0.001 (0.001) loss 1.4736 (1.1478) acc 65.6250 (71.4214) lr 1.7705e-03 eta 16:06:29 +epoch [13/50] batch [880/1000] time 1.544 (1.562) data 0.000 (0.001) loss 0.7451 (1.1491) acc 84.3750 (71.4240) lr 1.7705e-03 eta 16:06:20 +epoch [13/50] batch 
[885/1000] time 1.558 (1.562) data 0.000 (0.001) loss 0.9585 (1.1489) acc 84.3750 (71.4301) lr 1.7705e-03 eta 16:06:11 +epoch [13/50] batch [890/1000] time 1.545 (1.562) data 0.000 (0.001) loss 0.5708 (1.1483) acc 81.2500 (71.4221) lr 1.7705e-03 eta 16:06:01 +epoch [13/50] batch [895/1000] time 1.529 (1.562) data 0.001 (0.001) loss 0.6533 (1.1470) acc 81.2500 (71.4420) lr 1.7705e-03 eta 16:05:51 +epoch [13/50] batch [900/1000] time 1.576 (1.562) data 0.000 (0.001) loss 1.0654 (1.1471) acc 68.7500 (71.4236) lr 1.7705e-03 eta 16:05:43 +epoch [13/50] batch [905/1000] time 1.559 (1.562) data 0.001 (0.001) loss 1.2686 (1.1477) acc 71.8750 (71.4054) lr 1.7705e-03 eta 16:05:34 +epoch [13/50] batch [910/1000] time 1.572 (1.562) data 0.001 (0.001) loss 1.2529 (1.1479) acc 62.5000 (71.3874) lr 1.7705e-03 eta 16:05:26 +epoch [13/50] batch [915/1000] time 1.570 (1.562) data 0.000 (0.001) loss 1.4404 (1.1483) acc 68.7500 (71.3900) lr 1.7705e-03 eta 16:05:19 +epoch [13/50] batch [920/1000] time 1.556 (1.562) data 0.000 (0.001) loss 1.1582 (1.1479) acc 68.7500 (71.4164) lr 1.7705e-03 eta 16:05:12 +epoch [13/50] batch [925/1000] time 1.537 (1.562) data 0.000 (0.001) loss 1.2842 (1.1495) acc 68.7500 (71.4020) lr 1.7705e-03 eta 16:05:04 +epoch [13/50] batch [930/1000] time 1.570 (1.562) data 0.000 (0.001) loss 0.8091 (1.1489) acc 75.0000 (71.3978) lr 1.7705e-03 eta 16:04:56 +epoch [13/50] batch [935/1000] time 1.551 (1.562) data 0.000 (0.001) loss 1.0781 (1.1484) acc 81.2500 (71.4372) lr 1.7705e-03 eta 16:04:48 +epoch [13/50] batch [940/1000] time 1.563 (1.562) data 0.001 (0.001) loss 1.7656 (1.1480) acc 59.3750 (71.4328) lr 1.7705e-03 eta 16:04:46 +epoch [13/50] batch [945/1000] time 1.551 (1.562) data 0.000 (0.001) loss 1.0908 (1.1471) acc 65.6250 (71.4253) lr 1.7705e-03 eta 16:04:35 +epoch [13/50] batch [950/1000] time 1.575 (1.562) data 0.000 (0.001) loss 0.9189 (1.1454) acc 81.2500 (71.4704) lr 1.7705e-03 eta 16:04:27 +epoch [13/50] batch [955/1000] time 1.568 (1.562) data 
0.001 (0.001) loss 1.5576 (1.1449) acc 59.3750 (71.4594) lr 1.7705e-03 eta 16:04:21 +epoch [13/50] batch [960/1000] time 1.569 (1.562) data 0.001 (0.001) loss 1.1338 (1.1447) acc 71.8750 (71.4681) lr 1.7705e-03 eta 16:04:14 +epoch [13/50] batch [965/1000] time 1.553 (1.562) data 0.000 (0.001) loss 0.7856 (1.1447) acc 84.3750 (71.4832) lr 1.7705e-03 eta 16:04:03 +epoch [13/50] batch [970/1000] time 1.549 (1.562) data 0.000 (0.001) loss 1.4326 (1.1439) acc 62.5000 (71.5013) lr 1.7705e-03 eta 16:03:53 +epoch [13/50] batch [975/1000] time 1.581 (1.562) data 0.000 (0.001) loss 1.4990 (1.1446) acc 68.7500 (71.4904) lr 1.7705e-03 eta 16:03:44 +epoch [13/50] batch [980/1000] time 1.560 (1.562) data 0.000 (0.001) loss 0.5796 (1.1462) acc 81.2500 (71.4445) lr 1.7705e-03 eta 16:03:34 +epoch [13/50] batch [985/1000] time 1.563 (1.562) data 0.001 (0.001) loss 1.3262 (1.1474) acc 68.7500 (71.4055) lr 1.7705e-03 eta 16:03:31 +epoch [13/50] batch [990/1000] time 1.545 (1.562) data 0.000 (0.001) loss 0.5967 (1.1469) acc 81.2500 (71.4236) lr 1.7705e-03 eta 16:03:21 +epoch [13/50] batch [995/1000] time 1.572 (1.562) data 0.000 (0.001) loss 0.7983 (1.1473) acc 87.5000 (71.4227) lr 1.7705e-03 eta 16:03:15 +epoch [13/50] batch [1000/1000] time 1.536 (1.562) data 0.000 (0.001) loss 1.0947 (1.1472) acc 68.7500 (71.4219) lr 1.7290e-03 eta 16:03:05 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,145 +* accuracy: 78.3% +* error: 21.7% +* macro_f1: 77.8% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [14/50] batch [5/1000] time 1.556 (1.661) data 0.000 (0.159) loss 1.1748 (0.9594) acc 78.1250 (78.1250) lr 1.7290e-03 eta 17:04:09 +epoch [14/50] batch [10/1000] time 1.561 (1.611) data 0.001 (0.080) loss 1.0645 (0.9932) acc 75.0000 (75.6250) lr 1.7290e-03 eta 16:32:57 +epoch [14/50] batch [15/1000] time 1.551 (1.609) data 0.000 (0.053) loss 0.9272 (0.9783) acc 75.0000 (75.0000) lr 
1.7290e-03 eta 16:31:36 +epoch [14/50] batch [20/1000] time 1.562 (1.598) data 0.000 (0.040) loss 0.9580 (1.0396) acc 81.2500 (74.3750) lr 1.7290e-03 eta 16:24:42 +epoch [14/50] batch [25/1000] time 1.551 (1.592) data 0.000 (0.032) loss 0.5840 (1.0379) acc 87.5000 (74.1250) lr 1.7290e-03 eta 16:21:13 +epoch [14/50] batch [30/1000] time 1.570 (1.587) data 0.001 (0.027) loss 1.3408 (1.0725) acc 68.7500 (73.9583) lr 1.7290e-03 eta 16:18:06 +epoch [14/50] batch [35/1000] time 1.570 (1.585) data 0.001 (0.023) loss 1.4922 (1.1223) acc 68.7500 (73.5714) lr 1.7290e-03 eta 16:16:28 +epoch [14/50] batch [40/1000] time 1.538 (1.581) data 0.000 (0.020) loss 1.4395 (1.1249) acc 75.0000 (73.3594) lr 1.7290e-03 eta 16:13:53 +epoch [14/50] batch [45/1000] time 1.543 (1.577) data 0.000 (0.018) loss 1.1719 (1.1592) acc 65.6250 (72.9167) lr 1.7290e-03 eta 16:11:36 +epoch [14/50] batch [50/1000] time 1.558 (1.576) data 0.000 (0.016) loss 0.9277 (1.1364) acc 75.0000 (73.3750) lr 1.7290e-03 eta 16:10:16 +epoch [14/50] batch [55/1000] time 1.543 (1.573) data 0.000 (0.015) loss 1.2178 (1.1224) acc 65.6250 (73.5227) lr 1.7290e-03 eta 16:08:47 +epoch [14/50] batch [60/1000] time 1.544 (1.574) data 0.000 (0.014) loss 0.9751 (1.1217) acc 81.2500 (73.1771) lr 1.7290e-03 eta 16:09:01 +epoch [14/50] batch [65/1000] time 1.546 (1.573) data 0.000 (0.013) loss 1.6377 (1.1422) acc 68.7500 (72.7885) lr 1.7290e-03 eta 16:08:14 +epoch [14/50] batch [70/1000] time 1.559 (1.571) data 0.000 (0.012) loss 1.5059 (1.1453) acc 75.0000 (72.5893) lr 1.7290e-03 eta 16:06:43 +epoch [14/50] batch [75/1000] time 1.541 (1.570) data 0.000 (0.011) loss 1.1035 (1.1325) acc 78.1250 (72.8750) lr 1.7290e-03 eta 16:05:55 +epoch [14/50] batch [80/1000] time 1.561 (1.569) data 0.001 (0.010) loss 1.1162 (1.1239) acc 71.8750 (73.0469) lr 1.7290e-03 eta 16:05:23 +epoch [14/50] batch [85/1000] time 1.541 (1.568) data 0.001 (0.010) loss 1.1289 (1.1272) acc 71.8750 (72.6471) lr 1.7290e-03 eta 16:04:33 +epoch [14/50] batch 
[90/1000] time 1.565 (1.567) data 0.000 (0.009) loss 2.3984 (1.1445) acc 56.2500 (72.4653) lr 1.7290e-03 eta 16:04:09 +epoch [14/50] batch [95/1000] time 1.581 (1.568) data 0.000 (0.009) loss 1.2744 (1.1406) acc 68.7500 (72.5658) lr 1.7290e-03 eta 16:04:11 +epoch [14/50] batch [100/1000] time 1.563 (1.567) data 0.000 (0.008) loss 2.0273 (1.1457) acc 62.5000 (72.7500) lr 1.7290e-03 eta 16:03:53 +epoch [14/50] batch [105/1000] time 1.554 (1.567) data 0.000 (0.008) loss 1.1582 (1.1354) acc 68.7500 (72.7976) lr 1.7290e-03 eta 16:03:16 +epoch [14/50] batch [110/1000] time 1.555 (1.566) data 0.000 (0.008) loss 0.8882 (1.1367) acc 71.8750 (72.6705) lr 1.7290e-03 eta 16:02:49 +epoch [14/50] batch [115/1000] time 1.575 (1.566) data 0.000 (0.007) loss 1.3164 (1.1355) acc 62.5000 (72.4457) lr 1.7290e-03 eta 16:02:28 +epoch [14/50] batch [120/1000] time 1.714 (1.567) data 0.000 (0.007) loss 1.4941 (1.1413) acc 71.8750 (72.3698) lr 1.7290e-03 eta 16:02:56 +epoch [14/50] batch [125/1000] time 1.566 (1.566) data 0.000 (0.007) loss 0.7822 (1.1260) acc 62.5000 (72.4750) lr 1.7290e-03 eta 16:02:42 +epoch [14/50] batch [130/1000] time 1.562 (1.566) data 0.000 (0.007) loss 0.6797 (1.1169) acc 71.8750 (72.5240) lr 1.7290e-03 eta 16:02:26 +epoch [14/50] batch [135/1000] time 1.566 (1.566) data 0.000 (0.006) loss 1.5654 (1.1172) acc 50.0000 (72.5231) lr 1.7290e-03 eta 16:02:24 +epoch [14/50] batch [140/1000] time 1.585 (1.567) data 0.001 (0.006) loss 0.8794 (1.1152) acc 68.7500 (72.5223) lr 1.7290e-03 eta 16:02:22 +epoch [14/50] batch [145/1000] time 1.548 (1.566) data 0.000 (0.006) loss 1.6240 (1.1316) acc 68.7500 (72.2198) lr 1.7290e-03 eta 16:01:53 +epoch [14/50] batch [150/1000] time 1.559 (1.566) data 0.001 (0.006) loss 0.9067 (1.1295) acc 81.2500 (72.3333) lr 1.7290e-03 eta 16:01:35 +epoch [14/50] batch [155/1000] time 1.556 (1.565) data 0.000 (0.006) loss 1.4297 (1.1330) acc 65.6250 (72.2581) lr 1.7290e-03 eta 16:01:15 +epoch [14/50] batch [160/1000] time 1.562 (1.565) data 0.000 
(0.005) loss 0.6416 (1.1233) acc 81.2500 (72.3438) lr 1.7290e-03 eta 16:01:09 +epoch [14/50] batch [165/1000] time 1.718 (1.566) data 0.000 (0.005) loss 0.7725 (1.1206) acc 78.1250 (72.3864) lr 1.7290e-03 eta 16:01:29 +epoch [14/50] batch [170/1000] time 1.552 (1.566) data 0.000 (0.005) loss 0.6235 (1.1142) acc 81.2500 (72.4632) lr 1.7290e-03 eta 16:01:28 +epoch [14/50] batch [175/1000] time 1.556 (1.566) data 0.000 (0.005) loss 1.1680 (1.1200) acc 62.5000 (72.2857) lr 1.7290e-03 eta 16:01:12 +epoch [14/50] batch [180/1000] time 1.552 (1.566) data 0.000 (0.005) loss 1.0098 (1.1194) acc 71.8750 (72.2222) lr 1.7290e-03 eta 16:00:57 +epoch [14/50] batch [185/1000] time 1.559 (1.566) data 0.000 (0.005) loss 1.8203 (1.1211) acc 59.3750 (72.2128) lr 1.7290e-03 eta 16:00:53 +epoch [14/50] batch [190/1000] time 1.567 (1.566) data 0.000 (0.005) loss 1.6377 (1.1241) acc 62.5000 (72.3026) lr 1.7290e-03 eta 16:00:36 +epoch [14/50] batch [195/1000] time 1.572 (1.566) data 0.001 (0.004) loss 0.8643 (1.1268) acc 84.3750 (72.2596) lr 1.7290e-03 eta 16:00:26 +epoch [14/50] batch [200/1000] time 1.540 (1.565) data 0.000 (0.004) loss 1.3271 (1.1278) acc 68.7500 (72.2344) lr 1.7290e-03 eta 16:00:06 +epoch [14/50] batch [205/1000] time 1.567 (1.565) data 0.001 (0.004) loss 1.4092 (1.1290) acc 68.7500 (72.1646) lr 1.7290e-03 eta 15:59:59 +epoch [14/50] batch [210/1000] time 1.569 (1.566) data 0.000 (0.004) loss 1.0303 (1.1288) acc 75.0000 (72.1875) lr 1.7290e-03 eta 16:00:24 +epoch [14/50] batch [215/1000] time 1.574 (1.566) data 0.000 (0.004) loss 1.5273 (1.1327) acc 65.6250 (72.0349) lr 1.7290e-03 eta 16:00:14 +epoch [14/50] batch [220/1000] time 1.569 (1.566) data 0.000 (0.004) loss 1.6934 (1.1311) acc 59.3750 (72.0170) lr 1.7290e-03 eta 15:59:59 +epoch [14/50] batch [225/1000] time 1.548 (1.566) data 0.000 (0.004) loss 0.9189 (1.1307) acc 78.1250 (72.0000) lr 1.7290e-03 eta 15:59:44 +epoch [14/50] batch [230/1000] time 1.565 (1.566) data 0.001 (0.004) loss 0.8755 (1.1294) acc 
68.7500 (72.0788) lr 1.7290e-03 eta 15:59:34 +epoch [14/50] batch [235/1000] time 1.565 (1.566) data 0.000 (0.004) loss 1.9258 (1.1342) acc 53.1250 (71.9415) lr 1.7290e-03 eta 15:59:27 +epoch [14/50] batch [240/1000] time 1.593 (1.566) data 0.000 (0.004) loss 0.9863 (1.1364) acc 78.1250 (71.9531) lr 1.7290e-03 eta 15:59:25 +epoch [14/50] batch [245/1000] time 1.559 (1.566) data 0.000 (0.004) loss 1.3789 (1.1377) acc 56.2500 (71.8367) lr 1.7290e-03 eta 15:59:18 +epoch [14/50] batch [250/1000] time 1.554 (1.566) data 0.000 (0.004) loss 1.2559 (1.1452) acc 59.3750 (71.6625) lr 1.7290e-03 eta 15:59:04 +epoch [14/50] batch [255/1000] time 1.523 (1.565) data 0.000 (0.004) loss 1.2021 (1.1455) acc 62.5000 (71.6176) lr 1.7290e-03 eta 15:58:43 +epoch [14/50] batch [260/1000] time 1.559 (1.565) data 0.000 (0.003) loss 1.0625 (1.1446) acc 78.1250 (71.5625) lr 1.7290e-03 eta 15:58:27 +epoch [14/50] batch [265/1000] time 1.564 (1.565) data 0.000 (0.003) loss 1.2305 (1.1477) acc 75.0000 (71.5212) lr 1.7290e-03 eta 15:58:17 +epoch [14/50] batch [270/1000] time 1.578 (1.565) data 0.000 (0.003) loss 1.0312 (1.1473) acc 75.0000 (71.5046) lr 1.7290e-03 eta 15:58:12 +epoch [14/50] batch [275/1000] time 1.557 (1.566) data 0.000 (0.003) loss 1.0996 (1.1425) acc 65.6250 (71.5795) lr 1.7290e-03 eta 15:58:25 +epoch [14/50] batch [280/1000] time 1.548 (1.566) data 0.000 (0.003) loss 1.2588 (1.1447) acc 71.8750 (71.5625) lr 1.7290e-03 eta 15:58:13 +epoch [14/50] batch [285/1000] time 1.545 (1.565) data 0.000 (0.003) loss 0.7759 (1.1437) acc 87.5000 (71.5789) lr 1.7290e-03 eta 15:57:52 +epoch [14/50] batch [290/1000] time 1.569 (1.565) data 0.000 (0.003) loss 0.7358 (1.1416) acc 81.2500 (71.6056) lr 1.7290e-03 eta 15:57:41 +epoch [14/50] batch [295/1000] time 1.556 (1.565) data 0.000 (0.003) loss 1.1309 (1.1401) acc 75.0000 (71.5784) lr 1.7290e-03 eta 15:57:24 +epoch [14/50] batch [300/1000] time 1.584 (1.565) data 0.000 (0.003) loss 0.8491 (1.1415) acc 75.0000 (71.5208) lr 1.7290e-03 eta 
15:57:23 +epoch [14/50] batch [305/1000] time 1.559 (1.565) data 0.000 (0.003) loss 1.7119 (1.1422) acc 65.6250 (71.5061) lr 1.7290e-03 eta 15:57:21 +epoch [14/50] batch [310/1000] time 1.525 (1.565) data 0.000 (0.003) loss 1.1221 (1.1423) acc 71.8750 (71.5323) lr 1.7290e-03 eta 15:57:11 +epoch [14/50] batch [315/1000] time 1.589 (1.565) data 0.000 (0.003) loss 0.6489 (1.1412) acc 81.2500 (71.5476) lr 1.7290e-03 eta 15:57:02 +epoch [14/50] batch [320/1000] time 1.567 (1.566) data 0.000 (0.003) loss 0.9893 (1.1399) acc 78.1250 (71.6406) lr 1.7290e-03 eta 15:57:12 +epoch [14/50] batch [325/1000] time 1.562 (1.566) data 0.000 (0.003) loss 0.7710 (1.1349) acc 78.1250 (71.7788) lr 1.7290e-03 eta 15:57:02 +epoch [14/50] batch [330/1000] time 1.542 (1.566) data 0.000 (0.003) loss 1.1367 (1.1335) acc 75.0000 (71.8371) lr 1.7290e-03 eta 15:56:48 +epoch [14/50] batch [335/1000] time 1.552 (1.565) data 0.001 (0.003) loss 1.4619 (1.1341) acc 71.8750 (71.8284) lr 1.7290e-03 eta 15:56:37 +epoch [14/50] batch [340/1000] time 1.569 (1.565) data 0.000 (0.003) loss 1.3027 (1.1373) acc 71.8750 (71.7739) lr 1.7290e-03 eta 15:56:27 +epoch [14/50] batch [345/1000] time 1.558 (1.565) data 0.000 (0.003) loss 1.0293 (1.1382) acc 65.6250 (71.7754) lr 1.7290e-03 eta 15:56:15 +epoch [14/50] batch [350/1000] time 1.548 (1.565) data 0.000 (0.003) loss 1.6191 (1.1372) acc 62.5000 (71.8125) lr 1.7290e-03 eta 15:56:03 +epoch [14/50] batch [355/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.1846 (1.1369) acc 78.1250 (71.8046) lr 1.7290e-03 eta 15:55:47 +epoch [14/50] batch [360/1000] time 1.571 (1.565) data 0.000 (0.003) loss 1.1025 (1.1369) acc 71.8750 (71.7535) lr 1.7290e-03 eta 15:55:48 +epoch [14/50] batch [365/1000] time 1.557 (1.565) data 0.000 (0.003) loss 1.1162 (1.1343) acc 78.1250 (71.8065) lr 1.7290e-03 eta 15:55:33 +epoch [14/50] batch [370/1000] time 1.561 (1.565) data 0.000 (0.003) loss 1.4170 (1.1324) acc 53.1250 (71.8328) lr 1.7290e-03 eta 15:55:20 +epoch [14/50] batch 
[375/1000] time 1.577 (1.565) data 0.000 (0.003) loss 0.9375 (1.1326) acc 78.1250 (71.7917) lr 1.7290e-03 eta 15:55:14 +epoch [14/50] batch [380/1000] time 1.592 (1.565) data 0.000 (0.002) loss 0.9473 (1.1336) acc 71.8750 (71.7599) lr 1.7290e-03 eta 15:55:03 +epoch [14/50] batch [385/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.8677 (1.1303) acc 81.2500 (71.8831) lr 1.7290e-03 eta 15:54:52 +epoch [14/50] batch [390/1000] time 1.565 (1.565) data 0.001 (0.002) loss 1.2549 (1.1294) acc 75.0000 (71.8830) lr 1.7290e-03 eta 15:54:38 +epoch [14/50] batch [395/1000] time 1.552 (1.564) data 0.001 (0.002) loss 0.6802 (1.1274) acc 75.0000 (71.8750) lr 1.7290e-03 eta 15:54:22 +epoch [14/50] batch [400/1000] time 1.568 (1.564) data 0.000 (0.002) loss 1.6504 (1.1316) acc 65.6250 (71.7891) lr 1.7290e-03 eta 15:54:14 +epoch [14/50] batch [405/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.1367 (1.1312) acc 75.0000 (71.8133) lr 1.7290e-03 eta 15:53:57 +epoch [14/50] batch [410/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.6504 (1.1321) acc 59.3750 (71.7835) lr 1.7290e-03 eta 15:53:47 +epoch [14/50] batch [415/1000] time 1.570 (1.564) data 0.000 (0.002) loss 0.8853 (1.1332) acc 68.7500 (71.7470) lr 1.7290e-03 eta 15:53:37 +epoch [14/50] batch [420/1000] time 1.571 (1.564) data 0.001 (0.002) loss 1.4834 (1.1362) acc 68.7500 (71.7411) lr 1.7290e-03 eta 15:53:29 +epoch [14/50] batch [425/1000] time 1.550 (1.564) data 0.000 (0.002) loss 1.2275 (1.1382) acc 71.8750 (71.7059) lr 1.7290e-03 eta 15:53:34 +epoch [14/50] batch [430/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.1094 (1.1380) acc 71.8750 (71.7369) lr 1.7290e-03 eta 15:53:22 +epoch [14/50] batch [435/1000] time 1.568 (1.564) data 0.000 (0.002) loss 1.3066 (1.1362) acc 71.8750 (71.7960) lr 1.7290e-03 eta 15:53:13 +epoch [14/50] batch [440/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.6748 (1.1364) acc 65.6250 (71.7969) lr 1.7290e-03 eta 15:53:06 +epoch [14/50] batch [445/1000] time 1.588 (1.564) data 
0.001 (0.002) loss 0.5942 (1.1369) acc 90.6250 (71.8329) lr 1.7290e-03 eta 15:52:59 +epoch [14/50] batch [450/1000] time 1.569 (1.564) data 0.000 (0.002) loss 1.0547 (1.1377) acc 71.8750 (71.7917) lr 1.7290e-03 eta 15:52:50 +epoch [14/50] batch [455/1000] time 1.576 (1.564) data 0.001 (0.002) loss 2.2070 (1.1387) acc 56.2500 (71.8132) lr 1.7290e-03 eta 15:52:42 +epoch [14/50] batch [460/1000] time 1.577 (1.564) data 0.001 (0.002) loss 0.7295 (1.1350) acc 75.0000 (71.8818) lr 1.7290e-03 eta 15:52:35 +epoch [14/50] batch [465/1000] time 1.565 (1.564) data 0.001 (0.002) loss 1.1133 (1.1367) acc 65.6250 (71.8414) lr 1.7290e-03 eta 15:52:26 +epoch [14/50] batch [470/1000] time 1.539 (1.564) data 0.001 (0.002) loss 1.0039 (1.1386) acc 71.8750 (71.8285) lr 1.7290e-03 eta 15:52:25 +epoch [14/50] batch [475/1000] time 1.521 (1.564) data 0.000 (0.002) loss 1.0879 (1.1386) acc 65.6250 (71.8289) lr 1.7290e-03 eta 15:52:12 +epoch [14/50] batch [480/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.5918 (1.1390) acc 62.5000 (71.7904) lr 1.7290e-03 eta 15:52:04 +epoch [14/50] batch [485/1000] time 1.597 (1.564) data 0.000 (0.002) loss 1.1260 (1.1410) acc 68.7500 (71.7397) lr 1.7290e-03 eta 15:51:56 +epoch [14/50] batch [490/1000] time 1.575 (1.564) data 0.000 (0.002) loss 1.3232 (1.1400) acc 68.7500 (71.7793) lr 1.7290e-03 eta 15:51:50 +epoch [14/50] batch [495/1000] time 1.536 (1.564) data 0.001 (0.002) loss 1.3008 (1.1391) acc 71.8750 (71.7929) lr 1.7290e-03 eta 15:51:41 +epoch [14/50] batch [500/1000] time 1.589 (1.564) data 0.001 (0.002) loss 1.5439 (1.1400) acc 68.7500 (71.7938) lr 1.7290e-03 eta 15:51:34 +epoch [14/50] batch [505/1000] time 1.555 (1.564) data 0.000 (0.002) loss 0.7437 (1.1392) acc 87.5000 (71.8379) lr 1.7290e-03 eta 15:51:24 +epoch [14/50] batch [510/1000] time 1.710 (1.564) data 0.001 (0.002) loss 1.3438 (1.1378) acc 71.8750 (71.8811) lr 1.7290e-03 eta 15:51:23 +epoch [14/50] batch [515/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.1436 (1.1362) acc 
81.2500 (71.9296) lr 1.7290e-03 eta 15:51:15 +epoch [14/50] batch [520/1000] time 1.549 (1.564) data 0.000 (0.002) loss 1.4521 (1.1385) acc 75.0000 (71.9291) lr 1.7290e-03 eta 15:51:05 +epoch [14/50] batch [525/1000] time 1.559 (1.564) data 0.000 (0.002) loss 0.7842 (1.1393) acc 71.8750 (71.9286) lr 1.7290e-03 eta 15:50:55 +epoch [14/50] batch [530/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.9067 (1.1391) acc 75.0000 (71.9458) lr 1.7290e-03 eta 15:50:49 +epoch [14/50] batch [535/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.7832 (1.1430) acc 50.0000 (71.8575) lr 1.7290e-03 eta 15:50:41 +epoch [14/50] batch [540/1000] time 1.562 (1.564) data 0.001 (0.002) loss 0.9897 (1.1436) acc 78.1250 (71.8634) lr 1.7290e-03 eta 15:50:31 +epoch [14/50] batch [545/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.1943 (1.1443) acc 71.8750 (71.8463) lr 1.7290e-03 eta 15:50:20 +epoch [14/50] batch [550/1000] time 1.571 (1.564) data 0.001 (0.002) loss 1.3096 (1.1438) acc 68.7500 (71.8409) lr 1.7290e-03 eta 15:50:09 +epoch [14/50] batch [555/1000] time 1.574 (1.564) data 0.000 (0.002) loss 0.7959 (1.1444) acc 81.2500 (71.7905) lr 1.7290e-03 eta 15:50:02 +epoch [14/50] batch [560/1000] time 1.582 (1.564) data 0.000 (0.002) loss 1.3066 (1.1444) acc 78.1250 (71.7690) lr 1.7290e-03 eta 15:49:54 +epoch [14/50] batch [565/1000] time 1.595 (1.564) data 0.000 (0.002) loss 1.2891 (1.1442) acc 71.8750 (71.7699) lr 1.7290e-03 eta 15:49:44 +epoch [14/50] batch [570/1000] time 1.535 (1.564) data 0.000 (0.002) loss 1.0645 (1.1442) acc 71.8750 (71.7489) lr 1.7290e-03 eta 15:49:30 +epoch [14/50] batch [575/1000] time 1.560 (1.564) data 0.000 (0.002) loss 1.2393 (1.1426) acc 75.0000 (71.7935) lr 1.7290e-03 eta 15:49:29 +epoch [14/50] batch [580/1000] time 1.552 (1.564) data 0.001 (0.002) loss 1.3770 (1.1426) acc 62.5000 (71.8050) lr 1.7290e-03 eta 15:49:15 +epoch [14/50] batch [585/1000] time 1.556 (1.564) data 0.000 (0.002) loss 0.6895 (1.1423) acc 84.3750 (71.8536) lr 1.7290e-03 eta 
15:49:02 +epoch [14/50] batch [590/1000] time 1.542 (1.564) data 0.000 (0.002) loss 0.4690 (1.1408) acc 90.6250 (71.9015) lr 1.7290e-03 eta 15:48:49 +epoch [14/50] batch [595/1000] time 1.536 (1.563) data 0.000 (0.002) loss 1.1650 (1.1423) acc 75.0000 (71.8540) lr 1.7290e-03 eta 15:48:37 +epoch [14/50] batch [600/1000] time 1.553 (1.563) data 0.001 (0.002) loss 0.9844 (1.1410) acc 71.8750 (71.8802) lr 1.7290e-03 eta 15:48:26 +epoch [14/50] batch [605/1000] time 1.548 (1.563) data 0.001 (0.002) loss 1.1631 (1.1396) acc 71.8750 (71.9318) lr 1.7290e-03 eta 15:48:16 +epoch [14/50] batch [610/1000] time 1.540 (1.563) data 0.000 (0.002) loss 1.0322 (1.1387) acc 68.7500 (71.9416) lr 1.7290e-03 eta 15:48:04 +epoch [14/50] batch [615/1000] time 1.536 (1.563) data 0.000 (0.002) loss 1.1553 (1.1376) acc 71.8750 (71.9461) lr 1.7290e-03 eta 15:47:53 +epoch [14/50] batch [620/1000] time 1.569 (1.563) data 0.000 (0.002) loss 1.1865 (1.1398) acc 81.2500 (71.9002) lr 1.7290e-03 eta 15:47:53 +epoch [14/50] batch [625/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.8418 (1.1406) acc 53.1250 (71.9100) lr 1.7290e-03 eta 15:47:41 +epoch [14/50] batch [630/1000] time 1.581 (1.563) data 0.000 (0.002) loss 1.3291 (1.1404) acc 71.8750 (71.8948) lr 1.7290e-03 eta 15:47:32 +epoch [14/50] batch [635/1000] time 1.548 (1.563) data 0.000 (0.002) loss 1.4053 (1.1404) acc 68.7500 (71.8996) lr 1.7290e-03 eta 15:47:24 +epoch [14/50] batch [640/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.1143 (1.1387) acc 81.2500 (71.9629) lr 1.7290e-03 eta 15:47:16 +epoch [14/50] batch [645/1000] time 1.532 (1.563) data 0.001 (0.002) loss 1.0713 (1.1408) acc 78.1250 (71.9186) lr 1.7290e-03 eta 15:47:05 +epoch [14/50] batch [650/1000] time 1.579 (1.563) data 0.000 (0.002) loss 0.9507 (1.1402) acc 75.0000 (71.9375) lr 1.7290e-03 eta 15:46:56 +epoch [14/50] batch [655/1000] time 1.552 (1.563) data 0.000 (0.002) loss 1.7920 (1.1412) acc 65.6250 (71.9323) lr 1.7290e-03 eta 15:46:46 +epoch [14/50] batch 
[660/1000] time 1.551 (1.563) data 0.000 (0.002) loss 1.0771 (1.1422) acc 78.1250 (71.9129) lr 1.7290e-03 eta 15:46:35 +epoch [14/50] batch [665/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.0381 (1.1416) acc 71.8750 (71.9079) lr 1.7290e-03 eta 15:46:36 +epoch [14/50] batch [670/1000] time 1.578 (1.563) data 0.000 (0.002) loss 1.1689 (1.1412) acc 75.0000 (71.8937) lr 1.7290e-03 eta 15:46:27 +epoch [14/50] batch [675/1000] time 1.545 (1.563) data 0.000 (0.002) loss 1.4629 (1.1413) acc 68.7500 (71.8981) lr 1.7290e-03 eta 15:46:20 +epoch [14/50] batch [680/1000] time 1.564 (1.563) data 0.000 (0.002) loss 0.6240 (1.1392) acc 81.2500 (71.9118) lr 1.7290e-03 eta 15:46:10 +epoch [14/50] batch [685/1000] time 1.578 (1.563) data 0.000 (0.002) loss 1.0117 (1.1384) acc 68.7500 (71.9206) lr 1.7290e-03 eta 15:46:05 +epoch [14/50] batch [690/1000] time 1.522 (1.563) data 0.000 (0.002) loss 1.4453 (1.1374) acc 65.6250 (71.9384) lr 1.7290e-03 eta 15:45:55 +epoch [14/50] batch [695/1000] time 1.551 (1.563) data 0.000 (0.002) loss 0.8730 (1.1382) acc 78.1250 (71.9110) lr 1.7290e-03 eta 15:45:44 +epoch [14/50] batch [700/1000] time 1.557 (1.563) data 0.000 (0.002) loss 0.9204 (1.1379) acc 71.8750 (71.8973) lr 1.7290e-03 eta 15:45:35 +epoch [14/50] batch [705/1000] time 1.567 (1.563) data 0.000 (0.002) loss 1.0283 (1.1394) acc 71.8750 (71.8573) lr 1.7290e-03 eta 15:45:27 +epoch [14/50] batch [710/1000] time 1.565 (1.563) data 0.000 (0.002) loss 1.2402 (1.1397) acc 65.6250 (71.8574) lr 1.7290e-03 eta 15:45:17 +epoch [14/50] batch [715/1000] time 1.545 (1.563) data 0.000 (0.002) loss 0.6230 (1.1395) acc 81.2500 (71.8881) lr 1.7290e-03 eta 15:45:10 +epoch [14/50] batch [720/1000] time 1.565 (1.563) data 0.001 (0.002) loss 1.1621 (1.1399) acc 68.7500 (71.8707) lr 1.7290e-03 eta 15:45:03 +epoch [14/50] batch [725/1000] time 1.546 (1.563) data 0.000 (0.002) loss 0.5811 (1.1392) acc 90.6250 (71.8750) lr 1.7290e-03 eta 15:45:00 +epoch [14/50] batch [730/1000] time 1.572 (1.563) data 
0.000 (0.002) loss 1.1328 (1.1409) acc 71.8750 (71.8365) lr 1.7290e-03 eta 15:44:52 +epoch [14/50] batch [735/1000] time 1.585 (1.563) data 0.001 (0.002) loss 1.3242 (1.1415) acc 62.5000 (71.8240) lr 1.7290e-03 eta 15:44:42 +epoch [14/50] batch [740/1000] time 1.553 (1.563) data 0.001 (0.001) loss 0.9175 (1.1401) acc 71.8750 (71.8412) lr 1.7290e-03 eta 15:44:32 +epoch [14/50] batch [745/1000] time 1.576 (1.563) data 0.000 (0.001) loss 1.4053 (1.1429) acc 65.6250 (71.8037) lr 1.7290e-03 eta 15:44:25 +epoch [14/50] batch [750/1000] time 1.564 (1.563) data 0.000 (0.001) loss 1.2764 (1.1428) acc 65.6250 (71.7917) lr 1.7290e-03 eta 15:44:17 +epoch [14/50] batch [755/1000] time 1.557 (1.563) data 0.000 (0.001) loss 1.0811 (1.1448) acc 71.8750 (71.7591) lr 1.7290e-03 eta 15:44:11 +epoch [14/50] batch [760/1000] time 1.539 (1.563) data 0.001 (0.001) loss 1.1641 (1.1440) acc 56.2500 (71.7434) lr 1.7290e-03 eta 15:44:04 +epoch [14/50] batch [765/1000] time 1.575 (1.563) data 0.000 (0.001) loss 1.8057 (1.1449) acc 46.8750 (71.7239) lr 1.7290e-03 eta 15:43:57 +epoch [14/50] batch [770/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.0479 (1.1447) acc 68.7500 (71.7248) lr 1.7290e-03 eta 15:43:54 +epoch [14/50] batch [775/1000] time 1.588 (1.563) data 0.001 (0.001) loss 1.1104 (1.1448) acc 65.6250 (71.7016) lr 1.7290e-03 eta 15:43:47 +epoch [14/50] batch [780/1000] time 1.580 (1.563) data 0.001 (0.001) loss 0.9741 (1.1448) acc 81.2500 (71.6987) lr 1.7290e-03 eta 15:43:41 +epoch [14/50] batch [785/1000] time 1.561 (1.563) data 0.001 (0.001) loss 1.3701 (1.1453) acc 68.7500 (71.6839) lr 1.7290e-03 eta 15:43:36 +epoch [14/50] batch [790/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.0957 (1.1444) acc 68.7500 (71.6772) lr 1.7290e-03 eta 15:43:27 +epoch [14/50] batch [795/1000] time 1.554 (1.563) data 0.000 (0.001) loss 1.1592 (1.1443) acc 59.3750 (71.6352) lr 1.7290e-03 eta 15:43:18 +epoch [14/50] batch [800/1000] time 1.571 (1.563) data 0.000 (0.001) loss 1.4443 (1.1451) acc 
78.1250 (71.6523) lr 1.7290e-03 eta 15:43:10 +epoch [14/50] batch [805/1000] time 1.549 (1.563) data 0.001 (0.001) loss 1.3301 (1.1447) acc 75.0000 (71.6498) lr 1.7290e-03 eta 15:43:03 +epoch [14/50] batch [810/1000] time 1.556 (1.563) data 0.001 (0.001) loss 0.6519 (1.1438) acc 87.5000 (71.6821) lr 1.7290e-03 eta 15:42:55 +epoch [14/50] batch [815/1000] time 1.554 (1.563) data 0.001 (0.001) loss 0.8813 (1.1429) acc 81.2500 (71.7216) lr 1.7290e-03 eta 15:42:50 +epoch [14/50] batch [820/1000] time 1.545 (1.563) data 0.001 (0.001) loss 0.9717 (1.1428) acc 68.7500 (71.7149) lr 1.7290e-03 eta 15:42:40 +epoch [14/50] batch [825/1000] time 1.541 (1.563) data 0.000 (0.001) loss 1.0449 (1.1421) acc 75.0000 (71.7311) lr 1.7290e-03 eta 15:42:31 +epoch [14/50] batch [830/1000] time 1.543 (1.563) data 0.001 (0.001) loss 0.9990 (1.1417) acc 81.2500 (71.7508) lr 1.7290e-03 eta 15:42:21 +epoch [14/50] batch [835/1000] time 1.538 (1.563) data 0.000 (0.001) loss 1.0508 (1.1411) acc 68.7500 (71.7665) lr 1.7290e-03 eta 15:42:08 +epoch [14/50] batch [840/1000] time 1.544 (1.563) data 0.000 (0.001) loss 0.4375 (1.1396) acc 84.3750 (71.8006) lr 1.7290e-03 eta 15:41:59 +epoch [14/50] batch [845/1000] time 1.551 (1.563) data 0.000 (0.001) loss 0.9014 (1.1405) acc 75.0000 (71.7751) lr 1.7290e-03 eta 15:41:48 +epoch [14/50] batch [850/1000] time 1.549 (1.563) data 0.000 (0.001) loss 0.8818 (1.1404) acc 68.7500 (71.7647) lr 1.7290e-03 eta 15:41:39 +epoch [14/50] batch [855/1000] time 1.536 (1.563) data 0.000 (0.001) loss 0.7964 (1.1391) acc 75.0000 (71.7763) lr 1.7290e-03 eta 15:41:30 +epoch [14/50] batch [860/1000] time 1.554 (1.563) data 0.000 (0.001) loss 1.6299 (1.1393) acc 59.3750 (71.7551) lr 1.7290e-03 eta 15:41:21 +epoch [14/50] batch [865/1000] time 1.541 (1.563) data 0.000 (0.001) loss 1.2295 (1.1397) acc 68.7500 (71.7594) lr 1.7290e-03 eta 15:41:11 +epoch [14/50] batch [870/1000] time 1.560 (1.563) data 0.000 (0.001) loss 1.0693 (1.1411) acc 68.7500 (71.7241) lr 1.7290e-03 eta 
15:41:00 +epoch [14/50] batch [875/1000] time 1.735 (1.563) data 0.000 (0.001) loss 1.1387 (1.1407) acc 71.8750 (71.7286) lr 1.7290e-03 eta 15:40:57 +epoch [14/50] batch [880/1000] time 1.557 (1.563) data 0.000 (0.001) loss 0.8989 (1.1406) acc 81.2500 (71.7401) lr 1.7290e-03 eta 15:40:51 +epoch [14/50] batch [885/1000] time 1.565 (1.563) data 0.000 (0.001) loss 1.3076 (1.1397) acc 75.0000 (71.7549) lr 1.7290e-03 eta 15:40:42 +epoch [14/50] batch [890/1000] time 1.579 (1.563) data 0.000 (0.001) loss 0.8677 (1.1389) acc 81.2500 (71.7662) lr 1.7290e-03 eta 15:40:36 +epoch [14/50] batch [895/1000] time 1.582 (1.563) data 0.000 (0.001) loss 0.5293 (1.1384) acc 84.3750 (71.7633) lr 1.7290e-03 eta 15:40:28 +epoch [14/50] batch [900/1000] time 1.568 (1.563) data 0.000 (0.001) loss 1.3750 (1.1386) acc 59.3750 (71.7708) lr 1.7290e-03 eta 15:40:20 +epoch [14/50] batch [905/1000] time 1.551 (1.563) data 0.000 (0.001) loss 1.0430 (1.1383) acc 68.7500 (71.7472) lr 1.7290e-03 eta 15:40:10 +epoch [14/50] batch [910/1000] time 1.548 (1.563) data 0.000 (0.001) loss 1.3164 (1.1388) acc 68.7500 (71.7411) lr 1.7290e-03 eta 15:40:01 +epoch [14/50] batch [915/1000] time 1.556 (1.563) data 0.001 (0.001) loss 1.3936 (1.1388) acc 65.6250 (71.7418) lr 1.7290e-03 eta 15:39:51 +epoch [14/50] batch [920/1000] time 1.691 (1.563) data 0.000 (0.001) loss 1.6904 (1.1386) acc 65.6250 (71.7561) lr 1.7290e-03 eta 15:39:48 +epoch [14/50] batch [925/1000] time 1.561 (1.563) data 0.000 (0.001) loss 0.5264 (1.1389) acc 90.6250 (71.7635) lr 1.7290e-03 eta 15:39:40 +epoch [14/50] batch [930/1000] time 1.551 (1.563) data 0.000 (0.001) loss 1.0869 (1.1403) acc 75.0000 (71.7339) lr 1.7290e-03 eta 15:39:33 +epoch [14/50] batch [935/1000] time 1.578 (1.563) data 0.000 (0.001) loss 1.2080 (1.1404) acc 75.0000 (71.7447) lr 1.7290e-03 eta 15:39:26 +epoch [14/50] batch [940/1000] time 1.555 (1.563) data 0.001 (0.001) loss 0.9189 (1.1400) acc 75.0000 (71.7653) lr 1.7290e-03 eta 15:39:16 +epoch [14/50] batch 
[945/1000] time 1.536 (1.563) data 0.000 (0.001) loss 0.6846 (1.1390) acc 84.3750 (71.7824) lr 1.7290e-03 eta 15:39:07 +epoch [14/50] batch [950/1000] time 1.550 (1.563) data 0.000 (0.001) loss 1.1201 (1.1385) acc 65.6250 (71.7566) lr 1.7290e-03 eta 15:38:59 +epoch [14/50] batch [955/1000] time 1.545 (1.563) data 0.001 (0.001) loss 1.7637 (1.1384) acc 56.2500 (71.7637) lr 1.7290e-03 eta 15:38:52 +epoch [14/50] batch [960/1000] time 1.550 (1.563) data 0.000 (0.001) loss 1.5898 (1.1388) acc 56.2500 (71.7350) lr 1.7290e-03 eta 15:38:44 +epoch [14/50] batch [965/1000] time 1.532 (1.563) data 0.001 (0.001) loss 1.3682 (1.1394) acc 68.7500 (71.7260) lr 1.7290e-03 eta 15:38:39 +epoch [14/50] batch [970/1000] time 1.570 (1.563) data 0.000 (0.001) loss 0.8672 (1.1390) acc 71.8750 (71.7204) lr 1.7290e-03 eta 15:38:32 +epoch [14/50] batch [975/1000] time 1.557 (1.563) data 0.000 (0.001) loss 1.1924 (1.1401) acc 68.7500 (71.7147) lr 1.7290e-03 eta 15:38:25 +epoch [14/50] batch [980/1000] time 1.564 (1.563) data 0.000 (0.001) loss 0.9536 (1.1400) acc 78.1250 (71.7124) lr 1.7290e-03 eta 15:38:16 +epoch [14/50] batch [985/1000] time 1.541 (1.563) data 0.001 (0.001) loss 1.1104 (1.1396) acc 71.8750 (71.7195) lr 1.7290e-03 eta 15:38:06 +epoch [14/50] batch [990/1000] time 1.543 (1.563) data 0.000 (0.001) loss 0.9302 (1.1396) acc 81.2500 (71.7298) lr 1.7290e-03 eta 15:37:56 +epoch [14/50] batch [995/1000] time 1.557 (1.563) data 0.000 (0.001) loss 1.3604 (1.1404) acc 68.7500 (71.7242) lr 1.7290e-03 eta 15:37:47 +epoch [14/50] batch [1000/1000] time 1.564 (1.563) data 0.000 (0.001) loss 0.8628 (1.1403) acc 78.1250 (71.7031) lr 1.6845e-03 eta 15:37:40 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,110 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +epoch [15/50] batch [5/1000] time 1.579 (1.682) data 0.001 (0.180) loss 1.2568 (0.8736) acc 65.6250 (78.1250) lr 1.6845e-03 eta 16:48:54 +epoch [15/50] batch [10/1000] time 1.545 (1.617) data 0.000 (0.090) loss 
1.2402 (1.0376) acc 71.8750 (73.7500) lr 1.6845e-03 eta 16:10:07 +epoch [15/50] batch [15/1000] time 1.570 (1.600) data 0.001 (0.060) loss 0.6738 (1.0433) acc 87.5000 (72.7083) lr 1.6845e-03 eta 15:59:18 +epoch [15/50] batch [20/1000] time 1.583 (1.593) data 0.000 (0.045) loss 0.8760 (1.0423) acc 75.0000 (73.1250) lr 1.6845e-03 eta 15:55:05 +epoch [15/50] batch [25/1000] time 1.553 (1.587) data 0.001 (0.036) loss 2.1758 (1.1422) acc 46.8750 (71.6250) lr 1.6845e-03 eta 15:51:32 +epoch [15/50] batch [30/1000] time 1.555 (1.582) data 0.001 (0.030) loss 0.7339 (1.1549) acc 84.3750 (71.8750) lr 1.6845e-03 eta 15:48:30 +epoch [15/50] batch [35/1000] time 1.559 (1.579) data 0.000 (0.026) loss 0.7065 (1.1348) acc 75.0000 (71.9643) lr 1.6845e-03 eta 15:46:46 +epoch [15/50] batch [40/1000] time 1.563 (1.582) data 0.001 (0.023) loss 0.7065 (1.1374) acc 81.2500 (71.7188) lr 1.6845e-03 eta 15:47:58 +epoch [15/50] batch [45/1000] time 1.569 (1.579) data 0.001 (0.020) loss 1.5098 (1.1419) acc 65.6250 (71.5972) lr 1.6845e-03 eta 15:46:10 +epoch [15/50] batch [50/1000] time 1.549 (1.576) data 0.001 (0.018) loss 1.1348 (1.1214) acc 71.8750 (72.0000) lr 1.6845e-03 eta 15:44:05 +epoch [15/50] batch [55/1000] time 1.570 (1.574) data 0.000 (0.017) loss 1.0000 (1.1376) acc 71.8750 (71.9318) lr 1.6845e-03 eta 15:43:11 +epoch [15/50] batch [60/1000] time 1.578 (1.575) data 0.001 (0.015) loss 0.9004 (1.1361) acc 84.3750 (72.5000) lr 1.6845e-03 eta 15:43:14 +epoch [15/50] batch [65/1000] time 1.554 (1.573) data 0.000 (0.014) loss 1.0391 (1.1322) acc 78.1250 (72.5481) lr 1.6845e-03 eta 15:42:10 +epoch [15/50] batch [70/1000] time 1.574 (1.572) data 0.000 (0.013) loss 1.1445 (1.1295) acc 71.8750 (72.5446) lr 1.6845e-03 eta 15:41:20 +epoch [15/50] batch [75/1000] time 1.572 (1.572) data 0.000 (0.012) loss 1.1689 (1.1404) acc 68.7500 (72.4167) lr 1.6845e-03 eta 15:41:00 +epoch [15/50] batch [80/1000] time 1.560 (1.571) data 0.000 (0.012) loss 0.8672 (1.1392) acc 71.8750 (72.3438) lr 1.6845e-03 
eta 15:40:21 +epoch [15/50] batch [85/1000] time 1.586 (1.572) data 0.000 (0.011) loss 1.0273 (1.1389) acc 71.8750 (72.3162) lr 1.6845e-03 eta 15:41:06 +epoch [15/50] batch [90/1000] time 1.554 (1.572) data 0.000 (0.010) loss 1.3662 (1.1335) acc 75.0000 (72.4653) lr 1.6845e-03 eta 15:40:38 +epoch [15/50] batch [95/1000] time 1.529 (1.570) data 0.000 (0.010) loss 1.1270 (1.1361) acc 59.3750 (72.2039) lr 1.6845e-03 eta 15:39:39 +epoch [15/50] batch [100/1000] time 1.554 (1.570) data 0.000 (0.009) loss 0.9604 (1.1356) acc 81.2500 (72.4375) lr 1.6845e-03 eta 15:39:13 +epoch [15/50] batch [105/1000] time 1.571 (1.569) data 0.000 (0.009) loss 1.1484 (1.1468) acc 78.1250 (72.3512) lr 1.6845e-03 eta 15:38:43 +epoch [15/50] batch [110/1000] time 1.562 (1.569) data 0.000 (0.009) loss 0.7856 (1.1334) acc 81.2500 (72.5568) lr 1.6845e-03 eta 15:38:40 +epoch [15/50] batch [115/1000] time 1.594 (1.569) data 0.000 (0.008) loss 1.1221 (1.1409) acc 68.7500 (72.3641) lr 1.6845e-03 eta 15:38:29 +epoch [15/50] batch [120/1000] time 1.548 (1.569) data 0.000 (0.008) loss 0.8647 (1.1465) acc 65.6250 (72.1354) lr 1.6845e-03 eta 15:37:58 +epoch [15/50] batch [125/1000] time 1.709 (1.569) data 0.001 (0.008) loss 1.0547 (1.1450) acc 68.7500 (72.2000) lr 1.6845e-03 eta 15:38:15 +epoch [15/50] batch [130/1000] time 1.566 (1.569) data 0.000 (0.007) loss 0.9932 (1.1427) acc 78.1250 (72.1875) lr 1.6845e-03 eta 15:37:45 +epoch [15/50] batch [135/1000] time 1.543 (1.568) data 0.001 (0.007) loss 1.1191 (1.1429) acc 81.2500 (72.2685) lr 1.6845e-03 eta 15:37:15 +epoch [15/50] batch [140/1000] time 1.547 (1.567) data 0.000 (0.007) loss 1.5361 (1.1435) acc 62.5000 (72.2545) lr 1.6845e-03 eta 15:36:41 +epoch [15/50] batch [145/1000] time 1.575 (1.567) data 0.000 (0.007) loss 1.2295 (1.1433) acc 78.1250 (72.1552) lr 1.6845e-03 eta 15:36:31 +epoch [15/50] batch [150/1000] time 1.554 (1.567) data 0.000 (0.006) loss 1.1055 (1.1382) acc 68.7500 (72.2292) lr 1.6845e-03 eta 15:36:11 +epoch [15/50] batch 
[155/1000] time 1.550 (1.567) data 0.001 (0.006) loss 0.9336 (1.1428) acc 65.6250 (72.1371) lr 1.6845e-03 eta 15:35:57 +epoch [15/50] batch [160/1000] time 1.571 (1.566) data 0.000 (0.006) loss 0.8354 (1.1463) acc 84.3750 (72.1680) lr 1.6845e-03 eta 15:35:29 +epoch [15/50] batch [165/1000] time 1.561 (1.566) data 0.000 (0.006) loss 0.3999 (1.1391) acc 90.6250 (72.2159) lr 1.6845e-03 eta 15:35:11 +epoch [15/50] batch [170/1000] time 1.559 (1.565) data 0.001 (0.006) loss 0.6489 (1.1381) acc 78.1250 (72.1324) lr 1.6845e-03 eta 15:34:51 +epoch [15/50] batch [175/1000] time 1.534 (1.565) data 0.000 (0.006) loss 1.6846 (1.1337) acc 53.1250 (72.1429) lr 1.6845e-03 eta 15:34:40 +epoch [15/50] batch [180/1000] time 1.557 (1.565) data 0.000 (0.005) loss 1.6260 (1.1306) acc 65.6250 (72.2396) lr 1.6845e-03 eta 15:34:27 +epoch [15/50] batch [185/1000] time 1.559 (1.565) data 0.000 (0.005) loss 0.7021 (1.1253) acc 81.2500 (72.4155) lr 1.6845e-03 eta 15:34:15 +epoch [15/50] batch [190/1000] time 1.568 (1.566) data 0.000 (0.005) loss 0.8589 (1.1218) acc 65.6250 (72.3520) lr 1.6845e-03 eta 15:34:45 +epoch [15/50] batch [195/1000] time 1.566 (1.566) data 0.000 (0.005) loss 1.3623 (1.1275) acc 68.7500 (72.1795) lr 1.6845e-03 eta 15:34:36 +epoch [15/50] batch [200/1000] time 1.563 (1.566) data 0.000 (0.005) loss 1.6768 (1.1304) acc 59.3750 (72.1562) lr 1.6845e-03 eta 15:34:19 +epoch [15/50] batch [205/1000] time 1.547 (1.566) data 0.000 (0.005) loss 0.8701 (1.1309) acc 78.1250 (72.1189) lr 1.6845e-03 eta 15:33:58 +epoch [15/50] batch [210/1000] time 1.555 (1.566) data 0.001 (0.005) loss 1.7461 (1.1328) acc 65.6250 (72.1280) lr 1.6845e-03 eta 15:33:50 +epoch [15/50] batch [215/1000] time 1.550 (1.565) data 0.000 (0.005) loss 1.0078 (1.1294) acc 71.8750 (72.1221) lr 1.6845e-03 eta 15:33:31 +epoch [15/50] batch [220/1000] time 1.591 (1.565) data 0.001 (0.005) loss 0.6211 (1.1219) acc 81.2500 (72.2443) lr 1.6845e-03 eta 15:33:28 +epoch [15/50] batch [225/1000] time 1.556 (1.565) data 
0.000 (0.004) loss 1.0771 (1.1244) acc 75.0000 (72.1667) lr 1.6845e-03 eta 15:33:15 +epoch [15/50] batch [230/1000] time 1.545 (1.565) data 0.000 (0.004) loss 1.1074 (1.1224) acc 65.6250 (72.1875) lr 1.6845e-03 eta 15:33:03 +epoch [15/50] batch [235/1000] time 1.582 (1.566) data 0.000 (0.004) loss 0.7603 (1.1219) acc 84.3750 (72.1941) lr 1.6845e-03 eta 15:33:20 +epoch [15/50] batch [240/1000] time 1.567 (1.566) data 0.001 (0.004) loss 2.2266 (1.1273) acc 53.1250 (72.0703) lr 1.6845e-03 eta 15:33:06 +epoch [15/50] batch [245/1000] time 1.555 (1.566) data 0.000 (0.004) loss 0.9937 (1.1287) acc 78.1250 (72.1173) lr 1.6845e-03 eta 15:32:59 +epoch [15/50] batch [250/1000] time 1.543 (1.565) data 0.000 (0.004) loss 1.0908 (1.1280) acc 75.0000 (72.1375) lr 1.6845e-03 eta 15:32:42 +epoch [15/50] batch [255/1000] time 1.562 (1.565) data 0.000 (0.004) loss 1.4082 (1.1277) acc 68.7500 (72.0956) lr 1.6845e-03 eta 15:32:32 +epoch [15/50] batch [260/1000] time 1.546 (1.565) data 0.000 (0.004) loss 0.8037 (1.1269) acc 75.0000 (72.1034) lr 1.6845e-03 eta 15:32:16 +epoch [15/50] batch [265/1000] time 1.573 (1.565) data 0.001 (0.004) loss 1.0156 (1.1250) acc 71.8750 (72.1580) lr 1.6845e-03 eta 15:32:10 +epoch [15/50] batch [270/1000] time 1.569 (1.565) data 0.000 (0.004) loss 1.1816 (1.1244) acc 59.3750 (72.0833) lr 1.6845e-03 eta 15:31:57 +epoch [15/50] batch [275/1000] time 1.528 (1.565) data 0.000 (0.004) loss 1.6172 (1.1290) acc 59.3750 (72.0227) lr 1.6845e-03 eta 15:31:41 +epoch [15/50] batch [280/1000] time 1.561 (1.565) data 0.001 (0.004) loss 1.1289 (1.1309) acc 68.7500 (71.9531) lr 1.6845e-03 eta 15:31:58 +epoch [15/50] batch [285/1000] time 1.562 (1.566) data 0.000 (0.004) loss 0.7744 (1.1292) acc 84.3750 (71.9408) lr 1.6845e-03 eta 15:31:52 +epoch [15/50] batch [290/1000] time 1.529 (1.565) data 0.000 (0.004) loss 1.6689 (1.1308) acc 53.1250 (71.8750) lr 1.6845e-03 eta 15:31:36 +epoch [15/50] batch [295/1000] time 1.546 (1.565) data 0.000 (0.003) loss 1.4932 (1.1328) acc 
59.3750 (71.8220) lr 1.6845e-03 eta 15:31:22 +epoch [15/50] batch [300/1000] time 1.546 (1.565) data 0.001 (0.003) loss 1.2412 (1.1350) acc 75.0000 (71.8958) lr 1.6845e-03 eta 15:31:13 +epoch [15/50] batch [305/1000] time 1.542 (1.565) data 0.000 (0.003) loss 1.2129 (1.1350) acc 62.5000 (71.8955) lr 1.6845e-03 eta 15:30:55 +epoch [15/50] batch [310/1000] time 1.563 (1.565) data 0.000 (0.003) loss 1.0693 (1.1350) acc 71.8750 (71.8750) lr 1.6845e-03 eta 15:30:46 +epoch [15/50] batch [315/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.1787 (1.1361) acc 71.8750 (71.8948) lr 1.6845e-03 eta 15:30:34 +epoch [15/50] batch [320/1000] time 1.572 (1.565) data 0.001 (0.003) loss 1.5713 (1.1378) acc 56.2500 (71.7871) lr 1.6845e-03 eta 15:30:24 +epoch [15/50] batch [325/1000] time 1.562 (1.565) data 0.000 (0.003) loss 0.7891 (1.1352) acc 75.0000 (71.7692) lr 1.6845e-03 eta 15:30:21 +epoch [15/50] batch [330/1000] time 1.548 (1.565) data 0.001 (0.003) loss 1.1660 (1.1369) acc 68.7500 (71.6761) lr 1.6845e-03 eta 15:30:09 +epoch [15/50] batch [335/1000] time 1.575 (1.565) data 0.000 (0.003) loss 2.0918 (1.1398) acc 62.5000 (71.6511) lr 1.6845e-03 eta 15:29:59 +epoch [15/50] batch [340/1000] time 1.541 (1.565) data 0.000 (0.003) loss 0.9272 (1.1394) acc 71.8750 (71.6636) lr 1.6845e-03 eta 15:30:02 +epoch [15/50] batch [345/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.0527 (1.1368) acc 71.8750 (71.7210) lr 1.6845e-03 eta 15:29:47 +epoch [15/50] batch [350/1000] time 1.561 (1.565) data 0.000 (0.003) loss 1.0928 (1.1340) acc 75.0000 (71.7768) lr 1.6845e-03 eta 15:29:37 +epoch [15/50] batch [355/1000] time 1.577 (1.565) data 0.000 (0.003) loss 0.9097 (1.1335) acc 75.0000 (71.7606) lr 1.6845e-03 eta 15:29:30 +epoch [15/50] batch [360/1000] time 1.562 (1.565) data 0.000 (0.003) loss 1.7812 (1.1337) acc 62.5000 (71.7969) lr 1.6845e-03 eta 15:29:20 +epoch [15/50] batch [365/1000] time 1.592 (1.565) data 0.000 (0.003) loss 0.8320 (1.1326) acc 68.7500 (71.7979) lr 1.6845e-03 eta 
15:29:17 +epoch [15/50] batch [370/1000] time 1.556 (1.565) data 0.000 (0.003) loss 0.7573 (1.1336) acc 75.0000 (71.7483) lr 1.6845e-03 eta 15:29:10 +epoch [15/50] batch [375/1000] time 1.570 (1.565) data 0.000 (0.003) loss 0.6548 (1.1330) acc 84.3750 (71.8000) lr 1.6845e-03 eta 15:28:59 +epoch [15/50] batch [380/1000] time 1.585 (1.565) data 0.000 (0.003) loss 1.5117 (1.1336) acc 71.8750 (71.7845) lr 1.6845e-03 eta 15:28:48 +epoch [15/50] batch [385/1000] time 1.547 (1.565) data 0.000 (0.003) loss 1.2891 (1.1327) acc 68.7500 (71.8182) lr 1.6845e-03 eta 15:28:48 +epoch [15/50] batch [390/1000] time 1.548 (1.565) data 0.000 (0.003) loss 1.2383 (1.1316) acc 65.6250 (71.8429) lr 1.6845e-03 eta 15:28:33 +epoch [15/50] batch [395/1000] time 1.576 (1.565) data 0.000 (0.003) loss 1.1621 (1.1347) acc 65.6250 (71.7880) lr 1.6845e-03 eta 15:28:26 +epoch [15/50] batch [400/1000] time 1.534 (1.564) data 0.000 (0.003) loss 1.7178 (1.1385) acc 62.5000 (71.7188) lr 1.6845e-03 eta 15:28:12 +epoch [15/50] batch [405/1000] time 1.576 (1.564) data 0.001 (0.003) loss 0.8833 (1.1377) acc 78.1250 (71.6975) lr 1.6845e-03 eta 15:28:05 +epoch [15/50] batch [410/1000] time 1.560 (1.565) data 0.000 (0.003) loss 0.8779 (1.1367) acc 71.8750 (71.7226) lr 1.6845e-03 eta 15:28:01 +epoch [15/50] batch [415/1000] time 1.539 (1.565) data 0.001 (0.003) loss 1.0811 (1.1392) acc 65.6250 (71.6642) lr 1.6845e-03 eta 15:27:53 +epoch [15/50] batch [420/1000] time 1.557 (1.564) data 0.001 (0.003) loss 0.9585 (1.1405) acc 75.0000 (71.6220) lr 1.6845e-03 eta 15:27:42 +epoch [15/50] batch [425/1000] time 1.554 (1.564) data 0.000 (0.003) loss 1.2500 (1.1400) acc 65.6250 (71.6324) lr 1.6845e-03 eta 15:27:35 +epoch [15/50] batch [430/1000] time 1.556 (1.565) data 0.001 (0.003) loss 0.8086 (1.1388) acc 81.2500 (71.6497) lr 1.6845e-03 eta 15:27:43 +epoch [15/50] batch [435/1000] time 1.552 (1.565) data 0.000 (0.003) loss 1.1631 (1.1360) acc 75.0000 (71.6954) lr 1.6845e-03 eta 15:27:33 +epoch [15/50] batch 
[440/1000] time 1.587 (1.565) data 0.000 (0.002) loss 0.8999 (1.1365) acc 71.8750 (71.6761) lr 1.6845e-03 eta 15:27:25 +epoch [15/50] batch [445/1000] time 1.585 (1.565) data 0.000 (0.002) loss 1.1416 (1.1383) acc 81.2500 (71.6503) lr 1.6845e-03 eta 15:27:22 +epoch [15/50] batch [450/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.0127 (1.1380) acc 71.8750 (71.6389) lr 1.6845e-03 eta 15:27:17 +epoch [15/50] batch [455/1000] time 1.572 (1.565) data 0.001 (0.002) loss 0.9717 (1.1357) acc 78.1250 (71.6964) lr 1.6845e-03 eta 15:27:12 +epoch [15/50] batch [460/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.8496 (1.1371) acc 84.3750 (71.7188) lr 1.6845e-03 eta 15:27:09 +epoch [15/50] batch [465/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.0879 (1.1392) acc 81.2500 (71.6868) lr 1.6845e-03 eta 15:27:01 +epoch [15/50] batch [470/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.8115 (1.1366) acc 71.8750 (71.7221) lr 1.6845e-03 eta 15:26:51 +epoch [15/50] batch [475/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.0352 (1.1390) acc 75.0000 (71.6776) lr 1.6845e-03 eta 15:26:38 +epoch [15/50] batch [480/1000] time 1.556 (1.565) data 0.000 (0.002) loss 0.9707 (1.1375) acc 71.8750 (71.7122) lr 1.6845e-03 eta 15:26:28 +epoch [15/50] batch [485/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.4875 (1.1359) acc 81.2500 (71.7268) lr 1.6845e-03 eta 15:26:21 +epoch [15/50] batch [490/1000] time 1.721 (1.565) data 0.001 (0.002) loss 1.1416 (1.1354) acc 68.7500 (71.7283) lr 1.6845e-03 eta 15:26:23 +epoch [15/50] batch [495/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.8467 (1.1352) acc 81.2500 (71.7235) lr 1.6845e-03 eta 15:26:15 +epoch [15/50] batch [500/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.5244 (1.1355) acc 62.5000 (71.6875) lr 1.6845e-03 eta 15:26:05 +epoch [15/50] batch [505/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.9019 (1.1339) acc 68.7500 (71.6770) lr 1.6845e-03 eta 15:25:53 +epoch [15/50] batch [510/1000] time 1.540 (1.565) data 
0.000 (0.002) loss 1.1748 (1.1363) acc 68.7500 (71.6667) lr 1.6845e-03 eta 15:25:43 +epoch [15/50] batch [515/1000] time 1.556 (1.565) data 0.000 (0.002) loss 0.6982 (1.1351) acc 87.5000 (71.6930) lr 1.6845e-03 eta 15:25:33 +epoch [15/50] batch [520/1000] time 1.530 (1.565) data 0.000 (0.002) loss 1.4355 (1.1349) acc 65.6250 (71.7067) lr 1.6845e-03 eta 15:25:20 +epoch [15/50] batch [525/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.9243 (1.1348) acc 71.8750 (71.7143) lr 1.6845e-03 eta 15:25:12 +epoch [15/50] batch [530/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.1816 (1.1356) acc 75.0000 (71.7158) lr 1.6845e-03 eta 15:25:03 +epoch [15/50] batch [535/1000] time 1.708 (1.565) data 0.000 (0.002) loss 1.8896 (1.1403) acc 53.1250 (71.5829) lr 1.6845e-03 eta 15:25:03 +epoch [15/50] batch [540/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.0879 (1.1404) acc 68.7500 (71.5567) lr 1.6845e-03 eta 15:24:51 +epoch [15/50] batch [545/1000] time 1.589 (1.565) data 0.000 (0.002) loss 0.8081 (1.1394) acc 75.0000 (71.5654) lr 1.6845e-03 eta 15:24:49 +epoch [15/50] batch [550/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.0088 (1.1373) acc 68.7500 (71.5852) lr 1.6845e-03 eta 15:24:39 +epoch [15/50] batch [555/1000] time 1.578 (1.565) data 0.001 (0.002) loss 0.9082 (1.1390) acc 75.0000 (71.5541) lr 1.6845e-03 eta 15:24:32 +epoch [15/50] batch [560/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.6016 (1.1378) acc 90.6250 (71.5904) lr 1.6845e-03 eta 15:24:23 +epoch [15/50] batch [565/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.8989 (1.1389) acc 71.8750 (71.5487) lr 1.6845e-03 eta 15:24:14 +epoch [15/50] batch [570/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.1289 (1.1385) acc 68.7500 (71.5735) lr 1.6845e-03 eta 15:24:02 +epoch [15/50] batch [575/1000] time 1.582 (1.565) data 0.000 (0.002) loss 0.9102 (1.1378) acc 68.7500 (71.5652) lr 1.6845e-03 eta 15:23:55 +epoch [15/50] batch [580/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.7837 (1.1392) acc 
71.8750 (71.5517) lr 1.6845e-03 eta 15:23:54 +epoch [15/50] batch [585/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7686 (1.1387) acc 75.0000 (71.5385) lr 1.6845e-03 eta 15:23:46 +epoch [15/50] batch [590/1000] time 1.579 (1.565) data 0.000 (0.002) loss 1.3281 (1.1384) acc 68.7500 (71.5307) lr 1.6845e-03 eta 15:23:37 +epoch [15/50] batch [595/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.2227 (1.1387) acc 59.3750 (71.4863) lr 1.6845e-03 eta 15:23:30 +epoch [15/50] batch [600/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.0400 (1.1383) acc 56.2500 (71.4792) lr 1.6845e-03 eta 15:23:22 +epoch [15/50] batch [605/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.0986 (1.1399) acc 68.7500 (71.4360) lr 1.6845e-03 eta 15:23:12 +epoch [15/50] batch [610/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.9878 (1.1406) acc 81.2500 (71.4037) lr 1.6845e-03 eta 15:23:04 +epoch [15/50] batch [615/1000] time 1.587 (1.565) data 0.001 (0.002) loss 0.6558 (1.1407) acc 68.7500 (71.3516) lr 1.6845e-03 eta 15:22:55 +epoch [15/50] batch [620/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.1875 (1.1402) acc 65.6250 (71.3458) lr 1.6845e-03 eta 15:22:47 +epoch [15/50] batch [625/1000] time 1.591 (1.565) data 0.001 (0.002) loss 0.9131 (1.1393) acc 78.1250 (71.3300) lr 1.6845e-03 eta 15:22:41 +epoch [15/50] batch [630/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.3916 (1.1381) acc 71.8750 (71.3294) lr 1.6845e-03 eta 15:22:32 +epoch [15/50] batch [635/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.4473 (1.1401) acc 65.6250 (71.3091) lr 1.6845e-03 eta 15:22:24 +epoch [15/50] batch [640/1000] time 1.531 (1.565) data 0.000 (0.002) loss 0.8262 (1.1390) acc 78.1250 (71.3330) lr 1.6845e-03 eta 15:22:12 +epoch [15/50] batch [645/1000] time 1.548 (1.565) data 0.001 (0.002) loss 1.5186 (1.1386) acc 56.2500 (71.3227) lr 1.6845e-03 eta 15:22:10 +epoch [15/50] batch [650/1000] time 1.571 (1.565) data 0.001 (0.002) loss 1.3271 (1.1395) acc 71.8750 (71.3029) lr 1.6845e-03 eta 
15:22:03 +epoch [15/50] batch [655/1000] time 1.567 (1.565) data 0.000 (0.002) loss 1.2275 (1.1385) acc 68.7500 (71.3359) lr 1.6845e-03 eta 15:21:52 +epoch [15/50] batch [660/1000] time 1.553 (1.565) data 0.000 (0.002) loss 0.9326 (1.1396) acc 71.8750 (71.3352) lr 1.6845e-03 eta 15:21:44 +epoch [15/50] batch [665/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.6172 (1.1415) acc 68.7500 (71.3017) lr 1.6845e-03 eta 15:21:34 +epoch [15/50] batch [670/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.6870 (1.1408) acc 93.7500 (71.3153) lr 1.6845e-03 eta 15:21:24 +epoch [15/50] batch [675/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.1523 (1.1405) acc 65.6250 (71.3009) lr 1.6845e-03 eta 15:21:15 +epoch [15/50] batch [680/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.5371 (1.1413) acc 84.3750 (71.2730) lr 1.6845e-03 eta 15:21:05 +epoch [15/50] batch [685/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.2617 (1.1420) acc 68.7500 (71.2591) lr 1.6845e-03 eta 15:20:59 +epoch [15/50] batch [690/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.4521 (1.1425) acc 68.7500 (71.2455) lr 1.6845e-03 eta 15:20:53 +epoch [15/50] batch [695/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.2158 (1.1412) acc 65.6250 (71.2545) lr 1.6845e-03 eta 15:20:44 +epoch [15/50] batch [700/1000] time 1.571 (1.565) data 0.000 (0.002) loss 0.6079 (1.1391) acc 81.2500 (71.3080) lr 1.6845e-03 eta 15:20:34 +epoch [15/50] batch [705/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.8882 (1.1397) acc 75.0000 (71.3165) lr 1.6845e-03 eta 15:20:27 +epoch [15/50] batch [710/1000] time 1.567 (1.565) data 0.001 (0.002) loss 0.6431 (1.1387) acc 81.2500 (71.3424) lr 1.6845e-03 eta 15:20:19 +epoch [15/50] batch [715/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.3809 (1.1394) acc 56.2500 (71.3112) lr 1.6845e-03 eta 15:20:11 +epoch [15/50] batch [720/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0205 (1.1386) acc 78.1250 (71.3368) lr 1.6845e-03 eta 15:20:01 +epoch [15/50] batch 
[725/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.0986 (1.1381) acc 75.0000 (71.3491) lr 1.6845e-03 eta 15:19:51 +epoch [15/50] batch [730/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.3066 (1.1402) acc 59.3750 (71.3057) lr 1.6845e-03 eta 15:19:50 +epoch [15/50] batch [735/1000] time 1.572 (1.565) data 0.001 (0.002) loss 1.0605 (1.1404) acc 71.8750 (71.3138) lr 1.6845e-03 eta 15:19:42 +epoch [15/50] batch [740/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.9502 (1.1407) acc 59.3750 (71.3218) lr 1.6845e-03 eta 15:19:35 +epoch [15/50] batch [745/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.0732 (1.1406) acc 68.7500 (71.3255) lr 1.6845e-03 eta 15:19:29 +epoch [15/50] batch [750/1000] time 1.552 (1.565) data 0.000 (0.002) loss 0.9507 (1.1392) acc 71.8750 (71.3708) lr 1.6845e-03 eta 15:19:20 +epoch [15/50] batch [755/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.4229 (1.1402) acc 62.5000 (71.3659) lr 1.6845e-03 eta 15:19:10 +epoch [15/50] batch [760/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.7217 (1.1402) acc 68.7500 (71.3734) lr 1.6845e-03 eta 15:19:00 +epoch [15/50] batch [765/1000] time 1.572 (1.565) data 0.000 (0.002) loss 0.9292 (1.1394) acc 78.1250 (71.4093) lr 1.6845e-03 eta 15:18:51 +epoch [15/50] batch [770/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.9150 (1.1394) acc 71.8750 (71.3961) lr 1.6845e-03 eta 15:18:42 +epoch [15/50] batch [775/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.9658 (1.1386) acc 78.1250 (71.4153) lr 1.6845e-03 eta 15:18:32 +epoch [15/50] batch [780/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.8296 (1.1372) acc 81.2500 (71.4423) lr 1.6845e-03 eta 15:18:23 +epoch [15/50] batch [785/1000] time 1.545 (1.564) data 0.000 (0.002) loss 1.2559 (1.1381) acc 71.8750 (71.4490) lr 1.6845e-03 eta 15:18:11 +epoch [15/50] batch [790/1000] time 1.567 (1.564) data 0.001 (0.002) loss 1.6797 (1.1381) acc 65.6250 (71.4557) lr 1.6845e-03 eta 15:18:02 +epoch [15/50] batch [795/1000] time 1.584 (1.565) data 
0.000 (0.002) loss 0.6582 (1.1372) acc 81.2500 (71.4426) lr 1.6845e-03 eta 15:18:02 +epoch [15/50] batch [800/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0986 (1.1372) acc 71.8750 (71.4609) lr 1.6845e-03 eta 15:17:54 +epoch [15/50] batch [805/1000] time 1.545 (1.565) data 0.000 (0.002) loss 0.8340 (1.1356) acc 81.2500 (71.5179) lr 1.6845e-03 eta 15:17:45 +epoch [15/50] batch [810/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.2363 (1.1348) acc 75.0000 (71.5586) lr 1.6845e-03 eta 15:17:37 +epoch [15/50] batch [815/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.1094 (1.1347) acc 75.0000 (71.5567) lr 1.6845e-03 eta 15:17:26 +epoch [15/50] batch [820/1000] time 1.586 (1.565) data 0.000 (0.002) loss 1.4727 (1.1362) acc 68.7500 (71.5396) lr 1.6845e-03 eta 15:17:20 +epoch [15/50] batch [825/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.7290 (1.1349) acc 81.2500 (71.5758) lr 1.6845e-03 eta 15:17:13 +epoch [15/50] batch [830/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.9888 (1.1349) acc 78.1250 (71.5663) lr 1.6845e-03 eta 15:17:06 +epoch [15/50] batch [835/1000] time 1.571 (1.565) data 0.000 (0.002) loss 0.8950 (1.1349) acc 78.1250 (71.5606) lr 1.6845e-03 eta 15:16:57 +epoch [15/50] batch [840/1000] time 1.548 (1.565) data 0.001 (0.002) loss 1.5615 (1.1358) acc 65.6250 (71.5327) lr 1.6845e-03 eta 15:16:55 +epoch [15/50] batch [845/1000] time 1.584 (1.565) data 0.000 (0.002) loss 1.2861 (1.1374) acc 65.6250 (71.4904) lr 1.6845e-03 eta 15:16:46 +epoch [15/50] batch [850/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.1846 (1.1367) acc 75.0000 (71.5147) lr 1.6845e-03 eta 15:16:37 +epoch [15/50] batch [855/1000] time 1.553 (1.565) data 0.000 (0.001) loss 1.4229 (1.1376) acc 59.3750 (71.5022) lr 1.6845e-03 eta 15:16:27 +epoch [15/50] batch [860/1000] time 1.562 (1.565) data 0.000 (0.001) loss 0.9321 (1.1377) acc 75.0000 (71.5116) lr 1.6845e-03 eta 15:16:19 +epoch [15/50] batch [865/1000] time 1.567 (1.565) data 0.000 (0.001) loss 1.0234 (1.1370) acc 
75.0000 (71.5282) lr 1.6845e-03 eta 15:16:13 +epoch [15/50] batch [870/1000] time 1.566 (1.565) data 0.000 (0.001) loss 0.9253 (1.1366) acc 81.2500 (71.5445) lr 1.6845e-03 eta 15:16:05 +epoch [15/50] batch [875/1000] time 1.538 (1.565) data 0.000 (0.001) loss 0.9326 (1.1364) acc 78.1250 (71.5571) lr 1.6845e-03 eta 15:15:54 +epoch [15/50] batch [880/1000] time 1.700 (1.565) data 0.000 (0.001) loss 0.9385 (1.1362) acc 78.1250 (71.5447) lr 1.6845e-03 eta 15:15:51 +epoch [15/50] batch [885/1000] time 1.571 (1.565) data 0.000 (0.001) loss 1.5811 (1.1372) acc 53.1250 (71.5290) lr 1.6845e-03 eta 15:15:44 +epoch [15/50] batch [890/1000] time 1.585 (1.565) data 0.000 (0.001) loss 1.3350 (1.1380) acc 68.7500 (71.5028) lr 1.6845e-03 eta 15:15:38 +epoch [15/50] batch [895/1000] time 1.579 (1.565) data 0.000 (0.001) loss 1.2520 (1.1380) acc 65.6250 (71.4944) lr 1.6845e-03 eta 15:15:32 +epoch [15/50] batch [900/1000] time 1.582 (1.565) data 0.001 (0.001) loss 1.3906 (1.1377) acc 59.3750 (71.4722) lr 1.6845e-03 eta 15:15:26 +epoch [15/50] batch [905/1000] time 1.559 (1.565) data 0.000 (0.001) loss 1.7988 (1.1388) acc 65.6250 (71.4399) lr 1.6845e-03 eta 15:15:16 +epoch [15/50] batch [910/1000] time 1.553 (1.565) data 0.000 (0.001) loss 0.8955 (1.1390) acc 75.0000 (71.4148) lr 1.6845e-03 eta 15:15:06 +epoch [15/50] batch [915/1000] time 1.555 (1.565) data 0.000 (0.001) loss 1.1357 (1.1387) acc 75.0000 (71.4276) lr 1.6845e-03 eta 15:14:54 +epoch [15/50] batch [920/1000] time 1.567 (1.565) data 0.000 (0.001) loss 1.3018 (1.1378) acc 71.8750 (71.4368) lr 1.6845e-03 eta 15:14:46 +epoch [15/50] batch [925/1000] time 1.594 (1.565) data 0.001 (0.001) loss 1.1689 (1.1380) acc 71.8750 (71.4324) lr 1.6845e-03 eta 15:14:39 +epoch [15/50] batch [930/1000] time 1.537 (1.565) data 0.001 (0.001) loss 1.8662 (1.1403) acc 65.6250 (71.4147) lr 1.6845e-03 eta 15:14:29 +epoch [15/50] batch [935/1000] time 1.563 (1.565) data 0.000 (0.001) loss 1.1943 (1.1393) acc 78.1250 (71.4439) lr 1.6845e-03 eta 
15:14:21 +epoch [15/50] batch [940/1000] time 1.575 (1.565) data 0.000 (0.001) loss 1.3604 (1.1390) acc 62.5000 (71.4461) lr 1.6845e-03 eta 15:14:11 +epoch [15/50] batch [945/1000] time 1.537 (1.565) data 0.000 (0.001) loss 1.2910 (1.1398) acc 68.7500 (71.4220) lr 1.6845e-03 eta 15:14:08 +epoch [15/50] batch [950/1000] time 1.542 (1.565) data 0.000 (0.001) loss 1.1182 (1.1409) acc 78.1250 (71.4211) lr 1.6845e-03 eta 15:13:59 +epoch [15/50] batch [955/1000] time 1.575 (1.565) data 0.000 (0.001) loss 1.3877 (1.1399) acc 75.0000 (71.4692) lr 1.6845e-03 eta 15:13:50 +epoch [15/50] batch [960/1000] time 1.569 (1.565) data 0.000 (0.001) loss 1.5527 (1.1402) acc 62.5000 (71.4616) lr 1.6845e-03 eta 15:13:42 +epoch [15/50] batch [965/1000] time 1.550 (1.565) data 0.000 (0.001) loss 0.7915 (1.1387) acc 81.2500 (71.5026) lr 1.6845e-03 eta 15:13:33 +epoch [15/50] batch [970/1000] time 1.548 (1.564) data 0.001 (0.001) loss 1.0410 (1.1388) acc 65.6250 (71.4884) lr 1.6845e-03 eta 15:13:23 +epoch [15/50] batch [975/1000] time 1.565 (1.564) data 0.000 (0.001) loss 1.4219 (1.1400) acc 75.0000 (71.4615) lr 1.6845e-03 eta 15:13:14 +epoch [15/50] batch [980/1000] time 1.562 (1.564) data 0.000 (0.001) loss 0.7485 (1.1392) acc 75.0000 (71.4509) lr 1.6845e-03 eta 15:13:06 +epoch [15/50] batch [985/1000] time 1.558 (1.564) data 0.001 (0.001) loss 1.0664 (1.1390) acc 65.6250 (71.4435) lr 1.6845e-03 eta 15:12:57 +epoch [15/50] batch [990/1000] time 1.569 (1.565) data 0.000 (0.001) loss 0.7446 (1.1392) acc 78.1250 (71.4583) lr 1.6845e-03 eta 15:12:55 +epoch [15/50] batch [995/1000] time 1.560 (1.565) data 0.000 (0.001) loss 1.2998 (1.1400) acc 75.0000 (71.4447) lr 1.6845e-03 eta 15:12:46 +epoch [15/50] batch [1000/1000] time 1.557 (1.565) data 0.000 (0.001) loss 1.0596 (1.1406) acc 78.1250 (71.4344) lr 1.6374e-03 eta 15:12:39 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,175 +* accuracy: 78.3% +* error: 21.7% +* macro_f1: 77.8% +Checkpoint saved to 
output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [16/50] batch [5/1000] time 1.545 (1.701) data 0.000 (0.199) loss 1.0889 (0.9160) acc 71.8750 (73.1250) lr 1.6374e-03 eta 16:32:00 +epoch [16/50] batch [10/1000] time 1.562 (1.631) data 0.001 (0.100) loss 0.9609 (1.0070) acc 68.7500 (71.2500) lr 1.6374e-03 eta 15:50:54 +epoch [16/50] batch [15/1000] time 1.582 (1.608) data 0.001 (0.067) loss 0.6626 (1.0417) acc 84.3750 (71.8750) lr 1.6374e-03 eta 15:37:18 +epoch [16/50] batch [20/1000] time 1.586 (1.597) data 0.000 (0.050) loss 0.8140 (1.0689) acc 75.0000 (70.9375) lr 1.6374e-03 eta 15:30:48 +epoch [16/50] batch [25/1000] time 1.575 (1.601) data 0.000 (0.040) loss 1.1895 (1.0754) acc 71.8750 (70.8750) lr 1.6374e-03 eta 15:33:25 +epoch [16/50] batch [30/1000] time 1.555 (1.595) data 0.000 (0.034) loss 1.2500 (1.0725) acc 68.7500 (71.6667) lr 1.6374e-03 eta 15:29:24 +epoch [16/50] batch [35/1000] time 1.564 (1.590) data 0.001 (0.029) loss 1.3213 (1.1039) acc 62.5000 (71.1607) lr 1.6374e-03 eta 15:26:24 +epoch [16/50] batch [40/1000] time 1.565 (1.586) data 0.000 (0.025) loss 0.9673 (1.1188) acc 81.2500 (71.1719) lr 1.6374e-03 eta 15:24:04 +epoch [16/50] batch [45/1000] time 1.572 (1.584) data 0.001 (0.023) loss 1.0264 (1.0977) acc 75.0000 (71.6667) lr 1.6374e-03 eta 15:22:44 +epoch [16/50] batch [50/1000] time 1.570 (1.581) data 0.000 (0.020) loss 0.9644 (1.1100) acc 75.0000 (71.3750) lr 1.6374e-03 eta 15:20:45 +epoch [16/50] batch [55/1000] time 1.556 (1.579) data 0.000 (0.019) loss 1.3105 (1.1152) acc 68.7500 (71.5341) lr 1.6374e-03 eta 15:19:46 +epoch [16/50] batch [60/1000] time 1.546 (1.578) data 0.001 (0.017) loss 0.9746 (1.1044) acc 68.7500 (71.5625) lr 1.6374e-03 eta 15:19:07 +epoch [16/50] batch [65/1000] time 1.558 (1.577) data 0.000 (0.016) loss 1.6670 (1.1235) acc 56.2500 (71.0577) lr 1.6374e-03 eta 15:18:26 +epoch [16/50] batch [70/1000] time 1.570 (1.576) data 0.000 (0.015) loss 1.5420 
(1.1223) acc 59.3750 (71.4732) lr 1.6374e-03 eta 15:17:33 +epoch [16/50] batch [75/1000] time 1.566 (1.576) data 0.000 (0.014) loss 1.0674 (1.1334) acc 81.2500 (71.4583) lr 1.6374e-03 eta 15:17:07 +epoch [16/50] batch [80/1000] time 1.592 (1.575) data 0.001 (0.013) loss 1.4092 (1.1351) acc 68.7500 (71.4844) lr 1.6374e-03 eta 15:16:34 +epoch [16/50] batch [85/1000] time 1.713 (1.576) data 0.001 (0.012) loss 1.8389 (1.1444) acc 59.3750 (71.4706) lr 1.6374e-03 eta 15:17:07 +epoch [16/50] batch [90/1000] time 1.562 (1.575) data 0.001 (0.012) loss 1.0605 (1.1382) acc 81.2500 (71.7361) lr 1.6374e-03 eta 15:16:22 +epoch [16/50] batch [95/1000] time 1.574 (1.574) data 0.000 (0.011) loss 1.1367 (1.1321) acc 75.0000 (71.9737) lr 1.6374e-03 eta 15:15:43 +epoch [16/50] batch [100/1000] time 1.548 (1.573) data 0.000 (0.010) loss 1.0859 (1.1218) acc 75.0000 (72.1562) lr 1.6374e-03 eta 15:14:59 +epoch [16/50] batch [105/1000] time 1.564 (1.572) data 0.001 (0.010) loss 0.5112 (1.1177) acc 81.2500 (72.2321) lr 1.6374e-03 eta 15:14:16 +epoch [16/50] batch [110/1000] time 1.553 (1.572) data 0.001 (0.010) loss 1.1963 (1.1101) acc 68.7500 (72.1875) lr 1.6374e-03 eta 15:13:59 +epoch [16/50] batch [115/1000] time 1.566 (1.571) data 0.000 (0.009) loss 0.9521 (1.1215) acc 62.5000 (71.9837) lr 1.6374e-03 eta 15:13:28 +epoch [16/50] batch [120/1000] time 1.565 (1.571) data 0.000 (0.009) loss 1.4346 (1.1230) acc 68.7500 (72.0052) lr 1.6374e-03 eta 15:13:01 +epoch [16/50] batch [125/1000] time 1.541 (1.570) data 0.001 (0.008) loss 1.0684 (1.1212) acc 78.1250 (72.0000) lr 1.6374e-03 eta 15:12:26 +epoch [16/50] batch [130/1000] time 1.727 (1.571) data 0.000 (0.008) loss 0.7422 (1.1168) acc 81.2500 (72.1875) lr 1.6374e-03 eta 15:12:55 +epoch [16/50] batch [135/1000] time 1.558 (1.571) data 0.000 (0.008) loss 1.5439 (1.1214) acc 62.5000 (71.9444) lr 1.6374e-03 eta 15:12:40 +epoch [16/50] batch [140/1000] time 1.559 (1.571) data 0.000 (0.008) loss 1.1982 (1.1298) acc 75.0000 (71.8304) lr 1.6374e-03 
eta 15:12:30 +epoch [16/50] batch [145/1000] time 1.537 (1.570) data 0.000 (0.007) loss 1.7178 (1.1244) acc 59.3750 (71.9612) lr 1.6374e-03 eta 15:12:02 +epoch [16/50] batch [150/1000] time 1.553 (1.570) data 0.000 (0.007) loss 1.0645 (1.1261) acc 68.7500 (72.0000) lr 1.6374e-03 eta 15:11:50 +epoch [16/50] batch [155/1000] time 1.576 (1.570) data 0.000 (0.007) loss 1.2549 (1.1290) acc 71.8750 (71.9758) lr 1.6374e-03 eta 15:11:36 +epoch [16/50] batch [160/1000] time 1.565 (1.569) data 0.000 (0.007) loss 0.9790 (1.1314) acc 68.7500 (71.7383) lr 1.6374e-03 eta 15:11:13 +epoch [16/50] batch [165/1000] time 1.547 (1.569) data 0.000 (0.006) loss 1.1396 (1.1308) acc 71.8750 (71.7803) lr 1.6374e-03 eta 15:10:51 +epoch [16/50] batch [170/1000] time 1.572 (1.568) data 0.001 (0.006) loss 1.2168 (1.1279) acc 68.7500 (71.8015) lr 1.6374e-03 eta 15:10:28 +epoch [16/50] batch [175/1000] time 1.570 (1.570) data 0.000 (0.006) loss 0.7368 (1.1300) acc 87.5000 (71.8929) lr 1.6374e-03 eta 15:11:03 +epoch [16/50] batch [180/1000] time 1.555 (1.569) data 0.000 (0.006) loss 0.8926 (1.1345) acc 71.8750 (71.8924) lr 1.6374e-03 eta 15:10:46 +epoch [16/50] batch [185/1000] time 1.567 (1.569) data 0.000 (0.006) loss 1.0234 (1.1313) acc 71.8750 (71.9764) lr 1.6374e-03 eta 15:10:31 +epoch [16/50] batch [190/1000] time 1.560 (1.569) data 0.000 (0.006) loss 1.5967 (1.1333) acc 71.8750 (71.9079) lr 1.6374e-03 eta 15:10:17 +epoch [16/50] batch [195/1000] time 1.567 (1.569) data 0.001 (0.006) loss 1.0801 (1.1345) acc 78.1250 (71.7949) lr 1.6374e-03 eta 15:10:12 +epoch [16/50] batch [200/1000] time 1.564 (1.569) data 0.001 (0.005) loss 1.1592 (1.1356) acc 71.8750 (71.7031) lr 1.6374e-03 eta 15:09:54 +epoch [16/50] batch [205/1000] time 1.573 (1.569) data 0.000 (0.005) loss 1.3438 (1.1319) acc 62.5000 (71.6463) lr 1.6374e-03 eta 15:09:44 +epoch [16/50] batch [210/1000] time 1.561 (1.569) data 0.001 (0.005) loss 1.4766 (1.1308) acc 53.1250 (71.6071) lr 1.6374e-03 eta 15:09:31 +epoch [16/50] batch 
[215/1000] time 1.548 (1.568) data 0.000 (0.005) loss 1.2168 (1.1342) acc 68.7500 (71.5407) lr 1.6374e-03 eta 15:09:19 +epoch [16/50] batch [220/1000] time 1.584 (1.568) data 0.001 (0.005) loss 1.2510 (1.1362) acc 68.7500 (71.4631) lr 1.6374e-03 eta 15:09:09 +epoch [16/50] batch [225/1000] time 1.587 (1.568) data 0.000 (0.005) loss 1.0430 (1.1347) acc 65.6250 (71.5139) lr 1.6374e-03 eta 15:08:57 +epoch [16/50] batch [230/1000] time 1.594 (1.569) data 0.001 (0.005) loss 0.8589 (1.1360) acc 78.1250 (71.5353) lr 1.6374e-03 eta 15:09:01 +epoch [16/50] batch [235/1000] time 1.566 (1.569) data 0.000 (0.005) loss 1.1992 (1.1347) acc 65.6250 (71.5293) lr 1.6374e-03 eta 15:08:52 +epoch [16/50] batch [240/1000] time 1.576 (1.569) data 0.000 (0.005) loss 1.5732 (1.1330) acc 62.5000 (71.5495) lr 1.6374e-03 eta 15:09:05 +epoch [16/50] batch [245/1000] time 1.552 (1.569) data 0.000 (0.005) loss 1.2314 (1.1318) acc 71.8750 (71.5051) lr 1.6374e-03 eta 15:08:52 +epoch [16/50] batch [250/1000] time 1.541 (1.569) data 0.000 (0.004) loss 0.9082 (1.1294) acc 75.0000 (71.5375) lr 1.6374e-03 eta 15:08:39 +epoch [16/50] batch [255/1000] time 1.571 (1.569) data 0.000 (0.004) loss 1.2031 (1.1309) acc 78.1250 (71.5196) lr 1.6374e-03 eta 15:08:24 +epoch [16/50] batch [260/1000] time 1.534 (1.568) data 0.000 (0.004) loss 0.8862 (1.1278) acc 81.2500 (71.5865) lr 1.6374e-03 eta 15:08:07 +epoch [16/50] batch [265/1000] time 1.550 (1.568) data 0.000 (0.004) loss 0.7261 (1.1286) acc 84.3750 (71.5802) lr 1.6374e-03 eta 15:07:56 +epoch [16/50] batch [270/1000] time 1.553 (1.568) data 0.000 (0.004) loss 1.1006 (1.1314) acc 71.8750 (71.4815) lr 1.6374e-03 eta 15:07:48 +epoch [16/50] batch [275/1000] time 1.551 (1.568) data 0.001 (0.004) loss 1.1006 (1.1277) acc 78.1250 (71.5909) lr 1.6374e-03 eta 15:07:39 +epoch [16/50] batch [280/1000] time 1.575 (1.568) data 0.001 (0.004) loss 0.9160 (1.1303) acc 90.6250 (71.5513) lr 1.6374e-03 eta 15:07:29 +epoch [16/50] batch [285/1000] time 1.561 (1.569) data 
0.000 (0.004) loss 1.5957 (1.1324) acc 65.6250 (71.4474) lr 1.6374e-03 eta 15:07:30 +epoch [16/50] batch [290/1000] time 1.553 (1.568) data 0.000 (0.004) loss 0.9302 (1.1328) acc 71.8750 (71.4547) lr 1.6374e-03 eta 15:07:15 +epoch [16/50] batch [295/1000] time 1.570 (1.568) data 0.000 (0.004) loss 1.0186 (1.1288) acc 75.0000 (71.4936) lr 1.6374e-03 eta 15:07:06 +epoch [16/50] batch [300/1000] time 1.556 (1.568) data 0.000 (0.004) loss 1.0508 (1.1270) acc 75.0000 (71.5312) lr 1.6374e-03 eta 15:06:58 +epoch [16/50] batch [305/1000] time 1.580 (1.568) data 0.000 (0.004) loss 1.0928 (1.1287) acc 65.6250 (71.4139) lr 1.6374e-03 eta 15:06:48 +epoch [16/50] batch [310/1000] time 1.551 (1.568) data 0.000 (0.004) loss 1.5586 (1.1289) acc 62.5000 (71.4415) lr 1.6374e-03 eta 15:06:33 +epoch [16/50] batch [315/1000] time 1.557 (1.568) data 0.000 (0.004) loss 0.5288 (1.1282) acc 81.2500 (71.4683) lr 1.6374e-03 eta 15:06:25 +epoch [16/50] batch [320/1000] time 1.544 (1.568) data 0.001 (0.004) loss 1.8789 (1.1324) acc 56.2500 (71.4062) lr 1.6374e-03 eta 15:06:06 +epoch [16/50] batch [325/1000] time 1.564 (1.568) data 0.000 (0.004) loss 0.8647 (1.1294) acc 71.8750 (71.4231) lr 1.6374e-03 eta 15:06:16 +epoch [16/50] batch [330/1000] time 1.547 (1.568) data 0.001 (0.003) loss 1.1475 (1.1350) acc 75.0000 (71.3352) lr 1.6374e-03 eta 15:06:02 +epoch [16/50] batch [335/1000] time 1.556 (1.568) data 0.000 (0.003) loss 1.1318 (1.1348) acc 62.5000 (71.3060) lr 1.6374e-03 eta 15:05:49 +epoch [16/50] batch [340/1000] time 1.588 (1.568) data 0.001 (0.003) loss 1.4990 (1.1376) acc 56.2500 (71.2316) lr 1.6374e-03 eta 15:05:39 +epoch [16/50] batch [345/1000] time 1.551 (1.568) data 0.000 (0.003) loss 0.9473 (1.1402) acc 75.0000 (71.1775) lr 1.6374e-03 eta 15:05:27 +epoch [16/50] batch [350/1000] time 1.541 (1.567) data 0.000 (0.003) loss 0.7056 (1.1393) acc 75.0000 (71.1607) lr 1.6374e-03 eta 15:05:12 +epoch [16/50] batch [355/1000] time 1.559 (1.567) data 0.000 (0.003) loss 0.9395 (1.1452) acc 
75.0000 (71.0739) lr 1.6374e-03 eta 15:05:00 +epoch [16/50] batch [360/1000] time 1.570 (1.567) data 0.001 (0.003) loss 1.2734 (1.1467) acc 68.7500 (71.0677) lr 1.6374e-03 eta 15:04:53 +epoch [16/50] batch [365/1000] time 1.592 (1.567) data 0.001 (0.003) loss 0.7393 (1.1482) acc 81.2500 (71.0531) lr 1.6374e-03 eta 15:04:49 +epoch [16/50] batch [370/1000] time 1.556 (1.567) data 0.001 (0.003) loss 1.0459 (1.1450) acc 59.3750 (71.1064) lr 1.6374e-03 eta 15:04:42 +epoch [16/50] batch [375/1000] time 1.565 (1.568) data 0.000 (0.003) loss 0.6919 (1.1429) acc 81.2500 (71.1667) lr 1.6374e-03 eta 15:04:36 +epoch [16/50] batch [380/1000] time 1.563 (1.567) data 0.000 (0.003) loss 0.5181 (1.1379) acc 87.5000 (71.2664) lr 1.6374e-03 eta 15:04:26 +epoch [16/50] batch [385/1000] time 1.542 (1.567) data 0.000 (0.003) loss 0.8916 (1.1370) acc 71.8750 (71.2419) lr 1.6374e-03 eta 15:04:12 +epoch [16/50] batch [390/1000] time 1.564 (1.568) data 0.000 (0.003) loss 0.7451 (1.1331) acc 78.1250 (71.2981) lr 1.6374e-03 eta 15:04:15 +epoch [16/50] batch [395/1000] time 1.550 (1.567) data 0.001 (0.003) loss 1.4053 (1.1344) acc 71.8750 (71.3291) lr 1.6374e-03 eta 15:04:02 +epoch [16/50] batch [400/1000] time 1.564 (1.567) data 0.001 (0.003) loss 0.8638 (1.1343) acc 75.0000 (71.3359) lr 1.6374e-03 eta 15:03:49 +epoch [16/50] batch [405/1000] time 1.535 (1.567) data 0.001 (0.003) loss 1.0996 (1.1329) acc 65.6250 (71.3194) lr 1.6374e-03 eta 15:03:37 +epoch [16/50] batch [410/1000] time 1.551 (1.567) data 0.001 (0.003) loss 1.3672 (1.1331) acc 65.6250 (71.2729) lr 1.6374e-03 eta 15:03:25 +epoch [16/50] batch [415/1000] time 1.558 (1.567) data 0.000 (0.003) loss 1.7549 (1.1371) acc 62.5000 (71.1596) lr 1.6374e-03 eta 15:03:13 +epoch [16/50] batch [420/1000] time 1.562 (1.567) data 0.000 (0.003) loss 0.9321 (1.1369) acc 78.1250 (71.1979) lr 1.6374e-03 eta 15:03:04 +epoch [16/50] batch [425/1000] time 1.552 (1.567) data 0.001 (0.003) loss 1.2852 (1.1417) acc 62.5000 (71.1176) lr 1.6374e-03 eta 
15:02:58 +epoch [16/50] batch [430/1000] time 1.583 (1.567) data 0.001 (0.003) loss 0.8320 (1.1422) acc 78.1250 (71.1265) lr 1.6374e-03 eta 15:02:49 +epoch [16/50] batch [435/1000] time 1.562 (1.567) data 0.000 (0.003) loss 1.0195 (1.1443) acc 75.0000 (71.1782) lr 1.6374e-03 eta 15:02:50 +epoch [16/50] batch [440/1000] time 1.567 (1.567) data 0.000 (0.003) loss 0.6719 (1.1430) acc 81.2500 (71.1861) lr 1.6374e-03 eta 15:02:40 +epoch [16/50] batch [445/1000] time 1.535 (1.567) data 0.000 (0.003) loss 1.1123 (1.1438) acc 71.8750 (71.1306) lr 1.6374e-03 eta 15:02:28 +epoch [16/50] batch [450/1000] time 1.563 (1.567) data 0.000 (0.003) loss 1.7383 (1.1442) acc 65.6250 (71.1389) lr 1.6374e-03 eta 15:02:18 +epoch [16/50] batch [455/1000] time 1.548 (1.567) data 0.000 (0.003) loss 0.7266 (1.1413) acc 87.5000 (71.2157) lr 1.6374e-03 eta 15:02:09 +epoch [16/50] batch [460/1000] time 1.558 (1.567) data 0.000 (0.003) loss 2.0547 (1.1426) acc 62.5000 (71.1957) lr 1.6374e-03 eta 15:01:53 +epoch [16/50] batch [465/1000] time 1.568 (1.567) data 0.001 (0.003) loss 1.0771 (1.1401) acc 65.6250 (71.2366) lr 1.6374e-03 eta 15:01:45 +epoch [16/50] batch [470/1000] time 1.546 (1.567) data 0.000 (0.003) loss 0.7275 (1.1390) acc 81.2500 (71.2500) lr 1.6374e-03 eta 15:01:32 +epoch [16/50] batch [475/1000] time 1.707 (1.567) data 0.000 (0.003) loss 0.7891 (1.1368) acc 78.1250 (71.2632) lr 1.6374e-03 eta 15:01:32 +epoch [16/50] batch [480/1000] time 1.548 (1.567) data 0.001 (0.003) loss 1.0732 (1.1350) acc 75.0000 (71.2826) lr 1.6374e-03 eta 15:01:20 +epoch [16/50] batch [485/1000] time 1.583 (1.567) data 0.001 (0.003) loss 1.8066 (1.1351) acc 53.1250 (71.2500) lr 1.6374e-03 eta 15:01:12 +epoch [16/50] batch [490/1000] time 1.562 (1.567) data 0.000 (0.002) loss 0.9463 (1.1338) acc 62.5000 (71.2755) lr 1.6374e-03 eta 15:01:01 +epoch [16/50] batch [495/1000] time 1.567 (1.567) data 0.000 (0.002) loss 1.1211 (1.1328) acc 68.7500 (71.2563) lr 1.6374e-03 eta 15:00:52 +epoch [16/50] batch 
[500/1000] time 1.584 (1.566) data 0.000 (0.002) loss 0.7002 (1.1310) acc 84.3750 (71.2750) lr 1.6374e-03 eta 15:00:42 +epoch [16/50] batch [505/1000] time 1.580 (1.566) data 0.001 (0.002) loss 1.1660 (1.1320) acc 75.0000 (71.2500) lr 1.6374e-03 eta 15:00:32 +epoch [16/50] batch [510/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.6577 (1.1315) acc 84.3750 (71.2500) lr 1.6374e-03 eta 15:00:24 +epoch [16/50] batch [515/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.4590 (1.1333) acc 59.3750 (71.2015) lr 1.6374e-03 eta 15:00:12 +epoch [16/50] batch [520/1000] time 1.537 (1.566) data 0.000 (0.002) loss 1.5664 (1.1352) acc 62.5000 (71.1719) lr 1.6374e-03 eta 14:59:58 +epoch [16/50] batch [525/1000] time 1.568 (1.566) data 0.001 (0.002) loss 1.4658 (1.1375) acc 71.8750 (71.1369) lr 1.6374e-03 eta 14:59:45 +epoch [16/50] batch [530/1000] time 1.545 (1.566) data 0.000 (0.002) loss 0.8999 (1.1372) acc 87.5000 (71.1675) lr 1.6374e-03 eta 14:59:32 +epoch [16/50] batch [535/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.7656 (1.1399) acc 59.3750 (71.1098) lr 1.6374e-03 eta 14:59:24 +epoch [16/50] batch [540/1000] time 1.582 (1.566) data 0.001 (0.002) loss 1.2334 (1.1410) acc 59.3750 (71.0880) lr 1.6374e-03 eta 14:59:26 +epoch [16/50] batch [545/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.9653 (1.1393) acc 68.7500 (71.1009) lr 1.6374e-03 eta 14:59:12 +epoch [16/50] batch [550/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.3838 (1.1382) acc 62.5000 (71.1250) lr 1.6374e-03 eta 14:59:01 +epoch [16/50] batch [555/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.2412 (1.1388) acc 62.5000 (71.0980) lr 1.6374e-03 eta 14:58:46 +epoch [16/50] batch [560/1000] time 1.542 (1.566) data 0.001 (0.002) loss 1.2734 (1.1397) acc 75.0000 (71.1161) lr 1.6374e-03 eta 14:58:36 +epoch [16/50] batch [565/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.6924 (1.1397) acc 59.3750 (71.1117) lr 1.6374e-03 eta 14:58:26 +epoch [16/50] batch [570/1000] time 1.556 (1.565) data 
0.001 (0.002) loss 0.9702 (1.1404) acc 71.8750 (71.0910) lr 1.6374e-03 eta 14:58:19 +epoch [16/50] batch [575/1000] time 1.550 (1.565) data 0.001 (0.002) loss 1.3896 (1.1419) acc 65.6250 (71.0598) lr 1.6374e-03 eta 14:58:05 +epoch [16/50] batch [580/1000] time 1.538 (1.565) data 0.000 (0.002) loss 0.6807 (1.1409) acc 78.1250 (71.0776) lr 1.6374e-03 eta 14:57:54 +epoch [16/50] batch [585/1000] time 1.546 (1.565) data 0.001 (0.002) loss 1.4062 (1.1434) acc 59.3750 (71.0363) lr 1.6374e-03 eta 14:57:54 +epoch [16/50] batch [590/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.9458 (1.1432) acc 75.0000 (71.0752) lr 1.6374e-03 eta 14:57:44 +epoch [16/50] batch [595/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.3525 (1.1434) acc 71.8750 (71.1029) lr 1.6374e-03 eta 14:57:36 +epoch [16/50] batch [600/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.0117 (1.1420) acc 71.8750 (71.1510) lr 1.6374e-03 eta 14:57:26 +epoch [16/50] batch [605/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.4287 (1.1416) acc 65.6250 (71.1622) lr 1.6374e-03 eta 14:57:17 +epoch [16/50] batch [610/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.1211 (1.1435) acc 62.5000 (71.1168) lr 1.6374e-03 eta 14:57:05 +epoch [16/50] batch [615/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.9526 (1.1432) acc 75.0000 (71.1077) lr 1.6374e-03 eta 14:56:53 +epoch [16/50] batch [620/1000] time 1.558 (1.565) data 0.001 (0.002) loss 0.8755 (1.1441) acc 81.2500 (71.1089) lr 1.6374e-03 eta 14:56:41 +epoch [16/50] batch [625/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.8599 (1.1425) acc 81.2500 (71.1400) lr 1.6374e-03 eta 14:56:30 +epoch [16/50] batch [630/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.0654 (1.1433) acc 78.1250 (71.1359) lr 1.6374e-03 eta 14:56:26 +epoch [16/50] batch [635/1000] time 1.539 (1.565) data 0.000 (0.002) loss 0.7690 (1.1431) acc 81.2500 (71.1614) lr 1.6374e-03 eta 14:56:15 +epoch [16/50] batch [640/1000] time 1.572 (1.565) data 0.000 (0.002) loss 0.9272 (1.1410) acc 
75.0000 (71.2109) lr 1.6374e-03 eta 14:56:08 +epoch [16/50] batch [645/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.1475 (1.1403) acc 71.8750 (71.2064) lr 1.6374e-03 eta 14:55:58 +epoch [16/50] batch [650/1000] time 1.578 (1.565) data 0.000 (0.002) loss 1.4756 (1.1411) acc 68.7500 (71.1923) lr 1.6374e-03 eta 14:55:49 +epoch [16/50] batch [655/1000] time 1.570 (1.565) data 0.000 (0.002) loss 0.7095 (1.1410) acc 81.2500 (71.2214) lr 1.6374e-03 eta 14:55:39 +epoch [16/50] batch [660/1000] time 1.568 (1.565) data 0.001 (0.002) loss 1.1914 (1.1427) acc 71.8750 (71.2074) lr 1.6374e-03 eta 14:55:29 +epoch [16/50] batch [665/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.7803 (1.1420) acc 71.8750 (71.2218) lr 1.6374e-03 eta 14:55:24 +epoch [16/50] batch [670/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.3467 (1.1423) acc 65.6250 (71.2313) lr 1.6374e-03 eta 14:55:14 +epoch [16/50] batch [675/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.0537 (1.1434) acc 78.1250 (71.1944) lr 1.6374e-03 eta 14:55:05 +epoch [16/50] batch [680/1000] time 1.536 (1.565) data 0.001 (0.002) loss 0.9292 (1.1425) acc 78.1250 (71.2224) lr 1.6374e-03 eta 14:54:56 +epoch [16/50] batch [685/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.9448 (1.1415) acc 81.2500 (71.2774) lr 1.6374e-03 eta 14:54:48 +epoch [16/50] batch [690/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.5176 (1.1439) acc 59.3750 (71.2002) lr 1.6374e-03 eta 14:54:48 +epoch [16/50] batch [695/1000] time 1.548 (1.565) data 0.000 (0.002) loss 0.7168 (1.1423) acc 81.2500 (71.2320) lr 1.6374e-03 eta 14:54:36 +epoch [16/50] batch [700/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.4326 (1.1428) acc 68.7500 (71.2232) lr 1.6374e-03 eta 14:54:25 +epoch [16/50] batch [705/1000] time 1.551 (1.565) data 0.000 (0.002) loss 0.8579 (1.1435) acc 75.0000 (71.1879) lr 1.6374e-03 eta 14:54:17 +epoch [16/50] batch [710/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.0117 (1.1413) acc 81.2500 (71.2412) lr 1.6374e-03 eta 
14:54:08 +epoch [16/50] batch [715/1000] time 1.565 (1.564) data 0.000 (0.002) loss 0.8843 (1.1417) acc 78.1250 (71.2325) lr 1.6374e-03 eta 14:53:58 +epoch [16/50] batch [720/1000] time 1.584 (1.564) data 0.001 (0.002) loss 1.7646 (1.1435) acc 53.1250 (71.1979) lr 1.6374e-03 eta 14:53:49 +epoch [16/50] batch [725/1000] time 1.572 (1.564) data 0.001 (0.002) loss 0.9937 (1.1434) acc 68.7500 (71.1983) lr 1.6374e-03 eta 14:53:43 +epoch [16/50] batch [730/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.8618 (1.1421) acc 75.0000 (71.2286) lr 1.6374e-03 eta 14:53:34 +epoch [16/50] batch [735/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.7900 (1.1423) acc 78.1250 (71.2287) lr 1.6374e-03 eta 14:53:29 +epoch [16/50] batch [740/1000] time 1.553 (1.564) data 0.001 (0.002) loss 1.3369 (1.1406) acc 65.6250 (71.2627) lr 1.6374e-03 eta 14:53:19 +epoch [16/50] batch [745/1000] time 1.536 (1.564) data 0.001 (0.002) loss 0.7178 (1.1412) acc 75.0000 (71.2248) lr 1.6374e-03 eta 14:53:08 +epoch [16/50] batch [750/1000] time 1.560 (1.564) data 0.001 (0.002) loss 1.2119 (1.1408) acc 75.0000 (71.2333) lr 1.6374e-03 eta 14:52:56 +epoch [16/50] batch [755/1000] time 1.549 (1.564) data 0.000 (0.002) loss 0.9834 (1.1417) acc 71.8750 (71.2252) lr 1.6374e-03 eta 14:52:46 +epoch [16/50] batch [760/1000] time 1.579 (1.564) data 0.000 (0.002) loss 1.1074 (1.1412) acc 68.7500 (71.2253) lr 1.6374e-03 eta 14:52:37 +epoch [16/50] batch [765/1000] time 1.548 (1.564) data 0.000 (0.002) loss 0.5449 (1.1401) acc 84.3750 (71.2500) lr 1.6374e-03 eta 14:52:28 +epoch [16/50] batch [770/1000] time 1.567 (1.564) data 0.001 (0.002) loss 1.2520 (1.1398) acc 65.6250 (71.2581) lr 1.6374e-03 eta 14:52:19 +epoch [16/50] batch [775/1000] time 1.561 (1.564) data 0.001 (0.002) loss 1.1953 (1.1413) acc 71.8750 (71.2097) lr 1.6374e-03 eta 14:52:11 +epoch [16/50] batch [780/1000] time 1.549 (1.564) data 0.001 (0.002) loss 1.4238 (1.1416) acc 62.5000 (71.1899) lr 1.6374e-03 eta 14:52:06 +epoch [16/50] batch 
[785/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.4531 (1.1406) acc 71.8750 (71.2221) lr 1.6374e-03 eta 14:51:55 +epoch [16/50] batch [790/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.2832 (1.1398) acc 68.7500 (71.2342) lr 1.6374e-03 eta 14:51:47 +epoch [16/50] batch [795/1000] time 1.580 (1.564) data 0.000 (0.002) loss 0.6450 (1.1382) acc 81.2500 (71.2775) lr 1.6374e-03 eta 14:51:38 +epoch [16/50] batch [800/1000] time 1.567 (1.564) data 0.001 (0.002) loss 1.6709 (1.1379) acc 62.5000 (71.3008) lr 1.6374e-03 eta 14:51:31 +epoch [16/50] batch [805/1000] time 1.561 (1.564) data 0.001 (0.002) loss 1.2549 (1.1382) acc 62.5000 (71.2655) lr 1.6374e-03 eta 14:51:24 +epoch [16/50] batch [810/1000] time 1.545 (1.564) data 0.000 (0.002) loss 1.6025 (1.1399) acc 65.6250 (71.2346) lr 1.6374e-03 eta 14:51:16 +epoch [16/50] batch [815/1000] time 1.549 (1.564) data 0.000 (0.002) loss 1.1152 (1.1405) acc 71.8750 (71.2385) lr 1.6374e-03 eta 14:51:05 +epoch [16/50] batch [820/1000] time 1.531 (1.564) data 0.001 (0.002) loss 0.7783 (1.1394) acc 71.8750 (71.2538) lr 1.6374e-03 eta 14:50:53 +epoch [16/50] batch [825/1000] time 1.542 (1.564) data 0.001 (0.002) loss 1.1045 (1.1386) acc 75.0000 (71.2652) lr 1.6374e-03 eta 14:50:44 +epoch [16/50] batch [830/1000] time 1.579 (1.564) data 0.000 (0.002) loss 0.9854 (1.1379) acc 71.8750 (71.2613) lr 1.6374e-03 eta 14:50:35 +epoch [16/50] batch [835/1000] time 1.532 (1.564) data 0.000 (0.002) loss 1.2227 (1.1401) acc 75.0000 (71.2275) lr 1.6374e-03 eta 14:50:24 +epoch [16/50] batch [840/1000] time 1.726 (1.564) data 0.000 (0.002) loss 1.3496 (1.1394) acc 75.0000 (71.2537) lr 1.6374e-03 eta 14:50:22 +epoch [16/50] batch [845/1000] time 1.575 (1.564) data 0.000 (0.002) loss 1.5762 (1.1395) acc 68.7500 (71.2574) lr 1.6374e-03 eta 14:50:14 +epoch [16/50] batch [850/1000] time 1.562 (1.564) data 0.000 (0.002) loss 1.1445 (1.1395) acc 65.6250 (71.2574) lr 1.6374e-03 eta 14:50:07 +epoch [16/50] batch [855/1000] time 1.565 (1.564) data 
0.000 (0.002) loss 1.4688 (1.1398) acc 62.5000 (71.2573) lr 1.6374e-03 eta 14:49:59 +epoch [16/50] batch [860/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.2490 (1.1406) acc 71.8750 (71.2391) lr 1.6374e-03 eta 14:49:50 +epoch [16/50] batch [865/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.2451 (1.1414) acc 62.5000 (71.2103) lr 1.6374e-03 eta 14:49:43 +epoch [16/50] batch [870/1000] time 1.554 (1.564) data 0.001 (0.002) loss 0.9604 (1.1409) acc 71.8750 (71.2284) lr 1.6374e-03 eta 14:49:33 +epoch [16/50] batch [875/1000] time 1.541 (1.564) data 0.001 (0.002) loss 0.9517 (1.1402) acc 78.1250 (71.2393) lr 1.6374e-03 eta 14:49:24 +epoch [16/50] batch [880/1000] time 1.541 (1.564) data 0.001 (0.002) loss 1.1045 (1.1404) acc 75.0000 (71.2322) lr 1.6374e-03 eta 14:49:16 +epoch [16/50] batch [885/1000] time 1.718 (1.564) data 0.000 (0.002) loss 0.8022 (1.1392) acc 78.1250 (71.2500) lr 1.6374e-03 eta 14:49:16 +epoch [16/50] batch [890/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.3584 (1.1404) acc 71.8750 (71.2395) lr 1.6374e-03 eta 14:49:07 +epoch [16/50] batch [895/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.9268 (1.1398) acc 75.0000 (71.2535) lr 1.6374e-03 eta 14:48:59 +epoch [16/50] batch [900/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.4268 (1.1399) acc 65.6250 (71.2604) lr 1.6374e-03 eta 14:48:50 +epoch [16/50] batch [905/1000] time 1.533 (1.564) data 0.000 (0.002) loss 1.3418 (1.1399) acc 68.7500 (71.2638) lr 1.6374e-03 eta 14:48:38 +epoch [16/50] batch [910/1000] time 1.554 (1.564) data 0.000 (0.002) loss 1.2754 (1.1407) acc 68.7500 (71.2466) lr 1.6374e-03 eta 14:48:29 +epoch [16/50] batch [915/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.8115 (1.1413) acc 65.6250 (71.2295) lr 1.6374e-03 eta 14:48:19 +epoch [16/50] batch [920/1000] time 1.566 (1.564) data 0.000 (0.002) loss 0.9678 (1.1421) acc 84.3750 (71.2296) lr 1.6374e-03 eta 14:48:11 +epoch [16/50] batch [925/1000] time 1.583 (1.564) data 0.000 (0.002) loss 0.7817 (1.1424) acc 
84.3750 (71.2162) lr 1.6374e-03 eta 14:48:02 +epoch [16/50] batch [930/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.6650 (1.1427) acc 62.5000 (71.2097) lr 1.6374e-03 eta 14:48:00 +epoch [16/50] batch [935/1000] time 1.575 (1.564) data 0.001 (0.002) loss 1.1104 (1.1424) acc 78.1250 (71.2099) lr 1.6374e-03 eta 14:47:53 +epoch [16/50] batch [940/1000] time 1.553 (1.564) data 0.001 (0.002) loss 1.4893 (1.1424) acc 71.8750 (71.2201) lr 1.6374e-03 eta 14:47:44 +epoch [16/50] batch [945/1000] time 1.564 (1.564) data 0.001 (0.002) loss 1.1211 (1.1430) acc 71.8750 (71.2169) lr 1.6374e-03 eta 14:47:36 +epoch [16/50] batch [950/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.6655 (1.1421) acc 84.3750 (71.2664) lr 1.6374e-03 eta 14:47:30 +epoch [16/50] batch [955/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.3398 (1.1415) acc 68.7500 (71.2696) lr 1.6374e-03 eta 14:47:23 +epoch [16/50] batch [960/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.1270 (1.1417) acc 78.1250 (71.2728) lr 1.6374e-03 eta 14:47:16 +epoch [16/50] batch [965/1000] time 1.550 (1.564) data 0.001 (0.002) loss 0.8608 (1.1410) acc 81.2500 (71.3018) lr 1.6374e-03 eta 14:47:07 +epoch [16/50] batch [970/1000] time 1.556 (1.564) data 0.001 (0.001) loss 1.5146 (1.1425) acc 65.6250 (71.2951) lr 1.6374e-03 eta 14:46:59 +epoch [16/50] batch [975/1000] time 1.559 (1.564) data 0.001 (0.001) loss 1.2725 (1.1418) acc 62.5000 (71.3045) lr 1.6374e-03 eta 14:46:51 +epoch [16/50] batch [980/1000] time 1.564 (1.564) data 0.001 (0.001) loss 1.1982 (1.1415) acc 71.8750 (71.3170) lr 1.6374e-03 eta 14:46:42 +epoch [16/50] batch [985/1000] time 1.560 (1.564) data 0.001 (0.001) loss 0.8921 (1.1417) acc 75.0000 (71.3166) lr 1.6374e-03 eta 14:46:34 +epoch [16/50] batch [990/1000] time 1.542 (1.564) data 0.000 (0.001) loss 1.4121 (1.1425) acc 75.0000 (71.3005) lr 1.6374e-03 eta 14:46:24 +epoch [16/50] batch [995/1000] time 1.553 (1.564) data 0.000 (0.001) loss 1.4795 (1.1430) acc 68.7500 (71.2908) lr 1.6374e-03 eta 
14:46:20 +epoch [16/50] batch [1000/1000] time 1.563 (1.564) data 0.000 (0.001) loss 1.0127 (1.1438) acc 75.0000 (71.2719) lr 1.5878e-03 eta 14:46:09 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,123 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.8% +epoch [17/50] batch [5/1000] time 1.574 (1.689) data 0.000 (0.177) loss 0.9268 (1.0384) acc 75.0000 (72.5000) lr 1.5878e-03 eta 15:56:53 +epoch [17/50] batch [10/1000] time 1.552 (1.622) data 0.000 (0.089) loss 1.2764 (1.0252) acc 75.0000 (73.4375) lr 1.5878e-03 eta 15:18:39 +epoch [17/50] batch [15/1000] time 1.585 (1.624) data 0.000 (0.059) loss 0.9077 (0.9819) acc 75.0000 (73.9583) lr 1.5878e-03 eta 15:19:39 +epoch [17/50] batch [20/1000] time 1.568 (1.608) data 0.000 (0.045) loss 1.1504 (1.0757) acc 81.2500 (73.1250) lr 1.5878e-03 eta 15:10:25 +epoch [17/50] batch [25/1000] time 1.565 (1.598) data 0.000 (0.036) loss 1.0566 (1.0434) acc 78.1250 (74.2500) lr 1.5878e-03 eta 15:05:09 +epoch [17/50] batch [30/1000] time 1.549 (1.592) data 0.000 (0.030) loss 0.4631 (1.0458) acc 84.3750 (74.2708) lr 1.5878e-03 eta 15:01:30 +epoch [17/50] batch [35/1000] time 1.564 (1.589) data 0.000 (0.026) loss 1.5283 (1.0642) acc 59.3750 (73.0357) lr 1.5878e-03 eta 14:59:21 +epoch [17/50] batch [40/1000] time 1.575 (1.586) data 0.000 (0.022) loss 1.1328 (1.0771) acc 68.7500 (72.5000) lr 1.5878e-03 eta 14:57:44 +epoch [17/50] batch [45/1000] time 1.542 (1.582) data 0.000 (0.020) loss 1.2031 (1.0704) acc 68.7500 (72.4306) lr 1.5878e-03 eta 14:55:10 +epoch [17/50] batch [50/1000] time 1.582 (1.581) data 0.000 (0.018) loss 1.6738 (1.0704) acc 68.7500 (72.2500) lr 1.5878e-03 eta 14:54:29 +epoch [17/50] batch [55/1000] time 1.544 (1.579) data 0.000 (0.016) loss 1.3701 (1.0907) acc 62.5000 (71.7614) lr 1.5878e-03 eta 14:53:14 +epoch [17/50] batch [60/1000] time 1.569 (1.578) data 0.000 (0.015) loss 0.6768 (1.0852) acc 78.1250 (71.7188) lr 1.5878e-03 eta 14:52:33 +epoch [17/50] batch [65/1000] time 1.564 (1.576) data 
0.000 (0.014) loss 0.4988 (1.0874) acc 87.5000 (71.8750) lr 1.5878e-03 eta 14:51:11 +epoch [17/50] batch [70/1000] time 1.556 (1.574) data 0.000 (0.013) loss 0.8862 (1.0764) acc 81.2500 (72.5446) lr 1.5878e-03 eta 14:50:18 +epoch [17/50] batch [75/1000] time 1.544 (1.574) data 0.000 (0.012) loss 1.4561 (1.0719) acc 56.2500 (72.5000) lr 1.5878e-03 eta 14:49:52 +epoch [17/50] batch [80/1000] time 1.556 (1.573) data 0.000 (0.011) loss 0.4932 (1.0730) acc 84.3750 (72.5391) lr 1.5878e-03 eta 14:49:18 +epoch [17/50] batch [85/1000] time 1.556 (1.572) data 0.001 (0.011) loss 1.3438 (1.0693) acc 65.6250 (72.5368) lr 1.5878e-03 eta 14:48:37 +epoch [17/50] batch [90/1000] time 1.558 (1.571) data 0.000 (0.010) loss 1.0498 (1.0674) acc 75.0000 (72.5000) lr 1.5878e-03 eta 14:48:04 +epoch [17/50] batch [95/1000] time 1.568 (1.571) data 0.000 (0.010) loss 0.9526 (1.0663) acc 81.2500 (72.5329) lr 1.5878e-03 eta 14:47:34 +epoch [17/50] batch [100/1000] time 1.565 (1.571) data 0.000 (0.009) loss 1.2256 (1.0820) acc 75.0000 (72.1875) lr 1.5878e-03 eta 14:47:23 +epoch [17/50] batch [105/1000] time 1.589 (1.571) data 0.000 (0.009) loss 1.5127 (1.0883) acc 65.6250 (72.1726) lr 1.5878e-03 eta 14:47:12 +epoch [17/50] batch [110/1000] time 1.536 (1.570) data 0.000 (0.008) loss 0.8320 (1.0782) acc 78.1250 (72.3580) lr 1.5878e-03 eta 14:46:35 +epoch [17/50] batch [115/1000] time 1.540 (1.569) data 0.000 (0.008) loss 1.4443 (1.0775) acc 62.5000 (72.3913) lr 1.5878e-03 eta 14:46:05 +epoch [17/50] batch [120/1000] time 1.542 (1.569) data 0.000 (0.008) loss 1.2246 (1.0753) acc 71.8750 (72.4740) lr 1.5878e-03 eta 14:46:13 +epoch [17/50] batch [125/1000] time 1.597 (1.570) data 0.001 (0.007) loss 1.0352 (1.0732) acc 65.6250 (72.4750) lr 1.5878e-03 eta 14:46:09 +epoch [17/50] batch [130/1000] time 1.574 (1.570) data 0.000 (0.007) loss 0.9062 (1.0761) acc 65.6250 (72.4760) lr 1.5878e-03 eta 14:46:00 +epoch [17/50] batch [135/1000] time 1.573 (1.569) data 0.000 (0.007) loss 0.7695 (1.0699) acc 
75.0000 (72.5694) lr 1.5878e-03 eta 14:45:42 +epoch [17/50] batch [140/1000] time 1.551 (1.569) data 0.000 (0.007) loss 0.4324 (1.0660) acc 87.5000 (72.7679) lr 1.5878e-03 eta 14:45:33 +epoch [17/50] batch [145/1000] time 1.535 (1.569) data 0.000 (0.006) loss 1.1309 (1.0652) acc 78.1250 (72.8664) lr 1.5878e-03 eta 14:45:09 +epoch [17/50] batch [150/1000] time 1.534 (1.568) data 0.000 (0.006) loss 0.9736 (1.0672) acc 75.0000 (72.8333) lr 1.5878e-03 eta 14:44:44 +epoch [17/50] batch [155/1000] time 1.572 (1.568) data 0.001 (0.006) loss 1.0059 (1.0654) acc 68.7500 (72.7823) lr 1.5878e-03 eta 14:44:24 +epoch [17/50] batch [160/1000] time 1.574 (1.568) data 0.000 (0.006) loss 0.7524 (1.0610) acc 81.2500 (72.8906) lr 1.5878e-03 eta 14:44:11 +epoch [17/50] batch [165/1000] time 1.557 (1.568) data 0.000 (0.006) loss 1.6416 (1.0663) acc 68.7500 (72.7273) lr 1.5878e-03 eta 14:44:23 +epoch [17/50] batch [170/1000] time 1.535 (1.568) data 0.000 (0.006) loss 0.9360 (1.0775) acc 68.7500 (72.5184) lr 1.5878e-03 eta 14:44:13 +epoch [17/50] batch [175/1000] time 1.560 (1.568) data 0.000 (0.005) loss 0.7954 (1.0705) acc 81.2500 (72.6250) lr 1.5878e-03 eta 14:43:43 +epoch [17/50] batch [180/1000] time 1.589 (1.567) data 0.000 (0.005) loss 0.7168 (1.0691) acc 81.2500 (72.6562) lr 1.5878e-03 eta 14:43:27 +epoch [17/50] batch [185/1000] time 1.551 (1.567) data 0.000 (0.005) loss 0.7109 (1.0663) acc 84.3750 (72.7703) lr 1.5878e-03 eta 14:43:11 +epoch [17/50] batch [190/1000] time 1.588 (1.568) data 0.000 (0.005) loss 1.0967 (1.0723) acc 71.8750 (72.6480) lr 1.5878e-03 eta 14:43:18 +epoch [17/50] batch [195/1000] time 1.557 (1.567) data 0.000 (0.005) loss 1.0156 (1.0739) acc 68.7500 (72.5801) lr 1.5878e-03 eta 14:43:01 +epoch [17/50] batch [200/1000] time 1.554 (1.567) data 0.000 (0.005) loss 1.1416 (1.0781) acc 68.7500 (72.5000) lr 1.5878e-03 eta 14:42:46 +epoch [17/50] batch [205/1000] time 1.698 (1.568) data 0.000 (0.005) loss 1.3838 (1.0823) acc 56.2500 (72.3476) lr 1.5878e-03 eta 
14:43:07 +epoch [17/50] batch [210/1000] time 1.605 (1.568) data 0.000 (0.005) loss 1.3633 (1.0857) acc 68.7500 (72.2470) lr 1.5878e-03 eta 14:43:11 +epoch [17/50] batch [215/1000] time 1.528 (1.568) data 0.000 (0.004) loss 1.7168 (1.0885) acc 62.5000 (72.2093) lr 1.5878e-03 eta 14:42:56 +epoch [17/50] batch [220/1000] time 1.563 (1.568) data 0.000 (0.004) loss 1.0439 (1.0906) acc 59.3750 (72.1449) lr 1.5878e-03 eta 14:42:41 +epoch [17/50] batch [225/1000] time 1.542 (1.567) data 0.001 (0.004) loss 1.5713 (1.0952) acc 62.5000 (72.0278) lr 1.5878e-03 eta 14:42:14 +epoch [17/50] batch [230/1000] time 1.552 (1.567) data 0.000 (0.004) loss 0.8750 (1.0920) acc 75.0000 (72.0516) lr 1.5878e-03 eta 14:41:54 +epoch [17/50] batch [235/1000] time 1.546 (1.567) data 0.000 (0.004) loss 1.4580 (1.0957) acc 56.2500 (71.9681) lr 1.5878e-03 eta 14:41:42 +epoch [17/50] batch [240/1000] time 1.569 (1.567) data 0.000 (0.004) loss 0.5601 (1.0978) acc 87.5000 (71.9661) lr 1.5878e-03 eta 14:41:28 +epoch [17/50] batch [245/1000] time 1.549 (1.567) data 0.000 (0.004) loss 0.6577 (1.0947) acc 78.1250 (71.9643) lr 1.5878e-03 eta 14:41:18 +epoch [17/50] batch [250/1000] time 1.571 (1.566) data 0.000 (0.004) loss 1.1562 (1.0954) acc 68.7500 (71.9375) lr 1.5878e-03 eta 14:41:00 +epoch [17/50] batch [255/1000] time 1.543 (1.566) data 0.000 (0.004) loss 1.0361 (1.0965) acc 78.1250 (71.9363) lr 1.5878e-03 eta 14:40:42 +epoch [17/50] batch [260/1000] time 1.548 (1.566) data 0.001 (0.004) loss 0.9541 (1.0937) acc 71.8750 (71.9231) lr 1.5878e-03 eta 14:40:28 +epoch [17/50] batch [265/1000] time 1.561 (1.566) data 0.001 (0.004) loss 0.6807 (1.0962) acc 84.3750 (71.8986) lr 1.5878e-03 eta 14:40:20 +epoch [17/50] batch [270/1000] time 1.555 (1.566) data 0.000 (0.004) loss 0.8193 (1.0942) acc 75.0000 (71.8403) lr 1.5878e-03 eta 14:40:25 +epoch [17/50] batch [275/1000] time 1.553 (1.566) data 0.000 (0.004) loss 0.7627 (1.0947) acc 81.2500 (71.8636) lr 1.5878e-03 eta 14:40:18 +epoch [17/50] batch 
[280/1000] time 1.538 (1.566) data 0.001 (0.004) loss 1.5020 (1.0961) acc 68.7500 (71.8750) lr 1.5878e-03 eta 14:40:01 +epoch [17/50] batch [285/1000] time 1.557 (1.566) data 0.000 (0.003) loss 0.4050 (1.0925) acc 90.6250 (71.9627) lr 1.5878e-03 eta 14:39:48 +epoch [17/50] batch [290/1000] time 1.565 (1.566) data 0.000 (0.003) loss 1.4492 (1.0966) acc 56.2500 (71.8211) lr 1.5878e-03 eta 14:39:38 +epoch [17/50] batch [295/1000] time 1.550 (1.565) data 0.000 (0.003) loss 1.1162 (1.0977) acc 75.0000 (71.8114) lr 1.5878e-03 eta 14:39:24 +epoch [17/50] batch [300/1000] time 1.558 (1.566) data 0.000 (0.003) loss 0.7056 (1.0948) acc 87.5000 (71.8542) lr 1.5878e-03 eta 14:39:19 +epoch [17/50] batch [305/1000] time 1.567 (1.566) data 0.001 (0.003) loss 1.3330 (1.1002) acc 68.7500 (71.7520) lr 1.5878e-03 eta 14:39:12 +epoch [17/50] batch [310/1000] time 1.549 (1.565) data 0.000 (0.003) loss 0.4089 (1.0979) acc 90.6250 (71.8649) lr 1.5878e-03 eta 14:38:56 +epoch [17/50] batch [315/1000] time 1.594 (1.566) data 0.000 (0.003) loss 1.1914 (1.0948) acc 78.1250 (71.9345) lr 1.5878e-03 eta 14:39:04 +epoch [17/50] batch [320/1000] time 1.572 (1.566) data 0.001 (0.003) loss 1.0244 (1.0906) acc 68.7500 (72.0117) lr 1.5878e-03 eta 14:38:58 +epoch [17/50] batch [325/1000] time 1.557 (1.566) data 0.001 (0.003) loss 1.1484 (1.0893) acc 78.1250 (72.0481) lr 1.5878e-03 eta 14:38:46 +epoch [17/50] batch [330/1000] time 1.558 (1.566) data 0.000 (0.003) loss 1.2773 (1.0913) acc 71.8750 (71.9697) lr 1.5878e-03 eta 14:38:32 +epoch [17/50] batch [335/1000] time 1.563 (1.566) data 0.000 (0.003) loss 1.0205 (1.0907) acc 75.0000 (71.9963) lr 1.5878e-03 eta 14:38:22 +epoch [17/50] batch [340/1000] time 1.577 (1.566) data 0.001 (0.003) loss 1.2852 (1.0908) acc 65.6250 (71.9945) lr 1.5878e-03 eta 14:38:15 +epoch [17/50] batch [345/1000] time 1.567 (1.565) data 0.001 (0.003) loss 1.2764 (1.0910) acc 59.3750 (71.9656) lr 1.5878e-03 eta 14:38:05 +epoch [17/50] batch [350/1000] time 1.575 (1.565) data 
0.000 (0.003) loss 1.2158 (1.0951) acc 62.5000 (71.8304) lr 1.5878e-03 eta 14:37:58 +epoch [17/50] batch [355/1000] time 1.579 (1.565) data 0.000 (0.003) loss 0.6641 (1.0957) acc 78.1250 (71.8134) lr 1.5878e-03 eta 14:37:51 +epoch [17/50] batch [360/1000] time 1.554 (1.566) data 0.000 (0.003) loss 0.8306 (1.0974) acc 78.1250 (71.7969) lr 1.5878e-03 eta 14:37:54 +epoch [17/50] batch [365/1000] time 1.571 (1.566) data 0.001 (0.003) loss 1.6455 (1.1000) acc 59.3750 (71.7637) lr 1.5878e-03 eta 14:37:46 +epoch [17/50] batch [370/1000] time 1.562 (1.566) data 0.000 (0.003) loss 0.8579 (1.1016) acc 78.1250 (71.6892) lr 1.5878e-03 eta 14:37:36 +epoch [17/50] batch [375/1000] time 1.556 (1.566) data 0.000 (0.003) loss 1.1963 (1.1045) acc 65.6250 (71.6083) lr 1.5878e-03 eta 14:37:27 +epoch [17/50] batch [380/1000] time 1.580 (1.566) data 0.000 (0.003) loss 1.5557 (1.1054) acc 71.8750 (71.6036) lr 1.5878e-03 eta 14:37:19 +epoch [17/50] batch [385/1000] time 1.542 (1.566) data 0.000 (0.003) loss 1.3301 (1.1068) acc 62.5000 (71.5584) lr 1.5878e-03 eta 14:37:04 +epoch [17/50] batch [390/1000] time 1.562 (1.566) data 0.001 (0.003) loss 1.2979 (1.1077) acc 68.7500 (71.5545) lr 1.5878e-03 eta 14:36:58 +epoch [17/50] batch [395/1000] time 1.543 (1.566) data 0.000 (0.003) loss 0.9590 (1.1097) acc 78.1250 (71.5190) lr 1.5878e-03 eta 14:36:50 +epoch [17/50] batch [400/1000] time 1.553 (1.565) data 0.000 (0.003) loss 1.2793 (1.1111) acc 65.6250 (71.5078) lr 1.5878e-03 eta 14:36:37 +epoch [17/50] batch [405/1000] time 1.536 (1.565) data 0.000 (0.003) loss 1.2764 (1.1107) acc 68.7500 (71.5432) lr 1.5878e-03 eta 14:36:25 +epoch [17/50] batch [410/1000] time 1.542 (1.565) data 0.000 (0.003) loss 1.7041 (1.1114) acc 59.3750 (71.5625) lr 1.5878e-03 eta 14:36:08 +epoch [17/50] batch [415/1000] time 1.593 (1.565) data 0.000 (0.003) loss 1.1504 (1.1144) acc 78.1250 (71.5437) lr 1.5878e-03 eta 14:36:02 +epoch [17/50] batch [420/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.0088 (1.1114) acc 
78.1250 (71.6071) lr 1.5878e-03 eta 14:35:58 +epoch [17/50] batch [425/1000] time 1.588 (1.565) data 0.000 (0.002) loss 1.2217 (1.1147) acc 65.6250 (71.5441) lr 1.5878e-03 eta 14:35:51 +epoch [17/50] batch [430/1000] time 1.546 (1.565) data 0.000 (0.002) loss 0.9185 (1.1153) acc 75.0000 (71.5407) lr 1.5878e-03 eta 14:35:39 +epoch [17/50] batch [435/1000] time 1.553 (1.565) data 0.001 (0.002) loss 0.5786 (1.1137) acc 75.0000 (71.5805) lr 1.5878e-03 eta 14:35:25 +epoch [17/50] batch [440/1000] time 1.548 (1.565) data 0.000 (0.002) loss 1.0186 (1.1137) acc 81.2500 (71.6264) lr 1.5878e-03 eta 14:35:14 +epoch [17/50] batch [445/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.6792 (1.1122) acc 93.7500 (71.6924) lr 1.5878e-03 eta 14:35:04 +epoch [17/50] batch [450/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.0244 (1.1119) acc 78.1250 (71.6944) lr 1.5878e-03 eta 14:34:54 +epoch [17/50] batch [455/1000] time 1.551 (1.565) data 0.000 (0.002) loss 0.9399 (1.1112) acc 59.3750 (71.6758) lr 1.5878e-03 eta 14:34:45 +epoch [17/50] batch [460/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.2676 (1.1117) acc 65.6250 (71.6916) lr 1.5878e-03 eta 14:34:36 +epoch [17/50] batch [465/1000] time 1.565 (1.565) data 0.001 (0.002) loss 0.6763 (1.1108) acc 81.2500 (71.7003) lr 1.5878e-03 eta 14:34:41 +epoch [17/50] batch [470/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.2627 (1.1106) acc 62.5000 (71.7221) lr 1.5878e-03 eta 14:34:34 +epoch [17/50] batch [475/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.6211 (1.1087) acc 81.2500 (71.7434) lr 1.5878e-03 eta 14:34:24 +epoch [17/50] batch [480/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.4482 (1.1109) acc 59.3750 (71.6732) lr 1.5878e-03 eta 14:34:13 +epoch [17/50] batch [485/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.5420 (1.1122) acc 62.5000 (71.6302) lr 1.5878e-03 eta 14:34:02 +epoch [17/50] batch [490/1000] time 1.548 (1.565) data 0.000 (0.002) loss 1.5098 (1.1148) acc 65.6250 (71.6263) lr 1.5878e-03 eta 
14:33:48 +epoch [17/50] batch [495/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.1729 (1.1177) acc 62.5000 (71.5278) lr 1.5878e-03 eta 14:33:39 +epoch [17/50] batch [500/1000] time 1.550 (1.564) data 0.000 (0.002) loss 0.9326 (1.1188) acc 81.2500 (71.4875) lr 1.5878e-03 eta 14:33:28 +epoch [17/50] batch [505/1000] time 1.543 (1.564) data 0.000 (0.002) loss 1.1582 (1.1183) acc 71.8750 (71.4790) lr 1.5878e-03 eta 14:33:16 +epoch [17/50] batch [510/1000] time 1.543 (1.564) data 0.000 (0.002) loss 1.0586 (1.1190) acc 71.8750 (71.4338) lr 1.5878e-03 eta 14:33:14 +epoch [17/50] batch [515/1000] time 1.564 (1.565) data 0.001 (0.002) loss 0.7900 (1.1192) acc 78.1250 (71.4381) lr 1.5878e-03 eta 14:33:08 +epoch [17/50] batch [520/1000] time 1.556 (1.564) data 0.000 (0.002) loss 0.9922 (1.1207) acc 71.8750 (71.3942) lr 1.5878e-03 eta 14:32:56 +epoch [17/50] batch [525/1000] time 1.569 (1.564) data 0.000 (0.002) loss 0.6738 (1.1208) acc 71.8750 (71.3810) lr 1.5878e-03 eta 14:32:48 +epoch [17/50] batch [530/1000] time 1.538 (1.564) data 0.000 (0.002) loss 1.3281 (1.1204) acc 65.6250 (71.3561) lr 1.5878e-03 eta 14:32:36 +epoch [17/50] batch [535/1000] time 1.572 (1.564) data 0.000 (0.002) loss 0.8740 (1.1183) acc 78.1250 (71.4311) lr 1.5878e-03 eta 14:32:28 +epoch [17/50] batch [540/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.0693 (1.1193) acc 68.7500 (71.4178) lr 1.5878e-03 eta 14:32:17 +epoch [17/50] batch [545/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.4844 (1.1175) acc 65.6250 (71.4564) lr 1.5878e-03 eta 14:32:07 +epoch [17/50] batch [550/1000] time 1.537 (1.564) data 0.000 (0.002) loss 1.2588 (1.1173) acc 71.8750 (71.4830) lr 1.5878e-03 eta 14:31:58 +epoch [17/50] batch [555/1000] time 1.550 (1.564) data 0.001 (0.002) loss 1.0186 (1.1169) acc 78.1250 (71.5259) lr 1.5878e-03 eta 14:31:47 +epoch [17/50] batch [560/1000] time 1.536 (1.564) data 0.000 (0.002) loss 1.3555 (1.1175) acc 68.7500 (71.5067) lr 1.5878e-03 eta 14:31:38 +epoch [17/50] batch 
[565/1000] time 1.541 (1.564) data 0.000 (0.002) loss 1.0908 (1.1177) acc 71.8750 (71.5210) lr 1.5878e-03 eta 14:31:26 +epoch [17/50] batch [570/1000] time 1.705 (1.564) data 0.000 (0.002) loss 0.9927 (1.1187) acc 71.8750 (71.4912) lr 1.5878e-03 eta 14:31:27 +epoch [17/50] batch [575/1000] time 1.549 (1.564) data 0.000 (0.002) loss 0.9121 (1.1187) acc 68.7500 (71.4891) lr 1.5878e-03 eta 14:31:19 +epoch [17/50] batch [580/1000] time 1.581 (1.564) data 0.000 (0.002) loss 1.6309 (1.1210) acc 65.6250 (71.4440) lr 1.5878e-03 eta 14:31:09 +epoch [17/50] batch [585/1000] time 1.575 (1.564) data 0.000 (0.002) loss 1.6240 (1.1222) acc 62.5000 (71.4156) lr 1.5878e-03 eta 14:31:03 +epoch [17/50] batch [590/1000] time 1.565 (1.564) data 0.001 (0.002) loss 1.1016 (1.1221) acc 68.7500 (71.4142) lr 1.5878e-03 eta 14:30:51 +epoch [17/50] batch [595/1000] time 1.561 (1.564) data 0.001 (0.002) loss 1.5850 (1.1236) acc 62.5000 (71.3655) lr 1.5878e-03 eta 14:30:43 +epoch [17/50] batch [600/1000] time 1.556 (1.564) data 0.001 (0.002) loss 0.6006 (1.1231) acc 84.3750 (71.4010) lr 1.5878e-03 eta 14:30:33 +epoch [17/50] batch [605/1000] time 1.582 (1.564) data 0.000 (0.002) loss 0.9106 (1.1241) acc 81.2500 (71.4101) lr 1.5878e-03 eta 14:30:24 +epoch [17/50] batch [610/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.2061 (1.1247) acc 78.1250 (71.3832) lr 1.5878e-03 eta 14:30:15 +epoch [17/50] batch [615/1000] time 1.714 (1.564) data 0.000 (0.002) loss 1.3232 (1.1267) acc 68.7500 (71.3262) lr 1.5878e-03 eta 14:30:15 +epoch [17/50] batch [620/1000] time 1.570 (1.564) data 0.001 (0.002) loss 0.9019 (1.1261) acc 78.1250 (71.3407) lr 1.5878e-03 eta 14:30:06 +epoch [17/50] batch [625/1000] time 1.565 (1.564) data 0.000 (0.002) loss 1.3477 (1.1256) acc 65.6250 (71.3500) lr 1.5878e-03 eta 14:29:57 +epoch [17/50] batch [630/1000] time 1.534 (1.564) data 0.000 (0.002) loss 0.7485 (1.1251) acc 81.2500 (71.3641) lr 1.5878e-03 eta 14:29:47 +epoch [17/50] batch [635/1000] time 1.569 (1.564) data 
0.000 (0.002) loss 0.8418 (1.1237) acc 81.2500 (71.4272) lr 1.5878e-03 eta 14:29:35 +epoch [17/50] batch [640/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.4404 (1.1245) acc 71.8750 (71.4160) lr 1.5878e-03 eta 14:29:25 +epoch [17/50] batch [645/1000] time 1.555 (1.564) data 0.000 (0.002) loss 0.9556 (1.1239) acc 71.8750 (71.4293) lr 1.5878e-03 eta 14:29:15 +epoch [17/50] batch [650/1000] time 1.579 (1.564) data 0.000 (0.002) loss 1.0566 (1.1234) acc 78.1250 (71.4567) lr 1.5878e-03 eta 14:29:07 +epoch [17/50] batch [655/1000] time 1.541 (1.564) data 0.000 (0.002) loss 0.9873 (1.1218) acc 75.0000 (71.5076) lr 1.5878e-03 eta 14:28:59 +epoch [17/50] batch [660/1000] time 1.544 (1.564) data 0.000 (0.002) loss 1.2734 (1.1209) acc 71.8750 (71.5246) lr 1.5878e-03 eta 14:28:57 +epoch [17/50] batch [665/1000] time 1.574 (1.564) data 0.001 (0.002) loss 1.5254 (1.1212) acc 56.2500 (71.5132) lr 1.5878e-03 eta 14:28:49 +epoch [17/50] batch [670/1000] time 1.550 (1.564) data 0.000 (0.002) loss 0.9941 (1.1201) acc 68.7500 (71.5159) lr 1.5878e-03 eta 14:28:40 +epoch [17/50] batch [675/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.3369 (1.1192) acc 68.7500 (71.5231) lr 1.5878e-03 eta 14:28:29 +epoch [17/50] batch [680/1000] time 1.551 (1.564) data 0.000 (0.002) loss 0.6372 (1.1188) acc 78.1250 (71.5257) lr 1.5878e-03 eta 14:28:22 +epoch [17/50] batch [685/1000] time 1.567 (1.564) data 0.000 (0.002) loss 1.0537 (1.1176) acc 68.7500 (71.5374) lr 1.5878e-03 eta 14:28:13 +epoch [17/50] batch [690/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.7773 (1.1196) acc 59.3750 (71.5082) lr 1.5878e-03 eta 14:28:06 +epoch [17/50] batch [695/1000] time 1.585 (1.564) data 0.000 (0.002) loss 1.2285 (1.1189) acc 68.7500 (71.5333) lr 1.5878e-03 eta 14:28:00 +epoch [17/50] batch [700/1000] time 1.584 (1.564) data 0.000 (0.002) loss 0.9746 (1.1180) acc 68.7500 (71.5223) lr 1.5878e-03 eta 14:27:55 +epoch [17/50] batch [705/1000] time 1.563 (1.564) data 0.000 (0.002) loss 0.8906 (1.1181) acc 
75.0000 (71.5204) lr 1.5878e-03 eta 14:27:46 +epoch [17/50] batch [710/1000] time 1.578 (1.564) data 0.000 (0.002) loss 1.6602 (1.1187) acc 56.2500 (71.5009) lr 1.5878e-03 eta 14:27:39 +epoch [17/50] batch [715/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.2803 (1.1202) acc 65.6250 (71.4729) lr 1.5878e-03 eta 14:27:31 +epoch [17/50] batch [720/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.4541 (1.1227) acc 68.7500 (71.4583) lr 1.5878e-03 eta 14:27:22 +epoch [17/50] batch [725/1000] time 1.560 (1.564) data 0.000 (0.002) loss 1.2236 (1.1242) acc 75.0000 (71.4526) lr 1.5878e-03 eta 14:27:17 +epoch [17/50] batch [730/1000] time 1.542 (1.564) data 0.000 (0.002) loss 1.1455 (1.1235) acc 81.2500 (71.4726) lr 1.5878e-03 eta 14:27:06 +epoch [17/50] batch [735/1000] time 1.580 (1.564) data 0.001 (0.002) loss 1.9473 (1.1236) acc 53.1250 (71.4711) lr 1.5878e-03 eta 14:27:00 +epoch [17/50] batch [740/1000] time 1.559 (1.564) data 0.000 (0.002) loss 0.7603 (1.1226) acc 87.5000 (71.5160) lr 1.5878e-03 eta 14:26:51 +epoch [17/50] batch [745/1000] time 1.553 (1.564) data 0.000 (0.002) loss 1.8662 (1.1251) acc 59.3750 (71.4933) lr 1.5878e-03 eta 14:26:44 +epoch [17/50] batch [750/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.4834 (1.1250) acc 56.2500 (71.4917) lr 1.5878e-03 eta 14:26:36 +epoch [17/50] batch [755/1000] time 1.581 (1.564) data 0.000 (0.002) loss 1.1680 (1.1245) acc 68.7500 (71.4901) lr 1.5878e-03 eta 14:26:29 +epoch [17/50] batch [760/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.5752 (1.1273) acc 62.5000 (71.4515) lr 1.5878e-03 eta 14:26:23 +epoch [17/50] batch [765/1000] time 1.554 (1.564) data 0.000 (0.002) loss 1.2832 (1.1282) acc 68.7500 (71.4624) lr 1.5878e-03 eta 14:26:12 +epoch [17/50] batch [770/1000] time 1.539 (1.564) data 0.000 (0.002) loss 1.1475 (1.1287) acc 71.8750 (71.4286) lr 1.5878e-03 eta 14:26:10 +epoch [17/50] batch [775/1000] time 1.543 (1.564) data 0.000 (0.002) loss 0.9873 (1.1286) acc 68.7500 (71.4194) lr 1.5878e-03 eta 
14:26:01 +epoch [17/50] batch [780/1000] time 1.550 (1.564) data 0.000 (0.002) loss 1.6836 (1.1286) acc 59.3750 (71.4463) lr 1.5878e-03 eta 14:25:53 +epoch [17/50] batch [785/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.6562 (1.1278) acc 59.3750 (71.4411) lr 1.5878e-03 eta 14:25:42 +epoch [17/50] batch [790/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.0068 (1.1267) acc 71.8750 (71.4636) lr 1.5878e-03 eta 14:25:30 +epoch [17/50] batch [795/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.0029 (1.1269) acc 68.7500 (71.4505) lr 1.5878e-03 eta 14:25:23 +epoch [17/50] batch [800/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.1328 (1.1251) acc 62.5000 (71.4844) lr 1.5878e-03 eta 14:25:14 +epoch [17/50] batch [805/1000] time 1.555 (1.564) data 0.000 (0.002) loss 0.8745 (1.1266) acc 75.0000 (71.4480) lr 1.5878e-03 eta 14:25:05 +epoch [17/50] batch [810/1000] time 1.586 (1.564) data 0.000 (0.001) loss 1.1631 (1.1259) acc 65.6250 (71.4429) lr 1.5878e-03 eta 14:25:05 +epoch [17/50] batch [815/1000] time 1.562 (1.564) data 0.000 (0.001) loss 0.8481 (1.1272) acc 78.1250 (71.4302) lr 1.5878e-03 eta 14:24:55 +epoch [17/50] batch [820/1000] time 1.538 (1.564) data 0.000 (0.001) loss 0.8037 (1.1261) acc 68.7500 (71.4405) lr 1.5878e-03 eta 14:24:45 +epoch [17/50] batch [825/1000] time 1.577 (1.564) data 0.000 (0.001) loss 1.2158 (1.1278) acc 71.8750 (71.4015) lr 1.5878e-03 eta 14:24:36 +epoch [17/50] batch [830/1000] time 1.550 (1.564) data 0.000 (0.001) loss 0.7905 (1.1282) acc 84.3750 (71.3931) lr 1.5878e-03 eta 14:24:28 +epoch [17/50] batch [835/1000] time 1.557 (1.564) data 0.001 (0.001) loss 1.3711 (1.1277) acc 65.6250 (71.4034) lr 1.5878e-03 eta 14:24:20 +epoch [17/50] batch [840/1000] time 1.580 (1.564) data 0.000 (0.001) loss 1.1367 (1.1283) acc 78.1250 (71.3876) lr 1.5878e-03 eta 14:24:11 +epoch [17/50] batch [845/1000] time 1.541 (1.564) data 0.000 (0.001) loss 0.5322 (1.1267) acc 90.6250 (71.4386) lr 1.5878e-03 eta 14:24:03 +epoch [17/50] batch 
[850/1000] time 1.603 (1.564) data 0.001 (0.001) loss 1.2051 (1.1275) acc 59.3750 (71.4044) lr 1.5878e-03 eta 14:23:56 +epoch [17/50] batch [855/1000] time 1.598 (1.564) data 0.000 (0.001) loss 1.1182 (1.1274) acc 68.7500 (71.4145) lr 1.5878e-03 eta 14:23:53 +epoch [17/50] batch [860/1000] time 1.559 (1.564) data 0.000 (0.001) loss 1.0713 (1.1274) acc 65.6250 (71.4026) lr 1.5878e-03 eta 14:23:46 +epoch [17/50] batch [865/1000] time 1.538 (1.564) data 0.000 (0.001) loss 1.6689 (1.1291) acc 65.6250 (71.3873) lr 1.5878e-03 eta 14:23:37 +epoch [17/50] batch [870/1000] time 1.558 (1.564) data 0.001 (0.001) loss 0.5703 (1.1283) acc 87.5000 (71.4116) lr 1.5878e-03 eta 14:23:29 +epoch [17/50] batch [875/1000] time 1.550 (1.564) data 0.001 (0.001) loss 0.6919 (1.1279) acc 71.8750 (71.4107) lr 1.5878e-03 eta 14:23:26 +epoch [17/50] batch [880/1000] time 1.578 (1.564) data 0.000 (0.001) loss 1.1396 (1.1280) acc 71.8750 (71.4134) lr 1.5878e-03 eta 14:23:18 +epoch [17/50] batch [885/1000] time 1.533 (1.564) data 0.000 (0.001) loss 1.0645 (1.1266) acc 68.7500 (71.4301) lr 1.5878e-03 eta 14:23:06 +epoch [17/50] batch [890/1000] time 1.531 (1.564) data 0.001 (0.001) loss 1.0137 (1.1275) acc 78.1250 (71.4185) lr 1.5878e-03 eta 14:22:56 +epoch [17/50] batch [895/1000] time 1.553 (1.564) data 0.001 (0.001) loss 1.1338 (1.1277) acc 75.0000 (71.4106) lr 1.5878e-03 eta 14:22:46 +epoch [17/50] batch [900/1000] time 1.579 (1.564) data 0.000 (0.001) loss 1.7354 (1.1285) acc 62.5000 (71.4028) lr 1.5878e-03 eta 14:22:38 +epoch [17/50] batch [905/1000] time 1.562 (1.564) data 0.000 (0.001) loss 1.4150 (1.1301) acc 75.0000 (71.3950) lr 1.5878e-03 eta 14:22:29 +epoch [17/50] batch [910/1000] time 1.575 (1.564) data 0.000 (0.001) loss 1.5830 (1.1294) acc 53.1250 (71.3977) lr 1.5878e-03 eta 14:22:21 +epoch [17/50] batch [915/1000] time 1.569 (1.564) data 0.000 (0.001) loss 1.1074 (1.1305) acc 78.1250 (71.3832) lr 1.5878e-03 eta 14:22:13 +epoch [17/50] batch [920/1000] time 1.576 (1.564) data 
0.000 (0.001) loss 1.3057 (1.1315) acc 68.7500 (71.3723) lr 1.5878e-03 eta 14:22:12 +epoch [17/50] batch [925/1000] time 1.566 (1.564) data 0.001 (0.001) loss 0.9629 (1.1302) acc 75.0000 (71.3818) lr 1.5878e-03 eta 14:22:06 +epoch [17/50] batch [930/1000] time 1.563 (1.564) data 0.000 (0.001) loss 0.8711 (1.1305) acc 68.7500 (71.3743) lr 1.5878e-03 eta 14:21:58 +epoch [17/50] batch [935/1000] time 1.541 (1.564) data 0.000 (0.001) loss 1.7490 (1.1310) acc 56.2500 (71.3670) lr 1.5878e-03 eta 14:21:48 +epoch [17/50] batch [940/1000] time 1.573 (1.564) data 0.000 (0.001) loss 0.8315 (1.1311) acc 84.3750 (71.3763) lr 1.5878e-03 eta 14:21:39 +epoch [17/50] batch [945/1000] time 1.554 (1.564) data 0.000 (0.001) loss 1.2666 (1.1313) acc 78.1250 (71.3790) lr 1.5878e-03 eta 14:21:30 +epoch [17/50] batch [950/1000] time 1.548 (1.564) data 0.000 (0.001) loss 1.3398 (1.1311) acc 68.7500 (71.3684) lr 1.5878e-03 eta 14:21:20 +epoch [17/50] batch [955/1000] time 1.537 (1.564) data 0.000 (0.001) loss 1.2188 (1.1307) acc 68.7500 (71.3907) lr 1.5878e-03 eta 14:21:10 +epoch [17/50] batch [960/1000] time 1.711 (1.564) data 0.000 (0.001) loss 0.9336 (1.1294) acc 75.0000 (71.3997) lr 1.5878e-03 eta 14:21:03 +epoch [17/50] batch [965/1000] time 1.556 (1.564) data 0.000 (0.001) loss 0.8032 (1.1288) acc 78.1250 (71.4249) lr 1.5878e-03 eta 14:20:53 +epoch [17/50] batch [970/1000] time 1.547 (1.564) data 0.000 (0.001) loss 0.8711 (1.1281) acc 84.3750 (71.4369) lr 1.5878e-03 eta 14:20:43 +epoch [17/50] batch [975/1000] time 1.573 (1.564) data 0.000 (0.001) loss 0.9751 (1.1278) acc 75.0000 (71.4487) lr 1.5878e-03 eta 14:20:36 +epoch [17/50] batch [980/1000] time 1.564 (1.564) data 0.001 (0.001) loss 0.8408 (1.1281) acc 78.1250 (71.4541) lr 1.5878e-03 eta 14:20:29 +epoch [17/50] batch [985/1000] time 1.542 (1.564) data 0.001 (0.001) loss 0.9102 (1.1283) acc 81.2500 (71.4562) lr 1.5878e-03 eta 14:20:20 +epoch [17/50] batch [990/1000] time 1.528 (1.563) data 0.000 (0.001) loss 0.8745 (1.1282) acc 
75.0000 (71.4520) lr 1.5878e-03 eta 14:20:09 +epoch [17/50] batch [995/1000] time 1.575 (1.563) data 0.000 (0.001) loss 1.2266 (1.1277) acc 65.6250 (71.4604) lr 1.5878e-03 eta 14:19:58 +epoch [17/50] batch [1000/1000] time 1.566 (1.563) data 0.000 (0.001) loss 1.1641 (1.1282) acc 75.0000 (71.4531) lr 1.5358e-03 eta 14:19:48 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,179 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [18/50] batch [5/1000] time 1.562 (1.673) data 0.000 (0.176) loss 0.9316 (1.1216) acc 78.1250 (71.8750) lr 1.5358e-03 eta 15:19:47 +epoch [18/50] batch [10/1000] time 1.562 (1.616) data 0.000 (0.089) loss 0.9238 (1.1423) acc 71.8750 (70.3125) lr 1.5358e-03 eta 14:48:20 +epoch [18/50] batch [15/1000] time 1.581 (1.597) data 0.000 (0.059) loss 0.9238 (1.1020) acc 78.1250 (72.0833) lr 1.5358e-03 eta 14:38:03 +epoch [18/50] batch [20/1000] time 1.567 (1.588) data 0.000 (0.044) loss 1.7285 (1.1349) acc 59.3750 (72.1875) lr 1.5358e-03 eta 14:33:04 +epoch [18/50] batch [25/1000] time 1.596 (1.587) data 0.000 (0.036) loss 1.8154 (1.1662) acc 62.5000 (71.1250) lr 1.5358e-03 eta 14:32:06 +epoch [18/50] batch [30/1000] time 1.566 (1.584) data 0.001 (0.030) loss 1.0127 (1.1749) acc 71.8750 (71.1458) lr 1.5358e-03 eta 14:30:11 +epoch [18/50] batch [35/1000] time 1.572 (1.591) data 0.000 (0.026) loss 0.5137 (1.1602) acc 90.6250 (71.9643) lr 1.5358e-03 eta 14:34:02 +epoch [18/50] batch [40/1000] time 1.566 (1.588) data 0.001 (0.023) loss 0.6484 (1.1288) acc 87.5000 (72.6562) lr 1.5358e-03 eta 14:32:34 +epoch [18/50] batch [45/1000] time 1.563 (1.586) data 0.001 (0.020) loss 1.4619 (1.1500) acc 56.2500 (71.7361) lr 1.5358e-03 eta 14:31:13 +epoch [18/50] batch [50/1000] time 1.559 (1.584) data 0.001 (0.018) loss 1.2324 (1.1447) acc 75.0000 (71.8125) lr 1.5358e-03 eta 14:29:47 +epoch [18/50] batch 
[55/1000] time 1.566 (1.582) data 0.000 (0.017) loss 1.0928 (1.1379) acc 75.0000 (71.6477) lr 1.5358e-03 eta 14:28:36 +epoch [18/50] batch [60/1000] time 1.591 (1.581) data 0.001 (0.015) loss 0.9907 (1.1262) acc 71.8750 (71.9792) lr 1.5358e-03 eta 14:27:51 +epoch [18/50] batch [65/1000] time 1.554 (1.579) data 0.001 (0.014) loss 1.0352 (1.1164) acc 71.8750 (72.2115) lr 1.5358e-03 eta 14:26:56 +epoch [18/50] batch [70/1000] time 1.571 (1.578) data 0.000 (0.013) loss 1.4814 (1.1190) acc 62.5000 (72.1429) lr 1.5358e-03 eta 14:26:03 +epoch [18/50] batch [75/1000] time 1.564 (1.577) data 0.000 (0.012) loss 1.4551 (1.1404) acc 65.6250 (71.6667) lr 1.5358e-03 eta 14:25:11 +epoch [18/50] batch [80/1000] time 1.559 (1.576) data 0.001 (0.012) loss 1.2236 (1.1642) acc 68.7500 (70.9766) lr 1.5358e-03 eta 14:24:35 +epoch [18/50] batch [85/1000] time 1.563 (1.575) data 0.001 (0.011) loss 0.7358 (1.1542) acc 81.2500 (71.1397) lr 1.5358e-03 eta 14:24:00 +epoch [18/50] batch [90/1000] time 1.574 (1.574) data 0.000 (0.010) loss 0.9077 (1.1524) acc 75.0000 (71.1458) lr 1.5358e-03 eta 14:23:12 +epoch [18/50] batch [95/1000] time 1.563 (1.573) data 0.001 (0.010) loss 0.9302 (1.1570) acc 71.8750 (70.7895) lr 1.5358e-03 eta 14:22:55 +epoch [18/50] batch [100/1000] time 1.554 (1.573) data 0.000 (0.009) loss 1.0371 (1.1530) acc 71.8750 (70.6875) lr 1.5358e-03 eta 14:22:22 +epoch [18/50] batch [105/1000] time 1.566 (1.572) data 0.000 (0.009) loss 1.1416 (1.1616) acc 78.1250 (70.7440) lr 1.5358e-03 eta 14:22:03 +epoch [18/50] batch [110/1000] time 1.569 (1.572) data 0.000 (0.009) loss 1.0469 (1.1573) acc 78.1250 (70.9943) lr 1.5358e-03 eta 14:21:36 +epoch [18/50] batch [115/1000] time 1.542 (1.571) data 0.000 (0.008) loss 0.7451 (1.1603) acc 71.8750 (70.8152) lr 1.5358e-03 eta 14:21:08 +epoch [18/50] batch [120/1000] time 1.549 (1.570) data 0.000 (0.008) loss 0.6084 (1.1488) acc 87.5000 (71.1719) lr 1.5358e-03 eta 14:20:35 +epoch [18/50] batch [125/1000] time 1.557 (1.570) data 0.001 (0.008) 
loss 0.9165 (1.1449) acc 78.1250 (71.1500) lr 1.5358e-03 eta 14:20:09 +epoch [18/50] batch [130/1000] time 1.573 (1.570) data 0.000 (0.007) loss 0.8799 (1.1416) acc 81.2500 (71.2981) lr 1.5358e-03 eta 14:19:50 +epoch [18/50] batch [135/1000] time 1.539 (1.569) data 0.000 (0.007) loss 1.9521 (1.1369) acc 56.2500 (71.4352) lr 1.5358e-03 eta 14:19:18 +epoch [18/50] batch [140/1000] time 1.583 (1.570) data 0.000 (0.007) loss 1.6191 (1.1304) acc 62.5000 (71.4732) lr 1.5358e-03 eta 14:19:46 +epoch [18/50] batch [145/1000] time 1.554 (1.570) data 0.000 (0.007) loss 1.1113 (1.1335) acc 71.8750 (71.5733) lr 1.5358e-03 eta 14:19:34 +epoch [18/50] batch [150/1000] time 1.565 (1.570) data 0.000 (0.006) loss 0.8965 (1.1324) acc 78.1250 (71.5625) lr 1.5358e-03 eta 14:19:20 +epoch [18/50] batch [155/1000] time 1.562 (1.569) data 0.000 (0.006) loss 1.1914 (1.1304) acc 78.1250 (71.6734) lr 1.5358e-03 eta 14:19:05 +epoch [18/50] batch [160/1000] time 1.598 (1.569) data 0.000 (0.006) loss 1.5332 (1.1343) acc 53.1250 (71.5430) lr 1.5358e-03 eta 14:18:58 +epoch [18/50] batch [165/1000] time 1.556 (1.569) data 0.000 (0.006) loss 1.8594 (1.1311) acc 50.0000 (71.6667) lr 1.5358e-03 eta 14:18:38 +epoch [18/50] batch [170/1000] time 1.557 (1.569) data 0.000 (0.006) loss 1.5703 (1.1332) acc 56.2500 (71.6544) lr 1.5358e-03 eta 14:18:17 +epoch [18/50] batch [175/1000] time 1.565 (1.568) data 0.000 (0.006) loss 1.3193 (1.1371) acc 75.0000 (71.5893) lr 1.5358e-03 eta 14:18:04 +epoch [18/50] batch [180/1000] time 1.549 (1.568) data 0.000 (0.005) loss 0.6943 (1.1351) acc 81.2500 (71.5451) lr 1.5358e-03 eta 14:17:51 +epoch [18/50] batch [185/1000] time 1.573 (1.569) data 0.000 (0.005) loss 1.1689 (1.1330) acc 71.8750 (71.6047) lr 1.5358e-03 eta 14:18:11 +epoch [18/50] batch [190/1000] time 1.564 (1.569) data 0.000 (0.005) loss 1.3350 (1.1349) acc 65.6250 (71.5461) lr 1.5358e-03 eta 14:17:51 +epoch [18/50] batch [195/1000] time 1.542 (1.569) data 0.000 (0.005) loss 1.2363 (1.1340) acc 65.6250 
(71.5545) lr 1.5358e-03 eta 14:17:36 +epoch [18/50] batch [200/1000] time 1.552 (1.568) data 0.000 (0.005) loss 2.1035 (1.1436) acc 53.1250 (71.3281) lr 1.5358e-03 eta 14:17:19 +epoch [18/50] batch [205/1000] time 1.556 (1.568) data 0.000 (0.005) loss 0.6553 (1.1420) acc 75.0000 (71.2500) lr 1.5358e-03 eta 14:16:58 +epoch [18/50] batch [210/1000] time 1.567 (1.568) data 0.000 (0.005) loss 1.2217 (1.1387) acc 65.6250 (71.2946) lr 1.5358e-03 eta 14:16:46 +epoch [18/50] batch [215/1000] time 1.567 (1.568) data 0.000 (0.005) loss 1.5059 (1.1437) acc 65.6250 (71.2500) lr 1.5358e-03 eta 14:16:35 +epoch [18/50] batch [220/1000] time 1.552 (1.567) data 0.000 (0.004) loss 0.8682 (1.1492) acc 71.8750 (71.1364) lr 1.5358e-03 eta 14:16:14 +epoch [18/50] batch [225/1000] time 1.708 (1.567) data 0.000 (0.004) loss 1.4326 (1.1472) acc 65.6250 (71.0972) lr 1.5358e-03 eta 14:16:14 +epoch [18/50] batch [230/1000] time 1.553 (1.567) data 0.000 (0.004) loss 1.5254 (1.1501) acc 65.6250 (70.9918) lr 1.5358e-03 eta 14:16:02 +epoch [18/50] batch [235/1000] time 1.568 (1.567) data 0.000 (0.004) loss 1.5879 (1.1497) acc 62.5000 (71.0239) lr 1.5358e-03 eta 14:15:45 +epoch [18/50] batch [240/1000] time 1.529 (1.567) data 0.000 (0.004) loss 1.3115 (1.1503) acc 71.8750 (70.9766) lr 1.5358e-03 eta 14:15:26 +epoch [18/50] batch [245/1000] time 1.558 (1.567) data 0.000 (0.004) loss 0.7251 (1.1468) acc 84.3750 (70.9949) lr 1.5358e-03 eta 14:15:13 +epoch [18/50] batch [250/1000] time 1.557 (1.566) data 0.000 (0.004) loss 1.4375 (1.1489) acc 59.3750 (71.0250) lr 1.5358e-03 eta 14:14:53 +epoch [18/50] batch [255/1000] time 1.577 (1.566) data 0.001 (0.004) loss 1.1113 (1.1455) acc 68.7500 (71.1152) lr 1.5358e-03 eta 14:14:38 +epoch [18/50] batch [260/1000] time 1.562 (1.566) data 0.000 (0.004) loss 1.7988 (1.1471) acc 65.6250 (71.1058) lr 1.5358e-03 eta 14:14:27 +epoch [18/50] batch [265/1000] time 1.564 (1.566) data 0.000 (0.004) loss 1.6006 (1.1491) acc 59.3750 (71.0731) lr 1.5358e-03 eta 14:14:15 
+epoch [18/50] batch [270/1000] time 1.544 (1.566) data 0.000 (0.004) loss 0.9224 (1.1444) acc 71.8750 (71.1343) lr 1.5358e-03 eta 14:14:03 +epoch [18/50] batch [275/1000] time 1.565 (1.566) data 0.000 (0.004) loss 1.7031 (1.1469) acc 65.6250 (71.1023) lr 1.5358e-03 eta 14:13:52 +epoch [18/50] batch [280/1000] time 1.535 (1.565) data 0.000 (0.004) loss 1.2861 (1.1475) acc 68.7500 (71.0603) lr 1.5358e-03 eta 14:13:39 +epoch [18/50] batch [285/1000] time 1.556 (1.565) data 0.001 (0.004) loss 0.7964 (1.1455) acc 81.2500 (71.0965) lr 1.5358e-03 eta 14:13:24 +epoch [18/50] batch [290/1000] time 1.535 (1.565) data 0.000 (0.003) loss 0.7754 (1.1454) acc 81.2500 (71.1315) lr 1.5358e-03 eta 14:13:22 +epoch [18/50] batch [295/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.2070 (1.1434) acc 68.7500 (71.1970) lr 1.5358e-03 eta 14:13:08 +epoch [18/50] batch [300/1000] time 1.574 (1.565) data 0.000 (0.003) loss 1.0664 (1.1456) acc 68.7500 (71.1562) lr 1.5358e-03 eta 14:13:04 +epoch [18/50] batch [305/1000] time 1.606 (1.565) data 0.000 (0.003) loss 1.4570 (1.1474) acc 71.8750 (71.1168) lr 1.5358e-03 eta 14:12:58 +epoch [18/50] batch [310/1000] time 1.571 (1.565) data 0.001 (0.003) loss 1.0020 (1.1444) acc 75.0000 (71.1593) lr 1.5358e-03 eta 14:12:47 +epoch [18/50] batch [315/1000] time 1.562 (1.565) data 0.000 (0.003) loss 0.7905 (1.1420) acc 84.3750 (71.2897) lr 1.5358e-03 eta 14:12:35 +epoch [18/50] batch [320/1000] time 1.575 (1.565) data 0.000 (0.003) loss 1.3730 (1.1408) acc 56.2500 (71.2598) lr 1.5358e-03 eta 14:12:28 +epoch [18/50] batch [325/1000] time 1.547 (1.565) data 0.000 (0.003) loss 1.3584 (1.1412) acc 62.5000 (71.2788) lr 1.5358e-03 eta 14:12:16 +epoch [18/50] batch [330/1000] time 1.564 (1.565) data 0.001 (0.003) loss 1.5234 (1.1426) acc 62.5000 (71.2879) lr 1.5358e-03 eta 14:12:07 +epoch [18/50] batch [335/1000] time 1.549 (1.565) data 0.000 (0.003) loss 1.0430 (1.1413) acc 65.6250 (71.2407) lr 1.5358e-03 eta 14:12:11 +epoch [18/50] batch [340/1000] time 
1.571 (1.565) data 0.000 (0.003) loss 0.8828 (1.1459) acc 75.0000 (71.1121) lr 1.5358e-03 eta 14:11:58 +epoch [18/50] batch [345/1000] time 1.576 (1.565) data 0.001 (0.003) loss 1.4434 (1.1467) acc 59.3750 (71.1232) lr 1.5358e-03 eta 14:11:57 +epoch [18/50] batch [350/1000] time 1.583 (1.565) data 0.000 (0.003) loss 1.1748 (1.1504) acc 65.6250 (71.0536) lr 1.5358e-03 eta 14:11:52 +epoch [18/50] batch [355/1000] time 1.564 (1.565) data 0.000 (0.003) loss 1.6777 (1.1490) acc 71.8750 (71.1092) lr 1.5358e-03 eta 14:11:45 +epoch [18/50] batch [360/1000] time 1.541 (1.565) data 0.000 (0.003) loss 1.3008 (1.1529) acc 65.6250 (70.9809) lr 1.5358e-03 eta 14:11:29 +epoch [18/50] batch [365/1000] time 1.531 (1.565) data 0.000 (0.003) loss 0.9922 (1.1504) acc 78.1250 (71.0103) lr 1.5358e-03 eta 14:11:15 +epoch [18/50] batch [370/1000] time 1.571 (1.565) data 0.001 (0.003) loss 1.1699 (1.1509) acc 75.0000 (71.0473) lr 1.5358e-03 eta 14:11:07 +epoch [18/50] batch [375/1000] time 1.546 (1.565) data 0.000 (0.003) loss 1.4385 (1.1517) acc 68.7500 (71.0250) lr 1.5358e-03 eta 14:10:59 +epoch [18/50] batch [380/1000] time 1.575 (1.566) data 0.000 (0.003) loss 1.0332 (1.1487) acc 81.2500 (71.1431) lr 1.5358e-03 eta 14:11:06 +epoch [18/50] batch [385/1000] time 1.562 (1.565) data 0.000 (0.003) loss 0.9375 (1.1473) acc 78.1250 (71.2338) lr 1.5358e-03 eta 14:10:57 +epoch [18/50] batch [390/1000] time 1.551 (1.565) data 0.000 (0.003) loss 0.9434 (1.1444) acc 68.7500 (71.2821) lr 1.5358e-03 eta 14:10:43 +epoch [18/50] batch [395/1000] time 1.533 (1.565) data 0.000 (0.003) loss 0.9351 (1.1412) acc 71.8750 (71.3687) lr 1.5358e-03 eta 14:10:33 +epoch [18/50] batch [400/1000] time 1.574 (1.565) data 0.000 (0.003) loss 1.7295 (1.1427) acc 68.7500 (71.3594) lr 1.5358e-03 eta 14:10:21 +epoch [18/50] batch [405/1000] time 1.579 (1.565) data 0.001 (0.003) loss 1.5303 (1.1435) acc 65.6250 (71.3812) lr 1.5358e-03 eta 14:10:14 +epoch [18/50] batch [410/1000] time 1.573 (1.565) data 0.001 (0.003) loss 
0.8184 (1.1427) acc 78.1250 (71.3796) lr 1.5358e-03 eta 14:10:07 +epoch [18/50] batch [415/1000] time 1.539 (1.565) data 0.000 (0.003) loss 1.1465 (1.1428) acc 71.8750 (71.3630) lr 1.5358e-03 eta 14:10:00 +epoch [18/50] batch [420/1000] time 1.554 (1.565) data 0.001 (0.003) loss 0.8003 (1.1424) acc 78.1250 (71.3765) lr 1.5358e-03 eta 14:09:47 +epoch [18/50] batch [425/1000] time 1.566 (1.565) data 0.001 (0.003) loss 1.5791 (1.1406) acc 62.5000 (71.3897) lr 1.5358e-03 eta 14:09:36 +epoch [18/50] batch [430/1000] time 1.567 (1.565) data 0.000 (0.002) loss 1.2373 (1.1390) acc 62.5000 (71.3808) lr 1.5358e-03 eta 14:09:27 +epoch [18/50] batch [435/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.2188 (1.1403) acc 68.7500 (71.3649) lr 1.5358e-03 eta 14:09:17 +epoch [18/50] batch [440/1000] time 1.574 (1.565) data 0.000 (0.002) loss 0.6499 (1.1386) acc 84.3750 (71.4134) lr 1.5358e-03 eta 14:09:19 +epoch [18/50] batch [445/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.2920 (1.1370) acc 62.5000 (71.4326) lr 1.5358e-03 eta 14:09:10 +epoch [18/50] batch [450/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.9429 (1.1344) acc 75.0000 (71.4861) lr 1.5358e-03 eta 14:09:01 +epoch [18/50] batch [455/1000] time 1.588 (1.565) data 0.000 (0.002) loss 1.7432 (1.1359) acc 62.5000 (71.4904) lr 1.5358e-03 eta 14:08:55 +epoch [18/50] batch [460/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.9126 (1.1357) acc 71.8750 (71.4946) lr 1.5358e-03 eta 14:08:44 +epoch [18/50] batch [465/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.1270 (1.1342) acc 65.6250 (71.5457) lr 1.5358e-03 eta 14:08:37 +epoch [18/50] batch [470/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.0098 (1.1347) acc 68.7500 (71.5492) lr 1.5358e-03 eta 14:08:30 +epoch [18/50] batch [475/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.5879 (1.1347) acc 71.8750 (71.5658) lr 1.5358e-03 eta 14:08:20 +epoch [18/50] batch [480/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.7007 (1.1320) acc 84.3750 (71.6276) 
lr 1.5358e-03 eta 14:08:17 +epoch [18/50] batch [485/1000] time 1.572 (1.565) data 0.001 (0.002) loss 1.2607 (1.1323) acc 71.8750 (71.6108) lr 1.5358e-03 eta 14:08:21 +epoch [18/50] batch [490/1000] time 1.568 (1.565) data 0.001 (0.002) loss 1.0254 (1.1320) acc 78.1250 (71.6008) lr 1.5358e-03 eta 14:08:10 +epoch [18/50] batch [495/1000] time 1.583 (1.565) data 0.000 (0.002) loss 1.2100 (1.1349) acc 78.1250 (71.5783) lr 1.5358e-03 eta 14:08:03 +epoch [18/50] batch [500/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.3535 (1.1369) acc 65.6250 (71.5062) lr 1.5358e-03 eta 14:07:51 +epoch [18/50] batch [505/1000] time 1.528 (1.565) data 0.000 (0.002) loss 1.0381 (1.1348) acc 65.6250 (71.5099) lr 1.5358e-03 eta 14:07:35 +epoch [18/50] batch [510/1000] time 1.553 (1.565) data 0.001 (0.002) loss 1.0586 (1.1364) acc 71.8750 (71.5196) lr 1.5358e-03 eta 14:07:32 +epoch [18/50] batch [515/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.0586 (1.1347) acc 75.0000 (71.5595) lr 1.5358e-03 eta 14:07:21 +epoch [18/50] batch [520/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.8149 (1.1344) acc 68.7500 (71.5685) lr 1.5358e-03 eta 14:07:12 +epoch [18/50] batch [525/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.2871 (1.1351) acc 59.3750 (71.5417) lr 1.5358e-03 eta 14:07:03 +epoch [18/50] batch [530/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.8398 (1.1353) acc 71.8750 (71.4917) lr 1.5358e-03 eta 14:07:05 +epoch [18/50] batch [535/1000] time 1.567 (1.565) data 0.000 (0.002) loss 1.0723 (1.1332) acc 78.1250 (71.5421) lr 1.5358e-03 eta 14:06:54 +epoch [18/50] batch [540/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.5518 (1.1339) acc 59.3750 (71.5162) lr 1.5358e-03 eta 14:06:44 +epoch [18/50] batch [545/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.5371 (1.1316) acc 87.5000 (71.5940) lr 1.5358e-03 eta 14:06:34 +epoch [18/50] batch [550/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.3760 (1.1310) acc 65.6250 (71.6080) lr 1.5358e-03 eta 14:06:26 +epoch 
[18/50] batch [555/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.8442 (1.1305) acc 81.2500 (71.6667) lr 1.5358e-03 eta 14:06:16 +epoch [18/50] batch [560/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.3350 (1.1312) acc 68.7500 (71.6518) lr 1.5358e-03 eta 14:06:09 +epoch [18/50] batch [565/1000] time 1.543 (1.565) data 0.001 (0.002) loss 0.7393 (1.1300) acc 78.1250 (71.6593) lr 1.5358e-03 eta 14:05:59 +epoch [18/50] batch [570/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.0264 (1.1294) acc 78.1250 (71.6502) lr 1.5358e-03 eta 14:05:45 +epoch [18/50] batch [575/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.6772 (1.1278) acc 81.2500 (71.6957) lr 1.5358e-03 eta 14:05:35 +epoch [18/50] batch [580/1000] time 1.540 (1.565) data 0.000 (0.002) loss 0.7695 (1.1269) acc 75.0000 (71.7349) lr 1.5358e-03 eta 14:05:26 +epoch [18/50] batch [585/1000] time 1.574 (1.565) data 0.000 (0.002) loss 0.7817 (1.1251) acc 68.7500 (71.7575) lr 1.5358e-03 eta 14:05:20 +epoch [18/50] batch [590/1000] time 1.705 (1.565) data 0.000 (0.002) loss 1.4082 (1.1270) acc 68.7500 (71.7214) lr 1.5358e-03 eta 14:05:17 +epoch [18/50] batch [595/1000] time 1.564 (1.565) data 0.001 (0.002) loss 0.9561 (1.1279) acc 68.7500 (71.6964) lr 1.5358e-03 eta 14:05:06 +epoch [18/50] batch [600/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.6289 (1.1284) acc 59.3750 (71.6615) lr 1.5358e-03 eta 14:04:54 +epoch [18/50] batch [605/1000] time 1.548 (1.564) data 0.000 (0.002) loss 0.7148 (1.1262) acc 84.3750 (71.6839) lr 1.5358e-03 eta 14:04:40 +epoch [18/50] batch [610/1000] time 1.564 (1.564) data 0.001 (0.002) loss 0.9185 (1.1256) acc 78.1250 (71.7264) lr 1.5358e-03 eta 14:04:31 +epoch [18/50] batch [615/1000] time 1.577 (1.564) data 0.001 (0.002) loss 1.6924 (1.1254) acc 53.1250 (71.7175) lr 1.5358e-03 eta 14:04:21 +epoch [18/50] batch [620/1000] time 1.578 (1.564) data 0.000 (0.002) loss 1.2295 (1.1256) acc 75.0000 (71.7087) lr 1.5358e-03 eta 14:04:14 +epoch [18/50] batch [625/1000] time 1.558 
(1.564) data 0.000 (0.002) loss 1.5361 (1.1275) acc 65.6250 (71.6650) lr 1.5358e-03 eta 14:04:03 +epoch [18/50] batch [630/1000] time 1.536 (1.564) data 0.000 (0.002) loss 1.0225 (1.1273) acc 71.8750 (71.6567) lr 1.5358e-03 eta 14:03:51 +epoch [18/50] batch [635/1000] time 1.707 (1.564) data 0.001 (0.002) loss 1.3057 (1.1284) acc 78.1250 (71.6634) lr 1.5358e-03 eta 14:03:47 +epoch [18/50] batch [640/1000] time 1.570 (1.564) data 0.001 (0.002) loss 1.1172 (1.1288) acc 68.7500 (71.6504) lr 1.5358e-03 eta 14:03:37 +epoch [18/50] batch [645/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.0146 (1.1293) acc 71.8750 (71.6279) lr 1.5358e-03 eta 14:03:28 +epoch [18/50] batch [650/1000] time 1.555 (1.564) data 0.001 (0.002) loss 1.1309 (1.1287) acc 71.8750 (71.6587) lr 1.5358e-03 eta 14:03:20 +epoch [18/50] batch [655/1000] time 1.553 (1.564) data 0.000 (0.002) loss 1.3301 (1.1297) acc 71.8750 (71.6460) lr 1.5358e-03 eta 14:03:09 +epoch [18/50] batch [660/1000] time 1.582 (1.564) data 0.001 (0.002) loss 1.7861 (1.1310) acc 62.5000 (71.6572) lr 1.5358e-03 eta 14:03:04 +epoch [18/50] batch [665/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.7563 (1.1298) acc 75.0000 (71.6964) lr 1.5358e-03 eta 14:02:54 +epoch [18/50] batch [670/1000] time 1.559 (1.564) data 0.000 (0.002) loss 1.1865 (1.1303) acc 71.8750 (71.6931) lr 1.5358e-03 eta 14:02:43 +epoch [18/50] batch [675/1000] time 1.568 (1.564) data 0.000 (0.002) loss 1.5391 (1.1304) acc 65.6250 (71.6759) lr 1.5358e-03 eta 14:02:36 +epoch [18/50] batch [680/1000] time 1.556 (1.564) data 0.000 (0.002) loss 0.8071 (1.1310) acc 87.5000 (71.6636) lr 1.5358e-03 eta 14:02:35 +epoch [18/50] batch [685/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.5254 (1.1314) acc 62.5000 (71.6469) lr 1.5358e-03 eta 14:02:26 +epoch [18/50] batch [690/1000] time 1.555 (1.564) data 0.000 (0.002) loss 0.8721 (1.1294) acc 78.1250 (71.6938) lr 1.5358e-03 eta 14:02:14 +epoch [18/50] batch [695/1000] time 1.561 (1.564) data 0.000 (0.002) loss 0.8667 
(1.1300) acc 81.2500 (71.7131) lr 1.5358e-03 eta 14:02:03 +epoch [18/50] batch [700/1000] time 1.552 (1.564) data 0.000 (0.002) loss 1.6885 (1.1304) acc 53.1250 (71.7009) lr 1.5358e-03 eta 14:01:54 +epoch [18/50] batch [705/1000] time 1.538 (1.564) data 0.000 (0.002) loss 1.2637 (1.1302) acc 71.8750 (71.7199) lr 1.5358e-03 eta 14:01:44 +epoch [18/50] batch [710/1000] time 1.565 (1.564) data 0.001 (0.002) loss 0.9067 (1.1292) acc 75.0000 (71.7121) lr 1.5358e-03 eta 14:01:34 +epoch [18/50] batch [715/1000] time 1.570 (1.564) data 0.000 (0.002) loss 1.5459 (1.1312) acc 71.8750 (71.6914) lr 1.5358e-03 eta 14:01:24 +epoch [18/50] batch [720/1000] time 1.590 (1.564) data 0.000 (0.002) loss 1.6650 (1.1323) acc 62.5000 (71.6970) lr 1.5358e-03 eta 14:01:19 +epoch [18/50] batch [725/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.5098 (1.1321) acc 71.8750 (71.7155) lr 1.5358e-03 eta 14:01:10 +epoch [18/50] batch [730/1000] time 1.574 (1.564) data 0.001 (0.002) loss 1.2080 (1.1326) acc 65.6250 (71.7080) lr 1.5358e-03 eta 14:01:02 +epoch [18/50] batch [735/1000] time 1.559 (1.564) data 0.001 (0.002) loss 1.1465 (1.1330) acc 71.8750 (71.7007) lr 1.5358e-03 eta 14:00:54 +epoch [18/50] batch [740/1000] time 1.538 (1.564) data 0.001 (0.002) loss 0.8228 (1.1320) acc 75.0000 (71.7188) lr 1.5358e-03 eta 14:00:44 +epoch [18/50] batch [745/1000] time 1.537 (1.564) data 0.000 (0.002) loss 1.3057 (1.1302) acc 62.5000 (71.7366) lr 1.5358e-03 eta 14:00:41 +epoch [18/50] batch [750/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.3906 (1.1306) acc 53.1250 (71.7000) lr 1.5358e-03 eta 14:00:30 +epoch [18/50] batch [755/1000] time 1.563 (1.564) data 0.000 (0.002) loss 0.8706 (1.1306) acc 78.1250 (71.7136) lr 1.5358e-03 eta 14:00:20 +epoch [18/50] batch [760/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.3076 (1.1320) acc 68.7500 (71.6859) lr 1.5358e-03 eta 14:00:08 +epoch [18/50] batch [765/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.5654 (1.1336) acc 62.5000 (71.6708) lr 
1.5358e-03 eta 13:59:59 +epoch [18/50] batch [770/1000] time 1.549 (1.563) data 0.001 (0.002) loss 1.2432 (1.1354) acc 68.7500 (71.6315) lr 1.5358e-03 eta 13:59:48 +epoch [18/50] batch [775/1000] time 1.553 (1.563) data 0.000 (0.002) loss 0.7368 (1.1360) acc 84.3750 (71.6371) lr 1.5358e-03 eta 13:59:42 +epoch [18/50] batch [780/1000] time 1.559 (1.563) data 0.001 (0.002) loss 1.3721 (1.1360) acc 68.7500 (71.6627) lr 1.5358e-03 eta 13:59:32 +epoch [18/50] batch [785/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.0576 (1.1353) acc 75.0000 (71.6919) lr 1.5358e-03 eta 13:59:22 +epoch [18/50] batch [790/1000] time 1.571 (1.564) data 0.001 (0.002) loss 1.0752 (1.1356) acc 78.1250 (71.6930) lr 1.5358e-03 eta 13:59:21 +epoch [18/50] batch [795/1000] time 1.529 (1.563) data 0.001 (0.002) loss 1.0928 (1.1356) acc 71.8750 (71.6863) lr 1.5358e-03 eta 13:59:10 +epoch [18/50] batch [800/1000] time 1.538 (1.563) data 0.000 (0.002) loss 0.5981 (1.1351) acc 87.5000 (71.7031) lr 1.5358e-03 eta 13:59:01 +epoch [18/50] batch [805/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.1436 (1.1349) acc 75.0000 (71.6887) lr 1.5358e-03 eta 13:58:54 +epoch [18/50] batch [810/1000] time 1.537 (1.563) data 0.001 (0.002) loss 0.8779 (1.1343) acc 71.8750 (71.7091) lr 1.5358e-03 eta 13:58:44 +epoch [18/50] batch [815/1000] time 1.562 (1.563) data 0.001 (0.002) loss 1.3027 (1.1365) acc 62.5000 (71.6258) lr 1.5358e-03 eta 13:58:35 +epoch [18/50] batch [820/1000] time 1.576 (1.563) data 0.000 (0.002) loss 0.9106 (1.1361) acc 78.1250 (71.6273) lr 1.5358e-03 eta 13:58:27 +epoch [18/50] batch [825/1000] time 1.559 (1.563) data 0.000 (0.002) loss 0.8379 (1.1353) acc 78.1250 (71.6439) lr 1.5358e-03 eta 13:58:19 +epoch [18/50] batch [830/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.7383 (1.1346) acc 62.5000 (71.6491) lr 1.5358e-03 eta 13:58:16 +epoch [18/50] batch [835/1000] time 1.564 (1.564) data 0.000 (0.001) loss 1.1113 (1.1337) acc 71.8750 (71.6542) lr 1.5358e-03 eta 13:58:10 +epoch [18/50] 
batch [840/1000] time 1.573 (1.564) data 0.000 (0.001) loss 1.1055 (1.1329) acc 87.5000 (71.6704) lr 1.5358e-03 eta 13:58:03 +epoch [18/50] batch [845/1000] time 1.585 (1.564) data 0.001 (0.001) loss 0.8018 (1.1325) acc 65.6250 (71.6494) lr 1.5358e-03 eta 13:57:56 +epoch [18/50] batch [850/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.5381 (1.1322) acc 56.2500 (71.6397) lr 1.5358e-03 eta 13:57:48 +epoch [18/50] batch [855/1000] time 1.558 (1.564) data 0.000 (0.001) loss 1.3623 (1.1342) acc 56.2500 (71.5972) lr 1.5358e-03 eta 13:57:41 +epoch [18/50] batch [860/1000] time 1.572 (1.564) data 0.001 (0.001) loss 0.8940 (1.1332) acc 81.2500 (71.6170) lr 1.5358e-03 eta 13:57:34 +epoch [18/50] batch [865/1000] time 1.551 (1.564) data 0.001 (0.001) loss 1.4336 (1.1330) acc 56.2500 (71.6040) lr 1.5358e-03 eta 13:57:24 +epoch [18/50] batch [870/1000] time 1.550 (1.564) data 0.000 (0.001) loss 1.8945 (1.1340) acc 59.3750 (71.6056) lr 1.5358e-03 eta 13:57:17 +epoch [18/50] batch [875/1000] time 1.568 (1.564) data 0.001 (0.001) loss 1.3613 (1.1343) acc 75.0000 (71.6000) lr 1.5358e-03 eta 13:57:09 +epoch [18/50] batch [880/1000] time 1.557 (1.564) data 0.000 (0.001) loss 1.4727 (1.1355) acc 68.7500 (71.5803) lr 1.5358e-03 eta 13:57:00 +epoch [18/50] batch [885/1000] time 1.555 (1.563) data 0.000 (0.001) loss 1.1816 (1.1351) acc 62.5000 (71.5855) lr 1.5358e-03 eta 13:56:50 +epoch [18/50] batch [890/1000] time 1.547 (1.563) data 0.000 (0.001) loss 1.2178 (1.1352) acc 71.8750 (71.5941) lr 1.5358e-03 eta 13:56:40 +epoch [18/50] batch [895/1000] time 1.550 (1.564) data 0.000 (0.001) loss 1.7178 (1.1371) acc 56.2500 (71.5642) lr 1.5358e-03 eta 13:56:36 +epoch [18/50] batch [900/1000] time 1.559 (1.563) data 0.000 (0.001) loss 1.0186 (1.1367) acc 75.0000 (71.5799) lr 1.5358e-03 eta 13:56:26 +epoch [18/50] batch [905/1000] time 1.558 (1.563) data 0.000 (0.001) loss 0.6782 (1.1364) acc 78.1250 (71.6126) lr 1.5358e-03 eta 13:56:15 +epoch [18/50] batch [910/1000] time 1.566 (1.563) 
data 0.000 (0.001) loss 0.9600 (1.1362) acc 75.0000 (71.6174) lr 1.5358e-03 eta 13:56:06 +epoch [18/50] batch [915/1000] time 1.560 (1.563) data 0.000 (0.001) loss 1.1035 (1.1356) acc 84.3750 (71.6564) lr 1.5358e-03 eta 13:55:57 +epoch [18/50] batch [920/1000] time 1.552 (1.563) data 0.001 (0.001) loss 0.8286 (1.1351) acc 78.1250 (71.6746) lr 1.5358e-03 eta 13:55:47 +epoch [18/50] batch [925/1000] time 1.557 (1.563) data 0.001 (0.001) loss 0.8062 (1.1358) acc 78.1250 (71.6689) lr 1.5358e-03 eta 13:55:40 +epoch [18/50] batch [930/1000] time 1.562 (1.563) data 0.000 (0.001) loss 1.1807 (1.1354) acc 68.7500 (71.6700) lr 1.5358e-03 eta 13:55:31 +epoch [18/50] batch [935/1000] time 1.564 (1.563) data 0.000 (0.001) loss 0.3958 (1.1340) acc 87.5000 (71.7079) lr 1.5358e-03 eta 13:55:20 +epoch [18/50] batch [940/1000] time 1.558 (1.563) data 0.000 (0.001) loss 1.4844 (1.1346) acc 62.5000 (71.7055) lr 1.5358e-03 eta 13:55:17 +epoch [18/50] batch [945/1000] time 1.550 (1.563) data 0.001 (0.001) loss 0.8799 (1.1352) acc 78.1250 (71.6931) lr 1.5358e-03 eta 13:55:08 +epoch [18/50] batch [950/1000] time 1.555 (1.563) data 0.000 (0.001) loss 0.8799 (1.1360) acc 71.8750 (71.6941) lr 1.5358e-03 eta 13:54:59 +epoch [18/50] batch [955/1000] time 1.541 (1.563) data 0.000 (0.001) loss 1.3242 (1.1362) acc 68.7500 (71.6950) lr 1.5358e-03 eta 13:54:50 +epoch [18/50] batch [960/1000] time 1.534 (1.563) data 0.000 (0.001) loss 1.1680 (1.1357) acc 65.6250 (71.7090) lr 1.5358e-03 eta 13:54:41 +epoch [18/50] batch [965/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.1396 (1.1359) acc 71.8750 (71.6969) lr 1.5358e-03 eta 13:54:31 +epoch [18/50] batch [970/1000] time 1.552 (1.563) data 0.000 (0.001) loss 1.4463 (1.1375) acc 65.6250 (71.6624) lr 1.5358e-03 eta 13:54:24 +epoch [18/50] batch [975/1000] time 1.577 (1.563) data 0.001 (0.001) loss 0.9624 (1.1377) acc 71.8750 (71.6538) lr 1.5358e-03 eta 13:54:14 +epoch [18/50] batch [980/1000] time 1.716 (1.563) data 0.000 (0.001) loss 1.1953 (1.1376) 
acc 71.8750 (71.6422) lr 1.5358e-03 eta 13:54:11 +epoch [18/50] batch [985/1000] time 1.553 (1.563) data 0.001 (0.001) loss 1.2109 (1.1373) acc 75.0000 (71.6466) lr 1.5358e-03 eta 13:54:02 +epoch [18/50] batch [990/1000] time 1.568 (1.563) data 0.000 (0.001) loss 0.5596 (1.1367) acc 84.3750 (71.6667) lr 1.5358e-03 eta 13:53:54 +epoch [18/50] batch [995/1000] time 1.557 (1.563) data 0.000 (0.001) loss 0.8232 (1.1380) acc 71.8750 (71.6112) lr 1.5358e-03 eta 13:53:44 +epoch [18/50] batch [1000/1000] time 1.582 (1.563) data 0.000 (0.001) loss 0.6934 (1.1371) acc 78.1250 (71.6219) lr 1.4818e-03 eta 13:53:36 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,195 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [19/50] batch [5/1000] time 1.531 (1.674) data 0.000 (0.176) loss 0.8545 (0.8663) acc 81.2500 (79.3750) lr 1.4818e-03 eta 14:52:45 +epoch [19/50] batch [10/1000] time 1.554 (1.610) data 0.000 (0.088) loss 1.1094 (1.0231) acc 68.7500 (76.5625) lr 1.4818e-03 eta 14:18:28 +epoch [19/50] batch [15/1000] time 1.795 (1.606) data 0.000 (0.059) loss 1.2852 (1.0726) acc 65.6250 (74.7917) lr 1.4818e-03 eta 14:16:02 +epoch [19/50] batch [20/1000] time 1.550 (1.594) data 0.001 (0.044) loss 0.6938 (1.0673) acc 78.1250 (73.2812) lr 1.4818e-03 eta 14:09:20 +epoch [19/50] batch [25/1000] time 1.556 (1.585) data 0.000 (0.036) loss 0.7271 (1.0367) acc 84.3750 (74.1250) lr 1.4818e-03 eta 14:04:38 +epoch [19/50] batch [30/1000] time 1.560 (1.581) data 0.000 (0.030) loss 1.2178 (1.0568) acc 65.6250 (73.2292) lr 1.4818e-03 eta 14:02:14 +epoch [19/50] batch [35/1000] time 1.569 (1.578) data 0.000 (0.026) loss 1.1875 (1.0818) acc 75.0000 (73.0357) lr 1.4818e-03 eta 14:00:46 +epoch [19/50] batch [40/1000] time 1.572 (1.577) data 0.001 (0.022) loss 0.7246 (1.0596) acc 84.3750 (73.7500) lr 1.4818e-03 eta 13:59:47 +epoch [19/50] 
batch [45/1000] time 1.556 (1.576) data 0.000 (0.020) loss 1.5117 (1.0695) acc 68.7500 (73.4028) lr 1.4818e-03 eta 13:59:05 +epoch [19/50] batch [50/1000] time 1.544 (1.574) data 0.000 (0.018) loss 0.9365 (1.0740) acc 81.2500 (73.1875) lr 1.4818e-03 eta 13:58:13 +epoch [19/50] batch [55/1000] time 1.574 (1.574) data 0.000 (0.016) loss 1.4053 (1.0766) acc 68.7500 (72.7841) lr 1.4818e-03 eta 13:57:46 +epoch [19/50] batch [60/1000] time 1.581 (1.573) data 0.000 (0.015) loss 0.9067 (1.0861) acc 78.1250 (72.7604) lr 1.4818e-03 eta 13:57:13 +epoch [19/50] batch [65/1000] time 1.567 (1.575) data 0.001 (0.014) loss 1.2051 (1.0829) acc 65.6250 (72.5962) lr 1.4818e-03 eta 13:58:18 +epoch [19/50] batch [70/1000] time 1.558 (1.573) data 0.000 (0.013) loss 0.7686 (1.0741) acc 81.2500 (72.8571) lr 1.4818e-03 eta 13:57:18 +epoch [19/50] batch [75/1000] time 1.539 (1.572) data 0.000 (0.012) loss 1.0010 (1.0678) acc 65.6250 (72.8750) lr 1.4818e-03 eta 13:56:41 +epoch [19/50] batch [80/1000] time 1.547 (1.572) data 0.000 (0.011) loss 1.1357 (1.0750) acc 71.8750 (72.7344) lr 1.4818e-03 eta 13:56:08 +epoch [19/50] batch [85/1000] time 1.535 (1.570) data 0.000 (0.011) loss 1.1572 (1.0828) acc 59.3750 (72.6471) lr 1.4818e-03 eta 13:55:10 +epoch [19/50] batch [90/1000] time 1.557 (1.569) data 0.001 (0.010) loss 0.9863 (1.0949) acc 78.1250 (72.6042) lr 1.4818e-03 eta 13:54:41 +epoch [19/50] batch [95/1000] time 1.549 (1.569) data 0.001 (0.010) loss 0.7773 (1.1054) acc 75.0000 (72.3355) lr 1.4818e-03 eta 13:54:03 +epoch [19/50] batch [100/1000] time 1.537 (1.568) data 0.000 (0.009) loss 1.1631 (1.1229) acc 78.1250 (71.8750) lr 1.4818e-03 eta 13:53:27 +epoch [19/50] batch [105/1000] time 1.544 (1.567) data 0.001 (0.009) loss 0.9448 (1.1340) acc 84.3750 (71.6964) lr 1.4818e-03 eta 13:53:11 +epoch [19/50] batch [110/1000] time 1.549 (1.567) data 0.000 (0.008) loss 1.7021 (1.1407) acc 59.3750 (71.4489) lr 1.4818e-03 eta 13:52:50 +epoch [19/50] batch [115/1000] time 1.545 (1.567) data 0.000 
(0.008) loss 0.8486 (1.1343) acc 81.2500 (71.4674) lr 1.4818e-03 eta 13:52:36 +epoch [19/50] batch [120/1000] time 1.547 (1.566) data 0.001 (0.008) loss 0.7212 (1.1292) acc 78.1250 (71.4583) lr 1.4818e-03 eta 13:52:13 +epoch [19/50] batch [125/1000] time 1.549 (1.566) data 0.001 (0.007) loss 1.4209 (1.1262) acc 68.7500 (71.4500) lr 1.4818e-03 eta 13:51:55 +epoch [19/50] batch [130/1000] time 1.538 (1.566) data 0.000 (0.007) loss 1.3271 (1.1289) acc 68.7500 (71.3942) lr 1.4818e-03 eta 13:51:32 +epoch [19/50] batch [135/1000] time 1.567 (1.565) data 0.000 (0.007) loss 1.4434 (1.1311) acc 71.8750 (71.3426) lr 1.4818e-03 eta 13:51:13 +epoch [19/50] batch [140/1000] time 1.559 (1.565) data 0.000 (0.007) loss 1.4570 (1.1387) acc 62.5000 (71.1830) lr 1.4818e-03 eta 13:50:57 +epoch [19/50] batch [145/1000] time 1.556 (1.564) data 0.000 (0.007) loss 1.5068 (1.1413) acc 65.6250 (71.1207) lr 1.4818e-03 eta 13:50:36 +epoch [19/50] batch [150/1000] time 1.559 (1.564) data 0.001 (0.006) loss 0.7812 (1.1386) acc 81.2500 (71.2708) lr 1.4818e-03 eta 13:50:16 +epoch [19/50] batch [155/1000] time 1.568 (1.564) data 0.001 (0.006) loss 1.2979 (1.1384) acc 65.6250 (71.3105) lr 1.4818e-03 eta 13:50:09 +epoch [19/50] batch [160/1000] time 1.547 (1.564) data 0.000 (0.006) loss 0.4556 (1.1437) acc 87.5000 (71.2305) lr 1.4818e-03 eta 13:49:50 +epoch [19/50] batch [165/1000] time 1.719 (1.565) data 0.000 (0.006) loss 1.4834 (1.1381) acc 59.3750 (71.2879) lr 1.4818e-03 eta 13:50:08 +epoch [19/50] batch [170/1000] time 1.558 (1.564) data 0.000 (0.006) loss 0.6973 (1.1371) acc 84.3750 (71.3603) lr 1.4818e-03 eta 13:49:46 +epoch [19/50] batch [175/1000] time 1.550 (1.564) data 0.000 (0.005) loss 0.8555 (1.1318) acc 68.7500 (71.4643) lr 1.4818e-03 eta 13:49:26 +epoch [19/50] batch [180/1000] time 1.561 (1.564) data 0.000 (0.005) loss 0.9775 (1.1283) acc 65.6250 (71.4410) lr 1.4818e-03 eta 13:49:11 +epoch [19/50] batch [185/1000] time 1.561 (1.563) data 0.000 (0.005) loss 0.9688 (1.1356) acc 
75.0000 (71.3007) lr 1.4818e-03 eta 13:48:58 +epoch [19/50] batch [190/1000] time 1.561 (1.563) data 0.000 (0.005) loss 0.7451 (1.1300) acc 81.2500 (71.4309) lr 1.4818e-03 eta 13:48:40 +epoch [19/50] batch [195/1000] time 1.540 (1.563) data 0.000 (0.005) loss 1.0439 (1.1318) acc 71.8750 (71.3141) lr 1.4818e-03 eta 13:48:26 +epoch [19/50] batch [200/1000] time 1.559 (1.563) data 0.000 (0.005) loss 1.1045 (1.1257) acc 84.3750 (71.5469) lr 1.4818e-03 eta 13:48:15 +epoch [19/50] batch [205/1000] time 1.528 (1.562) data 0.000 (0.005) loss 0.5293 (1.1207) acc 84.3750 (71.6311) lr 1.4818e-03 eta 13:47:55 +epoch [19/50] batch [210/1000] time 1.671 (1.563) data 0.000 (0.005) loss 1.4258 (1.1245) acc 59.3750 (71.5476) lr 1.4818e-03 eta 13:47:59 +epoch [19/50] batch [215/1000] time 1.584 (1.563) data 0.000 (0.005) loss 0.7603 (1.1205) acc 78.1250 (71.5552) lr 1.4818e-03 eta 13:48:02 +epoch [19/50] batch [220/1000] time 1.547 (1.563) data 0.000 (0.004) loss 0.7910 (1.1181) acc 78.1250 (71.6477) lr 1.4818e-03 eta 13:47:48 +epoch [19/50] batch [225/1000] time 1.557 (1.563) data 0.000 (0.004) loss 0.7563 (1.1193) acc 78.1250 (71.5833) lr 1.4818e-03 eta 13:47:39 +epoch [19/50] batch [230/1000] time 1.578 (1.563) data 0.000 (0.004) loss 0.8203 (1.1193) acc 81.2500 (71.6440) lr 1.4818e-03 eta 13:47:28 +epoch [19/50] batch [235/1000] time 1.567 (1.563) data 0.000 (0.004) loss 1.5107 (1.1198) acc 62.5000 (71.6622) lr 1.4818e-03 eta 13:47:26 +epoch [19/50] batch [240/1000] time 1.562 (1.563) data 0.000 (0.004) loss 1.2783 (1.1186) acc 59.3750 (71.6276) lr 1.4818e-03 eta 13:47:21 +epoch [19/50] batch [245/1000] time 1.576 (1.563) data 0.000 (0.004) loss 1.9414 (1.1225) acc 59.3750 (71.5561) lr 1.4818e-03 eta 13:47:20 +epoch [19/50] batch [250/1000] time 1.539 (1.563) data 0.000 (0.004) loss 0.9868 (1.1201) acc 75.0000 (71.5625) lr 1.4818e-03 eta 13:47:14 +epoch [19/50] batch [255/1000] time 1.585 (1.564) data 0.000 (0.004) loss 0.9204 (1.1225) acc 78.1250 (71.4828) lr 1.4818e-03 eta 
13:47:27 +epoch [19/50] batch [260/1000] time 1.547 (1.564) data 0.001 (0.004) loss 1.5684 (1.1262) acc 62.5000 (71.5144) lr 1.4818e-03 eta 13:47:21 +epoch [19/50] batch [265/1000] time 1.583 (1.564) data 0.001 (0.004) loss 1.1562 (1.1315) acc 71.8750 (71.4387) lr 1.4818e-03 eta 13:47:18 +epoch [19/50] batch [270/1000] time 1.555 (1.564) data 0.001 (0.004) loss 1.0156 (1.1295) acc 78.1250 (71.4583) lr 1.4818e-03 eta 13:47:09 +epoch [19/50] batch [275/1000] time 1.567 (1.564) data 0.001 (0.004) loss 1.4609 (1.1303) acc 71.8750 (71.5114) lr 1.4818e-03 eta 13:46:56 +epoch [19/50] batch [280/1000] time 1.552 (1.564) data 0.000 (0.004) loss 0.8916 (1.1302) acc 78.1250 (71.5513) lr 1.4818e-03 eta 13:46:39 +epoch [19/50] batch [285/1000] time 1.556 (1.563) data 0.000 (0.004) loss 0.6782 (1.1286) acc 81.2500 (71.6009) lr 1.4818e-03 eta 13:46:22 +epoch [19/50] batch [290/1000] time 1.562 (1.563) data 0.000 (0.003) loss 1.7598 (1.1284) acc 59.3750 (71.5948) lr 1.4818e-03 eta 13:46:11 +epoch [19/50] batch [295/1000] time 1.541 (1.563) data 0.000 (0.003) loss 1.8223 (1.1301) acc 56.2500 (71.5890) lr 1.4818e-03 eta 13:45:59 +epoch [19/50] batch [300/1000] time 1.534 (1.563) data 0.000 (0.003) loss 1.1006 (1.1329) acc 75.0000 (71.5417) lr 1.4818e-03 eta 13:45:45 +epoch [19/50] batch [305/1000] time 1.547 (1.563) data 0.000 (0.003) loss 1.2861 (1.1324) acc 68.7500 (71.5881) lr 1.4818e-03 eta 13:45:39 +epoch [19/50] batch [310/1000] time 1.575 (1.563) data 0.000 (0.003) loss 1.1543 (1.1336) acc 68.7500 (71.5927) lr 1.4818e-03 eta 13:45:35 +epoch [19/50] batch [315/1000] time 1.564 (1.563) data 0.000 (0.003) loss 1.3877 (1.1376) acc 71.8750 (71.5774) lr 1.4818e-03 eta 13:45:29 +epoch [19/50] batch [320/1000] time 1.575 (1.564) data 0.000 (0.003) loss 0.9858 (1.1350) acc 68.7500 (71.6016) lr 1.4818e-03 eta 13:45:42 +epoch [19/50] batch [325/1000] time 1.561 (1.564) data 0.000 (0.003) loss 1.0000 (1.1320) acc 71.8750 (71.7596) lr 1.4818e-03 eta 13:45:32 +epoch [19/50] batch 
[330/1000] time 1.586 (1.564) data 0.001 (0.003) loss 1.0488 (1.1301) acc 78.1250 (71.8182) lr 1.4818e-03 eta 13:45:23 +epoch [19/50] batch [335/1000] time 1.569 (1.564) data 0.000 (0.003) loss 1.1621 (1.1325) acc 68.7500 (71.7537) lr 1.4818e-03 eta 13:45:21 +epoch [19/50] batch [340/1000] time 1.577 (1.564) data 0.000 (0.003) loss 0.6216 (1.1267) acc 84.3750 (71.9210) lr 1.4818e-03 eta 13:45:23 +epoch [19/50] batch [345/1000] time 1.577 (1.564) data 0.000 (0.003) loss 1.3770 (1.1302) acc 62.5000 (71.8569) lr 1.4818e-03 eta 13:45:24 +epoch [19/50] batch [350/1000] time 1.582 (1.565) data 0.000 (0.003) loss 1.1367 (1.1311) acc 68.7500 (71.8036) lr 1.4818e-03 eta 13:45:17 +epoch [19/50] batch [355/1000] time 1.587 (1.565) data 0.000 (0.003) loss 1.1602 (1.1336) acc 68.7500 (71.6549) lr 1.4818e-03 eta 13:45:12 +epoch [19/50] batch [360/1000] time 1.561 (1.565) data 0.000 (0.003) loss 0.6953 (1.1326) acc 78.1250 (71.6840) lr 1.4818e-03 eta 13:45:02 +epoch [19/50] batch [365/1000] time 1.543 (1.565) data 0.000 (0.003) loss 1.4229 (1.1334) acc 68.7500 (71.6866) lr 1.4818e-03 eta 13:45:06 +epoch [19/50] batch [370/1000] time 1.560 (1.565) data 0.000 (0.003) loss 0.7944 (1.1317) acc 78.1250 (71.7483) lr 1.4818e-03 eta 13:44:52 +epoch [19/50] batch [375/1000] time 1.551 (1.565) data 0.000 (0.003) loss 1.2061 (1.1338) acc 68.7500 (71.6917) lr 1.4818e-03 eta 13:44:45 +epoch [19/50] batch [380/1000] time 1.556 (1.565) data 0.000 (0.003) loss 1.1582 (1.1322) acc 56.2500 (71.6776) lr 1.4818e-03 eta 13:44:31 +epoch [19/50] batch [385/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.4258 (1.1351) acc 62.5000 (71.6234) lr 1.4818e-03 eta 13:44:26 +epoch [19/50] batch [390/1000] time 1.546 (1.565) data 0.000 (0.003) loss 1.5947 (1.1364) acc 68.7500 (71.6186) lr 1.4818e-03 eta 13:44:17 +epoch [19/50] batch [395/1000] time 1.563 (1.565) data 0.000 (0.003) loss 0.9727 (1.1360) acc 68.7500 (71.5981) lr 1.4818e-03 eta 13:44:09 +epoch [19/50] batch [400/1000] time 1.557 (1.564) data 
0.000 (0.003) loss 1.6279 (1.1388) acc 62.5000 (71.5625) lr 1.4818e-03 eta 13:43:57 +epoch [19/50] batch [405/1000] time 1.553 (1.565) data 0.000 (0.003) loss 1.0654 (1.1384) acc 68.7500 (71.5432) lr 1.4818e-03 eta 13:43:56 +epoch [19/50] batch [410/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.7422 (1.1395) acc 62.5000 (71.5473) lr 1.4818e-03 eta 13:43:45 +epoch [19/50] batch [415/1000] time 1.537 (1.564) data 0.000 (0.003) loss 1.2881 (1.1407) acc 81.2500 (71.5587) lr 1.4818e-03 eta 13:43:31 +epoch [19/50] batch [420/1000] time 1.572 (1.564) data 0.000 (0.003) loss 1.1133 (1.1389) acc 65.6250 (71.5699) lr 1.4818e-03 eta 13:43:20 +epoch [19/50] batch [425/1000] time 1.554 (1.564) data 0.001 (0.002) loss 1.6084 (1.1412) acc 56.2500 (71.4853) lr 1.4818e-03 eta 13:43:13 +epoch [19/50] batch [430/1000] time 1.548 (1.564) data 0.001 (0.002) loss 1.2207 (1.1406) acc 71.8750 (71.5407) lr 1.4818e-03 eta 13:43:01 +epoch [19/50] batch [435/1000] time 1.593 (1.564) data 0.000 (0.002) loss 1.2461 (1.1397) acc 75.0000 (71.5661) lr 1.4818e-03 eta 13:42:50 +epoch [19/50] batch [440/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.5781 (1.1422) acc 68.7500 (71.5128) lr 1.4818e-03 eta 13:42:48 +epoch [19/50] batch [445/1000] time 1.563 (1.564) data 0.000 (0.002) loss 1.5068 (1.1432) acc 62.5000 (71.4817) lr 1.4818e-03 eta 13:42:43 +epoch [19/50] batch [450/1000] time 1.594 (1.565) data 0.000 (0.002) loss 0.4280 (1.1431) acc 87.5000 (71.4722) lr 1.4818e-03 eta 13:42:40 +epoch [19/50] batch [455/1000] time 1.580 (1.565) data 0.000 (0.002) loss 0.8423 (1.1416) acc 75.0000 (71.4698) lr 1.4818e-03 eta 13:42:38 +epoch [19/50] batch [460/1000] time 1.571 (1.565) data 0.000 (0.002) loss 1.2559 (1.1412) acc 71.8750 (71.4810) lr 1.4818e-03 eta 13:42:32 +epoch [19/50] batch [465/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.8262 (1.1414) acc 46.8750 (71.4180) lr 1.4818e-03 eta 13:42:23 +epoch [19/50] batch [470/1000] time 1.560 (1.565) data 0.001 (0.002) loss 1.0273 (1.1425) acc 
75.0000 (71.3963) lr 1.4818e-03 eta 13:42:22 +epoch [19/50] batch [475/1000] time 1.575 (1.565) data 0.000 (0.002) loss 1.0215 (1.1438) acc 84.3750 (71.4408) lr 1.4818e-03 eta 13:42:12 +epoch [19/50] batch [480/1000] time 1.546 (1.565) data 0.001 (0.002) loss 1.3848 (1.1460) acc 62.5000 (71.3997) lr 1.4818e-03 eta 13:41:58 +epoch [19/50] batch [485/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.0684 (1.1473) acc 81.2500 (71.3853) lr 1.4818e-03 eta 13:41:49 +epoch [19/50] batch [490/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.2812 (1.1481) acc 62.5000 (71.3329) lr 1.4818e-03 eta 13:41:40 +epoch [19/50] batch [495/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.8389 (1.1505) acc 56.2500 (71.2816) lr 1.4818e-03 eta 13:41:31 +epoch [19/50] batch [500/1000] time 1.570 (1.565) data 0.001 (0.002) loss 0.9922 (1.1505) acc 68.7500 (71.2562) lr 1.4818e-03 eta 13:41:24 +epoch [19/50] batch [505/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.9307 (1.1489) acc 78.1250 (71.3490) lr 1.4818e-03 eta 13:41:18 +epoch [19/50] batch [510/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.0898 (1.1468) acc 65.6250 (71.3848) lr 1.4818e-03 eta 13:41:12 +epoch [19/50] batch [515/1000] time 1.588 (1.565) data 0.000 (0.002) loss 0.8315 (1.1465) acc 81.2500 (71.3896) lr 1.4818e-03 eta 13:41:15 +epoch [19/50] batch [520/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.1758 (1.1473) acc 71.8750 (71.3762) lr 1.4818e-03 eta 13:41:08 +epoch [19/50] batch [525/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.3662 (1.1473) acc 62.5000 (71.3929) lr 1.4818e-03 eta 13:40:57 +epoch [19/50] batch [530/1000] time 1.570 (1.565) data 0.000 (0.002) loss 0.9336 (1.1479) acc 78.1250 (71.3856) lr 1.4818e-03 eta 13:40:47 +epoch [19/50] batch [535/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.1963 (1.1471) acc 68.7500 (71.4194) lr 1.4818e-03 eta 13:40:35 +epoch [19/50] batch [540/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.5869 (1.1498) acc 65.6250 (71.3484) lr 1.4818e-03 eta 
13:40:25 +epoch [19/50] batch [545/1000] time 1.568 (1.565) data 0.001 (0.002) loss 1.0781 (1.1510) acc 65.6250 (71.3303) lr 1.4818e-03 eta 13:40:16 +epoch [19/50] batch [550/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.6035 (1.1533) acc 62.5000 (71.3011) lr 1.4818e-03 eta 13:40:06 +epoch [19/50] batch [555/1000] time 1.702 (1.565) data 0.000 (0.002) loss 0.9136 (1.1537) acc 81.2500 (71.3288) lr 1.4818e-03 eta 13:40:05 +epoch [19/50] batch [560/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.3486 (1.1535) acc 68.7500 (71.3114) lr 1.4818e-03 eta 13:39:54 +epoch [19/50] batch [565/1000] time 1.545 (1.565) data 0.001 (0.002) loss 0.5469 (1.1524) acc 87.5000 (71.3662) lr 1.4818e-03 eta 13:39:41 +epoch [19/50] batch [570/1000] time 1.563 (1.564) data 0.000 (0.002) loss 0.9644 (1.1542) acc 78.1250 (71.3158) lr 1.4818e-03 eta 13:39:31 +epoch [19/50] batch [575/1000] time 1.539 (1.564) data 0.001 (0.002) loss 0.7603 (1.1521) acc 75.0000 (71.3261) lr 1.4818e-03 eta 13:39:22 +epoch [19/50] batch [580/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.0234 (1.1540) acc 68.7500 (71.2769) lr 1.4818e-03 eta 13:39:14 +epoch [19/50] batch [585/1000] time 1.562 (1.564) data 0.000 (0.002) loss 1.5752 (1.1561) acc 59.3750 (71.2286) lr 1.4818e-03 eta 13:39:06 +epoch [19/50] batch [590/1000] time 1.570 (1.564) data 0.001 (0.002) loss 1.2061 (1.1570) acc 71.8750 (71.1970) lr 1.4818e-03 eta 13:38:55 +epoch [19/50] batch [595/1000] time 1.562 (1.564) data 0.001 (0.002) loss 0.8218 (1.1570) acc 84.3750 (71.1975) lr 1.4818e-03 eta 13:38:46 +epoch [19/50] batch [600/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.6016 (1.1561) acc 78.1250 (71.2344) lr 1.4818e-03 eta 13:38:37 +epoch [19/50] batch [605/1000] time 1.580 (1.564) data 0.000 (0.002) loss 2.1465 (1.1573) acc 56.2500 (71.2138) lr 1.4818e-03 eta 13:38:33 +epoch [19/50] batch [610/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.2354 (1.1580) acc 65.6250 (71.2039) lr 1.4818e-03 eta 13:38:27 +epoch [19/50] batch 
[615/1000] time 1.539 (1.564) data 0.001 (0.002) loss 1.0068 (1.1589) acc 62.5000 (71.1636) lr 1.4818e-03 eta 13:38:15 +epoch [19/50] batch [620/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.9541 (1.1579) acc 71.8750 (71.2097) lr 1.4818e-03 eta 13:38:14 +epoch [19/50] batch [625/1000] time 1.550 (1.565) data 0.001 (0.002) loss 1.1758 (1.1565) acc 65.6250 (71.2100) lr 1.4818e-03 eta 13:38:06 +epoch [19/50] batch [630/1000] time 1.546 (1.564) data 0.000 (0.002) loss 1.2422 (1.1553) acc 78.1250 (71.2500) lr 1.4818e-03 eta 13:37:53 +epoch [19/50] batch [635/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.5898 (1.1538) acc 59.3750 (71.2697) lr 1.4818e-03 eta 13:37:43 +epoch [19/50] batch [640/1000] time 1.603 (1.564) data 0.001 (0.002) loss 0.8540 (1.1538) acc 78.1250 (71.2598) lr 1.4818e-03 eta 13:37:36 +epoch [19/50] batch [645/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.1787 (1.1534) acc 75.0000 (71.2694) lr 1.4818e-03 eta 13:37:30 +epoch [19/50] batch [650/1000] time 1.559 (1.564) data 0.000 (0.002) loss 0.9927 (1.1538) acc 75.0000 (71.2740) lr 1.4818e-03 eta 13:37:21 +epoch [19/50] batch [655/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.2627 (1.1545) acc 75.0000 (71.2548) lr 1.4818e-03 eta 13:37:09 +epoch [19/50] batch [660/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.6094 (1.1551) acc 65.6250 (71.2595) lr 1.4818e-03 eta 13:36:59 +epoch [19/50] batch [665/1000] time 1.575 (1.564) data 0.000 (0.002) loss 1.0547 (1.1555) acc 68.7500 (71.2124) lr 1.4818e-03 eta 13:36:58 +epoch [19/50] batch [670/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.4893 (1.1566) acc 71.8750 (71.1894) lr 1.4818e-03 eta 13:36:51 +epoch [19/50] batch [675/1000] time 1.559 (1.564) data 0.000 (0.002) loss 1.8682 (1.1582) acc 59.3750 (71.1667) lr 1.4818e-03 eta 13:36:44 +epoch [19/50] batch [680/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.5264 (1.1575) acc 65.6250 (71.1949) lr 1.4818e-03 eta 13:36:37 +epoch [19/50] batch [685/1000] time 1.565 (1.564) data 
0.000 (0.002) loss 1.1045 (1.1579) acc 78.1250 (71.1816) lr 1.4818e-03 eta 13:36:28 +epoch [19/50] batch [690/1000] time 1.546 (1.564) data 0.001 (0.002) loss 0.8628 (1.1589) acc 84.3750 (71.1685) lr 1.4818e-03 eta 13:36:19 +epoch [19/50] batch [695/1000] time 1.555 (1.564) data 0.000 (0.002) loss 0.9111 (1.1569) acc 78.1250 (71.2050) lr 1.4818e-03 eta 13:36:11 +epoch [19/50] batch [700/1000] time 1.537 (1.564) data 0.000 (0.002) loss 0.9497 (1.1562) acc 75.0000 (71.2009) lr 1.4818e-03 eta 13:36:01 +epoch [19/50] batch [705/1000] time 1.556 (1.564) data 0.001 (0.002) loss 1.2314 (1.1549) acc 71.8750 (71.2145) lr 1.4818e-03 eta 13:35:50 +epoch [19/50] batch [710/1000] time 1.559 (1.564) data 0.001 (0.002) loss 0.7158 (1.1542) acc 68.7500 (71.2060) lr 1.4818e-03 eta 13:35:49 +epoch [19/50] batch [715/1000] time 1.537 (1.564) data 0.000 (0.002) loss 1.4609 (1.1537) acc 65.6250 (71.2281) lr 1.4818e-03 eta 13:35:37 +epoch [19/50] batch [720/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.3096 (1.1525) acc 65.6250 (71.2500) lr 1.4818e-03 eta 13:35:27 +epoch [19/50] batch [725/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.7061 (1.1508) acc 75.0000 (71.2629) lr 1.4818e-03 eta 13:35:18 +epoch [19/50] batch [730/1000] time 1.557 (1.564) data 0.001 (0.002) loss 0.6411 (1.1494) acc 84.3750 (71.2971) lr 1.4818e-03 eta 13:35:10 +epoch [19/50] batch [735/1000] time 1.577 (1.564) data 0.001 (0.002) loss 1.2930 (1.1508) acc 68.7500 (71.2670) lr 1.4818e-03 eta 13:35:05 +epoch [19/50] batch [740/1000] time 1.570 (1.564) data 0.000 (0.002) loss 1.1650 (1.1502) acc 78.1250 (71.2922) lr 1.4818e-03 eta 13:34:58 +epoch [19/50] batch [745/1000] time 1.582 (1.564) data 0.000 (0.002) loss 1.4580 (1.1527) acc 62.5000 (71.2542) lr 1.4818e-03 eta 13:34:50 +epoch [19/50] batch [750/1000] time 1.571 (1.564) data 0.000 (0.002) loss 1.0488 (1.1531) acc 75.0000 (71.2500) lr 1.4818e-03 eta 13:34:43 +epoch [19/50] batch [755/1000] time 1.564 (1.564) data 0.000 (0.002) loss 0.8096 (1.1516) acc 
75.0000 (71.2666) lr 1.4818e-03 eta 13:34:36 +epoch [19/50] batch [760/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.2227 (1.1514) acc 78.1250 (71.3035) lr 1.4818e-03 eta 13:34:28 +epoch [19/50] batch [765/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.1221 (1.1505) acc 78.1250 (71.3276) lr 1.4818e-03 eta 13:34:19 +epoch [19/50] batch [770/1000] time 1.549 (1.564) data 0.000 (0.002) loss 1.2148 (1.1508) acc 68.7500 (71.3312) lr 1.4818e-03 eta 13:34:17 +epoch [19/50] batch [775/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.3730 (1.1518) acc 62.5000 (71.2984) lr 1.4818e-03 eta 13:34:07 +epoch [19/50] batch [780/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.0420 (1.1509) acc 68.7500 (71.3141) lr 1.4818e-03 eta 13:33:58 +epoch [19/50] batch [785/1000] time 1.543 (1.564) data 0.001 (0.002) loss 1.4121 (1.1512) acc 65.6250 (71.3177) lr 1.4818e-03 eta 13:33:50 +epoch [19/50] batch [790/1000] time 1.543 (1.564) data 0.000 (0.002) loss 0.7432 (1.1510) acc 84.3750 (71.3410) lr 1.4818e-03 eta 13:33:41 +epoch [19/50] batch [795/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.2275 (1.1499) acc 75.0000 (71.3679) lr 1.4818e-03 eta 13:33:31 +epoch [19/50] batch [800/1000] time 1.559 (1.564) data 0.000 (0.002) loss 0.4622 (1.1478) acc 84.3750 (71.4023) lr 1.4818e-03 eta 13:33:21 +epoch [19/50] batch [805/1000] time 1.548 (1.564) data 0.000 (0.002) loss 1.4775 (1.1485) acc 56.2500 (71.3781) lr 1.4818e-03 eta 13:33:11 +epoch [19/50] batch [810/1000] time 1.592 (1.564) data 0.000 (0.002) loss 2.0625 (1.1491) acc 56.2500 (71.3927) lr 1.4818e-03 eta 13:33:04 +epoch [19/50] batch [815/1000] time 1.550 (1.564) data 0.000 (0.002) loss 1.0391 (1.1487) acc 71.8750 (71.3880) lr 1.4818e-03 eta 13:33:01 +epoch [19/50] batch [820/1000] time 1.557 (1.564) data 0.000 (0.002) loss 1.4824 (1.1482) acc 71.8750 (71.3948) lr 1.4818e-03 eta 13:32:52 +epoch [19/50] batch [825/1000] time 1.542 (1.564) data 0.000 (0.001) loss 0.9521 (1.1467) acc 75.0000 (71.4091) lr 1.4818e-03 eta 
13:32:42 +epoch [19/50] batch [830/1000] time 1.542 (1.564) data 0.001 (0.001) loss 1.9248 (1.1472) acc 53.1250 (71.3855) lr 1.4818e-03 eta 13:32:33 +epoch [19/50] batch [835/1000] time 1.573 (1.564) data 0.000 (0.001) loss 1.6309 (1.1477) acc 56.2500 (71.3922) lr 1.4818e-03 eta 13:32:23 +epoch [19/50] batch [840/1000] time 1.547 (1.564) data 0.000 (0.001) loss 1.1309 (1.1467) acc 68.7500 (71.4174) lr 1.4818e-03 eta 13:32:13 +epoch [19/50] batch [845/1000] time 1.572 (1.564) data 0.000 (0.001) loss 1.3877 (1.1475) acc 68.7500 (71.4238) lr 1.4818e-03 eta 13:32:04 +epoch [19/50] batch [850/1000] time 1.544 (1.564) data 0.000 (0.001) loss 0.9424 (1.1466) acc 78.1250 (71.4412) lr 1.4818e-03 eta 13:31:55 +epoch [19/50] batch [855/1000] time 1.541 (1.564) data 0.000 (0.001) loss 1.4580 (1.1476) acc 68.7500 (71.4145) lr 1.4818e-03 eta 13:31:47 +epoch [19/50] batch [860/1000] time 1.553 (1.564) data 0.001 (0.001) loss 0.9722 (1.1470) acc 68.7500 (71.4172) lr 1.4818e-03 eta 13:31:45 +epoch [19/50] batch [865/1000] time 1.569 (1.564) data 0.000 (0.001) loss 1.1309 (1.1474) acc 71.8750 (71.4126) lr 1.4818e-03 eta 13:31:34 +epoch [19/50] batch [870/1000] time 1.569 (1.564) data 0.001 (0.001) loss 1.5078 (1.1477) acc 81.2500 (71.4224) lr 1.4818e-03 eta 13:31:26 +epoch [19/50] batch [875/1000] time 1.541 (1.564) data 0.000 (0.001) loss 0.6401 (1.1466) acc 84.3750 (71.4500) lr 1.4818e-03 eta 13:31:17 +epoch [19/50] batch [880/1000] time 1.571 (1.564) data 0.000 (0.001) loss 1.0557 (1.1447) acc 75.0000 (71.4808) lr 1.4818e-03 eta 13:31:09 +epoch [19/50] batch [885/1000] time 1.558 (1.564) data 0.001 (0.001) loss 0.5972 (1.1426) acc 84.3750 (71.5078) lr 1.4818e-03 eta 13:31:00 +epoch [19/50] batch [890/1000] time 1.567 (1.564) data 0.000 (0.001) loss 1.2178 (1.1420) acc 71.8750 (71.5204) lr 1.4818e-03 eta 13:30:50 +epoch [19/50] batch [895/1000] time 1.578 (1.564) data 0.000 (0.001) loss 1.1094 (1.1428) acc 75.0000 (71.5154) lr 1.4818e-03 eta 13:30:42 +epoch [19/50] batch 
[900/1000] time 1.553 (1.564) data 0.001 (0.001) loss 1.1758 (1.1436) acc 68.7500 (71.4896) lr 1.4818e-03 eta 13:30:33 +epoch [19/50] batch [905/1000] time 1.564 (1.564) data 0.001 (0.001) loss 1.8291 (1.1444) acc 59.3750 (71.4779) lr 1.4818e-03 eta 13:30:23 +epoch [19/50] batch [910/1000] time 1.562 (1.564) data 0.000 (0.001) loss 0.8101 (1.1438) acc 81.2500 (71.4973) lr 1.4818e-03 eta 13:30:14 +epoch [19/50] batch [915/1000] time 1.556 (1.564) data 0.000 (0.001) loss 0.8306 (1.1420) acc 75.0000 (71.5198) lr 1.4818e-03 eta 13:30:08 +epoch [19/50] batch [920/1000] time 1.723 (1.564) data 0.000 (0.001) loss 0.9258 (1.1408) acc 78.1250 (71.5319) lr 1.4818e-03 eta 13:30:03 +epoch [19/50] batch [925/1000] time 1.557 (1.564) data 0.001 (0.001) loss 1.5225 (1.1414) acc 75.0000 (71.5338) lr 1.4818e-03 eta 13:29:56 +epoch [19/50] batch [930/1000] time 1.566 (1.564) data 0.000 (0.001) loss 1.1357 (1.1405) acc 75.0000 (71.5491) lr 1.4818e-03 eta 13:29:48 +epoch [19/50] batch [935/1000] time 1.573 (1.564) data 0.000 (0.001) loss 1.2910 (1.1415) acc 68.7500 (71.5341) lr 1.4818e-03 eta 13:29:43 +epoch [19/50] batch [940/1000] time 1.567 (1.564) data 0.000 (0.001) loss 1.3662 (1.1418) acc 59.3750 (71.5226) lr 1.4818e-03 eta 13:29:35 +epoch [19/50] batch [945/1000] time 1.557 (1.564) data 0.000 (0.001) loss 0.6045 (1.1406) acc 84.3750 (71.5443) lr 1.4818e-03 eta 13:29:26 +epoch [19/50] batch [950/1000] time 1.578 (1.564) data 0.000 (0.001) loss 1.4854 (1.1401) acc 71.8750 (71.5493) lr 1.4818e-03 eta 13:29:19 +epoch [19/50] batch [955/1000] time 1.554 (1.564) data 0.000 (0.001) loss 0.9868 (1.1409) acc 75.0000 (71.5314) lr 1.4818e-03 eta 13:29:11 +epoch [19/50] batch [960/1000] time 1.566 (1.564) data 0.001 (0.001) loss 1.3936 (1.1421) acc 68.7500 (71.5072) lr 1.4818e-03 eta 13:29:02 +epoch [19/50] batch [965/1000] time 1.725 (1.564) data 0.001 (0.001) loss 0.9302 (1.1428) acc 71.8750 (71.4929) lr 1.4818e-03 eta 13:29:00 +epoch [19/50] batch [970/1000] time 1.569 (1.564) data 
0.000 (0.001) loss 1.1279 (1.1430) acc 68.7500 (71.4820) lr 1.4818e-03 eta 13:28:51 +epoch [19/50] batch [975/1000] time 1.561 (1.564) data 0.000 (0.001) loss 1.1865 (1.1426) acc 68.7500 (71.4744) lr 1.4818e-03 eta 13:28:43 +epoch [19/50] batch [980/1000] time 1.545 (1.564) data 0.000 (0.001) loss 1.3125 (1.1430) acc 68.7500 (71.4541) lr 1.4818e-03 eta 13:28:35 +epoch [19/50] batch [985/1000] time 1.554 (1.564) data 0.001 (0.001) loss 1.8486 (1.1429) acc 62.5000 (71.4530) lr 1.4818e-03 eta 13:28:26 +epoch [19/50] batch [990/1000] time 1.534 (1.564) data 0.000 (0.001) loss 1.1592 (1.1425) acc 75.0000 (71.4741) lr 1.4818e-03 eta 13:28:17 +epoch [19/50] batch [995/1000] time 1.560 (1.564) data 0.000 (0.001) loss 1.0000 (1.1414) acc 78.1250 (71.5107) lr 1.4818e-03 eta 13:28:08 +epoch [19/50] batch [1000/1000] time 1.566 (1.564) data 0.000 (0.001) loss 0.8564 (1.1408) acc 84.3750 (71.5344) lr 1.4258e-03 eta 13:27:59 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,203 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [20/50] batch [5/1000] time 1.553 (1.692) data 0.000 (0.190) loss 1.2295 (1.0469) acc 65.6250 (75.6250) lr 1.4258e-03 eta 14:34:07 +epoch [20/50] batch [10/1000] time 1.580 (1.629) data 0.001 (0.095) loss 0.7710 (1.0288) acc 81.2500 (75.3125) lr 1.4258e-03 eta 14:01:19 +epoch [20/50] batch [15/1000] time 1.544 (1.604) data 0.000 (0.064) loss 1.4014 (1.1186) acc 59.3750 (73.1250) lr 1.4258e-03 eta 13:48:19 +epoch [20/50] batch [20/1000] time 1.590 (1.594) data 0.001 (0.048) loss 1.0664 (1.0892) acc 65.6250 (72.8125) lr 1.4258e-03 eta 13:43:17 +epoch [20/50] batch [25/1000] time 1.558 (1.586) data 0.001 (0.039) loss 0.8945 (1.0655) acc 78.1250 (73.0000) lr 1.4258e-03 eta 13:38:47 +epoch [20/50] batch [30/1000] time 1.552 (1.580) data 0.000 (0.032) loss 1.0898 (1.0760) acc 71.8750 (72.3958) lr 
1.4258e-03 eta 13:35:43 +epoch [20/50] batch [35/1000] time 1.563 (1.577) data 0.000 (0.028) loss 1.1826 (1.0812) acc 78.1250 (72.8571) lr 1.4258e-03 eta 13:33:57 +epoch [20/50] batch [40/1000] time 1.545 (1.574) data 0.001 (0.024) loss 1.2607 (1.0624) acc 75.0000 (73.2031) lr 1.4258e-03 eta 13:31:57 +epoch [20/50] batch [45/1000] time 1.560 (1.578) data 0.000 (0.022) loss 1.5566 (1.0645) acc 59.3750 (72.8472) lr 1.4258e-03 eta 13:33:59 +epoch [20/50] batch [50/1000] time 1.568 (1.575) data 0.000 (0.020) loss 0.6997 (1.0830) acc 84.3750 (72.7500) lr 1.4258e-03 eta 13:32:27 +epoch [20/50] batch [55/1000] time 1.560 (1.574) data 0.001 (0.018) loss 1.1436 (1.0682) acc 75.0000 (72.9545) lr 1.4258e-03 eta 13:31:51 +epoch [20/50] batch [60/1000] time 1.549 (1.573) data 0.001 (0.016) loss 0.4839 (1.0774) acc 81.2500 (72.6042) lr 1.4258e-03 eta 13:31:00 +epoch [20/50] batch [65/1000] time 1.584 (1.572) data 0.001 (0.015) loss 1.3896 (1.0900) acc 62.5000 (71.9231) lr 1.4258e-03 eta 13:30:29 +epoch [20/50] batch [70/1000] time 1.561 (1.571) data 0.000 (0.014) loss 1.4717 (1.0883) acc 62.5000 (71.9643) lr 1.4258e-03 eta 13:29:59 +epoch [20/50] batch [75/1000] time 1.537 (1.570) data 0.001 (0.013) loss 0.8286 (1.0839) acc 75.0000 (72.0417) lr 1.4258e-03 eta 13:29:07 +epoch [20/50] batch [80/1000] time 1.562 (1.569) data 0.000 (0.012) loss 1.5967 (1.0940) acc 71.8750 (71.8750) lr 1.4258e-03 eta 13:28:34 +epoch [20/50] batch [85/1000] time 1.553 (1.568) data 0.000 (0.012) loss 1.0049 (1.0944) acc 75.0000 (71.9485) lr 1.4258e-03 eta 13:27:54 +epoch [20/50] batch [90/1000] time 1.590 (1.568) data 0.000 (0.011) loss 1.1455 (1.1006) acc 81.2500 (72.1181) lr 1.4258e-03 eta 13:27:34 +epoch [20/50] batch [95/1000] time 1.539 (1.566) data 0.000 (0.010) loss 0.8203 (1.0961) acc 78.1250 (72.3026) lr 1.4258e-03 eta 13:26:50 +epoch [20/50] batch [100/1000] time 1.581 (1.566) data 0.001 (0.010) loss 0.5547 (1.1168) acc 87.5000 (71.9062) lr 1.4258e-03 eta 13:26:36 +epoch [20/50] batch 
[105/1000] time 1.560 (1.566) data 0.000 (0.010) loss 1.9004 (1.1233) acc 62.5000 (71.9345) lr 1.4258e-03 eta 13:26:21 +epoch [20/50] batch [110/1000] time 1.534 (1.567) data 0.000 (0.009) loss 0.9800 (1.1181) acc 75.0000 (71.9886) lr 1.4258e-03 eta 13:26:35 +epoch [20/50] batch [115/1000] time 1.550 (1.566) data 0.000 (0.009) loss 1.1533 (1.1232) acc 78.1250 (72.0380) lr 1.4258e-03 eta 13:26:03 +epoch [20/50] batch [120/1000] time 1.560 (1.566) data 0.000 (0.008) loss 0.8208 (1.1195) acc 75.0000 (72.2656) lr 1.4258e-03 eta 13:25:47 +epoch [20/50] batch [125/1000] time 1.536 (1.565) data 0.000 (0.008) loss 1.1934 (1.1251) acc 75.0000 (72.1750) lr 1.4258e-03 eta 13:25:22 +epoch [20/50] batch [130/1000] time 1.554 (1.565) data 0.000 (0.008) loss 1.2852 (1.1227) acc 65.6250 (72.1394) lr 1.4258e-03 eta 13:24:58 +epoch [20/50] batch [135/1000] time 1.532 (1.564) data 0.000 (0.008) loss 1.3203 (1.1294) acc 65.6250 (72.0602) lr 1.4258e-03 eta 13:24:34 +epoch [20/50] batch [140/1000] time 1.546 (1.564) data 0.000 (0.007) loss 1.1465 (1.1351) acc 68.7500 (71.8973) lr 1.4258e-03 eta 13:24:10 +epoch [20/50] batch [145/1000] time 1.560 (1.563) data 0.000 (0.007) loss 0.6187 (1.1437) acc 84.3750 (71.7672) lr 1.4258e-03 eta 13:23:54 +epoch [20/50] batch [150/1000] time 1.547 (1.563) data 0.000 (0.007) loss 0.4736 (1.1386) acc 87.5000 (71.8333) lr 1.4258e-03 eta 13:23:43 +epoch [20/50] batch [155/1000] time 1.551 (1.564) data 0.000 (0.007) loss 1.3809 (1.1398) acc 65.6250 (71.8347) lr 1.4258e-03 eta 13:23:53 +epoch [20/50] batch [160/1000] time 1.573 (1.564) data 0.000 (0.006) loss 0.9297 (1.1348) acc 84.3750 (72.0312) lr 1.4258e-03 eta 13:23:46 +epoch [20/50] batch [165/1000] time 1.566 (1.564) data 0.000 (0.006) loss 0.6577 (1.1304) acc 78.1250 (72.0644) lr 1.4258e-03 eta 13:23:47 +epoch [20/50] batch [170/1000] time 1.568 (1.564) data 0.000 (0.006) loss 1.3711 (1.1353) acc 65.6250 (71.9853) lr 1.4258e-03 eta 13:23:31 +epoch [20/50] batch [175/1000] time 1.558 (1.564) data 
0.001 (0.006) loss 0.9375 (1.1357) acc 81.2500 (71.9107) lr 1.4258e-03 eta 13:23:18 +epoch [20/50] batch [180/1000] time 1.573 (1.563) data 0.000 (0.006) loss 0.8022 (1.1341) acc 75.0000 (71.9271) lr 1.4258e-03 eta 13:23:06 +epoch [20/50] batch [185/1000] time 1.584 (1.564) data 0.000 (0.006) loss 0.9482 (1.1393) acc 75.0000 (71.9088) lr 1.4258e-03 eta 13:23:09 +epoch [20/50] batch [190/1000] time 1.573 (1.564) data 0.001 (0.005) loss 0.9458 (1.1405) acc 81.2500 (71.9408) lr 1.4258e-03 eta 13:23:04 +epoch [20/50] batch [195/1000] time 1.560 (1.564) data 0.001 (0.005) loss 0.8032 (1.1377) acc 78.1250 (71.8910) lr 1.4258e-03 eta 13:23:12 +epoch [20/50] batch [200/1000] time 1.564 (1.564) data 0.000 (0.005) loss 1.4365 (1.1384) acc 65.6250 (71.8594) lr 1.4258e-03 eta 13:23:00 +epoch [20/50] batch [205/1000] time 1.569 (1.564) data 0.000 (0.005) loss 1.4834 (1.1399) acc 65.6250 (71.6921) lr 1.4258e-03 eta 13:22:43 +epoch [20/50] batch [210/1000] time 1.563 (1.564) data 0.000 (0.005) loss 1.1973 (1.1441) acc 75.0000 (71.5774) lr 1.4258e-03 eta 13:22:27 +epoch [20/50] batch [215/1000] time 1.586 (1.564) data 0.000 (0.005) loss 1.1514 (1.1469) acc 68.7500 (71.5262) lr 1.4258e-03 eta 13:22:21 +epoch [20/50] batch [220/1000] time 1.553 (1.564) data 0.000 (0.005) loss 1.2910 (1.1478) acc 59.3750 (71.5341) lr 1.4258e-03 eta 13:22:17 +epoch [20/50] batch [225/1000] time 1.559 (1.564) data 0.001 (0.005) loss 0.8120 (1.1501) acc 81.2500 (71.4861) lr 1.4258e-03 eta 13:22:03 +epoch [20/50] batch [230/1000] time 1.539 (1.564) data 0.000 (0.005) loss 0.9355 (1.1459) acc 78.1250 (71.5489) lr 1.4258e-03 eta 13:21:49 +epoch [20/50] batch [235/1000] time 1.549 (1.564) data 0.000 (0.005) loss 1.1221 (1.1464) acc 78.1250 (71.5957) lr 1.4258e-03 eta 13:21:41 +epoch [20/50] batch [240/1000] time 1.576 (1.564) data 0.000 (0.004) loss 0.7773 (1.1415) acc 84.3750 (71.6927) lr 1.4258e-03 eta 13:21:36 +epoch [20/50] batch [245/1000] time 1.571 (1.564) data 0.000 (0.004) loss 0.9639 (1.1401) acc 
81.2500 (71.7092) lr 1.4258e-03 eta 13:21:26 +epoch [20/50] batch [250/1000] time 1.557 (1.563) data 0.000 (0.004) loss 0.6265 (1.1390) acc 81.2500 (71.7125) lr 1.4258e-03 eta 13:21:12 +epoch [20/50] batch [255/1000] time 1.549 (1.563) data 0.000 (0.004) loss 1.1309 (1.1411) acc 71.8750 (71.6667) lr 1.4258e-03 eta 13:20:59 +epoch [20/50] batch [260/1000] time 1.559 (1.564) data 0.000 (0.004) loss 0.5801 (1.1398) acc 81.2500 (71.7067) lr 1.4258e-03 eta 13:21:07 +epoch [20/50] batch [265/1000] time 1.551 (1.564) data 0.000 (0.004) loss 1.1484 (1.1380) acc 75.0000 (71.7217) lr 1.4258e-03 eta 13:20:57 +epoch [20/50] batch [270/1000] time 1.537 (1.563) data 0.000 (0.004) loss 1.5273 (1.1427) acc 68.7500 (71.6319) lr 1.4258e-03 eta 13:20:42 +epoch [20/50] batch [275/1000] time 1.544 (1.563) data 0.001 (0.004) loss 0.7998 (1.1424) acc 75.0000 (71.6250) lr 1.4258e-03 eta 13:20:29 +epoch [20/50] batch [280/1000] time 1.578 (1.563) data 0.001 (0.004) loss 0.8530 (1.1446) acc 71.8750 (71.6295) lr 1.4258e-03 eta 13:20:15 +epoch [20/50] batch [285/1000] time 1.562 (1.563) data 0.001 (0.004) loss 0.7422 (1.1372) acc 78.1250 (71.7654) lr 1.4258e-03 eta 13:20:00 +epoch [20/50] batch [290/1000] time 1.589 (1.563) data 0.000 (0.004) loss 0.9658 (1.1346) acc 71.8750 (71.7996) lr 1.4258e-03 eta 13:19:54 +epoch [20/50] batch [295/1000] time 1.568 (1.563) data 0.000 (0.004) loss 0.9795 (1.1344) acc 78.1250 (71.8220) lr 1.4258e-03 eta 13:19:37 +epoch [20/50] batch [300/1000] time 1.559 (1.562) data 0.000 (0.004) loss 1.0566 (1.1343) acc 68.7500 (71.8229) lr 1.4258e-03 eta 13:19:22 +epoch [20/50] batch [305/1000] time 1.563 (1.563) data 0.001 (0.004) loss 1.3672 (1.1355) acc 68.7500 (71.8545) lr 1.4258e-03 eta 13:19:27 +epoch [20/50] batch [310/1000] time 1.548 (1.562) data 0.000 (0.004) loss 0.7173 (1.1336) acc 87.5000 (71.8851) lr 1.4258e-03 eta 13:19:12 +epoch [20/50] batch [315/1000] time 1.559 (1.562) data 0.000 (0.003) loss 1.0684 (1.1341) acc 68.7500 (71.8452) lr 1.4258e-03 eta 
13:18:57 +epoch [20/50] batch [320/1000] time 1.555 (1.562) data 0.000 (0.003) loss 1.6709 (1.1366) acc 62.5000 (71.7676) lr 1.4258e-03 eta 13:18:40 +epoch [20/50] batch [325/1000] time 1.568 (1.562) data 0.001 (0.003) loss 1.0879 (1.1334) acc 65.6250 (71.8173) lr 1.4258e-03 eta 13:18:32 +epoch [20/50] batch [330/1000] time 1.558 (1.562) data 0.001 (0.003) loss 0.6050 (1.1305) acc 78.1250 (71.8466) lr 1.4258e-03 eta 13:18:26 +epoch [20/50] batch [335/1000] time 1.595 (1.562) data 0.000 (0.003) loss 0.9097 (1.1338) acc 68.7500 (71.8097) lr 1.4258e-03 eta 13:18:21 +epoch [20/50] batch [340/1000] time 1.579 (1.562) data 0.000 (0.003) loss 1.2188 (1.1350) acc 68.7500 (71.8199) lr 1.4258e-03 eta 13:18:17 +epoch [20/50] batch [345/1000] time 1.720 (1.563) data 0.000 (0.003) loss 1.9912 (1.1375) acc 53.1250 (71.7663) lr 1.4258e-03 eta 13:18:21 +epoch [20/50] batch [350/1000] time 1.561 (1.563) data 0.000 (0.003) loss 1.0830 (1.1377) acc 75.0000 (71.7500) lr 1.4258e-03 eta 13:18:13 +epoch [20/50] batch [355/1000] time 1.555 (1.563) data 0.000 (0.003) loss 1.3066 (1.1380) acc 65.6250 (71.7342) lr 1.4258e-03 eta 13:18:04 +epoch [20/50] batch [360/1000] time 1.563 (1.562) data 0.001 (0.003) loss 1.4590 (1.1387) acc 68.7500 (71.7274) lr 1.4258e-03 eta 13:17:54 +epoch [20/50] batch [365/1000] time 1.566 (1.563) data 0.001 (0.003) loss 0.8491 (1.1419) acc 75.0000 (71.6866) lr 1.4258e-03 eta 13:17:49 +epoch [20/50] batch [370/1000] time 1.560 (1.563) data 0.000 (0.003) loss 1.4492 (1.1419) acc 71.8750 (71.7230) lr 1.4258e-03 eta 13:17:41 +epoch [20/50] batch [375/1000] time 1.565 (1.562) data 0.000 (0.003) loss 1.7256 (1.1424) acc 59.3750 (71.7250) lr 1.4258e-03 eta 13:17:29 +epoch [20/50] batch [380/1000] time 1.564 (1.562) data 0.000 (0.003) loss 1.0430 (1.1437) acc 68.7500 (71.6941) lr 1.4258e-03 eta 13:17:20 +epoch [20/50] batch [385/1000] time 1.550 (1.562) data 0.001 (0.003) loss 1.2979 (1.1442) acc 71.8750 (71.6721) lr 1.4258e-03 eta 13:17:10 +epoch [20/50] batch 
[390/1000] time 1.560 (1.562) data 0.001 (0.003) loss 1.3555 (1.1442) acc 65.6250 (71.6667) lr 1.4258e-03 eta 13:17:00 +epoch [20/50] batch [395/1000] time 1.557 (1.562) data 0.001 (0.003) loss 1.4453 (1.1430) acc 59.3750 (71.6614) lr 1.4258e-03 eta 13:16:53 +epoch [20/50] batch [400/1000] time 1.543 (1.562) data 0.001 (0.003) loss 1.2812 (1.1431) acc 71.8750 (71.6406) lr 1.4258e-03 eta 13:16:43 +epoch [20/50] batch [405/1000] time 1.560 (1.562) data 0.001 (0.003) loss 1.4570 (1.1437) acc 68.7500 (71.6512) lr 1.4258e-03 eta 13:16:30 +epoch [20/50] batch [410/1000] time 1.571 (1.562) data 0.000 (0.003) loss 1.2041 (1.1478) acc 62.5000 (71.5244) lr 1.4258e-03 eta 13:16:33 +epoch [20/50] batch [415/1000] time 1.566 (1.562) data 0.000 (0.003) loss 0.6050 (1.1443) acc 81.2500 (71.5663) lr 1.4258e-03 eta 13:16:26 +epoch [20/50] batch [420/1000] time 1.570 (1.562) data 0.000 (0.003) loss 1.2188 (1.1439) acc 68.7500 (71.5997) lr 1.4258e-03 eta 13:16:19 +epoch [20/50] batch [425/1000] time 1.573 (1.563) data 0.000 (0.003) loss 1.3848 (1.1470) acc 62.5000 (71.5441) lr 1.4258e-03 eta 13:16:14 +epoch [20/50] batch [430/1000] time 1.570 (1.563) data 0.000 (0.003) loss 0.9229 (1.1462) acc 71.8750 (71.5698) lr 1.4258e-03 eta 13:16:08 +epoch [20/50] batch [435/1000] time 1.582 (1.563) data 0.000 (0.003) loss 1.2539 (1.1446) acc 68.7500 (71.5805) lr 1.4258e-03 eta 13:15:59 +epoch [20/50] batch [440/1000] time 1.553 (1.563) data 0.001 (0.003) loss 1.1035 (1.1443) acc 71.8750 (71.5483) lr 1.4258e-03 eta 13:15:51 +epoch [20/50] batch [445/1000] time 1.551 (1.563) data 0.001 (0.003) loss 1.2949 (1.1466) acc 56.2500 (71.4677) lr 1.4258e-03 eta 13:15:44 +epoch [20/50] batch [450/1000] time 1.569 (1.563) data 0.000 (0.003) loss 0.8501 (1.1448) acc 78.1250 (71.4931) lr 1.4258e-03 eta 13:15:38 +epoch [20/50] batch [455/1000] time 1.565 (1.563) data 0.000 (0.003) loss 1.1133 (1.1466) acc 81.2500 (71.5247) lr 1.4258e-03 eta 13:15:43 +epoch [20/50] batch [460/1000] time 1.553 (1.563) data 
0.001 (0.003) loss 1.8154 (1.1507) acc 62.5000 (71.3927) lr 1.4258e-03 eta 13:15:35 +epoch [20/50] batch [465/1000] time 1.587 (1.563) data 0.000 (0.003) loss 0.5010 (1.1494) acc 81.2500 (71.4516) lr 1.4258e-03 eta 13:15:31 +epoch [20/50] batch [470/1000] time 1.563 (1.563) data 0.000 (0.002) loss 1.2979 (1.1507) acc 65.6250 (71.4096) lr 1.4258e-03 eta 13:15:21 +epoch [20/50] batch [475/1000] time 1.524 (1.563) data 0.000 (0.002) loss 0.7769 (1.1496) acc 78.1250 (71.4211) lr 1.4258e-03 eta 13:15:08 +epoch [20/50] batch [480/1000] time 1.581 (1.563) data 0.001 (0.002) loss 0.9336 (1.1490) acc 75.0000 (71.4583) lr 1.4258e-03 eta 13:15:00 +epoch [20/50] batch [485/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.3262 (1.1491) acc 62.5000 (71.4369) lr 1.4258e-03 eta 13:14:49 +epoch [20/50] batch [490/1000] time 1.577 (1.563) data 0.001 (0.002) loss 1.2061 (1.1507) acc 75.0000 (71.3967) lr 1.4258e-03 eta 13:14:42 +epoch [20/50] batch [495/1000] time 1.546 (1.563) data 0.000 (0.002) loss 1.4160 (1.1497) acc 68.7500 (71.4141) lr 1.4258e-03 eta 13:14:32 +epoch [20/50] batch [500/1000] time 1.567 (1.563) data 0.000 (0.002) loss 1.0244 (1.1479) acc 75.0000 (71.4313) lr 1.4258e-03 eta 13:14:31 +epoch [20/50] batch [505/1000] time 1.571 (1.563) data 0.000 (0.002) loss 0.8257 (1.1475) acc 71.8750 (71.3800) lr 1.4258e-03 eta 13:14:23 +epoch [20/50] batch [510/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.4717 (1.1496) acc 59.3750 (71.3174) lr 1.4258e-03 eta 13:14:16 +epoch [20/50] batch [515/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.1221 (1.1491) acc 75.0000 (71.3471) lr 1.4258e-03 eta 13:14:07 +epoch [20/50] batch [520/1000] time 1.579 (1.563) data 0.000 (0.002) loss 1.0205 (1.1490) acc 78.1250 (71.3702) lr 1.4258e-03 eta 13:13:56 +epoch [20/50] batch [525/1000] time 1.539 (1.563) data 0.000 (0.002) loss 0.9438 (1.1477) acc 75.0000 (71.4464) lr 1.4258e-03 eta 13:13:46 +epoch [20/50] batch [530/1000] time 1.544 (1.563) data 0.000 (0.002) loss 1.5010 (1.1487) acc 
71.8750 (71.4387) lr 1.4258e-03 eta 13:13:35 +epoch [20/50] batch [535/1000] time 1.538 (1.563) data 0.000 (0.002) loss 1.1309 (1.1478) acc 75.0000 (71.4544) lr 1.4258e-03 eta 13:13:22 +epoch [20/50] batch [540/1000] time 1.536 (1.562) data 0.000 (0.002) loss 1.4150 (1.1487) acc 75.0000 (71.4525) lr 1.4258e-03 eta 13:13:13 +epoch [20/50] batch [545/1000] time 1.540 (1.562) data 0.000 (0.002) loss 0.7983 (1.1481) acc 75.0000 (71.4392) lr 1.4258e-03 eta 13:13:00 +epoch [20/50] batch [550/1000] time 1.553 (1.562) data 0.000 (0.002) loss 1.0059 (1.1474) acc 78.1250 (71.4602) lr 1.4258e-03 eta 13:12:51 +epoch [20/50] batch [555/1000] time 1.541 (1.562) data 0.000 (0.002) loss 1.3623 (1.1458) acc 62.5000 (71.4865) lr 1.4258e-03 eta 13:12:45 +epoch [20/50] batch [560/1000] time 1.569 (1.563) data 0.001 (0.002) loss 1.2441 (1.1468) acc 71.8750 (71.4900) lr 1.4258e-03 eta 13:12:45 +epoch [20/50] batch [565/1000] time 1.557 (1.563) data 0.000 (0.002) loss 0.8813 (1.1466) acc 75.0000 (71.5100) lr 1.4258e-03 eta 13:12:38 +epoch [20/50] batch [570/1000] time 1.559 (1.563) data 0.001 (0.002) loss 1.0361 (1.1459) acc 68.7500 (71.4967) lr 1.4258e-03 eta 13:12:27 +epoch [20/50] batch [575/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.1602 (1.1451) acc 75.0000 (71.5272) lr 1.4258e-03 eta 13:12:16 +epoch [20/50] batch [580/1000] time 1.554 (1.562) data 0.000 (0.002) loss 0.9312 (1.1437) acc 81.2500 (71.5787) lr 1.4258e-03 eta 13:12:06 +epoch [20/50] batch [585/1000] time 1.583 (1.562) data 0.000 (0.002) loss 0.7441 (1.1413) acc 78.1250 (71.6400) lr 1.4258e-03 eta 13:12:00 +epoch [20/50] batch [590/1000] time 1.562 (1.562) data 0.000 (0.002) loss 1.0195 (1.1393) acc 78.1250 (71.6843) lr 1.4258e-03 eta 13:11:52 +epoch [20/50] batch [595/1000] time 1.563 (1.562) data 0.000 (0.002) loss 1.3115 (1.1389) acc 65.6250 (71.6649) lr 1.4258e-03 eta 13:11:46 +epoch [20/50] batch [600/1000] time 1.570 (1.562) data 0.000 (0.002) loss 1.4961 (1.1392) acc 62.5000 (71.6250) lr 1.4258e-03 eta 
13:11:36 +epoch [20/50] batch [605/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.3271 (1.1391) acc 65.6250 (71.6219) lr 1.4258e-03 eta 13:11:35 +epoch [20/50] batch [610/1000] time 1.576 (1.563) data 0.000 (0.002) loss 0.9736 (1.1406) acc 71.8750 (71.5369) lr 1.4258e-03 eta 13:11:28 +epoch [20/50] batch [615/1000] time 1.553 (1.563) data 0.000 (0.002) loss 1.9395 (1.1413) acc 65.6250 (71.5346) lr 1.4258e-03 eta 13:11:22 +epoch [20/50] batch [620/1000] time 1.551 (1.563) data 0.001 (0.002) loss 1.5742 (1.1419) acc 56.2500 (71.4768) lr 1.4258e-03 eta 13:11:11 +epoch [20/50] batch [625/1000] time 1.558 (1.563) data 0.000 (0.002) loss 1.2559 (1.1418) acc 78.1250 (71.5350) lr 1.4258e-03 eta 13:11:05 +epoch [20/50] batch [630/1000] time 1.565 (1.562) data 0.000 (0.002) loss 0.9004 (1.1415) acc 78.1250 (71.5228) lr 1.4258e-03 eta 13:10:52 +epoch [20/50] batch [635/1000] time 1.544 (1.563) data 0.000 (0.002) loss 1.0332 (1.1419) acc 68.7500 (71.5256) lr 1.4258e-03 eta 13:10:47 +epoch [20/50] batch [640/1000] time 1.543 (1.563) data 0.000 (0.002) loss 1.2197 (1.1405) acc 68.7500 (71.5674) lr 1.4258e-03 eta 13:10:37 +epoch [20/50] batch [645/1000] time 1.556 (1.562) data 0.000 (0.002) loss 0.8994 (1.1387) acc 75.0000 (71.5891) lr 1.4258e-03 eta 13:10:28 +epoch [20/50] batch [650/1000] time 1.536 (1.563) data 0.000 (0.002) loss 0.8276 (1.1382) acc 75.0000 (71.5913) lr 1.4258e-03 eta 13:10:25 +epoch [20/50] batch [655/1000] time 1.556 (1.563) data 0.001 (0.002) loss 0.8145 (1.1367) acc 71.8750 (71.5792) lr 1.4258e-03 eta 13:10:15 +epoch [20/50] batch [660/1000] time 1.575 (1.563) data 0.001 (0.002) loss 0.8467 (1.1362) acc 87.5000 (71.6335) lr 1.4258e-03 eta 13:10:08 +epoch [20/50] batch [665/1000] time 1.569 (1.562) data 0.000 (0.002) loss 0.9766 (1.1363) acc 75.0000 (71.6588) lr 1.4258e-03 eta 13:09:57 +epoch [20/50] batch [670/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.3955 (1.1378) acc 65.6250 (71.6465) lr 1.4258e-03 eta 13:09:51 +epoch [20/50] batch 
[675/1000] time 1.556 (1.562) data 0.000 (0.002) loss 1.3848 (1.1396) acc 65.6250 (71.6111) lr 1.4258e-03 eta 13:09:42 +epoch [20/50] batch [680/1000] time 1.543 (1.562) data 0.001 (0.002) loss 1.1543 (1.1399) acc 65.6250 (71.5671) lr 1.4258e-03 eta 13:09:30 +epoch [20/50] batch [685/1000] time 1.541 (1.562) data 0.000 (0.002) loss 1.4023 (1.1398) acc 65.6250 (71.5465) lr 1.4258e-03 eta 13:09:19 +epoch [20/50] batch [690/1000] time 1.545 (1.562) data 0.000 (0.002) loss 0.8525 (1.1382) acc 78.1250 (71.5489) lr 1.4258e-03 eta 13:09:07 +epoch [20/50] batch [695/1000] time 1.546 (1.562) data 0.000 (0.002) loss 1.4307 (1.1401) acc 65.6250 (71.5108) lr 1.4258e-03 eta 13:08:57 +epoch [20/50] batch [700/1000] time 1.534 (1.562) data 0.000 (0.002) loss 0.7451 (1.1396) acc 71.8750 (71.5268) lr 1.4258e-03 eta 13:08:47 +epoch [20/50] batch [705/1000] time 1.555 (1.562) data 0.000 (0.002) loss 1.4756 (1.1405) acc 65.6250 (71.5115) lr 1.4258e-03 eta 13:08:36 +epoch [20/50] batch [710/1000] time 1.715 (1.562) data 0.000 (0.002) loss 0.8960 (1.1387) acc 75.0000 (71.5493) lr 1.4258e-03 eta 13:08:35 +epoch [20/50] batch [715/1000] time 1.568 (1.562) data 0.001 (0.002) loss 0.6890 (1.1373) acc 78.1250 (71.5734) lr 1.4258e-03 eta 13:08:28 +epoch [20/50] batch [720/1000] time 1.562 (1.562) data 0.000 (0.002) loss 0.8730 (1.1371) acc 65.6250 (71.5712) lr 1.4258e-03 eta 13:08:20 +epoch [20/50] batch [725/1000] time 1.579 (1.562) data 0.000 (0.002) loss 1.5400 (1.1371) acc 59.3750 (71.5560) lr 1.4258e-03 eta 13:08:11 +epoch [20/50] batch [730/1000] time 1.555 (1.562) data 0.001 (0.002) loss 0.8682 (1.1354) acc 71.8750 (71.5753) lr 1.4258e-03 eta 13:08:01 +epoch [20/50] batch [735/1000] time 1.569 (1.562) data 0.001 (0.002) loss 1.4082 (1.1361) acc 59.3750 (71.5476) lr 1.4258e-03 eta 13:07:53 +epoch [20/50] batch [740/1000] time 1.555 (1.562) data 0.000 (0.002) loss 1.0586 (1.1367) acc 71.8750 (71.5372) lr 1.4258e-03 eta 13:07:44 +epoch [20/50] batch [745/1000] time 1.541 (1.562) data 
0.000 (0.002) loss 1.0566 (1.1363) acc 81.2500 (71.5562) lr 1.4258e-03 eta 13:07:33 +epoch [20/50] batch [750/1000] time 1.579 (1.562) data 0.001 (0.002) loss 1.6660 (1.1359) acc 56.2500 (71.5375) lr 1.4258e-03 eta 13:07:25 +epoch [20/50] batch [755/1000] time 1.727 (1.562) data 0.001 (0.002) loss 1.3066 (1.1375) acc 75.0000 (71.5356) lr 1.4258e-03 eta 13:07:24 +epoch [20/50] batch [760/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.5107 (1.1387) acc 71.8750 (71.5378) lr 1.4258e-03 eta 13:07:13 +epoch [20/50] batch [765/1000] time 1.540 (1.562) data 0.000 (0.002) loss 1.0439 (1.1402) acc 75.0000 (71.5033) lr 1.4258e-03 eta 13:07:03 +epoch [20/50] batch [770/1000] time 1.539 (1.562) data 0.000 (0.002) loss 0.9111 (1.1409) acc 68.7500 (71.4692) lr 1.4258e-03 eta 13:06:52 +epoch [20/50] batch [775/1000] time 1.576 (1.562) data 0.000 (0.002) loss 1.3418 (1.1410) acc 71.8750 (71.4556) lr 1.4258e-03 eta 13:06:44 +epoch [20/50] batch [780/1000] time 1.567 (1.562) data 0.000 (0.002) loss 1.1230 (1.1394) acc 75.0000 (71.4944) lr 1.4258e-03 eta 13:06:36 +epoch [20/50] batch [785/1000] time 1.546 (1.562) data 0.001 (0.002) loss 0.9263 (1.1391) acc 75.0000 (71.5048) lr 1.4258e-03 eta 13:06:27 +epoch [20/50] batch [790/1000] time 1.543 (1.562) data 0.001 (0.002) loss 0.9355 (1.1387) acc 75.0000 (71.5229) lr 1.4258e-03 eta 13:06:19 +epoch [20/50] batch [795/1000] time 1.563 (1.562) data 0.000 (0.002) loss 0.8257 (1.1377) acc 68.7500 (71.5134) lr 1.4258e-03 eta 13:06:11 +epoch [20/50] batch [800/1000] time 1.585 (1.562) data 0.000 (0.002) loss 0.9453 (1.1391) acc 71.8750 (71.4805) lr 1.4258e-03 eta 13:06:11 +epoch [20/50] batch [805/1000] time 1.564 (1.562) data 0.001 (0.002) loss 1.3408 (1.1388) acc 65.6250 (71.4790) lr 1.4258e-03 eta 13:06:03 +epoch [20/50] batch [810/1000] time 1.560 (1.562) data 0.000 (0.002) loss 0.7700 (1.1386) acc 81.2500 (71.4622) lr 1.4258e-03 eta 13:05:55 +epoch [20/50] batch [815/1000] time 1.540 (1.562) data 0.000 (0.002) loss 1.0176 (1.1380) acc 
71.8750 (71.4686) lr 1.4258e-03 eta 13:05:42 +epoch [20/50] batch [820/1000] time 1.560 (1.562) data 0.000 (0.002) loss 0.8262 (1.1369) acc 81.2500 (71.4977) lr 1.4258e-03 eta 13:05:32 +epoch [20/50] batch [825/1000] time 1.563 (1.562) data 0.000 (0.002) loss 0.9302 (1.1368) acc 75.0000 (71.5000) lr 1.4258e-03 eta 13:05:22 +epoch [20/50] batch [830/1000] time 1.562 (1.562) data 0.000 (0.002) loss 1.3584 (1.1389) acc 75.0000 (71.4721) lr 1.4258e-03 eta 13:05:14 +epoch [20/50] batch [835/1000] time 1.534 (1.562) data 0.000 (0.002) loss 1.4404 (1.1395) acc 75.0000 (71.4783) lr 1.4258e-03 eta 13:05:03 +epoch [20/50] batch [840/1000] time 1.558 (1.561) data 0.000 (0.002) loss 1.1855 (1.1410) acc 75.0000 (71.4583) lr 1.4258e-03 eta 13:04:54 +epoch [20/50] batch [845/1000] time 1.546 (1.561) data 0.000 (0.002) loss 1.0098 (1.1402) acc 75.0000 (71.4645) lr 1.4258e-03 eta 13:04:46 +epoch [20/50] batch [850/1000] time 1.544 (1.561) data 0.000 (0.002) loss 1.1465 (1.1404) acc 68.7500 (71.4559) lr 1.4258e-03 eta 13:04:35 +epoch [20/50] batch [855/1000] time 1.541 (1.561) data 0.000 (0.002) loss 1.4365 (1.1410) acc 68.7500 (71.4437) lr 1.4258e-03 eta 13:04:24 +epoch [20/50] batch [860/1000] time 1.532 (1.561) data 0.000 (0.002) loss 0.6636 (1.1398) acc 81.2500 (71.4717) lr 1.4258e-03 eta 13:04:12 +epoch [20/50] batch [865/1000] time 1.579 (1.561) data 0.000 (0.002) loss 1.2910 (1.1389) acc 68.7500 (71.5282) lr 1.4258e-03 eta 13:04:11 +epoch [20/50] batch [870/1000] time 1.525 (1.561) data 0.000 (0.002) loss 1.1475 (1.1392) acc 68.7500 (71.5050) lr 1.4258e-03 eta 13:03:59 +epoch [20/50] batch [875/1000] time 1.552 (1.561) data 0.000 (0.002) loss 1.3896 (1.1397) acc 71.8750 (71.5000) lr 1.4258e-03 eta 13:03:50 +epoch [20/50] batch [880/1000] time 1.553 (1.561) data 0.000 (0.002) loss 1.4219 (1.1388) acc 65.6250 (71.4879) lr 1.4258e-03 eta 13:03:40 +epoch [20/50] batch [885/1000] time 1.572 (1.561) data 0.000 (0.002) loss 0.6704 (1.1384) acc 87.5000 (71.5148) lr 1.4258e-03 eta 
13:03:32 +epoch [20/50] batch [890/1000] time 1.544 (1.561) data 0.000 (0.002) loss 1.4775 (1.1382) acc 62.5000 (71.5169) lr 1.4258e-03 eta 13:03:24 +epoch [20/50] batch [895/1000] time 1.563 (1.561) data 0.000 (0.001) loss 1.0459 (1.1376) acc 78.1250 (71.5538) lr 1.4258e-03 eta 13:03:16 +epoch [20/50] batch [900/1000] time 1.556 (1.561) data 0.000 (0.001) loss 1.0479 (1.1373) acc 81.2500 (71.5660) lr 1.4258e-03 eta 13:03:08 +epoch [20/50] batch [905/1000] time 1.537 (1.561) data 0.000 (0.001) loss 1.4160 (1.1370) acc 65.6250 (71.5746) lr 1.4258e-03 eta 13:03:00 +epoch [20/50] batch [910/1000] time 1.579 (1.561) data 0.000 (0.001) loss 0.9771 (1.1358) acc 68.7500 (71.5865) lr 1.4258e-03 eta 13:02:56 +epoch [20/50] batch [915/1000] time 1.547 (1.561) data 0.000 (0.001) loss 0.7305 (1.1346) acc 81.2500 (71.6052) lr 1.4258e-03 eta 13:02:48 +epoch [20/50] batch [920/1000] time 1.584 (1.561) data 0.001 (0.001) loss 0.9131 (1.1350) acc 75.0000 (71.5931) lr 1.4258e-03 eta 13:02:40 +epoch [20/50] batch [925/1000] time 1.536 (1.561) data 0.000 (0.001) loss 1.2812 (1.1356) acc 75.0000 (71.5980) lr 1.4258e-03 eta 13:02:33 +epoch [20/50] batch [930/1000] time 1.568 (1.561) data 0.000 (0.001) loss 0.9043 (1.1348) acc 81.2500 (71.6230) lr 1.4258e-03 eta 13:02:26 +epoch [20/50] batch [935/1000] time 1.555 (1.561) data 0.000 (0.001) loss 0.6309 (1.1344) acc 87.5000 (71.6444) lr 1.4258e-03 eta 13:02:18 +epoch [20/50] batch [940/1000] time 1.559 (1.561) data 0.000 (0.001) loss 1.6133 (1.1342) acc 62.5000 (71.6423) lr 1.4258e-03 eta 13:02:09 +epoch [20/50] batch [945/1000] time 1.552 (1.561) data 0.000 (0.001) loss 0.9653 (1.1335) acc 65.6250 (71.6303) lr 1.4258e-03 eta 13:02:01 +epoch [20/50] batch [950/1000] time 1.573 (1.561) data 0.000 (0.001) loss 1.4307 (1.1348) acc 75.0000 (71.6151) lr 1.4258e-03 eta 13:01:57 +epoch [20/50] batch [955/1000] time 1.560 (1.561) data 0.000 (0.001) loss 1.0459 (1.1343) acc 78.1250 (71.6263) lr 1.4258e-03 eta 13:01:50 +epoch [20/50] batch 
[960/1000] time 1.544 (1.561) data 0.000 (0.001) loss 0.9629 (1.1334) acc 71.8750 (71.6406) lr 1.4258e-03 eta 13:01:42 +epoch [20/50] batch [965/1000] time 1.528 (1.561) data 0.000 (0.001) loss 0.7666 (1.1325) acc 84.3750 (71.6645) lr 1.4258e-03 eta 13:01:32 +epoch [20/50] batch [970/1000] time 1.567 (1.561) data 0.000 (0.001) loss 1.9326 (1.1345) acc 62.5000 (71.6302) lr 1.4258e-03 eta 13:01:25 +epoch [20/50] batch [975/1000] time 1.543 (1.561) data 0.000 (0.001) loss 0.8652 (1.1342) acc 78.1250 (71.6410) lr 1.4258e-03 eta 13:01:15 +epoch [20/50] batch [980/1000] time 1.556 (1.561) data 0.000 (0.001) loss 1.7754 (1.1341) acc 53.1250 (71.6358) lr 1.4258e-03 eta 13:01:07 +epoch [20/50] batch [985/1000] time 1.532 (1.561) data 0.001 (0.001) loss 0.9780 (1.1356) acc 65.6250 (71.5926) lr 1.4258e-03 eta 13:00:58 +epoch [20/50] batch [990/1000] time 1.568 (1.561) data 0.000 (0.001) loss 1.6377 (1.1367) acc 71.8750 (71.5720) lr 1.4258e-03 eta 13:00:50 +epoch [20/50] batch [995/1000] time 1.553 (1.561) data 0.000 (0.001) loss 0.9790 (1.1369) acc 78.1250 (71.5735) lr 1.4258e-03 eta 13:00:40 +epoch [20/50] batch [1000/1000] time 1.541 (1.561) data 0.000 (0.001) loss 1.4287 (1.1370) acc 50.0000 (71.5750) lr 1.3681e-03 eta 13:00:29 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,193 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% +epoch [21/50] batch [5/1000] time 1.524 (1.669) data 0.001 (0.177) loss 1.1973 (1.4265) acc 68.7500 (66.8750) lr 1.3681e-03 eta 13:54:14 +epoch [21/50] batch [10/1000] time 1.559 (1.613) data 0.000 (0.089) loss 0.9585 (1.2646) acc 81.2500 (70.3125) lr 1.3681e-03 eta 13:26:02 +epoch [21/50] batch [15/1000] time 1.564 (1.598) data 0.000 (0.059) loss 1.1035 (1.2122) acc 68.7500 (71.6667) lr 1.3681e-03 eta 13:18:29 +epoch [21/50] batch [20/1000] time 1.538 (1.588) data 0.001 (0.045) loss 1.5898 (1.2149) acc 71.8750 (71.8750) lr 1.3681e-03 eta 13:13:34 +epoch [21/50] batch [25/1000] time 1.561 (1.584) data 0.000 (0.036) loss 
1.3555 (1.1823) acc 65.6250 (72.3750) lr 1.3681e-03 eta 13:11:26 +epoch [21/50] batch [30/1000] time 1.544 (1.588) data 0.000 (0.030) loss 1.3613 (1.1701) acc 65.6250 (72.6042) lr 1.3681e-03 eta 13:13:13 +epoch [21/50] batch [35/1000] time 1.548 (1.583) data 0.001 (0.026) loss 1.2598 (1.1607) acc 68.7500 (72.5000) lr 1.3681e-03 eta 13:10:48 +epoch [21/50] batch [40/1000] time 1.554 (1.580) data 0.000 (0.023) loss 1.1260 (1.1738) acc 71.8750 (72.3438) lr 1.3681e-03 eta 13:09:05 +epoch [21/50] batch [45/1000] time 1.562 (1.578) data 0.000 (0.020) loss 1.5146 (1.1585) acc 68.7500 (72.7778) lr 1.3681e-03 eta 13:07:58 +epoch [21/50] batch [50/1000] time 1.563 (1.577) data 0.000 (0.018) loss 0.9648 (1.1618) acc 71.8750 (72.7500) lr 1.3681e-03 eta 13:07:20 +epoch [21/50] batch [55/1000] time 1.543 (1.576) data 0.000 (0.017) loss 0.9199 (1.1332) acc 68.7500 (72.8977) lr 1.3681e-03 eta 13:06:22 +epoch [21/50] batch [60/1000] time 1.565 (1.574) data 0.001 (0.015) loss 1.5703 (1.1590) acc 68.7500 (72.1354) lr 1.3681e-03 eta 13:05:33 +epoch [21/50] batch [65/1000] time 1.555 (1.573) data 0.000 (0.014) loss 0.9155 (1.1531) acc 81.2500 (72.3077) lr 1.3681e-03 eta 13:04:32 +epoch [21/50] batch [70/1000] time 1.569 (1.572) data 0.000 (0.013) loss 1.2725 (1.1564) acc 59.3750 (72.0536) lr 1.3681e-03 eta 13:04:04 +epoch [21/50] batch [75/1000] time 1.541 (1.572) data 0.000 (0.012) loss 1.0352 (1.1579) acc 75.0000 (72.0833) lr 1.3681e-03 eta 13:04:16 +epoch [21/50] batch [80/1000] time 1.578 (1.572) data 0.000 (0.011) loss 0.4375 (1.1464) acc 84.3750 (72.1875) lr 1.3681e-03 eta 13:04:05 +epoch [21/50] batch [85/1000] time 1.553 (1.571) data 0.000 (0.011) loss 1.4482 (1.1409) acc 75.0000 (72.2794) lr 1.3681e-03 eta 13:03:21 +epoch [21/50] batch [90/1000] time 1.572 (1.571) data 0.000 (0.010) loss 0.5415 (1.1237) acc 84.3750 (72.5347) lr 1.3681e-03 eta 13:02:54 +epoch [21/50] batch [95/1000] time 1.557 (1.570) data 0.000 (0.010) loss 0.8594 (1.1231) acc 71.8750 (72.3026) lr 1.3681e-03 
eta 13:02:27 +epoch [21/50] batch [100/1000] time 1.544 (1.569) data 0.000 (0.009) loss 1.4463 (1.1315) acc 62.5000 (72.0938) lr 1.3681e-03 eta 13:01:55 +epoch [21/50] batch [105/1000] time 1.574 (1.569) data 0.000 (0.009) loss 0.8462 (1.1305) acc 75.0000 (72.1131) lr 1.3681e-03 eta 13:01:44 +epoch [21/50] batch [110/1000] time 1.543 (1.568) data 0.000 (0.008) loss 1.0605 (1.1237) acc 75.0000 (72.0739) lr 1.3681e-03 eta 13:01:15 +epoch [21/50] batch [115/1000] time 1.732 (1.569) data 0.001 (0.008) loss 1.0186 (1.1175) acc 71.8750 (72.2283) lr 1.3681e-03 eta 13:01:38 +epoch [21/50] batch [120/1000] time 1.533 (1.569) data 0.001 (0.008) loss 1.4199 (1.1240) acc 62.5000 (72.0052) lr 1.3681e-03 eta 13:01:19 +epoch [21/50] batch [125/1000] time 1.573 (1.569) data 0.000 (0.008) loss 0.6968 (1.1289) acc 78.1250 (71.9500) lr 1.3681e-03 eta 13:01:09 +epoch [21/50] batch [130/1000] time 1.556 (1.569) data 0.001 (0.007) loss 1.3145 (1.1407) acc 71.8750 (71.6346) lr 1.3681e-03 eta 13:00:58 +epoch [21/50] batch [135/1000] time 1.563 (1.568) data 0.000 (0.007) loss 1.6621 (1.1402) acc 62.5000 (71.5741) lr 1.3681e-03 eta 13:00:29 +epoch [21/50] batch [140/1000] time 1.552 (1.567) data 0.000 (0.007) loss 0.7681 (1.1383) acc 81.2500 (71.6964) lr 1.3681e-03 eta 13:00:01 +epoch [21/50] batch [145/1000] time 1.572 (1.567) data 0.000 (0.007) loss 1.4277 (1.1403) acc 71.8750 (71.6810) lr 1.3681e-03 eta 12:59:49 +epoch [21/50] batch [150/1000] time 1.581 (1.568) data 0.000 (0.006) loss 1.1943 (1.1458) acc 75.0000 (71.6458) lr 1.3681e-03 eta 12:59:55 +epoch [21/50] batch [155/1000] time 1.593 (1.568) data 0.000 (0.006) loss 1.1660 (1.1469) acc 71.8750 (71.5726) lr 1.3681e-03 eta 12:59:52 +epoch [21/50] batch [160/1000] time 1.564 (1.568) data 0.000 (0.006) loss 1.3066 (1.1454) acc 65.6250 (71.4648) lr 1.3681e-03 eta 12:59:43 +epoch [21/50] batch [165/1000] time 1.547 (1.567) data 0.000 (0.006) loss 1.1230 (1.1373) acc 78.1250 (71.6288) lr 1.3681e-03 eta 12:59:19 +epoch [21/50] batch 
[170/1000] time 1.537 (1.567) data 0.000 (0.006) loss 1.0283 (1.1326) acc 78.1250 (71.7096) lr 1.3681e-03 eta 12:59:01 +epoch [21/50] batch [175/1000] time 1.573 (1.567) data 0.000 (0.005) loss 1.3340 (1.1293) acc 65.6250 (71.7143) lr 1.3681e-03 eta 12:58:47 +epoch [21/50] batch [180/1000] time 1.553 (1.567) data 0.000 (0.005) loss 1.9355 (1.1274) acc 56.2500 (71.7361) lr 1.3681e-03 eta 12:58:44 +epoch [21/50] batch [185/1000] time 1.559 (1.567) data 0.000 (0.005) loss 0.7881 (1.1329) acc 84.3750 (71.7230) lr 1.3681e-03 eta 12:58:29 +epoch [21/50] batch [190/1000] time 1.556 (1.566) data 0.000 (0.005) loss 0.8604 (1.1268) acc 75.0000 (71.7434) lr 1.3681e-03 eta 12:58:15 +epoch [21/50] batch [195/1000] time 1.561 (1.566) data 0.001 (0.005) loss 0.8506 (1.1240) acc 75.0000 (71.7468) lr 1.3681e-03 eta 12:58:05 +epoch [21/50] batch [200/1000] time 1.554 (1.566) data 0.000 (0.005) loss 0.9219 (1.1221) acc 81.2500 (71.8125) lr 1.3681e-03 eta 12:57:50 +epoch [21/50] batch [205/1000] time 1.565 (1.566) data 0.000 (0.005) loss 0.9595 (1.1176) acc 75.0000 (71.9360) lr 1.3681e-03 eta 12:57:30 +epoch [21/50] batch [210/1000] time 1.575 (1.566) data 0.001 (0.005) loss 0.8589 (1.1159) acc 81.2500 (71.9494) lr 1.3681e-03 eta 12:57:20 +epoch [21/50] batch [215/1000] time 1.564 (1.566) data 0.000 (0.005) loss 1.1523 (1.1152) acc 75.0000 (71.9477) lr 1.3681e-03 eta 12:57:12 +epoch [21/50] batch [220/1000] time 1.568 (1.566) data 0.000 (0.004) loss 0.7891 (1.1117) acc 84.3750 (71.9886) lr 1.3681e-03 eta 12:57:02 +epoch [21/50] batch [225/1000] time 1.560 (1.566) data 0.000 (0.004) loss 0.4727 (1.1056) acc 90.6250 (72.2083) lr 1.3681e-03 eta 12:57:13 +epoch [21/50] batch [230/1000] time 1.561 (1.566) data 0.000 (0.004) loss 0.8638 (1.1029) acc 78.1250 (72.3234) lr 1.3681e-03 eta 12:56:54 +epoch [21/50] batch [235/1000] time 1.566 (1.566) data 0.000 (0.004) loss 1.0420 (1.1067) acc 78.1250 (72.3138) lr 1.3681e-03 eta 12:56:45 +epoch [21/50] batch [240/1000] time 1.564 (1.566) data 
0.000 (0.004) loss 1.1660 (1.1124) acc 78.1250 (72.2526) lr 1.3681e-03 eta 12:56:35 +epoch [21/50] batch [245/1000] time 1.542 (1.566) data 0.000 (0.004) loss 0.3708 (1.1080) acc 90.6250 (72.3214) lr 1.3681e-03 eta 12:56:25 +epoch [21/50] batch [250/1000] time 1.566 (1.566) data 0.000 (0.004) loss 1.5732 (1.1124) acc 59.3750 (72.1500) lr 1.3681e-03 eta 12:56:22 +epoch [21/50] batch [255/1000] time 1.558 (1.566) data 0.000 (0.004) loss 1.1260 (1.1152) acc 62.5000 (72.0588) lr 1.3681e-03 eta 12:56:16 +epoch [21/50] batch [260/1000] time 1.554 (1.566) data 0.000 (0.004) loss 1.0459 (1.1143) acc 71.8750 (72.0433) lr 1.3681e-03 eta 12:56:02 +epoch [21/50] batch [265/1000] time 1.550 (1.565) data 0.000 (0.004) loss 0.8525 (1.1143) acc 81.2500 (72.0637) lr 1.3681e-03 eta 12:55:47 +epoch [21/50] batch [270/1000] time 1.576 (1.566) data 0.000 (0.004) loss 1.2959 (1.1151) acc 71.8750 (72.0718) lr 1.3681e-03 eta 12:55:53 +epoch [21/50] batch [275/1000] time 1.561 (1.566) data 0.001 (0.004) loss 1.6143 (1.1175) acc 59.3750 (72.0455) lr 1.3681e-03 eta 12:55:41 +epoch [21/50] batch [280/1000] time 1.574 (1.566) data 0.000 (0.004) loss 0.9795 (1.1156) acc 78.1250 (72.1540) lr 1.3681e-03 eta 12:55:31 +epoch [21/50] batch [285/1000] time 1.546 (1.566) data 0.000 (0.004) loss 1.2100 (1.1203) acc 65.6250 (72.1053) lr 1.3681e-03 eta 12:55:19 +epoch [21/50] batch [290/1000] time 1.567 (1.565) data 0.000 (0.003) loss 0.6655 (1.1181) acc 84.3750 (72.1444) lr 1.3681e-03 eta 12:55:10 +epoch [21/50] batch [295/1000] time 1.563 (1.565) data 0.000 (0.003) loss 1.5068 (1.1214) acc 71.8750 (72.1186) lr 1.3681e-03 eta 12:54:59 +epoch [21/50] batch [300/1000] time 1.562 (1.565) data 0.001 (0.003) loss 1.1533 (1.1198) acc 78.1250 (72.2396) lr 1.3681e-03 eta 12:54:50 +epoch [21/50] batch [305/1000] time 1.564 (1.565) data 0.000 (0.003) loss 1.4473 (1.1207) acc 68.7500 (72.3053) lr 1.3681e-03 eta 12:54:37 +epoch [21/50] batch [310/1000] time 1.544 (1.565) data 0.000 (0.003) loss 0.9707 (1.1169) acc 
68.7500 (72.3790) lr 1.3681e-03 eta 12:54:24 +epoch [21/50] batch [315/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.2656 (1.1193) acc 71.8750 (72.3413) lr 1.3681e-03 eta 12:54:16 +epoch [21/50] batch [320/1000] time 1.574 (1.565) data 0.001 (0.003) loss 0.7285 (1.1165) acc 81.2500 (72.3535) lr 1.3681e-03 eta 12:54:06 +epoch [21/50] batch [325/1000] time 1.561 (1.565) data 0.000 (0.003) loss 1.5957 (1.1172) acc 65.6250 (72.2885) lr 1.3681e-03 eta 12:53:55 +epoch [21/50] batch [330/1000] time 1.551 (1.565) data 0.000 (0.003) loss 0.8594 (1.1174) acc 81.2500 (72.2917) lr 1.3681e-03 eta 12:53:58 +epoch [21/50] batch [335/1000] time 1.531 (1.565) data 0.000 (0.003) loss 0.8911 (1.1163) acc 78.1250 (72.3041) lr 1.3681e-03 eta 12:53:46 +epoch [21/50] batch [340/1000] time 1.547 (1.565) data 0.000 (0.003) loss 1.2061 (1.1209) acc 68.7500 (72.2059) lr 1.3681e-03 eta 12:53:37 +epoch [21/50] batch [345/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.1309 (1.1233) acc 68.7500 (72.0380) lr 1.3681e-03 eta 12:53:25 +epoch [21/50] batch [350/1000] time 1.550 (1.565) data 0.000 (0.003) loss 1.1787 (1.1242) acc 71.8750 (72.0446) lr 1.3681e-03 eta 12:53:14 +epoch [21/50] batch [355/1000] time 1.556 (1.565) data 0.000 (0.003) loss 0.8140 (1.1251) acc 84.3750 (72.0511) lr 1.3681e-03 eta 12:53:08 +epoch [21/50] batch [360/1000] time 1.565 (1.565) data 0.000 (0.003) loss 1.3633 (1.1268) acc 75.0000 (71.9792) lr 1.3681e-03 eta 12:53:05 +epoch [21/50] batch [365/1000] time 1.568 (1.565) data 0.001 (0.003) loss 2.0938 (1.1312) acc 50.0000 (71.8664) lr 1.3681e-03 eta 12:53:02 +epoch [21/50] batch [370/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.1289 (1.1311) acc 78.1250 (71.8666) lr 1.3681e-03 eta 12:52:52 +epoch [21/50] batch [375/1000] time 1.571 (1.566) data 0.000 (0.003) loss 1.0479 (1.1314) acc 75.0000 (71.8583) lr 1.3681e-03 eta 12:52:58 +epoch [21/50] batch [380/1000] time 1.557 (1.566) data 0.000 (0.003) loss 1.0986 (1.1340) acc 71.8750 (71.8257) lr 1.3681e-03 eta 
12:52:54 +epoch [21/50] batch [385/1000] time 1.575 (1.566) data 0.000 (0.003) loss 1.2217 (1.1305) acc 65.6250 (71.9156) lr 1.3681e-03 eta 12:52:46 +epoch [21/50] batch [390/1000] time 1.556 (1.566) data 0.000 (0.003) loss 1.1484 (1.1287) acc 78.1250 (71.9792) lr 1.3681e-03 eta 12:52:35 +epoch [21/50] batch [395/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.5488 (1.1303) acc 59.3750 (71.9383) lr 1.3681e-03 eta 12:52:22 +epoch [21/50] batch [400/1000] time 1.555 (1.565) data 0.000 (0.003) loss 1.0723 (1.1277) acc 78.1250 (72.0156) lr 1.3681e-03 eta 12:52:13 +epoch [21/50] batch [405/1000] time 1.545 (1.565) data 0.000 (0.003) loss 1.4609 (1.1290) acc 62.5000 (71.9907) lr 1.3681e-03 eta 12:52:03 +epoch [21/50] batch [410/1000] time 1.565 (1.565) data 0.001 (0.003) loss 0.9185 (1.1293) acc 75.0000 (71.9741) lr 1.3681e-03 eta 12:51:55 +epoch [21/50] batch [415/1000] time 1.563 (1.565) data 0.000 (0.003) loss 0.9185 (1.1280) acc 68.7500 (72.0030) lr 1.3681e-03 eta 12:51:45 +epoch [21/50] batch [420/1000] time 1.536 (1.566) data 0.000 (0.003) loss 0.9629 (1.1268) acc 81.2500 (72.0461) lr 1.3681e-03 eta 12:51:49 +epoch [21/50] batch [425/1000] time 1.550 (1.565) data 0.000 (0.003) loss 1.0303 (1.1269) acc 65.6250 (72.0294) lr 1.3681e-03 eta 12:51:39 +epoch [21/50] batch [430/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.4082 (1.1261) acc 56.2500 (72.0131) lr 1.3681e-03 eta 12:51:26 +epoch [21/50] batch [435/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.7271 (1.1246) acc 84.3750 (72.0690) lr 1.3681e-03 eta 12:51:20 +epoch [21/50] batch [440/1000] time 1.533 (1.565) data 0.000 (0.002) loss 0.8413 (1.1209) acc 78.1250 (72.1378) lr 1.3681e-03 eta 12:51:12 +epoch [21/50] batch [445/1000] time 1.581 (1.565) data 0.000 (0.002) loss 1.3027 (1.1223) acc 78.1250 (72.1489) lr 1.3681e-03 eta 12:51:03 +epoch [21/50] batch [450/1000] time 1.545 (1.565) data 0.000 (0.002) loss 0.7407 (1.1206) acc 71.8750 (72.1667) lr 1.3681e-03 eta 12:50:51 +epoch [21/50] batch 
[455/1000] time 1.570 (1.565) data 0.001 (0.002) loss 1.2188 (1.1192) acc 75.0000 (72.1978) lr 1.3681e-03 eta 12:50:40 +epoch [21/50] batch [460/1000] time 1.523 (1.565) data 0.000 (0.002) loss 0.8389 (1.1204) acc 75.0000 (72.1875) lr 1.3681e-03 eta 12:50:27 +epoch [21/50] batch [465/1000] time 1.546 (1.565) data 0.000 (0.002) loss 0.7314 (1.1199) acc 71.8750 (72.1707) lr 1.3681e-03 eta 12:50:18 +epoch [21/50] batch [470/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.7256 (1.1182) acc 81.2500 (72.1742) lr 1.3681e-03 eta 12:50:11 +epoch [21/50] batch [475/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.4065 (1.1182) acc 90.6250 (72.1842) lr 1.3681e-03 eta 12:50:02 +epoch [21/50] batch [480/1000] time 1.717 (1.565) data 0.000 (0.002) loss 0.8262 (1.1166) acc 84.3750 (72.2005) lr 1.3681e-03 eta 12:50:02 +epoch [21/50] batch [485/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.4766 (1.1181) acc 62.5000 (72.1456) lr 1.3681e-03 eta 12:49:52 +epoch [21/50] batch [490/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.1494 (1.1210) acc 68.7500 (72.1173) lr 1.3681e-03 eta 12:49:39 +epoch [21/50] batch [495/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.2227 (1.1231) acc 78.1250 (72.0896) lr 1.3681e-03 eta 12:49:30 +epoch [21/50] batch [500/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.1885 (1.1230) acc 65.6250 (72.1063) lr 1.3681e-03 eta 12:49:18 +epoch [21/50] batch [505/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.5107 (1.1232) acc 65.6250 (72.1163) lr 1.3681e-03 eta 12:49:06 +epoch [21/50] batch [510/1000] time 1.536 (1.564) data 0.001 (0.002) loss 1.7100 (1.1236) acc 62.5000 (72.1201) lr 1.3681e-03 eta 12:48:57 +epoch [21/50] batch [515/1000] time 1.545 (1.564) data 0.001 (0.002) loss 0.5830 (1.1216) acc 78.1250 (72.1541) lr 1.3681e-03 eta 12:48:46 +epoch [21/50] batch [520/1000] time 1.573 (1.564) data 0.000 (0.002) loss 1.1895 (1.1214) acc 62.5000 (72.1514) lr 1.3681e-03 eta 12:48:35 +epoch [21/50] batch [525/1000] time 1.675 (1.565) data 
0.000 (0.002) loss 1.6992 (1.1227) acc 65.6250 (72.0952) lr 1.3681e-03 eta 12:48:34 +epoch [21/50] batch [530/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.5288 (1.1236) acc 84.3750 (72.0578) lr 1.3681e-03 eta 12:48:22 +epoch [21/50] batch [535/1000] time 1.579 (1.564) data 0.000 (0.002) loss 1.1953 (1.1251) acc 68.7500 (72.0444) lr 1.3681e-03 eta 12:48:16 +epoch [21/50] batch [540/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.1201 (1.1268) acc 71.8750 (72.0197) lr 1.3681e-03 eta 12:48:07 +epoch [21/50] batch [545/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.4756 (1.1278) acc 68.7500 (72.0126) lr 1.3681e-03 eta 12:48:00 +epoch [21/50] batch [550/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.1045 (1.1266) acc 71.8750 (72.0398) lr 1.3681e-03 eta 12:47:54 +epoch [21/50] batch [555/1000] time 1.551 (1.564) data 0.001 (0.002) loss 0.6201 (1.1240) acc 81.2500 (72.0777) lr 1.3681e-03 eta 12:47:45 +epoch [21/50] batch [560/1000] time 1.562 (1.564) data 0.000 (0.002) loss 0.9873 (1.1222) acc 78.1250 (72.1429) lr 1.3681e-03 eta 12:47:35 +epoch [21/50] batch [565/1000] time 1.578 (1.564) data 0.001 (0.002) loss 1.0938 (1.1222) acc 75.0000 (72.1350) lr 1.3681e-03 eta 12:47:28 +epoch [21/50] batch [570/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.7222 (1.1241) acc 84.3750 (72.0998) lr 1.3681e-03 eta 12:47:29 +epoch [21/50] batch [575/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.8345 (1.1240) acc 78.1250 (72.1141) lr 1.3681e-03 eta 12:47:23 +epoch [21/50] batch [580/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.0645 (1.1234) acc 62.5000 (72.1067) lr 1.3681e-03 eta 12:47:15 +epoch [21/50] batch [585/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.8315 (1.1249) acc 75.0000 (72.1207) lr 1.3681e-03 eta 12:47:06 +epoch [21/50] batch [590/1000] time 1.585 (1.565) data 0.000 (0.002) loss 1.0898 (1.1239) acc 71.8750 (72.1028) lr 1.3681e-03 eta 12:47:00 +epoch [21/50] batch [595/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.2998 (1.1228) acc 
68.7500 (72.1113) lr 1.3681e-03 eta 12:46:51 +epoch [21/50] batch [600/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.1885 (1.1235) acc 68.7500 (72.1094) lr 1.3681e-03 eta 12:46:42 +epoch [21/50] batch [605/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.7661 (1.1239) acc 81.2500 (72.0661) lr 1.3681e-03 eta 12:46:35 +epoch [21/50] batch [610/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.4844 (1.1242) acc 62.5000 (72.0184) lr 1.3681e-03 eta 12:46:29 +epoch [21/50] batch [615/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.6279 (1.1237) acc 71.8750 (72.0173) lr 1.3681e-03 eta 12:46:22 +epoch [21/50] batch [620/1000] time 1.581 (1.565) data 0.000 (0.002) loss 0.6221 (1.1258) acc 84.3750 (71.9909) lr 1.3681e-03 eta 12:46:17 +epoch [21/50] batch [625/1000] time 1.600 (1.565) data 0.000 (0.002) loss 0.9600 (1.1271) acc 84.3750 (71.9650) lr 1.3681e-03 eta 12:46:09 +epoch [21/50] batch [630/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.6182 (1.1276) acc 65.6250 (71.9841) lr 1.3681e-03 eta 12:45:59 +epoch [21/50] batch [635/1000] time 1.572 (1.565) data 0.001 (0.002) loss 0.6777 (1.1280) acc 81.2500 (71.9833) lr 1.3681e-03 eta 12:46:02 +epoch [21/50] batch [640/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.2656 (1.1288) acc 71.8750 (71.9629) lr 1.3681e-03 eta 12:45:52 +epoch [21/50] batch [645/1000] time 1.552 (1.565) data 0.000 (0.002) loss 0.9595 (1.1301) acc 68.7500 (71.9283) lr 1.3681e-03 eta 12:45:41 +epoch [21/50] batch [650/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.7949 (1.1313) acc 65.6250 (71.9375) lr 1.3681e-03 eta 12:45:33 +epoch [21/50] batch [655/1000] time 1.533 (1.565) data 0.000 (0.002) loss 1.3145 (1.1341) acc 65.6250 (71.8893) lr 1.3681e-03 eta 12:45:23 +epoch [21/50] batch [660/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.3008 (1.1351) acc 62.5000 (71.8561) lr 1.3681e-03 eta 12:45:13 +epoch [21/50] batch [665/1000] time 1.542 (1.565) data 0.000 (0.002) loss 1.5322 (1.1359) acc 59.3750 (71.8186) lr 1.3681e-03 eta 
12:45:02 +epoch [21/50] batch [670/1000] time 1.570 (1.565) data 0.001 (0.002) loss 0.9292 (1.1351) acc 78.1250 (71.8470) lr 1.3681e-03 eta 12:44:54 +epoch [21/50] batch [675/1000] time 1.572 (1.565) data 0.000 (0.002) loss 0.5952 (1.1340) acc 84.3750 (71.8704) lr 1.3681e-03 eta 12:44:47 +epoch [21/50] batch [680/1000] time 1.601 (1.565) data 0.000 (0.002) loss 1.4590 (1.1338) acc 71.8750 (71.8704) lr 1.3681e-03 eta 12:44:49 +epoch [21/50] batch [685/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.6997 (1.1350) acc 81.2500 (71.8568) lr 1.3681e-03 eta 12:44:40 +epoch [21/50] batch [690/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7803 (1.1358) acc 78.1250 (71.8524) lr 1.3681e-03 eta 12:44:36 +epoch [21/50] batch [695/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.6206 (1.1351) acc 84.3750 (71.8525) lr 1.3681e-03 eta 12:44:30 +epoch [21/50] batch [700/1000] time 1.594 (1.565) data 0.000 (0.002) loss 0.8184 (1.1346) acc 71.8750 (71.8393) lr 1.3681e-03 eta 12:44:23 +epoch [21/50] batch [705/1000] time 1.585 (1.565) data 0.000 (0.002) loss 1.1465 (1.1346) acc 78.1250 (71.8528) lr 1.3681e-03 eta 12:44:16 +epoch [21/50] batch [710/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.7954 (1.1333) acc 78.1250 (71.8926) lr 1.3681e-03 eta 12:44:10 +epoch [21/50] batch [715/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.0518 (1.1336) acc 78.1250 (71.8881) lr 1.3681e-03 eta 12:44:02 +epoch [21/50] batch [720/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.8628 (1.1343) acc 81.2500 (71.9010) lr 1.3681e-03 eta 12:44:00 +epoch [21/50] batch [725/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.8525 (1.1334) acc 81.2500 (71.9138) lr 1.3681e-03 eta 12:43:52 +epoch [21/50] batch [730/1000] time 1.606 (1.566) data 0.000 (0.002) loss 1.0635 (1.1334) acc 71.8750 (71.9349) lr 1.3681e-03 eta 12:43:48 +epoch [21/50] batch [735/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.0146 (1.1334) acc 75.0000 (71.9303) lr 1.3681e-03 eta 12:43:42 +epoch [21/50] batch 
[740/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.7437 (1.1320) acc 81.2500 (71.9595) lr 1.3681e-03 eta 12:43:31 +epoch [21/50] batch [745/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.2178 (1.1311) acc 71.8750 (71.9966) lr 1.3681e-03 eta 12:43:23 +epoch [21/50] batch [750/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.9927 (1.1310) acc 68.7500 (71.9833) lr 1.3681e-03 eta 12:43:14 +epoch [21/50] batch [755/1000] time 1.556 (1.566) data 0.000 (0.002) loss 2.1406 (1.1321) acc 53.1250 (71.9743) lr 1.3681e-03 eta 12:43:04 +epoch [21/50] batch [760/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.8911 (1.1321) acc 71.8750 (71.9449) lr 1.3681e-03 eta 12:42:55 +epoch [21/50] batch [765/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.1924 (1.1324) acc 59.3750 (71.9158) lr 1.3681e-03 eta 12:42:46 +epoch [21/50] batch [770/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.9414 (1.1323) acc 78.1250 (71.9115) lr 1.3681e-03 eta 12:42:36 +epoch [21/50] batch [775/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.7300 (1.1316) acc 81.2500 (71.9476) lr 1.3681e-03 eta 12:42:26 +epoch [21/50] batch [780/1000] time 1.527 (1.565) data 0.001 (0.002) loss 1.1982 (1.1300) acc 56.2500 (71.9631) lr 1.3681e-03 eta 12:42:15 +epoch [21/50] batch [785/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.7788 (1.1299) acc 78.1250 (71.9546) lr 1.3681e-03 eta 12:42:09 +epoch [21/50] batch [790/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.6514 (1.1306) acc 71.8750 (71.9422) lr 1.3681e-03 eta 12:42:00 +epoch [21/50] batch [795/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0049 (1.1322) acc 78.1250 (71.9379) lr 1.3681e-03 eta 12:41:49 +epoch [21/50] batch [800/1000] time 1.575 (1.565) data 0.000 (0.002) loss 1.3311 (1.1315) acc 68.7500 (71.9375) lr 1.3681e-03 eta 12:41:41 +epoch [21/50] batch [805/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.0078 (1.1307) acc 78.1250 (71.9565) lr 1.3681e-03 eta 12:41:34 +epoch [21/50] batch [810/1000] time 1.556 (1.565) data 
0.001 (0.002) loss 1.1709 (1.1304) acc 75.0000 (71.9483) lr 1.3681e-03 eta 12:41:26 +epoch [21/50] batch [815/1000] time 1.582 (1.565) data 0.000 (0.002) loss 0.9160 (1.1285) acc 71.8750 (71.9670) lr 1.3681e-03 eta 12:41:19 +epoch [21/50] batch [820/1000] time 1.546 (1.565) data 0.000 (0.002) loss 0.8564 (1.1272) acc 78.1250 (72.0008) lr 1.3681e-03 eta 12:41:09 +epoch [21/50] batch [825/1000] time 1.575 (1.565) data 0.001 (0.001) loss 1.3789 (1.1290) acc 68.7500 (72.0038) lr 1.3681e-03 eta 12:41:02 +epoch [21/50] batch [830/1000] time 1.572 (1.565) data 0.000 (0.001) loss 0.7515 (1.1286) acc 81.2500 (71.9992) lr 1.3681e-03 eta 12:41:00 +epoch [21/50] batch [835/1000] time 1.547 (1.565) data 0.000 (0.001) loss 0.6489 (1.1277) acc 75.0000 (72.0060) lr 1.3681e-03 eta 12:40:52 +epoch [21/50] batch [840/1000] time 1.545 (1.565) data 0.000 (0.001) loss 1.4863 (1.1273) acc 53.1250 (71.9903) lr 1.3681e-03 eta 12:40:40 +epoch [21/50] batch [845/1000] time 1.569 (1.565) data 0.000 (0.001) loss 1.2021 (1.1271) acc 68.7500 (71.9822) lr 1.3681e-03 eta 12:40:31 +epoch [21/50] batch [850/1000] time 1.574 (1.565) data 0.000 (0.001) loss 1.4990 (1.1279) acc 65.6250 (71.9596) lr 1.3681e-03 eta 12:40:24 +epoch [21/50] batch [855/1000] time 1.548 (1.565) data 0.000 (0.001) loss 0.6953 (1.1267) acc 84.3750 (71.9956) lr 1.3681e-03 eta 12:40:16 +epoch [21/50] batch [860/1000] time 1.569 (1.565) data 0.000 (0.001) loss 1.2930 (1.1287) acc 68.7500 (71.9695) lr 1.3681e-03 eta 12:40:08 +epoch [21/50] batch [865/1000] time 1.570 (1.565) data 0.000 (0.001) loss 0.9727 (1.1280) acc 68.7500 (71.9473) lr 1.3681e-03 eta 12:40:01 +epoch [21/50] batch [870/1000] time 1.725 (1.565) data 0.001 (0.001) loss 1.1875 (1.1274) acc 68.7500 (71.9540) lr 1.3681e-03 eta 12:40:00 +epoch [21/50] batch [875/1000] time 1.545 (1.565) data 0.000 (0.001) loss 0.8369 (1.1264) acc 84.3750 (71.9821) lr 1.3681e-03 eta 12:39:50 +epoch [21/50] batch [880/1000] time 1.550 (1.565) data 0.000 (0.001) loss 1.2676 (1.1265) acc 
71.8750 (71.9815) lr 1.3681e-03 eta 12:39:39 +epoch [21/50] batch [885/1000] time 1.574 (1.565) data 0.000 (0.001) loss 1.0576 (1.1254) acc 71.8750 (71.9915) lr 1.3681e-03 eta 12:39:31 +epoch [21/50] batch [890/1000] time 1.579 (1.565) data 0.000 (0.001) loss 0.8496 (1.1258) acc 75.0000 (71.9628) lr 1.3681e-03 eta 12:39:22 +epoch [21/50] batch [895/1000] time 1.547 (1.565) data 0.000 (0.001) loss 0.6733 (1.1239) acc 78.1250 (71.9797) lr 1.3681e-03 eta 12:39:13 +epoch [21/50] batch [900/1000] time 1.554 (1.565) data 0.000 (0.001) loss 0.8472 (1.1226) acc 71.8750 (72.0069) lr 1.3681e-03 eta 12:39:06 +epoch [21/50] batch [905/1000] time 1.582 (1.565) data 0.000 (0.001) loss 0.7134 (1.1218) acc 78.1250 (72.0304) lr 1.3681e-03 eta 12:39:00 +epoch [21/50] batch [910/1000] time 1.566 (1.565) data 0.000 (0.001) loss 0.6323 (1.1205) acc 84.3750 (72.0673) lr 1.3681e-03 eta 12:38:51 +epoch [21/50] batch [915/1000] time 1.584 (1.565) data 0.001 (0.001) loss 0.7202 (1.1209) acc 81.2500 (72.0594) lr 1.3681e-03 eta 12:38:43 +epoch [21/50] batch [920/1000] time 1.553 (1.565) data 0.000 (0.001) loss 1.1846 (1.1206) acc 71.8750 (72.0584) lr 1.3681e-03 eta 12:38:34 +epoch [21/50] batch [925/1000] time 1.573 (1.565) data 0.001 (0.001) loss 0.8462 (1.1199) acc 78.1250 (72.0743) lr 1.3681e-03 eta 12:38:26 +epoch [21/50] batch [930/1000] time 1.587 (1.565) data 0.000 (0.001) loss 0.9053 (1.1194) acc 71.8750 (72.0867) lr 1.3681e-03 eta 12:38:18 +epoch [21/50] batch [935/1000] time 1.547 (1.565) data 0.000 (0.001) loss 0.4727 (1.1189) acc 81.2500 (72.0956) lr 1.3681e-03 eta 12:38:12 +epoch [21/50] batch [940/1000] time 1.534 (1.565) data 0.001 (0.001) loss 1.4727 (1.1199) acc 62.5000 (72.0844) lr 1.3681e-03 eta 12:38:02 +epoch [21/50] batch [945/1000] time 1.546 (1.565) data 0.001 (0.001) loss 0.9053 (1.1211) acc 78.1250 (72.0734) lr 1.3681e-03 eta 12:37:52 +epoch [21/50] batch [950/1000] time 1.560 (1.565) data 0.000 (0.001) loss 1.2754 (1.1213) acc 59.3750 (72.0625) lr 1.3681e-03 eta 
12:37:44 +epoch [21/50] batch [955/1000] time 1.550 (1.565) data 0.000 (0.001) loss 1.2246 (1.1221) acc 65.6250 (72.0386) lr 1.3681e-03 eta 12:37:35 +epoch [21/50] batch [960/1000] time 1.575 (1.565) data 0.001 (0.001) loss 1.1406 (1.1226) acc 84.3750 (72.0410) lr 1.3681e-03 eta 12:37:25 +epoch [21/50] batch [965/1000] time 1.553 (1.565) data 0.001 (0.001) loss 1.2109 (1.1219) acc 65.6250 (72.0369) lr 1.3681e-03 eta 12:37:16 +epoch [21/50] batch [970/1000] time 1.561 (1.565) data 0.000 (0.001) loss 1.3096 (1.1225) acc 75.0000 (72.0425) lr 1.3681e-03 eta 12:37:06 +epoch [21/50] batch [975/1000] time 1.549 (1.565) data 0.001 (0.001) loss 1.2490 (1.1218) acc 75.0000 (72.0513) lr 1.3681e-03 eta 12:36:55 +epoch [21/50] batch [980/1000] time 1.564 (1.565) data 0.000 (0.001) loss 1.2041 (1.1226) acc 71.8750 (72.0568) lr 1.3681e-03 eta 12:36:50 +epoch [21/50] batch [985/1000] time 1.559 (1.565) data 0.001 (0.001) loss 0.6050 (1.1216) acc 84.3750 (72.0876) lr 1.3681e-03 eta 12:36:41 +epoch [21/50] batch [990/1000] time 1.562 (1.565) data 0.000 (0.001) loss 1.1621 (1.1210) acc 62.5000 (72.0802) lr 1.3681e-03 eta 12:36:33 +epoch [21/50] batch [995/1000] time 1.544 (1.565) data 0.000 (0.001) loss 0.8076 (1.1209) acc 68.7500 (72.0729) lr 1.3681e-03 eta 12:36:24 +epoch [21/50] batch [1000/1000] time 1.569 (1.565) data 0.000 (0.001) loss 0.8716 (1.1211) acc 78.1250 (72.0750) lr 1.3090e-03 eta 12:36:15 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,192 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% +epoch [22/50] batch [5/1000] time 1.560 (1.682) data 0.001 (0.176) loss 0.5220 (0.8425) acc 84.3750 (78.7500) lr 1.3090e-03 eta 13:32:43 +epoch [22/50] batch [10/1000] time 1.560 (1.618) data 0.000 (0.088) loss 0.6963 (0.9248) acc 75.0000 (76.8750) lr 1.3090e-03 eta 13:01:47 +epoch [22/50] batch [15/1000] time 1.549 (1.597) data 0.001 (0.059) loss 1.1230 (0.9644) acc 71.8750 (75.8333) lr 1.3090e-03 eta 12:51:40 +epoch [22/50] batch [20/1000] time 1.580 
(1.604) data 0.001 (0.044) loss 1.6143 (1.0203) acc 56.2500 (74.0625) lr 1.3090e-03 eta 12:54:42 +epoch [22/50] batch [25/1000] time 1.565 (1.596) data 0.001 (0.036) loss 0.8335 (0.9738) acc 65.6250 (74.3750) lr 1.3090e-03 eta 12:50:42 +epoch [22/50] batch [30/1000] time 1.579 (1.591) data 0.001 (0.030) loss 0.7495 (0.9841) acc 78.1250 (74.0625) lr 1.3090e-03 eta 12:47:57 +epoch [22/50] batch [35/1000] time 1.546 (1.585) data 0.000 (0.026) loss 1.0879 (0.9650) acc 68.7500 (74.4643) lr 1.3090e-03 eta 12:45:02 +epoch [22/50] batch [40/1000] time 1.538 (1.580) data 0.000 (0.022) loss 0.9360 (0.9801) acc 81.2500 (74.4531) lr 1.3090e-03 eta 12:42:32 +epoch [22/50] batch [45/1000] time 1.557 (1.578) data 0.000 (0.020) loss 0.9565 (1.0074) acc 75.0000 (73.6111) lr 1.3090e-03 eta 12:41:19 +epoch [22/50] batch [50/1000] time 1.541 (1.575) data 0.001 (0.018) loss 0.7866 (0.9980) acc 71.8750 (74.1250) lr 1.3090e-03 eta 12:40:05 +epoch [22/50] batch [55/1000] time 1.568 (1.574) data 0.001 (0.016) loss 0.7520 (1.0096) acc 78.1250 (74.0909) lr 1.3090e-03 eta 12:39:05 +epoch [22/50] batch [60/1000] time 1.569 (1.572) data 0.001 (0.015) loss 0.9492 (1.0013) acc 71.8750 (74.3229) lr 1.3090e-03 eta 12:38:21 +epoch [22/50] batch [65/1000] time 1.559 (1.572) data 0.000 (0.014) loss 1.1592 (1.0153) acc 62.5000 (73.8942) lr 1.3090e-03 eta 12:38:01 +epoch [22/50] batch [70/1000] time 1.551 (1.571) data 0.001 (0.013) loss 0.4736 (1.0158) acc 87.5000 (74.0179) lr 1.3090e-03 eta 12:37:21 +epoch [22/50] batch [75/1000] time 1.539 (1.570) data 0.001 (0.012) loss 0.9941 (1.0124) acc 75.0000 (73.7917) lr 1.3090e-03 eta 12:36:47 +epoch [22/50] batch [80/1000] time 1.563 (1.572) data 0.001 (0.011) loss 0.4521 (1.0036) acc 87.5000 (74.1016) lr 1.3090e-03 eta 12:37:36 +epoch [22/50] batch [85/1000] time 1.548 (1.570) data 0.001 (0.011) loss 0.8330 (1.0025) acc 81.2500 (74.1176) lr 1.3090e-03 eta 12:36:48 +epoch [22/50] batch [90/1000] time 1.552 (1.570) data 0.001 (0.010) loss 1.1543 (1.0076) acc 
68.7500 (73.9236) lr 1.3090e-03 eta 12:36:28 +epoch [22/50] batch [95/1000] time 1.559 (1.570) data 0.000 (0.010) loss 1.1738 (1.0106) acc 71.8750 (73.8487) lr 1.3090e-03 eta 12:36:15 +epoch [22/50] batch [100/1000] time 1.561 (1.569) data 0.000 (0.009) loss 0.9839 (1.0233) acc 71.8750 (73.7188) lr 1.3090e-03 eta 12:35:50 +epoch [22/50] batch [105/1000] time 1.536 (1.569) data 0.000 (0.009) loss 1.2520 (1.0354) acc 68.7500 (73.6905) lr 1.3090e-03 eta 12:35:38 +epoch [22/50] batch [110/1000] time 1.556 (1.569) data 0.001 (0.008) loss 0.7822 (1.0365) acc 78.1250 (73.7784) lr 1.3090e-03 eta 12:35:14 +epoch [22/50] batch [115/1000] time 1.560 (1.568) data 0.001 (0.008) loss 1.5029 (1.0388) acc 62.5000 (73.9130) lr 1.3090e-03 eta 12:34:52 +epoch [22/50] batch [120/1000] time 1.569 (1.568) data 0.001 (0.008) loss 1.1426 (1.0479) acc 75.0000 (73.6719) lr 1.3090e-03 eta 12:34:43 +epoch [22/50] batch [125/1000] time 1.572 (1.569) data 0.000 (0.008) loss 1.0156 (1.0538) acc 68.7500 (73.6250) lr 1.3090e-03 eta 12:35:19 +epoch [22/50] batch [130/1000] time 1.540 (1.569) data 0.000 (0.007) loss 0.4160 (1.0503) acc 90.6250 (73.7019) lr 1.3090e-03 eta 12:34:51 +epoch [22/50] batch [135/1000] time 1.573 (1.569) data 0.000 (0.007) loss 1.0059 (1.0560) acc 81.2500 (73.7269) lr 1.3090e-03 eta 12:34:39 +epoch [22/50] batch [140/1000] time 1.549 (1.569) data 0.001 (0.007) loss 0.8145 (1.0504) acc 84.3750 (73.8839) lr 1.3090e-03 eta 12:34:27 +epoch [22/50] batch [145/1000] time 1.540 (1.568) data 0.001 (0.007) loss 0.5015 (1.0500) acc 90.6250 (73.9009) lr 1.3090e-03 eta 12:34:11 +epoch [22/50] batch [150/1000] time 1.561 (1.568) data 0.000 (0.006) loss 1.2793 (1.0531) acc 59.3750 (73.7708) lr 1.3090e-03 eta 12:33:45 +epoch [22/50] batch [155/1000] time 1.554 (1.567) data 0.000 (0.006) loss 1.2197 (1.0542) acc 68.7500 (73.6694) lr 1.3090e-03 eta 12:33:23 +epoch [22/50] batch [160/1000] time 1.578 (1.567) data 0.001 (0.006) loss 0.9453 (1.0595) acc 68.7500 (73.4766) lr 1.3090e-03 eta 
12:33:04 +epoch [22/50] batch [165/1000] time 1.564 (1.567) data 0.001 (0.006) loss 1.1670 (1.0696) acc 75.0000 (73.3523) lr 1.3090e-03 eta 12:32:55 +epoch [22/50] batch [170/1000] time 1.554 (1.567) data 0.000 (0.006) loss 0.9194 (1.0679) acc 78.1250 (73.3272) lr 1.3090e-03 eta 12:33:02 +epoch [22/50] batch [175/1000] time 1.547 (1.567) data 0.000 (0.006) loss 1.4502 (1.0689) acc 62.5000 (73.1786) lr 1.3090e-03 eta 12:32:42 +epoch [22/50] batch [180/1000] time 1.555 (1.566) data 0.000 (0.005) loss 0.6333 (1.0736) acc 90.6250 (73.1076) lr 1.3090e-03 eta 12:32:21 +epoch [22/50] batch [185/1000] time 1.560 (1.566) data 0.000 (0.005) loss 0.9727 (1.0706) acc 78.1250 (73.2264) lr 1.3090e-03 eta 12:32:07 +epoch [22/50] batch [190/1000] time 1.558 (1.566) data 0.000 (0.005) loss 1.3994 (1.0730) acc 71.8750 (73.2237) lr 1.3090e-03 eta 12:31:58 +epoch [22/50] batch [195/1000] time 1.582 (1.566) data 0.000 (0.005) loss 1.3652 (1.0738) acc 62.5000 (73.1571) lr 1.3090e-03 eta 12:31:51 +epoch [22/50] batch [200/1000] time 1.530 (1.566) data 0.000 (0.005) loss 0.9927 (1.0785) acc 75.0000 (73.0938) lr 1.3090e-03 eta 12:31:33 +epoch [22/50] batch [205/1000] time 1.563 (1.566) data 0.000 (0.005) loss 1.2363 (1.0826) acc 78.1250 (73.0945) lr 1.3090e-03 eta 12:31:25 +epoch [22/50] batch [210/1000] time 1.542 (1.566) data 0.000 (0.005) loss 1.4336 (1.0827) acc 65.6250 (73.1101) lr 1.3090e-03 eta 12:31:13 +epoch [22/50] batch [215/1000] time 1.556 (1.565) data 0.000 (0.005) loss 1.1133 (1.0794) acc 75.0000 (73.2122) lr 1.3090e-03 eta 12:31:00 +epoch [22/50] batch [220/1000] time 1.529 (1.565) data 0.001 (0.004) loss 1.7432 (1.0821) acc 62.5000 (73.1392) lr 1.3090e-03 eta 12:30:43 +epoch [22/50] batch [225/1000] time 1.549 (1.565) data 0.000 (0.004) loss 1.5684 (1.0830) acc 62.5000 (73.1806) lr 1.3090e-03 eta 12:30:37 +epoch [22/50] batch [230/1000] time 1.740 (1.566) data 0.001 (0.004) loss 0.9912 (1.0794) acc 71.8750 (73.3016) lr 1.3090e-03 eta 12:30:50 +epoch [22/50] batch 
[235/1000] time 1.554 (1.566) data 0.000 (0.004) loss 0.9399 (1.0837) acc 78.1250 (73.1649) lr 1.3090e-03 eta 12:30:38 +epoch [22/50] batch [240/1000] time 1.558 (1.566) data 0.000 (0.004) loss 1.2285 (1.0847) acc 62.5000 (73.0990) lr 1.3090e-03 eta 12:30:25 +epoch [22/50] batch [245/1000] time 1.537 (1.565) data 0.000 (0.004) loss 1.7109 (1.0882) acc 62.5000 (73.0740) lr 1.3090e-03 eta 12:30:05 +epoch [22/50] batch [250/1000] time 1.552 (1.565) data 0.000 (0.004) loss 1.3652 (1.0917) acc 68.7500 (73.0250) lr 1.3090e-03 eta 12:29:51 +epoch [22/50] batch [255/1000] time 1.556 (1.565) data 0.001 (0.004) loss 1.2354 (1.0941) acc 68.7500 (72.9289) lr 1.3090e-03 eta 12:29:35 +epoch [22/50] batch [260/1000] time 1.558 (1.564) data 0.000 (0.004) loss 1.0527 (1.0943) acc 68.7500 (72.8966) lr 1.3090e-03 eta 12:29:23 +epoch [22/50] batch [265/1000] time 1.551 (1.564) data 0.000 (0.004) loss 2.2930 (1.1001) acc 40.6250 (72.7358) lr 1.3090e-03 eta 12:29:07 +epoch [22/50] batch [270/1000] time 1.567 (1.564) data 0.000 (0.004) loss 1.3516 (1.1020) acc 62.5000 (72.6736) lr 1.3090e-03 eta 12:28:57 +epoch [22/50] batch [275/1000] time 1.691 (1.564) data 0.000 (0.004) loss 1.0107 (1.1000) acc 81.2500 (72.6932) lr 1.3090e-03 eta 12:28:54 +epoch [22/50] batch [280/1000] time 1.525 (1.564) data 0.000 (0.004) loss 1.2793 (1.1032) acc 65.6250 (72.6228) lr 1.3090e-03 eta 12:28:40 +epoch [22/50] batch [285/1000] time 1.573 (1.564) data 0.001 (0.004) loss 1.1904 (1.1032) acc 75.0000 (72.5877) lr 1.3090e-03 eta 12:28:29 +epoch [22/50] batch [290/1000] time 1.582 (1.564) data 0.001 (0.003) loss 1.4141 (1.1052) acc 56.2500 (72.5323) lr 1.3090e-03 eta 12:28:23 +epoch [22/50] batch [295/1000] time 1.559 (1.564) data 0.000 (0.003) loss 1.1553 (1.1082) acc 62.5000 (72.5318) lr 1.3090e-03 eta 12:28:14 +epoch [22/50] batch [300/1000] time 1.539 (1.564) data 0.001 (0.003) loss 0.7690 (1.1056) acc 78.1250 (72.6250) lr 1.3090e-03 eta 12:28:02 +epoch [22/50] batch [305/1000] time 1.561 (1.564) data 
0.001 (0.003) loss 1.3857 (1.1024) acc 68.7500 (72.6947) lr 1.3090e-03 eta 12:27:52 +epoch [22/50] batch [310/1000] time 1.550 (1.564) data 0.001 (0.003) loss 1.4424 (1.1014) acc 65.6250 (72.7016) lr 1.3090e-03 eta 12:27:37 +epoch [22/50] batch [315/1000] time 1.568 (1.564) data 0.000 (0.003) loss 1.2900 (1.1012) acc 56.2500 (72.7183) lr 1.3090e-03 eta 12:27:32 +epoch [22/50] batch [320/1000] time 1.528 (1.564) data 0.000 (0.003) loss 1.3916 (1.1007) acc 78.1250 (72.7832) lr 1.3090e-03 eta 12:27:36 +epoch [22/50] batch [325/1000] time 1.569 (1.564) data 0.000 (0.003) loss 1.0596 (1.1005) acc 75.0000 (72.7981) lr 1.3090e-03 eta 12:27:24 +epoch [22/50] batch [330/1000] time 1.559 (1.564) data 0.000 (0.003) loss 0.9956 (1.0981) acc 78.1250 (72.8504) lr 1.3090e-03 eta 12:27:15 +epoch [22/50] batch [335/1000] time 1.546 (1.564) data 0.001 (0.003) loss 1.2725 (1.1022) acc 62.5000 (72.7519) lr 1.3090e-03 eta 12:27:07 +epoch [22/50] batch [340/1000] time 1.569 (1.564) data 0.000 (0.003) loss 0.7720 (1.1013) acc 71.8750 (72.7574) lr 1.3090e-03 eta 12:26:57 +epoch [22/50] batch [345/1000] time 1.541 (1.564) data 0.000 (0.003) loss 0.8525 (1.1015) acc 71.8750 (72.6902) lr 1.3090e-03 eta 12:26:45 +epoch [22/50] batch [350/1000] time 1.547 (1.564) data 0.000 (0.003) loss 0.4114 (1.0991) acc 87.5000 (72.7321) lr 1.3090e-03 eta 12:26:37 +epoch [22/50] batch [355/1000] time 1.542 (1.564) data 0.000 (0.003) loss 1.0215 (1.0957) acc 75.0000 (72.7993) lr 1.3090e-03 eta 12:26:33 +epoch [22/50] batch [360/1000] time 1.560 (1.564) data 0.000 (0.003) loss 1.0771 (1.0942) acc 65.6250 (72.7517) lr 1.3090e-03 eta 12:26:24 +epoch [22/50] batch [365/1000] time 1.576 (1.564) data 0.000 (0.003) loss 0.8013 (1.0915) acc 78.1250 (72.7911) lr 1.3090e-03 eta 12:26:13 +epoch [22/50] batch [370/1000] time 1.557 (1.563) data 0.001 (0.003) loss 0.6270 (1.0895) acc 71.8750 (72.8041) lr 1.3090e-03 eta 12:26:02 +epoch [22/50] batch [375/1000] time 1.565 (1.563) data 0.000 (0.003) loss 0.9834 (1.0905) acc 
68.7500 (72.7917) lr 1.3090e-03 eta 12:25:51 +epoch [22/50] batch [380/1000] time 1.579 (1.564) data 0.001 (0.003) loss 0.9976 (1.0925) acc 71.8750 (72.7796) lr 1.3090e-03 eta 12:25:49 +epoch [22/50] batch [385/1000] time 1.567 (1.564) data 0.000 (0.003) loss 0.8315 (1.0922) acc 71.8750 (72.7597) lr 1.3090e-03 eta 12:25:50 +epoch [22/50] batch [390/1000] time 1.557 (1.564) data 0.000 (0.003) loss 0.5532 (1.0900) acc 84.3750 (72.7804) lr 1.3090e-03 eta 12:25:42 +epoch [22/50] batch [395/1000] time 1.555 (1.564) data 0.000 (0.003) loss 1.3750 (1.0907) acc 65.6250 (72.7453) lr 1.3090e-03 eta 12:25:32 +epoch [22/50] batch [400/1000] time 1.542 (1.564) data 0.000 (0.003) loss 1.2920 (1.0905) acc 71.8750 (72.7578) lr 1.3090e-03 eta 12:25:21 +epoch [22/50] batch [405/1000] time 1.552 (1.564) data 0.000 (0.003) loss 1.3896 (1.0925) acc 65.6250 (72.7083) lr 1.3090e-03 eta 12:25:11 +epoch [22/50] batch [410/1000] time 1.566 (1.564) data 0.001 (0.003) loss 1.2568 (1.0932) acc 75.0000 (72.7287) lr 1.3090e-03 eta 12:25:01 +epoch [22/50] batch [415/1000] time 1.554 (1.563) data 0.001 (0.003) loss 1.5312 (1.0924) acc 68.7500 (72.7334) lr 1.3090e-03 eta 12:24:49 +epoch [22/50] batch [420/1000] time 1.558 (1.563) data 0.000 (0.003) loss 0.6714 (1.0907) acc 81.2500 (72.7753) lr 1.3090e-03 eta 12:24:43 +epoch [22/50] batch [425/1000] time 1.559 (1.563) data 0.000 (0.003) loss 1.5264 (1.0919) acc 65.6250 (72.7574) lr 1.3090e-03 eta 12:24:33 +epoch [22/50] batch [430/1000] time 1.554 (1.564) data 0.000 (0.002) loss 0.9438 (1.0908) acc 75.0000 (72.7907) lr 1.3090e-03 eta 12:24:37 +epoch [22/50] batch [435/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.1396 (1.0912) acc 62.5000 (72.7874) lr 1.3090e-03 eta 12:24:28 +epoch [22/50] batch [440/1000] time 1.599 (1.564) data 0.000 (0.002) loss 1.0215 (1.0935) acc 71.8750 (72.7486) lr 1.3090e-03 eta 12:24:22 +epoch [22/50] batch [445/1000] time 1.543 (1.564) data 0.000 (0.002) loss 1.4189 (1.0984) acc 65.6250 (72.6334) lr 1.3090e-03 eta 
12:24:12 +epoch [22/50] batch [450/1000] time 1.556 (1.564) data 0.000 (0.002) loss 1.3711 (1.0960) acc 53.1250 (72.6458) lr 1.3090e-03 eta 12:24:03 +epoch [22/50] batch [455/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.3047 (1.0959) acc 71.8750 (72.6442) lr 1.3090e-03 eta 12:23:47 +epoch [22/50] batch [460/1000] time 1.573 (1.563) data 0.000 (0.002) loss 0.8711 (1.0980) acc 75.0000 (72.5611) lr 1.3090e-03 eta 12:23:36 +epoch [22/50] batch [465/1000] time 1.554 (1.563) data 0.001 (0.002) loss 1.4658 (1.1005) acc 62.5000 (72.5067) lr 1.3090e-03 eta 12:23:25 +epoch [22/50] batch [470/1000] time 1.562 (1.563) data 0.000 (0.002) loss 1.6006 (1.1027) acc 68.7500 (72.4801) lr 1.3090e-03 eta 12:23:25 +epoch [22/50] batch [475/1000] time 1.571 (1.563) data 0.000 (0.002) loss 1.1475 (1.1022) acc 68.7500 (72.4474) lr 1.3090e-03 eta 12:23:15 +epoch [22/50] batch [480/1000] time 1.552 (1.563) data 0.000 (0.002) loss 0.9360 (1.1005) acc 68.7500 (72.4870) lr 1.3090e-03 eta 12:23:04 +epoch [22/50] batch [485/1000] time 1.557 (1.563) data 0.000 (0.002) loss 0.5913 (1.0986) acc 81.2500 (72.4936) lr 1.3090e-03 eta 12:22:54 +epoch [22/50] batch [490/1000] time 1.576 (1.563) data 0.001 (0.002) loss 1.1084 (1.1000) acc 68.7500 (72.4490) lr 1.3090e-03 eta 12:22:48 +epoch [22/50] batch [495/1000] time 1.538 (1.563) data 0.000 (0.002) loss 1.4482 (1.1027) acc 62.5000 (72.3674) lr 1.3090e-03 eta 12:22:37 +epoch [22/50] batch [500/1000] time 1.564 (1.563) data 0.001 (0.002) loss 1.0352 (1.1028) acc 68.7500 (72.3500) lr 1.3090e-03 eta 12:22:28 +epoch [22/50] batch [505/1000] time 1.537 (1.563) data 0.000 (0.002) loss 1.7344 (1.1034) acc 65.6250 (72.3144) lr 1.3090e-03 eta 12:22:20 +epoch [22/50] batch [510/1000] time 1.538 (1.563) data 0.000 (0.002) loss 1.4922 (1.1034) acc 65.6250 (72.3223) lr 1.3090e-03 eta 12:22:09 +epoch [22/50] batch [515/1000] time 1.545 (1.563) data 0.000 (0.002) loss 1.4443 (1.1022) acc 59.3750 (72.2998) lr 1.3090e-03 eta 12:21:57 +epoch [22/50] batch 
[520/1000] time 1.563 (1.563) data 0.000 (0.002) loss 1.5000 (1.1050) acc 62.5000 (72.2356) lr 1.3090e-03 eta 12:21:49 +epoch [22/50] batch [525/1000] time 1.582 (1.563) data 0.000 (0.002) loss 1.2568 (1.1057) acc 68.7500 (72.2143) lr 1.3090e-03 eta 12:21:45 +epoch [22/50] batch [530/1000] time 1.528 (1.563) data 0.001 (0.002) loss 1.5459 (1.1081) acc 59.3750 (72.1462) lr 1.3090e-03 eta 12:21:34 +epoch [22/50] batch [535/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.4922 (1.1084) acc 62.5000 (72.1612) lr 1.3090e-03 eta 12:21:30 +epoch [22/50] batch [540/1000] time 1.556 (1.563) data 0.000 (0.002) loss 1.3887 (1.1088) acc 65.6250 (72.1528) lr 1.3090e-03 eta 12:21:20 +epoch [22/50] batch [545/1000] time 1.535 (1.563) data 0.000 (0.002) loss 1.0059 (1.1075) acc 68.7500 (72.1789) lr 1.3090e-03 eta 12:21:09 +epoch [22/50] batch [550/1000] time 1.554 (1.563) data 0.000 (0.002) loss 1.4434 (1.1099) acc 62.5000 (72.1307) lr 1.3090e-03 eta 12:21:00 +epoch [22/50] batch [555/1000] time 1.564 (1.563) data 0.000 (0.002) loss 1.2197 (1.1092) acc 78.1250 (72.1734) lr 1.3090e-03 eta 12:20:52 +epoch [22/50] batch [560/1000] time 1.528 (1.563) data 0.000 (0.002) loss 1.1748 (1.1113) acc 75.0000 (72.1484) lr 1.3090e-03 eta 12:20:42 +epoch [22/50] batch [565/1000] time 1.541 (1.563) data 0.001 (0.002) loss 1.3477 (1.1113) acc 62.5000 (72.1184) lr 1.3090e-03 eta 12:20:33 +epoch [22/50] batch [570/1000] time 1.584 (1.563) data 0.001 (0.002) loss 0.6626 (1.1105) acc 87.5000 (72.1546) lr 1.3090e-03 eta 12:20:22 +epoch [22/50] batch [575/1000] time 1.545 (1.562) data 0.000 (0.002) loss 1.7139 (1.1103) acc 56.2500 (72.1467) lr 1.3090e-03 eta 12:20:11 +epoch [22/50] batch [580/1000] time 1.542 (1.563) data 0.000 (0.002) loss 1.0332 (1.1097) acc 75.0000 (72.1659) lr 1.3090e-03 eta 12:20:08 +epoch [22/50] batch [585/1000] time 1.560 (1.563) data 0.000 (0.002) loss 0.8682 (1.1095) acc 84.3750 (72.1688) lr 1.3090e-03 eta 12:20:01 +epoch [22/50] batch [590/1000] time 1.555 (1.563) data 
0.001 (0.002) loss 1.1270 (1.1085) acc 62.5000 (72.1716) lr 1.3090e-03 eta 12:19:52 +epoch [22/50] batch [595/1000] time 1.544 (1.562) data 0.000 (0.002) loss 1.5195 (1.1094) acc 68.7500 (72.1324) lr 1.3090e-03 eta 12:19:41 +epoch [22/50] batch [600/1000] time 1.568 (1.562) data 0.001 (0.002) loss 0.8501 (1.1085) acc 71.8750 (72.1406) lr 1.3090e-03 eta 12:19:32 +epoch [22/50] batch [605/1000] time 1.560 (1.562) data 0.000 (0.002) loss 0.9043 (1.1092) acc 84.3750 (72.1436) lr 1.3090e-03 eta 12:19:23 +epoch [22/50] batch [610/1000] time 1.584 (1.562) data 0.000 (0.002) loss 1.0898 (1.1088) acc 78.1250 (72.1824) lr 1.3090e-03 eta 12:19:14 +epoch [22/50] batch [615/1000] time 1.567 (1.562) data 0.000 (0.002) loss 0.8081 (1.1074) acc 81.2500 (72.2104) lr 1.3090e-03 eta 12:19:08 +epoch [22/50] batch [620/1000] time 1.690 (1.563) data 0.000 (0.002) loss 1.3076 (1.1078) acc 68.7500 (72.1774) lr 1.3090e-03 eta 12:19:03 +epoch [22/50] batch [625/1000] time 1.564 (1.562) data 0.000 (0.002) loss 1.9795 (1.1087) acc 71.8750 (72.1800) lr 1.3090e-03 eta 12:18:54 +epoch [22/50] batch [630/1000] time 1.598 (1.562) data 0.001 (0.002) loss 1.7959 (1.1088) acc 56.2500 (72.1528) lr 1.3090e-03 eta 12:18:47 +epoch [22/50] batch [635/1000] time 1.563 (1.562) data 0.001 (0.002) loss 1.4141 (1.1098) acc 53.1250 (72.1260) lr 1.3090e-03 eta 12:18:38 +epoch [22/50] batch [640/1000] time 1.556 (1.562) data 0.000 (0.002) loss 1.1201 (1.1108) acc 68.7500 (72.0947) lr 1.3090e-03 eta 12:18:30 +epoch [22/50] batch [645/1000] time 1.574 (1.562) data 0.001 (0.002) loss 1.0850 (1.1107) acc 78.1250 (72.1124) lr 1.3090e-03 eta 12:18:21 +epoch [22/50] batch [650/1000] time 1.543 (1.562) data 0.001 (0.002) loss 0.9863 (1.1105) acc 78.1250 (72.1154) lr 1.3090e-03 eta 12:18:10 +epoch [22/50] batch [655/1000] time 1.571 (1.562) data 0.000 (0.002) loss 0.8833 (1.1116) acc 87.5000 (72.1422) lr 1.3090e-03 eta 12:17:59 +epoch [22/50] batch [660/1000] time 1.571 (1.562) data 0.000 (0.002) loss 2.0117 (1.1118) acc 
53.1250 (72.1354) lr 1.3090e-03 eta 12:17:51 +epoch [22/50] batch [665/1000] time 1.545 (1.562) data 0.000 (0.002) loss 0.9834 (1.1125) acc 78.1250 (72.1053) lr 1.3090e-03 eta 12:17:38 +epoch [22/50] batch [670/1000] time 1.550 (1.562) data 0.001 (0.002) loss 1.4443 (1.1141) acc 68.7500 (72.0756) lr 1.3090e-03 eta 12:17:27 +epoch [22/50] batch [675/1000] time 1.557 (1.562) data 0.001 (0.002) loss 1.0508 (1.1131) acc 68.7500 (72.0833) lr 1.3090e-03 eta 12:17:17 +epoch [22/50] batch [680/1000] time 1.547 (1.562) data 0.000 (0.002) loss 1.5234 (1.1138) acc 56.2500 (72.0542) lr 1.3090e-03 eta 12:17:06 +epoch [22/50] batch [685/1000] time 1.562 (1.562) data 0.001 (0.002) loss 1.4463 (1.1140) acc 59.3750 (72.0529) lr 1.3090e-03 eta 12:17:03 +epoch [22/50] batch [690/1000] time 1.540 (1.562) data 0.000 (0.002) loss 1.0420 (1.1127) acc 75.0000 (72.0652) lr 1.3090e-03 eta 12:16:53 +epoch [22/50] batch [695/1000] time 1.547 (1.562) data 0.000 (0.002) loss 0.8506 (1.1142) acc 75.0000 (72.0279) lr 1.3090e-03 eta 12:16:44 +epoch [22/50] batch [700/1000] time 1.582 (1.562) data 0.000 (0.002) loss 1.5752 (1.1145) acc 59.3750 (72.0223) lr 1.3090e-03 eta 12:16:35 +epoch [22/50] batch [705/1000] time 1.553 (1.562) data 0.000 (0.002) loss 0.5615 (1.1139) acc 87.5000 (72.0612) lr 1.3090e-03 eta 12:16:24 +epoch [22/50] batch [710/1000] time 1.556 (1.561) data 0.001 (0.002) loss 0.8618 (1.1148) acc 87.5000 (72.0423) lr 1.3090e-03 eta 12:16:13 +epoch [22/50] batch [715/1000] time 1.566 (1.561) data 0.000 (0.002) loss 1.2139 (1.1146) acc 71.8750 (72.0586) lr 1.3090e-03 eta 12:16:03 +epoch [22/50] batch [720/1000] time 1.556 (1.561) data 0.000 (0.002) loss 1.1250 (1.1151) acc 68.7500 (72.0486) lr 1.3090e-03 eta 12:15:53 +epoch [22/50] batch [725/1000] time 1.567 (1.561) data 0.000 (0.002) loss 0.5542 (1.1133) acc 87.5000 (72.0948) lr 1.3090e-03 eta 12:15:44 +epoch [22/50] batch [730/1000] time 1.570 (1.561) data 0.000 (0.002) loss 1.0449 (1.1149) acc 81.2500 (72.0762) lr 1.3090e-03 eta 
12:15:43 +epoch [22/50] batch [735/1000] time 1.580 (1.562) data 0.000 (0.002) loss 1.2334 (1.1175) acc 59.3750 (72.0238) lr 1.3090e-03 eta 12:15:37 +epoch [22/50] batch [740/1000] time 1.565 (1.561) data 0.000 (0.002) loss 1.0791 (1.1171) acc 81.2500 (72.0312) lr 1.3090e-03 eta 12:15:26 +epoch [22/50] batch [745/1000] time 1.549 (1.561) data 0.000 (0.002) loss 0.5176 (1.1161) acc 87.5000 (72.0680) lr 1.3090e-03 eta 12:15:18 +epoch [22/50] batch [750/1000] time 1.544 (1.561) data 0.000 (0.002) loss 1.1143 (1.1146) acc 68.7500 (72.1000) lr 1.3090e-03 eta 12:15:10 +epoch [22/50] batch [755/1000] time 1.562 (1.561) data 0.000 (0.002) loss 0.7583 (1.1153) acc 78.1250 (72.0820) lr 1.3090e-03 eta 12:15:01 +epoch [22/50] batch [760/1000] time 1.550 (1.561) data 0.000 (0.002) loss 1.0430 (1.1146) acc 81.2500 (72.1135) lr 1.3090e-03 eta 12:14:52 +epoch [22/50] batch [765/1000] time 1.538 (1.561) data 0.000 (0.002) loss 0.7744 (1.1143) acc 75.0000 (72.1405) lr 1.3090e-03 eta 12:14:42 +epoch [22/50] batch [770/1000] time 1.573 (1.561) data 0.000 (0.002) loss 1.4424 (1.1145) acc 68.7500 (72.1226) lr 1.3090e-03 eta 12:14:32 +epoch [22/50] batch [775/1000] time 1.563 (1.561) data 0.000 (0.002) loss 1.5596 (1.1144) acc 68.7500 (72.1250) lr 1.3090e-03 eta 12:14:27 +epoch [22/50] batch [780/1000] time 1.573 (1.561) data 0.001 (0.002) loss 1.3760 (1.1150) acc 71.8750 (72.1354) lr 1.3090e-03 eta 12:14:20 +epoch [22/50] batch [785/1000] time 1.541 (1.561) data 0.001 (0.002) loss 1.2646 (1.1154) acc 75.0000 (72.1298) lr 1.3090e-03 eta 12:14:09 +epoch [22/50] batch [790/1000] time 1.563 (1.561) data 0.000 (0.002) loss 0.8218 (1.1153) acc 81.2500 (72.1282) lr 1.3090e-03 eta 12:14:00 +epoch [22/50] batch [795/1000] time 1.556 (1.561) data 0.000 (0.002) loss 1.0107 (1.1152) acc 81.2500 (72.1462) lr 1.3090e-03 eta 12:13:50 +epoch [22/50] batch [800/1000] time 1.541 (1.561) data 0.000 (0.002) loss 1.4600 (1.1160) acc 62.5000 (72.1328) lr 1.3090e-03 eta 12:13:42 +epoch [22/50] batch 
[805/1000] time 1.546 (1.561) data 0.000 (0.002) loss 1.1445 (1.1161) acc 68.7500 (72.1234) lr 1.3090e-03 eta 12:13:34 +epoch [22/50] batch [810/1000] time 1.557 (1.561) data 0.000 (0.002) loss 0.8994 (1.1169) acc 75.0000 (72.1181) lr 1.3090e-03 eta 12:13:25 +epoch [22/50] batch [815/1000] time 1.569 (1.561) data 0.000 (0.002) loss 1.0918 (1.1166) acc 71.8750 (72.1166) lr 1.3090e-03 eta 12:13:20 +epoch [22/50] batch [820/1000] time 1.543 (1.561) data 0.000 (0.002) loss 1.1436 (1.1158) acc 78.1250 (72.1189) lr 1.3090e-03 eta 12:13:11 +epoch [22/50] batch [825/1000] time 1.570 (1.561) data 0.000 (0.002) loss 1.0957 (1.1165) acc 68.7500 (72.0985) lr 1.3090e-03 eta 12:13:04 +epoch [22/50] batch [830/1000] time 1.529 (1.561) data 0.001 (0.002) loss 0.9248 (1.1158) acc 71.8750 (72.0821) lr 1.3090e-03 eta 12:12:56 +epoch [22/50] batch [835/1000] time 1.562 (1.561) data 0.000 (0.002) loss 1.4658 (1.1163) acc 62.5000 (72.0771) lr 1.3090e-03 eta 12:12:51 +epoch [22/50] batch [840/1000] time 1.570 (1.561) data 0.000 (0.001) loss 0.8828 (1.1154) acc 84.3750 (72.0833) lr 1.3090e-03 eta 12:12:43 +epoch [22/50] batch [845/1000] time 1.547 (1.561) data 0.000 (0.001) loss 1.4014 (1.1163) acc 62.5000 (72.0599) lr 1.3090e-03 eta 12:12:35 +epoch [22/50] batch [850/1000] time 1.562 (1.561) data 0.000 (0.001) loss 0.9087 (1.1165) acc 81.2500 (72.0588) lr 1.3090e-03 eta 12:12:27 +epoch [22/50] batch [855/1000] time 1.545 (1.561) data 0.000 (0.001) loss 1.2197 (1.1170) acc 68.7500 (72.0687) lr 1.3090e-03 eta 12:12:19 +epoch [22/50] batch [860/1000] time 1.555 (1.561) data 0.001 (0.001) loss 1.0322 (1.1177) acc 71.8750 (72.0494) lr 1.3090e-03 eta 12:12:12 +epoch [22/50] batch [865/1000] time 1.567 (1.561) data 0.001 (0.001) loss 1.7109 (1.1192) acc 53.1250 (72.0340) lr 1.3090e-03 eta 12:12:06 +epoch [22/50] batch [870/1000] time 1.557 (1.561) data 0.000 (0.001) loss 1.6006 (1.1192) acc 65.6250 (72.0223) lr 1.3090e-03 eta 12:11:56 +epoch [22/50] batch [875/1000] time 1.541 (1.561) data 
0.000 (0.001) loss 0.6519 (1.1175) acc 81.2500 (72.0643) lr 1.3090e-03 eta 12:11:46 +epoch [22/50] batch [880/1000] time 1.583 (1.561) data 0.000 (0.001) loss 1.0479 (1.1173) acc 71.8750 (72.0810) lr 1.3090e-03 eta 12:11:46 +epoch [22/50] batch [885/1000] time 1.583 (1.561) data 0.000 (0.001) loss 1.1836 (1.1175) acc 71.8750 (72.0763) lr 1.3090e-03 eta 12:11:39 +epoch [22/50] batch [890/1000] time 1.547 (1.561) data 0.000 (0.001) loss 1.5928 (1.1175) acc 68.7500 (72.0892) lr 1.3090e-03 eta 12:11:28 +epoch [22/50] batch [895/1000] time 1.548 (1.561) data 0.000 (0.001) loss 1.8086 (1.1188) acc 56.2500 (72.0426) lr 1.3090e-03 eta 12:11:19 +epoch [22/50] batch [900/1000] time 1.538 (1.561) data 0.001 (0.001) loss 1.7715 (1.1197) acc 59.3750 (72.0417) lr 1.3090e-03 eta 12:11:11 +epoch [22/50] batch [905/1000] time 1.550 (1.561) data 0.000 (0.001) loss 0.7778 (1.1209) acc 81.2500 (72.0373) lr 1.3090e-03 eta 12:11:02 +epoch [22/50] batch [910/1000] time 1.526 (1.561) data 0.000 (0.001) loss 0.9390 (1.1217) acc 78.1250 (72.0364) lr 1.3090e-03 eta 12:10:51 +epoch [22/50] batch [915/1000] time 1.571 (1.561) data 0.000 (0.001) loss 0.7173 (1.1225) acc 84.3750 (72.0287) lr 1.3090e-03 eta 12:10:45 +epoch [22/50] batch [920/1000] time 1.556 (1.561) data 0.000 (0.001) loss 1.4424 (1.1225) acc 75.0000 (72.0211) lr 1.3090e-03 eta 12:10:38 +epoch [22/50] batch [925/1000] time 1.558 (1.561) data 0.000 (0.001) loss 1.1523 (1.1219) acc 75.0000 (72.0304) lr 1.3090e-03 eta 12:10:34 +epoch [22/50] batch [930/1000] time 1.576 (1.561) data 0.001 (0.001) loss 1.1299 (1.1221) acc 75.0000 (72.0296) lr 1.3090e-03 eta 12:10:25 +epoch [22/50] batch [935/1000] time 1.546 (1.561) data 0.001 (0.001) loss 1.0029 (1.1214) acc 78.1250 (72.0321) lr 1.3090e-03 eta 12:10:16 +epoch [22/50] batch [940/1000] time 1.548 (1.561) data 0.001 (0.001) loss 1.5234 (1.1215) acc 59.3750 (72.0246) lr 1.3090e-03 eta 12:10:06 +epoch [22/50] batch [945/1000] time 1.564 (1.561) data 0.000 (0.001) loss 0.6396 (1.1211) acc 
87.5000 (72.0337) lr 1.3090e-03 eta 12:09:57 +epoch [22/50] batch [950/1000] time 1.550 (1.561) data 0.001 (0.001) loss 1.0635 (1.1216) acc 71.8750 (72.0197) lr 1.3090e-03 eta 12:09:49 +epoch [22/50] batch [955/1000] time 1.547 (1.561) data 0.001 (0.001) loss 0.7891 (1.1211) acc 90.6250 (72.0419) lr 1.3090e-03 eta 12:09:41 +epoch [22/50] batch [960/1000] time 1.546 (1.561) data 0.000 (0.001) loss 1.1221 (1.1217) acc 68.7500 (72.0345) lr 1.3090e-03 eta 12:09:33 +epoch [22/50] batch [965/1000] time 1.536 (1.561) data 0.001 (0.001) loss 0.6318 (1.1206) acc 90.6250 (72.0628) lr 1.3090e-03 eta 12:09:24 +epoch [22/50] batch [970/1000] time 1.559 (1.561) data 0.000 (0.001) loss 0.9053 (1.1190) acc 75.0000 (72.0973) lr 1.3090e-03 eta 12:09:16 +epoch [22/50] batch [975/1000] time 1.559 (1.561) data 0.000 (0.001) loss 0.9165 (1.1193) acc 65.6250 (72.0897) lr 1.3090e-03 eta 12:09:07 +epoch [22/50] batch [980/1000] time 1.555 (1.561) data 0.000 (0.001) loss 1.4883 (1.1207) acc 59.3750 (72.0568) lr 1.3090e-03 eta 12:08:58 +epoch [22/50] batch [985/1000] time 1.704 (1.561) data 0.001 (0.001) loss 1.2910 (1.1203) acc 65.6250 (72.0495) lr 1.3090e-03 eta 12:08:53 +epoch [22/50] batch [990/1000] time 1.546 (1.561) data 0.000 (0.001) loss 1.3252 (1.1207) acc 59.3750 (72.0328) lr 1.3090e-03 eta 12:08:44 +epoch [22/50] batch [995/1000] time 1.559 (1.561) data 0.000 (0.001) loss 1.1338 (1.1212) acc 71.8750 (72.0195) lr 1.3090e-03 eta 12:08:35 +epoch [22/50] batch [1000/1000] time 1.552 (1.561) data 0.000 (0.001) loss 1.5049 (1.1211) acc 62.5000 (72.0219) lr 1.2487e-03 eta 12:08:26 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,222 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 78.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [23/50] batch [5/1000] time 1.546 (1.692) data 0.000 (0.194) loss 1.4141 (1.2255) acc 68.7500 (71.8750) lr 1.2487e-03 eta 13:09:14 +epoch 
[23/50] batch [10/1000] time 1.561 (1.646) data 0.000 (0.097) loss 0.8633 (1.1249) acc 71.8750 (71.8750) lr 1.2487e-03 eta 12:47:42 +epoch [23/50] batch [15/1000] time 1.545 (1.617) data 0.000 (0.065) loss 0.8970 (1.0840) acc 75.0000 (72.7083) lr 1.2487e-03 eta 12:34:02 +epoch [23/50] batch [20/1000] time 1.577 (1.604) data 0.000 (0.049) loss 0.9395 (1.0400) acc 75.0000 (73.7500) lr 1.2487e-03 eta 12:27:48 +epoch [23/50] batch [25/1000] time 1.561 (1.595) data 0.000 (0.039) loss 1.6387 (1.0865) acc 62.5000 (73.2500) lr 1.2487e-03 eta 12:23:39 +epoch [23/50] batch [30/1000] time 1.557 (1.589) data 0.000 (0.033) loss 1.2188 (1.0666) acc 71.8750 (73.5417) lr 1.2487e-03 eta 12:20:31 +epoch [23/50] batch [35/1000] time 1.552 (1.583) data 0.001 (0.028) loss 1.3203 (1.0529) acc 65.6250 (74.1964) lr 1.2487e-03 eta 12:17:40 +epoch [23/50] batch [40/1000] time 1.545 (1.579) data 0.001 (0.025) loss 0.9458 (1.0740) acc 75.0000 (73.9062) lr 1.2487e-03 eta 12:15:52 +epoch [23/50] batch [45/1000] time 1.569 (1.577) data 0.000 (0.022) loss 1.2666 (1.0773) acc 65.6250 (73.5417) lr 1.2487e-03 eta 12:14:50 +epoch [23/50] batch [50/1000] time 1.587 (1.577) data 0.000 (0.020) loss 0.9897 (1.0648) acc 71.8750 (73.5625) lr 1.2487e-03 eta 12:14:42 +epoch [23/50] batch [55/1000] time 1.574 (1.579) data 0.001 (0.018) loss 1.4053 (1.0602) acc 56.2500 (73.5795) lr 1.2487e-03 eta 12:15:28 +epoch [23/50] batch [60/1000] time 1.580 (1.578) data 0.001 (0.017) loss 0.9648 (1.0643) acc 71.8750 (73.0729) lr 1.2487e-03 eta 12:15:01 +epoch [23/50] batch [65/1000] time 1.572 (1.577) data 0.000 (0.015) loss 0.9541 (1.0589) acc 84.3750 (73.0769) lr 1.2487e-03 eta 12:14:14 +epoch [23/50] batch [70/1000] time 1.552 (1.576) data 0.000 (0.014) loss 0.9106 (1.0497) acc 75.0000 (73.3929) lr 1.2487e-03 eta 12:13:26 +epoch [23/50] batch [75/1000] time 1.570 (1.575) data 0.000 (0.013) loss 0.6836 (1.0565) acc 81.2500 (73.1667) lr 1.2487e-03 eta 12:13:05 +epoch [23/50] batch [80/1000] time 1.544 (1.574) data 0.000 
(0.013) loss 0.9561 (1.0455) acc 68.7500 (73.0859) lr 1.2487e-03 eta 12:12:19 +epoch [23/50] batch [85/1000] time 1.535 (1.573) data 0.001 (0.012) loss 1.2959 (1.0441) acc 71.8750 (73.1618) lr 1.2487e-03 eta 12:11:52 +epoch [23/50] batch [90/1000] time 1.568 (1.573) data 0.001 (0.011) loss 1.1045 (1.0539) acc 65.6250 (73.0556) lr 1.2487e-03 eta 12:11:28 +epoch [23/50] batch [95/1000] time 1.566 (1.572) data 0.000 (0.011) loss 1.3779 (1.0604) acc 65.6250 (72.6974) lr 1.2487e-03 eta 12:11:10 +epoch [23/50] batch [100/1000] time 1.565 (1.572) data 0.000 (0.010) loss 1.0713 (1.0783) acc 75.0000 (72.4375) lr 1.2487e-03 eta 12:10:53 +epoch [23/50] batch [105/1000] time 1.545 (1.571) data 0.000 (0.010) loss 1.2168 (1.0765) acc 71.8750 (72.5595) lr 1.2487e-03 eta 12:10:30 +epoch [23/50] batch [110/1000] time 1.548 (1.571) data 0.001 (0.009) loss 0.7041 (1.0707) acc 78.1250 (72.6705) lr 1.2487e-03 eta 12:10:01 +epoch [23/50] batch [115/1000] time 1.723 (1.572) data 0.001 (0.009) loss 1.0693 (1.0630) acc 71.8750 (72.6902) lr 1.2487e-03 eta 12:10:40 +epoch [23/50] batch [120/1000] time 1.557 (1.572) data 0.001 (0.009) loss 0.6104 (1.0534) acc 81.2500 (72.9167) lr 1.2487e-03 eta 12:10:23 +epoch [23/50] batch [125/1000] time 1.560 (1.571) data 0.001 (0.008) loss 1.2988 (1.0598) acc 68.7500 (72.8000) lr 1.2487e-03 eta 12:09:56 +epoch [23/50] batch [130/1000] time 1.547 (1.571) data 0.000 (0.008) loss 1.3838 (1.0553) acc 75.0000 (72.8846) lr 1.2487e-03 eta 12:09:35 +epoch [23/50] batch [135/1000] time 1.535 (1.570) data 0.000 (0.008) loss 1.0889 (1.0522) acc 75.0000 (73.0787) lr 1.2487e-03 eta 12:09:02 +epoch [23/50] batch [140/1000] time 1.549 (1.569) data 0.000 (0.007) loss 0.8691 (1.0529) acc 71.8750 (72.9911) lr 1.2487e-03 eta 12:08:44 +epoch [23/50] batch [145/1000] time 1.566 (1.569) data 0.000 (0.007) loss 0.9561 (1.0602) acc 81.2500 (72.8664) lr 1.2487e-03 eta 12:08:27 +epoch [23/50] batch [150/1000] time 1.562 (1.569) data 0.000 (0.007) loss 0.7212 (1.0609) acc 75.0000 
(72.8542) lr 1.2487e-03 eta 12:08:12 +epoch [23/50] batch [155/1000] time 1.562 (1.569) data 0.000 (0.007) loss 1.1641 (1.0640) acc 62.5000 (72.8226) lr 1.2487e-03 eta 12:08:00 +epoch [23/50] batch [160/1000] time 1.722 (1.569) data 0.000 (0.007) loss 0.9272 (1.0682) acc 75.0000 (72.8125) lr 1.2487e-03 eta 12:08:12 +epoch [23/50] batch [165/1000] time 1.579 (1.569) data 0.000 (0.006) loss 0.7148 (1.0629) acc 78.1250 (72.8409) lr 1.2487e-03 eta 12:07:58 +epoch [23/50] batch [170/1000] time 1.538 (1.568) data 0.000 (0.006) loss 1.5566 (1.0734) acc 62.5000 (72.5551) lr 1.2487e-03 eta 12:07:30 +epoch [23/50] batch [175/1000] time 1.542 (1.568) data 0.001 (0.006) loss 0.8726 (1.0742) acc 81.2500 (72.5893) lr 1.2487e-03 eta 12:07:19 +epoch [23/50] batch [180/1000] time 1.550 (1.568) data 0.000 (0.006) loss 0.9404 (1.0727) acc 75.0000 (72.6910) lr 1.2487e-03 eta 12:07:08 +epoch [23/50] batch [185/1000] time 1.560 (1.568) data 0.000 (0.006) loss 0.8579 (1.0710) acc 81.2500 (72.8041) lr 1.2487e-03 eta 12:06:59 +epoch [23/50] batch [190/1000] time 1.552 (1.568) data 0.001 (0.006) loss 0.9575 (1.0669) acc 68.7500 (72.8125) lr 1.2487e-03 eta 12:06:33 +epoch [23/50] batch [195/1000] time 1.526 (1.567) data 0.001 (0.005) loss 0.9707 (1.0757) acc 75.0000 (72.5801) lr 1.2487e-03 eta 12:06:07 +epoch [23/50] batch [200/1000] time 1.548 (1.567) data 0.000 (0.005) loss 1.2236 (1.0770) acc 62.5000 (72.5469) lr 1.2487e-03 eta 12:05:48 +epoch [23/50] batch [205/1000] time 1.547 (1.567) data 0.000 (0.005) loss 0.8442 (1.0827) acc 75.0000 (72.5152) lr 1.2487e-03 eta 12:05:50 +epoch [23/50] batch [210/1000] time 1.551 (1.567) data 0.000 (0.005) loss 1.3604 (1.0873) acc 65.6250 (72.3661) lr 1.2487e-03 eta 12:05:36 +epoch [23/50] batch [215/1000] time 1.559 (1.567) data 0.001 (0.005) loss 1.3398 (1.0885) acc 75.0000 (72.2674) lr 1.2487e-03 eta 12:05:28 +epoch [23/50] batch [220/1000] time 1.558 (1.566) data 0.001 (0.005) loss 0.8394 (1.0880) acc 81.2500 (72.3011) lr 1.2487e-03 eta 12:05:17 
+epoch [23/50] batch [225/1000] time 1.549 (1.566) data 0.000 (0.005) loss 0.9297 (1.0871) acc 68.7500 (72.3056) lr 1.2487e-03 eta 12:05:06 +epoch [23/50] batch [230/1000] time 1.558 (1.566) data 0.000 (0.005) loss 0.9844 (1.0848) acc 71.8750 (72.2147) lr 1.2487e-03 eta 12:05:01 +epoch [23/50] batch [235/1000] time 1.572 (1.566) data 0.000 (0.005) loss 1.2168 (1.0848) acc 65.6250 (72.1809) lr 1.2487e-03 eta 12:04:51 +epoch [23/50] batch [240/1000] time 1.550 (1.566) data 0.001 (0.004) loss 0.7920 (1.0840) acc 78.1250 (72.2526) lr 1.2487e-03 eta 12:04:40 +epoch [23/50] batch [245/1000] time 1.544 (1.566) data 0.001 (0.004) loss 1.6348 (1.0884) acc 62.5000 (72.1811) lr 1.2487e-03 eta 12:04:28 +epoch [23/50] batch [250/1000] time 1.564 (1.566) data 0.001 (0.004) loss 0.8804 (1.0876) acc 75.0000 (72.1875) lr 1.2487e-03 eta 12:04:15 +epoch [23/50] batch [255/1000] time 1.570 (1.566) data 0.000 (0.004) loss 0.6245 (1.0877) acc 81.2500 (72.1936) lr 1.2487e-03 eta 12:04:07 +epoch [23/50] batch [260/1000] time 1.566 (1.566) data 0.000 (0.004) loss 0.9043 (1.0880) acc 78.1250 (72.2596) lr 1.2487e-03 eta 12:03:52 +epoch [23/50] batch [265/1000] time 1.558 (1.565) data 0.001 (0.004) loss 0.8447 (1.0858) acc 78.1250 (72.3585) lr 1.2487e-03 eta 12:03:39 +epoch [23/50] batch [270/1000] time 1.537 (1.566) data 0.000 (0.004) loss 1.0791 (1.0864) acc 68.7500 (72.3495) lr 1.2487e-03 eta 12:03:43 +epoch [23/50] batch [275/1000] time 1.579 (1.566) data 0.000 (0.004) loss 0.7900 (1.0853) acc 87.5000 (72.4205) lr 1.2487e-03 eta 12:03:33 +epoch [23/50] batch [280/1000] time 1.556 (1.566) data 0.001 (0.004) loss 0.7222 (1.0834) acc 87.5000 (72.4665) lr 1.2487e-03 eta 12:03:21 +epoch [23/50] batch [285/1000] time 1.571 (1.566) data 0.000 (0.004) loss 0.8101 (1.0820) acc 84.3750 (72.5548) lr 1.2487e-03 eta 12:03:15 +epoch [23/50] batch [290/1000] time 1.572 (1.566) data 0.001 (0.004) loss 0.9316 (1.0844) acc 68.7500 (72.5323) lr 1.2487e-03 eta 12:03:02 +epoch [23/50] batch [295/1000] time 
1.560 (1.566) data 0.000 (0.004) loss 1.0869 (1.0861) acc 68.7500 (72.5318) lr 1.2487e-03 eta 12:02:58 +epoch [23/50] batch [300/1000] time 1.592 (1.566) data 0.000 (0.004) loss 1.0312 (1.0851) acc 78.1250 (72.5729) lr 1.2487e-03 eta 12:02:52 +epoch [23/50] batch [305/1000] time 1.560 (1.566) data 0.000 (0.004) loss 1.6348 (1.0857) acc 68.7500 (72.6127) lr 1.2487e-03 eta 12:02:43 +epoch [23/50] batch [310/1000] time 1.538 (1.566) data 0.001 (0.004) loss 2.0273 (1.0887) acc 59.3750 (72.5706) lr 1.2487e-03 eta 12:02:30 +epoch [23/50] batch [315/1000] time 1.553 (1.566) data 0.000 (0.004) loss 0.9004 (1.0909) acc 78.1250 (72.5000) lr 1.2487e-03 eta 12:02:34 +epoch [23/50] batch [320/1000] time 1.556 (1.566) data 0.000 (0.003) loss 0.8154 (1.0865) acc 78.1250 (72.6270) lr 1.2487e-03 eta 12:02:21 +epoch [23/50] batch [325/1000] time 1.556 (1.566) data 0.000 (0.003) loss 1.0693 (1.0871) acc 71.8750 (72.5769) lr 1.2487e-03 eta 12:02:16 +epoch [23/50] batch [330/1000] time 1.557 (1.566) data 0.001 (0.003) loss 1.0039 (1.0853) acc 68.7500 (72.5473) lr 1.2487e-03 eta 12:02:04 +epoch [23/50] batch [335/1000] time 1.532 (1.565) data 0.000 (0.003) loss 1.2617 (1.0893) acc 71.8750 (72.5187) lr 1.2487e-03 eta 12:01:49 +epoch [23/50] batch [340/1000] time 1.567 (1.565) data 0.000 (0.003) loss 0.7534 (1.0903) acc 78.1250 (72.5184) lr 1.2487e-03 eta 12:01:37 +epoch [23/50] batch [345/1000] time 1.593 (1.565) data 0.000 (0.003) loss 1.1074 (1.0892) acc 75.0000 (72.5181) lr 1.2487e-03 eta 12:01:28 +epoch [23/50] batch [350/1000] time 1.549 (1.565) data 0.001 (0.003) loss 0.8569 (1.0909) acc 78.1250 (72.5089) lr 1.2487e-03 eta 12:01:16 +epoch [23/50] batch [355/1000] time 1.573 (1.566) data 0.000 (0.003) loss 1.6816 (1.0948) acc 53.1250 (72.4120) lr 1.2487e-03 eta 12:01:21 +epoch [23/50] batch [360/1000] time 1.563 (1.566) data 0.000 (0.003) loss 1.0693 (1.0987) acc 59.3750 (72.2656) lr 1.2487e-03 eta 12:01:15 +epoch [23/50] batch [365/1000] time 1.575 (1.566) data 0.000 (0.003) loss 
0.9141 (1.0971) acc 78.1250 (72.2860) lr 1.2487e-03 eta 12:01:05 +epoch [23/50] batch [370/1000] time 1.553 (1.565) data 0.001 (0.003) loss 1.3604 (1.0972) acc 71.8750 (72.2804) lr 1.2487e-03 eta 12:00:53 +epoch [23/50] batch [375/1000] time 1.569 (1.565) data 0.000 (0.003) loss 0.7646 (1.0964) acc 71.8750 (72.2750) lr 1.2487e-03 eta 12:00:43 +epoch [23/50] batch [380/1000] time 1.575 (1.565) data 0.000 (0.003) loss 1.9180 (1.1010) acc 62.5000 (72.1875) lr 1.2487e-03 eta 12:00:34 +epoch [23/50] batch [385/1000] time 1.570 (1.565) data 0.000 (0.003) loss 0.3682 (1.0990) acc 93.7500 (72.2240) lr 1.2487e-03 eta 12:00:21 +epoch [23/50] batch [390/1000] time 1.560 (1.565) data 0.000 (0.003) loss 1.3164 (1.0980) acc 65.6250 (72.1955) lr 1.2487e-03 eta 12:00:09 +epoch [23/50] batch [395/1000] time 1.550 (1.565) data 0.000 (0.003) loss 0.8643 (1.0965) acc 78.1250 (72.2389) lr 1.2487e-03 eta 12:00:00 +epoch [23/50] batch [400/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.3242 (1.0967) acc 75.0000 (72.2891) lr 1.2487e-03 eta 11:59:54 +epoch [23/50] batch [405/1000] time 1.535 (1.565) data 0.000 (0.003) loss 1.1309 (1.0980) acc 62.5000 (72.2685) lr 1.2487e-03 eta 11:59:43 +epoch [23/50] batch [410/1000] time 1.549 (1.565) data 0.001 (0.003) loss 0.7681 (1.0969) acc 75.0000 (72.2866) lr 1.2487e-03 eta 11:59:32 +epoch [23/50] batch [415/1000] time 1.559 (1.565) data 0.000 (0.003) loss 0.4709 (1.0927) acc 84.3750 (72.4096) lr 1.2487e-03 eta 11:59:22 +epoch [23/50] batch [420/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.0137 (1.0939) acc 78.1250 (72.4256) lr 1.2487e-03 eta 11:59:22 +epoch [23/50] batch [425/1000] time 1.546 (1.565) data 0.000 (0.003) loss 1.4170 (1.0950) acc 81.2500 (72.4044) lr 1.2487e-03 eta 11:59:10 +epoch [23/50] batch [430/1000] time 1.541 (1.565) data 0.000 (0.003) loss 0.9712 (1.0943) acc 71.8750 (72.4201) lr 1.2487e-03 eta 11:58:57 +epoch [23/50] batch [435/1000] time 1.552 (1.565) data 0.001 (0.003) loss 0.7324 (1.0935) acc 75.0000 (72.4425) 
lr 1.2487e-03 eta 11:58:48 +epoch [23/50] batch [440/1000] time 1.565 (1.565) data 0.001 (0.003) loss 1.1348 (1.0937) acc 71.8750 (72.4219) lr 1.2487e-03 eta 11:58:38 +epoch [23/50] batch [445/1000] time 1.575 (1.565) data 0.000 (0.003) loss 1.3115 (1.0934) acc 75.0000 (72.4298) lr 1.2487e-03 eta 11:58:32 +epoch [23/50] batch [450/1000] time 1.548 (1.564) data 0.001 (0.003) loss 1.6221 (1.0943) acc 50.0000 (72.3819) lr 1.2487e-03 eta 11:58:21 +epoch [23/50] batch [455/1000] time 1.568 (1.564) data 0.001 (0.003) loss 1.7852 (1.0989) acc 56.2500 (72.2459) lr 1.2487e-03 eta 11:58:13 +epoch [23/50] batch [460/1000] time 1.579 (1.565) data 0.000 (0.003) loss 0.8418 (1.0978) acc 87.5000 (72.2826) lr 1.2487e-03 eta 11:58:07 +epoch [23/50] batch [465/1000] time 1.582 (1.565) data 0.001 (0.003) loss 1.8203 (1.1007) acc 62.5000 (72.2446) lr 1.2487e-03 eta 11:58:10 +epoch [23/50] batch [470/1000] time 1.557 (1.565) data 0.001 (0.003) loss 1.3711 (1.1013) acc 68.7500 (72.2340) lr 1.2487e-03 eta 11:58:00 +epoch [23/50] batch [475/1000] time 1.552 (1.565) data 0.000 (0.003) loss 1.0742 (1.1024) acc 71.8750 (72.1908) lr 1.2487e-03 eta 11:57:50 +epoch [23/50] batch [480/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.8867 (1.1037) acc 68.7500 (72.2201) lr 1.2487e-03 eta 11:57:38 +epoch [23/50] batch [485/1000] time 1.535 (1.564) data 0.001 (0.002) loss 0.5337 (1.1032) acc 81.2500 (72.2423) lr 1.2487e-03 eta 11:57:24 +epoch [23/50] batch [490/1000] time 1.544 (1.564) data 0.000 (0.002) loss 1.2949 (1.1030) acc 68.7500 (72.2577) lr 1.2487e-03 eta 11:57:14 +epoch [23/50] batch [495/1000] time 1.560 (1.564) data 0.001 (0.002) loss 0.5532 (1.1002) acc 87.5000 (72.3295) lr 1.2487e-03 eta 11:57:02 +epoch [23/50] batch [500/1000] time 1.564 (1.564) data 0.001 (0.002) loss 1.0322 (1.0997) acc 78.1250 (72.3625) lr 1.2487e-03 eta 11:56:53 +epoch [23/50] batch [505/1000] time 1.703 (1.564) data 0.001 (0.002) loss 1.0059 (1.1007) acc 68.7500 (72.3205) lr 1.2487e-03 eta 11:56:50 +epoch 
[23/50] batch [510/1000] time 1.566 (1.564) data 0.001 (0.002) loss 0.8447 (1.1009) acc 87.5000 (72.3468) lr 1.2487e-03 eta 11:56:38 +epoch [23/50] batch [515/1000] time 1.567 (1.564) data 0.000 (0.002) loss 1.1895 (1.1007) acc 71.8750 (72.3726) lr 1.2487e-03 eta 11:56:31 +epoch [23/50] batch [520/1000] time 1.542 (1.564) data 0.001 (0.002) loss 2.0625 (1.1007) acc 59.3750 (72.3858) lr 1.2487e-03 eta 11:56:20 +epoch [23/50] batch [525/1000] time 1.549 (1.564) data 0.001 (0.002) loss 0.7998 (1.1005) acc 65.6250 (72.3571) lr 1.2487e-03 eta 11:56:08 +epoch [23/50] batch [530/1000] time 1.532 (1.564) data 0.000 (0.002) loss 1.1504 (1.1003) acc 71.8750 (72.3703) lr 1.2487e-03 eta 11:56:01 +epoch [23/50] batch [535/1000] time 1.567 (1.564) data 0.000 (0.002) loss 0.8354 (1.1005) acc 75.0000 (72.4007) lr 1.2487e-03 eta 11:55:53 +epoch [23/50] batch [540/1000] time 1.552 (1.564) data 0.000 (0.002) loss 1.1904 (1.1011) acc 68.7500 (72.4016) lr 1.2487e-03 eta 11:55:42 +epoch [23/50] batch [545/1000] time 1.558 (1.564) data 0.001 (0.002) loss 1.0469 (1.1025) acc 68.7500 (72.3509) lr 1.2487e-03 eta 11:55:30 +epoch [23/50] batch [550/1000] time 1.541 (1.564) data 0.000 (0.002) loss 0.9653 (1.1027) acc 68.7500 (72.3409) lr 1.2487e-03 eta 11:55:20 +epoch [23/50] batch [555/1000] time 1.542 (1.563) data 0.000 (0.002) loss 1.3213 (1.1019) acc 65.6250 (72.3311) lr 1.2487e-03 eta 11:55:07 +epoch [23/50] batch [560/1000] time 1.563 (1.563) data 0.001 (0.002) loss 1.0420 (1.1023) acc 65.6250 (72.2935) lr 1.2487e-03 eta 11:54:57 +epoch [23/50] batch [565/1000] time 1.551 (1.563) data 0.001 (0.002) loss 0.3606 (1.1016) acc 93.7500 (72.3341) lr 1.2487e-03 eta 11:54:46 +epoch [23/50] batch [570/1000] time 1.543 (1.563) data 0.000 (0.002) loss 0.7686 (1.1009) acc 78.1250 (72.3191) lr 1.2487e-03 eta 11:54:45 +epoch [23/50] batch [575/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.1123 (1.1029) acc 71.8750 (72.2935) lr 1.2487e-03 eta 11:54:34 +epoch [23/50] batch [580/1000] time 1.552 
(1.563) data 0.000 (0.002) loss 0.8354 (1.1031) acc 87.5000 (72.2899) lr 1.2487e-03 eta 11:54:24 +epoch [23/50] batch [585/1000] time 1.556 (1.563) data 0.001 (0.002) loss 0.8423 (1.1020) acc 59.3750 (72.2756) lr 1.2487e-03 eta 11:54:14 +epoch [23/50] batch [590/1000] time 1.584 (1.563) data 0.000 (0.002) loss 0.8120 (1.1015) acc 78.1250 (72.2669) lr 1.2487e-03 eta 11:54:05 +epoch [23/50] batch [595/1000] time 1.543 (1.563) data 0.000 (0.002) loss 0.5439 (1.0995) acc 87.5000 (72.3214) lr 1.2487e-03 eta 11:53:56 +epoch [23/50] batch [600/1000] time 1.597 (1.563) data 0.001 (0.002) loss 1.1484 (1.0999) acc 71.8750 (72.3125) lr 1.2487e-03 eta 11:53:49 +epoch [23/50] batch [605/1000] time 1.536 (1.563) data 0.001 (0.002) loss 1.3525 (1.1018) acc 65.6250 (72.2469) lr 1.2487e-03 eta 11:53:37 +epoch [23/50] batch [610/1000] time 1.557 (1.563) data 0.001 (0.002) loss 1.0781 (1.1011) acc 81.2500 (72.2951) lr 1.2487e-03 eta 11:53:27 +epoch [23/50] batch [615/1000] time 1.551 (1.563) data 0.001 (0.002) loss 1.4385 (1.1032) acc 62.5000 (72.2358) lr 1.2487e-03 eta 11:53:23 +epoch [23/50] batch [620/1000] time 1.563 (1.563) data 0.001 (0.002) loss 0.9756 (1.1026) acc 65.6250 (72.2379) lr 1.2487e-03 eta 11:53:15 +epoch [23/50] batch [625/1000] time 1.562 (1.563) data 0.000 (0.002) loss 1.2178 (1.1047) acc 59.3750 (72.1750) lr 1.2487e-03 eta 11:53:07 +epoch [23/50] batch [630/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.9102 (1.1064) acc 78.1250 (72.1627) lr 1.2487e-03 eta 11:52:58 +epoch [23/50] batch [635/1000] time 1.565 (1.563) data 0.001 (0.002) loss 1.4482 (1.1072) acc 62.5000 (72.1309) lr 1.2487e-03 eta 11:52:49 +epoch [23/50] batch [640/1000] time 1.550 (1.563) data 0.000 (0.002) loss 0.6421 (1.1080) acc 81.2500 (72.1045) lr 1.2487e-03 eta 11:52:40 +epoch [23/50] batch [645/1000] time 1.552 (1.563) data 0.000 (0.002) loss 1.4639 (1.1097) acc 59.3750 (72.0833) lr 1.2487e-03 eta 11:52:31 +epoch [23/50] batch [650/1000] time 1.544 (1.563) data 0.000 (0.002) loss 0.9551 
(1.1095) acc 84.3750 (72.0962) lr 1.2487e-03 eta 11:52:21 +epoch [23/50] batch [655/1000] time 1.548 (1.563) data 0.001 (0.002) loss 0.8452 (1.1084) acc 71.8750 (72.1040) lr 1.2487e-03 eta 11:52:12 +epoch [23/50] batch [660/1000] time 1.560 (1.563) data 0.000 (0.002) loss 2.4707 (1.1117) acc 65.6250 (72.0549) lr 1.2487e-03 eta 11:52:08 +epoch [23/50] batch [665/1000] time 1.593 (1.563) data 0.000 (0.002) loss 1.0391 (1.1119) acc 65.6250 (72.0113) lr 1.2487e-03 eta 11:52:00 +epoch [23/50] batch [670/1000] time 1.542 (1.563) data 0.000 (0.002) loss 0.6646 (1.1120) acc 87.5000 (72.0149) lr 1.2487e-03 eta 11:51:53 +epoch [23/50] batch [675/1000] time 1.538 (1.563) data 0.000 (0.002) loss 0.8433 (1.1123) acc 68.7500 (71.9907) lr 1.2487e-03 eta 11:51:44 +epoch [23/50] batch [680/1000] time 1.558 (1.563) data 0.000 (0.002) loss 0.9160 (1.1113) acc 68.7500 (71.9945) lr 1.2487e-03 eta 11:51:37 +epoch [23/50] batch [685/1000] time 1.580 (1.563) data 0.001 (0.002) loss 1.1289 (1.1110) acc 65.6250 (71.9982) lr 1.2487e-03 eta 11:51:30 +epoch [23/50] batch [690/1000] time 1.588 (1.563) data 0.001 (0.002) loss 1.4668 (1.1127) acc 65.6250 (71.9475) lr 1.2487e-03 eta 11:51:24 +epoch [23/50] batch [695/1000] time 1.585 (1.563) data 0.001 (0.002) loss 1.3809 (1.1144) acc 68.7500 (71.9065) lr 1.2487e-03 eta 11:51:17 +epoch [23/50] batch [700/1000] time 1.540 (1.563) data 0.001 (0.002) loss 1.2793 (1.1123) acc 71.8750 (71.9688) lr 1.2487e-03 eta 11:51:09 +epoch [23/50] batch [705/1000] time 1.579 (1.563) data 0.001 (0.002) loss 1.0273 (1.1122) acc 65.6250 (71.9504) lr 1.2487e-03 eta 11:51:01 +epoch [23/50] batch [710/1000] time 1.549 (1.563) data 0.000 (0.002) loss 0.8018 (1.1118) acc 68.7500 (71.9366) lr 1.2487e-03 eta 11:50:49 +epoch [23/50] batch [715/1000] time 1.556 (1.563) data 0.001 (0.002) loss 0.7759 (1.1128) acc 78.1250 (71.9100) lr 1.2487e-03 eta 11:50:40 +epoch [23/50] batch [720/1000] time 1.542 (1.563) data 0.000 (0.002) loss 1.6826 (1.1141) acc 56.2500 (71.8880) lr 
1.2487e-03 eta 11:50:36 +epoch [23/50] batch [725/1000] time 1.561 (1.563) data 0.000 (0.002) loss 0.9937 (1.1134) acc 71.8750 (71.8793) lr 1.2487e-03 eta 11:50:28 +epoch [23/50] batch [730/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.3477 (1.1131) acc 78.1250 (71.8878) lr 1.2487e-03 eta 11:50:19 +epoch [23/50] batch [735/1000] time 1.565 (1.563) data 0.001 (0.002) loss 1.1895 (1.1132) acc 71.8750 (71.8963) lr 1.2487e-03 eta 11:50:09 +epoch [23/50] batch [740/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.7720 (1.1125) acc 81.2500 (71.9046) lr 1.2487e-03 eta 11:50:01 +epoch [23/50] batch [745/1000] time 1.555 (1.563) data 0.000 (0.002) loss 1.7578 (1.1133) acc 59.3750 (71.8540) lr 1.2487e-03 eta 11:49:51 +epoch [23/50] batch [750/1000] time 1.576 (1.563) data 0.000 (0.002) loss 1.1367 (1.1133) acc 59.3750 (71.8542) lr 1.2487e-03 eta 11:49:42 +epoch [23/50] batch [755/1000] time 1.554 (1.563) data 0.000 (0.002) loss 0.9160 (1.1114) acc 84.3750 (71.9247) lr 1.2487e-03 eta 11:49:34 +epoch [23/50] batch [760/1000] time 1.543 (1.563) data 0.000 (0.002) loss 0.5947 (1.1102) acc 81.2500 (71.9490) lr 1.2487e-03 eta 11:49:25 +epoch [23/50] batch [765/1000] time 1.555 (1.563) data 0.001 (0.002) loss 1.0693 (1.1105) acc 68.7500 (71.9444) lr 1.2487e-03 eta 11:49:23 +epoch [23/50] batch [770/1000] time 1.574 (1.563) data 0.000 (0.002) loss 1.3037 (1.1094) acc 59.3750 (71.9521) lr 1.2487e-03 eta 11:49:16 +epoch [23/50] batch [775/1000] time 1.580 (1.563) data 0.000 (0.002) loss 0.7104 (1.1085) acc 84.3750 (71.9758) lr 1.2487e-03 eta 11:49:09 +epoch [23/50] batch [780/1000] time 1.586 (1.563) data 0.000 (0.002) loss 1.0684 (1.1076) acc 75.0000 (71.9992) lr 1.2487e-03 eta 11:49:03 +epoch [23/50] batch [785/1000] time 1.589 (1.563) data 0.001 (0.002) loss 0.6914 (1.1070) acc 84.3750 (72.0143) lr 1.2487e-03 eta 11:48:59 +epoch [23/50] batch [790/1000] time 1.565 (1.563) data 0.001 (0.002) loss 0.9058 (1.1074) acc 68.7500 (72.0174) lr 1.2487e-03 eta 11:48:50 +epoch [23/50] 
batch [795/1000] time 1.548 (1.563) data 0.001 (0.002) loss 1.3750 (1.1075) acc 75.0000 (72.0047) lr 1.2487e-03 eta 11:48:39 +epoch [23/50] batch [800/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.1836 (1.1070) acc 75.0000 (72.0156) lr 1.2487e-03 eta 11:48:30 +epoch [23/50] batch [805/1000] time 1.542 (1.563) data 0.000 (0.002) loss 0.9629 (1.1074) acc 65.6250 (72.0225) lr 1.2487e-03 eta 11:48:21 +epoch [23/50] batch [810/1000] time 1.573 (1.563) data 0.001 (0.002) loss 0.9590 (1.1074) acc 78.1250 (72.0216) lr 1.2487e-03 eta 11:48:19 +epoch [23/50] batch [815/1000] time 1.556 (1.563) data 0.000 (0.002) loss 0.8892 (1.1075) acc 75.0000 (72.0054) lr 1.2487e-03 eta 11:48:08 +epoch [23/50] batch [820/1000] time 1.548 (1.563) data 0.001 (0.002) loss 1.3018 (1.1100) acc 62.5000 (71.9665) lr 1.2487e-03 eta 11:48:00 +epoch [23/50] batch [825/1000] time 1.572 (1.563) data 0.001 (0.002) loss 0.7959 (1.1110) acc 78.1250 (71.9545) lr 1.2487e-03 eta 11:47:54 +epoch [23/50] batch [830/1000] time 1.549 (1.563) data 0.000 (0.002) loss 1.2617 (1.1115) acc 65.6250 (71.9352) lr 1.2487e-03 eta 11:47:46 +epoch [23/50] batch [835/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.8281 (1.1120) acc 78.1250 (71.9199) lr 1.2487e-03 eta 11:47:36 +epoch [23/50] batch [840/1000] time 1.572 (1.563) data 0.000 (0.002) loss 0.8579 (1.1124) acc 78.1250 (71.9085) lr 1.2487e-03 eta 11:47:27 +epoch [23/50] batch [845/1000] time 1.575 (1.563) data 0.001 (0.002) loss 0.8628 (1.1135) acc 81.2500 (71.9009) lr 1.2487e-03 eta 11:47:20 +epoch [23/50] batch [850/1000] time 1.552 (1.563) data 0.000 (0.002) loss 1.1729 (1.1133) acc 78.1250 (71.9301) lr 1.2487e-03 eta 11:47:12 +epoch [23/50] batch [855/1000] time 1.566 (1.563) data 0.000 (0.002) loss 0.6777 (1.1137) acc 75.0000 (71.8896) lr 1.2487e-03 eta 11:47:03 +epoch [23/50] batch [860/1000] time 1.572 (1.563) data 0.000 (0.002) loss 1.0596 (1.1143) acc 71.8750 (71.8677) lr 1.2487e-03 eta 11:46:54 +epoch [23/50] batch [865/1000] time 1.587 (1.563) 
data 0.001 (0.002) loss 0.4695 (1.1132) acc 87.5000 (71.8931) lr 1.2487e-03 eta 11:46:49 +epoch [23/50] batch [870/1000] time 1.699 (1.563) data 0.001 (0.002) loss 1.1777 (1.1146) acc 71.8750 (71.8786) lr 1.2487e-03 eta 11:46:45 +epoch [23/50] batch [875/1000] time 1.555 (1.563) data 0.000 (0.002) loss 0.8818 (1.1132) acc 71.8750 (71.9071) lr 1.2487e-03 eta 11:46:36 +epoch [23/50] batch [880/1000] time 1.520 (1.563) data 0.000 (0.002) loss 1.1230 (1.1123) acc 71.8750 (71.9176) lr 1.2487e-03 eta 11:46:27 +epoch [23/50] batch [885/1000] time 1.543 (1.563) data 0.000 (0.002) loss 0.9644 (1.1129) acc 62.5000 (71.8856) lr 1.2487e-03 eta 11:46:17 +epoch [23/50] batch [890/1000] time 1.551 (1.563) data 0.000 (0.002) loss 1.2197 (1.1132) acc 71.8750 (71.8926) lr 1.2487e-03 eta 11:46:07 +epoch [23/50] batch [895/1000] time 1.555 (1.563) data 0.001 (0.002) loss 1.0117 (1.1131) acc 71.8750 (71.8715) lr 1.2487e-03 eta 11:45:58 +epoch [23/50] batch [900/1000] time 1.577 (1.563) data 0.001 (0.002) loss 1.0889 (1.1129) acc 81.2500 (71.8785) lr 1.2487e-03 eta 11:45:51 +epoch [23/50] batch [905/1000] time 1.576 (1.563) data 0.000 (0.002) loss 0.8345 (1.1124) acc 68.7500 (71.8681) lr 1.2487e-03 eta 11:45:43 +epoch [23/50] batch [910/1000] time 1.550 (1.563) data 0.000 (0.002) loss 1.2607 (1.1115) acc 81.2500 (71.9025) lr 1.2487e-03 eta 11:45:34 +epoch [23/50] batch [915/1000] time 1.696 (1.563) data 0.000 (0.002) loss 1.0537 (1.1106) acc 75.0000 (71.9126) lr 1.2487e-03 eta 11:45:29 +epoch [23/50] batch [920/1000] time 1.555 (1.563) data 0.001 (0.002) loss 0.9980 (1.1093) acc 68.7500 (71.9226) lr 1.2487e-03 eta 11:45:20 +epoch [23/50] batch [925/1000] time 1.549 (1.563) data 0.000 (0.002) loss 0.9189 (1.1098) acc 75.0000 (71.9223) lr 1.2487e-03 eta 11:45:12 +epoch [23/50] batch [930/1000] time 1.567 (1.563) data 0.000 (0.002) loss 1.2207 (1.1110) acc 65.6250 (71.9052) lr 1.2487e-03 eta 11:45:03 +epoch [23/50] batch [935/1000] time 1.546 (1.563) data 0.001 (0.002) loss 1.0391 (1.1096) 
acc 78.1250 (71.9552) lr 1.2487e-03 eta 11:44:54 +epoch [23/50] batch [940/1000] time 1.554 (1.563) data 0.001 (0.002) loss 0.6665 (1.1091) acc 87.5000 (71.9847) lr 1.2487e-03 eta 11:44:46 +epoch [23/50] batch [945/1000] time 1.561 (1.563) data 0.000 (0.001) loss 0.7109 (1.1083) acc 84.3750 (71.9907) lr 1.2487e-03 eta 11:44:38 +epoch [23/50] batch [950/1000] time 1.576 (1.563) data 0.000 (0.001) loss 1.2432 (1.1082) acc 71.8750 (72.0000) lr 1.2487e-03 eta 11:44:31 +epoch [23/50] batch [955/1000] time 1.567 (1.563) data 0.000 (0.001) loss 0.9326 (1.1078) acc 75.0000 (72.0092) lr 1.2487e-03 eta 11:44:24 +epoch [23/50] batch [960/1000] time 1.579 (1.563) data 0.000 (0.001) loss 1.0166 (1.1066) acc 68.7500 (72.0247) lr 1.2487e-03 eta 11:44:21 +epoch [23/50] batch [965/1000] time 1.548 (1.563) data 0.000 (0.001) loss 1.3008 (1.1070) acc 68.7500 (72.0304) lr 1.2487e-03 eta 11:44:13 +epoch [23/50] batch [970/1000] time 1.560 (1.563) data 0.000 (0.001) loss 1.3887 (1.1082) acc 65.6250 (72.0071) lr 1.2487e-03 eta 11:44:04 +epoch [23/50] batch [975/1000] time 1.553 (1.563) data 0.000 (0.001) loss 0.9961 (1.1081) acc 81.2500 (72.0224) lr 1.2487e-03 eta 11:43:55 +epoch [23/50] batch [980/1000] time 1.565 (1.563) data 0.000 (0.001) loss 1.0879 (1.1082) acc 75.0000 (72.0217) lr 1.2487e-03 eta 11:43:46 +epoch [23/50] batch [985/1000] time 1.539 (1.563) data 0.001 (0.001) loss 1.3506 (1.1083) acc 65.6250 (72.0209) lr 1.2487e-03 eta 11:43:37 +epoch [23/50] batch [990/1000] time 1.539 (1.563) data 0.000 (0.001) loss 1.3154 (1.1096) acc 65.6250 (71.9855) lr 1.2487e-03 eta 11:43:29 +epoch [23/50] batch [995/1000] time 1.553 (1.563) data 0.000 (0.001) loss 1.4551 (1.1103) acc 71.8750 (71.9755) lr 1.2487e-03 eta 11:43:19 +epoch [23/50] batch [1000/1000] time 1.547 (1.563) data 0.000 (0.001) loss 0.9277 (1.1114) acc 75.0000 (71.9750) lr 1.1874e-03 eta 11:43:10 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,204 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.9% 
+epoch [24/50] batch [5/1000] time 1.565 (1.691) data 0.000 (0.191) loss 1.9678 (1.4041) acc 56.2500 (69.3750) lr 1.1874e-03 eta 12:41:00 +epoch [24/50] batch [10/1000] time 1.562 (1.625) data 0.001 (0.096) loss 1.3672 (1.3235) acc 71.8750 (68.4375) lr 1.1874e-03 eta 12:11:05 +epoch [24/50] batch [15/1000] time 1.586 (1.605) data 0.001 (0.064) loss 2.1836 (1.2462) acc 50.0000 (69.7917) lr 1.1874e-03 eta 12:01:41 +epoch [24/50] batch [20/1000] time 1.567 (1.594) data 0.000 (0.048) loss 1.6699 (1.2052) acc 62.5000 (70.4688) lr 1.1874e-03 eta 11:56:40 +epoch [24/50] batch [25/1000] time 1.567 (1.590) data 0.000 (0.039) loss 1.0752 (1.1995) acc 78.1250 (70.6250) lr 1.1874e-03 eta 11:54:41 +epoch [24/50] batch [30/1000] time 1.560 (1.586) data 0.001 (0.032) loss 1.2402 (1.1995) acc 81.2500 (70.7292) lr 1.1874e-03 eta 11:52:51 +epoch [24/50] batch [35/1000] time 1.573 (1.591) data 0.001 (0.028) loss 1.4395 (1.2204) acc 59.3750 (70.4464) lr 1.1874e-03 eta 11:55:04 +epoch [24/50] batch [40/1000] time 1.546 (1.587) data 0.001 (0.024) loss 0.7432 (1.1795) acc 75.0000 (71.3281) lr 1.1874e-03 eta 11:53:08 +epoch [24/50] batch [45/1000] time 1.576 (1.585) data 0.000 (0.022) loss 0.9038 (1.1623) acc 65.6250 (71.3889) lr 1.1874e-03 eta 11:51:58 +epoch [24/50] batch [50/1000] time 1.545 (1.582) data 0.001 (0.020) loss 1.2451 (1.1609) acc 75.0000 (71.5000) lr 1.1874e-03 eta 11:50:43 +epoch [24/50] batch [55/1000] time 1.576 (1.581) data 0.001 (0.018) loss 0.8315 (1.1439) acc 68.7500 (71.3636) lr 1.1874e-03 eta 11:49:57 +epoch [24/50] batch [60/1000] time 1.535 (1.578) data 0.001 (0.016) loss 1.0586 (1.1469) acc 75.0000 (71.5104) lr 1.1874e-03 eta 11:48:31 +epoch [24/50] batch [65/1000] time 1.564 (1.577) data 0.000 (0.015) loss 0.9746 (1.1397) acc 75.0000 (71.6346) lr 1.1874e-03 eta 11:47:54 +epoch [24/50] batch [70/1000] time 1.584 (1.576) data 0.000 (0.014) loss 1.4219 (1.1679) acc 56.2500 (71.0714) lr 1.1874e-03 eta 11:47:26 +epoch [24/50] batch [75/1000] time 1.547 (1.575) data 
0.000 (0.013) loss 0.8345 (1.1475) acc 78.1250 (71.6250) lr 1.1874e-03 eta 11:46:50 +epoch [24/50] batch [80/1000] time 1.588 (1.575) data 0.000 (0.012) loss 1.2324 (1.1527) acc 59.3750 (71.4844) lr 1.1874e-03 eta 11:46:26 +epoch [24/50] batch [85/1000] time 1.564 (1.574) data 0.001 (0.012) loss 0.7119 (1.1323) acc 84.3750 (71.9853) lr 1.1874e-03 eta 11:45:52 +epoch [24/50] batch [90/1000] time 1.560 (1.573) data 0.000 (0.011) loss 1.2725 (1.1351) acc 75.0000 (72.0833) lr 1.1874e-03 eta 11:45:34 +epoch [24/50] batch [95/1000] time 1.573 (1.573) data 0.001 (0.011) loss 1.1475 (1.1420) acc 71.8750 (71.9079) lr 1.1874e-03 eta 11:45:21 +epoch [24/50] batch [100/1000] time 1.586 (1.572) data 0.000 (0.010) loss 0.9829 (1.1343) acc 68.7500 (72.0000) lr 1.1874e-03 eta 11:44:50 +epoch [24/50] batch [105/1000] time 1.576 (1.572) data 0.000 (0.010) loss 1.4189 (1.1214) acc 59.3750 (72.1131) lr 1.1874e-03 eta 11:44:40 +epoch [24/50] batch [110/1000] time 1.574 (1.572) data 0.000 (0.009) loss 0.9297 (1.1118) acc 75.0000 (72.4148) lr 1.1874e-03 eta 11:44:18 +epoch [24/50] batch [115/1000] time 1.571 (1.571) data 0.000 (0.009) loss 1.2998 (1.1175) acc 71.8750 (72.2283) lr 1.1874e-03 eta 11:43:56 +epoch [24/50] batch [120/1000] time 1.589 (1.571) data 0.000 (0.008) loss 1.7520 (1.1179) acc 65.6250 (72.1615) lr 1.1874e-03 eta 11:43:57 +epoch [24/50] batch [125/1000] time 1.582 (1.572) data 0.000 (0.008) loss 1.4053 (1.1108) acc 62.5000 (72.0500) lr 1.1874e-03 eta 11:43:59 +epoch [24/50] batch [130/1000] time 1.578 (1.572) data 0.000 (0.008) loss 0.9565 (1.1116) acc 75.0000 (72.0433) lr 1.1874e-03 eta 11:43:49 +epoch [24/50] batch [135/1000] time 1.713 (1.572) data 0.000 (0.008) loss 0.7397 (1.1167) acc 78.1250 (71.9676) lr 1.1874e-03 eta 11:44:01 +epoch [24/50] batch [140/1000] time 1.583 (1.572) data 0.001 (0.007) loss 1.5615 (1.1181) acc 71.8750 (72.0312) lr 1.1874e-03 eta 11:43:50 +epoch [24/50] batch [145/1000] time 1.554 (1.572) data 0.001 (0.007) loss 0.7461 (1.1091) acc 
78.1250 (72.2198) lr 1.1874e-03 eta 11:43:30 +epoch [24/50] batch [150/1000] time 1.556 (1.571) data 0.001 (0.007) loss 2.0996 (1.1154) acc 56.2500 (72.1250) lr 1.1874e-03 eta 11:43:11 +epoch [24/50] batch [155/1000] time 1.568 (1.571) data 0.000 (0.007) loss 0.6719 (1.1129) acc 87.5000 (72.3185) lr 1.1874e-03 eta 11:42:54 +epoch [24/50] batch [160/1000] time 1.568 (1.571) data 0.001 (0.006) loss 0.7349 (1.1123) acc 84.3750 (72.3047) lr 1.1874e-03 eta 11:42:38 +epoch [24/50] batch [165/1000] time 1.577 (1.571) data 0.000 (0.006) loss 1.2998 (1.1118) acc 65.6250 (72.2348) lr 1.1874e-03 eta 11:42:35 +epoch [24/50] batch [170/1000] time 1.575 (1.571) data 0.001 (0.006) loss 0.6968 (1.1123) acc 78.1250 (72.1507) lr 1.1874e-03 eta 11:42:30 +epoch [24/50] batch [175/1000] time 1.575 (1.571) data 0.000 (0.006) loss 0.9102 (1.1113) acc 84.3750 (72.1071) lr 1.1874e-03 eta 11:42:18 +epoch [24/50] batch [180/1000] time 1.747 (1.572) data 0.000 (0.006) loss 0.9346 (1.1089) acc 71.8750 (72.1181) lr 1.1874e-03 eta 11:42:38 +epoch [24/50] batch [185/1000] time 1.574 (1.572) data 0.001 (0.006) loss 1.2246 (1.1085) acc 75.0000 (72.1115) lr 1.1874e-03 eta 11:42:32 +epoch [24/50] batch [190/1000] time 1.553 (1.572) data 0.001 (0.006) loss 1.2832 (1.1073) acc 71.8750 (72.1711) lr 1.1874e-03 eta 11:42:15 +epoch [24/50] batch [195/1000] time 1.570 (1.571) data 0.001 (0.005) loss 0.7583 (1.1013) acc 84.3750 (72.2756) lr 1.1874e-03 eta 11:42:01 +epoch [24/50] batch [200/1000] time 1.581 (1.571) data 0.000 (0.005) loss 1.1494 (1.0994) acc 71.8750 (72.4062) lr 1.1874e-03 eta 11:41:45 +epoch [24/50] batch [205/1000] time 1.566 (1.571) data 0.001 (0.005) loss 0.7832 (1.1031) acc 81.2500 (72.3323) lr 1.1874e-03 eta 11:41:32 +epoch [24/50] batch [210/1000] time 1.574 (1.571) data 0.001 (0.005) loss 1.3184 (1.1101) acc 75.0000 (72.2619) lr 1.1874e-03 eta 11:41:14 +epoch [24/50] batch [215/1000] time 1.559 (1.570) data 0.000 (0.005) loss 1.1582 (1.1077) acc 75.0000 (72.2674) lr 1.1874e-03 eta 
11:40:57 +epoch [24/50] batch [220/1000] time 1.579 (1.570) data 0.000 (0.005) loss 1.0176 (1.1068) acc 78.1250 (72.3011) lr 1.1874e-03 eta 11:40:47 +epoch [24/50] batch [225/1000] time 1.556 (1.571) data 0.000 (0.005) loss 1.3721 (1.1064) acc 71.8750 (72.2361) lr 1.1874e-03 eta 11:40:53 +epoch [24/50] batch [230/1000] time 1.546 (1.570) data 0.000 (0.005) loss 1.4492 (1.1055) acc 59.3750 (72.2690) lr 1.1874e-03 eta 11:40:41 +epoch [24/50] batch [235/1000] time 1.549 (1.570) data 0.000 (0.005) loss 0.8945 (1.1006) acc 78.1250 (72.3670) lr 1.1874e-03 eta 11:40:30 +epoch [24/50] batch [240/1000] time 1.566 (1.570) data 0.000 (0.004) loss 0.6074 (1.0960) acc 84.3750 (72.4219) lr 1.1874e-03 eta 11:40:16 +epoch [24/50] batch [245/1000] time 1.581 (1.570) data 0.001 (0.004) loss 1.6787 (1.1039) acc 62.5000 (72.1811) lr 1.1874e-03 eta 11:40:03 +epoch [24/50] batch [250/1000] time 1.589 (1.570) data 0.000 (0.004) loss 1.0566 (1.1029) acc 68.7500 (72.2375) lr 1.1874e-03 eta 11:40:01 +epoch [24/50] batch [255/1000] time 1.571 (1.570) data 0.001 (0.004) loss 1.1865 (1.1094) acc 65.6250 (72.1814) lr 1.1874e-03 eta 11:39:50 +epoch [24/50] batch [260/1000] time 1.581 (1.570) data 0.001 (0.004) loss 0.8271 (1.1067) acc 78.1250 (72.2476) lr 1.1874e-03 eta 11:39:39 +epoch [24/50] batch [265/1000] time 1.549 (1.570) data 0.000 (0.004) loss 1.1016 (1.1055) acc 75.0000 (72.2759) lr 1.1874e-03 eta 11:39:33 +epoch [24/50] batch [270/1000] time 1.538 (1.570) data 0.001 (0.004) loss 1.4756 (1.1101) acc 71.8750 (72.1412) lr 1.1874e-03 eta 11:39:15 +epoch [24/50] batch [275/1000] time 1.569 (1.570) data 0.001 (0.004) loss 0.5283 (1.1070) acc 87.5000 (72.1932) lr 1.1874e-03 eta 11:39:05 +epoch [24/50] batch [280/1000] time 1.568 (1.569) data 0.001 (0.004) loss 1.3125 (1.1077) acc 71.8750 (72.1763) lr 1.1874e-03 eta 11:38:54 +epoch [24/50] batch [285/1000] time 1.581 (1.569) data 0.000 (0.004) loss 1.2646 (1.1106) acc 71.8750 (72.0943) lr 1.1874e-03 eta 11:38:45 +epoch [24/50] batch 
[290/1000] time 1.563 (1.570) data 0.000 (0.004) loss 1.3301 (1.1099) acc 65.6250 (72.1336) lr 1.1874e-03 eta 11:38:53 +epoch [24/50] batch [295/1000] time 1.540 (1.570) data 0.000 (0.004) loss 1.3203 (1.1128) acc 62.5000 (72.0551) lr 1.1874e-03 eta 11:38:42 +epoch [24/50] batch [300/1000] time 1.576 (1.570) data 0.001 (0.004) loss 1.1074 (1.1146) acc 68.7500 (72.0521) lr 1.1874e-03 eta 11:38:36 +epoch [24/50] batch [305/1000] time 1.571 (1.570) data 0.000 (0.004) loss 1.3721 (1.1151) acc 68.7500 (72.0082) lr 1.1874e-03 eta 11:38:27 +epoch [24/50] batch [310/1000] time 1.578 (1.570) data 0.001 (0.004) loss 1.3330 (1.1174) acc 62.5000 (71.9052) lr 1.1874e-03 eta 11:38:18 +epoch [24/50] batch [315/1000] time 1.549 (1.570) data 0.001 (0.004) loss 1.4092 (1.1214) acc 59.3750 (71.9048) lr 1.1874e-03 eta 11:38:05 +epoch [24/50] batch [320/1000] time 1.571 (1.569) data 0.001 (0.003) loss 0.7114 (1.1223) acc 78.1250 (71.8750) lr 1.1874e-03 eta 11:37:51 +epoch [24/50] batch [325/1000] time 1.570 (1.569) data 0.000 (0.003) loss 1.0449 (1.1237) acc 71.8750 (71.8173) lr 1.1874e-03 eta 11:37:41 +epoch [24/50] batch [330/1000] time 1.571 (1.569) data 0.000 (0.003) loss 1.5537 (1.1246) acc 62.5000 (71.8087) lr 1.1874e-03 eta 11:37:33 +epoch [24/50] batch [335/1000] time 1.572 (1.570) data 0.000 (0.003) loss 1.2715 (1.1257) acc 71.8750 (71.7910) lr 1.1874e-03 eta 11:37:36 +epoch [24/50] batch [340/1000] time 1.558 (1.570) data 0.001 (0.003) loss 0.9546 (1.1251) acc 68.7500 (71.7371) lr 1.1874e-03 eta 11:37:24 +epoch [24/50] batch [345/1000] time 1.561 (1.569) data 0.000 (0.003) loss 0.9341 (1.1240) acc 81.2500 (71.8297) lr 1.1874e-03 eta 11:37:14 +epoch [24/50] batch [350/1000] time 1.543 (1.569) data 0.000 (0.003) loss 0.9390 (1.1261) acc 71.8750 (71.7500) lr 1.1874e-03 eta 11:37:04 +epoch [24/50] batch [355/1000] time 1.556 (1.569) data 0.000 (0.003) loss 0.9146 (1.1236) acc 68.7500 (71.7430) lr 1.1874e-03 eta 11:36:51 +epoch [24/50] batch [360/1000] time 1.544 (1.569) data 
0.001 (0.003) loss 1.2021 (1.1215) acc 62.5000 (71.7969) lr 1.1874e-03 eta 11:36:38 +epoch [24/50] batch [365/1000] time 1.553 (1.569) data 0.001 (0.003) loss 0.8057 (1.1207) acc 71.8750 (71.8151) lr 1.1874e-03 eta 11:36:28 +epoch [24/50] batch [370/1000] time 1.570 (1.569) data 0.001 (0.003) loss 1.3984 (1.1223) acc 62.5000 (71.7399) lr 1.1874e-03 eta 11:36:17 +epoch [24/50] batch [375/1000] time 1.560 (1.569) data 0.001 (0.003) loss 1.0664 (1.1254) acc 78.1250 (71.7167) lr 1.1874e-03 eta 11:36:23 +epoch [24/50] batch [380/1000] time 1.561 (1.569) data 0.001 (0.003) loss 0.5659 (1.1244) acc 84.3750 (71.7516) lr 1.1874e-03 eta 11:36:10 +epoch [24/50] batch [385/1000] time 1.571 (1.569) data 0.001 (0.003) loss 1.1846 (1.1232) acc 68.7500 (71.7695) lr 1.1874e-03 eta 11:36:02 +epoch [24/50] batch [390/1000] time 1.542 (1.569) data 0.000 (0.003) loss 0.8706 (1.1208) acc 71.8750 (71.8750) lr 1.1874e-03 eta 11:35:49 +epoch [24/50] batch [395/1000] time 1.564 (1.569) data 0.000 (0.003) loss 1.1963 (1.1217) acc 71.8750 (71.8908) lr 1.1874e-03 eta 11:35:39 +epoch [24/50] batch [400/1000] time 1.567 (1.569) data 0.001 (0.003) loss 1.5244 (1.1233) acc 65.6250 (71.8438) lr 1.1874e-03 eta 11:35:28 +epoch [24/50] batch [405/1000] time 1.560 (1.569) data 0.000 (0.003) loss 1.4941 (1.1268) acc 65.6250 (71.7824) lr 1.1874e-03 eta 11:35:21 +epoch [24/50] batch [410/1000] time 1.571 (1.569) data 0.001 (0.003) loss 1.3604 (1.1252) acc 65.6250 (71.8140) lr 1.1874e-03 eta 11:35:13 +epoch [24/50] batch [415/1000] time 1.560 (1.569) data 0.001 (0.003) loss 1.4844 (1.1248) acc 53.1250 (71.8148) lr 1.1874e-03 eta 11:35:00 +epoch [24/50] batch [420/1000] time 1.556 (1.569) data 0.001 (0.003) loss 1.3340 (1.1234) acc 59.3750 (71.8527) lr 1.1874e-03 eta 11:34:52 +epoch [24/50] batch [425/1000] time 1.554 (1.568) data 0.001 (0.003) loss 1.1309 (1.1229) acc 68.7500 (71.8309) lr 1.1874e-03 eta 11:34:40 +epoch [24/50] batch [430/1000] time 1.580 (1.568) data 0.001 (0.003) loss 0.8599 (1.1235) acc 
75.0000 (71.7878) lr 1.1874e-03 eta 11:34:32 +epoch [24/50] batch [435/1000] time 1.580 (1.568) data 0.001 (0.003) loss 1.4189 (1.1251) acc 71.8750 (71.7744) lr 1.1874e-03 eta 11:34:26 +epoch [24/50] batch [440/1000] time 1.540 (1.569) data 0.001 (0.003) loss 1.3193 (1.1290) acc 65.6250 (71.6974) lr 1.1874e-03 eta 11:34:24 +epoch [24/50] batch [445/1000] time 1.566 (1.569) data 0.001 (0.003) loss 1.5986 (1.1287) acc 75.0000 (71.7205) lr 1.1874e-03 eta 11:34:16 +epoch [24/50] batch [450/1000] time 1.547 (1.569) data 0.000 (0.003) loss 1.3633 (1.1292) acc 65.6250 (71.7569) lr 1.1874e-03 eta 11:34:04 +epoch [24/50] batch [455/1000] time 1.550 (1.569) data 0.001 (0.003) loss 1.6504 (1.1293) acc 62.5000 (71.7788) lr 1.1874e-03 eta 11:33:55 +epoch [24/50] batch [460/1000] time 1.543 (1.568) data 0.000 (0.003) loss 0.9082 (1.1265) acc 78.1250 (71.8546) lr 1.1874e-03 eta 11:33:45 +epoch [24/50] batch [465/1000] time 1.547 (1.568) data 0.001 (0.003) loss 1.0479 (1.1265) acc 71.8750 (71.8481) lr 1.1874e-03 eta 11:33:33 +epoch [24/50] batch [470/1000] time 1.575 (1.568) data 0.001 (0.003) loss 1.0801 (1.1270) acc 71.8750 (71.8551) lr 1.1874e-03 eta 11:33:24 +epoch [24/50] batch [475/1000] time 1.564 (1.568) data 0.001 (0.003) loss 1.1289 (1.1308) acc 68.7500 (71.7763) lr 1.1874e-03 eta 11:33:13 +epoch [24/50] batch [480/1000] time 1.565 (1.568) data 0.000 (0.003) loss 1.4707 (1.1315) acc 71.8750 (71.7773) lr 1.1874e-03 eta 11:33:04 +epoch [24/50] batch [485/1000] time 1.575 (1.568) data 0.001 (0.002) loss 1.0654 (1.1316) acc 71.8750 (71.7848) lr 1.1874e-03 eta 11:33:04 +epoch [24/50] batch [490/1000] time 1.563 (1.568) data 0.000 (0.002) loss 1.9443 (1.1326) acc 53.1250 (71.7283) lr 1.1874e-03 eta 11:32:58 +epoch [24/50] batch [495/1000] time 1.598 (1.568) data 0.000 (0.002) loss 0.8569 (1.1308) acc 78.1250 (71.7929) lr 1.1874e-03 eta 11:32:50 +epoch [24/50] batch [500/1000] time 1.575 (1.568) data 0.001 (0.002) loss 1.0605 (1.1304) acc 71.8750 (71.7687) lr 1.1874e-03 eta 
11:32:40 +epoch [24/50] batch [505/1000] time 1.568 (1.568) data 0.000 (0.002) loss 1.1016 (1.1313) acc 71.8750 (71.7450) lr 1.1874e-03 eta 11:32:31 +epoch [24/50] batch [510/1000] time 1.551 (1.568) data 0.000 (0.002) loss 0.7544 (1.1297) acc 81.2500 (71.7708) lr 1.1874e-03 eta 11:32:25 +epoch [24/50] batch [515/1000] time 1.579 (1.568) data 0.001 (0.002) loss 1.3848 (1.1316) acc 65.6250 (71.7112) lr 1.1874e-03 eta 11:32:17 +epoch [24/50] batch [520/1000] time 1.556 (1.568) data 0.001 (0.002) loss 0.7593 (1.1309) acc 78.1250 (71.7368) lr 1.1874e-03 eta 11:32:10 +epoch [24/50] batch [525/1000] time 1.752 (1.569) data 0.000 (0.002) loss 1.6641 (1.1324) acc 68.7500 (71.7143) lr 1.1874e-03 eta 11:32:11 +epoch [24/50] batch [530/1000] time 1.524 (1.569) data 0.001 (0.002) loss 1.9482 (1.1321) acc 59.3750 (71.7217) lr 1.1874e-03 eta 11:31:59 +epoch [24/50] batch [535/1000] time 1.528 (1.568) data 0.001 (0.002) loss 1.0723 (1.1325) acc 65.6250 (71.7290) lr 1.1874e-03 eta 11:31:47 +epoch [24/50] batch [540/1000] time 1.542 (1.568) data 0.000 (0.002) loss 1.3301 (1.1340) acc 75.0000 (71.7188) lr 1.1874e-03 eta 11:31:39 +epoch [24/50] batch [545/1000] time 1.536 (1.568) data 0.001 (0.002) loss 0.9639 (1.1360) acc 68.7500 (71.6743) lr 1.1874e-03 eta 11:31:27 +epoch [24/50] batch [550/1000] time 1.553 (1.568) data 0.001 (0.002) loss 1.2344 (1.1379) acc 65.6250 (71.6420) lr 1.1874e-03 eta 11:31:14 +epoch [24/50] batch [555/1000] time 1.567 (1.568) data 0.000 (0.002) loss 0.9771 (1.1360) acc 71.8750 (71.6948) lr 1.1874e-03 eta 11:31:04 +epoch [24/50] batch [560/1000] time 1.540 (1.568) data 0.000 (0.002) loss 1.4326 (1.1368) acc 75.0000 (71.6908) lr 1.1874e-03 eta 11:30:50 +epoch [24/50] batch [565/1000] time 1.546 (1.568) data 0.000 (0.002) loss 1.0576 (1.1346) acc 71.8750 (71.7644) lr 1.1874e-03 eta 11:30:42 +epoch [24/50] batch [570/1000] time 1.555 (1.568) data 0.001 (0.002) loss 1.7041 (1.1342) acc 68.7500 (71.7818) lr 1.1874e-03 eta 11:30:33 +epoch [24/50] batch 
[575/1000] time 1.583 (1.568) data 0.001 (0.002) loss 1.0244 (1.1348) acc 71.8750 (71.7554) lr 1.1874e-03 eta 11:30:26 +epoch [24/50] batch [580/1000] time 1.588 (1.568) data 0.000 (0.002) loss 1.1729 (1.1335) acc 71.8750 (71.8103) lr 1.1874e-03 eta 11:30:15 +epoch [24/50] batch [585/1000] time 1.566 (1.568) data 0.001 (0.002) loss 0.6904 (1.1303) acc 81.2500 (71.9124) lr 1.1874e-03 eta 11:30:08 +epoch [24/50] batch [590/1000] time 1.555 (1.568) data 0.001 (0.002) loss 0.7417 (1.1286) acc 81.2500 (71.9386) lr 1.1874e-03 eta 11:30:06 +epoch [24/50] batch [595/1000] time 1.547 (1.568) data 0.001 (0.002) loss 0.9912 (1.1302) acc 62.5000 (71.8750) lr 1.1874e-03 eta 11:29:57 +epoch [24/50] batch [600/1000] time 1.581 (1.568) data 0.001 (0.002) loss 0.7686 (1.1291) acc 75.0000 (71.8802) lr 1.1874e-03 eta 11:29:50 +epoch [24/50] batch [605/1000] time 1.591 (1.568) data 0.001 (0.002) loss 1.1387 (1.1300) acc 75.0000 (71.8802) lr 1.1874e-03 eta 11:29:44 +epoch [24/50] batch [610/1000] time 1.597 (1.568) data 0.001 (0.002) loss 1.1953 (1.1301) acc 65.6250 (71.8852) lr 1.1874e-03 eta 11:29:37 +epoch [24/50] batch [615/1000] time 1.568 (1.568) data 0.001 (0.002) loss 0.5317 (1.1290) acc 90.6250 (71.9055) lr 1.1874e-03 eta 11:29:27 +epoch [24/50] batch [620/1000] time 1.561 (1.568) data 0.000 (0.002) loss 0.8179 (1.1291) acc 84.3750 (71.9304) lr 1.1874e-03 eta 11:29:18 +epoch [24/50] batch [625/1000] time 1.541 (1.568) data 0.000 (0.002) loss 1.4141 (1.1306) acc 65.6250 (71.9100) lr 1.1874e-03 eta 11:29:09 +epoch [24/50] batch [630/1000] time 1.573 (1.568) data 0.000 (0.002) loss 0.9937 (1.1300) acc 71.8750 (71.8948) lr 1.1874e-03 eta 11:29:02 +epoch [24/50] batch [635/1000] time 1.572 (1.568) data 0.001 (0.002) loss 1.1484 (1.1288) acc 71.8750 (71.8996) lr 1.1874e-03 eta 11:29:01 +epoch [24/50] batch [640/1000] time 1.553 (1.568) data 0.000 (0.002) loss 1.0547 (1.1283) acc 68.7500 (71.9189) lr 1.1874e-03 eta 11:28:52 +epoch [24/50] batch [645/1000] time 1.581 (1.568) data 
0.001 (0.002) loss 0.9067 (1.1284) acc 78.1250 (71.9428) lr 1.1874e-03 eta 11:28:43 +epoch [24/50] batch [650/1000] time 1.553 (1.568) data 0.000 (0.002) loss 1.1514 (1.1277) acc 78.1250 (71.9471) lr 1.1874e-03 eta 11:28:33 +epoch [24/50] batch [655/1000] time 1.542 (1.568) data 0.000 (0.002) loss 1.4297 (1.1267) acc 65.6250 (71.9895) lr 1.1874e-03 eta 11:28:24 +epoch [24/50] batch [660/1000] time 1.579 (1.568) data 0.001 (0.002) loss 1.1348 (1.1266) acc 81.2500 (71.9981) lr 1.1874e-03 eta 11:28:19 +epoch [24/50] batch [665/1000] time 1.581 (1.568) data 0.001 (0.002) loss 0.7129 (1.1269) acc 81.2500 (71.9972) lr 1.1874e-03 eta 11:28:13 +epoch [24/50] batch [670/1000] time 1.558 (1.568) data 0.000 (0.002) loss 1.0020 (1.1255) acc 75.0000 (72.0243) lr 1.1874e-03 eta 11:28:05 +epoch [24/50] batch [675/1000] time 1.601 (1.568) data 0.001 (0.002) loss 0.8179 (1.1237) acc 78.1250 (72.0694) lr 1.1874e-03 eta 11:27:56 +epoch [24/50] batch [680/1000] time 1.549 (1.568) data 0.000 (0.002) loss 1.3105 (1.1247) acc 65.6250 (72.0129) lr 1.1874e-03 eta 11:27:52 +epoch [24/50] batch [685/1000] time 1.572 (1.568) data 0.000 (0.002) loss 1.0420 (1.1262) acc 75.0000 (71.9982) lr 1.1874e-03 eta 11:27:41 +epoch [24/50] batch [690/1000] time 1.564 (1.568) data 0.000 (0.002) loss 0.9028 (1.1264) acc 81.2500 (72.0245) lr 1.1874e-03 eta 11:27:32 +epoch [24/50] batch [695/1000] time 1.557 (1.568) data 0.000 (0.002) loss 0.7627 (1.1243) acc 87.5000 (72.0773) lr 1.1874e-03 eta 11:27:21 +epoch [24/50] batch [700/1000] time 1.562 (1.568) data 0.000 (0.002) loss 1.4326 (1.1256) acc 71.8750 (72.0893) lr 1.1874e-03 eta 11:27:12 +epoch [24/50] batch [705/1000] time 1.576 (1.568) data 0.000 (0.002) loss 1.1846 (1.1265) acc 68.7500 (72.0523) lr 1.1874e-03 eta 11:27:07 +epoch [24/50] batch [710/1000] time 1.565 (1.568) data 0.000 (0.002) loss 1.1074 (1.1259) acc 62.5000 (72.0335) lr 1.1874e-03 eta 11:26:57 +epoch [24/50] batch [715/1000] time 1.546 (1.568) data 0.000 (0.002) loss 0.7539 (1.1254) acc 
75.0000 (72.0411) lr 1.1874e-03 eta 11:26:48 +epoch [24/50] batch [720/1000] time 1.548 (1.568) data 0.000 (0.002) loss 1.0830 (1.1244) acc 75.0000 (72.0747) lr 1.1874e-03 eta 11:26:38 +epoch [24/50] batch [725/1000] time 1.564 (1.568) data 0.001 (0.002) loss 0.9189 (1.1245) acc 75.0000 (72.0776) lr 1.1874e-03 eta 11:26:31 +epoch [24/50] batch [730/1000] time 1.605 (1.568) data 0.001 (0.002) loss 0.9473 (1.1235) acc 71.8750 (72.0762) lr 1.1874e-03 eta 11:26:22 +epoch [24/50] batch [735/1000] time 1.546 (1.568) data 0.000 (0.002) loss 1.4160 (1.1242) acc 68.7500 (72.0663) lr 1.1874e-03 eta 11:26:14 +epoch [24/50] batch [740/1000] time 1.556 (1.568) data 0.000 (0.002) loss 1.9355 (1.1254) acc 62.5000 (72.0481) lr 1.1874e-03 eta 11:26:12 +epoch [24/50] batch [745/1000] time 1.539 (1.568) data 0.001 (0.002) loss 0.8525 (1.1249) acc 78.1250 (72.0512) lr 1.1874e-03 eta 11:26:01 +epoch [24/50] batch [750/1000] time 1.549 (1.568) data 0.000 (0.002) loss 0.5991 (1.1231) acc 81.2500 (72.0708) lr 1.1874e-03 eta 11:25:53 +epoch [24/50] batch [755/1000] time 1.596 (1.568) data 0.001 (0.002) loss 0.9185 (1.1232) acc 78.1250 (72.0861) lr 1.1874e-03 eta 11:25:45 +epoch [24/50] batch [760/1000] time 1.544 (1.568) data 0.000 (0.002) loss 0.8091 (1.1222) acc 78.1250 (72.1135) lr 1.1874e-03 eta 11:25:38 +epoch [24/50] batch [765/1000] time 1.567 (1.568) data 0.000 (0.002) loss 1.2637 (1.1216) acc 68.7500 (72.1324) lr 1.1874e-03 eta 11:25:30 +epoch [24/50] batch [770/1000] time 1.546 (1.568) data 0.000 (0.002) loss 1.8730 (1.1243) acc 59.3750 (72.0576) lr 1.1874e-03 eta 11:25:19 +epoch [24/50] batch [775/1000] time 1.560 (1.568) data 0.000 (0.002) loss 1.1338 (1.1256) acc 68.7500 (72.0161) lr 1.1874e-03 eta 11:25:10 +epoch [24/50] batch [780/1000] time 1.570 (1.568) data 0.001 (0.002) loss 0.8770 (1.1257) acc 78.1250 (72.0312) lr 1.1874e-03 eta 11:25:02 +epoch [24/50] batch [785/1000] time 1.557 (1.568) data 0.001 (0.002) loss 1.8594 (1.1270) acc 56.2500 (71.9904) lr 1.1874e-03 eta 
11:24:58 +epoch [24/50] batch [790/1000] time 1.541 (1.568) data 0.000 (0.002) loss 0.6914 (1.1268) acc 71.8750 (71.9699) lr 1.1874e-03 eta 11:24:46 +epoch [24/50] batch [795/1000] time 1.563 (1.568) data 0.001 (0.002) loss 2.0254 (1.1284) acc 56.2500 (71.9300) lr 1.1874e-03 eta 11:24:38 +epoch [24/50] batch [800/1000] time 1.607 (1.568) data 0.000 (0.002) loss 1.1533 (1.1267) acc 75.0000 (71.9766) lr 1.1874e-03 eta 11:24:29 +epoch [24/50] batch [805/1000] time 1.569 (1.568) data 0.000 (0.002) loss 0.7163 (1.1264) acc 84.3750 (71.9643) lr 1.1874e-03 eta 11:24:23 +epoch [24/50] batch [810/1000] time 1.562 (1.568) data 0.000 (0.002) loss 0.9097 (1.1268) acc 65.6250 (71.9406) lr 1.1874e-03 eta 11:24:13 +epoch [24/50] batch [815/1000] time 1.544 (1.567) data 0.000 (0.002) loss 1.1660 (1.1276) acc 59.3750 (71.9018) lr 1.1874e-03 eta 11:24:03 +epoch [24/50] batch [820/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.0137 (1.1284) acc 68.7500 (71.8636) lr 1.1874e-03 eta 11:23:55 +epoch [24/50] batch [825/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.2119 (1.1296) acc 75.0000 (71.8636) lr 1.1874e-03 eta 11:23:46 +epoch [24/50] batch [830/1000] time 1.598 (1.568) data 0.000 (0.002) loss 1.0049 (1.1298) acc 84.3750 (71.8788) lr 1.1874e-03 eta 11:23:45 +epoch [24/50] batch [835/1000] time 1.565 (1.568) data 0.000 (0.002) loss 1.4697 (1.1300) acc 65.6250 (71.8787) lr 1.1874e-03 eta 11:23:38 +epoch [24/50] batch [840/1000] time 1.549 (1.568) data 0.000 (0.002) loss 1.7686 (1.1309) acc 62.5000 (71.8676) lr 1.1874e-03 eta 11:23:28 +epoch [24/50] batch [845/1000] time 1.543 (1.568) data 0.001 (0.002) loss 1.1396 (1.1312) acc 78.1250 (71.8676) lr 1.1874e-03 eta 11:23:19 +epoch [24/50] batch [850/1000] time 1.586 (1.568) data 0.001 (0.002) loss 0.7275 (1.1298) acc 75.0000 (71.8971) lr 1.1874e-03 eta 11:23:12 +epoch [24/50] batch [855/1000] time 1.571 (1.568) data 0.000 (0.002) loss 1.3730 (1.1291) acc 59.3750 (71.9006) lr 1.1874e-03 eta 11:23:03 +epoch [24/50] batch 
[860/1000] time 1.540 (1.568) data 0.000 (0.002) loss 1.1533 (1.1296) acc 65.6250 (71.8750) lr 1.1874e-03 eta 11:22:57 +epoch [24/50] batch [865/1000] time 1.567 (1.568) data 0.000 (0.002) loss 1.0117 (1.1295) acc 75.0000 (71.8497) lr 1.1874e-03 eta 11:22:48 +epoch [24/50] batch [870/1000] time 1.561 (1.567) data 0.001 (0.002) loss 1.2031 (1.1288) acc 71.8750 (71.8570) lr 1.1874e-03 eta 11:22:38 +epoch [24/50] batch [875/1000] time 1.546 (1.567) data 0.000 (0.002) loss 0.8853 (1.1292) acc 75.0000 (71.8643) lr 1.1874e-03 eta 11:22:29 +epoch [24/50] batch [880/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.0547 (1.1287) acc 65.6250 (71.8643) lr 1.1874e-03 eta 11:22:21 +epoch [24/50] batch [885/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.4248 (1.1287) acc 65.6250 (71.8538) lr 1.1874e-03 eta 11:22:11 +epoch [24/50] batch [890/1000] time 1.689 (1.567) data 0.000 (0.002) loss 1.6035 (1.1289) acc 62.5000 (71.8610) lr 1.1874e-03 eta 11:22:06 +epoch [24/50] batch [895/1000] time 1.542 (1.567) data 0.000 (0.002) loss 1.4600 (1.1291) acc 65.6250 (71.8575) lr 1.1874e-03 eta 11:21:56 +epoch [24/50] batch [900/1000] time 1.547 (1.567) data 0.000 (0.002) loss 0.7217 (1.1293) acc 81.2500 (71.8472) lr 1.1874e-03 eta 11:21:47 +epoch [24/50] batch [905/1000] time 1.573 (1.567) data 0.000 (0.002) loss 0.8975 (1.1308) acc 68.7500 (71.8094) lr 1.1874e-03 eta 11:21:38 +epoch [24/50] batch [910/1000] time 1.551 (1.567) data 0.001 (0.002) loss 1.0078 (1.1300) acc 68.7500 (71.8235) lr 1.1874e-03 eta 11:21:30 +epoch [24/50] batch [915/1000] time 1.553 (1.567) data 0.000 (0.002) loss 1.2744 (1.1302) acc 84.3750 (71.8340) lr 1.1874e-03 eta 11:21:21 +epoch [24/50] batch [920/1000] time 1.531 (1.567) data 0.000 (0.002) loss 0.9204 (1.1300) acc 75.0000 (71.8342) lr 1.1874e-03 eta 11:21:12 +epoch [24/50] batch [925/1000] time 1.557 (1.567) data 0.000 (0.002) loss 0.9951 (1.1303) acc 71.8750 (71.8243) lr 1.1874e-03 eta 11:21:03 +epoch [24/50] batch [930/1000] time 1.574 (1.567) data 
0.001 (0.002) loss 1.2227 (1.1307) acc 78.1250 (71.8448) lr 1.1874e-03 eta 11:20:55 +epoch [24/50] batch [935/1000] time 1.746 (1.567) data 0.001 (0.002) loss 0.9688 (1.1294) acc 78.1250 (71.8817) lr 1.1874e-03 eta 11:20:53 +epoch [24/50] batch [940/1000] time 1.587 (1.567) data 0.000 (0.002) loss 1.0645 (1.1297) acc 71.8750 (71.8650) lr 1.1874e-03 eta 11:20:45 +epoch [24/50] batch [945/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.5020 (1.1315) acc 71.8750 (71.8519) lr 1.1874e-03 eta 11:20:36 +epoch [24/50] batch [950/1000] time 1.540 (1.567) data 0.000 (0.002) loss 0.7998 (1.1308) acc 71.8750 (71.8651) lr 1.1874e-03 eta 11:20:26 +epoch [24/50] batch [955/1000] time 1.551 (1.567) data 0.001 (0.002) loss 1.7441 (1.1308) acc 62.5000 (71.8685) lr 1.1874e-03 eta 11:20:17 +epoch [24/50] batch [960/1000] time 1.529 (1.567) data 0.000 (0.002) loss 1.7549 (1.1321) acc 59.3750 (71.8229) lr 1.1874e-03 eta 11:20:07 +epoch [24/50] batch [965/1000] time 1.547 (1.567) data 0.001 (0.001) loss 1.3115 (1.1311) acc 71.8750 (71.8491) lr 1.1874e-03 eta 11:19:58 +epoch [24/50] batch [970/1000] time 1.565 (1.567) data 0.001 (0.001) loss 1.2217 (1.1308) acc 68.7500 (71.8492) lr 1.1874e-03 eta 11:19:47 +epoch [24/50] batch [975/1000] time 1.560 (1.567) data 0.001 (0.001) loss 1.6113 (1.1321) acc 71.8750 (71.8397) lr 1.1874e-03 eta 11:19:37 +epoch [24/50] batch [980/1000] time 1.594 (1.567) data 0.000 (0.001) loss 1.0459 (1.1309) acc 71.8750 (71.8559) lr 1.1874e-03 eta 11:19:34 +epoch [24/50] batch [985/1000] time 1.575 (1.567) data 0.001 (0.001) loss 0.7095 (1.1305) acc 87.5000 (71.8560) lr 1.1874e-03 eta 11:19:26 +epoch [24/50] batch [990/1000] time 1.583 (1.567) data 0.000 (0.001) loss 0.8892 (1.1313) acc 65.6250 (71.8403) lr 1.1874e-03 eta 11:19:17 +epoch [24/50] batch [995/1000] time 1.552 (1.567) data 0.000 (0.001) loss 0.9990 (1.1308) acc 71.8750 (71.8373) lr 1.1874e-03 eta 11:19:08 +epoch [24/50] batch [1000/1000] time 1.565 (1.567) data 0.000 (0.001) loss 1.5068 (1.1316) acc 
62.5000 (71.8031) lr 1.1253e-03 eta 11:18:59 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,224 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 78.0% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [25/50] batch [5/1000] time 1.569 (1.707) data 0.000 (0.206) loss 1.7559 (1.3606) acc 62.5000 (68.1250) lr 1.1253e-03 eta 12:19:33 +epoch [25/50] batch [10/1000] time 1.572 (1.634) data 0.000 (0.103) loss 0.7637 (1.1656) acc 78.1250 (71.5625) lr 1.1253e-03 eta 11:47:42 +epoch [25/50] batch [15/1000] time 1.570 (1.611) data 0.000 (0.069) loss 0.9561 (1.1291) acc 71.8750 (71.2500) lr 1.1253e-03 eta 11:37:52 +epoch [25/50] batch [20/1000] time 1.565 (1.601) data 0.000 (0.052) loss 0.7603 (1.0638) acc 75.0000 (72.3438) lr 1.1253e-03 eta 11:33:23 +epoch [25/50] batch [25/1000] time 1.551 (1.594) data 0.001 (0.041) loss 1.1572 (1.1465) acc 71.8750 (71.8750) lr 1.1253e-03 eta 11:29:53 +epoch [25/50] batch [30/1000] time 1.545 (1.588) data 0.001 (0.035) loss 1.3828 (1.1410) acc 68.7500 (71.4583) lr 1.1253e-03 eta 11:27:32 +epoch [25/50] batch [35/1000] time 1.578 (1.588) data 0.001 (0.030) loss 1.4229 (1.1475) acc 62.5000 (71.3393) lr 1.1253e-03 eta 11:27:06 +epoch [25/50] batch [40/1000] time 1.530 (1.585) data 0.000 (0.026) loss 0.7407 (1.1552) acc 78.1250 (71.6406) lr 1.1253e-03 eta 11:25:44 +epoch [25/50] batch [45/1000] time 1.566 (1.582) data 0.001 (0.023) loss 1.0107 (1.1380) acc 68.7500 (71.6667) lr 1.1253e-03 eta 11:24:17 +epoch [25/50] batch [50/1000] time 1.536 (1.579) data 0.001 (0.021) loss 0.6953 (1.1262) acc 87.5000 (72.3125) lr 1.1253e-03 eta 11:23:02 +epoch [25/50] batch [55/1000] time 1.575 (1.577) data 0.001 (0.019) loss 1.3086 (1.1345) acc 71.8750 (71.9886) lr 1.1253e-03 eta 11:21:54 +epoch [25/50] batch [60/1000] time 1.560 (1.579) data 0.000 (0.018) loss 0.8262 (1.1243) acc 78.1250 (72.0312) lr 1.1253e-03 eta 11:22:33 +epoch [25/50] batch 
[65/1000] time 1.565 (1.578) data 0.000 (0.016) loss 0.8926 (1.1280) acc 75.0000 (71.8269) lr 1.1253e-03 eta 11:22:03 +epoch [25/50] batch [70/1000] time 1.567 (1.577) data 0.001 (0.015) loss 1.0996 (1.1234) acc 65.6250 (71.8304) lr 1.1253e-03 eta 11:21:28 +epoch [25/50] batch [75/1000] time 1.559 (1.576) data 0.001 (0.014) loss 1.0459 (1.1093) acc 75.0000 (72.0000) lr 1.1253e-03 eta 11:21:00 +epoch [25/50] batch [80/1000] time 1.570 (1.575) data 0.000 (0.013) loss 1.0410 (1.0897) acc 71.8750 (72.4609) lr 1.1253e-03 eta 11:20:36 +epoch [25/50] batch [85/1000] time 1.567 (1.574) data 0.000 (0.013) loss 1.1826 (1.0860) acc 68.7500 (72.4632) lr 1.1253e-03 eta 11:19:59 +epoch [25/50] batch [90/1000] time 1.549 (1.574) data 0.000 (0.012) loss 1.9121 (1.0934) acc 59.3750 (72.3958) lr 1.1253e-03 eta 11:19:30 +epoch [25/50] batch [95/1000] time 1.586 (1.573) data 0.000 (0.011) loss 1.4648 (1.1009) acc 65.6250 (72.1711) lr 1.1253e-03 eta 11:19:06 +epoch [25/50] batch [100/1000] time 1.578 (1.572) data 0.000 (0.011) loss 1.0244 (1.1010) acc 68.7500 (72.1250) lr 1.1253e-03 eta 11:18:45 +epoch [25/50] batch [105/1000] time 1.579 (1.572) data 0.000 (0.010) loss 1.2363 (1.1052) acc 71.8750 (72.1131) lr 1.1253e-03 eta 11:18:38 +epoch [25/50] batch [110/1000] time 1.553 (1.572) data 0.000 (0.010) loss 1.0615 (1.1060) acc 75.0000 (72.2443) lr 1.1253e-03 eta 11:18:12 +epoch [25/50] batch [115/1000] time 1.554 (1.572) data 0.001 (0.009) loss 1.5361 (1.1143) acc 56.2500 (72.0109) lr 1.1253e-03 eta 11:17:58 +epoch [25/50] batch [120/1000] time 1.557 (1.571) data 0.000 (0.009) loss 2.1270 (1.1317) acc 50.0000 (71.7969) lr 1.1253e-03 eta 11:17:47 +epoch [25/50] batch [125/1000] time 1.565 (1.571) data 0.001 (0.009) loss 1.0938 (1.1314) acc 71.8750 (71.8250) lr 1.1253e-03 eta 11:17:29 +epoch [25/50] batch [130/1000] time 1.572 (1.571) data 0.001 (0.008) loss 0.7349 (1.1256) acc 78.1250 (71.9471) lr 1.1253e-03 eta 11:17:18 +epoch [25/50] batch [135/1000] time 1.585 (1.571) data 0.000 
(0.008) loss 0.9810 (1.1275) acc 75.0000 (71.8287) lr 1.1253e-03 eta 11:17:21 +epoch [25/50] batch [140/1000] time 1.558 (1.571) data 0.000 (0.008) loss 0.8892 (1.1261) acc 78.1250 (71.8080) lr 1.1253e-03 eta 11:17:08 +epoch [25/50] batch [145/1000] time 1.582 (1.571) data 0.000 (0.008) loss 1.2070 (1.1205) acc 78.1250 (71.8966) lr 1.1253e-03 eta 11:16:52 +epoch [25/50] batch [150/1000] time 1.567 (1.571) data 0.000 (0.007) loss 1.5439 (1.1137) acc 71.8750 (72.1875) lr 1.1253e-03 eta 11:16:38 +epoch [25/50] batch [155/1000] time 1.582 (1.571) data 0.001 (0.007) loss 1.7031 (1.1245) acc 68.7500 (72.0161) lr 1.1253e-03 eta 11:16:32 +epoch [25/50] batch [160/1000] time 1.764 (1.571) data 0.001 (0.007) loss 0.9180 (1.1216) acc 81.2500 (72.1484) lr 1.1253e-03 eta 11:16:45 +epoch [25/50] batch [165/1000] time 1.568 (1.571) data 0.000 (0.007) loss 0.7905 (1.1105) acc 78.1250 (72.2917) lr 1.1253e-03 eta 11:16:36 +epoch [25/50] batch [170/1000] time 1.579 (1.571) data 0.000 (0.007) loss 0.4260 (1.1056) acc 90.6250 (72.4449) lr 1.1253e-03 eta 11:16:22 +epoch [25/50] batch [175/1000] time 1.546 (1.571) data 0.000 (0.006) loss 1.2109 (1.1052) acc 75.0000 (72.4821) lr 1.1253e-03 eta 11:16:07 +epoch [25/50] batch [180/1000] time 1.539 (1.570) data 0.000 (0.006) loss 0.7920 (1.1083) acc 87.5000 (72.5347) lr 1.1253e-03 eta 11:15:48 +epoch [25/50] batch [185/1000] time 1.574 (1.570) data 0.001 (0.006) loss 0.9062 (1.1100) acc 78.1250 (72.5169) lr 1.1253e-03 eta 11:15:38 +epoch [25/50] batch [190/1000] time 1.538 (1.570) data 0.000 (0.006) loss 1.3086 (1.1082) acc 59.3750 (72.4836) lr 1.1253e-03 eta 11:15:16 +epoch [25/50] batch [195/1000] time 1.553 (1.570) data 0.000 (0.006) loss 1.1699 (1.1110) acc 65.6250 (72.4519) lr 1.1253e-03 eta 11:15:02 +epoch [25/50] batch [200/1000] time 1.552 (1.569) data 0.001 (0.006) loss 0.7183 (1.1058) acc 84.3750 (72.6406) lr 1.1253e-03 eta 11:14:52 +epoch [25/50] batch [205/1000] time 1.573 (1.569) data 0.000 (0.005) loss 0.8843 (1.1130) acc 
81.2500 (72.5762) lr 1.1253e-03 eta 11:14:38 +epoch [25/50] batch [210/1000] time 1.544 (1.570) data 0.001 (0.005) loss 1.0684 (1.1081) acc 71.8750 (72.6339) lr 1.1253e-03 eta 11:14:47 +epoch [25/50] batch [215/1000] time 1.542 (1.569) data 0.000 (0.005) loss 1.1885 (1.1129) acc 71.8750 (72.5581) lr 1.1253e-03 eta 11:14:26 +epoch [25/50] batch [220/1000] time 1.546 (1.569) data 0.000 (0.005) loss 1.4717 (1.1124) acc 62.5000 (72.5142) lr 1.1253e-03 eta 11:14:08 +epoch [25/50] batch [225/1000] time 1.567 (1.569) data 0.000 (0.005) loss 1.0840 (1.1137) acc 62.5000 (72.4722) lr 1.1253e-03 eta 11:13:52 +epoch [25/50] batch [230/1000] time 1.566 (1.568) data 0.000 (0.005) loss 0.8789 (1.1103) acc 78.1250 (72.4864) lr 1.1253e-03 eta 11:13:39 +epoch [25/50] batch [235/1000] time 1.558 (1.568) data 0.000 (0.005) loss 1.4609 (1.1113) acc 62.5000 (72.4335) lr 1.1253e-03 eta 11:13:27 +epoch [25/50] batch [240/1000] time 1.558 (1.568) data 0.000 (0.005) loss 1.4434 (1.1121) acc 62.5000 (72.3828) lr 1.1253e-03 eta 11:13:19 +epoch [25/50] batch [245/1000] time 1.551 (1.568) data 0.000 (0.005) loss 1.0898 (1.1165) acc 68.7500 (72.2194) lr 1.1253e-03 eta 11:13:07 +epoch [25/50] batch [250/1000] time 1.562 (1.568) data 0.000 (0.005) loss 1.2461 (1.1214) acc 68.7500 (72.1250) lr 1.1253e-03 eta 11:12:55 +epoch [25/50] batch [255/1000] time 1.574 (1.568) data 0.000 (0.004) loss 0.6558 (1.1200) acc 84.3750 (72.1446) lr 1.1253e-03 eta 11:12:49 +epoch [25/50] batch [260/1000] time 1.574 (1.568) data 0.001 (0.004) loss 1.4463 (1.1247) acc 59.3750 (72.1274) lr 1.1253e-03 eta 11:12:39 +epoch [25/50] batch [265/1000] time 1.593 (1.568) data 0.000 (0.004) loss 0.8765 (1.1222) acc 81.2500 (72.1580) lr 1.1253e-03 eta 11:12:29 +epoch [25/50] batch [270/1000] time 1.599 (1.568) data 0.000 (0.004) loss 1.2041 (1.1212) acc 71.8750 (72.1644) lr 1.1253e-03 eta 11:12:25 +epoch [25/50] batch [275/1000] time 1.562 (1.568) data 0.000 (0.004) loss 0.5635 (1.1156) acc 84.3750 (72.2045) lr 1.1253e-03 eta 
11:12:14 +epoch [25/50] batch [280/1000] time 1.561 (1.568) data 0.000 (0.004) loss 0.9966 (1.1144) acc 78.1250 (72.2321) lr 1.1253e-03 eta 11:12:05 +epoch [25/50] batch [285/1000] time 1.587 (1.568) data 0.001 (0.004) loss 0.7412 (1.1105) acc 68.7500 (72.2368) lr 1.1253e-03 eta 11:11:57 +epoch [25/50] batch [290/1000] time 1.542 (1.568) data 0.000 (0.004) loss 1.1641 (1.1082) acc 71.8750 (72.2414) lr 1.1253e-03 eta 11:11:48 +epoch [25/50] batch [295/1000] time 1.565 (1.568) data 0.000 (0.004) loss 0.9810 (1.1102) acc 81.2500 (72.2140) lr 1.1253e-03 eta 11:11:38 +epoch [25/50] batch [300/1000] time 1.553 (1.568) data 0.001 (0.004) loss 1.1709 (1.1089) acc 65.6250 (72.1979) lr 1.1253e-03 eta 11:11:31 +epoch [25/50] batch [305/1000] time 1.562 (1.568) data 0.000 (0.004) loss 2.3008 (1.1142) acc 46.8750 (72.1414) lr 1.1253e-03 eta 11:11:23 +epoch [25/50] batch [310/1000] time 1.557 (1.568) data 0.000 (0.004) loss 1.1172 (1.1149) acc 65.6250 (72.1169) lr 1.1253e-03 eta 11:11:09 +epoch [25/50] batch [315/1000] time 1.588 (1.568) data 0.000 (0.004) loss 0.9380 (1.1154) acc 68.7500 (72.0635) lr 1.1253e-03 eta 11:11:11 +epoch [25/50] batch [320/1000] time 1.561 (1.568) data 0.000 (0.004) loss 2.1289 (1.1188) acc 59.3750 (71.9629) lr 1.1253e-03 eta 11:11:04 +epoch [25/50] batch [325/1000] time 1.547 (1.568) data 0.000 (0.004) loss 1.8408 (1.1236) acc 59.3750 (71.8365) lr 1.1253e-03 eta 11:10:56 +epoch [25/50] batch [330/1000] time 1.561 (1.568) data 0.000 (0.004) loss 0.6240 (1.1218) acc 87.5000 (71.8845) lr 1.1253e-03 eta 11:10:49 +epoch [25/50] batch [335/1000] time 1.582 (1.568) data 0.000 (0.004) loss 1.2520 (1.1191) acc 71.8750 (71.9030) lr 1.1253e-03 eta 11:10:41 +epoch [25/50] batch [340/1000] time 1.546 (1.568) data 0.000 (0.003) loss 1.0029 (1.1209) acc 68.7500 (71.8107) lr 1.1253e-03 eta 11:10:35 +epoch [25/50] batch [345/1000] time 1.567 (1.568) data 0.000 (0.003) loss 0.7720 (1.1182) acc 78.1250 (71.8478) lr 1.1253e-03 eta 11:10:28 +epoch [25/50] batch 
[350/1000] time 1.561 (1.568) data 0.000 (0.003) loss 0.5610 (1.1133) acc 84.3750 (71.9018) lr 1.1253e-03 eta 11:10:16 +epoch [25/50] batch [355/1000] time 1.566 (1.568) data 0.000 (0.003) loss 1.7295 (1.1125) acc 56.2500 (71.8926) lr 1.1253e-03 eta 11:10:05 +epoch [25/50] batch [360/1000] time 1.557 (1.568) data 0.000 (0.003) loss 1.3984 (1.1124) acc 71.8750 (71.9010) lr 1.1253e-03 eta 11:10:13 +epoch [25/50] batch [365/1000] time 1.548 (1.568) data 0.001 (0.003) loss 1.2881 (1.1115) acc 75.0000 (71.9606) lr 1.1253e-03 eta 11:10:01 +epoch [25/50] batch [370/1000] time 1.566 (1.568) data 0.000 (0.003) loss 0.9087 (1.1099) acc 84.3750 (71.9764) lr 1.1253e-03 eta 11:09:47 +epoch [25/50] batch [375/1000] time 1.566 (1.568) data 0.000 (0.003) loss 1.4287 (1.1093) acc 62.5000 (72.0167) lr 1.1253e-03 eta 11:09:36 +epoch [25/50] batch [380/1000] time 1.564 (1.568) data 0.000 (0.003) loss 0.7368 (1.1096) acc 78.1250 (72.0477) lr 1.1253e-03 eta 11:09:25 +epoch [25/50] batch [385/1000] time 1.543 (1.568) data 0.000 (0.003) loss 1.0752 (1.1102) acc 68.7500 (72.0130) lr 1.1253e-03 eta 11:09:14 +epoch [25/50] batch [390/1000] time 1.571 (1.568) data 0.000 (0.003) loss 1.3447 (1.1093) acc 68.7500 (72.0272) lr 1.1253e-03 eta 11:09:08 +epoch [25/50] batch [395/1000] time 1.526 (1.567) data 0.000 (0.003) loss 1.7568 (1.1106) acc 62.5000 (71.9699) lr 1.1253e-03 eta 11:08:52 +epoch [25/50] batch [400/1000] time 1.536 (1.567) data 0.000 (0.003) loss 0.7788 (1.1118) acc 75.0000 (71.8984) lr 1.1253e-03 eta 11:08:37 +epoch [25/50] batch [405/1000] time 1.574 (1.567) data 0.000 (0.003) loss 1.0420 (1.1098) acc 75.0000 (71.9367) lr 1.1253e-03 eta 11:08:27 +epoch [25/50] batch [410/1000] time 1.569 (1.567) data 0.000 (0.003) loss 1.2686 (1.1104) acc 62.5000 (71.8902) lr 1.1253e-03 eta 11:08:19 +epoch [25/50] batch [415/1000] time 1.571 (1.567) data 0.000 (0.003) loss 1.5791 (1.1105) acc 71.8750 (71.9127) lr 1.1253e-03 eta 11:08:13 +epoch [25/50] batch [420/1000] time 1.545 (1.567) data 
0.000 (0.003) loss 1.1299 (1.1105) acc 62.5000 (71.8973) lr 1.1253e-03 eta 11:08:03 +epoch [25/50] batch [425/1000] time 1.545 (1.567) data 0.000 (0.003) loss 1.3975 (1.1116) acc 65.6250 (71.8456) lr 1.1253e-03 eta 11:07:51 +epoch [25/50] batch [430/1000] time 1.559 (1.567) data 0.000 (0.003) loss 1.2061 (1.1114) acc 65.6250 (71.8823) lr 1.1253e-03 eta 11:07:40 +epoch [25/50] batch [435/1000] time 1.549 (1.567) data 0.000 (0.003) loss 0.8706 (1.1118) acc 71.8750 (71.8391) lr 1.1253e-03 eta 11:07:32 +epoch [25/50] batch [440/1000] time 1.563 (1.567) data 0.000 (0.003) loss 1.2207 (1.1121) acc 75.0000 (71.8395) lr 1.1253e-03 eta 11:07:23 +epoch [25/50] batch [445/1000] time 1.563 (1.567) data 0.000 (0.003) loss 1.3574 (1.1137) acc 62.5000 (71.8258) lr 1.1253e-03 eta 11:07:13 +epoch [25/50] batch [450/1000] time 1.553 (1.567) data 0.000 (0.003) loss 1.0264 (1.1131) acc 78.1250 (71.7986) lr 1.1253e-03 eta 11:07:07 +epoch [25/50] batch [455/1000] time 1.585 (1.567) data 0.001 (0.003) loss 0.4773 (1.1123) acc 90.6250 (71.7788) lr 1.1253e-03 eta 11:07:00 +epoch [25/50] batch [460/1000] time 1.571 (1.567) data 0.000 (0.003) loss 1.7764 (1.1138) acc 62.5000 (71.7731) lr 1.1253e-03 eta 11:06:50 +epoch [25/50] batch [465/1000] time 1.546 (1.567) data 0.001 (0.003) loss 0.9517 (1.1128) acc 75.0000 (71.7540) lr 1.1253e-03 eta 11:06:49 +epoch [25/50] batch [470/1000] time 1.546 (1.567) data 0.000 (0.003) loss 0.8623 (1.1121) acc 71.8750 (71.7819) lr 1.1253e-03 eta 11:06:38 +epoch [25/50] batch [475/1000] time 1.573 (1.567) data 0.000 (0.003) loss 0.7349 (1.1128) acc 87.5000 (71.7632) lr 1.1253e-03 eta 11:06:32 +epoch [25/50] batch [480/1000] time 1.575 (1.567) data 0.000 (0.003) loss 0.7251 (1.1133) acc 81.2500 (71.7904) lr 1.1253e-03 eta 11:06:24 +epoch [25/50] batch [485/1000] time 1.557 (1.567) data 0.000 (0.003) loss 1.5449 (1.1128) acc 59.3750 (71.7655) lr 1.1253e-03 eta 11:06:15 +epoch [25/50] batch [490/1000] time 1.558 (1.567) data 0.001 (0.003) loss 0.8809 (1.1139) acc 
75.0000 (71.7857) lr 1.1253e-03 eta 11:06:03 +epoch [25/50] batch [495/1000] time 1.580 (1.567) data 0.001 (0.003) loss 0.9453 (1.1140) acc 71.8750 (71.7740) lr 1.1253e-03 eta 11:05:54 +epoch [25/50] batch [500/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.2725 (1.1118) acc 75.0000 (71.8125) lr 1.1253e-03 eta 11:05:47 +epoch [25/50] batch [505/1000] time 1.565 (1.567) data 0.000 (0.002) loss 1.3428 (1.1129) acc 65.6250 (71.8007) lr 1.1253e-03 eta 11:05:38 +epoch [25/50] batch [510/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.1885 (1.1156) acc 78.1250 (71.7770) lr 1.1253e-03 eta 11:05:36 +epoch [25/50] batch [515/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.7148 (1.1173) acc 65.6250 (71.7658) lr 1.1253e-03 eta 11:05:27 +epoch [25/50] batch [520/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.0332 (1.1192) acc 78.1250 (71.7788) lr 1.1253e-03 eta 11:05:18 +epoch [25/50] batch [525/1000] time 1.580 (1.567) data 0.000 (0.002) loss 1.3145 (1.1179) acc 71.8750 (71.8333) lr 1.1253e-03 eta 11:05:11 +epoch [25/50] batch [530/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.0996 (1.1171) acc 65.6250 (71.8396) lr 1.1253e-03 eta 11:05:02 +epoch [25/50] batch [535/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.2031 (1.1186) acc 71.8750 (71.7699) lr 1.1253e-03 eta 11:04:50 +epoch [25/50] batch [540/1000] time 1.535 (1.566) data 0.000 (0.002) loss 0.7930 (1.1175) acc 71.8750 (71.7766) lr 1.1253e-03 eta 11:04:38 +epoch [25/50] batch [545/1000] time 1.589 (1.566) data 0.000 (0.002) loss 0.6436 (1.1189) acc 81.2500 (71.7718) lr 1.1253e-03 eta 11:04:30 +epoch [25/50] batch [550/1000] time 1.583 (1.566) data 0.000 (0.002) loss 1.2715 (1.1182) acc 75.0000 (71.8068) lr 1.1253e-03 eta 11:04:25 +epoch [25/50] batch [555/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.5576 (1.1164) acc 90.6250 (71.8243) lr 1.1253e-03 eta 11:04:17 +epoch [25/50] batch [560/1000] time 1.541 (1.566) data 0.001 (0.002) loss 1.2324 (1.1170) acc 68.7500 (71.7746) lr 1.1253e-03 eta 
11:04:06 +epoch [25/50] batch [565/1000] time 1.552 (1.566) data 0.001 (0.002) loss 0.7622 (1.1160) acc 68.7500 (71.7976) lr 1.1253e-03 eta 11:03:54 +epoch [25/50] batch [570/1000] time 1.583 (1.566) data 0.000 (0.002) loss 0.9883 (1.1152) acc 75.0000 (71.8147) lr 1.1253e-03 eta 11:03:48 +epoch [25/50] batch [575/1000] time 1.589 (1.566) data 0.000 (0.002) loss 0.9521 (1.1155) acc 78.1250 (71.8370) lr 1.1253e-03 eta 11:03:40 +epoch [25/50] batch [580/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.8311 (1.1128) acc 78.1250 (71.8966) lr 1.1253e-03 eta 11:03:29 +epoch [25/50] batch [585/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.5361 (1.1122) acc 59.3750 (71.9071) lr 1.1253e-03 eta 11:03:19 +epoch [25/50] batch [590/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.8496 (1.1115) acc 75.0000 (71.9174) lr 1.1253e-03 eta 11:03:12 +epoch [25/50] batch [595/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.1113 (1.1130) acc 71.8750 (71.9013) lr 1.1253e-03 eta 11:03:04 +epoch [25/50] batch [600/1000] time 1.562 (1.566) data 0.001 (0.002) loss 1.0098 (1.1114) acc 68.7500 (71.9427) lr 1.1253e-03 eta 11:02:58 +epoch [25/50] batch [605/1000] time 1.559 (1.566) data 0.001 (0.002) loss 0.6113 (1.1121) acc 84.3750 (71.9318) lr 1.1253e-03 eta 11:02:49 +epoch [25/50] batch [610/1000] time 1.591 (1.566) data 0.000 (0.002) loss 1.3652 (1.1136) acc 68.7500 (71.8904) lr 1.1253e-03 eta 11:02:42 +epoch [25/50] batch [615/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.0039 (1.1155) acc 71.8750 (71.8242) lr 1.1253e-03 eta 11:02:40 +epoch [25/50] batch [620/1000] time 1.530 (1.566) data 0.001 (0.002) loss 1.7549 (1.1173) acc 71.8750 (71.8044) lr 1.1253e-03 eta 11:02:30 +epoch [25/50] batch [625/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.7554 (1.1155) acc 75.0000 (71.8350) lr 1.1253e-03 eta 11:02:22 +epoch [25/50] batch [630/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.8931 (1.1143) acc 71.8750 (71.8254) lr 1.1253e-03 eta 11:02:15 +epoch [25/50] batch 
[635/1000] time 1.551 (1.566) data 0.000 (0.002) loss 0.6982 (1.1126) acc 78.1250 (71.8504) lr 1.1253e-03 eta 11:02:05 +epoch [25/50] batch [640/1000] time 1.551 (1.566) data 0.000 (0.002) loss 0.8984 (1.1112) acc 78.1250 (71.8555) lr 1.1253e-03 eta 11:01:56 +epoch [25/50] batch [645/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.7471 (1.1109) acc 50.0000 (71.8120) lr 1.1253e-03 eta 11:01:47 +epoch [25/50] batch [650/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.1016 (1.1104) acc 71.8750 (71.8221) lr 1.1253e-03 eta 11:01:40 +epoch [25/50] batch [655/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.8550 (1.1105) acc 78.1250 (71.8034) lr 1.1253e-03 eta 11:01:32 +epoch [25/50] batch [660/1000] time 1.745 (1.566) data 0.000 (0.002) loss 1.8135 (1.1134) acc 62.5000 (71.7472) lr 1.1253e-03 eta 11:01:31 +epoch [25/50] batch [665/1000] time 1.592 (1.566) data 0.000 (0.002) loss 1.1553 (1.1129) acc 78.1250 (71.7763) lr 1.1253e-03 eta 11:01:24 +epoch [25/50] batch [670/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.1738 (1.1116) acc 75.0000 (71.8190) lr 1.1253e-03 eta 11:01:16 +epoch [25/50] batch [675/1000] time 1.551 (1.566) data 0.001 (0.002) loss 0.7803 (1.1124) acc 81.2500 (71.8333) lr 1.1253e-03 eta 11:01:07 +epoch [25/50] batch [680/1000] time 1.569 (1.566) data 0.001 (0.002) loss 0.7715 (1.1129) acc 81.2500 (71.8612) lr 1.1253e-03 eta 11:00:59 +epoch [25/50] batch [685/1000] time 1.576 (1.566) data 0.000 (0.002) loss 1.6113 (1.1135) acc 59.3750 (71.8522) lr 1.1253e-03 eta 11:00:50 +epoch [25/50] batch [690/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.1973 (1.1131) acc 68.7500 (71.8614) lr 1.1253e-03 eta 11:00:40 +epoch [25/50] batch [695/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.3096 (1.1135) acc 68.7500 (71.8660) lr 1.1253e-03 eta 11:00:31 +epoch [25/50] batch [700/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.8433 (1.1139) acc 75.0000 (71.8750) lr 1.1253e-03 eta 11:00:23 +epoch [25/50] batch [705/1000] time 1.550 (1.566) data 
0.000 (0.002) loss 0.9155 (1.1137) acc 78.1250 (71.8750) lr 1.1253e-03 eta 11:00:13 +epoch [25/50] batch [710/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.0742 (1.1135) acc 81.2500 (71.8794) lr 1.1253e-03 eta 11:00:09 +epoch [25/50] batch [715/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.8125 (1.1145) acc 68.7500 (71.8444) lr 1.1253e-03 eta 10:59:59 +epoch [25/50] batch [720/1000] time 1.564 (1.566) data 0.001 (0.002) loss 1.4961 (1.1144) acc 71.8750 (71.8403) lr 1.1253e-03 eta 10:59:52 +epoch [25/50] batch [725/1000] time 1.560 (1.566) data 0.000 (0.002) loss 0.9385 (1.1144) acc 75.0000 (71.8664) lr 1.1253e-03 eta 10:59:45 +epoch [25/50] batch [730/1000] time 1.590 (1.566) data 0.000 (0.002) loss 0.7954 (1.1145) acc 75.0000 (71.8793) lr 1.1253e-03 eta 10:59:39 +epoch [25/50] batch [735/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.9829 (1.1143) acc 71.8750 (71.9090) lr 1.1253e-03 eta 10:59:28 +epoch [25/50] batch [740/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.3467 (1.1151) acc 71.8750 (71.9003) lr 1.1253e-03 eta 10:59:19 +epoch [25/50] batch [745/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.0361 (1.1152) acc 71.8750 (71.9002) lr 1.1253e-03 eta 10:59:10 +epoch [25/50] batch [750/1000] time 1.555 (1.566) data 0.001 (0.002) loss 0.9009 (1.1152) acc 71.8750 (71.8750) lr 1.1253e-03 eta 10:59:01 +epoch [25/50] batch [755/1000] time 1.549 (1.566) data 0.000 (0.002) loss 0.8442 (1.1141) acc 71.8750 (71.9164) lr 1.1253e-03 eta 10:58:52 +epoch [25/50] batch [760/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.0029 (1.1144) acc 71.8750 (71.9079) lr 1.1253e-03 eta 10:58:42 +epoch [25/50] batch [765/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.7280 (1.1140) acc 84.3750 (71.9118) lr 1.1253e-03 eta 10:58:40 +epoch [25/50] batch [770/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.4062 (1.1155) acc 62.5000 (71.8750) lr 1.1253e-03 eta 10:58:30 +epoch [25/50] batch [775/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.4102 (1.1157) acc 
59.3750 (71.8629) lr 1.1253e-03 eta 10:58:19 +epoch [25/50] batch [780/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.8477 (1.1152) acc 71.8750 (71.8950) lr 1.1253e-03 eta 10:58:11 +epoch [25/50] batch [785/1000] time 1.580 (1.566) data 0.000 (0.002) loss 2.1035 (1.1163) acc 43.7500 (71.8710) lr 1.1253e-03 eta 10:58:03 +epoch [25/50] batch [790/1000] time 1.585 (1.566) data 0.000 (0.002) loss 0.8159 (1.1165) acc 81.2500 (71.8790) lr 1.1253e-03 eta 10:57:57 +epoch [25/50] batch [795/1000] time 1.551 (1.566) data 0.000 (0.002) loss 1.4814 (1.1169) acc 68.7500 (71.8514) lr 1.1253e-03 eta 10:57:50 +epoch [25/50] batch [800/1000] time 1.542 (1.566) data 0.000 (0.002) loss 1.0635 (1.1171) acc 71.8750 (71.8633) lr 1.1253e-03 eta 10:57:40 +epoch [25/50] batch [805/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.5459 (1.1165) acc 71.8750 (71.8983) lr 1.1253e-03 eta 10:57:31 +epoch [25/50] batch [810/1000] time 1.559 (1.566) data 0.000 (0.002) loss 0.7080 (1.1158) acc 81.2500 (71.9097) lr 1.1253e-03 eta 10:57:27 +epoch [25/50] batch [815/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.2324 (1.1169) acc 68.7500 (71.8942) lr 1.1253e-03 eta 10:57:17 +epoch [25/50] batch [820/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.7012 (1.1167) acc 65.6250 (71.8979) lr 1.1253e-03 eta 10:57:10 +epoch [25/50] batch [825/1000] time 1.542 (1.566) data 0.000 (0.002) loss 1.4189 (1.1176) acc 68.7500 (71.8523) lr 1.1253e-03 eta 10:57:01 +epoch [25/50] batch [830/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.8960 (1.1175) acc 78.1250 (71.8562) lr 1.1253e-03 eta 10:56:50 +epoch [25/50] batch [835/1000] time 1.549 (1.566) data 0.001 (0.002) loss 1.3877 (1.1186) acc 68.7500 (71.8563) lr 1.1253e-03 eta 10:56:41 +epoch [25/50] batch [840/1000] time 1.581 (1.566) data 0.000 (0.002) loss 1.0625 (1.1199) acc 75.0000 (71.8266) lr 1.1253e-03 eta 10:56:33 +epoch [25/50] batch [845/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.3057 (1.1209) acc 75.0000 (71.8232) lr 1.1253e-03 eta 
10:56:27 +epoch [25/50] batch [850/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.4238 (1.1222) acc 65.6250 (71.8088) lr 1.1253e-03 eta 10:56:20 +epoch [25/50] batch [855/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.0674 (1.1223) acc 71.8750 (71.8202) lr 1.1253e-03 eta 10:56:15 +epoch [25/50] batch [860/1000] time 1.583 (1.566) data 0.001 (0.002) loss 1.8223 (1.1234) acc 68.7500 (71.8096) lr 1.1253e-03 eta 10:56:07 +epoch [25/50] batch [865/1000] time 1.575 (1.566) data 0.000 (0.002) loss 1.6221 (1.1233) acc 59.3750 (71.8280) lr 1.1253e-03 eta 10:55:58 +epoch [25/50] batch [870/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.6406 (1.1235) acc 68.7500 (71.8463) lr 1.1253e-03 eta 10:55:49 +epoch [25/50] batch [875/1000] time 1.546 (1.566) data 0.001 (0.002) loss 1.0010 (1.1244) acc 68.7500 (71.8250) lr 1.1253e-03 eta 10:55:39 +epoch [25/50] batch [880/1000] time 1.572 (1.566) data 0.000 (0.002) loss 1.0332 (1.1233) acc 71.8750 (71.8572) lr 1.1253e-03 eta 10:55:31 +epoch [25/50] batch [885/1000] time 1.527 (1.566) data 0.001 (0.002) loss 1.8408 (1.1235) acc 65.6250 (71.8432) lr 1.1253e-03 eta 10:55:22 +epoch [25/50] batch [890/1000] time 1.541 (1.566) data 0.000 (0.002) loss 0.9341 (1.1232) acc 71.8750 (71.8469) lr 1.1253e-03 eta 10:55:12 +epoch [25/50] batch [895/1000] time 1.574 (1.566) data 0.001 (0.002) loss 0.9976 (1.1229) acc 75.0000 (71.8610) lr 1.1253e-03 eta 10:55:02 +epoch [25/50] batch [900/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.2617 (1.1238) acc 68.7500 (71.8368) lr 1.1253e-03 eta 10:54:52 +epoch [25/50] batch [905/1000] time 1.534 (1.565) data 0.000 (0.002) loss 1.3779 (1.1236) acc 71.8750 (71.8577) lr 1.1253e-03 eta 10:54:43 +epoch [25/50] batch [910/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.9155 (1.1241) acc 75.0000 (71.8338) lr 1.1253e-03 eta 10:54:34 +epoch [25/50] batch [915/1000] time 1.563 (1.565) data 0.001 (0.002) loss 0.8745 (1.1248) acc 81.2500 (71.8340) lr 1.1253e-03 eta 10:54:29 +epoch [25/50] batch 
[920/1000] time 1.547 (1.565) data 0.001 (0.002) loss 1.1787 (1.1249) acc 75.0000 (71.8410) lr 1.1253e-03 eta 10:54:20 +epoch [25/50] batch [925/1000] time 1.585 (1.565) data 0.000 (0.002) loss 1.2373 (1.1255) acc 71.8750 (71.8311) lr 1.1253e-03 eta 10:54:11 +epoch [25/50] batch [930/1000] time 1.578 (1.565) data 0.000 (0.002) loss 0.9219 (1.1248) acc 75.0000 (71.8313) lr 1.1253e-03 eta 10:54:04 +epoch [25/50] batch [935/1000] time 1.548 (1.565) data 0.000 (0.002) loss 0.8779 (1.1253) acc 71.8750 (71.8249) lr 1.1253e-03 eta 10:53:54 +epoch [25/50] batch [940/1000] time 1.550 (1.565) data 0.001 (0.002) loss 1.1650 (1.1257) acc 65.6250 (71.8318) lr 1.1253e-03 eta 10:53:47 +epoch [25/50] batch [945/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0537 (1.1262) acc 71.8750 (71.8122) lr 1.1253e-03 eta 10:53:39 +epoch [25/50] batch [950/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.5234 (1.1266) acc 56.2500 (71.7862) lr 1.1253e-03 eta 10:53:32 +epoch [25/50] batch [955/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.4375 (1.1283) acc 62.5000 (71.7605) lr 1.1253e-03 eta 10:53:25 +epoch [25/50] batch [960/1000] time 1.570 (1.566) data 0.001 (0.001) loss 0.9058 (1.1292) acc 75.0000 (71.7513) lr 1.1253e-03 eta 10:53:21 +epoch [25/50] batch [965/1000] time 1.556 (1.566) data 0.000 (0.001) loss 0.4104 (1.1278) acc 90.6250 (71.7843) lr 1.1253e-03 eta 10:53:12 +epoch [25/50] batch [970/1000] time 1.531 (1.566) data 0.000 (0.001) loss 1.1377 (1.1278) acc 78.1250 (71.7977) lr 1.1253e-03 eta 10:53:04 +epoch [25/50] batch [975/1000] time 1.547 (1.565) data 0.000 (0.001) loss 1.5674 (1.1289) acc 59.3750 (71.7692) lr 1.1253e-03 eta 10:52:55 +epoch [25/50] batch [980/1000] time 1.539 (1.565) data 0.000 (0.001) loss 0.9043 (1.1282) acc 84.3750 (71.7857) lr 1.1253e-03 eta 10:52:47 +epoch [25/50] batch [985/1000] time 1.542 (1.565) data 0.001 (0.001) loss 1.0820 (1.1277) acc 65.6250 (71.7798) lr 1.1253e-03 eta 10:52:38 +epoch [25/50] batch [990/1000] time 1.556 (1.565) data 
0.000 (0.001) loss 1.4160 (1.1286) acc 65.6250 (71.7803) lr 1.1253e-03 eta 10:52:29 +epoch [25/50] batch [995/1000] time 1.547 (1.565) data 0.000 (0.001) loss 1.2754 (1.1293) acc 75.0000 (71.7776) lr 1.1253e-03 eta 10:52:19 +epoch [25/50] batch [1000/1000] time 1.563 (1.565) data 0.000 (0.001) loss 0.7871 (1.1293) acc 81.2500 (71.7812) lr 1.0628e-03 eta 10:52:11 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,275 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 78.1% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [26/50] batch [5/1000] time 1.551 (1.692) data 0.001 (0.192) loss 1.3682 (1.0519) acc 65.6250 (74.3750) lr 1.0628e-03 eta 11:45:02 +epoch [26/50] batch [10/1000] time 1.554 (1.626) data 0.000 (0.096) loss 1.3301 (1.0194) acc 65.6250 (74.0625) lr 1.0628e-03 eta 11:17:09 +epoch [26/50] batch [15/1000] time 1.555 (1.603) data 0.000 (0.064) loss 1.7139 (1.1103) acc 71.8750 (72.5000) lr 1.0628e-03 eta 11:07:40 +epoch [26/50] batch [20/1000] time 1.572 (1.594) data 0.001 (0.048) loss 1.5928 (1.1180) acc 65.6250 (71.7188) lr 1.0628e-03 eta 11:03:40 +epoch [26/50] batch [25/1000] time 1.570 (1.590) data 0.001 (0.039) loss 1.0664 (1.0737) acc 71.8750 (72.6250) lr 1.0628e-03 eta 11:01:55 +epoch [26/50] batch [30/1000] time 1.564 (1.585) data 0.001 (0.032) loss 1.3916 (1.0968) acc 62.5000 (72.6042) lr 1.0628e-03 eta 10:59:44 +epoch [26/50] batch [35/1000] time 1.552 (1.580) data 0.000 (0.028) loss 0.9932 (1.1236) acc 71.8750 (71.7857) lr 1.0628e-03 eta 10:57:32 +epoch [26/50] batch [40/1000] time 1.560 (1.578) data 0.000 (0.024) loss 0.5664 (1.0972) acc 84.3750 (72.2656) lr 1.0628e-03 eta 10:56:36 +epoch [26/50] batch [45/1000] time 1.554 (1.576) data 0.000 (0.022) loss 1.4258 (1.1092) acc 62.5000 (72.2222) lr 1.0628e-03 eta 10:55:27 +epoch [26/50] batch [50/1000] time 1.548 (1.574) data 0.001 (0.020) loss 0.9355 (1.1058) acc 68.7500 (72.2500) lr 
1.0628e-03 eta 10:54:29 +epoch [26/50] batch [55/1000] time 1.563 (1.572) data 0.000 (0.018) loss 1.8955 (1.1585) acc 53.1250 (70.9091) lr 1.0628e-03 eta 10:53:36 +epoch [26/50] batch [60/1000] time 1.572 (1.572) data 0.001 (0.016) loss 1.1006 (1.1431) acc 65.6250 (71.1458) lr 1.0628e-03 eta 10:53:26 +epoch [26/50] batch [65/1000] time 1.597 (1.571) data 0.000 (0.015) loss 0.4026 (1.1238) acc 90.6250 (71.4904) lr 1.0628e-03 eta 10:53:04 +epoch [26/50] batch [70/1000] time 1.543 (1.571) data 0.000 (0.014) loss 1.1729 (1.1324) acc 75.0000 (71.5625) lr 1.0628e-03 eta 10:52:54 +epoch [26/50] batch [75/1000] time 1.541 (1.570) data 0.000 (0.013) loss 0.9858 (1.1306) acc 68.7500 (71.4583) lr 1.0628e-03 eta 10:52:21 +epoch [26/50] batch [80/1000] time 1.570 (1.570) data 0.000 (0.012) loss 0.6997 (1.1258) acc 75.0000 (71.4844) lr 1.0628e-03 eta 10:52:00 +epoch [26/50] batch [85/1000] time 1.575 (1.569) data 0.001 (0.012) loss 1.1670 (1.1234) acc 68.7500 (71.4338) lr 1.0628e-03 eta 10:51:41 +epoch [26/50] batch [90/1000] time 1.560 (1.569) data 0.000 (0.011) loss 0.8765 (1.1139) acc 78.1250 (71.6667) lr 1.0628e-03 eta 10:51:12 +epoch [26/50] batch [95/1000] time 1.560 (1.568) data 0.001 (0.011) loss 2.1328 (1.1179) acc 59.3750 (71.7105) lr 1.0628e-03 eta 10:50:55 +epoch [26/50] batch [100/1000] time 1.747 (1.570) data 0.001 (0.010) loss 0.8081 (1.1134) acc 68.7500 (71.7812) lr 1.0628e-03 eta 10:51:30 +epoch [26/50] batch [105/1000] time 1.557 (1.570) data 0.001 (0.010) loss 1.6816 (1.1259) acc 75.0000 (71.7857) lr 1.0628e-03 eta 10:51:18 +epoch [26/50] batch [110/1000] time 1.553 (1.569) data 0.001 (0.009) loss 1.3730 (1.1206) acc 75.0000 (72.1307) lr 1.0628e-03 eta 10:50:57 +epoch [26/50] batch [115/1000] time 1.544 (1.569) data 0.001 (0.009) loss 1.2627 (1.1208) acc 81.2500 (72.2554) lr 1.0628e-03 eta 10:50:38 +epoch [26/50] batch [120/1000] time 1.560 (1.569) data 0.000 (0.008) loss 0.8311 (1.1160) acc 68.7500 (72.2656) lr 1.0628e-03 eta 10:50:28 +epoch [26/50] batch 
[125/1000] time 1.560 (1.568) data 0.000 (0.008) loss 1.2246 (1.1151) acc 65.6250 (72.3250) lr 1.0628e-03 eta 10:50:12 +epoch [26/50] batch [130/1000] time 1.590 (1.568) data 0.000 (0.008) loss 1.5879 (1.1116) acc 62.5000 (72.5000) lr 1.0628e-03 eta 10:50:07 +epoch [26/50] batch [135/1000] time 1.558 (1.568) data 0.001 (0.008) loss 1.5947 (1.1148) acc 56.2500 (72.3843) lr 1.0628e-03 eta 10:49:59 +epoch [26/50] batch [140/1000] time 1.553 (1.568) data 0.000 (0.007) loss 1.1680 (1.1167) acc 84.3750 (72.5223) lr 1.0628e-03 eta 10:49:40 +epoch [26/50] batch [145/1000] time 1.757 (1.570) data 0.000 (0.007) loss 0.6831 (1.1109) acc 78.1250 (72.5431) lr 1.0628e-03 eta 10:50:12 +epoch [26/50] batch [150/1000] time 1.577 (1.570) data 0.001 (0.007) loss 1.1377 (1.1089) acc 62.5000 (72.5208) lr 1.0628e-03 eta 10:50:04 +epoch [26/50] batch [155/1000] time 1.561 (1.569) data 0.001 (0.007) loss 1.4863 (1.1154) acc 62.5000 (72.2984) lr 1.0628e-03 eta 10:49:51 +epoch [26/50] batch [160/1000] time 1.567 (1.569) data 0.001 (0.006) loss 1.4961 (1.1168) acc 65.6250 (72.3438) lr 1.0628e-03 eta 10:49:35 +epoch [26/50] batch [165/1000] time 1.544 (1.569) data 0.001 (0.006) loss 1.1533 (1.1116) acc 59.3750 (72.3674) lr 1.0628e-03 eta 10:49:21 +epoch [26/50] batch [170/1000] time 1.568 (1.569) data 0.001 (0.006) loss 1.4951 (1.1159) acc 56.2500 (72.2243) lr 1.0628e-03 eta 10:49:12 +epoch [26/50] batch [175/1000] time 1.577 (1.569) data 0.000 (0.006) loss 0.9614 (1.1170) acc 78.1250 (72.1964) lr 1.0628e-03 eta 10:49:03 +epoch [26/50] batch [180/1000] time 1.549 (1.569) data 0.000 (0.006) loss 1.8184 (1.1196) acc 56.2500 (72.1528) lr 1.0628e-03 eta 10:48:51 +epoch [26/50] batch [185/1000] time 1.574 (1.569) data 0.001 (0.006) loss 0.8257 (1.1204) acc 75.0000 (72.1959) lr 1.0628e-03 eta 10:48:44 +epoch [26/50] batch [190/1000] time 1.565 (1.569) data 0.000 (0.006) loss 1.1543 (1.1172) acc 65.6250 (72.2204) lr 1.0628e-03 eta 10:48:56 +epoch [26/50] batch [195/1000] time 1.584 (1.569) data 
0.000 (0.005) loss 1.2051 (1.1223) acc 71.8750 (72.1795) lr 1.0628e-03 eta 10:48:45 +epoch [26/50] batch [200/1000] time 1.574 (1.569) data 0.001 (0.005) loss 1.3984 (1.1209) acc 62.5000 (72.2188) lr 1.0628e-03 eta 10:48:36 +epoch [26/50] batch [205/1000] time 1.570 (1.569) data 0.001 (0.005) loss 0.7036 (1.1215) acc 84.3750 (72.1494) lr 1.0628e-03 eta 10:48:25 +epoch [26/50] batch [210/1000] time 1.556 (1.569) data 0.000 (0.005) loss 0.7788 (1.1208) acc 75.0000 (72.1577) lr 1.0628e-03 eta 10:48:16 +epoch [26/50] batch [215/1000] time 1.557 (1.569) data 0.001 (0.005) loss 1.0156 (1.1193) acc 75.0000 (72.2093) lr 1.0628e-03 eta 10:48:05 +epoch [26/50] batch [220/1000] time 1.564 (1.569) data 0.000 (0.005) loss 1.2334 (1.1174) acc 68.7500 (72.3153) lr 1.0628e-03 eta 10:47:55 +epoch [26/50] batch [225/1000] time 1.560 (1.569) data 0.000 (0.005) loss 1.3203 (1.1194) acc 78.1250 (72.2361) lr 1.0628e-03 eta 10:47:46 +epoch [26/50] batch [230/1000] time 1.547 (1.569) data 0.000 (0.005) loss 1.5723 (1.1172) acc 56.2500 (72.1603) lr 1.0628e-03 eta 10:47:33 +epoch [26/50] batch [235/1000] time 1.554 (1.568) data 0.001 (0.005) loss 1.0947 (1.1226) acc 71.8750 (72.0612) lr 1.0628e-03 eta 10:47:20 +epoch [26/50] batch [240/1000] time 1.557 (1.568) data 0.000 (0.004) loss 1.0498 (1.1249) acc 68.7500 (72.0443) lr 1.0628e-03 eta 10:47:10 +epoch [26/50] batch [245/1000] time 1.569 (1.568) data 0.001 (0.004) loss 1.7500 (1.1281) acc 65.6250 (72.0281) lr 1.0628e-03 eta 10:47:01 +epoch [26/50] batch [250/1000] time 1.561 (1.568) data 0.000 (0.004) loss 0.6816 (1.1235) acc 81.2500 (72.1500) lr 1.0628e-03 eta 10:46:48 +epoch [26/50] batch [255/1000] time 1.561 (1.568) data 0.001 (0.004) loss 1.2559 (1.1210) acc 71.8750 (72.2181) lr 1.0628e-03 eta 10:46:49 +epoch [26/50] batch [260/1000] time 1.558 (1.568) data 0.000 (0.004) loss 1.5127 (1.1205) acc 65.6250 (72.2236) lr 1.0628e-03 eta 10:46:34 +epoch [26/50] batch [265/1000] time 1.540 (1.568) data 0.001 (0.004) loss 1.6094 (1.1205) acc 
56.2500 (72.1816) lr 1.0628e-03 eta 10:46:19 +epoch [26/50] batch [270/1000] time 1.548 (1.568) data 0.000 (0.004) loss 1.1973 (1.1202) acc 68.7500 (72.2106) lr 1.0628e-03 eta 10:46:04 +epoch [26/50] batch [275/1000] time 1.553 (1.567) data 0.000 (0.004) loss 1.4736 (1.1202) acc 65.6250 (72.2614) lr 1.0628e-03 eta 10:45:50 +epoch [26/50] batch [280/1000] time 1.559 (1.567) data 0.001 (0.004) loss 0.4009 (1.1175) acc 84.3750 (72.3438) lr 1.0628e-03 eta 10:45:38 +epoch [26/50] batch [285/1000] time 1.553 (1.567) data 0.000 (0.004) loss 0.8496 (1.1159) acc 78.1250 (72.3246) lr 1.0628e-03 eta 10:45:25 +epoch [26/50] batch [290/1000] time 1.553 (1.567) data 0.001 (0.004) loss 1.8262 (1.1169) acc 56.2500 (72.2414) lr 1.0628e-03 eta 10:45:16 +epoch [26/50] batch [295/1000] time 1.564 (1.567) data 0.000 (0.004) loss 1.0273 (1.1193) acc 75.0000 (72.1822) lr 1.0628e-03 eta 10:45:07 +epoch [26/50] batch [300/1000] time 1.559 (1.567) data 0.000 (0.004) loss 1.1533 (1.1175) acc 68.7500 (72.1771) lr 1.0628e-03 eta 10:45:08 +epoch [26/50] batch [305/1000] time 1.549 (1.567) data 0.000 (0.004) loss 0.6914 (1.1153) acc 81.2500 (72.2848) lr 1.0628e-03 eta 10:44:55 +epoch [26/50] batch [310/1000] time 1.550 (1.567) data 0.001 (0.004) loss 0.7852 (1.1151) acc 87.5000 (72.3589) lr 1.0628e-03 eta 10:44:46 +epoch [26/50] batch [315/1000] time 1.547 (1.567) data 0.001 (0.004) loss 1.0186 (1.1184) acc 65.6250 (72.2718) lr 1.0628e-03 eta 10:44:36 +epoch [26/50] batch [320/1000] time 1.561 (1.567) data 0.000 (0.003) loss 1.8643 (1.1232) acc 71.8750 (72.2266) lr 1.0628e-03 eta 10:44:25 +epoch [26/50] batch [325/1000] time 1.557 (1.567) data 0.001 (0.003) loss 1.1309 (1.1249) acc 68.7500 (72.1250) lr 1.0628e-03 eta 10:44:13 +epoch [26/50] batch [330/1000] time 1.588 (1.567) data 0.001 (0.003) loss 1.0781 (1.1244) acc 71.8750 (72.1496) lr 1.0628e-03 eta 10:44:07 +epoch [26/50] batch [335/1000] time 1.553 (1.566) data 0.001 (0.003) loss 1.0107 (1.1248) acc 71.8750 (72.0989) lr 1.0628e-03 eta 
10:43:56 +epoch [26/50] batch [340/1000] time 1.556 (1.567) data 0.000 (0.003) loss 1.0107 (1.1262) acc 75.0000 (72.0404) lr 1.0628e-03 eta 10:44:02 +epoch [26/50] batch [345/1000] time 1.561 (1.567) data 0.000 (0.003) loss 0.8906 (1.1235) acc 68.7500 (72.0471) lr 1.0628e-03 eta 10:43:51 +epoch [26/50] batch [350/1000] time 1.578 (1.567) data 0.001 (0.003) loss 0.6387 (1.1222) acc 81.2500 (72.0536) lr 1.0628e-03 eta 10:43:40 +epoch [26/50] batch [355/1000] time 1.563 (1.567) data 0.000 (0.003) loss 0.8257 (1.1239) acc 78.1250 (72.0335) lr 1.0628e-03 eta 10:43:30 +epoch [26/50] batch [360/1000] time 1.563 (1.567) data 0.001 (0.003) loss 1.1885 (1.1198) acc 71.8750 (72.0833) lr 1.0628e-03 eta 10:43:18 +epoch [26/50] batch [365/1000] time 1.549 (1.566) data 0.001 (0.003) loss 1.1523 (1.1197) acc 75.0000 (72.0890) lr 1.0628e-03 eta 10:43:10 +epoch [26/50] batch [370/1000] time 1.561 (1.566) data 0.000 (0.003) loss 1.1826 (1.1207) acc 68.7500 (72.0270) lr 1.0628e-03 eta 10:43:00 +epoch [26/50] batch [375/1000] time 1.567 (1.567) data 0.000 (0.003) loss 1.1641 (1.1217) acc 68.7500 (72.0083) lr 1.0628e-03 eta 10:42:57 +epoch [26/50] batch [380/1000] time 1.548 (1.567) data 0.000 (0.003) loss 1.2695 (1.1209) acc 75.0000 (72.0724) lr 1.0628e-03 eta 10:42:49 +epoch [26/50] batch [385/1000] time 1.565 (1.567) data 0.000 (0.003) loss 0.8062 (1.1187) acc 84.3750 (72.0779) lr 1.0628e-03 eta 10:42:40 +epoch [26/50] batch [390/1000] time 1.556 (1.567) data 0.000 (0.003) loss 0.8525 (1.1192) acc 78.1250 (72.0593) lr 1.0628e-03 eta 10:42:31 +epoch [26/50] batch [395/1000] time 1.586 (1.567) data 0.000 (0.003) loss 1.1094 (1.1193) acc 68.7500 (72.0411) lr 1.0628e-03 eta 10:42:23 +epoch [26/50] batch [400/1000] time 1.561 (1.566) data 0.000 (0.003) loss 1.3203 (1.1194) acc 56.2500 (72.0391) lr 1.0628e-03 eta 10:42:11 +epoch [26/50] batch [405/1000] time 1.572 (1.567) data 0.000 (0.003) loss 0.9023 (1.1180) acc 78.1250 (72.0833) lr 1.0628e-03 eta 10:42:11 +epoch [26/50] batch 
[410/1000] time 1.575 (1.567) data 0.000 (0.003) loss 1.0742 (1.1144) acc 68.7500 (72.1037) lr 1.0628e-03 eta 10:42:02 +epoch [26/50] batch [415/1000] time 1.562 (1.566) data 0.000 (0.003) loss 0.9243 (1.1135) acc 71.8750 (72.0858) lr 1.0628e-03 eta 10:41:52 +epoch [26/50] batch [420/1000] time 1.569 (1.567) data 0.001 (0.003) loss 0.9116 (1.1122) acc 81.2500 (72.1577) lr 1.0628e-03 eta 10:41:45 +epoch [26/50] batch [425/1000] time 1.562 (1.566) data 0.000 (0.003) loss 0.9468 (1.1142) acc 65.6250 (72.1029) lr 1.0628e-03 eta 10:41:36 +epoch [26/50] batch [430/1000] time 1.560 (1.566) data 0.001 (0.003) loss 1.1797 (1.1146) acc 68.7500 (72.1076) lr 1.0628e-03 eta 10:41:26 +epoch [26/50] batch [435/1000] time 1.568 (1.566) data 0.000 (0.003) loss 1.2061 (1.1170) acc 65.6250 (72.0330) lr 1.0628e-03 eta 10:41:17 +epoch [26/50] batch [440/1000] time 1.587 (1.566) data 0.000 (0.003) loss 0.8604 (1.1154) acc 75.0000 (72.0526) lr 1.0628e-03 eta 10:41:11 +epoch [26/50] batch [445/1000] time 1.583 (1.566) data 0.001 (0.003) loss 0.9434 (1.1144) acc 65.6250 (72.0225) lr 1.0628e-03 eta 10:41:04 +epoch [26/50] batch [450/1000] time 1.556 (1.567) data 0.000 (0.003) loss 0.7271 (1.1137) acc 84.3750 (71.9861) lr 1.0628e-03 eta 10:41:02 +epoch [26/50] batch [455/1000] time 1.576 (1.567) data 0.000 (0.003) loss 0.6147 (1.1136) acc 78.1250 (72.0124) lr 1.0628e-03 eta 10:40:56 +epoch [26/50] batch [460/1000] time 1.558 (1.567) data 0.000 (0.003) loss 1.0957 (1.1145) acc 68.7500 (71.9633) lr 1.0628e-03 eta 10:40:48 +epoch [26/50] batch [465/1000] time 1.588 (1.567) data 0.001 (0.003) loss 1.0469 (1.1143) acc 71.8750 (71.9624) lr 1.0628e-03 eta 10:40:37 +epoch [26/50] batch [470/1000] time 1.555 (1.567) data 0.001 (0.003) loss 1.2422 (1.1169) acc 75.0000 (71.8949) lr 1.0628e-03 eta 10:40:29 +epoch [26/50] batch [475/1000] time 1.567 (1.567) data 0.001 (0.003) loss 1.1660 (1.1149) acc 65.6250 (71.9342) lr 1.0628e-03 eta 10:40:18 +epoch [26/50] batch [480/1000] time 1.572 (1.567) data 
0.001 (0.002) loss 0.9736 (1.1148) acc 78.1250 (71.9206) lr 1.0628e-03 eta 10:40:13 +epoch [26/50] batch [485/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.1523 (1.1163) acc 71.8750 (71.9394) lr 1.0628e-03 eta 10:40:05 +epoch [26/50] batch [490/1000] time 1.731 (1.567) data 0.001 (0.002) loss 0.9121 (1.1152) acc 65.6250 (71.9579) lr 1.0628e-03 eta 10:40:07 +epoch [26/50] batch [495/1000] time 1.570 (1.567) data 0.001 (0.002) loss 0.8462 (1.1127) acc 81.2500 (72.0076) lr 1.0628e-03 eta 10:40:01 +epoch [26/50] batch [500/1000] time 1.559 (1.567) data 0.001 (0.002) loss 0.6069 (1.1116) acc 84.3750 (72.0438) lr 1.0628e-03 eta 10:39:54 +epoch [26/50] batch [505/1000] time 1.541 (1.567) data 0.000 (0.002) loss 1.8320 (1.1156) acc 62.5000 (71.9616) lr 1.0628e-03 eta 10:39:44 +epoch [26/50] batch [510/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.4688 (1.1179) acc 56.2500 (71.8750) lr 1.0628e-03 eta 10:39:34 +epoch [26/50] batch [515/1000] time 1.552 (1.567) data 0.000 (0.002) loss 1.3828 (1.1183) acc 56.2500 (71.8386) lr 1.0628e-03 eta 10:39:23 +epoch [26/50] batch [520/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.0029 (1.1172) acc 71.8750 (71.8389) lr 1.0628e-03 eta 10:39:14 +epoch [26/50] batch [525/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.8945 (1.1152) acc 68.7500 (71.8750) lr 1.0628e-03 eta 10:39:03 +epoch [26/50] batch [530/1000] time 1.572 (1.567) data 0.000 (0.002) loss 1.0801 (1.1159) acc 75.0000 (71.8691) lr 1.0628e-03 eta 10:38:57 +epoch [26/50] batch [535/1000] time 1.561 (1.567) data 0.000 (0.002) loss 1.6846 (1.1161) acc 65.6250 (71.8692) lr 1.0628e-03 eta 10:38:51 +epoch [26/50] batch [540/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.2568 (1.1152) acc 65.6250 (71.8981) lr 1.0628e-03 eta 10:38:42 +epoch [26/50] batch [545/1000] time 1.562 (1.567) data 0.001 (0.002) loss 0.8203 (1.1129) acc 87.5000 (71.9782) lr 1.0628e-03 eta 10:38:33 +epoch [26/50] batch [550/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.3516 (1.1119) acc 
71.8750 (72.0227) lr 1.0628e-03 eta 10:38:26 +epoch [26/50] batch [555/1000] time 1.538 (1.567) data 0.000 (0.002) loss 1.1504 (1.1126) acc 65.6250 (72.0101) lr 1.0628e-03 eta 10:38:22 +epoch [26/50] batch [560/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.3389 (1.1114) acc 68.7500 (72.0145) lr 1.0628e-03 eta 10:38:14 +epoch [26/50] batch [565/1000] time 1.580 (1.567) data 0.000 (0.002) loss 1.4629 (1.1119) acc 56.2500 (71.9746) lr 1.0628e-03 eta 10:38:05 +epoch [26/50] batch [570/1000] time 1.547 (1.567) data 0.000 (0.002) loss 1.4268 (1.1128) acc 62.5000 (71.9627) lr 1.0628e-03 eta 10:37:57 +epoch [26/50] batch [575/1000] time 1.537 (1.567) data 0.000 (0.002) loss 1.1826 (1.1131) acc 71.8750 (71.9511) lr 1.0628e-03 eta 10:37:45 +epoch [26/50] batch [580/1000] time 1.565 (1.567) data 0.000 (0.002) loss 0.9668 (1.1106) acc 81.2500 (71.9828) lr 1.0628e-03 eta 10:37:36 +epoch [26/50] batch [585/1000] time 1.551 (1.567) data 0.001 (0.002) loss 0.8589 (1.1105) acc 68.7500 (71.9605) lr 1.0628e-03 eta 10:37:27 +epoch [26/50] batch [590/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.2217 (1.1088) acc 71.8750 (72.0233) lr 1.0628e-03 eta 10:37:17 +epoch [26/50] batch [595/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.7354 (1.1098) acc 56.2500 (72.0011) lr 1.0628e-03 eta 10:37:07 +epoch [26/50] batch [600/1000] time 1.552 (1.567) data 0.001 (0.002) loss 1.1963 (1.1099) acc 78.1250 (72.0052) lr 1.0628e-03 eta 10:37:07 +epoch [26/50] batch [605/1000] time 1.559 (1.567) data 0.001 (0.002) loss 0.8735 (1.1089) acc 75.0000 (71.9886) lr 1.0628e-03 eta 10:36:58 +epoch [26/50] batch [610/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.0049 (1.1099) acc 75.0000 (71.9621) lr 1.0628e-03 eta 10:36:50 +epoch [26/50] batch [615/1000] time 1.554 (1.567) data 0.000 (0.002) loss 0.6924 (1.1093) acc 87.5000 (71.9665) lr 1.0628e-03 eta 10:36:42 +epoch [26/50] batch [620/1000] time 1.548 (1.566) data 0.000 (0.002) loss 0.7261 (1.1084) acc 75.0000 (71.9758) lr 1.0628e-03 eta 
10:36:30 +epoch [26/50] batch [625/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.1807 (1.1098) acc 71.8750 (71.9600) lr 1.0628e-03 eta 10:36:22 +epoch [26/50] batch [630/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.0342 (1.1105) acc 65.6250 (71.9246) lr 1.0628e-03 eta 10:36:15 +epoch [26/50] batch [635/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.6348 (1.1090) acc 84.3750 (71.9636) lr 1.0628e-03 eta 10:36:07 +epoch [26/50] batch [640/1000] time 1.542 (1.566) data 0.001 (0.002) loss 0.9395 (1.1099) acc 71.8750 (71.9482) lr 1.0628e-03 eta 10:35:58 +epoch [26/50] batch [645/1000] time 1.565 (1.567) data 0.000 (0.002) loss 1.4072 (1.1093) acc 78.1250 (71.9477) lr 1.0628e-03 eta 10:35:54 +epoch [26/50] batch [650/1000] time 1.549 (1.567) data 0.000 (0.002) loss 1.2168 (1.1096) acc 68.7500 (71.9519) lr 1.0628e-03 eta 10:35:45 +epoch [26/50] batch [655/1000] time 1.543 (1.566) data 0.000 (0.002) loss 0.8813 (1.1081) acc 71.8750 (71.9609) lr 1.0628e-03 eta 10:35:36 +epoch [26/50] batch [660/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.4785 (1.1079) acc 71.8750 (71.9839) lr 1.0628e-03 eta 10:35:27 +epoch [26/50] batch [665/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.0811 (1.1076) acc 71.8750 (72.0207) lr 1.0628e-03 eta 10:35:16 +epoch [26/50] batch [670/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.3564 (1.1078) acc 65.6250 (72.0336) lr 1.0628e-03 eta 10:35:07 +epoch [26/50] batch [675/1000] time 1.573 (1.566) data 0.001 (0.002) loss 0.9004 (1.1070) acc 75.0000 (72.0463) lr 1.0628e-03 eta 10:34:59 +epoch [26/50] batch [680/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.4541 (1.1079) acc 68.7500 (72.0496) lr 1.0628e-03 eta 10:34:47 +epoch [26/50] batch [685/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.9917 (1.1067) acc 78.1250 (72.0849) lr 1.0628e-03 eta 10:34:39 +epoch [26/50] batch [690/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.0332 (1.1058) acc 75.0000 (72.1014) lr 1.0628e-03 eta 10:34:30 +epoch [26/50] batch 
[695/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.1113 (1.1043) acc 75.0000 (72.1403) lr 1.0628e-03 eta 10:34:21 +epoch [26/50] batch [700/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.1758 (1.1043) acc 68.7500 (72.1429) lr 1.0628e-03 eta 10:34:11 +epoch [26/50] batch [705/1000] time 1.546 (1.566) data 0.001 (0.002) loss 1.1904 (1.1031) acc 65.6250 (72.1498) lr 1.0628e-03 eta 10:34:09 +epoch [26/50] batch [710/1000] time 1.571 (1.566) data 0.000 (0.002) loss 1.5049 (1.1032) acc 65.6250 (72.1303) lr 1.0628e-03 eta 10:34:01 +epoch [26/50] batch [715/1000] time 1.557 (1.566) data 0.000 (0.002) loss 1.1211 (1.1022) acc 81.2500 (72.1372) lr 1.0628e-03 eta 10:33:52 +epoch [26/50] batch [720/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.3877 (1.1007) acc 65.6250 (72.1398) lr 1.0628e-03 eta 10:33:42 +epoch [26/50] batch [725/1000] time 1.545 (1.566) data 0.000 (0.002) loss 0.6152 (1.0993) acc 78.1250 (72.1681) lr 1.0628e-03 eta 10:33:33 +epoch [26/50] batch [730/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.7510 (1.0986) acc 65.6250 (72.2003) lr 1.0628e-03 eta 10:33:25 +epoch [26/50] batch [735/1000] time 1.533 (1.566) data 0.001 (0.002) loss 1.2295 (1.0999) acc 75.0000 (72.1726) lr 1.0628e-03 eta 10:33:15 +epoch [26/50] batch [740/1000] time 1.582 (1.566) data 0.000 (0.002) loss 1.4775 (1.1000) acc 56.2500 (72.1917) lr 1.0628e-03 eta 10:33:07 +epoch [26/50] batch [745/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.1533 (1.0999) acc 68.7500 (72.1980) lr 1.0628e-03 eta 10:32:58 +epoch [26/50] batch [750/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.8799 (1.0986) acc 71.8750 (72.2292) lr 1.0628e-03 eta 10:32:55 +epoch [26/50] batch [755/1000] time 1.582 (1.566) data 0.000 (0.002) loss 0.8584 (1.0994) acc 81.2500 (72.2020) lr 1.0628e-03 eta 10:32:46 +epoch [26/50] batch [760/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.8921 (1.0986) acc 75.0000 (72.2122) lr 1.0628e-03 eta 10:32:37 +epoch [26/50] batch [765/1000] time 1.581 (1.566) data 
0.000 (0.002) loss 1.0781 (1.0988) acc 71.8750 (72.2059) lr 1.0628e-03 eta 10:32:29 +epoch [26/50] batch [770/1000] time 1.573 (1.566) data 0.001 (0.002) loss 0.8804 (1.0980) acc 84.3750 (72.2240) lr 1.0628e-03 eta 10:32:21 +epoch [26/50] batch [775/1000] time 1.572 (1.566) data 0.000 (0.002) loss 1.6328 (1.0977) acc 68.7500 (72.2460) lr 1.0628e-03 eta 10:32:12 +epoch [26/50] batch [780/1000] time 1.574 (1.566) data 0.001 (0.002) loss 0.8877 (1.0986) acc 84.3750 (72.2436) lr 1.0628e-03 eta 10:32:04 +epoch [26/50] batch [785/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.9717 (1.0996) acc 78.1250 (72.2333) lr 1.0628e-03 eta 10:31:55 +epoch [26/50] batch [790/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.2275 (1.0997) acc 65.6250 (72.2389) lr 1.0628e-03 eta 10:31:46 +epoch [26/50] batch [795/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.2783 (1.1000) acc 75.0000 (72.2524) lr 1.0628e-03 eta 10:31:42 +epoch [26/50] batch [800/1000] time 1.527 (1.566) data 0.000 (0.002) loss 1.0068 (1.0992) acc 65.6250 (72.2422) lr 1.0628e-03 eta 10:31:31 +epoch [26/50] batch [805/1000] time 1.539 (1.566) data 0.000 (0.002) loss 1.2227 (1.0986) acc 75.0000 (72.2593) lr 1.0628e-03 eta 10:31:23 +epoch [26/50] batch [810/1000] time 1.561 (1.566) data 0.001 (0.002) loss 0.7729 (1.0980) acc 75.0000 (72.2762) lr 1.0628e-03 eta 10:31:16 +epoch [26/50] batch [815/1000] time 1.580 (1.566) data 0.001 (0.002) loss 1.3594 (1.0984) acc 68.7500 (72.2929) lr 1.0628e-03 eta 10:31:08 +epoch [26/50] batch [820/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.6143 (1.1010) acc 71.8750 (72.2523) lr 1.0628e-03 eta 10:31:02 +epoch [26/50] batch [825/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.3457 (1.1029) acc 65.6250 (72.2235) lr 1.0628e-03 eta 10:30:54 +epoch [26/50] batch [830/1000] time 1.576 (1.566) data 0.000 (0.002) loss 0.9492 (1.1033) acc 78.1250 (72.2252) lr 1.0628e-03 eta 10:30:47 +epoch [26/50] batch [835/1000] time 1.580 (1.566) data 0.000 (0.002) loss 1.2559 (1.1031) acc 
62.5000 (72.2305) lr 1.0628e-03 eta 10:30:38 +epoch [26/50] batch [840/1000] time 1.539 (1.566) data 0.001 (0.002) loss 0.8486 (1.1016) acc 78.1250 (72.2545) lr 1.0628e-03 eta 10:30:29 +epoch [26/50] batch [845/1000] time 1.584 (1.566) data 0.001 (0.002) loss 1.3838 (1.1024) acc 71.8750 (72.2300) lr 1.0628e-03 eta 10:30:22 +epoch [26/50] batch [850/1000] time 1.570 (1.566) data 0.000 (0.002) loss 0.6431 (1.1024) acc 78.1250 (72.2279) lr 1.0628e-03 eta 10:30:14 +epoch [26/50] batch [855/1000] time 1.724 (1.566) data 0.001 (0.002) loss 1.9326 (1.1033) acc 62.5000 (72.2113) lr 1.0628e-03 eta 10:30:12 +epoch [26/50] batch [860/1000] time 1.575 (1.566) data 0.001 (0.002) loss 1.4082 (1.1042) acc 62.5000 (72.1948) lr 1.0628e-03 eta 10:30:04 +epoch [26/50] batch [865/1000] time 1.578 (1.566) data 0.001 (0.002) loss 0.8188 (1.1045) acc 81.2500 (72.1749) lr 1.0628e-03 eta 10:29:56 +epoch [26/50] batch [870/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.7778 (1.1034) acc 87.5000 (72.2126) lr 1.0628e-03 eta 10:29:48 +epoch [26/50] batch [875/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.7812 (1.1030) acc 59.3750 (72.2071) lr 1.0628e-03 eta 10:29:40 +epoch [26/50] batch [880/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.2314 (1.1033) acc 62.5000 (72.1839) lr 1.0628e-03 eta 10:29:32 +epoch [26/50] batch [885/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.2861 (1.1024) acc 65.6250 (72.2140) lr 1.0628e-03 eta 10:29:25 +epoch [26/50] batch [890/1000] time 1.531 (1.566) data 0.001 (0.002) loss 0.8887 (1.1014) acc 78.1250 (72.2261) lr 1.0628e-03 eta 10:29:16 +epoch [26/50] batch [895/1000] time 1.535 (1.566) data 0.001 (0.002) loss 0.6162 (1.1014) acc 81.2500 (72.2207) lr 1.0628e-03 eta 10:29:07 +epoch [26/50] batch [900/1000] time 1.714 (1.566) data 0.000 (0.002) loss 1.7168 (1.1025) acc 59.3750 (72.1979) lr 1.0628e-03 eta 10:29:02 +epoch [26/50] batch [905/1000] time 1.582 (1.566) data 0.001 (0.002) loss 0.8716 (1.1019) acc 75.0000 (72.2134) lr 1.0628e-03 eta 
10:28:53 +epoch [26/50] batch [910/1000] time 1.529 (1.566) data 0.001 (0.002) loss 1.3926 (1.1019) acc 62.5000 (72.2012) lr 1.0628e-03 eta 10:28:44 +epoch [26/50] batch [915/1000] time 1.594 (1.566) data 0.001 (0.002) loss 1.0342 (1.1018) acc 81.2500 (72.2029) lr 1.0628e-03 eta 10:28:35 +epoch [26/50] batch [920/1000] time 1.550 (1.566) data 0.001 (0.002) loss 1.2256 (1.1021) acc 68.7500 (72.2147) lr 1.0628e-03 eta 10:28:27 +epoch [26/50] batch [925/1000] time 1.585 (1.566) data 0.000 (0.002) loss 1.6914 (1.1039) acc 59.3750 (72.1824) lr 1.0628e-03 eta 10:28:20 +epoch [26/50] batch [930/1000] time 1.561 (1.566) data 0.000 (0.002) loss 0.5356 (1.1033) acc 87.5000 (72.2144) lr 1.0628e-03 eta 10:28:11 +epoch [26/50] batch [935/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.3340 (1.1038) acc 65.6250 (72.1959) lr 1.0628e-03 eta 10:28:04 +epoch [26/50] batch [940/1000] time 1.570 (1.566) data 0.001 (0.002) loss 1.3945 (1.1043) acc 62.5000 (72.1975) lr 1.0628e-03 eta 10:27:55 +epoch [26/50] batch [945/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.8296 (1.1047) acc 75.0000 (72.1825) lr 1.0628e-03 eta 10:27:51 +epoch [26/50] batch [950/1000] time 1.587 (1.566) data 0.001 (0.002) loss 0.9976 (1.1044) acc 75.0000 (72.2007) lr 1.0628e-03 eta 10:27:43 +epoch [26/50] batch [955/1000] time 1.566 (1.566) data 0.000 (0.001) loss 1.3975 (1.1042) acc 62.5000 (72.1957) lr 1.0628e-03 eta 10:27:34 +epoch [26/50] batch [960/1000] time 1.557 (1.566) data 0.001 (0.001) loss 0.8735 (1.1050) acc 78.1250 (72.1810) lr 1.0628e-03 eta 10:27:25 +epoch [26/50] batch [965/1000] time 1.565 (1.566) data 0.000 (0.001) loss 1.4609 (1.1057) acc 65.6250 (72.1794) lr 1.0628e-03 eta 10:27:18 +epoch [26/50] batch [970/1000] time 1.600 (1.566) data 0.001 (0.001) loss 1.2490 (1.1060) acc 71.8750 (72.1617) lr 1.0628e-03 eta 10:27:10 +epoch [26/50] batch [975/1000] time 1.554 (1.566) data 0.001 (0.001) loss 1.2188 (1.1075) acc 71.8750 (72.1282) lr 1.0628e-03 eta 10:27:03 +epoch [26/50] batch 
[980/1000] time 1.578 (1.566) data 0.001 (0.001) loss 0.5898 (1.1073) acc 87.5000 (72.1429) lr 1.0628e-03 eta 10:26:56 +epoch [26/50] batch [985/1000] time 1.563 (1.566) data 0.001 (0.001) loss 1.1152 (1.1092) acc 68.7500 (72.0876) lr 1.0628e-03 eta 10:26:47 +epoch [26/50] batch [990/1000] time 1.543 (1.566) data 0.000 (0.001) loss 1.2051 (1.1102) acc 75.0000 (72.0676) lr 1.0628e-03 eta 10:26:39 +epoch [26/50] batch [995/1000] time 1.557 (1.566) data 0.000 (0.001) loss 1.0303 (1.1095) acc 78.1250 (72.0760) lr 1.0628e-03 eta 10:26:31 +epoch [26/50] batch [1000/1000] time 1.551 (1.566) data 0.000 (0.001) loss 0.9736 (1.1094) acc 78.1250 (72.0781) lr 1.0000e-03 eta 10:26:22 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,227 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 78.0% +epoch [27/50] batch [5/1000] time 1.532 (1.691) data 0.000 (0.185) loss 0.9019 (1.0143) acc 78.1250 (75.0000) lr 1.0000e-03 eta 11:16:23 +epoch [27/50] batch [10/1000] time 1.566 (1.624) data 0.000 (0.093) loss 1.1934 (1.0619) acc 75.0000 (73.1250) lr 1.0000e-03 eta 10:49:14 +epoch [27/50] batch [15/1000] time 1.557 (1.601) data 0.000 (0.062) loss 0.5830 (0.9937) acc 87.5000 (73.7500) lr 1.0000e-03 eta 10:39:56 +epoch [27/50] batch [20/1000] time 1.545 (1.589) data 0.001 (0.047) loss 0.9517 (1.0061) acc 78.1250 (74.0625) lr 1.0000e-03 eta 10:35:15 +epoch [27/50] batch [25/1000] time 1.584 (1.597) data 0.000 (0.037) loss 1.4443 (1.0250) acc 65.6250 (74.1250) lr 1.0000e-03 eta 10:38:18 +epoch [27/50] batch [30/1000] time 1.573 (1.595) data 0.001 (0.031) loss 0.9224 (1.0546) acc 81.2500 (73.9583) lr 1.0000e-03 eta 10:37:05 +epoch [27/50] batch [35/1000] time 1.542 (1.590) data 0.001 (0.027) loss 1.1748 (1.0801) acc 75.0000 (73.3929) lr 1.0000e-03 eta 10:35:07 +epoch [27/50] batch [40/1000] time 1.577 (1.588) data 0.001 (0.024) loss 1.1543 (1.0924) acc 65.6250 (73.0469) lr 1.0000e-03 eta 10:34:08 +epoch [27/50] batch [45/1000] time 1.530 (1.584) data 0.000 (0.021) loss 1.0283 
(1.0659) acc 84.3750 (73.6806) lr 1.0000e-03 eta 10:32:34 +epoch [27/50] batch [50/1000] time 1.574 (1.583) data 0.000 (0.019) loss 1.2520 (1.0796) acc 62.5000 (73.1875) lr 1.0000e-03 eta 10:31:59 +epoch [27/50] batch [55/1000] time 1.538 (1.581) data 0.000 (0.017) loss 1.0537 (1.0932) acc 75.0000 (72.7273) lr 1.0000e-03 eta 10:31:01 +epoch [27/50] batch [60/1000] time 1.575 (1.580) data 0.001 (0.016) loss 0.8774 (1.1098) acc 75.0000 (72.7604) lr 1.0000e-03 eta 10:30:15 +epoch [27/50] batch [65/1000] time 1.576 (1.578) data 0.000 (0.015) loss 0.7422 (1.1157) acc 81.2500 (72.9327) lr 1.0000e-03 eta 10:29:25 +epoch [27/50] batch [70/1000] time 1.576 (1.580) data 0.000 (0.014) loss 0.8579 (1.1090) acc 68.7500 (73.0357) lr 1.0000e-03 eta 10:30:07 +epoch [27/50] batch [75/1000] time 1.552 (1.579) data 0.001 (0.013) loss 1.1855 (1.1044) acc 75.0000 (73.0833) lr 1.0000e-03 eta 10:29:27 +epoch [27/50] batch [80/1000] time 1.557 (1.577) data 0.001 (0.012) loss 0.9331 (1.1011) acc 75.0000 (73.2031) lr 1.0000e-03 eta 10:28:35 +epoch [27/50] batch [85/1000] time 1.588 (1.576) data 0.000 (0.011) loss 0.6748 (1.1010) acc 84.3750 (73.3824) lr 1.0000e-03 eta 10:28:16 +epoch [27/50] batch [90/1000] time 1.563 (1.576) data 0.001 (0.011) loss 1.3994 (1.1106) acc 71.8750 (73.1597) lr 1.0000e-03 eta 10:27:54 +epoch [27/50] batch [95/1000] time 1.561 (1.575) data 0.000 (0.010) loss 0.9487 (1.1081) acc 71.8750 (73.1579) lr 1.0000e-03 eta 10:27:27 +epoch [27/50] batch [100/1000] time 1.553 (1.574) data 0.001 (0.010) loss 1.6924 (1.1042) acc 65.6250 (73.2812) lr 1.0000e-03 eta 10:27:01 +epoch [27/50] batch [105/1000] time 1.551 (1.573) data 0.000 (0.009) loss 1.0938 (1.1165) acc 65.6250 (73.0060) lr 1.0000e-03 eta 10:26:37 +epoch [27/50] batch [110/1000] time 1.569 (1.573) data 0.001 (0.009) loss 0.7861 (1.1203) acc 81.2500 (72.9261) lr 1.0000e-03 eta 10:26:26 +epoch [27/50] batch [115/1000] time 1.574 (1.573) data 0.000 (0.009) loss 0.9062 (1.1167) acc 71.8750 (72.8533) lr 1.0000e-03 eta 
10:26:15 +epoch [27/50] batch [120/1000] time 1.574 (1.574) data 0.000 (0.008) loss 0.9355 (1.1265) acc 78.1250 (72.6823) lr 1.0000e-03 eta 10:26:24 +epoch [27/50] batch [125/1000] time 1.585 (1.574) data 0.001 (0.008) loss 1.1279 (1.1247) acc 78.1250 (72.8250) lr 1.0000e-03 eta 10:26:20 +epoch [27/50] batch [130/1000] time 1.546 (1.573) data 0.001 (0.008) loss 1.3340 (1.1278) acc 59.3750 (72.6923) lr 1.0000e-03 eta 10:25:53 +epoch [27/50] batch [135/1000] time 1.533 (1.573) data 0.000 (0.007) loss 1.5410 (1.1273) acc 65.6250 (72.5231) lr 1.0000e-03 eta 10:25:32 +epoch [27/50] batch [140/1000] time 1.555 (1.572) data 0.000 (0.007) loss 1.1064 (1.1232) acc 71.8750 (72.6116) lr 1.0000e-03 eta 10:25:11 +epoch [27/50] batch [145/1000] time 1.558 (1.572) data 0.001 (0.007) loss 1.2617 (1.1244) acc 71.8750 (72.5862) lr 1.0000e-03 eta 10:25:00 +epoch [27/50] batch [150/1000] time 1.566 (1.572) data 0.001 (0.007) loss 1.0098 (1.1250) acc 65.6250 (72.5208) lr 1.0000e-03 eta 10:24:46 +epoch [27/50] batch [155/1000] time 1.562 (1.571) data 0.000 (0.006) loss 0.9727 (1.1262) acc 75.0000 (72.4194) lr 1.0000e-03 eta 10:24:29 +epoch [27/50] batch [160/1000] time 1.552 (1.571) data 0.001 (0.006) loss 1.6035 (1.1216) acc 59.3750 (72.4414) lr 1.0000e-03 eta 10:24:12 +epoch [27/50] batch [165/1000] time 1.570 (1.571) data 0.001 (0.006) loss 1.5186 (1.1252) acc 71.8750 (72.3674) lr 1.0000e-03 eta 10:24:10 +epoch [27/50] batch [170/1000] time 1.560 (1.571) data 0.000 (0.006) loss 1.8506 (1.1259) acc 56.2500 (72.3529) lr 1.0000e-03 eta 10:23:58 +epoch [27/50] batch [175/1000] time 1.581 (1.572) data 0.000 (0.006) loss 1.4502 (1.1240) acc 68.7500 (72.3571) lr 1.0000e-03 eta 10:24:15 +epoch [27/50] batch [180/1000] time 1.582 (1.572) data 0.000 (0.006) loss 0.5562 (1.1197) acc 84.3750 (72.3785) lr 1.0000e-03 eta 10:24:00 +epoch [27/50] batch [185/1000] time 1.568 (1.571) data 0.000 (0.005) loss 1.3555 (1.1250) acc 68.7500 (72.3311) lr 1.0000e-03 eta 10:23:41 +epoch [27/50] batch 
[190/1000] time 1.560 (1.571) data 0.000 (0.005) loss 1.4873 (1.1284) acc 65.6250 (72.2862) lr 1.0000e-03 eta 10:23:25 +epoch [27/50] batch [195/1000] time 1.554 (1.571) data 0.001 (0.005) loss 0.5220 (1.1295) acc 81.2500 (72.2756) lr 1.0000e-03 eta 10:23:16 +epoch [27/50] batch [200/1000] time 1.579 (1.571) data 0.001 (0.005) loss 0.8784 (1.1286) acc 71.8750 (72.1406) lr 1.0000e-03 eta 10:23:05 +epoch [27/50] batch [205/1000] time 1.541 (1.571) data 0.000 (0.005) loss 0.7007 (1.1245) acc 81.2500 (72.2713) lr 1.0000e-03 eta 10:22:51 +epoch [27/50] batch [210/1000] time 1.547 (1.570) data 0.001 (0.005) loss 0.9180 (1.1236) acc 81.2500 (72.3512) lr 1.0000e-03 eta 10:22:35 +epoch [27/50] batch [215/1000] time 1.570 (1.570) data 0.000 (0.005) loss 0.6514 (1.1208) acc 78.1250 (72.3837) lr 1.0000e-03 eta 10:22:15 +epoch [27/50] batch [220/1000] time 1.565 (1.570) data 0.000 (0.005) loss 1.2637 (1.1277) acc 78.1250 (72.2301) lr 1.0000e-03 eta 10:22:23 +epoch [27/50] batch [225/1000] time 1.542 (1.570) data 0.000 (0.005) loss 1.1377 (1.1304) acc 68.7500 (72.2222) lr 1.0000e-03 eta 10:22:09 +epoch [27/50] batch [230/1000] time 1.556 (1.570) data 0.000 (0.005) loss 1.8037 (1.1316) acc 53.1250 (72.1332) lr 1.0000e-03 eta 10:21:53 +epoch [27/50] batch [235/1000] time 1.559 (1.570) data 0.000 (0.004) loss 0.7754 (1.1276) acc 81.2500 (72.1277) lr 1.0000e-03 eta 10:21:40 +epoch [27/50] batch [240/1000] time 1.587 (1.570) data 0.001 (0.004) loss 1.3467 (1.1288) acc 65.6250 (72.0964) lr 1.0000e-03 eta 10:21:33 +epoch [27/50] batch [245/1000] time 1.557 (1.569) data 0.001 (0.004) loss 1.8008 (1.1306) acc 65.6250 (72.0918) lr 1.0000e-03 eta 10:21:23 +epoch [27/50] batch [250/1000] time 1.560 (1.570) data 0.001 (0.004) loss 0.7803 (1.1257) acc 71.8750 (72.1500) lr 1.0000e-03 eta 10:21:16 +epoch [27/50] batch [255/1000] time 1.574 (1.570) data 0.000 (0.004) loss 1.1816 (1.1192) acc 68.7500 (72.2549) lr 1.0000e-03 eta 10:21:07 +epoch [27/50] batch [260/1000] time 1.701 (1.570) data 
0.000 (0.004) loss 1.0049 (1.1125) acc 75.0000 (72.4279) lr 1.0000e-03 eta 10:21:13 +epoch [27/50] batch [265/1000] time 1.552 (1.570) data 0.000 (0.004) loss 1.4150 (1.1139) acc 71.8750 (72.3939) lr 1.0000e-03 eta 10:21:02 +epoch [27/50] batch [270/1000] time 1.567 (1.570) data 0.000 (0.004) loss 1.5664 (1.1107) acc 65.6250 (72.5231) lr 1.0000e-03 eta 10:20:49 +epoch [27/50] batch [275/1000] time 1.554 (1.569) data 0.000 (0.004) loss 0.5684 (1.1062) acc 78.1250 (72.6136) lr 1.0000e-03 eta 10:20:34 +epoch [27/50] batch [280/1000] time 1.574 (1.570) data 0.000 (0.004) loss 1.1953 (1.1060) acc 71.8750 (72.5781) lr 1.0000e-03 eta 10:20:32 +epoch [27/50] batch [285/1000] time 1.566 (1.570) data 0.001 (0.004) loss 0.9849 (1.1067) acc 75.0000 (72.5987) lr 1.0000e-03 eta 10:20:22 +epoch [27/50] batch [290/1000] time 1.569 (1.569) data 0.000 (0.004) loss 0.7217 (1.1036) acc 84.3750 (72.6401) lr 1.0000e-03 eta 10:20:11 +epoch [27/50] batch [295/1000] time 1.558 (1.569) data 0.000 (0.004) loss 0.8862 (1.0998) acc 81.2500 (72.7436) lr 1.0000e-03 eta 10:19:55 +epoch [27/50] batch [300/1000] time 1.563 (1.569) data 0.000 (0.004) loss 1.1436 (1.1006) acc 78.1250 (72.7500) lr 1.0000e-03 eta 10:19:41 +epoch [27/50] batch [305/1000] time 1.557 (1.569) data 0.000 (0.004) loss 1.2266 (1.1043) acc 65.6250 (72.6639) lr 1.0000e-03 eta 10:19:32 +epoch [27/50] batch [310/1000] time 1.564 (1.569) data 0.000 (0.003) loss 1.0879 (1.1022) acc 81.2500 (72.7722) lr 1.0000e-03 eta 10:19:18 +epoch [27/50] batch [315/1000] time 1.558 (1.568) data 0.001 (0.003) loss 1.2441 (1.1076) acc 71.8750 (72.6984) lr 1.0000e-03 eta 10:19:06 +epoch [27/50] batch [320/1000] time 1.557 (1.568) data 0.001 (0.003) loss 1.2363 (1.1085) acc 71.8750 (72.7246) lr 1.0000e-03 eta 10:19:00 +epoch [27/50] batch [325/1000] time 1.557 (1.569) data 0.000 (0.003) loss 0.8052 (1.1067) acc 78.1250 (72.7596) lr 1.0000e-03 eta 10:19:04 +epoch [27/50] batch [330/1000] time 1.566 (1.569) data 0.000 (0.003) loss 0.9692 (1.1049) acc 
65.6250 (72.7557) lr 1.0000e-03 eta 10:18:53 +epoch [27/50] batch [335/1000] time 1.557 (1.569) data 0.000 (0.003) loss 1.4424 (1.1058) acc 50.0000 (72.6866) lr 1.0000e-03 eta 10:18:42 +epoch [27/50] batch [340/1000] time 1.567 (1.569) data 0.000 (0.003) loss 0.5596 (1.1024) acc 84.3750 (72.7757) lr 1.0000e-03 eta 10:18:31 +epoch [27/50] batch [345/1000] time 1.572 (1.568) data 0.001 (0.003) loss 1.2725 (1.1000) acc 75.0000 (72.8442) lr 1.0000e-03 eta 10:18:22 +epoch [27/50] batch [350/1000] time 1.561 (1.568) data 0.000 (0.003) loss 0.6611 (1.0958) acc 78.1250 (72.9464) lr 1.0000e-03 eta 10:18:09 +epoch [27/50] batch [355/1000] time 1.561 (1.568) data 0.001 (0.003) loss 0.5098 (1.0934) acc 84.3750 (72.9930) lr 1.0000e-03 eta 10:17:57 +epoch [27/50] batch [360/1000] time 1.563 (1.568) data 0.000 (0.003) loss 1.1924 (1.0972) acc 75.0000 (72.9167) lr 1.0000e-03 eta 10:17:47 +epoch [27/50] batch [365/1000] time 1.582 (1.568) data 0.000 (0.003) loss 0.7627 (1.0946) acc 75.0000 (72.9623) lr 1.0000e-03 eta 10:17:37 +epoch [27/50] batch [370/1000] time 1.582 (1.568) data 0.001 (0.003) loss 1.2734 (1.0982) acc 71.8750 (72.8378) lr 1.0000e-03 eta 10:17:38 +epoch [27/50] batch [375/1000] time 1.552 (1.568) data 0.000 (0.003) loss 0.7964 (1.0953) acc 78.1250 (72.9167) lr 1.0000e-03 eta 10:17:33 +epoch [27/50] batch [380/1000] time 1.531 (1.568) data 0.001 (0.003) loss 1.2344 (1.0948) acc 53.1250 (72.8043) lr 1.0000e-03 eta 10:17:23 +epoch [27/50] batch [385/1000] time 1.567 (1.568) data 0.000 (0.003) loss 1.4346 (1.0945) acc 59.3750 (72.8247) lr 1.0000e-03 eta 10:17:16 +epoch [27/50] batch [390/1000] time 1.592 (1.568) data 0.000 (0.003) loss 1.5977 (1.0958) acc 65.6250 (72.8606) lr 1.0000e-03 eta 10:17:06 +epoch [27/50] batch [395/1000] time 1.548 (1.568) data 0.000 (0.003) loss 0.9873 (1.0978) acc 71.8750 (72.8085) lr 1.0000e-03 eta 10:16:56 +epoch [27/50] batch [400/1000] time 1.545 (1.568) data 0.000 (0.003) loss 1.3564 (1.0987) acc 59.3750 (72.7578) lr 1.0000e-03 eta 
10:16:42 +epoch [27/50] batch [405/1000] time 1.567 (1.568) data 0.000 (0.003) loss 0.8535 (1.0998) acc 78.1250 (72.7315) lr 1.0000e-03 eta 10:16:30 +epoch [27/50] batch [410/1000] time 1.566 (1.568) data 0.001 (0.003) loss 1.0615 (1.1001) acc 65.6250 (72.6677) lr 1.0000e-03 eta 10:16:24 +epoch [27/50] batch [415/1000] time 1.555 (1.568) data 0.001 (0.003) loss 0.7598 (1.0972) acc 84.3750 (72.7334) lr 1.0000e-03 eta 10:16:23 +epoch [27/50] batch [420/1000] time 1.553 (1.568) data 0.000 (0.003) loss 1.4863 (1.0978) acc 71.8750 (72.7381) lr 1.0000e-03 eta 10:16:12 +epoch [27/50] batch [425/1000] time 1.558 (1.568) data 0.000 (0.003) loss 0.9849 (1.0997) acc 71.8750 (72.7206) lr 1.0000e-03 eta 10:16:05 +epoch [27/50] batch [430/1000] time 1.583 (1.568) data 0.000 (0.003) loss 0.9165 (1.0994) acc 78.1250 (72.6962) lr 1.0000e-03 eta 10:15:56 +epoch [27/50] batch [435/1000] time 1.551 (1.568) data 0.000 (0.003) loss 0.5044 (1.0974) acc 84.3750 (72.7155) lr 1.0000e-03 eta 10:15:46 +epoch [27/50] batch [440/1000] time 1.557 (1.568) data 0.001 (0.003) loss 0.7539 (1.0965) acc 78.1250 (72.7344) lr 1.0000e-03 eta 10:15:37 +epoch [27/50] batch [445/1000] time 1.587 (1.568) data 0.000 (0.003) loss 1.2129 (1.0947) acc 65.6250 (72.7669) lr 1.0000e-03 eta 10:15:28 +epoch [27/50] batch [450/1000] time 1.556 (1.568) data 0.000 (0.003) loss 0.5781 (1.0939) acc 84.3750 (72.7500) lr 1.0000e-03 eta 10:15:18 +epoch [27/50] batch [455/1000] time 1.579 (1.568) data 0.000 (0.003) loss 1.3760 (1.0944) acc 59.3750 (72.7060) lr 1.0000e-03 eta 10:15:12 +epoch [27/50] batch [460/1000] time 1.549 (1.568) data 0.001 (0.002) loss 0.7329 (1.0926) acc 81.2500 (72.7242) lr 1.0000e-03 eta 10:15:02 +epoch [27/50] batch [465/1000] time 1.555 (1.568) data 0.001 (0.002) loss 0.9712 (1.0894) acc 78.1250 (72.8159) lr 1.0000e-03 eta 10:14:55 +epoch [27/50] batch [470/1000] time 1.550 (1.568) data 0.000 (0.002) loss 1.1221 (1.0882) acc 71.8750 (72.8391) lr 1.0000e-03 eta 10:14:43 +epoch [27/50] batch 
[475/1000] time 1.554 (1.568) data 0.001 (0.002) loss 1.5303 (1.0879) acc 62.5000 (72.8421) lr 1.0000e-03 eta 10:14:39 +epoch [27/50] batch [480/1000] time 1.555 (1.567) data 0.001 (0.002) loss 1.0078 (1.0875) acc 75.0000 (72.8451) lr 1.0000e-03 eta 10:14:27 +epoch [27/50] batch [485/1000] time 1.565 (1.567) data 0.000 (0.002) loss 0.6445 (1.0879) acc 81.2500 (72.8737) lr 1.0000e-03 eta 10:14:16 +epoch [27/50] batch [490/1000] time 1.547 (1.567) data 0.001 (0.002) loss 1.0381 (1.0885) acc 78.1250 (72.8444) lr 1.0000e-03 eta 10:14:06 +epoch [27/50] batch [495/1000] time 1.561 (1.567) data 0.000 (0.002) loss 0.9380 (1.0882) acc 68.7500 (72.8535) lr 1.0000e-03 eta 10:13:55 +epoch [27/50] batch [500/1000] time 1.547 (1.567) data 0.001 (0.002) loss 1.6133 (1.0916) acc 53.1250 (72.7375) lr 1.0000e-03 eta 10:13:46 +epoch [27/50] batch [505/1000] time 1.536 (1.567) data 0.000 (0.002) loss 0.7676 (1.0907) acc 84.3750 (72.7537) lr 1.0000e-03 eta 10:13:38 +epoch [27/50] batch [510/1000] time 1.560 (1.567) data 0.001 (0.002) loss 0.8145 (1.0917) acc 78.1250 (72.7390) lr 1.0000e-03 eta 10:13:32 +epoch [27/50] batch [515/1000] time 1.578 (1.567) data 0.000 (0.002) loss 0.6631 (1.0931) acc 81.2500 (72.7245) lr 1.0000e-03 eta 10:13:23 +epoch [27/50] batch [520/1000] time 1.550 (1.567) data 0.001 (0.002) loss 1.1904 (1.0936) acc 65.6250 (72.6923) lr 1.0000e-03 eta 10:13:20 +epoch [27/50] batch [525/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.4492 (1.0957) acc 59.3750 (72.6071) lr 1.0000e-03 eta 10:13:11 +epoch [27/50] batch [530/1000] time 1.557 (1.567) data 0.001 (0.002) loss 0.9380 (1.0953) acc 78.1250 (72.6297) lr 1.0000e-03 eta 10:13:00 +epoch [27/50] batch [535/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.4668 (1.0976) acc 71.8750 (72.5701) lr 1.0000e-03 eta 10:12:48 +epoch [27/50] batch [540/1000] time 1.578 (1.567) data 0.000 (0.002) loss 0.9062 (1.0990) acc 81.2500 (72.5579) lr 1.0000e-03 eta 10:12:40 +epoch [27/50] batch [545/1000] time 1.581 (1.567) data 
0.000 (0.002) loss 1.1855 (1.0990) acc 65.6250 (72.5287) lr 1.0000e-03 eta 10:12:31 +epoch [27/50] batch [550/1000] time 1.570 (1.567) data 0.000 (0.002) loss 1.1201 (1.0982) acc 75.0000 (72.5170) lr 1.0000e-03 eta 10:12:22 +epoch [27/50] batch [555/1000] time 1.526 (1.567) data 0.000 (0.002) loss 0.6758 (1.0953) acc 78.1250 (72.5563) lr 1.0000e-03 eta 10:12:08 +epoch [27/50] batch [560/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.4111 (1.0959) acc 71.8750 (72.5502) lr 1.0000e-03 eta 10:11:59 +epoch [27/50] batch [565/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.7031 (1.0966) acc 81.2500 (72.4889) lr 1.0000e-03 eta 10:11:59 +epoch [27/50] batch [570/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.3018 (1.0968) acc 59.3750 (72.4671) lr 1.0000e-03 eta 10:11:51 +epoch [27/50] batch [575/1000] time 1.566 (1.567) data 0.000 (0.002) loss 0.9731 (1.0960) acc 71.8750 (72.4511) lr 1.0000e-03 eta 10:11:41 +epoch [27/50] batch [580/1000] time 1.572 (1.567) data 0.000 (0.002) loss 0.7432 (1.0959) acc 75.0000 (72.4677) lr 1.0000e-03 eta 10:11:32 +epoch [27/50] batch [585/1000] time 1.587 (1.567) data 0.001 (0.002) loss 0.8755 (1.0979) acc 78.1250 (72.4359) lr 1.0000e-03 eta 10:11:25 +epoch [27/50] batch [590/1000] time 1.557 (1.567) data 0.000 (0.002) loss 1.2656 (1.0991) acc 68.7500 (72.4153) lr 1.0000e-03 eta 10:11:16 +epoch [27/50] batch [595/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.0430 (1.1012) acc 71.8750 (72.3687) lr 1.0000e-03 eta 10:11:06 +epoch [27/50] batch [600/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.7251 (1.1014) acc 75.0000 (72.3438) lr 1.0000e-03 eta 10:10:58 +epoch [27/50] batch [605/1000] time 1.548 (1.567) data 0.000 (0.002) loss 0.9497 (1.1006) acc 84.3750 (72.3812) lr 1.0000e-03 eta 10:10:48 +epoch [27/50] batch [610/1000] time 1.543 (1.566) data 0.000 (0.002) loss 0.6680 (1.0993) acc 84.3750 (72.4334) lr 1.0000e-03 eta 10:10:39 +epoch [27/50] batch [615/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.0527 (1.0988) acc 
75.0000 (72.4289) lr 1.0000e-03 eta 10:10:31 +epoch [27/50] batch [620/1000] time 1.566 (1.567) data 0.001 (0.002) loss 1.2295 (1.1002) acc 75.0000 (72.4244) lr 1.0000e-03 eta 10:10:25 +epoch [27/50] batch [625/1000] time 1.716 (1.567) data 0.000 (0.002) loss 0.4480 (1.1005) acc 87.5000 (72.4450) lr 1.0000e-03 eta 10:10:22 +epoch [27/50] batch [630/1000] time 1.541 (1.567) data 0.000 (0.002) loss 0.6460 (1.1026) acc 84.3750 (72.4157) lr 1.0000e-03 eta 10:10:11 +epoch [27/50] batch [635/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.1992 (1.1031) acc 68.7500 (72.3819) lr 1.0000e-03 eta 10:10:02 +epoch [27/50] batch [640/1000] time 1.542 (1.567) data 0.000 (0.002) loss 1.4180 (1.1026) acc 59.3750 (72.3682) lr 1.0000e-03 eta 10:09:53 +epoch [27/50] batch [645/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.9897 (1.1019) acc 68.7500 (72.3789) lr 1.0000e-03 eta 10:09:43 +epoch [27/50] batch [650/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.8960 (1.1011) acc 78.1250 (72.3462) lr 1.0000e-03 eta 10:09:32 +epoch [27/50] batch [655/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.0889 (1.1015) acc 78.1250 (72.3473) lr 1.0000e-03 eta 10:09:24 +epoch [27/50] batch [660/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.0361 (1.1018) acc 68.7500 (72.3343) lr 1.0000e-03 eta 10:09:14 +epoch [27/50] batch [665/1000] time 1.557 (1.566) data 0.000 (0.002) loss 0.7666 (1.1004) acc 81.2500 (72.3778) lr 1.0000e-03 eta 10:09:04 +epoch [27/50] batch [670/1000] time 1.713 (1.566) data 0.001 (0.002) loss 1.6494 (1.1017) acc 68.7500 (72.3461) lr 1.0000e-03 eta 10:09:03 +epoch [27/50] batch [675/1000] time 1.575 (1.566) data 0.000 (0.002) loss 0.7930 (1.1016) acc 81.2500 (72.3657) lr 1.0000e-03 eta 10:08:56 +epoch [27/50] batch [680/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.1738 (1.1007) acc 65.6250 (72.3851) lr 1.0000e-03 eta 10:08:47 +epoch [27/50] batch [685/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.8818 (1.1020) acc 78.1250 (72.3677) lr 1.0000e-03 eta 
10:08:38 +epoch [27/50] batch [690/1000] time 1.538 (1.566) data 0.000 (0.002) loss 0.9570 (1.1030) acc 81.2500 (72.3732) lr 1.0000e-03 eta 10:08:30 +epoch [27/50] batch [695/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.0479 (1.1027) acc 59.3750 (72.3651) lr 1.0000e-03 eta 10:08:20 +epoch [27/50] batch [700/1000] time 1.568 (1.566) data 0.000 (0.002) loss 0.7939 (1.1033) acc 75.0000 (72.3527) lr 1.0000e-03 eta 10:08:13 +epoch [27/50] batch [705/1000] time 1.552 (1.566) data 0.001 (0.002) loss 1.2031 (1.1047) acc 62.5000 (72.3094) lr 1.0000e-03 eta 10:08:03 +epoch [27/50] batch [710/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.6631 (1.1063) acc 71.8750 (72.2843) lr 1.0000e-03 eta 10:07:53 +epoch [27/50] batch [715/1000] time 1.571 (1.566) data 0.000 (0.002) loss 1.5439 (1.1067) acc 56.2500 (72.2684) lr 1.0000e-03 eta 10:07:50 +epoch [27/50] batch [720/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.0889 (1.1065) acc 75.0000 (72.2830) lr 1.0000e-03 eta 10:07:40 +epoch [27/50] batch [725/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.8066 (1.1066) acc 81.2500 (72.2759) lr 1.0000e-03 eta 10:07:33 +epoch [27/50] batch [730/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.0088 (1.1065) acc 75.0000 (72.3031) lr 1.0000e-03 eta 10:07:24 +epoch [27/50] batch [735/1000] time 1.576 (1.566) data 0.000 (0.002) loss 1.1826 (1.1061) acc 68.7500 (72.3257) lr 1.0000e-03 eta 10:07:15 +epoch [27/50] batch [740/1000] time 1.541 (1.566) data 0.000 (0.002) loss 0.9326 (1.1049) acc 75.0000 (72.3649) lr 1.0000e-03 eta 10:07:06 +epoch [27/50] batch [745/1000] time 1.546 (1.566) data 0.001 (0.002) loss 0.9062 (1.1053) acc 75.0000 (72.3532) lr 1.0000e-03 eta 10:06:57 +epoch [27/50] batch [750/1000] time 1.551 (1.566) data 0.001 (0.002) loss 0.9834 (1.1069) acc 68.7500 (72.3375) lr 1.0000e-03 eta 10:06:48 +epoch [27/50] batch [755/1000] time 1.557 (1.566) data 0.000 (0.002) loss 1.1025 (1.1075) acc 78.1250 (72.3179) lr 1.0000e-03 eta 10:06:38 +epoch [27/50] batch 
[760/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.9160 (1.1065) acc 78.1250 (72.3232) lr 1.0000e-03 eta 10:06:30 +epoch [27/50] batch [765/1000] time 1.546 (1.566) data 0.001 (0.002) loss 1.8281 (1.1076) acc 62.5000 (72.3080) lr 1.0000e-03 eta 10:06:20 +epoch [27/50] batch [770/1000] time 1.529 (1.566) data 0.001 (0.002) loss 0.8667 (1.1090) acc 84.3750 (72.2930) lr 1.0000e-03 eta 10:06:12 +epoch [27/50] batch [775/1000] time 1.582 (1.566) data 0.001 (0.002) loss 0.7319 (1.1100) acc 81.2500 (72.2863) lr 1.0000e-03 eta 10:06:04 +epoch [27/50] batch [780/1000] time 1.546 (1.566) data 0.001 (0.002) loss 0.8413 (1.1101) acc 71.8750 (72.2796) lr 1.0000e-03 eta 10:06:00 +epoch [27/50] batch [785/1000] time 1.577 (1.566) data 0.000 (0.002) loss 1.2051 (1.1110) acc 75.0000 (72.2691) lr 1.0000e-03 eta 10:05:49 +epoch [27/50] batch [790/1000] time 1.570 (1.566) data 0.000 (0.002) loss 0.8105 (1.1104) acc 71.8750 (72.2824) lr 1.0000e-03 eta 10:05:42 +epoch [27/50] batch [795/1000] time 1.557 (1.566) data 0.000 (0.002) loss 0.7920 (1.1092) acc 81.2500 (72.3035) lr 1.0000e-03 eta 10:05:35 +epoch [27/50] batch [800/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.2773 (1.1100) acc 62.5000 (72.2852) lr 1.0000e-03 eta 10:05:27 +epoch [27/50] batch [805/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.8022 (1.1115) acc 75.0000 (72.2554) lr 1.0000e-03 eta 10:05:21 +epoch [27/50] batch [810/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.2412 (1.1119) acc 68.7500 (72.2261) lr 1.0000e-03 eta 10:05:11 +epoch [27/50] batch [815/1000] time 1.565 (1.566) data 0.000 (0.002) loss 0.6143 (1.1123) acc 84.3750 (72.2086) lr 1.0000e-03 eta 10:05:01 +epoch [27/50] batch [820/1000] time 1.554 (1.566) data 0.000 (0.002) loss 0.6953 (1.1120) acc 78.1250 (72.2066) lr 1.0000e-03 eta 10:04:54 +epoch [27/50] batch [825/1000] time 1.572 (1.566) data 0.000 (0.002) loss 1.0830 (1.1118) acc 68.7500 (72.2045) lr 1.0000e-03 eta 10:04:50 +epoch [27/50] batch [830/1000] time 1.552 (1.566) data 
0.000 (0.002) loss 1.0605 (1.1117) acc 68.7500 (72.2139) lr 1.0000e-03 eta 10:04:41 +epoch [27/50] batch [835/1000] time 1.571 (1.566) data 0.000 (0.002) loss 1.0732 (1.1112) acc 75.0000 (72.2231) lr 1.0000e-03 eta 10:04:33 +epoch [27/50] batch [840/1000] time 1.538 (1.566) data 0.000 (0.002) loss 0.6460 (1.1113) acc 81.2500 (72.2173) lr 1.0000e-03 eta 10:04:23 +epoch [27/50] batch [845/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.2832 (1.1114) acc 65.6250 (72.2041) lr 1.0000e-03 eta 10:04:14 +epoch [27/50] batch [850/1000] time 1.547 (1.566) data 0.000 (0.002) loss 0.7100 (1.1092) acc 81.2500 (72.2390) lr 1.0000e-03 eta 10:04:05 +epoch [27/50] batch [855/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.0889 (1.1099) acc 71.8750 (72.2332) lr 1.0000e-03 eta 10:03:55 +epoch [27/50] batch [860/1000] time 1.583 (1.566) data 0.000 (0.002) loss 1.1543 (1.1101) acc 71.8750 (72.2384) lr 1.0000e-03 eta 10:03:47 +epoch [27/50] batch [865/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.3848 (1.1110) acc 56.2500 (72.2146) lr 1.0000e-03 eta 10:03:44 +epoch [27/50] batch [870/1000] time 1.539 (1.566) data 0.000 (0.002) loss 1.2041 (1.1117) acc 68.7500 (72.1947) lr 1.0000e-03 eta 10:03:36 +epoch [27/50] batch [875/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.6948 (1.1113) acc 81.2500 (72.1821) lr 1.0000e-03 eta 10:03:26 +epoch [27/50] batch [880/1000] time 1.556 (1.566) data 0.001 (0.002) loss 0.5776 (1.1112) acc 84.3750 (72.2053) lr 1.0000e-03 eta 10:03:18 +epoch [27/50] batch [885/1000] time 1.575 (1.566) data 0.001 (0.002) loss 1.1738 (1.1112) acc 75.0000 (72.2140) lr 1.0000e-03 eta 10:03:10 +epoch [27/50] batch [890/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.7324 (1.1119) acc 62.5000 (72.1945) lr 1.0000e-03 eta 10:03:02 +epoch [27/50] batch [895/1000] time 1.555 (1.566) data 0.000 (0.002) loss 1.1279 (1.1114) acc 68.7500 (72.1892) lr 1.0000e-03 eta 10:02:54 +epoch [27/50] batch [900/1000] time 1.565 (1.566) data 0.001 (0.002) loss 0.9790 (1.1118) acc 
75.0000 (72.1667) lr 1.0000e-03 eta 10:02:45 +epoch [27/50] batch [905/1000] time 1.576 (1.566) data 0.000 (0.002) loss 0.8921 (1.1114) acc 65.6250 (72.1581) lr 1.0000e-03 eta 10:02:37 +epoch [27/50] batch [910/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.4580 (1.1118) acc 68.7500 (72.1600) lr 1.0000e-03 eta 10:02:27 +epoch [27/50] batch [915/1000] time 1.553 (1.565) data 0.000 (0.001) loss 0.7437 (1.1121) acc 84.3750 (72.1721) lr 1.0000e-03 eta 10:02:17 +epoch [27/50] batch [920/1000] time 1.547 (1.565) data 0.000 (0.001) loss 0.8496 (1.1115) acc 75.0000 (72.1637) lr 1.0000e-03 eta 10:02:07 +epoch [27/50] batch [925/1000] time 1.552 (1.565) data 0.001 (0.001) loss 0.6416 (1.1118) acc 87.5000 (72.1723) lr 1.0000e-03 eta 10:01:58 +epoch [27/50] batch [930/1000] time 1.548 (1.565) data 0.000 (0.001) loss 1.1719 (1.1122) acc 71.8750 (72.1472) lr 1.0000e-03 eta 10:01:53 +epoch [27/50] batch [935/1000] time 1.560 (1.565) data 0.000 (0.001) loss 1.2900 (1.1118) acc 65.6250 (72.1557) lr 1.0000e-03 eta 10:01:43 +epoch [27/50] batch [940/1000] time 1.567 (1.565) data 0.000 (0.001) loss 1.9531 (1.1122) acc 56.2500 (72.1310) lr 1.0000e-03 eta 10:01:35 +epoch [27/50] batch [945/1000] time 1.572 (1.565) data 0.001 (0.001) loss 0.8911 (1.1123) acc 78.1250 (72.1462) lr 1.0000e-03 eta 10:01:28 +epoch [27/50] batch [950/1000] time 1.539 (1.565) data 0.000 (0.001) loss 0.8223 (1.1115) acc 78.1250 (72.1645) lr 1.0000e-03 eta 10:01:19 +epoch [27/50] batch [955/1000] time 1.549 (1.565) data 0.000 (0.001) loss 0.9087 (1.1103) acc 75.0000 (72.1728) lr 1.0000e-03 eta 10:01:10 +epoch [27/50] batch [960/1000] time 1.550 (1.565) data 0.000 (0.001) loss 0.9263 (1.1096) acc 71.8750 (72.1842) lr 1.0000e-03 eta 10:01:00 +epoch [27/50] batch [965/1000] time 1.561 (1.565) data 0.000 (0.001) loss 1.1699 (1.1093) acc 65.6250 (72.1859) lr 1.0000e-03 eta 10:00:51 +epoch [27/50] batch [970/1000] time 1.570 (1.565) data 0.001 (0.001) loss 1.0762 (1.1099) acc 65.6250 (72.1843) lr 1.0000e-03 eta 
10:00:44 +epoch [27/50] batch [975/1000] time 1.566 (1.565) data 0.000 (0.001) loss 1.4365 (1.1101) acc 62.5000 (72.1763) lr 1.0000e-03 eta 10:00:38 +epoch [27/50] batch [980/1000] time 1.544 (1.565) data 0.000 (0.001) loss 1.0293 (1.1105) acc 75.0000 (72.1939) lr 1.0000e-03 eta 10:00:29 +epoch [27/50] batch [985/1000] time 1.560 (1.565) data 0.001 (0.001) loss 0.6167 (1.1102) acc 78.1250 (72.1954) lr 1.0000e-03 eta 10:00:20 +epoch [27/50] batch [990/1000] time 1.534 (1.565) data 0.000 (0.001) loss 1.1523 (1.1111) acc 62.5000 (72.1812) lr 1.0000e-03 eta 10:00:11 +epoch [27/50] batch [995/1000] time 1.579 (1.565) data 0.000 (0.001) loss 1.7959 (1.1124) acc 59.3750 (72.1577) lr 1.0000e-03 eta 10:00:03 +epoch [27/50] batch [1000/1000] time 1.548 (1.565) data 0.000 (0.001) loss 1.1621 (1.1112) acc 71.8750 (72.1750) lr 9.3721e-04 eta 9:59:55 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,274 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 78.1% +epoch [28/50] batch [5/1000] time 1.535 (1.693) data 0.001 (0.192) loss 1.0273 (0.9180) acc 75.0000 (77.5000) lr 9.3721e-04 eta 10:48:39 +epoch [28/50] batch [10/1000] time 1.555 (1.623) data 0.000 (0.096) loss 1.3701 (1.0493) acc 68.7500 (74.3750) lr 9.3721e-04 eta 10:21:42 +epoch [28/50] batch [15/1000] time 1.544 (1.600) data 0.001 (0.064) loss 1.1572 (1.0903) acc 62.5000 (71.4583) lr 9.3721e-04 eta 10:13:00 +epoch [28/50] batch [20/1000] time 1.531 (1.587) data 0.000 (0.048) loss 0.6626 (1.0807) acc 87.5000 (71.5625) lr 9.3721e-04 eta 10:07:51 +epoch [28/50] batch [25/1000] time 1.578 (1.583) data 0.001 (0.039) loss 0.7300 (1.0912) acc 81.2500 (71.2500) lr 9.3721e-04 eta 10:06:10 +epoch [28/50] batch [30/1000] time 1.551 (1.579) data 0.000 (0.032) loss 1.0410 (1.0970) acc 68.7500 (71.1458) lr 9.3721e-04 eta 10:04:31 +epoch [28/50] batch [35/1000] time 1.583 (1.578) data 0.000 (0.028) loss 0.9844 (1.1314) acc 75.0000 (70.6250) lr 9.3721e-04 eta 10:03:59 +epoch [28/50] batch [40/1000] time 1.576 (1.577) 
data 0.000 (0.024) loss 1.2969 (1.1552) acc 65.6250 (69.9219) lr 9.3721e-04 eta 10:03:27 +epoch [28/50] batch [45/1000] time 1.543 (1.575) data 0.000 (0.022) loss 0.8599 (1.1384) acc 78.1250 (70.9722) lr 9.3721e-04 eta 10:02:30 +epoch [28/50] batch [50/1000] time 1.574 (1.574) data 0.000 (0.020) loss 1.3623 (1.1296) acc 62.5000 (71.1250) lr 9.3721e-04 eta 10:02:07 +epoch [28/50] batch [55/1000] time 1.565 (1.573) data 0.000 (0.018) loss 1.1230 (1.1526) acc 71.8750 (70.5114) lr 9.3721e-04 eta 10:01:24 +epoch [28/50] batch [60/1000] time 1.547 (1.572) data 0.001 (0.016) loss 1.5273 (1.1450) acc 65.6250 (70.8333) lr 9.3721e-04 eta 10:00:51 +epoch [28/50] batch [65/1000] time 1.569 (1.571) data 0.001 (0.015) loss 0.8652 (1.1402) acc 75.0000 (70.8654) lr 9.3721e-04 eta 10:00:27 +epoch [28/50] batch [70/1000] time 1.571 (1.570) data 0.000 (0.014) loss 1.1797 (1.1404) acc 71.8750 (70.6696) lr 9.3721e-04 eta 10:00:03 +epoch [28/50] batch [75/1000] time 1.563 (1.573) data 0.000 (0.013) loss 1.6445 (1.1480) acc 62.5000 (70.6667) lr 9.3721e-04 eta 10:00:51 +epoch [28/50] batch [80/1000] time 1.572 (1.572) data 0.001 (0.012) loss 1.5527 (1.1391) acc 65.6250 (71.0156) lr 9.3721e-04 eta 10:00:25 +epoch [28/50] batch [85/1000] time 1.543 (1.570) data 0.000 (0.012) loss 0.8198 (1.1304) acc 71.8750 (71.1397) lr 9.3721e-04 eta 9:59:36 +epoch [28/50] batch [90/1000] time 1.559 (1.569) data 0.000 (0.011) loss 1.1729 (1.1333) acc 65.6250 (70.9028) lr 9.3721e-04 eta 9:59:14 +epoch [28/50] batch [95/1000] time 1.588 (1.570) data 0.000 (0.011) loss 1.8926 (1.1360) acc 56.2500 (70.8882) lr 9.3721e-04 eta 9:59:13 +epoch [28/50] batch [100/1000] time 1.587 (1.570) data 0.001 (0.010) loss 1.0674 (1.1385) acc 75.0000 (70.7188) lr 9.3721e-04 eta 9:59:11 +epoch [28/50] batch [105/1000] time 1.588 (1.570) data 0.000 (0.010) loss 1.2188 (1.1431) acc 75.0000 (70.8036) lr 9.3721e-04 eta 9:58:55 +epoch [28/50] batch [110/1000] time 1.552 (1.569) data 0.000 (0.009) loss 0.8599 (1.1377) acc 71.8750 
(71.0227) lr 9.3721e-04 eta 9:58:40 +epoch [28/50] batch [115/1000] time 1.556 (1.569) data 0.000 (0.009) loss 1.1934 (1.1335) acc 68.7500 (71.0326) lr 9.3721e-04 eta 9:58:30 +epoch [28/50] batch [120/1000] time 1.567 (1.569) data 0.001 (0.008) loss 0.9800 (1.1360) acc 75.0000 (71.2240) lr 9.3721e-04 eta 9:58:22 +epoch [28/50] batch [125/1000] time 1.569 (1.571) data 0.001 (0.008) loss 0.8735 (1.1362) acc 75.0000 (71.2000) lr 9.3721e-04 eta 9:58:54 +epoch [28/50] batch [130/1000] time 1.568 (1.571) data 0.000 (0.008) loss 1.1641 (1.1325) acc 75.0000 (71.2500) lr 9.3721e-04 eta 9:58:43 +epoch [28/50] batch [135/1000] time 1.543 (1.570) data 0.000 (0.008) loss 1.1191 (1.1292) acc 75.0000 (71.3426) lr 9.3721e-04 eta 9:58:28 +epoch [28/50] batch [140/1000] time 1.553 (1.570) data 0.001 (0.007) loss 0.9531 (1.1234) acc 71.8750 (71.3839) lr 9.3721e-04 eta 9:58:15 +epoch [28/50] batch [145/1000] time 1.554 (1.570) data 0.000 (0.007) loss 1.0439 (1.1247) acc 68.7500 (71.2500) lr 9.3721e-04 eta 9:58:02 +epoch [28/50] batch [150/1000] time 1.592 (1.570) data 0.000 (0.007) loss 1.1699 (1.1192) acc 75.0000 (71.3750) lr 9.3721e-04 eta 9:57:54 +epoch [28/50] batch [155/1000] time 1.573 (1.570) data 0.000 (0.007) loss 1.3682 (1.1116) acc 68.7500 (71.5726) lr 9.3721e-04 eta 9:57:42 +epoch [28/50] batch [160/1000] time 1.534 (1.569) data 0.001 (0.006) loss 0.7153 (1.1186) acc 78.1250 (71.2891) lr 9.3721e-04 eta 9:57:18 +epoch [28/50] batch [165/1000] time 1.572 (1.569) data 0.000 (0.006) loss 0.8369 (1.1121) acc 78.1250 (71.3826) lr 9.3721e-04 eta 9:56:57 +epoch [28/50] batch [170/1000] time 1.559 (1.568) data 0.000 (0.006) loss 1.3486 (1.1154) acc 65.6250 (71.2868) lr 9.3721e-04 eta 9:56:41 +epoch [28/50] batch [175/1000] time 1.566 (1.568) data 0.000 (0.006) loss 1.3359 (1.1130) acc 71.8750 (71.3393) lr 9.3721e-04 eta 9:56:22 +epoch [28/50] batch [180/1000] time 1.540 (1.567) data 0.000 (0.006) loss 0.5430 (1.1016) acc 87.5000 (71.5972) lr 9.3721e-04 eta 9:56:08 +epoch [28/50] 
batch [185/1000] time 1.574 (1.567) data 0.001 (0.006) loss 1.0039 (1.1009) acc 81.2500 (71.7230) lr 9.3721e-04 eta 9:55:54 +epoch [28/50] batch [190/1000] time 1.582 (1.567) data 0.001 (0.006) loss 0.8872 (1.1057) acc 78.1250 (71.6447) lr 9.3721e-04 eta 9:55:45 +epoch [28/50] batch [195/1000] time 1.556 (1.567) data 0.001 (0.005) loss 0.8750 (1.1043) acc 78.1250 (71.7308) lr 9.3721e-04 eta 9:55:39 +epoch [28/50] batch [200/1000] time 1.558 (1.567) data 0.000 (0.005) loss 1.2490 (1.1056) acc 71.8750 (71.7344) lr 9.3721e-04 eta 9:55:33 +epoch [28/50] batch [205/1000] time 1.572 (1.567) data 0.000 (0.005) loss 1.0176 (1.1050) acc 84.3750 (71.8445) lr 9.3721e-04 eta 9:55:26 +epoch [28/50] batch [210/1000] time 1.536 (1.567) data 0.000 (0.005) loss 1.2227 (1.1014) acc 56.2500 (71.8155) lr 9.3721e-04 eta 9:55:13 +epoch [28/50] batch [215/1000] time 1.572 (1.567) data 0.000 (0.005) loss 1.2256 (1.1027) acc 68.7500 (71.8605) lr 9.3721e-04 eta 9:54:58 +epoch [28/50] batch [220/1000] time 1.554 (1.567) data 0.001 (0.005) loss 0.5098 (1.0996) acc 90.6250 (71.9744) lr 9.3721e-04 eta 9:54:49 +epoch [28/50] batch [225/1000] time 1.729 (1.567) data 0.000 (0.005) loss 1.1045 (1.0960) acc 68.7500 (72.0417) lr 9.3721e-04 eta 9:54:53 +epoch [28/50] batch [230/1000] time 1.573 (1.567) data 0.000 (0.005) loss 1.1279 (1.0967) acc 75.0000 (72.0245) lr 9.3721e-04 eta 9:54:45 +epoch [28/50] batch [235/1000] time 1.555 (1.567) data 0.001 (0.005) loss 0.7822 (1.0964) acc 71.8750 (71.8750) lr 9.3721e-04 eta 9:54:36 +epoch [28/50] batch [240/1000] time 1.561 (1.567) data 0.000 (0.004) loss 1.9727 (1.1004) acc 46.8750 (71.7969) lr 9.3721e-04 eta 9:54:27 +epoch [28/50] batch [245/1000] time 1.582 (1.567) data 0.001 (0.004) loss 1.5137 (1.1018) acc 65.6250 (71.7985) lr 9.3721e-04 eta 9:54:20 +epoch [28/50] batch [250/1000] time 1.566 (1.567) data 0.000 (0.004) loss 1.4492 (1.1053) acc 75.0000 (71.8500) lr 9.3721e-04 eta 9:54:11 +epoch [28/50] batch [255/1000] time 1.571 (1.567) data 0.000 
(0.004) loss 0.7251 (1.1071) acc 81.2500 (71.8137) lr 9.3721e-04 eta 9:53:59 +epoch [28/50] batch [260/1000] time 1.575 (1.567) data 0.000 (0.004) loss 1.1914 (1.1076) acc 68.7500 (71.8510) lr 9.3721e-04 eta 9:53:52 +epoch [28/50] batch [265/1000] time 1.574 (1.567) data 0.000 (0.004) loss 0.9644 (1.1041) acc 68.7500 (71.9340) lr 9.3721e-04 eta 9:53:41 +epoch [28/50] batch [270/1000] time 1.546 (1.567) data 0.000 (0.004) loss 0.8521 (1.1056) acc 75.0000 (71.9329) lr 9.3721e-04 eta 9:53:26 +epoch [28/50] batch [275/1000] time 1.587 (1.567) data 0.000 (0.004) loss 1.2256 (1.1037) acc 65.6250 (71.9659) lr 9.3721e-04 eta 9:53:29 +epoch [28/50] batch [280/1000] time 1.529 (1.567) data 0.000 (0.004) loss 1.1973 (1.1012) acc 75.0000 (72.0871) lr 9.3721e-04 eta 9:53:18 +epoch [28/50] batch [285/1000] time 1.567 (1.567) data 0.000 (0.004) loss 1.2617 (1.1044) acc 62.5000 (71.9956) lr 9.3721e-04 eta 9:53:03 +epoch [28/50] batch [290/1000] time 1.566 (1.566) data 0.001 (0.004) loss 0.5991 (1.1008) acc 87.5000 (72.0582) lr 9.3721e-04 eta 9:52:50 +epoch [28/50] batch [295/1000] time 1.547 (1.566) data 0.000 (0.004) loss 1.0244 (1.1022) acc 71.8750 (72.0339) lr 9.3721e-04 eta 9:52:39 +epoch [28/50] batch [300/1000] time 1.544 (1.566) data 0.001 (0.004) loss 1.6777 (1.1053) acc 59.3750 (71.9375) lr 9.3721e-04 eta 9:52:22 +epoch [28/50] batch [305/1000] time 1.554 (1.566) data 0.000 (0.004) loss 1.7695 (1.1091) acc 68.7500 (71.9262) lr 9.3721e-04 eta 9:52:11 +epoch [28/50] batch [310/1000] time 1.540 (1.565) data 0.000 (0.004) loss 1.0596 (1.1085) acc 78.1250 (71.9556) lr 9.3721e-04 eta 9:51:58 +epoch [28/50] batch [315/1000] time 1.587 (1.565) data 0.000 (0.003) loss 0.9531 (1.1102) acc 71.8750 (71.9147) lr 9.3721e-04 eta 9:51:52 +epoch [28/50] batch [320/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.3730 (1.1079) acc 62.5000 (71.9531) lr 9.3721e-04 eta 9:51:38 +epoch [28/50] batch [325/1000] time 1.575 (1.565) data 0.000 (0.003) loss 0.8989 (1.1048) acc 78.1250 (72.0192) lr 
9.3721e-04 eta 9:51:35 +epoch [28/50] batch [330/1000] time 1.566 (1.565) data 0.001 (0.003) loss 1.3652 (1.1028) acc 59.3750 (72.0644) lr 9.3721e-04 eta 9:51:26 +epoch [28/50] batch [335/1000] time 1.574 (1.565) data 0.000 (0.003) loss 0.6582 (1.1020) acc 84.3750 (72.1269) lr 9.3721e-04 eta 9:51:16 +epoch [28/50] batch [340/1000] time 1.550 (1.565) data 0.001 (0.003) loss 0.8740 (1.1025) acc 75.0000 (72.1415) lr 9.3721e-04 eta 9:51:08 +epoch [28/50] batch [345/1000] time 1.559 (1.565) data 0.000 (0.003) loss 1.3008 (1.1002) acc 59.3750 (72.1920) lr 9.3721e-04 eta 9:50:59 +epoch [28/50] batch [350/1000] time 1.569 (1.565) data 0.000 (0.003) loss 1.3076 (1.0992) acc 62.5000 (72.2054) lr 9.3721e-04 eta 9:50:53 +epoch [28/50] batch [355/1000] time 1.577 (1.565) data 0.000 (0.003) loss 1.2627 (1.0979) acc 65.6250 (72.2535) lr 9.3721e-04 eta 9:50:43 +epoch [28/50] batch [360/1000] time 1.582 (1.565) data 0.000 (0.003) loss 0.9883 (1.1009) acc 75.0000 (72.1962) lr 9.3721e-04 eta 9:50:33 +epoch [28/50] batch [365/1000] time 1.556 (1.565) data 0.001 (0.003) loss 0.6899 (1.1018) acc 75.0000 (72.1490) lr 9.3721e-04 eta 9:50:20 +epoch [28/50] batch [370/1000] time 1.562 (1.565) data 0.000 (0.003) loss 0.9058 (1.1001) acc 81.2500 (72.1791) lr 9.3721e-04 eta 9:50:10 +epoch [28/50] batch [375/1000] time 1.565 (1.565) data 0.000 (0.003) loss 0.7266 (1.1005) acc 81.2500 (72.1750) lr 9.3721e-04 eta 9:50:01 +epoch [28/50] batch [380/1000] time 1.543 (1.565) data 0.000 (0.003) loss 1.4053 (1.1043) acc 62.5000 (72.0970) lr 9.3721e-04 eta 9:49:56 +epoch [28/50] batch [385/1000] time 1.541 (1.565) data 0.000 (0.003) loss 1.2617 (1.1060) acc 68.7500 (72.0373) lr 9.3721e-04 eta 9:49:46 +epoch [28/50] batch [390/1000] time 1.568 (1.565) data 0.000 (0.003) loss 0.9150 (1.1071) acc 75.0000 (72.0192) lr 9.3721e-04 eta 9:49:36 +epoch [28/50] batch [395/1000] time 1.537 (1.564) data 0.000 (0.003) loss 1.0264 (1.1064) acc 75.0000 (72.0253) lr 9.3721e-04 eta 9:49:23 +epoch [28/50] batch 
[400/1000] time 1.556 (1.564) data 0.000 (0.003) loss 1.1611 (1.1059) acc 71.8750 (72.0547) lr 9.3721e-04 eta 9:49:15 +epoch [28/50] batch [405/1000] time 1.575 (1.564) data 0.000 (0.003) loss 1.2705 (1.1061) acc 71.8750 (72.0988) lr 9.3721e-04 eta 9:49:04 +epoch [28/50] batch [410/1000] time 1.558 (1.564) data 0.000 (0.003) loss 0.6685 (1.1028) acc 84.3750 (72.1799) lr 9.3721e-04 eta 9:48:55 +epoch [28/50] batch [415/1000] time 1.579 (1.564) data 0.000 (0.003) loss 0.7900 (1.1011) acc 75.0000 (72.2063) lr 9.3721e-04 eta 9:48:49 +epoch [28/50] batch [420/1000] time 1.555 (1.564) data 0.000 (0.003) loss 1.0127 (1.1012) acc 78.1250 (72.2098) lr 9.3721e-04 eta 9:48:43 +epoch [28/50] batch [425/1000] time 1.558 (1.565) data 0.000 (0.003) loss 0.8081 (1.0990) acc 71.8750 (72.2426) lr 9.3721e-04 eta 9:48:45 +epoch [28/50] batch [430/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.2764 (1.0990) acc 78.1250 (72.2674) lr 9.3721e-04 eta 9:48:39 +epoch [28/50] batch [435/1000] time 1.569 (1.565) data 0.000 (0.003) loss 0.6704 (1.1002) acc 81.2500 (72.2342) lr 9.3721e-04 eta 9:48:30 +epoch [28/50] batch [440/1000] time 1.569 (1.565) data 0.000 (0.003) loss 0.7920 (1.0985) acc 78.1250 (72.3082) lr 9.3721e-04 eta 9:48:24 +epoch [28/50] batch [445/1000] time 1.561 (1.565) data 0.000 (0.003) loss 0.6675 (1.0978) acc 81.2500 (72.3174) lr 9.3721e-04 eta 9:48:13 +epoch [28/50] batch [450/1000] time 1.542 (1.565) data 0.000 (0.003) loss 1.2188 (1.0964) acc 65.6250 (72.3125) lr 9.3721e-04 eta 9:48:02 +epoch [28/50] batch [455/1000] time 1.559 (1.565) data 0.000 (0.003) loss 0.9834 (1.0962) acc 75.0000 (72.3420) lr 9.3721e-04 eta 9:47:54 +epoch [28/50] batch [460/1000] time 1.558 (1.565) data 0.000 (0.003) loss 1.1123 (1.0969) acc 75.0000 (72.3641) lr 9.3721e-04 eta 9:47:48 +epoch [28/50] batch [465/1000] time 1.583 (1.565) data 0.001 (0.002) loss 0.6016 (1.0954) acc 87.5000 (72.3723) lr 9.3721e-04 eta 9:47:44 +epoch [28/50] batch [470/1000] time 1.561 (1.565) data 0.000 (0.002) loss 
1.2266 (1.0982) acc 78.1250 (72.3072) lr 9.3721e-04 eta 9:47:34 +epoch [28/50] batch [475/1000] time 1.574 (1.565) data 0.002 (0.002) loss 1.9033 (1.0999) acc 46.8750 (72.2434) lr 9.3721e-04 eta 9:47:26 +epoch [28/50] batch [480/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.1260 (1.0991) acc 71.8750 (72.2656) lr 9.3721e-04 eta 9:47:19 +epoch [28/50] batch [485/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.0840 (1.0992) acc 65.6250 (72.2680) lr 9.3721e-04 eta 9:47:09 +epoch [28/50] batch [490/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.8677 (1.0984) acc 75.0000 (72.2895) lr 9.3721e-04 eta 9:47:02 +epoch [28/50] batch [495/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.1924 (1.1001) acc 62.5000 (72.2096) lr 9.3721e-04 eta 9:46:55 +epoch [28/50] batch [500/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.5674 (1.1007) acc 59.3750 (72.2062) lr 9.3721e-04 eta 9:46:45 +epoch [28/50] batch [505/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.2178 (1.1014) acc 71.8750 (72.2092) lr 9.3721e-04 eta 9:46:36 +epoch [28/50] batch [510/1000] time 1.544 (1.565) data 0.000 (0.002) loss 1.3018 (1.1018) acc 65.6250 (72.1936) lr 9.3721e-04 eta 9:46:28 +epoch [28/50] batch [515/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.9756 (1.1003) acc 65.6250 (72.1966) lr 9.3721e-04 eta 9:46:20 +epoch [28/50] batch [520/1000] time 1.580 (1.565) data 0.001 (0.002) loss 1.4199 (1.1002) acc 65.6250 (72.2115) lr 9.3721e-04 eta 9:46:14 +epoch [28/50] batch [525/1000] time 1.545 (1.565) data 0.001 (0.002) loss 1.2744 (1.1000) acc 68.7500 (72.2381) lr 9.3721e-04 eta 9:46:06 +epoch [28/50] batch [530/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.2051 (1.1016) acc 65.6250 (72.2347) lr 9.3721e-04 eta 9:46:03 +epoch [28/50] batch [535/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.6543 (1.1011) acc 87.5000 (72.2605) lr 9.3721e-04 eta 9:45:56 +epoch [28/50] batch [540/1000] time 1.566 (1.565) data 0.000 (0.002) loss 1.5459 (1.1019) acc 62.5000 (72.2396) lr 9.3721e-04 
eta 9:45:47 +epoch [28/50] batch [545/1000] time 1.570 (1.565) data 0.001 (0.002) loss 0.9673 (1.1024) acc 78.1250 (72.2592) lr 9.3721e-04 eta 9:45:41 +epoch [28/50] batch [550/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.0664 (1.1027) acc 62.5000 (72.2557) lr 9.3721e-04 eta 9:45:33 +epoch [28/50] batch [555/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.0430 (1.1008) acc 68.7500 (72.2635) lr 9.3721e-04 eta 9:45:24 +epoch [28/50] batch [560/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.4209 (1.0998) acc 68.7500 (72.2656) lr 9.3721e-04 eta 9:45:17 +epoch [28/50] batch [565/1000] time 1.582 (1.565) data 0.000 (0.002) loss 0.6973 (1.1003) acc 81.2500 (72.2677) lr 9.3721e-04 eta 9:45:10 +epoch [28/50] batch [570/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.8027 (1.1028) acc 56.2500 (72.1930) lr 9.3721e-04 eta 9:45:05 +epoch [28/50] batch [575/1000] time 1.548 (1.565) data 0.000 (0.002) loss 1.2012 (1.1029) acc 75.0000 (72.2011) lr 9.3721e-04 eta 9:45:01 +epoch [28/50] batch [580/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.0215 (1.1020) acc 68.7500 (72.2037) lr 9.3721e-04 eta 9:44:52 +epoch [28/50] batch [585/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.3213 (1.1025) acc 56.2500 (72.1848) lr 9.3721e-04 eta 9:44:43 +epoch [28/50] batch [590/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.7588 (1.1016) acc 84.3750 (72.2087) lr 9.3721e-04 eta 9:44:33 +epoch [28/50] batch [595/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.6748 (1.1039) acc 62.5000 (72.1586) lr 9.3721e-04 eta 9:44:24 +epoch [28/50] batch [600/1000] time 1.568 (1.565) data 0.001 (0.002) loss 0.9053 (1.1028) acc 81.2500 (72.1979) lr 9.3721e-04 eta 9:44:16 +epoch [28/50] batch [605/1000] time 1.534 (1.565) data 0.000 (0.002) loss 1.2256 (1.1051) acc 68.7500 (72.1333) lr 9.3721e-04 eta 9:44:07 +epoch [28/50] batch [610/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.3408 (1.1041) acc 65.6250 (72.1414) lr 9.3721e-04 eta 9:43:59 +epoch [28/50] batch [615/1000] time 
1.569 (1.565) data 0.000 (0.002) loss 0.7856 (1.1049) acc 84.3750 (72.1392) lr 9.3721e-04 eta 9:43:51 +epoch [28/50] batch [620/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.7456 (1.1031) acc 78.1250 (72.1623) lr 9.3721e-04 eta 9:43:43 +epoch [28/50] batch [625/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.5127 (1.1030) acc 56.2500 (72.1500) lr 9.3721e-04 eta 9:43:35 +epoch [28/50] batch [630/1000] time 1.539 (1.565) data 0.000 (0.002) loss 1.4180 (1.1032) acc 59.3750 (72.1230) lr 9.3721e-04 eta 9:43:25 +epoch [28/50] batch [635/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.0391 (1.1036) acc 68.7500 (72.1358) lr 9.3721e-04 eta 9:43:17 +epoch [28/50] batch [640/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.9429 (1.1028) acc 81.2500 (72.1729) lr 9.3721e-04 eta 9:43:08 +epoch [28/50] batch [645/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.1094 (1.1015) acc 75.0000 (72.1851) lr 9.3721e-04 eta 9:43:01 +epoch [28/50] batch [650/1000] time 1.538 (1.565) data 0.000 (0.002) loss 1.8145 (1.1023) acc 59.3750 (72.1731) lr 9.3721e-04 eta 9:42:51 +epoch [28/50] batch [655/1000] time 1.596 (1.565) data 0.000 (0.002) loss 1.1533 (1.1004) acc 68.7500 (72.2042) lr 9.3721e-04 eta 9:42:42 +epoch [28/50] batch [660/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.6396 (1.1007) acc 81.2500 (72.2112) lr 9.3721e-04 eta 9:42:35 +epoch [28/50] batch [665/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.8628 (1.1011) acc 78.1250 (72.2133) lr 9.3721e-04 eta 9:42:26 +epoch [28/50] batch [670/1000] time 1.530 (1.565) data 0.001 (0.002) loss 0.9795 (1.1009) acc 81.2500 (72.2155) lr 9.3721e-04 eta 9:42:15 +epoch [28/50] batch [675/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.8975 (1.1010) acc 81.2500 (72.2361) lr 9.3721e-04 eta 9:42:07 +epoch [28/50] batch [680/1000] time 1.537 (1.565) data 0.001 (0.002) loss 0.8125 (1.1009) acc 78.1250 (72.2243) lr 9.3721e-04 eta 9:42:02 +epoch [28/50] batch [685/1000] time 1.556 (1.565) data 0.000 (0.002) loss 0.7222 (1.0991) 
acc 78.1250 (72.2582) lr 9.3721e-04 eta 9:41:53 +epoch [28/50] batch [690/1000] time 1.563 (1.564) data 0.000 (0.002) loss 0.3691 (1.0993) acc 87.5000 (72.2736) lr 9.3721e-04 eta 9:41:43 +epoch [28/50] batch [695/1000] time 1.571 (1.565) data 0.001 (0.002) loss 1.1758 (1.1002) acc 71.8750 (72.2257) lr 9.3721e-04 eta 9:41:36 +epoch [28/50] batch [700/1000] time 1.548 (1.565) data 0.001 (0.002) loss 0.8120 (1.0997) acc 81.2500 (72.2589) lr 9.3721e-04 eta 9:41:29 +epoch [28/50] batch [705/1000] time 1.562 (1.565) data 0.001 (0.002) loss 0.8335 (1.0989) acc 75.0000 (72.2695) lr 9.3721e-04 eta 9:41:21 +epoch [28/50] batch [710/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.6079 (1.0974) acc 81.2500 (72.3151) lr 9.3721e-04 eta 9:41:15 +epoch [28/50] batch [715/1000] time 1.569 (1.565) data 0.001 (0.002) loss 0.4653 (1.0986) acc 87.5000 (72.3252) lr 9.3721e-04 eta 9:41:08 +epoch [28/50] batch [720/1000] time 1.580 (1.565) data 0.001 (0.002) loss 0.8569 (1.0968) acc 68.7500 (72.3307) lr 9.3721e-04 eta 9:41:00 +epoch [28/50] batch [725/1000] time 1.702 (1.565) data 0.000 (0.002) loss 0.6069 (1.0953) acc 81.2500 (72.3578) lr 9.3721e-04 eta 9:40:56 +epoch [28/50] batch [730/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.6611 (1.0976) acc 59.3750 (72.3202) lr 9.3721e-04 eta 9:40:47 +epoch [28/50] batch [735/1000] time 1.600 (1.565) data 0.001 (0.002) loss 0.9556 (1.0970) acc 75.0000 (72.3299) lr 9.3721e-04 eta 9:40:40 +epoch [28/50] batch [740/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.8794 (1.0970) acc 65.6250 (72.2931) lr 9.3721e-04 eta 9:40:32 +epoch [28/50] batch [745/1000] time 1.583 (1.565) data 0.000 (0.002) loss 0.6802 (1.0961) acc 84.3750 (72.3196) lr 9.3721e-04 eta 9:40:26 +epoch [28/50] batch [750/1000] time 1.587 (1.565) data 0.000 (0.002) loss 0.7739 (1.0942) acc 75.0000 (72.3792) lr 9.3721e-04 eta 9:40:19 +epoch [28/50] batch [755/1000] time 1.595 (1.565) data 0.000 (0.002) loss 0.8853 (1.0939) acc 78.1250 (72.3675) lr 9.3721e-04 eta 9:40:12 
+epoch [28/50] batch [760/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.1455 (1.0944) acc 68.7500 (72.3561) lr 9.3721e-04 eta 9:40:03 +epoch [28/50] batch [765/1000] time 1.568 (1.565) data 0.000 (0.002) loss 0.9688 (1.0931) acc 75.0000 (72.3775) lr 9.3721e-04 eta 9:39:54 +epoch [28/50] batch [770/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.6914 (1.0930) acc 68.7500 (72.4026) lr 9.3721e-04 eta 9:39:44 +epoch [28/50] batch [775/1000] time 1.586 (1.565) data 0.001 (0.002) loss 1.1680 (1.0931) acc 75.0000 (72.4032) lr 9.3721e-04 eta 9:39:38 +epoch [28/50] batch [780/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.2666 (1.0935) acc 71.8750 (72.4319) lr 9.3721e-04 eta 9:39:29 +epoch [28/50] batch [785/1000] time 1.578 (1.565) data 0.000 (0.002) loss 0.7036 (1.0922) acc 87.5000 (72.4881) lr 9.3721e-04 eta 9:39:21 +epoch [28/50] batch [790/1000] time 1.562 (1.565) data 0.001 (0.002) loss 0.7827 (1.0924) acc 81.2500 (72.4921) lr 9.3721e-04 eta 9:39:13 +epoch [28/50] batch [795/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.3936 (1.0924) acc 65.6250 (72.5000) lr 9.3721e-04 eta 9:39:05 +epoch [28/50] batch [800/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.8105 (1.0924) acc 65.6250 (72.5234) lr 9.3721e-04 eta 9:38:57 +epoch [28/50] batch [805/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.1611 (1.0936) acc 62.5000 (72.4845) lr 9.3721e-04 eta 9:38:48 +epoch [28/50] batch [810/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.5947 (1.0940) acc 62.5000 (72.4846) lr 9.3721e-04 eta 9:38:39 +epoch [28/50] batch [815/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.1787 (1.0942) acc 68.7500 (72.4885) lr 9.3721e-04 eta 9:38:29 +epoch [28/50] batch [820/1000] time 1.544 (1.565) data 0.000 (0.002) loss 0.6489 (1.0932) acc 81.2500 (72.5152) lr 9.3721e-04 eta 9:38:22 +epoch [28/50] batch [825/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.8193 (1.0915) acc 71.8750 (72.5379) lr 9.3721e-04 eta 9:38:15 +epoch [28/50] batch [830/1000] time 1.554 (1.565) 
data 0.000 (0.002) loss 0.9800 (1.0920) acc 75.0000 (72.5264) lr 9.3721e-04 eta 9:38:12 +epoch [28/50] batch [835/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0869 (1.0932) acc 75.0000 (72.5037) lr 9.3721e-04 eta 9:38:03 +epoch [28/50] batch [840/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.9062 (1.0935) acc 56.2500 (72.4851) lr 9.3721e-04 eta 9:37:54 +epoch [28/50] batch [845/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.2910 (1.0936) acc 65.6250 (72.4815) lr 9.3721e-04 eta 9:37:46 +epoch [28/50] batch [850/1000] time 1.590 (1.565) data 0.000 (0.002) loss 0.9585 (1.0943) acc 75.0000 (72.4559) lr 9.3721e-04 eta 9:37:39 +epoch [28/50] batch [855/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.3564 (1.0948) acc 62.5000 (72.4525) lr 9.3721e-04 eta 9:37:30 +epoch [28/50] batch [860/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.1836 (1.0951) acc 68.7500 (72.4455) lr 9.3721e-04 eta 9:37:23 +epoch [28/50] batch [865/1000] time 1.569 (1.565) data 0.001 (0.002) loss 1.0225 (1.0958) acc 78.1250 (72.4205) lr 9.3721e-04 eta 9:37:15 +epoch [28/50] batch [870/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.7217 (1.0979) acc 62.5000 (72.3635) lr 9.3721e-04 eta 9:37:05 +epoch [28/50] batch [875/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.9053 (1.0997) acc 81.2500 (72.3357) lr 9.3721e-04 eta 9:36:57 +epoch [28/50] batch [880/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.8208 (1.0990) acc 78.1250 (72.3366) lr 9.3721e-04 eta 9:36:54 +epoch [28/50] batch [885/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.3828 (1.0998) acc 71.8750 (72.3482) lr 9.3721e-04 eta 9:36:45 +epoch [28/50] batch [890/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.2080 (1.0992) acc 78.1250 (72.3806) lr 9.3721e-04 eta 9:36:35 +epoch [28/50] batch [895/1000] time 1.543 (1.565) data 0.000 (0.001) loss 1.9307 (1.1011) acc 53.1250 (72.3534) lr 9.3721e-04 eta 9:36:27 +epoch [28/50] batch [900/1000] time 1.568 (1.565) data 0.000 (0.001) loss 1.6748 (1.1019) acc 62.5000 
(72.3368) lr 9.3721e-04 eta 9:36:20 +epoch [28/50] batch [905/1000] time 1.563 (1.565) data 0.000 (0.001) loss 1.1064 (1.1016) acc 81.2500 (72.3550) lr 9.3721e-04 eta 9:36:11 +epoch [28/50] batch [910/1000] time 1.562 (1.565) data 0.001 (0.001) loss 1.1357 (1.1010) acc 78.1250 (72.3729) lr 9.3721e-04 eta 9:36:06 +epoch [28/50] batch [915/1000] time 1.563 (1.565) data 0.000 (0.001) loss 0.8394 (1.1019) acc 75.0000 (72.3531) lr 9.3721e-04 eta 9:35:57 +epoch [28/50] batch [920/1000] time 1.536 (1.565) data 0.000 (0.001) loss 1.2041 (1.1010) acc 75.0000 (72.3641) lr 9.3721e-04 eta 9:35:47 +epoch [28/50] batch [925/1000] time 1.539 (1.565) data 0.000 (0.001) loss 1.5801 (1.1009) acc 65.6250 (72.3547) lr 9.3721e-04 eta 9:35:37 +epoch [28/50] batch [930/1000] time 1.590 (1.565) data 0.000 (0.001) loss 1.1533 (1.1012) acc 68.7500 (72.3454) lr 9.3721e-04 eta 9:35:30 +epoch [28/50] batch [935/1000] time 1.542 (1.565) data 0.000 (0.001) loss 1.1543 (1.1016) acc 68.7500 (72.3463) lr 9.3721e-04 eta 9:35:21 +epoch [28/50] batch [940/1000] time 1.556 (1.565) data 0.000 (0.001) loss 1.3584 (1.1017) acc 75.0000 (72.3338) lr 9.3721e-04 eta 9:35:14 +epoch [28/50] batch [945/1000] time 1.530 (1.565) data 0.001 (0.001) loss 1.0137 (1.1016) acc 75.0000 (72.3413) lr 9.3721e-04 eta 9:35:06 +epoch [28/50] batch [950/1000] time 1.542 (1.564) data 0.001 (0.001) loss 1.0391 (1.1029) acc 71.8750 (72.3158) lr 9.3721e-04 eta 9:34:56 +epoch [28/50] batch [955/1000] time 1.569 (1.564) data 0.000 (0.001) loss 1.1455 (1.1037) acc 71.8750 (72.2938) lr 9.3721e-04 eta 9:34:48 +epoch [28/50] batch [960/1000] time 1.564 (1.564) data 0.000 (0.001) loss 0.8896 (1.1040) acc 71.8750 (72.2949) lr 9.3721e-04 eta 9:34:39 +epoch [28/50] batch [965/1000] time 1.558 (1.564) data 0.000 (0.001) loss 0.5439 (1.1045) acc 81.2500 (72.3122) lr 9.3721e-04 eta 9:34:31 +epoch [28/50] batch [970/1000] time 1.539 (1.564) data 0.000 (0.001) loss 0.8916 (1.1047) acc 75.0000 (72.2938) lr 9.3721e-04 eta 9:34:21 +epoch [28/50] 
batch [975/1000] time 1.541 (1.564) data 0.000 (0.001) loss 1.2295 (1.1050) acc 71.8750 (72.3013) lr 9.3721e-04 eta 9:34:12 +epoch [28/50] batch [980/1000] time 1.540 (1.564) data 0.000 (0.001) loss 1.1357 (1.1052) acc 78.1250 (72.2927) lr 9.3721e-04 eta 9:34:08 +epoch [28/50] batch [985/1000] time 1.558 (1.564) data 0.001 (0.001) loss 0.8657 (1.1042) acc 78.1250 (72.3001) lr 9.3721e-04 eta 9:34:00 +epoch [28/50] batch [990/1000] time 1.542 (1.564) data 0.000 (0.001) loss 1.2803 (1.1048) acc 68.7500 (72.3138) lr 9.3721e-04 eta 9:33:52 +epoch [28/50] batch [995/1000] time 1.576 (1.564) data 0.000 (0.001) loss 0.8628 (1.1052) acc 68.7500 (72.2802) lr 9.3721e-04 eta 9:33:44 +epoch [28/50] batch [1000/1000] time 1.553 (1.564) data 0.000 (0.001) loss 0.8125 (1.1054) acc 71.8750 (72.2594) lr 8.7467e-04 eta 9:33:36 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,313 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 78.2% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [29/50] batch [5/1000] time 1.533 (1.768) data 0.000 (0.270) loss 1.0645 (1.0448) acc 71.8750 (75.6250) lr 8.7467e-04 eta 10:47:58 +epoch [29/50] batch [10/1000] time 1.547 (1.690) data 0.000 (0.135) loss 1.3799 (1.0825) acc 65.6250 (74.3750) lr 8.7467e-04 eta 10:19:24 +epoch [29/50] batch [15/1000] time 1.567 (1.646) data 0.001 (0.090) loss 0.8784 (1.0154) acc 62.5000 (74.7917) lr 8.7467e-04 eta 10:03:03 +epoch [29/50] batch [20/1000] time 1.587 (1.626) data 0.000 (0.068) loss 0.9658 (1.0087) acc 71.8750 (74.6875) lr 8.7467e-04 eta 9:55:29 +epoch [29/50] batch [25/1000] time 1.540 (1.612) data 0.000 (0.054) loss 0.9175 (1.0119) acc 84.3750 (74.8750) lr 8.7467e-04 eta 9:50:13 +epoch [29/50] batch [30/1000] time 1.569 (1.602) data 0.000 (0.045) loss 1.5244 (1.0269) acc 68.7500 (74.8958) lr 8.7467e-04 eta 9:46:46 +epoch [29/50] batch [35/1000] time 1.567 (1.598) data 0.000 (0.039) loss 0.9624 
(1.0342) acc 75.0000 (74.8214) lr 8.7467e-04 eta 9:45:08 +epoch [29/50] batch [40/1000] time 1.575 (1.594) data 0.000 (0.034) loss 0.4290 (1.0263) acc 87.5000 (75.0000) lr 8.7467e-04 eta 9:43:29 +epoch [29/50] batch [45/1000] time 1.579 (1.592) data 0.000 (0.030) loss 1.0527 (1.0479) acc 78.1250 (74.3056) lr 8.7467e-04 eta 9:42:25 +epoch [29/50] batch [50/1000] time 1.554 (1.589) data 0.001 (0.027) loss 1.5000 (1.0557) acc 56.2500 (73.8125) lr 8.7467e-04 eta 9:41:24 +epoch [29/50] batch [55/1000] time 1.535 (1.586) data 0.001 (0.025) loss 1.1328 (1.0772) acc 75.0000 (73.6932) lr 8.7467e-04 eta 9:40:04 +epoch [29/50] batch [60/1000] time 1.554 (1.583) data 0.000 (0.023) loss 1.2646 (1.0875) acc 71.8750 (73.4896) lr 8.7467e-04 eta 9:39:01 +epoch [29/50] batch [65/1000] time 1.565 (1.581) data 0.000 (0.021) loss 1.0439 (1.1025) acc 75.0000 (73.0769) lr 8.7467e-04 eta 9:38:09 +epoch [29/50] batch [70/1000] time 1.587 (1.580) data 0.010 (0.020) loss 1.4043 (1.0871) acc 65.6250 (73.2143) lr 8.7467e-04 eta 9:37:33 +epoch [29/50] batch [75/1000] time 1.555 (1.579) data 0.000 (0.019) loss 1.0840 (1.0944) acc 71.8750 (73.0000) lr 8.7467e-04 eta 9:36:56 +epoch [29/50] batch [80/1000] time 1.553 (1.578) data 0.000 (0.017) loss 1.3662 (1.0977) acc 65.6250 (72.8906) lr 8.7467e-04 eta 9:36:25 +epoch [29/50] batch [85/1000] time 1.551 (1.577) data 0.001 (0.016) loss 1.6748 (1.1106) acc 65.6250 (72.3529) lr 8.7467e-04 eta 9:35:54 +epoch [29/50] batch [90/1000] time 1.555 (1.576) data 0.000 (0.016) loss 1.3076 (1.1185) acc 59.3750 (72.0486) lr 8.7467e-04 eta 9:35:35 +epoch [29/50] batch [95/1000] time 1.567 (1.576) data 0.000 (0.015) loss 1.0840 (1.1226) acc 68.7500 (72.0724) lr 8.7467e-04 eta 9:35:14 +epoch [29/50] batch [100/1000] time 1.581 (1.575) data 0.000 (0.014) loss 1.5898 (1.1230) acc 62.5000 (72.0312) lr 8.7467e-04 eta 9:35:02 +epoch [29/50] batch [105/1000] time 1.583 (1.575) data 0.001 (0.013) loss 1.1631 (1.1212) acc 71.8750 (72.0833) lr 8.7467e-04 eta 9:34:52 +epoch 
[29/50] batch [110/1000] time 1.561 (1.576) data 0.001 (0.013) loss 1.6230 (1.1232) acc 59.3750 (72.0170) lr 8.7467e-04 eta 9:35:02 +epoch [29/50] batch [115/1000] time 1.580 (1.576) data 0.001 (0.012) loss 0.7456 (1.1168) acc 81.2500 (72.1196) lr 8.7467e-04 eta 9:34:41 +epoch [29/50] batch [120/1000] time 1.549 (1.575) data 0.000 (0.012) loss 1.1357 (1.1166) acc 68.7500 (71.9792) lr 8.7467e-04 eta 9:34:29 +epoch [29/50] batch [125/1000] time 1.578 (1.575) data 0.001 (0.011) loss 0.5894 (1.1092) acc 84.3750 (72.2250) lr 8.7467e-04 eta 9:34:11 +epoch [29/50] batch [130/1000] time 1.578 (1.575) data 0.000 (0.011) loss 0.9209 (1.1009) acc 71.8750 (72.3798) lr 8.7467e-04 eta 9:33:56 +epoch [29/50] batch [135/1000] time 1.550 (1.574) data 0.001 (0.011) loss 1.2627 (1.1058) acc 71.8750 (72.2222) lr 8.7467e-04 eta 9:33:32 +epoch [29/50] batch [140/1000] time 1.568 (1.574) data 0.000 (0.010) loss 0.9971 (1.1098) acc 71.8750 (72.1652) lr 8.7467e-04 eta 9:33:33 +epoch [29/50] batch [145/1000] time 1.572 (1.574) data 0.000 (0.010) loss 0.9443 (1.1070) acc 78.1250 (72.2845) lr 8.7467e-04 eta 9:33:26 +epoch [29/50] batch [150/1000] time 1.558 (1.574) data 0.001 (0.010) loss 1.6143 (1.1149) acc 68.7500 (72.1042) lr 8.7467e-04 eta 9:33:16 +epoch [29/50] batch [155/1000] time 1.558 (1.575) data 0.000 (0.009) loss 0.9922 (1.1116) acc 68.7500 (72.1774) lr 8.7467e-04 eta 9:33:26 +epoch [29/50] batch [160/1000] time 1.561 (1.575) data 0.000 (0.009) loss 0.7168 (1.1045) acc 78.1250 (72.3242) lr 8.7467e-04 eta 9:33:15 +epoch [29/50] batch [165/1000] time 1.585 (1.575) data 0.001 (0.009) loss 0.9736 (1.1083) acc 81.2500 (72.2348) lr 8.7467e-04 eta 9:33:12 +epoch [29/50] batch [170/1000] time 1.564 (1.575) data 0.001 (0.008) loss 1.1748 (1.1101) acc 75.0000 (72.2243) lr 8.7467e-04 eta 9:32:58 +epoch [29/50] batch [175/1000] time 1.564 (1.574) data 0.000 (0.008) loss 0.9316 (1.1127) acc 81.2500 (72.2679) lr 8.7467e-04 eta 9:32:41 +epoch [29/50] batch [180/1000] time 1.557 (1.574) data 
0.001 (0.008) loss 0.8364 (1.1143) acc 68.7500 (72.2917) lr 8.7467e-04 eta 9:32:25 +epoch [29/50] batch [185/1000] time 1.553 (1.574) data 0.000 (0.008) loss 1.4238 (1.1141) acc 56.2500 (72.2804) lr 8.7467e-04 eta 9:32:10 +epoch [29/50] batch [190/1000] time 1.552 (1.573) data 0.000 (0.008) loss 0.9375 (1.1152) acc 65.6250 (72.1711) lr 8.7467e-04 eta 9:31:50 +epoch [29/50] batch [195/1000] time 1.574 (1.573) data 0.000 (0.007) loss 1.2832 (1.1153) acc 62.5000 (72.1635) lr 8.7467e-04 eta 9:31:35 +epoch [29/50] batch [200/1000] time 1.563 (1.573) data 0.000 (0.007) loss 0.7549 (1.1146) acc 81.2500 (72.2188) lr 8.7467e-04 eta 9:31:36 +epoch [29/50] batch [205/1000] time 1.560 (1.573) data 0.001 (0.007) loss 0.9033 (1.1179) acc 75.0000 (72.1037) lr 8.7467e-04 eta 9:31:28 +epoch [29/50] batch [210/1000] time 1.559 (1.573) data 0.000 (0.007) loss 1.5068 (1.1153) acc 68.7500 (72.2024) lr 8.7467e-04 eta 9:31:10 +epoch [29/50] batch [215/1000] time 1.561 (1.573) data 0.001 (0.007) loss 1.5654 (1.1196) acc 65.6250 (72.1512) lr 8.7467e-04 eta 9:30:56 +epoch [29/50] batch [220/1000] time 1.570 (1.572) data 0.000 (0.007) loss 1.5537 (1.1220) acc 59.3750 (72.0597) lr 8.7467e-04 eta 9:30:44 +epoch [29/50] batch [225/1000] time 1.567 (1.572) data 0.000 (0.006) loss 0.8452 (1.1150) acc 78.1250 (72.2500) lr 8.7467e-04 eta 9:30:34 +epoch [29/50] batch [230/1000] time 1.572 (1.572) data 0.000 (0.006) loss 1.3066 (1.1116) acc 75.0000 (72.2690) lr 8.7467e-04 eta 9:30:22 +epoch [29/50] batch [235/1000] time 1.572 (1.572) data 0.000 (0.006) loss 1.3105 (1.1148) acc 68.7500 (72.2074) lr 8.7467e-04 eta 9:30:11 +epoch [29/50] batch [240/1000] time 1.555 (1.572) data 0.000 (0.006) loss 0.8652 (1.1090) acc 71.8750 (72.3438) lr 8.7467e-04 eta 9:30:01 +epoch [29/50] batch [245/1000] time 1.547 (1.571) data 0.000 (0.006) loss 0.8428 (1.1112) acc 81.2500 (72.2577) lr 8.7467e-04 eta 9:29:44 +epoch [29/50] batch [250/1000] time 1.600 (1.571) data 0.000 (0.006) loss 1.5156 (1.1112) acc 65.6250 
(72.2500) lr 8.7467e-04 eta 9:29:29 +epoch [29/50] batch [255/1000] time 1.571 (1.571) data 0.000 (0.006) loss 2.2031 (1.1190) acc 46.8750 (72.0466) lr 8.7467e-04 eta 9:29:14 +epoch [29/50] batch [260/1000] time 1.737 (1.571) data 0.000 (0.006) loss 1.2285 (1.1196) acc 71.8750 (72.0433) lr 8.7467e-04 eta 9:29:18 +epoch [29/50] batch [265/1000] time 1.572 (1.571) data 0.000 (0.006) loss 0.8750 (1.1175) acc 75.0000 (72.0991) lr 8.7467e-04 eta 9:29:09 +epoch [29/50] batch [270/1000] time 1.564 (1.571) data 0.001 (0.005) loss 0.8140 (1.1152) acc 81.2500 (72.1528) lr 8.7467e-04 eta 9:28:57 +epoch [29/50] batch [275/1000] time 1.590 (1.571) data 0.000 (0.005) loss 0.9829 (1.1148) acc 78.1250 (72.1477) lr 8.7467e-04 eta 9:28:51 +epoch [29/50] batch [280/1000] time 1.564 (1.571) data 0.001 (0.005) loss 1.3809 (1.1169) acc 65.6250 (72.1205) lr 8.7467e-04 eta 9:28:39 +epoch [29/50] batch [285/1000] time 1.549 (1.571) data 0.001 (0.005) loss 0.5708 (1.1126) acc 81.2500 (72.1491) lr 8.7467e-04 eta 9:28:28 +epoch [29/50] batch [290/1000] time 1.581 (1.571) data 0.000 (0.005) loss 1.1250 (1.1145) acc 68.7500 (72.1444) lr 8.7467e-04 eta 9:28:18 +epoch [29/50] batch [295/1000] time 1.567 (1.570) data 0.001 (0.005) loss 1.0205 (1.1148) acc 68.7500 (72.1398) lr 8.7467e-04 eta 9:28:02 +epoch [29/50] batch [300/1000] time 1.549 (1.570) data 0.000 (0.005) loss 1.0889 (1.1174) acc 68.7500 (72.0729) lr 8.7467e-04 eta 9:27:48 +epoch [29/50] batch [305/1000] time 1.711 (1.570) data 0.001 (0.005) loss 1.2363 (1.1142) acc 68.7500 (72.1619) lr 8.7467e-04 eta 9:27:49 +epoch [29/50] batch [310/1000] time 1.542 (1.570) data 0.001 (0.005) loss 1.4404 (1.1128) acc 62.5000 (72.1472) lr 8.7467e-04 eta 9:27:32 +epoch [29/50] batch [315/1000] time 1.587 (1.570) data 0.001 (0.005) loss 1.3320 (1.1142) acc 68.7500 (72.0933) lr 8.7467e-04 eta 9:27:21 +epoch [29/50] batch [320/1000] time 1.573 (1.570) data 0.000 (0.005) loss 0.8174 (1.1143) acc 75.0000 (72.0898) lr 8.7467e-04 eta 9:27:13 +epoch [29/50] 
batch [325/1000] time 1.569 (1.570) data 0.000 (0.005) loss 1.1826 (1.1136) acc 78.1250 (72.1058) lr 8.7467e-04 eta 9:27:04 +epoch [29/50] batch [330/1000] time 1.542 (1.570) data 0.000 (0.005) loss 0.5469 (1.1119) acc 81.2500 (72.1117) lr 8.7467e-04 eta 9:26:56 +epoch [29/50] batch [335/1000] time 1.581 (1.570) data 0.001 (0.005) loss 0.9902 (1.1107) acc 65.6250 (72.0989) lr 8.7467e-04 eta 9:26:46 +epoch [29/50] batch [340/1000] time 1.570 (1.570) data 0.000 (0.004) loss 1.4951 (1.1118) acc 59.3750 (72.0037) lr 8.7467e-04 eta 9:26:35 +epoch [29/50] batch [345/1000] time 1.562 (1.569) data 0.000 (0.004) loss 1.2373 (1.1145) acc 59.3750 (71.9293) lr 8.7467e-04 eta 9:26:23 +epoch [29/50] batch [350/1000] time 1.549 (1.570) data 0.000 (0.004) loss 1.0811 (1.1127) acc 71.8750 (71.9554) lr 8.7467e-04 eta 9:26:21 +epoch [29/50] batch [355/1000] time 1.583 (1.569) data 0.000 (0.004) loss 0.8716 (1.1122) acc 78.1250 (72.0070) lr 8.7467e-04 eta 9:26:10 +epoch [29/50] batch [360/1000] time 1.576 (1.570) data 0.001 (0.004) loss 0.6875 (1.1057) acc 81.2500 (72.1354) lr 8.7467e-04 eta 9:26:07 +epoch [29/50] batch [365/1000] time 1.565 (1.570) data 0.001 (0.004) loss 1.0039 (1.1050) acc 75.0000 (72.1404) lr 8.7467e-04 eta 9:25:56 +epoch [29/50] batch [370/1000] time 1.576 (1.569) data 0.000 (0.004) loss 1.0840 (1.1046) acc 65.6250 (72.1030) lr 8.7467e-04 eta 9:25:44 +epoch [29/50] batch [375/1000] time 1.553 (1.569) data 0.000 (0.004) loss 1.3184 (1.1010) acc 78.1250 (72.2000) lr 8.7467e-04 eta 9:25:35 +epoch [29/50] batch [380/1000] time 1.563 (1.569) data 0.000 (0.004) loss 1.5723 (1.1051) acc 65.6250 (72.1546) lr 8.7467e-04 eta 9:25:26 +epoch [29/50] batch [385/1000] time 1.546 (1.569) data 0.000 (0.004) loss 1.1953 (1.1004) acc 78.1250 (72.2565) lr 8.7467e-04 eta 9:25:18 +epoch [29/50] batch [390/1000] time 1.583 (1.569) data 0.000 (0.004) loss 0.9727 (1.0999) acc 78.1250 (72.2997) lr 8.7467e-04 eta 9:25:08 +epoch [29/50] batch [395/1000] time 1.551 (1.569) data 0.001 
(0.004) loss 1.3154 (1.0994) acc 65.6250 (72.2706) lr 8.7467e-04 eta 9:25:02 +epoch [29/50] batch [400/1000] time 1.560 (1.569) data 0.000 (0.004) loss 0.9438 (1.0993) acc 81.2500 (72.2891) lr 8.7467e-04 eta 9:24:48 +epoch [29/50] batch [405/1000] time 1.563 (1.569) data 0.001 (0.004) loss 1.5361 (1.1005) acc 71.8750 (72.2531) lr 8.7467e-04 eta 9:24:39 +epoch [29/50] batch [410/1000] time 1.597 (1.569) data 0.001 (0.004) loss 1.3975 (1.1028) acc 68.7500 (72.1646) lr 8.7467e-04 eta 9:24:32 +epoch [29/50] batch [415/1000] time 1.554 (1.569) data 0.001 (0.004) loss 1.3477 (1.1054) acc 68.7500 (72.1235) lr 8.7467e-04 eta 9:24:33 +epoch [29/50] batch [420/1000] time 1.567 (1.569) data 0.001 (0.004) loss 1.1367 (1.1067) acc 75.0000 (72.1280) lr 8.7467e-04 eta 9:24:26 +epoch [29/50] batch [425/1000] time 1.568 (1.569) data 0.001 (0.004) loss 1.0361 (1.1056) acc 65.6250 (72.1250) lr 8.7467e-04 eta 9:24:17 +epoch [29/50] batch [430/1000] time 1.566 (1.569) data 0.000 (0.004) loss 0.8062 (1.1066) acc 75.0000 (72.1512) lr 8.7467e-04 eta 9:24:09 +epoch [29/50] batch [435/1000] time 1.577 (1.569) data 0.000 (0.004) loss 1.3555 (1.1070) acc 62.5000 (72.1336) lr 8.7467e-04 eta 9:24:01 +epoch [29/50] batch [440/1000] time 1.557 (1.569) data 0.001 (0.004) loss 0.8574 (1.1061) acc 71.8750 (72.1307) lr 8.7467e-04 eta 9:23:49 +epoch [29/50] batch [445/1000] time 1.573 (1.569) data 0.001 (0.004) loss 1.5059 (1.1043) acc 53.1250 (72.0997) lr 8.7467e-04 eta 9:23:42 +epoch [29/50] batch [450/1000] time 1.555 (1.569) data 0.000 (0.003) loss 1.3350 (1.1047) acc 68.7500 (72.1181) lr 8.7467e-04 eta 9:23:32 +epoch [29/50] batch [455/1000] time 1.574 (1.569) data 0.001 (0.003) loss 1.8096 (1.1067) acc 62.5000 (72.0879) lr 8.7467e-04 eta 9:23:21 +epoch [29/50] batch [460/1000] time 1.562 (1.569) data 0.000 (0.003) loss 1.0654 (1.1068) acc 75.0000 (72.1060) lr 8.7467e-04 eta 9:23:18 +epoch [29/50] batch [465/1000] time 1.547 (1.569) data 0.000 (0.003) loss 0.9609 (1.1084) acc 84.3750 (72.1035) lr 
8.7467e-04 eta 9:23:05 +epoch [29/50] batch [470/1000] time 1.572 (1.569) data 0.000 (0.003) loss 0.7646 (1.1060) acc 87.5000 (72.1543) lr 8.7467e-04 eta 9:22:55 +epoch [29/50] batch [475/1000] time 1.570 (1.569) data 0.001 (0.003) loss 0.8789 (1.1068) acc 71.8750 (72.1579) lr 8.7467e-04 eta 9:22:45 +epoch [29/50] batch [480/1000] time 1.552 (1.569) data 0.001 (0.003) loss 0.5337 (1.1060) acc 81.2500 (72.1680) lr 8.7467e-04 eta 9:22:34 +epoch [29/50] batch [485/1000] time 1.553 (1.568) data 0.000 (0.003) loss 1.0479 (1.1055) acc 75.0000 (72.1392) lr 8.7467e-04 eta 9:22:24 +epoch [29/50] batch [490/1000] time 1.539 (1.568) data 0.000 (0.003) loss 0.9375 (1.1070) acc 75.0000 (72.1365) lr 8.7467e-04 eta 9:22:14 +epoch [29/50] batch [495/1000] time 1.559 (1.568) data 0.000 (0.003) loss 1.2207 (1.1059) acc 71.8750 (72.1528) lr 8.7467e-04 eta 9:22:05 +epoch [29/50] batch [500/1000] time 1.557 (1.568) data 0.000 (0.003) loss 1.7578 (1.1074) acc 59.3750 (72.1250) lr 8.7467e-04 eta 9:22:02 +epoch [29/50] batch [505/1000] time 1.550 (1.568) data 0.000 (0.003) loss 0.8223 (1.1070) acc 75.0000 (72.1349) lr 8.7467e-04 eta 9:21:51 +epoch [29/50] batch [510/1000] time 1.555 (1.568) data 0.000 (0.003) loss 0.8286 (1.1065) acc 78.1250 (72.1507) lr 8.7467e-04 eta 9:21:40 +epoch [29/50] batch [515/1000] time 1.549 (1.568) data 0.000 (0.003) loss 1.1895 (1.1050) acc 68.7500 (72.1299) lr 8.7467e-04 eta 9:21:33 +epoch [29/50] batch [520/1000] time 1.556 (1.568) data 0.000 (0.003) loss 0.7051 (1.1015) acc 75.0000 (72.1875) lr 8.7467e-04 eta 9:21:23 +epoch [29/50] batch [525/1000] time 1.566 (1.568) data 0.000 (0.003) loss 1.2725 (1.0996) acc 65.6250 (72.2440) lr 8.7467e-04 eta 9:21:14 +epoch [29/50] batch [530/1000] time 1.574 (1.568) data 0.000 (0.003) loss 1.2451 (1.0997) acc 65.6250 (72.2347) lr 8.7467e-04 eta 9:21:02 +epoch [29/50] batch [535/1000] time 1.557 (1.568) data 0.001 (0.003) loss 1.0967 (1.1012) acc 71.8750 (72.2138) lr 8.7467e-04 eta 9:20:54 +epoch [29/50] batch 
[540/1000] time 1.589 (1.568) data 0.000 (0.003) loss 0.6431 (1.1006) acc 84.3750 (72.2338) lr 8.7467e-04 eta 9:20:46 +epoch [29/50] batch [545/1000] time 1.560 (1.568) data 0.000 (0.003) loss 1.0566 (1.0992) acc 68.7500 (72.2362) lr 8.7467e-04 eta 9:20:36 +epoch [29/50] batch [550/1000] time 1.573 (1.568) data 0.000 (0.003) loss 0.8213 (1.0990) acc 75.0000 (72.2784) lr 8.7467e-04 eta 9:20:28 +epoch [29/50] batch [555/1000] time 1.547 (1.568) data 0.000 (0.003) loss 0.7783 (1.1000) acc 81.2500 (72.2635) lr 8.7467e-04 eta 9:20:18 +epoch [29/50] batch [560/1000] time 1.535 (1.568) data 0.000 (0.003) loss 0.6636 (1.1008) acc 84.3750 (72.2321) lr 8.7467e-04 eta 9:20:08 +epoch [29/50] batch [565/1000] time 1.551 (1.568) data 0.000 (0.003) loss 1.0957 (1.0994) acc 75.0000 (72.2677) lr 8.7467e-04 eta 9:20:07 +epoch [29/50] batch [570/1000] time 1.559 (1.568) data 0.000 (0.003) loss 0.8501 (1.0999) acc 81.2500 (72.2917) lr 8.7467e-04 eta 9:19:59 +epoch [29/50] batch [575/1000] time 1.574 (1.568) data 0.001 (0.003) loss 0.9443 (1.1020) acc 75.0000 (72.2663) lr 8.7467e-04 eta 9:19:51 +epoch [29/50] batch [580/1000] time 1.563 (1.568) data 0.000 (0.003) loss 1.0605 (1.1021) acc 75.0000 (72.2414) lr 8.7467e-04 eta 9:19:44 +epoch [29/50] batch [585/1000] time 1.556 (1.568) data 0.000 (0.003) loss 1.3965 (1.1021) acc 50.0000 (72.1955) lr 8.7467e-04 eta 9:19:35 +epoch [29/50] batch [590/1000] time 1.568 (1.568) data 0.000 (0.003) loss 1.1924 (1.1034) acc 81.2500 (72.2034) lr 8.7467e-04 eta 9:19:26 +epoch [29/50] batch [595/1000] time 1.581 (1.568) data 0.000 (0.003) loss 1.1299 (1.1033) acc 81.2500 (72.2059) lr 8.7467e-04 eta 9:19:17 +epoch [29/50] batch [600/1000] time 1.542 (1.568) data 0.001 (0.003) loss 1.4277 (1.1021) acc 65.6250 (72.2344) lr 8.7467e-04 eta 9:19:07 +epoch [29/50] batch [605/1000] time 1.570 (1.568) data 0.000 (0.003) loss 0.6812 (1.1001) acc 75.0000 (72.2624) lr 8.7467e-04 eta 9:18:58 +epoch [29/50] batch [610/1000] time 1.554 (1.568) data 0.001 (0.003) loss 
1.5166 (1.0997) acc 65.6250 (72.2951) lr 8.7467e-04 eta 9:18:55 +epoch [29/50] batch [615/1000] time 1.560 (1.568) data 0.000 (0.003) loss 1.5557 (1.0997) acc 62.5000 (72.2815) lr 8.7467e-04 eta 9:18:45 +epoch [29/50] batch [620/1000] time 1.581 (1.568) data 0.000 (0.003) loss 0.8198 (1.0996) acc 81.2500 (72.3034) lr 8.7467e-04 eta 9:18:38 +epoch [29/50] batch [625/1000] time 1.553 (1.568) data 0.000 (0.003) loss 1.3955 (1.1007) acc 68.7500 (72.2550) lr 8.7467e-04 eta 9:18:28 +epoch [29/50] batch [630/1000] time 1.575 (1.568) data 0.000 (0.003) loss 1.1348 (1.0992) acc 78.1250 (72.2917) lr 8.7467e-04 eta 9:18:21 +epoch [29/50] batch [635/1000] time 1.570 (1.568) data 0.000 (0.003) loss 1.0576 (1.0986) acc 71.8750 (72.3081) lr 8.7467e-04 eta 9:18:12 +epoch [29/50] batch [640/1000] time 1.534 (1.567) data 0.000 (0.003) loss 0.6665 (1.0974) acc 84.3750 (72.3389) lr 8.7467e-04 eta 9:17:59 +epoch [29/50] batch [645/1000] time 1.601 (1.567) data 0.000 (0.003) loss 1.5020 (1.0994) acc 68.7500 (72.3401) lr 8.7467e-04 eta 9:17:50 +epoch [29/50] batch [650/1000] time 1.725 (1.567) data 0.000 (0.003) loss 1.3496 (1.0995) acc 62.5000 (72.3317) lr 8.7467e-04 eta 9:17:45 +epoch [29/50] batch [655/1000] time 1.573 (1.567) data 0.000 (0.003) loss 1.2012 (1.0994) acc 68.7500 (72.2901) lr 8.7467e-04 eta 9:17:35 +epoch [29/50] batch [660/1000] time 1.561 (1.567) data 0.000 (0.003) loss 0.9268 (1.1010) acc 75.0000 (72.2680) lr 8.7467e-04 eta 9:17:27 +epoch [29/50] batch [665/1000] time 1.547 (1.567) data 0.000 (0.003) loss 0.6016 (1.1016) acc 84.3750 (72.2650) lr 8.7467e-04 eta 9:17:17 +epoch [29/50] batch [670/1000] time 1.541 (1.567) data 0.000 (0.002) loss 1.0664 (1.1001) acc 68.7500 (72.3041) lr 8.7467e-04 eta 9:17:08 +epoch [29/50] batch [675/1000] time 1.535 (1.567) data 0.000 (0.002) loss 1.2402 (1.0991) acc 71.8750 (72.3148) lr 8.7467e-04 eta 9:16:59 +epoch [29/50] batch [680/1000] time 1.547 (1.567) data 0.000 (0.002) loss 1.2461 (1.0997) acc 59.3750 (72.2932) lr 8.7467e-04 
eta 9:16:50 +epoch [29/50] batch [685/1000] time 1.558 (1.567) data 0.000 (0.002) loss 1.2256 (1.1000) acc 68.7500 (72.2947) lr 8.7467e-04 eta 9:16:40 +epoch [29/50] batch [690/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.2383 (1.1015) acc 56.2500 (72.2645) lr 8.7467e-04 eta 9:16:31 +epoch [29/50] batch [695/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.0283 (1.1000) acc 75.0000 (72.2887) lr 8.7467e-04 eta 9:16:23 +epoch [29/50] batch [700/1000] time 1.579 (1.567) data 0.000 (0.002) loss 0.4111 (1.0991) acc 93.7500 (72.3259) lr 8.7467e-04 eta 9:16:15 +epoch [29/50] batch [705/1000] time 1.579 (1.567) data 0.001 (0.002) loss 1.4512 (1.1018) acc 59.3750 (72.2917) lr 8.7467e-04 eta 9:16:07 +epoch [29/50] batch [710/1000] time 1.582 (1.567) data 0.000 (0.002) loss 1.9346 (1.1037) acc 62.5000 (72.2711) lr 8.7467e-04 eta 9:15:58 +epoch [29/50] batch [715/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.8877 (1.1036) acc 71.8750 (72.2684) lr 8.7467e-04 eta 9:15:55 +epoch [29/50] batch [720/1000] time 1.546 (1.567) data 0.000 (0.002) loss 0.9917 (1.1037) acc 81.2500 (72.2483) lr 8.7467e-04 eta 9:15:46 +epoch [29/50] batch [725/1000] time 1.552 (1.567) data 0.000 (0.002) loss 0.8501 (1.1039) acc 84.3750 (72.2586) lr 8.7467e-04 eta 9:15:37 +epoch [29/50] batch [730/1000] time 1.580 (1.567) data 0.001 (0.002) loss 1.8271 (1.1031) acc 56.2500 (72.2817) lr 8.7467e-04 eta 9:15:30 +epoch [29/50] batch [735/1000] time 1.561 (1.567) data 0.001 (0.002) loss 1.8721 (1.1036) acc 65.6250 (72.2662) lr 8.7467e-04 eta 9:15:22 +epoch [29/50] batch [740/1000] time 1.547 (1.567) data 0.000 (0.002) loss 1.4131 (1.1049) acc 62.5000 (72.2593) lr 8.7467e-04 eta 9:15:14 +epoch [29/50] batch [745/1000] time 1.561 (1.567) data 0.001 (0.002) loss 0.7393 (1.1043) acc 75.0000 (72.2693) lr 8.7467e-04 eta 9:15:06 +epoch [29/50] batch [750/1000] time 1.568 (1.567) data 0.000 (0.002) loss 0.7051 (1.1049) acc 87.5000 (72.2542) lr 8.7467e-04 eta 9:14:57 +epoch [29/50] batch [755/1000] time 
1.581 (1.567) data 0.000 (0.002) loss 0.7627 (1.1048) acc 81.2500 (72.2806) lr 8.7467e-04 eta 9:14:48 +epoch [29/50] batch [760/1000] time 1.548 (1.567) data 0.001 (0.002) loss 1.2471 (1.1048) acc 78.1250 (72.3026) lr 8.7467e-04 eta 9:14:43 +epoch [29/50] batch [765/1000] time 1.532 (1.567) data 0.000 (0.002) loss 0.8550 (1.1039) acc 75.0000 (72.2998) lr 8.7467e-04 eta 9:14:32 +epoch [29/50] batch [770/1000] time 1.555 (1.567) data 0.000 (0.002) loss 0.9922 (1.1045) acc 78.1250 (72.2930) lr 8.7467e-04 eta 9:14:22 +epoch [29/50] batch [775/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.3916 (1.1046) acc 62.5000 (72.3024) lr 8.7467e-04 eta 9:14:11 +epoch [29/50] batch [780/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.0098 (1.1051) acc 71.8750 (72.2917) lr 8.7467e-04 eta 9:14:00 +epoch [29/50] batch [785/1000] time 1.537 (1.566) data 0.000 (0.002) loss 1.1934 (1.1057) acc 71.8750 (72.2651) lr 8.7467e-04 eta 9:13:53 +epoch [29/50] batch [790/1000] time 1.559 (1.567) data 0.001 (0.002) loss 1.1006 (1.1053) acc 71.8750 (72.2785) lr 8.7467e-04 eta 9:13:45 +epoch [29/50] batch [795/1000] time 1.536 (1.566) data 0.001 (0.002) loss 1.0381 (1.1058) acc 71.8750 (72.2799) lr 8.7467e-04 eta 9:13:36 +epoch [29/50] batch [800/1000] time 1.543 (1.566) data 0.001 (0.002) loss 0.3418 (1.1054) acc 93.7500 (72.2930) lr 8.7467e-04 eta 9:13:26 +epoch [29/50] batch [805/1000] time 1.567 (1.566) data 0.000 (0.002) loss 0.8218 (1.1057) acc 84.3750 (72.2865) lr 8.7467e-04 eta 9:13:21 +epoch [29/50] batch [810/1000] time 1.594 (1.566) data 0.001 (0.002) loss 1.2734 (1.1043) acc 65.6250 (72.3225) lr 8.7467e-04 eta 9:13:12 +epoch [29/50] batch [815/1000] time 1.564 (1.566) data 0.001 (0.002) loss 1.2754 (1.1046) acc 62.5000 (72.3236) lr 8.7467e-04 eta 9:13:04 +epoch [29/50] batch [820/1000] time 1.547 (1.566) data 0.000 (0.002) loss 1.0225 (1.1036) acc 78.1250 (72.3514) lr 8.7467e-04 eta 9:12:55 +epoch [29/50] batch [825/1000] time 1.544 (1.566) data 0.000 (0.002) loss 1.1035 (1.1028) 
acc 75.0000 (72.3598) lr 8.7467e-04 eta 9:12:46 +epoch [29/50] batch [830/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.7666 (1.1024) acc 59.3750 (72.3758) lr 8.7467e-04 eta 9:12:37 +epoch [29/50] batch [835/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.1504 (1.1037) acc 71.8750 (72.3503) lr 8.7467e-04 eta 9:12:29 +epoch [29/50] batch [840/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.1895 (1.1041) acc 65.6250 (72.3251) lr 8.7467e-04 eta 9:12:21 +epoch [29/50] batch [845/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.7378 (1.1044) acc 81.2500 (72.3225) lr 8.7467e-04 eta 9:12:13 +epoch [29/50] batch [850/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.3350 (1.1055) acc 59.3750 (72.3088) lr 8.7467e-04 eta 9:12:04 +epoch [29/50] batch [855/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.5576 (1.1054) acc 59.3750 (72.3063) lr 8.7467e-04 eta 9:11:55 +epoch [29/50] batch [860/1000] time 1.575 (1.566) data 0.001 (0.002) loss 1.2900 (1.1059) acc 71.8750 (72.3183) lr 8.7467e-04 eta 9:11:48 +epoch [29/50] batch [865/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.1631 (1.1046) acc 71.8750 (72.3483) lr 8.7467e-04 eta 9:11:44 +epoch [29/50] batch [870/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.9087 (1.1053) acc 71.8750 (72.3384) lr 8.7467e-04 eta 9:11:36 +epoch [29/50] batch [875/1000] time 1.592 (1.566) data 0.001 (0.002) loss 0.9321 (1.1048) acc 68.7500 (72.3429) lr 8.7467e-04 eta 9:11:28 +epoch [29/50] batch [880/1000] time 1.548 (1.566) data 0.000 (0.002) loss 2.0898 (1.1050) acc 59.3750 (72.3260) lr 8.7467e-04 eta 9:11:20 +epoch [29/50] batch [885/1000] time 1.539 (1.566) data 0.001 (0.002) loss 1.7520 (1.1071) acc 68.7500 (72.2669) lr 8.7467e-04 eta 9:11:10 +epoch [29/50] batch [890/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.9233 (1.1061) acc 75.0000 (72.2963) lr 8.7467e-04 eta 9:11:01 +epoch [29/50] batch [895/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.0508 (1.1066) acc 68.7500 (72.2800) lr 8.7467e-04 eta 9:10:51 
+epoch [29/50] batch [900/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.1777 (1.1060) acc 68.7500 (72.2812) lr 8.7467e-04 eta 9:10:43 +epoch [29/50] batch [905/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.0684 (1.1055) acc 75.0000 (72.2894) lr 8.7467e-04 eta 9:10:36 +epoch [29/50] batch [910/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.1357 (1.1051) acc 68.7500 (72.2905) lr 8.7467e-04 eta 9:10:32 +epoch [29/50] batch [915/1000] time 1.550 (1.566) data 0.000 (0.002) loss 0.7056 (1.1047) acc 84.3750 (72.3122) lr 8.7467e-04 eta 9:10:23 +epoch [29/50] batch [920/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.1045 (1.1048) acc 68.7500 (72.3030) lr 8.7467e-04 eta 9:10:15 +epoch [29/50] batch [925/1000] time 1.581 (1.566) data 0.000 (0.002) loss 1.0107 (1.1050) acc 68.7500 (72.3074) lr 8.7467e-04 eta 9:10:07 +epoch [29/50] batch [930/1000] time 1.586 (1.566) data 0.001 (0.002) loss 1.1572 (1.1051) acc 62.5000 (72.3051) lr 8.7467e-04 eta 9:10:00 +epoch [29/50] batch [935/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.7563 (1.1044) acc 75.0000 (72.3061) lr 8.7467e-04 eta 9:09:52 +epoch [29/50] batch [940/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.2598 (1.1047) acc 75.0000 (72.3105) lr 8.7467e-04 eta 9:09:43 +epoch [29/50] batch [945/1000] time 1.565 (1.566) data 0.000 (0.002) loss 0.9980 (1.1047) acc 81.2500 (72.3247) lr 8.7467e-04 eta 9:09:34 +epoch [29/50] batch [950/1000] time 1.540 (1.566) data 0.001 (0.002) loss 0.9814 (1.1048) acc 81.2500 (72.3355) lr 8.7467e-04 eta 9:09:25 +epoch [29/50] batch [955/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.1719 (1.1043) acc 71.8750 (72.3527) lr 8.7467e-04 eta 9:09:20 +epoch [29/50] batch [960/1000] time 1.590 (1.566) data 0.000 (0.002) loss 1.0244 (1.1045) acc 71.8750 (72.3372) lr 8.7467e-04 eta 9:09:12 +epoch [29/50] batch [965/1000] time 1.600 (1.566) data 0.000 (0.002) loss 1.0479 (1.1036) acc 71.8750 (72.3413) lr 8.7467e-04 eta 9:09:06 +epoch [29/50] batch [970/1000] time 1.572 (1.566) 
data 0.000 (0.002) loss 1.3936 (1.1040) acc 62.5000 (72.3164) lr 8.7467e-04 eta 9:08:58 +epoch [29/50] batch [975/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.2168 (1.1048) acc 59.3750 (72.3045) lr 8.7467e-04 eta 9:08:50 +epoch [29/50] batch [980/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.3643 (1.1047) acc 65.6250 (72.3087) lr 8.7467e-04 eta 9:08:41 +epoch [29/50] batch [985/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.1865 (1.1055) acc 65.6250 (72.2970) lr 8.7467e-04 eta 9:08:33 +epoch [29/50] batch [990/1000] time 1.586 (1.566) data 0.000 (0.002) loss 0.9321 (1.1051) acc 75.0000 (72.3106) lr 8.7467e-04 eta 9:08:25 +epoch [29/50] batch [995/1000] time 1.577 (1.566) data 0.000 (0.002) loss 1.3535 (1.1047) acc 59.3750 (72.3178) lr 8.7467e-04 eta 9:08:17 +epoch [29/50] batch [1000/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.0703 (1.1057) acc 78.1250 (72.3125) lr 8.1262e-04 eta 9:08:08 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,333 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.2% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [30/50] batch [5/1000] time 1.567 (1.703) data 0.000 (0.195) loss 1.0635 (0.8290) acc 87.5000 (81.2500) lr 8.1262e-04 eta 9:55:46 +epoch [30/50] batch [10/1000] time 1.588 (1.636) data 0.001 (0.098) loss 1.6396 (0.9426) acc 65.6250 (78.1250) lr 8.1262e-04 eta 9:32:25 +epoch [30/50] batch [15/1000] time 1.561 (1.612) data 0.000 (0.065) loss 1.5205 (0.9632) acc 71.8750 (77.7083) lr 8.1262e-04 eta 9:23:50 +epoch [30/50] batch [20/1000] time 1.555 (1.601) data 0.000 (0.049) loss 1.0508 (0.9953) acc 65.6250 (76.2500) lr 8.1262e-04 eta 9:19:56 +epoch [30/50] batch [25/1000] time 1.561 (1.594) data 0.000 (0.039) loss 0.8916 (1.0317) acc 75.0000 (74.3750) lr 8.1262e-04 eta 9:17:23 +epoch [30/50] batch [30/1000] time 1.561 (1.598) data 0.000 (0.033) loss 1.1885 (1.0150) acc 71.8750 (74.0625) lr 8.1262e-04 
eta 9:18:34 +epoch [30/50] batch [35/1000] time 1.559 (1.593) data 0.000 (0.028) loss 1.3701 (1.0452) acc 59.3750 (73.3929) lr 8.1262e-04 eta 9:16:46 +epoch [30/50] batch [40/1000] time 1.568 (1.590) data 0.000 (0.025) loss 0.8379 (1.0400) acc 75.0000 (73.4375) lr 8.1262e-04 eta 9:15:18 +epoch [30/50] batch [45/1000] time 1.580 (1.587) data 0.000 (0.022) loss 0.8438 (1.0473) acc 71.8750 (73.1250) lr 8.1262e-04 eta 9:14:13 +epoch [30/50] batch [50/1000] time 1.566 (1.586) data 0.000 (0.020) loss 1.7412 (1.0532) acc 65.6250 (72.8750) lr 8.1262e-04 eta 9:13:37 +epoch [30/50] batch [55/1000] time 1.558 (1.584) data 0.001 (0.018) loss 1.5039 (1.0591) acc 62.5000 (72.7841) lr 8.1262e-04 eta 9:12:53 +epoch [30/50] batch [60/1000] time 1.546 (1.581) data 0.000 (0.017) loss 0.8330 (1.0489) acc 87.5000 (72.9688) lr 8.1262e-04 eta 9:11:52 +epoch [30/50] batch [65/1000] time 1.562 (1.581) data 0.001 (0.015) loss 1.3281 (1.0647) acc 75.0000 (72.7885) lr 8.1262e-04 eta 9:11:30 +epoch [30/50] batch [70/1000] time 1.716 (1.582) data 0.000 (0.014) loss 0.9141 (1.0650) acc 78.1250 (72.6786) lr 8.1262e-04 eta 9:11:41 +epoch [30/50] batch [75/1000] time 1.576 (1.581) data 0.001 (0.013) loss 1.1768 (1.0807) acc 71.8750 (72.7083) lr 8.1262e-04 eta 9:11:16 +epoch [30/50] batch [80/1000] time 1.559 (1.579) data 0.001 (0.013) loss 0.9565 (1.0722) acc 78.1250 (72.9297) lr 8.1262e-04 eta 9:10:41 +epoch [30/50] batch [85/1000] time 1.547 (1.579) data 0.001 (0.012) loss 1.0029 (1.0707) acc 75.0000 (72.9779) lr 8.1262e-04 eta 9:10:15 +epoch [30/50] batch [90/1000] time 1.559 (1.578) data 0.000 (0.011) loss 1.3857 (1.0690) acc 65.6250 (72.8472) lr 8.1262e-04 eta 9:09:46 +epoch [30/50] batch [95/1000] time 1.530 (1.576) data 0.000 (0.011) loss 0.7588 (1.0616) acc 78.1250 (72.9276) lr 8.1262e-04 eta 9:09:16 +epoch [30/50] batch [100/1000] time 1.573 (1.575) data 0.000 (0.010) loss 1.9844 (1.0685) acc 62.5000 (72.9375) lr 8.1262e-04 eta 9:08:47 +epoch [30/50] batch [105/1000] time 1.561 (1.575) 
data 0.000 (0.010) loss 0.6162 (1.0590) acc 84.3750 (73.0952) lr 8.1262e-04 eta 9:08:27 +epoch [30/50] batch [110/1000] time 1.564 (1.574) data 0.001 (0.009) loss 0.7646 (1.0563) acc 78.1250 (73.0682) lr 8.1262e-04 eta 9:07:56 +epoch [30/50] batch [115/1000] time 1.552 (1.573) data 0.000 (0.009) loss 0.9614 (1.0599) acc 71.8750 (72.9076) lr 8.1262e-04 eta 9:07:36 +epoch [30/50] batch [120/1000] time 1.573 (1.573) data 0.001 (0.009) loss 2.0781 (1.0735) acc 50.0000 (72.7083) lr 8.1262e-04 eta 9:07:24 +epoch [30/50] batch [125/1000] time 1.578 (1.573) data 0.001 (0.008) loss 1.2500 (1.0792) acc 75.0000 (72.6750) lr 8.1262e-04 eta 9:07:22 +epoch [30/50] batch [130/1000] time 1.560 (1.573) data 0.001 (0.008) loss 0.9355 (1.0712) acc 71.8750 (72.7163) lr 8.1262e-04 eta 9:07:05 +epoch [30/50] batch [135/1000] time 1.552 (1.574) data 0.000 (0.008) loss 0.7251 (1.0668) acc 78.1250 (72.9167) lr 8.1262e-04 eta 9:07:15 +epoch [30/50] batch [140/1000] time 1.549 (1.573) data 0.001 (0.007) loss 1.3115 (1.0804) acc 71.8750 (72.7455) lr 8.1262e-04 eta 9:06:55 +epoch [30/50] batch [145/1000] time 1.560 (1.572) data 0.000 (0.007) loss 0.9297 (1.0735) acc 65.6250 (72.7155) lr 8.1262e-04 eta 9:06:32 +epoch [30/50] batch [150/1000] time 1.564 (1.572) data 0.001 (0.007) loss 0.8857 (1.0730) acc 71.8750 (72.6667) lr 8.1262e-04 eta 9:06:17 +epoch [30/50] batch [155/1000] time 1.577 (1.572) data 0.000 (0.007) loss 0.8716 (1.0731) acc 71.8750 (72.6613) lr 8.1262e-04 eta 9:06:08 +epoch [30/50] batch [160/1000] time 1.555 (1.572) data 0.001 (0.007) loss 0.5015 (1.0721) acc 87.5000 (72.6953) lr 8.1262e-04 eta 9:05:59 +epoch [30/50] batch [165/1000] time 1.559 (1.572) data 0.000 (0.006) loss 1.3281 (1.0738) acc 68.7500 (72.5947) lr 8.1262e-04 eta 9:05:57 +epoch [30/50] batch [170/1000] time 1.565 (1.572) data 0.000 (0.006) loss 0.8521 (1.0773) acc 78.1250 (72.4816) lr 8.1262e-04 eta 9:05:41 +epoch [30/50] batch [175/1000] time 1.544 (1.571) data 0.001 (0.006) loss 1.2617 (1.0795) acc 71.8750 
(72.5000) lr 8.1262e-04 eta 9:05:25 +epoch [30/50] batch [180/1000] time 1.562 (1.572) data 0.000 (0.006) loss 0.8682 (1.0763) acc 75.0000 (72.5347) lr 8.1262e-04 eta 9:05:24 +epoch [30/50] batch [185/1000] time 1.552 (1.571) data 0.000 (0.006) loss 1.3555 (1.0838) acc 68.7500 (72.4155) lr 8.1262e-04 eta 9:05:10 +epoch [30/50] batch [190/1000] time 1.543 (1.571) data 0.001 (0.006) loss 0.7920 (1.0757) acc 81.2500 (72.5822) lr 8.1262e-04 eta 9:04:53 +epoch [30/50] batch [195/1000] time 1.546 (1.571) data 0.001 (0.006) loss 1.1807 (1.0753) acc 59.3750 (72.4840) lr 8.1262e-04 eta 9:04:36 +epoch [30/50] batch [200/1000] time 1.550 (1.570) data 0.000 (0.005) loss 0.8140 (1.0831) acc 81.2500 (72.4531) lr 8.1262e-04 eta 9:04:21 +epoch [30/50] batch [205/1000] time 1.572 (1.570) data 0.001 (0.005) loss 1.3633 (1.0903) acc 59.3750 (72.3018) lr 8.1262e-04 eta 9:04:12 +epoch [30/50] batch [210/1000] time 1.553 (1.570) data 0.001 (0.005) loss 0.9429 (1.0896) acc 78.1250 (72.3363) lr 8.1262e-04 eta 9:03:58 +epoch [30/50] batch [215/1000] time 1.534 (1.570) data 0.000 (0.005) loss 1.2080 (1.0892) acc 62.5000 (72.3692) lr 8.1262e-04 eta 9:03:43 +epoch [30/50] batch [220/1000] time 1.550 (1.569) data 0.000 (0.005) loss 0.8799 (1.0900) acc 75.0000 (72.4290) lr 8.1262e-04 eta 9:03:27 +epoch [30/50] batch [225/1000] time 1.545 (1.570) data 0.001 (0.005) loss 1.1641 (1.0894) acc 75.0000 (72.4028) lr 8.1262e-04 eta 9:03:35 +epoch [30/50] batch [230/1000] time 1.550 (1.570) data 0.000 (0.005) loss 1.2363 (1.0890) acc 65.6250 (72.2826) lr 8.1262e-04 eta 9:03:21 +epoch [30/50] batch [235/1000] time 1.540 (1.569) data 0.000 (0.005) loss 1.1729 (1.0897) acc 75.0000 (72.3271) lr 8.1262e-04 eta 9:03:10 +epoch [30/50] batch [240/1000] time 1.534 (1.569) data 0.000 (0.005) loss 1.3057 (1.0935) acc 65.6250 (72.2005) lr 8.1262e-04 eta 9:02:51 +epoch [30/50] batch [245/1000] time 1.548 (1.569) data 0.000 (0.004) loss 1.3604 (1.0944) acc 71.8750 (72.1939) lr 8.1262e-04 eta 9:02:39 +epoch [30/50] 
batch [250/1000] time 1.559 (1.568) data 0.001 (0.004) loss 0.8970 (1.0925) acc 75.0000 (72.2500) lr 8.1262e-04 eta 9:02:25 +epoch [30/50] batch [255/1000] time 1.557 (1.568) data 0.001 (0.004) loss 1.5166 (1.0928) acc 62.5000 (72.2672) lr 8.1262e-04 eta 9:02:11 +epoch [30/50] batch [260/1000] time 1.561 (1.568) data 0.001 (0.004) loss 1.1855 (1.0971) acc 68.7500 (72.2115) lr 8.1262e-04 eta 9:02:00 +epoch [30/50] batch [265/1000] time 1.588 (1.568) data 0.000 (0.004) loss 1.6553 (1.0990) acc 59.3750 (72.1816) lr 8.1262e-04 eta 9:01:49 +epoch [30/50] batch [270/1000] time 1.553 (1.568) data 0.001 (0.004) loss 1.0898 (1.1005) acc 75.0000 (72.1991) lr 8.1262e-04 eta 9:01:39 +epoch [30/50] batch [275/1000] time 1.560 (1.568) data 0.000 (0.004) loss 0.8682 (1.1016) acc 78.1250 (72.2159) lr 8.1262e-04 eta 9:01:27 +epoch [30/50] batch [280/1000] time 1.571 (1.567) data 0.000 (0.004) loss 1.2275 (1.1063) acc 68.7500 (72.1875) lr 8.1262e-04 eta 9:01:17 +epoch [30/50] batch [285/1000] time 1.552 (1.568) data 0.000 (0.004) loss 1.2129 (1.1083) acc 75.0000 (72.0724) lr 8.1262e-04 eta 9:01:18 +epoch [30/50] batch [290/1000] time 1.541 (1.568) data 0.001 (0.004) loss 1.2041 (1.1079) acc 78.1250 (72.1228) lr 8.1262e-04 eta 9:01:03 +epoch [30/50] batch [295/1000] time 1.566 (1.567) data 0.000 (0.004) loss 1.4199 (1.1105) acc 75.0000 (72.1081) lr 8.1262e-04 eta 9:00:52 +epoch [30/50] batch [300/1000] time 1.535 (1.567) data 0.000 (0.004) loss 1.0801 (1.1109) acc 81.2500 (72.1458) lr 8.1262e-04 eta 9:00:36 +epoch [30/50] batch [305/1000] time 1.565 (1.567) data 0.001 (0.004) loss 0.9448 (1.1103) acc 71.8750 (72.1516) lr 8.1262e-04 eta 9:00:27 +epoch [30/50] batch [310/1000] time 1.548 (1.567) data 0.000 (0.004) loss 0.8184 (1.1105) acc 75.0000 (72.1774) lr 8.1262e-04 eta 9:00:17 +epoch [30/50] batch [315/1000] time 1.553 (1.567) data 0.000 (0.004) loss 0.8765 (1.1101) acc 75.0000 (72.2024) lr 8.1262e-04 eta 9:00:07 +epoch [30/50] batch [320/1000] time 1.591 (1.567) data 0.000 
(0.004) loss 0.9355 (1.1120) acc 81.2500 (72.1973) lr 8.1262e-04 eta 8:59:59 +epoch [30/50] batch [325/1000] time 1.546 (1.567) data 0.000 (0.003) loss 1.0596 (1.1073) acc 68.7500 (72.2500) lr 8.1262e-04 eta 8:59:49 +epoch [30/50] batch [330/1000] time 1.549 (1.567) data 0.001 (0.003) loss 1.2246 (1.1083) acc 62.5000 (72.2064) lr 8.1262e-04 eta 8:59:50 +epoch [30/50] batch [335/1000] time 1.543 (1.567) data 0.000 (0.003) loss 0.9658 (1.1024) acc 75.0000 (72.3507) lr 8.1262e-04 eta 8:59:36 +epoch [30/50] batch [340/1000] time 1.566 (1.567) data 0.000 (0.003) loss 0.5366 (1.1029) acc 87.5000 (72.3713) lr 8.1262e-04 eta 8:59:27 +epoch [30/50] batch [345/1000] time 1.541 (1.566) data 0.000 (0.003) loss 0.8760 (1.1017) acc 78.1250 (72.4275) lr 8.1262e-04 eta 8:59:12 +epoch [30/50] batch [350/1000] time 1.582 (1.566) data 0.000 (0.003) loss 1.0703 (1.1047) acc 65.6250 (72.3571) lr 8.1262e-04 eta 8:59:03 +epoch [30/50] batch [355/1000] time 1.555 (1.566) data 0.000 (0.003) loss 1.6348 (1.1076) acc 62.5000 (72.2711) lr 8.1262e-04 eta 8:58:54 +epoch [30/50] batch [360/1000] time 1.561 (1.566) data 0.001 (0.003) loss 1.1094 (1.1069) acc 65.6250 (72.2483) lr 8.1262e-04 eta 8:58:46 +epoch [30/50] batch [365/1000] time 1.560 (1.566) data 0.001 (0.003) loss 0.7627 (1.1047) acc 84.3750 (72.2603) lr 8.1262e-04 eta 8:58:37 +epoch [30/50] batch [370/1000] time 1.571 (1.566) data 0.000 (0.003) loss 1.1553 (1.1033) acc 75.0000 (72.3057) lr 8.1262e-04 eta 8:58:29 +epoch [30/50] batch [375/1000] time 1.530 (1.566) data 0.001 (0.003) loss 0.9600 (1.1012) acc 78.1250 (72.4000) lr 8.1262e-04 eta 8:58:26 +epoch [30/50] batch [380/1000] time 1.557 (1.566) data 0.001 (0.003) loss 1.6758 (1.1006) acc 68.7500 (72.4013) lr 8.1262e-04 eta 8:58:17 +epoch [30/50] batch [385/1000] time 1.587 (1.567) data 0.000 (0.003) loss 0.7441 (1.1018) acc 84.3750 (72.3864) lr 8.1262e-04 eta 8:58:14 +epoch [30/50] batch [390/1000] time 1.568 (1.567) data 0.001 (0.003) loss 1.5361 (1.1024) acc 53.1250 (72.2837) lr 
8.1262e-04 eta 8:58:05 +epoch [30/50] batch [395/1000] time 1.569 (1.566) data 0.000 (0.003) loss 1.1621 (1.1035) acc 65.6250 (72.2468) lr 8.1262e-04 eta 8:57:54 +epoch [30/50] batch [400/1000] time 1.587 (1.566) data 0.001 (0.003) loss 1.3037 (1.1034) acc 56.2500 (72.2109) lr 8.1262e-04 eta 8:57:45 +epoch [30/50] batch [405/1000] time 1.546 (1.566) data 0.001 (0.003) loss 1.2930 (1.1065) acc 68.7500 (72.1528) lr 8.1262e-04 eta 8:57:38 +epoch [30/50] batch [410/1000] time 1.570 (1.566) data 0.000 (0.003) loss 0.7085 (1.1034) acc 78.1250 (72.1951) lr 8.1262e-04 eta 8:57:30 +epoch [30/50] batch [415/1000] time 1.546 (1.566) data 0.000 (0.003) loss 0.9424 (1.1034) acc 78.1250 (72.2515) lr 8.1262e-04 eta 8:57:21 +epoch [30/50] batch [420/1000] time 1.549 (1.566) data 0.000 (0.003) loss 1.2090 (1.1045) acc 65.6250 (72.2470) lr 8.1262e-04 eta 8:57:11 +epoch [30/50] batch [425/1000] time 1.560 (1.566) data 0.000 (0.003) loss 0.5000 (1.1049) acc 84.3750 (72.2574) lr 8.1262e-04 eta 8:56:59 +epoch [30/50] batch [430/1000] time 1.562 (1.566) data 0.000 (0.003) loss 1.5156 (1.1074) acc 62.5000 (72.2166) lr 8.1262e-04 eta 8:56:51 +epoch [30/50] batch [435/1000] time 1.721 (1.566) data 0.001 (0.003) loss 1.5781 (1.1127) acc 56.2500 (72.1264) lr 8.1262e-04 eta 8:56:52 +epoch [30/50] batch [440/1000] time 1.571 (1.566) data 0.000 (0.003) loss 1.1328 (1.1123) acc 68.7500 (72.1520) lr 8.1262e-04 eta 8:56:44 +epoch [30/50] batch [445/1000] time 1.576 (1.566) data 0.000 (0.003) loss 0.5693 (1.1134) acc 84.3750 (72.0927) lr 8.1262e-04 eta 8:56:38 +epoch [30/50] batch [450/1000] time 1.582 (1.567) data 0.000 (0.003) loss 1.0703 (1.1143) acc 68.7500 (72.0417) lr 8.1262e-04 eta 8:56:33 +epoch [30/50] batch [455/1000] time 1.578 (1.567) data 0.000 (0.003) loss 1.0137 (1.1149) acc 75.0000 (72.0192) lr 8.1262e-04 eta 8:56:27 +epoch [30/50] batch [460/1000] time 1.563 (1.567) data 0.000 (0.003) loss 1.3213 (1.1154) acc 71.8750 (72.0245) lr 8.1262e-04 eta 8:56:22 +epoch [30/50] batch 
[465/1000] time 1.562 (1.567) data 0.001 (0.003) loss 0.8887 (1.1134) acc 78.1250 (72.0296) lr 8.1262e-04 eta 8:56:15 +epoch [30/50] batch [470/1000] time 1.556 (1.567) data 0.000 (0.003) loss 0.8774 (1.1124) acc 71.8750 (72.0412) lr 8.1262e-04 eta 8:56:04 +epoch [30/50] batch [475/1000] time 1.564 (1.567) data 0.001 (0.003) loss 1.5674 (1.1124) acc 68.7500 (72.0461) lr 8.1262e-04 eta 8:55:55 +epoch [30/50] batch [480/1000] time 1.713 (1.567) data 0.001 (0.003) loss 0.8242 (1.1089) acc 78.1250 (72.0898) lr 8.1262e-04 eta 8:55:55 +epoch [30/50] batch [485/1000] time 1.589 (1.567) data 0.000 (0.002) loss 1.3828 (1.1084) acc 59.3750 (72.0876) lr 8.1262e-04 eta 8:55:45 +epoch [30/50] batch [490/1000] time 1.559 (1.567) data 0.000 (0.002) loss 0.7339 (1.1076) acc 84.3750 (72.0982) lr 8.1262e-04 eta 8:55:37 +epoch [30/50] batch [495/1000] time 1.577 (1.567) data 0.001 (0.002) loss 1.7266 (1.1080) acc 65.6250 (72.1275) lr 8.1262e-04 eta 8:55:28 +epoch [30/50] batch [500/1000] time 1.562 (1.567) data 0.001 (0.002) loss 0.4141 (1.1044) acc 93.7500 (72.2125) lr 8.1262e-04 eta 8:55:18 +epoch [30/50] batch [505/1000] time 1.534 (1.567) data 0.000 (0.002) loss 1.5830 (1.1045) acc 65.6250 (72.2277) lr 8.1262e-04 eta 8:55:10 +epoch [30/50] batch [510/1000] time 1.578 (1.567) data 0.000 (0.002) loss 0.5732 (1.1043) acc 87.5000 (72.2794) lr 8.1262e-04 eta 8:55:01 +epoch [30/50] batch [515/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.5137 (1.1052) acc 62.5000 (72.2694) lr 8.1262e-04 eta 8:54:49 +epoch [30/50] batch [520/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.4893 (1.1063) acc 62.5000 (72.2356) lr 8.1262e-04 eta 8:54:40 +epoch [30/50] batch [525/1000] time 1.548 (1.567) data 0.000 (0.002) loss 0.7285 (1.1048) acc 75.0000 (72.2500) lr 8.1262e-04 eta 8:54:37 +epoch [30/50] batch [530/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.1436 (1.1041) acc 65.6250 (72.2465) lr 8.1262e-04 eta 8:54:27 +epoch [30/50] batch [535/1000] time 1.576 (1.566) data 0.001 (0.002) loss 
0.9243 (1.1035) acc 78.1250 (72.2547) lr 8.1262e-04 eta 8:54:17 +epoch [30/50] batch [540/1000] time 1.549 (1.566) data 0.001 (0.002) loss 0.8442 (1.1007) acc 78.1250 (72.3032) lr 8.1262e-04 eta 8:54:08 +epoch [30/50] batch [545/1000] time 1.538 (1.566) data 0.001 (0.002) loss 0.9155 (1.1015) acc 71.8750 (72.2878) lr 8.1262e-04 eta 8:54:00 +epoch [30/50] batch [550/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.2490 (1.1015) acc 78.1250 (72.3295) lr 8.1262e-04 eta 8:53:53 +epoch [30/50] batch [555/1000] time 1.564 (1.567) data 0.001 (0.002) loss 1.6309 (1.1019) acc 75.0000 (72.3423) lr 8.1262e-04 eta 8:53:48 +epoch [30/50] batch [560/1000] time 1.536 (1.566) data 0.000 (0.002) loss 0.9146 (1.1014) acc 68.7500 (72.3493) lr 8.1262e-04 eta 8:53:38 +epoch [30/50] batch [565/1000] time 1.563 (1.566) data 0.001 (0.002) loss 1.1445 (1.1032) acc 75.0000 (72.3230) lr 8.1262e-04 eta 8:53:28 +epoch [30/50] batch [570/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.5625 (1.1075) acc 62.5000 (72.2423) lr 8.1262e-04 eta 8:53:19 +epoch [30/50] batch [575/1000] time 1.548 (1.566) data 0.001 (0.002) loss 1.3076 (1.1078) acc 65.6250 (72.2120) lr 8.1262e-04 eta 8:53:09 +epoch [30/50] batch [580/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.7295 (1.1064) acc 75.0000 (72.2414) lr 8.1262e-04 eta 8:52:59 +epoch [30/50] batch [585/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.1729 (1.1079) acc 75.0000 (72.2169) lr 8.1262e-04 eta 8:52:50 +epoch [30/50] batch [590/1000] time 1.551 (1.566) data 0.000 (0.002) loss 0.8931 (1.1079) acc 75.0000 (72.2193) lr 8.1262e-04 eta 8:52:45 +epoch [30/50] batch [595/1000] time 1.544 (1.566) data 0.000 (0.002) loss 1.2432 (1.1090) acc 75.0000 (72.2006) lr 8.1262e-04 eta 8:52:34 +epoch [30/50] batch [600/1000] time 1.534 (1.566) data 0.000 (0.002) loss 1.7285 (1.1083) acc 71.8750 (72.2344) lr 8.1262e-04 eta 8:52:24 +epoch [30/50] batch [605/1000] time 1.549 (1.566) data 0.001 (0.002) loss 0.7520 (1.1094) acc 71.8750 (72.2004) lr 8.1262e-04 
eta 8:52:15 +epoch [30/50] batch [610/1000] time 1.530 (1.566) data 0.000 (0.002) loss 0.8262 (1.1102) acc 71.8750 (72.1773) lr 8.1262e-04 eta 8:52:03 +epoch [30/50] batch [615/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.2402 (1.1122) acc 68.7500 (72.1697) lr 8.1262e-04 eta 8:51:54 +epoch [30/50] batch [620/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.3672 (1.1131) acc 75.0000 (72.1623) lr 8.1262e-04 eta 8:51:44 +epoch [30/50] batch [625/1000] time 1.577 (1.566) data 0.000 (0.002) loss 0.8647 (1.1129) acc 75.0000 (72.1550) lr 8.1262e-04 eta 8:51:37 +epoch [30/50] batch [630/1000] time 1.577 (1.566) data 0.000 (0.002) loss 0.9492 (1.1123) acc 65.6250 (72.1726) lr 8.1262e-04 eta 8:51:30 +epoch [30/50] batch [635/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.6270 (1.1126) acc 84.3750 (72.1506) lr 8.1262e-04 eta 8:51:27 +epoch [30/50] batch [640/1000] time 1.574 (1.566) data 0.001 (0.002) loss 1.0781 (1.1128) acc 71.8750 (72.1582) lr 8.1262e-04 eta 8:51:19 +epoch [30/50] batch [645/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.7412 (1.1143) acc 53.1250 (72.1172) lr 8.1262e-04 eta 8:51:12 +epoch [30/50] batch [650/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.4756 (1.1145) acc 75.0000 (72.1346) lr 8.1262e-04 eta 8:51:03 +epoch [30/50] batch [655/1000] time 1.560 (1.566) data 0.001 (0.002) loss 1.3057 (1.1153) acc 68.7500 (72.1279) lr 8.1262e-04 eta 8:50:52 +epoch [30/50] batch [660/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.0996 (1.1158) acc 71.8750 (72.1023) lr 8.1262e-04 eta 8:50:44 +epoch [30/50] batch [665/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.1963 (1.1145) acc 78.1250 (72.1523) lr 8.1262e-04 eta 8:50:35 +epoch [30/50] batch [670/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.8652 (1.1150) acc 50.0000 (72.1595) lr 8.1262e-04 eta 8:50:27 +epoch [30/50] batch [675/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.5039 (1.1149) acc 65.6250 (72.1806) lr 8.1262e-04 eta 8:50:23 +epoch [30/50] batch [680/1000] time 
1.562 (1.566) data 0.001 (0.002) loss 0.7593 (1.1140) acc 81.2500 (72.1783) lr 8.1262e-04 eta 8:50:15 +epoch [30/50] batch [685/1000] time 1.579 (1.566) data 0.001 (0.002) loss 1.1338 (1.1154) acc 75.0000 (72.1715) lr 8.1262e-04 eta 8:50:07 +epoch [30/50] batch [690/1000] time 1.597 (1.566) data 0.000 (0.002) loss 1.0195 (1.1186) acc 71.8750 (72.1467) lr 8.1262e-04 eta 8:49:59 +epoch [30/50] batch [695/1000] time 1.566 (1.566) data 0.001 (0.002) loss 0.4495 (1.1175) acc 84.3750 (72.1583) lr 8.1262e-04 eta 8:49:52 +epoch [30/50] batch [700/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.3721 (1.1171) acc 65.6250 (72.1473) lr 8.1262e-04 eta 8:49:44 +epoch [30/50] batch [705/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.8828 (1.1159) acc 78.1250 (72.1720) lr 8.1262e-04 eta 8:49:35 +epoch [30/50] batch [710/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.6338 (1.1163) acc 62.5000 (72.1743) lr 8.1262e-04 eta 8:49:25 +epoch [30/50] batch [715/1000] time 1.584 (1.566) data 0.001 (0.002) loss 0.9858 (1.1157) acc 65.6250 (72.1897) lr 8.1262e-04 eta 8:49:17 +epoch [30/50] batch [720/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.9932 (1.1144) acc 71.8750 (72.1918) lr 8.1262e-04 eta 8:49:07 +epoch [30/50] batch [725/1000] time 1.533 (1.565) data 0.001 (0.002) loss 1.3838 (1.1161) acc 59.3750 (72.1509) lr 8.1262e-04 eta 8:48:58 +epoch [30/50] batch [730/1000] time 1.578 (1.565) data 0.000 (0.002) loss 0.9736 (1.1163) acc 68.7500 (72.1447) lr 8.1262e-04 eta 8:48:51 +epoch [30/50] batch [735/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.1787 (1.1163) acc 68.7500 (72.1173) lr 8.1262e-04 eta 8:48:41 +epoch [30/50] batch [740/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.2844 (1.1154) acc 93.7500 (72.1622) lr 8.1262e-04 eta 8:48:36 +epoch [30/50] batch [745/1000] time 1.549 (1.565) data 0.000 (0.002) loss 0.7666 (1.1159) acc 87.5000 (72.1477) lr 8.1262e-04 eta 8:48:27 +epoch [30/50] batch [750/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.7563 (1.1146) 
acc 84.3750 (72.1833) lr 8.1262e-04 eta 8:48:18 +epoch [30/50] batch [755/1000] time 1.553 (1.565) data 0.001 (0.002) loss 1.0322 (1.1139) acc 78.1250 (72.1978) lr 8.1262e-04 eta 8:48:10 +epoch [30/50] batch [760/1000] time 1.582 (1.565) data 0.001 (0.002) loss 0.5430 (1.1147) acc 81.2500 (72.1505) lr 8.1262e-04 eta 8:48:02 +epoch [30/50] batch [765/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.3994 (1.1153) acc 75.0000 (72.1528) lr 8.1262e-04 eta 8:47:53 +epoch [30/50] batch [770/1000] time 1.580 (1.565) data 0.000 (0.002) loss 0.8794 (1.1149) acc 78.1250 (72.1510) lr 8.1262e-04 eta 8:47:45 +epoch [30/50] batch [775/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0303 (1.1153) acc 75.0000 (72.1129) lr 8.1262e-04 eta 8:47:36 +epoch [30/50] batch [780/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.1123 (1.1163) acc 68.7500 (72.0913) lr 8.1262e-04 eta 8:47:28 +epoch [30/50] batch [785/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.4893 (1.1170) acc 62.5000 (72.0661) lr 8.1262e-04 eta 8:47:25 +epoch [30/50] batch [790/1000] time 1.545 (1.565) data 0.000 (0.002) loss 0.9150 (1.1171) acc 71.8750 (72.0728) lr 8.1262e-04 eta 8:47:16 +epoch [30/50] batch [795/1000] time 1.587 (1.565) data 0.000 (0.002) loss 1.2139 (1.1175) acc 62.5000 (72.0676) lr 8.1262e-04 eta 8:47:08 +epoch [30/50] batch [800/1000] time 1.577 (1.565) data 0.001 (0.002) loss 1.9209 (1.1188) acc 62.5000 (72.0391) lr 8.1262e-04 eta 8:47:00 +epoch [30/50] batch [805/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.0176 (1.1197) acc 71.8750 (72.0264) lr 8.1262e-04 eta 8:46:51 +epoch [30/50] batch [810/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.5830 (1.1197) acc 59.3750 (72.0409) lr 8.1262e-04 eta 8:46:41 +epoch [30/50] batch [815/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.3193 (1.1199) acc 68.7500 (72.0130) lr 8.1262e-04 eta 8:46:33 +epoch [30/50] batch [820/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.3047 (1.1214) acc 56.2500 (71.9741) lr 8.1262e-04 eta 8:46:24 
+epoch [30/50] batch [825/1000] time 1.704 (1.565) data 0.000 (0.002) loss 0.8486 (1.1209) acc 75.0000 (71.9583) lr 8.1262e-04 eta 8:46:19 +epoch [30/50] batch [830/1000] time 1.556 (1.565) data 0.000 (0.002) loss 0.8315 (1.1218) acc 75.0000 (71.9465) lr 8.1262e-04 eta 8:46:10 +epoch [30/50] batch [835/1000] time 1.540 (1.565) data 0.000 (0.002) loss 1.5127 (1.1223) acc 68.7500 (71.9461) lr 8.1262e-04 eta 8:46:01 +epoch [30/50] batch [840/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.2793 (1.1224) acc 65.6250 (71.9271) lr 8.1262e-04 eta 8:45:52 +epoch [30/50] batch [845/1000] time 1.544 (1.565) data 0.000 (0.002) loss 0.7964 (1.1209) acc 84.3750 (71.9712) lr 8.1262e-04 eta 8:45:44 +epoch [30/50] batch [850/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.6353 (1.1195) acc 78.1250 (71.9926) lr 8.1262e-04 eta 8:45:36 +epoch [30/50] batch [855/1000] time 1.571 (1.565) data 0.000 (0.002) loss 1.0469 (1.1186) acc 65.6250 (71.9956) lr 8.1262e-04 eta 8:45:28 +epoch [30/50] batch [860/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.9404 (1.1183) acc 68.7500 (71.9804) lr 8.1262e-04 eta 8:45:21 +epoch [30/50] batch [865/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.0742 (1.1185) acc 75.0000 (71.9870) lr 8.1262e-04 eta 8:45:12 +epoch [30/50] batch [870/1000] time 1.556 (1.565) data 0.001 (0.002) loss 0.8896 (1.1186) acc 71.8750 (71.9648) lr 8.1262e-04 eta 8:45:03 +epoch [30/50] batch [875/1000] time 1.578 (1.565) data 0.000 (0.002) loss 0.8115 (1.1187) acc 75.0000 (71.9750) lr 8.1262e-04 eta 8:44:55 +epoch [30/50] batch [880/1000] time 1.567 (1.565) data 0.001 (0.002) loss 0.9609 (1.1181) acc 75.0000 (71.9638) lr 8.1262e-04 eta 8:44:46 +epoch [30/50] batch [885/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.2041 (1.1183) acc 75.0000 (71.9527) lr 8.1262e-04 eta 8:44:37 +epoch [30/50] batch [890/1000] time 1.592 (1.565) data 0.000 (0.002) loss 0.8584 (1.1174) acc 81.2500 (71.9698) lr 8.1262e-04 eta 8:44:33 +epoch [30/50] batch [895/1000] time 1.561 (1.565) 
data 0.001 (0.002) loss 0.7744 (1.1173) acc 81.2500 (71.9763) lr 8.1262e-04 eta 8:44:23 +epoch [30/50] batch [900/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.0459 (1.1169) acc 71.8750 (71.9826) lr 8.1262e-04 eta 8:44:15 +epoch [30/50] batch [905/1000] time 1.564 (1.565) data 0.001 (0.002) loss 1.4268 (1.1178) acc 56.2500 (71.9613) lr 8.1262e-04 eta 8:44:07 +epoch [30/50] batch [910/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.8569 (1.1178) acc 68.7500 (71.9334) lr 8.1262e-04 eta 8:43:57 +epoch [30/50] batch [915/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.3008 (1.1167) acc 68.7500 (71.9604) lr 8.1262e-04 eta 8:43:49 +epoch [30/50] batch [920/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.8354 (1.1165) acc 81.2500 (71.9735) lr 8.1262e-04 eta 8:43:41 +epoch [30/50] batch [925/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.3418 (1.1175) acc 65.6250 (71.9392) lr 8.1262e-04 eta 8:43:34 +epoch [30/50] batch [930/1000] time 1.534 (1.565) data 0.000 (0.002) loss 1.9355 (1.1188) acc 53.1250 (71.9153) lr 8.1262e-04 eta 8:43:25 +epoch [30/50] batch [935/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.9653 (1.1177) acc 75.0000 (71.9352) lr 8.1262e-04 eta 8:43:18 +epoch [30/50] batch [940/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.2109 (1.1176) acc 71.8750 (71.9415) lr 8.1262e-04 eta 8:43:10 +epoch [30/50] batch [945/1000] time 1.606 (1.565) data 0.000 (0.002) loss 1.0693 (1.1176) acc 75.0000 (71.9577) lr 8.1262e-04 eta 8:43:03 +epoch [30/50] batch [950/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.5894 (1.1163) acc 84.3750 (72.0000) lr 8.1262e-04 eta 8:42:54 +epoch [30/50] batch [955/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.0488 (1.1162) acc 65.6250 (72.0059) lr 8.1262e-04 eta 8:42:47 +epoch [30/50] batch [960/1000] time 1.569 (1.565) data 0.000 (0.001) loss 1.1104 (1.1169) acc 78.1250 (72.0085) lr 8.1262e-04 eta 8:42:38 +epoch [30/50] batch [965/1000] time 1.551 (1.565) data 0.000 (0.001) loss 1.1895 (1.1184) acc 75.0000 
(72.0013) lr 8.1262e-04 eta 8:42:28 +epoch [30/50] batch [970/1000] time 1.556 (1.565) data 0.000 (0.001) loss 0.8682 (1.1194) acc 78.1250 (71.9684) lr 8.1262e-04 eta 8:42:19 +epoch [30/50] batch [975/1000] time 1.558 (1.565) data 0.001 (0.001) loss 0.9922 (1.1196) acc 78.1250 (71.9776) lr 8.1262e-04 eta 8:42:10 +epoch [30/50] batch [980/1000] time 1.564 (1.565) data 0.001 (0.001) loss 1.1387 (1.1204) acc 71.8750 (71.9579) lr 8.1262e-04 eta 8:42:05 +epoch [30/50] batch [985/1000] time 1.559 (1.565) data 0.001 (0.001) loss 1.3350 (1.1209) acc 65.6250 (71.9543) lr 8.1262e-04 eta 8:41:57 +epoch [30/50] batch [990/1000] time 1.580 (1.565) data 0.000 (0.001) loss 1.0059 (1.1214) acc 65.6250 (71.9571) lr 8.1262e-04 eta 8:41:49 +epoch [30/50] batch [995/1000] time 1.574 (1.565) data 0.000 (0.001) loss 0.7163 (1.1204) acc 90.6250 (71.9912) lr 8.1262e-04 eta 8:41:41 +epoch [30/50] batch [1000/1000] time 1.551 (1.565) data 0.000 (0.001) loss 0.8726 (1.1196) acc 87.5000 (72.0187) lr 7.5131e-04 eta 8:41:33 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,282 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 78.1% +epoch [31/50] batch [5/1000] time 1.563 (1.698) data 0.000 (0.194) loss 0.5386 (1.1358) acc 90.6250 (73.1250) lr 7.5131e-04 eta 9:25:52 +epoch [31/50] batch [10/1000] time 1.553 (1.627) data 0.000 (0.097) loss 0.6226 (0.9636) acc 81.2500 (75.6250) lr 7.5131e-04 eta 9:02:04 +epoch [31/50] batch [15/1000] time 1.547 (1.602) data 0.000 (0.065) loss 0.9932 (0.9703) acc 75.0000 (75.0000) lr 7.5131e-04 eta 8:53:38 +epoch [31/50] batch [20/1000] time 1.563 (1.589) data 0.001 (0.049) loss 0.6162 (0.9857) acc 78.1250 (74.3750) lr 7.5131e-04 eta 8:49:17 +epoch [31/50] batch [25/1000] time 1.551 (1.585) data 0.001 (0.039) loss 1.4766 (1.0104) acc 59.3750 (73.3750) lr 7.5131e-04 eta 8:47:48 +epoch [31/50] batch [30/1000] time 1.581 (1.584) data 0.000 (0.033) loss 0.7700 (0.9952) acc 78.1250 (73.7500) lr 7.5131e-04 eta 8:47:10 +epoch [31/50] batch [35/1000] 
time 1.551 (1.580) data 0.001 (0.028) loss 0.9370 (1.0059) acc 71.8750 (73.5714) lr 7.5131e-04 eta 8:45:46 +epoch [31/50] batch [40/1000] time 1.568 (1.578) data 0.000 (0.025) loss 1.1406 (1.0592) acc 68.7500 (72.4219) lr 7.5131e-04 eta 8:44:47 +epoch [31/50] batch [45/1000] time 1.773 (1.580) data 0.000 (0.022) loss 0.9536 (1.0536) acc 71.8750 (72.7083) lr 7.5131e-04 eta 8:45:25 +epoch [31/50] batch [50/1000] time 1.545 (1.579) data 0.001 (0.020) loss 1.0732 (1.0451) acc 75.0000 (73.2500) lr 7.5131e-04 eta 8:44:53 +epoch [31/50] batch [55/1000] time 1.544 (1.576) data 0.001 (0.018) loss 0.8560 (1.0406) acc 84.3750 (73.6364) lr 7.5131e-04 eta 8:43:48 +epoch [31/50] batch [60/1000] time 1.558 (1.575) data 0.001 (0.017) loss 1.8955 (1.0586) acc 59.3750 (73.5417) lr 7.5131e-04 eta 8:43:15 +epoch [31/50] batch [65/1000] time 1.580 (1.573) data 0.001 (0.015) loss 1.1162 (1.0739) acc 71.8750 (72.9808) lr 7.5131e-04 eta 8:42:31 +epoch [31/50] batch [70/1000] time 1.570 (1.573) data 0.001 (0.014) loss 0.8130 (1.0826) acc 84.3750 (73.1250) lr 7.5131e-04 eta 8:42:20 +epoch [31/50] batch [75/1000] time 1.577 (1.572) data 0.001 (0.013) loss 0.8564 (1.0784) acc 75.0000 (73.2083) lr 7.5131e-04 eta 8:42:02 +epoch [31/50] batch [80/1000] time 1.586 (1.571) data 0.000 (0.013) loss 0.9751 (1.0712) acc 75.0000 (73.4766) lr 7.5131e-04 eta 8:41:44 +epoch [31/50] batch [85/1000] time 1.555 (1.571) data 0.001 (0.012) loss 1.3555 (1.0701) acc 75.0000 (73.7132) lr 7.5131e-04 eta 8:41:27 +epoch [31/50] batch [90/1000] time 1.546 (1.572) data 0.000 (0.011) loss 1.0312 (1.0766) acc 65.6250 (73.4028) lr 7.5131e-04 eta 8:41:38 +epoch [31/50] batch [95/1000] time 1.567 (1.571) data 0.001 (0.011) loss 0.6104 (1.0788) acc 81.2500 (73.3553) lr 7.5131e-04 eta 8:41:20 +epoch [31/50] batch [100/1000] time 1.562 (1.571) data 0.001 (0.010) loss 0.9531 (1.0763) acc 75.0000 (73.3750) lr 7.5131e-04 eta 8:40:55 +epoch [31/50] batch [105/1000] time 1.564 (1.570) data 0.000 (0.010) loss 0.8506 (1.0672) acc 
87.5000 (73.6607) lr 7.5131e-04 eta 8:40:39 +epoch [31/50] batch [110/1000] time 1.540 (1.569) data 0.000 (0.009) loss 1.2188 (1.0702) acc 75.0000 (73.6080) lr 7.5131e-04 eta 8:40:15 +epoch [31/50] batch [115/1000] time 1.558 (1.569) data 0.001 (0.009) loss 0.6797 (1.0732) acc 87.5000 (73.4511) lr 7.5131e-04 eta 8:40:02 +epoch [31/50] batch [120/1000] time 1.546 (1.569) data 0.000 (0.009) loss 0.9834 (1.0788) acc 78.1250 (73.3854) lr 7.5131e-04 eta 8:39:50 +epoch [31/50] batch [125/1000] time 1.569 (1.569) data 0.001 (0.008) loss 0.7490 (1.0761) acc 71.8750 (73.4750) lr 7.5131e-04 eta 8:39:43 +epoch [31/50] batch [130/1000] time 1.549 (1.568) data 0.001 (0.008) loss 1.4004 (1.0733) acc 71.8750 (73.5817) lr 7.5131e-04 eta 8:39:18 +epoch [31/50] batch [135/1000] time 1.570 (1.568) data 0.001 (0.008) loss 0.9106 (1.0750) acc 71.8750 (73.5185) lr 7.5131e-04 eta 8:39:07 +epoch [31/50] batch [140/1000] time 1.572 (1.567) data 0.001 (0.007) loss 1.0264 (1.0800) acc 75.0000 (73.3929) lr 7.5131e-04 eta 8:38:49 +epoch [31/50] batch [145/1000] time 1.559 (1.567) data 0.000 (0.007) loss 0.7432 (1.0763) acc 78.1250 (73.3836) lr 7.5131e-04 eta 8:38:38 +epoch [31/50] batch [150/1000] time 1.571 (1.569) data 0.001 (0.007) loss 1.6279 (1.0807) acc 62.5000 (73.3958) lr 7.5131e-04 eta 8:38:56 +epoch [31/50] batch [155/1000] time 1.556 (1.568) data 0.001 (0.007) loss 1.0439 (1.0820) acc 81.2500 (73.2661) lr 7.5131e-04 eta 8:38:44 +epoch [31/50] batch [160/1000] time 1.576 (1.568) data 0.001 (0.007) loss 1.9590 (1.0887) acc 56.2500 (73.1055) lr 7.5131e-04 eta 8:38:35 +epoch [31/50] batch [165/1000] time 1.567 (1.568) data 0.001 (0.006) loss 1.0635 (1.0993) acc 71.8750 (72.9167) lr 7.5131e-04 eta 8:38:22 +epoch [31/50] batch [170/1000] time 1.549 (1.568) data 0.001 (0.006) loss 1.0908 (1.0987) acc 71.8750 (72.9412) lr 7.5131e-04 eta 8:38:04 +epoch [31/50] batch [175/1000] time 1.582 (1.567) data 0.001 (0.006) loss 1.0166 (1.0963) acc 65.6250 (72.8571) lr 7.5131e-04 eta 8:37:47 +epoch 
[31/50] batch [180/1000] time 1.570 (1.567) data 0.000 (0.006) loss 0.6670 (1.0990) acc 87.5000 (72.9167) lr 7.5131e-04 eta 8:37:32 +epoch [31/50] batch [185/1000] time 1.565 (1.567) data 0.000 (0.006) loss 0.9639 (1.0981) acc 78.1250 (72.8885) lr 7.5131e-04 eta 8:37:20 +epoch [31/50] batch [190/1000] time 1.568 (1.567) data 0.001 (0.006) loss 0.8960 (1.0945) acc 78.1250 (72.8947) lr 7.5131e-04 eta 8:37:14 +epoch [31/50] batch [195/1000] time 1.585 (1.567) data 0.000 (0.005) loss 0.9414 (1.1029) acc 68.7500 (72.6603) lr 7.5131e-04 eta 8:37:22 +epoch [31/50] batch [200/1000] time 1.558 (1.567) data 0.000 (0.005) loss 1.0439 (1.1011) acc 75.0000 (72.7031) lr 7.5131e-04 eta 8:37:10 +epoch [31/50] batch [205/1000] time 1.565 (1.567) data 0.001 (0.005) loss 0.6089 (1.0962) acc 81.2500 (72.8201) lr 7.5131e-04 eta 8:36:56 +epoch [31/50] batch [210/1000] time 1.560 (1.567) data 0.000 (0.005) loss 1.0850 (1.1010) acc 71.8750 (72.7232) lr 7.5131e-04 eta 8:36:44 +epoch [31/50] batch [215/1000] time 1.550 (1.566) data 0.001 (0.005) loss 0.7246 (1.1028) acc 71.8750 (72.5581) lr 7.5131e-04 eta 8:36:30 +epoch [31/50] batch [220/1000] time 1.540 (1.566) data 0.001 (0.005) loss 1.1406 (1.1029) acc 71.8750 (72.5000) lr 7.5131e-04 eta 8:36:17 +epoch [31/50] batch [225/1000] time 1.542 (1.566) data 0.001 (0.005) loss 1.0957 (1.1025) acc 78.1250 (72.5000) lr 7.5131e-04 eta 8:36:04 +epoch [31/50] batch [230/1000] time 1.560 (1.566) data 0.001 (0.005) loss 0.8438 (1.1013) acc 81.2500 (72.5679) lr 7.5131e-04 eta 8:35:51 +epoch [31/50] batch [235/1000] time 1.565 (1.566) data 0.001 (0.005) loss 0.7407 (1.1012) acc 81.2500 (72.5399) lr 7.5131e-04 eta 8:35:42 +epoch [31/50] batch [240/1000] time 1.551 (1.566) data 0.001 (0.005) loss 0.8516 (1.0963) acc 75.0000 (72.6432) lr 7.5131e-04 eta 8:35:48 +epoch [31/50] batch [245/1000] time 1.564 (1.566) data 0.000 (0.004) loss 1.1895 (1.0940) acc 65.6250 (72.5893) lr 7.5131e-04 eta 8:35:36 +epoch [31/50] batch [250/1000] time 1.549 (1.566) data 
0.001 (0.004) loss 0.6416 (1.0893) acc 87.5000 (72.6625) lr 7.5131e-04 eta 8:35:23 +epoch [31/50] batch [255/1000] time 1.555 (1.566) data 0.000 (0.004) loss 0.3381 (1.0879) acc 87.5000 (72.6961) lr 7.5131e-04 eta 8:35:12 +epoch [31/50] batch [260/1000] time 1.570 (1.565) data 0.001 (0.004) loss 0.8535 (1.0892) acc 81.2500 (72.7404) lr 7.5131e-04 eta 8:35:00 +epoch [31/50] batch [265/1000] time 1.571 (1.565) data 0.001 (0.004) loss 1.2246 (1.0900) acc 62.5000 (72.7712) lr 7.5131e-04 eta 8:34:49 +epoch [31/50] batch [270/1000] time 1.553 (1.565) data 0.000 (0.004) loss 0.9404 (1.0918) acc 75.0000 (72.6505) lr 7.5131e-04 eta 8:34:38 +epoch [31/50] batch [275/1000] time 1.574 (1.565) data 0.000 (0.004) loss 0.8281 (1.0928) acc 75.0000 (72.6136) lr 7.5131e-04 eta 8:34:33 +epoch [31/50] batch [280/1000] time 1.535 (1.565) data 0.001 (0.004) loss 1.2832 (1.0956) acc 65.6250 (72.5558) lr 7.5131e-04 eta 8:34:26 +epoch [31/50] batch [285/1000] time 1.568 (1.565) data 0.000 (0.004) loss 0.8271 (1.0938) acc 78.1250 (72.5548) lr 7.5131e-04 eta 8:34:12 +epoch [31/50] batch [290/1000] time 1.554 (1.565) data 0.001 (0.004) loss 1.2031 (1.0967) acc 68.7500 (72.4892) lr 7.5131e-04 eta 8:34:01 +epoch [31/50] batch [295/1000] time 1.561 (1.565) data 0.000 (0.004) loss 1.3604 (1.1012) acc 68.7500 (72.4047) lr 7.5131e-04 eta 8:33:53 +epoch [31/50] batch [300/1000] time 1.726 (1.565) data 0.001 (0.004) loss 1.1338 (1.1050) acc 62.5000 (72.3021) lr 7.5131e-04 eta 8:33:51 +epoch [31/50] batch [305/1000] time 1.588 (1.565) data 0.000 (0.004) loss 0.8003 (1.1036) acc 68.7500 (72.2951) lr 7.5131e-04 eta 8:33:43 +epoch [31/50] batch [310/1000] time 1.538 (1.565) data 0.000 (0.004) loss 1.2461 (1.1037) acc 65.6250 (72.2782) lr 7.5131e-04 eta 8:33:36 +epoch [31/50] batch [315/1000] time 1.593 (1.565) data 0.001 (0.004) loss 0.7227 (1.1020) acc 75.0000 (72.3016) lr 7.5131e-04 eta 8:33:30 +epoch [31/50] batch [320/1000] time 1.524 (1.565) data 0.000 (0.004) loss 0.9243 (1.1017) acc 75.0000 
(72.2656) lr 7.5131e-04 eta 8:33:18 +epoch [31/50] batch [325/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.6504 (1.1065) acc 62.5000 (72.1058) lr 7.5131e-04 eta 8:33:06 +epoch [31/50] batch [330/1000] time 1.539 (1.565) data 0.000 (0.003) loss 0.9419 (1.1062) acc 71.8750 (72.0928) lr 7.5131e-04 eta 8:32:56 +epoch [31/50] batch [335/1000] time 1.557 (1.564) data 0.001 (0.003) loss 1.1045 (1.1040) acc 71.8750 (72.1175) lr 7.5131e-04 eta 8:32:43 +epoch [31/50] batch [340/1000] time 1.575 (1.564) data 0.000 (0.003) loss 0.7065 (1.1032) acc 81.2500 (72.1048) lr 7.5131e-04 eta 8:32:36 +epoch [31/50] batch [345/1000] time 1.742 (1.565) data 0.000 (0.003) loss 1.1240 (1.1025) acc 84.3750 (72.1377) lr 7.5131e-04 eta 8:32:39 +epoch [31/50] batch [350/1000] time 1.536 (1.565) data 0.000 (0.003) loss 1.1445 (1.1048) acc 75.0000 (72.0714) lr 7.5131e-04 eta 8:32:28 +epoch [31/50] batch [355/1000] time 1.575 (1.565) data 0.001 (0.003) loss 1.0527 (1.1038) acc 62.5000 (72.0335) lr 7.5131e-04 eta 8:32:20 +epoch [31/50] batch [360/1000] time 1.579 (1.565) data 0.000 (0.003) loss 1.2598 (1.1114) acc 68.7500 (71.9184) lr 7.5131e-04 eta 8:32:12 +epoch [31/50] batch [365/1000] time 1.564 (1.565) data 0.000 (0.003) loss 0.7368 (1.1103) acc 81.2500 (71.9435) lr 7.5131e-04 eta 8:32:04 +epoch [31/50] batch [370/1000] time 1.555 (1.565) data 0.000 (0.003) loss 0.8008 (1.1072) acc 87.5000 (72.0608) lr 7.5131e-04 eta 8:31:54 +epoch [31/50] batch [375/1000] time 1.567 (1.565) data 0.000 (0.003) loss 1.1748 (1.1052) acc 71.8750 (72.1250) lr 7.5131e-04 eta 8:31:44 +epoch [31/50] batch [380/1000] time 1.552 (1.565) data 0.000 (0.003) loss 1.8145 (1.1046) acc 53.1250 (72.1628) lr 7.5131e-04 eta 8:31:36 +epoch [31/50] batch [385/1000] time 1.569 (1.565) data 0.000 (0.003) loss 1.0020 (1.1019) acc 78.1250 (72.1997) lr 7.5131e-04 eta 8:31:30 +epoch [31/50] batch [390/1000] time 1.565 (1.565) data 0.000 (0.003) loss 1.0869 (1.1031) acc 81.2500 (72.1875) lr 7.5131e-04 eta 8:31:32 +epoch [31/50] 
batch [395/1000] time 1.566 (1.565) data 0.000 (0.003) loss 1.1445 (1.1043) acc 75.0000 (72.1440) lr 7.5131e-04 eta 8:31:23 +epoch [31/50] batch [400/1000] time 1.576 (1.565) data 0.000 (0.003) loss 1.1973 (1.1032) acc 65.6250 (72.1641) lr 7.5131e-04 eta 8:31:16 +epoch [31/50] batch [405/1000] time 1.591 (1.565) data 0.001 (0.003) loss 1.5586 (1.1049) acc 53.1250 (72.0988) lr 7.5131e-04 eta 8:31:09 +epoch [31/50] batch [410/1000] time 1.562 (1.565) data 0.000 (0.003) loss 1.1523 (1.1065) acc 65.6250 (72.1037) lr 7.5131e-04 eta 8:31:01 +epoch [31/50] batch [415/1000] time 1.553 (1.565) data 0.001 (0.003) loss 1.3369 (1.1069) acc 68.7500 (72.0858) lr 7.5131e-04 eta 8:30:53 +epoch [31/50] batch [420/1000] time 1.551 (1.565) data 0.001 (0.003) loss 1.1475 (1.1056) acc 78.1250 (72.0759) lr 7.5131e-04 eta 8:30:43 +epoch [31/50] batch [425/1000] time 1.571 (1.565) data 0.000 (0.003) loss 0.7939 (1.1033) acc 75.0000 (72.1029) lr 7.5131e-04 eta 8:30:36 +epoch [31/50] batch [430/1000] time 1.563 (1.565) data 0.001 (0.003) loss 1.0791 (1.1023) acc 78.1250 (72.1439) lr 7.5131e-04 eta 8:30:28 +epoch [31/50] batch [435/1000] time 1.530 (1.565) data 0.001 (0.003) loss 0.5488 (1.0998) acc 81.2500 (72.2055) lr 7.5131e-04 eta 8:30:19 +epoch [31/50] batch [440/1000] time 1.542 (1.565) data 0.000 (0.003) loss 1.3184 (1.1002) acc 75.0000 (72.1875) lr 7.5131e-04 eta 8:30:10 +epoch [31/50] batch [445/1000] time 1.576 (1.565) data 0.000 (0.003) loss 1.0850 (1.1005) acc 68.7500 (72.1419) lr 7.5131e-04 eta 8:30:02 +epoch [31/50] batch [450/1000] time 1.574 (1.565) data 0.001 (0.003) loss 0.5303 (1.0980) acc 84.3750 (72.2361) lr 7.5131e-04 eta 8:29:56 +epoch [31/50] batch [455/1000] time 1.537 (1.565) data 0.001 (0.003) loss 0.9795 (1.1000) acc 78.1250 (72.2253) lr 7.5131e-04 eta 8:29:53 +epoch [31/50] batch [460/1000] time 1.561 (1.565) data 0.001 (0.003) loss 0.8384 (1.0981) acc 81.2500 (72.2894) lr 7.5131e-04 eta 8:29:46 +epoch [31/50] batch [465/1000] time 1.540 (1.565) data 0.000 
(0.003) loss 0.6221 (1.0978) acc 78.1250 (72.3118) lr 7.5131e-04 eta 8:29:38 +epoch [31/50] batch [470/1000] time 1.568 (1.565) data 0.001 (0.003) loss 1.8877 (1.1009) acc 62.5000 (72.2739) lr 7.5131e-04 eta 8:29:31 +epoch [31/50] batch [475/1000] time 1.558 (1.565) data 0.001 (0.003) loss 0.6929 (1.0993) acc 81.2500 (72.3026) lr 7.5131e-04 eta 8:29:24 +epoch [31/50] batch [480/1000] time 1.575 (1.565) data 0.001 (0.003) loss 1.0361 (1.1002) acc 75.0000 (72.2982) lr 7.5131e-04 eta 8:29:17 +epoch [31/50] batch [485/1000] time 1.560 (1.565) data 0.001 (0.003) loss 1.2773 (1.0986) acc 71.8750 (72.3647) lr 7.5131e-04 eta 8:29:10 +epoch [31/50] batch [490/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.1924 (1.1000) acc 75.0000 (72.3469) lr 7.5131e-04 eta 8:29:00 +epoch [31/50] batch [495/1000] time 1.571 (1.565) data 0.001 (0.002) loss 1.5635 (1.1005) acc 59.3750 (72.3169) lr 7.5131e-04 eta 8:28:51 +epoch [31/50] batch [500/1000] time 1.586 (1.566) data 0.001 (0.002) loss 0.9595 (1.1022) acc 71.8750 (72.2562) lr 7.5131e-04 eta 8:28:49 +epoch [31/50] batch [505/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.4043 (1.1033) acc 62.5000 (72.2339) lr 7.5131e-04 eta 8:28:41 +epoch [31/50] batch [510/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.6211 (1.1034) acc 62.5000 (72.2549) lr 7.5131e-04 eta 8:28:33 +epoch [31/50] batch [515/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.1270 (1.1050) acc 75.0000 (72.2087) lr 7.5131e-04 eta 8:28:24 +epoch [31/50] batch [520/1000] time 1.547 (1.566) data 0.000 (0.002) loss 1.2363 (1.1038) acc 68.7500 (72.2356) lr 7.5131e-04 eta 8:28:16 +epoch [31/50] batch [525/1000] time 1.577 (1.566) data 0.001 (0.002) loss 0.5513 (1.1038) acc 87.5000 (72.2619) lr 7.5131e-04 eta 8:28:08 +epoch [31/50] batch [530/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.1855 (1.1052) acc 62.5000 (72.1993) lr 7.5131e-04 eta 8:28:00 +epoch [31/50] batch [535/1000] time 1.549 (1.566) data 0.001 (0.002) loss 1.1982 (1.1055) acc 65.6250 (72.2079) lr 
7.5131e-04 eta 8:27:52 +epoch [31/50] batch [540/1000] time 1.559 (1.566) data 0.001 (0.002) loss 0.9067 (1.1047) acc 81.2500 (72.2338) lr 7.5131e-04 eta 8:27:49 +epoch [31/50] batch [545/1000] time 1.549 (1.566) data 0.000 (0.002) loss 0.8105 (1.1023) acc 78.1250 (72.2821) lr 7.5131e-04 eta 8:27:43 +epoch [31/50] batch [550/1000] time 1.572 (1.566) data 0.001 (0.002) loss 1.4971 (1.1043) acc 68.7500 (72.2386) lr 7.5131e-04 eta 8:27:34 +epoch [31/50] batch [555/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.0176 (1.1040) acc 65.6250 (72.2297) lr 7.5131e-04 eta 8:27:26 +epoch [31/50] batch [560/1000] time 1.566 (1.566) data 0.001 (0.002) loss 0.8438 (1.1034) acc 71.8750 (72.2266) lr 7.5131e-04 eta 8:27:19 +epoch [31/50] batch [565/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.1221 (1.1029) acc 71.8750 (72.2400) lr 7.5131e-04 eta 8:27:09 +epoch [31/50] batch [570/1000] time 1.572 (1.566) data 0.000 (0.002) loss 1.1387 (1.1024) acc 71.8750 (72.2533) lr 7.5131e-04 eta 8:27:00 +epoch [31/50] batch [575/1000] time 1.551 (1.565) data 0.000 (0.002) loss 0.7310 (1.1010) acc 81.2500 (72.2772) lr 7.5131e-04 eta 8:26:49 +epoch [31/50] batch [580/1000] time 1.539 (1.565) data 0.001 (0.002) loss 1.7871 (1.1006) acc 71.8750 (72.3168) lr 7.5131e-04 eta 8:26:38 +epoch [31/50] batch [585/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.0664 (1.1013) acc 78.1250 (72.3024) lr 7.5131e-04 eta 8:26:29 +epoch [31/50] batch [590/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.0557 (1.1019) acc 71.8750 (72.2934) lr 7.5131e-04 eta 8:26:19 +epoch [31/50] batch [595/1000] time 1.567 (1.565) data 0.001 (0.002) loss 0.7280 (1.1021) acc 84.3750 (72.2899) lr 7.5131e-04 eta 8:26:11 +epoch [31/50] batch [600/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.1543 (1.1014) acc 71.8750 (72.3229) lr 7.5131e-04 eta 8:26:03 +epoch [31/50] batch [605/1000] time 1.588 (1.566) data 0.000 (0.002) loss 0.7959 (1.1000) acc 81.2500 (72.3244) lr 7.5131e-04 eta 8:26:03 +epoch [31/50] batch 
[610/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.1543 (1.0989) acc 71.8750 (72.3207) lr 7.5131e-04 eta 8:25:55 +epoch [31/50] batch [615/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.3643 (1.0989) acc 71.8750 (72.3272) lr 7.5131e-04 eta 8:25:47 +epoch [31/50] batch [620/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.1396 (1.0985) acc 68.7500 (72.3085) lr 7.5131e-04 eta 8:25:39 +epoch [31/50] batch [625/1000] time 1.538 (1.565) data 0.000 (0.002) loss 1.4805 (1.0982) acc 71.8750 (72.3050) lr 7.5131e-04 eta 8:25:28 +epoch [31/50] batch [630/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.3281 (1.0988) acc 71.8750 (72.2917) lr 7.5131e-04 eta 8:25:21 +epoch [31/50] batch [635/1000] time 1.553 (1.565) data 0.000 (0.002) loss 0.4324 (1.0996) acc 90.6250 (72.2736) lr 7.5131e-04 eta 8:25:12 +epoch [31/50] batch [640/1000] time 1.586 (1.565) data 0.001 (0.002) loss 1.9385 (1.1018) acc 65.6250 (72.2266) lr 7.5131e-04 eta 8:25:03 +epoch [31/50] batch [645/1000] time 1.573 (1.565) data 0.001 (0.002) loss 1.4287 (1.1026) acc 68.7500 (72.2141) lr 7.5131e-04 eta 8:24:53 +epoch [31/50] batch [650/1000] time 1.557 (1.565) data 0.001 (0.002) loss 1.0664 (1.1029) acc 65.6250 (72.2115) lr 7.5131e-04 eta 8:24:48 +epoch [31/50] batch [655/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.1924 (1.1038) acc 68.7500 (72.1851) lr 7.5131e-04 eta 8:24:37 +epoch [31/50] batch [660/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.2686 (1.1025) acc 68.7500 (72.2064) lr 7.5131e-04 eta 8:24:29 +epoch [31/50] batch [665/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.7046 (1.1014) acc 78.1250 (72.2321) lr 7.5131e-04 eta 8:24:21 +epoch [31/50] batch [670/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.5547 (1.1022) acc 62.5000 (72.1968) lr 7.5131e-04 eta 8:24:10 +epoch [31/50] batch [675/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.0186 (1.1021) acc 71.8750 (72.1667) lr 7.5131e-04 eta 8:24:02 +epoch [31/50] batch [680/1000] time 1.568 (1.565) data 0.001 (0.002) loss 
1.4590 (1.1025) acc 65.6250 (72.1461) lr 7.5131e-04 eta 8:23:53 +epoch [31/50] batch [685/1000] time 1.573 (1.565) data 0.000 (0.002) loss 0.6499 (1.1017) acc 81.2500 (72.1259) lr 7.5131e-04 eta 8:23:45 +epoch [31/50] batch [690/1000] time 1.731 (1.565) data 0.000 (0.002) loss 1.0996 (1.1007) acc 71.8750 (72.1467) lr 7.5131e-04 eta 8:23:42 +epoch [31/50] batch [695/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.3252 (1.1004) acc 65.6250 (72.1673) lr 7.5131e-04 eta 8:23:34 +epoch [31/50] batch [700/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7158 (1.1022) acc 84.3750 (72.1473) lr 7.5131e-04 eta 8:23:26 +epoch [31/50] batch [705/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.9097 (1.1027) acc 78.1250 (72.1365) lr 7.5131e-04 eta 8:23:19 +epoch [31/50] batch [710/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.3018 (1.1041) acc 71.8750 (72.1391) lr 7.5131e-04 eta 8:23:11 +epoch [31/50] batch [715/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.4746 (1.1047) acc 59.3750 (72.1241) lr 7.5131e-04 eta 8:23:04 +epoch [31/50] batch [720/1000] time 1.568 (1.565) data 0.000 (0.002) loss 0.8105 (1.1035) acc 81.2500 (72.1745) lr 7.5131e-04 eta 8:22:55 +epoch [31/50] batch [725/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.1367 (1.1034) acc 65.6250 (72.1595) lr 7.5131e-04 eta 8:22:47 +epoch [31/50] batch [730/1000] time 1.542 (1.565) data 0.001 (0.002) loss 0.8188 (1.1040) acc 81.2500 (72.1704) lr 7.5131e-04 eta 8:22:37 +epoch [31/50] batch [735/1000] time 1.583 (1.565) data 0.001 (0.002) loss 1.1943 (1.1028) acc 78.1250 (72.1896) lr 7.5131e-04 eta 8:22:30 +epoch [31/50] batch [740/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.1406 (1.1027) acc 68.7500 (72.1833) lr 7.5131e-04 eta 8:22:21 +epoch [31/50] batch [745/1000] time 1.557 (1.565) data 0.001 (0.002) loss 1.4854 (1.1035) acc 65.6250 (72.2106) lr 7.5131e-04 eta 8:22:14 +epoch [31/50] batch [750/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.8975 (1.1048) acc 71.8750 (72.1792) lr 7.5131e-04 
eta 8:22:05 +epoch [31/50] batch [755/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.3643 (1.1080) acc 68.7500 (72.1233) lr 7.5131e-04 eta 8:22:03 +epoch [31/50] batch [760/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.0977 (1.1081) acc 71.8750 (72.1176) lr 7.5131e-04 eta 8:21:54 +epoch [31/50] batch [765/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.1094 (1.1074) acc 75.0000 (72.1324) lr 7.5131e-04 eta 8:21:48 +epoch [31/50] batch [770/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.0264 (1.1080) acc 75.0000 (72.1144) lr 7.5131e-04 eta 8:21:38 +epoch [31/50] batch [775/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.0820 (1.1090) acc 71.8750 (72.1129) lr 7.5131e-04 eta 8:21:30 +epoch [31/50] batch [780/1000] time 1.560 (1.565) data 0.001 (0.002) loss 1.4629 (1.1093) acc 62.5000 (72.0994) lr 7.5131e-04 eta 8:21:23 +epoch [31/50] batch [785/1000] time 1.552 (1.565) data 0.001 (0.002) loss 0.9053 (1.1086) acc 78.1250 (72.1019) lr 7.5131e-04 eta 8:21:14 +epoch [31/50] batch [790/1000] time 1.570 (1.565) data 0.001 (0.002) loss 1.1846 (1.1075) acc 68.7500 (72.1203) lr 7.5131e-04 eta 8:21:05 +epoch [31/50] batch [795/1000] time 1.557 (1.565) data 0.001 (0.002) loss 0.6880 (1.1068) acc 84.3750 (72.1423) lr 7.5131e-04 eta 8:20:56 +epoch [31/50] batch [800/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.1230 (1.1064) acc 71.8750 (72.1523) lr 7.5131e-04 eta 8:20:52 +epoch [31/50] batch [805/1000] time 1.582 (1.565) data 0.000 (0.002) loss 1.2051 (1.1090) acc 75.0000 (72.1390) lr 7.5131e-04 eta 8:20:45 +epoch [31/50] batch [810/1000] time 1.548 (1.565) data 0.001 (0.002) loss 1.2393 (1.1079) acc 75.0000 (72.1644) lr 7.5131e-04 eta 8:20:36 +epoch [31/50] batch [815/1000] time 1.556 (1.565) data 0.000 (0.002) loss 1.0938 (1.1077) acc 78.1250 (72.1894) lr 7.5131e-04 eta 8:20:28 +epoch [31/50] batch [820/1000] time 1.586 (1.565) data 0.000 (0.002) loss 1.4033 (1.1088) acc 71.8750 (72.1494) lr 7.5131e-04 eta 8:20:21 +epoch [31/50] batch [825/1000] time 
1.596 (1.565) data 0.000 (0.002) loss 0.9204 (1.1075) acc 71.8750 (72.1667) lr 7.5131e-04 eta 8:20:14 +epoch [31/50] batch [830/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.9263 (1.1075) acc 75.0000 (72.1762) lr 7.5131e-04 eta 8:20:08 +epoch [31/50] batch [835/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.8379 (1.1067) acc 71.8750 (72.1707) lr 7.5131e-04 eta 8:19:59 +epoch [31/50] batch [840/1000] time 1.543 (1.565) data 0.001 (0.002) loss 0.9126 (1.1054) acc 68.7500 (72.1912) lr 7.5131e-04 eta 8:19:50 +epoch [31/50] batch [845/1000] time 1.596 (1.566) data 0.001 (0.002) loss 1.1279 (1.1043) acc 75.0000 (72.2189) lr 7.5131e-04 eta 8:19:47 +epoch [31/50] batch [850/1000] time 1.555 (1.566) data 0.000 (0.002) loss 1.0928 (1.1053) acc 65.6250 (72.1801) lr 7.5131e-04 eta 8:19:40 +epoch [31/50] batch [855/1000] time 1.537 (1.566) data 0.000 (0.002) loss 1.2295 (1.1056) acc 75.0000 (72.1820) lr 7.5131e-04 eta 8:19:31 +epoch [31/50] batch [860/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.1348 (1.1066) acc 78.1250 (72.1657) lr 7.5131e-04 eta 8:19:24 +epoch [31/50] batch [865/1000] time 1.548 (1.566) data 0.001 (0.002) loss 1.3574 (1.1064) acc 75.0000 (72.1532) lr 7.5131e-04 eta 8:19:15 +epoch [31/50] batch [870/1000] time 1.575 (1.566) data 0.001 (0.002) loss 1.3428 (1.1060) acc 68.7500 (72.1552) lr 7.5131e-04 eta 8:19:08 +epoch [31/50] batch [875/1000] time 1.556 (1.566) data 0.001 (0.002) loss 0.6338 (1.1047) acc 84.3750 (72.1786) lr 7.5131e-04 eta 8:19:00 +epoch [31/50] batch [880/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.9019 (1.1040) acc 75.0000 (72.2017) lr 7.5131e-04 eta 8:18:51 +epoch [31/50] batch [885/1000] time 1.581 (1.565) data 0.000 (0.002) loss 0.7563 (1.1042) acc 75.0000 (72.2034) lr 7.5131e-04 eta 8:18:43 +epoch [31/50] batch [890/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.8057 (1.1035) acc 78.1250 (72.2331) lr 7.5131e-04 eta 8:18:35 +epoch [31/50] batch [895/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.2764 (1.1039) 
acc 62.5000 (72.2207) lr 7.5131e-04 eta 8:18:27 +epoch [31/50] batch [900/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.0410 (1.1018) acc 78.1250 (72.2743) lr 7.5131e-04 eta 8:18:18 +epoch [31/50] batch [905/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.0361 (1.1032) acc 62.5000 (72.2445) lr 7.5131e-04 eta 8:18:13 +epoch [31/50] batch [910/1000] time 1.576 (1.566) data 0.001 (0.002) loss 0.7798 (1.1026) acc 78.1250 (72.2699) lr 7.5131e-04 eta 8:18:05 +epoch [31/50] batch [915/1000] time 1.560 (1.566) data 0.001 (0.002) loss 0.8506 (1.1024) acc 75.0000 (72.2814) lr 7.5131e-04 eta 8:17:58 +epoch [31/50] batch [920/1000] time 1.551 (1.566) data 0.001 (0.002) loss 0.8394 (1.1024) acc 71.8750 (72.2588) lr 7.5131e-04 eta 8:17:50 +epoch [31/50] batch [925/1000] time 1.525 (1.565) data 0.001 (0.002) loss 1.3096 (1.1021) acc 68.7500 (72.2703) lr 7.5131e-04 eta 8:17:41 +epoch [31/50] batch [930/1000] time 1.590 (1.565) data 0.000 (0.002) loss 1.9307 (1.1022) acc 56.2500 (72.2648) lr 7.5131e-04 eta 8:17:33 +epoch [31/50] batch [935/1000] time 1.557 (1.565) data 0.001 (0.002) loss 0.9658 (1.1013) acc 71.8750 (72.2527) lr 7.5131e-04 eta 8:17:25 +epoch [31/50] batch [940/1000] time 1.539 (1.565) data 0.001 (0.002) loss 1.1055 (1.1004) acc 71.8750 (72.2739) lr 7.5131e-04 eta 8:17:16 +epoch [31/50] batch [945/1000] time 1.565 (1.565) data 0.001 (0.002) loss 2.2617 (1.1005) acc 50.0000 (72.2718) lr 7.5131e-04 eta 8:17:08 +epoch [31/50] batch [950/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.7852 (1.0991) acc 75.0000 (72.2928) lr 7.5131e-04 eta 8:17:02 +epoch [31/50] batch [955/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.4932 (1.1006) acc 59.3750 (72.2677) lr 7.5131e-04 eta 8:16:54 +epoch [31/50] batch [960/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.8921 (1.0996) acc 68.7500 (72.2721) lr 7.5131e-04 eta 8:16:46 +epoch [31/50] batch [965/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.0146 (1.0990) acc 84.3750 (72.2960) lr 7.5131e-04 eta 8:16:38 
+epoch [31/50] batch [970/1000] time 1.590 (1.566) data 0.000 (0.002) loss 1.1006 (1.1002) acc 71.8750 (72.3003) lr 7.5131e-04 eta 8:16:31 +epoch [31/50] batch [975/1000] time 1.582 (1.566) data 0.000 (0.001) loss 1.0947 (1.1002) acc 59.3750 (72.3013) lr 7.5131e-04 eta 8:16:23 +epoch [31/50] batch [980/1000] time 1.556 (1.566) data 0.000 (0.001) loss 1.2998 (1.0996) acc 62.5000 (72.3055) lr 7.5131e-04 eta 8:16:16 +epoch [31/50] batch [985/1000] time 1.543 (1.565) data 0.001 (0.001) loss 0.7173 (1.0989) acc 84.3750 (72.3319) lr 7.5131e-04 eta 8:16:07 +epoch [31/50] batch [990/1000] time 1.564 (1.565) data 0.000 (0.001) loss 1.3994 (1.0996) acc 62.5000 (72.3232) lr 7.5131e-04 eta 8:15:59 +epoch [31/50] batch [995/1000] time 1.571 (1.566) data 0.000 (0.001) loss 1.6270 (1.0996) acc 62.5000 (72.3178) lr 7.5131e-04 eta 8:15:55 +epoch [31/50] batch [1000/1000] time 1.559 (1.566) data 0.000 (0.001) loss 0.8525 (1.0995) acc 84.3750 (72.3219) lr 6.9098e-04 eta 8:15:47 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,309 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 78.2% +epoch [32/50] batch [5/1000] time 1.562 (1.698) data 0.001 (0.201) loss 1.4502 (1.2303) acc 71.8750 (68.1250) lr 6.9098e-04 eta 8:57:32 +epoch [32/50] batch [10/1000] time 1.539 (1.626) data 0.001 (0.101) loss 1.1777 (1.2021) acc 71.8750 (69.6875) lr 6.9098e-04 eta 8:34:38 +epoch [32/50] batch [15/1000] time 1.537 (1.619) data 0.001 (0.067) loss 0.5049 (1.1011) acc 81.2500 (73.1250) lr 6.9098e-04 eta 8:32:13 +epoch [32/50] batch [20/1000] time 1.555 (1.602) data 0.000 (0.051) loss 0.8457 (1.1016) acc 81.2500 (73.9062) lr 6.9098e-04 eta 8:26:41 +epoch [32/50] batch [25/1000] time 1.582 (1.593) data 0.001 (0.041) loss 1.9092 (1.1453) acc 59.3750 (72.8750) lr 6.9098e-04 eta 8:23:42 +epoch [32/50] batch [30/1000] time 1.561 (1.587) data 0.001 (0.034) loss 1.8975 (1.1535) acc 65.6250 (72.9167) lr 6.9098e-04 eta 8:21:44 +epoch [32/50] batch [35/1000] time 1.551 (1.583) data 0.001 (0.029) 
loss 1.2832 (1.1276) acc 68.7500 (73.5714) lr 6.9098e-04 eta 8:20:16 +epoch [32/50] batch [40/1000] time 1.565 (1.580) data 0.000 (0.026) loss 1.4111 (1.1488) acc 65.6250 (72.7344) lr 6.9098e-04 eta 8:19:14 +epoch [32/50] batch [45/1000] time 1.573 (1.579) data 0.001 (0.023) loss 0.9517 (1.1446) acc 71.8750 (72.8472) lr 6.9098e-04 eta 8:18:51 +epoch [32/50] batch [50/1000] time 1.571 (1.578) data 0.001 (0.021) loss 1.1309 (1.1273) acc 71.8750 (73.3750) lr 6.9098e-04 eta 8:18:25 +epoch [32/50] batch [55/1000] time 1.569 (1.577) data 0.001 (0.019) loss 0.6431 (1.0961) acc 84.3750 (73.9773) lr 6.9098e-04 eta 8:17:57 +epoch [32/50] batch [60/1000] time 1.546 (1.576) data 0.001 (0.017) loss 1.4102 (1.0933) acc 53.1250 (73.7500) lr 6.9098e-04 eta 8:17:22 +epoch [32/50] batch [65/1000] time 1.585 (1.576) data 0.001 (0.016) loss 0.8696 (1.0920) acc 71.8750 (73.7500) lr 6.9098e-04 eta 8:17:15 +epoch [32/50] batch [70/1000] time 1.575 (1.576) data 0.001 (0.015) loss 1.0312 (1.0942) acc 75.0000 (73.8393) lr 6.9098e-04 eta 8:17:08 +epoch [32/50] batch [75/1000] time 1.556 (1.579) data 0.000 (0.014) loss 2.3301 (1.1170) acc 53.1250 (73.4583) lr 6.9098e-04 eta 8:18:01 +epoch [32/50] batch [80/1000] time 1.548 (1.578) data 0.001 (0.013) loss 1.4512 (1.1210) acc 71.8750 (73.3984) lr 6.9098e-04 eta 8:17:43 +epoch [32/50] batch [85/1000] time 1.569 (1.578) data 0.001 (0.012) loss 0.9502 (1.1137) acc 75.0000 (73.4559) lr 6.9098e-04 eta 8:17:33 +epoch [32/50] batch [90/1000] time 1.554 (1.577) data 0.000 (0.012) loss 1.5586 (1.1183) acc 62.5000 (73.1944) lr 6.9098e-04 eta 8:17:08 +epoch [32/50] batch [95/1000] time 1.543 (1.577) data 0.000 (0.011) loss 1.4229 (1.1129) acc 65.6250 (73.2237) lr 6.9098e-04 eta 8:16:44 +epoch [32/50] batch [100/1000] time 1.582 (1.576) data 0.000 (0.011) loss 1.0566 (1.1048) acc 81.2500 (73.2188) lr 6.9098e-04 eta 8:16:17 +epoch [32/50] batch [105/1000] time 1.547 (1.575) data 0.000 (0.010) loss 0.8413 (1.0957) acc 81.2500 (73.5417) lr 6.9098e-04 eta 
8:15:50 +epoch [32/50] batch [110/1000] time 1.554 (1.574) data 0.000 (0.010) loss 1.1904 (1.0889) acc 78.1250 (73.6648) lr 6.9098e-04 eta 8:15:32 +epoch [32/50] batch [115/1000] time 1.584 (1.574) data 0.001 (0.009) loss 1.4570 (1.1070) acc 62.5000 (73.2609) lr 6.9098e-04 eta 8:15:22 +epoch [32/50] batch [120/1000] time 1.594 (1.575) data 0.000 (0.009) loss 0.6538 (1.1057) acc 81.2500 (73.3594) lr 6.9098e-04 eta 8:15:41 +epoch [32/50] batch [125/1000] time 1.563 (1.575) data 0.000 (0.009) loss 0.6499 (1.1102) acc 90.6250 (73.3500) lr 6.9098e-04 eta 8:15:22 +epoch [32/50] batch [130/1000] time 1.581 (1.574) data 0.001 (0.008) loss 0.7017 (1.1117) acc 87.5000 (73.2212) lr 6.9098e-04 eta 8:15:09 +epoch [32/50] batch [135/1000] time 1.542 (1.574) data 0.000 (0.008) loss 1.2812 (1.1068) acc 81.2500 (73.3565) lr 6.9098e-04 eta 8:14:55 +epoch [32/50] batch [140/1000] time 1.580 (1.574) data 0.001 (0.008) loss 0.6050 (1.1039) acc 84.3750 (73.4821) lr 6.9098e-04 eta 8:14:43 +epoch [32/50] batch [145/1000] time 1.559 (1.573) data 0.001 (0.007) loss 0.9092 (1.1006) acc 75.0000 (73.6422) lr 6.9098e-04 eta 8:14:27 +epoch [32/50] batch [150/1000] time 1.557 (1.573) data 0.000 (0.007) loss 0.4524 (1.0944) acc 90.6250 (73.7292) lr 6.9098e-04 eta 8:14:15 +epoch [32/50] batch [155/1000] time 1.543 (1.572) data 0.000 (0.007) loss 0.9492 (1.0905) acc 71.8750 (73.7097) lr 6.9098e-04 eta 8:13:52 +epoch [32/50] batch [160/1000] time 1.578 (1.572) data 0.001 (0.007) loss 0.8179 (1.0883) acc 78.1250 (73.5938) lr 6.9098e-04 eta 8:13:42 +epoch [32/50] batch [165/1000] time 1.549 (1.573) data 0.000 (0.007) loss 1.1855 (1.0929) acc 78.1250 (73.4659) lr 6.9098e-04 eta 8:13:49 +epoch [32/50] batch [170/1000] time 1.557 (1.573) data 0.000 (0.006) loss 0.5620 (1.0927) acc 84.3750 (73.5294) lr 6.9098e-04 eta 8:13:34 +epoch [32/50] batch [175/1000] time 1.569 (1.572) data 0.000 (0.006) loss 1.2842 (1.0919) acc 65.6250 (73.5893) lr 6.9098e-04 eta 8:13:22 +epoch [32/50] batch [180/1000] time 1.559 
(1.572) data 0.001 (0.006) loss 1.2480 (1.0907) acc 59.3750 (73.5243) lr 6.9098e-04 eta 8:13:08 +epoch [32/50] batch [185/1000] time 1.555 (1.572) data 0.000 (0.006) loss 1.7314 (1.0946) acc 59.3750 (73.4628) lr 6.9098e-04 eta 8:13:01 +epoch [32/50] batch [190/1000] time 1.561 (1.572) data 0.001 (0.006) loss 1.0850 (1.0950) acc 65.6250 (73.4046) lr 6.9098e-04 eta 8:12:53 +epoch [32/50] batch [195/1000] time 1.552 (1.572) data 0.001 (0.006) loss 1.0361 (1.1041) acc 65.6250 (73.1090) lr 6.9098e-04 eta 8:12:41 +epoch [32/50] batch [200/1000] time 1.560 (1.572) data 0.000 (0.006) loss 0.9814 (1.0986) acc 71.8750 (73.2031) lr 6.9098e-04 eta 8:12:27 +epoch [32/50] batch [205/1000] time 1.554 (1.571) data 0.001 (0.005) loss 1.0732 (1.1028) acc 78.1250 (73.0793) lr 6.9098e-04 eta 8:12:11 +epoch [32/50] batch [210/1000] time 1.543 (1.571) data 0.000 (0.005) loss 1.1621 (1.0991) acc 71.8750 (73.1696) lr 6.9098e-04 eta 8:12:02 +epoch [32/50] batch [215/1000] time 1.562 (1.571) data 0.000 (0.005) loss 0.9448 (1.0954) acc 71.8750 (73.2267) lr 6.9098e-04 eta 8:11:54 +epoch [32/50] batch [220/1000] time 1.565 (1.571) data 0.000 (0.005) loss 1.6162 (1.0951) acc 59.3750 (73.1250) lr 6.9098e-04 eta 8:11:44 +epoch [32/50] batch [225/1000] time 1.714 (1.571) data 0.000 (0.005) loss 0.6206 (1.0921) acc 87.5000 (73.2083) lr 6.9098e-04 eta 8:11:43 +epoch [32/50] batch [230/1000] time 1.555 (1.571) data 0.000 (0.005) loss 1.0293 (1.0954) acc 75.0000 (73.1386) lr 6.9098e-04 eta 8:11:30 +epoch [32/50] batch [235/1000] time 1.579 (1.571) data 0.000 (0.005) loss 1.0996 (1.0943) acc 78.1250 (73.1782) lr 6.9098e-04 eta 8:11:21 +epoch [32/50] batch [240/1000] time 1.558 (1.571) data 0.000 (0.005) loss 0.8271 (1.0938) acc 78.1250 (73.2161) lr 6.9098e-04 eta 8:11:12 +epoch [32/50] batch [245/1000] time 1.551 (1.571) data 0.000 (0.005) loss 1.5352 (1.0946) acc 59.3750 (73.2908) lr 6.9098e-04 eta 8:11:00 +epoch [32/50] batch [250/1000] time 1.564 (1.571) data 0.000 (0.004) loss 1.6797 (1.0989) acc 
62.5000 (73.2250) lr 6.9098e-04 eta 8:10:47 +epoch [32/50] batch [255/1000] time 1.567 (1.570) data 0.000 (0.004) loss 1.1025 (1.0977) acc 65.6250 (73.2598) lr 6.9098e-04 eta 8:10:38 +epoch [32/50] batch [260/1000] time 1.557 (1.570) data 0.001 (0.004) loss 0.8994 (1.0968) acc 75.0000 (73.2212) lr 6.9098e-04 eta 8:10:27 +epoch [32/50] batch [265/1000] time 1.561 (1.570) data 0.001 (0.004) loss 1.3252 (1.1002) acc 71.8750 (73.1722) lr 6.9098e-04 eta 8:10:15 +epoch [32/50] batch [270/1000] time 1.732 (1.571) data 0.001 (0.004) loss 0.8101 (1.1046) acc 81.2500 (73.1713) lr 6.9098e-04 eta 8:10:15 +epoch [32/50] batch [275/1000] time 1.572 (1.570) data 0.000 (0.004) loss 0.9829 (1.1005) acc 81.2500 (73.2841) lr 6.9098e-04 eta 8:10:01 +epoch [32/50] batch [280/1000] time 1.571 (1.570) data 0.000 (0.004) loss 1.3896 (1.1029) acc 65.6250 (73.2366) lr 6.9098e-04 eta 8:09:52 +epoch [32/50] batch [285/1000] time 1.564 (1.570) data 0.001 (0.004) loss 0.8965 (1.1050) acc 78.1250 (73.2127) lr 6.9098e-04 eta 8:09:44 +epoch [32/50] batch [290/1000] time 1.549 (1.570) data 0.000 (0.004) loss 0.7686 (1.1032) acc 78.1250 (73.2220) lr 6.9098e-04 eta 8:09:29 +epoch [32/50] batch [295/1000] time 1.563 (1.570) data 0.001 (0.004) loss 0.9932 (1.1041) acc 71.8750 (73.2097) lr 6.9098e-04 eta 8:09:17 +epoch [32/50] batch [300/1000] time 1.541 (1.569) data 0.001 (0.004) loss 0.7793 (1.1031) acc 75.0000 (73.2292) lr 6.9098e-04 eta 8:09:07 +epoch [32/50] batch [305/1000] time 1.573 (1.569) data 0.000 (0.004) loss 0.9854 (1.1054) acc 71.8750 (73.1762) lr 6.9098e-04 eta 8:08:56 +epoch [32/50] batch [310/1000] time 1.567 (1.569) data 0.001 (0.004) loss 1.3945 (1.1059) acc 68.7500 (73.2258) lr 6.9098e-04 eta 8:08:47 +epoch [32/50] batch [315/1000] time 1.543 (1.570) data 0.000 (0.004) loss 1.7939 (1.1092) acc 56.2500 (73.1052) lr 6.9098e-04 eta 8:08:47 +epoch [32/50] batch [320/1000] time 1.548 (1.570) data 0.000 (0.004) loss 1.3330 (1.1085) acc 65.6250 (73.0566) lr 6.9098e-04 eta 8:08:40 +epoch 
[32/50] batch [325/1000] time 1.534 (1.569) data 0.001 (0.004) loss 0.8638 (1.1060) acc 78.1250 (73.0673) lr 6.9098e-04 eta 8:08:29 +epoch [32/50] batch [330/1000] time 1.563 (1.569) data 0.000 (0.004) loss 1.2910 (1.1074) acc 59.3750 (72.9830) lr 6.9098e-04 eta 8:08:20 +epoch [32/50] batch [335/1000] time 1.566 (1.569) data 0.001 (0.003) loss 1.2959 (1.1097) acc 68.7500 (72.9851) lr 6.9098e-04 eta 8:08:11 +epoch [32/50] batch [340/1000] time 1.556 (1.569) data 0.000 (0.003) loss 0.9507 (1.1085) acc 78.1250 (72.9871) lr 6.9098e-04 eta 8:07:59 +epoch [32/50] batch [345/1000] time 1.584 (1.569) data 0.000 (0.003) loss 1.0947 (1.1083) acc 78.1250 (72.9891) lr 6.9098e-04 eta 8:07:50 +epoch [32/50] batch [350/1000] time 1.554 (1.569) data 0.001 (0.003) loss 1.2656 (1.1090) acc 68.7500 (72.9821) lr 6.9098e-04 eta 8:07:41 +epoch [32/50] batch [355/1000] time 1.558 (1.569) data 0.001 (0.003) loss 0.7227 (1.1052) acc 81.2500 (73.0810) lr 6.9098e-04 eta 8:07:31 +epoch [32/50] batch [360/1000] time 1.541 (1.569) data 0.001 (0.003) loss 1.7197 (1.1079) acc 65.6250 (73.0556) lr 6.9098e-04 eta 8:07:23 +epoch [32/50] batch [365/1000] time 1.544 (1.569) data 0.001 (0.003) loss 0.9839 (1.1038) acc 78.1250 (73.0822) lr 6.9098e-04 eta 8:07:13 +epoch [32/50] batch [370/1000] time 1.569 (1.569) data 0.001 (0.003) loss 1.7422 (1.1055) acc 62.5000 (73.0490) lr 6.9098e-04 eta 8:07:02 +epoch [32/50] batch [375/1000] time 1.579 (1.569) data 0.001 (0.003) loss 1.4141 (1.1061) acc 65.6250 (73.0417) lr 6.9098e-04 eta 8:06:54 +epoch [32/50] batch [380/1000] time 1.571 (1.569) data 0.000 (0.003) loss 1.3252 (1.1060) acc 65.6250 (73.0181) lr 6.9098e-04 eta 8:06:54 +epoch [32/50] batch [385/1000] time 1.542 (1.569) data 0.001 (0.003) loss 0.7788 (1.1067) acc 75.0000 (72.9870) lr 6.9098e-04 eta 8:06:44 +epoch [32/50] batch [390/1000] time 1.560 (1.569) data 0.000 (0.003) loss 0.7007 (1.1031) acc 81.2500 (73.0849) lr 6.9098e-04 eta 8:06:36 +epoch [32/50] batch [395/1000] time 1.574 (1.569) data 
0.001 (0.003) loss 1.1875 (1.1032) acc 68.7500 (73.0617) lr 6.9098e-04 eta 8:06:25 +epoch [32/50] batch [400/1000] time 1.570 (1.569) data 0.000 (0.003) loss 1.1846 (1.1025) acc 68.7500 (73.0859) lr 6.9098e-04 eta 8:06:14 +epoch [32/50] batch [405/1000] time 1.575 (1.569) data 0.001 (0.003) loss 1.1387 (1.0998) acc 68.7500 (73.1867) lr 6.9098e-04 eta 8:06:07 +epoch [32/50] batch [410/1000] time 1.562 (1.569) data 0.001 (0.003) loss 1.5947 (1.0985) acc 65.6250 (73.1860) lr 6.9098e-04 eta 8:05:59 +epoch [32/50] batch [415/1000] time 1.564 (1.568) data 0.000 (0.003) loss 1.0400 (1.0986) acc 78.1250 (73.2003) lr 6.9098e-04 eta 8:05:49 +epoch [32/50] batch [420/1000] time 1.571 (1.568) data 0.001 (0.003) loss 1.0459 (1.1003) acc 75.0000 (73.1994) lr 6.9098e-04 eta 8:05:39 +epoch [32/50] batch [425/1000] time 1.554 (1.569) data 0.001 (0.003) loss 1.0830 (1.1023) acc 75.0000 (73.1765) lr 6.9098e-04 eta 8:05:36 +epoch [32/50] batch [430/1000] time 1.577 (1.569) data 0.000 (0.003) loss 0.8252 (1.0993) acc 81.2500 (73.2413) lr 6.9098e-04 eta 8:05:27 +epoch [32/50] batch [435/1000] time 1.560 (1.568) data 0.000 (0.003) loss 0.8281 (1.0982) acc 78.1250 (73.2256) lr 6.9098e-04 eta 8:05:18 +epoch [32/50] batch [440/1000] time 1.578 (1.568) data 0.001 (0.003) loss 1.3838 (1.0993) acc 62.5000 (73.1676) lr 6.9098e-04 eta 8:05:10 +epoch [32/50] batch [445/1000] time 1.551 (1.568) data 0.001 (0.003) loss 0.7075 (1.0978) acc 81.2500 (73.1742) lr 6.9098e-04 eta 8:05:00 +epoch [32/50] batch [450/1000] time 1.570 (1.568) data 0.001 (0.003) loss 1.0439 (1.0966) acc 71.8750 (73.1875) lr 6.9098e-04 eta 8:04:53 +epoch [32/50] batch [455/1000] time 1.551 (1.568) data 0.000 (0.003) loss 0.6567 (1.0946) acc 75.0000 (73.2280) lr 6.9098e-04 eta 8:04:43 +epoch [32/50] batch [460/1000] time 1.563 (1.568) data 0.001 (0.003) loss 1.0410 (1.0937) acc 65.6250 (73.2337) lr 6.9098e-04 eta 8:04:33 +epoch [32/50] batch [465/1000] time 1.562 (1.568) data 0.001 (0.003) loss 1.5811 (1.0940) acc 62.5000 
(73.1922) lr 6.9098e-04 eta 8:04:31 +epoch [32/50] batch [470/1000] time 1.566 (1.568) data 0.001 (0.003) loss 0.8418 (1.0938) acc 78.1250 (73.2314) lr 6.9098e-04 eta 8:04:22 +epoch [32/50] batch [475/1000] time 1.558 (1.568) data 0.000 (0.003) loss 0.8018 (1.0949) acc 71.8750 (73.1711) lr 6.9098e-04 eta 8:04:13 +epoch [32/50] batch [480/1000] time 1.567 (1.568) data 0.001 (0.003) loss 1.2246 (1.0944) acc 71.8750 (73.1641) lr 6.9098e-04 eta 8:04:05 +epoch [32/50] batch [485/1000] time 1.556 (1.568) data 0.000 (0.003) loss 0.8345 (1.0941) acc 84.3750 (73.2023) lr 6.9098e-04 eta 8:03:55 +epoch [32/50] batch [490/1000] time 1.546 (1.568) data 0.001 (0.003) loss 0.9614 (1.0942) acc 71.8750 (73.1888) lr 6.9098e-04 eta 8:03:43 +epoch [32/50] batch [495/1000] time 1.545 (1.568) data 0.001 (0.003) loss 1.1836 (1.0941) acc 75.0000 (73.1692) lr 6.9098e-04 eta 8:03:34 +epoch [32/50] batch [500/1000] time 1.571 (1.568) data 0.001 (0.002) loss 1.0566 (1.0921) acc 78.1250 (73.2313) lr 6.9098e-04 eta 8:03:26 +epoch [32/50] batch [505/1000] time 1.577 (1.568) data 0.001 (0.002) loss 0.7837 (1.0937) acc 87.5000 (73.1745) lr 6.9098e-04 eta 8:03:17 +epoch [32/50] batch [510/1000] time 1.564 (1.568) data 0.000 (0.002) loss 1.7939 (1.0957) acc 59.3750 (73.1556) lr 6.9098e-04 eta 8:03:07 +epoch [32/50] batch [515/1000] time 1.579 (1.568) data 0.000 (0.002) loss 1.1006 (1.0945) acc 78.1250 (73.2039) lr 6.9098e-04 eta 8:02:58 +epoch [32/50] batch [520/1000] time 1.580 (1.568) data 0.000 (0.002) loss 1.4170 (1.0940) acc 65.6250 (73.1731) lr 6.9098e-04 eta 8:02:49 +epoch [32/50] batch [525/1000] time 1.532 (1.567) data 0.001 (0.002) loss 0.7676 (1.0926) acc 75.0000 (73.1905) lr 6.9098e-04 eta 8:02:38 +epoch [32/50] batch [530/1000] time 1.562 (1.568) data 0.001 (0.002) loss 1.7314 (1.0942) acc 71.8750 (73.1781) lr 6.9098e-04 eta 8:02:34 +epoch [32/50] batch [535/1000] time 1.558 (1.568) data 0.000 (0.002) loss 0.4800 (1.0912) acc 87.5000 (73.2301) lr 6.9098e-04 eta 8:02:24 +epoch [32/50] 
batch [540/1000] time 1.554 (1.567) data 0.000 (0.002) loss 0.6719 (1.0925) acc 81.2500 (73.2002) lr 6.9098e-04 eta 8:02:15 +epoch [32/50] batch [545/1000] time 1.590 (1.567) data 0.000 (0.002) loss 0.8364 (1.0918) acc 81.2500 (73.1823) lr 6.9098e-04 eta 8:02:06 +epoch [32/50] batch [550/1000] time 1.546 (1.567) data 0.004 (0.002) loss 0.8418 (1.0920) acc 81.2500 (73.1818) lr 6.9098e-04 eta 8:01:58 +epoch [32/50] batch [555/1000] time 1.553 (1.567) data 0.000 (0.002) loss 0.7490 (1.0920) acc 75.0000 (73.1363) lr 6.9098e-04 eta 8:01:48 +epoch [32/50] batch [560/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.3584 (1.0924) acc 68.7500 (73.1306) lr 6.9098e-04 eta 8:01:39 +epoch [32/50] batch [565/1000] time 1.568 (1.567) data 0.001 (0.002) loss 0.8096 (1.0922) acc 68.7500 (73.0752) lr 6.9098e-04 eta 8:01:30 +epoch [32/50] batch [570/1000] time 1.542 (1.567) data 0.001 (0.002) loss 1.2236 (1.0933) acc 65.6250 (73.0373) lr 6.9098e-04 eta 8:01:22 +epoch [32/50] batch [575/1000] time 1.598 (1.567) data 0.000 (0.002) loss 1.0967 (1.0918) acc 65.6250 (73.0054) lr 6.9098e-04 eta 8:01:20 +epoch [32/50] batch [580/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.1562 (1.0926) acc 59.3750 (72.9795) lr 6.9098e-04 eta 8:01:12 +epoch [32/50] batch [585/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.8384 (1.0921) acc 78.1250 (72.9701) lr 6.9098e-04 eta 8:01:03 +epoch [32/50] batch [590/1000] time 1.571 (1.567) data 0.000 (0.002) loss 1.3105 (1.0928) acc 75.0000 (72.9502) lr 6.9098e-04 eta 8:00:56 +epoch [32/50] batch [595/1000] time 1.552 (1.567) data 0.000 (0.002) loss 1.1709 (1.0946) acc 78.1250 (72.9097) lr 6.9098e-04 eta 8:00:47 +epoch [32/50] batch [600/1000] time 1.567 (1.567) data 0.000 (0.002) loss 0.4033 (1.0946) acc 90.6250 (72.8490) lr 6.9098e-04 eta 8:00:39 +epoch [32/50] batch [605/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.1201 (1.0959) acc 65.6250 (72.8099) lr 6.9098e-04 eta 8:00:30 +epoch [32/50] batch [610/1000] time 1.561 (1.567) data 0.000 
(0.002) loss 1.0537 (1.0955) acc 68.7500 (72.7920) lr 6.9098e-04 eta 8:00:22 +epoch [32/50] batch [615/1000] time 1.698 (1.567) data 0.000 (0.002) loss 1.1328 (1.0954) acc 75.0000 (72.7795) lr 6.9098e-04 eta 8:00:17 +epoch [32/50] batch [620/1000] time 1.549 (1.567) data 0.000 (0.002) loss 0.8125 (1.0933) acc 84.3750 (72.8075) lr 6.9098e-04 eta 8:00:07 +epoch [32/50] batch [625/1000] time 1.540 (1.567) data 0.001 (0.002) loss 1.0312 (1.0923) acc 78.1250 (72.8250) lr 6.9098e-04 eta 7:59:56 +epoch [32/50] batch [630/1000] time 1.551 (1.567) data 0.001 (0.002) loss 1.3135 (1.0923) acc 68.7500 (72.8274) lr 6.9098e-04 eta 7:59:46 +epoch [32/50] batch [635/1000] time 1.574 (1.567) data 0.001 (0.002) loss 1.2109 (1.0930) acc 68.7500 (72.8346) lr 6.9098e-04 eta 7:59:36 +epoch [32/50] batch [640/1000] time 1.572 (1.567) data 0.000 (0.002) loss 1.0059 (1.0936) acc 75.0000 (72.8174) lr 6.9098e-04 eta 7:59:29 +epoch [32/50] batch [645/1000] time 1.564 (1.567) data 0.001 (0.002) loss 0.8569 (1.0933) acc 75.0000 (72.8149) lr 6.9098e-04 eta 7:59:20 +epoch [32/50] batch [650/1000] time 1.584 (1.567) data 0.000 (0.002) loss 1.1211 (1.0949) acc 81.2500 (72.8029) lr 6.9098e-04 eta 7:59:13 +epoch [32/50] batch [655/1000] time 1.546 (1.567) data 0.001 (0.002) loss 1.2402 (1.0967) acc 65.6250 (72.7672) lr 6.9098e-04 eta 7:59:03 +epoch [32/50] batch [660/1000] time 1.549 (1.567) data 0.000 (0.002) loss 1.1738 (1.0970) acc 59.3750 (72.7225) lr 6.9098e-04 eta 7:58:55 +epoch [32/50] batch [665/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.5312 (1.0971) acc 65.6250 (72.7256) lr 6.9098e-04 eta 7:58:47 +epoch [32/50] batch [670/1000] time 1.537 (1.567) data 0.001 (0.002) loss 1.0576 (1.0975) acc 65.6250 (72.7006) lr 6.9098e-04 eta 7:58:38 +epoch [32/50] batch [675/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.2490 (1.0996) acc 65.6250 (72.6667) lr 6.9098e-04 eta 7:58:30 +epoch [32/50] batch [680/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.3770 (1.0990) acc 71.8750 (72.6700) lr 
6.9098e-04 eta 7:58:26 +epoch [32/50] batch [685/1000] time 1.560 (1.567) data 0.000 (0.002) loss 0.8228 (1.0986) acc 81.2500 (72.6870) lr 6.9098e-04 eta 7:58:16 +epoch [32/50] batch [690/1000] time 1.575 (1.567) data 0.001 (0.002) loss 1.0322 (1.0990) acc 81.2500 (72.6857) lr 6.9098e-04 eta 7:58:09 +epoch [32/50] batch [695/1000] time 1.559 (1.567) data 0.001 (0.002) loss 1.3105 (1.0997) acc 65.6250 (72.6574) lr 6.9098e-04 eta 7:58:00 +epoch [32/50] batch [700/1000] time 1.574 (1.567) data 0.000 (0.002) loss 1.0332 (1.0992) acc 68.7500 (72.6562) lr 6.9098e-04 eta 7:57:52 +epoch [32/50] batch [705/1000] time 1.587 (1.567) data 0.000 (0.002) loss 1.3945 (1.0987) acc 71.8750 (72.6684) lr 6.9098e-04 eta 7:57:44 +epoch [32/50] batch [710/1000] time 1.540 (1.567) data 0.000 (0.002) loss 1.2051 (1.0974) acc 65.6250 (72.6761) lr 6.9098e-04 eta 7:57:35 +epoch [32/50] batch [715/1000] time 1.592 (1.567) data 0.001 (0.002) loss 0.9946 (1.0963) acc 75.0000 (72.6923) lr 6.9098e-04 eta 7:57:27 +epoch [32/50] batch [720/1000] time 1.578 (1.567) data 0.000 (0.002) loss 0.8105 (1.0955) acc 71.8750 (72.7127) lr 6.9098e-04 eta 7:57:19 +epoch [32/50] batch [725/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.9297 (1.0961) acc 75.0000 (72.6940) lr 6.9098e-04 eta 7:57:13 +epoch [32/50] batch [730/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.3896 (1.0961) acc 68.7500 (72.6926) lr 6.9098e-04 eta 7:57:04 +epoch [32/50] batch [735/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.3037 (1.0969) acc 65.6250 (72.6786) lr 6.9098e-04 eta 7:56:56 +epoch [32/50] batch [740/1000] time 1.570 (1.567) data 0.001 (0.002) loss 1.1328 (1.0982) acc 78.1250 (72.6436) lr 6.9098e-04 eta 7:56:47 +epoch [32/50] batch [745/1000] time 1.547 (1.567) data 0.001 (0.002) loss 1.3037 (1.0974) acc 75.0000 (72.6720) lr 6.9098e-04 eta 7:56:40 +epoch [32/50] batch [750/1000] time 1.596 (1.567) data 0.000 (0.002) loss 0.8027 (1.0979) acc 84.3750 (72.6750) lr 6.9098e-04 eta 7:56:32 +epoch [32/50] batch 
[755/1000] time 1.572 (1.567) data 0.000 (0.002) loss 0.5630 (1.0958) acc 81.2500 (72.7194) lr 6.9098e-04 eta 7:56:22 +epoch [32/50] batch [760/1000] time 1.573 (1.567) data 0.000 (0.002) loss 1.0312 (1.0966) acc 71.8750 (72.7179) lr 6.9098e-04 eta 7:56:14 +epoch [32/50] batch [765/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.4883 (1.0987) acc 65.6250 (72.6511) lr 6.9098e-04 eta 7:56:06 +epoch [32/50] batch [770/1000] time 1.546 (1.567) data 0.000 (0.002) loss 0.8018 (1.0996) acc 65.6250 (72.6258) lr 6.9098e-04 eta 7:56:00 +epoch [32/50] batch [775/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.3174 (1.1003) acc 68.7500 (72.6169) lr 6.9098e-04 eta 7:55:51 +epoch [32/50] batch [780/1000] time 1.549 (1.567) data 0.001 (0.002) loss 0.8174 (1.0986) acc 81.2500 (72.6402) lr 6.9098e-04 eta 7:55:41 +epoch [32/50] batch [785/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.1006 (1.0989) acc 71.8750 (72.6314) lr 6.9098e-04 eta 7:55:32 +epoch [32/50] batch [790/1000] time 1.582 (1.566) data 0.000 (0.002) loss 1.4531 (1.0983) acc 71.8750 (72.6582) lr 6.9098e-04 eta 7:55:25 +epoch [32/50] batch [795/1000] time 1.563 (1.566) data 0.001 (0.002) loss 1.1143 (1.0979) acc 62.5000 (72.6494) lr 6.9098e-04 eta 7:55:17 +epoch [32/50] batch [800/1000] time 1.556 (1.566) data 0.001 (0.002) loss 0.7739 (1.0984) acc 75.0000 (72.6484) lr 6.9098e-04 eta 7:55:09 +epoch [32/50] batch [805/1000] time 1.584 (1.566) data 0.001 (0.002) loss 1.4590 (1.0988) acc 62.5000 (72.6398) lr 6.9098e-04 eta 7:55:01 +epoch [32/50] batch [810/1000] time 1.557 (1.566) data 0.000 (0.002) loss 0.4124 (1.0974) acc 90.6250 (72.6968) lr 6.9098e-04 eta 7:54:52 +epoch [32/50] batch [815/1000] time 1.562 (1.566) data 0.000 (0.002) loss 0.6733 (1.0973) acc 81.2500 (72.6994) lr 6.9098e-04 eta 7:54:45 +epoch [32/50] batch [820/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.2373 (1.0982) acc 68.7500 (72.6944) lr 6.9098e-04 eta 7:54:36 +epoch [32/50] batch [825/1000] time 1.581 (1.566) data 0.000 (0.002) loss 
0.9375 (1.0971) acc 75.0000 (72.7008) lr 6.9098e-04 eta 7:54:29 +epoch [32/50] batch [830/1000] time 1.562 (1.567) data 0.000 (0.002) loss 0.9199 (1.0968) acc 78.1250 (72.7108) lr 6.9098e-04 eta 7:54:25 +epoch [32/50] batch [835/1000] time 1.558 (1.567) data 0.001 (0.002) loss 1.2480 (1.0977) acc 78.1250 (72.6909) lr 6.9098e-04 eta 7:54:17 +epoch [32/50] batch [840/1000] time 1.533 (1.567) data 0.000 (0.002) loss 1.8789 (1.0978) acc 62.5000 (72.6786) lr 6.9098e-04 eta 7:54:09 +epoch [32/50] batch [845/1000] time 1.537 (1.566) data 0.001 (0.002) loss 0.9614 (1.1001) acc 75.0000 (72.6479) lr 6.9098e-04 eta 7:53:59 +epoch [32/50] batch [850/1000] time 1.568 (1.566) data 0.001 (0.002) loss 1.1162 (1.1002) acc 75.0000 (72.6471) lr 6.9098e-04 eta 7:53:51 +epoch [32/50] batch [855/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.5801 (1.1004) acc 84.3750 (72.6608) lr 6.9098e-04 eta 7:53:43 +epoch [32/50] batch [860/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.9844 (1.1013) acc 68.7500 (72.6562) lr 6.9098e-04 eta 7:53:35 +epoch [32/50] batch [865/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.1699 (1.1028) acc 75.0000 (72.6264) lr 6.9098e-04 eta 7:53:28 +epoch [32/50] batch [870/1000] time 1.598 (1.567) data 0.000 (0.002) loss 1.0576 (1.1026) acc 75.0000 (72.6293) lr 6.9098e-04 eta 7:53:20 +epoch [32/50] batch [875/1000] time 1.537 (1.567) data 0.001 (0.002) loss 0.9014 (1.1017) acc 81.2500 (72.6536) lr 6.9098e-04 eta 7:53:15 +epoch [32/50] batch [880/1000] time 1.576 (1.567) data 0.001 (0.002) loss 1.8018 (1.1026) acc 59.3750 (72.6491) lr 6.9098e-04 eta 7:53:07 +epoch [32/50] batch [885/1000] time 1.594 (1.567) data 0.001 (0.002) loss 1.5352 (1.1033) acc 59.3750 (72.6130) lr 6.9098e-04 eta 7:52:59 +epoch [32/50] batch [890/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.2988 (1.1033) acc 75.0000 (72.6334) lr 6.9098e-04 eta 7:52:51 +epoch [32/50] batch [895/1000] time 1.572 (1.567) data 0.000 (0.002) loss 0.7427 (1.1044) acc 93.7500 (72.6222) lr 6.9098e-04 
eta 7:52:41 +epoch [32/50] batch [900/1000] time 1.557 (1.566) data 0.000 (0.002) loss 0.5693 (1.1041) acc 81.2500 (72.6215) lr 6.9098e-04 eta 7:52:31 +epoch [32/50] batch [905/1000] time 1.575 (1.566) data 0.001 (0.002) loss 0.9331 (1.1050) acc 81.2500 (72.6105) lr 6.9098e-04 eta 7:52:23 +epoch [32/50] batch [910/1000] time 1.535 (1.566) data 0.001 (0.002) loss 1.6807 (1.1060) acc 65.6250 (72.5962) lr 6.9098e-04 eta 7:52:14 +epoch [32/50] batch [915/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.7856 (1.1063) acc 75.0000 (72.5990) lr 6.9098e-04 eta 7:52:06 +epoch [32/50] batch [920/1000] time 1.543 (1.566) data 0.001 (0.002) loss 1.4414 (1.1071) acc 53.1250 (72.5713) lr 6.9098e-04 eta 7:52:00 +epoch [32/50] batch [925/1000] time 1.542 (1.566) data 0.000 (0.002) loss 1.3311 (1.1075) acc 71.8750 (72.5642) lr 6.9098e-04 eta 7:51:52 +epoch [32/50] batch [930/1000] time 1.577 (1.566) data 0.000 (0.002) loss 0.8848 (1.1069) acc 78.1250 (72.5538) lr 6.9098e-04 eta 7:51:44 +epoch [32/50] batch [935/1000] time 1.569 (1.566) data 0.001 (0.002) loss 0.6548 (1.1056) acc 81.2500 (72.5635) lr 6.9098e-04 eta 7:51:36 +epoch [32/50] batch [940/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.6006 (1.1056) acc 75.0000 (72.5465) lr 6.9098e-04 eta 7:51:28 +epoch [32/50] batch [945/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.8740 (1.1074) acc 84.3750 (72.5198) lr 6.9098e-04 eta 7:51:19 +epoch [32/50] batch [950/1000] time 1.572 (1.566) data 0.001 (0.002) loss 1.0430 (1.1064) acc 78.1250 (72.5658) lr 6.9098e-04 eta 7:51:11 +epoch [32/50] batch [955/1000] time 1.542 (1.566) data 0.000 (0.002) loss 1.1025 (1.1056) acc 65.6250 (72.5622) lr 6.9098e-04 eta 7:51:02 +epoch [32/50] batch [960/1000] time 1.546 (1.566) data 0.000 (0.002) loss 1.0527 (1.1050) acc 68.7500 (72.5423) lr 6.9098e-04 eta 7:50:53 +epoch [32/50] batch [965/1000] time 1.581 (1.566) data 0.002 (0.002) loss 1.9971 (1.1055) acc 59.3750 (72.5389) lr 6.9098e-04 eta 7:50:46 +epoch [32/50] batch [970/1000] time 
1.554 (1.566) data 0.000 (0.002) loss 1.0732 (1.1051) acc 71.8750 (72.5580) lr 6.9098e-04 eta 7:50:38 +epoch [32/50] batch [975/1000] time 1.587 (1.566) data 0.001 (0.002) loss 0.6440 (1.1053) acc 75.0000 (72.5481) lr 6.9098e-04 eta 7:50:29 +epoch [32/50] batch [980/1000] time 1.731 (1.566) data 0.001 (0.002) loss 0.8618 (1.1056) acc 75.0000 (72.5351) lr 6.9098e-04 eta 7:50:24 +epoch [32/50] batch [985/1000] time 1.581 (1.566) data 0.001 (0.002) loss 0.9595 (1.1058) acc 75.0000 (72.5286) lr 6.9098e-04 eta 7:50:15 +epoch [32/50] batch [990/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.1729 (1.1061) acc 71.8750 (72.5095) lr 6.9098e-04 eta 7:50:07 +epoch [32/50] batch [995/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.0576 (1.1049) acc 78.1250 (72.5345) lr 6.9098e-04 eta 7:49:58 +epoch [32/50] batch [1000/1000] time 1.572 (1.566) data 0.000 (0.001) loss 1.2246 (1.1064) acc 75.0000 (72.5156) lr 6.3188e-04 eta 7:49:50 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,349 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.2% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [33/50] batch [5/1000] time 1.549 (1.752) data 0.001 (0.250) loss 0.9609 (1.1116) acc 81.2500 (72.5000) lr 6.3188e-04 eta 8:45:28 +epoch [33/50] batch [10/1000] time 1.542 (1.653) data 0.001 (0.125) loss 0.6885 (1.0459) acc 78.1250 (74.3750) lr 6.3188e-04 eta 8:15:37 +epoch [33/50] batch [15/1000] time 1.555 (1.620) data 0.000 (0.084) loss 1.1924 (1.0623) acc 68.7500 (73.7500) lr 6.3188e-04 eta 8:05:43 +epoch [33/50] batch [20/1000] time 1.578 (1.609) data 0.001 (0.063) loss 1.0547 (1.0429) acc 81.2500 (74.6875) lr 6.3188e-04 eta 8:02:07 +epoch [33/50] batch [25/1000] time 1.576 (1.600) data 0.001 (0.051) loss 1.2119 (1.0816) acc 71.8750 (73.0000) lr 6.3188e-04 eta 7:59:18 +epoch [33/50] batch [30/1000] time 1.557 (1.594) data 0.001 (0.042) loss 1.1611 (1.0755) acc 78.1250 (73.4375) 
lr 6.3188e-04 eta 7:57:26 +epoch [33/50] batch [35/1000] time 1.589 (1.591) data 0.001 (0.036) loss 1.0215 (1.0954) acc 84.3750 (73.0357) lr 6.3188e-04 eta 7:56:30 +epoch [33/50] batch [40/1000] time 1.557 (1.588) data 0.001 (0.032) loss 0.7417 (1.0669) acc 75.0000 (73.4375) lr 6.3188e-04 eta 7:55:25 +epoch [33/50] batch [45/1000] time 1.560 (1.586) data 0.001 (0.028) loss 0.9683 (1.0608) acc 78.1250 (73.8194) lr 6.3188e-04 eta 7:54:42 +epoch [33/50] batch [50/1000] time 1.557 (1.589) data 0.000 (0.026) loss 1.1270 (1.0522) acc 59.3750 (73.8125) lr 6.3188e-04 eta 7:55:26 +epoch [33/50] batch [55/1000] time 1.576 (1.587) data 0.001 (0.023) loss 0.6714 (1.0403) acc 84.3750 (73.5795) lr 6.3188e-04 eta 7:54:39 +epoch [33/50] batch [60/1000] time 1.570 (1.585) data 0.001 (0.021) loss 1.1299 (1.0385) acc 65.6250 (73.3333) lr 6.3188e-04 eta 7:53:55 +epoch [33/50] batch [65/1000] time 1.549 (1.584) data 0.000 (0.020) loss 1.1436 (1.0337) acc 68.7500 (73.4135) lr 6.3188e-04 eta 7:53:26 +epoch [33/50] batch [70/1000] time 1.574 (1.582) data 0.000 (0.018) loss 1.1914 (1.0274) acc 71.8750 (73.7946) lr 6.3188e-04 eta 7:52:48 +epoch [33/50] batch [75/1000] time 1.577 (1.581) data 0.001 (0.017) loss 1.7178 (1.0307) acc 71.8750 (73.9167) lr 6.3188e-04 eta 7:52:26 +epoch [33/50] batch [80/1000] time 1.568 (1.580) data 0.001 (0.016) loss 1.2217 (1.0390) acc 68.7500 (73.7109) lr 6.3188e-04 eta 7:51:55 +epoch [33/50] batch [85/1000] time 1.536 (1.579) data 0.001 (0.015) loss 0.5107 (1.0377) acc 81.2500 (73.5662) lr 6.3188e-04 eta 7:51:27 +epoch [33/50] batch [90/1000] time 1.536 (1.580) data 0.001 (0.014) loss 1.6006 (1.0358) acc 59.3750 (73.4722) lr 6.3188e-04 eta 7:51:31 +epoch [33/50] batch [95/1000] time 1.566 (1.579) data 0.000 (0.014) loss 0.8223 (1.0358) acc 75.0000 (73.2895) lr 6.3188e-04 eta 7:51:07 +epoch [33/50] batch [100/1000] time 1.561 (1.578) data 0.000 (0.013) loss 0.7476 (1.0353) acc 81.2500 (73.4375) lr 6.3188e-04 eta 7:50:47 +epoch [33/50] batch [105/1000] time 
1.565 (1.577) data 0.000 (0.012) loss 0.5229 (1.0306) acc 84.3750 (73.5417) lr 6.3188e-04 eta 7:50:25 +epoch [33/50] batch [110/1000] time 1.567 (1.577) data 0.001 (0.012) loss 0.8940 (1.0227) acc 78.1250 (73.9205) lr 6.3188e-04 eta 7:50:08 +epoch [33/50] batch [115/1000] time 1.534 (1.576) data 0.001 (0.011) loss 1.2109 (1.0279) acc 75.0000 (73.9130) lr 6.3188e-04 eta 7:49:51 +epoch [33/50] batch [120/1000] time 1.580 (1.576) data 0.001 (0.011) loss 1.3945 (1.0445) acc 75.0000 (73.6198) lr 6.3188e-04 eta 7:49:35 +epoch [33/50] batch [125/1000] time 1.558 (1.575) data 0.000 (0.011) loss 1.2480 (1.0447) acc 75.0000 (73.7000) lr 6.3188e-04 eta 7:49:15 +epoch [33/50] batch [130/1000] time 1.558 (1.575) data 0.000 (0.010) loss 1.2803 (1.0447) acc 65.6250 (73.7019) lr 6.3188e-04 eta 7:49:08 +epoch [33/50] batch [135/1000] time 1.555 (1.575) data 0.000 (0.010) loss 0.8345 (1.0444) acc 75.0000 (73.7269) lr 6.3188e-04 eta 7:48:48 +epoch [33/50] batch [140/1000] time 1.564 (1.574) data 0.001 (0.009) loss 0.9692 (1.0422) acc 78.1250 (73.6830) lr 6.3188e-04 eta 7:48:36 +epoch [33/50] batch [145/1000] time 1.564 (1.574) data 0.001 (0.009) loss 0.8467 (1.0483) acc 78.1250 (73.6422) lr 6.3188e-04 eta 7:48:20 +epoch [33/50] batch [150/1000] time 1.699 (1.574) data 0.000 (0.009) loss 0.6616 (1.0579) acc 78.1250 (73.4375) lr 6.3188e-04 eta 7:48:20 +epoch [33/50] batch [155/1000] time 1.572 (1.574) data 0.001 (0.009) loss 1.1855 (1.0621) acc 65.6250 (73.4073) lr 6.3188e-04 eta 7:48:02 +epoch [33/50] batch [160/1000] time 1.542 (1.573) data 0.000 (0.008) loss 0.8164 (1.0570) acc 87.5000 (73.6133) lr 6.3188e-04 eta 7:47:45 +epoch [33/50] batch [165/1000] time 1.581 (1.573) data 0.001 (0.008) loss 0.6978 (1.0602) acc 81.2500 (73.5795) lr 6.3188e-04 eta 7:47:29 +epoch [33/50] batch [170/1000] time 1.562 (1.572) data 0.000 (0.008) loss 0.9448 (1.0663) acc 68.7500 (73.4007) lr 6.3188e-04 eta 7:47:14 +epoch [33/50] batch [175/1000] time 1.588 (1.572) data 0.001 (0.008) loss 0.8276 (1.0633) 
acc 71.8750 (73.4107) lr 6.3188e-04 eta 7:47:06 +epoch [33/50] batch [180/1000] time 1.557 (1.572) data 0.000 (0.007) loss 1.3643 (1.0608) acc 65.6250 (73.3854) lr 6.3188e-04 eta 7:46:49 +epoch [33/50] batch [185/1000] time 1.556 (1.572) data 0.000 (0.007) loss 1.0020 (1.0597) acc 81.2500 (73.4291) lr 6.3188e-04 eta 7:46:38 +epoch [33/50] batch [190/1000] time 1.548 (1.571) data 0.000 (0.007) loss 1.3135 (1.0647) acc 71.8750 (73.3388) lr 6.3188e-04 eta 7:46:19 +epoch [33/50] batch [195/1000] time 1.750 (1.572) data 0.001 (0.007) loss 1.5518 (1.0638) acc 56.2500 (73.3494) lr 6.3188e-04 eta 7:46:24 +epoch [33/50] batch [200/1000] time 1.566 (1.571) data 0.001 (0.007) loss 1.0742 (1.0642) acc 59.3750 (73.3281) lr 6.3188e-04 eta 7:46:11 +epoch [33/50] batch [205/1000] time 1.550 (1.572) data 0.000 (0.007) loss 1.1826 (1.0667) acc 62.5000 (73.2165) lr 6.3188e-04 eta 7:46:05 +epoch [33/50] batch [210/1000] time 1.568 (1.571) data 0.000 (0.006) loss 1.0762 (1.0651) acc 65.6250 (73.1845) lr 6.3188e-04 eta 7:45:52 +epoch [33/50] batch [215/1000] time 1.565 (1.571) data 0.000 (0.006) loss 1.0859 (1.0681) acc 71.8750 (73.0959) lr 6.3188e-04 eta 7:45:42 +epoch [33/50] batch [220/1000] time 1.585 (1.571) data 0.000 (0.006) loss 0.6113 (1.0693) acc 84.3750 (73.0824) lr 6.3188e-04 eta 7:45:34 +epoch [33/50] batch [225/1000] time 1.571 (1.571) data 0.001 (0.006) loss 1.2881 (1.0705) acc 75.0000 (73.0278) lr 6.3188e-04 eta 7:45:22 +epoch [33/50] batch [230/1000] time 1.585 (1.571) data 0.000 (0.006) loss 1.8340 (1.0713) acc 46.8750 (72.9348) lr 6.3188e-04 eta 7:45:12 +epoch [33/50] batch [235/1000] time 1.570 (1.570) data 0.000 (0.006) loss 1.2529 (1.0747) acc 68.7500 (72.8856) lr 6.3188e-04 eta 7:44:58 +epoch [33/50] batch [240/1000] time 1.561 (1.571) data 0.000 (0.006) loss 1.7891 (1.0761) acc 50.0000 (72.7995) lr 6.3188e-04 eta 7:44:59 +epoch [33/50] batch [245/1000] time 1.552 (1.571) data 0.001 (0.006) loss 0.6533 (1.0734) acc 81.2500 (72.8061) lr 6.3188e-04 eta 7:44:47 
+epoch [33/50] batch [250/1000] time 1.561 (1.570) data 0.000 (0.006) loss 0.6050 (1.0663) acc 84.3750 (72.9875) lr 6.3188e-04 eta 7:44:35 +epoch [33/50] batch [255/1000] time 1.566 (1.570) data 0.001 (0.005) loss 1.2812 (1.0664) acc 65.6250 (72.9412) lr 6.3188e-04 eta 7:44:26 +epoch [33/50] batch [260/1000] time 1.593 (1.571) data 0.000 (0.005) loss 0.5967 (1.0683) acc 84.3750 (72.8966) lr 6.3188e-04 eta 7:44:21 +epoch [33/50] batch [265/1000] time 1.561 (1.570) data 0.000 (0.005) loss 0.8740 (1.0627) acc 84.3750 (73.0778) lr 6.3188e-04 eta 7:44:12 +epoch [33/50] batch [270/1000] time 1.568 (1.570) data 0.000 (0.005) loss 1.2314 (1.0645) acc 65.6250 (73.0787) lr 6.3188e-04 eta 7:44:01 +epoch [33/50] batch [275/1000] time 1.574 (1.570) data 0.001 (0.005) loss 1.4941 (1.0672) acc 68.7500 (73.0227) lr 6.3188e-04 eta 7:43:52 +epoch [33/50] batch [280/1000] time 1.546 (1.570) data 0.001 (0.005) loss 0.9937 (1.0662) acc 68.7500 (73.0134) lr 6.3188e-04 eta 7:43:41 +epoch [33/50] batch [285/1000] time 1.571 (1.570) data 0.000 (0.005) loss 1.3740 (1.0664) acc 62.5000 (72.9386) lr 6.3188e-04 eta 7:43:31 +epoch [33/50] batch [290/1000] time 1.572 (1.570) data 0.001 (0.005) loss 1.4023 (1.0690) acc 65.6250 (72.8233) lr 6.3188e-04 eta 7:43:24 +epoch [33/50] batch [295/1000] time 1.558 (1.570) data 0.001 (0.005) loss 1.3418 (1.0692) acc 65.6250 (72.7966) lr 6.3188e-04 eta 7:43:15 +epoch [33/50] batch [300/1000] time 1.575 (1.570) data 0.001 (0.005) loss 0.8662 (1.0680) acc 75.0000 (72.8854) lr 6.3188e-04 eta 7:43:07 +epoch [33/50] batch [305/1000] time 1.553 (1.570) data 0.000 (0.005) loss 1.7168 (1.0706) acc 56.2500 (72.8586) lr 6.3188e-04 eta 7:43:09 +epoch [33/50] batch [310/1000] time 1.555 (1.570) data 0.000 (0.005) loss 0.9272 (1.0731) acc 84.3750 (72.8730) lr 6.3188e-04 eta 7:42:59 +epoch [33/50] batch [315/1000] time 1.559 (1.570) data 0.000 (0.004) loss 1.0791 (1.0720) acc 78.1250 (72.9067) lr 6.3188e-04 eta 7:42:46 +epoch [33/50] batch [320/1000] time 1.554 (1.570) 
data 0.001 (0.004) loss 0.7764 (1.0710) acc 78.1250 (72.9785) lr 6.3188e-04 eta 7:42:36 +epoch [33/50] batch [325/1000] time 1.585 (1.570) data 0.000 (0.004) loss 1.3594 (1.0705) acc 68.7500 (72.9904) lr 6.3188e-04 eta 7:42:29 +epoch [33/50] batch [330/1000] time 1.560 (1.570) data 0.000 (0.004) loss 0.4893 (1.0706) acc 84.3750 (73.0587) lr 6.3188e-04 eta 7:42:18 +epoch [33/50] batch [335/1000] time 1.549 (1.570) data 0.000 (0.004) loss 1.5010 (1.0727) acc 68.7500 (73.0317) lr 6.3188e-04 eta 7:42:06 +epoch [33/50] batch [340/1000] time 1.583 (1.569) data 0.000 (0.004) loss 0.6221 (1.0735) acc 84.3750 (72.9871) lr 6.3188e-04 eta 7:41:55 +epoch [33/50] batch [345/1000] time 1.571 (1.569) data 0.000 (0.004) loss 0.6987 (1.0735) acc 71.8750 (72.9710) lr 6.3188e-04 eta 7:41:44 +epoch [33/50] batch [350/1000] time 1.562 (1.570) data 0.001 (0.004) loss 1.2041 (1.0716) acc 68.7500 (73.0179) lr 6.3188e-04 eta 7:41:44 +epoch [33/50] batch [355/1000] time 1.556 (1.570) data 0.000 (0.004) loss 1.0166 (1.0714) acc 71.8750 (73.0458) lr 6.3188e-04 eta 7:41:35 +epoch [33/50] batch [360/1000] time 1.566 (1.570) data 0.000 (0.004) loss 0.8120 (1.0704) acc 78.1250 (73.0295) lr 6.3188e-04 eta 7:41:26 +epoch [33/50] batch [365/1000] time 1.562 (1.570) data 0.000 (0.004) loss 1.4766 (1.0727) acc 68.7500 (73.0137) lr 6.3188e-04 eta 7:41:18 +epoch [33/50] batch [370/1000] time 1.559 (1.569) data 0.001 (0.004) loss 1.0254 (1.0744) acc 75.0000 (72.9899) lr 6.3188e-04 eta 7:41:07 +epoch [33/50] batch [375/1000] time 1.544 (1.569) data 0.001 (0.004) loss 1.2061 (1.0771) acc 75.0000 (72.9917) lr 6.3188e-04 eta 7:40:55 +epoch [33/50] batch [380/1000] time 1.568 (1.569) data 0.001 (0.004) loss 0.9727 (1.0748) acc 62.5000 (73.0016) lr 6.3188e-04 eta 7:40:45 +epoch [33/50] batch [385/1000] time 1.576 (1.569) data 0.000 (0.004) loss 0.6445 (1.0753) acc 78.1250 (72.9789) lr 6.3188e-04 eta 7:40:37 +epoch [33/50] batch [390/1000] time 1.556 (1.569) data 0.001 (0.004) loss 0.8984 (1.0760) acc 84.3750 
(72.9647) lr 6.3188e-04 eta 7:40:36 +epoch [33/50] batch [395/1000] time 1.579 (1.569) data 0.000 (0.004) loss 1.6348 (1.0794) acc 53.1250 (72.9035) lr 6.3188e-04 eta 7:40:26 +epoch [33/50] batch [400/1000] time 1.538 (1.569) data 0.001 (0.004) loss 1.0283 (1.0784) acc 71.8750 (72.8906) lr 6.3188e-04 eta 7:40:17 +epoch [33/50] batch [405/1000] time 1.557 (1.569) data 0.000 (0.004) loss 0.9233 (1.0787) acc 81.2500 (72.9012) lr 6.3188e-04 eta 7:40:07 +epoch [33/50] batch [410/1000] time 1.552 (1.569) data 0.001 (0.004) loss 1.0781 (1.0799) acc 71.8750 (72.8811) lr 6.3188e-04 eta 7:39:56 +epoch [33/50] batch [415/1000] time 1.566 (1.569) data 0.001 (0.004) loss 1.1455 (1.0802) acc 75.0000 (72.8690) lr 6.3188e-04 eta 7:39:47 +epoch [33/50] batch [420/1000] time 1.539 (1.569) data 0.001 (0.003) loss 0.8599 (1.0782) acc 81.2500 (72.9836) lr 6.3188e-04 eta 7:39:37 +epoch [33/50] batch [425/1000] time 1.585 (1.569) data 0.000 (0.003) loss 1.0654 (1.0779) acc 68.7500 (73.0221) lr 6.3188e-04 eta 7:39:29 +epoch [33/50] batch [430/1000] time 1.579 (1.569) data 0.000 (0.003) loss 1.1406 (1.0808) acc 65.6250 (72.9433) lr 6.3188e-04 eta 7:39:22 +epoch [33/50] batch [435/1000] time 1.543 (1.569) data 0.001 (0.003) loss 0.2993 (1.0811) acc 96.8750 (72.9885) lr 6.3188e-04 eta 7:39:14 +epoch [33/50] batch [440/1000] time 1.575 (1.569) data 0.001 (0.003) loss 0.9678 (1.0813) acc 68.7500 (72.9688) lr 6.3188e-04 eta 7:39:04 +epoch [33/50] batch [445/1000] time 1.564 (1.569) data 0.001 (0.003) loss 1.5107 (1.0838) acc 56.2500 (72.8933) lr 6.3188e-04 eta 7:38:56 +epoch [33/50] batch [450/1000] time 1.605 (1.569) data 0.001 (0.003) loss 1.2451 (1.0827) acc 71.8750 (72.9583) lr 6.3188e-04 eta 7:38:49 +epoch [33/50] batch [455/1000] time 1.583 (1.569) data 0.001 (0.003) loss 1.4600 (1.0838) acc 71.8750 (72.9739) lr 6.3188e-04 eta 7:38:49 +epoch [33/50] batch [460/1000] time 1.548 (1.569) data 0.001 (0.003) loss 1.4727 (1.0828) acc 68.7500 (73.0095) lr 6.3188e-04 eta 7:38:40 +epoch [33/50] 
batch [465/1000] time 1.608 (1.569) data 0.001 (0.003) loss 0.5820 (1.0824) acc 78.1250 (73.0309) lr 6.3188e-04 eta 7:38:35 +epoch [33/50] batch [470/1000] time 1.571 (1.569) data 0.000 (0.003) loss 0.7529 (1.0790) acc 81.2500 (73.0785) lr 6.3188e-04 eta 7:38:28 +epoch [33/50] batch [475/1000] time 1.563 (1.569) data 0.000 (0.003) loss 1.1709 (1.0788) acc 71.8750 (73.0789) lr 6.3188e-04 eta 7:38:18 +epoch [33/50] batch [480/1000] time 1.568 (1.569) data 0.000 (0.003) loss 1.9883 (1.0817) acc 53.1250 (73.0078) lr 6.3188e-04 eta 7:38:08 +epoch [33/50] batch [485/1000] time 1.543 (1.569) data 0.001 (0.003) loss 1.2441 (1.0825) acc 71.8750 (73.0090) lr 6.3188e-04 eta 7:37:59 +epoch [33/50] batch [490/1000] time 1.562 (1.569) data 0.001 (0.003) loss 1.4775 (1.0841) acc 68.7500 (72.9911) lr 6.3188e-04 eta 7:37:50 +epoch [33/50] batch [495/1000] time 1.555 (1.569) data 0.001 (0.003) loss 0.8096 (1.0828) acc 81.2500 (73.0429) lr 6.3188e-04 eta 7:37:40 +epoch [33/50] batch [500/1000] time 1.571 (1.569) data 0.000 (0.003) loss 1.2480 (1.0871) acc 56.2500 (72.9375) lr 6.3188e-04 eta 7:37:39 +epoch [33/50] batch [505/1000] time 1.557 (1.569) data 0.001 (0.003) loss 0.7344 (1.0871) acc 75.0000 (72.9208) lr 6.3188e-04 eta 7:37:30 +epoch [33/50] batch [510/1000] time 1.583 (1.569) data 0.001 (0.003) loss 2.0723 (1.0885) acc 59.3750 (72.8493) lr 6.3188e-04 eta 7:37:22 +epoch [33/50] batch [515/1000] time 1.540 (1.569) data 0.000 (0.003) loss 0.9165 (1.0882) acc 78.1250 (72.8459) lr 6.3188e-04 eta 7:37:12 +epoch [33/50] batch [520/1000] time 1.572 (1.569) data 0.000 (0.003) loss 0.8486 (1.0879) acc 81.2500 (72.8606) lr 6.3188e-04 eta 7:37:04 +epoch [33/50] batch [525/1000] time 1.579 (1.569) data 0.000 (0.003) loss 1.1338 (1.0876) acc 75.0000 (72.8036) lr 6.3188e-04 eta 7:36:55 +epoch [33/50] batch [530/1000] time 1.547 (1.569) data 0.000 (0.003) loss 1.1279 (1.0889) acc 71.8750 (72.7653) lr 6.3188e-04 eta 7:36:48 +epoch [33/50] batch [535/1000] time 1.574 (1.569) data 0.001 
(0.003) loss 1.1104 (1.0930) acc 71.8750 (72.6869) lr 6.3188e-04 eta 7:36:40 +epoch [33/50] batch [540/1000] time 1.751 (1.569) data 0.001 (0.003) loss 1.5547 (1.0926) acc 68.7500 (72.7025) lr 6.3188e-04 eta 7:36:38 +epoch [33/50] batch [545/1000] time 1.549 (1.569) data 0.000 (0.003) loss 0.8081 (1.0910) acc 71.8750 (72.7408) lr 6.3188e-04 eta 7:36:28 +epoch [33/50] batch [550/1000] time 1.584 (1.569) data 0.000 (0.003) loss 1.1201 (1.0913) acc 68.7500 (72.7102) lr 6.3188e-04 eta 7:36:20 +epoch [33/50] batch [555/1000] time 1.550 (1.569) data 0.000 (0.003) loss 0.7114 (1.0900) acc 84.3750 (72.7365) lr 6.3188e-04 eta 7:36:11 +epoch [33/50] batch [560/1000] time 1.537 (1.569) data 0.000 (0.003) loss 1.1367 (1.0892) acc 71.8750 (72.7344) lr 6.3188e-04 eta 7:36:02 +epoch [33/50] batch [565/1000] time 1.563 (1.569) data 0.000 (0.003) loss 1.1230 (1.0891) acc 68.7500 (72.7434) lr 6.3188e-04 eta 7:35:52 +epoch [33/50] batch [570/1000] time 1.581 (1.569) data 0.001 (0.003) loss 1.0195 (1.0895) acc 71.8750 (72.7193) lr 6.3188e-04 eta 7:35:46 +epoch [33/50] batch [575/1000] time 1.570 (1.569) data 0.000 (0.003) loss 1.1436 (1.0879) acc 78.1250 (72.7554) lr 6.3188e-04 eta 7:35:37 +epoch [33/50] batch [580/1000] time 1.558 (1.569) data 0.000 (0.003) loss 0.8252 (1.0881) acc 84.3750 (72.7694) lr 6.3188e-04 eta 7:35:27 +epoch [33/50] batch [585/1000] time 1.575 (1.569) data 0.001 (0.003) loss 1.2773 (1.0880) acc 68.7500 (72.7724) lr 6.3188e-04 eta 7:35:19 +epoch [33/50] batch [590/1000] time 1.567 (1.569) data 0.000 (0.003) loss 1.8809 (1.0894) acc 53.1250 (72.7701) lr 6.3188e-04 eta 7:35:11 +epoch [33/50] batch [595/1000] time 1.557 (1.569) data 0.000 (0.003) loss 1.0469 (1.0894) acc 75.0000 (72.7416) lr 6.3188e-04 eta 7:35:02 +epoch [33/50] batch [600/1000] time 1.564 (1.569) data 0.001 (0.003) loss 0.8936 (1.0896) acc 75.0000 (72.6979) lr 6.3188e-04 eta 7:34:54 +epoch [33/50] batch [605/1000] time 1.558 (1.569) data 0.000 (0.003) loss 1.5391 (1.0919) acc 65.6250 (72.6498) lr 
6.3188e-04 eta 7:34:49 +epoch [33/50] batch [610/1000] time 1.571 (1.569) data 0.001 (0.003) loss 0.7280 (1.0919) acc 78.1250 (72.6537) lr 6.3188e-04 eta 7:34:41 +epoch [33/50] batch [615/1000] time 1.553 (1.569) data 0.001 (0.003) loss 1.1445 (1.0916) acc 78.1250 (72.6829) lr 6.3188e-04 eta 7:34:33 +epoch [33/50] batch [620/1000] time 1.580 (1.569) data 0.000 (0.003) loss 1.0723 (1.0904) acc 71.8750 (72.6764) lr 6.3188e-04 eta 7:34:23 +epoch [33/50] batch [625/1000] time 1.554 (1.569) data 0.000 (0.003) loss 1.4482 (1.0915) acc 65.6250 (72.6350) lr 6.3188e-04 eta 7:34:14 +epoch [33/50] batch [630/1000] time 1.546 (1.569) data 0.001 (0.002) loss 0.6123 (1.0907) acc 84.3750 (72.6488) lr 6.3188e-04 eta 7:34:05 +epoch [33/50] batch [635/1000] time 1.552 (1.568) data 0.000 (0.002) loss 1.1396 (1.0903) acc 71.8750 (72.6673) lr 6.3188e-04 eta 7:33:56 +epoch [33/50] batch [640/1000] time 1.591 (1.568) data 0.001 (0.002) loss 0.8296 (1.0905) acc 71.8750 (72.6465) lr 6.3188e-04 eta 7:33:47 +epoch [33/50] batch [645/1000] time 1.559 (1.568) data 0.000 (0.002) loss 1.1504 (1.0900) acc 65.6250 (72.6357) lr 6.3188e-04 eta 7:33:38 +epoch [33/50] batch [650/1000] time 1.551 (1.568) data 0.001 (0.002) loss 1.4238 (1.0913) acc 81.2500 (72.6298) lr 6.3188e-04 eta 7:33:32 +epoch [33/50] batch [655/1000] time 1.582 (1.568) data 0.000 (0.002) loss 1.1201 (1.0909) acc 75.0000 (72.6431) lr 6.3188e-04 eta 7:33:24 +epoch [33/50] batch [660/1000] time 1.563 (1.568) data 0.000 (0.002) loss 1.5762 (1.0937) acc 65.6250 (72.5852) lr 6.3188e-04 eta 7:33:15 +epoch [33/50] batch [665/1000] time 1.551 (1.568) data 0.000 (0.002) loss 2.2246 (1.0953) acc 59.3750 (72.5611) lr 6.3188e-04 eta 7:33:07 +epoch [33/50] batch [670/1000] time 1.556 (1.568) data 0.000 (0.002) loss 1.3008 (1.0946) acc 78.1250 (72.5840) lr 6.3188e-04 eta 7:32:59 +epoch [33/50] batch [675/1000] time 1.572 (1.568) data 0.000 (0.002) loss 1.0322 (1.0946) acc 68.7500 (72.5880) lr 6.3188e-04 eta 7:32:49 +epoch [33/50] batch 
[680/1000] time 1.557 (1.568) data 0.000 (0.002) loss 1.0410 (1.0942) acc 68.7500 (72.5827) lr 6.3188e-04 eta 7:32:40 +epoch [33/50] batch [685/1000] time 1.581 (1.568) data 0.001 (0.002) loss 1.3682 (1.0955) acc 65.6250 (72.5776) lr 6.3188e-04 eta 7:32:32 +epoch [33/50] batch [690/1000] time 1.572 (1.568) data 0.001 (0.002) loss 0.9844 (1.0964) acc 75.0000 (72.5634) lr 6.3188e-04 eta 7:32:24 +epoch [33/50] batch [695/1000] time 1.565 (1.568) data 0.001 (0.002) loss 0.9614 (1.0959) acc 75.0000 (72.5719) lr 6.3188e-04 eta 7:32:21 +epoch [33/50] batch [700/1000] time 1.537 (1.568) data 0.001 (0.002) loss 1.1904 (1.0971) acc 68.7500 (72.5268) lr 6.3188e-04 eta 7:32:12 +epoch [33/50] batch [705/1000] time 1.554 (1.568) data 0.000 (0.002) loss 1.0488 (1.0952) acc 71.8750 (72.5621) lr 6.3188e-04 eta 7:32:03 +epoch [33/50] batch [710/1000] time 1.535 (1.568) data 0.000 (0.002) loss 1.3750 (1.0953) acc 62.5000 (72.5528) lr 6.3188e-04 eta 7:31:54 +epoch [33/50] batch [715/1000] time 1.532 (1.568) data 0.001 (0.002) loss 0.5684 (1.0943) acc 87.5000 (72.5787) lr 6.3188e-04 eta 7:31:45 +epoch [33/50] batch [720/1000] time 1.535 (1.568) data 0.000 (0.002) loss 1.7930 (1.0945) acc 53.1250 (72.5955) lr 6.3188e-04 eta 7:31:36 +epoch [33/50] batch [725/1000] time 1.542 (1.568) data 0.001 (0.002) loss 1.6895 (1.0955) acc 65.6250 (72.5905) lr 6.3188e-04 eta 7:31:28 +epoch [33/50] batch [730/1000] time 1.545 (1.568) data 0.001 (0.002) loss 1.2178 (1.0962) acc 71.8750 (72.5728) lr 6.3188e-04 eta 7:31:20 +epoch [33/50] batch [735/1000] time 1.602 (1.568) data 0.001 (0.002) loss 1.0996 (1.0973) acc 68.7500 (72.5595) lr 6.3188e-04 eta 7:31:12 +epoch [33/50] batch [740/1000] time 1.553 (1.568) data 0.000 (0.002) loss 0.7896 (1.0956) acc 78.1250 (72.5760) lr 6.3188e-04 eta 7:31:02 +epoch [33/50] batch [745/1000] time 1.555 (1.568) data 0.000 (0.002) loss 0.7251 (1.0950) acc 81.2500 (72.5671) lr 6.3188e-04 eta 7:30:52 +epoch [33/50] batch [750/1000] time 1.552 (1.568) data 0.000 (0.002) loss 
0.9854 (1.0940) acc 84.3750 (72.6125) lr 6.3188e-04 eta 7:30:44 +epoch [33/50] batch [755/1000] time 1.565 (1.568) data 0.000 (0.002) loss 0.9668 (1.0923) acc 71.8750 (72.6283) lr 6.3188e-04 eta 7:30:41 +epoch [33/50] batch [760/1000] time 1.561 (1.568) data 0.000 (0.002) loss 1.5293 (1.0944) acc 71.8750 (72.5740) lr 6.3188e-04 eta 7:30:33 +epoch [33/50] batch [765/1000] time 1.581 (1.568) data 0.001 (0.002) loss 1.0605 (1.0958) acc 68.7500 (72.5368) lr 6.3188e-04 eta 7:30:26 +epoch [33/50] batch [770/1000] time 1.554 (1.568) data 0.000 (0.002) loss 1.0938 (1.0956) acc 68.7500 (72.5325) lr 6.3188e-04 eta 7:30:17 +epoch [33/50] batch [775/1000] time 1.553 (1.568) data 0.000 (0.002) loss 0.9204 (1.0956) acc 75.0000 (72.5323) lr 6.3188e-04 eta 7:30:07 +epoch [33/50] batch [780/1000] time 1.564 (1.568) data 0.001 (0.002) loss 1.6133 (1.0970) acc 65.6250 (72.5080) lr 6.3188e-04 eta 7:29:59 +epoch [33/50] batch [785/1000] time 1.530 (1.568) data 0.000 (0.002) loss 1.1611 (1.0982) acc 59.3750 (72.4562) lr 6.3188e-04 eta 7:29:49 +epoch [33/50] batch [790/1000] time 1.556 (1.568) data 0.000 (0.002) loss 1.1621 (1.0982) acc 68.7500 (72.4644) lr 6.3188e-04 eta 7:29:42 +epoch [33/50] batch [795/1000] time 1.557 (1.568) data 0.001 (0.002) loss 0.8652 (1.0978) acc 75.0000 (72.4843) lr 6.3188e-04 eta 7:29:34 +epoch [33/50] batch [800/1000] time 1.554 (1.568) data 0.001 (0.002) loss 0.8062 (1.0983) acc 75.0000 (72.4805) lr 6.3188e-04 eta 7:29:29 +epoch [33/50] batch [805/1000] time 1.560 (1.568) data 0.000 (0.002) loss 1.0557 (1.0973) acc 68.7500 (72.5000) lr 6.3188e-04 eta 7:29:19 +epoch [33/50] batch [810/1000] time 1.557 (1.568) data 0.001 (0.002) loss 1.0469 (1.0982) acc 71.8750 (72.4769) lr 6.3188e-04 eta 7:29:10 +epoch [33/50] batch [815/1000] time 1.561 (1.568) data 0.001 (0.002) loss 1.4678 (1.0991) acc 59.3750 (72.4655) lr 6.3188e-04 eta 7:29:01 +epoch [33/50] batch [820/1000] time 1.546 (1.568) data 0.000 (0.002) loss 0.8843 (1.0979) acc 68.7500 (72.4543) lr 6.3188e-04 
eta 7:28:53 +epoch [33/50] batch [825/1000] time 1.567 (1.568) data 0.001 (0.002) loss 0.8677 (1.0975) acc 75.0000 (72.4508) lr 6.3188e-04 eta 7:28:45 +epoch [33/50] batch [830/1000] time 1.557 (1.568) data 0.000 (0.002) loss 0.9746 (1.0983) acc 84.3750 (72.4586) lr 6.3188e-04 eta 7:28:36 +epoch [33/50] batch [835/1000] time 1.553 (1.568) data 0.001 (0.002) loss 0.9858 (1.0971) acc 65.6250 (72.4513) lr 6.3188e-04 eta 7:28:29 +epoch [33/50] batch [840/1000] time 1.541 (1.568) data 0.000 (0.002) loss 1.6797 (1.0981) acc 53.1250 (72.4256) lr 6.3188e-04 eta 7:28:19 +epoch [33/50] batch [845/1000] time 1.561 (1.568) data 0.000 (0.002) loss 0.8545 (1.0979) acc 71.8750 (72.4408) lr 6.3188e-04 eta 7:28:15 +epoch [33/50] batch [850/1000] time 1.581 (1.568) data 0.000 (0.002) loss 0.8164 (1.0970) acc 81.2500 (72.4743) lr 6.3188e-04 eta 7:28:07 +epoch [33/50] batch [855/1000] time 1.570 (1.568) data 0.000 (0.002) loss 1.3223 (1.0976) acc 68.7500 (72.4561) lr 6.3188e-04 eta 7:27:59 +epoch [33/50] batch [860/1000] time 1.538 (1.568) data 0.000 (0.002) loss 1.4990 (1.0983) acc 75.0000 (72.4637) lr 6.3188e-04 eta 7:27:51 +epoch [33/50] batch [865/1000] time 1.586 (1.568) data 0.000 (0.002) loss 0.8477 (1.0981) acc 78.1250 (72.4675) lr 6.3188e-04 eta 7:27:42 +epoch [33/50] batch [870/1000] time 1.577 (1.568) data 0.000 (0.002) loss 1.1816 (1.0984) acc 78.1250 (72.4784) lr 6.3188e-04 eta 7:27:34 +epoch [33/50] batch [875/1000] time 1.572 (1.568) data 0.001 (0.002) loss 1.5479 (1.0978) acc 62.5000 (72.4786) lr 6.3188e-04 eta 7:27:26 +epoch [33/50] batch [880/1000] time 1.577 (1.568) data 0.000 (0.002) loss 0.8652 (1.0963) acc 78.1250 (72.4929) lr 6.3188e-04 eta 7:27:18 +epoch [33/50] batch [885/1000] time 1.561 (1.568) data 0.000 (0.002) loss 0.7588 (1.0969) acc 78.1250 (72.5071) lr 6.3188e-04 eta 7:27:10 +epoch [33/50] batch [890/1000] time 1.589 (1.568) data 0.000 (0.002) loss 1.6064 (1.0976) acc 75.0000 (72.5176) lr 6.3188e-04 eta 7:27:01 +epoch [33/50] batch [895/1000] time 
1.569 (1.568) data 0.000 (0.002) loss 1.1143 (1.0985) acc 75.0000 (72.5105) lr 6.3188e-04 eta 7:26:55 +epoch [33/50] batch [900/1000] time 1.559 (1.568) data 0.000 (0.002) loss 1.3887 (1.0976) acc 71.8750 (72.5417) lr 6.3188e-04 eta 7:26:47 +epoch [33/50] batch [905/1000] time 1.734 (1.568) data 0.001 (0.002) loss 1.1914 (1.0987) acc 71.8750 (72.5345) lr 6.3188e-04 eta 7:26:41 +epoch [33/50] batch [910/1000] time 1.570 (1.568) data 0.001 (0.002) loss 1.2598 (1.0988) acc 75.0000 (72.5343) lr 6.3188e-04 eta 7:26:33 +epoch [33/50] batch [915/1000] time 1.602 (1.568) data 0.001 (0.002) loss 0.8047 (1.0990) acc 81.2500 (72.5444) lr 6.3188e-04 eta 7:26:25 +epoch [33/50] batch [920/1000] time 1.578 (1.568) data 0.000 (0.002) loss 0.6953 (1.0984) acc 78.1250 (72.5442) lr 6.3188e-04 eta 7:26:16 +epoch [33/50] batch [925/1000] time 1.554 (1.568) data 0.000 (0.002) loss 1.0322 (1.0990) acc 65.6250 (72.5169) lr 6.3188e-04 eta 7:26:08 +epoch [33/50] batch [930/1000] time 1.562 (1.568) data 0.001 (0.002) loss 0.6826 (1.0987) acc 81.2500 (72.5403) lr 6.3188e-04 eta 7:25:59 +epoch [33/50] batch [935/1000] time 1.561 (1.568) data 0.001 (0.002) loss 1.2188 (1.0999) acc 78.1250 (72.5468) lr 6.3188e-04 eta 7:25:51 +epoch [33/50] batch [940/1000] time 1.572 (1.568) data 0.001 (0.002) loss 1.4424 (1.1004) acc 71.8750 (72.5432) lr 6.3188e-04 eta 7:25:43 +epoch [33/50] batch [945/1000] time 1.581 (1.568) data 0.001 (0.002) loss 0.8711 (1.1017) acc 71.8750 (72.5099) lr 6.3188e-04 eta 7:25:35 +epoch [33/50] batch [950/1000] time 1.722 (1.568) data 0.000 (0.002) loss 1.2979 (1.1020) acc 68.7500 (72.5263) lr 6.3188e-04 eta 7:25:29 +epoch [33/50] batch [955/1000] time 1.571 (1.568) data 0.000 (0.002) loss 0.6348 (1.1009) acc 78.1250 (72.5491) lr 6.3188e-04 eta 7:25:21 +epoch [33/50] batch [960/1000] time 1.571 (1.568) data 0.000 (0.002) loss 0.6548 (1.1011) acc 78.1250 (72.5488) lr 6.3188e-04 eta 7:25:13 +epoch [33/50] batch [965/1000] time 1.569 (1.568) data 0.000 (0.002) loss 0.9800 (1.1012) 
acc 75.0000 (72.5421) lr 6.3188e-04 eta 7:25:05 +epoch [33/50] batch [970/1000] time 1.568 (1.568) data 0.001 (0.002) loss 1.2148 (1.1013) acc 62.5000 (72.5290) lr 6.3188e-04 eta 7:24:57 +epoch [33/50] batch [975/1000] time 1.563 (1.568) data 0.000 (0.002) loss 0.9844 (1.1022) acc 75.0000 (72.5192) lr 6.3188e-04 eta 7:24:49 +epoch [33/50] batch [980/1000] time 1.558 (1.568) data 0.000 (0.002) loss 0.6499 (1.1011) acc 78.1250 (72.5383) lr 6.3188e-04 eta 7:24:40 +epoch [33/50] batch [985/1000] time 1.571 (1.568) data 0.001 (0.002) loss 2.1777 (1.1024) acc 53.1250 (72.5159) lr 6.3188e-04 eta 7:24:34 +epoch [33/50] batch [990/1000] time 1.545 (1.568) data 0.000 (0.002) loss 1.4658 (1.1042) acc 62.5000 (72.4716) lr 6.3188e-04 eta 7:24:25 +epoch [33/50] batch [995/1000] time 1.563 (1.568) data 0.000 (0.002) loss 1.4990 (1.1053) acc 62.5000 (72.4749) lr 6.3188e-04 eta 7:24:17 +epoch [33/50] batch [1000/1000] time 1.564 (1.568) data 0.000 (0.002) loss 0.9502 (1.1054) acc 71.8750 (72.4750) lr 5.7422e-04 eta 7:24:08 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,364 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [34/50] batch [5/1000] time 1.551 (1.691) data 0.000 (0.184) loss 0.8276 (1.0518) acc 71.8750 (72.5000) lr 5.7422e-04 eta 7:58:59 +epoch [34/50] batch [10/1000] time 1.559 (1.628) data 0.000 (0.092) loss 1.1660 (1.0277) acc 68.7500 (72.8125) lr 5.7422e-04 eta 7:40:56 +epoch [34/50] batch [15/1000] time 1.552 (1.610) data 0.000 (0.062) loss 0.8013 (1.0152) acc 75.0000 (73.1250) lr 5.7422e-04 eta 7:35:41 +epoch [34/50] batch [20/1000] time 1.547 (1.594) data 0.001 (0.046) loss 1.6523 (0.9846) acc 62.5000 (73.9062) lr 5.7422e-04 eta 7:31:00 +epoch [34/50] batch [25/1000] time 1.536 (1.589) data 0.001 (0.037) loss 0.7646 (1.0036) acc 84.3750 (74.0000) lr 5.7422e-04 eta 7:29:37 +epoch [34/50] batch 
[30/1000] time 1.579 (1.585) data 0.001 (0.031) loss 1.6094 (1.0511) acc 68.7500 (73.1250) lr 5.7422e-04 eta 7:28:19 +epoch [34/50] batch [35/1000] time 1.542 (1.582) data 0.000 (0.027) loss 0.6377 (1.0882) acc 84.3750 (72.5000) lr 5.7422e-04 eta 7:27:17 +epoch [34/50] batch [40/1000] time 1.547 (1.579) data 0.000 (0.023) loss 1.0566 (1.0711) acc 75.0000 (73.2031) lr 5.7422e-04 eta 7:26:24 +epoch [34/50] batch [45/1000] time 1.573 (1.577) data 0.001 (0.021) loss 1.0918 (1.0567) acc 84.3750 (74.0278) lr 5.7422e-04 eta 7:25:41 +epoch [34/50] batch [50/1000] time 1.570 (1.576) data 0.001 (0.019) loss 0.6592 (1.0722) acc 81.2500 (73.6875) lr 5.7422e-04 eta 7:25:19 +epoch [34/50] batch [55/1000] time 1.591 (1.575) data 0.000 (0.017) loss 0.7539 (1.0553) acc 84.3750 (73.8068) lr 5.7422e-04 eta 7:24:56 +epoch [34/50] batch [60/1000] time 1.845 (1.580) data 0.000 (0.016) loss 1.2021 (1.0605) acc 68.7500 (73.9583) lr 5.7422e-04 eta 7:26:02 +epoch [34/50] batch [65/1000] time 1.544 (1.579) data 0.000 (0.015) loss 0.8374 (1.0558) acc 78.1250 (73.7981) lr 5.7422e-04 eta 7:25:42 +epoch [34/50] batch [70/1000] time 1.576 (1.579) data 0.000 (0.014) loss 0.8413 (1.0571) acc 71.8750 (73.6161) lr 5.7422e-04 eta 7:25:24 +epoch [34/50] batch [75/1000] time 1.557 (1.578) data 0.000 (0.013) loss 0.9395 (1.0541) acc 84.3750 (73.6250) lr 5.7422e-04 eta 7:24:59 +epoch [34/50] batch [80/1000] time 1.579 (1.577) data 0.000 (0.012) loss 0.5928 (1.0645) acc 87.5000 (73.3984) lr 5.7422e-04 eta 7:24:44 +epoch [34/50] batch [85/1000] time 1.575 (1.577) data 0.001 (0.011) loss 1.2529 (1.0723) acc 81.2500 (73.2353) lr 5.7422e-04 eta 7:24:32 +epoch [34/50] batch [90/1000] time 1.574 (1.576) data 0.000 (0.011) loss 1.0127 (1.0695) acc 87.5000 (73.4028) lr 5.7422e-04 eta 7:24:05 +epoch [34/50] batch [95/1000] time 1.534 (1.574) data 0.000 (0.010) loss 0.6294 (1.0586) acc 87.5000 (73.7171) lr 5.7422e-04 eta 7:23:33 +epoch [34/50] batch [100/1000] time 1.549 (1.573) data 0.000 (0.010) loss 1.2852 
(1.0560) acc 78.1250 (73.7188) lr 5.7422e-04 eta 7:23:06 +epoch [34/50] batch [105/1000] time 1.561 (1.573) data 0.001 (0.009) loss 0.7690 (1.0528) acc 87.5000 (73.7202) lr 5.7422e-04 eta 7:22:50 +epoch [34/50] batch [110/1000] time 1.568 (1.574) data 0.001 (0.009) loss 0.8218 (1.0588) acc 75.0000 (73.6648) lr 5.7422e-04 eta 7:23:06 +epoch [34/50] batch [115/1000] time 1.546 (1.574) data 0.000 (0.008) loss 0.6973 (1.0618) acc 78.1250 (73.4511) lr 5.7422e-04 eta 7:22:59 +epoch [34/50] batch [120/1000] time 1.573 (1.573) data 0.000 (0.008) loss 1.0332 (1.0595) acc 75.0000 (73.3333) lr 5.7422e-04 eta 7:22:35 +epoch [34/50] batch [125/1000] time 1.549 (1.573) data 0.001 (0.008) loss 0.8389 (1.0546) acc 84.3750 (73.5250) lr 5.7422e-04 eta 7:22:18 +epoch [34/50] batch [130/1000] time 1.577 (1.572) data 0.000 (0.008) loss 1.2881 (1.0602) acc 71.8750 (73.4615) lr 5.7422e-04 eta 7:22:04 +epoch [34/50] batch [135/1000] time 1.572 (1.572) data 0.000 (0.007) loss 0.7881 (1.0643) acc 84.3750 (73.3333) lr 5.7422e-04 eta 7:21:55 +epoch [34/50] batch [140/1000] time 1.569 (1.572) data 0.000 (0.007) loss 1.1670 (1.0751) acc 71.8750 (73.2589) lr 5.7422e-04 eta 7:21:51 +epoch [34/50] batch [145/1000] time 1.567 (1.572) data 0.000 (0.007) loss 0.9199 (1.0740) acc 75.0000 (73.2974) lr 5.7422e-04 eta 7:21:42 +epoch [34/50] batch [150/1000] time 1.552 (1.572) data 0.001 (0.007) loss 0.8247 (1.0746) acc 81.2500 (73.3542) lr 5.7422e-04 eta 7:21:31 +epoch [34/50] batch [155/1000] time 1.582 (1.572) data 0.001 (0.006) loss 0.9399 (1.0745) acc 81.2500 (73.3266) lr 5.7422e-04 eta 7:21:20 +epoch [34/50] batch [160/1000] time 1.561 (1.572) data 0.000 (0.006) loss 0.7798 (1.0735) acc 75.0000 (73.3789) lr 5.7422e-04 eta 7:21:10 +epoch [34/50] batch [165/1000] time 1.545 (1.571) data 0.001 (0.006) loss 0.8569 (1.0701) acc 81.2500 (73.5417) lr 5.7422e-04 eta 7:20:54 +epoch [34/50] batch [170/1000] time 1.558 (1.571) data 0.000 (0.006) loss 1.0020 (1.0681) acc 78.1250 (73.6029) lr 5.7422e-04 eta 
7:20:38 +epoch [34/50] batch [175/1000] time 1.560 (1.571) data 0.001 (0.006) loss 0.9429 (1.0710) acc 71.8750 (73.4643) lr 5.7422e-04 eta 7:20:27 +epoch [34/50] batch [180/1000] time 1.551 (1.570) data 0.001 (0.006) loss 1.5479 (1.0710) acc 59.3750 (73.4896) lr 5.7422e-04 eta 7:20:12 +epoch [34/50] batch [185/1000] time 1.544 (1.570) data 0.001 (0.005) loss 1.3096 (1.0682) acc 68.7500 (73.4966) lr 5.7422e-04 eta 7:19:55 +epoch [34/50] batch [190/1000] time 1.598 (1.570) data 0.000 (0.005) loss 0.7295 (1.0684) acc 84.3750 (73.5362) lr 5.7422e-04 eta 7:19:48 +epoch [34/50] batch [195/1000] time 1.570 (1.570) data 0.000 (0.005) loss 1.1982 (1.0651) acc 71.8750 (73.6058) lr 5.7422e-04 eta 7:19:41 +epoch [34/50] batch [200/1000] time 1.545 (1.570) data 0.000 (0.005) loss 1.0889 (1.0720) acc 68.7500 (73.4219) lr 5.7422e-04 eta 7:19:32 +epoch [34/50] batch [205/1000] time 1.589 (1.570) data 0.000 (0.005) loss 1.1279 (1.0700) acc 71.8750 (73.4756) lr 5.7422e-04 eta 7:19:28 +epoch [34/50] batch [210/1000] time 1.575 (1.570) data 0.001 (0.005) loss 1.1191 (1.0716) acc 68.7500 (73.4226) lr 5.7422e-04 eta 7:19:19 +epoch [34/50] batch [215/1000] time 1.555 (1.570) data 0.001 (0.005) loss 0.6860 (1.0718) acc 90.6250 (73.4448) lr 5.7422e-04 eta 7:19:20 +epoch [34/50] batch [220/1000] time 1.548 (1.570) data 0.000 (0.005) loss 0.8335 (1.0749) acc 78.1250 (73.3807) lr 5.7422e-04 eta 7:19:08 +epoch [34/50] batch [225/1000] time 1.576 (1.570) data 0.000 (0.005) loss 1.1465 (1.0741) acc 59.3750 (73.3194) lr 5.7422e-04 eta 7:18:58 +epoch [34/50] batch [230/1000] time 1.559 (1.570) data 0.000 (0.004) loss 0.8730 (1.0729) acc 78.1250 (73.3288) lr 5.7422e-04 eta 7:18:46 +epoch [34/50] batch [235/1000] time 1.539 (1.570) data 0.000 (0.004) loss 1.9150 (1.0763) acc 59.3750 (73.3245) lr 5.7422e-04 eta 7:18:35 +epoch [34/50] batch [240/1000] time 1.540 (1.569) data 0.000 (0.004) loss 0.8403 (1.0741) acc 71.8750 (73.3594) lr 5.7422e-04 eta 7:18:22 +epoch [34/50] batch [245/1000] time 1.553 
(1.569) data 0.000 (0.004) loss 0.3257 (1.0686) acc 90.6250 (73.4949) lr 5.7422e-04 eta 7:18:11 +epoch [34/50] batch [250/1000] time 1.553 (1.569) data 0.001 (0.004) loss 1.2539 (1.0714) acc 75.0000 (73.4625) lr 5.7422e-04 eta 7:18:00 +epoch [34/50] batch [255/1000] time 1.566 (1.569) data 0.000 (0.004) loss 0.8813 (1.0702) acc 68.7500 (73.3333) lr 5.7422e-04 eta 7:17:50 +epoch [34/50] batch [260/1000] time 1.583 (1.569) data 0.000 (0.004) loss 1.3340 (1.0680) acc 71.8750 (73.3894) lr 5.7422e-04 eta 7:17:53 +epoch [34/50] batch [265/1000] time 1.576 (1.570) data 0.000 (0.004) loss 1.1553 (1.0653) acc 75.0000 (73.4552) lr 5.7422e-04 eta 7:17:46 +epoch [34/50] batch [270/1000] time 1.547 (1.569) data 0.001 (0.004) loss 0.8105 (1.0637) acc 84.3750 (73.5069) lr 5.7422e-04 eta 7:17:32 +epoch [34/50] batch [275/1000] time 1.555 (1.569) data 0.000 (0.004) loss 0.9131 (1.0608) acc 75.0000 (73.5341) lr 5.7422e-04 eta 7:17:20 +epoch [34/50] batch [280/1000] time 1.561 (1.569) data 0.001 (0.004) loss 0.5840 (1.0622) acc 81.2500 (73.4598) lr 5.7422e-04 eta 7:17:07 +epoch [34/50] batch [285/1000] time 1.561 (1.569) data 0.000 (0.004) loss 0.6519 (1.0619) acc 81.2500 (73.4539) lr 5.7422e-04 eta 7:17:00 +epoch [34/50] batch [290/1000] time 1.587 (1.569) data 0.001 (0.004) loss 0.7183 (1.0618) acc 71.8750 (73.4375) lr 5.7422e-04 eta 7:16:52 +epoch [34/50] batch [295/1000] time 1.559 (1.569) data 0.000 (0.004) loss 0.8491 (1.0639) acc 81.2500 (73.4110) lr 5.7422e-04 eta 7:16:43 +epoch [34/50] batch [300/1000] time 1.535 (1.568) data 0.000 (0.004) loss 0.8174 (1.0637) acc 84.3750 (73.4375) lr 5.7422e-04 eta 7:16:33 +epoch [34/50] batch [305/1000] time 1.559 (1.568) data 0.000 (0.003) loss 0.8242 (1.0642) acc 71.8750 (73.3914) lr 5.7422e-04 eta 7:16:20 +epoch [34/50] batch [310/1000] time 1.547 (1.568) data 0.000 (0.003) loss 1.1895 (1.0661) acc 75.0000 (73.3569) lr 5.7422e-04 eta 7:16:09 +epoch [34/50] batch [315/1000] time 1.578 (1.568) data 0.000 (0.003) loss 0.9839 (1.0653) acc 
65.6250 (73.2937) lr 5.7422e-04 eta 7:16:01 +epoch [34/50] batch [320/1000] time 1.572 (1.568) data 0.000 (0.003) loss 1.1670 (1.0648) acc 65.6250 (73.3105) lr 5.7422e-04 eta 7:15:52 +epoch [34/50] batch [325/1000] time 1.554 (1.568) data 0.000 (0.003) loss 1.5371 (1.0677) acc 68.7500 (73.3173) lr 5.7422e-04 eta 7:15:41 +epoch [34/50] batch [330/1000] time 1.562 (1.568) data 0.000 (0.003) loss 0.8599 (1.0693) acc 71.8750 (73.2386) lr 5.7422e-04 eta 7:15:30 +epoch [34/50] batch [335/1000] time 1.567 (1.567) data 0.000 (0.003) loss 1.6562 (1.0713) acc 65.6250 (73.1996) lr 5.7422e-04 eta 7:15:22 +epoch [34/50] batch [340/1000] time 1.555 (1.567) data 0.001 (0.003) loss 1.1338 (1.0743) acc 71.8750 (73.1710) lr 5.7422e-04 eta 7:15:14 +epoch [34/50] batch [345/1000] time 1.564 (1.567) data 0.001 (0.003) loss 1.0596 (1.0726) acc 81.2500 (73.2428) lr 5.7422e-04 eta 7:15:03 +epoch [34/50] batch [350/1000] time 1.541 (1.567) data 0.000 (0.003) loss 1.1201 (1.0744) acc 75.0000 (73.2054) lr 5.7422e-04 eta 7:14:53 +epoch [34/50] batch [355/1000] time 1.566 (1.567) data 0.000 (0.003) loss 1.9268 (1.0765) acc 62.5000 (73.1690) lr 5.7422e-04 eta 7:14:43 +epoch [34/50] batch [360/1000] time 1.560 (1.567) data 0.000 (0.003) loss 1.1797 (1.0787) acc 78.1250 (73.1771) lr 5.7422e-04 eta 7:14:34 +epoch [34/50] batch [365/1000] time 1.566 (1.568) data 0.000 (0.003) loss 1.6504 (1.0806) acc 62.5000 (73.0993) lr 5.7422e-04 eta 7:14:36 +epoch [34/50] batch [370/1000] time 1.562 (1.568) data 0.000 (0.003) loss 1.0781 (1.0825) acc 81.2500 (73.0574) lr 5.7422e-04 eta 7:14:29 +epoch [34/50] batch [375/1000] time 1.568 (1.568) data 0.001 (0.003) loss 1.1348 (1.0842) acc 78.1250 (73.0750) lr 5.7422e-04 eta 7:14:21 +epoch [34/50] batch [380/1000] time 1.586 (1.568) data 0.001 (0.003) loss 1.4902 (1.0832) acc 68.7500 (73.0757) lr 5.7422e-04 eta 7:14:17 +epoch [34/50] batch [385/1000] time 1.576 (1.568) data 0.000 (0.003) loss 0.6665 (1.0840) acc 87.5000 (73.0763) lr 5.7422e-04 eta 7:14:07 +epoch 
[34/50] batch [390/1000] time 1.570 (1.568) data 0.001 (0.003) loss 1.1885 (1.0858) acc 68.7500 (73.0449) lr 5.7422e-04 eta 7:14:00 +epoch [34/50] batch [395/1000] time 1.551 (1.568) data 0.000 (0.003) loss 0.9946 (1.0862) acc 75.0000 (73.0301) lr 5.7422e-04 eta 7:13:52 +epoch [34/50] batch [400/1000] time 1.568 (1.568) data 0.000 (0.003) loss 0.4734 (1.0839) acc 84.3750 (73.0625) lr 5.7422e-04 eta 7:13:43 +epoch [34/50] batch [405/1000] time 1.590 (1.568) data 0.001 (0.003) loss 1.1309 (1.0848) acc 71.8750 (73.0324) lr 5.7422e-04 eta 7:13:38 +epoch [34/50] batch [410/1000] time 1.530 (1.568) data 0.000 (0.003) loss 0.9526 (1.0869) acc 62.5000 (72.9726) lr 5.7422e-04 eta 7:13:34 +epoch [34/50] batch [415/1000] time 1.584 (1.568) data 0.000 (0.003) loss 1.1211 (1.0873) acc 68.7500 (72.9443) lr 5.7422e-04 eta 7:13:25 +epoch [34/50] batch [420/1000] time 1.563 (1.568) data 0.000 (0.003) loss 1.0010 (1.0864) acc 75.0000 (72.9315) lr 5.7422e-04 eta 7:13:17 +epoch [34/50] batch [425/1000] time 1.566 (1.568) data 0.000 (0.003) loss 0.6768 (1.0858) acc 78.1250 (72.9191) lr 5.7422e-04 eta 7:13:07 +epoch [34/50] batch [430/1000] time 1.569 (1.568) data 0.000 (0.003) loss 1.1162 (1.0833) acc 62.5000 (72.9651) lr 5.7422e-04 eta 7:12:59 +epoch [34/50] batch [435/1000] time 1.573 (1.568) data 0.000 (0.003) loss 1.6367 (1.0830) acc 65.6250 (72.9526) lr 5.7422e-04 eta 7:12:47 +epoch [34/50] batch [440/1000] time 1.562 (1.567) data 0.000 (0.003) loss 1.0586 (1.0830) acc 62.5000 (72.9616) lr 5.7422e-04 eta 7:12:36 +epoch [34/50] batch [445/1000] time 1.572 (1.567) data 0.000 (0.003) loss 1.2568 (1.0846) acc 68.7500 (72.9213) lr 5.7422e-04 eta 7:12:29 +epoch [34/50] batch [450/1000] time 1.578 (1.568) data 0.000 (0.002) loss 0.9473 (1.0856) acc 71.8750 (72.8819) lr 5.7422e-04 eta 7:12:26 +epoch [34/50] batch [455/1000] time 1.558 (1.568) data 0.001 (0.002) loss 0.9644 (1.0845) acc 78.1250 (72.9258) lr 5.7422e-04 eta 7:12:16 +epoch [34/50] batch [460/1000] time 1.551 (1.567) data 
0.000 (0.002) loss 0.9814 (1.0844) acc 75.0000 (72.9076) lr 5.7422e-04 eta 7:12:06 +epoch [34/50] batch [465/1000] time 1.573 (1.567) data 0.001 (0.002) loss 0.9194 (1.0855) acc 75.0000 (72.8495) lr 5.7422e-04 eta 7:11:56 +epoch [34/50] batch [470/1000] time 1.555 (1.567) data 0.000 (0.002) loss 0.3586 (1.0843) acc 90.6250 (72.8790) lr 5.7422e-04 eta 7:11:46 +epoch [34/50] batch [475/1000] time 1.575 (1.567) data 0.000 (0.002) loss 0.9487 (1.0847) acc 81.2500 (72.8684) lr 5.7422e-04 eta 7:11:39 +epoch [34/50] batch [480/1000] time 1.547 (1.567) data 0.000 (0.002) loss 1.1768 (1.0853) acc 78.1250 (72.8581) lr 5.7422e-04 eta 7:11:30 +epoch [34/50] batch [485/1000] time 1.585 (1.567) data 0.000 (0.002) loss 0.9575 (1.0852) acc 78.1250 (72.8544) lr 5.7422e-04 eta 7:11:21 +epoch [34/50] batch [490/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.3643 (1.0853) acc 71.8750 (72.8444) lr 5.7422e-04 eta 7:11:12 +epoch [34/50] batch [495/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.3887 (1.0881) acc 62.5000 (72.7967) lr 5.7422e-04 eta 7:11:03 +epoch [34/50] batch [500/1000] time 1.558 (1.567) data 0.001 (0.002) loss 0.9009 (1.0860) acc 78.1250 (72.8312) lr 5.7422e-04 eta 7:10:56 +epoch [34/50] batch [505/1000] time 1.574 (1.567) data 0.000 (0.002) loss 1.2666 (1.0890) acc 68.7500 (72.7661) lr 5.7422e-04 eta 7:10:46 +epoch [34/50] batch [510/1000] time 1.565 (1.567) data 0.000 (0.002) loss 0.7031 (1.0883) acc 78.1250 (72.7635) lr 5.7422e-04 eta 7:10:37 +epoch [34/50] batch [515/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.2930 (1.0886) acc 68.7500 (72.7427) lr 5.7422e-04 eta 7:10:34 +epoch [34/50] batch [520/1000] time 1.561 (1.567) data 0.000 (0.002) loss 0.9111 (1.0910) acc 75.0000 (72.7284) lr 5.7422e-04 eta 7:10:27 +epoch [34/50] batch [525/1000] time 1.569 (1.567) data 0.000 (0.002) loss 0.7979 (1.0909) acc 84.3750 (72.7321) lr 5.7422e-04 eta 7:10:18 +epoch [34/50] batch [530/1000] time 1.559 (1.567) data 0.001 (0.002) loss 1.0967 (1.0906) acc 71.8750 
(72.7476) lr 5.7422e-04 eta 7:10:10 +epoch [34/50] batch [535/1000] time 1.557 (1.567) data 0.001 (0.002) loss 0.8130 (1.0906) acc 81.2500 (72.7512) lr 5.7422e-04 eta 7:10:02 +epoch [34/50] batch [540/1000] time 1.574 (1.567) data 0.001 (0.002) loss 1.1055 (1.0896) acc 68.7500 (72.7836) lr 5.7422e-04 eta 7:09:55 +epoch [34/50] batch [545/1000] time 1.559 (1.567) data 0.000 (0.002) loss 0.9556 (1.0883) acc 81.2500 (72.8612) lr 5.7422e-04 eta 7:09:45 +epoch [34/50] batch [550/1000] time 1.581 (1.567) data 0.000 (0.002) loss 1.0371 (1.0910) acc 68.7500 (72.8636) lr 5.7422e-04 eta 7:09:36 +epoch [34/50] batch [555/1000] time 1.553 (1.567) data 0.001 (0.002) loss 1.1885 (1.0916) acc 71.8750 (72.8660) lr 5.7422e-04 eta 7:09:28 +epoch [34/50] batch [560/1000] time 1.607 (1.567) data 0.001 (0.002) loss 1.3467 (1.0922) acc 62.5000 (72.8516) lr 5.7422e-04 eta 7:09:26 +epoch [34/50] batch [565/1000] time 1.551 (1.567) data 0.000 (0.002) loss 0.9707 (1.0935) acc 81.2500 (72.8595) lr 5.7422e-04 eta 7:09:18 +epoch [34/50] batch [570/1000] time 1.552 (1.567) data 0.001 (0.002) loss 1.2920 (1.0929) acc 75.0000 (72.8673) lr 5.7422e-04 eta 7:09:10 +epoch [34/50] batch [575/1000] time 1.559 (1.567) data 0.000 (0.002) loss 0.7739 (1.0908) acc 87.5000 (72.9185) lr 5.7422e-04 eta 7:09:01 +epoch [34/50] batch [580/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.1260 (1.0914) acc 65.6250 (72.8933) lr 5.7422e-04 eta 7:08:51 +epoch [34/50] batch [585/1000] time 1.555 (1.567) data 0.000 (0.002) loss 0.9858 (1.0903) acc 78.1250 (72.8900) lr 5.7422e-04 eta 7:08:44 +epoch [34/50] batch [590/1000] time 1.548 (1.567) data 0.001 (0.002) loss 1.0439 (1.0896) acc 78.1250 (72.9078) lr 5.7422e-04 eta 7:08:34 +epoch [34/50] batch [595/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.2812 (1.0897) acc 68.7500 (72.9202) lr 5.7422e-04 eta 7:08:28 +epoch [34/50] batch [600/1000] time 1.752 (1.567) data 0.001 (0.002) loss 1.1113 (1.0897) acc 75.0000 (72.9062) lr 5.7422e-04 eta 7:08:25 +epoch [34/50] 
batch [605/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.9897 (1.0895) acc 65.6250 (72.8874) lr 5.7422e-04 eta 7:08:17 +epoch [34/50] batch [610/1000] time 1.586 (1.567) data 0.000 (0.002) loss 1.2109 (1.0876) acc 68.7500 (72.9201) lr 5.7422e-04 eta 7:08:09 +epoch [34/50] batch [615/1000] time 1.553 (1.567) data 0.000 (0.002) loss 0.9189 (1.0880) acc 78.1250 (72.9217) lr 5.7422e-04 eta 7:08:01 +epoch [34/50] batch [620/1000] time 1.553 (1.567) data 0.000 (0.002) loss 0.4580 (1.0860) acc 84.3750 (72.9637) lr 5.7422e-04 eta 7:07:51 +epoch [34/50] batch [625/1000] time 1.558 (1.567) data 0.004 (0.002) loss 0.7944 (1.0857) acc 75.0000 (72.9650) lr 5.7422e-04 eta 7:07:42 +epoch [34/50] batch [630/1000] time 1.554 (1.567) data 0.001 (0.002) loss 0.7666 (1.0851) acc 81.2500 (72.9911) lr 5.7422e-04 eta 7:07:33 +epoch [34/50] batch [635/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.9746 (1.0856) acc 68.7500 (72.9380) lr 5.7422e-04 eta 7:07:25 +epoch [34/50] batch [640/1000] time 1.598 (1.567) data 0.000 (0.002) loss 0.9854 (1.0878) acc 71.8750 (72.8955) lr 5.7422e-04 eta 7:07:17 +epoch [34/50] batch [645/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.1846 (1.0895) acc 68.7500 (72.8391) lr 5.7422e-04 eta 7:07:08 +epoch [34/50] batch [650/1000] time 1.566 (1.567) data 0.000 (0.002) loss 0.9927 (1.0894) acc 78.1250 (72.8558) lr 5.7422e-04 eta 7:07:00 +epoch [34/50] batch [655/1000] time 1.566 (1.567) data 0.001 (0.002) loss 1.3281 (1.0900) acc 71.8750 (72.8483) lr 5.7422e-04 eta 7:06:51 +epoch [34/50] batch [660/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.7905 (1.0912) acc 81.2500 (72.8551) lr 5.7422e-04 eta 7:06:43 +epoch [34/50] batch [665/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.3115 (1.0914) acc 62.5000 (72.8195) lr 5.7422e-04 eta 7:06:38 +epoch [34/50] batch [670/1000] time 1.574 (1.567) data 0.001 (0.002) loss 1.5186 (1.0907) acc 59.3750 (72.8265) lr 5.7422e-04 eta 7:06:30 +epoch [34/50] batch [675/1000] time 1.545 (1.567) data 0.001 
(0.002) loss 1.3672 (1.0912) acc 68.7500 (72.8472) lr 5.7422e-04 eta 7:06:23 +epoch [34/50] batch [680/1000] time 1.575 (1.567) data 0.000 (0.002) loss 0.4180 (1.0905) acc 87.5000 (72.8860) lr 5.7422e-04 eta 7:06:14 +epoch [34/50] batch [685/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.8398 (1.0907) acc 71.8750 (72.8786) lr 5.7422e-04 eta 7:06:06 +epoch [34/50] batch [690/1000] time 1.565 (1.567) data 0.000 (0.002) loss 1.6982 (1.0924) acc 59.3750 (72.8397) lr 5.7422e-04 eta 7:05:58 +epoch [34/50] batch [695/1000] time 1.555 (1.567) data 0.000 (0.002) loss 0.9204 (1.0921) acc 78.1250 (72.8642) lr 5.7422e-04 eta 7:05:49 +epoch [34/50] batch [700/1000] time 1.567 (1.567) data 0.001 (0.002) loss 0.8613 (1.0912) acc 78.1250 (72.9018) lr 5.7422e-04 eta 7:05:41 +epoch [34/50] batch [705/1000] time 1.577 (1.567) data 0.001 (0.002) loss 0.9927 (1.0905) acc 75.0000 (72.9078) lr 5.7422e-04 eta 7:05:33 +epoch [34/50] batch [710/1000] time 1.557 (1.567) data 0.001 (0.002) loss 1.2168 (1.0898) acc 68.7500 (72.9313) lr 5.7422e-04 eta 7:05:29 +epoch [34/50] batch [715/1000] time 1.532 (1.567) data 0.000 (0.002) loss 0.9502 (1.0887) acc 78.1250 (72.9371) lr 5.7422e-04 eta 7:05:20 +epoch [34/50] batch [720/1000] time 1.558 (1.567) data 0.001 (0.002) loss 1.1797 (1.0891) acc 68.7500 (72.9253) lr 5.7422e-04 eta 7:05:12 +epoch [34/50] batch [725/1000] time 1.588 (1.567) data 0.001 (0.002) loss 1.3545 (1.0897) acc 59.3750 (72.9095) lr 5.7422e-04 eta 7:05:04 +epoch [34/50] batch [730/1000] time 1.570 (1.567) data 0.001 (0.002) loss 0.7817 (1.0899) acc 84.3750 (72.9195) lr 5.7422e-04 eta 7:04:56 +epoch [34/50] batch [735/1000] time 1.557 (1.567) data 0.001 (0.002) loss 0.6841 (1.0901) acc 75.0000 (72.9124) lr 5.7422e-04 eta 7:04:46 +epoch [34/50] batch [740/1000] time 1.574 (1.567) data 0.001 (0.002) loss 1.1484 (1.0904) acc 78.1250 (72.9096) lr 5.7422e-04 eta 7:04:38 +epoch [34/50] batch [745/1000] time 1.574 (1.567) data 0.001 (0.002) loss 1.5332 (1.0909) acc 59.3750 (72.8985) lr 
5.7422e-04 eta 7:04:30 +epoch [34/50] batch [750/1000] time 1.578 (1.567) data 0.001 (0.002) loss 0.7256 (1.0904) acc 87.5000 (72.9042) lr 5.7422e-04 eta 7:04:23 +epoch [34/50] batch [755/1000] time 1.579 (1.567) data 0.001 (0.002) loss 1.4805 (1.0923) acc 59.3750 (72.8435) lr 5.7422e-04 eta 7:04:18 +epoch [34/50] batch [760/1000] time 1.588 (1.567) data 0.000 (0.002) loss 0.7744 (1.0924) acc 75.0000 (72.8372) lr 5.7422e-04 eta 7:04:11 +epoch [34/50] batch [765/1000] time 1.558 (1.567) data 0.001 (0.002) loss 1.2109 (1.0920) acc 65.6250 (72.8472) lr 5.7422e-04 eta 7:04:03 +epoch [34/50] batch [770/1000] time 1.548 (1.567) data 0.000 (0.002) loss 0.8267 (1.0916) acc 68.7500 (72.8369) lr 5.7422e-04 eta 7:03:53 +epoch [34/50] batch [775/1000] time 1.586 (1.567) data 0.001 (0.002) loss 0.7598 (1.0909) acc 71.8750 (72.8226) lr 5.7422e-04 eta 7:03:47 +epoch [34/50] batch [780/1000] time 1.576 (1.567) data 0.001 (0.002) loss 0.8091 (1.0901) acc 75.0000 (72.8446) lr 5.7422e-04 eta 7:03:38 +epoch [34/50] batch [785/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.6631 (1.0916) acc 62.5000 (72.8105) lr 5.7422e-04 eta 7:03:30 +epoch [34/50] batch [790/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.9219 (1.0910) acc 75.0000 (72.8125) lr 5.7422e-04 eta 7:03:22 +epoch [34/50] batch [795/1000] time 1.559 (1.567) data 0.001 (0.002) loss 0.8247 (1.0918) acc 78.1250 (72.8184) lr 5.7422e-04 eta 7:03:14 +epoch [34/50] batch [800/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.1621 (1.0926) acc 68.7500 (72.8125) lr 5.7422e-04 eta 7:03:04 +epoch [34/50] batch [805/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.7324 (1.0921) acc 81.2500 (72.8222) lr 5.7422e-04 eta 7:02:56 +epoch [34/50] batch [810/1000] time 1.548 (1.567) data 0.000 (0.002) loss 1.2949 (1.0926) acc 65.6250 (72.8125) lr 5.7422e-04 eta 7:02:47 +epoch [34/50] batch [815/1000] time 1.577 (1.567) data 0.001 (0.002) loss 1.5791 (1.0922) acc 71.8750 (72.8106) lr 5.7422e-04 eta 7:02:42 +epoch [34/50] batch 
[820/1000] time 1.575 (1.567) data 0.001 (0.002) loss 1.4189 (1.0934) acc 59.3750 (72.7973) lr 5.7422e-04 eta 7:02:35 +epoch [34/50] batch [825/1000] time 1.554 (1.567) data 0.001 (0.002) loss 0.6675 (1.0918) acc 81.2500 (72.8220) lr 5.7422e-04 eta 7:02:27 +epoch [34/50] batch [830/1000] time 1.552 (1.567) data 0.000 (0.002) loss 1.7129 (1.0931) acc 53.1250 (72.7899) lr 5.7422e-04 eta 7:02:18 +epoch [34/50] batch [835/1000] time 1.553 (1.567) data 0.001 (0.002) loss 0.5210 (1.0941) acc 84.3750 (72.7769) lr 5.7422e-04 eta 7:02:11 +epoch [34/50] batch [840/1000] time 1.564 (1.567) data 0.001 (0.002) loss 1.4658 (1.0949) acc 56.2500 (72.7344) lr 5.7422e-04 eta 7:02:02 +epoch [34/50] batch [845/1000] time 1.573 (1.567) data 0.000 (0.002) loss 1.6484 (1.0947) acc 71.8750 (72.7478) lr 5.7422e-04 eta 7:01:53 +epoch [34/50] batch [850/1000] time 1.551 (1.567) data 0.001 (0.002) loss 1.9170 (1.0955) acc 50.0000 (72.7279) lr 5.7422e-04 eta 7:01:43 +epoch [34/50] batch [855/1000] time 1.557 (1.567) data 0.000 (0.002) loss 0.9448 (1.0942) acc 81.2500 (72.7595) lr 5.7422e-04 eta 7:01:35 +epoch [34/50] batch [860/1000] time 1.550 (1.567) data 0.001 (0.002) loss 1.4375 (1.0965) acc 68.7500 (72.7071) lr 5.7422e-04 eta 7:01:28 +epoch [34/50] batch [865/1000] time 1.569 (1.567) data 0.001 (0.002) loss 1.2852 (1.0960) acc 65.6250 (72.7132) lr 5.7422e-04 eta 7:01:21 +epoch [34/50] batch [870/1000] time 1.583 (1.567) data 0.000 (0.002) loss 0.7896 (1.0965) acc 84.3750 (72.7047) lr 5.7422e-04 eta 7:01:13 +epoch [34/50] batch [875/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.0762 (1.0969) acc 65.6250 (72.6821) lr 5.7422e-04 eta 7:01:05 +epoch [34/50] batch [880/1000] time 1.573 (1.567) data 0.001 (0.002) loss 1.4600 (1.0983) acc 62.5000 (72.6314) lr 5.7422e-04 eta 7:00:57 +epoch [34/50] batch [885/1000] time 1.544 (1.567) data 0.001 (0.002) loss 1.7930 (1.0989) acc 56.2500 (72.6201) lr 5.7422e-04 eta 7:00:48 +epoch [34/50] batch [890/1000] time 1.547 (1.567) data 0.000 (0.002) loss 
1.5146 (1.0996) acc 78.1250 (72.6088) lr 5.7422e-04 eta 7:00:39 +epoch [34/50] batch [895/1000] time 1.575 (1.567) data 0.000 (0.001) loss 1.0967 (1.0991) acc 68.7500 (72.6292) lr 5.7422e-04 eta 7:00:31 +epoch [34/50] batch [900/1000] time 1.532 (1.567) data 0.000 (0.001) loss 1.4814 (1.0988) acc 62.5000 (72.6181) lr 5.7422e-04 eta 7:00:22 +epoch [34/50] batch [905/1000] time 1.568 (1.567) data 0.000 (0.001) loss 0.9893 (1.0986) acc 68.7500 (72.6070) lr 5.7422e-04 eta 7:00:16 +epoch [34/50] batch [910/1000] time 1.558 (1.567) data 0.001 (0.001) loss 1.2695 (1.0983) acc 71.8750 (72.6168) lr 5.7422e-04 eta 7:00:08 +epoch [34/50] batch [915/1000] time 1.555 (1.567) data 0.000 (0.001) loss 0.9497 (1.0971) acc 71.8750 (72.6264) lr 5.7422e-04 eta 6:59:59 +epoch [34/50] batch [920/1000] time 1.559 (1.567) data 0.000 (0.001) loss 0.7554 (1.0968) acc 71.8750 (72.6155) lr 5.7422e-04 eta 6:59:51 +epoch [34/50] batch [925/1000] time 1.559 (1.567) data 0.001 (0.001) loss 1.5039 (1.0974) acc 65.6250 (72.6047) lr 5.7422e-04 eta 6:59:43 +epoch [34/50] batch [930/1000] time 1.564 (1.567) data 0.001 (0.001) loss 1.2764 (1.0973) acc 65.6250 (72.5874) lr 5.7422e-04 eta 6:59:35 +epoch [34/50] batch [935/1000] time 1.554 (1.567) data 0.001 (0.001) loss 1.1133 (1.0990) acc 75.0000 (72.5802) lr 5.7422e-04 eta 6:59:27 +epoch [34/50] batch [940/1000] time 1.558 (1.567) data 0.000 (0.001) loss 1.0479 (1.0990) acc 71.8750 (72.5598) lr 5.7422e-04 eta 6:59:19 +epoch [34/50] batch [945/1000] time 1.588 (1.567) data 0.001 (0.001) loss 1.2031 (1.0982) acc 56.2500 (72.5661) lr 5.7422e-04 eta 6:59:12 +epoch [34/50] batch [950/1000] time 1.560 (1.567) data 0.001 (0.001) loss 0.4988 (1.0980) acc 78.1250 (72.5658) lr 5.7422e-04 eta 6:59:04 +epoch [34/50] batch [955/1000] time 1.572 (1.567) data 0.000 (0.001) loss 1.2637 (1.0984) acc 68.7500 (72.5589) lr 5.7422e-04 eta 6:58:57 +epoch [34/50] batch [960/1000] time 1.558 (1.567) data 0.000 (0.001) loss 1.1963 (1.0987) acc 71.8750 (72.5716) lr 5.7422e-04 
eta 6:58:48 +epoch [34/50] batch [965/1000] time 1.732 (1.567) data 0.000 (0.001) loss 0.8550 (1.0980) acc 87.5000 (72.6036) lr 5.7422e-04 eta 6:58:43 +epoch [34/50] batch [970/1000] time 1.561 (1.567) data 0.001 (0.001) loss 1.2119 (1.0979) acc 65.6250 (72.5902) lr 5.7422e-04 eta 6:58:34 +epoch [34/50] batch [975/1000] time 1.556 (1.567) data 0.000 (0.001) loss 0.8281 (1.0975) acc 75.0000 (72.6058) lr 5.7422e-04 eta 6:58:26 +epoch [34/50] batch [980/1000] time 1.549 (1.567) data 0.001 (0.001) loss 1.0225 (1.0972) acc 78.1250 (72.6084) lr 5.7422e-04 eta 6:58:17 +epoch [34/50] batch [985/1000] time 1.548 (1.567) data 0.001 (0.001) loss 0.9824 (1.0964) acc 81.2500 (72.6396) lr 5.7422e-04 eta 6:58:09 +epoch [34/50] batch [990/1000] time 1.557 (1.567) data 0.001 (0.001) loss 0.8320 (1.0976) acc 75.0000 (72.6168) lr 5.7422e-04 eta 6:58:00 +epoch [34/50] batch [995/1000] time 1.581 (1.567) data 0.000 (0.001) loss 1.0020 (1.0975) acc 68.7500 (72.6005) lr 5.7422e-04 eta 6:57:52 +epoch [34/50] batch [1000/1000] time 1.554 (1.566) data 0.000 (0.001) loss 0.7886 (1.0973) acc 78.1250 (72.5938) lr 5.1825e-04 eta 6:57:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,326 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.2% +epoch [35/50] batch [5/1000] time 1.568 (1.703) data 0.000 (0.191) loss 1.0830 (1.0053) acc 71.8750 (72.5000) lr 5.1825e-04 eta 7:33:57 +epoch [35/50] batch [10/1000] time 1.550 (1.624) data 0.000 (0.096) loss 0.8267 (0.9676) acc 75.0000 (73.7500) lr 5.1825e-04 eta 7:12:41 +epoch [35/50] batch [15/1000] time 1.581 (1.605) data 0.000 (0.064) loss 0.6294 (1.0411) acc 81.2500 (71.8750) lr 5.1825e-04 eta 7:07:28 +epoch [35/50] batch [20/1000] time 1.551 (1.593) data 0.000 (0.048) loss 0.9419 (1.0371) acc 81.2500 (72.1875) lr 5.1825e-04 eta 7:04:14 +epoch [35/50] batch [25/1000] time 1.566 (1.585) data 0.000 (0.038) loss 1.2119 (1.0445) acc 68.7500 (72.7500) lr 5.1825e-04 eta 7:02:03 +epoch [35/50] batch [30/1000] time 1.561 (1.582) data 
0.000 (0.032) loss 0.9766 (1.0386) acc 65.6250 (72.1875) lr 5.1825e-04 eta 7:01:01 +epoch [35/50] batch [35/1000] time 1.557 (1.580) data 0.000 (0.028) loss 0.8862 (1.0389) acc 78.1250 (72.6786) lr 5.1825e-04 eta 7:00:20 +epoch [35/50] batch [40/1000] time 1.576 (1.578) data 0.000 (0.024) loss 1.1982 (1.0492) acc 71.8750 (72.9688) lr 5.1825e-04 eta 6:59:41 +epoch [35/50] batch [45/1000] time 1.571 (1.577) data 0.000 (0.022) loss 1.0947 (1.0635) acc 75.0000 (72.7083) lr 5.1825e-04 eta 6:59:16 +epoch [35/50] batch [50/1000] time 1.564 (1.575) data 0.000 (0.019) loss 1.3584 (1.0741) acc 71.8750 (72.3125) lr 5.1825e-04 eta 6:58:43 +epoch [35/50] batch [55/1000] time 1.567 (1.574) data 0.000 (0.018) loss 0.7114 (1.0740) acc 84.3750 (72.3295) lr 5.1825e-04 eta 6:58:21 +epoch [35/50] batch [60/1000] time 1.551 (1.573) data 0.000 (0.016) loss 0.4956 (1.0586) acc 90.6250 (72.9167) lr 5.1825e-04 eta 6:57:45 +epoch [35/50] batch [65/1000] time 1.561 (1.572) data 0.000 (0.015) loss 1.6807 (1.0625) acc 59.3750 (72.8365) lr 5.1825e-04 eta 6:57:36 +epoch [35/50] batch [70/1000] time 1.583 (1.572) data 0.000 (0.014) loss 1.3428 (1.0600) acc 75.0000 (72.9911) lr 5.1825e-04 eta 6:57:19 +epoch [35/50] batch [75/1000] time 1.573 (1.572) data 0.000 (0.013) loss 1.2568 (1.0538) acc 68.7500 (73.0833) lr 5.1825e-04 eta 6:57:14 +epoch [35/50] batch [80/1000] time 1.552 (1.571) data 0.000 (0.012) loss 0.9297 (1.0407) acc 75.0000 (73.3594) lr 5.1825e-04 eta 6:56:57 +epoch [35/50] batch [85/1000] time 1.539 (1.571) data 0.000 (0.012) loss 0.9326 (1.0586) acc 65.6250 (72.7574) lr 5.1825e-04 eta 6:56:40 +epoch [35/50] batch [90/1000] time 1.541 (1.571) data 0.001 (0.011) loss 1.3115 (1.0533) acc 68.7500 (72.8125) lr 5.1825e-04 eta 6:56:28 +epoch [35/50] batch [95/1000] time 1.560 (1.570) data 0.000 (0.010) loss 2.3633 (1.0603) acc 50.0000 (72.6974) lr 5.1825e-04 eta 6:56:13 +epoch [35/50] batch [100/1000] time 1.719 (1.571) data 0.001 (0.010) loss 1.1143 (1.0639) acc 65.6250 (72.5625) lr 
5.1825e-04 eta 6:56:20 +epoch [35/50] batch [105/1000] time 1.582 (1.571) data 0.001 (0.009) loss 1.1006 (1.0631) acc 75.0000 (72.4702) lr 5.1825e-04 eta 6:56:14 +epoch [35/50] batch [110/1000] time 1.560 (1.571) data 0.001 (0.009) loss 0.8643 (1.0523) acc 68.7500 (72.5000) lr 5.1825e-04 eta 6:56:04 +epoch [35/50] batch [115/1000] time 1.541 (1.570) data 0.000 (0.009) loss 0.6938 (1.0406) acc 78.1250 (72.7989) lr 5.1825e-04 eta 6:55:47 +epoch [35/50] batch [120/1000] time 1.570 (1.570) data 0.000 (0.008) loss 1.5566 (1.0450) acc 71.8750 (72.8646) lr 5.1825e-04 eta 6:55:33 +epoch [35/50] batch [125/1000] time 1.563 (1.570) data 0.001 (0.008) loss 1.1016 (1.0428) acc 62.5000 (72.8500) lr 5.1825e-04 eta 6:55:18 +epoch [35/50] batch [130/1000] time 1.592 (1.569) data 0.000 (0.008) loss 0.8213 (1.0432) acc 84.3750 (72.9327) lr 5.1825e-04 eta 6:55:04 +epoch [35/50] batch [135/1000] time 1.578 (1.570) data 0.001 (0.008) loss 0.8389 (1.0418) acc 78.1250 (72.9167) lr 5.1825e-04 eta 6:55:01 +epoch [35/50] batch [140/1000] time 1.586 (1.569) data 0.001 (0.007) loss 0.8086 (1.0440) acc 78.1250 (72.9241) lr 5.1825e-04 eta 6:54:51 +epoch [35/50] batch [145/1000] time 1.554 (1.569) data 0.001 (0.007) loss 0.8398 (1.0432) acc 68.7500 (72.8233) lr 5.1825e-04 eta 6:54:40 +epoch [35/50] batch [150/1000] time 1.550 (1.570) data 0.000 (0.007) loss 1.5928 (1.0450) acc 59.3750 (72.8958) lr 5.1825e-04 eta 6:54:48 +epoch [35/50] batch [155/1000] time 1.573 (1.570) data 0.000 (0.007) loss 1.0742 (1.0430) acc 71.8750 (72.9839) lr 5.1825e-04 eta 6:54:36 +epoch [35/50] batch [160/1000] time 1.575 (1.570) data 0.000 (0.006) loss 1.3779 (1.0435) acc 75.0000 (73.0078) lr 5.1825e-04 eta 6:54:24 +epoch [35/50] batch [165/1000] time 1.560 (1.570) data 0.001 (0.006) loss 0.9087 (1.0408) acc 78.1250 (73.1439) lr 5.1825e-04 eta 6:54:19 +epoch [35/50] batch [170/1000] time 1.571 (1.569) data 0.000 (0.006) loss 0.8091 (1.0386) acc 75.0000 (73.1434) lr 5.1825e-04 eta 6:54:03 +epoch [35/50] batch 
[175/1000] time 1.542 (1.569) data 0.001 (0.006) loss 0.5947 (1.0336) acc 75.0000 (73.2679) lr 5.1825e-04 eta 6:53:48 +epoch [35/50] batch [180/1000] time 1.595 (1.569) data 0.001 (0.006) loss 1.2344 (1.0339) acc 68.7500 (73.2639) lr 5.1825e-04 eta 6:53:41 +epoch [35/50] batch [185/1000] time 1.548 (1.569) data 0.000 (0.006) loss 0.8730 (1.0361) acc 81.2500 (73.1926) lr 5.1825e-04 eta 6:53:30 +epoch [35/50] batch [190/1000] time 1.563 (1.569) data 0.001 (0.005) loss 0.9072 (1.0356) acc 75.0000 (73.2566) lr 5.1825e-04 eta 6:53:19 +epoch [35/50] batch [195/1000] time 1.559 (1.568) data 0.000 (0.005) loss 2.0664 (1.0429) acc 59.3750 (73.1090) lr 5.1825e-04 eta 6:53:06 +epoch [35/50] batch [200/1000] time 1.551 (1.568) data 0.001 (0.005) loss 1.2109 (1.0460) acc 71.8750 (73.1406) lr 5.1825e-04 eta 6:52:56 +epoch [35/50] batch [205/1000] time 1.551 (1.568) data 0.001 (0.005) loss 1.1650 (1.0484) acc 71.8750 (73.1860) lr 5.1825e-04 eta 6:52:39 +epoch [35/50] batch [210/1000] time 1.574 (1.567) data 0.001 (0.005) loss 1.1387 (1.0478) acc 68.7500 (73.1250) lr 5.1825e-04 eta 6:52:27 +epoch [35/50] batch [215/1000] time 1.547 (1.567) data 0.000 (0.005) loss 1.3838 (1.0487) acc 71.8750 (73.0233) lr 5.1825e-04 eta 6:52:15 +epoch [35/50] batch [220/1000] time 1.549 (1.567) data 0.000 (0.005) loss 1.6104 (1.0557) acc 71.8750 (72.9688) lr 5.1825e-04 eta 6:52:05 +epoch [35/50] batch [225/1000] time 1.551 (1.567) data 0.000 (0.005) loss 1.2207 (1.0582) acc 65.6250 (72.8750) lr 5.1825e-04 eta 6:51:55 +epoch [35/50] batch [230/1000] time 1.548 (1.566) data 0.000 (0.005) loss 1.1064 (1.0560) acc 68.7500 (72.8261) lr 5.1825e-04 eta 6:51:42 +epoch [35/50] batch [235/1000] time 1.569 (1.566) data 0.000 (0.005) loss 0.6045 (1.0539) acc 87.5000 (72.9654) lr 5.1825e-04 eta 6:51:33 +epoch [35/50] batch [240/1000] time 1.548 (1.566) data 0.000 (0.004) loss 0.9678 (1.0550) acc 71.8750 (73.0469) lr 5.1825e-04 eta 6:51:21 +epoch [35/50] batch [245/1000] time 1.560 (1.566) data 0.001 (0.004) loss 
1.3721 (1.0573) acc 62.5000 (72.9592) lr 5.1825e-04 eta 6:51:11 +epoch [35/50] batch [250/1000] time 1.708 (1.567) data 0.001 (0.004) loss 0.7686 (1.0590) acc 75.0000 (72.9500) lr 5.1825e-04 eta 6:51:12 +epoch [35/50] batch [255/1000] time 1.563 (1.566) data 0.000 (0.004) loss 1.4023 (1.0627) acc 71.8750 (72.8554) lr 5.1825e-04 eta 6:51:00 +epoch [35/50] batch [260/1000] time 1.534 (1.566) data 0.000 (0.004) loss 0.8950 (1.0618) acc 78.1250 (72.8486) lr 5.1825e-04 eta 6:50:49 +epoch [35/50] batch [265/1000] time 1.579 (1.566) data 0.001 (0.004) loss 0.5215 (1.0615) acc 84.3750 (72.8656) lr 5.1825e-04 eta 6:50:42 +epoch [35/50] batch [270/1000] time 1.561 (1.566) data 0.000 (0.004) loss 0.8750 (1.0618) acc 84.3750 (72.8356) lr 5.1825e-04 eta 6:50:35 +epoch [35/50] batch [275/1000] time 1.579 (1.566) data 0.001 (0.004) loss 1.2021 (1.0626) acc 78.1250 (72.8295) lr 5.1825e-04 eta 6:50:27 +epoch [35/50] batch [280/1000] time 1.561 (1.566) data 0.000 (0.004) loss 1.3643 (1.0613) acc 71.8750 (72.8683) lr 5.1825e-04 eta 6:50:17 +epoch [35/50] batch [285/1000] time 1.553 (1.566) data 0.000 (0.004) loss 1.8906 (1.0637) acc 53.1250 (72.8399) lr 5.1825e-04 eta 6:50:05 +epoch [35/50] batch [290/1000] time 1.541 (1.566) data 0.000 (0.004) loss 1.4658 (1.0638) acc 59.3750 (72.8125) lr 5.1825e-04 eta 6:49:55 +epoch [35/50] batch [295/1000] time 1.675 (1.566) data 0.000 (0.004) loss 0.8262 (1.0617) acc 78.1250 (72.8390) lr 5.1825e-04 eta 6:49:51 +epoch [35/50] batch [300/1000] time 1.556 (1.566) data 0.001 (0.004) loss 1.3271 (1.0641) acc 75.0000 (72.8438) lr 5.1825e-04 eta 6:49:41 +epoch [35/50] batch [305/1000] time 1.569 (1.566) data 0.001 (0.004) loss 0.9380 (1.0669) acc 75.0000 (72.7766) lr 5.1825e-04 eta 6:49:33 +epoch [35/50] batch [310/1000] time 1.555 (1.566) data 0.000 (0.004) loss 1.0791 (1.0691) acc 81.2500 (72.8125) lr 5.1825e-04 eta 6:49:22 +epoch [35/50] batch [315/1000] time 1.567 (1.565) data 0.000 (0.003) loss 1.7197 (1.0725) acc 62.5000 (72.7480) lr 5.1825e-04 
eta 6:49:12 +epoch [35/50] batch [320/1000] time 1.569 (1.565) data 0.000 (0.003) loss 0.9067 (1.0735) acc 81.2500 (72.7734) lr 5.1825e-04 eta 6:49:03 +epoch [35/50] batch [325/1000] time 1.557 (1.565) data 0.000 (0.003) loss 0.8569 (1.0734) acc 68.7500 (72.7596) lr 5.1825e-04 eta 6:48:54 +epoch [35/50] batch [330/1000] time 1.532 (1.565) data 0.001 (0.003) loss 0.7725 (1.0731) acc 75.0000 (72.7557) lr 5.1825e-04 eta 6:48:45 +epoch [35/50] batch [335/1000] time 1.555 (1.565) data 0.000 (0.003) loss 1.5752 (1.0774) acc 68.7500 (72.7519) lr 5.1825e-04 eta 6:48:36 +epoch [35/50] batch [340/1000] time 1.558 (1.565) data 0.000 (0.003) loss 1.0488 (1.0751) acc 71.8750 (72.8493) lr 5.1825e-04 eta 6:48:34 +epoch [35/50] batch [345/1000] time 1.547 (1.565) data 0.000 (0.003) loss 1.3066 (1.0798) acc 65.6250 (72.7355) lr 5.1825e-04 eta 6:48:25 +epoch [35/50] batch [350/1000] time 1.570 (1.565) data 0.000 (0.003) loss 0.6704 (1.0781) acc 87.5000 (72.8125) lr 5.1825e-04 eta 6:48:18 +epoch [35/50] batch [355/1000] time 1.533 (1.565) data 0.000 (0.003) loss 0.9849 (1.0793) acc 71.8750 (72.7465) lr 5.1825e-04 eta 6:48:07 +epoch [35/50] batch [360/1000] time 1.563 (1.565) data 0.001 (0.003) loss 0.8174 (1.0786) acc 84.3750 (72.8038) lr 5.1825e-04 eta 6:47:58 +epoch [35/50] batch [365/1000] time 1.559 (1.565) data 0.000 (0.003) loss 1.5264 (1.0806) acc 65.6250 (72.7825) lr 5.1825e-04 eta 6:47:49 +epoch [35/50] batch [370/1000] time 1.563 (1.565) data 0.000 (0.003) loss 0.7944 (1.0797) acc 71.8750 (72.7787) lr 5.1825e-04 eta 6:47:42 +epoch [35/50] batch [375/1000] time 1.578 (1.565) data 0.001 (0.003) loss 1.4854 (1.0777) acc 75.0000 (72.8667) lr 5.1825e-04 eta 6:47:33 +epoch [35/50] batch [380/1000] time 1.541 (1.565) data 0.000 (0.003) loss 1.3203 (1.0793) acc 71.8750 (72.8372) lr 5.1825e-04 eta 6:47:24 +epoch [35/50] batch [385/1000] time 1.592 (1.565) data 0.000 (0.003) loss 1.2871 (1.0784) acc 65.6250 (72.8328) lr 5.1825e-04 eta 6:47:17 +epoch [35/50] batch [390/1000] time 
1.565 (1.565) data 0.000 (0.003) loss 0.8066 (1.0761) acc 75.0000 (72.8846) lr 5.1825e-04 eta 6:47:08 +epoch [35/50] batch [395/1000] time 1.545 (1.565) data 0.000 (0.003) loss 1.5010 (1.0772) acc 65.6250 (72.8877) lr 5.1825e-04 eta 6:47:00 +epoch [35/50] batch [400/1000] time 1.563 (1.565) data 0.000 (0.003) loss 1.4727 (1.0770) acc 71.8750 (72.9062) lr 5.1825e-04 eta 6:46:52 +epoch [35/50] batch [405/1000] time 1.553 (1.565) data 0.000 (0.003) loss 0.7363 (1.0740) acc 71.8750 (72.9552) lr 5.1825e-04 eta 6:46:52 +epoch [35/50] batch [410/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.2900 (1.0756) acc 56.2500 (72.9345) lr 5.1825e-04 eta 6:46:42 +epoch [35/50] batch [415/1000] time 1.568 (1.565) data 0.000 (0.003) loss 1.7168 (1.0753) acc 62.5000 (72.9292) lr 5.1825e-04 eta 6:46:32 +epoch [35/50] batch [420/1000] time 1.550 (1.565) data 0.000 (0.003) loss 0.8076 (1.0755) acc 75.0000 (72.9315) lr 5.1825e-04 eta 6:46:23 +epoch [35/50] batch [425/1000] time 1.548 (1.565) data 0.000 (0.003) loss 1.0088 (1.0769) acc 71.8750 (72.8676) lr 5.1825e-04 eta 6:46:12 +epoch [35/50] batch [430/1000] time 1.552 (1.565) data 0.000 (0.003) loss 1.2090 (1.0754) acc 71.8750 (72.8924) lr 5.1825e-04 eta 6:46:03 +epoch [35/50] batch [435/1000] time 1.555 (1.565) data 0.001 (0.003) loss 0.8389 (1.0754) acc 81.2500 (72.8807) lr 5.1825e-04 eta 6:45:53 +epoch [35/50] batch [440/1000] time 1.559 (1.564) data 0.000 (0.003) loss 0.9468 (1.0753) acc 71.8750 (72.8480) lr 5.1825e-04 eta 6:45:43 +epoch [35/50] batch [445/1000] time 1.557 (1.564) data 0.000 (0.003) loss 0.9785 (1.0752) acc 68.7500 (72.8301) lr 5.1825e-04 eta 6:45:33 +epoch [35/50] batch [450/1000] time 1.541 (1.565) data 0.001 (0.003) loss 0.9126 (1.0766) acc 68.7500 (72.8264) lr 5.1825e-04 eta 6:45:28 +epoch [35/50] batch [455/1000] time 1.576 (1.564) data 0.001 (0.003) loss 1.4854 (1.0784) acc 68.7500 (72.8297) lr 5.1825e-04 eta 6:45:19 +epoch [35/50] batch [460/1000] time 1.562 (1.564) data 0.000 (0.003) loss 1.1084 (1.0776) 
acc 68.7500 (72.8601) lr 5.1825e-04 eta 6:45:09 +epoch [35/50] batch [465/1000] time 1.560 (1.564) data 0.001 (0.002) loss 1.2627 (1.0804) acc 75.0000 (72.8427) lr 5.1825e-04 eta 6:45:02 +epoch [35/50] batch [470/1000] time 1.559 (1.564) data 0.001 (0.002) loss 0.8052 (1.0794) acc 75.0000 (72.8723) lr 5.1825e-04 eta 6:44:52 +epoch [35/50] batch [475/1000] time 1.566 (1.564) data 0.001 (0.002) loss 0.9102 (1.0799) acc 81.2500 (72.8618) lr 5.1825e-04 eta 6:44:44 +epoch [35/50] batch [480/1000] time 1.559 (1.564) data 0.001 (0.002) loss 0.9873 (1.0798) acc 75.0000 (72.8711) lr 5.1825e-04 eta 6:44:34 +epoch [35/50] batch [485/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.4541 (1.0831) acc 71.8750 (72.7899) lr 5.1825e-04 eta 6:44:26 +epoch [35/50] batch [490/1000] time 1.579 (1.565) data 0.000 (0.002) loss 1.3428 (1.0822) acc 62.5000 (72.8316) lr 5.1825e-04 eta 6:44:26 +epoch [35/50] batch [495/1000] time 1.545 (1.564) data 0.000 (0.002) loss 1.0000 (1.0821) acc 71.8750 (72.8409) lr 5.1825e-04 eta 6:44:17 +epoch [35/50] batch [500/1000] time 1.548 (1.564) data 0.001 (0.002) loss 0.6147 (1.0808) acc 81.2500 (72.8625) lr 5.1825e-04 eta 6:44:09 +epoch [35/50] batch [505/1000] time 1.565 (1.564) data 0.001 (0.002) loss 0.8086 (1.0787) acc 81.2500 (72.9332) lr 5.1825e-04 eta 6:44:01 +epoch [35/50] batch [510/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.6416 (1.0787) acc 65.6250 (72.9289) lr 5.1825e-04 eta 6:43:54 +epoch [35/50] batch [515/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.0146 (1.0798) acc 78.1250 (72.8944) lr 5.1825e-04 eta 6:43:47 +epoch [35/50] batch [520/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.6465 (1.0804) acc 53.1250 (72.8486) lr 5.1825e-04 eta 6:43:39 +epoch [35/50] batch [525/1000] time 1.586 (1.565) data 0.001 (0.002) loss 1.2246 (1.0820) acc 75.0000 (72.8214) lr 5.1825e-04 eta 6:43:33 +epoch [35/50] batch [530/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.5273 (1.0818) acc 59.3750 (72.8066) lr 5.1825e-04 eta 6:43:26 
+epoch [35/50] batch [535/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.2852 (1.0825) acc 65.6250 (72.8329) lr 5.1825e-04 eta 6:43:18 +epoch [35/50] batch [540/1000] time 1.538 (1.565) data 0.000 (0.002) loss 1.5625 (1.0854) acc 68.7500 (72.7894) lr 5.1825e-04 eta 6:43:10 +epoch [35/50] batch [545/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.2529 (1.0845) acc 71.8750 (72.7982) lr 5.1825e-04 eta 6:43:01 +epoch [35/50] batch [550/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.1562 (1.0868) acc 71.8750 (72.7670) lr 5.1825e-04 eta 6:42:54 +epoch [35/50] batch [555/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.7329 (1.0857) acc 78.1250 (72.7928) lr 5.1825e-04 eta 6:42:52 +epoch [35/50] batch [560/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.9688 (1.0846) acc 78.1250 (72.8013) lr 5.1825e-04 eta 6:42:45 +epoch [35/50] batch [565/1000] time 1.529 (1.565) data 0.000 (0.002) loss 0.7153 (1.0834) acc 68.7500 (72.8042) lr 5.1825e-04 eta 6:42:35 +epoch [35/50] batch [570/1000] time 1.576 (1.565) data 0.001 (0.002) loss 1.2168 (1.0827) acc 68.7500 (72.8235) lr 5.1825e-04 eta 6:42:27 +epoch [35/50] batch [575/1000] time 1.566 (1.565) data 0.000 (0.002) loss 1.0264 (1.0824) acc 68.7500 (72.8424) lr 5.1825e-04 eta 6:42:20 +epoch [35/50] batch [580/1000] time 1.545 (1.565) data 0.001 (0.002) loss 0.5825 (1.0819) acc 84.3750 (72.8394) lr 5.1825e-04 eta 6:42:11 +epoch [35/50] batch [585/1000] time 1.557 (1.565) data 0.001 (0.002) loss 1.1445 (1.0811) acc 68.7500 (72.8419) lr 5.1825e-04 eta 6:42:02 +epoch [35/50] batch [590/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.6523 (1.0787) acc 87.5000 (72.9184) lr 5.1825e-04 eta 6:41:53 +epoch [35/50] batch [595/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.1299 (1.0813) acc 68.7500 (72.8676) lr 5.1825e-04 eta 6:41:43 +epoch [35/50] batch [600/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.3086 (1.0831) acc 62.5000 (72.7969) lr 5.1825e-04 eta 6:41:38 +epoch [35/50] batch [605/1000] time 1.569 (1.565) 
data 0.000 (0.002) loss 1.1318 (1.0841) acc 75.0000 (72.7841) lr 5.1825e-04 eta 6:41:31 +epoch [35/50] batch [610/1000] time 1.579 (1.565) data 0.000 (0.002) loss 0.6699 (1.0840) acc 84.3750 (72.8023) lr 5.1825e-04 eta 6:41:24 +epoch [35/50] batch [615/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.6943 (1.0847) acc 65.6250 (72.7591) lr 5.1825e-04 eta 6:41:16 +epoch [35/50] batch [620/1000] time 1.552 (1.565) data 0.001 (0.002) loss 1.2900 (1.0866) acc 78.1250 (72.7470) lr 5.1825e-04 eta 6:41:07 +epoch [35/50] batch [625/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.7896 (1.0847) acc 78.1250 (72.7550) lr 5.1825e-04 eta 6:40:59 +epoch [35/50] batch [630/1000] time 1.564 (1.565) data 0.001 (0.002) loss 0.8438 (1.0848) acc 81.2500 (72.7183) lr 5.1825e-04 eta 6:40:51 +epoch [35/50] batch [635/1000] time 1.533 (1.565) data 0.001 (0.002) loss 1.4375 (1.0841) acc 62.5000 (72.7018) lr 5.1825e-04 eta 6:40:42 +epoch [35/50] batch [640/1000] time 1.725 (1.565) data 0.000 (0.002) loss 1.5234 (1.0856) acc 59.3750 (72.6416) lr 5.1825e-04 eta 6:40:39 +epoch [35/50] batch [645/1000] time 1.610 (1.565) data 0.000 (0.002) loss 1.2783 (1.0857) acc 62.5000 (72.6453) lr 5.1825e-04 eta 6:40:34 +epoch [35/50] batch [650/1000] time 1.560 (1.565) data 0.001 (0.002) loss 1.5039 (1.0862) acc 68.7500 (72.6442) lr 5.1825e-04 eta 6:40:26 +epoch [35/50] batch [655/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.4663 (1.0837) acc 81.2500 (72.6765) lr 5.1825e-04 eta 6:40:17 +epoch [35/50] batch [660/1000] time 1.548 (1.565) data 0.000 (0.002) loss 0.9351 (1.0823) acc 81.2500 (72.7131) lr 5.1825e-04 eta 6:40:10 +epoch [35/50] batch [665/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.5078 (1.0828) acc 68.7500 (72.7162) lr 5.1825e-04 eta 6:40:01 +epoch [35/50] batch [670/1000] time 1.584 (1.565) data 0.001 (0.002) loss 1.1768 (1.0836) acc 81.2500 (72.7146) lr 5.1825e-04 eta 6:39:55 +epoch [35/50] batch [675/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.9873 (1.0844) acc 59.3750 
(72.6898) lr 5.1825e-04 eta 6:39:47 +epoch [35/50] batch [680/1000] time 1.581 (1.565) data 0.001 (0.002) loss 1.3311 (1.0847) acc 71.8750 (72.6746) lr 5.1825e-04 eta 6:39:40 +epoch [35/50] batch [685/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.4668 (1.0850) acc 56.2500 (72.6505) lr 5.1825e-04 eta 6:39:31 +epoch [35/50] batch [690/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.2939 (1.0862) acc 68.7500 (72.6087) lr 5.1825e-04 eta 6:39:22 +epoch [35/50] batch [695/1000] time 1.550 (1.565) data 0.001 (0.002) loss 0.9658 (1.0871) acc 59.3750 (72.5405) lr 5.1825e-04 eta 6:39:13 +epoch [35/50] batch [700/1000] time 1.558 (1.565) data 0.001 (0.002) loss 0.8965 (1.0865) acc 75.0000 (72.5580) lr 5.1825e-04 eta 6:39:04 +epoch [35/50] batch [705/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.6348 (1.0849) acc 78.1250 (72.5754) lr 5.1825e-04 eta 6:39:00 +epoch [35/50] batch [710/1000] time 1.566 (1.565) data 0.001 (0.002) loss 1.6807 (1.0857) acc 62.5000 (72.5792) lr 5.1825e-04 eta 6:38:51 +epoch [35/50] batch [715/1000] time 1.588 (1.565) data 0.001 (0.002) loss 1.1260 (1.0856) acc 68.7500 (72.6049) lr 5.1825e-04 eta 6:38:44 +epoch [35/50] batch [720/1000] time 1.578 (1.565) data 0.000 (0.002) loss 0.7915 (1.0844) acc 87.5000 (72.6389) lr 5.1825e-04 eta 6:38:36 +epoch [35/50] batch [725/1000] time 1.567 (1.565) data 0.000 (0.002) loss 0.7905 (1.0834) acc 81.2500 (72.6595) lr 5.1825e-04 eta 6:38:28 +epoch [35/50] batch [730/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.3574 (1.0853) acc 75.0000 (72.6370) lr 5.1825e-04 eta 6:38:20 +epoch [35/50] batch [735/1000] time 1.546 (1.565) data 0.000 (0.002) loss 0.6880 (1.0850) acc 84.3750 (72.6446) lr 5.1825e-04 eta 6:38:11 +epoch [35/50] batch [740/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.5664 (1.0857) acc 56.2500 (72.6351) lr 5.1825e-04 eta 6:38:03 +epoch [35/50] batch [745/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.1973 (1.0870) acc 75.0000 (72.6174) lr 5.1825e-04 eta 6:37:56 +epoch [35/50] 
batch [750/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.4707 (1.0867) acc 71.8750 (72.6167) lr 5.1825e-04 eta 6:37:51 +epoch [35/50] batch [755/1000] time 1.594 (1.565) data 0.001 (0.002) loss 1.4365 (1.0855) acc 68.7500 (72.6325) lr 5.1825e-04 eta 6:37:44 +epoch [35/50] batch [760/1000] time 1.563 (1.565) data 0.001 (0.002) loss 0.6484 (1.0858) acc 81.2500 (72.6234) lr 5.1825e-04 eta 6:37:37 +epoch [35/50] batch [765/1000] time 1.558 (1.565) data 0.001 (0.002) loss 0.9219 (1.0868) acc 75.0000 (72.5817) lr 5.1825e-04 eta 6:37:28 +epoch [35/50] batch [770/1000] time 1.549 (1.565) data 0.001 (0.002) loss 0.6333 (1.0856) acc 84.3750 (72.6096) lr 5.1825e-04 eta 6:37:20 +epoch [35/50] batch [775/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.3594 (1.0873) acc 65.6250 (72.5685) lr 5.1825e-04 eta 6:37:11 +epoch [35/50] batch [780/1000] time 1.534 (1.565) data 0.000 (0.002) loss 0.9355 (1.0873) acc 84.3750 (72.5721) lr 5.1825e-04 eta 6:37:01 +epoch [35/50] batch [785/1000] time 1.576 (1.565) data 0.001 (0.002) loss 1.2344 (1.0879) acc 46.8750 (72.5159) lr 5.1825e-04 eta 6:36:54 +epoch [35/50] batch [790/1000] time 1.557 (1.565) data 0.001 (0.002) loss 0.8213 (1.0871) acc 78.1250 (72.5396) lr 5.1825e-04 eta 6:36:46 +epoch [35/50] batch [795/1000] time 1.571 (1.565) data 0.000 (0.002) loss 1.3564 (1.0878) acc 65.6250 (72.5079) lr 5.1825e-04 eta 6:36:42 +epoch [35/50] batch [800/1000] time 1.571 (1.565) data 0.000 (0.002) loss 1.3154 (1.0877) acc 65.6250 (72.5117) lr 5.1825e-04 eta 6:36:33 +epoch [35/50] batch [805/1000] time 1.579 (1.565) data 0.001 (0.002) loss 1.5156 (1.0871) acc 65.6250 (72.5388) lr 5.1825e-04 eta 6:36:25 +epoch [35/50] batch [810/1000] time 1.563 (1.565) data 0.001 (0.002) loss 1.4121 (1.0881) acc 62.5000 (72.5270) lr 5.1825e-04 eta 6:36:17 +epoch [35/50] batch [815/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.1924 (1.0875) acc 65.6250 (72.5575) lr 5.1825e-04 eta 6:36:10 +epoch [35/50] batch [820/1000] time 1.562 (1.565) data 0.000 
(0.002) loss 0.5698 (1.0881) acc 81.2500 (72.5343) lr 5.1825e-04 eta 6:36:02 +epoch [35/50] batch [825/1000] time 1.569 (1.565) data 0.001 (0.002) loss 1.1230 (1.0881) acc 75.0000 (72.5303) lr 5.1825e-04 eta 6:35:55 +epoch [35/50] batch [830/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.9639 (1.0875) acc 75.0000 (72.5339) lr 5.1825e-04 eta 6:35:47 +epoch [35/50] batch [835/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.0273 (1.0877) acc 84.3750 (72.5449) lr 5.1825e-04 eta 6:35:41 +epoch [35/50] batch [840/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.6514 (1.0894) acc 59.3750 (72.4926) lr 5.1825e-04 eta 6:35:32 +epoch [35/50] batch [845/1000] time 1.543 (1.565) data 0.001 (0.002) loss 1.4336 (1.0904) acc 71.8750 (72.5148) lr 5.1825e-04 eta 6:35:23 +epoch [35/50] batch [850/1000] time 1.537 (1.565) data 0.000 (0.002) loss 1.3730 (1.0896) acc 65.6250 (72.5110) lr 5.1825e-04 eta 6:35:14 +epoch [35/50] batch [855/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.2539 (1.0900) acc 75.0000 (72.5000) lr 5.1825e-04 eta 6:35:09 +epoch [35/50] batch [860/1000] time 1.561 (1.565) data 0.000 (0.002) loss 1.0479 (1.0916) acc 71.8750 (72.4709) lr 5.1825e-04 eta 6:35:00 +epoch [35/50] batch [865/1000] time 1.574 (1.565) data 0.000 (0.002) loss 0.7808 (1.0906) acc 84.3750 (72.5036) lr 5.1825e-04 eta 6:34:52 +epoch [35/50] batch [870/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.4131 (1.0913) acc 78.1250 (72.4964) lr 5.1825e-04 eta 6:34:45 +epoch [35/50] batch [875/1000] time 1.571 (1.565) data 0.001 (0.002) loss 0.9956 (1.0905) acc 75.0000 (72.5143) lr 5.1825e-04 eta 6:34:37 +epoch [35/50] batch [880/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.3799 (1.0917) acc 62.5000 (72.4858) lr 5.1825e-04 eta 6:34:29 +epoch [35/50] batch [885/1000] time 1.565 (1.565) data 0.001 (0.002) loss 1.8213 (1.0915) acc 65.6250 (72.4894) lr 5.1825e-04 eta 6:34:21 +epoch [35/50] batch [890/1000] time 1.557 (1.565) data 0.001 (0.002) loss 1.2471 (1.0916) acc 65.6250 (72.4719) lr 
5.1825e-04 eta 6:34:12 +epoch [35/50] batch [895/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.2822 (1.0910) acc 56.2500 (72.4511) lr 5.1825e-04 eta 6:34:03 +epoch [35/50] batch [900/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.5894 (1.0913) acc 78.1250 (72.4549) lr 5.1825e-04 eta 6:33:57 +epoch [35/50] batch [905/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.2275 (1.0904) acc 65.6250 (72.4793) lr 5.1825e-04 eta 6:33:49 +epoch [35/50] batch [910/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.8994 (1.0906) acc 81.2500 (72.4863) lr 5.1825e-04 eta 6:33:41 +epoch [35/50] batch [915/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.5884 (1.0897) acc 87.5000 (72.5171) lr 5.1825e-04 eta 6:33:33 +epoch [35/50] batch [920/1000] time 1.557 (1.565) data 0.001 (0.001) loss 1.1387 (1.0905) acc 78.1250 (72.5102) lr 5.1825e-04 eta 6:33:25 +epoch [35/50] batch [925/1000] time 1.557 (1.565) data 0.000 (0.001) loss 0.9819 (1.0900) acc 75.0000 (72.5304) lr 5.1825e-04 eta 6:33:18 +epoch [35/50] batch [930/1000] time 1.580 (1.565) data 0.001 (0.001) loss 1.0586 (1.0906) acc 65.6250 (72.4966) lr 5.1825e-04 eta 6:33:11 +epoch [35/50] batch [935/1000] time 1.551 (1.565) data 0.001 (0.001) loss 0.9038 (1.0902) acc 78.1250 (72.5000) lr 5.1825e-04 eta 6:33:03 +epoch [35/50] batch [940/1000] time 1.549 (1.565) data 0.000 (0.001) loss 1.0420 (1.0900) acc 65.6250 (72.4801) lr 5.1825e-04 eta 6:32:54 +epoch [35/50] batch [945/1000] time 1.568 (1.566) data 0.000 (0.001) loss 0.6348 (1.0898) acc 87.5000 (72.4868) lr 5.1825e-04 eta 6:32:49 +epoch [35/50] batch [950/1000] time 1.596 (1.566) data 0.000 (0.001) loss 0.5327 (1.0897) acc 84.3750 (72.4901) lr 5.1825e-04 eta 6:32:42 +epoch [35/50] batch [955/1000] time 1.556 (1.566) data 0.001 (0.001) loss 0.3501 (1.0889) acc 84.3750 (72.5131) lr 5.1825e-04 eta 6:32:33 +epoch [35/50] batch [960/1000] time 1.542 (1.566) data 0.001 (0.001) loss 0.8789 (1.0883) acc 81.2500 (72.5260) lr 5.1825e-04 eta 6:32:25 +epoch [35/50] batch 
[965/1000] time 1.543 (1.565) data 0.001 (0.001) loss 0.8701 (1.0887) acc 68.7500 (72.5227) lr 5.1825e-04 eta 6:32:16 +epoch [35/50] batch [970/1000] time 1.562 (1.566) data 0.000 (0.001) loss 1.1758 (1.0883) acc 65.6250 (72.5387) lr 5.1825e-04 eta 6:32:09 +epoch [35/50] batch [975/1000] time 1.548 (1.565) data 0.001 (0.001) loss 1.7402 (1.0885) acc 75.0000 (72.5673) lr 5.1825e-04 eta 6:32:01 +epoch [35/50] batch [980/1000] time 1.560 (1.565) data 0.000 (0.001) loss 1.0156 (1.0885) acc 68.7500 (72.5670) lr 5.1825e-04 eta 6:31:52 +epoch [35/50] batch [985/1000] time 1.558 (1.565) data 0.001 (0.001) loss 1.2832 (1.0888) acc 71.8750 (72.5698) lr 5.1825e-04 eta 6:31:43 +epoch [35/50] batch [990/1000] time 1.571 (1.565) data 0.000 (0.001) loss 1.3906 (1.0902) acc 71.8750 (72.5663) lr 5.1825e-04 eta 6:31:35 +epoch [35/50] batch [995/1000] time 1.564 (1.565) data 0.000 (0.001) loss 1.0645 (1.0912) acc 71.8750 (72.5408) lr 5.1825e-04 eta 6:31:27 +epoch [35/50] batch [1000/1000] time 1.567 (1.565) data 0.000 (0.001) loss 0.9634 (1.0904) acc 78.1250 (72.5469) lr 4.6417e-04 eta 6:31:19 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,306 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 78.1% +epoch [36/50] batch [5/1000] time 1.575 (1.709) data 0.001 (0.193) loss 0.8984 (1.0677) acc 71.8750 (70.6250) lr 4.6417e-04 eta 7:07:10 +epoch [36/50] batch [10/1000] time 1.571 (1.638) data 0.000 (0.097) loss 1.0605 (0.9944) acc 68.7500 (73.7500) lr 4.6417e-04 eta 6:49:20 +epoch [36/50] batch [15/1000] time 1.552 (1.612) data 0.000 (0.065) loss 1.3447 (1.0453) acc 65.6250 (74.3750) lr 4.6417e-04 eta 6:42:37 +epoch [36/50] batch [20/1000] time 1.847 (1.614) data 0.001 (0.049) loss 0.8823 (1.0066) acc 81.2500 (75.0000) lr 4.6417e-04 eta 6:42:52 +epoch [36/50] batch [25/1000] time 1.540 (1.603) data 0.001 (0.039) loss 1.0420 (0.9769) acc 71.8750 (74.8750) lr 4.6417e-04 eta 6:40:00 +epoch [36/50] batch [30/1000] time 1.558 (1.595) data 0.001 (0.033) loss 1.0615 (0.9996) 
acc 71.8750 (74.1667) lr 4.6417e-04 eta 6:38:00 +epoch [36/50] batch [35/1000] time 1.590 (1.593) data 0.001 (0.028) loss 0.9419 (1.0246) acc 78.1250 (74.1071) lr 4.6417e-04 eta 6:37:20 +epoch [36/50] batch [40/1000] time 1.548 (1.587) data 0.001 (0.025) loss 0.9243 (1.0332) acc 71.8750 (73.8281) lr 4.6417e-04 eta 6:35:47 +epoch [36/50] batch [45/1000] time 1.580 (1.585) data 0.000 (0.022) loss 1.0898 (1.0479) acc 81.2500 (73.7500) lr 4.6417e-04 eta 6:35:01 +epoch [36/50] batch [50/1000] time 1.531 (1.583) data 0.001 (0.020) loss 1.1914 (1.0648) acc 71.8750 (73.2500) lr 4.6417e-04 eta 6:34:24 +epoch [36/50] batch [55/1000] time 1.539 (1.580) data 0.001 (0.018) loss 0.8950 (1.0583) acc 75.0000 (73.4091) lr 4.6417e-04 eta 6:33:40 +epoch [36/50] batch [60/1000] time 1.565 (1.578) data 0.001 (0.017) loss 1.3662 (1.0540) acc 68.7500 (73.1771) lr 4.6417e-04 eta 6:32:57 +epoch [36/50] batch [65/1000] time 1.583 (1.577) data 0.001 (0.015) loss 0.6782 (1.0657) acc 68.7500 (72.4519) lr 4.6417e-04 eta 6:32:32 +epoch [36/50] batch [70/1000] time 1.537 (1.576) data 0.000 (0.014) loss 0.8740 (1.0615) acc 71.8750 (72.5000) lr 4.6417e-04 eta 6:32:13 +epoch [36/50] batch [75/1000] time 1.536 (1.574) data 0.001 (0.013) loss 1.0293 (1.0625) acc 78.1250 (72.2083) lr 4.6417e-04 eta 6:31:35 +epoch [36/50] batch [80/1000] time 1.565 (1.573) data 0.001 (0.013) loss 0.9326 (1.0508) acc 81.2500 (72.6172) lr 4.6417e-04 eta 6:31:13 +epoch [36/50] batch [85/1000] time 1.578 (1.573) data 0.000 (0.012) loss 0.8970 (1.0612) acc 68.7500 (72.1324) lr 4.6417e-04 eta 6:31:01 +epoch [36/50] batch [90/1000] time 1.576 (1.573) data 0.000 (0.011) loss 1.2676 (1.0676) acc 65.6250 (72.1181) lr 4.6417e-04 eta 6:30:50 +epoch [36/50] batch [95/1000] time 1.559 (1.572) data 0.001 (0.011) loss 0.6958 (1.0680) acc 75.0000 (72.0395) lr 4.6417e-04 eta 6:30:30 +epoch [36/50] batch [100/1000] time 1.543 (1.572) data 0.001 (0.010) loss 0.8062 (1.0669) acc 75.0000 (72.0000) lr 4.6417e-04 eta 6:30:18 +epoch [36/50] 
batch [105/1000] time 1.535 (1.570) data 0.001 (0.010) loss 1.2158 (1.0745) acc 62.5000 (71.9048) lr 4.6417e-04 eta 6:29:48 +epoch [36/50] batch [110/1000] time 1.556 (1.570) data 0.001 (0.009) loss 0.8433 (1.0646) acc 75.0000 (72.1591) lr 4.6417e-04 eta 6:29:34 +epoch [36/50] batch [115/1000] time 1.585 (1.570) data 0.001 (0.009) loss 0.7856 (1.0645) acc 78.1250 (72.1467) lr 4.6417e-04 eta 6:29:23 +epoch [36/50] batch [120/1000] time 1.548 (1.569) data 0.000 (0.009) loss 1.4580 (1.0733) acc 71.8750 (72.1354) lr 4.6417e-04 eta 6:29:06 +epoch [36/50] batch [125/1000] time 1.565 (1.570) data 0.000 (0.008) loss 1.1523 (1.0647) acc 78.1250 (72.3750) lr 4.6417e-04 eta 6:29:13 +epoch [36/50] batch [130/1000] time 1.545 (1.570) data 0.001 (0.008) loss 1.0293 (1.0676) acc 78.1250 (72.4279) lr 4.6417e-04 eta 6:29:02 +epoch [36/50] batch [135/1000] time 1.583 (1.570) data 0.000 (0.008) loss 1.6006 (1.0739) acc 65.6250 (72.2685) lr 4.6417e-04 eta 6:28:52 +epoch [36/50] batch [140/1000] time 1.576 (1.570) data 0.000 (0.007) loss 0.9360 (1.0722) acc 71.8750 (72.2545) lr 4.6417e-04 eta 6:28:46 +epoch [36/50] batch [145/1000] time 1.574 (1.569) data 0.000 (0.007) loss 1.2031 (1.0769) acc 65.6250 (72.1552) lr 4.6417e-04 eta 6:28:34 +epoch [36/50] batch [150/1000] time 1.559 (1.569) data 0.001 (0.007) loss 0.8960 (1.0755) acc 71.8750 (72.2292) lr 4.6417e-04 eta 6:28:19 +epoch [36/50] batch [155/1000] time 1.560 (1.568) data 0.000 (0.007) loss 1.3184 (1.0804) acc 71.8750 (72.1169) lr 4.6417e-04 eta 6:28:02 +epoch [36/50] batch [160/1000] time 1.562 (1.568) data 0.001 (0.007) loss 0.9741 (1.0799) acc 68.7500 (72.1289) lr 4.6417e-04 eta 6:27:46 +epoch [36/50] batch [165/1000] time 1.542 (1.567) data 0.000 (0.006) loss 1.1484 (1.0817) acc 65.6250 (72.1023) lr 4.6417e-04 eta 6:27:33 +epoch [36/50] batch [170/1000] time 1.567 (1.568) data 0.001 (0.006) loss 1.2334 (1.0817) acc 59.3750 (72.0037) lr 4.6417e-04 eta 6:27:37 +epoch [36/50] batch [175/1000] time 1.545 (1.568) data 0.000 
(0.006) loss 1.4658 (1.0787) acc 71.8750 (72.1250) lr 4.6417e-04 eta 6:27:24 +epoch [36/50] batch [180/1000] time 1.590 (1.568) data 0.000 (0.006) loss 0.7090 (1.0757) acc 75.0000 (72.1875) lr 4.6417e-04 eta 6:27:17 +epoch [36/50] batch [185/1000] time 1.554 (1.568) data 0.001 (0.006) loss 0.6704 (1.0731) acc 84.3750 (72.3649) lr 4.6417e-04 eta 6:27:04 +epoch [36/50] batch [190/1000] time 1.561 (1.567) data 0.000 (0.006) loss 0.8516 (1.0711) acc 68.7500 (72.4013) lr 4.6417e-04 eta 6:26:51 +epoch [36/50] batch [195/1000] time 1.556 (1.567) data 0.001 (0.005) loss 0.8950 (1.0732) acc 75.0000 (72.4199) lr 4.6417e-04 eta 6:26:37 +epoch [36/50] batch [200/1000] time 1.581 (1.567) data 0.001 (0.005) loss 0.8701 (1.0730) acc 71.8750 (72.4375) lr 4.6417e-04 eta 6:26:27 +epoch [36/50] batch [205/1000] time 1.565 (1.567) data 0.000 (0.005) loss 1.2500 (1.0754) acc 59.3750 (72.3780) lr 4.6417e-04 eta 6:26:17 +epoch [36/50] batch [210/1000] time 1.562 (1.566) data 0.000 (0.005) loss 1.1494 (1.0730) acc 68.7500 (72.4256) lr 4.6417e-04 eta 6:26:06 +epoch [36/50] batch [215/1000] time 1.558 (1.567) data 0.000 (0.005) loss 0.7378 (1.0720) acc 81.2500 (72.5291) lr 4.6417e-04 eta 6:26:12 +epoch [36/50] batch [220/1000] time 1.541 (1.567) data 0.001 (0.005) loss 0.7490 (1.0690) acc 81.2500 (72.5426) lr 4.6417e-04 eta 6:26:01 +epoch [36/50] batch [225/1000] time 1.554 (1.567) data 0.001 (0.005) loss 0.8228 (1.0707) acc 75.0000 (72.4444) lr 4.6417e-04 eta 6:25:50 +epoch [36/50] batch [230/1000] time 1.581 (1.567) data 0.000 (0.005) loss 0.8154 (1.0736) acc 75.0000 (72.4185) lr 4.6417e-04 eta 6:25:40 +epoch [36/50] batch [235/1000] time 1.559 (1.567) data 0.001 (0.005) loss 0.5942 (1.0751) acc 87.5000 (72.4335) lr 4.6417e-04 eta 6:25:31 +epoch [36/50] batch [240/1000] time 1.561 (1.566) data 0.000 (0.005) loss 0.9888 (1.0737) acc 71.8750 (72.4089) lr 4.6417e-04 eta 6:25:18 +epoch [36/50] batch [245/1000] time 1.571 (1.566) data 0.001 (0.004) loss 1.2871 (1.0754) acc 65.6250 (72.4362) lr 
4.6417e-04 eta 6:25:11 +epoch [36/50] batch [250/1000] time 1.569 (1.566) data 0.000 (0.004) loss 1.0596 (1.0763) acc 71.8750 (72.4500) lr 4.6417e-04 eta 6:25:02 +epoch [36/50] batch [255/1000] time 1.590 (1.566) data 0.001 (0.004) loss 1.4893 (1.0784) acc 68.7500 (72.4265) lr 4.6417e-04 eta 6:24:55 +epoch [36/50] batch [260/1000] time 1.596 (1.567) data 0.000 (0.004) loss 0.7734 (1.0824) acc 78.1250 (72.3317) lr 4.6417e-04 eta 6:24:50 +epoch [36/50] batch [265/1000] time 1.537 (1.566) data 0.001 (0.004) loss 1.1973 (1.0827) acc 75.0000 (72.3585) lr 4.6417e-04 eta 6:24:40 +epoch [36/50] batch [270/1000] time 1.546 (1.566) data 0.000 (0.004) loss 1.0068 (1.0785) acc 81.2500 (72.5000) lr 4.6417e-04 eta 6:24:28 +epoch [36/50] batch [275/1000] time 1.575 (1.567) data 0.001 (0.004) loss 1.3486 (1.0786) acc 65.6250 (72.4886) lr 4.6417e-04 eta 6:24:29 +epoch [36/50] batch [280/1000] time 1.588 (1.567) data 0.000 (0.004) loss 0.9658 (1.0815) acc 78.1250 (72.4665) lr 4.6417e-04 eta 6:24:24 +epoch [36/50] batch [285/1000] time 1.543 (1.567) data 0.000 (0.004) loss 1.3721 (1.0807) acc 65.6250 (72.5110) lr 4.6417e-04 eta 6:24:14 +epoch [36/50] batch [290/1000] time 1.561 (1.567) data 0.001 (0.004) loss 1.6816 (1.0832) acc 65.6250 (72.5000) lr 4.6417e-04 eta 6:24:05 +epoch [36/50] batch [295/1000] time 1.591 (1.567) data 0.000 (0.004) loss 1.3613 (1.0861) acc 71.8750 (72.4682) lr 4.6417e-04 eta 6:23:55 +epoch [36/50] batch [300/1000] time 1.557 (1.566) data 0.001 (0.004) loss 1.4980 (1.0867) acc 65.6250 (72.4479) lr 4.6417e-04 eta 6:23:47 +epoch [36/50] batch [305/1000] time 1.556 (1.566) data 0.001 (0.004) loss 0.7939 (1.0868) acc 81.2500 (72.4180) lr 4.6417e-04 eta 6:23:37 +epoch [36/50] batch [310/1000] time 1.545 (1.566) data 0.000 (0.004) loss 1.2432 (1.0849) acc 68.7500 (72.4093) lr 4.6417e-04 eta 6:23:27 +epoch [36/50] batch [315/1000] time 1.557 (1.566) data 0.000 (0.004) loss 1.4268 (1.0859) acc 62.5000 (72.3909) lr 4.6417e-04 eta 6:23:19 +epoch [36/50] batch 
[320/1000] time 1.540 (1.566) data 0.000 (0.004) loss 1.3926 (1.0874) acc 62.5000 (72.4219) lr 4.6417e-04 eta 6:23:15 +epoch [36/50] batch [325/1000] time 1.569 (1.566) data 0.000 (0.003) loss 0.7725 (1.0867) acc 71.8750 (72.4231) lr 4.6417e-04 eta 6:23:07 +epoch [36/50] batch [330/1000] time 1.548 (1.566) data 0.001 (0.003) loss 0.8418 (1.0839) acc 71.8750 (72.4527) lr 4.6417e-04 eta 6:22:58 +epoch [36/50] batch [335/1000] time 1.566 (1.566) data 0.000 (0.003) loss 1.1211 (1.0894) acc 65.6250 (72.3321) lr 4.6417e-04 eta 6:22:49 +epoch [36/50] batch [340/1000] time 1.568 (1.566) data 0.000 (0.003) loss 1.0381 (1.0895) acc 75.0000 (72.3438) lr 4.6417e-04 eta 6:22:39 +epoch [36/50] batch [345/1000] time 1.555 (1.566) data 0.000 (0.003) loss 0.5527 (1.0871) acc 87.5000 (72.4185) lr 4.6417e-04 eta 6:22:32 +epoch [36/50] batch [350/1000] time 1.571 (1.566) data 0.000 (0.003) loss 0.9331 (1.0868) acc 71.8750 (72.4196) lr 4.6417e-04 eta 6:22:25 +epoch [36/50] batch [355/1000] time 1.560 (1.566) data 0.001 (0.003) loss 1.0518 (1.0848) acc 62.5000 (72.4120) lr 4.6417e-04 eta 6:22:17 +epoch [36/50] batch [360/1000] time 1.560 (1.566) data 0.000 (0.003) loss 1.3428 (1.0833) acc 71.8750 (72.4132) lr 4.6417e-04 eta 6:22:07 +epoch [36/50] batch [365/1000] time 1.566 (1.566) data 0.001 (0.003) loss 1.2402 (1.0810) acc 71.8750 (72.4572) lr 4.6417e-04 eta 6:22:04 +epoch [36/50] batch [370/1000] time 1.577 (1.566) data 0.001 (0.003) loss 1.7158 (1.0830) acc 62.5000 (72.3902) lr 4.6417e-04 eta 6:21:54 +epoch [36/50] batch [375/1000] time 1.555 (1.566) data 0.000 (0.003) loss 1.2256 (1.0823) acc 75.0000 (72.4333) lr 4.6417e-04 eta 6:21:44 +epoch [36/50] batch [380/1000] time 1.563 (1.566) data 0.001 (0.003) loss 0.4104 (1.0825) acc 87.5000 (72.4095) lr 4.6417e-04 eta 6:21:38 +epoch [36/50] batch [385/1000] time 1.571 (1.566) data 0.001 (0.003) loss 1.9238 (1.0831) acc 56.2500 (72.3945) lr 4.6417e-04 eta 6:21:32 +epoch [36/50] batch [390/1000] time 1.558 (1.566) data 0.001 (0.003) loss 
1.7461 (1.0846) acc 68.7500 (72.3878) lr 4.6417e-04 eta 6:21:22 +epoch [36/50] batch [395/1000] time 1.570 (1.566) data 0.001 (0.003) loss 1.4473 (1.0841) acc 71.8750 (72.4525) lr 4.6417e-04 eta 6:21:12 +epoch [36/50] batch [400/1000] time 1.541 (1.566) data 0.000 (0.003) loss 1.5762 (1.0869) acc 65.6250 (72.4062) lr 4.6417e-04 eta 6:21:02 +epoch [36/50] batch [405/1000] time 1.548 (1.566) data 0.000 (0.003) loss 1.2275 (1.0865) acc 78.1250 (72.3997) lr 4.6417e-04 eta 6:20:54 +epoch [36/50] batch [410/1000] time 1.554 (1.566) data 0.000 (0.003) loss 0.9385 (1.0856) acc 84.3750 (72.4466) lr 4.6417e-04 eta 6:20:47 +epoch [36/50] batch [415/1000] time 1.561 (1.566) data 0.000 (0.003) loss 1.4375 (1.0839) acc 75.0000 (72.5151) lr 4.6417e-04 eta 6:20:39 +epoch [36/50] batch [420/1000] time 1.542 (1.566) data 0.000 (0.003) loss 1.1396 (1.0857) acc 68.7500 (72.4256) lr 4.6417e-04 eta 6:20:30 +epoch [36/50] batch [425/1000] time 1.723 (1.566) data 0.001 (0.003) loss 0.9429 (1.0854) acc 75.0000 (72.4191) lr 4.6417e-04 eta 6:20:27 +epoch [36/50] batch [430/1000] time 1.559 (1.566) data 0.001 (0.003) loss 1.6943 (1.0860) acc 68.7500 (72.3910) lr 4.6417e-04 eta 6:20:21 +epoch [36/50] batch [435/1000] time 1.577 (1.566) data 0.000 (0.003) loss 1.1631 (1.0881) acc 65.6250 (72.3491) lr 4.6417e-04 eta 6:20:14 +epoch [36/50] batch [440/1000] time 1.555 (1.567) data 0.001 (0.003) loss 1.4688 (1.0890) acc 62.5000 (72.3295) lr 4.6417e-04 eta 6:20:08 +epoch [36/50] batch [445/1000] time 1.576 (1.567) data 0.000 (0.003) loss 1.2539 (1.0896) acc 68.7500 (72.2683) lr 4.6417e-04 eta 6:20:01 +epoch [36/50] batch [450/1000] time 1.563 (1.566) data 0.000 (0.003) loss 1.6865 (1.0904) acc 59.3750 (72.2639) lr 4.6417e-04 eta 6:19:50 +epoch [36/50] batch [455/1000] time 1.571 (1.566) data 0.001 (0.003) loss 1.2715 (1.0917) acc 68.7500 (72.2390) lr 4.6417e-04 eta 6:19:42 +epoch [36/50] batch [460/1000] time 1.579 (1.566) data 0.000 (0.003) loss 1.3174 (1.0920) acc 65.6250 (72.2351) lr 4.6417e-04 
eta 6:19:34 +epoch [36/50] batch [465/1000] time 1.555 (1.566) data 0.001 (0.003) loss 1.0576 (1.0912) acc 75.0000 (72.2513) lr 4.6417e-04 eta 6:19:24 +epoch [36/50] batch [470/1000] time 1.773 (1.567) data 0.001 (0.003) loss 1.4180 (1.0908) acc 71.8750 (72.2739) lr 4.6417e-04 eta 6:19:22 +epoch [36/50] batch [475/1000] time 1.564 (1.566) data 0.001 (0.003) loss 1.0508 (1.0897) acc 65.6250 (72.2368) lr 4.6417e-04 eta 6:19:13 +epoch [36/50] batch [480/1000] time 1.561 (1.566) data 0.001 (0.003) loss 1.0107 (1.0902) acc 75.0000 (72.2266) lr 4.6417e-04 eta 6:19:03 +epoch [36/50] batch [485/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.4014 (1.0896) acc 62.5000 (72.2358) lr 4.6417e-04 eta 6:18:55 +epoch [36/50] batch [490/1000] time 1.560 (1.566) data 0.001 (0.002) loss 0.9985 (1.0873) acc 75.0000 (72.2768) lr 4.6417e-04 eta 6:18:45 +epoch [36/50] batch [495/1000] time 1.538 (1.566) data 0.001 (0.002) loss 0.9976 (1.0860) acc 71.8750 (72.2727) lr 4.6417e-04 eta 6:18:36 +epoch [36/50] batch [500/1000] time 1.560 (1.566) data 0.001 (0.002) loss 1.4023 (1.0853) acc 62.5000 (72.2500) lr 4.6417e-04 eta 6:18:27 +epoch [36/50] batch [505/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.3145 (1.0843) acc 71.8750 (72.2772) lr 4.6417e-04 eta 6:18:19 +epoch [36/50] batch [510/1000] time 1.562 (1.566) data 0.000 (0.002) loss 2.2129 (1.0884) acc 53.1250 (72.2059) lr 4.6417e-04 eta 6:18:11 +epoch [36/50] batch [515/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.9062 (1.0884) acc 56.2500 (72.2148) lr 4.6417e-04 eta 6:18:09 +epoch [36/50] batch [520/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.1670 (1.0892) acc 65.6250 (72.1995) lr 4.6417e-04 eta 6:18:02 +epoch [36/50] batch [525/1000] time 1.560 (1.567) data 0.000 (0.002) loss 0.8750 (1.0903) acc 75.0000 (72.1607) lr 4.6417e-04 eta 6:17:55 +epoch [36/50] batch [530/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.5811 (1.0929) acc 71.8750 (72.1403) lr 4.6417e-04 eta 6:17:46 +epoch [36/50] batch [535/1000] time 
1.555 (1.566) data 0.001 (0.002) loss 1.3281 (1.0922) acc 68.7500 (72.1612) lr 4.6417e-04 eta 6:17:37 +epoch [36/50] batch [540/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.8745 (1.0919) acc 75.0000 (72.1412) lr 4.6417e-04 eta 6:17:28 +epoch [36/50] batch [545/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.1152 (1.0927) acc 71.8750 (72.1560) lr 4.6417e-04 eta 6:17:19 +epoch [36/50] batch [550/1000] time 1.552 (1.566) data 0.001 (0.002) loss 0.8242 (1.0924) acc 81.2500 (72.1875) lr 4.6417e-04 eta 6:17:10 +epoch [36/50] batch [555/1000] time 1.538 (1.566) data 0.001 (0.002) loss 0.6963 (1.0913) acc 84.3750 (72.2185) lr 4.6417e-04 eta 6:17:01 +epoch [36/50] batch [560/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.2373 (1.0915) acc 65.6250 (72.2433) lr 4.6417e-04 eta 6:16:52 +epoch [36/50] batch [565/1000] time 1.541 (1.566) data 0.001 (0.002) loss 1.3418 (1.0923) acc 59.3750 (72.2124) lr 4.6417e-04 eta 6:16:43 +epoch [36/50] batch [570/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.9395 (1.0930) acc 78.1250 (72.1985) lr 4.6417e-04 eta 6:16:33 +epoch [36/50] batch [575/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.2812 (1.0932) acc 68.7500 (72.2120) lr 4.6417e-04 eta 6:16:25 +epoch [36/50] batch [580/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.1855 (1.0919) acc 75.0000 (72.2144) lr 4.6417e-04 eta 6:16:20 +epoch [36/50] batch [585/1000] time 1.533 (1.566) data 0.001 (0.002) loss 1.4912 (1.0926) acc 53.1250 (72.1955) lr 4.6417e-04 eta 6:16:12 +epoch [36/50] batch [590/1000] time 1.546 (1.566) data 0.001 (0.002) loss 1.1943 (1.0927) acc 68.7500 (72.1875) lr 4.6417e-04 eta 6:16:01 +epoch [36/50] batch [595/1000] time 1.565 (1.566) data 0.001 (0.002) loss 0.7227 (1.0919) acc 78.1250 (72.1796) lr 4.6417e-04 eta 6:15:54 +epoch [36/50] batch [600/1000] time 1.551 (1.566) data 0.000 (0.002) loss 0.8076 (1.0939) acc 81.2500 (72.1615) lr 4.6417e-04 eta 6:15:45 +epoch [36/50] batch [605/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.9248 (1.0916) 
acc 78.1250 (72.1952) lr 4.6417e-04 eta 6:15:36 +epoch [36/50] batch [610/1000] time 1.564 (1.566) data 0.001 (0.002) loss 2.4258 (1.0952) acc 59.3750 (72.1516) lr 4.6417e-04 eta 6:15:29 +epoch [36/50] batch [615/1000] time 1.562 (1.566) data 0.000 (0.002) loss 0.3936 (1.0952) acc 87.5000 (72.1646) lr 4.6417e-04 eta 6:15:20 +epoch [36/50] batch [620/1000] time 1.595 (1.566) data 0.000 (0.002) loss 0.6748 (1.0930) acc 81.2500 (72.1976) lr 4.6417e-04 eta 6:15:14 +epoch [36/50] batch [625/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.3896 (1.0936) acc 65.6250 (72.1900) lr 4.6417e-04 eta 6:15:09 +epoch [36/50] batch [630/1000] time 1.538 (1.566) data 0.001 (0.002) loss 1.1924 (1.0934) acc 78.1250 (72.2123) lr 4.6417e-04 eta 6:15:00 +epoch [36/50] batch [635/1000] time 1.516 (1.566) data 0.000 (0.002) loss 1.0166 (1.0920) acc 68.7500 (72.2293) lr 4.6417e-04 eta 6:14:49 +epoch [36/50] batch [640/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.7783 (1.0939) acc 59.3750 (72.2021) lr 4.6417e-04 eta 6:14:41 +epoch [36/50] batch [645/1000] time 1.588 (1.566) data 0.000 (0.002) loss 0.9185 (1.0936) acc 78.1250 (72.2141) lr 4.6417e-04 eta 6:14:34 +epoch [36/50] batch [650/1000] time 1.585 (1.566) data 0.001 (0.002) loss 1.1924 (1.0941) acc 71.8750 (72.2067) lr 4.6417e-04 eta 6:14:26 +epoch [36/50] batch [655/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.0342 (1.0944) acc 75.0000 (72.1899) lr 4.6417e-04 eta 6:14:17 +epoch [36/50] batch [660/1000] time 1.530 (1.565) data 0.000 (0.002) loss 1.2988 (1.0945) acc 62.5000 (72.1828) lr 4.6417e-04 eta 6:14:08 +epoch [36/50] batch [665/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.0098 (1.0946) acc 75.0000 (72.1992) lr 4.6417e-04 eta 6:14:04 +epoch [36/50] batch [670/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.9448 (1.0934) acc 71.8750 (72.2295) lr 4.6417e-04 eta 6:13:55 +epoch [36/50] batch [675/1000] time 1.562 (1.566) data 0.001 (0.002) loss 1.1377 (1.0937) acc 68.7500 (72.2361) lr 4.6417e-04 eta 6:13:46 
+epoch [36/50] batch [680/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.6221 (1.0950) acc 62.5000 (72.2105) lr 4.6417e-04 eta 6:13:38 +epoch [36/50] batch [685/1000] time 1.555 (1.566) data 0.000 (0.002) loss 1.2100 (1.0963) acc 71.8750 (72.1624) lr 4.6417e-04 eta 6:13:31 +epoch [36/50] batch [690/1000] time 1.551 (1.566) data 0.001 (0.002) loss 0.6270 (1.0935) acc 78.1250 (72.2147) lr 4.6417e-04 eta 6:13:23 +epoch [36/50] batch [695/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.0791 (1.0940) acc 78.1250 (72.2212) lr 4.6417e-04 eta 6:13:15 +epoch [36/50] batch [700/1000] time 1.567 (1.566) data 0.001 (0.002) loss 0.5278 (1.0922) acc 87.5000 (72.2768) lr 4.6417e-04 eta 6:13:07 +epoch [36/50] batch [705/1000] time 1.549 (1.566) data 0.000 (0.002) loss 0.7891 (1.0928) acc 75.0000 (72.2518) lr 4.6417e-04 eta 6:12:59 +epoch [36/50] batch [710/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.1895 (1.0931) acc 65.6250 (72.2403) lr 4.6417e-04 eta 6:12:52 +epoch [36/50] batch [715/1000] time 1.572 (1.566) data 0.000 (0.002) loss 1.0928 (1.0933) acc 68.7500 (72.2509) lr 4.6417e-04 eta 6:12:44 +epoch [36/50] batch [720/1000] time 1.578 (1.566) data 0.000 (0.002) loss 1.1963 (1.0927) acc 71.8750 (72.2483) lr 4.6417e-04 eta 6:12:37 +epoch [36/50] batch [725/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.8623 (1.0902) acc 78.1250 (72.2888) lr 4.6417e-04 eta 6:12:29 +epoch [36/50] batch [730/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.1465 (1.0917) acc 68.7500 (72.2603) lr 4.6417e-04 eta 6:12:23 +epoch [36/50] batch [735/1000] time 1.557 (1.566) data 0.000 (0.002) loss 0.8945 (1.0906) acc 81.2500 (72.2789) lr 4.6417e-04 eta 6:12:14 +epoch [36/50] batch [740/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.6860 (1.0908) acc 87.5000 (72.2804) lr 4.6417e-04 eta 6:12:05 +epoch [36/50] batch [745/1000] time 1.574 (1.566) data 0.001 (0.002) loss 0.9165 (1.0907) acc 81.2500 (72.2819) lr 4.6417e-04 eta 6:11:57 +epoch [36/50] batch [750/1000] time 1.550 (1.566) 
data 0.000 (0.002) loss 1.2998 (1.0923) acc 68.7500 (72.2750) lr 4.6417e-04 eta 6:11:48 +epoch [36/50] batch [755/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.0693 (1.0929) acc 78.1250 (72.2848) lr 4.6417e-04 eta 6:11:40 +epoch [36/50] batch [760/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.6797 (1.0931) acc 59.3750 (72.2862) lr 4.6417e-04 eta 6:11:31 +epoch [36/50] batch [765/1000] time 1.580 (1.565) data 0.001 (0.002) loss 1.1826 (1.0924) acc 65.6250 (72.2876) lr 4.6417e-04 eta 6:11:23 +epoch [36/50] batch [770/1000] time 1.571 (1.565) data 0.001 (0.002) loss 1.0176 (1.0921) acc 78.1250 (72.2890) lr 4.6417e-04 eta 6:11:16 +epoch [36/50] batch [775/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.8921 (1.0928) acc 84.3750 (72.2581) lr 4.6417e-04 eta 6:11:11 +epoch [36/50] batch [780/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.3262 (1.0942) acc 68.7500 (72.2276) lr 4.6417e-04 eta 6:11:03 +epoch [36/50] batch [785/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.2979 (1.0942) acc 71.8750 (72.2174) lr 4.6417e-04 eta 6:10:54 +epoch [36/50] batch [790/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.0361 (1.0934) acc 75.0000 (72.2152) lr 4.6417e-04 eta 6:10:46 +epoch [36/50] batch [795/1000] time 1.584 (1.566) data 0.000 (0.002) loss 1.3594 (1.0931) acc 75.0000 (72.2288) lr 4.6417e-04 eta 6:10:38 +epoch [36/50] batch [800/1000] time 1.555 (1.565) data 0.001 (0.002) loss 1.2783 (1.0937) acc 81.2500 (72.2305) lr 4.6417e-04 eta 6:10:29 +epoch [36/50] batch [805/1000] time 1.568 (1.565) data 0.000 (0.002) loss 0.7749 (1.0946) acc 78.1250 (72.2205) lr 4.6417e-04 eta 6:10:21 +epoch [36/50] batch [810/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.5337 (1.0939) acc 81.2500 (72.2184) lr 4.6417e-04 eta 6:10:14 +epoch [36/50] batch [815/1000] time 1.742 (1.566) data 0.001 (0.002) loss 1.2822 (1.0939) acc 68.7500 (72.2508) lr 4.6417e-04 eta 6:10:09 +epoch [36/50] batch [820/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.4658 (1.0943) acc 62.5000 
(72.2409) lr 4.6417e-04 eta 6:10:02 +epoch [36/50] batch [825/1000] time 1.602 (1.566) data 0.001 (0.002) loss 1.6855 (1.0967) acc 53.1250 (72.1780) lr 4.6417e-04 eta 6:09:55 +epoch [36/50] batch [830/1000] time 1.563 (1.566) data 0.001 (0.002) loss 1.0752 (1.0970) acc 81.2500 (72.1724) lr 4.6417e-04 eta 6:09:48 +epoch [36/50] batch [835/1000] time 1.575 (1.566) data 0.000 (0.002) loss 0.7744 (1.0967) acc 81.2500 (72.1856) lr 4.6417e-04 eta 6:09:41 +epoch [36/50] batch [840/1000] time 1.577 (1.566) data 0.000 (0.002) loss 0.6504 (1.0968) acc 81.2500 (72.1875) lr 4.6417e-04 eta 6:09:34 +epoch [36/50] batch [845/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.1953 (1.0965) acc 65.6250 (72.1930) lr 4.6417e-04 eta 6:09:26 +epoch [36/50] batch [850/1000] time 1.570 (1.566) data 0.000 (0.002) loss 0.7012 (1.0950) acc 81.2500 (72.2096) lr 4.6417e-04 eta 6:09:18 +epoch [36/50] batch [855/1000] time 1.562 (1.566) data 0.001 (0.002) loss 0.6821 (1.0932) acc 78.1250 (72.2405) lr 4.6417e-04 eta 6:09:10 +epoch [36/50] batch [860/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.2578 (1.0936) acc 62.5000 (72.2529) lr 4.6417e-04 eta 6:09:02 +epoch [36/50] batch [865/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.5869 (1.0949) acc 65.6250 (72.2146) lr 4.6417e-04 eta 6:08:54 +epoch [36/50] batch [870/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.8540 (1.0943) acc 81.2500 (72.2306) lr 4.6417e-04 eta 6:08:45 +epoch [36/50] batch [875/1000] time 1.554 (1.566) data 0.000 (0.002) loss 0.7461 (1.0943) acc 81.2500 (72.2429) lr 4.6417e-04 eta 6:08:36 +epoch [36/50] batch [880/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.7363 (1.0939) acc 84.3750 (72.2585) lr 4.6417e-04 eta 6:08:30 +epoch [36/50] batch [885/1000] time 1.561 (1.566) data 0.001 (0.002) loss 0.9229 (1.0924) acc 75.0000 (72.2952) lr 4.6417e-04 eta 6:08:22 +epoch [36/50] batch [890/1000] time 1.562 (1.566) data 0.001 (0.002) loss 1.2100 (1.0933) acc 62.5000 (72.2753) lr 4.6417e-04 eta 6:08:14 +epoch [36/50] 
batch [895/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.3311 (1.0930) acc 65.6250 (72.2730) lr 4.6417e-04 eta 6:08:05 +epoch [36/50] batch [900/1000] time 1.575 (1.566) data 0.001 (0.002) loss 0.8340 (1.0935) acc 75.0000 (72.2674) lr 4.6417e-04 eta 6:07:57 +epoch [36/50] batch [905/1000] time 1.540 (1.566) data 0.001 (0.002) loss 1.3262 (1.0932) acc 75.0000 (72.2894) lr 4.6417e-04 eta 6:07:48 +epoch [36/50] batch [910/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.7974 (1.0929) acc 84.3750 (72.3043) lr 4.6417e-04 eta 6:07:40 +epoch [36/50] batch [915/1000] time 1.550 (1.566) data 0.001 (0.002) loss 0.8521 (1.0919) acc 75.0000 (72.3087) lr 4.6417e-04 eta 6:07:33 +epoch [36/50] batch [920/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.1094 (1.0917) acc 68.7500 (72.3268) lr 4.6417e-04 eta 6:07:24 +epoch [36/50] batch [925/1000] time 1.566 (1.566) data 0.001 (0.002) loss 0.8916 (1.0905) acc 75.0000 (72.3547) lr 4.6417e-04 eta 6:07:20 +epoch [36/50] batch [930/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.2690 (1.0897) acc 87.5000 (72.3555) lr 4.6417e-04 eta 6:07:12 +epoch [36/50] batch [935/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.1807 (1.0886) acc 62.5000 (72.3797) lr 4.6417e-04 eta 6:07:03 +epoch [36/50] batch [940/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.0068 (1.0882) acc 78.1250 (72.4069) lr 4.6417e-04 eta 6:06:54 +epoch [36/50] batch [945/1000] time 1.570 (1.566) data 0.000 (0.002) loss 0.5010 (1.0879) acc 87.5000 (72.4339) lr 4.6417e-04 eta 6:06:46 +epoch [36/50] batch [950/1000] time 1.570 (1.566) data 0.000 (0.002) loss 0.7671 (1.0882) acc 78.1250 (72.4309) lr 4.6417e-04 eta 6:06:39 +epoch [36/50] batch [955/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.5029 (1.0894) acc 65.6250 (72.4149) lr 4.6417e-04 eta 6:06:30 +epoch [36/50] batch [960/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.2861 (1.0899) acc 62.5000 (72.3991) lr 4.6417e-04 eta 6:06:22 +epoch [36/50] batch [965/1000] time 1.547 (1.566) data 0.000 
(0.002) loss 1.0967 (1.0888) acc 56.2500 (72.4223) lr 4.6417e-04 eta 6:06:13 +epoch [36/50] batch [970/1000] time 1.544 (1.566) data 0.000 (0.001) loss 1.2705 (1.0892) acc 71.8750 (72.4356) lr 4.6417e-04 eta 6:06:07 +epoch [36/50] batch [975/1000] time 1.566 (1.566) data 0.000 (0.001) loss 1.4395 (1.0901) acc 65.6250 (72.4231) lr 4.6417e-04 eta 6:05:59 +epoch [36/50] batch [980/1000] time 1.572 (1.566) data 0.000 (0.001) loss 0.8301 (1.0902) acc 81.2500 (72.4203) lr 4.6417e-04 eta 6:05:52 +epoch [36/50] batch [985/1000] time 1.555 (1.566) data 0.001 (0.001) loss 1.3193 (1.0893) acc 65.6250 (72.4429) lr 4.6417e-04 eta 6:05:44 +epoch [36/50] batch [990/1000] time 1.558 (1.566) data 0.000 (0.001) loss 1.3535 (1.0901) acc 71.8750 (72.4369) lr 4.6417e-04 eta 6:05:36 +epoch [36/50] batch [995/1000] time 1.600 (1.566) data 0.000 (0.001) loss 0.8325 (1.0898) acc 75.0000 (72.4466) lr 4.6417e-04 eta 6:05:28 +epoch [36/50] batch [1000/1000] time 1.560 (1.566) data 0.000 (0.001) loss 1.2178 (1.0893) acc 75.0000 (72.4656) lr 4.1221e-04 eta 6:05:21 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,330 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.2% +epoch [37/50] batch [5/1000] time 1.552 (1.706) data 0.000 (0.193) loss 0.9331 (1.0564) acc 75.0000 (75.6250) lr 4.1221e-04 eta 6:37:53 +epoch [37/50] batch [10/1000] time 1.575 (1.630) data 0.001 (0.097) loss 1.5088 (1.2935) acc 68.7500 (69.6875) lr 4.1221e-04 eta 6:20:09 +epoch [37/50] batch [15/1000] time 1.567 (1.608) data 0.001 (0.065) loss 1.1748 (1.2136) acc 65.6250 (69.7917) lr 4.1221e-04 eta 6:14:51 +epoch [37/50] batch [20/1000] time 1.575 (1.601) data 0.001 (0.049) loss 1.0410 (1.1495) acc 75.0000 (71.5625) lr 4.1221e-04 eta 6:13:02 +epoch [37/50] batch [25/1000] time 1.572 (1.594) data 0.001 (0.039) loss 1.3066 (1.1751) acc 75.0000 (71.0000) lr 4.1221e-04 eta 6:11:19 +epoch [37/50] batch [30/1000] time 1.546 (1.589) data 0.001 (0.033) loss 1.3848 (1.1861) acc 75.0000 (70.5208) lr 4.1221e-04 eta 
6:10:00 +epoch [37/50] batch [35/1000] time 1.555 (1.585) data 0.001 (0.028) loss 1.3896 (1.1833) acc 65.6250 (70.8036) lr 4.1221e-04 eta 6:08:48 +epoch [37/50] batch [40/1000] time 1.583 (1.591) data 0.000 (0.025) loss 0.8428 (1.1503) acc 75.0000 (71.6406) lr 4.1221e-04 eta 6:10:12 +epoch [37/50] batch [45/1000] time 1.556 (1.589) data 0.000 (0.022) loss 0.7144 (1.1260) acc 81.2500 (71.9444) lr 4.1221e-04 eta 6:09:38 +epoch [37/50] batch [50/1000] time 1.547 (1.585) data 0.001 (0.020) loss 0.9062 (1.1542) acc 75.0000 (71.1250) lr 4.1221e-04 eta 6:08:29 +epoch [37/50] batch [55/1000] time 1.550 (1.581) data 0.000 (0.018) loss 0.7251 (1.1441) acc 78.1250 (71.3068) lr 4.1221e-04 eta 6:07:30 +epoch [37/50] batch [60/1000] time 1.573 (1.579) data 0.001 (0.017) loss 1.1592 (1.1716) acc 71.8750 (71.0938) lr 4.1221e-04 eta 6:06:48 +epoch [37/50] batch [65/1000] time 1.561 (1.578) data 0.000 (0.015) loss 1.1182 (1.1521) acc 65.6250 (71.4423) lr 4.1221e-04 eta 6:06:33 +epoch [37/50] batch [70/1000] time 1.554 (1.577) data 0.000 (0.014) loss 1.0029 (1.1521) acc 71.8750 (71.4732) lr 4.1221e-04 eta 6:06:04 +epoch [37/50] batch [75/1000] time 1.564 (1.576) data 0.000 (0.013) loss 0.8218 (1.1505) acc 81.2500 (71.2917) lr 4.1221e-04 eta 6:05:45 +epoch [37/50] batch [80/1000] time 1.754 (1.577) data 0.000 (0.013) loss 0.7129 (1.1355) acc 81.2500 (71.5625) lr 4.1221e-04 eta 6:05:57 +epoch [37/50] batch [85/1000] time 1.554 (1.576) data 0.000 (0.012) loss 1.4336 (1.1378) acc 68.7500 (71.3971) lr 4.1221e-04 eta 6:05:28 +epoch [37/50] batch [90/1000] time 1.552 (1.576) data 0.000 (0.011) loss 1.1709 (1.1353) acc 75.0000 (71.5278) lr 4.1221e-04 eta 6:05:16 +epoch [37/50] batch [95/1000] time 1.575 (1.575) data 0.000 (0.011) loss 0.7578 (1.1324) acc 78.1250 (71.6118) lr 4.1221e-04 eta 6:04:55 +epoch [37/50] batch [100/1000] time 1.598 (1.575) data 0.000 (0.010) loss 1.1279 (1.1315) acc 68.7500 (71.4375) lr 4.1221e-04 eta 6:04:48 +epoch [37/50] batch [105/1000] time 1.567 (1.574) data 
0.000 (0.010) loss 1.4414 (1.1343) acc 71.8750 (71.4583) lr 4.1221e-04 eta 6:04:34 +epoch [37/50] batch [110/1000] time 1.545 (1.573) data 0.000 (0.009) loss 1.3369 (1.1321) acc 68.7500 (71.5341) lr 4.1221e-04 eta 6:04:12 +epoch [37/50] batch [115/1000] time 1.558 (1.573) data 0.000 (0.009) loss 1.0869 (1.1312) acc 68.7500 (71.4946) lr 4.1221e-04 eta 6:04:04 +epoch [37/50] batch [120/1000] time 1.586 (1.573) data 0.001 (0.008) loss 1.2480 (1.1258) acc 65.6250 (71.4323) lr 4.1221e-04 eta 6:03:53 +epoch [37/50] batch [125/1000] time 1.580 (1.573) data 0.000 (0.008) loss 1.3633 (1.1252) acc 71.8750 (71.6000) lr 4.1221e-04 eta 6:03:41 +epoch [37/50] batch [130/1000] time 1.571 (1.572) data 0.000 (0.008) loss 0.4436 (1.1092) acc 84.3750 (71.8510) lr 4.1221e-04 eta 6:03:30 +epoch [37/50] batch [135/1000] time 1.567 (1.572) data 0.000 (0.008) loss 0.7593 (1.1080) acc 81.2500 (71.7593) lr 4.1221e-04 eta 6:03:21 +epoch [37/50] batch [140/1000] time 1.560 (1.572) data 0.000 (0.007) loss 0.6797 (1.1094) acc 78.1250 (71.7188) lr 4.1221e-04 eta 6:03:06 +epoch [37/50] batch [145/1000] time 1.546 (1.572) data 0.000 (0.007) loss 0.4895 (1.0941) acc 81.2500 (71.8966) lr 4.1221e-04 eta 6:03:03 +epoch [37/50] batch [150/1000] time 1.581 (1.572) data 0.001 (0.007) loss 0.9614 (1.0995) acc 71.8750 (71.8333) lr 4.1221e-04 eta 6:02:57 +epoch [37/50] batch [155/1000] time 1.557 (1.572) data 0.001 (0.007) loss 0.8687 (1.0977) acc 75.0000 (71.9758) lr 4.1221e-04 eta 6:02:47 +epoch [37/50] batch [160/1000] time 1.560 (1.572) data 0.000 (0.006) loss 1.2266 (1.1016) acc 65.6250 (71.9141) lr 4.1221e-04 eta 6:02:31 +epoch [37/50] batch [165/1000] time 1.566 (1.571) data 0.001 (0.006) loss 1.4287 (1.1110) acc 65.6250 (71.7045) lr 4.1221e-04 eta 6:02:20 +epoch [37/50] batch [170/1000] time 1.566 (1.571) data 0.000 (0.006) loss 1.7100 (1.1118) acc 65.6250 (71.6544) lr 4.1221e-04 eta 6:02:08 +epoch [37/50] batch [175/1000] time 1.578 (1.571) data 0.001 (0.006) loss 0.7314 (1.1097) acc 84.3750 
(71.7500) lr 4.1221e-04 eta 6:01:56 +epoch [37/50] batch [180/1000] time 1.583 (1.570) data 0.001 (0.006) loss 0.8662 (1.1048) acc 68.7500 (71.8403) lr 4.1221e-04 eta 6:01:39 +epoch [37/50] batch [185/1000] time 1.578 (1.570) data 0.000 (0.006) loss 1.2285 (1.1062) acc 75.0000 (71.8919) lr 4.1221e-04 eta 6:01:27 +epoch [37/50] batch [190/1000] time 1.567 (1.570) data 0.001 (0.006) loss 1.2490 (1.1065) acc 78.1250 (71.9737) lr 4.1221e-04 eta 6:01:25 +epoch [37/50] batch [195/1000] time 1.594 (1.570) data 0.001 (0.005) loss 1.1816 (1.1078) acc 65.6250 (71.9391) lr 4.1221e-04 eta 6:01:15 +epoch [37/50] batch [200/1000] time 1.541 (1.569) data 0.001 (0.005) loss 1.4150 (1.1089) acc 65.6250 (71.9531) lr 4.1221e-04 eta 6:00:58 +epoch [37/50] batch [205/1000] time 1.557 (1.569) data 0.000 (0.005) loss 1.0059 (1.1109) acc 78.1250 (71.8598) lr 4.1221e-04 eta 6:00:47 +epoch [37/50] batch [210/1000] time 1.548 (1.569) data 0.001 (0.005) loss 0.5815 (1.1095) acc 90.6250 (71.8899) lr 4.1221e-04 eta 6:00:37 +epoch [37/50] batch [215/1000] time 1.572 (1.569) data 0.000 (0.005) loss 0.9819 (1.1077) acc 75.0000 (71.9767) lr 4.1221e-04 eta 6:00:29 +epoch [37/50] batch [220/1000] time 1.572 (1.569) data 0.001 (0.005) loss 1.3076 (1.1103) acc 75.0000 (71.9744) lr 4.1221e-04 eta 6:00:20 +epoch [37/50] batch [225/1000] time 1.545 (1.569) data 0.000 (0.005) loss 0.9209 (1.1017) acc 68.7500 (72.0972) lr 4.1221e-04 eta 6:00:09 +epoch [37/50] batch [230/1000] time 1.548 (1.568) data 0.000 (0.005) loss 0.8721 (1.0958) acc 71.8750 (72.2283) lr 4.1221e-04 eta 5:59:57 +epoch [37/50] batch [235/1000] time 1.541 (1.569) data 0.000 (0.005) loss 0.6382 (1.0916) acc 78.1250 (72.2739) lr 4.1221e-04 eta 5:59:51 +epoch [37/50] batch [240/1000] time 1.565 (1.568) data 0.000 (0.004) loss 1.2979 (1.0918) acc 65.6250 (72.2786) lr 4.1221e-04 eta 5:59:37 +epoch [37/50] batch [245/1000] time 1.524 (1.568) data 0.000 (0.004) loss 0.7725 (1.0940) acc 75.0000 (72.2321) lr 4.1221e-04 eta 5:59:24 +epoch [37/50] 
batch [250/1000] time 1.537 (1.568) data 0.000 (0.004) loss 1.1494 (1.0944) acc 81.2500 (72.2625) lr 4.1221e-04 eta 5:59:13 +epoch [37/50] batch [255/1000] time 1.553 (1.567) data 0.001 (0.004) loss 0.9097 (1.0933) acc 81.2500 (72.3284) lr 4.1221e-04 eta 5:59:02 +epoch [37/50] batch [260/1000] time 1.549 (1.567) data 0.000 (0.004) loss 1.1650 (1.0980) acc 75.0000 (72.3197) lr 4.1221e-04 eta 5:58:53 +epoch [37/50] batch [265/1000] time 1.583 (1.567) data 0.000 (0.004) loss 0.5176 (1.0973) acc 84.3750 (72.3585) lr 4.1221e-04 eta 5:58:43 +epoch [37/50] batch [270/1000] time 1.571 (1.567) data 0.000 (0.004) loss 0.8745 (1.0995) acc 68.7500 (72.3264) lr 4.1221e-04 eta 5:58:34 +epoch [37/50] batch [275/1000] time 1.577 (1.567) data 0.000 (0.004) loss 1.0615 (1.0992) acc 75.0000 (72.3636) lr 4.1221e-04 eta 5:58:25 +epoch [37/50] batch [280/1000] time 1.567 (1.567) data 0.000 (0.004) loss 1.0850 (1.0993) acc 68.7500 (72.3103) lr 4.1221e-04 eta 5:58:17 +epoch [37/50] batch [285/1000] time 1.586 (1.567) data 0.000 (0.004) loss 1.1084 (1.1016) acc 71.8750 (72.2368) lr 4.1221e-04 eta 5:58:11 +epoch [37/50] batch [290/1000] time 1.546 (1.567) data 0.000 (0.004) loss 1.7529 (1.1040) acc 59.3750 (72.1983) lr 4.1221e-04 eta 5:58:03 +epoch [37/50] batch [295/1000] time 1.587 (1.568) data 0.000 (0.004) loss 0.8706 (1.1046) acc 75.0000 (72.2140) lr 4.1221e-04 eta 5:58:05 +epoch [37/50] batch [300/1000] time 1.555 (1.568) data 0.000 (0.004) loss 0.8003 (1.1024) acc 65.6250 (72.2292) lr 4.1221e-04 eta 5:57:58 +epoch [37/50] batch [305/1000] time 1.588 (1.568) data 0.001 (0.004) loss 0.9019 (1.1042) acc 68.7500 (72.1004) lr 4.1221e-04 eta 5:57:49 +epoch [37/50] batch [310/1000] time 1.533 (1.568) data 0.001 (0.004) loss 0.9639 (1.1024) acc 71.8750 (72.1875) lr 4.1221e-04 eta 5:57:40 +epoch [37/50] batch [315/1000] time 1.553 (1.567) data 0.001 (0.004) loss 1.0078 (1.1003) acc 78.1250 (72.2619) lr 4.1221e-04 eta 5:57:28 +epoch [37/50] batch [320/1000] time 1.586 (1.567) data 0.000 
(0.003) loss 1.2344 (1.0991) acc 71.8750 (72.2559) lr 4.1221e-04 eta 5:57:18 +epoch [37/50] batch [325/1000] time 1.551 (1.567) data 0.000 (0.003) loss 0.9624 (1.1016) acc 75.0000 (72.1827) lr 4.1221e-04 eta 5:57:09 +epoch [37/50] batch [330/1000] time 1.561 (1.567) data 0.001 (0.003) loss 0.8213 (1.1029) acc 78.1250 (72.1307) lr 4.1221e-04 eta 5:57:03 +epoch [37/50] batch [335/1000] time 1.578 (1.567) data 0.000 (0.003) loss 0.9565 (1.1008) acc 62.5000 (72.1362) lr 4.1221e-04 eta 5:56:54 +epoch [37/50] batch [340/1000] time 1.582 (1.567) data 0.001 (0.003) loss 0.7466 (1.1025) acc 84.3750 (72.1967) lr 4.1221e-04 eta 5:56:50 +epoch [37/50] batch [345/1000] time 1.555 (1.567) data 0.000 (0.003) loss 1.2520 (1.1012) acc 65.6250 (72.2192) lr 4.1221e-04 eta 5:56:42 +epoch [37/50] batch [350/1000] time 1.538 (1.567) data 0.001 (0.003) loss 0.7598 (1.1018) acc 81.2500 (72.1696) lr 4.1221e-04 eta 5:56:31 +epoch [37/50] batch [355/1000] time 1.555 (1.567) data 0.001 (0.003) loss 1.0039 (1.1049) acc 65.6250 (72.0599) lr 4.1221e-04 eta 5:56:23 +epoch [37/50] batch [360/1000] time 1.585 (1.567) data 0.001 (0.003) loss 0.9868 (1.1022) acc 75.0000 (72.1181) lr 4.1221e-04 eta 5:56:16 +epoch [37/50] batch [365/1000] time 1.553 (1.567) data 0.000 (0.003) loss 1.2471 (1.1036) acc 71.8750 (72.1233) lr 4.1221e-04 eta 5:56:07 +epoch [37/50] batch [370/1000] time 1.566 (1.567) data 0.001 (0.003) loss 0.9561 (1.1034) acc 78.1250 (72.1453) lr 4.1221e-04 eta 5:55:58 +epoch [37/50] batch [375/1000] time 1.576 (1.567) data 0.001 (0.003) loss 1.1006 (1.1027) acc 75.0000 (72.1750) lr 4.1221e-04 eta 5:55:49 +epoch [37/50] batch [380/1000] time 1.550 (1.567) data 0.000 (0.003) loss 0.9028 (1.0996) acc 78.1250 (72.2533) lr 4.1221e-04 eta 5:55:40 +epoch [37/50] batch [385/1000] time 1.549 (1.567) data 0.001 (0.003) loss 0.9297 (1.0949) acc 75.0000 (72.3620) lr 4.1221e-04 eta 5:55:39 +epoch [37/50] batch [390/1000] time 1.571 (1.567) data 0.000 (0.003) loss 1.1660 (1.0937) acc 68.7500 (72.3798) lr 
4.1221e-04 eta 5:55:30 +epoch [37/50] batch [395/1000] time 1.544 (1.567) data 0.000 (0.003) loss 1.3486 (1.0915) acc 62.5000 (72.4367) lr 4.1221e-04 eta 5:55:19 +epoch [37/50] batch [400/1000] time 1.545 (1.567) data 0.001 (0.003) loss 0.7837 (1.0905) acc 78.1250 (72.4844) lr 4.1221e-04 eta 5:55:07 +epoch [37/50] batch [405/1000] time 1.545 (1.567) data 0.000 (0.003) loss 1.2148 (1.0922) acc 75.0000 (72.4846) lr 4.1221e-04 eta 5:54:58 +epoch [37/50] batch [410/1000] time 1.583 (1.567) data 0.001 (0.003) loss 0.9795 (1.0920) acc 65.6250 (72.4848) lr 4.1221e-04 eta 5:54:50 +epoch [37/50] batch [415/1000] time 1.566 (1.567) data 0.001 (0.003) loss 1.1602 (1.0933) acc 71.8750 (72.4849) lr 4.1221e-04 eta 5:54:43 +epoch [37/50] batch [420/1000] time 1.567 (1.567) data 0.001 (0.003) loss 0.9570 (1.0921) acc 75.0000 (72.5074) lr 4.1221e-04 eta 5:54:37 +epoch [37/50] batch [425/1000] time 1.555 (1.567) data 0.000 (0.003) loss 0.6963 (1.0924) acc 81.2500 (72.5000) lr 4.1221e-04 eta 5:54:27 +epoch [37/50] batch [430/1000] time 1.570 (1.567) data 0.000 (0.003) loss 1.2695 (1.0923) acc 65.6250 (72.4564) lr 4.1221e-04 eta 5:54:19 +epoch [37/50] batch [435/1000] time 1.562 (1.567) data 0.000 (0.003) loss 1.5039 (1.0954) acc 65.6250 (72.4066) lr 4.1221e-04 eta 5:54:12 +epoch [37/50] batch [440/1000] time 1.575 (1.567) data 0.001 (0.003) loss 0.8926 (1.0969) acc 65.6250 (72.4006) lr 4.1221e-04 eta 5:54:03 +epoch [37/50] batch [445/1000] time 1.751 (1.567) data 0.000 (0.003) loss 1.2637 (1.1022) acc 68.7500 (72.3174) lr 4.1221e-04 eta 5:54:02 +epoch [37/50] batch [450/1000] time 1.579 (1.567) data 0.000 (0.003) loss 1.3857 (1.1020) acc 68.7500 (72.3264) lr 4.1221e-04 eta 5:53:54 +epoch [37/50] batch [455/1000] time 1.589 (1.567) data 0.001 (0.003) loss 1.0625 (1.1007) acc 65.6250 (72.3489) lr 4.1221e-04 eta 5:53:45 +epoch [37/50] batch [460/1000] time 1.559 (1.567) data 0.000 (0.003) loss 0.7036 (1.1010) acc 75.0000 (72.2962) lr 4.1221e-04 eta 5:53:38 +epoch [37/50] batch 
[465/1000] time 1.568 (1.567) data 0.001 (0.003) loss 1.0439 (1.1004) acc 71.8750 (72.3387) lr 4.1221e-04 eta 5:53:29 +epoch [37/50] batch [470/1000] time 1.562 (1.567) data 0.000 (0.003) loss 0.9888 (1.0984) acc 78.1250 (72.4069) lr 4.1221e-04 eta 5:53:20 +epoch [37/50] batch [475/1000] time 1.577 (1.567) data 0.001 (0.002) loss 1.5137 (1.0991) acc 65.6250 (72.3750) lr 4.1221e-04 eta 5:53:14 +epoch [37/50] batch [480/1000] time 1.575 (1.567) data 0.000 (0.002) loss 0.8330 (1.0990) acc 84.3750 (72.4089) lr 4.1221e-04 eta 5:53:06 +epoch [37/50] batch [485/1000] time 1.561 (1.567) data 0.000 (0.002) loss 0.7974 (1.0969) acc 84.3750 (72.4613) lr 4.1221e-04 eta 5:52:57 +epoch [37/50] batch [490/1000] time 1.724 (1.567) data 0.000 (0.002) loss 1.7959 (1.0983) acc 53.1250 (72.4107) lr 4.1221e-04 eta 5:52:53 +epoch [37/50] batch [495/1000] time 1.548 (1.567) data 0.001 (0.002) loss 1.1357 (1.0976) acc 62.5000 (72.3737) lr 4.1221e-04 eta 5:52:45 +epoch [37/50] batch [500/1000] time 1.574 (1.567) data 0.001 (0.002) loss 0.9468 (1.0974) acc 75.0000 (72.3812) lr 4.1221e-04 eta 5:52:38 +epoch [37/50] batch [505/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.2529 (1.0976) acc 68.7500 (72.3824) lr 4.1221e-04 eta 5:52:27 +epoch [37/50] batch [510/1000] time 1.581 (1.567) data 0.000 (0.002) loss 1.1387 (1.0972) acc 68.7500 (72.4142) lr 4.1221e-04 eta 5:52:18 +epoch [37/50] batch [515/1000] time 1.553 (1.567) data 0.001 (0.002) loss 0.7476 (1.0977) acc 84.3750 (72.4150) lr 4.1221e-04 eta 5:52:09 +epoch [37/50] batch [520/1000] time 1.557 (1.567) data 0.000 (0.002) loss 0.8032 (1.0986) acc 75.0000 (72.3798) lr 4.1221e-04 eta 5:52:02 +epoch [37/50] batch [525/1000] time 1.573 (1.567) data 0.000 (0.002) loss 1.3086 (1.1006) acc 65.6250 (72.3750) lr 4.1221e-04 eta 5:51:54 +epoch [37/50] batch [530/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.1279 (1.1000) acc 71.8750 (72.3821) lr 4.1221e-04 eta 5:51:46 +epoch [37/50] batch [535/1000] time 1.555 (1.567) data 0.000 (0.002) loss 
1.0469 (1.1034) acc 71.8750 (72.3131) lr 4.1221e-04 eta 5:51:42 +epoch [37/50] batch [540/1000] time 1.555 (1.567) data 0.001 (0.002) loss 1.3926 (1.1034) acc 56.2500 (72.2917) lr 4.1221e-04 eta 5:51:34 +epoch [37/50] batch [545/1000] time 1.562 (1.567) data 0.001 (0.002) loss 0.9736 (1.1030) acc 68.7500 (72.2821) lr 4.1221e-04 eta 5:51:27 +epoch [37/50] batch [550/1000] time 1.580 (1.567) data 0.001 (0.002) loss 0.7251 (1.1022) acc 81.2500 (72.3125) lr 4.1221e-04 eta 5:51:19 +epoch [37/50] batch [555/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.0391 (1.1020) acc 75.0000 (72.3367) lr 4.1221e-04 eta 5:51:10 +epoch [37/50] batch [560/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.3379 (1.1020) acc 71.8750 (72.3382) lr 4.1221e-04 eta 5:51:02 +epoch [37/50] batch [565/1000] time 1.602 (1.567) data 0.000 (0.002) loss 1.2422 (1.1021) acc 59.3750 (72.3396) lr 4.1221e-04 eta 5:50:55 +epoch [37/50] batch [570/1000] time 1.555 (1.567) data 0.001 (0.002) loss 1.1631 (1.1009) acc 68.7500 (72.3684) lr 4.1221e-04 eta 5:50:47 +epoch [37/50] batch [575/1000] time 1.574 (1.567) data 0.000 (0.002) loss 0.9468 (1.1021) acc 75.0000 (72.3478) lr 4.1221e-04 eta 5:50:39 +epoch [37/50] batch [580/1000] time 1.567 (1.567) data 0.000 (0.002) loss 0.6934 (1.1017) acc 81.2500 (72.3545) lr 4.1221e-04 eta 5:50:32 +epoch [37/50] batch [585/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.6973 (1.1027) acc 71.8750 (72.3237) lr 4.1221e-04 eta 5:50:24 +epoch [37/50] batch [590/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.0703 (1.1023) acc 78.1250 (72.3305) lr 4.1221e-04 eta 5:50:14 +epoch [37/50] batch [595/1000] time 1.581 (1.567) data 0.001 (0.002) loss 1.0576 (1.1034) acc 71.8750 (72.3214) lr 4.1221e-04 eta 5:50:06 +epoch [37/50] batch [600/1000] time 1.578 (1.567) data 0.000 (0.002) loss 1.3389 (1.1048) acc 53.1250 (72.2448) lr 4.1221e-04 eta 5:50:02 +epoch [37/50] batch [605/1000] time 1.560 (1.567) data 0.000 (0.002) loss 1.2852 (1.1056) acc 65.6250 (72.2314) lr 4.1221e-04 
eta 5:49:53 +epoch [37/50] batch [610/1000] time 1.549 (1.567) data 0.000 (0.002) loss 1.0010 (1.1047) acc 68.7500 (72.2541) lr 4.1221e-04 eta 5:49:44 +epoch [37/50] batch [615/1000] time 1.561 (1.567) data 0.001 (0.002) loss 1.2666 (1.1040) acc 68.7500 (72.2663) lr 4.1221e-04 eta 5:49:36 +epoch [37/50] batch [620/1000] time 1.557 (1.567) data 0.001 (0.002) loss 1.2441 (1.1050) acc 71.8750 (72.2480) lr 4.1221e-04 eta 5:49:27 +epoch [37/50] batch [625/1000] time 1.584 (1.567) data 0.000 (0.002) loss 1.1826 (1.1048) acc 71.8750 (72.2550) lr 4.1221e-04 eta 5:49:19 +epoch [37/50] batch [630/1000] time 1.557 (1.567) data 0.000 (0.002) loss 1.3477 (1.1037) acc 71.8750 (72.2867) lr 4.1221e-04 eta 5:49:09 +epoch [37/50] batch [635/1000] time 1.529 (1.567) data 0.000 (0.002) loss 1.8320 (1.1045) acc 62.5000 (72.2638) lr 4.1221e-04 eta 5:49:01 +epoch [37/50] batch [640/1000] time 1.558 (1.567) data 0.000 (0.002) loss 0.2341 (1.1016) acc 100.0000 (72.3486) lr 4.1221e-04 eta 5:48:53 +epoch [37/50] batch [645/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.0039 (1.1009) acc 84.3750 (72.3740) lr 4.1221e-04 eta 5:48:47 +epoch [37/50] batch [650/1000] time 1.573 (1.567) data 0.001 (0.002) loss 1.3818 (1.1007) acc 62.5000 (72.3606) lr 4.1221e-04 eta 5:48:38 +epoch [37/50] batch [655/1000] time 1.578 (1.567) data 0.000 (0.002) loss 0.6748 (1.1010) acc 75.0000 (72.3664) lr 4.1221e-04 eta 5:48:30 +epoch [37/50] batch [660/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.6455 (1.1019) acc 68.7500 (72.3201) lr 4.1221e-04 eta 5:48:21 +epoch [37/50] batch [665/1000] time 1.543 (1.567) data 0.000 (0.002) loss 0.8970 (1.1015) acc 78.1250 (72.3590) lr 4.1221e-04 eta 5:48:13 +epoch [37/50] batch [670/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.4277 (1.1034) acc 65.6250 (72.3181) lr 4.1221e-04 eta 5:48:04 +epoch [37/50] batch [675/1000] time 1.558 (1.567) data 0.000 (0.002) loss 1.2158 (1.1031) acc 78.1250 (72.3241) lr 4.1221e-04 eta 5:47:55 +epoch [37/50] batch [680/1000] time 
1.554 (1.567) data 0.000 (0.002) loss 0.7441 (1.1023) acc 71.8750 (72.3300) lr 4.1221e-04 eta 5:47:46 +epoch [37/50] batch [685/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.1025 (1.1022) acc 78.1250 (72.3266) lr 4.1221e-04 eta 5:47:42 +epoch [37/50] batch [690/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.6982 (1.1023) acc 71.8750 (72.3460) lr 4.1221e-04 eta 5:47:35 +epoch [37/50] batch [695/1000] time 1.562 (1.567) data 0.001 (0.002) loss 0.8511 (1.1016) acc 78.1250 (72.3831) lr 4.1221e-04 eta 5:47:26 +epoch [37/50] batch [700/1000] time 1.559 (1.567) data 0.001 (0.002) loss 1.3262 (1.1029) acc 65.6250 (72.3616) lr 4.1221e-04 eta 5:47:17 +epoch [37/50] batch [705/1000] time 1.574 (1.567) data 0.000 (0.002) loss 1.1260 (1.1021) acc 71.8750 (72.3803) lr 4.1221e-04 eta 5:47:09 +epoch [37/50] batch [710/1000] time 1.561 (1.567) data 0.000 (0.002) loss 0.9243 (1.1011) acc 71.8750 (72.3856) lr 4.1221e-04 eta 5:47:00 +epoch [37/50] batch [715/1000] time 1.555 (1.567) data 0.001 (0.002) loss 0.9092 (1.1009) acc 78.1250 (72.3645) lr 4.1221e-04 eta 5:46:52 +epoch [37/50] batch [720/1000] time 1.544 (1.567) data 0.001 (0.002) loss 1.1455 (1.1020) acc 65.6250 (72.3394) lr 4.1221e-04 eta 5:46:44 +epoch [37/50] batch [725/1000] time 1.556 (1.567) data 0.001 (0.002) loss 1.0586 (1.1031) acc 62.5000 (72.2974) lr 4.1221e-04 eta 5:46:37 +epoch [37/50] batch [730/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.3369 (1.1027) acc 65.6250 (72.3074) lr 4.1221e-04 eta 5:46:29 +epoch [37/50] batch [735/1000] time 1.594 (1.567) data 0.001 (0.002) loss 1.5273 (1.1040) acc 62.5000 (72.2917) lr 4.1221e-04 eta 5:46:21 +epoch [37/50] batch [740/1000] time 1.563 (1.567) data 0.001 (0.002) loss 0.9351 (1.1027) acc 78.1250 (72.3395) lr 4.1221e-04 eta 5:46:13 +epoch [37/50] batch [745/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.2031 (1.1030) acc 62.5000 (72.3280) lr 4.1221e-04 eta 5:46:05 +epoch [37/50] batch [750/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.2432 (1.1033) 
acc 59.3750 (72.3083) lr 4.1221e-04 eta 5:46:00 +epoch [37/50] batch [755/1000] time 1.575 (1.567) data 0.001 (0.002) loss 1.2852 (1.1033) acc 65.6250 (72.2972) lr 4.1221e-04 eta 5:45:53 +epoch [37/50] batch [760/1000] time 1.581 (1.567) data 0.000 (0.002) loss 1.2773 (1.1039) acc 75.0000 (72.2985) lr 4.1221e-04 eta 5:45:45 +epoch [37/50] batch [765/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.8052 (1.1033) acc 71.8750 (72.3080) lr 4.1221e-04 eta 5:45:36 +epoch [37/50] batch [770/1000] time 1.578 (1.567) data 0.000 (0.002) loss 1.3154 (1.1032) acc 62.5000 (72.3011) lr 4.1221e-04 eta 5:45:28 +epoch [37/50] batch [775/1000] time 1.569 (1.567) data 0.000 (0.002) loss 0.8008 (1.1032) acc 81.2500 (72.2944) lr 4.1221e-04 eta 5:45:20 +epoch [37/50] batch [780/1000] time 1.582 (1.567) data 0.001 (0.002) loss 0.5298 (1.1029) acc 84.3750 (72.3077) lr 4.1221e-04 eta 5:45:13 +epoch [37/50] batch [785/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.0996 (1.1034) acc 68.7500 (72.3209) lr 4.1221e-04 eta 5:45:05 +epoch [37/50] batch [790/1000] time 1.592 (1.567) data 0.000 (0.002) loss 0.6001 (1.1028) acc 87.5000 (72.3418) lr 4.1221e-04 eta 5:44:57 +epoch [37/50] batch [795/1000] time 1.533 (1.567) data 0.001 (0.002) loss 0.5493 (1.1019) acc 81.2500 (72.3467) lr 4.1221e-04 eta 5:44:51 +epoch [37/50] batch [800/1000] time 1.535 (1.567) data 0.000 (0.002) loss 0.9336 (1.1030) acc 78.1250 (72.3398) lr 4.1221e-04 eta 5:44:43 +epoch [37/50] batch [805/1000] time 1.574 (1.567) data 0.000 (0.002) loss 2.0215 (1.1028) acc 59.3750 (72.3602) lr 4.1221e-04 eta 5:44:36 +epoch [37/50] batch [810/1000] time 1.527 (1.567) data 0.000 (0.002) loss 1.4912 (1.1040) acc 65.6250 (72.3418) lr 4.1221e-04 eta 5:44:27 +epoch [37/50] batch [815/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.1143 (1.1025) acc 78.1250 (72.3965) lr 4.1221e-04 eta 5:44:19 +epoch [37/50] batch [820/1000] time 1.581 (1.567) data 0.000 (0.002) loss 0.9282 (1.1015) acc 78.1250 (72.4009) lr 4.1221e-04 eta 5:44:11 
+epoch [37/50] batch [825/1000] time 1.595 (1.567) data 0.001 (0.002) loss 1.3750 (1.1008) acc 62.5000 (72.4129) lr 4.1221e-04 eta 5:44:03 +epoch [37/50] batch [830/1000] time 1.599 (1.567) data 0.001 (0.002) loss 1.0225 (1.1006) acc 71.8750 (72.4247) lr 4.1221e-04 eta 5:43:56 +epoch [37/50] batch [835/1000] time 1.719 (1.567) data 0.000 (0.002) loss 1.1914 (1.0997) acc 71.8750 (72.4364) lr 4.1221e-04 eta 5:43:49 +epoch [37/50] batch [840/1000] time 1.535 (1.567) data 0.001 (0.002) loss 0.7744 (1.0979) acc 84.3750 (72.4888) lr 4.1221e-04 eta 5:43:41 +epoch [37/50] batch [845/1000] time 1.602 (1.567) data 0.000 (0.002) loss 0.8242 (1.0979) acc 65.6250 (72.4889) lr 4.1221e-04 eta 5:43:34 +epoch [37/50] batch [850/1000] time 1.550 (1.567) data 0.000 (0.002) loss 0.9810 (1.0980) acc 68.7500 (72.4669) lr 4.1221e-04 eta 5:43:25 +epoch [37/50] batch [855/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.1758 (1.0995) acc 75.0000 (72.4342) lr 4.1221e-04 eta 5:43:16 +epoch [37/50] batch [860/1000] time 1.544 (1.567) data 0.000 (0.002) loss 1.0391 (1.0992) acc 71.8750 (72.4419) lr 4.1221e-04 eta 5:43:08 +epoch [37/50] batch [865/1000] time 1.559 (1.567) data 0.000 (0.002) loss 0.9531 (1.0998) acc 84.3750 (72.4386) lr 4.1221e-04 eta 5:42:59 +epoch [37/50] batch [870/1000] time 1.569 (1.567) data 0.000 (0.002) loss 0.7417 (1.0991) acc 78.1250 (72.4282) lr 4.1221e-04 eta 5:42:51 +epoch [37/50] batch [875/1000] time 1.576 (1.567) data 0.000 (0.002) loss 1.0225 (1.0996) acc 62.5000 (72.3786) lr 4.1221e-04 eta 5:42:45 +epoch [37/50] batch [880/1000] time 1.555 (1.567) data 0.001 (0.002) loss 1.5371 (1.1003) acc 59.3750 (72.3757) lr 4.1221e-04 eta 5:42:37 +epoch [37/50] batch [885/1000] time 1.549 (1.567) data 0.001 (0.002) loss 0.5952 (1.0998) acc 81.2500 (72.3976) lr 4.1221e-04 eta 5:42:29 +epoch [37/50] batch [890/1000] time 1.575 (1.567) data 0.000 (0.002) loss 0.6436 (1.0994) acc 81.2500 (72.4017) lr 4.1221e-04 eta 5:42:21 +epoch [37/50] batch [895/1000] time 1.555 (1.567) 
data 0.000 (0.002) loss 1.7598 (1.0999) acc 62.5000 (72.4022) lr 4.1221e-04 eta 5:42:12 +epoch [37/50] batch [900/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.2256 (1.1002) acc 62.5000 (72.3854) lr 4.1221e-04 eta 5:42:07 +epoch [37/50] batch [905/1000] time 1.569 (1.567) data 0.001 (0.002) loss 1.4697 (1.1009) acc 65.6250 (72.3895) lr 4.1221e-04 eta 5:41:59 +epoch [37/50] batch [910/1000] time 1.593 (1.567) data 0.001 (0.002) loss 1.4873 (1.1026) acc 62.5000 (72.3283) lr 4.1221e-04 eta 5:41:52 +epoch [37/50] batch [915/1000] time 1.555 (1.567) data 0.001 (0.002) loss 1.0664 (1.1029) acc 75.0000 (72.3156) lr 4.1221e-04 eta 5:41:44 +epoch [37/50] batch [920/1000] time 1.564 (1.567) data 0.001 (0.002) loss 0.7505 (1.1021) acc 90.6250 (72.3471) lr 4.1221e-04 eta 5:41:35 +epoch [37/50] batch [925/1000] time 1.556 (1.567) data 0.001 (0.002) loss 1.7861 (1.1025) acc 59.3750 (72.3446) lr 4.1221e-04 eta 5:41:26 +epoch [37/50] batch [930/1000] time 1.545 (1.567) data 0.000 (0.001) loss 1.0596 (1.1017) acc 65.6250 (72.3589) lr 4.1221e-04 eta 5:41:18 +epoch [37/50] batch [935/1000] time 1.576 (1.567) data 0.001 (0.001) loss 1.0049 (1.1012) acc 78.1250 (72.3830) lr 4.1221e-04 eta 5:41:10 +epoch [37/50] batch [940/1000] time 1.589 (1.567) data 0.001 (0.001) loss 1.0371 (1.1010) acc 71.8750 (72.3903) lr 4.1221e-04 eta 5:41:03 +epoch [37/50] batch [945/1000] time 1.557 (1.567) data 0.000 (0.001) loss 0.7881 (1.1010) acc 71.8750 (72.3942) lr 4.1221e-04 eta 5:40:56 +epoch [37/50] batch [950/1000] time 1.561 (1.567) data 0.000 (0.001) loss 1.0850 (1.1004) acc 71.8750 (72.3882) lr 4.1221e-04 eta 5:40:48 +epoch [37/50] batch [955/1000] time 1.546 (1.567) data 0.000 (0.001) loss 0.9121 (1.1008) acc 71.8750 (72.3822) lr 4.1221e-04 eta 5:40:39 +epoch [37/50] batch [960/1000] time 1.557 (1.567) data 0.000 (0.001) loss 1.2764 (1.1011) acc 71.8750 (72.3665) lr 4.1221e-04 eta 5:40:31 +epoch [37/50] batch [965/1000] time 1.587 (1.567) data 0.000 (0.001) loss 0.8140 (1.0997) acc 81.2500 
(72.3964) lr 4.1221e-04 eta 5:40:22 +epoch [37/50] batch [970/1000] time 1.543 (1.567) data 0.001 (0.001) loss 0.6514 (1.0985) acc 84.3750 (72.4227) lr 4.1221e-04 eta 5:40:14 +epoch [37/50] batch [975/1000] time 1.549 (1.567) data 0.000 (0.001) loss 1.1348 (1.0995) acc 78.1250 (72.4071) lr 4.1221e-04 eta 5:40:05 +epoch [37/50] batch [980/1000] time 1.557 (1.567) data 0.001 (0.001) loss 1.1709 (1.0996) acc 65.6250 (72.3948) lr 4.1221e-04 eta 5:39:56 +epoch [37/50] batch [985/1000] time 1.558 (1.567) data 0.001 (0.001) loss 0.5767 (1.0991) acc 81.2500 (72.4048) lr 4.1221e-04 eta 5:39:48 +epoch [37/50] batch [990/1000] time 1.541 (1.567) data 0.000 (0.001) loss 1.3369 (1.0989) acc 75.0000 (72.4085) lr 4.1221e-04 eta 5:39:41 +epoch [37/50] batch [995/1000] time 1.546 (1.567) data 0.000 (0.001) loss 1.1182 (1.0992) acc 68.7500 (72.3869) lr 4.1221e-04 eta 5:39:32 +epoch [37/50] batch [1000/1000] time 1.553 (1.566) data 0.000 (0.001) loss 1.8027 (1.0997) acc 50.0000 (72.3656) lr 3.6258e-04 eta 5:39:23 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,360 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.3% +epoch [38/50] batch [5/1000] time 1.533 (1.947) data 0.000 (0.464) loss 0.9707 (1.0643) acc 71.8750 (72.5000) lr 3.6258e-04 eta 7:01:36 +epoch [38/50] batch [10/1000] time 1.575 (1.756) data 0.000 (0.232) loss 0.9370 (1.0711) acc 81.2500 (74.6875) lr 3.6258e-04 eta 6:20:04 +epoch [38/50] batch [15/1000] time 1.572 (1.692) data 0.001 (0.155) loss 1.3896 (1.1267) acc 71.8750 (72.9167) lr 3.6258e-04 eta 6:06:17 +epoch [38/50] batch [20/1000] time 1.565 (1.661) data 0.001 (0.116) loss 0.9624 (1.1211) acc 68.7500 (72.0312) lr 3.6258e-04 eta 5:59:18 +epoch [38/50] batch [25/1000] time 1.563 (1.643) data 0.001 (0.093) loss 1.2539 (1.1345) acc 75.0000 (71.5000) lr 3.6258e-04 eta 5:55:19 +epoch [38/50] batch [30/1000] time 1.532 (1.628) data 0.001 (0.078) loss 0.9023 (1.1398) acc 84.3750 (71.9792) lr 3.6258e-04 eta 5:51:53 +epoch [38/50] batch [35/1000] 
time 1.582 (1.619) data 0.000 (0.067) loss 1.2490 (1.1122) acc 62.5000 (72.5893) lr 3.6258e-04 eta 5:49:49 +epoch [38/50] batch [40/1000] time 1.580 (1.613) data 0.000 (0.058) loss 1.3486 (1.1160) acc 65.6250 (72.5781) lr 3.6258e-04 eta 5:48:30 +epoch [38/50] batch [45/1000] time 1.576 (1.609) data 0.001 (0.052) loss 1.8848 (1.1384) acc 59.3750 (71.9444) lr 3.6258e-04 eta 5:47:20 +epoch [38/50] batch [50/1000] time 1.556 (1.603) data 0.000 (0.047) loss 0.8608 (1.1488) acc 71.8750 (71.8750) lr 3.6258e-04 eta 5:46:03 +epoch [38/50] batch [55/1000] time 1.566 (1.600) data 0.000 (0.043) loss 1.4883 (1.1384) acc 68.7500 (72.1591) lr 3.6258e-04 eta 5:45:06 +epoch [38/50] batch [60/1000] time 1.541 (1.599) data 0.001 (0.039) loss 1.1670 (1.1532) acc 81.2500 (72.0833) lr 3.6258e-04 eta 5:44:54 +epoch [38/50] batch [65/1000] time 1.582 (1.596) data 0.000 (0.036) loss 0.8804 (1.1374) acc 78.1250 (72.3077) lr 3.6258e-04 eta 5:44:07 +epoch [38/50] batch [70/1000] time 1.566 (1.594) data 0.000 (0.034) loss 1.3633 (1.1366) acc 71.8750 (72.6339) lr 3.6258e-04 eta 5:43:28 +epoch [38/50] batch [75/1000] time 1.558 (1.592) data 0.000 (0.031) loss 1.2061 (1.1370) acc 75.0000 (72.6667) lr 3.6258e-04 eta 5:43:00 +epoch [38/50] batch [80/1000] time 1.571 (1.591) data 0.001 (0.029) loss 1.4277 (1.1361) acc 65.6250 (72.5000) lr 3.6258e-04 eta 5:42:30 +epoch [38/50] batch [85/1000] time 1.573 (1.589) data 0.000 (0.028) loss 1.0527 (1.1393) acc 78.1250 (72.5735) lr 3.6258e-04 eta 5:42:07 +epoch [38/50] batch [90/1000] time 1.572 (1.588) data 0.000 (0.026) loss 1.4131 (1.1347) acc 65.6250 (72.5000) lr 3.6258e-04 eta 5:41:44 +epoch [38/50] batch [95/1000] time 1.563 (1.586) data 0.001 (0.025) loss 1.1924 (1.1433) acc 65.6250 (72.1053) lr 3.6258e-04 eta 5:41:10 +epoch [38/50] batch [100/1000] time 1.553 (1.585) data 0.001 (0.024) loss 1.0537 (1.1402) acc 71.8750 (72.3438) lr 3.6258e-04 eta 5:40:48 +epoch [38/50] batch [105/1000] time 1.560 (1.585) data 0.000 (0.023) loss 0.9438 (1.1297) acc 
81.2500 (72.5298) lr 3.6258e-04 eta 5:40:42 +epoch [38/50] batch [110/1000] time 1.560 (1.584) data 0.000 (0.022) loss 0.9712 (1.1273) acc 78.1250 (72.6705) lr 3.6258e-04 eta 5:40:17 +epoch [38/50] batch [115/1000] time 1.584 (1.583) data 0.000 (0.021) loss 0.8081 (1.1266) acc 68.7500 (72.3370) lr 3.6258e-04 eta 5:40:02 +epoch [38/50] batch [120/1000] time 1.539 (1.582) data 0.000 (0.020) loss 2.3066 (1.1363) acc 46.8750 (72.1094) lr 3.6258e-04 eta 5:39:36 +epoch [38/50] batch [125/1000] time 1.561 (1.582) data 0.001 (0.019) loss 0.8643 (1.1271) acc 78.1250 (72.3750) lr 3.6258e-04 eta 5:39:22 +epoch [38/50] batch [130/1000] time 1.563 (1.581) data 0.000 (0.018) loss 1.1885 (1.1189) acc 62.5000 (72.5000) lr 3.6258e-04 eta 5:39:06 +epoch [38/50] batch [135/1000] time 1.533 (1.580) data 0.000 (0.018) loss 1.2666 (1.1197) acc 62.5000 (72.4074) lr 3.6258e-04 eta 5:38:44 +epoch [38/50] batch [140/1000] time 1.560 (1.579) data 0.001 (0.017) loss 1.3037 (1.1248) acc 78.1250 (72.4777) lr 3.6258e-04 eta 5:38:29 +epoch [38/50] batch [145/1000] time 1.568 (1.580) data 0.001 (0.016) loss 0.6406 (1.1249) acc 75.0000 (72.4353) lr 3.6258e-04 eta 5:38:30 +epoch [38/50] batch [150/1000] time 1.549 (1.580) data 0.000 (0.016) loss 0.6938 (1.1186) acc 78.1250 (72.5208) lr 3.6258e-04 eta 5:38:18 +epoch [38/50] batch [155/1000] time 1.574 (1.579) data 0.000 (0.015) loss 0.9141 (1.1174) acc 81.2500 (72.6008) lr 3.6258e-04 eta 5:38:06 +epoch [38/50] batch [160/1000] time 1.557 (1.579) data 0.001 (0.015) loss 1.0010 (1.1160) acc 75.0000 (72.5391) lr 3.6258e-04 eta 5:37:49 +epoch [38/50] batch [165/1000] time 1.542 (1.578) data 0.001 (0.015) loss 0.8384 (1.1099) acc 81.2500 (72.4621) lr 3.6258e-04 eta 5:37:33 +epoch [38/50] batch [170/1000] time 1.553 (1.577) data 0.000 (0.014) loss 0.9053 (1.1070) acc 71.8750 (72.5184) lr 3.6258e-04 eta 5:37:19 +epoch [38/50] batch [175/1000] time 1.576 (1.577) data 0.000 (0.014) loss 0.9121 (1.1041) acc 84.3750 (72.6250) lr 3.6258e-04 eta 5:37:09 +epoch 
[38/50] batch [180/1000] time 1.556 (1.577) data 0.001 (0.013) loss 1.2979 (1.1017) acc 68.7500 (72.6562) lr 3.6258e-04 eta 5:36:56 +epoch [38/50] batch [185/1000] time 1.540 (1.576) data 0.001 (0.013) loss 0.8403 (1.0960) acc 84.3750 (72.8209) lr 3.6258e-04 eta 5:36:42 +epoch [38/50] batch [190/1000] time 1.558 (1.576) data 0.001 (0.013) loss 0.7915 (1.0948) acc 87.5000 (72.8783) lr 3.6258e-04 eta 5:36:30 +epoch [38/50] batch [195/1000] time 1.557 (1.576) data 0.000 (0.012) loss 0.7334 (1.0912) acc 75.0000 (72.9647) lr 3.6258e-04 eta 5:36:20 +epoch [38/50] batch [200/1000] time 1.561 (1.576) data 0.001 (0.012) loss 1.0059 (1.0927) acc 68.7500 (72.9531) lr 3.6258e-04 eta 5:36:06 +epoch [38/50] batch [205/1000] time 1.549 (1.575) data 0.000 (0.012) loss 1.2891 (1.0950) acc 71.8750 (72.9573) lr 3.6258e-04 eta 5:35:54 +epoch [38/50] batch [210/1000] time 1.553 (1.576) data 0.000 (0.012) loss 0.8828 (1.0891) acc 71.8750 (73.0655) lr 3.6258e-04 eta 5:35:53 +epoch [38/50] batch [215/1000] time 1.553 (1.575) data 0.001 (0.011) loss 1.3428 (1.0883) acc 75.0000 (73.0959) lr 3.6258e-04 eta 5:35:40 +epoch [38/50] batch [220/1000] time 1.547 (1.575) data 0.001 (0.011) loss 0.9258 (1.0839) acc 78.1250 (73.2244) lr 3.6258e-04 eta 5:35:24 +epoch [38/50] batch [225/1000] time 1.522 (1.574) data 0.000 (0.011) loss 1.2861 (1.0854) acc 71.8750 (73.2222) lr 3.6258e-04 eta 5:35:08 +epoch [38/50] batch [230/1000] time 1.552 (1.574) data 0.001 (0.011) loss 0.7969 (1.0804) acc 84.3750 (73.4375) lr 3.6258e-04 eta 5:34:56 +epoch [38/50] batch [235/1000] time 1.572 (1.573) data 0.000 (0.010) loss 1.0156 (1.0811) acc 81.2500 (73.3910) lr 3.6258e-04 eta 5:34:44 +epoch [38/50] batch [240/1000] time 1.558 (1.573) data 0.000 (0.010) loss 0.5474 (1.0811) acc 87.5000 (73.3724) lr 3.6258e-04 eta 5:34:35 +epoch [38/50] batch [245/1000] time 1.582 (1.573) data 0.001 (0.010) loss 0.7798 (1.0784) acc 81.2500 (73.4311) lr 3.6258e-04 eta 5:34:28 +epoch [38/50] batch [250/1000] time 1.575 (1.573) data 
0.000 (0.010) loss 0.9473 (1.0767) acc 71.8750 (73.3125) lr 3.6258e-04 eta 5:34:18 +epoch [38/50] batch [255/1000] time 1.558 (1.574) data 0.000 (0.010) loss 0.7036 (1.0748) acc 78.1250 (73.3701) lr 3.6258e-04 eta 5:34:14 +epoch [38/50] batch [260/1000] time 1.553 (1.573) data 0.000 (0.009) loss 0.8032 (1.0734) acc 71.8750 (73.4255) lr 3.6258e-04 eta 5:34:04 +epoch [38/50] batch [265/1000] time 1.560 (1.573) data 0.001 (0.009) loss 0.9053 (1.0770) acc 81.2500 (73.2665) lr 3.6258e-04 eta 5:33:51 +epoch [38/50] batch [270/1000] time 1.524 (1.573) data 0.001 (0.009) loss 0.8105 (1.0753) acc 84.3750 (73.2986) lr 3.6258e-04 eta 5:33:38 +epoch [38/50] batch [275/1000] time 1.556 (1.572) data 0.000 (0.009) loss 1.2383 (1.0748) acc 71.8750 (73.2727) lr 3.6258e-04 eta 5:33:28 +epoch [38/50] batch [280/1000] time 1.597 (1.572) data 0.001 (0.009) loss 0.9795 (1.0743) acc 75.0000 (73.3036) lr 3.6258e-04 eta 5:33:17 +epoch [38/50] batch [285/1000] time 1.591 (1.572) data 0.000 (0.009) loss 1.1855 (1.0763) acc 71.8750 (73.2785) lr 3.6258e-04 eta 5:33:07 +epoch [38/50] batch [290/1000] time 1.546 (1.572) data 0.000 (0.008) loss 1.4482 (1.0790) acc 68.7500 (73.1897) lr 3.6258e-04 eta 5:32:57 +epoch [38/50] batch [295/1000] time 1.731 (1.572) data 0.000 (0.008) loss 0.9365 (1.0762) acc 75.0000 (73.1992) lr 3.6258e-04 eta 5:32:55 +epoch [38/50] batch [300/1000] time 1.553 (1.572) data 0.001 (0.008) loss 1.1494 (1.0753) acc 65.6250 (73.1458) lr 3.6258e-04 eta 5:32:43 +epoch [38/50] batch [305/1000] time 1.581 (1.572) data 0.000 (0.008) loss 0.6006 (1.0759) acc 81.2500 (73.1148) lr 3.6258e-04 eta 5:32:32 +epoch [38/50] batch [310/1000] time 1.563 (1.572) data 0.000 (0.008) loss 1.0869 (1.0780) acc 78.1250 (73.0444) lr 3.6258e-04 eta 5:32:23 +epoch [38/50] batch [315/1000] time 1.544 (1.571) data 0.001 (0.008) loss 0.8105 (1.0765) acc 81.2500 (73.0456) lr 3.6258e-04 eta 5:32:11 +epoch [38/50] batch [320/1000] time 1.571 (1.571) data 0.001 (0.008) loss 0.8774 (1.0754) acc 75.0000 
(73.0371) lr 3.6258e-04 eta 5:32:00 +epoch [38/50] batch [325/1000] time 1.570 (1.571) data 0.000 (0.008) loss 0.8633 (1.0733) acc 75.0000 (73.0385) lr 3.6258e-04 eta 5:31:53 +epoch [38/50] batch [330/1000] time 1.548 (1.571) data 0.001 (0.007) loss 0.8369 (1.0730) acc 81.2500 (73.0019) lr 3.6258e-04 eta 5:31:44 +epoch [38/50] batch [335/1000] time 1.558 (1.571) data 0.000 (0.007) loss 1.4902 (1.0723) acc 71.8750 (72.9757) lr 3.6258e-04 eta 5:31:35 +epoch [38/50] batch [340/1000] time 1.583 (1.571) data 0.001 (0.007) loss 0.5566 (1.0703) acc 87.5000 (73.0331) lr 3.6258e-04 eta 5:31:28 +epoch [38/50] batch [345/1000] time 1.573 (1.571) data 0.001 (0.007) loss 0.9502 (1.0689) acc 78.1250 (73.0344) lr 3.6258e-04 eta 5:31:20 +epoch [38/50] batch [350/1000] time 1.573 (1.571) data 0.001 (0.007) loss 1.0732 (1.0714) acc 75.0000 (72.9554) lr 3.6258e-04 eta 5:31:11 +epoch [38/50] batch [355/1000] time 1.547 (1.571) data 0.001 (0.007) loss 1.0273 (1.0690) acc 75.0000 (73.0018) lr 3.6258e-04 eta 5:31:01 +epoch [38/50] batch [360/1000] time 1.569 (1.571) data 0.001 (0.007) loss 0.7900 (1.0664) acc 84.3750 (73.0556) lr 3.6258e-04 eta 5:30:57 +epoch [38/50] batch [365/1000] time 1.537 (1.571) data 0.000 (0.007) loss 0.9121 (1.0653) acc 71.8750 (73.0565) lr 3.6258e-04 eta 5:30:46 +epoch [38/50] batch [370/1000] time 1.562 (1.571) data 0.000 (0.007) loss 1.0850 (1.0686) acc 68.7500 (72.9730) lr 3.6258e-04 eta 5:30:39 +epoch [38/50] batch [375/1000] time 1.544 (1.571) data 0.000 (0.007) loss 0.9590 (1.0656) acc 65.6250 (73.0250) lr 3.6258e-04 eta 5:30:31 +epoch [38/50] batch [380/1000] time 1.579 (1.571) data 0.001 (0.007) loss 1.4854 (1.0665) acc 68.7500 (73.0592) lr 3.6258e-04 eta 5:30:23 +epoch [38/50] batch [385/1000] time 1.555 (1.571) data 0.001 (0.006) loss 1.2881 (1.0683) acc 71.8750 (73.0195) lr 3.6258e-04 eta 5:30:15 +epoch [38/50] batch [390/1000] time 1.585 (1.571) data 0.001 (0.006) loss 1.8330 (1.0741) acc 62.5000 (72.9327) lr 3.6258e-04 eta 5:30:05 +epoch [38/50] 
batch [395/1000] time 1.562 (1.571) data 0.001 (0.006) loss 1.1318 (1.0753) acc 78.1250 (72.9430) lr 3.6258e-04 eta 5:29:57 +epoch [38/50] batch [400/1000] time 1.592 (1.571) data 0.001 (0.006) loss 0.8813 (1.0776) acc 81.2500 (72.9297) lr 3.6258e-04 eta 5:29:50 +epoch [38/50] batch [405/1000] time 1.549 (1.571) data 0.001 (0.006) loss 1.0820 (1.0796) acc 65.6250 (72.8395) lr 3.6258e-04 eta 5:29:46 +epoch [38/50] batch [410/1000] time 1.568 (1.571) data 0.000 (0.006) loss 0.8418 (1.0806) acc 78.1250 (72.8201) lr 3.6258e-04 eta 5:29:38 +epoch [38/50] batch [415/1000] time 1.573 (1.571) data 0.000 (0.006) loss 1.1309 (1.0807) acc 65.6250 (72.7711) lr 3.6258e-04 eta 5:29:30 +epoch [38/50] batch [420/1000] time 1.564 (1.571) data 0.000 (0.006) loss 1.1338 (1.0831) acc 75.0000 (72.6935) lr 3.6258e-04 eta 5:29:21 +epoch [38/50] batch [425/1000] time 1.564 (1.571) data 0.000 (0.006) loss 0.7334 (1.0822) acc 78.1250 (72.7279) lr 3.6258e-04 eta 5:29:12 +epoch [38/50] batch [430/1000] time 1.589 (1.571) data 0.000 (0.006) loss 1.7012 (1.0833) acc 68.7500 (72.7180) lr 3.6258e-04 eta 5:29:04 +epoch [38/50] batch [435/1000] time 1.566 (1.571) data 0.000 (0.006) loss 1.4746 (1.0818) acc 62.5000 (72.7299) lr 3.6258e-04 eta 5:28:58 +epoch [38/50] batch [440/1000] time 1.577 (1.571) data 0.000 (0.006) loss 0.7827 (1.0811) acc 78.1250 (72.7557) lr 3.6258e-04 eta 5:28:51 +epoch [38/50] batch [445/1000] time 1.563 (1.571) data 0.001 (0.006) loss 1.1494 (1.0838) acc 71.8750 (72.6826) lr 3.6258e-04 eta 5:28:42 +epoch [38/50] batch [450/1000] time 1.542 (1.571) data 0.001 (0.006) loss 1.0557 (1.0836) acc 68.7500 (72.6806) lr 3.6258e-04 eta 5:28:37 +epoch [38/50] batch [455/1000] time 1.566 (1.571) data 0.001 (0.006) loss 1.4326 (1.0854) acc 62.5000 (72.6099) lr 3.6258e-04 eta 5:28:28 +epoch [38/50] batch [460/1000] time 1.587 (1.571) data 0.001 (0.006) loss 0.9404 (1.0851) acc 68.7500 (72.5543) lr 3.6258e-04 eta 5:28:21 +epoch [38/50] batch [465/1000] time 1.564 (1.571) data 0.001 
(0.005) loss 0.8071 (1.0839) acc 71.8750 (72.5538) lr 3.6258e-04 eta 5:28:13 +epoch [38/50] batch [470/1000] time 1.537 (1.571) data 0.001 (0.005) loss 0.8262 (1.0845) acc 68.7500 (72.5266) lr 3.6258e-04 eta 5:28:04 +epoch [38/50] batch [475/1000] time 1.549 (1.571) data 0.001 (0.005) loss 0.8701 (1.0827) acc 71.8750 (72.5526) lr 3.6258e-04 eta 5:27:54 +epoch [38/50] batch [480/1000] time 1.556 (1.571) data 0.000 (0.005) loss 1.5977 (1.0827) acc 65.6250 (72.5846) lr 3.6258e-04 eta 5:27:44 +epoch [38/50] batch [485/1000] time 1.537 (1.571) data 0.000 (0.005) loss 0.9619 (1.0825) acc 71.8750 (72.5709) lr 3.6258e-04 eta 5:27:35 +epoch [38/50] batch [490/1000] time 1.562 (1.570) data 0.000 (0.005) loss 0.9868 (1.0828) acc 81.2500 (72.5893) lr 3.6258e-04 eta 5:27:26 +epoch [38/50] batch [495/1000] time 1.546 (1.570) data 0.000 (0.005) loss 1.4531 (1.0840) acc 71.8750 (72.5947) lr 3.6258e-04 eta 5:27:15 +epoch [38/50] batch [500/1000] time 1.558 (1.570) data 0.001 (0.005) loss 0.9102 (1.0826) acc 71.8750 (72.5938) lr 3.6258e-04 eta 5:27:07 +epoch [38/50] batch [505/1000] time 1.555 (1.570) data 0.000 (0.005) loss 0.8936 (1.0816) acc 78.1250 (72.6052) lr 3.6258e-04 eta 5:26:56 +epoch [38/50] batch [510/1000] time 1.571 (1.570) data 0.001 (0.005) loss 1.3066 (1.0844) acc 65.6250 (72.5551) lr 3.6258e-04 eta 5:26:52 +epoch [38/50] batch [515/1000] time 1.566 (1.570) data 0.000 (0.005) loss 1.0049 (1.0822) acc 71.8750 (72.5850) lr 3.6258e-04 eta 5:26:44 +epoch [38/50] batch [520/1000] time 1.554 (1.570) data 0.000 (0.005) loss 1.3779 (1.0847) acc 68.7500 (72.5240) lr 3.6258e-04 eta 5:26:35 +epoch [38/50] batch [525/1000] time 1.586 (1.570) data 0.001 (0.005) loss 0.8232 (1.0852) acc 75.0000 (72.5060) lr 3.6258e-04 eta 5:26:26 +epoch [38/50] batch [530/1000] time 1.570 (1.570) data 0.001 (0.005) loss 0.8394 (1.0844) acc 71.8750 (72.4941) lr 3.6258e-04 eta 5:26:18 +epoch [38/50] batch [535/1000] time 1.570 (1.570) data 0.000 (0.005) loss 1.1152 (1.0838) acc 75.0000 (72.5175) lr 
3.6258e-04 eta 5:26:10 +epoch [38/50] batch [540/1000] time 1.555 (1.570) data 0.000 (0.005) loss 1.0605 (1.0826) acc 68.7500 (72.5521) lr 3.6258e-04 eta 5:26:01 +epoch [38/50] batch [545/1000] time 1.543 (1.570) data 0.000 (0.005) loss 0.6792 (1.0809) acc 81.2500 (72.5803) lr 3.6258e-04 eta 5:25:51 +epoch [38/50] batch [550/1000] time 1.539 (1.570) data 0.001 (0.005) loss 1.0420 (1.0836) acc 75.0000 (72.5398) lr 3.6258e-04 eta 5:25:42 +epoch [38/50] batch [555/1000] time 1.590 (1.570) data 0.001 (0.005) loss 0.9395 (1.0817) acc 78.1250 (72.5901) lr 3.6258e-04 eta 5:25:37 +epoch [38/50] batch [560/1000] time 1.559 (1.570) data 0.000 (0.005) loss 0.7153 (1.0814) acc 84.3750 (72.6116) lr 3.6258e-04 eta 5:25:28 +epoch [38/50] batch [565/1000] time 1.557 (1.570) data 0.001 (0.005) loss 1.0137 (1.0802) acc 68.7500 (72.6217) lr 3.6258e-04 eta 5:25:18 +epoch [38/50] batch [570/1000] time 1.552 (1.570) data 0.000 (0.005) loss 0.6455 (1.0797) acc 84.3750 (72.5987) lr 3.6258e-04 eta 5:25:09 +epoch [38/50] batch [575/1000] time 1.563 (1.569) data 0.001 (0.005) loss 0.8413 (1.0798) acc 78.1250 (72.5978) lr 3.6258e-04 eta 5:25:00 +epoch [38/50] batch [580/1000] time 1.562 (1.569) data 0.001 (0.004) loss 1.1426 (1.0785) acc 71.8750 (72.6185) lr 3.6258e-04 eta 5:24:52 +epoch [38/50] batch [585/1000] time 1.566 (1.569) data 0.000 (0.004) loss 0.9429 (1.0802) acc 68.7500 (72.6015) lr 3.6258e-04 eta 5:24:45 +epoch [38/50] batch [590/1000] time 1.542 (1.569) data 0.000 (0.004) loss 0.9248 (1.0784) acc 68.7500 (72.6324) lr 3.6258e-04 eta 5:24:36 +epoch [38/50] batch [595/1000] time 1.550 (1.569) data 0.001 (0.004) loss 0.8848 (1.0789) acc 78.1250 (72.6471) lr 3.6258e-04 eta 5:24:28 +epoch [38/50] batch [600/1000] time 1.567 (1.570) data 0.001 (0.004) loss 1.0479 (1.0775) acc 65.6250 (72.6562) lr 3.6258e-04 eta 5:24:23 +epoch [38/50] batch [605/1000] time 1.557 (1.570) data 0.001 (0.004) loss 1.1064 (1.0788) acc 68.7500 (72.6343) lr 3.6258e-04 eta 5:24:15 +epoch [38/50] batch 
[610/1000] time 1.575 (1.570) data 0.000 (0.004) loss 0.9502 (1.0780) acc 71.8750 (72.6332) lr 3.6258e-04 eta 5:24:07 +epoch [38/50] batch [615/1000] time 1.579 (1.570) data 0.001 (0.004) loss 0.7983 (1.0774) acc 81.2500 (72.6575) lr 3.6258e-04 eta 5:23:59 +epoch [38/50] batch [620/1000] time 1.569 (1.570) data 0.001 (0.004) loss 1.6270 (1.0782) acc 56.2500 (72.6562) lr 3.6258e-04 eta 5:23:52 +epoch [38/50] batch [625/1000] time 1.575 (1.570) data 0.001 (0.004) loss 1.3750 (1.0789) acc 68.7500 (72.6400) lr 3.6258e-04 eta 5:23:43 +epoch [38/50] batch [630/1000] time 1.543 (1.570) data 0.000 (0.004) loss 0.7744 (1.0795) acc 78.1250 (72.6190) lr 3.6258e-04 eta 5:23:35 +epoch [38/50] batch [635/1000] time 1.595 (1.570) data 0.001 (0.004) loss 1.5029 (1.0794) acc 62.5000 (72.6378) lr 3.6258e-04 eta 5:23:28 +epoch [38/50] batch [640/1000] time 1.593 (1.570) data 0.000 (0.004) loss 0.8462 (1.0779) acc 81.2500 (72.6904) lr 3.6258e-04 eta 5:23:21 +epoch [38/50] batch [645/1000] time 1.564 (1.570) data 0.000 (0.004) loss 1.4805 (1.0785) acc 62.5000 (72.6696) lr 3.6258e-04 eta 5:23:13 +epoch [38/50] batch [650/1000] time 1.576 (1.570) data 0.001 (0.004) loss 0.6187 (1.0776) acc 87.5000 (72.7067) lr 3.6258e-04 eta 5:23:04 +epoch [38/50] batch [655/1000] time 1.554 (1.570) data 0.000 (0.004) loss 0.8770 (1.0779) acc 75.0000 (72.7004) lr 3.6258e-04 eta 5:22:56 +epoch [38/50] batch [660/1000] time 1.720 (1.570) data 0.001 (0.004) loss 0.6519 (1.0769) acc 75.0000 (72.7273) lr 3.6258e-04 eta 5:22:51 +epoch [38/50] batch [665/1000] time 1.556 (1.570) data 0.000 (0.004) loss 1.1250 (1.0771) acc 75.0000 (72.7444) lr 3.6258e-04 eta 5:22:43 +epoch [38/50] batch [670/1000] time 1.558 (1.570) data 0.001 (0.004) loss 0.9102 (1.0762) acc 71.8750 (72.7612) lr 3.6258e-04 eta 5:22:34 +epoch [38/50] batch [675/1000] time 1.571 (1.570) data 0.000 (0.004) loss 1.5234 (1.0780) acc 68.7500 (72.7454) lr 3.6258e-04 eta 5:22:26 +epoch [38/50] batch [680/1000] time 1.553 (1.570) data 0.000 (0.004) loss 
1.3906 (1.0773) acc 62.5000 (72.7574) lr 3.6258e-04 eta 5:22:18 +epoch [38/50] batch [685/1000] time 1.561 (1.570) data 0.000 (0.004) loss 0.9268 (1.0786) acc 71.8750 (72.7281) lr 3.6258e-04 eta 5:22:09 +epoch [38/50] batch [690/1000] time 1.576 (1.570) data 0.001 (0.004) loss 1.0576 (1.0775) acc 75.0000 (72.7536) lr 3.6258e-04 eta 5:22:03 +epoch [38/50] batch [695/1000] time 1.574 (1.570) data 0.000 (0.004) loss 0.7202 (1.0778) acc 81.2500 (72.7878) lr 3.6258e-04 eta 5:21:56 +epoch [38/50] batch [700/1000] time 1.578 (1.570) data 0.001 (0.004) loss 1.2920 (1.0769) acc 65.6250 (72.8080) lr 3.6258e-04 eta 5:21:48 +epoch [38/50] batch [705/1000] time 1.727 (1.570) data 0.001 (0.004) loss 0.8203 (1.0769) acc 75.0000 (72.7881) lr 3.6258e-04 eta 5:21:41 +epoch [38/50] batch [710/1000] time 1.559 (1.570) data 0.001 (0.004) loss 1.2412 (1.0777) acc 75.0000 (72.7553) lr 3.6258e-04 eta 5:21:32 +epoch [38/50] batch [715/1000] time 1.557 (1.570) data 0.001 (0.004) loss 0.8579 (1.0764) acc 71.8750 (72.7972) lr 3.6258e-04 eta 5:21:24 +epoch [38/50] batch [720/1000] time 1.567 (1.570) data 0.000 (0.004) loss 1.1924 (1.0770) acc 68.7500 (72.7908) lr 3.6258e-04 eta 5:21:15 +epoch [38/50] batch [725/1000] time 1.592 (1.570) data 0.000 (0.004) loss 1.8574 (1.0774) acc 62.5000 (72.7716) lr 3.6258e-04 eta 5:21:07 +epoch [38/50] batch [730/1000] time 1.536 (1.570) data 0.001 (0.004) loss 0.5244 (1.0768) acc 84.3750 (72.7825) lr 3.6258e-04 eta 5:20:59 +epoch [38/50] batch [735/1000] time 1.556 (1.570) data 0.000 (0.004) loss 1.0850 (1.0764) acc 75.0000 (72.7891) lr 3.6258e-04 eta 5:20:51 +epoch [38/50] batch [740/1000] time 1.552 (1.570) data 0.001 (0.004) loss 0.8760 (1.0751) acc 71.8750 (72.8252) lr 3.6258e-04 eta 5:20:42 +epoch [38/50] batch [745/1000] time 1.559 (1.570) data 0.000 (0.004) loss 0.8315 (1.0762) acc 75.0000 (72.8020) lr 3.6258e-04 eta 5:20:34 +epoch [38/50] batch [750/1000] time 1.564 (1.570) data 0.000 (0.004) loss 0.8799 (1.0761) acc 68.7500 (72.7958) lr 3.6258e-04 
eta 5:20:29 +epoch [38/50] batch [755/1000] time 1.587 (1.570) data 0.001 (0.004) loss 0.6660 (1.0755) acc 84.3750 (72.8146) lr 3.6258e-04 eta 5:20:20 +epoch [38/50] batch [760/1000] time 1.518 (1.570) data 0.000 (0.004) loss 0.8032 (1.0741) acc 81.2500 (72.8207) lr 3.6258e-04 eta 5:20:10 +epoch [38/50] batch [765/1000] time 1.568 (1.569) data 0.000 (0.004) loss 1.0078 (1.0733) acc 75.0000 (72.8268) lr 3.6258e-04 eta 5:20:01 +epoch [38/50] batch [770/1000] time 1.574 (1.569) data 0.000 (0.004) loss 0.7402 (1.0717) acc 75.0000 (72.8328) lr 3.6258e-04 eta 5:19:53 +epoch [38/50] batch [775/1000] time 1.580 (1.569) data 0.001 (0.003) loss 0.8179 (1.0706) acc 71.8750 (72.8427) lr 3.6258e-04 eta 5:19:43 +epoch [38/50] batch [780/1000] time 1.567 (1.569) data 0.000 (0.003) loss 1.2490 (1.0702) acc 62.5000 (72.8446) lr 3.6258e-04 eta 5:19:36 +epoch [38/50] batch [785/1000] time 1.547 (1.569) data 0.001 (0.003) loss 1.0039 (1.0722) acc 68.7500 (72.7946) lr 3.6258e-04 eta 5:19:28 +epoch [38/50] batch [790/1000] time 1.553 (1.569) data 0.001 (0.003) loss 0.7612 (1.0710) acc 75.0000 (72.8125) lr 3.6258e-04 eta 5:19:20 +epoch [38/50] batch [795/1000] time 1.556 (1.569) data 0.001 (0.003) loss 0.3250 (1.0707) acc 84.3750 (72.8066) lr 3.6258e-04 eta 5:19:12 +epoch [38/50] batch [800/1000] time 1.566 (1.569) data 0.000 (0.003) loss 0.9614 (1.0705) acc 78.1250 (72.8242) lr 3.6258e-04 eta 5:19:04 +epoch [38/50] batch [805/1000] time 1.586 (1.569) data 0.000 (0.003) loss 0.9800 (1.0699) acc 71.8750 (72.8455) lr 3.6258e-04 eta 5:18:56 +epoch [38/50] batch [810/1000] time 1.546 (1.569) data 0.001 (0.003) loss 1.0205 (1.0703) acc 75.0000 (72.8318) lr 3.6258e-04 eta 5:18:47 +epoch [38/50] batch [815/1000] time 1.559 (1.569) data 0.001 (0.003) loss 1.0381 (1.0703) acc 75.0000 (72.8451) lr 3.6258e-04 eta 5:18:42 +epoch [38/50] batch [820/1000] time 1.579 (1.569) data 0.000 (0.003) loss 1.2979 (1.0698) acc 78.1250 (72.8773) lr 3.6258e-04 eta 5:18:33 +epoch [38/50] batch [825/1000] time 
1.560 (1.569) data 0.000 (0.003) loss 1.7422 (1.0722) acc 59.3750 (72.8409) lr 3.6258e-04 eta 5:18:25 +epoch [38/50] batch [830/1000] time 1.577 (1.569) data 0.001 (0.003) loss 0.8477 (1.0730) acc 75.0000 (72.8313) lr 3.6258e-04 eta 5:18:17 +epoch [38/50] batch [835/1000] time 1.546 (1.569) data 0.000 (0.003) loss 1.3857 (1.0717) acc 68.7500 (72.8743) lr 3.6258e-04 eta 5:18:08 +epoch [38/50] batch [840/1000] time 1.571 (1.569) data 0.001 (0.003) loss 0.9370 (1.0715) acc 81.2500 (72.8683) lr 3.6258e-04 eta 5:18:00 +epoch [38/50] batch [845/1000] time 1.571 (1.569) data 0.001 (0.003) loss 1.5762 (1.0734) acc 62.5000 (72.8513) lr 3.6258e-04 eta 5:17:53 +epoch [38/50] batch [850/1000] time 1.543 (1.569) data 0.001 (0.003) loss 0.9751 (1.0737) acc 78.1250 (72.8235) lr 3.6258e-04 eta 5:17:44 +epoch [38/50] batch [855/1000] time 1.560 (1.569) data 0.001 (0.003) loss 1.3350 (1.0730) acc 78.1250 (72.8545) lr 3.6258e-04 eta 5:17:36 +epoch [38/50] batch [860/1000] time 1.565 (1.569) data 0.000 (0.003) loss 0.9238 (1.0738) acc 81.2500 (72.8452) lr 3.6258e-04 eta 5:17:30 +epoch [38/50] batch [865/1000] time 1.562 (1.569) data 0.001 (0.003) loss 1.1445 (1.0731) acc 78.1250 (72.8613) lr 3.6258e-04 eta 5:17:21 +epoch [38/50] batch [870/1000] time 1.558 (1.569) data 0.001 (0.003) loss 0.8936 (1.0720) acc 78.1250 (72.8843) lr 3.6258e-04 eta 5:17:13 +epoch [38/50] batch [875/1000] time 1.528 (1.569) data 0.000 (0.003) loss 1.0488 (1.0716) acc 81.2500 (72.8893) lr 3.6258e-04 eta 5:17:04 +epoch [38/50] batch [880/1000] time 1.557 (1.569) data 0.001 (0.003) loss 0.8594 (1.0720) acc 84.3750 (72.9013) lr 3.6258e-04 eta 5:16:56 +epoch [38/50] batch [885/1000] time 1.553 (1.569) data 0.001 (0.003) loss 0.5112 (1.0715) acc 84.3750 (72.9273) lr 3.6258e-04 eta 5:16:47 +epoch [38/50] batch [890/1000] time 1.553 (1.569) data 0.000 (0.003) loss 1.2246 (1.0711) acc 71.8750 (72.9354) lr 3.6258e-04 eta 5:16:38 +epoch [38/50] batch [895/1000] time 1.580 (1.569) data 0.001 (0.003) loss 1.6846 (1.0720) 
acc 62.5000 (72.9190) lr 3.6258e-04 eta 5:16:30 +epoch [38/50] batch [900/1000] time 1.570 (1.569) data 0.000 (0.003) loss 1.5479 (1.0728) acc 65.6250 (72.8924) lr 3.6258e-04 eta 5:16:24 +epoch [38/50] batch [905/1000] time 1.542 (1.569) data 0.001 (0.003) loss 0.9253 (1.0739) acc 75.0000 (72.8764) lr 3.6258e-04 eta 5:16:15 +epoch [38/50] batch [910/1000] time 1.560 (1.569) data 0.001 (0.003) loss 1.0508 (1.0748) acc 71.8750 (72.8571) lr 3.6258e-04 eta 5:16:07 +epoch [38/50] batch [915/1000] time 1.591 (1.569) data 0.001 (0.003) loss 0.5801 (1.0738) acc 81.2500 (72.8586) lr 3.6258e-04 eta 5:15:59 +epoch [38/50] batch [920/1000] time 1.562 (1.569) data 0.000 (0.003) loss 1.3203 (1.0747) acc 78.1250 (72.8533) lr 3.6258e-04 eta 5:15:51 +epoch [38/50] batch [925/1000] time 1.563 (1.569) data 0.000 (0.003) loss 1.0713 (1.0746) acc 78.1250 (72.8649) lr 3.6258e-04 eta 5:15:43 +epoch [38/50] batch [930/1000] time 1.578 (1.569) data 0.001 (0.003) loss 1.0811 (1.0735) acc 62.5000 (72.8696) lr 3.6258e-04 eta 5:15:35 +epoch [38/50] batch [935/1000] time 1.581 (1.569) data 0.000 (0.003) loss 1.0264 (1.0735) acc 71.8750 (72.8676) lr 3.6258e-04 eta 5:15:27 +epoch [38/50] batch [940/1000] time 1.572 (1.569) data 0.000 (0.003) loss 0.7637 (1.0725) acc 81.2500 (72.9122) lr 3.6258e-04 eta 5:15:18 +epoch [38/50] batch [945/1000] time 1.538 (1.569) data 0.001 (0.003) loss 1.3271 (1.0718) acc 65.6250 (72.9167) lr 3.6258e-04 eta 5:15:10 +epoch [38/50] batch [950/1000] time 1.552 (1.569) data 0.000 (0.003) loss 0.8716 (1.0708) acc 81.2500 (72.9572) lr 3.6258e-04 eta 5:15:01 +epoch [38/50] batch [955/1000] time 1.556 (1.569) data 0.001 (0.003) loss 1.2227 (1.0708) acc 71.8750 (72.9450) lr 3.6258e-04 eta 5:14:52 +epoch [38/50] batch [960/1000] time 1.580 (1.569) data 0.001 (0.003) loss 1.0586 (1.0704) acc 75.0000 (72.9525) lr 3.6258e-04 eta 5:14:45 +epoch [38/50] batch [965/1000] time 1.579 (1.569) data 0.001 (0.003) loss 1.4512 (1.0711) acc 65.6250 (72.9598) lr 3.6258e-04 eta 5:14:39 
+epoch [38/50] batch [970/1000] time 1.574 (1.569) data 0.000 (0.003) loss 1.3457 (1.0717) acc 68.7500 (72.9543) lr 3.6258e-04 eta 5:14:31 +epoch [38/50] batch [975/1000] time 1.574 (1.569) data 0.001 (0.003) loss 1.5352 (1.0724) acc 59.3750 (72.9327) lr 3.6258e-04 eta 5:14:23 +epoch [38/50] batch [980/1000] time 1.564 (1.569) data 0.000 (0.003) loss 1.3984 (1.0725) acc 68.7500 (72.9273) lr 3.6258e-04 eta 5:14:15 +epoch [38/50] batch [985/1000] time 1.562 (1.569) data 0.001 (0.003) loss 1.3965 (1.0726) acc 65.6250 (72.9093) lr 3.6258e-04 eta 5:14:07 +epoch [38/50] batch [990/1000] time 1.563 (1.569) data 0.000 (0.003) loss 0.7676 (1.0724) acc 78.1250 (72.9230) lr 3.6258e-04 eta 5:13:59 +epoch [38/50] batch [995/1000] time 1.570 (1.569) data 0.000 (0.003) loss 1.3223 (1.0721) acc 62.5000 (72.9177) lr 3.6258e-04 eta 5:13:50 +epoch [38/50] batch [1000/1000] time 1.569 (1.569) data 0.000 (0.003) loss 0.6133 (1.0732) acc 84.3750 (72.8969) lr 3.1545e-04 eta 5:13:42 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,352 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.3% +epoch [39/50] batch [5/1000] time 1.565 (1.710) data 0.001 (0.208) loss 1.5947 (1.4638) acc 62.5000 (67.5000) lr 3.1545e-04 eta 5:41:49 +epoch [39/50] batch [10/1000] time 1.557 (1.643) data 0.001 (0.105) loss 1.3545 (1.2501) acc 56.2500 (68.1250) lr 3.1545e-04 eta 5:28:19 +epoch [39/50] batch [15/1000] time 1.544 (1.616) data 0.000 (0.070) loss 1.2178 (1.1426) acc 68.7500 (70.4167) lr 3.1545e-04 eta 5:22:46 +epoch [39/50] batch [20/1000] time 1.553 (1.604) data 0.001 (0.053) loss 1.1406 (1.1414) acc 71.8750 (70.9375) lr 3.1545e-04 eta 5:20:16 +epoch [39/50] batch [25/1000] time 1.552 (1.597) data 0.001 (0.042) loss 1.7314 (1.1653) acc 59.3750 (70.2500) lr 3.1545e-04 eta 5:18:43 +epoch [39/50] batch [30/1000] time 1.530 (1.589) data 0.001 (0.035) loss 0.9331 (1.1761) acc 78.1250 (70.5208) lr 3.1545e-04 eta 5:17:02 +epoch [39/50] batch [35/1000] time 1.557 (1.591) data 0.000 (0.030) 
loss 1.3779 (1.1273) acc 65.6250 (71.8750) lr 3.1545e-04 eta 5:17:20 +epoch [39/50] batch [40/1000] time 1.548 (1.588) data 0.001 (0.027) loss 1.5400 (1.1357) acc 71.8750 (71.9531) lr 3.1545e-04 eta 5:16:26 +epoch [39/50] batch [45/1000] time 1.549 (1.583) data 0.001 (0.024) loss 1.3896 (1.1347) acc 65.6250 (71.4583) lr 3.1545e-04 eta 5:15:29 +epoch [39/50] batch [50/1000] time 1.560 (1.581) data 0.000 (0.021) loss 0.5864 (1.1151) acc 84.3750 (71.6250) lr 3.1545e-04 eta 5:14:49 +epoch [39/50] batch [55/1000] time 1.544 (1.578) data 0.000 (0.019) loss 1.3340 (1.1100) acc 68.7500 (72.0455) lr 3.1545e-04 eta 5:14:10 +epoch [39/50] batch [60/1000] time 1.561 (1.576) data 0.001 (0.018) loss 1.7949 (1.1108) acc 62.5000 (72.0833) lr 3.1545e-04 eta 5:13:37 +epoch [39/50] batch [65/1000] time 1.559 (1.575) data 0.000 (0.017) loss 1.4170 (1.1201) acc 65.6250 (71.7788) lr 3.1545e-04 eta 5:13:16 +epoch [39/50] batch [70/1000] time 1.546 (1.573) data 0.000 (0.015) loss 1.5010 (1.1310) acc 68.7500 (71.5179) lr 3.1545e-04 eta 5:12:47 +epoch [39/50] batch [75/1000] time 1.577 (1.575) data 0.001 (0.014) loss 1.6113 (1.1456) acc 59.3750 (71.0000) lr 3.1545e-04 eta 5:13:00 +epoch [39/50] batch [80/1000] time 1.582 (1.574) data 0.001 (0.014) loss 1.4727 (1.1431) acc 75.0000 (70.8594) lr 3.1545e-04 eta 5:12:43 +epoch [39/50] batch [85/1000] time 1.546 (1.573) data 0.000 (0.013) loss 1.1426 (1.1394) acc 65.6250 (70.9559) lr 3.1545e-04 eta 5:12:26 +epoch [39/50] batch [90/1000] time 1.553 (1.573) data 0.001 (0.012) loss 2.1973 (1.1442) acc 56.2500 (70.9028) lr 3.1545e-04 eta 5:12:14 +epoch [39/50] batch [95/1000] time 1.584 (1.572) data 0.001 (0.011) loss 0.9995 (1.1481) acc 75.0000 (70.9539) lr 3.1545e-04 eta 5:11:58 +epoch [39/50] batch [100/1000] time 1.563 (1.572) data 0.000 (0.011) loss 1.2910 (1.1470) acc 59.3750 (70.7812) lr 3.1545e-04 eta 5:11:42 +epoch [39/50] batch [105/1000] time 1.583 (1.572) data 0.000 (0.010) loss 1.1797 (1.1475) acc 68.7500 (70.7440) lr 3.1545e-04 eta 
5:11:33 +epoch [39/50] batch [110/1000] time 1.587 (1.572) data 0.001 (0.010) loss 1.8252 (1.1485) acc 56.2500 (70.7670) lr 3.1545e-04 eta 5:11:30 +epoch [39/50] batch [115/1000] time 1.561 (1.572) data 0.001 (0.010) loss 1.1602 (1.1403) acc 65.6250 (70.8696) lr 3.1545e-04 eta 5:11:21 +epoch [39/50] batch [120/1000] time 1.554 (1.572) data 0.000 (0.009) loss 0.8579 (1.1450) acc 75.0000 (70.7812) lr 3.1545e-04 eta 5:11:11 +epoch [39/50] batch [125/1000] time 1.550 (1.571) data 0.000 (0.009) loss 0.9956 (1.1473) acc 71.8750 (70.5500) lr 3.1545e-04 eta 5:10:57 +epoch [39/50] batch [130/1000] time 1.556 (1.571) data 0.000 (0.009) loss 1.3037 (1.1451) acc 68.7500 (70.6010) lr 3.1545e-04 eta 5:10:45 +epoch [39/50] batch [135/1000] time 1.566 (1.571) data 0.000 (0.008) loss 0.9678 (1.1436) acc 68.7500 (70.7407) lr 3.1545e-04 eta 5:10:35 +epoch [39/50] batch [140/1000] time 1.564 (1.571) data 0.000 (0.008) loss 1.2129 (1.1509) acc 62.5000 (70.7143) lr 3.1545e-04 eta 5:10:36 +epoch [39/50] batch [145/1000] time 1.576 (1.571) data 0.001 (0.008) loss 0.4233 (1.1364) acc 90.6250 (70.9483) lr 3.1545e-04 eta 5:10:29 +epoch [39/50] batch [150/1000] time 1.560 (1.571) data 0.001 (0.007) loss 0.9150 (1.1326) acc 65.6250 (70.8958) lr 3.1545e-04 eta 5:10:16 +epoch [39/50] batch [155/1000] time 1.562 (1.571) data 0.000 (0.007) loss 0.7041 (1.1300) acc 75.0000 (70.9476) lr 3.1545e-04 eta 5:10:07 +epoch [39/50] batch [160/1000] time 1.565 (1.571) data 0.000 (0.007) loss 0.8867 (1.1305) acc 68.7500 (70.9375) lr 3.1545e-04 eta 5:09:55 +epoch [39/50] batch [165/1000] time 1.568 (1.570) data 0.000 (0.007) loss 0.8027 (1.1312) acc 81.2500 (71.0038) lr 3.1545e-04 eta 5:09:46 +epoch [39/50] batch [170/1000] time 1.558 (1.570) data 0.000 (0.007) loss 1.0000 (1.1318) acc 71.8750 (71.0110) lr 3.1545e-04 eta 5:09:34 +epoch [39/50] batch [175/1000] time 1.566 (1.570) data 0.000 (0.006) loss 0.8726 (1.1262) acc 84.3750 (71.1964) lr 3.1545e-04 eta 5:09:25 +epoch [39/50] batch [180/1000] time 1.573 
(1.570) data 0.000 (0.006) loss 0.5645 (1.1258) acc 78.1250 (71.2326) lr 3.1545e-04 eta 5:09:17 +epoch [39/50] batch [185/1000] time 1.554 (1.570) data 0.000 (0.006) loss 1.1826 (1.1234) acc 71.8750 (71.3682) lr 3.1545e-04 eta 5:09:11 +epoch [39/50] batch [190/1000] time 1.544 (1.570) data 0.000 (0.006) loss 1.2822 (1.1206) acc 75.0000 (71.4967) lr 3.1545e-04 eta 5:08:57 +epoch [39/50] batch [195/1000] time 1.566 (1.569) data 0.001 (0.006) loss 1.3193 (1.1222) acc 65.6250 (71.5064) lr 3.1545e-04 eta 5:08:44 +epoch [39/50] batch [200/1000] time 1.563 (1.569) data 0.000 (0.006) loss 1.5557 (1.1248) acc 65.6250 (71.4531) lr 3.1545e-04 eta 5:08:35 +epoch [39/50] batch [205/1000] time 1.574 (1.569) data 0.001 (0.006) loss 1.2627 (1.1296) acc 71.8750 (71.3872) lr 3.1545e-04 eta 5:08:27 +epoch [39/50] batch [210/1000] time 1.552 (1.569) data 0.001 (0.005) loss 0.9390 (1.1288) acc 78.1250 (71.5179) lr 3.1545e-04 eta 5:08:15 +epoch [39/50] batch [215/1000] time 1.542 (1.568) data 0.001 (0.005) loss 1.0537 (1.1295) acc 65.6250 (71.5116) lr 3.1545e-04 eta 5:08:04 +epoch [39/50] batch [220/1000] time 1.545 (1.568) data 0.000 (0.005) loss 0.9624 (1.1270) acc 78.1250 (71.6193) lr 3.1545e-04 eta 5:07:51 +epoch [39/50] batch [225/1000] time 1.551 (1.569) data 0.001 (0.005) loss 1.1475 (1.1247) acc 75.0000 (71.6806) lr 3.1545e-04 eta 5:07:50 +epoch [39/50] batch [230/1000] time 1.563 (1.568) data 0.001 (0.005) loss 1.1533 (1.1184) acc 65.6250 (71.8071) lr 3.1545e-04 eta 5:07:37 +epoch [39/50] batch [235/1000] time 1.570 (1.568) data 0.000 (0.005) loss 0.9165 (1.1219) acc 75.0000 (71.7686) lr 3.1545e-04 eta 5:07:26 +epoch [39/50] batch [240/1000] time 1.567 (1.568) data 0.001 (0.005) loss 1.1533 (1.1184) acc 71.8750 (71.7839) lr 3.1545e-04 eta 5:07:20 +epoch [39/50] batch [245/1000] time 1.535 (1.567) data 0.001 (0.005) loss 0.7568 (1.1174) acc 84.3750 (71.9005) lr 3.1545e-04 eta 5:07:05 +epoch [39/50] batch [250/1000] time 1.563 (1.567) data 0.001 (0.005) loss 1.2217 (1.1204) acc 
68.7500 (71.7625) lr 3.1545e-04 eta 5:06:56 +epoch [39/50] batch [255/1000] time 1.562 (1.567) data 0.000 (0.005) loss 0.7212 (1.1169) acc 81.2500 (71.8750) lr 3.1545e-04 eta 5:06:48 +epoch [39/50] batch [260/1000] time 1.544 (1.567) data 0.000 (0.004) loss 1.2979 (1.1216) acc 81.2500 (71.8630) lr 3.1545e-04 eta 5:06:37 +epoch [39/50] batch [265/1000] time 1.552 (1.567) data 0.000 (0.004) loss 1.8154 (1.1234) acc 59.3750 (71.7807) lr 3.1545e-04 eta 5:06:27 +epoch [39/50] batch [270/1000] time 1.567 (1.567) data 0.000 (0.004) loss 1.1807 (1.1266) acc 65.6250 (71.7593) lr 3.1545e-04 eta 5:06:18 +epoch [39/50] batch [275/1000] time 1.572 (1.567) data 0.001 (0.004) loss 0.8550 (1.1217) acc 75.0000 (71.8864) lr 3.1545e-04 eta 5:06:11 +epoch [39/50] batch [280/1000] time 1.561 (1.567) data 0.001 (0.004) loss 0.8252 (1.1176) acc 84.3750 (71.9866) lr 3.1545e-04 eta 5:06:02 +epoch [39/50] batch [285/1000] time 1.552 (1.567) data 0.000 (0.004) loss 1.4561 (1.1203) acc 65.6250 (71.9737) lr 3.1545e-04 eta 5:05:55 +epoch [39/50] batch [290/1000] time 1.568 (1.567) data 0.001 (0.004) loss 1.2002 (1.1181) acc 71.8750 (71.9935) lr 3.1545e-04 eta 5:05:54 +epoch [39/50] batch [295/1000] time 1.566 (1.567) data 0.001 (0.004) loss 1.1279 (1.1212) acc 65.6250 (71.9068) lr 3.1545e-04 eta 5:05:43 +epoch [39/50] batch [300/1000] time 1.557 (1.567) data 0.000 (0.004) loss 0.8071 (1.1173) acc 78.1250 (71.9896) lr 3.1545e-04 eta 5:05:35 +epoch [39/50] batch [305/1000] time 1.535 (1.567) data 0.000 (0.004) loss 0.8569 (1.1180) acc 78.1250 (72.0082) lr 3.1545e-04 eta 5:05:24 +epoch [39/50] batch [310/1000] time 1.551 (1.567) data 0.000 (0.004) loss 1.6250 (1.1206) acc 68.7500 (71.9960) lr 3.1545e-04 eta 5:05:14 +epoch [39/50] batch [315/1000] time 1.573 (1.567) data 0.000 (0.004) loss 1.0898 (1.1235) acc 78.1250 (71.9246) lr 3.1545e-04 eta 5:05:05 +epoch [39/50] batch [320/1000] time 1.563 (1.567) data 0.000 (0.004) loss 0.8745 (1.1227) acc 78.1250 (71.9238) lr 3.1545e-04 eta 5:04:57 +epoch 
[39/50] batch [325/1000] time 1.571 (1.567) data 0.000 (0.004) loss 1.0244 (1.1240) acc 71.8750 (71.9231) lr 3.1545e-04 eta 5:04:49 +epoch [39/50] batch [330/1000] time 1.552 (1.566) data 0.001 (0.004) loss 0.8330 (1.1225) acc 71.8750 (71.9223) lr 3.1545e-04 eta 5:04:40 +epoch [39/50] batch [335/1000] time 1.560 (1.567) data 0.001 (0.004) loss 0.9106 (1.1221) acc 71.8750 (71.9030) lr 3.1545e-04 eta 5:04:36 +epoch [39/50] batch [340/1000] time 1.565 (1.567) data 0.001 (0.004) loss 0.9028 (1.1186) acc 75.0000 (72.0037) lr 3.1545e-04 eta 5:04:27 +epoch [39/50] batch [345/1000] time 1.567 (1.567) data 0.000 (0.004) loss 1.2178 (1.1199) acc 78.1250 (72.0471) lr 3.1545e-04 eta 5:04:19 +epoch [39/50] batch [350/1000] time 1.569 (1.566) data 0.000 (0.003) loss 0.6323 (1.1168) acc 84.3750 (72.0357) lr 3.1545e-04 eta 5:04:09 +epoch [39/50] batch [355/1000] time 1.576 (1.566) data 0.001 (0.003) loss 0.8140 (1.1156) acc 78.1250 (72.0863) lr 3.1545e-04 eta 5:04:01 +epoch [39/50] batch [360/1000] time 1.564 (1.566) data 0.000 (0.003) loss 0.9419 (1.1142) acc 81.2500 (72.1007) lr 3.1545e-04 eta 5:03:52 +epoch [39/50] batch [365/1000] time 1.557 (1.566) data 0.000 (0.003) loss 1.0967 (1.1155) acc 71.8750 (72.1318) lr 3.1545e-04 eta 5:03:44 +epoch [39/50] batch [370/1000] time 1.554 (1.566) data 0.000 (0.003) loss 1.1445 (1.1167) acc 75.0000 (72.1199) lr 3.1545e-04 eta 5:03:35 +epoch [39/50] batch [375/1000] time 1.722 (1.567) data 0.000 (0.003) loss 0.9355 (1.1148) acc 75.0000 (72.1500) lr 3.1545e-04 eta 5:03:32 +epoch [39/50] batch [380/1000] time 1.558 (1.567) data 0.001 (0.003) loss 1.6367 (1.1153) acc 59.3750 (72.1217) lr 3.1545e-04 eta 5:03:24 +epoch [39/50] batch [385/1000] time 1.560 (1.567) data 0.001 (0.003) loss 1.2021 (1.1152) acc 68.7500 (72.1185) lr 3.1545e-04 eta 5:03:15 +epoch [39/50] batch [390/1000] time 1.559 (1.566) data 0.001 (0.003) loss 1.2266 (1.1152) acc 65.6250 (72.1154) lr 3.1545e-04 eta 5:03:06 +epoch [39/50] batch [395/1000] time 1.567 (1.566) data 
0.000 (0.003) loss 1.2021 (1.1151) acc 71.8750 (72.1044) lr 3.1545e-04 eta 5:02:58 +epoch [39/50] batch [400/1000] time 1.535 (1.566) data 0.000 (0.003) loss 1.3682 (1.1177) acc 75.0000 (72.0703) lr 3.1545e-04 eta 5:02:47 +epoch [39/50] batch [405/1000] time 1.539 (1.566) data 0.000 (0.003) loss 0.9438 (1.1162) acc 78.1250 (72.0602) lr 3.1545e-04 eta 5:02:37 +epoch [39/50] batch [410/1000] time 1.571 (1.566) data 0.000 (0.003) loss 1.0664 (1.1174) acc 71.8750 (72.0732) lr 3.1545e-04 eta 5:02:29 +epoch [39/50] batch [415/1000] time 1.572 (1.566) data 0.001 (0.003) loss 1.8320 (1.1185) acc 62.5000 (72.0557) lr 3.1545e-04 eta 5:02:20 +epoch [39/50] batch [420/1000] time 1.587 (1.566) data 0.001 (0.003) loss 0.9297 (1.1193) acc 78.1250 (72.0312) lr 3.1545e-04 eta 5:02:13 +epoch [39/50] batch [425/1000] time 1.555 (1.566) data 0.001 (0.003) loss 1.5938 (1.1213) acc 68.7500 (72.0000) lr 3.1545e-04 eta 5:02:04 +epoch [39/50] batch [430/1000] time 1.584 (1.566) data 0.000 (0.003) loss 0.9478 (1.1215) acc 68.7500 (71.9767) lr 3.1545e-04 eta 5:01:57 +epoch [39/50] batch [435/1000] time 1.597 (1.566) data 0.001 (0.003) loss 1.0654 (1.1214) acc 65.6250 (71.9899) lr 3.1545e-04 eta 5:01:51 +epoch [39/50] batch [440/1000] time 1.593 (1.567) data 0.000 (0.003) loss 1.4258 (1.1217) acc 62.5000 (71.9957) lr 3.1545e-04 eta 5:01:49 +epoch [39/50] batch [445/1000] time 1.567 (1.567) data 0.001 (0.003) loss 1.0791 (1.1204) acc 71.8750 (72.0506) lr 3.1545e-04 eta 5:01:40 +epoch [39/50] batch [450/1000] time 1.563 (1.566) data 0.000 (0.003) loss 1.1992 (1.1202) acc 71.8750 (72.0694) lr 3.1545e-04 eta 5:01:32 +epoch [39/50] batch [455/1000] time 1.553 (1.566) data 0.000 (0.003) loss 1.1963 (1.1213) acc 71.8750 (72.0536) lr 3.1545e-04 eta 5:01:22 +epoch [39/50] batch [460/1000] time 1.567 (1.566) data 0.000 (0.003) loss 1.3535 (1.1220) acc 65.6250 (72.0245) lr 3.1545e-04 eta 5:01:14 +epoch [39/50] batch [465/1000] time 1.560 (1.566) data 0.001 (0.003) loss 0.9419 (1.1190) acc 84.3750 
(72.0833) lr 3.1545e-04 eta 5:01:06 +epoch [39/50] batch [470/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.0830 (1.1174) acc 71.8750 (72.1144) lr 3.1545e-04 eta 5:00:57 +epoch [39/50] batch [475/1000] time 1.581 (1.566) data 0.001 (0.003) loss 1.5137 (1.1158) acc 56.2500 (72.1118) lr 3.1545e-04 eta 5:00:50 +epoch [39/50] batch [480/1000] time 1.592 (1.566) data 0.001 (0.003) loss 0.6221 (1.1153) acc 78.1250 (72.1549) lr 3.1545e-04 eta 5:00:43 +epoch [39/50] batch [485/1000] time 1.571 (1.567) data 0.000 (0.003) loss 0.7778 (1.1146) acc 78.1250 (72.1972) lr 3.1545e-04 eta 5:00:38 +epoch [39/50] batch [490/1000] time 1.538 (1.566) data 0.000 (0.003) loss 0.5991 (1.1128) acc 81.2500 (72.2194) lr 3.1545e-04 eta 5:00:29 +epoch [39/50] batch [495/1000] time 1.571 (1.566) data 0.000 (0.003) loss 1.4219 (1.1121) acc 59.3750 (72.2159) lr 3.1545e-04 eta 5:00:22 +epoch [39/50] batch [500/1000] time 1.555 (1.566) data 0.001 (0.003) loss 1.1270 (1.1121) acc 75.0000 (72.2562) lr 3.1545e-04 eta 5:00:13 +epoch [39/50] batch [505/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.1191 (1.1115) acc 75.0000 (72.2463) lr 3.1545e-04 eta 5:00:04 +epoch [39/50] batch [510/1000] time 1.563 (1.566) data 0.001 (0.003) loss 1.2002 (1.1137) acc 65.6250 (72.1875) lr 3.1545e-04 eta 4:59:56 +epoch [39/50] batch [515/1000] time 1.543 (1.566) data 0.000 (0.003) loss 1.2412 (1.1147) acc 71.8750 (72.1723) lr 3.1545e-04 eta 4:59:47 +epoch [39/50] batch [520/1000] time 1.548 (1.566) data 0.001 (0.002) loss 1.0283 (1.1141) acc 78.1250 (72.2115) lr 3.1545e-04 eta 4:59:38 +epoch [39/50] batch [525/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.5273 (1.1155) acc 75.0000 (72.1786) lr 3.1545e-04 eta 4:59:28 +epoch [39/50] batch [530/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.1748 (1.1157) acc 59.3750 (72.1285) lr 3.1545e-04 eta 4:59:24 +epoch [39/50] batch [535/1000] time 1.528 (1.566) data 0.000 (0.002) loss 1.8379 (1.1176) acc 59.3750 (72.1086) lr 3.1545e-04 eta 4:59:15 +epoch [39/50] 
batch [540/1000] time 1.541 (1.566) data 0.000 (0.002) loss 0.7100 (1.1171) acc 81.2500 (72.0949) lr 3.1545e-04 eta 4:59:06 +epoch [39/50] batch [545/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.8906 (1.1187) acc 68.7500 (72.0470) lr 3.1545e-04 eta 4:58:57 +epoch [39/50] batch [550/1000] time 1.585 (1.566) data 0.001 (0.002) loss 1.2793 (1.1203) acc 75.0000 (72.0000) lr 3.1545e-04 eta 4:58:49 +epoch [39/50] batch [555/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.0391 (1.1205) acc 75.0000 (71.9989) lr 3.1545e-04 eta 4:58:41 +epoch [39/50] batch [560/1000] time 1.524 (1.566) data 0.000 (0.002) loss 0.8950 (1.1200) acc 68.7500 (71.9587) lr 3.1545e-04 eta 4:58:32 +epoch [39/50] batch [565/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.0430 (1.1194) acc 68.7500 (71.9580) lr 3.1545e-04 eta 4:58:22 +epoch [39/50] batch [570/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.1133 (1.1172) acc 71.8750 (71.9956) lr 3.1545e-04 eta 4:58:13 +epoch [39/50] batch [575/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.4985 (1.1166) acc 81.2500 (72.0109) lr 3.1545e-04 eta 4:58:05 +epoch [39/50] batch [580/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.6953 (1.1172) acc 62.5000 (72.0097) lr 3.1545e-04 eta 4:57:57 +epoch [39/50] batch [585/1000] time 1.535 (1.565) data 0.001 (0.002) loss 1.1992 (1.1159) acc 68.7500 (72.0406) lr 3.1545e-04 eta 4:57:47 +epoch [39/50] batch [590/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.7510 (1.1163) acc 75.0000 (72.0286) lr 3.1545e-04 eta 4:57:42 +epoch [39/50] batch [595/1000] time 1.575 (1.565) data 0.000 (0.002) loss 1.1084 (1.1173) acc 71.8750 (71.9958) lr 3.1545e-04 eta 4:57:34 +epoch [39/50] batch [600/1000] time 1.555 (1.565) data 0.001 (0.002) loss 0.9873 (1.1167) acc 71.8750 (71.9844) lr 3.1545e-04 eta 4:57:25 +epoch [39/50] batch [605/1000] time 1.562 (1.565) data 0.001 (0.002) loss 0.4536 (1.1140) acc 87.5000 (72.0455) lr 3.1545e-04 eta 4:57:17 +epoch [39/50] batch [610/1000] time 1.542 (1.565) data 0.000 
(0.002) loss 0.8501 (1.1131) acc 71.8750 (72.0543) lr 3.1545e-04 eta 4:57:08 +epoch [39/50] batch [615/1000] time 1.553 (1.565) data 0.001 (0.002) loss 1.2783 (1.1121) acc 68.7500 (72.0681) lr 3.1545e-04 eta 4:57:00 +epoch [39/50] batch [620/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.0430 (1.1138) acc 71.8750 (72.0363) lr 3.1545e-04 eta 4:56:51 +epoch [39/50] batch [625/1000] time 1.589 (1.565) data 0.001 (0.002) loss 1.0547 (1.1165) acc 68.7500 (72.0200) lr 3.1545e-04 eta 4:56:42 +epoch [39/50] batch [630/1000] time 1.556 (1.565) data 0.001 (0.002) loss 0.5435 (1.1146) acc 87.5000 (72.0188) lr 3.1545e-04 eta 4:56:33 +epoch [39/50] batch [635/1000] time 1.550 (1.565) data 0.001 (0.002) loss 0.5728 (1.1129) acc 84.3750 (72.0571) lr 3.1545e-04 eta 4:56:27 +epoch [39/50] batch [640/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.1650 (1.1132) acc 71.8750 (72.0605) lr 3.1545e-04 eta 4:56:19 +epoch [39/50] batch [645/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.8994 (1.1137) acc 78.1250 (72.0591) lr 3.1545e-04 eta 4:56:09 +epoch [39/50] batch [650/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.8008 (1.1111) acc 75.0000 (72.1250) lr 3.1545e-04 eta 4:56:01 +epoch [39/50] batch [655/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.5254 (1.1127) acc 68.7500 (72.0897) lr 3.1545e-04 eta 4:55:52 +epoch [39/50] batch [660/1000] time 1.565 (1.565) data 0.001 (0.002) loss 0.8462 (1.1135) acc 78.1250 (72.0644) lr 3.1545e-04 eta 4:55:45 +epoch [39/50] batch [665/1000] time 1.577 (1.565) data 0.000 (0.002) loss 1.2402 (1.1145) acc 68.7500 (72.0536) lr 3.1545e-04 eta 4:55:36 +epoch [39/50] batch [670/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.1611 (1.1147) acc 78.1250 (72.0382) lr 3.1545e-04 eta 4:55:28 +epoch [39/50] batch [675/1000] time 1.565 (1.565) data 0.001 (0.002) loss 0.6196 (1.1135) acc 84.3750 (72.0787) lr 3.1545e-04 eta 4:55:21 +epoch [39/50] batch [680/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.0381 (1.1121) acc 65.6250 (72.1002) lr 
3.1545e-04 eta 4:55:15 +epoch [39/50] batch [685/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.6909 (1.1110) acc 87.5000 (72.1214) lr 3.1545e-04 eta 4:55:07 +epoch [39/50] batch [690/1000] time 1.548 (1.565) data 0.000 (0.002) loss 1.1553 (1.1115) acc 62.5000 (72.0924) lr 3.1545e-04 eta 4:54:59 +epoch [39/50] batch [695/1000] time 1.580 (1.565) data 0.001 (0.002) loss 1.3350 (1.1113) acc 68.7500 (72.1133) lr 3.1545e-04 eta 4:54:52 +epoch [39/50] batch [700/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.9429 (1.1114) acc 75.0000 (72.1205) lr 3.1545e-04 eta 4:54:43 +epoch [39/50] batch [705/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.0283 (1.1098) acc 71.8750 (72.1720) lr 3.1545e-04 eta 4:54:36 +epoch [39/50] batch [710/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.1758 (1.1114) acc 68.7500 (72.1347) lr 3.1545e-04 eta 4:54:28 +epoch [39/50] batch [715/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.5708 (1.1106) acc 84.3750 (72.1329) lr 3.1545e-04 eta 4:54:19 +epoch [39/50] batch [720/1000] time 1.541 (1.565) data 0.001 (0.002) loss 1.1494 (1.1101) acc 59.3750 (72.1441) lr 3.1545e-04 eta 4:54:11 +epoch [39/50] batch [725/1000] time 1.546 (1.565) data 0.001 (0.002) loss 1.1367 (1.1097) acc 75.0000 (72.1466) lr 3.1545e-04 eta 4:54:02 +epoch [39/50] batch [730/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.9971 (1.1106) acc 81.2500 (72.1361) lr 3.1545e-04 eta 4:53:53 +epoch [39/50] batch [735/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.9668 (1.1101) acc 78.1250 (72.1301) lr 3.1545e-04 eta 4:53:45 +epoch [39/50] batch [740/1000] time 1.738 (1.565) data 0.000 (0.002) loss 1.2109 (1.1113) acc 68.7500 (72.0988) lr 3.1545e-04 eta 4:53:40 +epoch [39/50] batch [745/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.7759 (1.1121) acc 75.0000 (72.0889) lr 3.1545e-04 eta 4:53:31 +epoch [39/50] batch [750/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.8662 (1.1102) acc 71.8750 (72.1042) lr 3.1545e-04 eta 4:53:23 +epoch [39/50] batch 
[755/1000] time 1.548 (1.565) data 0.001 (0.002) loss 0.9492 (1.1100) acc 75.0000 (72.1192) lr 3.1545e-04 eta 4:53:15 +epoch [39/50] batch [760/1000] time 1.555 (1.565) data 0.001 (0.002) loss 0.9287 (1.1113) acc 75.0000 (72.0847) lr 3.1545e-04 eta 4:53:07 +epoch [39/50] batch [765/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.6895 (1.1113) acc 56.2500 (72.0792) lr 3.1545e-04 eta 4:52:58 +epoch [39/50] batch [770/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.0811 (1.1103) acc 71.8750 (72.0657) lr 3.1545e-04 eta 4:52:49 +epoch [39/50] batch [775/1000] time 1.574 (1.565) data 0.000 (0.002) loss 1.4902 (1.1112) acc 65.6250 (72.0444) lr 3.1545e-04 eta 4:52:41 +epoch [39/50] batch [780/1000] time 1.580 (1.565) data 0.000 (0.002) loss 1.4014 (1.1116) acc 75.0000 (72.0553) lr 3.1545e-04 eta 4:52:34 +epoch [39/50] batch [785/1000] time 1.712 (1.565) data 0.000 (0.002) loss 1.3896 (1.1124) acc 56.2500 (72.0462) lr 3.1545e-04 eta 4:52:28 +epoch [39/50] batch [790/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.9414 (1.1116) acc 75.0000 (72.0570) lr 3.1545e-04 eta 4:52:20 +epoch [39/50] batch [795/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.8921 (1.1113) acc 71.8750 (72.0676) lr 3.1545e-04 eta 4:52:11 +epoch [39/50] batch [800/1000] time 1.568 (1.565) data 0.000 (0.002) loss 1.5088 (1.1118) acc 71.8750 (72.0625) lr 3.1545e-04 eta 4:52:03 +epoch [39/50] batch [805/1000] time 1.536 (1.565) data 0.000 (0.002) loss 1.5107 (1.1115) acc 59.3750 (72.0613) lr 3.1545e-04 eta 4:51:54 +epoch [39/50] batch [810/1000] time 1.529 (1.564) data 0.001 (0.002) loss 1.4775 (1.1122) acc 68.7500 (72.0370) lr 3.1545e-04 eta 4:51:46 +epoch [39/50] batch [815/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.7803 (1.1117) acc 75.0000 (72.0475) lr 3.1545e-04 eta 4:51:37 +epoch [39/50] batch [820/1000] time 1.545 (1.564) data 0.000 (0.002) loss 0.8545 (1.1115) acc 78.1250 (72.0389) lr 3.1545e-04 eta 4:51:30 +epoch [39/50] batch [825/1000] time 1.569 (1.564) data 0.001 (0.002) loss 
0.8013 (1.1117) acc 84.3750 (72.0379) lr 3.1545e-04 eta 4:51:22 +epoch [39/50] batch [830/1000] time 1.593 (1.565) data 0.001 (0.002) loss 1.4922 (1.1122) acc 56.2500 (71.9917) lr 3.1545e-04 eta 4:51:17 +epoch [39/50] batch [835/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.6084 (1.1128) acc 53.1250 (71.9798) lr 3.1545e-04 eta 4:51:10 +epoch [39/50] batch [840/1000] time 1.552 (1.565) data 0.001 (0.002) loss 1.1123 (1.1131) acc 65.6250 (71.9680) lr 3.1545e-04 eta 4:51:02 +epoch [39/50] batch [845/1000] time 1.581 (1.565) data 0.000 (0.002) loss 1.0771 (1.1129) acc 71.8750 (71.9859) lr 3.1545e-04 eta 4:50:54 +epoch [39/50] batch [850/1000] time 1.572 (1.565) data 0.000 (0.002) loss 0.7959 (1.1128) acc 78.1250 (71.9816) lr 3.1545e-04 eta 4:50:46 +epoch [39/50] batch [855/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.4512 (1.1125) acc 62.5000 (71.9810) lr 3.1545e-04 eta 4:50:39 +epoch [39/50] batch [860/1000] time 1.600 (1.565) data 0.000 (0.002) loss 0.8076 (1.1127) acc 81.2500 (71.9695) lr 3.1545e-04 eta 4:50:31 +epoch [39/50] batch [865/1000] time 1.555 (1.565) data 0.001 (0.002) loss 0.8696 (1.1134) acc 75.0000 (71.9617) lr 3.1545e-04 eta 4:50:24 +epoch [39/50] batch [870/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.3408 (1.1136) acc 68.7500 (71.9792) lr 3.1545e-04 eta 4:50:15 +epoch [39/50] batch [875/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.2236 (1.1131) acc 68.7500 (71.9786) lr 3.1545e-04 eta 4:50:07 +epoch [39/50] batch [880/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.4170 (1.1135) acc 71.8750 (71.9851) lr 3.1545e-04 eta 4:49:59 +epoch [39/50] batch [885/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.4355 (1.1163) acc 68.7500 (71.9597) lr 3.1545e-04 eta 4:49:51 +epoch [39/50] batch [890/1000] time 1.577 (1.565) data 0.001 (0.002) loss 0.7178 (1.1161) acc 87.5000 (71.9803) lr 3.1545e-04 eta 4:49:43 +epoch [39/50] batch [895/1000] time 1.550 (1.565) data 0.001 (0.002) loss 1.0186 (1.1154) acc 65.6250 (71.9797) lr 3.1545e-04 
eta 4:49:37 +epoch [39/50] batch [900/1000] time 1.548 (1.565) data 0.001 (0.002) loss 0.9517 (1.1151) acc 78.1250 (71.9826) lr 3.1545e-04 eta 4:49:29 +epoch [39/50] batch [905/1000] time 1.570 (1.565) data 0.000 (0.002) loss 0.7388 (1.1146) acc 81.2500 (71.9993) lr 3.1545e-04 eta 4:49:21 +epoch [39/50] batch [910/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.4307 (1.1160) acc 65.6250 (71.9574) lr 3.1545e-04 eta 4:49:13 +epoch [39/50] batch [915/1000] time 1.514 (1.565) data 0.000 (0.002) loss 1.0410 (1.1159) acc 75.0000 (71.9706) lr 3.1545e-04 eta 4:49:04 +epoch [39/50] batch [920/1000] time 1.567 (1.565) data 0.001 (0.002) loss 1.8750 (1.1162) acc 56.2500 (71.9837) lr 3.1545e-04 eta 4:48:56 +epoch [39/50] batch [925/1000] time 1.551 (1.565) data 0.001 (0.002) loss 1.0938 (1.1152) acc 75.0000 (72.0034) lr 3.1545e-04 eta 4:48:49 +epoch [39/50] batch [930/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.5566 (1.1149) acc 71.8750 (72.0094) lr 3.1545e-04 eta 4:48:41 +epoch [39/50] batch [935/1000] time 1.580 (1.565) data 0.001 (0.002) loss 1.1777 (1.1141) acc 78.1250 (72.0455) lr 3.1545e-04 eta 4:48:33 +epoch [39/50] batch [940/1000] time 1.576 (1.565) data 0.000 (0.002) loss 1.0312 (1.1136) acc 71.8750 (72.0578) lr 3.1545e-04 eta 4:48:27 +epoch [39/50] batch [945/1000] time 1.553 (1.565) data 0.001 (0.002) loss 1.0830 (1.1126) acc 71.8750 (72.0833) lr 3.1545e-04 eta 4:48:19 +epoch [39/50] batch [950/1000] time 1.552 (1.565) data 0.001 (0.002) loss 0.8477 (1.1114) acc 78.1250 (72.1020) lr 3.1545e-04 eta 4:48:10 +epoch [39/50] batch [955/1000] time 1.573 (1.565) data 0.000 (0.002) loss 0.7373 (1.1099) acc 78.1250 (72.1204) lr 3.1545e-04 eta 4:48:02 +epoch [39/50] batch [960/1000] time 1.551 (1.565) data 0.001 (0.002) loss 1.1123 (1.1100) acc 71.8750 (72.1159) lr 3.1545e-04 eta 4:47:54 +epoch [39/50] batch [965/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.8916 (1.1091) acc 75.0000 (72.1341) lr 3.1545e-04 eta 4:47:46 +epoch [39/50] batch [970/1000] time 
1.557 (1.565) data 0.001 (0.002) loss 0.6372 (1.1086) acc 84.3750 (72.1424) lr 3.1545e-04 eta 4:47:37 +epoch [39/50] batch [975/1000] time 1.587 (1.565) data 0.001 (0.002) loss 0.6182 (1.1079) acc 75.0000 (72.1474) lr 3.1545e-04 eta 4:47:30 +epoch [39/50] batch [980/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.8281 (1.1083) acc 81.2500 (72.1237) lr 3.1545e-04 eta 4:47:24 +epoch [39/50] batch [985/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.4814 (1.1095) acc 65.6250 (72.0971) lr 3.1545e-04 eta 4:47:17 +epoch [39/50] batch [990/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.1318 (1.1089) acc 75.0000 (72.1244) lr 3.1545e-04 eta 4:47:09 +epoch [39/50] batch [995/1000] time 1.568 (1.565) data 0.000 (0.002) loss 0.4731 (1.1064) acc 78.1250 (72.1734) lr 3.1545e-04 eta 4:47:01 +epoch [39/50] batch [1000/1000] time 1.588 (1.565) data 0.000 (0.002) loss 1.5508 (1.1069) acc 62.5000 (72.1719) lr 2.7103e-04 eta 4:46:53 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,398 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [40/50] batch [5/1000] time 1.582 (1.717) data 0.000 (0.206) loss 1.4727 (1.1109) acc 59.3750 (75.6250) lr 2.7103e-04 eta 5:14:39 +epoch [40/50] batch [10/1000] time 1.572 (1.636) data 0.001 (0.104) loss 0.5205 (1.1531) acc 87.5000 (74.3750) lr 2.7103e-04 eta 4:59:41 +epoch [40/50] batch [15/1000] time 1.561 (1.614) data 0.001 (0.069) loss 1.5771 (1.1555) acc 62.5000 (73.9583) lr 2.7103e-04 eta 4:55:24 +epoch [40/50] batch [20/1000] time 1.582 (1.602) data 0.001 (0.052) loss 1.4727 (1.1303) acc 56.2500 (73.1250) lr 2.7103e-04 eta 4:53:12 +epoch [40/50] batch [25/1000] time 1.558 (1.593) data 0.000 (0.042) loss 1.6230 (1.0959) acc 53.1250 (73.0000) lr 2.7103e-04 eta 4:51:26 +epoch [40/50] batch [30/1000] time 1.540 (1.586) data 0.001 (0.035) loss 1.4355 (1.0857) acc 56.2500 (72.2917) 
lr 2.7103e-04 eta 4:49:54 +epoch [40/50] batch [35/1000] time 1.551 (1.582) data 0.000 (0.030) loss 0.9839 (1.0638) acc 71.8750 (72.5893) lr 2.7103e-04 eta 4:49:01 +epoch [40/50] batch [40/1000] time 1.558 (1.579) data 0.000 (0.026) loss 0.9380 (1.0645) acc 78.1250 (73.0469) lr 2.7103e-04 eta 4:48:24 +epoch [40/50] batch [45/1000] time 1.561 (1.578) data 0.000 (0.023) loss 1.1543 (1.0799) acc 71.8750 (72.8472) lr 2.7103e-04 eta 4:48:02 +epoch [40/50] batch [50/1000] time 1.565 (1.581) data 0.001 (0.021) loss 1.0996 (1.0589) acc 71.8750 (73.4375) lr 2.7103e-04 eta 4:48:32 +epoch [40/50] batch [55/1000] time 1.554 (1.578) data 0.000 (0.019) loss 0.7773 (1.0606) acc 81.2500 (73.4659) lr 2.7103e-04 eta 4:47:53 +epoch [40/50] batch [60/1000] time 1.568 (1.577) data 0.000 (0.018) loss 1.0537 (1.0490) acc 65.6250 (73.2812) lr 2.7103e-04 eta 4:47:28 +epoch [40/50] batch [65/1000] time 1.573 (1.576) data 0.001 (0.016) loss 1.1611 (1.0395) acc 68.7500 (73.4615) lr 2.7103e-04 eta 4:47:08 +epoch [40/50] batch [70/1000] time 1.582 (1.575) data 0.000 (0.015) loss 1.0430 (1.0363) acc 78.1250 (73.5268) lr 2.7103e-04 eta 4:46:53 +epoch [40/50] batch [75/1000] time 1.562 (1.574) data 0.001 (0.014) loss 0.8516 (1.0464) acc 81.2500 (73.3750) lr 2.7103e-04 eta 4:46:37 +epoch [40/50] batch [80/1000] time 1.552 (1.573) data 0.001 (0.013) loss 0.8555 (1.0317) acc 75.0000 (73.5547) lr 2.7103e-04 eta 4:46:15 +epoch [40/50] batch [85/1000] time 1.560 (1.572) data 0.000 (0.013) loss 1.6982 (1.0272) acc 62.5000 (73.7868) lr 2.7103e-04 eta 4:46:02 +epoch [40/50] batch [90/1000] time 1.561 (1.572) data 0.001 (0.012) loss 1.6045 (1.0492) acc 62.5000 (73.1250) lr 2.7103e-04 eta 4:45:49 +epoch [40/50] batch [95/1000] time 1.558 (1.574) data 0.000 (0.011) loss 0.9688 (1.0478) acc 68.7500 (73.2566) lr 2.7103e-04 eta 4:45:59 +epoch [40/50] batch [100/1000] time 1.553 (1.573) data 0.000 (0.011) loss 1.1768 (1.0439) acc 71.8750 (73.2812) lr 2.7103e-04 eta 4:45:47 +epoch [40/50] batch [105/1000] time 
1.552 (1.572) data 0.000 (0.010) loss 1.6572 (1.0537) acc 59.3750 (73.0655) lr 2.7103e-04 eta 4:45:31 +epoch [40/50] batch [110/1000] time 1.562 (1.572) data 0.000 (0.010) loss 1.2383 (1.0621) acc 68.7500 (73.0114) lr 2.7103e-04 eta 4:45:16 +epoch [40/50] batch [115/1000] time 1.557 (1.571) data 0.000 (0.009) loss 0.7173 (1.0598) acc 81.2500 (73.0707) lr 2.7103e-04 eta 4:45:02 +epoch [40/50] batch [120/1000] time 1.581 (1.571) data 0.001 (0.009) loss 0.6133 (1.0520) acc 78.1250 (73.3073) lr 2.7103e-04 eta 4:44:51 +epoch [40/50] batch [125/1000] time 1.567 (1.570) data 0.001 (0.009) loss 0.8276 (1.0537) acc 71.8750 (73.3250) lr 2.7103e-04 eta 4:44:38 +epoch [40/50] batch [130/1000] time 1.596 (1.571) data 0.001 (0.008) loss 1.5439 (1.0465) acc 62.5000 (73.5577) lr 2.7103e-04 eta 4:44:34 +epoch [40/50] batch [135/1000] time 1.573 (1.571) data 0.000 (0.008) loss 0.6675 (1.0426) acc 81.2500 (73.6574) lr 2.7103e-04 eta 4:44:26 +epoch [40/50] batch [140/1000] time 1.572 (1.572) data 0.000 (0.008) loss 1.2568 (1.0548) acc 59.3750 (73.4375) lr 2.7103e-04 eta 4:44:30 +epoch [40/50] batch [145/1000] time 1.558 (1.571) data 0.001 (0.008) loss 0.9663 (1.0581) acc 71.8750 (73.3190) lr 2.7103e-04 eta 4:44:18 +epoch [40/50] batch [150/1000] time 1.543 (1.571) data 0.000 (0.007) loss 1.1895 (1.0605) acc 65.6250 (73.2708) lr 2.7103e-04 eta 4:44:01 +epoch [40/50] batch [155/1000] time 1.548 (1.570) data 0.000 (0.007) loss 0.9517 (1.0562) acc 78.1250 (73.3266) lr 2.7103e-04 eta 4:43:46 +epoch [40/50] batch [160/1000] time 1.551 (1.570) data 0.000 (0.007) loss 1.1992 (1.0597) acc 65.6250 (73.3203) lr 2.7103e-04 eta 4:43:36 +epoch [40/50] batch [165/1000] time 1.576 (1.570) data 0.000 (0.007) loss 0.8584 (1.0535) acc 75.0000 (73.4280) lr 2.7103e-04 eta 4:43:30 +epoch [40/50] batch [170/1000] time 1.575 (1.569) data 0.000 (0.007) loss 1.2715 (1.0620) acc 59.3750 (73.2721) lr 2.7103e-04 eta 4:43:17 +epoch [40/50] batch [175/1000] time 1.574 (1.569) data 0.001 (0.006) loss 1.0664 (1.0611) 
acc 71.8750 (73.2679) lr 2.7103e-04 eta 4:43:06 +epoch [40/50] batch [180/1000] time 1.539 (1.569) data 0.001 (0.006) loss 1.2695 (1.0688) acc 59.3750 (73.0035) lr 2.7103e-04 eta 4:42:52 +epoch [40/50] batch [185/1000] time 1.556 (1.568) data 0.001 (0.006) loss 1.1426 (1.0750) acc 68.7500 (72.9561) lr 2.7103e-04 eta 4:42:40 +epoch [40/50] batch [190/1000] time 1.530 (1.568) data 0.000 (0.006) loss 1.4004 (1.0738) acc 62.5000 (72.9605) lr 2.7103e-04 eta 4:42:29 +epoch [40/50] batch [195/1000] time 1.562 (1.568) data 0.001 (0.006) loss 1.1797 (1.0708) acc 78.1250 (73.0929) lr 2.7103e-04 eta 4:42:17 +epoch [40/50] batch [200/1000] time 1.721 (1.568) data 0.000 (0.006) loss 0.9385 (1.0722) acc 75.0000 (73.0312) lr 2.7103e-04 eta 4:42:15 +epoch [40/50] batch [205/1000] time 1.563 (1.568) data 0.001 (0.005) loss 0.8008 (1.0658) acc 84.3750 (73.1860) lr 2.7103e-04 eta 4:42:06 +epoch [40/50] batch [210/1000] time 1.577 (1.568) data 0.000 (0.005) loss 1.0273 (1.0666) acc 71.8750 (73.1994) lr 2.7103e-04 eta 4:41:56 +epoch [40/50] batch [215/1000] time 1.546 (1.567) data 0.001 (0.005) loss 0.9023 (1.0684) acc 71.8750 (73.1105) lr 2.7103e-04 eta 4:41:44 +epoch [40/50] batch [220/1000] time 1.554 (1.567) data 0.001 (0.005) loss 1.1230 (1.0697) acc 68.7500 (73.0966) lr 2.7103e-04 eta 4:41:34 +epoch [40/50] batch [225/1000] time 1.565 (1.567) data 0.000 (0.005) loss 0.9785 (1.0718) acc 68.7500 (73.0139) lr 2.7103e-04 eta 4:41:23 +epoch [40/50] batch [230/1000] time 1.556 (1.566) data 0.000 (0.005) loss 1.4980 (1.0746) acc 81.2500 (73.0027) lr 2.7103e-04 eta 4:41:10 +epoch [40/50] batch [235/1000] time 1.574 (1.566) data 0.000 (0.005) loss 1.7422 (1.0790) acc 59.3750 (72.9388) lr 2.7103e-04 eta 4:40:59 +epoch [40/50] batch [240/1000] time 1.546 (1.566) data 0.001 (0.005) loss 0.9229 (1.0793) acc 75.0000 (72.9427) lr 2.7103e-04 eta 4:40:47 +epoch [40/50] batch [245/1000] time 1.728 (1.567) data 0.000 (0.005) loss 0.8696 (1.0801) acc 78.1250 (72.9592) lr 2.7103e-04 eta 4:40:48 
+epoch [40/50] batch [250/1000] time 1.554 (1.566) data 0.000 (0.005) loss 0.5522 (1.0745) acc 84.3750 (73.0250) lr 2.7103e-04 eta 4:40:38 +epoch [40/50] batch [255/1000] time 1.535 (1.566) data 0.000 (0.005) loss 0.8125 (1.0710) acc 78.1250 (73.1127) lr 2.7103e-04 eta 4:40:26 +epoch [40/50] batch [260/1000] time 1.569 (1.566) data 0.000 (0.004) loss 0.7358 (1.0696) acc 78.1250 (73.1370) lr 2.7103e-04 eta 4:40:16 +epoch [40/50] batch [265/1000] time 1.555 (1.566) data 0.000 (0.004) loss 1.3096 (1.0720) acc 68.7500 (73.0660) lr 2.7103e-04 eta 4:40:06 +epoch [40/50] batch [270/1000] time 1.547 (1.565) data 0.001 (0.004) loss 1.4082 (1.0717) acc 71.8750 (73.1019) lr 2.7103e-04 eta 4:39:56 +epoch [40/50] batch [275/1000] time 1.540 (1.565) data 0.000 (0.004) loss 0.8511 (1.0726) acc 71.8750 (73.0114) lr 2.7103e-04 eta 4:39:47 +epoch [40/50] batch [280/1000] time 1.602 (1.565) data 0.000 (0.004) loss 0.8325 (1.0677) acc 84.3750 (73.0915) lr 2.7103e-04 eta 4:39:40 +epoch [40/50] batch [285/1000] time 1.551 (1.565) data 0.000 (0.004) loss 0.9707 (1.0668) acc 71.8750 (73.1469) lr 2.7103e-04 eta 4:39:32 +epoch [40/50] batch [290/1000] time 1.569 (1.566) data 0.001 (0.004) loss 0.5757 (1.0665) acc 81.2500 (73.0927) lr 2.7103e-04 eta 4:39:28 +epoch [40/50] batch [295/1000] time 1.569 (1.566) data 0.000 (0.004) loss 1.4014 (1.0709) acc 68.7500 (73.0085) lr 2.7103e-04 eta 4:39:20 +epoch [40/50] batch [300/1000] time 1.567 (1.566) data 0.001 (0.004) loss 1.6494 (1.0680) acc 62.5000 (73.0833) lr 2.7103e-04 eta 4:39:13 +epoch [40/50] batch [305/1000] time 1.528 (1.565) data 0.000 (0.004) loss 1.0537 (1.0704) acc 62.5000 (73.0328) lr 2.7103e-04 eta 4:39:01 +epoch [40/50] batch [310/1000] time 1.578 (1.565) data 0.000 (0.004) loss 1.1865 (1.0711) acc 65.6250 (72.9940) lr 2.7103e-04 eta 4:38:53 +epoch [40/50] batch [315/1000] time 1.546 (1.565) data 0.001 (0.004) loss 1.3750 (1.0719) acc 68.7500 (73.0060) lr 2.7103e-04 eta 4:38:44 +epoch [40/50] batch [320/1000] time 1.563 (1.565) 
data 0.000 (0.004) loss 1.0684 (1.0699) acc 65.6250 (72.9492) lr 2.7103e-04 eta 4:38:36 +epoch [40/50] batch [325/1000] time 1.539 (1.565) data 0.000 (0.004) loss 1.3359 (1.0738) acc 68.7500 (72.8750) lr 2.7103e-04 eta 4:38:25 +epoch [40/50] batch [330/1000] time 1.538 (1.565) data 0.000 (0.004) loss 0.8647 (1.0749) acc 78.1250 (72.8504) lr 2.7103e-04 eta 4:38:16 +epoch [40/50] batch [335/1000] time 1.578 (1.565) data 0.000 (0.004) loss 0.7456 (1.0727) acc 68.7500 (72.8825) lr 2.7103e-04 eta 4:38:09 +epoch [40/50] batch [340/1000] time 1.550 (1.565) data 0.000 (0.003) loss 0.7656 (1.0753) acc 84.3750 (72.8493) lr 2.7103e-04 eta 4:38:00 +epoch [40/50] batch [345/1000] time 1.554 (1.565) data 0.000 (0.003) loss 1.0342 (1.0755) acc 75.0000 (72.8895) lr 2.7103e-04 eta 4:37:53 +epoch [40/50] batch [350/1000] time 1.565 (1.565) data 0.001 (0.003) loss 1.1396 (1.0752) acc 68.7500 (72.9018) lr 2.7103e-04 eta 4:37:44 +epoch [40/50] batch [355/1000] time 1.555 (1.565) data 0.001 (0.003) loss 1.0215 (1.0741) acc 71.8750 (72.9401) lr 2.7103e-04 eta 4:37:40 +epoch [40/50] batch [360/1000] time 1.574 (1.565) data 0.000 (0.003) loss 0.7812 (1.0745) acc 84.3750 (72.9514) lr 2.7103e-04 eta 4:37:32 +epoch [40/50] batch [365/1000] time 1.542 (1.565) data 0.000 (0.003) loss 2.0586 (1.0756) acc 56.2500 (72.9709) lr 2.7103e-04 eta 4:37:23 +epoch [40/50] batch [370/1000] time 1.545 (1.565) data 0.000 (0.003) loss 1.1006 (1.0763) acc 68.7500 (72.9307) lr 2.7103e-04 eta 4:37:13 +epoch [40/50] batch [375/1000] time 1.557 (1.565) data 0.000 (0.003) loss 1.0068 (1.0767) acc 71.8750 (72.9167) lr 2.7103e-04 eta 4:37:03 +epoch [40/50] batch [380/1000] time 1.554 (1.564) data 0.001 (0.003) loss 1.0449 (1.0769) acc 75.0000 (72.8783) lr 2.7103e-04 eta 4:36:54 +epoch [40/50] batch [385/1000] time 1.554 (1.564) data 0.000 (0.003) loss 1.0010 (1.0793) acc 71.8750 (72.8571) lr 2.7103e-04 eta 4:36:46 +epoch [40/50] batch [390/1000] time 1.541 (1.564) data 0.000 (0.003) loss 1.2334 (1.0783) acc 71.8750 
(72.8686) lr 2.7103e-04 eta 4:36:38 +epoch [40/50] batch [395/1000] time 1.604 (1.565) data 0.000 (0.003) loss 1.2490 (1.0802) acc 75.0000 (72.8085) lr 2.7103e-04 eta 4:36:31 +epoch [40/50] batch [400/1000] time 1.585 (1.565) data 0.000 (0.003) loss 1.1416 (1.0773) acc 71.8750 (72.8516) lr 2.7103e-04 eta 4:36:29 +epoch [40/50] batch [405/1000] time 1.566 (1.565) data 0.001 (0.003) loss 0.7070 (1.0774) acc 78.1250 (72.7701) lr 2.7103e-04 eta 4:36:21 +epoch [40/50] batch [410/1000] time 1.535 (1.565) data 0.000 (0.003) loss 1.0234 (1.0785) acc 68.7500 (72.7210) lr 2.7103e-04 eta 4:36:12 +epoch [40/50] batch [415/1000] time 1.572 (1.565) data 0.000 (0.003) loss 0.4355 (1.0748) acc 93.7500 (72.8238) lr 2.7103e-04 eta 4:36:05 +epoch [40/50] batch [420/1000] time 1.589 (1.565) data 0.001 (0.003) loss 1.2236 (1.0774) acc 71.8750 (72.7307) lr 2.7103e-04 eta 4:35:59 +epoch [40/50] batch [425/1000] time 1.546 (1.565) data 0.000 (0.003) loss 1.1562 (1.0773) acc 65.6250 (72.7353) lr 2.7103e-04 eta 4:35:49 +epoch [40/50] batch [430/1000] time 1.567 (1.565) data 0.001 (0.003) loss 1.1582 (1.0771) acc 68.7500 (72.7398) lr 2.7103e-04 eta 4:35:41 +epoch [40/50] batch [435/1000] time 1.562 (1.565) data 0.000 (0.003) loss 1.8027 (1.0777) acc 53.1250 (72.7155) lr 2.7103e-04 eta 4:35:32 +epoch [40/50] batch [440/1000] time 1.565 (1.565) data 0.000 (0.003) loss 0.8960 (1.0798) acc 71.8750 (72.6847) lr 2.7103e-04 eta 4:35:28 +epoch [40/50] batch [445/1000] time 1.582 (1.565) data 0.001 (0.003) loss 1.5674 (1.0817) acc 68.7500 (72.7037) lr 2.7103e-04 eta 4:35:21 +epoch [40/50] batch [450/1000] time 1.558 (1.565) data 0.000 (0.003) loss 1.3203 (1.0825) acc 68.7500 (72.6736) lr 2.7103e-04 eta 4:35:14 +epoch [40/50] batch [455/1000] time 1.560 (1.565) data 0.000 (0.003) loss 0.8906 (1.0806) acc 78.1250 (72.7404) lr 2.7103e-04 eta 4:35:06 +epoch [40/50] batch [460/1000] time 1.576 (1.565) data 0.000 (0.003) loss 1.0977 (1.0788) acc 78.1250 (72.7582) lr 2.7103e-04 eta 4:34:59 +epoch [40/50] 
batch [465/1000] time 1.550 (1.565) data 0.000 (0.003) loss 1.3057 (1.0788) acc 59.3750 (72.7487) lr 2.7103e-04 eta 4:34:49 +epoch [40/50] batch [470/1000] time 1.580 (1.565) data 0.001 (0.003) loss 0.8887 (1.0785) acc 71.8750 (72.7394) lr 2.7103e-04 eta 4:34:42 +epoch [40/50] batch [475/1000] time 1.583 (1.565) data 0.001 (0.003) loss 1.2637 (1.0786) acc 71.8750 (72.7434) lr 2.7103e-04 eta 4:34:34 +epoch [40/50] batch [480/1000] time 1.542 (1.565) data 0.000 (0.003) loss 1.0352 (1.0778) acc 75.0000 (72.7604) lr 2.7103e-04 eta 4:34:25 +epoch [40/50] batch [485/1000] time 1.563 (1.565) data 0.000 (0.003) loss 0.6860 (1.0777) acc 84.3750 (72.7835) lr 2.7103e-04 eta 4:34:17 +epoch [40/50] batch [490/1000] time 1.594 (1.565) data 0.000 (0.003) loss 0.8735 (1.0799) acc 71.8750 (72.7360) lr 2.7103e-04 eta 4:34:11 +epoch [40/50] batch [495/1000] time 1.560 (1.565) data 0.001 (0.003) loss 1.5254 (1.0805) acc 65.6250 (72.7462) lr 2.7103e-04 eta 4:34:03 +epoch [40/50] batch [500/1000] time 1.571 (1.565) data 0.000 (0.003) loss 0.9683 (1.0791) acc 68.7500 (72.7875) lr 2.7103e-04 eta 4:33:54 +epoch [40/50] batch [505/1000] time 1.591 (1.566) data 0.001 (0.003) loss 1.1738 (1.0788) acc 68.7500 (72.8156) lr 2.7103e-04 eta 4:33:51 +epoch [40/50] batch [510/1000] time 1.577 (1.566) data 0.001 (0.002) loss 0.8701 (1.0797) acc 78.1250 (72.7880) lr 2.7103e-04 eta 4:33:43 +epoch [40/50] batch [515/1000] time 1.568 (1.566) data 0.001 (0.002) loss 0.4233 (1.0787) acc 93.7500 (72.8398) lr 2.7103e-04 eta 4:33:37 +epoch [40/50] batch [520/1000] time 1.580 (1.566) data 0.001 (0.002) loss 1.7090 (1.0796) acc 59.3750 (72.7885) lr 2.7103e-04 eta 4:33:31 +epoch [40/50] batch [525/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.8745 (1.0796) acc 71.8750 (72.7976) lr 2.7103e-04 eta 4:33:22 +epoch [40/50] batch [530/1000] time 1.600 (1.566) data 0.000 (0.002) loss 1.4160 (1.0808) acc 65.6250 (72.7948) lr 2.7103e-04 eta 4:33:16 +epoch [40/50] batch [535/1000] time 1.545 (1.566) data 0.000 
(0.002) loss 1.1260 (1.0795) acc 71.8750 (72.8271) lr 2.7103e-04 eta 4:33:07 +epoch [40/50] batch [540/1000] time 1.575 (1.566) data 0.000 (0.002) loss 0.8789 (1.0791) acc 71.8750 (72.8414) lr 2.7103e-04 eta 4:32:59 +epoch [40/50] batch [545/1000] time 1.591 (1.566) data 0.000 (0.002) loss 0.8555 (1.0789) acc 75.0000 (72.8383) lr 2.7103e-04 eta 4:32:51 +epoch [40/50] batch [550/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.2910 (1.0800) acc 71.8750 (72.8182) lr 2.7103e-04 eta 4:32:48 +epoch [40/50] batch [555/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.0186 (1.0797) acc 71.8750 (72.8378) lr 2.7103e-04 eta 4:32:39 +epoch [40/50] batch [560/1000] time 1.579 (1.566) data 0.001 (0.002) loss 0.7886 (1.0784) acc 78.1250 (72.8683) lr 2.7103e-04 eta 4:32:30 +epoch [40/50] batch [565/1000] time 1.562 (1.566) data 0.001 (0.002) loss 0.5884 (1.0769) acc 78.1250 (72.8706) lr 2.7103e-04 eta 4:32:22 +epoch [40/50] batch [570/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.1924 (1.0765) acc 71.8750 (72.8728) lr 2.7103e-04 eta 4:32:14 +epoch [40/50] batch [575/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.9111 (1.0763) acc 78.1250 (72.8696) lr 2.7103e-04 eta 4:32:05 +epoch [40/50] batch [580/1000] time 1.582 (1.566) data 0.001 (0.002) loss 1.3857 (1.0798) acc 75.0000 (72.7963) lr 2.7103e-04 eta 4:31:58 +epoch [40/50] batch [585/1000] time 1.558 (1.566) data 0.000 (0.002) loss 0.9351 (1.0811) acc 78.1250 (72.7778) lr 2.7103e-04 eta 4:31:50 +epoch [40/50] batch [590/1000] time 1.735 (1.566) data 0.000 (0.002) loss 0.7778 (1.0821) acc 71.8750 (72.7701) lr 2.7103e-04 eta 4:31:45 +epoch [40/50] batch [595/1000] time 1.541 (1.566) data 0.001 (0.002) loss 1.0654 (1.0820) acc 75.0000 (72.7941) lr 2.7103e-04 eta 4:31:35 +epoch [40/50] batch [600/1000] time 1.572 (1.566) data 0.001 (0.002) loss 0.4854 (1.0810) acc 90.6250 (72.8281) lr 2.7103e-04 eta 4:31:28 +epoch [40/50] batch [605/1000] time 1.534 (1.566) data 0.001 (0.002) loss 1.2744 (1.0820) acc 65.6250 (72.7893) lr 
2.7103e-04 eta 4:31:19 +epoch [40/50] batch [610/1000] time 1.589 (1.566) data 0.001 (0.002) loss 1.3047 (1.0838) acc 56.2500 (72.7254) lr 2.7103e-04 eta 4:31:12 +epoch [40/50] batch [615/1000] time 1.571 (1.566) data 0.000 (0.002) loss 1.2998 (1.0828) acc 65.6250 (72.7388) lr 2.7103e-04 eta 4:31:04 +epoch [40/50] batch [620/1000] time 1.565 (1.566) data 0.000 (0.002) loss 0.8623 (1.0820) acc 78.1250 (72.7520) lr 2.7103e-04 eta 4:30:56 +epoch [40/50] batch [625/1000] time 1.538 (1.566) data 0.001 (0.002) loss 1.2607 (1.0826) acc 68.7500 (72.7300) lr 2.7103e-04 eta 4:30:47 +epoch [40/50] batch [630/1000] time 1.576 (1.566) data 0.000 (0.002) loss 0.8076 (1.0814) acc 75.0000 (72.7530) lr 2.7103e-04 eta 4:30:39 +epoch [40/50] batch [635/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.2188 (1.0815) acc 75.0000 (72.7904) lr 2.7103e-04 eta 4:30:30 +epoch [40/50] batch [640/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.7808 (1.0808) acc 81.2500 (72.8174) lr 2.7103e-04 eta 4:30:23 +epoch [40/50] batch [645/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.1406 (1.0812) acc 68.7500 (72.7859) lr 2.7103e-04 eta 4:30:15 +epoch [40/50] batch [650/1000] time 1.554 (1.566) data 0.000 (0.002) loss 0.7866 (1.0823) acc 84.3750 (72.7356) lr 2.7103e-04 eta 4:30:06 +epoch [40/50] batch [655/1000] time 1.572 (1.566) data 0.001 (0.002) loss 1.3564 (1.0809) acc 71.8750 (72.7767) lr 2.7103e-04 eta 4:30:00 +epoch [40/50] batch [660/1000] time 1.576 (1.566) data 0.001 (0.002) loss 1.3525 (1.0811) acc 65.6250 (72.7652) lr 2.7103e-04 eta 4:29:51 +epoch [40/50] batch [665/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.2227 (1.0801) acc 75.0000 (72.7820) lr 2.7103e-04 eta 4:29:42 +epoch [40/50] batch [670/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.7808 (1.0784) acc 78.1250 (72.8125) lr 2.7103e-04 eta 4:29:35 +epoch [40/50] batch [675/1000] time 1.535 (1.566) data 0.000 (0.002) loss 1.2314 (1.0801) acc 71.8750 (72.7963) lr 2.7103e-04 eta 4:29:26 +epoch [40/50] batch 
[680/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.0566 (1.0803) acc 75.0000 (72.7895) lr 2.7103e-04 eta 4:29:19 +epoch [40/50] batch [685/1000] time 1.584 (1.566) data 0.001 (0.002) loss 0.9707 (1.0809) acc 71.8750 (72.7600) lr 2.7103e-04 eta 4:29:11 +epoch [40/50] batch [690/1000] time 1.564 (1.566) data 0.000 (0.002) loss 2.0996 (1.0811) acc 53.1250 (72.7763) lr 2.7103e-04 eta 4:29:04 +epoch [40/50] batch [695/1000] time 1.578 (1.566) data 0.001 (0.002) loss 0.4526 (1.0803) acc 84.3750 (72.7968) lr 2.7103e-04 eta 4:28:57 +epoch [40/50] batch [700/1000] time 1.570 (1.566) data 0.001 (0.002) loss 1.1797 (1.0795) acc 75.0000 (72.8259) lr 2.7103e-04 eta 4:28:51 +epoch [40/50] batch [705/1000] time 1.569 (1.566) data 0.001 (0.002) loss 0.9370 (1.0798) acc 68.7500 (72.8413) lr 2.7103e-04 eta 4:28:44 +epoch [40/50] batch [710/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.6494 (1.0809) acc 56.2500 (72.8345) lr 2.7103e-04 eta 4:28:36 +epoch [40/50] batch [715/1000] time 1.541 (1.566) data 0.000 (0.002) loss 0.9731 (1.0799) acc 71.8750 (72.8497) lr 2.7103e-04 eta 4:28:28 +epoch [40/50] batch [720/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.7803 (1.0799) acc 75.0000 (72.8472) lr 2.7103e-04 eta 4:28:20 +epoch [40/50] batch [725/1000] time 1.570 (1.566) data 0.000 (0.002) loss 1.3555 (1.0801) acc 65.6250 (72.8362) lr 2.7103e-04 eta 4:28:12 +epoch [40/50] batch [730/1000] time 1.568 (1.566) data 0.001 (0.002) loss 1.1582 (1.0797) acc 78.1250 (72.8510) lr 2.7103e-04 eta 4:28:03 +epoch [40/50] batch [735/1000] time 1.581 (1.566) data 0.000 (0.002) loss 0.7422 (1.0787) acc 75.0000 (72.8656) lr 2.7103e-04 eta 4:27:56 +epoch [40/50] batch [740/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.1768 (1.0799) acc 78.1250 (72.8378) lr 2.7103e-04 eta 4:27:47 +epoch [40/50] batch [745/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.2314 (1.0806) acc 62.5000 (72.8020) lr 2.7103e-04 eta 4:27:42 +epoch [40/50] batch [750/1000] time 1.546 (1.566) data 0.000 (0.002) loss 
1.3516 (1.0825) acc 65.6250 (72.7667) lr 2.7103e-04 eta 4:27:33 +epoch [40/50] batch [755/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.9023 (1.0821) acc 71.8750 (72.7608) lr 2.7103e-04 eta 4:27:25 +epoch [40/50] batch [760/1000] time 1.568 (1.566) data 0.001 (0.002) loss 1.8838 (1.0825) acc 56.2500 (72.7508) lr 2.7103e-04 eta 4:27:18 +epoch [40/50] batch [765/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.1885 (1.0818) acc 62.5000 (72.7614) lr 2.7103e-04 eta 4:27:10 +epoch [40/50] batch [770/1000] time 1.558 (1.566) data 0.000 (0.002) loss 0.5884 (1.0818) acc 84.3750 (72.7881) lr 2.7103e-04 eta 4:27:02 +epoch [40/50] batch [775/1000] time 1.551 (1.566) data 0.000 (0.002) loss 1.0283 (1.0811) acc 71.8750 (72.7944) lr 2.7103e-04 eta 4:26:54 +epoch [40/50] batch [780/1000] time 1.552 (1.566) data 0.001 (0.002) loss 1.0596 (1.0801) acc 71.8750 (72.7965) lr 2.7103e-04 eta 4:26:45 +epoch [40/50] batch [785/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.5420 (1.0811) acc 68.7500 (72.7906) lr 2.7103e-04 eta 4:26:37 +epoch [40/50] batch [790/1000] time 1.575 (1.566) data 0.000 (0.002) loss 1.2178 (1.0812) acc 81.2500 (72.7769) lr 2.7103e-04 eta 4:26:30 +epoch [40/50] batch [795/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.8164 (1.0814) acc 78.1250 (72.7909) lr 2.7103e-04 eta 4:26:21 +epoch [40/50] batch [800/1000] time 1.552 (1.566) data 0.001 (0.002) loss 1.3564 (1.0819) acc 71.8750 (72.7891) lr 2.7103e-04 eta 4:26:13 +epoch [40/50] batch [805/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.1465 (1.0818) acc 78.1250 (72.7834) lr 2.7103e-04 eta 4:26:07 +epoch [40/50] batch [810/1000] time 1.562 (1.566) data 0.001 (0.002) loss 1.1543 (1.0817) acc 71.8750 (72.7894) lr 2.7103e-04 eta 4:25:59 +epoch [40/50] batch [815/1000] time 1.582 (1.566) data 0.000 (0.002) loss 0.8120 (1.0805) acc 75.0000 (72.8106) lr 2.7103e-04 eta 4:25:52 +epoch [40/50] batch [820/1000] time 1.545 (1.566) data 0.001 (0.002) loss 0.9785 (1.0804) acc 71.8750 (72.8087) lr 2.7103e-04 
eta 4:25:43 +epoch [40/50] batch [825/1000] time 1.540 (1.566) data 0.000 (0.002) loss 0.9009 (1.0795) acc 87.5000 (72.8258) lr 2.7103e-04 eta 4:25:35 +epoch [40/50] batch [830/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.1357 (1.0805) acc 78.1250 (72.8200) lr 2.7103e-04 eta 4:25:27 +epoch [40/50] batch [835/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.8262 (1.0797) acc 78.1250 (72.8256) lr 2.7103e-04 eta 4:25:18 +epoch [40/50] batch [840/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.5518 (1.0796) acc 90.6250 (72.8385) lr 2.7103e-04 eta 4:25:09 +epoch [40/50] batch [845/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.0303 (1.0785) acc 65.6250 (72.8624) lr 2.7103e-04 eta 4:25:01 +epoch [40/50] batch [850/1000] time 1.555 (1.566) data 0.000 (0.002) loss 1.6338 (1.0786) acc 71.8750 (72.8676) lr 2.7103e-04 eta 4:24:55 +epoch [40/50] batch [855/1000] time 1.550 (1.566) data 0.000 (0.002) loss 0.8203 (1.0777) acc 78.1250 (72.9094) lr 2.7103e-04 eta 4:24:47 +epoch [40/50] batch [860/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.7349 (1.0776) acc 81.2500 (72.9324) lr 2.7103e-04 eta 4:24:38 +epoch [40/50] batch [865/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.6997 (1.0774) acc 84.3750 (72.9408) lr 2.7103e-04 eta 4:24:30 +epoch [40/50] batch [870/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.0859 (1.0771) acc 71.8750 (72.9418) lr 2.7103e-04 eta 4:24:22 +epoch [40/50] batch [875/1000] time 1.563 (1.566) data 0.001 (0.002) loss 1.0635 (1.0767) acc 71.8750 (72.9607) lr 2.7103e-04 eta 4:24:15 +epoch [40/50] batch [880/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.6094 (1.0764) acc 75.0000 (72.9865) lr 2.7103e-04 eta 4:24:07 +epoch [40/50] batch [885/1000] time 1.584 (1.566) data 0.000 (0.002) loss 1.7051 (1.0772) acc 62.5000 (72.9555) lr 2.7103e-04 eta 4:23:59 +epoch [40/50] batch [890/1000] time 1.535 (1.566) data 0.000 (0.002) loss 0.8809 (1.0783) acc 71.8750 (72.9494) lr 2.7103e-04 eta 4:23:50 +epoch [40/50] batch [895/1000] time 
1.586 (1.566) data 0.001 (0.002) loss 0.9795 (1.0787) acc 81.2500 (72.9504) lr 2.7103e-04 eta 4:23:45 +epoch [40/50] batch [900/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.8999 (1.0786) acc 87.5000 (72.9653) lr 2.7103e-04 eta 4:23:37 +epoch [40/50] batch [905/1000] time 1.550 (1.566) data 0.001 (0.002) loss 1.3115 (1.0792) acc 68.7500 (72.9420) lr 2.7103e-04 eta 4:23:29 +epoch [40/50] batch [910/1000] time 1.538 (1.566) data 0.000 (0.002) loss 1.0439 (1.0799) acc 62.5000 (72.9293) lr 2.7103e-04 eta 4:23:20 +epoch [40/50] batch [915/1000] time 1.538 (1.566) data 0.001 (0.002) loss 0.7812 (1.0801) acc 87.5000 (72.9337) lr 2.7103e-04 eta 4:23:12 +epoch [40/50] batch [920/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.5566 (1.0797) acc 78.1250 (72.9280) lr 2.7103e-04 eta 4:23:04 +epoch [40/50] batch [925/1000] time 1.605 (1.566) data 0.001 (0.002) loss 1.2432 (1.0804) acc 75.0000 (72.9122) lr 2.7103e-04 eta 4:22:56 +epoch [40/50] batch [930/1000] time 1.572 (1.566) data 0.001 (0.002) loss 1.2236 (1.0806) acc 59.3750 (72.9066) lr 2.7103e-04 eta 4:22:48 +epoch [40/50] batch [935/1000] time 1.576 (1.566) data 0.000 (0.002) loss 1.2959 (1.0808) acc 65.6250 (72.9044) lr 2.7103e-04 eta 4:22:41 +epoch [40/50] batch [940/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.7466 (1.0804) acc 78.1250 (72.9156) lr 2.7103e-04 eta 4:22:33 +epoch [40/50] batch [945/1000] time 1.539 (1.566) data 0.000 (0.002) loss 0.9092 (1.0815) acc 81.2500 (72.9134) lr 2.7103e-04 eta 4:22:24 +epoch [40/50] batch [950/1000] time 1.547 (1.566) data 0.000 (0.002) loss 0.9556 (1.0811) acc 65.6250 (72.9145) lr 2.7103e-04 eta 4:22:15 +epoch [40/50] batch [955/1000] time 1.715 (1.566) data 0.000 (0.002) loss 1.4092 (1.0814) acc 65.6250 (72.8959) lr 2.7103e-04 eta 4:22:09 +epoch [40/50] batch [960/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.2305 (1.0816) acc 65.6250 (72.8939) lr 2.7103e-04 eta 4:22:01 +epoch [40/50] batch [965/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.4404 (1.0812) 
acc 59.3750 (72.9210) lr 2.7103e-04 eta 4:21:53 +epoch [40/50] batch [970/1000] time 1.537 (1.566) data 0.000 (0.002) loss 0.6792 (1.0796) acc 87.5000 (72.9639) lr 2.7103e-04 eta 4:21:45 +epoch [40/50] batch [975/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.8154 (1.0810) acc 56.2500 (72.9199) lr 2.7103e-04 eta 4:21:38 +epoch [40/50] batch [980/1000] time 1.570 (1.566) data 0.000 (0.002) loss 1.2178 (1.0807) acc 65.6250 (72.9050) lr 2.7103e-04 eta 4:21:30 +epoch [40/50] batch [985/1000] time 1.555 (1.566) data 0.001 (0.002) loss 0.7100 (1.0798) acc 78.1250 (72.9156) lr 2.7103e-04 eta 4:21:22 +epoch [40/50] batch [990/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.0586 (1.0798) acc 71.8750 (72.9135) lr 2.7103e-04 eta 4:21:13 +epoch [40/50] batch [995/1000] time 1.541 (1.566) data 0.000 (0.002) loss 1.0762 (1.0805) acc 78.1250 (72.9083) lr 2.7103e-04 eta 4:21:05 +epoch [40/50] batch [1000/1000] time 1.695 (1.566) data 0.000 (0.001) loss 0.9175 (1.0813) acc 84.3750 (72.8906) lr 2.2949e-04 eta 4:20:59 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,399 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [41/50] batch [5/1000] time 1.541 (1.700) data 0.000 (0.206) loss 0.4487 (1.1647) acc 87.5000 (75.6250) lr 2.2949e-04 eta 4:43:06 +epoch [41/50] batch [10/1000] time 1.552 (1.633) data 0.000 (0.103) loss 0.8735 (1.1973) acc 78.1250 (74.6875) lr 2.2949e-04 eta 4:31:54 +epoch [41/50] batch [15/1000] time 1.558 (1.610) data 0.001 (0.069) loss 0.7847 (1.0888) acc 75.0000 (74.3750) lr 2.2949e-04 eta 4:27:55 +epoch [41/50] batch [20/1000] time 1.551 (1.596) data 0.000 (0.052) loss 1.0859 (1.0669) acc 68.7500 (73.7500) lr 2.2949e-04 eta 4:25:31 +epoch [41/50] batch [25/1000] time 1.559 (1.590) data 0.000 (0.042) loss 0.9409 (1.1090) acc 84.3750 (74.0000) lr 2.2949e-04 eta 4:24:17 +epoch [41/50] batch 
[30/1000] time 1.800 (1.593) data 0.001 (0.035) loss 1.0996 (1.1038) acc 68.7500 (73.6458) lr 2.2949e-04 eta 4:24:43 +epoch [41/50] batch [35/1000] time 1.547 (1.590) data 0.001 (0.030) loss 1.2363 (1.0961) acc 71.8750 (73.3929) lr 2.2949e-04 eta 4:23:59 +epoch [41/50] batch [40/1000] time 1.584 (1.587) data 0.001 (0.026) loss 1.4258 (1.1052) acc 75.0000 (73.2031) lr 2.2949e-04 eta 4:23:27 +epoch [41/50] batch [45/1000] time 1.565 (1.584) data 0.001 (0.023) loss 0.9194 (1.1097) acc 78.1250 (72.9167) lr 2.2949e-04 eta 4:22:45 +epoch [41/50] batch [50/1000] time 1.574 (1.581) data 0.001 (0.021) loss 0.7847 (1.0998) acc 75.0000 (73.1875) lr 2.2949e-04 eta 4:22:08 +epoch [41/50] batch [55/1000] time 1.571 (1.579) data 0.000 (0.019) loss 0.8506 (1.1079) acc 75.0000 (73.0682) lr 2.2949e-04 eta 4:21:44 +epoch [41/50] batch [60/1000] time 1.567 (1.577) data 0.000 (0.018) loss 1.5830 (1.1060) acc 59.3750 (72.9688) lr 2.2949e-04 eta 4:21:17 +epoch [41/50] batch [65/1000] time 1.560 (1.576) data 0.000 (0.016) loss 1.6357 (1.1173) acc 75.0000 (73.1250) lr 2.2949e-04 eta 4:20:55 +epoch [41/50] batch [70/1000] time 1.556 (1.574) data 0.000 (0.015) loss 0.7920 (1.1160) acc 81.2500 (73.2589) lr 2.2949e-04 eta 4:20:29 +epoch [41/50] batch [75/1000] time 1.571 (1.573) data 0.000 (0.014) loss 0.9932 (1.1245) acc 78.1250 (72.7500) lr 2.2949e-04 eta 4:20:15 +epoch [41/50] batch [80/1000] time 1.582 (1.573) data 0.001 (0.013) loss 0.4580 (1.1405) acc 81.2500 (72.1875) lr 2.2949e-04 eta 4:20:01 +epoch [41/50] batch [85/1000] time 1.547 (1.572) data 0.001 (0.013) loss 1.2090 (1.1496) acc 68.7500 (72.1324) lr 2.2949e-04 eta 4:19:45 +epoch [41/50] batch [90/1000] time 1.590 (1.571) data 0.000 (0.012) loss 1.1846 (1.1404) acc 75.0000 (72.4306) lr 2.2949e-04 eta 4:19:31 +epoch [41/50] batch [95/1000] time 1.544 (1.573) data 0.001 (0.011) loss 1.1514 (1.1368) acc 71.8750 (72.4671) lr 2.2949e-04 eta 4:19:36 +epoch [41/50] batch [100/1000] time 1.580 (1.572) data 0.000 (0.011) loss 0.8516 
(1.1324) acc 81.2500 (72.5938) lr 2.2949e-04 eta 4:19:26 +epoch [41/50] batch [105/1000] time 1.533 (1.571) data 0.000 (0.010) loss 1.6592 (1.1274) acc 65.6250 (72.6786) lr 2.2949e-04 eta 4:19:07 +epoch [41/50] batch [110/1000] time 1.555 (1.571) data 0.001 (0.010) loss 1.0674 (1.1190) acc 68.7500 (72.7841) lr 2.2949e-04 eta 4:18:54 +epoch [41/50] batch [115/1000] time 1.557 (1.570) data 0.000 (0.009) loss 1.2959 (1.1197) acc 68.7500 (72.6630) lr 2.2949e-04 eta 4:18:37 +epoch [41/50] batch [120/1000] time 1.558 (1.570) data 0.001 (0.009) loss 0.7979 (1.1156) acc 75.0000 (72.6562) lr 2.2949e-04 eta 4:18:29 +epoch [41/50] batch [125/1000] time 1.553 (1.570) data 0.000 (0.009) loss 1.3516 (1.1208) acc 75.0000 (72.6250) lr 2.2949e-04 eta 4:18:19 +epoch [41/50] batch [130/1000] time 1.554 (1.569) data 0.000 (0.008) loss 1.4814 (1.1252) acc 71.8750 (72.5240) lr 2.2949e-04 eta 4:18:08 +epoch [41/50] batch [135/1000] time 1.579 (1.569) data 0.001 (0.008) loss 1.7754 (1.1278) acc 65.6250 (72.4769) lr 2.2949e-04 eta 4:17:58 +epoch [41/50] batch [140/1000] time 1.543 (1.570) data 0.000 (0.008) loss 1.2246 (1.1206) acc 75.0000 (72.5446) lr 2.2949e-04 eta 4:17:59 +epoch [41/50] batch [145/1000] time 1.558 (1.569) data 0.000 (0.008) loss 0.4973 (1.1121) acc 90.6250 (72.7371) lr 2.2949e-04 eta 4:17:46 +epoch [41/50] batch [150/1000] time 1.533 (1.569) data 0.000 (0.007) loss 0.7476 (1.1110) acc 81.2500 (72.7917) lr 2.2949e-04 eta 4:17:33 +epoch [41/50] batch [155/1000] time 1.570 (1.568) data 0.000 (0.007) loss 0.9453 (1.1073) acc 75.0000 (72.8831) lr 2.2949e-04 eta 4:17:20 +epoch [41/50] batch [160/1000] time 1.561 (1.568) data 0.000 (0.007) loss 1.5332 (1.1121) acc 68.7500 (72.8125) lr 2.2949e-04 eta 4:17:11 +epoch [41/50] batch [165/1000] time 1.567 (1.568) data 0.000 (0.007) loss 0.9268 (1.1117) acc 78.1250 (72.7083) lr 2.2949e-04 eta 4:17:00 +epoch [41/50] batch [170/1000] time 1.543 (1.567) data 0.001 (0.007) loss 1.0771 (1.1153) acc 68.7500 (72.6654) lr 2.2949e-04 eta 
4:16:46 +epoch [41/50] batch [175/1000] time 1.551 (1.567) data 0.000 (0.006) loss 0.9473 (1.1199) acc 78.1250 (72.6607) lr 2.2949e-04 eta 4:16:36 +epoch [41/50] batch [180/1000] time 1.595 (1.567) data 0.000 (0.006) loss 0.6621 (1.1132) acc 81.2500 (72.8299) lr 2.2949e-04 eta 4:16:27 +epoch [41/50] batch [185/1000] time 1.548 (1.567) data 0.000 (0.006) loss 1.3896 (1.1134) acc 71.8750 (72.8378) lr 2.2949e-04 eta 4:16:24 +epoch [41/50] batch [190/1000] time 1.537 (1.567) data 0.000 (0.006) loss 1.0957 (1.1127) acc 71.8750 (72.8783) lr 2.2949e-04 eta 4:16:14 +epoch [41/50] batch [195/1000] time 1.561 (1.567) data 0.001 (0.006) loss 0.7285 (1.1105) acc 84.3750 (72.9808) lr 2.2949e-04 eta 4:16:04 +epoch [41/50] batch [200/1000] time 1.545 (1.567) data 0.001 (0.006) loss 1.1260 (1.1079) acc 65.6250 (73.0000) lr 2.2949e-04 eta 4:15:53 +epoch [41/50] batch [205/1000] time 1.533 (1.566) data 0.001 (0.006) loss 1.0469 (1.1071) acc 71.8750 (73.0640) lr 2.2949e-04 eta 4:15:42 +epoch [41/50] batch [210/1000] time 1.557 (1.566) data 0.000 (0.005) loss 0.8213 (1.1082) acc 81.2500 (73.1101) lr 2.2949e-04 eta 4:15:32 +epoch [41/50] batch [215/1000] time 1.556 (1.566) data 0.000 (0.005) loss 1.2568 (1.1079) acc 75.0000 (73.0669) lr 2.2949e-04 eta 4:15:24 +epoch [41/50] batch [220/1000] time 1.565 (1.566) data 0.000 (0.005) loss 1.3486 (1.1128) acc 68.7500 (73.0398) lr 2.2949e-04 eta 4:15:19 +epoch [41/50] batch [225/1000] time 1.568 (1.566) data 0.001 (0.005) loss 1.2920 (1.1118) acc 56.2500 (72.9583) lr 2.2949e-04 eta 4:15:11 +epoch [41/50] batch [230/1000] time 1.568 (1.566) data 0.001 (0.005) loss 1.3779 (1.1106) acc 78.1250 (73.0435) lr 2.2949e-04 eta 4:15:03 +epoch [41/50] batch [235/1000] time 1.550 (1.566) data 0.001 (0.005) loss 1.0312 (1.1122) acc 71.8750 (73.0053) lr 2.2949e-04 eta 4:14:52 +epoch [41/50] batch [240/1000] time 1.530 (1.566) data 0.000 (0.005) loss 1.0059 (1.1113) acc 78.1250 (73.0208) lr 2.2949e-04 eta 4:14:40 +epoch [41/50] batch [245/1000] time 1.558 
(1.566) data 0.000 (0.005) loss 0.8887 (1.1125) acc 75.0000 (72.9974) lr 2.2949e-04 eta 4:14:37 +epoch [41/50] batch [250/1000] time 1.582 (1.566) data 0.001 (0.005) loss 1.0908 (1.1131) acc 75.0000 (72.9625) lr 2.2949e-04 eta 4:14:29 +epoch [41/50] batch [255/1000] time 1.564 (1.566) data 0.001 (0.005) loss 0.7529 (1.1047) acc 71.8750 (73.1495) lr 2.2949e-04 eta 4:14:22 +epoch [41/50] batch [260/1000] time 1.532 (1.566) data 0.001 (0.004) loss 0.9106 (1.1070) acc 78.1250 (73.1130) lr 2.2949e-04 eta 4:14:11 +epoch [41/50] batch [265/1000] time 1.543 (1.566) data 0.001 (0.004) loss 1.0186 (1.1061) acc 78.1250 (73.1250) lr 2.2949e-04 eta 4:14:02 +epoch [41/50] batch [270/1000] time 1.576 (1.566) data 0.000 (0.004) loss 0.8193 (1.1063) acc 71.8750 (73.1597) lr 2.2949e-04 eta 4:13:55 +epoch [41/50] batch [275/1000] time 1.552 (1.566) data 0.001 (0.004) loss 0.8955 (1.1068) acc 71.8750 (73.0568) lr 2.2949e-04 eta 4:13:45 +epoch [41/50] batch [280/1000] time 1.575 (1.566) data 0.001 (0.004) loss 0.7749 (1.1105) acc 78.1250 (72.9464) lr 2.2949e-04 eta 4:13:38 +epoch [41/50] batch [285/1000] time 1.571 (1.566) data 0.004 (0.004) loss 0.6689 (1.1093) acc 84.3750 (73.0044) lr 2.2949e-04 eta 4:13:31 +epoch [41/50] batch [290/1000] time 1.550 (1.566) data 0.001 (0.004) loss 0.8755 (1.1084) acc 68.7500 (72.9957) lr 2.2949e-04 eta 4:13:28 +epoch [41/50] batch [295/1000] time 1.569 (1.566) data 0.001 (0.004) loss 1.2568 (1.1050) acc 62.5000 (72.9449) lr 2.2949e-04 eta 4:13:19 +epoch [41/50] batch [300/1000] time 1.566 (1.566) data 0.000 (0.004) loss 1.1279 (1.1013) acc 71.8750 (72.9479) lr 2.2949e-04 eta 4:13:10 +epoch [41/50] batch [305/1000] time 1.561 (1.566) data 0.001 (0.004) loss 0.9873 (1.0967) acc 75.0000 (73.0225) lr 2.2949e-04 eta 4:13:00 +epoch [41/50] batch [310/1000] time 1.543 (1.566) data 0.000 (0.004) loss 1.2500 (1.1009) acc 56.2500 (72.8730) lr 2.2949e-04 eta 4:12:51 +epoch [41/50] batch [315/1000] time 1.586 (1.566) data 0.000 (0.004) loss 0.9341 (1.1035) acc 
78.1250 (72.8075) lr 2.2949e-04 eta 4:12:44 +epoch [41/50] batch [320/1000] time 1.573 (1.566) data 0.000 (0.004) loss 1.0283 (1.1044) acc 68.7500 (72.7734) lr 2.2949e-04 eta 4:12:36 +epoch [41/50] batch [325/1000] time 1.550 (1.566) data 0.000 (0.004) loss 1.4941 (1.1040) acc 68.7500 (72.7885) lr 2.2949e-04 eta 4:12:28 +epoch [41/50] batch [330/1000] time 1.551 (1.566) data 0.001 (0.004) loss 0.6597 (1.1024) acc 84.3750 (72.8504) lr 2.2949e-04 eta 4:12:18 +epoch [41/50] batch [335/1000] time 1.560 (1.566) data 0.001 (0.004) loss 1.5986 (1.1028) acc 71.8750 (72.8731) lr 2.2949e-04 eta 4:12:14 +epoch [41/50] batch [340/1000] time 1.577 (1.566) data 0.000 (0.004) loss 1.0449 (1.1030) acc 75.0000 (72.8401) lr 2.2949e-04 eta 4:12:08 +epoch [41/50] batch [345/1000] time 1.577 (1.566) data 0.000 (0.004) loss 0.8506 (1.1056) acc 78.1250 (72.8080) lr 2.2949e-04 eta 4:12:01 +epoch [41/50] batch [350/1000] time 1.561 (1.566) data 0.000 (0.003) loss 1.0635 (1.1061) acc 75.0000 (72.7857) lr 2.2949e-04 eta 4:11:52 +epoch [41/50] batch [355/1000] time 1.531 (1.566) data 0.001 (0.003) loss 1.0039 (1.1030) acc 68.7500 (72.7993) lr 2.2949e-04 eta 4:11:43 +epoch [41/50] batch [360/1000] time 1.569 (1.566) data 0.001 (0.003) loss 1.2969 (1.1020) acc 71.8750 (72.8559) lr 2.2949e-04 eta 4:11:34 +epoch [41/50] batch [365/1000] time 1.557 (1.566) data 0.000 (0.003) loss 1.3408 (1.1001) acc 71.8750 (72.8767) lr 2.2949e-04 eta 4:11:24 +epoch [41/50] batch [370/1000] time 1.585 (1.566) data 0.000 (0.003) loss 1.0645 (1.0992) acc 75.0000 (72.9139) lr 2.2949e-04 eta 4:11:16 +epoch [41/50] batch [375/1000] time 1.587 (1.566) data 0.000 (0.003) loss 1.8809 (1.0992) acc 56.2500 (72.9167) lr 2.2949e-04 eta 4:11:09 +epoch [41/50] batch [380/1000] time 1.538 (1.566) data 0.000 (0.003) loss 0.7124 (1.0985) acc 84.3750 (72.9441) lr 2.2949e-04 eta 4:11:00 +epoch [41/50] batch [385/1000] time 1.555 (1.565) data 0.000 (0.003) loss 1.6504 (1.1006) acc 59.3750 (72.8653) lr 2.2949e-04 eta 4:10:50 +epoch 
[41/50] batch [390/1000] time 1.582 (1.565) data 0.000 (0.003) loss 0.9160 (1.1010) acc 81.2500 (72.8285) lr 2.2949e-04 eta 4:10:43 +epoch [41/50] batch [395/1000] time 1.712 (1.566) data 0.000 (0.003) loss 1.2188 (1.1005) acc 62.5000 (72.7927) lr 2.2949e-04 eta 4:10:38 +epoch [41/50] batch [400/1000] time 1.566 (1.566) data 0.000 (0.003) loss 0.9990 (1.0980) acc 78.1250 (72.8672) lr 2.2949e-04 eta 4:10:30 +epoch [41/50] batch [405/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.3477 (1.0968) acc 71.8750 (72.8781) lr 2.2949e-04 eta 4:10:21 +epoch [41/50] batch [410/1000] time 1.578 (1.566) data 0.000 (0.003) loss 0.6406 (1.0951) acc 81.2500 (72.8811) lr 2.2949e-04 eta 4:10:14 +epoch [41/50] batch [415/1000] time 1.548 (1.566) data 0.000 (0.003) loss 1.0332 (1.0979) acc 75.0000 (72.7861) lr 2.2949e-04 eta 4:10:06 +epoch [41/50] batch [420/1000] time 1.547 (1.565) data 0.000 (0.003) loss 0.4172 (1.0961) acc 87.5000 (72.8348) lr 2.2949e-04 eta 4:09:57 +epoch [41/50] batch [425/1000] time 1.588 (1.566) data 0.000 (0.003) loss 0.8662 (1.0954) acc 71.8750 (72.8529) lr 2.2949e-04 eta 4:09:49 +epoch [41/50] batch [430/1000] time 1.559 (1.566) data 0.000 (0.003) loss 1.1279 (1.0945) acc 68.7500 (72.8634) lr 2.2949e-04 eta 4:09:43 +epoch [41/50] batch [435/1000] time 1.561 (1.566) data 0.001 (0.003) loss 0.5020 (1.0926) acc 81.2500 (72.8736) lr 2.2949e-04 eta 4:09:36 +epoch [41/50] batch [440/1000] time 1.726 (1.566) data 0.000 (0.003) loss 0.8159 (1.0919) acc 75.0000 (72.8622) lr 2.2949e-04 eta 4:09:31 +epoch [41/50] batch [445/1000] time 1.584 (1.566) data 0.001 (0.003) loss 0.8706 (1.0921) acc 78.1250 (72.8792) lr 2.2949e-04 eta 4:09:24 +epoch [41/50] batch [450/1000] time 1.558 (1.566) data 0.000 (0.003) loss 0.8389 (1.0920) acc 75.0000 (72.8403) lr 2.2949e-04 eta 4:09:17 +epoch [41/50] batch [455/1000] time 1.576 (1.566) data 0.001 (0.003) loss 0.9883 (1.0923) acc 75.0000 (72.8297) lr 2.2949e-04 eta 4:09:09 +epoch [41/50] batch [460/1000] time 1.544 (1.566) data 
0.001 (0.003) loss 1.4150 (1.0930) acc 59.3750 (72.8125) lr 2.2949e-04 eta 4:09:01 +epoch [41/50] batch [465/1000] time 1.569 (1.566) data 0.001 (0.003) loss 1.0557 (1.0918) acc 71.8750 (72.7890) lr 2.2949e-04 eta 4:08:53 +epoch [41/50] batch [470/1000] time 1.554 (1.566) data 0.001 (0.003) loss 1.3184 (1.0958) acc 78.1250 (72.7261) lr 2.2949e-04 eta 4:08:44 +epoch [41/50] batch [475/1000] time 1.540 (1.566) data 0.000 (0.003) loss 0.7710 (1.0958) acc 81.2500 (72.7171) lr 2.2949e-04 eta 4:08:35 +epoch [41/50] batch [480/1000] time 1.554 (1.566) data 0.000 (0.003) loss 1.4307 (1.0958) acc 71.8750 (72.7409) lr 2.2949e-04 eta 4:08:26 +epoch [41/50] batch [485/1000] time 1.577 (1.566) data 0.000 (0.003) loss 0.8418 (1.0955) acc 78.1250 (72.7448) lr 2.2949e-04 eta 4:08:21 +epoch [41/50] batch [490/1000] time 1.569 (1.566) data 0.001 (0.003) loss 0.9639 (1.0947) acc 78.1250 (72.7934) lr 2.2949e-04 eta 4:08:14 +epoch [41/50] batch [495/1000] time 1.566 (1.566) data 0.001 (0.003) loss 0.7974 (1.0955) acc 75.0000 (72.7652) lr 2.2949e-04 eta 4:08:06 +epoch [41/50] batch [500/1000] time 1.541 (1.566) data 0.000 (0.003) loss 1.3154 (1.0954) acc 68.7500 (72.7438) lr 2.2949e-04 eta 4:07:57 +epoch [41/50] batch [505/1000] time 1.555 (1.566) data 0.001 (0.003) loss 0.9092 (1.0934) acc 71.8750 (72.7847) lr 2.2949e-04 eta 4:07:48 +epoch [41/50] batch [510/1000] time 1.534 (1.566) data 0.001 (0.003) loss 0.8896 (1.0922) acc 75.0000 (72.7941) lr 2.2949e-04 eta 4:07:38 +epoch [41/50] batch [515/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.3057 (1.0933) acc 68.7500 (72.7609) lr 2.2949e-04 eta 4:07:29 +epoch [41/50] batch [520/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.1465 (1.0951) acc 65.6250 (72.7163) lr 2.2949e-04 eta 4:07:20 +epoch [41/50] batch [525/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.6143 (1.0937) acc 84.3750 (72.7619) lr 2.2949e-04 eta 4:07:11 +epoch [41/50] batch [530/1000] time 1.545 (1.565) data 0.001 (0.002) loss 0.7397 (1.0909) acc 84.3750 
(72.8007) lr 2.2949e-04 eta 4:07:02 +epoch [41/50] batch [535/1000] time 1.573 (1.565) data 0.000 (0.002) loss 1.5879 (1.0931) acc 62.5000 (72.7395) lr 2.2949e-04 eta 4:06:54 +epoch [41/50] batch [540/1000] time 1.538 (1.565) data 0.000 (0.002) loss 1.0664 (1.0944) acc 75.0000 (72.7373) lr 2.2949e-04 eta 4:06:45 +epoch [41/50] batch [545/1000] time 1.578 (1.565) data 0.000 (0.002) loss 1.6934 (1.0956) acc 62.5000 (72.7007) lr 2.2949e-04 eta 4:06:37 +epoch [41/50] batch [550/1000] time 1.582 (1.565) data 0.000 (0.002) loss 1.1338 (1.0950) acc 71.8750 (72.7102) lr 2.2949e-04 eta 4:06:32 +epoch [41/50] batch [555/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.6914 (1.0941) acc 78.1250 (72.7083) lr 2.2949e-04 eta 4:06:23 +epoch [41/50] batch [560/1000] time 1.521 (1.565) data 0.001 (0.002) loss 1.4785 (1.0925) acc 71.8750 (72.7400) lr 2.2949e-04 eta 4:06:13 +epoch [41/50] batch [565/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.0576 (1.0951) acc 78.1250 (72.6770) lr 2.2949e-04 eta 4:06:06 +epoch [41/50] batch [570/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.3076 (1.0954) acc 71.8750 (72.6754) lr 2.2949e-04 eta 4:05:58 +epoch [41/50] batch [575/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.7705 (1.0964) acc 53.1250 (72.6630) lr 2.2949e-04 eta 4:05:49 +epoch [41/50] batch [580/1000] time 1.585 (1.565) data 0.000 (0.002) loss 0.8589 (1.0973) acc 81.2500 (72.6886) lr 2.2949e-04 eta 4:05:41 +epoch [41/50] batch [585/1000] time 1.600 (1.565) data 0.000 (0.002) loss 0.7905 (1.0954) acc 81.2500 (72.7404) lr 2.2949e-04 eta 4:05:34 +epoch [41/50] batch [590/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.4590 (1.0956) acc 90.6250 (72.7489) lr 2.2949e-04 eta 4:05:27 +epoch [41/50] batch [595/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.7202 (1.0956) acc 78.1250 (72.7468) lr 2.2949e-04 eta 4:05:20 +epoch [41/50] batch [600/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.2891 (1.0971) acc 65.6250 (72.7135) lr 2.2949e-04 eta 4:05:12 +epoch [41/50] 
batch [605/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.5264 (1.0972) acc 65.6250 (72.7169) lr 2.2949e-04 eta 4:05:03 +epoch [41/50] batch [610/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.4229 (1.0985) acc 65.6250 (72.6793) lr 2.2949e-04 eta 4:04:54 +epoch [41/50] batch [615/1000] time 1.540 (1.565) data 0.000 (0.002) loss 0.9814 (1.0979) acc 75.0000 (72.7185) lr 2.2949e-04 eta 4:04:46 +epoch [41/50] batch [620/1000] time 1.577 (1.565) data 0.000 (0.002) loss 1.7344 (1.0994) acc 62.5000 (72.6966) lr 2.2949e-04 eta 4:04:37 +epoch [41/50] batch [625/1000] time 1.552 (1.565) data 0.001 (0.002) loss 0.5664 (1.0992) acc 81.2500 (72.7100) lr 2.2949e-04 eta 4:04:28 +epoch [41/50] batch [630/1000] time 1.562 (1.565) data 0.001 (0.002) loss 1.0703 (1.0983) acc 68.7500 (72.7133) lr 2.2949e-04 eta 4:04:20 +epoch [41/50] batch [635/1000] time 1.558 (1.565) data 0.001 (0.002) loss 0.8213 (1.0960) acc 90.6250 (72.7854) lr 2.2949e-04 eta 4:04:14 +epoch [41/50] batch [640/1000] time 1.550 (1.565) data 0.001 (0.002) loss 0.8320 (1.0936) acc 75.0000 (72.8369) lr 2.2949e-04 eta 4:04:06 +epoch [41/50] batch [645/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.0156 (1.0932) acc 71.8750 (72.8295) lr 2.2949e-04 eta 4:03:58 +epoch [41/50] batch [650/1000] time 1.574 (1.565) data 0.000 (0.002) loss 1.0332 (1.0947) acc 81.2500 (72.8077) lr 2.2949e-04 eta 4:03:49 +epoch [41/50] batch [655/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.4121 (1.0938) acc 59.3750 (72.8006) lr 2.2949e-04 eta 4:03:42 +epoch [41/50] batch [660/1000] time 1.601 (1.565) data 0.001 (0.002) loss 0.9683 (1.0956) acc 68.7500 (72.7794) lr 2.2949e-04 eta 4:03:35 +epoch [41/50] batch [665/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.6558 (1.0938) acc 75.0000 (72.7914) lr 2.2949e-04 eta 4:03:27 +epoch [41/50] batch [670/1000] time 1.576 (1.565) data 0.000 (0.002) loss 0.9980 (1.0942) acc 68.7500 (72.7705) lr 2.2949e-04 eta 4:03:19 +epoch [41/50] batch [675/1000] time 1.555 (1.565) data 0.000 
(0.002) loss 0.5347 (1.0938) acc 84.3750 (72.7731) lr 2.2949e-04 eta 4:03:11 +epoch [41/50] batch [680/1000] time 1.570 (1.565) data 0.000 (0.002) loss 0.9585 (1.0927) acc 75.0000 (72.7711) lr 2.2949e-04 eta 4:03:03 +epoch [41/50] batch [685/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.6328 (1.0915) acc 84.3750 (72.7874) lr 2.2949e-04 eta 4:02:56 +epoch [41/50] batch [690/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.0654 (1.0907) acc 78.1250 (72.7989) lr 2.2949e-04 eta 4:02:48 +epoch [41/50] batch [695/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.1650 (1.0916) acc 62.5000 (72.7608) lr 2.2949e-04 eta 4:02:39 +epoch [41/50] batch [700/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.0928 (1.0913) acc 65.6250 (72.7679) lr 2.2949e-04 eta 4:02:32 +epoch [41/50] batch [705/1000] time 1.585 (1.565) data 0.000 (0.002) loss 0.4487 (1.0931) acc 90.6250 (72.7216) lr 2.2949e-04 eta 4:02:25 +epoch [41/50] batch [710/1000] time 1.567 (1.565) data 0.000 (0.002) loss 0.9258 (1.0922) acc 78.1250 (72.7421) lr 2.2949e-04 eta 4:02:18 +epoch [41/50] batch [715/1000] time 1.586 (1.565) data 0.000 (0.002) loss 0.8843 (1.0909) acc 75.0000 (72.7448) lr 2.2949e-04 eta 4:02:10 +epoch [41/50] batch [720/1000] time 1.581 (1.565) data 0.001 (0.002) loss 0.6362 (1.0908) acc 87.5000 (72.7257) lr 2.2949e-04 eta 4:02:03 +epoch [41/50] batch [725/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.5938 (1.0915) acc 71.8750 (72.7026) lr 2.2949e-04 eta 4:01:55 +epoch [41/50] batch [730/1000] time 1.532 (1.565) data 0.001 (0.002) loss 0.5762 (1.0910) acc 87.5000 (72.7183) lr 2.2949e-04 eta 4:01:45 +epoch [41/50] batch [735/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.4629 (1.0913) acc 75.0000 (72.7253) lr 2.2949e-04 eta 4:01:37 +epoch [41/50] batch [740/1000] time 1.571 (1.565) data 0.001 (0.002) loss 0.3674 (1.0897) acc 90.6250 (72.7703) lr 2.2949e-04 eta 4:01:29 +epoch [41/50] batch [745/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.4954 (1.0890) acc 87.5000 (72.8020) lr 
2.2949e-04 eta 4:01:24 +epoch [41/50] batch [750/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.6211 (1.0873) acc 84.3750 (72.8250) lr 2.2949e-04 eta 4:01:16 +epoch [41/50] batch [755/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.1816 (1.0875) acc 65.6250 (72.8311) lr 2.2949e-04 eta 4:01:08 +epoch [41/50] batch [760/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.9717 (1.0872) acc 81.2500 (72.8577) lr 2.2949e-04 eta 4:00:59 +epoch [41/50] batch [765/1000] time 1.580 (1.565) data 0.000 (0.002) loss 1.6523 (1.0873) acc 68.7500 (72.8472) lr 2.2949e-04 eta 4:00:52 +epoch [41/50] batch [770/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.6519 (1.0860) acc 78.1250 (72.8693) lr 2.2949e-04 eta 4:00:44 +epoch [41/50] batch [775/1000] time 1.543 (1.565) data 0.000 (0.002) loss 1.0996 (1.0853) acc 59.3750 (72.8427) lr 2.2949e-04 eta 4:00:35 +epoch [41/50] batch [780/1000] time 1.565 (1.565) data 0.001 (0.002) loss 0.9854 (1.0850) acc 78.1250 (72.8446) lr 2.2949e-04 eta 4:00:27 +epoch [41/50] batch [785/1000] time 1.723 (1.565) data 0.001 (0.002) loss 1.1602 (1.0843) acc 78.1250 (72.8543) lr 2.2949e-04 eta 4:00:20 +epoch [41/50] batch [790/1000] time 1.554 (1.565) data 0.001 (0.002) loss 1.3125 (1.0836) acc 71.8750 (72.8600) lr 2.2949e-04 eta 4:00:12 +epoch [41/50] batch [795/1000] time 1.564 (1.565) data 0.001 (0.002) loss 1.2529 (1.0840) acc 59.3750 (72.8420) lr 2.2949e-04 eta 4:00:05 +epoch [41/50] batch [800/1000] time 1.535 (1.565) data 0.001 (0.002) loss 1.5098 (1.0843) acc 68.7500 (72.8281) lr 2.2949e-04 eta 3:59:56 +epoch [41/50] batch [805/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.0654 (1.0849) acc 78.1250 (72.8028) lr 2.2949e-04 eta 3:59:47 +epoch [41/50] batch [810/1000] time 1.548 (1.565) data 0.000 (0.002) loss 1.6865 (1.0855) acc 59.3750 (72.8009) lr 2.2949e-04 eta 3:59:39 +epoch [41/50] batch [815/1000] time 1.536 (1.565) data 0.001 (0.002) loss 1.1904 (1.0867) acc 65.6250 (72.7569) lr 2.2949e-04 eta 3:59:31 +epoch [41/50] batch 
[820/1000] time 1.552 (1.565) data 0.000 (0.002) loss 0.4507 (1.0859) acc 90.6250 (72.7973) lr 2.2949e-04 eta 3:59:23 +epoch [41/50] batch [825/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.8940 (1.0867) acc 84.3750 (72.7652) lr 2.2949e-04 eta 3:59:14 +epoch [41/50] batch [830/1000] time 1.545 (1.564) data 0.000 (0.002) loss 0.9976 (1.0862) acc 65.6250 (72.7560) lr 2.2949e-04 eta 3:59:06 +epoch [41/50] batch [835/1000] time 1.550 (1.564) data 0.000 (0.002) loss 1.0898 (1.0860) acc 68.7500 (72.7507) lr 2.2949e-04 eta 3:58:57 +epoch [41/50] batch [840/1000] time 1.549 (1.564) data 0.000 (0.002) loss 1.4844 (1.0869) acc 62.5000 (72.7455) lr 2.2949e-04 eta 3:58:50 +epoch [41/50] batch [845/1000] time 1.572 (1.564) data 0.001 (0.002) loss 1.0137 (1.0874) acc 71.8750 (72.7404) lr 2.2949e-04 eta 3:58:42 +epoch [41/50] batch [850/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.8740 (1.0868) acc 71.8750 (72.7500) lr 2.2949e-04 eta 3:58:36 +epoch [41/50] batch [855/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.1729 (1.0874) acc 75.0000 (72.7449) lr 2.2949e-04 eta 3:58:28 +epoch [41/50] batch [860/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.0459 (1.0875) acc 68.7500 (72.7253) lr 2.2949e-04 eta 3:58:20 +epoch [41/50] batch [865/1000] time 1.558 (1.565) data 0.000 (0.002) loss 0.9277 (1.0871) acc 81.2500 (72.7421) lr 2.2949e-04 eta 3:58:12 +epoch [41/50] batch [870/1000] time 1.574 (1.565) data 0.001 (0.002) loss 0.8745 (1.0875) acc 78.1250 (72.7550) lr 2.2949e-04 eta 3:58:05 +epoch [41/50] batch [875/1000] time 1.574 (1.565) data 0.000 (0.002) loss 0.9775 (1.0888) acc 71.8750 (72.7321) lr 2.2949e-04 eta 3:57:57 +epoch [41/50] batch [880/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.5156 (1.0898) acc 65.6250 (72.7095) lr 2.2949e-04 eta 3:57:49 +epoch [41/50] batch [885/1000] time 1.565 (1.565) data 0.001 (0.002) loss 1.1514 (1.0893) acc 75.0000 (72.7366) lr 2.2949e-04 eta 3:57:41 +epoch [41/50] batch [890/1000] time 1.563 (1.565) data 0.001 (0.002) loss 
1.1387 (1.0882) acc 75.0000 (72.7353) lr 2.2949e-04 eta 3:57:33 +epoch [41/50] batch [895/1000] time 1.532 (1.565) data 0.001 (0.002) loss 0.7930 (1.0879) acc 81.2500 (72.7270) lr 2.2949e-04 eta 3:57:27 +epoch [41/50] batch [900/1000] time 1.578 (1.565) data 0.001 (0.002) loss 1.1768 (1.0879) acc 78.1250 (72.7396) lr 2.2949e-04 eta 3:57:19 +epoch [41/50] batch [905/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.9722 (1.0873) acc 78.1250 (72.7279) lr 2.2949e-04 eta 3:57:10 +epoch [41/50] batch [910/1000] time 1.559 (1.565) data 0.001 (0.002) loss 0.6440 (1.0862) acc 87.5000 (72.7644) lr 2.2949e-04 eta 3:57:02 +epoch [41/50] batch [915/1000] time 1.570 (1.565) data 0.001 (0.002) loss 0.6646 (1.0855) acc 81.2500 (72.7937) lr 2.2949e-04 eta 3:56:54 +epoch [41/50] batch [920/1000] time 1.539 (1.565) data 0.001 (0.002) loss 1.2070 (1.0863) acc 75.0000 (72.7785) lr 2.2949e-04 eta 3:56:46 +epoch [41/50] batch [925/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.7715 (1.0864) acc 62.5000 (72.7703) lr 2.2949e-04 eta 3:56:38 +epoch [41/50] batch [930/1000] time 1.564 (1.565) data 0.000 (0.002) loss 0.9199 (1.0867) acc 78.1250 (72.7890) lr 2.2949e-04 eta 3:56:30 +epoch [41/50] batch [935/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.0508 (1.0872) acc 75.0000 (72.7741) lr 2.2949e-04 eta 3:56:22 +epoch [41/50] batch [940/1000] time 1.590 (1.565) data 0.000 (0.002) loss 1.2119 (1.0859) acc 75.0000 (72.7959) lr 2.2949e-04 eta 3:56:16 +epoch [41/50] batch [945/1000] time 1.575 (1.565) data 0.000 (0.002) loss 0.8628 (1.0848) acc 75.0000 (72.8075) lr 2.2949e-04 eta 3:56:08 +epoch [41/50] batch [950/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.1592 (1.0846) acc 62.5000 (72.8158) lr 2.2949e-04 eta 3:56:00 +epoch [41/50] batch [955/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.8716 (1.0842) acc 71.8750 (72.8076) lr 2.2949e-04 eta 3:55:52 +epoch [41/50] batch [960/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.6753 (1.0837) acc 84.3750 (72.8385) lr 2.2949e-04 
eta 3:55:44 +epoch [41/50] batch [965/1000] time 1.575 (1.565) data 0.000 (0.002) loss 1.4844 (1.0840) acc 68.7500 (72.8530) lr 2.2949e-04 eta 3:55:36 +epoch [41/50] batch [970/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.6445 (1.0829) acc 81.2500 (72.8705) lr 2.2949e-04 eta 3:55:27 +epoch [41/50] batch [975/1000] time 1.586 (1.565) data 0.000 (0.002) loss 0.9570 (1.0839) acc 71.8750 (72.8494) lr 2.2949e-04 eta 3:55:19 +epoch [41/50] batch [980/1000] time 1.560 (1.565) data 0.000 (0.002) loss 0.9395 (1.0831) acc 78.1250 (72.8699) lr 2.2949e-04 eta 3:55:11 +epoch [41/50] batch [985/1000] time 1.558 (1.564) data 0.001 (0.002) loss 0.7583 (1.0823) acc 81.2500 (72.8807) lr 2.2949e-04 eta 3:55:03 +epoch [41/50] batch [990/1000] time 1.553 (1.564) data 0.000 (0.002) loss 0.9468 (1.0824) acc 75.0000 (72.8788) lr 2.2949e-04 eta 3:54:55 +epoch [41/50] batch [995/1000] time 1.542 (1.564) data 0.000 (0.002) loss 1.2598 (1.0812) acc 71.8750 (72.8957) lr 2.2949e-04 eta 3:54:46 +epoch [41/50] batch [1000/1000] time 1.691 (1.564) data 0.001 (0.002) loss 1.1689 (1.0806) acc 62.5000 (72.9125) lr 1.9098e-04 eta 3:54:39 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,400 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [42/50] batch [5/1000] time 1.569 (1.715) data 0.001 (0.209) loss 1.1084 (1.0374) acc 71.8750 (73.7500) lr 1.9098e-04 eta 4:17:09 +epoch [42/50] batch [10/1000] time 1.564 (1.632) data 0.000 (0.105) loss 0.9697 (1.0807) acc 78.1250 (74.6875) lr 1.9098e-04 eta 4:04:30 +epoch [42/50] batch [15/1000] time 1.556 (1.607) data 0.000 (0.070) loss 1.7471 (1.2029) acc 62.5000 (72.2917) lr 1.9098e-04 eta 4:00:42 +epoch [42/50] batch [20/1000] time 1.544 (1.613) data 0.001 (0.053) loss 1.2275 (1.1369) acc 62.5000 (72.9688) lr 1.9098e-04 eta 4:01:29 +epoch [42/50] batch [25/1000] time 1.563 (1.605) data 0.001 
(0.042) loss 1.0205 (1.0551) acc 71.8750 (74.3750) lr 1.9098e-04 eta 4:00:05 +epoch [42/50] batch [30/1000] time 1.552 (1.597) data 0.000 (0.035) loss 1.1699 (1.0558) acc 68.7500 (73.9583) lr 1.9098e-04 eta 3:58:46 +epoch [42/50] batch [35/1000] time 1.569 (1.592) data 0.001 (0.030) loss 1.4072 (1.0877) acc 71.8750 (72.9464) lr 1.9098e-04 eta 3:57:49 +epoch [42/50] batch [40/1000] time 1.572 (1.589) data 0.001 (0.027) loss 0.9624 (1.0890) acc 71.8750 (72.3438) lr 1.9098e-04 eta 3:57:15 +epoch [42/50] batch [45/1000] time 1.537 (1.584) data 0.001 (0.024) loss 1.3232 (1.0893) acc 68.7500 (72.1528) lr 1.9098e-04 eta 3:56:23 +epoch [42/50] batch [50/1000] time 1.567 (1.582) data 0.001 (0.021) loss 0.9629 (1.0993) acc 78.1250 (72.1250) lr 1.9098e-04 eta 3:55:58 +epoch [42/50] batch [55/1000] time 1.546 (1.579) data 0.001 (0.019) loss 1.2188 (1.1054) acc 68.7500 (72.0455) lr 1.9098e-04 eta 3:55:27 +epoch [42/50] batch [60/1000] time 1.561 (1.579) data 0.001 (0.018) loss 1.3750 (1.0996) acc 65.6250 (72.4479) lr 1.9098e-04 eta 3:55:18 +epoch [42/50] batch [65/1000] time 1.547 (1.577) data 0.000 (0.017) loss 0.7656 (1.1003) acc 78.1250 (72.5481) lr 1.9098e-04 eta 3:54:51 +epoch [42/50] batch [70/1000] time 1.564 (1.577) data 0.001 (0.016) loss 0.5244 (1.0859) acc 84.3750 (72.7679) lr 1.9098e-04 eta 3:54:44 +epoch [42/50] batch [75/1000] time 1.563 (1.576) data 0.000 (0.014) loss 1.1328 (1.0727) acc 75.0000 (73.1250) lr 1.9098e-04 eta 3:54:25 +epoch [42/50] batch [80/1000] time 1.560 (1.576) data 0.000 (0.014) loss 1.4502 (1.0760) acc 68.7500 (73.0078) lr 1.9098e-04 eta 3:54:18 +epoch [42/50] batch [85/1000] time 1.579 (1.576) data 0.001 (0.013) loss 1.0488 (1.0652) acc 71.8750 (72.9779) lr 1.9098e-04 eta 3:54:05 +epoch [42/50] batch [90/1000] time 1.534 (1.575) data 0.000 (0.012) loss 0.9038 (1.0610) acc 65.6250 (72.7431) lr 1.9098e-04 eta 3:53:55 +epoch [42/50] batch [95/1000] time 1.581 (1.575) data 0.001 (0.012) loss 0.9570 (1.0513) acc 78.1250 (72.7632) lr 1.9098e-04 
eta 3:53:43 +epoch [42/50] batch [100/1000] time 1.554 (1.575) data 0.000 (0.011) loss 0.9644 (1.0485) acc 84.3750 (72.9688) lr 1.9098e-04 eta 3:53:33 +epoch [42/50] batch [105/1000] time 1.555 (1.574) data 0.000 (0.011) loss 0.8462 (1.0478) acc 71.8750 (72.7679) lr 1.9098e-04 eta 3:53:22 +epoch [42/50] batch [110/1000] time 1.546 (1.574) data 0.000 (0.010) loss 0.2983 (1.0439) acc 93.7500 (72.8977) lr 1.9098e-04 eta 3:53:09 +epoch [42/50] batch [115/1000] time 1.573 (1.573) data 0.000 (0.010) loss 1.5078 (1.0476) acc 71.8750 (72.9348) lr 1.9098e-04 eta 3:52:58 +epoch [42/50] batch [120/1000] time 1.548 (1.573) data 0.000 (0.009) loss 0.9053 (1.0475) acc 84.3750 (72.9948) lr 1.9098e-04 eta 3:52:45 +epoch [42/50] batch [125/1000] time 1.549 (1.574) data 0.001 (0.009) loss 1.1533 (1.0635) acc 78.1250 (72.7250) lr 1.9098e-04 eta 3:52:46 +epoch [42/50] batch [130/1000] time 1.534 (1.573) data 0.001 (0.009) loss 1.2617 (1.0673) acc 65.6250 (72.5721) lr 1.9098e-04 eta 3:52:31 +epoch [42/50] batch [135/1000] time 1.538 (1.572) data 0.001 (0.008) loss 0.6680 (1.0615) acc 87.5000 (72.7546) lr 1.9098e-04 eta 3:52:19 +epoch [42/50] batch [140/1000] time 1.552 (1.572) data 0.000 (0.008) loss 0.5884 (1.0519) acc 87.5000 (73.0134) lr 1.9098e-04 eta 3:52:06 +epoch [42/50] batch [145/1000] time 1.578 (1.571) data 0.000 (0.008) loss 1.4766 (1.0578) acc 62.5000 (72.9741) lr 1.9098e-04 eta 3:51:52 +epoch [42/50] batch [150/1000] time 1.583 (1.571) data 0.000 (0.008) loss 0.9790 (1.0591) acc 78.1250 (72.9792) lr 1.9098e-04 eta 3:51:42 +epoch [42/50] batch [155/1000] time 1.582 (1.571) data 0.001 (0.007) loss 0.8169 (1.0522) acc 75.0000 (73.1250) lr 1.9098e-04 eta 3:51:32 +epoch [42/50] batch [160/1000] time 1.583 (1.571) data 0.001 (0.007) loss 1.0479 (1.0536) acc 78.1250 (73.1250) lr 1.9098e-04 eta 3:51:23 +epoch [42/50] batch [165/1000] time 1.581 (1.571) data 0.000 (0.007) loss 1.6191 (1.0590) acc 62.5000 (72.9924) lr 1.9098e-04 eta 3:51:15 +epoch [42/50] batch [170/1000] time 
1.570 (1.571) data 0.000 (0.007) loss 1.0654 (1.0532) acc 68.7500 (73.1985) lr 1.9098e-04 eta 3:51:13 +epoch [42/50] batch [175/1000] time 1.543 (1.571) data 0.000 (0.007) loss 1.2568 (1.0523) acc 59.3750 (73.1964) lr 1.9098e-04 eta 3:51:01 +epoch [42/50] batch [180/1000] time 1.585 (1.571) data 0.000 (0.006) loss 1.3027 (1.0566) acc 62.5000 (73.0903) lr 1.9098e-04 eta 3:50:54 +epoch [42/50] batch [185/1000] time 1.552 (1.570) data 0.000 (0.006) loss 1.2256 (1.0559) acc 71.8750 (73.0574) lr 1.9098e-04 eta 3:50:41 +epoch [42/50] batch [190/1000] time 1.545 (1.570) data 0.000 (0.006) loss 1.1543 (1.0617) acc 71.8750 (73.0099) lr 1.9098e-04 eta 3:50:28 +epoch [42/50] batch [195/1000] time 1.598 (1.570) data 0.001 (0.006) loss 1.2949 (1.0624) acc 68.7500 (73.0609) lr 1.9098e-04 eta 3:50:20 +epoch [42/50] batch [200/1000] time 1.556 (1.569) data 0.000 (0.006) loss 1.1299 (1.0621) acc 59.3750 (73.0156) lr 1.9098e-04 eta 3:50:10 +epoch [42/50] batch [205/1000] time 1.552 (1.569) data 0.000 (0.006) loss 1.4990 (1.0667) acc 65.6250 (72.9268) lr 1.9098e-04 eta 3:50:01 +epoch [42/50] batch [210/1000] time 1.561 (1.570) data 0.000 (0.005) loss 1.0664 (1.0718) acc 68.7500 (72.8571) lr 1.9098e-04 eta 3:49:58 +epoch [42/50] batch [215/1000] time 1.550 (1.570) data 0.000 (0.005) loss 0.9341 (1.0702) acc 65.6250 (72.8488) lr 1.9098e-04 eta 3:49:49 +epoch [42/50] batch [220/1000] time 1.546 (1.570) data 0.000 (0.005) loss 0.7139 (1.0658) acc 78.1250 (72.9261) lr 1.9098e-04 eta 3:49:40 +epoch [42/50] batch [225/1000] time 1.558 (1.569) data 0.001 (0.005) loss 1.1221 (1.0655) acc 68.7500 (72.8472) lr 1.9098e-04 eta 3:49:30 +epoch [42/50] batch [230/1000] time 1.559 (1.569) data 0.001 (0.005) loss 1.2324 (1.0712) acc 68.7500 (72.7446) lr 1.9098e-04 eta 3:49:20 +epoch [42/50] batch [235/1000] time 1.547 (1.569) data 0.001 (0.005) loss 0.6348 (1.0649) acc 78.1250 (72.8457) lr 1.9098e-04 eta 3:49:10 +epoch [42/50] batch [240/1000] time 1.578 (1.569) data 0.000 (0.005) loss 0.8115 (1.0652) 
acc 81.2500 (72.8906) lr 1.9098e-04 eta 3:49:04 +epoch [42/50] batch [245/1000] time 1.555 (1.569) data 0.001 (0.005) loss 1.5684 (1.0663) acc 65.6250 (72.9082) lr 1.9098e-04 eta 3:48:54 +epoch [42/50] batch [250/1000] time 1.565 (1.569) data 0.000 (0.005) loss 1.3115 (1.0669) acc 68.7500 (72.9125) lr 1.9098e-04 eta 3:48:47 +epoch [42/50] batch [255/1000] time 1.565 (1.569) data 0.000 (0.005) loss 1.4619 (1.0689) acc 71.8750 (72.9167) lr 1.9098e-04 eta 3:48:38 +epoch [42/50] batch [260/1000] time 1.572 (1.569) data 0.000 (0.005) loss 1.2939 (1.0666) acc 71.8750 (72.9688) lr 1.9098e-04 eta 3:48:29 +epoch [42/50] batch [265/1000] time 1.557 (1.568) data 0.000 (0.004) loss 1.2803 (1.0677) acc 65.6250 (72.9717) lr 1.9098e-04 eta 3:48:20 +epoch [42/50] batch [270/1000] time 1.555 (1.568) data 0.000 (0.004) loss 1.4746 (1.0693) acc 59.3750 (72.9167) lr 1.9098e-04 eta 3:48:11 +epoch [42/50] batch [275/1000] time 1.600 (1.569) data 0.000 (0.004) loss 1.5498 (1.0701) acc 68.7500 (72.8409) lr 1.9098e-04 eta 3:48:11 +epoch [42/50] batch [280/1000] time 1.562 (1.569) data 0.000 (0.004) loss 0.8843 (1.0696) acc 78.1250 (72.8460) lr 1.9098e-04 eta 3:48:03 +epoch [42/50] batch [285/1000] time 1.574 (1.569) data 0.000 (0.004) loss 0.6294 (1.0712) acc 75.0000 (72.8618) lr 1.9098e-04 eta 3:47:55 +epoch [42/50] batch [290/1000] time 1.524 (1.569) data 0.000 (0.004) loss 1.0234 (1.0707) acc 65.6250 (72.8125) lr 1.9098e-04 eta 3:47:44 +epoch [42/50] batch [295/1000] time 1.559 (1.569) data 0.001 (0.004) loss 0.9014 (1.0728) acc 78.1250 (72.8284) lr 1.9098e-04 eta 3:47:34 +epoch [42/50] batch [300/1000] time 1.562 (1.569) data 0.000 (0.004) loss 1.0381 (1.0722) acc 68.7500 (72.8542) lr 1.9098e-04 eta 3:47:26 +epoch [42/50] batch [305/1000] time 1.577 (1.569) data 0.000 (0.004) loss 1.3584 (1.0727) acc 68.7500 (72.8381) lr 1.9098e-04 eta 3:47:19 +epoch [42/50] batch [310/1000] time 1.556 (1.569) data 0.000 (0.004) loss 1.7656 (1.0768) acc 65.6250 (72.7319) lr 1.9098e-04 eta 3:47:10 
+epoch [42/50] batch [315/1000] time 1.544 (1.568) data 0.001 (0.004) loss 0.6704 (1.0723) acc 87.5000 (72.8770) lr 1.9098e-04 eta 3:47:01 +epoch [42/50] batch [320/1000] time 1.547 (1.569) data 0.000 (0.004) loss 1.1855 (1.0697) acc 75.0000 (72.9883) lr 1.9098e-04 eta 3:46:56 +epoch [42/50] batch [325/1000] time 1.559 (1.568) data 0.000 (0.004) loss 0.8384 (1.0693) acc 81.2500 (73.0288) lr 1.9098e-04 eta 3:46:46 +epoch [42/50] batch [330/1000] time 1.583 (1.568) data 0.000 (0.004) loss 1.3057 (1.0695) acc 68.7500 (73.0114) lr 1.9098e-04 eta 3:46:37 +epoch [42/50] batch [335/1000] time 1.516 (1.568) data 0.000 (0.004) loss 1.1748 (1.0691) acc 71.8750 (73.0690) lr 1.9098e-04 eta 3:46:27 +epoch [42/50] batch [340/1000] time 1.556 (1.568) data 0.000 (0.004) loss 0.6182 (1.0682) acc 87.5000 (73.0790) lr 1.9098e-04 eta 3:46:17 +epoch [42/50] batch [345/1000] time 1.532 (1.568) data 0.000 (0.004) loss 0.9575 (1.0666) acc 75.0000 (73.0707) lr 1.9098e-04 eta 3:46:08 +epoch [42/50] batch [350/1000] time 1.566 (1.567) data 0.000 (0.003) loss 0.9189 (1.0660) acc 75.0000 (73.0804) lr 1.9098e-04 eta 3:45:58 +epoch [42/50] batch [355/1000] time 1.556 (1.567) data 0.000 (0.003) loss 1.0742 (1.0645) acc 75.0000 (73.0898) lr 1.9098e-04 eta 3:45:49 +epoch [42/50] batch [360/1000] time 1.706 (1.568) data 0.001 (0.003) loss 0.8193 (1.0638) acc 81.2500 (73.1250) lr 1.9098e-04 eta 3:45:43 +epoch [42/50] batch [365/1000] time 1.572 (1.567) data 0.000 (0.003) loss 1.5645 (1.0657) acc 62.5000 (73.1079) lr 1.9098e-04 eta 3:45:35 +epoch [42/50] batch [370/1000] time 1.562 (1.567) data 0.001 (0.003) loss 0.9731 (1.0668) acc 75.0000 (73.0405) lr 1.9098e-04 eta 3:45:26 +epoch [42/50] batch [375/1000] time 1.587 (1.567) data 0.000 (0.003) loss 1.2500 (1.0659) acc 71.8750 (73.0833) lr 1.9098e-04 eta 3:45:17 +epoch [42/50] batch [380/1000] time 1.564 (1.567) data 0.000 (0.003) loss 1.3672 (1.0659) acc 65.6250 (73.1003) lr 1.9098e-04 eta 3:45:10 +epoch [42/50] batch [385/1000] time 1.558 (1.567) 
data 0.000 (0.003) loss 0.8511 (1.0658) acc 84.3750 (73.1656) lr 1.9098e-04 eta 3:45:01 +epoch [42/50] batch [390/1000] time 1.545 (1.567) data 0.000 (0.003) loss 0.6035 (1.0638) acc 81.2500 (73.2051) lr 1.9098e-04 eta 3:44:51 +epoch [42/50] batch [395/1000] time 1.565 (1.567) data 0.001 (0.003) loss 1.0771 (1.0646) acc 68.7500 (73.2278) lr 1.9098e-04 eta 3:44:43 +epoch [42/50] batch [400/1000] time 1.569 (1.567) data 0.001 (0.003) loss 1.1787 (1.0612) acc 62.5000 (73.2656) lr 1.9098e-04 eta 3:44:35 +epoch [42/50] batch [405/1000] time 1.562 (1.567) data 0.000 (0.003) loss 0.9165 (1.0623) acc 81.2500 (73.2407) lr 1.9098e-04 eta 3:44:26 +epoch [42/50] batch [410/1000] time 1.558 (1.567) data 0.001 (0.003) loss 1.3447 (1.0671) acc 56.2500 (73.1250) lr 1.9098e-04 eta 3:44:18 +epoch [42/50] batch [415/1000] time 1.554 (1.567) data 0.000 (0.003) loss 1.2725 (1.0666) acc 78.1250 (73.1777) lr 1.9098e-04 eta 3:44:09 +epoch [42/50] batch [420/1000] time 1.590 (1.567) data 0.000 (0.003) loss 0.8110 (1.0694) acc 75.0000 (73.0729) lr 1.9098e-04 eta 3:44:01 +epoch [42/50] batch [425/1000] time 1.577 (1.567) data 0.000 (0.003) loss 1.0791 (1.0711) acc 78.1250 (73.0515) lr 1.9098e-04 eta 3:43:56 +epoch [42/50] batch [430/1000] time 1.543 (1.567) data 0.001 (0.003) loss 1.3613 (1.0739) acc 59.3750 (72.9578) lr 1.9098e-04 eta 3:43:46 +epoch [42/50] batch [435/1000] time 1.557 (1.567) data 0.000 (0.003) loss 1.0234 (1.0746) acc 75.0000 (72.9310) lr 1.9098e-04 eta 3:43:38 +epoch [42/50] batch [440/1000] time 1.563 (1.567) data 0.000 (0.003) loss 0.8970 (1.0757) acc 75.0000 (72.8977) lr 1.9098e-04 eta 3:43:31 +epoch [42/50] batch [445/1000] time 1.557 (1.567) data 0.000 (0.003) loss 1.3447 (1.0760) acc 65.6250 (72.8792) lr 1.9098e-04 eta 3:43:23 +epoch [42/50] batch [450/1000] time 1.556 (1.567) data 0.001 (0.003) loss 0.8286 (1.0744) acc 81.2500 (72.9375) lr 1.9098e-04 eta 3:43:13 +epoch [42/50] batch [455/1000] time 1.575 (1.567) data 0.001 (0.003) loss 1.5225 (1.0753) acc 62.5000 
(72.9258) lr 1.9098e-04 eta 3:43:06 +epoch [42/50] batch [460/1000] time 1.568 (1.567) data 0.000 (0.003) loss 0.4646 (1.0727) acc 84.3750 (72.9688) lr 1.9098e-04 eta 3:42:59 +epoch [42/50] batch [465/1000] time 1.559 (1.567) data 0.001 (0.003) loss 0.9282 (1.0717) acc 81.2500 (73.0108) lr 1.9098e-04 eta 3:42:52 +epoch [42/50] batch [470/1000] time 1.564 (1.567) data 0.001 (0.003) loss 1.0869 (1.0716) acc 68.7500 (73.0452) lr 1.9098e-04 eta 3:42:47 +epoch [42/50] batch [475/1000] time 1.582 (1.567) data 0.000 (0.003) loss 1.0410 (1.0712) acc 78.1250 (73.0658) lr 1.9098e-04 eta 3:42:38 +epoch [42/50] batch [480/1000] time 1.533 (1.567) data 0.000 (0.003) loss 0.9419 (1.0734) acc 78.1250 (73.0469) lr 1.9098e-04 eta 3:42:29 +epoch [42/50] batch [485/1000] time 1.573 (1.567) data 0.000 (0.003) loss 0.9370 (1.0759) acc 78.1250 (73.0477) lr 1.9098e-04 eta 3:42:21 +epoch [42/50] batch [490/1000] time 1.552 (1.567) data 0.001 (0.003) loss 1.1582 (1.0775) acc 68.7500 (73.0421) lr 1.9098e-04 eta 3:42:13 +epoch [42/50] batch [495/1000] time 1.557 (1.567) data 0.000 (0.003) loss 1.1396 (1.0748) acc 75.0000 (73.0682) lr 1.9098e-04 eta 3:42:04 +epoch [42/50] batch [500/1000] time 1.553 (1.567) data 0.000 (0.003) loss 1.0498 (1.0766) acc 68.7500 (73.0062) lr 1.9098e-04 eta 3:41:56 +epoch [42/50] batch [505/1000] time 1.573 (1.567) data 0.001 (0.003) loss 0.8477 (1.0773) acc 78.1250 (73.0012) lr 1.9098e-04 eta 3:41:48 +epoch [42/50] batch [510/1000] time 1.578 (1.567) data 0.000 (0.003) loss 1.0469 (1.0766) acc 68.7500 (72.9963) lr 1.9098e-04 eta 3:41:40 +epoch [42/50] batch [515/1000] time 1.589 (1.567) data 0.001 (0.003) loss 0.6938 (1.0778) acc 84.3750 (72.9733) lr 1.9098e-04 eta 3:41:35 +epoch [42/50] batch [520/1000] time 1.567 (1.567) data 0.001 (0.003) loss 0.9902 (1.0783) acc 68.7500 (72.9627) lr 1.9098e-04 eta 3:41:27 +epoch [42/50] batch [525/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.9800 (1.0788) acc 81.2500 (72.9881) lr 1.9098e-04 eta 3:41:18 +epoch [42/50] 
batch [530/1000] time 1.573 (1.567) data 0.000 (0.002) loss 1.5039 (1.0787) acc 71.8750 (72.9953) lr 1.9098e-04 eta 3:41:11 +epoch [42/50] batch [535/1000] time 1.564 (1.567) data 0.000 (0.002) loss 1.0420 (1.0792) acc 71.8750 (72.9614) lr 1.9098e-04 eta 3:41:03 +epoch [42/50] batch [540/1000] time 1.564 (1.567) data 0.001 (0.002) loss 1.2744 (1.0799) acc 75.0000 (72.9514) lr 1.9098e-04 eta 3:40:54 +epoch [42/50] batch [545/1000] time 1.561 (1.567) data 0.001 (0.002) loss 1.0205 (1.0796) acc 71.8750 (72.9300) lr 1.9098e-04 eta 3:40:47 +epoch [42/50] batch [550/1000] time 1.549 (1.567) data 0.000 (0.002) loss 0.7993 (1.0797) acc 81.2500 (72.9545) lr 1.9098e-04 eta 3:40:38 +epoch [42/50] batch [555/1000] time 1.556 (1.567) data 0.000 (0.002) loss 1.9248 (1.0806) acc 62.5000 (72.9505) lr 1.9098e-04 eta 3:40:29 +epoch [42/50] batch [560/1000] time 1.558 (1.566) data 0.000 (0.002) loss 0.8916 (1.0814) acc 81.2500 (72.9408) lr 1.9098e-04 eta 3:40:20 +epoch [42/50] batch [565/1000] time 1.548 (1.566) data 0.000 (0.002) loss 0.7256 (1.0787) acc 84.3750 (73.0144) lr 1.9098e-04 eta 3:40:11 +epoch [42/50] batch [570/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.3008 (1.0777) acc 68.7500 (73.0318) lr 1.9098e-04 eta 3:40:03 +epoch [42/50] batch [575/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.7422 (1.0781) acc 59.3750 (73.0163) lr 1.9098e-04 eta 3:39:57 +epoch [42/50] batch [580/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.5566 (1.0771) acc 65.6250 (73.0603) lr 1.9098e-04 eta 3:39:49 +epoch [42/50] batch [585/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.0840 (1.0779) acc 71.8750 (73.0235) lr 1.9098e-04 eta 3:39:41 +epoch [42/50] batch [590/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.5439 (1.0750) acc 81.2500 (73.1038) lr 1.9098e-04 eta 3:39:34 +epoch [42/50] batch [595/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.5444 (1.0729) acc 84.3750 (73.1460) lr 1.9098e-04 eta 3:39:25 +epoch [42/50] batch [600/1000] time 1.547 (1.566) data 0.001 
(0.002) loss 0.9028 (1.0729) acc 75.0000 (73.1458) lr 1.9098e-04 eta 3:39:16 +epoch [42/50] batch [605/1000] time 1.539 (1.566) data 0.000 (0.002) loss 1.0762 (1.0715) acc 71.8750 (73.1663) lr 1.9098e-04 eta 3:39:07 +epoch [42/50] batch [610/1000] time 1.557 (1.566) data 0.000 (0.002) loss 1.0586 (1.0711) acc 68.7500 (73.1609) lr 1.9098e-04 eta 3:38:59 +epoch [42/50] batch [615/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.9048 (1.0708) acc 71.8750 (73.1809) lr 1.9098e-04 eta 3:38:51 +epoch [42/50] batch [620/1000] time 1.583 (1.566) data 0.001 (0.002) loss 0.6968 (1.0693) acc 81.2500 (73.1956) lr 1.9098e-04 eta 3:38:45 +epoch [42/50] batch [625/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.8359 (1.0700) acc 59.3750 (73.1750) lr 1.9098e-04 eta 3:38:37 +epoch [42/50] batch [630/1000] time 1.584 (1.566) data 0.001 (0.002) loss 1.3428 (1.0699) acc 71.8750 (73.1746) lr 1.9098e-04 eta 3:38:28 +epoch [42/50] batch [635/1000] time 1.545 (1.566) data 0.001 (0.002) loss 1.0674 (1.0702) acc 71.8750 (73.1545) lr 1.9098e-04 eta 3:38:20 +epoch [42/50] batch [640/1000] time 1.545 (1.566) data 0.001 (0.002) loss 0.7090 (1.0710) acc 84.3750 (73.1543) lr 1.9098e-04 eta 3:38:12 +epoch [42/50] batch [645/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.2363 (1.0705) acc 75.0000 (73.1589) lr 1.9098e-04 eta 3:38:03 +epoch [42/50] batch [650/1000] time 1.591 (1.566) data 0.001 (0.002) loss 0.8774 (1.0699) acc 75.0000 (73.1875) lr 1.9098e-04 eta 3:37:56 +epoch [42/50] batch [655/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.5947 (1.0710) acc 68.7500 (73.1727) lr 1.9098e-04 eta 3:37:48 +epoch [42/50] batch [660/1000] time 1.544 (1.566) data 0.001 (0.002) loss 1.2471 (1.0732) acc 78.1250 (73.1581) lr 1.9098e-04 eta 3:37:39 +epoch [42/50] batch [665/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.4229 (1.0762) acc 68.7500 (73.0921) lr 1.9098e-04 eta 3:37:34 +epoch [42/50] batch [670/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.4248 (1.0763) acc 75.0000 (73.1110) lr 
1.9098e-04 eta 3:37:25 +epoch [42/50] batch [675/1000] time 1.577 (1.566) data 0.000 (0.002) loss 1.3467 (1.0766) acc 59.3750 (73.0926) lr 1.9098e-04 eta 3:37:17 +epoch [42/50] batch [680/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.3428 (1.0765) acc 78.1250 (73.1020) lr 1.9098e-04 eta 3:37:08 +epoch [42/50] batch [685/1000] time 1.561 (1.566) data 0.001 (0.002) loss 0.8823 (1.0757) acc 75.0000 (73.1022) lr 1.9098e-04 eta 3:36:59 +epoch [42/50] batch [690/1000] time 1.604 (1.566) data 0.001 (0.002) loss 1.4980 (1.0770) acc 75.0000 (73.0888) lr 1.9098e-04 eta 3:36:52 +epoch [42/50] batch [695/1000] time 1.559 (1.566) data 0.000 (0.002) loss 1.1113 (1.0765) acc 68.7500 (73.0800) lr 1.9098e-04 eta 3:36:44 +epoch [42/50] batch [700/1000] time 1.590 (1.566) data 0.000 (0.002) loss 1.0771 (1.0761) acc 78.1250 (73.0759) lr 1.9098e-04 eta 3:36:36 +epoch [42/50] batch [705/1000] time 1.553 (1.566) data 0.001 (0.002) loss 0.5000 (1.0741) acc 81.2500 (73.1294) lr 1.9098e-04 eta 3:36:28 +epoch [42/50] batch [710/1000] time 1.546 (1.566) data 0.001 (0.002) loss 1.1260 (1.0740) acc 62.5000 (73.1074) lr 1.9098e-04 eta 3:36:20 +epoch [42/50] batch [715/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.3027 (1.0734) acc 71.8750 (73.1469) lr 1.9098e-04 eta 3:36:12 +epoch [42/50] batch [720/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.8613 (1.0722) acc 71.8750 (73.1597) lr 1.9098e-04 eta 3:36:03 +epoch [42/50] batch [725/1000] time 1.728 (1.566) data 0.000 (0.002) loss 0.9688 (1.0718) acc 84.3750 (73.2069) lr 1.9098e-04 eta 3:35:57 +epoch [42/50] batch [730/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.4561 (1.0732) acc 59.3750 (73.1721) lr 1.9098e-04 eta 3:35:49 +epoch [42/50] batch [735/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.1494 (1.0745) acc 71.8750 (73.1590) lr 1.9098e-04 eta 3:35:40 +epoch [42/50] batch [740/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.7227 (1.0759) acc 75.0000 (73.1377) lr 1.9098e-04 eta 3:35:32 +epoch [42/50] batch 
[745/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.0508 (1.0760) acc 81.2500 (73.1711) lr 1.9098e-04 eta 3:35:24 +epoch [42/50] batch [750/1000] time 1.548 (1.566) data 0.001 (0.002) loss 1.1416 (1.0764) acc 78.1250 (73.1625) lr 1.9098e-04 eta 3:35:15 +epoch [42/50] batch [755/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.3213 (1.0761) acc 65.6250 (73.1829) lr 1.9098e-04 eta 3:35:07 +epoch [42/50] batch [760/1000] time 1.569 (1.566) data 0.001 (0.002) loss 1.2725 (1.0768) acc 62.5000 (73.1414) lr 1.9098e-04 eta 3:35:00 +epoch [42/50] batch [765/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.8857 (1.0787) acc 81.2500 (73.1209) lr 1.9098e-04 eta 3:34:51 +epoch [42/50] batch [770/1000] time 1.715 (1.566) data 0.000 (0.002) loss 1.6152 (1.0801) acc 65.6250 (73.1047) lr 1.9098e-04 eta 3:34:45 +epoch [42/50] batch [775/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.9985 (1.0807) acc 71.8750 (73.0806) lr 1.9098e-04 eta 3:34:37 +epoch [42/50] batch [780/1000] time 1.567 (1.566) data 0.001 (0.002) loss 1.1045 (1.0806) acc 81.2500 (73.0970) lr 1.9098e-04 eta 3:34:29 +epoch [42/50] batch [785/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.1650 (1.0800) acc 71.8750 (73.1051) lr 1.9098e-04 eta 3:34:21 +epoch [42/50] batch [790/1000] time 1.558 (1.566) data 0.000 (0.002) loss 0.8662 (1.0803) acc 75.0000 (73.0973) lr 1.9098e-04 eta 3:34:13 +epoch [42/50] batch [795/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.9907 (1.0799) acc 71.8750 (73.0857) lr 1.9098e-04 eta 3:34:04 +epoch [42/50] batch [800/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.1152 (1.0800) acc 59.3750 (73.0547) lr 1.9098e-04 eta 3:33:57 +epoch [42/50] batch [805/1000] time 1.564 (1.565) data 0.001 (0.002) loss 1.2490 (1.0810) acc 71.8750 (73.0512) lr 1.9098e-04 eta 3:33:48 +epoch [42/50] batch [810/1000] time 1.562 (1.565) data 0.000 (0.002) loss 1.3662 (1.0825) acc 65.6250 (73.0131) lr 1.9098e-04 eta 3:33:40 +epoch [42/50] batch [815/1000] time 1.556 (1.566) data 0.001 (0.002) loss 
0.7026 (1.0821) acc 78.1250 (73.0061) lr 1.9098e-04 eta 3:33:34 +epoch [42/50] batch [820/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.2178 (1.0822) acc 68.7500 (72.9878) lr 1.9098e-04 eta 3:33:26 +epoch [42/50] batch [825/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.8828 (1.0827) acc 68.7500 (72.9924) lr 1.9098e-04 eta 3:33:18 +epoch [42/50] batch [830/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.2979 (1.0827) acc 68.7500 (72.9970) lr 1.9098e-04 eta 3:33:10 +epoch [42/50] batch [835/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.6616 (1.0808) acc 78.1250 (73.0352) lr 1.9098e-04 eta 3:33:03 +epoch [42/50] batch [840/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.1172 (1.0819) acc 68.7500 (73.0097) lr 1.9098e-04 eta 3:32:55 +epoch [42/50] batch [845/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.0947 (1.0820) acc 71.8750 (73.0141) lr 1.9098e-04 eta 3:32:47 +epoch [42/50] batch [850/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.1670 (1.0821) acc 71.8750 (72.9890) lr 1.9098e-04 eta 3:32:39 +epoch [42/50] batch [855/1000] time 1.547 (1.566) data 0.000 (0.002) loss 1.3447 (1.0820) acc 62.5000 (72.9898) lr 1.9098e-04 eta 3:32:31 +epoch [42/50] batch [860/1000] time 1.572 (1.566) data 0.001 (0.002) loss 1.1416 (1.0833) acc 71.8750 (72.9433) lr 1.9098e-04 eta 3:32:23 +epoch [42/50] batch [865/1000] time 1.580 (1.566) data 0.001 (0.002) loss 0.6846 (1.0827) acc 75.0000 (72.9371) lr 1.9098e-04 eta 3:32:15 +epoch [42/50] batch [870/1000] time 1.555 (1.566) data 0.001 (0.002) loss 0.9536 (1.0827) acc 71.8750 (72.9274) lr 1.9098e-04 eta 3:32:07 +epoch [42/50] batch [875/1000] time 1.549 (1.566) data 0.000 (0.002) loss 0.9321 (1.0828) acc 84.3750 (72.9429) lr 1.9098e-04 eta 3:31:59 +epoch [42/50] batch [880/1000] time 1.550 (1.566) data 0.001 (0.002) loss 1.7070 (1.0835) acc 71.8750 (72.9190) lr 1.9098e-04 eta 3:31:52 +epoch [42/50] batch [885/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.9883 (1.0834) acc 75.0000 (72.9237) lr 1.9098e-04 
eta 3:31:44 +epoch [42/50] batch [890/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.9092 (1.0824) acc 78.1250 (72.9459) lr 1.9098e-04 eta 3:31:37 +epoch [42/50] batch [895/1000] time 1.565 (1.566) data 0.001 (0.002) loss 1.1387 (1.0817) acc 59.3750 (72.9679) lr 1.9098e-04 eta 3:31:29 +epoch [42/50] batch [900/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.2324 (1.0816) acc 71.8750 (72.9757) lr 1.9098e-04 eta 3:31:20 +epoch [42/50] batch [905/1000] time 1.580 (1.566) data 0.001 (0.002) loss 1.1230 (1.0826) acc 81.2500 (72.9593) lr 1.9098e-04 eta 3:31:12 +epoch [42/50] batch [910/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.8027 (1.0822) acc 75.0000 (72.9533) lr 1.9098e-04 eta 3:31:05 +epoch [42/50] batch [915/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.2549 (1.0815) acc 65.6250 (72.9713) lr 1.9098e-04 eta 3:30:56 +epoch [42/50] batch [920/1000] time 1.583 (1.565) data 0.001 (0.002) loss 0.8311 (1.0811) acc 81.2500 (73.0095) lr 1.9098e-04 eta 3:30:48 +epoch [42/50] batch [925/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.8022 (1.0817) acc 75.0000 (72.9899) lr 1.9098e-04 eta 3:30:42 +epoch [42/50] batch [930/1000] time 1.533 (1.566) data 0.001 (0.002) loss 0.9321 (1.0813) acc 71.8750 (72.9906) lr 1.9098e-04 eta 3:30:33 +epoch [42/50] batch [935/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.8779 (1.0811) acc 71.8750 (73.0047) lr 1.9098e-04 eta 3:30:25 +epoch [42/50] batch [940/1000] time 1.570 (1.566) data 0.001 (0.002) loss 1.6084 (1.0811) acc 65.6250 (72.9953) lr 1.9098e-04 eta 3:30:18 +epoch [42/50] batch [945/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.0957 (1.0812) acc 68.7500 (72.9828) lr 1.9098e-04 eta 3:30:10 +epoch [42/50] batch [950/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.2441 (1.0824) acc 71.8750 (72.9737) lr 1.9098e-04 eta 3:30:02 +epoch [42/50] batch [955/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.8970 (1.0825) acc 75.0000 (72.9679) lr 1.9098e-04 eta 3:29:54 +epoch [42/50] batch [960/1000] time 
1.575 (1.566) data 0.001 (0.002) loss 1.4717 (1.0826) acc 75.0000 (72.9720) lr 1.9098e-04 eta 3:29:46 +epoch [42/50] batch [965/1000] time 1.556 (1.566) data 0.001 (0.002) loss 0.9971 (1.0822) acc 75.0000 (72.9890) lr 1.9098e-04 eta 3:29:40 +epoch [42/50] batch [970/1000] time 1.573 (1.566) data 0.000 (0.002) loss 0.7983 (1.0819) acc 81.2500 (72.9929) lr 1.9098e-04 eta 3:29:32 +epoch [42/50] batch [975/1000] time 1.558 (1.566) data 0.000 (0.002) loss 0.7798 (1.0823) acc 84.3750 (72.9936) lr 1.9098e-04 eta 3:29:24 +epoch [42/50] batch [980/1000] time 1.532 (1.566) data 0.000 (0.002) loss 1.7490 (1.0834) acc 68.7500 (72.9815) lr 1.9098e-04 eta 3:29:16 +epoch [42/50] batch [985/1000] time 1.541 (1.566) data 0.001 (0.002) loss 0.9971 (1.0836) acc 68.7500 (72.9759) lr 1.9098e-04 eta 3:29:08 +epoch [42/50] batch [990/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.3398 (1.0841) acc 62.5000 (72.9672) lr 1.9098e-04 eta 3:28:59 +epoch [42/50] batch [995/1000] time 1.591 (1.566) data 0.000 (0.002) loss 1.5947 (1.0849) acc 65.6250 (72.9585) lr 1.9098e-04 eta 3:28:51 +epoch [42/50] batch [1000/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.9590 (1.0851) acc 75.0000 (72.9313) lr 1.5567e-04 eta 3:28:43 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,333 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.2% +epoch [43/50] batch [5/1000] time 1.545 (1.694) data 0.000 (0.196) loss 1.6094 (1.0738) acc 53.1250 (68.1250) lr 1.5567e-04 eta 3:45:40 +epoch [43/50] batch [10/1000] time 1.538 (1.623) data 0.000 (0.098) loss 0.9756 (1.0037) acc 84.3750 (70.9375) lr 1.5567e-04 eta 3:36:06 +epoch [43/50] batch [15/1000] time 1.589 (1.606) data 0.001 (0.066) loss 0.5371 (0.9270) acc 87.5000 (73.5417) lr 1.5567e-04 eta 3:33:46 +epoch [43/50] batch [20/1000] time 1.574 (1.598) data 0.001 (0.049) loss 0.9658 (0.9839) acc 71.8750 (72.5000) lr 1.5567e-04 eta 3:32:29 +epoch [43/50] batch [25/1000] time 1.554 (1.590) data 0.001 (0.040) loss 0.7607 (1.0216) acc 81.2500 
(72.1250) lr 1.5567e-04 eta 3:31:23 +epoch [43/50] batch [30/1000] time 1.584 (1.589) data 0.001 (0.033) loss 1.0195 (1.0000) acc 75.0000 (72.6042) lr 1.5567e-04 eta 3:31:03 +epoch [43/50] batch [35/1000] time 1.820 (1.593) data 0.000 (0.028) loss 0.7485 (0.9876) acc 75.0000 (72.6786) lr 1.5567e-04 eta 3:31:25 +epoch [43/50] batch [40/1000] time 1.534 (1.588) data 0.000 (0.025) loss 1.7529 (1.0097) acc 65.6250 (73.0469) lr 1.5567e-04 eta 3:30:38 +epoch [43/50] batch [45/1000] time 1.540 (1.584) data 0.000 (0.022) loss 0.8066 (1.0108) acc 75.0000 (73.1250) lr 1.5567e-04 eta 3:29:57 +epoch [43/50] batch [50/1000] time 1.572 (1.582) data 0.000 (0.020) loss 0.9131 (1.0272) acc 71.8750 (73.1875) lr 1.5567e-04 eta 3:29:37 +epoch [43/50] batch [55/1000] time 1.568 (1.579) data 0.001 (0.018) loss 0.6113 (1.0109) acc 81.2500 (73.3523) lr 1.5567e-04 eta 3:29:09 +epoch [43/50] batch [60/1000] time 1.571 (1.578) data 0.001 (0.017) loss 0.9873 (1.0024) acc 65.6250 (73.5417) lr 1.5567e-04 eta 3:28:51 +epoch [43/50] batch [65/1000] time 1.581 (1.577) data 0.000 (0.016) loss 0.5962 (1.0103) acc 84.3750 (73.4135) lr 1.5567e-04 eta 3:28:37 +epoch [43/50] batch [70/1000] time 1.552 (1.576) data 0.000 (0.014) loss 1.4600 (1.0152) acc 62.5000 (73.6161) lr 1.5567e-04 eta 3:28:19 +epoch [43/50] batch [75/1000] time 1.573 (1.575) data 0.000 (0.014) loss 1.0889 (1.0031) acc 71.8750 (73.7917) lr 1.5567e-04 eta 3:28:00 +epoch [43/50] batch [80/1000] time 1.558 (1.576) data 0.000 (0.013) loss 0.7319 (0.9938) acc 71.8750 (73.8281) lr 1.5567e-04 eta 3:28:02 +epoch [43/50] batch [85/1000] time 1.564 (1.575) data 0.001 (0.012) loss 1.6279 (1.0006) acc 68.7500 (73.6765) lr 1.5567e-04 eta 3:27:44 +epoch [43/50] batch [90/1000] time 1.550 (1.574) data 0.000 (0.011) loss 0.9365 (1.0065) acc 75.0000 (73.4028) lr 1.5567e-04 eta 3:27:31 +epoch [43/50] batch [95/1000] time 1.584 (1.574) data 0.000 (0.011) loss 0.9121 (1.0270) acc 75.0000 (72.9934) lr 1.5567e-04 eta 3:27:22 +epoch [43/50] batch [100/1000] 
time 1.557 (1.573) data 0.000 (0.010) loss 1.6807 (1.0339) acc 71.8750 (73.0000) lr 1.5567e-04 eta 3:27:05 +epoch [43/50] batch [105/1000] time 1.564 (1.572) data 0.000 (0.010) loss 1.1064 (1.0322) acc 68.7500 (73.0952) lr 1.5567e-04 eta 3:26:53 +epoch [43/50] batch [110/1000] time 1.551 (1.572) data 0.000 (0.009) loss 0.7861 (1.0353) acc 78.1250 (73.2102) lr 1.5567e-04 eta 3:26:40 +epoch [43/50] batch [115/1000] time 1.551 (1.572) data 0.000 (0.009) loss 1.1631 (1.0411) acc 65.6250 (73.2609) lr 1.5567e-04 eta 3:26:31 +epoch [43/50] batch [120/1000] time 1.541 (1.571) data 0.001 (0.009) loss 1.2520 (1.0375) acc 71.8750 (73.2552) lr 1.5567e-04 eta 3:26:18 +epoch [43/50] batch [125/1000] time 1.581 (1.571) data 0.000 (0.008) loss 0.9771 (1.0294) acc 81.2500 (73.5500) lr 1.5567e-04 eta 3:26:07 +epoch [43/50] batch [130/1000] time 1.555 (1.570) data 0.000 (0.008) loss 0.9307 (1.0346) acc 71.8750 (73.4135) lr 1.5567e-04 eta 3:25:57 +epoch [43/50] batch [135/1000] time 1.561 (1.570) data 0.001 (0.008) loss 0.6191 (1.0257) acc 75.0000 (73.4954) lr 1.5567e-04 eta 3:25:48 +epoch [43/50] batch [140/1000] time 1.552 (1.570) data 0.001 (0.007) loss 1.5195 (1.0180) acc 62.5000 (73.7946) lr 1.5567e-04 eta 3:25:38 +epoch [43/50] batch [145/1000] time 1.573 (1.571) data 0.000 (0.007) loss 1.2090 (1.0196) acc 68.7500 (73.7069) lr 1.5567e-04 eta 3:25:37 +epoch [43/50] batch [150/1000] time 1.572 (1.570) data 0.001 (0.007) loss 0.9131 (1.0258) acc 71.8750 (73.5417) lr 1.5567e-04 eta 3:25:27 +epoch [43/50] batch [155/1000] time 1.560 (1.570) data 0.001 (0.007) loss 0.6895 (1.0308) acc 84.3750 (73.5282) lr 1.5567e-04 eta 3:25:18 +epoch [43/50] batch [160/1000] time 1.572 (1.570) data 0.001 (0.007) loss 0.9888 (1.0422) acc 78.1250 (73.2617) lr 1.5567e-04 eta 3:25:07 +epoch [43/50] batch [165/1000] time 1.553 (1.570) data 0.001 (0.006) loss 0.9438 (1.0434) acc 65.6250 (73.1818) lr 1.5567e-04 eta 3:24:57 +epoch [43/50] batch [170/1000] time 1.587 (1.569) data 0.000 (0.006) loss 1.2520 
(1.0416) acc 68.7500 (73.2169) lr 1.5567e-04 eta 3:24:48 +epoch [43/50] batch [175/1000] time 1.563 (1.569) data 0.000 (0.006) loss 0.9097 (1.0489) acc 78.1250 (73.1786) lr 1.5567e-04 eta 3:24:39 +epoch [43/50] batch [180/1000] time 1.584 (1.569) data 0.001 (0.006) loss 1.1006 (1.0492) acc 81.2500 (73.2639) lr 1.5567e-04 eta 3:24:33 +epoch [43/50] batch [185/1000] time 1.551 (1.569) data 0.000 (0.006) loss 1.3535 (1.0475) acc 68.7500 (73.2939) lr 1.5567e-04 eta 3:24:21 +epoch [43/50] batch [190/1000] time 1.549 (1.570) data 0.001 (0.006) loss 1.2109 (1.0510) acc 71.8750 (73.2895) lr 1.5567e-04 eta 3:24:21 +epoch [43/50] batch [195/1000] time 1.560 (1.570) data 0.001 (0.006) loss 0.9561 (1.0503) acc 75.0000 (73.3494) lr 1.5567e-04 eta 3:24:10 +epoch [43/50] batch [200/1000] time 1.558 (1.569) data 0.000 (0.005) loss 1.3789 (1.0514) acc 71.8750 (73.3281) lr 1.5567e-04 eta 3:24:00 +epoch [43/50] batch [205/1000] time 1.571 (1.569) data 0.001 (0.005) loss 1.4014 (1.0557) acc 62.5000 (73.2317) lr 1.5567e-04 eta 3:23:53 +epoch [43/50] batch [210/1000] time 1.563 (1.569) data 0.000 (0.005) loss 1.0420 (1.0558) acc 68.7500 (73.1696) lr 1.5567e-04 eta 3:23:44 +epoch [43/50] batch [215/1000] time 1.559 (1.569) data 0.000 (0.005) loss 1.0068 (1.0516) acc 75.0000 (73.2558) lr 1.5567e-04 eta 3:23:34 +epoch [43/50] batch [220/1000] time 1.555 (1.569) data 0.001 (0.005) loss 0.7612 (1.0506) acc 81.2500 (73.2955) lr 1.5567e-04 eta 3:23:23 +epoch [43/50] batch [225/1000] time 1.573 (1.569) data 0.000 (0.005) loss 1.1895 (1.0522) acc 78.1250 (73.3472) lr 1.5567e-04 eta 3:23:18 +epoch [43/50] batch [230/1000] time 1.549 (1.570) data 0.001 (0.005) loss 0.6426 (1.0571) acc 75.0000 (73.1929) lr 1.5567e-04 eta 3:23:15 +epoch [43/50] batch [235/1000] time 1.541 (1.569) data 0.000 (0.005) loss 1.6875 (1.0597) acc 71.8750 (73.2048) lr 1.5567e-04 eta 3:23:05 +epoch [43/50] batch [240/1000] time 1.558 (1.569) data 0.001 (0.005) loss 1.4033 (1.0665) acc 71.8750 (73.0859) lr 1.5567e-04 eta 
3:22:53 +epoch [43/50] batch [245/1000] time 1.589 (1.569) data 0.000 (0.004) loss 0.9321 (1.0704) acc 78.1250 (72.9847) lr 1.5567e-04 eta 3:22:45 +epoch [43/50] batch [250/1000] time 1.562 (1.569) data 0.000 (0.004) loss 1.0029 (1.0712) acc 65.6250 (72.9625) lr 1.5567e-04 eta 3:22:36 +epoch [43/50] batch [255/1000] time 1.549 (1.568) data 0.000 (0.004) loss 0.9844 (1.0687) acc 75.0000 (72.9779) lr 1.5567e-04 eta 3:22:26 +epoch [43/50] batch [260/1000] time 1.544 (1.568) data 0.000 (0.004) loss 1.1240 (1.0667) acc 75.0000 (73.0048) lr 1.5567e-04 eta 3:22:18 +epoch [43/50] batch [265/1000] time 1.588 (1.568) data 0.001 (0.004) loss 0.6011 (1.0679) acc 90.6250 (73.0307) lr 1.5567e-04 eta 3:22:10 +epoch [43/50] batch [270/1000] time 1.554 (1.568) data 0.000 (0.004) loss 1.3047 (1.0659) acc 65.6250 (73.0787) lr 1.5567e-04 eta 3:22:00 +epoch [43/50] batch [275/1000] time 1.594 (1.568) data 0.001 (0.004) loss 0.8291 (1.0647) acc 78.1250 (73.1136) lr 1.5567e-04 eta 3:21:52 +epoch [43/50] batch [280/1000] time 1.559 (1.568) data 0.001 (0.004) loss 1.0137 (1.0622) acc 65.6250 (73.1473) lr 1.5567e-04 eta 3:21:44 +epoch [43/50] batch [285/1000] time 1.556 (1.568) data 0.001 (0.004) loss 0.7876 (1.0611) acc 84.3750 (73.1908) lr 1.5567e-04 eta 3:21:36 +epoch [43/50] batch [290/1000] time 1.557 (1.568) data 0.001 (0.004) loss 1.2988 (1.0625) acc 65.6250 (73.1466) lr 1.5567e-04 eta 3:21:26 +epoch [43/50] batch [295/1000] time 1.548 (1.568) data 0.000 (0.004) loss 0.8892 (1.0645) acc 59.3750 (73.1250) lr 1.5567e-04 eta 3:21:22 +epoch [43/50] batch [300/1000] time 1.569 (1.568) data 0.001 (0.004) loss 0.8022 (1.0644) acc 71.8750 (73.0729) lr 1.5567e-04 eta 3:21:13 +epoch [43/50] batch [305/1000] time 1.541 (1.568) data 0.001 (0.004) loss 1.2861 (1.0615) acc 68.7500 (73.0840) lr 1.5567e-04 eta 3:21:04 +epoch [43/50] batch [310/1000] time 1.566 (1.568) data 0.000 (0.004) loss 0.9048 (1.0618) acc 78.1250 (73.0746) lr 1.5567e-04 eta 3:20:56 +epoch [43/50] batch [315/1000] time 1.589 
(1.568) data 0.000 (0.004) loss 1.6797 (1.0646) acc 68.7500 (73.1052) lr 1.5567e-04 eta 3:20:48 +epoch [43/50] batch [320/1000] time 1.538 (1.568) data 0.001 (0.004) loss 1.0488 (1.0674) acc 62.5000 (73.0566) lr 1.5567e-04 eta 3:20:41 +epoch [43/50] batch [325/1000] time 1.548 (1.568) data 0.000 (0.004) loss 1.3076 (1.0666) acc 62.5000 (73.0481) lr 1.5567e-04 eta 3:20:31 +epoch [43/50] batch [330/1000] time 1.557 (1.568) data 0.000 (0.003) loss 0.9976 (1.0658) acc 65.6250 (73.0019) lr 1.5567e-04 eta 3:20:23 +epoch [43/50] batch [335/1000] time 1.543 (1.567) data 0.000 (0.003) loss 0.8813 (1.0686) acc 78.1250 (72.9757) lr 1.5567e-04 eta 3:20:13 +epoch [43/50] batch [340/1000] time 1.557 (1.568) data 0.001 (0.003) loss 1.6758 (1.0677) acc 59.3750 (72.9596) lr 1.5567e-04 eta 3:20:08 +epoch [43/50] batch [345/1000] time 1.570 (1.568) data 0.001 (0.003) loss 1.3564 (1.0683) acc 65.6250 (72.8714) lr 1.5567e-04 eta 3:20:01 +epoch [43/50] batch [350/1000] time 1.565 (1.568) data 0.000 (0.003) loss 1.6670 (1.0704) acc 68.7500 (72.8839) lr 1.5567e-04 eta 3:19:53 +epoch [43/50] batch [355/1000] time 1.573 (1.568) data 0.001 (0.003) loss 0.9854 (1.0694) acc 78.1250 (72.9049) lr 1.5567e-04 eta 3:19:44 +epoch [43/50] batch [360/1000] time 1.570 (1.568) data 0.001 (0.003) loss 1.1279 (1.0716) acc 71.8750 (72.8733) lr 1.5567e-04 eta 3:19:36 +epoch [43/50] batch [365/1000] time 1.536 (1.567) data 0.001 (0.003) loss 1.4844 (1.0737) acc 62.5000 (72.7911) lr 1.5567e-04 eta 3:19:26 +epoch [43/50] batch [370/1000] time 1.543 (1.567) data 0.001 (0.003) loss 0.8970 (1.0719) acc 81.2500 (72.8716) lr 1.5567e-04 eta 3:19:18 +epoch [43/50] batch [375/1000] time 1.551 (1.567) data 0.001 (0.003) loss 1.1328 (1.0729) acc 71.8750 (72.9250) lr 1.5567e-04 eta 3:19:08 +epoch [43/50] batch [380/1000] time 1.718 (1.567) data 0.001 (0.003) loss 0.8945 (1.0708) acc 78.1250 (72.9688) lr 1.5567e-04 eta 3:19:03 +epoch [43/50] batch [385/1000] time 1.580 (1.567) data 0.000 (0.003) loss 0.9116 (1.0671) acc 
71.8750 (73.0438) lr 1.5567e-04 eta 3:18:55 +epoch [43/50] batch [390/1000] time 1.569 (1.567) data 0.000 (0.003) loss 1.8398 (1.0695) acc 68.7500 (72.9888) lr 1.5567e-04 eta 3:18:47 +epoch [43/50] batch [395/1000] time 1.577 (1.567) data 0.000 (0.003) loss 1.1738 (1.0703) acc 78.1250 (72.9509) lr 1.5567e-04 eta 3:18:39 +epoch [43/50] batch [400/1000] time 1.541 (1.567) data 0.000 (0.003) loss 0.9102 (1.0695) acc 84.3750 (73.0000) lr 1.5567e-04 eta 3:18:32 +epoch [43/50] batch [405/1000] time 1.561 (1.567) data 0.001 (0.003) loss 1.4258 (1.0724) acc 65.6250 (72.9475) lr 1.5567e-04 eta 3:18:24 +epoch [43/50] batch [410/1000] time 1.558 (1.567) data 0.001 (0.003) loss 0.9233 (1.0728) acc 65.6250 (72.8735) lr 1.5567e-04 eta 3:18:16 +epoch [43/50] batch [415/1000] time 1.540 (1.567) data 0.000 (0.003) loss 0.8843 (1.0731) acc 71.8750 (72.8690) lr 1.5567e-04 eta 3:18:06 +epoch [43/50] batch [420/1000] time 1.572 (1.567) data 0.000 (0.003) loss 0.6133 (1.0694) acc 84.3750 (72.9390) lr 1.5567e-04 eta 3:17:58 +epoch [43/50] batch [425/1000] time 1.575 (1.567) data 0.000 (0.003) loss 1.0215 (1.0680) acc 65.6250 (72.9118) lr 1.5567e-04 eta 3:17:49 +epoch [43/50] batch [430/1000] time 1.564 (1.567) data 0.000 (0.003) loss 0.9805 (1.0698) acc 71.8750 (72.9070) lr 1.5567e-04 eta 3:17:41 +epoch [43/50] batch [435/1000] time 1.543 (1.567) data 0.001 (0.003) loss 0.7056 (1.0684) acc 84.3750 (72.9239) lr 1.5567e-04 eta 3:17:32 +epoch [43/50] batch [440/1000] time 1.537 (1.566) data 0.000 (0.003) loss 0.8188 (1.0656) acc 75.0000 (72.9972) lr 1.5567e-04 eta 3:17:22 +epoch [43/50] batch [445/1000] time 1.571 (1.567) data 0.000 (0.003) loss 1.4746 (1.0671) acc 68.7500 (72.9494) lr 1.5567e-04 eta 3:17:17 +epoch [43/50] batch [450/1000] time 1.569 (1.567) data 0.000 (0.003) loss 1.3193 (1.0701) acc 75.0000 (72.8889) lr 1.5567e-04 eta 3:17:09 +epoch [43/50] batch [455/1000] time 1.531 (1.567) data 0.000 (0.003) loss 1.4268 (1.0721) acc 68.7500 (72.8365) lr 1.5567e-04 eta 3:17:00 +epoch 
[43/50] batch [460/1000] time 1.573 (1.567) data 0.001 (0.003) loss 1.5742 (1.0734) acc 62.5000 (72.7853) lr 1.5567e-04 eta 3:16:51 +epoch [43/50] batch [465/1000] time 1.561 (1.566) data 0.001 (0.003) loss 1.0820 (1.0711) acc 71.8750 (72.8562) lr 1.5567e-04 eta 3:16:42 +epoch [43/50] batch [470/1000] time 1.575 (1.566) data 0.000 (0.003) loss 1.1611 (1.0716) acc 71.8750 (72.8457) lr 1.5567e-04 eta 3:16:33 +epoch [43/50] batch [475/1000] time 1.564 (1.566) data 0.001 (0.003) loss 0.7231 (1.0728) acc 78.1250 (72.8224) lr 1.5567e-04 eta 3:16:26 +epoch [43/50] batch [480/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.2617 (1.0729) acc 65.6250 (72.8060) lr 1.5567e-04 eta 3:16:18 +epoch [43/50] batch [485/1000] time 1.568 (1.566) data 0.000 (0.003) loss 1.6533 (1.0736) acc 65.6250 (72.8222) lr 1.5567e-04 eta 3:16:09 +epoch [43/50] batch [490/1000] time 1.562 (1.566) data 0.000 (0.002) loss 0.6001 (1.0728) acc 84.3750 (72.8189) lr 1.5567e-04 eta 3:16:03 +epoch [43/50] batch [495/1000] time 1.565 (1.566) data 0.000 (0.002) loss 0.9971 (1.0724) acc 65.6250 (72.8093) lr 1.5567e-04 eta 3:15:55 +epoch [43/50] batch [500/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.7842 (1.0700) acc 71.8750 (72.8312) lr 1.5567e-04 eta 3:15:47 +epoch [43/50] batch [505/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.0205 (1.0695) acc 68.7500 (72.8218) lr 1.5567e-04 eta 3:15:39 +epoch [43/50] batch [510/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.4619 (1.0687) acc 65.6250 (72.8493) lr 1.5567e-04 eta 3:15:31 +epoch [43/50] batch [515/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.4902 (1.0705) acc 62.5000 (72.8277) lr 1.5567e-04 eta 3:15:24 +epoch [43/50] batch [520/1000] time 1.541 (1.566) data 0.001 (0.002) loss 0.7290 (1.0688) acc 84.3750 (72.8846) lr 1.5567e-04 eta 3:15:15 +epoch [43/50] batch [525/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.7578 (1.0693) acc 68.7500 (72.9048) lr 1.5567e-04 eta 3:15:07 +epoch [43/50] batch [530/1000] time 1.534 (1.566) data 
0.000 (0.002) loss 0.6904 (1.0676) acc 78.1250 (72.9245) lr 1.5567e-04 eta 3:14:58 +epoch [43/50] batch [535/1000] time 1.559 (1.566) data 0.001 (0.002) loss 0.8286 (1.0671) acc 65.6250 (72.9089) lr 1.5567e-04 eta 3:14:53 +epoch [43/50] batch [540/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.1484 (1.0668) acc 65.6250 (72.8646) lr 1.5567e-04 eta 3:14:44 +epoch [43/50] batch [545/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.5605 (1.0692) acc 53.1250 (72.7867) lr 1.5567e-04 eta 3:14:35 +epoch [43/50] batch [550/1000] time 1.575 (1.566) data 0.000 (0.002) loss 1.2090 (1.0721) acc 75.0000 (72.7500) lr 1.5567e-04 eta 3:14:28 +epoch [43/50] batch [555/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.9521 (1.0726) acc 71.8750 (72.7421) lr 1.5567e-04 eta 3:14:20 +epoch [43/50] batch [560/1000] time 1.545 (1.566) data 0.001 (0.002) loss 1.0693 (1.0755) acc 68.7500 (72.6786) lr 1.5567e-04 eta 3:14:11 +epoch [43/50] batch [565/1000] time 1.586 (1.566) data 0.001 (0.002) loss 0.7114 (1.0748) acc 87.5000 (72.7046) lr 1.5567e-04 eta 3:14:04 +epoch [43/50] batch [570/1000] time 1.581 (1.566) data 0.000 (0.002) loss 0.9038 (1.0733) acc 78.1250 (72.7577) lr 1.5567e-04 eta 3:13:56 +epoch [43/50] batch [575/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.6929 (1.0709) acc 84.3750 (72.8370) lr 1.5567e-04 eta 3:13:47 +epoch [43/50] batch [580/1000] time 1.570 (1.566) data 0.001 (0.002) loss 1.0762 (1.0714) acc 68.7500 (72.8125) lr 1.5567e-04 eta 3:13:38 +epoch [43/50] batch [585/1000] time 1.588 (1.566) data 0.000 (0.002) loss 1.4229 (1.0712) acc 68.7500 (72.8472) lr 1.5567e-04 eta 3:13:30 +epoch [43/50] batch [590/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.2295 (1.0727) acc 62.5000 (72.8231) lr 1.5567e-04 eta 3:13:21 +epoch [43/50] batch [595/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.0693 (1.0730) acc 75.0000 (72.8256) lr 1.5567e-04 eta 3:13:15 +epoch [43/50] batch [600/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.1104 (1.0724) acc 71.8750 
(72.8594) lr 1.5567e-04 eta 3:13:06 +epoch [43/50] batch [605/1000] time 1.589 (1.566) data 0.001 (0.002) loss 0.7085 (1.0723) acc 87.5000 (72.9236) lr 1.5567e-04 eta 3:12:58 +epoch [43/50] batch [610/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.0400 (1.0731) acc 75.0000 (72.9047) lr 1.5567e-04 eta 3:12:50 +epoch [43/50] batch [615/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.8462 (1.0740) acc 84.3750 (72.9065) lr 1.5567e-04 eta 3:12:42 +epoch [43/50] batch [620/1000] time 1.530 (1.566) data 0.001 (0.002) loss 1.1865 (1.0748) acc 78.1250 (72.9032) lr 1.5567e-04 eta 3:12:34 +epoch [43/50] batch [625/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.9341 (1.0753) acc 81.2500 (72.9100) lr 1.5567e-04 eta 3:12:25 +epoch [43/50] batch [630/1000] time 1.544 (1.565) data 0.001 (0.002) loss 1.0020 (1.0760) acc 78.1250 (72.8869) lr 1.5567e-04 eta 3:12:17 +epoch [43/50] batch [635/1000] time 1.560 (1.565) data 0.001 (0.002) loss 0.8271 (1.0762) acc 81.2500 (72.8986) lr 1.5567e-04 eta 3:12:08 +epoch [43/50] batch [640/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.9307 (1.0755) acc 75.0000 (72.9150) lr 1.5567e-04 eta 3:12:02 +epoch [43/50] batch [645/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.0576 (1.0755) acc 71.8750 (72.8828) lr 1.5567e-04 eta 3:11:54 +epoch [43/50] batch [650/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.8662 (1.0755) acc 75.0000 (72.8750) lr 1.5567e-04 eta 3:11:46 +epoch [43/50] batch [655/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.3381 (1.0747) acc 90.6250 (72.8865) lr 1.5567e-04 eta 3:11:37 +epoch [43/50] batch [660/1000] time 1.566 (1.565) data 0.001 (0.002) loss 0.8735 (1.0758) acc 75.0000 (72.8551) lr 1.5567e-04 eta 3:11:29 +epoch [43/50] batch [665/1000] time 1.549 (1.565) data 0.000 (0.002) loss 1.1416 (1.0770) acc 68.7500 (72.8289) lr 1.5567e-04 eta 3:11:21 +epoch [43/50] batch [670/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.8057 (1.0759) acc 68.7500 (72.8685) lr 1.5567e-04 eta 3:11:12 +epoch [43/50] 
batch [675/1000] time 1.576 (1.565) data 0.001 (0.002) loss 1.1113 (1.0765) acc 81.2500 (72.8611) lr 1.5567e-04 eta 3:11:04 +epoch [43/50] batch [680/1000] time 1.567 (1.565) data 0.001 (0.002) loss 0.8472 (1.0759) acc 78.1250 (72.8585) lr 1.5567e-04 eta 3:10:56 +epoch [43/50] batch [685/1000] time 1.539 (1.565) data 0.000 (0.002) loss 1.3115 (1.0748) acc 75.0000 (72.9060) lr 1.5567e-04 eta 3:10:49 +epoch [43/50] batch [690/1000] time 1.547 (1.565) data 0.000 (0.002) loss 1.0977 (1.0755) acc 84.3750 (72.8940) lr 1.5567e-04 eta 3:10:40 +epoch [43/50] batch [695/1000] time 1.540 (1.565) data 0.000 (0.002) loss 1.1934 (1.0764) acc 62.5000 (72.8777) lr 1.5567e-04 eta 3:10:32 +epoch [43/50] batch [700/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.3818 (1.0777) acc 68.7500 (72.8438) lr 1.5567e-04 eta 3:10:24 +epoch [43/50] batch [705/1000] time 1.554 (1.565) data 0.001 (0.002) loss 0.9731 (1.0782) acc 71.8750 (72.8413) lr 1.5567e-04 eta 3:10:16 +epoch [43/50] batch [710/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.9307 (1.0793) acc 75.0000 (72.8257) lr 1.5567e-04 eta 3:10:07 +epoch [43/50] batch [715/1000] time 1.554 (1.565) data 0.000 (0.002) loss 0.7378 (1.0799) acc 84.3750 (72.8409) lr 1.5567e-04 eta 3:09:59 +epoch [43/50] batch [720/1000] time 1.552 (1.565) data 0.001 (0.002) loss 1.3232 (1.0815) acc 75.0000 (72.8125) lr 1.5567e-04 eta 3:09:51 +epoch [43/50] batch [725/1000] time 1.595 (1.565) data 0.001 (0.002) loss 0.7676 (1.0801) acc 68.7500 (72.8233) lr 1.5567e-04 eta 3:09:43 +epoch [43/50] batch [730/1000] time 1.536 (1.565) data 0.001 (0.002) loss 1.0488 (1.0812) acc 75.0000 (72.7911) lr 1.5567e-04 eta 3:09:35 +epoch [43/50] batch [735/1000] time 1.547 (1.565) data 0.001 (0.002) loss 0.7949 (1.0813) acc 81.2500 (72.8146) lr 1.5567e-04 eta 3:09:26 +epoch [43/50] batch [740/1000] time 1.575 (1.565) data 0.001 (0.002) loss 1.5742 (1.0822) acc 71.8750 (72.8041) lr 1.5567e-04 eta 3:09:18 +epoch [43/50] batch [745/1000] time 1.727 (1.565) data 0.001 
(0.002) loss 0.9097 (1.0810) acc 68.7500 (72.8146) lr 1.5567e-04 eta 3:09:12 +epoch [43/50] batch [750/1000] time 1.550 (1.565) data 0.000 (0.002) loss 1.1543 (1.0810) acc 68.7500 (72.8000) lr 1.5567e-04 eta 3:09:03 +epoch [43/50] batch [755/1000] time 1.577 (1.565) data 0.000 (0.002) loss 0.9189 (1.0817) acc 81.2500 (72.7732) lr 1.5567e-04 eta 3:08:55 +epoch [43/50] batch [760/1000] time 1.558 (1.565) data 0.000 (0.002) loss 1.3809 (1.0824) acc 71.8750 (72.7508) lr 1.5567e-04 eta 3:08:47 +epoch [43/50] batch [765/1000] time 1.558 (1.565) data 0.001 (0.002) loss 0.9878 (1.0811) acc 56.2500 (72.7533) lr 1.5567e-04 eta 3:08:39 +epoch [43/50] batch [770/1000] time 1.565 (1.564) data 0.000 (0.002) loss 0.9780 (1.0810) acc 75.0000 (72.7719) lr 1.5567e-04 eta 3:08:31 +epoch [43/50] batch [775/1000] time 1.547 (1.564) data 0.001 (0.002) loss 0.9282 (1.0800) acc 81.2500 (72.8024) lr 1.5567e-04 eta 3:08:22 +epoch [43/50] batch [780/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.8867 (1.0807) acc 56.2500 (72.7965) lr 1.5567e-04 eta 3:08:14 +epoch [43/50] batch [785/1000] time 1.545 (1.564) data 0.000 (0.002) loss 1.2334 (1.0813) acc 75.0000 (72.8145) lr 1.5567e-04 eta 3:08:05 +epoch [43/50] batch [790/1000] time 1.740 (1.564) data 0.000 (0.002) loss 1.3535 (1.0828) acc 71.8750 (72.7888) lr 1.5567e-04 eta 3:07:59 +epoch [43/50] batch [795/1000] time 1.531 (1.564) data 0.001 (0.002) loss 1.1768 (1.0825) acc 71.8750 (72.7987) lr 1.5567e-04 eta 3:07:51 +epoch [43/50] batch [800/1000] time 1.563 (1.564) data 0.001 (0.002) loss 1.7637 (1.0829) acc 56.2500 (72.7852) lr 1.5567e-04 eta 3:07:43 +epoch [43/50] batch [805/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.1064 (1.0836) acc 71.8750 (72.7717) lr 1.5567e-04 eta 3:07:35 +epoch [43/50] batch [810/1000] time 1.562 (1.564) data 0.001 (0.002) loss 0.5581 (1.0821) acc 87.5000 (72.8086) lr 1.5567e-04 eta 3:07:27 +epoch [43/50] batch [815/1000] time 1.550 (1.564) data 0.000 (0.002) loss 0.6274 (1.0823) acc 81.2500 (72.7991) lr 
1.5567e-04 eta 3:07:18 +epoch [43/50] batch [820/1000] time 1.564 (1.564) data 0.000 (0.002) loss 1.1162 (1.0820) acc 68.7500 (72.8087) lr 1.5567e-04 eta 3:07:10 +epoch [43/50] batch [825/1000] time 1.538 (1.564) data 0.000 (0.002) loss 1.7070 (1.0820) acc 62.5000 (72.8106) lr 1.5567e-04 eta 3:07:02 +epoch [43/50] batch [830/1000] time 1.546 (1.564) data 0.000 (0.002) loss 1.1074 (1.0829) acc 71.8750 (72.8163) lr 1.5567e-04 eta 3:06:54 +epoch [43/50] batch [835/1000] time 1.546 (1.564) data 0.001 (0.002) loss 0.6460 (1.0823) acc 78.1250 (72.8219) lr 1.5567e-04 eta 3:06:47 +epoch [43/50] batch [840/1000] time 1.548 (1.564) data 0.001 (0.002) loss 0.9424 (1.0835) acc 71.8750 (72.7827) lr 1.5567e-04 eta 3:06:39 +epoch [43/50] batch [845/1000] time 1.569 (1.564) data 0.001 (0.002) loss 1.1084 (1.0835) acc 78.1250 (72.7663) lr 1.5567e-04 eta 3:06:31 +epoch [43/50] batch [850/1000] time 1.558 (1.564) data 0.000 (0.002) loss 1.1289 (1.0839) acc 68.7500 (72.7463) lr 1.5567e-04 eta 3:06:23 +epoch [43/50] batch [855/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.9258 (1.0837) acc 68.7500 (72.7522) lr 1.5567e-04 eta 3:06:15 +epoch [43/50] batch [860/1000] time 1.536 (1.564) data 0.000 (0.002) loss 1.4688 (1.0842) acc 68.7500 (72.7326) lr 1.5567e-04 eta 3:06:08 +epoch [43/50] batch [865/1000] time 1.529 (1.564) data 0.000 (0.002) loss 1.0547 (1.0847) acc 62.5000 (72.7240) lr 1.5567e-04 eta 3:05:59 +epoch [43/50] batch [870/1000] time 1.551 (1.564) data 0.000 (0.002) loss 1.3135 (1.0846) acc 71.8750 (72.7335) lr 1.5567e-04 eta 3:05:51 +epoch [43/50] batch [875/1000] time 1.572 (1.564) data 0.000 (0.002) loss 1.0732 (1.0855) acc 75.0000 (72.7357) lr 1.5567e-04 eta 3:05:44 +epoch [43/50] batch [880/1000] time 1.566 (1.564) data 0.000 (0.002) loss 1.0322 (1.0868) acc 68.7500 (72.7202) lr 1.5567e-04 eta 3:05:35 +epoch [43/50] batch [885/1000] time 1.545 (1.564) data 0.000 (0.002) loss 1.3730 (1.0877) acc 68.7500 (72.7013) lr 1.5567e-04 eta 3:05:27 +epoch [43/50] batch 
[890/1000] time 1.546 (1.564) data 0.000 (0.002) loss 1.4834 (1.0884) acc 71.8750 (72.6896) lr 1.5567e-04 eta 3:05:19 +epoch [43/50] batch [895/1000] time 1.581 (1.564) data 0.001 (0.002) loss 1.2842 (1.0888) acc 59.3750 (72.6466) lr 1.5567e-04 eta 3:05:12 +epoch [43/50] batch [900/1000] time 1.557 (1.564) data 0.001 (0.002) loss 0.7769 (1.0890) acc 84.3750 (72.6354) lr 1.5567e-04 eta 3:05:05 +epoch [43/50] batch [905/1000] time 1.565 (1.564) data 0.001 (0.002) loss 1.4658 (1.0894) acc 71.8750 (72.6209) lr 1.5567e-04 eta 3:04:57 +epoch [43/50] batch [910/1000] time 1.539 (1.564) data 0.000 (0.002) loss 2.0176 (1.0911) acc 56.2500 (72.5927) lr 1.5567e-04 eta 3:04:49 +epoch [43/50] batch [915/1000] time 1.586 (1.564) data 0.000 (0.002) loss 1.6387 (1.0909) acc 62.5000 (72.5888) lr 1.5567e-04 eta 3:04:41 +epoch [43/50] batch [920/1000] time 1.569 (1.564) data 0.000 (0.002) loss 1.0820 (1.0907) acc 65.6250 (72.5951) lr 1.5567e-04 eta 3:04:33 +epoch [43/50] batch [925/1000] time 1.540 (1.564) data 0.000 (0.002) loss 0.7852 (1.0900) acc 78.1250 (72.6182) lr 1.5567e-04 eta 3:04:25 +epoch [43/50] batch [930/1000] time 1.591 (1.564) data 0.000 (0.002) loss 0.9365 (1.0895) acc 78.1250 (72.6310) lr 1.5567e-04 eta 3:04:17 +epoch [43/50] batch [935/1000] time 1.547 (1.564) data 0.001 (0.002) loss 1.0605 (1.0889) acc 65.6250 (72.6437) lr 1.5567e-04 eta 3:04:09 +epoch [43/50] batch [940/1000] time 1.570 (1.564) data 0.000 (0.002) loss 1.2471 (1.0891) acc 71.8750 (72.6363) lr 1.5567e-04 eta 3:04:02 +epoch [43/50] batch [945/1000] time 1.569 (1.564) data 0.001 (0.002) loss 0.6362 (1.0876) acc 87.5000 (72.6819) lr 1.5567e-04 eta 3:03:55 +epoch [43/50] batch [950/1000] time 1.577 (1.564) data 0.000 (0.002) loss 0.8589 (1.0878) acc 75.0000 (72.6678) lr 1.5567e-04 eta 3:03:47 +epoch [43/50] batch [955/1000] time 1.573 (1.564) data 0.000 (0.002) loss 0.7700 (1.0876) acc 75.0000 (72.6767) lr 1.5567e-04 eta 3:03:39 +epoch [43/50] batch [960/1000] time 1.549 (1.564) data 0.001 (0.002) loss 
1.3359 (1.0860) acc 62.5000 (72.7018) lr 1.5567e-04 eta 3:03:31 +epoch [43/50] batch [965/1000] time 1.542 (1.564) data 0.000 (0.001) loss 1.1953 (1.0861) acc 68.7500 (72.6846) lr 1.5567e-04 eta 3:03:23 +epoch [43/50] batch [970/1000] time 1.550 (1.564) data 0.001 (0.001) loss 1.1904 (1.0858) acc 65.6250 (72.6804) lr 1.5567e-04 eta 3:03:15 +epoch [43/50] batch [975/1000] time 1.577 (1.564) data 0.001 (0.001) loss 1.5410 (1.0867) acc 68.7500 (72.6538) lr 1.5567e-04 eta 3:03:07 +epoch [43/50] batch [980/1000] time 1.580 (1.564) data 0.001 (0.001) loss 0.9536 (1.0874) acc 68.7500 (72.6212) lr 1.5567e-04 eta 3:02:59 +epoch [43/50] batch [985/1000] time 1.537 (1.564) data 0.001 (0.001) loss 0.4348 (1.0861) acc 84.3750 (72.6428) lr 1.5567e-04 eta 3:02:52 +epoch [43/50] batch [990/1000] time 1.570 (1.564) data 0.000 (0.001) loss 1.0781 (1.0865) acc 62.5000 (72.6294) lr 1.5567e-04 eta 3:02:45 +epoch [43/50] batch [995/1000] time 1.551 (1.564) data 0.000 (0.001) loss 1.2217 (1.0864) acc 65.6250 (72.6225) lr 1.5567e-04 eta 3:02:37 +epoch [43/50] batch [1000/1000] time 1.569 (1.564) data 0.000 (0.001) loss 0.6567 (1.0852) acc 71.8750 (72.6500) lr 1.2369e-04 eta 3:02:29 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,401 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar +epoch [44/50] batch [5/1000] time 1.558 (1.714) data 0.000 (0.212) loss 0.7197 (0.8562) acc 78.1250 (78.7500) lr 1.2369e-04 eta 3:19:52 +epoch [44/50] batch [10/1000] time 1.548 (1.640) data 0.001 (0.106) loss 1.0684 (1.0427) acc 68.7500 (74.3750) lr 1.2369e-04 eta 3:11:04 +epoch [44/50] batch [15/1000] time 1.566 (1.613) data 0.000 (0.071) loss 0.9155 (1.0370) acc 75.0000 (75.0000) lr 1.2369e-04 eta 3:07:49 +epoch [44/50] batch [20/1000] time 1.571 (1.599) data 0.001 (0.053) loss 0.4973 (1.0187) acc 87.5000 (76.0938) lr 1.2369e-04 eta 3:06:03 +epoch 
[44/50] batch [25/1000] time 1.566 (1.593) data 0.001 (0.043) loss 1.1953 (1.0516) acc 75.0000 (75.5000) lr 1.2369e-04 eta 3:05:10 +epoch [44/50] batch [30/1000] time 1.572 (1.588) data 0.000 (0.036) loss 1.0391 (1.0200) acc 71.8750 (75.7292) lr 1.2369e-04 eta 3:04:28 +epoch [44/50] batch [35/1000] time 1.828 (1.593) data 0.000 (0.031) loss 0.8125 (1.0475) acc 81.2500 (75.3571) lr 1.2369e-04 eta 3:04:55 +epoch [44/50] batch [40/1000] time 1.547 (1.588) data 0.000 (0.027) loss 1.1250 (1.0423) acc 68.7500 (75.3906) lr 1.2369e-04 eta 3:04:13 +epoch [44/50] batch [45/1000] time 1.546 (1.584) data 0.001 (0.024) loss 0.9331 (1.0598) acc 68.7500 (74.3750) lr 1.2369e-04 eta 3:03:36 +epoch [44/50] batch [50/1000] time 1.561 (1.582) data 0.000 (0.022) loss 0.4504 (1.0463) acc 90.6250 (74.8125) lr 1.2369e-04 eta 3:03:17 +epoch [44/50] batch [55/1000] time 1.555 (1.580) data 0.001 (0.020) loss 0.7788 (1.0446) acc 78.1250 (74.7727) lr 1.2369e-04 eta 3:02:54 +epoch [44/50] batch [60/1000] time 1.545 (1.577) data 0.001 (0.018) loss 1.1826 (1.0479) acc 71.8750 (74.2188) lr 1.2369e-04 eta 3:02:22 +epoch [44/50] batch [65/1000] time 1.531 (1.574) data 0.000 (0.017) loss 1.9619 (1.0597) acc 59.3750 (73.7981) lr 1.2369e-04 eta 3:01:56 +epoch [44/50] batch [70/1000] time 1.587 (1.573) data 0.001 (0.016) loss 0.8608 (1.0566) acc 71.8750 (73.6607) lr 1.2369e-04 eta 3:01:42 +epoch [44/50] batch [75/1000] time 1.576 (1.572) data 0.001 (0.015) loss 0.7285 (1.0400) acc 84.3750 (73.7500) lr 1.2369e-04 eta 3:01:29 +epoch [44/50] batch [80/1000] time 1.564 (1.572) data 0.000 (0.014) loss 1.1387 (1.0528) acc 81.2500 (73.7500) lr 1.2369e-04 eta 3:01:18 +epoch [44/50] batch [85/1000] time 1.544 (1.573) data 0.001 (0.013) loss 1.4854 (1.0516) acc 68.7500 (73.7868) lr 1.2369e-04 eta 3:01:17 +epoch [44/50] batch [90/1000] time 1.576 (1.572) data 0.000 (0.012) loss 1.0811 (1.0566) acc 71.8750 (73.4722) lr 1.2369e-04 eta 3:01:03 +epoch [44/50] batch [95/1000] time 1.549 (1.571) data 0.001 (0.012) loss 
0.9990 (1.0635) acc 78.1250 (73.4211) lr 1.2369e-04 eta 3:00:50 +epoch [44/50] batch [100/1000] time 1.558 (1.571) data 0.000 (0.011) loss 1.3164 (1.0655) acc 62.5000 (73.4375) lr 1.2369e-04 eta 3:00:42 +epoch [44/50] batch [105/1000] time 1.560 (1.571) data 0.001 (0.011) loss 0.5371 (1.0576) acc 84.3750 (73.7500) lr 1.2369e-04 eta 3:00:35 +epoch [44/50] batch [110/1000] time 1.571 (1.571) data 0.001 (0.010) loss 1.0908 (1.0580) acc 68.7500 (73.8920) lr 1.2369e-04 eta 3:00:25 +epoch [44/50] batch [115/1000] time 1.571 (1.571) data 0.001 (0.010) loss 0.8667 (1.0615) acc 81.2500 (73.8315) lr 1.2369e-04 eta 3:00:14 +epoch [44/50] batch [120/1000] time 1.563 (1.570) data 0.001 (0.009) loss 0.7764 (1.0562) acc 78.1250 (74.1406) lr 1.2369e-04 eta 3:00:04 +epoch [44/50] batch [125/1000] time 1.547 (1.570) data 0.000 (0.009) loss 0.8521 (1.0498) acc 71.8750 (74.1500) lr 1.2369e-04 eta 2:59:52 +epoch [44/50] batch [130/1000] time 1.549 (1.569) data 0.001 (0.009) loss 1.0723 (1.0538) acc 68.7500 (73.9663) lr 1.2369e-04 eta 2:59:40 +epoch [44/50] batch [135/1000] time 1.563 (1.569) data 0.001 (0.008) loss 1.3301 (1.0548) acc 65.6250 (73.9583) lr 1.2369e-04 eta 2:59:29 +epoch [44/50] batch [140/1000] time 1.552 (1.569) data 0.001 (0.008) loss 1.8887 (1.0614) acc 65.6250 (73.9062) lr 1.2369e-04 eta 2:59:21 +epoch [44/50] batch [145/1000] time 1.551 (1.568) data 0.000 (0.008) loss 1.2461 (1.0656) acc 75.0000 (73.8793) lr 1.2369e-04 eta 2:59:10 +epoch [44/50] batch [150/1000] time 1.552 (1.568) data 0.001 (0.008) loss 1.3799 (1.0743) acc 71.8750 (73.7917) lr 1.2369e-04 eta 2:59:02 +epoch [44/50] batch [155/1000] time 1.580 (1.568) data 0.001 (0.007) loss 1.9014 (1.0945) acc 56.2500 (73.3871) lr 1.2369e-04 eta 2:58:54 +epoch [44/50] batch [160/1000] time 1.577 (1.568) data 0.000 (0.007) loss 1.1025 (1.0956) acc 65.6250 (73.2617) lr 1.2369e-04 eta 2:58:46 +epoch [44/50] batch [165/1000] time 1.561 (1.568) data 0.000 (0.007) loss 1.0566 (1.0959) acc 68.7500 (73.1818) lr 1.2369e-04 
eta 2:58:39 +epoch [44/50] batch [170/1000] time 1.592 (1.568) data 0.001 (0.007) loss 1.9600 (1.1052) acc 62.5000 (73.0515) lr 1.2369e-04 eta 2:58:29 +epoch [44/50] batch [175/1000] time 1.554 (1.568) data 0.001 (0.007) loss 1.0049 (1.1042) acc 84.3750 (73.0893) lr 1.2369e-04 eta 2:58:22 +epoch [44/50] batch [180/1000] time 1.545 (1.568) data 0.000 (0.006) loss 0.7866 (1.0962) acc 84.3750 (73.3333) lr 1.2369e-04 eta 2:58:12 +epoch [44/50] batch [185/1000] time 1.722 (1.568) data 0.000 (0.006) loss 1.0654 (1.0983) acc 62.5000 (73.1588) lr 1.2369e-04 eta 2:58:06 +epoch [44/50] batch [190/1000] time 1.592 (1.568) data 0.000 (0.006) loss 1.1318 (1.0957) acc 65.6250 (73.1250) lr 1.2369e-04 eta 2:58:00 +epoch [44/50] batch [195/1000] time 1.567 (1.568) data 0.001 (0.006) loss 1.3838 (1.1035) acc 68.7500 (73.0288) lr 1.2369e-04 eta 2:57:53 +epoch [44/50] batch [200/1000] time 1.529 (1.568) data 0.000 (0.006) loss 0.7427 (1.1028) acc 90.6250 (73.0781) lr 1.2369e-04 eta 2:57:42 +epoch [44/50] batch [205/1000] time 1.563 (1.568) data 0.000 (0.006) loss 1.1113 (1.1071) acc 71.8750 (72.9878) lr 1.2369e-04 eta 2:57:33 +epoch [44/50] batch [210/1000] time 1.545 (1.568) data 0.000 (0.006) loss 1.3467 (1.1119) acc 62.5000 (72.8274) lr 1.2369e-04 eta 2:57:23 +epoch [44/50] batch [215/1000] time 1.560 (1.567) data 0.000 (0.005) loss 0.7773 (1.1082) acc 71.8750 (72.8488) lr 1.2369e-04 eta 2:57:14 +epoch [44/50] batch [220/1000] time 1.577 (1.568) data 0.000 (0.005) loss 1.3193 (1.1097) acc 75.0000 (72.8693) lr 1.2369e-04 eta 2:57:08 +epoch [44/50] batch [225/1000] time 1.549 (1.567) data 0.000 (0.005) loss 1.1729 (1.1096) acc 78.1250 (72.8194) lr 1.2369e-04 eta 2:56:58 +epoch [44/50] batch [230/1000] time 1.674 (1.568) data 0.000 (0.005) loss 0.8257 (1.1070) acc 81.2500 (72.7310) lr 1.2369e-04 eta 2:56:54 +epoch [44/50] batch [235/1000] time 1.527 (1.567) data 0.000 (0.005) loss 1.3047 (1.1046) acc 59.3750 (72.7926) lr 1.2369e-04 eta 2:56:43 +epoch [44/50] batch [240/1000] time 
1.563 (1.567) data 0.001 (0.005) loss 0.9194 (1.1036) acc 75.0000 (72.8516) lr 1.2369e-04 eta 2:56:32 +epoch [44/50] batch [245/1000] time 1.555 (1.567) data 0.000 (0.005) loss 1.3232 (1.1029) acc 68.7500 (72.8571) lr 1.2369e-04 eta 2:56:24 +epoch [44/50] batch [250/1000] time 1.535 (1.567) data 0.000 (0.005) loss 0.8984 (1.0995) acc 71.8750 (72.8500) lr 1.2369e-04 eta 2:56:15 +epoch [44/50] batch [255/1000] time 1.539 (1.566) data 0.000 (0.005) loss 1.4131 (1.0958) acc 62.5000 (72.9289) lr 1.2369e-04 eta 2:56:05 +epoch [44/50] batch [260/1000] time 1.555 (1.566) data 0.001 (0.005) loss 0.6558 (1.0897) acc 78.1250 (73.1010) lr 1.2369e-04 eta 2:55:56 +epoch [44/50] batch [265/1000] time 1.543 (1.566) data 0.000 (0.004) loss 1.2891 (1.0902) acc 62.5000 (73.0425) lr 1.2369e-04 eta 2:55:48 +epoch [44/50] batch [270/1000] time 1.536 (1.566) data 0.000 (0.004) loss 1.2930 (1.0904) acc 68.7500 (73.0324) lr 1.2369e-04 eta 2:55:39 +epoch [44/50] batch [275/1000] time 1.570 (1.567) data 0.000 (0.004) loss 1.2881 (1.0901) acc 75.0000 (73.0227) lr 1.2369e-04 eta 2:55:34 +epoch [44/50] batch [280/1000] time 1.572 (1.567) data 0.000 (0.004) loss 1.1699 (1.0889) acc 71.8750 (73.0357) lr 1.2369e-04 eta 2:55:28 +epoch [44/50] batch [285/1000] time 1.564 (1.567) data 0.000 (0.004) loss 1.6885 (1.0908) acc 56.2500 (73.0044) lr 1.2369e-04 eta 2:55:19 +epoch [44/50] batch [290/1000] time 1.546 (1.566) data 0.000 (0.004) loss 0.6357 (1.0904) acc 81.2500 (72.9310) lr 1.2369e-04 eta 2:55:11 +epoch [44/50] batch [295/1000] time 1.574 (1.566) data 0.001 (0.004) loss 1.2861 (1.0909) acc 65.6250 (72.9237) lr 1.2369e-04 eta 2:55:02 +epoch [44/50] batch [300/1000] time 1.568 (1.566) data 0.000 (0.004) loss 1.3574 (1.0899) acc 65.6250 (72.9792) lr 1.2369e-04 eta 2:54:55 +epoch [44/50] batch [305/1000] time 1.588 (1.566) data 0.000 (0.004) loss 0.8345 (1.0872) acc 75.0000 (72.9508) lr 1.2369e-04 eta 2:54:47 +epoch [44/50] batch [310/1000] time 1.543 (1.566) data 0.000 (0.004) loss 0.8140 (1.0833) 
acc 78.1250 (73.0343) lr 1.2369e-04 eta 2:54:38 +epoch [44/50] batch [315/1000] time 1.559 (1.566) data 0.001 (0.004) loss 0.6577 (1.0808) acc 78.1250 (73.0655) lr 1.2369e-04 eta 2:54:30 +epoch [44/50] batch [320/1000] time 1.569 (1.566) data 0.000 (0.004) loss 0.7441 (1.0815) acc 78.1250 (73.1055) lr 1.2369e-04 eta 2:54:21 +epoch [44/50] batch [325/1000] time 1.536 (1.566) data 0.001 (0.004) loss 1.0029 (1.0811) acc 75.0000 (73.1154) lr 1.2369e-04 eta 2:54:11 +epoch [44/50] batch [330/1000] time 1.549 (1.566) data 0.000 (0.004) loss 0.8027 (1.0778) acc 81.2500 (73.2386) lr 1.2369e-04 eta 2:54:04 +epoch [44/50] batch [335/1000] time 1.565 (1.566) data 0.000 (0.004) loss 1.0186 (1.0787) acc 75.0000 (73.2183) lr 1.2369e-04 eta 2:53:57 +epoch [44/50] batch [340/1000] time 1.567 (1.566) data 0.000 (0.004) loss 0.8145 (1.0786) acc 84.3750 (73.2353) lr 1.2369e-04 eta 2:53:51 +epoch [44/50] batch [345/1000] time 1.549 (1.566) data 0.001 (0.004) loss 0.5708 (1.0786) acc 84.3750 (73.2518) lr 1.2369e-04 eta 2:53:43 +epoch [44/50] batch [350/1000] time 1.562 (1.566) data 0.000 (0.003) loss 1.5732 (1.0787) acc 65.6250 (73.2143) lr 1.2369e-04 eta 2:53:34 +epoch [44/50] batch [355/1000] time 1.550 (1.566) data 0.000 (0.003) loss 1.5527 (1.0830) acc 62.5000 (73.0898) lr 1.2369e-04 eta 2:53:25 +epoch [44/50] batch [360/1000] time 1.551 (1.566) data 0.000 (0.003) loss 0.6025 (1.0809) acc 84.3750 (73.1424) lr 1.2369e-04 eta 2:53:17 +epoch [44/50] batch [365/1000] time 1.555 (1.566) data 0.000 (0.003) loss 1.8154 (1.0789) acc 53.1250 (73.2021) lr 1.2369e-04 eta 2:53:10 +epoch [44/50] batch [370/1000] time 1.555 (1.566) data 0.000 (0.003) loss 1.1211 (1.0788) acc 71.8750 (73.1503) lr 1.2369e-04 eta 2:53:01 +epoch [44/50] batch [375/1000] time 1.575 (1.566) data 0.001 (0.003) loss 0.7168 (1.0773) acc 75.0000 (73.1167) lr 1.2369e-04 eta 2:52:53 +epoch [44/50] batch [380/1000] time 1.556 (1.566) data 0.001 (0.003) loss 0.5327 (1.0744) acc 75.0000 (73.1497) lr 1.2369e-04 eta 2:52:45 
+epoch [44/50] batch [385/1000] time 1.579 (1.566) data 0.000 (0.003) loss 0.9448 (1.0735) acc 75.0000 (73.1818) lr 1.2369e-04 eta 2:52:40 +epoch [44/50] batch [390/1000] time 1.581 (1.566) data 0.000 (0.003) loss 1.0068 (1.0735) acc 68.7500 (73.1731) lr 1.2369e-04 eta 2:52:33 +epoch [44/50] batch [395/1000] time 1.570 (1.566) data 0.000 (0.003) loss 0.8828 (1.0758) acc 81.2500 (73.1804) lr 1.2369e-04 eta 2:52:25 +epoch [44/50] batch [400/1000] time 1.524 (1.566) data 0.001 (0.003) loss 1.0684 (1.0748) acc 71.8750 (73.1641) lr 1.2369e-04 eta 2:52:16 +epoch [44/50] batch [405/1000] time 1.566 (1.566) data 0.001 (0.003) loss 1.2959 (1.0762) acc 68.7500 (73.0941) lr 1.2369e-04 eta 2:52:08 +epoch [44/50] batch [410/1000] time 1.546 (1.566) data 0.000 (0.003) loss 1.1816 (1.0783) acc 71.8750 (73.0793) lr 1.2369e-04 eta 2:52:00 +epoch [44/50] batch [415/1000] time 1.576 (1.566) data 0.000 (0.003) loss 1.4766 (1.0812) acc 62.5000 (72.9669) lr 1.2369e-04 eta 2:51:52 +epoch [44/50] batch [420/1000] time 1.577 (1.566) data 0.000 (0.003) loss 0.9702 (1.0828) acc 71.8750 (72.9539) lr 1.2369e-04 eta 2:51:45 +epoch [44/50] batch [425/1000] time 1.555 (1.566) data 0.000 (0.003) loss 0.6406 (1.0822) acc 87.5000 (72.9044) lr 1.2369e-04 eta 2:51:39 +epoch [44/50] batch [430/1000] time 1.567 (1.566) data 0.000 (0.003) loss 1.6250 (1.0832) acc 65.6250 (72.8997) lr 1.2369e-04 eta 2:51:31 +epoch [44/50] batch [435/1000] time 1.539 (1.566) data 0.000 (0.003) loss 1.2207 (1.0815) acc 68.7500 (72.9239) lr 1.2369e-04 eta 2:51:22 +epoch [44/50] batch [440/1000] time 1.543 (1.566) data 0.000 (0.003) loss 1.3496 (1.0819) acc 71.8750 (72.9332) lr 1.2369e-04 eta 2:51:14 +epoch [44/50] batch [445/1000] time 1.572 (1.566) data 0.000 (0.003) loss 1.0391 (1.0797) acc 75.0000 (73.0126) lr 1.2369e-04 eta 2:51:06 +epoch [44/50] batch [450/1000] time 1.543 (1.566) data 0.000 (0.003) loss 1.0439 (1.0774) acc 78.1250 (73.0347) lr 1.2369e-04 eta 2:50:58 +epoch [44/50] batch [455/1000] time 1.581 (1.566) 
data 0.000 (0.003) loss 0.7998 (1.0751) acc 90.6250 (73.1044) lr 1.2369e-04 eta 2:50:50 +epoch [44/50] batch [460/1000] time 1.594 (1.566) data 0.000 (0.003) loss 1.4131 (1.0772) acc 68.7500 (73.0707) lr 1.2369e-04 eta 2:50:43 +epoch [44/50] batch [465/1000] time 1.573 (1.566) data 0.000 (0.003) loss 1.2705 (1.0779) acc 75.0000 (73.0309) lr 1.2369e-04 eta 2:50:35 +epoch [44/50] batch [470/1000] time 1.542 (1.566) data 0.001 (0.003) loss 0.4451 (1.0770) acc 84.3750 (73.0718) lr 1.2369e-04 eta 2:50:27 +epoch [44/50] batch [475/1000] time 1.548 (1.566) data 0.001 (0.003) loss 0.9551 (1.0773) acc 71.8750 (73.0789) lr 1.2369e-04 eta 2:50:19 +epoch [44/50] batch [480/1000] time 1.557 (1.566) data 0.000 (0.003) loss 0.8184 (1.0757) acc 78.1250 (73.0924) lr 1.2369e-04 eta 2:50:10 +epoch [44/50] batch [485/1000] time 1.582 (1.566) data 0.000 (0.003) loss 1.2090 (1.0754) acc 62.5000 (73.0670) lr 1.2369e-04 eta 2:50:03 +epoch [44/50] batch [490/1000] time 1.544 (1.566) data 0.001 (0.003) loss 1.1475 (1.0746) acc 75.0000 (73.0740) lr 1.2369e-04 eta 2:49:56 +epoch [44/50] batch [495/1000] time 1.558 (1.566) data 0.001 (0.003) loss 1.6816 (1.0789) acc 65.6250 (73.0177) lr 1.2369e-04 eta 2:49:48 +epoch [44/50] batch [500/1000] time 1.537 (1.566) data 0.000 (0.003) loss 0.1835 (1.0786) acc 96.8750 (73.0312) lr 1.2369e-04 eta 2:49:39 +epoch [44/50] batch [505/1000] time 1.535 (1.566) data 0.000 (0.003) loss 0.9067 (1.0791) acc 87.5000 (73.0384) lr 1.2369e-04 eta 2:49:30 +epoch [44/50] batch [510/1000] time 1.554 (1.566) data 0.000 (0.003) loss 1.1309 (1.0790) acc 68.7500 (73.0453) lr 1.2369e-04 eta 2:49:21 +epoch [44/50] batch [515/1000] time 1.581 (1.566) data 0.000 (0.003) loss 0.6934 (1.0776) acc 84.3750 (73.0643) lr 1.2369e-04 eta 2:49:13 +epoch [44/50] batch [520/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.3203 (1.0763) acc 68.7500 (73.1190) lr 1.2369e-04 eta 2:49:05 +epoch [44/50] batch [525/1000] time 1.560 (1.566) data 0.001 (0.002) loss 0.9326 (1.0756) acc 71.8750 
(73.1190) lr 1.2369e-04 eta 2:48:57 +epoch [44/50] batch [530/1000] time 1.563 (1.566) data 0.001 (0.002) loss 1.3477 (1.0758) acc 65.6250 (73.0955) lr 1.2369e-04 eta 2:48:49 +epoch [44/50] batch [535/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.6279 (1.0754) acc 53.1250 (73.1192) lr 1.2369e-04 eta 2:48:42 +epoch [44/50] batch [540/1000] time 1.565 (1.566) data 0.000 (0.002) loss 0.5029 (1.0728) acc 87.5000 (73.1597) lr 1.2369e-04 eta 2:48:34 +epoch [44/50] batch [545/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.4961 (1.0717) acc 65.6250 (73.1995) lr 1.2369e-04 eta 2:48:26 +epoch [44/50] batch [550/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.2432 (1.0722) acc 71.8750 (73.1648) lr 1.2369e-04 eta 2:48:18 +epoch [44/50] batch [555/1000] time 1.551 (1.566) data 0.001 (0.002) loss 0.8657 (1.0707) acc 81.2500 (73.2151) lr 1.2369e-04 eta 2:48:09 +epoch [44/50] batch [560/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.2979 (1.0722) acc 71.8750 (73.1808) lr 1.2369e-04 eta 2:48:02 +epoch [44/50] batch [565/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.3916 (1.0724) acc 56.2500 (73.1471) lr 1.2369e-04 eta 2:47:54 +epoch [44/50] batch [570/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.3838 (1.0720) acc 68.7500 (73.1579) lr 1.2369e-04 eta 2:47:46 +epoch [44/50] batch [575/1000] time 1.701 (1.566) data 0.000 (0.002) loss 0.9048 (1.0732) acc 78.1250 (73.1087) lr 1.2369e-04 eta 2:47:39 +epoch [44/50] batch [580/1000] time 1.542 (1.565) data 0.000 (0.002) loss 0.7822 (1.0717) acc 78.1250 (73.1196) lr 1.2369e-04 eta 2:47:30 +epoch [44/50] batch [585/1000] time 1.550 (1.565) data 0.000 (0.002) loss 0.7671 (1.0705) acc 81.2500 (73.1517) lr 1.2369e-04 eta 2:47:22 +epoch [44/50] batch [590/1000] time 1.580 (1.565) data 0.000 (0.002) loss 1.2422 (1.0710) acc 75.0000 (73.1462) lr 1.2369e-04 eta 2:47:13 +epoch [44/50] batch [595/1000] time 1.597 (1.565) data 0.001 (0.002) loss 0.7041 (1.0696) acc 81.2500 (73.1775) lr 1.2369e-04 eta 2:47:06 +epoch [44/50] 
batch [600/1000] time 1.561 (1.565) data 0.001 (0.002) loss 0.8657 (1.0696) acc 75.0000 (73.1771) lr 1.2369e-04 eta 2:46:57 +epoch [44/50] batch [605/1000] time 1.541 (1.565) data 0.000 (0.002) loss 1.4697 (1.0689) acc 65.6250 (73.1973) lr 1.2369e-04 eta 2:46:49 +epoch [44/50] batch [610/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.2969 (1.0701) acc 68.7500 (73.1660) lr 1.2369e-04 eta 2:46:41 +epoch [44/50] batch [615/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.0088 (1.0693) acc 84.3750 (73.1707) lr 1.2369e-04 eta 2:46:33 +epoch [44/50] batch [620/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.5947 (1.0695) acc 71.8750 (73.1804) lr 1.2369e-04 eta 2:46:25 +epoch [44/50] batch [625/1000] time 1.584 (1.565) data 0.001 (0.002) loss 1.3232 (1.0711) acc 65.6250 (73.1500) lr 1.2369e-04 eta 2:46:17 +epoch [44/50] batch [630/1000] time 1.553 (1.565) data 0.001 (0.002) loss 1.0654 (1.0724) acc 68.7500 (73.1399) lr 1.2369e-04 eta 2:46:08 +epoch [44/50] batch [635/1000] time 1.552 (1.565) data 0.001 (0.002) loss 1.2939 (1.0721) acc 65.6250 (73.1102) lr 1.2369e-04 eta 2:46:00 +epoch [44/50] batch [640/1000] time 1.563 (1.565) data 0.000 (0.002) loss 1.3525 (1.0725) acc 62.5000 (73.0957) lr 1.2369e-04 eta 2:45:53 +epoch [44/50] batch [645/1000] time 1.534 (1.565) data 0.000 (0.002) loss 1.5400 (1.0743) acc 62.5000 (73.0572) lr 1.2369e-04 eta 2:45:44 +epoch [44/50] batch [650/1000] time 1.534 (1.565) data 0.001 (0.002) loss 1.2461 (1.0752) acc 62.5000 (73.0385) lr 1.2369e-04 eta 2:45:36 +epoch [44/50] batch [655/1000] time 1.575 (1.565) data 0.000 (0.002) loss 1.6924 (1.0757) acc 75.0000 (73.0534) lr 1.2369e-04 eta 2:45:28 +epoch [44/50] batch [660/1000] time 1.574 (1.565) data 0.001 (0.002) loss 0.9287 (1.0749) acc 75.0000 (73.0682) lr 1.2369e-04 eta 2:45:21 +epoch [44/50] batch [665/1000] time 1.549 (1.565) data 0.001 (0.002) loss 0.8335 (1.0745) acc 78.1250 (73.0874) lr 1.2369e-04 eta 2:45:14 +epoch [44/50] batch [670/1000] time 1.571 (1.565) data 0.001 
(0.002) loss 0.5322 (1.0727) acc 87.5000 (73.1203) lr 1.2369e-04 eta 2:45:06 +epoch [44/50] batch [675/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.7983 (1.0734) acc 75.0000 (73.1111) lr 1.2369e-04 eta 2:44:57 +epoch [44/50] batch [680/1000] time 1.537 (1.565) data 0.001 (0.002) loss 0.8413 (1.0734) acc 75.0000 (73.1066) lr 1.2369e-04 eta 2:44:49 +epoch [44/50] batch [685/1000] time 1.566 (1.565) data 0.000 (0.002) loss 1.0645 (1.0722) acc 71.8750 (73.1524) lr 1.2369e-04 eta 2:44:42 +epoch [44/50] batch [690/1000] time 1.542 (1.565) data 0.001 (0.002) loss 1.1338 (1.0724) acc 71.8750 (73.1295) lr 1.2369e-04 eta 2:44:33 +epoch [44/50] batch [695/1000] time 1.543 (1.565) data 0.000 (0.002) loss 0.9980 (1.0730) acc 81.2500 (73.1250) lr 1.2369e-04 eta 2:44:25 +epoch [44/50] batch [700/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7144 (1.0729) acc 84.3750 (73.1429) lr 1.2369e-04 eta 2:44:16 +epoch [44/50] batch [705/1000] time 1.548 (1.565) data 0.001 (0.002) loss 0.8271 (1.0742) acc 87.5000 (73.1250) lr 1.2369e-04 eta 2:44:08 +epoch [44/50] batch [710/1000] time 1.554 (1.564) data 0.001 (0.002) loss 0.7495 (1.0724) acc 87.5000 (73.1866) lr 1.2369e-04 eta 2:44:00 +epoch [44/50] batch [715/1000] time 1.552 (1.564) data 0.001 (0.002) loss 0.8882 (1.0720) acc 75.0000 (73.1906) lr 1.2369e-04 eta 2:43:52 +epoch [44/50] batch [720/1000] time 1.558 (1.564) data 0.000 (0.002) loss 0.9419 (1.0724) acc 81.2500 (73.1814) lr 1.2369e-04 eta 2:43:44 +epoch [44/50] batch [725/1000] time 1.574 (1.564) data 0.000 (0.002) loss 1.8506 (1.0726) acc 59.3750 (73.1897) lr 1.2369e-04 eta 2:43:36 +epoch [44/50] batch [730/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.0674 (1.0725) acc 71.8750 (73.1935) lr 1.2369e-04 eta 2:43:29 +epoch [44/50] batch [735/1000] time 1.570 (1.565) data 0.000 (0.002) loss 1.0732 (1.0717) acc 78.1250 (73.2100) lr 1.2369e-04 eta 2:43:21 +epoch [44/50] batch [740/1000] time 1.582 (1.564) data 0.001 (0.002) loss 1.0596 (1.0709) acc 81.2500 (73.2348) lr 
1.2369e-04 eta 2:43:13 +epoch [44/50] batch [745/1000] time 1.561 (1.564) data 0.001 (0.002) loss 0.9888 (1.0701) acc 78.1250 (73.2341) lr 1.2369e-04 eta 2:43:05 +epoch [44/50] batch [750/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.2549 (1.0690) acc 78.1250 (73.2667) lr 1.2369e-04 eta 2:42:57 +epoch [44/50] batch [755/1000] time 1.561 (1.564) data 0.000 (0.002) loss 0.9575 (1.0680) acc 71.8750 (73.2947) lr 1.2369e-04 eta 2:42:49 +epoch [44/50] batch [760/1000] time 1.561 (1.564) data 0.000 (0.002) loss 1.1494 (1.0688) acc 68.7500 (73.2812) lr 1.2369e-04 eta 2:42:41 +epoch [44/50] batch [765/1000] time 1.547 (1.564) data 0.001 (0.002) loss 1.4609 (1.0685) acc 71.8750 (73.3007) lr 1.2369e-04 eta 2:42:33 +epoch [44/50] batch [770/1000] time 1.561 (1.564) data 0.001 (0.002) loss 0.7954 (1.0688) acc 81.2500 (73.2670) lr 1.2369e-04 eta 2:42:25 +epoch [44/50] batch [775/1000] time 1.574 (1.564) data 0.000 (0.002) loss 0.4668 (1.0699) acc 84.3750 (73.2379) lr 1.2369e-04 eta 2:42:17 +epoch [44/50] batch [780/1000] time 1.590 (1.564) data 0.000 (0.002) loss 1.3887 (1.0705) acc 56.2500 (73.1771) lr 1.2369e-04 eta 2:42:09 +epoch [44/50] batch [785/1000] time 1.538 (1.564) data 0.000 (0.002) loss 1.0830 (1.0701) acc 71.8750 (73.1927) lr 1.2369e-04 eta 2:42:02 +epoch [44/50] batch [790/1000] time 1.556 (1.564) data 0.001 (0.002) loss 1.3457 (1.0703) acc 71.8750 (73.1922) lr 1.2369e-04 eta 2:41:55 +epoch [44/50] batch [795/1000] time 1.562 (1.564) data 0.001 (0.002) loss 0.9717 (1.0702) acc 62.5000 (73.1722) lr 1.2369e-04 eta 2:41:47 +epoch [44/50] batch [800/1000] time 1.570 (1.564) data 0.001 (0.002) loss 0.5342 (1.0706) acc 87.5000 (73.1602) lr 1.2369e-04 eta 2:41:39 +epoch [44/50] batch [805/1000] time 1.582 (1.564) data 0.001 (0.002) loss 1.4316 (1.0704) acc 78.1250 (73.1561) lr 1.2369e-04 eta 2:41:31 +epoch [44/50] batch [810/1000] time 1.553 (1.564) data 0.000 (0.002) loss 0.9761 (1.0711) acc 71.8750 (73.1327) lr 1.2369e-04 eta 2:41:23 +epoch [44/50] batch 
[815/1000] time 1.566 (1.564) data 0.001 (0.002) loss 1.2686 (1.0708) acc 75.0000 (73.1442) lr 1.2369e-04 eta 2:41:14 +epoch [44/50] batch [820/1000] time 1.570 (1.564) data 0.000 (0.002) loss 0.7759 (1.0709) acc 78.1250 (73.1555) lr 1.2369e-04 eta 2:41:06 +epoch [44/50] batch [825/1000] time 1.543 (1.564) data 0.001 (0.002) loss 0.9404 (1.0709) acc 78.1250 (73.1553) lr 1.2369e-04 eta 2:40:58 +epoch [44/50] batch [830/1000] time 1.553 (1.564) data 0.000 (0.002) loss 1.0400 (1.0707) acc 68.7500 (73.1589) lr 1.2369e-04 eta 2:40:50 +epoch [44/50] batch [835/1000] time 1.571 (1.564) data 0.000 (0.002) loss 1.0127 (1.0731) acc 75.0000 (73.1250) lr 1.2369e-04 eta 2:40:44 +epoch [44/50] batch [840/1000] time 1.555 (1.564) data 0.001 (0.002) loss 0.4531 (1.0718) acc 87.5000 (73.1659) lr 1.2369e-04 eta 2:40:36 +epoch [44/50] batch [845/1000] time 1.547 (1.564) data 0.000 (0.002) loss 1.0205 (1.0728) acc 68.7500 (73.1213) lr 1.2369e-04 eta 2:40:28 +epoch [44/50] batch [850/1000] time 1.562 (1.564) data 0.001 (0.002) loss 1.6729 (1.0732) acc 62.5000 (73.1029) lr 1.2369e-04 eta 2:40:20 +epoch [44/50] batch [855/1000] time 1.536 (1.564) data 0.000 (0.002) loss 1.6260 (1.0743) acc 59.3750 (73.0775) lr 1.2369e-04 eta 2:40:12 +epoch [44/50] batch [860/1000] time 1.559 (1.564) data 0.001 (0.002) loss 1.9932 (1.0759) acc 59.3750 (73.0560) lr 1.2369e-04 eta 2:40:04 +epoch [44/50] batch [865/1000] time 1.586 (1.564) data 0.001 (0.002) loss 0.6670 (1.0754) acc 81.2500 (73.0419) lr 1.2369e-04 eta 2:39:57 +epoch [44/50] batch [870/1000] time 1.579 (1.564) data 0.001 (0.002) loss 0.5210 (1.0752) acc 81.2500 (73.0280) lr 1.2369e-04 eta 2:39:49 +epoch [44/50] batch [875/1000] time 1.555 (1.564) data 0.001 (0.002) loss 1.0840 (1.0755) acc 71.8750 (73.0143) lr 1.2369e-04 eta 2:39:41 +epoch [44/50] batch [880/1000] time 1.546 (1.565) data 0.000 (0.002) loss 1.1865 (1.0764) acc 81.2500 (73.0078) lr 1.2369e-04 eta 2:39:34 +epoch [44/50] batch [885/1000] time 1.564 (1.565) data 0.000 (0.002) loss 
1.5029 (1.0773) acc 65.6250 (72.9767) lr 1.2369e-04 eta 2:39:27 +epoch [44/50] batch [890/1000] time 1.531 (1.565) data 0.000 (0.002) loss 1.4316 (1.0775) acc 68.7500 (72.9740) lr 1.2369e-04 eta 2:39:19 +epoch [44/50] batch [895/1000] time 1.558 (1.564) data 0.001 (0.002) loss 1.1211 (1.0789) acc 68.7500 (72.9539) lr 1.2369e-04 eta 2:39:11 +epoch [44/50] batch [900/1000] time 1.545 (1.564) data 0.001 (0.002) loss 0.9438 (1.0788) acc 68.7500 (72.9410) lr 1.2369e-04 eta 2:39:02 +epoch [44/50] batch [905/1000] time 1.557 (1.564) data 0.001 (0.002) loss 1.1260 (1.0779) acc 65.6250 (72.9558) lr 1.2369e-04 eta 2:38:54 +epoch [44/50] batch [910/1000] time 1.552 (1.564) data 0.000 (0.002) loss 0.7676 (1.0772) acc 75.0000 (72.9602) lr 1.2369e-04 eta 2:38:46 +epoch [44/50] batch [915/1000] time 1.567 (1.564) data 0.001 (0.002) loss 0.8892 (1.0769) acc 75.0000 (72.9781) lr 1.2369e-04 eta 2:38:39 +epoch [44/50] batch [920/1000] time 1.581 (1.564) data 0.000 (0.002) loss 1.0293 (1.0769) acc 71.8750 (72.9755) lr 1.2369e-04 eta 2:38:31 +epoch [44/50] batch [925/1000] time 1.540 (1.564) data 0.000 (0.002) loss 1.2930 (1.0763) acc 71.8750 (72.9899) lr 1.2369e-04 eta 2:38:23 +epoch [44/50] batch [930/1000] time 1.549 (1.564) data 0.000 (0.002) loss 1.2900 (1.0774) acc 68.7500 (72.9603) lr 1.2369e-04 eta 2:38:16 +epoch [44/50] batch [935/1000] time 1.528 (1.564) data 0.000 (0.002) loss 0.7261 (1.0770) acc 81.2500 (72.9746) lr 1.2369e-04 eta 2:38:08 +epoch [44/50] batch [940/1000] time 1.736 (1.565) data 0.000 (0.002) loss 1.6465 (1.0781) acc 65.6250 (72.9654) lr 1.2369e-04 eta 2:38:01 +epoch [44/50] batch [945/1000] time 1.574 (1.565) data 0.001 (0.002) loss 0.8945 (1.0777) acc 75.0000 (72.9795) lr 1.2369e-04 eta 2:37:53 +epoch [44/50] batch [950/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.0244 (1.0774) acc 84.3750 (72.9901) lr 1.2369e-04 eta 2:37:45 +epoch [44/50] batch [955/1000] time 1.580 (1.565) data 0.000 (0.002) loss 1.2900 (1.0772) acc 75.0000 (73.0039) lr 1.2369e-04 
eta 2:37:37 +epoch [44/50] batch [960/1000] time 1.574 (1.565) data 0.000 (0.002) loss 0.6157 (1.0775) acc 84.3750 (73.0013) lr 1.2369e-04 eta 2:37:29 +epoch [44/50] batch [965/1000] time 1.548 (1.564) data 0.000 (0.002) loss 0.9414 (1.0763) acc 71.8750 (73.0246) lr 1.2369e-04 eta 2:37:21 +epoch [44/50] batch [970/1000] time 1.546 (1.564) data 0.000 (0.002) loss 1.4648 (1.0750) acc 65.6250 (73.0541) lr 1.2369e-04 eta 2:37:13 +epoch [44/50] batch [975/1000] time 1.568 (1.564) data 0.000 (0.002) loss 0.6934 (1.0752) acc 87.5000 (73.0609) lr 1.2369e-04 eta 2:37:05 +epoch [44/50] batch [980/1000] time 1.556 (1.564) data 0.001 (0.002) loss 0.7305 (1.0749) acc 75.0000 (73.0421) lr 1.2369e-04 eta 2:36:56 +epoch [44/50] batch [985/1000] time 1.719 (1.564) data 0.001 (0.002) loss 1.2012 (1.0741) acc 78.1250 (73.0711) lr 1.2369e-04 eta 2:36:49 +epoch [44/50] batch [990/1000] time 1.583 (1.564) data 0.001 (0.002) loss 0.8467 (1.0746) acc 78.1250 (73.0682) lr 1.2369e-04 eta 2:36:41 +epoch [44/50] batch [995/1000] time 1.552 (1.564) data 0.000 (0.002) loss 1.0645 (1.0757) acc 75.0000 (73.0685) lr 1.2369e-04 eta 2:36:33 +epoch [44/50] batch [1000/1000] time 1.555 (1.564) data 0.000 (0.002) loss 1.0498 (1.0762) acc 78.1250 (73.0625) lr 9.5173e-05 eta 2:36:25 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,380 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.3% +epoch [45/50] batch [5/1000] time 1.581 (1.703) data 0.001 (0.186) loss 1.6680 (1.1404) acc 62.5000 (70.0000) lr 9.5173e-05 eta 2:50:08 +epoch [45/50] batch [10/1000] time 1.550 (1.627) data 0.001 (0.093) loss 0.9707 (1.0150) acc 78.1250 (72.1875) lr 9.5173e-05 eta 2:42:24 +epoch [45/50] batch [15/1000] time 1.556 (1.605) data 0.001 (0.062) loss 0.7544 (1.0583) acc 75.0000 (71.4583) lr 9.5173e-05 eta 2:40:06 +epoch [45/50] batch [20/1000] time 1.541 (1.607) data 0.001 (0.047) loss 0.7769 (1.0566) acc 84.3750 (71.8750) lr 9.5173e-05 eta 2:40:11 +epoch [45/50] batch [25/1000] time 1.541 (1.598) data 
0.001 (0.038) loss 1.1074 (1.0728) acc 71.8750 (70.8750) lr 9.5173e-05 eta 2:39:06 +epoch [45/50] batch [30/1000] time 1.568 (1.591) data 0.001 (0.031) loss 1.3604 (1.0768) acc 75.0000 (71.6667) lr 9.5173e-05 eta 2:38:20 +epoch [45/50] batch [35/1000] time 1.592 (1.588) data 0.000 (0.027) loss 1.1250 (1.0882) acc 75.0000 (71.9643) lr 9.5173e-05 eta 2:37:51 +epoch [45/50] batch [40/1000] time 1.546 (1.584) data 0.001 (0.024) loss 1.0332 (1.0786) acc 75.0000 (71.7969) lr 9.5173e-05 eta 2:37:20 +epoch [45/50] batch [45/1000] time 1.528 (1.581) data 0.000 (0.021) loss 1.1621 (1.0847) acc 75.0000 (71.7361) lr 9.5173e-05 eta 2:36:54 +epoch [45/50] batch [50/1000] time 1.568 (1.579) data 0.000 (0.019) loss 1.0537 (1.0875) acc 71.8750 (72.0000) lr 9.5173e-05 eta 2:36:34 +epoch [45/50] batch [55/1000] time 1.552 (1.577) data 0.001 (0.017) loss 1.0342 (1.0870) acc 68.7500 (71.5909) lr 9.5173e-05 eta 2:36:12 +epoch [45/50] batch [60/1000] time 1.571 (1.579) data 0.001 (0.016) loss 1.0117 (1.0860) acc 68.7500 (71.4062) lr 9.5173e-05 eta 2:36:18 +epoch [45/50] batch [65/1000] time 1.596 (1.578) data 0.001 (0.015) loss 1.7158 (1.0877) acc 62.5000 (71.6827) lr 9.5173e-05 eta 2:36:08 +epoch [45/50] batch [70/1000] time 1.569 (1.578) data 0.000 (0.014) loss 1.3359 (1.0765) acc 71.8750 (72.1429) lr 9.5173e-05 eta 2:35:55 +epoch [45/50] batch [75/1000] time 1.550 (1.577) data 0.000 (0.013) loss 1.1338 (1.0818) acc 56.2500 (71.7917) lr 9.5173e-05 eta 2:35:41 +epoch [45/50] batch [80/1000] time 1.537 (1.575) data 0.000 (0.012) loss 0.4666 (1.0711) acc 84.3750 (72.2266) lr 9.5173e-05 eta 2:35:25 +epoch [45/50] batch [85/1000] time 1.547 (1.574) data 0.001 (0.011) loss 1.0166 (1.0710) acc 81.2500 (72.3529) lr 9.5173e-05 eta 2:35:12 +epoch [45/50] batch [90/1000] time 1.549 (1.574) data 0.001 (0.011) loss 1.3906 (1.0916) acc 65.6250 (72.1181) lr 9.5173e-05 eta 2:35:01 +epoch [45/50] batch [95/1000] time 1.558 (1.573) data 0.001 (0.010) loss 0.5728 (1.0938) acc 81.2500 (72.0395) lr 
9.5173e-05 eta 2:34:50 +epoch [45/50] batch [100/1000] time 1.535 (1.572) data 0.001 (0.010) loss 1.9912 (1.1012) acc 56.2500 (72.0000) lr 9.5173e-05 eta 2:34:35 +epoch [45/50] batch [105/1000] time 1.573 (1.572) data 0.001 (0.009) loss 1.3828 (1.1111) acc 65.6250 (71.7857) lr 9.5173e-05 eta 2:34:28 +epoch [45/50] batch [110/1000] time 1.569 (1.572) data 0.001 (0.009) loss 1.2480 (1.1112) acc 78.1250 (71.8466) lr 9.5173e-05 eta 2:34:18 +epoch [45/50] batch [115/1000] time 1.581 (1.572) data 0.001 (0.009) loss 1.2119 (1.1030) acc 71.8750 (72.0924) lr 9.5173e-05 eta 2:34:09 +epoch [45/50] batch [120/1000] time 1.576 (1.572) data 0.001 (0.008) loss 1.9092 (1.1095) acc 50.0000 (71.9010) lr 9.5173e-05 eta 2:34:02 +epoch [45/50] batch [125/1000] time 1.541 (1.573) data 0.000 (0.008) loss 1.0039 (1.1076) acc 78.1250 (71.9250) lr 9.5173e-05 eta 2:34:00 +epoch [45/50] batch [130/1000] time 1.543 (1.572) data 0.000 (0.008) loss 1.1396 (1.1055) acc 81.2500 (72.0913) lr 9.5173e-05 eta 2:33:49 +epoch [45/50] batch [135/1000] time 1.529 (1.572) data 0.001 (0.007) loss 1.5938 (1.1066) acc 68.7500 (72.1296) lr 9.5173e-05 eta 2:33:37 +epoch [45/50] batch [140/1000] time 1.558 (1.571) data 0.001 (0.007) loss 1.1875 (1.1088) acc 62.5000 (72.1875) lr 9.5173e-05 eta 2:33:28 +epoch [45/50] batch [145/1000] time 1.572 (1.571) data 0.001 (0.007) loss 1.0283 (1.0997) acc 81.2500 (72.5000) lr 9.5173e-05 eta 2:33:19 +epoch [45/50] batch [150/1000] time 1.568 (1.571) data 0.001 (0.007) loss 1.1514 (1.1062) acc 81.2500 (72.5208) lr 9.5173e-05 eta 2:33:10 +epoch [45/50] batch [155/1000] time 1.572 (1.570) data 0.000 (0.006) loss 1.2041 (1.1156) acc 75.0000 (72.3992) lr 9.5173e-05 eta 2:32:58 +epoch [45/50] batch [160/1000] time 1.558 (1.570) data 0.000 (0.006) loss 0.8218 (1.1140) acc 75.0000 (72.3633) lr 9.5173e-05 eta 2:32:51 +epoch [45/50] batch [165/1000] time 1.581 (1.570) data 0.000 (0.006) loss 1.5342 (1.1193) acc 75.0000 (72.2727) lr 9.5173e-05 eta 2:32:42 +epoch [45/50] batch 
[170/1000] time 1.575 (1.571) data 0.001 (0.006) loss 0.8960 (1.1111) acc 78.1250 (72.3897) lr 9.5173e-05 eta 2:32:40 +epoch [45/50] batch [175/1000] time 1.542 (1.571) data 0.000 (0.006) loss 1.1904 (1.1086) acc 62.5000 (72.2679) lr 9.5173e-05 eta 2:32:29 +epoch [45/50] batch [180/1000] time 1.563 (1.570) data 0.001 (0.006) loss 1.2686 (1.1051) acc 71.8750 (72.3438) lr 9.5173e-05 eta 2:32:17 +epoch [45/50] batch [185/1000] time 1.564 (1.570) data 0.000 (0.006) loss 1.2969 (1.1054) acc 68.7500 (72.2973) lr 9.5173e-05 eta 2:32:07 +epoch [45/50] batch [190/1000] time 1.549 (1.569) data 0.000 (0.005) loss 0.7148 (1.1059) acc 81.2500 (72.2862) lr 9.5173e-05 eta 2:31:57 +epoch [45/50] batch [195/1000] time 1.550 (1.569) data 0.001 (0.005) loss 1.2021 (1.1035) acc 65.6250 (72.3718) lr 9.5173e-05 eta 2:31:48 +epoch [45/50] batch [200/1000] time 1.580 (1.569) data 0.001 (0.005) loss 1.1963 (1.1035) acc 65.6250 (72.3750) lr 9.5173e-05 eta 2:31:39 +epoch [45/50] batch [205/1000] time 1.546 (1.569) data 0.000 (0.005) loss 1.7100 (1.1100) acc 56.2500 (72.2104) lr 9.5173e-05 eta 2:31:30 +epoch [45/50] batch [210/1000] time 1.717 (1.569) data 0.001 (0.005) loss 1.8086 (1.1144) acc 53.1250 (72.1577) lr 9.5173e-05 eta 2:31:26 +epoch [45/50] batch [215/1000] time 1.571 (1.569) data 0.001 (0.005) loss 1.1836 (1.1175) acc 65.6250 (72.0349) lr 9.5173e-05 eta 2:31:17 +epoch [45/50] batch [220/1000] time 1.581 (1.569) data 0.000 (0.005) loss 1.0684 (1.1141) acc 78.1250 (72.1023) lr 9.5173e-05 eta 2:31:10 +epoch [45/50] batch [225/1000] time 1.550 (1.569) data 0.001 (0.005) loss 1.4580 (1.1149) acc 59.3750 (71.9583) lr 9.5173e-05 eta 2:31:01 +epoch [45/50] batch [230/1000] time 1.561 (1.569) data 0.001 (0.005) loss 1.8867 (1.1173) acc 59.3750 (71.9429) lr 9.5173e-05 eta 2:30:55 +epoch [45/50] batch [235/1000] time 1.567 (1.570) data 0.000 (0.004) loss 1.1123 (1.1175) acc 59.3750 (71.8883) lr 9.5173e-05 eta 2:30:48 +epoch [45/50] batch [240/1000] time 1.524 (1.569) data 0.000 (0.004) loss 
1.0859 (1.1166) acc 62.5000 (71.9010) lr 9.5173e-05 eta 2:30:39 +epoch [45/50] batch [245/1000] time 1.555 (1.569) data 0.000 (0.004) loss 1.2139 (1.1170) acc 78.1250 (71.9133) lr 9.5173e-05 eta 2:30:29 +epoch [45/50] batch [250/1000] time 1.565 (1.569) data 0.000 (0.004) loss 0.6128 (1.1173) acc 81.2500 (71.8750) lr 9.5173e-05 eta 2:30:19 +epoch [45/50] batch [255/1000] time 1.566 (1.569) data 0.001 (0.004) loss 0.5269 (1.1145) acc 84.3750 (71.8995) lr 9.5173e-05 eta 2:30:12 +epoch [45/50] batch [260/1000] time 1.576 (1.569) data 0.000 (0.004) loss 1.1182 (1.1122) acc 68.7500 (71.9231) lr 9.5173e-05 eta 2:30:04 +epoch [45/50] batch [265/1000] time 1.548 (1.569) data 0.000 (0.004) loss 0.9399 (1.1084) acc 75.0000 (72.0283) lr 9.5173e-05 eta 2:29:56 +epoch [45/50] batch [270/1000] time 1.574 (1.569) data 0.000 (0.004) loss 1.0039 (1.1088) acc 75.0000 (72.0602) lr 9.5173e-05 eta 2:29:47 +epoch [45/50] batch [275/1000] time 1.560 (1.569) data 0.000 (0.004) loss 1.6318 (1.1142) acc 65.6250 (72.0114) lr 9.5173e-05 eta 2:29:42 +epoch [45/50] batch [280/1000] time 1.551 (1.569) data 0.001 (0.004) loss 0.7964 (1.1128) acc 75.0000 (72.0536) lr 9.5173e-05 eta 2:29:33 +epoch [45/50] batch [285/1000] time 1.574 (1.569) data 0.001 (0.004) loss 0.4167 (1.1119) acc 87.5000 (72.0066) lr 9.5173e-05 eta 2:29:24 +epoch [45/50] batch [290/1000] time 1.593 (1.568) data 0.000 (0.004) loss 0.6846 (1.1093) acc 81.2500 (72.1121) lr 9.5173e-05 eta 2:29:15 +epoch [45/50] batch [295/1000] time 1.584 (1.569) data 0.000 (0.004) loss 0.8379 (1.1056) acc 78.1250 (72.1928) lr 9.5173e-05 eta 2:29:08 +epoch [45/50] batch [300/1000] time 1.584 (1.569) data 0.000 (0.004) loss 0.9863 (1.1006) acc 68.7500 (72.3438) lr 9.5173e-05 eta 2:29:01 +epoch [45/50] batch [305/1000] time 1.558 (1.568) data 0.000 (0.004) loss 1.1162 (1.1006) acc 68.7500 (72.2746) lr 9.5173e-05 eta 2:28:52 +epoch [45/50] batch [310/1000] time 1.578 (1.568) data 0.000 (0.003) loss 1.0459 (1.1006) acc 75.0000 (72.1774) lr 9.5173e-05 
eta 2:28:43 +epoch [45/50] batch [315/1000] time 1.565 (1.568) data 0.000 (0.003) loss 1.1396 (1.0978) acc 71.8750 (72.1925) lr 9.5173e-05 eta 2:28:34 +epoch [45/50] batch [320/1000] time 1.534 (1.568) data 0.000 (0.003) loss 1.3682 (1.0988) acc 68.7500 (72.2266) lr 9.5173e-05 eta 2:28:28 +epoch [45/50] batch [325/1000] time 1.590 (1.568) data 0.000 (0.003) loss 1.2930 (1.0971) acc 68.7500 (72.2788) lr 9.5173e-05 eta 2:28:20 +epoch [45/50] batch [330/1000] time 1.545 (1.568) data 0.001 (0.003) loss 1.7881 (1.0980) acc 50.0000 (72.2538) lr 9.5173e-05 eta 2:28:12 +epoch [45/50] batch [335/1000] time 1.556 (1.568) data 0.000 (0.003) loss 0.5806 (1.0947) acc 84.3750 (72.2761) lr 9.5173e-05 eta 2:28:03 +epoch [45/50] batch [340/1000] time 1.574 (1.568) data 0.001 (0.003) loss 1.0166 (1.0926) acc 78.1250 (72.3070) lr 9.5173e-05 eta 2:27:54 +epoch [45/50] batch [345/1000] time 1.574 (1.568) data 0.000 (0.003) loss 1.0420 (1.0953) acc 75.0000 (72.2645) lr 9.5173e-05 eta 2:27:45 +epoch [45/50] batch [350/1000] time 1.562 (1.568) data 0.001 (0.003) loss 0.7041 (1.0943) acc 71.8750 (72.2679) lr 9.5173e-05 eta 2:27:37 +epoch [45/50] batch [355/1000] time 1.549 (1.568) data 0.001 (0.003) loss 1.0088 (1.0914) acc 71.8750 (72.3151) lr 9.5173e-05 eta 2:27:28 +epoch [45/50] batch [360/1000] time 1.534 (1.568) data 0.000 (0.003) loss 1.5049 (1.0932) acc 71.8750 (72.3003) lr 9.5173e-05 eta 2:27:21 +epoch [45/50] batch [365/1000] time 1.557 (1.568) data 0.001 (0.003) loss 1.0674 (1.0976) acc 71.8750 (72.2603) lr 9.5173e-05 eta 2:27:14 +epoch [45/50] batch [370/1000] time 1.554 (1.568) data 0.000 (0.003) loss 1.0127 (1.0987) acc 68.7500 (72.2213) lr 9.5173e-05 eta 2:27:07 +epoch [45/50] batch [375/1000] time 1.560 (1.568) data 0.001 (0.003) loss 1.2881 (1.0959) acc 81.2500 (72.3500) lr 9.5173e-05 eta 2:26:58 +epoch [45/50] batch [380/1000] time 1.555 (1.568) data 0.000 (0.003) loss 1.1611 (1.0988) acc 65.6250 (72.2615) lr 9.5173e-05 eta 2:26:50 +epoch [45/50] batch [385/1000] time 
1.539 (1.568) data 0.000 (0.003) loss 1.3398 (1.0979) acc 65.6250 (72.3133) lr 9.5173e-05 eta 2:26:41 +epoch [45/50] batch [390/1000] time 1.556 (1.567) data 0.000 (0.003) loss 0.8521 (1.0971) acc 68.7500 (72.3638) lr 9.5173e-05 eta 2:26:33 +epoch [45/50] batch [395/1000] time 1.580 (1.567) data 0.001 (0.003) loss 0.5669 (1.0926) acc 84.3750 (72.4684) lr 9.5173e-05 eta 2:26:25 +epoch [45/50] batch [400/1000] time 1.532 (1.567) data 0.001 (0.003) loss 0.9404 (1.0925) acc 75.0000 (72.4609) lr 9.5173e-05 eta 2:26:15 +epoch [45/50] batch [405/1000] time 1.536 (1.567) data 0.001 (0.003) loss 1.6787 (1.0959) acc 53.1250 (72.4151) lr 9.5173e-05 eta 2:26:07 +epoch [45/50] batch [410/1000] time 1.568 (1.567) data 0.000 (0.003) loss 1.3252 (1.0955) acc 68.7500 (72.4771) lr 9.5173e-05 eta 2:25:58 +epoch [45/50] batch [415/1000] time 1.569 (1.567) data 0.000 (0.003) loss 1.3594 (1.0969) acc 68.7500 (72.4398) lr 9.5173e-05 eta 2:25:50 +epoch [45/50] batch [420/1000] time 1.582 (1.567) data 0.000 (0.003) loss 1.1162 (1.0987) acc 81.2500 (72.4330) lr 9.5173e-05 eta 2:25:42 +epoch [45/50] batch [425/1000] time 1.589 (1.567) data 0.001 (0.003) loss 1.4023 (1.0976) acc 62.5000 (72.4412) lr 9.5173e-05 eta 2:25:37 +epoch [45/50] batch [430/1000] time 1.564 (1.567) data 0.001 (0.003) loss 0.7847 (1.0963) acc 68.7500 (72.4055) lr 9.5173e-05 eta 2:25:29 +epoch [45/50] batch [435/1000] time 1.557 (1.567) data 0.000 (0.003) loss 1.0801 (1.0977) acc 75.0000 (72.4210) lr 9.5173e-05 eta 2:25:21 +epoch [45/50] batch [440/1000] time 1.565 (1.567) data 0.001 (0.003) loss 1.1680 (1.0988) acc 62.5000 (72.3651) lr 9.5173e-05 eta 2:25:13 +epoch [45/50] batch [445/1000] time 1.561 (1.567) data 0.000 (0.003) loss 1.0527 (1.1003) acc 81.2500 (72.3666) lr 9.5173e-05 eta 2:25:04 +epoch [45/50] batch [450/1000] time 1.585 (1.567) data 0.000 (0.003) loss 1.3643 (1.1015) acc 71.8750 (72.3750) lr 9.5173e-05 eta 2:24:56 +epoch [45/50] batch [455/1000] time 1.561 (1.567) data 0.000 (0.003) loss 0.7734 (1.0987) 
acc 78.1250 (72.4245) lr 9.5173e-05 eta 2:24:49 +epoch [45/50] batch [460/1000] time 1.579 (1.567) data 0.000 (0.003) loss 0.8677 (1.0975) acc 81.2500 (72.4796) lr 9.5173e-05 eta 2:24:41 +epoch [45/50] batch [465/1000] time 1.550 (1.567) data 0.001 (0.002) loss 1.4688 (1.0967) acc 71.8750 (72.5269) lr 9.5173e-05 eta 2:24:33 +epoch [45/50] batch [470/1000] time 1.569 (1.567) data 0.001 (0.002) loss 1.3037 (1.0957) acc 75.0000 (72.5598) lr 9.5173e-05 eta 2:24:26 +epoch [45/50] batch [475/1000] time 1.571 (1.567) data 0.000 (0.002) loss 1.4053 (1.0969) acc 59.3750 (72.5395) lr 9.5173e-05 eta 2:24:18 +epoch [45/50] batch [480/1000] time 1.560 (1.567) data 0.001 (0.002) loss 1.6191 (1.0989) acc 71.8750 (72.5000) lr 9.5173e-05 eta 2:24:09 +epoch [45/50] batch [485/1000] time 1.574 (1.567) data 0.000 (0.002) loss 1.1104 (1.0997) acc 68.7500 (72.4871) lr 9.5173e-05 eta 2:24:01 +epoch [45/50] batch [490/1000] time 1.576 (1.567) data 0.000 (0.002) loss 0.6826 (1.1006) acc 81.2500 (72.4872) lr 9.5173e-05 eta 2:23:53 +epoch [45/50] batch [495/1000] time 1.590 (1.567) data 0.001 (0.002) loss 0.4685 (1.0995) acc 90.6250 (72.5063) lr 9.5173e-05 eta 2:23:45 +epoch [45/50] batch [500/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.1553 (1.1022) acc 81.2500 (72.4625) lr 9.5173e-05 eta 2:23:38 +epoch [45/50] batch [505/1000] time 1.568 (1.567) data 0.000 (0.002) loss 2.0703 (1.1040) acc 62.5000 (72.4567) lr 9.5173e-05 eta 2:23:30 +epoch [45/50] batch [510/1000] time 1.551 (1.567) data 0.000 (0.002) loss 0.9141 (1.1032) acc 81.2500 (72.4755) lr 9.5173e-05 eta 2:23:22 +epoch [45/50] batch [515/1000] time 1.581 (1.567) data 0.001 (0.002) loss 0.9849 (1.1019) acc 78.1250 (72.5303) lr 9.5173e-05 eta 2:23:15 +epoch [45/50] batch [520/1000] time 1.561 (1.567) data 0.001 (0.002) loss 0.8359 (1.1004) acc 75.0000 (72.5601) lr 9.5173e-05 eta 2:23:07 +epoch [45/50] batch [525/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.7734 (1.1010) acc 75.0000 (72.5238) lr 9.5173e-05 eta 2:22:59 
+epoch [45/50] batch [530/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.0322 (1.0994) acc 68.7500 (72.5413) lr 9.5173e-05 eta 2:22:52 +epoch [45/50] batch [535/1000] time 1.564 (1.567) data 0.001 (0.002) loss 1.4092 (1.0990) acc 62.5000 (72.5409) lr 9.5173e-05 eta 2:22:43 +epoch [45/50] batch [540/1000] time 1.559 (1.567) data 0.001 (0.002) loss 1.3604 (1.1005) acc 62.5000 (72.4942) lr 9.5173e-05 eta 2:22:35 +epoch [45/50] batch [545/1000] time 1.558 (1.567) data 0.001 (0.002) loss 1.4043 (1.1000) acc 62.5000 (72.5229) lr 9.5173e-05 eta 2:22:27 +epoch [45/50] batch [550/1000] time 1.586 (1.567) data 0.001 (0.002) loss 1.0986 (1.0999) acc 68.7500 (72.5057) lr 9.5173e-05 eta 2:22:20 +epoch [45/50] batch [555/1000] time 1.605 (1.567) data 0.000 (0.002) loss 0.9346 (1.0988) acc 78.1250 (72.5225) lr 9.5173e-05 eta 2:22:12 +epoch [45/50] batch [560/1000] time 1.546 (1.567) data 0.000 (0.002) loss 1.0830 (1.0974) acc 78.1250 (72.5725) lr 9.5173e-05 eta 2:22:04 +epoch [45/50] batch [565/1000] time 1.579 (1.567) data 0.001 (0.002) loss 0.9761 (1.0959) acc 75.0000 (72.6162) lr 9.5173e-05 eta 2:21:56 +epoch [45/50] batch [570/1000] time 1.536 (1.567) data 0.001 (0.002) loss 1.1133 (1.0962) acc 68.7500 (72.5822) lr 9.5173e-05 eta 2:21:48 +epoch [45/50] batch [575/1000] time 1.750 (1.567) data 0.001 (0.002) loss 0.9565 (1.0966) acc 81.2500 (72.6141) lr 9.5173e-05 eta 2:21:42 +epoch [45/50] batch [580/1000] time 1.582 (1.567) data 0.000 (0.002) loss 0.5566 (1.0954) acc 78.1250 (72.6401) lr 9.5173e-05 eta 2:21:33 +epoch [45/50] batch [585/1000] time 1.569 (1.567) data 0.001 (0.002) loss 1.4980 (1.0958) acc 71.8750 (72.6335) lr 9.5173e-05 eta 2:21:25 +epoch [45/50] batch [590/1000] time 1.567 (1.567) data 0.000 (0.002) loss 1.2031 (1.0952) acc 68.7500 (72.6483) lr 9.5173e-05 eta 2:21:17 +epoch [45/50] batch [595/1000] time 1.558 (1.567) data 0.001 (0.002) loss 0.9312 (1.0946) acc 75.0000 (72.6786) lr 9.5173e-05 eta 2:21:09 +epoch [45/50] batch [600/1000] time 1.533 (1.567) 
data 0.001 (0.002) loss 1.0518 (1.0959) acc 75.0000 (72.6667) lr 9.5173e-05 eta 2:21:01 +epoch [45/50] batch [605/1000] time 1.543 (1.567) data 0.001 (0.002) loss 1.0186 (1.0951) acc 75.0000 (72.6705) lr 9.5173e-05 eta 2:20:53 +epoch [45/50] batch [610/1000] time 1.535 (1.567) data 0.000 (0.002) loss 1.0156 (1.0954) acc 75.0000 (72.6588) lr 9.5173e-05 eta 2:20:44 +epoch [45/50] batch [615/1000] time 1.558 (1.567) data 0.001 (0.002) loss 0.8330 (1.0941) acc 87.5000 (72.6931) lr 9.5173e-05 eta 2:20:37 +epoch [45/50] batch [620/1000] time 1.740 (1.567) data 0.000 (0.002) loss 0.8066 (1.0933) acc 81.2500 (72.7218) lr 9.5173e-05 eta 2:20:30 +epoch [45/50] batch [625/1000] time 1.562 (1.567) data 0.001 (0.002) loss 0.8530 (1.0919) acc 78.1250 (72.7800) lr 9.5173e-05 eta 2:20:23 +epoch [45/50] batch [630/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.4600 (1.0925) acc 65.6250 (72.7976) lr 9.5173e-05 eta 2:20:15 +epoch [45/50] batch [635/1000] time 1.543 (1.567) data 0.000 (0.002) loss 0.9189 (1.0928) acc 75.0000 (72.8100) lr 9.5173e-05 eta 2:20:06 +epoch [45/50] batch [640/1000] time 1.586 (1.567) data 0.001 (0.002) loss 0.7959 (1.0921) acc 68.7500 (72.8027) lr 9.5173e-05 eta 2:19:59 +epoch [45/50] batch [645/1000] time 1.574 (1.567) data 0.000 (0.002) loss 1.1846 (1.0903) acc 71.8750 (72.8198) lr 9.5173e-05 eta 2:19:51 +epoch [45/50] batch [650/1000] time 1.570 (1.567) data 0.000 (0.002) loss 0.7476 (1.0892) acc 81.2500 (72.8558) lr 9.5173e-05 eta 2:19:43 +epoch [45/50] batch [655/1000] time 1.570 (1.567) data 0.000 (0.002) loss 0.8975 (1.0886) acc 81.2500 (72.9008) lr 9.5173e-05 eta 2:19:35 +epoch [45/50] batch [660/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.7749 (1.0871) acc 81.2500 (72.9214) lr 9.5173e-05 eta 2:19:27 +epoch [45/50] batch [665/1000] time 1.566 (1.567) data 0.000 (0.002) loss 1.0166 (1.0882) acc 84.3750 (72.9182) lr 9.5173e-05 eta 2:19:20 +epoch [45/50] batch [670/1000] time 1.574 (1.567) data 0.001 (0.002) loss 0.5889 (1.0881) acc 81.2500 
(72.9151) lr 9.5173e-05 eta 2:19:12 +epoch [45/50] batch [675/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.4365 (1.0893) acc 59.3750 (72.8750) lr 9.5173e-05 eta 2:19:03 +epoch [45/50] batch [680/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.6875 (1.0887) acc 78.1250 (72.8906) lr 9.5173e-05 eta 2:18:56 +epoch [45/50] batch [685/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.2490 (1.0894) acc 65.6250 (72.8604) lr 9.5173e-05 eta 2:18:48 +epoch [45/50] batch [690/1000] time 1.581 (1.567) data 0.000 (0.002) loss 1.2939 (1.0891) acc 68.7500 (72.8759) lr 9.5173e-05 eta 2:18:40 +epoch [45/50] batch [695/1000] time 1.577 (1.567) data 0.001 (0.002) loss 0.8848 (1.0889) acc 84.3750 (72.9047) lr 9.5173e-05 eta 2:18:32 +epoch [45/50] batch [700/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.3799 (1.0896) acc 65.6250 (72.8929) lr 9.5173e-05 eta 2:18:24 +epoch [45/50] batch [705/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.7515 (1.0900) acc 84.3750 (72.8989) lr 9.5173e-05 eta 2:18:16 +epoch [45/50] batch [710/1000] time 1.547 (1.567) data 0.000 (0.002) loss 1.1758 (1.0904) acc 68.7500 (72.8873) lr 9.5173e-05 eta 2:18:07 +epoch [45/50] batch [715/1000] time 1.549 (1.567) data 0.001 (0.002) loss 1.4199 (1.0895) acc 71.8750 (72.9108) lr 9.5173e-05 eta 2:17:59 +epoch [45/50] batch [720/1000] time 1.564 (1.567) data 0.000 (0.002) loss 0.8745 (1.0873) acc 71.8750 (72.9470) lr 9.5173e-05 eta 2:17:51 +epoch [45/50] batch [725/1000] time 1.554 (1.567) data 0.000 (0.002) loss 0.6455 (1.0874) acc 75.0000 (72.9353) lr 9.5173e-05 eta 2:17:43 +epoch [45/50] batch [730/1000] time 1.573 (1.567) data 0.000 (0.002) loss 0.8721 (1.0862) acc 75.0000 (72.9666) lr 9.5173e-05 eta 2:17:36 +epoch [45/50] batch [735/1000] time 1.567 (1.567) data 0.001 (0.002) loss 1.0820 (1.0857) acc 68.7500 (72.9592) lr 9.5173e-05 eta 2:17:28 +epoch [45/50] batch [740/1000] time 1.555 (1.567) data 0.000 (0.002) loss 1.0195 (1.0861) acc 78.1250 (72.9434) lr 9.5173e-05 eta 2:17:20 +epoch [45/50] 
batch [745/1000] time 1.564 (1.567) data 0.000 (0.002) loss 0.6401 (1.0862) acc 78.1250 (72.9237) lr 9.5173e-05 eta 2:17:12 +epoch [45/50] batch [750/1000] time 1.551 (1.567) data 0.000 (0.002) loss 0.9985 (1.0865) acc 71.8750 (72.8958) lr 9.5173e-05 eta 2:17:04 +epoch [45/50] batch [755/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.1953 (1.0859) acc 75.0000 (72.9098) lr 9.5173e-05 eta 2:16:56 +epoch [45/50] batch [760/1000] time 1.548 (1.567) data 0.001 (0.002) loss 0.8472 (1.0860) acc 78.1250 (72.9482) lr 9.5173e-05 eta 2:16:48 +epoch [45/50] batch [765/1000] time 1.560 (1.567) data 0.000 (0.002) loss 1.3330 (1.0861) acc 68.7500 (72.9575) lr 9.5173e-05 eta 2:16:40 +epoch [45/50] batch [770/1000] time 1.560 (1.567) data 0.000 (0.002) loss 0.7871 (1.0855) acc 71.8750 (72.9667) lr 9.5173e-05 eta 2:16:32 +epoch [45/50] batch [775/1000] time 1.581 (1.567) data 0.000 (0.002) loss 1.4209 (1.0869) acc 65.6250 (72.9153) lr 9.5173e-05 eta 2:16:26 +epoch [45/50] batch [780/1000] time 1.548 (1.567) data 0.001 (0.002) loss 1.3193 (1.0864) acc 71.8750 (72.9327) lr 9.5173e-05 eta 2:16:18 +epoch [45/50] batch [785/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.4062 (1.0871) acc 62.5000 (72.8941) lr 9.5173e-05 eta 2:16:09 +epoch [45/50] batch [790/1000] time 1.554 (1.567) data 0.001 (0.002) loss 0.5688 (1.0863) acc 81.2500 (72.9153) lr 9.5173e-05 eta 2:16:01 +epoch [45/50] batch [795/1000] time 1.598 (1.567) data 0.001 (0.002) loss 1.2451 (1.0862) acc 68.7500 (72.9009) lr 9.5173e-05 eta 2:15:53 +epoch [45/50] batch [800/1000] time 1.568 (1.567) data 0.000 (0.002) loss 1.1172 (1.0862) acc 71.8750 (72.8984) lr 9.5173e-05 eta 2:15:45 +epoch [45/50] batch [805/1000] time 1.587 (1.567) data 0.001 (0.002) loss 0.8062 (1.0858) acc 81.2500 (72.9076) lr 9.5173e-05 eta 2:15:38 +epoch [45/50] batch [810/1000] time 1.560 (1.567) data 0.001 (0.002) loss 1.4795 (1.0858) acc 59.3750 (72.9090) lr 9.5173e-05 eta 2:15:30 +epoch [45/50] batch [815/1000] time 1.570 (1.567) data 0.001 
(0.002) loss 0.4758 (1.0869) acc 84.3750 (72.8911) lr 9.5173e-05 eta 2:15:23 +epoch [45/50] batch [820/1000] time 1.561 (1.567) data 0.000 (0.002) loss 1.0391 (1.0876) acc 75.0000 (72.8659) lr 9.5173e-05 eta 2:15:15 +epoch [45/50] batch [825/1000] time 1.585 (1.567) data 0.001 (0.002) loss 1.4863 (1.0886) acc 65.6250 (72.8333) lr 9.5173e-05 eta 2:15:07 +epoch [45/50] batch [830/1000] time 1.559 (1.567) data 0.000 (0.002) loss 0.7520 (1.0871) acc 78.1250 (72.8652) lr 9.5173e-05 eta 2:14:59 +epoch [45/50] batch [835/1000] time 1.578 (1.567) data 0.001 (0.002) loss 1.3594 (1.0878) acc 68.7500 (72.8481) lr 9.5173e-05 eta 2:14:51 +epoch [45/50] batch [840/1000] time 1.577 (1.567) data 0.000 (0.002) loss 0.7710 (1.0866) acc 81.2500 (72.8757) lr 9.5173e-05 eta 2:14:43 +epoch [45/50] batch [845/1000] time 1.577 (1.567) data 0.001 (0.002) loss 1.5029 (1.0872) acc 71.8750 (72.8883) lr 9.5173e-05 eta 2:14:35 +epoch [45/50] batch [850/1000] time 1.560 (1.567) data 0.000 (0.002) loss 1.2158 (1.0873) acc 75.0000 (72.8750) lr 9.5173e-05 eta 2:14:28 +epoch [45/50] batch [855/1000] time 1.576 (1.567) data 0.000 (0.002) loss 0.9590 (1.0885) acc 75.0000 (72.8436) lr 9.5173e-05 eta 2:14:20 +epoch [45/50] batch [860/1000] time 1.564 (1.567) data 0.000 (0.002) loss 1.5400 (1.0909) acc 65.6250 (72.8089) lr 9.5173e-05 eta 2:14:12 +epoch [45/50] batch [865/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.0332 (1.0902) acc 71.8750 (72.8288) lr 9.5173e-05 eta 2:14:04 +epoch [45/50] batch [870/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.0771 (1.0910) acc 75.0000 (72.8233) lr 9.5173e-05 eta 2:13:56 +epoch [45/50] batch [875/1000] time 1.559 (1.566) data 0.001 (0.002) loss 0.9277 (1.0915) acc 71.8750 (72.8071) lr 9.5173e-05 eta 2:13:48 +epoch [45/50] batch [880/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.1504 (1.0907) acc 65.6250 (72.7876) lr 9.5173e-05 eta 2:13:41 +epoch [45/50] batch [885/1000] time 1.563 (1.567) data 0.001 (0.002) loss 1.1289 (1.0906) acc 81.2500 (72.7931) lr 
9.5173e-05 eta 2:13:33 +epoch [45/50] batch [890/1000] time 1.563 (1.567) data 0.000 (0.002) loss 1.0811 (1.0908) acc 75.0000 (72.7949) lr 9.5173e-05 eta 2:13:25 +epoch [45/50] batch [895/1000] time 1.552 (1.567) data 0.000 (0.002) loss 0.9185 (1.0894) acc 75.0000 (72.8317) lr 9.5173e-05 eta 2:13:17 +epoch [45/50] batch [900/1000] time 1.558 (1.567) data 0.000 (0.002) loss 1.0869 (1.0894) acc 71.8750 (72.8472) lr 9.5173e-05 eta 2:13:09 +epoch [45/50] batch [905/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.1436 (1.0894) acc 68.7500 (72.8453) lr 9.5173e-05 eta 2:13:01 +epoch [45/50] batch [910/1000] time 1.559 (1.566) data 0.000 (0.002) loss 0.6616 (1.0885) acc 84.3750 (72.8743) lr 9.5173e-05 eta 2:12:53 +epoch [45/50] batch [915/1000] time 1.579 (1.567) data 0.000 (0.002) loss 1.0166 (1.0879) acc 75.0000 (72.8893) lr 9.5173e-05 eta 2:12:45 +epoch [45/50] batch [920/1000] time 1.556 (1.567) data 0.001 (0.001) loss 1.5195 (1.0883) acc 65.6250 (72.8872) lr 9.5173e-05 eta 2:12:37 +epoch [45/50] batch [925/1000] time 1.559 (1.567) data 0.001 (0.001) loss 1.4053 (1.0883) acc 68.7500 (72.8851) lr 9.5173e-05 eta 2:12:30 +epoch [45/50] batch [930/1000] time 1.573 (1.567) data 0.001 (0.001) loss 0.9141 (1.0874) acc 78.1250 (72.9066) lr 9.5173e-05 eta 2:12:22 +epoch [45/50] batch [935/1000] time 1.571 (1.567) data 0.001 (0.001) loss 1.2764 (1.0881) acc 71.8750 (72.8977) lr 9.5173e-05 eta 2:12:15 +epoch [45/50] batch [940/1000] time 1.582 (1.567) data 0.000 (0.001) loss 0.9448 (1.0885) acc 68.7500 (72.8856) lr 9.5173e-05 eta 2:12:06 +epoch [45/50] batch [945/1000] time 1.557 (1.567) data 0.001 (0.001) loss 0.9971 (1.0879) acc 62.5000 (72.8770) lr 9.5173e-05 eta 2:11:59 +epoch [45/50] batch [950/1000] time 1.561 (1.567) data 0.001 (0.001) loss 1.1270 (1.0866) acc 75.0000 (72.8947) lr 9.5173e-05 eta 2:11:51 +epoch [45/50] batch [955/1000] time 1.542 (1.566) data 0.000 (0.001) loss 1.1992 (1.0871) acc 71.8750 (72.9123) lr 9.5173e-05 eta 2:11:42 +epoch [45/50] batch 
[960/1000] time 1.570 (1.566) data 0.001 (0.001) loss 1.2979 (1.0866) acc 68.7500 (72.9102) lr 9.5173e-05 eta 2:11:35 +epoch [45/50] batch [965/1000] time 1.689 (1.567) data 0.000 (0.001) loss 0.5664 (1.0857) acc 78.1250 (72.8983) lr 9.5173e-05 eta 2:11:27 +epoch [45/50] batch [970/1000] time 1.558 (1.567) data 0.001 (0.001) loss 0.7197 (1.0849) acc 78.1250 (72.9059) lr 9.5173e-05 eta 2:11:19 +epoch [45/50] batch [975/1000] time 1.558 (1.567) data 0.000 (0.001) loss 1.2402 (1.0852) acc 75.0000 (72.8814) lr 9.5173e-05 eta 2:11:11 +epoch [45/50] batch [980/1000] time 1.578 (1.567) data 0.000 (0.001) loss 0.9917 (1.0838) acc 71.8750 (72.9114) lr 9.5173e-05 eta 2:11:04 +epoch [45/50] batch [985/1000] time 1.549 (1.567) data 0.001 (0.001) loss 1.5342 (1.0837) acc 59.3750 (72.9156) lr 9.5173e-05 eta 2:10:56 +epoch [45/50] batch [990/1000] time 1.547 (1.567) data 0.000 (0.001) loss 0.9365 (1.0831) acc 78.1250 (72.9198) lr 9.5173e-05 eta 2:10:48 +epoch [45/50] batch [995/1000] time 1.540 (1.566) data 0.000 (0.001) loss 0.8027 (1.0839) acc 78.1250 (72.8957) lr 9.5173e-05 eta 2:10:39 +epoch [45/50] batch [1000/1000] time 1.555 (1.566) data 0.001 (0.001) loss 0.7886 (1.0841) acc 81.2500 (72.9094) lr 7.0224e-05 eta 2:10:32 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,387 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +epoch [46/50] batch [5/1000] time 1.531 (1.694) data 0.000 (0.194) loss 1.2939 (0.9167) acc 62.5000 (74.3750) lr 7.0224e-05 eta 2:21:00 +epoch [46/50] batch [10/1000] time 1.539 (1.624) data 0.000 (0.097) loss 1.4336 (1.0947) acc 65.6250 (73.1250) lr 7.0224e-05 eta 2:15:03 +epoch [46/50] batch [15/1000] time 1.569 (1.604) data 0.001 (0.065) loss 1.2061 (1.0596) acc 68.7500 (73.3333) lr 7.0224e-05 eta 2:13:17 +epoch [46/50] batch [20/1000] time 1.564 (1.594) data 0.000 (0.049) loss 1.2939 (1.0421) acc 62.5000 (73.1250) lr 7.0224e-05 eta 2:12:19 +epoch [46/50] batch [25/1000] time 1.565 (1.590) data 0.001 (0.039) loss 0.6582 (1.0385) 
acc 71.8750 (72.8750) lr 7.0224e-05 eta 2:11:48 +epoch [46/50] batch [30/1000] time 1.552 (1.584) data 0.000 (0.033) loss 1.1826 (1.0404) acc 78.1250 (73.4375) lr 7.0224e-05 eta 2:11:11 +epoch [46/50] batch [35/1000] time 1.531 (1.581) data 0.001 (0.028) loss 0.8887 (1.0768) acc 75.0000 (72.3214) lr 7.0224e-05 eta 2:10:50 +epoch [46/50] batch [40/1000] time 1.563 (1.586) data 0.000 (0.025) loss 0.9082 (1.0685) acc 78.1250 (72.8125) lr 7.0224e-05 eta 2:11:06 +epoch [46/50] batch [45/1000] time 1.552 (1.583) data 0.000 (0.022) loss 1.3330 (1.0905) acc 53.1250 (72.6389) lr 7.0224e-05 eta 2:10:43 +epoch [46/50] batch [50/1000] time 1.554 (1.582) data 0.000 (0.020) loss 1.0693 (1.0793) acc 71.8750 (72.5625) lr 7.0224e-05 eta 2:10:30 +epoch [46/50] batch [55/1000] time 1.571 (1.581) data 0.000 (0.018) loss 1.2461 (1.0707) acc 75.0000 (72.7841) lr 7.0224e-05 eta 2:10:19 +epoch [46/50] batch [60/1000] time 1.567 (1.580) data 0.001 (0.017) loss 1.2949 (1.0735) acc 65.6250 (72.4479) lr 7.0224e-05 eta 2:10:05 +epoch [46/50] batch [65/1000] time 1.577 (1.579) data 0.000 (0.015) loss 1.2676 (1.0771) acc 65.6250 (72.4519) lr 7.0224e-05 eta 2:09:52 +epoch [46/50] batch [70/1000] time 1.541 (1.578) data 0.001 (0.014) loss 0.9966 (1.0720) acc 78.1250 (72.5446) lr 7.0224e-05 eta 2:09:38 +epoch [46/50] batch [75/1000] time 1.545 (1.576) data 0.001 (0.013) loss 0.7666 (1.0717) acc 78.1250 (72.7083) lr 7.0224e-05 eta 2:09:21 +epoch [46/50] batch [80/1000] time 1.565 (1.575) data 0.000 (0.013) loss 1.6963 (1.0824) acc 65.6250 (72.6562) lr 7.0224e-05 eta 2:09:08 +epoch [46/50] batch [85/1000] time 1.567 (1.577) data 0.000 (0.012) loss 0.7827 (1.0755) acc 75.0000 (72.8309) lr 7.0224e-05 eta 2:09:09 +epoch [46/50] batch [90/1000] time 1.567 (1.576) data 0.000 (0.011) loss 1.5215 (1.0736) acc 59.3750 (72.6042) lr 7.0224e-05 eta 2:08:57 +epoch [46/50] batch [95/1000] time 1.567 (1.575) data 0.000 (0.011) loss 1.0156 (1.0746) acc 75.0000 (72.6316) lr 7.0224e-05 eta 2:08:46 +epoch [46/50] 
batch [100/1000] time 1.580 (1.575) data 0.000 (0.010) loss 1.0029 (1.0854) acc 71.8750 (72.5312) lr 7.0224e-05 eta 2:08:36 +epoch [46/50] batch [105/1000] time 1.546 (1.574) data 0.000 (0.010) loss 0.8853 (1.0824) acc 78.1250 (72.7083) lr 7.0224e-05 eta 2:08:25 +epoch [46/50] batch [110/1000] time 1.592 (1.574) data 0.000 (0.009) loss 1.0684 (1.0812) acc 62.5000 (72.7273) lr 7.0224e-05 eta 2:08:14 +epoch [46/50] batch [115/1000] time 1.570 (1.573) data 0.000 (0.009) loss 0.6616 (1.0790) acc 75.0000 (72.7989) lr 7.0224e-05 eta 2:08:06 +epoch [46/50] batch [120/1000] time 1.581 (1.573) data 0.000 (0.009) loss 1.6934 (1.0847) acc 65.6250 (72.6823) lr 7.0224e-05 eta 2:07:56 +epoch [46/50] batch [125/1000] time 1.549 (1.573) data 0.000 (0.008) loss 0.7949 (1.0844) acc 71.8750 (72.7000) lr 7.0224e-05 eta 2:07:47 +epoch [46/50] batch [130/1000] time 1.559 (1.573) data 0.001 (0.008) loss 1.1826 (1.0828) acc 75.0000 (72.6923) lr 7.0224e-05 eta 2:07:42 +epoch [46/50] batch [135/1000] time 1.556 (1.573) data 0.000 (0.008) loss 0.9819 (1.0799) acc 71.8750 (72.7083) lr 7.0224e-05 eta 2:07:32 +epoch [46/50] batch [140/1000] time 1.579 (1.572) data 0.000 (0.007) loss 0.5981 (1.0777) acc 78.1250 (72.7009) lr 7.0224e-05 eta 2:07:22 +epoch [46/50] batch [145/1000] time 1.563 (1.572) data 0.001 (0.007) loss 0.6846 (1.0740) acc 81.2500 (72.7155) lr 7.0224e-05 eta 2:07:12 +epoch [46/50] batch [150/1000] time 1.561 (1.571) data 0.000 (0.007) loss 0.6943 (1.0698) acc 71.8750 (72.7917) lr 7.0224e-05 eta 2:07:01 +epoch [46/50] batch [155/1000] time 1.561 (1.571) data 0.001 (0.007) loss 1.1279 (1.0749) acc 68.7500 (72.5403) lr 7.0224e-05 eta 2:06:49 +epoch [46/50] batch [160/1000] time 1.550 (1.570) data 0.001 (0.007) loss 0.9971 (1.0777) acc 84.3750 (72.5195) lr 7.0224e-05 eta 2:06:39 +epoch [46/50] batch [165/1000] time 1.564 (1.570) data 0.001 (0.006) loss 1.1582 (1.0881) acc 62.5000 (72.2538) lr 7.0224e-05 eta 2:06:29 +epoch [46/50] batch [170/1000] time 1.556 (1.569) data 0.000 
(0.006) loss 0.7129 (1.0846) acc 84.3750 (72.4081) lr 7.0224e-05 eta 2:06:18 +epoch [46/50] batch [175/1000] time 1.556 (1.569) data 0.000 (0.006) loss 0.8999 (1.0790) acc 75.0000 (72.4821) lr 7.0224e-05 eta 2:06:08 +epoch [46/50] batch [180/1000] time 1.553 (1.568) data 0.000 (0.006) loss 0.4426 (1.0708) acc 87.5000 (72.7083) lr 7.0224e-05 eta 2:05:58 +epoch [46/50] batch [185/1000] time 1.558 (1.568) data 0.000 (0.006) loss 1.0918 (1.0713) acc 75.0000 (72.7027) lr 7.0224e-05 eta 2:05:48 +epoch [46/50] batch [190/1000] time 1.741 (1.568) data 0.000 (0.006) loss 1.0576 (1.0752) acc 81.2500 (72.6151) lr 7.0224e-05 eta 2:05:44 +epoch [46/50] batch [195/1000] time 1.573 (1.568) data 0.001 (0.005) loss 1.5254 (1.0776) acc 65.6250 (72.6442) lr 7.0224e-05 eta 2:05:35 +epoch [46/50] batch [200/1000] time 1.560 (1.568) data 0.000 (0.005) loss 1.3711 (1.0833) acc 62.5000 (72.5469) lr 7.0224e-05 eta 2:05:26 +epoch [46/50] batch [205/1000] time 1.540 (1.568) data 0.001 (0.005) loss 1.5234 (1.0872) acc 56.2500 (72.4390) lr 7.0224e-05 eta 2:05:16 +epoch [46/50] batch [210/1000] time 1.574 (1.568) data 0.000 (0.005) loss 0.9985 (1.0860) acc 81.2500 (72.5298) lr 7.0224e-05 eta 2:05:09 +epoch [46/50] batch [215/1000] time 1.564 (1.568) data 0.001 (0.005) loss 1.2939 (1.0868) acc 65.6250 (72.5581) lr 7.0224e-05 eta 2:05:01 +epoch [46/50] batch [220/1000] time 1.571 (1.567) data 0.000 (0.005) loss 0.5532 (1.0887) acc 87.5000 (72.4858) lr 7.0224e-05 eta 2:04:52 +epoch [46/50] batch [225/1000] time 1.576 (1.567) data 0.000 (0.005) loss 1.3242 (1.0896) acc 68.7500 (72.5556) lr 7.0224e-05 eta 2:04:44 +epoch [46/50] batch [230/1000] time 1.566 (1.567) data 0.000 (0.005) loss 1.1055 (1.0848) acc 62.5000 (72.6495) lr 7.0224e-05 eta 2:04:36 +epoch [46/50] batch [235/1000] time 1.733 (1.568) data 0.001 (0.005) loss 1.0205 (1.0831) acc 81.2500 (72.7394) lr 7.0224e-05 eta 2:04:32 +epoch [46/50] batch [240/1000] time 1.566 (1.568) data 0.001 (0.005) loss 0.7769 (1.0817) acc 84.3750 (72.8255) lr 
7.0224e-05 eta 2:04:23 +epoch [46/50] batch [245/1000] time 1.568 (1.568) data 0.001 (0.004) loss 0.9380 (1.0795) acc 71.8750 (72.8444) lr 7.0224e-05 eta 2:04:15 +epoch [46/50] batch [250/1000] time 1.554 (1.568) data 0.001 (0.004) loss 0.5781 (1.0772) acc 75.0000 (72.8500) lr 7.0224e-05 eta 2:04:08 +epoch [46/50] batch [255/1000] time 1.551 (1.568) data 0.001 (0.004) loss 0.9326 (1.0762) acc 81.2500 (72.8064) lr 7.0224e-05 eta 2:03:59 +epoch [46/50] batch [260/1000] time 1.590 (1.568) data 0.001 (0.004) loss 0.7778 (1.0759) acc 75.0000 (72.8125) lr 7.0224e-05 eta 2:03:51 +epoch [46/50] batch [265/1000] time 1.551 (1.568) data 0.000 (0.004) loss 0.9019 (1.0727) acc 75.0000 (72.8656) lr 7.0224e-05 eta 2:03:43 +epoch [46/50] batch [270/1000] time 1.552 (1.568) data 0.000 (0.004) loss 1.7148 (1.0753) acc 62.5000 (72.8819) lr 7.0224e-05 eta 2:03:35 +epoch [46/50] batch [275/1000] time 1.553 (1.568) data 0.000 (0.004) loss 0.9219 (1.0716) acc 71.8750 (72.9773) lr 7.0224e-05 eta 2:03:26 +epoch [46/50] batch [280/1000] time 1.574 (1.568) data 0.001 (0.004) loss 0.7617 (1.0737) acc 68.7500 (72.8906) lr 7.0224e-05 eta 2:03:22 +epoch [46/50] batch [285/1000] time 1.574 (1.568) data 0.001 (0.004) loss 1.1973 (1.0713) acc 65.6250 (72.9167) lr 7.0224e-05 eta 2:03:14 +epoch [46/50] batch [290/1000] time 1.567 (1.568) data 0.001 (0.004) loss 1.0098 (1.0687) acc 81.2500 (73.0603) lr 7.0224e-05 eta 2:03:05 +epoch [46/50] batch [295/1000] time 1.565 (1.568) data 0.000 (0.004) loss 0.8843 (1.0689) acc 68.7500 (73.0403) lr 7.0224e-05 eta 2:02:58 +epoch [46/50] batch [300/1000] time 1.546 (1.568) data 0.000 (0.004) loss 0.6504 (1.0648) acc 84.3750 (73.2083) lr 7.0224e-05 eta 2:02:50 +epoch [46/50] batch [305/1000] time 1.559 (1.568) data 0.000 (0.004) loss 1.2578 (1.0641) acc 68.7500 (73.2480) lr 7.0224e-05 eta 2:02:41 +epoch [46/50] batch [310/1000] time 1.571 (1.568) data 0.001 (0.004) loss 0.5298 (1.0625) acc 87.5000 (73.3165) lr 7.0224e-05 eta 2:02:32 +epoch [46/50] batch 
[315/1000] time 1.562 (1.568) data 0.001 (0.004) loss 1.0430 (1.0622) acc 65.6250 (73.2738) lr 7.0224e-05 eta 2:02:24 +epoch [46/50] batch [320/1000] time 1.550 (1.568) data 0.001 (0.004) loss 1.1543 (1.0619) acc 68.7500 (73.2910) lr 7.0224e-05 eta 2:02:16 +epoch [46/50] batch [325/1000] time 1.560 (1.568) data 0.000 (0.003) loss 1.0576 (1.0602) acc 68.7500 (73.3077) lr 7.0224e-05 eta 2:02:08 +epoch [46/50] batch [330/1000] time 1.562 (1.567) data 0.001 (0.003) loss 1.0283 (1.0644) acc 65.6250 (73.1818) lr 7.0224e-05 eta 2:01:59 +epoch [46/50] batch [335/1000] time 1.560 (1.567) data 0.000 (0.003) loss 1.2305 (1.0668) acc 71.8750 (73.1623) lr 7.0224e-05 eta 2:01:51 +epoch [46/50] batch [340/1000] time 1.588 (1.567) data 0.001 (0.003) loss 1.0303 (1.0638) acc 78.1250 (73.2629) lr 7.0224e-05 eta 2:01:43 +epoch [46/50] batch [345/1000] time 1.554 (1.568) data 0.000 (0.003) loss 0.9258 (1.0641) acc 75.0000 (73.2971) lr 7.0224e-05 eta 2:01:37 +epoch [46/50] batch [350/1000] time 1.538 (1.567) data 0.001 (0.003) loss 1.0605 (1.0614) acc 68.7500 (73.2857) lr 7.0224e-05 eta 2:01:28 +epoch [46/50] batch [355/1000] time 1.582 (1.567) data 0.001 (0.003) loss 0.7275 (1.0619) acc 78.1250 (73.2746) lr 7.0224e-05 eta 2:01:19 +epoch [46/50] batch [360/1000] time 1.551 (1.567) data 0.001 (0.003) loss 0.7979 (1.0616) acc 71.8750 (73.2552) lr 7.0224e-05 eta 2:01:12 +epoch [46/50] batch [365/1000] time 1.564 (1.567) data 0.001 (0.003) loss 0.9834 (1.0626) acc 78.1250 (73.2620) lr 7.0224e-05 eta 2:01:05 +epoch [46/50] batch [370/1000] time 1.534 (1.567) data 0.000 (0.003) loss 0.7295 (1.0624) acc 78.1250 (73.2601) lr 7.0224e-05 eta 2:00:56 +epoch [46/50] batch [375/1000] time 1.558 (1.567) data 0.001 (0.003) loss 0.7485 (1.0612) acc 81.2500 (73.2917) lr 7.0224e-05 eta 2:00:47 +epoch [46/50] batch [380/1000] time 1.550 (1.567) data 0.001 (0.003) loss 0.6655 (1.0620) acc 90.6250 (73.2319) lr 7.0224e-05 eta 2:00:39 +epoch [46/50] batch [385/1000] time 1.550 (1.567) data 0.000 (0.003) loss 
0.9028 (1.0609) acc 78.1250 (73.2873) lr 7.0224e-05 eta 2:00:31 +epoch [46/50] batch [390/1000] time 1.558 (1.567) data 0.001 (0.003) loss 1.2686 (1.0614) acc 65.6250 (73.2372) lr 7.0224e-05 eta 2:00:24 +epoch [46/50] batch [395/1000] time 1.573 (1.567) data 0.000 (0.003) loss 0.8325 (1.0615) acc 81.2500 (73.2278) lr 7.0224e-05 eta 2:00:16 +epoch [46/50] batch [400/1000] time 1.540 (1.567) data 0.001 (0.003) loss 1.2861 (1.0617) acc 68.7500 (73.1875) lr 7.0224e-05 eta 2:00:08 +epoch [46/50] batch [405/1000] time 1.555 (1.567) data 0.000 (0.003) loss 1.1191 (1.0610) acc 71.8750 (73.2176) lr 7.0224e-05 eta 2:00:00 +epoch [46/50] batch [410/1000] time 1.574 (1.567) data 0.000 (0.003) loss 1.1895 (1.0602) acc 71.8750 (73.2698) lr 7.0224e-05 eta 1:59:52 +epoch [46/50] batch [415/1000] time 1.584 (1.567) data 0.000 (0.003) loss 1.4482 (1.0608) acc 75.0000 (73.2380) lr 7.0224e-05 eta 1:59:44 +epoch [46/50] batch [420/1000] time 1.546 (1.567) data 0.000 (0.003) loss 0.6445 (1.0603) acc 84.3750 (73.2887) lr 7.0224e-05 eta 1:59:36 +epoch [46/50] batch [425/1000] time 1.579 (1.567) data 0.000 (0.003) loss 1.2803 (1.0618) acc 81.2500 (73.2941) lr 7.0224e-05 eta 1:59:28 +epoch [46/50] batch [430/1000] time 1.573 (1.567) data 0.000 (0.003) loss 1.0205 (1.0632) acc 71.8750 (73.2485) lr 7.0224e-05 eta 1:59:21 +epoch [46/50] batch [435/1000] time 1.561 (1.567) data 0.001 (0.003) loss 1.0029 (1.0637) acc 68.7500 (73.1968) lr 7.0224e-05 eta 1:59:13 +epoch [46/50] batch [440/1000] time 1.540 (1.567) data 0.001 (0.003) loss 0.4700 (1.0627) acc 78.1250 (73.2031) lr 7.0224e-05 eta 1:59:05 +epoch [46/50] batch [445/1000] time 1.571 (1.567) data 0.000 (0.003) loss 1.3896 (1.0624) acc 65.6250 (73.2233) lr 7.0224e-05 eta 1:58:57 +epoch [46/50] batch [450/1000] time 1.576 (1.567) data 0.000 (0.003) loss 0.8862 (1.0626) acc 75.0000 (73.2500) lr 7.0224e-05 eta 1:58:49 +epoch [46/50] batch [455/1000] time 1.568 (1.567) data 0.001 (0.003) loss 1.4473 (1.0636) acc 65.6250 (73.2212) lr 7.0224e-05 
eta 1:58:41 +epoch [46/50] batch [460/1000] time 1.572 (1.567) data 0.000 (0.003) loss 1.7080 (1.0636) acc 62.5000 (73.2269) lr 7.0224e-05 eta 1:58:33 +epoch [46/50] batch [465/1000] time 1.577 (1.567) data 0.000 (0.003) loss 1.4600 (1.0648) acc 71.8750 (73.2056) lr 7.0224e-05 eta 1:58:24 +epoch [46/50] batch [470/1000] time 1.578 (1.567) data 0.000 (0.003) loss 0.8691 (1.0622) acc 81.2500 (73.2713) lr 7.0224e-05 eta 1:58:17 +epoch [46/50] batch [475/1000] time 1.577 (1.567) data 0.000 (0.003) loss 0.8818 (1.0607) acc 68.7500 (73.2697) lr 7.0224e-05 eta 1:58:09 +epoch [46/50] batch [480/1000] time 1.581 (1.567) data 0.000 (0.002) loss 0.8916 (1.0580) acc 75.0000 (73.3073) lr 7.0224e-05 eta 1:58:01 +epoch [46/50] batch [485/1000] time 1.569 (1.567) data 0.000 (0.002) loss 1.0381 (1.0570) acc 68.7500 (73.3312) lr 7.0224e-05 eta 1:57:53 +epoch [46/50] batch [490/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.6338 (1.0588) acc 62.5000 (73.2972) lr 7.0224e-05 eta 1:57:45 +epoch [46/50] batch [495/1000] time 1.542 (1.567) data 0.001 (0.002) loss 1.3965 (1.0585) acc 68.7500 (73.3081) lr 7.0224e-05 eta 1:57:38 +epoch [46/50] batch [500/1000] time 1.565 (1.567) data 0.001 (0.002) loss 1.3740 (1.0615) acc 62.5000 (73.2625) lr 7.0224e-05 eta 1:57:30 +epoch [46/50] batch [505/1000] time 1.548 (1.567) data 0.000 (0.002) loss 0.7412 (1.0616) acc 81.2500 (73.2673) lr 7.0224e-05 eta 1:57:21 +epoch [46/50] batch [510/1000] time 1.561 (1.566) data 0.000 (0.002) loss 0.5215 (1.0607) acc 87.5000 (73.3211) lr 7.0224e-05 eta 1:57:13 +epoch [46/50] batch [515/1000] time 1.552 (1.566) data 0.000 (0.002) loss 0.8228 (1.0594) acc 81.2500 (73.3556) lr 7.0224e-05 eta 1:57:05 +epoch [46/50] batch [520/1000] time 1.574 (1.566) data 0.001 (0.002) loss 1.2539 (1.0603) acc 71.8750 (73.3233) lr 7.0224e-05 eta 1:56:57 +epoch [46/50] batch [525/1000] time 1.555 (1.566) data 0.001 (0.002) loss 1.3223 (1.0629) acc 68.7500 (73.2500) lr 7.0224e-05 eta 1:56:48 +epoch [46/50] batch [530/1000] time 
1.546 (1.566) data 0.001 (0.002) loss 1.0059 (1.0617) acc 68.7500 (73.2488) lr 7.0224e-05 eta 1:56:40 +epoch [46/50] batch [535/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.2305 (1.0602) acc 71.8750 (73.2827) lr 7.0224e-05 eta 1:56:32 +epoch [46/50] batch [540/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.3096 (1.0604) acc 75.0000 (73.3218) lr 7.0224e-05 eta 1:56:25 +epoch [46/50] batch [545/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.4639 (1.0629) acc 68.7500 (73.2913) lr 7.0224e-05 eta 1:56:17 +epoch [46/50] batch [550/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.0957 (1.0630) acc 71.8750 (73.2614) lr 7.0224e-05 eta 1:56:09 +epoch [46/50] batch [555/1000] time 1.574 (1.566) data 0.001 (0.002) loss 1.3291 (1.0634) acc 65.6250 (73.1926) lr 7.0224e-05 eta 1:56:01 +epoch [46/50] batch [560/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.4883 (1.0640) acc 62.5000 (73.1920) lr 7.0224e-05 eta 1:55:53 +epoch [46/50] batch [565/1000] time 1.560 (1.566) data 0.001 (0.002) loss 0.9961 (1.0637) acc 78.1250 (73.2024) lr 7.0224e-05 eta 1:55:46 +epoch [46/50] batch [570/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.9834 (1.0626) acc 71.8750 (73.2292) lr 7.0224e-05 eta 1:55:38 +epoch [46/50] batch [575/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.7661 (1.0622) acc 84.3750 (73.2228) lr 7.0224e-05 eta 1:55:29 +epoch [46/50] batch [580/1000] time 1.742 (1.566) data 0.001 (0.002) loss 0.9951 (1.0620) acc 75.0000 (73.2435) lr 7.0224e-05 eta 1:55:23 +epoch [46/50] batch [585/1000] time 1.560 (1.566) data 0.001 (0.002) loss 1.3564 (1.0618) acc 59.3750 (73.2585) lr 7.0224e-05 eta 1:55:14 +epoch [46/50] batch [590/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.8369 (1.0598) acc 87.5000 (73.3316) lr 7.0224e-05 eta 1:55:06 +epoch [46/50] batch [595/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.9297 (1.0601) acc 78.1250 (73.3246) lr 7.0224e-05 eta 1:54:58 +epoch [46/50] batch [600/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.2266 (1.0619) 
acc 62.5000 (73.2865) lr 7.0224e-05 eta 1:54:50 +epoch [46/50] batch [605/1000] time 1.547 (1.566) data 0.001 (0.002) loss 1.9795 (1.0632) acc 56.2500 (73.2800) lr 7.0224e-05 eta 1:54:42 +epoch [46/50] batch [610/1000] time 1.538 (1.566) data 0.001 (0.002) loss 1.3994 (1.0623) acc 68.7500 (73.2941) lr 7.0224e-05 eta 1:54:33 +epoch [46/50] batch [615/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.2568 (1.0630) acc 71.8750 (73.2825) lr 7.0224e-05 eta 1:54:25 +epoch [46/50] batch [620/1000] time 1.574 (1.566) data 0.000 (0.002) loss 1.1504 (1.0629) acc 65.6250 (73.2863) lr 7.0224e-05 eta 1:54:17 +epoch [46/50] batch [625/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.8311 (1.0655) acc 65.6250 (73.2450) lr 7.0224e-05 eta 1:54:09 +epoch [46/50] batch [630/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.0234 (1.0642) acc 78.1250 (73.2837) lr 7.0224e-05 eta 1:54:01 +epoch [46/50] batch [635/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.1309 (1.0652) acc 71.8750 (73.2382) lr 7.0224e-05 eta 1:53:53 +epoch [46/50] batch [640/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.7998 (1.0662) acc 71.8750 (73.1836) lr 7.0224e-05 eta 1:53:45 +epoch [46/50] batch [645/1000] time 1.545 (1.566) data 0.000 (0.002) loss 0.8696 (1.0668) acc 78.1250 (73.1734) lr 7.0224e-05 eta 1:53:38 +epoch [46/50] batch [650/1000] time 1.555 (1.566) data 0.000 (0.002) loss 1.4668 (1.0682) acc 75.0000 (73.1538) lr 7.0224e-05 eta 1:53:30 +epoch [46/50] batch [655/1000] time 1.562 (1.565) data 0.001 (0.002) loss 0.6133 (1.0672) acc 81.2500 (73.1823) lr 7.0224e-05 eta 1:53:21 +epoch [46/50] batch [660/1000] time 1.557 (1.565) data 0.001 (0.002) loss 0.9795 (1.0670) acc 75.0000 (73.1723) lr 7.0224e-05 eta 1:53:13 +epoch [46/50] batch [665/1000] time 1.560 (1.565) data 0.001 (0.002) loss 0.7173 (1.0663) acc 78.1250 (73.1767) lr 7.0224e-05 eta 1:53:05 +epoch [46/50] batch [670/1000] time 1.562 (1.565) data 0.001 (0.002) loss 0.8096 (1.0667) acc 71.8750 (73.1343) lr 7.0224e-05 eta 1:52:57 
+epoch [46/50] batch [675/1000] time 1.571 (1.565) data 0.000 (0.002) loss 0.7891 (1.0676) acc 71.8750 (73.1157) lr 7.0224e-05 eta 1:52:50 +epoch [46/50] batch [680/1000] time 1.533 (1.565) data 0.001 (0.002) loss 0.7300 (1.0671) acc 81.2500 (73.1434) lr 7.0224e-05 eta 1:52:42 +epoch [46/50] batch [685/1000] time 1.570 (1.565) data 0.001 (0.002) loss 0.4216 (1.0647) acc 90.6250 (73.2071) lr 7.0224e-05 eta 1:52:34 +epoch [46/50] batch [690/1000] time 1.555 (1.565) data 0.000 (0.002) loss 1.1143 (1.0649) acc 65.6250 (73.2111) lr 7.0224e-05 eta 1:52:27 +epoch [46/50] batch [695/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.2480 (1.0666) acc 68.7500 (73.1790) lr 7.0224e-05 eta 1:52:18 +epoch [46/50] batch [700/1000] time 1.566 (1.565) data 0.000 (0.002) loss 0.8188 (1.0659) acc 78.1250 (73.1920) lr 7.0224e-05 eta 1:52:10 +epoch [46/50] batch [705/1000] time 1.549 (1.565) data 0.001 (0.002) loss 1.3984 (1.0658) acc 65.6250 (73.2048) lr 7.0224e-05 eta 1:52:02 +epoch [46/50] batch [710/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.9766 (1.0659) acc 56.2500 (73.1998) lr 7.0224e-05 eta 1:51:54 +epoch [46/50] batch [715/1000] time 1.558 (1.565) data 0.001 (0.002) loss 1.0693 (1.0662) acc 75.0000 (73.2037) lr 7.0224e-05 eta 1:51:46 +epoch [46/50] batch [720/1000] time 1.572 (1.565) data 0.000 (0.002) loss 1.1982 (1.0665) acc 78.1250 (73.2075) lr 7.0224e-05 eta 1:51:39 +epoch [46/50] batch [725/1000] time 1.600 (1.565) data 0.001 (0.002) loss 0.9023 (1.0661) acc 78.1250 (73.2198) lr 7.0224e-05 eta 1:51:32 +epoch [46/50] batch [730/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.1309 (1.0643) acc 78.1250 (73.2620) lr 7.0224e-05 eta 1:51:24 +epoch [46/50] batch [735/1000] time 1.597 (1.566) data 0.001 (0.002) loss 1.5977 (1.0640) acc 68.7500 (73.2781) lr 7.0224e-05 eta 1:51:17 +epoch [46/50] batch [740/1000] time 1.546 (1.566) data 0.000 (0.002) loss 1.0264 (1.0633) acc 75.0000 (73.2897) lr 7.0224e-05 eta 1:51:09 +epoch [46/50] batch [745/1000] time 1.577 (1.566) 
data 0.000 (0.002) loss 1.1670 (1.0643) acc 65.6250 (73.2760) lr 7.0224e-05 eta 1:51:01 +epoch [46/50] batch [750/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.0146 (1.0650) acc 75.0000 (73.2500) lr 7.0224e-05 eta 1:50:53 +epoch [46/50] batch [755/1000] time 1.557 (1.565) data 0.000 (0.002) loss 0.6157 (1.0640) acc 84.3750 (73.2823) lr 7.0224e-05 eta 1:50:45 +epoch [46/50] batch [760/1000] time 1.540 (1.565) data 0.001 (0.002) loss 1.7051 (1.0654) acc 62.5000 (73.2525) lr 7.0224e-05 eta 1:50:37 +epoch [46/50] batch [765/1000] time 1.540 (1.565) data 0.000 (0.002) loss 0.8101 (1.0651) acc 78.1250 (73.2843) lr 7.0224e-05 eta 1:50:29 +epoch [46/50] batch [770/1000] time 1.542 (1.565) data 0.001 (0.002) loss 0.6074 (1.0641) acc 81.2500 (73.3076) lr 7.0224e-05 eta 1:50:21 +epoch [46/50] batch [775/1000] time 1.582 (1.565) data 0.001 (0.002) loss 1.4297 (1.0655) acc 75.0000 (73.2984) lr 7.0224e-05 eta 1:50:13 +epoch [46/50] batch [780/1000] time 1.536 (1.565) data 0.000 (0.002) loss 0.8169 (1.0653) acc 71.8750 (73.3093) lr 7.0224e-05 eta 1:50:05 +epoch [46/50] batch [785/1000] time 1.547 (1.565) data 0.001 (0.002) loss 0.8330 (1.0652) acc 81.2500 (73.3240) lr 7.0224e-05 eta 1:49:57 +epoch [46/50] batch [790/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.4453 (1.0661) acc 68.7500 (73.3070) lr 7.0224e-05 eta 1:49:50 +epoch [46/50] batch [795/1000] time 1.584 (1.566) data 0.000 (0.002) loss 2.1895 (1.0682) acc 59.3750 (73.2704) lr 7.0224e-05 eta 1:49:43 +epoch [46/50] batch [800/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.6709 (1.0683) acc 84.3750 (73.2773) lr 7.0224e-05 eta 1:49:35 +epoch [46/50] batch [805/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.3962 (1.0681) acc 90.6250 (73.2764) lr 7.0224e-05 eta 1:49:27 +epoch [46/50] batch [810/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.4854 (1.0692) acc 59.3750 (73.2446) lr 7.0224e-05 eta 1:49:19 +epoch [46/50] batch [815/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.7119 (1.0707) acc 68.7500 
(73.2324) lr 7.0224e-05 eta 1:49:10 +epoch [46/50] batch [820/1000] time 1.572 (1.565) data 0.000 (0.002) loss 0.9326 (1.0694) acc 68.7500 (73.2622) lr 7.0224e-05 eta 1:49:03 +epoch [46/50] batch [825/1000] time 1.557 (1.565) data 0.000 (0.002) loss 1.2295 (1.0689) acc 68.7500 (73.2765) lr 7.0224e-05 eta 1:48:55 +epoch [46/50] batch [830/1000] time 1.561 (1.565) data 0.001 (0.002) loss 1.0107 (1.0691) acc 75.0000 (73.2681) lr 7.0224e-05 eta 1:48:47 +epoch [46/50] batch [835/1000] time 1.539 (1.565) data 0.000 (0.002) loss 1.0479 (1.0684) acc 75.0000 (73.2784) lr 7.0224e-05 eta 1:48:39 +epoch [46/50] batch [840/1000] time 1.591 (1.565) data 0.000 (0.002) loss 1.3330 (1.0684) acc 71.8750 (73.2850) lr 7.0224e-05 eta 1:48:32 +epoch [46/50] batch [845/1000] time 1.565 (1.565) data 0.000 (0.002) loss 0.7231 (1.0668) acc 75.0000 (73.3321) lr 7.0224e-05 eta 1:48:24 +epoch [46/50] batch [850/1000] time 1.543 (1.565) data 0.001 (0.002) loss 0.9917 (1.0657) acc 68.7500 (73.3309) lr 7.0224e-05 eta 1:48:15 +epoch [46/50] batch [855/1000] time 1.566 (1.565) data 0.000 (0.002) loss 1.1748 (1.0678) acc 68.7500 (73.2822) lr 7.0224e-05 eta 1:48:07 +epoch [46/50] batch [860/1000] time 1.590 (1.565) data 0.001 (0.002) loss 0.9790 (1.0682) acc 84.3750 (73.3031) lr 7.0224e-05 eta 1:48:00 +epoch [46/50] batch [865/1000] time 1.534 (1.565) data 0.000 (0.002) loss 1.0615 (1.0673) acc 81.2500 (73.3345) lr 7.0224e-05 eta 1:47:51 +epoch [46/50] batch [870/1000] time 1.542 (1.565) data 0.001 (0.002) loss 1.4102 (1.0667) acc 75.0000 (73.3621) lr 7.0224e-05 eta 1:47:43 +epoch [46/50] batch [875/1000] time 1.551 (1.565) data 0.001 (0.002) loss 1.4092 (1.0671) acc 68.7500 (73.3393) lr 7.0224e-05 eta 1:47:35 +epoch [46/50] batch [880/1000] time 1.563 (1.565) data 0.000 (0.002) loss 0.7412 (1.0653) acc 84.3750 (73.3771) lr 7.0224e-05 eta 1:47:27 +epoch [46/50] batch [885/1000] time 1.565 (1.565) data 0.000 (0.002) loss 1.1270 (1.0648) acc 75.0000 (73.3898) lr 7.0224e-05 eta 1:47:20 +epoch [46/50] 
batch [890/1000] time 1.538 (1.565) data 0.000 (0.002) loss 0.7070 (1.0650) acc 90.6250 (73.3883) lr 7.0224e-05 eta 1:47:12 +epoch [46/50] batch [895/1000] time 1.553 (1.565) data 0.000 (0.002) loss 0.9419 (1.0644) acc 65.6250 (73.3834) lr 7.0224e-05 eta 1:47:04 +epoch [46/50] batch [900/1000] time 1.540 (1.565) data 0.000 (0.002) loss 1.2246 (1.0654) acc 68.7500 (73.3785) lr 7.0224e-05 eta 1:46:56 +epoch [46/50] batch [905/1000] time 1.572 (1.565) data 0.001 (0.002) loss 1.1924 (1.0654) acc 78.1250 (73.3874) lr 7.0224e-05 eta 1:46:48 +epoch [46/50] batch [910/1000] time 1.590 (1.565) data 0.001 (0.002) loss 1.1162 (1.0652) acc 68.7500 (73.3654) lr 7.0224e-05 eta 1:46:40 +epoch [46/50] batch [915/1000] time 1.531 (1.565) data 0.000 (0.002) loss 1.1387 (1.0660) acc 71.8750 (73.3402) lr 7.0224e-05 eta 1:46:32 +epoch [46/50] batch [920/1000] time 1.547 (1.565) data 0.000 (0.002) loss 0.8687 (1.0660) acc 75.0000 (73.3220) lr 7.0224e-05 eta 1:46:24 +epoch [46/50] batch [925/1000] time 1.561 (1.565) data 0.000 (0.002) loss 0.9712 (1.0662) acc 75.0000 (73.3209) lr 7.0224e-05 eta 1:46:16 +epoch [46/50] batch [930/1000] time 1.560 (1.565) data 0.000 (0.002) loss 1.2949 (1.0658) acc 68.7500 (73.3333) lr 7.0224e-05 eta 1:46:08 +epoch [46/50] batch [935/1000] time 1.551 (1.565) data 0.001 (0.002) loss 0.9780 (1.0673) acc 81.2500 (73.3189) lr 7.0224e-05 eta 1:46:00 +epoch [46/50] batch [940/1000] time 1.560 (1.564) data 0.000 (0.002) loss 0.9385 (1.0669) acc 75.0000 (73.3178) lr 7.0224e-05 eta 1:45:51 +epoch [46/50] batch [945/1000] time 1.720 (1.565) data 0.000 (0.002) loss 0.8530 (1.0664) acc 68.7500 (73.3135) lr 7.0224e-05 eta 1:45:44 +epoch [46/50] batch [950/1000] time 1.523 (1.565) data 0.000 (0.002) loss 1.4492 (1.0672) acc 65.6250 (73.3191) lr 7.0224e-05 eta 1:45:36 +epoch [46/50] batch [955/1000] time 1.567 (1.565) data 0.000 (0.001) loss 0.9751 (1.0677) acc 71.8750 (73.3344) lr 7.0224e-05 eta 1:45:28 +epoch [46/50] batch [960/1000] time 1.559 (1.565) data 0.000 
(0.001) loss 1.1895 (1.0678) acc 68.7500 (73.3171) lr 7.0224e-05 eta 1:45:20 +epoch [46/50] batch [965/1000] time 1.548 (1.565) data 0.001 (0.001) loss 1.3584 (1.0682) acc 62.5000 (73.2966) lr 7.0224e-05 eta 1:45:12 +epoch [46/50] batch [970/1000] time 1.541 (1.564) data 0.001 (0.001) loss 0.9287 (1.0690) acc 78.1250 (73.2796) lr 7.0224e-05 eta 1:45:04 +epoch [46/50] batch [975/1000] time 1.562 (1.564) data 0.000 (0.001) loss 1.3809 (1.0698) acc 65.6250 (73.2628) lr 7.0224e-05 eta 1:44:56 +epoch [46/50] batch [980/1000] time 1.552 (1.564) data 0.000 (0.001) loss 1.3154 (1.0707) acc 68.7500 (73.2398) lr 7.0224e-05 eta 1:44:48 +epoch [46/50] batch [985/1000] time 1.540 (1.564) data 0.001 (0.001) loss 0.5469 (1.0706) acc 81.2500 (73.2329) lr 7.0224e-05 eta 1:44:40 +epoch [46/50] batch [990/1000] time 1.735 (1.564) data 0.000 (0.001) loss 1.0830 (1.0713) acc 71.8750 (73.2197) lr 7.0224e-05 eta 1:44:33 +epoch [46/50] batch [995/1000] time 1.559 (1.564) data 0.000 (0.001) loss 2.0098 (1.0714) acc 62.5000 (73.2161) lr 7.0224e-05 eta 1:44:25 +epoch [46/50] batch [1000/1000] time 1.553 (1.564) data 0.000 (0.001) loss 0.8594 (1.0717) acc 75.0000 (73.2125) lr 4.8943e-05 eta 1:44:17 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,375 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.3% +epoch [47/50] batch [5/1000] time 1.574 (1.696) data 0.001 (0.185) loss 0.9556 (0.8328) acc 81.2500 (79.3750) lr 4.8943e-05 eta 1:52:53 +epoch [47/50] batch [10/1000] time 1.548 (1.633) data 0.000 (0.093) loss 1.2578 (1.0708) acc 68.7500 (74.0625) lr 4.8943e-05 eta 1:48:35 +epoch [47/50] batch [15/1000] time 1.564 (1.610) data 0.000 (0.062) loss 1.5410 (1.1118) acc 68.7500 (73.1250) lr 4.8943e-05 eta 1:46:54 +epoch [47/50] batch [20/1000] time 1.565 (1.599) data 0.000 (0.047) loss 0.6221 (1.0771) acc 78.1250 (72.9688) lr 4.8943e-05 eta 1:46:04 +epoch [47/50] batch [25/1000] time 1.563 (1.602) data 0.000 (0.037) loss 1.1914 (1.1256) acc 65.6250 (72.2500) lr 4.8943e-05 eta 
1:46:07 +epoch [47/50] batch [30/1000] time 1.576 (1.595) data 0.001 (0.031) loss 1.1494 (1.1337) acc 68.7500 (72.7083) lr 4.8943e-05 eta 1:45:33 +epoch [47/50] batch [35/1000] time 1.547 (1.591) data 0.001 (0.027) loss 0.5278 (1.1105) acc 84.3750 (72.9464) lr 4.8943e-05 eta 1:45:09 +epoch [47/50] batch [40/1000] time 1.555 (1.587) data 0.001 (0.024) loss 0.9595 (1.1254) acc 71.8750 (72.5781) lr 4.8943e-05 eta 1:44:43 +epoch [47/50] batch [45/1000] time 1.542 (1.583) data 0.001 (0.021) loss 1.1377 (1.1510) acc 71.8750 (72.2917) lr 4.8943e-05 eta 1:44:21 +epoch [47/50] batch [50/1000] time 1.568 (1.581) data 0.000 (0.019) loss 1.3057 (1.1601) acc 62.5000 (72.3750) lr 4.8943e-05 eta 1:44:03 +epoch [47/50] batch [55/1000] time 1.557 (1.580) data 0.001 (0.017) loss 0.9609 (1.1580) acc 62.5000 (72.3295) lr 4.8943e-05 eta 1:43:51 +epoch [47/50] batch [60/1000] time 1.546 (1.578) data 0.001 (0.016) loss 1.1133 (1.1548) acc 71.8750 (72.3958) lr 4.8943e-05 eta 1:43:36 +epoch [47/50] batch [65/1000] time 1.568 (1.577) data 0.001 (0.015) loss 0.6362 (1.1329) acc 71.8750 (72.5962) lr 4.8943e-05 eta 1:43:26 +epoch [47/50] batch [70/1000] time 1.570 (1.575) data 0.000 (0.014) loss 1.0615 (1.1150) acc 78.1250 (72.9911) lr 4.8943e-05 eta 1:43:11 +epoch [47/50] batch [75/1000] time 1.553 (1.574) data 0.000 (0.013) loss 1.3096 (1.0900) acc 68.7500 (73.4583) lr 4.8943e-05 eta 1:42:59 +epoch [47/50] batch [80/1000] time 1.537 (1.573) data 0.000 (0.012) loss 1.2373 (1.0825) acc 65.6250 (73.4766) lr 4.8943e-05 eta 1:42:46 +epoch [47/50] batch [85/1000] time 1.553 (1.572) data 0.001 (0.011) loss 1.1445 (1.0671) acc 65.6250 (73.7868) lr 4.8943e-05 eta 1:42:35 +epoch [47/50] batch [90/1000] time 1.570 (1.573) data 0.001 (0.011) loss 1.0576 (1.0802) acc 71.8750 (73.4722) lr 4.8943e-05 eta 1:42:32 +epoch [47/50] batch [95/1000] time 1.543 (1.573) data 0.001 (0.010) loss 1.2070 (1.0759) acc 68.7500 (73.5197) lr 4.8943e-05 eta 1:42:21 +epoch [47/50] batch [100/1000] time 1.574 (1.572) data 
0.000 (0.010) loss 1.1279 (1.0724) acc 75.0000 (73.5938) lr 4.8943e-05 eta 1:42:12 +epoch [47/50] batch [105/1000] time 1.574 (1.572) data 0.001 (0.009) loss 1.2666 (1.0773) acc 62.5000 (73.3929) lr 4.8943e-05 eta 1:42:04 +epoch [47/50] batch [110/1000] time 1.570 (1.572) data 0.001 (0.009) loss 0.6743 (1.0746) acc 81.2500 (73.3523) lr 4.8943e-05 eta 1:41:55 +epoch [47/50] batch [115/1000] time 1.546 (1.572) data 0.001 (0.009) loss 1.2051 (1.0737) acc 78.1250 (73.3424) lr 4.8943e-05 eta 1:41:45 +epoch [47/50] batch [120/1000] time 1.550 (1.571) data 0.000 (0.008) loss 1.2070 (1.0775) acc 71.8750 (73.3333) lr 4.8943e-05 eta 1:41:36 +epoch [47/50] batch [125/1000] time 1.606 (1.571) data 0.001 (0.008) loss 1.3145 (1.0897) acc 75.0000 (73.1500) lr 4.8943e-05 eta 1:41:27 +epoch [47/50] batch [130/1000] time 1.553 (1.571) data 0.000 (0.008) loss 1.1621 (1.0987) acc 65.6250 (73.0529) lr 4.8943e-05 eta 1:41:18 +epoch [47/50] batch [135/1000] time 1.562 (1.571) data 0.000 (0.007) loss 1.2979 (1.0983) acc 65.6250 (72.9398) lr 4.8943e-05 eta 1:41:12 +epoch [47/50] batch [140/1000] time 1.542 (1.571) data 0.000 (0.007) loss 0.4290 (1.0912) acc 87.5000 (73.1250) lr 4.8943e-05 eta 1:41:04 +epoch [47/50] batch [145/1000] time 1.559 (1.571) data 0.001 (0.007) loss 1.2051 (1.0895) acc 62.5000 (73.1034) lr 4.8943e-05 eta 1:40:55 +epoch [47/50] batch [150/1000] time 1.557 (1.570) data 0.001 (0.007) loss 1.0332 (1.0844) acc 75.0000 (73.2292) lr 4.8943e-05 eta 1:40:45 +epoch [47/50] batch [155/1000] time 1.563 (1.570) data 0.000 (0.007) loss 1.1309 (1.0840) acc 75.0000 (73.2056) lr 4.8943e-05 eta 1:40:37 +epoch [47/50] batch [160/1000] time 1.571 (1.570) data 0.001 (0.006) loss 0.7354 (1.0826) acc 75.0000 (73.1641) lr 4.8943e-05 eta 1:40:29 +epoch [47/50] batch [165/1000] time 1.580 (1.570) data 0.000 (0.006) loss 0.7891 (1.0792) acc 75.0000 (73.1629) lr 4.8943e-05 eta 1:40:21 +epoch [47/50] batch [170/1000] time 1.541 (1.570) data 0.001 (0.006) loss 1.6006 (1.0799) acc 56.2500 
(73.1801) lr 4.8943e-05 eta 1:40:11 +epoch [47/50] batch [175/1000] time 1.737 (1.570) data 0.001 (0.006) loss 0.5288 (1.0731) acc 81.2500 (73.3393) lr 4.8943e-05 eta 1:40:06 +epoch [47/50] batch [180/1000] time 1.588 (1.570) data 0.001 (0.006) loss 0.6016 (1.0741) acc 81.2500 (73.3854) lr 4.8943e-05 eta 1:39:57 +epoch [47/50] batch [185/1000] time 1.552 (1.570) data 0.000 (0.006) loss 0.8813 (1.0701) acc 78.1250 (73.4459) lr 4.8943e-05 eta 1:39:48 +epoch [47/50] batch [190/1000] time 1.560 (1.569) data 0.000 (0.005) loss 1.7363 (1.0769) acc 56.2500 (73.3717) lr 4.8943e-05 eta 1:39:38 +epoch [47/50] batch [195/1000] time 1.549 (1.569) data 0.000 (0.005) loss 1.5098 (1.0744) acc 56.2500 (73.3333) lr 4.8943e-05 eta 1:39:30 +epoch [47/50] batch [200/1000] time 1.558 (1.569) data 0.000 (0.005) loss 1.1055 (1.0686) acc 75.0000 (73.4688) lr 4.8943e-05 eta 1:39:21 +epoch [47/50] batch [205/1000] time 1.566 (1.569) data 0.000 (0.005) loss 0.9810 (1.0704) acc 81.2500 (73.4299) lr 4.8943e-05 eta 1:39:14 +epoch [47/50] batch [210/1000] time 1.571 (1.569) data 0.001 (0.005) loss 0.8477 (1.0652) acc 75.0000 (73.5417) lr 4.8943e-05 eta 1:39:04 +epoch [47/50] batch [215/1000] time 1.554 (1.568) data 0.000 (0.005) loss 0.7349 (1.0618) acc 84.3750 (73.6192) lr 4.8943e-05 eta 1:38:55 +epoch [47/50] batch [220/1000] time 1.564 (1.568) data 0.000 (0.005) loss 1.8447 (1.0634) acc 59.3750 (73.6648) lr 4.8943e-05 eta 1:38:46 +epoch [47/50] batch [225/1000] time 1.562 (1.568) data 0.000 (0.005) loss 1.1299 (1.0650) acc 75.0000 (73.6111) lr 4.8943e-05 eta 1:38:38 +epoch [47/50] batch [230/1000] time 1.553 (1.568) data 0.001 (0.005) loss 0.5659 (1.0596) acc 78.1250 (73.5734) lr 4.8943e-05 eta 1:38:29 +epoch [47/50] batch [235/1000] time 1.528 (1.567) data 0.000 (0.004) loss 1.1084 (1.0602) acc 75.0000 (73.5904) lr 4.8943e-05 eta 1:38:20 +epoch [47/50] batch [240/1000] time 1.566 (1.568) data 0.000 (0.004) loss 0.8911 (1.0602) acc 71.8750 (73.5026) lr 4.8943e-05 eta 1:38:16 +epoch [47/50] 
batch [245/1000] time 1.540 (1.568) data 0.000 (0.004) loss 0.7329 (1.0575) acc 81.2500 (73.4949) lr 4.8943e-05 eta 1:38:07 +epoch [47/50] batch [250/1000] time 1.555 (1.567) data 0.001 (0.004) loss 1.0869 (1.0579) acc 75.0000 (73.4375) lr 4.8943e-05 eta 1:37:57 +epoch [47/50] batch [255/1000] time 1.578 (1.567) data 0.000 (0.004) loss 0.8447 (1.0574) acc 71.8750 (73.3578) lr 4.8943e-05 eta 1:37:49 +epoch [47/50] batch [260/1000] time 1.566 (1.567) data 0.000 (0.004) loss 0.5767 (1.0543) acc 87.5000 (73.4615) lr 4.8943e-05 eta 1:37:40 +epoch [47/50] batch [265/1000] time 1.562 (1.567) data 0.000 (0.004) loss 1.3232 (1.0517) acc 68.7500 (73.5142) lr 4.8943e-05 eta 1:37:31 +epoch [47/50] batch [270/1000] time 1.559 (1.566) data 0.001 (0.004) loss 0.6421 (1.0541) acc 84.3750 (73.4838) lr 4.8943e-05 eta 1:37:22 +epoch [47/50] batch [275/1000] time 1.548 (1.566) data 0.001 (0.004) loss 0.6836 (1.0524) acc 84.3750 (73.4773) lr 4.8943e-05 eta 1:37:14 +epoch [47/50] batch [280/1000] time 1.551 (1.566) data 0.000 (0.004) loss 0.7544 (1.0513) acc 75.0000 (73.4487) lr 4.8943e-05 eta 1:37:06 +epoch [47/50] batch [285/1000] time 1.555 (1.567) data 0.000 (0.004) loss 0.6343 (1.0492) acc 84.3750 (73.4759) lr 4.8943e-05 eta 1:37:01 +epoch [47/50] batch [290/1000] time 1.571 (1.567) data 0.000 (0.004) loss 0.9551 (1.0476) acc 68.7500 (73.5022) lr 4.8943e-05 eta 1:36:53 +epoch [47/50] batch [295/1000] time 1.567 (1.567) data 0.000 (0.004) loss 1.3018 (1.0467) acc 68.7500 (73.5275) lr 4.8943e-05 eta 1:36:44 +epoch [47/50] batch [300/1000] time 1.543 (1.566) data 0.000 (0.004) loss 1.3955 (1.0477) acc 71.8750 (73.5208) lr 4.8943e-05 eta 1:36:35 +epoch [47/50] batch [305/1000] time 1.584 (1.566) data 0.000 (0.004) loss 1.1973 (1.0483) acc 71.8750 (73.5246) lr 4.8943e-05 eta 1:36:28 +epoch [47/50] batch [310/1000] time 1.585 (1.567) data 0.000 (0.003) loss 1.4355 (1.0508) acc 68.7500 (73.4577) lr 4.8943e-05 eta 1:36:20 +epoch [47/50] batch [315/1000] time 1.553 (1.567) data 0.000 
(0.003) loss 0.7822 (1.0511) acc 78.1250 (73.4722) lr 4.8943e-05 eta 1:36:12 +epoch [47/50] batch [320/1000] time 1.566 (1.566) data 0.000 (0.003) loss 1.6104 (1.0547) acc 65.6250 (73.3984) lr 4.8943e-05 eta 1:36:04 +epoch [47/50] batch [325/1000] time 1.551 (1.566) data 0.001 (0.003) loss 0.9873 (1.0554) acc 65.6250 (73.3942) lr 4.8943e-05 eta 1:35:56 +epoch [47/50] batch [330/1000] time 1.578 (1.567) data 0.000 (0.003) loss 1.7676 (1.0571) acc 59.3750 (73.3239) lr 4.8943e-05 eta 1:35:50 +epoch [47/50] batch [335/1000] time 1.586 (1.567) data 0.000 (0.003) loss 1.2695 (1.0567) acc 68.7500 (73.3209) lr 4.8943e-05 eta 1:35:42 +epoch [47/50] batch [340/1000] time 1.586 (1.567) data 0.001 (0.003) loss 1.0586 (1.0559) acc 75.0000 (73.3732) lr 4.8943e-05 eta 1:35:35 +epoch [47/50] batch [345/1000] time 1.561 (1.567) data 0.001 (0.003) loss 0.9761 (1.0556) acc 71.8750 (73.4058) lr 4.8943e-05 eta 1:35:27 +epoch [47/50] batch [350/1000] time 1.570 (1.567) data 0.000 (0.003) loss 0.7114 (1.0560) acc 87.5000 (73.4196) lr 4.8943e-05 eta 1:35:20 +epoch [47/50] batch [355/1000] time 1.588 (1.567) data 0.001 (0.003) loss 0.6650 (1.0556) acc 78.1250 (73.4331) lr 4.8943e-05 eta 1:35:12 +epoch [47/50] batch [360/1000] time 1.572 (1.567) data 0.001 (0.003) loss 0.6519 (1.0517) acc 84.3750 (73.5069) lr 4.8943e-05 eta 1:35:04 +epoch [47/50] batch [365/1000] time 1.537 (1.567) data 0.000 (0.003) loss 0.6592 (1.0489) acc 84.3750 (73.5616) lr 4.8943e-05 eta 1:34:56 +epoch [47/50] batch [370/1000] time 1.560 (1.567) data 0.000 (0.003) loss 0.9473 (1.0523) acc 65.6250 (73.4966) lr 4.8943e-05 eta 1:34:48 +epoch [47/50] batch [375/1000] time 1.572 (1.567) data 0.001 (0.003) loss 1.2988 (1.0530) acc 71.8750 (73.4750) lr 4.8943e-05 eta 1:34:41 +epoch [47/50] batch [380/1000] time 1.547 (1.567) data 0.001 (0.003) loss 0.9922 (1.0554) acc 84.3750 (73.4704) lr 4.8943e-05 eta 1:34:32 +epoch [47/50] batch [385/1000] time 1.589 (1.567) data 0.001 (0.003) loss 0.8901 (1.0561) acc 75.0000 (73.4497) lr 
4.8943e-05 eta 1:34:25 +epoch [47/50] batch [390/1000] time 1.563 (1.568) data 0.000 (0.003) loss 0.7886 (1.0530) acc 81.2500 (73.5176) lr 4.8943e-05 eta 1:34:19 +epoch [47/50] batch [395/1000] time 1.573 (1.568) data 0.001 (0.003) loss 1.1670 (1.0518) acc 53.1250 (73.5047) lr 4.8943e-05 eta 1:34:11 +epoch [47/50] batch [400/1000] time 1.563 (1.568) data 0.000 (0.003) loss 0.9673 (1.0503) acc 81.2500 (73.5234) lr 4.8943e-05 eta 1:34:03 +epoch [47/50] batch [405/1000] time 1.548 (1.568) data 0.000 (0.003) loss 0.9609 (1.0529) acc 62.5000 (73.4105) lr 4.8943e-05 eta 1:33:55 +epoch [47/50] batch [410/1000] time 1.554 (1.568) data 0.000 (0.003) loss 0.8389 (1.0515) acc 65.6250 (73.3994) lr 4.8943e-05 eta 1:33:47 +epoch [47/50] batch [415/1000] time 1.542 (1.567) data 0.001 (0.003) loss 1.1328 (1.0526) acc 71.8750 (73.4262) lr 4.8943e-05 eta 1:33:39 +epoch [47/50] batch [420/1000] time 1.524 (1.567) data 0.000 (0.003) loss 1.0986 (1.0530) acc 75.0000 (73.4077) lr 4.8943e-05 eta 1:33:30 +epoch [47/50] batch [425/1000] time 1.550 (1.567) data 0.000 (0.003) loss 0.7358 (1.0529) acc 81.2500 (73.4191) lr 4.8943e-05 eta 1:33:22 +epoch [47/50] batch [430/1000] time 1.570 (1.567) data 0.001 (0.003) loss 2.0059 (1.0535) acc 68.7500 (73.4230) lr 4.8943e-05 eta 1:33:14 +epoch [47/50] batch [435/1000] time 1.576 (1.568) data 0.001 (0.003) loss 1.4414 (1.0554) acc 65.6250 (73.3693) lr 4.8943e-05 eta 1:33:08 +epoch [47/50] batch [440/1000] time 1.561 (1.567) data 0.000 (0.003) loss 1.2207 (1.0569) acc 65.6250 (73.3452) lr 4.8943e-05 eta 1:33:00 +epoch [47/50] batch [445/1000] time 1.564 (1.567) data 0.000 (0.003) loss 0.7280 (1.0570) acc 78.1250 (73.3216) lr 4.8943e-05 eta 1:32:52 +epoch [47/50] batch [450/1000] time 1.561 (1.567) data 0.000 (0.003) loss 1.2695 (1.0580) acc 71.8750 (73.3542) lr 4.8943e-05 eta 1:32:43 +epoch [47/50] batch [455/1000] time 1.556 (1.567) data 0.000 (0.003) loss 0.9517 (1.0606) acc 84.3750 (73.3242) lr 4.8943e-05 eta 1:32:35 +epoch [47/50] batch 
[460/1000] time 1.551 (1.567) data 0.000 (0.003) loss 0.8945 (1.0601) acc 78.1250 (73.3288) lr 4.8943e-05 eta 1:32:27 +epoch [47/50] batch [465/1000] time 1.572 (1.567) data 0.000 (0.002) loss 1.2168 (1.0612) acc 65.6250 (73.2863) lr 4.8943e-05 eta 1:32:19 +epoch [47/50] batch [470/1000] time 1.570 (1.567) data 0.000 (0.002) loss 1.3428 (1.0612) acc 71.8750 (73.2646) lr 4.8943e-05 eta 1:32:11 +epoch [47/50] batch [475/1000] time 1.578 (1.567) data 0.000 (0.002) loss 1.2246 (1.0603) acc 71.8750 (73.2961) lr 4.8943e-05 eta 1:32:04 +epoch [47/50] batch [480/1000] time 1.584 (1.567) data 0.000 (0.002) loss 1.5459 (1.0618) acc 62.5000 (73.2682) lr 4.8943e-05 eta 1:31:57 +epoch [47/50] batch [485/1000] time 1.556 (1.567) data 0.000 (0.002) loss 0.9868 (1.0644) acc 75.0000 (73.2023) lr 4.8943e-05 eta 1:31:49 +epoch [47/50] batch [490/1000] time 1.550 (1.567) data 0.001 (0.002) loss 1.7871 (1.0676) acc 65.6250 (73.1441) lr 4.8943e-05 eta 1:31:41 +epoch [47/50] batch [495/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.0742 (1.0673) acc 71.8750 (73.1818) lr 4.8943e-05 eta 1:31:32 +epoch [47/50] batch [500/1000] time 1.551 (1.567) data 0.001 (0.002) loss 0.7075 (1.0666) acc 75.0000 (73.2062) lr 4.8943e-05 eta 1:31:24 +epoch [47/50] batch [505/1000] time 1.600 (1.567) data 0.001 (0.002) loss 0.5171 (1.0661) acc 84.3750 (73.1807) lr 4.8943e-05 eta 1:31:16 +epoch [47/50] batch [510/1000] time 1.558 (1.567) data 0.001 (0.002) loss 1.0615 (1.0658) acc 71.8750 (73.1618) lr 4.8943e-05 eta 1:31:08 +epoch [47/50] batch [515/1000] time 1.530 (1.567) data 0.001 (0.002) loss 0.5459 (1.0658) acc 84.3750 (73.1371) lr 4.8943e-05 eta 1:31:00 +epoch [47/50] batch [520/1000] time 1.557 (1.567) data 0.000 (0.002) loss 0.8130 (1.0650) acc 87.5000 (73.1971) lr 4.8943e-05 eta 1:30:52 +epoch [47/50] batch [525/1000] time 1.549 (1.567) data 0.001 (0.002) loss 1.4307 (1.0659) acc 81.2500 (73.2262) lr 4.8943e-05 eta 1:30:44 +epoch [47/50] batch [530/1000] time 1.576 (1.567) data 0.001 (0.002) loss 
1.1982 (1.0678) acc 78.1250 (73.1840) lr 4.8943e-05 eta 1:30:36 +epoch [47/50] batch [535/1000] time 1.576 (1.567) data 0.000 (0.002) loss 1.2109 (1.0704) acc 71.8750 (73.1542) lr 4.8943e-05 eta 1:30:28 +epoch [47/50] batch [540/1000] time 1.708 (1.567) data 0.000 (0.002) loss 1.2744 (1.0709) acc 65.6250 (73.0671) lr 4.8943e-05 eta 1:30:21 +epoch [47/50] batch [545/1000] time 1.536 (1.567) data 0.000 (0.002) loss 0.8579 (1.0705) acc 81.2500 (73.0906) lr 4.8943e-05 eta 1:30:13 +epoch [47/50] batch [550/1000] time 1.570 (1.567) data 0.000 (0.002) loss 1.0146 (1.0698) acc 75.0000 (73.1193) lr 4.8943e-05 eta 1:30:06 +epoch [47/50] batch [555/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.2910 (1.0706) acc 59.3750 (73.0968) lr 4.8943e-05 eta 1:29:57 +epoch [47/50] batch [560/1000] time 1.549 (1.567) data 0.000 (0.002) loss 1.0068 (1.0724) acc 65.6250 (73.0525) lr 4.8943e-05 eta 1:29:49 +epoch [47/50] batch [565/1000] time 1.554 (1.567) data 0.001 (0.002) loss 0.6636 (1.0709) acc 84.3750 (73.0642) lr 4.8943e-05 eta 1:29:41 +epoch [47/50] batch [570/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.0762 (1.0722) acc 68.7500 (73.0208) lr 4.8943e-05 eta 1:29:33 +epoch [47/50] batch [575/1000] time 1.565 (1.567) data 0.000 (0.002) loss 1.9062 (1.0744) acc 65.6250 (72.9728) lr 4.8943e-05 eta 1:29:25 +epoch [47/50] batch [580/1000] time 1.575 (1.567) data 0.000 (0.002) loss 1.4258 (1.0768) acc 65.6250 (72.9041) lr 4.8943e-05 eta 1:29:18 +epoch [47/50] batch [585/1000] time 1.722 (1.567) data 0.000 (0.002) loss 1.3916 (1.0775) acc 65.6250 (72.8846) lr 4.8943e-05 eta 1:29:11 +epoch [47/50] batch [590/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.5830 (1.0789) acc 71.8750 (72.8761) lr 4.8943e-05 eta 1:29:03 +epoch [47/50] batch [595/1000] time 1.561 (1.567) data 0.001 (0.002) loss 0.7090 (1.0770) acc 84.3750 (72.9307) lr 4.8943e-05 eta 1:28:55 +epoch [47/50] batch [600/1000] time 1.574 (1.567) data 0.001 (0.002) loss 0.7031 (1.0775) acc 81.2500 (72.9062) lr 4.8943e-05 
eta 1:28:47 +epoch [47/50] batch [605/1000] time 1.560 (1.567) data 0.001 (0.002) loss 0.5566 (1.0755) acc 84.3750 (72.9132) lr 4.8943e-05 eta 1:28:39 +epoch [47/50] batch [610/1000] time 1.556 (1.567) data 0.001 (0.002) loss 0.9448 (1.0754) acc 81.2500 (72.9150) lr 4.8943e-05 eta 1:28:31 +epoch [47/50] batch [615/1000] time 1.567 (1.567) data 0.001 (0.002) loss 0.8149 (1.0764) acc 81.2500 (72.9116) lr 4.8943e-05 eta 1:28:23 +epoch [47/50] batch [620/1000] time 1.551 (1.567) data 0.000 (0.002) loss 1.2227 (1.0763) acc 71.8750 (72.9083) lr 4.8943e-05 eta 1:28:15 +epoch [47/50] batch [625/1000] time 1.571 (1.567) data 0.001 (0.002) loss 1.2178 (1.0773) acc 65.6250 (72.9000) lr 4.8943e-05 eta 1:28:07 +epoch [47/50] batch [630/1000] time 1.581 (1.567) data 0.000 (0.002) loss 0.6694 (1.0774) acc 75.0000 (72.8819) lr 4.8943e-05 eta 1:28:00 +epoch [47/50] batch [635/1000] time 1.543 (1.567) data 0.001 (0.002) loss 0.9629 (1.0776) acc 75.0000 (72.8740) lr 4.8943e-05 eta 1:27:51 +epoch [47/50] batch [640/1000] time 1.538 (1.567) data 0.000 (0.002) loss 1.3340 (1.0786) acc 75.0000 (72.8564) lr 4.8943e-05 eta 1:27:43 +epoch [47/50] batch [645/1000] time 1.591 (1.566) data 0.001 (0.002) loss 0.6582 (1.0774) acc 71.8750 (72.8731) lr 4.8943e-05 eta 1:27:35 +epoch [47/50] batch [650/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.9839 (1.0791) acc 75.0000 (72.8510) lr 4.8943e-05 eta 1:27:27 +epoch [47/50] batch [655/1000] time 1.543 (1.566) data 0.000 (0.002) loss 0.6040 (1.0804) acc 84.3750 (72.8387) lr 4.8943e-05 eta 1:27:19 +epoch [47/50] batch [660/1000] time 1.562 (1.566) data 0.001 (0.002) loss 1.3447 (1.0823) acc 65.6250 (72.8172) lr 4.8943e-05 eta 1:27:11 +epoch [47/50] batch [665/1000] time 1.549 (1.566) data 0.001 (0.002) loss 0.7637 (1.0808) acc 81.2500 (72.8336) lr 4.8943e-05 eta 1:27:03 +epoch [47/50] batch [670/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.8013 (1.0807) acc 78.1250 (72.8498) lr 4.8943e-05 eta 1:26:54 +epoch [47/50] batch [675/1000] time 
1.527 (1.566) data 0.000 (0.002) loss 1.4414 (1.0804) acc 68.7500 (72.8380) lr 4.8943e-05 eta 1:26:46 +epoch [47/50] batch [680/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.0273 (1.0795) acc 71.8750 (72.8447) lr 4.8943e-05 eta 1:26:38 +epoch [47/50] batch [685/1000] time 1.582 (1.566) data 0.000 (0.002) loss 1.0469 (1.0780) acc 71.8750 (72.8650) lr 4.8943e-05 eta 1:26:30 +epoch [47/50] batch [690/1000] time 1.547 (1.566) data 0.001 (0.002) loss 0.9790 (1.0780) acc 71.8750 (72.8487) lr 4.8943e-05 eta 1:26:23 +epoch [47/50] batch [695/1000] time 1.557 (1.566) data 0.000 (0.002) loss 1.4336 (1.0785) acc 71.8750 (72.8732) lr 4.8943e-05 eta 1:26:16 +epoch [47/50] batch [700/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.8438 (1.0808) acc 71.8750 (72.8527) lr 4.8943e-05 eta 1:26:08 +epoch [47/50] batch [705/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.1553 (1.0812) acc 78.1250 (72.8679) lr 4.8943e-05 eta 1:26:00 +epoch [47/50] batch [710/1000] time 1.572 (1.566) data 0.001 (0.002) loss 0.6426 (1.0789) acc 75.0000 (72.9181) lr 4.8943e-05 eta 1:25:53 +epoch [47/50] batch [715/1000] time 1.548 (1.566) data 0.001 (0.002) loss 1.4365 (1.0788) acc 68.7500 (72.9108) lr 4.8943e-05 eta 1:25:44 +epoch [47/50] batch [720/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.8647 (1.0783) acc 78.1250 (72.9080) lr 4.8943e-05 eta 1:25:37 +epoch [47/50] batch [725/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.2676 (1.0788) acc 68.7500 (72.9138) lr 4.8943e-05 eta 1:25:28 +epoch [47/50] batch [730/1000] time 1.571 (1.566) data 0.001 (0.002) loss 1.0244 (1.0788) acc 81.2500 (72.9281) lr 4.8943e-05 eta 1:25:21 +epoch [47/50] batch [735/1000] time 1.552 (1.566) data 0.001 (0.002) loss 0.8105 (1.0783) acc 71.8750 (72.9124) lr 4.8943e-05 eta 1:25:13 +epoch [47/50] batch [740/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.5771 (1.0777) acc 56.2500 (72.9181) lr 4.8943e-05 eta 1:25:05 +epoch [47/50] batch [745/1000] time 1.542 (1.566) data 0.001 (0.002) loss 0.5938 (1.0769) 
acc 84.3750 (72.9404) lr 4.8943e-05 eta 1:24:57 +epoch [47/50] batch [750/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.9131 (1.0759) acc 68.7500 (72.9333) lr 4.8943e-05 eta 1:24:49 +epoch [47/50] batch [755/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.1572 (1.0742) acc 62.5000 (72.9553) lr 4.8943e-05 eta 1:24:41 +epoch [47/50] batch [760/1000] time 1.531 (1.566) data 0.000 (0.002) loss 1.1201 (1.0727) acc 68.7500 (72.9934) lr 4.8943e-05 eta 1:24:33 +epoch [47/50] batch [765/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.5786 (1.0720) acc 87.5000 (72.9984) lr 4.8943e-05 eta 1:24:25 +epoch [47/50] batch [770/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.8115 (1.0720) acc 53.1250 (73.0032) lr 4.8943e-05 eta 1:24:17 +epoch [47/50] batch [775/1000] time 1.550 (1.566) data 0.000 (0.002) loss 0.7734 (1.0703) acc 75.0000 (73.0202) lr 4.8943e-05 eta 1:24:09 +epoch [47/50] batch [780/1000] time 1.569 (1.566) data 0.001 (0.002) loss 0.9873 (1.0706) acc 68.7500 (73.0208) lr 4.8943e-05 eta 1:24:02 +epoch [47/50] batch [785/1000] time 1.544 (1.566) data 0.001 (0.002) loss 0.5649 (1.0696) acc 81.2500 (73.0255) lr 4.8943e-05 eta 1:23:54 +epoch [47/50] batch [790/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.9995 (1.0707) acc 75.0000 (72.9984) lr 4.8943e-05 eta 1:23:46 +epoch [47/50] batch [795/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.0928 (1.0700) acc 78.1250 (73.0110) lr 4.8943e-05 eta 1:23:38 +epoch [47/50] batch [800/1000] time 1.578 (1.566) data 0.001 (0.002) loss 0.8203 (1.0701) acc 81.2500 (73.0078) lr 4.8943e-05 eta 1:23:30 +epoch [47/50] batch [805/1000] time 1.539 (1.566) data 0.001 (0.002) loss 1.9326 (1.0712) acc 59.3750 (72.9930) lr 4.8943e-05 eta 1:23:22 +epoch [47/50] batch [810/1000] time 1.545 (1.566) data 0.001 (0.002) loss 1.2100 (1.0709) acc 78.1250 (73.0093) lr 4.8943e-05 eta 1:23:14 +epoch [47/50] batch [815/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.7964 (1.0704) acc 81.2500 (73.0138) lr 4.8943e-05 eta 1:23:06 
+epoch [47/50] batch [820/1000] time 1.564 (1.566) data 0.001 (0.002) loss 0.9434 (1.0696) acc 71.8750 (73.0069) lr 4.8943e-05 eta 1:22:58 +epoch [47/50] batch [825/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.6260 (1.0698) acc 59.3750 (72.9962) lr 4.8943e-05 eta 1:22:50 +epoch [47/50] batch [830/1000] time 1.574 (1.566) data 0.000 (0.002) loss 0.3889 (1.0692) acc 90.6250 (73.0233) lr 4.8943e-05 eta 1:22:43 +epoch [47/50] batch [835/1000] time 1.546 (1.566) data 0.000 (0.002) loss 1.1230 (1.0693) acc 65.6250 (73.0277) lr 4.8943e-05 eta 1:22:35 +epoch [47/50] batch [840/1000] time 1.541 (1.565) data 0.000 (0.002) loss 0.9834 (1.0693) acc 78.1250 (73.0506) lr 4.8943e-05 eta 1:22:26 +epoch [47/50] batch [845/1000] time 1.600 (1.566) data 0.001 (0.002) loss 1.1523 (1.0701) acc 75.0000 (73.0325) lr 4.8943e-05 eta 1:22:19 +epoch [47/50] batch [850/1000] time 1.545 (1.566) data 0.001 (0.002) loss 1.7471 (1.0722) acc 62.5000 (73.0037) lr 4.8943e-05 eta 1:22:11 +epoch [47/50] batch [855/1000] time 1.575 (1.566) data 0.001 (0.002) loss 0.3154 (1.0711) acc 87.5000 (73.0044) lr 4.8943e-05 eta 1:22:03 +epoch [47/50] batch [860/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.4521 (1.0714) acc 71.8750 (72.9942) lr 4.8943e-05 eta 1:21:55 +epoch [47/50] batch [865/1000] time 1.574 (1.566) data 0.001 (0.002) loss 1.1523 (1.0709) acc 62.5000 (72.9877) lr 4.8943e-05 eta 1:21:48 +epoch [47/50] batch [870/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.0684 (1.0694) acc 84.3750 (73.0280) lr 4.8943e-05 eta 1:21:40 +epoch [47/50] batch [875/1000] time 1.556 (1.565) data 0.001 (0.002) loss 1.1777 (1.0690) acc 68.7500 (73.0321) lr 4.8943e-05 eta 1:21:32 +epoch [47/50] batch [880/1000] time 1.592 (1.565) data 0.001 (0.002) loss 0.7534 (1.0689) acc 81.2500 (73.0220) lr 4.8943e-05 eta 1:21:24 +epoch [47/50] batch [885/1000] time 1.554 (1.565) data 0.000 (0.002) loss 1.1436 (1.0693) acc 71.8750 (73.0367) lr 4.8943e-05 eta 1:21:16 +epoch [47/50] batch [890/1000] time 1.570 (1.566) 
data 0.000 (0.002) loss 0.5015 (1.0678) acc 81.2500 (73.0758) lr 4.8943e-05 eta 1:21:09 +epoch [47/50] batch [895/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.8481 (1.0677) acc 87.5000 (73.0901) lr 4.8943e-05 eta 1:21:01 +epoch [47/50] batch [900/1000] time 1.586 (1.566) data 0.000 (0.002) loss 1.2695 (1.0680) acc 65.6250 (73.0729) lr 4.8943e-05 eta 1:20:53 +epoch [47/50] batch [905/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.3584 (1.0686) acc 50.0000 (73.0352) lr 4.8943e-05 eta 1:20:45 +epoch [47/50] batch [910/1000] time 1.551 (1.566) data 0.000 (0.002) loss 1.1738 (1.0676) acc 62.5000 (73.0391) lr 4.8943e-05 eta 1:20:37 +epoch [47/50] batch [915/1000] time 1.570 (1.566) data 0.000 (0.002) loss 1.0977 (1.0684) acc 75.0000 (73.0157) lr 4.8943e-05 eta 1:20:29 +epoch [47/50] batch [920/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.2188 (1.0692) acc 75.0000 (73.0129) lr 4.8943e-05 eta 1:20:21 +epoch [47/50] batch [925/1000] time 1.542 (1.565) data 0.000 (0.002) loss 1.4033 (1.0702) acc 68.7500 (72.9966) lr 4.8943e-05 eta 1:20:13 +epoch [47/50] batch [930/1000] time 1.728 (1.566) data 0.001 (0.001) loss 0.8359 (1.0700) acc 75.0000 (73.0040) lr 4.8943e-05 eta 1:20:06 +epoch [47/50] batch [935/1000] time 1.582 (1.566) data 0.001 (0.001) loss 1.3262 (1.0707) acc 59.3750 (72.9947) lr 4.8943e-05 eta 1:19:58 +epoch [47/50] batch [940/1000] time 1.584 (1.566) data 0.001 (0.001) loss 1.1680 (1.0700) acc 71.8750 (73.0286) lr 4.8943e-05 eta 1:19:51 +epoch [47/50] batch [945/1000] time 1.582 (1.566) data 0.001 (0.001) loss 1.3262 (1.0703) acc 75.0000 (73.0324) lr 4.8943e-05 eta 1:19:43 +epoch [47/50] batch [950/1000] time 1.560 (1.566) data 0.000 (0.001) loss 0.7646 (1.0715) acc 81.2500 (73.0099) lr 4.8943e-05 eta 1:19:35 +epoch [47/50] batch [955/1000] time 1.574 (1.566) data 0.000 (0.001) loss 1.2041 (1.0732) acc 71.8750 (72.9876) lr 4.8943e-05 eta 1:19:27 +epoch [47/50] batch [960/1000] time 1.536 (1.566) data 0.001 (0.001) loss 0.9390 (1.0727) acc 71.8750 
(72.9980) lr 4.8943e-05 eta 1:19:19 +epoch [47/50] batch [965/1000] time 1.545 (1.566) data 0.001 (0.001) loss 0.6216 (1.0727) acc 84.3750 (73.0084) lr 4.8943e-05 eta 1:19:11 +epoch [47/50] batch [970/1000] time 1.537 (1.566) data 0.000 (0.001) loss 1.0225 (1.0731) acc 71.8750 (73.0026) lr 4.8943e-05 eta 1:19:04 +epoch [47/50] batch [975/1000] time 1.576 (1.566) data 0.000 (0.001) loss 1.1357 (1.0733) acc 71.8750 (73.0096) lr 4.8943e-05 eta 1:18:56 +epoch [47/50] batch [980/1000] time 1.568 (1.566) data 0.001 (0.001) loss 1.4844 (1.0735) acc 68.7500 (73.0006) lr 4.8943e-05 eta 1:18:48 +epoch [47/50] batch [985/1000] time 1.551 (1.566) data 0.001 (0.001) loss 0.9824 (1.0747) acc 78.1250 (72.9886) lr 4.8943e-05 eta 1:18:40 +epoch [47/50] batch [990/1000] time 1.543 (1.566) data 0.000 (0.001) loss 0.4468 (1.0741) acc 90.6250 (72.9924) lr 4.8943e-05 eta 1:18:32 +epoch [47/50] batch [995/1000] time 1.563 (1.566) data 0.000 (0.001) loss 1.1211 (1.0748) acc 68.7500 (72.9805) lr 4.8943e-05 eta 1:18:25 +epoch [47/50] batch [1000/1000] time 1.569 (1.566) data 0.000 (0.001) loss 1.0752 (1.0750) acc 75.0000 (72.9688) lr 3.1417e-05 eta 1:18:17 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,383 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.3% +epoch [48/50] batch [5/1000] time 1.571 (1.699) data 0.000 (0.189) loss 1.2422 (1.1694) acc 68.7500 (66.8750) lr 3.1417e-05 eta 1:24:47 +epoch [48/50] batch [10/1000] time 1.569 (1.633) data 0.000 (0.095) loss 1.0469 (1.0943) acc 78.1250 (70.6250) lr 3.1417e-05 eta 1:21:22 +epoch [48/50] batch [15/1000] time 1.569 (1.634) data 0.000 (0.063) loss 0.9712 (1.0634) acc 71.8750 (71.2500) lr 3.1417e-05 eta 1:21:16 +epoch [48/50] batch [20/1000] time 1.547 (1.618) data 0.001 (0.048) loss 1.5117 (1.1425) acc 65.6250 (70.6250) lr 3.1417e-05 eta 1:20:20 +epoch [48/50] batch [25/1000] time 1.579 (1.609) data 0.001 (0.038) loss 0.5591 (1.0919) acc 84.3750 (71.1250) lr 3.1417e-05 eta 1:19:47 +epoch [48/50] batch [30/1000] 
time 1.570 (1.603) data 0.001 (0.032) loss 0.7119 (1.0588) acc 84.3750 (72.6042) lr 3.1417e-05 eta 1:19:20 +epoch [48/50] batch [35/1000] time 1.579 (1.597) data 0.000 (0.027) loss 0.8628 (1.0591) acc 78.1250 (73.2143) lr 3.1417e-05 eta 1:18:55 +epoch [48/50] batch [40/1000] time 1.559 (1.593) data 0.000 (0.024) loss 0.9736 (1.0619) acc 75.0000 (72.9688) lr 3.1417e-05 eta 1:18:34 +epoch [48/50] batch [45/1000] time 1.530 (1.588) data 0.000 (0.021) loss 0.2778 (1.0140) acc 90.6250 (73.9583) lr 3.1417e-05 eta 1:18:13 +epoch [48/50] batch [50/1000] time 1.560 (1.586) data 0.000 (0.019) loss 1.0781 (1.0264) acc 78.1250 (73.8750) lr 3.1417e-05 eta 1:17:57 +epoch [48/50] batch [55/1000] time 1.572 (1.584) data 0.000 (0.018) loss 0.4934 (1.0370) acc 78.1250 (73.9773) lr 3.1417e-05 eta 1:17:45 +epoch [48/50] batch [60/1000] time 1.561 (1.586) data 0.001 (0.016) loss 2.0586 (1.0428) acc 56.2500 (73.5938) lr 3.1417e-05 eta 1:17:42 +epoch [48/50] batch [65/1000] time 1.553 (1.585) data 0.001 (0.015) loss 0.7417 (1.0297) acc 81.2500 (73.9904) lr 3.1417e-05 eta 1:17:31 +epoch [48/50] batch [70/1000] time 1.557 (1.583) data 0.000 (0.014) loss 1.0332 (1.0309) acc 68.7500 (73.8839) lr 3.1417e-05 eta 1:17:17 +epoch [48/50] batch [75/1000] time 1.559 (1.581) data 0.000 (0.013) loss 0.9473 (1.0321) acc 78.1250 (73.7500) lr 3.1417e-05 eta 1:17:03 +epoch [48/50] batch [80/1000] time 1.571 (1.579) data 0.001 (0.012) loss 1.2549 (1.0406) acc 68.7500 (73.5156) lr 3.1417e-05 eta 1:16:51 +epoch [48/50] batch [85/1000] time 1.550 (1.578) data 0.000 (0.012) loss 1.3086 (1.0431) acc 65.6250 (73.5662) lr 3.1417e-05 eta 1:16:41 +epoch [48/50] batch [90/1000] time 1.576 (1.578) data 0.001 (0.011) loss 1.1543 (1.0415) acc 65.6250 (73.6111) lr 3.1417e-05 eta 1:16:32 +epoch [48/50] batch [95/1000] time 1.573 (1.578) data 0.001 (0.010) loss 0.7817 (1.0370) acc 78.1250 (73.6184) lr 3.1417e-05 eta 1:16:23 +epoch [48/50] batch [100/1000] time 1.538 (1.577) data 0.000 (0.010) loss 1.6309 (1.0393) acc 
59.3750 (73.5625) lr 3.1417e-05 eta 1:16:11 +epoch [48/50] batch [105/1000] time 1.568 (1.576) data 0.000 (0.009) loss 1.2998 (1.0365) acc 68.7500 (73.6607) lr 3.1417e-05 eta 1:16:01 +epoch [48/50] batch [110/1000] time 1.533 (1.575) data 0.000 (0.009) loss 1.1211 (1.0369) acc 78.1250 (73.8068) lr 3.1417e-05 eta 1:15:50 +epoch [48/50] batch [115/1000] time 1.560 (1.573) data 0.000 (0.009) loss 1.2051 (1.0381) acc 71.8750 (73.8315) lr 3.1417e-05 eta 1:15:38 +epoch [48/50] batch [120/1000] time 1.586 (1.575) data 0.001 (0.008) loss 1.1113 (1.0423) acc 59.3750 (73.5938) lr 3.1417e-05 eta 1:15:34 +epoch [48/50] batch [125/1000] time 1.583 (1.574) data 0.000 (0.008) loss 1.4180 (1.0465) acc 65.6250 (73.5250) lr 3.1417e-05 eta 1:15:24 +epoch [48/50] batch [130/1000] time 1.579 (1.574) data 0.000 (0.008) loss 0.7734 (1.0532) acc 71.8750 (73.3894) lr 3.1417e-05 eta 1:15:16 +epoch [48/50] batch [135/1000] time 1.577 (1.573) data 0.000 (0.007) loss 1.3799 (1.0621) acc 53.1250 (73.0324) lr 3.1417e-05 eta 1:15:07 +epoch [48/50] batch [140/1000] time 1.541 (1.573) data 0.001 (0.007) loss 0.7905 (1.0603) acc 78.1250 (73.1027) lr 3.1417e-05 eta 1:14:58 +epoch [48/50] batch [145/1000] time 1.561 (1.573) data 0.000 (0.007) loss 1.4238 (1.0586) acc 62.5000 (73.1250) lr 3.1417e-05 eta 1:14:49 +epoch [48/50] batch [150/1000] time 1.570 (1.572) data 0.000 (0.007) loss 0.9873 (1.0567) acc 71.8750 (73.2083) lr 3.1417e-05 eta 1:14:40 +epoch [48/50] batch [155/1000] time 1.569 (1.572) data 0.000 (0.007) loss 0.6025 (1.0533) acc 78.1250 (73.1452) lr 3.1417e-05 eta 1:14:31 +epoch [48/50] batch [160/1000] time 1.533 (1.571) data 0.001 (0.006) loss 1.6807 (1.0549) acc 68.7500 (73.2227) lr 3.1417e-05 eta 1:14:22 +epoch [48/50] batch [165/1000] time 1.554 (1.572) data 0.000 (0.006) loss 1.2715 (1.0535) acc 71.8750 (73.2386) lr 3.1417e-05 eta 1:14:16 +epoch [48/50] batch [170/1000] time 1.581 (1.572) data 0.001 (0.006) loss 0.8062 (1.0498) acc 75.0000 (73.3640) lr 3.1417e-05 eta 1:14:08 +epoch 
[48/50] batch [175/1000] time 1.572 (1.572) data 0.001 (0.006) loss 0.7632 (1.0460) acc 78.1250 (73.4464) lr 3.1417e-05 eta 1:13:59 +epoch [48/50] batch [180/1000] time 1.554 (1.571) data 0.001 (0.006) loss 1.6777 (1.0491) acc 59.3750 (73.4201) lr 3.1417e-05 eta 1:13:51 +epoch [48/50] batch [185/1000] time 1.556 (1.571) data 0.000 (0.006) loss 0.8623 (1.0453) acc 78.1250 (73.4628) lr 3.1417e-05 eta 1:13:43 +epoch [48/50] batch [190/1000] time 1.593 (1.571) data 0.000 (0.005) loss 1.3086 (1.0430) acc 62.5000 (73.5033) lr 3.1417e-05 eta 1:13:35 +epoch [48/50] batch [195/1000] time 1.567 (1.571) data 0.001 (0.005) loss 1.1201 (1.0499) acc 68.7500 (73.3494) lr 3.1417e-05 eta 1:13:26 +epoch [48/50] batch [200/1000] time 1.547 (1.571) data 0.001 (0.005) loss 0.9902 (1.0534) acc 75.0000 (73.2656) lr 3.1417e-05 eta 1:13:17 +epoch [48/50] batch [205/1000] time 1.586 (1.570) data 0.000 (0.005) loss 1.0322 (1.0583) acc 65.6250 (73.0793) lr 3.1417e-05 eta 1:13:09 +epoch [48/50] batch [210/1000] time 1.588 (1.571) data 0.001 (0.005) loss 0.9644 (1.0618) acc 68.7500 (73.0208) lr 3.1417e-05 eta 1:13:03 +epoch [48/50] batch [215/1000] time 1.567 (1.571) data 0.000 (0.005) loss 1.3398 (1.0629) acc 65.6250 (72.9651) lr 3.1417e-05 eta 1:12:55 +epoch [48/50] batch [220/1000] time 1.549 (1.571) data 0.000 (0.005) loss 1.2705 (1.0617) acc 78.1250 (73.1108) lr 3.1417e-05 eta 1:12:47 +epoch [48/50] batch [225/1000] time 1.595 (1.571) data 0.001 (0.005) loss 0.8848 (1.0694) acc 75.0000 (73.0278) lr 3.1417e-05 eta 1:12:39 +epoch [48/50] batch [230/1000] time 1.574 (1.571) data 0.000 (0.005) loss 1.5381 (1.0754) acc 59.3750 (72.9484) lr 3.1417e-05 eta 1:12:31 +epoch [48/50] batch [235/1000] time 1.585 (1.571) data 0.000 (0.005) loss 0.6011 (1.0722) acc 84.3750 (73.0452) lr 3.1417e-05 eta 1:12:23 +epoch [48/50] batch [240/1000] time 1.549 (1.570) data 0.000 (0.004) loss 1.1201 (1.0727) acc 68.7500 (73.1250) lr 3.1417e-05 eta 1:12:14 +epoch [48/50] batch [245/1000] time 1.555 (1.570) data 
0.001 (0.004) loss 1.3115 (1.0776) acc 78.1250 (73.0995) lr 3.1417e-05 eta 1:12:06 +epoch [48/50] batch [250/1000] time 1.574 (1.570) data 0.000 (0.004) loss 1.2490 (1.0766) acc 65.6250 (73.1125) lr 3.1417e-05 eta 1:11:58 +epoch [48/50] batch [255/1000] time 1.574 (1.570) data 0.000 (0.004) loss 1.5488 (1.0762) acc 65.6250 (73.1127) lr 3.1417e-05 eta 1:11:50 +epoch [48/50] batch [260/1000] time 1.548 (1.570) data 0.000 (0.004) loss 0.5630 (1.0741) acc 78.1250 (73.1490) lr 3.1417e-05 eta 1:11:41 +epoch [48/50] batch [265/1000] time 1.550 (1.570) data 0.000 (0.004) loss 1.2959 (1.0800) acc 68.7500 (72.9835) lr 3.1417e-05 eta 1:11:33 +epoch [48/50] batch [270/1000] time 1.727 (1.570) data 0.001 (0.004) loss 0.4924 (1.0756) acc 87.5000 (73.1481) lr 3.1417e-05 eta 1:11:26 +epoch [48/50] batch [275/1000] time 1.582 (1.570) data 0.000 (0.004) loss 1.1357 (1.0771) acc 62.5000 (73.1023) lr 3.1417e-05 eta 1:11:18 +epoch [48/50] batch [280/1000] time 1.594 (1.570) data 0.000 (0.004) loss 1.1289 (1.0789) acc 65.6250 (73.0246) lr 3.1417e-05 eta 1:11:10 +epoch [48/50] batch [285/1000] time 1.560 (1.570) data 0.000 (0.004) loss 0.5293 (1.0840) acc 81.2500 (72.9386) lr 3.1417e-05 eta 1:11:02 +epoch [48/50] batch [290/1000] time 1.549 (1.570) data 0.000 (0.004) loss 1.2051 (1.0843) acc 71.8750 (72.8879) lr 3.1417e-05 eta 1:10:54 +epoch [48/50] batch [295/1000] time 1.554 (1.570) data 0.000 (0.004) loss 0.7876 (1.0828) acc 75.0000 (72.9131) lr 3.1417e-05 eta 1:10:45 +epoch [48/50] batch [300/1000] time 1.552 (1.569) data 0.001 (0.004) loss 0.6245 (1.0831) acc 84.3750 (72.9271) lr 3.1417e-05 eta 1:10:37 +epoch [48/50] batch [305/1000] time 1.558 (1.569) data 0.001 (0.004) loss 0.8047 (1.0828) acc 78.1250 (72.9611) lr 3.1417e-05 eta 1:10:29 +epoch [48/50] batch [310/1000] time 1.523 (1.569) data 0.001 (0.004) loss 0.6021 (1.0810) acc 81.2500 (72.9234) lr 3.1417e-05 eta 1:10:21 +epoch [48/50] batch [315/1000] time 1.714 (1.570) data 0.000 (0.003) loss 1.1973 (1.0849) acc 71.8750 
(72.8770) lr 3.1417e-05 eta 1:10:14 +epoch [48/50] batch [320/1000] time 1.563 (1.570) data 0.000 (0.003) loss 1.9629 (1.0842) acc 71.8750 (72.9590) lr 3.1417e-05 eta 1:10:06 +epoch [48/50] batch [325/1000] time 1.547 (1.569) data 0.000 (0.003) loss 1.4043 (1.0825) acc 71.8750 (73.0096) lr 3.1417e-05 eta 1:09:57 +epoch [48/50] batch [330/1000] time 1.547 (1.569) data 0.000 (0.003) loss 1.1260 (1.0808) acc 65.6250 (73.0019) lr 3.1417e-05 eta 1:09:49 +epoch [48/50] batch [335/1000] time 1.563 (1.569) data 0.001 (0.003) loss 1.1260 (1.0786) acc 68.7500 (72.9757) lr 3.1417e-05 eta 1:09:42 +epoch [48/50] batch [340/1000] time 1.565 (1.569) data 0.001 (0.003) loss 1.2119 (1.0791) acc 68.7500 (73.0147) lr 3.1417e-05 eta 1:09:34 +epoch [48/50] batch [345/1000] time 1.536 (1.569) data 0.001 (0.003) loss 1.0527 (1.0805) acc 75.0000 (72.9982) lr 3.1417e-05 eta 1:09:25 +epoch [48/50] batch [350/1000] time 1.553 (1.569) data 0.000 (0.003) loss 0.8867 (1.0824) acc 81.2500 (72.9643) lr 3.1417e-05 eta 1:09:17 +epoch [48/50] batch [355/1000] time 1.548 (1.569) data 0.000 (0.003) loss 0.9429 (1.0838) acc 78.1250 (72.9577) lr 3.1417e-05 eta 1:09:09 +epoch [48/50] batch [360/1000] time 1.551 (1.569) data 0.000 (0.003) loss 1.1943 (1.0842) acc 65.6250 (72.9688) lr 3.1417e-05 eta 1:09:02 +epoch [48/50] batch [365/1000] time 1.567 (1.569) data 0.000 (0.003) loss 1.0391 (1.0861) acc 75.0000 (72.8682) lr 3.1417e-05 eta 1:08:54 +epoch [48/50] batch [370/1000] time 1.572 (1.569) data 0.000 (0.003) loss 0.7373 (1.0826) acc 81.2500 (72.9307) lr 3.1417e-05 eta 1:08:47 +epoch [48/50] batch [375/1000] time 1.582 (1.569) data 0.001 (0.003) loss 0.5601 (1.0788) acc 87.5000 (72.9917) lr 3.1417e-05 eta 1:08:39 +epoch [48/50] batch [380/1000] time 1.553 (1.569) data 0.000 (0.003) loss 0.9351 (1.0792) acc 62.5000 (72.9441) lr 3.1417e-05 eta 1:08:30 +epoch [48/50] batch [385/1000] time 1.562 (1.569) data 0.001 (0.003) loss 0.5962 (1.0773) acc 84.3750 (72.9302) lr 3.1417e-05 eta 1:08:22 +epoch [48/50] 
batch [390/1000] time 1.566 (1.569) data 0.001 (0.003) loss 0.5513 (1.0764) acc 90.6250 (72.9327) lr 3.1417e-05 eta 1:08:14 +epoch [48/50] batch [395/1000] time 1.546 (1.569) data 0.000 (0.003) loss 1.1172 (1.0748) acc 62.5000 (72.9035) lr 3.1417e-05 eta 1:08:05 +epoch [48/50] batch [400/1000] time 1.548 (1.568) data 0.001 (0.003) loss 1.6113 (1.0746) acc 53.1250 (72.8828) lr 3.1417e-05 eta 1:07:57 +epoch [48/50] batch [405/1000] time 1.565 (1.568) data 0.000 (0.003) loss 0.4866 (1.0729) acc 81.2500 (72.9167) lr 3.1417e-05 eta 1:07:49 +epoch [48/50] batch [410/1000] time 1.575 (1.568) data 0.001 (0.003) loss 1.1084 (1.0711) acc 71.8750 (72.9573) lr 3.1417e-05 eta 1:07:41 +epoch [48/50] batch [415/1000] time 1.563 (1.568) data 0.000 (0.003) loss 0.7080 (1.0722) acc 78.1250 (72.9292) lr 3.1417e-05 eta 1:07:33 +epoch [48/50] batch [420/1000] time 1.552 (1.568) data 0.001 (0.003) loss 0.9644 (1.0720) acc 81.2500 (72.9315) lr 3.1417e-05 eta 1:07:25 +epoch [48/50] batch [425/1000] time 1.533 (1.568) data 0.000 (0.003) loss 0.6880 (1.0693) acc 84.3750 (72.9706) lr 3.1417e-05 eta 1:07:17 +epoch [48/50] batch [430/1000] time 1.553 (1.568) data 0.001 (0.003) loss 0.8398 (1.0674) acc 81.2500 (73.0305) lr 3.1417e-05 eta 1:07:09 +epoch [48/50] batch [435/1000] time 1.571 (1.568) data 0.001 (0.003) loss 0.8125 (1.0647) acc 78.1250 (73.1034) lr 3.1417e-05 eta 1:07:02 +epoch [48/50] batch [440/1000] time 1.556 (1.568) data 0.001 (0.003) loss 0.8032 (1.0641) acc 71.8750 (73.0895) lr 3.1417e-05 eta 1:06:54 +epoch [48/50] batch [445/1000] time 1.539 (1.568) data 0.000 (0.003) loss 0.8169 (1.0630) acc 81.2500 (73.1390) lr 3.1417e-05 eta 1:06:45 +epoch [48/50] batch [450/1000] time 1.544 (1.568) data 0.001 (0.003) loss 1.0762 (1.0630) acc 75.0000 (73.1181) lr 3.1417e-05 eta 1:06:37 +epoch [48/50] batch [455/1000] time 1.564 (1.568) data 0.000 (0.003) loss 1.2246 (1.0612) acc 65.6250 (73.1181) lr 3.1417e-05 eta 1:06:29 +epoch [48/50] batch [460/1000] time 1.549 (1.568) data 0.001 
(0.003) loss 1.3262 (1.0590) acc 65.6250 (73.1793) lr 3.1417e-05 eta 1:06:21 +epoch [48/50] batch [465/1000] time 1.593 (1.568) data 0.001 (0.003) loss 1.0098 (1.0602) acc 71.8750 (73.1317) lr 3.1417e-05 eta 1:06:14 +epoch [48/50] batch [470/1000] time 1.565 (1.568) data 0.000 (0.002) loss 0.9590 (1.0602) acc 81.2500 (73.1250) lr 3.1417e-05 eta 1:06:07 +epoch [48/50] batch [475/1000] time 1.564 (1.568) data 0.001 (0.002) loss 1.1055 (1.0607) acc 68.7500 (73.1645) lr 3.1417e-05 eta 1:05:59 +epoch [48/50] batch [480/1000] time 1.557 (1.568) data 0.001 (0.002) loss 0.6367 (1.0594) acc 78.1250 (73.1706) lr 3.1417e-05 eta 1:05:51 +epoch [48/50] batch [485/1000] time 1.580 (1.568) data 0.001 (0.002) loss 1.1660 (1.0605) acc 68.7500 (73.1572) lr 3.1417e-05 eta 1:05:44 +epoch [48/50] batch [490/1000] time 1.538 (1.568) data 0.001 (0.002) loss 0.9839 (1.0611) acc 84.3750 (73.1505) lr 3.1417e-05 eta 1:05:36 +epoch [48/50] batch [495/1000] time 1.535 (1.568) data 0.000 (0.002) loss 1.0938 (1.0623) acc 75.0000 (73.1439) lr 3.1417e-05 eta 1:05:27 +epoch [48/50] batch [500/1000] time 1.566 (1.568) data 0.000 (0.002) loss 0.7886 (1.0615) acc 78.1250 (73.1437) lr 3.1417e-05 eta 1:05:19 +epoch [48/50] batch [505/1000] time 1.562 (1.568) data 0.001 (0.002) loss 1.2168 (1.0600) acc 65.6250 (73.1559) lr 3.1417e-05 eta 1:05:11 +epoch [48/50] batch [510/1000] time 1.541 (1.568) data 0.000 (0.002) loss 1.0908 (1.0600) acc 71.8750 (73.2108) lr 3.1417e-05 eta 1:05:04 +epoch [48/50] batch [515/1000] time 1.547 (1.568) data 0.000 (0.002) loss 0.7925 (1.0581) acc 78.1250 (73.2767) lr 3.1417e-05 eta 1:04:55 +epoch [48/50] batch [520/1000] time 1.568 (1.568) data 0.001 (0.002) loss 1.2246 (1.0606) acc 68.7500 (73.2091) lr 3.1417e-05 eta 1:04:48 +epoch [48/50] batch [525/1000] time 1.587 (1.568) data 0.001 (0.002) loss 0.9609 (1.0625) acc 75.0000 (73.1488) lr 3.1417e-05 eta 1:04:40 +epoch [48/50] batch [530/1000] time 1.561 (1.568) data 0.001 (0.002) loss 1.4043 (1.0645) acc 68.7500 (73.1014) lr 
3.1417e-05 eta 1:04:32 +epoch [48/50] batch [535/1000] time 1.568 (1.568) data 0.000 (0.002) loss 1.2646 (1.0678) acc 75.0000 (73.0432) lr 3.1417e-05 eta 1:04:24 +epoch [48/50] batch [540/1000] time 1.569 (1.568) data 0.001 (0.002) loss 1.3838 (1.0681) acc 65.6250 (73.0266) lr 3.1417e-05 eta 1:04:16 +epoch [48/50] batch [545/1000] time 1.574 (1.568) data 0.000 (0.002) loss 0.8652 (1.0669) acc 68.7500 (73.0161) lr 3.1417e-05 eta 1:04:08 +epoch [48/50] batch [550/1000] time 1.569 (1.568) data 0.001 (0.002) loss 1.4395 (1.0675) acc 68.7500 (73.0341) lr 3.1417e-05 eta 1:04:00 +epoch [48/50] batch [555/1000] time 1.560 (1.568) data 0.001 (0.002) loss 1.2578 (1.0674) acc 78.1250 (73.0518) lr 3.1417e-05 eta 1:03:53 +epoch [48/50] batch [560/1000] time 1.572 (1.568) data 0.000 (0.002) loss 1.0361 (1.0684) acc 71.8750 (73.0190) lr 3.1417e-05 eta 1:03:45 +epoch [48/50] batch [565/1000] time 1.561 (1.568) data 0.000 (0.002) loss 1.2881 (1.0691) acc 68.7500 (72.9978) lr 3.1417e-05 eta 1:03:37 +epoch [48/50] batch [570/1000] time 1.569 (1.568) data 0.001 (0.002) loss 0.7749 (1.0685) acc 75.0000 (73.0318) lr 3.1417e-05 eta 1:03:29 +epoch [48/50] batch [575/1000] time 1.564 (1.568) data 0.001 (0.002) loss 0.7129 (1.0693) acc 81.2500 (73.0217) lr 3.1417e-05 eta 1:03:22 +epoch [48/50] batch [580/1000] time 1.564 (1.568) data 0.001 (0.002) loss 0.4639 (1.0689) acc 90.6250 (73.0657) lr 3.1417e-05 eta 1:03:13 +epoch [48/50] batch [585/1000] time 1.560 (1.568) data 0.000 (0.002) loss 0.8101 (1.0671) acc 78.1250 (73.1090) lr 3.1417e-05 eta 1:03:06 +epoch [48/50] batch [590/1000] time 1.587 (1.568) data 0.000 (0.002) loss 0.9077 (1.0660) acc 78.1250 (73.1250) lr 3.1417e-05 eta 1:02:58 +epoch [48/50] batch [595/1000] time 1.583 (1.568) data 0.000 (0.002) loss 1.4971 (1.0672) acc 62.5000 (73.1408) lr 3.1417e-05 eta 1:02:50 +epoch [48/50] batch [600/1000] time 1.540 (1.568) data 0.000 (0.002) loss 0.9775 (1.0664) acc 75.0000 (73.1562) lr 3.1417e-05 eta 1:02:42 +epoch [48/50] batch 
[605/1000] time 1.559 (1.568) data 0.000 (0.002) loss 1.1611 (1.0666) acc 78.1250 (73.1870) lr 3.1417e-05 eta 1:02:34 +epoch [48/50] batch [610/1000] time 1.574 (1.568) data 0.000 (0.002) loss 1.0684 (1.0672) acc 68.7500 (73.1865) lr 3.1417e-05 eta 1:02:26 +epoch [48/50] batch [615/1000] time 1.578 (1.568) data 0.001 (0.002) loss 1.5771 (1.0699) acc 71.8750 (73.1352) lr 3.1417e-05 eta 1:02:18 +epoch [48/50] batch [620/1000] time 1.590 (1.568) data 0.001 (0.002) loss 0.8291 (1.0694) acc 75.0000 (73.1351) lr 3.1417e-05 eta 1:02:11 +epoch [48/50] batch [625/1000] time 1.573 (1.568) data 0.000 (0.002) loss 1.3115 (1.0692) acc 62.5000 (73.1250) lr 3.1417e-05 eta 1:02:03 +epoch [48/50] batch [630/1000] time 1.572 (1.568) data 0.000 (0.002) loss 0.3694 (1.0677) acc 87.5000 (73.1696) lr 3.1417e-05 eta 1:01:55 +epoch [48/50] batch [635/1000] time 1.546 (1.568) data 0.000 (0.002) loss 0.7529 (1.0674) acc 68.7500 (73.1594) lr 3.1417e-05 eta 1:01:47 +epoch [48/50] batch [640/1000] time 1.551 (1.568) data 0.000 (0.002) loss 1.4658 (1.0660) acc 62.5000 (73.1934) lr 3.1417e-05 eta 1:01:39 +epoch [48/50] batch [645/1000] time 1.586 (1.568) data 0.000 (0.002) loss 1.3779 (1.0672) acc 71.8750 (73.1589) lr 3.1417e-05 eta 1:01:31 +epoch [48/50] batch [650/1000] time 1.571 (1.568) data 0.001 (0.002) loss 0.5342 (1.0656) acc 84.3750 (73.2067) lr 3.1417e-05 eta 1:01:23 +epoch [48/50] batch [655/1000] time 1.530 (1.568) data 0.000 (0.002) loss 0.6523 (1.0661) acc 81.2500 (73.2300) lr 3.1417e-05 eta 1:01:15 +epoch [48/50] batch [660/1000] time 1.768 (1.568) data 0.000 (0.002) loss 1.1396 (1.0679) acc 68.7500 (73.1818) lr 3.1417e-05 eta 1:01:08 +epoch [48/50] batch [665/1000] time 1.554 (1.568) data 0.000 (0.002) loss 1.4990 (1.0688) acc 65.6250 (73.1908) lr 3.1417e-05 eta 1:01:00 +epoch [48/50] batch [670/1000] time 1.577 (1.568) data 0.001 (0.002) loss 1.8027 (1.0712) acc 68.7500 (73.1716) lr 3.1417e-05 eta 1:00:53 +epoch [48/50] batch [675/1000] time 1.543 (1.568) data 0.000 (0.002) loss 
1.0342 (1.0726) acc 78.1250 (73.1435) lr 3.1417e-05 eta 1:00:44 +epoch [48/50] batch [680/1000] time 1.542 (1.568) data 0.001 (0.002) loss 0.8906 (1.0716) acc 78.1250 (73.1342) lr 3.1417e-05 eta 1:00:36 +epoch [48/50] batch [685/1000] time 1.557 (1.568) data 0.000 (0.002) loss 1.2773 (1.0718) acc 65.6250 (73.1159) lr 3.1417e-05 eta 1:00:29 +epoch [48/50] batch [690/1000] time 1.581 (1.568) data 0.000 (0.002) loss 1.2480 (1.0713) acc 65.6250 (73.1159) lr 3.1417e-05 eta 1:00:21 +epoch [48/50] batch [695/1000] time 1.565 (1.568) data 0.000 (0.002) loss 0.9619 (1.0721) acc 71.8750 (73.1160) lr 3.1417e-05 eta 1:00:13 +epoch [48/50] batch [700/1000] time 1.538 (1.568) data 0.001 (0.002) loss 1.5029 (1.0725) acc 65.6250 (73.0848) lr 3.1417e-05 eta 1:00:05 +epoch [48/50] batch [705/1000] time 1.554 (1.568) data 0.001 (0.002) loss 1.4922 (1.0735) acc 71.8750 (73.0541) lr 3.1417e-05 eta 0:59:57 +epoch [48/50] batch [710/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.9595 (1.0741) acc 71.8750 (73.0458) lr 3.1417e-05 eta 0:59:49 +epoch [48/50] batch [715/1000] time 1.573 (1.567) data 0.001 (0.002) loss 0.8477 (1.0727) acc 75.0000 (73.0769) lr 3.1417e-05 eta 0:59:41 +epoch [48/50] batch [720/1000] time 1.544 (1.567) data 0.001 (0.002) loss 0.5586 (1.0726) acc 78.1250 (73.0946) lr 3.1417e-05 eta 0:59:33 +epoch [48/50] batch [725/1000] time 1.559 (1.567) data 0.000 (0.002) loss 1.2686 (1.0728) acc 71.8750 (73.0862) lr 3.1417e-05 eta 0:59:26 +epoch [48/50] batch [730/1000] time 1.553 (1.567) data 0.000 (0.002) loss 0.8452 (1.0721) acc 78.1250 (73.1036) lr 3.1417e-05 eta 0:59:18 +epoch [48/50] batch [735/1000] time 1.562 (1.567) data 0.001 (0.002) loss 1.2529 (1.0721) acc 75.0000 (73.0825) lr 3.1417e-05 eta 0:59:10 +epoch [48/50] batch [740/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.4990 (1.0721) acc 68.7500 (73.0870) lr 3.1417e-05 eta 0:59:02 +epoch [48/50] batch [745/1000] time 1.550 (1.567) data 0.000 (0.002) loss 1.3389 (1.0735) acc 78.1250 (73.0663) lr 3.1417e-05 
eta 0:58:54 +epoch [48/50] batch [750/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.0703 (1.0741) acc 71.8750 (73.0458) lr 3.1417e-05 eta 0:58:46 +epoch [48/50] batch [755/1000] time 1.570 (1.567) data 0.000 (0.002) loss 0.9995 (1.0737) acc 71.8750 (73.0422) lr 3.1417e-05 eta 0:58:38 +epoch [48/50] batch [760/1000] time 1.555 (1.567) data 0.000 (0.002) loss 0.8799 (1.0744) acc 71.8750 (73.0181) lr 3.1417e-05 eta 0:58:30 +epoch [48/50] batch [765/1000] time 1.553 (1.567) data 0.000 (0.002) loss 0.8921 (1.0743) acc 78.1250 (73.0106) lr 3.1417e-05 eta 0:58:22 +epoch [48/50] batch [770/1000] time 1.560 (1.567) data 0.000 (0.002) loss 1.2383 (1.0749) acc 62.5000 (73.0235) lr 3.1417e-05 eta 0:58:14 +epoch [48/50] batch [775/1000] time 1.578 (1.567) data 0.001 (0.002) loss 1.0342 (1.0735) acc 78.1250 (73.0605) lr 3.1417e-05 eta 0:58:07 +epoch [48/50] batch [780/1000] time 1.533 (1.567) data 0.000 (0.002) loss 1.3115 (1.0721) acc 62.5000 (73.0569) lr 3.1417e-05 eta 0:57:59 +epoch [48/50] batch [785/1000] time 1.551 (1.567) data 0.000 (0.002) loss 0.7715 (1.0710) acc 81.2500 (73.0892) lr 3.1417e-05 eta 0:57:51 +epoch [48/50] batch [790/1000] time 1.557 (1.567) data 0.000 (0.002) loss 1.0010 (1.0714) acc 71.8750 (73.0736) lr 3.1417e-05 eta 0:57:43 +epoch [48/50] batch [795/1000] time 1.571 (1.567) data 0.001 (0.002) loss 1.4229 (1.0716) acc 75.0000 (73.0857) lr 3.1417e-05 eta 0:57:35 +epoch [48/50] batch [800/1000] time 1.579 (1.567) data 0.001 (0.002) loss 1.5752 (1.0717) acc 75.0000 (73.0938) lr 3.1417e-05 eta 0:57:27 +epoch [48/50] batch [805/1000] time 1.540 (1.567) data 0.000 (0.002) loss 0.7744 (1.0706) acc 71.8750 (73.1056) lr 3.1417e-05 eta 0:57:19 +epoch [48/50] batch [810/1000] time 1.587 (1.567) data 0.000 (0.002) loss 1.2822 (1.0719) acc 62.5000 (73.0826) lr 3.1417e-05 eta 0:57:11 +epoch [48/50] batch [815/1000] time 1.564 (1.567) data 0.000 (0.002) loss 0.7642 (1.0720) acc 71.8750 (73.0675) lr 3.1417e-05 eta 0:57:04 +epoch [48/50] batch [820/1000] time 
1.567 (1.567) data 0.001 (0.002) loss 1.3135 (1.0715) acc 78.1250 (73.0907) lr 3.1417e-05 eta 0:56:56 +epoch [48/50] batch [825/1000] time 1.548 (1.567) data 0.000 (0.002) loss 0.8872 (1.0713) acc 68.7500 (73.0871) lr 3.1417e-05 eta 0:56:48 +epoch [48/50] batch [830/1000] time 1.547 (1.567) data 0.000 (0.002) loss 0.6816 (1.0709) acc 84.3750 (73.0986) lr 3.1417e-05 eta 0:56:40 +epoch [48/50] batch [835/1000] time 1.558 (1.567) data 0.001 (0.002) loss 0.5518 (1.0690) acc 75.0000 (73.1213) lr 3.1417e-05 eta 0:56:32 +epoch [48/50] batch [840/1000] time 1.563 (1.567) data 0.000 (0.002) loss 0.8608 (1.0699) acc 75.0000 (73.1027) lr 3.1417e-05 eta 0:56:24 +epoch [48/50] batch [845/1000] time 1.583 (1.567) data 0.001 (0.002) loss 1.0439 (1.0694) acc 75.0000 (73.1102) lr 3.1417e-05 eta 0:56:16 +epoch [48/50] batch [850/1000] time 1.571 (1.567) data 0.001 (0.002) loss 0.8267 (1.0680) acc 75.0000 (73.1507) lr 3.1417e-05 eta 0:56:08 +epoch [48/50] batch [855/1000] time 1.571 (1.567) data 0.000 (0.002) loss 0.7905 (1.0672) acc 81.2500 (73.1579) lr 3.1417e-05 eta 0:56:01 +epoch [48/50] batch [860/1000] time 1.559 (1.567) data 0.001 (0.002) loss 0.8589 (1.0665) acc 81.2500 (73.1650) lr 3.1417e-05 eta 0:55:53 +epoch [48/50] batch [865/1000] time 1.570 (1.567) data 0.001 (0.002) loss 1.4375 (1.0677) acc 62.5000 (73.1358) lr 3.1417e-05 eta 0:55:45 +epoch [48/50] batch [870/1000] time 1.572 (1.567) data 0.000 (0.002) loss 1.8203 (1.0680) acc 50.0000 (73.1106) lr 3.1417e-05 eta 0:55:37 +epoch [48/50] batch [875/1000] time 1.577 (1.567) data 0.000 (0.002) loss 0.7871 (1.0687) acc 81.2500 (73.1214) lr 3.1417e-05 eta 0:55:29 +epoch [48/50] batch [880/1000] time 1.547 (1.567) data 0.000 (0.002) loss 0.8774 (1.0677) acc 81.2500 (73.1286) lr 3.1417e-05 eta 0:55:21 +epoch [48/50] batch [885/1000] time 1.554 (1.567) data 0.000 (0.002) loss 1.2939 (1.0678) acc 78.1250 (73.1285) lr 3.1417e-05 eta 0:55:13 +epoch [48/50] batch [890/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.7310 (1.0676) 
acc 81.2500 (73.1320) lr 3.1417e-05 eta 0:55:05 +epoch [48/50] batch [895/1000] time 1.597 (1.567) data 0.001 (0.002) loss 1.5947 (1.0677) acc 62.5000 (73.1145) lr 3.1417e-05 eta 0:54:58 +epoch [48/50] batch [900/1000] time 1.567 (1.567) data 0.001 (0.002) loss 1.4424 (1.0689) acc 68.7500 (73.1007) lr 3.1417e-05 eta 0:54:50 +epoch [48/50] batch [905/1000] time 1.589 (1.567) data 0.001 (0.002) loss 1.0098 (1.0682) acc 65.6250 (73.1008) lr 3.1417e-05 eta 0:54:42 +epoch [48/50] batch [910/1000] time 1.546 (1.567) data 0.000 (0.002) loss 0.7783 (1.0688) acc 84.3750 (73.0941) lr 3.1417e-05 eta 0:54:34 +epoch [48/50] batch [915/1000] time 1.567 (1.567) data 0.001 (0.002) loss 1.4746 (1.0691) acc 62.5000 (73.0908) lr 3.1417e-05 eta 0:54:26 +epoch [48/50] batch [920/1000] time 1.562 (1.567) data 0.000 (0.002) loss 1.0166 (1.0693) acc 71.8750 (73.0978) lr 3.1417e-05 eta 0:54:19 +epoch [48/50] batch [925/1000] time 1.572 (1.567) data 0.000 (0.002) loss 1.5732 (1.0694) acc 65.6250 (73.1250) lr 3.1417e-05 eta 0:54:11 +epoch [48/50] batch [930/1000] time 1.589 (1.567) data 0.001 (0.002) loss 1.3818 (1.0707) acc 68.7500 (73.0813) lr 3.1417e-05 eta 0:54:03 +epoch [48/50] batch [935/1000] time 1.544 (1.567) data 0.001 (0.001) loss 1.0186 (1.0709) acc 81.2500 (73.0882) lr 3.1417e-05 eta 0:53:55 +epoch [48/50] batch [940/1000] time 1.557 (1.567) data 0.000 (0.001) loss 1.6006 (1.0724) acc 62.5000 (73.0818) lr 3.1417e-05 eta 0:53:48 +epoch [48/50] batch [945/1000] time 1.587 (1.567) data 0.000 (0.001) loss 1.5762 (1.0728) acc 68.7500 (73.0754) lr 3.1417e-05 eta 0:53:40 +epoch [48/50] batch [950/1000] time 1.571 (1.567) data 0.001 (0.001) loss 0.7231 (1.0725) acc 87.5000 (73.0921) lr 3.1417e-05 eta 0:53:32 +epoch [48/50] batch [955/1000] time 1.549 (1.567) data 0.000 (0.001) loss 1.0596 (1.0727) acc 78.1250 (73.0955) lr 3.1417e-05 eta 0:53:24 +epoch [48/50] batch [960/1000] time 1.566 (1.567) data 0.000 (0.001) loss 1.1738 (1.0735) acc 68.7500 (73.0469) lr 3.1417e-05 eta 0:53:16 
+epoch [48/50] batch [965/1000] time 1.564 (1.567) data 0.000 (0.001) loss 1.6143 (1.0741) acc 59.3750 (73.0343) lr 3.1417e-05 eta 0:53:09 +epoch [48/50] batch [970/1000] time 1.567 (1.567) data 0.000 (0.001) loss 0.9316 (1.0739) acc 75.0000 (73.0445) lr 3.1417e-05 eta 0:53:01 +epoch [48/50] batch [975/1000] time 1.566 (1.567) data 0.000 (0.001) loss 0.8916 (1.0731) acc 75.0000 (73.0641) lr 3.1417e-05 eta 0:52:53 +epoch [48/50] batch [980/1000] time 1.578 (1.567) data 0.000 (0.001) loss 1.2012 (1.0731) acc 78.1250 (73.0676) lr 3.1417e-05 eta 0:52:45 +epoch [48/50] batch [985/1000] time 1.560 (1.567) data 0.001 (0.001) loss 1.2812 (1.0726) acc 71.8750 (73.0679) lr 3.1417e-05 eta 0:52:38 +epoch [48/50] batch [990/1000] time 1.592 (1.567) data 0.000 (0.001) loss 0.9502 (1.0714) acc 75.0000 (73.0808) lr 3.1417e-05 eta 0:52:30 +epoch [48/50] batch [995/1000] time 1.537 (1.567) data 0.000 (0.001) loss 1.0762 (1.0703) acc 75.0000 (73.1250) lr 3.1417e-05 eta 0:52:22 +epoch [48/50] batch [1000/1000] time 1.539 (1.567) data 0.000 (0.001) loss 1.1553 (1.0701) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:52:14 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,389 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.3% +epoch [49/50] batch [5/1000] time 1.559 (1.702) data 0.000 (0.193) loss 0.9688 (1.1866) acc 71.8750 (71.8750) lr 1.7713e-05 eta 0:56:36 +epoch [49/50] batch [10/1000] time 1.553 (1.632) data 0.001 (0.097) loss 0.9238 (1.2225) acc 84.3750 (69.6875) lr 1.7713e-05 eta 0:54:08 +epoch [49/50] batch [15/1000] time 1.556 (1.611) data 0.001 (0.065) loss 1.3984 (1.3448) acc 71.8750 (67.7083) lr 1.7713e-05 eta 0:53:18 +epoch [49/50] batch [20/1000] time 1.567 (1.602) data 0.001 (0.049) loss 0.7661 (1.2846) acc 81.2500 (69.6875) lr 1.7713e-05 eta 0:52:51 +epoch [49/50] batch [25/1000] time 1.586 (1.595) data 0.001 (0.039) loss 0.8765 (1.2207) acc 78.1250 (70.7500) lr 1.7713e-05 eta 0:52:30 +epoch [49/50] batch [30/1000] time 1.568 (1.590) data 0.000 (0.033) 
loss 1.5967 (1.2297) acc 68.7500 (70.5208) lr 1.7713e-05 eta 0:52:13 +epoch [49/50] batch [35/1000] time 1.554 (1.595) data 0.000 (0.028) loss 0.8169 (1.1774) acc 84.3750 (71.5179) lr 1.7713e-05 eta 0:52:13 +epoch [49/50] batch [40/1000] time 1.571 (1.590) data 0.000 (0.025) loss 0.4878 (1.1352) acc 81.2500 (72.2656) lr 1.7713e-05 eta 0:51:57 +epoch [49/50] batch [45/1000] time 1.562 (1.587) data 0.000 (0.022) loss 1.0566 (1.1270) acc 81.2500 (72.5694) lr 1.7713e-05 eta 0:51:43 +epoch [49/50] batch [50/1000] time 1.567 (1.585) data 0.000 (0.020) loss 1.2617 (1.1387) acc 71.8750 (72.2500) lr 1.7713e-05 eta 0:51:31 +epoch [49/50] batch [55/1000] time 1.583 (1.583) data 0.001 (0.018) loss 0.9868 (1.1217) acc 78.1250 (72.5568) lr 1.7713e-05 eta 0:51:19 +epoch [49/50] batch [60/1000] time 1.569 (1.581) data 0.000 (0.017) loss 0.7524 (1.0946) acc 90.6250 (73.3333) lr 1.7713e-05 eta 0:51:08 +epoch [49/50] batch [65/1000] time 1.573 (1.580) data 0.001 (0.015) loss 1.5205 (1.0896) acc 71.8750 (73.6538) lr 1.7713e-05 eta 0:50:57 +epoch [49/50] batch [70/1000] time 1.573 (1.580) data 0.001 (0.014) loss 1.0166 (1.0815) acc 71.8750 (73.8393) lr 1.7713e-05 eta 0:50:49 +epoch [49/50] batch [75/1000] time 1.542 (1.579) data 0.001 (0.013) loss 0.8853 (1.0807) acc 78.1250 (74.0417) lr 1.7713e-05 eta 0:50:38 +epoch [49/50] batch [80/1000] time 1.570 (1.580) data 0.000 (0.013) loss 0.4189 (1.0627) acc 87.5000 (74.1797) lr 1.7713e-05 eta 0:50:32 +epoch [49/50] batch [85/1000] time 1.563 (1.579) data 0.001 (0.012) loss 1.1836 (1.0658) acc 78.1250 (74.1912) lr 1.7713e-05 eta 0:50:22 +epoch [49/50] batch [90/1000] time 1.554 (1.577) data 0.000 (0.011) loss 0.8911 (1.0763) acc 81.2500 (74.1667) lr 1.7713e-05 eta 0:50:12 +epoch [49/50] batch [95/1000] time 1.579 (1.577) data 0.001 (0.011) loss 0.7773 (1.0765) acc 71.8750 (74.0789) lr 1.7713e-05 eta 0:50:04 +epoch [49/50] batch [100/1000] time 1.551 (1.577) data 0.001 (0.010) loss 1.2002 (1.0767) acc 68.7500 (74.0938) lr 1.7713e-05 eta 
0:49:55 +epoch [49/50] batch [105/1000] time 1.551 (1.576) data 0.001 (0.010) loss 0.6816 (1.0739) acc 78.1250 (73.9881) lr 1.7713e-05 eta 0:49:46 +epoch [49/50] batch [110/1000] time 1.569 (1.576) data 0.001 (0.009) loss 1.9414 (1.0784) acc 56.2500 (73.8920) lr 1.7713e-05 eta 0:49:37 +epoch [49/50] batch [115/1000] time 1.542 (1.575) data 0.000 (0.009) loss 0.4084 (1.0732) acc 93.7500 (73.9402) lr 1.7713e-05 eta 0:49:28 +epoch [49/50] batch [120/1000] time 1.558 (1.574) data 0.001 (0.009) loss 0.6001 (1.0756) acc 90.6250 (73.8281) lr 1.7713e-05 eta 0:49:18 +epoch [49/50] batch [125/1000] time 1.579 (1.574) data 0.000 (0.008) loss 1.1895 (1.0786) acc 65.6250 (73.7250) lr 1.7713e-05 eta 0:49:10 +epoch [49/50] batch [130/1000] time 1.545 (1.573) data 0.000 (0.008) loss 0.8340 (1.0803) acc 75.0000 (73.6779) lr 1.7713e-05 eta 0:49:01 +epoch [49/50] batch [135/1000] time 1.573 (1.573) data 0.001 (0.008) loss 1.0156 (1.0782) acc 71.8750 (73.6574) lr 1.7713e-05 eta 0:48:53 +epoch [49/50] batch [140/1000] time 1.577 (1.574) data 0.000 (0.007) loss 1.3525 (1.0776) acc 68.7500 (73.7946) lr 1.7713e-05 eta 0:48:47 +epoch [49/50] batch [145/1000] time 1.548 (1.574) data 0.000 (0.007) loss 1.0537 (1.0815) acc 78.1250 (73.9224) lr 1.7713e-05 eta 0:48:38 +epoch [49/50] batch [150/1000] time 1.575 (1.573) data 0.000 (0.007) loss 0.9443 (1.0744) acc 68.7500 (73.9583) lr 1.7713e-05 eta 0:48:30 +epoch [49/50] batch [155/1000] time 1.554 (1.572) data 0.000 (0.007) loss 1.4795 (1.0803) acc 68.7500 (73.8306) lr 1.7713e-05 eta 0:48:20 +epoch [49/50] batch [160/1000] time 1.577 (1.572) data 0.000 (0.007) loss 0.8369 (1.0779) acc 78.1250 (73.9453) lr 1.7713e-05 eta 0:48:12 +epoch [49/50] batch [165/1000] time 1.536 (1.572) data 0.000 (0.006) loss 1.1553 (1.0866) acc 71.8750 (73.8258) lr 1.7713e-05 eta 0:48:04 +epoch [49/50] batch [170/1000] time 1.559 (1.572) data 0.000 (0.006) loss 1.3506 (1.0854) acc 71.8750 (73.7868) lr 1.7713e-05 eta 0:47:56 +epoch [49/50] batch [175/1000] time 1.568 
(1.571) data 0.001 (0.006) loss 0.7607 (1.0891) acc 71.8750 (73.6429) lr 1.7713e-05 eta 0:47:47 +epoch [49/50] batch [180/1000] time 1.560 (1.571) data 0.000 (0.006) loss 1.3516 (1.0874) acc 65.6250 (73.6285) lr 1.7713e-05 eta 0:47:38 +epoch [49/50] batch [185/1000] time 1.561 (1.571) data 0.000 (0.006) loss 0.9106 (1.0850) acc 78.1250 (73.5980) lr 1.7713e-05 eta 0:47:31 +epoch [49/50] batch [190/1000] time 1.593 (1.571) data 0.001 (0.006) loss 0.7319 (1.0808) acc 81.2500 (73.6678) lr 1.7713e-05 eta 0:47:24 +epoch [49/50] batch [195/1000] time 1.558 (1.571) data 0.001 (0.005) loss 1.3242 (1.0854) acc 71.8750 (73.6378) lr 1.7713e-05 eta 0:47:15 +epoch [49/50] batch [200/1000] time 1.590 (1.571) data 0.000 (0.005) loss 1.0811 (1.0878) acc 65.6250 (73.4844) lr 1.7713e-05 eta 0:47:07 +epoch [49/50] batch [205/1000] time 1.544 (1.571) data 0.000 (0.005) loss 1.2354 (1.0843) acc 62.5000 (73.4756) lr 1.7713e-05 eta 0:46:59 +epoch [49/50] batch [210/1000] time 1.551 (1.570) data 0.001 (0.005) loss 0.8770 (1.0844) acc 81.2500 (73.3780) lr 1.7713e-05 eta 0:46:50 +epoch [49/50] batch [215/1000] time 1.575 (1.570) data 0.000 (0.005) loss 2.2148 (1.0928) acc 53.1250 (73.2413) lr 1.7713e-05 eta 0:46:42 +epoch [49/50] batch [220/1000] time 1.554 (1.570) data 0.000 (0.005) loss 1.3535 (1.0890) acc 71.8750 (73.2812) lr 1.7713e-05 eta 0:46:34 +epoch [49/50] batch [225/1000] time 1.548 (1.569) data 0.000 (0.005) loss 1.3164 (1.0914) acc 62.5000 (73.2222) lr 1.7713e-05 eta 0:46:25 +epoch [49/50] batch [230/1000] time 1.570 (1.570) data 0.000 (0.005) loss 0.8115 (1.0947) acc 81.2500 (73.1793) lr 1.7713e-05 eta 0:46:19 +epoch [49/50] batch [235/1000] time 1.547 (1.570) data 0.001 (0.005) loss 1.2441 (1.0992) acc 65.6250 (73.1250) lr 1.7713e-05 eta 0:46:11 +epoch [49/50] batch [240/1000] time 1.576 (1.570) data 0.000 (0.005) loss 1.0928 (1.1007) acc 75.0000 (73.0859) lr 1.7713e-05 eta 0:46:02 +epoch [49/50] batch [245/1000] time 1.561 (1.570) data 0.001 (0.004) loss 0.9419 (1.1047) acc 
78.1250 (73.0612) lr 1.7713e-05 eta 0:45:54 +epoch [49/50] batch [250/1000] time 1.563 (1.569) data 0.001 (0.004) loss 1.2871 (1.1126) acc 75.0000 (73.0250) lr 1.7713e-05 eta 0:45:46 +epoch [49/50] batch [255/1000] time 1.591 (1.569) data 0.000 (0.004) loss 1.0938 (1.1109) acc 75.0000 (73.0270) lr 1.7713e-05 eta 0:45:38 +epoch [49/50] batch [260/1000] time 1.573 (1.570) data 0.000 (0.004) loss 1.2549 (1.1079) acc 75.0000 (73.1130) lr 1.7713e-05 eta 0:45:31 +epoch [49/50] batch [265/1000] time 1.574 (1.569) data 0.000 (0.004) loss 1.8135 (1.1131) acc 65.6250 (72.9363) lr 1.7713e-05 eta 0:45:22 +epoch [49/50] batch [270/1000] time 1.553 (1.569) data 0.000 (0.004) loss 1.0039 (1.1107) acc 78.1250 (73.0093) lr 1.7713e-05 eta 0:45:14 +epoch [49/50] batch [275/1000] time 1.588 (1.569) data 0.000 (0.004) loss 1.1309 (1.1074) acc 71.8750 (73.0568) lr 1.7713e-05 eta 0:45:06 +epoch [49/50] batch [280/1000] time 1.588 (1.569) data 0.000 (0.004) loss 1.1055 (1.1054) acc 65.6250 (73.1027) lr 1.7713e-05 eta 0:44:58 +epoch [49/50] batch [285/1000] time 1.571 (1.569) data 0.000 (0.004) loss 0.7456 (1.1039) acc 78.1250 (73.1250) lr 1.7713e-05 eta 0:44:50 +epoch [49/50] batch [290/1000] time 1.745 (1.570) data 0.000 (0.004) loss 0.9463 (1.1029) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:44:44 +epoch [49/50] batch [295/1000] time 1.567 (1.569) data 0.000 (0.004) loss 1.2803 (1.1035) acc 71.8750 (73.0826) lr 1.7713e-05 eta 0:44:35 +epoch [49/50] batch [300/1000] time 1.549 (1.569) data 0.001 (0.004) loss 0.7808 (1.1016) acc 84.3750 (73.1354) lr 1.7713e-05 eta 0:44:27 +epoch [49/50] batch [305/1000] time 1.550 (1.569) data 0.001 (0.004) loss 1.2246 (1.0994) acc 75.0000 (73.1967) lr 1.7713e-05 eta 0:44:20 +epoch [49/50] batch [310/1000] time 1.539 (1.569) data 0.000 (0.004) loss 0.9473 (1.0970) acc 84.3750 (73.2661) lr 1.7713e-05 eta 0:44:12 +epoch [49/50] batch [315/1000] time 1.574 (1.569) data 0.000 (0.004) loss 0.8750 (1.0968) acc 81.2500 (73.2937) lr 1.7713e-05 eta 0:44:04 +epoch 
[49/50] batch [320/1000] time 1.560 (1.569) data 0.000 (0.004) loss 0.8804 (1.0923) acc 81.2500 (73.4180) lr 1.7713e-05 eta 0:43:55 +epoch [49/50] batch [325/1000] time 1.573 (1.569) data 0.001 (0.003) loss 0.8008 (1.0915) acc 78.1250 (73.3942) lr 1.7713e-05 eta 0:43:47 +epoch [49/50] batch [330/1000] time 1.545 (1.569) data 0.001 (0.003) loss 1.5332 (1.0932) acc 71.8750 (73.3807) lr 1.7713e-05 eta 0:43:40 +epoch [49/50] batch [335/1000] time 1.752 (1.570) data 0.000 (0.003) loss 0.6763 (1.0912) acc 84.3750 (73.4049) lr 1.7713e-05 eta 0:43:33 +epoch [49/50] batch [340/1000] time 1.566 (1.570) data 0.001 (0.003) loss 1.1660 (1.0924) acc 68.7500 (73.3732) lr 1.7713e-05 eta 0:43:25 +epoch [49/50] batch [345/1000] time 1.583 (1.570) data 0.000 (0.003) loss 0.8438 (1.0930) acc 84.3750 (73.3514) lr 1.7713e-05 eta 0:43:17 +epoch [49/50] batch [350/1000] time 1.540 (1.569) data 0.000 (0.003) loss 0.7383 (1.0937) acc 84.3750 (73.3214) lr 1.7713e-05 eta 0:43:09 +epoch [49/50] batch [355/1000] time 1.559 (1.569) data 0.000 (0.003) loss 0.9341 (1.0924) acc 71.8750 (73.2570) lr 1.7713e-05 eta 0:43:01 +epoch [49/50] batch [360/1000] time 1.530 (1.569) data 0.000 (0.003) loss 0.9043 (1.0915) acc 78.1250 (73.2639) lr 1.7713e-05 eta 0:42:53 +epoch [49/50] batch [365/1000] time 1.590 (1.569) data 0.001 (0.003) loss 0.7168 (1.0919) acc 84.3750 (73.2449) lr 1.7713e-05 eta 0:42:45 +epoch [49/50] batch [370/1000] time 1.575 (1.569) data 0.005 (0.003) loss 0.7544 (1.0886) acc 78.1250 (73.2601) lr 1.7713e-05 eta 0:42:37 +epoch [49/50] batch [375/1000] time 1.550 (1.569) data 0.001 (0.003) loss 1.3203 (1.0889) acc 71.8750 (73.2667) lr 1.7713e-05 eta 0:42:29 +epoch [49/50] batch [380/1000] time 1.563 (1.569) data 0.000 (0.003) loss 1.0303 (1.0878) acc 68.7500 (73.2812) lr 1.7713e-05 eta 0:42:22 +epoch [49/50] batch [385/1000] time 1.536 (1.569) data 0.001 (0.003) loss 0.9121 (1.0871) acc 87.5000 (73.3442) lr 1.7713e-05 eta 0:42:13 +epoch [49/50] batch [390/1000] time 1.565 (1.569) data 
0.000 (0.003) loss 1.3633 (1.0874) acc 68.7500 (73.3734) lr 1.7713e-05 eta 0:42:05 +epoch [49/50] batch [395/1000] time 1.580 (1.569) data 0.000 (0.003) loss 0.6699 (1.0861) acc 71.8750 (73.4019) lr 1.7713e-05 eta 0:41:57 +epoch [49/50] batch [400/1000] time 1.571 (1.569) data 0.000 (0.003) loss 1.0781 (1.0853) acc 68.7500 (73.3828) lr 1.7713e-05 eta 0:41:49 +epoch [49/50] batch [405/1000] time 1.564 (1.569) data 0.000 (0.003) loss 0.6763 (1.0812) acc 81.2500 (73.4182) lr 1.7713e-05 eta 0:41:41 +epoch [49/50] batch [410/1000] time 1.555 (1.568) data 0.001 (0.003) loss 1.1416 (1.0820) acc 62.5000 (73.3841) lr 1.7713e-05 eta 0:41:33 +epoch [49/50] batch [415/1000] time 1.567 (1.568) data 0.000 (0.003) loss 0.9429 (1.0799) acc 78.1250 (73.4262) lr 1.7713e-05 eta 0:41:25 +epoch [49/50] batch [420/1000] time 1.580 (1.568) data 0.001 (0.003) loss 2.3008 (1.0826) acc 53.1250 (73.3854) lr 1.7713e-05 eta 0:41:17 +epoch [49/50] batch [425/1000] time 1.566 (1.568) data 0.001 (0.003) loss 0.7251 (1.0813) acc 81.2500 (73.4044) lr 1.7713e-05 eta 0:41:09 +epoch [49/50] batch [430/1000] time 1.573 (1.568) data 0.001 (0.003) loss 0.6870 (1.0819) acc 68.7500 (73.3285) lr 1.7713e-05 eta 0:41:01 +epoch [49/50] batch [435/1000] time 1.543 (1.568) data 0.001 (0.003) loss 1.2100 (1.0806) acc 78.1250 (73.3405) lr 1.7713e-05 eta 0:40:53 +epoch [49/50] batch [440/1000] time 1.542 (1.568) data 0.000 (0.003) loss 1.1963 (1.0813) acc 68.7500 (73.2884) lr 1.7713e-05 eta 0:40:45 +epoch [49/50] batch [445/1000] time 1.561 (1.568) data 0.001 (0.003) loss 1.1631 (1.0818) acc 71.8750 (73.3427) lr 1.7713e-05 eta 0:40:38 +epoch [49/50] batch [450/1000] time 1.540 (1.568) data 0.000 (0.003) loss 1.2822 (1.0807) acc 62.5000 (73.3403) lr 1.7713e-05 eta 0:40:30 +epoch [49/50] batch [455/1000] time 1.568 (1.568) data 0.001 (0.003) loss 0.9424 (1.0801) acc 75.0000 (73.3310) lr 1.7713e-05 eta 0:40:22 +epoch [49/50] batch [460/1000] time 1.566 (1.568) data 0.001 (0.003) loss 0.7407 (1.0785) acc 78.1250 
(73.3628) lr 1.7713e-05 eta 0:40:14 +epoch [49/50] batch [465/1000] time 1.548 (1.567) data 0.000 (0.003) loss 0.7256 (1.0781) acc 75.0000 (73.3468) lr 1.7713e-05 eta 0:40:06 +epoch [49/50] batch [470/1000] time 1.555 (1.567) data 0.000 (0.003) loss 1.0996 (1.0788) acc 78.1250 (73.3311) lr 1.7713e-05 eta 0:39:58 +epoch [49/50] batch [475/1000] time 1.560 (1.567) data 0.001 (0.003) loss 1.7568 (1.0789) acc 62.5000 (73.3158) lr 1.7713e-05 eta 0:39:50 +epoch [49/50] batch [480/1000] time 1.561 (1.567) data 0.000 (0.003) loss 0.8125 (1.0774) acc 75.0000 (73.3138) lr 1.7713e-05 eta 0:39:41 +epoch [49/50] batch [485/1000] time 1.549 (1.567) data 0.000 (0.002) loss 0.5518 (1.0772) acc 78.1250 (73.2796) lr 1.7713e-05 eta 0:39:33 +epoch [49/50] batch [490/1000] time 1.532 (1.567) data 0.000 (0.002) loss 1.3135 (1.0792) acc 65.6250 (73.2462) lr 1.7713e-05 eta 0:39:26 +epoch [49/50] batch [495/1000] time 1.558 (1.567) data 0.001 (0.002) loss 0.9478 (1.0796) acc 71.8750 (73.2386) lr 1.7713e-05 eta 0:39:18 +epoch [49/50] batch [500/1000] time 1.567 (1.567) data 0.000 (0.002) loss 1.2373 (1.0788) acc 71.8750 (73.2313) lr 1.7713e-05 eta 0:39:10 +epoch [49/50] batch [505/1000] time 1.569 (1.567) data 0.001 (0.002) loss 0.6729 (1.0778) acc 87.5000 (73.2550) lr 1.7713e-05 eta 0:39:02 +epoch [49/50] batch [510/1000] time 1.545 (1.566) data 0.001 (0.002) loss 0.9487 (1.0763) acc 68.7500 (73.2721) lr 1.7713e-05 eta 0:38:54 +epoch [49/50] batch [515/1000] time 1.539 (1.566) data 0.000 (0.002) loss 1.1104 (1.0735) acc 78.1250 (73.3434) lr 1.7713e-05 eta 0:38:45 +epoch [49/50] batch [520/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.0830 (1.0728) acc 71.8750 (73.3233) lr 1.7713e-05 eta 0:38:38 +epoch [49/50] batch [525/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.0820 (1.0746) acc 71.8750 (73.2798) lr 1.7713e-05 eta 0:38:30 +epoch [49/50] batch [530/1000] time 1.565 (1.567) data 0.001 (0.002) loss 1.5254 (1.0745) acc 62.5000 (73.2547) lr 1.7713e-05 eta 0:38:22 +epoch [49/50] 
batch [535/1000] time 1.553 (1.566) data 0.001 (0.002) loss 1.2402 (1.0734) acc 65.6250 (73.2593) lr 1.7713e-05 eta 0:38:14 +epoch [49/50] batch [540/1000] time 1.556 (1.566) data 0.000 (0.002) loss 0.9790 (1.0713) acc 71.8750 (73.3102) lr 1.7713e-05 eta 0:38:06 +epoch [49/50] batch [545/1000] time 1.568 (1.566) data 0.001 (0.002) loss 0.3525 (1.0698) acc 87.5000 (73.3544) lr 1.7713e-05 eta 0:37:58 +epoch [49/50] batch [550/1000] time 1.577 (1.566) data 0.000 (0.002) loss 0.9087 (1.0693) acc 84.3750 (73.3409) lr 1.7713e-05 eta 0:37:51 +epoch [49/50] batch [555/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.9717 (1.0695) acc 68.7500 (73.3390) lr 1.7713e-05 eta 0:37:43 +epoch [49/50] batch [560/1000] time 1.546 (1.566) data 0.000 (0.002) loss 0.6919 (1.0693) acc 87.5000 (73.3538) lr 1.7713e-05 eta 0:37:35 +epoch [49/50] batch [565/1000] time 1.558 (1.566) data 0.001 (0.002) loss 0.6182 (1.0699) acc 84.3750 (73.3296) lr 1.7713e-05 eta 0:37:27 +epoch [49/50] batch [570/1000] time 1.585 (1.566) data 0.000 (0.002) loss 0.7383 (1.0687) acc 81.2500 (73.3498) lr 1.7713e-05 eta 0:37:19 +epoch [49/50] batch [575/1000] time 1.549 (1.566) data 0.000 (0.002) loss 1.5938 (1.0708) acc 68.7500 (73.2772) lr 1.7713e-05 eta 0:37:11 +epoch [49/50] batch [580/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.6436 (1.0729) acc 65.6250 (73.2381) lr 1.7713e-05 eta 0:37:03 +epoch [49/50] batch [585/1000] time 1.534 (1.566) data 0.001 (0.002) loss 0.9272 (1.0733) acc 78.1250 (73.2105) lr 1.7713e-05 eta 0:36:55 +epoch [49/50] batch [590/1000] time 1.559 (1.566) data 0.001 (0.002) loss 1.3916 (1.0731) acc 59.3750 (73.1939) lr 1.7713e-05 eta 0:36:47 +epoch [49/50] batch [595/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.0625 (1.0725) acc 75.0000 (73.2195) lr 1.7713e-05 eta 0:36:39 +epoch [49/50] batch [600/1000] time 1.566 (1.566) data 0.001 (0.002) loss 0.9307 (1.0715) acc 78.1250 (73.2448) lr 1.7713e-05 eta 0:36:32 +epoch [49/50] batch [605/1000] time 1.554 (1.566) data 0.000 
(0.002) loss 1.6240 (1.0721) acc 68.7500 (73.2386) lr 1.7713e-05 eta 0:36:24 +epoch [49/50] batch [610/1000] time 1.535 (1.566) data 0.001 (0.002) loss 1.1934 (1.0720) acc 68.7500 (73.2377) lr 1.7713e-05 eta 0:36:16 +epoch [49/50] batch [615/1000] time 1.537 (1.565) data 0.000 (0.002) loss 0.8276 (1.0703) acc 81.2500 (73.2774) lr 1.7713e-05 eta 0:36:08 +epoch [49/50] batch [620/1000] time 1.564 (1.565) data 0.000 (0.002) loss 1.3574 (1.0709) acc 65.6250 (73.2611) lr 1.7713e-05 eta 0:36:00 +epoch [49/50] batch [625/1000] time 1.555 (1.565) data 0.000 (0.002) loss 0.8823 (1.0723) acc 68.7500 (73.2150) lr 1.7713e-05 eta 0:35:52 +epoch [49/50] batch [630/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.0215 (1.0715) acc 68.7500 (73.2440) lr 1.7713e-05 eta 0:35:44 +epoch [49/50] batch [635/1000] time 1.577 (1.565) data 0.000 (0.002) loss 1.3809 (1.0718) acc 65.6250 (73.2185) lr 1.7713e-05 eta 0:35:36 +epoch [49/50] batch [640/1000] time 1.595 (1.566) data 0.000 (0.002) loss 1.2363 (1.0725) acc 71.8750 (73.2227) lr 1.7713e-05 eta 0:35:29 +epoch [49/50] batch [645/1000] time 1.560 (1.566) data 0.000 (0.002) loss 0.6919 (1.0732) acc 78.1250 (73.1734) lr 1.7713e-05 eta 0:35:21 +epoch [49/50] batch [650/1000] time 1.559 (1.565) data 0.000 (0.002) loss 0.9302 (1.0736) acc 71.8750 (73.1394) lr 1.7713e-05 eta 0:35:13 +epoch [49/50] batch [655/1000] time 1.553 (1.565) data 0.000 (0.002) loss 0.5337 (1.0727) acc 87.5000 (73.1489) lr 1.7713e-05 eta 0:35:05 +epoch [49/50] batch [660/1000] time 1.587 (1.565) data 0.000 (0.002) loss 1.2617 (1.0727) acc 68.7500 (73.1534) lr 1.7713e-05 eta 0:34:57 +epoch [49/50] batch [665/1000] time 1.579 (1.565) data 0.000 (0.002) loss 1.2793 (1.0729) acc 81.2500 (73.1673) lr 1.7713e-05 eta 0:34:49 +epoch [49/50] batch [670/1000] time 1.560 (1.565) data 0.001 (0.002) loss 0.6450 (1.0731) acc 84.3750 (73.1716) lr 1.7713e-05 eta 0:34:41 +epoch [49/50] batch [675/1000] time 1.550 (1.565) data 0.001 (0.002) loss 0.8950 (1.0725) acc 81.2500 (73.1991) lr 
1.7713e-05 eta 0:34:34 +epoch [49/50] batch [680/1000] time 1.748 (1.566) data 0.000 (0.002) loss 1.0176 (1.0712) acc 71.8750 (73.2123) lr 1.7713e-05 eta 0:34:26 +epoch [49/50] batch [685/1000] time 1.545 (1.566) data 0.000 (0.002) loss 0.7529 (1.0704) acc 75.0000 (73.2071) lr 1.7713e-05 eta 0:34:18 +epoch [49/50] batch [690/1000] time 1.580 (1.566) data 0.001 (0.002) loss 1.4453 (1.0715) acc 71.8750 (73.1884) lr 1.7713e-05 eta 0:34:10 +epoch [49/50] batch [695/1000] time 1.554 (1.566) data 0.000 (0.002) loss 0.6606 (1.0703) acc 81.2500 (73.2239) lr 1.7713e-05 eta 0:34:03 +epoch [49/50] batch [700/1000] time 1.566 (1.566) data 0.000 (0.002) loss 1.3584 (1.0717) acc 65.6250 (73.1875) lr 1.7713e-05 eta 0:33:55 +epoch [49/50] batch [705/1000] time 1.567 (1.566) data 0.000 (0.002) loss 1.3691 (1.0705) acc 68.7500 (73.2048) lr 1.7713e-05 eta 0:33:47 +epoch [49/50] batch [710/1000] time 1.552 (1.566) data 0.001 (0.002) loss 0.8452 (1.0701) acc 75.0000 (73.1866) lr 1.7713e-05 eta 0:33:39 +epoch [49/50] batch [715/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.5771 (1.0712) acc 62.5000 (73.1425) lr 1.7713e-05 eta 0:33:31 +epoch [49/50] batch [720/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.3496 (1.0706) acc 71.8750 (73.1554) lr 1.7713e-05 eta 0:33:24 +epoch [49/50] batch [725/1000] time 1.583 (1.566) data 0.000 (0.002) loss 1.1055 (1.0700) acc 75.0000 (73.1509) lr 1.7713e-05 eta 0:33:16 +epoch [49/50] batch [730/1000] time 1.553 (1.566) data 0.000 (0.002) loss 1.4424 (1.0697) acc 56.2500 (73.1250) lr 1.7713e-05 eta 0:33:08 +epoch [49/50] batch [735/1000] time 1.565 (1.566) data 0.001 (0.002) loss 1.3076 (1.0702) acc 65.6250 (73.0952) lr 1.7713e-05 eta 0:33:00 +epoch [49/50] batch [740/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.6362 (1.0698) acc 75.0000 (73.0828) lr 1.7713e-05 eta 0:32:52 +epoch [49/50] batch [745/1000] time 1.606 (1.566) data 0.001 (0.002) loss 0.8281 (1.0695) acc 68.7500 (73.0663) lr 1.7713e-05 eta 0:32:45 +epoch [49/50] batch 
[750/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.4453 (1.0689) acc 68.7500 (73.0917) lr 1.7713e-05 eta 0:32:37 +epoch [49/50] batch [755/1000] time 1.552 (1.566) data 0.000 (0.002) loss 1.1113 (1.0685) acc 71.8750 (73.1250) lr 1.7713e-05 eta 0:32:29 +epoch [49/50] batch [760/1000] time 1.570 (1.566) data 0.001 (0.002) loss 0.8950 (1.0691) acc 78.1250 (73.1250) lr 1.7713e-05 eta 0:32:21 +epoch [49/50] batch [765/1000] time 1.548 (1.566) data 0.000 (0.002) loss 1.0830 (1.0703) acc 81.2500 (73.1046) lr 1.7713e-05 eta 0:32:13 +epoch [49/50] batch [770/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.4541 (1.0725) acc 65.6250 (73.0357) lr 1.7713e-05 eta 0:32:05 +epoch [49/50] batch [775/1000] time 1.586 (1.566) data 0.000 (0.002) loss 0.8135 (1.0737) acc 78.1250 (73.0161) lr 1.7713e-05 eta 0:31:58 +epoch [49/50] batch [780/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.1006 (1.0726) acc 78.1250 (73.0208) lr 1.7713e-05 eta 0:31:50 +epoch [49/50] batch [785/1000] time 1.565 (1.566) data 0.001 (0.002) loss 0.8960 (1.0738) acc 75.0000 (73.0096) lr 1.7713e-05 eta 0:31:42 +epoch [49/50] batch [790/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.7734 (1.0744) acc 78.1250 (72.9984) lr 1.7713e-05 eta 0:31:34 +epoch [49/50] batch [795/1000] time 1.553 (1.566) data 0.001 (0.002) loss 0.5620 (1.0733) acc 87.5000 (73.0346) lr 1.7713e-05 eta 0:31:26 +epoch [49/50] batch [800/1000] time 1.578 (1.566) data 0.001 (0.002) loss 1.8691 (1.0743) acc 62.5000 (72.9961) lr 1.7713e-05 eta 0:31:19 +epoch [49/50] batch [805/1000] time 1.564 (1.566) data 0.000 (0.002) loss 0.7197 (1.0741) acc 78.1250 (73.0047) lr 1.7713e-05 eta 0:31:11 +epoch [49/50] batch [810/1000] time 1.554 (1.566) data 0.001 (0.002) loss 0.4939 (1.0729) acc 87.5000 (73.0440) lr 1.7713e-05 eta 0:31:03 +epoch [49/50] batch [815/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.2842 (1.0736) acc 59.3750 (73.0176) lr 1.7713e-05 eta 0:30:55 +epoch [49/50] batch [820/1000] time 1.551 (1.566) data 0.001 (0.002) loss 
1.1289 (1.0746) acc 68.7500 (72.9916) lr 1.7713e-05 eta 0:30:47 +epoch [49/50] batch [825/1000] time 1.601 (1.566) data 0.000 (0.002) loss 1.5889 (1.0773) acc 65.6250 (72.9470) lr 1.7713e-05 eta 0:30:39 +epoch [49/50] batch [830/1000] time 1.561 (1.566) data 0.000 (0.002) loss 0.7534 (1.0755) acc 78.1250 (73.0196) lr 1.7713e-05 eta 0:30:31 +epoch [49/50] batch [835/1000] time 1.543 (1.566) data 0.000 (0.002) loss 1.7617 (1.0767) acc 50.0000 (72.9790) lr 1.7713e-05 eta 0:30:24 +epoch [49/50] batch [840/1000] time 1.535 (1.566) data 0.000 (0.002) loss 0.8965 (1.0766) acc 78.1250 (72.9948) lr 1.7713e-05 eta 0:30:16 +epoch [49/50] batch [845/1000] time 1.553 (1.566) data 0.001 (0.002) loss 0.9302 (1.0775) acc 78.1250 (72.9734) lr 1.7713e-05 eta 0:30:08 +epoch [49/50] batch [850/1000] time 1.554 (1.566) data 0.001 (0.002) loss 1.1416 (1.0782) acc 71.8750 (72.9632) lr 1.7713e-05 eta 0:30:00 +epoch [49/50] batch [855/1000] time 1.534 (1.566) data 0.000 (0.002) loss 0.7148 (1.0767) acc 81.2500 (72.9788) lr 1.7713e-05 eta 0:29:52 +epoch [49/50] batch [860/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.2578 (1.0775) acc 68.7500 (72.9578) lr 1.7713e-05 eta 0:29:44 +epoch [49/50] batch [865/1000] time 1.567 (1.565) data 0.000 (0.002) loss 1.4883 (1.0793) acc 56.2500 (72.9155) lr 1.7713e-05 eta 0:29:36 +epoch [49/50] batch [870/1000] time 1.545 (1.565) data 0.001 (0.002) loss 0.7231 (1.0787) acc 81.2500 (72.9346) lr 1.7713e-05 eta 0:29:28 +epoch [49/50] batch [875/1000] time 1.551 (1.565) data 0.000 (0.002) loss 1.7812 (1.0796) acc 65.6250 (72.9321) lr 1.7713e-05 eta 0:29:21 +epoch [49/50] batch [880/1000] time 1.559 (1.565) data 0.001 (0.002) loss 1.3945 (1.0800) acc 59.3750 (72.9226) lr 1.7713e-05 eta 0:29:13 +epoch [49/50] batch [885/1000] time 1.581 (1.565) data 0.001 (0.002) loss 1.3828 (1.0802) acc 75.0000 (72.9273) lr 1.7713e-05 eta 0:29:05 +epoch [49/50] batch [890/1000] time 1.531 (1.565) data 0.001 (0.002) loss 1.3467 (1.0805) acc 71.8750 (72.9389) lr 1.7713e-05 
eta 0:28:57 +epoch [49/50] batch [895/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.8628 (1.0804) acc 78.1250 (72.9504) lr 1.7713e-05 eta 0:28:49 +epoch [49/50] batch [900/1000] time 1.586 (1.566) data 0.001 (0.002) loss 1.0684 (1.0801) acc 78.1250 (72.9549) lr 1.7713e-05 eta 0:28:42 +epoch [49/50] batch [905/1000] time 1.575 (1.566) data 0.000 (0.002) loss 0.8628 (1.0800) acc 75.0000 (72.9523) lr 1.7713e-05 eta 0:28:34 +epoch [49/50] batch [910/1000] time 1.542 (1.566) data 0.001 (0.002) loss 0.9653 (1.0809) acc 62.5000 (72.9052) lr 1.7713e-05 eta 0:28:26 +epoch [49/50] batch [915/1000] time 1.552 (1.565) data 0.000 (0.002) loss 1.2646 (1.0824) acc 68.7500 (72.8552) lr 1.7713e-05 eta 0:28:18 +epoch [49/50] batch [920/1000] time 1.559 (1.565) data 0.000 (0.002) loss 1.0664 (1.0822) acc 68.7500 (72.8295) lr 1.7713e-05 eta 0:28:10 +epoch [49/50] batch [925/1000] time 1.545 (1.565) data 0.000 (0.002) loss 1.0264 (1.0817) acc 78.1250 (72.8311) lr 1.7713e-05 eta 0:28:02 +epoch [49/50] batch [930/1000] time 1.553 (1.565) data 0.000 (0.002) loss 1.0645 (1.0795) acc 78.1250 (72.8965) lr 1.7713e-05 eta 0:27:54 +epoch [49/50] batch [935/1000] time 1.562 (1.565) data 0.000 (0.002) loss 0.8008 (1.0796) acc 75.0000 (72.8977) lr 1.7713e-05 eta 0:27:46 +epoch [49/50] batch [940/1000] time 1.587 (1.565) data 0.001 (0.002) loss 0.8301 (1.0786) acc 78.1250 (72.9255) lr 1.7713e-05 eta 0:27:39 +epoch [49/50] batch [945/1000] time 1.569 (1.565) data 0.000 (0.002) loss 0.6890 (1.0776) acc 78.1250 (72.9398) lr 1.7713e-05 eta 0:27:31 +epoch [49/50] batch [950/1000] time 1.569 (1.565) data 0.001 (0.002) loss 0.6963 (1.0778) acc 78.1250 (72.9605) lr 1.7713e-05 eta 0:27:23 +epoch [49/50] batch [955/1000] time 1.569 (1.565) data 0.000 (0.002) loss 1.1133 (1.0777) acc 68.7500 (72.9647) lr 1.7713e-05 eta 0:27:15 +epoch [49/50] batch [960/1000] time 1.590 (1.566) data 0.001 (0.002) loss 1.0361 (1.0786) acc 75.0000 (72.9557) lr 1.7713e-05 eta 0:27:08 +epoch [49/50] batch [965/1000] time 
1.565 (1.566) data 0.001 (0.001) loss 0.9263 (1.0782) acc 75.0000 (72.9501) lr 1.7713e-05 eta 0:27:00 +epoch [49/50] batch [970/1000] time 1.557 (1.565) data 0.000 (0.001) loss 1.0342 (1.0785) acc 71.8750 (72.9671) lr 1.7713e-05 eta 0:26:52 +epoch [49/50] batch [975/1000] time 1.555 (1.565) data 0.000 (0.001) loss 1.1553 (1.0788) acc 65.6250 (72.9647) lr 1.7713e-05 eta 0:26:44 +epoch [49/50] batch [980/1000] time 1.560 (1.565) data 0.000 (0.001) loss 1.0566 (1.0782) acc 65.6250 (72.9592) lr 1.7713e-05 eta 0:26:36 +epoch [49/50] batch [985/1000] time 1.571 (1.566) data 0.001 (0.001) loss 1.4277 (1.0789) acc 71.8750 (72.9600) lr 1.7713e-05 eta 0:26:29 +epoch [49/50] batch [990/1000] time 1.573 (1.566) data 0.000 (0.001) loss 1.3027 (1.0800) acc 65.6250 (72.9261) lr 1.7713e-05 eta 0:26:21 +epoch [49/50] batch [995/1000] time 1.574 (1.566) data 0.000 (0.001) loss 1.2842 (1.0801) acc 68.7500 (72.9303) lr 1.7713e-05 eta 0:26:13 +epoch [49/50] batch [1000/1000] time 1.568 (1.566) data 0.000 (0.001) loss 0.7983 (1.0799) acc 78.1250 (72.9562) lr 7.8853e-06 eta 0:26:05 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,383 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.3% +epoch [50/50] batch [5/1000] time 1.564 (1.718) data 0.000 (0.203) loss 0.6777 (0.9286) acc 75.0000 (73.1250) lr 7.8853e-06 eta 0:28:29 +epoch [50/50] batch [10/1000] time 1.565 (1.638) data 0.000 (0.102) loss 0.7246 (0.9001) acc 78.1250 (74.6875) lr 7.8853e-06 eta 0:27:01 +epoch [50/50] batch [15/1000] time 1.572 (1.610) data 0.001 (0.068) loss 0.8516 (0.9186) acc 65.6250 (74.5833) lr 7.8853e-06 eta 0:26:25 +epoch [50/50] batch [20/1000] time 1.565 (1.598) data 0.000 (0.051) loss 1.2949 (1.0081) acc 68.7500 (73.2812) lr 7.8853e-06 eta 0:26:06 +epoch [50/50] batch [25/1000] time 1.558 (1.593) data 0.001 (0.041) loss 1.2461 (0.9855) acc 68.7500 (74.6250) lr 7.8853e-06 eta 0:25:53 +epoch [50/50] batch [30/1000] time 1.561 (1.587) data 0.001 (0.034) loss 0.9736 (0.9946) acc 78.1250 
(74.4792) lr 7.8853e-06 eta 0:25:39 +epoch [50/50] batch [35/1000] time 1.557 (1.592) data 0.000 (0.029) loss 0.8730 (1.0334) acc 87.5000 (73.9286) lr 7.8853e-06 eta 0:25:36 +epoch [50/50] batch [40/1000] time 1.556 (1.588) data 0.000 (0.026) loss 1.0771 (1.0226) acc 59.3750 (73.5156) lr 7.8853e-06 eta 0:25:24 +epoch [50/50] batch [45/1000] time 1.565 (1.587) data 0.001 (0.023) loss 1.1689 (1.0556) acc 68.7500 (72.7083) lr 7.8853e-06 eta 0:25:15 +epoch [50/50] batch [50/1000] time 1.572 (1.585) data 0.000 (0.021) loss 1.4580 (1.0893) acc 68.7500 (72.3125) lr 7.8853e-06 eta 0:25:05 +epoch [50/50] batch [55/1000] time 1.551 (1.582) data 0.001 (0.019) loss 1.4424 (1.0964) acc 78.1250 (72.1023) lr 7.8853e-06 eta 0:24:54 +epoch [50/50] batch [60/1000] time 1.563 (1.580) data 0.000 (0.017) loss 0.9194 (1.1029) acc 75.0000 (72.2396) lr 7.8853e-06 eta 0:24:44 +epoch [50/50] batch [65/1000] time 1.562 (1.578) data 0.001 (0.016) loss 1.0938 (1.0900) acc 68.7500 (72.3077) lr 7.8853e-06 eta 0:24:35 +epoch [50/50] batch [70/1000] time 1.573 (1.577) data 0.001 (0.015) loss 1.0693 (1.0940) acc 78.1250 (72.5893) lr 7.8853e-06 eta 0:24:26 +epoch [50/50] batch [75/1000] time 1.549 (1.575) data 0.001 (0.014) loss 1.8477 (1.0879) acc 50.0000 (72.6250) lr 7.8853e-06 eta 0:24:17 +epoch [50/50] batch [80/1000] time 1.534 (1.576) data 0.001 (0.013) loss 1.2744 (1.0866) acc 62.5000 (72.5000) lr 7.8853e-06 eta 0:24:09 +epoch [50/50] batch [85/1000] time 1.552 (1.575) data 0.001 (0.012) loss 1.1074 (1.0875) acc 71.8750 (72.6471) lr 7.8853e-06 eta 0:24:00 +epoch [50/50] batch [90/1000] time 1.567 (1.574) data 0.001 (0.012) loss 1.4258 (1.0985) acc 62.5000 (72.4653) lr 7.8853e-06 eta 0:23:52 +epoch [50/50] batch [95/1000] time 1.563 (1.573) data 0.000 (0.011) loss 1.2871 (1.1060) acc 65.6250 (72.3355) lr 7.8853e-06 eta 0:23:43 +epoch [50/50] batch [100/1000] time 1.550 (1.572) data 0.001 (0.011) loss 1.0420 (1.1040) acc 78.1250 (72.2812) lr 7.8853e-06 eta 0:23:35 +epoch [50/50] batch 
[105/1000] time 1.541 (1.572) data 0.001 (0.010) loss 0.5884 (1.0901) acc 84.3750 (72.4702) lr 7.8853e-06 eta 0:23:27 +epoch [50/50] batch [110/1000] time 1.573 (1.572) data 0.001 (0.010) loss 1.2471 (1.0815) acc 68.7500 (72.7273) lr 7.8853e-06 eta 0:23:19 +epoch [50/50] batch [115/1000] time 1.592 (1.572) data 0.000 (0.009) loss 0.7803 (1.0838) acc 62.5000 (72.4457) lr 7.8853e-06 eta 0:23:10 +epoch [50/50] batch [120/1000] time 1.748 (1.573) data 0.000 (0.009) loss 1.0742 (1.0871) acc 68.7500 (72.4219) lr 7.8853e-06 eta 0:23:04 +epoch [50/50] batch [125/1000] time 1.584 (1.573) data 0.001 (0.009) loss 0.9429 (1.0800) acc 78.1250 (72.5750) lr 7.8853e-06 eta 0:22:56 +epoch [50/50] batch [130/1000] time 1.549 (1.573) data 0.000 (0.008) loss 0.6768 (1.0744) acc 81.2500 (72.6683) lr 7.8853e-06 eta 0:22:48 +epoch [50/50] batch [135/1000] time 1.576 (1.572) data 0.001 (0.008) loss 1.4512 (1.0681) acc 71.8750 (72.8704) lr 7.8853e-06 eta 0:22:39 +epoch [50/50] batch [140/1000] time 1.568 (1.572) data 0.000 (0.008) loss 1.2070 (1.0743) acc 78.1250 (72.8125) lr 7.8853e-06 eta 0:22:32 +epoch [50/50] batch [145/1000] time 1.569 (1.572) data 0.000 (0.007) loss 0.7646 (1.0715) acc 84.3750 (72.9957) lr 7.8853e-06 eta 0:22:23 +epoch [50/50] batch [150/1000] time 1.578 (1.571) data 0.000 (0.007) loss 1.0010 (1.0665) acc 71.8750 (73.0625) lr 7.8853e-06 eta 0:22:15 +epoch [50/50] batch [155/1000] time 1.547 (1.571) data 0.000 (0.007) loss 1.5293 (1.0696) acc 75.0000 (73.1653) lr 7.8853e-06 eta 0:22:07 +epoch [50/50] batch [160/1000] time 1.552 (1.571) data 0.000 (0.007) loss 1.0410 (1.0773) acc 68.7500 (72.9492) lr 7.8853e-06 eta 0:21:59 +epoch [50/50] batch [165/1000] time 1.545 (1.570) data 0.001 (0.007) loss 0.9424 (1.0773) acc 75.0000 (72.9545) lr 7.8853e-06 eta 0:21:51 +epoch [50/50] batch [170/1000] time 1.562 (1.570) data 0.000 (0.006) loss 1.7695 (1.0829) acc 62.5000 (72.8860) lr 7.8853e-06 eta 0:21:42 +epoch [50/50] batch [175/1000] time 1.593 (1.570) data 0.000 (0.006) loss 
1.5498 (1.0786) acc 59.3750 (72.9107) lr 7.8853e-06 eta 0:21:34 +epoch [50/50] batch [180/1000] time 1.551 (1.569) data 0.000 (0.006) loss 0.5684 (1.0779) acc 84.3750 (72.9514) lr 7.8853e-06 eta 0:21:26 +epoch [50/50] batch [185/1000] time 1.563 (1.570) data 0.001 (0.006) loss 1.1162 (1.0774) acc 71.8750 (72.9392) lr 7.8853e-06 eta 0:21:19 +epoch [50/50] batch [190/1000] time 1.579 (1.570) data 0.001 (0.006) loss 1.0498 (1.0783) acc 75.0000 (72.8783) lr 7.8853e-06 eta 0:21:11 +epoch [50/50] batch [195/1000] time 1.548 (1.569) data 0.000 (0.006) loss 1.3662 (1.0787) acc 75.0000 (72.8045) lr 7.8853e-06 eta 0:21:03 +epoch [50/50] batch [200/1000] time 1.576 (1.569) data 0.000 (0.006) loss 0.5459 (1.0729) acc 93.7500 (72.8906) lr 7.8853e-06 eta 0:20:55 +epoch [50/50] batch [205/1000] time 1.555 (1.569) data 0.000 (0.005) loss 0.8931 (1.0781) acc 75.0000 (72.8659) lr 7.8853e-06 eta 0:20:47 +epoch [50/50] batch [210/1000] time 1.588 (1.568) data 0.000 (0.005) loss 1.3525 (1.0799) acc 65.6250 (72.8125) lr 7.8853e-06 eta 0:20:39 +epoch [50/50] batch [215/1000] time 1.592 (1.568) data 0.001 (0.005) loss 1.0283 (1.0800) acc 71.8750 (72.7762) lr 7.8853e-06 eta 0:20:31 +epoch [50/50] batch [220/1000] time 1.568 (1.568) data 0.000 (0.005) loss 0.8496 (1.0805) acc 84.3750 (72.7415) lr 7.8853e-06 eta 0:20:23 +epoch [50/50] batch [225/1000] time 1.555 (1.568) data 0.000 (0.005) loss 0.9253 (1.0795) acc 75.0000 (72.7639) lr 7.8853e-06 eta 0:20:15 +epoch [50/50] batch [230/1000] time 1.544 (1.569) data 0.000 (0.005) loss 1.2188 (1.0850) acc 62.5000 (72.6087) lr 7.8853e-06 eta 0:20:07 +epoch [50/50] batch [235/1000] time 1.555 (1.569) data 0.000 (0.005) loss 0.4836 (1.0818) acc 87.5000 (72.6862) lr 7.8853e-06 eta 0:19:59 +epoch [50/50] batch [240/1000] time 1.562 (1.568) data 0.000 (0.005) loss 1.2295 (1.0880) acc 65.6250 (72.4349) lr 7.8853e-06 eta 0:19:51 +epoch [50/50] batch [245/1000] time 1.535 (1.568) data 0.001 (0.005) loss 1.3018 (1.0848) acc 65.6250 (72.4490) lr 7.8853e-06 
eta 0:19:44 +epoch [50/50] batch [250/1000] time 1.551 (1.568) data 0.000 (0.005) loss 0.9829 (1.0835) acc 68.7500 (72.4500) lr 7.8853e-06 eta 0:19:35 +epoch [50/50] batch [255/1000] time 1.559 (1.568) data 0.000 (0.004) loss 0.5801 (1.0850) acc 81.2500 (72.4387) lr 7.8853e-06 eta 0:19:27 +epoch [50/50] batch [260/1000] time 1.543 (1.568) data 0.000 (0.004) loss 0.7041 (1.0849) acc 87.5000 (72.4519) lr 7.8853e-06 eta 0:19:20 +epoch [50/50] batch [265/1000] time 1.554 (1.567) data 0.000 (0.004) loss 0.9453 (1.0824) acc 75.0000 (72.5118) lr 7.8853e-06 eta 0:19:12 +epoch [50/50] batch [270/1000] time 1.566 (1.567) data 0.000 (0.004) loss 1.0684 (1.0859) acc 78.1250 (72.4306) lr 7.8853e-06 eta 0:19:04 +epoch [50/50] batch [275/1000] time 1.548 (1.568) data 0.000 (0.004) loss 0.8257 (1.0849) acc 87.5000 (72.4886) lr 7.8853e-06 eta 0:18:56 +epoch [50/50] batch [280/1000] time 1.564 (1.568) data 0.001 (0.004) loss 0.5381 (1.0868) acc 84.3750 (72.3996) lr 7.8853e-06 eta 0:18:48 +epoch [50/50] batch [285/1000] time 1.579 (1.567) data 0.000 (0.004) loss 1.2246 (1.0897) acc 65.6250 (72.3465) lr 7.8853e-06 eta 0:18:40 +epoch [50/50] batch [290/1000] time 1.573 (1.567) data 0.000 (0.004) loss 0.7295 (1.0868) acc 81.2500 (72.4138) lr 7.8853e-06 eta 0:18:32 +epoch [50/50] batch [295/1000] time 1.576 (1.567) data 0.000 (0.004) loss 0.7300 (1.0888) acc 75.0000 (72.3729) lr 7.8853e-06 eta 0:18:24 +epoch [50/50] batch [300/1000] time 1.545 (1.567) data 0.000 (0.004) loss 1.1582 (1.0886) acc 78.1250 (72.4062) lr 7.8853e-06 eta 0:18:16 +epoch [50/50] batch [305/1000] time 1.526 (1.567) data 0.000 (0.004) loss 1.4062 (1.0897) acc 68.7500 (72.4693) lr 7.8853e-06 eta 0:18:08 +epoch [50/50] batch [310/1000] time 1.559 (1.567) data 0.000 (0.004) loss 0.8623 (1.0917) acc 75.0000 (72.4395) lr 7.8853e-06 eta 0:18:00 +epoch [50/50] batch [315/1000] time 1.566 (1.567) data 0.001 (0.004) loss 0.9297 (1.0868) acc 75.0000 (72.5496) lr 7.8853e-06 eta 0:17:53 +epoch [50/50] batch [320/1000] time 
1.562 (1.566) data 0.000 (0.004) loss 1.3691 (1.0873) acc 68.7500 (72.5684) lr 7.8853e-06 eta 0:17:45 +epoch [50/50] batch [325/1000] time 1.557 (1.566) data 0.001 (0.004) loss 1.2246 (1.0883) acc 68.7500 (72.5865) lr 7.8853e-06 eta 0:17:37 +epoch [50/50] batch [330/1000] time 1.549 (1.566) data 0.000 (0.004) loss 1.0830 (1.0882) acc 65.6250 (72.5473) lr 7.8853e-06 eta 0:17:29 +epoch [50/50] batch [335/1000] time 1.543 (1.567) data 0.001 (0.004) loss 0.7710 (1.0878) acc 78.1250 (72.6026) lr 7.8853e-06 eta 0:17:22 +epoch [50/50] batch [340/1000] time 1.569 (1.567) data 0.001 (0.003) loss 1.0020 (1.0865) acc 81.2500 (72.6379) lr 7.8853e-06 eta 0:17:14 +epoch [50/50] batch [345/1000] time 1.549 (1.567) data 0.000 (0.003) loss 1.0801 (1.0875) acc 68.7500 (72.5906) lr 7.8853e-06 eta 0:17:06 +epoch [50/50] batch [350/1000] time 1.528 (1.567) data 0.000 (0.003) loss 1.2227 (1.0867) acc 59.3750 (72.5357) lr 7.8853e-06 eta 0:16:58 +epoch [50/50] batch [355/1000] time 1.552 (1.567) data 0.001 (0.003) loss 1.0205 (1.0874) acc 75.0000 (72.5440) lr 7.8853e-06 eta 0:16:50 +epoch [50/50] batch [360/1000] time 1.589 (1.567) data 0.000 (0.003) loss 0.5459 (1.0858) acc 90.6250 (72.6128) lr 7.8853e-06 eta 0:16:42 +epoch [50/50] batch [365/1000] time 1.567 (1.567) data 0.001 (0.003) loss 1.2852 (1.0859) acc 62.5000 (72.5771) lr 7.8853e-06 eta 0:16:34 +epoch [50/50] batch [370/1000] time 1.553 (1.566) data 0.000 (0.003) loss 1.2705 (1.0848) acc 62.5000 (72.5591) lr 7.8853e-06 eta 0:16:26 +epoch [50/50] batch [375/1000] time 1.540 (1.566) data 0.000 (0.003) loss 1.5322 (1.0848) acc 59.3750 (72.5333) lr 7.8853e-06 eta 0:16:18 +epoch [50/50] batch [380/1000] time 1.565 (1.567) data 0.000 (0.003) loss 1.2578 (1.0837) acc 62.5000 (72.5493) lr 7.8853e-06 eta 0:16:11 +epoch [50/50] batch [385/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.2568 (1.0822) acc 68.7500 (72.5731) lr 7.8853e-06 eta 0:16:03 +epoch [50/50] batch [390/1000] time 1.549 (1.566) data 0.000 (0.003) loss 1.2559 (1.0836) 
acc 65.6250 (72.5721) lr 7.8853e-06 eta 0:15:55 +epoch [50/50] batch [395/1000] time 1.563 (1.566) data 0.000 (0.003) loss 1.4160 (1.0844) acc 78.1250 (72.6028) lr 7.8853e-06 eta 0:15:47 +epoch [50/50] batch [400/1000] time 1.551 (1.566) data 0.000 (0.003) loss 0.4646 (1.0841) acc 84.3750 (72.6562) lr 7.8853e-06 eta 0:15:39 +epoch [50/50] batch [405/1000] time 1.577 (1.566) data 0.000 (0.003) loss 0.5991 (1.0828) acc 87.5000 (72.7083) lr 7.8853e-06 eta 0:15:31 +epoch [50/50] batch [410/1000] time 1.560 (1.566) data 0.000 (0.003) loss 1.2842 (1.0821) acc 71.8750 (72.7591) lr 7.8853e-06 eta 0:15:24 +epoch [50/50] batch [415/1000] time 1.549 (1.566) data 0.001 (0.003) loss 0.8105 (1.0809) acc 84.3750 (72.7711) lr 7.8853e-06 eta 0:15:16 +epoch [50/50] batch [420/1000] time 1.548 (1.566) data 0.000 (0.003) loss 1.3008 (1.0809) acc 71.8750 (72.7753) lr 7.8853e-06 eta 0:15:08 +epoch [50/50] batch [425/1000] time 1.552 (1.566) data 0.001 (0.003) loss 1.6201 (1.0798) acc 65.6250 (72.8382) lr 7.8853e-06 eta 0:15:00 +epoch [50/50] batch [430/1000] time 1.528 (1.566) data 0.000 (0.003) loss 1.1152 (1.0815) acc 71.8750 (72.8561) lr 7.8853e-06 eta 0:14:52 +epoch [50/50] batch [435/1000] time 1.568 (1.566) data 0.000 (0.003) loss 0.6997 (1.0826) acc 81.2500 (72.8520) lr 7.8853e-06 eta 0:14:44 +epoch [50/50] batch [440/1000] time 1.572 (1.566) data 0.000 (0.003) loss 1.3154 (1.0830) acc 71.8750 (72.8267) lr 7.8853e-06 eta 0:14:37 +epoch [50/50] batch [445/1000] time 1.545 (1.566) data 0.000 (0.003) loss 0.6631 (1.0813) acc 78.1250 (72.8511) lr 7.8853e-06 eta 0:14:29 +epoch [50/50] batch [450/1000] time 1.553 (1.566) data 0.000 (0.003) loss 1.1582 (1.0815) acc 65.6250 (72.7917) lr 7.8853e-06 eta 0:14:21 +epoch [50/50] batch [455/1000] time 1.560 (1.566) data 0.001 (0.003) loss 1.0898 (1.0816) acc 75.0000 (72.8297) lr 7.8853e-06 eta 0:14:13 +epoch [50/50] batch [460/1000] time 1.570 (1.566) data 0.000 (0.003) loss 1.7441 (1.0819) acc 62.5000 (72.8261) lr 7.8853e-06 eta 0:14:05 
+epoch [50/50] batch [465/1000] time 1.574 (1.566) data 0.000 (0.003) loss 1.0010 (1.0804) acc 78.1250 (72.9032) lr 7.8853e-06 eta 0:13:57 +epoch [50/50] batch [470/1000] time 1.567 (1.566) data 0.000 (0.003) loss 1.0918 (1.0800) acc 78.1250 (72.8923) lr 7.8853e-06 eta 0:13:49 +epoch [50/50] batch [475/1000] time 1.556 (1.566) data 0.000 (0.003) loss 1.4980 (1.0800) acc 59.3750 (72.8750) lr 7.8853e-06 eta 0:13:42 +epoch [50/50] batch [480/1000] time 1.547 (1.566) data 0.000 (0.003) loss 0.8604 (1.0796) acc 78.1250 (72.8385) lr 7.8853e-06 eta 0:13:34 +epoch [50/50] batch [485/1000] time 1.736 (1.566) data 0.000 (0.003) loss 0.7754 (1.0791) acc 84.3750 (72.8673) lr 7.8853e-06 eta 0:13:26 +epoch [50/50] batch [490/1000] time 1.595 (1.566) data 0.001 (0.003) loss 1.8789 (1.0795) acc 59.3750 (72.8827) lr 7.8853e-06 eta 0:13:18 +epoch [50/50] batch [495/1000] time 1.566 (1.566) data 0.000 (0.003) loss 0.6426 (1.0785) acc 87.5000 (72.9167) lr 7.8853e-06 eta 0:13:10 +epoch [50/50] batch [500/1000] time 1.581 (1.566) data 0.000 (0.002) loss 1.0635 (1.0784) acc 75.0000 (72.9437) lr 7.8853e-06 eta 0:13:03 +epoch [50/50] batch [505/1000] time 1.586 (1.566) data 0.000 (0.002) loss 1.5293 (1.0792) acc 53.1250 (72.8899) lr 7.8853e-06 eta 0:12:55 +epoch [50/50] batch [510/1000] time 1.586 (1.566) data 0.000 (0.002) loss 1.3057 (1.0789) acc 78.1250 (72.9105) lr 7.8853e-06 eta 0:12:47 +epoch [50/50] batch [515/1000] time 1.551 (1.566) data 0.000 (0.002) loss 1.5693 (1.0795) acc 56.2500 (72.8883) lr 7.8853e-06 eta 0:12:39 +epoch [50/50] batch [520/1000] time 1.550 (1.566) data 0.000 (0.002) loss 1.3008 (1.0793) acc 75.0000 (72.8846) lr 7.8853e-06 eta 0:12:31 +epoch [50/50] batch [525/1000] time 1.579 (1.566) data 0.000 (0.002) loss 1.1357 (1.0797) acc 65.6250 (72.8512) lr 7.8853e-06 eta 0:12:24 +epoch [50/50] batch [530/1000] time 1.721 (1.567) data 0.001 (0.002) loss 1.1523 (1.0775) acc 68.7500 (72.8715) lr 7.8853e-06 eta 0:12:16 +epoch [50/50] batch [535/1000] time 1.555 (1.567) 
data 0.001 (0.002) loss 1.0986 (1.0769) acc 78.1250 (72.8914) lr 7.8853e-06 eta 0:12:08 +epoch [50/50] batch [540/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.2764 (1.0758) acc 65.6250 (72.8935) lr 7.8853e-06 eta 0:12:00 +epoch [50/50] batch [545/1000] time 1.550 (1.566) data 0.000 (0.002) loss 0.9902 (1.0756) acc 81.2500 (72.9014) lr 7.8853e-06 eta 0:11:52 +epoch [50/50] batch [550/1000] time 1.573 (1.566) data 0.000 (0.002) loss 0.9731 (1.0734) acc 75.0000 (72.9432) lr 7.8853e-06 eta 0:11:44 +epoch [50/50] batch [555/1000] time 1.572 (1.566) data 0.000 (0.002) loss 0.6289 (1.0720) acc 75.0000 (72.9505) lr 7.8853e-06 eta 0:11:37 +epoch [50/50] batch [560/1000] time 1.563 (1.566) data 0.001 (0.002) loss 0.8696 (1.0710) acc 75.0000 (72.9297) lr 7.8853e-06 eta 0:11:29 +epoch [50/50] batch [565/1000] time 1.587 (1.566) data 0.000 (0.002) loss 1.3652 (1.0712) acc 62.5000 (72.9259) lr 7.8853e-06 eta 0:11:21 +epoch [50/50] batch [570/1000] time 1.549 (1.566) data 0.000 (0.002) loss 0.9160 (1.0698) acc 75.0000 (72.9441) lr 7.8853e-06 eta 0:11:13 +epoch [50/50] batch [575/1000] time 1.542 (1.566) data 0.000 (0.002) loss 1.2744 (1.0679) acc 75.0000 (73.0272) lr 7.8853e-06 eta 0:11:05 +epoch [50/50] batch [580/1000] time 1.569 (1.566) data 0.000 (0.002) loss 0.9712 (1.0686) acc 81.2500 (73.0334) lr 7.8853e-06 eta 0:10:57 +epoch [50/50] batch [585/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.8423 (1.0685) acc 78.1250 (73.0449) lr 7.8853e-06 eta 0:10:50 +epoch [50/50] batch [590/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.2646 (1.0688) acc 78.1250 (73.0773) lr 7.8853e-06 eta 0:10:42 +epoch [50/50] batch [595/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.8589 (1.0693) acc 78.1250 (73.0725) lr 7.8853e-06 eta 0:10:34 +epoch [50/50] batch [600/1000] time 1.527 (1.566) data 0.000 (0.002) loss 1.1191 (1.0696) acc 68.7500 (73.0677) lr 7.8853e-06 eta 0:10:26 +epoch [50/50] batch [605/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.2188 (1.0696) acc 56.2500 
(73.0682) lr 7.8853e-06 eta 0:10:18 +epoch [50/50] batch [610/1000] time 1.576 (1.566) data 0.001 (0.002) loss 1.1641 (1.0676) acc 65.6250 (73.0994) lr 7.8853e-06 eta 0:10:10 +epoch [50/50] batch [615/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.2832 (1.0687) acc 75.0000 (73.0894) lr 7.8853e-06 eta 0:10:02 +epoch [50/50] batch [620/1000] time 1.556 (1.566) data 0.000 (0.002) loss 1.3730 (1.0695) acc 65.6250 (73.0595) lr 7.8853e-06 eta 0:09:54 +epoch [50/50] batch [625/1000] time 1.577 (1.566) data 0.001 (0.002) loss 1.0713 (1.0688) acc 78.1250 (73.0850) lr 7.8853e-06 eta 0:09:47 +epoch [50/50] batch [630/1000] time 1.573 (1.566) data 0.001 (0.002) loss 1.0537 (1.0691) acc 71.8750 (73.0704) lr 7.8853e-06 eta 0:09:39 +epoch [50/50] batch [635/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.0732 (1.0697) acc 65.6250 (73.0610) lr 7.8853e-06 eta 0:09:31 +epoch [50/50] batch [640/1000] time 1.578 (1.566) data 0.000 (0.002) loss 0.8198 (1.0691) acc 78.1250 (73.0762) lr 7.8853e-06 eta 0:09:23 +epoch [50/50] batch [645/1000] time 1.561 (1.566) data 0.001 (0.002) loss 0.7603 (1.0667) acc 81.2500 (73.1008) lr 7.8853e-06 eta 0:09:15 +epoch [50/50] batch [650/1000] time 1.561 (1.566) data 0.000 (0.002) loss 1.5254 (1.0674) acc 62.5000 (73.0913) lr 7.8853e-06 eta 0:09:08 +epoch [50/50] batch [655/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.9697 (1.0701) acc 75.0000 (73.0630) lr 7.8853e-06 eta 0:09:00 +epoch [50/50] batch [660/1000] time 1.571 (1.566) data 0.001 (0.002) loss 0.9951 (1.0726) acc 71.8750 (73.0208) lr 7.8853e-06 eta 0:08:52 +epoch [50/50] batch [665/1000] time 1.571 (1.566) data 0.000 (0.002) loss 0.8115 (1.0724) acc 71.8750 (72.9981) lr 7.8853e-06 eta 0:08:44 +epoch [50/50] batch [670/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.8735 (1.0713) acc 81.2500 (73.0317) lr 7.8853e-06 eta 0:08:36 +epoch [50/50] batch [675/1000] time 1.589 (1.566) data 0.000 (0.002) loss 1.5166 (1.0717) acc 65.6250 (73.0278) lr 7.8853e-06 eta 0:08:28 +epoch [50/50] 
batch [680/1000] time 1.562 (1.566) data 0.001 (0.002) loss 0.9185 (1.0713) acc 71.8750 (73.0515) lr 7.8853e-06 eta 0:08:21 +epoch [50/50] batch [685/1000] time 1.545 (1.566) data 0.001 (0.002) loss 1.1885 (1.0708) acc 68.7500 (73.0566) lr 7.8853e-06 eta 0:08:13 +epoch [50/50] batch [690/1000] time 1.554 (1.566) data 0.000 (0.002) loss 0.6230 (1.0709) acc 81.2500 (73.0616) lr 7.8853e-06 eta 0:08:05 +epoch [50/50] batch [695/1000] time 1.559 (1.566) data 0.000 (0.002) loss 0.9224 (1.0701) acc 71.8750 (73.0621) lr 7.8853e-06 eta 0:07:57 +epoch [50/50] batch [700/1000] time 1.558 (1.566) data 0.001 (0.002) loss 1.2695 (1.0716) acc 71.8750 (73.0625) lr 7.8853e-06 eta 0:07:49 +epoch [50/50] batch [705/1000] time 1.551 (1.566) data 0.000 (0.002) loss 1.8408 (1.0722) acc 62.5000 (73.0585) lr 7.8853e-06 eta 0:07:41 +epoch [50/50] batch [710/1000] time 1.563 (1.566) data 0.000 (0.002) loss 1.3545 (1.0725) acc 68.7500 (73.0194) lr 7.8853e-06 eta 0:07:34 +epoch [50/50] batch [715/1000] time 1.561 (1.566) data 0.001 (0.002) loss 0.9990 (1.0734) acc 65.6250 (73.0070) lr 7.8853e-06 eta 0:07:26 +epoch [50/50] batch [720/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.6348 (1.0736) acc 53.1250 (73.0035) lr 7.8853e-06 eta 0:07:18 +epoch [50/50] batch [725/1000] time 1.554 (1.566) data 0.001 (0.002) loss 0.7690 (1.0724) acc 71.8750 (73.0431) lr 7.8853e-06 eta 0:07:10 +epoch [50/50] batch [730/1000] time 1.555 (1.566) data 0.000 (0.002) loss 0.4810 (1.0713) acc 84.3750 (73.0865) lr 7.8853e-06 eta 0:07:02 +epoch [50/50] batch [735/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.3604 (1.0723) acc 71.8750 (73.0782) lr 7.8853e-06 eta 0:06:54 +epoch [50/50] batch [740/1000] time 1.580 (1.566) data 0.001 (0.002) loss 0.6548 (1.0714) acc 81.2500 (73.1039) lr 7.8853e-06 eta 0:06:47 +epoch [50/50] batch [745/1000] time 1.582 (1.566) data 0.001 (0.002) loss 1.3291 (1.0707) acc 75.0000 (73.1376) lr 7.8853e-06 eta 0:06:39 +epoch [50/50] batch [750/1000] time 1.545 (1.566) data 0.000 
(0.002) loss 1.5938 (1.0724) acc 62.5000 (73.0792) lr 7.8853e-06 eta 0:06:31 +epoch [50/50] batch [755/1000] time 1.570 (1.566) data 0.000 (0.002) loss 1.2754 (1.0728) acc 75.0000 (73.0629) lr 7.8853e-06 eta 0:06:23 +epoch [50/50] batch [760/1000] time 1.566 (1.566) data 0.000 (0.002) loss 0.6982 (1.0710) acc 78.1250 (73.1003) lr 7.8853e-06 eta 0:06:15 +epoch [50/50] batch [765/1000] time 1.582 (1.566) data 0.000 (0.002) loss 1.3672 (1.0707) acc 59.3750 (73.0923) lr 7.8853e-06 eta 0:06:08 +epoch [50/50] batch [770/1000] time 1.580 (1.566) data 0.000 (0.002) loss 0.9966 (1.0705) acc 84.3750 (73.1169) lr 7.8853e-06 eta 0:06:00 +epoch [50/50] batch [775/1000] time 1.554 (1.566) data 0.000 (0.002) loss 1.5312 (1.0703) acc 59.3750 (73.1331) lr 7.8853e-06 eta 0:05:52 +epoch [50/50] batch [780/1000] time 1.534 (1.566) data 0.000 (0.002) loss 1.7314 (1.0711) acc 62.5000 (73.1170) lr 7.8853e-06 eta 0:05:44 +epoch [50/50] batch [785/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.6562 (1.0705) acc 53.1250 (73.1091) lr 7.8853e-06 eta 0:05:36 +epoch [50/50] batch [790/1000] time 1.519 (1.566) data 0.001 (0.002) loss 1.1436 (1.0694) acc 78.1250 (73.1290) lr 7.8853e-06 eta 0:05:28 +epoch [50/50] batch [795/1000] time 1.545 (1.566) data 0.000 (0.002) loss 1.1865 (1.0698) acc 65.6250 (73.1250) lr 7.8853e-06 eta 0:05:21 +epoch [50/50] batch [800/1000] time 1.562 (1.566) data 0.000 (0.002) loss 1.5068 (1.0698) acc 56.2500 (73.1133) lr 7.8853e-06 eta 0:05:13 +epoch [50/50] batch [805/1000] time 1.563 (1.566) data 0.000 (0.002) loss 0.8252 (1.0689) acc 84.3750 (73.1211) lr 7.8853e-06 eta 0:05:05 +epoch [50/50] batch [810/1000] time 1.560 (1.566) data 0.001 (0.002) loss 0.9990 (1.0701) acc 65.6250 (73.0787) lr 7.8853e-06 eta 0:04:57 +epoch [50/50] batch [815/1000] time 1.579 (1.566) data 0.000 (0.002) loss 0.7119 (1.0691) acc 78.1250 (73.0828) lr 7.8853e-06 eta 0:04:49 +epoch [50/50] batch [820/1000] time 1.557 (1.566) data 0.001 (0.002) loss 1.1680 (1.0687) acc 71.8750 (73.0907) lr 
7.8853e-06 eta 0:04:41 +epoch [50/50] batch [825/1000] time 1.568 (1.566) data 0.000 (0.002) loss 1.3418 (1.0687) acc 65.6250 (73.0871) lr 7.8853e-06 eta 0:04:34 +epoch [50/50] batch [830/1000] time 1.550 (1.566) data 0.000 (0.002) loss 0.8164 (1.0689) acc 78.1250 (73.0761) lr 7.8853e-06 eta 0:04:26 +epoch [50/50] batch [835/1000] time 1.550 (1.566) data 0.001 (0.002) loss 0.8809 (1.0677) acc 71.8750 (73.0726) lr 7.8853e-06 eta 0:04:18 +epoch [50/50] batch [840/1000] time 1.556 (1.566) data 0.001 (0.002) loss 1.3174 (1.0682) acc 71.8750 (73.0729) lr 7.8853e-06 eta 0:04:10 +epoch [50/50] batch [845/1000] time 1.547 (1.566) data 0.000 (0.002) loss 0.7095 (1.0684) acc 75.0000 (73.0214) lr 7.8853e-06 eta 0:04:02 +epoch [50/50] batch [850/1000] time 1.581 (1.566) data 0.000 (0.002) loss 0.8779 (1.0684) acc 71.8750 (73.0147) lr 7.8853e-06 eta 0:03:54 +epoch [50/50] batch [855/1000] time 1.557 (1.566) data 0.001 (0.002) loss 0.7856 (1.0675) acc 81.2500 (73.0190) lr 7.8853e-06 eta 0:03:47 +epoch [50/50] batch [860/1000] time 1.551 (1.566) data 0.001 (0.002) loss 1.7520 (1.0676) acc 53.1250 (73.0269) lr 7.8853e-06 eta 0:03:39 +epoch [50/50] batch [865/1000] time 1.546 (1.566) data 0.000 (0.002) loss 0.7539 (1.0685) acc 81.2500 (73.0202) lr 7.8853e-06 eta 0:03:31 +epoch [50/50] batch [870/1000] time 1.564 (1.566) data 0.000 (0.002) loss 1.6504 (1.0688) acc 56.2500 (73.0029) lr 7.8853e-06 eta 0:03:23 +epoch [50/50] batch [875/1000] time 1.744 (1.566) data 0.001 (0.002) loss 1.8262 (1.0686) acc 59.3750 (73.0036) lr 7.8853e-06 eta 0:03:15 +epoch [50/50] batch [880/1000] time 1.536 (1.566) data 0.000 (0.002) loss 1.9727 (1.0697) acc 62.5000 (72.9759) lr 7.8853e-06 eta 0:03:07 +epoch [50/50] batch [885/1000] time 1.545 (1.566) data 0.001 (0.002) loss 0.8506 (1.0691) acc 75.0000 (72.9908) lr 7.8853e-06 eta 0:03:00 +epoch [50/50] batch [890/1000] time 1.558 (1.566) data 0.000 (0.002) loss 1.8564 (1.0708) acc 59.3750 (72.9529) lr 7.8853e-06 eta 0:02:52 +epoch [50/50] batch 
[895/1000] time 1.553 (1.566) data 0.000 (0.002) loss 0.7871 (1.0700) acc 78.1250 (72.9644) lr 7.8853e-06 eta 0:02:44 +epoch [50/50] batch [900/1000] time 1.547 (1.566) data 0.000 (0.002) loss 0.8916 (1.0696) acc 75.0000 (72.9826) lr 7.8853e-06 eta 0:02:36 +epoch [50/50] batch [905/1000] time 1.560 (1.566) data 0.000 (0.002) loss 1.3662 (1.0705) acc 62.5000 (72.9454) lr 7.8853e-06 eta 0:02:28 +epoch [50/50] batch [910/1000] time 1.581 (1.566) data 0.001 (0.002) loss 1.4375 (1.0720) acc 65.6250 (72.9224) lr 7.8853e-06 eta 0:02:20 +epoch [50/50] batch [915/1000] time 1.544 (1.566) data 0.000 (0.002) loss 0.4211 (1.0712) acc 90.6250 (72.9577) lr 7.8853e-06 eta 0:02:13 +epoch [50/50] batch [920/1000] time 1.540 (1.566) data 0.000 (0.002) loss 1.1621 (1.0710) acc 68.7500 (72.9552) lr 7.8853e-06 eta 0:02:05 +epoch [50/50] batch [925/1000] time 1.573 (1.566) data 0.000 (0.002) loss 1.0742 (1.0708) acc 65.6250 (72.9764) lr 7.8853e-06 eta 0:01:57 +epoch [50/50] batch [930/1000] time 1.584 (1.566) data 0.001 (0.002) loss 0.6138 (1.0708) acc 81.2500 (73.0074) lr 7.8853e-06 eta 0:01:49 +epoch [50/50] batch [935/1000] time 1.576 (1.566) data 0.000 (0.002) loss 1.5156 (1.0722) acc 71.8750 (72.9980) lr 7.8853e-06 eta 0:01:41 +epoch [50/50] batch [940/1000] time 1.565 (1.566) data 0.001 (0.002) loss 1.2344 (1.0719) acc 75.0000 (72.9887) lr 7.8853e-06 eta 0:01:33 +epoch [50/50] batch [945/1000] time 1.559 (1.566) data 0.000 (0.002) loss 0.6201 (1.0711) acc 84.3750 (72.9993) lr 7.8853e-06 eta 0:01:26 +epoch [50/50] batch [950/1000] time 1.561 (1.566) data 0.001 (0.002) loss 1.8535 (1.0719) acc 53.1250 (72.9605) lr 7.8853e-06 eta 0:01:18 +epoch [50/50] batch [955/1000] time 1.541 (1.566) data 0.000 (0.002) loss 2.2500 (1.0736) acc 53.1250 (72.9385) lr 7.8853e-06 eta 0:01:10 +epoch [50/50] batch [960/1000] time 1.565 (1.566) data 0.000 (0.002) loss 1.0967 (1.0738) acc 68.7500 (72.9362) lr 7.8853e-06 eta 0:01:02 +epoch [50/50] batch [965/1000] time 1.558 (1.566) data 0.000 (0.002) loss 
1.3193 (1.0746) acc 65.6250 (72.9372) lr 7.8853e-06 eta 0:00:54 +epoch [50/50] batch [970/1000] time 1.585 (1.566) data 0.000 (0.002) loss 1.4512 (1.0735) acc 75.0000 (72.9832) lr 7.8853e-06 eta 0:00:46 +epoch [50/50] batch [975/1000] time 1.569 (1.566) data 0.000 (0.002) loss 1.0928 (1.0716) acc 75.0000 (73.0064) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [980/1000] time 1.566 (1.566) data 0.001 (0.002) loss 1.4277 (1.0711) acc 59.3750 (73.0070) lr 7.8853e-06 eta 0:00:31 +epoch [50/50] batch [985/1000] time 1.574 (1.566) data 0.001 (0.002) loss 1.4570 (1.0719) acc 68.7500 (73.0076) lr 7.8853e-06 eta 0:00:23 +epoch [50/50] batch [990/1000] time 1.554 (1.566) data 0.000 (0.001) loss 1.1445 (1.0714) acc 78.1250 (73.0271) lr 7.8853e-06 eta 0:00:15 +epoch [50/50] batch [995/1000] time 1.555 (1.566) data 0.000 (0.001) loss 1.4424 (1.0721) acc 68.7500 (73.0276) lr 7.8853e-06 eta 0:00:07 +epoch [50/50] batch [1000/1000] time 1.575 (1.566) data 0.000 (0.001) loss 1.7236 (1.0738) acc 65.6250 (73.0062) lr 1.9733e-06 eta 0:00:00 +Evaluate on the *val* set +=> result +* total: 50,000 +* correct: 39,372 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 78.3% +Checkpoint saved to output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 +Finish training +Deploy the model with the best val performance +Loading weights to prompt_learner from "output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar" (epoch = 43) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 39,401 +* accuracy: 78.8% +* error: 21.2% +* macro_f1: 78.4% +Elapsed: 1 day, 3:08:10 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 
00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar new file mode 100644 index 00000000..3a7f02e7 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model-best.pth.tar differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..02c642ce Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699553366.ckb-gpu-v.mitre.org.263339.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699553366.ckb-gpu-v.mitre.org.263339.0 new file mode 100644 index 00000000..39a5b078 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_bestval_ep50_32shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1699553366.ckb-gpu-v.mitre.org.263339.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..73bd9893 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,5342 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + 
BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: NVIDIA A100-SXM4-40GB +GPU 1: NVIDIA A100-SXM4-40GB +GPU 2: NVIDIA A100-SXM4-40GB +GPU 3: NVIDIA A100-SXM4-40GB + +Nvidia driver version: 525.125.06 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 43 bits physical, 48 bits virtual +CPU(s): 256 +On-line CPU(s) list: 0-255 +Thread(s) per core: 2 +Core(s) per socket: 64 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: AuthenticAMD +CPU family: 23 +Model: 49 +Model name: AMD EPYC 7H12 64-Core Processor +Stepping: 0 +Frequency boost: enabled +CPU MHz: 1493.800 +CPU max MHz: 2600.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5200.20 +Virtualization: AMD-V +L1d cache: 4 MiB +L1i cache: 4 MiB +L2 cache: 64 MiB +L3 cache: 512 MiB +NUMA node0 CPU(s): 0-63,128-191 +NUMA node1 CPU(s): 64-127,192-255 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected 
+Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Vulnerable +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca sme sev sev_es + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 
+[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/500] time 0.899 (4.360) data 0.000 (0.173) loss 2.5977 (3.2941) acc 37.5000 (35.6250) lr 1.0000e-05 eta 1 day, 6:16:25 +epoch [1/50] batch [10/500] time 0.881 (2.619) data 0.000 (0.087) loss 2.7363 (3.0844) acc 43.7500 (40.0000) lr 1.0000e-05 eta 18:10:39 +epoch [1/50] batch [15/500] time 0.902 (2.046) data 0.000 (0.058) loss 2.2129 (2.7855) acc 50.0000 (44.5833) lr 1.0000e-05 eta 14:12:04 +epoch [1/50] batch [20/500] time 0.860 (1.755) data 0.000 (0.044) loss 2.5312 (2.6287) acc 50.0000 (48.2812) lr 1.0000e-05 eta 12:10:41 +epoch [1/50] batch [25/500] time 0.900 (1.583) data 0.000 (0.035) loss 1.8916 (2.5268) acc 56.2500 (49.1250) lr 1.0000e-05 eta 10:58:52 +epoch [1/50] batch [30/500] time 0.887 (1.469) data 0.000 (0.029) loss 1.5869 (2.4073) acc 65.6250 (51.1458) lr 1.0000e-05 eta 10:11:17 +epoch [1/50] batch [35/500] time 0.880 (1.386) data 0.000 (0.025) loss 1.3887 (2.3064) acc 59.3750 (51.7857) lr 1.0000e-05 eta 9:36:50 +epoch [1/50] batch [40/500] time 0.878 (1.323) data 0.000 (0.022) loss 2.0977 (2.2665) acc 56.2500 (52.4219) lr 1.0000e-05 eta 9:10:25 +epoch [1/50] batch [45/500] time 0.872 (1.275) data 0.000 (0.019) loss 2.7969 (2.2371) acc 43.7500 (52.7778) lr 1.0000e-05 eta 8:50:24 +epoch [1/50] batch [50/500] time 0.882 (1.237) data 0.000 (0.018) loss 1.8838 (2.1899) acc 56.2500 (53.0000) lr 1.0000e-05 eta 8:34:34 +epoch [1/50] batch [55/500] time 0.867 (1.204) data 0.000 (0.016) loss 1.3047 (2.1260) acc 78.1250 (54.2614) lr 1.0000e-05 eta 8:20:44 +epoch [1/50] batch [60/500] time 0.900 (1.176) data 0.000 (0.015) loss 1.0781 (2.0804) acc 68.7500 (54.6875) lr 1.0000e-05 eta 8:09:01 +epoch [1/50] batch [65/500] time 0.884 (1.154) data 0.000 (0.014) loss 1.3164 (2.0520) acc 59.3750 (54.9038) lr 1.0000e-05 eta 7:59:43 +epoch [1/50] batch 
[70/500] time 0.878 (1.135) data 0.000 (0.013) loss 2.0781 (2.0320) acc 62.5000 (55.1339) lr 1.0000e-05 eta 7:51:26 +epoch [1/50] batch [75/500] time 0.881 (1.117) data 0.000 (0.012) loss 1.3320 (1.9989) acc 65.6250 (55.6667) lr 1.0000e-05 eta 7:44:07 +epoch [1/50] batch [80/500] time 0.895 (1.103) data 0.000 (0.011) loss 1.0059 (1.9745) acc 68.7500 (56.3672) lr 1.0000e-05 eta 7:38:06 +epoch [1/50] batch [85/500] time 0.872 (1.090) data 0.000 (0.010) loss 1.5674 (1.9596) acc 71.8750 (56.8015) lr 1.0000e-05 eta 7:32:39 +epoch [1/50] batch [90/500] time 0.867 (1.078) data 0.000 (0.010) loss 2.5039 (1.9504) acc 50.0000 (56.7708) lr 1.0000e-05 eta 7:27:42 +epoch [1/50] batch [95/500] time 0.886 (1.069) data 0.000 (0.009) loss 1.5625 (1.9373) acc 65.6250 (57.2368) lr 1.0000e-05 eta 7:23:44 +epoch [1/50] batch [100/500] time 0.884 (1.060) data 0.000 (0.009) loss 1.6953 (1.9140) acc 62.5000 (57.6562) lr 1.0000e-05 eta 7:19:46 +epoch [1/50] batch [105/500] time 0.882 (1.051) data 0.000 (0.008) loss 1.8291 (1.9147) acc 68.7500 (57.5595) lr 1.0000e-05 eta 7:15:55 +epoch [1/50] batch [110/500] time 0.877 (1.043) data 0.000 (0.008) loss 1.5049 (1.8879) acc 68.7500 (58.2955) lr 1.0000e-05 eta 7:12:36 +epoch [1/50] batch [115/500] time 0.887 (1.036) data 0.000 (0.008) loss 1.9424 (1.8724) acc 53.1250 (58.5326) lr 1.0000e-05 eta 7:09:45 +epoch [1/50] batch [120/500] time 0.874 (1.029) data 0.000 (0.007) loss 2.4902 (1.8673) acc 40.6250 (58.5417) lr 1.0000e-05 eta 7:06:53 +epoch [1/50] batch [125/500] time 0.869 (1.024) data 0.000 (0.007) loss 2.1250 (1.8601) acc 46.8750 (58.6500) lr 1.0000e-05 eta 7:04:35 +epoch [1/50] batch [130/500] time 0.888 (1.019) data 0.000 (0.007) loss 1.8857 (1.8600) acc 62.5000 (58.5817) lr 1.0000e-05 eta 7:02:13 +epoch [1/50] batch [135/500] time 0.887 (1.014) data 0.000 (0.007) loss 1.8594 (1.8513) acc 56.2500 (58.6574) lr 1.0000e-05 eta 7:00:01 +epoch [1/50] batch [140/500] time 0.879 (1.009) data 0.000 (0.006) loss 1.6777 (1.8531) acc 56.2500 
(58.4821) lr 1.0000e-05 eta 6:58:07 +epoch [1/50] batch [145/500] time 0.921 (1.006) data 0.000 (0.006) loss 1.5420 (1.8516) acc 62.5000 (58.3621) lr 1.0000e-05 eta 6:56:33 +epoch [1/50] batch [150/500] time 0.882 (1.002) data 0.000 (0.006) loss 2.3789 (1.8469) acc 43.7500 (58.5208) lr 1.0000e-05 eta 6:54:56 +epoch [1/50] batch [155/500] time 0.880 (0.998) data 0.000 (0.006) loss 1.3662 (1.8344) acc 59.3750 (58.6089) lr 1.0000e-05 eta 6:53:23 +epoch [1/50] batch [160/500] time 0.910 (0.996) data 0.000 (0.006) loss 0.8252 (1.8259) acc 81.2500 (58.6914) lr 1.0000e-05 eta 6:52:19 +epoch [1/50] batch [165/500] time 0.902 (0.993) data 0.000 (0.005) loss 1.6650 (1.8150) acc 50.0000 (58.8258) lr 1.0000e-05 eta 6:50:56 +epoch [1/50] batch [170/500] time 0.880 (0.990) data 0.000 (0.005) loss 1.0439 (1.8018) acc 71.8750 (59.0809) lr 1.0000e-05 eta 6:49:32 +epoch [1/50] batch [175/500] time 0.880 (0.987) data 0.000 (0.005) loss 1.9434 (1.7959) acc 59.3750 (59.1250) lr 1.0000e-05 eta 6:48:19 +epoch [1/50] batch [180/500] time 0.878 (0.984) data 0.000 (0.005) loss 1.8682 (1.7893) acc 59.3750 (59.1493) lr 1.0000e-05 eta 6:47:02 +epoch [1/50] batch [185/500] time 0.882 (0.982) data 0.000 (0.005) loss 1.3232 (1.7843) acc 65.6250 (59.2230) lr 1.0000e-05 eta 6:45:55 +epoch [1/50] batch [190/500] time 0.865 (0.979) data 0.000 (0.005) loss 1.3945 (1.7734) acc 68.7500 (59.4901) lr 1.0000e-05 eta 6:44:51 +epoch [1/50] batch [195/500] time 0.898 (0.977) data 0.001 (0.005) loss 1.6094 (1.7689) acc 59.3750 (59.5833) lr 1.0000e-05 eta 6:43:50 +epoch [1/50] batch [200/500] time 0.898 (0.975) data 0.001 (0.005) loss 1.1592 (1.7607) acc 68.7500 (59.7188) lr 1.0000e-05 eta 6:42:50 +epoch [1/50] batch [205/500] time 0.877 (0.973) data 0.000 (0.004) loss 1.0078 (1.7480) acc 68.7500 (59.8780) lr 1.0000e-05 eta 6:42:03 +epoch [1/50] batch [210/500] time 0.899 (0.971) data 0.000 (0.004) loss 1.6758 (1.7416) acc 68.7500 (60.0595) lr 1.0000e-05 eta 6:41:01 +epoch [1/50] batch [215/500] time 0.882 
(0.968) data 0.000 (0.004) loss 1.3604 (1.7380) acc 75.0000 (60.1890) lr 1.0000e-05 eta 6:40:03 +epoch [1/50] batch [220/500] time 0.915 (0.967) data 0.000 (0.004) loss 1.2617 (1.7311) acc 65.6250 (60.3693) lr 1.0000e-05 eta 6:39:20 +epoch [1/50] batch [225/500] time 0.915 (0.965) data 0.000 (0.004) loss 2.0703 (1.7332) acc 59.3750 (60.3611) lr 1.0000e-05 eta 6:38:27 +epoch [1/50] batch [230/500] time 0.901 (0.963) data 0.000 (0.004) loss 1.6094 (1.7277) acc 56.2500 (60.3804) lr 1.0000e-05 eta 6:37:34 +epoch [1/50] batch [235/500] time 0.912 (0.962) data 0.000 (0.004) loss 1.5430 (1.7261) acc 62.5000 (60.4122) lr 1.0000e-05 eta 6:36:59 +epoch [1/50] batch [240/500] time 0.897 (0.961) data 0.001 (0.004) loss 1.5830 (1.7238) acc 65.6250 (60.5339) lr 1.0000e-05 eta 6:36:28 +epoch [1/50] batch [245/500] time 0.903 (0.959) data 0.000 (0.004) loss 1.4033 (1.7220) acc 68.7500 (60.5867) lr 1.0000e-05 eta 6:35:50 +epoch [1/50] batch [250/500] time 0.856 (0.958) data 0.000 (0.004) loss 1.4541 (1.7235) acc 68.7500 (60.5625) lr 1.0000e-05 eta 6:35:09 +epoch [1/50] batch [255/500] time 0.905 (0.957) data 0.000 (0.004) loss 1.5410 (1.7235) acc 59.3750 (60.6005) lr 1.0000e-05 eta 6:34:32 +epoch [1/50] batch [260/500] time 0.891 (0.955) data 0.000 (0.004) loss 2.3906 (1.7176) acc 53.1250 (60.7332) lr 1.0000e-05 eta 6:33:49 +epoch [1/50] batch [265/500] time 0.896 (0.954) data 0.000 (0.004) loss 1.7539 (1.7128) acc 65.6250 (60.8844) lr 1.0000e-05 eta 6:33:15 +epoch [1/50] batch [270/500] time 0.917 (0.953) data 0.000 (0.003) loss 1.3486 (1.7093) acc 65.6250 (60.9606) lr 1.0000e-05 eta 6:32:44 +epoch [1/50] batch [275/500] time 0.908 (0.952) data 0.000 (0.003) loss 1.8271 (1.7079) acc 71.8750 (60.9886) lr 1.0000e-05 eta 6:32:17 +epoch [1/50] batch [280/500] time 0.890 (0.951) data 0.000 (0.003) loss 2.8672 (1.7117) acc 46.8750 (60.9710) lr 1.0000e-05 eta 6:31:40 +epoch [1/50] batch [285/500] time 0.855 (0.949) data 0.000 (0.003) loss 1.1504 (1.7098) acc 75.0000 (61.0526) lr 
1.0000e-05 eta 6:31:06 +epoch [1/50] batch [290/500] time 0.885 (0.949) data 0.000 (0.003) loss 2.1191 (1.7121) acc 59.3750 (61.0560) lr 1.0000e-05 eta 6:30:38 +epoch [1/50] batch [295/500] time 0.904 (0.948) data 0.000 (0.003) loss 2.1035 (1.7136) acc 46.8750 (61.0169) lr 1.0000e-05 eta 6:30:09 +epoch [1/50] batch [300/500] time 0.881 (0.946) data 0.000 (0.003) loss 1.2168 (1.7097) acc 68.7500 (61.0521) lr 1.0000e-05 eta 6:29:35 +epoch [1/50] batch [305/500] time 0.895 (0.946) data 0.000 (0.003) loss 1.2275 (1.7029) acc 68.7500 (61.1578) lr 1.0000e-05 eta 6:29:15 +epoch [1/50] batch [310/500] time 0.866 (0.945) data 0.000 (0.003) loss 1.5957 (1.6972) acc 65.6250 (61.2198) lr 1.0000e-05 eta 6:28:42 +epoch [1/50] batch [315/500] time 0.889 (0.944) data 0.000 (0.003) loss 2.0039 (1.6967) acc 62.5000 (61.2798) lr 1.0000e-05 eta 6:28:22 +epoch [1/50] batch [320/500] time 0.884 (0.943) data 0.000 (0.003) loss 1.1055 (1.6945) acc 78.1250 (61.3672) lr 1.0000e-05 eta 6:27:54 +epoch [1/50] batch [325/500] time 0.869 (0.942) data 0.000 (0.003) loss 1.3682 (1.6913) acc 71.8750 (61.4423) lr 1.0000e-05 eta 6:27:25 +epoch [1/50] batch [330/500] time 0.880 (0.941) data 0.000 (0.003) loss 2.4492 (1.6858) acc 46.8750 (61.5625) lr 1.0000e-05 eta 6:26:57 +epoch [1/50] batch [335/500] time 0.899 (0.940) data 0.000 (0.003) loss 1.8213 (1.6862) acc 65.6250 (61.5951) lr 1.0000e-05 eta 6:26:31 +epoch [1/50] batch [340/500] time 0.890 (0.940) data 0.000 (0.003) loss 0.9780 (1.6802) acc 78.1250 (61.6912) lr 1.0000e-05 eta 6:26:08 +epoch [1/50] batch [345/500] time 0.870 (0.939) data 0.000 (0.003) loss 1.6270 (1.6790) acc 59.3750 (61.6938) lr 1.0000e-05 eta 6:25:44 +epoch [1/50] batch [350/500] time 0.929 (0.938) data 0.000 (0.003) loss 1.7852 (1.6801) acc 65.6250 (61.7321) lr 1.0000e-05 eta 6:25:30 +epoch [1/50] batch [355/500] time 0.895 (0.938) data 0.000 (0.003) loss 1.4668 (1.6768) acc 62.5000 (61.7518) lr 1.0000e-05 eta 6:25:06 +epoch [1/50] batch [360/500] time 0.894 (0.937) data 
0.000 (0.003) loss 1.8457 (1.6743) acc 56.2500 (61.7795) lr 1.0000e-05 eta 6:24:46 +epoch [1/50] batch [365/500] time 0.878 (0.936) data 0.000 (0.003) loss 1.0879 (1.6705) acc 68.7500 (61.8151) lr 1.0000e-05 eta 6:24:24 +epoch [1/50] batch [370/500] time 0.862 (0.935) data 0.000 (0.003) loss 1.4883 (1.6714) acc 59.3750 (61.7905) lr 1.0000e-05 eta 6:23:59 +epoch [1/50] batch [375/500] time 0.878 (0.935) data 0.000 (0.003) loss 1.3125 (1.6676) acc 65.6250 (61.8500) lr 1.0000e-05 eta 6:23:36 +epoch [1/50] batch [380/500] time 0.920 (0.934) data 0.000 (0.003) loss 1.2080 (1.6661) acc 71.8750 (61.8174) lr 1.0000e-05 eta 6:23:16 +epoch [1/50] batch [385/500] time 0.883 (0.934) data 0.000 (0.002) loss 1.1992 (1.6655) acc 62.5000 (61.8344) lr 1.0000e-05 eta 6:22:59 +epoch [1/50] batch [390/500] time 0.866 (0.933) data 0.000 (0.002) loss 2.3652 (1.6622) acc 53.1250 (61.9631) lr 1.0000e-05 eta 6:22:38 +epoch [1/50] batch [395/500] time 0.904 (0.932) data 0.000 (0.002) loss 0.9893 (1.6582) acc 75.0000 (62.0570) lr 1.0000e-05 eta 6:22:19 +epoch [1/50] batch [400/500] time 0.875 (0.932) data 0.000 (0.002) loss 1.9746 (1.6597) acc 46.8750 (62.0234) lr 1.0000e-05 eta 6:22:02 +epoch [1/50] batch [405/500] time 0.851 (0.931) data 0.000 (0.002) loss 1.0908 (1.6588) acc 65.6250 (62.0139) lr 1.0000e-05 eta 6:21:37 +epoch [1/50] batch [410/500] time 0.901 (0.931) data 0.000 (0.002) loss 1.0986 (1.6531) acc 75.0000 (62.1494) lr 1.0000e-05 eta 6:21:21 +epoch [1/50] batch [415/500] time 0.892 (0.930) data 0.000 (0.002) loss 1.3213 (1.6467) acc 65.6250 (62.2666) lr 1.0000e-05 eta 6:21:02 +epoch [1/50] batch [420/500] time 0.884 (0.929) data 0.000 (0.002) loss 2.1621 (1.6464) acc 56.2500 (62.2619) lr 1.0000e-05 eta 6:20:42 +epoch [1/50] batch [425/500] time 0.881 (0.929) data 0.000 (0.002) loss 1.2695 (1.6438) acc 75.0000 (62.3088) lr 1.0000e-05 eta 6:20:21 +epoch [1/50] batch [430/500] time 0.857 (0.928) data 0.000 (0.002) loss 1.1387 (1.6407) acc 53.1250 (62.3474) lr 1.0000e-05 eta 
6:20:01 +epoch [1/50] batch [435/500] time 0.844 (0.927) data 0.000 (0.002) loss 2.1738 (1.6393) acc 59.3750 (62.3851) lr 1.0000e-05 eta 6:19:40 +epoch [1/50] batch [440/500] time 0.871 (0.927) data 0.000 (0.002) loss 1.5996 (1.6367) acc 68.7500 (62.4361) lr 1.0000e-05 eta 6:19:23 +epoch [1/50] batch [445/500] time 0.992 (0.927) data 0.000 (0.002) loss 1.3066 (1.6349) acc 71.8750 (62.4508) lr 1.0000e-05 eta 6:19:12 +epoch [1/50] batch [450/500] time 0.940 (0.926) data 0.000 (0.002) loss 2.0469 (1.6357) acc 68.7500 (62.4722) lr 1.0000e-05 eta 6:19:01 +epoch [1/50] batch [455/500] time 0.906 (0.926) data 0.000 (0.002) loss 2.0820 (1.6345) acc 56.2500 (62.4725) lr 1.0000e-05 eta 6:18:45 +epoch [1/50] batch [460/500] time 0.888 (0.925) data 0.000 (0.002) loss 1.6631 (1.6326) acc 71.8750 (62.5204) lr 1.0000e-05 eta 6:18:27 +epoch [1/50] batch [465/500] time 0.913 (0.925) data 0.000 (0.002) loss 1.7051 (1.6334) acc 65.6250 (62.5202) lr 1.0000e-05 eta 6:18:13 +epoch [1/50] batch [470/500] time 0.889 (0.924) data 0.000 (0.002) loss 1.7666 (1.6313) acc 71.8750 (62.5598) lr 1.0000e-05 eta 6:17:56 +epoch [1/50] batch [475/500] time 0.874 (0.924) data 0.000 (0.002) loss 1.2725 (1.6325) acc 62.5000 (62.5395) lr 1.0000e-05 eta 6:17:42 +epoch [1/50] batch [480/500] time 0.862 (0.924) data 0.000 (0.002) loss 1.6328 (1.6318) acc 62.5000 (62.5391) lr 1.0000e-05 eta 6:17:28 +epoch [1/50] batch [485/500] time 0.887 (0.923) data 0.001 (0.002) loss 1.5332 (1.6319) acc 53.1250 (62.5258) lr 1.0000e-05 eta 6:17:15 +epoch [1/50] batch [490/500] time 0.907 (0.923) data 0.000 (0.002) loss 1.7090 (1.6317) acc 62.5000 (62.5702) lr 1.0000e-05 eta 6:17:08 +epoch [1/50] batch [495/500] time 0.878 (0.923) data 0.000 (0.002) loss 1.1572 (1.6315) acc 75.0000 (62.5947) lr 1.0000e-05 eta 6:16:53 +epoch [1/50] batch [500/500] time 0.863 (0.923) data 0.000 (0.002) loss 1.7354 (1.6321) acc 59.3750 (62.5875) lr 2.0000e-03 eta 6:16:41 +epoch [2/50] batch [5/500] time 0.906 (1.024) data 0.000 (0.137) loss 
1.3408 (1.6543) acc 65.6250 (63.1250) lr 2.0000e-03 eta 6:58:14 +epoch [2/50] batch [10/500] time 0.875 (0.954) data 0.000 (0.069) loss 2.1133 (1.6190) acc 59.3750 (62.8125) lr 2.0000e-03 eta 6:29:22 +epoch [2/50] batch [15/500] time 0.872 (0.928) data 0.000 (0.046) loss 1.0625 (1.5056) acc 75.0000 (65.0000) lr 2.0000e-03 eta 6:18:32 +epoch [2/50] batch [20/500] time 0.883 (0.917) data 0.000 (0.034) loss 1.2559 (1.4559) acc 68.7500 (65.1562) lr 2.0000e-03 eta 6:13:57 +epoch [2/50] batch [25/500] time 0.880 (0.912) data 0.000 (0.028) loss 1.4883 (1.4579) acc 53.1250 (65.0000) lr 2.0000e-03 eta 6:11:53 +epoch [2/50] batch [30/500] time 0.873 (0.907) data 0.000 (0.023) loss 0.6406 (1.4592) acc 81.2500 (64.7917) lr 2.0000e-03 eta 6:09:42 +epoch [2/50] batch [35/500] time 0.892 (0.907) data 0.000 (0.020) loss 1.1934 (1.4634) acc 68.7500 (64.9107) lr 2.0000e-03 eta 6:09:55 +epoch [2/50] batch [40/500] time 0.865 (0.903) data 0.000 (0.017) loss 1.4561 (1.4372) acc 62.5000 (65.3125) lr 2.0000e-03 eta 6:08:09 +epoch [2/50] batch [45/500] time 0.902 (0.901) data 0.000 (0.015) loss 1.6348 (1.4096) acc 65.6250 (66.3194) lr 2.0000e-03 eta 6:07:16 +epoch [2/50] batch [50/500] time 0.878 (0.899) data 0.000 (0.014) loss 1.3857 (1.4120) acc 75.0000 (66.6250) lr 2.0000e-03 eta 6:06:22 +epoch [2/50] batch [55/500] time 0.884 (0.897) data 0.000 (0.013) loss 1.8828 (1.4420) acc 59.3750 (66.1364) lr 2.0000e-03 eta 6:05:38 +epoch [2/50] batch [60/500] time 0.920 (0.896) data 0.000 (0.012) loss 1.8086 (1.4477) acc 62.5000 (65.7812) lr 2.0000e-03 eta 6:05:09 +epoch [2/50] batch [65/500] time 0.865 (0.896) data 0.000 (0.011) loss 2.2402 (1.4490) acc 56.2500 (66.2500) lr 2.0000e-03 eta 6:04:43 +epoch [2/50] batch [70/500] time 0.912 (0.896) data 0.000 (0.010) loss 1.2266 (1.4455) acc 71.8750 (66.3839) lr 2.0000e-03 eta 6:04:45 +epoch [2/50] batch [75/500] time 0.919 (0.896) data 0.000 (0.009) loss 0.8667 (1.4267) acc 81.2500 (66.6667) lr 2.0000e-03 eta 6:04:44 +epoch [2/50] batch [80/500] 
time 0.900 (0.895) data 0.000 (0.009) loss 1.9062 (1.4217) acc 62.5000 (66.7969) lr 2.0000e-03 eta 6:04:21 +epoch [2/50] batch [85/500] time 0.904 (0.895) data 0.000 (0.008) loss 1.4346 (1.4245) acc 65.6250 (66.5074) lr 2.0000e-03 eta 6:04:09 +epoch [2/50] batch [90/500] time 0.887 (0.894) data 0.000 (0.008) loss 1.2832 (1.4235) acc 62.5000 (66.4583) lr 2.0000e-03 eta 6:03:36 +epoch [2/50] batch [95/500] time 0.866 (0.893) data 0.000 (0.007) loss 1.5410 (1.4124) acc 78.1250 (66.8750) lr 2.0000e-03 eta 6:03:17 +epoch [2/50] batch [100/500] time 0.869 (0.892) data 0.000 (0.007) loss 1.7207 (1.4052) acc 62.5000 (67.1250) lr 2.0000e-03 eta 6:02:56 +epoch [2/50] batch [105/500] time 0.885 (0.892) data 0.000 (0.007) loss 1.5088 (1.3976) acc 68.7500 (67.1131) lr 2.0000e-03 eta 6:02:50 +epoch [2/50] batch [110/500] time 0.886 (0.892) data 0.000 (0.006) loss 1.0508 (1.3918) acc 75.0000 (67.2159) lr 2.0000e-03 eta 6:02:46 +epoch [2/50] batch [115/500] time 0.878 (0.892) data 0.000 (0.006) loss 1.5732 (1.3889) acc 71.8750 (67.2554) lr 2.0000e-03 eta 6:02:28 +epoch [2/50] batch [120/500] time 0.901 (0.891) data 0.000 (0.006) loss 1.2510 (1.3873) acc 68.7500 (67.2656) lr 2.0000e-03 eta 6:02:13 +epoch [2/50] batch [125/500] time 0.909 (0.891) data 0.000 (0.006) loss 1.4805 (1.3878) acc 65.6250 (67.4250) lr 2.0000e-03 eta 6:02:05 +epoch [2/50] batch [130/500] time 0.897 (0.891) data 0.000 (0.005) loss 1.8545 (1.3903) acc 68.7500 (67.3798) lr 2.0000e-03 eta 6:02:01 +epoch [2/50] batch [135/500] time 0.889 (0.892) data 0.000 (0.005) loss 1.4209 (1.3916) acc 65.6250 (67.1991) lr 2.0000e-03 eta 6:02:22 +epoch [2/50] batch [140/500] time 0.884 (0.892) data 0.000 (0.005) loss 1.4541 (1.3918) acc 68.7500 (67.1652) lr 2.0000e-03 eta 6:02:09 +epoch [2/50] batch [145/500] time 0.865 (0.892) data 0.000 (0.005) loss 1.1221 (1.3943) acc 75.0000 (67.1121) lr 2.0000e-03 eta 6:01:54 +epoch [2/50] batch [150/500] time 0.868 (0.891) data 0.000 (0.005) loss 1.4844 (1.3913) acc 53.1250 (67.0833) lr 
2.0000e-03 eta 6:01:40 +epoch [2/50] batch [155/500] time 0.853 (0.891) data 0.000 (0.005) loss 0.8511 (1.3844) acc 75.0000 (67.1774) lr 2.0000e-03 eta 6:01:32 +epoch [2/50] batch [160/500] time 0.882 (0.891) data 0.000 (0.005) loss 1.5840 (1.3799) acc 71.8750 (67.2656) lr 2.0000e-03 eta 6:01:22 +epoch [2/50] batch [165/500] time 0.862 (0.891) data 0.000 (0.004) loss 1.5693 (1.3746) acc 62.5000 (67.3106) lr 2.0000e-03 eta 6:01:13 +epoch [2/50] batch [170/500] time 0.899 (0.891) data 0.000 (0.004) loss 1.4307 (1.3760) acc 68.7500 (67.2794) lr 2.0000e-03 eta 6:01:14 +epoch [2/50] batch [175/500] time 0.903 (0.891) data 0.000 (0.004) loss 1.1064 (1.3676) acc 56.2500 (67.3571) lr 2.0000e-03 eta 6:01:08 +epoch [2/50] batch [180/500] time 0.866 (0.891) data 0.000 (0.004) loss 1.3457 (1.3624) acc 75.0000 (67.4306) lr 2.0000e-03 eta 6:01:10 +epoch [2/50] batch [185/500] time 0.860 (0.890) data 0.000 (0.004) loss 0.8960 (1.3564) acc 71.8750 (67.5338) lr 2.0000e-03 eta 6:00:49 +epoch [2/50] batch [190/500] time 0.886 (0.890) data 0.000 (0.004) loss 1.2227 (1.3551) acc 65.6250 (67.5329) lr 2.0000e-03 eta 6:00:35 +epoch [2/50] batch [195/500] time 0.924 (0.890) data 0.000 (0.004) loss 1.1309 (1.3512) acc 62.5000 (67.6122) lr 2.0000e-03 eta 6:00:35 +epoch [2/50] batch [200/500] time 0.878 (0.890) data 0.000 (0.004) loss 0.9570 (1.3470) acc 71.8750 (67.6250) lr 2.0000e-03 eta 6:00:28 +epoch [2/50] batch [205/500] time 0.899 (0.890) data 0.000 (0.004) loss 2.7871 (1.3531) acc 53.1250 (67.4695) lr 2.0000e-03 eta 6:00:21 +epoch [2/50] batch [210/500] time 0.868 (0.890) data 0.000 (0.003) loss 1.1494 (1.3514) acc 78.1250 (67.4554) lr 2.0000e-03 eta 6:00:07 +epoch [2/50] batch [215/500] time 0.856 (0.889) data 0.000 (0.003) loss 1.1348 (1.3514) acc 65.6250 (67.3983) lr 2.0000e-03 eta 5:59:55 +epoch [2/50] batch [220/500] time 0.877 (0.889) data 0.000 (0.003) loss 1.0430 (1.3515) acc 78.1250 (67.4148) lr 2.0000e-03 eta 5:59:44 +epoch [2/50] batch [225/500] time 0.870 (0.889) data 
0.000 (0.003) loss 1.2842 (1.3478) acc 75.0000 (67.5556) lr 2.0000e-03 eta 5:59:36 +epoch [2/50] batch [230/500] time 0.882 (0.889) data 0.000 (0.003) loss 0.9248 (1.3476) acc 71.8750 (67.5679) lr 2.0000e-03 eta 5:59:38 +epoch [2/50] batch [235/500] time 0.926 (0.889) data 0.000 (0.003) loss 0.9839 (1.3444) acc 71.8750 (67.5532) lr 2.0000e-03 eta 5:59:40 +epoch [2/50] batch [240/500] time 0.870 (0.889) data 0.000 (0.003) loss 2.1426 (1.3432) acc 68.7500 (67.5521) lr 2.0000e-03 eta 5:59:32 +epoch [2/50] batch [245/500] time 0.908 (0.889) data 0.000 (0.003) loss 1.8262 (1.3449) acc 68.7500 (67.6148) lr 2.0000e-03 eta 5:59:22 +epoch [2/50] batch [250/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.8027 (1.3424) acc 75.0000 (67.6250) lr 2.0000e-03 eta 5:59:17 +epoch [2/50] batch [255/500] time 0.867 (0.889) data 0.000 (0.003) loss 1.1865 (1.3415) acc 65.6250 (67.6348) lr 2.0000e-03 eta 5:59:15 +epoch [2/50] batch [260/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.2930 (1.3391) acc 56.2500 (67.5841) lr 2.0000e-03 eta 5:59:12 +epoch [2/50] batch [265/500] time 0.911 (0.889) data 0.000 (0.003) loss 1.8730 (1.3408) acc 62.5000 (67.6061) lr 2.0000e-03 eta 5:59:03 +epoch [2/50] batch [270/500] time 0.890 (0.889) data 0.000 (0.003) loss 1.0225 (1.3338) acc 71.8750 (67.6736) lr 2.0000e-03 eta 5:59:00 +epoch [2/50] batch [275/500] time 0.974 (0.889) data 0.000 (0.003) loss 1.6055 (1.3315) acc 65.6250 (67.6932) lr 2.0000e-03 eta 5:59:06 +epoch [2/50] batch [280/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.1406 (1.3297) acc 75.0000 (67.7679) lr 2.0000e-03 eta 5:59:00 +epoch [2/50] batch [285/500] time 0.901 (0.889) data 0.000 (0.003) loss 2.0684 (1.3285) acc 65.6250 (67.8289) lr 2.0000e-03 eta 5:58:57 +epoch [2/50] batch [290/500] time 0.907 (0.890) data 0.000 (0.003) loss 1.1777 (1.3293) acc 75.0000 (67.8987) lr 2.0000e-03 eta 5:58:55 +epoch [2/50] batch [295/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.0840 (1.3259) acc 71.8750 (68.0191) lr 2.0000e-03 eta 
5:58:50 +epoch [2/50] batch [300/500] time 0.862 (0.889) data 0.000 (0.003) loss 1.0527 (1.3213) acc 75.0000 (68.0938) lr 2.0000e-03 eta 5:58:44 +epoch [2/50] batch [305/500] time 0.870 (0.889) data 0.000 (0.002) loss 1.0771 (1.3224) acc 81.2500 (68.0430) lr 2.0000e-03 eta 5:58:35 +epoch [2/50] batch [310/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.2139 (1.3217) acc 75.0000 (68.0444) lr 2.0000e-03 eta 5:58:23 +epoch [2/50] batch [315/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.0713 (1.3239) acc 75.0000 (68.0456) lr 2.0000e-03 eta 5:58:23 +epoch [2/50] batch [320/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.2227 (1.3234) acc 68.7500 (68.1055) lr 2.0000e-03 eta 5:58:20 +epoch [2/50] batch [325/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.3311 (1.3264) acc 65.6250 (68.0288) lr 2.0000e-03 eta 5:58:14 +epoch [2/50] batch [330/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.1787 (1.3290) acc 68.7500 (67.9451) lr 2.0000e-03 eta 5:58:11 +epoch [2/50] batch [335/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.3340 (1.3275) acc 59.3750 (67.9478) lr 2.0000e-03 eta 5:58:05 +epoch [2/50] batch [340/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.3496 (1.3259) acc 71.8750 (67.9320) lr 2.0000e-03 eta 5:57:58 +epoch [2/50] batch [345/500] time 0.871 (0.889) data 0.000 (0.002) loss 0.9614 (1.3252) acc 81.2500 (67.9801) lr 2.0000e-03 eta 5:57:51 +epoch [2/50] batch [350/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.8774 (1.3248) acc 71.8750 (67.9911) lr 2.0000e-03 eta 5:57:45 +epoch [2/50] batch [355/500] time 0.870 (0.889) data 0.000 (0.002) loss 0.9316 (1.3254) acc 75.0000 (67.9754) lr 2.0000e-03 eta 5:57:34 +epoch [2/50] batch [360/500] time 0.852 (0.888) data 0.000 (0.002) loss 0.8491 (1.3209) acc 71.8750 (68.0122) lr 2.0000e-03 eta 5:57:23 +epoch [2/50] batch [365/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.2998 (1.3191) acc 53.1250 (67.9966) lr 2.0000e-03 eta 5:57:16 +epoch [2/50] batch [370/500] time 0.895 (0.888) data 0.000 (0.002) loss 
1.8779 (1.3197) acc 56.2500 (68.0236) lr 2.0000e-03 eta 5:57:10 +epoch [2/50] batch [375/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.9585 (1.3188) acc 68.7500 (68.0333) lr 2.0000e-03 eta 5:57:01 +epoch [2/50] batch [380/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.7031 (1.3191) acc 87.5000 (68.0428) lr 2.0000e-03 eta 5:56:58 +epoch [2/50] batch [385/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.4922 (1.3203) acc 71.8750 (68.0519) lr 2.0000e-03 eta 5:56:48 +epoch [2/50] batch [390/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.2471 (1.3178) acc 75.0000 (68.1330) lr 2.0000e-03 eta 5:56:42 +epoch [2/50] batch [395/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.2383 (1.3160) acc 62.5000 (68.1408) lr 2.0000e-03 eta 5:56:35 +epoch [2/50] batch [400/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.4824 (1.3124) acc 65.6250 (68.1953) lr 2.0000e-03 eta 5:56:33 +epoch [2/50] batch [405/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.0566 (1.3094) acc 62.5000 (68.2176) lr 2.0000e-03 eta 5:56:26 +epoch [2/50] batch [410/500] time 0.874 (0.888) data 0.000 (0.002) loss 2.1250 (1.3102) acc 65.6250 (68.2393) lr 2.0000e-03 eta 5:56:21 +epoch [2/50] batch [415/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.8521 (1.3090) acc 81.2500 (68.2154) lr 2.0000e-03 eta 5:56:17 +epoch [2/50] batch [420/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.1152 (1.3084) acc 65.6250 (68.2143) lr 2.0000e-03 eta 5:56:16 +epoch [2/50] batch [425/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.1748 (1.3087) acc 68.7500 (68.2132) lr 2.0000e-03 eta 5:56:09 +epoch [2/50] batch [430/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.1963 (1.3063) acc 65.6250 (68.2849) lr 2.0000e-03 eta 5:56:03 +epoch [2/50] batch [435/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.7368 (1.3045) acc 81.2500 (68.3118) lr 2.0000e-03 eta 5:56:03 +epoch [2/50] batch [440/500] time 0.904 (0.888) data 0.000 (0.002) loss 1.6670 (1.3064) acc 68.7500 (68.2741) lr 2.0000e-03 eta 5:56:01 +epoch [2/50] 
batch [445/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.3398 (1.3042) acc 62.5000 (68.2725) lr 2.0000e-03 eta 5:55:56 +epoch [2/50] batch [450/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.2061 (1.3029) acc 62.5000 (68.2639) lr 2.0000e-03 eta 5:55:56 +epoch [2/50] batch [455/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.0605 (1.3030) acc 71.8750 (68.2555) lr 2.0000e-03 eta 5:55:52 +epoch [2/50] batch [460/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.5059 (1.3044) acc 62.5000 (68.2541) lr 2.0000e-03 eta 5:55:48 +epoch [2/50] batch [465/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.1572 (1.3054) acc 75.0000 (68.2728) lr 2.0000e-03 eta 5:55:51 +epoch [2/50] batch [470/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.1582 (1.3063) acc 71.8750 (68.2447) lr 2.0000e-03 eta 5:55:48 +epoch [2/50] batch [475/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.3975 (1.3072) acc 75.0000 (68.2171) lr 2.0000e-03 eta 5:55:43 +epoch [2/50] batch [480/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.9590 (1.3046) acc 62.5000 (68.2552) lr 2.0000e-03 eta 5:55:41 +epoch [2/50] batch [485/500] time 0.874 (0.888) data 0.001 (0.002) loss 1.0381 (1.3035) acc 68.7500 (68.2861) lr 2.0000e-03 eta 5:55:33 +epoch [2/50] batch [490/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.2676 (1.3023) acc 71.8750 (68.3036) lr 2.0000e-03 eta 5:55:25 +epoch [2/50] batch [495/500] time 0.925 (0.888) data 0.000 (0.002) loss 1.9014 (1.3033) acc 59.3750 (68.2702) lr 2.0000e-03 eta 5:55:21 +epoch [2/50] batch [500/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0518 (1.3052) acc 62.5000 (68.1688) lr 1.9980e-03 eta 5:55:15 +epoch [3/50] batch [5/500] time 0.882 (1.029) data 0.000 (0.122) loss 1.0254 (1.4000) acc 81.2500 (71.2500) lr 1.9980e-03 eta 6:51:20 +epoch [3/50] batch [10/500] time 0.884 (0.961) data 0.000 (0.061) loss 0.9785 (1.2910) acc 78.1250 (71.2500) lr 1.9980e-03 eta 6:24:21 +epoch [3/50] batch [15/500] time 0.889 (0.939) data 0.000 (0.041) loss 1.0801 (1.3146) acc 
68.7500 (69.3750) lr 1.9980e-03 eta 6:15:21 +epoch [3/50] batch [20/500] time 0.886 (0.929) data 0.000 (0.031) loss 1.1738 (1.3237) acc 62.5000 (68.9062) lr 1.9980e-03 eta 6:11:19 +epoch [3/50] batch [25/500] time 0.906 (0.924) data 0.000 (0.025) loss 1.3330 (1.3621) acc 65.6250 (68.8750) lr 1.9980e-03 eta 6:09:01 +epoch [3/50] batch [30/500] time 0.903 (0.919) data 0.000 (0.021) loss 1.3105 (1.3636) acc 65.6250 (68.7500) lr 1.9980e-03 eta 6:07:05 +epoch [3/50] batch [35/500] time 0.875 (0.914) data 0.000 (0.018) loss 0.5669 (1.2941) acc 84.3750 (70.0000) lr 1.9980e-03 eta 6:05:00 +epoch [3/50] batch [40/500] time 0.911 (0.913) data 0.000 (0.016) loss 1.2939 (1.3073) acc 68.7500 (69.6094) lr 1.9980e-03 eta 6:04:34 +epoch [3/50] batch [45/500] time 0.900 (0.912) data 0.000 (0.014) loss 1.2021 (1.3061) acc 65.6250 (69.4444) lr 1.9980e-03 eta 6:04:00 +epoch [3/50] batch [50/500] time 0.855 (0.908) data 0.000 (0.012) loss 1.4082 (1.2980) acc 65.6250 (69.6875) lr 1.9980e-03 eta 6:02:33 +epoch [3/50] batch [55/500] time 0.900 (0.906) data 0.000 (0.011) loss 0.8467 (1.2880) acc 75.0000 (69.6591) lr 1.9980e-03 eta 6:01:28 +epoch [3/50] batch [60/500] time 0.881 (0.904) data 0.000 (0.010) loss 1.2930 (1.2923) acc 71.8750 (69.5312) lr 1.9980e-03 eta 6:00:38 +epoch [3/50] batch [65/500] time 0.894 (0.902) data 0.000 (0.010) loss 1.3291 (1.2934) acc 71.8750 (69.5192) lr 1.9980e-03 eta 5:59:56 +epoch [3/50] batch [70/500] time 0.883 (0.902) data 0.000 (0.009) loss 1.2881 (1.2854) acc 71.8750 (69.6875) lr 1.9980e-03 eta 5:59:36 +epoch [3/50] batch [75/500] time 0.879 (0.901) data 0.000 (0.008) loss 1.0889 (1.2818) acc 68.7500 (69.8333) lr 1.9980e-03 eta 5:59:06 +epoch [3/50] batch [80/500] time 0.906 (0.900) data 0.000 (0.008) loss 0.9009 (1.2674) acc 78.1250 (70.1562) lr 1.9980e-03 eta 5:58:49 +epoch [3/50] batch [85/500] time 0.883 (0.899) data 0.000 (0.007) loss 1.6553 (1.2921) acc 62.5000 (69.3750) lr 1.9980e-03 eta 5:58:23 +epoch [3/50] batch [90/500] time 0.926 (0.899) 
data 0.000 (0.007) loss 1.4639 (1.2837) acc 65.6250 (69.4792) lr 1.9980e-03 eta 5:58:24 +epoch [3/50] batch [95/500] time 0.887 (0.898) data 0.000 (0.007) loss 0.8403 (1.2776) acc 65.6250 (69.5724) lr 1.9980e-03 eta 5:57:54 +epoch [3/50] batch [100/500] time 0.884 (0.898) data 0.000 (0.006) loss 1.3975 (1.2904) acc 62.5000 (69.1562) lr 1.9980e-03 eta 5:57:39 +epoch [3/50] batch [105/500] time 0.847 (0.898) data 0.000 (0.006) loss 1.0205 (1.2928) acc 68.7500 (68.9881) lr 1.9980e-03 eta 5:57:39 +epoch [3/50] batch [110/500] time 0.917 (0.898) data 0.000 (0.006) loss 1.4502 (1.2886) acc 59.3750 (69.1193) lr 1.9980e-03 eta 5:57:32 +epoch [3/50] batch [115/500] time 0.890 (0.898) data 0.000 (0.006) loss 1.0449 (1.2810) acc 71.8750 (69.3478) lr 1.9980e-03 eta 5:57:30 +epoch [3/50] batch [120/500] time 0.920 (0.897) data 0.000 (0.005) loss 0.7188 (1.2726) acc 84.3750 (69.4010) lr 1.9980e-03 eta 5:57:03 +epoch [3/50] batch [125/500] time 0.882 (0.897) data 0.000 (0.005) loss 0.9624 (1.2705) acc 68.7500 (69.4250) lr 1.9980e-03 eta 5:56:56 +epoch [3/50] batch [130/500] time 0.870 (0.896) data 0.000 (0.005) loss 0.9717 (1.2605) acc 81.2500 (69.5913) lr 1.9980e-03 eta 5:56:38 +epoch [3/50] batch [135/500] time 0.887 (0.896) data 0.000 (0.005) loss 1.4268 (1.2608) acc 62.5000 (69.5602) lr 1.9980e-03 eta 5:56:15 +epoch [3/50] batch [140/500] time 0.855 (0.895) data 0.000 (0.005) loss 1.2539 (1.2583) acc 71.8750 (69.6875) lr 1.9980e-03 eta 5:55:59 +epoch [3/50] batch [145/500] time 0.957 (0.895) data 0.000 (0.004) loss 1.4023 (1.2516) acc 65.6250 (69.8491) lr 1.9980e-03 eta 5:55:52 +epoch [3/50] batch [150/500] time 0.897 (0.894) data 0.000 (0.004) loss 1.0938 (1.2442) acc 78.1250 (69.9167) lr 1.9980e-03 eta 5:55:32 +epoch [3/50] batch [155/500] time 0.906 (0.894) data 0.000 (0.004) loss 0.9692 (1.2474) acc 75.0000 (69.5968) lr 1.9980e-03 eta 5:55:12 +epoch [3/50] batch [160/500] time 0.864 (0.894) data 0.000 (0.004) loss 1.2363 (1.2495) acc 59.3750 (69.4727) lr 1.9980e-03 eta 
5:55:05 +epoch [3/50] batch [165/500] time 0.886 (0.894) data 0.000 (0.004) loss 0.7256 (1.2464) acc 84.3750 (69.4508) lr 1.9980e-03 eta 5:55:07 +epoch [3/50] batch [170/500] time 0.893 (0.894) data 0.000 (0.004) loss 1.0215 (1.2478) acc 75.0000 (69.4301) lr 1.9980e-03 eta 5:55:10 +epoch [3/50] batch [175/500] time 0.895 (0.894) data 0.000 (0.004) loss 1.0107 (1.2433) acc 71.8750 (69.5179) lr 1.9980e-03 eta 5:54:55 +epoch [3/50] batch [180/500] time 0.862 (0.893) data 0.000 (0.004) loss 0.9487 (1.2349) acc 75.0000 (69.6528) lr 1.9980e-03 eta 5:54:40 +epoch [3/50] batch [185/500] time 0.864 (0.893) data 0.000 (0.004) loss 1.3486 (1.2386) acc 71.8750 (69.6453) lr 1.9980e-03 eta 5:54:25 +epoch [3/50] batch [190/500] time 0.883 (0.893) data 0.000 (0.003) loss 1.2754 (1.2357) acc 78.1250 (69.7862) lr 1.9980e-03 eta 5:54:22 +epoch [3/50] batch [195/500] time 0.908 (0.893) data 0.000 (0.003) loss 1.2393 (1.2376) acc 81.2500 (69.7436) lr 1.9980e-03 eta 5:54:22 +epoch [3/50] batch [200/500] time 0.883 (0.893) data 0.000 (0.003) loss 1.4199 (1.2409) acc 65.6250 (69.7188) lr 1.9980e-03 eta 5:54:16 +epoch [3/50] batch [205/500] time 0.844 (0.893) data 0.000 (0.003) loss 0.8613 (1.2387) acc 75.0000 (69.7409) lr 1.9980e-03 eta 5:53:57 +epoch [3/50] batch [210/500] time 0.881 (0.892) data 0.000 (0.003) loss 1.4043 (1.2379) acc 62.5000 (69.7173) lr 1.9980e-03 eta 5:53:43 +epoch [3/50] batch [215/500] time 0.885 (0.892) data 0.000 (0.003) loss 0.8101 (1.2389) acc 84.3750 (69.7238) lr 1.9980e-03 eta 5:53:29 +epoch [3/50] batch [220/500] time 0.900 (0.892) data 0.000 (0.003) loss 1.0586 (1.2334) acc 78.1250 (69.7869) lr 1.9980e-03 eta 5:53:24 +epoch [3/50] batch [225/500] time 0.889 (0.892) data 0.000 (0.003) loss 1.2275 (1.2367) acc 56.2500 (69.6111) lr 1.9980e-03 eta 5:53:15 +epoch [3/50] batch [230/500] time 0.869 (0.891) data 0.000 (0.003) loss 1.3018 (1.2321) acc 65.6250 (69.6603) lr 1.9980e-03 eta 5:53:05 +epoch [3/50] batch [235/500] time 0.880 (0.891) data 0.000 (0.003) loss 
1.5391 (1.2345) acc 62.5000 (69.6543) lr 1.9980e-03 eta 5:53:02 +epoch [3/50] batch [240/500] time 0.924 (0.892) data 0.000 (0.003) loss 1.6699 (1.2389) acc 62.5000 (69.5443) lr 1.9980e-03 eta 5:53:03 +epoch [3/50] batch [245/500] time 0.903 (0.892) data 0.000 (0.003) loss 1.2979 (1.2419) acc 81.2500 (69.5026) lr 1.9980e-03 eta 5:53:09 +epoch [3/50] batch [250/500] time 0.891 (0.892) data 0.000 (0.003) loss 0.7739 (1.2427) acc 71.8750 (69.4500) lr 1.9980e-03 eta 5:52:59 +epoch [3/50] batch [255/500] time 0.901 (0.892) data 0.000 (0.003) loss 0.6348 (1.2415) acc 84.3750 (69.5466) lr 1.9980e-03 eta 5:52:51 +epoch [3/50] batch [260/500] time 0.882 (0.891) data 0.000 (0.003) loss 1.5371 (1.2461) acc 62.5000 (69.4591) lr 1.9980e-03 eta 5:52:41 +epoch [3/50] batch [265/500] time 0.899 (0.891) data 0.000 (0.003) loss 0.9258 (1.2443) acc 78.1250 (69.4811) lr 1.9980e-03 eta 5:52:38 +epoch [3/50] batch [270/500] time 0.901 (0.891) data 0.000 (0.002) loss 1.3682 (1.2426) acc 68.7500 (69.5023) lr 1.9980e-03 eta 5:52:34 +epoch [3/50] batch [275/500] time 0.908 (0.892) data 0.000 (0.002) loss 0.9722 (1.2429) acc 65.6250 (69.4545) lr 1.9980e-03 eta 5:52:34 +epoch [3/50] batch [280/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.3682 (1.2413) acc 65.6250 (69.4420) lr 1.9980e-03 eta 5:52:20 +epoch [3/50] batch [285/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.4395 (1.2453) acc 71.8750 (69.4079) lr 1.9980e-03 eta 5:52:11 +epoch [3/50] batch [290/500] time 0.875 (0.891) data 0.000 (0.002) loss 1.5684 (1.2433) acc 68.7500 (69.5259) lr 1.9980e-03 eta 5:52:11 +epoch [3/50] batch [295/500] time 0.863 (0.891) data 0.000 (0.002) loss 1.9160 (1.2463) acc 56.2500 (69.4809) lr 1.9980e-03 eta 5:52:05 +epoch [3/50] batch [300/500] time 0.868 (0.891) data 0.000 (0.002) loss 1.7666 (1.2482) acc 56.2500 (69.4479) lr 1.9980e-03 eta 5:51:58 +epoch [3/50] batch [305/500] time 0.917 (0.891) data 0.000 (0.002) loss 1.4209 (1.2479) acc 62.5000 (69.4365) lr 1.9980e-03 eta 5:51:56 +epoch [3/50] 
batch [310/500] time 0.908 (0.891) data 0.000 (0.002) loss 0.6743 (1.2468) acc 84.3750 (69.4456) lr 1.9980e-03 eta 5:51:53 +epoch [3/50] batch [315/500] time 0.904 (0.891) data 0.000 (0.002) loss 1.1650 (1.2480) acc 68.7500 (69.3750) lr 1.9980e-03 eta 5:51:47 +epoch [3/50] batch [320/500] time 0.854 (0.891) data 0.000 (0.002) loss 0.9810 (1.2471) acc 78.1250 (69.4141) lr 1.9980e-03 eta 5:51:36 +epoch [3/50] batch [325/500] time 0.912 (0.891) data 0.000 (0.002) loss 1.3984 (1.2454) acc 56.2500 (69.4038) lr 1.9980e-03 eta 5:51:35 +epoch [3/50] batch [330/500] time 0.876 (0.891) data 0.000 (0.002) loss 1.3027 (1.2481) acc 62.5000 (69.3561) lr 1.9980e-03 eta 5:51:27 +epoch [3/50] batch [335/500] time 0.888 (0.891) data 0.000 (0.002) loss 1.2881 (1.2452) acc 68.7500 (69.3843) lr 1.9980e-03 eta 5:51:16 +epoch [3/50] batch [340/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.2959 (1.2445) acc 71.8750 (69.4026) lr 1.9980e-03 eta 5:51:09 +epoch [3/50] batch [345/500] time 0.886 (0.890) data 0.000 (0.002) loss 0.8853 (1.2435) acc 84.3750 (69.4293) lr 1.9980e-03 eta 5:51:03 +epoch [3/50] batch [350/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.0039 (1.2425) acc 62.5000 (69.3661) lr 1.9980e-03 eta 5:50:55 +epoch [3/50] batch [355/500] time 0.868 (0.890) data 0.000 (0.002) loss 1.5088 (1.2412) acc 71.8750 (69.4190) lr 1.9980e-03 eta 5:50:43 +epoch [3/50] batch [360/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.5488 (1.2427) acc 59.3750 (69.3663) lr 1.9980e-03 eta 5:50:34 +epoch [3/50] batch [365/500] time 0.904 (0.890) data 0.000 (0.002) loss 1.4844 (1.2429) acc 68.7500 (69.3322) lr 1.9980e-03 eta 5:50:37 +epoch [3/50] batch [370/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.1719 (1.2410) acc 71.8750 (69.3412) lr 1.9980e-03 eta 5:50:36 +epoch [3/50] batch [375/500] time 0.908 (0.890) data 0.000 (0.002) loss 0.9883 (1.2415) acc 75.0000 (69.3250) lr 1.9980e-03 eta 5:50:33 +epoch [3/50] batch [380/500] time 0.896 (0.890) data 0.000 (0.002) loss 1.3750 (1.2404) acc 
59.3750 (69.2928) lr 1.9980e-03 eta 5:50:29 +epoch [3/50] batch [385/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.1367 (1.2416) acc 75.0000 (69.3263) lr 1.9980e-03 eta 5:50:20 +epoch [3/50] batch [390/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.1221 (1.2420) acc 71.8750 (69.3029) lr 1.9980e-03 eta 5:50:23 +epoch [3/50] batch [395/500] time 0.894 (0.890) data 0.000 (0.002) loss 1.2832 (1.2412) acc 65.6250 (69.2801) lr 1.9980e-03 eta 5:50:17 +epoch [3/50] batch [400/500] time 0.869 (0.890) data 0.000 (0.002) loss 0.9531 (1.2409) acc 71.8750 (69.2891) lr 1.9980e-03 eta 5:50:08 +epoch [3/50] batch [405/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.3154 (1.2410) acc 71.8750 (69.2901) lr 1.9980e-03 eta 5:50:04 +epoch [3/50] batch [410/500] time 0.857 (0.890) data 0.000 (0.002) loss 1.3008 (1.2400) acc 71.8750 (69.3369) lr 1.9980e-03 eta 5:49:56 +epoch [3/50] batch [415/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.3525 (1.2389) acc 78.1250 (69.3825) lr 1.9980e-03 eta 5:49:51 +epoch [3/50] batch [420/500] time 0.894 (0.890) data 0.000 (0.002) loss 1.1807 (1.2395) acc 71.8750 (69.3676) lr 1.9980e-03 eta 5:49:47 +epoch [3/50] batch [425/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.2529 (1.2403) acc 78.1250 (69.3676) lr 1.9980e-03 eta 5:49:41 +epoch [3/50] batch [430/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.1230 (1.2424) acc 68.7500 (69.3532) lr 1.9980e-03 eta 5:49:35 +epoch [3/50] batch [435/500] time 0.901 (0.890) data 0.000 (0.002) loss 0.9478 (1.2400) acc 87.5000 (69.4253) lr 1.9980e-03 eta 5:49:34 +epoch [3/50] batch [440/500] time 0.866 (0.890) data 0.000 (0.002) loss 1.1494 (1.2402) acc 75.0000 (69.4318) lr 1.9980e-03 eta 5:49:26 +epoch [3/50] batch [445/500] time 0.851 (0.890) data 0.000 (0.002) loss 0.8960 (1.2373) acc 78.1250 (69.4663) lr 1.9980e-03 eta 5:49:21 +epoch [3/50] batch [450/500] time 0.891 (0.890) data 0.000 (0.002) loss 0.5718 (1.2339) acc 78.1250 (69.5278) lr 1.9980e-03 eta 5:49:20 +epoch [3/50] batch [455/500] time 
0.860 (0.890) data 0.000 (0.002) loss 1.3105 (1.2368) acc 68.7500 (69.4918) lr 1.9980e-03 eta 5:49:16 +epoch [3/50] batch [460/500] time 0.887 (0.890) data 0.000 (0.002) loss 0.8105 (1.2364) acc 75.0000 (69.5109) lr 1.9980e-03 eta 5:49:10 +epoch [3/50] batch [465/500] time 0.888 (0.890) data 0.000 (0.002) loss 0.9053 (1.2351) acc 71.8750 (69.5228) lr 1.9980e-03 eta 5:49:02 +epoch [3/50] batch [470/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.0400 (1.2349) acc 71.8750 (69.5146) lr 1.9980e-03 eta 5:48:55 +epoch [3/50] batch [475/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.5952 (1.2341) acc 81.2500 (69.5132) lr 1.9980e-03 eta 5:48:49 +epoch [3/50] batch [480/500] time 0.900 (0.890) data 0.000 (0.001) loss 1.2637 (1.2373) acc 62.5000 (69.4271) lr 1.9980e-03 eta 5:48:42 +epoch [3/50] batch [485/500] time 0.884 (0.890) data 0.000 (0.001) loss 1.1787 (1.2362) acc 68.7500 (69.4265) lr 1.9980e-03 eta 5:48:37 +epoch [3/50] batch [490/500] time 0.898 (0.890) data 0.000 (0.001) loss 0.9971 (1.2344) acc 75.0000 (69.4579) lr 1.9980e-03 eta 5:48:36 +epoch [3/50] batch [495/500] time 0.911 (0.890) data 0.000 (0.001) loss 1.5127 (1.2341) acc 65.6250 (69.4760) lr 1.9980e-03 eta 5:48:34 +epoch [3/50] batch [500/500] time 0.907 (0.890) data 0.000 (0.001) loss 1.5693 (1.2331) acc 59.3750 (69.4938) lr 1.9921e-03 eta 5:48:30 +epoch [4/50] batch [5/500] time 0.854 (0.998) data 0.000 (0.120) loss 0.8823 (1.1184) acc 75.0000 (70.6250) lr 1.9921e-03 eta 6:30:50 +epoch [4/50] batch [10/500] time 0.902 (0.940) data 0.000 (0.060) loss 1.4033 (1.1884) acc 59.3750 (68.7500) lr 1.9921e-03 eta 6:08:01 +epoch [4/50] batch [15/500] time 0.872 (0.923) data 0.000 (0.040) loss 1.2295 (1.1549) acc 75.0000 (70.0000) lr 1.9921e-03 eta 6:01:08 +epoch [4/50] batch [20/500] time 0.894 (0.912) data 0.000 (0.030) loss 1.0830 (1.1476) acc 71.8750 (70.3125) lr 1.9921e-03 eta 5:56:46 +epoch [4/50] batch [25/500] time 0.903 (0.913) data 0.000 (0.024) loss 0.7925 (1.1423) acc 78.1250 (70.5000) lr 
1.9921e-03 eta 5:57:13 +epoch [4/50] batch [30/500] time 0.889 (0.911) data 0.000 (0.020) loss 1.3691 (1.1599) acc 65.6250 (70.6250) lr 1.9921e-03 eta 5:56:21 +epoch [4/50] batch [35/500] time 0.898 (0.908) data 0.000 (0.017) loss 1.3887 (1.1893) acc 62.5000 (69.8214) lr 1.9921e-03 eta 5:55:10 +epoch [4/50] batch [40/500] time 0.902 (0.905) data 0.000 (0.015) loss 1.7793 (1.2187) acc 62.5000 (68.9062) lr 1.9921e-03 eta 5:53:52 +epoch [4/50] batch [45/500] time 0.858 (0.903) data 0.000 (0.014) loss 1.1582 (1.2392) acc 68.7500 (68.6111) lr 1.9921e-03 eta 5:52:51 +epoch [4/50] batch [50/500] time 0.903 (0.900) data 0.000 (0.012) loss 1.4258 (1.2385) acc 50.0000 (67.8125) lr 1.9921e-03 eta 5:51:54 +epoch [4/50] batch [55/500] time 0.850 (0.897) data 0.000 (0.011) loss 1.3721 (1.2471) acc 65.6250 (67.8977) lr 1.9921e-03 eta 5:50:34 +epoch [4/50] batch [60/500] time 0.893 (0.896) data 0.000 (0.010) loss 1.5000 (1.2686) acc 65.6250 (67.9167) lr 1.9921e-03 eta 5:50:10 +epoch [4/50] batch [65/500] time 0.911 (0.897) data 0.000 (0.009) loss 1.5264 (1.2820) acc 68.7500 (67.8846) lr 1.9921e-03 eta 5:50:09 +epoch [4/50] batch [70/500] time 0.905 (0.896) data 0.000 (0.009) loss 1.1045 (1.2892) acc 71.8750 (68.0804) lr 1.9921e-03 eta 5:49:51 +epoch [4/50] batch [75/500] time 0.891 (0.896) data 0.000 (0.008) loss 1.6992 (1.2976) acc 56.2500 (68.1250) lr 1.9921e-03 eta 5:49:38 +epoch [4/50] batch [80/500] time 0.903 (0.897) data 0.000 (0.008) loss 1.1016 (1.3034) acc 65.6250 (68.0859) lr 1.9921e-03 eta 5:50:04 +epoch [4/50] batch [85/500] time 0.865 (0.895) data 0.000 (0.007) loss 1.0146 (1.3158) acc 68.7500 (67.9044) lr 1.9921e-03 eta 5:49:25 +epoch [4/50] batch [90/500] time 0.881 (0.894) data 0.000 (0.007) loss 0.6396 (1.3101) acc 78.1250 (68.1250) lr 1.9921e-03 eta 5:48:49 +epoch [4/50] batch [95/500] time 0.907 (0.894) data 0.000 (0.007) loss 1.6084 (1.3106) acc 56.2500 (68.1250) lr 1.9921e-03 eta 5:48:34 +epoch [4/50] batch [100/500] time 0.859 (0.893) data 0.001 (0.006) loss 
1.7559 (1.3135) acc 59.3750 (67.9375) lr 1.9921e-03 eta 5:48:25 +epoch [4/50] batch [105/500] time 0.896 (0.893) data 0.000 (0.006) loss 1.1924 (1.3012) acc 68.7500 (68.2738) lr 1.9921e-03 eta 5:48:21 +epoch [4/50] batch [110/500] time 0.903 (0.893) data 0.000 (0.006) loss 0.9697 (1.2913) acc 71.8750 (68.5511) lr 1.9921e-03 eta 5:48:00 +epoch [4/50] batch [115/500] time 0.913 (0.892) data 0.000 (0.005) loss 1.0400 (1.2918) acc 75.0000 (68.4783) lr 1.9921e-03 eta 5:47:48 +epoch [4/50] batch [120/500] time 0.882 (0.892) data 0.000 (0.005) loss 0.8149 (1.2897) acc 81.2500 (68.5938) lr 1.9921e-03 eta 5:47:35 +epoch [4/50] batch [125/500] time 0.928 (0.893) data 0.000 (0.005) loss 0.9238 (1.2943) acc 78.1250 (68.5500) lr 1.9921e-03 eta 5:47:48 +epoch [4/50] batch [130/500] time 0.884 (0.893) data 0.000 (0.005) loss 0.7534 (1.2800) acc 81.2500 (68.8942) lr 1.9921e-03 eta 5:47:45 +epoch [4/50] batch [135/500] time 0.868 (0.893) data 0.000 (0.005) loss 1.1816 (1.2801) acc 75.0000 (68.8889) lr 1.9921e-03 eta 5:47:35 +epoch [4/50] batch [140/500] time 0.864 (0.892) data 0.000 (0.005) loss 1.2969 (1.2714) acc 65.6250 (69.0179) lr 1.9921e-03 eta 5:47:16 +epoch [4/50] batch [145/500] time 0.881 (0.892) data 0.000 (0.004) loss 1.1680 (1.2684) acc 62.5000 (69.0302) lr 1.9921e-03 eta 5:47:09 +epoch [4/50] batch [150/500] time 0.872 (0.891) data 0.000 (0.004) loss 1.4619 (1.2710) acc 62.5000 (68.9792) lr 1.9921e-03 eta 5:46:56 +epoch [4/50] batch [155/500] time 0.891 (0.891) data 0.000 (0.004) loss 0.9204 (1.2603) acc 68.7500 (69.1129) lr 1.9921e-03 eta 5:46:48 +epoch [4/50] batch [160/500] time 0.862 (0.891) data 0.000 (0.004) loss 0.6274 (1.2503) acc 75.0000 (69.2383) lr 1.9921e-03 eta 5:46:43 +epoch [4/50] batch [165/500] time 0.907 (0.891) data 0.000 (0.004) loss 0.7876 (1.2443) acc 78.1250 (69.3939) lr 1.9921e-03 eta 5:46:42 +epoch [4/50] batch [170/500] time 0.866 (0.891) data 0.000 (0.004) loss 1.2920 (1.2464) acc 68.7500 (69.2831) lr 1.9921e-03 eta 5:46:27 +epoch [4/50] 
batch [175/500] time 0.877 (0.891) data 0.000 (0.004) loss 0.9521 (1.2445) acc 78.1250 (69.2857) lr 1.9921e-03 eta 5:46:25 +epoch [4/50] batch [180/500] time 0.859 (0.891) data 0.000 (0.004) loss 1.1865 (1.2486) acc 78.1250 (69.1493) lr 1.9921e-03 eta 5:46:12 +epoch [4/50] batch [185/500] time 0.902 (0.890) data 0.000 (0.003) loss 1.7686 (1.2429) acc 59.3750 (69.2230) lr 1.9921e-03 eta 5:46:00 +epoch [4/50] batch [190/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.9746 (1.2370) acc 81.2500 (69.3421) lr 1.9921e-03 eta 5:45:54 +epoch [4/50] batch [195/500] time 0.920 (0.890) data 0.000 (0.003) loss 0.8667 (1.2401) acc 84.3750 (69.3269) lr 1.9921e-03 eta 5:45:53 +epoch [4/50] batch [200/500] time 0.882 (0.890) data 0.000 (0.003) loss 2.0488 (1.2460) acc 50.0000 (69.2031) lr 1.9921e-03 eta 5:45:44 +epoch [4/50] batch [205/500] time 0.869 (0.890) data 0.000 (0.003) loss 0.6890 (1.2368) acc 90.6250 (69.4970) lr 1.9921e-03 eta 5:45:38 +epoch [4/50] batch [210/500] time 0.904 (0.890) data 0.000 (0.003) loss 1.3311 (1.2321) acc 78.1250 (69.7321) lr 1.9921e-03 eta 5:45:30 +epoch [4/50] batch [215/500] time 0.886 (0.890) data 0.000 (0.003) loss 0.9585 (1.2318) acc 78.1250 (69.7529) lr 1.9921e-03 eta 5:45:20 +epoch [4/50] batch [220/500] time 0.864 (0.890) data 0.000 (0.003) loss 1.1699 (1.2312) acc 78.1250 (69.8438) lr 1.9921e-03 eta 5:45:15 +epoch [4/50] batch [225/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.7021 (1.2346) acc 56.2500 (69.7639) lr 1.9921e-03 eta 5:45:19 +epoch [4/50] batch [230/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.8081 (1.2367) acc 87.5000 (69.7011) lr 1.9921e-03 eta 5:45:08 +epoch [4/50] batch [235/500] time 0.915 (0.890) data 0.000 (0.003) loss 1.2168 (1.2385) acc 75.0000 (69.6277) lr 1.9921e-03 eta 5:45:07 +epoch [4/50] batch [240/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.8721 (1.2306) acc 75.0000 (69.7526) lr 1.9921e-03 eta 5:45:06 +epoch [4/50] batch [245/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.9019 (1.2252) acc 
81.2500 (69.8469) lr 1.9921e-03 eta 5:45:02 +epoch [4/50] batch [250/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.8477 (1.2308) acc 56.2500 (69.7375) lr 1.9921e-03 eta 5:44:58 +epoch [4/50] batch [255/500] time 0.868 (0.890) data 0.000 (0.003) loss 0.8936 (1.2293) acc 75.0000 (69.7549) lr 1.9921e-03 eta 5:44:52 +epoch [4/50] batch [260/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.2881 (1.2285) acc 59.3750 (69.7236) lr 1.9921e-03 eta 5:44:51 +epoch [4/50] batch [265/500] time 1.035 (0.891) data 0.000 (0.002) loss 1.7295 (1.2309) acc 59.3750 (69.6934) lr 1.9921e-03 eta 5:44:59 +epoch [4/50] batch [270/500] time 0.904 (0.891) data 0.000 (0.002) loss 1.6699 (1.2315) acc 62.5000 (69.6528) lr 1.9921e-03 eta 5:44:54 +epoch [4/50] batch [275/500] time 0.905 (0.891) data 0.000 (0.002) loss 0.7935 (1.2266) acc 84.3750 (69.7500) lr 1.9921e-03 eta 5:44:50 +epoch [4/50] batch [280/500] time 0.893 (0.891) data 0.000 (0.002) loss 1.3818 (1.2263) acc 56.2500 (69.7098) lr 1.9921e-03 eta 5:44:41 +epoch [4/50] batch [285/500] time 0.878 (0.891) data 0.000 (0.002) loss 0.9839 (1.2260) acc 71.8750 (69.6930) lr 1.9921e-03 eta 5:44:33 +epoch [4/50] batch [290/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.2002 (1.2265) acc 65.6250 (69.6336) lr 1.9921e-03 eta 5:44:25 +epoch [4/50] batch [295/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.6250 (1.2278) acc 75.0000 (69.6610) lr 1.9921e-03 eta 5:44:18 +epoch [4/50] batch [300/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.2832 (1.2289) acc 75.0000 (69.6771) lr 1.9921e-03 eta 5:44:15 +epoch [4/50] batch [305/500] time 0.914 (0.890) data 0.000 (0.002) loss 1.6162 (1.2297) acc 46.8750 (69.5389) lr 1.9921e-03 eta 5:44:11 +epoch [4/50] batch [310/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.3438 (1.2310) acc 65.6250 (69.5363) lr 1.9921e-03 eta 5:44:09 +epoch [4/50] batch [315/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.3320 (1.2286) acc 75.0000 (69.6329) lr 1.9921e-03 eta 5:44:05 +epoch [4/50] batch [320/500] time 
0.856 (0.890) data 0.001 (0.002) loss 1.2373 (1.2279) acc 65.6250 (69.5996) lr 1.9921e-03 eta 5:43:57 +epoch [4/50] batch [325/500] time 0.859 (0.890) data 0.000 (0.002) loss 0.9004 (1.2271) acc 78.1250 (69.6346) lr 1.9921e-03 eta 5:43:47 +epoch [4/50] batch [330/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.9131 (1.2287) acc 75.0000 (69.5265) lr 1.9921e-03 eta 5:43:42 +epoch [4/50] batch [335/500] time 0.853 (0.890) data 0.000 (0.002) loss 1.3242 (1.2282) acc 62.5000 (69.5522) lr 1.9921e-03 eta 5:43:36 +epoch [4/50] batch [340/500] time 0.867 (0.890) data 0.000 (0.002) loss 1.3799 (1.2317) acc 71.8750 (69.5221) lr 1.9921e-03 eta 5:43:28 +epoch [4/50] batch [345/500] time 0.879 (0.890) data 0.000 (0.002) loss 1.2979 (1.2333) acc 65.6250 (69.5290) lr 1.9921e-03 eta 5:43:19 +epoch [4/50] batch [350/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.1436 (1.2358) acc 68.7500 (69.4375) lr 1.9921e-03 eta 5:43:06 +epoch [4/50] batch [355/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.2812 (1.2363) acc 71.8750 (69.4278) lr 1.9921e-03 eta 5:43:01 +epoch [4/50] batch [360/500] time 0.908 (0.889) data 0.000 (0.002) loss 2.0039 (1.2406) acc 56.2500 (69.3924) lr 1.9921e-03 eta 5:42:59 +epoch [4/50] batch [365/500] time 0.905 (0.890) data 0.000 (0.002) loss 1.2588 (1.2402) acc 75.0000 (69.4007) lr 1.9921e-03 eta 5:43:02 +epoch [4/50] batch [370/500] time 0.876 (0.890) data 0.000 (0.002) loss 1.0029 (1.2394) acc 75.0000 (69.4172) lr 1.9921e-03 eta 5:42:56 +epoch [4/50] batch [375/500] time 0.888 (0.890) data 0.000 (0.002) loss 1.7344 (1.2419) acc 62.5000 (69.3583) lr 1.9921e-03 eta 5:42:51 +epoch [4/50] batch [380/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.1973 (1.2433) acc 71.8750 (69.3339) lr 1.9921e-03 eta 5:42:46 +epoch [4/50] batch [385/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.6182 (1.2450) acc 68.7500 (69.3588) lr 1.9921e-03 eta 5:42:46 +epoch [4/50] batch [390/500] time 0.866 (0.890) data 0.000 (0.002) loss 1.4717 (1.2445) acc 65.6250 (69.3750) lr 
1.9921e-03 eta 5:42:38 +epoch [4/50] batch [395/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.2979 (1.2451) acc 68.7500 (69.4383) lr 1.9921e-03 eta 5:42:33 +epoch [4/50] batch [400/500] time 0.902 (0.890) data 0.000 (0.002) loss 0.8496 (1.2448) acc 78.1250 (69.4453) lr 1.9921e-03 eta 5:42:29 +epoch [4/50] batch [405/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.3076 (1.2471) acc 68.7500 (69.4059) lr 1.9921e-03 eta 5:42:23 +epoch [4/50] batch [410/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.1572 (1.2451) acc 78.1250 (69.4665) lr 1.9921e-03 eta 5:42:25 +epoch [4/50] batch [415/500] time 0.878 (0.890) data 0.000 (0.002) loss 0.7866 (1.2426) acc 78.1250 (69.5105) lr 1.9921e-03 eta 5:42:20 +epoch [4/50] batch [420/500] time 0.905 (0.890) data 0.000 (0.002) loss 1.5273 (1.2424) acc 62.5000 (69.5164) lr 1.9921e-03 eta 5:42:17 +epoch [4/50] batch [425/500] time 0.902 (0.890) data 0.000 (0.002) loss 1.4941 (1.2411) acc 62.5000 (69.5221) lr 1.9921e-03 eta 5:42:17 +epoch [4/50] batch [430/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.1826 (1.2438) acc 71.8750 (69.4477) lr 1.9921e-03 eta 5:42:15 +epoch [4/50] batch [435/500] time 0.889 (0.890) data 0.000 (0.002) loss 2.0059 (1.2463) acc 37.5000 (69.3750) lr 1.9921e-03 eta 5:42:10 +epoch [4/50] batch [440/500] time 0.898 (0.890) data 0.000 (0.002) loss 2.1699 (1.2495) acc 50.0000 (69.3111) lr 1.9921e-03 eta 5:42:03 +epoch [4/50] batch [445/500] time 0.870 (0.890) data 0.000 (0.002) loss 0.8872 (1.2483) acc 68.7500 (69.3258) lr 1.9921e-03 eta 5:41:53 +epoch [4/50] batch [450/500] time 0.853 (0.889) data 0.000 (0.002) loss 1.2754 (1.2521) acc 78.1250 (69.2431) lr 1.9921e-03 eta 5:41:40 +epoch [4/50] batch [455/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.8418 (1.2507) acc 84.3750 (69.2926) lr 1.9921e-03 eta 5:41:38 +epoch [4/50] batch [460/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.4326 (1.2495) acc 75.0000 (69.3342) lr 1.9921e-03 eta 5:41:33 +epoch [4/50] batch [465/500] time 0.885 (0.889) data 
0.000 (0.002) loss 1.4756 (1.2497) acc 65.6250 (69.3616) lr 1.9921e-03 eta 5:41:27 +epoch [4/50] batch [470/500] time 0.890 (0.889) data 0.000 (0.002) loss 0.9150 (1.2467) acc 62.5000 (69.3750) lr 1.9921e-03 eta 5:41:22 +epoch [4/50] batch [475/500] time 0.900 (0.889) data 0.000 (0.001) loss 1.1738 (1.2471) acc 68.7500 (69.3355) lr 1.9921e-03 eta 5:41:18 +epoch [4/50] batch [480/500] time 0.908 (0.889) data 0.000 (0.001) loss 0.9517 (1.2474) acc 71.8750 (69.3034) lr 1.9921e-03 eta 5:41:15 +epoch [4/50] batch [485/500] time 0.898 (0.889) data 0.000 (0.001) loss 1.1650 (1.2476) acc 75.0000 (69.3106) lr 1.9921e-03 eta 5:41:10 +epoch [4/50] batch [490/500] time 0.916 (0.889) data 0.000 (0.001) loss 1.4932 (1.2465) acc 59.3750 (69.3176) lr 1.9921e-03 eta 5:41:06 +epoch [4/50] batch [495/500] time 0.892 (0.889) data 0.000 (0.001) loss 1.7549 (1.2457) acc 56.2500 (69.3561) lr 1.9921e-03 eta 5:41:01 +epoch [4/50] batch [500/500] time 0.892 (0.889) data 0.000 (0.001) loss 1.0518 (1.2438) acc 81.2500 (69.4125) lr 1.9823e-03 eta 5:40:54 +epoch [5/50] batch [5/500] time 0.914 (1.022) data 0.000 (0.126) loss 1.2617 (1.1935) acc 65.6250 (66.8750) lr 1.9823e-03 eta 6:31:49 +epoch [5/50] batch [10/500] time 0.914 (0.961) data 0.000 (0.063) loss 1.2910 (1.1601) acc 78.1250 (69.3750) lr 1.9823e-03 eta 6:08:21 +epoch [5/50] batch [15/500] time 0.875 (0.935) data 0.000 (0.042) loss 1.2402 (1.2158) acc 78.1250 (69.5833) lr 1.9823e-03 eta 5:58:09 +epoch [5/50] batch [20/500] time 0.876 (0.921) data 0.000 (0.032) loss 1.5908 (1.2435) acc 56.2500 (67.9688) lr 1.9823e-03 eta 5:52:42 +epoch [5/50] batch [25/500] time 0.882 (0.914) data 0.000 (0.025) loss 1.5195 (1.2312) acc 59.3750 (68.6250) lr 1.9823e-03 eta 5:49:58 +epoch [5/50] batch [30/500] time 0.864 (0.909) data 0.000 (0.021) loss 1.2344 (1.2504) acc 65.6250 (67.8125) lr 1.9823e-03 eta 5:48:05 +epoch [5/50] batch [35/500] time 0.868 (0.905) data 0.000 (0.018) loss 1.2109 (1.2341) acc 71.8750 (68.4821) lr 1.9823e-03 eta 5:46:19 +epoch 
[5/50] batch [40/500] time 0.907 (0.902) data 0.000 (0.016) loss 1.8584 (1.2479) acc 56.2500 (68.1250) lr 1.9823e-03 eta 5:45:13 +epoch [5/50] batch [45/500] time 0.896 (0.901) data 0.000 (0.014) loss 1.0479 (1.2191) acc 75.0000 (68.8889) lr 1.9823e-03 eta 5:44:33 +epoch [5/50] batch [50/500] time 0.874 (0.899) data 0.000 (0.013) loss 1.3037 (1.2321) acc 59.3750 (68.6875) lr 1.9823e-03 eta 5:43:52 +epoch [5/50] batch [55/500] time 0.895 (0.898) data 0.000 (0.012) loss 1.7461 (1.2422) acc 62.5000 (68.6932) lr 1.9823e-03 eta 5:43:18 +epoch [5/50] batch [60/500] time 0.900 (0.897) data 0.000 (0.011) loss 1.6768 (1.2334) acc 56.2500 (68.9062) lr 1.9823e-03 eta 5:43:04 +epoch [5/50] batch [65/500] time 0.909 (0.898) data 0.000 (0.010) loss 1.4531 (1.2289) acc 62.5000 (69.0865) lr 1.9823e-03 eta 5:43:23 +epoch [5/50] batch [70/500] time 0.919 (0.897) data 0.000 (0.009) loss 1.1123 (1.2297) acc 71.8750 (68.9286) lr 1.9823e-03 eta 5:42:53 +epoch [5/50] batch [75/500] time 0.898 (0.896) data 0.000 (0.009) loss 1.3672 (1.2249) acc 59.3750 (68.9167) lr 1.9823e-03 eta 5:42:32 +epoch [5/50] batch [80/500] time 0.882 (0.897) data 0.000 (0.008) loss 1.3145 (1.2232) acc 65.6250 (69.1797) lr 1.9823e-03 eta 5:42:29 +epoch [5/50] batch [85/500] time 0.894 (0.895) data 0.000 (0.008) loss 0.9839 (1.2207) acc 65.6250 (68.9706) lr 1.9823e-03 eta 5:41:59 +epoch [5/50] batch [90/500] time 0.882 (0.895) data 0.000 (0.007) loss 1.2812 (1.2135) acc 59.3750 (69.0972) lr 1.9823e-03 eta 5:41:35 +epoch [5/50] batch [95/500] time 0.895 (0.894) data 0.000 (0.007) loss 0.7329 (1.2145) acc 81.2500 (69.2105) lr 1.9823e-03 eta 5:41:17 +epoch [5/50] batch [100/500] time 0.872 (0.893) data 0.000 (0.006) loss 1.6768 (1.2178) acc 59.3750 (69.0938) lr 1.9823e-03 eta 5:40:46 +epoch [5/50] batch [105/500] time 0.912 (0.893) data 0.000 (0.006) loss 0.9355 (1.2136) acc 65.6250 (69.0476) lr 1.9823e-03 eta 5:40:40 +epoch [5/50] batch [110/500] time 0.872 (0.893) data 0.001 (0.006) loss 0.9062 (1.2070) acc 81.2500 
(69.2330) lr 1.9823e-03 eta 5:40:40 +epoch [5/50] batch [115/500] time 0.897 (0.892) data 0.000 (0.006) loss 1.5137 (1.2065) acc 59.3750 (69.2935) lr 1.9823e-03 eta 5:40:24 +epoch [5/50] batch [120/500] time 0.888 (0.892) data 0.000 (0.005) loss 1.1836 (1.2088) acc 71.8750 (69.4271) lr 1.9823e-03 eta 5:40:08 +epoch [5/50] batch [125/500] time 0.897 (0.892) data 0.000 (0.005) loss 1.3066 (1.2078) acc 68.7500 (69.4250) lr 1.9823e-03 eta 5:40:01 +epoch [5/50] batch [130/500] time 0.872 (0.892) data 0.000 (0.005) loss 1.3174 (1.2157) acc 65.6250 (69.2067) lr 1.9823e-03 eta 5:39:53 +epoch [5/50] batch [135/500] time 0.884 (0.891) data 0.000 (0.005) loss 1.3740 (1.2196) acc 68.7500 (69.1898) lr 1.9823e-03 eta 5:39:34 +epoch [5/50] batch [140/500] time 0.856 (0.891) data 0.000 (0.005) loss 1.5000 (1.2276) acc 62.5000 (69.0179) lr 1.9823e-03 eta 5:39:17 +epoch [5/50] batch [145/500] time 0.875 (0.890) data 0.000 (0.005) loss 0.9907 (1.2275) acc 75.0000 (69.0086) lr 1.9823e-03 eta 5:39:08 +epoch [5/50] batch [150/500] time 0.862 (0.890) data 0.000 (0.004) loss 0.9502 (1.2363) acc 68.7500 (68.8958) lr 1.9823e-03 eta 5:38:57 +epoch [5/50] batch [155/500] time 0.884 (0.890) data 0.000 (0.004) loss 1.9102 (1.2491) acc 62.5000 (68.8105) lr 1.9823e-03 eta 5:38:42 +epoch [5/50] batch [160/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.1992 (1.2497) acc 71.8750 (68.8086) lr 1.9823e-03 eta 5:38:37 +epoch [5/50] batch [165/500] time 0.872 (0.890) data 0.000 (0.004) loss 1.1982 (1.2480) acc 68.7500 (68.8447) lr 1.9823e-03 eta 5:38:32 +epoch [5/50] batch [170/500] time 0.877 (0.889) data 0.000 (0.004) loss 1.2275 (1.2413) acc 56.2500 (68.8787) lr 1.9823e-03 eta 5:38:25 +epoch [5/50] batch [175/500] time 0.894 (0.889) data 0.000 (0.004) loss 1.0029 (1.2347) acc 75.0000 (69.0179) lr 1.9823e-03 eta 5:38:17 +epoch [5/50] batch [180/500] time 0.879 (0.888) data 0.000 (0.004) loss 1.4717 (1.2382) acc 65.6250 (68.9062) lr 1.9823e-03 eta 5:37:53 +epoch [5/50] batch [185/500] time 0.858 
(0.888) data 0.000 (0.004) loss 1.5469 (1.2432) acc 65.6250 (68.7331) lr 1.9823e-03 eta 5:37:35 +epoch [5/50] batch [190/500] time 0.886 (0.887) data 0.000 (0.004) loss 0.9600 (1.2463) acc 75.0000 (68.7500) lr 1.9823e-03 eta 5:37:22 +epoch [5/50] batch [195/500] time 0.870 (0.887) data 0.000 (0.003) loss 1.1045 (1.2471) acc 68.7500 (68.7179) lr 1.9823e-03 eta 5:37:01 +epoch [5/50] batch [200/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.0205 (1.2466) acc 75.0000 (68.7656) lr 1.9823e-03 eta 5:36:52 +epoch [5/50] batch [205/500] time 0.878 (0.886) data 0.000 (0.003) loss 0.9526 (1.2439) acc 75.0000 (68.8110) lr 1.9823e-03 eta 5:36:46 +epoch [5/50] batch [210/500] time 0.868 (0.887) data 0.000 (0.003) loss 1.0391 (1.2440) acc 62.5000 (68.7500) lr 1.9823e-03 eta 5:36:49 +epoch [5/50] batch [215/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.7559 (1.2562) acc 56.2500 (68.5465) lr 1.9823e-03 eta 5:36:45 +epoch [5/50] batch [220/500] time 0.881 (0.887) data 0.000 (0.003) loss 1.2871 (1.2597) acc 68.7500 (68.4517) lr 1.9823e-03 eta 5:36:36 +epoch [5/50] batch [225/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.6445 (1.2622) acc 59.3750 (68.4444) lr 1.9823e-03 eta 5:36:27 +epoch [5/50] batch [230/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.9272 (1.2585) acc 75.0000 (68.5870) lr 1.9823e-03 eta 5:36:21 +epoch [5/50] batch [235/500] time 0.896 (0.886) data 0.000 (0.003) loss 0.7852 (1.2552) acc 84.3750 (68.6170) lr 1.9823e-03 eta 5:36:11 +epoch [5/50] batch [240/500] time 0.876 (0.886) data 0.000 (0.003) loss 1.2598 (1.2560) acc 71.8750 (68.5547) lr 1.9823e-03 eta 5:36:09 +epoch [5/50] batch [245/500] time 0.918 (0.886) data 0.000 (0.003) loss 0.9653 (1.2526) acc 71.8750 (68.6607) lr 1.9823e-03 eta 5:36:09 +epoch [5/50] batch [250/500] time 1.004 (0.887) data 0.000 (0.003) loss 1.4990 (1.2527) acc 65.6250 (68.6625) lr 1.9823e-03 eta 5:36:19 +epoch [5/50] batch [255/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.0098 (1.2505) acc 71.8750 (68.7132) lr 
1.9823e-03 eta 5:36:19 +epoch [5/50] batch [260/500] time 0.908 (0.887) data 0.000 (0.003) loss 1.5205 (1.2575) acc 62.5000 (68.6178) lr 1.9823e-03 eta 5:36:15 +epoch [5/50] batch [265/500] time 0.888 (0.887) data 0.000 (0.003) loss 1.5723 (1.2546) acc 62.5000 (68.7146) lr 1.9823e-03 eta 5:36:07 +epoch [5/50] batch [270/500] time 0.882 (0.887) data 0.000 (0.003) loss 1.3447 (1.2524) acc 68.7500 (68.7731) lr 1.9823e-03 eta 5:36:04 +epoch [5/50] batch [275/500] time 0.911 (0.887) data 0.000 (0.003) loss 1.3086 (1.2516) acc 56.2500 (68.7386) lr 1.9823e-03 eta 5:36:01 +epoch [5/50] batch [280/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.9360 (1.2524) acc 75.0000 (68.7500) lr 1.9823e-03 eta 5:36:00 +epoch [5/50] batch [285/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.2373 (1.2508) acc 65.6250 (68.7281) lr 1.9823e-03 eta 5:35:57 +epoch [5/50] batch [290/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.0625 (1.2485) acc 78.1250 (68.8362) lr 1.9823e-03 eta 5:35:49 +epoch [5/50] batch [295/500] time 0.897 (0.887) data 0.001 (0.002) loss 1.9014 (1.2504) acc 59.3750 (68.7924) lr 1.9823e-03 eta 5:35:46 +epoch [5/50] batch [300/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.9438 (1.2507) acc 75.0000 (68.7604) lr 1.9823e-03 eta 5:35:42 +epoch [5/50] batch [305/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.4043 (1.2455) acc 65.6250 (68.9037) lr 1.9823e-03 eta 5:35:39 +epoch [5/50] batch [310/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7578 (1.2432) acc 78.1250 (68.9315) lr 1.9823e-03 eta 5:35:34 +epoch [5/50] batch [315/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2051 (1.2437) acc 62.5000 (68.9286) lr 1.9823e-03 eta 5:35:32 +epoch [5/50] batch [320/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.5938 (1.2472) acc 71.8750 (68.9160) lr 1.9823e-03 eta 5:35:30 +epoch [5/50] batch [325/500] time 0.909 (0.888) data 0.000 (0.002) loss 0.5601 (1.2430) acc 90.6250 (69.0192) lr 1.9823e-03 eta 5:35:32 +epoch [5/50] batch [330/500] time 0.884 (0.888) data 
0.000 (0.002) loss 1.4434 (1.2403) acc 68.7500 (69.0814) lr 1.9823e-03 eta 5:35:30 +epoch [5/50] batch [335/500] time 0.918 (0.888) data 0.000 (0.002) loss 0.8647 (1.2347) acc 78.1250 (69.1884) lr 1.9823e-03 eta 5:35:28 +epoch [5/50] batch [340/500] time 0.893 (0.888) data 0.000 (0.002) loss 2.3398 (1.2375) acc 53.1250 (69.1820) lr 1.9823e-03 eta 5:35:23 +epoch [5/50] batch [345/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.2363 (1.2366) acc 78.1250 (69.2663) lr 1.9823e-03 eta 5:35:22 +epoch [5/50] batch [350/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.9863 (1.2368) acc 53.1250 (69.2500) lr 1.9823e-03 eta 5:35:20 +epoch [5/50] batch [355/500] time 0.852 (0.888) data 0.000 (0.002) loss 1.1836 (1.2374) acc 71.8750 (69.2958) lr 1.9823e-03 eta 5:35:12 +epoch [5/50] batch [360/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.3281 (1.2368) acc 68.7500 (69.3316) lr 1.9823e-03 eta 5:35:04 +epoch [5/50] batch [365/500] time 0.913 (0.888) data 0.000 (0.002) loss 0.3027 (1.2373) acc 96.8750 (69.3921) lr 1.9823e-03 eta 5:35:05 +epoch [5/50] batch [370/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.2490 (1.2367) acc 75.0000 (69.4172) lr 1.9823e-03 eta 5:34:57 +epoch [5/50] batch [375/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.3447 (1.2366) acc 65.6250 (69.4250) lr 1.9823e-03 eta 5:34:55 +epoch [5/50] batch [380/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.3496 (1.2353) acc 65.6250 (69.4737) lr 1.9823e-03 eta 5:34:46 +epoch [5/50] batch [385/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.3945 (1.2343) acc 71.8750 (69.5373) lr 1.9823e-03 eta 5:34:39 +epoch [5/50] batch [390/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.2373 (1.2357) acc 71.8750 (69.5192) lr 1.9823e-03 eta 5:34:33 +epoch [5/50] batch [395/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.8403 (1.2320) acc 71.8750 (69.5649) lr 1.9823e-03 eta 5:34:35 +epoch [5/50] batch [400/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.2129 (1.2290) acc 65.6250 (69.6250) lr 1.9823e-03 eta 
5:34:32 +epoch [5/50] batch [405/500] time 0.920 (0.888) data 0.000 (0.002) loss 0.6064 (1.2246) acc 84.3750 (69.6914) lr 1.9823e-03 eta 5:34:28 +epoch [5/50] batch [410/500] time 0.913 (0.888) data 0.000 (0.002) loss 0.7847 (1.2226) acc 71.8750 (69.7256) lr 1.9823e-03 eta 5:34:28 +epoch [5/50] batch [415/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.4717 (1.2227) acc 59.3750 (69.6837) lr 1.9823e-03 eta 5:34:23 +epoch [5/50] batch [420/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.4727 (1.2207) acc 62.5000 (69.6726) lr 1.9823e-03 eta 5:34:18 +epoch [5/50] batch [425/500] time 0.925 (0.888) data 0.000 (0.002) loss 1.3730 (1.2200) acc 59.3750 (69.6985) lr 1.9823e-03 eta 5:34:12 +epoch [5/50] batch [430/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.3965 (1.2184) acc 65.6250 (69.7093) lr 1.9823e-03 eta 5:34:08 +epoch [5/50] batch [435/500] time 0.916 (0.888) data 0.000 (0.002) loss 0.9395 (1.2190) acc 75.0000 (69.6911) lr 1.9823e-03 eta 5:34:05 +epoch [5/50] batch [440/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.3721 (1.2194) acc 68.7500 (69.6733) lr 1.9823e-03 eta 5:34:00 +epoch [5/50] batch [445/500] time 0.938 (0.888) data 0.000 (0.002) loss 1.2900 (1.2193) acc 65.6250 (69.6278) lr 1.9823e-03 eta 5:34:00 +epoch [5/50] batch [450/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.8604 (1.2230) acc 53.1250 (69.5764) lr 1.9823e-03 eta 5:33:55 +epoch [5/50] batch [455/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.4199 (1.2239) acc 62.5000 (69.6016) lr 1.9823e-03 eta 5:33:46 +epoch [5/50] batch [460/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.7900 (1.2259) acc 59.3750 (69.5109) lr 1.9823e-03 eta 5:33:38 +epoch [5/50] batch [465/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.7227 (1.2247) acc 62.5000 (69.5296) lr 1.9823e-03 eta 5:33:35 +epoch [5/50] batch [470/500] time 0.917 (0.888) data 0.000 (0.002) loss 1.3447 (1.2242) acc 71.8750 (69.5612) lr 1.9823e-03 eta 5:33:31 +epoch [5/50] batch [475/500] time 0.890 (0.888) data 0.000 (0.002) loss 
1.0723 (1.2234) acc 75.0000 (69.5461) lr 1.9823e-03 eta 5:33:27 +epoch [5/50] batch [480/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.1768 (1.2234) acc 59.3750 (69.5312) lr 1.9823e-03 eta 5:33:21 +epoch [5/50] batch [485/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.9912 (1.2260) acc 40.6250 (69.4330) lr 1.9823e-03 eta 5:33:16 +epoch [5/50] batch [490/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.4824 (1.2257) acc 56.2500 (69.4324) lr 1.9823e-03 eta 5:33:10 +epoch [5/50] batch [495/500] time 0.865 (0.888) data 0.000 (0.001) loss 1.8604 (1.2276) acc 53.1250 (69.4066) lr 1.9823e-03 eta 5:33:07 +epoch [5/50] batch [500/500] time 0.895 (0.888) data 0.000 (0.001) loss 1.2871 (1.2286) acc 68.7500 (69.3625) lr 1.9686e-03 eta 5:33:02 +epoch [6/50] batch [5/500] time 0.898 (1.043) data 0.000 (0.156) loss 1.6416 (1.2877) acc 56.2500 (66.2500) lr 1.9686e-03 eta 6:31:10 +epoch [6/50] batch [10/500] time 0.887 (0.960) data 0.000 (0.078) loss 0.8911 (1.1644) acc 78.1250 (70.0000) lr 1.9686e-03 eta 6:00:00 +epoch [6/50] batch [15/500] time 0.891 (0.938) data 0.000 (0.052) loss 1.5068 (1.2203) acc 65.6250 (70.6250) lr 1.9686e-03 eta 5:51:22 +epoch [6/50] batch [20/500] time 0.907 (0.925) data 0.000 (0.039) loss 1.0537 (1.2047) acc 71.8750 (70.3125) lr 1.9686e-03 eta 5:46:23 +epoch [6/50] batch [25/500] time 0.889 (0.916) data 0.000 (0.031) loss 1.0020 (1.1790) acc 71.8750 (70.5000) lr 1.9686e-03 eta 5:43:11 +epoch [6/50] batch [30/500] time 0.899 (0.912) data 0.000 (0.026) loss 1.4717 (1.1881) acc 68.7500 (70.5208) lr 1.9686e-03 eta 5:41:22 +epoch [6/50] batch [35/500] time 0.893 (0.908) data 0.000 (0.023) loss 1.4785 (1.2218) acc 65.6250 (70.0893) lr 1.9686e-03 eta 5:40:02 +epoch [6/50] batch [40/500] time 0.870 (0.906) data 0.000 (0.020) loss 1.0293 (1.2114) acc 75.0000 (70.1562) lr 1.9686e-03 eta 5:39:07 +epoch [6/50] batch [45/500] time 0.890 (0.903) data 0.000 (0.018) loss 0.6426 (1.1743) acc 84.3750 (71.0417) lr 1.9686e-03 eta 5:37:47 +epoch [6/50] batch 
[50/500] time 0.876 (0.903) data 0.000 (0.016) loss 1.4414 (1.1963) acc 68.7500 (70.1875) lr 1.9686e-03 eta 5:37:56 +epoch [6/50] batch [55/500] time 0.904 (0.903) data 0.000 (0.014) loss 1.4443 (1.1888) acc 56.2500 (69.9432) lr 1.9686e-03 eta 5:37:51 +epoch [6/50] batch [60/500] time 0.869 (0.903) data 0.000 (0.013) loss 1.4102 (1.1850) acc 53.1250 (69.4792) lr 1.9686e-03 eta 5:37:33 +epoch [6/50] batch [65/500] time 0.918 (0.902) data 0.000 (0.012) loss 0.7910 (1.1854) acc 75.0000 (69.7115) lr 1.9686e-03 eta 5:37:09 +epoch [6/50] batch [70/500] time 0.877 (0.900) data 0.000 (0.011) loss 0.9136 (1.1807) acc 71.8750 (69.5536) lr 1.9686e-03 eta 5:36:16 +epoch [6/50] batch [75/500] time 0.884 (0.899) data 0.000 (0.011) loss 1.0596 (1.1821) acc 68.7500 (69.5000) lr 1.9686e-03 eta 5:35:51 +epoch [6/50] batch [80/500] time 0.874 (0.897) data 0.000 (0.010) loss 1.0293 (1.1869) acc 65.6250 (69.2578) lr 1.9686e-03 eta 5:35:14 +epoch [6/50] batch [85/500] time 0.869 (0.897) data 0.000 (0.009) loss 0.6631 (1.1701) acc 78.1250 (69.4853) lr 1.9686e-03 eta 5:35:00 +epoch [6/50] batch [90/500] time 0.885 (0.896) data 0.000 (0.009) loss 1.5986 (1.1784) acc 59.3750 (69.4444) lr 1.9686e-03 eta 5:34:30 +epoch [6/50] batch [95/500] time 0.867 (0.896) data 0.000 (0.008) loss 1.1748 (1.1850) acc 62.5000 (69.2763) lr 1.9686e-03 eta 5:34:31 +epoch [6/50] batch [100/500] time 0.903 (0.896) data 0.000 (0.008) loss 1.4512 (1.1861) acc 56.2500 (69.1250) lr 1.9686e-03 eta 5:34:26 +epoch [6/50] batch [105/500] time 0.903 (0.896) data 0.000 (0.008) loss 0.8735 (1.1807) acc 78.1250 (69.4345) lr 1.9686e-03 eta 5:34:16 +epoch [6/50] batch [110/500] time 0.873 (0.895) data 0.000 (0.007) loss 0.9585 (1.1819) acc 75.0000 (69.4886) lr 1.9686e-03 eta 5:34:09 +epoch [6/50] batch [115/500] time 0.877 (0.895) data 0.000 (0.007) loss 0.9565 (1.1721) acc 78.1250 (69.6196) lr 1.9686e-03 eta 5:33:47 +epoch [6/50] batch [120/500] time 0.892 (0.894) data 0.000 (0.007) loss 1.2119 (1.1847) acc 68.7500 (69.5312) 
lr 1.9686e-03 eta 5:33:36 +epoch [6/50] batch [125/500] time 0.878 (0.895) data 0.000 (0.006) loss 1.3896 (1.1898) acc 62.5000 (69.4000) lr 1.9686e-03 eta 5:33:38 +epoch [6/50] batch [130/500] time 0.906 (0.895) data 0.000 (0.006) loss 1.6416 (1.1971) acc 53.1250 (69.2548) lr 1.9686e-03 eta 5:33:33 +epoch [6/50] batch [135/500] time 0.902 (0.895) data 0.000 (0.006) loss 0.8496 (1.1915) acc 75.0000 (69.3981) lr 1.9686e-03 eta 5:33:32 +epoch [6/50] batch [140/500] time 0.891 (0.894) data 0.000 (0.006) loss 1.6094 (1.1992) acc 56.2500 (69.3304) lr 1.9686e-03 eta 5:33:19 +epoch [6/50] batch [145/500] time 0.868 (0.894) data 0.000 (0.006) loss 1.4072 (1.1978) acc 53.1250 (69.3750) lr 1.9686e-03 eta 5:33:03 +epoch [6/50] batch [150/500] time 0.889 (0.894) data 0.000 (0.005) loss 1.5049 (1.2004) acc 59.3750 (69.3125) lr 1.9686e-03 eta 5:32:58 +epoch [6/50] batch [155/500] time 0.880 (0.893) data 0.000 (0.005) loss 2.0176 (1.2075) acc 62.5000 (69.2742) lr 1.9686e-03 eta 5:32:43 +epoch [6/50] batch [160/500] time 0.898 (0.893) data 0.000 (0.005) loss 1.3008 (1.2051) acc 65.6250 (69.4336) lr 1.9686e-03 eta 5:32:29 +epoch [6/50] batch [165/500] time 0.897 (0.893) data 0.000 (0.005) loss 1.0293 (1.1955) acc 71.8750 (69.5833) lr 1.9686e-03 eta 5:32:17 +epoch [6/50] batch [170/500] time 0.894 (0.893) data 0.000 (0.005) loss 1.3252 (1.1978) acc 59.3750 (69.4301) lr 1.9686e-03 eta 5:32:14 +epoch [6/50] batch [175/500] time 0.877 (0.893) data 0.000 (0.005) loss 1.0557 (1.1992) acc 78.1250 (69.4107) lr 1.9686e-03 eta 5:32:15 +epoch [6/50] batch [180/500] time 0.902 (0.893) data 0.000 (0.005) loss 1.7168 (1.2027) acc 62.5000 (69.3056) lr 1.9686e-03 eta 5:32:07 +epoch [6/50] batch [185/500] time 0.907 (0.893) data 0.000 (0.004) loss 1.3701 (1.2098) acc 59.3750 (69.0709) lr 1.9686e-03 eta 5:32:08 +epoch [6/50] batch [190/500] time 0.910 (0.893) data 0.000 (0.004) loss 1.1465 (1.2071) acc 75.0000 (69.1118) lr 1.9686e-03 eta 5:32:08 +epoch [6/50] batch [195/500] time 0.870 (0.894) data 
0.000 (0.004) loss 0.9580 (1.2111) acc 75.0000 (69.1667) lr 1.9686e-03 eta 5:32:13 +epoch [6/50] batch [200/500] time 0.888 (0.893) data 0.000 (0.004) loss 1.0859 (1.2091) acc 75.0000 (69.4219) lr 1.9686e-03 eta 5:32:01 +epoch [6/50] batch [205/500] time 0.890 (0.893) data 0.000 (0.004) loss 1.2490 (1.2138) acc 71.8750 (69.2683) lr 1.9686e-03 eta 5:31:56 +epoch [6/50] batch [210/500] time 0.882 (0.893) data 0.000 (0.004) loss 1.1396 (1.2135) acc 71.8750 (69.1815) lr 1.9686e-03 eta 5:31:44 +epoch [6/50] batch [215/500] time 0.865 (0.893) data 0.000 (0.004) loss 1.7520 (1.2164) acc 56.2500 (69.0698) lr 1.9686e-03 eta 5:31:33 +epoch [6/50] batch [220/500] time 0.883 (0.892) data 0.000 (0.004) loss 1.8789 (1.2226) acc 43.7500 (68.9205) lr 1.9686e-03 eta 5:31:24 +epoch [6/50] batch [225/500] time 0.871 (0.892) data 0.000 (0.004) loss 1.6621 (1.2232) acc 59.3750 (68.9167) lr 1.9686e-03 eta 5:31:10 +epoch [6/50] batch [230/500] time 0.896 (0.892) data 0.000 (0.004) loss 1.3896 (1.2258) acc 71.8750 (68.8995) lr 1.9686e-03 eta 5:31:02 +epoch [6/50] batch [235/500] time 1.000 (0.893) data 0.000 (0.004) loss 1.0381 (1.2192) acc 75.0000 (68.9894) lr 1.9686e-03 eta 5:31:12 +epoch [6/50] batch [240/500] time 0.890 (0.892) data 0.000 (0.003) loss 1.8682 (1.2162) acc 56.2500 (69.0365) lr 1.9686e-03 eta 5:30:57 +epoch [6/50] batch [245/500] time 0.899 (0.892) data 0.000 (0.003) loss 1.1201 (1.2170) acc 78.1250 (68.9923) lr 1.9686e-03 eta 5:30:51 +epoch [6/50] batch [250/500] time 0.917 (0.892) data 0.000 (0.003) loss 1.0791 (1.2126) acc 68.7500 (69.0625) lr 1.9686e-03 eta 5:30:46 +epoch [6/50] batch [255/500] time 0.884 (0.892) data 0.000 (0.003) loss 1.1416 (1.2148) acc 68.7500 (69.0931) lr 1.9686e-03 eta 5:30:32 +epoch [6/50] batch [260/500] time 0.913 (0.892) data 0.000 (0.003) loss 1.7715 (1.2144) acc 65.6250 (69.1226) lr 1.9686e-03 eta 5:30:27 +epoch [6/50] batch [265/500] time 0.877 (0.891) data 0.000 (0.003) loss 1.2646 (1.2160) acc 75.0000 (69.0802) lr 1.9686e-03 eta 
5:30:22 +epoch [6/50] batch [270/500] time 0.905 (0.891) data 0.000 (0.003) loss 0.4209 (1.2163) acc 90.6250 (69.1667) lr 1.9686e-03 eta 5:30:13 +epoch [6/50] batch [275/500] time 0.874 (0.891) data 0.000 (0.003) loss 1.4062 (1.2137) acc 68.7500 (69.2500) lr 1.9686e-03 eta 5:30:08 +epoch [6/50] batch [280/500] time 0.912 (0.891) data 0.000 (0.003) loss 1.7793 (1.2116) acc 65.6250 (69.3862) lr 1.9686e-03 eta 5:30:07 +epoch [6/50] batch [285/500] time 0.869 (0.892) data 0.000 (0.003) loss 1.0791 (1.2078) acc 81.2500 (69.5066) lr 1.9686e-03 eta 5:30:06 +epoch [6/50] batch [290/500] time 0.894 (0.892) data 0.000 (0.003) loss 0.9165 (1.2089) acc 78.1250 (69.4289) lr 1.9686e-03 eta 5:30:01 +epoch [6/50] batch [295/500] time 0.886 (0.891) data 0.000 (0.003) loss 0.8452 (1.2092) acc 81.2500 (69.3644) lr 1.9686e-03 eta 5:29:53 +epoch [6/50] batch [300/500] time 0.899 (0.891) data 0.000 (0.003) loss 0.9751 (1.2070) acc 71.8750 (69.3750) lr 1.9686e-03 eta 5:29:47 +epoch [6/50] batch [305/500] time 0.873 (0.891) data 0.000 (0.003) loss 0.8115 (1.2074) acc 81.2500 (69.3955) lr 1.9686e-03 eta 5:29:35 +epoch [6/50] batch [310/500] time 0.862 (0.891) data 0.000 (0.003) loss 0.9907 (1.2073) acc 71.8750 (69.3548) lr 1.9686e-03 eta 5:29:30 +epoch [6/50] batch [315/500] time 0.880 (0.891) data 0.000 (0.003) loss 0.5586 (1.2061) acc 81.2500 (69.3452) lr 1.9686e-03 eta 5:29:23 +epoch [6/50] batch [320/500] time 0.915 (0.891) data 0.000 (0.003) loss 1.0107 (1.2077) acc 78.1250 (69.3164) lr 1.9686e-03 eta 5:29:23 +epoch [6/50] batch [325/500] time 0.893 (0.891) data 0.000 (0.003) loss 1.3311 (1.2110) acc 59.3750 (69.2885) lr 1.9686e-03 eta 5:29:20 +epoch [6/50] batch [330/500] time 0.905 (0.891) data 0.000 (0.003) loss 1.2441 (1.2086) acc 71.8750 (69.3561) lr 1.9686e-03 eta 5:29:14 +epoch [6/50] batch [335/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.3213 (1.2092) acc 62.5000 (69.3563) lr 1.9686e-03 eta 5:29:16 +epoch [6/50] batch [340/500] time 0.897 (0.891) data 0.000 (0.003) loss 
1.4443 (1.2115) acc 62.5000 (69.3015) lr 1.9686e-03 eta 5:29:10 +epoch [6/50] batch [345/500] time 0.902 (0.891) data 0.000 (0.002) loss 1.3096 (1.2112) acc 65.6250 (69.2844) lr 1.9686e-03 eta 5:29:06 +epoch [6/50] batch [350/500] time 0.869 (0.891) data 0.000 (0.002) loss 1.1250 (1.2096) acc 71.8750 (69.3661) lr 1.9686e-03 eta 5:28:56 +epoch [6/50] batch [355/500] time 0.878 (0.891) data 0.000 (0.002) loss 1.3193 (1.2100) acc 71.8750 (69.3750) lr 1.9686e-03 eta 5:28:51 +epoch [6/50] batch [360/500] time 0.907 (0.891) data 0.000 (0.002) loss 1.4736 (1.2157) acc 71.8750 (69.3403) lr 1.9686e-03 eta 5:28:47 +epoch [6/50] batch [365/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.4473 (1.2176) acc 53.1250 (69.2723) lr 1.9686e-03 eta 5:28:45 +epoch [6/50] batch [370/500] time 0.897 (0.891) data 0.000 (0.002) loss 0.9336 (1.2166) acc 81.2500 (69.2821) lr 1.9686e-03 eta 5:28:38 +epoch [6/50] batch [375/500] time 0.874 (0.891) data 0.000 (0.002) loss 0.9492 (1.2131) acc 78.1250 (69.3333) lr 1.9686e-03 eta 5:28:31 +epoch [6/50] batch [380/500] time 0.884 (0.891) data 0.000 (0.002) loss 0.8218 (1.2132) acc 75.0000 (69.3668) lr 1.9686e-03 eta 5:28:30 +epoch [6/50] batch [385/500] time 0.882 (0.891) data 0.000 (0.002) loss 0.8716 (1.2116) acc 71.8750 (69.3588) lr 1.9686e-03 eta 5:28:23 +epoch [6/50] batch [390/500] time 0.877 (0.891) data 0.000 (0.002) loss 0.5464 (1.2064) acc 87.5000 (69.4551) lr 1.9686e-03 eta 5:28:15 +epoch [6/50] batch [395/500] time 0.858 (0.891) data 0.000 (0.002) loss 0.7686 (1.2043) acc 84.3750 (69.5174) lr 1.9686e-03 eta 5:28:09 +epoch [6/50] batch [400/500] time 0.908 (0.891) data 0.000 (0.002) loss 1.0928 (1.2035) acc 68.7500 (69.5469) lr 1.9686e-03 eta 5:28:04 +epoch [6/50] batch [405/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.3359 (1.2015) acc 71.8750 (69.6219) lr 1.9686e-03 eta 5:27:55 +epoch [6/50] batch [410/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.6641 (1.2033) acc 68.7500 (69.6494) lr 1.9686e-03 eta 5:27:48 +epoch [6/50] 
batch [415/500] time 0.842 (0.890) data 0.000 (0.002) loss 0.5601 (1.1993) acc 78.1250 (69.7364) lr 1.9686e-03 eta 5:27:37 +epoch [6/50] batch [420/500] time 0.908 (0.890) data 0.000 (0.002) loss 0.8735 (1.2003) acc 68.7500 (69.6726) lr 1.9686e-03 eta 5:27:32 +epoch [6/50] batch [425/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.4775 (1.2027) acc 62.5000 (69.6103) lr 1.9686e-03 eta 5:27:28 +epoch [6/50] batch [430/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.8062 (1.1994) acc 78.1250 (69.6948) lr 1.9686e-03 eta 5:27:25 +epoch [6/50] batch [435/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.9980 (1.2008) acc 81.2500 (69.7126) lr 1.9686e-03 eta 5:27:20 +epoch [6/50] batch [440/500] time 0.870 (0.890) data 0.000 (0.002) loss 0.7129 (1.2026) acc 84.3750 (69.6520) lr 1.9686e-03 eta 5:27:12 +epoch [6/50] batch [445/500] time 0.882 (0.890) data 0.000 (0.002) loss 1.4707 (1.2046) acc 71.8750 (69.5716) lr 1.9686e-03 eta 5:27:08 +epoch [6/50] batch [450/500] time 0.905 (0.890) data 0.000 (0.002) loss 1.3115 (1.2072) acc 62.5000 (69.5625) lr 1.9686e-03 eta 5:27:02 +epoch [6/50] batch [455/500] time 0.904 (0.890) data 0.000 (0.002) loss 0.8901 (1.2061) acc 68.7500 (69.5673) lr 1.9686e-03 eta 5:26:54 +epoch [6/50] batch [460/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.9746 (1.2041) acc 81.2500 (69.6332) lr 1.9686e-03 eta 5:26:48 +epoch [6/50] batch [465/500] time 0.874 (0.890) data 0.000 (0.002) loss 0.8198 (1.2035) acc 71.8750 (69.6237) lr 1.9686e-03 eta 5:26:43 +epoch [6/50] batch [470/500] time 0.867 (0.890) data 0.000 (0.002) loss 0.7080 (1.2048) acc 75.0000 (69.5811) lr 1.9686e-03 eta 5:26:36 +epoch [6/50] batch [475/500] time 0.912 (0.890) data 0.000 (0.002) loss 0.6846 (1.2012) acc 84.3750 (69.6645) lr 1.9686e-03 eta 5:26:32 +epoch [6/50] batch [480/500] time 0.881 (0.890) data 0.000 (0.002) loss 0.9014 (1.2020) acc 71.8750 (69.6484) lr 1.9686e-03 eta 5:26:32 +epoch [6/50] batch [485/500] time 0.908 (0.890) data 0.001 (0.002) loss 0.9219 (1.2019) acc 
78.1250 (69.6198) lr 1.9686e-03 eta 5:26:26 +epoch [6/50] batch [490/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.0000 (1.2038) acc 68.7500 (69.5982) lr 1.9686e-03 eta 5:26:20 +epoch [6/50] batch [495/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.5352 (1.2052) acc 68.7500 (69.5960) lr 1.9686e-03 eta 5:26:15 +epoch [6/50] batch [500/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.5225 (1.2040) acc 65.6250 (69.6375) lr 1.9511e-03 eta 5:26:08 +epoch [7/50] batch [5/500] time 0.902 (1.007) data 0.000 (0.120) loss 0.8662 (1.1197) acc 71.8750 (70.6250) lr 1.9511e-03 eta 6:08:59 +epoch [7/50] batch [10/500] time 0.901 (0.945) data 0.000 (0.060) loss 1.2744 (1.1073) acc 68.7500 (70.0000) lr 1.9511e-03 eta 5:46:17 +epoch [7/50] batch [15/500] time 0.873 (0.921) data 0.000 (0.040) loss 1.2666 (1.1168) acc 75.0000 (71.2500) lr 1.9511e-03 eta 5:37:25 +epoch [7/50] batch [20/500] time 0.876 (0.911) data 0.000 (0.030) loss 0.4922 (1.1121) acc 87.5000 (71.2500) lr 1.9511e-03 eta 5:33:44 +epoch [7/50] batch [25/500] time 0.875 (0.907) data 0.000 (0.024) loss 1.5234 (1.1184) acc 65.6250 (71.8750) lr 1.9511e-03 eta 5:32:02 +epoch [7/50] batch [30/500] time 0.897 (0.907) data 0.000 (0.020) loss 1.3867 (1.1416) acc 75.0000 (70.8333) lr 1.9511e-03 eta 5:32:06 +epoch [7/50] batch [35/500] time 0.930 (0.904) data 0.000 (0.017) loss 0.6924 (1.1620) acc 78.1250 (70.6250) lr 1.9511e-03 eta 5:31:02 +epoch [7/50] batch [40/500] time 0.884 (0.902) data 0.000 (0.015) loss 1.8252 (1.1610) acc 59.3750 (71.0938) lr 1.9511e-03 eta 5:29:57 +epoch [7/50] batch [45/500] time 0.879 (0.901) data 0.000 (0.014) loss 0.9810 (1.1814) acc 59.3750 (70.6944) lr 1.9511e-03 eta 5:29:45 +epoch [7/50] batch [50/500] time 0.875 (0.900) data 0.000 (0.012) loss 0.7114 (1.1721) acc 90.6250 (71.0000) lr 1.9511e-03 eta 5:29:09 +epoch [7/50] batch [55/500] time 0.896 (0.899) data 0.000 (0.011) loss 1.7295 (1.1848) acc 62.5000 (70.7386) lr 1.9511e-03 eta 5:28:53 +epoch [7/50] batch [60/500] time 0.872 (0.898) 
data 0.000 (0.010) loss 1.3779 (1.1969) acc 59.3750 (70.4688) lr 1.9511e-03 eta 5:28:14 +epoch [7/50] batch [65/500] time 0.889 (0.896) data 0.000 (0.009) loss 1.4062 (1.1885) acc 68.7500 (70.4808) lr 1.9511e-03 eta 5:27:41 +epoch [7/50] batch [70/500] time 0.865 (0.894) data 0.000 (0.009) loss 1.0352 (1.1917) acc 68.7500 (70.3571) lr 1.9511e-03 eta 5:26:53 +epoch [7/50] batch [75/500] time 0.891 (0.895) data 0.000 (0.008) loss 1.2285 (1.1917) acc 59.3750 (70.2500) lr 1.9511e-03 eta 5:27:10 +epoch [7/50] batch [80/500] time 0.862 (0.894) data 0.000 (0.008) loss 1.1055 (1.2016) acc 62.5000 (69.7656) lr 1.9511e-03 eta 5:26:37 +epoch [7/50] batch [85/500] time 0.852 (0.893) data 0.000 (0.007) loss 1.2666 (1.1902) acc 78.1250 (70.1471) lr 1.9511e-03 eta 5:26:01 +epoch [7/50] batch [90/500] time 0.895 (0.892) data 0.000 (0.007) loss 1.6045 (1.2021) acc 65.6250 (69.8264) lr 1.9511e-03 eta 5:25:42 +epoch [7/50] batch [95/500] time 0.871 (0.892) data 0.000 (0.007) loss 0.9131 (1.2011) acc 71.8750 (69.8355) lr 1.9511e-03 eta 5:25:31 +epoch [7/50] batch [100/500] time 0.888 (0.891) data 0.000 (0.006) loss 1.0850 (1.2054) acc 71.8750 (70.0000) lr 1.9511e-03 eta 5:25:18 +epoch [7/50] batch [105/500] time 0.885 (0.891) data 0.000 (0.006) loss 0.9800 (1.2104) acc 71.8750 (69.9702) lr 1.9511e-03 eta 5:25:04 +epoch [7/50] batch [110/500] time 0.903 (0.891) data 0.000 (0.006) loss 0.9243 (1.2009) acc 68.7500 (70.1420) lr 1.9511e-03 eta 5:24:57 +epoch [7/50] batch [115/500] time 0.861 (0.890) data 0.000 (0.005) loss 1.0371 (1.2085) acc 68.7500 (69.8370) lr 1.9511e-03 eta 5:24:27 +epoch [7/50] batch [120/500] time 0.881 (0.889) data 0.000 (0.005) loss 0.7041 (1.2048) acc 84.3750 (69.9740) lr 1.9511e-03 eta 5:24:16 +epoch [7/50] batch [125/500] time 0.920 (0.890) data 0.000 (0.005) loss 1.8408 (1.2137) acc 50.0000 (69.7250) lr 1.9511e-03 eta 5:24:19 +epoch [7/50] batch [130/500] time 0.909 (0.889) data 0.000 (0.005) loss 1.3467 (1.2046) acc 56.2500 (69.8077) lr 1.9511e-03 eta 5:24:10 
+epoch [7/50] batch [135/500] time 0.893 (0.890) data 0.000 (0.005) loss 1.1602 (1.2003) acc 68.7500 (69.8611) lr 1.9511e-03 eta 5:24:10 +epoch [7/50] batch [140/500] time 0.910 (0.890) data 0.000 (0.005) loss 0.7666 (1.1986) acc 78.1250 (69.9777) lr 1.9511e-03 eta 5:24:10 +epoch [7/50] batch [145/500] time 0.895 (0.890) data 0.000 (0.004) loss 1.2715 (1.1925) acc 71.8750 (70.0647) lr 1.9511e-03 eta 5:24:08 +epoch [7/50] batch [150/500] time 0.911 (0.890) data 0.000 (0.004) loss 1.1367 (1.1918) acc 62.5000 (70.0208) lr 1.9511e-03 eta 5:24:12 +epoch [7/50] batch [155/500] time 0.904 (0.891) data 0.000 (0.004) loss 0.7593 (1.1916) acc 78.1250 (70.0806) lr 1.9511e-03 eta 5:24:13 +epoch [7/50] batch [160/500] time 0.864 (0.890) data 0.000 (0.004) loss 1.1211 (1.1879) acc 78.1250 (70.0781) lr 1.9511e-03 eta 5:24:04 +epoch [7/50] batch [165/500] time 0.900 (0.890) data 0.000 (0.004) loss 0.8843 (1.1886) acc 81.2500 (70.2083) lr 1.9511e-03 eta 5:23:52 +epoch [7/50] batch [170/500] time 0.852 (0.889) data 0.000 (0.004) loss 1.6738 (1.1871) acc 62.5000 (70.1838) lr 1.9511e-03 eta 5:23:35 +epoch [7/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 1.6777 (1.1926) acc 65.6250 (70.0714) lr 1.9511e-03 eta 5:23:40 +epoch [7/50] batch [180/500] time 0.911 (0.890) data 0.000 (0.004) loss 1.6621 (1.1991) acc 62.5000 (69.9826) lr 1.9511e-03 eta 5:23:34 +epoch [7/50] batch [185/500] time 0.913 (0.890) data 0.000 (0.003) loss 0.5483 (1.1963) acc 87.5000 (70.0507) lr 1.9511e-03 eta 5:23:32 +epoch [7/50] batch [190/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.4033 (1.1955) acc 68.7500 (69.9836) lr 1.9511e-03 eta 5:23:21 +epoch [7/50] batch [195/500] time 0.853 (0.889) data 0.000 (0.003) loss 1.3281 (1.1945) acc 62.5000 (69.9199) lr 1.9511e-03 eta 5:23:05 +epoch [7/50] batch [200/500] time 0.927 (0.889) data 0.000 (0.003) loss 1.4180 (1.1925) acc 62.5000 (69.9531) lr 1.9511e-03 eta 5:23:02 +epoch [7/50] batch [205/500] time 0.883 (0.889) data 0.001 (0.003) loss 1.0459 
(1.1970) acc 68.7500 (69.8323) lr 1.9511e-03 eta 5:22:58 +epoch [7/50] batch [210/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.5986 (1.1865) acc 81.2500 (70.0149) lr 1.9511e-03 eta 5:22:51 +epoch [7/50] batch [215/500] time 0.863 (0.889) data 0.000 (0.003) loss 0.8350 (1.1839) acc 75.0000 (70.0436) lr 1.9511e-03 eta 5:22:37 +epoch [7/50] batch [220/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.4072 (1.1792) acc 68.7500 (70.0994) lr 1.9511e-03 eta 5:22:41 +epoch [7/50] batch [225/500] time 0.918 (0.889) data 0.000 (0.003) loss 1.0029 (1.1873) acc 71.8750 (69.9306) lr 1.9511e-03 eta 5:22:43 +epoch [7/50] batch [230/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.8530 (1.1834) acc 78.1250 (70.0543) lr 1.9511e-03 eta 5:22:35 +epoch [7/50] batch [235/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.2051 (1.1867) acc 59.3750 (69.9734) lr 1.9511e-03 eta 5:22:27 +epoch [7/50] batch [240/500] time 0.877 (0.889) data 0.000 (0.003) loss 1.5879 (1.1902) acc 53.1250 (69.8438) lr 1.9511e-03 eta 5:22:21 +epoch [7/50] batch [245/500] time 0.868 (0.889) data 0.000 (0.003) loss 1.3896 (1.1922) acc 65.6250 (69.8087) lr 1.9511e-03 eta 5:22:17 +epoch [7/50] batch [250/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.7964 (1.1892) acc 81.2500 (69.8000) lr 1.9511e-03 eta 5:22:09 +epoch [7/50] batch [255/500] time 0.928 (0.889) data 0.000 (0.003) loss 1.0459 (1.1896) acc 71.8750 (69.7672) lr 1.9511e-03 eta 5:22:12 +epoch [7/50] batch [260/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.5068 (1.1876) acc 62.5000 (69.8317) lr 1.9511e-03 eta 5:22:03 +epoch [7/50] batch [265/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.9272 (1.1886) acc 84.3750 (69.8703) lr 1.9511e-03 eta 5:21:57 +epoch [7/50] batch [270/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.3457 (1.1845) acc 65.6250 (70.0347) lr 1.9511e-03 eta 5:21:53 +epoch [7/50] batch [275/500] time 0.918 (0.889) data 0.000 (0.002) loss 0.9434 (1.1877) acc 71.8750 (69.9659) lr 1.9511e-03 eta 5:21:55 +epoch [7/50] batch 
[280/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.4512 (1.1882) acc 62.5000 (69.9665) lr 1.9511e-03 eta 5:21:47 +epoch [7/50] batch [285/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.2285 (1.1893) acc 65.6250 (69.9890) lr 1.9511e-03 eta 5:21:44 +epoch [7/50] batch [290/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.5557 (1.1924) acc 62.5000 (69.9246) lr 1.9511e-03 eta 5:21:39 +epoch [7/50] batch [295/500] time 0.847 (0.889) data 0.000 (0.002) loss 0.6182 (1.1901) acc 78.1250 (70.0212) lr 1.9511e-03 eta 5:21:28 +epoch [7/50] batch [300/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.0146 (1.1894) acc 78.1250 (70.0521) lr 1.9511e-03 eta 5:21:20 +epoch [7/50] batch [305/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3252 (1.1933) acc 68.7500 (69.9693) lr 1.9511e-03 eta 5:21:13 +epoch [7/50] batch [310/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.3457 (1.1957) acc 68.7500 (69.9294) lr 1.9511e-03 eta 5:21:04 +epoch [7/50] batch [315/500] time 0.972 (0.888) data 0.000 (0.002) loss 1.2617 (1.1952) acc 68.7500 (69.9504) lr 1.9511e-03 eta 5:21:03 +epoch [7/50] batch [320/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.2334 (1.1964) acc 75.0000 (69.9707) lr 1.9511e-03 eta 5:21:03 +epoch [7/50] batch [325/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.3535 (1.1985) acc 71.8750 (69.9519) lr 1.9511e-03 eta 5:20:59 +epoch [7/50] batch [330/500] time 0.894 (0.889) data 0.000 (0.002) loss 0.9287 (1.1948) acc 78.1250 (70.0568) lr 1.9511e-03 eta 5:20:58 +epoch [7/50] batch [335/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.2148 (1.1928) acc 62.5000 (70.0746) lr 1.9511e-03 eta 5:20:52 +epoch [7/50] batch [340/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.0186 (1.1902) acc 68.7500 (70.1379) lr 1.9511e-03 eta 5:20:44 +epoch [7/50] batch [345/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.2617 (1.1899) acc 75.0000 (70.2355) lr 1.9511e-03 eta 5:20:34 +epoch [7/50] batch [350/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.3281 (1.1882) acc 68.7500 
(70.3125) lr 1.9511e-03 eta 5:20:32 +epoch [7/50] batch [355/500] time 0.876 (0.888) data 0.000 (0.002) loss 0.9521 (1.1843) acc 75.0000 (70.3873) lr 1.9511e-03 eta 5:20:27 +epoch [7/50] batch [360/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.7935 (1.1812) acc 78.1250 (70.4688) lr 1.9511e-03 eta 5:20:24 +epoch [7/50] batch [365/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.5498 (1.1824) acc 56.2500 (70.4195) lr 1.9511e-03 eta 5:20:18 +epoch [7/50] batch [370/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.8477 (1.1847) acc 81.2500 (70.4223) lr 1.9511e-03 eta 5:20:13 +epoch [7/50] batch [375/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.3291 (1.1855) acc 68.7500 (70.3833) lr 1.9511e-03 eta 5:20:05 +epoch [7/50] batch [380/500] time 0.915 (0.888) data 0.000 (0.002) loss 1.0049 (1.1830) acc 75.0000 (70.4112) lr 1.9511e-03 eta 5:20:01 +epoch [7/50] batch [385/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.2197 (1.1836) acc 65.6250 (70.3490) lr 1.9511e-03 eta 5:19:54 +epoch [7/50] batch [390/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.3584 (1.1860) acc 71.8750 (70.2804) lr 1.9511e-03 eta 5:19:51 +epoch [7/50] batch [395/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.5205 (1.1860) acc 62.5000 (70.2373) lr 1.9511e-03 eta 5:19:47 +epoch [7/50] batch [400/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4023 (1.1875) acc 71.8750 (70.2031) lr 1.9511e-03 eta 5:19:43 +epoch [7/50] batch [405/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3418 (1.1852) acc 68.7500 (70.2701) lr 1.9511e-03 eta 5:19:38 +epoch [7/50] batch [410/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.2695 (1.1839) acc 71.8750 (70.2896) lr 1.9511e-03 eta 5:19:32 +epoch [7/50] batch [415/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.7910 (1.1868) acc 56.2500 (70.2259) lr 1.9511e-03 eta 5:19:28 +epoch [7/50] batch [420/500] time 0.887 (0.888) data 0.000 (0.002) loss 2.4297 (1.1903) acc 56.2500 (70.2083) lr 1.9511e-03 eta 5:19:23 +epoch [7/50] batch [425/500] time 0.851 
(0.888) data 0.000 (0.002) loss 1.1494 (1.1888) acc 71.8750 (70.2206) lr 1.9511e-03 eta 5:19:14 +epoch [7/50] batch [430/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.8833 (1.1879) acc 68.7500 (70.2180) lr 1.9511e-03 eta 5:19:08 +epoch [7/50] batch [435/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.7148 (1.1881) acc 56.2500 (70.2155) lr 1.9511e-03 eta 5:19:05 +epoch [7/50] batch [440/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.5859 (1.1861) acc 87.5000 (70.2557) lr 1.9511e-03 eta 5:18:56 +epoch [7/50] batch [445/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.8447 (1.1879) acc 56.2500 (70.2037) lr 1.9511e-03 eta 5:18:53 +epoch [7/50] batch [450/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.0225 (1.1867) acc 71.8750 (70.2292) lr 1.9511e-03 eta 5:18:48 +epoch [7/50] batch [455/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.0176 (1.1876) acc 71.8750 (70.2335) lr 1.9511e-03 eta 5:18:41 +epoch [7/50] batch [460/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.4902 (1.1905) acc 71.8750 (70.1970) lr 1.9511e-03 eta 5:18:40 +epoch [7/50] batch [465/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.0088 (1.1883) acc 78.1250 (70.2487) lr 1.9511e-03 eta 5:18:38 +epoch [7/50] batch [470/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.3408 (1.1869) acc 65.6250 (70.3125) lr 1.9511e-03 eta 5:18:33 +epoch [7/50] batch [475/500] time 0.864 (0.888) data 0.000 (0.001) loss 0.6133 (1.1862) acc 84.3750 (70.3487) lr 1.9511e-03 eta 5:18:27 +epoch [7/50] batch [480/500] time 0.862 (0.888) data 0.000 (0.001) loss 1.1777 (1.1854) acc 68.7500 (70.3516) lr 1.9511e-03 eta 5:18:21 +epoch [7/50] batch [485/500] time 0.885 (0.888) data 0.001 (0.001) loss 1.6494 (1.1851) acc 65.6250 (70.3802) lr 1.9511e-03 eta 5:18:18 +epoch [7/50] batch [490/500] time 0.882 (0.888) data 0.000 (0.001) loss 1.5186 (1.1847) acc 68.7500 (70.4018) lr 1.9511e-03 eta 5:18:12 +epoch [7/50] batch [495/500] time 0.882 (0.888) data 0.000 (0.001) loss 1.1504 (1.1846) acc 68.7500 (70.3851) lr 
1.9511e-03 eta 5:18:07 +epoch [7/50] batch [500/500] time 0.887 (0.888) data 0.000 (0.001) loss 1.0654 (1.1887) acc 71.8750 (70.3438) lr 1.9298e-03 eta 5:18:05 +epoch [8/50] batch [5/500] time 0.893 (1.055) data 0.000 (0.145) loss 0.9326 (1.1712) acc 75.0000 (66.8750) lr 1.9298e-03 eta 6:17:55 +epoch [8/50] batch [10/500] time 0.886 (0.971) data 0.000 (0.073) loss 1.8252 (1.3521) acc 56.2500 (65.0000) lr 1.9298e-03 eta 5:47:51 +epoch [8/50] batch [15/500] time 0.859 (0.947) data 0.000 (0.048) loss 1.1963 (1.3064) acc 68.7500 (66.0417) lr 1.9298e-03 eta 5:38:56 +epoch [8/50] batch [20/500] time 0.866 (0.931) data 0.000 (0.036) loss 0.6709 (1.2388) acc 71.8750 (67.5000) lr 1.9298e-03 eta 5:33:14 +epoch [8/50] batch [25/500] time 0.866 (0.919) data 0.000 (0.029) loss 1.5156 (1.1794) acc 68.7500 (68.8750) lr 1.9298e-03 eta 5:28:51 +epoch [8/50] batch [30/500] time 0.907 (0.914) data 0.000 (0.024) loss 1.1367 (1.2093) acc 75.0000 (68.4375) lr 1.9298e-03 eta 5:27:08 +epoch [8/50] batch [35/500] time 0.896 (0.911) data 0.000 (0.021) loss 0.5518 (1.1737) acc 81.2500 (69.7321) lr 1.9298e-03 eta 5:25:55 +epoch [8/50] batch [40/500] time 0.884 (0.908) data 0.000 (0.018) loss 0.9380 (1.1431) acc 81.2500 (70.7812) lr 1.9298e-03 eta 5:24:49 +epoch [8/50] batch [45/500] time 0.857 (0.906) data 0.000 (0.016) loss 0.7212 (1.1472) acc 71.8750 (70.6944) lr 1.9298e-03 eta 5:23:50 +epoch [8/50] batch [50/500] time 0.893 (0.906) data 0.000 (0.015) loss 1.2100 (1.1562) acc 62.5000 (70.1250) lr 1.9298e-03 eta 5:23:51 +epoch [8/50] batch [55/500] time 0.905 (0.905) data 0.000 (0.013) loss 1.2881 (1.1574) acc 65.6250 (69.8295) lr 1.9298e-03 eta 5:23:20 +epoch [8/50] batch [60/500] time 0.873 (0.903) data 0.000 (0.012) loss 1.2070 (1.1761) acc 65.6250 (69.4271) lr 1.9298e-03 eta 5:22:38 +epoch [8/50] batch [65/500] time 0.908 (0.902) data 0.000 (0.011) loss 0.5640 (1.1686) acc 81.2500 (69.8558) lr 1.9298e-03 eta 5:22:11 +epoch [8/50] batch [70/500] time 0.894 (0.901) data 0.000 (0.011) loss 
1.6484 (1.1607) acc 62.5000 (70.0000) lr 1.9298e-03 eta 5:21:51 +epoch [8/50] batch [75/500] time 0.879 (0.901) data 0.000 (0.010) loss 1.2197 (1.1494) acc 68.7500 (70.2500) lr 1.9298e-03 eta 5:21:36 +epoch [8/50] batch [80/500] time 0.856 (0.899) data 0.000 (0.009) loss 1.2354 (1.1467) acc 75.0000 (70.6250) lr 1.9298e-03 eta 5:21:04 +epoch [8/50] batch [85/500] time 0.862 (0.898) data 0.000 (0.009) loss 1.3105 (1.1405) acc 50.0000 (70.3676) lr 1.9298e-03 eta 5:20:38 +epoch [8/50] batch [90/500] time 0.888 (0.897) data 0.000 (0.008) loss 0.7256 (1.1407) acc 81.2500 (70.5208) lr 1.9298e-03 eta 5:20:10 +epoch [8/50] batch [95/500] time 0.883 (0.896) data 0.000 (0.008) loss 1.2744 (1.1506) acc 84.3750 (70.5592) lr 1.9298e-03 eta 5:19:44 +epoch [8/50] batch [100/500] time 0.925 (0.897) data 0.000 (0.007) loss 1.3486 (1.1520) acc 75.0000 (70.6250) lr 1.9298e-03 eta 5:19:51 +epoch [8/50] batch [105/500] time 0.913 (0.896) data 0.001 (0.007) loss 0.9434 (1.1548) acc 75.0000 (70.7738) lr 1.9298e-03 eta 5:19:37 +epoch [8/50] batch [110/500] time 0.883 (0.895) data 0.000 (0.007) loss 0.9092 (1.1543) acc 78.1250 (70.7670) lr 1.9298e-03 eta 5:19:06 +epoch [8/50] batch [115/500] time 0.877 (0.894) data 0.000 (0.007) loss 1.8096 (1.1600) acc 62.5000 (70.7880) lr 1.9298e-03 eta 5:18:39 +epoch [8/50] batch [120/500] time 0.867 (0.894) data 0.000 (0.006) loss 0.9355 (1.1597) acc 75.0000 (70.9896) lr 1.9298e-03 eta 5:18:26 +epoch [8/50] batch [125/500] time 0.874 (0.893) data 0.000 (0.006) loss 1.2793 (1.1619) acc 68.7500 (71.1500) lr 1.9298e-03 eta 5:18:13 +epoch [8/50] batch [130/500] time 0.896 (0.893) data 0.000 (0.006) loss 1.0322 (1.1631) acc 71.8750 (71.1298) lr 1.9298e-03 eta 5:18:03 +epoch [8/50] batch [135/500] time 0.912 (0.893) data 0.000 (0.006) loss 1.0625 (1.1601) acc 78.1250 (71.2269) lr 1.9298e-03 eta 5:18:00 +epoch [8/50] batch [140/500] time 0.903 (0.893) data 0.001 (0.005) loss 1.0479 (1.1573) acc 75.0000 (71.3170) lr 1.9298e-03 eta 5:18:02 +epoch [8/50] batch 
[145/500] time 0.999 (0.894) data 0.000 (0.005) loss 0.6924 (1.1518) acc 81.2500 (71.2716) lr 1.9298e-03 eta 5:18:12 +epoch [8/50] batch [150/500] time 0.902 (0.894) data 0.000 (0.005) loss 0.9712 (1.1594) acc 71.8750 (71.1875) lr 1.9298e-03 eta 5:18:11 +epoch [8/50] batch [155/500] time 0.900 (0.894) data 0.000 (0.005) loss 1.0957 (1.1634) acc 75.0000 (71.0484) lr 1.9298e-03 eta 5:18:01 +epoch [8/50] batch [160/500] time 0.903 (0.894) data 0.000 (0.005) loss 0.8862 (1.1641) acc 84.3750 (71.0938) lr 1.9298e-03 eta 5:17:51 +epoch [8/50] batch [165/500] time 0.894 (0.894) data 0.000 (0.005) loss 1.4990 (1.1667) acc 53.1250 (71.1174) lr 1.9298e-03 eta 5:17:43 +epoch [8/50] batch [170/500] time 0.865 (0.893) data 0.000 (0.004) loss 1.1338 (1.1680) acc 68.7500 (71.0846) lr 1.9298e-03 eta 5:17:31 +epoch [8/50] batch [175/500] time 0.891 (0.893) data 0.000 (0.004) loss 1.1113 (1.1672) acc 65.6250 (70.9821) lr 1.9298e-03 eta 5:17:21 +epoch [8/50] batch [180/500] time 0.911 (0.893) data 0.000 (0.004) loss 1.4648 (1.1653) acc 59.3750 (70.9549) lr 1.9298e-03 eta 5:17:15 +epoch [8/50] batch [185/500] time 0.913 (0.893) data 0.000 (0.004) loss 1.7705 (1.1708) acc 65.6250 (70.8446) lr 1.9298e-03 eta 5:17:17 +epoch [8/50] batch [190/500] time 0.885 (0.894) data 0.000 (0.004) loss 0.8643 (1.1621) acc 78.1250 (71.0362) lr 1.9298e-03 eta 5:17:22 +epoch [8/50] batch [195/500] time 0.884 (0.893) data 0.000 (0.004) loss 0.7754 (1.1619) acc 71.8750 (70.9776) lr 1.9298e-03 eta 5:17:15 +epoch [8/50] batch [200/500] time 0.893 (0.893) data 0.000 (0.004) loss 1.4873 (1.1651) acc 65.6250 (70.9375) lr 1.9298e-03 eta 5:17:07 +epoch [8/50] batch [205/500] time 0.866 (0.893) data 0.000 (0.004) loss 1.1279 (1.1694) acc 71.8750 (70.7470) lr 1.9298e-03 eta 5:16:53 +epoch [8/50] batch [210/500] time 0.908 (0.893) data 0.000 (0.004) loss 1.7432 (1.1703) acc 68.7500 (70.6399) lr 1.9298e-03 eta 5:16:51 +epoch [8/50] batch [215/500] time 0.878 (0.893) data 0.000 (0.004) loss 0.6157 (1.1702) acc 87.5000 
(70.7122) lr 1.9298e-03 eta 5:16:46 +epoch [8/50] batch [220/500] time 0.937 (0.893) data 0.000 (0.004) loss 1.4365 (1.1684) acc 62.5000 (70.6960) lr 1.9298e-03 eta 5:16:43 +epoch [8/50] batch [225/500] time 0.888 (0.893) data 0.000 (0.003) loss 0.6089 (1.1643) acc 75.0000 (70.8194) lr 1.9298e-03 eta 5:16:28 +epoch [8/50] batch [230/500] time 0.866 (0.892) data 0.001 (0.003) loss 1.9873 (1.1690) acc 68.7500 (70.7745) lr 1.9298e-03 eta 5:16:22 +epoch [8/50] batch [235/500] time 0.898 (0.892) data 0.001 (0.003) loss 1.5654 (1.1712) acc 59.3750 (70.7181) lr 1.9298e-03 eta 5:16:08 +epoch [8/50] batch [240/500] time 0.873 (0.892) data 0.000 (0.003) loss 0.8750 (1.1706) acc 81.2500 (70.7292) lr 1.9298e-03 eta 5:16:03 +epoch [8/50] batch [245/500] time 0.872 (0.892) data 0.000 (0.003) loss 0.5166 (1.1700) acc 87.5000 (70.7781) lr 1.9298e-03 eta 5:15:54 +epoch [8/50] batch [250/500] time 0.922 (0.892) data 0.000 (0.003) loss 1.2119 (1.1735) acc 53.1250 (70.6875) lr 1.9298e-03 eta 5:15:47 +epoch [8/50] batch [255/500] time 0.868 (0.891) data 0.000 (0.003) loss 1.1904 (1.1756) acc 71.8750 (70.6495) lr 1.9298e-03 eta 5:15:39 +epoch [8/50] batch [260/500] time 0.893 (0.891) data 0.000 (0.003) loss 1.5430 (1.1745) acc 68.7500 (70.6731) lr 1.9298e-03 eta 5:15:32 +epoch [8/50] batch [265/500] time 0.892 (0.891) data 0.001 (0.003) loss 0.5264 (1.1719) acc 87.5000 (70.7311) lr 1.9298e-03 eta 5:15:26 +epoch [8/50] batch [270/500] time 0.882 (0.891) data 0.000 (0.003) loss 1.6240 (1.1734) acc 68.7500 (70.6481) lr 1.9298e-03 eta 5:15:23 +epoch [8/50] batch [275/500] time 0.876 (0.891) data 0.000 (0.003) loss 0.8525 (1.1720) acc 75.0000 (70.7159) lr 1.9298e-03 eta 5:15:15 +epoch [8/50] batch [280/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.1621 (1.1725) acc 75.0000 (70.7589) lr 1.9298e-03 eta 5:15:12 +epoch [8/50] batch [285/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.2334 (1.1745) acc 71.8750 (70.7566) lr 1.9298e-03 eta 5:15:10 +epoch [8/50] batch [290/500] time 0.904 
(0.892) data 0.000 (0.003) loss 2.1777 (1.1753) acc 56.2500 (70.7651) lr 1.9298e-03 eta 5:15:19 +epoch [8/50] batch [295/500] time 0.884 (0.892) data 0.000 (0.003) loss 1.1045 (1.1746) acc 68.7500 (70.7945) lr 1.9298e-03 eta 5:15:15 +epoch [8/50] batch [300/500] time 0.920 (0.892) data 0.000 (0.003) loss 1.0859 (1.1744) acc 71.8750 (70.7500) lr 1.9298e-03 eta 5:15:11 +epoch [8/50] batch [305/500] time 0.915 (0.892) data 0.000 (0.003) loss 0.9336 (1.1709) acc 68.7500 (70.8094) lr 1.9298e-03 eta 5:15:03 +epoch [8/50] batch [310/500] time 0.873 (0.892) data 0.000 (0.003) loss 1.4746 (1.1697) acc 56.2500 (70.8770) lr 1.9298e-03 eta 5:14:55 +epoch [8/50] batch [315/500] time 0.884 (0.892) data 0.000 (0.003) loss 1.6299 (1.1721) acc 62.5000 (70.8234) lr 1.9298e-03 eta 5:14:46 +epoch [8/50] batch [320/500] time 0.891 (0.891) data 0.000 (0.003) loss 1.0791 (1.1721) acc 71.8750 (70.8398) lr 1.9298e-03 eta 5:14:41 +epoch [8/50] batch [325/500] time 0.900 (0.892) data 0.000 (0.002) loss 1.1113 (1.1711) acc 78.1250 (70.8942) lr 1.9298e-03 eta 5:14:39 +epoch [8/50] batch [330/500] time 0.894 (0.892) data 0.000 (0.002) loss 1.1309 (1.1727) acc 75.0000 (70.8333) lr 1.9298e-03 eta 5:14:35 +epoch [8/50] batch [335/500] time 0.861 (0.892) data 0.000 (0.002) loss 1.3770 (1.1702) acc 68.7500 (70.9235) lr 1.9298e-03 eta 5:14:33 +epoch [8/50] batch [340/500] time 0.877 (0.892) data 0.000 (0.002) loss 1.5039 (1.1690) acc 65.6250 (70.9007) lr 1.9298e-03 eta 5:14:31 +epoch [8/50] batch [345/500] time 0.895 (0.892) data 0.000 (0.002) loss 1.4932 (1.1710) acc 62.5000 (70.8786) lr 1.9298e-03 eta 5:14:27 +epoch [8/50] batch [350/500] time 0.894 (0.892) data 0.000 (0.002) loss 0.9717 (1.1653) acc 71.8750 (70.9464) lr 1.9298e-03 eta 5:14:22 +epoch [8/50] batch [355/500] time 0.908 (0.892) data 0.000 (0.002) loss 1.2021 (1.1646) acc 78.1250 (70.9859) lr 1.9298e-03 eta 5:14:20 +epoch [8/50] batch [360/500] time 0.885 (0.892) data 0.000 (0.002) loss 1.1934 (1.1648) acc 65.6250 (70.9549) lr 
1.9298e-03 eta 5:14:12 +epoch [8/50] batch [365/500] time 0.894 (0.892) data 0.000 (0.002) loss 1.7100 (1.1683) acc 59.3750 (70.8476) lr 1.9298e-03 eta 5:14:10 +epoch [8/50] batch [370/500] time 0.873 (0.892) data 0.000 (0.002) loss 1.1221 (1.1674) acc 68.7500 (70.8530) lr 1.9298e-03 eta 5:14:00 +epoch [8/50] batch [375/500] time 0.864 (0.891) data 0.000 (0.002) loss 0.7803 (1.1654) acc 81.2500 (70.8750) lr 1.9298e-03 eta 5:13:51 +epoch [8/50] batch [380/500] time 0.886 (0.891) data 0.000 (0.002) loss 1.5430 (1.1647) acc 68.7500 (70.8964) lr 1.9298e-03 eta 5:13:44 +epoch [8/50] batch [385/500] time 0.853 (0.891) data 0.000 (0.002) loss 1.1836 (1.1627) acc 75.0000 (70.9578) lr 1.9298e-03 eta 5:13:37 +epoch [8/50] batch [390/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.2217 (1.1644) acc 71.8750 (70.9135) lr 1.9298e-03 eta 5:13:29 +epoch [8/50] batch [395/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.2510 (1.1647) acc 71.8750 (70.9415) lr 1.9298e-03 eta 5:13:22 +epoch [8/50] batch [400/500] time 0.847 (0.891) data 0.000 (0.002) loss 1.0801 (1.1648) acc 75.0000 (70.9688) lr 1.9298e-03 eta 5:13:14 +epoch [8/50] batch [405/500] time 0.898 (0.891) data 0.000 (0.002) loss 1.1299 (1.1643) acc 71.8750 (70.9414) lr 1.9298e-03 eta 5:13:06 +epoch [8/50] batch [410/500] time 0.898 (0.891) data 0.000 (0.002) loss 1.0430 (1.1644) acc 75.0000 (70.9299) lr 1.9298e-03 eta 5:13:01 +epoch [8/50] batch [415/500] time 0.875 (0.891) data 0.000 (0.002) loss 1.6768 (1.1639) acc 62.5000 (70.9488) lr 1.9298e-03 eta 5:12:56 +epoch [8/50] batch [420/500] time 0.871 (0.891) data 0.000 (0.002) loss 1.0234 (1.1636) acc 68.7500 (70.9598) lr 1.9298e-03 eta 5:12:55 +epoch [8/50] batch [425/500] time 0.875 (0.891) data 0.000 (0.002) loss 1.4980 (1.1671) acc 65.6250 (70.8897) lr 1.9298e-03 eta 5:12:51 +epoch [8/50] batch [430/500] time 0.866 (0.891) data 0.000 (0.002) loss 0.7842 (1.1667) acc 75.0000 (70.8794) lr 1.9298e-03 eta 5:12:44 +epoch [8/50] batch [435/500] time 0.901 (0.891) data 
0.000 (0.002) loss 0.8232 (1.1641) acc 81.2500 (70.9195) lr 1.9298e-03 eta 5:12:47 +epoch [8/50] batch [440/500] time 0.858 (0.891) data 0.001 (0.002) loss 1.1885 (1.1652) acc 68.7500 (70.9020) lr 1.9298e-03 eta 5:12:35 +epoch [8/50] batch [445/500] time 0.884 (0.891) data 0.000 (0.002) loss 0.9331 (1.1656) acc 81.2500 (70.9059) lr 1.9298e-03 eta 5:12:30 +epoch [8/50] batch [450/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.6494 (1.1667) acc 53.1250 (70.8750) lr 1.9298e-03 eta 5:12:24 +epoch [8/50] batch [455/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.2744 (1.1671) acc 62.5000 (70.8448) lr 1.9298e-03 eta 5:12:16 +epoch [8/50] batch [460/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.5576 (1.1685) acc 59.3750 (70.8084) lr 1.9298e-03 eta 5:12:08 +epoch [8/50] batch [465/500] time 0.837 (0.890) data 0.000 (0.002) loss 1.0234 (1.1695) acc 78.1250 (70.7863) lr 1.9298e-03 eta 5:11:58 +epoch [8/50] batch [470/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.0273 (1.1666) acc 71.8750 (70.7979) lr 1.9298e-03 eta 5:11:51 +epoch [8/50] batch [475/500] time 0.990 (0.890) data 0.000 (0.002) loss 1.5459 (1.1707) acc 75.0000 (70.7434) lr 1.9298e-03 eta 5:11:50 +epoch [8/50] batch [480/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.8047 (1.1726) acc 53.1250 (70.6901) lr 1.9298e-03 eta 5:11:44 +epoch [8/50] batch [485/500] time 0.872 (0.890) data 0.000 (0.002) loss 1.4844 (1.1723) acc 56.2500 (70.6894) lr 1.9298e-03 eta 5:11:37 +epoch [8/50] batch [490/500] time 0.849 (0.890) data 0.000 (0.002) loss 0.9585 (1.1721) acc 78.1250 (70.6952) lr 1.9298e-03 eta 5:11:29 +epoch [8/50] batch [495/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.8252 (1.1743) acc 65.6250 (70.6755) lr 1.9298e-03 eta 5:11:24 +epoch [8/50] batch [500/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.0889 (1.1767) acc 65.6250 (70.6250) lr 1.9048e-03 eta 5:11:21 +epoch [9/50] batch [5/500] time 0.889 (0.998) data 0.000 (0.123) loss 1.2021 (1.2917) acc 71.8750 (64.3750) lr 1.9048e-03 eta 5:49:17 
+epoch [9/50] batch [10/500] time 0.880 (0.935) data 0.000 (0.061) loss 0.9033 (1.2271) acc 68.7500 (67.1875) lr 1.9048e-03 eta 5:27:04 +epoch [9/50] batch [15/500] time 0.878 (0.914) data 0.000 (0.041) loss 0.9121 (1.1376) acc 78.1250 (69.3750) lr 1.9048e-03 eta 5:19:42 +epoch [9/50] batch [20/500] time 0.883 (0.916) data 0.000 (0.031) loss 0.9272 (1.0750) acc 78.1250 (70.9375) lr 1.9048e-03 eta 5:20:23 +epoch [9/50] batch [25/500] time 0.884 (0.910) data 0.000 (0.025) loss 0.8838 (1.1113) acc 71.8750 (70.2500) lr 1.9048e-03 eta 5:18:11 +epoch [9/50] batch [30/500] time 0.877 (0.905) data 0.000 (0.021) loss 0.9023 (1.0978) acc 75.0000 (71.0417) lr 1.9048e-03 eta 5:16:18 +epoch [9/50] batch [35/500] time 0.871 (0.902) data 0.000 (0.018) loss 1.4414 (1.1326) acc 62.5000 (70.3571) lr 1.9048e-03 eta 5:15:09 +epoch [9/50] batch [40/500] time 0.884 (0.901) data 0.000 (0.016) loss 1.0547 (1.1395) acc 81.2500 (70.2344) lr 1.9048e-03 eta 5:14:34 +epoch [9/50] batch [45/500] time 0.891 (0.897) data 0.000 (0.014) loss 0.9458 (1.1106) acc 78.1250 (71.2500) lr 1.9048e-03 eta 5:13:19 +epoch [9/50] batch [50/500] time 0.896 (0.896) data 0.000 (0.012) loss 2.0801 (1.1312) acc 53.1250 (71.0000) lr 1.9048e-03 eta 5:12:55 +epoch [9/50] batch [55/500] time 0.909 (0.896) data 0.000 (0.011) loss 2.0234 (1.1396) acc 65.6250 (71.2500) lr 1.9048e-03 eta 5:12:39 +epoch [9/50] batch [60/500] time 0.906 (0.896) data 0.000 (0.010) loss 0.6924 (1.1185) acc 75.0000 (71.6667) lr 1.9048e-03 eta 5:12:42 +epoch [9/50] batch [65/500] time 0.862 (0.895) data 0.000 (0.010) loss 1.3223 (1.1311) acc 78.1250 (71.3942) lr 1.9048e-03 eta 5:12:24 +epoch [9/50] batch [70/500] time 0.877 (0.894) data 0.000 (0.009) loss 0.8408 (1.1239) acc 84.3750 (71.6964) lr 1.9048e-03 eta 5:11:55 +epoch [9/50] batch [75/500] time 0.890 (0.894) data 0.000 (0.008) loss 1.6963 (1.1361) acc 56.2500 (71.5833) lr 1.9048e-03 eta 5:11:41 +epoch [9/50] batch [80/500] time 0.893 (0.894) data 0.000 (0.008) loss 1.1494 (1.1328) acc 
68.7500 (71.4062) lr 1.9048e-03 eta 5:11:36 +epoch [9/50] batch [85/500] time 0.883 (0.893) data 0.000 (0.007) loss 1.7500 (1.1464) acc 68.7500 (71.2132) lr 1.9048e-03 eta 5:11:16 +epoch [9/50] batch [90/500] time 0.888 (0.892) data 0.001 (0.007) loss 0.8867 (1.1604) acc 75.0000 (70.9028) lr 1.9048e-03 eta 5:10:51 +epoch [9/50] batch [95/500] time 0.887 (0.891) data 0.000 (0.007) loss 0.9614 (1.1543) acc 75.0000 (71.1184) lr 1.9048e-03 eta 5:10:32 +epoch [9/50] batch [100/500] time 0.872 (0.891) data 0.000 (0.006) loss 0.7285 (1.1637) acc 81.2500 (71.0938) lr 1.9048e-03 eta 5:10:18 +epoch [9/50] batch [105/500] time 0.893 (0.891) data 0.000 (0.006) loss 1.3369 (1.1678) acc 59.3750 (70.9524) lr 1.9048e-03 eta 5:10:11 +epoch [9/50] batch [110/500] time 0.865 (0.890) data 0.000 (0.006) loss 1.4863 (1.1714) acc 65.6250 (70.6534) lr 1.9048e-03 eta 5:09:49 +epoch [9/50] batch [115/500] time 0.900 (0.890) data 0.000 (0.006) loss 1.2090 (1.1702) acc 75.0000 (70.8152) lr 1.9048e-03 eta 5:09:50 +epoch [9/50] batch [120/500] time 0.876 (0.891) data 0.000 (0.005) loss 0.6816 (1.1615) acc 75.0000 (70.9896) lr 1.9048e-03 eta 5:09:56 +epoch [9/50] batch [125/500] time 0.904 (0.891) data 0.000 (0.005) loss 0.8223 (1.1536) acc 75.0000 (71.1250) lr 1.9048e-03 eta 5:09:49 +epoch [9/50] batch [130/500] time 0.864 (0.890) data 0.000 (0.005) loss 0.8252 (1.1481) acc 81.2500 (71.3221) lr 1.9048e-03 eta 5:09:30 +epoch [9/50] batch [135/500] time 0.865 (0.889) data 0.000 (0.005) loss 0.9463 (1.1479) acc 71.8750 (71.2731) lr 1.9048e-03 eta 5:09:18 +epoch [9/50] batch [140/500] time 0.886 (0.889) data 0.000 (0.005) loss 0.7500 (1.1552) acc 78.1250 (71.0938) lr 1.9048e-03 eta 5:09:12 +epoch [9/50] batch [145/500] time 0.899 (0.889) data 0.000 (0.004) loss 2.1074 (1.1554) acc 56.2500 (71.1207) lr 1.9048e-03 eta 5:09:04 +epoch [9/50] batch [150/500] time 0.923 (0.889) data 0.000 (0.004) loss 1.0645 (1.1461) acc 68.7500 (71.2917) lr 1.9048e-03 eta 5:09:03 +epoch [9/50] batch [155/500] time 0.901 
(0.889) data 0.000 (0.004) loss 1.3037 (1.1430) acc 71.8750 (71.3306) lr 1.9048e-03 eta 5:08:59 +epoch [9/50] batch [160/500] time 0.910 (0.889) data 0.000 (0.004) loss 0.9990 (1.1412) acc 71.8750 (71.3086) lr 1.9048e-03 eta 5:08:52 +epoch [9/50] batch [165/500] time 0.901 (0.890) data 0.000 (0.004) loss 1.1201 (1.1364) acc 71.8750 (71.3826) lr 1.9048e-03 eta 5:08:56 +epoch [9/50] batch [170/500] time 0.889 (0.889) data 0.000 (0.004) loss 1.1367 (1.1337) acc 59.3750 (71.3603) lr 1.9048e-03 eta 5:08:43 +epoch [9/50] batch [175/500] time 0.867 (0.889) data 0.000 (0.004) loss 1.0869 (1.1399) acc 78.1250 (71.3393) lr 1.9048e-03 eta 5:08:30 +epoch [9/50] batch [180/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.2344 (1.1418) acc 65.6250 (71.2674) lr 1.9048e-03 eta 5:08:24 +epoch [9/50] batch [185/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.2334 (1.1428) acc 68.7500 (71.1824) lr 1.9048e-03 eta 5:08:20 +epoch [9/50] batch [190/500] time 0.905 (0.889) data 0.000 (0.003) loss 1.2480 (1.1429) acc 65.6250 (71.2664) lr 1.9048e-03 eta 5:08:17 +epoch [9/50] batch [195/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.9800 (1.1411) acc 75.0000 (71.2981) lr 1.9048e-03 eta 5:08:12 +epoch [9/50] batch [200/500] time 0.884 (0.889) data 0.000 (0.003) loss 0.5962 (1.1392) acc 87.5000 (71.3594) lr 1.9048e-03 eta 5:08:04 +epoch [9/50] batch [205/500] time 0.890 (0.889) data 0.000 (0.003) loss 0.9019 (1.1345) acc 75.0000 (71.5091) lr 1.9048e-03 eta 5:08:01 +epoch [9/50] batch [210/500] time 0.896 (0.889) data 0.000 (0.003) loss 1.2432 (1.1343) acc 59.3750 (71.4881) lr 1.9048e-03 eta 5:07:56 +epoch [9/50] batch [215/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.1914 (1.1371) acc 71.8750 (71.4971) lr 1.9048e-03 eta 5:07:50 +epoch [9/50] batch [220/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.3936 (1.1398) acc 68.7500 (71.3352) lr 1.9048e-03 eta 5:07:43 +epoch [9/50] batch [225/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.9160 (1.1410) acc 50.0000 (71.3056) lr 
1.9048e-03 eta 5:07:38 +epoch [9/50] batch [230/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.6270 (1.1415) acc 50.0000 (71.2364) lr 1.9048e-03 eta 5:07:38 +epoch [9/50] batch [235/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.8652 (1.1386) acc 81.2500 (71.2633) lr 1.9048e-03 eta 5:07:29 +epoch [9/50] batch [240/500] time 0.878 (0.888) data 0.000 (0.003) loss 1.1123 (1.1439) acc 68.7500 (71.1589) lr 1.9048e-03 eta 5:07:20 +epoch [9/50] batch [245/500] time 0.918 (0.888) data 0.000 (0.003) loss 0.9175 (1.1448) acc 81.2500 (71.0969) lr 1.9048e-03 eta 5:07:17 +epoch [9/50] batch [250/500] time 0.898 (0.888) data 0.000 (0.003) loss 1.3418 (1.1474) acc 75.0000 (71.0375) lr 1.9048e-03 eta 5:07:09 +epoch [9/50] batch [255/500] time 0.858 (0.888) data 0.000 (0.003) loss 1.2617 (1.1487) acc 71.8750 (71.0172) lr 1.9048e-03 eta 5:07:00 +epoch [9/50] batch [260/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.0859 (1.1474) acc 65.6250 (71.0337) lr 1.9048e-03 eta 5:06:56 +epoch [9/50] batch [265/500] time 0.913 (0.888) data 0.000 (0.003) loss 1.0459 (1.1513) acc 78.1250 (70.9788) lr 1.9048e-03 eta 5:07:02 +epoch [9/50] batch [270/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.9795 (1.1488) acc 68.7500 (70.9838) lr 1.9048e-03 eta 5:06:53 +epoch [9/50] batch [275/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.0273 (1.1516) acc 81.2500 (71.0000) lr 1.9048e-03 eta 5:06:46 +epoch [9/50] batch [280/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.8232 (1.1510) acc 84.3750 (71.0268) lr 1.9048e-03 eta 5:06:40 +epoch [9/50] batch [285/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.4736 (1.1529) acc 65.6250 (70.9430) lr 1.9048e-03 eta 5:06:30 +epoch [9/50] batch [290/500] time 0.921 (0.888) data 0.000 (0.002) loss 1.1963 (1.1529) acc 75.0000 (70.9698) lr 1.9048e-03 eta 5:06:28 +epoch [9/50] batch [295/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.3525 (1.1535) acc 78.1250 (70.9428) lr 1.9048e-03 eta 5:06:18 +epoch [9/50] batch [300/500] time 0.893 (0.888) data 
0.000 (0.002) loss 0.8315 (1.1527) acc 84.3750 (71.0625) lr 1.9048e-03 eta 5:06:12 +epoch [9/50] batch [305/500] time 0.985 (0.888) data 0.000 (0.002) loss 1.3906 (1.1567) acc 65.6250 (70.9939) lr 1.9048e-03 eta 5:06:13 +epoch [9/50] batch [310/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.3545 (1.1575) acc 68.7500 (70.9778) lr 1.9048e-03 eta 5:06:09 +epoch [9/50] batch [315/500] time 0.859 (0.888) data 0.000 (0.002) loss 0.7236 (1.1547) acc 81.2500 (71.0516) lr 1.9048e-03 eta 5:06:00 +epoch [9/50] batch [320/500] time 0.851 (0.887) data 0.000 (0.002) loss 1.9785 (1.1575) acc 43.7500 (71.0352) lr 1.9048e-03 eta 5:05:52 +epoch [9/50] batch [325/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.3799 (1.1564) acc 68.7500 (71.1250) lr 1.9048e-03 eta 5:05:47 +epoch [9/50] batch [330/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.3555 (1.1565) acc 68.7500 (71.1174) lr 1.9048e-03 eta 5:05:43 +epoch [9/50] batch [335/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.5605 (1.1571) acc 71.8750 (71.1474) lr 1.9048e-03 eta 5:05:40 +epoch [9/50] batch [340/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.1738 (1.1575) acc 71.8750 (71.1581) lr 1.9048e-03 eta 5:05:38 +epoch [9/50] batch [345/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.2549 (1.1582) acc 65.6250 (71.2047) lr 1.9048e-03 eta 5:05:32 +epoch [9/50] batch [350/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8584 (1.1587) acc 71.8750 (71.1607) lr 1.9048e-03 eta 5:05:25 +epoch [9/50] batch [355/500] time 0.913 (0.888) data 0.000 (0.002) loss 1.2578 (1.1592) acc 75.0000 (71.1532) lr 1.9048e-03 eta 5:05:22 +epoch [9/50] batch [360/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.0225 (1.1573) acc 75.0000 (71.1719) lr 1.9048e-03 eta 5:05:16 +epoch [9/50] batch [365/500] time 0.912 (0.888) data 0.000 (0.002) loss 0.9585 (1.1559) acc 78.1250 (71.1901) lr 1.9048e-03 eta 5:05:13 +epoch [9/50] batch [370/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.0557 (1.1573) acc 68.7500 (71.1486) lr 1.9048e-03 eta 
5:05:08 +epoch [9/50] batch [375/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.4834 (1.1580) acc 62.5000 (71.1000) lr 1.9048e-03 eta 5:05:02 +epoch [9/50] batch [380/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1934 (1.1569) acc 62.5000 (71.1020) lr 1.9048e-03 eta 5:04:55 +epoch [9/50] batch [385/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.1504 (1.1583) acc 68.7500 (71.0877) lr 1.9048e-03 eta 5:04:52 +epoch [9/50] batch [390/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.8301 (1.1564) acc 81.2500 (71.1619) lr 1.9048e-03 eta 5:04:49 +epoch [9/50] batch [395/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.3779 (1.1574) acc 68.7500 (71.0997) lr 1.9048e-03 eta 5:04:44 +epoch [9/50] batch [400/500] time 0.917 (0.888) data 0.000 (0.002) loss 1.1182 (1.1580) acc 71.8750 (71.1406) lr 1.9048e-03 eta 5:04:42 +epoch [9/50] batch [405/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1396 (1.1561) acc 71.8750 (71.2037) lr 1.9048e-03 eta 5:04:45 +epoch [9/50] batch [410/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.4141 (1.1572) acc 62.5000 (71.1662) lr 1.9048e-03 eta 5:04:39 +epoch [9/50] batch [415/500] time 0.911 (0.888) data 0.000 (0.002) loss 0.6812 (1.1566) acc 81.2500 (71.1521) lr 1.9048e-03 eta 5:04:34 +epoch [9/50] batch [420/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.0225 (1.1560) acc 71.8750 (71.1458) lr 1.9048e-03 eta 5:04:31 +epoch [9/50] batch [425/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.9663 (1.1563) acc 71.8750 (71.1029) lr 1.9048e-03 eta 5:04:27 +epoch [9/50] batch [430/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.4639 (1.1579) acc 62.5000 (71.0538) lr 1.9048e-03 eta 5:04:24 +epoch [9/50] batch [435/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.0771 (1.1560) acc 68.7500 (71.0704) lr 1.9048e-03 eta 5:04:21 +epoch [9/50] batch [440/500] time 0.914 (0.888) data 0.000 (0.002) loss 2.0176 (1.1553) acc 65.6250 (71.1222) lr 1.9048e-03 eta 5:04:14 +epoch [9/50] batch [445/500] time 0.893 (0.888) data 0.000 (0.002) loss 
1.8213 (1.1573) acc 59.3750 (71.1025) lr 1.9048e-03 eta 5:04:12 +epoch [9/50] batch [450/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.1484 (1.1577) acc 68.7500 (71.0833) lr 1.9048e-03 eta 5:04:14 +epoch [9/50] batch [455/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.2295 (1.1551) acc 75.0000 (71.1607) lr 1.9048e-03 eta 5:04:10 +epoch [9/50] batch [460/500] time 0.910 (0.889) data 0.000 (0.002) loss 1.4932 (1.1559) acc 71.8750 (71.1481) lr 1.9048e-03 eta 5:04:10 +epoch [9/50] batch [465/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.0850 (1.1560) acc 71.8750 (71.1559) lr 1.9048e-03 eta 5:04:08 +epoch [9/50] batch [470/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.1455 (1.1585) acc 75.0000 (71.0838) lr 1.9048e-03 eta 5:04:06 +epoch [9/50] batch [475/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.1738 (1.1615) acc 65.6250 (71.0000) lr 1.9048e-03 eta 5:04:02 +epoch [9/50] batch [480/500] time 0.914 (0.889) data 0.000 (0.001) loss 1.1914 (1.1610) acc 75.0000 (71.0156) lr 1.9048e-03 eta 5:03:58 +epoch [9/50] batch [485/500] time 0.886 (0.889) data 0.000 (0.001) loss 1.1387 (1.1613) acc 71.8750 (70.9923) lr 1.9048e-03 eta 5:03:54 +epoch [9/50] batch [490/500] time 0.875 (0.889) data 0.000 (0.001) loss 1.1094 (1.1608) acc 75.0000 (71.0077) lr 1.9048e-03 eta 5:03:50 +epoch [9/50] batch [495/500] time 0.885 (0.889) data 0.000 (0.001) loss 1.1670 (1.1597) acc 71.8750 (71.0227) lr 1.9048e-03 eta 5:03:44 +epoch [9/50] batch [500/500] time 0.899 (0.889) data 0.000 (0.001) loss 1.0439 (1.1610) acc 65.6250 (70.9750) lr 1.8763e-03 eta 5:03:41 +epoch [10/50] batch [5/500] time 0.903 (1.042) data 0.000 (0.138) loss 0.7217 (0.9016) acc 81.2500 (78.1250) lr 1.8763e-03 eta 5:56:03 +epoch [10/50] batch [10/500] time 0.873 (0.963) data 0.000 (0.069) loss 1.0010 (1.0198) acc 71.8750 (75.0000) lr 1.8763e-03 eta 5:28:44 +epoch [10/50] batch [15/500] time 0.872 (0.939) data 0.000 (0.046) loss 0.9058 (1.0560) acc 78.1250 (73.9583) lr 1.8763e-03 eta 5:20:38 +epoch [10/50] 
batch [20/500] time 0.907 (0.929) data 0.000 (0.035) loss 0.9438 (1.1145) acc 65.6250 (71.2500) lr 1.8763e-03 eta 5:17:05 +epoch [10/50] batch [25/500] time 0.893 (0.924) data 0.000 (0.028) loss 0.8901 (1.0806) acc 62.5000 (71.2500) lr 1.8763e-03 eta 5:15:15 +epoch [10/50] batch [30/500] time 0.877 (0.917) data 0.000 (0.023) loss 1.2197 (1.1001) acc 75.0000 (71.5625) lr 1.8763e-03 eta 5:13:00 +epoch [10/50] batch [35/500] time 0.880 (0.913) data 0.000 (0.020) loss 0.9829 (1.1104) acc 78.1250 (71.1607) lr 1.8763e-03 eta 5:11:30 +epoch [10/50] batch [40/500] time 0.895 (0.911) data 0.000 (0.018) loss 0.7573 (1.1039) acc 81.2500 (71.5625) lr 1.8763e-03 eta 5:10:47 +epoch [10/50] batch [45/500] time 0.891 (0.909) data 0.000 (0.016) loss 0.9116 (1.1178) acc 81.2500 (71.6667) lr 1.8763e-03 eta 5:09:46 +epoch [10/50] batch [50/500] time 0.887 (0.906) data 0.000 (0.014) loss 0.8130 (1.1098) acc 75.0000 (71.4375) lr 1.8763e-03 eta 5:08:43 +epoch [10/50] batch [55/500] time 0.898 (0.905) data 0.000 (0.013) loss 1.3682 (1.1181) acc 68.7500 (71.3636) lr 1.8763e-03 eta 5:08:26 +epoch [10/50] batch [60/500] time 0.907 (0.904) data 0.000 (0.012) loss 0.9766 (1.1202) acc 68.7500 (71.3021) lr 1.8763e-03 eta 5:07:50 +epoch [10/50] batch [65/500] time 0.864 (0.902) data 0.000 (0.011) loss 1.3867 (1.1315) acc 62.5000 (71.0096) lr 1.8763e-03 eta 5:07:10 +epoch [10/50] batch [70/500] time 0.899 (0.901) data 0.000 (0.010) loss 1.5146 (1.1217) acc 65.6250 (71.2054) lr 1.8763e-03 eta 5:06:41 +epoch [10/50] batch [75/500] time 0.861 (0.899) data 0.000 (0.009) loss 1.4951 (1.1226) acc 78.1250 (71.4583) lr 1.8763e-03 eta 5:06:11 +epoch [10/50] batch [80/500] time 0.890 (0.898) data 0.000 (0.009) loss 1.0645 (1.1243) acc 81.2500 (71.4062) lr 1.8763e-03 eta 5:05:34 +epoch [10/50] batch [85/500] time 0.887 (0.897) data 0.000 (0.008) loss 1.1611 (1.1286) acc 68.7500 (71.2132) lr 1.8763e-03 eta 5:05:07 +epoch [10/50] batch [90/500] time 0.882 (0.896) data 0.000 (0.008) loss 1.0947 (1.1376) acc 
68.7500 (71.1111) lr 1.8763e-03 eta 5:04:48 +epoch [10/50] batch [95/500] time 0.907 (0.897) data 0.000 (0.008) loss 1.6230 (1.1457) acc 68.7500 (71.0197) lr 1.8763e-03 eta 5:05:05 +epoch [10/50] batch [100/500] time 0.887 (0.897) data 0.000 (0.007) loss 1.0352 (1.1488) acc 75.0000 (71.0625) lr 1.8763e-03 eta 5:05:07 +epoch [10/50] batch [105/500] time 0.877 (0.897) data 0.000 (0.007) loss 1.2744 (1.1485) acc 65.6250 (71.1607) lr 1.8763e-03 eta 5:04:44 +epoch [10/50] batch [110/500] time 0.888 (0.896) data 0.000 (0.007) loss 1.8916 (1.1539) acc 53.1250 (70.9091) lr 1.8763e-03 eta 5:04:32 +epoch [10/50] batch [115/500] time 0.901 (0.896) data 0.000 (0.006) loss 1.1328 (1.1520) acc 65.6250 (70.8152) lr 1.8763e-03 eta 5:04:30 +epoch [10/50] batch [120/500] time 0.880 (0.895) data 0.000 (0.006) loss 1.3105 (1.1644) acc 78.1250 (70.7552) lr 1.8763e-03 eta 5:04:03 +epoch [10/50] batch [125/500] time 0.879 (0.894) data 0.000 (0.006) loss 0.6548 (1.1603) acc 75.0000 (70.8250) lr 1.8763e-03 eta 5:03:37 +epoch [10/50] batch [130/500] time 0.863 (0.893) data 0.000 (0.006) loss 1.6816 (1.1650) acc 59.3750 (70.6250) lr 1.8763e-03 eta 5:03:15 +epoch [10/50] batch [135/500] time 0.996 (0.893) data 0.000 (0.005) loss 1.0820 (1.1755) acc 75.0000 (70.3241) lr 1.8763e-03 eta 5:03:13 +epoch [10/50] batch [140/500] time 0.908 (0.893) data 0.000 (0.005) loss 1.4736 (1.1746) acc 56.2500 (70.1339) lr 1.8763e-03 eta 5:03:02 +epoch [10/50] batch [145/500] time 0.879 (0.893) data 0.000 (0.005) loss 1.1631 (1.1736) acc 75.0000 (70.2371) lr 1.8763e-03 eta 5:02:52 +epoch [10/50] batch [150/500] time 0.900 (0.893) data 0.000 (0.005) loss 1.5205 (1.1756) acc 75.0000 (70.2500) lr 1.8763e-03 eta 5:02:47 +epoch [10/50] batch [155/500] time 0.859 (0.892) data 0.000 (0.005) loss 0.8462 (1.1786) acc 78.1250 (70.2823) lr 1.8763e-03 eta 5:02:36 +epoch [10/50] batch [160/500] time 0.852 (0.892) data 0.000 (0.005) loss 1.1465 (1.1780) acc 65.6250 (70.3320) lr 1.8763e-03 eta 5:02:24 +epoch [10/50] batch 
[165/500] time 0.925 (0.892) data 0.000 (0.004) loss 1.9404 (1.1860) acc 56.2500 (70.2841) lr 1.8763e-03 eta 5:02:14 +epoch [10/50] batch [170/500] time 0.872 (0.892) data 0.000 (0.004) loss 0.6743 (1.1834) acc 87.5000 (70.4412) lr 1.8763e-03 eta 5:02:08 +epoch [10/50] batch [175/500] time 0.891 (0.892) data 0.000 (0.004) loss 0.7534 (1.1832) acc 81.2500 (70.4821) lr 1.8763e-03 eta 5:02:00 +epoch [10/50] batch [180/500] time 0.889 (0.891) data 0.000 (0.004) loss 0.6338 (1.1777) acc 78.1250 (70.5382) lr 1.8763e-03 eta 5:01:51 +epoch [10/50] batch [185/500] time 0.882 (0.891) data 0.000 (0.004) loss 1.1631 (1.1784) acc 71.8750 (70.4054) lr 1.8763e-03 eta 5:01:46 +epoch [10/50] batch [190/500] time 0.896 (0.891) data 0.000 (0.004) loss 1.3623 (1.1765) acc 78.1250 (70.4934) lr 1.8763e-03 eta 5:01:39 +epoch [10/50] batch [195/500] time 0.869 (0.891) data 0.000 (0.004) loss 1.5117 (1.1737) acc 50.0000 (70.5128) lr 1.8763e-03 eta 5:01:28 +epoch [10/50] batch [200/500] time 0.887 (0.891) data 0.000 (0.004) loss 0.9907 (1.1717) acc 71.8750 (70.5938) lr 1.8763e-03 eta 5:01:20 +epoch [10/50] batch [205/500] time 0.892 (0.890) data 0.000 (0.004) loss 0.9927 (1.1692) acc 75.0000 (70.5335) lr 1.8763e-03 eta 5:01:07 +epoch [10/50] batch [210/500] time 0.889 (0.890) data 0.000 (0.004) loss 1.2256 (1.1750) acc 68.7500 (70.4613) lr 1.8763e-03 eta 5:00:59 +epoch [10/50] batch [215/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.3574 (1.1733) acc 65.6250 (70.3779) lr 1.8763e-03 eta 5:00:53 +epoch [10/50] batch [220/500] time 0.858 (0.890) data 0.000 (0.003) loss 1.3262 (1.1732) acc 62.5000 (70.3409) lr 1.8763e-03 eta 5:00:49 +epoch [10/50] batch [225/500] time 0.866 (0.890) data 0.000 (0.003) loss 0.7822 (1.1702) acc 78.1250 (70.3889) lr 1.8763e-03 eta 5:00:42 +epoch [10/50] batch [230/500] time 0.886 (0.890) data 0.000 (0.003) loss 1.4551 (1.1653) acc 68.7500 (70.4620) lr 1.8763e-03 eta 5:00:33 +epoch [10/50] batch [235/500] time 0.900 (0.890) data 0.000 (0.003) loss 1.0166 
(1.1711) acc 78.1250 (70.3856) lr 1.8763e-03 eta 5:00:38 +epoch [10/50] batch [240/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.7383 (1.1754) acc 71.8750 (70.3906) lr 1.8763e-03 eta 5:00:35 +epoch [10/50] batch [245/500] time 0.872 (0.890) data 0.000 (0.003) loss 2.0820 (1.1814) acc 62.5000 (70.2934) lr 1.8763e-03 eta 5:00:27 +epoch [10/50] batch [250/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.1963 (1.1771) acc 62.5000 (70.3125) lr 1.8763e-03 eta 5:00:23 +epoch [10/50] batch [255/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.5537 (1.1776) acc 75.0000 (70.3676) lr 1.8763e-03 eta 5:00:18 +epoch [10/50] batch [260/500] time 0.874 (0.890) data 0.000 (0.003) loss 1.2920 (1.1798) acc 68.7500 (70.3726) lr 1.8763e-03 eta 5:00:14 +epoch [10/50] batch [265/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.2197 (1.1791) acc 71.8750 (70.4481) lr 1.8763e-03 eta 5:00:13 +epoch [10/50] batch [270/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.6465 (1.1778) acc 65.6250 (70.4861) lr 1.8763e-03 eta 5:00:04 +epoch [10/50] batch [275/500] time 0.908 (0.890) data 0.000 (0.003) loss 0.9517 (1.1765) acc 75.0000 (70.5795) lr 1.8763e-03 eta 4:59:56 +epoch [10/50] batch [280/500] time 0.886 (0.890) data 0.000 (0.003) loss 1.6553 (1.1810) acc 65.6250 (70.5469) lr 1.8763e-03 eta 4:59:58 +epoch [10/50] batch [285/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.7222 (1.1783) acc 81.2500 (70.6140) lr 1.8763e-03 eta 4:59:55 +epoch [10/50] batch [290/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.8755 (1.1771) acc 75.0000 (70.6358) lr 1.8763e-03 eta 4:59:50 +epoch [10/50] batch [295/500] time 0.917 (0.890) data 0.000 (0.003) loss 1.2383 (1.1787) acc 71.8750 (70.6356) lr 1.8763e-03 eta 4:59:46 +epoch [10/50] batch [300/500] time 0.901 (0.890) data 0.000 (0.003) loss 1.3018 (1.1791) acc 68.7500 (70.6979) lr 1.8763e-03 eta 4:59:46 +epoch [10/50] batch [305/500] time 0.915 (0.890) data 0.000 (0.002) loss 1.4111 (1.1851) acc 68.7500 (70.6250) lr 1.8763e-03 eta 4:59:42 +epoch 
[10/50] batch [310/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.9644 (1.1822) acc 75.0000 (70.6754) lr 1.8763e-03 eta 4:59:36 +epoch [10/50] batch [315/500] time 0.881 (0.890) data 0.000 (0.002) loss 1.2744 (1.1804) acc 65.6250 (70.6845) lr 1.8763e-03 eta 4:59:27 +epoch [10/50] batch [320/500] time 0.909 (0.890) data 0.000 (0.002) loss 1.1270 (1.1821) acc 71.8750 (70.6543) lr 1.8763e-03 eta 4:59:21 +epoch [10/50] batch [325/500] time 0.890 (0.890) data 0.000 (0.002) loss 0.7910 (1.1800) acc 75.0000 (70.6731) lr 1.8763e-03 eta 4:59:16 +epoch [10/50] batch [330/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.5127 (1.1753) acc 90.6250 (70.8049) lr 1.8763e-03 eta 4:59:07 +epoch [10/50] batch [335/500] time 0.853 (0.890) data 0.000 (0.002) loss 1.0469 (1.1748) acc 71.8750 (70.8116) lr 1.8763e-03 eta 4:58:58 +epoch [10/50] batch [340/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.4268 (1.1783) acc 75.0000 (70.7996) lr 1.8763e-03 eta 4:58:53 +epoch [10/50] batch [345/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.7969 (1.1773) acc 84.3750 (70.8696) lr 1.8763e-03 eta 4:58:47 +epoch [10/50] batch [350/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.3516 (1.1791) acc 62.5000 (70.8571) lr 1.8763e-03 eta 4:58:42 +epoch [10/50] batch [355/500] time 0.866 (0.889) data 0.000 (0.002) loss 1.1084 (1.1798) acc 75.0000 (70.8803) lr 1.8763e-03 eta 4:58:37 +epoch [10/50] batch [360/500] time 0.891 (0.889) data 0.000 (0.002) loss 0.9409 (1.1802) acc 75.0000 (70.8594) lr 1.8763e-03 eta 4:58:32 +epoch [10/50] batch [365/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.2373 (1.1817) acc 68.7500 (70.8647) lr 1.8763e-03 eta 4:58:26 +epoch [10/50] batch [370/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.2930 (1.1803) acc 78.1250 (70.9037) lr 1.8763e-03 eta 4:58:19 +epoch [10/50] batch [375/500] time 0.880 (0.889) data 0.000 (0.002) loss 1.4404 (1.1836) acc 68.7500 (70.9083) lr 1.8763e-03 eta 4:58:13 +epoch [10/50] batch [380/500] time 0.901 (0.890) data 0.000 (0.002) loss 
1.1123 (1.1821) acc 78.1250 (70.9211) lr 1.8763e-03 eta 4:58:17 +epoch [10/50] batch [385/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.0869 (1.1835) acc 65.6250 (70.8523) lr 1.8763e-03 eta 4:58:07 +epoch [10/50] batch [390/500] time 0.869 (0.889) data 0.000 (0.002) loss 0.6567 (1.1816) acc 84.3750 (70.8814) lr 1.8763e-03 eta 4:58:02 +epoch [10/50] batch [395/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.5825 (1.1774) acc 84.3750 (70.9494) lr 1.8763e-03 eta 4:57:58 +epoch [10/50] batch [400/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.6763 (1.1734) acc 71.8750 (70.9922) lr 1.8763e-03 eta 4:57:54 +epoch [10/50] batch [405/500] time 0.901 (0.889) data 0.000 (0.002) loss 1.0078 (1.1729) acc 78.1250 (70.9877) lr 1.8763e-03 eta 4:57:52 +epoch [10/50] batch [410/500] time 0.906 (0.890) data 0.000 (0.002) loss 0.7114 (1.1732) acc 78.1250 (70.9909) lr 1.8763e-03 eta 4:57:50 +epoch [10/50] batch [415/500] time 0.923 (0.890) data 0.000 (0.002) loss 1.0869 (1.1725) acc 68.7500 (70.9864) lr 1.8763e-03 eta 4:57:45 +epoch [10/50] batch [420/500] time 0.914 (0.890) data 0.000 (0.002) loss 1.1533 (1.1717) acc 78.1250 (71.0045) lr 1.8763e-03 eta 4:57:41 +epoch [10/50] batch [425/500] time 0.897 (0.890) data 0.000 (0.002) loss 0.7861 (1.1708) acc 68.7500 (71.0221) lr 1.8763e-03 eta 4:57:42 +epoch [10/50] batch [430/500] time 0.894 (0.890) data 0.000 (0.002) loss 0.9595 (1.1706) acc 68.7500 (70.9956) lr 1.8763e-03 eta 4:57:38 +epoch [10/50] batch [435/500] time 0.853 (0.890) data 0.000 (0.002) loss 1.1777 (1.1701) acc 68.7500 (70.9842) lr 1.8763e-03 eta 4:57:29 +epoch [10/50] batch [440/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.0273 (1.1708) acc 78.1250 (71.0156) lr 1.8763e-03 eta 4:57:23 +epoch [10/50] batch [445/500] time 0.856 (0.889) data 0.000 (0.002) loss 0.9751 (1.1670) acc 78.1250 (71.0815) lr 1.8763e-03 eta 4:57:18 +epoch [10/50] batch [450/500] time 0.863 (0.890) data 0.000 (0.002) loss 1.5820 (1.1659) acc 62.5000 (71.1111) lr 1.8763e-03 eta 4:57:17 
+epoch [10/50] batch [455/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.3457 (1.1690) acc 68.7500 (71.0440) lr 1.8763e-03 eta 4:57:13 +epoch [10/50] batch [460/500] time 0.915 (0.890) data 0.000 (0.002) loss 0.9517 (1.1704) acc 75.0000 (71.0190) lr 1.8763e-03 eta 4:57:08 +epoch [10/50] batch [465/500] time 0.911 (0.890) data 0.000 (0.002) loss 1.4541 (1.1699) acc 71.8750 (71.0618) lr 1.8763e-03 eta 4:57:07 +epoch [10/50] batch [470/500] time 0.856 (0.890) data 0.000 (0.002) loss 1.4297 (1.1727) acc 65.6250 (70.9707) lr 1.8763e-03 eta 4:57:04 +epoch [10/50] batch [475/500] time 0.869 (0.890) data 0.000 (0.002) loss 1.3682 (1.1753) acc 71.8750 (70.9671) lr 1.8763e-03 eta 4:57:01 +epoch [10/50] batch [480/500] time 0.895 (0.890) data 0.000 (0.002) loss 0.9604 (1.1766) acc 75.0000 (70.9701) lr 1.8763e-03 eta 4:56:57 +epoch [10/50] batch [485/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.6055 (1.1771) acc 50.0000 (70.9214) lr 1.8763e-03 eta 4:56:50 +epoch [10/50] batch [490/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.3877 (1.1766) acc 78.1250 (70.9375) lr 1.8763e-03 eta 4:56:47 +epoch [10/50] batch [495/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.4434 (1.1775) acc 65.6250 (70.9470) lr 1.8763e-03 eta 4:56:41 +epoch [10/50] batch [500/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.0723 (1.1791) acc 65.6250 (70.9437) lr 1.8443e-03 eta 4:56:37 +epoch [11/50] batch [5/500] time 0.882 (1.004) data 0.000 (0.134) loss 0.7759 (1.0033) acc 84.3750 (75.6250) lr 1.8443e-03 eta 5:34:44 +epoch [11/50] batch [10/500] time 0.899 (0.946) data 0.000 (0.067) loss 1.4297 (1.0652) acc 59.3750 (72.8125) lr 1.8443e-03 eta 5:15:17 +epoch [11/50] batch [15/500] time 0.872 (0.927) data 0.000 (0.045) loss 0.8247 (1.0247) acc 84.3750 (73.5417) lr 1.8443e-03 eta 5:08:53 +epoch [11/50] batch [20/500] time 0.908 (0.920) data 0.000 (0.034) loss 1.0098 (1.0709) acc 71.8750 (72.8125) lr 1.8443e-03 eta 5:06:31 +epoch [11/50] batch [25/500] time 0.901 (0.916) data 0.000 (0.027) loss 
1.3174 (1.0902) acc 65.6250 (72.5000) lr 1.8443e-03 eta 5:04:54 +epoch [11/50] batch [30/500] time 0.927 (0.915) data 0.000 (0.023) loss 1.1768 (1.0963) acc 59.3750 (71.8750) lr 1.8443e-03 eta 5:04:28 +epoch [11/50] batch [35/500] time 0.863 (0.908) data 0.000 (0.019) loss 1.2334 (1.0943) acc 68.7500 (72.1429) lr 1.8443e-03 eta 5:02:17 +epoch [11/50] batch [40/500] time 0.872 (0.905) data 0.000 (0.017) loss 0.7666 (1.0716) acc 87.5000 (73.2812) lr 1.8443e-03 eta 5:01:13 +epoch [11/50] batch [45/500] time 0.876 (0.905) data 0.000 (0.015) loss 1.0488 (1.0658) acc 78.1250 (73.4722) lr 1.8443e-03 eta 5:00:54 +epoch [11/50] batch [50/500] time 0.875 (0.903) data 0.000 (0.014) loss 0.9932 (1.0953) acc 78.1250 (73.1250) lr 1.8443e-03 eta 5:00:14 +epoch [11/50] batch [55/500] time 0.886 (0.902) data 0.000 (0.012) loss 0.9448 (1.0802) acc 78.1250 (73.5795) lr 1.8443e-03 eta 4:59:47 +epoch [11/50] batch [60/500] time 0.910 (0.902) data 0.000 (0.011) loss 1.0244 (1.0748) acc 68.7500 (73.4896) lr 1.8443e-03 eta 4:59:42 +epoch [11/50] batch [65/500] time 0.875 (0.900) data 0.000 (0.011) loss 0.6689 (1.0651) acc 78.1250 (73.6538) lr 1.8443e-03 eta 4:59:07 +epoch [11/50] batch [70/500] time 0.859 (0.899) data 0.000 (0.010) loss 1.3945 (1.0697) acc 56.2500 (72.9464) lr 1.8443e-03 eta 4:58:30 +epoch [11/50] batch [75/500] time 0.894 (0.900) data 0.000 (0.009) loss 1.8477 (1.0791) acc 50.0000 (72.7500) lr 1.8443e-03 eta 4:58:47 +epoch [11/50] batch [80/500] time 0.903 (0.899) data 0.000 (0.009) loss 1.4463 (1.0923) acc 65.6250 (72.5781) lr 1.8443e-03 eta 4:58:30 +epoch [11/50] batch [85/500] time 0.869 (0.898) data 0.000 (0.008) loss 1.7646 (1.1103) acc 59.3750 (72.2059) lr 1.8443e-03 eta 4:57:59 +epoch [11/50] batch [90/500] time 0.881 (0.896) data 0.000 (0.008) loss 1.0625 (1.0992) acc 68.7500 (72.4306) lr 1.8443e-03 eta 4:57:23 +epoch [11/50] batch [95/500] time 0.884 (0.895) data 0.000 (0.007) loss 1.2471 (1.1149) acc 65.6250 (72.1053) lr 1.8443e-03 eta 4:57:01 +epoch [11/50] 
batch [100/500] time 0.895 (0.895) data 0.000 (0.007) loss 1.0508 (1.1261) acc 75.0000 (71.8750) lr 1.8443e-03 eta 4:56:54 +epoch [11/50] batch [105/500] time 0.892 (0.895) data 0.000 (0.007) loss 1.1660 (1.1252) acc 71.8750 (71.7560) lr 1.8443e-03 eta 4:56:41 +epoch [11/50] batch [110/500] time 0.873 (0.894) data 0.000 (0.006) loss 1.0059 (1.1282) acc 75.0000 (71.7614) lr 1.8443e-03 eta 4:56:24 +epoch [11/50] batch [115/500] time 0.861 (0.894) data 0.000 (0.006) loss 0.7393 (1.1258) acc 78.1250 (71.7663) lr 1.8443e-03 eta 4:56:16 +epoch [11/50] batch [120/500] time 0.880 (0.895) data 0.000 (0.006) loss 1.1279 (1.1279) acc 71.8750 (71.6667) lr 1.8443e-03 eta 4:56:22 +epoch [11/50] batch [125/500] time 0.895 (0.895) data 0.000 (0.006) loss 1.4102 (1.1295) acc 68.7500 (71.6000) lr 1.8443e-03 eta 4:56:23 +epoch [11/50] batch [130/500] time 0.886 (0.895) data 0.000 (0.005) loss 0.9146 (1.1350) acc 81.2500 (71.4423) lr 1.8443e-03 eta 4:56:21 +epoch [11/50] batch [135/500] time 0.911 (0.895) data 0.000 (0.005) loss 0.9785 (1.1360) acc 71.8750 (71.4352) lr 1.8443e-03 eta 4:56:21 +epoch [11/50] batch [140/500] time 0.884 (0.895) data 0.000 (0.005) loss 1.4355 (1.1379) acc 65.6250 (71.3839) lr 1.8443e-03 eta 4:56:06 +epoch [11/50] batch [145/500] time 0.896 (0.894) data 0.000 (0.005) loss 1.3330 (1.1368) acc 62.5000 (71.4655) lr 1.8443e-03 eta 4:55:54 +epoch [11/50] batch [150/500] time 0.875 (0.894) data 0.000 (0.005) loss 0.8716 (1.1301) acc 81.2500 (71.7500) lr 1.8443e-03 eta 4:55:50 +epoch [11/50] batch [155/500] time 0.889 (0.894) data 0.000 (0.005) loss 0.8252 (1.1242) acc 84.3750 (71.9556) lr 1.8443e-03 eta 4:55:35 +epoch [11/50] batch [160/500] time 0.884 (0.894) data 0.000 (0.004) loss 0.8301 (1.1217) acc 78.1250 (72.0312) lr 1.8443e-03 eta 4:55:28 +epoch [11/50] batch [165/500] time 0.879 (0.894) data 0.000 (0.004) loss 0.9985 (1.1218) acc 68.7500 (71.9886) lr 1.8443e-03 eta 4:55:24 +epoch [11/50] batch [170/500] time 0.890 (0.894) data 0.000 (0.004) loss 1.0381 
(1.1227) acc 78.1250 (71.9669) lr 1.8443e-03 eta 4:55:19 +epoch [11/50] batch [175/500] time 0.910 (0.894) data 0.000 (0.004) loss 1.0020 (1.1197) acc 56.2500 (71.9286) lr 1.8443e-03 eta 4:55:17 +epoch [11/50] batch [180/500] time 0.863 (0.893) data 0.000 (0.004) loss 1.4902 (1.1208) acc 65.6250 (71.8924) lr 1.8443e-03 eta 4:55:08 +epoch [11/50] batch [185/500] time 0.900 (0.893) data 0.000 (0.004) loss 1.7021 (1.1184) acc 71.8750 (72.0101) lr 1.8443e-03 eta 4:54:59 +epoch [11/50] batch [190/500] time 0.905 (0.894) data 0.000 (0.004) loss 1.7305 (1.1224) acc 68.7500 (71.9737) lr 1.8443e-03 eta 4:55:02 +epoch [11/50] batch [195/500] time 0.903 (0.893) data 0.000 (0.004) loss 0.8115 (1.1205) acc 68.7500 (72.0032) lr 1.8443e-03 eta 4:54:53 +epoch [11/50] batch [200/500] time 0.842 (0.893) data 0.000 (0.004) loss 1.6865 (1.1290) acc 65.6250 (71.9062) lr 1.8443e-03 eta 4:54:34 +epoch [11/50] batch [205/500] time 0.865 (0.892) data 0.000 (0.004) loss 0.7725 (1.1291) acc 75.0000 (71.8598) lr 1.8443e-03 eta 4:54:23 +epoch [11/50] batch [210/500] time 0.873 (0.892) data 0.000 (0.003) loss 1.0635 (1.1353) acc 75.0000 (71.7708) lr 1.8443e-03 eta 4:54:03 +epoch [11/50] batch [215/500] time 0.994 (0.892) data 0.000 (0.003) loss 0.9824 (1.1330) acc 71.8750 (71.7442) lr 1.8443e-03 eta 4:54:02 +epoch [11/50] batch [220/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.0986 (1.1358) acc 68.7500 (71.6335) lr 1.8443e-03 eta 4:53:53 +epoch [11/50] batch [225/500] time 0.880 (0.891) data 0.000 (0.003) loss 1.3506 (1.1386) acc 62.5000 (71.5556) lr 1.8443e-03 eta 4:53:38 +epoch [11/50] batch [230/500] time 0.888 (0.891) data 0.000 (0.003) loss 1.1406 (1.1382) acc 71.8750 (71.5897) lr 1.8443e-03 eta 4:53:29 +epoch [11/50] batch [235/500] time 0.871 (0.891) data 0.000 (0.003) loss 0.8740 (1.1361) acc 75.0000 (71.6090) lr 1.8443e-03 eta 4:53:27 +epoch [11/50] batch [240/500] time 0.920 (0.891) data 0.000 (0.003) loss 1.1396 (1.1377) acc 71.8750 (71.5234) lr 1.8443e-03 eta 4:53:22 +epoch 
[11/50] batch [245/500] time 0.864 (0.891) data 0.000 (0.003) loss 1.1172 (1.1379) acc 59.3750 (71.5179) lr 1.8443e-03 eta 4:53:13 +epoch [11/50] batch [250/500] time 0.897 (0.891) data 0.000 (0.003) loss 0.9507 (1.1416) acc 75.0000 (71.4250) lr 1.8443e-03 eta 4:53:14 +epoch [11/50] batch [255/500] time 0.908 (0.891) data 0.000 (0.003) loss 0.7935 (1.1426) acc 78.1250 (71.4951) lr 1.8443e-03 eta 4:53:10 +epoch [11/50] batch [260/500] time 0.857 (0.891) data 0.000 (0.003) loss 0.7329 (1.1393) acc 78.1250 (71.5745) lr 1.8443e-03 eta 4:53:15 +epoch [11/50] batch [265/500] time 0.886 (0.891) data 0.000 (0.003) loss 0.8408 (1.1386) acc 84.3750 (71.6392) lr 1.8443e-03 eta 4:53:04 +epoch [11/50] batch [270/500] time 0.897 (0.891) data 0.000 (0.003) loss 1.0977 (1.1371) acc 75.0000 (71.7014) lr 1.8443e-03 eta 4:53:01 +epoch [11/50] batch [275/500] time 0.892 (0.891) data 0.000 (0.003) loss 0.9961 (1.1349) acc 65.6250 (71.6818) lr 1.8443e-03 eta 4:52:50 +epoch [11/50] batch [280/500] time 0.865 (0.890) data 0.000 (0.003) loss 0.9888 (1.1386) acc 68.7500 (71.5848) lr 1.8443e-03 eta 4:52:39 +epoch [11/50] batch [285/500] time 0.904 (0.890) data 0.000 (0.003) loss 1.4512 (1.1416) acc 71.8750 (71.5680) lr 1.8443e-03 eta 4:52:34 +epoch [11/50] batch [290/500] time 0.910 (0.891) data 0.000 (0.003) loss 0.8652 (1.1404) acc 81.2500 (71.5733) lr 1.8443e-03 eta 4:52:32 +epoch [11/50] batch [295/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.6084 (1.1490) acc 59.3750 (71.3877) lr 1.8443e-03 eta 4:52:23 +epoch [11/50] batch [300/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.0693 (1.1489) acc 71.8750 (71.4167) lr 1.8443e-03 eta 4:52:17 +epoch [11/50] batch [305/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.3760 (1.1496) acc 65.6250 (71.4037) lr 1.8443e-03 eta 4:52:14 +epoch [11/50] batch [310/500] time 0.906 (0.890) data 0.000 (0.002) loss 1.1240 (1.1504) acc 71.8750 (71.4113) lr 1.8443e-03 eta 4:52:07 +epoch [11/50] batch [315/500] time 0.846 (0.890) data 0.000 (0.002) loss 
0.7207 (1.1482) acc 75.0000 (71.3790) lr 1.8443e-03 eta 4:51:57 +epoch [11/50] batch [320/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.3193 (1.1424) acc 62.5000 (71.5625) lr 1.8443e-03 eta 4:51:49 +epoch [11/50] batch [325/500] time 0.867 (0.890) data 0.000 (0.002) loss 0.8687 (1.1417) acc 81.2500 (71.5769) lr 1.8443e-03 eta 4:51:42 +epoch [11/50] batch [330/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.1816 (1.1409) acc 75.0000 (71.5909) lr 1.8443e-03 eta 4:51:36 +epoch [11/50] batch [335/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.2783 (1.1426) acc 68.7500 (71.5485) lr 1.8443e-03 eta 4:51:28 +epoch [11/50] batch [340/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.7593 (1.1424) acc 71.8750 (71.4430) lr 1.8443e-03 eta 4:51:21 +epoch [11/50] batch [345/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.4912 (1.1401) acc 62.5000 (71.5036) lr 1.8443e-03 eta 4:51:15 +epoch [11/50] batch [350/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.3076 (1.1417) acc 71.8750 (71.4464) lr 1.8443e-03 eta 4:51:13 +epoch [11/50] batch [355/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.3760 (1.1438) acc 68.7500 (71.3996) lr 1.8443e-03 eta 4:51:07 +epoch [11/50] batch [360/500] time 0.854 (0.889) data 0.000 (0.002) loss 1.0498 (1.1434) acc 78.1250 (71.4497) lr 1.8443e-03 eta 4:51:03 +epoch [11/50] batch [365/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.3555 (1.1438) acc 59.3750 (71.4127) lr 1.8443e-03 eta 4:51:01 +epoch [11/50] batch [370/500] time 0.905 (0.889) data 0.000 (0.002) loss 0.7417 (1.1443) acc 90.6250 (71.4696) lr 1.8443e-03 eta 4:50:58 +epoch [11/50] batch [375/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.3682 (1.1441) acc 59.3750 (71.4417) lr 1.8443e-03 eta 4:50:48 +epoch [11/50] batch [380/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.7500 (1.1477) acc 84.3750 (71.3898) lr 1.8443e-03 eta 4:50:45 +epoch [11/50] batch [385/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.4268 (1.1481) acc 68.7500 (71.3636) lr 1.8443e-03 eta 4:50:37 
+epoch [11/50] batch [390/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.1211 (1.1490) acc 56.2500 (71.2660) lr 1.8443e-03 eta 4:50:35 +epoch [11/50] batch [395/500] time 0.857 (0.889) data 0.000 (0.002) loss 1.5527 (1.1495) acc 68.7500 (71.2263) lr 1.8443e-03 eta 4:50:29 +epoch [11/50] batch [400/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.2812 (1.1517) acc 62.5000 (71.1406) lr 1.8443e-03 eta 4:50:21 +epoch [11/50] batch [405/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.5469 (1.1532) acc 68.7500 (71.1497) lr 1.8443e-03 eta 4:50:17 +epoch [11/50] batch [410/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.0273 (1.1525) acc 81.2500 (71.1585) lr 1.8443e-03 eta 4:50:10 +epoch [11/50] batch [415/500] time 0.859 (0.889) data 0.000 (0.002) loss 1.2617 (1.1524) acc 71.8750 (71.1898) lr 1.8443e-03 eta 4:50:04 +epoch [11/50] batch [420/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.5762 (1.1535) acc 65.6250 (71.1830) lr 1.8443e-03 eta 4:50:00 +epoch [11/50] batch [425/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.0898 (1.1511) acc 62.5000 (71.2132) lr 1.8443e-03 eta 4:49:53 +epoch [11/50] batch [430/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.0293 (1.1469) acc 75.0000 (71.3808) lr 1.8443e-03 eta 4:49:47 +epoch [11/50] batch [435/500] time 0.893 (0.889) data 0.000 (0.002) loss 1.2617 (1.1475) acc 78.1250 (71.4655) lr 1.8443e-03 eta 4:49:43 +epoch [11/50] batch [440/500] time 0.895 (0.889) data 0.000 (0.002) loss 0.9131 (1.1470) acc 78.1250 (71.5128) lr 1.8443e-03 eta 4:49:42 +epoch [11/50] batch [445/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.6318 (1.1478) acc 59.3750 (71.4958) lr 1.8443e-03 eta 4:49:39 +epoch [11/50] batch [450/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.6035 (1.1491) acc 53.1250 (71.4583) lr 1.8443e-03 eta 4:49:36 +epoch [11/50] batch [455/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.3770 (1.1515) acc 59.3750 (71.3324) lr 1.8443e-03 eta 4:49:28 +epoch [11/50] batch [460/500] time 0.858 (0.888) data 0.000 
(0.002) loss 0.9438 (1.1491) acc 78.1250 (71.3519) lr 1.8443e-03 eta 4:49:19 +epoch [11/50] batch [465/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.3184 (1.1515) acc 65.6250 (71.3508) lr 1.8443e-03 eta 4:49:15 +epoch [11/50] batch [470/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.3730 (1.1526) acc 65.6250 (71.3298) lr 1.8443e-03 eta 4:49:12 +epoch [11/50] batch [475/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.0020 (1.1549) acc 65.6250 (71.2368) lr 1.8443e-03 eta 4:49:07 +epoch [11/50] batch [480/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.7476 (1.1538) acc 81.2500 (71.2174) lr 1.8443e-03 eta 4:49:00 +epoch [11/50] batch [485/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.2197 (1.1540) acc 68.7500 (71.1856) lr 1.8443e-03 eta 4:48:54 +epoch [11/50] batch [490/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.0020 (1.1526) acc 78.1250 (71.1990) lr 1.8443e-03 eta 4:48:51 +epoch [11/50] batch [495/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.6660 (1.1548) acc 59.3750 (71.1553) lr 1.8443e-03 eta 4:48:50 +epoch [11/50] batch [500/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.2354 (1.1557) acc 65.6250 (71.1188) lr 1.8090e-03 eta 4:48:46 +epoch [12/50] batch [5/500] time 0.875 (1.055) data 0.000 (0.126) loss 1.0508 (1.0737) acc 71.8750 (71.2500) lr 1.8090e-03 eta 5:42:53 +epoch [12/50] batch [10/500] time 0.889 (0.975) data 0.000 (0.063) loss 1.0029 (0.9975) acc 71.8750 (71.8750) lr 1.8090e-03 eta 5:16:37 +epoch [12/50] batch [15/500] time 0.924 (0.948) data 0.000 (0.042) loss 1.4277 (1.1227) acc 59.3750 (68.1250) lr 1.8090e-03 eta 5:07:51 +epoch [12/50] batch [20/500] time 0.856 (0.938) data 0.000 (0.032) loss 1.0898 (1.1038) acc 75.0000 (69.6875) lr 1.8090e-03 eta 5:04:24 +epoch [12/50] batch [25/500] time 0.925 (0.932) data 0.000 (0.025) loss 1.0635 (1.1702) acc 84.3750 (69.3750) lr 1.8090e-03 eta 5:02:32 +epoch [12/50] batch [30/500] time 0.928 (0.926) data 0.000 (0.021) loss 0.8916 (1.1539) acc 68.7500 (70.1042) lr 1.8090e-03 eta 
5:00:28 +epoch [12/50] batch [35/500] time 0.862 (0.919) data 0.000 (0.018) loss 2.1426 (1.2024) acc 50.0000 (69.1964) lr 1.8090e-03 eta 4:58:09 +epoch [12/50] batch [40/500] time 0.887 (0.916) data 0.000 (0.016) loss 1.5107 (1.1894) acc 65.6250 (69.2969) lr 1.8090e-03 eta 4:57:05 +epoch [12/50] batch [45/500] time 0.885 (0.913) data 0.000 (0.014) loss 0.6392 (1.1752) acc 87.5000 (69.2361) lr 1.8090e-03 eta 4:55:57 +epoch [12/50] batch [50/500] time 0.864 (0.909) data 0.000 (0.013) loss 1.0156 (1.1587) acc 78.1250 (69.4375) lr 1.8090e-03 eta 4:54:37 +epoch [12/50] batch [55/500] time 0.848 (0.907) data 0.000 (0.012) loss 0.9463 (1.1696) acc 75.0000 (69.5455) lr 1.8090e-03 eta 4:53:52 +epoch [12/50] batch [60/500] time 0.881 (0.904) data 0.000 (0.011) loss 0.8130 (1.1505) acc 84.3750 (70.0000) lr 1.8090e-03 eta 4:52:49 +epoch [12/50] batch [65/500] time 0.835 (0.902) data 0.000 (0.010) loss 1.6523 (1.1687) acc 68.7500 (69.7596) lr 1.8090e-03 eta 4:52:19 +epoch [12/50] batch [70/500] time 0.882 (0.901) data 0.000 (0.009) loss 1.1689 (1.1821) acc 71.8750 (69.5089) lr 1.8090e-03 eta 4:51:39 +epoch [12/50] batch [75/500] time 0.880 (0.900) data 0.000 (0.009) loss 0.8545 (1.1729) acc 78.1250 (69.5833) lr 1.8090e-03 eta 4:51:13 +epoch [12/50] batch [80/500] time 0.879 (0.897) data 0.000 (0.008) loss 0.6987 (1.1638) acc 78.1250 (69.8047) lr 1.8090e-03 eta 4:50:19 +epoch [12/50] batch [85/500] time 0.886 (0.895) data 0.000 (0.008) loss 1.0186 (1.1535) acc 75.0000 (70.1103) lr 1.8090e-03 eta 4:49:41 +epoch [12/50] batch [90/500] time 0.874 (0.894) data 0.000 (0.007) loss 0.7944 (1.1438) acc 71.8750 (70.3819) lr 1.8090e-03 eta 4:49:19 +epoch [12/50] batch [95/500] time 0.889 (0.895) data 0.000 (0.007) loss 1.2812 (1.1524) acc 59.3750 (70.1645) lr 1.8090e-03 eta 4:49:23 +epoch [12/50] batch [100/500] time 0.901 (0.895) data 0.000 (0.007) loss 0.9507 (1.1495) acc 75.0000 (70.3125) lr 1.8090e-03 eta 4:49:16 +epoch [12/50] batch [105/500] time 1.003 (0.895) data 0.000 (0.006) 
loss 1.5537 (1.1601) acc 56.2500 (70.2679) lr 1.8090e-03 eta 4:49:14 +epoch [12/50] batch [110/500] time 0.895 (0.895) data 0.000 (0.006) loss 1.2891 (1.1614) acc 68.7500 (70.1989) lr 1.8090e-03 eta 4:49:09 +epoch [12/50] batch [115/500] time 0.872 (0.894) data 0.000 (0.006) loss 0.7178 (1.1509) acc 81.2500 (70.4348) lr 1.8090e-03 eta 4:48:52 +epoch [12/50] batch [120/500] time 0.897 (0.894) data 0.000 (0.005) loss 1.0820 (1.1575) acc 71.8750 (70.2865) lr 1.8090e-03 eta 4:48:43 +epoch [12/50] batch [125/500] time 0.847 (0.893) data 0.000 (0.005) loss 0.4094 (1.1477) acc 93.7500 (70.6250) lr 1.8090e-03 eta 4:48:22 +epoch [12/50] batch [130/500] time 0.895 (0.893) data 0.000 (0.005) loss 1.0527 (1.1415) acc 71.8750 (70.8173) lr 1.8090e-03 eta 4:48:09 +epoch [12/50] batch [135/500] time 0.923 (0.893) data 0.000 (0.005) loss 1.2451 (1.1431) acc 68.7500 (70.7176) lr 1.8090e-03 eta 4:48:11 +epoch [12/50] batch [140/500] time 0.888 (0.893) data 0.000 (0.005) loss 0.8125 (1.1445) acc 84.3750 (70.7143) lr 1.8090e-03 eta 4:48:04 +epoch [12/50] batch [145/500] time 0.889 (0.893) data 0.000 (0.005) loss 1.3594 (1.1478) acc 59.3750 (70.6034) lr 1.8090e-03 eta 4:47:56 +epoch [12/50] batch [150/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.4375 (1.1485) acc 78.1250 (70.7917) lr 1.8090e-03 eta 4:47:43 +epoch [12/50] batch [155/500] time 0.865 (0.892) data 0.000 (0.004) loss 1.0215 (1.1502) acc 68.7500 (70.7460) lr 1.8090e-03 eta 4:47:31 +epoch [12/50] batch [160/500] time 0.907 (0.892) data 0.000 (0.004) loss 0.8784 (1.1462) acc 78.1250 (70.7812) lr 1.8090e-03 eta 4:47:22 +epoch [12/50] batch [165/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.2354 (1.1497) acc 68.7500 (70.6818) lr 1.8090e-03 eta 4:47:21 +epoch [12/50] batch [170/500] time 0.859 (0.891) data 0.000 (0.004) loss 0.8374 (1.1483) acc 81.2500 (70.7904) lr 1.8090e-03 eta 4:47:10 +epoch [12/50] batch [175/500] time 0.860 (0.891) data 0.000 (0.004) loss 1.1006 (1.1492) acc 75.0000 (70.8929) lr 1.8090e-03 eta 
4:47:00 +epoch [12/50] batch [180/500] time 0.914 (0.891) data 0.000 (0.004) loss 1.0693 (1.1487) acc 68.7500 (70.8854) lr 1.8090e-03 eta 4:46:52 +epoch [12/50] batch [185/500] time 0.900 (0.891) data 0.000 (0.004) loss 1.1211 (1.1443) acc 71.8750 (71.0642) lr 1.8090e-03 eta 4:46:46 +epoch [12/50] batch [190/500] time 0.858 (0.890) data 0.000 (0.004) loss 1.4277 (1.1447) acc 65.6250 (71.0362) lr 1.8090e-03 eta 4:46:33 +epoch [12/50] batch [195/500] time 0.853 (0.890) data 0.000 (0.003) loss 0.8110 (1.1446) acc 71.8750 (70.9455) lr 1.8090e-03 eta 4:46:23 +epoch [12/50] batch [200/500] time 0.901 (0.890) data 0.000 (0.003) loss 0.6626 (1.1416) acc 84.3750 (71.0469) lr 1.8090e-03 eta 4:46:23 +epoch [12/50] batch [205/500] time 0.900 (0.891) data 0.000 (0.003) loss 1.1699 (1.1487) acc 68.7500 (70.9604) lr 1.8090e-03 eta 4:46:29 +epoch [12/50] batch [210/500] time 0.899 (0.890) data 0.000 (0.003) loss 1.0049 (1.1459) acc 78.1250 (71.0119) lr 1.8090e-03 eta 4:46:17 +epoch [12/50] batch [215/500] time 0.885 (0.890) data 0.000 (0.003) loss 0.7852 (1.1443) acc 78.1250 (71.0610) lr 1.8090e-03 eta 4:46:13 +epoch [12/50] batch [220/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.1934 (1.1446) acc 71.8750 (71.0795) lr 1.8090e-03 eta 4:46:02 +epoch [12/50] batch [225/500] time 0.900 (0.890) data 0.000 (0.003) loss 0.7925 (1.1413) acc 71.8750 (71.2222) lr 1.8090e-03 eta 4:45:57 +epoch [12/50] batch [230/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.7988 (1.1428) acc 84.3750 (71.2228) lr 1.8090e-03 eta 4:45:52 +epoch [12/50] batch [235/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.4102 (1.1455) acc 65.6250 (70.9840) lr 1.8090e-03 eta 4:45:46 +epoch [12/50] batch [240/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.3408 (1.1515) acc 65.6250 (70.8854) lr 1.8090e-03 eta 4:45:40 +epoch [12/50] batch [245/500] time 0.866 (0.890) data 0.000 (0.003) loss 0.7324 (1.1565) acc 81.2500 (70.7653) lr 1.8090e-03 eta 4:45:30 +epoch [12/50] batch [250/500] time 0.893 (0.890) data 
0.000 (0.003) loss 0.6855 (1.1540) acc 84.3750 (70.8625) lr 1.8090e-03 eta 4:45:33 +epoch [12/50] batch [255/500] time 0.914 (0.890) data 0.000 (0.003) loss 0.6675 (1.1534) acc 78.1250 (70.9069) lr 1.8090e-03 eta 4:45:26 +epoch [12/50] batch [260/500] time 0.902 (0.890) data 0.000 (0.003) loss 0.9409 (1.1518) acc 75.0000 (70.9856) lr 1.8090e-03 eta 4:45:18 +epoch [12/50] batch [265/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.6318 (1.1542) acc 59.3750 (70.9552) lr 1.8090e-03 eta 4:45:09 +epoch [12/50] batch [270/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.7822 (1.1529) acc 71.8750 (70.9954) lr 1.8090e-03 eta 4:45:04 +epoch [12/50] batch [275/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.2070 (1.1542) acc 65.6250 (70.9773) lr 1.8090e-03 eta 4:45:01 +epoch [12/50] batch [280/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.9102 (1.1506) acc 84.3750 (71.0603) lr 1.8090e-03 eta 4:44:51 +epoch [12/50] batch [285/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.5391 (1.1544) acc 71.8750 (71.0197) lr 1.8090e-03 eta 4:44:44 +epoch [12/50] batch [290/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.9375 (1.1515) acc 75.0000 (71.1315) lr 1.8090e-03 eta 4:44:41 +epoch [12/50] batch [295/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.6123 (1.1513) acc 84.3750 (71.0699) lr 1.8090e-03 eta 4:44:32 +epoch [12/50] batch [300/500] time 0.918 (0.889) data 0.000 (0.002) loss 1.0381 (1.1512) acc 71.8750 (71.0729) lr 1.8090e-03 eta 4:44:32 +epoch [12/50] batch [305/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.0186 (1.1487) acc 75.0000 (71.1475) lr 1.8090e-03 eta 4:44:30 +epoch [12/50] batch [310/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.1152 (1.1507) acc 75.0000 (71.0685) lr 1.8090e-03 eta 4:44:22 +epoch [12/50] batch [315/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.4978 (1.1506) acc 84.3750 (71.1111) lr 1.8090e-03 eta 4:44:21 +epoch [12/50] batch [320/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.0801 (1.1495) acc 65.6250 (71.0938) lr 
1.8090e-03 eta 4:44:15 +epoch [12/50] batch [325/500] time 0.911 (0.889) data 0.000 (0.002) loss 1.3096 (1.1438) acc 62.5000 (71.1635) lr 1.8090e-03 eta 4:44:10 +epoch [12/50] batch [330/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.3740 (1.1429) acc 68.7500 (71.1837) lr 1.8090e-03 eta 4:44:07 +epoch [12/50] batch [335/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.7437 (1.1441) acc 78.1250 (71.1381) lr 1.8090e-03 eta 4:44:03 +epoch [12/50] batch [340/500] time 0.906 (0.889) data 0.000 (0.002) loss 0.8257 (1.1424) acc 81.2500 (71.1949) lr 1.8090e-03 eta 4:44:02 +epoch [12/50] batch [345/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.2236 (1.1416) acc 68.7500 (71.2047) lr 1.8090e-03 eta 4:43:55 +epoch [12/50] batch [350/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.0498 (1.1405) acc 75.0000 (71.1964) lr 1.8090e-03 eta 4:43:53 +epoch [12/50] batch [355/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.5195 (1.1395) acc 65.6250 (71.1796) lr 1.8090e-03 eta 4:43:48 +epoch [12/50] batch [360/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.6865 (1.1381) acc 87.5000 (71.1979) lr 1.8090e-03 eta 4:43:44 +epoch [12/50] batch [365/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.1904 (1.1417) acc 75.0000 (71.0788) lr 1.8090e-03 eta 4:43:43 +epoch [12/50] batch [370/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.8584 (1.1409) acc 84.3750 (71.1233) lr 1.8090e-03 eta 4:43:40 +epoch [12/50] batch [375/500] time 0.882 (0.890) data 0.000 (0.002) loss 0.8193 (1.1404) acc 71.8750 (71.0917) lr 1.8090e-03 eta 4:43:38 +epoch [12/50] batch [380/500] time 0.893 (0.890) data 0.000 (0.002) loss 0.7852 (1.1442) acc 84.3750 (71.0280) lr 1.8090e-03 eta 4:43:34 +epoch [12/50] batch [385/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.8462 (1.1422) acc 78.1250 (71.0795) lr 1.8090e-03 eta 4:43:28 +epoch [12/50] batch [390/500] time 0.869 (0.890) data 0.000 (0.002) loss 0.9863 (1.1409) acc 68.7500 (71.0897) lr 1.8090e-03 eta 4:43:23 +epoch [12/50] batch [395/500] time 0.891 
(0.890) data 0.000 (0.002) loss 1.3828 (1.1453) acc 53.1250 (70.9177) lr 1.8090e-03 eta 4:43:25 +epoch [12/50] batch [400/500] time 0.906 (0.890) data 0.000 (0.002) loss 1.0801 (1.1420) acc 75.0000 (70.9922) lr 1.8090e-03 eta 4:43:20 +epoch [12/50] batch [405/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.0098 (1.1407) acc 68.7500 (71.0031) lr 1.8090e-03 eta 4:43:12 +epoch [12/50] batch [410/500] time 0.881 (0.890) data 0.000 (0.002) loss 0.7676 (1.1400) acc 71.8750 (71.0290) lr 1.8090e-03 eta 4:43:06 +epoch [12/50] batch [415/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.2139 (1.1426) acc 75.0000 (71.0241) lr 1.8090e-03 eta 4:43:02 +epoch [12/50] batch [420/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.2588 (1.1444) acc 71.8750 (71.0268) lr 1.8090e-03 eta 4:42:57 +epoch [12/50] batch [425/500] time 0.895 (0.890) data 0.000 (0.002) loss 0.7715 (1.1418) acc 78.1250 (71.0735) lr 1.8090e-03 eta 4:42:51 +epoch [12/50] batch [430/500] time 0.883 (0.890) data 0.000 (0.002) loss 0.9966 (1.1396) acc 68.7500 (71.1192) lr 1.8090e-03 eta 4:42:45 +epoch [12/50] batch [435/500] time 0.888 (0.890) data 0.000 (0.002) loss 1.4697 (1.1402) acc 65.6250 (71.0848) lr 1.8090e-03 eta 4:42:39 +epoch [12/50] batch [440/500] time 0.865 (0.890) data 0.000 (0.002) loss 1.3379 (1.1414) acc 62.5000 (71.0511) lr 1.8090e-03 eta 4:42:34 +epoch [12/50] batch [445/500] time 0.893 (0.889) data 0.000 (0.002) loss 1.1221 (1.1411) acc 71.8750 (71.0253) lr 1.8090e-03 eta 4:42:28 +epoch [12/50] batch [450/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.6768 (1.1390) acc 68.7500 (71.0208) lr 1.8090e-03 eta 4:42:24 +epoch [12/50] batch [455/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.5068 (1.1403) acc 62.5000 (70.9890) lr 1.8090e-03 eta 4:42:19 +epoch [12/50] batch [460/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.0557 (1.1379) acc 71.8750 (70.9986) lr 1.8090e-03 eta 4:42:14 +epoch [12/50] batch [465/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.0010 (1.1404) acc 65.6250 
(70.9610) lr 1.8090e-03 eta 4:42:08 +epoch [12/50] batch [470/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.7148 (1.1419) acc 84.3750 (70.9707) lr 1.8090e-03 eta 4:42:01 +epoch [12/50] batch [475/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.6914 (1.1429) acc 53.1250 (70.9211) lr 1.8090e-03 eta 4:41:57 +epoch [12/50] batch [480/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.4482 (1.1433) acc 78.1250 (70.9375) lr 1.8090e-03 eta 4:41:51 +epoch [12/50] batch [485/500] time 0.920 (0.889) data 0.001 (0.002) loss 1.1963 (1.1442) acc 75.0000 (70.9343) lr 1.8090e-03 eta 4:41:49 +epoch [12/50] batch [490/500] time 0.983 (0.889) data 0.000 (0.002) loss 0.9233 (1.1424) acc 71.8750 (70.9439) lr 1.8090e-03 eta 4:41:46 +epoch [12/50] batch [495/500] time 0.861 (0.889) data 0.000 (0.001) loss 0.5801 (1.1431) acc 90.6250 (70.9785) lr 1.8090e-03 eta 4:41:37 +epoch [12/50] batch [500/500] time 0.891 (0.889) data 0.000 (0.001) loss 1.6865 (1.1459) acc 56.2500 (70.9125) lr 1.7705e-03 eta 4:41:35 +epoch [13/50] batch [5/500] time 0.877 (1.022) data 0.000 (0.128) loss 1.0625 (0.9570) acc 75.0000 (75.6250) lr 1.7705e-03 eta 5:23:37 +epoch [13/50] batch [10/500] time 0.885 (0.954) data 0.000 (0.064) loss 1.2334 (1.1533) acc 65.6250 (71.2500) lr 1.7705e-03 eta 5:01:55 +epoch [13/50] batch [15/500] time 0.857 (0.934) data 0.000 (0.043) loss 1.0469 (1.1953) acc 75.0000 (70.0000) lr 1.7705e-03 eta 4:55:27 +epoch [13/50] batch [20/500] time 0.908 (0.919) data 0.000 (0.032) loss 1.3721 (1.1722) acc 71.8750 (70.4688) lr 1.7705e-03 eta 4:50:47 +epoch [13/50] batch [25/500] time 0.867 (0.911) data 0.000 (0.026) loss 1.2334 (1.1618) acc 65.6250 (70.7500) lr 1.7705e-03 eta 4:48:03 +epoch [13/50] batch [30/500] time 0.903 (0.909) data 0.000 (0.021) loss 0.9575 (1.1367) acc 75.0000 (70.9375) lr 1.7705e-03 eta 4:47:29 +epoch [13/50] batch [35/500] time 0.908 (0.907) data 0.000 (0.018) loss 1.2852 (1.1384) acc 75.0000 (70.8929) lr 1.7705e-03 eta 4:46:40 +epoch [13/50] batch [40/500] time 
0.895 (0.905) data 0.000 (0.016) loss 1.8643 (1.1520) acc 65.6250 (70.7812) lr 1.7705e-03 eta 4:46:03 +epoch [13/50] batch [45/500] time 0.878 (0.903) data 0.000 (0.014) loss 1.3301 (1.1447) acc 65.6250 (71.2500) lr 1.7705e-03 eta 4:45:18 +epoch [13/50] batch [50/500] time 0.918 (0.904) data 0.000 (0.013) loss 1.5049 (1.1816) acc 65.6250 (70.2500) lr 1.7705e-03 eta 4:45:35 +epoch [13/50] batch [55/500] time 0.883 (0.902) data 0.000 (0.012) loss 1.3057 (1.1762) acc 68.7500 (70.4545) lr 1.7705e-03 eta 4:44:51 +epoch [13/50] batch [60/500] time 0.870 (0.902) data 0.000 (0.011) loss 1.5645 (1.1815) acc 65.6250 (70.4167) lr 1.7705e-03 eta 4:44:38 +epoch [13/50] batch [65/500] time 0.877 (0.900) data 0.000 (0.010) loss 1.3350 (1.1738) acc 65.6250 (70.4808) lr 1.7705e-03 eta 4:43:55 +epoch [13/50] batch [70/500] time 0.864 (0.898) data 0.000 (0.009) loss 1.4873 (1.1866) acc 68.7500 (70.4911) lr 1.7705e-03 eta 4:43:27 +epoch [13/50] batch [75/500] time 0.860 (0.898) data 0.000 (0.009) loss 1.0049 (1.1930) acc 71.8750 (70.0417) lr 1.7705e-03 eta 4:43:13 +epoch [13/50] batch [80/500] time 0.906 (0.897) data 0.000 (0.008) loss 0.8428 (1.1901) acc 75.0000 (70.0000) lr 1.7705e-03 eta 4:42:58 +epoch [13/50] batch [85/500] time 0.869 (0.896) data 0.000 (0.008) loss 0.9116 (1.1878) acc 78.1250 (70.0735) lr 1.7705e-03 eta 4:42:32 +epoch [13/50] batch [90/500] time 1.019 (0.897) data 0.000 (0.007) loss 1.3652 (1.1896) acc 68.7500 (70.1389) lr 1.7705e-03 eta 4:42:37 +epoch [13/50] batch [95/500] time 0.897 (0.896) data 0.000 (0.007) loss 0.8267 (1.1848) acc 78.1250 (70.1645) lr 1.7705e-03 eta 4:42:23 +epoch [13/50] batch [100/500] time 0.904 (0.896) data 0.000 (0.007) loss 1.4639 (1.1836) acc 65.6250 (70.1875) lr 1.7705e-03 eta 4:42:21 +epoch [13/50] batch [105/500] time 0.878 (0.896) data 0.000 (0.006) loss 1.0527 (1.1944) acc 68.7500 (70.0893) lr 1.7705e-03 eta 4:42:12 +epoch [13/50] batch [110/500] time 0.888 (0.896) data 0.000 (0.006) loss 0.6289 (1.1873) acc 81.2500 (70.0000) lr 
1.7705e-03 eta 4:42:00 +epoch [13/50] batch [115/500] time 0.855 (0.895) data 0.000 (0.006) loss 1.3604 (1.1901) acc 62.5000 (69.8370) lr 1.7705e-03 eta 4:41:40 +epoch [13/50] batch [120/500] time 0.888 (0.894) data 0.000 (0.006) loss 1.4307 (1.1993) acc 78.1250 (69.8177) lr 1.7705e-03 eta 4:41:20 +epoch [13/50] batch [125/500] time 0.895 (0.894) data 0.000 (0.005) loss 1.0283 (1.1988) acc 78.1250 (70.0750) lr 1.7705e-03 eta 4:41:06 +epoch [13/50] batch [130/500] time 0.872 (0.893) data 0.000 (0.005) loss 0.9824 (1.1919) acc 75.0000 (70.3125) lr 1.7705e-03 eta 4:40:47 +epoch [13/50] batch [135/500] time 0.859 (0.892) data 0.000 (0.005) loss 0.6094 (1.1891) acc 84.3750 (70.3704) lr 1.7705e-03 eta 4:40:23 +epoch [13/50] batch [140/500] time 0.884 (0.892) data 0.000 (0.005) loss 1.0127 (1.1917) acc 71.8750 (70.4241) lr 1.7705e-03 eta 4:40:18 +epoch [13/50] batch [145/500] time 0.863 (0.891) data 0.000 (0.005) loss 0.9824 (1.1854) acc 75.0000 (70.5388) lr 1.7705e-03 eta 4:40:04 +epoch [13/50] batch [150/500] time 0.889 (0.891) data 0.000 (0.004) loss 1.3486 (1.1828) acc 65.6250 (70.7500) lr 1.7705e-03 eta 4:40:01 +epoch [13/50] batch [155/500] time 0.911 (0.891) data 0.000 (0.004) loss 1.2441 (1.1814) acc 62.5000 (70.7258) lr 1.7705e-03 eta 4:39:48 +epoch [13/50] batch [160/500] time 0.872 (0.891) data 0.000 (0.004) loss 0.8267 (1.1828) acc 78.1250 (70.5664) lr 1.7705e-03 eta 4:39:41 +epoch [13/50] batch [165/500] time 0.923 (0.891) data 0.000 (0.004) loss 1.1592 (1.1876) acc 71.8750 (70.4924) lr 1.7705e-03 eta 4:39:41 +epoch [13/50] batch [170/500] time 0.885 (0.891) data 0.000 (0.004) loss 1.5742 (1.1956) acc 62.5000 (70.2941) lr 1.7705e-03 eta 4:39:33 +epoch [13/50] batch [175/500] time 0.904 (0.890) data 0.000 (0.004) loss 0.9316 (1.1943) acc 78.1250 (70.2857) lr 1.7705e-03 eta 4:39:23 +epoch [13/50] batch [180/500] time 0.872 (0.890) data 0.000 (0.004) loss 0.7051 (1.1855) acc 78.1250 (70.4861) lr 1.7705e-03 eta 4:39:08 +epoch [13/50] batch [185/500] time 0.895 
(0.890) data 0.000 (0.004) loss 1.4082 (1.1839) acc 71.8750 (70.5405) lr 1.7705e-03 eta 4:39:00 +epoch [13/50] batch [190/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.0957 (1.1873) acc 71.8750 (70.4770) lr 1.7705e-03 eta 4:39:00 +epoch [13/50] batch [195/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.4043 (1.1941) acc 75.0000 (70.4327) lr 1.7705e-03 eta 4:38:52 +epoch [13/50] batch [200/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.1162 (1.1978) acc 65.6250 (70.2031) lr 1.7705e-03 eta 4:38:45 +epoch [13/50] batch [205/500] time 0.881 (0.889) data 0.000 (0.003) loss 0.8477 (1.1985) acc 78.1250 (70.1524) lr 1.7705e-03 eta 4:38:33 +epoch [13/50] batch [210/500] time 0.872 (0.889) data 0.000 (0.003) loss 0.9595 (1.1986) acc 68.7500 (70.1190) lr 1.7705e-03 eta 4:38:29 +epoch [13/50] batch [215/500] time 0.904 (0.889) data 0.000 (0.003) loss 0.7891 (1.1942) acc 78.1250 (70.2180) lr 1.7705e-03 eta 4:38:26 +epoch [13/50] batch [220/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.3828 (1.1922) acc 62.5000 (70.2415) lr 1.7705e-03 eta 4:38:21 +epoch [13/50] batch [225/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.1406 (1.1927) acc 65.6250 (70.1528) lr 1.7705e-03 eta 4:38:20 +epoch [13/50] batch [230/500] time 0.915 (0.889) data 0.000 (0.003) loss 1.7744 (1.1975) acc 59.3750 (70.0815) lr 1.7705e-03 eta 4:38:12 +epoch [13/50] batch [235/500] time 0.908 (0.890) data 0.000 (0.003) loss 0.9531 (1.1957) acc 81.2500 (70.1596) lr 1.7705e-03 eta 4:38:13 +epoch [13/50] batch [240/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.6738 (1.1902) acc 78.1250 (70.2734) lr 1.7705e-03 eta 4:38:05 +epoch [13/50] batch [245/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.3262 (1.1904) acc 65.6250 (70.2806) lr 1.7705e-03 eta 4:38:02 +epoch [13/50] batch [250/500] time 0.860 (0.889) data 0.000 (0.003) loss 1.4082 (1.1925) acc 68.7500 (70.2875) lr 1.7705e-03 eta 4:37:57 +epoch [13/50] batch [255/500] time 0.866 (0.889) data 0.000 (0.003) loss 0.9497 (1.1924) acc 75.0000 
(70.2451) lr 1.7705e-03 eta 4:37:51 +epoch [13/50] batch [260/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.8540 (1.1874) acc 87.5000 (70.3966) lr 1.7705e-03 eta 4:37:49 +epoch [13/50] batch [265/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.1172 (1.1857) acc 65.6250 (70.3302) lr 1.7705e-03 eta 4:37:39 +epoch [13/50] batch [270/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.2334 (1.1847) acc 78.1250 (70.3935) lr 1.7705e-03 eta 4:37:29 +epoch [13/50] batch [275/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.3652 (1.1843) acc 75.0000 (70.3750) lr 1.7705e-03 eta 4:37:22 +epoch [13/50] batch [280/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.7490 (1.1847) acc 43.7500 (70.3125) lr 1.7705e-03 eta 4:37:18 +epoch [13/50] batch [285/500] time 0.857 (0.889) data 0.000 (0.002) loss 0.8062 (1.1860) acc 81.2500 (70.3399) lr 1.7705e-03 eta 4:37:11 +epoch [13/50] batch [290/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.0410 (1.1854) acc 75.0000 (70.4203) lr 1.7705e-03 eta 4:37:10 +epoch [13/50] batch [295/500] time 0.909 (0.889) data 0.000 (0.002) loss 0.8359 (1.1856) acc 71.8750 (70.3496) lr 1.7705e-03 eta 4:37:06 +epoch [13/50] batch [300/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.1152 (1.1834) acc 68.7500 (70.3438) lr 1.7705e-03 eta 4:37:02 +epoch [13/50] batch [305/500] time 0.893 (0.889) data 0.000 (0.002) loss 1.4473 (1.1840) acc 62.5000 (70.3381) lr 1.7705e-03 eta 4:36:55 +epoch [13/50] batch [310/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.3887 (1.1867) acc 68.7500 (70.3226) lr 1.7705e-03 eta 4:36:48 +epoch [13/50] batch [315/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.2920 (1.1858) acc 75.0000 (70.3968) lr 1.7705e-03 eta 4:36:42 +epoch [13/50] batch [320/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.2529 (1.1807) acc 62.5000 (70.5176) lr 1.7705e-03 eta 4:36:39 +epoch [13/50] batch [325/500] time 0.850 (0.888) data 0.000 (0.002) loss 1.7256 (1.1820) acc 65.6250 (70.4808) lr 1.7705e-03 eta 4:36:30 +epoch [13/50] batch [330/500] 
time 0.870 (0.888) data 0.000 (0.002) loss 1.4043 (1.1809) acc 68.7500 (70.5114) lr 1.7705e-03 eta 4:36:23 +epoch [13/50] batch [335/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.4160 (1.1797) acc 68.7500 (70.5970) lr 1.7705e-03 eta 4:36:21 +epoch [13/50] batch [340/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.3574 (1.1783) acc 71.8750 (70.6526) lr 1.7705e-03 eta 4:36:14 +epoch [13/50] batch [345/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.1289 (1.1773) acc 71.8750 (70.6522) lr 1.7705e-03 eta 4:36:08 +epoch [13/50] batch [350/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.2139 (1.1770) acc 62.5000 (70.6161) lr 1.7705e-03 eta 4:36:10 +epoch [13/50] batch [355/500] time 0.924 (0.889) data 0.000 (0.002) loss 1.4668 (1.1756) acc 56.2500 (70.6426) lr 1.7705e-03 eta 4:36:07 +epoch [13/50] batch [360/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.1123 (1.1738) acc 68.7500 (70.7031) lr 1.7705e-03 eta 4:36:00 +epoch [13/50] batch [365/500] time 0.923 (0.888) data 0.000 (0.002) loss 0.8916 (1.1768) acc 81.2500 (70.6592) lr 1.7705e-03 eta 4:35:54 +epoch [13/50] batch [370/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.3271 (1.1789) acc 65.6250 (70.6166) lr 1.7705e-03 eta 4:35:49 +epoch [13/50] batch [375/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.2373 (1.1821) acc 62.5000 (70.5250) lr 1.7705e-03 eta 4:35:46 +epoch [13/50] batch [380/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.2988 (1.1791) acc 65.6250 (70.5510) lr 1.7705e-03 eta 4:35:46 +epoch [13/50] batch [385/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.8306 (1.1802) acc 78.1250 (70.4302) lr 1.7705e-03 eta 4:35:39 +epoch [13/50] batch [390/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.6719 (1.1795) acc 78.1250 (70.4167) lr 1.7705e-03 eta 4:35:34 +epoch [13/50] batch [395/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.8545 (1.1727) acc 81.2500 (70.5380) lr 1.7705e-03 eta 4:35:29 +epoch [13/50] batch [400/500] time 0.903 (0.888) data 0.000 (0.002) loss 0.6074 (1.1700) acc 
81.2500 (70.5859) lr 1.7705e-03 eta 4:35:23 +epoch [13/50] batch [405/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.1670 (1.1683) acc 78.1250 (70.6713) lr 1.7705e-03 eta 4:35:20 +epoch [13/50] batch [410/500] time 0.906 (0.888) data 0.000 (0.002) loss 0.9893 (1.1663) acc 56.2500 (70.6402) lr 1.7705e-03 eta 4:35:14 +epoch [13/50] batch [415/500] time 0.922 (0.888) data 0.000 (0.002) loss 1.0059 (1.1657) acc 75.0000 (70.6175) lr 1.7705e-03 eta 4:35:10 +epoch [13/50] batch [420/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.3076 (1.1657) acc 68.7500 (70.6176) lr 1.7705e-03 eta 4:35:01 +epoch [13/50] batch [425/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.2295 (1.1684) acc 71.8750 (70.6103) lr 1.7705e-03 eta 4:34:55 +epoch [13/50] batch [430/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.8701 (1.1656) acc 78.1250 (70.6613) lr 1.7705e-03 eta 4:34:48 +epoch [13/50] batch [435/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.4316 (1.1669) acc 75.0000 (70.6537) lr 1.7705e-03 eta 4:34:41 +epoch [13/50] batch [440/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.0889 (1.1641) acc 78.1250 (70.7031) lr 1.7705e-03 eta 4:34:35 +epoch [13/50] batch [445/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.4092 (1.1648) acc 71.8750 (70.7163) lr 1.7705e-03 eta 4:34:29 +epoch [13/50] batch [450/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.5166 (1.1642) acc 75.0000 (70.7778) lr 1.7705e-03 eta 4:34:25 +epoch [13/50] batch [455/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4014 (1.1655) acc 65.6250 (70.7761) lr 1.7705e-03 eta 4:34:22 +epoch [13/50] batch [460/500] time 0.936 (0.888) data 0.000 (0.002) loss 1.2021 (1.1684) acc 78.1250 (70.7337) lr 1.7705e-03 eta 4:34:20 +epoch [13/50] batch [465/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.0020 (1.1673) acc 68.7500 (70.7258) lr 1.7705e-03 eta 4:34:16 +epoch [13/50] batch [470/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.6143 (1.1657) acc 84.3750 (70.7912) lr 1.7705e-03 eta 4:34:12 +epoch [13/50] batch 
[475/500] time 0.983 (0.888) data 0.000 (0.002) loss 1.3818 (1.1647) acc 62.5000 (70.7632) lr 1.7705e-03 eta 4:34:10 +epoch [13/50] batch [480/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.0547 (1.1662) acc 78.1250 (70.7617) lr 1.7705e-03 eta 4:34:07 +epoch [13/50] batch [485/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.0107 (1.1643) acc 71.8750 (70.7796) lr 1.7705e-03 eta 4:34:02 +epoch [13/50] batch [490/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.9121 (1.1654) acc 78.1250 (70.7462) lr 1.7705e-03 eta 4:33:57 +epoch [13/50] batch [495/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.0254 (1.1647) acc 71.8750 (70.7197) lr 1.7705e-03 eta 4:33:50 +epoch [13/50] batch [500/500] time 0.907 (0.888) data 0.000 (0.001) loss 1.4863 (1.1641) acc 62.5000 (70.7313) lr 1.7290e-03 eta 4:33:45 +epoch [14/50] batch [5/500] time 0.857 (1.002) data 0.000 (0.126) loss 0.6948 (1.0678) acc 75.0000 (66.8750) lr 1.7290e-03 eta 5:08:58 +epoch [14/50] batch [10/500] time 0.889 (0.941) data 0.000 (0.063) loss 0.8887 (1.1424) acc 75.0000 (69.6875) lr 1.7290e-03 eta 4:50:07 +epoch [14/50] batch [15/500] time 0.865 (0.925) data 0.000 (0.042) loss 1.2500 (1.1582) acc 68.7500 (70.4167) lr 1.7290e-03 eta 4:45:02 +epoch [14/50] batch [20/500] time 0.862 (0.912) data 0.000 (0.032) loss 1.5889 (1.1582) acc 59.3750 (70.3125) lr 1.7290e-03 eta 4:40:55 +epoch [14/50] batch [25/500] time 0.864 (0.910) data 0.000 (0.025) loss 1.0430 (1.1803) acc 65.6250 (70.2500) lr 1.7290e-03 eta 4:40:04 +epoch [14/50] batch [30/500] time 0.870 (0.908) data 0.000 (0.021) loss 1.1777 (1.1889) acc 71.8750 (70.2083) lr 1.7290e-03 eta 4:39:30 +epoch [14/50] batch [35/500] time 0.893 (0.905) data 0.000 (0.018) loss 0.8672 (1.1755) acc 71.8750 (70.4464) lr 1.7290e-03 eta 4:38:26 +epoch [14/50] batch [40/500] time 0.874 (0.901) data 0.000 (0.016) loss 1.1328 (1.1653) acc 75.0000 (70.9375) lr 1.7290e-03 eta 4:37:18 +epoch [14/50] batch [45/500] time 0.908 (0.902) data 0.000 (0.014) loss 1.1338 (1.1677) acc 
62.5000 (70.4167) lr 1.7290e-03 eta 4:37:27 +epoch [14/50] batch [50/500] time 0.907 (0.901) data 0.000 (0.013) loss 1.0029 (1.1430) acc 78.1250 (70.6875) lr 1.7290e-03 eta 4:37:07 +epoch [14/50] batch [55/500] time 0.863 (0.899) data 0.000 (0.012) loss 1.5859 (1.1612) acc 68.7500 (70.6250) lr 1.7290e-03 eta 4:36:30 +epoch [14/50] batch [60/500] time 0.857 (0.898) data 0.000 (0.011) loss 1.8350 (1.1474) acc 62.5000 (70.9896) lr 1.7290e-03 eta 4:35:57 +epoch [14/50] batch [65/500] time 0.881 (0.897) data 0.000 (0.010) loss 1.0957 (1.1434) acc 68.7500 (71.1058) lr 1.7290e-03 eta 4:35:34 +epoch [14/50] batch [70/500] time 0.894 (0.898) data 0.000 (0.009) loss 1.2832 (1.1285) acc 75.0000 (71.5179) lr 1.7290e-03 eta 4:35:47 +epoch [14/50] batch [75/500] time 0.860 (0.897) data 0.000 (0.009) loss 1.2695 (1.1256) acc 68.7500 (71.5417) lr 1.7290e-03 eta 4:35:32 +epoch [14/50] batch [80/500] time 0.888 (0.897) data 0.000 (0.008) loss 1.2256 (1.1245) acc 81.2500 (71.6797) lr 1.7290e-03 eta 4:35:15 +epoch [14/50] batch [85/500] time 0.880 (0.897) data 0.000 (0.008) loss 1.0547 (1.1182) acc 71.8750 (71.8015) lr 1.7290e-03 eta 4:35:11 +epoch [14/50] batch [90/500] time 0.916 (0.897) data 0.000 (0.007) loss 1.2197 (1.1222) acc 71.8750 (71.8750) lr 1.7290e-03 eta 4:35:13 +epoch [14/50] batch [95/500] time 0.886 (0.897) data 0.000 (0.007) loss 1.4971 (1.1280) acc 56.2500 (71.7434) lr 1.7290e-03 eta 4:35:08 +epoch [14/50] batch [100/500] time 0.915 (0.896) data 0.000 (0.007) loss 1.8896 (1.1444) acc 40.6250 (71.4688) lr 1.7290e-03 eta 4:34:45 +epoch [14/50] batch [105/500] time 0.872 (0.896) data 0.000 (0.006) loss 0.7705 (1.1362) acc 78.1250 (71.6964) lr 1.7290e-03 eta 4:34:33 +epoch [14/50] batch [110/500] time 0.901 (0.895) data 0.000 (0.006) loss 0.9658 (1.1400) acc 78.1250 (71.5909) lr 1.7290e-03 eta 4:34:11 +epoch [14/50] batch [115/500] time 0.867 (0.894) data 0.000 (0.006) loss 1.1982 (1.1395) acc 78.1250 (71.6576) lr 1.7290e-03 eta 4:34:00 +epoch [14/50] batch [120/500] 
time 0.886 (0.894) data 0.000 (0.005) loss 1.1416 (1.1352) acc 65.6250 (71.6146) lr 1.7290e-03 eta 4:33:47 +epoch [14/50] batch [125/500] time 0.907 (0.894) data 0.000 (0.005) loss 1.0928 (1.1310) acc 71.8750 (71.6000) lr 1.7290e-03 eta 4:33:45 +epoch [14/50] batch [130/500] time 0.871 (0.893) data 0.000 (0.005) loss 0.8579 (1.1290) acc 71.8750 (71.5865) lr 1.7290e-03 eta 4:33:31 +epoch [14/50] batch [135/500] time 0.867 (0.893) data 0.000 (0.005) loss 1.4062 (1.1300) acc 75.0000 (71.7824) lr 1.7290e-03 eta 4:33:20 +epoch [14/50] batch [140/500] time 0.908 (0.893) data 0.000 (0.005) loss 0.6904 (1.1276) acc 81.2500 (71.8973) lr 1.7290e-03 eta 4:33:16 +epoch [14/50] batch [145/500] time 0.857 (0.893) data 0.000 (0.005) loss 1.2793 (1.1252) acc 75.0000 (72.1552) lr 1.7290e-03 eta 4:33:02 +epoch [14/50] batch [150/500] time 0.882 (0.892) data 0.000 (0.004) loss 1.2314 (1.1246) acc 71.8750 (72.1875) lr 1.7290e-03 eta 4:32:52 +epoch [14/50] batch [155/500] time 0.898 (0.892) data 0.000 (0.004) loss 1.0684 (1.1226) acc 71.8750 (72.2581) lr 1.7290e-03 eta 4:32:46 +epoch [14/50] batch [160/500] time 0.896 (0.892) data 0.000 (0.004) loss 0.9502 (1.1178) acc 75.0000 (72.2656) lr 1.7290e-03 eta 4:32:37 +epoch [14/50] batch [165/500] time 0.906 (0.892) data 0.000 (0.004) loss 1.5381 (1.1183) acc 68.7500 (72.3106) lr 1.7290e-03 eta 4:32:31 +epoch [14/50] batch [170/500] time 0.856 (0.892) data 0.000 (0.004) loss 1.3809 (1.1203) acc 68.7500 (72.2059) lr 1.7290e-03 eta 4:32:37 +epoch [14/50] batch [175/500] time 0.880 (0.892) data 0.000 (0.004) loss 0.9277 (1.1212) acc 81.2500 (72.1786) lr 1.7290e-03 eta 4:32:31 +epoch [14/50] batch [180/500] time 0.878 (0.892) data 0.000 (0.004) loss 1.6436 (1.1241) acc 75.0000 (72.2049) lr 1.7290e-03 eta 4:32:26 +epoch [14/50] batch [185/500] time 0.914 (0.893) data 0.000 (0.004) loss 1.1133 (1.1248) acc 65.6250 (72.0946) lr 1.7290e-03 eta 4:32:26 +epoch [14/50] batch [190/500] time 0.890 (0.892) data 0.000 (0.004) loss 0.9507 (1.1253) acc 
75.0000 (72.1217) lr 1.7290e-03 eta 4:32:14 +epoch [14/50] batch [195/500] time 0.883 (0.892) data 0.000 (0.003) loss 1.4375 (1.1300) acc 65.6250 (72.0032) lr 1.7290e-03 eta 4:32:07 +epoch [14/50] batch [200/500] time 0.862 (0.891) data 0.000 (0.003) loss 0.8716 (1.1212) acc 71.8750 (72.2031) lr 1.7290e-03 eta 4:31:50 +epoch [14/50] batch [205/500] time 0.895 (0.891) data 0.000 (0.003) loss 1.6514 (1.1200) acc 65.6250 (72.2104) lr 1.7290e-03 eta 4:31:47 +epoch [14/50] batch [210/500] time 0.981 (0.892) data 0.000 (0.003) loss 1.5566 (1.1193) acc 65.6250 (72.1875) lr 1.7290e-03 eta 4:31:49 +epoch [14/50] batch [215/500] time 0.893 (0.892) data 0.000 (0.003) loss 1.2422 (1.1195) acc 75.0000 (72.2238) lr 1.7290e-03 eta 4:31:44 +epoch [14/50] batch [220/500] time 0.883 (0.891) data 0.000 (0.003) loss 1.0332 (1.1181) acc 78.1250 (72.1875) lr 1.7290e-03 eta 4:31:33 +epoch [14/50] batch [225/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.4062 (1.1194) acc 71.8750 (72.1389) lr 1.7290e-03 eta 4:31:23 +epoch [14/50] batch [230/500] time 0.874 (0.891) data 0.000 (0.003) loss 1.1758 (1.1238) acc 65.6250 (71.9973) lr 1.7290e-03 eta 4:31:10 +epoch [14/50] batch [235/500] time 0.883 (0.891) data 0.000 (0.003) loss 0.7568 (1.1230) acc 71.8750 (71.9814) lr 1.7290e-03 eta 4:31:08 +epoch [14/50] batch [240/500] time 0.903 (0.891) data 0.000 (0.003) loss 1.4717 (1.1180) acc 59.3750 (72.0312) lr 1.7290e-03 eta 4:31:03 +epoch [14/50] batch [245/500] time 0.886 (0.890) data 0.000 (0.003) loss 0.8237 (1.1166) acc 71.8750 (72.0026) lr 1.7290e-03 eta 4:30:55 +epoch [14/50] batch [250/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.4502 (1.1259) acc 59.3750 (71.8500) lr 1.7290e-03 eta 4:30:48 +epoch [14/50] batch [255/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.2578 (1.1230) acc 65.6250 (71.8995) lr 1.7290e-03 eta 4:30:38 +epoch [14/50] batch [260/500] time 0.854 (0.890) data 0.000 (0.003) loss 1.5615 (1.1250) acc 68.7500 (71.8750) lr 1.7290e-03 eta 4:30:29 +epoch [14/50] batch 
[265/500] time 0.880 (0.890) data 0.000 (0.003) loss 1.4053 (1.1217) acc 56.2500 (71.8986) lr 1.7290e-03 eta 4:30:26 +epoch [14/50] batch [270/500] time 0.893 (0.890) data 0.000 (0.003) loss 0.7686 (1.1188) acc 78.1250 (71.9792) lr 1.7290e-03 eta 4:30:23 +epoch [14/50] batch [275/500] time 0.862 (0.890) data 0.000 (0.003) loss 1.3506 (1.1204) acc 71.8750 (71.8977) lr 1.7290e-03 eta 4:30:13 +epoch [14/50] batch [280/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.1611 (1.1219) acc 68.7500 (71.8862) lr 1.7290e-03 eta 4:30:09 +epoch [14/50] batch [285/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.2920 (1.1209) acc 75.0000 (71.8860) lr 1.7290e-03 eta 4:30:06 +epoch [14/50] batch [290/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.3291 (1.1235) acc 62.5000 (71.7888) lr 1.7290e-03 eta 4:29:57 +epoch [14/50] batch [295/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.5771 (1.1239) acc 68.7500 (71.7479) lr 1.7290e-03 eta 4:29:54 +epoch [14/50] batch [300/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.3652 (1.1260) acc 62.5000 (71.6875) lr 1.7290e-03 eta 4:29:48 +epoch [14/50] batch [305/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.2236 (1.1257) acc 65.6250 (71.6701) lr 1.7290e-03 eta 4:29:40 +epoch [14/50] batch [310/500] time 0.880 (0.889) data 0.001 (0.002) loss 0.8481 (1.1238) acc 87.5000 (71.7843) lr 1.7290e-03 eta 4:29:39 +epoch [14/50] batch [315/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.2354 (1.1257) acc 68.7500 (71.8155) lr 1.7290e-03 eta 4:29:32 +epoch [14/50] batch [320/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.1416 (1.1294) acc 71.8750 (71.7578) lr 1.7290e-03 eta 4:29:27 +epoch [14/50] batch [325/500] time 0.914 (0.889) data 0.000 (0.002) loss 1.1172 (1.1323) acc 65.6250 (71.6058) lr 1.7290e-03 eta 4:29:25 +epoch [14/50] batch [330/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.4775 (1.1325) acc 62.5000 (71.6098) lr 1.7290e-03 eta 4:29:21 +epoch [14/50] batch [335/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.0029 
(1.1370) acc 75.0000 (71.4832) lr 1.7290e-03 eta 4:29:17 +epoch [14/50] batch [340/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.1729 (1.1370) acc 68.7500 (71.4614) lr 1.7290e-03 eta 4:29:12 +epoch [14/50] batch [345/500] time 0.938 (0.890) data 0.000 (0.002) loss 1.1221 (1.1362) acc 71.8750 (71.4764) lr 1.7290e-03 eta 4:29:11 +epoch [14/50] batch [350/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.9458 (1.1326) acc 78.1250 (71.5804) lr 1.7290e-03 eta 4:29:04 +epoch [14/50] batch [355/500] time 0.916 (0.890) data 0.000 (0.002) loss 1.5674 (1.1294) acc 56.2500 (71.6637) lr 1.7290e-03 eta 4:29:07 +epoch [14/50] batch [360/500] time 0.912 (0.890) data 0.000 (0.002) loss 0.6689 (1.1334) acc 81.2500 (71.5885) lr 1.7290e-03 eta 4:29:03 +epoch [14/50] batch [365/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.6416 (1.1376) acc 56.2500 (71.5154) lr 1.7290e-03 eta 4:28:54 +epoch [14/50] batch [370/500] time 0.840 (0.889) data 0.000 (0.002) loss 1.0361 (1.1380) acc 71.8750 (71.5034) lr 1.7290e-03 eta 4:28:42 +epoch [14/50] batch [375/500] time 0.891 (0.889) data 0.000 (0.002) loss 1.7676 (1.1359) acc 59.3750 (71.5667) lr 1.7290e-03 eta 4:28:37 +epoch [14/50] batch [380/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.0859 (1.1360) acc 65.6250 (71.5378) lr 1.7290e-03 eta 4:28:28 +epoch [14/50] batch [385/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.2305 (1.1348) acc 78.1250 (71.5584) lr 1.7290e-03 eta 4:28:21 +epoch [14/50] batch [390/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.1328 (1.1336) acc 68.7500 (71.5385) lr 1.7290e-03 eta 4:28:16 +epoch [14/50] batch [395/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.7769 (1.1327) acc 84.3750 (71.5585) lr 1.7290e-03 eta 4:28:11 +epoch [14/50] batch [400/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.5781 (1.1333) acc 62.5000 (71.5703) lr 1.7290e-03 eta 4:28:06 +epoch [14/50] batch [405/500] time 0.893 (0.889) data 0.000 (0.002) loss 0.9243 (1.1354) acc 68.7500 (71.5278) lr 1.7290e-03 eta 4:28:01 +epoch 
[14/50] batch [410/500] time 0.909 (0.889) data 0.000 (0.002) loss 0.7217 (1.1355) acc 81.2500 (71.5625) lr 1.7290e-03 eta 4:27:55 +epoch [14/50] batch [415/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.4219 (1.1340) acc 65.6250 (71.6114) lr 1.7290e-03 eta 4:27:50 +epoch [14/50] batch [420/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.6787 (1.1305) acc 65.6250 (71.6146) lr 1.7290e-03 eta 4:27:44 +epoch [14/50] batch [425/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.9912 (1.1347) acc 62.5000 (71.5147) lr 1.7290e-03 eta 4:27:37 +epoch [14/50] batch [430/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.5723 (1.1350) acc 81.2500 (71.4971) lr 1.7290e-03 eta 4:27:31 +epoch [14/50] batch [435/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3145 (1.1340) acc 68.7500 (71.5517) lr 1.7290e-03 eta 4:27:27 +epoch [14/50] batch [440/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.0020 (1.1332) acc 78.1250 (71.5909) lr 1.7290e-03 eta 4:27:21 +epoch [14/50] batch [445/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.9692 (1.1344) acc 75.0000 (71.5730) lr 1.7290e-03 eta 4:27:13 +epoch [14/50] batch [450/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.5869 (1.1352) acc 59.3750 (71.5417) lr 1.7290e-03 eta 4:27:06 +epoch [14/50] batch [455/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.2803 (1.1349) acc 75.0000 (71.5453) lr 1.7290e-03 eta 4:27:03 +epoch [14/50] batch [460/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.3223 (1.1351) acc 65.6250 (71.5353) lr 1.7290e-03 eta 4:26:55 +epoch [14/50] batch [465/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.3115 (1.1382) acc 68.7500 (71.4516) lr 1.7290e-03 eta 4:26:52 +epoch [14/50] batch [470/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.3418 (1.1394) acc 81.2500 (71.4229) lr 1.7290e-03 eta 4:26:47 +epoch [14/50] batch [475/500] time 0.850 (0.888) data 0.000 (0.002) loss 1.0938 (1.1376) acc 75.0000 (71.4605) lr 1.7290e-03 eta 4:26:42 +epoch [14/50] batch [480/500] time 0.888 (0.888) data 0.000 (0.002) loss 
1.2461 (1.1377) acc 71.8750 (71.4714) lr 1.7290e-03 eta 4:26:33 +epoch [14/50] batch [485/500] time 0.883 (0.887) data 0.001 (0.002) loss 1.3984 (1.1372) acc 68.7500 (71.4884) lr 1.7290e-03 eta 4:26:28 +epoch [14/50] batch [490/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.9321 (1.1348) acc 75.0000 (71.5370) lr 1.7290e-03 eta 4:26:20 +epoch [14/50] batch [495/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.9683 (1.1344) acc 71.8750 (71.5657) lr 1.7290e-03 eta 4:26:15 +epoch [14/50] batch [500/500] time 0.867 (0.887) data 0.000 (0.001) loss 1.2529 (1.1369) acc 71.8750 (71.4813) lr 1.6845e-03 eta 4:26:11 +epoch [15/50] batch [5/500] time 0.894 (1.023) data 0.000 (0.130) loss 1.0010 (0.9625) acc 81.2500 (76.8750) lr 1.6845e-03 eta 5:06:43 +epoch [15/50] batch [10/500] time 0.913 (0.953) data 0.000 (0.065) loss 0.5049 (0.9946) acc 87.5000 (75.6250) lr 1.6845e-03 eta 4:45:35 +epoch [15/50] batch [15/500] time 0.885 (0.928) data 0.000 (0.043) loss 0.7339 (0.9560) acc 81.2500 (75.6250) lr 1.6845e-03 eta 4:38:18 +epoch [15/50] batch [20/500] time 0.882 (0.919) data 0.000 (0.033) loss 1.2793 (0.9946) acc 65.6250 (75.1562) lr 1.6845e-03 eta 4:35:25 +epoch [15/50] batch [25/500] time 0.866 (0.909) data 0.000 (0.026) loss 1.1670 (1.0085) acc 62.5000 (73.8750) lr 1.6845e-03 eta 4:32:26 +epoch [15/50] batch [30/500] time 0.872 (0.906) data 0.000 (0.022) loss 1.2598 (1.0631) acc 65.6250 (73.1250) lr 1.6845e-03 eta 4:31:24 +epoch [15/50] batch [35/500] time 0.909 (0.903) data 0.000 (0.019) loss 1.5576 (1.0879) acc 65.6250 (72.7679) lr 1.6845e-03 eta 4:30:19 +epoch [15/50] batch [40/500] time 1.028 (0.902) data 0.000 (0.016) loss 1.4912 (1.0889) acc 62.5000 (72.2656) lr 1.6845e-03 eta 4:30:07 +epoch [15/50] batch [45/500] time 0.881 (0.900) data 0.000 (0.015) loss 1.3467 (1.1094) acc 62.5000 (71.3194) lr 1.6845e-03 eta 4:29:28 +epoch [15/50] batch [50/500] time 0.888 (0.899) data 0.000 (0.013) loss 0.6553 (1.0972) acc 81.2500 (71.4375) lr 1.6845e-03 eta 4:28:55 +epoch [15/50] 
batch [55/500] time 0.857 (0.896) data 0.000 (0.012) loss 1.2051 (1.1127) acc 71.8750 (71.5909) lr 1.6845e-03 eta 4:27:59 +epoch [15/50] batch [60/500] time 0.868 (0.894) data 0.000 (0.011) loss 0.9126 (1.1117) acc 65.6250 (71.4583) lr 1.6845e-03 eta 4:27:20 +epoch [15/50] batch [65/500] time 0.884 (0.893) data 0.000 (0.010) loss 1.0420 (1.1115) acc 71.8750 (71.1538) lr 1.6845e-03 eta 4:26:57 +epoch [15/50] batch [70/500] time 0.868 (0.893) data 0.000 (0.009) loss 0.6831 (1.1068) acc 81.2500 (71.4286) lr 1.6845e-03 eta 4:26:43 +epoch [15/50] batch [75/500] time 0.913 (0.892) data 0.000 (0.009) loss 1.6309 (1.0994) acc 62.5000 (71.5417) lr 1.6845e-03 eta 4:26:33 +epoch [15/50] batch [80/500] time 0.876 (0.891) data 0.000 (0.008) loss 1.0361 (1.1016) acc 75.0000 (71.2500) lr 1.6845e-03 eta 4:26:10 +epoch [15/50] batch [85/500] time 0.894 (0.891) data 0.000 (0.008) loss 0.6504 (1.1012) acc 84.3750 (71.2868) lr 1.6845e-03 eta 4:26:07 +epoch [15/50] batch [90/500] time 0.880 (0.891) data 0.000 (0.007) loss 0.9609 (1.1026) acc 75.0000 (71.4236) lr 1.6845e-03 eta 4:25:56 +epoch [15/50] batch [95/500] time 0.874 (0.891) data 0.000 (0.007) loss 1.1533 (1.1110) acc 68.7500 (71.2171) lr 1.6845e-03 eta 4:25:52 +epoch [15/50] batch [100/500] time 0.868 (0.890) data 0.000 (0.007) loss 0.9839 (1.1144) acc 75.0000 (71.1875) lr 1.6845e-03 eta 4:25:39 +epoch [15/50] batch [105/500] time 0.879 (0.890) data 0.000 (0.006) loss 1.5088 (1.1196) acc 71.8750 (71.0714) lr 1.6845e-03 eta 4:25:22 +epoch [15/50] batch [110/500] time 0.880 (0.889) data 0.000 (0.006) loss 0.6309 (1.1196) acc 78.1250 (71.1648) lr 1.6845e-03 eta 4:25:05 +epoch [15/50] batch [115/500] time 0.892 (0.889) data 0.000 (0.006) loss 1.1553 (1.1155) acc 81.2500 (71.3587) lr 1.6845e-03 eta 4:25:06 +epoch [15/50] batch [120/500] time 0.912 (0.889) data 0.000 (0.006) loss 1.4326 (1.1224) acc 68.7500 (71.1979) lr 1.6845e-03 eta 4:24:59 +epoch [15/50] batch [125/500] time 0.892 (0.890) data 0.000 (0.005) loss 1.0625 (1.1153) 
acc 78.1250 (71.3500) lr 1.6845e-03 eta 4:25:00 +epoch [15/50] batch [130/500] time 0.880 (0.889) data 0.000 (0.005) loss 0.7251 (1.1173) acc 75.0000 (71.3702) lr 1.6845e-03 eta 4:24:48 +epoch [15/50] batch [135/500] time 0.861 (0.888) data 0.000 (0.005) loss 1.2803 (1.1217) acc 71.8750 (71.4120) lr 1.6845e-03 eta 4:24:31 +epoch [15/50] batch [140/500] time 0.886 (0.889) data 0.000 (0.005) loss 1.6777 (1.1251) acc 59.3750 (71.4286) lr 1.6845e-03 eta 4:24:43 +epoch [15/50] batch [145/500] time 0.903 (0.890) data 0.000 (0.005) loss 1.0674 (1.1217) acc 68.7500 (71.3578) lr 1.6845e-03 eta 4:24:42 +epoch [15/50] batch [150/500] time 0.934 (0.890) data 0.000 (0.005) loss 1.2207 (1.1210) acc 62.5000 (71.2708) lr 1.6845e-03 eta 4:24:41 +epoch [15/50] batch [155/500] time 0.852 (0.889) data 0.000 (0.004) loss 1.5156 (1.1342) acc 68.7500 (71.1290) lr 1.6845e-03 eta 4:24:33 +epoch [15/50] batch [160/500] time 0.871 (0.890) data 0.000 (0.004) loss 0.8936 (1.1325) acc 71.8750 (71.1719) lr 1.6845e-03 eta 4:24:28 +epoch [15/50] batch [165/500] time 0.904 (0.890) data 0.000 (0.004) loss 1.2148 (1.1318) acc 56.2500 (71.1553) lr 1.6845e-03 eta 4:24:26 +epoch [15/50] batch [170/500] time 0.911 (0.890) data 0.002 (0.004) loss 0.9238 (1.1305) acc 81.2500 (71.1949) lr 1.6845e-03 eta 4:24:23 +epoch [15/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 1.0957 (1.1261) acc 68.7500 (71.2679) lr 1.6845e-03 eta 4:24:17 +epoch [15/50] batch [180/500] time 0.894 (0.890) data 0.000 (0.004) loss 1.6260 (1.1367) acc 53.1250 (71.0938) lr 1.6845e-03 eta 4:24:13 +epoch [15/50] batch [185/500] time 0.865 (0.890) data 0.000 (0.004) loss 0.6934 (1.1329) acc 87.5000 (71.1824) lr 1.6845e-03 eta 4:24:15 +epoch [15/50] batch [190/500] time 0.884 (0.890) data 0.000 (0.004) loss 1.4160 (1.1353) acc 65.6250 (71.2664) lr 1.6845e-03 eta 4:24:06 +epoch [15/50] batch [195/500] time 0.896 (0.890) data 0.000 (0.004) loss 1.4600 (1.1355) acc 65.6250 (71.3141) lr 1.6845e-03 eta 4:23:59 +epoch [15/50] 
batch [200/500] time 0.905 (0.889) data 0.000 (0.003) loss 1.6250 (1.1438) acc 62.5000 (71.1719) lr 1.6845e-03 eta 4:23:50 +epoch [15/50] batch [205/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.9746 (1.1389) acc 71.8750 (71.3567) lr 1.6845e-03 eta 4:23:48 +epoch [15/50] batch [210/500] time 0.879 (0.889) data 0.000 (0.003) loss 0.9692 (1.1428) acc 81.2500 (71.3393) lr 1.6845e-03 eta 4:23:42 +epoch [15/50] batch [215/500] time 0.900 (0.889) data 0.000 (0.003) loss 1.3672 (1.1421) acc 65.6250 (71.4244) lr 1.6845e-03 eta 4:23:34 +epoch [15/50] batch [220/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.2119 (1.1419) acc 71.8750 (71.4205) lr 1.6845e-03 eta 4:23:30 +epoch [15/50] batch [225/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.4062 (1.1414) acc 62.5000 (71.4444) lr 1.6845e-03 eta 4:23:24 +epoch [15/50] batch [230/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.7896 (1.1431) acc 78.1250 (71.4402) lr 1.6845e-03 eta 4:23:16 +epoch [15/50] batch [235/500] time 0.880 (0.889) data 0.000 (0.003) loss 1.3193 (1.1415) acc 68.7500 (71.5824) lr 1.6845e-03 eta 4:23:12 +epoch [15/50] batch [240/500] time 0.861 (0.889) data 0.000 (0.003) loss 0.7988 (1.1387) acc 84.3750 (71.5885) lr 1.6845e-03 eta 4:23:06 +epoch [15/50] batch [245/500] time 0.897 (0.889) data 0.000 (0.003) loss 1.9863 (1.1436) acc 62.5000 (71.5434) lr 1.6845e-03 eta 4:22:55 +epoch [15/50] batch [250/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.1045 (1.1424) acc 68.7500 (71.5250) lr 1.6845e-03 eta 4:22:52 +epoch [15/50] batch [255/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.2207 (1.1405) acc 75.0000 (71.5686) lr 1.6845e-03 eta 4:22:47 +epoch [15/50] batch [260/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.2783 (1.1390) acc 71.8750 (71.6226) lr 1.6845e-03 eta 4:22:42 +epoch [15/50] batch [265/500] time 0.864 (0.888) data 0.000 (0.003) loss 1.0420 (1.1403) acc 81.2500 (71.6038) lr 1.6845e-03 eta 4:22:36 +epoch [15/50] batch [270/500] time 0.890 (0.888) data 0.000 (0.003) loss 1.7461 
(1.1427) acc 65.6250 (71.5162) lr 1.6845e-03 eta 4:22:28 +epoch [15/50] batch [275/500] time 0.898 (0.888) data 0.000 (0.003) loss 0.7896 (1.1423) acc 75.0000 (71.5341) lr 1.6845e-03 eta 4:22:26 +epoch [15/50] batch [280/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.6201 (1.1427) acc 56.2500 (71.5179) lr 1.6845e-03 eta 4:22:19 +epoch [15/50] batch [285/500] time 0.918 (0.889) data 0.000 (0.003) loss 0.8154 (1.1414) acc 75.0000 (71.5461) lr 1.6845e-03 eta 4:22:22 +epoch [15/50] batch [290/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.5908 (1.1368) acc 87.5000 (71.5517) lr 1.6845e-03 eta 4:22:17 +epoch [15/50] batch [295/500] time 0.910 (0.889) data 0.000 (0.002) loss 1.3887 (1.1402) acc 71.8750 (71.4831) lr 1.6845e-03 eta 4:22:17 +epoch [15/50] batch [300/500] time 0.905 (0.889) data 0.000 (0.002) loss 0.9380 (1.1433) acc 62.5000 (71.3750) lr 1.6845e-03 eta 4:22:15 +epoch [15/50] batch [305/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.3428 (1.1418) acc 62.5000 (71.3217) lr 1.6845e-03 eta 4:22:08 +epoch [15/50] batch [310/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.7334 (1.1402) acc 81.2500 (71.3306) lr 1.6845e-03 eta 4:22:01 +epoch [15/50] batch [315/500] time 0.870 (0.889) data 0.000 (0.002) loss 0.6128 (1.1378) acc 78.1250 (71.3889) lr 1.6845e-03 eta 4:21:54 +epoch [15/50] batch [320/500] time 0.853 (0.888) data 0.000 (0.002) loss 2.2578 (1.1409) acc 59.3750 (71.2988) lr 1.6845e-03 eta 4:21:44 +epoch [15/50] batch [325/500] time 0.848 (0.888) data 0.000 (0.002) loss 1.5967 (1.1425) acc 59.3750 (71.2404) lr 1.6845e-03 eta 4:21:36 +epoch [15/50] batch [330/500] time 0.916 (0.888) data 0.000 (0.002) loss 1.1299 (1.1410) acc 71.8750 (71.2689) lr 1.6845e-03 eta 4:21:38 +epoch [15/50] batch [335/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9917 (1.1438) acc 71.8750 (71.2220) lr 1.6845e-03 eta 4:21:33 +epoch [15/50] batch [340/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.7764 (1.1452) acc 71.8750 (71.1949) lr 1.6845e-03 eta 4:21:28 +epoch 
[15/50] batch [345/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.3525 (1.1437) acc 75.0000 (71.2319) lr 1.6845e-03 eta 4:21:25 +epoch [15/50] batch [350/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.3145 (1.1442) acc 68.7500 (71.1875) lr 1.6845e-03 eta 4:21:19 +epoch [15/50] batch [355/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.7466 (1.1433) acc 81.2500 (71.1884) lr 1.6845e-03 eta 4:21:11 +epoch [15/50] batch [360/500] time 0.904 (0.888) data 0.000 (0.002) loss 0.8730 (1.1425) acc 78.1250 (71.2587) lr 1.6845e-03 eta 4:21:05 +epoch [15/50] batch [365/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.8042 (1.1401) acc 75.0000 (71.3099) lr 1.6845e-03 eta 4:21:00 +epoch [15/50] batch [370/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.2568 (1.1400) acc 68.7500 (71.3429) lr 1.6845e-03 eta 4:20:54 +epoch [15/50] batch [375/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.1699 (1.1427) acc 68.7500 (71.2667) lr 1.6845e-03 eta 4:20:53 +epoch [15/50] batch [380/500] time 0.921 (0.888) data 0.000 (0.002) loss 0.6143 (1.1421) acc 84.3750 (71.3158) lr 1.6845e-03 eta 4:20:54 +epoch [15/50] batch [385/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.6104 (1.1444) acc 65.6250 (71.3231) lr 1.6845e-03 eta 4:20:44 +epoch [15/50] batch [390/500] time 0.922 (0.888) data 0.000 (0.002) loss 1.1055 (1.1418) acc 71.8750 (71.4022) lr 1.6845e-03 eta 4:20:40 +epoch [15/50] batch [395/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.5859 (1.1432) acc 68.7500 (71.3924) lr 1.6845e-03 eta 4:20:36 +epoch [15/50] batch [400/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.0596 (1.1412) acc 68.7500 (71.4531) lr 1.6845e-03 eta 4:20:31 +epoch [15/50] batch [405/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.7754 (1.1416) acc 68.7500 (71.3812) lr 1.6845e-03 eta 4:20:27 +epoch [15/50] batch [410/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.8594 (1.1435) acc 59.3750 (71.3338) lr 1.6845e-03 eta 4:20:24 +epoch [15/50] batch [415/500] time 0.900 (0.888) data 0.000 (0.002) loss 
1.4307 (1.1436) acc 62.5000 (71.3102) lr 1.6845e-03 eta 4:20:19 +epoch [15/50] batch [420/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.3643 (1.1415) acc 65.6250 (71.3690) lr 1.6845e-03 eta 4:20:15 +epoch [15/50] batch [425/500] time 1.015 (0.889) data 0.000 (0.002) loss 0.9683 (1.1422) acc 62.5000 (71.2574) lr 1.6845e-03 eta 4:20:15 +epoch [15/50] batch [430/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2842 (1.1431) acc 65.6250 (71.2282) lr 1.6845e-03 eta 4:20:09 +epoch [15/50] batch [435/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.3779 (1.1471) acc 68.7500 (71.2069) lr 1.6845e-03 eta 4:20:03 +epoch [15/50] batch [440/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4053 (1.1483) acc 65.6250 (71.1932) lr 1.6845e-03 eta 4:19:56 +epoch [15/50] batch [445/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.8916 (1.1462) acc 75.0000 (71.2500) lr 1.6845e-03 eta 4:19:50 +epoch [15/50] batch [450/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.3672 (1.1477) acc 68.7500 (71.2153) lr 1.6845e-03 eta 4:19:41 +epoch [15/50] batch [455/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.2715 (1.1484) acc 71.8750 (71.2225) lr 1.6845e-03 eta 4:19:34 +epoch [15/50] batch [460/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.1250 (1.1488) acc 75.0000 (71.2160) lr 1.6845e-03 eta 4:19:29 +epoch [15/50] batch [465/500] time 0.873 (0.888) data 0.001 (0.002) loss 0.6968 (1.1469) acc 78.1250 (71.2500) lr 1.6845e-03 eta 4:19:25 +epoch [15/50] batch [470/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.2295 (1.1470) acc 62.5000 (71.2035) lr 1.6845e-03 eta 4:19:23 +epoch [15/50] batch [475/500] time 0.851 (0.888) data 0.000 (0.002) loss 1.4199 (1.1478) acc 62.5000 (71.1776) lr 1.6845e-03 eta 4:19:15 +epoch [15/50] batch [480/500] time 0.859 (0.888) data 0.000 (0.002) loss 0.9668 (1.1462) acc 75.0000 (71.2305) lr 1.6845e-03 eta 4:19:09 +epoch [15/50] batch [485/500] time 0.885 (0.887) data 0.001 (0.002) loss 0.7192 (1.1432) acc 81.2500 (71.2758) lr 1.6845e-03 eta 4:19:04 
+epoch [15/50] batch [490/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.9038 (1.1408) acc 71.8750 (71.3074) lr 1.6845e-03 eta 4:18:58 +epoch [15/50] batch [495/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9316 (1.1408) acc 81.2500 (71.3068) lr 1.6845e-03 eta 4:18:52 +epoch [15/50] batch [500/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.0615 (1.1401) acc 81.2500 (71.3563) lr 1.6374e-03 eta 4:18:48 +epoch [16/50] batch [5/500] time 0.879 (1.022) data 0.000 (0.125) loss 1.3066 (0.9875) acc 62.5000 (72.5000) lr 1.6374e-03 eta 4:57:51 +epoch [16/50] batch [10/500] time 0.863 (0.949) data 0.000 (0.063) loss 1.0156 (1.1414) acc 71.8750 (69.0625) lr 1.6374e-03 eta 4:36:34 +epoch [16/50] batch [15/500] time 0.858 (0.933) data 0.000 (0.042) loss 1.3516 (1.1555) acc 50.0000 (69.5833) lr 1.6374e-03 eta 4:31:57 +epoch [16/50] batch [20/500] time 0.888 (0.922) data 0.000 (0.032) loss 1.2998 (1.1428) acc 62.5000 (69.0625) lr 1.6374e-03 eta 4:28:28 +epoch [16/50] batch [25/500] time 0.900 (0.915) data 0.000 (0.025) loss 1.1084 (1.0973) acc 71.8750 (70.1250) lr 1.6374e-03 eta 4:26:32 +epoch [16/50] batch [30/500] time 0.879 (0.910) data 0.001 (0.021) loss 1.3477 (1.1365) acc 75.0000 (69.6875) lr 1.6374e-03 eta 4:25:06 +epoch [16/50] batch [35/500] time 0.871 (0.907) data 0.000 (0.018) loss 1.9043 (1.1304) acc 65.6250 (70.5357) lr 1.6374e-03 eta 4:24:02 +epoch [16/50] batch [40/500] time 0.887 (0.903) data 0.001 (0.016) loss 0.7275 (1.1473) acc 78.1250 (70.3906) lr 1.6374e-03 eta 4:22:48 +epoch [16/50] batch [45/500] time 0.900 (0.901) data 0.001 (0.014) loss 0.9043 (1.1264) acc 71.8750 (71.1806) lr 1.6374e-03 eta 4:22:06 +epoch [16/50] batch [50/500] time 0.895 (0.899) data 0.000 (0.013) loss 1.3008 (1.1387) acc 75.0000 (71.2500) lr 1.6374e-03 eta 4:21:27 +epoch [16/50] batch [55/500] time 0.913 (0.898) data 0.000 (0.012) loss 0.9619 (1.1188) acc 78.1250 (71.6477) lr 1.6374e-03 eta 4:21:12 +epoch [16/50] batch [60/500] time 0.885 (0.896) data 0.001 (0.011) loss 1.3594 
(1.1226) acc 59.3750 (71.6146) lr 1.6374e-03 eta 4:20:28 +epoch [16/50] batch [65/500] time 0.886 (0.895) data 0.000 (0.010) loss 0.7339 (1.1215) acc 75.0000 (71.3462) lr 1.6374e-03 eta 4:20:09 +epoch [16/50] batch [70/500] time 0.860 (0.895) data 0.000 (0.009) loss 1.6016 (1.1365) acc 56.2500 (70.9821) lr 1.6374e-03 eta 4:19:53 +epoch [16/50] batch [75/500] time 0.903 (0.894) data 0.000 (0.009) loss 0.9995 (1.1500) acc 71.8750 (70.8750) lr 1.6374e-03 eta 4:19:35 +epoch [16/50] batch [80/500] time 0.904 (0.894) data 0.000 (0.008) loss 0.9551 (1.1514) acc 65.6250 (70.6250) lr 1.6374e-03 eta 4:19:29 +epoch [16/50] batch [85/500] time 0.859 (0.893) data 0.000 (0.008) loss 1.3379 (1.1630) acc 68.7500 (70.4779) lr 1.6374e-03 eta 4:19:13 +epoch [16/50] batch [90/500] time 0.899 (0.893) data 0.001 (0.007) loss 1.2949 (1.1556) acc 65.6250 (70.5903) lr 1.6374e-03 eta 4:19:10 +epoch [16/50] batch [95/500] time 0.864 (0.893) data 0.000 (0.007) loss 0.7583 (1.1537) acc 81.2500 (70.6579) lr 1.6374e-03 eta 4:18:54 +epoch [16/50] batch [100/500] time 0.901 (0.892) data 0.000 (0.007) loss 1.0967 (1.1435) acc 71.8750 (70.9062) lr 1.6374e-03 eta 4:18:43 +epoch [16/50] batch [105/500] time 0.895 (0.892) data 0.000 (0.006) loss 0.9946 (1.1277) acc 65.6250 (71.1012) lr 1.6374e-03 eta 4:18:35 +epoch [16/50] batch [110/500] time 0.879 (0.893) data 0.001 (0.006) loss 0.7427 (1.1196) acc 71.8750 (71.2784) lr 1.6374e-03 eta 4:18:45 +epoch [16/50] batch [115/500] time 0.854 (0.892) data 0.000 (0.006) loss 1.0049 (1.1203) acc 81.2500 (71.2500) lr 1.6374e-03 eta 4:18:30 +epoch [16/50] batch [120/500] time 0.886 (0.892) data 0.000 (0.006) loss 1.0947 (1.1201) acc 71.8750 (71.2500) lr 1.6374e-03 eta 4:18:23 +epoch [16/50] batch [125/500] time 0.872 (0.891) data 0.000 (0.005) loss 0.5005 (1.1196) acc 81.2500 (71.3000) lr 1.6374e-03 eta 4:18:06 +epoch [16/50] batch [130/500] time 0.917 (0.892) data 0.000 (0.005) loss 1.3477 (1.1252) acc 65.6250 (71.2500) lr 1.6374e-03 eta 4:18:06 +epoch [16/50] 
batch [135/500] time 0.896 (0.892) data 0.000 (0.005) loss 1.4062 (1.1323) acc 62.5000 (71.2500) lr 1.6374e-03 eta 4:18:06 +epoch [16/50] batch [140/500] time 0.886 (0.892) data 0.000 (0.005) loss 0.6519 (1.1284) acc 78.1250 (71.3616) lr 1.6374e-03 eta 4:18:05 +epoch [16/50] batch [145/500] time 0.868 (0.892) data 0.000 (0.005) loss 1.0498 (1.1263) acc 68.7500 (71.4440) lr 1.6374e-03 eta 4:17:56 +epoch [16/50] batch [150/500] time 0.882 (0.891) data 0.000 (0.004) loss 1.0010 (1.1209) acc 75.0000 (71.5000) lr 1.6374e-03 eta 4:17:45 +epoch [16/50] batch [155/500] time 0.899 (0.892) data 0.000 (0.004) loss 1.0293 (1.1229) acc 68.7500 (71.4516) lr 1.6374e-03 eta 4:17:56 +epoch [16/50] batch [160/500] time 0.904 (0.892) data 0.000 (0.004) loss 0.9932 (1.1224) acc 78.1250 (71.5234) lr 1.6374e-03 eta 4:17:52 +epoch [16/50] batch [165/500] time 0.908 (0.892) data 0.000 (0.004) loss 1.1650 (1.1217) acc 65.6250 (71.5720) lr 1.6374e-03 eta 4:17:43 +epoch [16/50] batch [170/500] time 0.918 (0.892) data 0.000 (0.004) loss 0.6772 (1.1188) acc 84.3750 (71.6360) lr 1.6374e-03 eta 4:17:35 +epoch [16/50] batch [175/500] time 0.865 (0.891) data 0.000 (0.004) loss 0.7793 (1.1200) acc 81.2500 (71.6786) lr 1.6374e-03 eta 4:17:24 +epoch [16/50] batch [180/500] time 0.890 (0.891) data 0.000 (0.004) loss 1.2373 (1.1223) acc 75.0000 (71.7361) lr 1.6374e-03 eta 4:17:13 +epoch [16/50] batch [185/500] time 0.888 (0.891) data 0.000 (0.004) loss 0.4131 (1.1219) acc 87.5000 (71.7230) lr 1.6374e-03 eta 4:17:00 +epoch [16/50] batch [190/500] time 0.878 (0.890) data 0.000 (0.004) loss 1.1084 (1.1224) acc 68.7500 (71.6283) lr 1.6374e-03 eta 4:16:51 +epoch [16/50] batch [195/500] time 0.872 (0.890) data 0.000 (0.003) loss 1.2520 (1.1292) acc 71.8750 (71.5385) lr 1.6374e-03 eta 4:16:38 +epoch [16/50] batch [200/500] time 0.901 (0.890) data 0.000 (0.003) loss 1.5352 (1.1301) acc 68.7500 (71.5156) lr 1.6374e-03 eta 4:16:30 +epoch [16/50] batch [205/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.7788 
(1.1340) acc 81.2500 (71.4787) lr 1.6374e-03 eta 4:16:30 +epoch [16/50] batch [210/500] time 0.862 (0.890) data 0.000 (0.003) loss 1.4707 (1.1387) acc 59.3750 (71.3542) lr 1.6374e-03 eta 4:16:22 +epoch [16/50] batch [215/500] time 0.874 (0.890) data 0.000 (0.003) loss 0.7822 (1.1328) acc 78.1250 (71.5407) lr 1.6374e-03 eta 4:16:15 +epoch [16/50] batch [220/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.3955 (1.1336) acc 62.5000 (71.5767) lr 1.6374e-03 eta 4:16:11 +epoch [16/50] batch [225/500] time 0.858 (0.889) data 0.000 (0.003) loss 1.2480 (1.1289) acc 68.7500 (71.6806) lr 1.6374e-03 eta 4:16:01 +epoch [16/50] batch [230/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.7256 (1.1346) acc 62.5000 (71.6033) lr 1.6374e-03 eta 4:15:50 +epoch [16/50] batch [235/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.6211 (1.1385) acc 59.3750 (71.5691) lr 1.6374e-03 eta 4:15:50 +epoch [16/50] batch [240/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.0068 (1.1401) acc 75.0000 (71.6276) lr 1.6374e-03 eta 4:15:44 +epoch [16/50] batch [245/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.3330 (1.1437) acc 59.3750 (71.4796) lr 1.6374e-03 eta 4:15:39 +epoch [16/50] batch [250/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.0391 (1.1435) acc 65.6250 (71.4375) lr 1.6374e-03 eta 4:15:38 +epoch [16/50] batch [255/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.2871 (1.1452) acc 68.7500 (71.3603) lr 1.6374e-03 eta 4:15:44 +epoch [16/50] batch [260/500] time 0.907 (0.890) data 0.000 (0.003) loss 1.1055 (1.1469) acc 81.2500 (71.4062) lr 1.6374e-03 eta 4:15:45 +epoch [16/50] batch [265/500] time 0.914 (0.890) data 0.000 (0.003) loss 0.9209 (1.1465) acc 78.1250 (71.4387) lr 1.6374e-03 eta 4:15:44 +epoch [16/50] batch [270/500] time 0.943 (0.890) data 0.000 (0.003) loss 1.7197 (1.1497) acc 68.7500 (71.3889) lr 1.6374e-03 eta 4:15:41 +epoch [16/50] batch [275/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.0811 (1.1501) acc 75.0000 (71.3750) lr 1.6374e-03 eta 4:15:36 +epoch 
[16/50] batch [280/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.7129 (1.1475) acc 81.2500 (71.4286) lr 1.6374e-03 eta 4:15:32 +epoch [16/50] batch [285/500] time 0.866 (0.890) data 0.000 (0.002) loss 0.9795 (1.1502) acc 68.7500 (71.3048) lr 1.6374e-03 eta 4:15:24 +epoch [16/50] batch [290/500] time 0.877 (0.890) data 0.000 (0.002) loss 0.5342 (1.1454) acc 84.3750 (71.3470) lr 1.6374e-03 eta 4:15:16 +epoch [16/50] batch [295/500] time 1.007 (0.891) data 0.000 (0.002) loss 1.5107 (1.1449) acc 65.6250 (71.3771) lr 1.6374e-03 eta 4:15:22 +epoch [16/50] batch [300/500] time 0.893 (0.891) data 0.000 (0.002) loss 0.7495 (1.1410) acc 81.2500 (71.4792) lr 1.6374e-03 eta 4:15:22 +epoch [16/50] batch [305/500] time 0.843 (0.891) data 0.000 (0.002) loss 1.7715 (1.1414) acc 59.3750 (71.5061) lr 1.6374e-03 eta 4:15:12 +epoch [16/50] batch [310/500] time 0.877 (0.890) data 0.000 (0.002) loss 0.7515 (1.1402) acc 81.2500 (71.4617) lr 1.6374e-03 eta 4:15:03 +epoch [16/50] batch [315/500] time 0.872 (0.890) data 0.000 (0.002) loss 1.0459 (1.1365) acc 71.8750 (71.5179) lr 1.6374e-03 eta 4:14:55 +epoch [16/50] batch [320/500] time 0.894 (0.890) data 0.000 (0.002) loss 1.6416 (1.1361) acc 62.5000 (71.5234) lr 1.6374e-03 eta 4:14:52 +epoch [16/50] batch [325/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.6455 (1.1381) acc 65.6250 (71.5000) lr 1.6374e-03 eta 4:14:49 +epoch [16/50] batch [330/500] time 0.896 (0.890) data 0.000 (0.002) loss 1.0273 (1.1414) acc 78.1250 (71.4110) lr 1.6374e-03 eta 4:14:44 +epoch [16/50] batch [335/500] time 0.892 (0.890) data 0.000 (0.002) loss 1.1445 (1.1427) acc 65.6250 (71.3619) lr 1.6374e-03 eta 4:14:38 +epoch [16/50] batch [340/500] time 0.918 (0.890) data 0.000 (0.002) loss 1.4131 (1.1451) acc 71.8750 (71.3511) lr 1.6374e-03 eta 4:14:35 +epoch [16/50] batch [345/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.2969 (1.1418) acc 68.7500 (71.4312) lr 1.6374e-03 eta 4:14:28 +epoch [16/50] batch [350/500] time 0.884 (0.890) data 0.000 (0.002) loss 
1.0225 (1.1440) acc 71.8750 (71.3661) lr 1.6374e-03 eta 4:14:20 +epoch [16/50] batch [355/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.0527 (1.1436) acc 84.3750 (71.4261) lr 1.6374e-03 eta 4:14:16 +epoch [16/50] batch [360/500] time 0.896 (0.890) data 0.000 (0.002) loss 0.9058 (1.1426) acc 84.3750 (71.4844) lr 1.6374e-03 eta 4:14:12 +epoch [16/50] batch [365/500] time 0.859 (0.890) data 0.000 (0.002) loss 0.6548 (1.1388) acc 78.1250 (71.5497) lr 1.6374e-03 eta 4:14:04 +epoch [16/50] batch [370/500] time 0.910 (0.889) data 0.000 (0.002) loss 0.8750 (1.1413) acc 71.8750 (71.5372) lr 1.6374e-03 eta 4:13:56 +epoch [16/50] batch [375/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.9995 (1.1408) acc 68.7500 (71.5000) lr 1.6374e-03 eta 4:13:48 +epoch [16/50] batch [380/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.5908 (1.1399) acc 53.1250 (71.4556) lr 1.6374e-03 eta 4:13:42 +epoch [16/50] batch [385/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.4697 (1.1413) acc 59.3750 (71.4205) lr 1.6374e-03 eta 4:13:37 +epoch [16/50] batch [390/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.3916 (1.1416) acc 53.1250 (71.3702) lr 1.6374e-03 eta 4:13:30 +epoch [16/50] batch [395/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.4766 (1.1408) acc 65.6250 (71.3608) lr 1.6374e-03 eta 4:13:31 +epoch [16/50] batch [400/500] time 0.902 (0.889) data 0.000 (0.002) loss 0.8638 (1.1385) acc 81.2500 (71.3438) lr 1.6374e-03 eta 4:13:28 +epoch [16/50] batch [405/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.1748 (1.1355) acc 68.7500 (71.3812) lr 1.6374e-03 eta 4:13:22 +epoch [16/50] batch [410/500] time 0.899 (0.889) data 0.000 (0.002) loss 0.6851 (1.1317) acc 71.8750 (71.4558) lr 1.6374e-03 eta 4:13:18 +epoch [16/50] batch [415/500] time 0.855 (0.889) data 0.000 (0.002) loss 0.8579 (1.1302) acc 68.7500 (71.4684) lr 1.6374e-03 eta 4:13:11 +epoch [16/50] batch [420/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.2314 (1.1315) acc 71.8750 (71.4360) lr 1.6374e-03 eta 4:13:05 
+epoch [16/50] batch [425/500] time 0.912 (0.889) data 0.000 (0.002) loss 1.2715 (1.1337) acc 62.5000 (71.3750) lr 1.6374e-03 eta 4:13:01 +epoch [16/50] batch [430/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.2783 (1.1344) acc 71.8750 (71.3881) lr 1.6374e-03 eta 4:12:54 +epoch [16/50] batch [435/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.4932 (1.1341) acc 75.0000 (71.4152) lr 1.6374e-03 eta 4:12:51 +epoch [16/50] batch [440/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.5400 (1.1358) acc 62.5000 (71.4205) lr 1.6374e-03 eta 4:12:52 +epoch [16/50] batch [445/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.5488 (1.1373) acc 53.1250 (71.3483) lr 1.6374e-03 eta 4:12:47 +epoch [16/50] batch [450/500] time 0.916 (0.889) data 0.000 (0.002) loss 0.9644 (1.1387) acc 78.1250 (71.3194) lr 1.6374e-03 eta 4:12:43 +epoch [16/50] batch [455/500] time 0.857 (0.889) data 0.000 (0.002) loss 1.2002 (1.1381) acc 62.5000 (71.3393) lr 1.6374e-03 eta 4:12:35 +epoch [16/50] batch [460/500] time 0.891 (0.889) data 0.000 (0.002) loss 1.5664 (1.1426) acc 65.6250 (71.2568) lr 1.6374e-03 eta 4:12:27 +epoch [16/50] batch [465/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.1113 (1.1466) acc 75.0000 (71.1828) lr 1.6374e-03 eta 4:12:24 +epoch [16/50] batch [470/500] time 0.891 (0.889) data 0.000 (0.002) loss 1.0146 (1.1469) acc 71.8750 (71.1702) lr 1.6374e-03 eta 4:12:17 +epoch [16/50] batch [475/500] time 0.870 (0.889) data 0.000 (0.002) loss 0.9766 (1.1447) acc 71.8750 (71.1974) lr 1.6374e-03 eta 4:12:11 +epoch [16/50] batch [480/500] time 0.880 (0.889) data 0.000 (0.002) loss 0.8013 (1.1438) acc 78.1250 (71.1914) lr 1.6374e-03 eta 4:12:05 +epoch [16/50] batch [485/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.8145 (1.1462) acc 62.5000 (71.1534) lr 1.6374e-03 eta 4:11:58 +epoch [16/50] batch [490/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.7334 (1.1459) acc 78.1250 (71.1862) lr 1.6374e-03 eta 4:11:52 +epoch [16/50] batch [495/500] time 0.883 (0.888) data 0.000 
(0.002) loss 1.3262 (1.1428) acc 71.8750 (71.2374) lr 1.6374e-03 eta 4:11:46 +epoch [16/50] batch [500/500] time 0.883 (0.888) data 0.000 (0.001) loss 0.8799 (1.1419) acc 68.7500 (71.2375) lr 1.5878e-03 eta 4:11:42 +epoch [17/50] batch [5/500] time 0.899 (1.030) data 0.000 (0.137) loss 1.2383 (0.9510) acc 78.1250 (75.0000) lr 1.5878e-03 eta 4:51:44 +epoch [17/50] batch [10/500] time 0.868 (0.954) data 0.000 (0.069) loss 1.1670 (1.0809) acc 71.8750 (72.8125) lr 1.5878e-03 eta 4:30:12 +epoch [17/50] batch [15/500] time 0.893 (0.933) data 0.000 (0.046) loss 1.1455 (1.0467) acc 65.6250 (73.5417) lr 1.5878e-03 eta 4:24:05 +epoch [17/50] batch [20/500] time 0.890 (0.920) data 0.000 (0.034) loss 1.5020 (1.0450) acc 62.5000 (73.7500) lr 1.5878e-03 eta 4:20:29 +epoch [17/50] batch [25/500] time 0.993 (0.917) data 0.000 (0.028) loss 1.1143 (1.0700) acc 71.8750 (73.1250) lr 1.5878e-03 eta 4:19:34 +epoch [17/50] batch [30/500] time 0.878 (0.909) data 0.000 (0.023) loss 1.2490 (1.0884) acc 62.5000 (71.6667) lr 1.5878e-03 eta 4:17:05 +epoch [17/50] batch [35/500] time 0.855 (0.904) data 0.000 (0.020) loss 1.6035 (1.1062) acc 68.7500 (71.4286) lr 1.5878e-03 eta 4:15:36 +epoch [17/50] batch [40/500] time 0.886 (0.901) data 0.000 (0.017) loss 1.3574 (1.1008) acc 62.5000 (71.0156) lr 1.5878e-03 eta 4:14:34 +epoch [17/50] batch [45/500] time 0.905 (0.900) data 0.000 (0.015) loss 1.2363 (1.0899) acc 75.0000 (71.4583) lr 1.5878e-03 eta 4:14:20 +epoch [17/50] batch [50/500] time 0.870 (0.899) data 0.000 (0.014) loss 0.5459 (1.0714) acc 81.2500 (71.9375) lr 1.5878e-03 eta 4:14:06 +epoch [17/50] batch [55/500] time 0.889 (0.898) data 0.000 (0.013) loss 1.4209 (1.0572) acc 68.7500 (72.5568) lr 1.5878e-03 eta 4:13:33 +epoch [17/50] batch [60/500] time 0.859 (0.896) data 0.000 (0.012) loss 0.9536 (1.0693) acc 81.2500 (72.5521) lr 1.5878e-03 eta 4:13:00 +epoch [17/50] batch [65/500] time 0.880 (0.895) data 0.000 (0.011) loss 0.9834 (1.0696) acc 78.1250 (72.5481) lr 1.5878e-03 eta 4:12:43 
+epoch [17/50] batch [70/500] time 0.880 (0.894) data 0.000 (0.010) loss 1.5107 (1.0690) acc 62.5000 (72.5000) lr 1.5878e-03 eta 4:12:21 +epoch [17/50] batch [75/500] time 0.878 (0.895) data 0.000 (0.009) loss 1.5059 (1.0670) acc 59.3750 (72.5417) lr 1.5878e-03 eta 4:12:23 +epoch [17/50] batch [80/500] time 0.871 (0.894) data 0.000 (0.009) loss 0.5210 (1.0721) acc 81.2500 (72.1484) lr 1.5878e-03 eta 4:12:01 +epoch [17/50] batch [85/500] time 0.997 (0.894) data 0.000 (0.008) loss 1.6445 (1.0889) acc 65.6250 (71.9485) lr 1.5878e-03 eta 4:12:01 +epoch [17/50] batch [90/500] time 0.883 (0.894) data 0.000 (0.008) loss 0.7856 (1.0968) acc 78.1250 (71.7708) lr 1.5878e-03 eta 4:11:57 +epoch [17/50] batch [95/500] time 0.861 (0.893) data 0.000 (0.007) loss 1.3994 (1.0974) acc 68.7500 (72.0066) lr 1.5878e-03 eta 4:11:40 +epoch [17/50] batch [100/500] time 0.867 (0.893) data 0.000 (0.007) loss 1.2695 (1.1099) acc 62.5000 (71.6875) lr 1.5878e-03 eta 4:11:37 +epoch [17/50] batch [105/500] time 0.881 (0.893) data 0.000 (0.007) loss 1.2539 (1.1096) acc 65.6250 (71.7262) lr 1.5878e-03 eta 4:11:29 +epoch [17/50] batch [110/500] time 0.861 (0.893) data 0.000 (0.006) loss 0.6084 (1.1069) acc 81.2500 (71.7045) lr 1.5878e-03 eta 4:11:20 +epoch [17/50] batch [115/500] time 0.911 (0.893) data 0.000 (0.006) loss 1.0381 (1.1032) acc 68.7500 (71.6848) lr 1.5878e-03 eta 4:11:16 +epoch [17/50] batch [120/500] time 0.905 (0.893) data 0.000 (0.006) loss 0.7627 (1.0996) acc 81.2500 (71.8750) lr 1.5878e-03 eta 4:11:06 +epoch [17/50] batch [125/500] time 0.886 (0.893) data 0.000 (0.006) loss 1.2598 (1.1103) acc 75.0000 (71.7250) lr 1.5878e-03 eta 4:11:00 +epoch [17/50] batch [130/500] time 0.863 (0.893) data 0.000 (0.006) loss 0.9976 (1.1032) acc 75.0000 (71.8510) lr 1.5878e-03 eta 4:11:03 +epoch [17/50] batch [135/500] time 0.871 (0.893) data 0.000 (0.005) loss 0.8496 (1.0954) acc 75.0000 (71.9444) lr 1.5878e-03 eta 4:10:53 +epoch [17/50] batch [140/500] time 0.890 (0.892) data 0.000 (0.005) loss 
0.7036 (1.0974) acc 78.1250 (71.8304) lr 1.5878e-03 eta 4:10:41 +epoch [17/50] batch [145/500] time 0.879 (0.892) data 0.000 (0.005) loss 0.9443 (1.1003) acc 78.1250 (71.6595) lr 1.5878e-03 eta 4:10:35 +epoch [17/50] batch [150/500] time 0.870 (0.891) data 0.000 (0.005) loss 1.6738 (1.1056) acc 53.1250 (71.6042) lr 1.5878e-03 eta 4:10:15 +epoch [17/50] batch [155/500] time 0.889 (0.891) data 0.000 (0.005) loss 0.9922 (1.1102) acc 75.0000 (71.5524) lr 1.5878e-03 eta 4:10:08 +epoch [17/50] batch [160/500] time 0.896 (0.891) data 0.000 (0.005) loss 1.4199 (1.1176) acc 71.8750 (71.3867) lr 1.5878e-03 eta 4:10:01 +epoch [17/50] batch [165/500] time 0.901 (0.891) data 0.000 (0.004) loss 0.9023 (1.1126) acc 78.1250 (71.6098) lr 1.5878e-03 eta 4:09:53 +epoch [17/50] batch [170/500] time 0.887 (0.890) data 0.000 (0.004) loss 1.1240 (1.1093) acc 78.1250 (71.7279) lr 1.5878e-03 eta 4:09:42 +epoch [17/50] batch [175/500] time 0.877 (0.890) data 0.000 (0.004) loss 1.5176 (1.1098) acc 65.6250 (71.7143) lr 1.5878e-03 eta 4:09:36 +epoch [17/50] batch [180/500] time 0.866 (0.889) data 0.000 (0.004) loss 1.3154 (1.1090) acc 65.6250 (71.6840) lr 1.5878e-03 eta 4:09:20 +epoch [17/50] batch [185/500] time 0.896 (0.889) data 0.000 (0.004) loss 0.8076 (1.1108) acc 71.8750 (71.6385) lr 1.5878e-03 eta 4:09:12 +epoch [17/50] batch [190/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.8135 (1.1128) acc 62.5000 (71.6612) lr 1.5878e-03 eta 4:09:03 +epoch [17/50] batch [195/500] time 0.904 (0.889) data 0.000 (0.004) loss 0.5024 (1.1073) acc 78.1250 (71.7468) lr 1.5878e-03 eta 4:08:52 +epoch [17/50] batch [200/500] time 0.884 (0.888) data 0.000 (0.004) loss 1.0156 (1.1101) acc 71.8750 (71.7500) lr 1.5878e-03 eta 4:08:45 +epoch [17/50] batch [205/500] time 0.877 (0.888) data 0.000 (0.004) loss 1.2812 (1.1104) acc 68.7500 (71.6921) lr 1.5878e-03 eta 4:08:36 +epoch [17/50] batch [210/500] time 0.876 (0.888) data 0.000 (0.004) loss 1.2764 (1.1138) acc 65.6250 (71.6220) lr 1.5878e-03 eta 4:08:24 
+epoch [17/50] batch [215/500] time 0.909 (0.888) data 0.000 (0.003) loss 1.4570 (1.1208) acc 71.8750 (71.4099) lr 1.5878e-03 eta 4:08:17 +epoch [17/50] batch [220/500] time 0.900 (0.887) data 0.000 (0.003) loss 0.8999 (1.1154) acc 78.1250 (71.6051) lr 1.5878e-03 eta 4:08:12 +epoch [17/50] batch [225/500] time 0.921 (0.888) data 0.000 (0.003) loss 1.6260 (1.1162) acc 65.6250 (71.6806) lr 1.5878e-03 eta 4:08:11 +epoch [17/50] batch [230/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.3926 (1.1148) acc 62.5000 (71.7255) lr 1.5878e-03 eta 4:08:15 +epoch [17/50] batch [235/500] time 0.880 (0.888) data 0.001 (0.003) loss 0.6636 (1.1115) acc 84.3750 (71.8617) lr 1.5878e-03 eta 4:08:09 +epoch [17/50] batch [240/500] time 0.860 (0.888) data 0.000 (0.003) loss 0.9629 (1.1116) acc 84.3750 (71.8490) lr 1.5878e-03 eta 4:08:02 +epoch [17/50] batch [245/500] time 0.893 (0.888) data 0.000 (0.003) loss 1.6152 (1.1108) acc 65.6250 (71.8878) lr 1.5878e-03 eta 4:08:00 +epoch [17/50] batch [250/500] time 0.858 (0.888) data 0.000 (0.003) loss 1.2197 (1.1130) acc 71.8750 (71.8750) lr 1.5878e-03 eta 4:07:57 +epoch [17/50] batch [255/500] time 0.931 (0.888) data 0.000 (0.003) loss 0.9756 (1.1151) acc 75.0000 (71.8995) lr 1.5878e-03 eta 4:07:56 +epoch [17/50] batch [260/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.6577 (1.1161) acc 81.2500 (71.8750) lr 1.5878e-03 eta 4:07:48 +epoch [17/50] batch [265/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.1543 (1.1213) acc 71.8750 (71.8278) lr 1.5878e-03 eta 4:07:40 +epoch [17/50] batch [270/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.8633 (1.1201) acc 68.7500 (71.7940) lr 1.5878e-03 eta 4:07:35 +epoch [17/50] batch [275/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.5820 (1.1264) acc 65.6250 (71.7159) lr 1.5878e-03 eta 4:07:40 +epoch [17/50] batch [280/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.7695 (1.1317) acc 62.5000 (71.6183) lr 1.5878e-03 eta 4:07:39 +epoch [17/50] batch [285/500] time 0.880 (0.889) data 0.000 
(0.003) loss 1.0879 (1.1327) acc 78.1250 (71.6118) lr 1.5878e-03 eta 4:07:37 +epoch [17/50] batch [290/500] time 0.880 (0.889) data 0.000 (0.003) loss 1.1143 (1.1339) acc 75.0000 (71.5841) lr 1.5878e-03 eta 4:07:34 +epoch [17/50] batch [295/500] time 0.871 (0.889) data 0.000 (0.003) loss 1.4902 (1.1400) acc 71.8750 (71.5254) lr 1.5878e-03 eta 4:07:26 +epoch [17/50] batch [300/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.7383 (1.1417) acc 56.2500 (71.5104) lr 1.5878e-03 eta 4:07:18 +epoch [17/50] batch [305/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.8823 (1.1420) acc 71.8750 (71.4652) lr 1.5878e-03 eta 4:07:10 +epoch [17/50] batch [310/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.1934 (1.1398) acc 75.0000 (71.5423) lr 1.5878e-03 eta 4:07:04 +epoch [17/50] batch [315/500] time 0.935 (0.888) data 0.000 (0.002) loss 1.0479 (1.1406) acc 68.7500 (71.4980) lr 1.5878e-03 eta 4:07:03 +epoch [17/50] batch [320/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.0820 (1.1357) acc 75.0000 (71.6602) lr 1.5878e-03 eta 4:06:54 +epoch [17/50] batch [325/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9600 (1.1365) acc 75.0000 (71.6250) lr 1.5878e-03 eta 4:06:47 +epoch [17/50] batch [330/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.5361 (1.1328) acc 84.3750 (71.7140) lr 1.5878e-03 eta 4:06:43 +epoch [17/50] batch [335/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.1924 (1.1315) acc 59.3750 (71.7071) lr 1.5878e-03 eta 4:06:35 +epoch [17/50] batch [340/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.3877 (1.1389) acc 65.6250 (71.5441) lr 1.5878e-03 eta 4:06:31 +epoch [17/50] batch [345/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.0518 (1.1349) acc 81.2500 (71.6123) lr 1.5878e-03 eta 4:06:27 +epoch [17/50] batch [350/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2178 (1.1315) acc 65.6250 (71.6964) lr 1.5878e-03 eta 4:06:23 +epoch [17/50] batch [355/500] time 0.850 (0.888) data 0.000 (0.002) loss 1.2754 (1.1333) acc 65.6250 (71.6725) lr 1.5878e-03 
eta 4:06:15 +epoch [17/50] batch [360/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.7217 (1.1332) acc 90.6250 (71.7014) lr 1.5878e-03 eta 4:06:11 +epoch [17/50] batch [365/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.1152 (1.1321) acc 75.0000 (71.7123) lr 1.5878e-03 eta 4:06:07 +epoch [17/50] batch [370/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.9272 (1.1299) acc 71.8750 (71.7314) lr 1.5878e-03 eta 4:06:03 +epoch [17/50] batch [375/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.3682 (1.1305) acc 62.5000 (71.7000) lr 1.5878e-03 eta 4:06:01 +epoch [17/50] batch [380/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.7148 (1.1312) acc 81.2500 (71.6859) lr 1.5878e-03 eta 4:05:54 +epoch [17/50] batch [385/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.6128 (1.1310) acc 78.1250 (71.7045) lr 1.5878e-03 eta 4:05:45 +epoch [17/50] batch [390/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.1045 (1.1310) acc 71.8750 (71.6426) lr 1.5878e-03 eta 4:05:40 +epoch [17/50] batch [395/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.1025 (1.1283) acc 75.0000 (71.6693) lr 1.5878e-03 eta 4:05:32 +epoch [17/50] batch [400/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.7837 (1.1266) acc 75.0000 (71.7109) lr 1.5878e-03 eta 4:05:24 +epoch [17/50] batch [405/500] time 0.856 (0.887) data 0.000 (0.002) loss 0.7100 (1.1274) acc 81.2500 (71.7284) lr 1.5878e-03 eta 4:05:18 +epoch [17/50] batch [410/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.9565 (1.1269) acc 71.8750 (71.7683) lr 1.5878e-03 eta 4:05:13 +epoch [17/50] batch [415/500] time 0.960 (0.887) data 0.000 (0.002) loss 0.9512 (1.1247) acc 81.2500 (71.8072) lr 1.5878e-03 eta 4:05:12 +epoch [17/50] batch [420/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.2822 (1.1278) acc 59.3750 (71.7336) lr 1.5878e-03 eta 4:05:07 +epoch [17/50] batch [425/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.3096 (1.1286) acc 68.7500 (71.7059) lr 1.5878e-03 eta 4:05:03 +epoch [17/50] batch [430/500] time 0.904 (0.887) data 
0.000 (0.002) loss 1.3682 (1.1261) acc 62.5000 (71.7369) lr 1.5878e-03 eta 4:05:02 +epoch [17/50] batch [435/500] time 0.851 (0.887) data 0.000 (0.002) loss 1.4492 (1.1288) acc 65.6250 (71.6882) lr 1.5878e-03 eta 4:04:51 +epoch [17/50] batch [440/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.8887 (1.1294) acc 81.2500 (71.7188) lr 1.5878e-03 eta 4:04:45 +epoch [17/50] batch [445/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.3291 (1.1281) acc 65.6250 (71.7275) lr 1.5878e-03 eta 4:04:42 +epoch [17/50] batch [450/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.2314 (1.1334) acc 68.7500 (71.6389) lr 1.5878e-03 eta 4:04:37 +epoch [17/50] batch [455/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.1592 (1.1337) acc 71.8750 (71.6209) lr 1.5878e-03 eta 4:04:34 +epoch [17/50] batch [460/500] time 0.912 (0.887) data 0.000 (0.002) loss 0.7812 (1.1320) acc 71.8750 (71.6101) lr 1.5878e-03 eta 4:04:30 +epoch [17/50] batch [465/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.8818 (1.1322) acc 78.1250 (71.5927) lr 1.5878e-03 eta 4:04:25 +epoch [17/50] batch [470/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.2363 (1.1314) acc 78.1250 (71.6489) lr 1.5878e-03 eta 4:04:20 +epoch [17/50] batch [475/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.1191 (1.1313) acc 68.7500 (71.6447) lr 1.5878e-03 eta 4:04:14 +epoch [17/50] batch [480/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.0107 (1.1295) acc 65.6250 (71.6341) lr 1.5878e-03 eta 4:04:09 +epoch [17/50] batch [485/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.3594 (1.1298) acc 62.5000 (71.6173) lr 1.5878e-03 eta 4:04:03 +epoch [17/50] batch [490/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.5747 (1.1282) acc 84.3750 (71.6582) lr 1.5878e-03 eta 4:03:59 +epoch [17/50] batch [495/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.5254 (1.1294) acc 59.3750 (71.6225) lr 1.5878e-03 eta 4:03:56 +epoch [17/50] batch [500/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.0439 (1.1287) acc 71.8750 (71.6500) lr 
1.5358e-03 eta 4:03:50 +epoch [18/50] batch [5/500] time 0.870 (1.031) data 0.000 (0.136) loss 0.6836 (1.1846) acc 84.3750 (73.1250) lr 1.5358e-03 eta 4:43:20 +epoch [18/50] batch [10/500] time 1.023 (0.971) data 0.000 (0.068) loss 0.4331 (1.0475) acc 90.6250 (75.3125) lr 1.5358e-03 eta 4:26:58 +epoch [18/50] batch [15/500] time 0.912 (0.942) data 0.000 (0.045) loss 1.1162 (1.0772) acc 78.1250 (76.0417) lr 1.5358e-03 eta 4:18:49 +epoch [18/50] batch [20/500] time 0.908 (0.928) data 0.000 (0.034) loss 1.0732 (1.0626) acc 75.0000 (75.4688) lr 1.5358e-03 eta 4:14:48 +epoch [18/50] batch [25/500] time 0.896 (0.918) data 0.000 (0.027) loss 0.7007 (1.0727) acc 75.0000 (75.6250) lr 1.5358e-03 eta 4:11:58 +epoch [18/50] batch [30/500] time 0.878 (0.911) data 0.000 (0.023) loss 0.8623 (1.0552) acc 75.0000 (75.5208) lr 1.5358e-03 eta 4:09:57 +epoch [18/50] batch [35/500] time 0.894 (0.908) data 0.000 (0.020) loss 1.4834 (1.0811) acc 68.7500 (74.9107) lr 1.5358e-03 eta 4:09:18 +epoch [18/50] batch [40/500] time 0.875 (0.905) data 0.000 (0.017) loss 1.0986 (1.0718) acc 62.5000 (74.7656) lr 1.5358e-03 eta 4:08:19 +epoch [18/50] batch [45/500] time 0.919 (0.902) data 0.000 (0.015) loss 1.4746 (1.0639) acc 59.3750 (74.3056) lr 1.5358e-03 eta 4:07:22 +epoch [18/50] batch [50/500] time 0.867 (0.900) data 0.000 (0.014) loss 1.3076 (1.0555) acc 71.8750 (74.1875) lr 1.5358e-03 eta 4:06:48 +epoch [18/50] batch [55/500] time 0.901 (0.899) data 0.000 (0.013) loss 0.6699 (1.0287) acc 78.1250 (74.6591) lr 1.5358e-03 eta 4:06:19 +epoch [18/50] batch [60/500] time 0.867 (0.897) data 0.000 (0.012) loss 0.8442 (1.0291) acc 65.6250 (74.4792) lr 1.5358e-03 eta 4:05:50 +epoch [18/50] batch [65/500] time 0.876 (0.895) data 0.000 (0.011) loss 1.2236 (1.0278) acc 59.3750 (74.1827) lr 1.5358e-03 eta 4:05:15 +epoch [18/50] batch [70/500] time 0.988 (0.897) data 0.000 (0.010) loss 1.2988 (1.0301) acc 68.7500 (73.8839) lr 1.5358e-03 eta 4:05:32 +epoch [18/50] batch [75/500] time 0.879 (0.896) data 0.000 
(0.009) loss 1.0566 (1.0300) acc 81.2500 (74.0833) lr 1.5358e-03 eta 4:05:13 +epoch [18/50] batch [80/500] time 0.881 (0.896) data 0.000 (0.009) loss 0.7744 (1.0320) acc 81.2500 (73.9062) lr 1.5358e-03 eta 4:05:13 +epoch [18/50] batch [85/500] time 0.900 (0.895) data 0.000 (0.008) loss 1.3838 (1.0518) acc 68.7500 (73.4191) lr 1.5358e-03 eta 4:04:50 +epoch [18/50] batch [90/500] time 0.852 (0.894) data 0.000 (0.008) loss 1.8730 (1.0665) acc 62.5000 (73.1944) lr 1.5358e-03 eta 4:04:27 +epoch [18/50] batch [95/500] time 0.878 (0.894) data 0.000 (0.007) loss 1.0459 (1.0726) acc 71.8750 (73.1579) lr 1.5358e-03 eta 4:04:19 +epoch [18/50] batch [100/500] time 0.916 (0.894) data 0.000 (0.007) loss 1.4707 (1.0801) acc 62.5000 (72.8438) lr 1.5358e-03 eta 4:04:19 +epoch [18/50] batch [105/500] time 0.883 (0.893) data 0.000 (0.007) loss 0.8740 (1.0762) acc 81.2500 (73.0655) lr 1.5358e-03 eta 4:04:00 +epoch [18/50] batch [110/500] time 0.875 (0.892) data 0.000 (0.006) loss 1.4023 (1.0925) acc 68.7500 (72.7273) lr 1.5358e-03 eta 4:03:46 +epoch [18/50] batch [115/500] time 0.894 (0.893) data 0.000 (0.006) loss 0.7554 (1.0922) acc 81.2500 (72.7174) lr 1.5358e-03 eta 4:03:56 +epoch [18/50] batch [120/500] time 0.860 (0.892) data 0.000 (0.006) loss 1.2812 (1.1014) acc 68.7500 (72.3698) lr 1.5358e-03 eta 4:03:38 +epoch [18/50] batch [125/500] time 0.886 (0.892) data 0.000 (0.006) loss 1.0098 (1.0978) acc 78.1250 (72.5000) lr 1.5358e-03 eta 4:03:28 +epoch [18/50] batch [130/500] time 0.895 (0.892) data 0.000 (0.005) loss 0.8594 (1.0920) acc 78.1250 (72.5481) lr 1.5358e-03 eta 4:03:22 +epoch [18/50] batch [135/500] time 0.874 (0.892) data 0.000 (0.005) loss 1.2783 (1.0952) acc 68.7500 (72.4074) lr 1.5358e-03 eta 4:03:19 +epoch [18/50] batch [140/500] time 0.878 (0.892) data 0.000 (0.005) loss 1.4629 (1.0917) acc 65.6250 (72.5223) lr 1.5358e-03 eta 4:03:09 +epoch [18/50] batch [145/500] time 0.895 (0.892) data 0.000 (0.005) loss 1.0176 (1.0935) acc 68.7500 (72.4569) lr 1.5358e-03 eta 
4:03:04 +epoch [18/50] batch [150/500] time 0.901 (0.892) data 0.000 (0.005) loss 1.7266 (1.1038) acc 53.1250 (72.2708) lr 1.5358e-03 eta 4:02:57 +epoch [18/50] batch [155/500] time 0.920 (0.892) data 0.000 (0.005) loss 1.1680 (1.1132) acc 59.3750 (71.9758) lr 1.5358e-03 eta 4:02:53 +epoch [18/50] batch [160/500] time 0.897 (0.892) data 0.000 (0.004) loss 1.3984 (1.1146) acc 65.6250 (71.9531) lr 1.5358e-03 eta 4:02:49 +epoch [18/50] batch [165/500] time 0.851 (0.891) data 0.000 (0.004) loss 0.5488 (1.1155) acc 78.1250 (71.9886) lr 1.5358e-03 eta 4:02:42 +epoch [18/50] batch [170/500] time 0.910 (0.892) data 0.000 (0.004) loss 1.2100 (1.1169) acc 71.8750 (72.0588) lr 1.5358e-03 eta 4:02:39 +epoch [18/50] batch [175/500] time 0.894 (0.891) data 0.000 (0.004) loss 0.9414 (1.1184) acc 75.0000 (72.0000) lr 1.5358e-03 eta 4:02:30 +epoch [18/50] batch [180/500] time 0.916 (0.891) data 0.000 (0.004) loss 1.1201 (1.1180) acc 68.7500 (72.0312) lr 1.5358e-03 eta 4:02:28 +epoch [18/50] batch [185/500] time 0.869 (0.892) data 0.000 (0.004) loss 0.6543 (1.1071) acc 87.5000 (72.2973) lr 1.5358e-03 eta 4:02:25 +epoch [18/50] batch [190/500] time 0.900 (0.891) data 0.000 (0.004) loss 1.3477 (1.1058) acc 68.7500 (72.2862) lr 1.5358e-03 eta 4:02:18 +epoch [18/50] batch [195/500] time 0.871 (0.891) data 0.000 (0.004) loss 1.2695 (1.1063) acc 59.3750 (72.2115) lr 1.5358e-03 eta 4:02:12 +epoch [18/50] batch [200/500] time 0.905 (0.891) data 0.000 (0.004) loss 0.8096 (1.1066) acc 84.3750 (72.2656) lr 1.5358e-03 eta 4:02:06 +epoch [18/50] batch [205/500] time 0.884 (0.891) data 0.000 (0.004) loss 0.6768 (1.1017) acc 81.2500 (72.2561) lr 1.5358e-03 eta 4:02:00 +epoch [18/50] batch [210/500] time 0.897 (0.891) data 0.000 (0.003) loss 0.6553 (1.1019) acc 81.2500 (72.2173) lr 1.5358e-03 eta 4:01:50 +epoch [18/50] batch [215/500] time 0.904 (0.891) data 0.000 (0.003) loss 1.3486 (1.0995) acc 68.7500 (72.2384) lr 1.5358e-03 eta 4:01:44 +epoch [18/50] batch [220/500] time 0.901 (0.890) data 
0.000 (0.003) loss 1.5762 (1.1003) acc 65.6250 (72.2443) lr 1.5358e-03 eta 4:01:36 +epoch [18/50] batch [225/500] time 0.910 (0.890) data 0.000 (0.003) loss 1.1387 (1.0998) acc 71.8750 (72.3333) lr 1.5358e-03 eta 4:01:32 +epoch [18/50] batch [230/500] time 0.883 (0.890) data 0.000 (0.003) loss 0.9644 (1.1000) acc 75.0000 (72.2690) lr 1.5358e-03 eta 4:01:25 +epoch [18/50] batch [235/500] time 0.864 (0.890) data 0.000 (0.003) loss 1.1064 (1.1004) acc 71.8750 (72.2606) lr 1.5358e-03 eta 4:01:14 +epoch [18/50] batch [240/500] time 0.865 (0.890) data 0.000 (0.003) loss 1.2715 (1.1015) acc 78.1250 (72.3177) lr 1.5358e-03 eta 4:01:07 +epoch [18/50] batch [245/500] time 0.900 (0.889) data 0.000 (0.003) loss 1.1650 (1.1043) acc 65.6250 (72.2321) lr 1.5358e-03 eta 4:00:57 +epoch [18/50] batch [250/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.0986 (1.1038) acc 75.0000 (72.3500) lr 1.5358e-03 eta 4:00:51 +epoch [18/50] batch [255/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.9634 (1.1056) acc 68.7500 (72.3407) lr 1.5358e-03 eta 4:00:46 +epoch [18/50] batch [260/500] time 0.916 (0.890) data 0.000 (0.003) loss 1.3896 (1.1118) acc 68.7500 (72.2596) lr 1.5358e-03 eta 4:00:48 +epoch [18/50] batch [265/500] time 0.862 (0.890) data 0.000 (0.003) loss 2.3867 (1.1139) acc 59.3750 (72.2877) lr 1.5358e-03 eta 4:00:43 +epoch [18/50] batch [270/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.0732 (1.1131) acc 68.7500 (72.3495) lr 1.5358e-03 eta 4:00:32 +epoch [18/50] batch [275/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.3721 (1.1140) acc 65.6250 (72.3068) lr 1.5358e-03 eta 4:00:23 +epoch [18/50] batch [280/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.5518 (1.1130) acc 87.5000 (72.3772) lr 1.5358e-03 eta 4:00:20 +epoch [18/50] batch [285/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.2334 (1.1126) acc 68.7500 (72.4013) lr 1.5358e-03 eta 4:00:12 +epoch [18/50] batch [290/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.8047 (1.1086) acc 75.0000 (72.5000) lr 
1.5358e-03 eta 4:00:07 +epoch [18/50] batch [295/500] time 0.899 (0.889) data 0.001 (0.003) loss 0.9507 (1.1084) acc 65.6250 (72.4364) lr 1.5358e-03 eta 4:00:00 +epoch [18/50] batch [300/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.6387 (1.1129) acc 59.3750 (72.3125) lr 1.5358e-03 eta 3:59:55 +epoch [18/50] batch [305/500] time 0.845 (0.888) data 0.000 (0.002) loss 1.0127 (1.1154) acc 71.8750 (72.2439) lr 1.5358e-03 eta 3:59:47 +epoch [18/50] batch [310/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.6494 (1.1122) acc 81.2500 (72.2681) lr 1.5358e-03 eta 3:59:40 +epoch [18/50] batch [315/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.3701 (1.1136) acc 71.8750 (72.2321) lr 1.5358e-03 eta 3:59:34 +epoch [18/50] batch [320/500] time 0.926 (0.888) data 0.000 (0.002) loss 0.5913 (1.1125) acc 78.1250 (72.1973) lr 1.5358e-03 eta 3:59:33 +epoch [18/50] batch [325/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.2773 (1.1102) acc 75.0000 (72.2596) lr 1.5358e-03 eta 3:59:28 +epoch [18/50] batch [330/500] time 0.903 (0.888) data 0.000 (0.002) loss 0.9819 (1.1123) acc 78.1250 (72.2443) lr 1.5358e-03 eta 3:59:23 +epoch [18/50] batch [335/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.4902 (1.1139) acc 62.5000 (72.1642) lr 1.5358e-03 eta 3:59:22 +epoch [18/50] batch [340/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.2910 (1.1152) acc 62.5000 (72.1324) lr 1.5358e-03 eta 3:59:14 +epoch [18/50] batch [345/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.9751 (1.1139) acc 78.1250 (72.1558) lr 1.5358e-03 eta 3:59:09 +epoch [18/50] batch [350/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4805 (1.1148) acc 68.7500 (72.1429) lr 1.5358e-03 eta 3:59:01 +epoch [18/50] batch [355/500] time 0.917 (0.888) data 0.000 (0.002) loss 2.1406 (1.1206) acc 50.0000 (72.0335) lr 1.5358e-03 eta 3:58:55 +epoch [18/50] batch [360/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.7183 (1.1182) acc 68.7500 (72.0573) lr 1.5358e-03 eta 3:58:52 +epoch [18/50] batch [365/500] time 0.880 
(0.888) data 0.000 (0.002) loss 1.9805 (1.1218) acc 43.7500 (71.9863) lr 1.5358e-03 eta 3:58:45 +epoch [18/50] batch [370/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.5957 (1.1238) acc 56.2500 (71.8497) lr 1.5358e-03 eta 3:58:41 +epoch [18/50] batch [375/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.0957 (1.1234) acc 71.8750 (71.8417) lr 1.5358e-03 eta 3:58:38 +epoch [18/50] batch [380/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.9800 (1.1215) acc 71.8750 (71.8668) lr 1.5358e-03 eta 3:58:32 +epoch [18/50] batch [385/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.1396 (1.1222) acc 62.5000 (71.7614) lr 1.5358e-03 eta 3:58:28 +epoch [18/50] batch [390/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.9834 (1.1220) acc 75.0000 (71.7628) lr 1.5358e-03 eta 3:58:23 +epoch [18/50] batch [395/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8882 (1.1189) acc 71.8750 (71.8117) lr 1.5358e-03 eta 3:58:17 +epoch [18/50] batch [400/500] time 1.046 (0.888) data 0.000 (0.002) loss 1.4863 (1.1177) acc 65.6250 (71.8516) lr 1.5358e-03 eta 3:58:16 +epoch [18/50] batch [405/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.7119 (1.1194) acc 62.5000 (71.8827) lr 1.5358e-03 eta 3:58:10 +epoch [18/50] batch [410/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.9424 (1.1195) acc 65.6250 (71.8521) lr 1.5358e-03 eta 3:58:02 +epoch [18/50] batch [415/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.9517 (1.1181) acc 78.1250 (71.8825) lr 1.5358e-03 eta 3:57:53 +epoch [18/50] batch [420/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2998 (1.1217) acc 65.6250 (71.7634) lr 1.5358e-03 eta 3:57:49 +epoch [18/50] batch [425/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.0752 (1.1213) acc 71.8750 (71.8015) lr 1.5358e-03 eta 3:57:44 +epoch [18/50] batch [430/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.7197 (1.1204) acc 81.2500 (71.8314) lr 1.5358e-03 eta 3:57:40 +epoch [18/50] batch [435/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.8633 (1.1201) acc 78.1250 
(71.8534) lr 1.5358e-03 eta 3:57:37 +epoch [18/50] batch [440/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.2842 (1.1215) acc 75.0000 (71.8040) lr 1.5358e-03 eta 3:57:32 +epoch [18/50] batch [445/500] time 0.924 (0.888) data 0.000 (0.002) loss 1.3506 (1.1228) acc 68.7500 (71.8118) lr 1.5358e-03 eta 3:57:30 +epoch [18/50] batch [450/500] time 0.866 (0.888) data 0.001 (0.002) loss 1.3018 (1.1240) acc 59.3750 (71.8056) lr 1.5358e-03 eta 3:57:24 +epoch [18/50] batch [455/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.0059 (1.1252) acc 65.6250 (71.7514) lr 1.5358e-03 eta 3:57:21 +epoch [18/50] batch [460/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.9248 (1.1268) acc 78.1250 (71.7052) lr 1.5358e-03 eta 3:57:18 +epoch [18/50] batch [465/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.0322 (1.1301) acc 75.0000 (71.7137) lr 1.5358e-03 eta 3:57:10 +epoch [18/50] batch [470/500] time 0.905 (0.887) data 0.000 (0.002) loss 1.2842 (1.1286) acc 71.8750 (71.7287) lr 1.5358e-03 eta 3:57:05 +epoch [18/50] batch [475/500] time 0.855 (0.887) data 0.000 (0.002) loss 1.1680 (1.1284) acc 62.5000 (71.7434) lr 1.5358e-03 eta 3:57:00 +epoch [18/50] batch [480/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.4717 (1.1293) acc 68.7500 (71.7448) lr 1.5358e-03 eta 3:56:56 +epoch [18/50] batch [485/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.7422 (1.1313) acc 84.3750 (71.7139) lr 1.5358e-03 eta 3:56:51 +epoch [18/50] batch [490/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.9082 (1.1323) acc 78.1250 (71.6773) lr 1.5358e-03 eta 3:56:49 +epoch [18/50] batch [495/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9370 (1.1321) acc 71.8750 (71.6856) lr 1.5358e-03 eta 3:56:42 +epoch [18/50] batch [500/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.1348 (1.1337) acc 65.6250 (71.6188) lr 1.4818e-03 eta 3:56:41 +epoch [19/50] batch [5/500] time 0.901 (1.027) data 0.000 (0.124) loss 0.8916 (1.2646) acc 78.1250 (69.3750) lr 1.4818e-03 eta 4:33:49 +epoch [19/50] batch [10/500] 
time 0.904 (0.961) data 0.000 (0.062) loss 1.0781 (1.1807) acc 68.7500 (71.2500) lr 1.4818e-03 eta 4:15:59 +epoch [19/50] batch [15/500] time 0.905 (0.937) data 0.000 (0.042) loss 1.1152 (1.1865) acc 71.8750 (71.0417) lr 1.4818e-03 eta 4:09:32 +epoch [19/50] batch [20/500] time 0.878 (0.924) data 0.000 (0.031) loss 0.5068 (1.1651) acc 81.2500 (70.7812) lr 1.4818e-03 eta 4:06:00 +epoch [19/50] batch [25/500] time 0.880 (0.915) data 0.000 (0.025) loss 0.7520 (1.1364) acc 78.1250 (71.6250) lr 1.4818e-03 eta 4:03:38 +epoch [19/50] batch [30/500] time 0.873 (0.909) data 0.000 (0.021) loss 1.2012 (1.1269) acc 75.0000 (72.3958) lr 1.4818e-03 eta 4:01:54 +epoch [19/50] batch [35/500] time 0.892 (0.907) data 0.000 (0.018) loss 0.6445 (1.0975) acc 84.3750 (72.5893) lr 1.4818e-03 eta 4:01:14 +epoch [19/50] batch [40/500] time 0.879 (0.904) data 0.000 (0.016) loss 1.6729 (1.1433) acc 65.6250 (72.1094) lr 1.4818e-03 eta 4:00:34 +epoch [19/50] batch [45/500] time 0.882 (0.901) data 0.000 (0.014) loss 1.3301 (1.1456) acc 71.8750 (72.6389) lr 1.4818e-03 eta 3:59:36 +epoch [19/50] batch [50/500] time 0.863 (0.899) data 0.000 (0.013) loss 0.5498 (1.1205) acc 84.3750 (73.0000) lr 1.4818e-03 eta 3:59:03 +epoch [19/50] batch [55/500] time 0.894 (0.899) data 0.000 (0.012) loss 1.4844 (1.1167) acc 65.6250 (72.9545) lr 1.4818e-03 eta 3:58:51 +epoch [19/50] batch [60/500] time 0.879 (0.900) data 0.000 (0.011) loss 1.1025 (1.1022) acc 81.2500 (73.4896) lr 1.4818e-03 eta 3:59:10 +epoch [19/50] batch [65/500] time 0.937 (0.899) data 0.000 (0.010) loss 1.0986 (1.0930) acc 68.7500 (73.3654) lr 1.4818e-03 eta 3:58:52 +epoch [19/50] batch [70/500] time 0.927 (0.901) data 0.000 (0.009) loss 0.8906 (1.0990) acc 71.8750 (72.8571) lr 1.4818e-03 eta 3:59:07 +epoch [19/50] batch [75/500] time 0.894 (0.901) data 0.000 (0.009) loss 1.7383 (1.0988) acc 65.6250 (72.9167) lr 1.4818e-03 eta 3:59:04 +epoch [19/50] batch [80/500] time 0.846 (0.900) data 0.000 (0.008) loss 1.5791 (1.1013) acc 71.8750 (73.0469) 
lr 1.4818e-03 eta 3:58:44 +epoch [19/50] batch [85/500] time 0.885 (0.900) data 0.000 (0.008) loss 1.2051 (1.0936) acc 84.3750 (73.4926) lr 1.4818e-03 eta 3:58:37 +epoch [19/50] batch [90/500] time 0.872 (0.899) data 0.000 (0.007) loss 0.5151 (1.0923) acc 84.3750 (73.6111) lr 1.4818e-03 eta 3:58:18 +epoch [19/50] batch [95/500] time 0.902 (0.898) data 0.000 (0.007) loss 0.8921 (1.0880) acc 68.7500 (73.7171) lr 1.4818e-03 eta 3:58:04 +epoch [19/50] batch [100/500] time 0.890 (0.898) data 0.000 (0.006) loss 1.5635 (1.0927) acc 68.7500 (73.4375) lr 1.4818e-03 eta 3:57:53 +epoch [19/50] batch [105/500] time 0.899 (0.899) data 0.000 (0.006) loss 1.1338 (1.1007) acc 65.6250 (73.0655) lr 1.4818e-03 eta 3:58:04 +epoch [19/50] batch [110/500] time 0.938 (0.899) data 0.000 (0.006) loss 0.7148 (1.1122) acc 84.3750 (72.8125) lr 1.4818e-03 eta 3:58:06 +epoch [19/50] batch [115/500] time 0.876 (0.898) data 0.000 (0.006) loss 0.9463 (1.1135) acc 68.7500 (72.7446) lr 1.4818e-03 eta 3:57:51 +epoch [19/50] batch [120/500] time 0.895 (0.898) data 0.000 (0.005) loss 0.7603 (1.1135) acc 81.2500 (72.7344) lr 1.4818e-03 eta 3:57:44 +epoch [19/50] batch [125/500] time 0.871 (0.897) data 0.000 (0.005) loss 0.8906 (1.1163) acc 78.1250 (72.6000) lr 1.4818e-03 eta 3:57:27 +epoch [19/50] batch [130/500] time 0.896 (0.897) data 0.000 (0.005) loss 1.4229 (1.1255) acc 62.5000 (72.4519) lr 1.4818e-03 eta 3:57:23 +epoch [19/50] batch [135/500] time 0.904 (0.897) data 0.000 (0.005) loss 0.9824 (1.1211) acc 75.0000 (72.4769) lr 1.4818e-03 eta 3:57:15 +epoch [19/50] batch [140/500] time 0.882 (0.897) data 0.000 (0.005) loss 0.9146 (1.1202) acc 71.8750 (72.5446) lr 1.4818e-03 eta 3:57:01 +epoch [19/50] batch [145/500] time 0.908 (0.897) data 0.000 (0.005) loss 1.0371 (1.1088) acc 71.8750 (72.7155) lr 1.4818e-03 eta 3:56:57 +epoch [19/50] batch [150/500] time 0.897 (0.897) data 0.000 (0.004) loss 1.0420 (1.0980) acc 78.1250 (72.9167) lr 1.4818e-03 eta 3:56:55 +epoch [19/50] batch [155/500] time 0.911 
(0.897) data 0.000 (0.004) loss 0.6582 (1.0968) acc 87.5000 (72.8024) lr 1.4818e-03 eta 3:56:46 +epoch [19/50] batch [160/500] time 0.881 (0.897) data 0.000 (0.004) loss 0.9209 (1.0988) acc 78.1250 (72.8516) lr 1.4818e-03 eta 3:56:40 +epoch [19/50] batch [165/500] time 0.872 (0.896) data 0.000 (0.004) loss 1.5820 (1.1034) acc 59.3750 (72.7652) lr 1.4818e-03 eta 3:56:33 +epoch [19/50] batch [170/500] time 0.895 (0.896) data 0.000 (0.004) loss 0.7417 (1.1067) acc 84.3750 (72.7206) lr 1.4818e-03 eta 3:56:21 +epoch [19/50] batch [175/500] time 0.880 (0.896) data 0.000 (0.004) loss 0.8540 (1.1119) acc 78.1250 (72.5357) lr 1.4818e-03 eta 3:56:12 +epoch [19/50] batch [180/500] time 0.888 (0.895) data 0.000 (0.004) loss 1.6416 (1.1123) acc 65.6250 (72.4653) lr 1.4818e-03 eta 3:56:01 +epoch [19/50] batch [185/500] time 0.887 (0.895) data 0.000 (0.004) loss 0.9717 (1.1134) acc 75.0000 (72.4831) lr 1.4818e-03 eta 3:55:54 +epoch [19/50] batch [190/500] time 0.892 (0.895) data 0.000 (0.004) loss 0.7124 (1.1137) acc 87.5000 (72.3684) lr 1.4818e-03 eta 3:55:50 +epoch [19/50] batch [195/500] time 0.870 (0.895) data 0.000 (0.003) loss 0.8228 (1.1139) acc 75.0000 (72.3718) lr 1.4818e-03 eta 3:55:39 +epoch [19/50] batch [200/500] time 0.891 (0.895) data 0.000 (0.003) loss 1.8623 (1.1176) acc 56.2500 (72.2500) lr 1.4818e-03 eta 3:55:33 +epoch [19/50] batch [205/500] time 0.885 (0.894) data 0.000 (0.003) loss 1.0303 (1.1165) acc 62.5000 (72.1341) lr 1.4818e-03 eta 3:55:28 +epoch [19/50] batch [210/500] time 0.893 (0.894) data 0.000 (0.003) loss 0.5029 (1.1162) acc 87.5000 (72.2321) lr 1.4818e-03 eta 3:55:22 +epoch [19/50] batch [215/500] time 0.875 (0.894) data 0.000 (0.003) loss 1.0479 (1.1223) acc 75.0000 (72.1221) lr 1.4818e-03 eta 3:55:14 +epoch [19/50] batch [220/500] time 0.898 (0.894) data 0.000 (0.003) loss 1.0137 (1.1233) acc 78.1250 (72.1591) lr 1.4818e-03 eta 3:55:10 +epoch [19/50] batch [225/500] time 0.898 (0.894) data 0.000 (0.003) loss 1.3096 (1.1258) acc 59.3750 
(72.0972) lr 1.4818e-03 eta 3:55:04 +epoch [19/50] batch [230/500] time 0.903 (0.894) data 0.001 (0.003) loss 0.8301 (1.1244) acc 81.2500 (72.1603) lr 1.4818e-03 eta 3:54:58 +epoch [19/50] batch [235/500] time 0.900 (0.894) data 0.000 (0.003) loss 1.0342 (1.1252) acc 75.0000 (72.1809) lr 1.4818e-03 eta 3:54:55 +epoch [19/50] batch [240/500] time 0.886 (0.894) data 0.000 (0.003) loss 1.7012 (1.1285) acc 62.5000 (72.1224) lr 1.4818e-03 eta 3:54:52 +epoch [19/50] batch [245/500] time 0.923 (0.894) data 0.000 (0.003) loss 1.3467 (1.1293) acc 56.2500 (72.0791) lr 1.4818e-03 eta 3:54:50 +epoch [19/50] batch [250/500] time 0.915 (0.895) data 0.000 (0.003) loss 0.7207 (1.1285) acc 68.7500 (72.0750) lr 1.4818e-03 eta 3:54:55 +epoch [19/50] batch [255/500] time 0.872 (0.895) data 0.000 (0.003) loss 1.2354 (1.1269) acc 75.0000 (72.0833) lr 1.4818e-03 eta 3:54:50 +epoch [19/50] batch [260/500] time 0.934 (0.895) data 0.000 (0.003) loss 1.2559 (1.1278) acc 71.8750 (72.0312) lr 1.4818e-03 eta 3:54:53 +epoch [19/50] batch [265/500] time 0.909 (0.895) data 0.000 (0.003) loss 1.4561 (1.1310) acc 68.7500 (72.0283) lr 1.4818e-03 eta 3:54:49 +epoch [19/50] batch [270/500] time 0.873 (0.895) data 0.000 (0.003) loss 1.1562 (1.1308) acc 78.1250 (72.0370) lr 1.4818e-03 eta 3:54:38 +epoch [19/50] batch [275/500] time 0.900 (0.895) data 0.000 (0.002) loss 1.0654 (1.1341) acc 71.8750 (71.9773) lr 1.4818e-03 eta 3:54:31 +epoch [19/50] batch [280/500] time 0.902 (0.895) data 0.001 (0.002) loss 1.2803 (1.1334) acc 65.6250 (71.9866) lr 1.4818e-03 eta 3:54:26 +epoch [19/50] batch [285/500] time 0.873 (0.895) data 0.000 (0.002) loss 0.9277 (1.1325) acc 81.2500 (71.9846) lr 1.4818e-03 eta 3:54:18 +epoch [19/50] batch [290/500] time 0.878 (0.894) data 0.000 (0.002) loss 0.7349 (1.1280) acc 78.1250 (72.0582) lr 1.4818e-03 eta 3:54:08 +epoch [19/50] batch [295/500] time 0.903 (0.894) data 0.000 (0.002) loss 0.9355 (1.1268) acc 75.0000 (72.0339) lr 1.4818e-03 eta 3:54:02 +epoch [19/50] batch [300/500] 
time 0.860 (0.894) data 0.000 (0.002) loss 0.6958 (1.1243) acc 87.5000 (72.1458) lr 1.4818e-03 eta 3:53:53 +epoch [19/50] batch [305/500] time 0.871 (0.894) data 0.000 (0.002) loss 1.0645 (1.1212) acc 62.5000 (72.1516) lr 1.4818e-03 eta 3:53:49 +epoch [19/50] batch [310/500] time 0.910 (0.894) data 0.000 (0.002) loss 1.0098 (1.1226) acc 68.7500 (72.0464) lr 1.4818e-03 eta 3:53:41 +epoch [19/50] batch [315/500] time 0.853 (0.894) data 0.000 (0.002) loss 0.8516 (1.1203) acc 75.0000 (72.0833) lr 1.4818e-03 eta 3:53:34 +epoch [19/50] batch [320/500] time 0.910 (0.894) data 0.000 (0.002) loss 0.7930 (1.1181) acc 75.0000 (72.1387) lr 1.4818e-03 eta 3:53:32 +epoch [19/50] batch [325/500] time 0.880 (0.894) data 0.000 (0.002) loss 1.1670 (1.1193) acc 65.6250 (72.0673) lr 1.4818e-03 eta 3:53:28 +epoch [19/50] batch [330/500] time 0.887 (0.894) data 0.000 (0.002) loss 0.6821 (1.1186) acc 68.7500 (72.0170) lr 1.4818e-03 eta 3:53:24 +epoch [19/50] batch [335/500] time 0.931 (0.894) data 0.000 (0.002) loss 0.9668 (1.1214) acc 81.2500 (71.9869) lr 1.4818e-03 eta 3:53:23 +epoch [19/50] batch [340/500] time 0.866 (0.894) data 0.000 (0.002) loss 0.9653 (1.1217) acc 78.1250 (71.9853) lr 1.4818e-03 eta 3:53:16 +epoch [19/50] batch [345/500] time 1.003 (0.894) data 0.000 (0.002) loss 1.2227 (1.1230) acc 68.7500 (71.9384) lr 1.4818e-03 eta 3:53:17 +epoch [19/50] batch [350/500] time 0.873 (0.894) data 0.000 (0.002) loss 1.2725 (1.1250) acc 68.7500 (71.9196) lr 1.4818e-03 eta 3:53:10 +epoch [19/50] batch [355/500] time 0.861 (0.894) data 0.000 (0.002) loss 1.0322 (1.1267) acc 71.8750 (71.8750) lr 1.4818e-03 eta 3:53:03 +epoch [19/50] batch [360/500] time 0.902 (0.894) data 0.000 (0.002) loss 1.5566 (1.1269) acc 56.2500 (71.8316) lr 1.4818e-03 eta 3:52:57 +epoch [19/50] batch [365/500] time 0.901 (0.894) data 0.000 (0.002) loss 0.9087 (1.1290) acc 75.0000 (71.7808) lr 1.4818e-03 eta 3:52:51 +epoch [19/50] batch [370/500] time 0.871 (0.893) data 0.000 (0.002) loss 1.6250 (1.1315) acc 
65.6250 (71.7736) lr 1.4818e-03 eta 3:52:44 +epoch [19/50] batch [375/500] time 0.880 (0.893) data 0.000 (0.002) loss 1.2393 (1.1334) acc 65.6250 (71.7333) lr 1.4818e-03 eta 3:52:36 +epoch [19/50] batch [380/500] time 0.893 (0.893) data 0.000 (0.002) loss 1.3428 (1.1354) acc 59.3750 (71.6612) lr 1.4818e-03 eta 3:52:31 +epoch [19/50] batch [385/500] time 0.899 (0.893) data 0.000 (0.002) loss 1.1543 (1.1374) acc 65.6250 (71.5990) lr 1.4818e-03 eta 3:52:28 +epoch [19/50] batch [390/500] time 0.908 (0.894) data 0.000 (0.002) loss 1.2998 (1.1417) acc 68.7500 (71.5385) lr 1.4818e-03 eta 3:52:28 +epoch [19/50] batch [395/500] time 0.847 (0.893) data 0.000 (0.002) loss 1.0439 (1.1401) acc 62.5000 (71.5111) lr 1.4818e-03 eta 3:52:17 +epoch [19/50] batch [400/500] time 0.887 (0.893) data 0.000 (0.002) loss 1.3135 (1.1401) acc 75.0000 (71.5391) lr 1.4818e-03 eta 3:52:13 +epoch [19/50] batch [405/500] time 0.933 (0.893) data 0.000 (0.002) loss 0.7905 (1.1377) acc 75.0000 (71.5895) lr 1.4818e-03 eta 3:52:09 +epoch [19/50] batch [410/500] time 0.871 (0.893) data 0.000 (0.002) loss 0.8931 (1.1390) acc 75.0000 (71.5396) lr 1.4818e-03 eta 3:52:02 +epoch [19/50] batch [415/500] time 0.899 (0.893) data 0.000 (0.002) loss 1.0645 (1.1372) acc 71.8750 (71.5738) lr 1.4818e-03 eta 3:51:55 +epoch [19/50] batch [420/500] time 0.895 (0.893) data 0.000 (0.002) loss 1.0361 (1.1365) acc 75.0000 (71.5923) lr 1.4818e-03 eta 3:51:46 +epoch [19/50] batch [425/500] time 0.863 (0.893) data 0.000 (0.002) loss 0.4795 (1.1342) acc 90.6250 (71.6103) lr 1.4818e-03 eta 3:51:40 +epoch [19/50] batch [430/500] time 0.883 (0.892) data 0.000 (0.002) loss 1.1055 (1.1321) acc 65.6250 (71.6642) lr 1.4818e-03 eta 3:51:34 +epoch [19/50] batch [435/500] time 0.859 (0.892) data 0.000 (0.002) loss 1.1094 (1.1339) acc 71.8750 (71.6523) lr 1.4818e-03 eta 3:51:29 +epoch [19/50] batch [440/500] time 0.900 (0.892) data 0.000 (0.002) loss 1.4580 (1.1336) acc 62.5000 (71.6548) lr 1.4818e-03 eta 3:51:23 +epoch [19/50] batch 
[445/500] time 0.906 (0.892) data 0.000 (0.002) loss 0.9331 (1.1302) acc 75.0000 (71.7346) lr 1.4818e-03 eta 3:51:18 +epoch [19/50] batch [450/500] time 0.914 (0.892) data 0.000 (0.002) loss 1.2842 (1.1316) acc 56.2500 (71.6319) lr 1.4818e-03 eta 3:51:13 +epoch [19/50] batch [455/500] time 0.886 (0.892) data 0.000 (0.002) loss 0.6929 (1.1305) acc 87.5000 (71.6484) lr 1.4818e-03 eta 3:51:08 +epoch [19/50] batch [460/500] time 0.892 (0.892) data 0.000 (0.002) loss 0.9678 (1.1284) acc 81.2500 (71.7120) lr 1.4818e-03 eta 3:51:00 +epoch [19/50] batch [465/500] time 0.903 (0.892) data 0.000 (0.002) loss 1.0713 (1.1287) acc 71.8750 (71.7272) lr 1.4818e-03 eta 3:50:57 +epoch [19/50] batch [470/500] time 0.896 (0.892) data 0.000 (0.002) loss 0.4048 (1.1324) acc 96.8750 (71.6622) lr 1.4818e-03 eta 3:50:54 +epoch [19/50] batch [475/500] time 0.860 (0.892) data 0.000 (0.002) loss 0.7988 (1.1313) acc 81.2500 (71.6513) lr 1.4818e-03 eta 3:50:46 +epoch [19/50] batch [480/500] time 0.857 (0.892) data 0.000 (0.002) loss 1.3438 (1.1329) acc 65.6250 (71.6471) lr 1.4818e-03 eta 3:50:40 +epoch [19/50] batch [485/500] time 0.870 (0.892) data 0.000 (0.002) loss 1.3926 (1.1326) acc 65.6250 (71.6753) lr 1.4818e-03 eta 3:50:34 +epoch [19/50] batch [490/500] time 0.883 (0.892) data 0.000 (0.001) loss 1.0254 (1.1324) acc 71.8750 (71.6582) lr 1.4818e-03 eta 3:50:32 +epoch [19/50] batch [495/500] time 0.904 (0.892) data 0.000 (0.001) loss 1.4053 (1.1345) acc 68.7500 (71.6098) lr 1.4818e-03 eta 3:50:29 +epoch [19/50] batch [500/500] time 0.916 (0.892) data 0.000 (0.001) loss 0.5947 (1.1343) acc 78.1250 (71.6000) lr 1.4258e-03 eta 3:50:26 +epoch [20/50] batch [5/500] time 0.888 (1.030) data 0.000 (0.131) loss 1.2480 (1.2318) acc 62.5000 (66.8750) lr 1.4258e-03 eta 4:26:04 +epoch [20/50] batch [10/500] time 0.890 (0.959) data 0.000 (0.065) loss 1.3203 (1.2615) acc 68.7500 (67.8125) lr 1.4258e-03 eta 4:07:39 +epoch [20/50] batch [15/500] time 0.862 (0.935) data 0.000 (0.044) loss 0.9395 (1.2079) 
acc 78.1250 (69.1667) lr 1.4258e-03 eta 4:01:23 +epoch [20/50] batch [20/500] time 0.854 (0.917) data 0.000 (0.033) loss 0.8462 (1.1430) acc 81.2500 (70.7812) lr 1.4258e-03 eta 3:56:34 +epoch [20/50] batch [25/500] time 0.860 (0.908) data 0.000 (0.026) loss 1.8838 (1.1403) acc 59.3750 (70.8750) lr 1.4258e-03 eta 3:54:06 +epoch [20/50] batch [30/500] time 0.857 (0.905) data 0.000 (0.022) loss 1.1045 (1.1343) acc 65.6250 (70.8333) lr 1.4258e-03 eta 3:53:14 +epoch [20/50] batch [35/500] time 0.916 (0.902) data 0.000 (0.019) loss 1.4609 (1.1235) acc 62.5000 (70.8036) lr 1.4258e-03 eta 3:52:36 +epoch [20/50] batch [40/500] time 0.905 (0.901) data 0.000 (0.016) loss 1.3838 (1.1211) acc 68.7500 (71.0938) lr 1.4258e-03 eta 3:52:06 +epoch [20/50] batch [45/500] time 0.881 (0.901) data 0.000 (0.015) loss 0.7920 (1.1277) acc 75.0000 (70.9028) lr 1.4258e-03 eta 3:52:03 +epoch [20/50] batch [50/500] time 0.896 (0.901) data 0.000 (0.013) loss 1.1699 (1.1338) acc 75.0000 (70.7500) lr 1.4258e-03 eta 3:51:57 +epoch [20/50] batch [55/500] time 0.875 (0.899) data 0.000 (0.012) loss 1.4814 (1.1172) acc 65.6250 (71.2500) lr 1.4258e-03 eta 3:51:21 +epoch [20/50] batch [60/500] time 0.901 (0.898) data 0.000 (0.011) loss 1.2451 (1.1071) acc 75.0000 (71.4583) lr 1.4258e-03 eta 3:51:05 +epoch [20/50] batch [65/500] time 0.878 (0.897) data 0.000 (0.010) loss 1.1611 (1.0977) acc 62.5000 (71.6346) lr 1.4258e-03 eta 3:50:39 +epoch [20/50] batch [70/500] time 0.910 (0.896) data 0.000 (0.010) loss 1.0088 (1.0905) acc 71.8750 (71.8304) lr 1.4258e-03 eta 3:50:25 +epoch [20/50] batch [75/500] time 0.879 (0.895) data 0.000 (0.009) loss 0.6826 (1.1019) acc 81.2500 (71.6250) lr 1.4258e-03 eta 3:50:01 +epoch [20/50] batch [80/500] time 0.874 (0.893) data 0.000 (0.008) loss 1.0254 (1.1077) acc 68.7500 (71.4062) lr 1.4258e-03 eta 3:49:36 +epoch [20/50] batch [85/500] time 0.852 (0.893) data 0.000 (0.008) loss 1.1270 (1.1016) acc 78.1250 (71.4338) lr 1.4258e-03 eta 3:49:22 +epoch [20/50] batch [90/500] 
time 0.893 (0.893) data 0.000 (0.007) loss 1.1045 (1.1055) acc 75.0000 (71.4931) lr 1.4258e-03 eta 3:49:19 +epoch [20/50] batch [95/500] time 0.859 (0.892) data 0.000 (0.007) loss 0.9663 (1.0963) acc 71.8750 (71.6118) lr 1.4258e-03 eta 3:49:02 +epoch [20/50] batch [100/500] time 0.882 (0.892) data 0.000 (0.007) loss 0.9448 (1.0849) acc 71.8750 (71.8750) lr 1.4258e-03 eta 3:48:51 +epoch [20/50] batch [105/500] time 0.867 (0.892) data 0.000 (0.006) loss 1.7354 (1.0947) acc 65.6250 (71.9643) lr 1.4258e-03 eta 3:48:46 +epoch [20/50] batch [110/500] time 0.900 (0.892) data 0.000 (0.006) loss 1.7168 (1.1086) acc 65.6250 (71.7330) lr 1.4258e-03 eta 3:48:45 +epoch [20/50] batch [115/500] time 0.855 (0.891) data 0.000 (0.006) loss 1.3438 (1.1016) acc 56.2500 (71.7663) lr 1.4258e-03 eta 3:48:33 +epoch [20/50] batch [120/500] time 0.868 (0.891) data 0.000 (0.006) loss 0.9971 (1.0954) acc 68.7500 (71.9271) lr 1.4258e-03 eta 3:48:21 +epoch [20/50] batch [125/500] time 0.911 (0.891) data 0.000 (0.005) loss 1.2734 (1.0957) acc 65.6250 (72.0000) lr 1.4258e-03 eta 3:48:15 +epoch [20/50] batch [130/500] time 0.877 (0.890) data 0.001 (0.005) loss 1.2900 (1.0899) acc 59.3750 (72.0913) lr 1.4258e-03 eta 3:48:01 +epoch [20/50] batch [135/500] time 0.859 (0.890) data 0.000 (0.005) loss 1.1660 (1.0878) acc 71.8750 (72.0602) lr 1.4258e-03 eta 3:47:52 +epoch [20/50] batch [140/500] time 0.861 (0.890) data 0.000 (0.005) loss 1.8877 (1.0874) acc 59.3750 (72.2321) lr 1.4258e-03 eta 3:47:46 +epoch [20/50] batch [145/500] time 0.880 (0.889) data 0.000 (0.005) loss 1.1855 (1.0806) acc 65.6250 (72.3060) lr 1.4258e-03 eta 3:47:34 +epoch [20/50] batch [150/500] time 0.892 (0.890) data 0.000 (0.005) loss 0.8892 (1.0826) acc 68.7500 (72.2292) lr 1.4258e-03 eta 3:47:37 +epoch [20/50] batch [155/500] time 0.923 (0.890) data 0.000 (0.004) loss 0.8931 (1.0839) acc 71.8750 (72.2177) lr 1.4258e-03 eta 3:47:40 +epoch [20/50] batch [160/500] time 0.857 (0.890) data 0.000 (0.004) loss 0.9189 (1.0834) acc 
78.1250 (72.2266) lr 1.4258e-03 eta 3:47:28 +epoch [20/50] batch [165/500] time 0.922 (0.890) data 0.000 (0.004) loss 1.3555 (1.0849) acc 71.8750 (72.1023) lr 1.4258e-03 eta 3:47:25 +epoch [20/50] batch [170/500] time 0.854 (0.889) data 0.000 (0.004) loss 0.9629 (1.0839) acc 75.0000 (72.0956) lr 1.4258e-03 eta 3:47:14 +epoch [20/50] batch [175/500] time 0.905 (0.889) data 0.000 (0.004) loss 1.2461 (1.0875) acc 62.5000 (71.9643) lr 1.4258e-03 eta 3:47:06 +epoch [20/50] batch [180/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.1484 (1.0874) acc 75.0000 (71.9618) lr 1.4258e-03 eta 3:46:57 +epoch [20/50] batch [185/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.2734 (1.0920) acc 62.5000 (71.8581) lr 1.4258e-03 eta 3:46:51 +epoch [20/50] batch [190/500] time 0.882 (0.889) data 0.000 (0.004) loss 1.0732 (1.0917) acc 71.8750 (71.9079) lr 1.4258e-03 eta 3:46:56 +epoch [20/50] batch [195/500] time 0.888 (0.889) data 0.000 (0.004) loss 0.7383 (1.0970) acc 81.2500 (71.7949) lr 1.4258e-03 eta 3:46:52 +epoch [20/50] batch [200/500] time 0.860 (0.889) data 0.000 (0.003) loss 0.8604 (1.0931) acc 84.3750 (71.9688) lr 1.4258e-03 eta 3:46:41 +epoch [20/50] batch [205/500] time 0.923 (0.889) data 0.000 (0.003) loss 1.4385 (1.1000) acc 59.3750 (71.8598) lr 1.4258e-03 eta 3:46:39 +epoch [20/50] batch [210/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.4434 (1.1066) acc 68.7500 (71.6220) lr 1.4258e-03 eta 3:46:32 +epoch [20/50] batch [215/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.6660 (1.1099) acc 62.5000 (71.5988) lr 1.4258e-03 eta 3:46:22 +epoch [20/50] batch [220/500] time 0.875 (0.888) data 0.000 (0.003) loss 0.9585 (1.1026) acc 75.0000 (71.8182) lr 1.4258e-03 eta 3:46:15 +epoch [20/50] batch [225/500] time 0.907 (0.889) data 0.000 (0.003) loss 1.1680 (1.1038) acc 75.0000 (71.8472) lr 1.4258e-03 eta 3:46:12 +epoch [20/50] batch [230/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.9478 (1.1039) acc 68.7500 (71.8478) lr 1.4258e-03 eta 3:46:04 +epoch [20/50] batch 
[235/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.0918 (1.1064) acc 75.0000 (71.7686) lr 1.4258e-03 eta 3:46:06 +epoch [20/50] batch [240/500] time 0.880 (0.889) data 0.000 (0.003) loss 1.3809 (1.1080) acc 56.2500 (71.6927) lr 1.4258e-03 eta 3:45:59 +epoch [20/50] batch [245/500] time 0.927 (0.889) data 0.000 (0.003) loss 1.0166 (1.1108) acc 75.0000 (71.6709) lr 1.4258e-03 eta 3:45:58 +epoch [20/50] batch [250/500] time 0.912 (0.889) data 0.000 (0.003) loss 0.9277 (1.1107) acc 71.8750 (71.6125) lr 1.4258e-03 eta 3:45:57 +epoch [20/50] batch [255/500] time 0.911 (0.889) data 0.000 (0.003) loss 1.7705 (1.1161) acc 56.2500 (71.5196) lr 1.4258e-03 eta 3:45:51 +epoch [20/50] batch [260/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.2363 (1.1130) acc 68.7500 (71.5865) lr 1.4258e-03 eta 3:45:42 +epoch [20/50] batch [265/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.9717 (1.1114) acc 78.1250 (71.6156) lr 1.4258e-03 eta 3:45:41 +epoch [20/50] batch [270/500] time 0.862 (0.889) data 0.000 (0.003) loss 0.6636 (1.1113) acc 78.1250 (71.5509) lr 1.4258e-03 eta 3:45:39 +epoch [20/50] batch [275/500] time 0.868 (0.889) data 0.000 (0.003) loss 1.6572 (1.1112) acc 62.5000 (71.5909) lr 1.4258e-03 eta 3:45:31 +epoch [20/50] batch [280/500] time 0.859 (0.889) data 0.000 (0.003) loss 0.7651 (1.1114) acc 78.1250 (71.6183) lr 1.4258e-03 eta 3:45:23 +epoch [20/50] batch [285/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.1768 (1.1146) acc 65.6250 (71.5351) lr 1.4258e-03 eta 3:45:15 +epoch [20/50] batch [290/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.3594 (1.1155) acc 68.7500 (71.5517) lr 1.4258e-03 eta 3:45:12 +epoch [20/50] batch [295/500] time 0.876 (0.888) data 0.000 (0.002) loss 0.5391 (1.1093) acc 81.2500 (71.6949) lr 1.4258e-03 eta 3:45:05 +epoch [20/50] batch [300/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.9648 (1.1123) acc 53.1250 (71.5938) lr 1.4258e-03 eta 3:45:02 +epoch [20/50] batch [305/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.9189 
(1.1136) acc 78.1250 (71.5779) lr 1.4258e-03 eta 3:44:58 +epoch [20/50] batch [310/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.7959 (1.1164) acc 65.6250 (71.5020) lr 1.4258e-03 eta 3:44:53 +epoch [20/50] batch [315/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.1523 (1.1132) acc 62.5000 (71.5278) lr 1.4258e-03 eta 3:44:48 +epoch [20/50] batch [320/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.3350 (1.1172) acc 68.7500 (71.4258) lr 1.4258e-03 eta 3:44:42 +epoch [20/50] batch [325/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.1758 (1.1136) acc 75.0000 (71.4712) lr 1.4258e-03 eta 3:44:40 +epoch [20/50] batch [330/500] time 0.988 (0.889) data 0.000 (0.002) loss 1.3350 (1.1145) acc 50.0000 (71.4299) lr 1.4258e-03 eta 3:44:39 +epoch [20/50] batch [335/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.0000 (1.1155) acc 75.0000 (71.4739) lr 1.4258e-03 eta 3:44:34 +epoch [20/50] batch [340/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.3252 (1.1183) acc 71.8750 (71.4522) lr 1.4258e-03 eta 3:44:27 +epoch [20/50] batch [345/500] time 0.853 (0.888) data 0.000 (0.002) loss 1.2031 (1.1207) acc 71.8750 (71.4130) lr 1.4258e-03 eta 3:44:18 +epoch [20/50] batch [350/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.5566 (1.1227) acc 71.8750 (71.3929) lr 1.4258e-03 eta 3:44:13 +epoch [20/50] batch [355/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.8701 (1.1230) acc 65.6250 (71.3556) lr 1.4258e-03 eta 3:44:06 +epoch [20/50] batch [360/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.3887 (1.1236) acc 75.0000 (71.4062) lr 1.4258e-03 eta 3:43:59 +epoch [20/50] batch [365/500] time 0.901 (0.888) data 0.001 (0.002) loss 1.1230 (1.1248) acc 65.6250 (71.3442) lr 1.4258e-03 eta 3:43:58 +epoch [20/50] batch [370/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.0430 (1.1272) acc 71.8750 (71.3176) lr 1.4258e-03 eta 3:43:52 +epoch [20/50] batch [375/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.3105 (1.1287) acc 65.6250 (71.3083) lr 1.4258e-03 eta 3:43:51 +epoch 
[20/50] batch [380/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.9307 (1.1252) acc 75.0000 (71.3651) lr 1.4258e-03 eta 3:43:43 +epoch [20/50] batch [385/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.1445 (1.1228) acc 71.8750 (71.3718) lr 1.4258e-03 eta 3:43:40 +epoch [20/50] batch [390/500] time 0.900 (0.888) data 0.000 (0.002) loss 0.7314 (1.1231) acc 75.0000 (71.3702) lr 1.4258e-03 eta 3:43:37 +epoch [20/50] batch [395/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.7622 (1.1216) acc 71.8750 (71.4161) lr 1.4258e-03 eta 3:43:32 +epoch [20/50] batch [400/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.2695 (1.1204) acc 71.8750 (71.4453) lr 1.4258e-03 eta 3:43:26 +epoch [20/50] batch [405/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.4854 (1.1197) acc 62.5000 (71.4660) lr 1.4258e-03 eta 3:43:21 +epoch [20/50] batch [410/500] time 0.902 (0.888) data 0.000 (0.002) loss 0.9121 (1.1174) acc 75.0000 (71.5244) lr 1.4258e-03 eta 3:43:16 +epoch [20/50] batch [415/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.4844 (1.1161) acc 68.7500 (71.5437) lr 1.4258e-03 eta 3:43:11 +epoch [20/50] batch [420/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.7642 (1.1182) acc 78.1250 (71.4881) lr 1.4258e-03 eta 3:43:07 +epoch [20/50] batch [425/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.0635 (1.1177) acc 71.8750 (71.5147) lr 1.4258e-03 eta 3:43:02 +epoch [20/50] batch [430/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.8252 (1.1213) acc 56.2500 (71.4462) lr 1.4258e-03 eta 3:42:58 +epoch [20/50] batch [435/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.2568 (1.1198) acc 62.5000 (71.4727) lr 1.4258e-03 eta 3:42:51 +epoch [20/50] batch [440/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.6953 (1.1228) acc 56.2500 (71.4134) lr 1.4258e-03 eta 3:42:46 +epoch [20/50] batch [445/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.3809 (1.1211) acc 78.1250 (71.5028) lr 1.4258e-03 eta 3:42:41 +epoch [20/50] batch [450/500] time 0.873 (0.887) data 0.000 (0.002) loss 
0.7852 (1.1169) acc 81.2500 (71.6042) lr 1.4258e-03 eta 3:42:36 +epoch [20/50] batch [455/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.6260 (1.1169) acc 53.1250 (71.6277) lr 1.4258e-03 eta 3:42:30 +epoch [20/50] batch [460/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.0684 (1.1170) acc 75.0000 (71.6372) lr 1.4258e-03 eta 3:42:25 +epoch [20/50] batch [465/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.8315 (1.1164) acc 78.1250 (71.6734) lr 1.4258e-03 eta 3:42:19 +epoch [20/50] batch [470/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.2451 (1.1165) acc 71.8750 (71.6888) lr 1.4258e-03 eta 3:42:15 +epoch [20/50] batch [475/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.7715 (1.1223) acc 56.2500 (71.6316) lr 1.4258e-03 eta 3:42:11 +epoch [20/50] batch [480/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.7446 (1.1209) acc 75.0000 (71.6471) lr 1.4258e-03 eta 3:42:04 +epoch [20/50] batch [485/500] time 0.899 (0.887) data 0.001 (0.002) loss 1.0801 (1.1208) acc 78.1250 (71.6817) lr 1.4258e-03 eta 3:41:59 +epoch [20/50] batch [490/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0762 (1.1201) acc 78.1250 (71.7219) lr 1.4258e-03 eta 3:41:55 +epoch [20/50] batch [495/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.5068 (1.1219) acc 71.8750 (71.7172) lr 1.4258e-03 eta 3:41:51 +epoch [20/50] batch [500/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8169 (1.1217) acc 84.3750 (71.7250) lr 1.3681e-03 eta 3:41:46 +epoch [21/50] batch [5/500] time 0.874 (1.020) data 0.000 (0.126) loss 1.3262 (1.2138) acc 62.5000 (68.7500) lr 1.3681e-03 eta 4:14:51 +epoch [21/50] batch [10/500] time 0.903 (0.955) data 0.000 (0.063) loss 0.6763 (0.9973) acc 78.1250 (73.1250) lr 1.3681e-03 eta 3:58:38 +epoch [21/50] batch [15/500] time 0.883 (0.934) data 0.000 (0.042) loss 1.0908 (1.1722) acc 65.6250 (70.8333) lr 1.3681e-03 eta 3:53:15 +epoch [21/50] batch [20/500] time 0.872 (0.921) data 0.000 (0.032) loss 0.8887 (1.2039) acc 87.5000 (70.7812) lr 1.3681e-03 eta 3:49:50 +epoch 
[21/50] batch [25/500] time 0.882 (0.915) data 0.000 (0.025) loss 1.0215 (1.2078) acc 81.2500 (70.2500) lr 1.3681e-03 eta 3:48:19 +epoch [21/50] batch [30/500] time 0.906 (0.912) data 0.000 (0.021) loss 1.2471 (1.2232) acc 71.8750 (70.2083) lr 1.3681e-03 eta 3:47:25 +epoch [21/50] batch [35/500] time 0.881 (0.908) data 0.000 (0.018) loss 0.7603 (1.1669) acc 78.1250 (71.0714) lr 1.3681e-03 eta 3:46:23 +epoch [21/50] batch [40/500] time 0.897 (0.906) data 0.000 (0.016) loss 1.5898 (1.1699) acc 56.2500 (70.7812) lr 1.3681e-03 eta 3:45:48 +epoch [21/50] batch [45/500] time 0.892 (0.904) data 0.000 (0.014) loss 0.6870 (1.1703) acc 81.2500 (70.9722) lr 1.3681e-03 eta 3:45:18 +epoch [21/50] batch [50/500] time 0.894 (0.901) data 0.000 (0.013) loss 1.0898 (1.1600) acc 65.6250 (71.1250) lr 1.3681e-03 eta 3:44:29 +epoch [21/50] batch [55/500] time 0.882 (0.900) data 0.000 (0.012) loss 0.8540 (1.1477) acc 75.0000 (70.9659) lr 1.3681e-03 eta 3:44:07 +epoch [21/50] batch [60/500] time 0.884 (0.898) data 0.000 (0.011) loss 1.1123 (1.1398) acc 84.3750 (71.6667) lr 1.3681e-03 eta 3:43:41 +epoch [21/50] batch [65/500] time 0.951 (0.899) data 0.000 (0.010) loss 0.9590 (1.1422) acc 78.1250 (71.8269) lr 1.3681e-03 eta 3:43:39 +epoch [21/50] batch [70/500] time 0.876 (0.898) data 0.000 (0.009) loss 0.8667 (1.1277) acc 71.8750 (72.2768) lr 1.3681e-03 eta 3:43:23 +epoch [21/50] batch [75/500] time 0.845 (0.897) data 0.000 (0.009) loss 1.4980 (1.1466) acc 68.7500 (71.7500) lr 1.3681e-03 eta 3:43:02 +epoch [21/50] batch [80/500] time 0.873 (0.895) data 0.000 (0.008) loss 1.0908 (1.1419) acc 68.7500 (71.4453) lr 1.3681e-03 eta 3:42:39 +epoch [21/50] batch [85/500] time 0.866 (0.895) data 0.000 (0.008) loss 1.2295 (1.1394) acc 68.7500 (71.2132) lr 1.3681e-03 eta 3:42:28 +epoch [21/50] batch [90/500] time 0.898 (0.895) data 0.000 (0.007) loss 1.0674 (1.1444) acc 75.0000 (71.2847) lr 1.3681e-03 eta 3:42:17 +epoch [21/50] batch [95/500] time 0.894 (0.894) data 0.000 (0.007) loss 1.6846 (1.1506) 
acc 65.6250 (71.2171) lr 1.3681e-03 eta 3:42:09 +epoch [21/50] batch [100/500] time 0.871 (0.894) data 0.000 (0.006) loss 1.4062 (1.1520) acc 65.6250 (71.2812) lr 1.3681e-03 eta 3:42:02 +epoch [21/50] batch [105/500] time 0.890 (0.894) data 0.000 (0.006) loss 1.0088 (1.1503) acc 81.2500 (71.2798) lr 1.3681e-03 eta 3:41:59 +epoch [21/50] batch [110/500] time 0.896 (0.895) data 0.000 (0.006) loss 1.3076 (1.1496) acc 71.8750 (71.4205) lr 1.3681e-03 eta 3:41:59 +epoch [21/50] batch [115/500] time 0.887 (0.894) data 0.000 (0.006) loss 1.1602 (1.1496) acc 68.7500 (71.3043) lr 1.3681e-03 eta 3:41:46 +epoch [21/50] batch [120/500] time 0.865 (0.894) data 0.000 (0.005) loss 0.7671 (1.1465) acc 78.1250 (71.3802) lr 1.3681e-03 eta 3:41:37 +epoch [21/50] batch [125/500] time 0.914 (0.894) data 0.000 (0.005) loss 1.0811 (1.1486) acc 78.1250 (71.2500) lr 1.3681e-03 eta 3:41:36 +epoch [21/50] batch [130/500] time 0.912 (0.894) data 0.000 (0.005) loss 0.9297 (1.1436) acc 75.0000 (71.2740) lr 1.3681e-03 eta 3:41:35 +epoch [21/50] batch [135/500] time 0.921 (0.895) data 0.000 (0.005) loss 0.9272 (1.1438) acc 71.8750 (71.2269) lr 1.3681e-03 eta 3:41:43 +epoch [21/50] batch [140/500] time 0.888 (0.895) data 0.000 (0.005) loss 1.9580 (1.1512) acc 56.2500 (71.1161) lr 1.3681e-03 eta 3:41:38 +epoch [21/50] batch [145/500] time 0.892 (0.894) data 0.000 (0.005) loss 0.6777 (1.1552) acc 81.2500 (71.0129) lr 1.3681e-03 eta 3:41:27 +epoch [21/50] batch [150/500] time 0.908 (0.894) data 0.000 (0.004) loss 0.6426 (1.1543) acc 78.1250 (71.0833) lr 1.3681e-03 eta 3:41:20 +epoch [21/50] batch [155/500] time 0.871 (0.894) data 0.000 (0.004) loss 1.2949 (1.1524) acc 75.0000 (71.0685) lr 1.3681e-03 eta 3:41:12 +epoch [21/50] batch [160/500] time 0.878 (0.894) data 0.000 (0.004) loss 0.5654 (1.1523) acc 84.3750 (71.0156) lr 1.3681e-03 eta 3:41:03 +epoch [21/50] batch [165/500] time 0.860 (0.894) data 0.000 (0.004) loss 0.6797 (1.1522) acc 87.5000 (71.1174) lr 1.3681e-03 eta 3:41:09 +epoch [21/50] 
batch [170/500] time 0.890 (0.895) data 0.000 (0.004) loss 1.6641 (1.1492) acc 62.5000 (71.2132) lr 1.3681e-03 eta 3:41:07 +epoch [21/50] batch [175/500] time 0.900 (0.895) data 0.000 (0.004) loss 1.0098 (1.1496) acc 71.8750 (71.1607) lr 1.3681e-03 eta 3:41:03 +epoch [21/50] batch [180/500] time 0.859 (0.894) data 0.000 (0.004) loss 0.9888 (1.1444) acc 81.2500 (71.3542) lr 1.3681e-03 eta 3:40:50 +epoch [21/50] batch [185/500] time 0.900 (0.894) data 0.000 (0.004) loss 1.1533 (1.1439) acc 68.7500 (71.3007) lr 1.3681e-03 eta 3:40:44 +epoch [21/50] batch [190/500] time 0.922 (0.894) data 0.000 (0.004) loss 0.7710 (1.1443) acc 75.0000 (71.2336) lr 1.3681e-03 eta 3:40:43 +epoch [21/50] batch [195/500] time 0.893 (0.894) data 0.000 (0.003) loss 1.1631 (1.1483) acc 68.7500 (71.0897) lr 1.3681e-03 eta 3:40:38 +epoch [21/50] batch [200/500] time 0.884 (0.894) data 0.000 (0.003) loss 1.4072 (1.1530) acc 68.7500 (71.0469) lr 1.3681e-03 eta 3:40:31 +epoch [21/50] batch [205/500] time 0.889 (0.894) data 0.000 (0.003) loss 1.4512 (1.1579) acc 62.5000 (70.9451) lr 1.3681e-03 eta 3:40:28 +epoch [21/50] batch [210/500] time 0.897 (0.894) data 0.000 (0.003) loss 0.5762 (1.1545) acc 81.2500 (71.0268) lr 1.3681e-03 eta 3:40:29 +epoch [21/50] batch [215/500] time 0.886 (0.894) data 0.000 (0.003) loss 0.9326 (1.1532) acc 68.7500 (70.9884) lr 1.3681e-03 eta 3:40:21 +epoch [21/50] batch [220/500] time 0.885 (0.894) data 0.000 (0.003) loss 1.2334 (1.1513) acc 75.0000 (70.9801) lr 1.3681e-03 eta 3:40:12 +epoch [21/50] batch [225/500] time 0.869 (0.894) data 0.000 (0.003) loss 1.1533 (1.1515) acc 65.6250 (70.9167) lr 1.3681e-03 eta 3:40:01 +epoch [21/50] batch [230/500] time 0.901 (0.893) data 0.000 (0.003) loss 1.3721 (1.1519) acc 68.7500 (70.9647) lr 1.3681e-03 eta 3:39:55 +epoch [21/50] batch [235/500] time 0.872 (0.893) data 0.000 (0.003) loss 0.5581 (1.1474) acc 87.5000 (71.1303) lr 1.3681e-03 eta 3:39:49 +epoch [21/50] batch [240/500] time 0.889 (0.893) data 0.000 (0.003) loss 0.7935 
(1.1479) acc 68.7500 (71.0807) lr 1.3681e-03 eta 3:39:45 +epoch [21/50] batch [245/500] time 0.863 (0.893) data 0.000 (0.003) loss 1.2803 (1.1506) acc 62.5000 (70.9694) lr 1.3681e-03 eta 3:39:37 +epoch [21/50] batch [250/500] time 0.863 (0.893) data 0.000 (0.003) loss 0.6963 (1.1477) acc 75.0000 (71.0500) lr 1.3681e-03 eta 3:39:24 +epoch [21/50] batch [255/500] time 0.869 (0.892) data 0.000 (0.003) loss 1.0957 (1.1441) acc 78.1250 (71.1152) lr 1.3681e-03 eta 3:39:15 +epoch [21/50] batch [260/500] time 0.878 (0.892) data 0.000 (0.003) loss 0.7720 (1.1415) acc 75.0000 (71.1298) lr 1.3681e-03 eta 3:39:08 +epoch [21/50] batch [265/500] time 0.879 (0.892) data 0.000 (0.003) loss 0.9282 (1.1404) acc 81.2500 (71.1557) lr 1.3681e-03 eta 3:39:01 +epoch [21/50] batch [270/500] time 0.891 (0.892) data 0.000 (0.003) loss 0.9814 (1.1418) acc 87.5000 (71.1921) lr 1.3681e-03 eta 3:38:54 +epoch [21/50] batch [275/500] time 0.877 (0.891) data 0.000 (0.003) loss 1.2490 (1.1429) acc 71.8750 (71.1705) lr 1.3681e-03 eta 3:38:44 +epoch [21/50] batch [280/500] time 0.887 (0.891) data 0.000 (0.002) loss 1.2842 (1.1380) acc 71.8750 (71.2054) lr 1.3681e-03 eta 3:38:38 +epoch [21/50] batch [285/500] time 0.883 (0.891) data 0.000 (0.002) loss 0.9849 (1.1394) acc 78.1250 (71.2390) lr 1.3681e-03 eta 3:38:33 +epoch [21/50] batch [290/500] time 0.880 (0.891) data 0.000 (0.002) loss 1.1416 (1.1371) acc 65.6250 (71.2931) lr 1.3681e-03 eta 3:38:26 +epoch [21/50] batch [295/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.1709 (1.1359) acc 68.7500 (71.3453) lr 1.3681e-03 eta 3:38:17 +epoch [21/50] batch [300/500] time 0.896 (0.891) data 0.001 (0.002) loss 1.1182 (1.1363) acc 75.0000 (71.3438) lr 1.3681e-03 eta 3:38:12 +epoch [21/50] batch [305/500] time 0.912 (0.891) data 0.000 (0.002) loss 1.4121 (1.1376) acc 68.7500 (71.3115) lr 1.3681e-03 eta 3:38:09 +epoch [21/50] batch [310/500] time 0.860 (0.891) data 0.000 (0.002) loss 1.0957 (1.1397) acc 78.1250 (71.2601) lr 1.3681e-03 eta 3:38:08 +epoch 
[21/50] batch [315/500] time 0.885 (0.891) data 0.000 (0.002) loss 0.8896 (1.1403) acc 75.0000 (71.2996) lr 1.3681e-03 eta 3:38:01 +epoch [21/50] batch [320/500] time 0.907 (0.891) data 0.000 (0.002) loss 1.1318 (1.1392) acc 75.0000 (71.3281) lr 1.3681e-03 eta 3:37:55 +epoch [21/50] batch [325/500] time 0.916 (0.891) data 0.000 (0.002) loss 1.5244 (1.1411) acc 56.2500 (71.2308) lr 1.3681e-03 eta 3:37:50 +epoch [21/50] batch [330/500] time 0.899 (0.891) data 0.000 (0.002) loss 0.7241 (1.1380) acc 78.1250 (71.2500) lr 1.3681e-03 eta 3:37:45 +epoch [21/50] batch [335/500] time 0.876 (0.891) data 0.000 (0.002) loss 0.7109 (1.1368) acc 81.2500 (71.2966) lr 1.3681e-03 eta 3:37:40 +epoch [21/50] batch [340/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.0127 (1.1352) acc 78.1250 (71.3327) lr 1.3681e-03 eta 3:37:35 +epoch [21/50] batch [345/500] time 0.870 (0.891) data 0.000 (0.002) loss 0.8643 (1.1340) acc 71.8750 (71.3768) lr 1.3681e-03 eta 3:37:31 +epoch [21/50] batch [350/500] time 0.881 (0.890) data 0.000 (0.002) loss 0.8413 (1.1320) acc 71.8750 (71.3929) lr 1.3681e-03 eta 3:37:25 +epoch [21/50] batch [355/500] time 0.907 (0.891) data 0.000 (0.002) loss 1.2686 (1.1322) acc 65.6250 (71.4349) lr 1.3681e-03 eta 3:37:25 +epoch [21/50] batch [360/500] time 0.877 (0.891) data 0.000 (0.002) loss 1.2129 (1.1308) acc 68.7500 (71.4323) lr 1.3681e-03 eta 3:37:19 +epoch [21/50] batch [365/500] time 0.870 (0.891) data 0.000 (0.002) loss 1.5049 (1.1300) acc 62.5000 (71.4640) lr 1.3681e-03 eta 3:37:15 +epoch [21/50] batch [370/500] time 0.904 (0.891) data 0.000 (0.002) loss 0.8511 (1.1274) acc 81.2500 (71.5794) lr 1.3681e-03 eta 3:37:11 +epoch [21/50] batch [375/500] time 0.900 (0.891) data 0.000 (0.002) loss 1.2324 (1.1267) acc 75.0000 (71.6417) lr 1.3681e-03 eta 3:37:05 +epoch [21/50] batch [380/500] time 0.848 (0.890) data 0.000 (0.002) loss 1.1748 (1.1241) acc 75.0000 (71.7434) lr 1.3681e-03 eta 3:36:56 +epoch [21/50] batch [385/500] time 0.923 (0.890) data 0.000 (0.002) loss 
1.1406 (1.1254) acc 65.6250 (71.6802) lr 1.3681e-03 eta 3:36:53 +epoch [21/50] batch [390/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.2217 (1.1234) acc 65.6250 (71.6506) lr 1.3681e-03 eta 3:36:46 +epoch [21/50] batch [395/500] time 0.922 (0.890) data 0.000 (0.002) loss 0.8169 (1.1220) acc 81.2500 (71.7089) lr 1.3681e-03 eta 3:36:41 +epoch [21/50] batch [400/500] time 0.911 (0.890) data 0.000 (0.002) loss 1.3887 (1.1240) acc 71.8750 (71.6953) lr 1.3681e-03 eta 3:36:39 +epoch [21/50] batch [405/500] time 0.935 (0.890) data 0.000 (0.002) loss 1.5430 (1.1259) acc 68.7500 (71.6590) lr 1.3681e-03 eta 3:36:36 +epoch [21/50] batch [410/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.9468 (1.1255) acc 75.0000 (71.6616) lr 1.3681e-03 eta 3:36:27 +epoch [21/50] batch [415/500] time 0.892 (0.890) data 0.000 (0.002) loss 1.1406 (1.1252) acc 68.7500 (71.6340) lr 1.3681e-03 eta 3:36:24 +epoch [21/50] batch [420/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.3262 (1.1240) acc 62.5000 (71.6071) lr 1.3681e-03 eta 3:36:18 +epoch [21/50] batch [425/500] time 0.883 (0.890) data 0.000 (0.002) loss 0.7593 (1.1243) acc 81.2500 (71.6250) lr 1.3681e-03 eta 3:36:13 +epoch [21/50] batch [430/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.6182 (1.1228) acc 65.6250 (71.6424) lr 1.3681e-03 eta 3:36:08 +epoch [21/50] batch [435/500] time 0.902 (0.890) data 0.000 (0.002) loss 1.4980 (1.1232) acc 78.1250 (71.6236) lr 1.3681e-03 eta 3:36:03 +epoch [21/50] batch [440/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.1270 (1.1232) acc 75.0000 (71.6264) lr 1.3681e-03 eta 3:35:59 +epoch [21/50] batch [445/500] time 0.888 (0.890) data 0.000 (0.002) loss 1.1895 (1.1235) acc 68.7500 (71.6152) lr 1.3681e-03 eta 3:35:53 +epoch [21/50] batch [450/500] time 0.999 (0.890) data 0.000 (0.002) loss 0.7646 (1.1233) acc 75.0000 (71.6319) lr 1.3681e-03 eta 3:35:52 +epoch [21/50] batch [455/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.8994 (1.1235) acc 78.1250 (71.6552) lr 1.3681e-03 eta 3:35:46 
+epoch [21/50] batch [460/500] time 0.888 (0.890) data 0.000 (0.002) loss 1.2764 (1.1258) acc 68.7500 (71.6101) lr 1.3681e-03 eta 3:35:40 +epoch [21/50] batch [465/500] time 0.873 (0.890) data 0.000 (0.002) loss 0.9321 (1.1267) acc 68.7500 (71.5524) lr 1.3681e-03 eta 3:35:32 +epoch [21/50] batch [470/500] time 0.866 (0.890) data 0.000 (0.002) loss 0.7124 (1.1286) acc 81.2500 (71.5293) lr 1.3681e-03 eta 3:35:27 +epoch [21/50] batch [475/500] time 0.853 (0.890) data 0.000 (0.002) loss 1.3223 (1.1282) acc 75.0000 (71.5789) lr 1.3681e-03 eta 3:35:20 +epoch [21/50] batch [480/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.8823 (1.1257) acc 68.7500 (71.6016) lr 1.3681e-03 eta 3:35:16 +epoch [21/50] batch [485/500] time 0.864 (0.889) data 0.001 (0.002) loss 0.5864 (1.1247) acc 78.1250 (71.6237) lr 1.3681e-03 eta 3:35:09 +epoch [21/50] batch [490/500] time 0.865 (0.889) data 0.000 (0.002) loss 0.9683 (1.1264) acc 75.0000 (71.6008) lr 1.3681e-03 eta 3:35:03 +epoch [21/50] batch [495/500] time 0.889 (0.889) data 0.000 (0.001) loss 0.6768 (1.1259) acc 81.2500 (71.5909) lr 1.3681e-03 eta 3:35:01 +epoch [21/50] batch [500/500] time 0.900 (0.889) data 0.000 (0.001) loss 0.8125 (1.1251) acc 87.5000 (71.6500) lr 1.3090e-03 eta 3:34:55 +epoch [22/50] batch [5/500] time 0.896 (1.013) data 0.000 (0.125) loss 1.2119 (1.0371) acc 65.6250 (72.5000) lr 1.3090e-03 eta 4:04:37 +epoch [22/50] batch [10/500] time 0.871 (0.956) data 0.000 (0.063) loss 0.9878 (1.0353) acc 62.5000 (71.2500) lr 1.3090e-03 eta 3:50:50 +epoch [22/50] batch [15/500] time 0.869 (0.929) data 0.000 (0.042) loss 0.9893 (1.0647) acc 71.8750 (72.7083) lr 1.3090e-03 eta 3:44:13 +epoch [22/50] batch [20/500] time 0.865 (0.915) data 0.000 (0.031) loss 1.4561 (1.1017) acc 65.6250 (72.9688) lr 1.3090e-03 eta 3:40:44 +epoch [22/50] batch [25/500] time 0.858 (0.907) data 0.000 (0.025) loss 1.4902 (1.1273) acc 68.7500 (72.6250) lr 1.3090e-03 eta 3:38:43 +epoch [22/50] batch [30/500] time 0.874 (0.901) data 0.000 (0.021) loss 
0.9990 (1.1220) acc 71.8750 (72.0833) lr 1.3090e-03 eta 3:37:19 +epoch [22/50] batch [35/500] time 0.857 (0.897) data 0.000 (0.018) loss 1.2812 (1.1353) acc 75.0000 (72.2321) lr 1.3090e-03 eta 3:36:16 +epoch [22/50] batch [40/500] time 0.897 (0.900) data 0.000 (0.016) loss 0.7368 (1.1255) acc 68.7500 (72.2656) lr 1.3090e-03 eta 3:36:51 +epoch [22/50] batch [45/500] time 0.874 (0.899) data 0.000 (0.014) loss 0.7451 (1.1061) acc 81.2500 (72.0833) lr 1.3090e-03 eta 3:36:32 +epoch [22/50] batch [50/500] time 0.903 (0.898) data 0.000 (0.013) loss 0.8555 (1.1093) acc 78.1250 (72.1250) lr 1.3090e-03 eta 3:36:14 +epoch [22/50] batch [55/500] time 0.883 (0.898) data 0.000 (0.012) loss 1.2158 (1.1213) acc 68.7500 (71.7614) lr 1.3090e-03 eta 3:36:11 +epoch [22/50] batch [60/500] time 0.911 (0.899) data 0.000 (0.011) loss 0.7129 (1.1077) acc 81.2500 (72.2917) lr 1.3090e-03 eta 3:36:15 +epoch [22/50] batch [65/500] time 0.903 (0.898) data 0.000 (0.010) loss 1.0811 (1.1134) acc 68.7500 (71.8269) lr 1.3090e-03 eta 3:35:58 +epoch [22/50] batch [70/500] time 0.853 (0.897) data 0.000 (0.009) loss 1.3896 (1.1182) acc 68.7500 (71.6071) lr 1.3090e-03 eta 3:35:41 +epoch [22/50] batch [75/500] time 0.892 (0.896) data 0.000 (0.009) loss 0.8403 (1.1131) acc 78.1250 (71.9583) lr 1.3090e-03 eta 3:35:24 +epoch [22/50] batch [80/500] time 0.885 (0.894) data 0.000 (0.008) loss 0.7822 (1.1255) acc 71.8750 (71.5625) lr 1.3090e-03 eta 3:34:56 +epoch [22/50] batch [85/500] time 0.868 (0.894) data 0.000 (0.008) loss 0.9316 (1.1171) acc 78.1250 (71.8382) lr 1.3090e-03 eta 3:34:42 +epoch [22/50] batch [90/500] time 0.875 (0.894) data 0.000 (0.007) loss 1.4678 (1.1147) acc 65.6250 (71.9792) lr 1.3090e-03 eta 3:34:35 +epoch [22/50] batch [95/500] time 0.903 (0.893) data 0.000 (0.007) loss 1.2832 (1.1105) acc 78.1250 (72.3026) lr 1.3090e-03 eta 3:34:20 +epoch [22/50] batch [100/500] time 0.897 (0.893) data 0.000 (0.006) loss 0.8892 (1.1049) acc 59.3750 (72.1562) lr 1.3090e-03 eta 3:34:14 +epoch [22/50] 
batch [105/500] time 0.897 (0.893) data 0.000 (0.006) loss 1.1416 (1.1167) acc 68.7500 (72.0536) lr 1.3090e-03 eta 3:34:14 +epoch [22/50] batch [110/500] time 0.874 (0.893) data 0.000 (0.006) loss 1.0879 (1.1056) acc 65.6250 (72.2443) lr 1.3090e-03 eta 3:34:03 +epoch [22/50] batch [115/500] time 0.886 (0.892) data 0.000 (0.006) loss 1.2949 (1.1024) acc 78.1250 (72.3098) lr 1.3090e-03 eta 3:33:55 +epoch [22/50] batch [120/500] time 0.939 (0.892) data 0.000 (0.005) loss 1.2139 (1.1054) acc 75.0000 (72.3438) lr 1.3090e-03 eta 3:33:51 +epoch [22/50] batch [125/500] time 0.885 (0.892) data 0.000 (0.005) loss 1.4229 (1.1076) acc 59.3750 (72.3000) lr 1.3090e-03 eta 3:33:39 +epoch [22/50] batch [130/500] time 0.880 (0.891) data 0.000 (0.005) loss 0.7227 (1.1051) acc 87.5000 (72.4519) lr 1.3090e-03 eta 3:33:24 +epoch [22/50] batch [135/500] time 0.895 (0.890) data 0.000 (0.005) loss 0.8477 (1.0994) acc 78.1250 (72.5926) lr 1.3090e-03 eta 3:33:10 +epoch [22/50] batch [140/500] time 0.905 (0.891) data 0.001 (0.005) loss 1.4971 (1.0941) acc 65.6250 (72.7009) lr 1.3090e-03 eta 3:33:16 +epoch [22/50] batch [145/500] time 0.885 (0.891) data 0.000 (0.005) loss 1.5332 (1.0921) acc 65.6250 (72.7155) lr 1.3090e-03 eta 3:33:10 +epoch [22/50] batch [150/500] time 0.897 (0.891) data 0.000 (0.004) loss 1.3496 (1.0959) acc 68.7500 (72.6042) lr 1.3090e-03 eta 3:33:01 +epoch [22/50] batch [155/500] time 0.900 (0.891) data 0.000 (0.004) loss 0.9736 (1.0865) acc 71.8750 (72.7621) lr 1.3090e-03 eta 3:32:57 +epoch [22/50] batch [160/500] time 0.920 (0.891) data 0.000 (0.004) loss 0.9604 (1.0844) acc 78.1250 (72.7148) lr 1.3090e-03 eta 3:32:51 +epoch [22/50] batch [165/500] time 0.880 (0.891) data 0.000 (0.004) loss 1.1035 (1.0859) acc 78.1250 (72.7841) lr 1.3090e-03 eta 3:32:46 +epoch [22/50] batch [170/500] time 0.859 (0.890) data 0.000 (0.004) loss 0.8066 (1.0785) acc 87.5000 (72.9963) lr 1.3090e-03 eta 3:32:36 +epoch [22/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.9541 
(1.0837) acc 68.7500 (72.8929) lr 1.3090e-03 eta 3:32:30 +epoch [22/50] batch [180/500] time 0.863 (0.889) data 0.000 (0.004) loss 0.7871 (1.0819) acc 78.1250 (72.9167) lr 1.3090e-03 eta 3:32:14 +epoch [22/50] batch [185/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.4443 (1.0906) acc 59.3750 (72.7534) lr 1.3090e-03 eta 3:32:13 +epoch [22/50] batch [190/500] time 0.870 (0.889) data 0.000 (0.004) loss 1.1787 (1.0898) acc 68.7500 (72.6974) lr 1.3090e-03 eta 3:32:05 +epoch [22/50] batch [195/500] time 0.871 (0.889) data 0.000 (0.003) loss 1.6855 (1.0919) acc 62.5000 (72.6442) lr 1.3090e-03 eta 3:31:58 +epoch [22/50] batch [200/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.6748 (1.0909) acc 75.0000 (72.6406) lr 1.3090e-03 eta 3:31:52 +epoch [22/50] batch [205/500] time 0.903 (0.889) data 0.000 (0.003) loss 1.5400 (1.0937) acc 56.2500 (72.5610) lr 1.3090e-03 eta 3:31:47 +epoch [22/50] batch [210/500] time 0.923 (0.889) data 0.000 (0.003) loss 1.1475 (1.0916) acc 68.7500 (72.5893) lr 1.3090e-03 eta 3:31:44 +epoch [22/50] batch [215/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.4570 (1.0925) acc 59.3750 (72.5000) lr 1.3090e-03 eta 3:31:37 +epoch [22/50] batch [220/500] time 0.887 (0.889) data 0.000 (0.003) loss 0.7769 (1.0918) acc 78.1250 (72.5000) lr 1.3090e-03 eta 3:31:29 +epoch [22/50] batch [225/500] time 0.880 (0.889) data 0.000 (0.003) loss 1.3809 (1.0881) acc 68.7500 (72.5972) lr 1.3090e-03 eta 3:31:28 +epoch [22/50] batch [230/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.0898 (1.0908) acc 65.6250 (72.5000) lr 1.3090e-03 eta 3:31:23 +epoch [22/50] batch [235/500] time 0.902 (0.889) data 0.000 (0.003) loss 1.1279 (1.0915) acc 62.5000 (72.4202) lr 1.3090e-03 eta 3:31:20 +epoch [22/50] batch [240/500] time 0.901 (0.889) data 0.000 (0.003) loss 0.9053 (1.0884) acc 78.1250 (72.5260) lr 1.3090e-03 eta 3:31:15 +epoch [22/50] batch [245/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.0137 (1.0853) acc 78.1250 (72.5765) lr 1.3090e-03 eta 3:31:04 +epoch 
[22/50] batch [250/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.1338 (1.0914) acc 71.8750 (72.4000) lr 1.3090e-03 eta 3:30:58 +epoch [22/50] batch [255/500] time 0.884 (0.888) data 0.000 (0.003) loss 1.5225 (1.0944) acc 65.6250 (72.4510) lr 1.3090e-03 eta 3:30:50 +epoch [22/50] batch [260/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.6055 (1.1017) acc 53.1250 (72.1875) lr 1.3090e-03 eta 3:30:44 +epoch [22/50] batch [265/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.2275 (1.1014) acc 71.8750 (72.1698) lr 1.3090e-03 eta 3:30:35 +epoch [22/50] batch [270/500] time 0.873 (0.887) data 0.000 (0.003) loss 0.7573 (1.0987) acc 78.1250 (72.1759) lr 1.3090e-03 eta 3:30:26 +epoch [22/50] batch [275/500] time 0.892 (0.887) data 0.000 (0.003) loss 0.9976 (1.1024) acc 71.8750 (72.0795) lr 1.3090e-03 eta 3:30:21 +epoch [22/50] batch [280/500] time 1.003 (0.888) data 0.000 (0.002) loss 1.3252 (1.1014) acc 65.6250 (72.1317) lr 1.3090e-03 eta 3:30:23 +epoch [22/50] batch [285/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.7354 (1.1028) acc 68.7500 (72.1272) lr 1.3090e-03 eta 3:30:19 +epoch [22/50] batch [290/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.5332 (1.1055) acc 62.5000 (72.0582) lr 1.3090e-03 eta 3:30:14 +epoch [22/50] batch [295/500] time 0.858 (0.888) data 0.000 (0.002) loss 0.8896 (1.1055) acc 78.1250 (72.0445) lr 1.3090e-03 eta 3:30:07 +epoch [22/50] batch [300/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.0977 (1.1045) acc 71.8750 (72.0729) lr 1.3090e-03 eta 3:30:01 +epoch [22/50] batch [305/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.4238 (1.1081) acc 75.0000 (72.0594) lr 1.3090e-03 eta 3:29:57 +epoch [22/50] batch [310/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.0146 (1.1063) acc 75.0000 (72.0968) lr 1.3090e-03 eta 3:29:52 +epoch [22/50] batch [315/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1094 (1.1080) acc 84.3750 (72.0635) lr 1.3090e-03 eta 3:29:44 +epoch [22/50] batch [320/500] time 0.855 (0.887) data 0.000 (0.002) loss 
1.1094 (1.1104) acc 71.8750 (72.0117) lr 1.3090e-03 eta 3:29:38 +epoch [22/50] batch [325/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.3516 (1.1129) acc 59.3750 (71.9904) lr 1.3090e-03 eta 3:29:36 +epoch [22/50] batch [330/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8008 (1.1116) acc 84.3750 (72.0455) lr 1.3090e-03 eta 3:29:33 +epoch [22/50] batch [335/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.3223 (1.1101) acc 81.2500 (72.1175) lr 1.3090e-03 eta 3:29:28 +epoch [22/50] batch [340/500] time 0.847 (0.887) data 0.000 (0.002) loss 1.1982 (1.1133) acc 78.1250 (72.0588) lr 1.3090e-03 eta 3:29:22 +epoch [22/50] batch [345/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.2285 (1.1134) acc 62.5000 (72.0018) lr 1.3090e-03 eta 3:29:18 +epoch [22/50] batch [350/500] time 0.905 (0.887) data 0.000 (0.002) loss 0.8125 (1.1124) acc 81.2500 (72.0625) lr 1.3090e-03 eta 3:29:17 +epoch [22/50] batch [355/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.7168 (1.1129) acc 56.2500 (72.0246) lr 1.3090e-03 eta 3:29:10 +epoch [22/50] batch [360/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.1230 (1.1122) acc 65.6250 (72.0312) lr 1.3090e-03 eta 3:29:07 +epoch [22/50] batch [365/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.1514 (1.1116) acc 68.7500 (72.0377) lr 1.3090e-03 eta 3:29:01 +epoch [22/50] batch [370/500] time 0.853 (0.887) data 0.000 (0.002) loss 1.0957 (1.1077) acc 75.0000 (72.1030) lr 1.3090e-03 eta 3:28:52 +epoch [22/50] batch [375/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.9985 (1.1071) acc 68.7500 (72.1000) lr 1.3090e-03 eta 3:28:46 +epoch [22/50] batch [380/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.8403 (1.1110) acc 71.8750 (71.9819) lr 1.3090e-03 eta 3:28:42 +epoch [22/50] batch [385/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.7031 (1.1116) acc 59.3750 (71.9968) lr 1.3090e-03 eta 3:28:36 +epoch [22/50] batch [390/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.8525 (1.1094) acc 78.1250 (72.0353) lr 1.3090e-03 eta 3:28:32 
+epoch [22/50] batch [395/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.1592 (1.1119) acc 59.3750 (71.9462) lr 1.3090e-03 eta 3:28:27 +epoch [22/50] batch [400/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.5312 (1.1135) acc 62.5000 (71.9062) lr 1.3090e-03 eta 3:28:24 +epoch [22/50] batch [405/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.2080 (1.1127) acc 65.6250 (71.9059) lr 1.3090e-03 eta 3:28:19 +epoch [22/50] batch [410/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.4551 (1.1132) acc 59.3750 (71.8598) lr 1.3090e-03 eta 3:28:15 +epoch [22/50] batch [415/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.7603 (1.1136) acc 78.1250 (71.8148) lr 1.3090e-03 eta 3:28:10 +epoch [22/50] batch [420/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.2676 (1.1126) acc 75.0000 (71.8527) lr 1.3090e-03 eta 3:28:02 +epoch [22/50] batch [425/500] time 0.916 (0.887) data 0.000 (0.002) loss 0.8472 (1.1092) acc 78.1250 (71.9338) lr 1.3090e-03 eta 3:28:01 +epoch [22/50] batch [430/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.0820 (1.1077) acc 65.6250 (71.9622) lr 1.3090e-03 eta 3:27:56 +epoch [22/50] batch [435/500] time 0.853 (0.886) data 0.000 (0.002) loss 1.3379 (1.1102) acc 65.6250 (71.9181) lr 1.3090e-03 eta 3:27:48 +epoch [22/50] batch [440/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.0420 (1.1099) acc 75.0000 (71.8963) lr 1.3090e-03 eta 3:27:44 +epoch [22/50] batch [445/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.9189 (1.1102) acc 65.6250 (71.8961) lr 1.3090e-03 eta 3:27:39 +epoch [22/50] batch [450/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.0225 (1.1091) acc 78.1250 (71.9167) lr 1.3090e-03 eta 3:27:34 +epoch [22/50] batch [455/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9116 (1.1092) acc 78.1250 (71.9643) lr 1.3090e-03 eta 3:27:28 +epoch [22/50] batch [460/500] time 0.853 (0.886) data 0.000 (0.002) loss 1.2646 (1.1103) acc 62.5000 (71.9293) lr 1.3090e-03 eta 3:27:23 +epoch [22/50] batch [465/500] time 0.887 (0.886) data 0.000 
(0.002) loss 1.4170 (1.1099) acc 65.6250 (71.9489) lr 1.3090e-03 eta 3:27:20 +epoch [22/50] batch [470/500] time 0.857 (0.886) data 0.000 (0.002) loss 1.8721 (1.1129) acc 59.3750 (71.9082) lr 1.3090e-03 eta 3:27:17 +epoch [22/50] batch [475/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.6406 (1.1129) acc 84.3750 (71.8882) lr 1.3090e-03 eta 3:27:12 +epoch [22/50] batch [480/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.9268 (1.1147) acc 53.1250 (71.8229) lr 1.3090e-03 eta 3:27:08 +epoch [22/50] batch [485/500] time 0.912 (0.887) data 0.001 (0.002) loss 1.8535 (1.1178) acc 62.5000 (71.7590) lr 1.3090e-03 eta 3:27:05 +epoch [22/50] batch [490/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.2510 (1.1177) acc 78.1250 (71.7730) lr 1.3090e-03 eta 3:27:01 +epoch [22/50] batch [495/500] time 0.887 (0.887) data 0.000 (0.001) loss 2.3926 (1.1203) acc 46.8750 (71.7109) lr 1.3090e-03 eta 3:26:57 +epoch [22/50] batch [500/500] time 0.873 (0.887) data 0.000 (0.001) loss 2.1953 (1.1237) acc 56.2500 (71.6312) lr 1.2487e-03 eta 3:26:53 +epoch [23/50] batch [5/500] time 0.865 (1.018) data 0.000 (0.130) loss 1.1992 (1.2163) acc 68.7500 (73.1250) lr 1.2487e-03 eta 3:57:29 +epoch [23/50] batch [10/500] time 0.997 (0.963) data 0.000 (0.065) loss 1.6797 (1.2399) acc 65.6250 (69.3750) lr 1.2487e-03 eta 3:44:36 +epoch [23/50] batch [15/500] time 0.886 (0.938) data 0.000 (0.043) loss 1.1562 (1.2373) acc 78.1250 (70.6250) lr 1.2487e-03 eta 3:38:33 +epoch [23/50] batch [20/500] time 0.911 (0.929) data 0.000 (0.033) loss 0.6714 (1.0990) acc 87.5000 (73.5938) lr 1.2487e-03 eta 3:36:23 +epoch [23/50] batch [25/500] time 0.876 (0.921) data 0.000 (0.026) loss 1.4512 (1.1102) acc 56.2500 (72.2500) lr 1.2487e-03 eta 3:34:29 +epoch [23/50] batch [30/500] time 0.865 (0.914) data 0.000 (0.022) loss 0.6089 (1.0502) acc 81.2500 (73.0208) lr 1.2487e-03 eta 3:32:42 +epoch [23/50] batch [35/500] time 0.896 (0.909) data 0.000 (0.019) loss 1.8066 (1.0534) acc 65.6250 (73.6607) lr 1.2487e-03 eta 
3:31:40 +epoch [23/50] batch [40/500] time 0.910 (0.906) data 0.000 (0.016) loss 1.1201 (1.0879) acc 68.7500 (73.0469) lr 1.2487e-03 eta 3:30:51 +epoch [23/50] batch [45/500] time 0.894 (0.904) data 0.000 (0.015) loss 0.8643 (1.0994) acc 81.2500 (73.1250) lr 1.2487e-03 eta 3:30:09 +epoch [23/50] batch [50/500] time 1.034 (0.905) data 0.000 (0.013) loss 1.0479 (1.1126) acc 75.0000 (72.6250) lr 1.2487e-03 eta 3:30:20 +epoch [23/50] batch [55/500] time 0.872 (0.903) data 0.000 (0.012) loss 0.9038 (1.1221) acc 75.0000 (72.0455) lr 1.2487e-03 eta 3:29:49 +epoch [23/50] batch [60/500] time 0.884 (0.900) data 0.000 (0.011) loss 0.5039 (1.0907) acc 87.5000 (72.8125) lr 1.2487e-03 eta 3:29:12 +epoch [23/50] batch [65/500] time 0.895 (0.900) data 0.000 (0.010) loss 1.0996 (1.0859) acc 75.0000 (73.0769) lr 1.2487e-03 eta 3:28:58 +epoch [23/50] batch [70/500] time 0.909 (0.899) data 0.000 (0.009) loss 1.3721 (1.0903) acc 68.7500 (73.1250) lr 1.2487e-03 eta 3:28:40 +epoch [23/50] batch [75/500] time 0.870 (0.897) data 0.000 (0.009) loss 1.1191 (1.0829) acc 75.0000 (73.2500) lr 1.2487e-03 eta 3:28:16 +epoch [23/50] batch [80/500] time 0.900 (0.896) data 0.000 (0.008) loss 0.8691 (1.0910) acc 71.8750 (73.0469) lr 1.2487e-03 eta 3:27:47 +epoch [23/50] batch [85/500] time 0.901 (0.895) data 0.000 (0.008) loss 0.6714 (1.0778) acc 78.1250 (73.1618) lr 1.2487e-03 eta 3:27:35 +epoch [23/50] batch [90/500] time 0.880 (0.894) data 0.000 (0.007) loss 0.7100 (1.0741) acc 84.3750 (73.2292) lr 1.2487e-03 eta 3:27:11 +epoch [23/50] batch [95/500] time 0.858 (0.893) data 0.000 (0.007) loss 0.7427 (1.0712) acc 81.2500 (73.3553) lr 1.2487e-03 eta 3:26:51 +epoch [23/50] batch [100/500] time 0.862 (0.892) data 0.000 (0.007) loss 0.5908 (1.0642) acc 75.0000 (73.6250) lr 1.2487e-03 eta 3:26:40 +epoch [23/50] batch [105/500] time 0.904 (0.892) data 0.000 (0.006) loss 1.4990 (1.0738) acc 68.7500 (73.4524) lr 1.2487e-03 eta 3:26:30 +epoch [23/50] batch [110/500] time 0.960 (0.892) data 0.000 (0.006) 
loss 0.8677 (1.0757) acc 68.7500 (73.3523) lr 1.2487e-03 eta 3:26:25 +epoch [23/50] batch [115/500] time 0.869 (0.891) data 0.000 (0.006) loss 1.6045 (1.0798) acc 62.5000 (73.2337) lr 1.2487e-03 eta 3:26:17 +epoch [23/50] batch [120/500] time 0.856 (0.891) data 0.000 (0.006) loss 1.2695 (1.0748) acc 56.2500 (73.1771) lr 1.2487e-03 eta 3:26:05 +epoch [23/50] batch [125/500] time 0.885 (0.890) data 0.000 (0.005) loss 1.3926 (1.0802) acc 68.7500 (73.1000) lr 1.2487e-03 eta 3:25:50 +epoch [23/50] batch [130/500] time 0.883 (0.890) data 0.000 (0.005) loss 1.0830 (1.0835) acc 75.0000 (72.9327) lr 1.2487e-03 eta 3:25:46 +epoch [23/50] batch [135/500] time 0.864 (0.890) data 0.000 (0.005) loss 1.1123 (1.0823) acc 75.0000 (72.8935) lr 1.2487e-03 eta 3:25:33 +epoch [23/50] batch [140/500] time 0.896 (0.889) data 0.000 (0.005) loss 1.2119 (1.0931) acc 62.5000 (72.5223) lr 1.2487e-03 eta 3:25:23 +epoch [23/50] batch [145/500] time 0.896 (0.889) data 0.000 (0.005) loss 1.1104 (1.0867) acc 65.6250 (72.6293) lr 1.2487e-03 eta 3:25:22 +epoch [23/50] batch [150/500] time 0.896 (0.889) data 0.000 (0.005) loss 1.3193 (1.0854) acc 68.7500 (72.5833) lr 1.2487e-03 eta 3:25:11 +epoch [23/50] batch [155/500] time 0.892 (0.889) data 0.000 (0.004) loss 1.5850 (1.0864) acc 62.5000 (72.5403) lr 1.2487e-03 eta 3:25:12 +epoch [23/50] batch [160/500] time 0.894 (0.889) data 0.000 (0.004) loss 0.9497 (1.0789) acc 78.1250 (72.7539) lr 1.2487e-03 eta 3:25:02 +epoch [23/50] batch [165/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.0156 (1.0764) acc 68.7500 (72.8409) lr 1.2487e-03 eta 3:24:52 +epoch [23/50] batch [170/500] time 0.914 (0.888) data 0.000 (0.004) loss 1.0361 (1.0782) acc 68.7500 (72.8860) lr 1.2487e-03 eta 3:24:42 +epoch [23/50] batch [175/500] time 0.896 (0.888) data 0.000 (0.004) loss 1.1172 (1.0731) acc 65.6250 (72.9643) lr 1.2487e-03 eta 3:24:36 +epoch [23/50] batch [180/500] time 0.889 (0.888) data 0.000 (0.004) loss 1.1992 (1.0708) acc 65.6250 (72.9861) lr 1.2487e-03 eta 
3:24:26 +epoch [23/50] batch [185/500] time 0.882 (0.887) data 0.000 (0.004) loss 1.6035 (1.0757) acc 62.5000 (72.8547) lr 1.2487e-03 eta 3:24:20 +epoch [23/50] batch [190/500] time 0.898 (0.887) data 0.000 (0.004) loss 1.2861 (1.0815) acc 59.3750 (72.5658) lr 1.2487e-03 eta 3:24:12 +epoch [23/50] batch [195/500] time 0.900 (0.887) data 0.000 (0.004) loss 0.8350 (1.0940) acc 78.1250 (72.4359) lr 1.2487e-03 eta 3:24:03 +epoch [23/50] batch [200/500] time 0.889 (0.887) data 0.000 (0.003) loss 1.3066 (1.1002) acc 68.7500 (72.3125) lr 1.2487e-03 eta 3:23:59 +epoch [23/50] batch [205/500] time 0.897 (0.887) data 0.000 (0.003) loss 1.1250 (1.0968) acc 75.0000 (72.4085) lr 1.2487e-03 eta 3:23:53 +epoch [23/50] batch [210/500] time 0.897 (0.887) data 0.000 (0.003) loss 1.5244 (1.1022) acc 68.7500 (72.3363) lr 1.2487e-03 eta 3:23:47 +epoch [23/50] batch [215/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.0205 (1.1011) acc 71.8750 (72.3547) lr 1.2487e-03 eta 3:23:40 +epoch [23/50] batch [220/500] time 0.892 (0.886) data 0.000 (0.003) loss 1.1113 (1.1010) acc 81.2500 (72.3295) lr 1.2487e-03 eta 3:23:30 +epoch [23/50] batch [225/500] time 0.921 (0.886) data 0.000 (0.003) loss 1.4473 (1.1020) acc 59.3750 (72.3056) lr 1.2487e-03 eta 3:23:28 +epoch [23/50] batch [230/500] time 0.885 (0.886) data 0.000 (0.003) loss 1.6816 (1.1068) acc 56.2500 (72.1875) lr 1.2487e-03 eta 3:23:22 +epoch [23/50] batch [235/500] time 0.887 (0.886) data 0.000 (0.003) loss 0.9697 (1.1072) acc 81.2500 (72.2872) lr 1.2487e-03 eta 3:23:19 +epoch [23/50] batch [240/500] time 0.862 (0.886) data 0.000 (0.003) loss 1.3496 (1.1092) acc 68.7500 (72.2396) lr 1.2487e-03 eta 3:23:13 +epoch [23/50] batch [245/500] time 0.906 (0.886) data 0.000 (0.003) loss 1.2236 (1.1106) acc 75.0000 (72.2449) lr 1.2487e-03 eta 3:23:10 +epoch [23/50] batch [250/500] time 0.914 (0.886) data 0.000 (0.003) loss 1.0469 (1.1093) acc 68.7500 (72.2875) lr 1.2487e-03 eta 3:23:08 +epoch [23/50] batch [255/500] time 0.897 (0.887) data 
0.000 (0.003) loss 1.4541 (1.1111) acc 68.7500 (72.2672) lr 1.2487e-03 eta 3:23:14 +epoch [23/50] batch [260/500] time 0.872 (0.887) data 0.000 (0.003) loss 1.5000 (1.1100) acc 56.2500 (72.3197) lr 1.2487e-03 eta 3:23:09 +epoch [23/50] batch [265/500] time 0.903 (0.887) data 0.000 (0.003) loss 1.3135 (1.1106) acc 75.0000 (72.2642) lr 1.2487e-03 eta 3:23:07 +epoch [23/50] batch [270/500] time 0.890 (0.887) data 0.000 (0.003) loss 1.5107 (1.1129) acc 43.7500 (72.1759) lr 1.2487e-03 eta 3:23:04 +epoch [23/50] batch [275/500] time 0.896 (0.887) data 0.000 (0.003) loss 1.6152 (1.1174) acc 65.6250 (72.1023) lr 1.2487e-03 eta 3:22:59 +epoch [23/50] batch [280/500] time 0.872 (0.887) data 0.000 (0.003) loss 1.6572 (1.1202) acc 62.5000 (71.9643) lr 1.2487e-03 eta 3:22:52 +epoch [23/50] batch [285/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0225 (1.1189) acc 81.2500 (72.0943) lr 1.2487e-03 eta 3:22:45 +epoch [23/50] batch [290/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.3350 (1.1209) acc 78.1250 (72.0043) lr 1.2487e-03 eta 3:22:42 +epoch [23/50] batch [295/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.9897 (1.1192) acc 71.8750 (72.0021) lr 1.2487e-03 eta 3:22:39 +epoch [23/50] batch [300/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9521 (1.1166) acc 75.0000 (72.0625) lr 1.2487e-03 eta 3:22:38 +epoch [23/50] batch [305/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.7271 (1.1163) acc 78.1250 (72.1107) lr 1.2487e-03 eta 3:22:34 +epoch [23/50] batch [310/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.7227 (1.1153) acc 87.5000 (72.1573) lr 1.2487e-03 eta 3:22:29 +epoch [23/50] batch [315/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.3701 (1.1145) acc 71.8750 (72.1825) lr 1.2487e-03 eta 3:22:24 +epoch [23/50] batch [320/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.8633 (1.1111) acc 75.0000 (72.2656) lr 1.2487e-03 eta 3:22:20 +epoch [23/50] batch [325/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.5908 (1.1105) acc 84.3750 (72.2981) lr 
1.2487e-03 eta 3:22:17 +epoch [23/50] batch [330/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.9380 (1.1099) acc 71.8750 (72.2822) lr 1.2487e-03 eta 3:22:11 +epoch [23/50] batch [335/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.1543 (1.1098) acc 65.6250 (72.2668) lr 1.2487e-03 eta 3:22:05 +epoch [23/50] batch [340/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.8740 (1.1109) acc 75.0000 (72.2610) lr 1.2487e-03 eta 3:22:01 +epoch [23/50] batch [345/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.0439 (1.1102) acc 75.0000 (72.2736) lr 1.2487e-03 eta 3:21:53 +epoch [23/50] batch [350/500] time 0.842 (0.887) data 0.000 (0.002) loss 1.5547 (1.1124) acc 71.8750 (72.2589) lr 1.2487e-03 eta 3:21:46 +epoch [23/50] batch [355/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8223 (1.1123) acc 75.0000 (72.2447) lr 1.2487e-03 eta 3:21:40 +epoch [23/50] batch [360/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.6919 (1.1094) acc 81.2500 (72.3351) lr 1.2487e-03 eta 3:21:35 +epoch [23/50] batch [365/500] time 0.856 (0.887) data 0.000 (0.002) loss 1.8018 (1.1071) acc 59.3750 (72.3801) lr 1.2487e-03 eta 3:21:29 +epoch [23/50] batch [370/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.8369 (1.1104) acc 68.7500 (72.3649) lr 1.2487e-03 eta 3:21:26 +epoch [23/50] batch [375/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.7881 (1.1095) acc 84.3750 (72.3917) lr 1.2487e-03 eta 3:21:18 +epoch [23/50] batch [380/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.0439 (1.1075) acc 75.0000 (72.4013) lr 1.2487e-03 eta 3:21:11 +epoch [23/50] batch [385/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.7549 (1.1114) acc 65.6250 (72.2971) lr 1.2487e-03 eta 3:21:07 +epoch [23/50] batch [390/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9248 (1.1144) acc 71.8750 (72.2516) lr 1.2487e-03 eta 3:21:02 +epoch [23/50] batch [395/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.4541 (1.1170) acc 65.6250 (72.1677) lr 1.2487e-03 eta 3:20:57 +epoch [23/50] batch [400/500] time 0.912 
(0.887) data 0.000 (0.002) loss 1.2871 (1.1176) acc 71.8750 (72.1719) lr 1.2487e-03 eta 3:20:59 +epoch [23/50] batch [405/500] time 0.907 (0.887) data 0.000 (0.002) loss 0.8247 (1.1141) acc 75.0000 (72.2377) lr 1.2487e-03 eta 3:20:55 +epoch [23/50] batch [410/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.3662 (1.1148) acc 75.0000 (72.2256) lr 1.2487e-03 eta 3:20:52 +epoch [23/50] batch [415/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.3496 (1.1190) acc 62.5000 (72.1235) lr 1.2487e-03 eta 3:20:49 +epoch [23/50] batch [420/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.2666 (1.1202) acc 71.8750 (72.1280) lr 1.2487e-03 eta 3:20:43 +epoch [23/50] batch [425/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.7837 (1.1193) acc 75.0000 (72.1324) lr 1.2487e-03 eta 3:20:40 +epoch [23/50] batch [430/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.6113 (1.1210) acc 62.5000 (72.0930) lr 1.2487e-03 eta 3:20:35 +epoch [23/50] batch [435/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.4736 (1.1238) acc 62.5000 (72.0546) lr 1.2487e-03 eta 3:20:30 +epoch [23/50] batch [440/500] time 1.011 (0.887) data 0.000 (0.002) loss 1.4258 (1.1232) acc 65.6250 (72.0455) lr 1.2487e-03 eta 3:20:29 +epoch [23/50] batch [445/500] time 0.912 (0.887) data 0.001 (0.002) loss 1.3359 (1.1247) acc 75.0000 (72.0154) lr 1.2487e-03 eta 3:20:26 +epoch [23/50] batch [450/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.8882 (1.1227) acc 71.8750 (72.0417) lr 1.2487e-03 eta 3:20:22 +epoch [23/50] batch [455/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.0391 (1.1237) acc 78.1250 (72.0673) lr 1.2487e-03 eta 3:20:17 +epoch [23/50] batch [460/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.7007 (1.1212) acc 84.3750 (72.1128) lr 1.2487e-03 eta 3:20:09 +epoch [23/50] batch [465/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.5225 (1.1206) acc 62.5000 (72.1304) lr 1.2487e-03 eta 3:20:05 +epoch [23/50] batch [470/500] time 0.904 (0.887) data 0.000 (0.002) loss 0.6133 (1.1193) acc 81.2500 
(72.1210) lr 1.2487e-03 eta 3:19:59 +epoch [23/50] batch [475/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.9819 (1.1191) acc 71.8750 (72.1382) lr 1.2487e-03 eta 3:19:54 +epoch [23/50] batch [480/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.8506 (1.1180) acc 87.5000 (72.1615) lr 1.2487e-03 eta 3:19:50 +epoch [23/50] batch [485/500] time 0.892 (0.887) data 0.001 (0.002) loss 1.0391 (1.1173) acc 68.7500 (72.1392) lr 1.2487e-03 eta 3:19:46 +epoch [23/50] batch [490/500] time 0.920 (0.887) data 0.000 (0.002) loss 1.0781 (1.1189) acc 71.8750 (72.1110) lr 1.2487e-03 eta 3:19:40 +epoch [23/50] batch [495/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.9487 (1.1192) acc 71.8750 (72.1212) lr 1.2487e-03 eta 3:19:35 +epoch [23/50] batch [500/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.4180 (1.1203) acc 71.8750 (72.1188) lr 1.1874e-03 eta 3:19:30 +epoch [24/50] batch [5/500] time 0.864 (1.009) data 0.000 (0.125) loss 1.8955 (1.2322) acc 59.3750 (68.7500) lr 1.1874e-03 eta 3:46:53 +epoch [24/50] batch [10/500] time 0.912 (0.950) data 0.000 (0.063) loss 0.7397 (1.1812) acc 75.0000 (69.3750) lr 1.1874e-03 eta 3:33:37 +epoch [24/50] batch [15/500] time 0.875 (0.929) data 0.000 (0.042) loss 1.3789 (1.1942) acc 68.7500 (69.7917) lr 1.1874e-03 eta 3:28:46 +epoch [24/50] batch [20/500] time 0.904 (0.915) data 0.000 (0.031) loss 1.2510 (1.1561) acc 78.1250 (71.4062) lr 1.1874e-03 eta 3:25:39 +epoch [24/50] batch [25/500] time 0.899 (0.910) data 0.000 (0.025) loss 1.3076 (1.2164) acc 71.8750 (70.7500) lr 1.1874e-03 eta 3:24:25 +epoch [24/50] batch [30/500] time 0.864 (0.909) data 0.000 (0.021) loss 0.9648 (1.1882) acc 75.0000 (71.0417) lr 1.1874e-03 eta 3:24:01 +epoch [24/50] batch [35/500] time 0.876 (0.905) data 0.000 (0.018) loss 1.0059 (1.1876) acc 68.7500 (71.1607) lr 1.1874e-03 eta 3:23:05 +epoch [24/50] batch [40/500] time 0.866 (0.902) data 0.000 (0.016) loss 1.2900 (1.1736) acc 68.7500 (71.6406) lr 1.1874e-03 eta 3:22:20 +epoch [24/50] batch [45/500] time 0.886 
(0.901) data 0.000 (0.014) loss 0.6997 (1.1362) acc 78.1250 (72.0833) lr 1.1874e-03 eta 3:22:06 +epoch [24/50] batch [50/500] time 0.854 (0.899) data 0.000 (0.013) loss 1.1318 (1.1380) acc 62.5000 (72.0625) lr 1.1874e-03 eta 3:21:30 +epoch [24/50] batch [55/500] time 0.900 (0.898) data 0.000 (0.012) loss 1.4590 (1.1366) acc 62.5000 (71.9318) lr 1.1874e-03 eta 3:21:09 +epoch [24/50] batch [60/500] time 0.895 (0.897) data 0.000 (0.011) loss 0.7681 (1.1339) acc 75.0000 (71.9271) lr 1.1874e-03 eta 3:20:49 +epoch [24/50] batch [65/500] time 0.878 (0.895) data 0.000 (0.010) loss 0.6753 (1.1087) acc 87.5000 (72.5481) lr 1.1874e-03 eta 3:20:25 +epoch [24/50] batch [70/500] time 0.896 (0.894) data 0.000 (0.009) loss 1.0928 (1.0984) acc 68.7500 (72.3661) lr 1.1874e-03 eta 3:20:02 +epoch [24/50] batch [75/500] time 0.896 (0.894) data 0.000 (0.009) loss 0.9702 (1.1036) acc 71.8750 (72.3750) lr 1.1874e-03 eta 3:19:55 +epoch [24/50] batch [80/500] time 0.877 (0.893) data 0.000 (0.008) loss 1.1963 (1.1145) acc 65.6250 (72.2266) lr 1.1874e-03 eta 3:19:37 +epoch [24/50] batch [85/500] time 0.894 (0.893) data 0.000 (0.008) loss 1.1240 (1.1275) acc 65.6250 (72.0221) lr 1.1874e-03 eta 3:19:33 +epoch [24/50] batch [90/500] time 0.889 (0.894) data 0.000 (0.007) loss 1.4150 (1.1245) acc 65.6250 (72.0139) lr 1.1874e-03 eta 3:19:47 +epoch [24/50] batch [95/500] time 0.866 (0.893) data 0.000 (0.007) loss 0.7271 (1.1148) acc 71.8750 (71.9408) lr 1.1874e-03 eta 3:19:29 +epoch [24/50] batch [100/500] time 0.885 (0.892) data 0.000 (0.006) loss 1.1523 (1.1133) acc 81.2500 (71.9688) lr 1.1874e-03 eta 3:19:11 +epoch [24/50] batch [105/500] time 0.893 (0.892) data 0.000 (0.006) loss 1.5205 (1.1196) acc 65.6250 (72.1131) lr 1.1874e-03 eta 3:19:07 +epoch [24/50] batch [110/500] time 0.875 (0.891) data 0.000 (0.006) loss 1.3926 (1.1314) acc 65.6250 (71.9034) lr 1.1874e-03 eta 3:18:54 +epoch [24/50] batch [115/500] time 0.872 (0.892) data 0.000 (0.006) loss 0.9385 (1.1289) acc 75.0000 (72.0109) lr 
1.1874e-03 eta 3:18:54 +epoch [24/50] batch [120/500] time 0.888 (0.892) data 0.000 (0.005) loss 1.0771 (1.1311) acc 71.8750 (71.9531) lr 1.1874e-03 eta 3:18:53 +epoch [24/50] batch [125/500] time 0.907 (0.892) data 0.000 (0.005) loss 1.4551 (1.1193) acc 59.3750 (72.2250) lr 1.1874e-03 eta 3:18:47 +epoch [24/50] batch [130/500] time 0.863 (0.891) data 0.000 (0.005) loss 1.7480 (1.1306) acc 62.5000 (71.9712) lr 1.1874e-03 eta 3:18:35 +epoch [24/50] batch [135/500] time 0.895 (0.892) data 0.000 (0.005) loss 1.0322 (1.1318) acc 71.8750 (71.9907) lr 1.1874e-03 eta 3:18:36 +epoch [24/50] batch [140/500] time 0.906 (0.892) data 0.000 (0.005) loss 0.9312 (1.1244) acc 75.0000 (72.2321) lr 1.1874e-03 eta 3:18:35 +epoch [24/50] batch [145/500] time 0.900 (0.892) data 0.000 (0.005) loss 0.7095 (1.1206) acc 81.2500 (72.3707) lr 1.1874e-03 eta 3:18:26 +epoch [24/50] batch [150/500] time 0.848 (0.891) data 0.000 (0.004) loss 0.7495 (1.1113) acc 81.2500 (72.4792) lr 1.1874e-03 eta 3:18:16 +epoch [24/50] batch [155/500] time 0.915 (0.891) data 0.000 (0.004) loss 1.6221 (1.1138) acc 65.6250 (72.3185) lr 1.1874e-03 eta 3:18:11 +epoch [24/50] batch [160/500] time 0.910 (0.891) data 0.000 (0.004) loss 0.7104 (1.1160) acc 78.1250 (72.4023) lr 1.1874e-03 eta 3:18:06 +epoch [24/50] batch [165/500] time 0.868 (0.891) data 0.000 (0.004) loss 1.2461 (1.1211) acc 71.8750 (72.2917) lr 1.1874e-03 eta 3:18:03 +epoch [24/50] batch [170/500] time 0.887 (0.891) data 0.000 (0.004) loss 1.1143 (1.1176) acc 81.2500 (72.2978) lr 1.1874e-03 eta 3:17:58 +epoch [24/50] batch [175/500] time 0.857 (0.891) data 0.000 (0.004) loss 1.3926 (1.1249) acc 62.5000 (72.0714) lr 1.1874e-03 eta 3:17:53 +epoch [24/50] batch [180/500] time 0.864 (0.891) data 0.000 (0.004) loss 1.5430 (1.1286) acc 62.5000 (71.9792) lr 1.1874e-03 eta 3:17:46 +epoch [24/50] batch [185/500] time 0.887 (0.891) data 0.000 (0.004) loss 0.8496 (1.1252) acc 71.8750 (72.1284) lr 1.1874e-03 eta 3:17:42 +epoch [24/50] batch [190/500] time 0.876 
(0.891) data 0.000 (0.004) loss 0.6348 (1.1189) acc 71.8750 (72.1711) lr 1.1874e-03 eta 3:17:37 +epoch [24/50] batch [195/500] time 0.891 (0.891) data 0.000 (0.003) loss 1.7129 (1.1271) acc 59.3750 (72.0032) lr 1.1874e-03 eta 3:17:29 +epoch [24/50] batch [200/500] time 0.905 (0.890) data 0.000 (0.003) loss 1.2148 (1.1250) acc 59.3750 (71.8906) lr 1.1874e-03 eta 3:17:20 +epoch [24/50] batch [205/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.2236 (1.1238) acc 65.6250 (71.9055) lr 1.1874e-03 eta 3:17:14 +epoch [24/50] batch [210/500] time 0.882 (0.890) data 0.000 (0.003) loss 1.3467 (1.1240) acc 53.1250 (71.8304) lr 1.1874e-03 eta 3:17:08 +epoch [24/50] batch [215/500] time 0.931 (0.890) data 0.000 (0.003) loss 1.1250 (1.1267) acc 75.0000 (71.7587) lr 1.1874e-03 eta 3:17:04 +epoch [24/50] batch [220/500] time 0.885 (0.890) data 0.000 (0.003) loss 0.8320 (1.1237) acc 81.2500 (71.8182) lr 1.1874e-03 eta 3:16:56 +epoch [24/50] batch [225/500] time 0.910 (0.890) data 0.000 (0.003) loss 0.8379 (1.1213) acc 78.1250 (71.8472) lr 1.1874e-03 eta 3:16:54 +epoch [24/50] batch [230/500] time 0.988 (0.890) data 0.000 (0.003) loss 1.2910 (1.1224) acc 62.5000 (71.9022) lr 1.1874e-03 eta 3:16:52 +epoch [24/50] batch [235/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.7139 (1.1192) acc 78.1250 (71.9814) lr 1.1874e-03 eta 3:16:44 +epoch [24/50] batch [240/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.9307 (1.1168) acc 78.1250 (72.0573) lr 1.1874e-03 eta 3:16:40 +epoch [24/50] batch [245/500] time 0.915 (0.890) data 0.000 (0.003) loss 1.0947 (1.1130) acc 81.2500 (72.2321) lr 1.1874e-03 eta 3:16:36 +epoch [24/50] batch [250/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.0498 (1.1098) acc 65.6250 (72.2500) lr 1.1874e-03 eta 3:16:31 +epoch [24/50] batch [255/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.0215 (1.1099) acc 81.2500 (72.3284) lr 1.1874e-03 eta 3:16:24 +epoch [24/50] batch [260/500] time 0.861 (0.890) data 0.000 (0.003) loss 2.1289 (1.1164) acc 56.2500 
(72.2716) lr 1.1874e-03 eta 3:16:18 +epoch [24/50] batch [265/500] time 0.862 (0.889) data 0.000 (0.003) loss 1.0986 (1.1168) acc 65.6250 (72.2052) lr 1.1874e-03 eta 3:16:10 +epoch [24/50] batch [270/500] time 0.864 (0.889) data 0.000 (0.003) loss 0.7031 (1.1187) acc 84.3750 (72.1875) lr 1.1874e-03 eta 3:16:03 +epoch [24/50] batch [275/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.1816 (1.1224) acc 71.8750 (72.1136) lr 1.1874e-03 eta 3:16:00 +epoch [24/50] batch [280/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.1914 (1.1245) acc 62.5000 (71.9866) lr 1.1874e-03 eta 3:15:53 +epoch [24/50] batch [285/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.1543 (1.1266) acc 78.1250 (72.0285) lr 1.1874e-03 eta 3:15:47 +epoch [24/50] batch [290/500] time 0.866 (0.889) data 0.000 (0.002) loss 1.2686 (1.1228) acc 68.7500 (72.0690) lr 1.1874e-03 eta 3:15:43 +epoch [24/50] batch [295/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.3125 (1.1258) acc 78.1250 (72.0339) lr 1.1874e-03 eta 3:15:40 +epoch [24/50] batch [300/500] time 0.882 (0.889) data 0.000 (0.002) loss 0.8228 (1.1256) acc 78.1250 (72.0729) lr 1.1874e-03 eta 3:15:37 +epoch [24/50] batch [305/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.6787 (1.1233) acc 81.2500 (72.1107) lr 1.1874e-03 eta 3:15:36 +epoch [24/50] batch [310/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.6230 (1.1189) acc 90.6250 (72.2077) lr 1.1874e-03 eta 3:15:33 +epoch [24/50] batch [315/500] time 0.903 (0.890) data 0.000 (0.002) loss 0.8770 (1.1160) acc 71.8750 (72.1925) lr 1.1874e-03 eta 3:15:30 +epoch [24/50] batch [320/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.8564 (1.1175) acc 62.5000 (72.2070) lr 1.1874e-03 eta 3:15:25 +epoch [24/50] batch [325/500] time 0.894 (0.890) data 0.000 (0.002) loss 0.7192 (1.1163) acc 90.6250 (72.2788) lr 1.1874e-03 eta 3:15:19 +epoch [24/50] batch [330/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.9385 (1.1167) acc 81.2500 (72.2633) lr 1.1874e-03 eta 3:15:12 +epoch [24/50] batch [335/500] 
time 0.868 (0.889) data 0.000 (0.002) loss 1.2676 (1.1132) acc 68.7500 (72.3228) lr 1.1874e-03 eta 3:15:08 +epoch [24/50] batch [340/500] time 0.850 (0.889) data 0.000 (0.002) loss 1.4570 (1.1142) acc 56.2500 (72.2151) lr 1.1874e-03 eta 3:15:03 +epoch [24/50] batch [345/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0889 (1.1151) acc 62.5000 (72.1649) lr 1.1874e-03 eta 3:14:56 +epoch [24/50] batch [350/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.1914 (1.1149) acc 71.8750 (72.1518) lr 1.1874e-03 eta 3:14:52 +epoch [24/50] batch [355/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.0742 (1.1155) acc 75.0000 (72.1831) lr 1.1874e-03 eta 3:14:48 +epoch [24/50] batch [360/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.2061 (1.1163) acc 71.8750 (72.2049) lr 1.1874e-03 eta 3:14:44 +epoch [24/50] batch [365/500] time 0.854 (0.889) data 0.000 (0.002) loss 1.0088 (1.1166) acc 65.6250 (72.1747) lr 1.1874e-03 eta 3:14:39 +epoch [24/50] batch [370/500] time 0.901 (0.889) data 0.000 (0.002) loss 0.9229 (1.1148) acc 75.0000 (72.1537) lr 1.1874e-03 eta 3:14:33 +epoch [24/50] batch [375/500] time 0.893 (0.889) data 0.000 (0.002) loss 1.0117 (1.1143) acc 71.8750 (72.1917) lr 1.1874e-03 eta 3:14:30 +epoch [24/50] batch [380/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.8267 (1.1144) acc 78.1250 (72.2122) lr 1.1874e-03 eta 3:14:25 +epoch [24/50] batch [385/500] time 0.846 (0.889) data 0.000 (0.002) loss 1.2861 (1.1157) acc 65.6250 (72.2240) lr 1.1874e-03 eta 3:14:18 +epoch [24/50] batch [390/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.0898 (1.1142) acc 68.7500 (72.2276) lr 1.1874e-03 eta 3:14:11 +epoch [24/50] batch [395/500] time 0.860 (0.889) data 0.000 (0.002) loss 2.1133 (1.1166) acc 53.1250 (72.1677) lr 1.1874e-03 eta 3:14:04 +epoch [24/50] batch [400/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1025 (1.1155) acc 71.8750 (72.1875) lr 1.1874e-03 eta 3:13:57 +epoch [24/50] batch [405/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.1445 (1.1159) acc 
75.0000 (72.1836) lr 1.1874e-03 eta 3:13:52 +epoch [24/50] batch [410/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.6548 (1.1171) acc 84.3750 (72.1951) lr 1.1874e-03 eta 3:13:46 +epoch [24/50] batch [415/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.3486 (1.1185) acc 62.5000 (72.1310) lr 1.1874e-03 eta 3:13:39 +epoch [24/50] batch [420/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.3887 (1.1192) acc 68.7500 (72.1205) lr 1.1874e-03 eta 3:13:37 +epoch [24/50] batch [425/500] time 0.914 (0.888) data 0.000 (0.002) loss 1.1250 (1.1189) acc 71.8750 (72.1471) lr 1.1874e-03 eta 3:13:36 +epoch [24/50] batch [430/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.3242 (1.1206) acc 62.5000 (72.0930) lr 1.1874e-03 eta 3:13:31 +epoch [24/50] batch [435/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.7012 (1.1195) acc 50.0000 (72.0833) lr 1.1874e-03 eta 3:13:26 +epoch [24/50] batch [440/500] time 0.929 (0.888) data 0.000 (0.002) loss 0.9023 (1.1226) acc 65.6250 (71.9815) lr 1.1874e-03 eta 3:13:21 +epoch [24/50] batch [445/500] time 0.861 (0.888) data 0.000 (0.002) loss 0.8955 (1.1220) acc 78.1250 (72.0014) lr 1.1874e-03 eta 3:13:16 +epoch [24/50] batch [450/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.7588 (1.1217) acc 78.1250 (71.9514) lr 1.1874e-03 eta 3:13:12 +epoch [24/50] batch [455/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.1025 (1.1212) acc 68.7500 (71.9849) lr 1.1874e-03 eta 3:13:08 +epoch [24/50] batch [460/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.2109 (1.1213) acc 68.7500 (71.9905) lr 1.1874e-03 eta 3:13:04 +epoch [24/50] batch [465/500] time 0.853 (0.888) data 0.000 (0.002) loss 1.1045 (1.1209) acc 75.0000 (71.9892) lr 1.1874e-03 eta 3:12:58 +epoch [24/50] batch [470/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.4268 (1.1230) acc 62.5000 (71.9215) lr 1.1874e-03 eta 3:12:54 +epoch [24/50] batch [475/500] time 0.904 (0.888) data 0.000 (0.002) loss 0.9395 (1.1205) acc 75.0000 (71.9934) lr 1.1874e-03 eta 3:12:51 +epoch [24/50] batch 
[480/500] time 0.903 (0.888) data 0.000 (0.002) loss 0.6675 (1.1194) acc 71.8750 (71.9922) lr 1.1874e-03 eta 3:12:46 +epoch [24/50] batch [485/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.8047 (1.1208) acc 81.2500 (72.0103) lr 1.1874e-03 eta 3:12:41 +epoch [24/50] batch [490/500] time 0.890 (0.888) data 0.000 (0.001) loss 1.3447 (1.1229) acc 75.0000 (71.9834) lr 1.1874e-03 eta 3:12:36 +epoch [24/50] batch [495/500] time 0.902 (0.888) data 0.000 (0.001) loss 0.8662 (1.1247) acc 75.0000 (71.9571) lr 1.1874e-03 eta 3:12:32 +epoch [24/50] batch [500/500] time 0.883 (0.888) data 0.000 (0.001) loss 1.0820 (1.1229) acc 68.7500 (71.9938) lr 1.1253e-03 eta 3:12:28 +epoch [25/50] batch [5/500] time 0.907 (1.028) data 0.000 (0.134) loss 1.3145 (1.0421) acc 68.7500 (70.6250) lr 1.1253e-03 eta 3:42:41 +epoch [25/50] batch [10/500] time 0.900 (0.954) data 0.000 (0.067) loss 0.6377 (1.0457) acc 81.2500 (70.6250) lr 1.1253e-03 eta 3:26:32 +epoch [25/50] batch [15/500] time 0.884 (0.928) data 0.000 (0.045) loss 0.4390 (0.9820) acc 93.7500 (73.9583) lr 1.1253e-03 eta 3:20:50 +epoch [25/50] batch [20/500] time 0.871 (0.920) data 0.000 (0.034) loss 1.0303 (1.0408) acc 75.0000 (72.3438) lr 1.1253e-03 eta 3:18:55 +epoch [25/50] batch [25/500] time 0.885 (0.915) data 0.000 (0.027) loss 1.8174 (1.0711) acc 62.5000 (71.7500) lr 1.1253e-03 eta 3:17:49 +epoch [25/50] batch [30/500] time 0.887 (0.912) data 0.000 (0.023) loss 0.9512 (1.0611) acc 78.1250 (72.1875) lr 1.1253e-03 eta 3:17:04 +epoch [25/50] batch [35/500] time 0.896 (0.908) data 0.000 (0.019) loss 0.9639 (1.0575) acc 78.1250 (72.6786) lr 1.1253e-03 eta 3:16:14 +epoch [25/50] batch [40/500] time 0.865 (0.905) data 0.000 (0.017) loss 1.2197 (1.0604) acc 68.7500 (72.7344) lr 1.1253e-03 eta 3:15:23 +epoch [25/50] batch [45/500] time 0.881 (0.902) data 0.000 (0.015) loss 0.7773 (1.0483) acc 78.1250 (73.1250) lr 1.1253e-03 eta 3:14:39 +epoch [25/50] batch [50/500] time 0.890 (0.900) data 0.000 (0.014) loss 1.1045 (1.0397) acc 
65.6250 (73.3125) lr 1.1253e-03 eta 3:14:20 +epoch [25/50] batch [55/500] time 0.872 (0.899) data 0.000 (0.012) loss 0.6338 (1.0479) acc 81.2500 (73.1818) lr 1.1253e-03 eta 3:13:51 +epoch [25/50] batch [60/500] time 0.904 (0.898) data 0.000 (0.011) loss 1.0283 (1.0537) acc 68.7500 (72.8646) lr 1.1253e-03 eta 3:13:33 +epoch [25/50] batch [65/500] time 0.860 (0.895) data 0.000 (0.011) loss 0.9067 (1.0500) acc 71.8750 (72.7404) lr 1.1253e-03 eta 3:12:57 +epoch [25/50] batch [70/500] time 0.876 (0.895) data 0.000 (0.010) loss 1.0039 (1.0568) acc 65.6250 (72.5446) lr 1.1253e-03 eta 3:12:56 +epoch [25/50] batch [75/500] time 0.892 (0.894) data 0.000 (0.009) loss 1.3877 (1.0614) acc 75.0000 (72.5417) lr 1.1253e-03 eta 3:12:41 +epoch [25/50] batch [80/500] time 0.898 (0.894) data 0.000 (0.009) loss 1.2754 (1.0561) acc 68.7500 (72.7344) lr 1.1253e-03 eta 3:12:35 +epoch [25/50] batch [85/500] time 0.859 (0.894) data 0.000 (0.008) loss 1.5771 (1.0697) acc 65.6250 (72.6103) lr 1.1253e-03 eta 3:12:24 +epoch [25/50] batch [90/500] time 0.885 (0.893) data 0.000 (0.008) loss 1.7266 (1.0778) acc 59.3750 (72.4306) lr 1.1253e-03 eta 3:12:09 +epoch [25/50] batch [95/500] time 0.903 (0.893) data 0.000 (0.007) loss 1.4873 (1.0896) acc 65.6250 (72.2697) lr 1.1253e-03 eta 3:12:04 +epoch [25/50] batch [100/500] time 0.875 (0.893) data 0.000 (0.007) loss 1.2236 (1.0954) acc 65.6250 (72.1562) lr 1.1253e-03 eta 3:11:54 +epoch [25/50] batch [105/500] time 0.896 (0.892) data 0.000 (0.007) loss 1.8213 (1.1081) acc 65.6250 (72.0536) lr 1.1253e-03 eta 3:11:43 +epoch [25/50] batch [110/500] time 0.890 (0.891) data 0.000 (0.006) loss 0.8950 (1.1058) acc 78.1250 (72.0739) lr 1.1253e-03 eta 3:11:26 +epoch [25/50] batch [115/500] time 0.870 (0.892) data 0.000 (0.006) loss 0.9985 (1.0983) acc 78.1250 (72.2011) lr 1.1253e-03 eta 3:11:32 +epoch [25/50] batch [120/500] time 0.888 (0.891) data 0.000 (0.006) loss 1.3027 (1.1027) acc 71.8750 (72.1615) lr 1.1253e-03 eta 3:11:21 +epoch [25/50] batch [125/500] 
time 0.891 (0.891) data 0.000 (0.006) loss 1.5176 (1.1014) acc 65.6250 (72.1750) lr 1.1253e-03 eta 3:11:15 +epoch [25/50] batch [130/500] time 0.897 (0.892) data 0.000 (0.005) loss 0.8560 (1.1043) acc 78.1250 (72.2115) lr 1.1253e-03 eta 3:11:13 +epoch [25/50] batch [135/500] time 0.901 (0.891) data 0.000 (0.005) loss 1.2773 (1.1012) acc 75.0000 (72.4074) lr 1.1253e-03 eta 3:11:07 +epoch [25/50] batch [140/500] time 0.857 (0.891) data 0.000 (0.005) loss 1.1611 (1.0980) acc 78.1250 (72.3661) lr 1.1253e-03 eta 3:10:57 +epoch [25/50] batch [145/500] time 0.867 (0.890) data 0.000 (0.005) loss 1.1924 (1.1068) acc 71.8750 (72.3060) lr 1.1253e-03 eta 3:10:47 +epoch [25/50] batch [150/500] time 0.899 (0.890) data 0.000 (0.005) loss 0.8608 (1.1004) acc 62.5000 (72.3125) lr 1.1253e-03 eta 3:10:36 +epoch [25/50] batch [155/500] time 0.893 (0.890) data 0.000 (0.005) loss 0.7251 (1.0939) acc 78.1250 (72.4597) lr 1.1253e-03 eta 3:10:27 +epoch [25/50] batch [160/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.4688 (1.1034) acc 56.2500 (72.2266) lr 1.1253e-03 eta 3:10:20 +epoch [25/50] batch [165/500] time 0.897 (0.889) data 0.000 (0.004) loss 1.4121 (1.1066) acc 75.0000 (72.1970) lr 1.1253e-03 eta 3:10:10 +epoch [25/50] batch [170/500] time 0.883 (0.888) data 0.000 (0.004) loss 0.9248 (1.1093) acc 65.6250 (72.1140) lr 1.1253e-03 eta 3:09:58 +epoch [25/50] batch [175/500] time 0.905 (0.888) data 0.000 (0.004) loss 0.9229 (1.1105) acc 81.2500 (72.1786) lr 1.1253e-03 eta 3:09:53 +epoch [25/50] batch [180/500] time 0.882 (0.888) data 0.000 (0.004) loss 1.5273 (1.1163) acc 65.6250 (72.0833) lr 1.1253e-03 eta 3:09:49 +epoch [25/50] batch [185/500] time 0.878 (0.888) data 0.000 (0.004) loss 0.8203 (1.1107) acc 71.8750 (72.1284) lr 1.1253e-03 eta 3:09:41 +epoch [25/50] batch [190/500] time 0.888 (0.888) data 0.000 (0.004) loss 1.9424 (1.1178) acc 50.0000 (71.9572) lr 1.1253e-03 eta 3:09:34 +epoch [25/50] batch [195/500] time 0.884 (0.888) data 0.000 (0.004) loss 0.6182 (1.1161) acc 
81.2500 (72.0192) lr 1.1253e-03 eta 3:09:30 +epoch [25/50] batch [200/500] time 0.908 (0.888) data 0.000 (0.004) loss 0.9058 (1.1155) acc 75.0000 (72.0312) lr 1.1253e-03 eta 3:09:27 +epoch [25/50] batch [205/500] time 0.853 (0.887) data 0.000 (0.004) loss 1.7959 (1.1197) acc 65.6250 (71.9665) lr 1.1253e-03 eta 3:09:15 +epoch [25/50] batch [210/500] time 0.899 (0.887) data 0.000 (0.003) loss 1.1709 (1.1219) acc 65.6250 (71.8155) lr 1.1253e-03 eta 3:09:09 +epoch [25/50] batch [215/500] time 0.858 (0.888) data 0.000 (0.003) loss 0.4912 (1.1212) acc 90.6250 (71.8605) lr 1.1253e-03 eta 3:09:09 +epoch [25/50] batch [220/500] time 0.877 (0.887) data 0.000 (0.003) loss 1.1592 (1.1270) acc 68.7500 (71.7756) lr 1.1253e-03 eta 3:09:00 +epoch [25/50] batch [225/500] time 0.852 (0.887) data 0.000 (0.003) loss 0.8696 (1.1265) acc 68.7500 (71.6667) lr 1.1253e-03 eta 3:08:50 +epoch [25/50] batch [230/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.7710 (1.1264) acc 78.1250 (71.7120) lr 1.1253e-03 eta 3:08:47 +epoch [25/50] batch [235/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.0195 (1.1263) acc 56.2500 (71.7021) lr 1.1253e-03 eta 3:08:46 +epoch [25/50] batch [240/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.5508 (1.1311) acc 65.6250 (71.5885) lr 1.1253e-03 eta 3:08:46 +epoch [25/50] batch [245/500] time 0.868 (0.887) data 0.000 (0.003) loss 0.9971 (1.1305) acc 65.6250 (71.5179) lr 1.1253e-03 eta 3:08:38 +epoch [25/50] batch [250/500] time 0.867 (0.887) data 0.000 (0.003) loss 0.7046 (1.1274) acc 81.2500 (71.5125) lr 1.1253e-03 eta 3:08:28 +epoch [25/50] batch [255/500] time 1.013 (0.887) data 0.000 (0.003) loss 0.6069 (1.1220) acc 78.1250 (71.5931) lr 1.1253e-03 eta 3:08:29 +epoch [25/50] batch [260/500] time 0.884 (0.887) data 0.000 (0.003) loss 1.2988 (1.1216) acc 75.0000 (71.6466) lr 1.1253e-03 eta 3:08:23 +epoch [25/50] batch [265/500] time 0.876 (0.887) data 0.000 (0.003) loss 1.0967 (1.1209) acc 78.1250 (71.6863) lr 1.1253e-03 eta 3:08:16 +epoch [25/50] batch 
[270/500] time 0.878 (0.887) data 0.000 (0.003) loss 1.3018 (1.1231) acc 65.6250 (71.6667) lr 1.1253e-03 eta 3:08:11 +epoch [25/50] batch [275/500] time 0.876 (0.887) data 0.000 (0.003) loss 0.9585 (1.1225) acc 68.7500 (71.5909) lr 1.1253e-03 eta 3:08:09 +epoch [25/50] batch [280/500] time 0.913 (0.887) data 0.000 (0.003) loss 0.7563 (1.1216) acc 87.5000 (71.6518) lr 1.1253e-03 eta 3:08:05 +epoch [25/50] batch [285/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.1660 (1.1208) acc 81.2500 (71.7544) lr 1.1253e-03 eta 3:08:02 +epoch [25/50] batch [290/500] time 0.869 (0.887) data 0.000 (0.003) loss 0.4636 (1.1147) acc 84.3750 (71.8427) lr 1.1253e-03 eta 3:07:59 +epoch [25/50] batch [295/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.8247 (1.1144) acc 78.1250 (71.8644) lr 1.1253e-03 eta 3:07:54 +epoch [25/50] batch [300/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.2139 (1.1134) acc 65.6250 (71.9167) lr 1.1253e-03 eta 3:07:50 +epoch [25/50] batch [305/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.5879 (1.1123) acc 84.3750 (71.9057) lr 1.1253e-03 eta 3:07:44 +epoch [25/50] batch [310/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.5771 (1.1137) acc 71.8750 (71.9052) lr 1.1253e-03 eta 3:07:39 +epoch [25/50] batch [315/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.3105 (1.1181) acc 68.7500 (71.8948) lr 1.1253e-03 eta 3:07:33 +epoch [25/50] batch [320/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.0430 (1.1204) acc 68.7500 (71.8164) lr 1.1253e-03 eta 3:07:29 +epoch [25/50] batch [325/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.9141 (1.1190) acc 71.8750 (71.8365) lr 1.1253e-03 eta 3:07:22 +epoch [25/50] batch [330/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.9341 (1.1206) acc 75.0000 (71.7898) lr 1.1253e-03 eta 3:07:16 +epoch [25/50] batch [335/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.5957 (1.1230) acc 62.5000 (71.7351) lr 1.1253e-03 eta 3:07:14 +epoch [25/50] batch [340/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.6611 
(1.1224) acc 59.3750 (71.7279) lr 1.1253e-03 eta 3:07:10 +epoch [25/50] batch [345/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8638 (1.1251) acc 78.1250 (71.7029) lr 1.1253e-03 eta 3:07:06 +epoch [25/50] batch [350/500] time 0.924 (0.887) data 0.000 (0.002) loss 1.9385 (1.1287) acc 65.6250 (71.6786) lr 1.1253e-03 eta 3:07:03 +epoch [25/50] batch [355/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.1592 (1.1302) acc 78.1250 (71.6989) lr 1.1253e-03 eta 3:07:03 +epoch [25/50] batch [360/500] time 0.884 (0.888) data 0.000 (0.002) loss 2.4121 (1.1311) acc 65.6250 (71.7361) lr 1.1253e-03 eta 3:07:00 +epoch [25/50] batch [365/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0576 (1.1297) acc 68.7500 (71.7466) lr 1.1253e-03 eta 3:06:52 +epoch [25/50] batch [370/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.5869 (1.1287) acc 84.3750 (71.7568) lr 1.1253e-03 eta 3:06:44 +epoch [25/50] batch [375/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.5996 (1.1299) acc 59.3750 (71.7083) lr 1.1253e-03 eta 3:06:38 +epoch [25/50] batch [380/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.7231 (1.1276) acc 84.3750 (71.7681) lr 1.1253e-03 eta 3:06:34 +epoch [25/50] batch [385/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.4121 (1.1285) acc 59.3750 (71.7127) lr 1.1253e-03 eta 3:06:27 +epoch [25/50] batch [390/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.1445 (1.1294) acc 71.8750 (71.6907) lr 1.1253e-03 eta 3:06:21 +epoch [25/50] batch [395/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.1729 (1.1278) acc 75.0000 (71.7247) lr 1.1253e-03 eta 3:06:14 +epoch [25/50] batch [400/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.9902 (1.1252) acc 68.7500 (71.7188) lr 1.1253e-03 eta 3:06:11 +epoch [25/50] batch [405/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.8677 (1.1290) acc 78.1250 (71.6590) lr 1.1253e-03 eta 3:06:08 +epoch [25/50] batch [410/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.5078 (1.1276) acc 62.5000 (71.6768) lr 1.1253e-03 eta 3:06:03 +epoch 
[25/50] batch [415/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7363 (1.1281) acc 75.0000 (71.6792) lr 1.1253e-03 eta 3:05:59 +epoch [25/50] batch [420/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.6860 (1.1280) acc 81.2500 (71.6592) lr 1.1253e-03 eta 3:05:55 +epoch [25/50] batch [425/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.9233 (1.1255) acc 81.2500 (71.6985) lr 1.1253e-03 eta 3:05:50 +epoch [25/50] batch [430/500] time 0.916 (0.887) data 0.000 (0.002) loss 0.9990 (1.1238) acc 81.2500 (71.7442) lr 1.1253e-03 eta 3:05:46 +epoch [25/50] batch [435/500] time 0.917 (0.887) data 0.000 (0.002) loss 0.7988 (1.1218) acc 90.6250 (71.7960) lr 1.1253e-03 eta 3:05:42 +epoch [25/50] batch [440/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.2188 (1.1207) acc 68.7500 (71.8040) lr 1.1253e-03 eta 3:05:38 +epoch [25/50] batch [445/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.1641 (1.1236) acc 71.8750 (71.7135) lr 1.1253e-03 eta 3:05:33 +epoch [25/50] batch [450/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.2803 (1.1251) acc 65.6250 (71.6597) lr 1.1253e-03 eta 3:05:29 +epoch [25/50] batch [455/500] time 0.889 (0.887) data 0.000 (0.002) loss 0.6636 (1.1230) acc 81.2500 (71.6896) lr 1.1253e-03 eta 3:05:24 +epoch [25/50] batch [460/500] time 0.915 (0.887) data 0.000 (0.002) loss 0.8765 (1.1243) acc 75.0000 (71.6916) lr 1.1253e-03 eta 3:05:19 +epoch [25/50] batch [465/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.4355 (1.1258) acc 65.6250 (71.6667) lr 1.1253e-03 eta 3:05:16 +epoch [25/50] batch [470/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.0137 (1.1240) acc 71.8750 (71.7221) lr 1.1253e-03 eta 3:05:10 +epoch [25/50] batch [475/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.6123 (1.1211) acc 84.3750 (71.8158) lr 1.1253e-03 eta 3:05:05 +epoch [25/50] batch [480/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.1670 (1.1209) acc 65.6250 (71.7839) lr 1.1253e-03 eta 3:05:00 +epoch [25/50] batch [485/500] time 0.912 (0.887) data 0.000 (0.002) loss 
0.7163 (1.1185) acc 78.1250 (71.8492) lr 1.1253e-03 eta 3:04:58 +epoch [25/50] batch [490/500] time 0.912 (0.887) data 0.000 (0.002) loss 0.8452 (1.1170) acc 78.1250 (71.9069) lr 1.1253e-03 eta 3:04:57 +epoch [25/50] batch [495/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.5664 (1.1171) acc 62.5000 (71.9066) lr 1.1253e-03 eta 3:04:52 +epoch [25/50] batch [500/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.5850 (1.1185) acc 59.3750 (71.9000) lr 1.0628e-03 eta 3:04:52 +epoch [26/50] batch [5/500] time 0.876 (1.018) data 0.000 (0.130) loss 0.8159 (1.1847) acc 71.8750 (67.5000) lr 1.0628e-03 eta 3:31:57 +epoch [26/50] batch [10/500] time 0.913 (0.962) data 0.000 (0.065) loss 0.7759 (1.1929) acc 78.1250 (69.3750) lr 1.0628e-03 eta 3:20:17 +epoch [26/50] batch [15/500] time 0.878 (0.933) data 0.000 (0.043) loss 1.7412 (1.1351) acc 71.8750 (72.7083) lr 1.0628e-03 eta 3:14:11 +epoch [26/50] batch [20/500] time 0.907 (0.924) data 0.000 (0.033) loss 1.2100 (1.1551) acc 62.5000 (72.0312) lr 1.0628e-03 eta 3:12:06 +epoch [26/50] batch [25/500] time 0.903 (0.919) data 0.000 (0.026) loss 1.1221 (1.1392) acc 78.1250 (72.8750) lr 1.0628e-03 eta 3:10:59 +epoch [26/50] batch [30/500] time 0.864 (0.911) data 0.000 (0.022) loss 1.4502 (1.1375) acc 62.5000 (72.1875) lr 1.0628e-03 eta 3:09:16 +epoch [26/50] batch [35/500] time 0.890 (0.907) data 0.000 (0.019) loss 0.9312 (1.1299) acc 84.3750 (72.8571) lr 1.0628e-03 eta 3:08:31 +epoch [26/50] batch [40/500] time 0.862 (0.904) data 0.000 (0.016) loss 1.5264 (1.1296) acc 68.7500 (72.2656) lr 1.0628e-03 eta 3:07:49 +epoch [26/50] batch [45/500] time 0.878 (0.901) data 0.000 (0.015) loss 1.0264 (1.1035) acc 78.1250 (72.5000) lr 1.0628e-03 eta 3:07:07 +epoch [26/50] batch [50/500] time 0.865 (0.900) data 0.000 (0.013) loss 1.3594 (1.1021) acc 75.0000 (72.6875) lr 1.0628e-03 eta 3:06:42 +epoch [26/50] batch [55/500] time 0.872 (0.897) data 0.000 (0.012) loss 1.0273 (1.1077) acc 65.6250 (72.6705) lr 1.0628e-03 eta 3:05:59 +epoch [26/50] 
batch [60/500] time 0.873 (0.898) data 0.000 (0.011) loss 1.3066 (1.1153) acc 71.8750 (72.6562) lr 1.0628e-03 eta 3:06:09 +epoch [26/50] batch [65/500] time 0.904 (0.896) data 0.000 (0.010) loss 1.0049 (1.1104) acc 78.1250 (72.7404) lr 1.0628e-03 eta 3:05:39 +epoch [26/50] batch [70/500] time 0.874 (0.895) data 0.000 (0.009) loss 1.1172 (1.1286) acc 68.7500 (72.2768) lr 1.0628e-03 eta 3:05:24 +epoch [26/50] batch [75/500] time 0.911 (0.894) data 0.000 (0.009) loss 1.3945 (1.1293) acc 62.5000 (72.1667) lr 1.0628e-03 eta 3:05:14 +epoch [26/50] batch [80/500] time 0.872 (0.894) data 0.000 (0.008) loss 1.1094 (1.1265) acc 68.7500 (72.0312) lr 1.0628e-03 eta 3:04:59 +epoch [26/50] batch [85/500] time 0.868 (0.893) data 0.000 (0.008) loss 0.6704 (1.1337) acc 84.3750 (71.8015) lr 1.0628e-03 eta 3:04:45 +epoch [26/50] batch [90/500] time 0.888 (0.893) data 0.000 (0.007) loss 1.0889 (1.1351) acc 75.0000 (71.8750) lr 1.0628e-03 eta 3:04:37 +epoch [26/50] batch [95/500] time 0.871 (0.892) data 0.000 (0.007) loss 0.9487 (1.1242) acc 71.8750 (71.9737) lr 1.0628e-03 eta 3:04:26 +epoch [26/50] batch [100/500] time 0.910 (0.893) data 0.000 (0.007) loss 0.6445 (1.1205) acc 90.6250 (72.0000) lr 1.0628e-03 eta 3:04:31 +epoch [26/50] batch [105/500] time 0.865 (0.893) data 0.000 (0.006) loss 1.2949 (1.1118) acc 68.7500 (72.0833) lr 1.0628e-03 eta 3:04:23 +epoch [26/50] batch [110/500] time 0.881 (0.892) data 0.000 (0.006) loss 1.0303 (1.1034) acc 78.1250 (72.3580) lr 1.0628e-03 eta 3:04:10 +epoch [26/50] batch [115/500] time 0.889 (0.891) data 0.000 (0.006) loss 1.0713 (1.1020) acc 75.0000 (72.4728) lr 1.0628e-03 eta 3:04:00 +epoch [26/50] batch [120/500] time 0.888 (0.891) data 0.000 (0.006) loss 0.9214 (1.0896) acc 78.1250 (72.8385) lr 1.0628e-03 eta 3:03:50 +epoch [26/50] batch [125/500] time 0.886 (0.891) data 0.000 (0.005) loss 0.7046 (1.0920) acc 78.1250 (72.8000) lr 1.0628e-03 eta 3:03:42 +epoch [26/50] batch [130/500] time 0.878 (0.890) data 0.000 (0.005) loss 0.9775 (1.0912) 
acc 75.0000 (72.8365) lr 1.0628e-03 eta 3:03:29 +epoch [26/50] batch [135/500] time 0.887 (0.890) data 0.000 (0.005) loss 0.6006 (1.0830) acc 75.0000 (72.9630) lr 1.0628e-03 eta 3:03:23 +epoch [26/50] batch [140/500] time 0.903 (0.890) data 0.000 (0.005) loss 0.9819 (1.0818) acc 68.7500 (73.0134) lr 1.0628e-03 eta 3:03:19 +epoch [26/50] batch [145/500] time 0.886 (0.890) data 0.000 (0.005) loss 1.1855 (1.0742) acc 75.0000 (73.1466) lr 1.0628e-03 eta 3:03:15 +epoch [26/50] batch [150/500] time 0.880 (0.889) data 0.000 (0.005) loss 0.8320 (1.0723) acc 78.1250 (73.1042) lr 1.0628e-03 eta 3:03:03 +epoch [26/50] batch [155/500] time 0.884 (0.889) data 0.000 (0.004) loss 1.5918 (1.0759) acc 59.3750 (73.0847) lr 1.0628e-03 eta 3:03:00 +epoch [26/50] batch [160/500] time 0.901 (0.889) data 0.000 (0.004) loss 0.9414 (1.0774) acc 75.0000 (72.9883) lr 1.0628e-03 eta 3:02:54 +epoch [26/50] batch [165/500] time 0.860 (0.889) data 0.000 (0.004) loss 1.6553 (1.0819) acc 59.3750 (72.8977) lr 1.0628e-03 eta 3:02:43 +epoch [26/50] batch [170/500] time 0.877 (0.888) data 0.000 (0.004) loss 1.5186 (1.0815) acc 56.2500 (72.7574) lr 1.0628e-03 eta 3:02:34 +epoch [26/50] batch [175/500] time 0.854 (0.888) data 0.000 (0.004) loss 0.9561 (1.0843) acc 78.1250 (72.6786) lr 1.0628e-03 eta 3:02:30 +epoch [26/50] batch [180/500] time 0.898 (0.888) data 0.000 (0.004) loss 1.0586 (1.0847) acc 71.8750 (72.7257) lr 1.0628e-03 eta 3:02:24 +epoch [26/50] batch [185/500] time 0.855 (0.888) data 0.000 (0.004) loss 1.1182 (1.0837) acc 78.1250 (72.8209) lr 1.0628e-03 eta 3:02:12 +epoch [26/50] batch [190/500] time 0.879 (0.887) data 0.000 (0.004) loss 1.2764 (1.0843) acc 65.6250 (72.8454) lr 1.0628e-03 eta 3:02:02 +epoch [26/50] batch [195/500] time 0.867 (0.887) data 0.000 (0.004) loss 1.3916 (1.0864) acc 68.7500 (72.8205) lr 1.0628e-03 eta 3:01:57 +epoch [26/50] batch [200/500] time 0.880 (0.888) data 0.000 (0.003) loss 1.6230 (1.0935) acc 65.6250 (72.7188) lr 1.0628e-03 eta 3:01:57 +epoch [26/50] 
batch [205/500] time 0.909 (0.887) data 0.000 (0.003) loss 0.8765 (1.0923) acc 75.0000 (72.7439) lr 1.0628e-03 eta 3:01:50 +epoch [26/50] batch [210/500] time 0.897 (0.887) data 0.000 (0.003) loss 0.8506 (1.0865) acc 68.7500 (72.8125) lr 1.0628e-03 eta 3:01:47 +epoch [26/50] batch [215/500] time 0.898 (0.888) data 0.000 (0.003) loss 0.6704 (1.0818) acc 78.1250 (72.9070) lr 1.0628e-03 eta 3:01:44 +epoch [26/50] batch [220/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.0566 (1.0860) acc 62.5000 (72.7983) lr 1.0628e-03 eta 3:01:42 +epoch [26/50] batch [225/500] time 0.902 (0.888) data 0.000 (0.003) loss 1.3350 (1.0884) acc 62.5000 (72.7639) lr 1.0628e-03 eta 3:01:39 +epoch [26/50] batch [230/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.3594 (1.0898) acc 62.5000 (72.7717) lr 1.0628e-03 eta 3:01:38 +epoch [26/50] batch [235/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.4648 (1.0939) acc 53.1250 (72.6862) lr 1.0628e-03 eta 3:01:33 +epoch [26/50] batch [240/500] time 1.017 (0.889) data 0.000 (0.003) loss 1.3379 (1.0982) acc 71.8750 (72.6302) lr 1.0628e-03 eta 3:01:35 +epoch [26/50] batch [245/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.2881 (1.0938) acc 62.5000 (72.6531) lr 1.0628e-03 eta 3:01:31 +epoch [26/50] batch [250/500] time 0.898 (0.889) data 0.000 (0.003) loss 1.5664 (1.0937) acc 53.1250 (72.6000) lr 1.0628e-03 eta 3:01:30 +epoch [26/50] batch [255/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.8125 (1.0990) acc 56.2500 (72.4142) lr 1.0628e-03 eta 3:01:22 +epoch [26/50] batch [260/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.8979 (1.0964) acc 78.1250 (72.4760) lr 1.0628e-03 eta 3:01:19 +epoch [26/50] batch [265/500] time 0.907 (0.889) data 0.000 (0.003) loss 1.3613 (1.0975) acc 62.5000 (72.4528) lr 1.0628e-03 eta 3:01:16 +epoch [26/50] batch [270/500] time 0.866 (0.889) data 0.000 (0.003) loss 0.7705 (1.0931) acc 84.3750 (72.5579) lr 1.0628e-03 eta 3:01:09 +epoch [26/50] batch [275/500] time 0.926 (0.889) data 0.000 (0.003) loss 0.8950 
(1.0872) acc 78.1250 (72.6818) lr 1.0628e-03 eta 3:01:04 +epoch [26/50] batch [280/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.0615 (1.0868) acc 68.7500 (72.6339) lr 1.0628e-03 eta 3:00:56 +epoch [26/50] batch [285/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.8970 (1.0860) acc 75.0000 (72.6535) lr 1.0628e-03 eta 3:00:50 +epoch [26/50] batch [290/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.0791 (1.0827) acc 68.7500 (72.6401) lr 1.0628e-03 eta 3:00:43 +epoch [26/50] batch [295/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.0430 (1.0867) acc 81.2500 (72.5953) lr 1.0628e-03 eta 3:00:39 +epoch [26/50] batch [300/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.5991 (1.0800) acc 81.2500 (72.7812) lr 1.0628e-03 eta 3:00:33 +epoch [26/50] batch [305/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.0068 (1.0830) acc 78.1250 (72.6947) lr 1.0628e-03 eta 3:00:28 +epoch [26/50] batch [310/500] time 0.858 (0.888) data 0.000 (0.002) loss 1.5371 (1.0876) acc 62.5000 (72.5504) lr 1.0628e-03 eta 3:00:20 +epoch [26/50] batch [315/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.6279 (1.0867) acc 81.2500 (72.5397) lr 1.0628e-03 eta 3:00:14 +epoch [26/50] batch [320/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.2783 (1.0860) acc 65.6250 (72.4902) lr 1.0628e-03 eta 3:00:09 +epoch [26/50] batch [325/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.2793 (1.0863) acc 68.7500 (72.4712) lr 1.0628e-03 eta 3:00:00 +epoch [26/50] batch [330/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.4072 (1.0931) acc 59.3750 (72.3864) lr 1.0628e-03 eta 2:59:54 +epoch [26/50] batch [335/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.1123 (1.0919) acc 68.7500 (72.4160) lr 1.0628e-03 eta 2:59:48 +epoch [26/50] batch [340/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.6182 (1.0928) acc 65.6250 (72.3989) lr 1.0628e-03 eta 2:59:47 +epoch [26/50] batch [345/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.5361 (1.0908) acc 84.3750 (72.3551) lr 1.0628e-03 eta 2:59:41 +epoch 
[26/50] batch [350/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.8701 (1.0931) acc 59.3750 (72.3304) lr 1.0628e-03 eta 2:59:38 +epoch [26/50] batch [355/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.9404 (1.0924) acc 75.0000 (72.3327) lr 1.0628e-03 eta 2:59:32 +epoch [26/50] batch [360/500] time 0.918 (0.887) data 0.000 (0.002) loss 1.5303 (1.0961) acc 65.6250 (72.3003) lr 1.0628e-03 eta 2:59:29 +epoch [26/50] batch [365/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.0596 (1.0929) acc 71.8750 (72.3801) lr 1.0628e-03 eta 2:59:24 +epoch [26/50] batch [370/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.0430 (1.0925) acc 75.0000 (72.3395) lr 1.0628e-03 eta 2:59:20 +epoch [26/50] batch [375/500] time 0.917 (0.887) data 0.000 (0.002) loss 1.4902 (1.0909) acc 71.8750 (72.4000) lr 1.0628e-03 eta 2:59:17 +epoch [26/50] batch [380/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.7095 (1.0870) acc 71.8750 (72.4589) lr 1.0628e-03 eta 2:59:16 +epoch [26/50] batch [385/500] time 0.927 (0.888) data 0.000 (0.002) loss 1.0244 (1.0877) acc 71.8750 (72.4026) lr 1.0628e-03 eta 2:59:17 +epoch [26/50] batch [390/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.0723 (1.0891) acc 75.0000 (72.3958) lr 1.0628e-03 eta 2:59:14 +epoch [26/50] batch [395/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.5371 (1.0883) acc 84.3750 (72.4051) lr 1.0628e-03 eta 2:59:09 +epoch [26/50] batch [400/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.4844 (1.0898) acc 78.1250 (72.3828) lr 1.0628e-03 eta 2:59:02 +epoch [26/50] batch [405/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.1426 (1.0899) acc 68.7500 (72.3920) lr 1.0628e-03 eta 2:58:58 +epoch [26/50] batch [410/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.9634 (1.0922) acc 71.8750 (72.3171) lr 1.0628e-03 eta 2:58:53 +epoch [26/50] batch [415/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.3564 (1.0933) acc 78.1250 (72.2967) lr 1.0628e-03 eta 2:58:50 +epoch [26/50] batch [420/500] time 0.894 (0.888) data 0.000 (0.002) loss 
0.8389 (1.0947) acc 68.7500 (72.2098) lr 1.0628e-03 eta 2:58:43 +epoch [26/50] batch [425/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2295 (1.0952) acc 62.5000 (72.1471) lr 1.0628e-03 eta 2:58:38 +epoch [26/50] batch [430/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.7241 (1.0943) acc 84.3750 (72.1512) lr 1.0628e-03 eta 2:58:34 +epoch [26/50] batch [435/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.2275 (1.0961) acc 65.6250 (72.1408) lr 1.0628e-03 eta 2:58:30 +epoch [26/50] batch [440/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.9834 (1.0965) acc 53.1250 (72.1520) lr 1.0628e-03 eta 2:58:27 +epoch [26/50] batch [445/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.9463 (1.0961) acc 78.1250 (72.1559) lr 1.0628e-03 eta 2:58:21 +epoch [26/50] batch [450/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.1074 (1.0954) acc 81.2500 (72.1875) lr 1.0628e-03 eta 2:58:16 +epoch [26/50] batch [455/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.9448 (1.0954) acc 78.1250 (72.2047) lr 1.0628e-03 eta 2:58:09 +epoch [26/50] batch [460/500] time 0.906 (0.888) data 0.000 (0.002) loss 0.5747 (1.0958) acc 81.2500 (72.1671) lr 1.0628e-03 eta 2:58:06 +epoch [26/50] batch [465/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.9546 (1.0970) acc 81.2500 (72.1640) lr 1.0628e-03 eta 2:58:00 +epoch [26/50] batch [470/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.7842 (1.0956) acc 84.3750 (72.2141) lr 1.0628e-03 eta 2:57:57 +epoch [26/50] batch [475/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9727 (1.0963) acc 75.0000 (72.1776) lr 1.0628e-03 eta 2:57:55 +epoch [26/50] batch [480/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.8896 (1.1011) acc 62.5000 (72.1549) lr 1.0628e-03 eta 2:57:50 +epoch [26/50] batch [485/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.2246 (1.1029) acc 75.0000 (72.1714) lr 1.0628e-03 eta 2:57:49 +epoch [26/50] batch [490/500] time 0.906 (0.888) data 0.000 (0.002) loss 0.9814 (1.1008) acc 71.8750 (72.2066) lr 1.0628e-03 eta 2:57:45 
+epoch [26/50] batch [495/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.3330 (1.1017) acc 68.7500 (72.1780) lr 1.0628e-03 eta 2:57:40 +epoch [26/50] batch [500/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.7793 (1.1000) acc 65.6250 (72.1500) lr 1.0000e-03 eta 2:57:36 +epoch [27/50] batch [5/500] time 0.882 (1.011) data 0.000 (0.122) loss 0.6553 (1.1332) acc 84.3750 (71.8750) lr 1.0000e-03 eta 3:22:06 +epoch [27/50] batch [10/500] time 0.848 (0.940) data 0.000 (0.061) loss 0.8940 (1.0958) acc 78.1250 (73.4375) lr 1.0000e-03 eta 3:07:48 +epoch [27/50] batch [15/500] time 0.840 (0.914) data 0.000 (0.041) loss 0.7007 (1.1246) acc 84.3750 (72.5000) lr 1.0000e-03 eta 3:02:35 +epoch [27/50] batch [20/500] time 0.865 (0.907) data 0.000 (0.031) loss 0.4810 (1.0975) acc 87.5000 (73.5938) lr 1.0000e-03 eta 3:01:06 +epoch [27/50] batch [25/500] time 0.870 (0.901) data 0.000 (0.025) loss 1.3857 (1.1771) acc 71.8750 (71.8750) lr 1.0000e-03 eta 2:59:53 +epoch [27/50] batch [30/500] time 0.891 (0.900) data 0.000 (0.021) loss 1.3594 (1.1795) acc 65.6250 (72.0833) lr 1.0000e-03 eta 2:59:33 +epoch [27/50] batch [35/500] time 0.884 (0.903) data 0.000 (0.018) loss 0.9473 (1.1468) acc 75.0000 (72.5000) lr 1.0000e-03 eta 3:00:03 +epoch [27/50] batch [40/500] time 0.898 (0.902) data 0.000 (0.015) loss 1.1221 (1.1242) acc 78.1250 (73.0469) lr 1.0000e-03 eta 2:59:51 +epoch [27/50] batch [45/500] time 0.860 (0.902) data 0.000 (0.014) loss 1.0596 (1.1129) acc 78.1250 (73.4722) lr 1.0000e-03 eta 2:59:43 +epoch [27/50] batch [50/500] time 0.911 (0.901) data 0.000 (0.012) loss 1.0273 (1.1063) acc 78.1250 (73.5000) lr 1.0000e-03 eta 2:59:23 +epoch [27/50] batch [55/500] time 0.881 (0.900) data 0.000 (0.011) loss 0.4485 (1.0754) acc 87.5000 (73.8636) lr 1.0000e-03 eta 2:59:11 +epoch [27/50] batch [60/500] time 0.909 (0.899) data 0.000 (0.010) loss 1.2559 (1.0960) acc 75.0000 (73.2812) lr 1.0000e-03 eta 2:58:56 +epoch [27/50] batch [65/500] time 0.857 (0.897) data 0.000 (0.010) loss 1.1475 
(1.0865) acc 78.1250 (73.7019) lr 1.0000e-03 eta 2:58:22 +epoch [27/50] batch [70/500] time 0.883 (0.896) data 0.000 (0.009) loss 1.2754 (1.0991) acc 71.8750 (73.1250) lr 1.0000e-03 eta 2:58:06 +epoch [27/50] batch [75/500] time 0.904 (0.896) data 0.000 (0.008) loss 0.4751 (1.0953) acc 81.2500 (73.0833) lr 1.0000e-03 eta 2:58:03 +epoch [27/50] batch [80/500] time 0.876 (0.897) data 0.000 (0.008) loss 1.2363 (1.0842) acc 62.5000 (73.1250) lr 1.0000e-03 eta 2:58:15 +epoch [27/50] batch [85/500] time 0.878 (0.897) data 0.000 (0.007) loss 1.0938 (1.0782) acc 78.1250 (73.1250) lr 1.0000e-03 eta 2:58:06 +epoch [27/50] batch [90/500] time 0.890 (0.896) data 0.000 (0.007) loss 0.8564 (1.0836) acc 75.0000 (73.2292) lr 1.0000e-03 eta 2:57:53 +epoch [27/50] batch [95/500] time 0.887 (0.896) data 0.000 (0.007) loss 1.1465 (1.0855) acc 71.8750 (73.1579) lr 1.0000e-03 eta 2:57:41 +epoch [27/50] batch [100/500] time 0.882 (0.895) data 0.000 (0.006) loss 0.8125 (1.1007) acc 71.8750 (72.8438) lr 1.0000e-03 eta 2:57:28 +epoch [27/50] batch [105/500] time 0.884 (0.894) data 0.000 (0.006) loss 1.1865 (1.1013) acc 71.8750 (72.8869) lr 1.0000e-03 eta 2:57:15 +epoch [27/50] batch [110/500] time 0.924 (0.894) data 0.000 (0.006) loss 0.5864 (1.0975) acc 87.5000 (73.0966) lr 1.0000e-03 eta 2:57:07 +epoch [27/50] batch [115/500] time 0.894 (0.893) data 0.000 (0.006) loss 1.0674 (1.0905) acc 75.0000 (73.2609) lr 1.0000e-03 eta 2:56:56 +epoch [27/50] batch [120/500] time 0.862 (0.892) data 0.000 (0.005) loss 0.9365 (1.0822) acc 71.8750 (73.3854) lr 1.0000e-03 eta 2:56:42 +epoch [27/50] batch [125/500] time 0.917 (0.893) data 0.000 (0.005) loss 1.8564 (1.0866) acc 56.2500 (73.2000) lr 1.0000e-03 eta 2:56:39 +epoch [27/50] batch [130/500] time 0.887 (0.892) data 0.000 (0.005) loss 0.5049 (1.0840) acc 87.5000 (73.2692) lr 1.0000e-03 eta 2:56:29 +epoch [27/50] batch [135/500] time 0.897 (0.892) data 0.000 (0.005) loss 0.8916 (1.0809) acc 78.1250 (73.2870) lr 1.0000e-03 eta 2:56:19 +epoch [27/50] 
batch [140/500] time 0.888 (0.891) data 0.000 (0.005) loss 1.2275 (1.0840) acc 59.3750 (73.1473) lr 1.0000e-03 eta 2:56:08 +epoch [27/50] batch [145/500] time 0.862 (0.891) data 0.000 (0.004) loss 0.5474 (1.0796) acc 81.2500 (73.2328) lr 1.0000e-03 eta 2:56:02 +epoch [27/50] batch [150/500] time 0.960 (0.891) data 0.000 (0.004) loss 1.2266 (1.0840) acc 71.8750 (73.1042) lr 1.0000e-03 eta 2:56:03 +epoch [27/50] batch [155/500] time 0.903 (0.892) data 0.000 (0.004) loss 0.5708 (1.0867) acc 84.3750 (72.9435) lr 1.0000e-03 eta 2:56:02 +epoch [27/50] batch [160/500] time 0.905 (0.892) data 0.000 (0.004) loss 1.4502 (1.0926) acc 71.8750 (72.8906) lr 1.0000e-03 eta 2:55:58 +epoch [27/50] batch [165/500] time 0.847 (0.891) data 0.000 (0.004) loss 1.2754 (1.0982) acc 71.8750 (72.7462) lr 1.0000e-03 eta 2:55:50 +epoch [27/50] batch [170/500] time 0.912 (0.891) data 0.000 (0.004) loss 0.5728 (1.0928) acc 84.3750 (72.7574) lr 1.0000e-03 eta 2:55:45 +epoch [27/50] batch [175/500] time 0.882 (0.891) data 0.000 (0.004) loss 1.2773 (1.0949) acc 56.2500 (72.6786) lr 1.0000e-03 eta 2:55:38 +epoch [27/50] batch [180/500] time 0.873 (0.892) data 0.000 (0.004) loss 0.9424 (1.0940) acc 68.7500 (72.6736) lr 1.0000e-03 eta 2:55:42 +epoch [27/50] batch [185/500] time 0.897 (0.892) data 0.000 (0.004) loss 1.3867 (1.1020) acc 68.7500 (72.5000) lr 1.0000e-03 eta 2:55:42 +epoch [27/50] batch [190/500] time 0.896 (0.893) data 0.000 (0.003) loss 1.0645 (1.0996) acc 68.7500 (72.6151) lr 1.0000e-03 eta 2:55:40 +epoch [27/50] batch [195/500] time 0.895 (0.893) data 0.000 (0.003) loss 1.3184 (1.1036) acc 68.7500 (72.6122) lr 1.0000e-03 eta 2:55:37 +epoch [27/50] batch [200/500] time 0.893 (0.893) data 0.000 (0.003) loss 1.3838 (1.1021) acc 68.7500 (72.6406) lr 1.0000e-03 eta 2:55:32 +epoch [27/50] batch [205/500] time 0.908 (0.893) data 0.000 (0.003) loss 1.4941 (1.1038) acc 65.6250 (72.6067) lr 1.0000e-03 eta 2:55:29 +epoch [27/50] batch [210/500] time 0.893 (0.893) data 0.000 (0.003) loss 0.6670 
(1.1003) acc 81.2500 (72.6786) lr 1.0000e-03 eta 2:55:24 +epoch [27/50] batch [215/500] time 0.887 (0.892) data 0.000 (0.003) loss 1.0850 (1.1011) acc 75.0000 (72.7035) lr 1.0000e-03 eta 2:55:16 +epoch [27/50] batch [220/500] time 0.891 (0.892) data 0.000 (0.003) loss 1.4346 (1.0987) acc 68.7500 (72.7415) lr 1.0000e-03 eta 2:55:11 +epoch [27/50] batch [225/500] time 0.886 (0.893) data 0.000 (0.003) loss 0.8770 (1.1008) acc 81.2500 (72.6944) lr 1.0000e-03 eta 2:55:10 +epoch [27/50] batch [230/500] time 0.861 (0.892) data 0.000 (0.003) loss 0.8735 (1.1029) acc 78.1250 (72.7038) lr 1.0000e-03 eta 2:55:02 +epoch [27/50] batch [235/500] time 0.895 (0.892) data 0.000 (0.003) loss 0.7568 (1.1069) acc 84.3750 (72.6596) lr 1.0000e-03 eta 2:54:55 +epoch [27/50] batch [240/500] time 0.883 (0.892) data 0.000 (0.003) loss 1.0156 (1.1029) acc 71.8750 (72.7083) lr 1.0000e-03 eta 2:54:47 +epoch [27/50] batch [245/500] time 0.919 (0.892) data 0.000 (0.003) loss 1.0752 (1.0992) acc 75.0000 (72.7551) lr 1.0000e-03 eta 2:54:45 +epoch [27/50] batch [250/500] time 0.904 (0.892) data 0.000 (0.003) loss 1.2139 (1.0979) acc 68.7500 (72.7625) lr 1.0000e-03 eta 2:54:40 +epoch [27/50] batch [255/500] time 0.891 (0.892) data 0.000 (0.003) loss 1.0205 (1.0972) acc 81.2500 (72.8309) lr 1.0000e-03 eta 2:54:35 +epoch [27/50] batch [260/500] time 0.873 (0.892) data 0.000 (0.003) loss 1.1797 (1.0980) acc 71.8750 (72.8005) lr 1.0000e-03 eta 2:54:26 +epoch [27/50] batch [265/500] time 0.865 (0.891) data 0.000 (0.003) loss 1.6035 (1.0979) acc 59.3750 (72.7830) lr 1.0000e-03 eta 2:54:20 +epoch [27/50] batch [270/500] time 0.916 (0.891) data 0.000 (0.002) loss 1.0195 (1.0975) acc 65.6250 (72.6968) lr 1.0000e-03 eta 2:54:17 +epoch [27/50] batch [275/500] time 0.880 (0.891) data 0.000 (0.002) loss 0.6523 (1.0994) acc 84.3750 (72.6591) lr 1.0000e-03 eta 2:54:12 +epoch [27/50] batch [280/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.3828 (1.1014) acc 71.8750 (72.6451) lr 1.0000e-03 eta 2:54:04 +epoch 
[27/50] batch [285/500] time 0.865 (0.891) data 0.000 (0.002) loss 0.9609 (1.0982) acc 75.0000 (72.7412) lr 1.0000e-03 eta 2:54:02 +epoch [27/50] batch [290/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.2305 (1.1027) acc 71.8750 (72.6616) lr 1.0000e-03 eta 2:53:59 +epoch [27/50] batch [295/500] time 0.907 (0.892) data 0.000 (0.002) loss 0.8794 (1.1040) acc 75.0000 (72.6695) lr 1.0000e-03 eta 2:53:55 +epoch [27/50] batch [300/500] time 0.865 (0.891) data 0.000 (0.002) loss 1.6270 (1.1036) acc 71.8750 (72.6875) lr 1.0000e-03 eta 2:53:49 +epoch [27/50] batch [305/500] time 0.888 (0.891) data 0.000 (0.002) loss 0.9917 (1.1040) acc 81.2500 (72.7049) lr 1.0000e-03 eta 2:53:42 +epoch [27/50] batch [310/500] time 0.881 (0.891) data 0.000 (0.002) loss 0.9424 (1.1055) acc 78.1250 (72.6915) lr 1.0000e-03 eta 2:53:35 +epoch [27/50] batch [315/500] time 0.883 (0.891) data 0.000 (0.002) loss 1.4346 (1.1049) acc 59.3750 (72.6488) lr 1.0000e-03 eta 2:53:32 +epoch [27/50] batch [320/500] time 1.011 (0.891) data 0.000 (0.002) loss 0.6382 (1.1037) acc 81.2500 (72.6855) lr 1.0000e-03 eta 2:53:31 +epoch [27/50] batch [325/500] time 0.910 (0.891) data 0.000 (0.002) loss 0.9092 (1.1080) acc 78.1250 (72.6250) lr 1.0000e-03 eta 2:53:25 +epoch [27/50] batch [330/500] time 0.877 (0.891) data 0.000 (0.002) loss 0.6865 (1.1083) acc 81.2500 (72.5947) lr 1.0000e-03 eta 2:53:18 +epoch [27/50] batch [335/500] time 0.892 (0.891) data 0.000 (0.002) loss 1.1621 (1.1090) acc 65.6250 (72.5280) lr 1.0000e-03 eta 2:53:15 +epoch [27/50] batch [340/500] time 0.850 (0.891) data 0.000 (0.002) loss 1.3320 (1.1095) acc 71.8750 (72.4908) lr 1.0000e-03 eta 2:53:06 +epoch [27/50] batch [345/500] time 0.876 (0.891) data 0.000 (0.002) loss 1.0127 (1.1059) acc 65.6250 (72.5634) lr 1.0000e-03 eta 2:53:00 +epoch [27/50] batch [350/500] time 0.870 (0.891) data 0.000 (0.002) loss 1.7900 (1.1069) acc 53.1250 (72.5179) lr 1.0000e-03 eta 2:52:54 +epoch [27/50] batch [355/500] time 0.882 (0.890) data 0.000 (0.002) loss 
0.7256 (1.1111) acc 84.3750 (72.5000) lr 1.0000e-03 eta 2:52:48 +epoch [27/50] batch [360/500] time 0.907 (0.890) data 0.000 (0.002) loss 1.5332 (1.1135) acc 68.7500 (72.4826) lr 1.0000e-03 eta 2:52:42 +epoch [27/50] batch [365/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.1787 (1.1137) acc 71.8750 (72.4914) lr 1.0000e-03 eta 2:52:40 +epoch [27/50] batch [370/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.0557 (1.1118) acc 75.0000 (72.5676) lr 1.0000e-03 eta 2:52:34 +epoch [27/50] batch [375/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.1621 (1.1131) acc 68.7500 (72.4917) lr 1.0000e-03 eta 2:52:28 +epoch [27/50] batch [380/500] time 0.903 (0.890) data 0.000 (0.002) loss 1.1016 (1.1126) acc 78.1250 (72.5000) lr 1.0000e-03 eta 2:52:23 +epoch [27/50] batch [385/500] time 0.905 (0.890) data 0.000 (0.002) loss 0.9170 (1.1131) acc 75.0000 (72.4513) lr 1.0000e-03 eta 2:52:19 +epoch [27/50] batch [390/500] time 0.888 (0.890) data 0.000 (0.002) loss 0.8877 (1.1123) acc 78.1250 (72.4599) lr 1.0000e-03 eta 2:52:15 +epoch [27/50] batch [395/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.6587 (1.1095) acc 78.1250 (72.5237) lr 1.0000e-03 eta 2:52:09 +epoch [27/50] batch [400/500] time 0.852 (0.890) data 0.000 (0.002) loss 0.9614 (1.1094) acc 71.8750 (72.5625) lr 1.0000e-03 eta 2:52:02 +epoch [27/50] batch [405/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.3115 (1.1086) acc 59.3750 (72.5617) lr 1.0000e-03 eta 2:51:55 +epoch [27/50] batch [410/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.4609 (1.1079) acc 59.3750 (72.5610) lr 1.0000e-03 eta 2:51:52 +epoch [27/50] batch [415/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0918 (1.1101) acc 68.7500 (72.4849) lr 1.0000e-03 eta 2:51:44 +epoch [27/50] batch [420/500] time 0.862 (0.889) data 0.000 (0.002) loss 0.9624 (1.1117) acc 75.0000 (72.4702) lr 1.0000e-03 eta 2:51:38 +epoch [27/50] batch [425/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.4756 (1.1133) acc 68.7500 (72.4632) lr 1.0000e-03 eta 2:51:33 
+epoch [27/50] batch [430/500] time 0.856 (0.889) data 0.000 (0.002) loss 1.3223 (1.1119) acc 68.7500 (72.4855) lr 1.0000e-03 eta 2:51:29 +epoch [27/50] batch [435/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.5410 (1.1140) acc 68.7500 (72.4641) lr 1.0000e-03 eta 2:51:26 +epoch [27/50] batch [440/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.7261 (1.1142) acc 84.3750 (72.4645) lr 1.0000e-03 eta 2:51:21 +epoch [27/50] batch [445/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.8218 (1.1159) acc 78.1250 (72.4228) lr 1.0000e-03 eta 2:51:15 +epoch [27/50] batch [450/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.1846 (1.1167) acc 65.6250 (72.3681) lr 1.0000e-03 eta 2:51:09 +epoch [27/50] batch [455/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.8857 (1.1178) acc 71.8750 (72.3146) lr 1.0000e-03 eta 2:51:04 +epoch [27/50] batch [460/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.3594 (1.1158) acc 62.5000 (72.3505) lr 1.0000e-03 eta 2:50:59 +epoch [27/50] batch [465/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.1221 (1.1182) acc 62.5000 (72.3051) lr 1.0000e-03 eta 2:50:55 +epoch [27/50] batch [470/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.1016 (1.1178) acc 65.6250 (72.2540) lr 1.0000e-03 eta 2:50:52 +epoch [27/50] batch [475/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9282 (1.1181) acc 75.0000 (72.2368) lr 1.0000e-03 eta 2:50:46 +epoch [27/50] batch [480/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.6621 (1.1166) acc 78.1250 (72.2526) lr 1.0000e-03 eta 2:50:40 +epoch [27/50] batch [485/500] time 0.881 (0.889) data 0.000 (0.001) loss 0.8433 (1.1157) acc 81.2500 (72.2616) lr 1.0000e-03 eta 2:50:35 +epoch [27/50] batch [490/500] time 0.877 (0.889) data 0.000 (0.001) loss 0.8506 (1.1171) acc 87.5000 (72.2258) lr 1.0000e-03 eta 2:50:30 +epoch [27/50] batch [495/500] time 0.878 (0.889) data 0.000 (0.001) loss 1.3320 (1.1202) acc 68.7500 (72.1528) lr 1.0000e-03 eta 2:50:24 +epoch [27/50] batch [500/500] time 0.897 (0.889) data 0.000 
(0.001) loss 1.4424 (1.1205) acc 68.7500 (72.1437) lr 9.3721e-04 eta 2:50:21 +epoch [28/50] batch [5/500] time 0.865 (1.033) data 0.000 (0.139) loss 0.9375 (0.8666) acc 75.0000 (79.3750) lr 9.3721e-04 eta 3:17:50 +epoch [28/50] batch [10/500] time 0.999 (0.970) data 0.000 (0.070) loss 1.5381 (0.9676) acc 68.7500 (76.8750) lr 9.3721e-04 eta 3:05:44 +epoch [28/50] batch [15/500] time 0.914 (0.940) data 0.000 (0.047) loss 1.1963 (0.9958) acc 68.7500 (76.0417) lr 9.3721e-04 eta 2:59:58 +epoch [28/50] batch [20/500] time 0.908 (0.928) data 0.000 (0.035) loss 0.9331 (0.9806) acc 75.0000 (76.0938) lr 9.3721e-04 eta 2:57:32 +epoch [28/50] batch [25/500] time 0.879 (0.919) data 0.000 (0.028) loss 1.2725 (0.9874) acc 65.6250 (75.6250) lr 9.3721e-04 eta 2:55:43 +epoch [28/50] batch [30/500] time 0.876 (0.913) data 0.000 (0.023) loss 0.5166 (0.9925) acc 90.6250 (74.8958) lr 9.3721e-04 eta 2:54:32 +epoch [28/50] batch [35/500] time 0.904 (0.908) data 0.000 (0.020) loss 0.5059 (0.9902) acc 81.2500 (74.9107) lr 9.3721e-04 eta 2:53:25 +epoch [28/50] batch [40/500] time 0.909 (0.907) data 0.000 (0.018) loss 0.7417 (1.0055) acc 81.2500 (74.7656) lr 9.3721e-04 eta 2:53:17 +epoch [28/50] batch [45/500] time 0.883 (0.905) data 0.000 (0.016) loss 1.7725 (1.0329) acc 65.6250 (74.3056) lr 9.3721e-04 eta 2:52:47 +epoch [28/50] batch [50/500] time 0.905 (0.904) data 0.000 (0.014) loss 0.8984 (1.0219) acc 68.7500 (74.5000) lr 9.3721e-04 eta 2:52:25 +epoch [28/50] batch [55/500] time 0.848 (0.904) data 0.000 (0.013) loss 1.1182 (1.0160) acc 71.8750 (74.7159) lr 9.3721e-04 eta 2:52:25 +epoch [28/50] batch [60/500] time 0.871 (0.901) data 0.000 (0.012) loss 0.4734 (1.0111) acc 81.2500 (74.8438) lr 9.3721e-04 eta 2:51:50 +epoch [28/50] batch [65/500] time 0.906 (0.900) data 0.000 (0.011) loss 0.7583 (1.0119) acc 75.0000 (74.6154) lr 9.3721e-04 eta 2:51:35 +epoch [28/50] batch [70/500] time 0.890 (0.899) data 0.000 (0.010) loss 0.9658 (1.0136) acc 75.0000 (74.5536) lr 9.3721e-04 eta 2:51:15 
+epoch [28/50] batch [75/500] time 0.904 (0.898) data 0.000 (0.009) loss 1.0557 (1.0114) acc 71.8750 (74.6667) lr 9.3721e-04 eta 2:50:59 +epoch [28/50] batch [80/500] time 0.884 (0.896) data 0.000 (0.009) loss 0.9087 (1.0160) acc 78.1250 (74.3750) lr 9.3721e-04 eta 2:50:37 +epoch [28/50] batch [85/500] time 0.884 (0.896) data 0.000 (0.008) loss 1.0000 (1.0290) acc 75.0000 (74.0074) lr 9.3721e-04 eta 2:50:27 +epoch [28/50] batch [90/500] time 0.896 (0.896) data 0.000 (0.008) loss 1.0449 (1.0278) acc 68.7500 (74.0625) lr 9.3721e-04 eta 2:50:22 +epoch [28/50] batch [95/500] time 0.900 (0.895) data 0.000 (0.008) loss 1.1748 (1.0311) acc 75.0000 (73.9145) lr 9.3721e-04 eta 2:50:10 +epoch [28/50] batch [100/500] time 0.892 (0.894) data 0.000 (0.007) loss 0.7192 (1.0375) acc 78.1250 (73.8125) lr 9.3721e-04 eta 2:49:54 +epoch [28/50] batch [105/500] time 0.908 (0.894) data 0.000 (0.007) loss 0.8486 (1.0342) acc 71.8750 (73.6905) lr 9.3721e-04 eta 2:49:49 +epoch [28/50] batch [110/500] time 0.868 (0.894) data 0.000 (0.007) loss 1.5039 (1.0404) acc 62.5000 (73.5795) lr 9.3721e-04 eta 2:49:39 +epoch [28/50] batch [115/500] time 0.891 (0.894) data 0.000 (0.006) loss 1.4150 (1.0419) acc 62.5000 (73.5326) lr 9.3721e-04 eta 2:49:33 +epoch [28/50] batch [120/500] time 0.862 (0.893) data 0.000 (0.006) loss 0.9487 (1.0452) acc 62.5000 (73.3594) lr 9.3721e-04 eta 2:49:25 +epoch [28/50] batch [125/500] time 0.896 (0.894) data 0.000 (0.006) loss 1.0420 (1.0469) acc 71.8750 (73.4000) lr 9.3721e-04 eta 2:49:23 +epoch [28/50] batch [130/500] time 0.859 (0.893) data 0.000 (0.006) loss 1.2705 (1.0510) acc 71.8750 (73.2933) lr 9.3721e-04 eta 2:49:10 +epoch [28/50] batch [135/500] time 0.900 (0.893) data 0.000 (0.005) loss 1.1504 (1.0548) acc 78.1250 (73.2407) lr 9.3721e-04 eta 2:49:10 +epoch [28/50] batch [140/500] time 0.891 (0.893) data 0.000 (0.005) loss 1.3906 (1.0595) acc 65.6250 (73.2143) lr 9.3721e-04 eta 2:49:05 +epoch [28/50] batch [145/500] time 0.873 (0.893) data 0.000 (0.005) 
loss 1.6865 (1.0627) acc 59.3750 (73.1466) lr 9.3721e-04 eta 2:49:00 +epoch [28/50] batch [150/500] time 0.987 (0.894) data 0.000 (0.005) loss 1.1523 (1.0709) acc 65.6250 (73.1042) lr 9.3721e-04 eta 2:49:02 +epoch [28/50] batch [155/500] time 0.897 (0.894) data 0.000 (0.005) loss 1.2285 (1.0782) acc 65.6250 (72.9234) lr 9.3721e-04 eta 2:49:01 +epoch [28/50] batch [160/500] time 0.865 (0.894) data 0.000 (0.005) loss 0.7529 (1.0781) acc 71.8750 (72.8125) lr 9.3721e-04 eta 2:48:57 +epoch [28/50] batch [165/500] time 0.895 (0.894) data 0.000 (0.004) loss 1.3086 (1.0838) acc 62.5000 (72.6705) lr 9.3721e-04 eta 2:48:51 +epoch [28/50] batch [170/500] time 0.888 (0.894) data 0.000 (0.004) loss 0.9727 (1.0825) acc 75.0000 (72.6471) lr 9.3721e-04 eta 2:48:47 +epoch [28/50] batch [175/500] time 0.892 (0.894) data 0.000 (0.004) loss 0.8960 (1.0785) acc 75.0000 (72.7321) lr 9.3721e-04 eta 2:48:42 +epoch [28/50] batch [180/500] time 0.891 (0.894) data 0.000 (0.004) loss 1.0068 (1.0763) acc 71.8750 (72.7431) lr 9.3721e-04 eta 2:48:35 +epoch [28/50] batch [185/500] time 0.897 (0.893) data 0.000 (0.004) loss 0.8945 (1.0765) acc 78.1250 (72.8716) lr 9.3721e-04 eta 2:48:28 +epoch [28/50] batch [190/500] time 0.899 (0.893) data 0.000 (0.004) loss 1.0830 (1.0842) acc 71.8750 (72.6151) lr 9.3721e-04 eta 2:48:21 +epoch [28/50] batch [195/500] time 0.876 (0.894) data 0.000 (0.004) loss 0.9395 (1.0844) acc 68.7500 (72.5000) lr 9.3721e-04 eta 2:48:22 +epoch [28/50] batch [200/500] time 0.872 (0.893) data 0.000 (0.004) loss 0.6802 (1.0820) acc 78.1250 (72.5625) lr 9.3721e-04 eta 2:48:14 +epoch [28/50] batch [205/500] time 0.875 (0.893) data 0.000 (0.004) loss 1.0771 (1.0792) acc 68.7500 (72.5457) lr 9.3721e-04 eta 2:48:07 +epoch [28/50] batch [210/500] time 0.888 (0.893) data 0.000 (0.004) loss 1.5527 (1.0775) acc 56.2500 (72.5000) lr 9.3721e-04 eta 2:48:01 +epoch [28/50] batch [215/500] time 0.903 (0.893) data 0.000 (0.003) loss 0.8442 (1.0827) acc 75.0000 (72.4273) lr 9.3721e-04 eta 
2:47:58 +epoch [28/50] batch [220/500] time 0.870 (0.893) data 0.000 (0.003) loss 1.0244 (1.0821) acc 75.0000 (72.4858) lr 9.3721e-04 eta 2:47:51 +epoch [28/50] batch [225/500] time 0.913 (0.892) data 0.000 (0.003) loss 0.7554 (1.0775) acc 81.2500 (72.5694) lr 9.3721e-04 eta 2:47:42 +epoch [28/50] batch [230/500] time 0.868 (0.892) data 0.000 (0.003) loss 1.3701 (1.0781) acc 56.2500 (72.5272) lr 9.3721e-04 eta 2:47:34 +epoch [28/50] batch [235/500] time 0.873 (0.892) data 0.000 (0.003) loss 0.5840 (1.0835) acc 84.3750 (72.3803) lr 9.3721e-04 eta 2:47:28 +epoch [28/50] batch [240/500] time 0.884 (0.892) data 0.000 (0.003) loss 1.5059 (1.0831) acc 68.7500 (72.3828) lr 9.3721e-04 eta 2:47:24 +epoch [28/50] batch [245/500] time 0.911 (0.892) data 0.000 (0.003) loss 1.2881 (1.0817) acc 65.6250 (72.4362) lr 9.3721e-04 eta 2:47:19 +epoch [28/50] batch [250/500] time 0.866 (0.892) data 0.000 (0.003) loss 1.1572 (1.0777) acc 71.8750 (72.4750) lr 9.3721e-04 eta 2:47:12 +epoch [28/50] batch [255/500] time 0.855 (0.891) data 0.000 (0.003) loss 1.2910 (1.0796) acc 78.1250 (72.5245) lr 9.3721e-04 eta 2:47:03 +epoch [28/50] batch [260/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.4980 (1.0808) acc 65.6250 (72.5240) lr 9.3721e-04 eta 2:46:58 +epoch [28/50] batch [265/500] time 0.853 (0.891) data 0.000 (0.003) loss 0.9287 (1.0777) acc 71.8750 (72.5590) lr 9.3721e-04 eta 2:46:52 +epoch [28/50] batch [270/500] time 0.907 (0.891) data 0.000 (0.003) loss 1.0986 (1.0790) acc 68.7500 (72.4884) lr 9.3721e-04 eta 2:46:49 +epoch [28/50] batch [275/500] time 0.889 (0.891) data 0.000 (0.003) loss 1.0674 (1.0805) acc 81.2500 (72.5000) lr 9.3721e-04 eta 2:46:42 +epoch [28/50] batch [280/500] time 0.900 (0.891) data 0.000 (0.003) loss 0.7627 (1.0818) acc 81.2500 (72.5223) lr 9.3721e-04 eta 2:46:38 +epoch [28/50] batch [285/500] time 0.873 (0.891) data 0.000 (0.003) loss 0.7178 (1.0808) acc 78.1250 (72.5110) lr 9.3721e-04 eta 2:46:31 +epoch [28/50] batch [290/500] time 0.894 (0.891) data 
0.000 (0.003) loss 0.7329 (1.0797) acc 75.0000 (72.4892) lr 9.3721e-04 eta 2:46:27 +epoch [28/50] batch [295/500] time 0.843 (0.891) data 0.000 (0.003) loss 0.9248 (1.0778) acc 68.7500 (72.4894) lr 9.3721e-04 eta 2:46:25 +epoch [28/50] batch [300/500] time 0.916 (0.891) data 0.000 (0.003) loss 1.0391 (1.0758) acc 75.0000 (72.5938) lr 9.3721e-04 eta 2:46:18 +epoch [28/50] batch [305/500] time 0.881 (0.891) data 0.000 (0.002) loss 1.4619 (1.0804) acc 53.1250 (72.4898) lr 9.3721e-04 eta 2:46:12 +epoch [28/50] batch [310/500] time 0.878 (0.891) data 0.000 (0.002) loss 1.2930 (1.0795) acc 78.1250 (72.5504) lr 9.3721e-04 eta 2:46:06 +epoch [28/50] batch [315/500] time 0.919 (0.891) data 0.000 (0.002) loss 1.2715 (1.0805) acc 59.3750 (72.5496) lr 9.3721e-04 eta 2:46:03 +epoch [28/50] batch [320/500] time 0.911 (0.891) data 0.000 (0.002) loss 0.9375 (1.0809) acc 75.0000 (72.5684) lr 9.3721e-04 eta 2:45:56 +epoch [28/50] batch [325/500] time 0.854 (0.890) data 0.000 (0.002) loss 1.2695 (1.0810) acc 68.7500 (72.5962) lr 9.3721e-04 eta 2:45:48 +epoch [28/50] batch [330/500] time 0.863 (0.890) data 0.000 (0.002) loss 1.0977 (1.0812) acc 68.7500 (72.6042) lr 9.3721e-04 eta 2:45:41 +epoch [28/50] batch [335/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.2666 (1.0836) acc 68.7500 (72.5187) lr 9.3721e-04 eta 2:45:36 +epoch [28/50] batch [340/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.2627 (1.0874) acc 62.5000 (72.4357) lr 9.3721e-04 eta 2:45:34 +epoch [28/50] batch [345/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.2080 (1.0871) acc 53.1250 (72.3822) lr 9.3721e-04 eta 2:45:27 +epoch [28/50] batch [350/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.1904 (1.0894) acc 71.8750 (72.3482) lr 9.3721e-04 eta 2:45:22 +epoch [28/50] batch [355/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.9043 (1.0921) acc 53.1250 (72.3063) lr 9.3721e-04 eta 2:45:16 +epoch [28/50] batch [360/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.7554 (1.0884) acc 84.3750 (72.4392) lr 
9.3721e-04 eta 2:45:11 +epoch [28/50] batch [365/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.8867 (1.0880) acc 81.2500 (72.4743) lr 9.3721e-04 eta 2:45:04 +epoch [28/50] batch [370/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.4082 (1.0894) acc 65.6250 (72.4071) lr 9.3721e-04 eta 2:44:59 +epoch [28/50] batch [375/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.2783 (1.0900) acc 68.7500 (72.3667) lr 9.3721e-04 eta 2:44:55 +epoch [28/50] batch [380/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.0098 (1.0896) acc 75.0000 (72.3355) lr 9.3721e-04 eta 2:44:52 +epoch [28/50] batch [385/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.8838 (1.0898) acc 78.1250 (72.3052) lr 9.3721e-04 eta 2:44:49 +epoch [28/50] batch [390/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.0186 (1.0890) acc 84.3750 (72.3558) lr 9.3721e-04 eta 2:44:48 +epoch [28/50] batch [395/500] time 0.920 (0.890) data 0.000 (0.002) loss 0.9297 (1.0876) acc 81.2500 (72.4130) lr 9.3721e-04 eta 2:44:45 +epoch [28/50] batch [400/500] time 0.903 (0.890) data 0.000 (0.002) loss 0.4531 (1.0859) acc 93.7500 (72.4844) lr 9.3721e-04 eta 2:44:40 +epoch [28/50] batch [405/500] time 0.910 (0.890) data 0.000 (0.002) loss 1.0068 (1.0863) acc 68.7500 (72.4769) lr 9.3721e-04 eta 2:44:37 +epoch [28/50] batch [410/500] time 0.875 (0.890) data 0.000 (0.002) loss 0.8159 (1.0860) acc 75.0000 (72.4771) lr 9.3721e-04 eta 2:44:33 +epoch [28/50] batch [415/500] time 0.890 (0.890) data 0.001 (0.002) loss 1.0049 (1.0868) acc 68.7500 (72.4322) lr 9.3721e-04 eta 2:44:30 +epoch [28/50] batch [420/500] time 0.879 (0.890) data 0.000 (0.002) loss 1.0947 (1.0860) acc 71.8750 (72.4405) lr 9.3721e-04 eta 2:44:24 +epoch [28/50] batch [425/500] time 0.903 (0.890) data 0.000 (0.002) loss 1.0039 (1.0863) acc 71.8750 (72.3897) lr 9.3721e-04 eta 2:44:19 +epoch [28/50] batch [430/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.9844 (1.0864) acc 71.8750 (72.4128) lr 9.3721e-04 eta 2:44:15 +epoch [28/50] batch [435/500] time 0.897 
(0.890) data 0.000 (0.002) loss 1.2529 (1.0870) acc 71.8750 (72.3922) lr 9.3721e-04 eta 2:44:09 +epoch [28/50] batch [440/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.0908 (1.0860) acc 65.6250 (72.4219) lr 9.3721e-04 eta 2:44:06 +epoch [28/50] batch [445/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.2422 (1.0843) acc 75.0000 (72.4508) lr 9.3721e-04 eta 2:44:02 +epoch [28/50] batch [450/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.8682 (1.0851) acc 81.2500 (72.4514) lr 9.3721e-04 eta 2:43:57 +epoch [28/50] batch [455/500] time 0.878 (0.890) data 0.000 (0.002) loss 1.3535 (1.0854) acc 68.7500 (72.4313) lr 9.3721e-04 eta 2:43:53 +epoch [28/50] batch [460/500] time 0.896 (0.890) data 0.000 (0.002) loss 0.9038 (1.0851) acc 68.7500 (72.4389) lr 9.3721e-04 eta 2:43:47 +epoch [28/50] batch [465/500] time 0.925 (0.890) data 0.000 (0.002) loss 0.5601 (1.0838) acc 84.3750 (72.4328) lr 9.3721e-04 eta 2:43:43 +epoch [28/50] batch [470/500] time 0.881 (0.890) data 0.000 (0.002) loss 0.8804 (1.0848) acc 68.7500 (72.4202) lr 9.3721e-04 eta 2:43:36 +epoch [28/50] batch [475/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.6475 (1.0862) acc 59.3750 (72.3882) lr 9.3721e-04 eta 2:43:29 +epoch [28/50] batch [480/500] time 0.997 (0.890) data 0.000 (0.002) loss 1.3955 (1.0905) acc 62.5000 (72.2917) lr 9.3721e-04 eta 2:43:26 +epoch [28/50] batch [485/500] time 0.872 (0.890) data 0.001 (0.002) loss 1.0947 (1.0909) acc 78.1250 (72.3003) lr 9.3721e-04 eta 2:43:20 +epoch [28/50] batch [490/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.1104 (1.0903) acc 78.1250 (72.3469) lr 9.3721e-04 eta 2:43:14 +epoch [28/50] batch [495/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.0830 (1.0898) acc 71.8750 (72.3674) lr 9.3721e-04 eta 2:43:10 +epoch [28/50] batch [500/500] time 0.859 (0.889) data 0.000 (0.002) loss 0.9893 (1.0903) acc 78.1250 (72.3500) lr 8.7467e-04 eta 2:43:04 +epoch [29/50] batch [5/500] time 0.904 (1.029) data 0.000 (0.133) loss 0.8867 (1.1085) acc 84.3750 (73.1250) 
lr 8.7467e-04 eta 3:08:35 +epoch [29/50] batch [10/500] time 0.899 (0.957) data 0.000 (0.067) loss 1.5996 (1.1880) acc 65.6250 (73.7500) lr 8.7467e-04 eta 2:55:12 +epoch [29/50] batch [15/500] time 0.861 (0.932) data 0.000 (0.044) loss 1.4775 (1.2202) acc 62.5000 (72.5000) lr 8.7467e-04 eta 2:50:36 +epoch [29/50] batch [20/500] time 0.897 (0.923) data 0.000 (0.033) loss 1.4512 (1.2255) acc 71.8750 (73.1250) lr 8.7467e-04 eta 2:48:52 +epoch [29/50] batch [25/500] time 0.862 (0.921) data 0.000 (0.027) loss 0.6938 (1.1558) acc 68.7500 (73.0000) lr 8.7467e-04 eta 2:48:27 +epoch [29/50] batch [30/500] time 0.897 (0.915) data 0.000 (0.022) loss 0.9673 (1.1185) acc 71.8750 (73.4375) lr 8.7467e-04 eta 2:47:19 +epoch [29/50] batch [35/500] time 0.872 (0.911) data 0.000 (0.019) loss 1.5352 (1.1242) acc 56.2500 (73.3929) lr 8.7467e-04 eta 2:46:28 +epoch [29/50] batch [40/500] time 0.858 (0.908) data 0.000 (0.017) loss 1.1406 (1.0801) acc 68.7500 (74.0625) lr 8.7467e-04 eta 2:45:48 +epoch [29/50] batch [45/500] time 0.885 (0.906) data 0.000 (0.015) loss 1.4473 (1.0973) acc 68.7500 (73.7500) lr 8.7467e-04 eta 2:45:20 +epoch [29/50] batch [50/500] time 0.876 (0.904) data 0.000 (0.014) loss 0.6572 (1.0784) acc 71.8750 (73.8750) lr 8.7467e-04 eta 2:44:59 +epoch [29/50] batch [55/500] time 0.895 (0.903) data 0.000 (0.012) loss 1.3018 (1.1059) acc 62.5000 (73.4659) lr 8.7467e-04 eta 2:44:46 +epoch [29/50] batch [60/500] time 0.886 (0.902) data 0.000 (0.011) loss 0.7539 (1.1288) acc 75.0000 (73.0729) lr 8.7467e-04 eta 2:44:28 +epoch [29/50] batch [65/500] time 0.884 (0.903) data 0.000 (0.010) loss 1.7881 (1.1498) acc 53.1250 (72.5481) lr 8.7467e-04 eta 2:44:32 +epoch [29/50] batch [70/500] time 0.850 (0.901) data 0.000 (0.010) loss 1.1162 (1.1519) acc 71.8750 (72.3661) lr 8.7467e-04 eta 2:44:06 +epoch [29/50] batch [75/500] time 0.863 (0.899) data 0.000 (0.009) loss 0.9243 (1.1474) acc 65.6250 (72.3333) lr 8.7467e-04 eta 2:43:46 +epoch [29/50] batch [80/500] time 0.915 (0.899) data 
0.000 (0.009) loss 0.8223 (1.1370) acc 81.2500 (72.7344) lr 8.7467e-04 eta 2:43:33 +epoch [29/50] batch [85/500] time 0.901 (0.898) data 0.000 (0.008) loss 0.9829 (1.1425) acc 78.1250 (72.8309) lr 8.7467e-04 eta 2:43:19 +epoch [29/50] batch [90/500] time 0.862 (0.897) data 0.000 (0.008) loss 1.1504 (1.1317) acc 75.0000 (72.7778) lr 8.7467e-04 eta 2:43:05 +epoch [29/50] batch [95/500] time 0.909 (0.897) data 0.000 (0.007) loss 0.8921 (1.1247) acc 75.0000 (72.7632) lr 8.7467e-04 eta 2:42:56 +epoch [29/50] batch [100/500] time 0.895 (0.896) data 0.000 (0.007) loss 0.9814 (1.1158) acc 71.8750 (72.8438) lr 8.7467e-04 eta 2:42:51 +epoch [29/50] batch [105/500] time 0.901 (0.896) data 0.000 (0.007) loss 1.4326 (1.1185) acc 62.5000 (72.8274) lr 8.7467e-04 eta 2:42:43 +epoch [29/50] batch [110/500] time 0.904 (0.896) data 0.000 (0.006) loss 0.8481 (1.1124) acc 71.8750 (72.8125) lr 8.7467e-04 eta 2:42:35 +epoch [29/50] batch [115/500] time 0.912 (0.895) data 0.000 (0.006) loss 1.2266 (1.1161) acc 71.8750 (72.6902) lr 8.7467e-04 eta 2:42:27 +epoch [29/50] batch [120/500] time 0.880 (0.895) data 0.000 (0.006) loss 1.8193 (1.1183) acc 53.1250 (72.6042) lr 8.7467e-04 eta 2:42:14 +epoch [29/50] batch [125/500] time 0.883 (0.895) data 0.000 (0.006) loss 0.5176 (1.1088) acc 84.3750 (72.7500) lr 8.7467e-04 eta 2:42:16 +epoch [29/50] batch [130/500] time 0.860 (0.894) data 0.000 (0.005) loss 1.4053 (1.1083) acc 78.1250 (72.8606) lr 8.7467e-04 eta 2:41:57 +epoch [29/50] batch [135/500] time 0.880 (0.893) data 0.000 (0.005) loss 0.9604 (1.1042) acc 75.0000 (72.9630) lr 8.7467e-04 eta 2:41:46 +epoch [29/50] batch [140/500] time 0.872 (0.893) data 0.000 (0.005) loss 1.0791 (1.1067) acc 68.7500 (72.7902) lr 8.7467e-04 eta 2:41:39 +epoch [29/50] batch [145/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.6479 (1.1035) acc 81.2500 (72.8233) lr 8.7467e-04 eta 2:41:27 +epoch [29/50] batch [150/500] time 0.868 (0.892) data 0.000 (0.005) loss 1.2988 (1.1030) acc 62.5000 (72.6667) lr 8.7467e-04 
eta 2:41:19 +epoch [29/50] batch [155/500] time 0.877 (0.892) data 0.000 (0.004) loss 0.7847 (1.1028) acc 68.7500 (72.5806) lr 8.7467e-04 eta 2:41:11 +epoch [29/50] batch [160/500] time 0.867 (0.892) data 0.000 (0.004) loss 1.5322 (1.1090) acc 65.6250 (72.5586) lr 8.7467e-04 eta 2:41:05 +epoch [29/50] batch [165/500] time 0.889 (0.892) data 0.000 (0.004) loss 0.7202 (1.1113) acc 87.5000 (72.6326) lr 8.7467e-04 eta 2:41:03 +epoch [29/50] batch [170/500] time 0.909 (0.893) data 0.000 (0.004) loss 1.4346 (1.1064) acc 62.5000 (72.6287) lr 8.7467e-04 eta 2:41:05 +epoch [29/50] batch [175/500] time 0.858 (0.892) data 0.000 (0.004) loss 0.8105 (1.1080) acc 71.8750 (72.5536) lr 8.7467e-04 eta 2:40:57 +epoch [29/50] batch [180/500] time 0.866 (0.892) data 0.000 (0.004) loss 0.9287 (1.1070) acc 81.2500 (72.6042) lr 8.7467e-04 eta 2:40:48 +epoch [29/50] batch [185/500] time 0.876 (0.892) data 0.000 (0.004) loss 1.2314 (1.1002) acc 68.7500 (72.6689) lr 8.7467e-04 eta 2:40:42 +epoch [29/50] batch [190/500] time 0.903 (0.891) data 0.000 (0.004) loss 1.3691 (1.0995) acc 75.0000 (72.8289) lr 8.7467e-04 eta 2:40:34 +epoch [29/50] batch [195/500] time 0.919 (0.891) data 0.000 (0.004) loss 1.3389 (1.0945) acc 68.7500 (72.9167) lr 8.7467e-04 eta 2:40:29 +epoch [29/50] batch [200/500] time 0.889 (0.891) data 0.000 (0.004) loss 0.8223 (1.0940) acc 84.3750 (72.9062) lr 8.7467e-04 eta 2:40:25 +epoch [29/50] batch [205/500] time 0.865 (0.891) data 0.000 (0.003) loss 1.0693 (1.0943) acc 75.0000 (72.8811) lr 8.7467e-04 eta 2:40:19 +epoch [29/50] batch [210/500] time 0.874 (0.891) data 0.000 (0.003) loss 1.9619 (1.0991) acc 53.1250 (72.7530) lr 8.7467e-04 eta 2:40:09 +epoch [29/50] batch [215/500] time 0.862 (0.890) data 0.000 (0.003) loss 0.7183 (1.0919) acc 78.1250 (72.8779) lr 8.7467e-04 eta 2:40:01 +epoch [29/50] batch [220/500] time 0.890 (0.891) data 0.000 (0.003) loss 0.7539 (1.0951) acc 78.1250 (72.7841) lr 8.7467e-04 eta 2:40:00 +epoch [29/50] batch [225/500] time 0.896 (0.890) data 
0.000 (0.003) loss 1.0156 (1.0955) acc 78.1250 (72.8333) lr 8.7467e-04 eta 2:39:55 +epoch [29/50] batch [230/500] time 0.865 (0.890) data 0.000 (0.003) loss 1.1279 (1.0965) acc 75.0000 (72.8940) lr 8.7467e-04 eta 2:39:46 +epoch [29/50] batch [235/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.9541 (1.0950) acc 78.1250 (73.0452) lr 8.7467e-04 eta 2:39:41 +epoch [29/50] batch [240/500] time 0.902 (0.890) data 0.000 (0.003) loss 0.9395 (1.0912) acc 75.0000 (73.0729) lr 8.7467e-04 eta 2:39:38 +epoch [29/50] batch [245/500] time 0.906 (0.890) data 0.000 (0.003) loss 0.6982 (1.0893) acc 75.0000 (73.0612) lr 8.7467e-04 eta 2:39:34 +epoch [29/50] batch [250/500] time 0.844 (0.890) data 0.000 (0.003) loss 0.8711 (1.0890) acc 78.1250 (73.0375) lr 8.7467e-04 eta 2:39:27 +epoch [29/50] batch [255/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.2568 (1.0878) acc 68.7500 (73.0270) lr 8.7467e-04 eta 2:39:22 +epoch [29/50] batch [260/500] time 0.882 (0.889) data 0.000 (0.003) loss 1.1387 (1.0891) acc 84.3750 (73.0769) lr 8.7467e-04 eta 2:39:13 +epoch [29/50] batch [265/500] time 0.894 (0.889) data 0.000 (0.003) loss 0.9268 (1.0920) acc 71.8750 (72.9835) lr 8.7467e-04 eta 2:39:06 +epoch [29/50] batch [270/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.5117 (1.0935) acc 62.5000 (72.9051) lr 8.7467e-04 eta 2:39:04 +epoch [29/50] batch [275/500] time 0.894 (0.889) data 0.000 (0.003) loss 0.8311 (1.0927) acc 78.1250 (72.9545) lr 8.7467e-04 eta 2:38:59 +epoch [29/50] batch [280/500] time 0.864 (0.889) data 0.000 (0.003) loss 0.9053 (1.0912) acc 75.0000 (72.9464) lr 8.7467e-04 eta 2:38:51 +epoch [29/50] batch [285/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.6772 (1.0880) acc 75.0000 (72.9605) lr 8.7467e-04 eta 2:38:45 +epoch [29/50] batch [290/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.1875 (1.0858) acc 71.8750 (72.9849) lr 8.7467e-04 eta 2:38:39 +epoch [29/50] batch [295/500] time 0.855 (0.889) data 0.000 (0.002) loss 0.8398 (1.0863) acc 81.2500 (72.9873) lr 
8.7467e-04 eta 2:38:33 +epoch [29/50] batch [300/500] time 0.845 (0.888) data 0.000 (0.002) loss 1.0811 (1.0884) acc 78.1250 (72.8854) lr 8.7467e-04 eta 2:38:25 +epoch [29/50] batch [305/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.8857 (1.0874) acc 78.1250 (72.8689) lr 8.7467e-04 eta 2:38:19 +epoch [29/50] batch [310/500] time 0.955 (0.888) data 0.000 (0.002) loss 1.3047 (1.0886) acc 71.8750 (72.8931) lr 8.7467e-04 eta 2:38:16 +epoch [29/50] batch [315/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.6733 (1.0886) acc 84.3750 (72.8472) lr 8.7467e-04 eta 2:38:13 +epoch [29/50] batch [320/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.0469 (1.0872) acc 68.7500 (72.8711) lr 8.7467e-04 eta 2:38:08 +epoch [29/50] batch [325/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.9922 (1.0887) acc 78.1250 (72.8846) lr 8.7467e-04 eta 2:38:03 +epoch [29/50] batch [330/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.8320 (1.0861) acc 81.2500 (72.9545) lr 8.7467e-04 eta 2:37:58 +epoch [29/50] batch [335/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.6011 (1.0822) acc 87.5000 (72.9851) lr 8.7467e-04 eta 2:37:54 +epoch [29/50] batch [340/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.1895 (1.0833) acc 75.0000 (72.9044) lr 8.7467e-04 eta 2:37:50 +epoch [29/50] batch [345/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.5347 (1.0827) acc 81.2500 (72.9620) lr 8.7467e-04 eta 2:37:45 +epoch [29/50] batch [350/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.7861 (1.0813) acc 78.1250 (72.9643) lr 8.7467e-04 eta 2:37:40 +epoch [29/50] batch [355/500] time 0.861 (0.888) data 0.000 (0.002) loss 0.7930 (1.0825) acc 81.2500 (72.9225) lr 8.7467e-04 eta 2:37:32 +epoch [29/50] batch [360/500] time 0.909 (0.888) data 0.000 (0.002) loss 0.6392 (1.0808) acc 78.1250 (72.9427) lr 8.7467e-04 eta 2:37:28 +epoch [29/50] batch [365/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.1787 (1.0828) acc 68.7500 (72.8767) lr 8.7467e-04 eta 2:37:23 +epoch [29/50] batch [370/500] time 0.870 
(0.888) data 0.000 (0.002) loss 0.8516 (1.0840) acc 71.8750 (72.8125) lr 8.7467e-04 eta 2:37:17 +epoch [29/50] batch [375/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6831 (1.0820) acc 68.7500 (72.8333) lr 8.7467e-04 eta 2:37:13 +epoch [29/50] batch [380/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.1963 (1.0817) acc 75.0000 (72.8618) lr 8.7467e-04 eta 2:37:09 +epoch [29/50] batch [385/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.4863 (1.0840) acc 68.7500 (72.8247) lr 8.7467e-04 eta 2:37:05 +epoch [29/50] batch [390/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.1836 (1.0848) acc 68.7500 (72.8125) lr 8.7467e-04 eta 2:37:01 +epoch [29/50] batch [395/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9087 (1.0826) acc 71.8750 (72.7769) lr 8.7467e-04 eta 2:36:55 +epoch [29/50] batch [400/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.6260 (1.0881) acc 53.1250 (72.6953) lr 8.7467e-04 eta 2:36:49 +epoch [29/50] batch [405/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.8560 (1.0856) acc 71.8750 (72.7623) lr 8.7467e-04 eta 2:36:44 +epoch [29/50] batch [410/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9297 (1.0831) acc 75.0000 (72.8201) lr 8.7467e-04 eta 2:36:43 +epoch [29/50] batch [415/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.0928 (1.0838) acc 75.0000 (72.8389) lr 8.7467e-04 eta 2:36:39 +epoch [29/50] batch [420/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0098 (1.0835) acc 78.1250 (72.8646) lr 8.7467e-04 eta 2:36:35 +epoch [29/50] batch [425/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.6572 (1.0865) acc 81.2500 (72.7794) lr 8.7467e-04 eta 2:36:28 +epoch [29/50] batch [430/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.1562 (1.0866) acc 71.8750 (72.7544) lr 8.7467e-04 eta 2:36:22 +epoch [29/50] batch [435/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.7852 (1.0871) acc 78.1250 (72.7443) lr 8.7467e-04 eta 2:36:17 +epoch [29/50] batch [440/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.6406 (1.0903) acc 56.2500 
(72.7202) lr 8.7467e-04 eta 2:36:12 +epoch [29/50] batch [445/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.6245 (1.0879) acc 78.1250 (72.7388) lr 8.7467e-04 eta 2:36:07 +epoch [29/50] batch [450/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.1094 (1.0897) acc 78.1250 (72.7708) lr 8.7467e-04 eta 2:36:01 +epoch [29/50] batch [455/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.0137 (1.0889) acc 71.8750 (72.7610) lr 8.7467e-04 eta 2:35:58 +epoch [29/50] batch [460/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2158 (1.0861) acc 71.8750 (72.8601) lr 8.7467e-04 eta 2:35:52 +epoch [29/50] batch [465/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.9150 (1.0847) acc 78.1250 (72.9032) lr 8.7467e-04 eta 2:35:47 +epoch [29/50] batch [470/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.2998 (1.0866) acc 62.5000 (72.8856) lr 8.7467e-04 eta 2:35:43 +epoch [29/50] batch [475/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.2588 (1.0880) acc 68.7500 (72.8421) lr 8.7467e-04 eta 2:35:39 +epoch [29/50] batch [480/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.2412 (1.0877) acc 68.7500 (72.7995) lr 8.7467e-04 eta 2:35:35 +epoch [29/50] batch [485/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.8198 (1.0857) acc 78.1250 (72.8737) lr 8.7467e-04 eta 2:35:29 +epoch [29/50] batch [490/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.1494 (1.0849) acc 68.7500 (72.8699) lr 8.7467e-04 eta 2:35:26 +epoch [29/50] batch [495/500] time 0.847 (0.887) data 0.000 (0.002) loss 0.9160 (1.0861) acc 68.7500 (72.8409) lr 8.7467e-04 eta 2:35:20 +epoch [29/50] batch [500/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.9614 (1.0846) acc 75.0000 (72.8375) lr 8.1262e-04 eta 2:35:16 +epoch [30/50] batch [5/500] time 0.883 (1.014) data 0.000 (0.126) loss 0.7031 (0.9355) acc 90.6250 (79.3750) lr 8.1262e-04 eta 2:57:24 +epoch [30/50] batch [10/500] time 0.883 (0.950) data 0.000 (0.063) loss 1.3125 (1.0835) acc 71.8750 (75.0000) lr 8.1262e-04 eta 2:46:08 +epoch [30/50] batch [15/500] time 
0.904 (0.928) data 0.000 (0.042) loss 1.2510 (1.0232) acc 56.2500 (73.7500) lr 8.1262e-04 eta 2:42:09 +epoch [30/50] batch [20/500] time 0.906 (0.921) data 0.000 (0.032) loss 0.8999 (1.0430) acc 75.0000 (73.7500) lr 8.1262e-04 eta 2:40:54 +epoch [30/50] batch [25/500] time 0.910 (0.916) data 0.000 (0.025) loss 0.8750 (1.0084) acc 68.7500 (73.3750) lr 8.1262e-04 eta 2:39:57 +epoch [30/50] batch [30/500] time 0.865 (0.911) data 0.000 (0.021) loss 1.2002 (1.0282) acc 75.0000 (72.9167) lr 8.1262e-04 eta 2:39:02 +epoch [30/50] batch [35/500] time 0.894 (0.908) data 0.000 (0.018) loss 1.2617 (1.0471) acc 65.6250 (71.8750) lr 8.1262e-04 eta 2:38:25 +epoch [30/50] batch [40/500] time 0.875 (0.905) data 0.000 (0.016) loss 1.2803 (1.0489) acc 65.6250 (72.3438) lr 8.1262e-04 eta 2:37:41 +epoch [30/50] batch [45/500] time 0.882 (0.901) data 0.000 (0.014) loss 1.4141 (1.0519) acc 75.0000 (72.8472) lr 8.1262e-04 eta 2:36:59 +epoch [30/50] batch [50/500] time 0.903 (0.899) data 0.000 (0.013) loss 0.4209 (1.0503) acc 90.6250 (72.9375) lr 8.1262e-04 eta 2:36:31 +epoch [30/50] batch [55/500] time 0.906 (0.898) data 0.000 (0.012) loss 1.1240 (1.0614) acc 71.8750 (72.7841) lr 8.1262e-04 eta 2:36:24 +epoch [30/50] batch [60/500] time 0.865 (0.897) data 0.000 (0.011) loss 0.6299 (1.0492) acc 90.6250 (73.0729) lr 8.1262e-04 eta 2:36:07 +epoch [30/50] batch [65/500] time 0.914 (0.897) data 0.000 (0.010) loss 0.9438 (1.0518) acc 71.8750 (72.9808) lr 8.1262e-04 eta 2:36:01 +epoch [30/50] batch [70/500] time 0.878 (0.897) data 0.000 (0.009) loss 0.7441 (1.0529) acc 81.2500 (73.1250) lr 8.1262e-04 eta 2:35:51 +epoch [30/50] batch [75/500] time 0.916 (0.897) data 0.000 (0.009) loss 1.0732 (1.0550) acc 87.5000 (73.4583) lr 8.1262e-04 eta 2:35:46 +epoch [30/50] batch [80/500] time 0.874 (0.896) data 0.000 (0.008) loss 0.6748 (1.0516) acc 84.3750 (73.6328) lr 8.1262e-04 eta 2:35:33 +epoch [30/50] batch [85/500] time 0.880 (0.895) data 0.000 (0.008) loss 1.7646 (1.0655) acc 71.8750 (73.3088) lr 
8.1262e-04 eta 2:35:17 +epoch [30/50] batch [90/500] time 0.878 (0.894) data 0.000 (0.007) loss 1.7139 (1.0696) acc 56.2500 (73.0208) lr 8.1262e-04 eta 2:35:02 +epoch [30/50] batch [95/500] time 0.866 (0.894) data 0.000 (0.007) loss 1.4092 (1.0766) acc 75.0000 (72.8947) lr 8.1262e-04 eta 2:35:00 +epoch [30/50] batch [100/500] time 0.906 (0.893) data 0.000 (0.007) loss 1.0547 (1.0795) acc 68.7500 (72.9375) lr 8.1262e-04 eta 2:34:44 +epoch [30/50] batch [105/500] time 0.906 (0.893) data 0.000 (0.006) loss 1.0566 (1.0833) acc 75.0000 (72.8274) lr 8.1262e-04 eta 2:34:38 +epoch [30/50] batch [110/500] time 0.884 (0.893) data 0.000 (0.006) loss 1.2725 (1.0854) acc 68.7500 (72.6136) lr 8.1262e-04 eta 2:34:35 +epoch [30/50] batch [115/500] time 0.872 (0.892) data 0.000 (0.006) loss 1.1807 (1.0882) acc 59.3750 (72.4728) lr 8.1262e-04 eta 2:34:23 +epoch [30/50] batch [120/500] time 0.864 (0.891) data 0.000 (0.005) loss 0.9219 (1.0809) acc 78.1250 (72.6823) lr 8.1262e-04 eta 2:34:10 +epoch [30/50] batch [125/500] time 0.859 (0.891) data 0.000 (0.005) loss 1.1221 (1.0896) acc 59.3750 (72.3000) lr 8.1262e-04 eta 2:34:02 +epoch [30/50] batch [130/500] time 0.897 (0.891) data 0.000 (0.005) loss 0.8623 (1.0853) acc 78.1250 (72.2837) lr 8.1262e-04 eta 2:33:56 +epoch [30/50] batch [135/500] time 0.856 (0.890) data 0.000 (0.005) loss 0.9971 (1.0862) acc 75.0000 (72.2454) lr 8.1262e-04 eta 2:33:50 +epoch [30/50] batch [140/500] time 0.870 (0.891) data 0.000 (0.005) loss 0.9180 (1.0861) acc 78.1250 (72.2545) lr 8.1262e-04 eta 2:33:50 +epoch [30/50] batch [145/500] time 0.869 (0.890) data 0.000 (0.005) loss 0.6782 (1.0803) acc 81.2500 (72.3922) lr 8.1262e-04 eta 2:33:39 +epoch [30/50] batch [150/500] time 0.890 (0.891) data 0.000 (0.004) loss 1.9258 (1.0890) acc 56.2500 (72.3125) lr 8.1262e-04 eta 2:33:39 +epoch [30/50] batch [155/500] time 0.873 (0.890) data 0.000 (0.004) loss 0.7524 (1.0866) acc 81.2500 (72.3790) lr 8.1262e-04 eta 2:33:32 +epoch [30/50] batch [160/500] time 0.881 
(0.891) data 0.000 (0.004) loss 0.9668 (1.0930) acc 75.0000 (72.3242) lr 8.1262e-04 eta 2:33:29 +epoch [30/50] batch [165/500] time 0.884 (0.890) data 0.000 (0.004) loss 0.7192 (1.0866) acc 81.2500 (72.4811) lr 8.1262e-04 eta 2:33:21 +epoch [30/50] batch [170/500] time 0.900 (0.890) data 0.000 (0.004) loss 0.9297 (1.0852) acc 78.1250 (72.5000) lr 8.1262e-04 eta 2:33:15 +epoch [30/50] batch [175/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.4756 (1.0854) acc 65.6250 (72.5000) lr 8.1262e-04 eta 2:33:09 +epoch [30/50] batch [180/500] time 0.872 (0.890) data 0.000 (0.004) loss 1.1211 (1.0815) acc 75.0000 (72.5868) lr 8.1262e-04 eta 2:33:02 +epoch [30/50] batch [185/500] time 0.847 (0.889) data 0.000 (0.004) loss 0.6748 (1.0772) acc 78.1250 (72.7196) lr 8.1262e-04 eta 2:32:54 +epoch [30/50] batch [190/500] time 0.884 (0.889) data 0.000 (0.004) loss 0.9297 (1.0745) acc 81.2500 (72.8125) lr 8.1262e-04 eta 2:32:46 +epoch [30/50] batch [195/500] time 0.897 (0.889) data 0.000 (0.003) loss 1.1885 (1.0775) acc 65.6250 (72.7564) lr 8.1262e-04 eta 2:32:42 +epoch [30/50] batch [200/500] time 0.864 (0.889) data 0.000 (0.003) loss 1.2646 (1.0757) acc 75.0000 (72.7969) lr 8.1262e-04 eta 2:32:32 +epoch [30/50] batch [205/500] time 0.860 (0.889) data 0.000 (0.003) loss 0.5273 (1.0726) acc 84.3750 (72.9116) lr 8.1262e-04 eta 2:32:28 +epoch [30/50] batch [210/500] time 0.874 (0.888) data 0.000 (0.003) loss 1.2354 (1.0741) acc 78.1250 (72.9315) lr 8.1262e-04 eta 2:32:22 +epoch [30/50] batch [215/500] time 0.924 (0.888) data 0.000 (0.003) loss 1.4072 (1.0759) acc 68.7500 (72.8198) lr 8.1262e-04 eta 2:32:16 +epoch [30/50] batch [220/500] time 0.875 (0.888) data 0.000 (0.003) loss 0.9888 (1.0745) acc 78.1250 (72.8267) lr 8.1262e-04 eta 2:32:11 +epoch [30/50] batch [225/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.2881 (1.0734) acc 65.6250 (72.8472) lr 8.1262e-04 eta 2:32:05 +epoch [30/50] batch [230/500] time 0.854 (0.888) data 0.000 (0.003) loss 1.7734 (1.0725) acc 59.3750 
(72.9076) lr 8.1262e-04 eta 2:31:57 +epoch [30/50] batch [235/500] time 1.027 (0.888) data 0.000 (0.003) loss 0.9062 (1.0708) acc 71.8750 (72.9122) lr 8.1262e-04 eta 2:31:56 +epoch [30/50] batch [240/500] time 0.897 (0.888) data 0.000 (0.003) loss 1.2305 (1.0688) acc 71.8750 (72.9557) lr 8.1262e-04 eta 2:31:49 +epoch [30/50] batch [245/500] time 0.895 (0.888) data 0.000 (0.003) loss 0.5225 (1.0649) acc 84.3750 (73.0485) lr 8.1262e-04 eta 2:31:41 +epoch [30/50] batch [250/500] time 0.872 (0.888) data 0.000 (0.003) loss 0.6450 (1.0652) acc 71.8750 (73.0125) lr 8.1262e-04 eta 2:31:37 +epoch [30/50] batch [255/500] time 0.877 (0.888) data 0.000 (0.003) loss 0.6030 (1.0646) acc 87.5000 (72.9289) lr 8.1262e-04 eta 2:31:33 +epoch [30/50] batch [260/500] time 0.889 (0.887) data 0.000 (0.003) loss 1.6396 (1.0673) acc 65.6250 (72.9688) lr 8.1262e-04 eta 2:31:27 +epoch [30/50] batch [265/500] time 0.897 (0.888) data 0.000 (0.003) loss 1.1387 (1.0721) acc 68.7500 (72.8302) lr 8.1262e-04 eta 2:31:24 +epoch [30/50] batch [270/500] time 0.898 (0.887) data 0.000 (0.003) loss 1.3076 (1.0712) acc 65.6250 (72.8472) lr 8.1262e-04 eta 2:31:18 +epoch [30/50] batch [275/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.8643 (1.0746) acc 56.2500 (72.8182) lr 8.1262e-04 eta 2:31:15 +epoch [30/50] batch [280/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.1104 (1.0781) acc 71.8750 (72.7567) lr 8.1262e-04 eta 2:31:15 +epoch [30/50] batch [285/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.3311 (1.0817) acc 68.7500 (72.6535) lr 8.1262e-04 eta 2:31:10 +epoch [30/50] batch [290/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.8462 (1.0819) acc 78.1250 (72.6401) lr 8.1262e-04 eta 2:31:07 +epoch [30/50] batch [295/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.8169 (1.0793) acc 78.1250 (72.6695) lr 8.1262e-04 eta 2:31:00 +epoch [30/50] batch [300/500] time 0.904 (0.888) data 0.000 (0.002) loss 0.9180 (1.0788) acc 75.0000 (72.6875) lr 8.1262e-04 eta 2:30:56 +epoch [30/50] batch [305/500] 
time 0.883 (0.888) data 0.000 (0.002) loss 0.6846 (1.0763) acc 81.2500 (72.7664) lr 8.1262e-04 eta 2:30:51 +epoch [30/50] batch [310/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9697 (1.0772) acc 71.8750 (72.7016) lr 8.1262e-04 eta 2:30:47 +epoch [30/50] batch [315/500] time 0.856 (0.888) data 0.000 (0.002) loss 0.9805 (1.0772) acc 68.7500 (72.6687) lr 8.1262e-04 eta 2:30:40 +epoch [30/50] batch [320/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.5386 (1.0766) acc 87.5000 (72.6465) lr 8.1262e-04 eta 2:30:34 +epoch [30/50] batch [325/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9409 (1.0770) acc 71.8750 (72.5865) lr 8.1262e-04 eta 2:30:30 +epoch [30/50] batch [330/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3926 (1.0747) acc 78.1250 (72.6799) lr 8.1262e-04 eta 2:30:25 +epoch [30/50] batch [335/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.3164 (1.0763) acc 75.0000 (72.6586) lr 8.1262e-04 eta 2:30:24 +epoch [30/50] batch [340/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.1279 (1.0783) acc 71.8750 (72.6011) lr 8.1262e-04 eta 2:30:20 +epoch [30/50] batch [345/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.0215 (1.0772) acc 65.6250 (72.6449) lr 8.1262e-04 eta 2:30:17 +epoch [30/50] batch [350/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.3838 (1.0774) acc 71.8750 (72.6518) lr 8.1262e-04 eta 2:30:13 +epoch [30/50] batch [355/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.1348 (1.0760) acc 78.1250 (72.6761) lr 8.1262e-04 eta 2:30:08 +epoch [30/50] batch [360/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.0791 (1.0771) acc 71.8750 (72.6389) lr 8.1262e-04 eta 2:30:04 +epoch [30/50] batch [365/500] time 0.857 (0.888) data 0.000 (0.002) loss 0.3523 (1.0733) acc 90.6250 (72.7055) lr 8.1262e-04 eta 2:30:00 +epoch [30/50] batch [370/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.7061 (1.0734) acc 81.2500 (72.7027) lr 8.1262e-04 eta 2:29:53 +epoch [30/50] batch [375/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.9824 (1.0739) acc 
65.6250 (72.6417) lr 8.1262e-04 eta 2:29:47 +epoch [30/50] batch [380/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.3398 (1.0753) acc 71.8750 (72.6398) lr 8.1262e-04 eta 2:29:44 +epoch [30/50] batch [385/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.2236 (1.0767) acc 59.3750 (72.5893) lr 8.1262e-04 eta 2:29:38 +epoch [30/50] batch [390/500] time 0.930 (0.888) data 0.000 (0.002) loss 0.5259 (1.0755) acc 84.3750 (72.5962) lr 8.1262e-04 eta 2:29:35 +epoch [30/50] batch [395/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.3701 (1.0769) acc 59.3750 (72.5158) lr 8.1262e-04 eta 2:29:31 +epoch [30/50] batch [400/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.8535 (1.0802) acc 65.6250 (72.5078) lr 8.1262e-04 eta 2:29:26 +epoch [30/50] batch [405/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.4004 (1.0813) acc 78.1250 (72.5309) lr 8.1262e-04 eta 2:29:21 +epoch [30/50] batch [410/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.9883 (1.0815) acc 71.8750 (72.5457) lr 8.1262e-04 eta 2:29:15 +epoch [30/50] batch [415/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.7017 (1.0790) acc 78.1250 (72.5828) lr 8.1262e-04 eta 2:29:12 +epoch [30/50] batch [420/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.1914 (1.0799) acc 62.5000 (72.5298) lr 8.1262e-04 eta 2:29:07 +epoch [30/50] batch [425/500] time 0.854 (0.888) data 0.000 (0.002) loss 0.6929 (1.0821) acc 84.3750 (72.5441) lr 8.1262e-04 eta 2:29:03 +epoch [30/50] batch [430/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.8545 (1.0804) acc 65.6250 (72.6017) lr 8.1262e-04 eta 2:28:57 +epoch [30/50] batch [435/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.8701 (1.0807) acc 75.0000 (72.6365) lr 8.1262e-04 eta 2:28:52 +epoch [30/50] batch [440/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.0469 (1.0824) acc 68.7500 (72.5994) lr 8.1262e-04 eta 2:28:49 +epoch [30/50] batch [445/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.1074 (1.0819) acc 65.6250 (72.5913) lr 8.1262e-04 eta 2:28:43 +epoch [30/50] batch 
[450/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0986 (1.0801) acc 78.1250 (72.6736) lr 8.1262e-04 eta 2:28:38 +epoch [30/50] batch [455/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.7329 (1.0790) acc 78.1250 (72.6854) lr 8.1262e-04 eta 2:28:35 +epoch [30/50] batch [460/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.9546 (1.0775) acc 68.7500 (72.7106) lr 8.1262e-04 eta 2:28:29 +epoch [30/50] batch [465/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.1533 (1.0776) acc 65.6250 (72.6815) lr 8.1262e-04 eta 2:28:24 +epoch [30/50] batch [470/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.3271 (1.0765) acc 68.7500 (72.7128) lr 8.1262e-04 eta 2:28:19 +epoch [30/50] batch [475/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.6416 (1.0779) acc 59.3750 (72.6842) lr 8.1262e-04 eta 2:28:15 +epoch [30/50] batch [480/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.0439 (1.0776) acc 71.8750 (72.6888) lr 8.1262e-04 eta 2:28:13 +epoch [30/50] batch [485/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.2324 (1.0761) acc 68.7500 (72.7255) lr 8.1262e-04 eta 2:28:08 +epoch [30/50] batch [490/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.0381 (1.0795) acc 71.8750 (72.6084) lr 8.1262e-04 eta 2:28:03 +epoch [30/50] batch [495/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.6489 (1.0800) acc 87.5000 (72.6199) lr 8.1262e-04 eta 2:27:58 +epoch [30/50] batch [500/500] time 0.899 (0.887) data 0.000 (0.001) loss 1.5420 (1.0819) acc 71.8750 (72.5812) lr 7.5131e-04 eta 2:27:53 +epoch [31/50] batch [5/500] time 0.853 (1.000) data 0.000 (0.127) loss 1.6455 (1.0826) acc 65.6250 (73.7500) lr 7.5131e-04 eta 2:46:35 +epoch [31/50] batch [10/500] time 0.868 (0.936) data 0.000 (0.063) loss 0.9639 (1.0008) acc 81.2500 (75.9375) lr 7.5131e-04 eta 2:35:48 +epoch [31/50] batch [15/500] time 0.912 (0.918) data 0.000 (0.042) loss 1.1484 (1.1188) acc 75.0000 (73.1250) lr 7.5131e-04 eta 2:32:42 +epoch [31/50] batch [20/500] time 0.887 (0.911) data 0.000 (0.032) loss 0.7393 (1.1454) acc 
81.2500 (72.1875) lr 7.5131e-04 eta 2:31:26 +epoch [31/50] batch [25/500] time 0.902 (0.906) data 0.000 (0.025) loss 0.7192 (1.1466) acc 75.0000 (71.5000) lr 7.5131e-04 eta 2:30:39 +epoch [31/50] batch [30/500] time 0.899 (0.902) data 0.000 (0.021) loss 0.7563 (1.1100) acc 78.1250 (72.6042) lr 7.5131e-04 eta 2:29:56 +epoch [31/50] batch [35/500] time 0.894 (0.899) data 0.000 (0.018) loss 0.7622 (1.1293) acc 84.3750 (72.5000) lr 7.5131e-04 eta 2:29:23 +epoch [31/50] batch [40/500] time 0.898 (0.897) data 0.000 (0.016) loss 1.0762 (1.1233) acc 68.7500 (72.7344) lr 7.5131e-04 eta 2:28:51 +epoch [31/50] batch [45/500] time 0.901 (0.895) data 0.000 (0.014) loss 0.6450 (1.1125) acc 71.8750 (72.5694) lr 7.5131e-04 eta 2:28:34 +epoch [31/50] batch [50/500] time 0.870 (0.895) data 0.000 (0.013) loss 1.5752 (1.1156) acc 65.6250 (72.4375) lr 7.5131e-04 eta 2:28:21 +epoch [31/50] batch [55/500] time 0.847 (0.893) data 0.000 (0.012) loss 1.0986 (1.1146) acc 71.8750 (72.3295) lr 7.5131e-04 eta 2:28:03 +epoch [31/50] batch [60/500] time 0.896 (0.893) data 0.000 (0.011) loss 0.8296 (1.1003) acc 78.1250 (72.4479) lr 7.5131e-04 eta 2:27:53 +epoch [31/50] batch [65/500] time 0.897 (0.892) data 0.000 (0.010) loss 1.0820 (1.1021) acc 71.8750 (72.3558) lr 7.5131e-04 eta 2:27:39 +epoch [31/50] batch [70/500] time 0.909 (0.892) data 0.000 (0.009) loss 0.5239 (1.0870) acc 84.3750 (72.5000) lr 7.5131e-04 eta 2:27:39 +epoch [31/50] batch [75/500] time 0.926 (0.893) data 0.000 (0.009) loss 0.8057 (1.0974) acc 78.1250 (72.2500) lr 7.5131e-04 eta 2:27:45 +epoch [31/50] batch [80/500] time 0.882 (0.893) data 0.000 (0.008) loss 1.2627 (1.1012) acc 65.6250 (72.0312) lr 7.5131e-04 eta 2:27:42 +epoch [31/50] batch [85/500] time 0.862 (0.893) data 0.000 (0.008) loss 1.5459 (1.1127) acc 56.2500 (71.9485) lr 7.5131e-04 eta 2:27:29 +epoch [31/50] batch [90/500] time 0.885 (0.892) data 0.000 (0.007) loss 0.7764 (1.1126) acc 71.8750 (71.9444) lr 7.5131e-04 eta 2:27:18 +epoch [31/50] batch [95/500] time 
0.905 (0.891) data 0.000 (0.007) loss 0.6514 (1.1079) acc 68.7500 (71.6447) lr 7.5131e-04 eta 2:27:04 +epoch [31/50] batch [100/500] time 0.904 (0.890) data 0.000 (0.007) loss 1.0859 (1.1030) acc 71.8750 (71.7500) lr 7.5131e-04 eta 2:26:54 +epoch [31/50] batch [105/500] time 0.876 (0.889) data 0.000 (0.006) loss 0.9536 (1.0994) acc 78.1250 (71.8155) lr 7.5131e-04 eta 2:26:40 +epoch [31/50] batch [110/500] time 0.872 (0.889) data 0.000 (0.006) loss 1.1113 (1.0970) acc 75.0000 (72.1023) lr 7.5131e-04 eta 2:26:32 +epoch [31/50] batch [115/500] time 0.882 (0.888) data 0.000 (0.006) loss 0.9014 (1.1046) acc 78.1250 (72.0924) lr 7.5131e-04 eta 2:26:21 +epoch [31/50] batch [120/500] time 0.911 (0.890) data 0.000 (0.005) loss 1.0625 (1.1101) acc 78.1250 (71.9792) lr 7.5131e-04 eta 2:26:31 +epoch [31/50] batch [125/500] time 0.879 (0.890) data 0.000 (0.005) loss 1.1934 (1.1034) acc 71.8750 (72.1000) lr 7.5131e-04 eta 2:26:27 +epoch [31/50] batch [130/500] time 0.874 (0.890) data 0.000 (0.005) loss 1.0117 (1.1031) acc 78.1250 (72.1635) lr 7.5131e-04 eta 2:26:21 +epoch [31/50] batch [135/500] time 0.904 (0.889) data 0.000 (0.005) loss 0.7178 (1.1027) acc 75.0000 (72.1065) lr 7.5131e-04 eta 2:26:13 +epoch [31/50] batch [140/500] time 0.867 (0.889) data 0.000 (0.005) loss 1.1865 (1.1077) acc 78.1250 (71.8973) lr 7.5131e-04 eta 2:26:06 +epoch [31/50] batch [145/500] time 0.882 (0.889) data 0.000 (0.005) loss 1.3525 (1.1069) acc 68.7500 (71.9397) lr 7.5131e-04 eta 2:25:59 +epoch [31/50] batch [150/500] time 0.860 (0.888) data 0.000 (0.004) loss 0.9609 (1.1081) acc 78.1250 (72.0208) lr 7.5131e-04 eta 2:25:46 +epoch [31/50] batch [155/500] time 0.864 (0.888) data 0.000 (0.004) loss 0.6396 (1.1017) acc 84.3750 (72.0968) lr 7.5131e-04 eta 2:25:40 +epoch [31/50] batch [160/500] time 0.896 (0.888) data 0.000 (0.004) loss 1.4775 (1.1040) acc 62.5000 (71.9141) lr 7.5131e-04 eta 2:25:34 +epoch [31/50] batch [165/500] time 0.905 (0.888) data 0.000 (0.004) loss 1.0410 (1.0976) acc 68.7500 
(72.1023) lr 7.5131e-04 eta 2:25:31 +epoch [31/50] batch [170/500] time 0.869 (0.887) data 0.000 (0.004) loss 0.7334 (1.0893) acc 75.0000 (72.2794) lr 7.5131e-04 eta 2:25:24 +epoch [31/50] batch [175/500] time 0.883 (0.887) data 0.000 (0.004) loss 1.4600 (1.0874) acc 68.7500 (72.2857) lr 7.5131e-04 eta 2:25:16 +epoch [31/50] batch [180/500] time 0.877 (0.887) data 0.000 (0.004) loss 1.5488 (1.0907) acc 65.6250 (72.2569) lr 7.5131e-04 eta 2:25:11 +epoch [31/50] batch [185/500] time 0.859 (0.887) data 0.000 (0.004) loss 1.7500 (1.0953) acc 56.2500 (72.1453) lr 7.5131e-04 eta 2:25:07 +epoch [31/50] batch [190/500] time 0.871 (0.887) data 0.000 (0.004) loss 1.0723 (1.0945) acc 68.7500 (72.1382) lr 7.5131e-04 eta 2:25:03 +epoch [31/50] batch [195/500] time 0.849 (0.887) data 0.000 (0.003) loss 1.2354 (1.0991) acc 68.7500 (72.0353) lr 7.5131e-04 eta 2:24:56 +epoch [31/50] batch [200/500] time 0.888 (0.887) data 0.000 (0.003) loss 1.4248 (1.1013) acc 65.6250 (72.0312) lr 7.5131e-04 eta 2:24:53 +epoch [31/50] batch [205/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.3320 (1.1035) acc 62.5000 (72.0274) lr 7.5131e-04 eta 2:24:50 +epoch [31/50] batch [210/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.9673 (1.1044) acc 71.8750 (72.0387) lr 7.5131e-04 eta 2:24:45 +epoch [31/50] batch [215/500] time 0.897 (0.887) data 0.000 (0.003) loss 1.1523 (1.1047) acc 71.8750 (72.0494) lr 7.5131e-04 eta 2:24:42 +epoch [31/50] batch [220/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.8164 (1.1024) acc 78.1250 (72.0597) lr 7.5131e-04 eta 2:24:43 +epoch [31/50] batch [225/500] time 0.927 (0.888) data 0.000 (0.003) loss 0.9644 (1.1006) acc 75.0000 (72.0833) lr 7.5131e-04 eta 2:24:41 +epoch [31/50] batch [230/500] time 0.895 (0.888) data 0.000 (0.003) loss 0.9561 (1.0997) acc 71.8750 (72.0516) lr 7.5131e-04 eta 2:24:38 +epoch [31/50] batch [235/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.1094 (1.0965) acc 71.8750 (72.1277) lr 7.5131e-04 eta 2:24:33 +epoch [31/50] batch [240/500] 
time 0.899 (0.888) data 0.000 (0.003) loss 2.2461 (1.0998) acc 46.8750 (72.0964) lr 7.5131e-04 eta 2:24:28 +epoch [31/50] batch [245/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.8867 (1.1070) acc 75.0000 (72.0408) lr 7.5131e-04 eta 2:24:22 +epoch [31/50] batch [250/500] time 0.908 (0.888) data 0.000 (0.003) loss 1.0283 (1.1075) acc 75.0000 (72.0500) lr 7.5131e-04 eta 2:24:18 +epoch [31/50] batch [255/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.7500 (1.1109) acc 75.0000 (71.9853) lr 7.5131e-04 eta 2:24:13 +epoch [31/50] batch [260/500] time 0.957 (0.888) data 0.000 (0.003) loss 1.5605 (1.1143) acc 62.5000 (71.9712) lr 7.5131e-04 eta 2:24:10 +epoch [31/50] batch [265/500] time 0.865 (0.888) data 0.000 (0.003) loss 1.5010 (1.1181) acc 71.8750 (71.9575) lr 7.5131e-04 eta 2:24:03 +epoch [31/50] batch [270/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.1611 (1.1174) acc 78.1250 (71.9676) lr 7.5131e-04 eta 2:23:57 +epoch [31/50] batch [275/500] time 0.920 (0.888) data 0.000 (0.003) loss 0.8872 (1.1165) acc 78.1250 (71.9318) lr 7.5131e-04 eta 2:23:53 +epoch [31/50] batch [280/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.9478 (1.1168) acc 68.7500 (71.8527) lr 7.5131e-04 eta 2:23:48 +epoch [31/50] batch [285/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4648 (1.1201) acc 71.8750 (71.7544) lr 7.5131e-04 eta 2:23:42 +epoch [31/50] batch [290/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.1533 (1.1231) acc 71.8750 (71.6918) lr 7.5131e-04 eta 2:23:37 +epoch [31/50] batch [295/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.1904 (1.1230) acc 68.7500 (71.6949) lr 7.5131e-04 eta 2:23:33 +epoch [31/50] batch [300/500] time 0.853 (0.887) data 0.000 (0.002) loss 1.1670 (1.1204) acc 68.7500 (71.7500) lr 7.5131e-04 eta 2:23:25 +epoch [31/50] batch [305/500] time 0.864 (0.887) data 0.001 (0.002) loss 0.6353 (1.1174) acc 87.5000 (71.7725) lr 7.5131e-04 eta 2:23:20 +epoch [31/50] batch [310/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.4424 (1.1215) acc 
62.5000 (71.6532) lr 7.5131e-04 eta 2:23:15 +epoch [31/50] batch [315/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.1426 (1.1211) acc 65.6250 (71.6964) lr 7.5131e-04 eta 2:23:11 +epoch [31/50] batch [320/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.5410 (1.1199) acc 59.3750 (71.6992) lr 7.5131e-04 eta 2:23:08 +epoch [31/50] batch [325/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.9121 (1.1140) acc 62.5000 (71.7692) lr 7.5131e-04 eta 2:23:04 +epoch [31/50] batch [330/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.9097 (1.1114) acc 78.1250 (71.8561) lr 7.5131e-04 eta 2:22:59 +epoch [31/50] batch [335/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.6250 (1.1109) acc 59.3750 (71.8470) lr 7.5131e-04 eta 2:22:52 +epoch [31/50] batch [340/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.3926 (1.1162) acc 71.8750 (71.7647) lr 7.5131e-04 eta 2:22:48 +epoch [31/50] batch [345/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.7881 (1.1145) acc 87.5000 (71.8569) lr 7.5131e-04 eta 2:22:43 +epoch [31/50] batch [350/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.6914 (1.1167) acc 62.5000 (71.8125) lr 7.5131e-04 eta 2:22:38 +epoch [31/50] batch [355/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.9473 (1.1155) acc 68.7500 (71.7782) lr 7.5131e-04 eta 2:22:33 +epoch [31/50] batch [360/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.3945 (1.1158) acc 71.8750 (71.8056) lr 7.5131e-04 eta 2:22:33 +epoch [31/50] batch [365/500] time 0.915 (0.888) data 0.000 (0.002) loss 0.9531 (1.1180) acc 65.6250 (71.7551) lr 7.5131e-04 eta 2:22:31 +epoch [31/50] batch [370/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.2832 (1.1194) acc 59.3750 (71.7230) lr 7.5131e-04 eta 2:22:25 +epoch [31/50] batch [375/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.0869 (1.1207) acc 78.1250 (71.7167) lr 7.5131e-04 eta 2:22:21 +epoch [31/50] batch [380/500] time 0.898 (0.887) data 0.001 (0.002) loss 0.9150 (1.1207) acc 75.0000 (71.7599) lr 7.5131e-04 eta 2:22:17 +epoch [31/50] batch 
[385/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.6162 (1.1192) acc 62.5000 (71.8588) lr 7.5131e-04 eta 2:22:11 +epoch [31/50] batch [390/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.8501 (1.1180) acc 84.3750 (71.9071) lr 7.5131e-04 eta 2:22:07 +epoch [31/50] batch [395/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.9521 (1.1201) acc 56.2500 (71.8829) lr 7.5131e-04 eta 2:22:02 +epoch [31/50] batch [400/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.6113 (1.1171) acc 87.5000 (71.9219) lr 7.5131e-04 eta 2:21:59 +epoch [31/50] batch [405/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0537 (1.1175) acc 71.8750 (71.8981) lr 7.5131e-04 eta 2:22:00 +epoch [31/50] batch [410/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.9390 (1.1173) acc 84.3750 (71.9284) lr 7.5131e-04 eta 2:21:55 +epoch [31/50] batch [415/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.1074 (1.1192) acc 68.7500 (71.9277) lr 7.5131e-04 eta 2:21:50 +epoch [31/50] batch [420/500] time 0.854 (0.888) data 0.000 (0.002) loss 0.7642 (1.1189) acc 71.8750 (71.9271) lr 7.5131e-04 eta 2:21:44 +epoch [31/50] batch [425/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.9219 (1.1167) acc 78.1250 (71.9632) lr 7.5131e-04 eta 2:21:41 +epoch [31/50] batch [430/500] time 0.908 (0.888) data 0.000 (0.002) loss 0.9302 (1.1157) acc 87.5000 (71.9985) lr 7.5131e-04 eta 2:21:37 +epoch [31/50] batch [435/500] time 0.919 (0.888) data 0.000 (0.002) loss 0.9268 (1.1119) acc 81.2500 (72.0905) lr 7.5131e-04 eta 2:21:33 +epoch [31/50] batch [440/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.3438 (1.1128) acc 53.1250 (72.0455) lr 7.5131e-04 eta 2:21:30 +epoch [31/50] batch [445/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.9478 (1.1130) acc 71.8750 (72.0506) lr 7.5131e-04 eta 2:21:27 +epoch [31/50] batch [450/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.3008 (1.1123) acc 68.7500 (72.0486) lr 7.5131e-04 eta 2:21:24 +epoch [31/50] batch [455/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.1680 
(1.1116) acc 68.7500 (72.0742) lr 7.5131e-04 eta 2:21:20 +epoch [31/50] batch [460/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.0029 (1.1138) acc 84.3750 (72.0720) lr 7.5131e-04 eta 2:21:14 +epoch [31/50] batch [465/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.5186 (1.1142) acc 59.3750 (72.0497) lr 7.5131e-04 eta 2:21:10 +epoch [31/50] batch [470/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.3213 (1.1145) acc 68.7500 (72.0479) lr 7.5131e-04 eta 2:21:05 +epoch [31/50] batch [475/500] time 0.915 (0.888) data 0.000 (0.002) loss 0.3916 (1.1116) acc 87.5000 (72.1118) lr 7.5131e-04 eta 2:21:00 +epoch [31/50] batch [480/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.7017 (1.1102) acc 81.2500 (72.1615) lr 7.5131e-04 eta 2:20:56 +epoch [31/50] batch [485/500] time 0.878 (0.888) data 0.001 (0.002) loss 1.4961 (1.1122) acc 68.7500 (72.1263) lr 7.5131e-04 eta 2:20:52 +epoch [31/50] batch [490/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0889 (1.1125) acc 62.5000 (72.0855) lr 7.5131e-04 eta 2:20:47 +epoch [31/50] batch [495/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.0107 (1.1104) acc 78.1250 (72.1717) lr 7.5131e-04 eta 2:20:41 +epoch [31/50] batch [500/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.1797 (1.1090) acc 71.8750 (72.2062) lr 6.9098e-04 eta 2:20:38 +epoch [32/50] batch [5/500] time 0.902 (1.048) data 0.001 (0.123) loss 0.9927 (0.9703) acc 78.1250 (73.1250) lr 6.9098e-04 eta 2:45:50 +epoch [32/50] batch [10/500] time 0.888 (0.965) data 0.000 (0.062) loss 0.9873 (1.0657) acc 68.7500 (70.6250) lr 6.9098e-04 eta 2:32:34 +epoch [32/50] batch [15/500] time 0.915 (0.939) data 0.000 (0.041) loss 1.6143 (1.1300) acc 65.6250 (69.7917) lr 6.9098e-04 eta 2:28:28 +epoch [32/50] batch [20/500] time 0.900 (0.926) data 0.000 (0.031) loss 1.5420 (1.0987) acc 65.6250 (71.4062) lr 6.9098e-04 eta 2:26:16 +epoch [32/50] batch [25/500] time 0.908 (0.918) data 0.000 (0.025) loss 1.0547 (1.0458) acc 75.0000 (72.7500) lr 6.9098e-04 eta 2:25:02 +epoch [32/50] 
batch [30/500] time 0.907 (0.913) data 0.000 (0.021) loss 1.4854 (1.0692) acc 62.5000 (72.5000) lr 6.9098e-04 eta 2:24:06 +epoch [32/50] batch [35/500] time 0.912 (0.909) data 0.000 (0.018) loss 0.7246 (1.0502) acc 81.2500 (73.4821) lr 6.9098e-04 eta 2:23:25 +epoch [32/50] batch [40/500] time 0.895 (0.909) data 0.000 (0.016) loss 0.9399 (1.0456) acc 81.2500 (73.5938) lr 6.9098e-04 eta 2:23:22 +epoch [32/50] batch [45/500] time 0.874 (0.906) data 0.000 (0.014) loss 1.4424 (1.0593) acc 84.3750 (73.7500) lr 6.9098e-04 eta 2:22:46 +epoch [32/50] batch [50/500] time 0.880 (0.904) data 0.000 (0.013) loss 1.3418 (1.0773) acc 68.7500 (73.5000) lr 6.9098e-04 eta 2:22:27 +epoch [32/50] batch [55/500] time 0.901 (0.904) data 0.000 (0.011) loss 0.8154 (1.0810) acc 81.2500 (73.2955) lr 6.9098e-04 eta 2:22:20 +epoch [32/50] batch [60/500] time 0.886 (0.903) data 0.000 (0.010) loss 1.1738 (1.0882) acc 75.0000 (73.1771) lr 6.9098e-04 eta 2:22:05 +epoch [32/50] batch [65/500] time 0.876 (0.903) data 0.000 (0.010) loss 1.7773 (1.0880) acc 68.7500 (73.4135) lr 6.9098e-04 eta 2:22:03 +epoch [32/50] batch [70/500] time 0.903 (0.902) data 0.000 (0.009) loss 0.9209 (1.0981) acc 81.2500 (73.3036) lr 6.9098e-04 eta 2:21:45 +epoch [32/50] batch [75/500] time 0.896 (0.901) data 0.000 (0.008) loss 0.3982 (1.0920) acc 84.3750 (73.2083) lr 6.9098e-04 eta 2:21:34 +epoch [32/50] batch [80/500] time 0.863 (0.900) data 0.000 (0.008) loss 1.7871 (1.0972) acc 53.1250 (73.0859) lr 6.9098e-04 eta 2:21:17 +epoch [32/50] batch [85/500] time 0.877 (0.898) data 0.000 (0.007) loss 1.3457 (1.1125) acc 68.7500 (72.7574) lr 6.9098e-04 eta 2:20:56 +epoch [32/50] batch [90/500] time 0.861 (0.897) data 0.000 (0.007) loss 1.2637 (1.1173) acc 81.2500 (72.7083) lr 6.9098e-04 eta 2:20:44 +epoch [32/50] batch [95/500] time 0.862 (0.897) data 0.000 (0.007) loss 0.8604 (1.1180) acc 75.0000 (72.4013) lr 6.9098e-04 eta 2:20:36 +epoch [32/50] batch [100/500] time 0.886 (0.897) data 0.000 (0.006) loss 1.1230 (1.1203) acc 
71.8750 (72.3438) lr 6.9098e-04 eta 2:20:28 +epoch [32/50] batch [105/500] time 0.896 (0.896) data 0.000 (0.006) loss 0.8579 (1.1141) acc 81.2500 (72.5298) lr 6.9098e-04 eta 2:20:16 +epoch [32/50] batch [110/500] time 0.884 (0.896) data 0.000 (0.006) loss 0.9331 (1.1097) acc 75.0000 (72.6420) lr 6.9098e-04 eta 2:20:15 +epoch [32/50] batch [115/500] time 0.902 (0.896) data 0.000 (0.006) loss 1.8115 (1.1188) acc 68.7500 (72.4728) lr 6.9098e-04 eta 2:20:09 +epoch [32/50] batch [120/500] time 0.880 (0.896) data 0.000 (0.005) loss 1.3701 (1.1209) acc 62.5000 (72.3438) lr 6.9098e-04 eta 2:20:03 +epoch [32/50] batch [125/500] time 0.888 (0.895) data 0.000 (0.005) loss 1.3486 (1.1222) acc 62.5000 (72.1500) lr 6.9098e-04 eta 2:19:54 +epoch [32/50] batch [130/500] time 0.847 (0.895) data 0.000 (0.005) loss 0.9131 (1.1128) acc 75.0000 (72.3798) lr 6.9098e-04 eta 2:19:41 +epoch [32/50] batch [135/500] time 0.901 (0.894) data 0.001 (0.005) loss 0.6841 (1.1079) acc 87.5000 (72.4306) lr 6.9098e-04 eta 2:19:35 +epoch [32/50] batch [140/500] time 0.882 (0.893) data 0.000 (0.005) loss 1.2988 (1.1084) acc 68.7500 (72.4330) lr 6.9098e-04 eta 2:19:19 +epoch [32/50] batch [145/500] time 0.872 (0.892) data 0.000 (0.005) loss 1.8057 (1.1093) acc 65.6250 (72.4138) lr 6.9098e-04 eta 2:19:09 +epoch [32/50] batch [150/500] time 0.883 (0.892) data 0.000 (0.004) loss 0.7280 (1.1000) acc 81.2500 (72.5208) lr 6.9098e-04 eta 2:18:59 +epoch [32/50] batch [155/500] time 0.876 (0.892) data 0.000 (0.004) loss 1.5459 (1.1080) acc 56.2500 (72.2379) lr 6.9098e-04 eta 2:18:52 +epoch [32/50] batch [160/500] time 0.886 (0.892) data 0.000 (0.004) loss 0.9258 (1.1127) acc 65.6250 (71.9727) lr 6.9098e-04 eta 2:18:48 +epoch [32/50] batch [165/500] time 0.879 (0.892) data 0.000 (0.004) loss 0.8462 (1.1068) acc 68.7500 (72.0455) lr 6.9098e-04 eta 2:18:47 +epoch [32/50] batch [170/500] time 0.924 (0.892) data 0.000 (0.004) loss 1.0605 (1.1057) acc 71.8750 (72.0588) lr 6.9098e-04 eta 2:18:43 +epoch [32/50] batch 
[175/500] time 0.857 (0.892) data 0.000 (0.004) loss 1.1582 (1.1034) acc 62.5000 (72.1071) lr 6.9098e-04 eta 2:18:38 +epoch [32/50] batch [180/500] time 0.850 (0.892) data 0.000 (0.004) loss 1.0596 (1.1012) acc 78.1250 (72.2222) lr 6.9098e-04 eta 2:18:32 +epoch [32/50] batch [185/500] time 0.867 (0.892) data 0.000 (0.004) loss 1.1162 (1.1001) acc 71.8750 (72.2128) lr 6.9098e-04 eta 2:18:28 +epoch [32/50] batch [190/500] time 0.897 (0.892) data 0.000 (0.003) loss 0.8477 (1.0977) acc 78.1250 (72.3026) lr 6.9098e-04 eta 2:18:23 +epoch [32/50] batch [195/500] time 0.912 (0.892) data 0.000 (0.003) loss 1.1768 (1.0967) acc 59.3750 (72.1955) lr 6.9098e-04 eta 2:18:20 +epoch [32/50] batch [200/500] time 0.890 (0.892) data 0.000 (0.003) loss 1.0059 (1.0931) acc 68.7500 (72.2969) lr 6.9098e-04 eta 2:18:18 +epoch [32/50] batch [205/500] time 1.009 (0.892) data 0.000 (0.003) loss 1.3057 (1.0931) acc 75.0000 (72.3933) lr 6.9098e-04 eta 2:18:13 +epoch [32/50] batch [210/500] time 0.869 (0.892) data 0.000 (0.003) loss 1.3252 (1.0939) acc 68.7500 (72.3810) lr 6.9098e-04 eta 2:18:09 +epoch [32/50] batch [215/500] time 0.912 (0.893) data 0.000 (0.003) loss 0.8877 (1.0926) acc 75.0000 (72.3983) lr 6.9098e-04 eta 2:18:07 +epoch [32/50] batch [220/500] time 0.866 (0.893) data 0.000 (0.003) loss 1.8848 (1.0954) acc 53.1250 (72.3438) lr 6.9098e-04 eta 2:18:03 +epoch [32/50] batch [225/500] time 0.895 (0.893) data 0.000 (0.003) loss 2.2246 (1.0967) acc 56.2500 (72.2639) lr 6.9098e-04 eta 2:17:58 +epoch [32/50] batch [230/500] time 0.886 (0.892) data 0.000 (0.003) loss 1.4590 (1.0973) acc 50.0000 (72.1467) lr 6.9098e-04 eta 2:17:49 +epoch [32/50] batch [235/500] time 0.874 (0.892) data 0.000 (0.003) loss 0.8169 (1.0894) acc 68.7500 (72.3005) lr 6.9098e-04 eta 2:17:43 +epoch [32/50] batch [240/500] time 0.888 (0.892) data 0.000 (0.003) loss 1.6885 (1.0913) acc 53.1250 (72.2266) lr 6.9098e-04 eta 2:17:35 +epoch [32/50] batch [245/500] time 0.860 (0.891) data 0.000 (0.003) loss 1.6807 
(1.0966) acc 71.8750 (72.2066) lr 6.9098e-04 eta 2:17:29 +epoch [32/50] batch [250/500] time 0.871 (0.891) data 0.000 (0.003) loss 0.7407 (1.0925) acc 71.8750 (72.2125) lr 6.9098e-04 eta 2:17:24 +epoch [32/50] batch [255/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.0391 (1.0917) acc 62.5000 (72.2181) lr 6.9098e-04 eta 2:17:20 +epoch [32/50] batch [260/500] time 0.900 (0.891) data 0.000 (0.003) loss 1.1963 (1.0948) acc 71.8750 (72.1394) lr 6.9098e-04 eta 2:17:16 +epoch [32/50] batch [265/500] time 0.881 (0.891) data 0.000 (0.003) loss 0.8755 (1.0940) acc 75.0000 (72.2288) lr 6.9098e-04 eta 2:17:11 +epoch [32/50] batch [270/500] time 0.859 (0.891) data 0.000 (0.003) loss 1.1113 (1.0902) acc 75.0000 (72.3148) lr 6.9098e-04 eta 2:17:04 +epoch [32/50] batch [275/500] time 0.865 (0.891) data 0.000 (0.002) loss 0.8838 (1.0881) acc 78.1250 (72.3523) lr 6.9098e-04 eta 2:16:58 +epoch [32/50] batch [280/500] time 0.907 (0.891) data 0.000 (0.002) loss 1.2695 (1.0919) acc 59.3750 (72.2098) lr 6.9098e-04 eta 2:16:55 +epoch [32/50] batch [285/500] time 0.903 (0.891) data 0.000 (0.002) loss 0.7485 (1.0859) acc 84.3750 (72.3684) lr 6.9098e-04 eta 2:16:50 +epoch [32/50] batch [290/500] time 0.911 (0.891) data 0.000 (0.002) loss 1.7402 (1.0940) acc 53.1250 (72.2198) lr 6.9098e-04 eta 2:16:45 +epoch [32/50] batch [295/500] time 0.889 (0.891) data 0.000 (0.002) loss 1.1865 (1.0973) acc 62.5000 (72.1610) lr 6.9098e-04 eta 2:16:38 +epoch [32/50] batch [300/500] time 0.908 (0.891) data 0.000 (0.002) loss 1.1367 (1.0978) acc 75.0000 (72.2083) lr 6.9098e-04 eta 2:16:34 +epoch [32/50] batch [305/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.0674 (1.0980) acc 71.8750 (72.2234) lr 6.9098e-04 eta 2:16:26 +epoch [32/50] batch [310/500] time 0.849 (0.890) data 0.000 (0.002) loss 1.3047 (1.1014) acc 75.0000 (72.1774) lr 6.9098e-04 eta 2:16:21 +epoch [32/50] batch [315/500] time 0.874 (0.890) data 0.000 (0.002) loss 0.8696 (1.0973) acc 75.0000 (72.2520) lr 6.9098e-04 eta 2:16:15 +epoch 
[32/50] batch [320/500] time 0.860 (0.890) data 0.000 (0.002) loss 1.0947 (1.1014) acc 78.1250 (72.1484) lr 6.9098e-04 eta 2:16:07 +epoch [32/50] batch [325/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.5928 (1.0991) acc 81.2500 (72.2308) lr 6.9098e-04 eta 2:16:02 +epoch [32/50] batch [330/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.7637 (1.1003) acc 78.1250 (72.2348) lr 6.9098e-04 eta 2:15:57 +epoch [32/50] batch [335/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.7881 (1.0993) acc 84.3750 (72.2668) lr 6.9098e-04 eta 2:15:51 +epoch [32/50] batch [340/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.4277 (1.1001) acc 78.1250 (72.2794) lr 6.9098e-04 eta 2:15:46 +epoch [32/50] batch [345/500] time 0.874 (0.889) data 0.000 (0.002) loss 0.9170 (1.0974) acc 75.0000 (72.3098) lr 6.9098e-04 eta 2:15:39 +epoch [32/50] batch [350/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.5410 (1.0995) acc 75.0000 (72.2589) lr 6.9098e-04 eta 2:15:38 +epoch [32/50] batch [355/500] time 0.907 (0.889) data 0.000 (0.002) loss 0.8770 (1.0959) acc 81.2500 (72.3239) lr 6.9098e-04 eta 2:15:33 +epoch [32/50] batch [360/500] time 0.902 (0.890) data 0.000 (0.002) loss 1.1152 (1.0977) acc 65.6250 (72.2309) lr 6.9098e-04 eta 2:15:30 +epoch [32/50] batch [365/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.2148 (1.0987) acc 59.3750 (72.2003) lr 6.9098e-04 eta 2:15:23 +epoch [32/50] batch [370/500] time 0.854 (0.889) data 0.000 (0.002) loss 1.0811 (1.0968) acc 81.2500 (72.2720) lr 6.9098e-04 eta 2:15:16 +epoch [32/50] batch [375/500] time 0.857 (0.889) data 0.000 (0.002) loss 1.2988 (1.0984) acc 68.7500 (72.2000) lr 6.9098e-04 eta 2:15:09 +epoch [32/50] batch [380/500] time 0.912 (0.889) data 0.000 (0.002) loss 1.7061 (1.1005) acc 50.0000 (72.1382) lr 6.9098e-04 eta 2:15:04 +epoch [32/50] batch [385/500] time 0.916 (0.889) data 0.000 (0.002) loss 1.2021 (1.0976) acc 65.6250 (72.1997) lr 6.9098e-04 eta 2:14:59 +epoch [32/50] batch [390/500] time 0.874 (0.889) data 0.000 (0.002) loss 
0.7700 (1.0949) acc 68.7500 (72.2596) lr 6.9098e-04 eta 2:14:54 +epoch [32/50] batch [395/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.7207 (1.0952) acc 75.0000 (72.2073) lr 6.9098e-04 eta 2:14:53 +epoch [32/50] batch [400/500] time 0.921 (0.889) data 0.000 (0.002) loss 0.9609 (1.0964) acc 78.1250 (72.1953) lr 6.9098e-04 eta 2:14:48 +epoch [32/50] batch [405/500] time 0.885 (0.889) data 0.000 (0.002) loss 0.8423 (1.0964) acc 68.7500 (72.1836) lr 6.9098e-04 eta 2:14:44 +epoch [32/50] batch [410/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.1328 (1.0945) acc 75.0000 (72.2104) lr 6.9098e-04 eta 2:14:40 +epoch [32/50] batch [415/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.0967 (1.0927) acc 71.8750 (72.2515) lr 6.9098e-04 eta 2:14:37 +epoch [32/50] batch [420/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.8770 (1.0919) acc 59.3750 (72.2917) lr 6.9098e-04 eta 2:14:33 +epoch [32/50] batch [425/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.2471 (1.0945) acc 59.3750 (72.2353) lr 6.9098e-04 eta 2:14:28 +epoch [32/50] batch [430/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.8389 (1.0965) acc 53.1250 (72.1730) lr 6.9098e-04 eta 2:14:22 +epoch [32/50] batch [435/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.7793 (1.0988) acc 71.8750 (72.1408) lr 6.9098e-04 eta 2:14:18 +epoch [32/50] batch [440/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.7275 (1.0988) acc 81.2500 (72.1662) lr 6.9098e-04 eta 2:14:13 +epoch [32/50] batch [445/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.1865 (1.0974) acc 81.2500 (72.2542) lr 6.9098e-04 eta 2:14:09 +epoch [32/50] batch [450/500] time 0.865 (0.889) data 0.000 (0.002) loss 0.8257 (1.0959) acc 87.5000 (72.3403) lr 6.9098e-04 eta 2:14:04 +epoch [32/50] batch [455/500] time 0.920 (0.889) data 0.000 (0.002) loss 1.4473 (1.0961) acc 65.6250 (72.3352) lr 6.9098e-04 eta 2:13:59 +epoch [32/50] batch [460/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.2686 (1.0936) acc 62.5000 (72.3913) lr 6.9098e-04 eta 2:13:55 
+epoch [32/50] batch [465/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0615 (1.0952) acc 71.8750 (72.3589) lr 6.9098e-04 eta 2:13:49 +epoch [32/50] batch [470/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.1348 (1.0964) acc 68.7500 (72.3471) lr 6.9098e-04 eta 2:13:44 +epoch [32/50] batch [475/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.2852 (1.0960) acc 62.5000 (72.3224) lr 6.9098e-04 eta 2:13:40 +epoch [32/50] batch [480/500] time 0.920 (0.889) data 0.000 (0.002) loss 1.9189 (1.0974) acc 68.7500 (72.3372) lr 6.9098e-04 eta 2:13:35 +epoch [32/50] batch [485/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.2725 (1.0993) acc 71.8750 (72.3325) lr 6.9098e-04 eta 2:13:31 +epoch [32/50] batch [490/500] time 0.905 (0.889) data 0.000 (0.001) loss 1.0820 (1.0977) acc 71.8750 (72.3533) lr 6.9098e-04 eta 2:13:28 +epoch [32/50] batch [495/500] time 0.869 (0.889) data 0.000 (0.001) loss 0.9121 (1.0981) acc 81.2500 (72.3737) lr 6.9098e-04 eta 2:13:26 +epoch [32/50] batch [500/500] time 0.929 (0.889) data 0.000 (0.001) loss 1.1514 (1.0979) acc 75.0000 (72.3937) lr 6.3188e-04 eta 2:13:22 +epoch [33/50] batch [5/500] time 0.882 (1.011) data 0.000 (0.124) loss 0.8999 (1.0192) acc 78.1250 (76.2500) lr 6.3188e-04 eta 2:31:35 +epoch [33/50] batch [10/500] time 0.897 (0.950) data 0.000 (0.062) loss 1.3936 (1.0940) acc 68.7500 (74.0625) lr 6.3188e-04 eta 2:22:24 +epoch [33/50] batch [15/500] time 0.913 (0.934) data 0.000 (0.041) loss 0.9023 (1.1522) acc 71.8750 (72.5000) lr 6.3188e-04 eta 2:19:55 +epoch [33/50] batch [20/500] time 0.878 (0.922) data 0.000 (0.031) loss 1.1982 (1.1228) acc 71.8750 (72.5000) lr 6.3188e-04 eta 2:17:57 +epoch [33/50] batch [25/500] time 0.883 (0.915) data 0.000 (0.025) loss 0.9233 (1.1103) acc 75.0000 (72.0000) lr 6.3188e-04 eta 2:16:54 +epoch [33/50] batch [30/500] time 0.903 (0.912) data 0.000 (0.021) loss 0.7275 (1.1018) acc 90.6250 (73.0208) lr 6.3188e-04 eta 2:16:23 +epoch [33/50] batch [35/500] time 0.882 (0.910) data 0.000 (0.018) loss 
1.1426 (1.1112) acc 75.0000 (72.6786) lr 6.3188e-04 eta 2:15:54 +epoch [33/50] batch [40/500] time 0.868 (0.906) data 0.000 (0.016) loss 1.7500 (1.1221) acc 56.2500 (71.9531) lr 6.3188e-04 eta 2:15:20 +epoch [33/50] batch [45/500] time 0.866 (0.904) data 0.000 (0.014) loss 1.0156 (1.1142) acc 68.7500 (71.8750) lr 6.3188e-04 eta 2:14:56 +epoch [33/50] batch [50/500] time 0.876 (0.905) data 0.000 (0.013) loss 0.7305 (1.1183) acc 81.2500 (72.0000) lr 6.3188e-04 eta 2:15:02 +epoch [33/50] batch [55/500] time 0.904 (0.904) data 0.000 (0.011) loss 0.5791 (1.0973) acc 75.0000 (72.6136) lr 6.3188e-04 eta 2:14:46 +epoch [33/50] batch [60/500] time 0.902 (0.902) data 0.000 (0.011) loss 0.8613 (1.0976) acc 71.8750 (72.4479) lr 6.3188e-04 eta 2:14:24 +epoch [33/50] batch [65/500] time 0.863 (0.901) data 0.000 (0.010) loss 0.9292 (1.0702) acc 81.2500 (73.3654) lr 6.3188e-04 eta 2:14:09 +epoch [33/50] batch [70/500] time 0.896 (0.899) data 0.000 (0.009) loss 1.4570 (1.0920) acc 65.6250 (73.3482) lr 6.3188e-04 eta 2:13:51 +epoch [33/50] batch [75/500] time 0.870 (0.898) data 0.000 (0.008) loss 1.1611 (1.0889) acc 71.8750 (73.2917) lr 6.3188e-04 eta 2:13:32 +epoch [33/50] batch [80/500] time 0.885 (0.897) data 0.000 (0.008) loss 1.5195 (1.0964) acc 62.5000 (73.1641) lr 6.3188e-04 eta 2:13:22 +epoch [33/50] batch [85/500] time 0.883 (0.895) data 0.000 (0.008) loss 1.3584 (1.0976) acc 71.8750 (73.1618) lr 6.3188e-04 eta 2:13:01 +epoch [33/50] batch [90/500] time 0.895 (0.896) data 0.000 (0.007) loss 1.1270 (1.1042) acc 65.6250 (72.9514) lr 6.3188e-04 eta 2:12:59 +epoch [33/50] batch [95/500] time 0.883 (0.897) data 0.000 (0.007) loss 1.2930 (1.0987) acc 65.6250 (73.0263) lr 6.3188e-04 eta 2:13:05 +epoch [33/50] batch [100/500] time 0.891 (0.896) data 0.000 (0.006) loss 1.6895 (1.0985) acc 68.7500 (72.9688) lr 6.3188e-04 eta 2:12:56 +epoch [33/50] batch [105/500] time 0.902 (0.895) data 0.000 (0.006) loss 1.0957 (1.1020) acc 75.0000 (72.8274) lr 6.3188e-04 eta 2:12:41 +epoch [33/50] 
batch [110/500] time 0.877 (0.894) data 0.000 (0.006) loss 1.0635 (1.1022) acc 62.5000 (72.7273) lr 6.3188e-04 eta 2:12:31 +epoch [33/50] batch [115/500] time 0.903 (0.894) data 0.000 (0.006) loss 0.8755 (1.0975) acc 81.2500 (72.7446) lr 6.3188e-04 eta 2:12:26 +epoch [33/50] batch [120/500] time 0.901 (0.894) data 0.000 (0.005) loss 1.1797 (1.1020) acc 65.6250 (72.6042) lr 6.3188e-04 eta 2:12:17 +epoch [33/50] batch [125/500] time 0.884 (0.894) data 0.000 (0.005) loss 0.6479 (1.1019) acc 81.2500 (72.7000) lr 6.3188e-04 eta 2:12:10 +epoch [33/50] batch [130/500] time 0.857 (0.893) data 0.000 (0.005) loss 1.0381 (1.0998) acc 78.1250 (72.6683) lr 6.3188e-04 eta 2:11:56 +epoch [33/50] batch [135/500] time 0.865 (0.892) data 0.000 (0.005) loss 1.6240 (1.1148) acc 65.6250 (72.3843) lr 6.3188e-04 eta 2:11:48 +epoch [33/50] batch [140/500] time 0.850 (0.892) data 0.000 (0.005) loss 1.4512 (1.1117) acc 62.5000 (72.3884) lr 6.3188e-04 eta 2:11:38 +epoch [33/50] batch [145/500] time 0.900 (0.891) data 0.000 (0.004) loss 1.5547 (1.1151) acc 53.1250 (72.2414) lr 6.3188e-04 eta 2:11:33 +epoch [33/50] batch [150/500] time 0.848 (0.890) data 0.000 (0.004) loss 1.1885 (1.1196) acc 65.6250 (72.1667) lr 6.3188e-04 eta 2:11:20 +epoch [33/50] batch [155/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.0371 (1.1246) acc 71.8750 (71.9960) lr 6.3188e-04 eta 2:11:13 +epoch [33/50] batch [160/500] time 0.886 (0.890) data 0.000 (0.004) loss 0.8003 (1.1173) acc 81.2500 (72.2656) lr 6.3188e-04 eta 2:11:07 +epoch [33/50] batch [165/500] time 0.895 (0.890) data 0.000 (0.004) loss 0.5542 (1.1170) acc 87.5000 (72.3106) lr 6.3188e-04 eta 2:11:02 +epoch [33/50] batch [170/500] time 0.903 (0.889) data 0.000 (0.004) loss 1.3896 (1.1197) acc 68.7500 (72.2059) lr 6.3188e-04 eta 2:10:52 +epoch [33/50] batch [175/500] time 0.878 (0.889) data 0.000 (0.004) loss 0.8945 (1.1158) acc 75.0000 (72.3036) lr 6.3188e-04 eta 2:10:43 +epoch [33/50] batch [180/500] time 0.868 (0.888) data 0.000 (0.004) loss 1.2871 
(1.1116) acc 68.7500 (72.4826) lr 6.3188e-04 eta 2:10:35 +epoch [33/50] batch [185/500] time 0.878 (0.888) data 0.000 (0.004) loss 1.3320 (1.1114) acc 62.5000 (72.4831) lr 6.3188e-04 eta 2:10:29 +epoch [33/50] batch [190/500] time 0.976 (0.888) data 0.000 (0.003) loss 1.3311 (1.1129) acc 68.7500 (72.4671) lr 6.3188e-04 eta 2:10:27 +epoch [33/50] batch [195/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.8047 (1.1087) acc 81.2500 (72.4840) lr 6.3188e-04 eta 2:10:21 +epoch [33/50] batch [200/500] time 0.862 (0.888) data 0.000 (0.003) loss 1.2520 (1.1102) acc 68.7500 (72.4375) lr 6.3188e-04 eta 2:10:13 +epoch [33/50] batch [205/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.7183 (1.1076) acc 71.8750 (72.4543) lr 6.3188e-04 eta 2:10:08 +epoch [33/50] batch [210/500] time 0.892 (0.888) data 0.000 (0.003) loss 1.3896 (1.1064) acc 68.7500 (72.5000) lr 6.3188e-04 eta 2:10:03 +epoch [33/50] batch [215/500] time 0.928 (0.888) data 0.000 (0.003) loss 1.2988 (1.1103) acc 65.6250 (72.4709) lr 6.3188e-04 eta 2:10:01 +epoch [33/50] batch [220/500] time 0.841 (0.888) data 0.000 (0.003) loss 1.4316 (1.1075) acc 59.3750 (72.4716) lr 6.3188e-04 eta 2:09:52 +epoch [33/50] batch [225/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.4658 (1.1084) acc 56.2500 (72.4167) lr 6.3188e-04 eta 2:09:48 +epoch [33/50] batch [230/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.8662 (1.1072) acc 78.1250 (72.4728) lr 6.3188e-04 eta 2:09:46 +epoch [33/50] batch [235/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.9678 (1.1115) acc 71.8750 (72.4069) lr 6.3188e-04 eta 2:09:45 +epoch [33/50] batch [240/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.1934 (1.1148) acc 59.3750 (72.2656) lr 6.3188e-04 eta 2:09:40 +epoch [33/50] batch [245/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.8813 (1.1155) acc 75.0000 (72.3214) lr 6.3188e-04 eta 2:09:37 +epoch [33/50] batch [250/500] time 0.867 (0.888) data 0.000 (0.003) loss 0.8027 (1.1126) acc 75.0000 (72.3500) lr 6.3188e-04 eta 2:09:28 +epoch 
[33/50] batch [255/500] time 0.884 (0.888) data 0.000 (0.003) loss 1.2988 (1.1143) acc 68.7500 (72.3407) lr 6.3188e-04 eta 2:09:23 +epoch [33/50] batch [260/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.0703 (1.1127) acc 81.2500 (72.3918) lr 6.3188e-04 eta 2:09:18 +epoch [33/50] batch [265/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.2148 (1.1089) acc 75.0000 (72.5000) lr 6.3188e-04 eta 2:09:17 +epoch [33/50] batch [270/500] time 0.898 (0.888) data 0.000 (0.003) loss 1.0723 (1.1068) acc 75.0000 (72.5000) lr 6.3188e-04 eta 2:09:11 +epoch [33/50] batch [275/500] time 0.904 (0.888) data 0.000 (0.002) loss 1.3457 (1.1088) acc 59.3750 (72.3977) lr 6.3188e-04 eta 2:09:07 +epoch [33/50] batch [280/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.7808 (1.1092) acc 78.1250 (72.3772) lr 6.3188e-04 eta 2:09:06 +epoch [33/50] batch [285/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.6484 (1.1086) acc 81.2500 (72.3904) lr 6.3188e-04 eta 2:08:59 +epoch [33/50] batch [290/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.0244 (1.1051) acc 78.1250 (72.5108) lr 6.3188e-04 eta 2:08:54 +epoch [33/50] batch [295/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1006 (1.1040) acc 65.6250 (72.5212) lr 6.3188e-04 eta 2:08:48 +epoch [33/50] batch [300/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.3340 (1.1040) acc 62.5000 (72.5000) lr 6.3188e-04 eta 2:08:44 +epoch [33/50] batch [305/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.0742 (1.1042) acc 75.0000 (72.5205) lr 6.3188e-04 eta 2:08:39 +epoch [33/50] batch [310/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.8730 (1.1034) acc 71.8750 (72.4899) lr 6.3188e-04 eta 2:08:34 +epoch [33/50] batch [315/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.6260 (1.0995) acc 78.1250 (72.5198) lr 6.3188e-04 eta 2:08:31 +epoch [33/50] batch [320/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.8384 (1.0986) acc 87.5000 (72.5391) lr 6.3188e-04 eta 2:08:26 +epoch [33/50] batch [325/500] time 0.861 (0.888) data 0.000 (0.002) loss 
1.0273 (1.0980) acc 71.8750 (72.5962) lr 6.3188e-04 eta 2:08:21 +epoch [33/50] batch [330/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9497 (1.0955) acc 65.6250 (72.6042) lr 6.3188e-04 eta 2:08:13 +epoch [33/50] batch [335/500] time 0.857 (0.888) data 0.000 (0.002) loss 0.9048 (1.0933) acc 78.1250 (72.6399) lr 6.3188e-04 eta 2:08:10 +epoch [33/50] batch [340/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7666 (1.0895) acc 81.2500 (72.7390) lr 6.3188e-04 eta 2:08:05 +epoch [33/50] batch [345/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.2500 (1.0897) acc 65.6250 (72.7536) lr 6.3188e-04 eta 2:08:00 +epoch [33/50] batch [350/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.1172 (1.0917) acc 84.3750 (72.7232) lr 6.3188e-04 eta 2:07:55 +epoch [33/50] batch [355/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.4062 (1.0907) acc 65.6250 (72.7201) lr 6.3188e-04 eta 2:07:52 +epoch [33/50] batch [360/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.1230 (1.0888) acc 71.8750 (72.7517) lr 6.3188e-04 eta 2:07:47 +epoch [33/50] batch [365/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.9131 (1.0872) acc 81.2500 (72.7825) lr 6.3188e-04 eta 2:07:42 +epoch [33/50] batch [370/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.6191 (1.0917) acc 65.6250 (72.7111) lr 6.3188e-04 eta 2:07:37 +epoch [33/50] batch [375/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.9404 (1.0931) acc 78.1250 (72.6750) lr 6.3188e-04 eta 2:07:32 +epoch [33/50] batch [380/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.2080 (1.0900) acc 71.8750 (72.7056) lr 6.3188e-04 eta 2:07:28 +epoch [33/50] batch [385/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.4031 (1.0906) acc 87.5000 (72.6623) lr 6.3188e-04 eta 2:07:24 +epoch [33/50] batch [390/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.0381 (1.0897) acc 65.6250 (72.6683) lr 6.3188e-04 eta 2:07:18 +epoch [33/50] batch [395/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.8252 (1.0886) acc 87.5000 (72.7136) lr 6.3188e-04 eta 2:07:13 
+epoch [33/50] batch [400/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.2793 (1.0893) acc 65.6250 (72.7109) lr 6.3188e-04 eta 2:07:07 +epoch [33/50] batch [405/500] time 0.852 (0.887) data 0.000 (0.002) loss 0.7905 (1.0861) acc 81.2500 (72.7546) lr 6.3188e-04 eta 2:07:02 +epoch [33/50] batch [410/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.7832 (1.0885) acc 65.6250 (72.7439) lr 6.3188e-04 eta 2:06:58 +epoch [33/50] batch [415/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.1631 (1.0861) acc 59.3750 (72.7636) lr 6.3188e-04 eta 2:06:53 +epoch [33/50] batch [420/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.9258 (1.0847) acc 75.0000 (72.7827) lr 6.3188e-04 eta 2:06:49 +epoch [33/50] batch [425/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.8447 (1.0837) acc 71.8750 (72.7500) lr 6.3188e-04 eta 2:06:44 +epoch [33/50] batch [430/500] time 0.847 (0.887) data 0.000 (0.002) loss 1.4883 (1.0842) acc 56.2500 (72.7108) lr 6.3188e-04 eta 2:06:40 +epoch [33/50] batch [435/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.7373 (1.0826) acc 81.2500 (72.7227) lr 6.3188e-04 eta 2:06:35 +epoch [33/50] batch [440/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.1123 (1.0834) acc 75.0000 (72.7486) lr 6.3188e-04 eta 2:06:29 +epoch [33/50] batch [445/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.3838 (1.0837) acc 65.6250 (72.7247) lr 6.3188e-04 eta 2:06:25 +epoch [33/50] batch [450/500] time 0.860 (0.886) data 0.000 (0.002) loss 0.9717 (1.0835) acc 75.0000 (72.7431) lr 6.3188e-04 eta 2:06:19 +epoch [33/50] batch [455/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.6807 (1.0822) acc 68.7500 (72.8159) lr 6.3188e-04 eta 2:06:13 +epoch [33/50] batch [460/500] time 0.899 (0.886) data 0.000 (0.002) loss 0.7666 (1.0809) acc 75.0000 (72.8193) lr 6.3188e-04 eta 2:06:09 +epoch [33/50] batch [465/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.3770 (1.0813) acc 59.3750 (72.7688) lr 6.3188e-04 eta 2:06:03 +epoch [33/50] batch [470/500] time 0.867 (0.886) data 0.000 
(0.002) loss 1.3076 (1.0833) acc 75.0000 (72.7593) lr 6.3188e-04 eta 2:05:57 +epoch [33/50] batch [475/500] time 0.892 (0.886) data 0.000 (0.002) loss 1.2021 (1.0834) acc 81.2500 (72.7763) lr 6.3188e-04 eta 2:05:52 +epoch [33/50] batch [480/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.6084 (1.0842) acc 65.6250 (72.8190) lr 6.3188e-04 eta 2:05:49 +epoch [33/50] batch [485/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.0420 (1.0818) acc 68.7500 (72.8415) lr 6.3188e-04 eta 2:05:45 +epoch [33/50] batch [490/500] time 0.897 (0.886) data 0.000 (0.001) loss 1.6484 (1.0803) acc 65.6250 (72.8827) lr 6.3188e-04 eta 2:05:41 +epoch [33/50] batch [495/500] time 0.861 (0.886) data 0.000 (0.001) loss 0.9595 (1.0805) acc 68.7500 (72.8598) lr 6.3188e-04 eta 2:05:36 +epoch [33/50] batch [500/500] time 0.894 (0.886) data 0.000 (0.001) loss 0.7827 (1.0784) acc 71.8750 (72.8875) lr 5.7422e-04 eta 2:05:32 +epoch [34/50] batch [5/500] time 0.879 (1.012) data 0.000 (0.129) loss 0.7812 (0.9236) acc 81.2500 (72.5000) lr 5.7422e-04 eta 2:23:17 +epoch [34/50] batch [10/500] time 0.911 (0.947) data 0.000 (0.065) loss 1.0234 (0.9314) acc 68.7500 (75.3125) lr 5.7422e-04 eta 2:14:04 +epoch [34/50] batch [15/500] time 0.879 (0.930) data 0.000 (0.043) loss 1.2461 (0.9562) acc 68.7500 (74.7917) lr 5.7422e-04 eta 2:11:35 +epoch [34/50] batch [20/500] time 0.885 (0.918) data 0.000 (0.032) loss 1.0283 (0.9897) acc 75.0000 (74.0625) lr 5.7422e-04 eta 2:09:48 +epoch [34/50] batch [25/500] time 0.901 (0.911) data 0.000 (0.026) loss 0.9375 (1.0200) acc 78.1250 (73.5000) lr 5.7422e-04 eta 2:08:40 +epoch [34/50] batch [30/500] time 0.896 (0.911) data 0.000 (0.022) loss 1.0527 (1.0752) acc 71.8750 (72.7083) lr 5.7422e-04 eta 2:08:40 +epoch [34/50] batch [35/500] time 0.871 (0.907) data 0.000 (0.019) loss 1.1523 (1.0954) acc 68.7500 (72.5000) lr 5.7422e-04 eta 2:07:55 +epoch [34/50] batch [40/500] time 0.868 (0.904) data 0.000 (0.016) loss 1.0713 (1.0895) acc 75.0000 (72.5000) lr 5.7422e-04 eta 2:07:25 
+epoch [34/50] batch [45/500] time 0.900 (0.900) data 0.000 (0.015) loss 1.3604 (1.0784) acc 75.0000 (72.7778) lr 5.7422e-04 eta 2:06:51 +epoch [34/50] batch [50/500] time 0.888 (0.898) data 0.000 (0.013) loss 0.7798 (1.0813) acc 71.8750 (72.7500) lr 5.7422e-04 eta 2:06:31 +epoch [34/50] batch [55/500] time 0.923 (0.899) data 0.000 (0.012) loss 1.2920 (1.0925) acc 71.8750 (72.7273) lr 5.7422e-04 eta 2:06:30 +epoch [34/50] batch [60/500] time 0.896 (0.898) data 0.000 (0.011) loss 1.0400 (1.0900) acc 75.0000 (72.7083) lr 5.7422e-04 eta 2:06:19 +epoch [34/50] batch [65/500] time 0.900 (0.898) data 0.000 (0.010) loss 1.1064 (1.0821) acc 65.6250 (72.5481) lr 5.7422e-04 eta 2:06:11 +epoch [34/50] batch [70/500] time 0.920 (0.897) data 0.000 (0.009) loss 0.8940 (1.0662) acc 81.2500 (73.0804) lr 5.7422e-04 eta 2:06:04 +epoch [34/50] batch [75/500] time 0.887 (0.898) data 0.000 (0.009) loss 0.6138 (1.0584) acc 84.3750 (73.2083) lr 5.7422e-04 eta 2:06:08 +epoch [34/50] batch [80/500] time 0.899 (0.898) data 0.000 (0.008) loss 0.8281 (1.0555) acc 75.0000 (73.2812) lr 5.7422e-04 eta 2:06:01 +epoch [34/50] batch [85/500] time 0.871 (0.897) data 0.000 (0.008) loss 0.7344 (1.0695) acc 81.2500 (73.0515) lr 5.7422e-04 eta 2:05:51 +epoch [34/50] batch [90/500] time 0.876 (0.896) data 0.000 (0.007) loss 1.2285 (1.0781) acc 65.6250 (72.8125) lr 5.7422e-04 eta 2:05:39 +epoch [34/50] batch [95/500] time 0.869 (0.896) data 0.000 (0.007) loss 0.8853 (1.0711) acc 78.1250 (72.9276) lr 5.7422e-04 eta 2:05:30 +epoch [34/50] batch [100/500] time 0.883 (0.896) data 0.000 (0.007) loss 0.6846 (1.0592) acc 84.3750 (73.1875) lr 5.7422e-04 eta 2:05:27 +epoch [34/50] batch [105/500] time 0.933 (0.896) data 0.000 (0.006) loss 1.0518 (1.0552) acc 71.8750 (73.2143) lr 5.7422e-04 eta 2:05:23 +epoch [34/50] batch [110/500] time 0.878 (0.896) data 0.000 (0.006) loss 0.6753 (1.0582) acc 84.3750 (73.3523) lr 5.7422e-04 eta 2:05:17 +epoch [34/50] batch [115/500] time 0.891 (0.895) data 0.000 (0.006) loss 
0.9819 (1.0611) acc 71.8750 (73.3696) lr 5.7422e-04 eta 2:05:08 +epoch [34/50] batch [120/500] time 0.900 (0.895) data 0.000 (0.006) loss 0.9038 (1.0605) acc 68.7500 (73.3333) lr 5.7422e-04 eta 2:05:03 +epoch [34/50] batch [125/500] time 0.892 (0.895) data 0.000 (0.005) loss 0.8638 (1.0677) acc 78.1250 (73.2500) lr 5.7422e-04 eta 2:04:59 +epoch [34/50] batch [130/500] time 0.888 (0.896) data 0.000 (0.005) loss 0.7026 (1.0625) acc 81.2500 (73.2933) lr 5.7422e-04 eta 2:04:57 +epoch [34/50] batch [135/500] time 0.852 (0.895) data 0.000 (0.005) loss 0.5869 (1.0592) acc 87.5000 (73.5648) lr 5.7422e-04 eta 2:04:47 +epoch [34/50] batch [140/500] time 0.905 (0.895) data 0.000 (0.005) loss 0.9897 (1.0586) acc 81.2500 (73.5045) lr 5.7422e-04 eta 2:04:42 +epoch [34/50] batch [145/500] time 0.875 (0.895) data 0.000 (0.005) loss 0.7451 (1.0481) acc 84.3750 (73.7500) lr 5.7422e-04 eta 2:04:34 +epoch [34/50] batch [150/500] time 0.901 (0.895) data 0.000 (0.005) loss 1.6396 (1.0607) acc 56.2500 (73.4792) lr 5.7422e-04 eta 2:04:30 +epoch [34/50] batch [155/500] time 0.876 (0.894) data 0.000 (0.004) loss 1.0273 (1.0655) acc 68.7500 (73.2661) lr 5.7422e-04 eta 2:04:20 +epoch [34/50] batch [160/500] time 0.879 (0.893) data 0.000 (0.004) loss 0.8438 (1.0674) acc 78.1250 (73.3008) lr 5.7422e-04 eta 2:04:10 +epoch [34/50] batch [165/500] time 0.885 (0.893) data 0.000 (0.004) loss 1.3252 (1.0679) acc 65.6250 (73.2576) lr 5.7422e-04 eta 2:04:07 +epoch [34/50] batch [170/500] time 0.865 (0.893) data 0.000 (0.004) loss 1.3896 (1.0673) acc 65.6250 (73.2169) lr 5.7422e-04 eta 2:04:00 +epoch [34/50] batch [175/500] time 0.876 (0.893) data 0.000 (0.004) loss 0.8110 (1.0619) acc 78.1250 (73.2321) lr 5.7422e-04 eta 2:03:57 +epoch [34/50] batch [180/500] time 0.889 (0.893) data 0.000 (0.004) loss 1.2783 (1.0695) acc 68.7500 (73.1076) lr 5.7422e-04 eta 2:03:51 +epoch [34/50] batch [185/500] time 0.863 (0.893) data 0.000 (0.004) loss 0.5464 (1.0635) acc 87.5000 (73.3446) lr 5.7422e-04 eta 2:03:43 
+epoch [34/50] batch [190/500] time 0.883 (0.893) data 0.000 (0.004) loss 0.9019 (1.0701) acc 75.0000 (73.2237) lr 5.7422e-04 eta 2:03:37 +epoch [34/50] batch [195/500] time 0.910 (0.893) data 0.000 (0.004) loss 1.2451 (1.0742) acc 68.7500 (73.1731) lr 5.7422e-04 eta 2:03:32 +epoch [34/50] batch [200/500] time 0.874 (0.892) data 0.000 (0.003) loss 0.7466 (1.0710) acc 84.3750 (73.3125) lr 5.7422e-04 eta 2:03:25 +epoch [34/50] batch [205/500] time 0.877 (0.892) data 0.000 (0.003) loss 1.2979 (1.0707) acc 62.5000 (73.3079) lr 5.7422e-04 eta 2:03:16 +epoch [34/50] batch [210/500] time 0.895 (0.892) data 0.000 (0.003) loss 0.8447 (1.0663) acc 65.6250 (73.3185) lr 5.7422e-04 eta 2:03:11 +epoch [34/50] batch [215/500] time 1.039 (0.892) data 0.000 (0.003) loss 1.3408 (1.0648) acc 65.6250 (73.4302) lr 5.7422e-04 eta 2:03:12 +epoch [34/50] batch [220/500] time 0.879 (0.892) data 0.000 (0.003) loss 0.6279 (1.0647) acc 78.1250 (73.3807) lr 5.7422e-04 eta 2:03:08 +epoch [34/50] batch [225/500] time 0.875 (0.892) data 0.000 (0.003) loss 0.7402 (1.0637) acc 75.0000 (73.4444) lr 5.7422e-04 eta 2:03:02 +epoch [34/50] batch [230/500] time 0.892 (0.892) data 0.001 (0.003) loss 0.8140 (1.0619) acc 78.1250 (73.4918) lr 5.7422e-04 eta 2:02:58 +epoch [34/50] batch [235/500] time 0.880 (0.892) data 0.000 (0.003) loss 1.0605 (1.0591) acc 75.0000 (73.5372) lr 5.7422e-04 eta 2:02:54 +epoch [34/50] batch [240/500] time 0.885 (0.892) data 0.000 (0.003) loss 1.0996 (1.0580) acc 71.8750 (73.5547) lr 5.7422e-04 eta 2:02:51 +epoch [34/50] batch [245/500] time 0.866 (0.893) data 0.000 (0.003) loss 1.0371 (1.0596) acc 78.1250 (73.4949) lr 5.7422e-04 eta 2:02:47 +epoch [34/50] batch [250/500] time 0.873 (0.892) data 0.000 (0.003) loss 0.9019 (1.0632) acc 78.1250 (73.4375) lr 5.7422e-04 eta 2:02:41 +epoch [34/50] batch [255/500] time 0.865 (0.892) data 0.000 (0.003) loss 0.9634 (1.0616) acc 71.8750 (73.3824) lr 5.7422e-04 eta 2:02:37 +epoch [34/50] batch [260/500] time 0.903 (0.892) data 0.000 
(0.003) loss 1.3271 (1.0633) acc 62.5000 (73.3654) lr 5.7422e-04 eta 2:02:33 +epoch [34/50] batch [265/500] time 0.866 (0.892) data 0.000 (0.003) loss 0.9282 (1.0664) acc 68.7500 (73.2901) lr 5.7422e-04 eta 2:02:26 +epoch [34/50] batch [270/500] time 0.885 (0.892) data 0.000 (0.003) loss 1.1191 (1.0660) acc 78.1250 (73.3449) lr 5.7422e-04 eta 2:02:20 +epoch [34/50] batch [275/500] time 0.892 (0.892) data 0.000 (0.003) loss 0.9741 (1.0687) acc 68.7500 (73.1932) lr 5.7422e-04 eta 2:02:14 +epoch [34/50] batch [280/500] time 0.892 (0.891) data 0.000 (0.003) loss 1.2900 (1.0704) acc 65.6250 (73.1027) lr 5.7422e-04 eta 2:02:07 +epoch [34/50] batch [285/500] time 0.883 (0.891) data 0.000 (0.002) loss 1.4531 (1.0718) acc 68.7500 (73.0921) lr 5.7422e-04 eta 2:02:01 +epoch [34/50] batch [290/500] time 0.901 (0.891) data 0.000 (0.002) loss 1.3906 (1.0743) acc 68.7500 (73.1142) lr 5.7422e-04 eta 2:01:57 +epoch [34/50] batch [295/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.6836 (1.0781) acc 65.6250 (73.0297) lr 5.7422e-04 eta 2:01:52 +epoch [34/50] batch [300/500] time 0.857 (0.891) data 0.000 (0.002) loss 1.7227 (1.0798) acc 65.6250 (73.1354) lr 5.7422e-04 eta 2:01:47 +epoch [34/50] batch [305/500] time 0.866 (0.891) data 0.000 (0.002) loss 1.1025 (1.0788) acc 75.0000 (73.1250) lr 5.7422e-04 eta 2:01:42 +epoch [34/50] batch [310/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.0713 (1.0811) acc 75.0000 (73.0948) lr 5.7422e-04 eta 2:01:37 +epoch [34/50] batch [315/500] time 0.882 (0.891) data 0.000 (0.002) loss 0.9390 (1.0819) acc 75.0000 (73.1052) lr 5.7422e-04 eta 2:01:36 +epoch [34/50] batch [320/500] time 0.874 (0.891) data 0.000 (0.002) loss 1.4150 (1.0814) acc 59.3750 (73.0859) lr 5.7422e-04 eta 2:01:31 +epoch [34/50] batch [325/500] time 0.860 (0.891) data 0.000 (0.002) loss 0.7617 (1.0853) acc 78.1250 (73.0288) lr 5.7422e-04 eta 2:01:24 +epoch [34/50] batch [330/500] time 0.906 (0.891) data 0.000 (0.002) loss 1.2705 (1.0875) acc 68.7500 (72.9545) lr 5.7422e-04 
eta 2:01:20 +epoch [34/50] batch [335/500] time 0.892 (0.891) data 0.000 (0.002) loss 0.9346 (1.0870) acc 62.5000 (72.8638) lr 5.7422e-04 eta 2:01:13 +epoch [34/50] batch [340/500] time 0.892 (0.891) data 0.000 (0.002) loss 0.7065 (1.0868) acc 75.0000 (72.8952) lr 5.7422e-04 eta 2:01:08 +epoch [34/50] batch [345/500] time 0.875 (0.891) data 0.000 (0.002) loss 0.9766 (1.0867) acc 75.0000 (72.8442) lr 5.7422e-04 eta 2:01:02 +epoch [34/50] batch [350/500] time 0.889 (0.891) data 0.000 (0.002) loss 0.6636 (1.0865) acc 84.3750 (72.8839) lr 5.7422e-04 eta 2:00:58 +epoch [34/50] batch [355/500] time 0.871 (0.891) data 0.000 (0.002) loss 1.3721 (1.0873) acc 62.5000 (72.8521) lr 5.7422e-04 eta 2:00:53 +epoch [34/50] batch [360/500] time 0.897 (0.892) data 0.000 (0.002) loss 0.7588 (1.0863) acc 75.0000 (72.8299) lr 5.7422e-04 eta 2:00:57 +epoch [34/50] batch [365/500] time 0.878 (0.892) data 0.000 (0.002) loss 0.8706 (1.0855) acc 71.8750 (72.7825) lr 5.7422e-04 eta 2:00:52 +epoch [34/50] batch [370/500] time 0.901 (0.892) data 0.000 (0.002) loss 1.4014 (1.0853) acc 62.5000 (72.7872) lr 5.7422e-04 eta 2:00:49 +epoch [34/50] batch [375/500] time 0.902 (0.892) data 0.000 (0.002) loss 0.8027 (1.0849) acc 78.1250 (72.7667) lr 5.7422e-04 eta 2:00:44 +epoch [34/50] batch [380/500] time 0.923 (0.892) data 0.000 (0.002) loss 1.1396 (1.0850) acc 71.8750 (72.7549) lr 5.7422e-04 eta 2:00:39 +epoch [34/50] batch [385/500] time 0.865 (0.891) data 0.000 (0.002) loss 0.9199 (1.0837) acc 68.7500 (72.7841) lr 5.7422e-04 eta 2:00:34 +epoch [34/50] batch [390/500] time 0.888 (0.891) data 0.000 (0.002) loss 1.1494 (1.0826) acc 65.6250 (72.8285) lr 5.7422e-04 eta 2:00:27 +epoch [34/50] batch [395/500] time 0.893 (0.891) data 0.000 (0.002) loss 0.9170 (1.0810) acc 78.1250 (72.9035) lr 5.7422e-04 eta 2:00:22 +epoch [34/50] batch [400/500] time 0.893 (0.891) data 0.000 (0.002) loss 0.9761 (1.0806) acc 81.2500 (72.9453) lr 5.7422e-04 eta 2:00:17 +epoch [34/50] batch [405/500] time 0.914 (0.891) data 
0.000 (0.002) loss 0.6982 (1.0792) acc 84.3750 (73.0015) lr 5.7422e-04 eta 2:00:11 +epoch [34/50] batch [410/500] time 0.906 (0.891) data 0.000 (0.002) loss 1.3682 (1.0809) acc 75.0000 (72.9802) lr 5.7422e-04 eta 2:00:06 +epoch [34/50] batch [415/500] time 0.909 (0.891) data 0.000 (0.002) loss 0.6758 (1.0814) acc 78.1250 (72.9518) lr 5.7422e-04 eta 2:00:02 +epoch [34/50] batch [420/500] time 0.892 (0.891) data 0.000 (0.002) loss 0.8208 (1.0790) acc 78.1250 (73.0283) lr 5.7422e-04 eta 1:59:58 +epoch [34/50] batch [425/500] time 0.888 (0.891) data 0.000 (0.002) loss 1.0420 (1.0786) acc 71.8750 (73.0294) lr 5.7422e-04 eta 1:59:54 +epoch [34/50] batch [430/500] time 0.868 (0.891) data 0.000 (0.002) loss 0.4087 (1.0773) acc 93.7500 (73.0523) lr 5.7422e-04 eta 1:59:50 +epoch [34/50] batch [435/500] time 0.883 (0.891) data 0.000 (0.002) loss 1.4834 (1.0790) acc 68.7500 (72.9885) lr 5.7422e-04 eta 1:59:45 +epoch [34/50] batch [440/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.8252 (1.0786) acc 59.3750 (72.9616) lr 5.7422e-04 eta 1:59:39 +epoch [34/50] batch [445/500] time 0.882 (0.891) data 0.000 (0.002) loss 1.1318 (1.0772) acc 65.6250 (72.9635) lr 5.7422e-04 eta 1:59:34 +epoch [34/50] batch [450/500] time 0.882 (0.891) data 0.000 (0.002) loss 0.9497 (1.0780) acc 71.8750 (72.9097) lr 5.7422e-04 eta 1:59:30 +epoch [34/50] batch [455/500] time 0.917 (0.891) data 0.000 (0.002) loss 0.5112 (1.0781) acc 90.6250 (72.9052) lr 5.7422e-04 eta 1:59:27 +epoch [34/50] batch [460/500] time 0.894 (0.891) data 0.000 (0.002) loss 1.6973 (1.0784) acc 56.2500 (72.9144) lr 5.7422e-04 eta 1:59:24 +epoch [34/50] batch [465/500] time 0.869 (0.891) data 0.000 (0.002) loss 0.9282 (1.0782) acc 84.3750 (72.9570) lr 5.7422e-04 eta 1:59:18 +epoch [34/50] batch [470/500] time 0.879 (0.891) data 0.000 (0.002) loss 1.3232 (1.0799) acc 68.7500 (72.9322) lr 5.7422e-04 eta 1:59:13 +epoch [34/50] batch [475/500] time 0.904 (0.891) data 0.000 (0.002) loss 0.8892 (1.0789) acc 81.2500 (72.9934) lr 
5.7422e-04 eta 1:59:09 +epoch [34/50] batch [480/500] time 0.891 (0.891) data 0.000 (0.002) loss 1.4111 (1.0799) acc 68.7500 (72.9362) lr 5.7422e-04 eta 1:59:04 +epoch [34/50] batch [485/500] time 0.883 (0.891) data 0.000 (0.002) loss 1.5625 (1.0803) acc 65.6250 (72.9381) lr 5.7422e-04 eta 1:58:59 +epoch [34/50] batch [490/500] time 0.896 (0.891) data 0.000 (0.002) loss 0.6377 (1.0782) acc 78.1250 (72.9847) lr 5.7422e-04 eta 1:58:54 +epoch [34/50] batch [495/500] time 0.896 (0.891) data 0.000 (0.002) loss 1.2861 (1.0808) acc 68.7500 (72.9356) lr 5.7422e-04 eta 1:58:51 +epoch [34/50] batch [500/500] time 0.889 (0.891) data 0.000 (0.002) loss 1.0010 (1.0808) acc 75.0000 (72.9000) lr 5.1825e-04 eta 1:58:47 +epoch [35/50] batch [5/500] time 0.871 (1.033) data 0.000 (0.124) loss 0.7930 (0.9722) acc 84.3750 (75.0000) lr 5.1825e-04 eta 2:17:41 +epoch [35/50] batch [10/500] time 0.893 (0.961) data 0.000 (0.062) loss 0.9561 (0.9406) acc 65.6250 (73.4375) lr 5.1825e-04 eta 2:07:55 +epoch [35/50] batch [15/500] time 0.901 (0.939) data 0.000 (0.041) loss 1.1797 (0.9706) acc 71.8750 (75.0000) lr 5.1825e-04 eta 2:04:53 +epoch [35/50] batch [20/500] time 0.937 (0.929) data 0.000 (0.031) loss 0.7881 (1.0141) acc 81.2500 (73.9062) lr 5.1825e-04 eta 2:03:30 +epoch [35/50] batch [25/500] time 0.879 (0.921) data 0.000 (0.025) loss 0.9648 (1.0310) acc 75.0000 (73.1250) lr 5.1825e-04 eta 2:02:27 +epoch [35/50] batch [30/500] time 0.888 (0.914) data 0.000 (0.021) loss 1.2100 (1.0561) acc 81.2500 (73.4375) lr 5.1825e-04 eta 2:01:28 +epoch [35/50] batch [35/500] time 0.864 (0.911) data 0.000 (0.018) loss 1.3799 (1.0737) acc 65.6250 (73.3929) lr 5.1825e-04 eta 2:00:55 +epoch [35/50] batch [40/500] time 0.898 (0.908) data 0.000 (0.016) loss 0.5498 (1.0586) acc 87.5000 (73.4375) lr 5.1825e-04 eta 2:00:27 +epoch [35/50] batch [45/500] time 1.007 (0.908) data 0.000 (0.014) loss 0.5894 (1.0587) acc 87.5000 (73.5417) lr 5.1825e-04 eta 2:00:23 +epoch [35/50] batch [50/500] time 0.875 (0.904) data 
0.000 (0.013) loss 0.7578 (1.0389) acc 81.2500 (74.3125) lr 5.1825e-04 eta 1:59:50 +epoch [35/50] batch [55/500] time 0.874 (0.902) data 0.000 (0.011) loss 1.7139 (1.0604) acc 46.8750 (73.6932) lr 5.1825e-04 eta 1:59:24 +epoch [35/50] batch [60/500] time 0.897 (0.900) data 0.000 (0.011) loss 1.3936 (1.0579) acc 56.2500 (73.2292) lr 5.1825e-04 eta 1:59:06 +epoch [35/50] batch [65/500] time 0.907 (0.900) data 0.000 (0.010) loss 1.5508 (1.0545) acc 68.7500 (73.5096) lr 5.1825e-04 eta 1:59:01 +epoch [35/50] batch [70/500] time 0.906 (0.899) data 0.000 (0.009) loss 0.9355 (1.0329) acc 75.0000 (73.8839) lr 5.1825e-04 eta 1:58:50 +epoch [35/50] batch [75/500] time 0.876 (0.899) data 0.000 (0.008) loss 1.2607 (1.0474) acc 78.1250 (73.8750) lr 5.1825e-04 eta 1:58:42 +epoch [35/50] batch [80/500] time 0.896 (0.898) data 0.000 (0.008) loss 0.8130 (1.0470) acc 78.1250 (73.7891) lr 5.1825e-04 eta 1:58:31 +epoch [35/50] batch [85/500] time 0.870 (0.897) data 0.000 (0.008) loss 0.7451 (1.0341) acc 78.1250 (74.0074) lr 5.1825e-04 eta 1:58:21 +epoch [35/50] batch [90/500] time 0.886 (0.897) data 0.000 (0.007) loss 0.9634 (1.0306) acc 75.0000 (74.0972) lr 5.1825e-04 eta 1:58:14 +epoch [35/50] batch [95/500] time 0.879 (0.896) data 0.000 (0.007) loss 1.4746 (1.0434) acc 65.6250 (73.9474) lr 5.1825e-04 eta 1:58:02 +epoch [35/50] batch [100/500] time 0.913 (0.896) data 0.000 (0.006) loss 0.8755 (1.0443) acc 81.2500 (73.9375) lr 5.1825e-04 eta 1:57:58 +epoch [35/50] batch [105/500] time 0.874 (0.895) data 0.000 (0.006) loss 1.0352 (1.0466) acc 78.1250 (73.6905) lr 5.1825e-04 eta 1:57:46 +epoch [35/50] batch [110/500] time 0.860 (0.895) data 0.000 (0.006) loss 2.0684 (1.0586) acc 56.2500 (73.4943) lr 5.1825e-04 eta 1:57:39 +epoch [35/50] batch [115/500] time 0.863 (0.893) data 0.000 (0.006) loss 0.9312 (1.0537) acc 71.8750 (73.5054) lr 5.1825e-04 eta 1:57:23 +epoch [35/50] batch [120/500] time 0.884 (0.893) data 0.000 (0.005) loss 0.8057 (1.0474) acc 81.2500 (73.6719) lr 5.1825e-04 eta 
1:57:14 +epoch [35/50] batch [125/500] time 0.885 (0.892) data 0.000 (0.005) loss 1.3613 (1.0433) acc 71.8750 (73.8000) lr 5.1825e-04 eta 1:57:06 +epoch [35/50] batch [130/500] time 0.894 (0.892) data 0.000 (0.005) loss 0.6265 (1.0447) acc 78.1250 (73.7981) lr 5.1825e-04 eta 1:56:59 +epoch [35/50] batch [135/500] time 0.884 (0.892) data 0.000 (0.005) loss 1.3105 (1.0567) acc 62.5000 (73.5648) lr 5.1825e-04 eta 1:56:54 +epoch [35/50] batch [140/500] time 0.894 (0.892) data 0.000 (0.005) loss 0.8604 (1.0504) acc 78.1250 (73.7054) lr 5.1825e-04 eta 1:56:48 +epoch [35/50] batch [145/500] time 0.878 (0.892) data 0.000 (0.004) loss 0.6611 (1.0469) acc 81.2500 (73.7931) lr 5.1825e-04 eta 1:56:50 +epoch [35/50] batch [150/500] time 0.899 (0.892) data 0.000 (0.004) loss 0.8994 (1.0483) acc 68.7500 (73.6667) lr 5.1825e-04 eta 1:56:42 +epoch [35/50] batch [155/500] time 0.910 (0.892) data 0.000 (0.004) loss 1.9824 (1.0505) acc 62.5000 (73.6895) lr 5.1825e-04 eta 1:56:35 +epoch [35/50] batch [160/500] time 0.881 (0.891) data 0.000 (0.004) loss 0.8066 (1.0481) acc 71.8750 (73.6523) lr 5.1825e-04 eta 1:56:27 +epoch [35/50] batch [165/500] time 0.884 (0.891) data 0.000 (0.004) loss 1.3965 (1.0509) acc 68.7500 (73.5795) lr 5.1825e-04 eta 1:56:21 +epoch [35/50] batch [170/500] time 0.895 (0.891) data 0.000 (0.004) loss 0.7344 (1.0502) acc 81.2500 (73.5846) lr 5.1825e-04 eta 1:56:14 +epoch [35/50] batch [175/500] time 0.848 (0.890) data 0.000 (0.004) loss 1.5254 (1.0483) acc 65.6250 (73.6250) lr 5.1825e-04 eta 1:56:07 +epoch [35/50] batch [180/500] time 0.894 (0.891) data 0.000 (0.004) loss 1.3486 (1.0496) acc 68.7500 (73.4896) lr 5.1825e-04 eta 1:56:05 +epoch [35/50] batch [185/500] time 0.866 (0.890) data 0.000 (0.004) loss 0.8936 (1.0501) acc 71.8750 (73.3784) lr 5.1825e-04 eta 1:55:56 +epoch [35/50] batch [190/500] time 0.870 (0.890) data 0.000 (0.003) loss 1.3125 (1.0554) acc 71.8750 (73.3059) lr 5.1825e-04 eta 1:55:53 +epoch [35/50] batch [195/500] time 0.883 (0.890) data 
0.000 (0.003) loss 1.4297 (1.0615) acc 78.1250 (73.2051) lr 5.1825e-04 eta 1:55:45 +epoch [35/50] batch [200/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.0645 (1.0582) acc 75.0000 (73.3906) lr 5.1825e-04 eta 1:55:39 +epoch [35/50] batch [205/500] time 0.923 (0.890) data 0.000 (0.003) loss 1.3545 (1.0646) acc 71.8750 (73.2165) lr 5.1825e-04 eta 1:55:38 +epoch [35/50] batch [210/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.9531 (1.0618) acc 75.0000 (73.1994) lr 5.1825e-04 eta 1:55:34 +epoch [35/50] batch [215/500] time 0.939 (0.891) data 0.000 (0.003) loss 0.8647 (1.0627) acc 75.0000 (73.0959) lr 5.1825e-04 eta 1:55:33 +epoch [35/50] batch [220/500] time 0.889 (0.891) data 0.000 (0.003) loss 0.7144 (1.0571) acc 78.1250 (73.2670) lr 5.1825e-04 eta 1:55:28 +epoch [35/50] batch [225/500] time 0.900 (0.891) data 0.000 (0.003) loss 0.8335 (1.0551) acc 87.5000 (73.3472) lr 5.1825e-04 eta 1:55:23 +epoch [35/50] batch [230/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.1924 (1.0583) acc 62.5000 (73.2880) lr 5.1825e-04 eta 1:55:18 +epoch [35/50] batch [235/500] time 0.869 (0.891) data 0.000 (0.003) loss 0.8320 (1.0543) acc 68.7500 (73.3245) lr 5.1825e-04 eta 1:55:15 +epoch [35/50] batch [240/500] time 0.917 (0.891) data 0.000 (0.003) loss 1.1689 (1.0589) acc 75.0000 (73.1901) lr 5.1825e-04 eta 1:55:11 +epoch [35/50] batch [245/500] time 0.863 (0.891) data 0.000 (0.003) loss 1.0420 (1.0550) acc 71.8750 (73.3036) lr 5.1825e-04 eta 1:55:07 +epoch [35/50] batch [250/500] time 0.893 (0.891) data 0.000 (0.003) loss 1.0654 (1.0541) acc 75.0000 (73.3625) lr 5.1825e-04 eta 1:55:02 +epoch [35/50] batch [255/500] time 0.903 (0.890) data 0.000 (0.003) loss 0.8213 (1.0549) acc 81.2500 (73.3701) lr 5.1825e-04 eta 1:54:56 +epoch [35/50] batch [260/500] time 0.874 (0.890) data 0.000 (0.003) loss 1.2363 (1.0618) acc 65.6250 (73.2933) lr 5.1825e-04 eta 1:54:51 +epoch [35/50] batch [265/500] time 0.849 (0.890) data 0.000 (0.003) loss 0.6196 (1.0581) acc 81.2500 (73.4080) lr 
5.1825e-04 eta 1:54:45 +epoch [35/50] batch [270/500] time 0.875 (0.890) data 0.000 (0.003) loss 1.3369 (1.0596) acc 62.5000 (73.3681) lr 5.1825e-04 eta 1:54:40 +epoch [35/50] batch [275/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.2344 (1.0591) acc 65.6250 (73.3636) lr 5.1825e-04 eta 1:54:34 +epoch [35/50] batch [280/500] time 0.910 (0.890) data 0.000 (0.002) loss 1.2793 (1.0657) acc 71.8750 (73.3259) lr 5.1825e-04 eta 1:54:29 +epoch [35/50] batch [285/500] time 0.889 (0.890) data 0.000 (0.002) loss 0.9082 (1.0663) acc 75.0000 (73.3114) lr 5.1825e-04 eta 1:54:25 +epoch [35/50] batch [290/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.3633 (1.0667) acc 68.7500 (73.2974) lr 5.1825e-04 eta 1:54:23 +epoch [35/50] batch [295/500] time 0.904 (0.890) data 0.000 (0.002) loss 1.2812 (1.0650) acc 62.5000 (73.3051) lr 5.1825e-04 eta 1:54:18 +epoch [35/50] batch [300/500] time 0.911 (0.890) data 0.000 (0.002) loss 1.2422 (1.0671) acc 71.8750 (73.2292) lr 5.1825e-04 eta 1:54:14 +epoch [35/50] batch [305/500] time 0.860 (0.890) data 0.000 (0.002) loss 1.4395 (1.0680) acc 65.6250 (73.1660) lr 5.1825e-04 eta 1:54:08 +epoch [35/50] batch [310/500] time 0.877 (0.890) data 0.000 (0.002) loss 0.9785 (1.0714) acc 65.6250 (73.0544) lr 5.1825e-04 eta 1:54:03 +epoch [35/50] batch [315/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.1309 (1.0737) acc 75.0000 (73.0357) lr 5.1825e-04 eta 1:53:58 +epoch [35/50] batch [320/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.0918 (1.0727) acc 71.8750 (73.0566) lr 5.1825e-04 eta 1:53:52 +epoch [35/50] batch [325/500] time 0.885 (0.890) data 0.000 (0.002) loss 0.6538 (1.0725) acc 81.2500 (73.0385) lr 5.1825e-04 eta 1:53:48 +epoch [35/50] batch [330/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.1055 (1.0725) acc 56.2500 (72.9924) lr 5.1825e-04 eta 1:53:43 +epoch [35/50] batch [335/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.4307 (1.0744) acc 68.7500 (72.9384) lr 5.1825e-04 eta 1:53:41 +epoch [35/50] batch [340/500] time 0.897 
(0.890) data 0.000 (0.002) loss 1.4971 (1.0786) acc 59.3750 (72.8676) lr 5.1825e-04 eta 1:53:36 +epoch [35/50] batch [345/500] time 0.926 (0.890) data 0.000 (0.002) loss 0.4458 (1.0766) acc 90.6250 (72.9167) lr 5.1825e-04 eta 1:53:33 +epoch [35/50] batch [350/500] time 0.874 (0.890) data 0.000 (0.002) loss 1.0020 (1.0743) acc 68.7500 (72.9554) lr 5.1825e-04 eta 1:53:28 +epoch [35/50] batch [355/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.2803 (1.0753) acc 68.7500 (72.9137) lr 5.1825e-04 eta 1:53:22 +epoch [35/50] batch [360/500] time 0.889 (0.890) data 0.000 (0.002) loss 0.8521 (1.0728) acc 87.5000 (73.0295) lr 5.1825e-04 eta 1:53:17 +epoch [35/50] batch [365/500] time 0.904 (0.890) data 0.000 (0.002) loss 1.0615 (1.0719) acc 81.2500 (73.1164) lr 5.1825e-04 eta 1:53:12 +epoch [35/50] batch [370/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.1309 (1.0684) acc 71.8750 (73.1841) lr 5.1825e-04 eta 1:53:07 +epoch [35/50] batch [375/500] time 0.858 (0.889) data 0.000 (0.002) loss 1.4805 (1.0711) acc 56.2500 (73.0833) lr 5.1825e-04 eta 1:53:02 +epoch [35/50] batch [380/500] time 0.855 (0.889) data 0.000 (0.002) loss 0.7803 (1.0702) acc 75.0000 (73.0345) lr 5.1825e-04 eta 1:52:57 +epoch [35/50] batch [385/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.4102 (1.0678) acc 90.6250 (73.1250) lr 5.1825e-04 eta 1:52:52 +epoch [35/50] batch [390/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.8730 (1.0636) acc 84.3750 (73.2372) lr 5.1825e-04 eta 1:52:46 +epoch [35/50] batch [395/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.2949 (1.0644) acc 75.0000 (73.2041) lr 5.1825e-04 eta 1:52:41 +epoch [35/50] batch [400/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.2354 (1.0643) acc 62.5000 (73.1875) lr 5.1825e-04 eta 1:52:37 +epoch [35/50] batch [405/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.1494 (1.0662) acc 68.7500 (73.1713) lr 5.1825e-04 eta 1:52:32 +epoch [35/50] batch [410/500] time 0.921 (0.889) data 0.000 (0.002) loss 0.8574 (1.0675) acc 87.5000 
(73.1402) lr 5.1825e-04 eta 1:52:27 +epoch [35/50] batch [415/500] time 0.908 (0.889) data 0.000 (0.002) loss 1.6699 (1.0670) acc 65.6250 (73.1702) lr 5.1825e-04 eta 1:52:25 +epoch [35/50] batch [420/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.1748 (1.0663) acc 75.0000 (73.2217) lr 5.1825e-04 eta 1:52:20 +epoch [35/50] batch [425/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.6768 (1.0681) acc 87.5000 (73.2353) lr 5.1825e-04 eta 1:52:15 +epoch [35/50] batch [430/500] time 1.007 (0.889) data 0.000 (0.002) loss 0.9277 (1.0690) acc 71.8750 (73.1395) lr 5.1825e-04 eta 1:52:13 +epoch [35/50] batch [435/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.3867 (1.0670) acc 68.7500 (73.1609) lr 5.1825e-04 eta 1:52:07 +epoch [35/50] batch [440/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.8486 (1.0651) acc 68.7500 (73.1534) lr 5.1825e-04 eta 1:52:03 +epoch [35/50] batch [445/500] time 0.918 (0.889) data 0.000 (0.002) loss 1.2432 (1.0657) acc 78.1250 (73.1110) lr 5.1825e-04 eta 1:51:58 +epoch [35/50] batch [450/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.8521 (1.0674) acc 68.7500 (73.0972) lr 5.1825e-04 eta 1:51:53 +epoch [35/50] batch [455/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.8867 (1.0661) acc 81.2500 (73.1113) lr 5.1825e-04 eta 1:51:49 +epoch [35/50] batch [460/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.3809 (1.0667) acc 65.6250 (73.0910) lr 5.1825e-04 eta 1:51:45 +epoch [35/50] batch [465/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.2100 (1.0664) acc 59.3750 (73.0645) lr 5.1825e-04 eta 1:51:41 +epoch [35/50] batch [470/500] time 0.901 (0.889) data 0.000 (0.002) loss 1.4648 (1.0681) acc 71.8750 (72.9721) lr 5.1825e-04 eta 1:51:36 +epoch [35/50] batch [475/500] time 0.878 (0.890) data 0.000 (0.002) loss 1.6006 (1.0691) acc 59.3750 (72.9474) lr 5.1825e-04 eta 1:51:36 +epoch [35/50] batch [480/500] time 0.930 (0.890) data 0.000 (0.002) loss 0.9287 (1.0683) acc 75.0000 (72.9557) lr 5.1825e-04 eta 1:51:32 +epoch [35/50] batch [485/500] 
time 0.924 (0.890) data 0.001 (0.002) loss 1.2920 (1.0685) acc 71.8750 (72.9381) lr 5.1825e-04 eta 1:51:27 +epoch [35/50] batch [490/500] time 0.868 (0.890) data 0.000 (0.001) loss 1.4648 (1.0705) acc 62.5000 (72.8890) lr 5.1825e-04 eta 1:51:22 +epoch [35/50] batch [495/500] time 0.884 (0.890) data 0.000 (0.001) loss 1.2715 (1.0716) acc 71.8750 (72.9040) lr 5.1825e-04 eta 1:51:17 +epoch [35/50] batch [500/500] time 0.882 (0.890) data 0.000 (0.001) loss 1.1914 (1.0694) acc 75.0000 (72.9437) lr 4.6417e-04 eta 1:51:12 +epoch [36/50] batch [5/500] time 0.912 (1.066) data 0.000 (0.171) loss 0.8442 (1.0047) acc 81.2500 (74.3750) lr 4.6417e-04 eta 2:13:13 +epoch [36/50] batch [10/500] time 0.883 (0.979) data 0.000 (0.086) loss 0.7393 (1.0389) acc 81.2500 (77.1875) lr 4.6417e-04 eta 2:02:15 +epoch [36/50] batch [15/500] time 0.912 (0.952) data 0.000 (0.057) loss 1.2334 (1.0400) acc 62.5000 (75.4167) lr 4.6417e-04 eta 1:58:43 +epoch [36/50] batch [20/500] time 0.891 (0.945) data 0.000 (0.043) loss 0.9766 (1.0872) acc 68.7500 (73.9062) lr 4.6417e-04 eta 1:57:48 +epoch [36/50] batch [25/500] time 0.875 (0.933) data 0.000 (0.035) loss 1.2393 (1.0605) acc 71.8750 (74.1250) lr 4.6417e-04 eta 1:56:11 +epoch [36/50] batch [30/500] time 0.880 (0.925) data 0.000 (0.029) loss 1.1631 (1.0274) acc 71.8750 (74.2708) lr 4.6417e-04 eta 1:55:09 +epoch [36/50] batch [35/500] time 0.885 (0.920) data 0.000 (0.025) loss 1.0557 (1.0202) acc 68.7500 (74.1071) lr 4.6417e-04 eta 1:54:31 +epoch [36/50] batch [40/500] time 0.897 (0.918) data 0.000 (0.022) loss 1.0938 (1.0223) acc 68.7500 (73.8281) lr 4.6417e-04 eta 1:54:05 +epoch [36/50] batch [45/500] time 0.940 (0.916) data 0.000 (0.019) loss 0.6167 (1.0204) acc 78.1250 (73.5417) lr 4.6417e-04 eta 1:53:50 +epoch [36/50] batch [50/500] time 0.890 (0.915) data 0.000 (0.017) loss 1.3730 (1.0403) acc 68.7500 (73.5000) lr 4.6417e-04 eta 1:53:36 +epoch [36/50] batch [55/500] time 0.885 (0.913) data 0.000 (0.016) loss 0.9614 (1.0368) acc 71.8750 
(73.5227) lr 4.6417e-04 eta 1:53:16 +epoch [36/50] batch [60/500] time 0.879 (0.914) data 0.000 (0.015) loss 0.8169 (1.0218) acc 75.0000 (73.5417) lr 4.6417e-04 eta 1:53:18 +epoch [36/50] batch [65/500] time 0.852 (0.912) data 0.000 (0.013) loss 1.0605 (1.0158) acc 68.7500 (73.7500) lr 4.6417e-04 eta 1:52:58 +epoch [36/50] batch [70/500] time 0.907 (0.911) data 0.001 (0.013) loss 1.0293 (1.0126) acc 71.8750 (73.6161) lr 4.6417e-04 eta 1:52:47 +epoch [36/50] batch [75/500] time 0.918 (0.910) data 0.000 (0.012) loss 0.7876 (1.0078) acc 78.1250 (74.0417) lr 4.6417e-04 eta 1:52:39 +epoch [36/50] batch [80/500] time 0.902 (0.909) data 0.000 (0.011) loss 1.6836 (1.0149) acc 62.5000 (73.7891) lr 4.6417e-04 eta 1:52:27 +epoch [36/50] batch [85/500] time 0.904 (0.909) data 0.001 (0.010) loss 1.0928 (1.0144) acc 62.5000 (73.6397) lr 4.6417e-04 eta 1:52:17 +epoch [36/50] batch [90/500] time 0.884 (0.908) data 0.000 (0.010) loss 1.1904 (1.0239) acc 75.0000 (73.6111) lr 4.6417e-04 eta 1:52:07 +epoch [36/50] batch [95/500] time 0.898 (0.907) data 0.000 (0.009) loss 1.3008 (1.0288) acc 78.1250 (73.7500) lr 4.6417e-04 eta 1:51:58 +epoch [36/50] batch [100/500] time 0.872 (0.906) data 0.000 (0.009) loss 0.9297 (1.0326) acc 75.0000 (73.6562) lr 4.6417e-04 eta 1:51:43 +epoch [36/50] batch [105/500] time 0.915 (0.905) data 0.000 (0.008) loss 0.9297 (1.0238) acc 78.1250 (73.6905) lr 4.6417e-04 eta 1:51:30 +epoch [36/50] batch [110/500] time 0.847 (0.904) data 0.000 (0.008) loss 0.9800 (1.0352) acc 78.1250 (73.4943) lr 4.6417e-04 eta 1:51:21 +epoch [36/50] batch [115/500] time 0.871 (0.904) data 0.000 (0.008) loss 1.2812 (1.0346) acc 68.7500 (73.6413) lr 4.6417e-04 eta 1:51:12 +epoch [36/50] batch [120/500] time 0.864 (0.904) data 0.000 (0.007) loss 0.7935 (1.0277) acc 84.3750 (73.8542) lr 4.6417e-04 eta 1:51:12 +epoch [36/50] batch [125/500] time 0.899 (0.904) data 0.000 (0.007) loss 0.7783 (1.0291) acc 75.0000 (73.7500) lr 4.6417e-04 eta 1:51:04 +epoch [36/50] batch [130/500] time 
0.869 (0.903) data 0.000 (0.007) loss 0.8174 (1.0312) acc 78.1250 (73.7981) lr 4.6417e-04 eta 1:50:51 +epoch [36/50] batch [135/500] time 0.887 (0.902) data 0.000 (0.007) loss 0.6226 (1.0305) acc 87.5000 (73.7731) lr 4.6417e-04 eta 1:50:46 +epoch [36/50] batch [140/500] time 0.888 (0.902) data 0.000 (0.006) loss 1.1768 (1.0325) acc 68.7500 (73.6607) lr 4.6417e-04 eta 1:50:36 +epoch [36/50] batch [145/500] time 0.886 (0.902) data 0.000 (0.006) loss 1.4297 (1.0372) acc 65.6250 (73.4052) lr 4.6417e-04 eta 1:50:31 +epoch [36/50] batch [150/500] time 0.911 (0.901) data 0.000 (0.006) loss 0.7832 (1.0423) acc 71.8750 (73.3958) lr 4.6417e-04 eta 1:50:25 +epoch [36/50] batch [155/500] time 0.890 (0.901) data 0.000 (0.006) loss 0.9985 (1.0458) acc 78.1250 (73.3669) lr 4.6417e-04 eta 1:50:21 +epoch [36/50] batch [160/500] time 0.900 (0.901) data 0.000 (0.006) loss 0.9380 (1.0412) acc 68.7500 (73.4375) lr 4.6417e-04 eta 1:50:16 +epoch [36/50] batch [165/500] time 0.870 (0.902) data 0.000 (0.005) loss 0.7544 (1.0354) acc 84.3750 (73.6742) lr 4.6417e-04 eta 1:50:18 +epoch [36/50] batch [170/500] time 0.863 (0.902) data 0.000 (0.005) loss 1.8398 (1.0421) acc 65.6250 (73.5478) lr 4.6417e-04 eta 1:50:10 +epoch [36/50] batch [175/500] time 0.920 (0.902) data 0.000 (0.005) loss 1.4512 (1.0449) acc 78.1250 (73.6429) lr 4.6417e-04 eta 1:50:04 +epoch [36/50] batch [180/500] time 0.865 (0.901) data 0.000 (0.005) loss 1.2227 (1.0510) acc 71.8750 (73.5764) lr 4.6417e-04 eta 1:49:57 +epoch [36/50] batch [185/500] time 0.869 (0.901) data 0.000 (0.005) loss 0.9941 (1.0537) acc 68.7500 (73.4797) lr 4.6417e-04 eta 1:49:49 +epoch [36/50] batch [190/500] time 0.883 (0.900) data 0.000 (0.005) loss 0.9185 (1.0549) acc 71.8750 (73.4704) lr 4.6417e-04 eta 1:49:38 +epoch [36/50] batch [195/500] time 0.860 (0.900) data 0.000 (0.005) loss 1.1572 (1.0513) acc 65.6250 (73.5256) lr 4.6417e-04 eta 1:49:31 +epoch [36/50] batch [200/500] time 0.899 (0.899) data 0.000 (0.005) loss 0.5967 (1.0464) acc 90.6250 
(73.6719) lr 4.6417e-04 eta 1:49:24 +epoch [36/50] batch [205/500] time 0.900 (0.899) data 0.001 (0.004) loss 0.9653 (1.0516) acc 75.0000 (73.5823) lr 4.6417e-04 eta 1:49:20 +epoch [36/50] batch [210/500] time 0.908 (0.899) data 0.000 (0.004) loss 0.9565 (1.0555) acc 68.7500 (73.5417) lr 4.6417e-04 eta 1:49:16 +epoch [36/50] batch [215/500] time 0.874 (0.899) data 0.000 (0.004) loss 0.6958 (1.0542) acc 84.3750 (73.6192) lr 4.6417e-04 eta 1:49:07 +epoch [36/50] batch [220/500] time 0.899 (0.898) data 0.000 (0.004) loss 1.0596 (1.0541) acc 78.1250 (73.6648) lr 4.6417e-04 eta 1:48:59 +epoch [36/50] batch [225/500] time 0.904 (0.898) data 0.000 (0.004) loss 1.0869 (1.0542) acc 71.8750 (73.6667) lr 4.6417e-04 eta 1:48:53 +epoch [36/50] batch [230/500] time 0.868 (0.898) data 0.000 (0.004) loss 1.1689 (1.0585) acc 68.7500 (73.6005) lr 4.6417e-04 eta 1:48:45 +epoch [36/50] batch [235/500] time 0.897 (0.897) data 0.000 (0.004) loss 1.1152 (1.0552) acc 71.8750 (73.6968) lr 4.6417e-04 eta 1:48:39 +epoch [36/50] batch [240/500] time 0.895 (0.897) data 0.000 (0.004) loss 0.5908 (1.0515) acc 81.2500 (73.7500) lr 4.6417e-04 eta 1:48:32 +epoch [36/50] batch [245/500] time 0.896 (0.897) data 0.001 (0.004) loss 1.6250 (1.0525) acc 62.5000 (73.6990) lr 4.6417e-04 eta 1:48:27 +epoch [36/50] batch [250/500] time 0.884 (0.897) data 0.000 (0.004) loss 1.2168 (1.0584) acc 78.1250 (73.4750) lr 4.6417e-04 eta 1:48:22 +epoch [36/50] batch [255/500] time 0.882 (0.897) data 0.000 (0.004) loss 0.5557 (1.0544) acc 71.8750 (73.5049) lr 4.6417e-04 eta 1:48:16 +epoch [36/50] batch [260/500] time 0.996 (0.897) data 0.000 (0.004) loss 0.8286 (1.0507) acc 75.0000 (73.5337) lr 4.6417e-04 eta 1:48:16 +epoch [36/50] batch [265/500] time 0.928 (0.897) data 0.000 (0.004) loss 1.4971 (1.0541) acc 78.1250 (73.4906) lr 4.6417e-04 eta 1:48:12 +epoch [36/50] batch [270/500] time 0.899 (0.897) data 0.000 (0.003) loss 1.1162 (1.0519) acc 62.5000 (73.4954) lr 4.6417e-04 eta 1:48:06 +epoch [36/50] batch [275/500] 
time 0.861 (0.897) data 0.000 (0.003) loss 0.7339 (1.0522) acc 78.1250 (73.4773) lr 4.6417e-04 eta 1:47:59 +epoch [36/50] batch [280/500] time 0.874 (0.896) data 0.000 (0.003) loss 0.9580 (1.0523) acc 78.1250 (73.5156) lr 4.6417e-04 eta 1:47:52 +epoch [36/50] batch [285/500] time 0.870 (0.896) data 0.000 (0.003) loss 1.1592 (1.0522) acc 75.0000 (73.5746) lr 4.6417e-04 eta 1:47:47 +epoch [36/50] batch [290/500] time 0.895 (0.896) data 0.000 (0.003) loss 0.3303 (1.0505) acc 93.7500 (73.5991) lr 4.6417e-04 eta 1:47:40 +epoch [36/50] batch [295/500] time 0.898 (0.896) data 0.000 (0.003) loss 1.3770 (1.0513) acc 78.1250 (73.6653) lr 4.6417e-04 eta 1:47:34 +epoch [36/50] batch [300/500] time 0.880 (0.896) data 0.000 (0.003) loss 1.1885 (1.0531) acc 71.8750 (73.6250) lr 4.6417e-04 eta 1:47:29 +epoch [36/50] batch [305/500] time 0.871 (0.896) data 0.000 (0.003) loss 1.2041 (1.0521) acc 68.7500 (73.5963) lr 4.6417e-04 eta 1:47:26 +epoch [36/50] batch [310/500] time 0.848 (0.895) data 0.000 (0.003) loss 2.1211 (1.0561) acc 46.8750 (73.4980) lr 4.6417e-04 eta 1:47:18 +epoch [36/50] batch [315/500] time 0.886 (0.895) data 0.001 (0.003) loss 0.7471 (1.0551) acc 78.1250 (73.5813) lr 4.6417e-04 eta 1:47:11 +epoch [36/50] batch [320/500] time 0.872 (0.895) data 0.000 (0.003) loss 1.4268 (1.0542) acc 71.8750 (73.6328) lr 4.6417e-04 eta 1:47:05 +epoch [36/50] batch [325/500] time 0.898 (0.895) data 0.000 (0.003) loss 0.7930 (1.0536) acc 81.2500 (73.6346) lr 4.6417e-04 eta 1:47:00 +epoch [36/50] batch [330/500] time 0.917 (0.895) data 0.000 (0.003) loss 0.9272 (1.0545) acc 78.1250 (73.6269) lr 4.6417e-04 eta 1:46:55 +epoch [36/50] batch [335/500] time 0.909 (0.895) data 0.000 (0.003) loss 0.9014 (1.0547) acc 78.1250 (73.6381) lr 4.6417e-04 eta 1:46:50 +epoch [36/50] batch [340/500] time 0.887 (0.895) data 0.000 (0.003) loss 0.5942 (1.0548) acc 81.2500 (73.5938) lr 4.6417e-04 eta 1:46:45 +epoch [36/50] batch [345/500] time 0.871 (0.894) data 0.000 (0.003) loss 0.8208 (1.0549) acc 
78.1250 (73.5688) lr 4.6417e-04 eta 1:46:39 +epoch [36/50] batch [350/500] time 0.863 (0.894) data 0.000 (0.003) loss 0.8037 (1.0540) acc 75.0000 (73.6161) lr 4.6417e-04 eta 1:46:32 +epoch [36/50] batch [355/500] time 0.879 (0.894) data 0.000 (0.003) loss 1.8926 (1.0560) acc 65.6250 (73.5827) lr 4.6417e-04 eta 1:46:26 +epoch [36/50] batch [360/500] time 0.903 (0.894) data 0.000 (0.003) loss 0.8193 (1.0555) acc 78.1250 (73.5938) lr 4.6417e-04 eta 1:46:20 +epoch [36/50] batch [365/500] time 0.885 (0.893) data 0.000 (0.003) loss 1.0479 (1.0524) acc 68.7500 (73.6558) lr 4.6417e-04 eta 1:46:15 +epoch [36/50] batch [370/500] time 0.874 (0.893) data 0.000 (0.003) loss 0.6162 (1.0524) acc 84.3750 (73.7162) lr 4.6417e-04 eta 1:46:10 +epoch [36/50] batch [375/500] time 0.897 (0.893) data 0.000 (0.003) loss 0.7480 (1.0501) acc 78.1250 (73.7250) lr 4.6417e-04 eta 1:46:05 +epoch [36/50] batch [380/500] time 0.895 (0.893) data 0.000 (0.003) loss 1.0205 (1.0478) acc 68.7500 (73.7664) lr 4.6417e-04 eta 1:46:01 +epoch [36/50] batch [385/500] time 0.888 (0.893) data 0.000 (0.002) loss 0.6743 (1.0467) acc 84.3750 (73.7744) lr 4.6417e-04 eta 1:45:56 +epoch [36/50] batch [390/500] time 0.885 (0.893) data 0.000 (0.002) loss 0.7847 (1.0475) acc 75.0000 (73.7420) lr 4.6417e-04 eta 1:45:50 +epoch [36/50] batch [395/500] time 0.902 (0.893) data 0.000 (0.002) loss 1.0303 (1.0466) acc 65.6250 (73.7421) lr 4.6417e-04 eta 1:45:45 +epoch [36/50] batch [400/500] time 0.866 (0.893) data 0.000 (0.002) loss 1.4287 (1.0494) acc 68.7500 (73.7344) lr 4.6417e-04 eta 1:45:39 +epoch [36/50] batch [405/500] time 0.908 (0.893) data 0.000 (0.002) loss 1.0605 (1.0485) acc 78.1250 (73.8040) lr 4.6417e-04 eta 1:45:37 +epoch [36/50] batch [410/500] time 0.852 (0.893) data 0.000 (0.002) loss 2.0781 (1.0495) acc 56.2500 (73.7957) lr 4.6417e-04 eta 1:45:31 +epoch [36/50] batch [415/500] time 0.864 (0.893) data 0.000 (0.002) loss 1.1904 (1.0492) acc 68.7500 (73.8027) lr 4.6417e-04 eta 1:45:26 +epoch [36/50] batch 
[420/500] time 0.868 (0.893) data 0.000 (0.002) loss 1.0488 (1.0520) acc 68.7500 (73.6979) lr 4.6417e-04 eta 1:45:20 +epoch [36/50] batch [425/500] time 0.877 (0.893) data 0.000 (0.002) loss 1.2490 (1.0540) acc 71.8750 (73.6324) lr 4.6417e-04 eta 1:45:14 +epoch [36/50] batch [430/500] time 0.891 (0.892) data 0.000 (0.002) loss 1.9727 (1.0578) acc 53.1250 (73.5610) lr 4.6417e-04 eta 1:45:09 +epoch [36/50] batch [435/500] time 0.869 (0.892) data 0.000 (0.002) loss 1.3994 (1.0583) acc 71.8750 (73.5848) lr 4.6417e-04 eta 1:45:05 +epoch [36/50] batch [440/500] time 0.883 (0.893) data 0.000 (0.002) loss 1.2480 (1.0564) acc 65.6250 (73.5866) lr 4.6417e-04 eta 1:45:01 +epoch [36/50] batch [445/500] time 0.902 (0.893) data 0.000 (0.002) loss 0.8145 (1.0567) acc 78.1250 (73.5674) lr 4.6417e-04 eta 1:44:57 +epoch [36/50] batch [450/500] time 0.872 (0.893) data 0.000 (0.002) loss 0.8975 (1.0576) acc 81.2500 (73.5625) lr 4.6417e-04 eta 1:44:53 +epoch [36/50] batch [455/500] time 0.852 (0.893) data 0.000 (0.002) loss 1.4287 (1.0586) acc 65.6250 (73.5440) lr 4.6417e-04 eta 1:44:47 +epoch [36/50] batch [460/500] time 0.913 (0.893) data 0.000 (0.002) loss 1.1133 (1.0569) acc 65.6250 (73.5666) lr 4.6417e-04 eta 1:44:43 +epoch [36/50] batch [465/500] time 0.886 (0.893) data 0.000 (0.002) loss 1.0547 (1.0574) acc 75.0000 (73.5753) lr 4.6417e-04 eta 1:44:39 +epoch [36/50] batch [470/500] time 0.900 (0.893) data 0.000 (0.002) loss 0.8975 (1.0582) acc 68.7500 (73.5040) lr 4.6417e-04 eta 1:44:35 +epoch [36/50] batch [475/500] time 0.883 (0.893) data 0.000 (0.002) loss 0.6035 (1.0559) acc 84.3750 (73.5658) lr 4.6417e-04 eta 1:44:30 +epoch [36/50] batch [480/500] time 0.873 (0.892) data 0.000 (0.002) loss 1.2207 (1.0575) acc 71.8750 (73.5417) lr 4.6417e-04 eta 1:44:24 +epoch [36/50] batch [485/500] time 0.920 (0.893) data 0.001 (0.002) loss 0.9204 (1.0592) acc 71.8750 (73.5180) lr 4.6417e-04 eta 1:44:21 +epoch [36/50] batch [490/500] time 0.869 (0.892) data 0.000 (0.002) loss 1.0547 
(1.0586) acc 75.0000 (73.5332) lr 4.6417e-04 eta 1:44:15 +epoch [36/50] batch [495/500] time 0.917 (0.893) data 0.000 (0.002) loss 0.9878 (1.0591) acc 75.0000 (73.5227) lr 4.6417e-04 eta 1:44:11 +epoch [36/50] batch [500/500] time 0.897 (0.893) data 0.000 (0.002) loss 1.3623 (1.0605) acc 71.8750 (73.5187) lr 4.1221e-04 eta 1:44:07 +epoch [37/50] batch [5/500] time 0.851 (1.018) data 0.000 (0.152) loss 0.9868 (0.8891) acc 78.1250 (77.5000) lr 4.1221e-04 eta 1:58:37 +epoch [37/50] batch [10/500] time 0.871 (0.950) data 0.000 (0.076) loss 1.5664 (1.0388) acc 65.6250 (75.3125) lr 4.1221e-04 eta 1:50:43 +epoch [37/50] batch [15/500] time 0.838 (0.925) data 0.000 (0.051) loss 0.9453 (1.0132) acc 81.2500 (75.0000) lr 4.1221e-04 eta 1:47:41 +epoch [37/50] batch [20/500] time 0.856 (0.912) data 0.000 (0.038) loss 1.0850 (1.0207) acc 71.8750 (72.9688) lr 4.1221e-04 eta 1:46:04 +epoch [37/50] batch [25/500] time 0.908 (0.906) data 0.000 (0.031) loss 1.2207 (1.0340) acc 71.8750 (72.5000) lr 4.1221e-04 eta 1:45:20 +epoch [37/50] batch [30/500] time 0.858 (0.902) data 0.000 (0.026) loss 1.3887 (1.0707) acc 62.5000 (72.2917) lr 4.1221e-04 eta 1:44:50 +epoch [37/50] batch [35/500] time 0.909 (0.902) data 0.000 (0.022) loss 1.0928 (1.0782) acc 65.6250 (71.8750) lr 4.1221e-04 eta 1:44:40 +epoch [37/50] batch [40/500] time 0.863 (0.899) data 0.000 (0.019) loss 0.7173 (1.0782) acc 84.3750 (72.1094) lr 4.1221e-04 eta 1:44:14 +epoch [37/50] batch [45/500] time 0.865 (0.897) data 0.000 (0.017) loss 0.6152 (1.0518) acc 81.2500 (72.7083) lr 4.1221e-04 eta 1:43:57 +epoch [37/50] batch [50/500] time 0.884 (0.895) data 0.000 (0.015) loss 1.0098 (1.0296) acc 71.8750 (73.1250) lr 4.1221e-04 eta 1:43:37 +epoch [37/50] batch [55/500] time 0.897 (0.894) data 0.000 (0.014) loss 1.4355 (1.0363) acc 71.8750 (73.1818) lr 4.1221e-04 eta 1:43:25 +epoch [37/50] batch [60/500] time 0.890 (0.893) data 0.001 (0.013) loss 1.2578 (1.0381) acc 62.5000 (73.1250) lr 4.1221e-04 eta 1:43:15 +epoch [37/50] batch 
[65/500] time 0.889 (0.893) data 0.000 (0.012) loss 0.7085 (1.0326) acc 84.3750 (73.3654) lr 4.1221e-04 eta 1:43:11 +epoch [37/50] batch [70/500] time 0.895 (0.893) data 0.000 (0.011) loss 0.7764 (1.0262) acc 75.0000 (73.7054) lr 4.1221e-04 eta 1:43:07 +epoch [37/50] batch [75/500] time 0.897 (0.893) data 0.000 (0.010) loss 0.8608 (1.0248) acc 71.8750 (73.8333) lr 4.1221e-04 eta 1:43:06 +epoch [37/50] batch [80/500] time 0.873 (0.893) data 0.000 (0.010) loss 0.8916 (1.0311) acc 71.8750 (73.4766) lr 4.1221e-04 eta 1:42:57 +epoch [37/50] batch [85/500] time 0.889 (0.892) data 0.000 (0.009) loss 0.7642 (1.0314) acc 81.2500 (73.5662) lr 4.1221e-04 eta 1:42:50 +epoch [37/50] batch [90/500] time 1.012 (0.893) data 0.000 (0.009) loss 0.8467 (1.0286) acc 75.0000 (73.5764) lr 4.1221e-04 eta 1:42:49 +epoch [37/50] batch [95/500] time 0.877 (0.893) data 0.000 (0.008) loss 1.2832 (1.0235) acc 68.7500 (73.8158) lr 4.1221e-04 eta 1:42:43 +epoch [37/50] batch [100/500] time 0.908 (0.892) data 0.000 (0.008) loss 1.3398 (1.0342) acc 59.3750 (73.4062) lr 4.1221e-04 eta 1:42:36 +epoch [37/50] batch [105/500] time 0.905 (0.892) data 0.000 (0.007) loss 0.8628 (1.0288) acc 78.1250 (73.5417) lr 4.1221e-04 eta 1:42:31 +epoch [37/50] batch [110/500] time 0.888 (0.892) data 0.000 (0.007) loss 1.2295 (1.0144) acc 62.5000 (73.8068) lr 4.1221e-04 eta 1:42:28 +epoch [37/50] batch [115/500] time 0.900 (0.893) data 0.000 (0.007) loss 1.1660 (1.0243) acc 84.3750 (73.7772) lr 4.1221e-04 eta 1:42:25 +epoch [37/50] batch [120/500] time 0.888 (0.892) data 0.000 (0.007) loss 0.6982 (1.0215) acc 87.5000 (74.0625) lr 4.1221e-04 eta 1:42:19 +epoch [37/50] batch [125/500] time 0.873 (0.892) data 0.000 (0.006) loss 0.8169 (1.0253) acc 68.7500 (73.7000) lr 4.1221e-04 eta 1:42:15 +epoch [37/50] batch [130/500] time 0.900 (0.892) data 0.000 (0.006) loss 0.9688 (1.0212) acc 75.0000 (73.8702) lr 4.1221e-04 eta 1:42:09 +epoch [37/50] batch [135/500] time 0.878 (0.893) data 0.000 (0.006) loss 1.6553 (1.0181) acc 
56.2500 (73.9583) lr 4.1221e-04 eta 1:42:09 +epoch [37/50] batch [140/500] time 0.904 (0.892) data 0.000 (0.006) loss 1.0693 (1.0210) acc 68.7500 (73.9062) lr 4.1221e-04 eta 1:42:02 +epoch [37/50] batch [145/500] time 0.902 (0.893) data 0.000 (0.005) loss 1.3271 (1.0212) acc 62.5000 (73.7931) lr 4.1221e-04 eta 1:41:58 +epoch [37/50] batch [150/500] time 0.903 (0.892) data 0.000 (0.005) loss 0.7393 (1.0184) acc 75.0000 (73.8125) lr 4.1221e-04 eta 1:41:50 +epoch [37/50] batch [155/500] time 0.869 (0.892) data 0.000 (0.005) loss 1.0742 (1.0169) acc 75.0000 (73.7097) lr 4.1221e-04 eta 1:41:44 +epoch [37/50] batch [160/500] time 0.913 (0.892) data 0.001 (0.005) loss 1.1396 (1.0242) acc 65.6250 (73.6133) lr 4.1221e-04 eta 1:41:40 +epoch [37/50] batch [165/500] time 0.900 (0.892) data 0.000 (0.005) loss 1.3447 (1.0245) acc 59.3750 (73.5038) lr 4.1221e-04 eta 1:41:36 +epoch [37/50] batch [170/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.9053 (1.0275) acc 71.8750 (73.4926) lr 4.1221e-04 eta 1:41:30 +epoch [37/50] batch [175/500] time 0.880 (0.892) data 0.000 (0.005) loss 1.5244 (1.0314) acc 71.8750 (73.3571) lr 4.1221e-04 eta 1:41:26 +epoch [37/50] batch [180/500] time 0.912 (0.892) data 0.000 (0.004) loss 1.5127 (1.0263) acc 62.5000 (73.5069) lr 4.1221e-04 eta 1:41:23 +epoch [37/50] batch [185/500] time 0.869 (0.891) data 0.000 (0.004) loss 1.3369 (1.0279) acc 68.7500 (73.5304) lr 4.1221e-04 eta 1:41:15 +epoch [37/50] batch [190/500] time 0.901 (0.891) data 0.000 (0.004) loss 1.1943 (1.0322) acc 65.6250 (73.3717) lr 4.1221e-04 eta 1:41:09 +epoch [37/50] batch [195/500] time 0.900 (0.891) data 0.001 (0.004) loss 0.7441 (1.0282) acc 81.2500 (73.4776) lr 4.1221e-04 eta 1:41:06 +epoch [37/50] batch [200/500] time 0.884 (0.891) data 0.000 (0.004) loss 1.0107 (1.0269) acc 75.0000 (73.5156) lr 4.1221e-04 eta 1:41:01 +epoch [37/50] batch [205/500] time 0.892 (0.891) data 0.000 (0.004) loss 1.4141 (1.0311) acc 71.8750 (73.5366) lr 4.1221e-04 eta 1:40:56 +epoch [37/50] batch 
[210/500] time 0.884 (0.891) data 0.000 (0.004) loss 1.9561 (1.0375) acc 59.3750 (73.4524) lr 4.1221e-04 eta 1:40:51 +epoch [37/50] batch [215/500] time 0.876 (0.891) data 0.000 (0.004) loss 1.5869 (1.0436) acc 71.8750 (73.4593) lr 4.1221e-04 eta 1:40:45 +epoch [37/50] batch [220/500] time 0.889 (0.891) data 0.000 (0.004) loss 1.6602 (1.0426) acc 62.5000 (73.5085) lr 4.1221e-04 eta 1:40:41 +epoch [37/50] batch [225/500] time 0.908 (0.891) data 0.000 (0.004) loss 1.1523 (1.0414) acc 75.0000 (73.5417) lr 4.1221e-04 eta 1:40:37 +epoch [37/50] batch [230/500] time 0.854 (0.891) data 0.000 (0.004) loss 1.7676 (1.0444) acc 65.6250 (73.5734) lr 4.1221e-04 eta 1:40:30 +epoch [37/50] batch [235/500] time 0.884 (0.891) data 0.001 (0.003) loss 1.5361 (1.0457) acc 68.7500 (73.6170) lr 4.1221e-04 eta 1:40:28 +epoch [37/50] batch [240/500] time 0.898 (0.891) data 0.000 (0.003) loss 1.3340 (1.0444) acc 68.7500 (73.6979) lr 4.1221e-04 eta 1:40:24 +epoch [37/50] batch [245/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.8730 (1.0497) acc 50.0000 (73.5204) lr 4.1221e-04 eta 1:40:20 +epoch [37/50] batch [250/500] time 0.904 (0.891) data 0.000 (0.003) loss 1.0918 (1.0487) acc 81.2500 (73.5875) lr 4.1221e-04 eta 1:40:17 +epoch [37/50] batch [255/500] time 0.881 (0.891) data 0.000 (0.003) loss 1.1777 (1.0515) acc 65.6250 (73.5049) lr 4.1221e-04 eta 1:40:10 +epoch [37/50] batch [260/500] time 0.895 (0.891) data 0.000 (0.003) loss 0.9004 (1.0556) acc 81.2500 (73.5096) lr 4.1221e-04 eta 1:40:05 +epoch [37/50] batch [265/500] time 0.873 (0.891) data 0.000 (0.003) loss 1.3955 (1.0538) acc 62.5000 (73.5024) lr 4.1221e-04 eta 1:39:59 +epoch [37/50] batch [270/500] time 0.899 (0.891) data 0.000 (0.003) loss 0.6206 (1.0563) acc 93.7500 (73.5301) lr 4.1221e-04 eta 1:39:54 +epoch [37/50] batch [275/500] time 0.910 (0.890) data 0.000 (0.003) loss 0.5776 (1.0558) acc 81.2500 (73.4886) lr 4.1221e-04 eta 1:39:48 +epoch [37/50] batch [280/500] time 0.887 (0.891) data 0.001 (0.003) loss 1.3604 
(1.0585) acc 68.7500 (73.4598) lr 4.1221e-04 eta 1:39:47 +epoch [37/50] batch [285/500] time 0.903 (0.891) data 0.000 (0.003) loss 0.7065 (1.0595) acc 81.2500 (73.4430) lr 4.1221e-04 eta 1:39:42 +epoch [37/50] batch [290/500] time 0.878 (0.891) data 0.000 (0.003) loss 0.5981 (1.0566) acc 78.1250 (73.5345) lr 4.1221e-04 eta 1:39:39 +epoch [37/50] batch [295/500] time 0.906 (0.891) data 0.000 (0.003) loss 0.6763 (1.0564) acc 84.3750 (73.5169) lr 4.1221e-04 eta 1:39:37 +epoch [37/50] batch [300/500] time 0.866 (0.891) data 0.000 (0.003) loss 1.2178 (1.0589) acc 62.5000 (73.5104) lr 4.1221e-04 eta 1:39:30 +epoch [37/50] batch [305/500] time 0.887 (0.891) data 0.000 (0.003) loss 0.9937 (1.0595) acc 71.8750 (73.4836) lr 4.1221e-04 eta 1:39:24 +epoch [37/50] batch [310/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.1670 (1.0578) acc 81.2500 (73.5383) lr 4.1221e-04 eta 1:39:19 +epoch [37/50] batch [315/500] time 0.905 (0.891) data 0.000 (0.003) loss 1.0381 (1.0553) acc 71.8750 (73.5714) lr 4.1221e-04 eta 1:39:15 +epoch [37/50] batch [320/500] time 0.905 (0.891) data 0.000 (0.003) loss 1.4287 (1.0544) acc 68.7500 (73.6035) lr 4.1221e-04 eta 1:39:09 +epoch [37/50] batch [325/500] time 0.897 (0.891) data 0.000 (0.003) loss 1.9492 (1.0583) acc 56.2500 (73.5385) lr 4.1221e-04 eta 1:39:07 +epoch [37/50] batch [330/500] time 0.869 (0.891) data 0.000 (0.003) loss 0.7949 (1.0565) acc 75.0000 (73.5038) lr 4.1221e-04 eta 1:39:02 +epoch [37/50] batch [335/500] time 0.889 (0.891) data 0.000 (0.003) loss 1.2754 (1.0559) acc 62.5000 (73.4515) lr 4.1221e-04 eta 1:38:57 +epoch [37/50] batch [340/500] time 0.875 (0.891) data 0.000 (0.002) loss 0.4475 (1.0542) acc 87.5000 (73.4835) lr 4.1221e-04 eta 1:38:51 +epoch [37/50] batch [345/500] time 0.900 (0.890) data 0.000 (0.002) loss 1.2148 (1.0560) acc 75.0000 (73.4692) lr 4.1221e-04 eta 1:38:45 +epoch [37/50] batch [350/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.7910 (1.0576) acc 53.1250 (73.3929) lr 4.1221e-04 eta 1:38:39 +epoch 
[37/50] batch [355/500] time 0.859 (0.890) data 0.000 (0.002) loss 1.1025 (1.0593) acc 65.6250 (73.3099) lr 4.1221e-04 eta 1:38:34 +epoch [37/50] batch [360/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.2891 (1.0605) acc 59.3750 (73.2552) lr 4.1221e-04 eta 1:38:29 +epoch [37/50] batch [365/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.1357 (1.0622) acc 75.0000 (73.2877) lr 4.1221e-04 eta 1:38:26 +epoch [37/50] batch [370/500] time 0.874 (0.890) data 0.000 (0.002) loss 1.1777 (1.0602) acc 68.7500 (73.3361) lr 4.1221e-04 eta 1:38:21 +epoch [37/50] batch [375/500] time 0.867 (0.890) data 0.000 (0.002) loss 1.5371 (1.0638) acc 62.5000 (73.2833) lr 4.1221e-04 eta 1:38:16 +epoch [37/50] batch [380/500] time 0.903 (0.890) data 0.000 (0.002) loss 1.1475 (1.0656) acc 68.7500 (73.2072) lr 4.1221e-04 eta 1:38:13 +epoch [37/50] batch [385/500] time 0.869 (0.890) data 0.000 (0.002) loss 1.3838 (1.0658) acc 65.6250 (73.2143) lr 4.1221e-04 eta 1:38:08 +epoch [37/50] batch [390/500] time 0.909 (0.890) data 0.000 (0.002) loss 1.3721 (1.0651) acc 68.7500 (73.2532) lr 4.1221e-04 eta 1:38:03 +epoch [37/50] batch [395/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.0156 (1.0628) acc 81.2500 (73.3465) lr 4.1221e-04 eta 1:37:58 +epoch [37/50] batch [400/500] time 0.903 (0.890) data 0.001 (0.002) loss 0.7148 (1.0627) acc 81.2500 (73.3125) lr 4.1221e-04 eta 1:37:54 +epoch [37/50] batch [405/500] time 0.874 (0.890) data 0.000 (0.002) loss 1.1113 (1.0649) acc 68.7500 (73.2716) lr 4.1221e-04 eta 1:37:48 +epoch [37/50] batch [410/500] time 0.886 (0.890) data 0.000 (0.002) loss 0.6113 (1.0630) acc 90.6250 (73.3079) lr 4.1221e-04 eta 1:37:42 +epoch [37/50] batch [415/500] time 0.875 (0.890) data 0.000 (0.002) loss 0.8018 (1.0607) acc 84.3750 (73.3735) lr 4.1221e-04 eta 1:37:37 +epoch [37/50] batch [420/500] time 1.022 (0.890) data 0.000 (0.002) loss 1.4863 (1.0615) acc 71.8750 (73.2961) lr 4.1221e-04 eta 1:37:35 +epoch [37/50] batch [425/500] time 0.848 (0.890) data 0.000 (0.002) loss 
1.2783 (1.0631) acc 65.6250 (73.2868) lr 4.1221e-04 eta 1:37:29 +epoch [37/50] batch [430/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.2646 (1.0637) acc 75.0000 (73.2267) lr 4.1221e-04 eta 1:37:25 +epoch [37/50] batch [435/500] time 0.868 (0.890) data 0.000 (0.002) loss 0.9761 (1.0645) acc 75.0000 (73.2040) lr 4.1221e-04 eta 1:37:20 +epoch [37/50] batch [440/500] time 0.878 (0.890) data 0.000 (0.002) loss 1.1738 (1.0644) acc 65.6250 (73.2031) lr 4.1221e-04 eta 1:37:15 +epoch [37/50] batch [445/500] time 0.880 (0.889) data 0.000 (0.002) loss 1.6533 (1.0633) acc 56.2500 (73.2093) lr 4.1221e-04 eta 1:37:10 +epoch [37/50] batch [450/500] time 0.890 (0.889) data 0.000 (0.002) loss 0.8315 (1.0619) acc 84.3750 (73.2222) lr 4.1221e-04 eta 1:37:06 +epoch [37/50] batch [455/500] time 0.865 (0.889) data 0.000 (0.002) loss 0.8076 (1.0612) acc 75.0000 (73.2418) lr 4.1221e-04 eta 1:37:00 +epoch [37/50] batch [460/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.2109 (1.0620) acc 78.1250 (73.2473) lr 4.1221e-04 eta 1:36:56 +epoch [37/50] batch [465/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.0283 (1.0619) acc 71.8750 (73.2124) lr 4.1221e-04 eta 1:36:51 +epoch [37/50] batch [470/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.4434 (1.0641) acc 65.6250 (73.1715) lr 4.1221e-04 eta 1:36:47 +epoch [37/50] batch [475/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.6523 (1.0662) acc 56.2500 (73.1382) lr 4.1221e-04 eta 1:36:42 +epoch [37/50] batch [480/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.3604 (1.0662) acc 65.6250 (73.0990) lr 4.1221e-04 eta 1:36:37 +epoch [37/50] batch [485/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.5225 (1.0691) acc 59.3750 (73.0477) lr 4.1221e-04 eta 1:36:32 +epoch [37/50] batch [490/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.6875 (1.0692) acc 78.1250 (73.0357) lr 4.1221e-04 eta 1:36:27 +epoch [37/50] batch [495/500] time 0.902 (0.889) data 0.000 (0.002) loss 0.8320 (1.0686) acc 75.0000 (73.0492) lr 4.1221e-04 eta 1:36:22 
+epoch [37/50] batch [500/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.3311 (1.0677) acc 78.1250 (73.1000) lr 3.6258e-04 eta 1:36:18 +epoch [38/50] batch [5/500] time 0.896 (1.051) data 0.000 (0.145) loss 1.8438 (1.1118) acc 62.5000 (75.0000) lr 3.6258e-04 eta 1:53:48 +epoch [38/50] batch [10/500] time 0.896 (0.972) data 0.000 (0.073) loss 0.8472 (0.9925) acc 87.5000 (76.8750) lr 3.6258e-04 eta 1:45:08 +epoch [38/50] batch [15/500] time 0.897 (0.956) data 0.000 (0.049) loss 0.9941 (0.9951) acc 81.2500 (77.7083) lr 3.6258e-04 eta 1:43:19 +epoch [38/50] batch [20/500] time 0.886 (0.938) data 0.000 (0.037) loss 0.8950 (0.9842) acc 84.3750 (77.9688) lr 3.6258e-04 eta 1:41:16 +epoch [38/50] batch [25/500] time 0.867 (0.922) data 0.000 (0.029) loss 0.7915 (0.9866) acc 81.2500 (77.0000) lr 3.6258e-04 eta 1:39:31 +epoch [38/50] batch [30/500] time 0.881 (0.916) data 0.000 (0.024) loss 0.3931 (0.9901) acc 84.3750 (76.4583) lr 3.6258e-04 eta 1:38:45 +epoch [38/50] batch [35/500] time 0.868 (0.911) data 0.000 (0.021) loss 0.8682 (1.0015) acc 81.2500 (75.8929) lr 3.6258e-04 eta 1:38:12 +epoch [38/50] batch [40/500] time 0.891 (0.906) data 0.000 (0.018) loss 1.1924 (1.0162) acc 65.6250 (74.8438) lr 3.6258e-04 eta 1:37:34 +epoch [38/50] batch [45/500] time 0.885 (0.903) data 0.000 (0.016) loss 1.2109 (1.0301) acc 78.1250 (74.7917) lr 3.6258e-04 eta 1:37:11 +epoch [38/50] batch [50/500] time 0.886 (0.901) data 0.000 (0.015) loss 0.5967 (1.0157) acc 87.5000 (75.0625) lr 3.6258e-04 eta 1:36:52 +epoch [38/50] batch [55/500] time 0.865 (0.899) data 0.000 (0.013) loss 0.8843 (1.0106) acc 81.2500 (75.3409) lr 3.6258e-04 eta 1:36:36 +epoch [38/50] batch [60/500] time 0.889 (0.898) data 0.000 (0.012) loss 0.8462 (1.0198) acc 75.0000 (75.1562) lr 3.6258e-04 eta 1:36:25 +epoch [38/50] batch [65/500] time 0.897 (0.898) data 0.000 (0.011) loss 0.9854 (1.0076) acc 78.1250 (75.3846) lr 3.6258e-04 eta 1:36:20 +epoch [38/50] batch [70/500] time 0.884 (0.899) data 0.000 (0.011) loss 1.1660 
(0.9974) acc 75.0000 (75.5804) lr 3.6258e-04 eta 1:36:20 +epoch [38/50] batch [75/500] time 0.879 (0.900) data 0.000 (0.010) loss 1.2275 (1.0161) acc 75.0000 (75.2500) lr 3.6258e-04 eta 1:36:24 +epoch [38/50] batch [80/500] time 0.908 (0.900) data 0.000 (0.009) loss 1.0039 (1.0175) acc 75.0000 (75.2734) lr 3.6258e-04 eta 1:36:17 +epoch [38/50] batch [85/500] time 0.891 (0.898) data 0.000 (0.009) loss 1.8076 (1.0279) acc 65.6250 (75.2574) lr 3.6258e-04 eta 1:36:03 +epoch [38/50] batch [90/500] time 0.867 (0.897) data 0.000 (0.008) loss 1.1816 (1.0254) acc 59.3750 (75.1389) lr 3.6258e-04 eta 1:35:49 +epoch [38/50] batch [95/500] time 0.889 (0.897) data 0.000 (0.008) loss 0.6724 (1.0292) acc 78.1250 (74.9013) lr 3.6258e-04 eta 1:35:44 +epoch [38/50] batch [100/500] time 0.907 (0.897) data 0.000 (0.007) loss 1.1094 (1.0346) acc 65.6250 (74.5625) lr 3.6258e-04 eta 1:35:39 +epoch [38/50] batch [105/500] time 0.863 (0.895) data 0.000 (0.007) loss 1.2295 (1.0374) acc 53.1250 (74.1369) lr 3.6258e-04 eta 1:35:26 +epoch [38/50] batch [110/500] time 0.901 (0.895) data 0.000 (0.007) loss 0.8940 (1.0292) acc 71.8750 (74.1761) lr 3.6258e-04 eta 1:35:21 +epoch [38/50] batch [115/500] time 1.029 (0.896) data 0.000 (0.007) loss 1.7686 (1.0291) acc 59.3750 (74.1576) lr 3.6258e-04 eta 1:35:21 +epoch [38/50] batch [120/500] time 0.893 (0.896) data 0.000 (0.006) loss 1.2217 (1.0370) acc 62.5000 (73.9323) lr 3.6258e-04 eta 1:35:15 +epoch [38/50] batch [125/500] time 0.890 (0.896) data 0.000 (0.006) loss 1.0986 (1.0435) acc 65.6250 (73.8500) lr 3.6258e-04 eta 1:35:08 +epoch [38/50] batch [130/500] time 0.892 (0.895) data 0.000 (0.006) loss 0.6377 (1.0424) acc 81.2500 (73.8462) lr 3.6258e-04 eta 1:35:03 +epoch [38/50] batch [135/500] time 0.925 (0.895) data 0.000 (0.006) loss 0.8530 (1.0351) acc 68.7500 (73.8426) lr 3.6258e-04 eta 1:34:58 +epoch [38/50] batch [140/500] time 0.893 (0.895) data 0.000 (0.005) loss 1.1113 (1.0388) acc 78.1250 (73.6384) lr 3.6258e-04 eta 1:34:53 +epoch [38/50] 
batch [145/500] time 0.901 (0.895) data 0.000 (0.005) loss 1.4248 (1.0432) acc 78.1250 (73.5560) lr 3.6258e-04 eta 1:34:48 +epoch [38/50] batch [150/500] time 0.904 (0.894) data 0.000 (0.005) loss 0.9824 (1.0440) acc 71.8750 (73.4583) lr 3.6258e-04 eta 1:34:39 +epoch [38/50] batch [155/500] time 0.859 (0.894) data 0.000 (0.005) loss 1.4492 (1.0472) acc 53.1250 (73.3468) lr 3.6258e-04 eta 1:34:30 +epoch [38/50] batch [160/500] time 0.900 (0.893) data 0.000 (0.005) loss 1.0684 (1.0494) acc 75.0000 (73.4180) lr 3.6258e-04 eta 1:34:23 +epoch [38/50] batch [165/500] time 0.877 (0.893) data 0.000 (0.005) loss 2.3086 (1.0557) acc 53.1250 (73.3712) lr 3.6258e-04 eta 1:34:19 +epoch [38/50] batch [170/500] time 0.874 (0.893) data 0.000 (0.005) loss 0.9575 (1.0564) acc 68.7500 (73.2353) lr 3.6258e-04 eta 1:34:12 +epoch [38/50] batch [175/500] time 0.892 (0.893) data 0.000 (0.004) loss 1.6416 (1.0590) acc 62.5000 (73.1250) lr 3.6258e-04 eta 1:34:06 +epoch [38/50] batch [180/500] time 0.878 (0.893) data 0.000 (0.004) loss 0.7061 (1.0527) acc 84.3750 (73.2292) lr 3.6258e-04 eta 1:34:01 +epoch [38/50] batch [185/500] time 0.895 (0.893) data 0.000 (0.004) loss 1.8066 (1.0613) acc 62.5000 (73.0574) lr 3.6258e-04 eta 1:33:57 +epoch [38/50] batch [190/500] time 0.898 (0.892) data 0.000 (0.004) loss 1.1738 (1.0642) acc 65.6250 (73.0263) lr 3.6258e-04 eta 1:33:51 +epoch [38/50] batch [195/500] time 0.872 (0.892) data 0.000 (0.004) loss 1.3398 (1.0611) acc 65.6250 (73.1090) lr 3.6258e-04 eta 1:33:46 +epoch [38/50] batch [200/500] time 0.867 (0.892) data 0.000 (0.004) loss 0.9551 (1.0574) acc 81.2500 (73.3281) lr 3.6258e-04 eta 1:33:39 +epoch [38/50] batch [205/500] time 0.912 (0.892) data 0.000 (0.004) loss 0.7788 (1.0547) acc 81.2500 (73.4146) lr 3.6258e-04 eta 1:33:35 +epoch [38/50] batch [210/500] time 0.909 (0.892) data 0.000 (0.004) loss 0.7783 (1.0483) acc 81.2500 (73.5119) lr 3.6258e-04 eta 1:33:31 +epoch [38/50] batch [215/500] time 0.865 (0.893) data 0.000 (0.004) loss 0.6021 
(1.0465) acc 87.5000 (73.5610) lr 3.6258e-04 eta 1:33:30 +epoch [38/50] batch [220/500] time 0.875 (0.892) data 0.000 (0.004) loss 1.0439 (1.0454) acc 81.2500 (73.5938) lr 3.6258e-04 eta 1:33:24 +epoch [38/50] batch [225/500] time 0.883 (0.892) data 0.000 (0.003) loss 2.1504 (1.0518) acc 65.6250 (73.5000) lr 3.6258e-04 eta 1:33:19 +epoch [38/50] batch [230/500] time 0.878 (0.892) data 0.000 (0.003) loss 0.6685 (1.0472) acc 78.1250 (73.6141) lr 3.6258e-04 eta 1:33:13 +epoch [38/50] batch [235/500] time 0.875 (0.892) data 0.000 (0.003) loss 1.5195 (1.0503) acc 56.2500 (73.5372) lr 3.6258e-04 eta 1:33:07 +epoch [38/50] batch [240/500] time 0.876 (0.892) data 0.000 (0.003) loss 1.5059 (1.0532) acc 68.7500 (73.4635) lr 3.6258e-04 eta 1:33:03 +epoch [38/50] batch [245/500] time 0.881 (0.892) data 0.000 (0.003) loss 0.8467 (1.0528) acc 84.3750 (73.4439) lr 3.6258e-04 eta 1:32:58 +epoch [38/50] batch [250/500] time 0.881 (0.892) data 0.000 (0.003) loss 0.7275 (1.0554) acc 81.2500 (73.3500) lr 3.6258e-04 eta 1:32:53 +epoch [38/50] batch [255/500] time 0.866 (0.891) data 0.000 (0.003) loss 1.0107 (1.0545) acc 68.7500 (73.3701) lr 3.6258e-04 eta 1:32:47 +epoch [38/50] batch [260/500] time 0.883 (0.892) data 0.000 (0.003) loss 0.7759 (1.0523) acc 78.1250 (73.3654) lr 3.6258e-04 eta 1:32:44 +epoch [38/50] batch [265/500] time 0.858 (0.892) data 0.000 (0.003) loss 0.8242 (1.0525) acc 81.2500 (73.3608) lr 3.6258e-04 eta 1:32:40 +epoch [38/50] batch [270/500] time 0.898 (0.892) data 0.000 (0.003) loss 0.9272 (1.0530) acc 71.8750 (73.3449) lr 3.6258e-04 eta 1:32:36 +epoch [38/50] batch [275/500] time 0.884 (0.892) data 0.000 (0.003) loss 1.4316 (1.0546) acc 65.6250 (73.3068) lr 3.6258e-04 eta 1:32:31 +epoch [38/50] batch [280/500] time 0.924 (0.892) data 0.000 (0.003) loss 0.6108 (1.0538) acc 81.2500 (73.2924) lr 3.6258e-04 eta 1:32:27 +epoch [38/50] batch [285/500] time 0.902 (0.892) data 0.000 (0.003) loss 1.2578 (1.0569) acc 65.6250 (73.2127) lr 3.6258e-04 eta 1:32:23 +epoch 
[38/50] batch [290/500] time 0.885 (0.892) data 0.000 (0.003) loss 0.5640 (1.0567) acc 87.5000 (73.2328) lr 3.6258e-04 eta 1:32:18 +epoch [38/50] batch [295/500] time 0.921 (0.892) data 0.000 (0.003) loss 0.9033 (1.0565) acc 78.1250 (73.1356) lr 3.6258e-04 eta 1:32:14 +epoch [38/50] batch [300/500] time 0.900 (0.892) data 0.000 (0.003) loss 1.5225 (1.0597) acc 59.3750 (73.0312) lr 3.6258e-04 eta 1:32:09 +epoch [38/50] batch [305/500] time 0.889 (0.892) data 0.000 (0.003) loss 0.7725 (1.0561) acc 81.2500 (73.0943) lr 3.6258e-04 eta 1:32:04 +epoch [38/50] batch [310/500] time 0.914 (0.892) data 0.000 (0.003) loss 1.2842 (1.0537) acc 59.3750 (73.0847) lr 3.6258e-04 eta 1:32:01 +epoch [38/50] batch [315/500] time 0.881 (0.892) data 0.000 (0.003) loss 1.0156 (1.0536) acc 81.2500 (73.1548) lr 3.6258e-04 eta 1:31:54 +epoch [38/50] batch [320/500] time 0.885 (0.891) data 0.000 (0.003) loss 1.3076 (1.0548) acc 75.0000 (73.1445) lr 3.6258e-04 eta 1:31:49 +epoch [38/50] batch [325/500] time 0.862 (0.891) data 0.000 (0.002) loss 0.9951 (1.0568) acc 71.8750 (73.1154) lr 3.6258e-04 eta 1:31:43 +epoch [38/50] batch [330/500] time 0.871 (0.891) data 0.000 (0.002) loss 0.8052 (1.0583) acc 71.8750 (73.0777) lr 3.6258e-04 eta 1:31:37 +epoch [38/50] batch [335/500] time 0.892 (0.891) data 0.000 (0.002) loss 1.0840 (1.0572) acc 65.6250 (73.0690) lr 3.6258e-04 eta 1:31:32 +epoch [38/50] batch [340/500] time 0.897 (0.891) data 0.000 (0.002) loss 1.1592 (1.0591) acc 71.8750 (73.0239) lr 3.6258e-04 eta 1:31:26 +epoch [38/50] batch [345/500] time 0.893 (0.891) data 0.000 (0.002) loss 1.1621 (1.0586) acc 78.1250 (73.0797) lr 3.6258e-04 eta 1:31:22 +epoch [38/50] batch [350/500] time 0.873 (0.891) data 0.000 (0.002) loss 0.9854 (1.0598) acc 78.1250 (73.0536) lr 3.6258e-04 eta 1:31:17 +epoch [38/50] batch [355/500] time 0.861 (0.891) data 0.000 (0.002) loss 0.6738 (1.0574) acc 75.0000 (73.0634) lr 3.6258e-04 eta 1:31:13 +epoch [38/50] batch [360/500] time 0.931 (0.891) data 0.000 (0.002) loss 
1.2686 (1.0591) acc 75.0000 (73.0729) lr 3.6258e-04 eta 1:31:12 +epoch [38/50] batch [365/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.0000 (1.0577) acc 68.7500 (73.0565) lr 3.6258e-04 eta 1:31:06 +epoch [38/50] batch [370/500] time 0.895 (0.891) data 0.000 (0.002) loss 0.9551 (1.0586) acc 71.8750 (72.9899) lr 3.6258e-04 eta 1:31:02 +epoch [38/50] batch [375/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.2568 (1.0606) acc 71.8750 (72.9250) lr 3.6258e-04 eta 1:30:57 +epoch [38/50] batch [380/500] time 0.884 (0.891) data 0.000 (0.002) loss 0.7778 (1.0615) acc 78.1250 (72.9112) lr 3.6258e-04 eta 1:30:52 +epoch [38/50] batch [385/500] time 0.899 (0.891) data 0.000 (0.002) loss 1.1992 (1.0638) acc 75.0000 (72.8896) lr 3.6258e-04 eta 1:30:47 +epoch [38/50] batch [390/500] time 0.898 (0.891) data 0.000 (0.002) loss 0.6904 (1.0629) acc 84.3750 (72.9087) lr 3.6258e-04 eta 1:30:42 +epoch [38/50] batch [395/500] time 0.897 (0.891) data 0.000 (0.002) loss 1.6221 (1.0657) acc 62.5000 (72.8639) lr 3.6258e-04 eta 1:30:37 +epoch [38/50] batch [400/500] time 0.895 (0.891) data 0.000 (0.002) loss 0.9692 (1.0636) acc 71.8750 (72.9219) lr 3.6258e-04 eta 1:30:33 +epoch [38/50] batch [405/500] time 0.879 (0.891) data 0.000 (0.002) loss 1.8018 (1.0663) acc 68.7500 (72.8472) lr 3.6258e-04 eta 1:30:31 +epoch [38/50] batch [410/500] time 0.891 (0.891) data 0.000 (0.002) loss 1.3711 (1.0667) acc 59.3750 (72.8125) lr 3.6258e-04 eta 1:30:26 +epoch [38/50] batch [415/500] time 0.878 (0.891) data 0.000 (0.002) loss 0.9951 (1.0652) acc 75.0000 (72.8389) lr 3.6258e-04 eta 1:30:20 +epoch [38/50] batch [420/500] time 0.902 (0.891) data 0.000 (0.002) loss 0.7627 (1.0670) acc 78.1250 (72.7753) lr 3.6258e-04 eta 1:30:16 +epoch [38/50] batch [425/500] time 0.878 (0.891) data 0.000 (0.002) loss 0.6914 (1.0682) acc 84.3750 (72.7426) lr 3.6258e-04 eta 1:30:12 +epoch [38/50] batch [430/500] time 0.888 (0.891) data 0.000 (0.002) loss 1.3438 (1.0683) acc 56.2500 (72.7398) lr 3.6258e-04 eta 1:30:08 
+epoch [38/50] batch [435/500] time 0.881 (0.891) data 0.000 (0.002) loss 0.6196 (1.0661) acc 90.6250 (72.8017) lr 3.6258e-04 eta 1:30:03 +epoch [38/50] batch [440/500] time 0.865 (0.891) data 0.000 (0.002) loss 1.2275 (1.0681) acc 71.8750 (72.7628) lr 3.6258e-04 eta 1:29:59 +epoch [38/50] batch [445/500] time 0.881 (0.891) data 0.000 (0.002) loss 0.5679 (1.0660) acc 87.5000 (72.8160) lr 3.6258e-04 eta 1:29:54 +epoch [38/50] batch [450/500] time 0.876 (0.891) data 0.000 (0.002) loss 1.5361 (1.0679) acc 65.6250 (72.7708) lr 3.6258e-04 eta 1:29:49 +epoch [38/50] batch [455/500] time 0.886 (0.891) data 0.000 (0.002) loss 1.3516 (1.0701) acc 75.0000 (72.7816) lr 3.6258e-04 eta 1:29:44 +epoch [38/50] batch [460/500] time 0.906 (0.891) data 0.000 (0.002) loss 1.2178 (1.0689) acc 68.7500 (72.7989) lr 3.6258e-04 eta 1:29:40 +epoch [38/50] batch [465/500] time 0.879 (0.891) data 0.000 (0.002) loss 1.2744 (1.0676) acc 68.7500 (72.8159) lr 3.6258e-04 eta 1:29:34 +epoch [38/50] batch [470/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.3174 (1.0689) acc 68.7500 (72.7726) lr 3.6258e-04 eta 1:29:29 +epoch [38/50] batch [475/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.7100 (1.0694) acc 78.1250 (72.7632) lr 3.6258e-04 eta 1:29:24 +epoch [38/50] batch [480/500] time 0.880 (0.891) data 0.000 (0.002) loss 0.6592 (1.0675) acc 87.5000 (72.8581) lr 3.6258e-04 eta 1:29:20 +epoch [38/50] batch [485/500] time 0.904 (0.891) data 0.001 (0.002) loss 0.9126 (1.0667) acc 71.8750 (72.8866) lr 3.6258e-04 eta 1:29:16 +epoch [38/50] batch [490/500] time 0.892 (0.890) data 0.000 (0.002) loss 1.2031 (1.0680) acc 71.8750 (72.8508) lr 3.6258e-04 eta 1:29:11 +epoch [38/50] batch [495/500] time 0.880 (0.890) data 0.000 (0.002) loss 0.6499 (1.0666) acc 75.0000 (72.8346) lr 3.6258e-04 eta 1:29:07 +epoch [38/50] batch [500/500] time 1.017 (0.891) data 0.000 (0.002) loss 1.7080 (1.0664) acc 65.6250 (72.8500) lr 3.1545e-04 eta 1:29:04 +epoch [39/50] batch [5/500] time 0.874 (1.025) data 0.000 (0.153) 
loss 0.4690 (1.0459) acc 84.3750 (75.0000) lr 3.1545e-04 eta 1:42:22 +epoch [39/50] batch [10/500] time 0.863 (0.952) data 0.000 (0.077) loss 1.0283 (1.0414) acc 71.8750 (74.3750) lr 3.1545e-04 eta 1:35:05 +epoch [39/50] batch [15/500] time 0.874 (0.931) data 0.000 (0.051) loss 0.5576 (1.0218) acc 78.1250 (74.3750) lr 3.1545e-04 eta 1:32:49 +epoch [39/50] batch [20/500] time 0.908 (0.921) data 0.000 (0.039) loss 0.8301 (1.0572) acc 71.8750 (74.2188) lr 3.1545e-04 eta 1:31:50 +epoch [39/50] batch [25/500] time 0.873 (0.914) data 0.000 (0.031) loss 0.9556 (1.0424) acc 78.1250 (73.5000) lr 3.1545e-04 eta 1:31:00 +epoch [39/50] batch [30/500] time 0.858 (0.908) data 0.000 (0.026) loss 0.5874 (1.0350) acc 78.1250 (73.0208) lr 3.1545e-04 eta 1:30:20 +epoch [39/50] batch [35/500] time 0.901 (0.905) data 0.000 (0.022) loss 0.8486 (1.0359) acc 78.1250 (72.7679) lr 3.1545e-04 eta 1:30:00 +epoch [39/50] batch [40/500] time 0.888 (0.903) data 0.000 (0.019) loss 1.4189 (1.0515) acc 62.5000 (71.7188) lr 3.1545e-04 eta 1:29:41 +epoch [39/50] batch [45/500] time 0.899 (0.903) data 0.000 (0.017) loss 1.0498 (1.0276) acc 65.6250 (72.0833) lr 3.1545e-04 eta 1:29:36 +epoch [39/50] batch [50/500] time 0.913 (0.902) data 0.000 (0.016) loss 1.0723 (1.0240) acc 71.8750 (72.3125) lr 3.1545e-04 eta 1:29:29 +epoch [39/50] batch [55/500] time 0.886 (0.902) data 0.000 (0.014) loss 1.7734 (1.0444) acc 59.3750 (71.9318) lr 3.1545e-04 eta 1:29:22 +epoch [39/50] batch [60/500] time 1.023 (0.904) data 0.000 (0.013) loss 0.9614 (1.0331) acc 81.2500 (72.2917) lr 3.1545e-04 eta 1:29:27 +epoch [39/50] batch [65/500] time 0.874 (0.902) data 0.000 (0.012) loss 0.6553 (1.0187) acc 75.0000 (72.9327) lr 3.1545e-04 eta 1:29:14 +epoch [39/50] batch [70/500] time 0.899 (0.901) data 0.000 (0.011) loss 0.7256 (1.0203) acc 75.0000 (73.0804) lr 3.1545e-04 eta 1:29:03 +epoch [39/50] batch [75/500] time 0.908 (0.901) data 0.000 (0.011) loss 0.8457 (1.0105) acc 81.2500 (73.4167) lr 3.1545e-04 eta 1:28:56 +epoch 
[39/50] batch [80/500] time 0.873 (0.900) data 0.000 (0.010) loss 0.7056 (1.0138) acc 81.2500 (73.5156) lr 3.1545e-04 eta 1:28:46 +epoch [39/50] batch [85/500] time 0.882 (0.899) data 0.000 (0.009) loss 0.6357 (1.0142) acc 78.1250 (73.4559) lr 3.1545e-04 eta 1:28:35 +epoch [39/50] batch [90/500] time 0.899 (0.897) data 0.000 (0.009) loss 0.7671 (1.0216) acc 68.7500 (73.2986) lr 3.1545e-04 eta 1:28:20 +epoch [39/50] batch [95/500] time 0.881 (0.897) data 0.000 (0.008) loss 0.8428 (1.0238) acc 68.7500 (73.0921) lr 3.1545e-04 eta 1:28:14 +epoch [39/50] batch [100/500] time 0.890 (0.896) data 0.000 (0.008) loss 0.5630 (1.0175) acc 81.2500 (73.2188) lr 3.1545e-04 eta 1:28:07 +epoch [39/50] batch [105/500] time 0.923 (0.897) data 0.000 (0.008) loss 0.9229 (1.0308) acc 81.2500 (73.0357) lr 3.1545e-04 eta 1:28:08 +epoch [39/50] batch [110/500] time 0.864 (0.896) data 0.000 (0.007) loss 1.3799 (1.0261) acc 65.6250 (73.1534) lr 3.1545e-04 eta 1:27:59 +epoch [39/50] batch [115/500] time 0.900 (0.896) data 0.000 (0.007) loss 0.5400 (1.0252) acc 87.5000 (73.2337) lr 3.1545e-04 eta 1:27:52 +epoch [39/50] batch [120/500] time 0.899 (0.896) data 0.001 (0.007) loss 1.1104 (1.0251) acc 65.6250 (73.1510) lr 3.1545e-04 eta 1:27:46 +epoch [39/50] batch [125/500] time 0.895 (0.896) data 0.000 (0.006) loss 1.1924 (1.0259) acc 71.8750 (73.2000) lr 3.1545e-04 eta 1:27:41 +epoch [39/50] batch [130/500] time 0.892 (0.895) data 0.000 (0.006) loss 0.9263 (1.0320) acc 81.2500 (73.2452) lr 3.1545e-04 eta 1:27:34 +epoch [39/50] batch [135/500] time 0.897 (0.895) data 0.000 (0.006) loss 0.8599 (1.0339) acc 78.1250 (73.1713) lr 3.1545e-04 eta 1:27:29 +epoch [39/50] batch [140/500] time 0.905 (0.895) data 0.000 (0.006) loss 0.6816 (1.0267) acc 87.5000 (73.4598) lr 3.1545e-04 eta 1:27:24 +epoch [39/50] batch [145/500] time 0.860 (0.894) data 0.000 (0.006) loss 0.8369 (1.0212) acc 78.1250 (73.6638) lr 3.1545e-04 eta 1:27:15 +epoch [39/50] batch [150/500] time 0.894 (0.894) data 0.000 (0.005) loss 
1.1328 (1.0190) acc 65.6250 (73.6875) lr 3.1545e-04 eta 1:27:08 +epoch [39/50] batch [155/500] time 0.885 (0.893) data 0.000 (0.005) loss 1.0547 (1.0255) acc 68.7500 (73.6492) lr 3.1545e-04 eta 1:27:01 +epoch [39/50] batch [160/500] time 0.920 (0.893) data 0.000 (0.005) loss 0.8472 (1.0200) acc 75.0000 (73.6719) lr 3.1545e-04 eta 1:26:57 +epoch [39/50] batch [165/500] time 0.881 (0.893) data 0.000 (0.005) loss 1.7090 (1.0276) acc 68.7500 (73.6932) lr 3.1545e-04 eta 1:26:50 +epoch [39/50] batch [170/500] time 0.912 (0.893) data 0.000 (0.005) loss 1.1133 (1.0245) acc 71.8750 (73.6581) lr 3.1545e-04 eta 1:26:43 +epoch [39/50] batch [175/500] time 0.885 (0.892) data 0.000 (0.005) loss 1.1514 (1.0238) acc 75.0000 (73.7321) lr 3.1545e-04 eta 1:26:36 +epoch [39/50] batch [180/500] time 0.902 (0.892) data 0.000 (0.005) loss 1.0469 (1.0302) acc 75.0000 (73.6111) lr 3.1545e-04 eta 1:26:32 +epoch [39/50] batch [185/500] time 0.913 (0.893) data 0.000 (0.004) loss 1.6309 (1.0334) acc 56.2500 (73.6486) lr 3.1545e-04 eta 1:26:30 +epoch [39/50] batch [190/500] time 0.872 (0.892) data 0.000 (0.004) loss 1.1953 (1.0342) acc 71.8750 (73.7007) lr 3.1545e-04 eta 1:26:22 +epoch [39/50] batch [195/500] time 0.856 (0.891) data 0.000 (0.004) loss 1.0283 (1.0380) acc 71.8750 (73.7340) lr 3.1545e-04 eta 1:26:14 +epoch [39/50] batch [200/500] time 0.898 (0.891) data 0.000 (0.004) loss 0.9512 (1.0368) acc 78.1250 (73.7812) lr 3.1545e-04 eta 1:26:10 +epoch [39/50] batch [205/500] time 0.869 (0.892) data 0.000 (0.004) loss 1.0303 (1.0442) acc 78.1250 (73.6738) lr 3.1545e-04 eta 1:26:06 +epoch [39/50] batch [210/500] time 0.866 (0.892) data 0.000 (0.004) loss 1.4678 (1.0490) acc 68.7500 (73.5863) lr 3.1545e-04 eta 1:26:01 +epoch [39/50] batch [215/500] time 0.873 (0.891) data 0.000 (0.004) loss 0.7368 (1.0479) acc 78.1250 (73.6047) lr 3.1545e-04 eta 1:25:55 +epoch [39/50] batch [220/500] time 0.896 (0.891) data 0.000 (0.004) loss 0.6118 (1.0484) acc 84.3750 (73.6222) lr 3.1545e-04 eta 1:25:51 
+epoch [39/50] batch [225/500] time 0.861 (0.891) data 0.000 (0.004) loss 1.0283 (1.0472) acc 71.8750 (73.6389) lr 3.1545e-04 eta 1:25:44 +epoch [39/50] batch [230/500] time 0.902 (0.891) data 0.000 (0.004) loss 0.7603 (1.0481) acc 78.1250 (73.5870) lr 3.1545e-04 eta 1:25:39 +epoch [39/50] batch [235/500] time 0.908 (0.891) data 0.000 (0.004) loss 1.2988 (1.0524) acc 71.8750 (73.4973) lr 3.1545e-04 eta 1:25:34 +epoch [39/50] batch [240/500] time 0.888 (0.891) data 0.000 (0.003) loss 0.7549 (1.0462) acc 78.1250 (73.6198) lr 3.1545e-04 eta 1:25:30 +epoch [39/50] batch [245/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.2041 (1.0459) acc 68.7500 (73.6990) lr 3.1545e-04 eta 1:25:25 +epoch [39/50] batch [250/500] time 0.890 (0.891) data 0.000 (0.003) loss 0.8057 (1.0460) acc 81.2500 (73.6625) lr 3.1545e-04 eta 1:25:22 +epoch [39/50] batch [255/500] time 0.882 (0.891) data 0.000 (0.003) loss 0.8887 (1.0421) acc 62.5000 (73.6887) lr 3.1545e-04 eta 1:25:17 +epoch [39/50] batch [260/500] time 0.892 (0.891) data 0.000 (0.003) loss 1.2598 (1.0415) acc 62.5000 (73.6779) lr 3.1545e-04 eta 1:25:12 +epoch [39/50] batch [265/500] time 0.861 (0.891) data 0.000 (0.003) loss 0.8647 (1.0433) acc 68.7500 (73.5849) lr 3.1545e-04 eta 1:25:07 +epoch [39/50] batch [270/500] time 0.905 (0.891) data 0.000 (0.003) loss 1.2373 (1.0435) acc 75.0000 (73.5995) lr 3.1545e-04 eta 1:25:03 +epoch [39/50] batch [275/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.0039 (1.0433) acc 75.0000 (73.6591) lr 3.1545e-04 eta 1:24:57 +epoch [39/50] batch [280/500] time 0.913 (0.890) data 0.000 (0.003) loss 0.6860 (1.0438) acc 84.3750 (73.6496) lr 3.1545e-04 eta 1:24:53 +epoch [39/50] batch [285/500] time 0.899 (0.890) data 0.000 (0.003) loss 1.4072 (1.0471) acc 62.5000 (73.5965) lr 3.1545e-04 eta 1:24:48 +epoch [39/50] batch [290/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.2783 (1.0499) acc 56.2500 (73.5022) lr 3.1545e-04 eta 1:24:43 +epoch [39/50] batch [295/500] time 0.873 (0.890) data 0.000 
(0.003) loss 0.9824 (1.0488) acc 81.2500 (73.6017) lr 3.1545e-04 eta 1:24:38 +epoch [39/50] batch [300/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.0010 (1.0500) acc 81.2500 (73.5625) lr 3.1545e-04 eta 1:24:33 +epoch [39/50] batch [305/500] time 0.855 (0.890) data 0.000 (0.003) loss 1.1387 (1.0486) acc 75.0000 (73.5656) lr 3.1545e-04 eta 1:24:27 +epoch [39/50] batch [310/500] time 0.887 (0.890) data 0.000 (0.003) loss 0.7344 (1.0478) acc 71.8750 (73.4980) lr 3.1545e-04 eta 1:24:21 +epoch [39/50] batch [315/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.1113 (1.0458) acc 71.8750 (73.4722) lr 3.1545e-04 eta 1:24:16 +epoch [39/50] batch [320/500] time 0.886 (0.889) data 0.000 (0.003) loss 0.6001 (1.0473) acc 81.2500 (73.4277) lr 3.1545e-04 eta 1:24:10 +epoch [39/50] batch [325/500] time 0.857 (0.889) data 0.000 (0.003) loss 1.0615 (1.0470) acc 75.0000 (73.4519) lr 3.1545e-04 eta 1:24:04 +epoch [39/50] batch [330/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.4160 (1.0513) acc 56.2500 (73.3428) lr 3.1545e-04 eta 1:23:59 +epoch [39/50] batch [335/500] time 0.851 (0.889) data 0.000 (0.003) loss 1.3672 (1.0520) acc 65.6250 (73.2836) lr 3.1545e-04 eta 1:23:53 +epoch [39/50] batch [340/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.5459 (1.0513) acc 84.3750 (73.2445) lr 3.1545e-04 eta 1:23:49 +epoch [39/50] batch [345/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.8188 (1.0522) acc 81.2500 (73.2790) lr 3.1545e-04 eta 1:23:43 +epoch [39/50] batch [350/500] time 0.848 (0.888) data 0.000 (0.002) loss 0.6792 (1.0533) acc 68.7500 (73.2857) lr 3.1545e-04 eta 1:23:39 +epoch [39/50] batch [355/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.0938 (1.0600) acc 75.0000 (73.1866) lr 3.1545e-04 eta 1:23:34 +epoch [39/50] batch [360/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.2568 (1.0571) acc 65.6250 (73.2292) lr 3.1545e-04 eta 1:23:29 +epoch [39/50] batch [365/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.2295 (1.0559) acc 75.0000 (73.2534) lr 3.1545e-04 
eta 1:23:24 +epoch [39/50] batch [370/500] time 0.915 (0.888) data 0.000 (0.002) loss 0.8999 (1.0564) acc 75.0000 (73.1926) lr 3.1545e-04 eta 1:23:20 +epoch [39/50] batch [375/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.9136 (1.0541) acc 75.0000 (73.2667) lr 3.1545e-04 eta 1:23:15 +epoch [39/50] batch [380/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.6812 (1.0511) acc 84.3750 (73.3553) lr 3.1545e-04 eta 1:23:10 +epoch [39/50] batch [385/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.1162 (1.0471) acc 68.7500 (73.4010) lr 3.1545e-04 eta 1:23:06 +epoch [39/50] batch [390/500] time 1.029 (0.888) data 0.000 (0.002) loss 0.6343 (1.0441) acc 84.3750 (73.4615) lr 3.1545e-04 eta 1:23:04 +epoch [39/50] batch [395/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.7192 (1.0441) acc 81.2500 (73.4731) lr 3.1545e-04 eta 1:22:59 +epoch [39/50] batch [400/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.3730 (1.0460) acc 62.5000 (73.3906) lr 3.1545e-04 eta 1:22:55 +epoch [39/50] batch [405/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.7192 (1.0464) acc 78.1250 (73.3642) lr 3.1545e-04 eta 1:22:50 +epoch [39/50] batch [410/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.9570 (1.0463) acc 71.8750 (73.3689) lr 3.1545e-04 eta 1:22:45 +epoch [39/50] batch [415/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.9346 (1.0483) acc 81.2500 (73.3886) lr 3.1545e-04 eta 1:22:42 +epoch [39/50] batch [420/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.3914 (1.0467) acc 84.3750 (73.3705) lr 3.1545e-04 eta 1:22:37 +epoch [39/50] batch [425/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.2441 (1.0478) acc 68.7500 (73.3382) lr 3.1545e-04 eta 1:22:32 +epoch [39/50] batch [430/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.9556 (1.0491) acc 71.8750 (73.3212) lr 3.1545e-04 eta 1:22:26 +epoch [39/50] batch [435/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.9102 (1.0488) acc 75.0000 (73.3118) lr 3.1545e-04 eta 1:22:22 +epoch [39/50] batch [440/500] time 0.882 (0.888) data 
0.000 (0.002) loss 0.9111 (1.0506) acc 78.1250 (73.2884) lr 3.1545e-04 eta 1:22:17 +epoch [39/50] batch [445/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.8057 (1.0499) acc 81.2500 (73.3216) lr 3.1545e-04 eta 1:22:13 +epoch [39/50] batch [450/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.7622 (1.0475) acc 84.3750 (73.3889) lr 3.1545e-04 eta 1:22:09 +epoch [39/50] batch [455/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.2568 (1.0503) acc 78.1250 (73.3585) lr 3.1545e-04 eta 1:22:05 +epoch [39/50] batch [460/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.4346 (1.0536) acc 68.7500 (73.3016) lr 3.1545e-04 eta 1:22:00 +epoch [39/50] batch [465/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.0957 (1.0512) acc 71.8750 (73.3401) lr 3.1545e-04 eta 1:21:56 +epoch [39/50] batch [470/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.2949 (1.0531) acc 65.6250 (73.3112) lr 3.1545e-04 eta 1:21:52 +epoch [39/50] batch [475/500] time 0.845 (0.888) data 0.000 (0.002) loss 0.8311 (1.0509) acc 78.1250 (73.3553) lr 3.1545e-04 eta 1:21:47 +epoch [39/50] batch [480/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.1416 (1.0521) acc 75.0000 (73.3203) lr 3.1545e-04 eta 1:21:42 +epoch [39/50] batch [485/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.7935 (1.0527) acc 84.3750 (73.3183) lr 3.1545e-04 eta 1:21:37 +epoch [39/50] batch [490/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.7998 (1.0522) acc 75.0000 (73.2781) lr 3.1545e-04 eta 1:21:33 +epoch [39/50] batch [495/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8823 (1.0543) acc 65.6250 (73.1818) lr 3.1545e-04 eta 1:21:28 +epoch [39/50] batch [500/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.5020 (1.0553) acc 65.6250 (73.1875) lr 2.7103e-04 eta 1:21:24 +epoch [40/50] batch [5/500] time 0.880 (1.047) data 0.000 (0.152) loss 1.1416 (1.1608) acc 68.7500 (72.5000) lr 2.7103e-04 eta 1:35:52 +epoch [40/50] batch [10/500] time 0.909 (0.969) data 0.000 (0.076) loss 1.0312 (1.0127) acc 71.8750 (72.8125) lr 2.7103e-04 
eta 1:28:42 +epoch [40/50] batch [15/500] time 0.915 (0.941) data 0.000 (0.051) loss 1.1934 (1.0323) acc 71.8750 (73.3333) lr 2.7103e-04 eta 1:26:00 +epoch [40/50] batch [20/500] time 0.863 (0.924) data 0.000 (0.038) loss 1.4414 (1.0825) acc 59.3750 (72.5000) lr 2.7103e-04 eta 1:24:23 +epoch [40/50] batch [25/500] time 0.861 (0.914) data 0.000 (0.031) loss 1.1514 (1.0833) acc 75.0000 (73.1250) lr 2.7103e-04 eta 1:23:25 +epoch [40/50] batch [30/500] time 0.864 (0.907) data 0.000 (0.026) loss 1.1338 (1.1166) acc 75.0000 (72.6042) lr 2.7103e-04 eta 1:22:41 +epoch [40/50] batch [35/500] time 0.896 (0.905) data 0.000 (0.022) loss 1.2041 (1.0967) acc 71.8750 (72.6786) lr 2.7103e-04 eta 1:22:23 +epoch [40/50] batch [40/500] time 0.907 (0.903) data 0.000 (0.019) loss 0.7559 (1.0671) acc 87.5000 (73.6719) lr 2.7103e-04 eta 1:22:08 +epoch [40/50] batch [45/500] time 1.028 (0.905) data 0.000 (0.017) loss 1.5068 (1.1090) acc 62.5000 (72.7083) lr 2.7103e-04 eta 1:22:17 +epoch [40/50] batch [50/500] time 0.899 (0.903) data 0.000 (0.015) loss 0.7656 (1.0941) acc 78.1250 (72.8125) lr 2.7103e-04 eta 1:22:01 +epoch [40/50] batch [55/500] time 0.900 (0.901) data 0.000 (0.014) loss 0.4604 (1.0883) acc 93.7500 (73.1250) lr 2.7103e-04 eta 1:21:46 +epoch [40/50] batch [60/500] time 0.906 (0.900) data 0.000 (0.013) loss 0.9453 (1.0722) acc 81.2500 (73.5417) lr 2.7103e-04 eta 1:21:33 +epoch [40/50] batch [65/500] time 0.864 (0.898) data 0.000 (0.012) loss 1.5537 (1.0794) acc 68.7500 (73.4135) lr 2.7103e-04 eta 1:21:22 +epoch [40/50] batch [70/500] time 0.911 (0.897) data 0.000 (0.011) loss 1.0039 (1.0591) acc 75.0000 (73.6607) lr 2.7103e-04 eta 1:21:12 +epoch [40/50] batch [75/500] time 0.872 (0.897) data 0.000 (0.010) loss 0.6812 (1.0522) acc 81.2500 (74.0000) lr 2.7103e-04 eta 1:21:07 +epoch [40/50] batch [80/500] time 0.888 (0.896) data 0.000 (0.010) loss 0.6201 (1.0491) acc 75.0000 (73.9844) lr 2.7103e-04 eta 1:20:57 +epoch [40/50] batch [85/500] time 0.903 (0.895) data 0.000 (0.009) 
loss 1.7832 (1.0678) acc 71.8750 (73.8235) lr 2.7103e-04 eta 1:20:49 +epoch [40/50] batch [90/500] time 0.872 (0.896) data 0.000 (0.009) loss 1.3682 (1.0694) acc 68.7500 (73.9236) lr 2.7103e-04 eta 1:20:48 +epoch [40/50] batch [95/500] time 0.901 (0.896) data 0.000 (0.008) loss 0.6963 (1.0605) acc 81.2500 (73.9803) lr 2.7103e-04 eta 1:20:42 +epoch [40/50] batch [100/500] time 0.911 (0.896) data 0.000 (0.008) loss 0.9629 (1.0602) acc 75.0000 (73.8750) lr 2.7103e-04 eta 1:20:39 +epoch [40/50] batch [105/500] time 0.867 (0.896) data 0.000 (0.007) loss 0.7119 (1.0600) acc 81.2500 (73.8393) lr 2.7103e-04 eta 1:20:31 +epoch [40/50] batch [110/500] time 0.860 (0.895) data 0.000 (0.007) loss 0.8018 (1.0598) acc 68.7500 (73.8068) lr 2.7103e-04 eta 1:20:23 +epoch [40/50] batch [115/500] time 0.890 (0.894) data 0.000 (0.007) loss 0.9648 (1.0588) acc 62.5000 (73.8587) lr 2.7103e-04 eta 1:20:16 +epoch [40/50] batch [120/500] time 0.890 (0.894) data 0.000 (0.007) loss 1.0967 (1.0591) acc 65.6250 (73.7760) lr 2.7103e-04 eta 1:20:11 +epoch [40/50] batch [125/500] time 0.898 (0.894) data 0.000 (0.006) loss 1.1162 (1.0609) acc 65.6250 (73.5500) lr 2.7103e-04 eta 1:20:06 +epoch [40/50] batch [130/500] time 0.891 (0.893) data 0.000 (0.006) loss 1.1338 (1.0583) acc 68.7500 (73.5337) lr 2.7103e-04 eta 1:19:57 +epoch [40/50] batch [135/500] time 0.883 (0.893) data 0.000 (0.006) loss 0.8315 (1.0584) acc 81.2500 (73.5880) lr 2.7103e-04 eta 1:19:51 +epoch [40/50] batch [140/500] time 0.894 (0.892) data 0.000 (0.006) loss 1.8447 (1.0566) acc 62.5000 (73.6384) lr 2.7103e-04 eta 1:19:43 +epoch [40/50] batch [145/500] time 0.869 (0.892) data 0.000 (0.005) loss 0.6265 (1.0515) acc 81.2500 (73.7284) lr 2.7103e-04 eta 1:19:37 +epoch [40/50] batch [150/500] time 0.858 (0.892) data 0.001 (0.005) loss 1.0117 (1.0501) acc 75.0000 (73.7917) lr 2.7103e-04 eta 1:19:32 +epoch [40/50] batch [155/500] time 0.855 (0.892) data 0.000 (0.005) loss 1.1543 (1.0631) acc 71.8750 (73.5685) lr 2.7103e-04 eta 1:19:25 
+epoch [40/50] batch [160/500] time 0.891 (0.891) data 0.000 (0.005) loss 0.9365 (1.0569) acc 71.8750 (73.6133) lr 2.7103e-04 eta 1:19:20 +epoch [40/50] batch [165/500] time 0.886 (0.891) data 0.000 (0.005) loss 1.0801 (1.0655) acc 65.6250 (73.3333) lr 2.7103e-04 eta 1:19:15 +epoch [40/50] batch [170/500] time 0.897 (0.892) data 0.000 (0.005) loss 0.9619 (1.0727) acc 68.7500 (73.1618) lr 2.7103e-04 eta 1:19:12 +epoch [40/50] batch [175/500] time 0.898 (0.892) data 0.000 (0.005) loss 1.1602 (1.0707) acc 68.7500 (73.0893) lr 2.7103e-04 eta 1:19:09 +epoch [40/50] batch [180/500] time 0.897 (0.892) data 0.000 (0.004) loss 0.9575 (1.0671) acc 68.7500 (73.1771) lr 2.7103e-04 eta 1:19:04 +epoch [40/50] batch [185/500] time 0.864 (0.892) data 0.000 (0.004) loss 1.7588 (1.0687) acc 59.3750 (73.1419) lr 2.7103e-04 eta 1:18:59 +epoch [40/50] batch [190/500] time 0.907 (0.892) data 0.000 (0.004) loss 0.5005 (1.0643) acc 87.5000 (73.2895) lr 2.7103e-04 eta 1:18:57 +epoch [40/50] batch [195/500] time 0.891 (0.893) data 0.000 (0.004) loss 1.2109 (1.0641) acc 71.8750 (73.2372) lr 2.7103e-04 eta 1:18:55 +epoch [40/50] batch [200/500] time 0.860 (0.893) data 0.000 (0.004) loss 0.4216 (1.0653) acc 87.5000 (73.2812) lr 2.7103e-04 eta 1:18:50 +epoch [40/50] batch [205/500] time 0.899 (0.893) data 0.000 (0.004) loss 0.7407 (1.0670) acc 78.1250 (73.2317) lr 2.7103e-04 eta 1:18:46 +epoch [40/50] batch [210/500] time 0.906 (0.892) data 0.000 (0.004) loss 1.2910 (1.0664) acc 71.8750 (73.2738) lr 2.7103e-04 eta 1:18:40 +epoch [40/50] batch [215/500] time 0.889 (0.892) data 0.000 (0.004) loss 1.1094 (1.0696) acc 78.1250 (73.2413) lr 2.7103e-04 eta 1:18:34 +epoch [40/50] batch [220/500] time 0.889 (0.892) data 0.000 (0.004) loss 0.9922 (1.0709) acc 71.8750 (73.1676) lr 2.7103e-04 eta 1:18:27 +epoch [40/50] batch [225/500] time 0.900 (0.892) data 0.000 (0.004) loss 0.7407 (1.0755) acc 78.1250 (73.0972) lr 2.7103e-04 eta 1:18:22 +epoch [40/50] batch [230/500] time 0.899 (0.891) data 0.000 
(0.004) loss 0.8032 (1.0726) acc 75.0000 (73.1386) lr 2.7103e-04 eta 1:18:18 +epoch [40/50] batch [235/500] time 0.913 (0.892) data 0.000 (0.003) loss 0.9209 (1.0734) acc 71.8750 (73.1250) lr 2.7103e-04 eta 1:18:16 +epoch [40/50] batch [240/500] time 0.920 (0.892) data 0.000 (0.003) loss 1.6475 (1.0711) acc 53.1250 (73.1771) lr 2.7103e-04 eta 1:18:12 +epoch [40/50] batch [245/500] time 0.883 (0.892) data 0.000 (0.003) loss 0.7271 (1.0700) acc 78.1250 (73.2015) lr 2.7103e-04 eta 1:18:07 +epoch [40/50] batch [250/500] time 0.902 (0.892) data 0.000 (0.003) loss 0.7607 (1.0657) acc 75.0000 (73.2375) lr 2.7103e-04 eta 1:18:03 +epoch [40/50] batch [255/500] time 0.854 (0.892) data 0.000 (0.003) loss 1.0332 (1.0676) acc 71.8750 (73.1863) lr 2.7103e-04 eta 1:17:57 +epoch [40/50] batch [260/500] time 0.896 (0.892) data 0.000 (0.003) loss 1.1494 (1.0694) acc 56.2500 (73.1130) lr 2.7103e-04 eta 1:17:52 +epoch [40/50] batch [265/500] time 0.872 (0.891) data 0.000 (0.003) loss 1.2129 (1.0695) acc 56.2500 (73.1132) lr 2.7103e-04 eta 1:17:46 +epoch [40/50] batch [270/500] time 0.888 (0.892) data 0.000 (0.003) loss 0.9531 (1.0682) acc 71.8750 (73.0208) lr 2.7103e-04 eta 1:17:43 +epoch [40/50] batch [275/500] time 0.898 (0.892) data 0.000 (0.003) loss 0.9395 (1.0681) acc 87.5000 (73.1250) lr 2.7103e-04 eta 1:17:38 +epoch [40/50] batch [280/500] time 0.887 (0.892) data 0.000 (0.003) loss 0.5762 (1.0711) acc 87.5000 (73.1027) lr 2.7103e-04 eta 1:17:34 +epoch [40/50] batch [285/500] time 0.863 (0.891) data 0.000 (0.003) loss 0.5562 (1.0714) acc 81.2500 (73.1140) lr 2.7103e-04 eta 1:17:27 +epoch [40/50] batch [290/500] time 0.917 (0.891) data 0.000 (0.003) loss 1.2764 (1.0707) acc 75.0000 (73.0927) lr 2.7103e-04 eta 1:17:22 +epoch [40/50] batch [295/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.0010 (1.0740) acc 68.7500 (73.0297) lr 2.7103e-04 eta 1:17:17 +epoch [40/50] batch [300/500] time 0.887 (0.891) data 0.000 (0.003) loss 1.0811 (1.0773) acc 62.5000 (72.9792) lr 2.7103e-04 
eta 1:17:13 +epoch [40/50] batch [305/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.1855 (1.0791) acc 75.0000 (72.9816) lr 2.7103e-04 eta 1:17:08 +epoch [40/50] batch [310/500] time 0.876 (0.891) data 0.000 (0.003) loss 0.6177 (1.0759) acc 84.3750 (73.0444) lr 2.7103e-04 eta 1:17:04 +epoch [40/50] batch [315/500] time 0.865 (0.891) data 0.000 (0.003) loss 0.7373 (1.0800) acc 78.1250 (72.9663) lr 2.7103e-04 eta 1:16:59 +epoch [40/50] batch [320/500] time 0.907 (0.891) data 0.000 (0.003) loss 1.1904 (1.0810) acc 62.5000 (72.9004) lr 2.7103e-04 eta 1:16:55 +epoch [40/50] batch [325/500] time 0.909 (0.891) data 0.000 (0.003) loss 0.9233 (1.0831) acc 75.0000 (72.9327) lr 2.7103e-04 eta 1:16:51 +epoch [40/50] batch [330/500] time 0.891 (0.891) data 0.000 (0.003) loss 1.0459 (1.0832) acc 68.7500 (72.8977) lr 2.7103e-04 eta 1:16:46 +epoch [40/50] batch [335/500] time 0.888 (0.891) data 0.000 (0.003) loss 0.7300 (1.0833) acc 84.3750 (72.8918) lr 2.7103e-04 eta 1:16:42 +epoch [40/50] batch [340/500] time 0.885 (0.891) data 0.000 (0.002) loss 0.5879 (1.0840) acc 90.6250 (72.9412) lr 2.7103e-04 eta 1:16:37 +epoch [40/50] batch [345/500] time 0.888 (0.891) data 0.000 (0.002) loss 0.7915 (1.0808) acc 75.0000 (72.9801) lr 2.7103e-04 eta 1:16:32 +epoch [40/50] batch [350/500] time 0.851 (0.890) data 0.000 (0.002) loss 1.0127 (1.0789) acc 75.0000 (72.9911) lr 2.7103e-04 eta 1:16:25 +epoch [40/50] batch [355/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.8701 (1.0780) acc 84.3750 (73.0194) lr 2.7103e-04 eta 1:16:19 +epoch [40/50] batch [360/500] time 0.894 (0.890) data 0.000 (0.002) loss 0.7407 (1.0737) acc 78.1250 (73.0556) lr 2.7103e-04 eta 1:16:14 +epoch [40/50] batch [365/500] time 0.912 (0.890) data 0.000 (0.002) loss 0.7188 (1.0728) acc 81.2500 (73.0565) lr 2.7103e-04 eta 1:16:10 +epoch [40/50] batch [370/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.4541 (1.0725) acc 71.8750 (73.0490) lr 2.7103e-04 eta 1:16:05 +epoch [40/50] batch [375/500] time 1.008 (0.890) data 
0.000 (0.002) loss 1.3955 (1.0745) acc 68.7500 (72.9750) lr 2.7103e-04 eta 1:16:02 +epoch [40/50] batch [380/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.4170 (1.0741) acc 75.0000 (72.9770) lr 2.7103e-04 eta 1:15:57 +epoch [40/50] batch [385/500] time 0.909 (0.890) data 0.000 (0.002) loss 0.7876 (1.0776) acc 81.2500 (72.9627) lr 2.7103e-04 eta 1:15:53 +epoch [40/50] batch [390/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.0771 (1.0785) acc 75.0000 (72.9567) lr 2.7103e-04 eta 1:15:49 +epoch [40/50] batch [395/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.0723 (1.0788) acc 68.7500 (72.8956) lr 2.7103e-04 eta 1:15:44 +epoch [40/50] batch [400/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.2412 (1.0775) acc 71.8750 (72.9219) lr 2.7103e-04 eta 1:15:40 +epoch [40/50] batch [405/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.7476 (1.0780) acc 81.2500 (72.9012) lr 2.7103e-04 eta 1:15:37 +epoch [40/50] batch [410/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.4600 (1.0803) acc 59.3750 (72.8277) lr 2.7103e-04 eta 1:15:32 +epoch [40/50] batch [415/500] time 0.865 (0.890) data 0.000 (0.002) loss 0.8726 (1.0783) acc 71.8750 (72.8690) lr 2.7103e-04 eta 1:15:26 +epoch [40/50] batch [420/500] time 0.866 (0.890) data 0.000 (0.002) loss 0.8428 (1.0784) acc 71.8750 (72.8125) lr 2.7103e-04 eta 1:15:22 +epoch [40/50] batch [425/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.7593 (1.0781) acc 84.3750 (72.8162) lr 2.7103e-04 eta 1:15:17 +epoch [40/50] batch [430/500] time 0.863 (0.890) data 0.000 (0.002) loss 0.7212 (1.0786) acc 81.2500 (72.8198) lr 2.7103e-04 eta 1:15:12 +epoch [40/50] batch [435/500] time 0.872 (0.890) data 0.000 (0.002) loss 1.1465 (1.0796) acc 65.6250 (72.8089) lr 2.7103e-04 eta 1:15:05 +epoch [40/50] batch [440/500] time 0.854 (0.889) data 0.000 (0.002) loss 2.0195 (1.0834) acc 56.2500 (72.7486) lr 2.7103e-04 eta 1:15:00 +epoch [40/50] batch [445/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.2500 (1.0844) acc 71.8750 (72.7247) lr 
2.7103e-04 eta 1:14:56 +epoch [40/50] batch [450/500] time 0.853 (0.889) data 0.000 (0.002) loss 0.8750 (1.0840) acc 78.1250 (72.7778) lr 2.7103e-04 eta 1:14:51 +epoch [40/50] batch [455/500] time 0.888 (0.889) data 0.000 (0.002) loss 2.0547 (1.0868) acc 53.1250 (72.7266) lr 2.7103e-04 eta 1:14:46 +epoch [40/50] batch [460/500] time 0.917 (0.889) data 0.000 (0.002) loss 1.2500 (1.0864) acc 75.0000 (72.7174) lr 2.7103e-04 eta 1:14:41 +epoch [40/50] batch [465/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.6177 (1.0846) acc 81.2500 (72.7352) lr 2.7103e-04 eta 1:14:37 +epoch [40/50] batch [470/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.2861 (1.0854) acc 68.7500 (72.6928) lr 2.7103e-04 eta 1:14:32 +epoch [40/50] batch [475/500] time 0.914 (0.889) data 0.000 (0.002) loss 1.0273 (1.0851) acc 71.8750 (72.7105) lr 2.7103e-04 eta 1:14:29 +epoch [40/50] batch [480/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.0586 (1.0830) acc 78.1250 (72.7930) lr 2.7103e-04 eta 1:14:24 +epoch [40/50] batch [485/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.2705 (1.0848) acc 68.7500 (72.7513) lr 2.7103e-04 eta 1:14:19 +epoch [40/50] batch [490/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.5068 (1.0846) acc 71.8750 (72.7997) lr 2.7103e-04 eta 1:14:15 +epoch [40/50] batch [495/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.1240 (1.0835) acc 68.7500 (72.8220) lr 2.7103e-04 eta 1:14:10 +epoch [40/50] batch [500/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.2480 (1.0844) acc 71.8750 (72.8250) lr 2.2949e-04 eta 1:14:06 +epoch [41/50] batch [5/500] time 0.884 (1.027) data 0.000 (0.148) loss 0.8105 (1.0602) acc 78.1250 (71.2500) lr 2.2949e-04 eta 1:25:32 +epoch [41/50] batch [10/500] time 0.892 (0.957) data 0.000 (0.074) loss 0.6592 (1.0748) acc 78.1250 (70.6250) lr 2.2949e-04 eta 1:19:36 +epoch [41/50] batch [15/500] time 0.897 (0.935) data 0.000 (0.049) loss 1.0391 (1.0599) acc 68.7500 (70.6250) lr 2.2949e-04 eta 1:17:40 +epoch [41/50] batch [20/500] time 0.898 (0.923) 
data 0.000 (0.037) loss 0.9551 (1.0625) acc 71.8750 (70.6250) lr 2.2949e-04 eta 1:16:38 +epoch [41/50] batch [25/500] time 1.035 (0.922) data 0.000 (0.030) loss 1.2988 (1.0298) acc 78.1250 (71.6250) lr 2.2949e-04 eta 1:16:26 +epoch [41/50] batch [30/500] time 0.884 (0.917) data 0.001 (0.025) loss 1.2207 (1.0515) acc 65.6250 (71.8750) lr 2.2949e-04 eta 1:15:57 +epoch [41/50] batch [35/500] time 0.885 (0.914) data 0.000 (0.021) loss 1.1514 (1.0884) acc 75.0000 (71.4286) lr 2.2949e-04 eta 1:15:35 +epoch [41/50] batch [40/500] time 0.889 (0.910) data 0.000 (0.019) loss 2.1172 (1.1198) acc 62.5000 (71.2500) lr 2.2949e-04 eta 1:15:14 +epoch [41/50] batch [45/500] time 0.902 (0.908) data 0.000 (0.017) loss 0.8613 (1.1085) acc 71.8750 (71.5278) lr 2.2949e-04 eta 1:15:00 +epoch [41/50] batch [50/500] time 0.872 (0.906) data 0.000 (0.015) loss 1.5635 (1.1286) acc 68.7500 (71.3125) lr 2.2949e-04 eta 1:14:46 +epoch [41/50] batch [55/500] time 0.869 (0.904) data 0.000 (0.014) loss 1.1445 (1.1231) acc 68.7500 (71.6477) lr 2.2949e-04 eta 1:14:28 +epoch [41/50] batch [60/500] time 0.886 (0.902) data 0.000 (0.013) loss 1.3457 (1.1249) acc 78.1250 (71.9792) lr 2.2949e-04 eta 1:14:17 +epoch [41/50] batch [65/500] time 0.889 (0.901) data 0.000 (0.012) loss 1.0459 (1.1155) acc 81.2500 (72.3077) lr 2.2949e-04 eta 1:14:05 +epoch [41/50] batch [70/500] time 0.874 (0.902) data 0.000 (0.011) loss 0.8994 (1.0957) acc 65.6250 (72.7232) lr 2.2949e-04 eta 1:14:05 +epoch [41/50] batch [75/500] time 0.884 (0.901) data 0.000 (0.010) loss 0.7363 (1.0844) acc 87.5000 (72.8750) lr 2.2949e-04 eta 1:13:57 +epoch [41/50] batch [80/500] time 0.893 (0.900) data 0.000 (0.009) loss 0.9912 (1.0909) acc 81.2500 (72.7344) lr 2.2949e-04 eta 1:13:49 +epoch [41/50] batch [85/500] time 0.896 (0.900) data 0.000 (0.009) loss 1.2207 (1.0939) acc 71.8750 (72.7206) lr 2.2949e-04 eta 1:13:42 +epoch [41/50] batch [90/500] time 0.880 (0.898) data 0.000 (0.008) loss 1.3906 (1.0938) acc 78.1250 (72.9514) lr 2.2949e-04 eta 
1:13:31 +epoch [41/50] batch [95/500] time 0.900 (0.898) data 0.000 (0.008) loss 0.8828 (1.0876) acc 71.8750 (73.0263) lr 2.2949e-04 eta 1:13:24 +epoch [41/50] batch [100/500] time 0.855 (0.897) data 0.000 (0.008) loss 0.5918 (1.0710) acc 90.6250 (73.5312) lr 2.2949e-04 eta 1:13:15 +epoch [41/50] batch [105/500] time 0.864 (0.896) data 0.000 (0.007) loss 1.1152 (1.0857) acc 65.6250 (73.0952) lr 2.2949e-04 eta 1:13:06 +epoch [41/50] batch [110/500] time 0.908 (0.896) data 0.000 (0.007) loss 1.3652 (1.0850) acc 59.3750 (73.0114) lr 2.2949e-04 eta 1:13:01 +epoch [41/50] batch [115/500] time 0.889 (0.896) data 0.000 (0.007) loss 1.3564 (1.0920) acc 59.3750 (72.8261) lr 2.2949e-04 eta 1:12:56 +epoch [41/50] batch [120/500] time 0.859 (0.895) data 0.000 (0.006) loss 0.9248 (1.0855) acc 75.0000 (72.9948) lr 2.2949e-04 eta 1:12:47 +epoch [41/50] batch [125/500] time 0.862 (0.894) data 0.000 (0.006) loss 0.6748 (1.0735) acc 65.6250 (73.1750) lr 2.2949e-04 eta 1:12:39 +epoch [41/50] batch [130/500] time 0.889 (0.894) data 0.000 (0.006) loss 0.7402 (1.0737) acc 87.5000 (73.1250) lr 2.2949e-04 eta 1:12:31 +epoch [41/50] batch [135/500] time 0.896 (0.894) data 0.000 (0.006) loss 1.5918 (1.0793) acc 56.2500 (73.0093) lr 2.2949e-04 eta 1:12:27 +epoch [41/50] batch [140/500] time 0.887 (0.893) data 0.000 (0.006) loss 0.6592 (1.0668) acc 78.1250 (73.2812) lr 2.2949e-04 eta 1:12:21 +epoch [41/50] batch [145/500] time 0.900 (0.893) data 0.000 (0.005) loss 1.2070 (1.0630) acc 75.0000 (73.4483) lr 2.2949e-04 eta 1:12:16 +epoch [41/50] batch [150/500] time 0.883 (0.893) data 0.000 (0.005) loss 1.2109 (1.0691) acc 75.0000 (73.3333) lr 2.2949e-04 eta 1:12:11 +epoch [41/50] batch [155/500] time 0.898 (0.893) data 0.000 (0.005) loss 0.7393 (1.0711) acc 81.2500 (73.2863) lr 2.2949e-04 eta 1:12:07 +epoch [41/50] batch [160/500] time 0.909 (0.893) data 0.000 (0.005) loss 1.2334 (1.0677) acc 65.6250 (73.3008) lr 2.2949e-04 eta 1:12:00 +epoch [41/50] batch [165/500] time 1.037 (0.893) data 0.000 
(0.005) loss 1.2461 (1.0664) acc 65.6250 (73.3144) lr 2.2949e-04 eta 1:11:59 +epoch [41/50] batch [170/500] time 0.891 (0.894) data 0.000 (0.005) loss 1.0781 (1.0653) acc 75.0000 (73.3640) lr 2.2949e-04 eta 1:11:56 +epoch [41/50] batch [175/500] time 0.907 (0.894) data 0.000 (0.004) loss 1.2314 (1.0706) acc 68.7500 (73.1607) lr 2.2949e-04 eta 1:11:52 +epoch [41/50] batch [180/500] time 0.900 (0.894) data 0.000 (0.004) loss 1.2275 (1.0694) acc 71.8750 (73.2465) lr 2.2949e-04 eta 1:11:47 +epoch [41/50] batch [185/500] time 0.891 (0.894) data 0.000 (0.004) loss 1.3604 (1.0711) acc 71.8750 (73.1926) lr 2.2949e-04 eta 1:11:44 +epoch [41/50] batch [190/500] time 0.861 (0.893) data 0.000 (0.004) loss 1.4893 (1.0728) acc 75.0000 (73.2237) lr 2.2949e-04 eta 1:11:37 +epoch [41/50] batch [195/500] time 0.903 (0.893) data 0.000 (0.004) loss 1.2676 (1.0722) acc 65.6250 (73.2051) lr 2.2949e-04 eta 1:11:31 +epoch [41/50] batch [200/500] time 0.866 (0.893) data 0.000 (0.004) loss 1.0186 (1.0757) acc 75.0000 (73.1562) lr 2.2949e-04 eta 1:11:26 +epoch [41/50] batch [205/500] time 0.863 (0.892) data 0.000 (0.004) loss 1.0488 (1.0764) acc 75.0000 (73.1555) lr 2.2949e-04 eta 1:11:19 +epoch [41/50] batch [210/500] time 0.896 (0.893) data 0.000 (0.004) loss 0.8350 (1.0743) acc 75.0000 (73.1548) lr 2.2949e-04 eta 1:11:17 +epoch [41/50] batch [215/500] time 0.898 (0.893) data 0.000 (0.004) loss 1.1816 (1.0759) acc 62.5000 (73.0959) lr 2.2949e-04 eta 1:11:12 +epoch [41/50] batch [220/500] time 0.884 (0.892) data 0.000 (0.004) loss 1.1387 (1.0798) acc 71.8750 (73.0966) lr 2.2949e-04 eta 1:11:06 +epoch [41/50] batch [225/500] time 0.896 (0.892) data 0.000 (0.004) loss 1.1533 (1.0792) acc 71.8750 (73.0972) lr 2.2949e-04 eta 1:11:00 +epoch [41/50] batch [230/500] time 0.881 (0.892) data 0.000 (0.003) loss 1.3799 (1.0806) acc 78.1250 (73.1386) lr 2.2949e-04 eta 1:10:54 +epoch [41/50] batch [235/500] time 0.888 (0.892) data 0.000 (0.003) loss 0.6821 (1.0794) acc 81.2500 (73.1915) lr 2.2949e-04 
eta 1:10:48 +epoch [41/50] batch [240/500] time 0.889 (0.891) data 0.000 (0.003) loss 0.8496 (1.0774) acc 84.3750 (73.2682) lr 2.2949e-04 eta 1:10:42 +epoch [41/50] batch [245/500] time 0.887 (0.891) data 0.000 (0.003) loss 0.7041 (1.0795) acc 78.1250 (73.2015) lr 2.2949e-04 eta 1:10:37 +epoch [41/50] batch [250/500] time 0.909 (0.891) data 0.000 (0.003) loss 0.5327 (1.0756) acc 78.1250 (73.2375) lr 2.2949e-04 eta 1:10:33 +epoch [41/50] batch [255/500] time 0.913 (0.891) data 0.000 (0.003) loss 1.3174 (1.0748) acc 62.5000 (73.2721) lr 2.2949e-04 eta 1:10:29 +epoch [41/50] batch [260/500] time 0.872 (0.891) data 0.000 (0.003) loss 1.6543 (1.0755) acc 75.0000 (73.2572) lr 2.2949e-04 eta 1:10:23 +epoch [41/50] batch [265/500] time 0.913 (0.891) data 0.000 (0.003) loss 1.2441 (1.0787) acc 68.7500 (73.1958) lr 2.2949e-04 eta 1:10:19 +epoch [41/50] batch [270/500] time 0.883 (0.891) data 0.000 (0.003) loss 1.0098 (1.0767) acc 68.7500 (73.2060) lr 2.2949e-04 eta 1:10:14 +epoch [41/50] batch [275/500] time 0.896 (0.891) data 0.000 (0.003) loss 0.6572 (1.0761) acc 81.2500 (73.2273) lr 2.2949e-04 eta 1:10:10 +epoch [41/50] batch [280/500] time 0.873 (0.891) data 0.000 (0.003) loss 0.8906 (1.0730) acc 84.3750 (73.3371) lr 2.2949e-04 eta 1:10:04 +epoch [41/50] batch [285/500] time 0.898 (0.891) data 0.000 (0.003) loss 0.6665 (1.0749) acc 78.1250 (73.1908) lr 2.2949e-04 eta 1:09:59 +epoch [41/50] batch [290/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.0420 (1.0744) acc 71.8750 (73.1681) lr 2.2949e-04 eta 1:09:54 +epoch [41/50] batch [295/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.3174 (1.0764) acc 56.2500 (73.0508) lr 2.2949e-04 eta 1:09:48 +epoch [41/50] batch [300/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.1631 (1.0733) acc 71.8750 (73.1458) lr 2.2949e-04 eta 1:09:44 +epoch [41/50] batch [305/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.1924 (1.0728) acc 68.7500 (73.1250) lr 2.2949e-04 eta 1:09:40 +epoch [41/50] batch [310/500] time 0.875 (0.891) data 
0.000 (0.003) loss 1.1924 (1.0732) acc 68.7500 (73.0746) lr 2.2949e-04 eta 1:09:37 +epoch [41/50] batch [315/500] time 0.862 (0.891) data 0.000 (0.003) loss 1.1914 (1.0751) acc 68.7500 (72.9663) lr 2.2949e-04 eta 1:09:32 +epoch [41/50] batch [320/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.3877 (1.0762) acc 62.5000 (72.9590) lr 2.2949e-04 eta 1:09:28 +epoch [41/50] batch [325/500] time 0.888 (0.891) data 0.000 (0.003) loss 1.1650 (1.0727) acc 62.5000 (72.9904) lr 2.2949e-04 eta 1:09:23 +epoch [41/50] batch [330/500] time 0.864 (0.891) data 0.000 (0.002) loss 1.0693 (1.0747) acc 62.5000 (72.9167) lr 2.2949e-04 eta 1:09:18 +epoch [41/50] batch [335/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.0850 (1.0740) acc 75.0000 (72.9478) lr 2.2949e-04 eta 1:09:12 +epoch [41/50] batch [340/500] time 0.891 (0.890) data 0.000 (0.002) loss 0.8057 (1.0709) acc 78.1250 (73.0331) lr 2.2949e-04 eta 1:09:08 +epoch [41/50] batch [345/500] time 0.888 (0.890) data 0.000 (0.002) loss 0.9927 (1.0685) acc 78.1250 (73.0888) lr 2.2949e-04 eta 1:09:03 +epoch [41/50] batch [350/500] time 0.892 (0.890) data 0.001 (0.002) loss 0.8955 (1.0699) acc 81.2500 (73.1161) lr 2.2949e-04 eta 1:08:59 +epoch [41/50] batch [355/500] time 0.887 (0.891) data 0.001 (0.002) loss 0.5215 (1.0670) acc 84.3750 (73.1778) lr 2.2949e-04 eta 1:08:58 +epoch [41/50] batch [360/500] time 0.911 (0.891) data 0.000 (0.002) loss 0.7510 (1.0687) acc 84.3750 (73.1858) lr 2.2949e-04 eta 1:08:55 +epoch [41/50] batch [365/500] time 0.857 (0.891) data 0.000 (0.002) loss 1.3457 (1.0724) acc 62.5000 (73.1507) lr 2.2949e-04 eta 1:08:50 +epoch [41/50] batch [370/500] time 0.922 (0.891) data 0.000 (0.002) loss 1.1973 (1.0757) acc 71.8750 (73.0828) lr 2.2949e-04 eta 1:08:46 +epoch [41/50] batch [375/500] time 0.902 (0.891) data 0.000 (0.002) loss 1.6162 (1.0738) acc 65.6250 (73.1083) lr 2.2949e-04 eta 1:08:42 +epoch [41/50] batch [380/500] time 0.879 (0.891) data 0.000 (0.002) loss 1.2070 (1.0717) acc 65.6250 (73.1661) lr 
2.2949e-04 eta 1:08:37 +epoch [41/50] batch [385/500] time 0.889 (0.891) data 0.000 (0.002) loss 1.4580 (1.0721) acc 65.6250 (73.1818) lr 2.2949e-04 eta 1:08:33 +epoch [41/50] batch [390/500] time 0.864 (0.891) data 0.000 (0.002) loss 0.7256 (1.0709) acc 81.2500 (73.1971) lr 2.2949e-04 eta 1:08:28 +epoch [41/50] batch [395/500] time 0.865 (0.891) data 0.000 (0.002) loss 1.0625 (1.0703) acc 68.7500 (73.2041) lr 2.2949e-04 eta 1:08:23 +epoch [41/50] batch [400/500] time 0.880 (0.891) data 0.000 (0.002) loss 0.7261 (1.0694) acc 78.1250 (73.2656) lr 2.2949e-04 eta 1:08:18 +epoch [41/50] batch [405/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.0576 (1.0700) acc 81.2500 (73.2330) lr 2.2949e-04 eta 1:08:13 +epoch [41/50] batch [410/500] time 0.862 (0.891) data 0.001 (0.002) loss 0.9282 (1.0714) acc 78.1250 (73.2393) lr 2.2949e-04 eta 1:08:09 +epoch [41/50] batch [415/500] time 0.881 (0.891) data 0.000 (0.002) loss 1.1484 (1.0716) acc 68.7500 (73.2605) lr 2.2949e-04 eta 1:08:04 +epoch [41/50] batch [420/500] time 0.892 (0.891) data 0.000 (0.002) loss 0.8452 (1.0701) acc 84.3750 (73.3333) lr 2.2949e-04 eta 1:07:59 +epoch [41/50] batch [425/500] time 0.873 (0.891) data 0.000 (0.002) loss 1.3877 (1.0702) acc 75.0000 (73.3897) lr 2.2949e-04 eta 1:07:55 +epoch [41/50] batch [430/500] time 0.884 (0.891) data 0.000 (0.002) loss 1.3135 (1.0701) acc 71.8750 (73.3576) lr 2.2949e-04 eta 1:07:51 +epoch [41/50] batch [435/500] time 0.871 (0.891) data 0.000 (0.002) loss 0.5532 (1.0692) acc 81.2500 (73.3764) lr 2.2949e-04 eta 1:07:46 +epoch [41/50] batch [440/500] time 0.913 (0.891) data 0.000 (0.002) loss 0.9473 (1.0679) acc 78.1250 (73.4162) lr 2.2949e-04 eta 1:07:41 +epoch [41/50] batch [445/500] time 0.876 (0.891) data 0.000 (0.002) loss 0.9199 (1.0671) acc 78.1250 (73.4621) lr 2.2949e-04 eta 1:07:37 +epoch [41/50] batch [450/500] time 0.891 (0.891) data 0.000 (0.002) loss 1.2197 (1.0672) acc 75.0000 (73.4583) lr 2.2949e-04 eta 1:07:32 +epoch [41/50] batch [455/500] time 0.896 
(0.891) data 0.000 (0.002) loss 1.0703 (1.0673) acc 71.8750 (73.4478) lr 2.2949e-04 eta 1:07:29 +epoch [41/50] batch [460/500] time 0.876 (0.891) data 0.000 (0.002) loss 0.9692 (1.0692) acc 81.2500 (73.4443) lr 2.2949e-04 eta 1:07:24 +epoch [41/50] batch [465/500] time 0.894 (0.891) data 0.000 (0.002) loss 1.0820 (1.0691) acc 75.0000 (73.4274) lr 2.2949e-04 eta 1:07:20 +epoch [41/50] batch [470/500] time 0.877 (0.891) data 0.000 (0.002) loss 1.1680 (1.0699) acc 65.6250 (73.3777) lr 2.2949e-04 eta 1:07:15 +epoch [41/50] batch [475/500] time 0.890 (0.891) data 0.000 (0.002) loss 1.4004 (1.0698) acc 71.8750 (73.3487) lr 2.2949e-04 eta 1:07:10 +epoch [41/50] batch [480/500] time 0.906 (0.891) data 0.000 (0.002) loss 1.2324 (1.0707) acc 68.7500 (73.3203) lr 2.2949e-04 eta 1:07:06 +epoch [41/50] batch [485/500] time 0.853 (0.891) data 0.001 (0.002) loss 0.9751 (1.0690) acc 71.8750 (73.3827) lr 2.2949e-04 eta 1:07:01 +epoch [41/50] batch [490/500] time 0.877 (0.891) data 0.000 (0.002) loss 0.9277 (1.0701) acc 75.0000 (73.3482) lr 2.2949e-04 eta 1:06:56 +epoch [41/50] batch [495/500] time 1.012 (0.891) data 0.000 (0.002) loss 0.7261 (1.0684) acc 78.1250 (73.3333) lr 2.2949e-04 eta 1:06:52 +epoch [41/50] batch [500/500] time 0.861 (0.891) data 0.000 (0.002) loss 1.1318 (1.0661) acc 68.7500 (73.3563) lr 1.9098e-04 eta 1:06:47 +epoch [42/50] batch [5/500] time 0.864 (1.028) data 0.000 (0.147) loss 0.5605 (0.8788) acc 84.3750 (77.5000) lr 1.9098e-04 eta 1:17:01 +epoch [42/50] batch [10/500] time 0.907 (0.956) data 0.000 (0.074) loss 1.5537 (1.0698) acc 59.3750 (73.7500) lr 1.9098e-04 eta 1:11:31 +epoch [42/50] batch [15/500] time 0.879 (0.928) data 0.000 (0.049) loss 1.0244 (1.0806) acc 78.1250 (72.0833) lr 1.9098e-04 eta 1:09:22 +epoch [42/50] batch [20/500] time 0.911 (0.915) data 0.000 (0.037) loss 0.7759 (1.0518) acc 71.8750 (72.5000) lr 1.9098e-04 eta 1:08:19 +epoch [42/50] batch [25/500] time 0.881 (0.910) data 0.000 (0.030) loss 1.1660 (1.0416) acc 75.0000 (72.6250) lr 
1.9098e-04 eta 1:07:52 +epoch [42/50] batch [30/500] time 0.887 (0.904) data 0.000 (0.025) loss 0.7925 (1.0555) acc 78.1250 (72.3958) lr 1.9098e-04 eta 1:07:21 +epoch [42/50] batch [35/500] time 0.885 (0.903) data 0.000 (0.021) loss 0.4365 (1.0139) acc 87.5000 (73.4821) lr 1.9098e-04 eta 1:07:10 +epoch [42/50] batch [40/500] time 0.911 (0.905) data 0.000 (0.019) loss 0.9971 (1.0250) acc 68.7500 (72.8906) lr 1.9098e-04 eta 1:07:17 +epoch [42/50] batch [45/500] time 0.852 (0.902) data 0.000 (0.017) loss 0.6621 (1.0079) acc 81.2500 (73.3333) lr 1.9098e-04 eta 1:06:59 +epoch [42/50] batch [50/500] time 0.857 (0.900) data 0.000 (0.015) loss 0.6626 (0.9975) acc 84.3750 (73.8750) lr 1.9098e-04 eta 1:06:44 +epoch [42/50] batch [55/500] time 0.872 (0.899) data 0.000 (0.014) loss 1.2148 (1.0039) acc 75.0000 (73.5227) lr 1.9098e-04 eta 1:06:34 +epoch [42/50] batch [60/500] time 0.875 (0.898) data 0.000 (0.012) loss 0.8154 (1.0153) acc 84.3750 (73.3333) lr 1.9098e-04 eta 1:06:25 +epoch [42/50] batch [65/500] time 0.919 (0.897) data 0.000 (0.012) loss 1.2891 (1.0170) acc 68.7500 (73.2212) lr 1.9098e-04 eta 1:06:20 +epoch [42/50] batch [70/500] time 0.872 (0.897) data 0.000 (0.011) loss 1.3066 (1.0299) acc 68.7500 (72.6786) lr 1.9098e-04 eta 1:06:11 +epoch [42/50] batch [75/500] time 0.914 (0.897) data 0.000 (0.010) loss 1.2383 (1.0335) acc 68.7500 (72.6667) lr 1.9098e-04 eta 1:06:07 +epoch [42/50] batch [80/500] time 0.897 (0.896) data 0.000 (0.009) loss 1.8203 (1.0496) acc 59.3750 (72.3828) lr 1.9098e-04 eta 1:05:58 +epoch [42/50] batch [85/500] time 0.872 (0.895) data 0.000 (0.009) loss 1.2539 (1.0659) acc 71.8750 (72.0956) lr 1.9098e-04 eta 1:05:51 +epoch [42/50] batch [90/500] time 0.874 (0.894) data 0.000 (0.008) loss 1.4355 (1.0739) acc 71.8750 (71.9444) lr 1.9098e-04 eta 1:05:40 +epoch [42/50] batch [95/500] time 0.885 (0.893) data 0.000 (0.008) loss 1.3252 (1.0725) acc 75.0000 (72.2039) lr 1.9098e-04 eta 1:05:32 +epoch [42/50] batch [100/500] time 0.870 (0.892) data 
0.000 (0.008) loss 1.1309 (1.0719) acc 75.0000 (72.4375) lr 1.9098e-04 eta 1:05:25 +epoch [42/50] batch [105/500] time 0.890 (0.892) data 0.000 (0.007) loss 1.4541 (1.0859) acc 65.6250 (72.0238) lr 1.9098e-04 eta 1:05:22 +epoch [42/50] batch [110/500] time 0.878 (0.892) data 0.000 (0.007) loss 1.0459 (1.0909) acc 71.8750 (71.9602) lr 1.9098e-04 eta 1:05:17 +epoch [42/50] batch [115/500] time 0.883 (0.892) data 0.000 (0.007) loss 1.4697 (1.0811) acc 71.8750 (72.1467) lr 1.9098e-04 eta 1:05:13 +epoch [42/50] batch [120/500] time 0.904 (0.892) data 0.000 (0.006) loss 0.8276 (1.0839) acc 87.5000 (72.2396) lr 1.9098e-04 eta 1:05:06 +epoch [42/50] batch [125/500] time 0.861 (0.891) data 0.000 (0.006) loss 0.6416 (1.0805) acc 81.2500 (72.1750) lr 1.9098e-04 eta 1:04:59 +epoch [42/50] batch [130/500] time 0.910 (0.891) data 0.000 (0.006) loss 0.4150 (1.0804) acc 93.7500 (72.2356) lr 1.9098e-04 eta 1:04:55 +epoch [42/50] batch [135/500] time 0.894 (0.891) data 0.000 (0.006) loss 0.7510 (1.0747) acc 62.5000 (72.4306) lr 1.9098e-04 eta 1:04:49 +epoch [42/50] batch [140/500] time 0.907 (0.892) data 0.000 (0.005) loss 0.4741 (1.0651) acc 90.6250 (72.6786) lr 1.9098e-04 eta 1:04:49 +epoch [42/50] batch [145/500] time 0.864 (0.892) data 0.000 (0.005) loss 0.5381 (1.0611) acc 84.3750 (72.7586) lr 1.9098e-04 eta 1:04:42 +epoch [42/50] batch [150/500] time 0.898 (0.891) data 0.000 (0.005) loss 0.8555 (1.0629) acc 78.1250 (72.7292) lr 1.9098e-04 eta 1:04:36 +epoch [42/50] batch [155/500] time 0.900 (0.891) data 0.000 (0.005) loss 0.6064 (1.0624) acc 87.5000 (72.7621) lr 1.9098e-04 eta 1:04:32 +epoch [42/50] batch [160/500] time 0.907 (0.892) data 0.000 (0.005) loss 1.3926 (1.0651) acc 68.7500 (72.7539) lr 1.9098e-04 eta 1:04:29 +epoch [42/50] batch [165/500] time 0.903 (0.892) data 0.000 (0.005) loss 1.4717 (1.0668) acc 65.6250 (72.7652) lr 1.9098e-04 eta 1:04:25 +epoch [42/50] batch [170/500] time 0.861 (0.892) data 0.000 (0.005) loss 0.8237 (1.0712) acc 81.2500 (72.7022) lr 
1.9098e-04 eta 1:04:20 +epoch [42/50] batch [175/500] time 0.883 (0.891) data 0.000 (0.004) loss 0.9448 (1.0668) acc 71.8750 (72.7321) lr 1.9098e-04 eta 1:04:15 +epoch [42/50] batch [180/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.7080 (1.0658) acc 62.5000 (72.7778) lr 1.9098e-04 eta 1:04:12 +epoch [42/50] batch [185/500] time 0.862 (0.892) data 0.000 (0.004) loss 0.8779 (1.0650) acc 71.8750 (72.8209) lr 1.9098e-04 eta 1:04:08 +epoch [42/50] batch [190/500] time 0.895 (0.892) data 0.000 (0.004) loss 0.7480 (1.0607) acc 81.2500 (72.8783) lr 1.9098e-04 eta 1:04:04 +epoch [42/50] batch [195/500] time 0.882 (0.892) data 0.000 (0.004) loss 0.7729 (1.0598) acc 78.1250 (72.9167) lr 1.9098e-04 eta 1:03:58 +epoch [42/50] batch [200/500] time 0.892 (0.892) data 0.000 (0.004) loss 0.8643 (1.0586) acc 68.7500 (72.9062) lr 1.9098e-04 eta 1:03:53 +epoch [42/50] batch [205/500] time 0.875 (0.892) data 0.001 (0.004) loss 0.8979 (1.0633) acc 75.0000 (72.9573) lr 1.9098e-04 eta 1:03:49 +epoch [42/50] batch [210/500] time 0.935 (0.892) data 0.001 (0.004) loss 0.8872 (1.0615) acc 71.8750 (73.0060) lr 1.9098e-04 eta 1:03:44 +epoch [42/50] batch [215/500] time 0.891 (0.892) data 0.000 (0.004) loss 1.5527 (1.0664) acc 59.3750 (72.9360) lr 1.9098e-04 eta 1:03:40 +epoch [42/50] batch [220/500] time 0.919 (0.891) data 0.000 (0.004) loss 0.5859 (1.0624) acc 81.2500 (73.0256) lr 1.9098e-04 eta 1:03:35 +epoch [42/50] batch [225/500] time 0.879 (0.891) data 0.001 (0.004) loss 0.8735 (1.0546) acc 71.8750 (73.2083) lr 1.9098e-04 eta 1:03:30 +epoch [42/50] batch [230/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.2012 (1.0603) acc 71.8750 (73.0571) lr 1.9098e-04 eta 1:03:24 +epoch [42/50] batch [235/500] time 0.862 (0.891) data 0.000 (0.003) loss 0.7090 (1.0602) acc 75.0000 (73.0452) lr 1.9098e-04 eta 1:03:19 +epoch [42/50] batch [240/500] time 0.898 (0.891) data 0.000 (0.003) loss 1.6309 (1.0611) acc 71.8750 (73.1250) lr 1.9098e-04 eta 1:03:15 +epoch [42/50] batch [245/500] time 0.864 
(0.891) data 0.000 (0.003) loss 1.0996 (1.0587) acc 78.1250 (73.1888) lr 1.9098e-04 eta 1:03:10 +epoch [42/50] batch [250/500] time 0.877 (0.891) data 0.000 (0.003) loss 0.5996 (1.0555) acc 75.0000 (73.1625) lr 1.9098e-04 eta 1:03:05 +epoch [42/50] batch [255/500] time 0.873 (0.891) data 0.000 (0.003) loss 1.0684 (1.0549) acc 78.1250 (73.2230) lr 1.9098e-04 eta 1:03:01 +epoch [42/50] batch [260/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.2393 (1.0561) acc 68.7500 (73.1851) lr 1.9098e-04 eta 1:02:56 +epoch [42/50] batch [265/500] time 0.897 (0.891) data 0.000 (0.003) loss 0.8809 (1.0575) acc 81.2500 (73.0778) lr 1.9098e-04 eta 1:02:52 +epoch [42/50] batch [270/500] time 0.908 (0.891) data 0.000 (0.003) loss 1.0957 (1.0576) acc 68.7500 (73.0324) lr 1.9098e-04 eta 1:02:47 +epoch [42/50] batch [275/500] time 0.887 (0.891) data 0.000 (0.003) loss 1.5957 (1.0624) acc 59.3750 (72.9432) lr 1.9098e-04 eta 1:02:42 +epoch [42/50] batch [280/500] time 0.867 (0.891) data 0.000 (0.003) loss 1.2969 (1.0669) acc 65.6250 (72.8906) lr 1.9098e-04 eta 1:02:38 +epoch [42/50] batch [285/500] time 0.888 (0.891) data 0.000 (0.003) loss 0.6353 (1.0667) acc 81.2500 (72.9825) lr 1.9098e-04 eta 1:02:34 +epoch [42/50] batch [290/500] time 0.898 (0.891) data 0.000 (0.003) loss 0.8926 (1.0622) acc 78.1250 (73.1681) lr 1.9098e-04 eta 1:02:30 +epoch [42/50] batch [295/500] time 0.868 (0.891) data 0.000 (0.003) loss 0.8853 (1.0600) acc 78.1250 (73.2097) lr 1.9098e-04 eta 1:02:24 +epoch [42/50] batch [300/500] time 0.879 (0.890) data 0.000 (0.003) loss 1.4062 (1.0603) acc 68.7500 (73.2083) lr 1.9098e-04 eta 1:02:19 +epoch [42/50] batch [305/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.8267 (1.0600) acc 78.1250 (73.2377) lr 1.9098e-04 eta 1:02:14 +epoch [42/50] batch [310/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.0840 (1.0609) acc 81.2500 (73.2359) lr 1.9098e-04 eta 1:02:10 +epoch [42/50] batch [315/500] time 0.894 (0.890) data 0.000 (0.003) loss 0.8423 (1.0590) acc 75.0000 
(73.2242) lr 1.9098e-04 eta 1:02:06 +epoch [42/50] batch [320/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.0146 (1.0572) acc 68.7500 (73.2520) lr 1.9098e-04 eta 1:02:01 +epoch [42/50] batch [325/500] time 0.999 (0.891) data 0.001 (0.003) loss 1.3857 (1.0583) acc 62.5000 (73.2115) lr 1.9098e-04 eta 1:01:58 +epoch [42/50] batch [330/500] time 0.888 (0.891) data 0.000 (0.002) loss 0.9458 (1.0591) acc 71.8750 (73.2102) lr 1.9098e-04 eta 1:01:54 +epoch [42/50] batch [335/500] time 0.889 (0.891) data 0.000 (0.002) loss 1.3086 (1.0602) acc 75.0000 (73.1716) lr 1.9098e-04 eta 1:01:50 +epoch [42/50] batch [340/500] time 0.890 (0.891) data 0.001 (0.002) loss 0.9062 (1.0566) acc 71.8750 (73.2169) lr 1.9098e-04 eta 1:01:45 +epoch [42/50] batch [345/500] time 0.898 (0.891) data 0.000 (0.002) loss 1.4893 (1.0589) acc 71.8750 (73.1431) lr 1.9098e-04 eta 1:01:40 +epoch [42/50] batch [350/500] time 0.896 (0.891) data 0.000 (0.002) loss 0.6313 (1.0577) acc 81.2500 (73.1518) lr 1.9098e-04 eta 1:01:36 +epoch [42/50] batch [355/500] time 0.905 (0.891) data 0.000 (0.002) loss 1.0264 (1.0577) acc 75.0000 (73.1426) lr 1.9098e-04 eta 1:01:31 +epoch [42/50] batch [360/500] time 0.867 (0.891) data 0.000 (0.002) loss 0.9272 (1.0565) acc 78.1250 (73.1944) lr 1.9098e-04 eta 1:01:27 +epoch [42/50] batch [365/500] time 0.883 (0.891) data 0.000 (0.002) loss 1.3965 (1.0591) acc 75.0000 (73.1678) lr 1.9098e-04 eta 1:01:22 +epoch [42/50] batch [370/500] time 0.885 (0.891) data 0.000 (0.002) loss 1.0059 (1.0583) acc 71.8750 (73.2095) lr 1.9098e-04 eta 1:01:18 +epoch [42/50] batch [375/500] time 0.848 (0.891) data 0.000 (0.002) loss 1.2354 (1.0579) acc 78.1250 (73.2333) lr 1.9098e-04 eta 1:01:13 +epoch [42/50] batch [380/500] time 0.875 (0.890) data 0.000 (0.002) loss 0.7778 (1.0544) acc 78.1250 (73.3141) lr 1.9098e-04 eta 1:01:08 +epoch [42/50] batch [385/500] time 0.896 (0.890) data 0.000 (0.002) loss 1.1768 (1.0543) acc 71.8750 (73.3279) lr 1.9098e-04 eta 1:01:04 +epoch [42/50] batch [390/500] 
time 0.906 (0.890) data 0.000 (0.002) loss 1.1338 (1.0542) acc 81.2500 (73.3253) lr 1.9098e-04 eta 1:00:59 +epoch [42/50] batch [395/500] time 0.911 (0.890) data 0.000 (0.002) loss 0.5811 (1.0525) acc 78.1250 (73.3544) lr 1.9098e-04 eta 1:00:55 +epoch [42/50] batch [400/500] time 0.884 (0.891) data 0.000 (0.002) loss 0.5645 (1.0508) acc 81.2500 (73.3672) lr 1.9098e-04 eta 1:00:51 +epoch [42/50] batch [405/500] time 0.899 (0.891) data 0.000 (0.002) loss 0.8584 (1.0518) acc 75.0000 (73.3179) lr 1.9098e-04 eta 1:00:46 +epoch [42/50] batch [410/500] time 0.877 (0.890) data 0.000 (0.002) loss 0.3835 (1.0498) acc 87.5000 (73.3689) lr 1.9098e-04 eta 1:00:41 +epoch [42/50] batch [415/500] time 0.896 (0.890) data 0.000 (0.002) loss 0.6689 (1.0474) acc 81.2500 (73.4337) lr 1.9098e-04 eta 1:00:37 +epoch [42/50] batch [420/500] time 0.886 (0.890) data 0.000 (0.002) loss 0.6528 (1.0472) acc 78.1250 (73.4301) lr 1.9098e-04 eta 1:00:32 +epoch [42/50] batch [425/500] time 0.852 (0.891) data 0.000 (0.002) loss 1.0557 (1.0475) acc 71.8750 (73.3750) lr 1.9098e-04 eta 1:00:28 +epoch [42/50] batch [430/500] time 0.869 (0.891) data 0.000 (0.002) loss 0.4895 (1.0467) acc 87.5000 (73.4084) lr 1.9098e-04 eta 1:00:24 +epoch [42/50] batch [435/500] time 0.865 (0.890) data 0.000 (0.002) loss 1.1338 (1.0461) acc 68.7500 (73.3693) lr 1.9098e-04 eta 1:00:19 +epoch [42/50] batch [440/500] time 0.876 (0.890) data 0.000 (0.002) loss 1.1377 (1.0475) acc 62.5000 (73.2884) lr 1.9098e-04 eta 1:00:14 +epoch [42/50] batch [445/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.8564 (1.0487) acc 71.8750 (73.2795) lr 1.9098e-04 eta 1:00:09 +epoch [42/50] batch [450/500] time 0.920 (0.890) data 0.000 (0.002) loss 1.1846 (1.0476) acc 81.2500 (73.3125) lr 1.9098e-04 eta 1:00:05 +epoch [42/50] batch [455/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.1357 (1.0490) acc 68.7500 (73.3242) lr 1.9098e-04 eta 1:00:00 +epoch [42/50] batch [460/500] time 0.857 (0.890) data 0.000 (0.002) loss 1.4941 (1.0504) acc 
59.3750 (73.2948) lr 1.9098e-04 eta 0:59:56 +epoch [42/50] batch [465/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.0547 (1.0510) acc 65.6250 (73.2325) lr 1.9098e-04 eta 0:59:51 +epoch [42/50] batch [470/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.6523 (1.0502) acc 78.1250 (73.2580) lr 1.9098e-04 eta 0:59:47 +epoch [42/50] batch [475/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.2676 (1.0512) acc 71.8750 (73.2303) lr 1.9098e-04 eta 0:59:42 +epoch [42/50] batch [480/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.1133 (1.0508) acc 75.0000 (73.2422) lr 1.9098e-04 eta 0:59:38 +epoch [42/50] batch [485/500] time 0.868 (0.890) data 0.000 (0.002) loss 0.4990 (1.0514) acc 81.2500 (73.2410) lr 1.9098e-04 eta 0:59:33 +epoch [42/50] batch [490/500] time 0.866 (0.890) data 0.000 (0.002) loss 0.9351 (1.0496) acc 71.8750 (73.2844) lr 1.9098e-04 eta 0:59:29 +epoch [42/50] batch [495/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.3271 (1.0525) acc 68.7500 (73.2513) lr 1.9098e-04 eta 0:59:25 +epoch [42/50] batch [500/500] time 0.911 (0.890) data 0.000 (0.002) loss 0.8354 (1.0529) acc 84.3750 (73.2125) lr 1.5567e-04 eta 0:59:20 +epoch [43/50] batch [5/500] time 0.876 (1.046) data 0.000 (0.155) loss 1.7920 (1.1332) acc 71.8750 (76.8750) lr 1.5567e-04 eta 1:09:38 +epoch [43/50] batch [10/500] time 1.042 (0.980) data 0.000 (0.077) loss 1.5918 (1.0938) acc 59.3750 (75.6250) lr 1.5567e-04 eta 1:05:11 +epoch [43/50] batch [15/500] time 0.885 (0.947) data 0.000 (0.052) loss 1.2080 (1.1256) acc 62.5000 (73.3333) lr 1.5567e-04 eta 1:02:54 +epoch [43/50] batch [20/500] time 0.887 (0.930) data 0.000 (0.039) loss 0.8247 (1.1231) acc 81.2500 (73.7500) lr 1.5567e-04 eta 1:01:42 +epoch [43/50] batch [25/500] time 0.911 (0.920) data 0.000 (0.031) loss 1.1504 (1.1179) acc 62.5000 (73.3750) lr 1.5567e-04 eta 1:00:57 +epoch [43/50] batch [30/500] time 0.865 (0.913) data 0.000 (0.026) loss 1.5342 (1.1534) acc 50.0000 (72.8125) lr 1.5567e-04 eta 1:00:25 +epoch [43/50] batch [35/500] 
time 0.890 (0.909) data 0.000 (0.022) loss 0.8315 (1.1722) acc 81.2500 (71.5179) lr 1.5567e-04 eta 1:00:04 +epoch [43/50] batch [40/500] time 0.884 (0.904) data 0.000 (0.020) loss 1.1113 (1.1617) acc 78.1250 (71.8750) lr 1.5567e-04 eta 0:59:40 +epoch [43/50] batch [45/500] time 0.896 (0.903) data 0.000 (0.017) loss 1.1621 (1.1401) acc 75.0000 (72.5000) lr 1.5567e-04 eta 0:59:30 +epoch [43/50] batch [50/500] time 0.893 (0.902) data 0.000 (0.016) loss 0.7065 (1.1233) acc 75.0000 (72.8125) lr 1.5567e-04 eta 0:59:21 +epoch [43/50] batch [55/500] time 0.919 (0.900) data 0.000 (0.014) loss 1.5234 (1.1203) acc 62.5000 (72.8977) lr 1.5567e-04 eta 0:59:11 +epoch [43/50] batch [60/500] time 0.930 (0.900) data 0.000 (0.013) loss 0.7275 (1.1241) acc 78.1250 (72.6562) lr 1.5567e-04 eta 0:59:05 +epoch [43/50] batch [65/500] time 0.861 (0.898) data 0.000 (0.012) loss 0.8438 (1.1062) acc 71.8750 (72.8846) lr 1.5567e-04 eta 0:58:53 +epoch [43/50] batch [70/500] time 0.873 (0.896) data 0.000 (0.011) loss 0.8516 (1.0937) acc 84.3750 (72.9464) lr 1.5567e-04 eta 0:58:39 +epoch [43/50] batch [75/500] time 0.899 (0.895) data 0.000 (0.011) loss 1.0410 (1.0994) acc 71.8750 (72.5833) lr 1.5567e-04 eta 0:58:31 +epoch [43/50] batch [80/500] time 0.896 (0.894) data 0.000 (0.010) loss 0.9868 (1.1119) acc 75.0000 (72.3438) lr 1.5567e-04 eta 0:58:22 +epoch [43/50] batch [85/500] time 0.917 (0.894) data 0.000 (0.009) loss 0.3865 (1.1160) acc 90.6250 (72.2794) lr 1.5567e-04 eta 0:58:18 +epoch [43/50] batch [90/500] time 0.887 (0.893) data 0.000 (0.009) loss 0.9980 (1.1171) acc 65.6250 (72.2222) lr 1.5567e-04 eta 0:58:12 +epoch [43/50] batch [95/500] time 0.901 (0.892) data 0.000 (0.008) loss 1.2695 (1.1098) acc 75.0000 (72.3684) lr 1.5567e-04 eta 0:58:04 +epoch [43/50] batch [100/500] time 0.877 (0.891) data 0.000 (0.008) loss 0.3093 (1.1055) acc 93.7500 (72.5000) lr 1.5567e-04 eta 0:57:56 +epoch [43/50] batch [105/500] time 0.884 (0.891) data 0.000 (0.008) loss 0.9121 (1.1066) acc 75.0000 
(72.5000) lr 1.5567e-04 eta 0:57:51 +epoch [43/50] batch [110/500] time 0.883 (0.893) data 0.000 (0.007) loss 1.1533 (1.1124) acc 71.8750 (72.4432) lr 1.5567e-04 eta 0:57:53 +epoch [43/50] batch [115/500] time 0.874 (0.892) data 0.000 (0.007) loss 0.9272 (1.1071) acc 75.0000 (72.6359) lr 1.5567e-04 eta 0:57:45 +epoch [43/50] batch [120/500] time 0.900 (0.892) data 0.000 (0.007) loss 0.7944 (1.1091) acc 78.1250 (72.6302) lr 1.5567e-04 eta 0:57:39 +epoch [43/50] batch [125/500] time 0.878 (0.891) data 0.000 (0.006) loss 0.6753 (1.1049) acc 84.3750 (72.7250) lr 1.5567e-04 eta 0:57:33 +epoch [43/50] batch [130/500] time 0.898 (0.891) data 0.000 (0.006) loss 1.2627 (1.1025) acc 78.1250 (72.9327) lr 1.5567e-04 eta 0:57:27 +epoch [43/50] batch [135/500] time 0.876 (0.891) data 0.000 (0.006) loss 0.8228 (1.0905) acc 84.3750 (73.1481) lr 1.5567e-04 eta 0:57:21 +epoch [43/50] batch [140/500] time 0.871 (0.890) data 0.000 (0.006) loss 1.2910 (1.0900) acc 65.6250 (73.3036) lr 1.5567e-04 eta 0:57:16 +epoch [43/50] batch [145/500] time 0.907 (0.890) data 0.000 (0.006) loss 0.8188 (1.0862) acc 84.3750 (73.4483) lr 1.5567e-04 eta 0:57:12 +epoch [43/50] batch [150/500] time 0.897 (0.890) data 0.000 (0.005) loss 1.0469 (1.0891) acc 68.7500 (73.3333) lr 1.5567e-04 eta 0:57:07 +epoch [43/50] batch [155/500] time 0.897 (0.891) data 0.001 (0.005) loss 1.1260 (1.0839) acc 71.8750 (73.4274) lr 1.5567e-04 eta 0:57:05 +epoch [43/50] batch [160/500] time 0.902 (0.891) data 0.000 (0.005) loss 0.8525 (1.0774) acc 81.2500 (73.5547) lr 1.5567e-04 eta 0:57:00 +epoch [43/50] batch [165/500] time 0.896 (0.891) data 0.000 (0.005) loss 1.2891 (1.0817) acc 68.7500 (73.4280) lr 1.5567e-04 eta 0:56:55 +epoch [43/50] batch [170/500] time 0.894 (0.890) data 0.000 (0.005) loss 1.2471 (1.0809) acc 68.7500 (73.3272) lr 1.5567e-04 eta 0:56:50 +epoch [43/50] batch [175/500] time 0.897 (0.890) data 0.000 (0.005) loss 0.9463 (1.0769) acc 75.0000 (73.2143) lr 1.5567e-04 eta 0:56:45 +epoch [43/50] batch [180/500] 
time 0.921 (0.890) data 0.000 (0.005) loss 1.2451 (1.0763) acc 75.0000 (73.1597) lr 1.5567e-04 eta 0:56:40 +epoch [43/50] batch [185/500] time 0.912 (0.891) data 0.000 (0.004) loss 0.7402 (1.0696) acc 78.1250 (73.4122) lr 1.5567e-04 eta 0:56:37 +epoch [43/50] batch [190/500] time 0.897 (0.891) data 0.000 (0.004) loss 0.8496 (1.0695) acc 81.2500 (73.4375) lr 1.5567e-04 eta 0:56:33 +epoch [43/50] batch [195/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.1338 (1.0714) acc 71.8750 (73.3814) lr 1.5567e-04 eta 0:56:27 +epoch [43/50] batch [200/500] time 0.884 (0.890) data 0.000 (0.004) loss 0.9536 (1.0700) acc 75.0000 (73.4688) lr 1.5567e-04 eta 0:56:22 +epoch [43/50] batch [205/500] time 0.910 (0.890) data 0.000 (0.004) loss 1.0049 (1.0724) acc 65.6250 (73.3537) lr 1.5567e-04 eta 0:56:18 +epoch [43/50] batch [210/500] time 0.912 (0.891) data 0.000 (0.004) loss 1.8965 (1.0757) acc 59.3750 (73.2589) lr 1.5567e-04 eta 0:56:15 +epoch [43/50] batch [215/500] time 0.872 (0.891) data 0.000 (0.004) loss 1.5068 (1.0787) acc 65.6250 (73.2122) lr 1.5567e-04 eta 0:56:11 +epoch [43/50] batch [220/500] time 0.866 (0.891) data 0.000 (0.004) loss 1.1221 (1.0763) acc 78.1250 (73.2244) lr 1.5567e-04 eta 0:56:06 +epoch [43/50] batch [225/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.2520 (1.0781) acc 78.1250 (73.2083) lr 1.5567e-04 eta 0:56:01 +epoch [43/50] batch [230/500] time 0.904 (0.890) data 0.000 (0.004) loss 0.7612 (1.0737) acc 78.1250 (73.3152) lr 1.5567e-04 eta 0:55:57 +epoch [43/50] batch [235/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.5410 (1.0759) acc 78.1250 (73.3245) lr 1.5567e-04 eta 0:55:51 +epoch [43/50] batch [240/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.1982 (1.0742) acc 78.1250 (73.3854) lr 1.5567e-04 eta 0:55:46 +epoch [43/50] batch [245/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.7588 (1.0707) acc 84.3750 (73.4949) lr 1.5567e-04 eta 0:55:41 +epoch [43/50] batch [250/500] time 1.022 (0.890) data 0.000 (0.003) loss 0.6855 (1.0695) acc 
84.3750 (73.6250) lr 1.5567e-04 eta 0:55:39 +epoch [43/50] batch [255/500] time 0.897 (0.890) data 0.000 (0.003) loss 0.6968 (1.0696) acc 78.1250 (73.5907) lr 1.5567e-04 eta 0:55:34 +epoch [43/50] batch [260/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.6802 (1.0679) acc 78.1250 (73.6659) lr 1.5567e-04 eta 0:55:29 +epoch [43/50] batch [265/500] time 0.860 (0.890) data 0.000 (0.003) loss 1.1299 (1.0664) acc 81.2500 (73.7146) lr 1.5567e-04 eta 0:55:24 +epoch [43/50] batch [270/500] time 0.860 (0.890) data 0.000 (0.003) loss 1.3174 (1.0655) acc 68.7500 (73.7500) lr 1.5567e-04 eta 0:55:19 +epoch [43/50] batch [275/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.9731 (1.0637) acc 68.7500 (73.7841) lr 1.5567e-04 eta 0:55:13 +epoch [43/50] batch [280/500] time 0.860 (0.889) data 0.000 (0.003) loss 0.9595 (1.0623) acc 75.0000 (73.8393) lr 1.5567e-04 eta 0:55:08 +epoch [43/50] batch [285/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.1982 (1.0609) acc 71.8750 (73.8596) lr 1.5567e-04 eta 0:55:02 +epoch [43/50] batch [290/500] time 0.914 (0.889) data 0.000 (0.003) loss 0.9175 (1.0609) acc 78.1250 (73.8793) lr 1.5567e-04 eta 0:54:57 +epoch [43/50] batch [295/500] time 0.890 (0.889) data 0.000 (0.003) loss 1.1338 (1.0617) acc 71.8750 (73.8242) lr 1.5567e-04 eta 0:54:54 +epoch [43/50] batch [300/500] time 0.868 (0.889) data 0.000 (0.003) loss 1.1191 (1.0626) acc 68.7500 (73.7604) lr 1.5567e-04 eta 0:54:50 +epoch [43/50] batch [305/500] time 0.861 (0.889) data 0.000 (0.003) loss 0.8223 (1.0628) acc 81.2500 (73.7602) lr 1.5567e-04 eta 0:54:45 +epoch [43/50] batch [310/500] time 0.885 (0.889) data 0.000 (0.003) loss 0.5342 (1.0596) acc 84.3750 (73.8407) lr 1.5567e-04 eta 0:54:40 +epoch [43/50] batch [315/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.4929 (1.0586) acc 84.3750 (73.8492) lr 1.5567e-04 eta 0:54:36 +epoch [43/50] batch [320/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.9697 (1.0592) acc 68.7500 (73.7891) lr 1.5567e-04 eta 0:54:32 +epoch [43/50] batch 
[325/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.4834 (1.0589) acc 71.8750 (73.8462) lr 1.5567e-04 eta 0:54:27 +epoch [43/50] batch [330/500] time 0.861 (0.889) data 0.001 (0.003) loss 0.9512 (1.0607) acc 68.7500 (73.8068) lr 1.5567e-04 eta 0:54:23 +epoch [43/50] batch [335/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.9385 (1.0629) acc 68.7500 (73.8340) lr 1.5567e-04 eta 0:54:18 +epoch [43/50] batch [340/500] time 0.902 (0.889) data 0.000 (0.003) loss 1.6826 (1.0643) acc 62.5000 (73.8235) lr 1.5567e-04 eta 0:54:14 +epoch [43/50] batch [345/500] time 0.917 (0.889) data 0.000 (0.002) loss 1.8496 (1.0656) acc 56.2500 (73.7953) lr 1.5567e-04 eta 0:54:11 +epoch [43/50] batch [350/500] time 0.916 (0.890) data 0.000 (0.002) loss 0.8618 (1.0628) acc 81.2500 (73.8393) lr 1.5567e-04 eta 0:54:06 +epoch [43/50] batch [355/500] time 0.886 (0.890) data 0.000 (0.002) loss 0.9243 (1.0639) acc 68.7500 (73.7324) lr 1.5567e-04 eta 0:54:02 +epoch [43/50] batch [360/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.4385 (1.0655) acc 65.6250 (73.6979) lr 1.5567e-04 eta 0:53:58 +epoch [43/50] batch [365/500] time 0.924 (0.890) data 0.000 (0.002) loss 0.9487 (1.0632) acc 71.8750 (73.7072) lr 1.5567e-04 eta 0:53:54 +epoch [43/50] batch [370/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.4180 (1.0647) acc 68.7500 (73.6318) lr 1.5567e-04 eta 0:53:49 +epoch [43/50] batch [375/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.8560 (1.0627) acc 81.2500 (73.6667) lr 1.5567e-04 eta 0:53:45 +epoch [43/50] batch [380/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.0078 (1.0653) acc 68.7500 (73.5855) lr 1.5567e-04 eta 0:53:40 +epoch [43/50] batch [385/500] time 0.882 (0.890) data 0.000 (0.002) loss 0.7129 (1.0642) acc 87.5000 (73.5958) lr 1.5567e-04 eta 0:53:35 +epoch [43/50] batch [390/500] time 0.903 (0.890) data 0.000 (0.002) loss 0.8755 (1.0624) acc 78.1250 (73.6218) lr 1.5567e-04 eta 0:53:31 +epoch [43/50] batch [395/500] time 0.858 (0.890) data 0.000 (0.002) loss 0.9673 
(1.0604) acc 71.8750 (73.6313) lr 1.5567e-04 eta 0:53:27 +epoch [43/50] batch [400/500] time 0.865 (0.890) data 0.000 (0.002) loss 0.7754 (1.0606) acc 84.3750 (73.6406) lr 1.5567e-04 eta 0:53:23 +epoch [43/50] batch [405/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.8818 (1.0607) acc 84.3750 (73.6806) lr 1.5567e-04 eta 0:53:18 +epoch [43/50] batch [410/500] time 0.905 (0.890) data 0.000 (0.002) loss 2.0371 (1.0653) acc 59.3750 (73.6357) lr 1.5567e-04 eta 0:53:14 +epoch [43/50] batch [415/500] time 0.901 (0.890) data 0.000 (0.002) loss 0.9360 (1.0660) acc 78.1250 (73.6446) lr 1.5567e-04 eta 0:53:10 +epoch [43/50] batch [420/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.5918 (1.0655) acc 65.6250 (73.6979) lr 1.5567e-04 eta 0:53:05 +epoch [43/50] batch [425/500] time 0.922 (0.890) data 0.000 (0.002) loss 0.7705 (1.0674) acc 84.3750 (73.7132) lr 1.5567e-04 eta 0:53:01 +epoch [43/50] batch [430/500] time 0.887 (0.890) data 0.000 (0.002) loss 0.6440 (1.0664) acc 75.0000 (73.7137) lr 1.5567e-04 eta 0:52:56 +epoch [43/50] batch [435/500] time 0.866 (0.890) data 0.001 (0.002) loss 1.5820 (1.0661) acc 71.8750 (73.7284) lr 1.5567e-04 eta 0:52:51 +epoch [43/50] batch [440/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.0078 (1.0649) acc 81.2500 (73.7784) lr 1.5567e-04 eta 0:52:48 +epoch [43/50] batch [445/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.0723 (1.0658) acc 71.8750 (73.7500) lr 1.5567e-04 eta 0:52:43 +epoch [43/50] batch [450/500] time 0.923 (0.890) data 0.000 (0.002) loss 0.8262 (1.0669) acc 75.0000 (73.7431) lr 1.5567e-04 eta 0:52:39 +epoch [43/50] batch [455/500] time 0.850 (0.890) data 0.000 (0.002) loss 1.6553 (1.0671) acc 62.5000 (73.7225) lr 1.5567e-04 eta 0:52:34 +epoch [43/50] batch [460/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.4678 (1.0688) acc 65.6250 (73.6889) lr 1.5567e-04 eta 0:52:29 +epoch [43/50] batch [465/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.2344 (1.0693) acc 59.3750 (73.6425) lr 1.5567e-04 eta 0:52:24 +epoch 
[43/50] batch [470/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.2988 (1.0709) acc 71.8750 (73.6436) lr 1.5567e-04 eta 0:52:19 +epoch [43/50] batch [475/500] time 0.903 (0.889) data 0.000 (0.002) loss 0.9429 (1.0707) acc 75.0000 (73.6382) lr 1.5567e-04 eta 0:52:14 +epoch [43/50] batch [480/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.8721 (1.0726) acc 81.2500 (73.6133) lr 1.5567e-04 eta 0:52:09 +epoch [43/50] batch [485/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.0557 (1.0721) acc 75.0000 (73.6082) lr 1.5567e-04 eta 0:52:05 +epoch [43/50] batch [490/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0127 (1.0727) acc 75.0000 (73.5778) lr 1.5567e-04 eta 0:52:00 +epoch [43/50] batch [495/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.5420 (1.0723) acc 65.6250 (73.5732) lr 1.5567e-04 eta 0:51:56 +epoch [43/50] batch [500/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.6646 (1.0703) acc 84.3750 (73.5687) lr 1.2369e-04 eta 0:51:51 +epoch [44/50] batch [5/500] time 0.861 (1.041) data 0.001 (0.145) loss 1.1123 (0.7923) acc 75.0000 (80.0000) lr 1.2369e-04 eta 1:00:38 +epoch [44/50] batch [10/500] time 0.932 (0.967) data 0.000 (0.073) loss 0.8765 (1.0028) acc 75.0000 (74.3750) lr 1.2369e-04 eta 0:56:14 +epoch [44/50] batch [15/500] time 0.885 (0.942) data 0.000 (0.049) loss 1.0479 (0.9841) acc 71.8750 (74.3750) lr 1.2369e-04 eta 0:54:41 +epoch [44/50] batch [20/500] time 0.877 (0.927) data 0.000 (0.037) loss 0.9722 (0.9857) acc 68.7500 (73.7500) lr 1.2369e-04 eta 0:53:47 +epoch [44/50] batch [25/500] time 0.872 (0.926) data 0.000 (0.029) loss 1.1299 (0.9736) acc 68.7500 (74.0000) lr 1.2369e-04 eta 0:53:39 +epoch [44/50] batch [30/500] time 0.869 (0.919) data 0.000 (0.024) loss 1.0664 (0.9574) acc 75.0000 (74.5833) lr 1.2369e-04 eta 0:53:08 +epoch [44/50] batch [35/500] time 0.884 (0.914) data 0.000 (0.021) loss 1.1182 (0.9644) acc 68.7500 (73.6607) lr 1.2369e-04 eta 0:52:47 +epoch [44/50] batch [40/500] time 0.862 (0.908) data 0.000 (0.018) loss 0.7119 
(0.9432) acc 78.1250 (73.9844) lr 1.2369e-04 eta 0:52:23 +epoch [44/50] batch [45/500] time 0.896 (0.907) data 0.000 (0.016) loss 0.9990 (0.9443) acc 81.2500 (74.1667) lr 1.2369e-04 eta 0:52:13 +epoch [44/50] batch [50/500] time 0.878 (0.904) data 0.000 (0.015) loss 1.2305 (0.9607) acc 81.2500 (74.1875) lr 1.2369e-04 eta 0:51:59 +epoch [44/50] batch [55/500] time 0.902 (0.902) data 0.000 (0.013) loss 1.1299 (0.9680) acc 78.1250 (74.3182) lr 1.2369e-04 eta 0:51:49 +epoch [44/50] batch [60/500] time 0.897 (0.902) data 0.001 (0.012) loss 0.5601 (0.9691) acc 87.5000 (74.5833) lr 1.2369e-04 eta 0:51:41 +epoch [44/50] batch [65/500] time 0.892 (0.900) data 0.000 (0.011) loss 1.1523 (0.9717) acc 78.1250 (74.4712) lr 1.2369e-04 eta 0:51:32 +epoch [44/50] batch [70/500] time 0.887 (0.899) data 0.000 (0.011) loss 1.2461 (0.9854) acc 65.6250 (74.1071) lr 1.2369e-04 eta 0:51:24 +epoch [44/50] batch [75/500] time 0.899 (0.899) data 0.000 (0.010) loss 0.8467 (0.9847) acc 78.1250 (74.0833) lr 1.2369e-04 eta 0:51:17 +epoch [44/50] batch [80/500] time 0.862 (0.898) data 0.000 (0.009) loss 1.0869 (1.0058) acc 75.0000 (73.7500) lr 1.2369e-04 eta 0:51:11 +epoch [44/50] batch [85/500] time 0.842 (0.899) data 0.000 (0.009) loss 0.7261 (1.0052) acc 84.3750 (73.7132) lr 1.2369e-04 eta 0:51:08 +epoch [44/50] batch [90/500] time 0.890 (0.898) data 0.000 (0.008) loss 1.6387 (1.0237) acc 65.6250 (73.4722) lr 1.2369e-04 eta 0:51:02 +epoch [44/50] batch [95/500] time 0.905 (0.898) data 0.000 (0.008) loss 0.9390 (1.0228) acc 65.6250 (73.5526) lr 1.2369e-04 eta 0:50:57 +epoch [44/50] batch [100/500] time 0.883 (0.897) data 0.000 (0.008) loss 1.0566 (1.0373) acc 71.8750 (73.4375) lr 1.2369e-04 eta 0:50:51 +epoch [44/50] batch [105/500] time 0.930 (0.897) data 0.000 (0.007) loss 1.4932 (1.0409) acc 71.8750 (73.5417) lr 1.2369e-04 eta 0:50:44 +epoch [44/50] batch [110/500] time 0.872 (0.896) data 0.000 (0.007) loss 0.8926 (1.0399) acc 81.2500 (73.6648) lr 1.2369e-04 eta 0:50:37 +epoch [44/50] batch 
[115/500] time 0.883 (0.895) data 0.000 (0.007) loss 0.9189 (1.0329) acc 65.6250 (73.7228) lr 1.2369e-04 eta 0:50:31 +epoch [44/50] batch [120/500] time 0.888 (0.895) data 0.000 (0.006) loss 0.7656 (1.0313) acc 81.2500 (73.6979) lr 1.2369e-04 eta 0:50:25 +epoch [44/50] batch [125/500] time 0.892 (0.895) data 0.000 (0.006) loss 1.4756 (1.0222) acc 56.2500 (73.8000) lr 1.2369e-04 eta 0:50:18 +epoch [44/50] batch [130/500] time 0.890 (0.895) data 0.000 (0.006) loss 1.0137 (1.0191) acc 68.7500 (73.7981) lr 1.2369e-04 eta 0:50:16 +epoch [44/50] batch [135/500] time 0.859 (0.895) data 0.000 (0.006) loss 1.4375 (1.0223) acc 65.6250 (73.7731) lr 1.2369e-04 eta 0:50:11 +epoch [44/50] batch [140/500] time 0.860 (0.895) data 0.000 (0.005) loss 1.1357 (1.0206) acc 65.6250 (73.7723) lr 1.2369e-04 eta 0:50:06 +epoch [44/50] batch [145/500] time 0.886 (0.894) data 0.000 (0.005) loss 0.9434 (1.0203) acc 75.0000 (73.8793) lr 1.2369e-04 eta 0:49:59 +epoch [44/50] batch [150/500] time 0.878 (0.894) data 0.000 (0.005) loss 0.9766 (1.0278) acc 75.0000 (73.6667) lr 1.2369e-04 eta 0:49:53 +epoch [44/50] batch [155/500] time 0.858 (0.893) data 0.000 (0.005) loss 0.7046 (1.0297) acc 84.3750 (73.6492) lr 1.2369e-04 eta 0:49:47 +epoch [44/50] batch [160/500] time 0.886 (0.893) data 0.000 (0.005) loss 0.6748 (1.0225) acc 81.2500 (73.8086) lr 1.2369e-04 eta 0:49:41 +epoch [44/50] batch [165/500] time 0.894 (0.892) data 0.000 (0.005) loss 0.7446 (1.0197) acc 68.7500 (73.7879) lr 1.2369e-04 eta 0:49:35 +epoch [44/50] batch [170/500] time 0.867 (0.892) data 0.000 (0.005) loss 1.5273 (1.0236) acc 71.8750 (73.8419) lr 1.2369e-04 eta 0:49:28 +epoch [44/50] batch [175/500] time 0.870 (0.891) data 0.000 (0.004) loss 0.4092 (1.0186) acc 87.5000 (73.9821) lr 1.2369e-04 eta 0:49:23 +epoch [44/50] batch [180/500] time 0.864 (0.891) data 0.000 (0.004) loss 0.7822 (1.0195) acc 81.2500 (73.9931) lr 1.2369e-04 eta 0:49:17 +epoch [44/50] batch [185/500] time 0.857 (0.890) data 0.000 (0.004) loss 1.0977 
(1.0203) acc 71.8750 (73.9189) lr 1.2369e-04 eta 0:49:11 +epoch [44/50] batch [190/500] time 0.847 (0.889) data 0.000 (0.004) loss 1.2451 (1.0201) acc 65.6250 (73.8816) lr 1.2369e-04 eta 0:49:03 +epoch [44/50] batch [195/500] time 0.871 (0.889) data 0.000 (0.004) loss 1.3057 (1.0199) acc 65.6250 (73.9263) lr 1.2369e-04 eta 0:48:59 +epoch [44/50] batch [200/500] time 0.884 (0.889) data 0.000 (0.004) loss 0.9072 (1.0206) acc 68.7500 (73.8594) lr 1.2369e-04 eta 0:48:54 +epoch [44/50] batch [205/500] time 0.878 (0.889) data 0.000 (0.004) loss 0.8198 (1.0232) acc 71.8750 (73.7195) lr 1.2369e-04 eta 0:48:50 +epoch [44/50] batch [210/500] time 0.907 (0.889) data 0.000 (0.004) loss 1.1846 (1.0223) acc 75.0000 (73.7649) lr 1.2369e-04 eta 0:48:45 +epoch [44/50] batch [215/500] time 0.898 (0.889) data 0.000 (0.004) loss 1.1094 (1.0282) acc 75.0000 (73.6773) lr 1.2369e-04 eta 0:48:41 +epoch [44/50] batch [220/500] time 0.877 (0.889) data 0.000 (0.004) loss 0.4771 (1.0238) acc 93.7500 (73.8210) lr 1.2369e-04 eta 0:48:36 +epoch [44/50] batch [225/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.7432 (1.0257) acc 65.6250 (73.8611) lr 1.2369e-04 eta 0:48:32 +epoch [44/50] batch [230/500] time 0.887 (0.890) data 0.000 (0.003) loss 0.4983 (1.0215) acc 84.3750 (73.9810) lr 1.2369e-04 eta 0:48:29 +epoch [44/50] batch [235/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.0879 (1.0167) acc 68.7500 (74.0691) lr 1.2369e-04 eta 0:48:23 +epoch [44/50] batch [240/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.5234 (1.0156) acc 84.3750 (74.0755) lr 1.2369e-04 eta 0:48:18 +epoch [44/50] batch [245/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.9780 (1.0126) acc 75.0000 (74.1582) lr 1.2369e-04 eta 0:48:14 +epoch [44/50] batch [250/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.3701 (1.0155) acc 56.2500 (74.1000) lr 1.2369e-04 eta 0:48:09 +epoch [44/50] batch [255/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.3057 (1.0196) acc 68.7500 (74.0319) lr 1.2369e-04 eta 0:48:05 +epoch 
[44/50] batch [260/500] time 0.909 (0.889) data 0.000 (0.003) loss 1.2842 (1.0245) acc 81.2500 (74.0264) lr 1.2369e-04 eta 0:48:00 +epoch [44/50] batch [265/500] time 0.909 (0.889) data 0.000 (0.003) loss 0.8833 (1.0219) acc 75.0000 (74.1627) lr 1.2369e-04 eta 0:47:56 +epoch [44/50] batch [270/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.2100 (1.0258) acc 65.6250 (74.0741) lr 1.2369e-04 eta 0:47:51 +epoch [44/50] batch [275/500] time 0.885 (0.889) data 0.000 (0.003) loss 0.8164 (1.0225) acc 75.0000 (74.1250) lr 1.2369e-04 eta 0:47:48 +epoch [44/50] batch [280/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.1846 (1.0238) acc 68.7500 (74.1183) lr 1.2369e-04 eta 0:47:43 +epoch [44/50] batch [285/500] time 0.860 (0.889) data 0.001 (0.003) loss 0.7930 (1.0231) acc 71.8750 (74.1338) lr 1.2369e-04 eta 0:47:38 +epoch [44/50] batch [290/500] time 0.858 (0.889) data 0.000 (0.003) loss 1.2402 (1.0243) acc 78.1250 (74.1164) lr 1.2369e-04 eta 0:47:33 +epoch [44/50] batch [295/500] time 0.868 (0.889) data 0.000 (0.003) loss 0.5190 (1.0258) acc 87.5000 (74.0890) lr 1.2369e-04 eta 0:47:28 +epoch [44/50] batch [300/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.0391 (1.0227) acc 75.0000 (74.1875) lr 1.2369e-04 eta 0:47:22 +epoch [44/50] batch [305/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.8208 (1.0223) acc 78.1250 (74.2418) lr 1.2369e-04 eta 0:47:18 +epoch [44/50] batch [310/500] time 0.841 (0.888) data 0.000 (0.003) loss 1.0742 (1.0205) acc 68.7500 (74.2843) lr 1.2369e-04 eta 0:47:13 +epoch [44/50] batch [315/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.6680 (1.0220) acc 62.5000 (74.2262) lr 1.2369e-04 eta 0:47:09 +epoch [44/50] batch [320/500] time 0.912 (0.888) data 0.000 (0.003) loss 0.6709 (1.0208) acc 75.0000 (74.2773) lr 1.2369e-04 eta 0:47:05 +epoch [44/50] batch [325/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9922 (1.0191) acc 68.7500 (74.2308) lr 1.2369e-04 eta 0:47:00 +epoch [44/50] batch [330/500] time 0.886 (0.888) data 0.000 (0.002) loss 
1.2520 (1.0200) acc 71.8750 (74.2045) lr 1.2369e-04 eta 0:46:56 +epoch [44/50] batch [335/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.5508 (1.0198) acc 81.2500 (74.1978) lr 1.2369e-04 eta 0:46:52 +epoch [44/50] batch [340/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.4326 (1.0255) acc 75.0000 (74.1176) lr 1.2369e-04 eta 0:46:47 +epoch [44/50] batch [345/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.1621 (1.0270) acc 71.8750 (74.1395) lr 1.2369e-04 eta 0:46:43 +epoch [44/50] batch [350/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.2158 (1.0287) acc 71.8750 (74.1429) lr 1.2369e-04 eta 0:46:38 +epoch [44/50] batch [355/500] time 0.920 (0.888) data 0.000 (0.002) loss 1.3896 (1.0284) acc 75.0000 (74.2077) lr 1.2369e-04 eta 0:46:34 +epoch [44/50] batch [360/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.0234 (1.0317) acc 71.8750 (74.1927) lr 1.2369e-04 eta 0:46:29 +epoch [44/50] batch [365/500] time 0.892 (0.888) data 0.001 (0.002) loss 1.2773 (1.0341) acc 71.8750 (74.1610) lr 1.2369e-04 eta 0:46:25 +epoch [44/50] batch [370/500] time 1.009 (0.889) data 0.000 (0.002) loss 0.9121 (1.0344) acc 84.3750 (74.1639) lr 1.2369e-04 eta 0:46:21 +epoch [44/50] batch [375/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.0879 (1.0361) acc 68.7500 (74.1000) lr 1.2369e-04 eta 0:46:16 +epoch [44/50] batch [380/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.0264 (1.0393) acc 68.7500 (74.0296) lr 1.2369e-04 eta 0:46:12 +epoch [44/50] batch [385/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.1963 (1.0422) acc 71.8750 (73.9854) lr 1.2369e-04 eta 0:46:07 +epoch [44/50] batch [390/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.2324 (1.0427) acc 71.8750 (73.9423) lr 1.2369e-04 eta 0:46:02 +epoch [44/50] batch [395/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.2188 (1.0451) acc 71.8750 (73.8924) lr 1.2369e-04 eta 0:45:58 +epoch [44/50] batch [400/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.9956 (1.0455) acc 68.7500 (73.8594) lr 1.2369e-04 eta 0:45:53 
+epoch [44/50] batch [405/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.2588 (1.0461) acc 65.6250 (73.8812) lr 1.2369e-04 eta 0:45:49 +epoch [44/50] batch [410/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.0732 (1.0477) acc 68.7500 (73.8262) lr 1.2369e-04 eta 0:45:44 +epoch [44/50] batch [415/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.3418 (1.0499) acc 71.8750 (73.8027) lr 1.2369e-04 eta 0:45:41 +epoch [44/50] batch [420/500] time 0.882 (0.889) data 0.000 (0.002) loss 0.9087 (1.0501) acc 71.8750 (73.7798) lr 1.2369e-04 eta 0:45:36 +epoch [44/50] batch [425/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7510 (1.0479) acc 78.1250 (73.8382) lr 1.2369e-04 eta 0:45:32 +epoch [44/50] batch [430/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.6777 (1.0524) acc 62.5000 (73.7645) lr 1.2369e-04 eta 0:45:27 +epoch [44/50] batch [435/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.4160 (1.0539) acc 68.7500 (73.7141) lr 1.2369e-04 eta 0:45:23 +epoch [44/50] batch [440/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.2236 (1.0544) acc 65.6250 (73.6790) lr 1.2369e-04 eta 0:45:18 +epoch [44/50] batch [445/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.4016 (1.0522) acc 87.5000 (73.7360) lr 1.2369e-04 eta 0:45:13 +epoch [44/50] batch [450/500] time 0.920 (0.888) data 0.000 (0.002) loss 0.8252 (1.0514) acc 75.0000 (73.7153) lr 1.2369e-04 eta 0:45:09 +epoch [44/50] batch [455/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.1943 (1.0517) acc 71.8750 (73.6813) lr 1.2369e-04 eta 0:45:04 +epoch [44/50] batch [460/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.9458 (1.0527) acc 71.8750 (73.6141) lr 1.2369e-04 eta 0:44:59 +epoch [44/50] batch [465/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.2568 (1.0509) acc 68.7500 (73.6694) lr 1.2369e-04 eta 0:44:55 +epoch [44/50] batch [470/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.0400 (1.0499) acc 71.8750 (73.7101) lr 1.2369e-04 eta 0:44:50 +epoch [44/50] batch [475/500] time 0.895 (0.888) data 0.000 
(0.002) loss 1.2881 (1.0519) acc 75.0000 (73.6974) lr 1.2369e-04 eta 0:44:46 +epoch [44/50] batch [480/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.1035 (1.0516) acc 71.8750 (73.7044) lr 1.2369e-04 eta 0:44:41 +epoch [44/50] batch [485/500] time 0.858 (0.888) data 0.001 (0.002) loss 1.3896 (1.0519) acc 71.8750 (73.6920) lr 1.2369e-04 eta 0:44:36 +epoch [44/50] batch [490/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.0156 (1.0504) acc 71.8750 (73.7181) lr 1.2369e-04 eta 0:44:32 +epoch [44/50] batch [495/500] time 0.908 (0.888) data 0.000 (0.002) loss 0.4182 (1.0487) acc 90.6250 (73.7437) lr 1.2369e-04 eta 0:44:27 +epoch [44/50] batch [500/500] time 0.911 (0.888) data 0.000 (0.002) loss 0.8408 (1.0480) acc 81.2500 (73.7875) lr 9.5173e-05 eta 0:44:23 +epoch [45/50] batch [5/500] time 0.884 (1.025) data 0.000 (0.148) loss 1.0312 (1.2334) acc 71.8750 (66.8750) lr 9.5173e-05 eta 0:51:10 +epoch [45/50] batch [10/500] time 0.884 (0.971) data 0.000 (0.074) loss 0.7334 (1.0415) acc 87.5000 (71.5625) lr 9.5173e-05 eta 0:48:22 +epoch [45/50] batch [15/500] time 0.872 (0.940) data 0.000 (0.049) loss 1.4814 (1.0949) acc 68.7500 (71.4583) lr 9.5173e-05 eta 0:46:44 +epoch [45/50] batch [20/500] time 0.919 (0.930) data 0.000 (0.037) loss 0.9546 (1.0580) acc 81.2500 (73.1250) lr 9.5173e-05 eta 0:46:10 +epoch [45/50] batch [25/500] time 0.891 (0.919) data 0.000 (0.030) loss 0.9775 (1.0593) acc 84.3750 (73.5000) lr 9.5173e-05 eta 0:45:32 +epoch [45/50] batch [30/500] time 0.859 (0.912) data 0.000 (0.025) loss 1.0889 (1.0769) acc 81.2500 (73.3333) lr 9.5173e-05 eta 0:45:08 +epoch [45/50] batch [35/500] time 0.893 (0.908) data 0.000 (0.021) loss 1.2744 (1.1024) acc 71.8750 (72.1429) lr 9.5173e-05 eta 0:44:53 +epoch [45/50] batch [40/500] time 0.894 (0.906) data 0.000 (0.019) loss 1.1416 (1.0811) acc 68.7500 (72.8125) lr 9.5173e-05 eta 0:44:42 +epoch [45/50] batch [45/500] time 0.869 (0.903) data 0.000 (0.017) loss 1.6533 (1.1079) acc 62.5000 (72.5694) lr 9.5173e-05 eta 0:44:27 
+epoch [45/50] batch [50/500] time 0.860 (0.901) data 0.000 (0.015) loss 0.8696 (1.1087) acc 71.8750 (72.5000) lr 9.5173e-05 eta 0:44:18 +epoch [45/50] batch [55/500] time 0.907 (0.900) data 0.000 (0.014) loss 0.4519 (1.1226) acc 90.6250 (72.3295) lr 9.5173e-05 eta 0:44:09 +epoch [45/50] batch [60/500] time 0.883 (0.899) data 0.000 (0.013) loss 0.3408 (1.1210) acc 96.8750 (72.4479) lr 9.5173e-05 eta 0:44:02 +epoch [45/50] batch [65/500] time 0.902 (0.898) data 0.000 (0.012) loss 1.1982 (1.1137) acc 62.5000 (72.0192) lr 9.5173e-05 eta 0:43:56 +epoch [45/50] batch [70/500] time 0.859 (0.899) data 0.000 (0.011) loss 0.8989 (1.1230) acc 84.3750 (72.2321) lr 9.5173e-05 eta 0:43:52 +epoch [45/50] batch [75/500] time 0.850 (0.897) data 0.000 (0.010) loss 0.8921 (1.1143) acc 75.0000 (72.5417) lr 9.5173e-05 eta 0:43:43 +epoch [45/50] batch [80/500] time 0.875 (0.896) data 0.000 (0.009) loss 0.8066 (1.1088) acc 78.1250 (72.6172) lr 9.5173e-05 eta 0:43:35 +epoch [45/50] batch [85/500] time 0.911 (0.896) data 0.000 (0.009) loss 1.3467 (1.1047) acc 71.8750 (72.6838) lr 9.5173e-05 eta 0:43:31 +epoch [45/50] batch [90/500] time 0.867 (0.895) data 0.000 (0.008) loss 1.8428 (1.1259) acc 56.2500 (72.2222) lr 9.5173e-05 eta 0:43:24 +epoch [45/50] batch [95/500] time 0.907 (0.895) data 0.000 (0.008) loss 1.3477 (1.1204) acc 71.8750 (72.4671) lr 9.5173e-05 eta 0:43:19 +epoch [45/50] batch [100/500] time 0.880 (0.894) data 0.000 (0.008) loss 1.0205 (1.1221) acc 68.7500 (72.4062) lr 9.5173e-05 eta 0:43:12 +epoch [45/50] batch [105/500] time 0.897 (0.893) data 0.000 (0.007) loss 0.5552 (1.1134) acc 81.2500 (72.5595) lr 9.5173e-05 eta 0:43:05 +epoch [45/50] batch [110/500] time 0.855 (0.892) data 0.000 (0.007) loss 0.8970 (1.1030) acc 71.8750 (72.6705) lr 9.5173e-05 eta 0:42:58 +epoch [45/50] batch [115/500] time 0.850 (0.892) data 0.000 (0.007) loss 1.3447 (1.1019) acc 75.0000 (72.7989) lr 9.5173e-05 eta 0:42:54 +epoch [45/50] batch [120/500] time 0.872 (0.892) data 0.000 (0.006) loss 
1.0908 (1.0935) acc 71.8750 (72.9427) lr 9.5173e-05 eta 0:42:48 +epoch [45/50] batch [125/500] time 0.885 (0.891) data 0.000 (0.006) loss 1.0449 (1.0954) acc 75.0000 (72.9500) lr 9.5173e-05 eta 0:42:42 +epoch [45/50] batch [130/500] time 0.886 (0.891) data 0.000 (0.006) loss 0.8623 (1.0974) acc 71.8750 (72.8125) lr 9.5173e-05 eta 0:42:36 +epoch [45/50] batch [135/500] time 0.871 (0.891) data 0.000 (0.006) loss 1.4785 (1.0956) acc 71.8750 (72.8704) lr 9.5173e-05 eta 0:42:31 +epoch [45/50] batch [140/500] time 0.881 (0.890) data 0.000 (0.006) loss 0.9058 (1.0930) acc 81.2500 (72.9464) lr 9.5173e-05 eta 0:42:25 +epoch [45/50] batch [145/500] time 0.875 (0.890) data 0.000 (0.005) loss 0.5610 (1.0899) acc 84.3750 (73.0388) lr 9.5173e-05 eta 0:42:20 +epoch [45/50] batch [150/500] time 0.902 (0.889) data 0.000 (0.005) loss 0.6743 (1.0956) acc 78.1250 (72.9792) lr 9.5173e-05 eta 0:42:14 +epoch [45/50] batch [155/500] time 0.903 (0.889) data 0.000 (0.005) loss 0.7593 (1.1004) acc 81.2500 (72.8629) lr 9.5173e-05 eta 0:42:09 +epoch [45/50] batch [160/500] time 0.905 (0.889) data 0.000 (0.005) loss 0.6211 (1.1022) acc 87.5000 (72.8711) lr 9.5173e-05 eta 0:42:04 +epoch [45/50] batch [165/500] time 0.853 (0.889) data 0.000 (0.005) loss 1.0957 (1.1064) acc 62.5000 (72.6326) lr 9.5173e-05 eta 0:41:59 +epoch [45/50] batch [170/500] time 0.881 (0.889) data 0.000 (0.005) loss 1.4707 (1.1039) acc 65.6250 (72.6654) lr 9.5173e-05 eta 0:41:54 +epoch [45/50] batch [175/500] time 0.862 (0.889) data 0.000 (0.004) loss 2.0547 (1.1058) acc 62.5000 (72.6607) lr 9.5173e-05 eta 0:41:50 +epoch [45/50] batch [180/500] time 0.889 (0.888) data 0.000 (0.004) loss 0.9517 (1.1049) acc 81.2500 (72.7778) lr 9.5173e-05 eta 0:41:45 +epoch [45/50] batch [185/500] time 0.869 (0.888) data 0.000 (0.004) loss 1.1680 (1.1031) acc 71.8750 (72.8378) lr 9.5173e-05 eta 0:41:39 +epoch [45/50] batch [190/500] time 0.884 (0.888) data 0.000 (0.004) loss 1.0957 (1.1015) acc 68.7500 (72.8947) lr 9.5173e-05 eta 0:41:34 
+epoch [45/50] batch [195/500] time 0.863 (0.888) data 0.000 (0.004) loss 0.7739 (1.0967) acc 75.0000 (73.0128) lr 9.5173e-05 eta 0:41:29 +epoch [45/50] batch [200/500] time 0.859 (0.887) data 0.000 (0.004) loss 1.0781 (1.0950) acc 81.2500 (73.0781) lr 9.5173e-05 eta 0:41:24 +epoch [45/50] batch [205/500] time 0.868 (0.887) data 0.000 (0.004) loss 1.0225 (1.0951) acc 75.0000 (73.0945) lr 9.5173e-05 eta 0:41:20 +epoch [45/50] batch [210/500] time 0.878 (0.887) data 0.000 (0.004) loss 1.4043 (1.0926) acc 65.6250 (73.1399) lr 9.5173e-05 eta 0:41:15 +epoch [45/50] batch [215/500] time 0.884 (0.888) data 0.000 (0.004) loss 0.9624 (1.0950) acc 75.0000 (73.1395) lr 9.5173e-05 eta 0:41:12 +epoch [45/50] batch [220/500] time 0.887 (0.888) data 0.000 (0.004) loss 0.5820 (1.0901) acc 87.5000 (73.1960) lr 9.5173e-05 eta 0:41:09 +epoch [45/50] batch [225/500] time 0.896 (0.888) data 0.000 (0.004) loss 1.0352 (1.0893) acc 68.7500 (73.2778) lr 9.5173e-05 eta 0:41:05 +epoch [45/50] batch [230/500] time 0.895 (0.888) data 0.000 (0.003) loss 1.0059 (1.0860) acc 75.0000 (73.2473) lr 9.5173e-05 eta 0:41:00 +epoch [45/50] batch [235/500] time 0.860 (0.888) data 0.000 (0.003) loss 0.6807 (1.0864) acc 81.2500 (73.2314) lr 9.5173e-05 eta 0:40:55 +epoch [45/50] batch [240/500] time 0.905 (0.888) data 0.000 (0.003) loss 1.8037 (1.0907) acc 59.3750 (73.2031) lr 9.5173e-05 eta 0:40:50 +epoch [45/50] batch [245/500] time 0.872 (0.888) data 0.000 (0.003) loss 0.4973 (1.0887) acc 81.2500 (73.1633) lr 9.5173e-05 eta 0:40:45 +epoch [45/50] batch [250/500] time 0.850 (0.887) data 0.000 (0.003) loss 1.1973 (1.0859) acc 71.8750 (73.1750) lr 9.5173e-05 eta 0:40:40 +epoch [45/50] batch [255/500] time 0.909 (0.888) data 0.000 (0.003) loss 0.8633 (1.0893) acc 84.3750 (73.1618) lr 9.5173e-05 eta 0:40:37 +epoch [45/50] batch [260/500] time 0.894 (0.888) data 0.000 (0.003) loss 1.1719 (1.0873) acc 75.0000 (73.1611) lr 9.5173e-05 eta 0:40:33 +epoch [45/50] batch [265/500] time 0.880 (0.888) data 0.000 
(0.003) loss 0.8916 (1.0843) acc 87.5000 (73.2429) lr 9.5173e-05 eta 0:40:28 +epoch [45/50] batch [270/500] time 0.856 (0.888) data 0.000 (0.003) loss 0.9131 (1.0841) acc 71.8750 (73.2176) lr 9.5173e-05 eta 0:40:23 +epoch [45/50] batch [275/500] time 0.853 (0.887) data 0.000 (0.003) loss 0.7769 (1.0817) acc 78.1250 (73.2386) lr 9.5173e-05 eta 0:40:18 +epoch [45/50] batch [280/500] time 0.930 (0.888) data 0.000 (0.003) loss 0.9629 (1.0829) acc 71.8750 (73.2366) lr 9.5173e-05 eta 0:40:14 +epoch [45/50] batch [285/500] time 0.855 (0.888) data 0.000 (0.003) loss 0.6382 (1.0811) acc 90.6250 (73.2237) lr 9.5173e-05 eta 0:40:09 +epoch [45/50] batch [290/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.5884 (1.0786) acc 78.1250 (73.2220) lr 9.5173e-05 eta 0:40:05 +epoch [45/50] batch [295/500] time 0.920 (0.888) data 0.000 (0.003) loss 0.9199 (1.0771) acc 78.1250 (73.2097) lr 9.5173e-05 eta 0:40:01 +epoch [45/50] batch [300/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.3359 (1.0796) acc 78.1250 (73.2083) lr 9.5173e-05 eta 0:39:56 +epoch [45/50] batch [305/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.9761 (1.0780) acc 68.7500 (73.2275) lr 9.5173e-05 eta 0:39:52 +epoch [45/50] batch [310/500] time 0.902 (0.888) data 0.000 (0.003) loss 0.7773 (1.0750) acc 78.1250 (73.2762) lr 9.5173e-05 eta 0:39:47 +epoch [45/50] batch [315/500] time 0.887 (0.888) data 0.000 (0.003) loss 0.6240 (1.0724) acc 87.5000 (73.3333) lr 9.5173e-05 eta 0:39:43 +epoch [45/50] batch [320/500] time 0.871 (0.887) data 0.000 (0.003) loss 1.2959 (1.0730) acc 68.7500 (73.3301) lr 9.5173e-05 eta 0:39:37 +epoch [45/50] batch [325/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.7314 (1.0748) acc 68.7500 (73.2692) lr 9.5173e-05 eta 0:39:32 +epoch [45/50] batch [330/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.6548 (1.0732) acc 78.1250 (73.2481) lr 9.5173e-05 eta 0:39:28 +epoch [45/50] batch [335/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.6924 (1.0679) acc 78.1250 (73.3302) lr 9.5173e-05 
eta 0:39:23 +epoch [45/50] batch [340/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.1963 (1.0691) acc 65.6250 (73.3180) lr 9.5173e-05 eta 0:39:19 +epoch [45/50] batch [345/500] time 0.914 (0.887) data 0.000 (0.002) loss 0.8125 (1.0704) acc 81.2500 (73.3605) lr 9.5173e-05 eta 0:39:15 +epoch [45/50] batch [350/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.5381 (1.0666) acc 87.5000 (73.4643) lr 9.5173e-05 eta 0:39:11 +epoch [45/50] batch [355/500] time 1.053 (0.888) data 0.000 (0.002) loss 1.4102 (1.0695) acc 56.2500 (73.3451) lr 9.5173e-05 eta 0:39:08 +epoch [45/50] batch [360/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.5869 (1.0712) acc 56.2500 (73.2465) lr 9.5173e-05 eta 0:39:04 +epoch [45/50] batch [365/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.7578 (1.0740) acc 81.2500 (73.2106) lr 9.5173e-05 eta 0:39:00 +epoch [45/50] batch [370/500] time 0.903 (0.888) data 0.000 (0.002) loss 0.7617 (1.0735) acc 81.2500 (73.2348) lr 9.5173e-05 eta 0:38:56 +epoch [45/50] batch [375/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.6987 (1.0728) acc 75.0000 (73.2667) lr 9.5173e-05 eta 0:38:51 +epoch [45/50] batch [380/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.3623 (1.0737) acc 65.6250 (73.2648) lr 9.5173e-05 eta 0:38:46 +epoch [45/50] batch [385/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.7451 (1.0721) acc 81.2500 (73.2955) lr 9.5173e-05 eta 0:38:42 +epoch [45/50] batch [390/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.0215 (1.0752) acc 75.0000 (73.2452) lr 9.5173e-05 eta 0:38:37 +epoch [45/50] batch [395/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.9624 (1.0744) acc 68.7500 (73.2278) lr 9.5173e-05 eta 0:38:33 +epoch [45/50] batch [400/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.6733 (1.0719) acc 81.2500 (73.2656) lr 9.5173e-05 eta 0:38:29 +epoch [45/50] batch [405/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.7944 (1.0701) acc 75.0000 (73.2948) lr 9.5173e-05 eta 0:38:24 +epoch [45/50] batch [410/500] time 0.881 (0.888) data 
0.000 (0.002) loss 1.2793 (1.0704) acc 62.5000 (73.2927) lr 9.5173e-05 eta 0:38:20 +epoch [45/50] batch [415/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.8623 (1.0681) acc 75.0000 (73.3358) lr 9.5173e-05 eta 0:38:16 +epoch [45/50] batch [420/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.0469 (1.0674) acc 71.8750 (73.3259) lr 9.5173e-05 eta 0:38:11 +epoch [45/50] batch [425/500] time 0.842 (0.888) data 0.000 (0.002) loss 0.8599 (1.0663) acc 78.1250 (73.3382) lr 9.5173e-05 eta 0:38:06 +epoch [45/50] batch [430/500] time 0.913 (0.888) data 0.000 (0.002) loss 1.0771 (1.0658) acc 81.2500 (73.3794) lr 9.5173e-05 eta 0:38:02 +epoch [45/50] batch [435/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.5635 (1.0647) acc 68.7500 (73.3980) lr 9.5173e-05 eta 0:37:57 +epoch [45/50] batch [440/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.5479 (1.0646) acc 62.5000 (73.4091) lr 9.5173e-05 eta 0:37:52 +epoch [45/50] batch [445/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.4731 (1.0623) acc 87.5000 (73.4129) lr 9.5173e-05 eta 0:37:48 +epoch [45/50] batch [450/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.5991 (1.0601) acc 78.1250 (73.4375) lr 9.5173e-05 eta 0:37:44 +epoch [45/50] batch [455/500] time 0.917 (0.888) data 0.000 (0.002) loss 1.1846 (1.0571) acc 71.8750 (73.5302) lr 9.5173e-05 eta 0:37:39 +epoch [45/50] batch [460/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.0977 (1.0578) acc 71.8750 (73.5190) lr 9.5173e-05 eta 0:37:35 +epoch [45/50] batch [465/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.8154 (1.0574) acc 56.2500 (73.5081) lr 9.5173e-05 eta 0:37:30 +epoch [45/50] batch [470/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.8462 (1.0550) acc 81.2500 (73.5505) lr 9.5173e-05 eta 0:37:26 +epoch [45/50] batch [475/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.8760 (1.0535) acc 75.0000 (73.5592) lr 9.5173e-05 eta 0:37:21 +epoch [45/50] batch [480/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.1631 (1.0544) acc 65.6250 (73.5547) lr 
9.5173e-05 eta 0:37:17 +epoch [45/50] batch [485/500] time 0.851 (0.888) data 0.000 (0.002) loss 1.1533 (1.0556) acc 75.0000 (73.5503) lr 9.5173e-05 eta 0:37:12 +epoch [45/50] batch [490/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.3467 (1.0570) acc 59.3750 (73.4758) lr 9.5173e-05 eta 0:37:08 +epoch [45/50] batch [495/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.3633 (1.0558) acc 68.7500 (73.4975) lr 9.5173e-05 eta 0:37:03 +epoch [45/50] batch [500/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.7256 (1.0599) acc 59.3750 (73.4188) lr 7.0224e-05 eta 0:36:59 +epoch [46/50] batch [5/500] time 0.905 (1.047) data 0.000 (0.153) loss 0.7402 (0.7970) acc 81.2500 (76.2500) lr 7.0224e-05 eta 0:43:32 +epoch [46/50] batch [10/500] time 0.898 (0.969) data 0.000 (0.077) loss 1.2197 (0.9218) acc 78.1250 (75.3125) lr 7.0224e-05 eta 0:40:12 +epoch [46/50] batch [15/500] time 0.859 (0.939) data 0.000 (0.051) loss 0.9160 (0.9370) acc 75.0000 (73.9583) lr 7.0224e-05 eta 0:38:52 +epoch [46/50] batch [20/500] time 0.887 (0.921) data 0.000 (0.038) loss 1.0010 (0.9815) acc 81.2500 (73.7500) lr 7.0224e-05 eta 0:38:03 +epoch [46/50] batch [25/500] time 0.898 (0.912) data 0.000 (0.031) loss 1.1318 (1.0513) acc 71.8750 (73.2500) lr 7.0224e-05 eta 0:37:37 +epoch [46/50] batch [30/500] time 0.859 (0.906) data 0.000 (0.026) loss 0.8657 (1.0788) acc 78.1250 (72.8125) lr 7.0224e-05 eta 0:37:18 +epoch [46/50] batch [35/500] time 0.881 (0.901) data 0.000 (0.022) loss 0.9282 (1.0519) acc 81.2500 (73.1250) lr 7.0224e-05 eta 0:37:01 +epoch [46/50] batch [40/500] time 0.881 (0.898) data 0.000 (0.019) loss 1.0752 (1.0757) acc 71.8750 (72.4219) lr 7.0224e-05 eta 0:36:48 +epoch [46/50] batch [45/500] time 0.888 (0.896) data 0.001 (0.017) loss 1.4990 (1.0687) acc 65.6250 (72.8472) lr 7.0224e-05 eta 0:36:39 +epoch [46/50] batch [50/500] time 0.904 (0.896) data 0.000 (0.016) loss 0.5815 (1.0640) acc 81.2500 (73.0625) lr 7.0224e-05 eta 0:36:34 +epoch [46/50] batch [55/500] time 0.913 (0.896) data 
0.001 (0.014) loss 0.7495 (1.0530) acc 84.3750 (73.4659) lr 7.0224e-05 eta 0:36:30 +epoch [46/50] batch [60/500] time 0.900 (0.898) data 0.000 (0.013) loss 1.2842 (1.0614) acc 71.8750 (73.1250) lr 7.0224e-05 eta 0:36:31 +epoch [46/50] batch [65/500] time 0.883 (0.898) data 0.000 (0.012) loss 1.4219 (1.0835) acc 62.5000 (72.8846) lr 7.0224e-05 eta 0:36:27 +epoch [46/50] batch [70/500] time 0.896 (0.897) data 0.000 (0.011) loss 0.8618 (1.0738) acc 68.7500 (72.7232) lr 7.0224e-05 eta 0:36:20 +epoch [46/50] batch [75/500] time 0.889 (0.897) data 0.000 (0.010) loss 1.4375 (1.0732) acc 68.7500 (72.8333) lr 7.0224e-05 eta 0:36:14 +epoch [46/50] batch [80/500] time 0.886 (0.896) data 0.000 (0.010) loss 0.5317 (1.0696) acc 78.1250 (72.8125) lr 7.0224e-05 eta 0:36:07 +epoch [46/50] batch [85/500] time 0.905 (0.896) data 0.000 (0.009) loss 0.6816 (1.0637) acc 81.2500 (72.8309) lr 7.0224e-05 eta 0:36:03 +epoch [46/50] batch [90/500] time 0.899 (0.895) data 0.000 (0.009) loss 0.8643 (1.0649) acc 75.0000 (72.8819) lr 7.0224e-05 eta 0:35:56 +epoch [46/50] batch [95/500] time 0.880 (0.894) data 0.000 (0.008) loss 1.1309 (1.0803) acc 75.0000 (72.4342) lr 7.0224e-05 eta 0:35:49 +epoch [46/50] batch [100/500] time 0.865 (0.893) data 0.000 (0.008) loss 1.1611 (1.0888) acc 65.6250 (72.0000) lr 7.0224e-05 eta 0:35:43 +epoch [46/50] batch [105/500] time 0.896 (0.895) data 0.001 (0.008) loss 1.1396 (1.0807) acc 71.8750 (72.4107) lr 7.0224e-05 eta 0:35:43 +epoch [46/50] batch [110/500] time 0.902 (0.894) data 0.000 (0.007) loss 0.5083 (1.0701) acc 78.1250 (72.5284) lr 7.0224e-05 eta 0:35:37 +epoch [46/50] batch [115/500] time 0.879 (0.894) data 0.000 (0.007) loss 1.3799 (1.0643) acc 62.5000 (72.5543) lr 7.0224e-05 eta 0:35:31 +epoch [46/50] batch [120/500] time 0.898 (0.893) data 0.000 (0.007) loss 1.0879 (1.0707) acc 75.0000 (72.5521) lr 7.0224e-05 eta 0:35:26 +epoch [46/50] batch [125/500] time 0.912 (0.893) data 0.000 (0.006) loss 0.6758 (1.0651) acc 84.3750 (72.6750) lr 7.0224e-05 eta 
0:35:21 +epoch [46/50] batch [130/500] time 0.900 (0.893) data 0.000 (0.006) loss 1.0732 (1.0543) acc 68.7500 (72.8846) lr 7.0224e-05 eta 0:35:16 +epoch [46/50] batch [135/500] time 0.885 (0.893) data 0.000 (0.006) loss 1.0654 (1.0613) acc 87.5000 (72.8241) lr 7.0224e-05 eta 0:35:11 +epoch [46/50] batch [140/500] time 0.899 (0.892) data 0.000 (0.006) loss 1.0293 (1.0593) acc 71.8750 (72.7232) lr 7.0224e-05 eta 0:35:06 +epoch [46/50] batch [145/500] time 0.872 (0.892) data 0.000 (0.006) loss 0.5864 (1.0570) acc 84.3750 (72.8879) lr 7.0224e-05 eta 0:35:00 +epoch [46/50] batch [150/500] time 0.866 (0.892) data 0.000 (0.005) loss 0.6484 (1.0501) acc 87.5000 (73.1042) lr 7.0224e-05 eta 0:34:55 +epoch [46/50] batch [155/500] time 0.883 (0.891) data 0.000 (0.005) loss 0.9204 (1.0456) acc 75.0000 (73.2056) lr 7.0224e-05 eta 0:34:50 +epoch [46/50] batch [160/500] time 0.875 (0.891) data 0.000 (0.005) loss 1.3799 (1.0528) acc 59.3750 (73.0859) lr 7.0224e-05 eta 0:34:44 +epoch [46/50] batch [165/500] time 0.839 (0.891) data 0.000 (0.005) loss 0.8872 (1.0521) acc 78.1250 (73.1061) lr 7.0224e-05 eta 0:34:39 +epoch [46/50] batch [170/500] time 0.892 (0.890) data 0.000 (0.005) loss 0.8530 (1.0475) acc 81.2500 (73.1066) lr 7.0224e-05 eta 0:34:33 +epoch [46/50] batch [175/500] time 0.896 (0.890) data 0.000 (0.005) loss 1.3740 (1.0493) acc 59.3750 (73.0536) lr 7.0224e-05 eta 0:34:28 +epoch [46/50] batch [180/500] time 0.886 (0.889) data 0.000 (0.004) loss 0.7505 (1.0485) acc 84.3750 (73.1250) lr 7.0224e-05 eta 0:34:22 +epoch [46/50] batch [185/500] time 0.894 (0.889) data 0.000 (0.004) loss 0.7212 (1.0458) acc 84.3750 (73.2770) lr 7.0224e-05 eta 0:34:19 +epoch [46/50] batch [190/500] time 0.893 (0.889) data 0.000 (0.004) loss 0.7861 (1.0492) acc 71.8750 (73.2237) lr 7.0224e-05 eta 0:34:14 +epoch [46/50] batch [195/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.1504 (1.0508) acc 71.8750 (73.1731) lr 7.0224e-05 eta 0:34:10 +epoch [46/50] batch [200/500] time 0.859 (0.889) data 
0.000 (0.004) loss 1.6084 (1.0586) acc 65.6250 (72.9531) lr 7.0224e-05 eta 0:34:05 +epoch [46/50] batch [205/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.8345 (1.0574) acc 75.0000 (72.9726) lr 7.0224e-05 eta 0:34:02 +epoch [46/50] batch [210/500] time 0.871 (0.890) data 0.000 (0.004) loss 1.0479 (1.0580) acc 62.5000 (72.8720) lr 7.0224e-05 eta 0:33:57 +epoch [46/50] batch [215/500] time 0.884 (0.889) data 0.000 (0.004) loss 0.7588 (1.0552) acc 75.0000 (72.8198) lr 7.0224e-05 eta 0:33:52 +epoch [46/50] batch [220/500] time 0.876 (0.889) data 0.000 (0.004) loss 0.5815 (1.0550) acc 81.2500 (72.9403) lr 7.0224e-05 eta 0:33:47 +epoch [46/50] batch [225/500] time 0.867 (0.889) data 0.000 (0.004) loss 0.7339 (1.0513) acc 81.2500 (73.0000) lr 7.0224e-05 eta 0:33:41 +epoch [46/50] batch [230/500] time 0.882 (0.889) data 0.000 (0.004) loss 0.8203 (1.0470) acc 71.8750 (73.0299) lr 7.0224e-05 eta 0:33:37 +epoch [46/50] batch [235/500] time 0.897 (0.889) data 0.000 (0.004) loss 1.1152 (1.0490) acc 65.6250 (72.9388) lr 7.0224e-05 eta 0:33:32 +epoch [46/50] batch [240/500] time 0.900 (0.888) data 0.000 (0.003) loss 0.5723 (1.0463) acc 87.5000 (72.9948) lr 7.0224e-05 eta 0:33:27 +epoch [46/50] batch [245/500] time 1.009 (0.889) data 0.000 (0.003) loss 0.8057 (1.0441) acc 78.1250 (73.0102) lr 7.0224e-05 eta 0:33:24 +epoch [46/50] batch [250/500] time 0.874 (0.889) data 0.001 (0.003) loss 0.8086 (1.0423) acc 78.1250 (73.0250) lr 7.0224e-05 eta 0:33:20 +epoch [46/50] batch [255/500] time 0.871 (0.889) data 0.000 (0.003) loss 1.3379 (1.0442) acc 71.8750 (73.0392) lr 7.0224e-05 eta 0:33:16 +epoch [46/50] batch [260/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.9263 (1.0427) acc 78.1250 (73.0649) lr 7.0224e-05 eta 0:33:11 +epoch [46/50] batch [265/500] time 0.875 (0.889) data 0.000 (0.003) loss 1.2988 (1.0422) acc 71.8750 (73.1486) lr 7.0224e-05 eta 0:33:06 +epoch [46/50] batch [270/500] time 0.866 (0.889) data 0.000 (0.003) loss 0.9307 (1.0385) acc 78.1250 (73.2292) lr 
7.0224e-05 eta 0:33:01 +epoch [46/50] batch [275/500] time 0.913 (0.889) data 0.000 (0.003) loss 0.8359 (1.0348) acc 75.0000 (73.3523) lr 7.0224e-05 eta 0:32:57 +epoch [46/50] batch [280/500] time 0.868 (0.889) data 0.000 (0.003) loss 0.9795 (1.0316) acc 71.8750 (73.4040) lr 7.0224e-05 eta 0:32:52 +epoch [46/50] batch [285/500] time 0.874 (0.888) data 0.000 (0.003) loss 0.7568 (1.0301) acc 81.2500 (73.4101) lr 7.0224e-05 eta 0:32:47 +epoch [46/50] batch [290/500] time 0.878 (0.888) data 0.000 (0.003) loss 1.1328 (1.0321) acc 71.8750 (73.3728) lr 7.0224e-05 eta 0:32:43 +epoch [46/50] batch [295/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.8901 (1.0312) acc 71.8750 (73.3898) lr 7.0224e-05 eta 0:32:39 +epoch [46/50] batch [300/500] time 0.900 (0.888) data 0.000 (0.003) loss 0.8281 (1.0350) acc 71.8750 (73.2500) lr 7.0224e-05 eta 0:32:34 +epoch [46/50] batch [305/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.2285 (1.0386) acc 71.8750 (73.1557) lr 7.0224e-05 eta 0:32:30 +epoch [46/50] batch [310/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.5488 (1.0357) acc 81.2500 (73.2560) lr 7.0224e-05 eta 0:32:25 +epoch [46/50] batch [315/500] time 0.863 (0.888) data 0.000 (0.003) loss 0.6782 (1.0373) acc 68.7500 (73.2044) lr 7.0224e-05 eta 0:32:21 +epoch [46/50] batch [320/500] time 0.897 (0.889) data 0.000 (0.003) loss 0.9634 (1.0378) acc 71.8750 (73.2031) lr 7.0224e-05 eta 0:32:17 +epoch [46/50] batch [325/500] time 0.909 (0.889) data 0.000 (0.003) loss 1.1455 (1.0339) acc 68.7500 (73.2692) lr 7.0224e-05 eta 0:32:12 +epoch [46/50] batch [330/500] time 0.920 (0.889) data 0.000 (0.003) loss 0.5630 (1.0335) acc 84.3750 (73.2670) lr 7.0224e-05 eta 0:32:08 +epoch [46/50] batch [335/500] time 0.897 (0.889) data 0.000 (0.003) loss 0.8359 (1.0326) acc 81.2500 (73.2556) lr 7.0224e-05 eta 0:32:03 +epoch [46/50] batch [340/500] time 0.899 (0.888) data 0.000 (0.003) loss 0.9478 (1.0351) acc 65.6250 (73.2445) lr 7.0224e-05 eta 0:31:58 +epoch [46/50] batch [345/500] time 0.914 
(0.889) data 0.000 (0.002) loss 1.1191 (1.0343) acc 75.0000 (73.2337) lr 7.0224e-05 eta 0:31:55 +epoch [46/50] batch [350/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.9795 (1.0319) acc 78.1250 (73.3214) lr 7.0224e-05 eta 0:31:50 +epoch [46/50] batch [355/500] time 0.891 (0.889) data 0.000 (0.002) loss 0.6611 (1.0323) acc 84.3750 (73.3187) lr 7.0224e-05 eta 0:31:46 +epoch [46/50] batch [360/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.3594 (1.0319) acc 65.6250 (73.3767) lr 7.0224e-05 eta 0:31:42 +epoch [46/50] batch [365/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.1523 (1.0308) acc 75.0000 (73.4846) lr 7.0224e-05 eta 0:31:37 +epoch [46/50] batch [370/500] time 0.864 (0.889) data 0.000 (0.002) loss 0.7520 (1.0287) acc 75.0000 (73.5304) lr 7.0224e-05 eta 0:31:33 +epoch [46/50] batch [375/500] time 0.890 (0.889) data 0.000 (0.002) loss 0.9102 (1.0288) acc 78.1250 (73.5083) lr 7.0224e-05 eta 0:31:28 +epoch [46/50] batch [380/500] time 0.919 (0.889) data 0.000 (0.002) loss 1.2041 (1.0282) acc 75.0000 (73.5444) lr 7.0224e-05 eta 0:31:24 +epoch [46/50] batch [385/500] time 0.847 (0.889) data 0.000 (0.002) loss 0.5024 (1.0236) acc 87.5000 (73.6851) lr 7.0224e-05 eta 0:31:19 +epoch [46/50] batch [390/500] time 0.897 (0.889) data 0.000 (0.002) loss 0.9023 (1.0230) acc 78.1250 (73.6779) lr 7.0224e-05 eta 0:31:16 +epoch [46/50] batch [395/500] time 0.912 (0.889) data 0.000 (0.002) loss 1.1201 (1.0226) acc 71.8750 (73.7104) lr 7.0224e-05 eta 0:31:12 +epoch [46/50] batch [400/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.2686 (1.0232) acc 71.8750 (73.7109) lr 7.0224e-05 eta 0:31:07 +epoch [46/50] batch [405/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.7109 (1.0244) acc 81.2500 (73.7191) lr 7.0224e-05 eta 0:31:03 +epoch [46/50] batch [410/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.1533 (1.0265) acc 75.0000 (73.7195) lr 7.0224e-05 eta 0:30:58 +epoch [46/50] batch [415/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.1787 (1.0256) acc 75.0000 
(73.7500) lr 7.0224e-05 eta 0:30:54 +epoch [46/50] batch [420/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.0469 (1.0245) acc 81.2500 (73.8170) lr 7.0224e-05 eta 0:30:49 +epoch [46/50] batch [425/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0879 (1.0259) acc 71.8750 (73.7721) lr 7.0224e-05 eta 0:30:44 +epoch [46/50] batch [430/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.0400 (1.0287) acc 68.7500 (73.7282) lr 7.0224e-05 eta 0:30:39 +epoch [46/50] batch [435/500] time 0.861 (0.889) data 0.000 (0.002) loss 1.2500 (1.0309) acc 71.8750 (73.6853) lr 7.0224e-05 eta 0:30:35 +epoch [46/50] batch [440/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.0166 (1.0333) acc 78.1250 (73.6151) lr 7.0224e-05 eta 0:30:30 +epoch [46/50] batch [445/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.4355 (1.0329) acc 65.6250 (73.6166) lr 7.0224e-05 eta 0:30:26 +epoch [46/50] batch [450/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.1445 (1.0329) acc 75.0000 (73.6667) lr 7.0224e-05 eta 0:30:21 +epoch [46/50] batch [455/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.1426 (1.0340) acc 75.0000 (73.6470) lr 7.0224e-05 eta 0:30:16 +epoch [46/50] batch [460/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.3037 (1.0328) acc 68.7500 (73.6821) lr 7.0224e-05 eta 0:30:11 +epoch [46/50] batch [465/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.9336 (1.0350) acc 84.3750 (73.6626) lr 7.0224e-05 eta 0:30:07 +epoch [46/50] batch [470/500] time 0.858 (0.888) data 0.000 (0.002) loss 1.1133 (1.0339) acc 75.0000 (73.7168) lr 7.0224e-05 eta 0:30:03 +epoch [46/50] batch [475/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.9097 (1.0333) acc 78.1250 (73.7237) lr 7.0224e-05 eta 0:29:58 +epoch [46/50] batch [480/500] time 0.921 (0.888) data 0.000 (0.002) loss 0.7886 (1.0328) acc 78.1250 (73.7305) lr 7.0224e-05 eta 0:29:54 +epoch [46/50] batch [485/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.4854 (1.0326) acc 78.1250 (73.7178) lr 7.0224e-05 eta 0:29:49 +epoch [46/50] batch [490/500] 
time 0.885 (0.888) data 0.000 (0.002) loss 1.2188 (1.0358) acc 78.1250 (73.6798) lr 7.0224e-05 eta 0:29:45 +epoch [46/50] batch [495/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.1055 (1.0364) acc 68.7500 (73.6174) lr 7.0224e-05 eta 0:29:40 +epoch [46/50] batch [500/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.4775 (1.0392) acc 68.7500 (73.5625) lr 4.8943e-05 eta 0:29:36 +epoch [47/50] batch [5/500] time 0.865 (1.030) data 0.000 (0.145) loss 0.7412 (0.8224) acc 84.3750 (80.0000) lr 4.8943e-05 eta 0:34:15 +epoch [47/50] batch [10/500] time 0.866 (0.953) data 0.000 (0.073) loss 1.1475 (0.8718) acc 75.0000 (78.7500) lr 4.8943e-05 eta 0:31:37 +epoch [47/50] batch [15/500] time 0.873 (0.929) data 0.000 (0.049) loss 1.1719 (0.8863) acc 71.8750 (77.9167) lr 4.8943e-05 eta 0:30:44 +epoch [47/50] batch [20/500] time 0.857 (0.915) data 0.000 (0.037) loss 1.1914 (0.9131) acc 71.8750 (78.2812) lr 4.8943e-05 eta 0:30:12 +epoch [47/50] batch [25/500] time 0.874 (0.909) data 0.000 (0.029) loss 1.2188 (0.9780) acc 65.6250 (76.7500) lr 4.8943e-05 eta 0:29:55 +epoch [47/50] batch [30/500] time 0.924 (0.908) data 0.000 (0.024) loss 1.3633 (1.0074) acc 65.6250 (76.3542) lr 4.8943e-05 eta 0:29:48 +epoch [47/50] batch [35/500] time 0.899 (0.906) data 0.000 (0.021) loss 0.7568 (1.0303) acc 81.2500 (75.5357) lr 4.8943e-05 eta 0:29:39 +epoch [47/50] batch [40/500] time 0.874 (0.902) data 0.000 (0.018) loss 1.3535 (1.0471) acc 65.6250 (75.2344) lr 4.8943e-05 eta 0:29:28 +epoch [47/50] batch [45/500] time 0.897 (0.902) data 0.000 (0.016) loss 0.7656 (1.0393) acc 81.2500 (75.1389) lr 4.8943e-05 eta 0:29:24 +epoch [47/50] batch [50/500] time 0.902 (0.902) data 0.000 (0.015) loss 0.9980 (1.0499) acc 68.7500 (74.7500) lr 4.8943e-05 eta 0:29:19 +epoch [47/50] batch [55/500] time 0.902 (0.901) data 0.000 (0.013) loss 0.4907 (1.0503) acc 78.1250 (74.5455) lr 4.8943e-05 eta 0:29:11 +epoch [47/50] batch [60/500] time 0.873 (0.898) data 0.000 (0.012) loss 1.2295 (1.0645) acc 68.7500 (74.0625) 
lr 4.8943e-05 eta 0:29:02 +epoch [47/50] batch [65/500] time 0.878 (0.898) data 0.000 (0.011) loss 0.7041 (1.0630) acc 78.1250 (73.8462) lr 4.8943e-05 eta 0:28:57 +epoch [47/50] batch [70/500] time 0.885 (0.897) data 0.000 (0.011) loss 0.7461 (1.0619) acc 75.0000 (73.7946) lr 4.8943e-05 eta 0:28:51 +epoch [47/50] batch [75/500] time 0.883 (0.896) data 0.000 (0.010) loss 0.6079 (1.0594) acc 84.3750 (74.0417) lr 4.8943e-05 eta 0:28:44 +epoch [47/50] batch [80/500] time 0.865 (0.895) data 0.000 (0.009) loss 1.1279 (1.0403) acc 75.0000 (74.4141) lr 4.8943e-05 eta 0:28:38 +epoch [47/50] batch [85/500] time 0.991 (0.895) data 0.000 (0.009) loss 1.0176 (1.0440) acc 78.1250 (74.1544) lr 4.8943e-05 eta 0:28:34 +epoch [47/50] batch [90/500] time 0.880 (0.895) data 0.000 (0.008) loss 1.6455 (1.0452) acc 68.7500 (74.2708) lr 4.8943e-05 eta 0:28:29 +epoch [47/50] batch [95/500] time 0.911 (0.894) data 0.001 (0.008) loss 0.7227 (1.0361) acc 75.0000 (74.0789) lr 4.8943e-05 eta 0:28:23 +epoch [47/50] batch [100/500] time 0.896 (0.895) data 0.000 (0.007) loss 1.2979 (1.0277) acc 59.3750 (74.2188) lr 4.8943e-05 eta 0:28:19 +epoch [47/50] batch [105/500] time 0.912 (0.894) data 0.000 (0.007) loss 1.1055 (1.0269) acc 71.8750 (74.1071) lr 4.8943e-05 eta 0:28:14 +epoch [47/50] batch [110/500] time 0.904 (0.894) data 0.000 (0.007) loss 1.1377 (1.0234) acc 65.6250 (74.0625) lr 4.8943e-05 eta 0:28:10 +epoch [47/50] batch [115/500] time 0.874 (0.894) data 0.000 (0.007) loss 1.0615 (1.0345) acc 78.1250 (73.9130) lr 4.8943e-05 eta 0:28:05 +epoch [47/50] batch [120/500] time 0.868 (0.894) data 0.000 (0.006) loss 1.3008 (1.0374) acc 65.6250 (73.9583) lr 4.8943e-05 eta 0:28:00 +epoch [47/50] batch [125/500] time 0.920 (0.894) data 0.000 (0.006) loss 1.1123 (1.0375) acc 71.8750 (74.0500) lr 4.8943e-05 eta 0:27:55 +epoch [47/50] batch [130/500] time 0.889 (0.893) data 0.000 (0.006) loss 1.0303 (1.0372) acc 81.2500 (74.1106) lr 4.8943e-05 eta 0:27:50 +epoch [47/50] batch [135/500] time 0.889 
(0.893) data 0.000 (0.006) loss 0.9375 (1.0420) acc 78.1250 (74.0741) lr 4.8943e-05 eta 0:27:44 +epoch [47/50] batch [140/500] time 0.906 (0.893) data 0.000 (0.005) loss 1.0010 (1.0439) acc 78.1250 (73.9732) lr 4.8943e-05 eta 0:27:40 +epoch [47/50] batch [145/500] time 0.840 (0.893) data 0.000 (0.005) loss 1.1416 (1.0424) acc 75.0000 (74.0086) lr 4.8943e-05 eta 0:27:35 +epoch [47/50] batch [150/500] time 0.863 (0.892) data 0.000 (0.005) loss 1.2334 (1.0493) acc 75.0000 (73.8542) lr 4.8943e-05 eta 0:27:30 +epoch [47/50] batch [155/500] time 0.890 (0.892) data 0.000 (0.005) loss 0.6201 (1.0477) acc 84.3750 (73.8508) lr 4.8943e-05 eta 0:27:26 +epoch [47/50] batch [160/500] time 0.902 (0.892) data 0.000 (0.005) loss 1.1270 (1.0516) acc 65.6250 (73.7500) lr 4.8943e-05 eta 0:27:21 +epoch [47/50] batch [165/500] time 0.912 (0.892) data 0.000 (0.005) loss 1.4541 (1.0572) acc 68.7500 (73.4848) lr 4.8943e-05 eta 0:27:16 +epoch [47/50] batch [170/500] time 0.900 (0.892) data 0.000 (0.004) loss 1.0459 (1.0586) acc 65.6250 (73.3824) lr 4.8943e-05 eta 0:27:13 +epoch [47/50] batch [175/500] time 0.860 (0.892) data 0.000 (0.004) loss 0.9580 (1.0609) acc 75.0000 (73.3929) lr 4.8943e-05 eta 0:27:08 +epoch [47/50] batch [180/500] time 0.887 (0.892) data 0.000 (0.004) loss 1.8164 (1.0574) acc 50.0000 (73.3507) lr 4.8943e-05 eta 0:27:03 +epoch [47/50] batch [185/500] time 0.914 (0.893) data 0.000 (0.004) loss 0.9658 (1.0555) acc 81.2500 (73.4122) lr 4.8943e-05 eta 0:27:00 +epoch [47/50] batch [190/500] time 0.881 (0.892) data 0.000 (0.004) loss 0.8564 (1.0485) acc 75.0000 (73.6020) lr 4.8943e-05 eta 0:26:55 +epoch [47/50] batch [195/500] time 0.900 (0.892) data 0.001 (0.004) loss 1.0654 (1.0496) acc 71.8750 (73.5096) lr 4.8943e-05 eta 0:26:50 +epoch [47/50] batch [200/500] time 0.887 (0.892) data 0.000 (0.004) loss 0.8765 (1.0462) acc 84.3750 (73.6250) lr 4.8943e-05 eta 0:26:45 +epoch [47/50] batch [205/500] time 0.900 (0.892) data 0.000 (0.004) loss 0.4343 (1.0435) acc 87.5000 
(73.6433) lr 4.8943e-05 eta 0:26:41 +epoch [47/50] batch [210/500] time 0.892 (0.892) data 0.000 (0.004) loss 1.4473 (1.0450) acc 71.8750 (73.6310) lr 4.8943e-05 eta 0:26:36 +epoch [47/50] batch [215/500] time 0.897 (0.892) data 0.000 (0.004) loss 0.7900 (1.0448) acc 78.1250 (73.6483) lr 4.8943e-05 eta 0:26:32 +epoch [47/50] batch [220/500] time 0.885 (0.892) data 0.000 (0.004) loss 0.7119 (1.0435) acc 84.3750 (73.6648) lr 4.8943e-05 eta 0:26:27 +epoch [47/50] batch [225/500] time 0.888 (0.892) data 0.000 (0.003) loss 0.6851 (1.0446) acc 81.2500 (73.6111) lr 4.8943e-05 eta 0:26:22 +epoch [47/50] batch [230/500] time 0.899 (0.892) data 0.000 (0.003) loss 1.0586 (1.0442) acc 84.3750 (73.7228) lr 4.8943e-05 eta 0:26:18 +epoch [47/50] batch [235/500] time 0.875 (0.892) data 0.000 (0.003) loss 0.5352 (1.0376) acc 87.5000 (73.8564) lr 4.8943e-05 eta 0:26:14 +epoch [47/50] batch [240/500] time 0.909 (0.892) data 0.000 (0.003) loss 0.6846 (1.0412) acc 81.2500 (73.8021) lr 4.8943e-05 eta 0:26:09 +epoch [47/50] batch [245/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.4033 (1.0442) acc 62.5000 (73.7628) lr 4.8943e-05 eta 0:26:04 +epoch [47/50] batch [250/500] time 0.869 (0.891) data 0.000 (0.003) loss 1.3828 (1.0440) acc 56.2500 (73.7625) lr 4.8943e-05 eta 0:25:59 +epoch [47/50] batch [255/500] time 0.889 (0.891) data 0.000 (0.003) loss 0.7935 (1.0463) acc 75.0000 (73.7255) lr 4.8943e-05 eta 0:25:55 +epoch [47/50] batch [260/500] time 0.877 (0.891) data 0.000 (0.003) loss 1.0557 (1.0464) acc 75.0000 (73.7139) lr 4.8943e-05 eta 0:25:50 +epoch [47/50] batch [265/500] time 0.892 (0.891) data 0.000 (0.003) loss 0.7485 (1.0480) acc 78.1250 (73.5849) lr 4.8943e-05 eta 0:25:45 +epoch [47/50] batch [270/500] time 0.875 (0.891) data 0.000 (0.003) loss 1.1621 (1.0432) acc 71.8750 (73.7153) lr 4.8943e-05 eta 0:25:40 +epoch [47/50] batch [275/500] time 0.858 (0.890) data 0.000 (0.003) loss 0.8545 (1.0424) acc 68.7500 (73.7159) lr 4.8943e-05 eta 0:25:36 +epoch [47/50] batch [280/500] 
time 0.888 (0.891) data 0.000 (0.003) loss 1.4277 (1.0441) acc 65.6250 (73.6384) lr 4.8943e-05 eta 0:25:31 +epoch [47/50] batch [285/500] time 0.908 (0.891) data 0.000 (0.003) loss 0.6978 (1.0425) acc 81.2500 (73.6294) lr 4.8943e-05 eta 0:25:27 +epoch [47/50] batch [290/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.3672 (1.0458) acc 68.7500 (73.6207) lr 4.8943e-05 eta 0:25:22 +epoch [47/50] batch [295/500] time 0.872 (0.890) data 0.000 (0.003) loss 1.1328 (1.0448) acc 65.6250 (73.6441) lr 4.8943e-05 eta 0:25:18 +epoch [47/50] batch [300/500] time 0.868 (0.890) data 0.000 (0.003) loss 1.1641 (1.0453) acc 75.0000 (73.6667) lr 4.8943e-05 eta 0:25:13 +epoch [47/50] batch [305/500] time 0.853 (0.890) data 0.000 (0.003) loss 0.8877 (1.0468) acc 81.2500 (73.5861) lr 4.8943e-05 eta 0:25:08 +epoch [47/50] batch [310/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.6650 (1.0471) acc 59.3750 (73.5685) lr 4.8943e-05 eta 0:25:04 +epoch [47/50] batch [315/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.7607 (1.0513) acc 65.6250 (73.5020) lr 4.8943e-05 eta 0:25:00 +epoch [47/50] batch [320/500] time 0.875 (0.890) data 0.000 (0.003) loss 1.0527 (1.0500) acc 65.6250 (73.4863) lr 4.8943e-05 eta 0:24:55 +epoch [47/50] batch [325/500] time 1.017 (0.891) data 0.000 (0.002) loss 1.2178 (1.0513) acc 68.7500 (73.4904) lr 4.8943e-05 eta 0:24:52 +epoch [47/50] batch [330/500] time 0.932 (0.891) data 0.000 (0.002) loss 1.3428 (1.0535) acc 65.6250 (73.4754) lr 4.8943e-05 eta 0:24:47 +epoch [47/50] batch [335/500] time 0.916 (0.891) data 0.000 (0.002) loss 1.0771 (1.0522) acc 71.8750 (73.5354) lr 4.8943e-05 eta 0:24:43 +epoch [47/50] batch [340/500] time 0.872 (0.891) data 0.000 (0.002) loss 1.4863 (1.0524) acc 62.5000 (73.5662) lr 4.8943e-05 eta 0:24:38 +epoch [47/50] batch [345/500] time 0.867 (0.891) data 0.000 (0.002) loss 1.1152 (1.0525) acc 71.8750 (73.5870) lr 4.8943e-05 eta 0:24:34 +epoch [47/50] batch [350/500] time 0.885 (0.891) data 0.000 (0.002) loss 0.7280 (1.0521) acc 
78.1250 (73.5982) lr 4.8943e-05 eta 0:24:29 +epoch [47/50] batch [355/500] time 0.857 (0.890) data 0.000 (0.002) loss 0.8896 (1.0515) acc 78.1250 (73.5299) lr 4.8943e-05 eta 0:24:24 +epoch [47/50] batch [360/500] time 0.905 (0.890) data 0.000 (0.002) loss 1.0791 (1.0518) acc 75.0000 (73.5156) lr 4.8943e-05 eta 0:24:19 +epoch [47/50] batch [365/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.1758 (1.0509) acc 71.8750 (73.5788) lr 4.8943e-05 eta 0:24:15 +epoch [47/50] batch [370/500] time 0.879 (0.890) data 0.000 (0.002) loss 1.7852 (1.0504) acc 62.5000 (73.6064) lr 4.8943e-05 eta 0:24:11 +epoch [47/50] batch [375/500] time 0.922 (0.890) data 0.000 (0.002) loss 1.8125 (1.0502) acc 59.3750 (73.6333) lr 4.8943e-05 eta 0:24:06 +epoch [47/50] batch [380/500] time 0.911 (0.891) data 0.000 (0.002) loss 0.8530 (1.0505) acc 75.0000 (73.6513) lr 4.8943e-05 eta 0:24:02 +epoch [47/50] batch [385/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.8511 (1.0486) acc 75.0000 (73.7013) lr 4.8943e-05 eta 0:23:58 +epoch [47/50] batch [390/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.8252 (1.0473) acc 62.5000 (73.7260) lr 4.8943e-05 eta 0:23:53 +epoch [47/50] batch [395/500] time 0.875 (0.891) data 0.000 (0.002) loss 1.3633 (1.0481) acc 71.8750 (73.6946) lr 4.8943e-05 eta 0:23:49 +epoch [47/50] batch [400/500] time 0.904 (0.891) data 0.000 (0.002) loss 0.6177 (1.0472) acc 81.2500 (73.6953) lr 4.8943e-05 eta 0:23:45 +epoch [47/50] batch [405/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.3154 (1.0470) acc 65.6250 (73.6883) lr 4.8943e-05 eta 0:23:40 +epoch [47/50] batch [410/500] time 0.904 (0.890) data 0.000 (0.002) loss 0.7178 (1.0480) acc 75.0000 (73.6738) lr 4.8943e-05 eta 0:23:35 +epoch [47/50] batch [415/500] time 0.858 (0.890) data 0.000 (0.002) loss 1.2588 (1.0481) acc 84.3750 (73.7123) lr 4.8943e-05 eta 0:23:31 +epoch [47/50] batch [420/500] time 0.873 (0.890) data 0.000 (0.002) loss 0.8062 (1.0491) acc 75.0000 (73.6979) lr 4.8943e-05 eta 0:23:26 +epoch [47/50] batch 
[425/500] time 0.855 (0.890) data 0.000 (0.002) loss 1.4873 (1.0489) acc 71.8750 (73.7132) lr 4.8943e-05 eta 0:23:21 +epoch [47/50] batch [430/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.6318 (1.0501) acc 56.2500 (73.6628) lr 4.8943e-05 eta 0:23:17 +epoch [47/50] batch [435/500] time 0.907 (0.890) data 0.000 (0.002) loss 0.7637 (1.0489) acc 87.5000 (73.7213) lr 4.8943e-05 eta 0:23:12 +epoch [47/50] batch [440/500] time 0.897 (0.890) data 0.000 (0.002) loss 0.9678 (1.0481) acc 65.6250 (73.7074) lr 4.8943e-05 eta 0:23:08 +epoch [47/50] batch [445/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.0059 (1.0504) acc 68.7500 (73.6587) lr 4.8943e-05 eta 0:23:03 +epoch [47/50] batch [450/500] time 0.907 (0.890) data 0.000 (0.002) loss 1.0225 (1.0470) acc 71.8750 (73.7222) lr 4.8943e-05 eta 0:22:59 +epoch [47/50] batch [455/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.1836 (1.0477) acc 65.6250 (73.7019) lr 4.8943e-05 eta 0:22:55 +epoch [47/50] batch [460/500] time 0.888 (0.890) data 0.000 (0.002) loss 0.7803 (1.0462) acc 75.0000 (73.7364) lr 4.8943e-05 eta 0:22:50 +epoch [47/50] batch [465/500] time 0.911 (0.890) data 0.000 (0.002) loss 0.9258 (1.0446) acc 71.8750 (73.7836) lr 4.8943e-05 eta 0:22:46 +epoch [47/50] batch [470/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.0332 (1.0446) acc 75.0000 (73.7965) lr 4.8943e-05 eta 0:22:41 +epoch [47/50] batch [475/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.4346 (1.0456) acc 62.5000 (73.7961) lr 4.8943e-05 eta 0:22:37 +epoch [47/50] batch [480/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.5254 (1.0463) acc 65.6250 (73.7826) lr 4.8943e-05 eta 0:22:32 +epoch [47/50] batch [485/500] time 0.905 (0.890) data 0.001 (0.002) loss 1.1514 (1.0469) acc 68.7500 (73.7500) lr 4.8943e-05 eta 0:22:28 +epoch [47/50] batch [490/500] time 0.900 (0.890) data 0.000 (0.002) loss 1.2422 (1.0484) acc 78.1250 (73.7500) lr 4.8943e-05 eta 0:22:24 +epoch [47/50] batch [495/500] time 0.872 (0.890) data 0.000 (0.002) loss 1.6113 
(1.0488) acc 62.5000 (73.7311) lr 4.8943e-05 eta 0:22:19 +epoch [47/50] batch [500/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.1035 (1.0494) acc 68.7500 (73.7313) lr 3.1417e-05 eta 0:22:14 +epoch [48/50] batch [5/500] time 0.881 (1.027) data 0.000 (0.145) loss 1.3623 (1.2314) acc 62.5000 (69.3750) lr 3.1417e-05 eta 0:25:35 +epoch [48/50] batch [10/500] time 0.906 (0.953) data 0.000 (0.073) loss 1.0732 (1.1185) acc 78.1250 (70.6250) lr 3.1417e-05 eta 0:23:39 +epoch [48/50] batch [15/500] time 0.881 (0.930) data 0.000 (0.048) loss 1.3545 (1.0766) acc 62.5000 (71.2500) lr 3.1417e-05 eta 0:23:01 +epoch [48/50] batch [20/500] time 0.874 (0.929) data 0.000 (0.036) loss 0.9741 (1.0630) acc 78.1250 (71.8750) lr 3.1417e-05 eta 0:22:54 +epoch [48/50] batch [25/500] time 0.927 (0.921) data 0.000 (0.029) loss 0.8306 (1.0104) acc 78.1250 (73.0000) lr 3.1417e-05 eta 0:22:38 +epoch [48/50] batch [30/500] time 0.887 (0.913) data 0.000 (0.024) loss 0.9531 (1.0499) acc 75.0000 (72.5000) lr 3.1417e-05 eta 0:22:22 +epoch [48/50] batch [35/500] time 0.919 (0.911) data 0.000 (0.021) loss 0.8726 (1.0410) acc 81.2500 (73.2143) lr 3.1417e-05 eta 0:22:15 +epoch [48/50] batch [40/500] time 0.851 (0.907) data 0.000 (0.018) loss 1.5557 (1.0358) acc 59.3750 (73.5938) lr 3.1417e-05 eta 0:22:04 +epoch [48/50] batch [45/500] time 0.901 (0.905) data 0.000 (0.016) loss 1.0596 (1.0542) acc 78.1250 (73.1250) lr 3.1417e-05 eta 0:21:56 +epoch [48/50] batch [50/500] time 0.874 (0.902) data 0.000 (0.015) loss 0.4624 (1.0486) acc 90.6250 (72.9375) lr 3.1417e-05 eta 0:21:48 +epoch [48/50] batch [55/500] time 0.884 (0.900) data 0.000 (0.013) loss 1.1182 (1.0432) acc 78.1250 (72.7841) lr 3.1417e-05 eta 0:21:40 +epoch [48/50] batch [60/500] time 0.996 (0.901) data 0.000 (0.012) loss 0.6484 (1.0433) acc 71.8750 (72.8125) lr 3.1417e-05 eta 0:21:37 +epoch [48/50] batch [65/500] time 0.889 (0.899) data 0.000 (0.011) loss 0.9004 (1.0422) acc 71.8750 (72.7404) lr 3.1417e-05 eta 0:21:30 +epoch [48/50] batch 
[70/500] time 0.913 (0.898) data 0.000 (0.011) loss 1.2070 (1.0383) acc 62.5000 (72.8571) lr 3.1417e-05 eta 0:21:24 +epoch [48/50] batch [75/500] time 0.882 (0.898) data 0.000 (0.010) loss 0.4485 (1.0239) acc 90.6250 (73.3333) lr 3.1417e-05 eta 0:21:19 +epoch [48/50] batch [80/500] time 0.870 (0.897) data 0.000 (0.009) loss 1.1973 (1.0242) acc 71.8750 (73.5547) lr 3.1417e-05 eta 0:21:13 +epoch [48/50] batch [85/500] time 0.858 (0.896) data 0.000 (0.009) loss 1.0371 (1.0141) acc 75.0000 (73.8971) lr 3.1417e-05 eta 0:21:07 +epoch [48/50] batch [90/500] time 0.879 (0.895) data 0.000 (0.008) loss 1.3936 (1.0290) acc 65.6250 (73.5764) lr 3.1417e-05 eta 0:21:01 +epoch [48/50] batch [95/500] time 0.888 (0.894) data 0.000 (0.008) loss 1.0361 (1.0310) acc 71.8750 (73.2895) lr 3.1417e-05 eta 0:20:56 +epoch [48/50] batch [100/500] time 0.849 (0.893) data 0.000 (0.007) loss 1.0586 (1.0258) acc 71.8750 (73.2500) lr 3.1417e-05 eta 0:20:50 +epoch [48/50] batch [105/500] time 0.886 (0.893) data 0.000 (0.007) loss 0.9722 (1.0200) acc 78.1250 (73.6012) lr 3.1417e-05 eta 0:20:45 +epoch [48/50] batch [110/500] time 0.881 (0.892) data 0.000 (0.007) loss 0.9600 (1.0131) acc 84.3750 (73.9489) lr 3.1417e-05 eta 0:20:39 +epoch [48/50] batch [115/500] time 0.839 (0.891) data 0.000 (0.007) loss 0.6255 (1.0086) acc 75.0000 (74.1304) lr 3.1417e-05 eta 0:20:33 +epoch [48/50] batch [120/500] time 0.860 (0.890) data 0.000 (0.006) loss 1.2490 (1.0055) acc 75.0000 (74.1406) lr 3.1417e-05 eta 0:20:28 +epoch [48/50] batch [125/500] time 0.908 (0.891) data 0.000 (0.006) loss 1.1387 (1.0124) acc 68.7500 (74.1250) lr 3.1417e-05 eta 0:20:25 +epoch [48/50] batch [130/500] time 0.870 (0.891) data 0.000 (0.006) loss 1.0703 (1.0192) acc 62.5000 (73.9423) lr 3.1417e-05 eta 0:20:20 +epoch [48/50] batch [135/500] time 0.854 (0.890) data 0.000 (0.006) loss 0.8442 (1.0235) acc 84.3750 (73.8889) lr 3.1417e-05 eta 0:20:15 +epoch [48/50] batch [140/500] time 0.884 (0.890) data 0.000 (0.005) loss 0.8174 (1.0272) acc 
81.2500 (73.8170) lr 3.1417e-05 eta 0:20:10 +epoch [48/50] batch [145/500] time 0.907 (0.890) data 0.000 (0.005) loss 0.8164 (1.0208) acc 78.1250 (73.8793) lr 3.1417e-05 eta 0:20:06 +epoch [48/50] batch [150/500] time 0.853 (0.890) data 0.000 (0.005) loss 0.7744 (1.0177) acc 81.2500 (74.0417) lr 3.1417e-05 eta 0:20:00 +epoch [48/50] batch [155/500] time 0.902 (0.890) data 0.000 (0.005) loss 0.6455 (1.0156) acc 78.1250 (74.1129) lr 3.1417e-05 eta 0:19:56 +epoch [48/50] batch [160/500] time 0.887 (0.891) data 0.000 (0.005) loss 0.6201 (1.0135) acc 84.3750 (74.2578) lr 3.1417e-05 eta 0:19:53 +epoch [48/50] batch [165/500] time 0.914 (0.891) data 0.000 (0.005) loss 1.1084 (1.0129) acc 75.0000 (74.2992) lr 3.1417e-05 eta 0:19:49 +epoch [48/50] batch [170/500] time 0.868 (0.891) data 0.001 (0.005) loss 0.8970 (1.0130) acc 75.0000 (74.3382) lr 3.1417e-05 eta 0:19:44 +epoch [48/50] batch [175/500] time 0.873 (0.890) data 0.000 (0.004) loss 1.2129 (1.0062) acc 71.8750 (74.5714) lr 3.1417e-05 eta 0:19:38 +epoch [48/50] batch [180/500] time 0.865 (0.889) data 0.000 (0.004) loss 1.8340 (1.0118) acc 53.1250 (74.4097) lr 3.1417e-05 eta 0:19:33 +epoch [48/50] batch [185/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.2275 (1.0182) acc 68.7500 (74.2399) lr 3.1417e-05 eta 0:19:28 +epoch [48/50] batch [190/500] time 0.888 (0.889) data 0.000 (0.004) loss 1.0771 (1.0176) acc 71.8750 (74.1776) lr 3.1417e-05 eta 0:19:23 +epoch [48/50] batch [195/500] time 0.879 (0.888) data 0.000 (0.004) loss 1.1611 (1.0252) acc 68.7500 (74.0064) lr 3.1417e-05 eta 0:19:19 +epoch [48/50] batch [200/500] time 0.889 (0.888) data 0.000 (0.004) loss 1.0127 (1.0229) acc 78.1250 (74.1250) lr 3.1417e-05 eta 0:19:14 +epoch [48/50] batch [205/500] time 0.909 (0.889) data 0.000 (0.004) loss 1.7656 (1.0273) acc 62.5000 (74.0549) lr 3.1417e-05 eta 0:19:11 +epoch [48/50] batch [210/500] time 0.916 (0.889) data 0.000 (0.004) loss 1.1162 (1.0289) acc 65.6250 (73.9435) lr 3.1417e-05 eta 0:19:06 +epoch [48/50] batch 
[215/500] time 0.893 (0.889) data 0.000 (0.004) loss 0.6880 (1.0294) acc 84.3750 (73.9244) lr 3.1417e-05 eta 0:19:02 +epoch [48/50] batch [220/500] time 0.909 (0.889) data 0.000 (0.004) loss 1.0859 (1.0294) acc 78.1250 (73.9631) lr 3.1417e-05 eta 0:18:57 +epoch [48/50] batch [225/500] time 0.882 (0.889) data 0.000 (0.003) loss 0.9468 (1.0356) acc 81.2500 (73.8194) lr 3.1417e-05 eta 0:18:53 +epoch [48/50] batch [230/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.2666 (1.0353) acc 65.6250 (73.8451) lr 3.1417e-05 eta 0:18:49 +epoch [48/50] batch [235/500] time 0.898 (0.889) data 0.000 (0.003) loss 0.5508 (1.0340) acc 87.5000 (73.8963) lr 3.1417e-05 eta 0:18:45 +epoch [48/50] batch [240/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.2256 (1.0329) acc 62.5000 (73.8151) lr 3.1417e-05 eta 0:18:40 +epoch [48/50] batch [245/500] time 0.862 (0.889) data 0.000 (0.003) loss 1.0020 (1.0334) acc 71.8750 (73.7883) lr 3.1417e-05 eta 0:18:35 +epoch [48/50] batch [250/500] time 0.892 (0.889) data 0.000 (0.003) loss 0.9805 (1.0365) acc 71.8750 (73.7000) lr 3.1417e-05 eta 0:18:31 +epoch [48/50] batch [255/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.5049 (1.0418) acc 65.6250 (73.6397) lr 3.1417e-05 eta 0:18:26 +epoch [48/50] batch [260/500] time 0.917 (0.889) data 0.000 (0.003) loss 0.7700 (1.0380) acc 78.1250 (73.7500) lr 3.1417e-05 eta 0:18:22 +epoch [48/50] batch [265/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.3086 (1.0404) acc 65.6250 (73.7146) lr 3.1417e-05 eta 0:18:17 +epoch [48/50] batch [270/500] time 0.904 (0.889) data 0.000 (0.003) loss 0.8203 (1.0421) acc 81.2500 (73.6574) lr 3.1417e-05 eta 0:18:13 +epoch [48/50] batch [275/500] time 0.913 (0.889) data 0.000 (0.003) loss 0.5894 (1.0438) acc 81.2500 (73.6250) lr 3.1417e-05 eta 0:18:09 +epoch [48/50] batch [280/500] time 0.897 (0.890) data 0.000 (0.003) loss 1.1182 (1.0455) acc 75.0000 (73.5938) lr 3.1417e-05 eta 0:18:05 +epoch [48/50] batch [285/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.0293 
(1.0459) acc 71.8750 (73.5965) lr 3.1417e-05 eta 0:18:00 +epoch [48/50] batch [290/500] time 0.868 (0.889) data 0.000 (0.003) loss 1.3223 (1.0453) acc 59.3750 (73.5884) lr 3.1417e-05 eta 0:17:55 +epoch [48/50] batch [295/500] time 0.872 (0.889) data 0.001 (0.003) loss 1.0850 (1.0428) acc 78.1250 (73.6441) lr 3.1417e-05 eta 0:17:51 +epoch [48/50] batch [300/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.5649 (1.0394) acc 87.5000 (73.7396) lr 3.1417e-05 eta 0:17:47 +epoch [48/50] batch [305/500] time 0.905 (0.890) data 0.000 (0.003) loss 1.2100 (1.0421) acc 68.7500 (73.6988) lr 3.1417e-05 eta 0:17:43 +epoch [48/50] batch [310/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.6064 (1.0407) acc 81.2500 (73.6996) lr 3.1417e-05 eta 0:17:38 +epoch [48/50] batch [315/500] time 0.952 (0.890) data 0.000 (0.003) loss 1.1484 (1.0436) acc 75.0000 (73.6706) lr 3.1417e-05 eta 0:17:34 +epoch [48/50] batch [320/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.0957 (1.0452) acc 68.7500 (73.6230) lr 3.1417e-05 eta 0:17:30 +epoch [48/50] batch [325/500] time 0.889 (0.890) data 0.000 (0.002) loss 0.8906 (1.0443) acc 81.2500 (73.6442) lr 3.1417e-05 eta 0:17:25 +epoch [48/50] batch [330/500] time 0.876 (0.890) data 0.000 (0.002) loss 1.0205 (1.0426) acc 81.2500 (73.6837) lr 3.1417e-05 eta 0:17:21 +epoch [48/50] batch [335/500] time 0.882 (0.890) data 0.000 (0.002) loss 0.9478 (1.0414) acc 75.0000 (73.6847) lr 3.1417e-05 eta 0:17:16 +epoch [48/50] batch [340/500] time 0.873 (0.890) data 0.000 (0.002) loss 0.9814 (1.0413) acc 71.8750 (73.6765) lr 3.1417e-05 eta 0:17:12 +epoch [48/50] batch [345/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.7754 (1.0425) acc 46.8750 (73.5960) lr 3.1417e-05 eta 0:17:07 +epoch [48/50] batch [350/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.1182 (1.0431) acc 71.8750 (73.5625) lr 3.1417e-05 eta 0:17:03 +epoch [48/50] batch [355/500] time 0.891 (0.890) data 0.000 (0.002) loss 0.7471 (1.0435) acc 81.2500 (73.5827) lr 3.1417e-05 eta 0:16:58 +epoch 
[48/50] batch [360/500] time 0.897 (0.890) data 0.000 (0.002) loss 0.4536 (1.0405) acc 90.6250 (73.7066) lr 3.1417e-05 eta 0:16:54 +epoch [48/50] batch [365/500] time 0.898 (0.890) data 0.000 (0.002) loss 0.8174 (1.0403) acc 84.3750 (73.7414) lr 3.1417e-05 eta 0:16:50 +epoch [48/50] batch [370/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.2979 (1.0427) acc 75.0000 (73.7247) lr 3.1417e-05 eta 0:16:45 +epoch [48/50] batch [375/500] time 0.905 (0.890) data 0.000 (0.002) loss 0.9175 (1.0406) acc 68.7500 (73.7500) lr 3.1417e-05 eta 0:16:40 +epoch [48/50] batch [380/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.3096 (1.0411) acc 62.5000 (73.7089) lr 3.1417e-05 eta 0:16:36 +epoch [48/50] batch [385/500] time 0.896 (0.890) data 0.000 (0.002) loss 0.9834 (1.0435) acc 78.1250 (73.7094) lr 3.1417e-05 eta 0:16:32 +epoch [48/50] batch [390/500] time 0.864 (0.890) data 0.000 (0.002) loss 0.9365 (1.0430) acc 59.3750 (73.6458) lr 3.1417e-05 eta 0:16:27 +epoch [48/50] batch [395/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.3203 (1.0436) acc 71.8750 (73.5997) lr 3.1417e-05 eta 0:16:22 +epoch [48/50] batch [400/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.9536 (1.0433) acc 78.1250 (73.6250) lr 3.1417e-05 eta 0:16:18 +epoch [48/50] batch [405/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.5537 (1.0424) acc 81.2500 (73.6574) lr 3.1417e-05 eta 0:16:13 +epoch [48/50] batch [410/500] time 0.866 (0.889) data 0.001 (0.002) loss 0.7671 (1.0440) acc 81.2500 (73.6280) lr 3.1417e-05 eta 0:16:08 +epoch [48/50] batch [415/500] time 0.870 (0.889) data 0.001 (0.002) loss 0.9326 (1.0452) acc 71.8750 (73.5994) lr 3.1417e-05 eta 0:16:04 +epoch [48/50] batch [420/500] time 0.855 (0.889) data 0.000 (0.002) loss 0.6631 (1.0447) acc 84.3750 (73.6384) lr 3.1417e-05 eta 0:15:59 +epoch [48/50] batch [425/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.0020 (1.0432) acc 71.8750 (73.6985) lr 3.1417e-05 eta 0:15:55 +epoch [48/50] batch [430/500] time 0.882 (0.888) data 0.000 (0.002) loss 
1.7480 (1.0454) acc 62.5000 (73.6701) lr 3.1417e-05 eta 0:15:50 +epoch [48/50] batch [435/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4941 (1.0492) acc 62.5000 (73.5704) lr 3.1417e-05 eta 0:15:45 +epoch [48/50] batch [440/500] time 0.914 (0.888) data 0.000 (0.002) loss 0.6743 (1.0465) acc 81.2500 (73.5866) lr 3.1417e-05 eta 0:15:41 +epoch [48/50] batch [445/500] time 0.999 (0.888) data 0.000 (0.002) loss 1.4766 (1.0490) acc 68.7500 (73.5183) lr 3.1417e-05 eta 0:15:37 +epoch [48/50] batch [450/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1768 (1.0487) acc 75.0000 (73.5278) lr 3.1417e-05 eta 0:15:32 +epoch [48/50] batch [455/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.1309 (1.0520) acc 78.1250 (73.5027) lr 3.1417e-05 eta 0:15:28 +epoch [48/50] batch [460/500] time 0.906 (0.889) data 0.000 (0.002) loss 0.8193 (1.0507) acc 78.1250 (73.5394) lr 3.1417e-05 eta 0:15:24 +epoch [48/50] batch [465/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.4033 (1.0523) acc 75.0000 (73.5349) lr 3.1417e-05 eta 0:15:19 +epoch [48/50] batch [470/500] time 0.910 (0.889) data 0.000 (0.002) loss 0.7378 (1.0534) acc 81.2500 (73.5439) lr 3.1417e-05 eta 0:15:15 +epoch [48/50] batch [475/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.3623 (1.0546) acc 75.0000 (73.5132) lr 3.1417e-05 eta 0:15:10 +epoch [48/50] batch [480/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.1045 (1.0559) acc 78.1250 (73.4701) lr 3.1417e-05 eta 0:15:06 +epoch [48/50] batch [485/500] time 0.879 (0.889) data 0.001 (0.002) loss 0.9346 (1.0555) acc 75.0000 (73.4343) lr 3.1417e-05 eta 0:15:01 +epoch [48/50] batch [490/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.9810 (1.0553) acc 68.7500 (73.4375) lr 3.1417e-05 eta 0:14:57 +epoch [48/50] batch [495/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.4883 (1.0573) acc 62.5000 (73.4154) lr 3.1417e-05 eta 0:14:52 +epoch [48/50] batch [500/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9263 (1.0572) acc 75.0000 (73.4125) lr 1.7713e-05 eta 0:14:48 
+epoch [49/50] batch [5/500] time 0.896 (1.040) data 0.000 (0.148) loss 1.1367 (1.0065) acc 75.0000 (75.0000) lr 1.7713e-05 eta 0:17:14 +epoch [49/50] batch [10/500] time 0.863 (0.955) data 0.000 (0.074) loss 1.2305 (0.9947) acc 71.8750 (74.6875) lr 1.7713e-05 eta 0:15:45 +epoch [49/50] batch [15/500] time 0.879 (0.928) data 0.000 (0.049) loss 0.9614 (1.0716) acc 75.0000 (73.3333) lr 1.7713e-05 eta 0:15:14 +epoch [49/50] batch [20/500] time 0.891 (0.919) data 0.000 (0.037) loss 1.1367 (1.0568) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:15:00 +epoch [49/50] batch [25/500] time 0.894 (0.914) data 0.000 (0.030) loss 0.9414 (1.0517) acc 75.0000 (73.5000) lr 1.7713e-05 eta 0:14:50 +epoch [49/50] batch [30/500] time 0.895 (0.909) data 0.000 (0.025) loss 0.8286 (1.0537) acc 71.8750 (73.1250) lr 1.7713e-05 eta 0:14:41 +epoch [49/50] batch [35/500] time 0.894 (0.910) data 0.000 (0.021) loss 1.2041 (1.0694) acc 75.0000 (72.6786) lr 1.7713e-05 eta 0:14:38 +epoch [49/50] batch [40/500] time 0.892 (0.907) data 0.000 (0.019) loss 0.7324 (1.0519) acc 78.1250 (72.8906) lr 1.7713e-05 eta 0:14:30 +epoch [49/50] batch [45/500] time 0.911 (0.906) data 0.000 (0.017) loss 1.0127 (1.0309) acc 68.7500 (73.0556) lr 1.7713e-05 eta 0:14:24 +epoch [49/50] batch [50/500] time 0.912 (0.905) data 0.000 (0.015) loss 0.6382 (1.0098) acc 90.6250 (73.8125) lr 1.7713e-05 eta 0:14:19 +epoch [49/50] batch [55/500] time 0.869 (0.903) data 0.000 (0.014) loss 1.1797 (1.0306) acc 68.7500 (73.5795) lr 1.7713e-05 eta 0:14:13 +epoch [49/50] batch [60/500] time 0.882 (0.902) data 0.000 (0.013) loss 1.4072 (1.0478) acc 53.1250 (73.3854) lr 1.7713e-05 eta 0:14:07 +epoch [49/50] batch [65/500] time 0.853 (0.900) data 0.000 (0.012) loss 1.0029 (1.0602) acc 75.0000 (72.9327) lr 1.7713e-05 eta 0:14:01 +epoch [49/50] batch [70/500] time 0.886 (0.899) data 0.000 (0.011) loss 1.4092 (1.0585) acc 65.6250 (72.8571) lr 1.7713e-05 eta 0:13:55 +epoch [49/50] batch [75/500] time 0.887 (0.898) data 0.001 (0.010) loss 0.7969 
(1.0603) acc 71.8750 (72.6250) lr 1.7713e-05 eta 0:13:50 +epoch [49/50] batch [80/500] time 0.875 (0.897) data 0.000 (0.009) loss 1.0371 (1.0541) acc 78.1250 (72.7734) lr 1.7713e-05 eta 0:13:45 +epoch [49/50] batch [85/500] time 0.888 (0.896) data 0.000 (0.009) loss 1.2139 (1.0500) acc 71.8750 (73.0515) lr 1.7713e-05 eta 0:13:39 +epoch [49/50] batch [90/500] time 0.865 (0.895) data 0.000 (0.008) loss 0.8940 (1.0506) acc 65.6250 (72.9514) lr 1.7713e-05 eta 0:13:34 +epoch [49/50] batch [95/500] time 0.902 (0.895) data 0.000 (0.008) loss 1.1055 (1.0486) acc 65.6250 (72.8947) lr 1.7713e-05 eta 0:13:29 +epoch [49/50] batch [100/500] time 0.885 (0.894) data 0.000 (0.008) loss 0.4890 (1.0538) acc 84.3750 (72.7500) lr 1.7713e-05 eta 0:13:24 +epoch [49/50] batch [105/500] time 0.916 (0.894) data 0.000 (0.007) loss 1.1104 (1.0552) acc 71.8750 (72.7083) lr 1.7713e-05 eta 0:13:20 +epoch [49/50] batch [110/500] time 0.876 (0.894) data 0.000 (0.007) loss 0.7324 (1.0530) acc 84.3750 (72.8977) lr 1.7713e-05 eta 0:13:15 +epoch [49/50] batch [115/500] time 0.905 (0.894) data 0.000 (0.007) loss 0.6162 (1.0556) acc 81.2500 (72.8261) lr 1.7713e-05 eta 0:13:10 +epoch [49/50] batch [120/500] time 0.883 (0.893) data 0.000 (0.006) loss 0.8047 (1.0556) acc 78.1250 (72.7865) lr 1.7713e-05 eta 0:13:05 +epoch [49/50] batch [125/500] time 0.903 (0.893) data 0.000 (0.006) loss 1.5957 (1.0669) acc 62.5000 (72.6250) lr 1.7713e-05 eta 0:13:01 +epoch [49/50] batch [130/500] time 0.847 (0.892) data 0.000 (0.006) loss 0.9067 (1.0721) acc 84.3750 (72.4279) lr 1.7713e-05 eta 0:12:56 +epoch [49/50] batch [135/500] time 0.877 (0.893) data 0.000 (0.006) loss 1.6621 (1.0696) acc 59.3750 (72.5000) lr 1.7713e-05 eta 0:12:52 +epoch [49/50] batch [140/500] time 0.903 (0.893) data 0.000 (0.006) loss 1.5303 (1.0755) acc 65.6250 (72.5223) lr 1.7713e-05 eta 0:12:48 +epoch [49/50] batch [145/500] time 0.896 (0.893) data 0.000 (0.005) loss 1.4551 (1.0753) acc 71.8750 (72.5000) lr 1.7713e-05 eta 0:12:43 +epoch [49/50] 
batch [150/500] time 0.908 (0.893) data 0.000 (0.005) loss 1.0254 (1.0750) acc 78.1250 (72.5625) lr 1.7713e-05 eta 0:12:38 +epoch [49/50] batch [155/500] time 0.875 (0.892) data 0.000 (0.005) loss 0.5132 (1.0674) acc 87.5000 (72.7218) lr 1.7713e-05 eta 0:12:34 +epoch [49/50] batch [160/500] time 0.909 (0.893) data 0.000 (0.005) loss 1.2227 (1.0661) acc 65.6250 (72.6758) lr 1.7713e-05 eta 0:12:29 +epoch [49/50] batch [165/500] time 0.862 (0.892) data 0.000 (0.005) loss 1.3906 (1.0732) acc 65.6250 (72.5947) lr 1.7713e-05 eta 0:12:25 +epoch [49/50] batch [170/500] time 0.878 (0.892) data 0.000 (0.005) loss 1.2080 (1.0803) acc 68.7500 (72.5000) lr 1.7713e-05 eta 0:12:20 +epoch [49/50] batch [175/500] time 0.870 (0.892) data 0.000 (0.004) loss 1.3457 (1.0791) acc 65.6250 (72.5179) lr 1.7713e-05 eta 0:12:15 +epoch [49/50] batch [180/500] time 0.896 (0.893) data 0.000 (0.004) loss 0.6064 (1.0704) acc 81.2500 (72.7951) lr 1.7713e-05 eta 0:12:11 +epoch [49/50] batch [185/500] time 0.906 (0.893) data 0.000 (0.004) loss 0.9336 (1.0702) acc 75.0000 (72.7534) lr 1.7713e-05 eta 0:12:07 +epoch [49/50] batch [190/500] time 0.870 (0.892) data 0.000 (0.004) loss 0.4055 (1.0661) acc 87.5000 (72.8125) lr 1.7713e-05 eta 0:12:02 +epoch [49/50] batch [195/500] time 0.877 (0.892) data 0.001 (0.004) loss 1.2002 (1.0608) acc 71.8750 (72.8526) lr 1.7713e-05 eta 0:11:58 +epoch [49/50] batch [200/500] time 0.861 (0.892) data 0.000 (0.004) loss 1.1475 (1.0592) acc 59.3750 (72.8906) lr 1.7713e-05 eta 0:11:53 +epoch [49/50] batch [205/500] time 0.867 (0.892) data 0.000 (0.004) loss 1.2109 (1.0565) acc 68.7500 (72.9878) lr 1.7713e-05 eta 0:11:48 +epoch [49/50] batch [210/500] time 0.888 (0.891) data 0.000 (0.004) loss 1.2383 (1.0514) acc 75.0000 (73.0804) lr 1.7713e-05 eta 0:11:44 +epoch [49/50] batch [215/500] time 0.894 (0.891) data 0.000 (0.004) loss 0.8184 (1.0521) acc 71.8750 (73.0669) lr 1.7713e-05 eta 0:11:39 +epoch [49/50] batch [220/500] time 0.894 (0.891) data 0.000 (0.004) loss 0.5049 
(1.0510) acc 87.5000 (73.2102) lr 1.7713e-05 eta 0:11:35 +epoch [49/50] batch [225/500] time 0.872 (0.891) data 0.000 (0.004) loss 1.4893 (1.0575) acc 62.5000 (73.0833) lr 1.7713e-05 eta 0:11:30 +epoch [49/50] batch [230/500] time 0.914 (0.891) data 0.000 (0.003) loss 1.5938 (1.0632) acc 62.5000 (73.0571) lr 1.7713e-05 eta 0:11:25 +epoch [49/50] batch [235/500] time 0.866 (0.891) data 0.000 (0.003) loss 1.3193 (1.0644) acc 65.6250 (73.0718) lr 1.7713e-05 eta 0:11:21 +epoch [49/50] batch [240/500] time 0.898 (0.891) data 0.000 (0.003) loss 1.1602 (1.0616) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:11:16 +epoch [49/50] batch [245/500] time 0.881 (0.890) data 0.000 (0.003) loss 0.8252 (1.0603) acc 78.1250 (73.0995) lr 1.7713e-05 eta 0:11:12 +epoch [49/50] batch [250/500] time 0.906 (0.890) data 0.000 (0.003) loss 0.6826 (1.0603) acc 90.6250 (73.1750) lr 1.7713e-05 eta 0:11:07 +epoch [49/50] batch [255/500] time 0.896 (0.890) data 0.000 (0.003) loss 1.1416 (1.0622) acc 75.0000 (73.2108) lr 1.7713e-05 eta 0:11:02 +epoch [49/50] batch [260/500] time 0.868 (0.889) data 0.000 (0.003) loss 0.9365 (1.0608) acc 81.2500 (73.2452) lr 1.7713e-05 eta 0:10:58 +epoch [49/50] batch [265/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.7529 (1.0557) acc 84.3750 (73.3726) lr 1.7713e-05 eta 0:10:53 +epoch [49/50] batch [270/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.0215 (1.0533) acc 68.7500 (73.3102) lr 1.7713e-05 eta 0:10:49 +epoch [49/50] batch [275/500] time 1.013 (0.890) data 0.000 (0.003) loss 0.9443 (1.0536) acc 71.8750 (73.2727) lr 1.7713e-05 eta 0:10:45 +epoch [49/50] batch [280/500] time 0.866 (0.890) data 0.000 (0.003) loss 0.9927 (1.0549) acc 68.7500 (73.2254) lr 1.7713e-05 eta 0:10:40 +epoch [49/50] batch [285/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.7974 (1.0566) acc 81.2500 (73.1360) lr 1.7713e-05 eta 0:10:36 +epoch [49/50] batch [290/500] time 0.862 (0.889) data 0.000 (0.003) loss 0.7144 (1.0529) acc 84.3750 (73.2112) lr 1.7713e-05 eta 0:10:31 +epoch 
[49/50] batch [295/500] time 0.859 (0.889) data 0.000 (0.003) loss 1.6582 (1.0567) acc 71.8750 (73.1356) lr 1.7713e-05 eta 0:10:27 +epoch [49/50] batch [300/500] time 0.859 (0.889) data 0.000 (0.003) loss 0.7910 (1.0560) acc 81.2500 (73.1458) lr 1.7713e-05 eta 0:10:22 +epoch [49/50] batch [305/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.9653 (1.0518) acc 65.6250 (73.2275) lr 1.7713e-05 eta 0:10:17 +epoch [49/50] batch [310/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.6357 (1.0566) acc 62.5000 (73.1956) lr 1.7713e-05 eta 0:10:13 +epoch [49/50] batch [315/500] time 0.904 (0.889) data 0.000 (0.003) loss 0.9062 (1.0535) acc 78.1250 (73.2440) lr 1.7713e-05 eta 0:10:08 +epoch [49/50] batch [320/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.2109 (1.0541) acc 75.0000 (73.2812) lr 1.7713e-05 eta 0:10:04 +epoch [49/50] batch [325/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.2354 (1.0529) acc 75.0000 (73.3462) lr 1.7713e-05 eta 0:10:00 +epoch [49/50] batch [330/500] time 0.906 (0.889) data 0.000 (0.002) loss 0.9028 (1.0535) acc 68.7500 (73.3523) lr 1.7713e-05 eta 0:09:55 +epoch [49/50] batch [335/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.6685 (1.0522) acc 81.2500 (73.3489) lr 1.7713e-05 eta 0:09:51 +epoch [49/50] batch [340/500] time 0.861 (0.889) data 0.000 (0.002) loss 0.9321 (1.0517) acc 71.8750 (73.3088) lr 1.7713e-05 eta 0:09:46 +epoch [49/50] batch [345/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.0713 (1.0553) acc 68.7500 (73.1703) lr 1.7713e-05 eta 0:09:41 +epoch [49/50] batch [350/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.4863 (1.0560) acc 59.3750 (73.1339) lr 1.7713e-05 eta 0:09:37 +epoch [49/50] batch [355/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.6831 (1.0544) acc 81.2500 (73.1514) lr 1.7713e-05 eta 0:09:33 +epoch [49/50] batch [360/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.6226 (1.0534) acc 78.1250 (73.1597) lr 1.7713e-05 eta 0:09:28 +epoch [49/50] batch [365/500] time 0.910 (0.889) data 0.000 (0.002) loss 
0.7158 (1.0538) acc 75.0000 (73.1164) lr 1.7713e-05 eta 0:09:24 +epoch [49/50] batch [370/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.9722 (1.0535) acc 75.0000 (73.1334) lr 1.7713e-05 eta 0:09:19 +epoch [49/50] batch [375/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.6489 (1.0529) acc 84.3750 (73.1583) lr 1.7713e-05 eta 0:09:15 +epoch [49/50] batch [380/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.8599 (1.0547) acc 75.0000 (73.0839) lr 1.7713e-05 eta 0:09:10 +epoch [49/50] batch [385/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.9580 (1.0540) acc 81.2500 (73.1088) lr 1.7713e-05 eta 0:09:06 +epoch [49/50] batch [390/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.4355 (1.0568) acc 65.6250 (73.0369) lr 1.7713e-05 eta 0:09:01 +epoch [49/50] batch [395/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9810 (1.0571) acc 78.1250 (73.1092) lr 1.7713e-05 eta 0:08:57 +epoch [49/50] batch [400/500] time 0.850 (0.888) data 0.000 (0.002) loss 0.6909 (1.0542) acc 84.3750 (73.1562) lr 1.7713e-05 eta 0:08:52 +epoch [49/50] batch [405/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.6411 (1.0540) acc 78.1250 (73.1327) lr 1.7713e-05 eta 0:08:48 +epoch [49/50] batch [410/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.5854 (1.0547) acc 84.3750 (73.1098) lr 1.7713e-05 eta 0:08:43 +epoch [49/50] batch [415/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.6782 (1.0557) acc 81.2500 (73.1325) lr 1.7713e-05 eta 0:08:39 +epoch [49/50] batch [420/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.2090 (1.0562) acc 71.8750 (73.1250) lr 1.7713e-05 eta 0:08:35 +epoch [49/50] batch [425/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.0488 (1.0569) acc 81.2500 (73.0882) lr 1.7713e-05 eta 0:08:30 +epoch [49/50] batch [430/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.2510 (1.0564) acc 75.0000 (73.1177) lr 1.7713e-05 eta 0:08:26 +epoch [49/50] batch [435/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.7500 (1.0566) acc 68.7500 (73.1250) lr 1.7713e-05 eta 0:08:21 
+epoch [49/50] batch [440/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.7129 (1.0593) acc 65.6250 (73.0895) lr 1.7713e-05 eta 0:08:17 +epoch [49/50] batch [445/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.3125 (1.0613) acc 68.7500 (73.0899) lr 1.7713e-05 eta 0:08:12 +epoch [49/50] batch [450/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.2314 (1.0614) acc 71.8750 (73.0972) lr 1.7713e-05 eta 0:08:08 +epoch [49/50] batch [455/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.9292 (1.0622) acc 71.8750 (73.1113) lr 1.7713e-05 eta 0:08:04 +epoch [49/50] batch [460/500] time 0.848 (0.888) data 0.000 (0.002) loss 1.3652 (1.0645) acc 75.0000 (73.0842) lr 1.7713e-05 eta 0:07:59 +epoch [49/50] batch [465/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.9521 (1.0653) acc 68.7500 (73.0645) lr 1.7713e-05 eta 0:07:55 +epoch [49/50] batch [470/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.9443 (1.0646) acc 78.1250 (73.0851) lr 1.7713e-05 eta 0:07:50 +epoch [49/50] batch [475/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.3242 (1.0651) acc 75.0000 (73.0921) lr 1.7713e-05 eta 0:07:46 +epoch [49/50] batch [480/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.6016 (1.0647) acc 87.5000 (73.1315) lr 1.7713e-05 eta 0:07:41 +epoch [49/50] batch [485/500] time 0.895 (0.888) data 0.001 (0.002) loss 1.4551 (1.0649) acc 62.5000 (73.1314) lr 1.7713e-05 eta 0:07:37 +epoch [49/50] batch [490/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.7192 (1.0650) acc 84.3750 (73.1314) lr 1.7713e-05 eta 0:07:32 +epoch [49/50] batch [495/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.4248 (1.0654) acc 71.8750 (73.1313) lr 1.7713e-05 eta 0:07:28 +epoch [49/50] batch [500/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.5918 (1.0632) acc 81.2500 (73.1562) lr 7.8853e-06 eta 0:07:24 +epoch [50/50] batch [5/500] time 0.879 (1.060) data 0.000 (0.149) loss 1.0283 (0.8270) acc 71.8750 (76.8750) lr 7.8853e-06 eta 0:08:44 +epoch [50/50] batch [10/500] time 0.881 (0.967) data 0.000 (0.074) 
loss 0.8853 (0.9888) acc 78.1250 (71.5625) lr 7.8853e-06 eta 0:07:53 +epoch [50/50] batch [15/500] time 0.886 (0.935) data 0.000 (0.050) loss 1.4463 (0.9821) acc 62.5000 (72.5000) lr 7.8853e-06 eta 0:07:33 +epoch [50/50] batch [20/500] time 0.858 (0.919) data 0.000 (0.037) loss 0.6895 (1.0333) acc 81.2500 (71.7188) lr 7.8853e-06 eta 0:07:20 +epoch [50/50] batch [25/500] time 0.896 (0.912) data 0.000 (0.030) loss 1.0264 (1.0192) acc 68.7500 (71.1250) lr 7.8853e-06 eta 0:07:13 +epoch [50/50] batch [30/500] time 0.884 (0.908) data 0.000 (0.025) loss 1.4297 (1.0243) acc 71.8750 (71.8750) lr 7.8853e-06 eta 0:07:06 +epoch [50/50] batch [35/500] time 0.885 (0.905) data 0.000 (0.021) loss 1.6992 (1.0205) acc 65.6250 (72.3214) lr 7.8853e-06 eta 0:07:01 +epoch [50/50] batch [40/500] time 0.893 (0.903) data 0.000 (0.019) loss 1.5225 (1.0242) acc 68.7500 (72.7344) lr 7.8853e-06 eta 0:06:55 +epoch [50/50] batch [45/500] time 0.876 (0.901) data 0.000 (0.017) loss 0.9150 (1.0465) acc 75.0000 (72.5000) lr 7.8853e-06 eta 0:06:49 +epoch [50/50] batch [50/500] time 0.899 (0.899) data 0.000 (0.015) loss 0.6519 (1.0176) acc 84.3750 (73.1250) lr 7.8853e-06 eta 0:06:44 +epoch [50/50] batch [55/500] time 0.885 (0.897) data 0.000 (0.014) loss 0.7646 (1.0115) acc 75.0000 (73.3523) lr 7.8853e-06 eta 0:06:39 +epoch [50/50] batch [60/500] time 0.868 (0.896) data 0.000 (0.013) loss 0.7549 (1.0044) acc 81.2500 (73.8021) lr 7.8853e-06 eta 0:06:34 +epoch [50/50] batch [65/500] time 0.899 (0.894) data 0.000 (0.012) loss 1.0332 (1.0149) acc 81.2500 (73.8942) lr 7.8853e-06 eta 0:06:29 +epoch [50/50] batch [70/500] time 0.855 (0.893) data 0.000 (0.011) loss 1.2695 (1.0202) acc 65.6250 (73.7054) lr 7.8853e-06 eta 0:06:23 +epoch [50/50] batch [75/500] time 0.874 (0.892) data 0.000 (0.010) loss 1.3350 (1.0189) acc 62.5000 (73.6667) lr 7.8853e-06 eta 0:06:19 +epoch [50/50] batch [80/500] time 0.898 (0.891) data 0.000 (0.010) loss 1.5879 (1.0267) acc 62.5000 (73.4766) lr 7.8853e-06 eta 0:06:14 +epoch 
[50/50] batch [85/500] time 0.902 (0.891) data 0.000 (0.009) loss 1.2080 (1.0378) acc 68.7500 (73.1250) lr 7.8853e-06 eta 0:06:09 +epoch [50/50] batch [90/500] time 0.913 (0.891) data 0.000 (0.008) loss 1.2549 (1.0506) acc 65.6250 (73.0208) lr 7.8853e-06 eta 0:06:05 +epoch [50/50] batch [95/500] time 0.883 (0.890) data 0.000 (0.008) loss 1.4619 (1.0598) acc 53.1250 (72.6974) lr 7.8853e-06 eta 0:06:00 +epoch [50/50] batch [100/500] time 0.873 (0.890) data 0.000 (0.008) loss 0.8232 (1.0647) acc 71.8750 (72.4375) lr 7.8853e-06 eta 0:05:56 +epoch [50/50] batch [105/500] time 0.874 (0.891) data 0.000 (0.007) loss 1.3584 (1.0739) acc 68.7500 (72.1726) lr 7.8853e-06 eta 0:05:52 +epoch [50/50] batch [110/500] time 0.874 (0.891) data 0.000 (0.007) loss 1.4404 (1.0659) acc 68.7500 (72.4432) lr 7.8853e-06 eta 0:05:47 +epoch [50/50] batch [115/500] time 0.885 (0.891) data 0.000 (0.007) loss 0.7012 (1.0692) acc 78.1250 (72.5543) lr 7.8853e-06 eta 0:05:42 +epoch [50/50] batch [120/500] time 0.910 (0.891) data 0.000 (0.006) loss 0.8467 (1.0606) acc 68.7500 (72.4479) lr 7.8853e-06 eta 0:05:38 +epoch [50/50] batch [125/500] time 0.883 (0.890) data 0.000 (0.006) loss 1.0137 (1.0621) acc 75.0000 (72.4500) lr 7.8853e-06 eta 0:05:33 +epoch [50/50] batch [130/500] time 0.895 (0.891) data 0.000 (0.006) loss 0.6157 (1.0558) acc 87.5000 (72.6683) lr 7.8853e-06 eta 0:05:29 +epoch [50/50] batch [135/500] time 0.889 (0.890) data 0.000 (0.006) loss 1.2197 (1.0576) acc 71.8750 (72.8009) lr 7.8853e-06 eta 0:05:25 +epoch [50/50] batch [140/500] time 0.920 (0.890) data 0.000 (0.006) loss 0.8032 (1.0557) acc 75.0000 (72.9018) lr 7.8853e-06 eta 0:05:20 +epoch [50/50] batch [145/500] time 0.984 (0.891) data 0.000 (0.005) loss 0.7505 (1.0521) acc 75.0000 (73.0388) lr 7.8853e-06 eta 0:05:16 +epoch [50/50] batch [150/500] time 0.891 (0.890) data 0.000 (0.005) loss 1.0879 (1.0518) acc 65.6250 (73.0000) lr 7.8853e-06 eta 0:05:11 +epoch [50/50] batch [155/500] time 0.884 (0.890) data 0.000 (0.005) loss 
0.9736 (1.0578) acc 78.1250 (73.0242) lr 7.8853e-06 eta 0:05:07 +epoch [50/50] batch [160/500] time 0.867 (0.890) data 0.000 (0.005) loss 0.7852 (1.0572) acc 75.0000 (73.0078) lr 7.8853e-06 eta 0:05:02 +epoch [50/50] batch [165/500] time 0.884 (0.889) data 0.000 (0.005) loss 0.4619 (1.0546) acc 90.6250 (73.1061) lr 7.8853e-06 eta 0:04:57 +epoch [50/50] batch [170/500] time 0.896 (0.890) data 0.000 (0.005) loss 1.1748 (1.0499) acc 78.1250 (73.2721) lr 7.8853e-06 eta 0:04:53 +epoch [50/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.6191 (1.0453) acc 71.8750 (73.3036) lr 7.8853e-06 eta 0:04:49 +epoch [50/50] batch [180/500] time 0.880 (0.890) data 0.000 (0.004) loss 0.5381 (1.0366) acc 84.3750 (73.5243) lr 7.8853e-06 eta 0:04:44 +epoch [50/50] batch [185/500] time 0.893 (0.890) data 0.000 (0.004) loss 1.3887 (1.0410) acc 62.5000 (73.4459) lr 7.8853e-06 eta 0:04:40 +epoch [50/50] batch [190/500] time 0.888 (0.890) data 0.000 (0.004) loss 0.8145 (1.0399) acc 84.3750 (73.4539) lr 7.8853e-06 eta 0:04:35 +epoch [50/50] batch [195/500] time 0.910 (0.890) data 0.000 (0.004) loss 1.6074 (1.0450) acc 68.7500 (73.3494) lr 7.8853e-06 eta 0:04:31 +epoch [50/50] batch [200/500] time 0.917 (0.890) data 0.000 (0.004) loss 0.9858 (1.0498) acc 81.2500 (73.2031) lr 7.8853e-06 eta 0:04:27 +epoch [50/50] batch [205/500] time 0.895 (0.890) data 0.000 (0.004) loss 1.3564 (1.0511) acc 68.7500 (73.1098) lr 7.8853e-06 eta 0:04:22 +epoch [50/50] batch [210/500] time 0.903 (0.890) data 0.000 (0.004) loss 0.9658 (1.0501) acc 81.2500 (73.1399) lr 7.8853e-06 eta 0:04:17 +epoch [50/50] batch [215/500] time 0.864 (0.889) data 0.000 (0.004) loss 0.6138 (1.0463) acc 78.1250 (73.2703) lr 7.8853e-06 eta 0:04:13 +epoch [50/50] batch [220/500] time 0.919 (0.889) data 0.000 (0.004) loss 0.8218 (1.0445) acc 81.2500 (73.3523) lr 7.8853e-06 eta 0:04:09 +epoch [50/50] batch [225/500] time 0.896 (0.890) data 0.000 (0.004) loss 1.4238 (1.0424) acc 65.6250 (73.3333) lr 7.8853e-06 eta 0:04:04 
+epoch [50/50] batch [230/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.8193 (1.0427) acc 81.2500 (73.3424) lr 7.8853e-06 eta 0:04:00 +epoch [50/50] batch [235/500] time 0.897 (0.890) data 0.000 (0.003) loss 0.6768 (1.0457) acc 78.1250 (73.3511) lr 7.8853e-06 eta 0:03:55 +epoch [50/50] batch [240/500] time 0.873 (0.889) data 0.000 (0.003) loss 0.7178 (1.0439) acc 81.2500 (73.4375) lr 7.8853e-06 eta 0:03:51 +epoch [50/50] batch [245/500] time 0.879 (0.890) data 0.000 (0.003) loss 1.0576 (1.0443) acc 71.8750 (73.4694) lr 7.8853e-06 eta 0:03:46 +epoch [50/50] batch [250/500] time 0.855 (0.890) data 0.000 (0.003) loss 0.6836 (1.0460) acc 78.1250 (73.4000) lr 7.8853e-06 eta 0:03:42 +epoch [50/50] batch [255/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.0205 (1.0462) acc 65.6250 (73.3456) lr 7.8853e-06 eta 0:03:37 +epoch [50/50] batch [260/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.1221 (1.0477) acc 62.5000 (73.2692) lr 7.8853e-06 eta 0:03:33 +epoch [50/50] batch [265/500] time 0.870 (0.889) data 0.000 (0.003) loss 1.1758 (1.0454) acc 75.0000 (73.3373) lr 7.8853e-06 eta 0:03:28 +epoch [50/50] batch [270/500] time 0.892 (0.889) data 0.000 (0.003) loss 0.6284 (1.0449) acc 78.1250 (73.2870) lr 7.8853e-06 eta 0:03:24 +epoch [50/50] batch [275/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.0527 (1.0439) acc 81.2500 (73.3409) lr 7.8853e-06 eta 0:03:19 +epoch [50/50] batch [280/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.8892 (1.0427) acc 71.8750 (73.3259) lr 7.8853e-06 eta 0:03:15 +epoch [50/50] batch [285/500] time 0.851 (0.889) data 0.000 (0.003) loss 0.8530 (1.0456) acc 75.0000 (73.2018) lr 7.8853e-06 eta 0:03:11 +epoch [50/50] batch [290/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.8555 (1.0469) acc 81.2500 (73.1358) lr 7.8853e-06 eta 0:03:06 +epoch [50/50] batch [295/500] time 0.854 (0.889) data 0.000 (0.003) loss 1.2783 (1.0480) acc 75.0000 (73.1780) lr 7.8853e-06 eta 0:03:02 +epoch [50/50] batch [300/500] time 0.866 (0.889) data 0.000 
(0.003) loss 0.6821 (1.0446) acc 90.6250 (73.2917) lr 7.8853e-06 eta 0:02:57 +epoch [50/50] batch [305/500] time 0.863 (0.888) data 0.000 (0.003) loss 1.0146 (1.0463) acc 81.2500 (73.2889) lr 7.8853e-06 eta 0:02:53 +epoch [50/50] batch [310/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.9561 (1.0485) acc 71.8750 (73.2460) lr 7.8853e-06 eta 0:02:48 +epoch [50/50] batch [315/500] time 0.874 (0.888) data 0.000 (0.003) loss 1.5576 (1.0492) acc 62.5000 (73.2540) lr 7.8853e-06 eta 0:02:44 +epoch [50/50] batch [320/500] time 0.861 (0.888) data 0.000 (0.003) loss 0.7373 (1.0474) acc 87.5000 (73.3203) lr 7.8853e-06 eta 0:02:39 +epoch [50/50] batch [325/500] time 0.914 (0.888) data 0.000 (0.003) loss 1.7461 (1.0522) acc 65.6250 (73.2692) lr 7.8853e-06 eta 0:02:35 +epoch [50/50] batch [330/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.7881 (1.0547) acc 81.2500 (73.2481) lr 7.8853e-06 eta 0:02:30 +epoch [50/50] batch [335/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.4353 (1.0531) acc 93.7500 (73.3022) lr 7.8853e-06 eta 0:02:26 +epoch [50/50] batch [340/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.8335 (1.0554) acc 81.2500 (73.2812) lr 7.8853e-06 eta 0:02:22 +epoch [50/50] batch [345/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.9746 (1.0563) acc 78.1250 (73.1975) lr 7.8853e-06 eta 0:02:17 +epoch [50/50] batch [350/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.7119 (1.0551) acc 71.8750 (73.1696) lr 7.8853e-06 eta 0:02:13 +epoch [50/50] batch [355/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.0996 (1.0559) acc 75.0000 (73.1426) lr 7.8853e-06 eta 0:02:08 +epoch [50/50] batch [360/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.9941 (1.0548) acc 78.1250 (73.1684) lr 7.8853e-06 eta 0:02:04 +epoch [50/50] batch [365/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.5703 (1.0553) acc 71.8750 (73.1678) lr 7.8853e-06 eta 0:01:59 +epoch [50/50] batch [370/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.9814 (1.0516) acc 68.7500 (73.2348) lr 7.8853e-06 
eta 0:01:55 +epoch [50/50] batch [375/500] time 0.894 (0.887) data 0.000 (0.002) loss 2.1621 (1.0522) acc 59.3750 (73.2667) lr 7.8853e-06 eta 0:01:50 +epoch [50/50] batch [380/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.4688 (1.0521) acc 65.6250 (73.2977) lr 7.8853e-06 eta 0:01:46 +epoch [50/50] batch [385/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.7954 (1.0531) acc 81.2500 (73.3198) lr 7.8853e-06 eta 0:01:42 +epoch [50/50] batch [390/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.6636 (1.0527) acc 78.1250 (73.3173) lr 7.8853e-06 eta 0:01:37 +epoch [50/50] batch [395/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.7432 (1.0526) acc 78.1250 (73.3228) lr 7.8853e-06 eta 0:01:33 +epoch [50/50] batch [400/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.2910 (1.0532) acc 68.7500 (73.3438) lr 7.8853e-06 eta 0:01:28 +epoch [50/50] batch [405/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.4844 (1.0531) acc 65.6250 (73.3410) lr 7.8853e-06 eta 0:01:24 +epoch [50/50] batch [410/500] time 0.912 (0.887) data 0.000 (0.002) loss 0.7090 (1.0505) acc 78.1250 (73.4223) lr 7.8853e-06 eta 0:01:19 +epoch [50/50] batch [415/500] time 0.914 (0.888) data 0.000 (0.002) loss 1.0820 (1.0497) acc 78.1250 (73.4864) lr 7.8853e-06 eta 0:01:15 +epoch [50/50] batch [420/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.6450 (1.0479) acc 81.2500 (73.5045) lr 7.8853e-06 eta 0:01:11 +epoch [50/50] batch [425/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.3677 (1.0458) acc 90.6250 (73.5809) lr 7.8853e-06 eta 0:01:06 +epoch [50/50] batch [430/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.3506 (1.0465) acc 71.8750 (73.5756) lr 7.8853e-06 eta 0:01:02 +epoch [50/50] batch [435/500] time 0.851 (0.888) data 0.000 (0.002) loss 1.3906 (1.0462) acc 56.2500 (73.5632) lr 7.8853e-06 eta 0:00:57 +epoch [50/50] batch [440/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.6631 (1.0476) acc 71.8750 (73.5440) lr 7.8853e-06 eta 0:00:53 +epoch [50/50] batch [445/500] time 0.883 (0.887) data 
0.000 (0.002) loss 1.2051 (1.0457) acc 68.7500 (73.5885) lr 7.8853e-06 eta 0:00:48 +epoch [50/50] batch [450/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.3496 (1.0489) acc 71.8750 (73.5694) lr 7.8853e-06 eta 0:00:44 +epoch [50/50] batch [455/500] time 0.876 (0.887) data 0.000 (0.002) loss 0.7402 (1.0482) acc 75.0000 (73.5646) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [460/500] time 0.918 (0.887) data 0.000 (0.002) loss 1.0596 (1.0478) acc 75.0000 (73.5462) lr 7.8853e-06 eta 0:00:35 +epoch [50/50] batch [465/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7373 (1.0466) acc 84.3750 (73.5954) lr 7.8853e-06 eta 0:00:31 +epoch [50/50] batch [470/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.7842 (1.0493) acc 71.8750 (73.5638) lr 7.8853e-06 eta 0:00:26 +epoch [50/50] batch [475/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1602 (1.0486) acc 81.2500 (73.6382) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [480/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.0117 (1.0495) acc 78.1250 (73.6589) lr 7.8853e-06 eta 0:00:17 +epoch [50/50] batch [485/500] time 0.923 (0.887) data 0.000 (0.002) loss 0.7983 (1.0481) acc 78.1250 (73.7242) lr 7.8853e-06 eta 0:00:13 +epoch [50/50] batch [490/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.7788 (1.0482) acc 75.0000 (73.6990) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [495/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.9722 (1.0476) acc 78.1250 (73.7311) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [500/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.8242 (1.0479) acc 78.1250 (73.7250) lr 1.9733e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 38,975 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +Elapsed: 6:13:37 diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..e772079e Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698948244.ckb-gpu-a.998015.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698948244.ckb-gpu-a.998015.0 new file mode 100644 index 00000000..b46f9f39 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698948244.ckb-gpu-a.998015.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..bea00dc2 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,5342 @@ +*************** +** Arguments ** +*************** 
+backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + 
SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: NVIDIA A100-SXM4-40GB +GPU 1: NVIDIA A100-SXM4-40GB +GPU 2: NVIDIA A100-SXM4-40GB +GPU 3: NVIDIA A100-SXM4-40GB + +Nvidia driver version: 525.125.06 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 43 bits physical, 48 bits virtual +CPU(s): 256 +On-line CPU(s) list: 0-255 +Thread(s) per core: 2 +Core(s) per socket: 64 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: AuthenticAMD +CPU family: 23 +Model: 49 +Model name: AMD EPYC 7H12 64-Core Processor +Stepping: 0 +Frequency boost: enabled +CPU MHz: 1579.755 +CPU max MHz: 2600.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5200.20 +Virtualization: AMD-V +L1d cache: 4 MiB +L1i cache: 4 MiB +L2 cache: 64 MiB +L3 cache: 512 MiB +NUMA node0 CPU(s): 0-63,128-191 +NUMA node1 CPU(s): 64-127,192-255 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected 
+Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Vulnerable +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca sme sev sev_es + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 
+[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/500] time 0.884 (1.684) data 0.000 (0.139) loss 2.6719 (3.0027) acc 50.0000 (44.3750) lr 1.0000e-05 eta 11:41:19 +epoch [1/50] batch [10/500] time 0.876 (1.282) data 0.000 (0.069) loss 2.1621 (2.6791) acc 53.1250 (47.5000) lr 1.0000e-05 eta 8:54:02 +epoch [1/50] batch [15/500] time 0.893 (1.150) data 0.000 (0.046) loss 2.3926 (2.6344) acc 53.1250 (47.5000) lr 1.0000e-05 eta 7:58:41 +epoch [1/50] batch [20/500] time 0.885 (1.084) data 0.000 (0.035) loss 2.6055 (2.5419) acc 40.6250 (48.4375) lr 1.0000e-05 eta 7:31:16 +epoch [1/50] batch [25/500] time 0.917 (1.047) data 0.000 (0.028) loss 2.2500 (2.4515) acc 46.8750 (49.6250) lr 1.0000e-05 eta 7:15:56 +epoch [1/50] batch [30/500] time 0.900 (1.019) data 0.000 (0.023) loss 1.9385 (2.4207) acc 56.2500 (49.8958) lr 1.0000e-05 eta 7:04:08 +epoch [1/50] batch [35/500] time 0.915 (1.000) data 0.000 (0.020) loss 1.7520 (2.3321) acc 56.2500 (51.2500) lr 1.0000e-05 eta 6:56:12 +epoch [1/50] batch [40/500] time 0.872 (0.986) data 0.000 (0.018) loss 2.2129 (2.2769) acc 50.0000 (51.8750) lr 1.0000e-05 eta 6:50:14 +epoch [1/50] batch [45/500] time 0.885 (0.974) data 0.000 (0.016) loss 1.5869 (2.2609) acc 59.3750 (52.2917) lr 1.0000e-05 eta 6:45:00 +epoch [1/50] batch [50/500] time 0.880 (0.965) data 0.000 (0.014) loss 1.3604 (2.2211) acc 71.8750 (53.1250) lr 1.0000e-05 eta 6:41:14 +epoch [1/50] batch [55/500] time 0.856 (0.957) data 0.000 (0.013) loss 2.5371 (2.1803) acc 40.6250 (53.8068) lr 1.0000e-05 eta 6:38:02 +epoch [1/50] batch [60/500] time 0.887 (0.951) data 0.000 (0.012) loss 1.6885 (2.1439) acc 59.3750 (54.4792) lr 1.0000e-05 eta 6:35:27 +epoch [1/50] batch [65/500] time 0.883 (0.945) data 0.000 (0.011) loss 1.3691 (2.1062) acc 71.8750 (55.2404) lr 1.0000e-05 eta 6:32:45 +epoch [1/50] batch [70/500] time 
0.848 (0.940) data 0.000 (0.010) loss 1.3203 (2.0913) acc 68.7500 (55.5804) lr 1.0000e-05 eta 6:30:28 +epoch [1/50] batch [75/500] time 0.862 (0.934) data 0.000 (0.009) loss 1.7529 (2.0629) acc 56.2500 (55.7917) lr 1.0000e-05 eta 6:28:11 +epoch [1/50] batch [80/500] time 0.876 (0.930) data 0.000 (0.009) loss 1.4912 (2.0368) acc 68.7500 (56.4453) lr 1.0000e-05 eta 6:26:25 +epoch [1/50] batch [85/500] time 0.887 (0.928) data 0.000 (0.008) loss 2.0371 (2.0134) acc 40.6250 (56.5809) lr 1.0000e-05 eta 6:25:19 +epoch [1/50] batch [90/500] time 0.862 (0.925) data 0.000 (0.008) loss 1.4316 (2.0049) acc 59.3750 (56.4236) lr 1.0000e-05 eta 6:24:00 +epoch [1/50] batch [95/500] time 0.893 (0.923) data 0.000 (0.008) loss 0.9624 (1.9725) acc 75.0000 (56.9079) lr 1.0000e-05 eta 6:22:57 +epoch [1/50] batch [100/500] time 0.894 (0.920) data 0.000 (0.007) loss 1.8184 (1.9715) acc 59.3750 (57.0625) lr 1.0000e-05 eta 6:21:59 +epoch [1/50] batch [105/500] time 0.876 (0.918) data 0.000 (0.007) loss 1.8506 (1.9651) acc 59.3750 (57.4107) lr 1.0000e-05 eta 6:20:59 +epoch [1/50] batch [110/500] time 0.896 (0.917) data 0.000 (0.007) loss 1.3242 (1.9542) acc 75.0000 (57.5852) lr 1.0000e-05 eta 6:20:24 +epoch [1/50] batch [115/500] time 0.888 (0.916) data 0.000 (0.006) loss 2.3340 (1.9418) acc 56.2500 (57.7717) lr 1.0000e-05 eta 6:19:46 +epoch [1/50] batch [120/500] time 0.872 (0.915) data 0.000 (0.006) loss 2.0195 (1.9297) acc 46.8750 (58.0469) lr 1.0000e-05 eta 6:19:20 +epoch [1/50] batch [125/500] time 0.865 (0.913) data 0.000 (0.006) loss 1.2373 (1.9106) acc 71.8750 (58.4250) lr 1.0000e-05 eta 6:18:40 +epoch [1/50] batch [130/500] time 0.864 (0.912) data 0.000 (0.006) loss 1.9961 (1.9037) acc 59.3750 (58.5817) lr 1.0000e-05 eta 6:17:57 +epoch [1/50] batch [135/500] time 0.858 (0.910) data 0.001 (0.005) loss 1.7275 (1.8902) acc 56.2500 (58.7963) lr 1.0000e-05 eta 6:17:13 +epoch [1/50] batch [140/500] time 0.878 (0.909) data 0.000 (0.005) loss 2.3594 (1.8775) acc 53.1250 (58.9509) lr 
1.0000e-05 eta 6:16:32 +epoch [1/50] batch [145/500] time 0.881 (0.908) data 0.000 (0.005) loss 1.8564 (1.8715) acc 56.2500 (58.8793) lr 1.0000e-05 eta 6:16:12 +epoch [1/50] batch [150/500] time 0.861 (0.907) data 0.000 (0.005) loss 1.9980 (1.8628) acc 53.1250 (59.1667) lr 1.0000e-05 eta 6:15:38 +epoch [1/50] batch [155/500] time 0.878 (0.906) data 0.000 (0.005) loss 2.0977 (1.8558) acc 56.2500 (59.3347) lr 1.0000e-05 eta 6:15:17 +epoch [1/50] batch [160/500] time 0.861 (0.906) data 0.000 (0.005) loss 1.4668 (1.8541) acc 56.2500 (59.1797) lr 1.0000e-05 eta 6:15:08 +epoch [1/50] batch [165/500] time 0.874 (0.906) data 0.000 (0.004) loss 1.7119 (1.8491) acc 53.1250 (59.1856) lr 1.0000e-05 eta 6:14:57 +epoch [1/50] batch [170/500] time 0.880 (0.906) data 0.000 (0.004) loss 1.7588 (1.8374) acc 62.5000 (59.3566) lr 1.0000e-05 eta 6:14:48 +epoch [1/50] batch [175/500] time 0.918 (0.906) data 0.000 (0.004) loss 2.0781 (1.8385) acc 59.3750 (59.4107) lr 1.0000e-05 eta 6:14:45 +epoch [1/50] batch [180/500] time 0.889 (0.905) data 0.000 (0.004) loss 0.9956 (1.8300) acc 75.0000 (59.5486) lr 1.0000e-05 eta 6:14:28 +epoch [1/50] batch [185/500] time 0.900 (0.905) data 0.000 (0.004) loss 1.1094 (1.8253) acc 65.6250 (59.6284) lr 1.0000e-05 eta 6:14:10 +epoch [1/50] batch [190/500] time 0.885 (0.904) data 0.000 (0.004) loss 1.1211 (1.8156) acc 71.8750 (59.7697) lr 1.0000e-05 eta 6:13:44 +epoch [1/50] batch [195/500] time 0.864 (0.903) data 0.000 (0.004) loss 2.0332 (1.8151) acc 62.5000 (59.8237) lr 1.0000e-05 eta 6:13:21 +epoch [1/50] batch [200/500] time 0.876 (0.903) data 0.000 (0.004) loss 1.7812 (1.8054) acc 65.6250 (59.9844) lr 1.0000e-05 eta 6:13:03 +epoch [1/50] batch [205/500] time 0.897 (0.903) data 0.000 (0.004) loss 1.7979 (1.7951) acc 62.5000 (60.0762) lr 1.0000e-05 eta 6:13:11 +epoch [1/50] batch [210/500] time 0.896 (0.903) data 0.000 (0.004) loss 1.6221 (1.7956) acc 62.5000 (60.0595) lr 1.0000e-05 eta 6:13:01 +epoch [1/50] batch [215/500] time 0.903 (0.903) data 
0.000 (0.003) loss 1.2305 (1.7844) acc 65.6250 (60.2762) lr 1.0000e-05 eta 6:12:52 +epoch [1/50] batch [220/500] time 0.885 (0.903) data 0.000 (0.003) loss 2.3125 (1.7787) acc 56.2500 (60.4403) lr 1.0000e-05 eta 6:12:46 +epoch [1/50] batch [225/500] time 0.911 (0.902) data 0.000 (0.003) loss 1.5068 (1.7711) acc 65.6250 (60.5556) lr 1.0000e-05 eta 6:12:31 +epoch [1/50] batch [230/500] time 0.877 (0.902) data 0.000 (0.003) loss 1.1816 (1.7646) acc 75.0000 (60.7337) lr 1.0000e-05 eta 6:12:12 +epoch [1/50] batch [235/500] time 0.911 (0.902) data 0.000 (0.003) loss 1.4854 (1.7569) acc 62.5000 (60.8112) lr 1.0000e-05 eta 6:12:06 +epoch [1/50] batch [240/500] time 0.873 (0.902) data 0.000 (0.003) loss 1.3838 (1.7569) acc 68.7500 (60.8854) lr 1.0000e-05 eta 6:12:01 +epoch [1/50] batch [245/500] time 0.911 (0.901) data 0.000 (0.003) loss 1.0498 (1.7544) acc 75.0000 (60.9184) lr 1.0000e-05 eta 6:11:50 +epoch [1/50] batch [250/500] time 0.880 (0.901) data 0.000 (0.003) loss 1.2861 (1.7467) acc 59.3750 (61.0000) lr 1.0000e-05 eta 6:11:36 +epoch [1/50] batch [255/500] time 0.893 (0.901) data 0.000 (0.003) loss 1.6689 (1.7407) acc 71.8750 (61.0907) lr 1.0000e-05 eta 6:11:23 +epoch [1/50] batch [260/500] time 0.877 (0.900) data 0.000 (0.003) loss 1.8457 (1.7376) acc 68.7500 (61.2260) lr 1.0000e-05 eta 6:11:15 +epoch [1/50] batch [265/500] time 0.894 (0.900) data 0.000 (0.003) loss 2.2324 (1.7313) acc 46.8750 (61.3679) lr 1.0000e-05 eta 6:11:07 +epoch [1/50] batch [270/500] time 0.903 (0.900) data 0.000 (0.003) loss 1.3438 (1.7195) acc 71.8750 (61.6204) lr 1.0000e-05 eta 6:10:55 +epoch [1/50] batch [275/500] time 0.866 (0.900) data 0.000 (0.003) loss 1.4639 (1.7159) acc 68.7500 (61.6591) lr 1.0000e-05 eta 6:10:41 +epoch [1/50] batch [280/500] time 0.889 (0.899) data 0.000 (0.003) loss 1.2188 (1.7089) acc 65.6250 (61.7969) lr 1.0000e-05 eta 6:10:30 +epoch [1/50] batch [285/500] time 0.867 (0.899) data 0.000 (0.003) loss 1.1113 (1.7032) acc 71.8750 (61.9408) lr 1.0000e-05 eta 
6:10:17 +epoch [1/50] batch [290/500] time 0.906 (0.899) data 0.000 (0.003) loss 1.0664 (1.6993) acc 65.6250 (61.9612) lr 1.0000e-05 eta 6:10:04 +epoch [1/50] batch [295/500] time 0.863 (0.898) data 0.000 (0.003) loss 1.1455 (1.6939) acc 65.6250 (62.0339) lr 1.0000e-05 eta 6:09:52 +epoch [1/50] batch [300/500] time 0.866 (0.898) data 0.000 (0.003) loss 1.6172 (1.6872) acc 56.2500 (62.0208) lr 1.0000e-05 eta 6:09:39 +epoch [1/50] batch [305/500] time 0.871 (0.898) data 0.000 (0.003) loss 1.3730 (1.6857) acc 71.8750 (62.0492) lr 1.0000e-05 eta 6:09:35 +epoch [1/50] batch [310/500] time 0.872 (0.898) data 0.000 (0.002) loss 0.8818 (1.6798) acc 78.1250 (62.1774) lr 1.0000e-05 eta 6:09:26 +epoch [1/50] batch [315/500] time 0.906 (0.898) data 0.000 (0.002) loss 1.0029 (1.6796) acc 65.6250 (62.1726) lr 1.0000e-05 eta 6:09:15 +epoch [1/50] batch [320/500] time 0.880 (0.897) data 0.000 (0.002) loss 1.0869 (1.6703) acc 65.6250 (62.3242) lr 1.0000e-05 eta 6:09:09 +epoch [1/50] batch [325/500] time 0.877 (0.897) data 0.000 (0.002) loss 1.5264 (1.6672) acc 65.6250 (62.3942) lr 1.0000e-05 eta 6:08:55 +epoch [1/50] batch [330/500] time 0.883 (0.897) data 0.001 (0.002) loss 1.6621 (1.6667) acc 59.3750 (62.3674) lr 1.0000e-05 eta 6:08:44 +epoch [1/50] batch [335/500] time 0.916 (0.897) data 0.000 (0.002) loss 1.8838 (1.6611) acc 65.6250 (62.4347) lr 1.0000e-05 eta 6:08:43 +epoch [1/50] batch [340/500] time 0.915 (0.897) data 0.000 (0.002) loss 0.6094 (1.6591) acc 87.5000 (62.5184) lr 1.0000e-05 eta 6:08:38 +epoch [1/50] batch [345/500] time 0.887 (0.897) data 0.000 (0.002) loss 1.0771 (1.6564) acc 71.8750 (62.5634) lr 1.0000e-05 eta 6:08:34 +epoch [1/50] batch [350/500] time 0.912 (0.897) data 0.000 (0.002) loss 1.5244 (1.6549) acc 68.7500 (62.5982) lr 1.0000e-05 eta 6:08:35 +epoch [1/50] batch [355/500] time 0.861 (0.897) data 0.000 (0.002) loss 1.2002 (1.6517) acc 71.8750 (62.6408) lr 1.0000e-05 eta 6:08:29 +epoch [1/50] batch [360/500] time 0.871 (0.897) data 0.000 (0.002) loss 
1.1689 (1.6477) acc 71.8750 (62.6736) lr 1.0000e-05 eta 6:08:14 +epoch [1/50] batch [365/500] time 0.888 (0.896) data 0.000 (0.002) loss 1.4883 (1.6436) acc 59.3750 (62.6712) lr 1.0000e-05 eta 6:08:04 +epoch [1/50] batch [370/500] time 0.905 (0.896) data 0.000 (0.002) loss 1.4648 (1.6414) acc 59.3750 (62.7196) lr 1.0000e-05 eta 6:07:58 +epoch [1/50] batch [375/500] time 0.911 (0.896) data 0.000 (0.002) loss 2.1250 (1.6374) acc 53.1250 (62.7500) lr 1.0000e-05 eta 6:07:51 +epoch [1/50] batch [380/500] time 0.879 (0.896) data 0.000 (0.002) loss 1.3408 (1.6347) acc 75.0000 (62.7632) lr 1.0000e-05 eta 6:07:42 +epoch [1/50] batch [385/500] time 0.875 (0.896) data 0.000 (0.002) loss 1.7363 (1.6326) acc 62.5000 (62.8084) lr 1.0000e-05 eta 6:07:33 +epoch [1/50] batch [390/500] time 0.890 (0.896) data 0.000 (0.002) loss 1.1152 (1.6298) acc 71.8750 (62.9407) lr 1.0000e-05 eta 6:07:23 +epoch [1/50] batch [395/500] time 0.884 (0.896) data 0.000 (0.002) loss 0.9424 (1.6234) acc 71.8750 (63.0617) lr 1.0000e-05 eta 6:07:18 +epoch [1/50] batch [400/500] time 0.900 (0.896) data 0.001 (0.002) loss 1.6748 (1.6199) acc 59.3750 (63.1797) lr 1.0000e-05 eta 6:07:14 +epoch [1/50] batch [405/500] time 0.867 (0.896) data 0.000 (0.002) loss 0.9951 (1.6166) acc 59.3750 (63.1944) lr 1.0000e-05 eta 6:07:08 +epoch [1/50] batch [410/500] time 0.884 (0.895) data 0.000 (0.002) loss 1.7021 (1.6145) acc 62.5000 (63.2088) lr 1.0000e-05 eta 6:06:58 +epoch [1/50] batch [415/500] time 0.902 (0.895) data 0.000 (0.002) loss 1.7559 (1.6136) acc 62.5000 (63.2756) lr 1.0000e-05 eta 6:06:50 +epoch [1/50] batch [420/500] time 0.857 (0.895) data 0.000 (0.002) loss 1.8350 (1.6125) acc 53.1250 (63.2812) lr 1.0000e-05 eta 6:06:38 +epoch [1/50] batch [425/500] time 0.880 (0.895) data 0.000 (0.002) loss 1.9307 (1.6128) acc 62.5000 (63.2868) lr 1.0000e-05 eta 6:06:31 +epoch [1/50] batch [430/500] time 0.918 (0.895) data 0.000 (0.002) loss 1.2988 (1.6113) acc 75.0000 (63.3285) lr 1.0000e-05 eta 6:06:25 +epoch [1/50] 
batch [435/500] time 0.905 (0.895) data 0.000 (0.002) loss 1.1514 (1.6075) acc 71.8750 (63.4124) lr 1.0000e-05 eta 6:06:20 +epoch [1/50] batch [440/500] time 0.881 (0.895) data 0.000 (0.002) loss 1.1328 (1.6046) acc 71.8750 (63.4588) lr 1.0000e-05 eta 6:06:12 +epoch [1/50] batch [445/500] time 0.991 (0.895) data 0.000 (0.002) loss 1.1445 (1.6021) acc 75.0000 (63.5253) lr 1.0000e-05 eta 6:06:11 +epoch [1/50] batch [450/500] time 0.873 (0.895) data 0.000 (0.002) loss 2.3711 (1.6010) acc 56.2500 (63.5278) lr 1.0000e-05 eta 6:06:03 +epoch [1/50] batch [455/500] time 0.886 (0.895) data 0.000 (0.002) loss 1.4912 (1.5982) acc 56.2500 (63.5920) lr 1.0000e-05 eta 6:05:55 +epoch [1/50] batch [460/500] time 0.926 (0.895) data 0.000 (0.002) loss 1.8564 (1.5994) acc 56.2500 (63.5938) lr 1.0000e-05 eta 6:05:53 +epoch [1/50] batch [465/500] time 0.866 (0.894) data 0.000 (0.002) loss 0.8276 (1.5967) acc 75.0000 (63.6156) lr 1.0000e-05 eta 6:05:45 +epoch [1/50] batch [470/500] time 0.885 (0.894) data 0.000 (0.002) loss 1.4160 (1.5934) acc 71.8750 (63.6636) lr 1.0000e-05 eta 6:05:41 +epoch [1/50] batch [475/500] time 0.871 (0.894) data 0.000 (0.002) loss 1.5449 (1.5898) acc 71.8750 (63.7434) lr 1.0000e-05 eta 6:05:33 +epoch [1/50] batch [480/500] time 0.909 (0.894) data 0.000 (0.002) loss 2.2285 (1.5881) acc 56.2500 (63.7695) lr 1.0000e-05 eta 6:05:28 +epoch [1/50] batch [485/500] time 0.898 (0.894) data 0.001 (0.002) loss 1.3701 (1.5830) acc 75.0000 (63.8595) lr 1.0000e-05 eta 6:05:24 +epoch [1/50] batch [490/500] time 0.905 (0.895) data 0.000 (0.002) loss 1.2080 (1.5812) acc 68.7500 (63.8776) lr 1.0000e-05 eta 6:05:25 +epoch [1/50] batch [495/500] time 0.876 (0.895) data 0.000 (0.002) loss 1.5107 (1.5811) acc 65.6250 (63.8321) lr 1.0000e-05 eta 6:05:22 +epoch [1/50] batch [500/500] time 0.901 (0.894) data 0.000 (0.002) loss 1.5723 (1.5794) acc 65.6250 (63.8563) lr 2.0000e-03 eta 6:05:14 +epoch [2/50] batch [5/500] time 0.894 (1.067) data 0.000 (0.168) loss 1.4189 (1.6854) acc 
65.6250 (61.8750) lr 2.0000e-03 eta 7:15:33 +epoch [2/50] batch [10/500] time 0.870 (0.976) data 0.000 (0.084) loss 1.4326 (1.5487) acc 53.1250 (61.8750) lr 2.0000e-03 eta 6:38:20 +epoch [2/50] batch [15/500] time 0.897 (0.945) data 0.000 (0.056) loss 1.7578 (1.5940) acc 59.3750 (62.5000) lr 2.0000e-03 eta 6:25:44 +epoch [2/50] batch [20/500] time 0.860 (0.930) data 0.000 (0.042) loss 1.5996 (1.5005) acc 71.8750 (64.8438) lr 2.0000e-03 eta 6:19:22 +epoch [2/50] batch [25/500] time 0.883 (0.922) data 0.000 (0.034) loss 0.9883 (1.4500) acc 75.0000 (65.3750) lr 2.0000e-03 eta 6:15:58 +epoch [2/50] batch [30/500] time 0.875 (0.918) data 0.000 (0.028) loss 2.1055 (1.4653) acc 56.2500 (65.0000) lr 2.0000e-03 eta 6:14:25 +epoch [2/50] batch [35/500] time 0.863 (0.917) data 0.000 (0.024) loss 1.6338 (1.4829) acc 62.5000 (64.6429) lr 2.0000e-03 eta 6:13:44 +epoch [2/50] batch [40/500] time 0.905 (0.912) data 0.000 (0.021) loss 1.2402 (1.4601) acc 84.3750 (65.5469) lr 2.0000e-03 eta 6:11:58 +epoch [2/50] batch [45/500] time 0.933 (0.912) data 0.000 (0.019) loss 1.1172 (1.4590) acc 65.6250 (65.4167) lr 2.0000e-03 eta 6:11:39 +epoch [2/50] batch [50/500] time 0.867 (0.908) data 0.000 (0.017) loss 1.3027 (1.4314) acc 62.5000 (65.4375) lr 2.0000e-03 eta 6:10:08 +epoch [2/50] batch [55/500] time 0.871 (0.905) data 0.000 (0.016) loss 1.7744 (1.4307) acc 59.3750 (64.9432) lr 2.0000e-03 eta 6:08:44 +epoch [2/50] batch [60/500] time 0.881 (0.904) data 0.000 (0.014) loss 1.1621 (1.4316) acc 68.7500 (65.3125) lr 2.0000e-03 eta 6:08:09 +epoch [2/50] batch [65/500] time 0.893 (0.903) data 0.000 (0.013) loss 1.0371 (1.4093) acc 75.0000 (65.7692) lr 2.0000e-03 eta 6:07:50 +epoch [2/50] batch [70/500] time 0.908 (0.903) data 0.000 (0.012) loss 1.4082 (1.3887) acc 65.6250 (66.3393) lr 2.0000e-03 eta 6:07:49 +epoch [2/50] batch [75/500] time 0.880 (0.902) data 0.000 (0.011) loss 0.9438 (1.3539) acc 68.7500 (67.0000) lr 2.0000e-03 eta 6:07:20 +epoch [2/50] batch [80/500] time 0.879 (0.902) 
data 0.000 (0.011) loss 1.6768 (1.3735) acc 59.3750 (66.4844) lr 2.0000e-03 eta 6:06:54 +epoch [2/50] batch [85/500] time 0.885 (0.900) data 0.000 (0.010) loss 1.0859 (1.3614) acc 71.8750 (66.7647) lr 2.0000e-03 eta 6:06:22 +epoch [2/50] batch [90/500] time 0.876 (0.900) data 0.000 (0.010) loss 0.5703 (1.3541) acc 78.1250 (66.5972) lr 2.0000e-03 eta 6:06:00 +epoch [2/50] batch [95/500] time 0.898 (0.899) data 0.000 (0.009) loss 0.7100 (1.3496) acc 78.1250 (66.7105) lr 2.0000e-03 eta 6:05:41 +epoch [2/50] batch [100/500] time 0.898 (0.898) data 0.000 (0.009) loss 0.6499 (1.3423) acc 87.5000 (66.9688) lr 2.0000e-03 eta 6:05:18 +epoch [2/50] batch [105/500] time 0.864 (0.897) data 0.000 (0.008) loss 1.7461 (1.3455) acc 62.5000 (67.0536) lr 2.0000e-03 eta 6:04:44 +epoch [2/50] batch [110/500] time 0.881 (0.897) data 0.000 (0.008) loss 1.3076 (1.3536) acc 71.8750 (66.9318) lr 2.0000e-03 eta 6:04:33 +epoch [2/50] batch [115/500] time 0.877 (0.896) data 0.000 (0.008) loss 1.2090 (1.3473) acc 62.5000 (66.9022) lr 2.0000e-03 eta 6:04:17 +epoch [2/50] batch [120/500] time 0.870 (0.895) data 0.000 (0.007) loss 0.9180 (1.3439) acc 71.8750 (66.9531) lr 2.0000e-03 eta 6:03:44 +epoch [2/50] batch [125/500] time 0.859 (0.894) data 0.000 (0.007) loss 1.0654 (1.3375) acc 78.1250 (67.0500) lr 2.0000e-03 eta 6:03:13 +epoch [2/50] batch [130/500] time 0.884 (0.894) data 0.000 (0.007) loss 1.4600 (1.3363) acc 65.6250 (66.9952) lr 2.0000e-03 eta 6:03:01 +epoch [2/50] batch [135/500] time 0.896 (0.895) data 0.000 (0.006) loss 0.8970 (1.3313) acc 81.2500 (67.0833) lr 2.0000e-03 eta 6:03:27 +epoch [2/50] batch [140/500] time 0.874 (0.895) data 0.000 (0.006) loss 1.0986 (1.3257) acc 62.5000 (67.1429) lr 2.0000e-03 eta 6:03:16 +epoch [2/50] batch [145/500] time 0.913 (0.895) data 0.000 (0.006) loss 1.0605 (1.3214) acc 71.8750 (67.2845) lr 2.0000e-03 eta 6:03:10 +epoch [2/50] batch [150/500] time 0.915 (0.895) data 0.000 (0.006) loss 1.1279 (1.3116) acc 68.7500 (67.5000) lr 2.0000e-03 eta 
6:03:15 +epoch [2/50] batch [155/500] time 0.874 (0.895) data 0.000 (0.006) loss 0.9062 (1.3007) acc 81.2500 (67.7016) lr 2.0000e-03 eta 6:03:08 +epoch [2/50] batch [160/500] time 0.901 (0.895) data 0.000 (0.006) loss 1.3555 (1.3097) acc 71.8750 (67.5977) lr 2.0000e-03 eta 6:02:55 +epoch [2/50] batch [165/500] time 0.880 (0.894) data 0.000 (0.005) loss 1.0547 (1.3168) acc 65.6250 (67.4432) lr 2.0000e-03 eta 6:02:46 +epoch [2/50] batch [170/500] time 0.908 (0.894) data 0.000 (0.005) loss 1.0098 (1.3141) acc 71.8750 (67.5551) lr 2.0000e-03 eta 6:02:32 +epoch [2/50] batch [175/500] time 0.871 (0.894) data 0.000 (0.005) loss 1.7373 (1.3198) acc 62.5000 (67.4286) lr 2.0000e-03 eta 6:02:21 +epoch [2/50] batch [180/500] time 0.895 (0.895) data 0.000 (0.005) loss 1.4453 (1.3108) acc 59.3750 (67.6042) lr 2.0000e-03 eta 6:02:35 +epoch [2/50] batch [185/500] time 0.897 (0.894) data 0.000 (0.005) loss 1.1943 (1.3051) acc 81.2500 (67.7534) lr 2.0000e-03 eta 6:02:28 +epoch [2/50] batch [190/500] time 0.863 (0.894) data 0.000 (0.005) loss 0.8296 (1.3013) acc 68.7500 (67.7303) lr 2.0000e-03 eta 6:02:08 +epoch [2/50] batch [195/500] time 0.866 (0.893) data 0.000 (0.005) loss 1.4150 (1.3035) acc 62.5000 (67.6923) lr 2.0000e-03 eta 6:01:51 +epoch [2/50] batch [200/500] time 0.892 (0.893) data 0.000 (0.004) loss 1.6357 (1.2997) acc 68.7500 (67.7344) lr 2.0000e-03 eta 6:01:39 +epoch [2/50] batch [205/500] time 0.909 (0.893) data 0.000 (0.004) loss 0.9722 (1.2944) acc 78.1250 (67.9268) lr 2.0000e-03 eta 6:01:44 +epoch [2/50] batch [210/500] time 0.891 (0.893) data 0.000 (0.004) loss 1.0088 (1.2909) acc 71.8750 (67.9613) lr 2.0000e-03 eta 6:01:40 +epoch [2/50] batch [215/500] time 0.884 (0.893) data 0.000 (0.004) loss 1.6562 (1.2892) acc 68.7500 (67.9797) lr 2.0000e-03 eta 6:01:28 +epoch [2/50] batch [220/500] time 0.874 (0.893) data 0.000 (0.004) loss 1.1094 (1.2891) acc 71.8750 (67.9545) lr 2.0000e-03 eta 6:01:17 +epoch [2/50] batch [225/500] time 0.875 (0.893) data 0.000 (0.004) loss 
1.2676 (1.2870) acc 71.8750 (68.0000) lr 2.0000e-03 eta 6:01:11 +epoch [2/50] batch [230/500] time 0.866 (0.893) data 0.000 (0.004) loss 1.7168 (1.2925) acc 65.6250 (67.9484) lr 2.0000e-03 eta 6:01:03 +epoch [2/50] batch [235/500] time 0.863 (0.892) data 0.000 (0.004) loss 0.9351 (1.2870) acc 62.5000 (67.9920) lr 2.0000e-03 eta 6:00:46 +epoch [2/50] batch [240/500] time 0.910 (0.892) data 0.000 (0.004) loss 1.3633 (1.2803) acc 56.2500 (68.1250) lr 2.0000e-03 eta 6:00:37 +epoch [2/50] batch [245/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.0859 (1.2775) acc 68.7500 (68.1760) lr 2.0000e-03 eta 6:00:28 +epoch [2/50] batch [250/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.9092 (1.2815) acc 56.2500 (68.1250) lr 2.0000e-03 eta 6:00:22 +epoch [2/50] batch [255/500] time 0.881 (0.891) data 0.000 (0.004) loss 1.8711 (1.2801) acc 53.1250 (68.1740) lr 2.0000e-03 eta 6:00:10 +epoch [2/50] batch [260/500] time 0.899 (0.891) data 0.000 (0.003) loss 1.5557 (1.2814) acc 53.1250 (68.1370) lr 2.0000e-03 eta 6:00:09 +epoch [2/50] batch [265/500] time 0.897 (0.892) data 0.000 (0.003) loss 1.3291 (1.2801) acc 65.6250 (68.1250) lr 2.0000e-03 eta 6:00:06 +epoch [2/50] batch [270/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.2949 (1.2835) acc 62.5000 (68.0787) lr 2.0000e-03 eta 5:59:54 +epoch [2/50] batch [275/500] time 0.991 (0.892) data 0.000 (0.003) loss 0.7656 (1.2793) acc 71.8750 (68.0909) lr 2.0000e-03 eta 5:59:59 +epoch [2/50] batch [280/500] time 0.875 (0.891) data 0.000 (0.003) loss 1.8750 (1.2792) acc 68.7500 (68.1696) lr 2.0000e-03 eta 5:59:49 +epoch [2/50] batch [285/500] time 0.894 (0.891) data 0.000 (0.003) loss 1.3086 (1.2793) acc 68.7500 (68.2127) lr 2.0000e-03 eta 5:59:44 +epoch [2/50] batch [290/500] time 0.875 (0.891) data 0.000 (0.003) loss 1.3516 (1.2761) acc 71.8750 (68.2651) lr 2.0000e-03 eta 5:59:42 +epoch [2/50] batch [295/500] time 0.886 (0.891) data 0.000 (0.003) loss 0.7388 (1.2731) acc 87.5000 (68.3792) lr 2.0000e-03 eta 5:59:36 +epoch [2/50] 
batch [300/500] time 0.871 (0.891) data 0.000 (0.003) loss 1.3262 (1.2765) acc 65.6250 (68.2917) lr 2.0000e-03 eta 5:59:33 +epoch [2/50] batch [305/500] time 0.908 (0.891) data 0.000 (0.003) loss 1.1758 (1.2767) acc 71.8750 (68.3504) lr 2.0000e-03 eta 5:59:29 +epoch [2/50] batch [310/500] time 0.910 (0.892) data 0.000 (0.003) loss 1.6562 (1.2778) acc 65.6250 (68.3367) lr 2.0000e-03 eta 5:59:26 +epoch [2/50] batch [315/500] time 0.891 (0.891) data 0.000 (0.003) loss 1.2373 (1.2780) acc 62.5000 (68.3433) lr 2.0000e-03 eta 5:59:16 +epoch [2/50] batch [320/500] time 0.886 (0.892) data 0.000 (0.003) loss 0.8784 (1.2777) acc 81.2500 (68.3887) lr 2.0000e-03 eta 5:59:18 +epoch [2/50] batch [325/500] time 0.887 (0.891) data 0.000 (0.003) loss 1.4268 (1.2784) acc 62.5000 (68.3558) lr 2.0000e-03 eta 5:59:06 +epoch [2/50] batch [330/500] time 0.902 (0.891) data 0.000 (0.003) loss 1.8887 (1.2775) acc 50.0000 (68.3617) lr 2.0000e-03 eta 5:59:03 +epoch [2/50] batch [335/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.1836 (1.2758) acc 68.7500 (68.3675) lr 2.0000e-03 eta 5:59:00 +epoch [2/50] batch [340/500] time 0.889 (0.891) data 0.000 (0.003) loss 1.2471 (1.2753) acc 68.7500 (68.3915) lr 2.0000e-03 eta 5:58:53 +epoch [2/50] batch [345/500] time 0.884 (0.891) data 0.000 (0.003) loss 0.5664 (1.2715) acc 81.2500 (68.4783) lr 2.0000e-03 eta 5:58:47 +epoch [2/50] batch [350/500] time 0.892 (0.891) data 0.000 (0.003) loss 0.7305 (1.2673) acc 78.1250 (68.5714) lr 2.0000e-03 eta 5:58:45 +epoch [2/50] batch [355/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.3516 (1.2673) acc 62.5000 (68.5651) lr 2.0000e-03 eta 5:58:39 +epoch [2/50] batch [360/500] time 0.891 (0.891) data 0.000 (0.003) loss 1.4941 (1.2686) acc 62.5000 (68.6111) lr 2.0000e-03 eta 5:58:33 +epoch [2/50] batch [365/500] time 0.928 (0.891) data 0.000 (0.003) loss 0.8340 (1.2696) acc 71.8750 (68.5873) lr 2.0000e-03 eta 5:58:33 +epoch [2/50] batch [370/500] time 0.871 (0.891) data 0.000 (0.003) loss 1.1338 (1.2699) acc 
71.8750 (68.5980) lr 2.0000e-03 eta 5:58:27 +epoch [2/50] batch [375/500] time 0.880 (0.891) data 0.000 (0.002) loss 1.3359 (1.2704) acc 71.8750 (68.6250) lr 2.0000e-03 eta 5:58:19 +epoch [2/50] batch [380/500] time 0.887 (0.891) data 0.000 (0.002) loss 1.1104 (1.2686) acc 75.0000 (68.6842) lr 2.0000e-03 eta 5:58:14 +epoch [2/50] batch [385/500] time 0.855 (0.891) data 0.000 (0.002) loss 1.6289 (1.2686) acc 68.7500 (68.7338) lr 2.0000e-03 eta 5:58:07 +epoch [2/50] batch [390/500] time 0.877 (0.891) data 0.000 (0.002) loss 1.3213 (1.2684) acc 71.8750 (68.7580) lr 2.0000e-03 eta 5:58:01 +epoch [2/50] batch [395/500] time 0.878 (0.891) data 0.000 (0.002) loss 1.2852 (1.2667) acc 71.8750 (68.8370) lr 2.0000e-03 eta 5:57:51 +epoch [2/50] batch [400/500] time 0.879 (0.891) data 0.000 (0.002) loss 1.4473 (1.2657) acc 62.5000 (68.8516) lr 2.0000e-03 eta 5:57:42 +epoch [2/50] batch [405/500] time 0.900 (0.890) data 0.000 (0.002) loss 1.0469 (1.2644) acc 78.1250 (68.8966) lr 2.0000e-03 eta 5:57:34 +epoch [2/50] batch [410/500] time 0.881 (0.890) data 0.000 (0.002) loss 1.6055 (1.2619) acc 65.6250 (68.9329) lr 2.0000e-03 eta 5:57:25 +epoch [2/50] batch [415/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.1709 (1.2599) acc 71.8750 (68.9910) lr 2.0000e-03 eta 5:57:17 +epoch [2/50] batch [420/500] time 0.881 (0.890) data 0.000 (0.002) loss 2.0020 (1.2623) acc 53.1250 (68.9211) lr 2.0000e-03 eta 5:57:14 +epoch [2/50] batch [425/500] time 0.874 (0.890) data 0.000 (0.002) loss 1.9160 (1.2647) acc 59.3750 (68.9118) lr 2.0000e-03 eta 5:57:07 +epoch [2/50] batch [430/500] time 0.861 (0.890) data 0.000 (0.002) loss 1.2490 (1.2653) acc 71.8750 (68.9026) lr 2.0000e-03 eta 5:57:00 +epoch [2/50] batch [435/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.4600 (1.2654) acc 59.3750 (68.8865) lr 2.0000e-03 eta 5:56:55 +epoch [2/50] batch [440/500] time 0.913 (0.890) data 0.000 (0.002) loss 0.6128 (1.2657) acc 84.3750 (68.8778) lr 2.0000e-03 eta 5:56:51 +epoch [2/50] batch [445/500] time 
0.877 (0.890) data 0.000 (0.002) loss 2.0859 (1.2681) acc 56.2500 (68.8483) lr 2.0000e-03 eta 5:56:46 +epoch [2/50] batch [450/500] time 0.908 (0.890) data 0.000 (0.002) loss 1.8877 (1.2673) acc 62.5000 (68.8889) lr 2.0000e-03 eta 5:56:41 +epoch [2/50] batch [455/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.9854 (1.2624) acc 75.0000 (68.9766) lr 2.0000e-03 eta 5:56:30 +epoch [2/50] batch [460/500] time 0.916 (0.890) data 0.000 (0.002) loss 1.4199 (1.2619) acc 68.7500 (69.0217) lr 2.0000e-03 eta 5:56:28 +epoch [2/50] batch [465/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.1875 (1.2636) acc 75.0000 (68.9785) lr 2.0000e-03 eta 5:56:26 +epoch [2/50] batch [470/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.0596 (1.2608) acc 75.0000 (69.0293) lr 2.0000e-03 eta 5:56:22 +epoch [2/50] batch [475/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.4434 (1.2598) acc 62.5000 (69.0197) lr 2.0000e-03 eta 5:56:17 +epoch [2/50] batch [480/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.0225 (1.2617) acc 71.8750 (68.9648) lr 2.0000e-03 eta 5:56:14 +epoch [2/50] batch [485/500] time 0.909 (0.890) data 0.000 (0.002) loss 0.7793 (1.2619) acc 84.3750 (68.9755) lr 2.0000e-03 eta 5:56:10 +epoch [2/50] batch [490/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.7412 (1.2619) acc 71.8750 (69.0242) lr 2.0000e-03 eta 5:56:02 +epoch [2/50] batch [495/500] time 0.880 (0.890) data 0.000 (0.002) loss 0.8931 (1.2625) acc 75.0000 (68.9899) lr 2.0000e-03 eta 5:55:58 +epoch [2/50] batch [500/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.2881 (1.2626) acc 59.3750 (68.9750) lr 1.9980e-03 eta 5:55:51 +epoch [3/50] batch [5/500] time 0.854 (1.032) data 0.000 (0.130) loss 1.3584 (1.2938) acc 78.1250 (69.3750) lr 1.9980e-03 eta 6:52:49 +epoch [3/50] batch [10/500] time 0.890 (0.960) data 0.000 (0.065) loss 1.2217 (1.2583) acc 71.8750 (69.3750) lr 1.9980e-03 eta 6:23:44 +epoch [3/50] batch [15/500] time 0.884 (0.936) data 0.000 (0.043) loss 0.8198 (1.2198) acc 75.0000 (70.2083) lr 
1.9980e-03 eta 6:14:00 +epoch [3/50] batch [20/500] time 0.906 (0.922) data 0.000 (0.033) loss 1.5664 (1.2576) acc 65.6250 (69.2188) lr 1.9980e-03 eta 6:08:18 +epoch [3/50] batch [25/500] time 0.845 (0.910) data 0.000 (0.026) loss 1.1299 (1.2525) acc 71.8750 (68.5000) lr 1.9980e-03 eta 6:03:40 +epoch [3/50] batch [30/500] time 0.876 (0.904) data 0.000 (0.022) loss 1.2021 (1.2611) acc 65.6250 (68.1250) lr 1.9980e-03 eta 6:01:16 +epoch [3/50] batch [35/500] time 0.863 (0.901) data 0.000 (0.019) loss 0.9111 (1.2191) acc 78.1250 (68.6607) lr 1.9980e-03 eta 6:00:00 +epoch [3/50] batch [40/500] time 0.883 (0.899) data 0.000 (0.016) loss 1.1162 (1.1946) acc 71.8750 (69.6094) lr 1.9980e-03 eta 5:59:11 +epoch [3/50] batch [45/500] time 0.904 (0.899) data 0.000 (0.015) loss 1.5098 (1.2096) acc 59.3750 (69.6528) lr 1.9980e-03 eta 5:58:59 +epoch [3/50] batch [50/500] time 0.877 (0.896) data 0.000 (0.013) loss 0.9062 (1.2277) acc 81.2500 (69.5625) lr 1.9980e-03 eta 5:57:42 +epoch [3/50] batch [55/500] time 0.887 (0.895) data 0.000 (0.012) loss 1.2607 (1.2067) acc 68.7500 (70.0000) lr 1.9980e-03 eta 5:57:19 +epoch [3/50] batch [60/500] time 0.911 (0.894) data 0.000 (0.011) loss 1.1367 (1.2388) acc 75.0000 (69.4271) lr 1.9980e-03 eta 5:56:39 +epoch [3/50] batch [65/500] time 0.905 (0.893) data 0.000 (0.010) loss 1.2861 (1.2366) acc 75.0000 (69.6635) lr 1.9980e-03 eta 5:56:16 +epoch [3/50] batch [70/500] time 0.865 (0.892) data 0.000 (0.009) loss 0.9404 (1.2220) acc 78.1250 (70.1339) lr 1.9980e-03 eta 5:55:36 +epoch [3/50] batch [75/500] time 0.865 (0.892) data 0.000 (0.009) loss 1.6953 (1.2248) acc 50.0000 (69.6250) lr 1.9980e-03 eta 5:55:46 +epoch [3/50] batch [80/500] time 0.884 (0.892) data 0.000 (0.008) loss 1.3535 (1.2318) acc 75.0000 (69.4922) lr 1.9980e-03 eta 5:55:36 +epoch [3/50] batch [85/500] time 0.871 (0.891) data 0.000 (0.008) loss 0.8042 (1.2231) acc 75.0000 (69.6324) lr 1.9980e-03 eta 5:55:18 +epoch [3/50] batch [90/500] time 0.897 (0.891) data 0.000 (0.007) loss 
1.1436 (1.2329) acc 75.0000 (69.5139) lr 1.9980e-03 eta 5:55:03 +epoch [3/50] batch [95/500] time 0.871 (0.891) data 0.000 (0.007) loss 1.3887 (1.2366) acc 65.6250 (69.5724) lr 1.9980e-03 eta 5:54:55 +epoch [3/50] batch [100/500] time 0.911 (0.891) data 0.000 (0.007) loss 1.5078 (1.2336) acc 56.2500 (69.5312) lr 1.9980e-03 eta 5:54:56 +epoch [3/50] batch [105/500] time 0.871 (0.892) data 0.000 (0.006) loss 1.1680 (1.2304) acc 75.0000 (69.5238) lr 1.9980e-03 eta 5:55:15 +epoch [3/50] batch [110/500] time 0.893 (0.892) data 0.000 (0.006) loss 1.1494 (1.2397) acc 71.8750 (69.3750) lr 1.9980e-03 eta 5:55:18 +epoch [3/50] batch [115/500] time 0.860 (0.892) data 0.000 (0.006) loss 1.0420 (1.2281) acc 75.0000 (69.7011) lr 1.9980e-03 eta 5:55:00 +epoch [3/50] batch [120/500] time 0.899 (0.892) data 0.000 (0.006) loss 1.3643 (1.2186) acc 68.7500 (69.9479) lr 1.9980e-03 eta 5:54:58 +epoch [3/50] batch [125/500] time 0.890 (0.892) data 0.000 (0.005) loss 1.4199 (1.2203) acc 62.5000 (69.9000) lr 1.9980e-03 eta 5:54:44 +epoch [3/50] batch [130/500] time 0.863 (0.891) data 0.000 (0.005) loss 1.4844 (1.2176) acc 65.6250 (69.7596) lr 1.9980e-03 eta 5:54:32 +epoch [3/50] batch [135/500] time 0.881 (0.890) data 0.000 (0.005) loss 1.7500 (1.2136) acc 56.2500 (69.8611) lr 1.9980e-03 eta 5:54:11 +epoch [3/50] batch [140/500] time 0.878 (0.890) data 0.000 (0.005) loss 1.7812 (1.2161) acc 68.7500 (69.8661) lr 1.9980e-03 eta 5:54:00 +epoch [3/50] batch [145/500] time 1.005 (0.891) data 0.000 (0.005) loss 1.3457 (1.2158) acc 68.7500 (69.7198) lr 1.9980e-03 eta 5:54:07 +epoch [3/50] batch [150/500] time 0.869 (0.890) data 0.000 (0.005) loss 1.9180 (1.2133) acc 62.5000 (69.8750) lr 1.9980e-03 eta 5:53:45 +epoch [3/50] batch [155/500] time 0.882 (0.890) data 0.000 (0.004) loss 0.5864 (1.2096) acc 84.3750 (69.9194) lr 1.9980e-03 eta 5:53:31 +epoch [3/50] batch [160/500] time 0.865 (0.889) data 0.000 (0.004) loss 0.5625 (1.2048) acc 78.1250 (70.0000) lr 1.9980e-03 eta 5:53:06 +epoch [3/50] 
batch [165/500] time 0.858 (0.888) data 0.000 (0.004) loss 1.7910 (1.2125) acc 56.2500 (69.8106) lr 1.9980e-03 eta 5:52:54 +epoch [3/50] batch [170/500] time 0.874 (0.888) data 0.000 (0.004) loss 1.2197 (1.2140) acc 71.8750 (69.7610) lr 1.9980e-03 eta 5:52:49 +epoch [3/50] batch [175/500] time 0.889 (0.888) data 0.000 (0.004) loss 1.7812 (1.2202) acc 71.8750 (69.6786) lr 1.9980e-03 eta 5:52:42 +epoch [3/50] batch [180/500] time 0.908 (0.888) data 0.000 (0.004) loss 1.4561 (1.2211) acc 65.6250 (69.7569) lr 1.9980e-03 eta 5:52:38 +epoch [3/50] batch [185/500] time 0.892 (0.888) data 0.000 (0.004) loss 1.1787 (1.2240) acc 65.6250 (69.6284) lr 1.9980e-03 eta 5:52:30 +epoch [3/50] batch [190/500] time 0.885 (0.888) data 0.000 (0.004) loss 1.5537 (1.2243) acc 59.3750 (69.5888) lr 1.9980e-03 eta 5:52:29 +epoch [3/50] batch [195/500] time 0.853 (0.888) data 0.000 (0.004) loss 1.3740 (1.2240) acc 65.6250 (69.5833) lr 1.9980e-03 eta 5:52:18 +epoch [3/50] batch [200/500] time 0.918 (0.888) data 0.000 (0.003) loss 1.5469 (1.2240) acc 62.5000 (69.5312) lr 1.9980e-03 eta 5:52:22 +epoch [3/50] batch [205/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.9380 (1.2233) acc 81.2500 (69.6037) lr 1.9980e-03 eta 5:52:16 +epoch [3/50] batch [210/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.3408 (1.2244) acc 71.8750 (69.5387) lr 1.9980e-03 eta 5:52:08 +epoch [3/50] batch [215/500] time 0.883 (0.888) data 0.000 (0.003) loss 0.7798 (1.2247) acc 75.0000 (69.5349) lr 1.9980e-03 eta 5:51:57 +epoch [3/50] batch [220/500] time 0.874 (0.888) data 0.000 (0.003) loss 1.2510 (1.2239) acc 75.0000 (69.4744) lr 1.9980e-03 eta 5:51:47 +epoch [3/50] batch [225/500] time 0.845 (0.887) data 0.000 (0.003) loss 1.3154 (1.2266) acc 78.1250 (69.4861) lr 1.9980e-03 eta 5:51:38 +epoch [3/50] batch [230/500] time 0.870 (0.887) data 0.000 (0.003) loss 1.4600 (1.2291) acc 65.6250 (69.4973) lr 1.9980e-03 eta 5:51:29 +epoch [3/50] batch [235/500] time 0.907 (0.887) data 0.000 (0.003) loss 0.7979 (1.2325) acc 
87.5000 (69.4947) lr 1.9980e-03 eta 5:51:28 +epoch [3/50] batch [240/500] time 0.882 (0.887) data 0.000 (0.003) loss 1.2021 (1.2340) acc 71.8750 (69.4531) lr 1.9980e-03 eta 5:51:26 +epoch [3/50] batch [245/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.9761 (1.2302) acc 78.1250 (69.4643) lr 1.9980e-03 eta 5:51:27 +epoch [3/50] batch [250/500] time 0.895 (0.888) data 0.001 (0.003) loss 1.4287 (1.2284) acc 59.3750 (69.4375) lr 1.9980e-03 eta 5:51:24 +epoch [3/50] batch [255/500] time 0.861 (0.888) data 0.000 (0.003) loss 1.4531 (1.2263) acc 59.3750 (69.4240) lr 1.9980e-03 eta 5:51:14 +epoch [3/50] batch [260/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.0986 (1.2262) acc 71.8750 (69.4231) lr 1.9980e-03 eta 5:51:09 +epoch [3/50] batch [265/500] time 0.881 (0.887) data 0.000 (0.003) loss 1.4287 (1.2261) acc 65.6250 (69.4575) lr 1.9980e-03 eta 5:51:00 +epoch [3/50] batch [270/500] time 0.864 (0.887) data 0.000 (0.003) loss 1.3477 (1.2309) acc 65.6250 (69.2940) lr 1.9980e-03 eta 5:50:55 +epoch [3/50] batch [275/500] time 0.882 (0.887) data 0.000 (0.003) loss 1.6748 (1.2330) acc 56.2500 (69.3068) lr 1.9980e-03 eta 5:50:50 +epoch [3/50] batch [280/500] time 0.883 (0.887) data 0.000 (0.003) loss 0.9751 (1.2329) acc 75.0000 (69.3080) lr 1.9980e-03 eta 5:50:49 +epoch [3/50] batch [285/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.1426 (1.2332) acc 71.8750 (69.2654) lr 1.9980e-03 eta 5:50:43 +epoch [3/50] batch [290/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.4873 (1.2308) acc 62.5000 (69.2457) lr 1.9980e-03 eta 5:50:45 +epoch [3/50] batch [295/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.9883 (1.2294) acc 75.0000 (69.2373) lr 1.9980e-03 eta 5:50:38 +epoch [3/50] batch [300/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9727 (1.2279) acc 68.7500 (69.2500) lr 1.9980e-03 eta 5:50:32 +epoch [3/50] batch [305/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.0771 (1.2284) acc 78.1250 (69.2623) lr 1.9980e-03 eta 5:50:28 +epoch [3/50] batch [310/500] time 
0.883 (0.887) data 0.000 (0.002) loss 1.6299 (1.2284) acc 65.6250 (69.2238) lr 1.9980e-03 eta 5:50:22 +epoch [3/50] batch [315/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.9399 (1.2307) acc 75.0000 (69.2063) lr 1.9980e-03 eta 5:50:19 +epoch [3/50] batch [320/500] time 0.921 (0.888) data 0.000 (0.002) loss 0.9312 (1.2281) acc 78.1250 (69.2188) lr 1.9980e-03 eta 5:50:16 +epoch [3/50] batch [325/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.2930 (1.2308) acc 65.6250 (69.1250) lr 1.9980e-03 eta 5:50:09 +epoch [3/50] batch [330/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.1709 (1.2289) acc 53.1250 (69.1667) lr 1.9980e-03 eta 5:50:05 +epoch [3/50] batch [335/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.6074 (1.2314) acc 59.3750 (69.0765) lr 1.9980e-03 eta 5:50:00 +epoch [3/50] batch [340/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.4639 (1.2338) acc 59.3750 (69.0809) lr 1.9980e-03 eta 5:49:54 +epoch [3/50] batch [345/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.0176 (1.2332) acc 75.0000 (69.0851) lr 1.9980e-03 eta 5:49:54 +epoch [3/50] batch [350/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.2656 (1.2340) acc 68.7500 (69.0982) lr 1.9980e-03 eta 5:49:52 +epoch [3/50] batch [355/500] time 0.856 (0.887) data 0.000 (0.002) loss 1.2471 (1.2327) acc 65.6250 (69.0581) lr 1.9980e-03 eta 5:49:40 +epoch [3/50] batch [360/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.3301 (1.2286) acc 59.3750 (69.1059) lr 1.9980e-03 eta 5:49:32 +epoch [3/50] batch [365/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.0537 (1.2305) acc 62.5000 (69.1010) lr 1.9980e-03 eta 5:49:27 +epoch [3/50] batch [370/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.0332 (1.2307) acc 75.0000 (69.1047) lr 1.9980e-03 eta 5:49:21 +epoch [3/50] batch [375/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.6455 (1.2339) acc 65.6250 (69.0417) lr 1.9980e-03 eta 5:49:17 +epoch [3/50] batch [380/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.2881 (1.2344) acc 65.6250 (68.9803) lr 
1.9980e-03 eta 5:49:11 +epoch [3/50] batch [385/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9141 (1.2343) acc 78.1250 (69.0179) lr 1.9980e-03 eta 5:49:05 +epoch [3/50] batch [390/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.5264 (1.2382) acc 62.5000 (68.9343) lr 1.9980e-03 eta 5:49:03 +epoch [3/50] batch [395/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0127 (1.2371) acc 75.0000 (68.9794) lr 1.9980e-03 eta 5:48:55 +epoch [3/50] batch [400/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.5479 (1.2365) acc 65.6250 (69.0000) lr 1.9980e-03 eta 5:48:46 +epoch [3/50] batch [405/500] time 0.898 (0.886) data 0.000 (0.002) loss 0.9233 (1.2330) acc 78.1250 (69.0741) lr 1.9980e-03 eta 5:48:36 +epoch [3/50] batch [410/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.3633 (1.2339) acc 65.6250 (69.0244) lr 1.9980e-03 eta 5:48:32 +epoch [3/50] batch [415/500] time 0.872 (0.886) data 0.000 (0.002) loss 0.7759 (1.2326) acc 68.7500 (69.0512) lr 1.9980e-03 eta 5:48:24 +epoch [3/50] batch [420/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.5166 (1.2359) acc 62.5000 (69.0104) lr 1.9980e-03 eta 5:48:18 +epoch [3/50] batch [425/500] time 0.884 (0.886) data 0.000 (0.002) loss 2.0488 (1.2376) acc 59.3750 (68.9706) lr 1.9980e-03 eta 5:48:13 +epoch [3/50] batch [430/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.0107 (1.2372) acc 68.7500 (68.9753) lr 1.9980e-03 eta 5:48:06 +epoch [3/50] batch [435/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7231 (1.2370) acc 71.8750 (68.9727) lr 1.9980e-03 eta 5:48:06 +epoch [3/50] batch [440/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.3486 (1.2355) acc 68.7500 (68.9915) lr 1.9980e-03 eta 5:48:00 +epoch [3/50] batch [445/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.0420 (1.2327) acc 71.8750 (69.0449) lr 1.9980e-03 eta 5:47:56 +epoch [3/50] batch [450/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.1709 (1.2319) acc 62.5000 (69.0486) lr 1.9980e-03 eta 5:47:48 +epoch [3/50] batch [455/500] time 0.864 (0.886) data 
0.000 (0.002) loss 0.8872 (1.2337) acc 81.2500 (68.9904) lr 1.9980e-03 eta 5:47:42 +epoch [3/50] batch [460/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.0273 (1.2318) acc 75.0000 (68.9946) lr 1.9980e-03 eta 5:47:34 +epoch [3/50] batch [465/500] time 0.850 (0.886) data 0.000 (0.002) loss 1.0977 (1.2290) acc 75.0000 (69.0457) lr 1.9980e-03 eta 5:47:21 +epoch [3/50] batch [470/500] time 0.868 (0.885) data 0.000 (0.002) loss 1.5596 (1.2293) acc 75.0000 (69.0691) lr 1.9980e-03 eta 5:47:13 +epoch [3/50] batch [475/500] time 0.884 (0.885) data 0.000 (0.002) loss 0.8643 (1.2272) acc 78.1250 (69.1382) lr 1.9980e-03 eta 5:47:08 +epoch [3/50] batch [480/500] time 0.871 (0.885) data 0.000 (0.002) loss 0.5239 (1.2287) acc 84.3750 (69.1341) lr 1.9980e-03 eta 5:47:01 +epoch [3/50] batch [485/500] time 0.863 (0.885) data 0.000 (0.002) loss 0.9224 (1.2284) acc 75.0000 (69.1495) lr 1.9980e-03 eta 5:46:53 +epoch [3/50] batch [490/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.3232 (1.2290) acc 65.6250 (69.1454) lr 1.9980e-03 eta 5:46:47 +epoch [3/50] batch [495/500] time 0.869 (0.885) data 0.000 (0.002) loss 1.7881 (1.2304) acc 59.3750 (69.1035) lr 1.9980e-03 eta 5:46:39 +epoch [3/50] batch [500/500] time 0.882 (0.885) data 0.000 (0.002) loss 1.5781 (1.2331) acc 62.5000 (69.0938) lr 1.9921e-03 eta 5:46:35 +epoch [4/50] batch [5/500] time 0.889 (1.000) data 0.000 (0.130) loss 1.9238 (1.5527) acc 46.8750 (61.8750) lr 1.9921e-03 eta 6:31:45 +epoch [4/50] batch [10/500] time 0.889 (0.937) data 0.000 (0.065) loss 1.0918 (1.3561) acc 78.1250 (64.6875) lr 1.9921e-03 eta 6:06:49 +epoch [4/50] batch [15/500] time 0.881 (0.919) data 0.000 (0.044) loss 1.4248 (1.3029) acc 62.5000 (65.6250) lr 1.9921e-03 eta 5:59:41 +epoch [4/50] batch [20/500] time 0.884 (0.909) data 0.000 (0.033) loss 1.1113 (1.3033) acc 75.0000 (66.4062) lr 1.9921e-03 eta 5:55:52 +epoch [4/50] batch [25/500] time 0.886 (0.911) data 0.000 (0.026) loss 0.8994 (1.2527) acc 65.6250 (67.1250) lr 1.9921e-03 eta 5:56:21 
+epoch [4/50] batch [30/500] time 0.895 (0.910) data 0.000 (0.022) loss 1.9219 (1.2590) acc 56.2500 (67.0833) lr 1.9921e-03 eta 5:56:01 +epoch [4/50] batch [35/500] time 0.874 (0.907) data 0.000 (0.019) loss 0.8354 (1.2164) acc 84.3750 (67.9464) lr 1.9921e-03 eta 5:54:32 +epoch [4/50] batch [40/500] time 0.887 (0.904) data 0.000 (0.016) loss 1.4951 (1.2252) acc 62.5000 (67.7344) lr 1.9921e-03 eta 5:53:32 +epoch [4/50] batch [45/500] time 0.871 (0.901) data 0.000 (0.015) loss 1.1904 (1.2293) acc 75.0000 (67.8472) lr 1.9921e-03 eta 5:52:21 +epoch [4/50] batch [50/500] time 0.897 (0.900) data 0.000 (0.013) loss 1.1348 (1.2367) acc 81.2500 (67.9375) lr 1.9921e-03 eta 5:51:53 +epoch [4/50] batch [55/500] time 0.873 (0.899) data 0.000 (0.012) loss 1.6318 (1.2386) acc 56.2500 (67.7841) lr 1.9921e-03 eta 5:51:19 +epoch [4/50] batch [60/500] time 0.909 (0.898) data 0.000 (0.011) loss 1.1865 (1.2522) acc 59.3750 (67.5521) lr 1.9921e-03 eta 5:50:55 +epoch [4/50] batch [65/500] time 0.859 (0.897) data 0.000 (0.010) loss 1.6367 (1.2559) acc 62.5000 (67.7404) lr 1.9921e-03 eta 5:50:15 +epoch [4/50] batch [70/500] time 0.889 (0.895) data 0.000 (0.010) loss 1.2617 (1.2522) acc 68.7500 (67.6339) lr 1.9921e-03 eta 5:49:39 +epoch [4/50] batch [75/500] time 0.858 (0.894) data 0.000 (0.009) loss 1.2246 (1.2593) acc 71.8750 (67.7500) lr 1.9921e-03 eta 5:48:55 +epoch [4/50] batch [80/500] time 0.882 (0.894) data 0.000 (0.008) loss 1.2412 (1.2598) acc 65.6250 (67.8516) lr 1.9921e-03 eta 5:48:51 +epoch [4/50] batch [85/500] time 0.888 (0.893) data 0.000 (0.008) loss 1.4043 (1.2578) acc 75.0000 (68.2353) lr 1.9921e-03 eta 5:48:22 +epoch [4/50] batch [90/500] time 0.887 (0.892) data 0.000 (0.007) loss 1.2109 (1.2419) acc 78.1250 (68.9236) lr 1.9921e-03 eta 5:48:05 +epoch [4/50] batch [95/500] time 0.886 (0.891) data 0.000 (0.007) loss 1.2354 (1.2306) acc 65.6250 (69.0461) lr 1.9921e-03 eta 5:47:29 +epoch [4/50] batch [100/500] time 0.899 (0.890) data 0.000 (0.007) loss 1.1787 (1.2235) acc 
65.6250 (69.1562) lr 1.9921e-03 eta 5:47:05 +epoch [4/50] batch [105/500] time 0.888 (0.890) data 0.000 (0.006) loss 1.1406 (1.2262) acc 75.0000 (69.3155) lr 1.9921e-03 eta 5:46:55 +epoch [4/50] batch [110/500] time 0.898 (0.890) data 0.000 (0.006) loss 0.7476 (1.2123) acc 81.2500 (69.6875) lr 1.9921e-03 eta 5:46:46 +epoch [4/50] batch [115/500] time 0.904 (0.889) data 0.000 (0.006) loss 1.1064 (1.2103) acc 62.5000 (69.7826) lr 1.9921e-03 eta 5:46:37 +epoch [4/50] batch [120/500] time 0.883 (0.890) data 0.000 (0.006) loss 1.1689 (1.2073) acc 71.8750 (69.6875) lr 1.9921e-03 eta 5:46:39 +epoch [4/50] batch [125/500] time 0.890 (0.890) data 0.000 (0.005) loss 1.7881 (1.2136) acc 65.6250 (69.7250) lr 1.9921e-03 eta 5:46:43 +epoch [4/50] batch [130/500] time 0.884 (0.890) data 0.000 (0.005) loss 0.9951 (1.2078) acc 75.0000 (69.9760) lr 1.9921e-03 eta 5:46:30 +epoch [4/50] batch [135/500] time 0.897 (0.889) data 0.000 (0.005) loss 1.4160 (1.2002) acc 65.6250 (70.2083) lr 1.9921e-03 eta 5:46:23 +epoch [4/50] batch [140/500] time 0.878 (0.889) data 0.000 (0.005) loss 0.9258 (1.2021) acc 71.8750 (70.2455) lr 1.9921e-03 eta 5:46:06 +epoch [4/50] batch [145/500] time 0.893 (0.889) data 0.000 (0.005) loss 0.9653 (1.1991) acc 81.2500 (70.2586) lr 1.9921e-03 eta 5:45:53 +epoch [4/50] batch [150/500] time 0.864 (0.888) data 0.000 (0.005) loss 1.7344 (1.2005) acc 62.5000 (70.3125) lr 1.9921e-03 eta 5:45:41 +epoch [4/50] batch [155/500] time 0.894 (0.888) data 0.000 (0.004) loss 1.4150 (1.1956) acc 65.6250 (70.3226) lr 1.9921e-03 eta 5:45:38 +epoch [4/50] batch [160/500] time 0.849 (0.888) data 0.000 (0.004) loss 1.0430 (1.1962) acc 62.5000 (70.1562) lr 1.9921e-03 eta 5:45:22 +epoch [4/50] batch [165/500] time 0.880 (0.888) data 0.000 (0.004) loss 1.2988 (1.1972) acc 65.6250 (70.0758) lr 1.9921e-03 eta 5:45:25 +epoch [4/50] batch [170/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.8525 (1.1998) acc 53.1250 (69.9632) lr 1.9921e-03 eta 5:45:12 +epoch [4/50] batch [175/500] time 
0.880 (0.888) data 0.000 (0.004) loss 1.4072 (1.1979) acc 56.2500 (69.9286) lr 1.9921e-03 eta 5:45:09 +epoch [4/50] batch [180/500] time 0.885 (0.888) data 0.000 (0.004) loss 0.6865 (1.2055) acc 84.3750 (69.8611) lr 1.9921e-03 eta 5:45:03 +epoch [4/50] batch [185/500] time 0.895 (0.888) data 0.000 (0.004) loss 1.7510 (1.2091) acc 62.5000 (69.7804) lr 1.9921e-03 eta 5:45:04 +epoch [4/50] batch [190/500] time 0.898 (0.888) data 0.000 (0.004) loss 1.0742 (1.2089) acc 78.1250 (69.8355) lr 1.9921e-03 eta 5:45:04 +epoch [4/50] batch [195/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.4150 (1.2051) acc 71.8750 (69.9359) lr 1.9921e-03 eta 5:45:03 +epoch [4/50] batch [200/500] time 0.867 (0.888) data 0.000 (0.003) loss 1.3564 (1.2003) acc 75.0000 (70.0469) lr 1.9921e-03 eta 5:44:54 +epoch [4/50] batch [205/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.0537 (1.1969) acc 71.8750 (70.1067) lr 1.9921e-03 eta 5:44:38 +epoch [4/50] batch [210/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.3477 (1.2010) acc 68.7500 (70.0893) lr 1.9921e-03 eta 5:44:33 +epoch [4/50] batch [215/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.3018 (1.2067) acc 50.0000 (69.9273) lr 1.9921e-03 eta 5:44:26 +epoch [4/50] batch [220/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.2158 (1.2100) acc 68.7500 (69.8295) lr 1.9921e-03 eta 5:44:20 +epoch [4/50] batch [225/500] time 0.853 (0.888) data 0.000 (0.003) loss 1.1553 (1.2155) acc 62.5000 (69.7639) lr 1.9921e-03 eta 5:44:25 +epoch [4/50] batch [230/500] time 0.866 (0.887) data 0.000 (0.003) loss 1.8271 (1.2191) acc 62.5000 (69.6875) lr 1.9921e-03 eta 5:44:08 +epoch [4/50] batch [235/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.3066 (1.2185) acc 56.2500 (69.6809) lr 1.9921e-03 eta 5:44:01 +epoch [4/50] batch [240/500] time 0.919 (0.887) data 0.000 (0.003) loss 1.4336 (1.2183) acc 65.6250 (69.6224) lr 1.9921e-03 eta 5:44:01 +epoch [4/50] batch [245/500] time 0.897 (0.887) data 0.000 (0.003) loss 1.4307 (1.2193) acc 53.1250 (69.5153) lr 
1.9921e-03 eta 5:43:51 +epoch [4/50] batch [250/500] time 0.867 (0.887) data 0.000 (0.003) loss 0.7188 (1.2167) acc 84.3750 (69.6000) lr 1.9921e-03 eta 5:43:46 +epoch [4/50] batch [255/500] time 0.900 (0.887) data 0.000 (0.003) loss 1.2490 (1.2148) acc 65.6250 (69.5833) lr 1.9921e-03 eta 5:43:42 +epoch [4/50] batch [260/500] time 0.863 (0.887) data 0.000 (0.003) loss 0.8745 (1.2126) acc 81.2500 (69.6154) lr 1.9921e-03 eta 5:43:31 +epoch [4/50] batch [265/500] time 1.004 (0.887) data 0.000 (0.003) loss 1.0557 (1.2127) acc 59.3750 (69.5873) lr 1.9921e-03 eta 5:43:38 +epoch [4/50] batch [270/500] time 0.871 (0.887) data 0.000 (0.003) loss 1.4668 (1.2136) acc 68.7500 (69.5255) lr 1.9921e-03 eta 5:43:29 +epoch [4/50] batch [275/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.4277 (1.2132) acc 65.6250 (69.5682) lr 1.9921e-03 eta 5:43:23 +epoch [4/50] batch [280/500] time 0.900 (0.887) data 0.000 (0.003) loss 0.4995 (1.2094) acc 90.6250 (69.6317) lr 1.9921e-03 eta 5:43:20 +epoch [4/50] batch [285/500] time 0.894 (0.887) data 0.000 (0.003) loss 1.2871 (1.2097) acc 65.6250 (69.6601) lr 1.9921e-03 eta 5:43:17 +epoch [4/50] batch [290/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.2129 (1.2116) acc 65.6250 (69.5905) lr 1.9921e-03 eta 5:43:10 +epoch [4/50] batch [295/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.1006 (1.2105) acc 65.6250 (69.5233) lr 1.9921e-03 eta 5:42:56 +epoch [4/50] batch [300/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.9585 (1.2106) acc 75.0000 (69.4792) lr 1.9921e-03 eta 5:42:48 +epoch [4/50] batch [305/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.4951 (1.2155) acc 65.6250 (69.3750) lr 1.9921e-03 eta 5:42:38 +epoch [4/50] batch [310/500] time 0.860 (0.886) data 0.000 (0.002) loss 0.6475 (1.2092) acc 75.0000 (69.5161) lr 1.9921e-03 eta 5:42:25 +epoch [4/50] batch [315/500] time 0.883 (0.886) data 0.000 (0.002) loss 2.0371 (1.2135) acc 50.0000 (69.4246) lr 1.9921e-03 eta 5:42:20 +epoch [4/50] batch [320/500] time 0.899 (0.886) data 
0.000 (0.002) loss 1.4209 (1.2161) acc 65.6250 (69.4336) lr 1.9921e-03 eta 5:42:14 +epoch [4/50] batch [325/500] time 0.891 (0.886) data 0.000 (0.002) loss 1.0869 (1.2135) acc 71.8750 (69.5000) lr 1.9921e-03 eta 5:42:16 +epoch [4/50] batch [330/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.9346 (1.2173) acc 56.2500 (69.4129) lr 1.9921e-03 eta 5:42:18 +epoch [4/50] batch [335/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7070 (1.2134) acc 84.3750 (69.5056) lr 1.9921e-03 eta 5:42:16 +epoch [4/50] batch [340/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.7549 (1.2122) acc 78.1250 (69.5496) lr 1.9921e-03 eta 5:42:13 +epoch [4/50] batch [345/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.2910 (1.2140) acc 68.7500 (69.4656) lr 1.9921e-03 eta 5:42:06 +epoch [4/50] batch [350/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.6543 (1.2138) acc 56.2500 (69.4643) lr 1.9921e-03 eta 5:42:03 +epoch [4/50] batch [355/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2412 (1.2166) acc 62.5000 (69.3486) lr 1.9921e-03 eta 5:41:59 +epoch [4/50] batch [360/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.6455 (1.2184) acc 62.5000 (69.2969) lr 1.9921e-03 eta 5:41:52 +epoch [4/50] batch [365/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.0283 (1.2187) acc 65.6250 (69.3236) lr 1.9921e-03 eta 5:41:55 +epoch [4/50] batch [370/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.2402 (1.2176) acc 75.0000 (69.3412) lr 1.9921e-03 eta 5:41:48 +epoch [4/50] batch [375/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.1670 (1.2176) acc 75.0000 (69.3583) lr 1.9921e-03 eta 5:41:43 +epoch [4/50] batch [380/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.3760 (1.2188) acc 75.0000 (69.2928) lr 1.9921e-03 eta 5:41:43 +epoch [4/50] batch [385/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.6113 (1.2165) acc 84.3750 (69.3588) lr 1.9921e-03 eta 5:41:39 +epoch [4/50] batch [390/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.5508 (1.2164) acc 62.5000 (69.3510) lr 1.9921e-03 eta 
5:41:33 +epoch [4/50] batch [395/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.4102 (1.2165) acc 75.0000 (69.4146) lr 1.9921e-03 eta 5:41:27 +epoch [4/50] batch [400/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.3467 (1.2184) acc 71.8750 (69.3984) lr 1.9921e-03 eta 5:41:18 +epoch [4/50] batch [405/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.9536 (1.2165) acc 75.0000 (69.4290) lr 1.9921e-03 eta 5:41:16 +epoch [4/50] batch [410/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.4531 (1.2163) acc 62.5000 (69.4741) lr 1.9921e-03 eta 5:41:17 +epoch [4/50] batch [415/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.8906 (1.2197) acc 56.2500 (69.4277) lr 1.9921e-03 eta 5:41:10 +epoch [4/50] batch [420/500] time 0.845 (0.886) data 0.000 (0.002) loss 1.1680 (1.2188) acc 68.7500 (69.4345) lr 1.9921e-03 eta 5:41:00 +epoch [4/50] batch [425/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.2129 (1.2185) acc 75.0000 (69.4632) lr 1.9921e-03 eta 5:40:53 +epoch [4/50] batch [430/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.6143 (1.2170) acc 62.5000 (69.5203) lr 1.9921e-03 eta 5:40:49 +epoch [4/50] batch [435/500] time 0.917 (0.887) data 0.000 (0.002) loss 1.2158 (1.2169) acc 62.5000 (69.4971) lr 1.9921e-03 eta 5:40:50 +epoch [4/50] batch [440/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.1104 (1.2170) acc 68.7500 (69.4744) lr 1.9921e-03 eta 5:40:46 +epoch [4/50] batch [445/500] time 0.928 (0.887) data 0.000 (0.002) loss 0.7700 (1.2174) acc 78.1250 (69.4733) lr 1.9921e-03 eta 5:40:42 +epoch [4/50] batch [450/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9277 (1.2163) acc 75.0000 (69.4931) lr 1.9921e-03 eta 5:40:38 +epoch [4/50] batch [455/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.9805 (1.2173) acc 65.6250 (69.4643) lr 1.9921e-03 eta 5:40:34 +epoch [4/50] batch [460/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.9131 (1.2155) acc 78.1250 (69.5041) lr 1.9921e-03 eta 5:40:30 +epoch [4/50] batch [465/500] time 0.900 (0.887) data 0.000 (0.002) loss 
1.2568 (1.2160) acc 71.8750 (69.5161) lr 1.9921e-03 eta 5:40:23 +epoch [4/50] batch [470/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.0820 (1.2131) acc 75.0000 (69.5545) lr 1.9921e-03 eta 5:40:18 +epoch [4/50] batch [475/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.4004 (1.2134) acc 68.7500 (69.5921) lr 1.9921e-03 eta 5:40:11 +epoch [4/50] batch [480/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.6904 (1.2143) acc 59.3750 (69.5898) lr 1.9921e-03 eta 5:40:05 +epoch [4/50] batch [485/500] time 0.906 (0.886) data 0.001 (0.002) loss 1.0830 (1.2135) acc 68.7500 (69.6005) lr 1.9921e-03 eta 5:40:00 +epoch [4/50] batch [490/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.2188 (1.2132) acc 68.7500 (69.5982) lr 1.9921e-03 eta 5:39:55 +epoch [4/50] batch [495/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.2373 (1.2142) acc 59.3750 (69.5076) lr 1.9921e-03 eta 5:39:49 +epoch [4/50] batch [500/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.3818 (1.2136) acc 68.7500 (69.5250) lr 1.9823e-03 eta 5:39:45 +epoch [5/50] batch [5/500] time 0.845 (1.042) data 0.000 (0.154) loss 1.2412 (1.2175) acc 62.5000 (64.3750) lr 1.9823e-03 eta 6:39:12 +epoch [5/50] batch [10/500] time 0.918 (0.968) data 0.000 (0.077) loss 1.5039 (1.2551) acc 71.8750 (67.5000) lr 1.9823e-03 eta 6:10:58 +epoch [5/50] batch [15/500] time 0.879 (0.939) data 0.000 (0.051) loss 1.7686 (1.2934) acc 68.7500 (67.7083) lr 1.9823e-03 eta 5:59:42 +epoch [5/50] batch [20/500] time 0.873 (0.924) data 0.000 (0.039) loss 1.6553 (1.3070) acc 56.2500 (67.1875) lr 1.9823e-03 eta 5:54:02 +epoch [5/50] batch [25/500] time 0.903 (0.920) data 0.000 (0.031) loss 1.3291 (1.2678) acc 71.8750 (68.2500) lr 1.9823e-03 eta 5:52:14 +epoch [5/50] batch [30/500] time 0.901 (0.913) data 0.000 (0.026) loss 0.7241 (1.2232) acc 84.3750 (69.4792) lr 1.9823e-03 eta 5:49:31 +epoch [5/50] batch [35/500] time 0.888 (0.909) data 0.001 (0.022) loss 1.2148 (1.2160) acc 62.5000 (69.8214) lr 1.9823e-03 eta 5:47:43 +epoch [5/50] batch 
[40/500] time 0.877 (0.905) data 0.000 (0.019) loss 0.7866 (1.1983) acc 78.1250 (69.8438) lr 1.9823e-03 eta 5:46:08 +epoch [5/50] batch [45/500] time 0.867 (0.901) data 0.000 (0.017) loss 0.3804 (1.1779) acc 87.5000 (70.1389) lr 1.9823e-03 eta 5:44:46 +epoch [5/50] batch [50/500] time 0.865 (0.898) data 0.000 (0.016) loss 1.3213 (1.1541) acc 71.8750 (70.6875) lr 1.9823e-03 eta 5:43:29 +epoch [5/50] batch [55/500] time 0.914 (0.898) data 0.000 (0.014) loss 0.7993 (1.1450) acc 78.1250 (71.1364) lr 1.9823e-03 eta 5:43:13 +epoch [5/50] batch [60/500] time 0.905 (0.896) data 0.000 (0.013) loss 0.6782 (1.1448) acc 78.1250 (71.1979) lr 1.9823e-03 eta 5:42:40 +epoch [5/50] batch [65/500] time 0.895 (0.896) data 0.000 (0.012) loss 0.8506 (1.1437) acc 81.2500 (71.3942) lr 1.9823e-03 eta 5:42:39 +epoch [5/50] batch [70/500] time 0.894 (0.896) data 0.000 (0.011) loss 0.9297 (1.1526) acc 71.8750 (71.2500) lr 1.9823e-03 eta 5:42:23 +epoch [5/50] batch [75/500] time 0.872 (0.895) data 0.000 (0.010) loss 1.5498 (1.1633) acc 50.0000 (70.6667) lr 1.9823e-03 eta 5:41:48 +epoch [5/50] batch [80/500] time 0.899 (0.894) data 0.000 (0.010) loss 1.1855 (1.1704) acc 71.8750 (70.3906) lr 1.9823e-03 eta 5:41:21 +epoch [5/50] batch [85/500] time 0.865 (0.892) data 0.000 (0.009) loss 1.5391 (1.1641) acc 56.2500 (70.5515) lr 1.9823e-03 eta 5:40:51 +epoch [5/50] batch [90/500] time 0.896 (0.892) data 0.000 (0.009) loss 1.2861 (1.1682) acc 71.8750 (70.6944) lr 1.9823e-03 eta 5:40:40 +epoch [5/50] batch [95/500] time 0.899 (0.892) data 0.000 (0.008) loss 0.8335 (1.1730) acc 78.1250 (70.8224) lr 1.9823e-03 eta 5:40:31 +epoch [5/50] batch [100/500] time 0.869 (0.892) data 0.000 (0.008) loss 1.1436 (1.1621) acc 75.0000 (71.0625) lr 1.9823e-03 eta 5:40:17 +epoch [5/50] batch [105/500] time 0.885 (0.891) data 0.000 (0.008) loss 1.2422 (1.1536) acc 71.8750 (71.3988) lr 1.9823e-03 eta 5:40:09 +epoch [5/50] batch [110/500] time 0.859 (0.892) data 0.000 (0.007) loss 0.9756 (1.1637) acc 71.8750 (71.1364) lr 
1.9823e-03 eta 5:40:18 +epoch [5/50] batch [115/500] time 0.893 (0.892) data 0.000 (0.007) loss 1.1758 (1.1729) acc 65.6250 (70.8152) lr 1.9823e-03 eta 5:40:17 +epoch [5/50] batch [120/500] time 0.893 (0.892) data 0.000 (0.007) loss 1.4580 (1.1777) acc 62.5000 (70.5729) lr 1.9823e-03 eta 5:40:03 +epoch [5/50] batch [125/500] time 0.885 (0.892) data 0.000 (0.006) loss 1.4531 (1.1791) acc 59.3750 (70.5750) lr 1.9823e-03 eta 5:40:08 +epoch [5/50] batch [130/500] time 0.884 (0.892) data 0.000 (0.006) loss 1.0674 (1.1717) acc 68.7500 (70.6010) lr 1.9823e-03 eta 5:39:50 +epoch [5/50] batch [135/500] time 0.871 (0.891) data 0.000 (0.006) loss 0.6621 (1.1685) acc 81.2500 (70.5324) lr 1.9823e-03 eta 5:39:40 +epoch [5/50] batch [140/500] time 0.872 (0.891) data 0.000 (0.006) loss 1.2334 (1.1694) acc 68.7500 (70.6250) lr 1.9823e-03 eta 5:39:26 +epoch [5/50] batch [145/500] time 0.896 (0.891) data 0.000 (0.006) loss 1.6455 (1.1700) acc 65.6250 (70.6897) lr 1.9823e-03 eta 5:39:23 +epoch [5/50] batch [150/500] time 0.881 (0.891) data 0.000 (0.005) loss 1.2842 (1.1680) acc 59.3750 (70.6458) lr 1.9823e-03 eta 5:39:19 +epoch [5/50] batch [155/500] time 0.862 (0.891) data 0.000 (0.005) loss 1.0020 (1.1710) acc 78.1250 (70.4637) lr 1.9823e-03 eta 5:39:07 +epoch [5/50] batch [160/500] time 0.890 (0.890) data 0.000 (0.005) loss 1.5215 (1.1822) acc 65.6250 (70.3320) lr 1.9823e-03 eta 5:38:56 +epoch [5/50] batch [165/500] time 0.892 (0.890) data 0.000 (0.005) loss 1.0127 (1.1883) acc 75.0000 (70.2083) lr 1.9823e-03 eta 5:38:42 +epoch [5/50] batch [170/500] time 0.902 (0.890) data 0.000 (0.005) loss 0.8579 (1.1832) acc 78.1250 (70.3676) lr 1.9823e-03 eta 5:38:42 +epoch [5/50] batch [175/500] time 0.866 (0.890) data 0.000 (0.005) loss 1.3584 (1.1803) acc 68.7500 (70.4464) lr 1.9823e-03 eta 5:38:31 +epoch [5/50] batch [180/500] time 0.902 (0.890) data 0.000 (0.004) loss 1.0596 (1.1800) acc 65.6250 (70.4688) lr 1.9823e-03 eta 5:38:29 +epoch [5/50] batch [185/500] time 0.868 (0.890) data 
0.000 (0.004) loss 0.9478 (1.1789) acc 75.0000 (70.5405) lr 1.9823e-03 eta 5:38:20 +epoch [5/50] batch [190/500] time 0.899 (0.890) data 0.000 (0.004) loss 1.2148 (1.1799) acc 71.8750 (70.5592) lr 1.9823e-03 eta 5:38:18 +epoch [5/50] batch [195/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.6255 (1.1800) acc 87.5000 (70.5929) lr 1.9823e-03 eta 5:38:13 +epoch [5/50] batch [200/500] time 0.893 (0.890) data 0.000 (0.004) loss 1.1846 (1.1821) acc 62.5000 (70.5781) lr 1.9823e-03 eta 5:38:06 +epoch [5/50] batch [205/500] time 0.899 (0.890) data 0.000 (0.004) loss 0.9717 (1.1820) acc 65.6250 (70.4421) lr 1.9823e-03 eta 5:38:02 +epoch [5/50] batch [210/500] time 0.864 (0.890) data 0.000 (0.004) loss 1.0488 (1.1867) acc 75.0000 (70.3571) lr 1.9823e-03 eta 5:38:08 +epoch [5/50] batch [215/500] time 0.867 (0.890) data 0.000 (0.004) loss 0.9507 (1.1877) acc 78.1250 (70.3634) lr 1.9823e-03 eta 5:38:01 +epoch [5/50] batch [220/500] time 0.913 (0.890) data 0.000 (0.004) loss 1.2842 (1.1908) acc 65.6250 (70.2415) lr 1.9823e-03 eta 5:37:53 +epoch [5/50] batch [225/500] time 0.859 (0.890) data 0.001 (0.004) loss 1.8584 (1.1870) acc 53.1250 (70.3333) lr 1.9823e-03 eta 5:37:46 +epoch [5/50] batch [230/500] time 0.895 (0.890) data 0.000 (0.004) loss 1.3398 (1.1827) acc 65.6250 (70.4755) lr 1.9823e-03 eta 5:37:46 +epoch [5/50] batch [235/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.7842 (1.1802) acc 84.3750 (70.5452) lr 1.9823e-03 eta 5:37:48 +epoch [5/50] batch [240/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.2549 (1.1810) acc 65.6250 (70.4948) lr 1.9823e-03 eta 5:37:40 +epoch [5/50] batch [245/500] time 0.907 (0.890) data 0.001 (0.003) loss 1.4541 (1.1796) acc 68.7500 (70.5740) lr 1.9823e-03 eta 5:37:39 +epoch [5/50] batch [250/500] time 0.999 (0.891) data 0.000 (0.003) loss 0.6982 (1.1740) acc 75.0000 (70.7250) lr 1.9823e-03 eta 5:37:46 +epoch [5/50] batch [255/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.0186 (1.1740) acc 78.1250 (70.7353) lr 1.9823e-03 eta 
5:37:38 +epoch [5/50] batch [260/500] time 0.889 (0.891) data 0.000 (0.003) loss 1.3799 (1.1747) acc 71.8750 (70.7572) lr 1.9823e-03 eta 5:37:30 +epoch [5/50] batch [265/500] time 0.859 (0.890) data 0.000 (0.003) loss 1.2920 (1.1770) acc 62.5000 (70.6722) lr 1.9823e-03 eta 5:37:20 +epoch [5/50] batch [270/500] time 0.880 (0.890) data 0.000 (0.003) loss 1.2402 (1.1783) acc 75.0000 (70.7292) lr 1.9823e-03 eta 5:37:08 +epoch [5/50] batch [275/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.4600 (1.1761) acc 71.8750 (70.8068) lr 1.9823e-03 eta 5:37:02 +epoch [5/50] batch [280/500] time 0.944 (0.890) data 0.000 (0.003) loss 1.1396 (1.1763) acc 71.8750 (70.7478) lr 1.9823e-03 eta 5:36:59 +epoch [5/50] batch [285/500] time 0.925 (0.890) data 0.000 (0.003) loss 0.7944 (1.1712) acc 81.2500 (70.8553) lr 1.9823e-03 eta 5:36:59 +epoch [5/50] batch [290/500] time 0.848 (0.890) data 0.000 (0.003) loss 0.9277 (1.1662) acc 78.1250 (70.9698) lr 1.9823e-03 eta 5:36:49 +epoch [5/50] batch [295/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.2363 (1.1670) acc 62.5000 (70.9322) lr 1.9823e-03 eta 5:36:42 +epoch [5/50] batch [300/500] time 0.861 (0.889) data 0.000 (0.003) loss 1.3721 (1.1688) acc 71.8750 (70.8854) lr 1.9823e-03 eta 5:36:30 +epoch [5/50] batch [305/500] time 0.885 (0.889) data 0.000 (0.003) loss 0.9619 (1.1648) acc 75.0000 (70.9529) lr 1.9823e-03 eta 5:36:16 +epoch [5/50] batch [310/500] time 0.882 (0.889) data 0.000 (0.003) loss 1.4697 (1.1646) acc 65.6250 (70.9577) lr 1.9823e-03 eta 5:36:14 +epoch [5/50] batch [315/500] time 0.862 (0.889) data 0.000 (0.003) loss 0.9219 (1.1649) acc 81.2500 (70.9425) lr 1.9823e-03 eta 5:36:08 +epoch [5/50] batch [320/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.1240 (1.1658) acc 68.7500 (70.8789) lr 1.9823e-03 eta 5:36:03 +epoch [5/50] batch [325/500] time 0.858 (0.889) data 0.000 (0.003) loss 1.5527 (1.1678) acc 59.3750 (70.8365) lr 1.9823e-03 eta 5:35:56 +epoch [5/50] batch [330/500] time 0.881 (0.889) data 0.000 (0.003) loss 
1.6113 (1.1708) acc 59.3750 (70.7481) lr 1.9823e-03 eta 5:35:53 +epoch [5/50] batch [335/500] time 0.873 (0.889) data 0.000 (0.003) loss 0.5903 (1.1695) acc 87.5000 (70.8022) lr 1.9823e-03 eta 5:35:46 +epoch [5/50] batch [340/500] time 0.912 (0.889) data 0.000 (0.003) loss 0.9165 (1.1691) acc 75.0000 (70.7996) lr 1.9823e-03 eta 5:35:49 +epoch [5/50] batch [345/500] time 0.911 (0.889) data 0.000 (0.002) loss 1.4561 (1.1707) acc 59.3750 (70.7428) lr 1.9823e-03 eta 5:35:48 +epoch [5/50] batch [350/500] time 0.866 (0.890) data 0.000 (0.002) loss 1.3789 (1.1696) acc 62.5000 (70.6875) lr 1.9823e-03 eta 5:35:54 +epoch [5/50] batch [355/500] time 0.850 (0.890) data 0.000 (0.002) loss 1.2197 (1.1739) acc 75.0000 (70.6162) lr 1.9823e-03 eta 5:35:43 +epoch [5/50] batch [360/500] time 0.909 (0.889) data 0.000 (0.002) loss 0.9688 (1.1726) acc 81.2500 (70.6597) lr 1.9823e-03 eta 5:35:37 +epoch [5/50] batch [365/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.3301 (1.1759) acc 71.8750 (70.6164) lr 1.9823e-03 eta 5:35:29 +epoch [5/50] batch [370/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.8701 (1.1727) acc 71.8750 (70.6757) lr 1.9823e-03 eta 5:35:20 +epoch [5/50] batch [375/500] time 0.861 (0.889) data 0.000 (0.002) loss 1.0244 (1.1694) acc 78.1250 (70.7417) lr 1.9823e-03 eta 5:35:11 +epoch [5/50] batch [380/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.9243 (1.1667) acc 71.8750 (70.7977) lr 1.9823e-03 eta 5:35:03 +epoch [5/50] batch [385/500] time 0.907 (0.889) data 0.000 (0.002) loss 0.9614 (1.1650) acc 68.7500 (70.8442) lr 1.9823e-03 eta 5:35:00 +epoch [5/50] batch [390/500] time 0.910 (0.889) data 0.000 (0.002) loss 1.3359 (1.1682) acc 65.6250 (70.8173) lr 1.9823e-03 eta 5:34:56 +epoch [5/50] batch [395/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.0742 (1.1692) acc 75.0000 (70.7991) lr 1.9823e-03 eta 5:34:59 +epoch [5/50] batch [400/500] time 0.880 (0.889) data 0.001 (0.002) loss 0.9590 (1.1665) acc 71.8750 (70.8125) lr 1.9823e-03 eta 5:34:50 +epoch [5/50] 
batch [405/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.7812 (1.1659) acc 62.5000 (70.8179) lr 1.9823e-03 eta 5:34:45 +epoch [5/50] batch [410/500] time 0.923 (0.889) data 0.000 (0.002) loss 0.8252 (1.1678) acc 68.7500 (70.7393) lr 1.9823e-03 eta 5:34:43 +epoch [5/50] batch [415/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.9004 (1.1712) acc 65.6250 (70.7003) lr 1.9823e-03 eta 5:34:36 +epoch [5/50] batch [420/500] time 0.874 (0.889) data 0.000 (0.002) loss 0.8394 (1.1698) acc 84.3750 (70.7292) lr 1.9823e-03 eta 5:34:30 +epoch [5/50] batch [425/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.2158 (1.1699) acc 71.8750 (70.7426) lr 1.9823e-03 eta 5:34:23 +epoch [5/50] batch [430/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.9551 (1.1692) acc 75.0000 (70.7703) lr 1.9823e-03 eta 5:34:19 +epoch [5/50] batch [435/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.1104 (1.1663) acc 65.6250 (70.7615) lr 1.9823e-03 eta 5:34:11 +epoch [5/50] batch [440/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.0654 (1.1657) acc 75.0000 (70.7741) lr 1.9823e-03 eta 5:34:06 +epoch [5/50] batch [445/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.1055 (1.1690) acc 68.7500 (70.7233) lr 1.9823e-03 eta 5:34:02 +epoch [5/50] batch [450/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.1807 (1.1694) acc 78.1250 (70.7361) lr 1.9823e-03 eta 5:33:57 +epoch [5/50] batch [455/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.3271 (1.1689) acc 78.1250 (70.7555) lr 1.9823e-03 eta 5:33:51 +epoch [5/50] batch [460/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.2676 (1.1699) acc 65.6250 (70.7133) lr 1.9823e-03 eta 5:33:46 +epoch [5/50] batch [465/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.7598 (1.1672) acc 78.1250 (70.7796) lr 1.9823e-03 eta 5:33:36 +epoch [5/50] batch [470/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.0410 (1.1660) acc 81.2500 (70.8245) lr 1.9823e-03 eta 5:33:27 +epoch [5/50] batch [475/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.2197 (1.1667) acc 
65.6250 (70.8158) lr 1.9823e-03 eta 5:33:25 +epoch [5/50] batch [480/500] time 0.855 (0.888) data 0.000 (0.002) loss 0.7427 (1.1662) acc 75.0000 (70.8138) lr 1.9823e-03 eta 5:33:17 +epoch [5/50] batch [485/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.5908 (1.1675) acc 56.2500 (70.7668) lr 1.9823e-03 eta 5:33:15 +epoch [5/50] batch [490/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.9111 (1.1684) acc 56.2500 (70.7589) lr 1.9823e-03 eta 5:33:08 +epoch [5/50] batch [495/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.2637 (1.1694) acc 78.1250 (70.7513) lr 1.9823e-03 eta 5:33:08 +epoch [5/50] batch [500/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.6387 (1.1717) acc 56.2500 (70.7062) lr 1.9686e-03 eta 5:33:04 +epoch [6/50] batch [5/500] time 0.884 (1.022) data 0.000 (0.139) loss 1.2070 (0.9971) acc 62.5000 (74.3750) lr 1.9686e-03 eta 6:23:11 +epoch [6/50] batch [10/500] time 0.906 (0.955) data 0.000 (0.070) loss 0.6553 (1.0462) acc 78.1250 (74.6875) lr 1.9686e-03 eta 5:58:04 +epoch [6/50] batch [15/500] time 0.889 (0.932) data 0.000 (0.046) loss 1.6641 (1.0957) acc 68.7500 (73.3333) lr 1.9686e-03 eta 5:49:23 +epoch [6/50] batch [20/500] time 0.867 (0.919) data 0.000 (0.035) loss 1.1348 (1.1579) acc 75.0000 (72.6562) lr 1.9686e-03 eta 5:44:09 +epoch [6/50] batch [25/500] time 0.909 (0.912) data 0.000 (0.028) loss 1.1484 (1.1985) acc 71.8750 (71.8750) lr 1.9686e-03 eta 5:41:43 +epoch [6/50] batch [30/500] time 0.882 (0.907) data 0.000 (0.023) loss 2.0996 (1.2312) acc 56.2500 (71.3542) lr 1.9686e-03 eta 5:39:43 +epoch [6/50] batch [35/500] time 0.862 (0.902) data 0.000 (0.020) loss 0.5127 (1.2170) acc 84.3750 (71.2500) lr 1.9686e-03 eta 5:37:53 +epoch [6/50] batch [40/500] time 0.876 (0.900) data 0.000 (0.018) loss 1.1465 (1.2007) acc 65.6250 (70.7031) lr 1.9686e-03 eta 5:36:44 +epoch [6/50] batch [45/500] time 0.870 (0.897) data 0.000 (0.016) loss 0.9561 (1.1951) acc 84.3750 (70.6250) lr 1.9686e-03 eta 5:35:51 +epoch [6/50] batch [50/500] time 0.871 (0.898) 
data 0.000 (0.014) loss 1.7188 (1.1930) acc 59.3750 (70.8125) lr 1.9686e-03 eta 5:36:06 +epoch [6/50] batch [55/500] time 0.882 (0.897) data 0.000 (0.013) loss 1.0840 (1.1777) acc 78.1250 (71.3636) lr 1.9686e-03 eta 5:35:37 +epoch [6/50] batch [60/500] time 0.857 (0.895) data 0.000 (0.012) loss 0.7417 (1.1636) acc 78.1250 (71.5104) lr 1.9686e-03 eta 5:34:48 +epoch [6/50] batch [65/500] time 0.900 (0.894) data 0.000 (0.011) loss 0.7051 (1.1772) acc 81.2500 (71.2019) lr 1.9686e-03 eta 5:34:19 +epoch [6/50] batch [70/500] time 0.905 (0.894) data 0.000 (0.010) loss 1.1523 (1.1913) acc 71.8750 (70.9375) lr 1.9686e-03 eta 5:34:20 +epoch [6/50] batch [75/500] time 0.877 (0.893) data 0.000 (0.009) loss 1.7734 (1.1859) acc 53.1250 (71.0000) lr 1.9686e-03 eta 5:33:49 +epoch [6/50] batch [80/500] time 0.889 (0.893) data 0.000 (0.009) loss 1.0879 (1.1841) acc 68.7500 (71.0547) lr 1.9686e-03 eta 5:33:35 +epoch [6/50] batch [85/500] time 0.849 (0.892) data 0.000 (0.008) loss 1.4893 (1.1932) acc 65.6250 (70.9559) lr 1.9686e-03 eta 5:33:12 +epoch [6/50] batch [90/500] time 0.884 (0.892) data 0.000 (0.008) loss 0.8979 (1.1804) acc 68.7500 (71.2500) lr 1.9686e-03 eta 5:33:02 +epoch [6/50] batch [95/500] time 0.925 (0.893) data 0.000 (0.008) loss 1.1719 (1.1753) acc 62.5000 (71.3487) lr 1.9686e-03 eta 5:33:26 +epoch [6/50] batch [100/500] time 0.879 (0.892) data 0.000 (0.007) loss 1.0684 (1.1808) acc 71.8750 (71.2188) lr 1.9686e-03 eta 5:33:10 +epoch [6/50] batch [105/500] time 0.901 (0.892) data 0.000 (0.007) loss 0.4023 (1.1728) acc 87.5000 (71.5179) lr 1.9686e-03 eta 5:32:49 +epoch [6/50] batch [110/500] time 0.890 (0.891) data 0.000 (0.007) loss 1.5000 (1.1782) acc 53.1250 (71.1932) lr 1.9686e-03 eta 5:32:37 +epoch [6/50] batch [115/500] time 0.866 (0.891) data 0.000 (0.006) loss 0.7432 (1.1806) acc 81.2500 (71.1685) lr 1.9686e-03 eta 5:32:21 +epoch [6/50] batch [120/500] time 0.872 (0.891) data 0.000 (0.006) loss 1.1553 (1.1794) acc 75.0000 (71.1719) lr 1.9686e-03 eta 5:32:16 
+epoch [6/50] batch [125/500] time 0.868 (0.891) data 0.000 (0.006) loss 1.0332 (1.1804) acc 65.6250 (71.0250) lr 1.9686e-03 eta 5:32:07 +epoch [6/50] batch [130/500] time 0.888 (0.891) data 0.000 (0.006) loss 1.4277 (1.1829) acc 65.6250 (70.8654) lr 1.9686e-03 eta 5:32:01 +epoch [6/50] batch [135/500] time 0.882 (0.890) data 0.000 (0.005) loss 0.9419 (1.1785) acc 81.2500 (71.0417) lr 1.9686e-03 eta 5:31:55 +epoch [6/50] batch [140/500] time 0.913 (0.890) data 0.000 (0.005) loss 1.3779 (1.1752) acc 59.3750 (71.0268) lr 1.9686e-03 eta 5:31:42 +epoch [6/50] batch [145/500] time 0.907 (0.890) data 0.000 (0.005) loss 1.3965 (1.1809) acc 62.5000 (70.9914) lr 1.9686e-03 eta 5:31:35 +epoch [6/50] batch [150/500] time 0.896 (0.890) data 0.000 (0.005) loss 1.2129 (1.1781) acc 65.6250 (70.9167) lr 1.9686e-03 eta 5:31:29 +epoch [6/50] batch [155/500] time 0.892 (0.890) data 0.000 (0.005) loss 1.0879 (1.1709) acc 68.7500 (71.0484) lr 1.9686e-03 eta 5:31:25 +epoch [6/50] batch [160/500] time 0.901 (0.890) data 0.000 (0.005) loss 1.1172 (1.1705) acc 68.7500 (71.0547) lr 1.9686e-03 eta 5:31:14 +epoch [6/50] batch [165/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.0781 (1.1615) acc 78.1250 (71.1932) lr 1.9686e-03 eta 5:31:08 +epoch [6/50] batch [170/500] time 0.857 (0.889) data 0.000 (0.004) loss 1.1855 (1.1645) acc 78.1250 (71.1581) lr 1.9686e-03 eta 5:30:56 +epoch [6/50] batch [175/500] time 0.869 (0.889) data 0.000 (0.004) loss 0.8447 (1.1666) acc 78.1250 (71.1429) lr 1.9686e-03 eta 5:30:49 +epoch [6/50] batch [180/500] time 0.901 (0.889) data 0.000 (0.004) loss 0.9902 (1.1719) acc 75.0000 (70.9375) lr 1.9686e-03 eta 5:30:51 +epoch [6/50] batch [185/500] time 0.890 (0.889) data 0.000 (0.004) loss 1.0879 (1.1712) acc 65.6250 (70.9459) lr 1.9686e-03 eta 5:30:42 +epoch [6/50] batch [190/500] time 0.886 (0.889) data 0.000 (0.004) loss 0.7583 (1.1721) acc 78.1250 (70.9211) lr 1.9686e-03 eta 5:30:37 +epoch [6/50] batch [195/500] time 0.874 (0.889) data 0.000 (0.004) loss 0.7632 
(1.1681) acc 75.0000 (70.9615) lr 1.9686e-03 eta 5:30:36 +epoch [6/50] batch [200/500] time 0.911 (0.890) data 0.000 (0.004) loss 1.3555 (1.1703) acc 71.8750 (70.9219) lr 1.9686e-03 eta 5:30:37 +epoch [6/50] batch [205/500] time 0.887 (0.890) data 0.000 (0.004) loss 1.6357 (1.1734) acc 65.6250 (70.8841) lr 1.9686e-03 eta 5:30:33 +epoch [6/50] batch [210/500] time 0.885 (0.889) data 0.000 (0.004) loss 1.1562 (1.1672) acc 75.0000 (70.9970) lr 1.9686e-03 eta 5:30:20 +epoch [6/50] batch [215/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.8428 (1.1627) acc 78.1250 (71.1483) lr 1.9686e-03 eta 5:30:07 +epoch [6/50] batch [220/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.4414 (1.1627) acc 75.0000 (71.2216) lr 1.9686e-03 eta 5:30:00 +epoch [6/50] batch [225/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.1201 (1.1634) acc 65.6250 (71.1389) lr 1.9686e-03 eta 5:29:53 +epoch [6/50] batch [230/500] time 0.910 (0.889) data 0.000 (0.003) loss 0.7012 (1.1631) acc 78.1250 (71.0326) lr 1.9686e-03 eta 5:29:48 +epoch [6/50] batch [235/500] time 0.986 (0.889) data 0.000 (0.003) loss 1.1875 (1.1636) acc 68.7500 (71.0372) lr 1.9686e-03 eta 5:29:47 +epoch [6/50] batch [240/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.3936 (1.1672) acc 62.5000 (70.9635) lr 1.9686e-03 eta 5:29:37 +epoch [6/50] batch [245/500] time 0.882 (0.888) data 0.000 (0.003) loss 1.3770 (1.1685) acc 62.5000 (70.8673) lr 1.9686e-03 eta 5:29:31 +epoch [6/50] batch [250/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.7910 (1.1712) acc 68.7500 (70.8000) lr 1.9686e-03 eta 5:29:28 +epoch [6/50] batch [255/500] time 0.924 (0.889) data 0.000 (0.003) loss 1.6035 (1.1658) acc 62.5000 (70.9314) lr 1.9686e-03 eta 5:29:32 +epoch [6/50] batch [260/500] time 0.900 (0.889) data 0.000 (0.003) loss 1.5684 (1.1703) acc 62.5000 (70.8654) lr 1.9686e-03 eta 5:29:27 +epoch [6/50] batch [265/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.2871 (1.1684) acc 68.7500 (70.8844) lr 1.9686e-03 eta 5:29:24 +epoch [6/50] batch 
[270/500] time 0.867 (0.889) data 0.000 (0.003) loss 0.8330 (1.1661) acc 75.0000 (70.9375) lr 1.9686e-03 eta 5:29:16 +epoch [6/50] batch [275/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.2979 (1.1629) acc 75.0000 (71.0568) lr 1.9686e-03 eta 5:29:07 +epoch [6/50] batch [280/500] time 0.873 (0.888) data 0.000 (0.003) loss 0.6011 (1.1636) acc 84.3750 (71.0045) lr 1.9686e-03 eta 5:29:02 +epoch [6/50] batch [285/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.8608 (1.1641) acc 78.1250 (70.9759) lr 1.9686e-03 eta 5:29:00 +epoch [6/50] batch [290/500] time 0.863 (0.889) data 0.000 (0.003) loss 0.7344 (1.1573) acc 81.2500 (71.1530) lr 1.9686e-03 eta 5:28:53 +epoch [6/50] batch [295/500] time 0.851 (0.888) data 0.000 (0.003) loss 0.8906 (1.1572) acc 71.8750 (71.0805) lr 1.9686e-03 eta 5:28:42 +epoch [6/50] batch [300/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.5430 (1.1562) acc 59.3750 (71.0625) lr 1.9686e-03 eta 5:28:33 +epoch [6/50] batch [305/500] time 0.861 (0.888) data 0.000 (0.002) loss 0.9849 (1.1566) acc 78.1250 (71.1168) lr 1.9686e-03 eta 5:28:24 +epoch [6/50] batch [310/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.9810 (1.1530) acc 78.1250 (71.1492) lr 1.9686e-03 eta 5:28:20 +epoch [6/50] batch [315/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.1660 (1.1534) acc 65.6250 (71.1607) lr 1.9686e-03 eta 5:28:17 +epoch [6/50] batch [320/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6680 (1.1528) acc 84.3750 (71.1816) lr 1.9686e-03 eta 5:28:10 +epoch [6/50] batch [325/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.0332 (1.1542) acc 71.8750 (71.1827) lr 1.9686e-03 eta 5:28:05 +epoch [6/50] batch [330/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0635 (1.1531) acc 68.7500 (71.1458) lr 1.9686e-03 eta 5:28:01 +epoch [6/50] batch [335/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.0039 (1.1576) acc 75.0000 (71.0821) lr 1.9686e-03 eta 5:27:55 +epoch [6/50] batch [340/500] time 0.856 (0.887) data 0.000 (0.002) loss 0.8540 (1.1557) acc 78.1250 
(71.1029) lr 1.9686e-03 eta 5:27:41 +epoch [6/50] batch [345/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.1348 (1.1543) acc 68.7500 (71.1051) lr 1.9686e-03 eta 5:27:36 +epoch [6/50] batch [350/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.5356 (1.1528) acc 81.2500 (71.1339) lr 1.9686e-03 eta 5:27:31 +epoch [6/50] batch [355/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.2969 (1.1525) acc 68.7500 (71.1180) lr 1.9686e-03 eta 5:27:24 +epoch [6/50] batch [360/500] time 0.924 (0.887) data 0.000 (0.002) loss 0.5972 (1.1516) acc 81.2500 (71.1285) lr 1.9686e-03 eta 5:27:21 +epoch [6/50] batch [365/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.2256 (1.1528) acc 68.7500 (71.1301) lr 1.9686e-03 eta 5:27:16 +epoch [6/50] batch [370/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7188 (1.1518) acc 78.1250 (71.1909) lr 1.9686e-03 eta 5:27:12 +epoch [6/50] batch [375/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.4404 (1.1545) acc 68.7500 (71.1417) lr 1.9686e-03 eta 5:27:09 +epoch [6/50] batch [380/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.6943 (1.1555) acc 53.1250 (71.1102) lr 1.9686e-03 eta 5:27:13 +epoch [6/50] batch [385/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.7856 (1.1535) acc 78.1250 (71.1932) lr 1.9686e-03 eta 5:27:09 +epoch [6/50] batch [390/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1924 (1.1536) acc 59.3750 (71.1138) lr 1.9686e-03 eta 5:27:05 +epoch [6/50] batch [395/500] time 0.927 (0.888) data 0.000 (0.002) loss 1.1494 (1.1527) acc 78.1250 (71.1709) lr 1.9686e-03 eta 5:27:03 +epoch [6/50] batch [400/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.2041 (1.1545) acc 71.8750 (71.1406) lr 1.9686e-03 eta 5:26:57 +epoch [6/50] batch [405/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.1562 (1.1528) acc 68.7500 (71.1265) lr 1.9686e-03 eta 5:26:51 +epoch [6/50] batch [410/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.2705 (1.1527) acc 71.8750 (71.1662) lr 1.9686e-03 eta 5:26:44 +epoch [6/50] batch [415/500] time 0.901 
(0.888) data 0.000 (0.002) loss 1.0254 (1.1526) acc 75.0000 (71.1145) lr 1.9686e-03 eta 5:26:44 +epoch [6/50] batch [420/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.3506 (1.1532) acc 65.6250 (71.0640) lr 1.9686e-03 eta 5:26:38 +epoch [6/50] batch [425/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.0010 (1.1565) acc 68.7500 (70.9559) lr 1.9686e-03 eta 5:26:38 +epoch [6/50] batch [430/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.1826 (1.1590) acc 68.7500 (70.9157) lr 1.9686e-03 eta 5:26:35 +epoch [6/50] batch [435/500] time 0.852 (0.888) data 0.000 (0.002) loss 1.0479 (1.1577) acc 65.6250 (70.9483) lr 1.9686e-03 eta 5:26:33 +epoch [6/50] batch [440/500] time 0.886 (0.888) data 0.000 (0.002) loss 2.1328 (1.1600) acc 43.7500 (70.9233) lr 1.9686e-03 eta 5:26:26 +epoch [6/50] batch [445/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.5117 (1.1604) acc 59.3750 (70.9340) lr 1.9686e-03 eta 5:26:18 +epoch [6/50] batch [450/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.5205 (1.1612) acc 65.6250 (70.9306) lr 1.9686e-03 eta 5:26:14 +epoch [6/50] batch [455/500] time 0.842 (0.888) data 0.000 (0.002) loss 1.2881 (1.1619) acc 65.6250 (70.8585) lr 1.9686e-03 eta 5:26:05 +epoch [6/50] batch [460/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.4785 (1.1626) acc 59.3750 (70.7948) lr 1.9686e-03 eta 5:26:00 +epoch [6/50] batch [465/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.1084 (1.1624) acc 71.8750 (70.8199) lr 1.9686e-03 eta 5:25:54 +epoch [6/50] batch [470/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.8008 (1.1639) acc 53.1250 (70.7979) lr 1.9686e-03 eta 5:25:49 +epoch [6/50] batch [475/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.7520 (1.1628) acc 81.2500 (70.8026) lr 1.9686e-03 eta 5:25:45 +epoch [6/50] batch [480/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.4844 (1.1638) acc 65.6250 (70.7878) lr 1.9686e-03 eta 5:25:44 +epoch [6/50] batch [485/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.9751 (1.1613) acc 81.2500 (70.8634) lr 
1.9686e-03 eta 5:25:39 +epoch [6/50] batch [490/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.9766 (1.1611) acc 81.2500 (70.8865) lr 1.9686e-03 eta 5:25:34 +epoch [6/50] batch [495/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.1553 (1.1630) acc 68.7500 (70.8586) lr 1.9686e-03 eta 5:25:28 +epoch [6/50] batch [500/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.7344 (1.1666) acc 59.3750 (70.8312) lr 1.9511e-03 eta 5:25:23 +epoch [7/50] batch [5/500] time 0.907 (1.018) data 0.000 (0.126) loss 1.0801 (1.2583) acc 78.1250 (66.2500) lr 1.9511e-03 eta 6:13:12 +epoch [7/50] batch [10/500] time 0.872 (0.952) data 0.000 (0.063) loss 1.3477 (1.2522) acc 68.7500 (66.5625) lr 1.9511e-03 eta 5:49:05 +epoch [7/50] batch [15/500] time 0.865 (0.924) data 0.000 (0.042) loss 1.2900 (1.2624) acc 65.6250 (67.0833) lr 1.9511e-03 eta 5:38:33 +epoch [7/50] batch [20/500] time 0.866 (0.914) data 0.000 (0.032) loss 1.2715 (1.2213) acc 78.1250 (68.9062) lr 1.9511e-03 eta 5:34:56 +epoch [7/50] batch [25/500] time 0.874 (0.908) data 0.000 (0.025) loss 0.6938 (1.1634) acc 84.3750 (70.5000) lr 1.9511e-03 eta 5:32:28 +epoch [7/50] batch [30/500] time 0.891 (0.909) data 0.000 (0.021) loss 0.6006 (1.1370) acc 84.3750 (71.3542) lr 1.9511e-03 eta 5:32:59 +epoch [7/50] batch [35/500] time 0.880 (0.905) data 0.000 (0.018) loss 1.0400 (1.1309) acc 71.8750 (71.2500) lr 1.9511e-03 eta 5:31:27 +epoch [7/50] batch [40/500] time 0.878 (0.902) data 0.000 (0.016) loss 1.9209 (1.1605) acc 65.6250 (70.8594) lr 1.9511e-03 eta 5:30:16 +epoch [7/50] batch [45/500] time 0.880 (0.901) data 0.001 (0.014) loss 1.1250 (1.1580) acc 81.2500 (70.9028) lr 1.9511e-03 eta 5:29:40 +epoch [7/50] batch [50/500] time 0.879 (0.898) data 0.000 (0.013) loss 1.6318 (1.1630) acc 65.6250 (70.6875) lr 1.9511e-03 eta 5:28:35 +epoch [7/50] batch [55/500] time 0.872 (0.896) data 0.000 (0.012) loss 1.2480 (1.1776) acc 65.6250 (69.8864) lr 1.9511e-03 eta 5:27:41 +epoch [7/50] batch [60/500] time 0.903 (0.896) data 0.000 (0.011) 
loss 1.1113 (1.1949) acc 75.0000 (69.5833) lr 1.9511e-03 eta 5:27:28 +epoch [7/50] batch [65/500] time 0.892 (0.896) data 0.000 (0.010) loss 1.4736 (1.2120) acc 59.3750 (69.2308) lr 1.9511e-03 eta 5:27:28 +epoch [7/50] batch [70/500] time 0.923 (0.897) data 0.000 (0.009) loss 1.0020 (1.2212) acc 62.5000 (68.6161) lr 1.9511e-03 eta 5:27:40 +epoch [7/50] batch [75/500] time 0.883 (0.897) data 0.000 (0.009) loss 1.4365 (1.2274) acc 65.6250 (68.6667) lr 1.9511e-03 eta 5:27:57 +epoch [7/50] batch [80/500] time 0.885 (0.897) data 0.000 (0.008) loss 0.7251 (1.2092) acc 81.2500 (68.9062) lr 1.9511e-03 eta 5:27:41 +epoch [7/50] batch [85/500] time 0.927 (0.896) data 0.000 (0.008) loss 1.0098 (1.2103) acc 65.6250 (68.8603) lr 1.9511e-03 eta 5:27:17 +epoch [7/50] batch [90/500] time 0.896 (0.896) data 0.000 (0.007) loss 1.5488 (1.2079) acc 65.6250 (68.9583) lr 1.9511e-03 eta 5:27:03 +epoch [7/50] batch [95/500] time 0.901 (0.895) data 0.000 (0.007) loss 1.2061 (1.2142) acc 78.1250 (69.1447) lr 1.9511e-03 eta 5:26:49 +epoch [7/50] batch [100/500] time 0.876 (0.894) data 0.000 (0.007) loss 1.5068 (1.2136) acc 68.7500 (69.4062) lr 1.9511e-03 eta 5:26:20 +epoch [7/50] batch [105/500] time 0.896 (0.894) data 0.000 (0.006) loss 1.0889 (1.2069) acc 71.8750 (69.4940) lr 1.9511e-03 eta 5:26:04 +epoch [7/50] batch [110/500] time 0.877 (0.893) data 0.000 (0.006) loss 1.3701 (1.2035) acc 68.7500 (69.6307) lr 1.9511e-03 eta 5:25:55 +epoch [7/50] batch [115/500] time 0.883 (0.893) data 0.000 (0.006) loss 0.7642 (1.1925) acc 84.3750 (70.0000) lr 1.9511e-03 eta 5:25:42 +epoch [7/50] batch [120/500] time 0.870 (0.893) data 0.000 (0.005) loss 1.5156 (1.1965) acc 62.5000 (69.8958) lr 1.9511e-03 eta 5:25:28 +epoch [7/50] batch [125/500] time 0.870 (0.892) data 0.000 (0.005) loss 0.5063 (1.1899) acc 84.3750 (70.0000) lr 1.9511e-03 eta 5:25:06 +epoch [7/50] batch [130/500] time 0.899 (0.892) data 0.000 (0.005) loss 2.1191 (1.1984) acc 50.0000 (69.8077) lr 1.9511e-03 eta 5:25:04 +epoch [7/50] batch 
[135/500] time 0.922 (0.892) data 0.000 (0.005) loss 0.6973 (1.1959) acc 78.1250 (69.8611) lr 1.9511e-03 eta 5:25:04 +epoch [7/50] batch [140/500] time 0.899 (0.893) data 0.000 (0.005) loss 1.0840 (1.1930) acc 78.1250 (69.9777) lr 1.9511e-03 eta 5:25:11 +epoch [7/50] batch [145/500] time 0.903 (0.893) data 0.000 (0.005) loss 0.8569 (1.1839) acc 75.0000 (70.2371) lr 1.9511e-03 eta 5:25:11 +epoch [7/50] batch [150/500] time 0.896 (0.893) data 0.000 (0.004) loss 0.6221 (1.1781) acc 84.3750 (70.2917) lr 1.9511e-03 eta 5:25:04 +epoch [7/50] batch [155/500] time 0.850 (0.892) data 0.000 (0.004) loss 1.6172 (1.1819) acc 62.5000 (70.4234) lr 1.9511e-03 eta 5:24:45 +epoch [7/50] batch [160/500] time 0.898 (0.892) data 0.000 (0.004) loss 1.1592 (1.1788) acc 56.2500 (70.3516) lr 1.9511e-03 eta 5:24:40 +epoch [7/50] batch [165/500] time 0.848 (0.892) data 0.000 (0.004) loss 0.9780 (1.1778) acc 78.1250 (70.4356) lr 1.9511e-03 eta 5:24:31 +epoch [7/50] batch [170/500] time 0.876 (0.891) data 0.000 (0.004) loss 1.2354 (1.1768) acc 65.6250 (70.4228) lr 1.9511e-03 eta 5:24:19 +epoch [7/50] batch [175/500] time 0.913 (0.892) data 0.000 (0.004) loss 1.4629 (1.1752) acc 62.5000 (70.4286) lr 1.9511e-03 eta 5:24:25 +epoch [7/50] batch [180/500] time 0.895 (0.892) data 0.000 (0.004) loss 1.1670 (1.1729) acc 75.0000 (70.3993) lr 1.9511e-03 eta 5:24:28 +epoch [7/50] batch [185/500] time 0.896 (0.892) data 0.000 (0.004) loss 1.0303 (1.1661) acc 75.0000 (70.5743) lr 1.9511e-03 eta 5:24:25 +epoch [7/50] batch [190/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.2764 (1.1737) acc 78.1250 (70.4605) lr 1.9511e-03 eta 5:24:17 +epoch [7/50] batch [195/500] time 0.898 (0.892) data 0.000 (0.003) loss 0.8901 (1.1793) acc 75.0000 (70.4808) lr 1.9511e-03 eta 5:24:19 +epoch [7/50] batch [200/500] time 0.863 (0.892) data 0.000 (0.003) loss 1.1992 (1.1814) acc 68.7500 (70.3594) lr 1.9511e-03 eta 5:24:07 +epoch [7/50] batch [205/500] time 0.880 (0.892) data 0.000 (0.003) loss 2.1836 (1.1878) acc 59.3750 
(70.3049) lr 1.9511e-03 eta 5:23:58 +epoch [7/50] batch [210/500] time 0.865 (0.891) data 0.000 (0.003) loss 1.1328 (1.1908) acc 65.6250 (70.2083) lr 1.9511e-03 eta 5:23:44 +epoch [7/50] batch [215/500] time 0.901 (0.891) data 0.000 (0.003) loss 0.6411 (1.1879) acc 81.2500 (70.2907) lr 1.9511e-03 eta 5:23:40 +epoch [7/50] batch [220/500] time 0.892 (0.892) data 0.000 (0.003) loss 1.5391 (1.1928) acc 75.0000 (70.2273) lr 1.9511e-03 eta 5:23:41 +epoch [7/50] batch [225/500] time 0.924 (0.892) data 0.000 (0.003) loss 0.6880 (1.1853) acc 84.3750 (70.3472) lr 1.9511e-03 eta 5:23:42 +epoch [7/50] batch [230/500] time 0.893 (0.892) data 0.000 (0.003) loss 0.9463 (1.1838) acc 75.0000 (70.4755) lr 1.9511e-03 eta 5:23:35 +epoch [7/50] batch [235/500] time 0.876 (0.892) data 0.000 (0.003) loss 1.3428 (1.1809) acc 56.2500 (70.4654) lr 1.9511e-03 eta 5:23:26 +epoch [7/50] batch [240/500] time 0.900 (0.891) data 0.000 (0.003) loss 1.9111 (1.1780) acc 59.3750 (70.4948) lr 1.9511e-03 eta 5:23:18 +epoch [7/50] batch [245/500] time 0.867 (0.891) data 0.000 (0.003) loss 1.1016 (1.1850) acc 71.8750 (70.3827) lr 1.9511e-03 eta 5:23:08 +epoch [7/50] batch [250/500] time 0.912 (0.891) data 0.000 (0.003) loss 1.1992 (1.1880) acc 65.6250 (70.3375) lr 1.9511e-03 eta 5:22:59 +epoch [7/50] batch [255/500] time 0.901 (0.891) data 0.000 (0.003) loss 1.1787 (1.1858) acc 68.7500 (70.3186) lr 1.9511e-03 eta 5:22:52 +epoch [7/50] batch [260/500] time 0.872 (0.891) data 0.000 (0.003) loss 1.3906 (1.1897) acc 71.8750 (70.3245) lr 1.9511e-03 eta 5:22:46 +epoch [7/50] batch [265/500] time 0.858 (0.890) data 0.000 (0.003) loss 0.8833 (1.1864) acc 75.0000 (70.3774) lr 1.9511e-03 eta 5:22:34 +epoch [7/50] batch [270/500] time 0.867 (0.890) data 0.000 (0.003) loss 1.3252 (1.1906) acc 71.8750 (70.3588) lr 1.9511e-03 eta 5:22:23 +epoch [7/50] batch [275/500] time 0.874 (0.890) data 0.000 (0.003) loss 1.1709 (1.1876) acc 62.5000 (70.4205) lr 1.9511e-03 eta 5:22:13 +epoch [7/50] batch [280/500] time 0.892 
(0.890) data 0.000 (0.002) loss 0.9575 (1.1852) acc 68.7500 (70.4799) lr 1.9511e-03 eta 5:22:12 +epoch [7/50] batch [285/500] time 0.880 (0.889) data 0.000 (0.002) loss 1.0586 (1.1875) acc 65.6250 (70.4934) lr 1.9511e-03 eta 5:21:55 +epoch [7/50] batch [290/500] time 0.876 (0.890) data 0.000 (0.002) loss 1.2480 (1.1841) acc 71.8750 (70.5388) lr 1.9511e-03 eta 5:21:54 +epoch [7/50] batch [295/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.8232 (1.1809) acc 75.0000 (70.5932) lr 1.9511e-03 eta 5:21:47 +epoch [7/50] batch [300/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.1123 (1.1807) acc 71.8750 (70.6250) lr 1.9511e-03 eta 5:21:36 +epoch [7/50] batch [305/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.1309 (1.1758) acc 68.7500 (70.6967) lr 1.9511e-03 eta 5:21:25 +epoch [7/50] batch [310/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.1572 (1.1753) acc 75.0000 (70.7157) lr 1.9511e-03 eta 5:21:19 +epoch [7/50] batch [315/500] time 1.000 (0.889) data 0.000 (0.002) loss 0.7744 (1.1712) acc 84.3750 (70.8433) lr 1.9511e-03 eta 5:21:23 +epoch [7/50] batch [320/500] time 0.913 (0.889) data 0.000 (0.002) loss 0.7778 (1.1689) acc 78.1250 (70.8887) lr 1.9511e-03 eta 5:21:23 +epoch [7/50] batch [325/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.6992 (1.1682) acc 56.2500 (70.8654) lr 1.9511e-03 eta 5:21:17 +epoch [7/50] batch [330/500] time 0.875 (0.889) data 0.000 (0.002) loss 1.4219 (1.1666) acc 75.0000 (70.9186) lr 1.9511e-03 eta 5:21:08 +epoch [7/50] batch [335/500] time 0.866 (0.889) data 0.000 (0.002) loss 1.8135 (1.1702) acc 59.3750 (70.8489) lr 1.9511e-03 eta 5:21:01 +epoch [7/50] batch [340/500] time 0.849 (0.889) data 0.000 (0.002) loss 1.1885 (1.1714) acc 62.5000 (70.7537) lr 1.9511e-03 eta 5:20:51 +epoch [7/50] batch [345/500] time 0.854 (0.889) data 0.000 (0.002) loss 0.9644 (1.1727) acc 81.2500 (70.7609) lr 1.9511e-03 eta 5:20:42 +epoch [7/50] batch [350/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.5088 (1.1715) acc 68.7500 (70.8214) lr 
1.9511e-03 eta 5:20:34 +epoch [7/50] batch [355/500] time 0.913 (0.888) data 0.000 (0.002) loss 0.7720 (1.1693) acc 81.2500 (70.8627) lr 1.9511e-03 eta 5:20:29 +epoch [7/50] batch [360/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.5273 (1.1705) acc 53.1250 (70.7812) lr 1.9511e-03 eta 5:20:27 +epoch [7/50] batch [365/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.8750 (1.1671) acc 75.0000 (70.8305) lr 1.9511e-03 eta 5:20:23 +epoch [7/50] batch [370/500] time 0.903 (0.888) data 0.001 (0.002) loss 0.8687 (1.1671) acc 84.3750 (70.7939) lr 1.9511e-03 eta 5:20:15 +epoch [7/50] batch [375/500] time 0.913 (0.889) data 0.000 (0.002) loss 1.2021 (1.1680) acc 65.6250 (70.8000) lr 1.9511e-03 eta 5:20:15 +epoch [7/50] batch [380/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.1445 (1.1704) acc 71.8750 (70.7484) lr 1.9511e-03 eta 5:20:15 +epoch [7/50] batch [385/500] time 0.866 (0.889) data 0.000 (0.002) loss 1.0459 (1.1697) acc 68.7500 (70.7224) lr 1.9511e-03 eta 5:20:08 +epoch [7/50] batch [390/500] time 0.897 (0.889) data 0.000 (0.002) loss 0.9824 (1.1706) acc 71.8750 (70.7452) lr 1.9511e-03 eta 5:20:03 +epoch [7/50] batch [395/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.4917 (1.1678) acc 90.6250 (70.7832) lr 1.9511e-03 eta 5:20:01 +epoch [7/50] batch [400/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.8315 (1.1689) acc 78.1250 (70.7422) lr 1.9511e-03 eta 5:19:53 +epoch [7/50] batch [405/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.6943 (1.1713) acc 59.3750 (70.7330) lr 1.9511e-03 eta 5:19:46 +epoch [7/50] batch [410/500] time 0.888 (0.888) data 0.000 (0.002) loss 0.8447 (1.1712) acc 84.3750 (70.7927) lr 1.9511e-03 eta 5:19:42 +epoch [7/50] batch [415/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.6792 (1.1694) acc 75.0000 (70.8208) lr 1.9511e-03 eta 5:19:37 +epoch [7/50] batch [420/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.3379 (1.1699) acc 65.6250 (70.8036) lr 1.9511e-03 eta 5:19:31 +epoch [7/50] batch [425/500] time 0.884 (0.888) data 
0.000 (0.002) loss 0.6963 (1.1703) acc 75.0000 (70.7574) lr 1.9511e-03 eta 5:19:27 +epoch [7/50] batch [430/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.1963 (1.1729) acc 68.7500 (70.7485) lr 1.9511e-03 eta 5:19:21 +epoch [7/50] batch [435/500] time 0.916 (0.889) data 0.000 (0.002) loss 0.8887 (1.1721) acc 78.1250 (70.7543) lr 1.9511e-03 eta 5:19:21 +epoch [7/50] batch [440/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.1924 (1.1730) acc 71.8750 (70.7955) lr 1.9511e-03 eta 5:19:13 +epoch [7/50] batch [445/500] time 0.888 (0.888) data 0.000 (0.002) loss 0.9282 (1.1697) acc 81.2500 (70.8497) lr 1.9511e-03 eta 5:19:07 +epoch [7/50] batch [450/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.1709 (1.1686) acc 65.6250 (70.8542) lr 1.9511e-03 eta 5:19:03 +epoch [7/50] batch [455/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.3086 (1.1680) acc 75.0000 (70.8929) lr 1.9511e-03 eta 5:18:58 +epoch [7/50] batch [460/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.5596 (1.1688) acc 87.5000 (70.8832) lr 1.9511e-03 eta 5:18:59 +epoch [7/50] batch [465/500] time 0.920 (0.889) data 0.000 (0.002) loss 0.9546 (1.1697) acc 75.0000 (70.8535) lr 1.9511e-03 eta 5:18:57 +epoch [7/50] batch [470/500] time 0.891 (0.889) data 0.000 (0.002) loss 0.9849 (1.1690) acc 68.7500 (70.8511) lr 1.9511e-03 eta 5:18:52 +epoch [7/50] batch [475/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.6719 (1.1714) acc 62.5000 (70.8224) lr 1.9511e-03 eta 5:18:49 +epoch [7/50] batch [480/500] time 0.869 (0.889) data 0.000 (0.002) loss 0.8286 (1.1704) acc 68.7500 (70.7812) lr 1.9511e-03 eta 5:18:43 +epoch [7/50] batch [485/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.3721 (1.1690) acc 65.6250 (70.7925) lr 1.9511e-03 eta 5:18:37 +epoch [7/50] batch [490/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.5674 (1.1674) acc 59.3750 (70.8291) lr 1.9511e-03 eta 5:18:28 +epoch [7/50] batch [495/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4473 (1.1684) acc 62.5000 (70.8018) lr 1.9511e-03 eta 
5:18:19 +epoch [7/50] batch [500/500] time 0.888 (0.888) data 0.000 (0.001) loss 0.4343 (1.1670) acc 93.7500 (70.8625) lr 1.9298e-03 eta 5:18:12 +epoch [8/50] batch [5/500] time 0.867 (1.050) data 0.000 (0.144) loss 1.6191 (1.4107) acc 71.8750 (66.8750) lr 1.9298e-03 eta 6:16:17 +epoch [8/50] batch [10/500] time 0.897 (0.966) data 0.000 (0.072) loss 0.9961 (1.2456) acc 71.8750 (67.8125) lr 1.9298e-03 eta 5:45:51 +epoch [8/50] batch [15/500] time 0.863 (0.935) data 0.000 (0.048) loss 0.9370 (1.1467) acc 71.8750 (69.3750) lr 1.9298e-03 eta 5:34:54 +epoch [8/50] batch [20/500] time 0.867 (0.920) data 0.000 (0.036) loss 1.5078 (1.1950) acc 65.6250 (69.0625) lr 1.9298e-03 eta 5:29:16 +epoch [8/50] batch [25/500] time 0.880 (0.911) data 0.000 (0.029) loss 1.2930 (1.1784) acc 71.8750 (69.7500) lr 1.9298e-03 eta 5:26:02 +epoch [8/50] batch [30/500] time 0.912 (0.908) data 0.000 (0.024) loss 1.5000 (1.1734) acc 65.6250 (69.8958) lr 1.9298e-03 eta 5:24:55 +epoch [8/50] batch [35/500] time 0.907 (0.905) data 0.000 (0.021) loss 1.2549 (1.1824) acc 62.5000 (70.0000) lr 1.9298e-03 eta 5:23:49 +epoch [8/50] batch [40/500] time 0.901 (0.903) data 0.000 (0.018) loss 1.2803 (1.1638) acc 68.7500 (70.8594) lr 1.9298e-03 eta 5:23:04 +epoch [8/50] batch [45/500] time 0.892 (0.903) data 0.000 (0.016) loss 0.6548 (1.1349) acc 84.3750 (71.3194) lr 1.9298e-03 eta 5:22:46 +epoch [8/50] batch [50/500] time 0.882 (0.903) data 0.000 (0.015) loss 1.5225 (1.1321) acc 59.3750 (70.8125) lr 1.9298e-03 eta 5:22:53 +epoch [8/50] batch [55/500] time 0.881 (0.901) data 0.000 (0.013) loss 1.2939 (1.1221) acc 71.8750 (71.0227) lr 1.9298e-03 eta 5:21:53 +epoch [8/50] batch [60/500] time 0.932 (0.899) data 0.000 (0.012) loss 1.3369 (1.1480) acc 68.7500 (70.6250) lr 1.9298e-03 eta 5:21:23 +epoch [8/50] batch [65/500] time 0.854 (0.898) data 0.000 (0.011) loss 1.2344 (1.1476) acc 71.8750 (70.6250) lr 1.9298e-03 eta 5:20:39 +epoch [8/50] batch [70/500] time 0.900 (0.895) data 0.000 (0.011) loss 0.9165 (1.1390) 
acc 81.2500 (71.2054) lr 1.9298e-03 eta 5:19:46 +epoch [8/50] batch [75/500] time 0.897 (0.895) data 0.000 (0.010) loss 1.8662 (1.1498) acc 53.1250 (70.9167) lr 1.9298e-03 eta 5:19:31 +epoch [8/50] batch [80/500] time 0.871 (0.893) data 0.000 (0.009) loss 1.7578 (1.1595) acc 50.0000 (70.6250) lr 1.9298e-03 eta 5:18:50 +epoch [8/50] batch [85/500] time 0.891 (0.893) data 0.000 (0.009) loss 1.9424 (1.1743) acc 53.1250 (70.2941) lr 1.9298e-03 eta 5:18:48 +epoch [8/50] batch [90/500] time 0.860 (0.893) data 0.000 (0.008) loss 1.5391 (1.1731) acc 59.3750 (70.3125) lr 1.9298e-03 eta 5:18:33 +epoch [8/50] batch [95/500] time 0.881 (0.893) data 0.000 (0.008) loss 0.4492 (1.1642) acc 87.5000 (70.5592) lr 1.9298e-03 eta 5:18:24 +epoch [8/50] batch [100/500] time 0.874 (0.892) data 0.000 (0.007) loss 1.1152 (1.1584) acc 75.0000 (70.6562) lr 1.9298e-03 eta 5:18:06 +epoch [8/50] batch [105/500] time 0.871 (0.891) data 0.000 (0.007) loss 1.0352 (1.1619) acc 75.0000 (70.4167) lr 1.9298e-03 eta 5:17:47 +epoch [8/50] batch [110/500] time 0.912 (0.892) data 0.000 (0.007) loss 2.0957 (1.1716) acc 65.6250 (70.3125) lr 1.9298e-03 eta 5:17:53 +epoch [8/50] batch [115/500] time 0.902 (0.891) data 0.000 (0.006) loss 1.5293 (1.1720) acc 65.6250 (70.4348) lr 1.9298e-03 eta 5:17:37 +epoch [8/50] batch [120/500] time 0.888 (0.891) data 0.000 (0.006) loss 1.1348 (1.1626) acc 71.8750 (70.5990) lr 1.9298e-03 eta 5:17:34 +epoch [8/50] batch [125/500] time 0.905 (0.891) data 0.000 (0.006) loss 1.3535 (1.1612) acc 65.6250 (70.6250) lr 1.9298e-03 eta 5:17:27 +epoch [8/50] batch [130/500] time 0.867 (0.891) data 0.000 (0.006) loss 0.9463 (1.1565) acc 75.0000 (70.7212) lr 1.9298e-03 eta 5:17:13 +epoch [8/50] batch [135/500] time 0.865 (0.890) data 0.000 (0.006) loss 0.9419 (1.1368) acc 75.0000 (71.1111) lr 1.9298e-03 eta 5:17:01 +epoch [8/50] batch [140/500] time 0.869 (0.890) data 0.000 (0.005) loss 1.2275 (1.1352) acc 65.6250 (71.1161) lr 1.9298e-03 eta 5:16:44 +epoch [8/50] batch [145/500] time 
0.990 (0.890) data 0.000 (0.005) loss 1.0107 (1.1346) acc 65.6250 (70.9052) lr 1.9298e-03 eta 5:16:52 +epoch [8/50] batch [150/500] time 0.878 (0.890) data 0.000 (0.005) loss 0.3699 (1.1354) acc 90.6250 (70.8750) lr 1.9298e-03 eta 5:16:44 +epoch [8/50] batch [155/500] time 0.880 (0.890) data 0.000 (0.005) loss 0.9912 (1.1384) acc 71.8750 (70.8669) lr 1.9298e-03 eta 5:16:33 +epoch [8/50] batch [160/500] time 0.883 (0.890) data 0.000 (0.005) loss 0.7598 (1.1320) acc 75.0000 (70.9766) lr 1.9298e-03 eta 5:16:29 +epoch [8/50] batch [165/500] time 0.888 (0.890) data 0.000 (0.005) loss 1.2646 (1.1376) acc 62.5000 (70.9470) lr 1.9298e-03 eta 5:16:26 +epoch [8/50] batch [170/500] time 0.928 (0.890) data 0.000 (0.004) loss 0.8701 (1.1394) acc 65.6250 (70.9375) lr 1.9298e-03 eta 5:16:23 +epoch [8/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 1.2432 (1.1420) acc 68.7500 (70.9821) lr 1.9298e-03 eta 5:16:13 +epoch [8/50] batch [180/500] time 0.885 (0.890) data 0.000 (0.004) loss 1.5439 (1.1470) acc 68.7500 (70.8681) lr 1.9298e-03 eta 5:16:09 +epoch [8/50] batch [185/500] time 0.889 (0.890) data 0.000 (0.004) loss 1.4453 (1.1520) acc 71.8750 (70.7432) lr 1.9298e-03 eta 5:16:04 +epoch [8/50] batch [190/500] time 0.893 (0.890) data 0.000 (0.004) loss 1.0537 (1.1504) acc 75.0000 (70.8059) lr 1.9298e-03 eta 5:16:07 +epoch [8/50] batch [195/500] time 0.878 (0.890) data 0.000 (0.004) loss 1.0098 (1.1478) acc 75.0000 (70.8333) lr 1.9298e-03 eta 5:16:04 +epoch [8/50] batch [200/500] time 0.908 (0.890) data 0.000 (0.004) loss 0.7769 (1.1428) acc 84.3750 (70.7812) lr 1.9298e-03 eta 5:16:01 +epoch [8/50] batch [205/500] time 0.886 (0.891) data 0.000 (0.004) loss 0.6157 (1.1434) acc 84.3750 (70.8079) lr 1.9298e-03 eta 5:16:05 +epoch [8/50] batch [210/500] time 0.904 (0.890) data 0.000 (0.004) loss 1.1875 (1.1381) acc 65.6250 (70.9226) lr 1.9298e-03 eta 5:15:58 +epoch [8/50] batch [215/500] time 0.916 (0.891) data 0.000 (0.004) loss 1.4365 (1.1399) acc 62.5000 (70.8285) lr 
1.9298e-03 eta 5:15:58 +epoch [8/50] batch [220/500] time 0.915 (0.891) data 0.000 (0.003) loss 1.7021 (1.1408) acc 62.5000 (70.8239) lr 1.9298e-03 eta 5:15:53 +epoch [8/50] batch [225/500] time 0.845 (0.890) data 0.000 (0.003) loss 2.1836 (1.1442) acc 59.3750 (70.7917) lr 1.9298e-03 eta 5:15:37 +epoch [8/50] batch [230/500] time 0.894 (0.890) data 0.000 (0.003) loss 0.8623 (1.1384) acc 81.2500 (70.9239) lr 1.9298e-03 eta 5:15:32 +epoch [8/50] batch [235/500] time 0.883 (0.890) data 0.000 (0.003) loss 0.7642 (1.1365) acc 78.1250 (70.9840) lr 1.9298e-03 eta 5:15:23 +epoch [8/50] batch [240/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.1270 (1.1354) acc 71.8750 (70.9375) lr 1.9298e-03 eta 5:15:10 +epoch [8/50] batch [245/500] time 0.907 (0.889) data 0.000 (0.003) loss 1.3965 (1.1335) acc 56.2500 (70.8801) lr 1.9298e-03 eta 5:15:01 +epoch [8/50] batch [250/500] time 0.886 (0.889) data 0.000 (0.003) loss 1.2578 (1.1374) acc 75.0000 (70.8125) lr 1.9298e-03 eta 5:14:53 +epoch [8/50] batch [255/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.7915 (1.1360) acc 71.8750 (70.8578) lr 1.9298e-03 eta 5:14:47 +epoch [8/50] batch [260/500] time 0.852 (0.889) data 0.000 (0.003) loss 0.8086 (1.1326) acc 68.7500 (70.9135) lr 1.9298e-03 eta 5:14:35 +epoch [8/50] batch [265/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.2607 (1.1344) acc 68.7500 (70.8255) lr 1.9298e-03 eta 5:14:27 +epoch [8/50] batch [270/500] time 0.890 (0.888) data 0.000 (0.003) loss 0.8999 (1.1318) acc 75.0000 (70.8565) lr 1.9298e-03 eta 5:14:18 +epoch [8/50] batch [275/500] time 0.863 (0.888) data 0.000 (0.003) loss 0.7075 (1.1304) acc 84.3750 (70.8636) lr 1.9298e-03 eta 5:14:08 +epoch [8/50] batch [280/500] time 0.900 (0.888) data 0.000 (0.003) loss 1.4463 (1.1292) acc 56.2500 (70.9263) lr 1.9298e-03 eta 5:14:06 +epoch [8/50] batch [285/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.0381 (1.1300) acc 71.8750 (70.8991) lr 1.9298e-03 eta 5:14:00 +epoch [8/50] batch [290/500] time 0.884 (0.888) data 
0.000 (0.003) loss 1.4424 (1.1325) acc 68.7500 (70.8836) lr 1.9298e-03 eta 5:14:00 +epoch [8/50] batch [295/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.4473 (1.1343) acc 62.5000 (70.8263) lr 1.9298e-03 eta 5:13:54 +epoch [8/50] batch [300/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.0293 (1.1295) acc 75.0000 (70.9167) lr 1.9298e-03 eta 5:13:47 +epoch [8/50] batch [305/500] time 0.902 (0.888) data 0.000 (0.003) loss 1.6787 (1.1323) acc 65.6250 (70.9016) lr 1.9298e-03 eta 5:13:44 +epoch [8/50] batch [310/500] time 0.920 (0.888) data 0.000 (0.003) loss 1.2021 (1.1323) acc 68.7500 (70.9577) lr 1.9298e-03 eta 5:13:41 +epoch [8/50] batch [315/500] time 0.899 (0.888) data 0.000 (0.003) loss 0.8140 (1.1306) acc 78.1250 (71.0218) lr 1.9298e-03 eta 5:13:36 +epoch [8/50] batch [320/500] time 0.869 (0.888) data 0.000 (0.002) loss 2.1191 (1.1318) acc 65.6250 (71.0645) lr 1.9298e-03 eta 5:13:32 +epoch [8/50] batch [325/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.6719 (1.1322) acc 62.5000 (71.0865) lr 1.9298e-03 eta 5:13:26 +epoch [8/50] batch [330/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.6021 (1.1310) acc 87.5000 (71.1080) lr 1.9298e-03 eta 5:13:18 +epoch [8/50] batch [335/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.8569 (1.1322) acc 71.8750 (71.0728) lr 1.9298e-03 eta 5:13:24 +epoch [8/50] batch [340/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.0264 (1.1324) acc 75.0000 (71.0478) lr 1.9298e-03 eta 5:13:17 +epoch [8/50] batch [345/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.8584 (1.1355) acc 59.3750 (70.9873) lr 1.9298e-03 eta 5:13:05 +epoch [8/50] batch [350/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.0469 (1.1365) acc 71.8750 (70.9821) lr 1.9298e-03 eta 5:13:01 +epoch [8/50] batch [355/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.7061 (1.1386) acc 65.6250 (71.0123) lr 1.9298e-03 eta 5:12:57 +epoch [8/50] batch [360/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8901 (1.1379) acc 75.0000 (71.0503) lr 1.9298e-03 eta 
5:12:52 +epoch [8/50] batch [365/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.9180 (1.1399) acc 71.8750 (70.9332) lr 1.9298e-03 eta 5:12:49 +epoch [8/50] batch [370/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.3516 (1.1375) acc 65.6250 (70.9966) lr 1.9298e-03 eta 5:12:45 +epoch [8/50] batch [375/500] time 0.914 (0.888) data 0.000 (0.002) loss 1.1494 (1.1404) acc 84.3750 (70.9917) lr 1.9298e-03 eta 5:12:43 +epoch [8/50] batch [380/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.4111 (1.1415) acc 75.0000 (70.9786) lr 1.9298e-03 eta 5:12:35 +epoch [8/50] batch [385/500] time 0.853 (0.888) data 0.000 (0.002) loss 1.8281 (1.1463) acc 59.3750 (70.9253) lr 1.9298e-03 eta 5:12:30 +epoch [8/50] batch [390/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.5908 (1.1463) acc 62.5000 (70.9295) lr 1.9298e-03 eta 5:12:23 +epoch [8/50] batch [395/500] time 0.920 (0.888) data 0.000 (0.002) loss 1.3408 (1.1451) acc 65.6250 (70.9573) lr 1.9298e-03 eta 5:12:18 +epoch [8/50] batch [400/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.9434 (1.1417) acc 71.8750 (71.0156) lr 1.9298e-03 eta 5:12:12 +epoch [8/50] batch [405/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.2539 (1.1425) acc 71.8750 (71.0417) lr 1.9298e-03 eta 5:12:07 +epoch [8/50] batch [410/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.3008 (1.1430) acc 71.8750 (71.0518) lr 1.9298e-03 eta 5:12:02 +epoch [8/50] batch [415/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.0098 (1.1422) acc 68.7500 (71.0392) lr 1.9298e-03 eta 5:11:55 +epoch [8/50] batch [420/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.6484 (1.1446) acc 62.5000 (71.0193) lr 1.9298e-03 eta 5:11:47 +epoch [8/50] batch [425/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.2061 (1.1444) acc 68.7500 (71.0441) lr 1.9298e-03 eta 5:11:42 +epoch [8/50] batch [430/500] time 0.858 (0.887) data 0.000 (0.002) loss 0.9087 (1.1465) acc 75.0000 (71.0029) lr 1.9298e-03 eta 5:11:39 +epoch [8/50] batch [435/500] time 0.897 (0.888) data 0.000 (0.002) loss 
1.5312 (1.1456) acc 68.7500 (71.0345) lr 1.9298e-03 eta 5:11:39 +epoch [8/50] batch [440/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.8857 (1.1444) acc 65.6250 (71.0511) lr 1.9298e-03 eta 5:11:33 +epoch [8/50] batch [445/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.8179 (1.1428) acc 75.0000 (71.0744) lr 1.9298e-03 eta 5:11:25 +epoch [8/50] batch [450/500] time 0.914 (0.887) data 0.000 (0.002) loss 1.1729 (1.1420) acc 62.5000 (71.0694) lr 1.9298e-03 eta 5:11:20 +epoch [8/50] batch [455/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.3105 (1.1421) acc 65.6250 (71.0165) lr 1.9298e-03 eta 5:11:15 +epoch [8/50] batch [460/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.1631 (1.1416) acc 68.7500 (70.9715) lr 1.9298e-03 eta 5:11:09 +epoch [8/50] batch [465/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.8687 (1.1410) acc 81.2500 (70.9946) lr 1.9298e-03 eta 5:11:02 +epoch [8/50] batch [470/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.5840 (1.1399) acc 81.2500 (71.0306) lr 1.9298e-03 eta 5:10:57 +epoch [8/50] batch [475/500] time 1.001 (0.887) data 0.000 (0.002) loss 0.6372 (1.1405) acc 84.3750 (71.0395) lr 1.9298e-03 eta 5:10:57 +epoch [8/50] batch [480/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.9321 (1.1408) acc 78.1250 (71.0286) lr 1.9298e-03 eta 5:10:54 +epoch [8/50] batch [485/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.5723 (1.1416) acc 62.5000 (70.9794) lr 1.9298e-03 eta 5:10:48 +epoch [8/50] batch [490/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.5781 (1.1441) acc 59.3750 (70.9503) lr 1.9298e-03 eta 5:10:44 +epoch [8/50] batch [495/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.2363 (1.1452) acc 71.8750 (70.9470) lr 1.9298e-03 eta 5:10:40 +epoch [8/50] batch [500/500] time 0.913 (0.888) data 0.000 (0.002) loss 1.3896 (1.1466) acc 71.8750 (70.9313) lr 1.9048e-03 eta 5:10:39 +epoch [9/50] batch [5/500] time 0.877 (1.018) data 0.000 (0.132) loss 1.0996 (1.2461) acc 65.6250 (67.5000) lr 1.9048e-03 eta 5:56:22 +epoch [9/50] batch 
[10/500] time 0.873 (0.948) data 0.000 (0.066) loss 1.3564 (1.2751) acc 62.5000 (67.5000) lr 1.9048e-03 eta 5:31:48 +epoch [9/50] batch [15/500] time 0.873 (0.929) data 0.000 (0.044) loss 0.7407 (1.2527) acc 75.0000 (67.7083) lr 1.9048e-03 eta 5:24:46 +epoch [9/50] batch [20/500] time 0.896 (0.927) data 0.000 (0.033) loss 1.0322 (1.2542) acc 78.1250 (67.6562) lr 1.9048e-03 eta 5:24:09 +epoch [9/50] batch [25/500] time 0.885 (0.920) data 0.000 (0.027) loss 0.9390 (1.2047) acc 65.6250 (68.8750) lr 1.9048e-03 eta 5:21:29 +epoch [9/50] batch [30/500] time 0.898 (0.915) data 0.000 (0.022) loss 0.9126 (1.1645) acc 75.0000 (70.1042) lr 1.9048e-03 eta 5:19:53 +epoch [9/50] batch [35/500] time 0.911 (0.911) data 0.000 (0.019) loss 0.7744 (1.1488) acc 81.2500 (70.6250) lr 1.9048e-03 eta 5:18:28 +epoch [9/50] batch [40/500] time 0.882 (0.907) data 0.000 (0.017) loss 1.2803 (1.1662) acc 62.5000 (69.7656) lr 1.9048e-03 eta 5:16:51 +epoch [9/50] batch [45/500] time 0.878 (0.906) data 0.000 (0.015) loss 0.9990 (1.1734) acc 75.0000 (69.7917) lr 1.9048e-03 eta 5:16:19 +epoch [9/50] batch [50/500] time 0.855 (0.902) data 0.001 (0.013) loss 1.3369 (1.1837) acc 65.6250 (69.2500) lr 1.9048e-03 eta 5:15:06 +epoch [9/50] batch [55/500] time 0.904 (0.900) data 0.000 (0.012) loss 1.6279 (1.2002) acc 71.8750 (69.3750) lr 1.9048e-03 eta 5:14:18 +epoch [9/50] batch [60/500] time 0.888 (0.900) data 0.000 (0.011) loss 1.5215 (1.2087) acc 59.3750 (69.2188) lr 1.9048e-03 eta 5:14:10 +epoch [9/50] batch [65/500] time 0.883 (0.899) data 0.000 (0.010) loss 1.2207 (1.2129) acc 71.8750 (69.1827) lr 1.9048e-03 eta 5:13:41 +epoch [9/50] batch [70/500] time 0.878 (0.898) data 0.000 (0.010) loss 1.4609 (1.1995) acc 75.0000 (69.7768) lr 1.9048e-03 eta 5:13:10 +epoch [9/50] batch [75/500] time 0.897 (0.898) data 0.000 (0.009) loss 1.1865 (1.1952) acc 65.6250 (69.8333) lr 1.9048e-03 eta 5:13:04 +epoch [9/50] batch [80/500] time 0.886 (0.896) data 0.000 (0.009) loss 1.4873 (1.2213) acc 62.5000 (69.5703) lr 
1.9048e-03 eta 5:12:30 +epoch [9/50] batch [85/500] time 0.880 (0.896) data 0.000 (0.008) loss 0.9707 (1.2247) acc 62.5000 (69.1544) lr 1.9048e-03 eta 5:12:12 +epoch [9/50] batch [90/500] time 0.861 (0.895) data 0.000 (0.008) loss 0.9448 (1.2133) acc 78.1250 (69.4097) lr 1.9048e-03 eta 5:11:48 +epoch [9/50] batch [95/500] time 0.900 (0.894) data 0.000 (0.007) loss 0.9370 (1.2062) acc 68.7500 (69.4737) lr 1.9048e-03 eta 5:11:28 +epoch [9/50] batch [100/500] time 0.920 (0.895) data 0.000 (0.007) loss 2.0898 (1.2092) acc 65.6250 (69.5000) lr 1.9048e-03 eta 5:11:35 +epoch [9/50] batch [105/500] time 0.900 (0.894) data 0.000 (0.007) loss 1.3262 (1.2033) acc 68.7500 (69.7024) lr 1.9048e-03 eta 5:11:24 +epoch [9/50] batch [110/500] time 0.902 (0.894) data 0.000 (0.006) loss 0.7056 (1.2002) acc 75.0000 (69.7727) lr 1.9048e-03 eta 5:11:10 +epoch [9/50] batch [115/500] time 0.879 (0.893) data 0.000 (0.006) loss 0.8618 (1.1917) acc 71.8750 (69.8370) lr 1.9048e-03 eta 5:10:57 +epoch [9/50] batch [120/500] time 0.884 (0.894) data 0.000 (0.006) loss 1.1074 (1.1893) acc 62.5000 (69.9479) lr 1.9048e-03 eta 5:10:58 +epoch [9/50] batch [125/500] time 0.883 (0.894) data 0.000 (0.006) loss 0.9204 (1.1829) acc 68.7500 (70.1500) lr 1.9048e-03 eta 5:10:52 +epoch [9/50] batch [130/500] time 0.900 (0.893) data 0.000 (0.005) loss 1.2041 (1.1821) acc 75.0000 (70.4087) lr 1.9048e-03 eta 5:10:37 +epoch [9/50] batch [135/500] time 0.868 (0.893) data 0.000 (0.005) loss 1.2588 (1.1818) acc 62.5000 (70.3935) lr 1.9048e-03 eta 5:10:24 +epoch [9/50] batch [140/500] time 0.874 (0.892) data 0.000 (0.005) loss 0.8403 (1.1746) acc 78.1250 (70.3795) lr 1.9048e-03 eta 5:10:16 +epoch [9/50] batch [145/500] time 0.922 (0.892) data 0.000 (0.005) loss 0.9199 (1.1658) acc 84.3750 (70.4957) lr 1.9048e-03 eta 5:10:11 +epoch [9/50] batch [150/500] time 0.863 (0.892) data 0.000 (0.005) loss 0.7163 (1.1572) acc 71.8750 (70.5417) lr 1.9048e-03 eta 5:10:01 +epoch [9/50] batch [155/500] time 0.902 (0.892) data 0.000 
(0.004) loss 0.8501 (1.1518) acc 84.3750 (70.6452) lr 1.9048e-03 eta 5:09:53 +epoch [9/50] batch [160/500] time 0.874 (0.891) data 0.000 (0.004) loss 1.2695 (1.1517) acc 62.5000 (70.7031) lr 1.9048e-03 eta 5:09:35 +epoch [9/50] batch [165/500] time 0.880 (0.892) data 0.000 (0.004) loss 1.8027 (1.1552) acc 59.3750 (70.5871) lr 1.9048e-03 eta 5:09:39 +epoch [9/50] batch [170/500] time 0.904 (0.892) data 0.000 (0.004) loss 1.0918 (1.1538) acc 75.0000 (70.5882) lr 1.9048e-03 eta 5:09:30 +epoch [9/50] batch [175/500] time 0.895 (0.891) data 0.000 (0.004) loss 1.0840 (1.1520) acc 65.6250 (70.6071) lr 1.9048e-03 eta 5:09:25 +epoch [9/50] batch [180/500] time 0.870 (0.891) data 0.000 (0.004) loss 1.0400 (1.1517) acc 78.1250 (70.5903) lr 1.9048e-03 eta 5:09:18 +epoch [9/50] batch [185/500] time 0.914 (0.891) data 0.000 (0.004) loss 1.5273 (1.1528) acc 65.6250 (70.7095) lr 1.9048e-03 eta 5:09:14 +epoch [9/50] batch [190/500] time 0.878 (0.891) data 0.000 (0.004) loss 0.9902 (1.1518) acc 75.0000 (70.6414) lr 1.9048e-03 eta 5:09:04 +epoch [9/50] batch [195/500] time 0.858 (0.891) data 0.000 (0.004) loss 1.5361 (1.1541) acc 65.6250 (70.5769) lr 1.9048e-03 eta 5:08:56 +epoch [9/50] batch [200/500] time 0.915 (0.891) data 0.000 (0.004) loss 1.0186 (1.1528) acc 75.0000 (70.6094) lr 1.9048e-03 eta 5:08:50 +epoch [9/50] batch [205/500] time 0.875 (0.891) data 0.000 (0.003) loss 0.8418 (1.1524) acc 68.7500 (70.5640) lr 1.9048e-03 eta 5:08:38 +epoch [9/50] batch [210/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.3936 (1.1574) acc 68.7500 (70.5804) lr 1.9048e-03 eta 5:08:27 +epoch [9/50] batch [215/500] time 0.899 (0.890) data 0.000 (0.003) loss 2.0625 (1.1617) acc 62.5000 (70.5669) lr 1.9048e-03 eta 5:08:24 +epoch [9/50] batch [220/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.8730 (1.1580) acc 71.8750 (70.5256) lr 1.9048e-03 eta 5:08:22 +epoch [9/50] batch [225/500] time 0.871 (0.890) data 0.000 (0.003) loss 0.9644 (1.1576) acc 71.8750 (70.5278) lr 1.9048e-03 eta 5:08:16 
+epoch [9/50] batch [230/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.2354 (1.1641) acc 65.6250 (70.2853) lr 1.9048e-03 eta 5:08:11 +epoch [9/50] batch [235/500] time 0.904 (0.890) data 0.000 (0.003) loss 0.6089 (1.1584) acc 81.2500 (70.4122) lr 1.9048e-03 eta 5:08:11 +epoch [9/50] batch [240/500] time 0.861 (0.890) data 0.000 (0.003) loss 0.9819 (1.1620) acc 78.1250 (70.4297) lr 1.9048e-03 eta 5:08:05 +epoch [9/50] batch [245/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.4851 (1.1588) acc 84.3750 (70.5102) lr 1.9048e-03 eta 5:07:51 +epoch [9/50] batch [250/500] time 0.896 (0.890) data 0.000 (0.003) loss 1.3311 (1.1576) acc 78.1250 (70.5000) lr 1.9048e-03 eta 5:07:46 +epoch [9/50] batch [255/500] time 0.887 (0.890) data 0.000 (0.003) loss 0.9854 (1.1588) acc 71.8750 (70.4657) lr 1.9048e-03 eta 5:07:43 +epoch [9/50] batch [260/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.6172 (1.1577) acc 84.3750 (70.5288) lr 1.9048e-03 eta 5:07:34 +epoch [9/50] batch [265/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.1738 (1.1589) acc 75.0000 (70.5896) lr 1.9048e-03 eta 5:07:33 +epoch [9/50] batch [270/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.2119 (1.1581) acc 53.1250 (70.5903) lr 1.9048e-03 eta 5:07:21 +epoch [9/50] batch [275/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.5713 (1.1567) acc 84.3750 (70.6591) lr 1.9048e-03 eta 5:07:21 +epoch [9/50] batch [280/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.7012 (1.1547) acc 84.3750 (70.8371) lr 1.9048e-03 eta 5:07:12 +epoch [9/50] batch [285/500] time 0.896 (0.890) data 0.000 (0.003) loss 0.6323 (1.1579) acc 84.3750 (70.7566) lr 1.9048e-03 eta 5:07:14 +epoch [9/50] batch [290/500] time 0.927 (0.890) data 0.000 (0.003) loss 0.8647 (1.1584) acc 78.1250 (70.7651) lr 1.9048e-03 eta 5:07:14 +epoch [9/50] batch [295/500] time 0.892 (0.890) data 0.000 (0.002) loss 1.6133 (1.1592) acc 68.7500 (70.7945) lr 1.9048e-03 eta 5:07:06 +epoch [9/50] batch [300/500] time 0.874 (0.890) data 0.000 (0.002) loss 0.7437 
(1.1553) acc 75.0000 (70.8646) lr 1.9048e-03 eta 5:06:57 +epoch [9/50] batch [305/500] time 0.982 (0.890) data 0.000 (0.002) loss 1.1641 (1.1514) acc 68.7500 (70.9221) lr 1.9048e-03 eta 5:06:56 +epoch [9/50] batch [310/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.3105 (1.1512) acc 62.5000 (70.9173) lr 1.9048e-03 eta 5:06:48 +epoch [9/50] batch [315/500] time 0.894 (0.890) data 0.000 (0.002) loss 1.1094 (1.1501) acc 65.6250 (70.9226) lr 1.9048e-03 eta 5:06:41 +epoch [9/50] batch [320/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.6650 (1.1505) acc 56.2500 (70.8203) lr 1.9048e-03 eta 5:06:37 +epoch [9/50] batch [325/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.1387 (1.1521) acc 75.0000 (70.7885) lr 1.9048e-03 eta 5:06:29 +epoch [9/50] batch [330/500] time 0.850 (0.889) data 0.000 (0.002) loss 0.7441 (1.1503) acc 78.1250 (70.8902) lr 1.9048e-03 eta 5:06:21 +epoch [9/50] batch [335/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.6938 (1.1481) acc 84.3750 (70.9328) lr 1.9048e-03 eta 5:06:13 +epoch [9/50] batch [340/500] time 0.864 (0.889) data 0.001 (0.002) loss 0.8296 (1.1470) acc 71.8750 (70.9283) lr 1.9048e-03 eta 5:06:07 +epoch [9/50] batch [345/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.0381 (1.1466) acc 65.6250 (70.8696) lr 1.9048e-03 eta 5:06:00 +epoch [9/50] batch [350/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.1621 (1.1442) acc 68.7500 (70.8750) lr 1.9048e-03 eta 5:05:55 +epoch [9/50] batch [355/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.1777 (1.1391) acc 71.8750 (70.9859) lr 1.9048e-03 eta 5:05:48 +epoch [9/50] batch [360/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.2207 (1.1357) acc 78.1250 (71.0938) lr 1.9048e-03 eta 5:05:42 +epoch [9/50] batch [365/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.6875 (1.1384) acc 62.5000 (71.0702) lr 1.9048e-03 eta 5:05:35 +epoch [9/50] batch [370/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.7207 (1.1422) acc 87.5000 (71.0726) lr 1.9048e-03 eta 5:05:28 +epoch [9/50] batch 
[375/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.1846 (1.1461) acc 71.8750 (71.0250) lr 1.9048e-03 eta 5:05:24 +epoch [9/50] batch [380/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.7554 (1.1455) acc 78.1250 (71.0362) lr 1.9048e-03 eta 5:05:15 +epoch [9/50] batch [385/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.2822 (1.1485) acc 68.7500 (70.9334) lr 1.9048e-03 eta 5:05:07 +epoch [9/50] batch [390/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.5420 (1.1448) acc 81.2500 (71.0337) lr 1.9048e-03 eta 5:05:04 +epoch [9/50] batch [395/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.5596 (1.1468) acc 62.5000 (71.0047) lr 1.9048e-03 eta 5:05:04 +epoch [9/50] batch [400/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.5400 (1.1458) acc 68.7500 (71.0547) lr 1.9048e-03 eta 5:04:55 +epoch [9/50] batch [405/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.3672 (1.1470) acc 71.8750 (71.0262) lr 1.9048e-03 eta 5:04:54 +epoch [9/50] batch [410/500] time 0.848 (0.888) data 0.000 (0.002) loss 1.1533 (1.1458) acc 75.0000 (71.0518) lr 1.9048e-03 eta 5:04:46 +epoch [9/50] batch [415/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.3213 (1.1462) acc 68.7500 (71.0919) lr 1.9048e-03 eta 5:04:42 +epoch [9/50] batch [420/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.0781 (1.1480) acc 78.1250 (71.0640) lr 1.9048e-03 eta 5:04:33 +epoch [9/50] batch [425/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.1699 (1.1469) acc 59.3750 (71.0735) lr 1.9048e-03 eta 5:04:26 +epoch [9/50] batch [430/500] time 0.900 (0.888) data 0.000 (0.002) loss 0.7202 (1.1462) acc 71.8750 (71.1119) lr 1.9048e-03 eta 5:04:21 +epoch [9/50] batch [435/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.9609 (1.1451) acc 71.8750 (71.1063) lr 1.9048e-03 eta 5:04:15 +epoch [9/50] batch [440/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.6694 (1.1413) acc 81.2500 (71.1861) lr 1.9048e-03 eta 5:04:08 +epoch [9/50] batch [445/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.4521 (1.1423) acc 62.5000 
(71.1657) lr 1.9048e-03 eta 5:04:02 +epoch [9/50] batch [450/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.6870 (1.1423) acc 84.3750 (71.1736) lr 1.9048e-03 eta 5:04:01 +epoch [9/50] batch [455/500] time 0.914 (0.888) data 0.000 (0.002) loss 1.0605 (1.1405) acc 62.5000 (71.2157) lr 1.9048e-03 eta 5:03:55 +epoch [9/50] batch [460/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.8662 (1.1380) acc 81.2500 (71.2772) lr 1.9048e-03 eta 5:03:47 +epoch [9/50] batch [465/500] time 0.864 (0.887) data 0.000 (0.002) loss 0.7417 (1.1367) acc 78.1250 (71.3172) lr 1.9048e-03 eta 5:03:41 +epoch [9/50] batch [470/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.3506 (1.1370) acc 65.6250 (71.2832) lr 1.9048e-03 eta 5:03:35 +epoch [9/50] batch [475/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.4277 (1.1401) acc 71.8750 (71.2961) lr 1.9048e-03 eta 5:03:28 +epoch [9/50] batch [480/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.3477 (1.1413) acc 65.6250 (71.2760) lr 1.9048e-03 eta 5:03:20 +epoch [9/50] batch [485/500] time 0.846 (0.887) data 0.000 (0.002) loss 1.3408 (1.1411) acc 68.7500 (71.2500) lr 1.9048e-03 eta 5:03:12 +epoch [9/50] batch [490/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.7710 (1.1390) acc 71.8750 (71.2755) lr 1.9048e-03 eta 5:03:10 +epoch [9/50] batch [495/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.8203 (1.1406) acc 65.6250 (71.2879) lr 1.9048e-03 eta 5:03:09 +epoch [9/50] batch [500/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.7227 (1.1401) acc 59.3750 (71.2938) lr 1.8763e-03 eta 5:03:02 +epoch [10/50] batch [5/500] time 0.895 (1.027) data 0.000 (0.147) loss 0.7681 (0.8706) acc 78.1250 (77.5000) lr 1.8763e-03 eta 5:50:43 +epoch [10/50] batch [10/500] time 0.896 (0.962) data 0.000 (0.074) loss 1.1631 (0.9418) acc 75.0000 (75.3125) lr 1.8763e-03 eta 5:28:40 +epoch [10/50] batch [15/500] time 0.866 (0.932) data 0.000 (0.049) loss 1.1514 (0.9596) acc 71.8750 (74.5833) lr 1.8763e-03 eta 5:18:03 +epoch [10/50] batch [20/500] time 0.895 
(0.921) data 0.000 (0.037) loss 0.9824 (0.9713) acc 75.0000 (74.0625) lr 1.8763e-03 eta 5:14:21 +epoch [10/50] batch [25/500] time 0.883 (0.912) data 0.000 (0.030) loss 0.7407 (1.0295) acc 81.2500 (72.3750) lr 1.8763e-03 eta 5:11:16 +epoch [10/50] batch [30/500] time 0.874 (0.907) data 0.000 (0.025) loss 0.8066 (1.0459) acc 78.1250 (72.3958) lr 1.8763e-03 eta 5:09:35 +epoch [10/50] batch [35/500] time 0.866 (0.903) data 0.000 (0.021) loss 1.1250 (1.0373) acc 71.8750 (72.5893) lr 1.8763e-03 eta 5:08:02 +epoch [10/50] batch [40/500] time 0.851 (0.899) data 0.000 (0.019) loss 0.9277 (1.0236) acc 75.0000 (73.2812) lr 1.8763e-03 eta 5:06:31 +epoch [10/50] batch [45/500] time 0.861 (0.896) data 0.000 (0.017) loss 1.3799 (1.0425) acc 68.7500 (73.1250) lr 1.8763e-03 eta 5:05:24 +epoch [10/50] batch [50/500] time 0.887 (0.895) data 0.000 (0.015) loss 1.0811 (1.0562) acc 68.7500 (72.5625) lr 1.8763e-03 eta 5:05:01 +epoch [10/50] batch [55/500] time 0.882 (0.893) data 0.000 (0.014) loss 1.1670 (1.0727) acc 71.8750 (72.1023) lr 1.8763e-03 eta 5:04:18 +epoch [10/50] batch [60/500] time 0.883 (0.893) data 0.000 (0.012) loss 0.8071 (1.0762) acc 78.1250 (72.0833) lr 1.8763e-03 eta 5:04:20 +epoch [10/50] batch [65/500] time 0.846 (0.892) data 0.000 (0.012) loss 1.4072 (1.0955) acc 65.6250 (71.7788) lr 1.8763e-03 eta 5:03:45 +epoch [10/50] batch [70/500] time 0.882 (0.891) data 0.000 (0.011) loss 1.6318 (1.1135) acc 59.3750 (71.4286) lr 1.8763e-03 eta 5:03:19 +epoch [10/50] batch [75/500] time 0.896 (0.890) data 0.000 (0.010) loss 1.0117 (1.1171) acc 68.7500 (71.4167) lr 1.8763e-03 eta 5:02:59 +epoch [10/50] batch [80/500] time 0.867 (0.890) data 0.000 (0.009) loss 1.0439 (1.1424) acc 71.8750 (71.2500) lr 1.8763e-03 eta 5:02:47 +epoch [10/50] batch [85/500] time 0.865 (0.890) data 0.000 (0.009) loss 1.7188 (1.1487) acc 68.7500 (71.2500) lr 1.8763e-03 eta 5:02:39 +epoch [10/50] batch [90/500] time 0.875 (0.889) data 0.000 (0.008) loss 1.5264 (1.1507) acc 65.6250 (71.2153) lr 
1.8763e-03 eta 5:02:16 +epoch [10/50] batch [95/500] time 0.896 (0.890) data 0.000 (0.008) loss 1.1182 (1.1504) acc 71.8750 (71.1184) lr 1.8763e-03 eta 5:02:42 +epoch [10/50] batch [100/500] time 0.882 (0.890) data 0.000 (0.008) loss 1.3955 (1.1500) acc 65.6250 (70.9688) lr 1.8763e-03 eta 5:02:38 +epoch [10/50] batch [105/500] time 0.896 (0.890) data 0.000 (0.007) loss 1.6436 (1.1539) acc 53.1250 (70.6548) lr 1.8763e-03 eta 5:02:38 +epoch [10/50] batch [110/500] time 0.867 (0.890) data 0.000 (0.007) loss 0.8096 (1.1403) acc 78.1250 (71.0227) lr 1.8763e-03 eta 5:02:26 +epoch [10/50] batch [115/500] time 0.925 (0.890) data 0.000 (0.007) loss 1.1562 (1.1403) acc 68.7500 (71.0598) lr 1.8763e-03 eta 5:02:18 +epoch [10/50] batch [120/500] time 0.884 (0.889) data 0.000 (0.006) loss 0.6489 (1.1331) acc 75.0000 (71.0938) lr 1.8763e-03 eta 5:02:02 +epoch [10/50] batch [125/500] time 0.913 (0.890) data 0.000 (0.006) loss 1.4268 (1.1413) acc 78.1250 (71.1000) lr 1.8763e-03 eta 5:02:07 +epoch [10/50] batch [130/500] time 0.902 (0.889) data 0.000 (0.006) loss 0.8491 (1.1387) acc 71.8750 (71.1538) lr 1.8763e-03 eta 5:01:57 +epoch [10/50] batch [135/500] time 0.985 (0.890) data 0.000 (0.006) loss 1.1504 (1.1371) acc 71.8750 (71.0880) lr 1.8763e-03 eta 5:02:03 +epoch [10/50] batch [140/500] time 0.886 (0.890) data 0.000 (0.005) loss 1.9307 (1.1455) acc 59.3750 (70.8929) lr 1.8763e-03 eta 5:01:51 +epoch [10/50] batch [145/500] time 0.884 (0.890) data 0.000 (0.005) loss 1.1152 (1.1403) acc 65.6250 (70.8621) lr 1.8763e-03 eta 5:01:48 +epoch [10/50] batch [150/500] time 0.868 (0.889) data 0.000 (0.005) loss 0.7144 (1.1459) acc 78.1250 (70.6458) lr 1.8763e-03 eta 5:01:37 +epoch [10/50] batch [155/500] time 0.884 (0.889) data 0.000 (0.005) loss 0.7261 (1.1338) acc 81.2500 (70.9879) lr 1.8763e-03 eta 5:01:31 +epoch [10/50] batch [160/500] time 0.868 (0.889) data 0.000 (0.005) loss 0.7510 (1.1358) acc 81.2500 (71.0547) lr 1.8763e-03 eta 5:01:29 +epoch [10/50] batch [165/500] time 0.875 
(0.890) data 0.000 (0.005) loss 1.4766 (1.1446) acc 65.6250 (71.0038) lr 1.8763e-03 eta 5:01:28 +epoch [10/50] batch [170/500] time 0.908 (0.889) data 0.000 (0.005) loss 1.2344 (1.1402) acc 62.5000 (71.1213) lr 1.8763e-03 eta 5:01:23 +epoch [10/50] batch [175/500] time 0.884 (0.890) data 0.000 (0.004) loss 1.2324 (1.1414) acc 65.6250 (71.0893) lr 1.8763e-03 eta 5:01:21 +epoch [10/50] batch [180/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.2236 (1.1395) acc 68.7500 (71.1111) lr 1.8763e-03 eta 5:01:12 +epoch [10/50] batch [185/500] time 0.897 (0.890) data 0.000 (0.004) loss 1.4131 (1.1416) acc 78.1250 (71.1486) lr 1.8763e-03 eta 5:01:11 +epoch [10/50] batch [190/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.2295 (1.1444) acc 75.0000 (71.2500) lr 1.8763e-03 eta 5:01:04 +epoch [10/50] batch [195/500] time 0.894 (0.889) data 0.000 (0.004) loss 1.2637 (1.1474) acc 59.3750 (71.0897) lr 1.8763e-03 eta 5:01:00 +epoch [10/50] batch [200/500] time 0.860 (0.889) data 0.000 (0.004) loss 0.6211 (1.1465) acc 90.6250 (71.2031) lr 1.8763e-03 eta 5:00:50 +epoch [10/50] batch [205/500] time 0.888 (0.889) data 0.000 (0.004) loss 1.0391 (1.1435) acc 71.8750 (71.2195) lr 1.8763e-03 eta 5:00:44 +epoch [10/50] batch [210/500] time 0.913 (0.889) data 0.000 (0.004) loss 1.7383 (1.1455) acc 56.2500 (71.1905) lr 1.8763e-03 eta 5:00:39 +epoch [10/50] batch [215/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.4775 (1.1524) acc 56.2500 (71.0610) lr 1.8763e-03 eta 5:00:39 +epoch [10/50] batch [220/500] time 0.865 (0.889) data 0.000 (0.004) loss 0.7227 (1.1526) acc 75.0000 (70.9659) lr 1.8763e-03 eta 5:00:32 +epoch [10/50] batch [225/500] time 0.849 (0.889) data 0.000 (0.003) loss 1.4062 (1.1538) acc 75.0000 (70.9722) lr 1.8763e-03 eta 5:00:20 +epoch [10/50] batch [230/500] time 0.865 (0.888) data 0.000 (0.003) loss 1.1445 (1.1535) acc 68.7500 (70.9239) lr 1.8763e-03 eta 5:00:09 +epoch [10/50] batch [235/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.7212 (1.1563) acc 81.2500 
(70.8777) lr 1.8763e-03 eta 5:00:11 +epoch [10/50] batch [240/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.2686 (1.1603) acc 81.2500 (70.7943) lr 1.8763e-03 eta 5:00:00 +epoch [10/50] batch [245/500] time 0.923 (0.888) data 0.000 (0.003) loss 1.8330 (1.1688) acc 50.0000 (70.6250) lr 1.8763e-03 eta 4:59:55 +epoch [10/50] batch [250/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.4277 (1.1665) acc 84.3750 (70.6875) lr 1.8763e-03 eta 4:59:52 +epoch [10/50] batch [255/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.4824 (1.1646) acc 56.2500 (70.6618) lr 1.8763e-03 eta 4:59:50 +epoch [10/50] batch [260/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.2842 (1.1639) acc 75.0000 (70.7091) lr 1.8763e-03 eta 4:59:42 +epoch [10/50] batch [265/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.5684 (1.1656) acc 56.2500 (70.5896) lr 1.8763e-03 eta 4:59:32 +epoch [10/50] batch [270/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.2109 (1.1628) acc 65.6250 (70.6713) lr 1.8763e-03 eta 4:59:29 +epoch [10/50] batch [275/500] time 0.893 (0.888) data 0.000 (0.003) loss 1.0303 (1.1659) acc 65.6250 (70.6477) lr 1.8763e-03 eta 4:59:24 +epoch [10/50] batch [280/500] time 0.875 (0.888) data 0.000 (0.003) loss 0.8916 (1.1639) acc 78.1250 (70.6585) lr 1.8763e-03 eta 4:59:25 +epoch [10/50] batch [285/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.1914 (1.1630) acc 56.2500 (70.6360) lr 1.8763e-03 eta 4:59:18 +epoch [10/50] batch [290/500] time 0.889 (0.888) data 0.000 (0.003) loss 1.1064 (1.1618) acc 71.8750 (70.6466) lr 1.8763e-03 eta 4:59:12 +epoch [10/50] batch [295/500] time 0.882 (0.888) data 0.000 (0.003) loss 0.8452 (1.1601) acc 78.1250 (70.7097) lr 1.8763e-03 eta 4:59:03 +epoch [10/50] batch [300/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.5410 (1.1588) acc 84.3750 (70.7500) lr 1.8763e-03 eta 4:59:01 +epoch [10/50] batch [305/500] time 0.861 (0.888) data 0.000 (0.003) loss 1.1221 (1.1593) acc 75.0000 (70.7787) lr 1.8763e-03 eta 4:58:53 +epoch [10/50] batch [310/500] 
time 0.881 (0.888) data 0.000 (0.003) loss 1.1680 (1.1585) acc 65.6250 (70.7056) lr 1.8763e-03 eta 4:58:48 +epoch [10/50] batch [315/500] time 0.882 (0.888) data 0.000 (0.003) loss 1.9688 (1.1634) acc 59.3750 (70.6845) lr 1.8763e-03 eta 4:58:39 +epoch [10/50] batch [320/500] time 0.882 (0.888) data 0.000 (0.003) loss 1.0918 (1.1600) acc 78.1250 (70.7715) lr 1.8763e-03 eta 4:58:33 +epoch [10/50] batch [325/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.5889 (1.1580) acc 90.6250 (70.8558) lr 1.8763e-03 eta 4:58:28 +epoch [10/50] batch [330/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.8711 (1.1544) acc 71.8750 (70.9280) lr 1.8763e-03 eta 4:58:21 +epoch [10/50] batch [335/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.0049 (1.1536) acc 75.0000 (70.9981) lr 1.8763e-03 eta 4:58:11 +epoch [10/50] batch [340/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.0703 (1.1552) acc 71.8750 (70.9191) lr 1.8763e-03 eta 4:58:04 +epoch [10/50] batch [345/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.4941 (1.1556) acc 62.5000 (70.9511) lr 1.8763e-03 eta 4:58:00 +epoch [10/50] batch [350/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.5586 (1.1565) acc 62.5000 (70.9643) lr 1.8763e-03 eta 4:57:57 +epoch [10/50] batch [355/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.7905 (1.1544) acc 81.2500 (71.0387) lr 1.8763e-03 eta 4:57:51 +epoch [10/50] batch [360/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.2832 (1.1546) acc 68.7500 (71.0156) lr 1.8763e-03 eta 4:57:47 +epoch [10/50] batch [365/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.8545 (1.1569) acc 78.1250 (71.0445) lr 1.8763e-03 eta 4:57:40 +epoch [10/50] batch [370/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.6289 (1.1557) acc 84.3750 (71.0811) lr 1.8763e-03 eta 4:57:35 +epoch [10/50] batch [375/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.8613 (1.1558) acc 75.0000 (71.0417) lr 1.8763e-03 eta 4:57:30 +epoch [10/50] batch [380/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.6875 (1.1579) acc 
65.6250 (70.9868) lr 1.8763e-03 eta 4:57:30 +epoch [10/50] batch [385/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.1191 (1.1572) acc 65.6250 (71.0146) lr 1.8763e-03 eta 4:57:28 +epoch [10/50] batch [390/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.4707 (1.1595) acc 87.5000 (70.9615) lr 1.8763e-03 eta 4:57:21 +epoch [10/50] batch [395/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.0801 (1.1567) acc 68.7500 (71.0047) lr 1.8763e-03 eta 4:57:16 +epoch [10/50] batch [400/500] time 0.848 (0.887) data 0.000 (0.002) loss 1.2988 (1.1558) acc 78.1250 (71.0312) lr 1.8763e-03 eta 4:57:10 +epoch [10/50] batch [405/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.9702 (1.1523) acc 68.7500 (71.0802) lr 1.8763e-03 eta 4:57:05 +epoch [10/50] batch [410/500] time 0.859 (0.887) data 0.000 (0.002) loss 0.9121 (1.1508) acc 78.1250 (71.1280) lr 1.8763e-03 eta 4:56:59 +epoch [10/50] batch [415/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.0635 (1.1486) acc 78.1250 (71.2048) lr 1.8763e-03 eta 4:56:55 +epoch [10/50] batch [420/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8804 (1.1519) acc 71.8750 (71.1384) lr 1.8763e-03 eta 4:56:51 +epoch [10/50] batch [425/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.8696 (1.1512) acc 78.1250 (71.1397) lr 1.8763e-03 eta 4:56:52 +epoch [10/50] batch [430/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.7402 (1.1504) acc 78.1250 (71.1846) lr 1.8763e-03 eta 4:56:45 +epoch [10/50] batch [435/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.0703 (1.1494) acc 78.1250 (71.2356) lr 1.8763e-03 eta 4:56:43 +epoch [10/50] batch [440/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.0771 (1.1487) acc 78.1250 (71.2997) lr 1.8763e-03 eta 4:56:37 +epoch [10/50] batch [445/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.6792 (1.1460) acc 81.2500 (71.3553) lr 1.8763e-03 eta 4:56:34 +epoch [10/50] batch [450/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.1738 (1.1463) acc 65.6250 (71.3125) lr 1.8763e-03 eta 4:56:28 +epoch [10/50] batch 
[455/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2305 (1.1461) acc 78.1250 (71.3530) lr 1.8763e-03 eta 4:56:25 +epoch [10/50] batch [460/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.4314 (1.1453) acc 81.2500 (71.3859) lr 1.8763e-03 eta 4:56:24 +epoch [10/50] batch [465/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.8394 (1.1447) acc 71.8750 (71.3777) lr 1.8763e-03 eta 4:56:17 +epoch [10/50] batch [470/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.8687 (1.1449) acc 75.0000 (71.3763) lr 1.8763e-03 eta 4:56:11 +epoch [10/50] batch [475/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.4150 (1.1474) acc 68.7500 (71.3487) lr 1.8763e-03 eta 4:56:05 +epoch [10/50] batch [480/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.6367 (1.1453) acc 87.5000 (71.4323) lr 1.8763e-03 eta 4:56:01 +epoch [10/50] batch [485/500] time 0.881 (0.887) data 0.001 (0.002) loss 1.1172 (1.1457) acc 65.6250 (71.3724) lr 1.8763e-03 eta 4:55:56 +epoch [10/50] batch [490/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.2910 (1.1478) acc 56.2500 (71.3265) lr 1.8763e-03 eta 4:55:52 +epoch [10/50] batch [495/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.0850 (1.1451) acc 75.0000 (71.4078) lr 1.8763e-03 eta 4:55:49 +epoch [10/50] batch [500/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.9614 (1.1458) acc 75.0000 (71.4250) lr 1.8443e-03 eta 4:55:44 +epoch [11/50] batch [5/500] time 0.896 (1.012) data 0.000 (0.126) loss 1.3447 (1.0546) acc 59.3750 (73.7500) lr 1.8443e-03 eta 5:37:16 +epoch [11/50] batch [10/500] time 0.890 (0.946) data 0.000 (0.063) loss 0.7769 (1.0472) acc 78.1250 (73.1250) lr 1.8443e-03 eta 5:15:04 +epoch [11/50] batch [15/500] time 0.878 (0.923) data 0.000 (0.042) loss 0.7939 (1.0520) acc 81.2500 (73.7500) lr 1.8443e-03 eta 5:07:23 +epoch [11/50] batch [20/500] time 0.879 (0.913) data 0.000 (0.032) loss 0.9854 (1.0569) acc 78.1250 (74.0625) lr 1.8443e-03 eta 5:04:03 +epoch [11/50] batch [25/500] time 0.847 (0.908) data 0.000 (0.025) loss 0.6191 (1.0622) acc 
84.3750 (74.7500) lr 1.8443e-03 eta 5:02:07 +epoch [11/50] batch [30/500] time 0.881 (0.901) data 0.000 (0.021) loss 1.0176 (1.0750) acc 68.7500 (73.7500) lr 1.8443e-03 eta 5:00:00 +epoch [11/50] batch [35/500] time 0.896 (0.896) data 0.000 (0.018) loss 1.1416 (1.0589) acc 65.6250 (73.8393) lr 1.8443e-03 eta 4:58:18 +epoch [11/50] batch [40/500] time 0.869 (0.894) data 0.000 (0.016) loss 1.3369 (1.0448) acc 68.7500 (74.2969) lr 1.8443e-03 eta 4:57:25 +epoch [11/50] batch [45/500] time 0.888 (0.893) data 0.000 (0.014) loss 1.3760 (1.0833) acc 78.1250 (73.8194) lr 1.8443e-03 eta 4:56:52 +epoch [11/50] batch [50/500] time 0.854 (0.891) data 0.000 (0.013) loss 1.5771 (1.0917) acc 68.7500 (73.6250) lr 1.8443e-03 eta 4:56:11 +epoch [11/50] batch [55/500] time 0.888 (0.889) data 0.000 (0.012) loss 0.7158 (1.0960) acc 81.2500 (73.2386) lr 1.8443e-03 eta 4:55:39 +epoch [11/50] batch [60/500] time 0.875 (0.888) data 0.000 (0.011) loss 0.8457 (1.0773) acc 75.0000 (73.4375) lr 1.8443e-03 eta 4:55:00 +epoch [11/50] batch [65/500] time 0.888 (0.887) data 0.000 (0.010) loss 0.9375 (1.0746) acc 78.1250 (73.3654) lr 1.8443e-03 eta 4:54:45 +epoch [11/50] batch [70/500] time 0.891 (0.888) data 0.000 (0.009) loss 1.3652 (1.0963) acc 62.5000 (72.8571) lr 1.8443e-03 eta 4:54:56 +epoch [11/50] batch [75/500] time 0.882 (0.889) data 0.000 (0.009) loss 1.1035 (1.1016) acc 62.5000 (72.4167) lr 1.8443e-03 eta 4:55:17 +epoch [11/50] batch [80/500] time 0.880 (0.889) data 0.000 (0.008) loss 0.9971 (1.1304) acc 71.8750 (71.7188) lr 1.8443e-03 eta 4:55:10 +epoch [11/50] batch [85/500] time 0.884 (0.889) data 0.000 (0.008) loss 1.4141 (1.1240) acc 62.5000 (71.9118) lr 1.8443e-03 eta 4:55:02 +epoch [11/50] batch [90/500] time 0.909 (0.889) data 0.001 (0.007) loss 1.3037 (1.1194) acc 84.3750 (72.1528) lr 1.8443e-03 eta 4:54:57 +epoch [11/50] batch [95/500] time 0.887 (0.888) data 0.000 (0.007) loss 1.0156 (1.1128) acc 75.0000 (72.0066) lr 1.8443e-03 eta 4:54:43 +epoch [11/50] batch [100/500] time 
0.880 (0.888) data 0.000 (0.007) loss 0.9312 (1.1108) acc 78.1250 (72.0625) lr 1.8443e-03 eta 4:54:32 +epoch [11/50] batch [105/500] time 0.896 (0.888) data 0.000 (0.006) loss 1.7744 (1.1173) acc 56.2500 (71.9048) lr 1.8443e-03 eta 4:54:23 +epoch [11/50] batch [110/500] time 0.905 (0.888) data 0.000 (0.006) loss 1.1172 (1.1065) acc 68.7500 (72.1023) lr 1.8443e-03 eta 4:54:18 +epoch [11/50] batch [115/500] time 0.877 (0.888) data 0.000 (0.006) loss 1.7744 (1.1144) acc 53.1250 (71.8207) lr 1.8443e-03 eta 4:54:07 +epoch [11/50] batch [120/500] time 0.868 (0.888) data 0.000 (0.005) loss 0.8208 (1.1080) acc 71.8750 (71.9531) lr 1.8443e-03 eta 4:54:14 +epoch [11/50] batch [125/500] time 0.910 (0.888) data 0.000 (0.005) loss 1.5459 (1.1143) acc 68.7500 (71.9250) lr 1.8443e-03 eta 4:54:14 +epoch [11/50] batch [130/500] time 0.877 (0.888) data 0.000 (0.005) loss 1.5908 (1.1116) acc 68.7500 (71.9712) lr 1.8443e-03 eta 4:54:05 +epoch [11/50] batch [135/500] time 0.899 (0.888) data 0.000 (0.005) loss 0.7593 (1.1050) acc 81.2500 (72.1065) lr 1.8443e-03 eta 4:54:07 +epoch [11/50] batch [140/500] time 0.884 (0.889) data 0.000 (0.005) loss 1.3271 (1.1065) acc 68.7500 (71.9420) lr 1.8443e-03 eta 4:54:07 +epoch [11/50] batch [145/500] time 0.900 (0.889) data 0.000 (0.005) loss 1.2383 (1.1084) acc 75.0000 (71.9397) lr 1.8443e-03 eta 4:54:07 +epoch [11/50] batch [150/500] time 0.909 (0.889) data 0.000 (0.004) loss 1.5078 (1.1106) acc 62.5000 (71.8750) lr 1.8443e-03 eta 4:54:08 +epoch [11/50] batch [155/500] time 0.884 (0.889) data 0.000 (0.004) loss 1.3721 (1.1112) acc 68.7500 (71.9355) lr 1.8443e-03 eta 4:54:00 +epoch [11/50] batch [160/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.4541 (1.1100) acc 62.5000 (71.9141) lr 1.8443e-03 eta 4:53:49 +epoch [11/50] batch [165/500] time 0.896 (0.888) data 0.000 (0.004) loss 0.9893 (1.1066) acc 75.0000 (71.9318) lr 1.8443e-03 eta 4:53:40 +epoch [11/50] batch [170/500] time 0.866 (0.888) data 0.000 (0.004) loss 1.4248 (1.1061) acc 62.5000 
(71.9485) lr 1.8443e-03 eta 4:53:31 +epoch [11/50] batch [175/500] time 0.897 (0.888) data 0.000 (0.004) loss 1.5576 (1.1048) acc 68.7500 (72.0000) lr 1.8443e-03 eta 4:53:22 +epoch [11/50] batch [180/500] time 0.887 (0.888) data 0.000 (0.004) loss 1.2217 (1.1008) acc 68.7500 (72.0833) lr 1.8443e-03 eta 4:53:18 +epoch [11/50] batch [185/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.8047 (1.1048) acc 62.5000 (71.9257) lr 1.8443e-03 eta 4:53:10 +epoch [11/50] batch [190/500] time 0.861 (0.887) data 0.000 (0.004) loss 0.9751 (1.1069) acc 68.7500 (71.9079) lr 1.8443e-03 eta 4:52:59 +epoch [11/50] batch [195/500] time 0.863 (0.887) data 0.000 (0.003) loss 1.3438 (1.1098) acc 68.7500 (71.9551) lr 1.8443e-03 eta 4:52:49 +epoch [11/50] batch [200/500] time 0.858 (0.887) data 0.000 (0.003) loss 1.1699 (1.1108) acc 65.6250 (71.8906) lr 1.8443e-03 eta 4:52:41 +epoch [11/50] batch [205/500] time 0.871 (0.887) data 0.000 (0.003) loss 1.2100 (1.1173) acc 62.5000 (71.7378) lr 1.8443e-03 eta 4:52:35 +epoch [11/50] batch [210/500] time 0.861 (0.886) data 0.000 (0.003) loss 0.8896 (1.1177) acc 71.8750 (71.7560) lr 1.8443e-03 eta 4:52:19 +epoch [11/50] batch [215/500] time 0.976 (0.887) data 0.000 (0.003) loss 1.1396 (1.1142) acc 68.7500 (71.8895) lr 1.8443e-03 eta 4:52:22 +epoch [11/50] batch [220/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.5430 (1.1139) acc 68.7500 (71.9034) lr 1.8443e-03 eta 4:52:16 +epoch [11/50] batch [225/500] time 0.881 (0.886) data 0.000 (0.003) loss 1.0732 (1.1106) acc 81.2500 (72.0000) lr 1.8443e-03 eta 4:52:09 +epoch [11/50] batch [230/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.4570 (1.1160) acc 65.6250 (71.9701) lr 1.8443e-03 eta 4:52:09 +epoch [11/50] batch [235/500] time 0.853 (0.886) data 0.000 (0.003) loss 1.0264 (1.1188) acc 75.0000 (71.9149) lr 1.8443e-03 eta 4:51:52 +epoch [11/50] batch [240/500] time 0.853 (0.886) data 0.000 (0.003) loss 0.7749 (1.1205) acc 81.2500 (71.9401) lr 1.8443e-03 eta 4:51:44 +epoch [11/50] batch [245/500] 
time 0.861 (0.886) data 0.000 (0.003) loss 1.4824 (1.1236) acc 71.8750 (71.9133) lr 1.8443e-03 eta 4:51:33 +epoch [11/50] batch [250/500] time 0.890 (0.886) data 0.000 (0.003) loss 1.4326 (1.1219) acc 62.5000 (71.9500) lr 1.8443e-03 eta 4:51:31 +epoch [11/50] batch [255/500] time 0.885 (0.886) data 0.000 (0.003) loss 0.6543 (1.1214) acc 81.2500 (71.8995) lr 1.8443e-03 eta 4:51:24 +epoch [11/50] batch [260/500] time 0.902 (0.886) data 0.000 (0.003) loss 1.1152 (1.1200) acc 65.6250 (71.9231) lr 1.8443e-03 eta 4:51:33 +epoch [11/50] batch [265/500] time 0.925 (0.886) data 0.000 (0.003) loss 1.1670 (1.1203) acc 68.7500 (71.9222) lr 1.8443e-03 eta 4:51:31 +epoch [11/50] batch [270/500] time 0.882 (0.886) data 0.000 (0.003) loss 1.3379 (1.1237) acc 75.0000 (71.8981) lr 1.8443e-03 eta 4:51:26 +epoch [11/50] batch [275/500] time 0.868 (0.886) data 0.000 (0.003) loss 0.7461 (1.1235) acc 81.2500 (71.9432) lr 1.8443e-03 eta 4:51:19 +epoch [11/50] batch [280/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.2666 (1.1260) acc 68.7500 (71.8638) lr 1.8443e-03 eta 4:51:14 +epoch [11/50] batch [285/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.6787 (1.1299) acc 62.5000 (71.8092) lr 1.8443e-03 eta 4:51:08 +epoch [11/50] batch [290/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.2188 (1.1276) acc 62.5000 (71.8103) lr 1.8443e-03 eta 4:51:03 +epoch [11/50] batch [295/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.3926 (1.1252) acc 75.0000 (71.8326) lr 1.8443e-03 eta 4:50:58 +epoch [11/50] batch [300/500] time 0.896 (0.886) data 0.000 (0.002) loss 1.2822 (1.1219) acc 71.8750 (71.8438) lr 1.8443e-03 eta 4:50:59 +epoch [11/50] batch [305/500] time 0.895 (0.886) data 0.000 (0.002) loss 0.7363 (1.1213) acc 81.2500 (71.8135) lr 1.8443e-03 eta 4:50:58 +epoch [11/50] batch [310/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1738 (1.1195) acc 68.7500 (71.8145) lr 1.8443e-03 eta 4:50:50 +epoch [11/50] batch [315/500] time 0.877 (0.886) data 0.000 (0.002) loss 1.2842 (1.1198) acc 
68.7500 (71.7857) lr 1.8443e-03 eta 4:50:37 +epoch [11/50] batch [320/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.3789 (1.1179) acc 62.5000 (71.8066) lr 1.8443e-03 eta 4:50:29 +epoch [11/50] batch [325/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.2002 (1.1199) acc 56.2500 (71.7115) lr 1.8443e-03 eta 4:50:23 +epoch [11/50] batch [330/500] time 0.913 (0.886) data 0.000 (0.002) loss 1.3018 (1.1212) acc 84.3750 (71.7708) lr 1.8443e-03 eta 4:50:22 +epoch [11/50] batch [335/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.7329 (1.1209) acc 78.1250 (71.7910) lr 1.8443e-03 eta 4:50:20 +epoch [11/50] batch [340/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.1748 (1.1230) acc 65.6250 (71.6912) lr 1.8443e-03 eta 4:50:11 +epoch [11/50] batch [345/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.7939 (1.1239) acc 65.6250 (71.7210) lr 1.8443e-03 eta 4:50:05 +epoch [11/50] batch [350/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.2373 (1.1218) acc 75.0000 (71.7589) lr 1.8443e-03 eta 4:49:55 +epoch [11/50] batch [355/500] time 0.907 (0.885) data 0.000 (0.002) loss 1.0449 (1.1225) acc 81.2500 (71.7958) lr 1.8443e-03 eta 4:49:49 +epoch [11/50] batch [360/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.4160 (1.1223) acc 71.8750 (71.7882) lr 1.8443e-03 eta 4:49:53 +epoch [11/50] batch [365/500] time 0.834 (0.885) data 0.000 (0.002) loss 1.1621 (1.1232) acc 62.5000 (71.7637) lr 1.8443e-03 eta 4:49:44 +epoch [11/50] batch [370/500] time 0.871 (0.885) data 0.000 (0.002) loss 0.8906 (1.1255) acc 71.8750 (71.7230) lr 1.8443e-03 eta 4:49:37 +epoch [11/50] batch [375/500] time 0.898 (0.885) data 0.000 (0.002) loss 1.5215 (1.1268) acc 65.6250 (71.7500) lr 1.8443e-03 eta 4:49:32 +epoch [11/50] batch [380/500] time 0.856 (0.885) data 0.000 (0.002) loss 1.2354 (1.1262) acc 68.7500 (71.6941) lr 1.8443e-03 eta 4:49:21 +epoch [11/50] batch [385/500] time 0.891 (0.885) data 0.000 (0.002) loss 1.0420 (1.1249) acc 68.7500 (71.6396) lr 1.8443e-03 eta 4:49:19 +epoch [11/50] batch 
[390/500] time 0.858 (0.885) data 0.000 (0.002) loss 1.3125 (1.1255) acc 62.5000 (71.5946) lr 1.8443e-03 eta 4:49:11 +epoch [11/50] batch [395/500] time 0.892 (0.885) data 0.000 (0.002) loss 0.5483 (1.1219) acc 84.3750 (71.6614) lr 1.8443e-03 eta 4:49:06 +epoch [11/50] batch [400/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.1582 (1.1199) acc 65.6250 (71.6797) lr 1.8443e-03 eta 4:49:01 +epoch [11/50] batch [405/500] time 0.883 (0.885) data 0.000 (0.002) loss 1.5234 (1.1227) acc 71.8750 (71.6358) lr 1.8443e-03 eta 4:49:02 +epoch [11/50] batch [410/500] time 0.880 (0.885) data 0.000 (0.002) loss 1.1865 (1.1215) acc 65.6250 (71.6387) lr 1.8443e-03 eta 4:48:59 +epoch [11/50] batch [415/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.4414 (1.1189) acc 87.5000 (71.7319) lr 1.8443e-03 eta 4:48:54 +epoch [11/50] batch [420/500] time 0.890 (0.885) data 0.000 (0.002) loss 1.0674 (1.1198) acc 81.2500 (71.7188) lr 1.8443e-03 eta 4:48:53 +epoch [11/50] batch [425/500] time 0.886 (0.885) data 0.000 (0.002) loss 2.1777 (1.1229) acc 53.1250 (71.6471) lr 1.8443e-03 eta 4:48:50 +epoch [11/50] batch [430/500] time 0.854 (0.885) data 0.000 (0.002) loss 0.9331 (1.1243) acc 78.1250 (71.6570) lr 1.8443e-03 eta 4:48:43 +epoch [11/50] batch [435/500] time 0.885 (0.885) data 0.000 (0.002) loss 1.1826 (1.1248) acc 68.7500 (71.6164) lr 1.8443e-03 eta 4:48:39 +epoch [11/50] batch [440/500] time 0.910 (0.885) data 0.000 (0.002) loss 1.0938 (1.1258) acc 65.6250 (71.5483) lr 1.8443e-03 eta 4:48:33 +epoch [11/50] batch [445/500] time 0.896 (0.885) data 0.000 (0.002) loss 0.9341 (1.1268) acc 75.0000 (71.5169) lr 1.8443e-03 eta 4:48:29 +epoch [11/50] batch [450/500] time 0.876 (0.885) data 0.000 (0.002) loss 0.9185 (1.1278) acc 78.1250 (71.5417) lr 1.8443e-03 eta 4:48:23 +epoch [11/50] batch [455/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.8882 (1.1290) acc 75.0000 (71.5179) lr 1.8443e-03 eta 4:48:18 +epoch [11/50] batch [460/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.0254 
(1.1277) acc 68.7500 (71.4878) lr 1.8443e-03 eta 4:48:16 +epoch [11/50] batch [465/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.0264 (1.1283) acc 71.8750 (71.4718) lr 1.8443e-03 eta 4:48:08 +epoch [11/50] batch [470/500] time 0.898 (0.885) data 0.000 (0.002) loss 0.6484 (1.1260) acc 87.5000 (71.5492) lr 1.8443e-03 eta 4:48:03 +epoch [11/50] batch [475/500] time 0.885 (0.885) data 0.000 (0.002) loss 0.6685 (1.1244) acc 78.1250 (71.5921) lr 1.8443e-03 eta 4:48:00 +epoch [11/50] batch [480/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.4521 (1.1260) acc 62.5000 (71.5885) lr 1.8443e-03 eta 4:47:58 +epoch [11/50] batch [485/500] time 0.894 (0.885) data 0.000 (0.002) loss 1.6387 (1.1272) acc 56.2500 (71.5593) lr 1.8443e-03 eta 4:47:53 +epoch [11/50] batch [490/500] time 0.899 (0.885) data 0.000 (0.002) loss 0.8750 (1.1272) acc 71.8750 (71.5370) lr 1.8443e-03 eta 4:47:47 +epoch [11/50] batch [495/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.1035 (1.1302) acc 71.8750 (71.4646) lr 1.8443e-03 eta 4:47:42 +epoch [11/50] batch [500/500] time 0.935 (0.885) data 0.000 (0.001) loss 1.4463 (1.1316) acc 62.5000 (71.4250) lr 1.8090e-03 eta 4:47:43 +epoch [12/50] batch [5/500] time 0.895 (1.037) data 0.000 (0.131) loss 0.6226 (0.8726) acc 75.0000 (72.5000) lr 1.8090e-03 eta 5:36:58 +epoch [12/50] batch [10/500] time 0.911 (0.963) data 0.000 (0.066) loss 0.8818 (0.8930) acc 65.6250 (72.8125) lr 1.8090e-03 eta 5:12:46 +epoch [12/50] batch [15/500] time 0.881 (0.937) data 0.000 (0.044) loss 0.9829 (0.9626) acc 75.0000 (72.7083) lr 1.8090e-03 eta 5:04:24 +epoch [12/50] batch [20/500] time 0.847 (0.923) data 0.000 (0.033) loss 0.7827 (0.9752) acc 84.3750 (72.9688) lr 1.8090e-03 eta 4:59:48 +epoch [12/50] batch [25/500] time 0.866 (0.913) data 0.000 (0.026) loss 0.7603 (0.9716) acc 81.2500 (73.0000) lr 1.8090e-03 eta 4:56:30 +epoch [12/50] batch [30/500] time 0.882 (0.909) data 0.000 (0.022) loss 1.1533 (0.9895) acc 65.6250 (72.3958) lr 1.8090e-03 eta 4:55:03 +epoch [12/50] 
batch [35/500] time 0.886 (0.905) data 0.000 (0.019) loss 1.3613 (1.0311) acc 65.6250 (71.6071) lr 1.8090e-03 eta 4:53:34 +epoch [12/50] batch [40/500] time 0.874 (0.900) data 0.000 (0.017) loss 1.0752 (1.0571) acc 68.7500 (70.8594) lr 1.8090e-03 eta 4:51:55 +epoch [12/50] batch [45/500] time 0.860 (0.896) data 0.000 (0.015) loss 1.5820 (1.0638) acc 62.5000 (70.6944) lr 1.8090e-03 eta 4:50:33 +epoch [12/50] batch [50/500] time 0.874 (0.895) data 0.000 (0.013) loss 0.9097 (1.0639) acc 71.8750 (70.6875) lr 1.8090e-03 eta 4:50:00 +epoch [12/50] batch [55/500] time 0.866 (0.893) data 0.000 (0.012) loss 1.6738 (1.0648) acc 62.5000 (70.7955) lr 1.8090e-03 eta 4:49:17 +epoch [12/50] batch [60/500] time 0.878 (0.892) data 0.000 (0.011) loss 1.8438 (1.0830) acc 65.6250 (70.8333) lr 1.8090e-03 eta 4:49:01 +epoch [12/50] batch [65/500] time 0.897 (0.894) data 0.000 (0.010) loss 1.4648 (1.0967) acc 75.0000 (70.9135) lr 1.8090e-03 eta 4:49:41 +epoch [12/50] batch [70/500] time 0.863 (0.893) data 0.000 (0.010) loss 1.5605 (1.1096) acc 65.6250 (70.8036) lr 1.8090e-03 eta 4:49:16 +epoch [12/50] batch [75/500] time 0.878 (0.892) data 0.000 (0.009) loss 0.9170 (1.1095) acc 71.8750 (70.5833) lr 1.8090e-03 eta 4:48:44 +epoch [12/50] batch [80/500] time 0.875 (0.890) data 0.000 (0.008) loss 0.7495 (1.1243) acc 81.2500 (70.7031) lr 1.8090e-03 eta 4:48:10 +epoch [12/50] batch [85/500] time 0.865 (0.890) data 0.000 (0.008) loss 1.1768 (1.1304) acc 65.6250 (70.6985) lr 1.8090e-03 eta 4:47:55 +epoch [12/50] batch [90/500] time 0.860 (0.888) data 0.000 (0.008) loss 0.8359 (1.1199) acc 78.1250 (71.0069) lr 1.8090e-03 eta 4:47:23 +epoch [12/50] batch [95/500] time 0.858 (0.887) data 0.000 (0.007) loss 1.1162 (1.1330) acc 65.6250 (71.0197) lr 1.8090e-03 eta 4:46:54 +epoch [12/50] batch [100/500] time 0.896 (0.887) data 0.000 (0.007) loss 0.9604 (1.1339) acc 81.2500 (71.2500) lr 1.8090e-03 eta 4:46:46 +epoch [12/50] batch [105/500] time 0.960 (0.887) data 0.000 (0.006) loss 2.3535 (1.1554) acc 
46.8750 (70.7440) lr 1.8090e-03 eta 4:46:41 +epoch [12/50] batch [110/500] time 0.890 (0.887) data 0.000 (0.006) loss 0.8774 (1.1501) acc 78.1250 (71.0227) lr 1.8090e-03 eta 4:46:41 +epoch [12/50] batch [115/500] time 0.875 (0.887) data 0.000 (0.006) loss 1.3213 (1.1529) acc 65.6250 (70.9239) lr 1.8090e-03 eta 4:46:43 +epoch [12/50] batch [120/500] time 0.884 (0.887) data 0.000 (0.006) loss 1.4912 (1.1522) acc 65.6250 (71.0156) lr 1.8090e-03 eta 4:46:30 +epoch [12/50] batch [125/500] time 0.890 (0.887) data 0.000 (0.005) loss 1.4414 (1.1516) acc 71.8750 (71.1250) lr 1.8090e-03 eta 4:46:24 +epoch [12/50] batch [130/500] time 0.865 (0.886) data 0.000 (0.005) loss 1.4297 (1.1489) acc 46.8750 (70.9615) lr 1.8090e-03 eta 4:46:11 +epoch [12/50] batch [135/500] time 0.873 (0.886) data 0.000 (0.005) loss 0.8457 (1.1449) acc 78.1250 (71.0417) lr 1.8090e-03 eta 4:46:06 +epoch [12/50] batch [140/500] time 0.908 (0.886) data 0.000 (0.005) loss 1.2686 (1.1443) acc 68.7500 (71.0714) lr 1.8090e-03 eta 4:45:55 +epoch [12/50] batch [145/500] time 0.884 (0.886) data 0.000 (0.005) loss 0.8564 (1.1434) acc 78.1250 (71.1638) lr 1.8090e-03 eta 4:45:49 +epoch [12/50] batch [150/500] time 0.888 (0.886) data 0.000 (0.005) loss 1.4688 (1.1451) acc 65.6250 (71.1250) lr 1.8090e-03 eta 4:45:52 +epoch [12/50] batch [155/500] time 0.873 (0.886) data 0.000 (0.004) loss 1.6631 (1.1546) acc 53.1250 (70.8669) lr 1.8090e-03 eta 4:45:43 +epoch [12/50] batch [160/500] time 0.921 (0.886) data 0.000 (0.004) loss 1.0713 (1.1543) acc 75.0000 (70.8789) lr 1.8090e-03 eta 4:45:37 +epoch [12/50] batch [165/500] time 0.902 (0.886) data 0.000 (0.004) loss 1.1484 (1.1499) acc 71.8750 (70.9091) lr 1.8090e-03 eta 4:45:37 +epoch [12/50] batch [170/500] time 0.897 (0.887) data 0.000 (0.004) loss 0.9658 (1.1521) acc 78.1250 (70.9559) lr 1.8090e-03 eta 4:45:36 +epoch [12/50] batch [175/500] time 0.875 (0.886) data 0.000 (0.004) loss 1.5527 (1.1581) acc 65.6250 (70.8750) lr 1.8090e-03 eta 4:45:31 +epoch [12/50] batch 
[180/500] time 0.855 (0.886) data 0.000 (0.004) loss 2.0977 (1.1647) acc 71.8750 (70.8507) lr 1.8090e-03 eta 4:45:22 +epoch [12/50] batch [185/500] time 0.846 (0.885) data 0.000 (0.004) loss 0.9609 (1.1617) acc 71.8750 (70.8446) lr 1.8090e-03 eta 4:45:03 +epoch [12/50] batch [190/500] time 0.860 (0.885) data 0.000 (0.004) loss 1.7490 (1.1682) acc 62.5000 (70.7895) lr 1.8090e-03 eta 4:44:52 +epoch [12/50] batch [195/500] time 0.874 (0.885) data 0.000 (0.004) loss 1.3926 (1.1698) acc 71.8750 (70.7692) lr 1.8090e-03 eta 4:44:44 +epoch [12/50] batch [200/500] time 0.878 (0.885) data 0.000 (0.003) loss 1.7930 (1.1746) acc 65.6250 (70.6719) lr 1.8090e-03 eta 4:44:32 +epoch [12/50] batch [205/500] time 0.867 (0.885) data 0.000 (0.003) loss 1.6416 (1.1728) acc 65.6250 (70.8232) lr 1.8090e-03 eta 4:44:37 +epoch [12/50] batch [210/500] time 0.924 (0.885) data 0.000 (0.003) loss 1.0215 (1.1725) acc 78.1250 (70.7887) lr 1.8090e-03 eta 4:44:36 +epoch [12/50] batch [215/500] time 0.858 (0.885) data 0.000 (0.003) loss 0.8145 (1.1659) acc 81.2500 (70.9884) lr 1.8090e-03 eta 4:44:30 +epoch [12/50] batch [220/500] time 0.864 (0.885) data 0.000 (0.003) loss 1.4053 (1.1603) acc 71.8750 (71.1648) lr 1.8090e-03 eta 4:44:20 +epoch [12/50] batch [225/500] time 0.902 (0.885) data 0.000 (0.003) loss 1.3125 (1.1588) acc 68.7500 (71.2361) lr 1.8090e-03 eta 4:44:13 +epoch [12/50] batch [230/500] time 0.891 (0.885) data 0.000 (0.003) loss 0.3970 (1.1546) acc 87.5000 (71.3315) lr 1.8090e-03 eta 4:44:07 +epoch [12/50] batch [235/500] time 0.876 (0.885) data 0.000 (0.003) loss 1.2930 (1.1518) acc 65.6250 (71.3830) lr 1.8090e-03 eta 4:44:00 +epoch [12/50] batch [240/500] time 0.902 (0.884) data 0.000 (0.003) loss 0.8975 (1.1510) acc 78.1250 (71.4323) lr 1.8090e-03 eta 4:43:53 +epoch [12/50] batch [245/500] time 0.901 (0.885) data 0.000 (0.003) loss 1.0166 (1.1498) acc 75.0000 (71.3520) lr 1.8090e-03 eta 4:43:52 +epoch [12/50] batch [250/500] time 0.910 (0.885) data 0.000 (0.003) loss 1.0654 
(1.1451) acc 84.3750 (71.4750) lr 1.8090e-03 eta 4:43:58 +epoch [12/50] batch [255/500] time 0.851 (0.885) data 0.000 (0.003) loss 1.6055 (1.1431) acc 68.7500 (71.5564) lr 1.8090e-03 eta 4:43:48 +epoch [12/50] batch [260/500] time 0.904 (0.885) data 0.000 (0.003) loss 1.0117 (1.1430) acc 68.7500 (71.4904) lr 1.8090e-03 eta 4:43:43 +epoch [12/50] batch [265/500] time 0.890 (0.885) data 0.000 (0.003) loss 1.2666 (1.1427) acc 71.8750 (71.5448) lr 1.8090e-03 eta 4:43:39 +epoch [12/50] batch [270/500] time 0.878 (0.885) data 0.000 (0.003) loss 0.8042 (1.1439) acc 75.0000 (71.5278) lr 1.8090e-03 eta 4:43:35 +epoch [12/50] batch [275/500] time 0.885 (0.885) data 0.000 (0.003) loss 1.0088 (1.1436) acc 78.1250 (71.5000) lr 1.8090e-03 eta 4:43:29 +epoch [12/50] batch [280/500] time 0.889 (0.885) data 0.000 (0.003) loss 1.2744 (1.1426) acc 71.8750 (71.5067) lr 1.8090e-03 eta 4:43:25 +epoch [12/50] batch [285/500] time 0.876 (0.885) data 0.000 (0.003) loss 1.0703 (1.1442) acc 68.7500 (71.5022) lr 1.8090e-03 eta 4:43:19 +epoch [12/50] batch [290/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.2666 (1.1455) acc 65.6250 (71.4978) lr 1.8090e-03 eta 4:43:13 +epoch [12/50] batch [295/500] time 0.869 (0.884) data 0.000 (0.002) loss 1.1904 (1.1456) acc 65.6250 (71.4513) lr 1.8090e-03 eta 4:43:06 +epoch [12/50] batch [300/500] time 0.877 (0.885) data 0.000 (0.002) loss 1.0771 (1.1491) acc 71.8750 (71.3542) lr 1.8090e-03 eta 4:43:03 +epoch [12/50] batch [305/500] time 0.886 (0.884) data 0.000 (0.002) loss 0.9521 (1.1486) acc 75.0000 (71.3525) lr 1.8090e-03 eta 4:42:53 +epoch [12/50] batch [310/500] time 0.882 (0.884) data 0.000 (0.002) loss 1.0752 (1.1452) acc 65.6250 (71.3508) lr 1.8090e-03 eta 4:42:47 +epoch [12/50] batch [315/500] time 0.865 (0.884) data 0.000 (0.002) loss 1.4531 (1.1480) acc 68.7500 (71.2698) lr 1.8090e-03 eta 4:42:39 +epoch [12/50] batch [320/500] time 0.889 (0.884) data 0.000 (0.002) loss 0.7959 (1.1459) acc 71.8750 (71.2988) lr 1.8090e-03 eta 4:42:29 +epoch 
[12/50] batch [325/500] time 0.872 (0.884) data 0.000 (0.002) loss 1.2012 (1.1456) acc 71.8750 (71.2308) lr 1.8090e-03 eta 4:42:22 +epoch [12/50] batch [330/500] time 0.911 (0.884) data 0.000 (0.002) loss 0.8467 (1.1477) acc 78.1250 (71.1742) lr 1.8090e-03 eta 4:42:22 +epoch [12/50] batch [335/500] time 0.874 (0.884) data 0.000 (0.002) loss 0.7822 (1.1455) acc 75.0000 (71.2500) lr 1.8090e-03 eta 4:42:16 +epoch [12/50] batch [340/500] time 0.883 (0.884) data 0.000 (0.002) loss 1.3086 (1.1473) acc 68.7500 (71.2132) lr 1.8090e-03 eta 4:42:09 +epoch [12/50] batch [345/500] time 0.872 (0.884) data 0.000 (0.002) loss 0.9316 (1.1479) acc 71.8750 (71.2591) lr 1.8090e-03 eta 4:42:05 +epoch [12/50] batch [350/500] time 0.905 (0.884) data 0.000 (0.002) loss 1.3760 (1.1473) acc 68.7500 (71.2768) lr 1.8090e-03 eta 4:42:09 +epoch [12/50] batch [355/500] time 0.907 (0.884) data 0.000 (0.002) loss 1.0840 (1.1497) acc 75.0000 (71.2588) lr 1.8090e-03 eta 4:42:09 +epoch [12/50] batch [360/500] time 0.907 (0.885) data 0.000 (0.002) loss 0.9253 (1.1465) acc 78.1250 (71.3281) lr 1.8090e-03 eta 4:42:09 +epoch [12/50] batch [365/500] time 0.898 (0.885) data 0.000 (0.002) loss 0.8276 (1.1437) acc 75.0000 (71.3613) lr 1.8090e-03 eta 4:42:07 +epoch [12/50] batch [370/500] time 0.885 (0.885) data 0.000 (0.002) loss 1.1465 (1.1453) acc 68.7500 (71.3345) lr 1.8090e-03 eta 4:42:05 +epoch [12/50] batch [375/500] time 0.870 (0.885) data 0.000 (0.002) loss 0.9434 (1.1451) acc 68.7500 (71.3000) lr 1.8090e-03 eta 4:41:57 +epoch [12/50] batch [380/500] time 0.897 (0.885) data 0.000 (0.002) loss 1.0146 (1.1460) acc 75.0000 (71.2418) lr 1.8090e-03 eta 4:41:52 +epoch [12/50] batch [385/500] time 0.916 (0.885) data 0.000 (0.002) loss 0.5005 (1.1445) acc 87.5000 (71.2500) lr 1.8090e-03 eta 4:41:49 +epoch [12/50] batch [390/500] time 0.859 (0.884) data 0.000 (0.002) loss 1.0342 (1.1453) acc 78.1250 (71.2500) lr 1.8090e-03 eta 4:41:40 +epoch [12/50] batch [395/500] time 0.896 (0.884) data 0.000 (0.002) loss 
1.1436 (1.1448) acc 59.3750 (71.2025) lr 1.8090e-03 eta 4:41:37 +epoch [12/50] batch [400/500] time 0.855 (0.884) data 0.000 (0.002) loss 1.4619 (1.1454) acc 68.7500 (71.1719) lr 1.8090e-03 eta 4:41:32 +epoch [12/50] batch [405/500] time 0.882 (0.884) data 0.000 (0.002) loss 0.8745 (1.1441) acc 75.0000 (71.1883) lr 1.8090e-03 eta 4:41:25 +epoch [12/50] batch [410/500] time 0.863 (0.884) data 0.000 (0.002) loss 1.0723 (1.1448) acc 71.8750 (71.1814) lr 1.8090e-03 eta 4:41:17 +epoch [12/50] batch [415/500] time 0.879 (0.884) data 0.000 (0.002) loss 1.2031 (1.1421) acc 75.0000 (71.2801) lr 1.8090e-03 eta 4:41:11 +epoch [12/50] batch [420/500] time 0.883 (0.884) data 0.000 (0.002) loss 1.1182 (1.1451) acc 68.7500 (71.2351) lr 1.8090e-03 eta 4:41:05 +epoch [12/50] batch [425/500] time 0.873 (0.884) data 0.000 (0.002) loss 0.9785 (1.1463) acc 78.1250 (71.2574) lr 1.8090e-03 eta 4:40:58 +epoch [12/50] batch [430/500] time 0.886 (0.884) data 0.000 (0.002) loss 0.9072 (1.1454) acc 65.6250 (71.3154) lr 1.8090e-03 eta 4:40:56 +epoch [12/50] batch [435/500] time 0.886 (0.884) data 0.000 (0.002) loss 1.2070 (1.1457) acc 71.8750 (71.2716) lr 1.8090e-03 eta 4:40:53 +epoch [12/50] batch [440/500] time 0.883 (0.884) data 0.000 (0.002) loss 0.9258 (1.1451) acc 65.6250 (71.2642) lr 1.8090e-03 eta 4:40:47 +epoch [12/50] batch [445/500] time 0.885 (0.884) data 0.000 (0.002) loss 1.3145 (1.1470) acc 71.8750 (71.2640) lr 1.8090e-03 eta 4:40:45 +epoch [12/50] batch [450/500] time 0.881 (0.884) data 0.000 (0.002) loss 0.7471 (1.1454) acc 81.2500 (71.2917) lr 1.8090e-03 eta 4:40:40 +epoch [12/50] batch [455/500] time 0.883 (0.884) data 0.000 (0.002) loss 0.8989 (1.1430) acc 75.0000 (71.3530) lr 1.8090e-03 eta 4:40:35 +epoch [12/50] batch [460/500] time 0.856 (0.884) data 0.000 (0.002) loss 0.9624 (1.1408) acc 81.2500 (71.4538) lr 1.8090e-03 eta 4:40:29 +epoch [12/50] batch [465/500] time 0.898 (0.884) data 0.000 (0.002) loss 0.8638 (1.1413) acc 78.1250 (71.5188) lr 1.8090e-03 eta 4:40:26 
+epoch [12/50] batch [470/500] time 0.875 (0.884) data 0.000 (0.002) loss 0.9165 (1.1436) acc 78.1250 (71.5293) lr 1.8090e-03 eta 4:40:20 +epoch [12/50] batch [475/500] time 0.884 (0.884) data 0.000 (0.002) loss 0.6836 (1.1421) acc 78.1250 (71.5987) lr 1.8090e-03 eta 4:40:16 +epoch [12/50] batch [480/500] time 0.867 (0.884) data 0.000 (0.002) loss 1.1445 (1.1406) acc 68.7500 (71.6211) lr 1.8090e-03 eta 4:40:12 +epoch [12/50] batch [485/500] time 0.927 (0.884) data 0.000 (0.002) loss 0.6948 (1.1402) acc 81.2500 (71.6430) lr 1.8090e-03 eta 4:40:09 +epoch [12/50] batch [490/500] time 0.979 (0.884) data 0.000 (0.002) loss 1.0771 (1.1391) acc 75.0000 (71.6773) lr 1.8090e-03 eta 4:40:07 +epoch [12/50] batch [495/500] time 0.861 (0.884) data 0.000 (0.002) loss 1.0088 (1.1395) acc 78.1250 (71.6225) lr 1.8090e-03 eta 4:40:01 +epoch [12/50] batch [500/500] time 0.863 (0.884) data 0.000 (0.002) loss 0.6743 (1.1388) acc 78.1250 (71.6437) lr 1.7705e-03 eta 4:39:55 +epoch [13/50] batch [5/500] time 0.876 (1.003) data 0.000 (0.128) loss 1.0615 (1.2906) acc 78.1250 (68.7500) lr 1.7705e-03 eta 5:17:36 +epoch [13/50] batch [10/500] time 0.864 (0.939) data 0.000 (0.064) loss 1.6885 (1.1813) acc 56.2500 (67.8125) lr 1.7705e-03 eta 4:57:04 +epoch [13/50] batch [15/500] time 0.885 (0.918) data 0.001 (0.043) loss 1.0918 (1.1677) acc 75.0000 (68.3333) lr 1.7705e-03 eta 4:50:33 +epoch [13/50] batch [20/500] time 0.896 (0.911) data 0.000 (0.032) loss 1.4590 (1.1803) acc 59.3750 (68.7500) lr 1.7705e-03 eta 4:48:07 +epoch [13/50] batch [25/500] time 0.878 (0.904) data 0.000 (0.026) loss 1.3203 (1.1786) acc 65.6250 (68.8750) lr 1.7705e-03 eta 4:45:57 +epoch [13/50] batch [30/500] time 0.895 (0.902) data 0.000 (0.022) loss 1.0557 (1.1655) acc 84.3750 (69.5833) lr 1.7705e-03 eta 4:45:01 +epoch [13/50] batch [35/500] time 0.904 (0.898) data 0.000 (0.019) loss 1.7393 (1.1784) acc 56.2500 (69.1964) lr 1.7705e-03 eta 4:43:59 +epoch [13/50] batch [40/500] time 0.901 (0.898) data 0.000 (0.016) loss 
1.3174 (1.1820) acc 75.0000 (69.4531) lr 1.7705e-03 eta 4:43:38 +epoch [13/50] batch [45/500] time 0.882 (0.895) data 0.000 (0.014) loss 0.8730 (1.1342) acc 81.2500 (70.8333) lr 1.7705e-03 eta 4:42:51 +epoch [13/50] batch [50/500] time 0.866 (0.896) data 0.000 (0.013) loss 0.5322 (1.1307) acc 84.3750 (71.1250) lr 1.7705e-03 eta 4:43:01 +epoch [13/50] batch [55/500] time 0.911 (0.895) data 0.000 (0.012) loss 0.9990 (1.1092) acc 71.8750 (71.5909) lr 1.7705e-03 eta 4:42:38 +epoch [13/50] batch [60/500] time 0.908 (0.894) data 0.000 (0.011) loss 1.4053 (1.1094) acc 65.6250 (71.7188) lr 1.7705e-03 eta 4:42:07 +epoch [13/50] batch [65/500] time 0.851 (0.892) data 0.000 (0.010) loss 0.7910 (1.0829) acc 75.0000 (72.1635) lr 1.7705e-03 eta 4:41:22 +epoch [13/50] batch [70/500] time 0.866 (0.891) data 0.000 (0.009) loss 1.2842 (1.0799) acc 71.8750 (72.2321) lr 1.7705e-03 eta 4:40:57 +epoch [13/50] batch [75/500] time 0.863 (0.889) data 0.000 (0.009) loss 1.2256 (1.0844) acc 68.7500 (72.2500) lr 1.7705e-03 eta 4:40:20 +epoch [13/50] batch [80/500] time 0.855 (0.889) data 0.000 (0.008) loss 1.3955 (1.0822) acc 62.5000 (72.1875) lr 1.7705e-03 eta 4:40:23 +epoch [13/50] batch [85/500] time 0.904 (0.889) data 0.000 (0.008) loss 0.8833 (1.0736) acc 71.8750 (72.3162) lr 1.7705e-03 eta 4:40:14 +epoch [13/50] batch [90/500] time 0.965 (0.890) data 0.000 (0.007) loss 0.9346 (1.0876) acc 68.7500 (72.0139) lr 1.7705e-03 eta 4:40:21 +epoch [13/50] batch [95/500] time 0.872 (0.889) data 0.000 (0.007) loss 0.5425 (1.0887) acc 81.2500 (72.1711) lr 1.7705e-03 eta 4:40:03 +epoch [13/50] batch [100/500] time 0.911 (0.888) data 0.000 (0.007) loss 1.1445 (1.0923) acc 65.6250 (72.2500) lr 1.7705e-03 eta 4:39:51 +epoch [13/50] batch [105/500] time 0.871 (0.887) data 0.000 (0.006) loss 0.9668 (1.1031) acc 68.7500 (71.9940) lr 1.7705e-03 eta 4:39:28 +epoch [13/50] batch [110/500] time 0.858 (0.887) data 0.000 (0.006) loss 1.2227 (1.1076) acc 62.5000 (71.9318) lr 1.7705e-03 eta 4:39:16 +epoch [13/50] 
batch [115/500] time 0.878 (0.887) data 0.000 (0.006) loss 0.8774 (1.1034) acc 75.0000 (72.1196) lr 1.7705e-03 eta 4:39:06 +epoch [13/50] batch [120/500] time 0.866 (0.887) data 0.000 (0.006) loss 0.8774 (1.1051) acc 71.8750 (72.0312) lr 1.7705e-03 eta 4:39:05 +epoch [13/50] batch [125/500] time 0.898 (0.887) data 0.000 (0.005) loss 0.7505 (1.0956) acc 81.2500 (72.2500) lr 1.7705e-03 eta 4:38:59 +epoch [13/50] batch [130/500] time 0.885 (0.886) data 0.000 (0.005) loss 1.2227 (1.1004) acc 65.6250 (72.2356) lr 1.7705e-03 eta 4:38:43 +epoch [13/50] batch [135/500] time 0.872 (0.886) data 0.000 (0.005) loss 1.9609 (1.1130) acc 68.7500 (72.0602) lr 1.7705e-03 eta 4:38:41 +epoch [13/50] batch [140/500] time 0.904 (0.886) data 0.000 (0.005) loss 0.6562 (1.1035) acc 78.1250 (72.2321) lr 1.7705e-03 eta 4:38:35 +epoch [13/50] batch [145/500] time 0.888 (0.886) data 0.000 (0.005) loss 0.9702 (1.0966) acc 65.6250 (72.2845) lr 1.7705e-03 eta 4:38:30 +epoch [13/50] batch [150/500] time 0.901 (0.886) data 0.000 (0.005) loss 0.8423 (1.1016) acc 78.1250 (72.2500) lr 1.7705e-03 eta 4:38:28 +epoch [13/50] batch [155/500] time 0.879 (0.886) data 0.000 (0.004) loss 1.2354 (1.0927) acc 75.0000 (72.4597) lr 1.7705e-03 eta 4:38:24 +epoch [13/50] batch [160/500] time 0.865 (0.886) data 0.000 (0.004) loss 1.0303 (1.1013) acc 78.1250 (72.2852) lr 1.7705e-03 eta 4:38:18 +epoch [13/50] batch [165/500] time 0.900 (0.886) data 0.000 (0.004) loss 1.5098 (1.0997) acc 62.5000 (72.3295) lr 1.7705e-03 eta 4:38:07 +epoch [13/50] batch [170/500] time 0.891 (0.886) data 0.000 (0.004) loss 1.4736 (1.0998) acc 68.7500 (72.2794) lr 1.7705e-03 eta 4:38:02 +epoch [13/50] batch [175/500] time 0.864 (0.885) data 0.000 (0.004) loss 1.6484 (1.1000) acc 62.5000 (72.2857) lr 1.7705e-03 eta 4:37:43 +epoch [13/50] batch [180/500] time 0.880 (0.885) data 0.000 (0.004) loss 1.6504 (1.0967) acc 56.2500 (72.3785) lr 1.7705e-03 eta 4:37:33 +epoch [13/50] batch [185/500] time 0.869 (0.885) data 0.000 (0.004) loss 0.8359 
(1.0997) acc 81.2500 (72.3311) lr 1.7705e-03 eta 4:37:27 +epoch [13/50] batch [190/500] time 0.891 (0.885) data 0.000 (0.004) loss 1.4287 (1.1026) acc 56.2500 (72.1711) lr 1.7705e-03 eta 4:37:32 +epoch [13/50] batch [195/500] time 0.853 (0.885) data 0.000 (0.004) loss 0.9844 (1.0954) acc 78.1250 (72.3397) lr 1.7705e-03 eta 4:37:25 +epoch [13/50] batch [200/500] time 0.878 (0.885) data 0.000 (0.003) loss 0.7485 (1.1008) acc 78.1250 (72.2031) lr 1.7705e-03 eta 4:37:26 +epoch [13/50] batch [205/500] time 0.876 (0.885) data 0.000 (0.003) loss 0.5151 (1.1020) acc 84.3750 (72.2104) lr 1.7705e-03 eta 4:37:19 +epoch [13/50] batch [210/500] time 0.901 (0.886) data 0.000 (0.003) loss 0.8569 (1.0954) acc 78.1250 (72.3065) lr 1.7705e-03 eta 4:37:22 +epoch [13/50] batch [215/500] time 0.867 (0.885) data 0.000 (0.003) loss 0.5498 (1.0896) acc 87.5000 (72.4709) lr 1.7705e-03 eta 4:37:14 +epoch [13/50] batch [220/500] time 0.889 (0.886) data 0.000 (0.003) loss 0.9624 (1.0909) acc 81.2500 (72.5284) lr 1.7705e-03 eta 4:37:14 +epoch [13/50] batch [225/500] time 0.883 (0.886) data 0.000 (0.003) loss 1.7334 (1.0936) acc 56.2500 (72.4722) lr 1.7705e-03 eta 4:37:09 +epoch [13/50] batch [230/500] time 0.888 (0.885) data 0.000 (0.003) loss 1.5283 (1.0968) acc 62.5000 (72.3370) lr 1.7705e-03 eta 4:37:00 +epoch [13/50] batch [235/500] time 0.896 (0.886) data 0.000 (0.003) loss 0.8911 (1.0954) acc 84.3750 (72.4069) lr 1.7705e-03 eta 4:36:59 +epoch [13/50] batch [240/500] time 0.881 (0.886) data 0.000 (0.003) loss 1.1348 (1.0948) acc 65.6250 (72.4089) lr 1.7705e-03 eta 4:36:56 +epoch [13/50] batch [245/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.0498 (1.0974) acc 78.1250 (72.3597) lr 1.7705e-03 eta 4:36:49 +epoch [13/50] batch [250/500] time 0.872 (0.885) data 0.000 (0.003) loss 1.0137 (1.0997) acc 71.8750 (72.3250) lr 1.7705e-03 eta 4:36:41 +epoch [13/50] batch [255/500] time 0.851 (0.885) data 0.000 (0.003) loss 0.5762 (1.0994) acc 75.0000 (72.2549) lr 1.7705e-03 eta 4:36:29 +epoch 
[13/50] batch [260/500] time 0.905 (0.885) data 0.000 (0.003) loss 0.9146 (1.0985) acc 62.5000 (72.1755) lr 1.7705e-03 eta 4:36:27 +epoch [13/50] batch [265/500] time 0.875 (0.885) data 0.000 (0.003) loss 1.0430 (1.0982) acc 71.8750 (72.2642) lr 1.7705e-03 eta 4:36:21 +epoch [13/50] batch [270/500] time 0.897 (0.885) data 0.000 (0.003) loss 1.3877 (1.1007) acc 71.8750 (72.2685) lr 1.7705e-03 eta 4:36:16 +epoch [13/50] batch [275/500] time 0.908 (0.885) data 0.000 (0.003) loss 1.6426 (1.1031) acc 68.7500 (72.2500) lr 1.7705e-03 eta 4:36:15 +epoch [13/50] batch [280/500] time 0.865 (0.885) data 0.000 (0.003) loss 1.1416 (1.0980) acc 87.5000 (72.4219) lr 1.7705e-03 eta 4:36:09 +epoch [13/50] batch [285/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.9199 (1.1005) acc 62.5000 (72.3684) lr 1.7705e-03 eta 4:35:58 +epoch [13/50] batch [290/500] time 0.885 (0.885) data 0.000 (0.002) loss 0.9648 (1.1003) acc 75.0000 (72.3276) lr 1.7705e-03 eta 4:35:49 +epoch [13/50] batch [295/500] time 0.863 (0.884) data 0.000 (0.002) loss 0.9912 (1.0994) acc 71.8750 (72.3305) lr 1.7705e-03 eta 4:35:43 +epoch [13/50] batch [300/500] time 0.900 (0.884) data 0.000 (0.002) loss 0.7539 (1.0992) acc 81.2500 (72.2812) lr 1.7705e-03 eta 4:35:38 +epoch [13/50] batch [305/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.1436 (1.1012) acc 68.7500 (72.2643) lr 1.7705e-03 eta 4:35:37 +epoch [13/50] batch [310/500] time 0.889 (0.885) data 0.000 (0.002) loss 0.9639 (1.1019) acc 75.0000 (72.2077) lr 1.7705e-03 eta 4:35:33 +epoch [13/50] batch [315/500] time 0.863 (0.885) data 0.000 (0.002) loss 1.2139 (1.1059) acc 62.5000 (72.0933) lr 1.7705e-03 eta 4:35:29 +epoch [13/50] batch [320/500] time 0.901 (0.885) data 0.000 (0.002) loss 0.4836 (1.1083) acc 90.6250 (72.0508) lr 1.7705e-03 eta 4:35:27 +epoch [13/50] batch [325/500] time 0.897 (0.885) data 0.000 (0.002) loss 0.7349 (1.1083) acc 78.1250 (72.0865) lr 1.7705e-03 eta 4:35:23 +epoch [13/50] batch [330/500] time 0.861 (0.885) data 0.000 (0.002) loss 
1.4473 (1.1086) acc 59.3750 (72.0739) lr 1.7705e-03 eta 4:35:16 +epoch [13/50] batch [335/500] time 0.876 (0.885) data 0.000 (0.002) loss 1.5361 (1.1093) acc 68.7500 (72.0522) lr 1.7705e-03 eta 4:35:16 +epoch [13/50] batch [340/500] time 0.885 (0.885) data 0.000 (0.002) loss 0.9443 (1.1107) acc 78.1250 (72.0680) lr 1.7705e-03 eta 4:35:10 +epoch [13/50] batch [345/500] time 0.901 (0.885) data 0.000 (0.002) loss 1.2764 (1.1132) acc 71.8750 (72.0018) lr 1.7705e-03 eta 4:35:04 +epoch [13/50] batch [350/500] time 0.864 (0.885) data 0.000 (0.002) loss 1.0469 (1.1149) acc 75.0000 (71.9554) lr 1.7705e-03 eta 4:34:56 +epoch [13/50] batch [355/500] time 0.881 (0.884) data 0.000 (0.002) loss 1.3691 (1.1121) acc 75.0000 (72.0335) lr 1.7705e-03 eta 4:34:49 +epoch [13/50] batch [360/500] time 0.902 (0.884) data 0.000 (0.002) loss 0.9805 (1.1129) acc 84.3750 (72.0573) lr 1.7705e-03 eta 4:34:46 +epoch [13/50] batch [365/500] time 0.885 (0.884) data 0.000 (0.002) loss 1.4492 (1.1145) acc 65.6250 (71.9949) lr 1.7705e-03 eta 4:34:38 +epoch [13/50] batch [370/500] time 0.864 (0.884) data 0.000 (0.002) loss 1.6074 (1.1181) acc 65.6250 (71.9341) lr 1.7705e-03 eta 4:34:29 +epoch [13/50] batch [375/500] time 0.894 (0.884) data 0.000 (0.002) loss 1.2070 (1.1206) acc 65.6250 (71.9083) lr 1.7705e-03 eta 4:34:25 +epoch [13/50] batch [380/500] time 0.878 (0.884) data 0.000 (0.002) loss 1.2754 (1.1214) acc 71.8750 (71.9079) lr 1.7705e-03 eta 4:34:23 +epoch [13/50] batch [385/500] time 0.896 (0.884) data 0.000 (0.002) loss 1.6582 (1.1214) acc 65.6250 (71.9075) lr 1.7705e-03 eta 4:34:19 +epoch [13/50] batch [390/500] time 0.880 (0.884) data 0.000 (0.002) loss 0.9473 (1.1202) acc 78.1250 (71.9071) lr 1.7705e-03 eta 4:34:17 +epoch [13/50] batch [395/500] time 0.896 (0.884) data 0.000 (0.002) loss 1.2217 (1.1219) acc 68.7500 (71.8829) lr 1.7705e-03 eta 4:34:13 +epoch [13/50] batch [400/500] time 0.858 (0.884) data 0.000 (0.002) loss 1.0850 (1.1204) acc 75.0000 (71.9531) lr 1.7705e-03 eta 4:34:07 
+epoch [13/50] batch [405/500] time 0.896 (0.884) data 0.000 (0.002) loss 0.9219 (1.1228) acc 78.1250 (71.9136) lr 1.7705e-03 eta 4:34:05 +epoch [13/50] batch [410/500] time 0.894 (0.884) data 0.000 (0.002) loss 0.5791 (1.1199) acc 90.6250 (72.0122) lr 1.7705e-03 eta 4:34:00 +epoch [13/50] batch [415/500] time 0.853 (0.884) data 0.000 (0.002) loss 0.8818 (1.1179) acc 81.2500 (72.0708) lr 1.7705e-03 eta 4:33:55 +epoch [13/50] batch [420/500] time 0.901 (0.884) data 0.000 (0.002) loss 1.0430 (1.1164) acc 81.2500 (72.0982) lr 1.7705e-03 eta 4:33:52 +epoch [13/50] batch [425/500] time 0.882 (0.884) data 0.000 (0.002) loss 0.6709 (1.1142) acc 81.2500 (72.0956) lr 1.7705e-03 eta 4:33:47 +epoch [13/50] batch [430/500] time 0.870 (0.884) data 0.000 (0.002) loss 1.0283 (1.1140) acc 71.8750 (72.1148) lr 1.7705e-03 eta 4:33:39 +epoch [13/50] batch [435/500] time 0.862 (0.884) data 0.000 (0.002) loss 0.8569 (1.1163) acc 75.0000 (72.0761) lr 1.7705e-03 eta 4:33:31 +epoch [13/50] batch [440/500] time 0.874 (0.884) data 0.000 (0.002) loss 0.6582 (1.1154) acc 78.1250 (72.0597) lr 1.7705e-03 eta 4:33:27 +epoch [13/50] batch [445/500] time 0.907 (0.884) data 0.000 (0.002) loss 1.3701 (1.1159) acc 62.5000 (72.0154) lr 1.7705e-03 eta 4:33:26 +epoch [13/50] batch [450/500] time 0.884 (0.884) data 0.000 (0.002) loss 0.8867 (1.1144) acc 81.2500 (72.0347) lr 1.7705e-03 eta 4:33:26 +epoch [13/50] batch [455/500] time 0.891 (0.884) data 0.000 (0.002) loss 1.1523 (1.1128) acc 75.0000 (72.0604) lr 1.7705e-03 eta 4:33:22 +epoch [13/50] batch [460/500] time 0.879 (0.885) data 0.000 (0.002) loss 0.8550 (1.1135) acc 75.0000 (72.0720) lr 1.7705e-03 eta 4:33:19 +epoch [13/50] batch [465/500] time 0.881 (0.884) data 0.000 (0.002) loss 1.3613 (1.1128) acc 65.6250 (72.0833) lr 1.7705e-03 eta 4:33:14 +epoch [13/50] batch [470/500] time 0.891 (0.885) data 0.000 (0.002) loss 1.3037 (1.1120) acc 62.5000 (72.0944) lr 1.7705e-03 eta 4:33:13 +epoch [13/50] batch [475/500] time 0.991 (0.885) data 0.000 
(0.002) loss 0.8765 (1.1121) acc 78.1250 (72.1053) lr 1.7705e-03 eta 4:33:11 +epoch [13/50] batch [480/500] time 0.886 (0.885) data 0.000 (0.002) loss 1.3018 (1.1122) acc 68.7500 (72.1289) lr 1.7705e-03 eta 4:33:05 +epoch [13/50] batch [485/500] time 0.860 (0.885) data 0.000 (0.002) loss 0.9922 (1.1140) acc 81.2500 (72.1005) lr 1.7705e-03 eta 4:32:59 +epoch [13/50] batch [490/500] time 0.887 (0.885) data 0.000 (0.002) loss 1.8135 (1.1162) acc 59.3750 (72.0599) lr 1.7705e-03 eta 4:32:54 +epoch [13/50] batch [495/500] time 0.878 (0.885) data 0.000 (0.002) loss 1.3945 (1.1166) acc 68.7500 (72.0455) lr 1.7705e-03 eta 4:32:50 +epoch [13/50] batch [500/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.6514 (1.1166) acc 84.3750 (72.0375) lr 1.7290e-03 eta 4:32:44 +epoch [14/50] batch [5/500] time 0.876 (1.012) data 0.000 (0.130) loss 1.1719 (0.9198) acc 68.7500 (73.7500) lr 1.7290e-03 eta 5:11:52 +epoch [14/50] batch [10/500] time 0.890 (0.948) data 0.000 (0.065) loss 0.7842 (1.0100) acc 84.3750 (73.7500) lr 1.7290e-03 eta 4:52:04 +epoch [14/50] batch [15/500] time 0.914 (0.931) data 0.000 (0.043) loss 1.2734 (1.0865) acc 78.1250 (72.5000) lr 1.7290e-03 eta 4:46:58 +epoch [14/50] batch [20/500] time 0.899 (0.919) data 0.000 (0.033) loss 1.0537 (1.1179) acc 71.8750 (71.4062) lr 1.7290e-03 eta 4:43:06 +epoch [14/50] batch [25/500] time 0.877 (0.911) data 0.000 (0.026) loss 1.0020 (1.0985) acc 75.0000 (71.7500) lr 1.7290e-03 eta 4:40:37 +epoch [14/50] batch [30/500] time 0.888 (0.909) data 0.000 (0.022) loss 0.6025 (1.0905) acc 87.5000 (72.2917) lr 1.7290e-03 eta 4:39:49 +epoch [14/50] batch [35/500] time 0.916 (0.907) data 0.000 (0.019) loss 1.0693 (1.0860) acc 75.0000 (72.5893) lr 1.7290e-03 eta 4:39:12 +epoch [14/50] batch [40/500] time 0.889 (0.903) data 0.000 (0.016) loss 1.0898 (1.0829) acc 75.0000 (72.6562) lr 1.7290e-03 eta 4:37:52 +epoch [14/50] batch [45/500] time 0.900 (0.902) data 0.000 (0.015) loss 1.4238 (1.1057) acc 71.8750 (72.5694) lr 1.7290e-03 eta 4:37:22 
+epoch [14/50] batch [50/500] time 0.886 (0.900) data 0.000 (0.013) loss 1.1162 (1.1034) acc 65.6250 (72.3750) lr 1.7290e-03 eta 4:36:36 +epoch [14/50] batch [55/500] time 0.894 (0.898) data 0.000 (0.012) loss 0.9561 (1.1013) acc 71.8750 (72.1591) lr 1.7290e-03 eta 4:36:02 +epoch [14/50] batch [60/500] time 0.880 (0.897) data 0.000 (0.011) loss 1.2402 (1.1029) acc 75.0000 (72.1875) lr 1.7290e-03 eta 4:35:43 +epoch [14/50] batch [65/500] time 0.879 (0.897) data 0.000 (0.010) loss 1.0059 (1.1163) acc 71.8750 (72.0673) lr 1.7290e-03 eta 4:35:37 +epoch [14/50] batch [70/500] time 0.895 (0.898) data 0.000 (0.009) loss 0.7915 (1.1087) acc 78.1250 (72.2768) lr 1.7290e-03 eta 4:35:41 +epoch [14/50] batch [75/500] time 0.884 (0.897) data 0.000 (0.009) loss 1.5166 (1.1172) acc 68.7500 (71.9583) lr 1.7290e-03 eta 4:35:21 +epoch [14/50] batch [80/500] time 0.886 (0.896) data 0.000 (0.008) loss 0.6025 (1.1111) acc 84.3750 (72.0703) lr 1.7290e-03 eta 4:35:06 +epoch [14/50] batch [85/500] time 0.869 (0.896) data 0.000 (0.008) loss 1.5938 (1.1192) acc 56.2500 (71.9118) lr 1.7290e-03 eta 4:34:54 +epoch [14/50] batch [90/500] time 0.923 (0.896) data 0.000 (0.007) loss 0.5303 (1.1078) acc 84.3750 (72.0833) lr 1.7290e-03 eta 4:34:49 +epoch [14/50] batch [95/500] time 0.923 (0.896) data 0.000 (0.007) loss 1.2031 (1.1061) acc 71.8750 (72.0395) lr 1.7290e-03 eta 4:34:56 +epoch [14/50] batch [100/500] time 0.896 (0.897) data 0.000 (0.007) loss 1.2168 (1.1065) acc 68.7500 (72.1875) lr 1.7290e-03 eta 4:34:56 +epoch [14/50] batch [105/500] time 0.868 (0.896) data 0.000 (0.006) loss 0.6289 (1.0980) acc 81.2500 (72.4107) lr 1.7290e-03 eta 4:34:48 +epoch [14/50] batch [110/500] time 0.852 (0.896) data 0.000 (0.006) loss 1.0176 (1.0980) acc 68.7500 (72.2159) lr 1.7290e-03 eta 4:34:33 +epoch [14/50] batch [115/500] time 0.869 (0.896) data 0.000 (0.006) loss 1.2783 (1.1027) acc 65.6250 (72.2011) lr 1.7290e-03 eta 4:34:24 +epoch [14/50] batch [120/500] time 0.887 (0.895) data 0.000 (0.006) loss 
1.1543 (1.1044) acc 84.3750 (72.1354) lr 1.7290e-03 eta 4:34:07 +epoch [14/50] batch [125/500] time 0.886 (0.894) data 0.000 (0.005) loss 0.5288 (1.1041) acc 81.2500 (72.1500) lr 1.7290e-03 eta 4:33:55 +epoch [14/50] batch [130/500] time 0.879 (0.894) data 0.000 (0.005) loss 1.1064 (1.1069) acc 65.6250 (71.8269) lr 1.7290e-03 eta 4:33:43 +epoch [14/50] batch [135/500] time 0.889 (0.894) data 0.000 (0.005) loss 1.1123 (1.1018) acc 71.8750 (71.8750) lr 1.7290e-03 eta 4:33:37 +epoch [14/50] batch [140/500] time 0.905 (0.894) data 0.000 (0.005) loss 1.3662 (1.1041) acc 68.7500 (71.7634) lr 1.7290e-03 eta 4:33:30 +epoch [14/50] batch [145/500] time 0.844 (0.893) data 0.000 (0.005) loss 0.6416 (1.1154) acc 87.5000 (71.7888) lr 1.7290e-03 eta 4:33:14 +epoch [14/50] batch [150/500] time 0.872 (0.893) data 0.000 (0.005) loss 1.1279 (1.1131) acc 68.7500 (71.8125) lr 1.7290e-03 eta 4:33:02 +epoch [14/50] batch [155/500] time 0.874 (0.892) data 0.000 (0.004) loss 1.0498 (1.1071) acc 71.8750 (71.8952) lr 1.7290e-03 eta 4:32:47 +epoch [14/50] batch [160/500] time 0.875 (0.892) data 0.000 (0.004) loss 0.7256 (1.1036) acc 87.5000 (72.1289) lr 1.7290e-03 eta 4:32:34 +epoch [14/50] batch [165/500] time 0.873 (0.891) data 0.000 (0.004) loss 1.1221 (1.1017) acc 75.0000 (72.1591) lr 1.7290e-03 eta 4:32:23 +epoch [14/50] batch [170/500] time 0.848 (0.892) data 0.000 (0.004) loss 0.7793 (1.0985) acc 71.8750 (72.2059) lr 1.7290e-03 eta 4:32:21 +epoch [14/50] batch [175/500] time 0.867 (0.891) data 0.000 (0.004) loss 1.0566 (1.0975) acc 75.0000 (72.2321) lr 1.7290e-03 eta 4:32:08 +epoch [14/50] batch [180/500] time 0.877 (0.890) data 0.000 (0.004) loss 1.2842 (1.0922) acc 65.6250 (72.2917) lr 1.7290e-03 eta 4:31:53 +epoch [14/50] batch [185/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.0049 (1.0950) acc 75.0000 (72.1791) lr 1.7290e-03 eta 4:31:45 +epoch [14/50] batch [190/500] time 0.929 (0.890) data 0.000 (0.004) loss 1.0830 (1.0905) acc 68.7500 (72.2368) lr 1.7290e-03 eta 4:31:43 
+epoch [14/50] batch [195/500] time 0.920 (0.891) data 0.000 (0.004) loss 0.7114 (1.0830) acc 81.2500 (72.4038) lr 1.7290e-03 eta 4:31:44 +epoch [14/50] batch [200/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.8101 (1.0851) acc 78.1250 (72.3281) lr 1.7290e-03 eta 4:31:31 +epoch [14/50] batch [205/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.4160 (1.0917) acc 68.7500 (72.2561) lr 1.7290e-03 eta 4:31:25 +epoch [14/50] batch [210/500] time 0.970 (0.890) data 0.000 (0.003) loss 0.4688 (1.0859) acc 93.7500 (72.3512) lr 1.7290e-03 eta 4:31:21 +epoch [14/50] batch [215/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.5190 (1.0873) acc 87.5000 (72.3110) lr 1.7290e-03 eta 4:31:14 +epoch [14/50] batch [220/500] time 0.900 (0.890) data 0.000 (0.003) loss 0.8711 (1.0884) acc 81.2500 (72.3864) lr 1.7290e-03 eta 4:31:06 +epoch [14/50] batch [225/500] time 0.922 (0.890) data 0.000 (0.003) loss 1.2656 (1.0914) acc 65.6250 (72.3611) lr 1.7290e-03 eta 4:31:01 +epoch [14/50] batch [230/500] time 0.851 (0.889) data 0.000 (0.003) loss 1.3486 (1.0930) acc 59.3750 (72.3370) lr 1.7290e-03 eta 4:30:47 +epoch [14/50] batch [235/500] time 0.875 (0.890) data 0.000 (0.003) loss 0.8394 (1.0985) acc 75.0000 (72.2473) lr 1.7290e-03 eta 4:30:47 +epoch [14/50] batch [240/500] time 0.870 (0.889) data 0.000 (0.003) loss 1.6465 (1.0973) acc 59.3750 (72.2266) lr 1.7290e-03 eta 4:30:33 +epoch [14/50] batch [245/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.3779 (1.0965) acc 75.0000 (72.2577) lr 1.7290e-03 eta 4:30:26 +epoch [14/50] batch [250/500] time 0.870 (0.889) data 0.000 (0.003) loss 1.6963 (1.0982) acc 62.5000 (72.1500) lr 1.7290e-03 eta 4:30:19 +epoch [14/50] batch [255/500] time 0.862 (0.889) data 0.000 (0.003) loss 0.9077 (1.0945) acc 84.3750 (72.2672) lr 1.7290e-03 eta 4:30:11 +epoch [14/50] batch [260/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.0957 (1.0929) acc 75.0000 (72.2356) lr 1.7290e-03 eta 4:30:03 +epoch [14/50] batch [265/500] time 0.886 (0.888) data 0.000 
(0.003) loss 1.4092 (1.0922) acc 71.8750 (72.3113) lr 1.7290e-03 eta 4:29:55 +epoch [14/50] batch [270/500] time 0.902 (0.888) data 0.000 (0.003) loss 0.7070 (1.0950) acc 84.3750 (72.3148) lr 1.7290e-03 eta 4:29:50 +epoch [14/50] batch [275/500] time 0.860 (0.888) data 0.000 (0.003) loss 1.4053 (1.0965) acc 71.8750 (72.3977) lr 1.7290e-03 eta 4:29:44 +epoch [14/50] batch [280/500] time 0.878 (0.888) data 0.000 (0.003) loss 0.6621 (1.0981) acc 78.1250 (72.3103) lr 1.7290e-03 eta 4:29:41 +epoch [14/50] batch [285/500] time 0.853 (0.888) data 0.000 (0.003) loss 0.9746 (1.0987) acc 75.0000 (72.3136) lr 1.7290e-03 eta 4:29:30 +epoch [14/50] batch [290/500] time 0.912 (0.888) data 0.000 (0.002) loss 1.7900 (1.1043) acc 62.5000 (72.2414) lr 1.7290e-03 eta 4:29:27 +epoch [14/50] batch [295/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1895 (1.1024) acc 71.8750 (72.3093) lr 1.7290e-03 eta 4:29:20 +epoch [14/50] batch [300/500] time 0.887 (0.888) data 0.000 (0.002) loss 2.1367 (1.1038) acc 56.2500 (72.2708) lr 1.7290e-03 eta 4:29:20 +epoch [14/50] batch [305/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.9087 (1.1034) acc 71.8750 (72.2848) lr 1.7290e-03 eta 4:29:16 +epoch [14/50] batch [310/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1816 (1.1012) acc 65.6250 (72.3286) lr 1.7290e-03 eta 4:29:17 +epoch [14/50] batch [315/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.2490 (1.0982) acc 68.7500 (72.4008) lr 1.7290e-03 eta 4:29:17 +epoch [14/50] batch [320/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.0000 (1.0963) acc 81.2500 (72.4023) lr 1.7290e-03 eta 4:29:10 +epoch [14/50] batch [325/500] time 0.921 (0.889) data 0.000 (0.002) loss 1.3057 (1.0961) acc 65.6250 (72.4038) lr 1.7290e-03 eta 4:29:09 +epoch [14/50] batch [330/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.8398 (1.0950) acc 81.2500 (72.4811) lr 1.7290e-03 eta 4:29:06 +epoch [14/50] batch [335/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.1182 (1.0949) acc 65.6250 (72.4720) lr 1.7290e-03 
eta 4:28:59 +epoch [14/50] batch [340/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.1211 (1.0971) acc 71.8750 (72.3438) lr 1.7290e-03 eta 4:28:51 +epoch [14/50] batch [345/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.0664 (1.0986) acc 78.1250 (72.3551) lr 1.7290e-03 eta 4:28:44 +epoch [14/50] batch [350/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.0918 (1.0991) acc 68.7500 (72.2946) lr 1.7290e-03 eta 4:28:37 +epoch [14/50] batch [355/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.6064 (1.0978) acc 87.5000 (72.3768) lr 1.7290e-03 eta 4:28:36 +epoch [14/50] batch [360/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.1465 (1.0960) acc 71.8750 (72.4132) lr 1.7290e-03 eta 4:28:31 +epoch [14/50] batch [365/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.7227 (1.0956) acc 87.5000 (72.4829) lr 1.7290e-03 eta 4:28:29 +epoch [14/50] batch [370/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.8965 (1.0976) acc 75.0000 (72.4324) lr 1.7290e-03 eta 4:28:23 +epoch [14/50] batch [375/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.7642 (1.0977) acc 87.5000 (72.5250) lr 1.7290e-03 eta 4:28:17 +epoch [14/50] batch [380/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.7554 (1.0949) acc 78.1250 (72.5576) lr 1.7290e-03 eta 4:28:09 +epoch [14/50] batch [385/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.0430 (1.0966) acc 75.0000 (72.5162) lr 1.7290e-03 eta 4:28:00 +epoch [14/50] batch [390/500] time 0.851 (0.888) data 0.000 (0.002) loss 0.8252 (1.0939) acc 84.3750 (72.5881) lr 1.7290e-03 eta 4:27:54 +epoch [14/50] batch [395/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.1855 (1.0961) acc 68.7500 (72.5396) lr 1.7290e-03 eta 4:27:46 +epoch [14/50] batch [400/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.5400 (1.0958) acc 59.3750 (72.5547) lr 1.7290e-03 eta 4:27:41 +epoch [14/50] batch [405/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.3652 (1.0947) acc 62.5000 (72.5849) lr 1.7290e-03 eta 4:27:36 +epoch [14/50] batch [410/500] time 0.877 (0.887) data 
0.000 (0.002) loss 1.3076 (1.0988) acc 68.7500 (72.4924) lr 1.7290e-03 eta 4:27:32 +epoch [14/50] batch [415/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.3418 (1.1034) acc 71.8750 (72.4172) lr 1.7290e-03 eta 4:27:27 +epoch [14/50] batch [420/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.3125 (1.1034) acc 68.7500 (72.4182) lr 1.7290e-03 eta 4:27:19 +epoch [14/50] batch [425/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2744 (1.1080) acc 65.6250 (72.3088) lr 1.7290e-03 eta 4:27:10 +epoch [14/50] batch [430/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.5474 (1.1084) acc 87.5000 (72.3328) lr 1.7290e-03 eta 4:27:02 +epoch [14/50] batch [435/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.4746 (1.1110) acc 62.5000 (72.2557) lr 1.7290e-03 eta 4:26:58 +epoch [14/50] batch [440/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.5518 (1.1110) acc 62.5000 (72.2585) lr 1.7290e-03 eta 4:26:55 +epoch [14/50] batch [445/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.1309 (1.1104) acc 71.8750 (72.3034) lr 1.7290e-03 eta 4:26:51 +epoch [14/50] batch [450/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.1855 (1.1108) acc 68.7500 (72.2986) lr 1.7290e-03 eta 4:26:46 +epoch [14/50] batch [455/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.2227 (1.1126) acc 65.6250 (72.2184) lr 1.7290e-03 eta 4:26:47 +epoch [14/50] batch [460/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.2959 (1.1135) acc 68.7500 (72.2351) lr 1.7290e-03 eta 4:26:43 +epoch [14/50] batch [465/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.0156 (1.1128) acc 75.0000 (72.2379) lr 1.7290e-03 eta 4:26:38 +epoch [14/50] batch [470/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.7900 (1.1158) acc 65.6250 (72.1676) lr 1.7290e-03 eta 4:26:32 +epoch [14/50] batch [475/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.6035 (1.1142) acc 81.2500 (72.1645) lr 1.7290e-03 eta 4:26:27 +epoch [14/50] batch [480/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.4482 (1.1155) acc 59.3750 (72.1159) lr 
1.7290e-03 eta 4:26:26 +epoch [14/50] batch [485/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.3125 (1.1147) acc 71.8750 (72.1392) lr 1.7290e-03 eta 4:26:25 +epoch [14/50] batch [490/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.2432 (1.1150) acc 68.7500 (72.0918) lr 1.7290e-03 eta 4:26:20 +epoch [14/50] batch [495/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.5430 (1.1138) acc 59.3750 (72.1023) lr 1.7290e-03 eta 4:26:18 +epoch [14/50] batch [500/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.4521 (1.1163) acc 65.6250 (72.0938) lr 1.6845e-03 eta 4:26:16 +epoch [15/50] batch [5/500] time 0.880 (1.015) data 0.000 (0.132) loss 0.7168 (1.0029) acc 78.1250 (72.5000) lr 1.6845e-03 eta 5:04:17 +epoch [15/50] batch [10/500] time 0.867 (0.954) data 0.000 (0.066) loss 1.2021 (0.9957) acc 75.0000 (72.8125) lr 1.6845e-03 eta 4:45:57 +epoch [15/50] batch [15/500] time 0.872 (0.928) data 0.000 (0.044) loss 0.6255 (1.0501) acc 84.3750 (72.2917) lr 1.6845e-03 eta 4:38:16 +epoch [15/50] batch [20/500] time 0.885 (0.917) data 0.000 (0.033) loss 1.3643 (1.1020) acc 68.7500 (71.2500) lr 1.6845e-03 eta 4:34:44 +epoch [15/50] batch [25/500] time 0.864 (0.912) data 0.000 (0.027) loss 1.0859 (1.0829) acc 59.3750 (71.3750) lr 1.6845e-03 eta 4:33:06 +epoch [15/50] batch [30/500] time 0.904 (0.908) data 0.000 (0.022) loss 1.0723 (1.0434) acc 78.1250 (72.8125) lr 1.6845e-03 eta 4:31:51 +epoch [15/50] batch [35/500] time 0.880 (0.904) data 0.000 (0.019) loss 0.9443 (1.0417) acc 87.5000 (73.2143) lr 1.6845e-03 eta 4:30:33 +epoch [15/50] batch [40/500] time 1.018 (0.904) data 0.000 (0.017) loss 1.4170 (1.0413) acc 75.0000 (73.3594) lr 1.6845e-03 eta 4:30:38 +epoch [15/50] batch [45/500] time 0.871 (0.902) data 0.000 (0.015) loss 1.6748 (1.0708) acc 59.3750 (72.7778) lr 1.6845e-03 eta 4:29:53 +epoch [15/50] batch [50/500] time 0.901 (0.900) data 0.000 (0.013) loss 0.8784 (1.0602) acc 78.1250 (73.1875) lr 1.6845e-03 eta 4:29:20 +epoch [15/50] batch [55/500] time 0.852 (0.899) data 
0.000 (0.012) loss 1.0645 (1.0631) acc 68.7500 (72.9545) lr 1.6845e-03 eta 4:28:44 +epoch [15/50] batch [60/500] time 0.865 (0.897) data 0.000 (0.011) loss 1.4268 (1.0933) acc 62.5000 (72.3958) lr 1.6845e-03 eta 4:28:16 +epoch [15/50] batch [65/500] time 0.896 (0.897) data 0.000 (0.010) loss 1.4238 (1.1006) acc 68.7500 (72.3077) lr 1.6845e-03 eta 4:28:04 +epoch [15/50] batch [70/500] time 0.888 (0.896) data 0.000 (0.010) loss 1.1836 (1.0931) acc 59.3750 (72.6339) lr 1.6845e-03 eta 4:27:46 +epoch [15/50] batch [75/500] time 0.872 (0.894) data 0.000 (0.009) loss 0.8589 (1.0776) acc 68.7500 (72.7083) lr 1.6845e-03 eta 4:27:11 +epoch [15/50] batch [80/500] time 0.851 (0.893) data 0.000 (0.008) loss 1.0391 (1.0823) acc 71.8750 (72.5391) lr 1.6845e-03 eta 4:26:46 +epoch [15/50] batch [85/500] time 0.915 (0.893) data 0.000 (0.008) loss 0.9521 (1.0777) acc 75.0000 (72.6471) lr 1.6845e-03 eta 4:26:44 +epoch [15/50] batch [90/500] time 0.870 (0.893) data 0.000 (0.008) loss 0.7393 (1.0801) acc 78.1250 (72.6042) lr 1.6845e-03 eta 4:26:29 +epoch [15/50] batch [95/500] time 0.877 (0.892) data 0.000 (0.007) loss 1.5127 (1.0801) acc 59.3750 (72.5329) lr 1.6845e-03 eta 4:26:10 +epoch [15/50] batch [100/500] time 0.907 (0.892) data 0.000 (0.007) loss 0.3989 (1.0734) acc 90.6250 (72.6562) lr 1.6845e-03 eta 4:25:59 +epoch [15/50] batch [105/500] time 0.898 (0.891) data 0.000 (0.007) loss 1.1582 (1.0762) acc 65.6250 (72.6786) lr 1.6845e-03 eta 4:25:51 +epoch [15/50] batch [110/500] time 0.893 (0.891) data 0.000 (0.006) loss 1.2061 (1.0808) acc 78.1250 (72.5568) lr 1.6845e-03 eta 4:25:40 +epoch [15/50] batch [115/500] time 0.896 (0.891) data 0.000 (0.006) loss 0.9282 (1.0736) acc 75.0000 (72.8261) lr 1.6845e-03 eta 4:25:35 +epoch [15/50] batch [120/500] time 0.861 (0.891) data 0.000 (0.006) loss 1.0420 (1.0707) acc 75.0000 (72.9948) lr 1.6845e-03 eta 4:25:35 +epoch [15/50] batch [125/500] time 0.885 (0.891) data 0.000 (0.006) loss 0.7544 (1.0675) acc 81.2500 (72.9500) lr 1.6845e-03 eta 
4:25:27 +epoch [15/50] batch [130/500] time 0.881 (0.891) data 0.000 (0.005) loss 0.8574 (1.0758) acc 75.0000 (72.9087) lr 1.6845e-03 eta 4:25:16 +epoch [15/50] batch [135/500] time 0.870 (0.890) data 0.000 (0.005) loss 1.2275 (1.0857) acc 78.1250 (72.9167) lr 1.6845e-03 eta 4:25:00 +epoch [15/50] batch [140/500] time 0.868 (0.890) data 0.000 (0.005) loss 0.3828 (1.0766) acc 96.8750 (73.1473) lr 1.6845e-03 eta 4:25:03 +epoch [15/50] batch [145/500] time 0.896 (0.891) data 0.000 (0.005) loss 0.8711 (1.0717) acc 71.8750 (73.2543) lr 1.6845e-03 eta 4:25:03 +epoch [15/50] batch [150/500] time 0.882 (0.891) data 0.000 (0.005) loss 0.7363 (1.0785) acc 81.2500 (73.2292) lr 1.6845e-03 eta 4:24:55 +epoch [15/50] batch [155/500] time 0.895 (0.891) data 0.000 (0.004) loss 1.3428 (1.0776) acc 65.6250 (73.1855) lr 1.6845e-03 eta 4:24:56 +epoch [15/50] batch [160/500] time 0.867 (0.891) data 0.000 (0.004) loss 0.9268 (1.0754) acc 71.8750 (73.2227) lr 1.6845e-03 eta 4:24:51 +epoch [15/50] batch [165/500] time 0.897 (0.890) data 0.000 (0.004) loss 1.2236 (1.0751) acc 62.5000 (73.2008) lr 1.6845e-03 eta 4:24:34 +epoch [15/50] batch [170/500] time 0.896 (0.890) data 0.000 (0.004) loss 1.0166 (1.0820) acc 81.2500 (73.2169) lr 1.6845e-03 eta 4:24:28 +epoch [15/50] batch [175/500] time 0.869 (0.890) data 0.000 (0.004) loss 1.0576 (1.0753) acc 71.8750 (73.3036) lr 1.6845e-03 eta 4:24:17 +epoch [15/50] batch [180/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.6201 (1.0793) acc 75.0000 (73.2639) lr 1.6845e-03 eta 4:24:16 +epoch [15/50] batch [185/500] time 0.882 (0.890) data 0.000 (0.004) loss 1.0312 (1.0797) acc 78.1250 (73.2770) lr 1.6845e-03 eta 4:24:14 +epoch [15/50] batch [190/500] time 0.860 (0.890) data 0.000 (0.004) loss 1.0283 (1.0834) acc 71.8750 (73.1908) lr 1.6845e-03 eta 4:24:02 +epoch [15/50] batch [195/500] time 0.874 (0.889) data 0.000 (0.004) loss 1.4375 (1.0862) acc 59.3750 (73.1250) lr 1.6845e-03 eta 4:23:56 +epoch [15/50] batch [200/500] time 0.884 (0.889) data 
0.000 (0.004) loss 0.7778 (1.0906) acc 75.0000 (73.1094) lr 1.6845e-03 eta 4:23:48 +epoch [15/50] batch [205/500] time 0.872 (0.889) data 0.000 (0.003) loss 0.6929 (1.0867) acc 81.2500 (73.1707) lr 1.6845e-03 eta 4:23:33 +epoch [15/50] batch [210/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.5029 (1.0840) acc 62.5000 (73.1845) lr 1.6845e-03 eta 4:23:26 +epoch [15/50] batch [215/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.0186 (1.0871) acc 78.1250 (73.1105) lr 1.6845e-03 eta 4:23:18 +epoch [15/50] batch [220/500] time 0.890 (0.888) data 0.000 (0.003) loss 1.3145 (1.0846) acc 65.6250 (73.1960) lr 1.6845e-03 eta 4:23:09 +epoch [15/50] batch [225/500] time 0.879 (0.888) data 0.000 (0.003) loss 0.9473 (1.0830) acc 75.0000 (73.1667) lr 1.6845e-03 eta 4:23:04 +epoch [15/50] batch [230/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.4033 (1.0870) acc 65.6250 (73.1658) lr 1.6845e-03 eta 4:22:57 +epoch [15/50] batch [235/500] time 0.899 (0.888) data 0.000 (0.003) loss 1.1631 (1.0852) acc 71.8750 (73.2580) lr 1.6845e-03 eta 4:22:49 +epoch [15/50] batch [240/500] time 0.920 (0.888) data 0.000 (0.003) loss 0.6445 (1.0807) acc 78.1250 (73.3464) lr 1.6845e-03 eta 4:22:44 +epoch [15/50] batch [245/500] time 0.871 (0.887) data 0.000 (0.003) loss 0.8940 (1.0776) acc 75.0000 (73.3291) lr 1.6845e-03 eta 4:22:33 +epoch [15/50] batch [250/500] time 0.889 (0.887) data 0.000 (0.003) loss 1.2295 (1.0808) acc 75.0000 (73.2375) lr 1.6845e-03 eta 4:22:24 +epoch [15/50] batch [255/500] time 0.892 (0.887) data 0.000 (0.003) loss 1.2256 (1.0861) acc 71.8750 (73.1863) lr 1.6845e-03 eta 4:22:21 +epoch [15/50] batch [260/500] time 0.867 (0.887) data 0.000 (0.003) loss 0.8296 (1.0865) acc 59.3750 (73.0288) lr 1.6845e-03 eta 4:22:20 +epoch [15/50] batch [265/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.3955 (1.0889) acc 65.6250 (72.8892) lr 1.6845e-03 eta 4:22:13 +epoch [15/50] batch [270/500] time 0.882 (0.887) data 0.000 (0.003) loss 1.5771 (1.0930) acc 59.3750 (72.8588) lr 
1.6845e-03 eta 4:22:09 +epoch [15/50] batch [275/500] time 0.849 (0.887) data 0.000 (0.003) loss 0.8682 (1.0907) acc 75.0000 (72.9318) lr 1.6845e-03 eta 4:22:01 +epoch [15/50] batch [280/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.3838 (1.0907) acc 71.8750 (72.9241) lr 1.6845e-03 eta 4:21:54 +epoch [15/50] batch [285/500] time 0.905 (0.887) data 0.000 (0.003) loss 1.4209 (1.0885) acc 65.6250 (72.9715) lr 1.6845e-03 eta 4:21:58 +epoch [15/50] batch [290/500] time 0.896 (0.887) data 0.000 (0.003) loss 0.9106 (1.0849) acc 71.8750 (73.0065) lr 1.6845e-03 eta 4:21:55 +epoch [15/50] batch [295/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.1797 (1.0855) acc 68.7500 (72.9449) lr 1.6845e-03 eta 4:21:55 +epoch [15/50] batch [300/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.9868 (1.0856) acc 78.1250 (72.9583) lr 1.6845e-03 eta 4:21:51 +epoch [15/50] batch [305/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.1807 (1.0849) acc 71.8750 (72.9406) lr 1.6845e-03 eta 4:21:48 +epoch [15/50] batch [310/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.5479 (1.0878) acc 59.3750 (72.8931) lr 1.6845e-03 eta 4:21:42 +epoch [15/50] batch [315/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.0098 (1.0857) acc 78.1250 (72.9266) lr 1.6845e-03 eta 4:21:37 +epoch [15/50] batch [320/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.2373 (1.0862) acc 78.1250 (72.9395) lr 1.6845e-03 eta 4:21:34 +epoch [15/50] batch [325/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.1807 (1.0866) acc 78.1250 (72.9615) lr 1.6845e-03 eta 4:21:30 +epoch [15/50] batch [330/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.3213 (1.0852) acc 62.5000 (72.9451) lr 1.6845e-03 eta 4:21:32 +epoch [15/50] batch [335/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.6055 (1.0882) acc 56.2500 (72.8265) lr 1.6845e-03 eta 4:21:28 +epoch [15/50] batch [340/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.0293 (1.0863) acc 75.0000 (72.8768) lr 1.6845e-03 eta 4:21:22 +epoch [15/50] batch [345/500] time 0.902 
(0.888) data 0.000 (0.002) loss 1.0742 (1.0856) acc 68.7500 (72.8804) lr 1.6845e-03 eta 4:21:19 +epoch [15/50] batch [350/500] time 0.900 (0.888) data 0.000 (0.002) loss 0.7861 (1.0869) acc 71.8750 (72.7857) lr 1.6845e-03 eta 4:21:16 +epoch [15/50] batch [355/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.0029 (1.0879) acc 78.1250 (72.8257) lr 1.6845e-03 eta 4:21:10 +epoch [15/50] batch [360/500] time 0.850 (0.888) data 0.000 (0.002) loss 1.1846 (1.0867) acc 59.3750 (72.8125) lr 1.6845e-03 eta 4:21:02 +epoch [15/50] batch [365/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.4336 (1.0893) acc 68.7500 (72.7055) lr 1.6845e-03 eta 4:20:55 +epoch [15/50] batch [370/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.1328 (1.0891) acc 81.2500 (72.7534) lr 1.6845e-03 eta 4:20:47 +epoch [15/50] batch [375/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.0811 (1.0907) acc 81.2500 (72.7417) lr 1.6845e-03 eta 4:20:42 +epoch [15/50] batch [380/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.5972 (1.0881) acc 84.3750 (72.8125) lr 1.6845e-03 eta 4:20:38 +epoch [15/50] batch [385/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.4990 (1.0912) acc 65.6250 (72.7354) lr 1.6845e-03 eta 4:20:33 +epoch [15/50] batch [390/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.8008 (1.0903) acc 71.8750 (72.7244) lr 1.6845e-03 eta 4:20:27 +epoch [15/50] batch [395/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.5898 (1.0934) acc 62.5000 (72.6503) lr 1.6845e-03 eta 4:20:20 +epoch [15/50] batch [400/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.9600 (1.0942) acc 68.7500 (72.6094) lr 1.6845e-03 eta 4:20:12 +epoch [15/50] batch [405/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.7437 (1.0928) acc 84.3750 (72.6389) lr 1.6845e-03 eta 4:20:06 +epoch [15/50] batch [410/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.1338 (1.0924) acc 71.8750 (72.6524) lr 1.6845e-03 eta 4:20:00 +epoch [15/50] batch [415/500] time 0.911 (0.887) data 0.000 (0.002) loss 0.8740 (1.0908) acc 75.0000 
(72.6958) lr 1.6845e-03 eta 4:19:56 +epoch [15/50] batch [420/500] time 0.846 (0.887) data 0.000 (0.002) loss 1.3721 (1.0931) acc 68.7500 (72.6116) lr 1.6845e-03 eta 4:19:49 +epoch [15/50] batch [425/500] time 1.010 (0.887) data 0.000 (0.002) loss 2.2676 (1.0953) acc 50.0000 (72.5809) lr 1.6845e-03 eta 4:19:48 +epoch [15/50] batch [430/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.6514 (1.0978) acc 65.6250 (72.5872) lr 1.6845e-03 eta 4:19:42 +epoch [15/50] batch [435/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8052 (1.0976) acc 81.2500 (72.6078) lr 1.6845e-03 eta 4:19:37 +epoch [15/50] batch [440/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.9302 (1.0963) acc 81.2500 (72.6562) lr 1.6845e-03 eta 4:19:34 +epoch [15/50] batch [445/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.0811 (1.0953) acc 75.0000 (72.6756) lr 1.6845e-03 eta 4:19:29 +epoch [15/50] batch [450/500] time 0.933 (0.887) data 0.000 (0.002) loss 2.1367 (1.1005) acc 65.6250 (72.6528) lr 1.6845e-03 eta 4:19:29 +epoch [15/50] batch [455/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.6704 (1.0997) acc 87.5000 (72.7060) lr 1.6845e-03 eta 4:19:27 +epoch [15/50] batch [460/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.8359 (1.0991) acc 75.0000 (72.6766) lr 1.6845e-03 eta 4:19:23 +epoch [15/50] batch [465/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.4551 (1.0988) acc 53.1250 (72.6210) lr 1.6845e-03 eta 4:19:18 +epoch [15/50] batch [470/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.9980 (1.0969) acc 75.0000 (72.6463) lr 1.6845e-03 eta 4:19:15 +epoch [15/50] batch [475/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.8608 (1.0978) acc 75.0000 (72.6513) lr 1.6845e-03 eta 4:19:12 +epoch [15/50] batch [480/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.5615 (1.0969) acc 68.7500 (72.6758) lr 1.6845e-03 eta 4:19:08 +epoch [15/50] batch [485/500] time 0.876 (0.887) data 0.000 (0.002) loss 0.4067 (1.1003) acc 87.5000 (72.6095) lr 1.6845e-03 eta 4:19:02 +epoch [15/50] batch [490/500] 
time 0.896 (0.887) data 0.000 (0.002) loss 1.0117 (1.0990) acc 75.0000 (72.6467) lr 1.6845e-03 eta 4:18:58 +epoch [15/50] batch [495/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.0674 (1.0972) acc 84.3750 (72.7083) lr 1.6845e-03 eta 4:18:53 +epoch [15/50] batch [500/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.0254 (1.0973) acc 68.7500 (72.6688) lr 1.6374e-03 eta 4:18:48 +epoch [16/50] batch [5/500] time 0.885 (1.034) data 0.000 (0.141) loss 1.2461 (1.3461) acc 62.5000 (65.6250) lr 1.6374e-03 eta 5:01:31 +epoch [16/50] batch [10/500] time 0.901 (0.960) data 0.000 (0.070) loss 1.2344 (1.2266) acc 68.7500 (69.3750) lr 1.6374e-03 eta 4:39:43 +epoch [16/50] batch [15/500] time 0.876 (0.939) data 0.000 (0.047) loss 1.1963 (1.1723) acc 68.7500 (70.4167) lr 1.6374e-03 eta 4:33:38 +epoch [16/50] batch [20/500] time 0.897 (0.923) data 0.000 (0.035) loss 1.1328 (1.1716) acc 65.6250 (71.2500) lr 1.6374e-03 eta 4:28:50 +epoch [16/50] batch [25/500] time 0.872 (0.913) data 0.000 (0.028) loss 1.5430 (1.1544) acc 68.7500 (71.5000) lr 1.6374e-03 eta 4:25:47 +epoch [16/50] batch [30/500] time 0.881 (0.908) data 0.000 (0.024) loss 0.6021 (1.1285) acc 71.8750 (71.0417) lr 1.6374e-03 eta 4:24:29 +epoch [16/50] batch [35/500] time 0.877 (0.905) data 0.000 (0.020) loss 0.9473 (1.1434) acc 75.0000 (71.3393) lr 1.6374e-03 eta 4:23:24 +epoch [16/50] batch [40/500] time 0.880 (0.904) data 0.000 (0.018) loss 0.5894 (1.1076) acc 75.0000 (71.4062) lr 1.6374e-03 eta 4:23:06 +epoch [16/50] batch [45/500] time 0.875 (0.902) data 0.000 (0.016) loss 0.9888 (1.0974) acc 75.0000 (71.6667) lr 1.6374e-03 eta 4:22:25 +epoch [16/50] batch [50/500] time 0.884 (0.900) data 0.000 (0.014) loss 0.7432 (1.1013) acc 68.7500 (71.5625) lr 1.6374e-03 eta 4:21:53 +epoch [16/50] batch [55/500] time 0.911 (0.900) data 0.000 (0.013) loss 1.7637 (1.1016) acc 71.8750 (72.1023) lr 1.6374e-03 eta 4:21:31 +epoch [16/50] batch [60/500] time 0.875 (0.898) data 0.000 (0.012) loss 1.1680 (1.1071) acc 71.8750 (71.9792) 
lr 1.6374e-03 eta 4:21:06 +epoch [16/50] batch [65/500] time 0.915 (0.898) data 0.000 (0.011) loss 1.0098 (1.1011) acc 65.6250 (72.1154) lr 1.6374e-03 eta 4:20:53 +epoch [16/50] batch [70/500] time 0.896 (0.897) data 0.000 (0.010) loss 0.6885 (1.0965) acc 84.3750 (72.1875) lr 1.6374e-03 eta 4:20:39 +epoch [16/50] batch [75/500] time 0.866 (0.896) data 0.000 (0.010) loss 0.7988 (1.0797) acc 78.1250 (72.5417) lr 1.6374e-03 eta 4:20:18 +epoch [16/50] batch [80/500] time 0.882 (0.896) data 0.000 (0.009) loss 1.2510 (1.0794) acc 75.0000 (72.5781) lr 1.6374e-03 eta 4:20:10 +epoch [16/50] batch [85/500] time 0.900 (0.896) data 0.000 (0.008) loss 1.0586 (1.0742) acc 78.1250 (72.6103) lr 1.6374e-03 eta 4:19:58 +epoch [16/50] batch [90/500] time 0.894 (0.895) data 0.000 (0.008) loss 1.4785 (1.0758) acc 56.2500 (72.3611) lr 1.6374e-03 eta 4:19:42 +epoch [16/50] batch [95/500] time 0.894 (0.895) data 0.000 (0.008) loss 0.6641 (1.0802) acc 78.1250 (72.3355) lr 1.6374e-03 eta 4:19:32 +epoch [16/50] batch [100/500] time 0.894 (0.895) data 0.000 (0.007) loss 1.3770 (1.0775) acc 59.3750 (72.1875) lr 1.6374e-03 eta 4:19:27 +epoch [16/50] batch [105/500] time 0.904 (0.895) data 0.000 (0.007) loss 1.0049 (1.0839) acc 75.0000 (72.2024) lr 1.6374e-03 eta 4:19:22 +epoch [16/50] batch [110/500] time 0.879 (0.896) data 0.000 (0.007) loss 1.0234 (1.0832) acc 68.7500 (72.2159) lr 1.6374e-03 eta 4:19:35 +epoch [16/50] batch [115/500] time 0.907 (0.895) data 0.000 (0.006) loss 1.0166 (1.0840) acc 68.7500 (72.1467) lr 1.6374e-03 eta 4:19:19 +epoch [16/50] batch [120/500] time 0.878 (0.894) data 0.000 (0.006) loss 0.7974 (1.0763) acc 81.2500 (72.3177) lr 1.6374e-03 eta 4:19:05 +epoch [16/50] batch [125/500] time 0.911 (0.895) data 0.000 (0.006) loss 1.0840 (1.0796) acc 71.8750 (72.1500) lr 1.6374e-03 eta 4:19:02 +epoch [16/50] batch [130/500] time 0.895 (0.894) data 0.000 (0.006) loss 1.3945 (1.0899) acc 68.7500 (72.0673) lr 1.6374e-03 eta 4:18:56 +epoch [16/50] batch [135/500] time 0.890 
(0.894) data 0.000 (0.005) loss 0.5981 (1.0880) acc 81.2500 (72.0139) lr 1.6374e-03 eta 4:18:44 +epoch [16/50] batch [140/500] time 0.862 (0.894) data 0.000 (0.005) loss 0.8384 (1.0908) acc 75.0000 (72.0089) lr 1.6374e-03 eta 4:18:31 +epoch [16/50] batch [145/500] time 0.866 (0.893) data 0.000 (0.005) loss 1.3496 (1.0970) acc 78.1250 (72.0690) lr 1.6374e-03 eta 4:18:23 +epoch [16/50] batch [150/500] time 0.898 (0.894) data 0.000 (0.005) loss 1.3838 (1.0997) acc 68.7500 (71.8542) lr 1.6374e-03 eta 4:18:28 +epoch [16/50] batch [155/500] time 0.882 (0.894) data 0.000 (0.005) loss 1.6895 (1.1018) acc 59.3750 (71.7339) lr 1.6374e-03 eta 4:18:28 +epoch [16/50] batch [160/500] time 0.884 (0.894) data 0.000 (0.005) loss 1.3125 (1.1058) acc 59.3750 (71.6602) lr 1.6374e-03 eta 4:18:15 +epoch [16/50] batch [165/500] time 0.908 (0.894) data 0.000 (0.004) loss 1.4629 (1.1151) acc 62.5000 (71.6098) lr 1.6374e-03 eta 4:18:14 +epoch [16/50] batch [170/500] time 0.900 (0.893) data 0.000 (0.004) loss 1.2871 (1.1168) acc 65.6250 (71.5809) lr 1.6374e-03 eta 4:18:03 +epoch [16/50] batch [175/500] time 0.903 (0.894) data 0.000 (0.004) loss 1.4375 (1.1196) acc 68.7500 (71.4464) lr 1.6374e-03 eta 4:18:01 +epoch [16/50] batch [180/500] time 0.897 (0.893) data 0.000 (0.004) loss 0.8311 (1.1184) acc 75.0000 (71.4757) lr 1.6374e-03 eta 4:17:52 +epoch [16/50] batch [185/500] time 0.854 (0.893) data 0.000 (0.004) loss 0.9077 (1.1187) acc 68.7500 (71.4189) lr 1.6374e-03 eta 4:17:39 +epoch [16/50] batch [190/500] time 0.883 (0.893) data 0.000 (0.004) loss 0.6948 (1.1156) acc 75.0000 (71.4145) lr 1.6374e-03 eta 4:17:29 +epoch [16/50] batch [195/500] time 0.881 (0.892) data 0.000 (0.004) loss 1.1807 (1.1193) acc 65.6250 (71.2981) lr 1.6374e-03 eta 4:17:23 +epoch [16/50] batch [200/500] time 0.875 (0.892) data 0.000 (0.004) loss 1.3203 (1.1193) acc 71.8750 (71.2500) lr 1.6374e-03 eta 4:17:14 +epoch [16/50] batch [205/500] time 0.873 (0.892) data 0.000 (0.004) loss 1.1133 (1.1216) acc 68.7500 
(71.2805) lr 1.6374e-03 eta 4:17:03 +epoch [16/50] batch [210/500] time 0.894 (0.892) data 0.000 (0.004) loss 1.3574 (1.1226) acc 62.5000 (71.2798) lr 1.6374e-03 eta 4:17:01 +epoch [16/50] batch [215/500] time 0.883 (0.892) data 0.000 (0.004) loss 0.8247 (1.1229) acc 81.2500 (71.2064) lr 1.6374e-03 eta 4:16:56 +epoch [16/50] batch [220/500] time 0.861 (0.892) data 0.000 (0.003) loss 1.0957 (1.1226) acc 59.3750 (71.1222) lr 1.6374e-03 eta 4:16:45 +epoch [16/50] batch [225/500] time 0.868 (0.891) data 0.000 (0.003) loss 0.8511 (1.1183) acc 75.0000 (71.2639) lr 1.6374e-03 eta 4:16:35 +epoch [16/50] batch [230/500] time 0.867 (0.891) data 0.000 (0.003) loss 0.6709 (1.1138) acc 84.3750 (71.3315) lr 1.6374e-03 eta 4:16:24 +epoch [16/50] batch [235/500] time 0.916 (0.891) data 0.000 (0.003) loss 1.2002 (1.1199) acc 65.6250 (71.2101) lr 1.6374e-03 eta 4:16:18 +epoch [16/50] batch [240/500] time 0.908 (0.891) data 0.000 (0.003) loss 1.5879 (1.1225) acc 62.5000 (71.1849) lr 1.6374e-03 eta 4:16:14 +epoch [16/50] batch [245/500] time 0.880 (0.891) data 0.000 (0.003) loss 1.3018 (1.1231) acc 71.8750 (71.1607) lr 1.6374e-03 eta 4:16:05 +epoch [16/50] batch [250/500] time 0.883 (0.890) data 0.000 (0.003) loss 0.7583 (1.1194) acc 81.2500 (71.2750) lr 1.6374e-03 eta 4:15:58 +epoch [16/50] batch [255/500] time 0.877 (0.891) data 0.000 (0.003) loss 0.9814 (1.1225) acc 75.0000 (71.2868) lr 1.6374e-03 eta 4:15:58 +epoch [16/50] batch [260/500] time 0.883 (0.891) data 0.000 (0.003) loss 0.8770 (1.1218) acc 71.8750 (71.3221) lr 1.6374e-03 eta 4:15:54 +epoch [16/50] batch [265/500] time 0.897 (0.891) data 0.000 (0.003) loss 1.3350 (1.1227) acc 59.3750 (71.2736) lr 1.6374e-03 eta 4:15:53 +epoch [16/50] batch [270/500] time 0.895 (0.891) data 0.000 (0.003) loss 0.6240 (1.1196) acc 81.2500 (71.3773) lr 1.6374e-03 eta 4:15:47 +epoch [16/50] batch [275/500] time 0.868 (0.890) data 0.000 (0.003) loss 0.7100 (1.1209) acc 78.1250 (71.3523) lr 1.6374e-03 eta 4:15:34 +epoch [16/50] batch [280/500] 
time 0.877 (0.890) data 0.000 (0.003) loss 1.4043 (1.1215) acc 68.7500 (71.3393) lr 1.6374e-03 eta 4:15:28 +epoch [16/50] batch [285/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.1211 (1.1205) acc 75.0000 (71.4035) lr 1.6374e-03 eta 4:15:21 +epoch [16/50] batch [290/500] time 0.907 (0.890) data 0.000 (0.003) loss 1.1924 (1.1215) acc 71.8750 (71.3578) lr 1.6374e-03 eta 4:15:13 +epoch [16/50] batch [295/500] time 0.999 (0.890) data 0.000 (0.003) loss 1.2617 (1.1189) acc 65.6250 (71.3665) lr 1.6374e-03 eta 4:15:12 +epoch [16/50] batch [300/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.2793 (1.1168) acc 71.8750 (71.4167) lr 1.6374e-03 eta 4:15:04 +epoch [16/50] batch [305/500] time 0.896 (0.890) data 0.000 (0.003) loss 0.7852 (1.1127) acc 65.6250 (71.4754) lr 1.6374e-03 eta 4:14:59 +epoch [16/50] batch [310/500] time 0.861 (0.889) data 0.000 (0.002) loss 1.4375 (1.1164) acc 68.7500 (71.3911) lr 1.6374e-03 eta 4:14:49 +epoch [16/50] batch [315/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.9224 (1.1166) acc 78.1250 (71.3988) lr 1.6374e-03 eta 4:14:42 +epoch [16/50] batch [320/500] time 0.901 (0.889) data 0.000 (0.002) loss 1.7891 (1.1130) acc 56.2500 (71.4648) lr 1.6374e-03 eta 4:14:35 +epoch [16/50] batch [325/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.4253 (1.1127) acc 87.5000 (71.5096) lr 1.6374e-03 eta 4:14:28 +epoch [16/50] batch [330/500] time 0.862 (0.889) data 0.000 (0.002) loss 0.8926 (1.1128) acc 62.5000 (71.4678) lr 1.6374e-03 eta 4:14:16 +epoch [16/50] batch [335/500] time 0.894 (0.889) data 0.000 (0.002) loss 0.7173 (1.1136) acc 90.6250 (71.5205) lr 1.6374e-03 eta 4:14:11 +epoch [16/50] batch [340/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.6436 (1.1137) acc 56.2500 (71.5257) lr 1.6374e-03 eta 4:14:07 +epoch [16/50] batch [345/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.2549 (1.1126) acc 71.8750 (71.5670) lr 1.6374e-03 eta 4:14:01 +epoch [16/50] batch [350/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.5557 (1.1131) acc 
65.6250 (71.5714) lr 1.6374e-03 eta 4:13:54 +epoch [16/50] batch [355/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.9551 (1.1156) acc 53.1250 (71.4789) lr 1.6374e-03 eta 4:13:43 +epoch [16/50] batch [360/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.0625 (1.1161) acc 75.0000 (71.4670) lr 1.6374e-03 eta 4:13:37 +epoch [16/50] batch [365/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.8833 (1.1164) acc 75.0000 (71.4897) lr 1.6374e-03 eta 4:13:32 +epoch [16/50] batch [370/500] time 0.851 (0.888) data 0.000 (0.002) loss 0.8882 (1.1148) acc 68.7500 (71.4949) lr 1.6374e-03 eta 4:13:22 +epoch [16/50] batch [375/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1064 (1.1163) acc 65.6250 (71.4000) lr 1.6374e-03 eta 4:13:18 +epoch [16/50] batch [380/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.6938 (1.1138) acc 84.3750 (71.4638) lr 1.6374e-03 eta 4:13:11 +epoch [16/50] batch [385/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.1143 (1.1153) acc 78.1250 (71.5097) lr 1.6374e-03 eta 4:13:04 +epoch [16/50] batch [390/500] time 0.852 (0.887) data 0.000 (0.002) loss 0.7358 (1.1163) acc 81.2500 (71.5625) lr 1.6374e-03 eta 4:12:58 +epoch [16/50] batch [395/500] time 0.858 (0.887) data 0.000 (0.002) loss 0.9858 (1.1166) acc 65.6250 (71.5665) lr 1.6374e-03 eta 4:12:55 +epoch [16/50] batch [400/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.0166 (1.1162) acc 68.7500 (71.5781) lr 1.6374e-03 eta 4:12:47 +epoch [16/50] batch [405/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.6729 (1.1171) acc 59.3750 (71.5509) lr 1.6374e-03 eta 4:12:41 +epoch [16/50] batch [410/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.4619 (1.1173) acc 59.3750 (71.4939) lr 1.6374e-03 eta 4:12:37 +epoch [16/50] batch [415/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.6846 (1.1161) acc 78.1250 (71.5286) lr 1.6374e-03 eta 4:12:34 +epoch [16/50] batch [420/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.2891 (1.1161) acc 68.7500 (71.5848) lr 1.6374e-03 eta 4:12:29 +epoch [16/50] batch 
[425/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.8501 (1.1128) acc 75.0000 (71.6471) lr 1.6374e-03 eta 4:12:26 +epoch [16/50] batch [430/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.8711 (1.1107) acc 78.1250 (71.7006) lr 1.6374e-03 eta 4:12:24 +epoch [16/50] batch [435/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.6089 (1.1107) acc 75.0000 (71.7026) lr 1.6374e-03 eta 4:12:21 +epoch [16/50] batch [440/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.4062 (1.1118) acc 62.5000 (71.6548) lr 1.6374e-03 eta 4:12:20 +epoch [16/50] batch [445/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.7280 (1.1121) acc 84.3750 (71.6784) lr 1.6374e-03 eta 4:12:12 +epoch [16/50] batch [450/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.2490 (1.1109) acc 71.8750 (71.6875) lr 1.6374e-03 eta 4:12:05 +epoch [16/50] batch [455/500] time 0.857 (0.887) data 0.000 (0.002) loss 1.0840 (1.1112) acc 68.7500 (71.7102) lr 1.6374e-03 eta 4:11:58 +epoch [16/50] batch [460/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.8008 (1.1144) acc 56.2500 (71.6576) lr 1.6374e-03 eta 4:11:50 +epoch [16/50] batch [465/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.7773 (1.1134) acc 75.0000 (71.6868) lr 1.6374e-03 eta 4:11:43 +epoch [16/50] batch [470/500] time 0.914 (0.887) data 0.000 (0.002) loss 1.3418 (1.1153) acc 59.3750 (71.6223) lr 1.6374e-03 eta 4:11:38 +epoch [16/50] batch [475/500] time 0.882 (0.887) data 0.000 (0.002) loss 2.0469 (1.1166) acc 50.0000 (71.5921) lr 1.6374e-03 eta 4:11:34 +epoch [16/50] batch [480/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.0205 (1.1181) acc 75.0000 (71.5625) lr 1.6374e-03 eta 4:11:27 +epoch [16/50] batch [485/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1826 (1.1179) acc 78.1250 (71.5915) lr 1.6374e-03 eta 4:11:21 +epoch [16/50] batch [490/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.1348 (1.1160) acc 68.7500 (71.6263) lr 1.6374e-03 eta 4:11:14 +epoch [16/50] batch [495/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.2002 
(1.1148) acc 78.1250 (71.6667) lr 1.6374e-03 eta 4:11:07 +epoch [16/50] batch [500/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.6836 (1.1148) acc 62.5000 (71.7000) lr 1.5878e-03 eta 4:11:02 +epoch [17/50] batch [5/500] time 0.868 (1.008) data 0.000 (0.125) loss 0.8335 (1.0637) acc 81.2500 (75.0000) lr 1.5878e-03 eta 4:45:32 +epoch [17/50] batch [10/500] time 0.908 (0.944) data 0.000 (0.063) loss 1.0566 (1.0037) acc 78.1250 (75.6250) lr 1.5878e-03 eta 4:27:17 +epoch [17/50] batch [15/500] time 0.901 (0.926) data 0.000 (0.042) loss 0.5659 (0.9962) acc 90.6250 (76.0417) lr 1.5878e-03 eta 4:22:09 +epoch [17/50] batch [20/500] time 0.858 (0.913) data 0.000 (0.031) loss 1.2256 (1.0416) acc 81.2500 (74.5312) lr 1.5878e-03 eta 4:18:24 +epoch [17/50] batch [25/500] time 0.979 (0.909) data 0.000 (0.025) loss 0.7305 (1.0387) acc 84.3750 (74.6250) lr 1.5878e-03 eta 4:17:09 +epoch [17/50] batch [30/500] time 0.903 (0.906) data 0.000 (0.021) loss 1.7793 (1.0779) acc 62.5000 (73.7500) lr 1.5878e-03 eta 4:16:10 +epoch [17/50] batch [35/500] time 0.894 (0.905) data 0.000 (0.018) loss 1.4375 (1.1018) acc 59.3750 (72.6786) lr 1.5878e-03 eta 4:15:52 +epoch [17/50] batch [40/500] time 0.882 (0.902) data 0.000 (0.016) loss 1.5078 (1.1082) acc 65.6250 (73.0469) lr 1.5878e-03 eta 4:14:57 +epoch [17/50] batch [45/500] time 0.886 (0.899) data 0.000 (0.014) loss 0.7080 (1.1020) acc 78.1250 (72.7778) lr 1.5878e-03 eta 4:14:07 +epoch [17/50] batch [50/500] time 0.889 (0.897) data 0.000 (0.013) loss 0.9805 (1.1115) acc 75.0000 (72.7500) lr 1.5878e-03 eta 4:13:28 +epoch [17/50] batch [55/500] time 0.877 (0.897) data 0.000 (0.012) loss 1.0625 (1.1059) acc 78.1250 (73.0682) lr 1.5878e-03 eta 4:13:13 +epoch [17/50] batch [60/500] time 0.860 (0.894) data 0.000 (0.011) loss 0.4937 (1.0964) acc 87.5000 (73.3333) lr 1.5878e-03 eta 4:12:30 +epoch [17/50] batch [65/500] time 0.893 (0.893) data 0.000 (0.010) loss 1.0332 (1.0869) acc 68.7500 (73.2212) lr 1.5878e-03 eta 4:12:08 +epoch [17/50] batch 
[70/500] time 0.865 (0.892) data 0.000 (0.009) loss 1.0869 (1.0896) acc 68.7500 (72.9911) lr 1.5878e-03 eta 4:11:38 +epoch [17/50] batch [75/500] time 0.931 (0.893) data 0.000 (0.009) loss 1.0293 (1.0841) acc 71.8750 (73.0417) lr 1.5878e-03 eta 4:11:54 +epoch [17/50] batch [80/500] time 0.897 (0.893) data 0.000 (0.008) loss 1.4150 (1.0973) acc 68.7500 (72.5781) lr 1.5878e-03 eta 4:11:50 +epoch [17/50] batch [85/500] time 0.984 (0.895) data 0.000 (0.008) loss 1.3525 (1.1001) acc 65.6250 (72.6103) lr 1.5878e-03 eta 4:12:13 +epoch [17/50] batch [90/500] time 0.874 (0.894) data 0.000 (0.007) loss 1.0762 (1.0891) acc 78.1250 (72.7431) lr 1.5878e-03 eta 4:12:03 +epoch [17/50] batch [95/500] time 0.874 (0.894) data 0.000 (0.007) loss 1.2500 (1.0901) acc 71.8750 (72.8618) lr 1.5878e-03 eta 4:11:51 +epoch [17/50] batch [100/500] time 0.878 (0.893) data 0.000 (0.006) loss 1.1650 (1.0881) acc 71.8750 (72.8125) lr 1.5878e-03 eta 4:11:37 +epoch [17/50] batch [105/500] time 0.900 (0.893) data 0.000 (0.006) loss 0.9893 (1.0847) acc 78.1250 (72.9167) lr 1.5878e-03 eta 4:11:32 +epoch [17/50] batch [110/500] time 0.884 (0.893) data 0.000 (0.006) loss 1.3213 (1.0892) acc 65.6250 (72.8693) lr 1.5878e-03 eta 4:11:24 +epoch [17/50] batch [115/500] time 0.866 (0.892) data 0.000 (0.006) loss 1.2549 (1.0934) acc 68.7500 (72.8804) lr 1.5878e-03 eta 4:11:03 +epoch [17/50] batch [120/500] time 0.901 (0.892) data 0.000 (0.005) loss 1.0547 (1.0950) acc 68.7500 (72.7604) lr 1.5878e-03 eta 4:10:56 +epoch [17/50] batch [125/500] time 0.868 (0.891) data 0.000 (0.005) loss 1.6768 (1.1057) acc 68.7500 (72.5500) lr 1.5878e-03 eta 4:10:43 +epoch [17/50] batch [130/500] time 0.890 (0.892) data 0.000 (0.005) loss 1.2637 (1.1028) acc 71.8750 (72.4760) lr 1.5878e-03 eta 4:10:50 +epoch [17/50] batch [135/500] time 0.884 (0.892) data 0.000 (0.005) loss 1.1064 (1.0988) acc 75.0000 (72.3843) lr 1.5878e-03 eta 4:10:42 +epoch [17/50] batch [140/500] time 0.911 (0.892) data 0.000 (0.005) loss 1.1484 (1.1002) acc 
71.8750 (72.5670) lr 1.5878e-03 eta 4:10:31 +epoch [17/50] batch [145/500] time 0.874 (0.891) data 0.000 (0.005) loss 0.7446 (1.0914) acc 78.1250 (72.8017) lr 1.5878e-03 eta 4:10:21 +epoch [17/50] batch [150/500] time 0.879 (0.891) data 0.000 (0.004) loss 0.8418 (1.0882) acc 87.5000 (73.0208) lr 1.5878e-03 eta 4:10:10 +epoch [17/50] batch [155/500] time 0.894 (0.891) data 0.000 (0.004) loss 1.6416 (1.0855) acc 75.0000 (73.1855) lr 1.5878e-03 eta 4:10:01 +epoch [17/50] batch [160/500] time 0.868 (0.890) data 0.000 (0.004) loss 0.7886 (1.0847) acc 81.2500 (73.1641) lr 1.5878e-03 eta 4:09:43 +epoch [17/50] batch [165/500] time 0.874 (0.889) data 0.000 (0.004) loss 0.9819 (1.0877) acc 75.0000 (73.1061) lr 1.5878e-03 eta 4:09:33 +epoch [17/50] batch [170/500] time 0.889 (0.889) data 0.000 (0.004) loss 1.1611 (1.0814) acc 75.0000 (73.2721) lr 1.5878e-03 eta 4:09:29 +epoch [17/50] batch [175/500] time 0.883 (0.889) data 0.000 (0.004) loss 0.7412 (1.0823) acc 78.1250 (73.1786) lr 1.5878e-03 eta 4:09:18 +epoch [17/50] batch [180/500] time 0.905 (0.889) data 0.000 (0.004) loss 0.8164 (1.0766) acc 78.1250 (73.2639) lr 1.5878e-03 eta 4:09:18 +epoch [17/50] batch [185/500] time 0.881 (0.889) data 0.000 (0.004) loss 1.2568 (1.0762) acc 59.3750 (73.1250) lr 1.5878e-03 eta 4:09:07 +epoch [17/50] batch [190/500] time 0.899 (0.889) data 0.000 (0.004) loss 1.1084 (1.0734) acc 68.7500 (73.1579) lr 1.5878e-03 eta 4:09:02 +epoch [17/50] batch [195/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.0498 (1.0765) acc 75.0000 (73.0609) lr 1.5878e-03 eta 4:08:59 +epoch [17/50] batch [200/500] time 0.896 (0.889) data 0.000 (0.003) loss 1.0068 (1.0770) acc 71.8750 (73.1094) lr 1.5878e-03 eta 4:08:49 +epoch [17/50] batch [205/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.4922 (1.0808) acc 75.0000 (73.0640) lr 1.5878e-03 eta 4:08:47 +epoch [17/50] batch [210/500] time 0.912 (0.889) data 0.000 (0.003) loss 1.4697 (1.0883) acc 71.8750 (72.8423) lr 1.5878e-03 eta 4:08:51 +epoch [17/50] batch 
[215/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.8179 (1.0840) acc 78.1250 (72.8634) lr 1.5878e-03 eta 4:08:43 +epoch [17/50] batch [220/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.1582 (1.0834) acc 71.8750 (72.8835) lr 1.5878e-03 eta 4:08:37 +epoch [17/50] batch [225/500] time 0.912 (0.889) data 0.000 (0.003) loss 1.1201 (1.0841) acc 65.6250 (72.8750) lr 1.5878e-03 eta 4:08:32 +epoch [17/50] batch [230/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.5615 (1.0910) acc 53.1250 (72.6902) lr 1.5878e-03 eta 4:08:30 +epoch [17/50] batch [235/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.5239 (1.0881) acc 87.5000 (72.7128) lr 1.5878e-03 eta 4:08:25 +epoch [17/50] batch [240/500] time 0.897 (0.889) data 0.000 (0.003) loss 0.9985 (1.0889) acc 71.8750 (72.6953) lr 1.5878e-03 eta 4:08:24 +epoch [17/50] batch [245/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.1465 (1.0872) acc 65.6250 (72.6658) lr 1.5878e-03 eta 4:08:19 +epoch [17/50] batch [250/500] time 0.891 (0.889) data 0.000 (0.003) loss 0.8838 (1.0851) acc 81.2500 (72.6625) lr 1.5878e-03 eta 4:08:10 +epoch [17/50] batch [255/500] time 0.906 (0.889) data 0.000 (0.003) loss 0.7871 (1.0812) acc 84.3750 (72.7451) lr 1.5878e-03 eta 4:08:07 +epoch [17/50] batch [260/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.6182 (1.0810) acc 81.2500 (72.7764) lr 1.5878e-03 eta 4:08:02 +epoch [17/50] batch [265/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.0068 (1.0771) acc 71.8750 (72.8774) lr 1.5878e-03 eta 4:07:57 +epoch [17/50] batch [270/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.6074 (1.0794) acc 62.5000 (72.8009) lr 1.5878e-03 eta 4:07:53 +epoch [17/50] batch [275/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.9048 (1.0739) acc 71.8750 (72.9205) lr 1.5878e-03 eta 4:07:54 +epoch [17/50] batch [280/500] time 0.880 (0.889) data 0.000 (0.002) loss 0.8374 (1.0729) acc 84.3750 (72.9911) lr 1.5878e-03 eta 4:07:48 +epoch [17/50] batch [285/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.9277 
(1.0742) acc 75.0000 (72.9496) lr 1.5878e-03 eta 4:07:41 +epoch [17/50] batch [290/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.7998 (1.0747) acc 87.5000 (72.9957) lr 1.5878e-03 eta 4:07:33 +epoch [17/50] batch [295/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.1826 (1.0747) acc 68.7500 (72.9979) lr 1.5878e-03 eta 4:07:30 +epoch [17/50] batch [300/500] time 0.867 (0.889) data 0.000 (0.002) loss 0.8438 (1.0750) acc 81.2500 (73.0208) lr 1.5878e-03 eta 4:07:18 +epoch [17/50] batch [305/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.9375 (1.0745) acc 75.0000 (73.0225) lr 1.5878e-03 eta 4:07:09 +epoch [17/50] batch [310/500] time 0.853 (0.888) data 0.000 (0.002) loss 1.0625 (1.0728) acc 81.2500 (73.0746) lr 1.5878e-03 eta 4:07:00 +epoch [17/50] batch [315/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.5947 (1.0695) acc 87.5000 (73.1746) lr 1.5878e-03 eta 4:06:54 +epoch [17/50] batch [320/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.6050 (1.0710) acc 81.2500 (73.1445) lr 1.5878e-03 eta 4:06:48 +epoch [17/50] batch [325/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.9634 (1.0711) acc 81.2500 (73.1731) lr 1.5878e-03 eta 4:06:43 +epoch [17/50] batch [330/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.9077 (1.0695) acc 65.6250 (73.1723) lr 1.5878e-03 eta 4:06:37 +epoch [17/50] batch [335/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.3887 (1.0701) acc 71.8750 (73.2183) lr 1.5878e-03 eta 4:06:32 +epoch [17/50] batch [340/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.7192 (1.0699) acc 81.2500 (73.1893) lr 1.5878e-03 eta 4:06:25 +epoch [17/50] batch [345/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.9863 (1.0676) acc 84.3750 (73.2971) lr 1.5878e-03 eta 4:06:22 +epoch [17/50] batch [350/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.8555 (1.0709) acc 75.0000 (73.1875) lr 1.5878e-03 eta 4:06:16 +epoch [17/50] batch [355/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.5908 (1.0731) acc 62.5000 (73.1690) lr 1.5878e-03 eta 4:06:13 +epoch 
[17/50] batch [360/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.2031 (1.0750) acc 71.8750 (73.1858) lr 1.5878e-03 eta 4:06:09 +epoch [17/50] batch [365/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.7954 (1.0757) acc 81.2500 (73.1336) lr 1.5878e-03 eta 4:06:04 +epoch [17/50] batch [370/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.7554 (1.0749) acc 78.1250 (73.1503) lr 1.5878e-03 eta 4:06:01 +epoch [17/50] batch [375/500] time 0.920 (0.888) data 0.000 (0.002) loss 0.6055 (1.0718) acc 84.3750 (73.1917) lr 1.5878e-03 eta 4:06:01 +epoch [17/50] batch [380/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.9658 (1.0702) acc 75.0000 (73.1990) lr 1.5878e-03 eta 4:05:54 +epoch [17/50] batch [385/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.2002 (1.0704) acc 62.5000 (73.1250) lr 1.5878e-03 eta 4:05:48 +epoch [17/50] batch [390/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.6816 (1.0768) acc 56.2500 (73.0048) lr 1.5878e-03 eta 4:05:44 +epoch [17/50] batch [395/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.6309 (1.0764) acc 81.2500 (73.0222) lr 1.5878e-03 eta 4:05:35 +epoch [17/50] batch [400/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1855 (1.0778) acc 78.1250 (72.9922) lr 1.5878e-03 eta 4:05:30 +epoch [17/50] batch [405/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.5322 (1.0790) acc 62.5000 (72.9630) lr 1.5878e-03 eta 4:05:27 +epoch [17/50] batch [410/500] time 0.867 (0.888) data 0.000 (0.002) loss 0.6582 (1.0788) acc 81.2500 (73.0030) lr 1.5878e-03 eta 4:05:23 +epoch [17/50] batch [415/500] time 0.988 (0.888) data 0.000 (0.002) loss 0.9551 (1.0803) acc 71.8750 (73.0120) lr 1.5878e-03 eta 4:05:20 +epoch [17/50] batch [420/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.6821 (1.0812) acc 81.2500 (73.0134) lr 1.5878e-03 eta 4:05:12 +epoch [17/50] batch [425/500] time 0.851 (0.887) data 0.000 (0.002) loss 1.4131 (1.0825) acc 59.3750 (72.9706) lr 1.5878e-03 eta 4:05:04 +epoch [17/50] batch [430/500] time 0.866 (0.887) data 0.000 (0.002) loss 
1.6182 (1.0832) acc 59.3750 (72.9869) lr 1.5878e-03 eta 4:04:59 +epoch [17/50] batch [435/500] time 0.920 (0.887) data 0.000 (0.002) loss 1.0732 (1.0825) acc 75.0000 (73.0172) lr 1.5878e-03 eta 4:04:57 +epoch [17/50] batch [440/500] time 0.912 (0.887) data 0.001 (0.002) loss 1.0273 (1.0794) acc 78.1250 (73.0895) lr 1.5878e-03 eta 4:04:54 +epoch [17/50] batch [445/500] time 0.855 (0.887) data 0.000 (0.002) loss 1.1338 (1.0782) acc 75.0000 (73.1039) lr 1.5878e-03 eta 4:04:50 +epoch [17/50] batch [450/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.7964 (1.0803) acc 78.1250 (73.0833) lr 1.5878e-03 eta 4:04:45 +epoch [17/50] batch [455/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.0801 (1.0830) acc 75.0000 (73.0014) lr 1.5878e-03 eta 4:04:39 +epoch [17/50] batch [460/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.9561 (1.0819) acc 78.1250 (73.0299) lr 1.5878e-03 eta 4:04:32 +epoch [17/50] batch [465/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.1758 (1.0817) acc 68.7500 (73.0578) lr 1.5878e-03 eta 4:04:28 +epoch [17/50] batch [470/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.5732 (1.0795) acc 65.6250 (73.1250) lr 1.5878e-03 eta 4:04:22 +epoch [17/50] batch [475/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.4414 (1.0823) acc 62.5000 (73.0329) lr 1.5878e-03 eta 4:04:18 +epoch [17/50] batch [480/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.6787 (1.0826) acc 84.3750 (73.0599) lr 1.5878e-03 eta 4:04:14 +epoch [17/50] batch [485/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.5850 (1.0828) acc 84.3750 (73.0735) lr 1.5878e-03 eta 4:04:10 +epoch [17/50] batch [490/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.2012 (1.0823) acc 65.6250 (73.0995) lr 1.5878e-03 eta 4:04:07 +epoch [17/50] batch [495/500] time 0.910 (0.887) data 0.000 (0.001) loss 1.1826 (1.0838) acc 65.6250 (73.0366) lr 1.5878e-03 eta 4:04:03 +epoch [17/50] batch [500/500] time 0.883 (0.887) data 0.000 (0.001) loss 1.4150 (1.0861) acc 68.7500 (73.0000) lr 1.5358e-03 eta 4:03:58 
+epoch [18/50] batch [5/500] time 0.890 (1.014) data 0.000 (0.127) loss 1.5088 (1.1306) acc 65.6250 (71.8750) lr 1.5358e-03 eta 4:38:39 +epoch [18/50] batch [10/500] time 1.025 (0.967) data 0.000 (0.063) loss 0.8926 (1.1134) acc 71.8750 (72.1875) lr 1.5358e-03 eta 4:25:49 +epoch [18/50] batch [15/500] time 0.901 (0.941) data 0.000 (0.042) loss 0.9463 (1.0809) acc 75.0000 (73.9583) lr 1.5358e-03 eta 4:18:27 +epoch [18/50] batch [20/500] time 0.889 (0.927) data 0.000 (0.032) loss 1.3320 (1.0568) acc 68.7500 (73.9062) lr 1.5358e-03 eta 4:14:31 +epoch [18/50] batch [25/500] time 0.885 (0.918) data 0.000 (0.025) loss 1.0352 (1.0755) acc 75.0000 (74.0000) lr 1.5358e-03 eta 4:12:08 +epoch [18/50] batch [30/500] time 0.891 (0.913) data 0.000 (0.021) loss 1.2754 (1.0953) acc 59.3750 (73.2292) lr 1.5358e-03 eta 4:10:35 +epoch [18/50] batch [35/500] time 0.893 (0.908) data 0.000 (0.018) loss 1.1455 (1.0887) acc 68.7500 (73.2143) lr 1.5358e-03 eta 4:09:08 +epoch [18/50] batch [40/500] time 0.870 (0.904) data 0.000 (0.016) loss 1.2607 (1.0961) acc 68.7500 (72.8125) lr 1.5358e-03 eta 4:07:58 +epoch [18/50] batch [45/500] time 0.878 (0.901) data 0.000 (0.014) loss 1.3574 (1.1015) acc 71.8750 (72.7778) lr 1.5358e-03 eta 4:06:57 +epoch [18/50] batch [50/500] time 0.884 (0.899) data 0.000 (0.013) loss 1.0947 (1.0885) acc 81.2500 (73.2500) lr 1.5358e-03 eta 4:06:26 +epoch [18/50] batch [55/500] time 0.846 (0.897) data 0.000 (0.012) loss 0.5635 (1.0585) acc 84.3750 (73.5227) lr 1.5358e-03 eta 4:05:49 +epoch [18/50] batch [60/500] time 0.864 (0.895) data 0.000 (0.011) loss 0.7979 (1.0533) acc 84.3750 (73.4896) lr 1.5358e-03 eta 4:05:13 +epoch [18/50] batch [65/500] time 0.869 (0.894) data 0.000 (0.010) loss 0.9180 (1.0438) acc 68.7500 (73.7500) lr 1.5358e-03 eta 4:04:57 +epoch [18/50] batch [70/500] time 0.989 (0.895) data 0.000 (0.009) loss 1.4395 (1.0466) acc 71.8750 (73.8393) lr 1.5358e-03 eta 4:05:06 +epoch [18/50] batch [75/500] time 0.900 (0.894) data 0.000 (0.009) loss 1.1436 
(1.0524) acc 75.0000 (73.5000) lr 1.5358e-03 eta 4:04:41 +epoch [18/50] batch [80/500] time 0.885 (0.893) data 0.000 (0.008) loss 1.1338 (1.0499) acc 68.7500 (73.4375) lr 1.5358e-03 eta 4:04:19 +epoch [18/50] batch [85/500] time 0.863 (0.892) data 0.000 (0.008) loss 1.6641 (1.0638) acc 68.7500 (73.2353) lr 1.5358e-03 eta 4:04:09 +epoch [18/50] batch [90/500] time 0.880 (0.892) data 0.000 (0.007) loss 1.0625 (1.0772) acc 62.5000 (72.8125) lr 1.5358e-03 eta 4:03:55 +epoch [18/50] batch [95/500] time 0.912 (0.892) data 0.000 (0.007) loss 1.5400 (1.0776) acc 65.6250 (73.0921) lr 1.5358e-03 eta 4:03:50 +epoch [18/50] batch [100/500] time 0.858 (0.891) data 0.000 (0.007) loss 1.2188 (1.0895) acc 71.8750 (72.9062) lr 1.5358e-03 eta 4:03:37 +epoch [18/50] batch [105/500] time 0.859 (0.891) data 0.000 (0.006) loss 1.1992 (1.0924) acc 68.7500 (72.8571) lr 1.5358e-03 eta 4:03:22 +epoch [18/50] batch [110/500] time 0.888 (0.890) data 0.000 (0.006) loss 1.5488 (1.0938) acc 65.6250 (72.8977) lr 1.5358e-03 eta 4:03:13 +epoch [18/50] batch [115/500] time 0.896 (0.891) data 0.000 (0.006) loss 0.7539 (1.0937) acc 78.1250 (72.7989) lr 1.5358e-03 eta 4:03:19 +epoch [18/50] batch [120/500] time 0.837 (0.890) data 0.000 (0.005) loss 0.8037 (1.0988) acc 75.0000 (72.7865) lr 1.5358e-03 eta 4:02:59 +epoch [18/50] batch [125/500] time 0.893 (0.890) data 0.000 (0.005) loss 1.4541 (1.0966) acc 62.5000 (72.8500) lr 1.5358e-03 eta 4:02:56 +epoch [18/50] batch [130/500] time 0.896 (0.890) data 0.000 (0.005) loss 1.1289 (1.1036) acc 71.8750 (72.6442) lr 1.5358e-03 eta 4:02:54 +epoch [18/50] batch [135/500] time 0.894 (0.890) data 0.000 (0.005) loss 0.9771 (1.1093) acc 75.0000 (72.5694) lr 1.5358e-03 eta 4:02:47 +epoch [18/50] batch [140/500] time 0.884 (0.890) data 0.000 (0.005) loss 1.6455 (1.1059) acc 62.5000 (72.6786) lr 1.5358e-03 eta 4:02:37 +epoch [18/50] batch [145/500] time 0.875 (0.889) data 0.000 (0.005) loss 1.1406 (1.1078) acc 78.1250 (72.6293) lr 1.5358e-03 eta 4:02:27 +epoch [18/50] 
batch [150/500] time 0.886 (0.889) data 0.000 (0.004) loss 1.0254 (1.1018) acc 71.8750 (72.7500) lr 1.5358e-03 eta 4:02:12 +epoch [18/50] batch [155/500] time 0.893 (0.889) data 0.000 (0.004) loss 1.1318 (1.1017) acc 75.0000 (72.8024) lr 1.5358e-03 eta 4:02:10 +epoch [18/50] batch [160/500] time 0.860 (0.889) data 0.000 (0.004) loss 1.5566 (1.1038) acc 56.2500 (72.6367) lr 1.5358e-03 eta 4:02:08 +epoch [18/50] batch [165/500] time 0.885 (0.889) data 0.000 (0.004) loss 0.6172 (1.1046) acc 84.3750 (72.4811) lr 1.5358e-03 eta 4:02:04 +epoch [18/50] batch [170/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.5303 (1.1107) acc 65.6250 (72.4816) lr 1.5358e-03 eta 4:02:02 +epoch [18/50] batch [175/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.6143 (1.1060) acc 81.2500 (72.6071) lr 1.5358e-03 eta 4:02:04 +epoch [18/50] batch [180/500] time 0.858 (0.890) data 0.000 (0.004) loss 1.1328 (1.1010) acc 68.7500 (72.6910) lr 1.5358e-03 eta 4:01:57 +epoch [18/50] batch [185/500] time 0.898 (0.890) data 0.000 (0.004) loss 0.9014 (1.0991) acc 68.7500 (72.7196) lr 1.5358e-03 eta 4:01:52 +epoch [18/50] batch [190/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.2383 (1.1006) acc 68.7500 (72.7796) lr 1.5358e-03 eta 4:01:44 +epoch [18/50] batch [195/500] time 0.862 (0.889) data 0.000 (0.003) loss 1.7598 (1.1090) acc 65.6250 (72.6442) lr 1.5358e-03 eta 4:01:35 +epoch [18/50] batch [200/500] time 0.886 (0.889) data 0.000 (0.003) loss 0.8110 (1.1057) acc 68.7500 (72.6094) lr 1.5358e-03 eta 4:01:27 +epoch [18/50] batch [205/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.0859 (1.1058) acc 62.5000 (72.5915) lr 1.5358e-03 eta 4:01:24 +epoch [18/50] batch [210/500] time 0.855 (0.889) data 0.000 (0.003) loss 1.3887 (1.1064) acc 56.2500 (72.5595) lr 1.5358e-03 eta 4:01:17 +epoch [18/50] batch [215/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.0791 (1.1083) acc 81.2500 (72.5872) lr 1.5358e-03 eta 4:01:15 +epoch [18/50] batch [220/500] time 0.886 (0.889) data 0.000 (0.003) loss 0.9771 
(1.1065) acc 75.0000 (72.6278) lr 1.5358e-03 eta 4:01:10 +epoch [18/50] batch [225/500] time 0.862 (0.889) data 0.000 (0.003) loss 1.0537 (1.0996) acc 68.7500 (72.7639) lr 1.5358e-03 eta 4:01:01 +epoch [18/50] batch [230/500] time 0.914 (0.889) data 0.000 (0.003) loss 1.3311 (1.1013) acc 71.8750 (72.7038) lr 1.5358e-03 eta 4:01:01 +epoch [18/50] batch [235/500] time 0.865 (0.888) data 0.000 (0.003) loss 0.9238 (1.0995) acc 75.0000 (72.6995) lr 1.5358e-03 eta 4:00:50 +epoch [18/50] batch [240/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.6309 (1.0999) acc 87.5000 (72.6953) lr 1.5358e-03 eta 4:00:41 +epoch [18/50] batch [245/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.8638 (1.0978) acc 78.1250 (72.7679) lr 1.5358e-03 eta 4:00:36 +epoch [18/50] batch [250/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.4004 (1.1008) acc 71.8750 (72.7125) lr 1.5358e-03 eta 4:00:30 +epoch [18/50] batch [255/500] time 0.857 (0.888) data 0.000 (0.003) loss 0.6958 (1.1019) acc 81.2500 (72.6961) lr 1.5358e-03 eta 4:00:24 +epoch [18/50] batch [260/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.1494 (1.1015) acc 68.7500 (72.6923) lr 1.5358e-03 eta 4:00:25 +epoch [18/50] batch [265/500] time 0.893 (0.888) data 0.000 (0.003) loss 1.0137 (1.1019) acc 68.7500 (72.6415) lr 1.5358e-03 eta 4:00:21 +epoch [18/50] batch [270/500] time 0.860 (0.888) data 0.000 (0.003) loss 0.9961 (1.1015) acc 68.7500 (72.6852) lr 1.5358e-03 eta 4:00:13 +epoch [18/50] batch [275/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.5479 (1.1050) acc 65.6250 (72.6364) lr 1.5358e-03 eta 4:00:07 +epoch [18/50] batch [280/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.3271 (1.1099) acc 68.7500 (72.4888) lr 1.5358e-03 eta 4:00:03 +epoch [18/50] batch [285/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.1562 (1.1075) acc 75.0000 (72.5219) lr 1.5358e-03 eta 3:59:58 +epoch [18/50] batch [290/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.5864 (1.1048) acc 81.2500 (72.5431) lr 1.5358e-03 eta 3:59:56 +epoch 
[18/50] batch [295/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.7275 (1.1072) acc 84.3750 (72.4894) lr 1.5358e-03 eta 3:59:50 +epoch [18/50] batch [300/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.9258 (1.1099) acc 81.2500 (72.4375) lr 1.5358e-03 eta 3:59:43 +epoch [18/50] batch [305/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.0801 (1.1084) acc 71.8750 (72.4488) lr 1.5358e-03 eta 3:59:34 +epoch [18/50] batch [310/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.0664 (1.1053) acc 78.1250 (72.5101) lr 1.5358e-03 eta 3:59:29 +epoch [18/50] batch [315/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.1738 (1.1045) acc 68.7500 (72.5496) lr 1.5358e-03 eta 3:59:23 +epoch [18/50] batch [320/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.1201 (1.1063) acc 56.2500 (72.5098) lr 1.5358e-03 eta 3:59:19 +epoch [18/50] batch [325/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.2744 (1.1061) acc 65.6250 (72.4904) lr 1.5358e-03 eta 3:59:14 +epoch [18/50] batch [330/500] time 0.859 (0.887) data 0.000 (0.002) loss 0.7812 (1.1040) acc 84.3750 (72.5379) lr 1.5358e-03 eta 3:59:06 +epoch [18/50] batch [335/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.2588 (1.1050) acc 68.7500 (72.5560) lr 1.5358e-03 eta 3:59:02 +epoch [18/50] batch [340/500] time 0.849 (0.887) data 0.000 (0.002) loss 0.8560 (1.1063) acc 78.1250 (72.5368) lr 1.5358e-03 eta 3:58:57 +epoch [18/50] batch [345/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.8896 (1.1056) acc 78.1250 (72.5996) lr 1.5358e-03 eta 3:58:48 +epoch [18/50] batch [350/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.8335 (1.1046) acc 68.7500 (72.5714) lr 1.5358e-03 eta 3:58:42 +epoch [18/50] batch [355/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.1514 (1.1036) acc 78.1250 (72.5968) lr 1.5358e-03 eta 3:58:34 +epoch [18/50] batch [360/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.2236 (1.1045) acc 68.7500 (72.6476) lr 1.5358e-03 eta 3:58:35 +epoch [18/50] batch [365/500] time 0.893 (0.887) data 0.000 (0.002) loss 
0.5942 (1.1017) acc 84.3750 (72.7140) lr 1.5358e-03 eta 3:58:28 +epoch [18/50] batch [370/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.9648 (1.1013) acc 71.8750 (72.7365) lr 1.5358e-03 eta 3:58:23 +epoch [18/50] batch [375/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.8232 (1.0996) acc 75.0000 (72.7583) lr 1.5358e-03 eta 3:58:16 +epoch [18/50] batch [380/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.0449 (1.1001) acc 78.1250 (72.7961) lr 1.5358e-03 eta 3:58:12 +epoch [18/50] batch [385/500] time 0.855 (0.886) data 0.000 (0.002) loss 0.9175 (1.0993) acc 81.2500 (72.8166) lr 1.5358e-03 eta 3:58:05 +epoch [18/50] batch [390/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.1484 (1.1012) acc 71.8750 (72.7404) lr 1.5358e-03 eta 3:58:01 +epoch [18/50] batch [395/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.2764 (1.1039) acc 62.5000 (72.6661) lr 1.5358e-03 eta 3:57:56 +epoch [18/50] batch [400/500] time 0.993 (0.887) data 0.000 (0.002) loss 1.5713 (1.1061) acc 65.6250 (72.6250) lr 1.5358e-03 eta 3:57:53 +epoch [18/50] batch [405/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.8813 (1.1074) acc 75.0000 (72.5463) lr 1.5358e-03 eta 3:57:48 +epoch [18/50] batch [410/500] time 0.890 (0.886) data 0.000 (0.002) loss 1.7061 (1.1074) acc 68.7500 (72.5610) lr 1.5358e-03 eta 3:57:42 +epoch [18/50] batch [415/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.6147 (1.1067) acc 81.2500 (72.5602) lr 1.5358e-03 eta 3:57:38 +epoch [18/50] batch [420/500] time 0.872 (0.886) data 0.000 (0.002) loss 2.2090 (1.1066) acc 59.3750 (72.5893) lr 1.5358e-03 eta 3:57:32 +epoch [18/50] batch [425/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.3799 (1.1097) acc 68.7500 (72.5735) lr 1.5358e-03 eta 3:57:27 +epoch [18/50] batch [430/500] time 0.877 (0.886) data 0.000 (0.002) loss 0.7505 (1.1099) acc 78.1250 (72.5872) lr 1.5358e-03 eta 3:57:24 +epoch [18/50] batch [435/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.2480 (1.1122) acc 65.6250 (72.5287) lr 1.5358e-03 eta 3:57:19 
+epoch [18/50] batch [440/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.3604 (1.1125) acc 78.1250 (72.5355) lr 1.5358e-03 eta 3:57:15 +epoch [18/50] batch [445/500] time 0.890 (0.886) data 0.000 (0.002) loss 0.7700 (1.1094) acc 78.1250 (72.6053) lr 1.5358e-03 eta 3:57:10 +epoch [18/50] batch [450/500] time 0.847 (0.886) data 0.000 (0.002) loss 1.2197 (1.1079) acc 68.7500 (72.6042) lr 1.5358e-03 eta 3:57:02 +epoch [18/50] batch [455/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.0430 (1.1071) acc 75.0000 (72.6374) lr 1.5358e-03 eta 3:56:57 +epoch [18/50] batch [460/500] time 0.865 (0.886) data 0.000 (0.002) loss 0.9302 (1.1072) acc 78.1250 (72.6223) lr 1.5358e-03 eta 3:56:50 +epoch [18/50] batch [465/500] time 0.904 (0.886) data 0.000 (0.002) loss 0.8892 (1.1083) acc 78.1250 (72.5470) lr 1.5358e-03 eta 3:56:44 +epoch [18/50] batch [470/500] time 0.852 (0.886) data 0.000 (0.002) loss 1.1279 (1.1080) acc 75.0000 (72.5465) lr 1.5358e-03 eta 3:56:38 +epoch [18/50] batch [475/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.0479 (1.1079) acc 71.8750 (72.5526) lr 1.5358e-03 eta 3:56:30 +epoch [18/50] batch [480/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.2363 (1.1075) acc 71.8750 (72.5391) lr 1.5358e-03 eta 3:56:27 +epoch [18/50] batch [485/500] time 0.885 (0.886) data 0.000 (0.002) loss 0.8149 (1.1088) acc 71.8750 (72.5064) lr 1.5358e-03 eta 3:56:21 +epoch [18/50] batch [490/500] time 0.861 (0.885) data 0.000 (0.002) loss 1.8721 (1.1103) acc 62.5000 (72.4936) lr 1.5358e-03 eta 3:56:16 +epoch [18/50] batch [495/500] time 0.883 (0.885) data 0.000 (0.002) loss 0.7212 (1.1081) acc 84.3750 (72.5758) lr 1.5358e-03 eta 3:56:12 +epoch [18/50] batch [500/500] time 0.900 (0.886) data 0.000 (0.001) loss 1.1221 (1.1063) acc 68.7500 (72.6125) lr 1.4818e-03 eta 3:56:12 +epoch [19/50] batch [5/500] time 0.880 (1.023) data 0.000 (0.131) loss 1.4014 (1.1604) acc 59.3750 (65.6250) lr 1.4818e-03 eta 4:32:45 +epoch [19/50] batch [10/500] time 0.887 (0.957) data 0.000 (0.066) 
loss 0.7681 (1.0548) acc 78.1250 (71.2500) lr 1.4818e-03 eta 4:15:09 +epoch [19/50] batch [15/500] time 0.897 (0.941) data 0.000 (0.044) loss 0.7944 (1.0406) acc 84.3750 (72.7083) lr 1.4818e-03 eta 4:10:37 +epoch [19/50] batch [20/500] time 0.874 (0.927) data 0.000 (0.033) loss 1.3594 (1.0702) acc 68.7500 (72.6562) lr 1.4818e-03 eta 4:07:00 +epoch [19/50] batch [25/500] time 0.889 (0.917) data 0.000 (0.026) loss 1.3018 (1.1049) acc 71.8750 (72.2500) lr 1.4818e-03 eta 4:04:05 +epoch [19/50] batch [30/500] time 0.904 (0.912) data 0.000 (0.022) loss 0.7769 (1.0759) acc 68.7500 (72.7083) lr 1.4818e-03 eta 4:02:45 +epoch [19/50] batch [35/500] time 0.895 (0.909) data 0.000 (0.019) loss 0.9521 (1.0571) acc 84.3750 (73.4821) lr 1.4818e-03 eta 4:01:51 +epoch [19/50] batch [40/500] time 0.850 (0.905) data 0.000 (0.017) loss 1.5117 (1.0745) acc 56.2500 (73.1250) lr 1.4818e-03 eta 4:00:36 +epoch [19/50] batch [45/500] time 0.880 (0.902) data 0.000 (0.015) loss 0.4683 (1.0812) acc 84.3750 (72.9861) lr 1.4818e-03 eta 3:59:55 +epoch [19/50] batch [50/500] time 0.888 (0.900) data 0.000 (0.013) loss 1.0879 (1.0626) acc 78.1250 (73.8125) lr 1.4818e-03 eta 3:59:09 +epoch [19/50] batch [55/500] time 0.875 (0.897) data 0.000 (0.012) loss 0.8091 (1.0632) acc 68.7500 (73.5227) lr 1.4818e-03 eta 3:58:26 +epoch [19/50] batch [60/500] time 0.892 (0.897) data 0.000 (0.011) loss 0.7939 (1.0460) acc 71.8750 (73.3333) lr 1.4818e-03 eta 3:58:25 +epoch [19/50] batch [65/500] time 0.878 (0.896) data 0.000 (0.010) loss 0.5239 (1.0456) acc 87.5000 (73.5096) lr 1.4818e-03 eta 3:57:51 +epoch [19/50] batch [70/500] time 0.897 (0.895) data 0.000 (0.010) loss 0.6152 (1.0308) acc 87.5000 (74.1964) lr 1.4818e-03 eta 3:57:31 +epoch [19/50] batch [75/500] time 0.874 (0.893) data 0.000 (0.009) loss 0.9795 (1.0221) acc 78.1250 (74.2500) lr 1.4818e-03 eta 3:57:07 +epoch [19/50] batch [80/500] time 0.870 (0.892) data 0.000 (0.008) loss 1.3184 (1.0345) acc 75.0000 (74.2188) lr 1.4818e-03 eta 3:56:45 +epoch 
[19/50] batch [85/500] time 0.880 (0.891) data 0.000 (0.008) loss 1.0811 (1.0286) acc 75.0000 (74.3750) lr 1.4818e-03 eta 3:56:27 +epoch [19/50] batch [90/500] time 0.891 (0.891) data 0.000 (0.007) loss 1.0215 (1.0305) acc 75.0000 (74.2708) lr 1.4818e-03 eta 3:56:16 +epoch [19/50] batch [95/500] time 0.875 (0.891) data 0.000 (0.007) loss 0.8130 (1.0274) acc 68.7500 (74.3750) lr 1.4818e-03 eta 3:56:03 +epoch [19/50] batch [100/500] time 0.900 (0.890) data 0.000 (0.007) loss 1.0869 (1.0344) acc 65.6250 (74.0938) lr 1.4818e-03 eta 3:55:52 +epoch [19/50] batch [105/500] time 0.878 (0.890) data 0.000 (0.006) loss 1.0498 (1.0420) acc 68.7500 (73.8690) lr 1.4818e-03 eta 3:55:51 +epoch [19/50] batch [110/500] time 0.863 (0.890) data 0.000 (0.006) loss 1.2529 (1.0442) acc 71.8750 (73.7500) lr 1.4818e-03 eta 3:55:34 +epoch [19/50] batch [115/500] time 0.883 (0.889) data 0.000 (0.006) loss 1.7100 (1.0464) acc 56.2500 (73.6957) lr 1.4818e-03 eta 3:55:14 +epoch [19/50] batch [120/500] time 0.877 (0.889) data 0.000 (0.006) loss 1.3457 (1.0522) acc 75.0000 (73.6458) lr 1.4818e-03 eta 3:55:09 +epoch [19/50] batch [125/500] time 0.909 (0.888) data 0.000 (0.005) loss 1.0166 (1.0606) acc 71.8750 (73.4500) lr 1.4818e-03 eta 3:55:00 +epoch [19/50] batch [130/500] time 0.848 (0.888) data 0.000 (0.005) loss 1.1797 (1.0516) acc 75.0000 (73.6538) lr 1.4818e-03 eta 3:54:47 +epoch [19/50] batch [135/500] time 0.901 (0.887) data 0.000 (0.005) loss 0.8975 (1.0389) acc 75.0000 (73.8194) lr 1.4818e-03 eta 3:54:37 +epoch [19/50] batch [140/500] time 0.872 (0.887) data 0.000 (0.005) loss 1.2900 (1.0402) acc 62.5000 (73.8393) lr 1.4818e-03 eta 3:54:32 +epoch [19/50] batch [145/500] time 0.895 (0.888) data 0.000 (0.005) loss 1.4805 (1.0366) acc 68.7500 (73.9224) lr 1.4818e-03 eta 3:54:31 +epoch [19/50] batch [150/500] time 0.861 (0.888) data 0.000 (0.005) loss 1.2705 (1.0364) acc 65.6250 (73.8750) lr 1.4818e-03 eta 3:54:27 +epoch [19/50] batch [155/500] time 0.890 (0.887) data 0.000 (0.004) loss 
0.5947 (1.0321) acc 81.2500 (73.9113) lr 1.4818e-03 eta 3:54:18 +epoch [19/50] batch [160/500] time 0.871 (0.887) data 0.000 (0.004) loss 0.7427 (1.0366) acc 78.1250 (73.8281) lr 1.4818e-03 eta 3:54:04 +epoch [19/50] batch [165/500] time 0.914 (0.887) data 0.000 (0.004) loss 0.7954 (1.0352) acc 81.2500 (73.6932) lr 1.4818e-03 eta 3:54:00 +epoch [19/50] batch [170/500] time 0.886 (0.887) data 0.000 (0.004) loss 0.6719 (1.0354) acc 75.0000 (73.5478) lr 1.4818e-03 eta 3:53:56 +epoch [19/50] batch [175/500] time 0.872 (0.887) data 0.000 (0.004) loss 0.5283 (1.0352) acc 81.2500 (73.5000) lr 1.4818e-03 eta 3:53:53 +epoch [19/50] batch [180/500] time 0.892 (0.886) data 0.000 (0.004) loss 1.3594 (1.0407) acc 62.5000 (73.2986) lr 1.4818e-03 eta 3:53:44 +epoch [19/50] batch [185/500] time 0.892 (0.887) data 0.000 (0.004) loss 0.9756 (1.0441) acc 81.2500 (73.2264) lr 1.4818e-03 eta 3:53:41 +epoch [19/50] batch [190/500] time 0.891 (0.886) data 0.000 (0.004) loss 1.6465 (1.0477) acc 53.1250 (73.2072) lr 1.4818e-03 eta 3:53:33 +epoch [19/50] batch [195/500] time 0.909 (0.887) data 0.000 (0.004) loss 1.2275 (1.0419) acc 71.8750 (73.3013) lr 1.4818e-03 eta 3:53:32 +epoch [19/50] batch [200/500] time 0.859 (0.886) data 0.000 (0.004) loss 0.8467 (1.0377) acc 71.8750 (73.3438) lr 1.4818e-03 eta 3:53:24 +epoch [19/50] batch [205/500] time 0.872 (0.887) data 0.000 (0.003) loss 1.0596 (1.0386) acc 75.0000 (73.3994) lr 1.4818e-03 eta 3:53:23 +epoch [19/50] batch [210/500] time 0.872 (0.887) data 0.000 (0.003) loss 1.0830 (1.0364) acc 78.1250 (73.4970) lr 1.4818e-03 eta 3:53:18 +epoch [19/50] batch [215/500] time 0.870 (0.886) data 0.000 (0.003) loss 1.6807 (1.0417) acc 75.0000 (73.4738) lr 1.4818e-03 eta 3:53:07 +epoch [19/50] batch [220/500] time 0.892 (0.886) data 0.000 (0.003) loss 1.0918 (1.0413) acc 75.0000 (73.5227) lr 1.4818e-03 eta 3:53:00 +epoch [19/50] batch [225/500] time 0.885 (0.886) data 0.000 (0.003) loss 1.2861 (1.0422) acc 68.7500 (73.5417) lr 1.4818e-03 eta 3:52:53 
+epoch [19/50] batch [230/500] time 0.872 (0.886) data 0.000 (0.003) loss 0.8120 (1.0492) acc 71.8750 (73.3560) lr 1.4818e-03 eta 3:52:50 +epoch [19/50] batch [235/500] time 0.858 (0.886) data 0.000 (0.003) loss 1.1465 (1.0491) acc 65.6250 (73.2846) lr 1.4818e-03 eta 3:52:41 +epoch [19/50] batch [240/500] time 0.898 (0.885) data 0.001 (0.003) loss 0.9688 (1.0500) acc 75.0000 (73.2161) lr 1.4818e-03 eta 3:52:34 +epoch [19/50] batch [245/500] time 0.893 (0.886) data 0.000 (0.003) loss 1.3799 (1.0505) acc 65.6250 (73.2653) lr 1.4818e-03 eta 3:52:32 +epoch [19/50] batch [250/500] time 0.896 (0.886) data 0.000 (0.003) loss 1.3564 (1.0467) acc 65.6250 (73.3250) lr 1.4818e-03 eta 3:52:37 +epoch [19/50] batch [255/500] time 0.893 (0.886) data 0.000 (0.003) loss 1.6172 (1.0492) acc 62.5000 (73.3088) lr 1.4818e-03 eta 3:52:37 +epoch [19/50] batch [260/500] time 0.915 (0.887) data 0.000 (0.003) loss 1.2236 (1.0502) acc 75.0000 (73.3654) lr 1.4818e-03 eta 3:52:36 +epoch [19/50] batch [265/500] time 0.890 (0.887) data 0.000 (0.003) loss 1.0986 (1.0530) acc 71.8750 (73.3255) lr 1.4818e-03 eta 3:52:32 +epoch [19/50] batch [270/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.1807 (1.0512) acc 75.0000 (73.3681) lr 1.4818e-03 eta 3:52:26 +epoch [19/50] batch [275/500] time 0.899 (0.887) data 0.000 (0.003) loss 0.4810 (1.0585) acc 90.6250 (73.2614) lr 1.4818e-03 eta 3:52:27 +epoch [19/50] batch [280/500] time 0.861 (0.887) data 0.000 (0.003) loss 1.5371 (1.0621) acc 62.5000 (73.1920) lr 1.4818e-03 eta 3:52:19 +epoch [19/50] batch [285/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.7075 (1.0583) acc 81.2500 (73.2127) lr 1.4818e-03 eta 3:52:13 +epoch [19/50] batch [290/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.1855 (1.0593) acc 75.0000 (73.2651) lr 1.4818e-03 eta 3:52:12 +epoch [19/50] batch [295/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.3477 (1.0602) acc 65.6250 (73.2309) lr 1.4818e-03 eta 3:52:06 +epoch [19/50] batch [300/500] time 0.865 (0.887) data 0.000 
(0.002) loss 1.7725 (1.0623) acc 53.1250 (73.1875) lr 1.4818e-03 eta 3:51:58 +epoch [19/50] batch [305/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.4482 (1.0621) acc 68.7500 (73.1660) lr 1.4818e-03 eta 3:51:54 +epoch [19/50] batch [310/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.1895 (1.0658) acc 65.6250 (73.1048) lr 1.4818e-03 eta 3:51:45 +epoch [19/50] batch [315/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.5137 (1.0671) acc 56.2500 (73.0456) lr 1.4818e-03 eta 3:51:40 +epoch [19/50] batch [320/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.1484 (1.0702) acc 75.0000 (73.0078) lr 1.4818e-03 eta 3:51:34 +epoch [19/50] batch [325/500] time 0.850 (0.886) data 0.000 (0.002) loss 1.2861 (1.0734) acc 62.5000 (72.9808) lr 1.4818e-03 eta 3:51:27 +epoch [19/50] batch [330/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.2354 (1.0755) acc 71.8750 (72.9072) lr 1.4818e-03 eta 3:51:19 +epoch [19/50] batch [335/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.0742 (1.0777) acc 68.7500 (72.7892) lr 1.4818e-03 eta 3:51:16 +epoch [19/50] batch [340/500] time 0.855 (0.885) data 0.000 (0.002) loss 1.0850 (1.0806) acc 78.1250 (72.7114) lr 1.4818e-03 eta 3:51:05 +epoch [19/50] batch [345/500] time 0.993 (0.886) data 0.000 (0.002) loss 1.5420 (1.0810) acc 71.8750 (72.7717) lr 1.4818e-03 eta 3:51:03 +epoch [19/50] batch [350/500] time 0.858 (0.885) data 0.000 (0.002) loss 0.8384 (1.0799) acc 78.1250 (72.8125) lr 1.4818e-03 eta 3:50:54 +epoch [19/50] batch [355/500] time 0.898 (0.885) data 0.000 (0.002) loss 1.6182 (1.0804) acc 65.6250 (72.8081) lr 1.4818e-03 eta 3:50:49 +epoch [19/50] batch [360/500] time 0.861 (0.885) data 0.000 (0.002) loss 0.8271 (1.0806) acc 75.0000 (72.7778) lr 1.4818e-03 eta 3:50:41 +epoch [19/50] batch [365/500] time 0.894 (0.885) data 0.000 (0.002) loss 0.7339 (1.0813) acc 84.3750 (72.7740) lr 1.4818e-03 eta 3:50:37 +epoch [19/50] batch [370/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.5459 (1.0817) acc 62.5000 (72.8125) lr 1.4818e-03 
eta 3:50:33 +epoch [19/50] batch [375/500] time 0.905 (0.885) data 0.000 (0.002) loss 1.1309 (1.0794) acc 71.8750 (72.8667) lr 1.4818e-03 eta 3:50:29 +epoch [19/50] batch [380/500] time 0.862 (0.885) data 0.000 (0.002) loss 1.6211 (1.0811) acc 65.6250 (72.8536) lr 1.4818e-03 eta 3:50:22 +epoch [19/50] batch [385/500] time 0.895 (0.885) data 0.000 (0.002) loss 0.8340 (1.0801) acc 78.1250 (72.8328) lr 1.4818e-03 eta 3:50:18 +epoch [19/50] batch [390/500] time 0.902 (0.885) data 0.000 (0.002) loss 0.8506 (1.0838) acc 71.8750 (72.7404) lr 1.4818e-03 eta 3:50:19 +epoch [19/50] batch [395/500] time 0.854 (0.885) data 0.000 (0.002) loss 0.6353 (1.0824) acc 81.2500 (72.8165) lr 1.4818e-03 eta 3:50:11 +epoch [19/50] batch [400/500] time 0.858 (0.885) data 0.000 (0.002) loss 0.9644 (1.0821) acc 71.8750 (72.8438) lr 1.4818e-03 eta 3:50:04 +epoch [19/50] batch [405/500] time 0.884 (0.885) data 0.000 (0.002) loss 0.8794 (1.0870) acc 68.7500 (72.7623) lr 1.4818e-03 eta 3:50:00 +epoch [19/50] batch [410/500] time 0.932 (0.885) data 0.000 (0.002) loss 1.2725 (1.0877) acc 68.7500 (72.7363) lr 1.4818e-03 eta 3:49:59 +epoch [19/50] batch [415/500] time 0.892 (0.885) data 0.000 (0.002) loss 1.2939 (1.0887) acc 65.6250 (72.6958) lr 1.4818e-03 eta 3:49:53 +epoch [19/50] batch [420/500] time 0.894 (0.885) data 0.000 (0.002) loss 0.9194 (1.0898) acc 78.1250 (72.7158) lr 1.4818e-03 eta 3:49:47 +epoch [19/50] batch [425/500] time 0.880 (0.885) data 0.000 (0.002) loss 1.4355 (1.0905) acc 65.6250 (72.6912) lr 1.4818e-03 eta 3:49:42 +epoch [19/50] batch [430/500] time 0.917 (0.885) data 0.000 (0.002) loss 1.1377 (1.0923) acc 71.8750 (72.6962) lr 1.4818e-03 eta 3:49:40 +epoch [19/50] batch [435/500] time 0.907 (0.885) data 0.000 (0.002) loss 0.7095 (1.0919) acc 78.1250 (72.7011) lr 1.4818e-03 eta 3:49:40 +epoch [19/50] batch [440/500] time 0.904 (0.885) data 0.000 (0.002) loss 2.0762 (1.0936) acc 65.6250 (72.6989) lr 1.4818e-03 eta 3:49:36 +epoch [19/50] batch [445/500] time 0.858 (0.885) data 
0.000 (0.002) loss 1.2168 (1.0947) acc 65.6250 (72.6756) lr 1.4818e-03 eta 3:49:30 +epoch [19/50] batch [450/500] time 0.877 (0.885) data 0.000 (0.002) loss 1.2002 (1.0957) acc 68.7500 (72.6528) lr 1.4818e-03 eta 3:49:23 +epoch [19/50] batch [455/500] time 0.884 (0.885) data 0.000 (0.002) loss 0.8628 (1.0930) acc 75.0000 (72.7198) lr 1.4818e-03 eta 3:49:19 +epoch [19/50] batch [460/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.0410 (1.0935) acc 75.0000 (72.7446) lr 1.4818e-03 eta 3:49:14 +epoch [19/50] batch [465/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.2900 (1.0944) acc 75.0000 (72.7688) lr 1.4818e-03 eta 3:49:09 +epoch [19/50] batch [470/500] time 0.908 (0.885) data 0.000 (0.002) loss 0.9302 (1.0953) acc 75.0000 (72.7194) lr 1.4818e-03 eta 3:49:06 +epoch [19/50] batch [475/500] time 0.867 (0.885) data 0.000 (0.002) loss 1.0186 (1.0930) acc 75.0000 (72.7500) lr 1.4818e-03 eta 3:48:59 +epoch [19/50] batch [480/500] time 0.859 (0.885) data 0.000 (0.002) loss 1.3252 (1.0940) acc 59.3750 (72.6953) lr 1.4818e-03 eta 3:48:54 +epoch [19/50] batch [485/500] time 0.909 (0.885) data 0.000 (0.002) loss 1.2588 (1.0948) acc 68.7500 (72.7126) lr 1.4818e-03 eta 3:48:49 +epoch [19/50] batch [490/500] time 0.894 (0.885) data 0.000 (0.002) loss 1.0449 (1.0948) acc 75.0000 (72.6722) lr 1.4818e-03 eta 3:48:50 +epoch [19/50] batch [495/500] time 0.890 (0.885) data 0.000 (0.002) loss 0.7393 (1.0932) acc 71.8750 (72.6705) lr 1.4818e-03 eta 3:48:46 +epoch [19/50] batch [500/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.3633 (1.0937) acc 65.6250 (72.6625) lr 1.4258e-03 eta 3:48:42 +epoch [20/50] batch [5/500] time 0.870 (1.027) data 0.000 (0.129) loss 1.0293 (0.9031) acc 68.7500 (75.6250) lr 1.4258e-03 eta 4:25:08 +epoch [20/50] batch [10/500] time 0.865 (0.960) data 0.000 (0.065) loss 0.8184 (1.0091) acc 71.8750 (73.4375) lr 1.4258e-03 eta 4:07:45 +epoch [20/50] batch [15/500] time 0.892 (0.939) data 0.000 (0.043) loss 0.7666 (1.0685) acc 87.5000 (72.7083) lr 1.4258e-03 
eta 4:02:20 +epoch [20/50] batch [20/500] time 0.889 (0.925) data 0.000 (0.032) loss 0.9517 (1.0745) acc 78.1250 (73.7500) lr 1.4258e-03 eta 3:58:33 +epoch [20/50] batch [25/500] time 0.867 (0.917) data 0.000 (0.026) loss 1.3467 (1.1152) acc 75.0000 (72.8750) lr 1.4258e-03 eta 3:56:30 +epoch [20/50] batch [30/500] time 0.899 (0.912) data 0.000 (0.022) loss 1.0498 (1.1166) acc 68.7500 (72.7083) lr 1.4258e-03 eta 3:55:15 +epoch [20/50] batch [35/500] time 0.849 (0.907) data 0.000 (0.019) loss 0.7290 (1.0852) acc 84.3750 (73.3036) lr 1.4258e-03 eta 3:53:43 +epoch [20/50] batch [40/500] time 0.897 (0.904) data 0.000 (0.016) loss 1.3662 (1.0916) acc 65.6250 (73.1250) lr 1.4258e-03 eta 3:53:02 +epoch [20/50] batch [45/500] time 0.876 (0.905) data 0.000 (0.015) loss 1.5293 (1.0869) acc 71.8750 (73.3333) lr 1.4258e-03 eta 3:53:05 +epoch [20/50] batch [50/500] time 0.861 (0.902) data 0.000 (0.013) loss 0.6963 (1.0647) acc 84.3750 (73.6875) lr 1.4258e-03 eta 3:52:19 +epoch [20/50] batch [55/500] time 0.884 (0.900) data 0.000 (0.012) loss 0.9458 (1.0666) acc 71.8750 (73.5227) lr 1.4258e-03 eta 3:51:40 +epoch [20/50] batch [60/500] time 0.905 (0.899) data 0.000 (0.011) loss 1.2676 (1.0588) acc 56.2500 (73.4375) lr 1.4258e-03 eta 3:51:13 +epoch [20/50] batch [65/500] time 0.887 (0.898) data 0.000 (0.010) loss 1.0391 (1.0706) acc 75.0000 (73.0288) lr 1.4258e-03 eta 3:50:59 +epoch [20/50] batch [70/500] time 0.901 (0.897) data 0.000 (0.009) loss 1.2188 (1.0838) acc 75.0000 (73.0357) lr 1.4258e-03 eta 3:50:36 +epoch [20/50] batch [75/500] time 0.900 (0.896) data 0.000 (0.009) loss 0.9482 (1.0802) acc 81.2500 (73.2083) lr 1.4258e-03 eta 3:50:13 +epoch [20/50] batch [80/500] time 0.873 (0.895) data 0.000 (0.008) loss 0.6152 (1.0619) acc 84.3750 (73.6719) lr 1.4258e-03 eta 3:49:56 +epoch [20/50] batch [85/500] time 0.859 (0.894) data 0.000 (0.008) loss 0.8223 (1.0533) acc 78.1250 (73.7868) lr 1.4258e-03 eta 3:49:36 +epoch [20/50] batch [90/500] time 0.890 (0.895) data 0.000 (0.007) 
loss 0.8901 (1.0444) acc 71.8750 (73.6111) lr 1.4258e-03 eta 3:49:48 +epoch [20/50] batch [95/500] time 0.917 (0.895) data 0.000 (0.007) loss 1.3623 (1.0457) acc 75.0000 (73.6842) lr 1.4258e-03 eta 3:49:42 +epoch [20/50] batch [100/500] time 0.874 (0.895) data 0.000 (0.007) loss 0.9668 (1.0443) acc 71.8750 (73.8438) lr 1.4258e-03 eta 3:49:38 +epoch [20/50] batch [105/500] time 0.885 (0.894) data 0.000 (0.006) loss 0.6582 (1.0367) acc 87.5000 (73.9881) lr 1.4258e-03 eta 3:49:26 +epoch [20/50] batch [110/500] time 0.856 (0.894) data 0.000 (0.006) loss 1.1094 (1.0401) acc 78.1250 (74.0057) lr 1.4258e-03 eta 3:49:12 +epoch [20/50] batch [115/500] time 0.856 (0.894) data 0.000 (0.006) loss 1.8760 (1.0543) acc 65.6250 (73.7772) lr 1.4258e-03 eta 3:49:08 +epoch [20/50] batch [120/500] time 0.872 (0.893) data 0.000 (0.006) loss 1.2305 (1.0532) acc 59.3750 (73.6458) lr 1.4258e-03 eta 3:49:00 +epoch [20/50] batch [125/500] time 0.906 (0.893) data 0.000 (0.005) loss 1.5889 (1.0529) acc 68.7500 (73.8500) lr 1.4258e-03 eta 3:48:53 +epoch [20/50] batch [130/500] time 0.870 (0.893) data 0.000 (0.005) loss 1.2373 (1.0552) acc 68.7500 (73.6779) lr 1.4258e-03 eta 3:48:46 +epoch [20/50] batch [135/500] time 0.879 (0.893) data 0.000 (0.005) loss 0.9453 (1.0531) acc 71.8750 (73.6574) lr 1.4258e-03 eta 3:48:44 +epoch [20/50] batch [140/500] time 0.871 (0.893) data 0.000 (0.005) loss 1.0723 (1.0585) acc 78.1250 (73.5714) lr 1.4258e-03 eta 3:48:37 +epoch [20/50] batch [145/500] time 0.888 (0.893) data 0.000 (0.005) loss 1.1865 (1.0616) acc 71.8750 (73.6207) lr 1.4258e-03 eta 3:48:27 +epoch [20/50] batch [150/500] time 0.899 (0.893) data 0.000 (0.005) loss 0.9053 (1.0605) acc 68.7500 (73.5833) lr 1.4258e-03 eta 3:48:19 +epoch [20/50] batch [155/500] time 0.880 (0.892) data 0.000 (0.004) loss 1.2129 (1.0698) acc 65.6250 (73.4274) lr 1.4258e-03 eta 3:48:09 +epoch [20/50] batch [160/500] time 0.871 (0.892) data 0.000 (0.004) loss 0.9390 (1.0683) acc 71.8750 (73.4375) lr 1.4258e-03 eta 3:48:02 
+epoch [20/50] batch [165/500] time 0.861 (0.891) data 0.000 (0.004) loss 1.1230 (1.0738) acc 68.7500 (73.2576) lr 1.4258e-03 eta 3:47:49 +epoch [20/50] batch [170/500] time 0.885 (0.891) data 0.000 (0.004) loss 0.2976 (1.0786) acc 90.6250 (73.1250) lr 1.4258e-03 eta 3:47:40 +epoch [20/50] batch [175/500] time 0.891 (0.891) data 0.000 (0.004) loss 0.9048 (1.0784) acc 75.0000 (73.1786) lr 1.4258e-03 eta 3:47:32 +epoch [20/50] batch [180/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.3115 (1.0806) acc 59.3750 (73.0729) lr 1.4258e-03 eta 3:47:21 +epoch [20/50] batch [185/500] time 0.866 (0.890) data 0.000 (0.004) loss 0.6074 (1.0791) acc 87.5000 (73.0574) lr 1.4258e-03 eta 3:47:10 +epoch [20/50] batch [190/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.1641 (1.0827) acc 75.0000 (73.1086) lr 1.4258e-03 eta 3:47:08 +epoch [20/50] batch [195/500] time 0.856 (0.889) data 0.000 (0.004) loss 1.0098 (1.0847) acc 75.0000 (73.0449) lr 1.4258e-03 eta 3:46:53 +epoch [20/50] batch [200/500] time 0.854 (0.889) data 0.000 (0.003) loss 0.9819 (1.0916) acc 81.2500 (72.8438) lr 1.4258e-03 eta 3:46:42 +epoch [20/50] batch [205/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.9854 (1.0866) acc 81.2500 (72.9573) lr 1.4258e-03 eta 3:46:42 +epoch [20/50] batch [210/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.6416 (1.0890) acc 68.7500 (72.8869) lr 1.4258e-03 eta 3:46:39 +epoch [20/50] batch [215/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.2090 (1.0869) acc 71.8750 (72.9797) lr 1.4258e-03 eta 3:46:31 +epoch [20/50] batch [220/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.9414 (1.0857) acc 81.2500 (72.8977) lr 1.4258e-03 eta 3:46:22 +epoch [20/50] batch [225/500] time 0.860 (0.888) data 0.000 (0.003) loss 1.3555 (1.0872) acc 65.6250 (72.9167) lr 1.4258e-03 eta 3:46:10 +epoch [20/50] batch [230/500] time 0.848 (0.888) data 0.000 (0.003) loss 1.2070 (1.0923) acc 71.8750 (72.8533) lr 1.4258e-03 eta 3:45:58 +epoch [20/50] batch [235/500] time 0.891 (0.888) data 0.000 
(0.003) loss 0.9355 (1.0898) acc 71.8750 (72.8723) lr 1.4258e-03 eta 3:46:02 +epoch [20/50] batch [240/500] time 0.908 (0.888) data 0.000 (0.003) loss 1.5029 (1.0894) acc 56.2500 (72.8776) lr 1.4258e-03 eta 3:45:58 +epoch [20/50] batch [245/500] time 0.883 (0.888) data 0.000 (0.003) loss 0.9551 (1.0877) acc 78.1250 (72.8699) lr 1.4258e-03 eta 3:45:49 +epoch [20/50] batch [250/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.3223 (1.0882) acc 62.5000 (72.8500) lr 1.4258e-03 eta 3:45:48 +epoch [20/50] batch [255/500] time 0.905 (0.889) data 0.000 (0.003) loss 0.8052 (1.0852) acc 71.8750 (72.8799) lr 1.4258e-03 eta 3:45:45 +epoch [20/50] batch [260/500] time 0.881 (0.888) data 0.000 (0.003) loss 1.4570 (1.0903) acc 65.6250 (72.7524) lr 1.4258e-03 eta 3:45:37 +epoch [20/50] batch [265/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.3486 (1.0889) acc 62.5000 (72.7948) lr 1.4258e-03 eta 3:45:27 +epoch [20/50] batch [270/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.2178 (1.0862) acc 75.0000 (72.8009) lr 1.4258e-03 eta 3:45:20 +epoch [20/50] batch [275/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.7183 (1.0856) acc 78.1250 (72.8636) lr 1.4258e-03 eta 3:45:12 +epoch [20/50] batch [280/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.2168 (1.0845) acc 62.5000 (72.8460) lr 1.4258e-03 eta 3:45:07 +epoch [20/50] batch [285/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8579 (1.0808) acc 65.6250 (72.8399) lr 1.4258e-03 eta 3:45:02 +epoch [20/50] batch [290/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.3711 (1.0811) acc 71.8750 (72.8448) lr 1.4258e-03 eta 3:44:57 +epoch [20/50] batch [295/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.8418 (1.0788) acc 78.1250 (72.8814) lr 1.4258e-03 eta 3:44:51 +epoch [20/50] batch [300/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.0820 (1.0777) acc 65.6250 (72.8229) lr 1.4258e-03 eta 3:44:45 +epoch [20/50] batch [305/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.0537 (1.0787) acc 71.8750 (72.7459) lr 1.4258e-03 
eta 3:44:40 +epoch [20/50] batch [310/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.0586 (1.0784) acc 87.5000 (72.7923) lr 1.4258e-03 eta 3:44:33 +epoch [20/50] batch [315/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.4814 (1.0805) acc 68.7500 (72.7679) lr 1.4258e-03 eta 3:44:28 +epoch [20/50] batch [320/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.3428 (1.0802) acc 65.6250 (72.7734) lr 1.4258e-03 eta 3:44:23 +epoch [20/50] batch [325/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.2041 (1.0787) acc 75.0000 (72.8654) lr 1.4258e-03 eta 3:44:17 +epoch [20/50] batch [330/500] time 0.983 (0.887) data 0.000 (0.002) loss 0.9146 (1.0784) acc 75.0000 (72.8125) lr 1.4258e-03 eta 3:44:14 +epoch [20/50] batch [335/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.0352 (1.0776) acc 75.0000 (72.7892) lr 1.4258e-03 eta 3:44:06 +epoch [20/50] batch [340/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.1230 (1.0815) acc 68.7500 (72.7574) lr 1.4258e-03 eta 3:43:58 +epoch [20/50] batch [345/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.3281 (1.0819) acc 68.7500 (72.7355) lr 1.4258e-03 eta 3:43:50 +epoch [20/50] batch [350/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.2520 (1.0852) acc 68.7500 (72.6518) lr 1.4258e-03 eta 3:43:47 +epoch [20/50] batch [355/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.2969 (1.0853) acc 59.3750 (72.6232) lr 1.4258e-03 eta 3:43:45 +epoch [20/50] batch [360/500] time 0.885 (0.886) data 0.001 (0.002) loss 0.6367 (1.0857) acc 87.5000 (72.6042) lr 1.4258e-03 eta 3:43:38 +epoch [20/50] batch [365/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.5322 (1.0876) acc 59.3750 (72.5428) lr 1.4258e-03 eta 3:43:37 +epoch [20/50] batch [370/500] time 0.843 (0.886) data 0.000 (0.002) loss 0.9170 (1.0882) acc 81.2500 (72.5507) lr 1.4258e-03 eta 3:43:29 +epoch [20/50] batch [375/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.0762 (1.0878) acc 68.7500 (72.5333) lr 1.4258e-03 eta 3:43:27 +epoch [20/50] batch [380/500] time 0.887 (0.886) data 
0.000 (0.002) loss 0.6958 (1.0842) acc 75.0000 (72.6151) lr 1.4258e-03 eta 3:43:23 +epoch [20/50] batch [385/500] time 0.914 (0.886) data 0.000 (0.002) loss 1.3945 (1.0860) acc 59.3750 (72.5325) lr 1.4258e-03 eta 3:43:17 +epoch [20/50] batch [390/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.2637 (1.0878) acc 65.6250 (72.4599) lr 1.4258e-03 eta 3:43:11 +epoch [20/50] batch [395/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.4707 (1.0869) acc 65.6250 (72.4446) lr 1.4258e-03 eta 3:43:08 +epoch [20/50] batch [400/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.0400 (1.0858) acc 71.8750 (72.5000) lr 1.4258e-03 eta 3:43:07 +epoch [20/50] batch [405/500] time 0.903 (0.887) data 0.000 (0.002) loss 0.9541 (1.0832) acc 78.1250 (72.5849) lr 1.4258e-03 eta 3:43:02 +epoch [20/50] batch [410/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4082 (1.0827) acc 62.5000 (72.5534) lr 1.4258e-03 eta 3:42:55 +epoch [20/50] batch [415/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.1914 (1.0844) acc 71.8750 (72.5602) lr 1.4258e-03 eta 3:42:48 +epoch [20/50] batch [420/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.0176 (1.0832) acc 71.8750 (72.5670) lr 1.4258e-03 eta 3:42:42 +epoch [20/50] batch [425/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.5029 (1.0803) acc 87.5000 (72.6250) lr 1.4258e-03 eta 3:42:37 +epoch [20/50] batch [430/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.1484 (1.0827) acc 78.1250 (72.6090) lr 1.4258e-03 eta 3:42:31 +epoch [20/50] batch [435/500] time 0.857 (0.886) data 0.000 (0.002) loss 0.4231 (1.0830) acc 84.3750 (72.5862) lr 1.4258e-03 eta 3:42:27 +epoch [20/50] batch [440/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.0107 (1.0834) acc 71.8750 (72.5852) lr 1.4258e-03 eta 3:42:22 +epoch [20/50] batch [445/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.9307 (1.0818) acc 65.6250 (72.5562) lr 1.4258e-03 eta 3:42:16 +epoch [20/50] batch [450/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.8750 (1.0855) acc 62.5000 (72.5139) lr 
1.4258e-03 eta 3:42:12 +epoch [20/50] batch [455/500] time 0.916 (0.886) data 0.000 (0.002) loss 0.6704 (1.0867) acc 84.3750 (72.5137) lr 1.4258e-03 eta 3:42:08 +epoch [20/50] batch [460/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.0850 (1.0874) acc 68.7500 (72.5136) lr 1.4258e-03 eta 3:42:02 +epoch [20/50] batch [465/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.4570 (1.0878) acc 65.6250 (72.4866) lr 1.4258e-03 eta 3:41:57 +epoch [20/50] batch [470/500] time 0.898 (0.886) data 0.000 (0.002) loss 0.4722 (1.0852) acc 87.5000 (72.5266) lr 1.4258e-03 eta 3:41:51 +epoch [20/50] batch [475/500] time 0.846 (0.886) data 0.000 (0.002) loss 1.2451 (1.0860) acc 75.0000 (72.5461) lr 1.4258e-03 eta 3:41:47 +epoch [20/50] batch [480/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.0840 (1.0863) acc 75.0000 (72.5846) lr 1.4258e-03 eta 3:41:40 +epoch [20/50] batch [485/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.6006 (1.0855) acc 59.3750 (72.6095) lr 1.4258e-03 eta 3:41:37 +epoch [20/50] batch [490/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.3652 (1.0842) acc 53.1250 (72.6148) lr 1.4258e-03 eta 3:41:33 +epoch [20/50] batch [495/500] time 0.863 (0.885) data 0.000 (0.002) loss 0.9287 (1.0853) acc 75.0000 (72.6136) lr 1.4258e-03 eta 3:41:25 +epoch [20/50] batch [500/500] time 0.908 (0.885) data 0.000 (0.002) loss 1.8008 (1.0844) acc 71.8750 (72.6813) lr 1.3681e-03 eta 3:41:20 +epoch [21/50] batch [5/500] time 0.879 (1.032) data 0.000 (0.136) loss 1.2061 (1.2308) acc 71.8750 (69.3750) lr 1.3681e-03 eta 4:17:56 +epoch [21/50] batch [10/500] time 0.857 (0.962) data 0.000 (0.068) loss 1.0762 (1.0793) acc 75.0000 (72.5000) lr 1.3681e-03 eta 4:00:27 +epoch [21/50] batch [15/500] time 0.861 (0.935) data 0.000 (0.045) loss 1.0244 (1.0627) acc 75.0000 (74.1667) lr 1.3681e-03 eta 3:53:30 +epoch [21/50] batch [20/500] time 0.922 (0.924) data 0.000 (0.034) loss 1.0537 (1.0434) acc 78.1250 (75.0000) lr 1.3681e-03 eta 3:50:43 +epoch [21/50] batch [25/500] time 0.886 (0.922) 
data 0.000 (0.027) loss 1.1309 (1.0312) acc 65.6250 (74.5000) lr 1.3681e-03 eta 3:49:59 +epoch [21/50] batch [30/500] time 0.886 (0.919) data 0.000 (0.023) loss 0.7173 (1.0296) acc 87.5000 (74.7917) lr 1.3681e-03 eta 3:49:10 +epoch [21/50] batch [35/500] time 0.911 (0.914) data 0.000 (0.020) loss 1.0811 (1.0235) acc 71.8750 (74.5536) lr 1.3681e-03 eta 3:47:58 +epoch [21/50] batch [40/500] time 0.868 (0.910) data 0.000 (0.017) loss 1.1709 (1.0231) acc 75.0000 (74.5312) lr 1.3681e-03 eta 3:46:59 +epoch [21/50] batch [45/500] time 0.853 (0.907) data 0.000 (0.015) loss 1.1289 (1.0321) acc 78.1250 (74.2361) lr 1.3681e-03 eta 3:46:00 +epoch [21/50] batch [50/500] time 0.905 (0.906) data 0.000 (0.014) loss 0.9482 (1.0243) acc 81.2500 (74.4375) lr 1.3681e-03 eta 3:45:50 +epoch [21/50] batch [55/500] time 0.887 (0.905) data 0.000 (0.013) loss 1.0791 (1.0150) acc 68.7500 (74.5455) lr 1.3681e-03 eta 3:45:19 +epoch [21/50] batch [60/500] time 0.868 (0.902) data 0.000 (0.012) loss 0.9937 (1.0260) acc 71.8750 (74.2188) lr 1.3681e-03 eta 3:44:39 +epoch [21/50] batch [65/500] time 0.858 (0.900) data 0.000 (0.011) loss 1.2129 (1.0314) acc 71.8750 (74.0865) lr 1.3681e-03 eta 3:43:57 +epoch [21/50] batch [70/500] time 0.864 (0.900) data 0.000 (0.010) loss 0.4465 (1.0200) acc 84.3750 (74.2857) lr 1.3681e-03 eta 3:43:52 +epoch [21/50] batch [75/500] time 0.892 (0.898) data 0.000 (0.009) loss 0.8242 (1.0102) acc 81.2500 (74.4583) lr 1.3681e-03 eta 3:43:28 +epoch [21/50] batch [80/500] time 0.912 (0.898) data 0.000 (0.009) loss 1.2842 (1.0079) acc 62.5000 (74.4531) lr 1.3681e-03 eta 3:43:13 +epoch [21/50] batch [85/500] time 0.900 (0.898) data 0.000 (0.008) loss 1.3242 (1.0183) acc 65.6250 (74.2279) lr 1.3681e-03 eta 3:43:13 +epoch [21/50] batch [90/500] time 0.878 (0.897) data 0.000 (0.008) loss 0.8057 (1.0115) acc 78.1250 (74.3403) lr 1.3681e-03 eta 3:42:55 +epoch [21/50] batch [95/500] time 0.867 (0.896) data 0.000 (0.007) loss 0.9980 (1.0032) acc 81.2500 (74.5395) lr 1.3681e-03 eta 
3:42:29 +epoch [21/50] batch [100/500] time 0.870 (0.895) data 0.000 (0.007) loss 0.7393 (1.0092) acc 75.0000 (74.2188) lr 1.3681e-03 eta 3:42:13 +epoch [21/50] batch [105/500] time 0.897 (0.895) data 0.000 (0.007) loss 0.7842 (1.0077) acc 75.0000 (74.1667) lr 1.3681e-03 eta 3:42:03 +epoch [21/50] batch [110/500] time 0.857 (0.894) data 0.000 (0.006) loss 0.4792 (1.0004) acc 93.7500 (74.3182) lr 1.3681e-03 eta 3:41:50 +epoch [21/50] batch [115/500] time 0.856 (0.893) data 0.000 (0.006) loss 1.1670 (1.0051) acc 65.6250 (74.1848) lr 1.3681e-03 eta 3:41:33 +epoch [21/50] batch [120/500] time 0.905 (0.893) data 0.000 (0.006) loss 0.8247 (1.0080) acc 68.7500 (74.0104) lr 1.3681e-03 eta 3:41:25 +epoch [21/50] batch [125/500] time 0.913 (0.892) data 0.000 (0.006) loss 0.6235 (1.0053) acc 78.1250 (73.9500) lr 1.3681e-03 eta 3:41:14 +epoch [21/50] batch [130/500] time 0.885 (0.891) data 0.000 (0.005) loss 1.0430 (1.0202) acc 75.0000 (73.6298) lr 1.3681e-03 eta 3:40:56 +epoch [21/50] batch [135/500] time 0.884 (0.891) data 0.000 (0.005) loss 1.5391 (1.0217) acc 65.6250 (73.6806) lr 1.3681e-03 eta 3:40:44 +epoch [21/50] batch [140/500] time 0.898 (0.890) data 0.000 (0.005) loss 1.1992 (1.0236) acc 68.7500 (73.5714) lr 1.3681e-03 eta 3:40:32 +epoch [21/50] batch [145/500] time 0.847 (0.890) data 0.000 (0.005) loss 1.3096 (1.0228) acc 62.5000 (73.5560) lr 1.3681e-03 eta 3:40:18 +epoch [21/50] batch [150/500] time 0.888 (0.889) data 0.000 (0.005) loss 1.3066 (1.0258) acc 59.3750 (73.5000) lr 1.3681e-03 eta 3:40:07 +epoch [21/50] batch [155/500] time 0.889 (0.889) data 0.000 (0.005) loss 1.4629 (1.0277) acc 71.8750 (73.5081) lr 1.3681e-03 eta 3:39:58 +epoch [21/50] batch [160/500] time 0.865 (0.890) data 0.000 (0.004) loss 1.0850 (1.0326) acc 68.7500 (73.3984) lr 1.3681e-03 eta 3:40:02 +epoch [21/50] batch [165/500] time 0.864 (0.890) data 0.000 (0.004) loss 1.2705 (1.0379) acc 65.6250 (73.2576) lr 1.3681e-03 eta 3:40:05 +epoch [21/50] batch [170/500] time 0.891 (0.890) data 
0.000 (0.004) loss 0.9126 (1.0372) acc 78.1250 (73.2353) lr 1.3681e-03 eta 3:39:53 +epoch [21/50] batch [175/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.2939 (1.0410) acc 65.6250 (73.0714) lr 1.3681e-03 eta 3:39:45 +epoch [21/50] batch [180/500] time 0.880 (0.889) data 0.000 (0.004) loss 0.9966 (1.0454) acc 78.1250 (72.9688) lr 1.3681e-03 eta 3:39:38 +epoch [21/50] batch [185/500] time 0.922 (0.889) data 0.000 (0.004) loss 0.9995 (1.0454) acc 71.8750 (73.0574) lr 1.3681e-03 eta 3:39:34 +epoch [21/50] batch [190/500] time 0.867 (0.889) data 0.000 (0.004) loss 1.4551 (1.0494) acc 62.5000 (72.9441) lr 1.3681e-03 eta 3:39:28 +epoch [21/50] batch [195/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.2734 (1.0565) acc 62.5000 (72.8365) lr 1.3681e-03 eta 3:39:24 +epoch [21/50] batch [200/500] time 0.888 (0.889) data 0.000 (0.004) loss 0.6362 (1.0562) acc 84.3750 (72.8906) lr 1.3681e-03 eta 3:39:17 +epoch [21/50] batch [205/500] time 0.877 (0.889) data 0.000 (0.004) loss 1.8242 (1.0585) acc 62.5000 (72.8963) lr 1.3681e-03 eta 3:39:09 +epoch [21/50] batch [210/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.0537 (1.0580) acc 65.6250 (72.9018) lr 1.3681e-03 eta 3:39:15 +epoch [21/50] batch [215/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.7954 (1.0548) acc 90.6250 (72.9942) lr 1.3681e-03 eta 3:39:09 +epoch [21/50] batch [220/500] time 0.908 (0.889) data 0.000 (0.003) loss 0.4707 (1.0576) acc 93.7500 (72.9972) lr 1.3681e-03 eta 3:39:03 +epoch [21/50] batch [225/500] time 0.886 (0.889) data 0.000 (0.003) loss 1.1670 (1.0571) acc 71.8750 (73.0278) lr 1.3681e-03 eta 3:38:58 +epoch [21/50] batch [230/500] time 0.893 (0.889) data 0.000 (0.003) loss 0.9785 (1.0568) acc 75.0000 (73.1114) lr 1.3681e-03 eta 3:38:57 +epoch [21/50] batch [235/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.9170 (1.0606) acc 68.7500 (72.9920) lr 1.3681e-03 eta 3:38:52 +epoch [21/50] batch [240/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.8740 (1.0601) acc 71.8750 (73.0599) lr 
1.3681e-03 eta 3:38:46 +epoch [21/50] batch [245/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.1035 (1.0615) acc 75.0000 (72.9974) lr 1.3681e-03 eta 3:38:39 +epoch [21/50] batch [250/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.4531 (1.0650) acc 65.6250 (73.0000) lr 1.3681e-03 eta 3:38:31 +epoch [21/50] batch [255/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.2900 (1.0624) acc 68.7500 (73.1005) lr 1.3681e-03 eta 3:38:26 +epoch [21/50] batch [260/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.5928 (1.0617) acc 56.2500 (73.0889) lr 1.3681e-03 eta 3:38:23 +epoch [21/50] batch [265/500] time 0.888 (0.889) data 0.000 (0.003) loss 0.5947 (1.0586) acc 81.2500 (73.1368) lr 1.3681e-03 eta 3:38:17 +epoch [21/50] batch [270/500] time 0.920 (0.889) data 0.000 (0.003) loss 0.9702 (1.0588) acc 65.6250 (73.0787) lr 1.3681e-03 eta 3:38:16 +epoch [21/50] batch [275/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.7329 (1.0600) acc 75.0000 (73.0455) lr 1.3681e-03 eta 3:38:08 +epoch [21/50] batch [280/500] time 0.902 (0.889) data 0.000 (0.003) loss 1.0938 (1.0558) acc 78.1250 (73.1138) lr 1.3681e-03 eta 3:38:02 +epoch [21/50] batch [285/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.6777 (1.0544) acc 84.3750 (73.0921) lr 1.3681e-03 eta 3:37:55 +epoch [21/50] batch [290/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.9697 (1.0544) acc 81.2500 (73.1250) lr 1.3681e-03 eta 3:37:48 +epoch [21/50] batch [295/500] time 0.922 (0.889) data 0.000 (0.003) loss 0.6274 (1.0488) acc 84.3750 (73.2521) lr 1.3681e-03 eta 3:37:46 +epoch [21/50] batch [300/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.1826 (1.0511) acc 75.0000 (73.1979) lr 1.3681e-03 eta 3:37:39 +epoch [21/50] batch [305/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.1963 (1.0509) acc 68.7500 (73.1967) lr 1.3681e-03 eta 3:37:31 +epoch [21/50] batch [310/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.0107 (1.0542) acc 75.0000 (73.0847) lr 1.3681e-03 eta 3:37:30 +epoch [21/50] batch [315/500] time 0.879 
(0.888) data 0.000 (0.002) loss 0.8442 (1.0551) acc 68.7500 (73.0754) lr 1.3681e-03 eta 3:37:22 +epoch [21/50] batch [320/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.6357 (1.0590) acc 56.2500 (72.9980) lr 1.3681e-03 eta 3:37:13 +epoch [21/50] batch [325/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.3887 (1.0608) acc 65.6250 (72.9519) lr 1.3681e-03 eta 3:37:06 +epoch [21/50] batch [330/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.1289 (1.0621) acc 71.8750 (72.9072) lr 1.3681e-03 eta 3:37:00 +epoch [21/50] batch [335/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.0332 (1.0632) acc 71.8750 (72.9011) lr 1.3681e-03 eta 3:36:55 +epoch [21/50] batch [340/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.7100 (1.0672) acc 68.7500 (72.8676) lr 1.3681e-03 eta 3:36:50 +epoch [21/50] batch [345/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8389 (1.0667) acc 71.8750 (72.8986) lr 1.3681e-03 eta 3:36:46 +epoch [21/50] batch [350/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.3350 (1.0675) acc 68.7500 (72.8839) lr 1.3681e-03 eta 3:36:41 +epoch [21/50] batch [355/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.0391 (1.0672) acc 65.6250 (72.8169) lr 1.3681e-03 eta 3:36:37 +epoch [21/50] batch [360/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.0625 (1.0638) acc 71.8750 (72.8733) lr 1.3681e-03 eta 3:36:34 +epoch [21/50] batch [365/500] time 0.852 (0.887) data 0.000 (0.002) loss 0.7773 (1.0618) acc 84.3750 (72.9281) lr 1.3681e-03 eta 3:36:27 +epoch [21/50] batch [370/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.3750 (1.0608) acc 68.7500 (72.9899) lr 1.3681e-03 eta 3:36:21 +epoch [21/50] batch [375/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.9653 (1.0624) acc 75.0000 (72.9750) lr 1.3681e-03 eta 3:36:14 +epoch [21/50] batch [380/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.6523 (1.0656) acc 65.6250 (72.9194) lr 1.3681e-03 eta 3:36:09 +epoch [21/50] batch [385/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.5049 (1.0676) acc 68.7500 
(72.8896) lr 1.3681e-03 eta 3:36:02 +epoch [21/50] batch [390/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.9062 (1.0694) acc 46.8750 (72.7965) lr 1.3681e-03 eta 3:35:56 +epoch [21/50] batch [395/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2930 (1.0689) acc 62.5000 (72.7848) lr 1.3681e-03 eta 3:35:51 +epoch [21/50] batch [400/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1123 (1.0675) acc 68.7500 (72.8203) lr 1.3681e-03 eta 3:35:46 +epoch [21/50] batch [405/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.0830 (1.0650) acc 75.0000 (72.8858) lr 1.3681e-03 eta 3:35:40 +epoch [21/50] batch [410/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.8037 (1.0707) acc 53.1250 (72.7210) lr 1.3681e-03 eta 3:35:35 +epoch [21/50] batch [415/500] time 0.913 (0.887) data 0.000 (0.002) loss 0.4875 (1.0715) acc 78.1250 (72.6506) lr 1.3681e-03 eta 3:35:31 +epoch [21/50] batch [420/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.8784 (1.0742) acc 78.1250 (72.6190) lr 1.3681e-03 eta 3:35:24 +epoch [21/50] batch [425/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.5225 (1.0737) acc 50.0000 (72.6324) lr 1.3681e-03 eta 3:35:21 +epoch [21/50] batch [430/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.2959 (1.0737) acc 59.3750 (72.6308) lr 1.3681e-03 eta 3:35:16 +epoch [21/50] batch [435/500] time 0.848 (0.886) data 0.000 (0.002) loss 1.1221 (1.0722) acc 81.2500 (72.6940) lr 1.3681e-03 eta 3:35:08 +epoch [21/50] batch [440/500] time 0.910 (0.886) data 0.000 (0.002) loss 0.9287 (1.0736) acc 75.0000 (72.6207) lr 1.3681e-03 eta 3:35:04 +epoch [21/50] batch [445/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.1309 (1.0731) acc 71.8750 (72.6264) lr 1.3681e-03 eta 3:35:02 +epoch [21/50] batch [450/500] time 1.011 (0.887) data 0.000 (0.002) loss 1.5029 (1.0739) acc 56.2500 (72.6042) lr 1.3681e-03 eta 3:35:04 +epoch [21/50] batch [455/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.1338 (1.0730) acc 75.0000 (72.6442) lr 1.3681e-03 eta 3:34:59 +epoch [21/50] batch [460/500] 
time 0.860 (0.887) data 0.000 (0.002) loss 1.0400 (1.0730) acc 75.0000 (72.6495) lr 1.3681e-03 eta 3:34:53 +epoch [21/50] batch [465/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.5918 (1.0726) acc 71.8750 (72.7016) lr 1.3681e-03 eta 3:34:48 +epoch [21/50] batch [470/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.3936 (1.0729) acc 71.8750 (72.6729) lr 1.3681e-03 eta 3:34:46 +epoch [21/50] batch [475/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.8901 (1.0701) acc 71.8750 (72.7105) lr 1.3681e-03 eta 3:34:40 +epoch [21/50] batch [480/500] time 0.857 (0.887) data 0.000 (0.002) loss 1.3428 (1.0719) acc 68.7500 (72.6758) lr 1.3681e-03 eta 3:34:36 +epoch [21/50] batch [485/500] time 0.942 (0.887) data 0.000 (0.002) loss 1.5107 (1.0712) acc 71.8750 (72.7255) lr 1.3681e-03 eta 3:34:34 +epoch [21/50] batch [490/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.0791 (1.0724) acc 75.0000 (72.7041) lr 1.3681e-03 eta 3:34:29 +epoch [21/50] batch [495/500] time 0.851 (0.887) data 0.000 (0.002) loss 0.8716 (1.0719) acc 78.1250 (72.7399) lr 1.3681e-03 eta 3:34:28 +epoch [21/50] batch [500/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.7827 (1.0723) acc 81.2500 (72.7188) lr 1.3090e-03 eta 3:34:22 +epoch [22/50] batch [5/500] time 0.887 (1.040) data 0.000 (0.156) loss 1.6475 (1.1812) acc 59.3750 (68.1250) lr 1.3090e-03 eta 4:11:21 +epoch [22/50] batch [10/500] time 0.883 (0.966) data 0.000 (0.078) loss 0.6875 (1.0938) acc 81.2500 (71.5625) lr 1.3090e-03 eta 3:53:13 +epoch [22/50] batch [15/500] time 0.884 (0.939) data 0.000 (0.052) loss 1.1084 (1.0483) acc 78.1250 (72.9167) lr 1.3090e-03 eta 3:46:37 +epoch [22/50] batch [20/500] time 0.881 (0.924) data 0.000 (0.039) loss 1.0098 (1.0422) acc 78.1250 (73.5938) lr 1.3090e-03 eta 3:42:55 +epoch [22/50] batch [25/500] time 0.888 (0.916) data 0.000 (0.031) loss 1.4170 (1.0621) acc 62.5000 (72.3750) lr 1.3090e-03 eta 3:41:06 +epoch [22/50] batch [30/500] time 0.882 (0.911) data 0.000 (0.026) loss 0.9741 (1.0213) acc 75.0000 
(72.9167) lr 1.3090e-03 eta 3:39:35 +epoch [22/50] batch [35/500] time 0.867 (0.907) data 0.000 (0.022) loss 1.1230 (1.0094) acc 71.8750 (73.2143) lr 1.3090e-03 eta 3:38:37 +epoch [22/50] batch [40/500] time 0.854 (0.907) data 0.000 (0.020) loss 1.0723 (1.0189) acc 71.8750 (72.8125) lr 1.3090e-03 eta 3:38:40 +epoch [22/50] batch [45/500] time 0.898 (0.905) data 0.000 (0.018) loss 1.1328 (1.0080) acc 71.8750 (72.9861) lr 1.3090e-03 eta 3:38:05 +epoch [22/50] batch [50/500] time 0.906 (0.902) data 0.000 (0.016) loss 0.7090 (1.0051) acc 87.5000 (73.1875) lr 1.3090e-03 eta 3:37:14 +epoch [22/50] batch [55/500] time 0.852 (0.899) data 0.000 (0.014) loss 0.9741 (1.0049) acc 68.7500 (73.0682) lr 1.3090e-03 eta 3:36:24 +epoch [22/50] batch [60/500] time 0.870 (0.897) data 0.000 (0.013) loss 0.6465 (1.0033) acc 84.3750 (73.4375) lr 1.3090e-03 eta 3:35:56 +epoch [22/50] batch [65/500] time 0.913 (0.897) data 0.000 (0.012) loss 0.8945 (1.0055) acc 78.1250 (73.7019) lr 1.3090e-03 eta 3:35:47 +epoch [22/50] batch [70/500] time 0.884 (0.896) data 0.000 (0.011) loss 0.7793 (1.0125) acc 68.7500 (73.5268) lr 1.3090e-03 eta 3:35:34 +epoch [22/50] batch [75/500] time 0.907 (0.895) data 0.000 (0.011) loss 1.2100 (1.0056) acc 71.8750 (73.6667) lr 1.3090e-03 eta 3:35:16 +epoch [22/50] batch [80/500] time 0.902 (0.895) data 0.000 (0.010) loss 1.2207 (1.0156) acc 68.7500 (73.7109) lr 1.3090e-03 eta 3:35:03 +epoch [22/50] batch [85/500] time 0.889 (0.894) data 0.000 (0.009) loss 0.9683 (1.0142) acc 71.8750 (73.8603) lr 1.3090e-03 eta 3:34:46 +epoch [22/50] batch [90/500] time 0.894 (0.894) data 0.000 (0.009) loss 0.6528 (1.0111) acc 78.1250 (73.9931) lr 1.3090e-03 eta 3:34:36 +epoch [22/50] batch [95/500] time 0.898 (0.893) data 0.000 (0.008) loss 0.9580 (1.0030) acc 75.0000 (74.0132) lr 1.3090e-03 eta 3:34:27 +epoch [22/50] batch [100/500] time 0.864 (0.893) data 0.000 (0.008) loss 1.1240 (1.0178) acc 75.0000 (73.8125) lr 1.3090e-03 eta 3:34:15 +epoch [22/50] batch [105/500] time 0.866 
(0.892) data 0.000 (0.008) loss 0.8848 (1.0242) acc 87.5000 (73.7202) lr 1.3090e-03 eta 3:34:03 +epoch [22/50] batch [110/500] time 0.884 (0.892) data 0.000 (0.007) loss 0.8945 (1.0299) acc 78.1250 (73.6080) lr 1.3090e-03 eta 3:34:02 +epoch [22/50] batch [115/500] time 0.895 (0.892) data 0.000 (0.007) loss 0.7563 (1.0291) acc 71.8750 (73.6413) lr 1.3090e-03 eta 3:33:58 +epoch [22/50] batch [120/500] time 0.897 (0.892) data 0.000 (0.007) loss 0.8994 (1.0325) acc 75.0000 (73.6458) lr 1.3090e-03 eta 3:33:54 +epoch [22/50] batch [125/500] time 0.904 (0.892) data 0.000 (0.006) loss 0.7344 (1.0307) acc 78.1250 (73.6750) lr 1.3090e-03 eta 3:33:45 +epoch [22/50] batch [130/500] time 0.909 (0.892) data 0.000 (0.006) loss 1.0527 (1.0312) acc 71.8750 (73.6538) lr 1.3090e-03 eta 3:33:32 +epoch [22/50] batch [135/500] time 0.906 (0.891) data 0.000 (0.006) loss 1.5156 (1.0355) acc 71.8750 (73.7037) lr 1.3090e-03 eta 3:33:23 +epoch [22/50] batch [140/500] time 0.902 (0.892) data 0.000 (0.006) loss 1.8213 (1.0425) acc 53.1250 (73.4375) lr 1.3090e-03 eta 3:33:28 +epoch [22/50] batch [145/500] time 0.858 (0.891) data 0.000 (0.006) loss 0.7920 (1.0440) acc 71.8750 (73.1897) lr 1.3090e-03 eta 3:33:17 +epoch [22/50] batch [150/500] time 0.896 (0.891) data 0.000 (0.005) loss 1.2480 (1.0406) acc 65.6250 (73.2292) lr 1.3090e-03 eta 3:33:04 +epoch [22/50] batch [155/500] time 0.900 (0.891) data 0.000 (0.005) loss 1.0225 (1.0388) acc 62.5000 (73.0847) lr 1.3090e-03 eta 3:32:56 +epoch [22/50] batch [160/500] time 0.869 (0.890) data 0.000 (0.005) loss 1.1133 (1.0395) acc 68.7500 (72.9688) lr 1.3090e-03 eta 3:32:49 +epoch [22/50] batch [165/500] time 0.881 (0.891) data 0.000 (0.005) loss 1.2744 (1.0382) acc 68.7500 (72.9167) lr 1.3090e-03 eta 3:32:47 +epoch [22/50] batch [170/500] time 0.903 (0.891) data 0.000 (0.005) loss 1.1191 (1.0437) acc 65.6250 (72.8676) lr 1.3090e-03 eta 3:32:44 +epoch [22/50] batch [175/500] time 0.880 (0.890) data 0.000 (0.005) loss 0.5977 (1.0355) acc 78.1250 
(73.0357) lr 1.3090e-03 eta 3:32:36 +epoch [22/50] batch [180/500] time 0.875 (0.890) data 0.000 (0.005) loss 0.8081 (1.0305) acc 75.0000 (73.1771) lr 1.3090e-03 eta 3:32:20 +epoch [22/50] batch [185/500] time 0.896 (0.890) data 0.000 (0.004) loss 0.9043 (1.0297) acc 78.1250 (73.2264) lr 1.3090e-03 eta 3:32:24 +epoch [22/50] batch [190/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.2061 (1.0310) acc 71.8750 (73.1743) lr 1.3090e-03 eta 3:32:19 +epoch [22/50] batch [195/500] time 0.843 (0.890) data 0.000 (0.004) loss 1.6602 (1.0390) acc 53.1250 (72.9487) lr 1.3090e-03 eta 3:32:11 +epoch [22/50] batch [200/500] time 0.880 (0.890) data 0.000 (0.004) loss 0.9180 (1.0360) acc 68.7500 (72.9219) lr 1.3090e-03 eta 3:32:02 +epoch [22/50] batch [205/500] time 0.908 (0.890) data 0.000 (0.004) loss 1.4395 (1.0353) acc 59.3750 (72.9726) lr 1.3090e-03 eta 3:31:57 +epoch [22/50] batch [210/500] time 0.872 (0.889) data 0.000 (0.004) loss 1.3203 (1.0381) acc 65.6250 (72.8869) lr 1.3090e-03 eta 3:31:43 +epoch [22/50] batch [215/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.0811 (1.0343) acc 68.7500 (72.9360) lr 1.3090e-03 eta 3:31:37 +epoch [22/50] batch [220/500] time 0.892 (0.889) data 0.000 (0.004) loss 0.9282 (1.0405) acc 71.8750 (72.8125) lr 1.3090e-03 eta 3:31:34 +epoch [22/50] batch [225/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.1768 (1.0446) acc 65.6250 (72.6944) lr 1.3090e-03 eta 3:31:26 +epoch [22/50] batch [230/500] time 0.855 (0.888) data 0.000 (0.004) loss 1.1943 (1.0455) acc 75.0000 (72.7446) lr 1.3090e-03 eta 3:31:17 +epoch [22/50] batch [235/500] time 0.863 (0.888) data 0.000 (0.004) loss 0.6660 (1.0396) acc 81.2500 (72.8324) lr 1.3090e-03 eta 3:31:09 +epoch [22/50] batch [240/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.8862 (1.0401) acc 68.7500 (72.8255) lr 1.3090e-03 eta 3:31:05 +epoch [22/50] batch [245/500] time 0.892 (0.888) data 0.000 (0.003) loss 1.6279 (1.0426) acc 59.3750 (72.8189) lr 1.3090e-03 eta 3:31:00 +epoch [22/50] batch [250/500] 
time 0.889 (0.888) data 0.000 (0.003) loss 1.4707 (1.0449) acc 62.5000 (72.7375) lr 1.3090e-03 eta 3:30:57 +epoch [22/50] batch [255/500] time 0.904 (0.888) data 0.000 (0.003) loss 1.2715 (1.0470) acc 62.5000 (72.7083) lr 1.3090e-03 eta 3:30:49 +epoch [22/50] batch [260/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.1611 (1.0473) acc 75.0000 (72.6803) lr 1.3090e-03 eta 3:30:40 +epoch [22/50] batch [265/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.5635 (1.0444) acc 81.2500 (72.7476) lr 1.3090e-03 eta 3:30:34 +epoch [22/50] batch [270/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.0693 (1.0453) acc 75.0000 (72.7662) lr 1.3090e-03 eta 3:30:30 +epoch [22/50] batch [275/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.1582 (1.0460) acc 68.7500 (72.7955) lr 1.3090e-03 eta 3:30:26 +epoch [22/50] batch [280/500] time 0.983 (0.888) data 0.000 (0.003) loss 1.0088 (1.0441) acc 81.2500 (72.8571) lr 1.3090e-03 eta 3:30:26 +epoch [22/50] batch [285/500] time 0.869 (0.887) data 0.000 (0.003) loss 1.3408 (1.0454) acc 59.3750 (72.8399) lr 1.3090e-03 eta 3:30:15 +epoch [22/50] batch [290/500] time 0.853 (0.887) data 0.000 (0.003) loss 1.2832 (1.0486) acc 75.0000 (72.8879) lr 1.3090e-03 eta 3:30:07 +epoch [22/50] batch [295/500] time 0.865 (0.887) data 0.000 (0.003) loss 1.0625 (1.0491) acc 81.2500 (72.8814) lr 1.3090e-03 eta 3:30:02 +epoch [22/50] batch [300/500] time 0.873 (0.887) data 0.000 (0.003) loss 0.7998 (1.0483) acc 81.2500 (72.8750) lr 1.3090e-03 eta 3:29:53 +epoch [22/50] batch [305/500] time 0.860 (0.887) data 0.000 (0.003) loss 0.4951 (1.0441) acc 81.2500 (72.9303) lr 1.3090e-03 eta 3:29:44 +epoch [22/50] batch [310/500] time 0.864 (0.886) data 0.000 (0.003) loss 0.9014 (1.0494) acc 84.3750 (72.8327) lr 1.3090e-03 eta 3:29:36 +epoch [22/50] batch [315/500] time 0.888 (0.886) data 0.000 (0.003) loss 1.3682 (1.0493) acc 68.7500 (72.7976) lr 1.3090e-03 eta 3:29:30 +epoch [22/50] batch [320/500] time 0.908 (0.886) data 0.000 (0.003) loss 1.0576 (1.0510) acc 
75.0000 (72.7539) lr 1.3090e-03 eta 3:29:27 +epoch [22/50] batch [325/500] time 0.900 (0.887) data 0.000 (0.003) loss 1.0020 (1.0507) acc 78.1250 (72.7788) lr 1.3090e-03 eta 3:29:27 +epoch [22/50] batch [330/500] time 0.857 (0.886) data 0.000 (0.003) loss 0.7788 (1.0491) acc 87.5000 (72.8220) lr 1.3090e-03 eta 3:29:20 +epoch [22/50] batch [335/500] time 0.889 (0.886) data 0.000 (0.003) loss 1.1143 (1.0490) acc 68.7500 (72.8358) lr 1.3090e-03 eta 3:29:13 +epoch [22/50] batch [340/500] time 0.901 (0.886) data 0.000 (0.003) loss 0.9082 (1.0490) acc 81.2500 (72.8217) lr 1.3090e-03 eta 3:29:07 +epoch [22/50] batch [345/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.9980 (1.0490) acc 78.1250 (72.8533) lr 1.3090e-03 eta 3:29:00 +epoch [22/50] batch [350/500] time 0.869 (0.886) data 0.001 (0.002) loss 0.7012 (1.0473) acc 78.1250 (72.8571) lr 1.3090e-03 eta 3:28:55 +epoch [22/50] batch [355/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.2529 (1.0469) acc 78.1250 (72.8961) lr 1.3090e-03 eta 3:28:50 +epoch [22/50] batch [360/500] time 0.857 (0.886) data 0.000 (0.002) loss 0.8872 (1.0501) acc 75.0000 (72.8733) lr 1.3090e-03 eta 3:28:46 +epoch [22/50] batch [365/500] time 0.869 (0.886) data 0.000 (0.002) loss 0.8228 (1.0526) acc 71.8750 (72.8253) lr 1.3090e-03 eta 3:28:42 +epoch [22/50] batch [370/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.4971 (1.0519) acc 56.2500 (72.8125) lr 1.3090e-03 eta 3:28:36 +epoch [22/50] batch [375/500] time 0.902 (0.886) data 0.000 (0.002) loss 0.6768 (1.0504) acc 75.0000 (72.8167) lr 1.3090e-03 eta 3:28:30 +epoch [22/50] batch [380/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.0918 (1.0501) acc 62.5000 (72.7961) lr 1.3090e-03 eta 3:28:24 +epoch [22/50] batch [385/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.3613 (1.0530) acc 68.7500 (72.7273) lr 1.3090e-03 eta 3:28:20 +epoch [22/50] batch [390/500] time 0.892 (0.885) data 0.000 (0.002) loss 1.2881 (1.0530) acc 75.0000 (72.7885) lr 1.3090e-03 eta 3:28:13 +epoch [22/50] batch 
[395/500] time 0.871 (0.885) data 0.000 (0.002) loss 1.2480 (1.0517) acc 71.8750 (72.8165) lr 1.3090e-03 eta 3:28:05 +epoch [22/50] batch [400/500] time 0.896 (0.885) data 0.000 (0.002) loss 0.9028 (1.0496) acc 78.1250 (72.8984) lr 1.3090e-03 eta 3:28:01 +epoch [22/50] batch [405/500] time 0.900 (0.885) data 0.000 (0.002) loss 0.6030 (1.0483) acc 84.3750 (72.9244) lr 1.3090e-03 eta 3:27:56 +epoch [22/50] batch [410/500] time 0.869 (0.885) data 0.000 (0.002) loss 1.2314 (1.0498) acc 65.6250 (72.8506) lr 1.3090e-03 eta 3:27:51 +epoch [22/50] batch [415/500] time 0.882 (0.885) data 0.000 (0.002) loss 1.3496 (1.0508) acc 65.6250 (72.8012) lr 1.3090e-03 eta 3:27:46 +epoch [22/50] batch [420/500] time 0.891 (0.885) data 0.000 (0.002) loss 1.3760 (1.0525) acc 65.6250 (72.7827) lr 1.3090e-03 eta 3:27:42 +epoch [22/50] batch [425/500] time 0.888 (0.885) data 0.000 (0.002) loss 1.2363 (1.0561) acc 62.5000 (72.7132) lr 1.3090e-03 eta 3:27:40 +epoch [22/50] batch [430/500] time 0.893 (0.885) data 0.000 (0.002) loss 0.9985 (1.0566) acc 68.7500 (72.6962) lr 1.3090e-03 eta 3:27:34 +epoch [22/50] batch [435/500] time 0.879 (0.885) data 0.000 (0.002) loss 0.9468 (1.0586) acc 78.1250 (72.7083) lr 1.3090e-03 eta 3:27:31 +epoch [22/50] batch [440/500] time 0.879 (0.885) data 0.000 (0.002) loss 0.8086 (1.0565) acc 75.0000 (72.7628) lr 1.3090e-03 eta 3:27:24 +epoch [22/50] batch [445/500] time 0.881 (0.885) data 0.000 (0.002) loss 1.3359 (1.0583) acc 78.1250 (72.7739) lr 1.3090e-03 eta 3:27:17 +epoch [22/50] batch [450/500] time 0.904 (0.885) data 0.000 (0.002) loss 1.2695 (1.0571) acc 71.8750 (72.7569) lr 1.3090e-03 eta 3:27:16 +epoch [22/50] batch [455/500] time 0.870 (0.885) data 0.000 (0.002) loss 0.7578 (1.0594) acc 81.2500 (72.6923) lr 1.3090e-03 eta 3:27:10 +epoch [22/50] batch [460/500] time 0.868 (0.885) data 0.000 (0.002) loss 1.2656 (1.0604) acc 71.8750 (72.6834) lr 1.3090e-03 eta 3:27:04 +epoch [22/50] batch [465/500] time 0.870 (0.885) data 0.000 (0.002) loss 1.1338 
(1.0623) acc 71.8750 (72.6546) lr 1.3090e-03 eta 3:26:58 +epoch [22/50] batch [470/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.0117 (1.0625) acc 75.0000 (72.6197) lr 1.3090e-03 eta 3:26:57 +epoch [22/50] batch [475/500] time 0.890 (0.885) data 0.000 (0.002) loss 1.5205 (1.0637) acc 62.5000 (72.5789) lr 1.3090e-03 eta 3:26:51 +epoch [22/50] batch [480/500] time 0.883 (0.885) data 0.000 (0.002) loss 1.1855 (1.0637) acc 65.6250 (72.5521) lr 1.3090e-03 eta 3:26:44 +epoch [22/50] batch [485/500] time 0.892 (0.885) data 0.000 (0.002) loss 1.0674 (1.0646) acc 68.7500 (72.5322) lr 1.3090e-03 eta 3:26:39 +epoch [22/50] batch [490/500] time 0.897 (0.885) data 0.000 (0.002) loss 1.6270 (1.0690) acc 71.8750 (72.4426) lr 1.3090e-03 eta 3:26:35 +epoch [22/50] batch [495/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.6250 (1.0710) acc 59.3750 (72.3801) lr 1.3090e-03 eta 3:26:30 +epoch [22/50] batch [500/500] time 0.912 (0.885) data 0.000 (0.002) loss 1.6074 (1.0730) acc 65.6250 (72.3375) lr 1.2487e-03 eta 3:26:26 +epoch [23/50] batch [5/500] time 0.875 (1.015) data 0.000 (0.128) loss 0.9702 (1.0694) acc 71.8750 (72.5000) lr 1.2487e-03 eta 3:56:39 +epoch [23/50] batch [10/500] time 0.893 (0.960) data 0.000 (0.064) loss 0.7173 (1.0163) acc 84.3750 (72.5000) lr 1.2487e-03 eta 3:43:51 +epoch [23/50] batch [15/500] time 0.873 (0.932) data 0.000 (0.043) loss 0.8672 (1.0677) acc 84.3750 (72.7083) lr 1.2487e-03 eta 3:37:07 +epoch [23/50] batch [20/500] time 0.889 (0.920) data 0.000 (0.032) loss 1.7744 (1.0919) acc 56.2500 (72.8125) lr 1.2487e-03 eta 3:34:15 +epoch [23/50] batch [25/500] time 0.886 (0.912) data 0.000 (0.026) loss 0.9146 (1.0933) acc 75.0000 (72.7500) lr 1.2487e-03 eta 3:32:22 +epoch [23/50] batch [30/500] time 0.878 (0.909) data 0.000 (0.021) loss 0.9067 (1.0749) acc 75.0000 (73.3333) lr 1.2487e-03 eta 3:31:36 +epoch [23/50] batch [35/500] time 0.883 (0.906) data 0.000 (0.018) loss 1.1514 (1.0774) acc 84.3750 (73.9286) lr 1.2487e-03 eta 3:30:59 +epoch [23/50] 
batch [40/500] time 0.889 (0.904) data 0.000 (0.016) loss 0.9453 (1.0532) acc 78.1250 (73.7500) lr 1.2487e-03 eta 3:30:14 +epoch [23/50] batch [45/500] time 0.893 (0.904) data 0.000 (0.014) loss 1.1201 (1.0545) acc 78.1250 (73.4028) lr 1.2487e-03 eta 3:30:18 +epoch [23/50] batch [50/500] time 0.888 (0.903) data 0.000 (0.013) loss 0.5840 (1.0596) acc 78.1250 (73.0000) lr 1.2487e-03 eta 3:29:57 +epoch [23/50] batch [55/500] time 0.881 (0.902) data 0.000 (0.012) loss 1.1660 (1.0442) acc 65.6250 (73.4659) lr 1.2487e-03 eta 3:29:36 +epoch [23/50] batch [60/500] time 0.898 (0.901) data 0.000 (0.011) loss 1.0713 (1.0521) acc 78.1250 (73.7500) lr 1.2487e-03 eta 3:29:20 +epoch [23/50] batch [65/500] time 0.877 (0.900) data 0.000 (0.010) loss 0.8467 (1.0528) acc 78.1250 (73.6058) lr 1.2487e-03 eta 3:29:01 +epoch [23/50] batch [70/500] time 0.906 (0.899) data 0.000 (0.009) loss 1.5215 (1.0556) acc 68.7500 (73.6161) lr 1.2487e-03 eta 3:28:44 +epoch [23/50] batch [75/500] time 0.888 (0.898) data 0.000 (0.009) loss 1.4639 (1.0635) acc 68.7500 (73.4583) lr 1.2487e-03 eta 3:28:29 +epoch [23/50] batch [80/500] time 0.862 (0.897) data 0.000 (0.008) loss 1.0732 (1.0693) acc 75.0000 (73.1250) lr 1.2487e-03 eta 3:28:02 +epoch [23/50] batch [85/500] time 0.908 (0.897) data 0.000 (0.008) loss 1.5850 (1.0682) acc 71.8750 (73.1618) lr 1.2487e-03 eta 3:27:59 +epoch [23/50] batch [90/500] time 0.897 (0.897) data 0.000 (0.007) loss 0.9204 (1.0618) acc 75.0000 (73.4028) lr 1.2487e-03 eta 3:27:54 +epoch [23/50] batch [95/500] time 0.888 (0.897) data 0.000 (0.007) loss 1.3740 (1.0722) acc 62.5000 (73.1579) lr 1.2487e-03 eta 3:27:47 +epoch [23/50] batch [100/500] time 0.898 (0.896) data 0.000 (0.007) loss 1.4824 (1.0778) acc 62.5000 (72.8125) lr 1.2487e-03 eta 3:27:33 +epoch [23/50] batch [105/500] time 0.894 (0.895) data 0.000 (0.006) loss 0.9966 (1.0801) acc 75.0000 (72.8274) lr 1.2487e-03 eta 3:27:22 +epoch [23/50] batch [110/500] time 0.868 (0.896) data 0.000 (0.006) loss 1.3672 (1.0837) acc 
62.5000 (72.6705) lr 1.2487e-03 eta 3:27:24 +epoch [23/50] batch [115/500] time 0.881 (0.895) data 0.000 (0.006) loss 1.0957 (1.0830) acc 78.1250 (72.7989) lr 1.2487e-03 eta 3:27:12 +epoch [23/50] batch [120/500] time 0.885 (0.895) data 0.000 (0.006) loss 0.9976 (1.0753) acc 71.8750 (72.8906) lr 1.2487e-03 eta 3:27:01 +epoch [23/50] batch [125/500] time 0.886 (0.895) data 0.000 (0.005) loss 1.0820 (1.0677) acc 84.3750 (73.3000) lr 1.2487e-03 eta 3:26:55 +epoch [23/50] batch [130/500] time 0.864 (0.895) data 0.000 (0.005) loss 1.0176 (1.0709) acc 71.8750 (73.1731) lr 1.2487e-03 eta 3:26:51 +epoch [23/50] batch [135/500] time 0.874 (0.894) data 0.000 (0.005) loss 1.0684 (1.0727) acc 78.1250 (73.2176) lr 1.2487e-03 eta 3:26:34 +epoch [23/50] batch [140/500] time 0.885 (0.894) data 0.000 (0.005) loss 1.5244 (1.0727) acc 65.6250 (73.2589) lr 1.2487e-03 eta 3:26:24 +epoch [23/50] batch [145/500] time 0.873 (0.893) data 0.000 (0.005) loss 1.3320 (1.0725) acc 62.5000 (73.2974) lr 1.2487e-03 eta 3:26:13 +epoch [23/50] batch [150/500] time 1.007 (0.894) data 0.000 (0.004) loss 1.1387 (1.0720) acc 71.8750 (73.3958) lr 1.2487e-03 eta 3:26:19 +epoch [23/50] batch [155/500] time 0.883 (0.894) data 0.000 (0.004) loss 0.9688 (1.0793) acc 75.0000 (73.2863) lr 1.2487e-03 eta 3:26:11 +epoch [23/50] batch [160/500] time 0.870 (0.893) data 0.000 (0.004) loss 1.3809 (1.0753) acc 53.1250 (73.2617) lr 1.2487e-03 eta 3:25:58 +epoch [23/50] batch [165/500] time 0.876 (0.893) data 0.000 (0.004) loss 1.0820 (1.0743) acc 65.6250 (73.2197) lr 1.2487e-03 eta 3:25:48 +epoch [23/50] batch [170/500] time 0.892 (0.892) data 0.000 (0.004) loss 0.8379 (1.0710) acc 75.0000 (73.3272) lr 1.2487e-03 eta 3:25:36 +epoch [23/50] batch [175/500] time 0.922 (0.892) data 0.000 (0.004) loss 0.6421 (1.0691) acc 84.3750 (73.3929) lr 1.2487e-03 eta 3:25:30 +epoch [23/50] batch [180/500] time 0.894 (0.892) data 0.000 (0.004) loss 0.9956 (1.0720) acc 81.2500 (73.4201) lr 1.2487e-03 eta 3:25:29 +epoch [23/50] batch 
[185/500] time 0.872 (0.892) data 0.000 (0.004) loss 0.5576 (1.0703) acc 78.1250 (73.3953) lr 1.2487e-03 eta 3:25:18 +epoch [23/50] batch [190/500] time 0.887 (0.891) data 0.000 (0.004) loss 1.0635 (1.0716) acc 81.2500 (73.4704) lr 1.2487e-03 eta 3:25:11 +epoch [23/50] batch [195/500] time 0.869 (0.891) data 0.000 (0.003) loss 1.0117 (1.0715) acc 71.8750 (73.4936) lr 1.2487e-03 eta 3:25:05 +epoch [23/50] batch [200/500] time 0.888 (0.891) data 0.000 (0.003) loss 0.8896 (1.0661) acc 81.2500 (73.6719) lr 1.2487e-03 eta 3:25:01 +epoch [23/50] batch [205/500] time 0.863 (0.891) data 0.000 (0.003) loss 0.7725 (1.0649) acc 75.0000 (73.6738) lr 1.2487e-03 eta 3:24:49 +epoch [23/50] batch [210/500] time 0.873 (0.890) data 0.000 (0.003) loss 1.4844 (1.0676) acc 59.3750 (73.4375) lr 1.2487e-03 eta 3:24:38 +epoch [23/50] batch [215/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.2510 (1.0738) acc 65.6250 (73.2703) lr 1.2487e-03 eta 3:24:30 +epoch [23/50] batch [220/500] time 0.893 (0.890) data 0.000 (0.003) loss 0.9512 (1.0752) acc 78.1250 (73.2386) lr 1.2487e-03 eta 3:24:21 +epoch [23/50] batch [225/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.4326 (1.0757) acc 78.1250 (73.2222) lr 1.2487e-03 eta 3:24:14 +epoch [23/50] batch [230/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.1543 (1.0745) acc 65.6250 (73.2065) lr 1.2487e-03 eta 3:24:09 +epoch [23/50] batch [235/500] time 0.870 (0.889) data 0.000 (0.003) loss 1.3281 (1.0729) acc 68.7500 (73.1516) lr 1.2487e-03 eta 3:24:00 +epoch [23/50] batch [240/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.7920 (1.0702) acc 78.1250 (73.1380) lr 1.2487e-03 eta 3:23:56 +epoch [23/50] batch [245/500] time 0.880 (0.889) data 0.000 (0.003) loss 1.3125 (1.0749) acc 68.7500 (73.0230) lr 1.2487e-03 eta 3:23:51 +epoch [23/50] batch [250/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.6445 (1.0835) acc 71.8750 (72.8625) lr 1.2487e-03 eta 3:23:56 +epoch [23/50] batch [255/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.0107 
(1.0840) acc 78.1250 (72.8431) lr 1.2487e-03 eta 3:23:53 +epoch [23/50] batch [260/500] time 0.861 (0.890) data 0.000 (0.003) loss 1.2217 (1.0855) acc 78.1250 (72.8846) lr 1.2487e-03 eta 3:23:44 +epoch [23/50] batch [265/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.9053 (1.0823) acc 78.1250 (72.9717) lr 1.2487e-03 eta 3:23:42 +epoch [23/50] batch [270/500] time 0.921 (0.890) data 0.000 (0.003) loss 1.2861 (1.0812) acc 75.0000 (72.9977) lr 1.2487e-03 eta 3:23:44 +epoch [23/50] batch [275/500] time 0.873 (0.890) data 0.000 (0.003) loss 0.9385 (1.0803) acc 78.1250 (73.0000) lr 1.2487e-03 eta 3:23:37 +epoch [23/50] batch [280/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.3135 (1.0809) acc 65.6250 (73.0022) lr 1.2487e-03 eta 3:23:31 +epoch [23/50] batch [285/500] time 0.882 (0.890) data 0.000 (0.002) loss 0.7437 (1.0791) acc 78.1250 (72.9934) lr 1.2487e-03 eta 3:23:26 +epoch [23/50] batch [290/500] time 0.878 (0.890) data 0.000 (0.002) loss 1.1846 (1.0750) acc 68.7500 (72.9957) lr 1.2487e-03 eta 3:23:22 +epoch [23/50] batch [295/500] time 0.873 (0.890) data 0.000 (0.002) loss 0.8887 (1.0719) acc 71.8750 (73.0720) lr 1.2487e-03 eta 3:23:22 +epoch [23/50] batch [300/500] time 0.858 (0.890) data 0.000 (0.002) loss 1.0781 (1.0702) acc 75.0000 (73.0625) lr 1.2487e-03 eta 3:23:17 +epoch [23/50] batch [305/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.1592 (1.0715) acc 71.8750 (73.0020) lr 1.2487e-03 eta 3:23:09 +epoch [23/50] batch [310/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.2256 (1.0740) acc 71.8750 (72.9234) lr 1.2487e-03 eta 3:23:02 +epoch [23/50] batch [315/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.3320 (1.0748) acc 59.3750 (72.9464) lr 1.2487e-03 eta 3:22:55 +epoch [23/50] batch [320/500] time 0.862 (0.890) data 0.000 (0.002) loss 0.8950 (1.0719) acc 75.0000 (72.9688) lr 1.2487e-03 eta 3:22:49 +epoch [23/50] batch [325/500] time 0.914 (0.890) data 0.000 (0.002) loss 0.9985 (1.0717) acc 81.2500 (73.0192) lr 1.2487e-03 eta 3:22:46 +epoch 
[23/50] batch [330/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.7041 (1.0695) acc 78.1250 (73.0303) lr 1.2487e-03 eta 3:22:39 +epoch [23/50] batch [335/500] time 0.899 (0.890) data 0.000 (0.002) loss 0.4849 (1.0680) acc 87.5000 (73.0597) lr 1.2487e-03 eta 3:22:36 +epoch [23/50] batch [340/500] time 0.902 (0.890) data 0.000 (0.002) loss 0.8516 (1.0715) acc 78.1250 (72.9963) lr 1.2487e-03 eta 3:22:32 +epoch [23/50] batch [345/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.7812 (1.0716) acc 62.5000 (73.0072) lr 1.2487e-03 eta 3:22:28 +epoch [23/50] batch [350/500] time 0.863 (0.890) data 0.000 (0.002) loss 1.7070 (1.0722) acc 53.1250 (72.9375) lr 1.2487e-03 eta 3:22:22 +epoch [23/50] batch [355/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.2158 (1.0710) acc 75.0000 (73.0106) lr 1.2487e-03 eta 3:22:16 +epoch [23/50] batch [360/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.0586 (1.0693) acc 68.7500 (73.0208) lr 1.2487e-03 eta 3:22:08 +epoch [23/50] batch [365/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.2617 (1.0699) acc 78.1250 (73.0223) lr 1.2487e-03 eta 3:22:03 +epoch [23/50] batch [370/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.7334 (1.0688) acc 81.2500 (72.9899) lr 1.2487e-03 eta 3:21:57 +epoch [23/50] batch [375/500] time 0.889 (0.889) data 0.000 (0.002) loss 2.3047 (1.0729) acc 62.5000 (72.9917) lr 1.2487e-03 eta 3:21:52 +epoch [23/50] batch [380/500] time 0.912 (0.889) data 0.000 (0.002) loss 0.7793 (1.0745) acc 81.2500 (72.9688) lr 1.2487e-03 eta 3:21:51 +epoch [23/50] batch [385/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.6680 (1.0746) acc 65.6250 (72.9545) lr 1.2487e-03 eta 3:21:49 +epoch [23/50] batch [390/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.2959 (1.0731) acc 71.8750 (72.9968) lr 1.2487e-03 eta 3:21:44 +epoch [23/50] batch [395/500] time 0.887 (0.890) data 0.000 (0.002) loss 0.3552 (1.0706) acc 90.6250 (73.1013) lr 1.2487e-03 eta 3:21:45 +epoch [23/50] batch [400/500] time 0.875 (0.890) data 0.000 (0.002) loss 
1.0771 (1.0704) acc 78.1250 (73.1250) lr 1.2487e-03 eta 3:21:39 +epoch [23/50] batch [405/500] time 0.882 (0.890) data 0.000 (0.002) loss 0.7930 (1.0685) acc 84.3750 (73.2176) lr 1.2487e-03 eta 3:21:33 +epoch [23/50] batch [410/500] time 0.858 (0.890) data 0.001 (0.002) loss 1.0430 (1.0669) acc 71.8750 (73.2622) lr 1.2487e-03 eta 3:21:29 +epoch [23/50] batch [415/500] time 0.883 (0.890) data 0.000 (0.002) loss 0.7710 (1.0658) acc 78.1250 (73.2756) lr 1.2487e-03 eta 3:21:24 +epoch [23/50] batch [420/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.7031 (1.0688) acc 59.3750 (73.2366) lr 1.2487e-03 eta 3:21:20 +epoch [23/50] batch [425/500] time 0.888 (0.890) data 0.000 (0.002) loss 1.4385 (1.0698) acc 65.6250 (73.1765) lr 1.2487e-03 eta 3:21:15 +epoch [23/50] batch [430/500] time 0.902 (0.889) data 0.000 (0.002) loss 0.8901 (1.0677) acc 71.8750 (73.1904) lr 1.2487e-03 eta 3:21:09 +epoch [23/50] batch [435/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.0547 (1.0681) acc 81.2500 (73.1897) lr 1.2487e-03 eta 3:21:03 +epoch [23/50] batch [440/500] time 0.906 (0.890) data 0.000 (0.002) loss 1.3223 (1.0663) acc 75.0000 (73.2457) lr 1.2487e-03 eta 3:21:01 +epoch [23/50] batch [445/500] time 0.901 (0.889) data 0.000 (0.002) loss 1.3135 (1.0675) acc 59.3750 (73.2022) lr 1.2487e-03 eta 3:20:55 +epoch [23/50] batch [450/500] time 0.849 (0.889) data 0.000 (0.002) loss 1.0752 (1.0682) acc 71.8750 (73.1736) lr 1.2487e-03 eta 3:20:48 +epoch [23/50] batch [455/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.4790 (1.0663) acc 84.3750 (73.2212) lr 1.2487e-03 eta 3:20:43 +epoch [23/50] batch [460/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.0547 (1.0647) acc 78.1250 (73.2473) lr 1.2487e-03 eta 3:20:38 +epoch [23/50] batch [465/500] time 0.905 (0.889) data 0.000 (0.002) loss 0.7974 (1.0626) acc 71.8750 (73.3132) lr 1.2487e-03 eta 3:20:33 +epoch [23/50] batch [470/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.3564 (1.0600) acc 65.6250 (73.3710) lr 1.2487e-03 eta 3:20:26 
+epoch [23/50] batch [475/500] time 0.852 (0.889) data 0.000 (0.002) loss 1.1953 (1.0615) acc 71.8750 (73.3750) lr 1.2487e-03 eta 3:20:20 +epoch [23/50] batch [480/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.3252 (1.0608) acc 71.8750 (73.3854) lr 1.2487e-03 eta 3:20:15 +epoch [23/50] batch [485/500] time 0.897 (0.889) data 0.000 (0.002) loss 0.7104 (1.0602) acc 78.1250 (73.3956) lr 1.2487e-03 eta 3:20:09 +epoch [23/50] batch [490/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.2354 (1.0616) acc 68.7500 (73.3865) lr 1.2487e-03 eta 3:20:04 +epoch [23/50] batch [495/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.0596 (1.0631) acc 78.1250 (73.3712) lr 1.2487e-03 eta 3:20:00 +epoch [23/50] batch [500/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.3018 (1.0645) acc 68.7500 (73.3375) lr 1.1874e-03 eta 3:19:55 +epoch [24/50] batch [5/500] time 0.879 (1.019) data 0.000 (0.131) loss 1.4600 (1.0775) acc 71.8750 (72.5000) lr 1.1874e-03 eta 3:49:14 +epoch [24/50] batch [10/500] time 0.908 (0.959) data 0.000 (0.066) loss 0.6816 (1.0018) acc 90.6250 (74.6875) lr 1.1874e-03 eta 3:35:39 +epoch [24/50] batch [15/500] time 0.921 (0.939) data 0.000 (0.044) loss 0.7856 (0.9006) acc 87.5000 (77.0833) lr 1.1874e-03 eta 3:30:57 +epoch [24/50] batch [20/500] time 0.884 (0.925) data 0.000 (0.033) loss 1.0166 (0.9829) acc 75.0000 (76.8750) lr 1.1874e-03 eta 3:27:47 +epoch [24/50] batch [25/500] time 0.854 (0.915) data 0.000 (0.026) loss 1.0322 (1.0125) acc 75.0000 (76.0000) lr 1.1874e-03 eta 3:25:28 +epoch [24/50] batch [30/500] time 0.898 (0.911) data 0.000 (0.022) loss 1.3662 (1.0093) acc 65.6250 (75.4167) lr 1.1874e-03 eta 3:24:36 +epoch [24/50] batch [35/500] time 0.890 (0.908) data 0.000 (0.019) loss 0.7964 (1.0099) acc 71.8750 (75.2679) lr 1.1874e-03 eta 3:23:41 +epoch [24/50] batch [40/500] time 0.915 (0.906) data 0.000 (0.017) loss 0.9116 (1.0011) acc 75.0000 (75.3906) lr 1.1874e-03 eta 3:23:15 +epoch [24/50] batch [45/500] time 0.885 (0.904) data 0.000 (0.015) loss 
1.1826 (1.0137) acc 68.7500 (75.3472) lr 1.1874e-03 eta 3:22:42 +epoch [24/50] batch [50/500] time 0.902 (0.902) data 0.000 (0.013) loss 1.1064 (1.0049) acc 75.0000 (75.5625) lr 1.1874e-03 eta 3:22:16 +epoch [24/50] batch [55/500] time 0.865 (0.900) data 0.000 (0.012) loss 1.1982 (1.0004) acc 68.7500 (75.4545) lr 1.1874e-03 eta 3:21:39 +epoch [24/50] batch [60/500] time 0.885 (0.897) data 0.000 (0.011) loss 0.7886 (0.9916) acc 75.0000 (75.6771) lr 1.1874e-03 eta 3:20:56 +epoch [24/50] batch [65/500] time 0.869 (0.896) data 0.000 (0.010) loss 0.6890 (0.9848) acc 71.8750 (75.5288) lr 1.1874e-03 eta 3:20:36 +epoch [24/50] batch [70/500] time 0.884 (0.895) data 0.000 (0.010) loss 0.6797 (0.9851) acc 78.1250 (75.4464) lr 1.1874e-03 eta 3:20:15 +epoch [24/50] batch [75/500] time 0.898 (0.894) data 0.000 (0.009) loss 1.4551 (1.0046) acc 65.6250 (75.0417) lr 1.1874e-03 eta 3:20:05 +epoch [24/50] batch [80/500] time 0.895 (0.894) data 0.000 (0.008) loss 1.0459 (1.0042) acc 75.0000 (74.8828) lr 1.1874e-03 eta 3:20:02 +epoch [24/50] batch [85/500] time 0.916 (0.896) data 0.000 (0.008) loss 0.7974 (1.0038) acc 71.8750 (74.6691) lr 1.1874e-03 eta 3:20:19 +epoch [24/50] batch [90/500] time 0.882 (0.896) data 0.000 (0.008) loss 0.4766 (1.0075) acc 87.5000 (74.6181) lr 1.1874e-03 eta 3:20:12 +epoch [24/50] batch [95/500] time 0.880 (0.896) data 0.000 (0.007) loss 1.2441 (1.0094) acc 62.5000 (74.3750) lr 1.1874e-03 eta 3:20:04 +epoch [24/50] batch [100/500] time 0.922 (0.895) data 0.000 (0.007) loss 1.4150 (1.0151) acc 65.6250 (74.2500) lr 1.1874e-03 eta 3:19:54 +epoch [24/50] batch [105/500] time 0.878 (0.895) data 0.000 (0.006) loss 1.0361 (1.0103) acc 71.8750 (74.2857) lr 1.1874e-03 eta 3:19:42 +epoch [24/50] batch [110/500] time 0.898 (0.894) data 0.000 (0.006) loss 1.4492 (1.0124) acc 46.8750 (73.9773) lr 1.1874e-03 eta 3:19:32 +epoch [24/50] batch [115/500] time 0.900 (0.894) data 0.000 (0.006) loss 0.9790 (1.0121) acc 78.1250 (73.9946) lr 1.1874e-03 eta 3:19:22 +epoch 
[24/50] batch [120/500] time 0.892 (0.893) data 0.000 (0.006) loss 0.7407 (1.0086) acc 84.3750 (74.0625) lr 1.1874e-03 eta 3:19:14 +epoch [24/50] batch [125/500] time 0.849 (0.893) data 0.000 (0.005) loss 1.3350 (1.0104) acc 78.1250 (74.1750) lr 1.1874e-03 eta 3:19:01 +epoch [24/50] batch [130/500] time 0.884 (0.893) data 0.000 (0.005) loss 1.2334 (1.0126) acc 71.8750 (74.1827) lr 1.1874e-03 eta 3:19:01 +epoch [24/50] batch [135/500] time 0.865 (0.892) data 0.000 (0.005) loss 1.4570 (1.0149) acc 71.8750 (74.1667) lr 1.1874e-03 eta 3:18:46 +epoch [24/50] batch [140/500] time 0.884 (0.891) data 0.000 (0.005) loss 1.1553 (1.0100) acc 75.0000 (74.3527) lr 1.1874e-03 eta 3:18:27 +epoch [24/50] batch [145/500] time 0.893 (0.891) data 0.000 (0.005) loss 1.4990 (1.0107) acc 62.5000 (74.1595) lr 1.1874e-03 eta 3:18:13 +epoch [24/50] batch [150/500] time 0.875 (0.890) data 0.000 (0.005) loss 2.1699 (1.0272) acc 59.3750 (73.8125) lr 1.1874e-03 eta 3:18:01 +epoch [24/50] batch [155/500] time 0.876 (0.890) data 0.000 (0.004) loss 1.3350 (1.0333) acc 65.6250 (73.6895) lr 1.1874e-03 eta 3:17:54 +epoch [24/50] batch [160/500] time 0.882 (0.890) data 0.000 (0.004) loss 0.6943 (1.0331) acc 84.3750 (73.6328) lr 1.1874e-03 eta 3:17:49 +epoch [24/50] batch [165/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.0088 (1.0467) acc 75.0000 (73.5606) lr 1.1874e-03 eta 3:17:41 +epoch [24/50] batch [170/500] time 0.870 (0.889) data 0.000 (0.004) loss 1.3604 (1.0511) acc 56.2500 (73.4559) lr 1.1874e-03 eta 3:17:32 +epoch [24/50] batch [175/500] time 0.879 (0.889) data 0.000 (0.004) loss 1.3535 (1.0573) acc 78.1250 (73.3750) lr 1.1874e-03 eta 3:17:23 +epoch [24/50] batch [180/500] time 0.873 (0.889) data 0.000 (0.004) loss 0.8604 (1.0592) acc 75.0000 (73.2639) lr 1.1874e-03 eta 3:17:20 +epoch [24/50] batch [185/500] time 0.872 (0.889) data 0.000 (0.004) loss 1.2568 (1.0578) acc 68.7500 (73.2601) lr 1.1874e-03 eta 3:17:13 +epoch [24/50] batch [190/500] time 0.884 (0.889) data 0.000 (0.004) loss 
0.9053 (1.0549) acc 78.1250 (73.2730) lr 1.1874e-03 eta 3:17:08 +epoch [24/50] batch [195/500] time 0.882 (0.889) data 0.000 (0.004) loss 1.1719 (1.0570) acc 71.8750 (73.2853) lr 1.1874e-03 eta 3:17:04 +epoch [24/50] batch [200/500] time 0.916 (0.889) data 0.000 (0.004) loss 1.0781 (1.0537) acc 75.0000 (73.3594) lr 1.1874e-03 eta 3:16:59 +epoch [24/50] batch [205/500] time 0.909 (0.889) data 0.000 (0.003) loss 1.4385 (1.0587) acc 62.5000 (73.1860) lr 1.1874e-03 eta 3:16:55 +epoch [24/50] batch [210/500] time 0.850 (0.888) data 0.000 (0.003) loss 1.0908 (1.0630) acc 65.6250 (73.0357) lr 1.1874e-03 eta 3:16:47 +epoch [24/50] batch [215/500] time 0.898 (0.888) data 0.000 (0.003) loss 0.9507 (1.0619) acc 78.1250 (73.1250) lr 1.1874e-03 eta 3:16:40 +epoch [24/50] batch [220/500] time 0.860 (0.888) data 0.000 (0.003) loss 1.0625 (1.0667) acc 75.0000 (73.0966) lr 1.1874e-03 eta 3:16:34 +epoch [24/50] batch [225/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.9702 (1.0648) acc 78.1250 (73.1528) lr 1.1874e-03 eta 3:16:23 +epoch [24/50] batch [230/500] time 0.885 (0.888) data 0.000 (0.003) loss 0.5762 (1.0596) acc 78.1250 (73.1929) lr 1.1874e-03 eta 3:16:23 +epoch [24/50] batch [235/500] time 0.863 (0.888) data 0.000 (0.003) loss 1.3662 (1.0578) acc 62.5000 (73.2181) lr 1.1874e-03 eta 3:16:18 +epoch [24/50] batch [240/500] time 0.910 (0.888) data 0.000 (0.003) loss 0.7485 (1.0600) acc 75.0000 (73.2552) lr 1.1874e-03 eta 3:16:13 +epoch [24/50] batch [245/500] time 0.898 (0.888) data 0.000 (0.003) loss 0.5303 (1.0552) acc 87.5000 (73.3163) lr 1.1874e-03 eta 3:16:09 +epoch [24/50] batch [250/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.1309 (1.0561) acc 68.7500 (73.2125) lr 1.1874e-03 eta 3:16:01 +epoch [24/50] batch [255/500] time 0.893 (0.888) data 0.000 (0.003) loss 0.7754 (1.0557) acc 78.1250 (73.2353) lr 1.1874e-03 eta 3:15:55 +epoch [24/50] batch [260/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.4609 (1.0607) acc 65.6250 (73.1490) lr 1.1874e-03 eta 3:15:51 
+epoch [24/50] batch [265/500] time 0.867 (0.888) data 0.000 (0.003) loss 1.2119 (1.0669) acc 68.7500 (73.0425) lr 1.1874e-03 eta 3:15:47 +epoch [24/50] batch [270/500] time 0.993 (0.888) data 0.000 (0.003) loss 1.1367 (1.0656) acc 75.0000 (73.0324) lr 1.1874e-03 eta 3:15:46 +epoch [24/50] batch [275/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.2021 (1.0673) acc 81.2500 (73.0568) lr 1.1874e-03 eta 3:15:40 +epoch [24/50] batch [280/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.4600 (1.0723) acc 65.6250 (72.9911) lr 1.1874e-03 eta 3:15:33 +epoch [24/50] batch [285/500] time 0.899 (0.887) data 0.000 (0.003) loss 1.4727 (1.0768) acc 62.5000 (72.9167) lr 1.1874e-03 eta 3:15:27 +epoch [24/50] batch [290/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.0518 (1.0796) acc 71.8750 (72.7909) lr 1.1874e-03 eta 3:15:21 +epoch [24/50] batch [295/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.8804 (1.0777) acc 68.7500 (72.7542) lr 1.1874e-03 eta 3:15:17 +epoch [24/50] batch [300/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.2080 (1.0757) acc 62.5000 (72.7917) lr 1.1874e-03 eta 3:15:10 +epoch [24/50] batch [305/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.0430 (1.0742) acc 81.2500 (72.8074) lr 1.1874e-03 eta 3:15:04 +epoch [24/50] batch [310/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.8174 (1.0766) acc 62.5000 (72.7722) lr 1.1874e-03 eta 3:14:59 +epoch [24/50] batch [315/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.4326 (1.0784) acc 62.5000 (72.7282) lr 1.1874e-03 eta 3:14:55 +epoch [24/50] batch [320/500] time 0.848 (0.887) data 0.000 (0.002) loss 0.9263 (1.0759) acc 81.2500 (72.8223) lr 1.1874e-03 eta 3:14:48 +epoch [24/50] batch [325/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.6523 (1.0836) acc 65.6250 (72.6538) lr 1.1874e-03 eta 3:14:46 +epoch [24/50] batch [330/500] time 0.930 (0.887) data 0.000 (0.002) loss 1.2246 (1.0835) acc 71.8750 (72.6326) lr 1.1874e-03 eta 3:14:44 +epoch [24/50] batch [335/500] time 0.878 (0.887) data 0.000 
(0.002) loss 1.6045 (1.0870) acc 68.7500 (72.5653) lr 1.1874e-03 eta 3:14:37 +epoch [24/50] batch [340/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.0879 (1.0887) acc 71.8750 (72.5643) lr 1.1874e-03 eta 3:14:29 +epoch [24/50] batch [345/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.8330 (1.0879) acc 75.0000 (72.5634) lr 1.1874e-03 eta 3:14:23 +epoch [24/50] batch [350/500] time 0.848 (0.886) data 0.000 (0.002) loss 1.2822 (1.0897) acc 71.8750 (72.5357) lr 1.1874e-03 eta 3:14:16 +epoch [24/50] batch [355/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.1201 (1.0881) acc 75.0000 (72.5880) lr 1.1874e-03 eta 3:14:09 +epoch [24/50] batch [360/500] time 0.850 (0.886) data 0.000 (0.002) loss 0.7031 (1.0880) acc 84.3750 (72.6042) lr 1.1874e-03 eta 3:14:02 +epoch [24/50] batch [365/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.0137 (1.0847) acc 71.8750 (72.6541) lr 1.1874e-03 eta 3:13:56 +epoch [24/50] batch [370/500] time 0.854 (0.886) data 0.000 (0.002) loss 1.6543 (1.0845) acc 65.6250 (72.6436) lr 1.1874e-03 eta 3:13:53 +epoch [24/50] batch [375/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.1543 (1.0837) acc 65.6250 (72.6833) lr 1.1874e-03 eta 3:13:50 +epoch [24/50] batch [380/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.0547 (1.0805) acc 68.7500 (72.7467) lr 1.1874e-03 eta 3:13:45 +epoch [24/50] batch [385/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.7959 (1.0820) acc 81.2500 (72.6786) lr 1.1874e-03 eta 3:13:40 +epoch [24/50] batch [390/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.2744 (1.0831) acc 71.8750 (72.6763) lr 1.1874e-03 eta 3:13:35 +epoch [24/50] batch [395/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.0859 (1.0816) acc 71.8750 (72.7136) lr 1.1874e-03 eta 3:13:32 +epoch [24/50] batch [400/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.4111 (1.0802) acc 62.5000 (72.7266) lr 1.1874e-03 eta 3:13:27 +epoch [24/50] batch [405/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.3311 (1.0797) acc 62.5000 (72.7083) lr 1.1874e-03 
eta 3:13:23 +epoch [24/50] batch [410/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.0889 (1.0796) acc 71.8750 (72.7363) lr 1.1874e-03 eta 3:13:18 +epoch [24/50] batch [415/500] time 0.908 (0.886) data 0.000 (0.002) loss 0.8110 (1.0827) acc 71.8750 (72.7184) lr 1.1874e-03 eta 3:13:19 +epoch [24/50] batch [420/500] time 0.851 (0.886) data 0.000 (0.002) loss 0.6890 (1.0831) acc 75.0000 (72.7009) lr 1.1874e-03 eta 3:13:11 +epoch [24/50] batch [425/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9678 (1.0829) acc 71.8750 (72.7059) lr 1.1874e-03 eta 3:13:05 +epoch [24/50] batch [430/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.8384 (1.0805) acc 75.0000 (72.7253) lr 1.1874e-03 eta 3:12:59 +epoch [24/50] batch [435/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.2354 (1.0830) acc 71.8750 (72.6580) lr 1.1874e-03 eta 3:12:52 +epoch [24/50] batch [440/500] time 0.916 (0.886) data 0.000 (0.002) loss 1.0645 (1.0848) acc 71.8750 (72.6420) lr 1.1874e-03 eta 3:12:48 +epoch [24/50] batch [445/500] time 0.877 (0.886) data 0.000 (0.002) loss 1.1113 (1.0825) acc 71.8750 (72.6896) lr 1.1874e-03 eta 3:12:42 +epoch [24/50] batch [450/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.4990 (1.0830) acc 62.5000 (72.6528) lr 1.1874e-03 eta 3:12:37 +epoch [24/50] batch [455/500] time 0.876 (0.886) data 0.000 (0.002) loss 0.8228 (1.0806) acc 71.8750 (72.6923) lr 1.1874e-03 eta 3:12:32 +epoch [24/50] batch [460/500] time 0.913 (0.886) data 0.000 (0.002) loss 1.7891 (1.0814) acc 62.5000 (72.6834) lr 1.1874e-03 eta 3:12:30 +epoch [24/50] batch [465/500] time 0.871 (0.886) data 0.000 (0.002) loss 0.8818 (1.0822) acc 68.7500 (72.6613) lr 1.1874e-03 eta 3:12:26 +epoch [24/50] batch [470/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.4863 (1.0822) acc 65.6250 (72.6995) lr 1.1874e-03 eta 3:12:22 +epoch [24/50] batch [475/500] time 0.855 (0.886) data 0.001 (0.002) loss 0.9097 (1.0854) acc 81.2500 (72.6579) lr 1.1874e-03 eta 3:12:17 +epoch [24/50] batch [480/500] time 0.885 (0.886) data 
0.000 (0.002) loss 1.1240 (1.0836) acc 71.8750 (72.6823) lr 1.1874e-03 eta 3:12:13 +epoch [24/50] batch [485/500] time 0.867 (0.886) data 0.000 (0.002) loss 0.5894 (1.0864) acc 84.3750 (72.6740) lr 1.1874e-03 eta 3:12:06 +epoch [24/50] batch [490/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.8457 (1.0860) acc 84.3750 (72.7232) lr 1.1874e-03 eta 3:12:05 +epoch [24/50] batch [495/500] time 0.891 (0.886) data 0.000 (0.002) loss 1.0762 (1.0866) acc 71.8750 (72.7462) lr 1.1874e-03 eta 3:11:59 +epoch [24/50] batch [500/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.2666 (1.0863) acc 68.7500 (72.7313) lr 1.1253e-03 eta 3:11:53 +epoch [25/50] batch [5/500] time 0.860 (1.003) data 0.000 (0.127) loss 1.1025 (1.1628) acc 65.6250 (71.8750) lr 1.1253e-03 eta 3:37:10 +epoch [25/50] batch [10/500] time 0.883 (0.954) data 0.000 (0.064) loss 1.6123 (1.0915) acc 62.5000 (72.1875) lr 1.1253e-03 eta 3:26:35 +epoch [25/50] batch [15/500] time 0.893 (0.935) data 0.000 (0.043) loss 1.3408 (1.1609) acc 75.0000 (71.8750) lr 1.1253e-03 eta 3:22:19 +epoch [25/50] batch [20/500] time 0.902 (0.925) data 0.000 (0.032) loss 0.9302 (1.1218) acc 78.1250 (72.1875) lr 1.1253e-03 eta 3:20:12 +epoch [25/50] batch [25/500] time 0.889 (0.916) data 0.000 (0.026) loss 0.7856 (1.1209) acc 81.2500 (71.8750) lr 1.1253e-03 eta 3:18:06 +epoch [25/50] batch [30/500] time 0.910 (0.910) data 0.000 (0.021) loss 0.6113 (1.0946) acc 84.3750 (72.1875) lr 1.1253e-03 eta 3:16:42 +epoch [25/50] batch [35/500] time 0.871 (0.905) data 0.000 (0.018) loss 0.9565 (1.0684) acc 75.0000 (72.7679) lr 1.1253e-03 eta 3:15:33 +epoch [25/50] batch [40/500] time 0.919 (0.904) data 0.000 (0.016) loss 1.4629 (1.0758) acc 75.0000 (73.1250) lr 1.1253e-03 eta 3:15:21 +epoch [25/50] batch [45/500] time 0.859 (0.901) data 0.000 (0.014) loss 1.5381 (1.0931) acc 68.7500 (73.1944) lr 1.1253e-03 eta 3:14:29 +epoch [25/50] batch [50/500] time 0.912 (0.900) data 0.000 (0.013) loss 1.0117 (1.0831) acc 81.2500 (73.5625) lr 1.1253e-03 eta 
3:14:08 +epoch [25/50] batch [55/500] time 0.919 (0.899) data 0.000 (0.012) loss 1.2871 (1.0749) acc 65.6250 (73.6932) lr 1.1253e-03 eta 3:13:52 +epoch [25/50] batch [60/500] time 0.888 (0.897) data 0.000 (0.011) loss 1.0615 (1.0635) acc 65.6250 (73.6458) lr 1.1253e-03 eta 3:13:30 +epoch [25/50] batch [65/500] time 0.874 (0.895) data 0.000 (0.010) loss 1.6934 (1.0600) acc 65.6250 (73.7019) lr 1.1253e-03 eta 3:13:02 +epoch [25/50] batch [70/500] time 0.883 (0.896) data 0.000 (0.009) loss 1.1670 (1.0594) acc 68.7500 (73.5268) lr 1.1253e-03 eta 3:13:07 +epoch [25/50] batch [75/500] time 0.873 (0.895) data 0.000 (0.009) loss 0.9297 (1.0517) acc 84.3750 (73.7500) lr 1.1253e-03 eta 3:12:43 +epoch [25/50] batch [80/500] time 0.859 (0.893) data 0.000 (0.008) loss 1.0498 (1.0658) acc 81.2500 (73.3203) lr 1.1253e-03 eta 3:12:19 +epoch [25/50] batch [85/500] time 0.896 (0.893) data 0.000 (0.008) loss 1.1211 (1.0660) acc 71.8750 (73.1618) lr 1.1253e-03 eta 3:12:11 +epoch [25/50] batch [90/500] time 0.862 (0.893) data 0.000 (0.007) loss 1.2178 (1.0663) acc 78.1250 (73.3333) lr 1.1253e-03 eta 3:12:02 +epoch [25/50] batch [95/500] time 0.881 (0.892) data 0.000 (0.007) loss 1.1182 (1.0698) acc 65.6250 (73.3553) lr 1.1253e-03 eta 3:11:52 +epoch [25/50] batch [100/500] time 0.866 (0.891) data 0.000 (0.007) loss 1.1104 (1.0677) acc 68.7500 (73.3750) lr 1.1253e-03 eta 3:11:38 +epoch [25/50] batch [105/500] time 0.873 (0.891) data 0.000 (0.006) loss 1.0586 (1.0631) acc 68.7500 (73.1548) lr 1.1253e-03 eta 3:11:28 +epoch [25/50] batch [110/500] time 0.871 (0.890) data 0.000 (0.006) loss 0.8013 (1.0509) acc 84.3750 (73.5795) lr 1.1253e-03 eta 3:11:17 +epoch [25/50] batch [115/500] time 0.893 (0.891) data 0.000 (0.006) loss 1.6152 (1.0553) acc 62.5000 (73.5326) lr 1.1253e-03 eta 3:11:20 +epoch [25/50] batch [120/500] time 0.870 (0.891) data 0.000 (0.006) loss 1.1611 (1.0637) acc 65.6250 (73.1771) lr 1.1253e-03 eta 3:11:13 +epoch [25/50] batch [125/500] time 0.892 (0.891) data 0.000 (0.005) 
loss 1.2451 (1.0608) acc 68.7500 (73.2250) lr 1.1253e-03 eta 3:11:06 +epoch [25/50] batch [130/500] time 0.888 (0.890) data 0.000 (0.005) loss 0.9116 (1.0721) acc 75.0000 (73.1250) lr 1.1253e-03 eta 3:10:58 +epoch [25/50] batch [135/500] time 0.883 (0.890) data 0.000 (0.005) loss 0.8691 (1.0691) acc 71.8750 (73.2639) lr 1.1253e-03 eta 3:10:44 +epoch [25/50] batch [140/500] time 0.861 (0.889) data 0.000 (0.005) loss 1.1484 (1.0683) acc 62.5000 (73.3259) lr 1.1253e-03 eta 3:10:36 +epoch [25/50] batch [145/500] time 0.871 (0.889) data 0.000 (0.005) loss 0.7749 (1.0647) acc 78.1250 (73.5345) lr 1.1253e-03 eta 3:10:25 +epoch [25/50] batch [150/500] time 0.872 (0.889) data 0.000 (0.004) loss 1.0928 (1.0639) acc 65.6250 (73.4375) lr 1.1253e-03 eta 3:10:19 +epoch [25/50] batch [155/500] time 0.867 (0.888) data 0.000 (0.004) loss 0.5767 (1.0636) acc 87.5000 (73.5081) lr 1.1253e-03 eta 3:10:10 +epoch [25/50] batch [160/500] time 0.869 (0.888) data 0.000 (0.004) loss 1.7314 (1.0725) acc 62.5000 (73.3203) lr 1.1253e-03 eta 3:10:04 +epoch [25/50] batch [165/500] time 0.880 (0.888) data 0.000 (0.004) loss 0.8057 (1.0684) acc 84.3750 (73.4280) lr 1.1253e-03 eta 3:09:56 +epoch [25/50] batch [170/500] time 0.915 (0.888) data 0.000 (0.004) loss 0.9980 (1.0725) acc 71.8750 (73.3824) lr 1.1253e-03 eta 3:09:49 +epoch [25/50] batch [175/500] time 0.893 (0.888) data 0.000 (0.004) loss 0.7275 (1.0719) acc 87.5000 (73.2857) lr 1.1253e-03 eta 3:09:46 +epoch [25/50] batch [180/500] time 0.898 (0.888) data 0.000 (0.004) loss 0.9028 (1.0699) acc 78.1250 (73.3507) lr 1.1253e-03 eta 3:09:41 +epoch [25/50] batch [185/500] time 0.882 (0.888) data 0.000 (0.004) loss 0.8350 (1.0767) acc 75.0000 (73.2601) lr 1.1253e-03 eta 3:09:35 +epoch [25/50] batch [190/500] time 0.897 (0.887) data 0.000 (0.004) loss 0.9937 (1.0844) acc 75.0000 (73.1743) lr 1.1253e-03 eta 3:09:26 +epoch [25/50] batch [195/500] time 0.902 (0.887) data 0.000 (0.003) loss 0.8965 (1.0795) acc 71.8750 (73.3333) lr 1.1253e-03 eta 
3:09:22 +epoch [25/50] batch [200/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.0605 (1.0797) acc 71.8750 (73.3438) lr 1.1253e-03 eta 3:09:12 +epoch [25/50] batch [205/500] time 0.891 (0.887) data 0.000 (0.003) loss 0.9839 (1.0801) acc 75.0000 (73.2622) lr 1.1253e-03 eta 3:09:04 +epoch [25/50] batch [210/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.1123 (1.0789) acc 68.7500 (73.2738) lr 1.1253e-03 eta 3:08:56 +epoch [25/50] batch [215/500] time 0.865 (0.887) data 0.000 (0.003) loss 0.8081 (1.0761) acc 75.0000 (73.3721) lr 1.1253e-03 eta 3:08:59 +epoch [25/50] batch [220/500] time 0.877 (0.887) data 0.000 (0.003) loss 1.0469 (1.0711) acc 71.8750 (73.4801) lr 1.1253e-03 eta 3:08:52 +epoch [25/50] batch [225/500] time 0.853 (0.886) data 0.000 (0.003) loss 1.1318 (1.0712) acc 65.6250 (73.4444) lr 1.1253e-03 eta 3:08:44 +epoch [25/50] batch [230/500] time 0.893 (0.886) data 0.000 (0.003) loss 0.9590 (1.0763) acc 78.1250 (73.3696) lr 1.1253e-03 eta 3:08:39 +epoch [25/50] batch [235/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.4014 (1.0827) acc 62.5000 (73.1516) lr 1.1253e-03 eta 3:08:35 +epoch [25/50] batch [240/500] time 0.896 (0.886) data 0.000 (0.003) loss 0.6353 (1.0826) acc 71.8750 (73.0599) lr 1.1253e-03 eta 3:08:31 +epoch [25/50] batch [245/500] time 0.867 (0.887) data 0.000 (0.003) loss 1.0293 (1.0820) acc 71.8750 (73.0485) lr 1.1253e-03 eta 3:08:29 +epoch [25/50] batch [250/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.1260 (1.0793) acc 68.7500 (73.1250) lr 1.1253e-03 eta 3:08:23 +epoch [25/50] batch [255/500] time 0.986 (0.887) data 0.000 (0.003) loss 1.1904 (1.0801) acc 78.1250 (73.1618) lr 1.1253e-03 eta 3:08:26 +epoch [25/50] batch [260/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.6396 (1.0850) acc 71.8750 (73.1611) lr 1.1253e-03 eta 3:08:21 +epoch [25/50] batch [265/500] time 0.853 (0.887) data 0.000 (0.003) loss 0.9673 (1.0816) acc 65.6250 (73.1722) lr 1.1253e-03 eta 3:08:14 +epoch [25/50] batch [270/500] time 0.859 (0.886) data 
0.000 (0.003) loss 1.3281 (1.0836) acc 75.0000 (73.1829) lr 1.1253e-03 eta 3:08:04 +epoch [25/50] batch [275/500] time 0.928 (0.887) data 0.000 (0.003) loss 0.8965 (1.0835) acc 75.0000 (73.1477) lr 1.1253e-03 eta 3:08:02 +epoch [25/50] batch [280/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.9321 (1.0859) acc 75.0000 (73.0469) lr 1.1253e-03 eta 3:07:59 +epoch [25/50] batch [285/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.7676 (1.0890) acc 53.1250 (72.9715) lr 1.1253e-03 eta 3:07:53 +epoch [25/50] batch [290/500] time 0.878 (0.886) data 0.000 (0.002) loss 0.6284 (1.0871) acc 75.0000 (72.9634) lr 1.1253e-03 eta 3:07:45 +epoch [25/50] batch [295/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.7197 (1.0871) acc 78.1250 (72.9343) lr 1.1253e-03 eta 3:07:44 +epoch [25/50] batch [300/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7915 (1.0857) acc 84.3750 (72.9792) lr 1.1253e-03 eta 3:07:39 +epoch [25/50] batch [305/500] time 0.924 (0.887) data 0.000 (0.002) loss 0.8228 (1.0868) acc 75.0000 (72.9201) lr 1.1253e-03 eta 3:07:36 +epoch [25/50] batch [310/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.7632 (1.0845) acc 84.3750 (72.9637) lr 1.1253e-03 eta 3:07:30 +epoch [25/50] batch [315/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.8218 (1.0832) acc 78.1250 (72.9960) lr 1.1253e-03 eta 3:07:29 +epoch [25/50] batch [320/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.5469 (1.0781) acc 87.5000 (73.1445) lr 1.1253e-03 eta 3:07:25 +epoch [25/50] batch [325/500] time 0.913 (0.887) data 0.000 (0.002) loss 0.8154 (1.0746) acc 84.3750 (73.1827) lr 1.1253e-03 eta 3:07:23 +epoch [25/50] batch [330/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.9043 (1.0742) acc 75.0000 (73.2102) lr 1.1253e-03 eta 3:07:20 +epoch [25/50] batch [335/500] time 0.921 (0.887) data 0.000 (0.002) loss 1.3887 (1.0745) acc 68.7500 (73.2183) lr 1.1253e-03 eta 3:07:19 +epoch [25/50] batch [340/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.1006 (1.0729) acc 68.7500 (73.2169) lr 
1.1253e-03 eta 3:07:14 +epoch [25/50] batch [345/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.8662 (1.0713) acc 81.2500 (73.2971) lr 1.1253e-03 eta 3:07:08 +epoch [25/50] batch [350/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.8921 (1.0726) acc 68.7500 (73.2321) lr 1.1253e-03 eta 3:07:02 +epoch [25/50] batch [355/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.8369 (1.0727) acc 81.2500 (73.2306) lr 1.1253e-03 eta 3:07:01 +epoch [25/50] batch [360/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.9956 (1.0734) acc 68.7500 (73.1510) lr 1.1253e-03 eta 3:06:54 +epoch [25/50] batch [365/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.0918 (1.0748) acc 71.8750 (73.1079) lr 1.1253e-03 eta 3:06:48 +epoch [25/50] batch [370/500] time 0.889 (0.887) data 0.000 (0.002) loss 0.8384 (1.0766) acc 81.2500 (73.1081) lr 1.1253e-03 eta 3:06:42 +epoch [25/50] batch [375/500] time 0.922 (0.887) data 0.000 (0.002) loss 0.7280 (1.0748) acc 75.0000 (73.1833) lr 1.1253e-03 eta 3:06:41 +epoch [25/50] batch [380/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.3730 (1.0732) acc 75.0000 (73.2484) lr 1.1253e-03 eta 3:06:37 +epoch [25/50] batch [385/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.2500 (1.0749) acc 62.5000 (73.2224) lr 1.1253e-03 eta 3:06:31 +epoch [25/50] batch [390/500] time 0.912 (0.887) data 0.000 (0.002) loss 0.8237 (1.0747) acc 71.8750 (73.2212) lr 1.1253e-03 eta 3:06:27 +epoch [25/50] batch [395/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.5098 (1.0762) acc 59.3750 (73.1962) lr 1.1253e-03 eta 3:06:20 +epoch [25/50] batch [400/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.9434 (1.0755) acc 78.1250 (73.2344) lr 1.1253e-03 eta 3:06:20 +epoch [25/50] batch [405/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.6982 (1.0742) acc 87.5000 (73.2562) lr 1.1253e-03 eta 3:06:15 +epoch [25/50] batch [410/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.2510 (1.0742) acc 71.8750 (73.2774) lr 1.1253e-03 eta 3:06:10 +epoch [25/50] batch [415/500] time 0.846 
(0.887) data 0.000 (0.002) loss 1.4131 (1.0715) acc 71.8750 (73.3434) lr 1.1253e-03 eta 3:06:04 +epoch [25/50] batch [420/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.4561 (1.0711) acc 62.5000 (73.3333) lr 1.1253e-03 eta 3:06:02 +epoch [25/50] batch [425/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.5898 (1.0713) acc 84.3750 (73.3309) lr 1.1253e-03 eta 3:05:56 +epoch [25/50] batch [430/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.6646 (1.0720) acc 81.2500 (73.2994) lr 1.1253e-03 eta 3:05:53 +epoch [25/50] batch [435/500] time 0.851 (0.887) data 0.000 (0.002) loss 1.3477 (1.0742) acc 59.3750 (73.2471) lr 1.1253e-03 eta 3:05:47 +epoch [25/50] batch [440/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0830 (1.0729) acc 59.3750 (73.2315) lr 1.1253e-03 eta 3:05:43 +epoch [25/50] batch [445/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.9248 (1.0717) acc 65.6250 (73.2303) lr 1.1253e-03 eta 3:05:38 +epoch [25/50] batch [450/500] time 0.926 (0.887) data 0.000 (0.002) loss 1.3301 (1.0734) acc 68.7500 (73.1875) lr 1.1253e-03 eta 3:05:35 +epoch [25/50] batch [455/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7808 (1.0724) acc 81.2500 (73.1937) lr 1.1253e-03 eta 3:05:32 +epoch [25/50] batch [460/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2109 (1.0733) acc 65.6250 (73.1793) lr 1.1253e-03 eta 3:05:27 +epoch [25/50] batch [465/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.3662 (1.0721) acc 65.6250 (73.1519) lr 1.1253e-03 eta 3:05:22 +epoch [25/50] batch [470/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.8979 (1.0699) acc 71.8750 (73.1848) lr 1.1253e-03 eta 3:05:17 +epoch [25/50] batch [475/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8828 (1.0713) acc 84.3750 (73.1776) lr 1.1253e-03 eta 3:05:13 +epoch [25/50] batch [480/500] time 0.914 (0.887) data 0.000 (0.002) loss 0.9141 (1.0732) acc 81.2500 (73.1576) lr 1.1253e-03 eta 3:05:09 +epoch [25/50] batch [485/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9976 (1.0735) acc 81.2500 
(73.1637) lr 1.1253e-03 eta 3:05:05 +epoch [25/50] batch [490/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2617 (1.0734) acc 71.8750 (73.1633) lr 1.1253e-03 eta 3:05:00 +epoch [25/50] batch [495/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.5342 (1.0713) acc 87.5000 (73.2197) lr 1.1253e-03 eta 3:04:53 +epoch [25/50] batch [500/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.7856 (1.0708) acc 81.2500 (73.2375) lr 1.0628e-03 eta 3:04:51 +epoch [26/50] batch [5/500] time 0.889 (1.030) data 0.000 (0.150) loss 0.8887 (0.8910) acc 84.3750 (78.1250) lr 1.0628e-03 eta 3:34:24 +epoch [26/50] batch [10/500] time 0.855 (0.949) data 0.000 (0.075) loss 1.0830 (1.1404) acc 71.8750 (72.1875) lr 1.0628e-03 eta 3:17:35 +epoch [26/50] batch [15/500] time 0.890 (0.932) data 0.000 (0.050) loss 0.8716 (1.0903) acc 68.7500 (72.5000) lr 1.0628e-03 eta 3:13:50 +epoch [26/50] batch [20/500] time 0.908 (0.918) data 0.000 (0.038) loss 0.9473 (1.0906) acc 75.0000 (72.8125) lr 1.0628e-03 eta 3:10:55 +epoch [26/50] batch [25/500] time 0.889 (0.911) data 0.000 (0.030) loss 1.1221 (1.0992) acc 71.8750 (72.7500) lr 1.0628e-03 eta 3:09:27 +epoch [26/50] batch [30/500] time 0.896 (0.909) data 0.000 (0.025) loss 0.6084 (1.0830) acc 84.3750 (73.0208) lr 1.0628e-03 eta 3:08:56 +epoch [26/50] batch [35/500] time 0.865 (0.906) data 0.000 (0.022) loss 1.1709 (1.1156) acc 71.8750 (72.5000) lr 1.0628e-03 eta 3:08:16 +epoch [26/50] batch [40/500] time 0.869 (0.904) data 0.000 (0.019) loss 0.9185 (1.1105) acc 78.1250 (72.5000) lr 1.0628e-03 eta 3:07:49 +epoch [26/50] batch [45/500] time 0.896 (0.903) data 0.000 (0.017) loss 1.2344 (1.0977) acc 56.2500 (72.4306) lr 1.0628e-03 eta 3:07:27 +epoch [26/50] batch [50/500] time 0.881 (0.901) data 0.000 (0.015) loss 1.2969 (1.1079) acc 71.8750 (72.2500) lr 1.0628e-03 eta 3:06:57 +epoch [26/50] batch [55/500] time 0.892 (0.900) data 0.000 (0.014) loss 0.9834 (1.1115) acc 81.2500 (72.2159) lr 1.0628e-03 eta 3:06:38 +epoch [26/50] batch [60/500] time 0.885 
(0.900) data 0.000 (0.013) loss 0.8535 (1.1122) acc 81.2500 (72.3958) lr 1.0628e-03 eta 3:06:30 +epoch [26/50] batch [65/500] time 0.882 (0.899) data 0.000 (0.012) loss 0.7939 (1.0957) acc 78.1250 (72.6442) lr 1.0628e-03 eta 3:06:18 +epoch [26/50] batch [70/500] time 0.877 (0.898) data 0.000 (0.011) loss 1.3555 (1.0978) acc 62.5000 (72.6786) lr 1.0628e-03 eta 3:06:00 +epoch [26/50] batch [75/500] time 0.895 (0.896) data 0.000 (0.010) loss 1.3018 (1.1038) acc 71.8750 (72.5417) lr 1.0628e-03 eta 3:05:37 +epoch [26/50] batch [80/500] time 0.848 (0.895) data 0.000 (0.010) loss 0.6733 (1.0849) acc 93.7500 (72.9297) lr 1.0628e-03 eta 3:05:13 +epoch [26/50] batch [85/500] time 0.857 (0.894) data 0.000 (0.009) loss 1.4121 (1.0841) acc 75.0000 (73.1985) lr 1.0628e-03 eta 3:04:53 +epoch [26/50] batch [90/500] time 0.872 (0.893) data 0.000 (0.009) loss 1.1387 (1.0863) acc 75.0000 (73.1597) lr 1.0628e-03 eta 3:04:42 +epoch [26/50] batch [95/500] time 0.871 (0.892) data 0.000 (0.008) loss 0.7793 (1.0896) acc 68.7500 (73.0921) lr 1.0628e-03 eta 3:04:26 +epoch [26/50] batch [100/500] time 0.900 (0.893) data 0.000 (0.008) loss 1.0830 (1.0918) acc 75.0000 (73.0938) lr 1.0628e-03 eta 3:04:38 +epoch [26/50] batch [105/500] time 0.873 (0.893) data 0.000 (0.007) loss 1.1572 (1.0850) acc 62.5000 (73.0655) lr 1.0628e-03 eta 3:04:23 +epoch [26/50] batch [110/500] time 0.860 (0.891) data 0.000 (0.007) loss 1.2871 (1.0855) acc 71.8750 (73.0682) lr 1.0628e-03 eta 3:04:02 +epoch [26/50] batch [115/500] time 0.866 (0.891) data 0.000 (0.007) loss 0.9214 (1.0790) acc 71.8750 (73.0707) lr 1.0628e-03 eta 3:03:50 +epoch [26/50] batch [120/500] time 0.885 (0.890) data 0.000 (0.006) loss 0.6909 (1.0770) acc 75.0000 (72.9688) lr 1.0628e-03 eta 3:03:43 +epoch [26/50] batch [125/500] time 0.878 (0.890) data 0.000 (0.006) loss 1.3057 (1.0845) acc 68.7500 (72.7500) lr 1.0628e-03 eta 3:03:32 +epoch [26/50] batch [130/500] time 0.899 (0.890) data 0.000 (0.006) loss 0.5498 (1.0862) acc 81.2500 (72.5962) lr 
1.0628e-03 eta 3:03:26 +epoch [26/50] batch [135/500] time 0.876 (0.890) data 0.000 (0.006) loss 0.9399 (1.0885) acc 65.6250 (72.4537) lr 1.0628e-03 eta 3:03:23 +epoch [26/50] batch [140/500] time 0.886 (0.889) data 0.000 (0.006) loss 1.4502 (1.0895) acc 71.8750 (72.5000) lr 1.0628e-03 eta 3:03:11 +epoch [26/50] batch [145/500] time 0.923 (0.889) data 0.000 (0.005) loss 1.1777 (1.0907) acc 65.6250 (72.4353) lr 1.0628e-03 eta 3:03:08 +epoch [26/50] batch [150/500] time 0.914 (0.890) data 0.000 (0.005) loss 1.0146 (1.0795) acc 75.0000 (72.6458) lr 1.0628e-03 eta 3:03:09 +epoch [26/50] batch [155/500] time 0.862 (0.890) data 0.000 (0.005) loss 0.7832 (1.0715) acc 68.7500 (72.7823) lr 1.0628e-03 eta 3:03:03 +epoch [26/50] batch [160/500] time 0.866 (0.889) data 0.000 (0.005) loss 0.6528 (1.0656) acc 84.3750 (72.8906) lr 1.0628e-03 eta 3:02:56 +epoch [26/50] batch [165/500] time 0.889 (0.889) data 0.000 (0.005) loss 1.3574 (1.0731) acc 65.6250 (72.8598) lr 1.0628e-03 eta 3:02:49 +epoch [26/50] batch [170/500] time 0.905 (0.889) data 0.000 (0.005) loss 1.1953 (1.0765) acc 71.8750 (72.7206) lr 1.0628e-03 eta 3:02:44 +epoch [26/50] batch [175/500] time 0.872 (0.889) data 0.000 (0.005) loss 1.2158 (1.0775) acc 65.6250 (72.7500) lr 1.0628e-03 eta 3:02:42 +epoch [26/50] batch [180/500] time 0.874 (0.889) data 0.000 (0.004) loss 0.9790 (1.0732) acc 75.0000 (72.6910) lr 1.0628e-03 eta 3:02:32 +epoch [26/50] batch [185/500] time 0.860 (0.889) data 0.000 (0.004) loss 0.9497 (1.0706) acc 75.0000 (72.7534) lr 1.0628e-03 eta 3:02:25 +epoch [26/50] batch [190/500] time 0.872 (0.888) data 0.000 (0.004) loss 1.5908 (1.0706) acc 59.3750 (72.7796) lr 1.0628e-03 eta 3:02:15 +epoch [26/50] batch [195/500] time 0.851 (0.888) data 0.000 (0.004) loss 1.0029 (1.0714) acc 78.1250 (72.7244) lr 1.0628e-03 eta 3:02:07 +epoch [26/50] batch [200/500] time 0.886 (0.888) data 0.000 (0.004) loss 1.7246 (1.0750) acc 59.3750 (72.7031) lr 1.0628e-03 eta 3:02:05 +epoch [26/50] batch [205/500] time 0.867 
(0.888) data 0.000 (0.004) loss 1.0557 (1.0674) acc 78.1250 (72.8201) lr 1.0628e-03 eta 3:01:54 +epoch [26/50] batch [210/500] time 0.902 (0.888) data 0.000 (0.004) loss 0.6694 (1.0664) acc 81.2500 (72.9167) lr 1.0628e-03 eta 3:01:48 +epoch [26/50] batch [215/500] time 0.892 (0.887) data 0.000 (0.004) loss 0.9941 (1.0671) acc 62.5000 (72.8634) lr 1.0628e-03 eta 3:01:41 +epoch [26/50] batch [220/500] time 0.867 (0.887) data 0.000 (0.004) loss 1.1934 (1.0662) acc 62.5000 (72.8693) lr 1.0628e-03 eta 3:01:32 +epoch [26/50] batch [225/500] time 0.852 (0.887) data 0.000 (0.004) loss 1.5352 (1.0632) acc 71.8750 (72.9306) lr 1.0628e-03 eta 3:01:27 +epoch [26/50] batch [230/500] time 0.910 (0.887) data 0.000 (0.004) loss 1.6875 (1.0645) acc 68.7500 (72.9755) lr 1.0628e-03 eta 3:01:23 +epoch [26/50] batch [235/500] time 0.872 (0.886) data 0.000 (0.003) loss 0.9658 (1.0642) acc 75.0000 (72.9654) lr 1.0628e-03 eta 3:01:11 +epoch [26/50] batch [240/500] time 0.981 (0.886) data 0.000 (0.003) loss 1.0996 (1.0640) acc 68.7500 (72.9688) lr 1.0628e-03 eta 3:01:07 +epoch [26/50] batch [245/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.8828 (1.0613) acc 75.0000 (73.0740) lr 1.0628e-03 eta 3:00:58 +epoch [26/50] batch [250/500] time 0.855 (0.886) data 0.000 (0.003) loss 1.6367 (1.0670) acc 56.2500 (73.0000) lr 1.0628e-03 eta 3:00:49 +epoch [26/50] batch [255/500] time 0.878 (0.886) data 0.000 (0.003) loss 1.2715 (1.0666) acc 62.5000 (72.9779) lr 1.0628e-03 eta 3:00:45 +epoch [26/50] batch [260/500] time 0.879 (0.886) data 0.000 (0.003) loss 1.1064 (1.0648) acc 68.7500 (72.9447) lr 1.0628e-03 eta 3:00:41 +epoch [26/50] batch [265/500] time 0.868 (0.886) data 0.000 (0.003) loss 1.6836 (1.0654) acc 53.1250 (72.8892) lr 1.0628e-03 eta 3:00:35 +epoch [26/50] batch [270/500] time 0.900 (0.886) data 0.000 (0.003) loss 1.1641 (1.0685) acc 62.5000 (72.8704) lr 1.0628e-03 eta 3:00:32 +epoch [26/50] batch [275/500] time 0.918 (0.886) data 0.000 (0.003) loss 1.1650 (1.0671) acc 71.8750 
(72.9205) lr 1.0628e-03 eta 3:00:28 +epoch [26/50] batch [280/500] time 0.885 (0.886) data 0.000 (0.003) loss 1.0771 (1.0644) acc 75.0000 (73.0357) lr 1.0628e-03 eta 3:00:23 +epoch [26/50] batch [285/500] time 0.880 (0.886) data 0.000 (0.003) loss 1.5586 (1.0658) acc 59.3750 (72.9605) lr 1.0628e-03 eta 3:00:20 +epoch [26/50] batch [290/500] time 0.905 (0.886) data 0.000 (0.003) loss 0.9668 (1.0691) acc 71.8750 (72.9203) lr 1.0628e-03 eta 3:00:17 +epoch [26/50] batch [295/500] time 0.888 (0.886) data 0.000 (0.003) loss 1.4580 (1.0683) acc 62.5000 (72.9131) lr 1.0628e-03 eta 3:00:10 +epoch [26/50] batch [300/500] time 0.868 (0.886) data 0.000 (0.003) loss 1.2061 (1.0652) acc 65.6250 (72.9792) lr 1.0628e-03 eta 3:00:05 +epoch [26/50] batch [305/500] time 0.885 (0.886) data 0.000 (0.003) loss 0.7847 (1.0618) acc 78.1250 (72.9713) lr 1.0628e-03 eta 2:59:59 +epoch [26/50] batch [310/500] time 0.881 (0.885) data 0.000 (0.003) loss 0.9971 (1.0608) acc 68.7500 (73.0141) lr 1.0628e-03 eta 2:59:53 +epoch [26/50] batch [315/500] time 0.858 (0.885) data 0.000 (0.003) loss 1.1777 (1.0583) acc 68.7500 (73.0357) lr 1.0628e-03 eta 2:59:49 +epoch [26/50] batch [320/500] time 0.878 (0.885) data 0.000 (0.003) loss 0.4976 (1.0556) acc 81.2500 (73.0176) lr 1.0628e-03 eta 2:59:44 +epoch [26/50] batch [325/500] time 0.877 (0.885) data 0.000 (0.003) loss 0.6235 (1.0519) acc 84.3750 (73.1250) lr 1.0628e-03 eta 2:59:37 +epoch [26/50] batch [330/500] time 0.877 (0.885) data 0.000 (0.003) loss 0.7856 (1.0519) acc 75.0000 (73.1155) lr 1.0628e-03 eta 2:59:33 +epoch [26/50] batch [335/500] time 0.885 (0.885) data 0.000 (0.002) loss 0.5527 (1.0527) acc 81.2500 (73.1623) lr 1.0628e-03 eta 2:59:31 +epoch [26/50] batch [340/500] time 0.913 (0.886) data 0.000 (0.002) loss 0.8940 (1.0533) acc 71.8750 (73.1801) lr 1.0628e-03 eta 2:59:32 +epoch [26/50] batch [345/500] time 0.904 (0.886) data 0.000 (0.002) loss 1.0879 (1.0561) acc 78.1250 (73.1612) lr 1.0628e-03 eta 2:59:28 +epoch [26/50] batch [350/500] 
time 0.881 (0.886) data 0.000 (0.002) loss 1.1680 (1.0548) acc 68.7500 (73.1875) lr 1.0628e-03 eta 2:59:24 +epoch [26/50] batch [355/500] time 0.859 (0.886) data 0.000 (0.002) loss 0.7334 (1.0513) acc 78.1250 (73.2658) lr 1.0628e-03 eta 2:59:19 +epoch [26/50] batch [360/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.1621 (1.0505) acc 68.7500 (73.2726) lr 1.0628e-03 eta 2:59:14 +epoch [26/50] batch [365/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.9697 (1.0500) acc 71.8750 (73.2791) lr 1.0628e-03 eta 2:59:08 +epoch [26/50] batch [370/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.3545 (1.0517) acc 68.7500 (73.2770) lr 1.0628e-03 eta 2:59:04 +epoch [26/50] batch [375/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.5264 (1.0545) acc 65.6250 (73.2333) lr 1.0628e-03 eta 2:58:58 +epoch [26/50] batch [380/500] time 0.854 (0.885) data 0.000 (0.002) loss 1.5166 (1.0562) acc 71.8750 (73.1990) lr 1.0628e-03 eta 2:58:51 +epoch [26/50] batch [385/500] time 0.905 (0.886) data 0.000 (0.002) loss 1.7861 (1.0581) acc 65.6250 (73.2143) lr 1.0628e-03 eta 2:58:50 +epoch [26/50] batch [390/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.8149 (1.0553) acc 75.0000 (73.2212) lr 1.0628e-03 eta 2:58:43 +epoch [26/50] batch [395/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.6816 (1.0535) acc 84.3750 (73.2595) lr 1.0628e-03 eta 2:58:40 +epoch [26/50] batch [400/500] time 0.862 (0.885) data 0.000 (0.002) loss 0.7817 (1.0538) acc 78.1250 (73.2422) lr 1.0628e-03 eta 2:58:33 +epoch [26/50] batch [405/500] time 0.851 (0.885) data 0.000 (0.002) loss 0.7261 (1.0518) acc 71.8750 (73.2562) lr 1.0628e-03 eta 2:58:26 +epoch [26/50] batch [410/500] time 0.895 (0.885) data 0.000 (0.002) loss 1.0811 (1.0510) acc 68.7500 (73.2546) lr 1.0628e-03 eta 2:58:21 +epoch [26/50] batch [415/500] time 0.910 (0.885) data 0.000 (0.002) loss 1.2725 (1.0520) acc 68.7500 (73.2154) lr 1.0628e-03 eta 2:58:16 +epoch [26/50] batch [420/500] time 0.874 (0.885) data 0.000 (0.002) loss 0.9771 (1.0514) acc 
75.0000 (73.2515) lr 1.0628e-03 eta 2:58:14 +epoch [26/50] batch [425/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.5059 (1.0534) acc 68.7500 (73.2132) lr 1.0628e-03 eta 2:58:08 +epoch [26/50] batch [430/500] time 0.898 (0.885) data 0.000 (0.002) loss 0.8848 (1.0516) acc 84.3750 (73.2776) lr 1.0628e-03 eta 2:58:04 +epoch [26/50] batch [435/500] time 0.856 (0.885) data 0.000 (0.002) loss 0.9653 (1.0514) acc 62.5000 (73.2759) lr 1.0628e-03 eta 2:58:00 +epoch [26/50] batch [440/500] time 0.862 (0.885) data 0.000 (0.002) loss 1.4531 (1.0529) acc 65.6250 (73.2599) lr 1.0628e-03 eta 2:57:53 +epoch [26/50] batch [445/500] time 0.871 (0.885) data 0.000 (0.002) loss 0.9297 (1.0549) acc 81.2500 (73.2374) lr 1.0628e-03 eta 2:57:47 +epoch [26/50] batch [450/500] time 0.869 (0.885) data 0.000 (0.002) loss 0.9580 (1.0560) acc 78.1250 (73.2222) lr 1.0628e-03 eta 2:57:41 +epoch [26/50] batch [455/500] time 0.901 (0.885) data 0.000 (0.002) loss 1.1699 (1.0553) acc 71.8750 (73.2212) lr 1.0628e-03 eta 2:57:37 +epoch [26/50] batch [460/500] time 0.877 (0.885) data 0.000 (0.002) loss 1.2363 (1.0533) acc 71.8750 (73.2609) lr 1.0628e-03 eta 2:57:33 +epoch [26/50] batch [465/500] time 0.894 (0.885) data 0.000 (0.002) loss 1.2256 (1.0542) acc 65.6250 (73.2191) lr 1.0628e-03 eta 2:57:28 +epoch [26/50] batch [470/500] time 0.890 (0.885) data 0.000 (0.002) loss 1.2100 (1.0545) acc 62.5000 (73.1782) lr 1.0628e-03 eta 2:57:24 +epoch [26/50] batch [475/500] time 0.883 (0.885) data 0.000 (0.002) loss 0.5459 (1.0540) acc 81.2500 (73.1776) lr 1.0628e-03 eta 2:57:20 +epoch [26/50] batch [480/500] time 0.906 (0.885) data 0.000 (0.002) loss 1.2354 (1.0553) acc 68.7500 (73.1315) lr 1.0628e-03 eta 2:57:15 +epoch [26/50] batch [485/500] time 0.921 (0.885) data 0.000 (0.002) loss 1.1621 (1.0575) acc 59.3750 (73.0477) lr 1.0628e-03 eta 2:57:14 +epoch [26/50] batch [490/500] time 0.881 (0.885) data 0.000 (0.002) loss 1.0449 (1.0566) acc 62.5000 (73.0931) lr 1.0628e-03 eta 2:57:10 +epoch [26/50] batch 
[495/500] time 0.867 (0.885) data 0.000 (0.002) loss 1.2148 (1.0565) acc 65.6250 (73.0619) lr 1.0628e-03 eta 2:57:06 +epoch [26/50] batch [500/500] time 0.866 (0.885) data 0.000 (0.002) loss 0.9624 (1.0572) acc 71.8750 (73.0500) lr 1.0000e-03 eta 2:57:01 +epoch [27/50] batch [5/500] time 0.858 (1.016) data 0.000 (0.134) loss 0.7432 (0.8616) acc 81.2500 (77.5000) lr 1.0000e-03 eta 3:23:08 +epoch [27/50] batch [10/500] time 0.905 (0.951) data 0.000 (0.067) loss 0.9150 (0.9214) acc 75.0000 (75.0000) lr 1.0000e-03 eta 3:10:05 +epoch [27/50] batch [15/500] time 0.902 (0.930) data 0.000 (0.045) loss 0.6973 (0.9633) acc 84.3750 (74.3750) lr 1.0000e-03 eta 3:05:46 +epoch [27/50] batch [20/500] time 0.885 (0.919) data 0.000 (0.034) loss 0.8350 (0.9426) acc 71.8750 (74.6875) lr 1.0000e-03 eta 3:03:31 +epoch [27/50] batch [25/500] time 0.880 (0.914) data 0.000 (0.027) loss 1.0215 (0.9670) acc 71.8750 (74.0000) lr 1.0000e-03 eta 3:02:24 +epoch [27/50] batch [30/500] time 0.881 (0.911) data 0.000 (0.023) loss 1.3838 (0.9913) acc 68.7500 (73.8542) lr 1.0000e-03 eta 3:01:41 +epoch [27/50] batch [35/500] time 0.875 (0.911) data 0.000 (0.019) loss 0.8936 (0.9754) acc 71.8750 (74.4643) lr 1.0000e-03 eta 3:01:37 +epoch [27/50] batch [40/500] time 0.879 (0.907) data 0.000 (0.017) loss 1.0713 (0.9814) acc 65.6250 (74.1406) lr 1.0000e-03 eta 3:00:48 +epoch [27/50] batch [45/500] time 0.910 (0.906) data 0.000 (0.015) loss 1.0176 (0.9673) acc 71.8750 (74.1667) lr 1.0000e-03 eta 3:00:28 +epoch [27/50] batch [50/500] time 0.914 (0.903) data 0.000 (0.014) loss 0.5503 (0.9578) acc 93.7500 (74.6250) lr 1.0000e-03 eta 2:59:55 +epoch [27/50] batch [55/500] time 0.863 (0.900) data 0.000 (0.012) loss 0.5381 (0.9574) acc 87.5000 (74.6023) lr 1.0000e-03 eta 2:59:12 +epoch [27/50] batch [60/500] time 0.870 (0.899) data 0.000 (0.011) loss 0.9160 (0.9664) acc 81.2500 (74.4271) lr 1.0000e-03 eta 2:58:51 +epoch [27/50] batch [65/500] time 0.885 (0.898) data 0.000 (0.011) loss 1.6523 (0.9734) acc 62.5000 
(74.3750) lr 1.0000e-03 eta 2:58:36 +epoch [27/50] batch [70/500] time 0.888 (0.897) data 0.000 (0.010) loss 1.2031 (0.9914) acc 62.5000 (74.0625) lr 1.0000e-03 eta 2:58:24 +epoch [27/50] batch [75/500] time 0.917 (0.898) data 0.000 (0.009) loss 1.1260 (0.9971) acc 59.3750 (73.7500) lr 1.0000e-03 eta 2:58:28 +epoch [27/50] batch [80/500] time 0.919 (0.899) data 0.000 (0.009) loss 0.8047 (0.9984) acc 71.8750 (73.7109) lr 1.0000e-03 eta 2:58:36 +epoch [27/50] batch [85/500] time 0.900 (0.898) data 0.000 (0.008) loss 1.3018 (1.0078) acc 62.5000 (73.6397) lr 1.0000e-03 eta 2:58:22 +epoch [27/50] batch [90/500] time 0.905 (0.898) data 0.000 (0.008) loss 1.1152 (1.0077) acc 75.0000 (73.6806) lr 1.0000e-03 eta 2:58:17 +epoch [27/50] batch [95/500] time 0.912 (0.899) data 0.000 (0.007) loss 0.8032 (1.0164) acc 75.0000 (73.4868) lr 1.0000e-03 eta 2:58:19 +epoch [27/50] batch [100/500] time 0.884 (0.898) data 0.000 (0.007) loss 1.1992 (1.0119) acc 71.8750 (73.5625) lr 1.0000e-03 eta 2:58:10 +epoch [27/50] batch [105/500] time 0.908 (0.897) data 0.000 (0.007) loss 1.2764 (1.0216) acc 62.5000 (73.4821) lr 1.0000e-03 eta 2:57:55 +epoch [27/50] batch [110/500] time 0.877 (0.897) data 0.000 (0.006) loss 1.2920 (1.0214) acc 65.6250 (73.6648) lr 1.0000e-03 eta 2:57:46 +epoch [27/50] batch [115/500] time 0.873 (0.896) data 0.000 (0.006) loss 0.6587 (1.0149) acc 87.5000 (73.7500) lr 1.0000e-03 eta 2:57:26 +epoch [27/50] batch [120/500] time 0.907 (0.895) data 0.000 (0.006) loss 0.9727 (1.0140) acc 75.0000 (73.8542) lr 1.0000e-03 eta 2:57:14 +epoch [27/50] batch [125/500] time 0.894 (0.895) data 0.000 (0.006) loss 1.1270 (1.0190) acc 65.6250 (73.8500) lr 1.0000e-03 eta 2:57:11 +epoch [27/50] batch [130/500] time 0.865 (0.895) data 0.000 (0.005) loss 1.0713 (1.0227) acc 78.1250 (73.8462) lr 1.0000e-03 eta 2:56:59 +epoch [27/50] batch [135/500] time 0.888 (0.894) data 0.000 (0.005) loss 1.2842 (1.0300) acc 75.0000 (73.7731) lr 1.0000e-03 eta 2:56:52 +epoch [27/50] batch [140/500] time 
0.900 (0.894) data 0.000 (0.005) loss 1.2559 (1.0392) acc 71.8750 (73.5938) lr 1.0000e-03 eta 2:56:43 +epoch [27/50] batch [145/500] time 0.861 (0.894) data 0.000 (0.005) loss 0.6353 (1.0337) acc 81.2500 (73.7069) lr 1.0000e-03 eta 2:56:39 +epoch [27/50] batch [150/500] time 0.859 (0.894) data 0.000 (0.005) loss 1.0918 (1.0308) acc 75.0000 (73.8542) lr 1.0000e-03 eta 2:56:29 +epoch [27/50] batch [155/500] time 0.875 (0.893) data 0.000 (0.005) loss 1.6689 (1.0326) acc 65.6250 (73.8911) lr 1.0000e-03 eta 2:56:19 +epoch [27/50] batch [160/500] time 0.905 (0.893) data 0.000 (0.004) loss 0.8237 (1.0325) acc 78.1250 (73.8867) lr 1.0000e-03 eta 2:56:10 +epoch [27/50] batch [165/500] time 0.909 (0.893) data 0.000 (0.004) loss 0.9067 (1.0334) acc 71.8750 (73.8068) lr 1.0000e-03 eta 2:56:05 +epoch [27/50] batch [170/500] time 0.860 (0.892) data 0.000 (0.004) loss 1.6572 (1.0361) acc 46.8750 (73.6581) lr 1.0000e-03 eta 2:55:56 +epoch [27/50] batch [175/500] time 0.853 (0.892) data 0.000 (0.004) loss 1.6006 (1.0323) acc 68.7500 (73.8036) lr 1.0000e-03 eta 2:55:42 +epoch [27/50] batch [180/500] time 0.885 (0.892) data 0.000 (0.004) loss 0.7222 (1.0249) acc 75.0000 (73.9931) lr 1.0000e-03 eta 2:55:40 +epoch [27/50] batch [185/500] time 0.916 (0.892) data 0.000 (0.004) loss 1.5049 (1.0352) acc 62.5000 (73.8007) lr 1.0000e-03 eta 2:55:33 +epoch [27/50] batch [190/500] time 0.902 (0.891) data 0.000 (0.004) loss 0.8672 (1.0343) acc 71.8750 (73.7829) lr 1.0000e-03 eta 2:55:25 +epoch [27/50] batch [195/500] time 0.880 (0.891) data 0.000 (0.004) loss 1.0078 (1.0315) acc 75.0000 (73.7821) lr 1.0000e-03 eta 2:55:18 +epoch [27/50] batch [200/500] time 0.875 (0.891) data 0.000 (0.004) loss 0.7476 (1.0308) acc 75.0000 (73.8750) lr 1.0000e-03 eta 2:55:11 +epoch [27/50] batch [205/500] time 0.890 (0.891) data 0.000 (0.004) loss 0.7622 (1.0294) acc 78.1250 (73.9634) lr 1.0000e-03 eta 2:55:05 +epoch [27/50] batch [210/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.8706 (1.0323) acc 78.1250 
(73.8393) lr 1.0000e-03 eta 2:54:56 +epoch [27/50] batch [215/500] time 0.851 (0.890) data 0.000 (0.003) loss 1.3818 (1.0338) acc 71.8750 (73.7500) lr 1.0000e-03 eta 2:54:45 +epoch [27/50] batch [220/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.7061 (1.0374) acc 62.5000 (73.7500) lr 1.0000e-03 eta 2:54:36 +epoch [27/50] batch [225/500] time 0.901 (0.890) data 0.000 (0.003) loss 0.7178 (1.0341) acc 78.1250 (73.8194) lr 1.0000e-03 eta 2:54:35 +epoch [27/50] batch [230/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.0225 (1.0313) acc 68.7500 (73.8723) lr 1.0000e-03 eta 2:54:24 +epoch [27/50] batch [235/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.7217 (1.0325) acc 78.1250 (73.7899) lr 1.0000e-03 eta 2:54:16 +epoch [27/50] batch [240/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.1562 (1.0288) acc 71.8750 (73.8542) lr 1.0000e-03 eta 2:54:09 +epoch [27/50] batch [245/500] time 0.862 (0.888) data 0.000 (0.003) loss 0.9053 (1.0277) acc 78.1250 (73.8520) lr 1.0000e-03 eta 2:53:59 +epoch [27/50] batch [250/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.0479 (1.0297) acc 71.8750 (73.7250) lr 1.0000e-03 eta 2:53:51 +epoch [27/50] batch [255/500] time 0.884 (0.887) data 0.000 (0.003) loss 1.2305 (1.0264) acc 68.7500 (73.7868) lr 1.0000e-03 eta 2:53:43 +epoch [27/50] batch [260/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.1953 (1.0305) acc 68.7500 (73.7139) lr 1.0000e-03 eta 2:53:35 +epoch [27/50] batch [265/500] time 0.893 (0.887) data 0.000 (0.003) loss 1.2695 (1.0348) acc 71.8750 (73.5967) lr 1.0000e-03 eta 2:53:31 +epoch [27/50] batch [270/500] time 0.898 (0.887) data 0.000 (0.003) loss 1.2002 (1.0375) acc 65.6250 (73.5301) lr 1.0000e-03 eta 2:53:29 +epoch [27/50] batch [275/500] time 0.881 (0.888) data 0.000 (0.003) loss 1.1221 (1.0370) acc 71.8750 (73.5227) lr 1.0000e-03 eta 2:53:26 +epoch [27/50] batch [280/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.5005 (1.0362) acc 84.3750 (73.5379) lr 1.0000e-03 eta 2:53:18 +epoch [27/50] batch [285/500] 
time 0.901 (0.887) data 0.000 (0.003) loss 0.6411 (1.0381) acc 81.2500 (73.4759) lr 1.0000e-03 eta 2:53:15 +epoch [27/50] batch [290/500] time 0.866 (0.887) data 0.000 (0.003) loss 0.9707 (1.0381) acc 75.0000 (73.5022) lr 1.0000e-03 eta 2:53:11 +epoch [27/50] batch [295/500] time 0.879 (0.887) data 0.000 (0.003) loss 1.0732 (1.0356) acc 71.8750 (73.6017) lr 1.0000e-03 eta 2:53:05 +epoch [27/50] batch [300/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.9819 (1.0342) acc 75.0000 (73.6458) lr 1.0000e-03 eta 2:53:01 +epoch [27/50] batch [305/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.0811 (1.0345) acc 75.0000 (73.6475) lr 1.0000e-03 eta 2:52:57 +epoch [27/50] batch [310/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.5737 (1.0363) acc 75.0000 (73.5786) lr 1.0000e-03 eta 2:52:52 +epoch [27/50] batch [315/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.5884 (1.0386) acc 81.2500 (73.5119) lr 1.0000e-03 eta 2:52:46 +epoch [27/50] batch [320/500] time 0.997 (0.887) data 0.000 (0.002) loss 1.7373 (1.0420) acc 62.5000 (73.4375) lr 1.0000e-03 eta 2:52:44 +epoch [27/50] batch [325/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.0703 (1.0436) acc 68.7500 (73.3846) lr 1.0000e-03 eta 2:52:40 +epoch [27/50] batch [330/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.3799 (1.0422) acc 62.5000 (73.4091) lr 1.0000e-03 eta 2:52:34 +epoch [27/50] batch [335/500] time 0.855 (0.887) data 0.000 (0.002) loss 1.1387 (1.0443) acc 62.5000 (73.4049) lr 1.0000e-03 eta 2:52:28 +epoch [27/50] batch [340/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.8579 (1.0426) acc 90.6250 (73.4467) lr 1.0000e-03 eta 2:52:24 +epoch [27/50] batch [345/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.6973 (1.0401) acc 78.1250 (73.4692) lr 1.0000e-03 eta 2:52:20 +epoch [27/50] batch [350/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.4658 (1.0436) acc 62.5000 (73.3661) lr 1.0000e-03 eta 2:52:15 +epoch [27/50] batch [355/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.9316 (1.0443) acc 
71.8750 (73.3275) lr 1.0000e-03 eta 2:52:11 +epoch [27/50] batch [360/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.6689 (1.0442) acc 75.0000 (73.3073) lr 1.0000e-03 eta 2:52:07 +epoch [27/50] batch [365/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0742 (1.0472) acc 78.1250 (73.2534) lr 1.0000e-03 eta 2:52:05 +epoch [27/50] batch [370/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.7969 (1.0456) acc 81.2500 (73.3361) lr 1.0000e-03 eta 2:52:00 +epoch [27/50] batch [375/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.0117 (1.0475) acc 68.7500 (73.3250) lr 1.0000e-03 eta 2:51:54 +epoch [27/50] batch [380/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.6836 (1.0483) acc 65.6250 (73.3141) lr 1.0000e-03 eta 2:51:49 +epoch [27/50] batch [385/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.4785 (1.0495) acc 59.3750 (73.2468) lr 1.0000e-03 eta 2:51:44 +epoch [27/50] batch [390/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.1074 (1.0485) acc 71.8750 (73.2853) lr 1.0000e-03 eta 2:51:38 +epoch [27/50] batch [395/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.6089 (1.0495) acc 84.3750 (73.2358) lr 1.0000e-03 eta 2:51:35 +epoch [27/50] batch [400/500] time 0.915 (0.887) data 0.000 (0.002) loss 1.4268 (1.0482) acc 68.7500 (73.2656) lr 1.0000e-03 eta 2:51:31 +epoch [27/50] batch [405/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.5195 (1.0454) acc 81.2500 (73.2948) lr 1.0000e-03 eta 2:51:27 +epoch [27/50] batch [410/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.2998 (1.0452) acc 65.6250 (73.2774) lr 1.0000e-03 eta 2:51:23 +epoch [27/50] batch [415/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.0635 (1.0465) acc 65.6250 (73.2229) lr 1.0000e-03 eta 2:51:18 +epoch [27/50] batch [420/500] time 0.877 (0.887) data 0.001 (0.002) loss 1.3135 (1.0497) acc 71.8750 (73.1473) lr 1.0000e-03 eta 2:51:13 +epoch [27/50] batch [425/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.7163 (1.0512) acc 81.2500 (73.1397) lr 1.0000e-03 eta 2:51:07 +epoch [27/50] batch 
[430/500] time 0.908 (0.887) data 0.000 (0.002) loss 2.1016 (1.0548) acc 65.6250 (73.0814) lr 1.0000e-03 eta 2:51:02 +epoch [27/50] batch [435/500] time 0.857 (0.887) data 0.000 (0.002) loss 0.8027 (1.0562) acc 78.1250 (73.0603) lr 1.0000e-03 eta 2:50:57 +epoch [27/50] batch [440/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8687 (1.0590) acc 71.8750 (72.9688) lr 1.0000e-03 eta 2:50:52 +epoch [27/50] batch [445/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.0254 (1.0569) acc 68.7500 (73.0056) lr 1.0000e-03 eta 2:50:48 +epoch [27/50] batch [450/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.7231 (1.0592) acc 81.2500 (72.9583) lr 1.0000e-03 eta 2:50:42 +epoch [27/50] batch [455/500] time 0.862 (0.886) data 0.000 (0.002) loss 1.2305 (1.0589) acc 62.5000 (72.9396) lr 1.0000e-03 eta 2:50:34 +epoch [27/50] batch [460/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.9873 (1.0561) acc 75.0000 (72.9959) lr 1.0000e-03 eta 2:50:29 +epoch [27/50] batch [465/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.8442 (1.0562) acc 81.2500 (73.0242) lr 1.0000e-03 eta 2:50:27 +epoch [27/50] batch [470/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.3203 (1.0578) acc 75.0000 (73.0186) lr 1.0000e-03 eta 2:50:20 +epoch [27/50] batch [475/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.0400 (1.0567) acc 81.2500 (73.0197) lr 1.0000e-03 eta 2:50:15 +epoch [27/50] batch [480/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.0781 (1.0557) acc 71.8750 (73.0273) lr 1.0000e-03 eta 2:50:09 +epoch [27/50] batch [485/500] time 0.892 (0.886) data 0.000 (0.002) loss 0.6729 (1.0553) acc 81.2500 (73.0348) lr 1.0000e-03 eta 2:50:04 +epoch [27/50] batch [490/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.3623 (1.0597) acc 65.6250 (72.9273) lr 1.0000e-03 eta 2:50:00 +epoch [27/50] batch [495/500] time 0.869 (0.886) data 0.000 (0.002) loss 0.7651 (1.0605) acc 81.2500 (72.9230) lr 1.0000e-03 eta 2:49:54 +epoch [27/50] batch [500/500] time 0.860 (0.886) data 0.000 (0.002) loss 0.9736 
(1.0617) acc 75.0000 (72.8812) lr 9.3721e-04 eta 2:49:48 +epoch [28/50] batch [5/500] time 0.879 (1.025) data 0.000 (0.131) loss 0.7944 (1.2319) acc 81.2500 (69.3750) lr 9.3721e-04 eta 3:16:17 +epoch [28/50] batch [10/500] time 1.005 (0.972) data 0.000 (0.066) loss 0.6592 (1.0800) acc 84.3750 (72.5000) lr 9.3721e-04 eta 3:06:03 +epoch [28/50] batch [15/500] time 0.890 (0.943) data 0.000 (0.044) loss 1.1455 (1.1038) acc 68.7500 (72.9167) lr 9.3721e-04 eta 3:00:27 +epoch [28/50] batch [20/500] time 0.868 (0.929) data 0.000 (0.033) loss 0.8618 (1.0674) acc 71.8750 (72.6562) lr 9.3721e-04 eta 2:57:40 +epoch [28/50] batch [25/500] time 0.864 (0.918) data 0.000 (0.026) loss 0.7251 (1.0747) acc 81.2500 (72.3750) lr 9.3721e-04 eta 2:55:29 +epoch [28/50] batch [30/500] time 0.890 (0.911) data 0.000 (0.022) loss 0.6436 (1.0433) acc 81.2500 (73.0208) lr 9.3721e-04 eta 2:54:11 +epoch [28/50] batch [35/500] time 0.916 (0.907) data 0.000 (0.019) loss 0.3418 (1.0218) acc 87.5000 (73.7500) lr 9.3721e-04 eta 2:53:14 +epoch [28/50] batch [40/500] time 0.864 (0.904) data 0.000 (0.017) loss 1.7012 (1.0867) acc 65.6250 (72.7344) lr 9.3721e-04 eta 2:52:40 +epoch [28/50] batch [45/500] time 0.862 (0.902) data 0.000 (0.015) loss 0.9600 (1.0793) acc 75.0000 (73.2639) lr 9.3721e-04 eta 2:52:15 +epoch [28/50] batch [50/500] time 0.888 (0.901) data 0.000 (0.013) loss 0.8467 (1.0462) acc 75.0000 (74.0625) lr 9.3721e-04 eta 2:52:01 +epoch [28/50] batch [55/500] time 0.884 (0.902) data 0.000 (0.012) loss 1.0273 (1.0306) acc 68.7500 (74.2045) lr 9.3721e-04 eta 2:52:08 +epoch [28/50] batch [60/500] time 0.880 (0.901) data 0.000 (0.011) loss 1.3340 (1.0440) acc 68.7500 (73.8021) lr 9.3721e-04 eta 2:51:47 +epoch [28/50] batch [65/500] time 0.883 (0.900) data 0.000 (0.010) loss 1.0146 (1.0468) acc 75.0000 (73.5577) lr 9.3721e-04 eta 2:51:32 +epoch [28/50] batch [70/500] time 0.925 (0.900) data 0.000 (0.010) loss 0.6523 (1.0418) acc 81.2500 (73.5268) lr 9.3721e-04 eta 2:51:21 +epoch [28/50] batch 
[75/500] time 0.874 (0.898) data 0.000 (0.009) loss 0.8057 (1.0356) acc 78.1250 (73.8333) lr 9.3721e-04 eta 2:50:58 +epoch [28/50] batch [80/500] time 0.902 (0.897) data 0.000 (0.008) loss 0.7896 (1.0442) acc 75.0000 (73.6328) lr 9.3721e-04 eta 2:50:46 +epoch [28/50] batch [85/500] time 0.848 (0.896) data 0.000 (0.008) loss 1.0723 (1.0465) acc 71.8750 (73.4191) lr 9.3721e-04 eta 2:50:31 +epoch [28/50] batch [90/500] time 0.894 (0.896) data 0.000 (0.007) loss 1.6416 (1.0610) acc 65.6250 (73.1944) lr 9.3721e-04 eta 2:50:22 +epoch [28/50] batch [95/500] time 0.897 (0.896) data 0.000 (0.007) loss 0.8633 (1.0561) acc 78.1250 (73.3882) lr 9.3721e-04 eta 2:50:18 +epoch [28/50] batch [100/500] time 0.884 (0.896) data 0.000 (0.007) loss 0.9648 (1.0566) acc 78.1250 (73.3438) lr 9.3721e-04 eta 2:50:09 +epoch [28/50] batch [105/500] time 0.883 (0.895) data 0.000 (0.006) loss 1.0811 (1.0564) acc 78.1250 (73.4524) lr 9.3721e-04 eta 2:49:57 +epoch [28/50] batch [110/500] time 0.871 (0.894) data 0.000 (0.006) loss 1.3779 (1.0628) acc 68.7500 (73.4375) lr 9.3721e-04 eta 2:49:48 +epoch [28/50] batch [115/500] time 0.886 (0.894) data 0.000 (0.006) loss 0.8662 (1.0610) acc 68.7500 (73.4511) lr 9.3721e-04 eta 2:49:35 +epoch [28/50] batch [120/500] time 0.921 (0.894) data 0.000 (0.006) loss 0.9248 (1.0644) acc 75.0000 (73.3073) lr 9.3721e-04 eta 2:49:33 +epoch [28/50] batch [125/500] time 0.882 (0.894) data 0.000 (0.005) loss 1.0117 (1.0630) acc 78.1250 (73.4500) lr 9.3721e-04 eta 2:49:24 +epoch [28/50] batch [130/500] time 0.859 (0.893) data 0.000 (0.005) loss 0.9971 (1.0633) acc 68.7500 (73.3654) lr 9.3721e-04 eta 2:49:14 +epoch [28/50] batch [135/500] time 0.884 (0.893) data 0.000 (0.005) loss 1.3291 (1.0571) acc 68.7500 (73.5185) lr 9.3721e-04 eta 2:49:09 +epoch [28/50] batch [140/500] time 0.891 (0.892) data 0.000 (0.005) loss 0.6143 (1.0529) acc 87.5000 (73.5938) lr 9.3721e-04 eta 2:48:57 +epoch [28/50] batch [145/500] time 0.859 (0.892) data 0.000 (0.005) loss 0.4099 (1.0529) acc 
87.5000 (73.6853) lr 9.3721e-04 eta 2:48:51 +epoch [28/50] batch [150/500] time 0.972 (0.893) data 0.000 (0.005) loss 0.9912 (1.0460) acc 81.2500 (73.8542) lr 9.3721e-04 eta 2:48:50 +epoch [28/50] batch [155/500] time 0.890 (0.892) data 0.000 (0.004) loss 2.4473 (1.0602) acc 50.0000 (73.5685) lr 9.3721e-04 eta 2:48:43 +epoch [28/50] batch [160/500] time 0.880 (0.892) data 0.000 (0.004) loss 0.8984 (1.0556) acc 75.0000 (73.5938) lr 9.3721e-04 eta 2:48:30 +epoch [28/50] batch [165/500] time 0.877 (0.891) data 0.000 (0.004) loss 1.5234 (1.0611) acc 65.6250 (73.5606) lr 9.3721e-04 eta 2:48:24 +epoch [28/50] batch [170/500] time 0.873 (0.891) data 0.000 (0.004) loss 0.4500 (1.0575) acc 87.5000 (73.5110) lr 9.3721e-04 eta 2:48:18 +epoch [28/50] batch [175/500] time 0.865 (0.891) data 0.000 (0.004) loss 0.7700 (1.0559) acc 84.3750 (73.4464) lr 9.3721e-04 eta 2:48:11 +epoch [28/50] batch [180/500] time 0.854 (0.890) data 0.000 (0.004) loss 0.6807 (1.0560) acc 84.3750 (73.4722) lr 9.3721e-04 eta 2:47:59 +epoch [28/50] batch [185/500] time 0.851 (0.890) data 0.000 (0.004) loss 0.9189 (1.0524) acc 68.7500 (73.5642) lr 9.3721e-04 eta 2:47:51 +epoch [28/50] batch [190/500] time 0.858 (0.890) data 0.000 (0.004) loss 0.9736 (1.0511) acc 71.8750 (73.6020) lr 9.3721e-04 eta 2:47:47 +epoch [28/50] batch [195/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.1162 (1.0467) acc 65.6250 (73.7660) lr 9.3721e-04 eta 2:47:45 +epoch [28/50] batch [200/500] time 0.904 (0.890) data 0.000 (0.003) loss 0.7734 (1.0441) acc 75.0000 (73.8594) lr 9.3721e-04 eta 2:47:38 +epoch [28/50] batch [205/500] time 0.904 (0.890) data 0.000 (0.003) loss 0.7651 (1.0429) acc 71.8750 (73.8567) lr 9.3721e-04 eta 2:47:31 +epoch [28/50] batch [210/500] time 0.904 (0.890) data 0.000 (0.003) loss 1.3574 (1.0517) acc 65.6250 (73.6607) lr 9.3721e-04 eta 2:47:26 +epoch [28/50] batch [215/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.0986 (1.0535) acc 75.0000 (73.6483) lr 9.3721e-04 eta 2:47:18 +epoch [28/50] batch 
[220/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.5444 (1.0512) acc 93.7500 (73.6790) lr 9.3721e-04 eta 2:47:12 +epoch [28/50] batch [225/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.0127 (1.0533) acc 71.8750 (73.6250) lr 9.3721e-04 eta 2:47:06 +epoch [28/50] batch [230/500] time 0.874 (0.889) data 0.000 (0.003) loss 0.7554 (1.0542) acc 78.1250 (73.6005) lr 9.3721e-04 eta 2:46:58 +epoch [28/50] batch [235/500] time 0.901 (0.889) data 0.000 (0.003) loss 0.7900 (1.0547) acc 78.1250 (73.5638) lr 9.3721e-04 eta 2:46:55 +epoch [28/50] batch [240/500] time 0.912 (0.889) data 0.000 (0.003) loss 0.7622 (1.0528) acc 84.3750 (73.5938) lr 9.3721e-04 eta 2:46:49 +epoch [28/50] batch [245/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.7510 (1.0503) acc 84.3750 (73.6480) lr 9.3721e-04 eta 2:46:45 +epoch [28/50] batch [250/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.2676 (1.0466) acc 65.6250 (73.7125) lr 9.3721e-04 eta 2:46:40 +epoch [28/50] batch [255/500] time 0.871 (0.889) data 0.000 (0.003) loss 1.4873 (1.0507) acc 59.3750 (73.5907) lr 9.3721e-04 eta 2:46:37 +epoch [28/50] batch [260/500] time 0.890 (0.889) data 0.000 (0.003) loss 0.6421 (1.0480) acc 84.3750 (73.6058) lr 9.3721e-04 eta 2:46:30 +epoch [28/50] batch [265/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.9502 (1.0485) acc 68.7500 (73.5495) lr 9.3721e-04 eta 2:46:23 +epoch [28/50] batch [270/500] time 0.875 (0.888) data 0.001 (0.003) loss 1.0439 (1.0472) acc 65.6250 (73.5185) lr 9.3721e-04 eta 2:46:17 +epoch [28/50] batch [275/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.9878 (1.0453) acc 78.1250 (73.5682) lr 9.3721e-04 eta 2:46:11 +epoch [28/50] batch [280/500] time 0.876 (0.888) data 0.000 (0.003) loss 0.9731 (1.0441) acc 75.0000 (73.6049) lr 9.3721e-04 eta 2:46:08 +epoch [28/50] batch [285/500] time 0.858 (0.888) data 0.000 (0.003) loss 1.5107 (1.0456) acc 68.7500 (73.5746) lr 9.3721e-04 eta 2:46:01 +epoch [28/50] batch [290/500] time 0.905 (0.888) data 0.000 (0.002) loss 2.0371 
(1.0497) acc 65.6250 (73.5560) lr 9.3721e-04 eta 2:45:57 +epoch [28/50] batch [295/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.0742 (1.0496) acc 78.1250 (73.6017) lr 9.3721e-04 eta 2:45:56 +epoch [28/50] batch [300/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.4004 (1.0501) acc 65.6250 (73.5312) lr 9.3721e-04 eta 2:45:50 +epoch [28/50] batch [305/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.8057 (1.0507) acc 71.8750 (73.5656) lr 9.3721e-04 eta 2:45:43 +epoch [28/50] batch [310/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.2695 (1.0531) acc 71.8750 (73.5081) lr 9.3721e-04 eta 2:45:36 +epoch [28/50] batch [315/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.4229 (1.0561) acc 62.5000 (73.4524) lr 9.3721e-04 eta 2:45:32 +epoch [28/50] batch [320/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.0947 (1.0575) acc 75.0000 (73.4766) lr 9.3721e-04 eta 2:45:29 +epoch [28/50] batch [325/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.4609 (1.0598) acc 71.8750 (73.4904) lr 9.3721e-04 eta 2:45:24 +epoch [28/50] batch [330/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.2578 (1.0595) acc 71.8750 (73.4848) lr 9.3721e-04 eta 2:45:18 +epoch [28/50] batch [335/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9507 (1.0633) acc 71.8750 (73.3862) lr 9.3721e-04 eta 2:45:12 +epoch [28/50] batch [340/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.7705 (1.0638) acc 78.1250 (73.3548) lr 9.3721e-04 eta 2:45:10 +epoch [28/50] batch [345/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.0840 (1.0635) acc 65.6250 (73.2971) lr 9.3721e-04 eta 2:45:02 +epoch [28/50] batch [350/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.5654 (1.0636) acc 59.3750 (73.3125) lr 9.3721e-04 eta 2:44:56 +epoch [28/50] batch [355/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.2686 (1.0668) acc 65.6250 (73.1690) lr 9.3721e-04 eta 2:44:49 +epoch [28/50] batch [360/500] time 0.885 (0.887) data 0.000 (0.002) loss 2.2910 (1.0706) acc 43.7500 (73.1076) lr 9.3721e-04 eta 2:44:41 +epoch 
[28/50] batch [365/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.2109 (1.0692) acc 81.2500 (73.2192) lr 9.3721e-04 eta 2:44:35 +epoch [28/50] batch [370/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.5410 (1.0695) acc 65.6250 (73.2432) lr 9.3721e-04 eta 2:44:31 +epoch [28/50] batch [375/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.8276 (1.0667) acc 71.8750 (73.2833) lr 9.3721e-04 eta 2:44:27 +epoch [28/50] batch [380/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.0146 (1.0667) acc 75.0000 (73.2484) lr 9.3721e-04 eta 2:44:21 +epoch [28/50] batch [385/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.4258 (1.0666) acc 65.6250 (73.2873) lr 9.3721e-04 eta 2:44:17 +epoch [28/50] batch [390/500] time 0.920 (0.887) data 0.000 (0.002) loss 1.3711 (1.0710) acc 68.7500 (73.1971) lr 9.3721e-04 eta 2:44:14 +epoch [28/50] batch [395/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.8179 (1.0741) acc 81.2500 (73.1725) lr 9.3721e-04 eta 2:44:09 +epoch [28/50] batch [400/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.7910 (1.0767) acc 78.1250 (73.1328) lr 9.3721e-04 eta 2:44:04 +epoch [28/50] batch [405/500] time 0.904 (0.887) data 0.000 (0.002) loss 0.7402 (1.0743) acc 78.1250 (73.1636) lr 9.3721e-04 eta 2:44:00 +epoch [28/50] batch [410/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.7437 (1.0717) acc 75.0000 (73.1860) lr 9.3721e-04 eta 2:43:53 +epoch [28/50] batch [415/500] time 0.862 (0.886) data 0.000 (0.002) loss 0.7915 (1.0686) acc 81.2500 (73.2681) lr 9.3721e-04 eta 2:43:46 +epoch [28/50] batch [420/500] time 0.875 (0.886) data 0.000 (0.002) loss 0.9072 (1.0677) acc 71.8750 (73.2292) lr 9.3721e-04 eta 2:43:39 +epoch [28/50] batch [425/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.9214 (1.0682) acc 78.1250 (73.2206) lr 9.3721e-04 eta 2:43:34 +epoch [28/50] batch [430/500] time 0.882 (0.886) data 0.000 (0.002) loss 2.0430 (1.0708) acc 62.5000 (73.1613) lr 9.3721e-04 eta 2:43:28 +epoch [28/50] batch [435/500] time 0.899 (0.886) data 0.000 (0.002) loss 
0.8398 (1.0710) acc 78.1250 (73.1609) lr 9.3721e-04 eta 2:43:24 +epoch [28/50] batch [440/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.7114 (1.0688) acc 81.2500 (73.2102) lr 9.3721e-04 eta 2:43:24 +epoch [28/50] batch [445/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2568 (1.0711) acc 71.8750 (73.1390) lr 9.3721e-04 eta 2:43:20 +epoch [28/50] batch [450/500] time 0.846 (0.886) data 0.000 (0.002) loss 0.8022 (1.0681) acc 81.2500 (73.2014) lr 9.3721e-04 eta 2:43:15 +epoch [28/50] batch [455/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.1533 (1.0681) acc 71.8750 (73.2005) lr 9.3721e-04 eta 2:43:11 +epoch [28/50] batch [460/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.2256 (1.0700) acc 68.7500 (73.1114) lr 9.3721e-04 eta 2:43:04 +epoch [28/50] batch [465/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.4141 (1.0704) acc 68.7500 (73.1183) lr 9.3721e-04 eta 2:43:00 +epoch [28/50] batch [470/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.3770 (1.0702) acc 75.0000 (73.1649) lr 9.3721e-04 eta 2:42:55 +epoch [28/50] batch [475/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.7280 (1.0683) acc 84.3750 (73.2039) lr 9.3721e-04 eta 2:42:49 +epoch [28/50] batch [480/500] time 0.975 (0.886) data 0.000 (0.002) loss 0.6855 (1.0651) acc 78.1250 (73.2422) lr 9.3721e-04 eta 2:42:46 +epoch [28/50] batch [485/500] time 0.861 (0.886) data 0.000 (0.002) loss 1.1074 (1.0647) acc 68.7500 (73.2668) lr 9.3721e-04 eta 2:42:39 +epoch [28/50] batch [490/500] time 0.873 (0.886) data 0.000 (0.002) loss 0.8315 (1.0633) acc 81.2500 (73.2844) lr 9.3721e-04 eta 2:42:35 +epoch [28/50] batch [495/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.6733 (1.0623) acc 84.3750 (73.3081) lr 9.3721e-04 eta 2:42:30 +epoch [28/50] batch [500/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.4326 (1.0619) acc 59.3750 (73.3250) lr 8.7467e-04 eta 2:42:26 +epoch [29/50] batch [5/500] time 0.879 (1.020) data 0.000 (0.126) loss 1.1895 (1.1889) acc 78.1250 (73.1250) lr 8.7467e-04 eta 3:06:51 
+epoch [29/50] batch [10/500] time 0.897 (0.956) data 0.000 (0.063) loss 2.1426 (1.1612) acc 59.3750 (72.1875) lr 8.7467e-04 eta 2:55:07 +epoch [29/50] batch [15/500] time 0.877 (0.931) data 0.000 (0.042) loss 0.9150 (1.1131) acc 81.2500 (72.0833) lr 8.7467e-04 eta 2:50:27 +epoch [29/50] batch [20/500] time 0.865 (0.918) data 0.000 (0.032) loss 1.2969 (1.0848) acc 71.8750 (72.9688) lr 8.7467e-04 eta 2:48:05 +epoch [29/50] batch [25/500] time 0.910 (0.916) data 0.000 (0.025) loss 0.7568 (1.0324) acc 68.7500 (74.2500) lr 8.7467e-04 eta 2:47:34 +epoch [29/50] batch [30/500] time 0.848 (0.910) data 0.000 (0.021) loss 1.4189 (1.0680) acc 68.7500 (73.6458) lr 8.7467e-04 eta 2:46:27 +epoch [29/50] batch [35/500] time 0.897 (0.908) data 0.000 (0.018) loss 0.9731 (1.0637) acc 75.0000 (73.2143) lr 8.7467e-04 eta 2:45:54 +epoch [29/50] batch [40/500] time 0.866 (0.906) data 0.000 (0.016) loss 1.2793 (1.0787) acc 75.0000 (73.1250) lr 8.7467e-04 eta 2:45:31 +epoch [29/50] batch [45/500] time 0.890 (0.904) data 0.000 (0.014) loss 1.7324 (1.1132) acc 50.0000 (72.3611) lr 8.7467e-04 eta 2:44:59 +epoch [29/50] batch [50/500] time 0.908 (0.902) data 0.000 (0.013) loss 0.9419 (1.0889) acc 71.8750 (72.6250) lr 8.7467e-04 eta 2:44:38 +epoch [29/50] batch [55/500] time 0.870 (0.900) data 0.000 (0.012) loss 0.7793 (1.0885) acc 81.2500 (72.6136) lr 8.7467e-04 eta 2:44:09 +epoch [29/50] batch [60/500] time 0.906 (0.899) data 0.000 (0.011) loss 1.2754 (1.1076) acc 68.7500 (72.2917) lr 8.7467e-04 eta 2:43:58 +epoch [29/50] batch [65/500] time 0.889 (0.900) data 0.000 (0.010) loss 1.9219 (1.1085) acc 53.1250 (72.3077) lr 8.7467e-04 eta 2:43:59 +epoch [29/50] batch [70/500] time 0.847 (0.898) data 0.001 (0.009) loss 0.8525 (1.0883) acc 78.1250 (72.5446) lr 8.7467e-04 eta 2:43:35 +epoch [29/50] batch [75/500] time 0.904 (0.897) data 0.000 (0.009) loss 1.3867 (1.0695) acc 62.5000 (72.8750) lr 8.7467e-04 eta 2:43:20 +epoch [29/50] batch [80/500] time 0.908 (0.897) data 0.000 (0.008) loss 1.5537 
(1.0762) acc 75.0000 (73.0078) lr 8.7467e-04 eta 2:43:12 +epoch [29/50] batch [85/500] time 0.899 (0.896) data 0.000 (0.008) loss 1.2393 (1.0664) acc 71.8750 (73.3088) lr 8.7467e-04 eta 2:43:02 +epoch [29/50] batch [90/500] time 0.859 (0.895) data 0.000 (0.007) loss 1.5801 (1.0668) acc 75.0000 (73.3681) lr 8.7467e-04 eta 2:42:45 +epoch [29/50] batch [95/500] time 0.870 (0.894) data 0.000 (0.007) loss 1.0488 (1.0577) acc 75.0000 (73.4539) lr 8.7467e-04 eta 2:42:30 +epoch [29/50] batch [100/500] time 0.863 (0.893) data 0.000 (0.007) loss 1.2793 (1.0515) acc 75.0000 (73.5625) lr 8.7467e-04 eta 2:42:17 +epoch [29/50] batch [105/500] time 0.884 (0.893) data 0.000 (0.006) loss 0.7520 (1.0442) acc 81.2500 (73.5119) lr 8.7467e-04 eta 2:42:07 +epoch [29/50] batch [110/500] time 0.888 (0.893) data 0.000 (0.006) loss 1.1035 (1.0428) acc 81.2500 (73.6648) lr 8.7467e-04 eta 2:42:00 +epoch [29/50] batch [115/500] time 0.878 (0.892) data 0.000 (0.006) loss 0.7500 (1.0469) acc 78.1250 (73.6141) lr 8.7467e-04 eta 2:41:49 +epoch [29/50] batch [120/500] time 0.855 (0.891) data 0.000 (0.005) loss 0.8672 (1.0449) acc 81.2500 (73.7240) lr 8.7467e-04 eta 2:41:38 +epoch [29/50] batch [125/500] time 0.865 (0.892) data 0.000 (0.005) loss 0.9502 (1.0469) acc 68.7500 (73.7000) lr 8.7467e-04 eta 2:41:36 +epoch [29/50] batch [130/500] time 0.908 (0.891) data 0.000 (0.005) loss 0.8076 (1.0419) acc 78.1250 (73.6779) lr 8.7467e-04 eta 2:41:30 +epoch [29/50] batch [135/500] time 0.887 (0.891) data 0.000 (0.005) loss 1.2080 (1.0554) acc 68.7500 (73.4491) lr 8.7467e-04 eta 2:41:22 +epoch [29/50] batch [140/500] time 0.864 (0.891) data 0.000 (0.005) loss 0.8906 (1.0544) acc 75.0000 (73.5045) lr 8.7467e-04 eta 2:41:13 +epoch [29/50] batch [145/500] time 0.885 (0.891) data 0.000 (0.005) loss 0.8960 (1.0587) acc 87.5000 (73.6638) lr 8.7467e-04 eta 2:41:06 +epoch [29/50] batch [150/500] time 0.903 (0.890) data 0.000 (0.004) loss 1.4541 (1.0605) acc 62.5000 (73.6667) lr 8.7467e-04 eta 2:41:00 +epoch 
[29/50] batch [155/500] time 0.852 (0.890) data 0.000 (0.004) loss 0.7373 (1.0498) acc 81.2500 (73.7903) lr 8.7467e-04 eta 2:40:54 +epoch [29/50] batch [160/500] time 0.890 (0.890) data 0.000 (0.004) loss 1.1436 (1.0545) acc 65.6250 (73.7500) lr 8.7467e-04 eta 2:40:47 +epoch [29/50] batch [165/500] time 0.854 (0.890) data 0.000 (0.004) loss 1.1631 (1.0557) acc 78.1250 (73.7311) lr 8.7467e-04 eta 2:40:37 +epoch [29/50] batch [170/500] time 0.892 (0.890) data 0.000 (0.004) loss 1.4932 (1.0599) acc 68.7500 (73.6581) lr 8.7467e-04 eta 2:40:39 +epoch [29/50] batch [175/500] time 0.885 (0.890) data 0.000 (0.004) loss 0.5811 (1.0617) acc 84.3750 (73.6250) lr 8.7467e-04 eta 2:40:32 +epoch [29/50] batch [180/500] time 0.868 (0.890) data 0.000 (0.004) loss 1.2754 (1.0631) acc 62.5000 (73.5764) lr 8.7467e-04 eta 2:40:26 +epoch [29/50] batch [185/500] time 0.875 (0.890) data 0.000 (0.004) loss 1.7471 (1.0710) acc 62.5000 (73.3953) lr 8.7467e-04 eta 2:40:24 +epoch [29/50] batch [190/500] time 0.903 (0.890) data 0.000 (0.004) loss 1.2881 (1.0744) acc 62.5000 (73.3224) lr 8.7467e-04 eta 2:40:17 +epoch [29/50] batch [195/500] time 0.887 (0.890) data 0.000 (0.003) loss 1.3066 (1.0758) acc 65.6250 (73.2212) lr 8.7467e-04 eta 2:40:12 +epoch [29/50] batch [200/500] time 0.892 (0.889) data 0.000 (0.003) loss 0.9707 (1.0744) acc 78.1250 (73.2969) lr 8.7467e-04 eta 2:40:06 +epoch [29/50] batch [205/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.8564 (1.0736) acc 84.3750 (73.3232) lr 8.7467e-04 eta 2:39:57 +epoch [29/50] batch [210/500] time 0.861 (0.889) data 0.000 (0.003) loss 1.3066 (1.0728) acc 68.7500 (73.3333) lr 8.7467e-04 eta 2:39:53 +epoch [29/50] batch [215/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.4375 (1.0754) acc 65.6250 (73.2994) lr 8.7467e-04 eta 2:39:45 +epoch [29/50] batch [220/500] time 0.903 (0.889) data 0.000 (0.003) loss 0.7695 (1.0777) acc 81.2500 (73.2670) lr 8.7467e-04 eta 2:39:38 +epoch [29/50] batch [225/500] time 0.895 (0.888) data 0.000 (0.003) loss 
1.3066 (1.0807) acc 56.2500 (73.1667) lr 8.7467e-04 eta 2:39:31 +epoch [29/50] batch [230/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.2910 (1.0856) acc 68.7500 (73.0027) lr 8.7467e-04 eta 2:39:25 +epoch [29/50] batch [235/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.3174 (1.0861) acc 68.7500 (73.0186) lr 8.7467e-04 eta 2:39:23 +epoch [29/50] batch [240/500] time 0.888 (0.888) data 0.000 (0.003) loss 0.5942 (1.0819) acc 84.3750 (73.1250) lr 8.7467e-04 eta 2:39:18 +epoch [29/50] batch [245/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.2197 (1.0849) acc 65.6250 (73.0102) lr 8.7467e-04 eta 2:39:11 +epoch [29/50] batch [250/500] time 0.887 (0.888) data 0.000 (0.003) loss 0.9619 (1.0842) acc 71.8750 (73.0000) lr 8.7467e-04 eta 2:39:05 +epoch [29/50] batch [255/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.5337 (1.0827) acc 81.2500 (72.9657) lr 8.7467e-04 eta 2:38:58 +epoch [29/50] batch [260/500] time 0.858 (0.888) data 0.000 (0.003) loss 0.9097 (1.0824) acc 75.0000 (73.0048) lr 8.7467e-04 eta 2:38:52 +epoch [29/50] batch [265/500] time 0.899 (0.888) data 0.000 (0.003) loss 0.8530 (1.0812) acc 78.1250 (73.0542) lr 8.7467e-04 eta 2:38:48 +epoch [29/50] batch [270/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.4717 (1.0844) acc 68.7500 (73.0208) lr 8.7467e-04 eta 2:38:46 +epoch [29/50] batch [275/500] time 0.860 (0.887) data 0.000 (0.003) loss 1.1797 (1.0840) acc 65.6250 (73.0000) lr 8.7467e-04 eta 2:38:36 +epoch [29/50] batch [280/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.8730 (1.0837) acc 78.1250 (73.0134) lr 8.7467e-04 eta 2:38:32 +epoch [29/50] batch [285/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.8716 (1.0808) acc 71.8750 (73.0702) lr 8.7467e-04 eta 2:38:27 +epoch [29/50] batch [290/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.8975 (1.0827) acc 84.3750 (73.0711) lr 8.7467e-04 eta 2:38:21 +epoch [29/50] batch [295/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2227 (1.0813) acc 75.0000 (73.0614) lr 8.7467e-04 eta 2:38:21 
+epoch [29/50] batch [300/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4111 (1.0806) acc 65.6250 (73.1146) lr 8.7467e-04 eta 2:38:16 +epoch [29/50] batch [305/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9419 (1.0865) acc 78.1250 (73.0430) lr 8.7467e-04 eta 2:38:12 +epoch [29/50] batch [310/500] time 0.989 (0.888) data 0.000 (0.002) loss 0.9409 (1.0828) acc 68.7500 (73.1250) lr 8.7467e-04 eta 2:38:11 +epoch [29/50] batch [315/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.6187 (1.0790) acc 81.2500 (73.1944) lr 8.7467e-04 eta 2:38:06 +epoch [29/50] batch [320/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.2812 (1.0795) acc 75.0000 (73.2520) lr 8.7467e-04 eta 2:38:02 +epoch [29/50] batch [325/500] time 0.853 (0.888) data 0.000 (0.002) loss 0.5664 (1.0767) acc 81.2500 (73.3077) lr 8.7467e-04 eta 2:37:56 +epoch [29/50] batch [330/500] time 0.847 (0.887) data 0.000 (0.002) loss 0.9121 (1.0757) acc 75.0000 (73.3523) lr 8.7467e-04 eta 2:37:49 +epoch [29/50] batch [335/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.2588 (1.0774) acc 62.5000 (73.2836) lr 8.7467e-04 eta 2:37:42 +epoch [29/50] batch [340/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5669 (1.0800) acc 81.2500 (73.2445) lr 8.7467e-04 eta 2:37:39 +epoch [29/50] batch [345/500] time 0.904 (0.887) data 0.000 (0.002) loss 0.7300 (1.0763) acc 78.1250 (73.3786) lr 8.7467e-04 eta 2:37:35 +epoch [29/50] batch [350/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.8296 (1.0715) acc 81.2500 (73.4286) lr 8.7467e-04 eta 2:37:30 +epoch [29/50] batch [355/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.1855 (1.0722) acc 65.6250 (73.3891) lr 8.7467e-04 eta 2:37:26 +epoch [29/50] batch [360/500] time 0.853 (0.887) data 0.000 (0.002) loss 1.6035 (1.0779) acc 65.6250 (73.2726) lr 8.7467e-04 eta 2:37:19 +epoch [29/50] batch [365/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.6538 (1.0771) acc 81.2500 (73.3562) lr 8.7467e-04 eta 2:37:15 +epoch [29/50] batch [370/500] time 0.869 (0.887) data 0.000 
(0.002) loss 1.2422 (1.0748) acc 75.0000 (73.3784) lr 8.7467e-04 eta 2:37:10 +epoch [29/50] batch [375/500] time 0.854 (0.887) data 0.000 (0.002) loss 1.0977 (1.0739) acc 78.1250 (73.4167) lr 8.7467e-04 eta 2:37:05 +epoch [29/50] batch [380/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.8833 (1.0740) acc 81.2500 (73.4211) lr 8.7467e-04 eta 2:37:01 +epoch [29/50] batch [385/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.3701 (1.0771) acc 59.3750 (73.4253) lr 8.7467e-04 eta 2:36:55 +epoch [29/50] batch [390/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.4238 (1.0762) acc 65.6250 (73.4535) lr 8.7467e-04 eta 2:36:49 +epoch [29/50] batch [395/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.8813 (1.0761) acc 78.1250 (73.4098) lr 8.7467e-04 eta 2:36:42 +epoch [29/50] batch [400/500] time 0.852 (0.886) data 0.000 (0.002) loss 1.2100 (1.0749) acc 71.8750 (73.4141) lr 8.7467e-04 eta 2:36:34 +epoch [29/50] batch [405/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4199 (1.0750) acc 71.8750 (73.4336) lr 8.7467e-04 eta 2:36:30 +epoch [29/50] batch [410/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.4355 (1.0793) acc 71.8750 (73.3537) lr 8.7467e-04 eta 2:36:28 +epoch [29/50] batch [415/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.0469 (1.0773) acc 59.3750 (73.3886) lr 8.7467e-04 eta 2:36:24 +epoch [29/50] batch [420/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.2998 (1.0763) acc 65.6250 (73.3705) lr 8.7467e-04 eta 2:36:20 +epoch [29/50] batch [425/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.8613 (1.0754) acc 75.0000 (73.3676) lr 8.7467e-04 eta 2:36:16 +epoch [29/50] batch [430/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.9897 (1.0753) acc 75.0000 (73.3503) lr 8.7467e-04 eta 2:36:12 +epoch [29/50] batch [435/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.8276 (1.0748) acc 81.2500 (73.3836) lr 8.7467e-04 eta 2:36:10 +epoch [29/50] batch [440/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.3975 (1.0735) acc 68.7500 (73.3807) lr 8.7467e-04 
eta 2:36:05 +epoch [29/50] batch [445/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.0430 (1.0731) acc 65.6250 (73.3357) lr 8.7467e-04 eta 2:36:01 +epoch [29/50] batch [450/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.0410 (1.0710) acc 78.1250 (73.3889) lr 8.7467e-04 eta 2:35:57 +epoch [29/50] batch [455/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.1709 (1.0705) acc 71.8750 (73.4135) lr 8.7467e-04 eta 2:35:53 +epoch [29/50] batch [460/500] time 0.855 (0.887) data 0.000 (0.002) loss 0.9126 (1.0691) acc 71.8750 (73.4035) lr 8.7467e-04 eta 2:35:47 +epoch [29/50] batch [465/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.7964 (1.0656) acc 71.8750 (73.4543) lr 8.7467e-04 eta 2:35:42 +epoch [29/50] batch [470/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.6992 (1.0638) acc 81.2500 (73.4840) lr 8.7467e-04 eta 2:35:37 +epoch [29/50] batch [475/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.7866 (1.0628) acc 78.1250 (73.4934) lr 8.7467e-04 eta 2:35:31 +epoch [29/50] batch [480/500] time 0.889 (0.887) data 0.000 (0.002) loss 0.9995 (1.0632) acc 68.7500 (73.4701) lr 8.7467e-04 eta 2:35:26 +epoch [29/50] batch [485/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.0371 (1.0639) acc 75.0000 (73.4794) lr 8.7467e-04 eta 2:35:20 +epoch [29/50] batch [490/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.3252 (1.0642) acc 62.5000 (73.4311) lr 8.7467e-04 eta 2:35:15 +epoch [29/50] batch [495/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.9355 (1.0657) acc 78.1250 (73.4028) lr 8.7467e-04 eta 2:35:10 +epoch [29/50] batch [500/500] time 0.897 (0.886) data 0.000 (0.001) loss 1.0527 (1.0658) acc 68.7500 (73.3750) lr 8.1262e-04 eta 2:35:06 +epoch [30/50] batch [5/500] time 0.897 (1.015) data 0.000 (0.132) loss 1.4014 (1.2132) acc 65.6250 (66.2500) lr 8.1262e-04 eta 2:57:29 +epoch [30/50] batch [10/500] time 0.858 (0.950) data 0.000 (0.066) loss 1.0205 (1.0887) acc 71.8750 (70.0000) lr 8.1262e-04 eta 2:46:05 +epoch [30/50] batch [15/500] time 0.876 (0.930) data 
0.000 (0.044) loss 1.2256 (1.0347) acc 68.7500 (71.4583) lr 8.1262e-04 eta 2:42:28 +epoch [30/50] batch [20/500] time 0.873 (0.917) data 0.000 (0.033) loss 0.7661 (1.0028) acc 84.3750 (73.2812) lr 8.1262e-04 eta 2:40:10 +epoch [30/50] batch [25/500] time 0.876 (0.910) data 0.000 (0.027) loss 0.8862 (1.0118) acc 81.2500 (73.8750) lr 8.1262e-04 eta 2:38:56 +epoch [30/50] batch [30/500] time 0.908 (0.907) data 0.000 (0.022) loss 1.6465 (1.0777) acc 65.6250 (72.7083) lr 8.1262e-04 eta 2:38:20 +epoch [30/50] batch [35/500] time 0.867 (0.904) data 0.000 (0.019) loss 1.2900 (1.0648) acc 75.0000 (73.1250) lr 8.1262e-04 eta 2:37:38 +epoch [30/50] batch [40/500] time 0.881 (0.902) data 0.000 (0.017) loss 1.0000 (1.0520) acc 71.8750 (73.5938) lr 8.1262e-04 eta 2:37:12 +epoch [30/50] batch [45/500] time 0.915 (0.901) data 0.000 (0.015) loss 1.0605 (1.0407) acc 71.8750 (73.6111) lr 8.1262e-04 eta 2:36:55 +epoch [30/50] batch [50/500] time 0.868 (0.899) data 0.000 (0.013) loss 0.8257 (1.0171) acc 81.2500 (74.3125) lr 8.1262e-04 eta 2:36:32 +epoch [30/50] batch [55/500] time 0.859 (0.898) data 0.000 (0.012) loss 0.7681 (1.0117) acc 81.2500 (74.3182) lr 8.1262e-04 eta 2:36:16 +epoch [30/50] batch [60/500] time 0.904 (0.896) data 0.000 (0.011) loss 0.9707 (1.0057) acc 78.1250 (74.6875) lr 8.1262e-04 eta 2:35:53 +epoch [30/50] batch [65/500] time 0.866 (0.895) data 0.000 (0.010) loss 1.5098 (0.9933) acc 59.3750 (74.8558) lr 8.1262e-04 eta 2:35:34 +epoch [30/50] batch [70/500] time 0.894 (0.894) data 0.000 (0.010) loss 1.6182 (1.0150) acc 65.6250 (74.4643) lr 8.1262e-04 eta 2:35:22 +epoch [30/50] batch [75/500] time 0.863 (0.893) data 0.000 (0.009) loss 1.4932 (1.0401) acc 65.6250 (74.0833) lr 8.1262e-04 eta 2:35:12 +epoch [30/50] batch [80/500] time 0.913 (0.893) data 0.000 (0.009) loss 1.0273 (1.0375) acc 65.6250 (73.7500) lr 8.1262e-04 eta 2:35:01 +epoch [30/50] batch [85/500] time 0.919 (0.893) data 0.000 (0.008) loss 1.3535 (1.0541) acc 59.3750 (73.0147) lr 8.1262e-04 eta 
2:34:56 +epoch [30/50] batch [90/500] time 0.898 (0.893) data 0.000 (0.008) loss 0.8359 (1.0476) acc 71.8750 (73.3681) lr 8.1262e-04 eta 2:34:51 +epoch [30/50] batch [95/500] time 0.875 (0.893) data 0.000 (0.007) loss 0.9785 (1.0444) acc 78.1250 (73.5197) lr 8.1262e-04 eta 2:34:56 +epoch [30/50] batch [100/500] time 0.876 (0.893) data 0.000 (0.007) loss 1.2021 (1.0528) acc 68.7500 (73.5000) lr 8.1262e-04 eta 2:34:45 +epoch [30/50] batch [105/500] time 0.876 (0.893) data 0.000 (0.007) loss 0.6172 (1.0418) acc 84.3750 (73.7202) lr 8.1262e-04 eta 2:34:40 +epoch [30/50] batch [110/500] time 0.901 (0.892) data 0.000 (0.006) loss 0.7827 (1.0393) acc 78.1250 (73.8352) lr 8.1262e-04 eta 2:34:31 +epoch [30/50] batch [115/500] time 0.878 (0.892) data 0.000 (0.006) loss 1.0596 (1.0335) acc 78.1250 (73.9402) lr 8.1262e-04 eta 2:34:22 +epoch [30/50] batch [120/500] time 0.893 (0.892) data 0.000 (0.006) loss 1.1143 (1.0364) acc 84.3750 (73.9062) lr 8.1262e-04 eta 2:34:22 +epoch [30/50] batch [125/500] time 0.908 (0.892) data 0.000 (0.006) loss 1.4854 (1.0302) acc 65.6250 (74.0750) lr 8.1262e-04 eta 2:34:19 +epoch [30/50] batch [130/500] time 0.907 (0.893) data 0.000 (0.005) loss 0.8008 (1.0349) acc 75.0000 (73.9183) lr 8.1262e-04 eta 2:34:20 +epoch [30/50] batch [135/500] time 0.892 (0.893) data 0.000 (0.005) loss 0.1719 (1.0276) acc 96.8750 (74.2130) lr 8.1262e-04 eta 2:34:15 +epoch [30/50] batch [140/500] time 0.901 (0.894) data 0.000 (0.005) loss 1.1670 (1.0285) acc 75.0000 (74.1518) lr 8.1262e-04 eta 2:34:19 +epoch [30/50] batch [145/500] time 0.898 (0.893) data 0.000 (0.005) loss 0.8530 (1.0322) acc 62.5000 (73.9224) lr 8.1262e-04 eta 2:34:09 +epoch [30/50] batch [150/500] time 0.883 (0.893) data 0.000 (0.005) loss 1.0703 (1.0293) acc 75.0000 (73.9583) lr 8.1262e-04 eta 2:34:00 +epoch [30/50] batch [155/500] time 0.868 (0.893) data 0.000 (0.005) loss 0.9771 (1.0351) acc 71.8750 (73.8306) lr 8.1262e-04 eta 2:33:53 +epoch [30/50] batch [160/500] time 0.856 (0.892) data 0.000 
(0.004) loss 1.5361 (1.0378) acc 65.6250 (73.8672) lr 8.1262e-04 eta 2:33:43 +epoch [30/50] batch [165/500] time 0.877 (0.892) data 0.000 (0.004) loss 1.2041 (1.0373) acc 68.7500 (73.7689) lr 8.1262e-04 eta 2:33:38 +epoch [30/50] batch [170/500] time 0.895 (0.892) data 0.000 (0.004) loss 1.1855 (1.0405) acc 84.3750 (73.8419) lr 8.1262e-04 eta 2:33:35 +epoch [30/50] batch [175/500] time 0.902 (0.892) data 0.000 (0.004) loss 1.5791 (1.0457) acc 59.3750 (73.6429) lr 8.1262e-04 eta 2:33:28 +epoch [30/50] batch [180/500] time 0.885 (0.891) data 0.000 (0.004) loss 0.9121 (1.0448) acc 75.0000 (73.6458) lr 8.1262e-04 eta 2:33:18 +epoch [30/50] batch [185/500] time 0.872 (0.891) data 0.000 (0.004) loss 1.0234 (1.0455) acc 81.2500 (73.6318) lr 8.1262e-04 eta 2:33:11 +epoch [30/50] batch [190/500] time 0.874 (0.891) data 0.000 (0.004) loss 0.8066 (1.0462) acc 71.8750 (73.5855) lr 8.1262e-04 eta 2:33:02 +epoch [30/50] batch [195/500] time 0.884 (0.890) data 0.000 (0.004) loss 0.8926 (1.0455) acc 78.1250 (73.6378) lr 8.1262e-04 eta 2:32:53 +epoch [30/50] batch [200/500] time 0.862 (0.890) data 0.000 (0.004) loss 0.8159 (1.0419) acc 78.1250 (73.7344) lr 8.1262e-04 eta 2:32:47 +epoch [30/50] batch [205/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.7954 (1.0379) acc 84.3750 (73.8872) lr 8.1262e-04 eta 2:32:40 +epoch [30/50] batch [210/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.6709 (1.0389) acc 62.5000 (73.8393) lr 8.1262e-04 eta 2:32:38 +epoch [30/50] batch [215/500] time 0.891 (0.890) data 0.000 (0.003) loss 0.2177 (1.0325) acc 93.7500 (73.9535) lr 8.1262e-04 eta 2:32:33 +epoch [30/50] batch [220/500] time 0.881 (0.890) data 0.001 (0.003) loss 1.3906 (1.0327) acc 68.7500 (73.9205) lr 8.1262e-04 eta 2:32:30 +epoch [30/50] batch [225/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.9316 (1.0313) acc 75.0000 (73.9028) lr 8.1262e-04 eta 2:32:24 +epoch [30/50] batch [230/500] time 0.867 (0.890) data 0.000 (0.003) loss 0.7744 (1.0313) acc 75.0000 (73.8995) lr 8.1262e-04 
eta 2:32:20 +epoch [30/50] batch [235/500] time 0.983 (0.890) data 0.000 (0.003) loss 0.7051 (1.0306) acc 81.2500 (73.9096) lr 8.1262e-04 eta 2:32:17 +epoch [30/50] batch [240/500] time 0.873 (0.890) data 0.000 (0.003) loss 0.6890 (1.0339) acc 78.1250 (73.8542) lr 8.1262e-04 eta 2:32:11 +epoch [30/50] batch [245/500] time 0.917 (0.890) data 0.000 (0.003) loss 1.5908 (1.0320) acc 65.6250 (73.9413) lr 8.1262e-04 eta 2:32:09 +epoch [30/50] batch [250/500] time 0.886 (0.890) data 0.000 (0.003) loss 2.4551 (1.0376) acc 56.2500 (73.9000) lr 8.1262e-04 eta 2:32:01 +epoch [30/50] batch [255/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.8926 (1.0392) acc 78.1250 (73.8603) lr 8.1262e-04 eta 2:31:55 +epoch [30/50] batch [260/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.8906 (1.0361) acc 65.6250 (73.8221) lr 8.1262e-04 eta 2:31:50 +epoch [30/50] batch [265/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.0137 (1.0400) acc 75.0000 (73.7618) lr 8.1262e-04 eta 2:31:46 +epoch [30/50] batch [270/500] time 0.868 (0.890) data 0.000 (0.003) loss 0.9805 (1.0406) acc 78.1250 (73.8079) lr 8.1262e-04 eta 2:31:39 +epoch [30/50] batch [275/500] time 0.906 (0.890) data 0.000 (0.003) loss 1.4961 (1.0412) acc 59.3750 (73.7614) lr 8.1262e-04 eta 2:31:36 +epoch [30/50] batch [280/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.2324 (1.0417) acc 59.3750 (73.7388) lr 8.1262e-04 eta 2:31:36 +epoch [30/50] batch [285/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.1562 (1.0434) acc 78.1250 (73.7281) lr 8.1262e-04 eta 2:31:30 +epoch [30/50] batch [290/500] time 0.896 (0.890) data 0.000 (0.003) loss 0.7236 (1.0428) acc 84.3750 (73.7392) lr 8.1262e-04 eta 2:31:23 +epoch [30/50] batch [295/500] time 0.875 (0.889) data 0.000 (0.002) loss 1.1221 (1.0436) acc 65.6250 (73.6547) lr 8.1262e-04 eta 2:31:16 +epoch [30/50] batch [300/500] time 0.910 (0.890) data 0.000 (0.002) loss 0.9365 (1.0443) acc 71.8750 (73.6354) lr 8.1262e-04 eta 2:31:13 +epoch [30/50] batch [305/500] time 0.893 (0.889) data 
0.000 (0.002) loss 1.0117 (1.0405) acc 68.7500 (73.7910) lr 8.1262e-04 eta 2:31:07 +epoch [30/50] batch [310/500] time 0.857 (0.889) data 0.000 (0.002) loss 0.9155 (1.0410) acc 78.1250 (73.7601) lr 8.1262e-04 eta 2:31:00 +epoch [30/50] batch [315/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.4382 (1.0364) acc 87.5000 (73.8194) lr 8.1262e-04 eta 2:30:56 +epoch [30/50] batch [320/500] time 0.885 (0.889) data 0.000 (0.002) loss 0.7954 (1.0358) acc 75.0000 (73.8184) lr 8.1262e-04 eta 2:30:53 +epoch [30/50] batch [325/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.5762 (1.0358) acc 71.8750 (73.8269) lr 8.1262e-04 eta 2:30:49 +epoch [30/50] batch [330/500] time 0.856 (0.889) data 0.000 (0.002) loss 0.7651 (1.0349) acc 71.8750 (73.8258) lr 8.1262e-04 eta 2:30:43 +epoch [30/50] batch [335/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.0479 (1.0375) acc 71.8750 (73.7593) lr 8.1262e-04 eta 2:30:39 +epoch [30/50] batch [340/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.5352 (1.0365) acc 59.3750 (73.7868) lr 8.1262e-04 eta 2:30:35 +epoch [30/50] batch [345/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.9502 (1.0340) acc 78.1250 (73.8043) lr 8.1262e-04 eta 2:30:31 +epoch [30/50] batch [350/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.1152 (1.0342) acc 68.7500 (73.7321) lr 8.1262e-04 eta 2:30:26 +epoch [30/50] batch [355/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.7700 (1.0324) acc 81.2500 (73.8028) lr 8.1262e-04 eta 2:30:23 +epoch [30/50] batch [360/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.9404 (1.0308) acc 71.8750 (73.7934) lr 8.1262e-04 eta 2:30:19 +epoch [30/50] batch [365/500] time 0.864 (0.889) data 0.000 (0.002) loss 1.0420 (1.0333) acc 71.8750 (73.6729) lr 8.1262e-04 eta 2:30:13 +epoch [30/50] batch [370/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.0430 (1.0320) acc 75.0000 (73.7078) lr 8.1262e-04 eta 2:30:08 +epoch [30/50] batch [375/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.2227 (1.0348) acc 68.7500 (73.6083) lr 
8.1262e-04 eta 2:30:05 +epoch [30/50] batch [380/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.4658 (1.0365) acc 71.8750 (73.5938) lr 8.1262e-04 eta 2:30:04 +epoch [30/50] batch [385/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.2568 (1.0371) acc 68.7500 (73.5552) lr 8.1262e-04 eta 2:30:00 +epoch [30/50] batch [390/500] time 0.916 (0.890) data 0.000 (0.002) loss 0.9287 (1.0367) acc 75.0000 (73.5497) lr 8.1262e-04 eta 2:29:56 +epoch [30/50] batch [395/500] time 0.864 (0.890) data 0.000 (0.002) loss 1.5410 (1.0345) acc 56.2500 (73.5601) lr 8.1262e-04 eta 2:29:52 +epoch [30/50] batch [400/500] time 0.886 (0.890) data 0.000 (0.002) loss 0.8594 (1.0344) acc 68.7500 (73.5234) lr 8.1262e-04 eta 2:29:48 +epoch [30/50] batch [405/500] time 0.868 (0.890) data 0.000 (0.002) loss 0.9629 (1.0336) acc 71.8750 (73.5648) lr 8.1262e-04 eta 2:29:42 +epoch [30/50] batch [410/500] time 0.865 (0.890) data 0.000 (0.002) loss 1.1631 (1.0329) acc 75.0000 (73.6128) lr 8.1262e-04 eta 2:29:35 +epoch [30/50] batch [415/500] time 0.859 (0.889) data 0.000 (0.002) loss 1.5068 (1.0370) acc 71.8750 (73.5241) lr 8.1262e-04 eta 2:29:28 +epoch [30/50] batch [420/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.6934 (1.0349) acc 78.1250 (73.5714) lr 8.1262e-04 eta 2:29:21 +epoch [30/50] batch [425/500] time 0.857 (0.889) data 0.000 (0.002) loss 0.9644 (1.0352) acc 68.7500 (73.5588) lr 8.1262e-04 eta 2:29:19 +epoch [30/50] batch [430/500] time 0.882 (0.889) data 0.000 (0.002) loss 0.7729 (1.0340) acc 75.0000 (73.6410) lr 8.1262e-04 eta 2:29:14 +epoch [30/50] batch [435/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.2646 (1.0362) acc 68.7500 (73.5920) lr 8.1262e-04 eta 2:29:09 +epoch [30/50] batch [440/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.1270 (1.0369) acc 68.7500 (73.5511) lr 8.1262e-04 eta 2:29:04 +epoch [30/50] batch [445/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.2148 (1.0363) acc 65.6250 (73.5815) lr 8.1262e-04 eta 2:28:59 +epoch [30/50] batch [450/500] time 0.882 
(0.889) data 0.000 (0.002) loss 0.4814 (1.0344) acc 84.3750 (73.6181) lr 8.1262e-04 eta 2:28:52 +epoch [30/50] batch [455/500] time 0.885 (0.889) data 0.000 (0.002) loss 0.7222 (1.0339) acc 84.3750 (73.6126) lr 8.1262e-04 eta 2:28:47 +epoch [30/50] batch [460/500] time 0.882 (0.889) data 0.000 (0.002) loss 0.8457 (1.0355) acc 78.1250 (73.5598) lr 8.1262e-04 eta 2:28:43 +epoch [30/50] batch [465/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.6797 (1.0350) acc 81.2500 (73.5551) lr 8.1262e-04 eta 2:28:38 +epoch [30/50] batch [470/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.1611 (1.0369) acc 65.6250 (73.5106) lr 8.1262e-04 eta 2:28:33 +epoch [30/50] batch [475/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.2236 (1.0388) acc 75.0000 (73.5066) lr 8.1262e-04 eta 2:28:28 +epoch [30/50] batch [480/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.6377 (1.0386) acc 59.3750 (73.4961) lr 8.1262e-04 eta 2:28:23 +epoch [30/50] batch [485/500] time 0.924 (0.889) data 0.001 (0.002) loss 1.0674 (1.0400) acc 71.8750 (73.4729) lr 8.1262e-04 eta 2:28:19 +epoch [30/50] batch [490/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.8130 (1.0381) acc 81.2500 (73.4885) lr 8.1262e-04 eta 2:28:15 +epoch [30/50] batch [495/500] time 0.852 (0.889) data 0.000 (0.002) loss 0.9795 (1.0388) acc 75.0000 (73.4785) lr 8.1262e-04 eta 2:28:09 +epoch [30/50] batch [500/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4326 (1.0415) acc 62.5000 (73.4188) lr 7.5131e-04 eta 2:28:03 +epoch [31/50] batch [5/500] time 0.890 (1.023) data 0.000 (0.125) loss 1.0068 (1.0899) acc 71.8750 (71.2500) lr 7.5131e-04 eta 2:50:29 +epoch [31/50] batch [10/500] time 0.902 (0.958) data 0.000 (0.063) loss 0.7344 (1.0477) acc 87.5000 (72.5000) lr 7.5131e-04 eta 2:39:33 +epoch [31/50] batch [15/500] time 0.893 (0.935) data 0.000 (0.042) loss 0.6777 (1.0874) acc 87.5000 (73.3333) lr 7.5131e-04 eta 2:35:32 +epoch [31/50] batch [20/500] time 0.891 (0.923) data 0.000 (0.031) loss 1.7432 (1.1154) acc 65.6250 (72.8125) lr 
7.5131e-04 eta 2:33:32 +epoch [31/50] batch [25/500] time 0.910 (0.917) data 0.000 (0.025) loss 1.8477 (1.0604) acc 62.5000 (74.5000) lr 7.5131e-04 eta 2:32:25 +epoch [31/50] batch [30/500] time 0.872 (0.914) data 0.000 (0.021) loss 1.3359 (1.0792) acc 71.8750 (73.5417) lr 7.5131e-04 eta 2:31:52 +epoch [31/50] batch [35/500] time 0.874 (0.910) data 0.000 (0.018) loss 0.9692 (1.1027) acc 75.0000 (73.1250) lr 7.5131e-04 eta 2:31:09 +epoch [31/50] batch [40/500] time 0.861 (0.906) data 0.000 (0.016) loss 1.0039 (1.1030) acc 68.7500 (73.0469) lr 7.5131e-04 eta 2:30:27 +epoch [31/50] batch [45/500] time 0.886 (0.902) data 0.000 (0.014) loss 1.6426 (1.1220) acc 56.2500 (72.5694) lr 7.5131e-04 eta 2:29:43 +epoch [31/50] batch [50/500] time 0.898 (0.900) data 0.000 (0.013) loss 1.2373 (1.1324) acc 71.8750 (72.3750) lr 7.5131e-04 eta 2:29:19 +epoch [31/50] batch [55/500] time 0.891 (0.899) data 0.000 (0.012) loss 1.0840 (1.1148) acc 71.8750 (72.5568) lr 7.5131e-04 eta 2:29:01 +epoch [31/50] batch [60/500] time 0.884 (0.898) data 0.000 (0.011) loss 0.7100 (1.0885) acc 81.2500 (73.2292) lr 7.5131e-04 eta 2:28:41 +epoch [31/50] batch [65/500] time 0.899 (0.896) data 0.000 (0.010) loss 1.1875 (1.0752) acc 68.7500 (73.5577) lr 7.5131e-04 eta 2:28:20 +epoch [31/50] batch [70/500] time 0.871 (0.896) data 0.000 (0.009) loss 0.6655 (1.0693) acc 78.1250 (73.7054) lr 7.5131e-04 eta 2:28:13 +epoch [31/50] batch [75/500] time 0.906 (0.897) data 0.000 (0.009) loss 1.2305 (1.0660) acc 62.5000 (73.6250) lr 7.5131e-04 eta 2:28:19 +epoch [31/50] batch [80/500] time 0.892 (0.895) data 0.000 (0.008) loss 0.5957 (1.0513) acc 75.0000 (73.5156) lr 7.5131e-04 eta 2:28:02 +epoch [31/50] batch [85/500] time 0.914 (0.895) data 0.000 (0.008) loss 0.8950 (1.0493) acc 81.2500 (73.7868) lr 7.5131e-04 eta 2:27:55 +epoch [31/50] batch [90/500] time 0.866 (0.894) data 0.000 (0.007) loss 0.9468 (1.0510) acc 81.2500 (73.8889) lr 7.5131e-04 eta 2:27:38 +epoch [31/50] batch [95/500] time 0.927 (0.894) data 
0.000 (0.007) loss 0.7007 (1.0558) acc 81.2500 (73.7500) lr 7.5131e-04 eta 2:27:37 +epoch [31/50] batch [100/500] time 0.856 (0.893) data 0.000 (0.006) loss 1.4443 (1.0512) acc 59.3750 (73.6562) lr 7.5131e-04 eta 2:27:20 +epoch [31/50] batch [105/500] time 0.908 (0.893) data 0.000 (0.006) loss 0.9180 (1.0444) acc 78.1250 (73.8095) lr 7.5131e-04 eta 2:27:15 +epoch [31/50] batch [110/500] time 0.902 (0.893) data 0.000 (0.006) loss 1.2129 (1.0381) acc 71.8750 (74.0057) lr 7.5131e-04 eta 2:27:07 +epoch [31/50] batch [115/500] time 0.872 (0.892) data 0.000 (0.006) loss 1.0176 (1.0371) acc 62.5000 (73.9130) lr 7.5131e-04 eta 2:26:57 +epoch [31/50] batch [120/500] time 0.879 (0.893) data 0.000 (0.005) loss 1.4424 (1.0411) acc 78.1250 (73.9062) lr 7.5131e-04 eta 2:27:03 +epoch [31/50] batch [125/500] time 0.914 (0.893) data 0.000 (0.005) loss 1.1650 (1.0499) acc 75.0000 (73.7250) lr 7.5131e-04 eta 2:26:53 +epoch [31/50] batch [130/500] time 0.852 (0.892) data 0.000 (0.005) loss 1.1572 (1.0485) acc 71.8750 (73.6298) lr 7.5131e-04 eta 2:26:42 +epoch [31/50] batch [135/500] time 0.889 (0.891) data 0.000 (0.005) loss 1.1084 (1.0510) acc 71.8750 (73.6343) lr 7.5131e-04 eta 2:26:28 +epoch [31/50] batch [140/500] time 0.866 (0.891) data 0.000 (0.005) loss 0.9175 (1.0541) acc 78.1250 (73.5938) lr 7.5131e-04 eta 2:26:20 +epoch [31/50] batch [145/500] time 0.855 (0.890) data 0.000 (0.005) loss 1.3213 (1.0485) acc 59.3750 (73.6638) lr 7.5131e-04 eta 2:26:09 +epoch [31/50] batch [150/500] time 0.892 (0.890) data 0.000 (0.004) loss 1.0254 (1.0482) acc 71.8750 (73.6667) lr 7.5131e-04 eta 2:26:04 +epoch [31/50] batch [155/500] time 0.889 (0.889) data 0.000 (0.004) loss 1.2744 (1.0520) acc 78.1250 (73.7097) lr 7.5131e-04 eta 2:25:56 +epoch [31/50] batch [160/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.5908 (1.0511) acc 78.1250 (73.8867) lr 7.5131e-04 eta 2:25:49 +epoch [31/50] batch [165/500] time 0.868 (0.889) data 0.000 (0.004) loss 1.1670 (1.0523) acc 78.1250 (73.8447) lr 
7.5131e-04 eta 2:25:43 +epoch [31/50] batch [170/500] time 0.928 (0.889) data 0.000 (0.004) loss 1.1611 (1.0475) acc 78.1250 (73.9706) lr 7.5131e-04 eta 2:25:43 +epoch [31/50] batch [175/500] time 0.868 (0.889) data 0.000 (0.004) loss 1.1924 (1.0501) acc 68.7500 (73.9643) lr 7.5131e-04 eta 2:25:36 +epoch [31/50] batch [180/500] time 0.887 (0.889) data 0.000 (0.004) loss 0.5498 (1.0513) acc 87.5000 (73.9931) lr 7.5131e-04 eta 2:25:30 +epoch [31/50] batch [185/500] time 0.896 (0.889) data 0.000 (0.004) loss 1.2012 (1.0468) acc 78.1250 (74.0878) lr 7.5131e-04 eta 2:25:24 +epoch [31/50] batch [190/500] time 0.895 (0.889) data 0.000 (0.004) loss 0.8652 (1.0443) acc 81.2500 (74.1283) lr 7.5131e-04 eta 2:25:21 +epoch [31/50] batch [195/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.8496 (1.0396) acc 75.0000 (74.1987) lr 7.5131e-04 eta 2:25:16 +epoch [31/50] batch [200/500] time 0.893 (0.889) data 0.000 (0.003) loss 0.5972 (1.0405) acc 84.3750 (74.1406) lr 7.5131e-04 eta 2:25:11 +epoch [31/50] batch [205/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.0820 (1.0403) acc 71.8750 (74.0396) lr 7.5131e-04 eta 2:25:03 +epoch [31/50] batch [210/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.9185 (1.0381) acc 78.1250 (74.0327) lr 7.5131e-04 eta 2:24:55 +epoch [31/50] batch [215/500] time 0.913 (0.889) data 0.000 (0.003) loss 0.4111 (1.0381) acc 93.7500 (74.1134) lr 7.5131e-04 eta 2:24:54 +epoch [31/50] batch [220/500] time 0.898 (0.889) data 0.000 (0.003) loss 1.1309 (1.0377) acc 75.0000 (74.1477) lr 7.5131e-04 eta 2:24:55 +epoch [31/50] batch [225/500] time 0.868 (0.889) data 0.000 (0.003) loss 1.0654 (1.0360) acc 71.8750 (74.1667) lr 7.5131e-04 eta 2:24:52 +epoch [31/50] batch [230/500] time 0.897 (0.889) data 0.000 (0.003) loss 0.5898 (1.0360) acc 78.1250 (74.1304) lr 7.5131e-04 eta 2:24:46 +epoch [31/50] batch [235/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.2109 (1.0374) acc 81.2500 (74.1489) lr 7.5131e-04 eta 2:24:40 +epoch [31/50] batch [240/500] time 0.879 
(0.889) data 0.000 (0.003) loss 0.9995 (1.0402) acc 75.0000 (74.1536) lr 7.5131e-04 eta 2:24:36 +epoch [31/50] batch [245/500] time 0.897 (0.889) data 0.001 (0.003) loss 0.9932 (1.0417) acc 71.8750 (74.1071) lr 7.5131e-04 eta 2:24:35 +epoch [31/50] batch [250/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.5430 (1.0411) acc 68.7500 (74.1250) lr 7.5131e-04 eta 2:24:27 +epoch [31/50] batch [255/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.2646 (1.0422) acc 71.8750 (74.1176) lr 7.5131e-04 eta 2:24:24 +epoch [31/50] batch [260/500] time 1.001 (0.889) data 0.000 (0.003) loss 1.6914 (1.0406) acc 62.5000 (74.1947) lr 7.5131e-04 eta 2:24:22 +epoch [31/50] batch [265/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.9736 (1.0415) acc 68.7500 (74.1745) lr 7.5131e-04 eta 2:24:17 +epoch [31/50] batch [270/500] time 0.872 (0.889) data 0.000 (0.003) loss 0.7158 (1.0397) acc 84.3750 (74.1551) lr 7.5131e-04 eta 2:24:12 +epoch [31/50] batch [275/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.1729 (1.0391) acc 68.7500 (74.1023) lr 7.5131e-04 eta 2:24:06 +epoch [31/50] batch [280/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.0957 (1.0357) acc 78.1250 (74.2411) lr 7.5131e-04 eta 2:24:02 +epoch [31/50] batch [285/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.7998 (1.0404) acc 71.8750 (74.1118) lr 7.5131e-04 eta 2:23:56 +epoch [31/50] batch [290/500] time 0.885 (0.889) data 0.000 (0.002) loss 0.9795 (1.0368) acc 75.0000 (74.1487) lr 7.5131e-04 eta 2:23:49 +epoch [31/50] batch [295/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.6475 (1.0363) acc 87.5000 (74.1208) lr 7.5131e-04 eta 2:23:45 +epoch [31/50] batch [300/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.7510 (1.0371) acc 75.0000 (74.0312) lr 7.5131e-04 eta 2:23:39 +epoch [31/50] batch [305/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.6641 (1.0385) acc 84.3750 (73.9857) lr 7.5131e-04 eta 2:23:36 +epoch [31/50] batch [310/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.2109 (1.0390) acc 71.8750 
(74.0323) lr 7.5131e-04 eta 2:23:32 +epoch [31/50] batch [315/500] time 0.901 (0.889) data 0.000 (0.002) loss 0.8301 (1.0385) acc 71.8750 (74.0377) lr 7.5131e-04 eta 2:23:27 +epoch [31/50] batch [320/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.4683 (1.0383) acc 90.6250 (74.0430) lr 7.5131e-04 eta 2:23:20 +epoch [31/50] batch [325/500] time 0.863 (0.888) data 0.001 (0.002) loss 0.7456 (1.0372) acc 81.2500 (74.0096) lr 7.5131e-04 eta 2:23:13 +epoch [31/50] batch [330/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.1719 (1.0396) acc 56.2500 (73.8826) lr 7.5131e-04 eta 2:23:07 +epoch [31/50] batch [335/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.4434 (1.0412) acc 65.6250 (73.8806) lr 7.5131e-04 eta 2:23:00 +epoch [31/50] batch [340/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.1094 (1.0411) acc 68.7500 (73.8051) lr 7.5131e-04 eta 2:22:56 +epoch [31/50] batch [345/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.1963 (1.0432) acc 62.5000 (73.7319) lr 7.5131e-04 eta 2:22:50 +epoch [31/50] batch [350/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.8667 (1.0440) acc 75.0000 (73.6607) lr 7.5131e-04 eta 2:22:46 +epoch [31/50] batch [355/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.2744 (1.0440) acc 71.8750 (73.6444) lr 7.5131e-04 eta 2:22:41 +epoch [31/50] batch [360/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.4624 (1.0417) acc 87.5000 (73.6719) lr 7.5131e-04 eta 2:22:39 +epoch [31/50] batch [365/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.9795 (1.0437) acc 78.1250 (73.6387) lr 7.5131e-04 eta 2:22:34 +epoch [31/50] batch [370/500] time 0.851 (0.888) data 0.000 (0.002) loss 1.1123 (1.0408) acc 75.0000 (73.7162) lr 7.5131e-04 eta 2:22:29 +epoch [31/50] batch [375/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.0088 (1.0403) acc 71.8750 (73.6917) lr 7.5131e-04 eta 2:22:20 +epoch [31/50] batch [380/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.6279 (1.0417) acc 65.6250 (73.7253) lr 7.5131e-04 eta 2:22:14 +epoch [31/50] batch [385/500] 
time 0.878 (0.887) data 0.000 (0.002) loss 1.0996 (1.0417) acc 78.1250 (73.6932) lr 7.5131e-04 eta 2:22:07 +epoch [31/50] batch [390/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.6377 (1.0443) acc 62.5000 (73.6218) lr 7.5131e-04 eta 2:22:03 +epoch [31/50] batch [395/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.2998 (1.0437) acc 62.5000 (73.5918) lr 7.5131e-04 eta 2:21:56 +epoch [31/50] batch [400/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.1250 (1.0445) acc 56.2500 (73.5703) lr 7.5131e-04 eta 2:21:50 +epoch [31/50] batch [405/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.5439 (1.0407) acc 93.7500 (73.6420) lr 7.5131e-04 eta 2:21:48 +epoch [31/50] batch [410/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9326 (1.0392) acc 75.0000 (73.6738) lr 7.5131e-04 eta 2:21:43 +epoch [31/50] batch [415/500] time 0.917 (0.887) data 0.000 (0.002) loss 0.8501 (1.0377) acc 81.2500 (73.7123) lr 7.5131e-04 eta 2:21:38 +epoch [31/50] batch [420/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.2490 (1.0375) acc 65.6250 (73.7128) lr 7.5131e-04 eta 2:21:32 +epoch [31/50] batch [425/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.9009 (1.0393) acc 75.0000 (73.6544) lr 7.5131e-04 eta 2:21:28 +epoch [31/50] batch [430/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7285 (1.0396) acc 84.3750 (73.6846) lr 7.5131e-04 eta 2:21:23 +epoch [31/50] batch [435/500] time 0.859 (0.886) data 0.000 (0.002) loss 1.1660 (1.0406) acc 71.8750 (73.6135) lr 7.5131e-04 eta 2:21:19 +epoch [31/50] batch [440/500] time 0.854 (0.886) data 0.000 (0.002) loss 1.1582 (1.0398) acc 81.2500 (73.6293) lr 7.5131e-04 eta 2:21:12 +epoch [31/50] batch [445/500] time 0.943 (0.886) data 0.000 (0.002) loss 1.2441 (1.0385) acc 68.7500 (73.6376) lr 7.5131e-04 eta 2:21:09 +epoch [31/50] batch [450/500] time 0.890 (0.886) data 0.000 (0.002) loss 1.4668 (1.0382) acc 65.6250 (73.6944) lr 7.5131e-04 eta 2:21:04 +epoch [31/50] batch [455/500] time 0.869 (0.886) data 0.000 (0.002) loss 0.8208 (1.0392) acc 
78.1250 (73.6813) lr 7.5131e-04 eta 2:20:59 +epoch [31/50] batch [460/500] time 0.903 (0.886) data 0.000 (0.002) loss 0.7153 (1.0384) acc 81.2500 (73.7160) lr 7.5131e-04 eta 2:20:55 +epoch [31/50] batch [465/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.2012 (1.0373) acc 62.5000 (73.7567) lr 7.5131e-04 eta 2:20:49 +epoch [31/50] batch [470/500] time 0.853 (0.886) data 0.000 (0.002) loss 0.8335 (1.0350) acc 81.2500 (73.8231) lr 7.5131e-04 eta 2:20:44 +epoch [31/50] batch [475/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.1426 (1.0353) acc 65.6250 (73.8158) lr 7.5131e-04 eta 2:20:38 +epoch [31/50] batch [480/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.0527 (1.0368) acc 78.1250 (73.7826) lr 7.5131e-04 eta 2:20:34 +epoch [31/50] batch [485/500] time 0.919 (0.886) data 0.000 (0.002) loss 0.9717 (1.0359) acc 84.3750 (73.8015) lr 7.5131e-04 eta 2:20:31 +epoch [31/50] batch [490/500] time 0.865 (0.886) data 0.000 (0.002) loss 0.8091 (1.0375) acc 75.0000 (73.7819) lr 7.5131e-04 eta 2:20:26 +epoch [31/50] batch [495/500] time 0.918 (0.886) data 0.000 (0.002) loss 1.3086 (1.0383) acc 62.5000 (73.7374) lr 7.5131e-04 eta 2:20:22 +epoch [31/50] batch [500/500] time 0.875 (0.886) data 0.000 (0.001) loss 1.2275 (1.0385) acc 68.7500 (73.7500) lr 6.9098e-04 eta 2:20:17 +epoch [32/50] batch [5/500] time 0.886 (1.048) data 0.000 (0.140) loss 0.8584 (0.9688) acc 71.8750 (72.5000) lr 6.9098e-04 eta 2:45:49 +epoch [32/50] batch [10/500] time 0.873 (0.961) data 0.000 (0.070) loss 1.0146 (1.0665) acc 78.1250 (72.5000) lr 6.9098e-04 eta 2:32:00 +epoch [32/50] batch [15/500] time 0.871 (0.933) data 0.000 (0.047) loss 1.1074 (1.0540) acc 75.0000 (72.5000) lr 6.9098e-04 eta 2:27:33 +epoch [32/50] batch [20/500] time 0.898 (0.920) data 0.000 (0.035) loss 0.7461 (1.0199) acc 84.3750 (73.2812) lr 6.9098e-04 eta 2:25:23 +epoch [32/50] batch [25/500] time 0.891 (0.913) data 0.000 (0.028) loss 0.9668 (1.0188) acc 68.7500 (73.0000) lr 6.9098e-04 eta 2:24:08 +epoch [32/50] batch [30/500] 
time 0.896 (0.909) data 0.000 (0.024) loss 0.5605 (1.0000) acc 71.8750 (73.2292) lr 6.9098e-04 eta 2:23:29 +epoch [32/50] batch [35/500] time 0.894 (0.906) data 0.000 (0.020) loss 0.6104 (1.0157) acc 84.3750 (72.5000) lr 6.9098e-04 eta 2:22:57 +epoch [32/50] batch [40/500] time 0.897 (0.903) data 0.000 (0.018) loss 1.5928 (1.0019) acc 68.7500 (73.2812) lr 6.9098e-04 eta 2:22:22 +epoch [32/50] batch [45/500] time 0.883 (0.901) data 0.000 (0.016) loss 0.9233 (0.9971) acc 71.8750 (73.0556) lr 6.9098e-04 eta 2:21:56 +epoch [32/50] batch [50/500] time 0.895 (0.901) data 0.000 (0.014) loss 1.1543 (0.9973) acc 71.8750 (73.1875) lr 6.9098e-04 eta 2:21:58 +epoch [32/50] batch [55/500] time 0.871 (0.899) data 0.000 (0.013) loss 1.8721 (1.0004) acc 65.6250 (73.2955) lr 6.9098e-04 eta 2:21:35 +epoch [32/50] batch [60/500] time 0.859 (0.898) data 0.000 (0.012) loss 0.7158 (0.9758) acc 81.2500 (73.9062) lr 6.9098e-04 eta 2:21:17 +epoch [32/50] batch [65/500] time 0.873 (0.899) data 0.000 (0.011) loss 0.8364 (0.9816) acc 65.6250 (73.7981) lr 6.9098e-04 eta 2:21:23 +epoch [32/50] batch [70/500] time 0.890 (0.898) data 0.000 (0.010) loss 0.6035 (0.9849) acc 87.5000 (73.9286) lr 6.9098e-04 eta 2:21:04 +epoch [32/50] batch [75/500] time 0.908 (0.897) data 0.000 (0.010) loss 1.1572 (0.9931) acc 81.2500 (73.5833) lr 6.9098e-04 eta 2:20:51 +epoch [32/50] batch [80/500] time 0.892 (0.896) data 0.000 (0.009) loss 1.0801 (0.9952) acc 75.0000 (73.8672) lr 6.9098e-04 eta 2:20:44 +epoch [32/50] batch [85/500] time 0.869 (0.895) data 0.000 (0.008) loss 0.5117 (0.9977) acc 84.3750 (73.9338) lr 6.9098e-04 eta 2:20:30 +epoch [32/50] batch [90/500] time 0.868 (0.894) data 0.000 (0.008) loss 0.7065 (0.9939) acc 75.0000 (74.0972) lr 6.9098e-04 eta 2:20:14 +epoch [32/50] batch [95/500] time 0.899 (0.893) data 0.000 (0.008) loss 1.4902 (1.0026) acc 62.5000 (73.9803) lr 6.9098e-04 eta 2:20:01 +epoch [32/50] batch [100/500] time 0.866 (0.892) data 0.000 (0.007) loss 0.7544 (0.9994) acc 81.2500 (74.0000) 
lr 6.9098e-04 eta 2:19:49 +epoch [32/50] batch [105/500] time 0.914 (0.892) data 0.000 (0.007) loss 0.8730 (1.0122) acc 78.1250 (73.9286) lr 6.9098e-04 eta 2:19:42 +epoch [32/50] batch [110/500] time 0.892 (0.893) data 0.000 (0.007) loss 1.1299 (1.0216) acc 71.8750 (73.8352) lr 6.9098e-04 eta 2:19:46 +epoch [32/50] batch [115/500] time 0.871 (0.893) data 0.000 (0.006) loss 1.0215 (1.0202) acc 68.7500 (73.9946) lr 6.9098e-04 eta 2:19:38 +epoch [32/50] batch [120/500] time 0.858 (0.892) data 0.000 (0.006) loss 0.2988 (1.0110) acc 90.6250 (74.2188) lr 6.9098e-04 eta 2:19:26 +epoch [32/50] batch [125/500] time 0.907 (0.892) data 0.000 (0.006) loss 1.0186 (1.0160) acc 65.6250 (74.0500) lr 6.9098e-04 eta 2:19:22 +epoch [32/50] batch [130/500] time 0.887 (0.892) data 0.000 (0.006) loss 1.1357 (1.0152) acc 68.7500 (74.1106) lr 6.9098e-04 eta 2:19:15 +epoch [32/50] batch [135/500] time 0.885 (0.892) data 0.000 (0.005) loss 0.7397 (1.0204) acc 81.2500 (74.0278) lr 6.9098e-04 eta 2:19:15 +epoch [32/50] batch [140/500] time 0.869 (0.892) data 0.000 (0.005) loss 0.8140 (1.0188) acc 81.2500 (74.1071) lr 6.9098e-04 eta 2:19:07 +epoch [32/50] batch [145/500] time 0.883 (0.892) data 0.000 (0.005) loss 0.7295 (1.0170) acc 78.1250 (74.0948) lr 6.9098e-04 eta 2:19:01 +epoch [32/50] batch [150/500] time 0.929 (0.892) data 0.000 (0.005) loss 1.2803 (1.0187) acc 78.1250 (74.1042) lr 6.9098e-04 eta 2:18:58 +epoch [32/50] batch [155/500] time 0.885 (0.891) data 0.000 (0.005) loss 0.9351 (1.0187) acc 78.1250 (74.1734) lr 6.9098e-04 eta 2:18:49 +epoch [32/50] batch [160/500] time 0.874 (0.891) data 0.000 (0.005) loss 1.1973 (1.0224) acc 68.7500 (74.1797) lr 6.9098e-04 eta 2:18:43 +epoch [32/50] batch [165/500] time 0.878 (0.891) data 0.000 (0.004) loss 1.1318 (1.0276) acc 78.1250 (74.1288) lr 6.9098e-04 eta 2:18:35 +epoch [32/50] batch [170/500] time 0.908 (0.891) data 0.001 (0.004) loss 1.3164 (1.0329) acc 65.6250 (73.9338) lr 6.9098e-04 eta 2:18:30 +epoch [32/50] batch [175/500] time 0.863 
(0.890) data 0.000 (0.004) loss 0.9131 (1.0351) acc 75.0000 (73.9286) lr 6.9098e-04 eta 2:18:22 +epoch [32/50] batch [180/500] time 0.848 (0.890) data 0.000 (0.004) loss 1.1426 (1.0394) acc 75.0000 (73.7847) lr 6.9098e-04 eta 2:18:17 +epoch [32/50] batch [185/500] time 0.874 (0.890) data 0.000 (0.004) loss 1.7939 (1.0426) acc 68.7500 (73.6993) lr 6.9098e-04 eta 2:18:12 +epoch [32/50] batch [190/500] time 0.901 (0.890) data 0.000 (0.004) loss 1.1162 (1.0410) acc 68.7500 (73.7500) lr 6.9098e-04 eta 2:18:05 +epoch [32/50] batch [195/500] time 0.877 (0.890) data 0.000 (0.004) loss 1.0459 (1.0422) acc 65.6250 (73.7340) lr 6.9098e-04 eta 2:17:57 +epoch [32/50] batch [200/500] time 0.896 (0.890) data 0.000 (0.004) loss 0.7788 (1.0366) acc 75.0000 (73.7969) lr 6.9098e-04 eta 2:17:53 +epoch [32/50] batch [205/500] time 0.974 (0.890) data 0.000 (0.004) loss 1.1562 (1.0362) acc 68.7500 (73.8872) lr 6.9098e-04 eta 2:17:51 +epoch [32/50] batch [210/500] time 0.913 (0.890) data 0.000 (0.004) loss 1.6748 (1.0439) acc 53.1250 (73.6458) lr 6.9098e-04 eta 2:17:46 +epoch [32/50] batch [215/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.5381 (1.0475) acc 65.6250 (73.5756) lr 6.9098e-04 eta 2:17:41 +epoch [32/50] batch [220/500] time 0.910 (0.890) data 0.000 (0.003) loss 0.6772 (1.0485) acc 84.3750 (73.5227) lr 6.9098e-04 eta 2:17:39 +epoch [32/50] batch [225/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.6855 (1.0475) acc 81.2500 (73.6667) lr 6.9098e-04 eta 2:17:33 +epoch [32/50] batch [230/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.3633 (1.0465) acc 68.7500 (73.6821) lr 6.9098e-04 eta 2:17:29 +epoch [32/50] batch [235/500] time 0.885 (0.890) data 0.000 (0.003) loss 0.4700 (1.0453) acc 87.5000 (73.8165) lr 6.9098e-04 eta 2:17:23 +epoch [32/50] batch [240/500] time 0.879 (0.890) data 0.000 (0.003) loss 1.0225 (1.0466) acc 78.1250 (73.7630) lr 6.9098e-04 eta 2:17:18 +epoch [32/50] batch [245/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.6177 (1.0487) acc 87.5000 
(73.7500) lr 6.9098e-04 eta 2:17:10 +epoch [32/50] batch [250/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.5576 (1.0472) acc 65.6250 (73.8125) lr 6.9098e-04 eta 2:17:11 +epoch [32/50] batch [255/500] time 0.862 (0.890) data 0.000 (0.003) loss 1.0391 (1.0482) acc 71.8750 (73.7990) lr 6.9098e-04 eta 2:17:07 +epoch [32/50] batch [260/500] time 0.873 (0.890) data 0.000 (0.003) loss 1.0898 (1.0475) acc 71.8750 (73.7620) lr 6.9098e-04 eta 2:17:01 +epoch [32/50] batch [265/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.0615 (1.0448) acc 75.0000 (73.7972) lr 6.9098e-04 eta 2:16:58 +epoch [32/50] batch [270/500] time 0.854 (0.890) data 0.000 (0.003) loss 1.1924 (1.0444) acc 65.6250 (73.7963) lr 6.9098e-04 eta 2:16:52 +epoch [32/50] batch [275/500] time 0.926 (0.890) data 0.000 (0.003) loss 0.7246 (1.0417) acc 75.0000 (73.8295) lr 6.9098e-04 eta 2:16:50 +epoch [32/50] batch [280/500] time 0.900 (0.890) data 0.000 (0.003) loss 1.0068 (1.0416) acc 81.2500 (73.8393) lr 6.9098e-04 eta 2:16:46 +epoch [32/50] batch [285/500] time 0.913 (0.890) data 0.000 (0.003) loss 0.9595 (1.0431) acc 75.0000 (73.8268) lr 6.9098e-04 eta 2:16:42 +epoch [32/50] batch [290/500] time 0.925 (0.890) data 0.000 (0.003) loss 0.8877 (1.0433) acc 71.8750 (73.7931) lr 6.9098e-04 eta 2:16:39 +epoch [32/50] batch [295/500] time 0.853 (0.890) data 0.000 (0.003) loss 0.9800 (1.0438) acc 68.7500 (73.7606) lr 6.9098e-04 eta 2:16:34 +epoch [32/50] batch [300/500] time 0.874 (0.890) data 0.000 (0.003) loss 0.7666 (1.0448) acc 81.2500 (73.7500) lr 6.9098e-04 eta 2:16:27 +epoch [32/50] batch [305/500] time 0.899 (0.890) data 0.000 (0.003) loss 0.5664 (1.0454) acc 84.3750 (73.7090) lr 6.9098e-04 eta 2:16:22 +epoch [32/50] batch [310/500] time 0.883 (0.890) data 0.000 (0.002) loss 0.8291 (1.0453) acc 75.0000 (73.6593) lr 6.9098e-04 eta 2:16:16 +epoch [32/50] batch [315/500] time 0.874 (0.890) data 0.000 (0.002) loss 0.9434 (1.0413) acc 75.0000 (73.7798) lr 6.9098e-04 eta 2:16:10 +epoch [32/50] batch [320/500] 
time 0.862 (0.889) data 0.000 (0.002) loss 0.5439 (1.0397) acc 84.3750 (73.8477) lr 6.9098e-04 eta 2:16:05 +epoch [32/50] batch [325/500] time 0.864 (0.889) data 0.000 (0.002) loss 0.8179 (1.0394) acc 71.8750 (73.8462) lr 6.9098e-04 eta 2:15:58 +epoch [32/50] batch [330/500] time 0.913 (0.889) data 0.000 (0.002) loss 1.3281 (1.0402) acc 68.7500 (73.8447) lr 6.9098e-04 eta 2:15:56 +epoch [32/50] batch [335/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.6748 (1.0371) acc 84.3750 (73.8993) lr 6.9098e-04 eta 2:15:49 +epoch [32/50] batch [340/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.8335 (1.0352) acc 75.0000 (73.9246) lr 6.9098e-04 eta 2:15:44 +epoch [32/50] batch [345/500] time 0.912 (0.889) data 0.000 (0.002) loss 1.1494 (1.0406) acc 65.6250 (73.8315) lr 6.9098e-04 eta 2:15:39 +epoch [32/50] batch [350/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.0039 (1.0432) acc 68.7500 (73.7946) lr 6.9098e-04 eta 2:15:37 +epoch [32/50] batch [355/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.5498 (1.0465) acc 68.7500 (73.6972) lr 6.9098e-04 eta 2:15:32 +epoch [32/50] batch [360/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.4602 (1.0420) acc 87.5000 (73.7760) lr 6.9098e-04 eta 2:15:25 +epoch [32/50] batch [365/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.1309 (1.0448) acc 68.7500 (73.6986) lr 6.9098e-04 eta 2:15:19 +epoch [32/50] batch [370/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.0068 (1.0447) acc 81.2500 (73.6993) lr 6.9098e-04 eta 2:15:13 +epoch [32/50] batch [375/500] time 0.865 (0.889) data 0.000 (0.002) loss 0.9082 (1.0433) acc 78.1250 (73.7083) lr 6.9098e-04 eta 2:15:07 +epoch [32/50] batch [380/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.0430 (1.0423) acc 75.0000 (73.7582) lr 6.9098e-04 eta 2:14:59 +epoch [32/50] batch [385/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.6689 (1.0422) acc 53.1250 (73.7581) lr 6.9098e-04 eta 2:14:54 +epoch [32/50] batch [390/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.3242 (1.0419) acc 
65.6250 (73.8141) lr 6.9098e-04 eta 2:14:48 +epoch [32/50] batch [395/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.2764 (1.0426) acc 68.7500 (73.7737) lr 6.9098e-04 eta 2:14:44 +epoch [32/50] batch [400/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.8096 (1.0430) acc 81.2500 (73.7578) lr 6.9098e-04 eta 2:14:39 +epoch [32/50] batch [405/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.3184 (1.0446) acc 62.5000 (73.7886) lr 6.9098e-04 eta 2:14:34 +epoch [32/50] batch [410/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3018 (1.0434) acc 68.7500 (73.8186) lr 6.9098e-04 eta 2:14:28 +epoch [32/50] batch [415/500] time 0.864 (0.888) data 0.000 (0.002) loss 1.1572 (1.0460) acc 71.8750 (73.7425) lr 6.9098e-04 eta 2:14:23 +epoch [32/50] batch [420/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.9429 (1.0442) acc 78.1250 (73.7723) lr 6.9098e-04 eta 2:14:18 +epoch [32/50] batch [425/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.2871 (1.0461) acc 59.3750 (73.7279) lr 6.9098e-04 eta 2:14:15 +epoch [32/50] batch [430/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.0967 (1.0455) acc 75.0000 (73.7427) lr 6.9098e-04 eta 2:14:12 +epoch [32/50] batch [435/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.3164 (1.0455) acc 62.5000 (73.7356) lr 6.9098e-04 eta 2:14:07 +epoch [32/50] batch [440/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.1650 (1.0472) acc 68.7500 (73.7145) lr 6.9098e-04 eta 2:14:02 +epoch [32/50] batch [445/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.1230 (1.0473) acc 75.0000 (73.6657) lr 6.9098e-04 eta 2:13:57 +epoch [32/50] batch [450/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.9819 (1.0502) acc 68.7500 (73.5833) lr 6.9098e-04 eta 2:13:54 +epoch [32/50] batch [455/500] time 0.914 (0.888) data 0.000 (0.002) loss 0.9434 (1.0523) acc 75.0000 (73.5577) lr 6.9098e-04 eta 2:13:49 +epoch [32/50] batch [460/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9751 (1.0525) acc 71.8750 (73.5462) lr 6.9098e-04 eta 2:13:44 +epoch [32/50] batch 
[465/500] time 0.913 (0.888) data 0.000 (0.002) loss 0.5376 (1.0527) acc 84.3750 (73.5417) lr 6.9098e-04 eta 2:13:41 +epoch [32/50] batch [470/500] time 0.917 (0.888) data 0.000 (0.002) loss 0.8691 (1.0522) acc 71.8750 (73.5638) lr 6.9098e-04 eta 2:13:36 +epoch [32/50] batch [475/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.0000 (1.0505) acc 81.2500 (73.5987) lr 6.9098e-04 eta 2:13:31 +epoch [32/50] batch [480/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.8975 (1.0499) acc 75.0000 (73.6263) lr 6.9098e-04 eta 2:13:25 +epoch [32/50] batch [485/500] time 0.907 (0.887) data 0.001 (0.002) loss 1.1348 (1.0505) acc 81.2500 (73.6598) lr 6.9098e-04 eta 2:13:19 +epoch [32/50] batch [490/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.7422 (1.0537) acc 59.3750 (73.6033) lr 6.9098e-04 eta 2:13:13 +epoch [32/50] batch [495/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.8135 (1.0537) acc 81.2500 (73.5985) lr 6.9098e-04 eta 2:13:09 +epoch [32/50] batch [500/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.6685 (1.0526) acc 81.2500 (73.6250) lr 6.3188e-04 eta 2:13:04 +epoch [33/50] batch [5/500] time 0.869 (1.040) data 0.000 (0.151) loss 0.7920 (0.7604) acc 75.0000 (78.1250) lr 6.3188e-04 eta 2:35:57 +epoch [33/50] batch [10/500] time 0.865 (0.961) data 0.000 (0.076) loss 1.1367 (0.8048) acc 75.0000 (77.1875) lr 6.3188e-04 eta 2:23:57 +epoch [33/50] batch [15/500] time 0.896 (0.935) data 0.000 (0.051) loss 1.2568 (0.9061) acc 68.7500 (76.4583) lr 6.3188e-04 eta 2:19:58 +epoch [33/50] batch [20/500] time 0.865 (0.920) data 0.000 (0.038) loss 1.9092 (0.9772) acc 62.5000 (75.6250) lr 6.3188e-04 eta 2:17:44 +epoch [33/50] batch [25/500] time 0.873 (0.913) data 0.000 (0.030) loss 1.0781 (1.0155) acc 59.3750 (74.5000) lr 6.3188e-04 eta 2:16:35 +epoch [33/50] batch [30/500] time 0.879 (0.908) data 0.000 (0.025) loss 0.9736 (1.0222) acc 75.0000 (74.3750) lr 6.3188e-04 eta 2:15:47 +epoch [33/50] batch [35/500] time 0.884 (0.906) data 0.000 (0.022) loss 1.3662 (1.0598) acc 
68.7500 (73.7500) lr 6.3188e-04 eta 2:15:25 +epoch [33/50] batch [40/500] time 0.920 (0.904) data 0.000 (0.019) loss 1.2236 (1.0805) acc 71.8750 (73.1250) lr 6.3188e-04 eta 2:14:59 +epoch [33/50] batch [45/500] time 0.906 (0.904) data 0.000 (0.017) loss 0.6021 (1.0502) acc 81.2500 (73.7500) lr 6.3188e-04 eta 2:14:52 +epoch [33/50] batch [50/500] time 0.858 (0.903) data 0.000 (0.015) loss 1.2783 (1.0510) acc 68.7500 (73.9375) lr 6.3188e-04 eta 2:14:46 +epoch [33/50] batch [55/500] time 0.862 (0.901) data 0.000 (0.014) loss 1.3125 (1.0673) acc 65.6250 (73.5227) lr 6.3188e-04 eta 2:14:16 +epoch [33/50] batch [60/500] time 0.879 (0.900) data 0.000 (0.013) loss 0.9204 (1.0463) acc 68.7500 (73.6979) lr 6.3188e-04 eta 2:14:02 +epoch [33/50] batch [65/500] time 0.898 (0.899) data 0.000 (0.012) loss 1.5801 (1.0378) acc 59.3750 (73.8942) lr 6.3188e-04 eta 2:13:53 +epoch [33/50] batch [70/500] time 0.872 (0.898) data 0.000 (0.011) loss 0.7402 (1.0507) acc 81.2500 (73.7500) lr 6.3188e-04 eta 2:13:37 +epoch [33/50] batch [75/500] time 0.885 (0.897) data 0.000 (0.010) loss 0.9429 (1.0495) acc 75.0000 (73.8750) lr 6.3188e-04 eta 2:13:25 +epoch [33/50] batch [80/500] time 0.874 (0.897) data 0.000 (0.010) loss 0.5845 (1.0542) acc 81.2500 (73.7500) lr 6.3188e-04 eta 2:13:18 +epoch [33/50] batch [85/500] time 0.897 (0.896) data 0.000 (0.009) loss 0.9692 (1.0372) acc 71.8750 (74.3015) lr 6.3188e-04 eta 2:13:07 +epoch [33/50] batch [90/500] time 0.901 (0.896) data 0.000 (0.009) loss 1.0176 (1.0297) acc 71.8750 (74.3056) lr 6.3188e-04 eta 2:12:59 +epoch [33/50] batch [95/500] time 0.879 (0.896) data 0.000 (0.008) loss 0.8325 (1.0300) acc 81.2500 (74.2434) lr 6.3188e-04 eta 2:12:59 +epoch [33/50] batch [100/500] time 0.883 (0.895) data 0.000 (0.008) loss 0.5967 (1.0344) acc 81.2500 (74.1250) lr 6.3188e-04 eta 2:12:49 +epoch [33/50] batch [105/500] time 0.872 (0.895) data 0.000 (0.007) loss 1.0703 (1.0331) acc 68.7500 (74.0476) lr 6.3188e-04 eta 2:12:45 +epoch [33/50] batch [110/500] time 
0.884 (0.895) data 0.000 (0.007) loss 0.7578 (1.0233) acc 81.2500 (74.2330) lr 6.3188e-04 eta 2:12:40 +epoch [33/50] batch [115/500] time 0.865 (0.895) data 0.000 (0.007) loss 0.9409 (1.0223) acc 78.1250 (74.0761) lr 6.3188e-04 eta 2:12:31 +epoch [33/50] batch [120/500] time 0.872 (0.894) data 0.000 (0.007) loss 1.0957 (1.0246) acc 78.1250 (74.0104) lr 6.3188e-04 eta 2:12:18 +epoch [33/50] batch [125/500] time 0.888 (0.893) data 0.000 (0.006) loss 1.1504 (1.0334) acc 65.6250 (73.8000) lr 6.3188e-04 eta 2:12:07 +epoch [33/50] batch [130/500] time 0.888 (0.893) data 0.000 (0.006) loss 0.6475 (1.0320) acc 84.3750 (73.7981) lr 6.3188e-04 eta 2:11:58 +epoch [33/50] batch [135/500] time 0.889 (0.893) data 0.000 (0.006) loss 0.8296 (1.0265) acc 75.0000 (73.9120) lr 6.3188e-04 eta 2:11:52 +epoch [33/50] batch [140/500] time 0.893 (0.892) data 0.000 (0.006) loss 1.8105 (1.0375) acc 59.3750 (73.7054) lr 6.3188e-04 eta 2:11:44 +epoch [33/50] batch [145/500] time 0.873 (0.892) data 0.000 (0.005) loss 0.6382 (1.0322) acc 84.3750 (73.6638) lr 6.3188e-04 eta 2:11:36 +epoch [33/50] batch [150/500] time 0.869 (0.892) data 0.000 (0.005) loss 0.9106 (1.0265) acc 68.7500 (73.6667) lr 6.3188e-04 eta 2:11:30 +epoch [33/50] batch [155/500] time 0.904 (0.891) data 0.000 (0.005) loss 0.7178 (1.0266) acc 75.0000 (73.7298) lr 6.3188e-04 eta 2:11:21 +epoch [33/50] batch [160/500] time 0.867 (0.890) data 0.000 (0.005) loss 1.3789 (1.0293) acc 53.1250 (73.5547) lr 6.3188e-04 eta 2:11:10 +epoch [33/50] batch [165/500] time 0.895 (0.890) data 0.000 (0.005) loss 1.2246 (1.0313) acc 71.8750 (73.5227) lr 6.3188e-04 eta 2:11:02 +epoch [33/50] batch [170/500] time 0.889 (0.890) data 0.000 (0.005) loss 0.9771 (1.0305) acc 75.0000 (73.5846) lr 6.3188e-04 eta 2:10:55 +epoch [33/50] batch [175/500] time 0.920 (0.890) data 0.000 (0.005) loss 1.1885 (1.0274) acc 62.5000 (73.5893) lr 6.3188e-04 eta 2:10:51 +epoch [33/50] batch [180/500] time 0.869 (0.889) data 0.000 (0.004) loss 1.0947 (1.0315) acc 68.7500 
(73.4722) lr 6.3188e-04 eta 2:10:43 +epoch [33/50] batch [185/500] time 0.870 (0.889) data 0.000 (0.004) loss 0.9526 (1.0273) acc 78.1250 (73.5135) lr 6.3188e-04 eta 2:10:36 +epoch [33/50] batch [190/500] time 1.011 (0.890) data 0.000 (0.004) loss 0.7271 (1.0247) acc 68.7500 (73.4704) lr 6.3188e-04 eta 2:10:37 +epoch [33/50] batch [195/500] time 0.872 (0.889) data 0.000 (0.004) loss 0.8848 (1.0218) acc 78.1250 (73.5897) lr 6.3188e-04 eta 2:10:29 +epoch [33/50] batch [200/500] time 0.867 (0.889) data 0.000 (0.004) loss 1.3408 (1.0279) acc 71.8750 (73.5000) lr 6.3188e-04 eta 2:10:23 +epoch [33/50] batch [205/500] time 0.884 (0.889) data 0.000 (0.004) loss 1.2510 (1.0232) acc 62.5000 (73.5518) lr 6.3188e-04 eta 2:10:17 +epoch [33/50] batch [210/500] time 0.871 (0.889) data 0.000 (0.004) loss 0.7378 (1.0212) acc 75.0000 (73.5268) lr 6.3188e-04 eta 2:10:10 +epoch [33/50] batch [215/500] time 0.882 (0.888) data 0.000 (0.004) loss 1.1055 (1.0192) acc 68.7500 (73.5029) lr 6.3188e-04 eta 2:10:04 +epoch [33/50] batch [220/500] time 0.897 (0.888) data 0.000 (0.004) loss 0.9810 (1.0219) acc 75.0000 (73.4375) lr 6.3188e-04 eta 2:09:58 +epoch [33/50] batch [225/500] time 0.877 (0.888) data 0.000 (0.004) loss 1.3574 (1.0282) acc 62.5000 (73.2500) lr 6.3188e-04 eta 2:09:54 +epoch [33/50] batch [230/500] time 0.888 (0.888) data 0.000 (0.004) loss 0.8335 (1.0237) acc 78.1250 (73.3696) lr 6.3188e-04 eta 2:09:51 +epoch [33/50] batch [235/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.0059 (1.0246) acc 71.8750 (73.3511) lr 6.3188e-04 eta 2:09:52 +epoch [33/50] batch [240/500] time 0.898 (0.889) data 0.000 (0.003) loss 0.4771 (1.0210) acc 93.7500 (73.4896) lr 6.3188e-04 eta 2:09:45 +epoch [33/50] batch [245/500] time 0.884 (0.889) data 0.000 (0.003) loss 0.6855 (1.0235) acc 84.3750 (73.4439) lr 6.3188e-04 eta 2:09:41 +epoch [33/50] batch [250/500] time 0.927 (0.889) data 0.000 (0.003) loss 0.6035 (1.0189) acc 84.3750 (73.5125) lr 6.3188e-04 eta 2:09:38 +epoch [33/50] batch [255/500] 
time 0.881 (0.889) data 0.000 (0.003) loss 0.9082 (1.0179) acc 65.6250 (73.4559) lr 6.3188e-04 eta 2:09:32 +epoch [33/50] batch [260/500] time 0.890 (0.889) data 0.000 (0.003) loss 1.3896 (1.0169) acc 65.6250 (73.4856) lr 6.3188e-04 eta 2:09:27 +epoch [33/50] batch [265/500] time 0.857 (0.888) data 0.000 (0.003) loss 1.1621 (1.0214) acc 75.0000 (73.4080) lr 6.3188e-04 eta 2:09:21 +epoch [33/50] batch [270/500] time 0.899 (0.888) data 0.000 (0.003) loss 0.7329 (1.0225) acc 84.3750 (73.4259) lr 6.3188e-04 eta 2:09:15 +epoch [33/50] batch [275/500] time 0.880 (0.888) data 0.000 (0.003) loss 1.2764 (1.0251) acc 75.0000 (73.3977) lr 6.3188e-04 eta 2:09:10 +epoch [33/50] batch [280/500] time 0.903 (0.888) data 0.000 (0.003) loss 0.6748 (1.0206) acc 87.5000 (73.5268) lr 6.3188e-04 eta 2:09:05 +epoch [33/50] batch [285/500] time 0.895 (0.888) data 0.000 (0.003) loss 0.8003 (1.0191) acc 87.5000 (73.5855) lr 6.3188e-04 eta 2:08:59 +epoch [33/50] batch [290/500] time 0.914 (0.888) data 0.000 (0.003) loss 0.7466 (1.0168) acc 81.2500 (73.6315) lr 6.3188e-04 eta 2:08:55 +epoch [33/50] batch [295/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.6929 (1.0148) acc 81.2500 (73.6653) lr 6.3188e-04 eta 2:08:49 +epoch [33/50] batch [300/500] time 0.906 (0.888) data 0.000 (0.003) loss 0.8120 (1.0176) acc 78.1250 (73.6562) lr 6.3188e-04 eta 2:08:44 +epoch [33/50] batch [305/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.9004 (1.0174) acc 71.8750 (73.6680) lr 6.3188e-04 eta 2:08:39 +epoch [33/50] batch [310/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.8618 (1.0179) acc 84.3750 (73.6391) lr 6.3188e-04 eta 2:08:35 +epoch [33/50] batch [315/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.0068 (1.0224) acc 75.0000 (73.6111) lr 6.3188e-04 eta 2:08:28 +epoch [33/50] batch [320/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.0215 (1.0205) acc 71.8750 (73.6719) lr 6.3188e-04 eta 2:08:23 +epoch [33/50] batch [325/500] time 0.864 (0.887) data 0.000 (0.003) loss 1.0830 (1.0226) acc 
75.0000 (73.6058) lr 6.3188e-04 eta 2:08:18 +epoch [33/50] batch [330/500] time 0.903 (0.887) data 0.000 (0.003) loss 0.9399 (1.0244) acc 75.0000 (73.5038) lr 6.3188e-04 eta 2:08:13 +epoch [33/50] batch [335/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.2764 (1.0233) acc 62.5000 (73.5541) lr 6.3188e-04 eta 2:08:11 +epoch [33/50] batch [340/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.2109 (1.0233) acc 71.8750 (73.5662) lr 6.3188e-04 eta 2:08:07 +epoch [33/50] batch [345/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.1621 (1.0251) acc 75.0000 (73.5688) lr 6.3188e-04 eta 2:08:03 +epoch [33/50] batch [350/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.2520 (1.0256) acc 78.1250 (73.5893) lr 6.3188e-04 eta 2:07:57 +epoch [33/50] batch [355/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.1123 (1.0253) acc 71.8750 (73.5651) lr 6.3188e-04 eta 2:07:54 +epoch [33/50] batch [360/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.1309 (1.0229) acc 65.6250 (73.5764) lr 6.3188e-04 eta 2:07:47 +epoch [33/50] batch [365/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9932 (1.0203) acc 71.8750 (73.6558) lr 6.3188e-04 eta 2:07:43 +epoch [33/50] batch [370/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.7744 (1.0190) acc 90.6250 (73.7162) lr 6.3188e-04 eta 2:07:39 +epoch [33/50] batch [375/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.6182 (1.0186) acc 68.7500 (73.7583) lr 6.3188e-04 eta 2:07:35 +epoch [33/50] batch [380/500] time 0.853 (0.888) data 0.000 (0.002) loss 0.8257 (1.0180) acc 65.6250 (73.7500) lr 6.3188e-04 eta 2:07:32 +epoch [33/50] batch [385/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1943 (1.0195) acc 75.0000 (73.7419) lr 6.3188e-04 eta 2:07:28 +epoch [33/50] batch [390/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.6230 (1.0229) acc 65.6250 (73.7019) lr 6.3188e-04 eta 2:07:23 +epoch [33/50] batch [395/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.5146 (1.0242) acc 50.0000 (73.6472) lr 6.3188e-04 eta 2:07:19 +epoch [33/50] batch 
[400/500] time 0.856 (0.888) data 0.000 (0.002) loss 0.7920 (1.0225) acc 78.1250 (73.6953) lr 6.3188e-04 eta 2:07:14 +epoch [33/50] batch [405/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.9995 (1.0250) acc 62.5000 (73.6651) lr 6.3188e-04 eta 2:07:09 +epoch [33/50] batch [410/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.0830 (1.0243) acc 71.8750 (73.7119) lr 6.3188e-04 eta 2:07:04 +epoch [33/50] batch [415/500] time 0.851 (0.888) data 0.000 (0.002) loss 0.8838 (1.0241) acc 68.7500 (73.6898) lr 6.3188e-04 eta 2:06:59 +epoch [33/50] batch [420/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.0322 (1.0222) acc 68.7500 (73.7202) lr 6.3188e-04 eta 2:06:53 +epoch [33/50] batch [425/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.6689 (1.0221) acc 53.1250 (73.7132) lr 6.3188e-04 eta 2:06:49 +epoch [33/50] batch [430/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.1553 (1.0228) acc 78.1250 (73.7355) lr 6.3188e-04 eta 2:06:45 +epoch [33/50] batch [435/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.0986 (1.0226) acc 71.8750 (73.7069) lr 6.3188e-04 eta 2:06:41 +epoch [33/50] batch [440/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.6304 (1.0215) acc 90.6250 (73.7500) lr 6.3188e-04 eta 2:06:37 +epoch [33/50] batch [445/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.6377 (1.0217) acc 87.5000 (73.7851) lr 6.3188e-04 eta 2:06:31 +epoch [33/50] batch [450/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.1904 (1.0220) acc 71.8750 (73.7847) lr 6.3188e-04 eta 2:06:27 +epoch [33/50] batch [455/500] time 0.924 (0.887) data 0.000 (0.002) loss 0.9688 (1.0238) acc 71.8750 (73.7637) lr 6.3188e-04 eta 2:06:23 +epoch [33/50] batch [460/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.7266 (1.0272) acc 62.5000 (73.7296) lr 6.3188e-04 eta 2:06:18 +epoch [33/50] batch [465/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.4150 (1.0288) acc 68.7500 (73.7298) lr 6.3188e-04 eta 2:06:14 +epoch [33/50] batch [470/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0791 
(1.0284) acc 78.1250 (73.7301) lr 6.3188e-04 eta 2:06:09 +epoch [33/50] batch [475/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.5020 (1.0316) acc 65.6250 (73.6842) lr 6.3188e-04 eta 2:06:04 +epoch [33/50] batch [480/500] time 0.855 (0.887) data 0.000 (0.002) loss 0.8672 (1.0328) acc 78.1250 (73.7109) lr 6.3188e-04 eta 2:06:00 +epoch [33/50] batch [485/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8350 (1.0346) acc 71.8750 (73.6147) lr 6.3188e-04 eta 2:05:56 +epoch [33/50] batch [490/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.5430 (1.0370) acc 71.8750 (73.5651) lr 6.3188e-04 eta 2:05:52 +epoch [33/50] batch [495/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.1377 (1.0378) acc 68.7500 (73.5859) lr 6.3188e-04 eta 2:05:47 +epoch [33/50] batch [500/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.8838 (1.0365) acc 78.1250 (73.6375) lr 5.7422e-04 eta 2:05:42 +epoch [34/50] batch [5/500] time 0.885 (1.046) data 0.000 (0.151) loss 0.9443 (1.0358) acc 81.2500 (75.0000) lr 5.7422e-04 eta 2:28:05 +epoch [34/50] batch [10/500] time 0.870 (0.966) data 0.000 (0.076) loss 0.6553 (0.9702) acc 75.0000 (75.6250) lr 5.7422e-04 eta 2:16:39 +epoch [34/50] batch [15/500] time 0.904 (0.941) data 0.000 (0.050) loss 0.8682 (0.9899) acc 71.8750 (74.7917) lr 5.7422e-04 eta 2:13:06 +epoch [34/50] batch [20/500] time 0.920 (0.929) data 0.000 (0.038) loss 1.3047 (1.0103) acc 75.0000 (74.6875) lr 5.7422e-04 eta 2:11:15 +epoch [34/50] batch [25/500] time 0.862 (0.920) data 0.000 (0.030) loss 0.6113 (0.9928) acc 81.2500 (75.2500) lr 5.7422e-04 eta 2:10:01 +epoch [34/50] batch [30/500] time 0.886 (0.918) data 0.000 (0.025) loss 1.3730 (1.0230) acc 62.5000 (74.2708) lr 5.7422e-04 eta 2:09:35 +epoch [34/50] batch [35/500] time 0.879 (0.914) data 0.000 (0.022) loss 1.0977 (1.0101) acc 65.6250 (74.0179) lr 5.7422e-04 eta 2:08:55 +epoch [34/50] batch [40/500] time 0.905 (0.910) data 0.000 (0.019) loss 1.1504 (1.0340) acc 62.5000 (73.1250) lr 5.7422e-04 eta 2:08:20 +epoch [34/50] 
batch [45/500] time 0.895 (0.908) data 0.000 (0.017) loss 0.8687 (1.0524) acc 75.0000 (72.4306) lr 5.7422e-04 eta 2:07:54 +epoch [34/50] batch [50/500] time 0.887 (0.905) data 0.000 (0.015) loss 1.4941 (1.0509) acc 71.8750 (73.0625) lr 5.7422e-04 eta 2:07:30 +epoch [34/50] batch [55/500] time 0.871 (0.902) data 0.000 (0.014) loss 0.6787 (1.0433) acc 71.8750 (72.8977) lr 5.7422e-04 eta 2:06:56 +epoch [34/50] batch [60/500] time 0.924 (0.902) data 0.000 (0.013) loss 1.6611 (1.0529) acc 62.5000 (72.5521) lr 5.7422e-04 eta 2:06:51 +epoch [34/50] batch [65/500] time 0.878 (0.900) data 0.000 (0.012) loss 1.2314 (1.0605) acc 75.0000 (72.7404) lr 5.7422e-04 eta 2:06:31 +epoch [34/50] batch [70/500] time 0.883 (0.899) data 0.000 (0.011) loss 0.7749 (1.0599) acc 78.1250 (72.7232) lr 5.7422e-04 eta 2:06:16 +epoch [34/50] batch [75/500] time 0.876 (0.898) data 0.000 (0.010) loss 0.8896 (1.0709) acc 78.1250 (72.6667) lr 5.7422e-04 eta 2:06:05 +epoch [34/50] batch [80/500] time 0.911 (0.897) data 0.000 (0.010) loss 0.7100 (1.0622) acc 68.7500 (72.6172) lr 5.7422e-04 eta 2:05:56 +epoch [34/50] batch [85/500] time 0.923 (0.897) data 0.000 (0.009) loss 1.1230 (1.0569) acc 71.8750 (72.8676) lr 5.7422e-04 eta 2:05:47 +epoch [34/50] batch [90/500] time 0.884 (0.896) data 0.000 (0.009) loss 0.8960 (1.0505) acc 84.3750 (72.9167) lr 5.7422e-04 eta 2:05:31 +epoch [34/50] batch [95/500] time 0.886 (0.895) data 0.000 (0.008) loss 1.0957 (1.0503) acc 75.0000 (72.9934) lr 5.7422e-04 eta 2:05:18 +epoch [34/50] batch [100/500] time 0.902 (0.894) data 0.000 (0.008) loss 0.9673 (1.0449) acc 65.6250 (73.0625) lr 5.7422e-04 eta 2:05:12 +epoch [34/50] batch [105/500] time 0.867 (0.893) data 0.000 (0.007) loss 0.6597 (1.0426) acc 84.3750 (73.1548) lr 5.7422e-04 eta 2:04:58 +epoch [34/50] batch [110/500] time 0.880 (0.893) data 0.000 (0.007) loss 0.9482 (1.0404) acc 75.0000 (73.0966) lr 5.7422e-04 eta 2:04:51 +epoch [34/50] batch [115/500] time 0.861 (0.893) data 0.000 (0.007) loss 0.9678 (1.0365) acc 
71.8750 (73.2337) lr 5.7422e-04 eta 2:04:43 +epoch [34/50] batch [120/500] time 0.875 (0.892) data 0.000 (0.006) loss 1.4385 (1.0379) acc 71.8750 (73.2292) lr 5.7422e-04 eta 2:04:33 +epoch [34/50] batch [125/500] time 0.848 (0.891) data 0.000 (0.006) loss 0.6426 (1.0334) acc 84.3750 (73.3250) lr 5.7422e-04 eta 2:04:26 +epoch [34/50] batch [130/500] time 0.896 (0.891) data 0.000 (0.006) loss 1.3047 (1.0330) acc 62.5000 (73.3173) lr 5.7422e-04 eta 2:04:18 +epoch [34/50] batch [135/500] time 0.905 (0.891) data 0.000 (0.006) loss 0.5103 (1.0228) acc 81.2500 (73.4028) lr 5.7422e-04 eta 2:04:09 +epoch [34/50] batch [140/500] time 0.892 (0.891) data 0.000 (0.006) loss 0.8716 (1.0285) acc 75.0000 (73.3482) lr 5.7422e-04 eta 2:04:06 +epoch [34/50] batch [145/500] time 0.891 (0.890) data 0.000 (0.005) loss 0.8809 (1.0282) acc 78.1250 (73.3836) lr 5.7422e-04 eta 2:03:57 +epoch [34/50] batch [150/500] time 0.864 (0.889) data 0.000 (0.005) loss 0.3411 (1.0205) acc 87.5000 (73.5625) lr 5.7422e-04 eta 2:03:46 +epoch [34/50] batch [155/500] time 0.849 (0.889) data 0.000 (0.005) loss 1.4824 (1.0240) acc 62.5000 (73.5282) lr 5.7422e-04 eta 2:03:37 +epoch [34/50] batch [160/500] time 0.871 (0.888) data 0.000 (0.005) loss 0.8618 (1.0179) acc 71.8750 (73.6328) lr 5.7422e-04 eta 2:03:28 +epoch [34/50] batch [165/500] time 0.869 (0.888) data 0.000 (0.005) loss 1.0576 (1.0124) acc 71.8750 (73.7689) lr 5.7422e-04 eta 2:03:18 +epoch [34/50] batch [170/500] time 0.895 (0.888) data 0.000 (0.005) loss 0.7471 (1.0152) acc 81.2500 (73.8419) lr 5.7422e-04 eta 2:03:13 +epoch [34/50] batch [175/500] time 0.878 (0.888) data 0.000 (0.005) loss 1.1377 (1.0161) acc 68.7500 (73.8214) lr 5.7422e-04 eta 2:03:12 +epoch [34/50] batch [180/500] time 0.879 (0.888) data 0.000 (0.004) loss 1.0771 (1.0146) acc 81.2500 (73.9583) lr 5.7422e-04 eta 2:03:09 +epoch [34/50] batch [185/500] time 0.909 (0.888) data 0.000 (0.004) loss 1.2275 (1.0145) acc 75.0000 (74.0203) lr 5.7422e-04 eta 2:03:05 +epoch [34/50] batch 
[190/500] time 0.907 (0.888) data 0.000 (0.004) loss 1.0195 (1.0169) acc 78.1250 (74.0296) lr 5.7422e-04 eta 2:03:03 +epoch [34/50] batch [195/500] time 0.881 (0.889) data 0.000 (0.004) loss 1.0273 (1.0145) acc 75.0000 (74.1026) lr 5.7422e-04 eta 2:02:59 +epoch [34/50] batch [200/500] time 0.922 (0.889) data 0.000 (0.004) loss 0.4189 (1.0114) acc 93.7500 (74.3438) lr 5.7422e-04 eta 2:02:56 +epoch [34/50] batch [205/500] time 0.887 (0.889) data 0.000 (0.004) loss 0.7749 (1.0090) acc 78.1250 (74.2378) lr 5.7422e-04 eta 2:02:53 +epoch [34/50] batch [210/500] time 0.893 (0.889) data 0.000 (0.004) loss 0.5908 (1.0043) acc 84.3750 (74.3750) lr 5.7422e-04 eta 2:02:50 +epoch [34/50] batch [215/500] time 1.001 (0.890) data 0.000 (0.004) loss 1.2119 (1.0095) acc 68.7500 (74.2733) lr 5.7422e-04 eta 2:02:52 +epoch [34/50] batch [220/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.0732 (1.0098) acc 62.5000 (74.2614) lr 5.7422e-04 eta 2:02:46 +epoch [34/50] batch [225/500] time 0.877 (0.890) data 0.000 (0.004) loss 0.9565 (1.0122) acc 81.2500 (74.2361) lr 5.7422e-04 eta 2:02:43 +epoch [34/50] batch [230/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.0908 (1.0154) acc 71.8750 (74.2799) lr 5.7422e-04 eta 2:02:36 +epoch [34/50] batch [235/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.6348 (1.0135) acc 84.3750 (74.2952) lr 5.7422e-04 eta 2:02:30 +epoch [34/50] batch [240/500] time 0.862 (0.889) data 0.000 (0.003) loss 0.9409 (1.0145) acc 65.6250 (74.1667) lr 5.7422e-04 eta 2:02:23 +epoch [34/50] batch [245/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.1318 (1.0188) acc 71.8750 (74.0179) lr 5.7422e-04 eta 2:02:16 +epoch [34/50] batch [250/500] time 0.904 (0.889) data 0.000 (0.003) loss 1.0527 (1.0193) acc 71.8750 (74.0250) lr 5.7422e-04 eta 2:02:11 +epoch [34/50] batch [255/500] time 0.857 (0.888) data 0.000 (0.003) loss 0.8750 (1.0204) acc 81.2500 (74.0074) lr 5.7422e-04 eta 2:02:03 +epoch [34/50] batch [260/500] time 0.904 (0.888) data 0.000 (0.003) loss 0.8457 
(1.0222) acc 71.8750 (73.9543) lr 5.7422e-04 eta 2:01:58 +epoch [34/50] batch [265/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.6021 (1.0180) acc 87.5000 (74.0094) lr 5.7422e-04 eta 2:01:53 +epoch [34/50] batch [270/500] time 0.902 (0.888) data 0.000 (0.003) loss 1.2002 (1.0175) acc 78.1250 (73.9931) lr 5.7422e-04 eta 2:01:48 +epoch [34/50] batch [275/500] time 0.869 (0.888) data 0.000 (0.003) loss 1.1963 (1.0194) acc 75.0000 (74.0227) lr 5.7422e-04 eta 2:01:45 +epoch [34/50] batch [280/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.0312 (1.0195) acc 81.2500 (73.9509) lr 5.7422e-04 eta 2:01:38 +epoch [34/50] batch [285/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.9819 (1.0181) acc 81.2500 (74.0132) lr 5.7422e-04 eta 2:01:32 +epoch [34/50] batch [290/500] time 0.899 (0.888) data 0.000 (0.003) loss 1.4727 (1.0171) acc 71.8750 (74.0948) lr 5.7422e-04 eta 2:01:26 +epoch [34/50] batch [295/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.3711 (1.0210) acc 56.2500 (74.0466) lr 5.7422e-04 eta 2:01:22 +epoch [34/50] batch [300/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.7837 (1.0227) acc 84.3750 (74.0000) lr 5.7422e-04 eta 2:01:18 +epoch [34/50] batch [305/500] time 0.877 (0.888) data 0.000 (0.003) loss 0.9214 (1.0251) acc 78.1250 (73.9754) lr 5.7422e-04 eta 2:01:14 +epoch [34/50] batch [310/500] time 0.882 (0.888) data 0.000 (0.003) loss 0.9185 (1.0250) acc 71.8750 (73.9919) lr 5.7422e-04 eta 2:01:09 +epoch [34/50] batch [315/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.5625 (1.0255) acc 75.0000 (73.9385) lr 5.7422e-04 eta 2:01:07 +epoch [34/50] batch [320/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.3760 (1.0256) acc 65.6250 (73.9648) lr 5.7422e-04 eta 2:01:01 +epoch [34/50] batch [325/500] time 0.892 (0.888) data 0.000 (0.003) loss 1.0898 (1.0259) acc 62.5000 (73.9135) lr 5.7422e-04 eta 2:00:55 +epoch [34/50] batch [330/500] time 0.878 (0.887) data 0.000 (0.003) loss 0.8892 (1.0244) acc 75.0000 (74.0152) lr 5.7422e-04 eta 2:00:50 +epoch 
[34/50] batch [335/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.0928 (1.0239) acc 71.8750 (74.0299) lr 5.7422e-04 eta 2:00:45 +epoch [34/50] batch [340/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.6235 (1.0242) acc 81.2500 (74.0074) lr 5.7422e-04 eta 2:00:40 +epoch [34/50] batch [345/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.3867 (1.0220) acc 65.6250 (74.0399) lr 5.7422e-04 eta 2:00:34 +epoch [34/50] batch [350/500] time 0.864 (0.887) data 0.000 (0.002) loss 0.7910 (1.0200) acc 81.2500 (74.0982) lr 5.7422e-04 eta 2:00:28 +epoch [34/50] batch [355/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.0635 (1.0200) acc 81.2500 (74.1373) lr 5.7422e-04 eta 2:00:23 +epoch [34/50] batch [360/500] time 0.865 (0.887) data 0.000 (0.002) loss 2.0293 (1.0231) acc 59.3750 (74.0451) lr 5.7422e-04 eta 2:00:19 +epoch [34/50] batch [365/500] time 0.901 (0.887) data 0.000 (0.002) loss 0.5776 (1.0245) acc 81.2500 (74.0240) lr 5.7422e-04 eta 2:00:15 +epoch [34/50] batch [370/500] time 0.915 (0.887) data 0.000 (0.002) loss 0.9482 (1.0233) acc 62.5000 (74.0287) lr 5.7422e-04 eta 2:00:12 +epoch [34/50] batch [375/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.9014 (1.0253) acc 68.7500 (73.9667) lr 5.7422e-04 eta 2:00:08 +epoch [34/50] batch [380/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5083 (1.0247) acc 93.7500 (73.9803) lr 5.7422e-04 eta 2:00:03 +epoch [34/50] batch [385/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.7808 (1.0249) acc 84.3750 (73.9935) lr 5.7422e-04 eta 1:59:57 +epoch [34/50] batch [390/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.7441 (1.0249) acc 78.1250 (74.0144) lr 5.7422e-04 eta 1:59:51 +epoch [34/50] batch [395/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.1855 (1.0251) acc 62.5000 (74.0348) lr 5.7422e-04 eta 1:59:46 +epoch [34/50] batch [400/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.5371 (1.0267) acc 59.3750 (73.9609) lr 5.7422e-04 eta 1:59:41 +epoch [34/50] batch [405/500] time 0.908 (0.887) data 0.000 (0.002) loss 
0.9097 (1.0247) acc 81.2500 (74.0046) lr 5.7422e-04 eta 1:59:36 +epoch [34/50] batch [410/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4043 (1.0246) acc 65.6250 (74.0244) lr 5.7422e-04 eta 1:59:31 +epoch [34/50] batch [415/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.8789 (1.0228) acc 68.7500 (74.0136) lr 5.7422e-04 eta 1:59:28 +epoch [34/50] batch [420/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.3867 (1.0241) acc 75.0000 (74.0179) lr 5.7422e-04 eta 1:59:22 +epoch [34/50] batch [425/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.7495 (1.0241) acc 81.2500 (74.0735) lr 5.7422e-04 eta 1:59:17 +epoch [34/50] batch [430/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.5786 (1.0243) acc 78.1250 (74.0770) lr 5.7422e-04 eta 1:59:12 +epoch [34/50] batch [435/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.2041 (1.0242) acc 78.1250 (74.0876) lr 5.7422e-04 eta 1:59:08 +epoch [34/50] batch [440/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.0498 (1.0266) acc 78.1250 (74.0838) lr 5.7422e-04 eta 1:59:02 +epoch [34/50] batch [445/500] time 0.891 (0.886) data 0.000 (0.002) loss 1.3154 (1.0292) acc 65.6250 (74.0449) lr 5.7422e-04 eta 1:58:58 +epoch [34/50] batch [450/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.8862 (1.0331) acc 75.0000 (73.9792) lr 5.7422e-04 eta 1:58:53 +epoch [34/50] batch [455/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.4385 (1.0360) acc 75.0000 (73.9766) lr 5.7422e-04 eta 1:58:49 +epoch [34/50] batch [460/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.7197 (1.0353) acc 81.2500 (74.0014) lr 5.7422e-04 eta 1:58:46 +epoch [34/50] batch [465/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.1953 (1.0349) acc 71.8750 (74.0054) lr 5.7422e-04 eta 1:58:42 +epoch [34/50] batch [470/500] time 0.890 (0.886) data 0.000 (0.002) loss 0.9004 (1.0343) acc 75.0000 (73.9960) lr 5.7422e-04 eta 1:58:38 +epoch [34/50] batch [475/500] time 0.873 (0.886) data 0.000 (0.002) loss 0.9478 (1.0353) acc 65.6250 (73.9539) lr 5.7422e-04 eta 1:58:33 
+epoch [34/50] batch [480/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.5527 (1.0339) acc 65.6250 (74.0039) lr 5.7422e-04 eta 1:58:29 +epoch [34/50] batch [485/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.1162 (1.0327) acc 75.0000 (74.0206) lr 5.7422e-04 eta 1:58:25 +epoch [34/50] batch [490/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.2637 (1.0318) acc 75.0000 (74.0689) lr 5.7422e-04 eta 1:58:19 +epoch [34/50] batch [495/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.2598 (1.0317) acc 59.3750 (74.0467) lr 5.7422e-04 eta 1:58:15 +epoch [34/50] batch [500/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.8525 (1.0318) acc 81.2500 (74.0812) lr 5.1825e-04 eta 1:58:11 +epoch [35/50] batch [5/500] time 0.856 (1.030) data 0.000 (0.130) loss 0.6655 (0.9313) acc 78.1250 (79.3750) lr 5.1825e-04 eta 2:17:15 +epoch [35/50] batch [10/500] time 0.876 (0.953) data 0.000 (0.065) loss 1.5430 (1.0320) acc 65.6250 (75.6250) lr 5.1825e-04 eta 2:06:56 +epoch [35/50] batch [15/500] time 0.911 (0.932) data 0.000 (0.044) loss 1.0020 (1.0673) acc 78.1250 (74.7917) lr 5.1825e-04 eta 2:04:00 +epoch [35/50] batch [20/500] time 0.935 (0.921) data 0.000 (0.033) loss 1.1650 (1.1162) acc 75.0000 (73.2812) lr 5.1825e-04 eta 2:02:29 +epoch [35/50] batch [25/500] time 0.919 (0.914) data 0.000 (0.026) loss 0.4668 (1.0834) acc 81.2500 (72.8750) lr 5.1825e-04 eta 2:01:27 +epoch [35/50] batch [30/500] time 0.886 (0.908) data 0.000 (0.022) loss 1.5088 (1.0998) acc 71.8750 (72.6042) lr 5.1825e-04 eta 2:00:34 +epoch [35/50] batch [35/500] time 0.893 (0.903) data 0.000 (0.019) loss 1.1279 (1.1069) acc 68.7500 (72.0536) lr 5.1825e-04 eta 1:59:53 +epoch [35/50] batch [40/500] time 0.877 (0.901) data 0.000 (0.017) loss 1.7705 (1.1245) acc 65.6250 (72.5781) lr 5.1825e-04 eta 1:59:28 +epoch [35/50] batch [45/500] time 0.991 (0.900) data 0.000 (0.015) loss 1.1182 (1.1423) acc 78.1250 (72.5000) lr 5.1825e-04 eta 1:59:21 +epoch [35/50] batch [50/500] time 0.852 (0.898) data 0.000 (0.013) loss 
1.7070 (1.1363) acc 59.3750 (72.5625) lr 5.1825e-04 eta 1:58:58 +epoch [35/50] batch [55/500] time 0.868 (0.896) data 0.000 (0.012) loss 1.0762 (1.1394) acc 71.8750 (72.3295) lr 5.1825e-04 eta 1:58:35 +epoch [35/50] batch [60/500] time 0.870 (0.894) data 0.000 (0.011) loss 1.1230 (1.1235) acc 71.8750 (72.5000) lr 5.1825e-04 eta 1:58:16 +epoch [35/50] batch [65/500] time 0.881 (0.891) data 0.000 (0.010) loss 1.2090 (1.1010) acc 71.8750 (72.9327) lr 5.1825e-04 eta 1:57:53 +epoch [35/50] batch [70/500] time 0.880 (0.891) data 0.000 (0.010) loss 0.6021 (1.0769) acc 84.3750 (73.4375) lr 5.1825e-04 eta 1:57:42 +epoch [35/50] batch [75/500] time 0.862 (0.890) data 0.000 (0.009) loss 0.9268 (1.0864) acc 71.8750 (73.4167) lr 5.1825e-04 eta 1:57:29 +epoch [35/50] batch [80/500] time 0.910 (0.889) data 0.000 (0.008) loss 1.0713 (1.0907) acc 62.5000 (73.3203) lr 5.1825e-04 eta 1:57:21 +epoch [35/50] batch [85/500] time 0.897 (0.889) data 0.000 (0.008) loss 1.3516 (1.0742) acc 68.7500 (73.5662) lr 5.1825e-04 eta 1:57:18 +epoch [35/50] batch [90/500] time 0.904 (0.890) data 0.000 (0.007) loss 1.4834 (1.0701) acc 59.3750 (73.6458) lr 5.1825e-04 eta 1:57:21 +epoch [35/50] batch [95/500] time 0.900 (0.890) data 0.000 (0.007) loss 1.1660 (1.0709) acc 78.1250 (73.5526) lr 5.1825e-04 eta 1:57:17 +epoch [35/50] batch [100/500] time 0.863 (0.890) data 0.000 (0.007) loss 0.8311 (1.0612) acc 75.0000 (73.8125) lr 5.1825e-04 eta 1:57:11 +epoch [35/50] batch [105/500] time 0.871 (0.890) data 0.000 (0.006) loss 0.4583 (1.0533) acc 84.3750 (73.9286) lr 5.1825e-04 eta 1:57:04 +epoch [35/50] batch [110/500] time 0.878 (0.890) data 0.000 (0.006) loss 0.6245 (1.0442) acc 84.3750 (74.1477) lr 5.1825e-04 eta 1:57:00 +epoch [35/50] batch [115/500] time 0.884 (0.889) data 0.000 (0.006) loss 0.9409 (1.0396) acc 75.0000 (74.1033) lr 5.1825e-04 eta 1:56:45 +epoch [35/50] batch [120/500] time 0.874 (0.888) data 0.000 (0.006) loss 0.7012 (1.0393) acc 84.3750 (74.0365) lr 5.1825e-04 eta 1:56:37 +epoch 
[35/50] batch [125/500] time 0.872 (0.887) data 0.000 (0.005) loss 1.1270 (1.0518) acc 68.7500 (73.8000) lr 5.1825e-04 eta 1:56:28 +epoch [35/50] batch [130/500] time 0.884 (0.887) data 0.000 (0.005) loss 1.1807 (1.0553) acc 65.6250 (73.6779) lr 5.1825e-04 eta 1:56:19 +epoch [35/50] batch [135/500] time 0.881 (0.886) data 0.000 (0.005) loss 1.8164 (1.0658) acc 65.6250 (73.5648) lr 5.1825e-04 eta 1:56:12 +epoch [35/50] batch [140/500] time 0.881 (0.886) data 0.000 (0.005) loss 0.9785 (1.0589) acc 75.0000 (73.8170) lr 5.1825e-04 eta 1:56:07 +epoch [35/50] batch [145/500] time 0.874 (0.887) data 0.000 (0.005) loss 0.5601 (1.0579) acc 78.1250 (73.8362) lr 5.1825e-04 eta 1:56:06 +epoch [35/50] batch [150/500] time 0.868 (0.887) data 0.000 (0.005) loss 1.2070 (1.0513) acc 75.0000 (73.9792) lr 5.1825e-04 eta 1:56:01 +epoch [35/50] batch [155/500] time 0.881 (0.887) data 0.000 (0.004) loss 0.8359 (1.0441) acc 75.0000 (74.0927) lr 5.1825e-04 eta 1:55:56 +epoch [35/50] batch [160/500] time 0.862 (0.886) data 0.000 (0.004) loss 0.8848 (1.0460) acc 75.0000 (74.0820) lr 5.1825e-04 eta 1:55:49 +epoch [35/50] batch [165/500] time 0.881 (0.886) data 0.000 (0.004) loss 1.3311 (1.0448) acc 68.7500 (74.0530) lr 5.1825e-04 eta 1:55:44 +epoch [35/50] batch [170/500] time 0.905 (0.886) data 0.000 (0.004) loss 0.7031 (1.0405) acc 81.2500 (74.1360) lr 5.1825e-04 eta 1:55:40 +epoch [35/50] batch [175/500] time 0.897 (0.887) data 0.000 (0.004) loss 1.0830 (1.0379) acc 75.0000 (74.2143) lr 5.1825e-04 eta 1:55:37 +epoch [35/50] batch [180/500] time 0.900 (0.887) data 0.000 (0.004) loss 1.0332 (1.0369) acc 68.7500 (74.1493) lr 5.1825e-04 eta 1:55:32 +epoch [35/50] batch [185/500] time 0.855 (0.887) data 0.000 (0.004) loss 1.1016 (1.0361) acc 68.7500 (74.1723) lr 5.1825e-04 eta 1:55:29 +epoch [35/50] batch [190/500] time 0.884 (0.887) data 0.000 (0.004) loss 1.1846 (1.0388) acc 68.7500 (74.0625) lr 5.1825e-04 eta 1:55:27 +epoch [35/50] batch [195/500] time 0.866 (0.887) data 0.000 (0.004) loss 
0.9517 (1.0370) acc 78.1250 (74.1026) lr 5.1825e-04 eta 1:55:22 +epoch [35/50] batch [200/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.9683 (1.0415) acc 75.0000 (74.0625) lr 5.1825e-04 eta 1:55:17 +epoch [35/50] batch [205/500] time 0.867 (0.887) data 0.000 (0.003) loss 1.3203 (1.0438) acc 68.7500 (74.0854) lr 5.1825e-04 eta 1:55:11 +epoch [35/50] batch [210/500] time 0.867 (0.887) data 0.000 (0.003) loss 1.3154 (1.0415) acc 78.1250 (74.1815) lr 5.1825e-04 eta 1:55:08 +epoch [35/50] batch [215/500] time 0.877 (0.887) data 0.000 (0.003) loss 0.6240 (1.0404) acc 84.3750 (74.1715) lr 5.1825e-04 eta 1:55:03 +epoch [35/50] batch [220/500] time 0.871 (0.887) data 0.000 (0.003) loss 0.8682 (1.0349) acc 75.0000 (74.2898) lr 5.1825e-04 eta 1:55:00 +epoch [35/50] batch [225/500] time 0.904 (0.887) data 0.000 (0.003) loss 1.1201 (1.0330) acc 71.8750 (74.3194) lr 5.1825e-04 eta 1:54:56 +epoch [35/50] batch [230/500] time 0.869 (0.887) data 0.000 (0.003) loss 0.3904 (1.0292) acc 90.6250 (74.3342) lr 5.1825e-04 eta 1:54:50 +epoch [35/50] batch [235/500] time 0.863 (0.886) data 0.000 (0.003) loss 1.3252 (1.0286) acc 75.0000 (74.4149) lr 5.1825e-04 eta 1:54:43 +epoch [35/50] batch [240/500] time 0.835 (0.886) data 0.000 (0.003) loss 0.7446 (1.0306) acc 78.1250 (74.3359) lr 5.1825e-04 eta 1:54:35 +epoch [35/50] batch [245/500] time 0.890 (0.886) data 0.000 (0.003) loss 1.1748 (1.0329) acc 71.8750 (74.2347) lr 5.1825e-04 eta 1:54:31 +epoch [35/50] batch [250/500] time 0.881 (0.886) data 0.000 (0.003) loss 1.4795 (1.0359) acc 62.5000 (74.1750) lr 5.1825e-04 eta 1:54:25 +epoch [35/50] batch [255/500] time 0.881 (0.886) data 0.001 (0.003) loss 0.5195 (1.0354) acc 78.1250 (74.2402) lr 5.1825e-04 eta 1:54:20 +epoch [35/50] batch [260/500] time 0.906 (0.886) data 0.000 (0.003) loss 0.7441 (1.0334) acc 84.3750 (74.2668) lr 5.1825e-04 eta 1:54:15 +epoch [35/50] batch [265/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.6411 (1.0318) acc 87.5000 (74.2571) lr 5.1825e-04 eta 1:54:10 
+epoch [35/50] batch [270/500] time 0.882 (0.886) data 0.000 (0.003) loss 1.4326 (1.0344) acc 71.8750 (74.3056) lr 5.1825e-04 eta 1:54:06 +epoch [35/50] batch [275/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.6196 (1.0341) acc 78.1250 (74.3068) lr 5.1825e-04 eta 1:54:02 +epoch [35/50] batch [280/500] time 0.869 (0.886) data 0.000 (0.003) loss 0.6450 (1.0344) acc 78.1250 (74.2522) lr 5.1825e-04 eta 1:53:57 +epoch [35/50] batch [285/500] time 0.895 (0.886) data 0.000 (0.003) loss 1.3789 (1.0370) acc 68.7500 (74.1776) lr 5.1825e-04 eta 1:53:54 +epoch [35/50] batch [290/500] time 0.875 (0.886) data 0.000 (0.002) loss 0.6646 (1.0349) acc 81.2500 (74.2026) lr 5.1825e-04 eta 1:53:54 +epoch [35/50] batch [295/500] time 0.892 (0.886) data 0.000 (0.002) loss 1.0918 (1.0343) acc 78.1250 (74.2797) lr 5.1825e-04 eta 1:53:49 +epoch [35/50] batch [300/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.6445 (1.0343) acc 65.6250 (74.3021) lr 5.1825e-04 eta 1:53:44 +epoch [35/50] batch [305/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.0361 (1.0325) acc 78.1250 (74.3648) lr 5.1825e-04 eta 1:53:39 +epoch [35/50] batch [310/500] time 0.876 (0.886) data 0.000 (0.002) loss 0.8247 (1.0324) acc 75.0000 (74.3448) lr 5.1825e-04 eta 1:53:35 +epoch [35/50] batch [315/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.2471 (1.0365) acc 71.8750 (74.2659) lr 5.1825e-04 eta 1:53:28 +epoch [35/50] batch [320/500] time 0.867 (0.886) data 0.000 (0.002) loss 0.9492 (1.0354) acc 71.8750 (74.2871) lr 5.1825e-04 eta 1:53:23 +epoch [35/50] batch [325/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.9448 (1.0377) acc 65.6250 (74.2115) lr 5.1825e-04 eta 1:53:20 +epoch [35/50] batch [330/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.3896 (1.0362) acc 53.1250 (74.2235) lr 5.1825e-04 eta 1:53:16 +epoch [35/50] batch [335/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.1426 (1.0377) acc 65.6250 (74.0951) lr 5.1825e-04 eta 1:53:15 +epoch [35/50] batch [340/500] time 0.888 (0.886) data 0.000 
(0.002) loss 0.8071 (1.0362) acc 75.0000 (74.1176) lr 5.1825e-04 eta 1:53:09 +epoch [35/50] batch [345/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.3525 (1.0349) acc 62.5000 (74.1395) lr 5.1825e-04 eta 1:53:03 +epoch [35/50] batch [350/500] time 0.842 (0.886) data 0.000 (0.002) loss 1.0674 (1.0345) acc 59.3750 (74.1071) lr 5.1825e-04 eta 1:52:56 +epoch [35/50] batch [355/500] time 0.907 (0.886) data 0.000 (0.002) loss 1.3574 (1.0342) acc 71.8750 (74.1197) lr 5.1825e-04 eta 1:52:51 +epoch [35/50] batch [360/500] time 0.860 (0.886) data 0.000 (0.002) loss 0.9224 (1.0339) acc 81.2500 (74.0972) lr 5.1825e-04 eta 1:52:46 +epoch [35/50] batch [365/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.9873 (1.0320) acc 81.2500 (74.1182) lr 5.1825e-04 eta 1:52:40 +epoch [35/50] batch [370/500] time 0.913 (0.885) data 0.000 (0.002) loss 0.6655 (1.0299) acc 78.1250 (74.1132) lr 5.1825e-04 eta 1:52:36 +epoch [35/50] batch [375/500] time 0.913 (0.886) data 0.000 (0.002) loss 1.5000 (1.0314) acc 65.6250 (74.0667) lr 5.1825e-04 eta 1:52:32 +epoch [35/50] batch [380/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.6885 (1.0315) acc 68.7500 (74.1036) lr 5.1825e-04 eta 1:52:28 +epoch [35/50] batch [385/500] time 0.878 (0.886) data 0.000 (0.002) loss 0.4238 (1.0304) acc 87.5000 (74.1396) lr 5.1825e-04 eta 1:52:23 +epoch [35/50] batch [390/500] time 0.859 (0.885) data 0.000 (0.002) loss 0.7749 (1.0337) acc 78.1250 (74.0865) lr 5.1825e-04 eta 1:52:17 +epoch [35/50] batch [395/500] time 0.870 (0.885) data 0.000 (0.002) loss 1.0547 (1.0323) acc 68.7500 (74.1614) lr 5.1825e-04 eta 1:52:13 +epoch [35/50] batch [400/500] time 0.864 (0.885) data 0.000 (0.002) loss 0.9619 (1.0319) acc 68.7500 (74.1562) lr 5.1825e-04 eta 1:52:08 +epoch [35/50] batch [405/500] time 0.880 (0.885) data 0.000 (0.002) loss 1.3145 (1.0331) acc 68.7500 (74.1204) lr 5.1825e-04 eta 1:52:03 +epoch [35/50] batch [410/500] time 0.878 (0.885) data 0.000 (0.002) loss 0.8115 (1.0330) acc 90.6250 (74.1768) lr 5.1825e-04 
eta 1:51:59 +epoch [35/50] batch [415/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.7500 (1.0338) acc 84.3750 (74.1416) lr 5.1825e-04 eta 1:51:54 +epoch [35/50] batch [420/500] time 0.906 (0.885) data 0.000 (0.002) loss 1.3438 (1.0349) acc 68.7500 (74.1071) lr 5.1825e-04 eta 1:51:49 +epoch [35/50] batch [425/500] time 0.883 (0.885) data 0.000 (0.002) loss 0.7197 (1.0351) acc 84.3750 (74.0662) lr 5.1825e-04 eta 1:51:45 +epoch [35/50] batch [430/500] time 0.991 (0.886) data 0.000 (0.002) loss 0.6963 (1.0330) acc 81.2500 (74.0698) lr 5.1825e-04 eta 1:51:43 +epoch [35/50] batch [435/500] time 0.906 (0.886) data 0.000 (0.002) loss 0.7251 (1.0302) acc 81.2500 (74.1020) lr 5.1825e-04 eta 1:51:39 +epoch [35/50] batch [440/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.9624 (1.0295) acc 81.2500 (74.0767) lr 5.1825e-04 eta 1:51:34 +epoch [35/50] batch [445/500] time 0.873 (0.885) data 0.000 (0.002) loss 1.3447 (1.0315) acc 71.8750 (74.0379) lr 5.1825e-04 eta 1:51:29 +epoch [35/50] batch [450/500] time 0.916 (0.886) data 0.000 (0.002) loss 0.5464 (1.0312) acc 84.3750 (74.0208) lr 5.1825e-04 eta 1:51:25 +epoch [35/50] batch [455/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.1904 (1.0311) acc 68.7500 (74.0179) lr 5.1825e-04 eta 1:51:21 +epoch [35/50] batch [460/500] time 0.902 (0.885) data 0.000 (0.002) loss 0.7656 (1.0311) acc 81.2500 (73.9878) lr 5.1825e-04 eta 1:51:16 +epoch [35/50] batch [465/500] time 0.877 (0.886) data 0.000 (0.002) loss 1.1641 (1.0324) acc 75.0000 (73.9651) lr 5.1825e-04 eta 1:51:12 +epoch [35/50] batch [470/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.1123 (1.0320) acc 78.1250 (73.9894) lr 5.1825e-04 eta 1:51:07 +epoch [35/50] batch [475/500] time 0.872 (0.886) data 0.000 (0.002) loss 0.8931 (1.0274) acc 87.5000 (74.0855) lr 5.1825e-04 eta 1:51:04 +epoch [35/50] batch [480/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.6006 (1.0280) acc 84.3750 (74.0690) lr 5.1825e-04 eta 1:50:59 +epoch [35/50] batch [485/500] time 0.872 (0.886) data 
0.000 (0.002) loss 1.1533 (1.0284) acc 65.6250 (74.0593) lr 5.1825e-04 eta 1:50:54 +epoch [35/50] batch [490/500] time 0.895 (0.886) data 0.000 (0.002) loss 0.9829 (1.0302) acc 81.2500 (74.0497) lr 5.1825e-04 eta 1:50:51 +epoch [35/50] batch [495/500] time 0.909 (0.886) data 0.000 (0.002) loss 1.0098 (1.0309) acc 68.7500 (74.0152) lr 5.1825e-04 eta 1:50:47 +epoch [35/50] batch [500/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.4004 (1.0327) acc 65.6250 (73.9562) lr 4.6417e-04 eta 1:50:42 +epoch [36/50] batch [5/500] time 0.850 (1.006) data 0.000 (0.131) loss 1.3574 (1.1066) acc 65.6250 (67.5000) lr 4.6417e-04 eta 2:05:36 +epoch [36/50] batch [10/500] time 0.846 (0.938) data 0.000 (0.065) loss 1.0732 (1.0585) acc 75.0000 (71.2500) lr 4.6417e-04 eta 1:57:04 +epoch [36/50] batch [15/500] time 0.884 (0.914) data 0.000 (0.044) loss 0.6582 (0.9981) acc 84.3750 (73.1250) lr 4.6417e-04 eta 1:54:00 +epoch [36/50] batch [20/500] time 0.891 (0.914) data 0.000 (0.033) loss 1.8721 (1.0487) acc 62.5000 (72.8125) lr 4.6417e-04 eta 1:53:56 +epoch [36/50] batch [25/500] time 0.878 (0.907) data 0.000 (0.026) loss 0.9111 (1.0399) acc 71.8750 (72.5000) lr 4.6417e-04 eta 1:52:59 +epoch [36/50] batch [30/500] time 0.890 (0.905) data 0.000 (0.022) loss 0.8320 (1.0192) acc 78.1250 (73.4375) lr 4.6417e-04 eta 1:52:37 +epoch [36/50] batch [35/500] time 0.900 (0.902) data 0.000 (0.019) loss 1.4014 (1.0627) acc 59.3750 (72.5893) lr 4.6417e-04 eta 1:52:16 +epoch [36/50] batch [40/500] time 0.852 (0.900) data 0.000 (0.017) loss 0.9209 (1.0373) acc 81.2500 (73.0469) lr 4.6417e-04 eta 1:51:50 +epoch [36/50] batch [45/500] time 0.888 (0.897) data 0.000 (0.015) loss 0.8130 (1.0026) acc 71.8750 (73.8889) lr 4.6417e-04 eta 1:51:28 +epoch [36/50] batch [50/500] time 0.859 (0.896) data 0.000 (0.013) loss 1.0381 (0.9898) acc 75.0000 (74.1250) lr 4.6417e-04 eta 1:51:17 +epoch [36/50] batch [55/500] time 0.895 (0.895) data 0.000 (0.012) loss 1.0225 (1.0168) acc 78.1250 (73.8636) lr 4.6417e-04 eta 
1:51:00 +epoch [36/50] batch [60/500] time 0.908 (0.896) data 0.000 (0.011) loss 1.2178 (1.0239) acc 75.0000 (73.6979) lr 4.6417e-04 eta 1:51:04 +epoch [36/50] batch [65/500] time 0.879 (0.895) data 0.000 (0.010) loss 0.7324 (1.0321) acc 81.2500 (73.7019) lr 4.6417e-04 eta 1:50:51 +epoch [36/50] batch [70/500] time 0.882 (0.894) data 0.000 (0.010) loss 1.5293 (1.0270) acc 62.5000 (73.7946) lr 4.6417e-04 eta 1:50:44 +epoch [36/50] batch [75/500] time 0.919 (0.894) data 0.000 (0.009) loss 1.0254 (1.0138) acc 78.1250 (74.1250) lr 4.6417e-04 eta 1:50:37 +epoch [36/50] batch [80/500] time 0.886 (0.893) data 0.000 (0.008) loss 1.2129 (1.0269) acc 78.1250 (74.0234) lr 4.6417e-04 eta 1:50:28 +epoch [36/50] batch [85/500] time 0.863 (0.892) data 0.000 (0.008) loss 0.7915 (1.0312) acc 81.2500 (73.9338) lr 4.6417e-04 eta 1:50:15 +epoch [36/50] batch [90/500] time 0.884 (0.892) data 0.000 (0.007) loss 0.8169 (1.0382) acc 81.2500 (73.9931) lr 4.6417e-04 eta 1:50:11 +epoch [36/50] batch [95/500] time 0.903 (0.892) data 0.000 (0.007) loss 1.2949 (1.0493) acc 68.7500 (73.6842) lr 4.6417e-04 eta 1:50:08 +epoch [36/50] batch [100/500] time 0.872 (0.892) data 0.000 (0.007) loss 1.0215 (1.0471) acc 75.0000 (73.8750) lr 4.6417e-04 eta 1:49:59 +epoch [36/50] batch [105/500] time 0.893 (0.892) data 0.000 (0.006) loss 0.4937 (1.0424) acc 84.3750 (73.9583) lr 4.6417e-04 eta 1:49:55 +epoch [36/50] batch [110/500] time 0.876 (0.891) data 0.000 (0.006) loss 0.7412 (1.0529) acc 81.2500 (73.6648) lr 4.6417e-04 eta 1:49:47 +epoch [36/50] batch [115/500] time 0.882 (0.891) data 0.000 (0.006) loss 1.3408 (1.0566) acc 78.1250 (73.7228) lr 4.6417e-04 eta 1:49:38 +epoch [36/50] batch [120/500] time 0.891 (0.891) data 0.000 (0.006) loss 0.9268 (1.0606) acc 78.1250 (73.6458) lr 4.6417e-04 eta 1:49:39 +epoch [36/50] batch [125/500] time 0.879 (0.891) data 0.000 (0.005) loss 0.6299 (1.0552) acc 81.2500 (73.6250) lr 4.6417e-04 eta 1:49:31 +epoch [36/50] batch [130/500] time 0.891 (0.891) data 0.000 
(0.005) loss 1.5088 (1.0635) acc 68.7500 (73.4615) lr 4.6417e-04 eta 1:49:25 +epoch [36/50] batch [135/500] time 0.925 (0.891) data 0.000 (0.005) loss 1.5576 (1.0721) acc 62.5000 (73.4028) lr 4.6417e-04 eta 1:49:18 +epoch [36/50] batch [140/500] time 0.903 (0.891) data 0.000 (0.005) loss 1.4639 (1.0699) acc 71.8750 (73.5491) lr 4.6417e-04 eta 1:49:14 +epoch [36/50] batch [145/500] time 0.891 (0.890) data 0.000 (0.005) loss 0.7559 (1.0720) acc 71.8750 (73.3621) lr 4.6417e-04 eta 1:49:08 +epoch [36/50] batch [150/500] time 0.849 (0.890) data 0.000 (0.005) loss 1.3145 (1.0784) acc 65.6250 (73.2917) lr 4.6417e-04 eta 1:49:00 +epoch [36/50] batch [155/500] time 0.895 (0.889) data 0.000 (0.004) loss 0.7241 (1.0727) acc 81.2500 (73.4677) lr 4.6417e-04 eta 1:48:52 +epoch [36/50] batch [160/500] time 0.847 (0.889) data 0.000 (0.004) loss 1.3008 (1.0760) acc 65.6250 (73.3398) lr 4.6417e-04 eta 1:48:47 +epoch [36/50] batch [165/500] time 0.899 (0.889) data 0.000 (0.004) loss 0.8691 (1.0719) acc 75.0000 (73.3523) lr 4.6417e-04 eta 1:48:42 +epoch [36/50] batch [170/500] time 0.885 (0.889) data 0.000 (0.004) loss 1.1963 (1.0721) acc 65.6250 (73.4191) lr 4.6417e-04 eta 1:48:33 +epoch [36/50] batch [175/500] time 0.894 (0.888) data 0.000 (0.004) loss 1.2178 (1.0726) acc 71.8750 (73.5179) lr 4.6417e-04 eta 1:48:26 +epoch [36/50] batch [180/500] time 0.867 (0.888) data 0.000 (0.004) loss 1.1504 (1.0748) acc 68.7500 (73.4375) lr 4.6417e-04 eta 1:48:19 +epoch [36/50] batch [185/500] time 0.846 (0.888) data 0.000 (0.004) loss 1.2275 (1.0764) acc 68.7500 (73.4628) lr 4.6417e-04 eta 1:48:12 +epoch [36/50] batch [190/500] time 0.883 (0.888) data 0.000 (0.004) loss 0.5640 (1.0748) acc 81.2500 (73.4211) lr 4.6417e-04 eta 1:48:07 +epoch [36/50] batch [195/500] time 0.868 (0.887) data 0.000 (0.004) loss 0.8354 (1.0694) acc 78.1250 (73.5417) lr 4.6417e-04 eta 1:48:02 +epoch [36/50] batch [200/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.9126 (1.0694) acc 81.2500 (73.4844) lr 4.6417e-04 
eta 1:48:00 +epoch [36/50] batch [205/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.0566 (1.0707) acc 68.7500 (73.3994) lr 4.6417e-04 eta 1:47:55 +epoch [36/50] batch [210/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.0850 (1.0670) acc 71.8750 (73.4226) lr 4.6417e-04 eta 1:47:50 +epoch [36/50] batch [215/500] time 0.878 (0.888) data 0.000 (0.003) loss 1.0059 (1.0626) acc 71.8750 (73.5029) lr 4.6417e-04 eta 1:47:45 +epoch [36/50] batch [220/500] time 0.937 (0.888) data 0.000 (0.003) loss 1.1221 (1.0595) acc 62.5000 (73.5085) lr 4.6417e-04 eta 1:47:43 +epoch [36/50] batch [225/500] time 0.865 (0.888) data 0.000 (0.003) loss 0.8833 (1.0597) acc 75.0000 (73.5000) lr 4.6417e-04 eta 1:47:38 +epoch [36/50] batch [230/500] time 0.884 (0.888) data 0.000 (0.003) loss 1.0098 (1.0604) acc 68.7500 (73.4918) lr 4.6417e-04 eta 1:47:32 +epoch [36/50] batch [235/500] time 0.891 (0.888) data 0.000 (0.003) loss 0.9741 (1.0600) acc 78.1250 (73.5505) lr 4.6417e-04 eta 1:47:29 +epoch [36/50] batch [240/500] time 0.908 (0.888) data 0.000 (0.003) loss 0.8188 (1.0630) acc 87.5000 (73.5286) lr 4.6417e-04 eta 1:47:24 +epoch [36/50] batch [245/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.3262 (1.0631) acc 71.8750 (73.4821) lr 4.6417e-04 eta 1:47:20 +epoch [36/50] batch [250/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.6260 (1.0660) acc 62.5000 (73.4125) lr 4.6417e-04 eta 1:47:15 +epoch [36/50] batch [255/500] time 0.904 (0.887) data 0.000 (0.003) loss 0.8574 (1.0689) acc 81.2500 (73.3211) lr 4.6417e-04 eta 1:47:09 +epoch [36/50] batch [260/500] time 0.971 (0.888) data 0.000 (0.003) loss 0.6460 (1.0674) acc 81.2500 (73.2812) lr 4.6417e-04 eta 1:47:09 +epoch [36/50] batch [265/500] time 0.880 (0.888) data 0.000 (0.003) loss 1.2402 (1.0711) acc 71.8750 (73.2075) lr 4.6417e-04 eta 1:47:04 +epoch [36/50] batch [270/500] time 0.941 (0.888) data 0.000 (0.003) loss 0.9902 (1.0683) acc 68.7500 (73.2986) lr 4.6417e-04 eta 1:47:01 +epoch [36/50] batch [275/500] time 0.876 (0.888) data 
0.000 (0.003) loss 1.0215 (1.0677) acc 62.5000 (73.2386) lr 4.6417e-04 eta 1:46:55 +epoch [36/50] batch [280/500] time 0.900 (0.888) data 0.000 (0.003) loss 0.9697 (1.0716) acc 75.0000 (73.1920) lr 4.6417e-04 eta 1:46:51 +epoch [36/50] batch [285/500] time 0.852 (0.888) data 0.000 (0.003) loss 0.9009 (1.0768) acc 75.0000 (73.0702) lr 4.6417e-04 eta 1:46:45 +epoch [36/50] batch [290/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.7485 (1.0723) acc 81.2500 (73.1681) lr 4.6417e-04 eta 1:46:39 +epoch [36/50] batch [295/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.2686 (1.0717) acc 65.6250 (73.1462) lr 4.6417e-04 eta 1:46:34 +epoch [36/50] batch [300/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.4863 (1.0694) acc 56.2500 (73.1667) lr 4.6417e-04 eta 1:46:29 +epoch [36/50] batch [305/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.9048 (1.0710) acc 81.2500 (73.2070) lr 4.6417e-04 eta 1:46:27 +epoch [36/50] batch [310/500] time 0.859 (0.888) data 0.000 (0.002) loss 0.7183 (1.0659) acc 78.1250 (73.3266) lr 4.6417e-04 eta 1:46:21 +epoch [36/50] batch [315/500] time 0.917 (0.888) data 0.000 (0.002) loss 1.3311 (1.0639) acc 68.7500 (73.3929) lr 4.6417e-04 eta 1:46:17 +epoch [36/50] batch [320/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.7666 (1.0591) acc 81.2500 (73.4961) lr 4.6417e-04 eta 1:46:11 +epoch [36/50] batch [325/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.7227 (1.0562) acc 81.2500 (73.5769) lr 4.6417e-04 eta 1:46:06 +epoch [36/50] batch [330/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.0273 (1.0551) acc 75.0000 (73.6458) lr 4.6417e-04 eta 1:46:00 +epoch [36/50] batch [335/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.9912 (1.0520) acc 68.7500 (73.7220) lr 4.6417e-04 eta 1:45:55 +epoch [36/50] batch [340/500] time 0.919 (0.887) data 0.000 (0.002) loss 1.3223 (1.0497) acc 56.2500 (73.7132) lr 4.6417e-04 eta 1:45:51 +epoch [36/50] batch [345/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.0547 (1.0481) acc 71.8750 (73.7047) lr 
4.6417e-04 eta 1:45:46 +epoch [36/50] batch [350/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.3604 (1.0481) acc 75.0000 (73.7857) lr 4.6417e-04 eta 1:45:41 +epoch [36/50] batch [355/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.8784 (1.0481) acc 78.1250 (73.7764) lr 4.6417e-04 eta 1:45:35 +epoch [36/50] batch [360/500] time 0.903 (0.887) data 0.000 (0.002) loss 0.8179 (1.0470) acc 81.2500 (73.8194) lr 4.6417e-04 eta 1:45:30 +epoch [36/50] batch [365/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.3232 (1.0490) acc 68.7500 (73.7842) lr 4.6417e-04 eta 1:45:26 +epoch [36/50] batch [370/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.7041 (1.0480) acc 75.0000 (73.7669) lr 4.6417e-04 eta 1:45:21 +epoch [36/50] batch [375/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.2559 (1.0437) acc 75.0000 (73.8917) lr 4.6417e-04 eta 1:45:17 +epoch [36/50] batch [380/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.9810 (1.0446) acc 78.1250 (73.8816) lr 4.6417e-04 eta 1:45:14 +epoch [36/50] batch [385/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.0322 (1.0451) acc 65.6250 (73.8880) lr 4.6417e-04 eta 1:45:09 +epoch [36/50] batch [390/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.8359 (1.0426) acc 75.0000 (73.9103) lr 4.6417e-04 eta 1:45:05 +epoch [36/50] batch [395/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.8716 (1.0400) acc 81.2500 (73.9557) lr 4.6417e-04 eta 1:45:02 +epoch [36/50] batch [400/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.8218 (1.0366) acc 81.2500 (74.0547) lr 4.6417e-04 eta 1:44:58 +epoch [36/50] batch [405/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.7803 (1.0363) acc 71.8750 (74.0123) lr 4.6417e-04 eta 1:44:56 +epoch [36/50] batch [410/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.5454 (1.0340) acc 78.1250 (74.0244) lr 4.6417e-04 eta 1:44:51 +epoch [36/50] batch [415/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.9941 (1.0314) acc 81.2500 (74.0813) lr 4.6417e-04 eta 1:44:47 +epoch [36/50] batch [420/500] time 0.906 
(0.887) data 0.000 (0.002) loss 1.2871 (1.0320) acc 71.8750 (74.0774) lr 4.6417e-04 eta 1:44:43 +epoch [36/50] batch [425/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.3545 (1.0331) acc 65.6250 (74.0809) lr 4.6417e-04 eta 1:44:39 +epoch [36/50] batch [430/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.9316 (1.0324) acc 71.8750 (74.0770) lr 4.6417e-04 eta 1:44:34 +epoch [36/50] batch [435/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.0293 (1.0336) acc 71.8750 (74.0158) lr 4.6417e-04 eta 1:44:31 +epoch [36/50] batch [440/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.0312 (1.0340) acc 75.0000 (74.0412) lr 4.6417e-04 eta 1:44:26 +epoch [36/50] batch [445/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1318 (1.0340) acc 71.8750 (74.0239) lr 4.6417e-04 eta 1:44:21 +epoch [36/50] batch [450/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.0273 (1.0323) acc 71.8750 (74.0208) lr 4.6417e-04 eta 1:44:19 +epoch [36/50] batch [455/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.4980 (1.0332) acc 59.3750 (73.9835) lr 4.6417e-04 eta 1:44:14 +epoch [36/50] batch [460/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.5156 (1.0325) acc 53.1250 (73.9810) lr 4.6417e-04 eta 1:44:09 +epoch [36/50] batch [465/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.1953 (1.0320) acc 71.8750 (73.9718) lr 4.6417e-04 eta 1:44:04 +epoch [36/50] batch [470/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.4961 (1.0331) acc 65.6250 (73.9495) lr 4.6417e-04 eta 1:43:59 +epoch [36/50] batch [475/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.0127 (1.0312) acc 78.1250 (73.9868) lr 4.6417e-04 eta 1:43:54 +epoch [36/50] batch [480/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.9570 (1.0277) acc 75.0000 (74.0885) lr 4.6417e-04 eta 1:43:49 +epoch [36/50] batch [485/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7178 (1.0270) acc 84.3750 (74.0851) lr 4.6417e-04 eta 1:43:45 +epoch [36/50] batch [490/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.0684 (1.0272) acc 75.0000 
(74.0625) lr 4.6417e-04 eta 1:43:40 +epoch [36/50] batch [495/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.7637 (1.0307) acc 59.3750 (74.0278) lr 4.6417e-04 eta 1:43:35 +epoch [36/50] batch [500/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.0967 (1.0309) acc 65.6250 (74.0000) lr 4.1221e-04 eta 1:43:30 +epoch [37/50] batch [5/500] time 0.868 (1.015) data 0.000 (0.141) loss 1.0156 (1.1576) acc 75.0000 (71.2500) lr 4.1221e-04 eta 1:58:19 +epoch [37/50] batch [10/500] time 0.883 (0.943) data 0.000 (0.070) loss 0.8252 (1.0399) acc 81.2500 (72.8125) lr 4.1221e-04 eta 1:49:50 +epoch [37/50] batch [15/500] time 0.911 (0.922) data 0.000 (0.047) loss 1.3604 (1.0266) acc 65.6250 (73.1250) lr 4.1221e-04 eta 1:47:17 +epoch [37/50] batch [20/500] time 0.909 (0.915) data 0.000 (0.035) loss 1.6143 (1.0686) acc 68.7500 (72.0312) lr 4.1221e-04 eta 1:46:23 +epoch [37/50] batch [25/500] time 0.861 (0.903) data 0.000 (0.028) loss 0.9639 (1.0626) acc 71.8750 (72.2500) lr 4.1221e-04 eta 1:44:59 +epoch [37/50] batch [30/500] time 0.882 (0.901) data 0.000 (0.024) loss 1.1123 (1.0744) acc 78.1250 (72.1875) lr 4.1221e-04 eta 1:44:37 +epoch [37/50] batch [35/500] time 0.920 (0.899) data 0.000 (0.020) loss 1.1865 (1.0554) acc 78.1250 (72.3214) lr 4.1221e-04 eta 1:44:22 +epoch [37/50] batch [40/500] time 0.881 (0.897) data 0.000 (0.018) loss 0.8657 (1.0536) acc 78.1250 (72.6562) lr 4.1221e-04 eta 1:44:01 +epoch [37/50] batch [45/500] time 0.869 (0.894) data 0.000 (0.016) loss 0.8198 (1.0512) acc 75.0000 (72.4306) lr 4.1221e-04 eta 1:43:40 +epoch [37/50] batch [50/500] time 0.879 (0.895) data 0.000 (0.014) loss 1.1943 (1.0628) acc 71.8750 (72.0000) lr 4.1221e-04 eta 1:43:41 +epoch [37/50] batch [55/500] time 0.882 (0.895) data 0.000 (0.013) loss 1.1436 (1.0703) acc 78.1250 (72.2727) lr 4.1221e-04 eta 1:43:33 +epoch [37/50] batch [60/500] time 0.890 (0.895) data 0.000 (0.012) loss 1.0479 (1.0781) acc 75.0000 (72.3438) lr 4.1221e-04 eta 1:43:28 +epoch [37/50] batch [65/500] time 0.879 
(0.893) data 0.000 (0.011) loss 1.0312 (1.0703) acc 65.6250 (72.3558) lr 4.1221e-04 eta 1:43:15 +epoch [37/50] batch [70/500] time 0.854 (0.893) data 0.000 (0.010) loss 1.5254 (1.0706) acc 65.6250 (72.4554) lr 4.1221e-04 eta 1:43:08 +epoch [37/50] batch [75/500] time 0.871 (0.892) data 0.000 (0.010) loss 1.0342 (1.0637) acc 62.5000 (72.6667) lr 4.1221e-04 eta 1:42:56 +epoch [37/50] batch [80/500] time 0.875 (0.891) data 0.000 (0.009) loss 1.1924 (1.0572) acc 62.5000 (72.5391) lr 4.1221e-04 eta 1:42:48 +epoch [37/50] batch [85/500] time 0.922 (0.891) data 0.000 (0.008) loss 0.6279 (1.0561) acc 78.1250 (72.3162) lr 4.1221e-04 eta 1:42:44 +epoch [37/50] batch [90/500] time 1.003 (0.893) data 0.000 (0.008) loss 1.2051 (1.0546) acc 71.8750 (72.4653) lr 4.1221e-04 eta 1:42:48 +epoch [37/50] batch [95/500] time 0.865 (0.892) data 0.000 (0.008) loss 1.2246 (1.0592) acc 71.8750 (72.4342) lr 4.1221e-04 eta 1:42:36 +epoch [37/50] batch [100/500] time 0.886 (0.891) data 0.000 (0.007) loss 0.7700 (1.0600) acc 71.8750 (72.4062) lr 4.1221e-04 eta 1:42:26 +epoch [37/50] batch [105/500] time 0.905 (0.890) data 0.000 (0.007) loss 1.1055 (1.0604) acc 71.8750 (72.6190) lr 4.1221e-04 eta 1:42:18 +epoch [37/50] batch [110/500] time 0.908 (0.890) data 0.000 (0.007) loss 1.2168 (1.0538) acc 68.7500 (72.8125) lr 4.1221e-04 eta 1:42:13 +epoch [37/50] batch [115/500] time 0.876 (0.890) data 0.000 (0.006) loss 1.6660 (1.0529) acc 68.7500 (72.7989) lr 4.1221e-04 eta 1:42:05 +epoch [37/50] batch [120/500] time 0.900 (0.889) data 0.000 (0.006) loss 1.2129 (1.0494) acc 84.3750 (72.9688) lr 4.1221e-04 eta 1:41:59 +epoch [37/50] batch [125/500] time 0.903 (0.889) data 0.000 (0.006) loss 1.1211 (1.0495) acc 68.7500 (72.9750) lr 4.1221e-04 eta 1:41:53 +epoch [37/50] batch [130/500] time 0.879 (0.889) data 0.000 (0.006) loss 1.3203 (1.0586) acc 59.3750 (72.8125) lr 4.1221e-04 eta 1:41:49 +epoch [37/50] batch [135/500] time 0.866 (0.890) data 0.000 (0.005) loss 1.1113 (1.0518) acc 65.6250 (73.0324) lr 
4.1221e-04 eta 1:41:50 +epoch [37/50] batch [140/500] time 0.898 (0.890) data 0.000 (0.005) loss 1.0303 (1.0519) acc 75.0000 (73.1027) lr 4.1221e-04 eta 1:41:46 +epoch [37/50] batch [145/500] time 0.870 (0.890) data 0.000 (0.005) loss 1.5957 (1.0527) acc 59.3750 (73.0388) lr 4.1221e-04 eta 1:41:39 +epoch [37/50] batch [150/500] time 0.879 (0.890) data 0.000 (0.005) loss 0.7261 (1.0441) acc 81.2500 (73.2708) lr 4.1221e-04 eta 1:41:33 +epoch [37/50] batch [155/500] time 0.910 (0.890) data 0.000 (0.005) loss 0.9385 (1.0431) acc 71.8750 (73.2258) lr 4.1221e-04 eta 1:41:28 +epoch [37/50] batch [160/500] time 0.872 (0.889) data 0.000 (0.005) loss 1.3135 (1.0425) acc 59.3750 (73.2422) lr 4.1221e-04 eta 1:41:22 +epoch [37/50] batch [165/500] time 0.849 (0.888) data 0.000 (0.004) loss 1.5811 (1.0508) acc 65.6250 (73.0682) lr 4.1221e-04 eta 1:41:11 +epoch [37/50] batch [170/500] time 0.907 (0.889) data 0.000 (0.004) loss 0.9443 (1.0522) acc 78.1250 (73.1434) lr 4.1221e-04 eta 1:41:09 +epoch [37/50] batch [175/500] time 0.896 (0.888) data 0.000 (0.004) loss 0.5962 (1.0460) acc 84.3750 (73.2679) lr 4.1221e-04 eta 1:41:03 +epoch [37/50] batch [180/500] time 0.885 (0.888) data 0.000 (0.004) loss 0.6416 (1.0461) acc 87.5000 (73.3681) lr 4.1221e-04 eta 1:40:55 +epoch [37/50] batch [185/500] time 0.875 (0.888) data 0.000 (0.004) loss 1.1016 (1.0453) acc 71.8750 (73.3615) lr 4.1221e-04 eta 1:40:51 +epoch [37/50] batch [190/500] time 0.862 (0.888) data 0.000 (0.004) loss 1.3770 (1.0519) acc 71.8750 (73.2895) lr 4.1221e-04 eta 1:40:47 +epoch [37/50] batch [195/500] time 0.859 (0.888) data 0.000 (0.004) loss 1.5918 (1.0550) acc 68.7500 (73.3013) lr 4.1221e-04 eta 1:40:39 +epoch [37/50] batch [200/500] time 0.884 (0.887) data 0.000 (0.004) loss 0.8389 (1.0525) acc 75.0000 (73.3281) lr 4.1221e-04 eta 1:40:33 +epoch [37/50] batch [205/500] time 0.889 (0.887) data 0.000 (0.004) loss 1.6221 (1.0502) acc 65.6250 (73.4146) lr 4.1221e-04 eta 1:40:29 +epoch [37/50] batch [210/500] time 0.885 
(0.887) data 0.000 (0.004) loss 1.0859 (1.0524) acc 75.0000 (73.3482) lr 4.1221e-04 eta 1:40:24 +epoch [37/50] batch [215/500] time 0.883 (0.887) data 0.000 (0.004) loss 1.0850 (1.0502) acc 78.1250 (73.4302) lr 4.1221e-04 eta 1:40:19 +epoch [37/50] batch [220/500] time 0.910 (0.887) data 0.000 (0.003) loss 0.6318 (1.0440) acc 84.3750 (73.6364) lr 4.1221e-04 eta 1:40:15 +epoch [37/50] batch [225/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.3115 (1.0457) acc 75.0000 (73.5972) lr 4.1221e-04 eta 1:40:11 +epoch [37/50] batch [230/500] time 0.871 (0.887) data 0.000 (0.003) loss 0.8340 (1.0441) acc 68.7500 (73.5190) lr 4.1221e-04 eta 1:40:04 +epoch [37/50] batch [235/500] time 0.884 (0.887) data 0.000 (0.003) loss 1.6318 (1.0429) acc 59.3750 (73.4707) lr 4.1221e-04 eta 1:40:00 +epoch [37/50] batch [240/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.1758 (1.0402) acc 71.8750 (73.5547) lr 4.1221e-04 eta 1:39:56 +epoch [37/50] batch [245/500] time 0.896 (0.887) data 0.000 (0.003) loss 0.7266 (1.0397) acc 84.3750 (73.6097) lr 4.1221e-04 eta 1:39:52 +epoch [37/50] batch [250/500] time 0.866 (0.887) data 0.000 (0.003) loss 0.6812 (1.0365) acc 84.3750 (73.6375) lr 4.1221e-04 eta 1:39:47 +epoch [37/50] batch [255/500] time 0.909 (0.887) data 0.000 (0.003) loss 0.7075 (1.0379) acc 75.0000 (73.5907) lr 4.1221e-04 eta 1:39:45 +epoch [37/50] batch [260/500] time 0.855 (0.887) data 0.000 (0.003) loss 0.4468 (1.0346) acc 90.6250 (73.6418) lr 4.1221e-04 eta 1:39:40 +epoch [37/50] batch [265/500] time 0.897 (0.887) data 0.000 (0.003) loss 0.6426 (1.0332) acc 87.5000 (73.6792) lr 4.1221e-04 eta 1:39:34 +epoch [37/50] batch [270/500] time 0.887 (0.887) data 0.000 (0.003) loss 0.7563 (1.0318) acc 87.5000 (73.7384) lr 4.1221e-04 eta 1:39:29 +epoch [37/50] batch [275/500] time 0.891 (0.887) data 0.000 (0.003) loss 0.9868 (1.0328) acc 78.1250 (73.7273) lr 4.1221e-04 eta 1:39:27 +epoch [37/50] batch [280/500] time 0.874 (0.888) data 0.000 (0.003) loss 1.0322 (1.0307) acc 75.0000 
(73.7612) lr 4.1221e-04 eta 1:39:25 +epoch [37/50] batch [285/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.6128 (1.0294) acc 87.5000 (73.7500) lr 4.1221e-04 eta 1:39:20 +epoch [37/50] batch [290/500] time 0.885 (0.888) data 0.000 (0.003) loss 0.9873 (1.0328) acc 71.8750 (73.6207) lr 4.1221e-04 eta 1:39:15 +epoch [37/50] batch [295/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.5049 (1.0373) acc 53.1250 (73.4428) lr 4.1221e-04 eta 1:39:11 +epoch [37/50] batch [300/500] time 0.887 (0.887) data 0.000 (0.003) loss 0.9775 (1.0341) acc 71.8750 (73.5104) lr 4.1221e-04 eta 1:39:06 +epoch [37/50] batch [305/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.5566 (1.0351) acc 68.7500 (73.5041) lr 4.1221e-04 eta 1:39:02 +epoch [37/50] batch [310/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.8711 (1.0335) acc 78.1250 (73.5081) lr 4.1221e-04 eta 1:38:59 +epoch [37/50] batch [315/500] time 0.915 (0.888) data 0.000 (0.002) loss 1.3789 (1.0351) acc 75.0000 (73.5119) lr 4.1221e-04 eta 1:38:55 +epoch [37/50] batch [320/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.5830 (1.0304) acc 90.6250 (73.6621) lr 4.1221e-04 eta 1:38:49 +epoch [37/50] batch [325/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.1230 (1.0343) acc 71.8750 (73.5481) lr 4.1221e-04 eta 1:38:44 +epoch [37/50] batch [330/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1348 (1.0331) acc 78.1250 (73.6080) lr 4.1221e-04 eta 1:38:39 +epoch [37/50] batch [335/500] time 0.857 (0.887) data 0.000 (0.002) loss 0.8218 (1.0332) acc 81.2500 (73.5728) lr 4.1221e-04 eta 1:38:35 +epoch [37/50] batch [340/500] time 0.916 (0.888) data 0.000 (0.002) loss 1.1377 (1.0343) acc 78.1250 (73.5938) lr 4.1221e-04 eta 1:38:32 +epoch [37/50] batch [345/500] time 0.850 (0.888) data 0.000 (0.002) loss 0.8184 (1.0340) acc 71.8750 (73.5870) lr 4.1221e-04 eta 1:38:26 +epoch [37/50] batch [350/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.4092 (1.0348) acc 62.5000 (73.5804) lr 4.1221e-04 eta 1:38:20 +epoch [37/50] batch [355/500] 
time 0.899 (0.887) data 0.000 (0.002) loss 0.8330 (1.0345) acc 78.1250 (73.6092) lr 4.1221e-04 eta 1:38:16 +epoch [37/50] batch [360/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.2314 (1.0375) acc 78.1250 (73.5503) lr 4.1221e-04 eta 1:38:13 +epoch [37/50] batch [365/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.7134 (1.0339) acc 87.5000 (73.6387) lr 4.1221e-04 eta 1:38:10 +epoch [37/50] batch [370/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.4817 (1.0319) acc 84.3750 (73.6655) lr 4.1221e-04 eta 1:38:04 +epoch [37/50] batch [375/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.0068 (1.0312) acc 84.3750 (73.7333) lr 4.1221e-04 eta 1:38:00 +epoch [37/50] batch [380/500] time 0.915 (0.888) data 0.000 (0.002) loss 1.0049 (1.0316) acc 65.6250 (73.7500) lr 4.1221e-04 eta 1:37:57 +epoch [37/50] batch [385/500] time 0.854 (0.888) data 0.000 (0.002) loss 0.8877 (1.0318) acc 68.7500 (73.7013) lr 4.1221e-04 eta 1:37:53 +epoch [37/50] batch [390/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0996 (1.0351) acc 78.1250 (73.6859) lr 4.1221e-04 eta 1:37:47 +epoch [37/50] batch [395/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.8770 (1.0336) acc 75.0000 (73.7421) lr 4.1221e-04 eta 1:37:42 +epoch [37/50] batch [400/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.4941 (1.0333) acc 68.7500 (73.7812) lr 4.1221e-04 eta 1:37:36 +epoch [37/50] batch [405/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.8262 (1.0324) acc 78.1250 (73.8040) lr 4.1221e-04 eta 1:37:32 +epoch [37/50] batch [410/500] time 0.858 (0.887) data 0.000 (0.002) loss 0.9702 (1.0332) acc 75.0000 (73.8491) lr 4.1221e-04 eta 1:37:28 +epoch [37/50] batch [415/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.4775 (1.0335) acc 68.7500 (73.8404) lr 4.1221e-04 eta 1:37:22 +epoch [37/50] batch [420/500] time 1.012 (0.887) data 0.000 (0.002) loss 0.9468 (1.0337) acc 65.6250 (73.8095) lr 4.1221e-04 eta 1:37:18 +epoch [37/50] batch [425/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.5557 (1.0354) acc 
59.3750 (73.7721) lr 4.1221e-04 eta 1:37:14 +epoch [37/50] batch [430/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.2793 (1.0343) acc 75.0000 (73.8299) lr 4.1221e-04 eta 1:37:09 +epoch [37/50] batch [435/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9829 (1.0356) acc 71.8750 (73.7931) lr 4.1221e-04 eta 1:37:04 +epoch [37/50] batch [440/500] time 0.847 (0.887) data 0.000 (0.002) loss 1.4336 (1.0334) acc 59.3750 (73.8210) lr 4.1221e-04 eta 1:36:59 +epoch [37/50] batch [445/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.1543 (1.0329) acc 75.0000 (73.8343) lr 4.1221e-04 eta 1:36:53 +epoch [37/50] batch [450/500] time 0.856 (0.887) data 0.000 (0.002) loss 0.9658 (1.0342) acc 71.8750 (73.8125) lr 4.1221e-04 eta 1:36:48 +epoch [37/50] batch [455/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.9976 (1.0358) acc 87.5000 (73.8187) lr 4.1221e-04 eta 1:36:44 +epoch [37/50] batch [460/500] time 0.848 (0.887) data 0.000 (0.002) loss 0.5928 (1.0366) acc 87.5000 (73.8247) lr 4.1221e-04 eta 1:36:38 +epoch [37/50] batch [465/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.0547 (1.0377) acc 71.8750 (73.8374) lr 4.1221e-04 eta 1:36:34 +epoch [37/50] batch [470/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.0723 (1.0369) acc 78.1250 (73.8364) lr 4.1221e-04 eta 1:36:29 +epoch [37/50] batch [475/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.8145 (1.0377) acc 65.6250 (73.8487) lr 4.1221e-04 eta 1:36:24 +epoch [37/50] batch [480/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.9360 (1.0355) acc 71.8750 (73.8802) lr 4.1221e-04 eta 1:36:19 +epoch [37/50] batch [485/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.6860 (1.0345) acc 75.0000 (73.9046) lr 4.1221e-04 eta 1:36:14 +epoch [37/50] batch [490/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.3887 (1.0332) acc 65.6250 (73.9413) lr 4.1221e-04 eta 1:36:09 +epoch [37/50] batch [495/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.2412 (1.0341) acc 75.0000 (73.9331) lr 4.1221e-04 eta 1:36:05 +epoch [37/50] batch 
[500/500] time 0.878 (0.886) data 0.000 (0.002) loss 0.9292 (1.0347) acc 81.2500 (73.9125) lr 3.6258e-04 eta 1:36:01 +epoch [38/50] batch [5/500] time 0.879 (1.023) data 0.000 (0.140) loss 0.8701 (0.9125) acc 75.0000 (75.6250) lr 3.6258e-04 eta 1:50:41 +epoch [38/50] batch [10/500] time 0.871 (0.954) data 0.000 (0.070) loss 0.8179 (0.8657) acc 78.1250 (77.5000) lr 3.6258e-04 eta 1:43:11 +epoch [38/50] batch [15/500] time 0.884 (0.937) data 0.000 (0.047) loss 1.1494 (0.8815) acc 62.5000 (76.2500) lr 3.6258e-04 eta 1:41:16 +epoch [38/50] batch [20/500] time 0.893 (0.920) data 0.000 (0.035) loss 1.0439 (0.8903) acc 71.8750 (76.4062) lr 3.6258e-04 eta 1:39:23 +epoch [38/50] batch [25/500] time 0.901 (0.915) data 0.000 (0.028) loss 1.0713 (0.9079) acc 59.3750 (75.2500) lr 3.6258e-04 eta 1:38:46 +epoch [38/50] batch [30/500] time 0.897 (0.911) data 0.000 (0.023) loss 1.5684 (0.9826) acc 59.3750 (74.1667) lr 3.6258e-04 eta 1:38:12 +epoch [38/50] batch [35/500] time 0.884 (0.905) data 0.000 (0.020) loss 0.9492 (0.9897) acc 78.1250 (74.1071) lr 3.6258e-04 eta 1:37:32 +epoch [38/50] batch [40/500] time 0.859 (0.901) data 0.000 (0.018) loss 0.7373 (0.9745) acc 75.0000 (74.2188) lr 3.6258e-04 eta 1:36:59 +epoch [38/50] batch [45/500] time 0.875 (0.898) data 0.000 (0.016) loss 1.3916 (0.9777) acc 68.7500 (74.0278) lr 3.6258e-04 eta 1:36:35 +epoch [38/50] batch [50/500] time 0.908 (0.897) data 0.000 (0.014) loss 1.1162 (0.9947) acc 65.6250 (73.9375) lr 3.6258e-04 eta 1:36:25 +epoch [38/50] batch [55/500] time 0.859 (0.895) data 0.000 (0.013) loss 0.3374 (0.9817) acc 96.8750 (74.8864) lr 3.6258e-04 eta 1:36:07 +epoch [38/50] batch [60/500] time 0.892 (0.894) data 0.000 (0.012) loss 0.7993 (0.9765) acc 81.2500 (74.9479) lr 3.6258e-04 eta 1:35:59 +epoch [38/50] batch [65/500] time 0.879 (0.893) data 0.000 (0.011) loss 0.9341 (0.9977) acc 75.0000 (74.5192) lr 3.6258e-04 eta 1:35:46 +epoch [38/50] batch [70/500] time 0.942 (0.894) data 0.000 (0.010) loss 1.2012 (1.0052) acc 71.8750 
(74.4643) lr 3.6258e-04 eta 1:35:49 +epoch [38/50] batch [75/500] time 0.883 (0.895) data 0.000 (0.010) loss 0.9365 (1.0097) acc 65.6250 (74.2083) lr 3.6258e-04 eta 1:35:49 +epoch [38/50] batch [80/500] time 0.877 (0.895) data 0.000 (0.009) loss 1.4531 (1.0183) acc 62.5000 (74.0625) lr 3.6258e-04 eta 1:35:42 +epoch [38/50] batch [85/500] time 0.905 (0.894) data 0.000 (0.008) loss 1.4863 (1.0266) acc 65.6250 (73.8603) lr 3.6258e-04 eta 1:35:37 +epoch [38/50] batch [90/500] time 0.898 (0.894) data 0.000 (0.008) loss 0.7261 (1.0087) acc 84.3750 (74.4444) lr 3.6258e-04 eta 1:35:29 +epoch [38/50] batch [95/500] time 0.907 (0.894) data 0.000 (0.008) loss 1.2266 (1.0122) acc 71.8750 (74.4737) lr 3.6258e-04 eta 1:35:24 +epoch [38/50] batch [100/500] time 0.898 (0.893) data 0.000 (0.007) loss 0.6646 (1.0029) acc 81.2500 (74.6562) lr 3.6258e-04 eta 1:35:17 +epoch [38/50] batch [105/500] time 0.847 (0.892) data 0.000 (0.007) loss 0.7012 (1.0001) acc 78.1250 (74.6726) lr 3.6258e-04 eta 1:35:05 +epoch [38/50] batch [110/500] time 0.888 (0.891) data 0.000 (0.007) loss 1.2207 (1.0120) acc 71.8750 (74.4318) lr 3.6258e-04 eta 1:34:55 +epoch [38/50] batch [115/500] time 0.958 (0.891) data 0.000 (0.006) loss 1.5127 (1.0178) acc 75.0000 (74.3750) lr 3.6258e-04 eta 1:34:52 +epoch [38/50] batch [120/500] time 0.900 (0.891) data 0.000 (0.006) loss 1.0049 (1.0157) acc 71.8750 (74.4271) lr 3.6258e-04 eta 1:34:47 +epoch [38/50] batch [125/500] time 0.889 (0.891) data 0.000 (0.006) loss 1.0547 (1.0184) acc 68.7500 (74.2750) lr 3.6258e-04 eta 1:34:38 +epoch [38/50] batch [130/500] time 0.911 (0.890) data 0.000 (0.006) loss 1.5449 (1.0263) acc 71.8750 (74.0865) lr 3.6258e-04 eta 1:34:29 +epoch [38/50] batch [135/500] time 0.887 (0.890) data 0.000 (0.005) loss 1.0322 (1.0256) acc 71.8750 (74.0741) lr 3.6258e-04 eta 1:34:26 +epoch [38/50] batch [140/500] time 0.894 (0.890) data 0.000 (0.005) loss 1.1055 (1.0223) acc 68.7500 (74.0625) lr 3.6258e-04 eta 1:34:21 +epoch [38/50] batch [145/500] time 
0.879 (0.890) data 0.000 (0.005) loss 1.0996 (1.0246) acc 68.7500 (73.8147) lr 3.6258e-04 eta 1:34:17 +epoch [38/50] batch [150/500] time 0.862 (0.890) data 0.000 (0.005) loss 0.6953 (1.0181) acc 87.5000 (73.9792) lr 3.6258e-04 eta 1:34:10 +epoch [38/50] batch [155/500] time 0.883 (0.890) data 0.000 (0.005) loss 0.6802 (1.0160) acc 75.0000 (74.0121) lr 3.6258e-04 eta 1:34:05 +epoch [38/50] batch [160/500] time 0.903 (0.890) data 0.000 (0.005) loss 0.9912 (1.0115) acc 78.1250 (74.0820) lr 3.6258e-04 eta 1:33:59 +epoch [38/50] batch [165/500] time 0.912 (0.890) data 0.000 (0.004) loss 1.4922 (1.0145) acc 62.5000 (74.0341) lr 3.6258e-04 eta 1:33:55 +epoch [38/50] batch [170/500] time 0.906 (0.889) data 0.000 (0.004) loss 1.4492 (1.0159) acc 75.0000 (74.0074) lr 3.6258e-04 eta 1:33:49 +epoch [38/50] batch [175/500] time 0.890 (0.889) data 0.000 (0.004) loss 1.1904 (1.0141) acc 59.3750 (74.0536) lr 3.6258e-04 eta 1:33:43 +epoch [38/50] batch [180/500] time 0.909 (0.889) data 0.000 (0.004) loss 0.8589 (1.0139) acc 75.0000 (73.9931) lr 3.6258e-04 eta 1:33:40 +epoch [38/50] batch [185/500] time 0.876 (0.889) data 0.000 (0.004) loss 0.6782 (1.0138) acc 81.2500 (74.0541) lr 3.6258e-04 eta 1:33:34 +epoch [38/50] batch [190/500] time 0.893 (0.889) data 0.000 (0.004) loss 0.7983 (1.0137) acc 81.2500 (74.0296) lr 3.6258e-04 eta 1:33:27 +epoch [38/50] batch [195/500] time 0.915 (0.889) data 0.000 (0.004) loss 0.9106 (1.0099) acc 75.0000 (74.1506) lr 3.6258e-04 eta 1:33:22 +epoch [38/50] batch [200/500] time 0.902 (0.889) data 0.000 (0.004) loss 1.2998 (1.0141) acc 68.7500 (74.0938) lr 3.6258e-04 eta 1:33:20 +epoch [38/50] batch [205/500] time 0.858 (0.889) data 0.000 (0.004) loss 0.9717 (1.0122) acc 65.6250 (74.1463) lr 3.6258e-04 eta 1:33:14 +epoch [38/50] batch [210/500] time 0.871 (0.889) data 0.000 (0.004) loss 0.8174 (1.0091) acc 78.1250 (74.2113) lr 3.6258e-04 eta 1:33:09 +epoch [38/50] batch [215/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.1279 (1.0130) acc 68.7500 
(74.2006) lr 3.6258e-04 eta 1:33:07 +epoch [38/50] batch [220/500] time 0.859 (0.889) data 0.000 (0.003) loss 1.1475 (1.0138) acc 68.7500 (74.1477) lr 3.6258e-04 eta 1:33:02 +epoch [38/50] batch [225/500] time 0.883 (0.889) data 0.000 (0.003) loss 2.0215 (1.0145) acc 53.1250 (74.0694) lr 3.6258e-04 eta 1:32:56 +epoch [38/50] batch [230/500] time 0.903 (0.889) data 0.000 (0.003) loss 1.5029 (1.0156) acc 68.7500 (74.1712) lr 3.6258e-04 eta 1:32:50 +epoch [38/50] batch [235/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.2852 (1.0187) acc 71.8750 (74.0957) lr 3.6258e-04 eta 1:32:45 +epoch [38/50] batch [240/500] time 0.861 (0.888) data 0.000 (0.003) loss 0.7476 (1.0165) acc 71.8750 (74.1797) lr 3.6258e-04 eta 1:32:39 +epoch [38/50] batch [245/500] time 0.894 (0.888) data 0.000 (0.003) loss 1.1709 (1.0145) acc 75.0000 (74.1837) lr 3.6258e-04 eta 1:32:34 +epoch [38/50] batch [250/500] time 0.912 (0.888) data 0.000 (0.003) loss 0.6602 (1.0133) acc 81.2500 (74.2000) lr 3.6258e-04 eta 1:32:30 +epoch [38/50] batch [255/500] time 0.874 (0.888) data 0.000 (0.003) loss 0.6191 (1.0094) acc 81.2500 (74.3260) lr 3.6258e-04 eta 1:32:23 +epoch [38/50] batch [260/500] time 0.885 (0.888) data 0.000 (0.003) loss 0.9663 (1.0099) acc 81.2500 (74.3029) lr 3.6258e-04 eta 1:32:22 +epoch [38/50] batch [265/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.3818 (1.0149) acc 68.7500 (74.2217) lr 3.6258e-04 eta 1:32:17 +epoch [38/50] batch [270/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.0771 (1.0142) acc 75.0000 (74.2708) lr 3.6258e-04 eta 1:32:12 +epoch [38/50] batch [275/500] time 0.875 (0.888) data 0.000 (0.003) loss 0.6162 (1.0117) acc 81.2500 (74.3068) lr 3.6258e-04 eta 1:32:04 +epoch [38/50] batch [280/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.6792 (1.0108) acc 87.5000 (74.3527) lr 3.6258e-04 eta 1:32:00 +epoch [38/50] batch [285/500] time 0.888 (0.887) data 0.000 (0.003) loss 0.8267 (1.0053) acc 78.1250 (74.4189) lr 3.6258e-04 eta 1:31:54 +epoch [38/50] batch [290/500] 
time 0.857 (0.887) data 0.000 (0.003) loss 0.8052 (1.0061) acc 75.0000 (74.4289) lr 3.6258e-04 eta 1:31:48 +epoch [38/50] batch [295/500] time 0.866 (0.887) data 0.000 (0.003) loss 0.7969 (1.0051) acc 81.2500 (74.3962) lr 3.6258e-04 eta 1:31:42 +epoch [38/50] batch [300/500] time 0.890 (0.887) data 0.000 (0.003) loss 1.0459 (1.0057) acc 68.7500 (74.3854) lr 3.6258e-04 eta 1:31:38 +epoch [38/50] batch [305/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.0625 (1.0064) acc 78.1250 (74.3955) lr 3.6258e-04 eta 1:31:33 +epoch [38/50] batch [310/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.8823 (1.0095) acc 81.2500 (74.3649) lr 3.6258e-04 eta 1:31:28 +epoch [38/50] batch [315/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.6191 (1.0121) acc 59.3750 (74.3353) lr 3.6258e-04 eta 1:31:25 +epoch [38/50] batch [320/500] time 0.856 (0.887) data 0.000 (0.002) loss 0.7300 (1.0121) acc 78.1250 (74.3359) lr 3.6258e-04 eta 1:31:19 +epoch [38/50] batch [325/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.0488 (1.0124) acc 68.7500 (74.3365) lr 3.6258e-04 eta 1:31:15 +epoch [38/50] batch [330/500] time 0.934 (0.887) data 0.000 (0.002) loss 0.9692 (1.0111) acc 68.7500 (74.2992) lr 3.6258e-04 eta 1:31:12 +epoch [38/50] batch [335/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.5083 (1.0112) acc 90.6250 (74.2631) lr 3.6258e-04 eta 1:31:07 +epoch [38/50] batch [340/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.4229 (1.0148) acc 68.7500 (74.2096) lr 3.6258e-04 eta 1:31:04 +epoch [38/50] batch [345/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.1309 (1.0142) acc 87.5000 (74.2754) lr 3.6258e-04 eta 1:31:00 +epoch [38/50] batch [350/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.2002 (1.0131) acc 71.8750 (74.2946) lr 3.6258e-04 eta 1:30:54 +epoch [38/50] batch [355/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.4160 (1.0132) acc 65.6250 (74.3046) lr 3.6258e-04 eta 1:30:50 +epoch [38/50] batch [360/500] time 0.849 (0.887) data 0.000 (0.002) loss 0.6973 (1.0138) acc 
81.2500 (74.3229) lr 3.6258e-04 eta 1:30:46 +epoch [38/50] batch [365/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.8853 (1.0116) acc 78.1250 (74.3664) lr 3.6258e-04 eta 1:30:41 +epoch [38/50] batch [370/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.4758 (1.0100) acc 84.3750 (74.4003) lr 3.6258e-04 eta 1:30:36 +epoch [38/50] batch [375/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.4414 (1.0112) acc 71.8750 (74.4000) lr 3.6258e-04 eta 1:30:33 +epoch [38/50] batch [380/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.7168 (1.0123) acc 56.2500 (74.3092) lr 3.6258e-04 eta 1:30:28 +epoch [38/50] batch [385/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.5156 (1.0130) acc 68.7500 (74.2938) lr 3.6258e-04 eta 1:30:24 +epoch [38/50] batch [390/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.3848 (1.0166) acc 71.8750 (74.1987) lr 3.6258e-04 eta 1:30:19 +epoch [38/50] batch [395/500] time 0.874 (0.887) data 0.000 (0.002) loss 2.0605 (1.0206) acc 53.1250 (74.0823) lr 3.6258e-04 eta 1:30:15 +epoch [38/50] batch [400/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.8877 (1.0202) acc 78.1250 (74.1172) lr 3.6258e-04 eta 1:30:12 +epoch [38/50] batch [405/500] time 0.925 (0.888) data 0.000 (0.002) loss 0.9927 (1.0229) acc 78.1250 (74.0741) lr 3.6258e-04 eta 1:30:09 +epoch [38/50] batch [410/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.2051 (1.0235) acc 71.8750 (74.0701) lr 3.6258e-04 eta 1:30:04 +epoch [38/50] batch [415/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.2529 (1.0246) acc 65.6250 (74.0211) lr 3.6258e-04 eta 1:30:00 +epoch [38/50] batch [420/500] time 0.857 (0.887) data 0.000 (0.002) loss 0.9370 (1.0253) acc 71.8750 (73.9881) lr 3.6258e-04 eta 1:29:55 +epoch [38/50] batch [425/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.7139 (1.0222) acc 87.5000 (74.0735) lr 3.6258e-04 eta 1:29:51 +epoch [38/50] batch [430/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.4238 (1.0222) acc 68.7500 (74.0552) lr 3.6258e-04 eta 1:29:47 +epoch [38/50] batch 
[435/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.4919 (1.0213) acc 90.6250 (74.0876) lr 3.6258e-04 eta 1:29:43 +epoch [38/50] batch [440/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.6816 (1.0213) acc 81.2500 (74.0838) lr 3.6258e-04 eta 1:29:38 +epoch [38/50] batch [445/500] time 0.904 (0.888) data 0.000 (0.002) loss 1.4980 (1.0242) acc 65.6250 (74.0730) lr 3.6258e-04 eta 1:29:34 +epoch [38/50] batch [450/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9419 (1.0232) acc 84.3750 (74.0764) lr 3.6258e-04 eta 1:29:29 +epoch [38/50] batch [455/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.7217 (1.0240) acc 84.3750 (74.0728) lr 3.6258e-04 eta 1:29:24 +epoch [38/50] batch [460/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.1650 (1.0240) acc 75.0000 (74.0761) lr 3.6258e-04 eta 1:29:19 +epoch [38/50] batch [465/500] time 0.849 (0.887) data 0.000 (0.002) loss 1.3809 (1.0252) acc 65.6250 (74.0323) lr 3.6258e-04 eta 1:29:14 +epoch [38/50] batch [470/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.1436 (1.0281) acc 75.0000 (73.9694) lr 3.6258e-04 eta 1:29:10 +epoch [38/50] batch [475/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.7378 (1.0271) acc 75.0000 (73.9474) lr 3.6258e-04 eta 1:29:05 +epoch [38/50] batch [480/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.0391 (1.0274) acc 81.2500 (73.9453) lr 3.6258e-04 eta 1:29:00 +epoch [38/50] batch [485/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.6123 (1.0275) acc 50.0000 (73.9175) lr 3.6258e-04 eta 1:28:56 +epoch [38/50] batch [490/500] time 0.907 (0.887) data 0.000 (0.002) loss 0.7251 (1.0270) acc 81.2500 (73.9541) lr 3.6258e-04 eta 1:28:51 +epoch [38/50] batch [495/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.2588 (1.0284) acc 75.0000 (73.9520) lr 3.6258e-04 eta 1:28:47 +epoch [38/50] batch [500/500] time 0.954 (0.887) data 0.000 (0.002) loss 0.5840 (1.0275) acc 81.2500 (73.9562) lr 3.1545e-04 eta 1:28:42 +epoch [39/50] batch [5/500] time 0.885 (1.016) data 0.000 (0.129) loss 1.7178 (1.2555) 
acc 62.5000 (71.2500) lr 3.1545e-04 eta 1:41:31 +epoch [39/50] batch [10/500] time 0.883 (0.943) data 0.000 (0.065) loss 0.5156 (0.9835) acc 87.5000 (76.5625) lr 3.1545e-04 eta 1:34:06 +epoch [39/50] batch [15/500] time 0.881 (0.921) data 0.000 (0.043) loss 0.7056 (0.9484) acc 81.2500 (77.2917) lr 3.1545e-04 eta 1:31:50 +epoch [39/50] batch [20/500] time 0.900 (0.911) data 0.000 (0.032) loss 1.0645 (0.9542) acc 65.6250 (76.7188) lr 3.1545e-04 eta 1:30:47 +epoch [39/50] batch [25/500] time 0.883 (0.906) data 0.000 (0.026) loss 1.1338 (0.9214) acc 75.0000 (77.3750) lr 3.1545e-04 eta 1:30:10 +epoch [39/50] batch [30/500] time 0.871 (0.900) data 0.000 (0.022) loss 2.1387 (0.9899) acc 53.1250 (76.4583) lr 3.1545e-04 eta 1:29:31 +epoch [39/50] batch [35/500] time 0.891 (0.898) data 0.000 (0.019) loss 0.6196 (0.9920) acc 84.3750 (76.4286) lr 3.1545e-04 eta 1:29:18 +epoch [39/50] batch [40/500] time 0.891 (0.897) data 0.000 (0.016) loss 0.8989 (1.0212) acc 78.1250 (75.5469) lr 3.1545e-04 eta 1:29:05 +epoch [39/50] batch [45/500] time 0.896 (0.897) data 0.000 (0.015) loss 1.7217 (1.0444) acc 62.5000 (75.0694) lr 3.1545e-04 eta 1:29:01 +epoch [39/50] batch [50/500] time 0.908 (0.897) data 0.000 (0.013) loss 1.1963 (1.0387) acc 75.0000 (74.8125) lr 3.1545e-04 eta 1:28:59 +epoch [39/50] batch [55/500] time 0.887 (0.897) data 0.000 (0.012) loss 0.8149 (1.0253) acc 75.0000 (75.1136) lr 3.1545e-04 eta 1:28:50 +epoch [39/50] batch [60/500] time 0.993 (0.897) data 0.000 (0.011) loss 0.8843 (1.0132) acc 71.8750 (75.0521) lr 3.1545e-04 eta 1:28:48 +epoch [39/50] batch [65/500] time 0.872 (0.895) data 0.000 (0.010) loss 0.7959 (1.0114) acc 81.2500 (74.9519) lr 3.1545e-04 eta 1:28:32 +epoch [39/50] batch [70/500] time 0.858 (0.894) data 0.000 (0.009) loss 0.9883 (1.0164) acc 75.0000 (75.0000) lr 3.1545e-04 eta 1:28:22 +epoch [39/50] batch [75/500] time 0.863 (0.893) data 0.000 (0.009) loss 0.7598 (1.0251) acc 81.2500 (75.0417) lr 3.1545e-04 eta 1:28:08 +epoch [39/50] batch [80/500] 
time 0.853 (0.891) data 0.000 (0.008) loss 1.1680 (1.0297) acc 71.8750 (74.8047) lr 3.1545e-04 eta 1:27:55 +epoch [39/50] batch [85/500] time 0.896 (0.890) data 0.000 (0.008) loss 1.0342 (1.0276) acc 75.0000 (74.8897) lr 3.1545e-04 eta 1:27:47 +epoch [39/50] batch [90/500] time 0.866 (0.890) data 0.000 (0.007) loss 1.2578 (1.0382) acc 78.1250 (74.8611) lr 3.1545e-04 eta 1:27:41 +epoch [39/50] batch [95/500] time 0.884 (0.890) data 0.000 (0.007) loss 0.6807 (1.0298) acc 78.1250 (74.9013) lr 3.1545e-04 eta 1:27:33 +epoch [39/50] batch [100/500] time 0.895 (0.890) data 0.000 (0.007) loss 1.5801 (1.0286) acc 71.8750 (74.8750) lr 3.1545e-04 eta 1:27:28 +epoch [39/50] batch [105/500] time 0.865 (0.890) data 0.000 (0.006) loss 0.6875 (1.0235) acc 78.1250 (74.7024) lr 3.1545e-04 eta 1:27:28 +epoch [39/50] batch [110/500] time 0.869 (0.890) data 0.000 (0.006) loss 0.6572 (1.0298) acc 81.2500 (74.4886) lr 3.1545e-04 eta 1:27:19 +epoch [39/50] batch [115/500] time 0.874 (0.889) data 0.000 (0.006) loss 0.9482 (1.0240) acc 84.3750 (74.5924) lr 3.1545e-04 eta 1:27:13 +epoch [39/50] batch [120/500] time 0.913 (0.889) data 0.000 (0.006) loss 1.5518 (1.0267) acc 65.6250 (74.5312) lr 3.1545e-04 eta 1:27:10 +epoch [39/50] batch [125/500] time 0.873 (0.889) data 0.000 (0.005) loss 1.3643 (1.0338) acc 56.2500 (74.2500) lr 3.1545e-04 eta 1:27:04 +epoch [39/50] batch [130/500] time 0.914 (0.889) data 0.000 (0.005) loss 1.0625 (1.0379) acc 81.2500 (74.2788) lr 3.1545e-04 eta 1:27:01 +epoch [39/50] batch [135/500] time 0.874 (0.889) data 0.000 (0.005) loss 0.9229 (1.0280) acc 78.1250 (74.4676) lr 3.1545e-04 eta 1:26:54 +epoch [39/50] batch [140/500] time 0.901 (0.889) data 0.000 (0.005) loss 0.5210 (1.0171) acc 87.5000 (74.7545) lr 3.1545e-04 eta 1:26:50 +epoch [39/50] batch [145/500] time 0.864 (0.889) data 0.000 (0.005) loss 2.2109 (1.0237) acc 46.8750 (74.5905) lr 3.1545e-04 eta 1:26:43 +epoch [39/50] batch [150/500] time 0.855 (0.888) data 0.000 (0.005) loss 1.0576 (1.0271) acc 81.2500 
(74.6667) lr 3.1545e-04 eta 1:26:33 +epoch [39/50] batch [155/500] time 0.894 (0.888) data 0.000 (0.004) loss 0.5137 (1.0227) acc 87.5000 (74.7379) lr 3.1545e-04 eta 1:26:27 +epoch [39/50] batch [160/500] time 0.867 (0.887) data 0.000 (0.004) loss 0.9121 (1.0222) acc 78.1250 (74.7070) lr 3.1545e-04 eta 1:26:22 +epoch [39/50] batch [165/500] time 0.873 (0.887) data 0.000 (0.004) loss 1.1318 (1.0238) acc 78.1250 (74.6780) lr 3.1545e-04 eta 1:26:16 +epoch [39/50] batch [170/500] time 0.863 (0.886) data 0.000 (0.004) loss 0.7275 (1.0259) acc 78.1250 (74.6324) lr 3.1545e-04 eta 1:26:08 +epoch [39/50] batch [175/500] time 0.904 (0.887) data 0.000 (0.004) loss 0.9082 (1.0267) acc 87.5000 (74.6250) lr 3.1545e-04 eta 1:26:06 +epoch [39/50] batch [180/500] time 0.877 (0.887) data 0.000 (0.004) loss 1.0312 (1.0311) acc 75.0000 (74.5139) lr 3.1545e-04 eta 1:26:02 +epoch [39/50] batch [185/500] time 0.869 (0.887) data 0.000 (0.004) loss 0.4797 (1.0287) acc 84.3750 (74.5101) lr 3.1545e-04 eta 1:25:55 +epoch [39/50] batch [190/500] time 0.881 (0.887) data 0.000 (0.004) loss 0.7192 (1.0247) acc 78.1250 (74.5724) lr 3.1545e-04 eta 1:25:51 +epoch [39/50] batch [195/500] time 0.881 (0.886) data 0.000 (0.004) loss 1.0596 (1.0206) acc 78.1250 (74.6795) lr 3.1545e-04 eta 1:25:43 +epoch [39/50] batch [200/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.6533 (1.0267) acc 62.5000 (74.4844) lr 3.1545e-04 eta 1:25:37 +epoch [39/50] batch [205/500] time 0.888 (0.886) data 0.000 (0.003) loss 1.2520 (1.0302) acc 68.7500 (74.3902) lr 3.1545e-04 eta 1:25:35 +epoch [39/50] batch [210/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.3350 (1.0268) acc 65.6250 (74.4940) lr 3.1545e-04 eta 1:25:30 +epoch [39/50] batch [215/500] time 0.900 (0.886) data 0.000 (0.003) loss 1.0732 (1.0274) acc 71.8750 (74.4622) lr 3.1545e-04 eta 1:25:27 +epoch [39/50] batch [220/500] time 0.856 (0.886) data 0.000 (0.003) loss 0.8193 (1.0310) acc 78.1250 (74.4034) lr 3.1545e-04 eta 1:25:20 +epoch [39/50] batch [225/500] 
time 0.905 (0.886) data 0.000 (0.003) loss 0.4980 (1.0261) acc 84.3750 (74.4167) lr 3.1545e-04 eta 1:25:17 +epoch [39/50] batch [230/500] time 0.908 (0.886) data 0.000 (0.003) loss 1.8613 (1.0307) acc 62.5000 (74.2799) lr 3.1545e-04 eta 1:25:12 +epoch [39/50] batch [235/500] time 0.855 (0.886) data 0.000 (0.003) loss 0.6396 (1.0279) acc 81.2500 (74.3085) lr 3.1545e-04 eta 1:25:07 +epoch [39/50] batch [240/500] time 0.870 (0.886) data 0.000 (0.003) loss 0.6748 (1.0233) acc 84.3750 (74.4661) lr 3.1545e-04 eta 1:25:03 +epoch [39/50] batch [245/500] time 0.855 (0.886) data 0.000 (0.003) loss 0.4980 (1.0238) acc 78.1250 (74.4260) lr 3.1545e-04 eta 1:24:59 +epoch [39/50] batch [250/500] time 0.907 (0.886) data 0.000 (0.003) loss 1.3350 (1.0236) acc 75.0000 (74.4875) lr 3.1545e-04 eta 1:24:56 +epoch [39/50] batch [255/500] time 0.857 (0.886) data 0.000 (0.003) loss 0.8550 (1.0231) acc 75.0000 (74.5221) lr 3.1545e-04 eta 1:24:51 +epoch [39/50] batch [260/500] time 0.868 (0.886) data 0.000 (0.003) loss 0.7705 (1.0218) acc 81.2500 (74.4471) lr 3.1545e-04 eta 1:24:47 +epoch [39/50] batch [265/500] time 0.862 (0.886) data 0.000 (0.003) loss 0.9526 (1.0193) acc 81.2500 (74.4222) lr 3.1545e-04 eta 1:24:41 +epoch [39/50] batch [270/500] time 0.871 (0.886) data 0.000 (0.003) loss 0.6182 (1.0171) acc 84.3750 (74.4676) lr 3.1545e-04 eta 1:24:37 +epoch [39/50] batch [275/500] time 0.889 (0.886) data 0.000 (0.003) loss 0.8999 (1.0190) acc 84.3750 (74.4205) lr 3.1545e-04 eta 1:24:32 +epoch [39/50] batch [280/500] time 0.872 (0.886) data 0.000 (0.003) loss 1.2490 (1.0199) acc 71.8750 (74.3750) lr 3.1545e-04 eta 1:24:26 +epoch [39/50] batch [285/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.6426 (1.0177) acc 81.2500 (74.4079) lr 3.1545e-04 eta 1:24:22 +epoch [39/50] batch [290/500] time 0.876 (0.886) data 0.001 (0.002) loss 1.0918 (1.0175) acc 78.1250 (74.3858) lr 3.1545e-04 eta 1:24:17 +epoch [39/50] batch [295/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.2754 (1.0171) acc 
68.7500 (74.3750) lr 3.1545e-04 eta 1:24:14 +epoch [39/50] batch [300/500] time 0.877 (0.886) data 0.000 (0.002) loss 0.8052 (1.0157) acc 78.1250 (74.3958) lr 3.1545e-04 eta 1:24:11 +epoch [39/50] batch [305/500] time 0.866 (0.886) data 0.000 (0.002) loss 0.8931 (1.0152) acc 75.0000 (74.4160) lr 3.1545e-04 eta 1:24:06 +epoch [39/50] batch [310/500] time 0.859 (0.886) data 0.000 (0.002) loss 1.0615 (1.0143) acc 75.0000 (74.4355) lr 3.1545e-04 eta 1:24:01 +epoch [39/50] batch [315/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.1211 (1.0139) acc 75.0000 (74.3849) lr 3.1545e-04 eta 1:23:56 +epoch [39/50] batch [320/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.3555 (1.0145) acc 71.8750 (74.3652) lr 3.1545e-04 eta 1:23:51 +epoch [39/50] batch [325/500] time 0.889 (0.886) data 0.000 (0.002) loss 0.8135 (1.0135) acc 75.0000 (74.3654) lr 3.1545e-04 eta 1:23:47 +epoch [39/50] batch [330/500] time 0.913 (0.886) data 0.000 (0.002) loss 1.2002 (1.0152) acc 75.0000 (74.3750) lr 3.1545e-04 eta 1:23:43 +epoch [39/50] batch [335/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.0762 (1.0153) acc 78.1250 (74.3377) lr 3.1545e-04 eta 1:23:39 +epoch [39/50] batch [340/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.3906 (1.0140) acc 62.5000 (74.3199) lr 3.1545e-04 eta 1:23:34 +epoch [39/50] batch [345/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.0771 (1.0120) acc 75.0000 (74.3841) lr 3.1545e-04 eta 1:23:30 +epoch [39/50] batch [350/500] time 0.851 (0.886) data 0.000 (0.002) loss 0.9292 (1.0115) acc 81.2500 (74.3839) lr 3.1545e-04 eta 1:23:27 +epoch [39/50] batch [355/500] time 0.913 (0.886) data 0.000 (0.002) loss 1.0654 (1.0094) acc 75.0000 (74.4278) lr 3.1545e-04 eta 1:23:23 +epoch [39/50] batch [360/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.6470 (1.0104) acc 75.0000 (74.3924) lr 3.1545e-04 eta 1:23:18 +epoch [39/50] batch [365/500] time 0.900 (0.886) data 0.000 (0.002) loss 0.7158 (1.0122) acc 84.3750 (74.3921) lr 3.1545e-04 eta 1:23:15 +epoch [39/50] batch 
[370/500] time 0.849 (0.886) data 0.000 (0.002) loss 1.6055 (1.0143) acc 71.8750 (74.3750) lr 3.1545e-04 eta 1:23:10 +epoch [39/50] batch [375/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4375 (1.0128) acc 71.8750 (74.4333) lr 3.1545e-04 eta 1:23:05 +epoch [39/50] batch [380/500] time 0.877 (0.886) data 0.000 (0.002) loss 0.8257 (1.0140) acc 78.1250 (74.4572) lr 3.1545e-04 eta 1:23:00 +epoch [39/50] batch [385/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.8745 (1.0133) acc 75.0000 (74.4805) lr 3.1545e-04 eta 1:22:56 +epoch [39/50] batch [390/500] time 0.982 (0.887) data 0.000 (0.002) loss 0.9546 (1.0126) acc 75.0000 (74.4712) lr 3.1545e-04 eta 1:22:54 +epoch [39/50] batch [395/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.0078 (1.0139) acc 68.7500 (74.4462) lr 3.1545e-04 eta 1:22:49 +epoch [39/50] batch [400/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0059 (1.0123) acc 68.7500 (74.4453) lr 3.1545e-04 eta 1:22:45 +epoch [39/50] batch [405/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.2090 (1.0139) acc 65.6250 (74.4136) lr 3.1545e-04 eta 1:22:40 +epoch [39/50] batch [410/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.3037 (1.0133) acc 68.7500 (74.4131) lr 3.1545e-04 eta 1:22:35 +epoch [39/50] batch [415/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.0303 (1.0146) acc 65.6250 (74.3599) lr 3.1545e-04 eta 1:22:30 +epoch [39/50] batch [420/500] time 0.908 (0.886) data 0.000 (0.002) loss 0.7959 (1.0131) acc 78.1250 (74.4271) lr 3.1545e-04 eta 1:22:25 +epoch [39/50] batch [425/500] time 0.876 (0.886) data 0.000 (0.002) loss 0.2166 (1.0140) acc 93.7500 (74.4485) lr 3.1545e-04 eta 1:22:20 +epoch [39/50] batch [430/500] time 0.845 (0.886) data 0.000 (0.002) loss 1.1260 (1.0143) acc 68.7500 (74.4113) lr 3.1545e-04 eta 1:22:15 +epoch [39/50] batch [435/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.9702 (1.0156) acc 68.7500 (74.3678) lr 3.1545e-04 eta 1:22:10 +epoch [39/50] batch [440/500] time 0.873 (0.886) data 0.000 (0.002) loss 0.4641 
(1.0150) acc 90.6250 (74.3750) lr 3.1545e-04 eta 1:22:06 +epoch [39/50] batch [445/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.5762 (1.0135) acc 84.3750 (74.4031) lr 3.1545e-04 eta 1:22:01 +epoch [39/50] batch [450/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.2236 (1.0131) acc 68.7500 (74.4444) lr 3.1545e-04 eta 1:21:57 +epoch [39/50] batch [455/500] time 0.917 (0.886) data 0.000 (0.002) loss 0.9937 (1.0129) acc 81.2500 (74.4299) lr 3.1545e-04 eta 1:21:53 +epoch [39/50] batch [460/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.3213 (1.0139) acc 78.1250 (74.4361) lr 3.1545e-04 eta 1:21:48 +epoch [39/50] batch [465/500] time 0.851 (0.886) data 0.000 (0.002) loss 0.8965 (1.0133) acc 75.0000 (74.4355) lr 3.1545e-04 eta 1:21:43 +epoch [39/50] batch [470/500] time 0.908 (0.886) data 0.000 (0.002) loss 0.6719 (1.0121) acc 87.5000 (74.5013) lr 3.1545e-04 eta 1:21:40 +epoch [39/50] batch [475/500] time 0.893 (0.886) data 0.000 (0.002) loss 0.5469 (1.0112) acc 87.5000 (74.4868) lr 3.1545e-04 eta 1:21:35 +epoch [39/50] batch [480/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.7207 (1.0104) acc 78.1250 (74.4987) lr 3.1545e-04 eta 1:21:31 +epoch [39/50] batch [485/500] time 0.927 (0.886) data 0.000 (0.002) loss 1.2021 (1.0106) acc 71.8750 (74.4845) lr 3.1545e-04 eta 1:21:26 +epoch [39/50] batch [490/500] time 0.913 (0.886) data 0.000 (0.002) loss 0.9702 (1.0087) acc 84.3750 (74.5599) lr 3.1545e-04 eta 1:21:23 +epoch [39/50] batch [495/500] time 0.877 (0.886) data 0.000 (0.002) loss 0.9468 (1.0085) acc 65.6250 (74.5518) lr 3.1545e-04 eta 1:21:19 +epoch [39/50] batch [500/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.5498 (1.0086) acc 62.5000 (74.5375) lr 2.7103e-04 eta 1:21:15 +epoch [40/50] batch [5/500] time 0.865 (1.005) data 0.000 (0.124) loss 1.1123 (1.0179) acc 71.8750 (74.3750) lr 2.7103e-04 eta 1:32:00 +epoch [40/50] batch [10/500] time 0.865 (0.947) data 0.000 (0.062) loss 0.6567 (0.9583) acc 90.6250 (75.6250) lr 2.7103e-04 eta 1:26:36 +epoch 
[40/50] batch [15/500] time 0.907 (0.930) data 0.000 (0.041) loss 0.9307 (1.0557) acc 75.0000 (73.9583) lr 2.7103e-04 eta 1:25:03 +epoch [40/50] batch [20/500] time 0.903 (0.921) data 0.000 (0.031) loss 1.1104 (1.0346) acc 78.1250 (75.0000) lr 2.7103e-04 eta 1:24:09 +epoch [40/50] batch [25/500] time 0.859 (0.910) data 0.000 (0.025) loss 1.1143 (1.0841) acc 65.6250 (73.3750) lr 2.7103e-04 eta 1:23:04 +epoch [40/50] batch [30/500] time 0.901 (0.907) data 0.000 (0.021) loss 1.2168 (1.0629) acc 78.1250 (74.4792) lr 2.7103e-04 eta 1:22:38 +epoch [40/50] batch [35/500] time 0.899 (0.902) data 0.000 (0.018) loss 1.3838 (1.1132) acc 65.6250 (73.9286) lr 2.7103e-04 eta 1:22:09 +epoch [40/50] batch [40/500] time 0.901 (0.900) data 0.000 (0.016) loss 1.2607 (1.1022) acc 75.0000 (73.6719) lr 2.7103e-04 eta 1:21:56 +epoch [40/50] batch [45/500] time 0.981 (0.901) data 0.000 (0.014) loss 1.4395 (1.1086) acc 68.7500 (73.4028) lr 2.7103e-04 eta 1:21:54 +epoch [40/50] batch [50/500] time 0.896 (0.899) data 0.000 (0.013) loss 0.4861 (1.0956) acc 84.3750 (73.1875) lr 2.7103e-04 eta 1:21:40 +epoch [40/50] batch [55/500] time 0.887 (0.898) data 0.000 (0.011) loss 0.9365 (1.0824) acc 75.0000 (73.1250) lr 2.7103e-04 eta 1:21:28 +epoch [40/50] batch [60/500] time 0.844 (0.896) data 0.000 (0.011) loss 0.7852 (1.0781) acc 78.1250 (73.1771) lr 2.7103e-04 eta 1:21:13 +epoch [40/50] batch [65/500] time 0.877 (0.894) data 0.000 (0.010) loss 1.3779 (1.0803) acc 59.3750 (72.6923) lr 2.7103e-04 eta 1:21:00 +epoch [40/50] batch [70/500] time 0.899 (0.895) data 0.000 (0.009) loss 1.0088 (1.0786) acc 71.8750 (72.7679) lr 2.7103e-04 eta 1:20:57 +epoch [40/50] batch [75/500] time 0.872 (0.893) data 0.000 (0.008) loss 0.8584 (1.0669) acc 78.1250 (72.8333) lr 2.7103e-04 eta 1:20:43 +epoch [40/50] batch [80/500] time 0.882 (0.892) data 0.000 (0.008) loss 0.5557 (1.0566) acc 84.3750 (73.0469) lr 2.7103e-04 eta 1:20:33 +epoch [40/50] batch [85/500] time 0.895 (0.891) data 0.000 (0.007) loss 1.0391 (1.0551) 
acc 68.7500 (73.0147) lr 2.7103e-04 eta 1:20:24 +epoch [40/50] batch [90/500] time 0.879 (0.891) data 0.000 (0.007) loss 0.8633 (1.0610) acc 81.2500 (73.2292) lr 2.7103e-04 eta 1:20:22 +epoch [40/50] batch [95/500] time 0.887 (0.892) data 0.000 (0.007) loss 0.6025 (1.0509) acc 78.1250 (73.2237) lr 2.7103e-04 eta 1:20:18 +epoch [40/50] batch [100/500] time 0.889 (0.891) data 0.000 (0.006) loss 0.6338 (1.0469) acc 78.1250 (73.1875) lr 2.7103e-04 eta 1:20:12 +epoch [40/50] batch [105/500] time 0.880 (0.891) data 0.000 (0.006) loss 1.0215 (1.0442) acc 75.0000 (73.4821) lr 2.7103e-04 eta 1:20:06 +epoch [40/50] batch [110/500] time 0.884 (0.890) data 0.000 (0.006) loss 1.1348 (1.0402) acc 71.8750 (73.2670) lr 2.7103e-04 eta 1:19:57 +epoch [40/50] batch [115/500] time 0.871 (0.890) data 0.000 (0.006) loss 0.9619 (1.0374) acc 75.0000 (73.3967) lr 2.7103e-04 eta 1:19:51 +epoch [40/50] batch [120/500] time 0.866 (0.889) data 0.000 (0.005) loss 1.0098 (1.0331) acc 75.0000 (73.4115) lr 2.7103e-04 eta 1:19:44 +epoch [40/50] batch [125/500] time 0.907 (0.890) data 0.000 (0.005) loss 0.9434 (1.0349) acc 75.0000 (73.3750) lr 2.7103e-04 eta 1:19:43 +epoch [40/50] batch [130/500] time 0.871 (0.889) data 0.000 (0.005) loss 0.8179 (1.0419) acc 81.2500 (73.3173) lr 2.7103e-04 eta 1:19:36 +epoch [40/50] batch [135/500] time 0.888 (0.889) data 0.000 (0.005) loss 0.8174 (1.0428) acc 78.1250 (73.3102) lr 2.7103e-04 eta 1:19:31 +epoch [40/50] batch [140/500] time 0.876 (0.889) data 0.000 (0.005) loss 0.9053 (1.0411) acc 84.3750 (73.3929) lr 2.7103e-04 eta 1:19:25 +epoch [40/50] batch [145/500] time 0.860 (0.889) data 0.000 (0.004) loss 1.3096 (1.0443) acc 62.5000 (73.3190) lr 2.7103e-04 eta 1:19:18 +epoch [40/50] batch [150/500] time 0.856 (0.888) data 0.000 (0.004) loss 1.3105 (1.0447) acc 62.5000 (73.3125) lr 2.7103e-04 eta 1:19:11 +epoch [40/50] batch [155/500] time 0.897 (0.888) data 0.000 (0.004) loss 1.4238 (1.0509) acc 59.3750 (73.1452) lr 2.7103e-04 eta 1:19:06 +epoch [40/50] batch 
[160/500] time 0.915 (0.888) data 0.000 (0.004) loss 0.9370 (1.0497) acc 81.2500 (73.1641) lr 2.7103e-04 eta 1:19:02 +epoch [40/50] batch [165/500] time 0.888 (0.889) data 0.000 (0.004) loss 0.8906 (1.0445) acc 75.0000 (73.2576) lr 2.7103e-04 eta 1:19:01 +epoch [40/50] batch [170/500] time 0.873 (0.889) data 0.000 (0.004) loss 0.3557 (1.0383) acc 90.6250 (73.4743) lr 2.7103e-04 eta 1:18:55 +epoch [40/50] batch [175/500] time 0.879 (0.889) data 0.000 (0.004) loss 1.4482 (1.0380) acc 65.6250 (73.5357) lr 2.7103e-04 eta 1:18:51 +epoch [40/50] batch [180/500] time 0.858 (0.888) data 0.000 (0.004) loss 1.0947 (1.0442) acc 71.8750 (73.3333) lr 2.7103e-04 eta 1:18:45 +epoch [40/50] batch [185/500] time 0.872 (0.888) data 0.000 (0.004) loss 1.3096 (1.0404) acc 71.8750 (73.4966) lr 2.7103e-04 eta 1:18:42 +epoch [40/50] batch [190/500] time 0.878 (0.889) data 0.000 (0.003) loss 1.3281 (1.0395) acc 62.5000 (73.3882) lr 2.7103e-04 eta 1:18:40 +epoch [40/50] batch [195/500] time 0.904 (0.889) data 0.000 (0.003) loss 0.7026 (1.0417) acc 78.1250 (73.4455) lr 2.7103e-04 eta 1:18:35 +epoch [40/50] batch [200/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.3838 (1.0492) acc 56.2500 (73.2969) lr 2.7103e-04 eta 1:18:29 +epoch [40/50] batch [205/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.4216 (1.0490) acc 90.6250 (73.3384) lr 2.7103e-04 eta 1:18:24 +epoch [40/50] batch [210/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.0127 (1.0523) acc 78.1250 (73.3036) lr 2.7103e-04 eta 1:18:20 +epoch [40/50] batch [215/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.9873 (1.0471) acc 78.1250 (73.3721) lr 2.7103e-04 eta 1:18:16 +epoch [40/50] batch [220/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.2432 (1.0485) acc 68.7500 (73.3239) lr 2.7103e-04 eta 1:18:11 +epoch [40/50] batch [225/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.4688 (1.0544) acc 62.5000 (73.2500) lr 2.7103e-04 eta 1:18:06 +epoch [40/50] batch [230/500] time 0.900 (0.888) data 0.000 (0.003) loss 0.8867 
(1.0489) acc 78.1250 (73.3696) lr 2.7103e-04 eta 1:18:01 +epoch [40/50] batch [235/500] time 0.838 (0.888) data 0.000 (0.003) loss 0.5005 (1.0459) acc 84.3750 (73.4043) lr 2.7103e-04 eta 1:17:57 +epoch [40/50] batch [240/500] time 0.855 (0.888) data 0.000 (0.003) loss 1.4648 (1.0495) acc 53.1250 (73.2812) lr 2.7103e-04 eta 1:17:50 +epoch [40/50] batch [245/500] time 0.890 (0.888) data 0.000 (0.003) loss 0.7964 (1.0487) acc 84.3750 (73.2270) lr 2.7103e-04 eta 1:17:45 +epoch [40/50] batch [250/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.3423 (1.0428) acc 90.6250 (73.3500) lr 2.7103e-04 eta 1:17:41 +epoch [40/50] batch [255/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.8130 (1.0374) acc 75.0000 (73.4436) lr 2.7103e-04 eta 1:17:37 +epoch [40/50] batch [260/500] time 0.900 (0.888) data 0.000 (0.003) loss 1.3057 (1.0414) acc 71.8750 (73.3894) lr 2.7103e-04 eta 1:17:31 +epoch [40/50] batch [265/500] time 0.855 (0.887) data 0.000 (0.003) loss 0.6533 (1.0380) acc 84.3750 (73.4788) lr 2.7103e-04 eta 1:17:25 +epoch [40/50] batch [270/500] time 0.858 (0.887) data 0.000 (0.003) loss 0.5420 (1.0346) acc 90.6250 (73.6227) lr 2.7103e-04 eta 1:17:19 +epoch [40/50] batch [275/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.2793 (1.0331) acc 71.8750 (73.6818) lr 2.7103e-04 eta 1:17:13 +epoch [40/50] batch [280/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.0010 (1.0368) acc 75.0000 (73.6049) lr 2.7103e-04 eta 1:17:08 +epoch [40/50] batch [285/500] time 0.844 (0.886) data 0.000 (0.002) loss 0.9697 (1.0380) acc 75.0000 (73.6294) lr 2.7103e-04 eta 1:17:03 +epoch [40/50] batch [290/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2832 (1.0389) acc 71.8750 (73.6530) lr 2.7103e-04 eta 1:16:58 +epoch [40/50] batch [295/500] time 0.837 (0.886) data 0.000 (0.002) loss 0.8369 (1.0402) acc 78.1250 (73.5699) lr 2.7103e-04 eta 1:16:51 +epoch [40/50] batch [300/500] time 0.889 (0.886) data 0.000 (0.002) loss 0.5181 (1.0376) acc 81.2500 (73.6562) lr 2.7103e-04 eta 1:16:47 +epoch 
[40/50] batch [305/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.5073 (1.0355) acc 87.5000 (73.6885) lr 2.7103e-04 eta 1:16:43 +epoch [40/50] batch [310/500] time 0.883 (0.886) data 0.000 (0.002) loss 2.0332 (1.0386) acc 62.5000 (73.6593) lr 2.7103e-04 eta 1:16:38 +epoch [40/50] batch [315/500] time 0.905 (0.886) data 0.000 (0.002) loss 1.1162 (1.0369) acc 75.0000 (73.7004) lr 2.7103e-04 eta 1:16:35 +epoch [40/50] batch [320/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.9326 (1.0349) acc 71.8750 (73.7012) lr 2.7103e-04 eta 1:16:30 +epoch [40/50] batch [325/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.2832 (1.0344) acc 62.5000 (73.6538) lr 2.7103e-04 eta 1:16:25 +epoch [40/50] batch [330/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.5938 (1.0361) acc 59.3750 (73.5701) lr 2.7103e-04 eta 1:16:20 +epoch [40/50] batch [335/500] time 0.904 (0.886) data 0.000 (0.002) loss 0.9038 (1.0349) acc 71.8750 (73.6007) lr 2.7103e-04 eta 1:16:17 +epoch [40/50] batch [340/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.3242 (1.0321) acc 62.5000 (73.6857) lr 2.7103e-04 eta 1:16:14 +epoch [40/50] batch [345/500] time 0.859 (0.886) data 0.000 (0.002) loss 1.6846 (1.0343) acc 65.6250 (73.6322) lr 2.7103e-04 eta 1:16:09 +epoch [40/50] batch [350/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.7466 (1.0337) acc 75.0000 (73.6518) lr 2.7103e-04 eta 1:16:04 +epoch [40/50] batch [355/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.4272 (1.0334) acc 87.5000 (73.6532) lr 2.7103e-04 eta 1:16:00 +epoch [40/50] batch [360/500] time 0.877 (0.886) data 0.000 (0.002) loss 1.2148 (1.0338) acc 68.7500 (73.6632) lr 2.7103e-04 eta 1:15:55 +epoch [40/50] batch [365/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.9302 (1.0330) acc 81.2500 (73.6901) lr 2.7103e-04 eta 1:15:50 +epoch [40/50] batch [370/500] time 0.896 (0.886) data 0.000 (0.002) loss 1.4521 (1.0357) acc 75.0000 (73.6571) lr 2.7103e-04 eta 1:15:45 +epoch [40/50] batch [375/500] time 0.966 (0.886) data 0.000 (0.002) loss 
0.4932 (1.0324) acc 93.7500 (73.7417) lr 2.7103e-04 eta 1:15:41 +epoch [40/50] batch [380/500] time 0.892 (0.886) data 0.000 (0.002) loss 1.2676 (1.0329) acc 59.3750 (73.7171) lr 2.7103e-04 eta 1:15:37 +epoch [40/50] batch [385/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.7012 (1.0302) acc 87.5000 (73.7744) lr 2.7103e-04 eta 1:15:32 +epoch [40/50] batch [390/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.0732 (1.0310) acc 75.0000 (73.7821) lr 2.7103e-04 eta 1:15:28 +epoch [40/50] batch [395/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.2236 (1.0341) acc 62.5000 (73.6867) lr 2.7103e-04 eta 1:15:24 +epoch [40/50] batch [400/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.6182 (1.0354) acc 78.1250 (73.6484) lr 2.7103e-04 eta 1:15:18 +epoch [40/50] batch [405/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.1514 (1.0356) acc 71.8750 (73.6265) lr 2.7103e-04 eta 1:15:14 +epoch [40/50] batch [410/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.0293 (1.0379) acc 78.1250 (73.5595) lr 2.7103e-04 eta 1:15:09 +epoch [40/50] batch [415/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.3447 (1.0342) acc 93.7500 (73.6672) lr 2.7103e-04 eta 1:15:05 +epoch [40/50] batch [420/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.2520 (1.0344) acc 62.5000 (73.6607) lr 2.7103e-04 eta 1:15:00 +epoch [40/50] batch [425/500] time 0.876 (0.886) data 0.000 (0.002) loss 0.8604 (1.0308) acc 71.8750 (73.7279) lr 2.7103e-04 eta 1:14:55 +epoch [40/50] batch [430/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.2832 (1.0312) acc 65.6250 (73.6773) lr 2.7103e-04 eta 1:14:50 +epoch [40/50] batch [435/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.1260 (1.0298) acc 71.8750 (73.7069) lr 2.7103e-04 eta 1:14:46 +epoch [40/50] batch [440/500] time 0.858 (0.886) data 0.000 (0.002) loss 0.8354 (1.0303) acc 81.2500 (73.7287) lr 2.7103e-04 eta 1:14:41 +epoch [40/50] batch [445/500] time 0.892 (0.886) data 0.000 (0.002) loss 0.8491 (1.0306) acc 78.1250 (73.7500) lr 2.7103e-04 eta 1:14:36 
+epoch [40/50] batch [450/500] time 0.865 (0.885) data 0.000 (0.002) loss 0.8384 (1.0288) acc 65.6250 (73.7778) lr 2.7103e-04 eta 1:14:30 +epoch [40/50] batch [455/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.6733 (1.0279) acc 75.0000 (73.7775) lr 2.7103e-04 eta 1:14:26 +epoch [40/50] batch [460/500] time 0.902 (0.885) data 0.000 (0.002) loss 1.1562 (1.0308) acc 81.2500 (73.7296) lr 2.7103e-04 eta 1:14:21 +epoch [40/50] batch [465/500] time 0.893 (0.885) data 0.000 (0.002) loss 0.8628 (1.0294) acc 71.8750 (73.7433) lr 2.7103e-04 eta 1:14:15 +epoch [40/50] batch [470/500] time 0.902 (0.885) data 0.000 (0.002) loss 1.5000 (1.0292) acc 65.6250 (73.7566) lr 2.7103e-04 eta 1:14:11 +epoch [40/50] batch [475/500] time 0.854 (0.885) data 0.000 (0.002) loss 0.9863 (1.0279) acc 71.8750 (73.7566) lr 2.7103e-04 eta 1:14:08 +epoch [40/50] batch [480/500] time 0.870 (0.885) data 0.000 (0.002) loss 0.9219 (1.0280) acc 75.0000 (73.7630) lr 2.7103e-04 eta 1:14:03 +epoch [40/50] batch [485/500] time 0.883 (0.885) data 0.000 (0.002) loss 0.8120 (1.0262) acc 75.0000 (73.7887) lr 2.7103e-04 eta 1:13:58 +epoch [40/50] batch [490/500] time 0.906 (0.885) data 0.000 (0.001) loss 0.6777 (1.0255) acc 78.1250 (73.7755) lr 2.7103e-04 eta 1:13:54 +epoch [40/50] batch [495/500] time 0.901 (0.885) data 0.000 (0.001) loss 0.8335 (1.0237) acc 81.2500 (73.7879) lr 2.7103e-04 eta 1:13:49 +epoch [40/50] batch [500/500] time 0.873 (0.885) data 0.000 (0.001) loss 0.6782 (1.0244) acc 84.3750 (73.7562) lr 2.2949e-04 eta 1:13:45 +epoch [41/50] batch [5/500] time 0.877 (1.018) data 0.000 (0.134) loss 0.7007 (0.8033) acc 84.3750 (78.7500) lr 2.2949e-04 eta 1:24:43 +epoch [41/50] batch [10/500] time 0.858 (0.950) data 0.000 (0.067) loss 0.6685 (0.7047) acc 81.2500 (80.6250) lr 2.2949e-04 eta 1:19:01 +epoch [41/50] batch [15/500] time 0.903 (0.928) data 0.000 (0.045) loss 1.8389 (0.8416) acc 62.5000 (78.7500) lr 2.2949e-04 eta 1:17:04 +epoch [41/50] batch [20/500] time 0.863 (0.915) data 0.000 (0.034) 
loss 1.1963 (0.9557) acc 71.8750 (76.5625) lr 2.2949e-04 eta 1:15:57 +epoch [41/50] batch [25/500] time 0.994 (0.914) data 0.000 (0.027) loss 0.7559 (0.9736) acc 78.1250 (75.5000) lr 2.2949e-04 eta 1:15:47 +epoch [41/50] batch [30/500] time 0.883 (0.911) data 0.000 (0.022) loss 1.3848 (0.9491) acc 68.7500 (75.5208) lr 2.2949e-04 eta 1:15:29 +epoch [41/50] batch [35/500] time 0.898 (0.909) data 0.000 (0.019) loss 0.5386 (0.9426) acc 84.3750 (75.4464) lr 2.2949e-04 eta 1:15:10 +epoch [41/50] batch [40/500] time 0.899 (0.907) data 0.000 (0.017) loss 1.1426 (0.9593) acc 68.7500 (75.0781) lr 2.2949e-04 eta 1:14:56 +epoch [41/50] batch [45/500] time 0.888 (0.905) data 0.000 (0.015) loss 0.9409 (0.9627) acc 71.8750 (74.5139) lr 2.2949e-04 eta 1:14:41 +epoch [41/50] batch [50/500] time 0.899 (0.903) data 0.000 (0.014) loss 0.6870 (0.9600) acc 87.5000 (74.7500) lr 2.2949e-04 eta 1:14:27 +epoch [41/50] batch [55/500] time 0.866 (0.900) data 0.000 (0.012) loss 1.1348 (0.9470) acc 71.8750 (75.2273) lr 2.2949e-04 eta 1:14:11 +epoch [41/50] batch [60/500] time 0.897 (0.900) data 0.000 (0.011) loss 0.7202 (0.9786) acc 78.1250 (74.4792) lr 2.2949e-04 eta 1:14:04 +epoch [41/50] batch [65/500] time 0.895 (0.898) data 0.000 (0.011) loss 0.8823 (0.9589) acc 81.2500 (75.0481) lr 2.2949e-04 eta 1:13:53 +epoch [41/50] batch [70/500] time 0.877 (0.899) data 0.000 (0.010) loss 0.8218 (0.9592) acc 71.8750 (74.9554) lr 2.2949e-04 eta 1:13:52 +epoch [41/50] batch [75/500] time 0.900 (0.899) data 0.000 (0.009) loss 0.8330 (0.9490) acc 71.8750 (75.1667) lr 2.2949e-04 eta 1:13:45 +epoch [41/50] batch [80/500] time 0.892 (0.898) data 0.000 (0.009) loss 0.9243 (0.9458) acc 78.1250 (75.3125) lr 2.2949e-04 eta 1:13:36 +epoch [41/50] batch [85/500] time 0.879 (0.897) data 0.000 (0.008) loss 0.6626 (0.9443) acc 84.3750 (75.4044) lr 2.2949e-04 eta 1:13:27 +epoch [41/50] batch [90/500] time 0.877 (0.896) data 0.000 (0.008) loss 0.7407 (0.9500) acc 78.1250 (75.2778) lr 2.2949e-04 eta 1:13:17 +epoch 
[41/50] batch [95/500] time 0.866 (0.895) data 0.000 (0.007) loss 0.7456 (0.9559) acc 71.8750 (74.9342) lr 2.2949e-04 eta 1:13:08 +epoch [41/50] batch [100/500] time 0.863 (0.894) data 0.000 (0.007) loss 1.4688 (0.9730) acc 75.0000 (74.6875) lr 2.2949e-04 eta 1:13:00 +epoch [41/50] batch [105/500] time 0.890 (0.894) data 0.000 (0.007) loss 1.1084 (0.9758) acc 68.7500 (74.7024) lr 2.2949e-04 eta 1:12:54 +epoch [41/50] batch [110/500] time 0.921 (0.894) data 0.000 (0.006) loss 1.0312 (0.9732) acc 62.5000 (74.7443) lr 2.2949e-04 eta 1:12:50 +epoch [41/50] batch [115/500] time 0.889 (0.894) data 0.000 (0.006) loss 0.8940 (0.9782) acc 71.8750 (74.4837) lr 2.2949e-04 eta 1:12:46 +epoch [41/50] batch [120/500] time 0.913 (0.894) data 0.000 (0.006) loss 0.7812 (0.9858) acc 78.1250 (74.4010) lr 2.2949e-04 eta 1:12:42 +epoch [41/50] batch [125/500] time 0.872 (0.894) data 0.001 (0.006) loss 0.6655 (0.9847) acc 75.0000 (74.4000) lr 2.2949e-04 eta 1:12:38 +epoch [41/50] batch [130/500] time 0.899 (0.893) data 0.000 (0.005) loss 1.5723 (0.9881) acc 68.7500 (74.3029) lr 2.2949e-04 eta 1:12:30 +epoch [41/50] batch [135/500] time 0.887 (0.893) data 0.000 (0.005) loss 0.6304 (0.9907) acc 81.2500 (74.1435) lr 2.2949e-04 eta 1:12:25 +epoch [41/50] batch [140/500] time 0.873 (0.892) data 0.000 (0.005) loss 0.3926 (0.9875) acc 93.7500 (74.3080) lr 2.2949e-04 eta 1:12:17 +epoch [41/50] batch [145/500] time 0.896 (0.892) data 0.000 (0.005) loss 1.0078 (0.9852) acc 65.6250 (74.2241) lr 2.2949e-04 eta 1:12:11 +epoch [41/50] batch [150/500] time 0.851 (0.892) data 0.000 (0.005) loss 0.9385 (0.9803) acc 81.2500 (74.3750) lr 2.2949e-04 eta 1:12:06 +epoch [41/50] batch [155/500] time 0.884 (0.892) data 0.000 (0.005) loss 0.5352 (0.9786) acc 81.2500 (74.3548) lr 2.2949e-04 eta 1:12:00 +epoch [41/50] batch [160/500] time 0.907 (0.892) data 0.000 (0.004) loss 1.0156 (0.9781) acc 78.1250 (74.4336) lr 2.2949e-04 eta 1:11:55 +epoch [41/50] batch [165/500] time 0.965 (0.892) data 0.000 (0.004) loss 
0.6582 (0.9754) acc 71.8750 (74.4318) lr 2.2949e-04 eta 1:11:52 +epoch [41/50] batch [170/500] time 0.880 (0.891) data 0.000 (0.004) loss 0.7798 (0.9722) acc 90.6250 (74.6324) lr 2.2949e-04 eta 1:11:45 +epoch [41/50] batch [175/500] time 0.878 (0.891) data 0.000 (0.004) loss 0.9473 (0.9746) acc 81.2500 (74.6607) lr 2.2949e-04 eta 1:11:38 +epoch [41/50] batch [180/500] time 0.884 (0.891) data 0.000 (0.004) loss 1.2383 (0.9735) acc 75.0000 (74.7917) lr 2.2949e-04 eta 1:11:33 +epoch [41/50] batch [185/500] time 0.875 (0.890) data 0.000 (0.004) loss 0.8062 (0.9727) acc 81.2500 (74.7466) lr 2.2949e-04 eta 1:11:27 +epoch [41/50] batch [190/500] time 0.859 (0.890) data 0.000 (0.004) loss 0.9644 (0.9722) acc 68.7500 (74.7697) lr 2.2949e-04 eta 1:11:23 +epoch [41/50] batch [195/500] time 0.893 (0.890) data 0.000 (0.004) loss 0.9209 (0.9741) acc 78.1250 (74.8397) lr 2.2949e-04 eta 1:11:17 +epoch [41/50] batch [200/500] time 0.883 (0.890) data 0.000 (0.004) loss 0.3167 (0.9722) acc 90.6250 (74.7656) lr 2.2949e-04 eta 1:11:13 +epoch [41/50] batch [205/500] time 0.868 (0.890) data 0.000 (0.003) loss 1.2461 (0.9797) acc 62.5000 (74.5884) lr 2.2949e-04 eta 1:11:06 +epoch [41/50] batch [210/500] time 0.870 (0.890) data 0.000 (0.003) loss 0.6489 (0.9787) acc 81.2500 (74.7024) lr 2.2949e-04 eta 1:11:03 +epoch [41/50] batch [215/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.8135 (0.9854) acc 65.6250 (74.6366) lr 2.2949e-04 eta 1:10:58 +epoch [41/50] batch [220/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.1973 (0.9862) acc 71.8750 (74.6733) lr 2.2949e-04 eta 1:10:53 +epoch [41/50] batch [225/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.0039 (0.9881) acc 71.8750 (74.6528) lr 2.2949e-04 eta 1:10:47 +epoch [41/50] batch [230/500] time 0.896 (0.890) data 0.000 (0.003) loss 0.7705 (0.9833) acc 87.5000 (74.7418) lr 2.2949e-04 eta 1:10:44 +epoch [41/50] batch [235/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.9126 (0.9826) acc 81.2500 (74.8138) lr 2.2949e-04 eta 1:10:41 
+epoch [41/50] batch [240/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.2979 (0.9867) acc 75.0000 (74.7135) lr 2.2949e-04 eta 1:10:35 +epoch [41/50] batch [245/500] time 0.905 (0.890) data 0.000 (0.003) loss 0.7529 (0.9847) acc 84.3750 (74.8087) lr 2.2949e-04 eta 1:10:30 +epoch [41/50] batch [250/500] time 0.909 (0.890) data 0.000 (0.003) loss 1.5088 (0.9876) acc 68.7500 (74.7875) lr 2.2949e-04 eta 1:10:26 +epoch [41/50] batch [255/500] time 0.880 (0.890) data 0.000 (0.003) loss 1.3135 (0.9902) acc 62.5000 (74.7304) lr 2.2949e-04 eta 1:10:21 +epoch [41/50] batch [260/500] time 0.872 (0.890) data 0.000 (0.003) loss 0.9424 (0.9885) acc 75.0000 (74.7716) lr 2.2949e-04 eta 1:10:16 +epoch [41/50] batch [265/500] time 0.886 (0.889) data 0.000 (0.003) loss 1.5811 (0.9945) acc 65.6250 (74.5873) lr 2.2949e-04 eta 1:10:10 +epoch [41/50] batch [270/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.2041 (0.9925) acc 65.6250 (74.5718) lr 2.2949e-04 eta 1:10:05 +epoch [41/50] batch [275/500] time 0.894 (0.889) data 0.000 (0.003) loss 0.9907 (0.9937) acc 78.1250 (74.5682) lr 2.2949e-04 eta 1:10:00 +epoch [41/50] batch [280/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.1748 (0.9936) acc 68.7500 (74.5424) lr 2.2949e-04 eta 1:09:53 +epoch [41/50] batch [285/500] time 0.879 (0.888) data 0.000 (0.003) loss 0.5513 (0.9972) acc 84.3750 (74.5175) lr 2.2949e-04 eta 1:09:48 +epoch [41/50] batch [290/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.4531 (0.9939) acc 84.3750 (74.5905) lr 2.2949e-04 eta 1:09:43 +epoch [41/50] batch [295/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.5889 (0.9930) acc 84.3750 (74.5763) lr 2.2949e-04 eta 1:09:39 +epoch [41/50] batch [300/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.4678 (0.9937) acc 62.5000 (74.5312) lr 2.2949e-04 eta 1:09:34 +epoch [41/50] batch [305/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.0225 (0.9942) acc 75.0000 (74.5287) lr 2.2949e-04 eta 1:09:28 +epoch [41/50] batch [310/500] time 0.924 (0.888) data 0.000 
(0.002) loss 0.7041 (0.9953) acc 75.0000 (74.4859) lr 2.2949e-04 eta 1:09:25 +epoch [41/50] batch [315/500] time 0.923 (0.888) data 0.000 (0.002) loss 1.0771 (0.9960) acc 65.6250 (74.4544) lr 2.2949e-04 eta 1:09:21 +epoch [41/50] batch [320/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.8022 (0.9934) acc 71.8750 (74.4824) lr 2.2949e-04 eta 1:09:17 +epoch [41/50] batch [325/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.1416 (0.9936) acc 78.1250 (74.5192) lr 2.2949e-04 eta 1:09:13 +epoch [41/50] batch [330/500] time 0.856 (0.888) data 0.000 (0.002) loss 0.9409 (0.9959) acc 81.2500 (74.4508) lr 2.2949e-04 eta 1:09:08 +epoch [41/50] batch [335/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.0869 (0.9931) acc 75.0000 (74.5243) lr 2.2949e-04 eta 1:09:03 +epoch [41/50] batch [340/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.9038 (0.9920) acc 65.6250 (74.4485) lr 2.2949e-04 eta 1:08:59 +epoch [41/50] batch [345/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.3105 (0.9940) acc 62.5000 (74.4022) lr 2.2949e-04 eta 1:08:54 +epoch [41/50] batch [350/500] time 0.850 (0.888) data 0.000 (0.002) loss 0.6685 (0.9928) acc 78.1250 (74.4375) lr 2.2949e-04 eta 1:08:50 +epoch [41/50] batch [355/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.6235 (0.9920) acc 87.5000 (74.4542) lr 2.2949e-04 eta 1:08:46 +epoch [41/50] batch [360/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.6562 (0.9915) acc 78.1250 (74.4531) lr 2.2949e-04 eta 1:08:41 +epoch [41/50] batch [365/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.1318 (0.9906) acc 81.2500 (74.4949) lr 2.2949e-04 eta 1:08:37 +epoch [41/50] batch [370/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.1797 (0.9898) acc 78.1250 (74.5355) lr 2.2949e-04 eta 1:08:32 +epoch [41/50] batch [375/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.8740 (0.9897) acc 78.1250 (74.5167) lr 2.2949e-04 eta 1:08:28 +epoch [41/50] batch [380/500] time 0.916 (0.888) data 0.000 (0.002) loss 0.5132 (0.9907) acc 87.5000 (74.5312) lr 2.2949e-04 
eta 1:08:23 +epoch [41/50] batch [385/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.7109 (0.9918) acc 68.7500 (74.5373) lr 2.2949e-04 eta 1:08:18 +epoch [41/50] batch [390/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.4980 (0.9938) acc 71.8750 (74.5593) lr 2.2949e-04 eta 1:08:12 +epoch [41/50] batch [395/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.3789 (0.9987) acc 62.5000 (74.4620) lr 2.2949e-04 eta 1:08:08 +epoch [41/50] batch [400/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.1865 (1.0018) acc 78.1250 (74.4062) lr 2.2949e-04 eta 1:08:03 +epoch [41/50] batch [405/500] time 0.921 (0.888) data 0.000 (0.002) loss 1.2363 (1.0016) acc 78.1250 (74.4522) lr 2.2949e-04 eta 1:07:59 +epoch [41/50] batch [410/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.3457 (1.0024) acc 65.6250 (74.4284) lr 2.2949e-04 eta 1:07:54 +epoch [41/50] batch [415/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.5664 (1.0022) acc 84.3750 (74.4729) lr 2.2949e-04 eta 1:07:50 +epoch [41/50] batch [420/500] time 0.849 (0.887) data 0.000 (0.002) loss 0.9253 (1.0023) acc 78.1250 (74.4643) lr 2.2949e-04 eta 1:07:44 +epoch [41/50] batch [425/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.8237 (1.0022) acc 78.1250 (74.4485) lr 2.2949e-04 eta 1:07:39 +epoch [41/50] batch [430/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.1543 (1.0026) acc 68.7500 (74.4477) lr 2.2949e-04 eta 1:07:35 +epoch [41/50] batch [435/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8442 (1.0026) acc 65.6250 (74.4037) lr 2.2949e-04 eta 1:07:30 +epoch [41/50] batch [440/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1660 (1.0049) acc 75.0000 (74.3750) lr 2.2949e-04 eta 1:07:25 +epoch [41/50] batch [445/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0264 (1.0048) acc 71.8750 (74.3680) lr 2.2949e-04 eta 1:07:21 +epoch [41/50] batch [450/500] time 0.876 (0.887) data 0.000 (0.002) loss 0.9570 (1.0042) acc 78.1250 (74.3542) lr 2.2949e-04 eta 1:07:17 +epoch [41/50] batch [455/500] time 0.868 (0.887) data 
0.000 (0.002) loss 1.3838 (1.0064) acc 78.1250 (74.3750) lr 2.2949e-04 eta 1:07:13 +epoch [41/50] batch [460/500] time 0.903 (0.887) data 0.000 (0.002) loss 0.7261 (1.0090) acc 81.2500 (74.3139) lr 2.2949e-04 eta 1:07:08 +epoch [41/50] batch [465/500] time 0.892 (0.887) data 0.001 (0.002) loss 1.1689 (1.0115) acc 71.8750 (74.2876) lr 2.2949e-04 eta 1:07:04 +epoch [41/50] batch [470/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.5586 (1.0128) acc 68.7500 (74.2487) lr 2.2949e-04 eta 1:07:00 +epoch [41/50] batch [475/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.5972 (1.0128) acc 87.5000 (74.2500) lr 2.2949e-04 eta 1:06:55 +epoch [41/50] batch [480/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.1309 (1.0147) acc 75.0000 (74.1927) lr 2.2949e-04 eta 1:06:50 +epoch [41/50] batch [485/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1172 (1.0115) acc 75.0000 (74.2590) lr 2.2949e-04 eta 1:06:45 +epoch [41/50] batch [490/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.8491 (1.0112) acc 84.3750 (74.2411) lr 2.2949e-04 eta 1:06:41 +epoch [41/50] batch [495/500] time 1.001 (0.887) data 0.000 (0.002) loss 0.8154 (1.0095) acc 75.0000 (74.2551) lr 2.2949e-04 eta 1:06:37 +epoch [41/50] batch [500/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.8379 (1.0090) acc 81.2500 (74.2438) lr 1.9098e-04 eta 1:06:32 +epoch [42/50] batch [5/500] time 0.872 (1.007) data 0.000 (0.128) loss 0.7271 (1.1911) acc 81.2500 (69.3750) lr 1.9098e-04 eta 1:15:26 +epoch [42/50] batch [10/500] time 0.884 (0.941) data 0.000 (0.064) loss 1.3643 (1.1841) acc 65.6250 (70.6250) lr 1.9098e-04 eta 1:10:23 +epoch [42/50] batch [15/500] time 0.868 (0.923) data 0.000 (0.043) loss 0.6030 (1.2006) acc 81.2500 (71.0417) lr 1.9098e-04 eta 1:08:58 +epoch [42/50] batch [20/500] time 0.908 (0.917) data 0.000 (0.032) loss 0.4048 (1.0500) acc 87.5000 (74.8438) lr 1.9098e-04 eta 1:08:26 +epoch [42/50] batch [25/500] time 0.885 (0.912) data 0.000 (0.026) loss 1.0527 (1.0676) acc 71.8750 (74.5000) lr 1.9098e-04 
eta 1:07:59 +epoch [42/50] batch [30/500] time 0.867 (0.906) data 0.000 (0.022) loss 1.1260 (1.0717) acc 75.0000 (74.0625) lr 1.9098e-04 eta 1:07:28 +epoch [42/50] batch [35/500] time 0.905 (0.903) data 0.000 (0.019) loss 1.0498 (1.0453) acc 81.2500 (74.6429) lr 1.9098e-04 eta 1:07:11 +epoch [42/50] batch [40/500] time 0.862 (0.903) data 0.001 (0.016) loss 0.5444 (1.0135) acc 84.3750 (75.4688) lr 1.9098e-04 eta 1:07:08 +epoch [42/50] batch [45/500] time 0.884 (0.901) data 0.000 (0.014) loss 1.5850 (1.0476) acc 68.7500 (74.5833) lr 1.9098e-04 eta 1:06:52 +epoch [42/50] batch [50/500] time 0.885 (0.900) data 0.000 (0.013) loss 0.3914 (1.0295) acc 87.5000 (74.6875) lr 1.9098e-04 eta 1:06:43 +epoch [42/50] batch [55/500] time 0.877 (0.899) data 0.000 (0.012) loss 0.6206 (1.0326) acc 87.5000 (74.7727) lr 1.9098e-04 eta 1:06:36 +epoch [42/50] batch [60/500] time 0.852 (0.896) data 0.000 (0.011) loss 1.3633 (1.0568) acc 65.6250 (74.1667) lr 1.9098e-04 eta 1:06:20 +epoch [42/50] batch [65/500] time 0.901 (0.896) data 0.000 (0.010) loss 1.1885 (1.0525) acc 65.6250 (74.4712) lr 1.9098e-04 eta 1:06:14 +epoch [42/50] batch [70/500] time 0.899 (0.895) data 0.000 (0.009) loss 0.7734 (1.0461) acc 78.1250 (74.6875) lr 1.9098e-04 eta 1:06:06 +epoch [42/50] batch [75/500] time 0.893 (0.894) data 0.000 (0.009) loss 1.4619 (1.0549) acc 65.6250 (74.5417) lr 1.9098e-04 eta 1:05:58 +epoch [42/50] batch [80/500] time 0.866 (0.894) data 0.000 (0.008) loss 0.7861 (1.0567) acc 78.1250 (74.4141) lr 1.9098e-04 eta 1:05:52 +epoch [42/50] batch [85/500] time 0.881 (0.893) data 0.000 (0.008) loss 0.6128 (1.0481) acc 84.3750 (74.5588) lr 1.9098e-04 eta 1:05:43 +epoch [42/50] batch [90/500] time 0.854 (0.892) data 0.000 (0.007) loss 1.3545 (1.0511) acc 68.7500 (74.4444) lr 1.9098e-04 eta 1:05:32 +epoch [42/50] batch [95/500] time 0.871 (0.891) data 0.000 (0.007) loss 0.9365 (1.0484) acc 78.1250 (74.5724) lr 1.9098e-04 eta 1:05:24 +epoch [42/50] batch [100/500] time 0.873 (0.890) data 0.000 (0.007) 
loss 0.9941 (1.0542) acc 75.0000 (74.3438) lr 1.9098e-04 eta 1:05:16 +epoch [42/50] batch [105/500] time 0.886 (0.890) data 0.000 (0.006) loss 0.9653 (1.0551) acc 78.1250 (74.3155) lr 1.9098e-04 eta 1:05:12 +epoch [42/50] batch [110/500] time 0.885 (0.890) data 0.000 (0.006) loss 0.7349 (1.0534) acc 71.8750 (74.3182) lr 1.9098e-04 eta 1:05:06 +epoch [42/50] batch [115/500] time 0.867 (0.889) data 0.000 (0.006) loss 0.8105 (1.0564) acc 75.0000 (74.2120) lr 1.9098e-04 eta 1:04:59 +epoch [42/50] batch [120/500] time 0.903 (0.889) data 0.000 (0.006) loss 1.6611 (1.0597) acc 68.7500 (74.1146) lr 1.9098e-04 eta 1:04:53 +epoch [42/50] batch [125/500] time 0.899 (0.889) data 0.000 (0.005) loss 0.7622 (1.0537) acc 78.1250 (74.2000) lr 1.9098e-04 eta 1:04:48 +epoch [42/50] batch [130/500] time 0.877 (0.889) data 0.000 (0.005) loss 1.0947 (1.0517) acc 81.2500 (74.3269) lr 1.9098e-04 eta 1:04:44 +epoch [42/50] batch [135/500] time 0.882 (0.889) data 0.000 (0.005) loss 1.7656 (1.0550) acc 68.7500 (74.2593) lr 1.9098e-04 eta 1:04:39 +epoch [42/50] batch [140/500] time 0.916 (0.890) data 0.000 (0.005) loss 1.1357 (1.0472) acc 65.6250 (74.3750) lr 1.9098e-04 eta 1:04:41 +epoch [42/50] batch [145/500] time 0.898 (0.890) data 0.001 (0.005) loss 1.2070 (1.0469) acc 68.7500 (74.4397) lr 1.9098e-04 eta 1:04:35 +epoch [42/50] batch [150/500] time 0.845 (0.890) data 0.000 (0.004) loss 0.8022 (1.0462) acc 78.1250 (74.3958) lr 1.9098e-04 eta 1:04:30 +epoch [42/50] batch [155/500] time 0.882 (0.889) data 0.000 (0.004) loss 0.8350 (1.0412) acc 81.2500 (74.5565) lr 1.9098e-04 eta 1:04:23 +epoch [42/50] batch [160/500] time 0.867 (0.889) data 0.000 (0.004) loss 1.0010 (1.0424) acc 68.7500 (74.5117) lr 1.9098e-04 eta 1:04:18 +epoch [42/50] batch [165/500] time 0.907 (0.890) data 0.000 (0.004) loss 0.8843 (1.0440) acc 71.8750 (74.4886) lr 1.9098e-04 eta 1:04:16 +epoch [42/50] batch [170/500] time 0.895 (0.890) data 0.000 (0.004) loss 0.7856 (1.0424) acc 68.7500 (74.3750) lr 1.9098e-04 eta 
1:04:12 +epoch [42/50] batch [175/500] time 0.881 (0.890) data 0.000 (0.004) loss 1.2725 (1.0452) acc 71.8750 (74.2143) lr 1.9098e-04 eta 1:04:08 +epoch [42/50] batch [180/500] time 0.912 (0.890) data 0.000 (0.004) loss 0.5981 (1.0429) acc 78.1250 (74.2708) lr 1.9098e-04 eta 1:04:05 +epoch [42/50] batch [185/500] time 0.897 (0.891) data 0.000 (0.004) loss 0.8633 (1.0451) acc 75.0000 (74.2230) lr 1.9098e-04 eta 1:04:02 +epoch [42/50] batch [190/500] time 0.890 (0.891) data 0.000 (0.004) loss 1.5215 (1.0458) acc 62.5000 (74.2270) lr 1.9098e-04 eta 1:03:58 +epoch [42/50] batch [195/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.4658 (1.0504) acc 75.0000 (74.2628) lr 1.9098e-04 eta 1:03:53 +epoch [42/50] batch [200/500] time 0.898 (0.891) data 0.000 (0.003) loss 1.2520 (1.0533) acc 68.7500 (74.1406) lr 1.9098e-04 eta 1:03:49 +epoch [42/50] batch [205/500] time 0.862 (0.890) data 0.000 (0.003) loss 0.7134 (1.0479) acc 81.2500 (74.1159) lr 1.9098e-04 eta 1:03:43 +epoch [42/50] batch [210/500] time 0.861 (0.890) data 0.000 (0.003) loss 0.8276 (1.0459) acc 75.0000 (74.1815) lr 1.9098e-04 eta 1:03:36 +epoch [42/50] batch [215/500] time 0.892 (0.889) data 0.000 (0.003) loss 0.5581 (1.0429) acc 84.3750 (74.2006) lr 1.9098e-04 eta 1:03:31 +epoch [42/50] batch [220/500] time 0.907 (0.890) data 0.000 (0.003) loss 1.0381 (1.0408) acc 71.8750 (74.1335) lr 1.9098e-04 eta 1:03:27 +epoch [42/50] batch [225/500] time 0.886 (0.890) data 0.000 (0.003) loss 1.1602 (1.0394) acc 68.7500 (74.1111) lr 1.9098e-04 eta 1:03:24 +epoch [42/50] batch [230/500] time 0.877 (0.890) data 0.000 (0.003) loss 0.5259 (1.0379) acc 90.6250 (74.1440) lr 1.9098e-04 eta 1:03:20 +epoch [42/50] batch [235/500] time 0.873 (0.890) data 0.000 (0.003) loss 0.5571 (1.0347) acc 87.5000 (74.2021) lr 1.9098e-04 eta 1:03:16 +epoch [42/50] batch [240/500] time 0.907 (0.890) data 0.000 (0.003) loss 0.5420 (1.0310) acc 87.5000 (74.3099) lr 1.9098e-04 eta 1:03:12 +epoch [42/50] batch [245/500] time 0.861 (0.890) data 
0.000 (0.003) loss 0.8291 (1.0279) acc 78.1250 (74.3750) lr 1.9098e-04 eta 1:03:07 +epoch [42/50] batch [250/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.1758 (1.0282) acc 71.8750 (74.3625) lr 1.9098e-04 eta 1:03:02 +epoch [42/50] batch [255/500] time 0.871 (0.890) data 0.000 (0.003) loss 0.8203 (1.0286) acc 75.0000 (74.3627) lr 1.9098e-04 eta 1:02:58 +epoch [42/50] batch [260/500] time 0.911 (0.890) data 0.000 (0.003) loss 0.7642 (1.0247) acc 90.6250 (74.5192) lr 1.9098e-04 eta 1:02:54 +epoch [42/50] batch [265/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.0508 (1.0261) acc 78.1250 (74.5165) lr 1.9098e-04 eta 1:02:50 +epoch [42/50] batch [270/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.9604 (1.0243) acc 71.8750 (74.5833) lr 1.9098e-04 eta 1:02:45 +epoch [42/50] batch [275/500] time 0.874 (0.890) data 0.000 (0.003) loss 1.1250 (1.0260) acc 71.8750 (74.5341) lr 1.9098e-04 eta 1:02:39 +epoch [42/50] batch [280/500] time 0.921 (0.890) data 0.000 (0.003) loss 0.7827 (1.0249) acc 84.3750 (74.5424) lr 1.9098e-04 eta 1:02:35 +epoch [42/50] batch [285/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.6406 (1.0246) acc 78.1250 (74.5395) lr 1.9098e-04 eta 1:02:31 +epoch [42/50] batch [290/500] time 0.909 (0.890) data 0.000 (0.002) loss 0.5649 (1.0254) acc 75.0000 (74.5259) lr 1.9098e-04 eta 1:02:26 +epoch [42/50] batch [295/500] time 0.888 (0.890) data 0.000 (0.002) loss 0.6582 (1.0221) acc 84.3750 (74.5869) lr 1.9098e-04 eta 1:02:21 +epoch [42/50] batch [300/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.0264 (1.0197) acc 81.2500 (74.6562) lr 1.9098e-04 eta 1:02:15 +epoch [42/50] batch [305/500] time 0.879 (0.890) data 0.000 (0.002) loss 1.2900 (1.0183) acc 71.8750 (74.6619) lr 1.9098e-04 eta 1:02:11 +epoch [42/50] batch [310/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.4932 (1.0179) acc 68.7500 (74.6875) lr 1.9098e-04 eta 1:02:06 +epoch [42/50] batch [315/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.1621 (1.0162) acc 71.8750 (74.7222) lr 
1.9098e-04 eta 1:02:02 +epoch [42/50] batch [320/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.1191 (1.0167) acc 62.5000 (74.6973) lr 1.9098e-04 eta 1:01:57 +epoch [42/50] batch [325/500] time 0.985 (0.890) data 0.000 (0.002) loss 1.2021 (1.0144) acc 65.6250 (74.7019) lr 1.9098e-04 eta 1:01:53 +epoch [42/50] batch [330/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.3857 (1.0157) acc 65.6250 (74.6686) lr 1.9098e-04 eta 1:01:48 +epoch [42/50] batch [335/500] time 0.849 (0.889) data 0.000 (0.002) loss 1.0625 (1.0112) acc 75.0000 (74.7948) lr 1.9098e-04 eta 1:01:43 +epoch [42/50] batch [340/500] time 0.871 (0.889) data 0.000 (0.002) loss 0.9316 (1.0114) acc 75.0000 (74.8070) lr 1.9098e-04 eta 1:01:38 +epoch [42/50] batch [345/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.0986 (1.0093) acc 71.8750 (74.8370) lr 1.9098e-04 eta 1:01:33 +epoch [42/50] batch [350/500] time 0.894 (0.889) data 0.000 (0.002) loss 0.6489 (1.0081) acc 75.0000 (74.8661) lr 1.9098e-04 eta 1:01:28 +epoch [42/50] batch [355/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.1104 (1.0075) acc 75.0000 (74.8768) lr 1.9098e-04 eta 1:01:23 +epoch [42/50] batch [360/500] time 0.846 (0.888) data 0.000 (0.002) loss 1.0654 (1.0083) acc 71.8750 (74.9132) lr 1.9098e-04 eta 1:01:17 +epoch [42/50] batch [365/500] time 0.849 (0.888) data 0.000 (0.002) loss 0.7510 (1.0110) acc 75.0000 (74.8887) lr 1.9098e-04 eta 1:01:12 +epoch [42/50] batch [370/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.0654 (1.0079) acc 65.6250 (74.9324) lr 1.9098e-04 eta 1:01:08 +epoch [42/50] batch [375/500] time 0.877 (0.888) data 0.001 (0.002) loss 1.0391 (1.0066) acc 68.7500 (74.9500) lr 1.9098e-04 eta 1:01:03 +epoch [42/50] batch [380/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.6689 (1.0068) acc 81.2500 (74.9424) lr 1.9098e-04 eta 1:00:59 +epoch [42/50] batch [385/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.8223 (1.0043) acc 75.0000 (75.0162) lr 1.9098e-04 eta 1:00:54 +epoch [42/50] batch [390/500] time 0.864 
(0.888) data 0.000 (0.002) loss 0.7783 (1.0027) acc 78.1250 (75.0721) lr 1.9098e-04 eta 1:00:49 +epoch [42/50] batch [395/500] time 0.847 (0.888) data 0.000 (0.002) loss 0.9331 (1.0016) acc 78.1250 (75.1345) lr 1.9098e-04 eta 1:00:44 +epoch [42/50] batch [400/500] time 0.911 (0.888) data 0.000 (0.002) loss 1.4531 (1.0005) acc 68.7500 (75.1484) lr 1.9098e-04 eta 1:00:39 +epoch [42/50] batch [405/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.8564 (1.0043) acc 62.5000 (75.0772) lr 1.9098e-04 eta 1:00:35 +epoch [42/50] batch [410/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.2432 (1.0049) acc 71.8750 (75.0838) lr 1.9098e-04 eta 1:00:30 +epoch [42/50] batch [415/500] time 0.901 (0.888) data 0.000 (0.002) loss 1.2627 (1.0060) acc 68.7500 (75.0527) lr 1.9098e-04 eta 1:00:25 +epoch [42/50] batch [420/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.1074 (1.0046) acc 75.0000 (75.0595) lr 1.9098e-04 eta 1:00:21 +epoch [42/50] batch [425/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.6758 (1.0056) acc 81.2500 (75.0221) lr 1.9098e-04 eta 1:00:18 +epoch [42/50] batch [430/500] time 0.861 (0.888) data 0.000 (0.002) loss 0.6748 (1.0087) acc 75.0000 (74.9346) lr 1.9098e-04 eta 1:00:14 +epoch [42/50] batch [435/500] time 0.859 (0.888) data 0.000 (0.002) loss 0.4092 (1.0092) acc 81.2500 (74.9210) lr 1.9098e-04 eta 1:00:09 +epoch [42/50] batch [440/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.0303 (1.0099) acc 78.1250 (74.8580) lr 1.9098e-04 eta 1:00:05 +epoch [42/50] batch [445/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.2627 (1.0120) acc 75.0000 (74.8104) lr 1.9098e-04 eta 1:00:00 +epoch [42/50] batch [450/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.9189 (1.0112) acc 84.3750 (74.8056) lr 1.9098e-04 eta 0:59:55 +epoch [42/50] batch [455/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.5820 (1.0114) acc 71.8750 (74.8077) lr 1.9098e-04 eta 0:59:51 +epoch [42/50] batch [460/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.9048 (1.0128) acc 68.7500 
(74.7486) lr 1.9098e-04 eta 0:59:46 +epoch [42/50] batch [465/500] time 0.839 (0.888) data 0.000 (0.002) loss 0.6499 (1.0141) acc 81.2500 (74.7312) lr 1.9098e-04 eta 0:59:41 +epoch [42/50] batch [470/500] time 0.858 (0.888) data 0.000 (0.002) loss 1.1133 (1.0148) acc 71.8750 (74.7141) lr 1.9098e-04 eta 0:59:36 +epoch [42/50] batch [475/500] time 0.903 (0.887) data 0.000 (0.002) loss 0.8867 (1.0140) acc 78.1250 (74.7368) lr 1.9098e-04 eta 0:59:31 +epoch [42/50] batch [480/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.5879 (1.0159) acc 65.6250 (74.7135) lr 1.9098e-04 eta 0:59:27 +epoch [42/50] batch [485/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.8882 (1.0156) acc 81.2500 (74.7745) lr 1.9098e-04 eta 0:59:22 +epoch [42/50] batch [490/500] time 0.853 (0.887) data 0.000 (0.002) loss 1.5410 (1.0170) acc 59.3750 (74.6620) lr 1.9098e-04 eta 0:59:17 +epoch [42/50] batch [495/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.6836 (1.0168) acc 68.7500 (74.5960) lr 1.9098e-04 eta 0:59:12 +epoch [42/50] batch [500/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.2275 (1.0150) acc 71.8750 (74.6312) lr 1.5567e-04 eta 0:59:07 +epoch [43/50] batch [5/500] time 0.876 (1.013) data 0.000 (0.137) loss 0.8965 (0.8690) acc 81.2500 (76.2500) lr 1.5567e-04 eta 1:07:25 +epoch [43/50] batch [10/500] time 1.013 (0.963) data 0.000 (0.068) loss 1.6465 (1.0330) acc 59.3750 (72.1875) lr 1.5567e-04 eta 1:04:00 +epoch [43/50] batch [15/500] time 0.916 (0.945) data 0.000 (0.046) loss 0.6226 (1.0459) acc 81.2500 (71.8750) lr 1.5567e-04 eta 1:02:44 +epoch [43/50] batch [20/500] time 0.903 (0.931) data 0.000 (0.034) loss 0.5820 (1.0199) acc 78.1250 (71.8750) lr 1.5567e-04 eta 1:01:43 +epoch [43/50] batch [25/500] time 0.912 (0.923) data 0.000 (0.027) loss 1.0732 (1.0110) acc 75.0000 (72.6250) lr 1.5567e-04 eta 1:01:08 +epoch [43/50] batch [30/500] time 0.858 (0.916) data 0.000 (0.023) loss 0.8887 (1.0072) acc 68.7500 (73.0208) lr 1.5567e-04 eta 1:00:37 +epoch [43/50] batch [35/500] time 
0.875 (0.910) data 0.000 (0.020) loss 0.9863 (1.0218) acc 68.7500 (73.2143) lr 1.5567e-04 eta 1:00:08 +epoch [43/50] batch [40/500] time 0.907 (0.906) data 0.000 (0.017) loss 1.1953 (1.0302) acc 62.5000 (72.8906) lr 1.5567e-04 eta 0:59:46 +epoch [43/50] batch [45/500] time 0.847 (0.903) data 0.000 (0.015) loss 0.6665 (1.0465) acc 81.2500 (73.2639) lr 1.5567e-04 eta 0:59:29 +epoch [43/50] batch [50/500] time 0.887 (0.901) data 0.000 (0.014) loss 0.8320 (1.0212) acc 84.3750 (73.9375) lr 1.5567e-04 eta 0:59:20 +epoch [43/50] batch [55/500] time 0.858 (0.899) data 0.000 (0.013) loss 1.3057 (1.0210) acc 71.8750 (73.9773) lr 1.5567e-04 eta 0:59:06 +epoch [43/50] batch [60/500] time 0.898 (0.897) data 0.000 (0.012) loss 1.0537 (0.9976) acc 71.8750 (74.4271) lr 1.5567e-04 eta 0:58:54 +epoch [43/50] batch [65/500] time 0.893 (0.896) data 0.000 (0.011) loss 1.1309 (1.0007) acc 68.7500 (74.1827) lr 1.5567e-04 eta 0:58:45 +epoch [43/50] batch [70/500] time 0.866 (0.895) data 0.000 (0.010) loss 0.8965 (0.9900) acc 81.2500 (74.5089) lr 1.5567e-04 eta 0:58:38 +epoch [43/50] batch [75/500] time 0.873 (0.895) data 0.000 (0.009) loss 0.9150 (0.9916) acc 71.8750 (74.4167) lr 1.5567e-04 eta 0:58:32 +epoch [43/50] batch [80/500] time 0.895 (0.895) data 0.000 (0.009) loss 1.2861 (0.9947) acc 71.8750 (74.2188) lr 1.5567e-04 eta 0:58:28 +epoch [43/50] batch [85/500] time 0.900 (0.895) data 0.000 (0.008) loss 0.6948 (0.9991) acc 84.3750 (74.0809) lr 1.5567e-04 eta 0:58:22 +epoch [43/50] batch [90/500] time 0.921 (0.894) data 0.000 (0.008) loss 1.7422 (1.0128) acc 59.3750 (73.7153) lr 1.5567e-04 eta 0:58:14 +epoch [43/50] batch [95/500] time 0.920 (0.894) data 0.000 (0.007) loss 1.2070 (1.0126) acc 75.0000 (73.9145) lr 1.5567e-04 eta 0:58:09 +epoch [43/50] batch [100/500] time 0.897 (0.893) data 0.000 (0.007) loss 0.4297 (0.9986) acc 90.6250 (74.1562) lr 1.5567e-04 eta 0:58:03 +epoch [43/50] batch [105/500] time 0.849 (0.893) data 0.000 (0.007) loss 0.7305 (0.9934) acc 84.3750 (74.4048) lr 
1.5567e-04 eta 0:57:57 +epoch [43/50] batch [110/500] time 0.889 (0.893) data 0.000 (0.006) loss 1.6445 (1.0044) acc 71.8750 (74.4602) lr 1.5567e-04 eta 0:57:52 +epoch [43/50] batch [115/500] time 0.896 (0.892) data 0.000 (0.006) loss 0.6860 (0.9998) acc 81.2500 (74.5652) lr 1.5567e-04 eta 0:57:45 +epoch [43/50] batch [120/500] time 0.885 (0.892) data 0.000 (0.006) loss 1.0908 (1.0039) acc 71.8750 (74.4792) lr 1.5567e-04 eta 0:57:41 +epoch [43/50] batch [125/500] time 0.889 (0.892) data 0.000 (0.006) loss 1.0352 (1.0092) acc 65.6250 (74.3500) lr 1.5567e-04 eta 0:57:36 +epoch [43/50] batch [130/500] time 0.890 (0.892) data 0.000 (0.005) loss 0.4326 (1.0027) acc 84.3750 (74.5433) lr 1.5567e-04 eta 0:57:32 +epoch [43/50] batch [135/500] time 0.924 (0.892) data 0.000 (0.005) loss 1.1631 (1.0035) acc 65.6250 (74.5833) lr 1.5567e-04 eta 0:57:28 +epoch [43/50] batch [140/500] time 0.881 (0.892) data 0.000 (0.005) loss 0.7261 (1.0031) acc 81.2500 (74.4420) lr 1.5567e-04 eta 0:57:22 +epoch [43/50] batch [145/500] time 0.872 (0.891) data 0.000 (0.005) loss 1.1162 (1.0018) acc 71.8750 (74.5474) lr 1.5567e-04 eta 0:57:16 +epoch [43/50] batch [150/500] time 0.866 (0.891) data 0.000 (0.005) loss 0.6733 (1.0026) acc 71.8750 (74.5625) lr 1.5567e-04 eta 0:57:10 +epoch [43/50] batch [155/500] time 0.894 (0.892) data 0.000 (0.005) loss 1.1270 (1.0061) acc 81.2500 (74.4556) lr 1.5567e-04 eta 0:57:10 +epoch [43/50] batch [160/500] time 0.908 (0.893) data 0.000 (0.004) loss 0.8765 (1.0022) acc 75.0000 (74.4922) lr 1.5567e-04 eta 0:57:07 +epoch [43/50] batch [165/500] time 0.899 (0.892) data 0.000 (0.004) loss 0.8164 (1.0058) acc 81.2500 (74.3750) lr 1.5567e-04 eta 0:57:01 +epoch [43/50] batch [170/500] time 0.871 (0.892) data 0.000 (0.004) loss 0.5112 (1.0019) acc 87.5000 (74.3750) lr 1.5567e-04 eta 0:56:55 +epoch [43/50] batch [175/500] time 0.866 (0.891) data 0.000 (0.004) loss 1.1748 (1.0098) acc 71.8750 (74.2857) lr 1.5567e-04 eta 0:56:48 +epoch [43/50] batch [180/500] time 0.924 
(0.891) data 0.000 (0.004) loss 1.0654 (1.0093) acc 75.0000 (74.2882) lr 1.5567e-04 eta 0:56:44 +epoch [43/50] batch [185/500] time 0.895 (0.891) data 0.001 (0.004) loss 0.7808 (1.0118) acc 81.2500 (74.1723) lr 1.5567e-04 eta 0:56:39 +epoch [43/50] batch [190/500] time 0.855 (0.891) data 0.000 (0.004) loss 1.0684 (1.0182) acc 65.6250 (74.0461) lr 1.5567e-04 eta 0:56:32 +epoch [43/50] batch [195/500] time 0.885 (0.890) data 0.000 (0.004) loss 1.2148 (1.0207) acc 62.5000 (73.9263) lr 1.5567e-04 eta 0:56:26 +epoch [43/50] batch [200/500] time 0.889 (0.890) data 0.000 (0.004) loss 1.2725 (1.0193) acc 62.5000 (73.9219) lr 1.5567e-04 eta 0:56:21 +epoch [43/50] batch [205/500] time 0.904 (0.890) data 0.000 (0.004) loss 0.6963 (1.0150) acc 81.2500 (74.0244) lr 1.5567e-04 eta 0:56:18 +epoch [43/50] batch [210/500] time 0.874 (0.890) data 0.000 (0.003) loss 0.7676 (1.0104) acc 81.2500 (74.1518) lr 1.5567e-04 eta 0:56:14 +epoch [43/50] batch [215/500] time 0.860 (0.890) data 0.000 (0.003) loss 1.3750 (1.0106) acc 71.8750 (74.1279) lr 1.5567e-04 eta 0:56:08 +epoch [43/50] batch [220/500] time 0.866 (0.890) data 0.000 (0.003) loss 1.2295 (1.0103) acc 62.5000 (74.0625) lr 1.5567e-04 eta 0:56:02 +epoch [43/50] batch [225/500] time 0.872 (0.890) data 0.000 (0.003) loss 1.1943 (1.0118) acc 71.8750 (74.0556) lr 1.5567e-04 eta 0:55:58 +epoch [43/50] batch [230/500] time 0.908 (0.890) data 0.000 (0.003) loss 0.6123 (1.0089) acc 81.2500 (74.0897) lr 1.5567e-04 eta 0:55:53 +epoch [43/50] batch [235/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.1934 (1.0074) acc 68.7500 (74.1223) lr 1.5567e-04 eta 0:55:49 +epoch [43/50] batch [240/500] time 0.893 (0.889) data 0.000 (0.003) loss 0.8818 (1.0051) acc 78.1250 (74.0885) lr 1.5567e-04 eta 0:55:44 +epoch [43/50] batch [245/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.7856 (1.0041) acc 75.0000 (74.0816) lr 1.5567e-04 eta 0:55:39 +epoch [43/50] batch [250/500] time 0.991 (0.890) data 0.000 (0.003) loss 0.8770 (1.0051) acc 75.0000 
(74.0750) lr 1.5567e-04 eta 0:55:37 +epoch [43/50] batch [255/500] time 0.868 (0.890) data 0.000 (0.003) loss 1.0254 (1.0065) acc 68.7500 (73.9828) lr 1.5567e-04 eta 0:55:31 +epoch [43/50] batch [260/500] time 0.886 (0.889) data 0.000 (0.003) loss 1.0312 (1.0078) acc 71.8750 (73.9303) lr 1.5567e-04 eta 0:55:25 +epoch [43/50] batch [265/500] time 0.910 (0.889) data 0.000 (0.003) loss 1.0898 (1.0061) acc 78.1250 (74.0684) lr 1.5567e-04 eta 0:55:21 +epoch [43/50] batch [270/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.6943 (1.0054) acc 84.3750 (74.0625) lr 1.5567e-04 eta 0:55:16 +epoch [43/50] batch [275/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.5400 (1.0053) acc 71.8750 (74.1136) lr 1.5567e-04 eta 0:55:11 +epoch [43/50] batch [280/500] time 0.881 (0.889) data 0.000 (0.003) loss 0.7231 (1.0093) acc 71.8750 (73.9844) lr 1.5567e-04 eta 0:55:07 +epoch [43/50] batch [285/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.0449 (1.0106) acc 71.8750 (74.0022) lr 1.5567e-04 eta 0:55:03 +epoch [43/50] batch [290/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.3447 (1.0120) acc 75.0000 (73.9978) lr 1.5567e-04 eta 0:54:58 +epoch [43/50] batch [295/500] time 0.892 (0.890) data 0.000 (0.003) loss 0.6934 (1.0108) acc 81.2500 (74.0572) lr 1.5567e-04 eta 0:54:55 +epoch [43/50] batch [300/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.5942 (1.0091) acc 87.5000 (74.1354) lr 1.5567e-04 eta 0:54:50 +epoch [43/50] batch [305/500] time 0.920 (0.890) data 0.000 (0.002) loss 2.2305 (1.0175) acc 62.5000 (74.0369) lr 1.5567e-04 eta 0:54:47 +epoch [43/50] batch [310/500] time 0.870 (0.889) data 0.000 (0.002) loss 1.5654 (1.0163) acc 53.1250 (74.0625) lr 1.5567e-04 eta 0:54:41 +epoch [43/50] batch [315/500] time 0.856 (0.889) data 0.000 (0.002) loss 1.2002 (1.0177) acc 75.0000 (74.0675) lr 1.5567e-04 eta 0:54:36 +epoch [43/50] batch [320/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.8428 (1.0166) acc 75.0000 (74.0918) lr 1.5567e-04 eta 0:54:31 +epoch [43/50] batch [325/500] 
time 0.892 (0.889) data 0.000 (0.002) loss 0.8950 (1.0149) acc 75.0000 (74.1442) lr 1.5567e-04 eta 0:54:27 +epoch [43/50] batch [330/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.3311 (1.0151) acc 71.8750 (74.1004) lr 1.5567e-04 eta 0:54:22 +epoch [43/50] batch [335/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.9199 (1.0143) acc 81.2500 (74.1231) lr 1.5567e-04 eta 0:54:17 +epoch [43/50] batch [340/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.8340 (1.0151) acc 75.0000 (74.0625) lr 1.5567e-04 eta 0:54:12 +epoch [43/50] batch [345/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.9971 (1.0144) acc 68.7500 (74.0580) lr 1.5567e-04 eta 0:54:08 +epoch [43/50] batch [350/500] time 0.881 (0.889) data 0.000 (0.002) loss 2.0742 (1.0168) acc 62.5000 (74.0536) lr 1.5567e-04 eta 0:54:03 +epoch [43/50] batch [355/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.1543 (1.0173) acc 78.1250 (74.0933) lr 1.5567e-04 eta 0:53:59 +epoch [43/50] batch [360/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.3047 (1.0188) acc 71.8750 (74.0972) lr 1.5567e-04 eta 0:53:54 +epoch [43/50] batch [365/500] time 0.897 (0.889) data 0.000 (0.002) loss 0.6489 (1.0193) acc 78.1250 (74.1096) lr 1.5567e-04 eta 0:53:50 +epoch [43/50] batch [370/500] time 0.879 (0.889) data 0.000 (0.002) loss 0.9028 (1.0197) acc 75.0000 (74.0709) lr 1.5567e-04 eta 0:53:46 +epoch [43/50] batch [375/500] time 0.905 (0.889) data 0.000 (0.002) loss 1.0684 (1.0187) acc 78.1250 (74.1667) lr 1.5567e-04 eta 0:53:42 +epoch [43/50] batch [380/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.8784 (1.0194) acc 84.3750 (74.1859) lr 1.5567e-04 eta 0:53:38 +epoch [43/50] batch [385/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.9917 (1.0188) acc 78.1250 (74.2289) lr 1.5567e-04 eta 0:53:33 +epoch [43/50] batch [390/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.7559 (1.0202) acc 75.0000 (74.1827) lr 1.5567e-04 eta 0:53:29 +epoch [43/50] batch [395/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.6113 (1.0183) acc 
90.6250 (74.2801) lr 1.5567e-04 eta 0:53:26 +epoch [43/50] batch [400/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.0059 (1.0176) acc 84.3750 (74.3125) lr 1.5567e-04 eta 0:53:21 +epoch [43/50] batch [405/500] time 0.856 (0.889) data 0.000 (0.002) loss 1.2949 (1.0164) acc 75.0000 (74.3519) lr 1.5567e-04 eta 0:53:16 +epoch [43/50] batch [410/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.8384 (1.0147) acc 84.3750 (74.4207) lr 1.5567e-04 eta 0:53:12 +epoch [43/50] batch [415/500] time 0.899 (0.889) data 0.000 (0.002) loss 0.7104 (1.0155) acc 78.1250 (74.3901) lr 1.5567e-04 eta 0:53:07 +epoch [43/50] batch [420/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.7080 (1.0165) acc 65.6250 (74.3750) lr 1.5567e-04 eta 0:53:03 +epoch [43/50] batch [425/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.1094 (1.0162) acc 65.6250 (74.3603) lr 1.5567e-04 eta 0:52:59 +epoch [43/50] batch [430/500] time 0.859 (0.889) data 0.000 (0.002) loss 0.6797 (1.0139) acc 81.2500 (74.3605) lr 1.5567e-04 eta 0:52:54 +epoch [43/50] batch [435/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.9263 (1.0124) acc 81.2500 (74.3966) lr 1.5567e-04 eta 0:52:49 +epoch [43/50] batch [440/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.0977 (1.0150) acc 71.8750 (74.3395) lr 1.5567e-04 eta 0:52:45 +epoch [43/50] batch [445/500] time 0.860 (0.889) data 0.000 (0.002) loss 1.7803 (1.0199) acc 62.5000 (74.2837) lr 1.5567e-04 eta 0:52:40 +epoch [43/50] batch [450/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.6509 (1.0204) acc 87.5000 (74.2847) lr 1.5567e-04 eta 0:52:35 +epoch [43/50] batch [455/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.1377 (1.0179) acc 65.6250 (74.3201) lr 1.5567e-04 eta 0:52:31 +epoch [43/50] batch [460/500] time 0.880 (0.889) data 0.000 (0.002) loss 1.1426 (1.0185) acc 71.8750 (74.3071) lr 1.5567e-04 eta 0:52:27 +epoch [43/50] batch [465/500] time 0.847 (0.889) data 0.000 (0.002) loss 1.2598 (1.0166) acc 68.7500 (74.3011) lr 1.5567e-04 eta 0:52:22 +epoch [43/50] batch 
[470/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.3945 (1.0161) acc 68.7500 (74.2952) lr 1.5567e-04 eta 0:52:18 +epoch [43/50] batch [475/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.1504 (1.0169) acc 65.6250 (74.2566) lr 1.5567e-04 eta 0:52:13 +epoch [43/50] batch [480/500] time 0.917 (0.889) data 0.000 (0.002) loss 0.8286 (1.0176) acc 81.2500 (74.2318) lr 1.5567e-04 eta 0:52:08 +epoch [43/50] batch [485/500] time 0.884 (0.889) data 0.001 (0.002) loss 1.0371 (1.0154) acc 71.8750 (74.2912) lr 1.5567e-04 eta 0:52:04 +epoch [43/50] batch [490/500] time 0.857 (0.889) data 0.000 (0.002) loss 1.0918 (1.0154) acc 78.1250 (74.3176) lr 1.5567e-04 eta 0:51:59 +epoch [43/50] batch [495/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.8848 (1.0157) acc 78.1250 (74.2866) lr 1.5567e-04 eta 0:51:55 +epoch [43/50] batch [500/500] time 0.892 (0.889) data 0.000 (0.002) loss 0.9561 (1.0158) acc 71.8750 (74.2938) lr 1.2369e-04 eta 0:51:50 +epoch [44/50] batch [5/500] time 0.864 (1.016) data 0.000 (0.133) loss 1.0342 (0.9790) acc 78.1250 (75.6250) lr 1.2369e-04 eta 0:59:12 +epoch [44/50] batch [10/500] time 0.902 (0.949) data 0.000 (0.066) loss 1.1738 (0.8807) acc 75.0000 (78.7500) lr 1.2369e-04 eta 0:55:10 +epoch [44/50] batch [15/500] time 0.895 (0.927) data 0.000 (0.044) loss 1.1357 (0.9627) acc 71.8750 (74.7917) lr 1.2369e-04 eta 0:53:51 +epoch [44/50] batch [20/500] time 0.859 (0.916) data 0.000 (0.033) loss 0.6479 (1.0044) acc 78.1250 (74.8438) lr 1.2369e-04 eta 0:53:08 +epoch [44/50] batch [25/500] time 0.909 (0.918) data 0.000 (0.027) loss 1.4717 (0.9748) acc 65.6250 (75.7500) lr 1.2369e-04 eta 0:53:09 +epoch [44/50] batch [30/500] time 0.889 (0.912) data 0.000 (0.022) loss 1.0557 (0.9466) acc 68.7500 (76.2500) lr 1.2369e-04 eta 0:52:45 +epoch [44/50] batch [35/500] time 0.879 (0.907) data 0.000 (0.019) loss 0.6484 (0.9533) acc 78.1250 (76.1607) lr 1.2369e-04 eta 0:52:22 +epoch [44/50] batch [40/500] time 0.876 (0.903) data 0.000 (0.017) loss 0.7256 (0.9229) acc 
81.2500 (77.0312) lr 1.2369e-04 eta 0:52:04 +epoch [44/50] batch [45/500] time 0.861 (0.901) data 0.000 (0.015) loss 0.6060 (0.9360) acc 87.5000 (76.8750) lr 1.2369e-04 eta 0:51:51 +epoch [44/50] batch [50/500] time 0.912 (0.901) data 0.000 (0.013) loss 1.5342 (0.9689) acc 68.7500 (76.4375) lr 1.2369e-04 eta 0:51:46 +epoch [44/50] batch [55/500] time 0.888 (0.901) data 0.000 (0.012) loss 0.7744 (0.9686) acc 84.3750 (76.5341) lr 1.2369e-04 eta 0:51:43 +epoch [44/50] batch [60/500] time 0.890 (0.900) data 0.000 (0.011) loss 1.0703 (0.9550) acc 71.8750 (76.9792) lr 1.2369e-04 eta 0:51:34 +epoch [44/50] batch [65/500] time 0.905 (0.900) data 0.000 (0.010) loss 1.1436 (0.9566) acc 68.7500 (76.8269) lr 1.2369e-04 eta 0:51:31 +epoch [44/50] batch [70/500] time 0.877 (0.898) data 0.000 (0.010) loss 1.2100 (0.9570) acc 71.8750 (76.7411) lr 1.2369e-04 eta 0:51:19 +epoch [44/50] batch [75/500] time 0.854 (0.896) data 0.000 (0.009) loss 1.0312 (0.9630) acc 71.8750 (76.4167) lr 1.2369e-04 eta 0:51:08 +epoch [44/50] batch [80/500] time 0.890 (0.895) data 0.000 (0.009) loss 1.0605 (0.9652) acc 71.8750 (76.2109) lr 1.2369e-04 eta 0:51:00 +epoch [44/50] batch [85/500] time 0.873 (0.896) data 0.000 (0.008) loss 0.9092 (0.9628) acc 78.1250 (76.2868) lr 1.2369e-04 eta 0:50:58 +epoch [44/50] batch [90/500] time 0.870 (0.896) data 0.000 (0.008) loss 1.3701 (0.9654) acc 65.6250 (76.1806) lr 1.2369e-04 eta 0:50:54 +epoch [44/50] batch [95/500] time 0.858 (0.896) data 0.000 (0.007) loss 1.5088 (0.9869) acc 56.2500 (75.4605) lr 1.2369e-04 eta 0:50:49 +epoch [44/50] batch [100/500] time 0.920 (0.895) data 0.000 (0.007) loss 1.0664 (0.9935) acc 62.5000 (75.0625) lr 1.2369e-04 eta 0:50:43 +epoch [44/50] batch [105/500] time 0.882 (0.895) data 0.000 (0.007) loss 0.9316 (0.9933) acc 75.0000 (75.1190) lr 1.2369e-04 eta 0:50:37 +epoch [44/50] batch [110/500] time 0.865 (0.894) data 0.000 (0.006) loss 0.8115 (0.9873) acc 75.0000 (75.3125) lr 1.2369e-04 eta 0:50:30 +epoch [44/50] batch [115/500] 
time 0.887 (0.894) data 0.000 (0.006) loss 1.1924 (0.9844) acc 68.7500 (75.3804) lr 1.2369e-04 eta 0:50:24 +epoch [44/50] batch [120/500] time 0.936 (0.894) data 0.000 (0.006) loss 0.8550 (0.9873) acc 78.1250 (75.3385) lr 1.2369e-04 eta 0:50:22 +epoch [44/50] batch [125/500] time 0.876 (0.894) data 0.000 (0.006) loss 0.4663 (0.9752) acc 96.8750 (75.7250) lr 1.2369e-04 eta 0:50:15 +epoch [44/50] batch [130/500] time 0.894 (0.894) data 0.000 (0.005) loss 0.9307 (0.9768) acc 75.0000 (75.7692) lr 1.2369e-04 eta 0:50:14 +epoch [44/50] batch [135/500] time 0.879 (0.894) data 0.000 (0.005) loss 1.5439 (0.9716) acc 62.5000 (75.9028) lr 1.2369e-04 eta 0:50:08 +epoch [44/50] batch [140/500] time 0.857 (0.893) data 0.000 (0.005) loss 1.4805 (0.9686) acc 65.6250 (75.8482) lr 1.2369e-04 eta 0:50:00 +epoch [44/50] batch [145/500] time 0.920 (0.893) data 0.000 (0.005) loss 1.0908 (0.9745) acc 78.1250 (75.8621) lr 1.2369e-04 eta 0:49:55 +epoch [44/50] batch [150/500] time 0.873 (0.892) data 0.000 (0.005) loss 1.1113 (0.9737) acc 65.6250 (75.8333) lr 1.2369e-04 eta 0:49:49 +epoch [44/50] batch [155/500] time 0.883 (0.892) data 0.000 (0.005) loss 0.8784 (0.9751) acc 81.2500 (75.7460) lr 1.2369e-04 eta 0:49:42 +epoch [44/50] batch [160/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.1025 (0.9722) acc 71.8750 (75.8398) lr 1.2369e-04 eta 0:49:37 +epoch [44/50] batch [165/500] time 0.886 (0.891) data 0.000 (0.004) loss 1.2695 (0.9730) acc 71.8750 (75.7576) lr 1.2369e-04 eta 0:49:32 +epoch [44/50] batch [170/500] time 0.888 (0.891) data 0.000 (0.004) loss 0.6353 (0.9684) acc 87.5000 (75.8824) lr 1.2369e-04 eta 0:49:27 +epoch [44/50] batch [175/500] time 0.908 (0.891) data 0.000 (0.004) loss 1.0635 (0.9752) acc 81.2500 (75.8571) lr 1.2369e-04 eta 0:49:23 +epoch [44/50] batch [180/500] time 0.898 (0.891) data 0.000 (0.004) loss 1.2168 (0.9813) acc 65.6250 (75.6424) lr 1.2369e-04 eta 0:49:18 +epoch [44/50] batch [185/500] time 0.891 (0.891) data 0.000 (0.004) loss 1.4502 (0.9857) acc 
62.5000 (75.6419) lr 1.2369e-04 eta 0:49:13 +epoch [44/50] batch [190/500] time 0.894 (0.891) data 0.000 (0.004) loss 1.0791 (0.9866) acc 71.8750 (75.5592) lr 1.2369e-04 eta 0:49:08 +epoch [44/50] batch [195/500] time 0.894 (0.891) data 0.000 (0.004) loss 1.0146 (0.9869) acc 68.7500 (75.4006) lr 1.2369e-04 eta 0:49:04 +epoch [44/50] batch [200/500] time 0.869 (0.891) data 0.000 (0.004) loss 0.6504 (0.9847) acc 78.1250 (75.3906) lr 1.2369e-04 eta 0:48:59 +epoch [44/50] batch [205/500] time 0.886 (0.891) data 0.000 (0.003) loss 0.9365 (0.9844) acc 71.8750 (75.3506) lr 1.2369e-04 eta 0:48:54 +epoch [44/50] batch [210/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.9985 (0.9864) acc 78.1250 (75.2976) lr 1.2369e-04 eta 0:48:48 +epoch [44/50] batch [215/500] time 0.881 (0.890) data 0.000 (0.003) loss 0.4412 (0.9833) acc 90.6250 (75.3052) lr 1.2369e-04 eta 0:48:44 +epoch [44/50] batch [220/500] time 0.889 (0.890) data 0.000 (0.003) loss 0.8730 (0.9847) acc 84.3750 (75.2841) lr 1.2369e-04 eta 0:48:40 +epoch [44/50] batch [225/500] time 0.910 (0.890) data 0.000 (0.003) loss 0.6187 (0.9880) acc 84.3750 (75.3056) lr 1.2369e-04 eta 0:48:36 +epoch [44/50] batch [230/500] time 0.876 (0.891) data 0.000 (0.003) loss 1.1094 (0.9872) acc 62.5000 (75.2717) lr 1.2369e-04 eta 0:48:32 +epoch [44/50] batch [235/500] time 0.918 (0.891) data 0.000 (0.003) loss 1.5273 (0.9914) acc 56.2500 (75.1729) lr 1.2369e-04 eta 0:48:28 +epoch [44/50] batch [240/500] time 0.909 (0.891) data 0.000 (0.003) loss 0.4646 (0.9858) acc 87.5000 (75.2865) lr 1.2369e-04 eta 0:48:24 +epoch [44/50] batch [245/500] time 0.875 (0.891) data 0.000 (0.003) loss 0.9341 (0.9832) acc 68.7500 (75.2679) lr 1.2369e-04 eta 0:48:19 +epoch [44/50] batch [250/500] time 0.867 (0.891) data 0.000 (0.003) loss 0.9116 (0.9851) acc 65.6250 (75.1500) lr 1.2369e-04 eta 0:48:14 +epoch [44/50] batch [255/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.3994 (0.9898) acc 56.2500 (75.0000) lr 1.2369e-04 eta 0:48:08 +epoch [44/50] batch 
[260/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.3965 (0.9968) acc 65.6250 (74.8678) lr 1.2369e-04 eta 0:48:03 +epoch [44/50] batch [265/500] time 0.872 (0.890) data 0.000 (0.003) loss 1.3350 (0.9957) acc 71.8750 (74.8349) lr 1.2369e-04 eta 0:47:58 +epoch [44/50] batch [270/500] time 0.875 (0.890) data 0.000 (0.003) loss 1.4873 (0.9973) acc 65.6250 (74.8032) lr 1.2369e-04 eta 0:47:53 +epoch [44/50] batch [275/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.2627 (0.9952) acc 65.6250 (74.8182) lr 1.2369e-04 eta 0:47:49 +epoch [44/50] batch [280/500] time 0.888 (0.889) data 0.000 (0.003) loss 1.2607 (0.9994) acc 62.5000 (74.7321) lr 1.2369e-04 eta 0:47:43 +epoch [44/50] batch [285/500] time 0.873 (0.889) data 0.000 (0.003) loss 0.8208 (0.9988) acc 81.2500 (74.7149) lr 1.2369e-04 eta 0:47:38 +epoch [44/50] batch [290/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.8760 (1.0012) acc 62.5000 (74.7306) lr 1.2369e-04 eta 0:47:34 +epoch [44/50] batch [295/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.5400 (1.0025) acc 68.7500 (74.6928) lr 1.2369e-04 eta 0:47:29 +epoch [44/50] batch [300/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.3604 (1.0032) acc 68.7500 (74.6875) lr 1.2369e-04 eta 0:47:25 +epoch [44/50] batch [305/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.4424 (1.0036) acc 62.5000 (74.6414) lr 1.2369e-04 eta 0:47:20 +epoch [44/50] batch [310/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.4670 (0.9984) acc 81.2500 (74.7278) lr 1.2369e-04 eta 0:47:15 +epoch [44/50] batch [315/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9199 (1.0005) acc 71.8750 (74.6726) lr 1.2369e-04 eta 0:47:10 +epoch [44/50] batch [320/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.7573 (1.0036) acc 81.2500 (74.6094) lr 1.2369e-04 eta 0:47:05 +epoch [44/50] batch [325/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.5161 (1.0028) acc 84.3750 (74.6827) lr 1.2369e-04 eta 0:47:00 +epoch [44/50] batch [330/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.2275 
(1.0023) acc 71.8750 (74.6970) lr 1.2369e-04 eta 0:46:56 +epoch [44/50] batch [335/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.0293 (0.9998) acc 81.2500 (74.7201) lr 1.2369e-04 eta 0:46:51 +epoch [44/50] batch [340/500] time 0.918 (0.888) data 0.000 (0.002) loss 1.0840 (0.9992) acc 71.8750 (74.7335) lr 1.2369e-04 eta 0:46:47 +epoch [44/50] batch [345/500] time 0.913 (0.889) data 0.000 (0.002) loss 1.9717 (0.9989) acc 53.1250 (74.6377) lr 1.2369e-04 eta 0:46:43 +epoch [44/50] batch [350/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.0322 (1.0009) acc 78.1250 (74.5804) lr 1.2369e-04 eta 0:46:39 +epoch [44/50] batch [355/500] time 0.856 (0.889) data 0.000 (0.002) loss 1.5439 (1.0013) acc 59.3750 (74.5687) lr 1.2369e-04 eta 0:46:34 +epoch [44/50] batch [360/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.7944 (0.9986) acc 75.0000 (74.6354) lr 1.2369e-04 eta 0:46:29 +epoch [44/50] batch [365/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.8179 (0.9967) acc 71.8750 (74.5976) lr 1.2369e-04 eta 0:46:25 +epoch [44/50] batch [370/500] time 0.980 (0.889) data 0.000 (0.002) loss 1.2666 (0.9971) acc 68.7500 (74.6030) lr 1.2369e-04 eta 0:46:21 +epoch [44/50] batch [375/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.1455 (0.9976) acc 71.8750 (74.6000) lr 1.2369e-04 eta 0:46:17 +epoch [44/50] batch [380/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.2998 (1.0005) acc 71.8750 (74.5641) lr 1.2369e-04 eta 0:46:12 +epoch [44/50] batch [385/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.1699 (0.9994) acc 68.7500 (74.6185) lr 1.2369e-04 eta 0:46:07 +epoch [44/50] batch [390/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0908 (1.0000) acc 84.3750 (74.5833) lr 1.2369e-04 eta 0:46:03 +epoch [44/50] batch [395/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.0430 (1.0000) acc 78.1250 (74.5886) lr 1.2369e-04 eta 0:45:58 +epoch [44/50] batch [400/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.1016 (1.0004) acc 68.7500 (74.5938) lr 1.2369e-04 eta 0:45:54 +epoch 
[44/50] batch [405/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.5088 (1.0019) acc 62.5000 (74.5679) lr 1.2369e-04 eta 0:45:49 +epoch [44/50] batch [410/500] time 0.902 (0.888) data 0.000 (0.002) loss 0.5649 (1.0031) acc 87.5000 (74.5732) lr 1.2369e-04 eta 0:45:45 +epoch [44/50] batch [415/500] time 0.899 (0.889) data 0.000 (0.002) loss 0.6880 (1.0018) acc 81.2500 (74.6084) lr 1.2369e-04 eta 0:45:41 +epoch [44/50] batch [420/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.1787 (1.0054) acc 71.8750 (74.5685) lr 1.2369e-04 eta 0:45:37 +epoch [44/50] batch [425/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.7021 (1.0056) acc 65.6250 (74.5735) lr 1.2369e-04 eta 0:45:32 +epoch [44/50] batch [430/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.8496 (1.0051) acc 75.0000 (74.5712) lr 1.2369e-04 eta 0:45:27 +epoch [44/50] batch [435/500] time 0.878 (0.888) data 0.000 (0.002) loss 0.6919 (1.0040) acc 78.1250 (74.5833) lr 1.2369e-04 eta 0:45:22 +epoch [44/50] batch [440/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7446 (1.0008) acc 75.0000 (74.6378) lr 1.2369e-04 eta 0:45:18 +epoch [44/50] batch [445/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.1113 (1.0023) acc 75.0000 (74.6138) lr 1.2369e-04 eta 0:45:13 +epoch [44/50] batch [450/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.7466 (1.0011) acc 75.0000 (74.6389) lr 1.2369e-04 eta 0:45:09 +epoch [44/50] batch [455/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9165 (1.0025) acc 75.0000 (74.5742) lr 1.2369e-04 eta 0:45:04 +epoch [44/50] batch [460/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.5581 (1.0013) acc 84.3750 (74.6332) lr 1.2369e-04 eta 0:45:00 +epoch [44/50] batch [465/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.2979 (0.9984) acc 59.3750 (74.6640) lr 1.2369e-04 eta 0:44:55 +epoch [44/50] batch [470/500] time 0.876 (0.888) data 0.000 (0.002) loss 0.8853 (0.9970) acc 75.0000 (74.6676) lr 1.2369e-04 eta 0:44:50 +epoch [44/50] batch [475/500] time 0.884 (0.888) data 0.000 (0.002) loss 
1.0342 (0.9973) acc 81.2500 (74.6711) lr 1.2369e-04 eta 0:44:45 +epoch [44/50] batch [480/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.8706 (0.9971) acc 81.2500 (74.6875) lr 1.2369e-04 eta 0:44:40 +epoch [44/50] batch [485/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.2559 (0.9994) acc 78.1250 (74.6327) lr 1.2369e-04 eta 0:44:36 +epoch [44/50] batch [490/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.7949 (0.9999) acc 78.1250 (74.6429) lr 1.2369e-04 eta 0:44:31 +epoch [44/50] batch [495/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.2783 (1.0004) acc 62.5000 (74.6149) lr 1.2369e-04 eta 0:44:26 +epoch [44/50] batch [500/500] time 0.854 (0.887) data 0.000 (0.002) loss 1.3994 (1.0045) acc 65.6250 (74.5250) lr 9.5173e-05 eta 0:44:21 +epoch [45/50] batch [5/500] time 0.904 (1.004) data 0.000 (0.125) loss 0.5034 (0.7992) acc 87.5000 (76.8750) lr 9.5173e-05 eta 0:50:06 +epoch [45/50] batch [10/500] time 0.918 (0.962) data 0.001 (0.063) loss 0.9961 (0.9450) acc 68.7500 (73.7500) lr 9.5173e-05 eta 0:47:56 +epoch [45/50] batch [15/500] time 0.900 (0.934) data 0.000 (0.042) loss 0.5713 (1.0242) acc 75.0000 (72.2917) lr 9.5173e-05 eta 0:46:27 +epoch [45/50] batch [20/500] time 0.906 (0.921) data 0.000 (0.031) loss 0.3630 (0.9473) acc 90.6250 (74.5312) lr 9.5173e-05 eta 0:45:44 +epoch [45/50] batch [25/500] time 0.878 (0.912) data 0.000 (0.025) loss 1.3506 (0.9978) acc 71.8750 (74.2500) lr 9.5173e-05 eta 0:45:14 +epoch [45/50] batch [30/500] time 0.887 (0.909) data 0.000 (0.021) loss 1.1035 (1.0147) acc 81.2500 (74.8958) lr 9.5173e-05 eta 0:44:59 +epoch [45/50] batch [35/500] time 0.877 (0.907) data 0.000 (0.018) loss 1.0869 (1.0098) acc 81.2500 (75.1786) lr 9.5173e-05 eta 0:44:48 +epoch [45/50] batch [40/500] time 0.911 (0.906) data 0.000 (0.016) loss 1.0977 (1.0118) acc 81.2500 (75.3125) lr 9.5173e-05 eta 0:44:41 +epoch [45/50] batch [45/500] time 0.874 (0.902) data 0.000 (0.014) loss 0.8477 (1.0172) acc 75.0000 (75.3472) lr 9.5173e-05 eta 0:44:24 +epoch 
[45/50] batch [50/500] time 0.885 (0.900) data 0.000 (0.013) loss 0.8394 (0.9954) acc 75.0000 (75.5000) lr 9.5173e-05 eta 0:44:14 +epoch [45/50] batch [55/500] time 0.861 (0.899) data 0.000 (0.012) loss 2.0723 (1.0153) acc 46.8750 (74.9432) lr 9.5173e-05 eta 0:44:06 +epoch [45/50] batch [60/500] time 0.874 (0.897) data 0.000 (0.011) loss 1.0752 (1.0067) acc 68.7500 (75.1562) lr 9.5173e-05 eta 0:43:57 +epoch [45/50] batch [65/500] time 0.886 (0.897) data 0.000 (0.010) loss 1.1875 (1.0311) acc 71.8750 (74.7596) lr 9.5173e-05 eta 0:43:53 +epoch [45/50] batch [70/500] time 0.878 (0.897) data 0.000 (0.009) loss 0.7471 (1.0288) acc 78.1250 (74.6429) lr 9.5173e-05 eta 0:43:46 +epoch [45/50] batch [75/500] time 0.885 (0.895) data 0.000 (0.009) loss 0.6201 (1.0310) acc 84.3750 (74.6667) lr 9.5173e-05 eta 0:43:39 +epoch [45/50] batch [80/500] time 0.897 (0.896) data 0.000 (0.008) loss 0.8306 (1.0326) acc 68.7500 (74.6484) lr 9.5173e-05 eta 0:43:35 +epoch [45/50] batch [85/500] time 0.901 (0.895) data 0.000 (0.008) loss 0.9707 (1.0201) acc 71.8750 (74.7059) lr 9.5173e-05 eta 0:43:30 +epoch [45/50] batch [90/500] time 0.905 (0.896) data 0.000 (0.007) loss 0.9194 (1.0250) acc 75.0000 (74.5833) lr 9.5173e-05 eta 0:43:26 +epoch [45/50] batch [95/500] time 0.878 (0.896) data 0.000 (0.007) loss 0.6631 (1.0169) acc 84.3750 (74.6382) lr 9.5173e-05 eta 0:43:22 +epoch [45/50] batch [100/500] time 0.872 (0.895) data 0.000 (0.006) loss 0.8057 (1.0127) acc 81.2500 (74.5938) lr 9.5173e-05 eta 0:43:15 +epoch [45/50] batch [105/500] time 0.876 (0.894) data 0.000 (0.006) loss 1.3301 (1.0144) acc 62.5000 (74.5238) lr 9.5173e-05 eta 0:43:08 +epoch [45/50] batch [110/500] time 0.862 (0.894) data 0.000 (0.006) loss 1.8066 (1.0202) acc 62.5000 (74.3182) lr 9.5173e-05 eta 0:43:03 +epoch [45/50] batch [115/500] time 0.871 (0.894) data 0.000 (0.006) loss 1.0977 (1.0153) acc 75.0000 (74.2935) lr 9.5173e-05 eta 0:42:59 +epoch [45/50] batch [120/500] time 0.882 (0.893) data 0.000 (0.005) loss 1.2725 
(1.0102) acc 75.0000 (74.4271) lr 9.5173e-05 eta 0:42:52 +epoch [45/50] batch [125/500] time 0.911 (0.893) data 0.000 (0.005) loss 1.2529 (1.0131) acc 71.8750 (74.3750) lr 9.5173e-05 eta 0:42:46 +epoch [45/50] batch [130/500] time 0.901 (0.892) data 0.000 (0.005) loss 2.0527 (1.0268) acc 53.1250 (74.1106) lr 9.5173e-05 eta 0:42:40 +epoch [45/50] batch [135/500] time 0.902 (0.892) data 0.000 (0.005) loss 0.5933 (1.0212) acc 84.3750 (74.1898) lr 9.5173e-05 eta 0:42:35 +epoch [45/50] batch [140/500] time 0.898 (0.892) data 0.000 (0.005) loss 0.2369 (1.0121) acc 93.7500 (74.3527) lr 9.5173e-05 eta 0:42:31 +epoch [45/50] batch [145/500] time 0.856 (0.892) data 0.000 (0.005) loss 1.2227 (1.0106) acc 71.8750 (74.3966) lr 9.5173e-05 eta 0:42:26 +epoch [45/50] batch [150/500] time 0.903 (0.892) data 0.000 (0.004) loss 0.7676 (1.0101) acc 81.2500 (74.3333) lr 9.5173e-05 eta 0:42:21 +epoch [45/50] batch [155/500] time 0.871 (0.892) data 0.000 (0.004) loss 1.0850 (1.0077) acc 75.0000 (74.4355) lr 9.5173e-05 eta 0:42:16 +epoch [45/50] batch [160/500] time 0.879 (0.891) data 0.000 (0.004) loss 0.8311 (1.0016) acc 71.8750 (74.5312) lr 9.5173e-05 eta 0:42:11 +epoch [45/50] batch [165/500] time 0.887 (0.892) data 0.000 (0.004) loss 1.0947 (0.9984) acc 68.7500 (74.5833) lr 9.5173e-05 eta 0:42:07 +epoch [45/50] batch [170/500] time 0.868 (0.891) data 0.000 (0.004) loss 1.0645 (1.0010) acc 71.8750 (74.5404) lr 9.5173e-05 eta 0:42:02 +epoch [45/50] batch [175/500] time 0.876 (0.891) data 0.000 (0.004) loss 0.9565 (1.0026) acc 75.0000 (74.4643) lr 9.5173e-05 eta 0:41:56 +epoch [45/50] batch [180/500] time 0.901 (0.890) data 0.000 (0.004) loss 0.7026 (0.9986) acc 75.0000 (74.4792) lr 9.5173e-05 eta 0:41:50 +epoch [45/50] batch [185/500] time 0.922 (0.890) data 0.000 (0.004) loss 1.1533 (0.9974) acc 62.5000 (74.5101) lr 9.5173e-05 eta 0:41:46 +epoch [45/50] batch [190/500] time 0.892 (0.891) data 0.000 (0.004) loss 1.4717 (1.0016) acc 68.7500 (74.5066) lr 9.5173e-05 eta 0:41:42 +epoch 
[45/50] batch [195/500] time 0.899 (0.890) data 0.000 (0.003) loss 1.1133 (1.0031) acc 71.8750 (74.4712) lr 9.5173e-05 eta 0:41:37 +epoch [45/50] batch [200/500] time 0.885 (0.890) data 0.000 (0.003) loss 0.9116 (0.9982) acc 78.1250 (74.6562) lr 9.5173e-05 eta 0:41:33 +epoch [45/50] batch [205/500] time 0.863 (0.890) data 0.000 (0.003) loss 1.1592 (0.9958) acc 75.0000 (74.7561) lr 9.5173e-05 eta 0:41:28 +epoch [45/50] batch [210/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.5972 (0.9946) acc 81.2500 (74.7768) lr 9.5173e-05 eta 0:41:23 +epoch [45/50] batch [215/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.3438 (0.9996) acc 65.6250 (74.7093) lr 9.5173e-05 eta 0:41:19 +epoch [45/50] batch [220/500] time 0.881 (0.890) data 0.000 (0.003) loss 0.6675 (0.9978) acc 84.3750 (74.7443) lr 9.5173e-05 eta 0:41:14 +epoch [45/50] batch [225/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.1846 (0.9963) acc 59.3750 (74.6389) lr 9.5173e-05 eta 0:41:09 +epoch [45/50] batch [230/500] time 0.903 (0.890) data 0.000 (0.003) loss 0.3286 (0.9942) acc 84.3750 (74.5924) lr 9.5173e-05 eta 0:41:05 +epoch [45/50] batch [235/500] time 0.908 (0.890) data 0.001 (0.003) loss 1.1318 (0.9945) acc 68.7500 (74.6011) lr 9.5173e-05 eta 0:41:01 +epoch [45/50] batch [240/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.1885 (0.9976) acc 71.8750 (74.5312) lr 9.5173e-05 eta 0:40:56 +epoch [45/50] batch [245/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.1514 (0.9983) acc 65.6250 (74.4898) lr 9.5173e-05 eta 0:40:51 +epoch [45/50] batch [250/500] time 0.902 (0.890) data 0.000 (0.003) loss 1.5068 (1.0047) acc 65.6250 (74.3750) lr 9.5173e-05 eta 0:40:47 +epoch [45/50] batch [255/500] time 0.881 (0.890) data 0.000 (0.003) loss 0.7871 (1.0059) acc 78.1250 (74.3137) lr 9.5173e-05 eta 0:40:42 +epoch [45/50] batch [260/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.9658 (1.0032) acc 71.8750 (74.3149) lr 9.5173e-05 eta 0:40:39 +epoch [45/50] batch [265/500] time 0.853 (0.890) data 0.000 (0.003) loss 
1.2227 (1.0022) acc 78.1250 (74.3750) lr 9.5173e-05 eta 0:40:33 +epoch [45/50] batch [270/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.5625 (1.0000) acc 87.5000 (74.4329) lr 9.5173e-05 eta 0:40:29 +epoch [45/50] batch [275/500] time 0.900 (0.890) data 0.000 (0.002) loss 1.1396 (0.9990) acc 78.1250 (74.5568) lr 9.5173e-05 eta 0:40:25 +epoch [45/50] batch [280/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.0938 (0.9984) acc 71.8750 (74.5982) lr 9.5173e-05 eta 0:40:21 +epoch [45/50] batch [285/500] time 0.918 (0.890) data 0.000 (0.002) loss 0.5820 (0.9999) acc 81.2500 (74.5724) lr 9.5173e-05 eta 0:40:16 +epoch [45/50] batch [290/500] time 0.885 (0.890) data 0.000 (0.002) loss 0.7446 (1.0016) acc 84.3750 (74.6013) lr 9.5173e-05 eta 0:40:11 +epoch [45/50] batch [295/500] time 0.880 (0.890) data 0.000 (0.002) loss 0.7534 (0.9989) acc 81.2500 (74.6716) lr 9.5173e-05 eta 0:40:07 +epoch [45/50] batch [300/500] time 0.868 (0.890) data 0.000 (0.002) loss 0.9893 (0.9981) acc 71.8750 (74.6875) lr 9.5173e-05 eta 0:40:02 +epoch [45/50] batch [305/500] time 0.906 (0.890) data 0.000 (0.002) loss 1.1768 (0.9980) acc 65.6250 (74.7131) lr 9.5173e-05 eta 0:39:57 +epoch [45/50] batch [310/500] time 0.890 (0.890) data 0.000 (0.002) loss 0.8516 (0.9977) acc 75.0000 (74.7278) lr 9.5173e-05 eta 0:39:53 +epoch [45/50] batch [315/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.5308 (0.9990) acc 81.2500 (74.7123) lr 9.5173e-05 eta 0:39:48 +epoch [45/50] batch [320/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.1367 (0.9979) acc 65.6250 (74.7168) lr 9.5173e-05 eta 0:39:43 +epoch [45/50] batch [325/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.5088 (0.9986) acc 65.6250 (74.7115) lr 9.5173e-05 eta 0:39:39 +epoch [45/50] batch [330/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.2676 (1.0003) acc 68.7500 (74.6875) lr 9.5173e-05 eta 0:39:34 +epoch [45/50] batch [335/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.4990 (1.0001) acc 81.2500 (74.6922) lr 9.5173e-05 eta 0:39:30 
+epoch [45/50] batch [340/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.0176 (0.9988) acc 78.1250 (74.6691) lr 9.5173e-05 eta 0:39:25 +epoch [45/50] batch [345/500] time 0.915 (0.889) data 0.000 (0.002) loss 1.2227 (0.9978) acc 71.8750 (74.6920) lr 9.5173e-05 eta 0:39:20 +epoch [45/50] batch [350/500] time 0.891 (0.889) data 0.003 (0.002) loss 1.5586 (1.0029) acc 59.3750 (74.5893) lr 9.5173e-05 eta 0:39:16 +epoch [45/50] batch [355/500] time 1.009 (0.890) data 0.000 (0.002) loss 0.3618 (0.9983) acc 93.7500 (74.7447) lr 9.5173e-05 eta 0:39:13 +epoch [45/50] batch [360/500] time 0.865 (0.890) data 0.000 (0.002) loss 0.9385 (0.9983) acc 71.8750 (74.7222) lr 9.5173e-05 eta 0:39:08 +epoch [45/50] batch [365/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.1875 (1.0000) acc 68.7500 (74.7346) lr 9.5173e-05 eta 0:39:03 +epoch [45/50] batch [370/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.1953 (0.9996) acc 68.7500 (74.7382) lr 9.5173e-05 eta 0:38:59 +epoch [45/50] batch [375/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.0791 (1.0023) acc 78.1250 (74.7417) lr 9.5173e-05 eta 0:38:54 +epoch [45/50] batch [380/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.7124 (1.0050) acc 75.0000 (74.6711) lr 9.5173e-05 eta 0:38:49 +epoch [45/50] batch [385/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.1797 (1.0028) acc 62.5000 (74.6997) lr 9.5173e-05 eta 0:38:45 +epoch [45/50] batch [390/500] time 0.859 (0.889) data 0.000 (0.002) loss 0.7695 (1.0024) acc 78.1250 (74.6875) lr 9.5173e-05 eta 0:38:39 +epoch [45/50] batch [395/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.6831 (1.0007) acc 87.5000 (74.7152) lr 9.5173e-05 eta 0:38:35 +epoch [45/50] batch [400/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.4258 (1.0026) acc 75.0000 (74.6719) lr 9.5173e-05 eta 0:38:30 +epoch [45/50] batch [405/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.3652 (1.0039) acc 62.5000 (74.6219) lr 9.5173e-05 eta 0:38:26 +epoch [45/50] batch [410/500] time 0.899 (0.889) data 0.000 
(0.002) loss 1.0479 (1.0054) acc 78.1250 (74.6494) lr 9.5173e-05 eta 0:38:22 +epoch [45/50] batch [415/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.0742 (1.0026) acc 84.3750 (74.7139) lr 9.5173e-05 eta 0:38:17 +epoch [45/50] batch [420/500] time 0.880 (0.889) data 0.000 (0.002) loss 0.9585 (1.0013) acc 65.6250 (74.7173) lr 9.5173e-05 eta 0:38:13 +epoch [45/50] batch [425/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.2539 (1.0006) acc 65.6250 (74.7426) lr 9.5173e-05 eta 0:38:08 +epoch [45/50] batch [430/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.9023 (1.0033) acc 78.1250 (74.6802) lr 9.5173e-05 eta 0:38:03 +epoch [45/50] batch [435/500] time 0.881 (0.889) data 0.001 (0.002) loss 0.7749 (1.0050) acc 68.7500 (74.6408) lr 9.5173e-05 eta 0:37:59 +epoch [45/50] batch [440/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.5288 (1.0042) acc 84.3750 (74.6520) lr 9.5173e-05 eta 0:37:54 +epoch [45/50] batch [445/500] time 0.867 (0.889) data 0.000 (0.002) loss 0.7524 (1.0050) acc 78.1250 (74.6208) lr 9.5173e-05 eta 0:37:50 +epoch [45/50] batch [450/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.7026 (1.0034) acc 78.1250 (74.6042) lr 9.5173e-05 eta 0:37:45 +epoch [45/50] batch [455/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.0068 (1.0039) acc 68.7500 (74.6016) lr 9.5173e-05 eta 0:37:41 +epoch [45/50] batch [460/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.4307 (1.0048) acc 68.7500 (74.6060) lr 9.5173e-05 eta 0:37:36 +epoch [45/50] batch [465/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.3916 (1.0066) acc 68.7500 (74.6169) lr 9.5173e-05 eta 0:37:32 +epoch [45/50] batch [470/500] time 0.838 (0.888) data 0.000 (0.002) loss 0.7700 (1.0071) acc 84.3750 (74.5878) lr 9.5173e-05 eta 0:37:27 +epoch [45/50] batch [475/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.4082 (1.0085) acc 71.8750 (74.5592) lr 9.5173e-05 eta 0:37:22 +epoch [45/50] batch [480/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.2305 (1.0090) acc 65.6250 (74.5508) lr 9.5173e-05 
eta 0:37:18 +epoch [45/50] batch [485/500] time 0.860 (0.888) data 0.001 (0.002) loss 1.0811 (1.0075) acc 71.8750 (74.5876) lr 9.5173e-05 eta 0:37:13 +epoch [45/50] batch [490/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.3579 (1.0056) acc 90.6250 (74.6110) lr 9.5173e-05 eta 0:37:09 +epoch [45/50] batch [495/500] time 0.861 (0.888) data 0.000 (0.001) loss 1.1025 (1.0056) acc 78.1250 (74.5896) lr 9.5173e-05 eta 0:37:04 +epoch [45/50] batch [500/500] time 0.919 (0.888) data 0.000 (0.001) loss 0.7642 (1.0040) acc 78.1250 (74.6500) lr 7.0224e-05 eta 0:37:00 +epoch [46/50] batch [5/500] time 0.877 (1.012) data 0.000 (0.131) loss 1.2939 (1.1392) acc 71.8750 (75.6250) lr 7.0224e-05 eta 0:42:05 +epoch [46/50] batch [10/500] time 0.906 (0.952) data 0.000 (0.065) loss 0.7964 (0.9668) acc 78.1250 (76.2500) lr 7.0224e-05 eta 0:39:29 +epoch [46/50] batch [15/500] time 0.897 (0.935) data 0.000 (0.044) loss 0.8037 (0.9340) acc 75.0000 (76.6667) lr 7.0224e-05 eta 0:38:42 +epoch [46/50] batch [20/500] time 0.886 (0.922) data 0.001 (0.033) loss 1.2529 (0.9963) acc 71.8750 (75.9375) lr 7.0224e-05 eta 0:38:06 +epoch [46/50] batch [25/500] time 0.895 (0.917) data 0.000 (0.026) loss 0.8892 (1.0241) acc 81.2500 (75.1250) lr 7.0224e-05 eta 0:37:50 +epoch [46/50] batch [30/500] time 0.883 (0.912) data 0.001 (0.022) loss 1.1533 (1.0090) acc 75.0000 (74.7917) lr 7.0224e-05 eta 0:37:32 +epoch [46/50] batch [35/500] time 0.885 (0.909) data 0.000 (0.019) loss 1.2920 (1.0312) acc 56.2500 (74.1071) lr 7.0224e-05 eta 0:37:21 +epoch [46/50] batch [40/500] time 0.898 (0.909) data 0.000 (0.017) loss 0.9429 (1.0127) acc 68.7500 (74.1406) lr 7.0224e-05 eta 0:37:16 +epoch [46/50] batch [45/500] time 0.884 (0.907) data 0.000 (0.015) loss 0.7612 (1.0039) acc 84.3750 (74.7222) lr 7.0224e-05 eta 0:37:05 +epoch [46/50] batch [50/500] time 0.860 (0.904) data 0.000 (0.013) loss 0.6875 (1.0014) acc 87.5000 (74.9375) lr 7.0224e-05 eta 0:36:55 +epoch [46/50] batch [55/500] time 0.914 (0.903) data 0.000 
(0.012) loss 1.0605 (1.0097) acc 75.0000 (74.7727) lr 7.0224e-05 eta 0:36:46 +epoch [46/50] batch [60/500] time 0.891 (0.902) data 0.000 (0.011) loss 0.8574 (1.0010) acc 81.2500 (74.7396) lr 7.0224e-05 eta 0:36:41 +epoch [46/50] batch [65/500] time 0.882 (0.901) data 0.000 (0.010) loss 1.1113 (1.0043) acc 75.0000 (74.6635) lr 7.0224e-05 eta 0:36:34 +epoch [46/50] batch [70/500] time 0.894 (0.899) data 0.000 (0.010) loss 1.4453 (1.0130) acc 65.6250 (74.5089) lr 7.0224e-05 eta 0:36:24 +epoch [46/50] batch [75/500] time 0.872 (0.898) data 0.000 (0.009) loss 0.8428 (1.0193) acc 81.2500 (74.3750) lr 7.0224e-05 eta 0:36:17 +epoch [46/50] batch [80/500] time 0.880 (0.897) data 0.000 (0.008) loss 0.7305 (1.0111) acc 75.0000 (74.3750) lr 7.0224e-05 eta 0:36:10 +epoch [46/50] batch [85/500] time 0.884 (0.896) data 0.000 (0.008) loss 0.6304 (1.0112) acc 84.3750 (74.3015) lr 7.0224e-05 eta 0:36:04 +epoch [46/50] batch [90/500] time 0.861 (0.895) data 0.000 (0.007) loss 1.0010 (1.0003) acc 75.0000 (74.3403) lr 7.0224e-05 eta 0:35:57 +epoch [46/50] batch [95/500] time 0.876 (0.895) data 0.000 (0.007) loss 1.1895 (0.9959) acc 71.8750 (74.4737) lr 7.0224e-05 eta 0:35:51 +epoch [46/50] batch [100/500] time 0.887 (0.894) data 0.000 (0.007) loss 0.8662 (1.0034) acc 75.0000 (74.2812) lr 7.0224e-05 eta 0:35:45 +epoch [46/50] batch [105/500] time 0.889 (0.895) data 0.000 (0.006) loss 1.0156 (0.9940) acc 71.8750 (74.4940) lr 7.0224e-05 eta 0:35:42 +epoch [46/50] batch [110/500] time 0.872 (0.894) data 0.000 (0.006) loss 0.6948 (0.9935) acc 78.1250 (74.4886) lr 7.0224e-05 eta 0:35:37 +epoch [46/50] batch [115/500] time 0.880 (0.894) data 0.000 (0.006) loss 0.6318 (0.9932) acc 78.1250 (74.4837) lr 7.0224e-05 eta 0:35:32 +epoch [46/50] batch [120/500] time 0.884 (0.893) data 0.000 (0.006) loss 0.9360 (0.9941) acc 81.2500 (74.5312) lr 7.0224e-05 eta 0:35:25 +epoch [46/50] batch [125/500] time 0.882 (0.893) data 0.000 (0.005) loss 1.0293 (0.9982) acc 71.8750 (74.3750) lr 7.0224e-05 eta 
0:35:20 +epoch [46/50] batch [130/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.4941 (0.9993) acc 87.5000 (74.4712) lr 7.0224e-05 eta 0:35:14 +epoch [46/50] batch [135/500] time 0.865 (0.891) data 0.000 (0.005) loss 0.9966 (0.9990) acc 75.0000 (74.5139) lr 7.0224e-05 eta 0:35:08 +epoch [46/50] batch [140/500] time 0.921 (0.892) data 0.000 (0.005) loss 0.5894 (0.9982) acc 81.2500 (74.6205) lr 7.0224e-05 eta 0:35:04 +epoch [46/50] batch [145/500] time 0.901 (0.892) data 0.000 (0.005) loss 0.9146 (0.9951) acc 71.8750 (74.6983) lr 7.0224e-05 eta 0:35:01 +epoch [46/50] batch [150/500] time 0.894 (0.892) data 0.000 (0.005) loss 0.8267 (0.9951) acc 81.2500 (74.7083) lr 7.0224e-05 eta 0:34:56 +epoch [46/50] batch [155/500] time 0.857 (0.892) data 0.000 (0.004) loss 1.2744 (0.9946) acc 71.8750 (74.7984) lr 7.0224e-05 eta 0:34:50 +epoch [46/50] batch [160/500] time 0.880 (0.892) data 0.000 (0.004) loss 0.4629 (0.9932) acc 90.6250 (74.9219) lr 7.0224e-05 eta 0:34:46 +epoch [46/50] batch [165/500] time 0.862 (0.892) data 0.000 (0.004) loss 1.3271 (0.9912) acc 65.6250 (74.9432) lr 7.0224e-05 eta 0:34:41 +epoch [46/50] batch [170/500] time 0.872 (0.891) data 0.000 (0.004) loss 1.1260 (0.9864) acc 68.7500 (75.0184) lr 7.0224e-05 eta 0:34:36 +epoch [46/50] batch [175/500] time 0.881 (0.891) data 0.000 (0.004) loss 1.2451 (0.9911) acc 75.0000 (74.9643) lr 7.0224e-05 eta 0:34:30 +epoch [46/50] batch [180/500] time 0.873 (0.890) data 0.000 (0.004) loss 0.6621 (0.9906) acc 78.1250 (75.0347) lr 7.0224e-05 eta 0:34:25 +epoch [46/50] batch [185/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.9448 (0.9911) acc 78.1250 (74.8480) lr 7.0224e-05 eta 0:34:19 +epoch [46/50] batch [190/500] time 0.858 (0.889) data 0.000 (0.004) loss 0.6270 (0.9920) acc 75.0000 (74.7697) lr 7.0224e-05 eta 0:34:13 +epoch [46/50] batch [195/500] time 0.868 (0.889) data 0.000 (0.004) loss 1.5303 (0.9980) acc 65.6250 (74.6154) lr 7.0224e-05 eta 0:34:09 +epoch [46/50] batch [200/500] time 0.884 (0.889) data 
0.000 (0.003) loss 0.8188 (0.9938) acc 81.2500 (74.7656) lr 7.0224e-05 eta 0:34:04 +epoch [46/50] batch [205/500] time 0.872 (0.889) data 0.000 (0.003) loss 0.8872 (0.9957) acc 71.8750 (74.6189) lr 7.0224e-05 eta 0:34:01 +epoch [46/50] batch [210/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.0000 (0.9938) acc 68.7500 (74.6577) lr 7.0224e-05 eta 0:33:57 +epoch [46/50] batch [215/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.6699 (0.9979) acc 65.6250 (74.6076) lr 7.0224e-05 eta 0:33:53 +epoch [46/50] batch [220/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.7817 (0.9978) acc 81.2500 (74.6023) lr 7.0224e-05 eta 0:33:49 +epoch [46/50] batch [225/500] time 0.890 (0.890) data 0.000 (0.003) loss 0.5635 (0.9969) acc 87.5000 (74.6389) lr 7.0224e-05 eta 0:33:43 +epoch [46/50] batch [230/500] time 0.902 (0.889) data 0.000 (0.003) loss 0.9106 (0.9961) acc 81.2500 (74.6739) lr 7.0224e-05 eta 0:33:38 +epoch [46/50] batch [235/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.3057 (1.0023) acc 68.7500 (74.5479) lr 7.0224e-05 eta 0:33:34 +epoch [46/50] batch [240/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.9253 (1.0019) acc 75.0000 (74.5833) lr 7.0224e-05 eta 0:33:29 +epoch [46/50] batch [245/500] time 1.002 (0.890) data 0.000 (0.003) loss 1.1211 (1.0032) acc 68.7500 (74.5918) lr 7.0224e-05 eta 0:33:27 +epoch [46/50] batch [250/500] time 0.900 (0.890) data 0.000 (0.003) loss 1.2578 (1.0066) acc 65.6250 (74.4375) lr 7.0224e-05 eta 0:33:22 +epoch [46/50] batch [255/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.0332 (1.0099) acc 71.8750 (74.3627) lr 7.0224e-05 eta 0:33:17 +epoch [46/50] batch [260/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.1211 (1.0130) acc 68.7500 (74.2909) lr 7.0224e-05 eta 0:33:12 +epoch [46/50] batch [265/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.8237 (1.0102) acc 75.0000 (74.3042) lr 7.0224e-05 eta 0:33:08 +epoch [46/50] batch [270/500] time 0.851 (0.889) data 0.000 (0.003) loss 1.2314 (1.0114) acc 65.6250 (74.2940) lr 
7.0224e-05 eta 0:33:03 +epoch [46/50] batch [275/500] time 0.924 (0.889) data 0.000 (0.003) loss 1.0674 (1.0107) acc 75.0000 (74.3182) lr 7.0224e-05 eta 0:32:58 +epoch [46/50] batch [280/500] time 0.903 (0.889) data 0.001 (0.003) loss 0.7837 (1.0093) acc 75.0000 (74.3638) lr 7.0224e-05 eta 0:32:54 +epoch [46/50] batch [285/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.8174 (1.0107) acc 53.1250 (74.2654) lr 7.0224e-05 eta 0:32:49 +epoch [46/50] batch [290/500] time 0.865 (0.889) data 0.000 (0.002) loss 1.9463 (1.0139) acc 62.5000 (74.2241) lr 7.0224e-05 eta 0:32:45 +epoch [46/50] batch [295/500] time 0.890 (0.889) data 0.000 (0.002) loss 0.9448 (1.0107) acc 81.2500 (74.3008) lr 7.0224e-05 eta 0:32:40 +epoch [46/50] batch [300/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.0195 (1.0084) acc 71.8750 (74.3750) lr 7.0224e-05 eta 0:32:36 +epoch [46/50] batch [305/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.2012 (1.0120) acc 68.7500 (74.3033) lr 7.0224e-05 eta 0:32:32 +epoch [46/50] batch [310/500] time 0.860 (0.889) data 0.000 (0.002) loss 0.9507 (1.0120) acc 84.3750 (74.3548) lr 7.0224e-05 eta 0:32:27 +epoch [46/50] batch [315/500] time 0.849 (0.889) data 0.000 (0.002) loss 1.7930 (1.0194) acc 56.2500 (74.2063) lr 7.0224e-05 eta 0:32:22 +epoch [46/50] batch [320/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.8916 (1.0170) acc 84.3750 (74.2773) lr 7.0224e-05 eta 0:32:17 +epoch [46/50] batch [325/500] time 0.862 (0.889) data 0.000 (0.002) loss 0.7310 (1.0183) acc 71.8750 (74.2115) lr 7.0224e-05 eta 0:32:12 +epoch [46/50] batch [330/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.5293 (1.0165) acc 84.3750 (74.2898) lr 7.0224e-05 eta 0:32:08 +epoch [46/50] batch [335/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.9868 (1.0166) acc 75.0000 (74.2537) lr 7.0224e-05 eta 0:32:03 +epoch [46/50] batch [340/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.2998 (1.0195) acc 71.8750 (74.2555) lr 7.0224e-05 eta 0:31:59 +epoch [46/50] batch [345/500] time 0.883 
(0.889) data 0.000 (0.002) loss 1.8779 (1.0198) acc 62.5000 (74.2935) lr 7.0224e-05 eta 0:31:55 +epoch [46/50] batch [350/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9316 (1.0197) acc 75.0000 (74.3393) lr 7.0224e-05 eta 0:31:50 +epoch [46/50] batch [355/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.6035 (1.0176) acc 56.2500 (74.3310) lr 7.0224e-05 eta 0:31:45 +epoch [46/50] batch [360/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9673 (1.0157) acc 68.7500 (74.3316) lr 7.0224e-05 eta 0:31:40 +epoch [46/50] batch [365/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.5225 (1.0185) acc 65.6250 (74.2637) lr 7.0224e-05 eta 0:31:36 +epoch [46/50] batch [370/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.9316 (1.0180) acc 75.0000 (74.2230) lr 7.0224e-05 eta 0:31:31 +epoch [46/50] batch [375/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.2510 (1.0205) acc 68.7500 (74.2000) lr 7.0224e-05 eta 0:31:27 +epoch [46/50] batch [380/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.0215 (1.0214) acc 71.8750 (74.1776) lr 7.0224e-05 eta 0:31:23 +epoch [46/50] batch [385/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.8770 (1.0204) acc 81.2500 (74.2208) lr 7.0224e-05 eta 0:31:18 +epoch [46/50] batch [390/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.5488 (1.0239) acc 65.6250 (74.1426) lr 7.0224e-05 eta 0:31:14 +epoch [46/50] batch [395/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.5869 (1.0216) acc 84.3750 (74.2009) lr 7.0224e-05 eta 0:31:09 +epoch [46/50] batch [400/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7534 (1.0232) acc 81.2500 (74.1484) lr 7.0224e-05 eta 0:31:04 +epoch [46/50] batch [405/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.9526 (1.0227) acc 78.1250 (74.1898) lr 7.0224e-05 eta 0:31:00 +epoch [46/50] batch [410/500] time 0.909 (0.888) data 0.000 (0.002) loss 0.8457 (1.0249) acc 81.2500 (74.1845) lr 7.0224e-05 eta 0:30:56 +epoch [46/50] batch [415/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.8857 (1.0244) acc 75.0000 
(74.2093) lr 7.0224e-05 eta 0:30:51 +epoch [46/50] batch [420/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.0586 (1.0233) acc 71.8750 (74.2560) lr 7.0224e-05 eta 0:30:47 +epoch [46/50] batch [425/500] time 0.916 (0.888) data 0.000 (0.002) loss 1.2842 (1.0252) acc 71.8750 (74.2353) lr 7.0224e-05 eta 0:30:43 +epoch [46/50] batch [430/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1123 (1.0256) acc 71.8750 (74.2078) lr 7.0224e-05 eta 0:30:38 +epoch [46/50] batch [435/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.7090 (1.0246) acc 84.3750 (74.2098) lr 7.0224e-05 eta 0:30:34 +epoch [46/50] batch [440/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.3496 (1.0272) acc 62.5000 (74.1619) lr 7.0224e-05 eta 0:30:29 +epoch [46/50] batch [445/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.0742 (1.0265) acc 71.8750 (74.1362) lr 7.0224e-05 eta 0:30:25 +epoch [46/50] batch [450/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.8960 (1.0254) acc 78.1250 (74.1597) lr 7.0224e-05 eta 0:30:20 +epoch [46/50] batch [455/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.7549 (1.0253) acc 78.1250 (74.1896) lr 7.0224e-05 eta 0:30:16 +epoch [46/50] batch [460/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.1777 (1.0259) acc 68.7500 (74.2052) lr 7.0224e-05 eta 0:30:11 +epoch [46/50] batch [465/500] time 0.855 (0.888) data 0.000 (0.002) loss 0.8315 (1.0271) acc 84.3750 (74.2137) lr 7.0224e-05 eta 0:30:06 +epoch [46/50] batch [470/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.5273 (1.0253) acc 84.3750 (74.2886) lr 7.0224e-05 eta 0:30:01 +epoch [46/50] batch [475/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.5708 (1.0237) acc 78.1250 (74.3421) lr 7.0224e-05 eta 0:29:56 +epoch [46/50] batch [480/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.3682 (1.0257) acc 62.5000 (74.2839) lr 7.0224e-05 eta 0:29:52 +epoch [46/50] batch [485/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.6846 (1.0260) acc 90.6250 (74.3299) lr 7.0224e-05 eta 0:29:47 +epoch [46/50] batch [490/500] 
time 0.889 (0.887) data 0.000 (0.002) loss 0.6685 (1.0251) acc 78.1250 (74.3240) lr 7.0224e-05 eta 0:29:43 +epoch [46/50] batch [495/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.4658 (1.0272) acc 53.1250 (74.2298) lr 7.0224e-05 eta 0:29:39 +epoch [46/50] batch [500/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.9336 (1.0260) acc 78.1250 (74.2500) lr 4.8943e-05 eta 0:29:34 +epoch [47/50] batch [5/500] time 0.880 (1.027) data 0.000 (0.131) loss 0.8149 (1.0373) acc 78.1250 (73.1250) lr 4.8943e-05 eta 0:34:08 +epoch [47/50] batch [10/500] time 0.845 (0.954) data 0.000 (0.065) loss 0.6040 (0.9730) acc 75.0000 (73.1250) lr 4.8943e-05 eta 0:31:38 +epoch [47/50] batch [15/500] time 0.878 (0.930) data 0.000 (0.044) loss 0.9736 (0.9600) acc 65.6250 (72.2917) lr 4.8943e-05 eta 0:30:45 +epoch [47/50] batch [20/500] time 0.868 (0.917) data 0.000 (0.033) loss 1.3242 (0.9142) acc 81.2500 (75.0000) lr 4.8943e-05 eta 0:30:14 +epoch [47/50] batch [25/500] time 0.874 (0.911) data 0.000 (0.026) loss 0.7778 (0.9658) acc 75.0000 (73.5000) lr 4.8943e-05 eta 0:29:59 +epoch [47/50] batch [30/500] time 0.864 (0.905) data 0.000 (0.022) loss 0.9971 (0.9558) acc 71.8750 (74.1667) lr 4.8943e-05 eta 0:29:43 +epoch [47/50] batch [35/500] time 0.909 (0.904) data 0.001 (0.019) loss 0.7021 (0.9512) acc 81.2500 (75.2679) lr 4.8943e-05 eta 0:29:35 +epoch [47/50] batch [40/500] time 0.885 (0.901) data 0.000 (0.017) loss 0.8599 (0.9148) acc 71.8750 (76.2500) lr 4.8943e-05 eta 0:29:25 +epoch [47/50] batch [45/500] time 0.884 (0.904) data 0.000 (0.015) loss 0.8843 (0.9048) acc 65.6250 (76.5278) lr 4.8943e-05 eta 0:29:26 +epoch [47/50] batch [50/500] time 0.893 (0.904) data 0.000 (0.013) loss 0.8208 (0.9454) acc 81.2500 (75.4375) lr 4.8943e-05 eta 0:29:22 +epoch [47/50] batch [55/500] time 0.881 (0.902) data 0.000 (0.012) loss 1.1963 (0.9602) acc 65.6250 (75.1705) lr 4.8943e-05 eta 0:29:14 +epoch [47/50] batch [60/500] time 0.898 (0.901) data 0.000 (0.011) loss 1.2246 (0.9562) acc 71.8750 (75.4688) 
lr 4.8943e-05 eta 0:29:08 +epoch [47/50] batch [65/500] time 0.895 (0.900) data 0.000 (0.010) loss 0.7661 (0.9638) acc 81.2500 (75.4327) lr 4.8943e-05 eta 0:29:01 +epoch [47/50] batch [70/500] time 0.896 (0.900) data 0.000 (0.010) loss 0.8564 (0.9523) acc 75.0000 (75.7143) lr 4.8943e-05 eta 0:28:56 +epoch [47/50] batch [75/500] time 0.842 (0.899) data 0.000 (0.009) loss 0.7104 (0.9386) acc 71.8750 (75.9583) lr 4.8943e-05 eta 0:28:50 +epoch [47/50] batch [80/500] time 0.884 (0.898) data 0.000 (0.008) loss 1.2207 (0.9416) acc 75.0000 (75.8594) lr 4.8943e-05 eta 0:28:43 +epoch [47/50] batch [85/500] time 0.961 (0.898) data 0.000 (0.008) loss 0.7207 (0.9440) acc 81.2500 (75.9559) lr 4.8943e-05 eta 0:28:40 +epoch [47/50] batch [90/500] time 0.869 (0.897) data 0.000 (0.007) loss 0.9790 (0.9461) acc 71.8750 (75.8333) lr 4.8943e-05 eta 0:28:34 +epoch [47/50] batch [95/500] time 0.910 (0.897) data 0.000 (0.007) loss 2.0801 (0.9545) acc 59.3750 (75.6579) lr 4.8943e-05 eta 0:28:28 +epoch [47/50] batch [100/500] time 0.882 (0.896) data 0.000 (0.007) loss 0.6621 (0.9597) acc 71.8750 (75.4688) lr 4.8943e-05 eta 0:28:23 +epoch [47/50] batch [105/500] time 0.905 (0.896) data 0.000 (0.006) loss 0.6455 (0.9557) acc 87.5000 (75.7143) lr 4.8943e-05 eta 0:28:17 +epoch [47/50] batch [110/500] time 0.897 (0.896) data 0.000 (0.006) loss 1.2373 (0.9518) acc 71.8750 (75.8239) lr 4.8943e-05 eta 0:28:12 +epoch [47/50] batch [115/500] time 0.889 (0.895) data 0.000 (0.006) loss 0.6157 (0.9619) acc 75.0000 (75.7880) lr 4.8943e-05 eta 0:28:06 +epoch [47/50] batch [120/500] time 0.870 (0.894) data 0.000 (0.006) loss 1.2461 (0.9601) acc 65.6250 (75.9635) lr 4.8943e-05 eta 0:28:00 +epoch [47/50] batch [125/500] time 0.860 (0.893) data 0.000 (0.005) loss 1.0098 (0.9596) acc 65.6250 (75.8000) lr 4.8943e-05 eta 0:27:55 +epoch [47/50] batch [130/500] time 0.877 (0.893) data 0.000 (0.005) loss 0.7725 (0.9573) acc 62.5000 (75.6250) lr 4.8943e-05 eta 0:27:49 +epoch [47/50] batch [135/500] time 0.877 
(0.892) data 0.000 (0.005) loss 0.8325 (0.9592) acc 78.1250 (75.4630) lr 4.8943e-05 eta 0:27:44 +epoch [47/50] batch [140/500] time 0.924 (0.892) data 0.000 (0.005) loss 1.1221 (0.9603) acc 68.7500 (75.3125) lr 4.8943e-05 eta 0:27:39 +epoch [47/50] batch [145/500] time 0.898 (0.892) data 0.000 (0.005) loss 1.5752 (0.9686) acc 59.3750 (75.2586) lr 4.8943e-05 eta 0:27:34 +epoch [47/50] batch [150/500] time 0.857 (0.892) data 0.000 (0.005) loss 0.6792 (0.9624) acc 84.3750 (75.5000) lr 4.8943e-05 eta 0:27:30 +epoch [47/50] batch [155/500] time 0.883 (0.892) data 0.000 (0.004) loss 0.7271 (0.9600) acc 78.1250 (75.5645) lr 4.8943e-05 eta 0:27:25 +epoch [47/50] batch [160/500] time 0.910 (0.892) data 0.000 (0.004) loss 0.7412 (0.9600) acc 81.2500 (75.6055) lr 4.8943e-05 eta 0:27:20 +epoch [47/50] batch [165/500] time 0.881 (0.891) data 0.000 (0.004) loss 1.0938 (0.9646) acc 75.0000 (75.5682) lr 4.8943e-05 eta 0:27:15 +epoch [47/50] batch [170/500] time 0.906 (0.891) data 0.000 (0.004) loss 0.8828 (0.9620) acc 81.2500 (75.6985) lr 4.8943e-05 eta 0:27:10 +epoch [47/50] batch [175/500] time 0.876 (0.891) data 0.001 (0.004) loss 0.9639 (0.9623) acc 71.8750 (75.7500) lr 4.8943e-05 eta 0:27:05 +epoch [47/50] batch [180/500] time 0.857 (0.891) data 0.000 (0.004) loss 1.4775 (0.9669) acc 71.8750 (75.7465) lr 4.8943e-05 eta 0:27:01 +epoch [47/50] batch [185/500] time 0.873 (0.891) data 0.000 (0.004) loss 1.7881 (0.9738) acc 53.1250 (75.4899) lr 4.8943e-05 eta 0:26:57 +epoch [47/50] batch [190/500] time 0.882 (0.891) data 0.000 (0.004) loss 1.1113 (0.9748) acc 75.0000 (75.4441) lr 4.8943e-05 eta 0:26:52 +epoch [47/50] batch [195/500] time 0.875 (0.890) data 0.000 (0.004) loss 1.5615 (0.9768) acc 68.7500 (75.4487) lr 4.8943e-05 eta 0:26:47 +epoch [47/50] batch [200/500] time 0.859 (0.890) data 0.000 (0.003) loss 1.4316 (0.9768) acc 81.2500 (75.4531) lr 4.8943e-05 eta 0:26:42 +epoch [47/50] batch [205/500] time 0.860 (0.890) data 0.000 (0.003) loss 1.4590 (0.9850) acc 68.7500 
(75.2896) lr 4.8943e-05 eta 0:26:37 +epoch [47/50] batch [210/500] time 0.886 (0.890) data 0.000 (0.003) loss 1.2480 (0.9865) acc 75.0000 (75.3274) lr 4.8943e-05 eta 0:26:32 +epoch [47/50] batch [215/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.9341 (0.9873) acc 78.1250 (75.3924) lr 4.8943e-05 eta 0:26:28 +epoch [47/50] batch [220/500] time 0.887 (0.890) data 0.000 (0.003) loss 1.1855 (0.9879) acc 71.8750 (75.3835) lr 4.8943e-05 eta 0:26:23 +epoch [47/50] batch [225/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.1152 (0.9920) acc 78.1250 (75.2917) lr 4.8943e-05 eta 0:26:18 +epoch [47/50] batch [230/500] time 0.868 (0.889) data 0.000 (0.003) loss 0.6147 (0.9936) acc 84.3750 (75.2853) lr 4.8943e-05 eta 0:26:14 +epoch [47/50] batch [235/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.9160 (0.9952) acc 62.5000 (75.2793) lr 4.8943e-05 eta 0:26:09 +epoch [47/50] batch [240/500] time 0.898 (0.889) data 0.000 (0.003) loss 0.8218 (1.0004) acc 65.6250 (75.0781) lr 4.8943e-05 eta 0:26:05 +epoch [47/50] batch [245/500] time 0.867 (0.889) data 0.000 (0.003) loss 1.0039 (0.9981) acc 65.6250 (75.1020) lr 4.8943e-05 eta 0:26:00 +epoch [47/50] batch [250/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.8647 (0.9981) acc 78.1250 (75.0125) lr 4.8943e-05 eta 0:25:55 +epoch [47/50] batch [255/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.0811 (0.9968) acc 71.8750 (74.9755) lr 4.8943e-05 eta 0:25:51 +epoch [47/50] batch [260/500] time 0.913 (0.889) data 0.000 (0.003) loss 0.9663 (0.9966) acc 71.8750 (74.9159) lr 4.8943e-05 eta 0:25:46 +epoch [47/50] batch [265/500] time 0.889 (0.889) data 0.000 (0.003) loss 1.0518 (1.0006) acc 78.1250 (74.8585) lr 4.8943e-05 eta 0:25:42 +epoch [47/50] batch [270/500] time 0.862 (0.889) data 0.001 (0.003) loss 0.8511 (0.9998) acc 81.2500 (74.9074) lr 4.8943e-05 eta 0:25:37 +epoch [47/50] batch [275/500] time 0.892 (0.889) data 0.001 (0.003) loss 1.3838 (1.0053) acc 62.5000 (74.8182) lr 4.8943e-05 eta 0:25:33 +epoch [47/50] batch [280/500] 
time 0.882 (0.889) data 0.000 (0.003) loss 0.9619 (1.0068) acc 81.2500 (74.8326) lr 4.8943e-05 eta 0:25:29 +epoch [47/50] batch [285/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.9336 (1.0069) acc 81.2500 (74.8684) lr 4.8943e-05 eta 0:25:24 +epoch [47/50] batch [290/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.3096 (1.0072) acc 53.1250 (74.8276) lr 4.8943e-05 eta 0:25:20 +epoch [47/50] batch [295/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.8252 (1.0062) acc 81.2500 (74.9047) lr 4.8943e-05 eta 0:25:15 +epoch [47/50] batch [300/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.0928 (1.0101) acc 68.7500 (74.8646) lr 4.8943e-05 eta 0:25:10 +epoch [47/50] batch [305/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3457 (1.0113) acc 65.6250 (74.8156) lr 4.8943e-05 eta 0:25:05 +epoch [47/50] batch [310/500] time 0.888 (0.888) data 0.000 (0.002) loss 0.6602 (1.0063) acc 81.2500 (74.8992) lr 4.8943e-05 eta 0:25:00 +epoch [47/50] batch [315/500] time 0.854 (0.888) data 0.000 (0.002) loss 1.3213 (1.0104) acc 78.1250 (74.8512) lr 4.8943e-05 eta 0:24:56 +epoch [47/50] batch [320/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.0059 (1.0104) acc 59.3750 (74.8535) lr 4.8943e-05 eta 0:24:51 +epoch [47/50] batch [325/500] time 0.996 (0.888) data 0.000 (0.002) loss 0.8198 (1.0099) acc 81.2500 (74.8654) lr 4.8943e-05 eta 0:24:46 +epoch [47/50] batch [330/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.3789 (1.0121) acc 65.6250 (74.8295) lr 4.8943e-05 eta 0:24:42 +epoch [47/50] batch [335/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.7881 (1.0130) acc 75.0000 (74.7948) lr 4.8943e-05 eta 0:24:37 +epoch [47/50] batch [340/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.1494 (1.0128) acc 75.0000 (74.7426) lr 4.8943e-05 eta 0:24:32 +epoch [47/50] batch [345/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.9653 (1.0119) acc 75.0000 (74.7554) lr 4.8943e-05 eta 0:24:28 +epoch [47/50] batch [350/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.4229 (1.0113) acc 
78.1250 (74.8214) lr 4.8943e-05 eta 0:24:23 +epoch [47/50] batch [355/500] time 0.850 (0.887) data 0.000 (0.002) loss 1.2285 (1.0139) acc 68.7500 (74.7183) lr 4.8943e-05 eta 0:24:19 +epoch [47/50] batch [360/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.0469 (1.0163) acc 78.1250 (74.6701) lr 4.8943e-05 eta 0:24:15 +epoch [47/50] batch [365/500] time 0.858 (0.887) data 0.000 (0.002) loss 0.7822 (1.0170) acc 81.2500 (74.6575) lr 4.8943e-05 eta 0:24:10 +epoch [47/50] batch [370/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.8022 (1.0158) acc 78.1250 (74.6791) lr 4.8943e-05 eta 0:24:06 +epoch [47/50] batch [375/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2842 (1.0163) acc 71.8750 (74.6583) lr 4.8943e-05 eta 0:24:01 +epoch [47/50] batch [380/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.6631 (1.0127) acc 84.3750 (74.7122) lr 4.8943e-05 eta 0:23:57 +epoch [47/50] batch [385/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.0146 (1.0141) acc 75.0000 (74.6997) lr 4.8943e-05 eta 0:23:52 +epoch [47/50] batch [390/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.4004 (1.0147) acc 75.0000 (74.6955) lr 4.8943e-05 eta 0:23:48 +epoch [47/50] batch [395/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5630 (1.0145) acc 84.3750 (74.6440) lr 4.8943e-05 eta 0:23:43 +epoch [47/50] batch [400/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.0527 (1.0133) acc 78.1250 (74.6484) lr 4.8943e-05 eta 0:23:39 +epoch [47/50] batch [405/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.0879 (1.0121) acc 71.8750 (74.6605) lr 4.8943e-05 eta 0:23:35 +epoch [47/50] batch [410/500] time 0.917 (0.887) data 0.000 (0.002) loss 0.8540 (1.0109) acc 81.2500 (74.6799) lr 4.8943e-05 eta 0:23:31 +epoch [47/50] batch [415/500] time 0.918 (0.887) data 0.000 (0.002) loss 0.7280 (1.0085) acc 71.8750 (74.7139) lr 4.8943e-05 eta 0:23:26 +epoch [47/50] batch [420/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.5889 (1.0107) acc 65.6250 (74.6503) lr 4.8943e-05 eta 0:23:22 +epoch [47/50] batch 
[425/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.8770 (1.0118) acc 75.0000 (74.6029) lr 4.8943e-05 eta 0:23:17 +epoch [47/50] batch [430/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.7192 (1.0090) acc 75.0000 (74.6657) lr 4.8943e-05 eta 0:23:12 +epoch [47/50] batch [435/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.4482 (1.0084) acc 62.5000 (74.6408) lr 4.8943e-05 eta 0:23:08 +epoch [47/50] batch [440/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.2529 (1.0078) acc 71.8750 (74.6662) lr 4.8943e-05 eta 0:23:03 +epoch [47/50] batch [445/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.4912 (1.0128) acc 65.6250 (74.5506) lr 4.8943e-05 eta 0:22:59 +epoch [47/50] batch [450/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.1934 (1.0145) acc 84.3750 (74.5486) lr 4.8943e-05 eta 0:22:54 +epoch [47/50] batch [455/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.1436 (1.0169) acc 75.0000 (74.5192) lr 4.8943e-05 eta 0:22:50 +epoch [47/50] batch [460/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.6479 (1.0148) acc 87.5000 (74.5448) lr 4.8943e-05 eta 0:22:45 +epoch [47/50] batch [465/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.9033 (1.0128) acc 71.8750 (74.5565) lr 4.8943e-05 eta 0:22:41 +epoch [47/50] batch [470/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.8916 (1.0107) acc 78.1250 (74.6277) lr 4.8943e-05 eta 0:22:37 +epoch [47/50] batch [475/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.0566 (1.0112) acc 71.8750 (74.5987) lr 4.8943e-05 eta 0:22:32 +epoch [47/50] batch [480/500] time 0.905 (0.887) data 0.000 (0.002) loss 1.5029 (1.0102) acc 62.5000 (74.6484) lr 4.8943e-05 eta 0:22:28 +epoch [47/50] batch [485/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.0898 (1.0093) acc 75.0000 (74.6778) lr 4.8943e-05 eta 0:22:24 +epoch [47/50] batch [490/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.0723 (1.0119) acc 68.7500 (74.6110) lr 4.8943e-05 eta 0:22:19 +epoch [47/50] batch [495/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.3125 
(1.0103) acc 75.0000 (74.6149) lr 4.8943e-05 eta 0:22:14 +epoch [47/50] batch [500/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.2480 (1.0111) acc 75.0000 (74.6437) lr 3.1417e-05 eta 0:22:10 +epoch [48/50] batch [5/500] time 0.872 (1.004) data 0.000 (0.124) loss 1.2266 (0.9394) acc 62.5000 (75.0000) lr 3.1417e-05 eta 0:25:01 +epoch [48/50] batch [10/500] time 0.861 (0.938) data 0.000 (0.062) loss 1.1953 (1.0327) acc 71.8750 (72.1875) lr 3.1417e-05 eta 0:23:17 +epoch [48/50] batch [15/500] time 0.864 (0.918) data 0.000 (0.041) loss 0.8130 (1.0877) acc 78.1250 (72.2917) lr 3.1417e-05 eta 0:22:43 +epoch [48/50] batch [20/500] time 0.884 (0.914) data 0.000 (0.031) loss 1.1963 (1.1310) acc 71.8750 (71.2500) lr 3.1417e-05 eta 0:22:32 +epoch [48/50] batch [25/500] time 0.895 (0.907) data 0.000 (0.025) loss 0.7676 (1.1214) acc 78.1250 (71.6250) lr 3.1417e-05 eta 0:22:18 +epoch [48/50] batch [30/500] time 0.893 (0.902) data 0.000 (0.021) loss 0.9692 (1.1025) acc 65.6250 (71.2500) lr 3.1417e-05 eta 0:22:05 +epoch [48/50] batch [35/500] time 0.879 (0.899) data 0.000 (0.018) loss 0.8394 (1.0763) acc 75.0000 (71.7857) lr 3.1417e-05 eta 0:21:57 +epoch [48/50] batch [40/500] time 0.866 (0.896) data 0.000 (0.016) loss 1.0195 (1.0418) acc 68.7500 (72.8125) lr 3.1417e-05 eta 0:21:48 +epoch [48/50] batch [45/500] time 0.874 (0.894) data 0.000 (0.014) loss 1.2812 (1.0252) acc 68.7500 (73.0556) lr 3.1417e-05 eta 0:21:41 +epoch [48/50] batch [50/500] time 0.881 (0.893) data 0.000 (0.013) loss 1.4033 (1.0346) acc 65.6250 (72.9375) lr 3.1417e-05 eta 0:21:35 +epoch [48/50] batch [55/500] time 0.907 (0.894) data 0.000 (0.011) loss 1.2490 (1.0288) acc 62.5000 (73.2386) lr 3.1417e-05 eta 0:21:32 +epoch [48/50] batch [60/500] time 0.992 (0.895) data 0.000 (0.011) loss 0.8813 (1.0117) acc 78.1250 (73.5938) lr 3.1417e-05 eta 0:21:28 +epoch [48/50] batch [65/500] time 0.888 (0.894) data 0.000 (0.010) loss 0.7129 (1.0053) acc 84.3750 (73.8462) lr 3.1417e-05 eta 0:21:22 +epoch [48/50] batch 
[70/500] time 0.907 (0.895) data 0.000 (0.009) loss 1.0771 (1.0057) acc 71.8750 (73.8393) lr 3.1417e-05 eta 0:21:19 +epoch [48/50] batch [75/500] time 0.892 (0.894) data 0.000 (0.008) loss 0.5415 (1.0025) acc 84.3750 (73.6667) lr 3.1417e-05 eta 0:21:13 +epoch [48/50] batch [80/500] time 0.911 (0.893) data 0.000 (0.008) loss 0.9146 (1.0112) acc 90.6250 (73.8281) lr 3.1417e-05 eta 0:21:07 +epoch [48/50] batch [85/500] time 0.908 (0.893) data 0.000 (0.008) loss 0.5171 (0.9979) acc 81.2500 (73.9338) lr 3.1417e-05 eta 0:21:03 +epoch [48/50] batch [90/500] time 0.880 (0.892) data 0.000 (0.007) loss 0.9912 (0.9995) acc 78.1250 (73.9931) lr 3.1417e-05 eta 0:20:57 +epoch [48/50] batch [95/500] time 0.873 (0.891) data 0.001 (0.007) loss 1.0498 (0.9918) acc 62.5000 (73.9474) lr 3.1417e-05 eta 0:20:52 +epoch [48/50] batch [100/500] time 0.911 (0.891) data 0.000 (0.006) loss 0.8110 (0.9957) acc 75.0000 (73.8125) lr 3.1417e-05 eta 0:20:47 +epoch [48/50] batch [105/500] time 0.886 (0.891) data 0.000 (0.006) loss 1.2256 (1.0019) acc 65.6250 (73.6310) lr 3.1417e-05 eta 0:20:43 +epoch [48/50] batch [110/500] time 0.871 (0.891) data 0.000 (0.006) loss 1.0137 (1.0051) acc 68.7500 (73.6080) lr 3.1417e-05 eta 0:20:37 +epoch [48/50] batch [115/500] time 0.893 (0.890) data 0.000 (0.006) loss 1.0029 (1.0092) acc 78.1250 (73.6685) lr 3.1417e-05 eta 0:20:32 +epoch [48/50] batch [120/500] time 0.863 (0.890) data 0.000 (0.005) loss 1.0654 (1.0059) acc 68.7500 (73.6979) lr 3.1417e-05 eta 0:20:27 +epoch [48/50] batch [125/500] time 0.880 (0.889) data 0.000 (0.005) loss 1.6143 (1.0101) acc 65.6250 (73.7000) lr 3.1417e-05 eta 0:20:23 +epoch [48/50] batch [130/500] time 0.902 (0.889) data 0.000 (0.005) loss 1.0752 (1.0132) acc 68.7500 (73.7019) lr 3.1417e-05 eta 0:20:17 +epoch [48/50] batch [135/500] time 0.860 (0.889) data 0.000 (0.005) loss 0.8936 (1.0082) acc 81.2500 (73.8657) lr 3.1417e-05 eta 0:20:13 +epoch [48/50] batch [140/500] time 0.867 (0.889) data 0.000 (0.005) loss 0.6738 (1.0003) acc 
78.1250 (74.1071) lr 3.1417e-05 eta 0:20:08 +epoch [48/50] batch [145/500] time 0.845 (0.889) data 0.000 (0.005) loss 1.3174 (1.0027) acc 65.6250 (74.0517) lr 3.1417e-05 eta 0:20:04 +epoch [48/50] batch [150/500] time 0.900 (0.889) data 0.000 (0.004) loss 1.5527 (1.0051) acc 68.7500 (73.8542) lr 3.1417e-05 eta 0:20:00 +epoch [48/50] batch [155/500] time 0.861 (0.888) data 0.000 (0.004) loss 0.6499 (1.0012) acc 81.2500 (74.0927) lr 3.1417e-05 eta 0:19:54 +epoch [48/50] batch [160/500] time 0.890 (0.889) data 0.000 (0.004) loss 0.5894 (0.9944) acc 81.2500 (74.3164) lr 3.1417e-05 eta 0:19:51 +epoch [48/50] batch [165/500] time 0.882 (0.889) data 0.000 (0.004) loss 0.8774 (0.9927) acc 71.8750 (74.2424) lr 3.1417e-05 eta 0:19:46 +epoch [48/50] batch [170/500] time 0.898 (0.889) data 0.000 (0.004) loss 0.9956 (0.9936) acc 65.6250 (74.2279) lr 3.1417e-05 eta 0:19:41 +epoch [48/50] batch [175/500] time 0.868 (0.888) data 0.000 (0.004) loss 0.7212 (0.9912) acc 84.3750 (74.3214) lr 3.1417e-05 eta 0:19:37 +epoch [48/50] batch [180/500] time 0.893 (0.889) data 0.000 (0.004) loss 0.9375 (0.9897) acc 75.0000 (74.2882) lr 3.1417e-05 eta 0:19:33 +epoch [48/50] batch [185/500] time 0.887 (0.889) data 0.000 (0.004) loss 1.7480 (0.9985) acc 59.3750 (74.1554) lr 3.1417e-05 eta 0:19:29 +epoch [48/50] batch [190/500] time 0.905 (0.889) data 0.000 (0.003) loss 1.7480 (1.0024) acc 62.5000 (73.9967) lr 3.1417e-05 eta 0:19:24 +epoch [48/50] batch [195/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.0498 (0.9992) acc 75.0000 (74.1026) lr 3.1417e-05 eta 0:19:20 +epoch [48/50] batch [200/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.8315 (1.0008) acc 87.5000 (74.1094) lr 3.1417e-05 eta 0:19:16 +epoch [48/50] batch [205/500] time 0.857 (0.890) data 0.000 (0.003) loss 0.9038 (1.0002) acc 84.3750 (74.0854) lr 3.1417e-05 eta 0:19:12 +epoch [48/50] batch [210/500] time 0.933 (0.890) data 0.000 (0.003) loss 0.9941 (1.0056) acc 75.0000 (74.0030) lr 3.1417e-05 eta 0:19:08 +epoch [48/50] batch 
[215/500] time 0.873 (0.889) data 0.000 (0.003) loss 0.8545 (1.0034) acc 84.3750 (74.0988) lr 3.1417e-05 eta 0:19:02 +epoch [48/50] batch [220/500] time 0.849 (0.889) data 0.000 (0.003) loss 1.0986 (1.0038) acc 75.0000 (74.1335) lr 3.1417e-05 eta 0:18:58 +epoch [48/50] batch [225/500] time 0.882 (0.889) data 0.000 (0.003) loss 0.9673 (1.0023) acc 78.1250 (74.2639) lr 3.1417e-05 eta 0:18:53 +epoch [48/50] batch [230/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.6016 (1.0020) acc 87.5000 (74.2663) lr 3.1417e-05 eta 0:18:48 +epoch [48/50] batch [235/500] time 0.879 (0.889) data 0.000 (0.003) loss 0.8955 (1.0008) acc 78.1250 (74.2819) lr 3.1417e-05 eta 0:18:44 +epoch [48/50] batch [240/500] time 0.883 (0.888) data 0.000 (0.003) loss 0.8501 (1.0030) acc 78.1250 (74.2578) lr 3.1417e-05 eta 0:18:39 +epoch [48/50] batch [245/500] time 0.882 (0.888) data 0.000 (0.003) loss 1.2939 (1.0062) acc 65.6250 (74.1964) lr 3.1417e-05 eta 0:18:34 +epoch [48/50] batch [250/500] time 0.854 (0.888) data 0.000 (0.003) loss 0.7266 (1.0084) acc 81.2500 (74.1125) lr 3.1417e-05 eta 0:18:29 +epoch [48/50] batch [255/500] time 0.905 (0.888) data 0.000 (0.003) loss 1.1533 (1.0099) acc 68.7500 (74.0196) lr 3.1417e-05 eta 0:18:25 +epoch [48/50] batch [260/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.6772 (1.0101) acc 84.3750 (74.0625) lr 3.1417e-05 eta 0:18:21 +epoch [48/50] batch [265/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.3760 (1.0111) acc 56.2500 (73.9741) lr 3.1417e-05 eta 0:18:16 +epoch [48/50] batch [270/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.6479 (1.0055) acc 84.3750 (74.1204) lr 3.1417e-05 eta 0:18:11 +epoch [48/50] batch [275/500] time 0.911 (0.888) data 0.000 (0.002) loss 0.9780 (1.0019) acc 75.0000 (74.2159) lr 3.1417e-05 eta 0:18:07 +epoch [48/50] batch [280/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0645 (1.0002) acc 65.6250 (74.2411) lr 3.1417e-05 eta 0:18:02 +epoch [48/50] batch [285/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.5493 
(0.9991) acc 78.1250 (74.2544) lr 3.1417e-05 eta 0:17:58 +epoch [48/50] batch [290/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7446 (0.9967) acc 75.0000 (74.3319) lr 3.1417e-05 eta 0:17:53 +epoch [48/50] batch [295/500] time 0.905 (0.887) data 0.000 (0.002) loss 0.9419 (0.9943) acc 65.6250 (74.4174) lr 3.1417e-05 eta 0:17:49 +epoch [48/50] batch [300/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.8877 (0.9911) acc 75.0000 (74.4479) lr 3.1417e-05 eta 0:17:44 +epoch [48/50] batch [305/500] time 0.911 (0.887) data 0.000 (0.002) loss 0.7993 (0.9894) acc 81.2500 (74.4877) lr 3.1417e-05 eta 0:17:40 +epoch [48/50] batch [310/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.2842 (0.9946) acc 68.7500 (74.3649) lr 3.1417e-05 eta 0:17:36 +epoch [48/50] batch [315/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.8423 (0.9903) acc 87.5000 (74.4940) lr 3.1417e-05 eta 0:17:31 +epoch [48/50] batch [320/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.9336 (0.9897) acc 78.1250 (74.5312) lr 3.1417e-05 eta 0:17:26 +epoch [48/50] batch [325/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.0312 (0.9910) acc 71.8750 (74.5000) lr 3.1417e-05 eta 0:17:22 +epoch [48/50] batch [330/500] time 0.930 (0.887) data 0.000 (0.002) loss 1.0312 (0.9917) acc 81.2500 (74.5076) lr 3.1417e-05 eta 0:17:18 +epoch [48/50] batch [335/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.2529 (0.9989) acc 71.8750 (74.3284) lr 3.1417e-05 eta 0:17:14 +epoch [48/50] batch [340/500] time 0.920 (0.888) data 0.000 (0.002) loss 0.6562 (0.9961) acc 75.0000 (74.3107) lr 3.1417e-05 eta 0:17:09 +epoch [48/50] batch [345/500] time 0.879 (0.888) data 0.001 (0.002) loss 0.9785 (0.9927) acc 71.8750 (74.4203) lr 3.1417e-05 eta 0:17:05 +epoch [48/50] batch [350/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.4648 (0.9935) acc 71.8750 (74.4464) lr 3.1417e-05 eta 0:17:01 +epoch [48/50] batch [355/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.9780 (0.9925) acc 71.8750 (74.4630) lr 3.1417e-05 eta 0:16:56 +epoch 
[48/50] batch [360/500] time 0.924 (0.888) data 0.000 (0.002) loss 1.2207 (0.9909) acc 71.8750 (74.4705) lr 3.1417e-05 eta 0:16:52 +epoch [48/50] batch [365/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.1143 (0.9902) acc 75.0000 (74.5205) lr 3.1417e-05 eta 0:16:48 +epoch [48/50] batch [370/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.8608 (0.9889) acc 84.3750 (74.6284) lr 3.1417e-05 eta 0:16:43 +epoch [48/50] batch [375/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.1377 (0.9916) acc 78.1250 (74.5917) lr 3.1417e-05 eta 0:16:39 +epoch [48/50] batch [380/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8569 (0.9922) acc 75.0000 (74.5888) lr 3.1417e-05 eta 0:16:34 +epoch [48/50] batch [385/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.1133 (0.9915) acc 65.6250 (74.5779) lr 3.1417e-05 eta 0:16:30 +epoch [48/50] batch [390/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.4033 (0.9926) acc 65.6250 (74.5433) lr 3.1417e-05 eta 0:16:25 +epoch [48/50] batch [395/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.2549 (0.9924) acc 65.6250 (74.5253) lr 3.1417e-05 eta 0:16:20 +epoch [48/50] batch [400/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.4482 (0.9947) acc 84.3750 (74.5000) lr 3.1417e-05 eta 0:16:16 +epoch [48/50] batch [405/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.1846 (0.9964) acc 71.8750 (74.4522) lr 3.1417e-05 eta 0:16:11 +epoch [48/50] batch [410/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.0283 (0.9940) acc 75.0000 (74.5579) lr 3.1417e-05 eta 0:16:07 +epoch [48/50] batch [415/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.7656 (0.9924) acc 81.2500 (74.5934) lr 3.1417e-05 eta 0:16:02 +epoch [48/50] batch [420/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.8384 (0.9918) acc 71.8750 (74.5685) lr 3.1417e-05 eta 0:15:58 +epoch [48/50] batch [425/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5244 (0.9912) acc 87.5000 (74.5735) lr 3.1417e-05 eta 0:15:53 +epoch [48/50] batch [430/500] time 0.863 (0.887) data 0.000 (0.002) loss 
0.9658 (0.9895) acc 78.1250 (74.6366) lr 3.1417e-05 eta 0:15:49 +epoch [48/50] batch [435/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.0820 (0.9907) acc 84.3750 (74.6193) lr 3.1417e-05 eta 0:15:44 +epoch [48/50] batch [440/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.3418 (0.9911) acc 93.7500 (74.6307) lr 3.1417e-05 eta 0:15:40 +epoch [48/50] batch [445/500] time 0.965 (0.887) data 0.000 (0.002) loss 0.6265 (0.9892) acc 84.3750 (74.6699) lr 3.1417e-05 eta 0:15:36 +epoch [48/50] batch [450/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.8760 (0.9871) acc 71.8750 (74.7153) lr 3.1417e-05 eta 0:15:31 +epoch [48/50] batch [455/500] time 0.853 (0.887) data 0.000 (0.002) loss 1.4209 (0.9907) acc 59.3750 (74.6223) lr 3.1417e-05 eta 0:15:26 +epoch [48/50] batch [460/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.3584 (0.9906) acc 68.7500 (74.6535) lr 3.1417e-05 eta 0:15:22 +epoch [48/50] batch [465/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.9502 (0.9900) acc 81.2500 (74.6707) lr 3.1417e-05 eta 0:15:17 +epoch [48/50] batch [470/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.6958 (0.9895) acc 78.1250 (74.7274) lr 3.1417e-05 eta 0:15:13 +epoch [48/50] batch [475/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.1982 (0.9896) acc 71.8750 (74.7434) lr 3.1417e-05 eta 0:15:09 +epoch [48/50] batch [480/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.7993 (0.9873) acc 78.1250 (74.8177) lr 3.1417e-05 eta 0:15:04 +epoch [48/50] batch [485/500] time 0.920 (0.887) data 0.000 (0.002) loss 0.8818 (0.9864) acc 78.1250 (74.8454) lr 3.1417e-05 eta 0:15:00 +epoch [48/50] batch [490/500] time 0.889 (0.887) data 0.000 (0.001) loss 0.8516 (0.9892) acc 81.2500 (74.7832) lr 3.1417e-05 eta 0:14:56 +epoch [48/50] batch [495/500] time 0.888 (0.887) data 0.000 (0.001) loss 1.3262 (0.9911) acc 62.5000 (74.7601) lr 3.1417e-05 eta 0:14:51 +epoch [48/50] batch [500/500] time 0.897 (0.887) data 0.000 (0.001) loss 1.3564 (0.9922) acc 65.6250 (74.7438) lr 1.7713e-05 eta 0:14:47 
+epoch [49/50] batch [5/500] time 0.909 (1.019) data 0.000 (0.129) loss 0.8320 (1.0609) acc 78.1250 (70.0000) lr 1.7713e-05 eta 0:16:54 +epoch [49/50] batch [10/500] time 0.859 (0.954) data 0.000 (0.064) loss 0.8110 (1.0527) acc 81.2500 (72.5000) lr 1.7713e-05 eta 0:15:44 +epoch [49/50] batch [15/500] time 0.859 (0.936) data 0.000 (0.043) loss 0.9893 (1.0368) acc 78.1250 (74.1667) lr 1.7713e-05 eta 0:15:22 +epoch [49/50] batch [20/500] time 0.893 (0.925) data 0.000 (0.032) loss 1.3252 (1.0703) acc 53.1250 (72.1875) lr 1.7713e-05 eta 0:15:06 +epoch [49/50] batch [25/500] time 0.858 (0.916) data 0.000 (0.026) loss 0.5054 (0.9836) acc 81.2500 (74.2500) lr 1.7713e-05 eta 0:14:53 +epoch [49/50] batch [30/500] time 0.861 (0.909) data 0.000 (0.022) loss 1.0918 (1.0168) acc 75.0000 (74.3750) lr 1.7713e-05 eta 0:14:41 +epoch [49/50] batch [35/500] time 0.884 (0.909) data 0.000 (0.019) loss 1.3076 (1.0389) acc 62.5000 (74.1964) lr 1.7713e-05 eta 0:14:37 +epoch [49/50] batch [40/500] time 0.880 (0.906) data 0.000 (0.016) loss 0.8589 (1.0004) acc 78.1250 (74.8438) lr 1.7713e-05 eta 0:14:29 +epoch [49/50] batch [45/500] time 0.880 (0.903) data 0.000 (0.015) loss 0.5645 (0.9866) acc 87.5000 (75.0694) lr 1.7713e-05 eta 0:14:22 +epoch [49/50] batch [50/500] time 0.881 (0.903) data 0.000 (0.013) loss 0.8735 (0.9794) acc 71.8750 (75.1875) lr 1.7713e-05 eta 0:14:17 +epoch [49/50] batch [55/500] time 0.880 (0.901) data 0.000 (0.012) loss 1.1064 (0.9865) acc 68.7500 (75.0568) lr 1.7713e-05 eta 0:14:11 +epoch [49/50] batch [60/500] time 0.917 (0.900) data 0.000 (0.011) loss 0.8843 (0.9833) acc 78.1250 (74.9479) lr 1.7713e-05 eta 0:14:05 +epoch [49/50] batch [65/500] time 0.889 (0.899) data 0.000 (0.010) loss 0.8506 (0.9906) acc 81.2500 (74.5673) lr 1.7713e-05 eta 0:14:01 +epoch [49/50] batch [70/500] time 0.863 (0.898) data 0.000 (0.009) loss 1.1504 (0.9941) acc 68.7500 (74.3750) lr 1.7713e-05 eta 0:13:54 +epoch [49/50] batch [75/500] time 0.872 (0.896) data 0.000 (0.009) loss 0.7402 
(0.9782) acc 84.3750 (74.7917) lr 1.7713e-05 eta 0:13:48 +epoch [49/50] batch [80/500] time 0.882 (0.895) data 0.000 (0.008) loss 1.3672 (0.9710) acc 78.1250 (75.0391) lr 1.7713e-05 eta 0:13:43 +epoch [49/50] batch [85/500] time 0.879 (0.895) data 0.000 (0.008) loss 0.7959 (0.9730) acc 81.2500 (75.0368) lr 1.7713e-05 eta 0:13:38 +epoch [49/50] batch [90/500] time 0.849 (0.893) data 0.000 (0.007) loss 1.2090 (0.9704) acc 59.3750 (75.0694) lr 1.7713e-05 eta 0:13:32 +epoch [49/50] batch [95/500] time 0.870 (0.892) data 0.000 (0.007) loss 1.3994 (0.9751) acc 71.8750 (74.9342) lr 1.7713e-05 eta 0:13:27 +epoch [49/50] batch [100/500] time 0.884 (0.891) data 0.000 (0.007) loss 0.7417 (0.9758) acc 78.1250 (74.9375) lr 1.7713e-05 eta 0:13:21 +epoch [49/50] batch [105/500] time 0.879 (0.890) data 0.000 (0.006) loss 1.6396 (0.9753) acc 56.2500 (74.9405) lr 1.7713e-05 eta 0:13:16 +epoch [49/50] batch [110/500] time 0.878 (0.890) data 0.000 (0.006) loss 1.1025 (0.9800) acc 68.7500 (74.9432) lr 1.7713e-05 eta 0:13:11 +epoch [49/50] batch [115/500] time 0.873 (0.889) data 0.000 (0.006) loss 0.8232 (0.9831) acc 78.1250 (74.8913) lr 1.7713e-05 eta 0:13:06 +epoch [49/50] batch [120/500] time 0.867 (0.888) data 0.000 (0.006) loss 0.8950 (0.9825) acc 71.8750 (74.8698) lr 1.7713e-05 eta 0:13:01 +epoch [49/50] batch [125/500] time 0.896 (0.888) data 0.000 (0.005) loss 1.4336 (0.9817) acc 71.8750 (74.8500) lr 1.7713e-05 eta 0:12:57 +epoch [49/50] batch [130/500] time 0.883 (0.888) data 0.000 (0.005) loss 1.1064 (0.9870) acc 81.2500 (74.8558) lr 1.7713e-05 eta 0:12:52 +epoch [49/50] batch [135/500] time 0.889 (0.889) data 0.000 (0.005) loss 0.5962 (0.9747) acc 81.2500 (75.0694) lr 1.7713e-05 eta 0:12:49 +epoch [49/50] batch [140/500] time 0.861 (0.889) data 0.000 (0.005) loss 0.9478 (0.9693) acc 78.1250 (75.2009) lr 1.7713e-05 eta 0:12:44 +epoch [49/50] batch [145/500] time 0.875 (0.889) data 0.000 (0.005) loss 0.7964 (0.9664) acc 87.5000 (75.4095) lr 1.7713e-05 eta 0:12:40 +epoch [49/50] 
batch [150/500] time 0.919 (0.889) data 0.000 (0.005) loss 0.3247 (0.9639) acc 90.6250 (75.5000) lr 1.7713e-05 eta 0:12:35 +epoch [49/50] batch [155/500] time 0.927 (0.890) data 0.000 (0.004) loss 1.0479 (0.9596) acc 75.0000 (75.7661) lr 1.7713e-05 eta 0:12:31 +epoch [49/50] batch [160/500] time 0.909 (0.890) data 0.000 (0.004) loss 1.1895 (0.9582) acc 71.8750 (75.8789) lr 1.7713e-05 eta 0:12:27 +epoch [49/50] batch [165/500] time 0.859 (0.889) data 0.000 (0.004) loss 0.9355 (0.9608) acc 68.7500 (75.7765) lr 1.7713e-05 eta 0:12:22 +epoch [49/50] batch [170/500] time 0.904 (0.889) data 0.000 (0.004) loss 1.3203 (0.9640) acc 59.3750 (75.6434) lr 1.7713e-05 eta 0:12:17 +epoch [49/50] batch [175/500] time 0.903 (0.889) data 0.000 (0.004) loss 1.0049 (0.9676) acc 78.1250 (75.5536) lr 1.7713e-05 eta 0:12:13 +epoch [49/50] batch [180/500] time 0.901 (0.889) data 0.000 (0.004) loss 1.2715 (0.9721) acc 68.7500 (75.2951) lr 1.7713e-05 eta 0:12:09 +epoch [49/50] batch [185/500] time 0.875 (0.889) data 0.000 (0.004) loss 0.9995 (0.9726) acc 78.1250 (75.3885) lr 1.7713e-05 eta 0:12:04 +epoch [49/50] batch [190/500] time 0.885 (0.889) data 0.000 (0.004) loss 2.1484 (0.9753) acc 46.8750 (75.2632) lr 1.7713e-05 eta 0:12:00 +epoch [49/50] batch [195/500] time 0.861 (0.889) data 0.000 (0.004) loss 0.9585 (0.9783) acc 68.7500 (75.1282) lr 1.7713e-05 eta 0:11:55 +epoch [49/50] batch [200/500] time 0.895 (0.888) data 0.001 (0.003) loss 0.9004 (0.9765) acc 78.1250 (75.1562) lr 1.7713e-05 eta 0:11:50 +epoch [49/50] batch [205/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.7544 (0.9763) acc 75.0000 (75.0762) lr 1.7713e-05 eta 0:11:46 +epoch [49/50] batch [210/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.9097 (0.9784) acc 78.1250 (75.0149) lr 1.7713e-05 eta 0:11:41 +epoch [49/50] batch [215/500] time 0.901 (0.888) data 0.000 (0.003) loss 1.1748 (0.9843) acc 68.7500 (74.9273) lr 1.7713e-05 eta 0:11:37 +epoch [49/50] batch [220/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.9253 
(0.9846) acc 78.1250 (74.9858) lr 1.7713e-05 eta 0:11:32 +epoch [49/50] batch [225/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.9854 (0.9835) acc 71.8750 (75.0000) lr 1.7713e-05 eta 0:11:28 +epoch [49/50] batch [230/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.2588 (0.9815) acc 65.6250 (75.0136) lr 1.7713e-05 eta 0:11:23 +epoch [49/50] batch [235/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.0244 (0.9842) acc 81.2500 (74.9601) lr 1.7713e-05 eta 0:11:19 +epoch [49/50] batch [240/500] time 0.866 (0.888) data 0.000 (0.003) loss 0.8643 (0.9811) acc 81.2500 (74.9609) lr 1.7713e-05 eta 0:11:14 +epoch [49/50] batch [245/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.0684 (0.9813) acc 62.5000 (74.9362) lr 1.7713e-05 eta 0:11:10 +epoch [49/50] batch [250/500] time 0.887 (0.888) data 0.000 (0.003) loss 0.6982 (0.9830) acc 75.0000 (74.9750) lr 1.7713e-05 eta 0:11:05 +epoch [49/50] batch [255/500] time 0.887 (0.887) data 0.000 (0.003) loss 0.9351 (0.9822) acc 71.8750 (74.9755) lr 1.7713e-05 eta 0:11:01 +epoch [49/50] batch [260/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.1387 (0.9833) acc 71.8750 (74.9880) lr 1.7713e-05 eta 0:10:56 +epoch [49/50] batch [265/500] time 0.902 (0.888) data 0.000 (0.003) loss 1.2480 (0.9809) acc 62.5000 (75.0000) lr 1.7713e-05 eta 0:10:52 +epoch [49/50] batch [270/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.0908 (0.9834) acc 84.3750 (74.9769) lr 1.7713e-05 eta 0:10:47 +epoch [49/50] batch [275/500] time 0.973 (0.888) data 0.000 (0.003) loss 1.2891 (0.9853) acc 75.0000 (74.9659) lr 1.7713e-05 eta 0:10:43 +epoch [49/50] batch [280/500] time 0.866 (0.888) data 0.000 (0.003) loss 0.6846 (0.9867) acc 90.6250 (74.9219) lr 1.7713e-05 eta 0:10:39 +epoch [49/50] batch [285/500] time 0.904 (0.888) data 0.000 (0.003) loss 1.2256 (0.9899) acc 75.0000 (74.8794) lr 1.7713e-05 eta 0:10:34 +epoch [49/50] batch [290/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.8740 (0.9885) acc 75.0000 (74.9784) lr 1.7713e-05 eta 0:10:30 +epoch 
[49/50] batch [295/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.6982 (0.9866) acc 84.3750 (75.0530) lr 1.7713e-05 eta 0:10:25 +epoch [49/50] batch [300/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4570 (0.9869) acc 68.7500 (75.0938) lr 1.7713e-05 eta 0:10:21 +epoch [49/50] batch [305/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.3262 (0.9888) acc 68.7500 (75.0000) lr 1.7713e-05 eta 0:10:17 +epoch [49/50] batch [310/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.8066 (0.9874) acc 62.5000 (74.9093) lr 1.7713e-05 eta 0:10:12 +epoch [49/50] batch [315/500] time 0.910 (0.888) data 0.000 (0.002) loss 0.8286 (0.9856) acc 87.5000 (74.9702) lr 1.7713e-05 eta 0:10:08 +epoch [49/50] batch [320/500] time 0.922 (0.889) data 0.000 (0.002) loss 1.2227 (0.9833) acc 75.0000 (75.0586) lr 1.7713e-05 eta 0:10:04 +epoch [49/50] batch [325/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.3086 (0.9824) acc 96.8750 (75.0769) lr 1.7713e-05 eta 0:09:59 +epoch [49/50] batch [330/500] time 0.856 (0.889) data 0.000 (0.002) loss 0.7603 (0.9825) acc 75.0000 (75.0852) lr 1.7713e-05 eta 0:09:55 +epoch [49/50] batch [335/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.7964 (0.9811) acc 78.1250 (75.1213) lr 1.7713e-05 eta 0:09:50 +epoch [49/50] batch [340/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.5356 (0.9784) acc 78.1250 (75.1287) lr 1.7713e-05 eta 0:09:46 +epoch [49/50] batch [345/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.9336 (0.9829) acc 62.5000 (74.9728) lr 1.7713e-05 eta 0:09:41 +epoch [49/50] batch [350/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.5815 (0.9825) acc 78.1250 (75.0000) lr 1.7713e-05 eta 0:09:37 +epoch [49/50] batch [355/500] time 0.902 (0.888) data 0.000 (0.002) loss 0.8735 (0.9851) acc 78.1250 (74.9384) lr 1.7713e-05 eta 0:09:32 +epoch [49/50] batch [360/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.9766 (0.9867) acc 71.8750 (74.8524) lr 1.7713e-05 eta 0:09:28 +epoch [49/50] batch [365/500] time 0.878 (0.888) data 0.000 (0.002) loss 
0.9019 (0.9831) acc 65.6250 (74.8887) lr 1.7713e-05 eta 0:09:23 +epoch [49/50] batch [370/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.2334 (0.9855) acc 68.7500 (74.8564) lr 1.7713e-05 eta 0:09:19 +epoch [49/50] batch [375/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.3887 (0.9853) acc 90.6250 (74.8500) lr 1.7713e-05 eta 0:09:15 +epoch [49/50] batch [380/500] time 0.887 (0.889) data 0.000 (0.002) loss 0.9468 (0.9833) acc 75.0000 (74.9178) lr 1.7713e-05 eta 0:09:10 +epoch [49/50] batch [385/500] time 0.914 (0.889) data 0.000 (0.002) loss 0.8066 (0.9800) acc 84.3750 (74.9838) lr 1.7713e-05 eta 0:09:06 +epoch [49/50] batch [390/500] time 0.919 (0.889) data 0.000 (0.002) loss 1.0576 (0.9780) acc 78.1250 (75.0240) lr 1.7713e-05 eta 0:09:02 +epoch [49/50] batch [395/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.7134 (0.9773) acc 84.3750 (75.0633) lr 1.7713e-05 eta 0:08:57 +epoch [49/50] batch [400/500] time 0.911 (0.889) data 0.000 (0.002) loss 0.8950 (0.9796) acc 75.0000 (75.0078) lr 1.7713e-05 eta 0:08:53 +epoch [49/50] batch [405/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.2705 (0.9825) acc 71.8750 (74.9306) lr 1.7713e-05 eta 0:08:48 +epoch [49/50] batch [410/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.8081 (0.9825) acc 81.2500 (74.9695) lr 1.7713e-05 eta 0:08:44 +epoch [49/50] batch [415/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.1211 (0.9832) acc 71.8750 (74.9021) lr 1.7713e-05 eta 0:08:39 +epoch [49/50] batch [420/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.7969 (0.9837) acc 81.2500 (74.8958) lr 1.7713e-05 eta 0:08:35 +epoch [49/50] batch [425/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.7061 (0.9822) acc 81.2500 (74.9265) lr 1.7713e-05 eta 0:08:31 +epoch [49/50] batch [430/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.9688 (0.9799) acc 78.1250 (75.0509) lr 1.7713e-05 eta 0:08:26 +epoch [49/50] batch [435/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.1143 (0.9798) acc 71.8750 (75.0431) lr 1.7713e-05 eta 0:08:22 
+epoch [49/50] batch [440/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9204 (0.9815) acc 78.1250 (75.0426) lr 1.7713e-05 eta 0:08:17 +epoch [49/50] batch [445/500] time 0.857 (0.889) data 0.000 (0.002) loss 1.7148 (0.9833) acc 62.5000 (75.0070) lr 1.7713e-05 eta 0:08:13 +epoch [49/50] batch [450/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.0635 (0.9831) acc 78.1250 (75.0139) lr 1.7713e-05 eta 0:08:08 +epoch [49/50] batch [455/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.7847 (0.9844) acc 81.2500 (74.9863) lr 1.7713e-05 eta 0:08:04 +epoch [49/50] batch [460/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.3652 (0.9850) acc 75.0000 (74.9660) lr 1.7713e-05 eta 0:07:59 +epoch [49/50] batch [465/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.1289 (0.9866) acc 78.1250 (74.9597) lr 1.7713e-05 eta 0:07:55 +epoch [49/50] batch [470/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.9976 (0.9861) acc 75.0000 (74.9601) lr 1.7713e-05 eta 0:07:50 +epoch [49/50] batch [475/500] time 0.915 (0.889) data 0.000 (0.002) loss 1.2070 (0.9849) acc 65.6250 (74.9803) lr 1.7713e-05 eta 0:07:46 +epoch [49/50] batch [480/500] time 0.908 (0.889) data 0.000 (0.002) loss 0.6929 (0.9829) acc 78.1250 (75.0456) lr 1.7713e-05 eta 0:07:42 +epoch [49/50] batch [485/500] time 0.895 (0.889) data 0.000 (0.002) loss 0.8452 (0.9833) acc 75.0000 (75.0129) lr 1.7713e-05 eta 0:07:37 +epoch [49/50] batch [490/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.1621 (0.9849) acc 81.2500 (74.9745) lr 1.7713e-05 eta 0:07:33 +epoch [49/50] batch [495/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.2715 (0.9841) acc 65.6250 (74.9811) lr 1.7713e-05 eta 0:07:28 +epoch [49/50] batch [500/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.1982 (0.9852) acc 68.7500 (74.9688) lr 7.8853e-06 eta 0:07:24 +epoch [50/50] batch [5/500] time 0.894 (1.054) data 0.000 (0.135) loss 0.5654 (0.8583) acc 78.1250 (75.6250) lr 7.8853e-06 eta 0:08:41 +epoch [50/50] batch [10/500] time 0.887 (0.971) data 0.000 (0.067) 
loss 1.0693 (0.9325) acc 71.8750 (75.0000) lr 7.8853e-06 eta 0:07:55 +epoch [50/50] batch [15/500] time 0.871 (0.940) data 0.000 (0.045) loss 0.9917 (0.9297) acc 78.1250 (73.7500) lr 7.8853e-06 eta 0:07:36 +epoch [50/50] batch [20/500] time 0.867 (0.928) data 0.000 (0.034) loss 1.7197 (0.9906) acc 68.7500 (73.9062) lr 7.8853e-06 eta 0:07:25 +epoch [50/50] batch [25/500] time 0.876 (0.916) data 0.000 (0.027) loss 0.9053 (0.9874) acc 71.8750 (74.5000) lr 7.8853e-06 eta 0:07:14 +epoch [50/50] batch [30/500] time 0.899 (0.911) data 0.000 (0.023) loss 1.6289 (1.0049) acc 62.5000 (74.1667) lr 7.8853e-06 eta 0:07:08 +epoch [50/50] batch [35/500] time 0.897 (0.909) data 0.000 (0.019) loss 0.9829 (0.9972) acc 71.8750 (74.3750) lr 7.8853e-06 eta 0:07:02 +epoch [50/50] batch [40/500] time 0.882 (0.908) data 0.000 (0.017) loss 0.6396 (0.9820) acc 81.2500 (74.4531) lr 7.8853e-06 eta 0:06:57 +epoch [50/50] batch [45/500] time 0.883 (0.904) data 0.000 (0.015) loss 0.6328 (0.9923) acc 84.3750 (74.5833) lr 7.8853e-06 eta 0:06:51 +epoch [50/50] batch [50/500] time 0.885 (0.903) data 0.000 (0.014) loss 0.8911 (0.9981) acc 68.7500 (74.4375) lr 7.8853e-06 eta 0:06:46 +epoch [50/50] batch [55/500] time 0.875 (0.901) data 0.000 (0.012) loss 1.3350 (1.0012) acc 62.5000 (74.5455) lr 7.8853e-06 eta 0:06:40 +epoch [50/50] batch [60/500] time 0.900 (0.900) data 0.000 (0.011) loss 1.4766 (0.9975) acc 71.8750 (74.6875) lr 7.8853e-06 eta 0:06:35 +epoch [50/50] batch [65/500] time 0.870 (0.899) data 0.000 (0.011) loss 0.7612 (0.9927) acc 81.2500 (74.6154) lr 7.8853e-06 eta 0:06:30 +epoch [50/50] batch [70/500] time 0.854 (0.897) data 0.000 (0.010) loss 0.8184 (0.9929) acc 65.6250 (74.5089) lr 7.8853e-06 eta 0:06:25 +epoch [50/50] batch [75/500] time 0.903 (0.896) data 0.000 (0.009) loss 1.5127 (1.0183) acc 65.6250 (74.2500) lr 7.8853e-06 eta 0:06:20 +epoch [50/50] batch [80/500] time 0.892 (0.897) data 0.000 (0.009) loss 1.4502 (1.0454) acc 62.5000 (73.6328) lr 7.8853e-06 eta 0:06:16 +epoch 
[50/50] batch [85/500] time 0.861 (0.896) data 0.000 (0.008) loss 1.3350 (1.0501) acc 62.5000 (73.6029) lr 7.8853e-06 eta 0:06:11 +epoch [50/50] batch [90/500] time 0.872 (0.894) data 0.000 (0.008) loss 1.1748 (1.0431) acc 65.6250 (73.5764) lr 7.8853e-06 eta 0:06:06 +epoch [50/50] batch [95/500] time 0.886 (0.894) data 0.000 (0.007) loss 0.9097 (1.0388) acc 81.2500 (73.8158) lr 7.8853e-06 eta 0:06:02 +epoch [50/50] batch [100/500] time 0.873 (0.893) data 0.000 (0.007) loss 1.3545 (1.0357) acc 75.0000 (74.0000) lr 7.8853e-06 eta 0:05:57 +epoch [50/50] batch [105/500] time 0.872 (0.893) data 0.000 (0.007) loss 0.7686 (1.0253) acc 78.1250 (74.2262) lr 7.8853e-06 eta 0:05:52 +epoch [50/50] batch [110/500] time 0.910 (0.892) data 0.000 (0.006) loss 1.2480 (1.0244) acc 59.3750 (73.9489) lr 7.8853e-06 eta 0:05:48 +epoch [50/50] batch [115/500] time 0.859 (0.892) data 0.000 (0.006) loss 1.0820 (1.0261) acc 71.8750 (73.8315) lr 7.8853e-06 eta 0:05:43 +epoch [50/50] batch [120/500] time 0.890 (0.891) data 0.000 (0.006) loss 0.8896 (1.0276) acc 75.0000 (73.7240) lr 7.8853e-06 eta 0:05:38 +epoch [50/50] batch [125/500] time 0.908 (0.891) data 0.000 (0.006) loss 1.7373 (1.0227) acc 62.5000 (73.8000) lr 7.8853e-06 eta 0:05:34 +epoch [50/50] batch [130/500] time 0.913 (0.891) data 0.000 (0.005) loss 1.0898 (1.0181) acc 62.5000 (73.7981) lr 7.8853e-06 eta 0:05:29 +epoch [50/50] batch [135/500] time 0.884 (0.892) data 0.000 (0.005) loss 1.3730 (1.0206) acc 71.8750 (73.8194) lr 7.8853e-06 eta 0:05:25 +epoch [50/50] batch [140/500] time 0.920 (0.892) data 0.000 (0.005) loss 1.4043 (1.0192) acc 56.2500 (73.7723) lr 7.8853e-06 eta 0:05:21 +epoch [50/50] batch [145/500] time 0.971 (0.892) data 0.000 (0.005) loss 1.1533 (1.0169) acc 71.8750 (73.8362) lr 7.8853e-06 eta 0:05:16 +epoch [50/50] batch [150/500] time 0.871 (0.892) data 0.000 (0.005) loss 1.4072 (1.0204) acc 68.7500 (73.7917) lr 7.8853e-06 eta 0:05:12 +epoch [50/50] batch [155/500] time 0.881 (0.892) data 0.000 (0.005) loss 
1.2080 (1.0189) acc 71.8750 (73.8710) lr 7.8853e-06 eta 0:05:07 +epoch [50/50] batch [160/500] time 0.890 (0.891) data 0.000 (0.004) loss 0.4243 (1.0184) acc 87.5000 (73.8281) lr 7.8853e-06 eta 0:05:03 +epoch [50/50] batch [165/500] time 0.903 (0.891) data 0.000 (0.004) loss 1.1455 (1.0191) acc 75.0000 (73.9394) lr 7.8853e-06 eta 0:04:58 +epoch [50/50] batch [170/500] time 0.886 (0.891) data 0.000 (0.004) loss 1.5498 (1.0167) acc 68.7500 (74.0625) lr 7.8853e-06 eta 0:04:54 +epoch [50/50] batch [175/500] time 0.853 (0.891) data 0.000 (0.004) loss 0.7773 (1.0140) acc 81.2500 (74.1250) lr 7.8853e-06 eta 0:04:49 +epoch [50/50] batch [180/500] time 0.903 (0.891) data 0.000 (0.004) loss 0.6875 (1.0130) acc 81.2500 (74.1493) lr 7.8853e-06 eta 0:04:45 +epoch [50/50] batch [185/500] time 0.914 (0.892) data 0.000 (0.004) loss 1.0234 (1.0139) acc 78.1250 (74.1216) lr 7.8853e-06 eta 0:04:40 +epoch [50/50] batch [190/500] time 0.877 (0.891) data 0.000 (0.004) loss 1.1875 (1.0177) acc 75.0000 (74.0461) lr 7.8853e-06 eta 0:04:36 +epoch [50/50] batch [195/500] time 0.877 (0.891) data 0.000 (0.004) loss 1.1182 (1.0159) acc 78.1250 (74.1506) lr 7.8853e-06 eta 0:04:31 +epoch [50/50] batch [200/500] time 0.877 (0.891) data 0.000 (0.004) loss 1.3516 (1.0211) acc 59.3750 (73.9219) lr 7.8853e-06 eta 0:04:27 +epoch [50/50] batch [205/500] time 0.870 (0.890) data 0.000 (0.003) loss 0.7129 (1.0149) acc 78.1250 (74.0396) lr 7.8853e-06 eta 0:04:22 +epoch [50/50] batch [210/500] time 0.851 (0.890) data 0.001 (0.003) loss 0.9893 (1.0168) acc 75.0000 (74.0327) lr 7.8853e-06 eta 0:04:18 +epoch [50/50] batch [215/500] time 0.889 (0.890) data 0.000 (0.003) loss 0.6431 (1.0106) acc 84.3750 (74.1424) lr 7.8853e-06 eta 0:04:13 +epoch [50/50] batch [220/500] time 0.863 (0.889) data 0.000 (0.003) loss 1.0957 (1.0065) acc 65.6250 (74.2045) lr 7.8853e-06 eta 0:04:09 +epoch [50/50] batch [225/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.9448 (1.0081) acc 75.0000 (74.1389) lr 7.8853e-06 eta 0:04:04 
+epoch [50/50] batch [230/500] time 0.846 (0.889) data 0.000 (0.003) loss 0.8906 (1.0052) acc 71.8750 (74.1712) lr 7.8853e-06 eta 0:04:00 +epoch [50/50] batch [235/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.6758 (1.0093) acc 53.1250 (74.1356) lr 7.8853e-06 eta 0:03:55 +epoch [50/50] batch [240/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.2080 (1.0150) acc 71.8750 (74.0625) lr 7.8853e-06 eta 0:03:51 +epoch [50/50] batch [245/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.9756 (1.0163) acc 81.2500 (74.0561) lr 7.8853e-06 eta 0:03:46 +epoch [50/50] batch [250/500] time 0.912 (0.890) data 0.000 (0.003) loss 1.0273 (1.0184) acc 68.7500 (74.0250) lr 7.8853e-06 eta 0:03:42 +epoch [50/50] batch [255/500] time 0.857 (0.889) data 0.000 (0.003) loss 1.0234 (1.0222) acc 75.0000 (73.9951) lr 7.8853e-06 eta 0:03:37 +epoch [50/50] batch [260/500] time 0.892 (0.889) data 0.000 (0.003) loss 0.4268 (1.0221) acc 90.6250 (73.9663) lr 7.8853e-06 eta 0:03:33 +epoch [50/50] batch [265/500] time 0.887 (0.889) data 0.000 (0.003) loss 0.6875 (1.0206) acc 84.3750 (74.0920) lr 7.8853e-06 eta 0:03:28 +epoch [50/50] batch [270/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.6611 (1.0183) acc 87.5000 (74.2130) lr 7.8853e-06 eta 0:03:24 +epoch [50/50] batch [275/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.0713 (1.0221) acc 65.6250 (74.1818) lr 7.8853e-06 eta 0:03:19 +epoch [50/50] batch [280/500] time 0.903 (0.889) data 0.000 (0.003) loss 0.6704 (1.0214) acc 81.2500 (74.2411) lr 7.8853e-06 eta 0:03:15 +epoch [50/50] batch [285/500] time 0.868 (0.889) data 0.000 (0.003) loss 0.7622 (1.0213) acc 81.2500 (74.1886) lr 7.8853e-06 eta 0:03:11 +epoch [50/50] batch [290/500] time 0.863 (0.889) data 0.000 (0.003) loss 0.7261 (1.0186) acc 71.8750 (74.2026) lr 7.8853e-06 eta 0:03:06 +epoch [50/50] batch [295/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.0146 (1.0186) acc 68.7500 (74.2161) lr 7.8853e-06 eta 0:03:02 +epoch [50/50] batch [300/500] time 0.892 (0.889) data 0.000 
(0.002) loss 1.1055 (1.0239) acc 71.8750 (74.1042) lr 7.8853e-06 eta 0:02:57 +epoch [50/50] batch [305/500] time 0.854 (0.889) data 0.000 (0.002) loss 0.8281 (1.0263) acc 68.7500 (74.0676) lr 7.8853e-06 eta 0:02:53 +epoch [50/50] batch [310/500] time 0.892 (0.889) data 0.000 (0.002) loss 1.2998 (1.0282) acc 65.6250 (74.0020) lr 7.8853e-06 eta 0:02:48 +epoch [50/50] batch [315/500] time 0.904 (0.888) data 0.000 (0.002) loss 0.9629 (1.0276) acc 75.0000 (74.0179) lr 7.8853e-06 eta 0:02:44 +epoch [50/50] batch [320/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.6641 (1.0290) acc 56.2500 (73.9746) lr 7.8853e-06 eta 0:02:39 +epoch [50/50] batch [325/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.8501 (1.0279) acc 68.7500 (73.9904) lr 7.8853e-06 eta 0:02:35 +epoch [50/50] batch [330/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.2471 (1.0281) acc 65.6250 (73.9299) lr 7.8853e-06 eta 0:02:31 +epoch [50/50] batch [335/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.6079 (1.0250) acc 84.3750 (74.0299) lr 7.8853e-06 eta 0:02:26 +epoch [50/50] batch [340/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.1094 (1.0276) acc 75.0000 (74.0257) lr 7.8853e-06 eta 0:02:22 +epoch [50/50] batch [345/500] time 0.897 (0.888) data 0.000 (0.002) loss 0.6455 (1.0262) acc 78.1250 (74.0761) lr 7.8853e-06 eta 0:02:17 +epoch [50/50] batch [350/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.8701 (1.0293) acc 78.1250 (74.0625) lr 7.8853e-06 eta 0:02:13 +epoch [50/50] batch [355/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.5654 (1.0306) acc 71.8750 (74.0669) lr 7.8853e-06 eta 0:02:08 +epoch [50/50] batch [360/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.3486 (1.0299) acc 62.5000 (74.0365) lr 7.8853e-06 eta 0:02:04 +epoch [50/50] batch [365/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.9985 (1.0282) acc 78.1250 (74.0582) lr 7.8853e-06 eta 0:01:59 +epoch [50/50] batch [370/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.1602 (1.0284) acc 75.0000 (74.0118) lr 7.8853e-06 
eta 0:01:55 +epoch [50/50] batch [375/500] time 0.849 (0.888) data 0.000 (0.002) loss 1.2012 (1.0306) acc 71.8750 (74.0083) lr 7.8853e-06 eta 0:01:50 +epoch [50/50] batch [380/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.1650 (1.0297) acc 75.0000 (74.0296) lr 7.8853e-06 eta 0:01:46 +epoch [50/50] batch [385/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.1270 (1.0315) acc 75.0000 (73.9854) lr 7.8853e-06 eta 0:01:42 +epoch [50/50] batch [390/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.8828 (1.0311) acc 75.0000 (73.9904) lr 7.8853e-06 eta 0:01:37 +epoch [50/50] batch [395/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.7803 (1.0304) acc 75.0000 (74.0032) lr 7.8853e-06 eta 0:01:33 +epoch [50/50] batch [400/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.9004 (1.0315) acc 81.2500 (74.0000) lr 7.8853e-06 eta 0:01:28 +epoch [50/50] batch [405/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.9351 (1.0298) acc 71.8750 (74.0046) lr 7.8853e-06 eta 0:01:24 +epoch [50/50] batch [410/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.9648 (1.0270) acc 71.8750 (74.0473) lr 7.8853e-06 eta 0:01:19 +epoch [50/50] batch [415/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.7793 (1.0267) acc 81.2500 (74.0889) lr 7.8853e-06 eta 0:01:15 +epoch [50/50] batch [420/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.6846 (1.0244) acc 78.1250 (74.1295) lr 7.8853e-06 eta 0:01:11 +epoch [50/50] batch [425/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.3350 (1.0264) acc 71.8750 (74.0956) lr 7.8853e-06 eta 0:01:06 +epoch [50/50] batch [430/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.0352 (1.0279) acc 75.0000 (74.0698) lr 7.8853e-06 eta 0:01:02 +epoch [50/50] batch [435/500] time 0.910 (0.888) data 0.000 (0.002) loss 1.4219 (1.0277) acc 71.8750 (74.1020) lr 7.8853e-06 eta 0:00:57 +epoch [50/50] batch [440/500] time 0.912 (0.888) data 0.000 (0.002) loss 0.8687 (1.0267) acc 81.2500 (74.1548) lr 7.8853e-06 eta 0:00:53 +epoch [50/50] batch [445/500] time 0.887 (0.888) data 
0.000 (0.002) loss 0.7344 (1.0252) acc 78.1250 (74.1643) lr 7.8853e-06 eta 0:00:48 +epoch [50/50] batch [450/500] time 0.921 (0.888) data 0.000 (0.002) loss 0.6040 (1.0216) acc 84.3750 (74.2431) lr 7.8853e-06 eta 0:00:44 +epoch [50/50] batch [455/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.8164 (1.0207) acc 78.1250 (74.2445) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [460/500] time 0.893 (0.888) data 0.000 (0.002) loss 0.8198 (1.0197) acc 75.0000 (74.2459) lr 7.8853e-06 eta 0:00:35 +epoch [50/50] batch [465/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.8301 (1.0183) acc 75.0000 (74.2540) lr 7.8853e-06 eta 0:00:31 +epoch [50/50] batch [470/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.7725 (1.0183) acc 81.2500 (74.2553) lr 7.8853e-06 eta 0:00:26 +epoch [50/50] batch [475/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.0381 (1.0183) acc 71.8750 (74.2697) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [480/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.8540 (1.0184) acc 84.3750 (74.2643) lr 7.8853e-06 eta 0:00:17 +epoch [50/50] batch [485/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9023 (1.0185) acc 81.2500 (74.2461) lr 7.8853e-06 eta 0:00:13 +epoch [50/50] batch [490/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.0029 (1.0191) acc 81.2500 (74.2921) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [495/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.8647 (1.0191) acc 78.1250 (74.3245) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [500/500] time 0.857 (0.888) data 0.000 (0.002) loss 0.9663 (1.0188) acc 78.1250 (74.3375) lr 1.9733e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 38,987 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.5% +Elapsed: 6:12:36 diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..84d7aeca Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698970684.ckb-gpu-a.1343964.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698970684.ckb-gpu-a.1343964.0 new file mode 100644 index 00000000..ca832a0d Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698970684.ckb-gpu-a.1343964.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..88627ea0 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,5342 @@ +*************** +** Arguments ** +*************** 
+backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + 
SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.16.3 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-166-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: NVIDIA A100-SXM4-40GB +GPU 1: NVIDIA A100-SXM4-40GB +GPU 2: NVIDIA A100-SXM4-40GB +GPU 3: NVIDIA A100-SXM4-40GB + +Nvidia driver version: 525.125.06 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.5 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.5 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 43 bits physical, 48 bits virtual +CPU(s): 256 +On-line CPU(s) list: 0-255 +Thread(s) per core: 2 +Core(s) per socket: 64 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: AuthenticAMD +CPU family: 23 +Model: 49 +Model name: AMD EPYC 7H12 64-Core Processor +Stepping: 0 +Frequency boost: enabled +CPU MHz: 1499.981 +CPU max MHz: 2600.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5200.20 +Virtualization: AMD-V +L1d cache: 4 MiB +L1i cache: 4 MiB +L2 cache: 64 MiB +L3 cache: 512 MiB +NUMA node0 CPU(s): 0-63,128-191 +NUMA node1 CPU(s): 64-127,192-255 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected 
+Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Vulnerable +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca sme sev sev_es + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 
+[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/50] batch [5/500] time 0.886 (1.684) data 0.000 (0.133) loss 2.3535 (2.9648) acc 50.0000 (42.5000) lr 1.0000e-05 eta 11:41:34 +epoch [1/50] batch [10/500] time 0.883 (1.278) data 0.000 (0.067) loss 2.1328 (2.6693) acc 59.3750 (47.8125) lr 1.0000e-05 eta 8:52:20 +epoch [1/50] batch [15/500] time 0.870 (1.139) data 0.000 (0.045) loss 1.8613 (2.4141) acc 46.8750 (50.0000) lr 1.0000e-05 eta 7:54:18 +epoch [1/50] batch [20/500] time 0.886 (1.076) data 0.000 (0.033) loss 2.0859 (2.3501) acc 46.8750 (50.0000) lr 1.0000e-05 eta 7:27:56 +epoch [1/50] batch [25/500] time 0.887 (1.038) data 0.000 (0.027) loss 1.4404 (2.2324) acc 71.8750 (52.1250) lr 1.0000e-05 eta 7:12:08 +epoch [1/50] batch [30/500] time 0.881 (1.013) data 0.000 (0.022) loss 1.3496 (2.1673) acc 65.6250 (53.2292) lr 1.0000e-05 eta 7:01:28 +epoch [1/50] batch [35/500] time 0.874 (0.992) data 0.000 (0.019) loss 1.2822 (2.1096) acc 68.7500 (53.9286) lr 1.0000e-05 eta 6:52:57 +epoch [1/50] batch [40/500] time 0.879 (0.979) data 0.000 (0.017) loss 2.2305 (2.0646) acc 50.0000 (54.6094) lr 1.0000e-05 eta 6:47:08 +epoch [1/50] batch [45/500] time 0.883 (0.969) data 0.000 (0.015) loss 1.4131 (1.9933) acc 53.1250 (55.8333) lr 1.0000e-05 eta 6:42:57 +epoch [1/50] batch [50/500] time 0.891 (0.962) data 0.000 (0.014) loss 1.7188 (1.9862) acc 56.2500 (55.6875) lr 1.0000e-05 eta 6:39:59 +epoch [1/50] batch [55/500] time 0.868 (0.955) data 0.000 (0.012) loss 2.0117 (1.9549) acc 53.1250 (56.1932) lr 1.0000e-05 eta 6:37:11 +epoch [1/50] batch [60/500] time 0.885 (0.949) data 0.000 (0.011) loss 1.7812 (1.9369) acc 62.5000 (56.6146) lr 1.0000e-05 eta 6:34:30 +epoch [1/50] batch [65/500] time 0.871 (0.944) data 0.000 (0.010) loss 0.9346 (1.8873) acc 71.8750 (57.4519) lr 1.0000e-05 eta 6:32:25 +epoch [1/50] batch [70/500] time 
0.856 (0.938) data 0.000 (0.010) loss 1.4199 (1.8519) acc 65.6250 (57.9464) lr 1.0000e-05 eta 6:29:53 +epoch [1/50] batch [75/500] time 0.870 (0.934) data 0.000 (0.009) loss 1.8916 (1.8255) acc 59.3750 (58.6667) lr 1.0000e-05 eta 6:28:08 +epoch [1/50] batch [80/500] time 0.875 (0.930) data 0.000 (0.009) loss 1.1973 (1.8047) acc 56.2500 (59.0234) lr 1.0000e-05 eta 6:26:18 +epoch [1/50] batch [85/500] time 0.851 (0.927) data 0.000 (0.008) loss 1.7314 (1.8044) acc 59.3750 (58.6765) lr 1.0000e-05 eta 6:24:58 +epoch [1/50] batch [90/500] time 0.886 (0.925) data 0.000 (0.008) loss 1.2559 (1.7867) acc 56.2500 (58.8889) lr 1.0000e-05 eta 6:23:58 +epoch [1/50] batch [95/500] time 0.874 (0.922) data 0.000 (0.007) loss 1.3574 (1.7706) acc 71.8750 (59.2105) lr 1.0000e-05 eta 6:22:53 +epoch [1/50] batch [100/500] time 0.838 (0.920) data 0.000 (0.007) loss 1.1436 (1.7552) acc 68.7500 (59.5000) lr 1.0000e-05 eta 6:21:45 +epoch [1/50] batch [105/500] time 0.870 (0.918) data 0.000 (0.007) loss 1.5820 (1.7363) acc 68.7500 (59.9107) lr 1.0000e-05 eta 6:20:42 +epoch [1/50] batch [110/500] time 0.883 (0.916) data 0.000 (0.006) loss 1.3232 (1.7167) acc 78.1250 (60.4261) lr 1.0000e-05 eta 6:19:57 +epoch [1/50] batch [115/500] time 0.876 (0.914) data 0.000 (0.006) loss 1.1436 (1.7071) acc 71.8750 (60.7065) lr 1.0000e-05 eta 6:19:14 +epoch [1/50] batch [120/500] time 0.888 (0.913) data 0.000 (0.006) loss 1.3359 (1.6979) acc 59.3750 (60.8854) lr 1.0000e-05 eta 6:18:43 +epoch [1/50] batch [125/500] time 0.856 (0.912) data 0.000 (0.006) loss 1.1484 (1.6866) acc 68.7500 (61.1750) lr 1.0000e-05 eta 6:18:00 +epoch [1/50] batch [130/500] time 0.871 (0.910) data 0.000 (0.005) loss 1.6211 (1.6868) acc 62.5000 (61.2500) lr 1.0000e-05 eta 6:17:22 +epoch [1/50] batch [135/500] time 0.879 (0.909) data 0.000 (0.005) loss 1.7900 (1.6813) acc 56.2500 (61.4815) lr 1.0000e-05 eta 6:16:52 +epoch [1/50] batch [140/500] time 0.863 (0.908) data 0.000 (0.005) loss 1.6768 (1.6732) acc 59.3750 (61.5625) lr 
1.0000e-05 eta 6:16:17 +epoch [1/50] batch [145/500] time 0.912 (0.907) data 0.000 (0.005) loss 1.7451 (1.6701) acc 62.5000 (61.6164) lr 1.0000e-05 eta 6:15:46 +epoch [1/50] batch [150/500] time 0.888 (0.907) data 0.000 (0.005) loss 1.4463 (1.6643) acc 62.5000 (61.8125) lr 1.0000e-05 eta 6:15:26 +epoch [1/50] batch [155/500] time 0.906 (0.906) data 0.000 (0.005) loss 1.2051 (1.6573) acc 68.7500 (61.8548) lr 1.0000e-05 eta 6:15:13 +epoch [1/50] batch [160/500] time 0.876 (0.906) data 0.000 (0.004) loss 1.4961 (1.6522) acc 68.7500 (61.9922) lr 1.0000e-05 eta 6:14:54 +epoch [1/50] batch [165/500] time 0.914 (0.905) data 0.000 (0.004) loss 1.2881 (1.6441) acc 53.1250 (62.0455) lr 1.0000e-05 eta 6:14:42 +epoch [1/50] batch [170/500] time 0.895 (0.906) data 0.000 (0.004) loss 1.9189 (1.6441) acc 62.5000 (62.0588) lr 1.0000e-05 eta 6:14:48 +epoch [1/50] batch [175/500] time 0.867 (0.905) data 0.000 (0.004) loss 0.9766 (1.6301) acc 75.0000 (62.1964) lr 1.0000e-05 eta 6:14:27 +epoch [1/50] batch [180/500] time 0.875 (0.904) data 0.000 (0.004) loss 1.6416 (1.6304) acc 62.5000 (62.3958) lr 1.0000e-05 eta 6:14:02 +epoch [1/50] batch [185/500] time 0.900 (0.904) data 0.000 (0.004) loss 1.8340 (1.6263) acc 62.5000 (62.5845) lr 1.0000e-05 eta 6:13:52 +epoch [1/50] batch [190/500] time 0.883 (0.903) data 0.000 (0.004) loss 1.7725 (1.6252) acc 71.8750 (62.6974) lr 1.0000e-05 eta 6:13:33 +epoch [1/50] batch [195/500] time 0.889 (0.903) data 0.000 (0.004) loss 1.0332 (1.6204) acc 71.8750 (62.7885) lr 1.0000e-05 eta 6:13:24 +epoch [1/50] batch [200/500] time 0.883 (0.903) data 0.000 (0.004) loss 2.3223 (1.6220) acc 46.8750 (62.7656) lr 1.0000e-05 eta 6:13:12 +epoch [1/50] batch [205/500] time 0.867 (0.902) data 0.000 (0.003) loss 0.9194 (1.6102) acc 71.8750 (62.9573) lr 1.0000e-05 eta 6:12:53 +epoch [1/50] batch [210/500] time 0.898 (0.902) data 0.000 (0.003) loss 0.9775 (1.6063) acc 75.0000 (62.9911) lr 1.0000e-05 eta 6:12:38 +epoch [1/50] batch [215/500] time 0.861 (0.902) data 
0.000 (0.003) loss 1.7578 (1.6034) acc 59.3750 (62.9942) lr 1.0000e-05 eta 6:12:34 +epoch [1/50] batch [220/500] time 0.903 (0.902) data 0.000 (0.003) loss 1.6836 (1.5979) acc 56.2500 (63.0966) lr 1.0000e-05 eta 6:12:20 +epoch [1/50] batch [225/500] time 0.884 (0.901) data 0.000 (0.003) loss 1.4414 (1.5980) acc 59.3750 (63.0556) lr 1.0000e-05 eta 6:12:10 +epoch [1/50] batch [230/500] time 0.905 (0.901) data 0.000 (0.003) loss 1.0615 (1.5911) acc 75.0000 (63.2065) lr 1.0000e-05 eta 6:11:57 +epoch [1/50] batch [235/500] time 0.920 (0.901) data 0.000 (0.003) loss 1.8740 (1.5891) acc 56.2500 (63.1915) lr 1.0000e-05 eta 6:11:45 +epoch [1/50] batch [240/500] time 0.890 (0.900) data 0.000 (0.003) loss 2.0410 (1.5903) acc 53.1250 (63.2422) lr 1.0000e-05 eta 6:11:27 +epoch [1/50] batch [245/500] time 0.890 (0.900) data 0.000 (0.003) loss 1.5566 (1.5902) acc 71.8750 (63.3163) lr 1.0000e-05 eta 6:11:14 +epoch [1/50] batch [250/500] time 0.865 (0.900) data 0.000 (0.003) loss 1.2197 (1.5843) acc 68.7500 (63.4875) lr 1.0000e-05 eta 6:11:04 +epoch [1/50] batch [255/500] time 0.889 (0.899) data 0.000 (0.003) loss 0.9180 (1.5802) acc 75.0000 (63.5539) lr 1.0000e-05 eta 6:10:47 +epoch [1/50] batch [260/500] time 0.873 (0.899) data 0.000 (0.003) loss 1.3262 (1.5795) acc 71.8750 (63.5697) lr 1.0000e-05 eta 6:10:41 +epoch [1/50] batch [265/500] time 0.876 (0.899) data 0.001 (0.003) loss 1.2803 (1.5702) acc 53.1250 (63.6792) lr 1.0000e-05 eta 6:10:33 +epoch [1/50] batch [270/500] time 0.911 (0.899) data 0.000 (0.003) loss 1.4951 (1.5715) acc 62.5000 (63.6111) lr 1.0000e-05 eta 6:10:22 +epoch [1/50] batch [275/500] time 0.930 (0.899) data 0.000 (0.003) loss 1.4434 (1.5643) acc 71.8750 (63.7614) lr 1.0000e-05 eta 6:10:15 +epoch [1/50] batch [280/500] time 0.864 (0.898) data 0.000 (0.003) loss 1.3633 (1.5627) acc 56.2500 (63.7165) lr 1.0000e-05 eta 6:10:01 +epoch [1/50] batch [285/500] time 0.872 (0.898) data 0.001 (0.003) loss 1.5420 (1.5624) acc 56.2500 (63.6623) lr 1.0000e-05 eta 
6:09:49 +epoch [1/50] batch [290/500] time 0.875 (0.897) data 0.000 (0.003) loss 1.6846 (1.5611) acc 68.7500 (63.7500) lr 1.0000e-05 eta 6:09:37 +epoch [1/50] batch [295/500] time 0.892 (0.897) data 0.000 (0.002) loss 1.7695 (1.5634) acc 53.1250 (63.6970) lr 1.0000e-05 eta 6:09:26 +epoch [1/50] batch [300/500] time 0.896 (0.897) data 0.000 (0.002) loss 2.2266 (1.5659) acc 59.3750 (63.6250) lr 1.0000e-05 eta 6:09:16 +epoch [1/50] batch [305/500] time 0.862 (0.897) data 0.000 (0.002) loss 1.2363 (1.5631) acc 68.7500 (63.6885) lr 1.0000e-05 eta 6:09:06 +epoch [1/50] batch [310/500] time 1.007 (0.897) data 0.000 (0.002) loss 1.2119 (1.5628) acc 68.7500 (63.6694) lr 1.0000e-05 eta 6:09:04 +epoch [1/50] batch [315/500] time 0.914 (0.897) data 0.000 (0.002) loss 1.5137 (1.5650) acc 59.3750 (63.5714) lr 1.0000e-05 eta 6:09:01 +epoch [1/50] batch [320/500] time 0.896 (0.897) data 0.000 (0.002) loss 1.7031 (1.5659) acc 56.2500 (63.5449) lr 1.0000e-05 eta 6:08:51 +epoch [1/50] batch [325/500] time 0.905 (0.896) data 0.000 (0.002) loss 0.9380 (1.5644) acc 71.8750 (63.6058) lr 1.0000e-05 eta 6:08:40 +epoch [1/50] batch [330/500] time 0.895 (0.897) data 0.000 (0.002) loss 1.0811 (1.5617) acc 68.7500 (63.6269) lr 1.0000e-05 eta 6:08:38 +epoch [1/50] batch [335/500] time 0.865 (0.896) data 0.000 (0.002) loss 1.2051 (1.5593) acc 68.7500 (63.6754) lr 1.0000e-05 eta 6:08:27 +epoch [1/50] batch [340/500] time 0.897 (0.896) data 0.000 (0.002) loss 1.9912 (1.5579) acc 50.0000 (63.6581) lr 1.0000e-05 eta 6:08:15 +epoch [1/50] batch [345/500] time 0.867 (0.895) data 0.000 (0.002) loss 1.8516 (1.5545) acc 56.2500 (63.7591) lr 1.0000e-05 eta 6:07:57 +epoch [1/50] batch [350/500] time 0.896 (0.895) data 0.000 (0.002) loss 1.3438 (1.5545) acc 62.5000 (63.7857) lr 1.0000e-05 eta 6:07:50 +epoch [1/50] batch [355/500] time 0.897 (0.896) data 0.000 (0.002) loss 1.2588 (1.5474) acc 65.6250 (63.9437) lr 1.0000e-05 eta 6:07:50 +epoch [1/50] batch [360/500] time 0.900 (0.896) data 0.000 (0.002) loss 
0.7725 (1.5438) acc 81.2500 (64.0191) lr 1.0000e-05 eta 6:07:48 +epoch [1/50] batch [365/500] time 0.898 (0.896) data 0.000 (0.002) loss 1.3936 (1.5444) acc 71.8750 (64.0154) lr 1.0000e-05 eta 6:07:41 +epoch [1/50] batch [370/500] time 0.882 (0.895) data 0.000 (0.002) loss 1.4375 (1.5431) acc 59.3750 (64.0287) lr 1.0000e-05 eta 6:07:34 +epoch [1/50] batch [375/500] time 0.891 (0.895) data 0.000 (0.002) loss 1.2969 (1.5439) acc 65.6250 (63.9583) lr 1.0000e-05 eta 6:07:25 +epoch [1/50] batch [380/500] time 0.876 (0.895) data 0.000 (0.002) loss 0.9600 (1.5425) acc 78.1250 (63.9556) lr 1.0000e-05 eta 6:07:17 +epoch [1/50] batch [385/500] time 0.885 (0.895) data 0.000 (0.002) loss 1.4258 (1.5421) acc 65.6250 (63.9286) lr 1.0000e-05 eta 6:07:05 +epoch [1/50] batch [390/500] time 0.879 (0.895) data 0.000 (0.002) loss 1.2510 (1.5395) acc 78.1250 (63.9984) lr 1.0000e-05 eta 6:06:56 +epoch [1/50] batch [395/500] time 0.887 (0.894) data 0.000 (0.002) loss 1.2822 (1.5386) acc 75.0000 (64.0111) lr 1.0000e-05 eta 6:06:48 +epoch [1/50] batch [400/500] time 0.884 (0.895) data 0.000 (0.002) loss 1.4189 (1.5356) acc 71.8750 (64.0781) lr 1.0000e-05 eta 6:06:44 +epoch [1/50] batch [405/500] time 0.886 (0.894) data 0.000 (0.002) loss 1.0469 (1.5369) acc 68.7500 (64.0355) lr 1.0000e-05 eta 6:06:37 +epoch [1/50] batch [410/500] time 0.900 (0.894) data 0.000 (0.002) loss 0.8203 (1.5333) acc 81.2500 (64.1387) lr 1.0000e-05 eta 6:06:32 +epoch [1/50] batch [415/500] time 0.894 (0.894) data 0.000 (0.002) loss 0.7822 (1.5304) acc 78.1250 (64.2018) lr 1.0000e-05 eta 6:06:25 +epoch [1/50] batch [420/500] time 0.879 (0.894) data 0.000 (0.002) loss 1.0293 (1.5273) acc 71.8750 (64.2336) lr 1.0000e-05 eta 6:06:16 +epoch [1/50] batch [425/500] time 0.872 (0.894) data 0.000 (0.002) loss 1.6895 (1.5247) acc 56.2500 (64.2353) lr 1.0000e-05 eta 6:06:12 +epoch [1/50] batch [430/500] time 0.899 (0.894) data 0.000 (0.002) loss 0.9365 (1.5220) acc 81.2500 (64.3096) lr 1.0000e-05 eta 6:06:05 +epoch [1/50] 
batch [435/500] time 0.882 (0.894) data 0.000 (0.002) loss 0.8052 (1.5215) acc 90.6250 (64.3894) lr 1.0000e-05 eta 6:05:57 +epoch [1/50] batch [440/500] time 0.877 (0.894) data 0.000 (0.002) loss 0.9639 (1.5231) acc 71.8750 (64.3821) lr 1.0000e-05 eta 6:05:49 +epoch [1/50] batch [445/500] time 0.858 (0.893) data 0.000 (0.002) loss 1.5244 (1.5218) acc 59.3750 (64.3469) lr 1.0000e-05 eta 6:05:36 +epoch [1/50] batch [450/500] time 0.878 (0.893) data 0.000 (0.002) loss 1.6748 (1.5194) acc 62.5000 (64.3819) lr 1.0000e-05 eta 6:05:28 +epoch [1/50] batch [455/500] time 0.878 (0.893) data 0.000 (0.002) loss 1.3291 (1.5197) acc 62.5000 (64.3681) lr 1.0000e-05 eta 6:05:30 +epoch [1/50] batch [460/500] time 0.887 (0.893) data 0.000 (0.002) loss 1.2383 (1.5183) acc 68.7500 (64.3546) lr 1.0000e-05 eta 6:05:22 +epoch [1/50] batch [465/500] time 0.899 (0.893) data 0.000 (0.002) loss 1.4102 (1.5167) acc 68.7500 (64.4086) lr 1.0000e-05 eta 6:05:13 +epoch [1/50] batch [470/500] time 0.854 (0.893) data 0.000 (0.002) loss 0.8804 (1.5152) acc 78.1250 (64.4215) lr 1.0000e-05 eta 6:05:04 +epoch [1/50] batch [475/500] time 0.907 (0.893) data 0.000 (0.002) loss 1.4141 (1.5133) acc 68.7500 (64.4474) lr 1.0000e-05 eta 6:04:58 +epoch [1/50] batch [480/500] time 0.882 (0.893) data 0.000 (0.002) loss 1.2441 (1.5096) acc 65.6250 (64.4661) lr 1.0000e-05 eta 6:04:48 +epoch [1/50] batch [485/500] time 0.880 (0.893) data 0.000 (0.002) loss 1.3701 (1.5074) acc 68.7500 (64.5296) lr 1.0000e-05 eta 6:04:42 +epoch [1/50] batch [490/500] time 0.869 (0.893) data 0.000 (0.002) loss 1.4854 (1.5057) acc 46.8750 (64.5344) lr 1.0000e-05 eta 6:04:36 +epoch [1/50] batch [495/500] time 0.886 (0.892) data 0.000 (0.002) loss 1.3203 (1.5032) acc 62.5000 (64.5644) lr 1.0000e-05 eta 6:04:30 +epoch [1/50] batch [500/500] time 0.900 (0.893) data 0.000 (0.002) loss 1.1777 (1.4989) acc 75.0000 (64.6625) lr 2.0000e-03 eta 6:04:28 +epoch [2/50] batch [5/500] time 0.907 (1.041) data 0.000 (0.148) loss 1.3398 (1.5031) acc 
59.3750 (63.1250) lr 2.0000e-03 eta 7:05:06 +epoch [2/50] batch [10/500] time 0.874 (0.955) data 0.000 (0.074) loss 0.9551 (1.4393) acc 71.8750 (66.5625) lr 2.0000e-03 eta 6:29:39 +epoch [2/50] batch [15/500] time 0.911 (0.929) data 0.000 (0.049) loss 1.2363 (1.4519) acc 78.1250 (65.6250) lr 2.0000e-03 eta 6:19:12 +epoch [2/50] batch [20/500] time 0.898 (0.921) data 0.000 (0.037) loss 2.2129 (1.4383) acc 56.2500 (65.7812) lr 2.0000e-03 eta 6:15:45 +epoch [2/50] batch [25/500] time 0.899 (0.915) data 0.000 (0.030) loss 1.4424 (1.4344) acc 56.2500 (66.2500) lr 2.0000e-03 eta 6:13:04 +epoch [2/50] batch [30/500] time 0.883 (0.912) data 0.000 (0.025) loss 1.7090 (1.3946) acc 56.2500 (67.2917) lr 2.0000e-03 eta 6:11:59 +epoch [2/50] batch [35/500] time 0.909 (0.910) data 0.000 (0.021) loss 2.1680 (1.4017) acc 50.0000 (67.2321) lr 2.0000e-03 eta 6:11:11 +epoch [2/50] batch [40/500] time 0.896 (0.906) data 0.000 (0.019) loss 1.2207 (1.4166) acc 68.7500 (67.0312) lr 2.0000e-03 eta 6:09:32 +epoch [2/50] batch [45/500] time 0.873 (0.905) data 0.000 (0.017) loss 1.9492 (1.4241) acc 59.3750 (66.8056) lr 2.0000e-03 eta 6:08:52 +epoch [2/50] batch [50/500] time 0.915 (0.904) data 0.000 (0.015) loss 1.2227 (1.4280) acc 71.8750 (66.8125) lr 2.0000e-03 eta 6:08:22 +epoch [2/50] batch [55/500] time 0.855 (0.900) data 0.000 (0.014) loss 1.6123 (1.4411) acc 68.7500 (66.7045) lr 2.0000e-03 eta 6:06:35 +epoch [2/50] batch [60/500] time 0.882 (0.897) data 0.000 (0.013) loss 0.7432 (1.4255) acc 87.5000 (66.7188) lr 2.0000e-03 eta 6:05:33 +epoch [2/50] batch [65/500] time 0.860 (0.895) data 0.000 (0.012) loss 1.7354 (1.4304) acc 59.3750 (66.4904) lr 2.0000e-03 eta 6:04:34 +epoch [2/50] batch [70/500] time 0.871 (0.894) data 0.000 (0.011) loss 1.4668 (1.4372) acc 65.6250 (66.3393) lr 2.0000e-03 eta 6:03:53 +epoch [2/50] batch [75/500] time 0.872 (0.892) data 0.000 (0.010) loss 1.0693 (1.4294) acc 75.0000 (66.2500) lr 2.0000e-03 eta 6:03:14 +epoch [2/50] batch [80/500] time 0.890 (0.892) 
data 0.000 (0.009) loss 1.7119 (1.4277) acc 65.6250 (66.2891) lr 2.0000e-03 eta 6:02:58 +epoch [2/50] batch [85/500] time 0.904 (0.892) data 0.000 (0.009) loss 1.2090 (1.4076) acc 78.1250 (66.6176) lr 2.0000e-03 eta 6:02:54 +epoch [2/50] batch [90/500] time 0.894 (0.892) data 0.000 (0.008) loss 1.2549 (1.4022) acc 68.7500 (66.7014) lr 2.0000e-03 eta 6:02:54 +epoch [2/50] batch [95/500] time 0.881 (0.892) data 0.000 (0.008) loss 1.2266 (1.3899) acc 71.8750 (67.0724) lr 2.0000e-03 eta 6:02:41 +epoch [2/50] batch [100/500] time 0.889 (0.892) data 0.000 (0.008) loss 2.0938 (1.3869) acc 56.2500 (67.1250) lr 2.0000e-03 eta 6:02:38 +epoch [2/50] batch [105/500] time 0.878 (0.891) data 0.001 (0.007) loss 1.3330 (1.3889) acc 59.3750 (67.1726) lr 2.0000e-03 eta 6:02:20 +epoch [2/50] batch [110/500] time 0.866 (0.891) data 0.000 (0.007) loss 0.7676 (1.3915) acc 65.6250 (67.1591) lr 2.0000e-03 eta 6:02:02 +epoch [2/50] batch [115/500] time 0.897 (0.891) data 0.000 (0.007) loss 1.1621 (1.3813) acc 71.8750 (67.3098) lr 2.0000e-03 eta 6:01:55 +epoch [2/50] batch [120/500] time 0.872 (0.890) data 0.000 (0.006) loss 1.2168 (1.3799) acc 68.7500 (67.1875) lr 2.0000e-03 eta 6:01:42 +epoch [2/50] batch [125/500] time 0.873 (0.890) data 0.000 (0.006) loss 1.3691 (1.3725) acc 68.7500 (67.3250) lr 2.0000e-03 eta 6:01:25 +epoch [2/50] batch [130/500] time 0.891 (0.889) data 0.000 (0.006) loss 0.8926 (1.3640) acc 71.8750 (67.5481) lr 2.0000e-03 eta 6:01:16 +epoch [2/50] batch [135/500] time 0.902 (0.890) data 0.000 (0.006) loss 1.2207 (1.3543) acc 71.8750 (67.7546) lr 2.0000e-03 eta 6:01:34 +epoch [2/50] batch [140/500] time 0.946 (0.891) data 0.000 (0.006) loss 1.1445 (1.3528) acc 68.7500 (67.9018) lr 2.0000e-03 eta 6:01:42 +epoch [2/50] batch [145/500] time 0.890 (0.890) data 0.000 (0.005) loss 1.4229 (1.3441) acc 71.8750 (68.1250) lr 2.0000e-03 eta 6:01:25 +epoch [2/50] batch [150/500] time 0.907 (0.890) data 0.000 (0.005) loss 1.0938 (1.3465) acc 71.8750 (68.1458) lr 2.0000e-03 eta 
6:01:21 +epoch [2/50] batch [155/500] time 0.878 (0.890) data 0.000 (0.005) loss 1.1387 (1.3442) acc 65.6250 (68.1048) lr 2.0000e-03 eta 6:01:04 +epoch [2/50] batch [160/500] time 0.926 (0.890) data 0.000 (0.005) loss 1.2959 (1.3416) acc 81.2500 (68.1836) lr 2.0000e-03 eta 6:01:02 +epoch [2/50] batch [165/500] time 0.870 (0.890) data 0.000 (0.005) loss 1.7373 (1.3445) acc 62.5000 (68.1629) lr 2.0000e-03 eta 6:00:55 +epoch [2/50] batch [170/500] time 0.900 (0.890) data 0.000 (0.005) loss 1.4053 (1.3420) acc 65.6250 (68.1066) lr 2.0000e-03 eta 6:00:43 +epoch [2/50] batch [175/500] time 0.891 (0.890) data 0.000 (0.004) loss 1.1768 (1.3404) acc 71.8750 (68.1250) lr 2.0000e-03 eta 6:00:41 +epoch [2/50] batch [180/500] time 0.868 (0.890) data 0.000 (0.004) loss 1.0898 (1.3419) acc 75.0000 (68.0382) lr 2.0000e-03 eta 6:00:38 +epoch [2/50] batch [185/500] time 0.890 (0.890) data 0.000 (0.004) loss 1.7285 (1.3424) acc 68.7500 (68.1081) lr 2.0000e-03 eta 6:00:50 +epoch [2/50] batch [190/500] time 0.913 (0.891) data 0.000 (0.004) loss 0.7930 (1.3379) acc 84.3750 (68.1579) lr 2.0000e-03 eta 6:00:49 +epoch [2/50] batch [195/500] time 0.881 (0.890) data 0.000 (0.004) loss 1.4238 (1.3406) acc 65.6250 (68.0609) lr 2.0000e-03 eta 6:00:37 +epoch [2/50] batch [200/500] time 0.890 (0.890) data 0.000 (0.004) loss 1.6680 (1.3364) acc 62.5000 (68.1875) lr 2.0000e-03 eta 6:00:25 +epoch [2/50] batch [205/500] time 0.918 (0.890) data 0.000 (0.004) loss 1.6221 (1.3386) acc 59.3750 (68.1098) lr 2.0000e-03 eta 6:00:25 +epoch [2/50] batch [210/500] time 0.914 (0.890) data 0.000 (0.004) loss 1.1865 (1.3379) acc 68.7500 (68.1399) lr 2.0000e-03 eta 6:00:25 +epoch [2/50] batch [215/500] time 0.901 (0.890) data 0.000 (0.004) loss 1.1133 (1.3406) acc 75.0000 (68.0669) lr 2.0000e-03 eta 6:00:16 +epoch [2/50] batch [220/500] time 0.846 (0.890) data 0.000 (0.004) loss 1.1182 (1.3399) acc 62.5000 (68.0114) lr 2.0000e-03 eta 6:00:04 +epoch [2/50] batch [225/500] time 0.862 (0.889) data 0.000 (0.004) loss 
1.0361 (1.3318) acc 68.7500 (68.1528) lr 2.0000e-03 eta 5:59:45 +epoch [2/50] batch [230/500] time 0.875 (0.889) data 0.001 (0.003) loss 0.6572 (1.3255) acc 81.2500 (68.2609) lr 2.0000e-03 eta 5:59:33 +epoch [2/50] batch [235/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.7480 (1.3179) acc 81.2500 (68.4176) lr 2.0000e-03 eta 5:59:26 +epoch [2/50] batch [240/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.3525 (1.3133) acc 68.7500 (68.4505) lr 2.0000e-03 eta 5:59:17 +epoch [2/50] batch [245/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.6250 (1.3214) acc 56.2500 (68.2398) lr 2.0000e-03 eta 5:59:10 +epoch [2/50] batch [250/500] time 0.900 (0.888) data 0.000 (0.003) loss 1.0713 (1.3203) acc 75.0000 (68.3000) lr 2.0000e-03 eta 5:59:05 +epoch [2/50] batch [255/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.2021 (1.3165) acc 71.8750 (68.3824) lr 2.0000e-03 eta 5:59:01 +epoch [2/50] batch [260/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.3652 (1.3161) acc 84.3750 (68.4495) lr 2.0000e-03 eta 5:58:58 +epoch [2/50] batch [265/500] time 0.883 (0.889) data 0.000 (0.003) loss 1.3467 (1.3138) acc 71.8750 (68.5142) lr 2.0000e-03 eta 5:58:57 +epoch [2/50] batch [270/500] time 0.906 (0.889) data 0.000 (0.003) loss 0.9243 (1.3074) acc 75.0000 (68.5417) lr 2.0000e-03 eta 5:58:57 +epoch [2/50] batch [275/500] time 0.879 (0.889) data 0.000 (0.003) loss 0.8726 (1.3051) acc 78.1250 (68.5568) lr 2.0000e-03 eta 5:58:55 +epoch [2/50] batch [280/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.4268 (1.3037) acc 62.5000 (68.5603) lr 2.0000e-03 eta 5:58:48 +epoch [2/50] batch [285/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.0029 (1.3021) acc 75.0000 (68.5526) lr 2.0000e-03 eta 5:58:52 +epoch [2/50] batch [290/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.3662 (1.3023) acc 78.1250 (68.5991) lr 2.0000e-03 eta 5:58:43 +epoch [2/50] batch [295/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.9092 (1.3005) acc 84.3750 (68.6547) lr 2.0000e-03 eta 5:58:38 +epoch [2/50] 
batch [300/500] time 0.856 (0.889) data 0.000 (0.003) loss 0.5557 (1.2936) acc 87.5000 (68.8333) lr 2.0000e-03 eta 5:58:28 +epoch [2/50] batch [305/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.1523 (1.2934) acc 65.6250 (68.8934) lr 2.0000e-03 eta 5:58:19 +epoch [2/50] batch [310/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.0469 (1.2940) acc 68.7500 (68.8911) lr 2.0000e-03 eta 5:58:17 +epoch [2/50] batch [315/500] time 0.922 (0.889) data 0.000 (0.003) loss 0.7388 (1.2892) acc 84.3750 (69.0377) lr 2.0000e-03 eta 5:58:17 +epoch [2/50] batch [320/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.4336 (1.2935) acc 65.6250 (68.8867) lr 2.0000e-03 eta 5:58:10 +epoch [2/50] batch [325/500] time 0.931 (0.889) data 0.000 (0.003) loss 1.1982 (1.2908) acc 71.8750 (68.9808) lr 2.0000e-03 eta 5:58:09 +epoch [2/50] batch [330/500] time 0.906 (0.889) data 0.000 (0.002) loss 0.6768 (1.2869) acc 81.2500 (69.0814) lr 2.0000e-03 eta 5:58:10 +epoch [2/50] batch [335/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.4932 (1.2869) acc 65.6250 (69.0485) lr 2.0000e-03 eta 5:58:09 +epoch [2/50] batch [340/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.2598 (1.2849) acc 68.7500 (69.0533) lr 2.0000e-03 eta 5:58:03 +epoch [2/50] batch [345/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.8843 (1.2837) acc 78.1250 (69.0851) lr 2.0000e-03 eta 5:57:54 +epoch [2/50] batch [350/500] time 0.849 (0.889) data 0.000 (0.002) loss 0.9995 (1.2867) acc 75.0000 (69.0625) lr 2.0000e-03 eta 5:57:49 +epoch [2/50] batch [355/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.5986 (1.2862) acc 62.5000 (69.0757) lr 2.0000e-03 eta 5:57:44 +epoch [2/50] batch [360/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.0820 (1.2838) acc 59.3750 (69.0712) lr 2.0000e-03 eta 5:57:37 +epoch [2/50] batch [365/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.6660 (1.2840) acc 50.0000 (69.0668) lr 2.0000e-03 eta 5:57:32 +epoch [2/50] batch [370/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.5430 (1.2827) acc 
71.8750 (69.1047) lr 2.0000e-03 eta 5:57:30 +epoch [2/50] batch [375/500] time 0.889 (0.889) data 0.000 (0.002) loss 1.1025 (1.2866) acc 71.8750 (69.0083) lr 2.0000e-03 eta 5:57:24 +epoch [2/50] batch [380/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.0000 (1.2895) acc 75.0000 (68.9885) lr 2.0000e-03 eta 5:57:15 +epoch [2/50] batch [385/500] time 0.894 (0.889) data 0.000 (0.002) loss 1.9375 (1.2926) acc 53.1250 (68.9286) lr 2.0000e-03 eta 5:57:13 +epoch [2/50] batch [390/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.4043 (1.2917) acc 71.8750 (68.9744) lr 2.0000e-03 eta 5:57:01 +epoch [2/50] batch [395/500] time 0.899 (0.889) data 0.000 (0.002) loss 1.2100 (1.2954) acc 78.1250 (68.9003) lr 2.0000e-03 eta 5:56:58 +epoch [2/50] batch [400/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.5859 (1.2983) acc 62.5000 (68.8281) lr 2.0000e-03 eta 5:56:53 +epoch [2/50] batch [405/500] time 0.858 (0.889) data 0.000 (0.002) loss 0.8291 (1.2988) acc 78.1250 (68.7963) lr 2.0000e-03 eta 5:56:50 +epoch [2/50] batch [410/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.7529 (1.3010) acc 50.0000 (68.7195) lr 2.0000e-03 eta 5:56:38 +epoch [2/50] batch [415/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.6558 (1.2987) acc 90.6250 (68.8102) lr 2.0000e-03 eta 5:56:35 +epoch [2/50] batch [420/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.8809 (1.2992) acc 65.6250 (68.8318) lr 2.0000e-03 eta 5:56:26 +epoch [2/50] batch [425/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.4863 (1.2998) acc 62.5000 (68.8235) lr 2.0000e-03 eta 5:56:18 +epoch [2/50] batch [430/500] time 0.858 (0.888) data 0.000 (0.002) loss 1.6973 (1.3022) acc 62.5000 (68.7645) lr 2.0000e-03 eta 5:56:15 +epoch [2/50] batch [435/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.4971 (1.3007) acc 65.6250 (68.8075) lr 2.0000e-03 eta 5:56:11 +epoch [2/50] batch [440/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.6562 (1.3013) acc 62.5000 (68.7784) lr 2.0000e-03 eta 5:56:01 +epoch [2/50] batch [445/500] time 
0.886 (0.888) data 0.000 (0.002) loss 1.5391 (1.3021) acc 65.6250 (68.7781) lr 2.0000e-03 eta 5:55:54 +epoch [2/50] batch [450/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.1367 (1.3028) acc 78.1250 (68.8264) lr 2.0000e-03 eta 5:55:46 +epoch [2/50] batch [455/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.4844 (1.3020) acc 71.8750 (68.8668) lr 2.0000e-03 eta 5:55:39 +epoch [2/50] batch [460/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.3301 (1.3017) acc 65.6250 (68.8383) lr 2.0000e-03 eta 5:55:33 +epoch [2/50] batch [465/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.2207 (1.2987) acc 71.8750 (68.8911) lr 2.0000e-03 eta 5:55:28 +epoch [2/50] batch [470/500] time 0.970 (0.888) data 0.000 (0.002) loss 0.7202 (1.2980) acc 90.6250 (68.9229) lr 2.0000e-03 eta 5:55:28 +epoch [2/50] batch [475/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.1650 (1.2975) acc 75.0000 (68.9408) lr 2.0000e-03 eta 5:55:24 +epoch [2/50] batch [480/500] time 0.869 (0.888) data 0.000 (0.002) loss 1.5977 (1.2989) acc 65.6250 (68.9518) lr 2.0000e-03 eta 5:55:18 +epoch [2/50] batch [485/500] time 0.888 (0.888) data 0.000 (0.002) loss 0.9204 (1.2971) acc 71.8750 (68.9497) lr 2.0000e-03 eta 5:55:15 +epoch [2/50] batch [490/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.1621 (1.2975) acc 65.6250 (68.9158) lr 2.0000e-03 eta 5:55:10 +epoch [2/50] batch [495/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.6553 (1.2995) acc 62.5000 (68.8889) lr 2.0000e-03 eta 5:55:04 +epoch [2/50] batch [500/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.5713 (1.2991) acc 65.6250 (68.8937) lr 1.9980e-03 eta 5:54:58 +epoch [3/50] batch [5/500] time 0.887 (1.024) data 0.000 (0.136) loss 1.1094 (1.3452) acc 81.2500 (67.5000) lr 1.9980e-03 eta 6:49:35 +epoch [3/50] batch [10/500] time 0.889 (0.958) data 0.000 (0.068) loss 1.1582 (1.2844) acc 68.7500 (67.8125) lr 1.9980e-03 eta 6:23:14 +epoch [3/50] batch [15/500] time 0.882 (0.943) data 0.000 (0.046) loss 1.4824 (1.2293) acc 71.8750 (69.5833) lr 
1.9980e-03 eta 6:16:53 +epoch [3/50] batch [20/500] time 0.872 (0.925) data 0.000 (0.034) loss 1.1318 (1.2322) acc 78.1250 (70.3125) lr 1.9980e-03 eta 6:09:30 +epoch [3/50] batch [25/500] time 0.883 (0.916) data 0.000 (0.027) loss 0.6265 (1.2188) acc 84.3750 (70.2500) lr 1.9980e-03 eta 6:06:09 +epoch [3/50] batch [30/500] time 0.875 (0.910) data 0.000 (0.023) loss 1.1885 (1.2306) acc 62.5000 (69.7917) lr 1.9980e-03 eta 6:03:38 +epoch [3/50] batch [35/500] time 0.883 (0.907) data 0.000 (0.020) loss 1.2441 (1.2376) acc 71.8750 (70.0893) lr 1.9980e-03 eta 6:02:23 +epoch [3/50] batch [40/500] time 0.916 (0.908) data 0.000 (0.017) loss 1.3018 (1.2383) acc 59.3750 (69.7656) lr 1.9980e-03 eta 6:02:28 +epoch [3/50] batch [45/500] time 0.893 (0.907) data 0.000 (0.015) loss 1.5312 (1.2345) acc 56.2500 (69.7917) lr 1.9980e-03 eta 6:02:14 +epoch [3/50] batch [50/500] time 0.869 (0.905) data 0.000 (0.014) loss 1.1143 (1.2369) acc 65.6250 (69.5000) lr 1.9980e-03 eta 6:01:04 +epoch [3/50] batch [55/500] time 0.930 (0.903) data 0.000 (0.013) loss 1.1787 (1.2305) acc 71.8750 (69.8295) lr 1.9980e-03 eta 6:00:29 +epoch [3/50] batch [60/500] time 0.895 (0.902) data 0.000 (0.012) loss 1.2900 (1.2413) acc 71.8750 (69.5833) lr 1.9980e-03 eta 5:59:43 +epoch [3/50] batch [65/500] time 0.890 (0.900) data 0.000 (0.011) loss 1.1143 (1.2302) acc 75.0000 (69.6635) lr 1.9980e-03 eta 5:58:58 +epoch [3/50] batch [70/500] time 0.870 (0.897) data 0.000 (0.010) loss 1.3271 (1.2225) acc 59.3750 (69.5982) lr 1.9980e-03 eta 5:57:55 +epoch [3/50] batch [75/500] time 0.876 (0.896) data 0.000 (0.009) loss 1.0215 (1.2191) acc 71.8750 (69.9167) lr 1.9980e-03 eta 5:57:08 +epoch [3/50] batch [80/500] time 0.881 (0.895) data 0.000 (0.009) loss 1.2734 (1.2088) acc 71.8750 (70.1562) lr 1.9980e-03 eta 5:56:57 +epoch [3/50] batch [85/500] time 0.873 (0.895) data 0.000 (0.008) loss 1.3418 (1.2120) acc 59.3750 (69.9632) lr 1.9980e-03 eta 5:56:41 +epoch [3/50] batch [90/500] time 0.875 (0.894) data 0.000 (0.008) loss 
1.1885 (1.2192) acc 68.7500 (69.8611) lr 1.9980e-03 eta 5:56:21 +epoch [3/50] batch [95/500] time 0.874 (0.893) data 0.000 (0.007) loss 1.2666 (1.2129) acc 56.2500 (69.8355) lr 1.9980e-03 eta 5:55:52 +epoch [3/50] batch [100/500] time 0.895 (0.893) data 0.000 (0.007) loss 0.8530 (1.2260) acc 71.8750 (69.5312) lr 1.9980e-03 eta 5:55:38 +epoch [3/50] batch [105/500] time 0.869 (0.892) data 0.000 (0.007) loss 0.9727 (1.2293) acc 71.8750 (69.5536) lr 1.9980e-03 eta 5:55:18 +epoch [3/50] batch [110/500] time 0.860 (0.892) data 0.000 (0.006) loss 0.5928 (1.2219) acc 78.1250 (69.6875) lr 1.9980e-03 eta 5:55:19 +epoch [3/50] batch [115/500] time 0.909 (0.892) data 0.000 (0.006) loss 2.0859 (1.2257) acc 53.1250 (69.6467) lr 1.9980e-03 eta 5:55:04 +epoch [3/50] batch [120/500] time 0.897 (0.892) data 0.000 (0.006) loss 1.5332 (1.2330) acc 68.7500 (69.5312) lr 1.9980e-03 eta 5:54:59 +epoch [3/50] batch [125/500] time 0.884 (0.892) data 0.000 (0.006) loss 0.7720 (1.2311) acc 81.2500 (69.5500) lr 1.9980e-03 eta 5:54:54 +epoch [3/50] batch [130/500] time 0.879 (0.891) data 0.000 (0.005) loss 1.3887 (1.2244) acc 68.7500 (69.6154) lr 1.9980e-03 eta 5:54:33 +epoch [3/50] batch [135/500] time 0.918 (0.891) data 0.000 (0.005) loss 1.1738 (1.2248) acc 59.3750 (69.6759) lr 1.9980e-03 eta 5:54:23 +epoch [3/50] batch [140/500] time 0.895 (0.891) data 0.000 (0.005) loss 1.0986 (1.2217) acc 59.3750 (69.6205) lr 1.9980e-03 eta 5:54:07 +epoch [3/50] batch [145/500] time 0.927 (0.891) data 0.000 (0.005) loss 1.3789 (1.2322) acc 68.7500 (69.5474) lr 1.9980e-03 eta 5:54:06 +epoch [3/50] batch [150/500] time 0.897 (0.890) data 0.000 (0.005) loss 1.0156 (1.2286) acc 71.8750 (69.5417) lr 1.9980e-03 eta 5:53:50 +epoch [3/50] batch [155/500] time 0.885 (0.891) data 0.000 (0.005) loss 1.1494 (1.2345) acc 71.8750 (69.3952) lr 1.9980e-03 eta 5:54:01 +epoch [3/50] batch [160/500] time 0.909 (0.891) data 0.000 (0.004) loss 1.1982 (1.2345) acc 71.8750 (69.4727) lr 1.9980e-03 eta 5:53:56 +epoch [3/50] 
batch [165/500] time 0.930 (0.891) data 0.000 (0.004) loss 1.6406 (1.2384) acc 56.2500 (69.4129) lr 1.9980e-03 eta 5:53:59 +epoch [3/50] batch [170/500] time 0.860 (0.890) data 0.000 (0.004) loss 1.3613 (1.2360) acc 62.5000 (69.4301) lr 1.9980e-03 eta 5:53:38 +epoch [3/50] batch [175/500] time 0.917 (0.890) data 0.000 (0.004) loss 1.3740 (1.2408) acc 62.5000 (69.2857) lr 1.9980e-03 eta 5:53:34 +epoch [3/50] batch [180/500] time 0.897 (0.890) data 0.000 (0.004) loss 1.7266 (1.2386) acc 62.5000 (69.3750) lr 1.9980e-03 eta 5:53:27 +epoch [3/50] batch [185/500] time 0.875 (0.890) data 0.000 (0.004) loss 1.4111 (1.2450) acc 75.0000 (69.2736) lr 1.9980e-03 eta 5:53:11 +epoch [3/50] batch [190/500] time 0.882 (0.890) data 0.000 (0.004) loss 1.4570 (1.2434) acc 71.8750 (69.3586) lr 1.9980e-03 eta 5:53:07 +epoch [3/50] batch [195/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.7773 (1.2427) acc 53.1250 (69.3429) lr 1.9980e-03 eta 5:53:04 +epoch [3/50] batch [200/500] time 0.863 (0.890) data 0.000 (0.004) loss 1.6289 (1.2537) acc 68.7500 (69.1250) lr 1.9980e-03 eta 5:53:00 +epoch [3/50] batch [205/500] time 0.905 (0.890) data 0.000 (0.004) loss 1.1025 (1.2530) acc 68.7500 (69.1463) lr 1.9980e-03 eta 5:52:54 +epoch [3/50] batch [210/500] time 0.883 (0.890) data 0.000 (0.003) loss 0.6895 (1.2515) acc 84.3750 (69.1964) lr 1.9980e-03 eta 5:52:50 +epoch [3/50] batch [215/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.5215 (1.2514) acc 65.6250 (69.1715) lr 1.9980e-03 eta 5:52:49 +epoch [3/50] batch [220/500] time 0.875 (0.890) data 0.000 (0.003) loss 2.0273 (1.2523) acc 53.1250 (69.1051) lr 1.9980e-03 eta 5:52:47 +epoch [3/50] batch [225/500] time 0.889 (0.890) data 0.000 (0.003) loss 1.5850 (1.2484) acc 68.7500 (69.1806) lr 1.9980e-03 eta 5:52:44 +epoch [3/50] batch [230/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.1357 (1.2485) acc 78.1250 (69.1984) lr 1.9980e-03 eta 5:52:43 +epoch [3/50] batch [235/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.9390 (1.2436) acc 
68.7500 (69.2819) lr 1.9980e-03 eta 5:52:41 +epoch [3/50] batch [240/500] time 0.870 (0.890) data 0.000 (0.003) loss 0.7310 (1.2430) acc 78.1250 (69.2708) lr 1.9980e-03 eta 5:52:31 +epoch [3/50] batch [245/500] time 0.864 (0.890) data 0.000 (0.003) loss 1.0225 (1.2464) acc 68.7500 (69.2092) lr 1.9980e-03 eta 5:52:20 +epoch [3/50] batch [250/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.5273 (1.2423) acc 65.6250 (69.3625) lr 1.9980e-03 eta 5:52:11 +epoch [3/50] batch [255/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.4014 (1.2439) acc 59.3750 (69.2525) lr 1.9980e-03 eta 5:52:17 +epoch [3/50] batch [260/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.2744 (1.2482) acc 68.7500 (69.2067) lr 1.9980e-03 eta 5:52:08 +epoch [3/50] batch [265/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.2812 (1.2481) acc 71.8750 (69.2217) lr 1.9980e-03 eta 5:52:02 +epoch [3/50] batch [270/500] time 0.882 (0.890) data 0.000 (0.003) loss 1.1270 (1.2458) acc 75.0000 (69.3287) lr 1.9980e-03 eta 5:52:00 +epoch [3/50] batch [275/500] time 0.882 (0.890) data 0.000 (0.003) loss 1.2236 (1.2442) acc 71.8750 (69.4205) lr 1.9980e-03 eta 5:51:57 +epoch [3/50] batch [280/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.8916 (1.2472) acc 59.3750 (69.4196) lr 1.9980e-03 eta 5:51:59 +epoch [3/50] batch [285/500] time 0.903 (0.890) data 0.000 (0.003) loss 1.3975 (1.2473) acc 71.8750 (69.4189) lr 1.9980e-03 eta 5:51:53 +epoch [3/50] batch [290/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.0957 (1.2448) acc 65.6250 (69.5043) lr 1.9980e-03 eta 5:51:49 +epoch [3/50] batch [295/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.7021 (1.2469) acc 65.6250 (69.5021) lr 1.9980e-03 eta 5:51:43 +epoch [3/50] batch [300/500] time 0.859 (0.891) data 0.000 (0.002) loss 1.1885 (1.2445) acc 65.6250 (69.4792) lr 1.9980e-03 eta 5:51:45 +epoch [3/50] batch [305/500] time 0.916 (0.890) data 0.000 (0.002) loss 1.4863 (1.2463) acc 59.3750 (69.4467) lr 1.9980e-03 eta 5:51:36 +epoch [3/50] batch [310/500] time 
0.880 (0.890) data 0.000 (0.002) loss 0.7559 (1.2442) acc 84.3750 (69.5161) lr 1.9980e-03 eta 5:51:23 +epoch [3/50] batch [315/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.4199 (1.2440) acc 75.0000 (69.5536) lr 1.9980e-03 eta 5:51:19 +epoch [3/50] batch [320/500] time 0.871 (0.890) data 0.000 (0.002) loss 0.7402 (1.2465) acc 78.1250 (69.4922) lr 1.9980e-03 eta 5:51:09 +epoch [3/50] batch [325/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.2559 (1.2482) acc 62.5000 (69.4712) lr 1.9980e-03 eta 5:51:03 +epoch [3/50] batch [330/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.3311 (1.2455) acc 68.7500 (69.5928) lr 1.9980e-03 eta 5:50:53 +epoch [3/50] batch [335/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.3340 (1.2467) acc 75.0000 (69.5709) lr 1.9980e-03 eta 5:50:46 +epoch [3/50] batch [340/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.5625 (1.2482) acc 62.5000 (69.5956) lr 1.9980e-03 eta 5:50:41 +epoch [3/50] batch [345/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.5508 (1.2494) acc 65.6250 (69.5833) lr 1.9980e-03 eta 5:50:36 +epoch [3/50] batch [350/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.1367 (1.2504) acc 71.8750 (69.5536) lr 1.9980e-03 eta 5:50:29 +epoch [3/50] batch [355/500] time 0.863 (0.889) data 0.000 (0.002) loss 1.5078 (1.2517) acc 62.5000 (69.4806) lr 1.9980e-03 eta 5:50:20 +epoch [3/50] batch [360/500] time 0.893 (0.889) data 0.000 (0.002) loss 1.3340 (1.2555) acc 65.6250 (69.3663) lr 1.9980e-03 eta 5:50:15 +epoch [3/50] batch [365/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.2793 (1.2553) acc 68.7500 (69.4007) lr 1.9980e-03 eta 5:50:12 +epoch [3/50] batch [370/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.1836 (1.2553) acc 65.6250 (69.3497) lr 1.9980e-03 eta 5:50:09 +epoch [3/50] batch [375/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.7656 (1.2538) acc 59.3750 (69.3833) lr 1.9980e-03 eta 5:50:01 +epoch [3/50] batch [380/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.2529 (1.2526) acc 62.5000 (69.3997) lr 
1.9980e-03 eta 5:49:52 +epoch [3/50] batch [385/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.3340 (1.2526) acc 68.7500 (69.3912) lr 1.9980e-03 eta 5:49:43 +epoch [3/50] batch [390/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8369 (1.2512) acc 78.1250 (69.4071) lr 1.9980e-03 eta 5:49:35 +epoch [3/50] batch [395/500] time 0.987 (0.889) data 0.000 (0.002) loss 1.0381 (1.2490) acc 75.0000 (69.4699) lr 1.9980e-03 eta 5:49:35 +epoch [3/50] batch [400/500] time 0.849 (0.889) data 0.000 (0.002) loss 0.9395 (1.2483) acc 71.8750 (69.4297) lr 1.9980e-03 eta 5:49:29 +epoch [3/50] batch [405/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.1025 (1.2471) acc 78.1250 (69.4522) lr 1.9980e-03 eta 5:49:25 +epoch [3/50] batch [410/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.7681 (1.2448) acc 75.0000 (69.5122) lr 1.9980e-03 eta 5:49:16 +epoch [3/50] batch [415/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.5049 (1.2439) acc 59.3750 (69.5105) lr 1.9980e-03 eta 5:49:09 +epoch [3/50] batch [420/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.0557 (1.2445) acc 65.6250 (69.4568) lr 1.9980e-03 eta 5:49:05 +epoch [3/50] batch [425/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.0479 (1.2423) acc 75.0000 (69.5147) lr 1.9980e-03 eta 5:48:59 +epoch [3/50] batch [430/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.3994 (1.2411) acc 65.6250 (69.5494) lr 1.9980e-03 eta 5:48:55 +epoch [3/50] batch [435/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.4277 (1.2427) acc 68.7500 (69.5043) lr 1.9980e-03 eta 5:48:51 +epoch [3/50] batch [440/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.0908 (1.2436) acc 71.8750 (69.4744) lr 1.9980e-03 eta 5:48:49 +epoch [3/50] batch [445/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6655 (1.2403) acc 81.2500 (69.5225) lr 1.9980e-03 eta 5:48:43 +epoch [3/50] batch [450/500] time 0.929 (0.888) data 0.000 (0.002) loss 0.8618 (1.2388) acc 65.6250 (69.5347) lr 1.9980e-03 eta 5:48:41 +epoch [3/50] batch [455/500] time 0.900 (0.888) data 
0.000 (0.002) loss 0.9429 (1.2428) acc 68.7500 (69.4986) lr 1.9980e-03 eta 5:48:36 +epoch [3/50] batch [460/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.7012 (1.2415) acc 81.2500 (69.5109) lr 1.9980e-03 eta 5:48:30 +epoch [3/50] batch [465/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.2617 (1.2426) acc 71.8750 (69.5228) lr 1.9980e-03 eta 5:48:25 +epoch [3/50] batch [470/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.6064 (1.2449) acc 59.3750 (69.4747) lr 1.9980e-03 eta 5:48:17 +epoch [3/50] batch [475/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.0010 (1.2460) acc 78.1250 (69.4803) lr 1.9980e-03 eta 5:48:09 +epoch [3/50] batch [480/500] time 0.915 (0.888) data 0.000 (0.002) loss 1.8906 (1.2488) acc 59.3750 (69.4206) lr 1.9980e-03 eta 5:48:05 +epoch [3/50] batch [485/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.2910 (1.2486) acc 68.7500 (69.4008) lr 1.9980e-03 eta 5:47:58 +epoch [3/50] batch [490/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1680 (1.2486) acc 65.6250 (69.3941) lr 1.9980e-03 eta 5:47:52 +epoch [3/50] batch [495/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.0244 (1.2486) acc 59.3750 (69.3624) lr 1.9980e-03 eta 5:47:45 +epoch [3/50] batch [500/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.3262 (1.2487) acc 68.7500 (69.3875) lr 1.9921e-03 eta 5:47:37 +epoch [4/50] batch [5/500] time 0.868 (1.019) data 0.000 (0.142) loss 1.1436 (1.0245) acc 65.6250 (70.6250) lr 1.9921e-03 eta 6:38:58 +epoch [4/50] batch [10/500] time 0.898 (0.951) data 0.000 (0.071) loss 1.4619 (1.0570) acc 65.6250 (72.8125) lr 1.9921e-03 eta 6:12:27 +epoch [4/50] batch [15/500] time 0.861 (0.929) data 0.000 (0.048) loss 1.0430 (1.0708) acc 75.0000 (72.7083) lr 1.9921e-03 eta 6:03:26 +epoch [4/50] batch [20/500] time 0.869 (0.917) data 0.000 (0.036) loss 0.9707 (1.1010) acc 71.8750 (72.5000) lr 1.9921e-03 eta 5:58:56 +epoch [4/50] batch [25/500] time 0.854 (0.908) data 0.000 (0.029) loss 0.8638 (1.1167) acc 81.2500 (72.5000) lr 1.9921e-03 eta 5:55:22 
+epoch [4/50] batch [30/500] time 0.861 (0.908) data 0.000 (0.024) loss 1.4277 (1.1783) acc 78.1250 (71.6667) lr 1.9921e-03 eta 5:54:59 +epoch [4/50] batch [35/500] time 0.910 (0.904) data 0.000 (0.021) loss 0.4924 (1.1454) acc 87.5000 (72.3214) lr 1.9921e-03 eta 5:53:40 +epoch [4/50] batch [40/500] time 0.899 (0.903) data 0.000 (0.018) loss 1.1016 (1.1462) acc 62.5000 (71.7969) lr 1.9921e-03 eta 5:52:54 +epoch [4/50] batch [45/500] time 0.883 (0.900) data 0.001 (0.016) loss 1.4570 (1.1478) acc 68.7500 (71.8750) lr 1.9921e-03 eta 5:51:52 +epoch [4/50] batch [50/500] time 0.883 (0.898) data 0.000 (0.014) loss 0.6538 (1.1271) acc 84.3750 (72.6250) lr 1.9921e-03 eta 5:50:57 +epoch [4/50] batch [55/500] time 0.879 (0.897) data 0.000 (0.013) loss 1.0908 (1.1521) acc 68.7500 (71.9886) lr 1.9921e-03 eta 5:50:27 +epoch [4/50] batch [60/500] time 0.911 (0.895) data 0.000 (0.012) loss 1.0801 (1.1784) acc 65.6250 (71.5104) lr 1.9921e-03 eta 5:49:48 +epoch [4/50] batch [65/500] time 0.887 (0.895) data 0.000 (0.011) loss 0.9912 (1.1650) acc 75.0000 (71.9231) lr 1.9921e-03 eta 5:49:35 +epoch [4/50] batch [70/500] time 0.876 (0.895) data 0.000 (0.010) loss 1.0146 (1.1723) acc 75.0000 (71.8304) lr 1.9921e-03 eta 5:49:22 +epoch [4/50] batch [75/500] time 0.886 (0.894) data 0.000 (0.010) loss 0.7998 (1.1756) acc 78.1250 (71.6667) lr 1.9921e-03 eta 5:49:05 +epoch [4/50] batch [80/500] time 0.912 (0.893) data 0.000 (0.009) loss 1.1436 (1.1846) acc 75.0000 (71.2891) lr 1.9921e-03 eta 5:48:35 +epoch [4/50] batch [85/500] time 0.891 (0.892) data 0.000 (0.009) loss 1.4629 (1.1885) acc 62.5000 (71.0662) lr 1.9921e-03 eta 5:48:15 +epoch [4/50] batch [90/500] time 0.895 (0.893) data 0.000 (0.008) loss 1.2002 (1.1771) acc 65.6250 (71.0417) lr 1.9921e-03 eta 5:48:18 +epoch [4/50] batch [95/500] time 0.891 (0.892) data 0.000 (0.008) loss 1.8496 (1.1809) acc 65.6250 (70.8882) lr 1.9921e-03 eta 5:48:07 +epoch [4/50] batch [100/500] time 0.877 (0.892) data 0.000 (0.007) loss 0.9722 (1.1666) acc 
78.1250 (71.2500) lr 1.9921e-03 eta 5:47:49 +epoch [4/50] batch [105/500] time 0.927 (0.892) data 0.000 (0.007) loss 1.7090 (1.1727) acc 65.6250 (70.9524) lr 1.9921e-03 eta 5:47:38 +epoch [4/50] batch [110/500] time 0.881 (0.890) data 0.000 (0.007) loss 1.5928 (1.1811) acc 65.6250 (70.6534) lr 1.9921e-03 eta 5:47:07 +epoch [4/50] batch [115/500] time 0.913 (0.891) data 0.000 (0.006) loss 1.1895 (1.1834) acc 75.0000 (70.7337) lr 1.9921e-03 eta 5:47:14 +epoch [4/50] batch [120/500] time 0.881 (0.890) data 0.000 (0.006) loss 0.8442 (1.1778) acc 71.8750 (70.6250) lr 1.9921e-03 eta 5:46:57 +epoch [4/50] batch [125/500] time 0.893 (0.890) data 0.000 (0.006) loss 1.3262 (1.1943) acc 65.6250 (70.3750) lr 1.9921e-03 eta 5:46:52 +epoch [4/50] batch [130/500] time 1.007 (0.891) data 0.000 (0.006) loss 0.9497 (1.1912) acc 78.1250 (70.4567) lr 1.9921e-03 eta 5:47:14 +epoch [4/50] batch [135/500] time 0.854 (0.891) data 0.000 (0.005) loss 0.9888 (1.1926) acc 75.0000 (70.4167) lr 1.9921e-03 eta 5:47:06 +epoch [4/50] batch [140/500] time 0.879 (0.891) data 0.000 (0.005) loss 1.4561 (1.2018) acc 71.8750 (70.2009) lr 1.9921e-03 eta 5:46:51 +epoch [4/50] batch [145/500] time 0.871 (0.891) data 0.000 (0.005) loss 0.9736 (1.2061) acc 71.8750 (69.9784) lr 1.9921e-03 eta 5:46:40 +epoch [4/50] batch [150/500] time 0.878 (0.891) data 0.000 (0.005) loss 1.0420 (1.2091) acc 78.1250 (70.1042) lr 1.9921e-03 eta 5:46:33 +epoch [4/50] batch [155/500] time 0.887 (0.890) data 0.000 (0.005) loss 1.0703 (1.2187) acc 75.0000 (69.9798) lr 1.9921e-03 eta 5:46:24 +epoch [4/50] batch [160/500] time 0.882 (0.890) data 0.000 (0.005) loss 1.0752 (1.2132) acc 75.0000 (70.0000) lr 1.9921e-03 eta 5:46:14 +epoch [4/50] batch [165/500] time 0.894 (0.890) data 0.000 (0.005) loss 1.3945 (1.2131) acc 62.5000 (69.9053) lr 1.9921e-03 eta 5:46:15 +epoch [4/50] batch [170/500] time 0.861 (0.890) data 0.000 (0.004) loss 1.1787 (1.2132) acc 62.5000 (69.7243) lr 1.9921e-03 eta 5:46:07 +epoch [4/50] batch [175/500] time 
0.898 (0.890) data 0.000 (0.004) loss 1.1738 (1.2130) acc 56.2500 (69.5714) lr 1.9921e-03 eta 5:46:00 +epoch [4/50] batch [180/500] time 0.877 (0.890) data 0.000 (0.004) loss 1.0840 (1.2132) acc 68.7500 (69.7222) lr 1.9921e-03 eta 5:45:52 +epoch [4/50] batch [185/500] time 0.912 (0.890) data 0.000 (0.004) loss 1.7002 (1.2122) acc 65.6250 (69.7804) lr 1.9921e-03 eta 5:45:59 +epoch [4/50] batch [190/500] time 0.910 (0.891) data 0.000 (0.004) loss 1.4932 (1.2142) acc 65.6250 (69.7204) lr 1.9921e-03 eta 5:46:03 +epoch [4/50] batch [195/500] time 0.882 (0.891) data 0.000 (0.004) loss 1.6836 (1.2164) acc 65.6250 (69.7276) lr 1.9921e-03 eta 5:45:55 +epoch [4/50] batch [200/500] time 0.866 (0.890) data 0.000 (0.004) loss 0.8066 (1.2191) acc 71.8750 (69.5938) lr 1.9921e-03 eta 5:45:42 +epoch [4/50] batch [205/500] time 0.899 (0.890) data 0.000 (0.004) loss 1.2412 (1.2193) acc 75.0000 (69.6341) lr 1.9921e-03 eta 5:45:36 +epoch [4/50] batch [210/500] time 0.905 (0.890) data 0.000 (0.004) loss 0.9932 (1.2166) acc 75.0000 (69.7470) lr 1.9921e-03 eta 5:45:30 +epoch [4/50] batch [215/500] time 0.893 (0.890) data 0.000 (0.004) loss 0.9326 (1.2161) acc 71.8750 (69.7674) lr 1.9921e-03 eta 5:45:22 +epoch [4/50] batch [220/500] time 0.901 (0.890) data 0.000 (0.003) loss 1.0244 (1.2176) acc 78.1250 (69.7727) lr 1.9921e-03 eta 5:45:19 +epoch [4/50] batch [225/500] time 0.904 (0.890) data 0.000 (0.003) loss 1.4316 (1.2189) acc 71.8750 (69.6528) lr 1.9921e-03 eta 5:45:23 +epoch [4/50] batch [230/500] time 0.925 (0.891) data 0.000 (0.003) loss 1.3145 (1.2239) acc 68.7500 (69.5924) lr 1.9921e-03 eta 5:45:31 +epoch [4/50] batch [235/500] time 0.884 (0.891) data 0.000 (0.003) loss 0.9351 (1.2241) acc 81.2500 (69.6277) lr 1.9921e-03 eta 5:45:20 +epoch [4/50] batch [240/500] time 0.902 (0.891) data 0.000 (0.003) loss 1.8291 (1.2236) acc 59.3750 (69.7266) lr 1.9921e-03 eta 5:45:13 +epoch [4/50] batch [245/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.3818 (1.2276) acc 68.7500 (69.6429) lr 
1.9921e-03 eta 5:45:02 +epoch [4/50] batch [250/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.0693 (1.2269) acc 78.1250 (69.7000) lr 1.9921e-03 eta 5:44:48 +epoch [4/50] batch [255/500] time 0.876 (0.890) data 0.000 (0.003) loss 1.4873 (1.2259) acc 75.0000 (69.7426) lr 1.9921e-03 eta 5:44:36 +epoch [4/50] batch [260/500] time 0.850 (0.889) data 0.000 (0.003) loss 1.1895 (1.2288) acc 68.7500 (69.6635) lr 1.9921e-03 eta 5:44:24 +epoch [4/50] batch [265/500] time 0.892 (0.889) data 0.001 (0.003) loss 1.2041 (1.2351) acc 65.6250 (69.6226) lr 1.9921e-03 eta 5:44:16 +epoch [4/50] batch [270/500] time 0.907 (0.889) data 0.000 (0.003) loss 1.1318 (1.2331) acc 78.1250 (69.6875) lr 1.9921e-03 eta 5:44:14 +epoch [4/50] batch [275/500] time 0.861 (0.889) data 0.000 (0.003) loss 1.9111 (1.2350) acc 65.6250 (69.7273) lr 1.9921e-03 eta 5:44:13 +epoch [4/50] batch [280/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.7349 (1.2331) acc 71.8750 (69.7545) lr 1.9921e-03 eta 5:44:05 +epoch [4/50] batch [285/500] time 0.896 (0.889) data 0.000 (0.003) loss 0.9668 (1.2306) acc 71.8750 (69.8355) lr 1.9921e-03 eta 5:43:56 +epoch [4/50] batch [290/500] time 0.867 (0.889) data 0.000 (0.003) loss 1.4502 (1.2307) acc 65.6250 (69.8384) lr 1.9921e-03 eta 5:43:50 +epoch [4/50] batch [295/500] time 0.908 (0.889) data 0.000 (0.003) loss 1.1035 (1.2294) acc 78.1250 (69.8941) lr 1.9921e-03 eta 5:43:46 +epoch [4/50] batch [300/500] time 0.861 (0.888) data 0.000 (0.003) loss 1.5859 (1.2259) acc 62.5000 (69.9375) lr 1.9921e-03 eta 5:43:30 +epoch [4/50] batch [305/500] time 0.911 (0.888) data 0.000 (0.003) loss 0.6592 (1.2253) acc 78.1250 (69.9180) lr 1.9921e-03 eta 5:43:26 +epoch [4/50] batch [310/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.6025 (1.2261) acc 53.1250 (69.9093) lr 1.9921e-03 eta 5:43:23 +epoch [4/50] batch [315/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.2461 (1.2265) acc 68.7500 (69.8909) lr 1.9921e-03 eta 5:43:15 +epoch [4/50] batch [320/500] time 0.888 (0.888) data 
0.001 (0.002) loss 1.2920 (1.2279) acc 71.8750 (69.8926) lr 1.9921e-03 eta 5:43:11 +epoch [4/50] batch [325/500] time 0.898 (0.888) data 0.000 (0.002) loss 0.8291 (1.2286) acc 81.2500 (69.9135) lr 1.9921e-03 eta 5:43:06 +epoch [4/50] batch [330/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9067 (1.2275) acc 81.2500 (69.9053) lr 1.9921e-03 eta 5:42:59 +epoch [4/50] batch [335/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3340 (1.2304) acc 56.2500 (69.8134) lr 1.9921e-03 eta 5:42:56 +epoch [4/50] batch [340/500] time 0.940 (0.889) data 0.000 (0.002) loss 1.9580 (1.2336) acc 65.6250 (69.7794) lr 1.9921e-03 eta 5:42:58 +epoch [4/50] batch [345/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.6338 (1.2347) acc 59.3750 (69.7645) lr 1.9921e-03 eta 5:42:55 +epoch [4/50] batch [350/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.7183 (1.2314) acc 87.5000 (69.8304) lr 1.9921e-03 eta 5:42:47 +epoch [4/50] batch [355/500] time 0.902 (0.888) data 0.000 (0.002) loss 0.8262 (1.2259) acc 75.0000 (69.9472) lr 1.9921e-03 eta 5:42:42 +epoch [4/50] batch [360/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9316 (1.2265) acc 75.0000 (69.9653) lr 1.9921e-03 eta 5:42:33 +epoch [4/50] batch [365/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.3145 (1.2250) acc 65.6250 (69.9914) lr 1.9921e-03 eta 5:42:27 +epoch [4/50] batch [370/500] time 0.846 (0.888) data 0.000 (0.002) loss 1.0303 (1.2230) acc 71.8750 (70.0422) lr 1.9921e-03 eta 5:42:20 +epoch [4/50] batch [375/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.0508 (1.2201) acc 71.8750 (70.1000) lr 1.9921e-03 eta 5:42:19 +epoch [4/50] batch [380/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3203 (1.2222) acc 65.6250 (70.0411) lr 1.9921e-03 eta 5:42:13 +epoch [4/50] batch [385/500] time 0.924 (0.888) data 0.000 (0.002) loss 1.0088 (1.2209) acc 78.1250 (70.0487) lr 1.9921e-03 eta 5:42:10 +epoch [4/50] batch [390/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.1621 (1.2205) acc 65.6250 (70.0080) lr 1.9921e-03 eta 
5:42:08 +epoch [4/50] batch [395/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.8125 (1.2238) acc 65.6250 (69.9604) lr 1.9921e-03 eta 5:42:03 +epoch [4/50] batch [400/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.8730 (1.2257) acc 53.1250 (69.9219) lr 1.9921e-03 eta 5:41:51 +epoch [4/50] batch [405/500] time 0.912 (0.888) data 0.000 (0.002) loss 0.8486 (1.2240) acc 75.0000 (69.9460) lr 1.9921e-03 eta 5:41:49 +epoch [4/50] batch [410/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.3320 (1.2242) acc 75.0000 (69.9009) lr 1.9921e-03 eta 5:41:47 +epoch [4/50] batch [415/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.4521 (1.2216) acc 59.3750 (69.9699) lr 1.9921e-03 eta 5:41:41 +epoch [4/50] batch [420/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.8813 (1.2238) acc 68.7500 (69.9033) lr 1.9921e-03 eta 5:41:39 +epoch [4/50] batch [425/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.4609 (1.2253) acc 56.2500 (69.8235) lr 1.9921e-03 eta 5:41:35 +epoch [4/50] batch [430/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.7695 (1.2269) acc 59.3750 (69.8110) lr 1.9921e-03 eta 5:41:27 +epoch [4/50] batch [435/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.6055 (1.2289) acc 71.8750 (69.7845) lr 1.9921e-03 eta 5:41:20 +epoch [4/50] batch [440/500] time 0.857 (0.888) data 0.000 (0.002) loss 0.7847 (1.2283) acc 78.1250 (69.7514) lr 1.9921e-03 eta 5:41:15 +epoch [4/50] batch [445/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.2920 (1.2284) acc 68.7500 (69.7402) lr 1.9921e-03 eta 5:41:08 +epoch [4/50] batch [450/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.7783 (1.2321) acc 68.7500 (69.6667) lr 1.9921e-03 eta 5:41:01 +epoch [4/50] batch [455/500] time 0.876 (0.888) data 0.000 (0.002) loss 0.6299 (1.2298) acc 84.3750 (69.7115) lr 1.9921e-03 eta 5:40:54 +epoch [4/50] batch [460/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.2715 (1.2300) acc 75.0000 (69.7147) lr 1.9921e-03 eta 5:40:50 +epoch [4/50] batch [465/500] time 0.896 (0.888) data 0.000 (0.002) loss 
0.8623 (1.2300) acc 81.2500 (69.7446) lr 1.9921e-03 eta 5:40:44 +epoch [4/50] batch [470/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.9019 (1.2284) acc 78.1250 (69.7806) lr 1.9921e-03 eta 5:40:41 +epoch [4/50] batch [475/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.7852 (1.2291) acc 62.5000 (69.8224) lr 1.9921e-03 eta 5:40:34 +epoch [4/50] batch [480/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.1719 (1.2273) acc 71.8750 (69.8698) lr 1.9921e-03 eta 5:40:27 +epoch [4/50] batch [485/500] time 0.875 (0.887) data 0.001 (0.002) loss 1.6729 (1.2272) acc 59.3750 (69.8647) lr 1.9921e-03 eta 5:40:22 +epoch [4/50] batch [490/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.1289 (1.2282) acc 68.7500 (69.8661) lr 1.9921e-03 eta 5:40:17 +epoch [4/50] batch [495/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.4150 (1.2298) acc 65.6250 (69.8548) lr 1.9921e-03 eta 5:40:10 +epoch [4/50] batch [500/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.4463 (1.2326) acc 78.1250 (69.8438) lr 1.9823e-03 eta 5:40:04 +epoch [5/50] batch [5/500] time 0.906 (1.033) data 0.000 (0.138) loss 1.2627 (1.0416) acc 65.6250 (75.0000) lr 1.9823e-03 eta 6:36:04 +epoch [5/50] batch [10/500] time 0.855 (0.956) data 0.000 (0.069) loss 1.5889 (1.1573) acc 68.7500 (72.5000) lr 1.9823e-03 eta 6:06:17 +epoch [5/50] batch [15/500] time 0.892 (0.938) data 0.000 (0.046) loss 1.3154 (1.1480) acc 59.3750 (73.1250) lr 1.9823e-03 eta 5:59:21 +epoch [5/50] batch [20/500] time 0.901 (0.926) data 0.000 (0.035) loss 1.1738 (1.1495) acc 71.8750 (72.9688) lr 1.9823e-03 eta 5:54:33 +epoch [5/50] batch [25/500] time 0.869 (0.918) data 0.000 (0.028) loss 1.1895 (1.1306) acc 71.8750 (72.8750) lr 1.9823e-03 eta 5:51:36 +epoch [5/50] batch [30/500] time 0.891 (0.912) data 0.000 (0.023) loss 1.3789 (1.1635) acc 62.5000 (72.3958) lr 1.9823e-03 eta 5:49:08 +epoch [5/50] batch [35/500] time 0.924 (0.909) data 0.000 (0.020) loss 1.1133 (1.1766) acc 68.7500 (71.8750) lr 1.9823e-03 eta 5:47:51 +epoch [5/50] batch 
[40/500] time 0.897 (0.906) data 0.000 (0.018) loss 1.3145 (1.2017) acc 65.6250 (71.0938) lr 1.9823e-03 eta 5:46:39 +epoch [5/50] batch [45/500] time 0.877 (0.904) data 0.000 (0.016) loss 1.4180 (1.2092) acc 71.8750 (71.0417) lr 1.9823e-03 eta 5:45:57 +epoch [5/50] batch [50/500] time 0.881 (0.905) data 0.000 (0.014) loss 1.3115 (1.2071) acc 62.5000 (70.9375) lr 1.9823e-03 eta 5:46:12 +epoch [5/50] batch [55/500] time 0.894 (0.904) data 0.000 (0.013) loss 0.9814 (1.2151) acc 81.2500 (70.9091) lr 1.9823e-03 eta 5:45:42 +epoch [5/50] batch [60/500] time 0.898 (0.903) data 0.000 (0.012) loss 1.0059 (1.2333) acc 65.6250 (70.3125) lr 1.9823e-03 eta 5:45:25 +epoch [5/50] batch [65/500] time 0.879 (0.901) data 0.000 (0.011) loss 0.9956 (1.2320) acc 75.0000 (70.0000) lr 1.9823e-03 eta 5:44:21 +epoch [5/50] batch [70/500] time 0.912 (0.900) data 0.000 (0.010) loss 1.3330 (1.2099) acc 68.7500 (70.4911) lr 1.9823e-03 eta 5:43:51 +epoch [5/50] batch [75/500] time 0.869 (0.900) data 0.000 (0.009) loss 1.0498 (1.2071) acc 71.8750 (70.5417) lr 1.9823e-03 eta 5:43:44 +epoch [5/50] batch [80/500] time 0.877 (0.898) data 0.000 (0.009) loss 1.6113 (1.2229) acc 65.6250 (70.3125) lr 1.9823e-03 eta 5:43:06 +epoch [5/50] batch [85/500] time 0.869 (0.897) data 0.000 (0.008) loss 1.1982 (1.2284) acc 65.6250 (70.2941) lr 1.9823e-03 eta 5:42:38 +epoch [5/50] batch [90/500] time 0.899 (0.897) data 0.000 (0.008) loss 1.1641 (1.2233) acc 68.7500 (70.4167) lr 1.9823e-03 eta 5:42:20 +epoch [5/50] batch [95/500] time 0.873 (0.895) data 0.000 (0.008) loss 0.9141 (1.2188) acc 75.0000 (70.4276) lr 1.9823e-03 eta 5:41:50 +epoch [5/50] batch [100/500] time 0.882 (0.895) data 0.000 (0.007) loss 0.8789 (1.2132) acc 71.8750 (70.5312) lr 1.9823e-03 eta 5:41:36 +epoch [5/50] batch [105/500] time 0.859 (0.894) data 0.000 (0.007) loss 0.5649 (1.2018) acc 84.3750 (70.8631) lr 1.9823e-03 eta 5:41:05 +epoch [5/50] batch [110/500] time 0.893 (0.894) data 0.000 (0.007) loss 1.0146 (1.2019) acc 71.8750 (70.8239) lr 
1.9823e-03 eta 5:40:55 +epoch [5/50] batch [115/500] time 0.982 (0.895) data 0.000 (0.006) loss 1.1104 (1.1977) acc 71.8750 (70.7609) lr 1.9823e-03 eta 5:41:14 +epoch [5/50] batch [120/500] time 0.865 (0.894) data 0.000 (0.006) loss 1.3154 (1.2006) acc 62.5000 (70.5208) lr 1.9823e-03 eta 5:40:46 +epoch [5/50] batch [125/500] time 0.867 (0.893) data 0.000 (0.006) loss 1.6641 (1.1999) acc 59.3750 (70.4750) lr 1.9823e-03 eta 5:40:27 +epoch [5/50] batch [130/500] time 0.883 (0.893) data 0.000 (0.006) loss 1.1289 (1.1953) acc 75.0000 (70.6010) lr 1.9823e-03 eta 5:40:14 +epoch [5/50] batch [135/500] time 0.864 (0.892) data 0.000 (0.005) loss 1.4219 (1.2031) acc 71.8750 (70.6944) lr 1.9823e-03 eta 5:39:57 +epoch [5/50] batch [140/500] time 0.896 (0.892) data 0.000 (0.005) loss 1.2334 (1.2023) acc 68.7500 (70.6920) lr 1.9823e-03 eta 5:39:43 +epoch [5/50] batch [145/500] time 0.897 (0.891) data 0.000 (0.005) loss 0.5249 (1.1924) acc 81.2500 (70.8405) lr 1.9823e-03 eta 5:39:32 +epoch [5/50] batch [150/500] time 0.894 (0.891) data 0.000 (0.005) loss 0.8027 (1.1881) acc 87.5000 (71.0833) lr 1.9823e-03 eta 5:39:18 +epoch [5/50] batch [155/500] time 0.862 (0.890) data 0.000 (0.005) loss 0.8389 (1.1871) acc 75.0000 (71.0282) lr 1.9823e-03 eta 5:39:03 +epoch [5/50] batch [160/500] time 0.914 (0.890) data 0.000 (0.005) loss 0.9033 (1.1874) acc 71.8750 (70.8984) lr 1.9823e-03 eta 5:38:58 +epoch [5/50] batch [165/500] time 0.898 (0.890) data 0.000 (0.004) loss 0.5825 (1.1831) acc 87.5000 (70.8902) lr 1.9823e-03 eta 5:38:54 +epoch [5/50] batch [170/500] time 0.904 (0.890) data 0.000 (0.004) loss 0.6611 (1.1762) acc 75.0000 (71.0846) lr 1.9823e-03 eta 5:38:49 +epoch [5/50] batch [175/500] time 0.863 (0.890) data 0.000 (0.004) loss 1.3047 (1.1769) acc 71.8750 (71.1607) lr 1.9823e-03 eta 5:38:31 +epoch [5/50] batch [180/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.2744 (1.1795) acc 71.8750 (71.0938) lr 1.9823e-03 eta 5:38:15 +epoch [5/50] batch [185/500] time 0.863 (0.889) data 
0.000 (0.004) loss 1.6914 (1.1853) acc 68.7500 (70.9459) lr 1.9823e-03 eta 5:37:59 +epoch [5/50] batch [190/500] time 0.895 (0.889) data 0.000 (0.004) loss 1.1396 (1.1903) acc 78.1250 (70.9211) lr 1.9823e-03 eta 5:37:59 +epoch [5/50] batch [195/500] time 0.855 (0.888) data 0.000 (0.004) loss 1.7812 (1.2022) acc 56.2500 (70.7051) lr 1.9823e-03 eta 5:37:40 +epoch [5/50] batch [200/500] time 0.888 (0.888) data 0.000 (0.004) loss 0.6416 (1.1927) acc 81.2500 (70.8750) lr 1.9823e-03 eta 5:37:33 +epoch [5/50] batch [205/500] time 0.912 (0.888) data 0.000 (0.004) loss 1.2646 (1.1948) acc 71.8750 (70.8537) lr 1.9823e-03 eta 5:37:28 +epoch [5/50] batch [210/500] time 0.910 (0.889) data 0.000 (0.004) loss 1.0967 (1.1990) acc 75.0000 (70.8185) lr 1.9823e-03 eta 5:37:32 +epoch [5/50] batch [215/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.5762 (1.1995) acc 62.5000 (70.8430) lr 1.9823e-03 eta 5:37:35 +epoch [5/50] batch [220/500] time 0.865 (0.889) data 0.000 (0.003) loss 1.1152 (1.1967) acc 81.2500 (70.9375) lr 1.9823e-03 eta 5:37:26 +epoch [5/50] batch [225/500] time 0.891 (0.889) data 0.001 (0.003) loss 0.7852 (1.1921) acc 81.2500 (71.0278) lr 1.9823e-03 eta 5:37:16 +epoch [5/50] batch [230/500] time 0.870 (0.888) data 0.000 (0.003) loss 1.6016 (1.1930) acc 68.7500 (71.0054) lr 1.9823e-03 eta 5:37:08 +epoch [5/50] batch [235/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.3262 (1.2000) acc 68.7500 (70.8378) lr 1.9823e-03 eta 5:36:57 +epoch [5/50] batch [240/500] time 0.867 (0.888) data 0.000 (0.003) loss 1.5410 (1.2027) acc 50.0000 (70.6901) lr 1.9823e-03 eta 5:36:48 +epoch [5/50] batch [245/500] time 0.943 (0.888) data 0.000 (0.003) loss 1.1572 (1.1976) acc 71.8750 (70.8163) lr 1.9823e-03 eta 5:36:49 +epoch [5/50] batch [250/500] time 0.899 (0.888) data 0.000 (0.003) loss 0.9429 (1.1950) acc 78.1250 (70.8875) lr 1.9823e-03 eta 5:36:47 +epoch [5/50] batch [255/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.3994 (1.1944) acc 71.8750 (70.8701) lr 1.9823e-03 eta 
5:36:39 +epoch [5/50] batch [260/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.6523 (1.1960) acc 68.7500 (70.8774) lr 1.9823e-03 eta 5:36:39 +epoch [5/50] batch [265/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.4893 (1.2040) acc 59.3750 (70.7429) lr 1.9823e-03 eta 5:36:29 +epoch [5/50] batch [270/500] time 0.890 (0.888) data 0.000 (0.003) loss 1.3096 (1.2024) acc 65.6250 (70.6944) lr 1.9823e-03 eta 5:36:23 +epoch [5/50] batch [275/500] time 0.899 (0.888) data 0.000 (0.003) loss 1.6514 (1.2029) acc 59.3750 (70.6250) lr 1.9823e-03 eta 5:36:16 +epoch [5/50] batch [280/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.2412 (1.2021) acc 75.0000 (70.6250) lr 1.9823e-03 eta 5:36:13 +epoch [5/50] batch [285/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.2959 (1.2036) acc 68.7500 (70.5373) lr 1.9823e-03 eta 5:36:03 +epoch [5/50] batch [290/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.1025 (1.2062) acc 81.2500 (70.5280) lr 1.9823e-03 eta 5:35:57 +epoch [5/50] batch [295/500] time 0.859 (0.887) data 0.000 (0.003) loss 0.6914 (1.2045) acc 81.2500 (70.5297) lr 1.9823e-03 eta 5:35:50 +epoch [5/50] batch [300/500] time 0.909 (0.887) data 0.000 (0.003) loss 0.6245 (1.2026) acc 78.1250 (70.5938) lr 1.9823e-03 eta 5:35:44 +epoch [5/50] batch [305/500] time 0.877 (0.887) data 0.000 (0.003) loss 1.1797 (1.2009) acc 75.0000 (70.6352) lr 1.9823e-03 eta 5:35:33 +epoch [5/50] batch [310/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.8594 (1.2018) acc 53.1250 (70.5847) lr 1.9823e-03 eta 5:35:25 +epoch [5/50] batch [315/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.3623 (1.2029) acc 71.8750 (70.5853) lr 1.9823e-03 eta 5:35:19 +epoch [5/50] batch [320/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.8994 (1.2018) acc 71.8750 (70.5371) lr 1.9823e-03 eta 5:35:10 +epoch [5/50] batch [325/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.4395 (1.2048) acc 65.6250 (70.4519) lr 1.9823e-03 eta 5:35:01 +epoch [5/50] batch [330/500] time 0.896 (0.886) data 0.000 (0.002) loss 
1.6562 (1.2052) acc 59.3750 (70.3977) lr 1.9823e-03 eta 5:34:56 +epoch [5/50] batch [335/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.6572 (1.2080) acc 59.3750 (70.3358) lr 1.9823e-03 eta 5:34:55 +epoch [5/50] batch [340/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.2529 (1.2070) acc 68.7500 (70.3585) lr 1.9823e-03 eta 5:34:52 +epoch [5/50] batch [345/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.1055 (1.2077) acc 65.6250 (70.3170) lr 1.9823e-03 eta 5:34:48 +epoch [5/50] batch [350/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.3916 (1.2108) acc 62.5000 (70.2500) lr 1.9823e-03 eta 5:34:47 +epoch [5/50] batch [355/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.2559 (1.2132) acc 65.6250 (70.1849) lr 1.9823e-03 eta 5:34:42 +epoch [5/50] batch [360/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.7900 (1.2159) acc 62.5000 (70.1476) lr 1.9823e-03 eta 5:34:46 +epoch [5/50] batch [365/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.5127 (1.2160) acc 59.3750 (70.1455) lr 1.9823e-03 eta 5:34:37 +epoch [5/50] batch [370/500] time 0.917 (0.887) data 0.000 (0.002) loss 1.2559 (1.2145) acc 59.3750 (70.1436) lr 1.9823e-03 eta 5:34:33 +epoch [5/50] batch [375/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.1191 (1.2147) acc 71.8750 (70.1500) lr 1.9823e-03 eta 5:34:29 +epoch [5/50] batch [380/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.8223 (1.2150) acc 56.2500 (70.1151) lr 1.9823e-03 eta 5:34:21 +epoch [5/50] batch [385/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.4033 (1.2178) acc 62.5000 (70.0731) lr 1.9823e-03 eta 5:34:17 +epoch [5/50] batch [390/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9497 (1.2135) acc 75.0000 (70.1442) lr 1.9823e-03 eta 5:34:10 +epoch [5/50] batch [395/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.8213 (1.2181) acc 53.1250 (70.0554) lr 1.9823e-03 eta 5:34:08 +epoch [5/50] batch [400/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.7393 (1.2158) acc 81.2500 (70.1250) lr 1.9823e-03 eta 5:34:06 +epoch [5/50] 
batch [405/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.7939 (1.2188) acc 68.7500 (70.0849) lr 1.9823e-03 eta 5:34:08 +epoch [5/50] batch [410/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.7993 (1.2207) acc 84.3750 (70.1296) lr 1.9823e-03 eta 5:34:03 +epoch [5/50] batch [415/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.4609 (1.2236) acc 62.5000 (70.0828) lr 1.9823e-03 eta 5:33:59 +epoch [5/50] batch [420/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.4541 (1.2235) acc 59.3750 (70.0595) lr 1.9823e-03 eta 5:33:54 +epoch [5/50] batch [425/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.8506 (1.2236) acc 71.8750 (70.0000) lr 1.9823e-03 eta 5:33:49 +epoch [5/50] batch [430/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.3486 (1.2237) acc 68.7500 (70.0073) lr 1.9823e-03 eta 5:33:41 +epoch [5/50] batch [435/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.0166 (1.2231) acc 81.2500 (70.0216) lr 1.9823e-03 eta 5:33:37 +epoch [5/50] batch [440/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.1523 (1.2227) acc 68.7500 (70.0426) lr 1.9823e-03 eta 5:33:31 +epoch [5/50] batch [445/500] time 0.920 (0.887) data 0.000 (0.002) loss 0.6968 (1.2210) acc 90.6250 (70.1053) lr 1.9823e-03 eta 5:33:29 +epoch [5/50] batch [450/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.4580 (1.2224) acc 68.7500 (70.0903) lr 1.9823e-03 eta 5:33:28 +epoch [5/50] batch [455/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.6953 (1.2231) acc 81.2500 (70.0687) lr 1.9823e-03 eta 5:33:23 +epoch [5/50] batch [460/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.2666 (1.2213) acc 56.2500 (70.0815) lr 1.9823e-03 eta 5:33:19 +epoch [5/50] batch [465/500] time 0.904 (0.887) data 0.000 (0.002) loss 1.0693 (1.2209) acc 65.6250 (70.0941) lr 1.9823e-03 eta 5:33:14 +epoch [5/50] batch [470/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.5137 (1.2198) acc 59.3750 (70.0864) lr 1.9823e-03 eta 5:33:09 +epoch [5/50] batch [475/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.4424 (1.2206) acc 
68.7500 (70.0395) lr 1.9823e-03 eta 5:33:03 +epoch [5/50] batch [480/500] time 0.854 (0.887) data 0.000 (0.002) loss 1.0938 (1.2206) acc 75.0000 (70.0456) lr 1.9823e-03 eta 5:32:59 +epoch [5/50] batch [485/500] time 0.858 (0.887) data 0.001 (0.002) loss 0.9346 (1.2210) acc 68.7500 (69.9936) lr 1.9823e-03 eta 5:32:53 +epoch [5/50] batch [490/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.2334 (1.2204) acc 65.6250 (70.0128) lr 1.9823e-03 eta 5:32:48 +epoch [5/50] batch [495/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8623 (1.2209) acc 68.7500 (70.0063) lr 1.9823e-03 eta 5:32:43 +epoch [5/50] batch [500/500] time 1.017 (0.887) data 0.000 (0.002) loss 0.8657 (1.2191) acc 75.0000 (70.0500) lr 1.9686e-03 eta 5:32:42 +epoch [6/50] batch [5/500] time 0.859 (1.014) data 0.000 (0.133) loss 1.1699 (1.1808) acc 75.0000 (73.7500) lr 1.9686e-03 eta 6:20:06 +epoch [6/50] batch [10/500] time 0.890 (0.946) data 0.000 (0.067) loss 1.3408 (1.2556) acc 71.8750 (70.6250) lr 1.9686e-03 eta 5:54:31 +epoch [6/50] batch [15/500] time 0.903 (0.929) data 0.000 (0.044) loss 1.2363 (1.2404) acc 65.6250 (69.5833) lr 1.9686e-03 eta 5:48:00 +epoch [6/50] batch [20/500] time 0.869 (0.914) data 0.000 (0.033) loss 1.0791 (1.1810) acc 71.8750 (70.1562) lr 1.9686e-03 eta 5:42:19 +epoch [6/50] batch [25/500] time 0.869 (0.906) data 0.000 (0.027) loss 0.9761 (1.1660) acc 75.0000 (70.2500) lr 1.9686e-03 eta 5:39:26 +epoch [6/50] batch [30/500] time 0.901 (0.902) data 0.000 (0.022) loss 1.2129 (1.1860) acc 59.3750 (70.1042) lr 1.9686e-03 eta 5:37:40 +epoch [6/50] batch [35/500] time 0.880 (0.898) data 0.000 (0.019) loss 1.2461 (1.1696) acc 75.0000 (70.8929) lr 1.9686e-03 eta 5:36:16 +epoch [6/50] batch [40/500] time 0.894 (0.898) data 0.000 (0.017) loss 1.3926 (1.2006) acc 56.2500 (69.9219) lr 1.9686e-03 eta 5:36:04 +epoch [6/50] batch [45/500] time 0.883 (0.896) data 0.000 (0.015) loss 1.0586 (1.1662) acc 68.7500 (70.2083) lr 1.9686e-03 eta 5:35:09 +epoch [6/50] batch [50/500] time 0.893 (0.894) 
data 0.000 (0.014) loss 0.7246 (1.1750) acc 81.2500 (70.0625) lr 1.9686e-03 eta 5:34:19 +epoch [6/50] batch [55/500] time 0.892 (0.893) data 0.000 (0.012) loss 0.9629 (1.1675) acc 78.1250 (70.2273) lr 1.9686e-03 eta 5:34:01 +epoch [6/50] batch [60/500] time 1.008 (0.894) data 0.000 (0.011) loss 1.7305 (1.1873) acc 68.7500 (70.1042) lr 1.9686e-03 eta 5:34:29 +epoch [6/50] batch [65/500] time 0.876 (0.893) data 0.000 (0.010) loss 1.0166 (1.1895) acc 78.1250 (70.1923) lr 1.9686e-03 eta 5:34:02 +epoch [6/50] batch [70/500] time 0.880 (0.893) data 0.000 (0.010) loss 2.0332 (1.2096) acc 59.3750 (69.5982) lr 1.9686e-03 eta 5:33:40 +epoch [6/50] batch [75/500] time 0.888 (0.891) data 0.000 (0.009) loss 1.0674 (1.2191) acc 71.8750 (69.5000) lr 1.9686e-03 eta 5:33:07 +epoch [6/50] batch [80/500] time 0.860 (0.890) data 0.000 (0.009) loss 1.5303 (1.2249) acc 62.5000 (69.4531) lr 1.9686e-03 eta 5:32:33 +epoch [6/50] batch [85/500] time 0.896 (0.889) data 0.000 (0.008) loss 1.4014 (1.2299) acc 65.6250 (69.6324) lr 1.9686e-03 eta 5:32:17 +epoch [6/50] batch [90/500] time 0.901 (0.889) data 0.000 (0.008) loss 1.6338 (1.2301) acc 59.3750 (69.6528) lr 1.9686e-03 eta 5:31:54 +epoch [6/50] batch [95/500] time 0.911 (0.889) data 0.000 (0.007) loss 1.0752 (1.2155) acc 62.5000 (69.9342) lr 1.9686e-03 eta 5:31:55 +epoch [6/50] batch [100/500] time 0.874 (0.889) data 0.000 (0.007) loss 1.1240 (1.1975) acc 75.0000 (70.2188) lr 1.9686e-03 eta 5:31:54 +epoch [6/50] batch [105/500] time 0.870 (0.889) data 0.000 (0.007) loss 0.8096 (1.1895) acc 81.2500 (70.5060) lr 1.9686e-03 eta 5:31:54 +epoch [6/50] batch [110/500] time 0.858 (0.889) data 0.000 (0.006) loss 1.5420 (1.1835) acc 68.7500 (70.7670) lr 1.9686e-03 eta 5:31:34 +epoch [6/50] batch [115/500] time 0.876 (0.888) data 0.000 (0.006) loss 1.1670 (1.1826) acc 75.0000 (70.7609) lr 1.9686e-03 eta 5:31:16 +epoch [6/50] batch [120/500] time 0.875 (0.888) data 0.000 (0.006) loss 1.0938 (1.1857) acc 71.8750 (70.7031) lr 1.9686e-03 eta 5:31:04 
+epoch [6/50] batch [125/500] time 0.899 (0.888) data 0.000 (0.006) loss 1.3037 (1.1936) acc 71.8750 (70.7500) lr 1.9686e-03 eta 5:31:05 +epoch [6/50] batch [130/500] time 0.862 (0.888) data 0.000 (0.005) loss 0.8237 (1.1951) acc 81.2500 (70.7692) lr 1.9686e-03 eta 5:31:05 +epoch [6/50] batch [135/500] time 0.901 (0.887) data 0.000 (0.005) loss 1.0713 (1.1937) acc 75.0000 (70.6019) lr 1.9686e-03 eta 5:30:45 +epoch [6/50] batch [140/500] time 0.901 (0.888) data 0.000 (0.005) loss 1.0205 (1.1893) acc 78.1250 (70.6473) lr 1.9686e-03 eta 5:30:47 +epoch [6/50] batch [145/500] time 0.874 (0.888) data 0.000 (0.005) loss 0.7783 (1.1795) acc 78.1250 (70.8621) lr 1.9686e-03 eta 5:30:44 +epoch [6/50] batch [150/500] time 0.916 (0.888) data 0.000 (0.005) loss 0.8862 (1.1837) acc 78.1250 (70.7500) lr 1.9686e-03 eta 5:30:37 +epoch [6/50] batch [155/500] time 0.896 (0.887) data 0.000 (0.005) loss 1.3086 (1.1827) acc 71.8750 (70.7056) lr 1.9686e-03 eta 5:30:28 +epoch [6/50] batch [160/500] time 0.866 (0.887) data 0.000 (0.004) loss 1.2305 (1.1877) acc 68.7500 (70.6836) lr 1.9686e-03 eta 5:30:17 +epoch [6/50] batch [165/500] time 0.899 (0.887) data 0.000 (0.004) loss 1.9668 (1.1898) acc 62.5000 (70.7197) lr 1.9686e-03 eta 5:30:08 +epoch [6/50] batch [170/500] time 0.885 (0.887) data 0.000 (0.004) loss 1.4863 (1.1947) acc 71.8750 (70.7904) lr 1.9686e-03 eta 5:30:02 +epoch [6/50] batch [175/500] time 0.890 (0.887) data 0.000 (0.004) loss 0.8701 (1.1942) acc 68.7500 (70.6607) lr 1.9686e-03 eta 5:29:53 +epoch [6/50] batch [180/500] time 0.882 (0.886) data 0.000 (0.004) loss 1.4717 (1.1928) acc 53.1250 (70.5729) lr 1.9686e-03 eta 5:29:41 +epoch [6/50] batch [185/500] time 0.903 (0.886) data 0.000 (0.004) loss 0.8794 (1.1931) acc 71.8750 (70.5574) lr 1.9686e-03 eta 5:29:30 +epoch [6/50] batch [190/500] time 0.911 (0.886) data 0.000 (0.004) loss 1.6025 (1.1972) acc 75.0000 (70.5757) lr 1.9686e-03 eta 5:29:32 +epoch [6/50] batch [195/500] time 0.924 (0.886) data 0.000 (0.004) loss 1.8633 
(1.1988) acc 59.3750 (70.5609) lr 1.9686e-03 eta 5:29:32 +epoch [6/50] batch [200/500] time 0.880 (0.887) data 0.000 (0.004) loss 1.4434 (1.2017) acc 62.5000 (70.4062) lr 1.9686e-03 eta 5:29:34 +epoch [6/50] batch [205/500] time 0.898 (0.888) data 0.000 (0.003) loss 1.2246 (1.2051) acc 71.8750 (70.4116) lr 1.9686e-03 eta 5:29:48 +epoch [6/50] batch [210/500] time 0.886 (0.888) data 0.000 (0.003) loss 1.2803 (1.2000) acc 68.7500 (70.5506) lr 1.9686e-03 eta 5:29:44 +epoch [6/50] batch [215/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.5991 (1.1945) acc 90.6250 (70.6105) lr 1.9686e-03 eta 5:29:40 +epoch [6/50] batch [220/500] time 0.898 (0.887) data 0.001 (0.003) loss 1.5771 (1.2006) acc 59.3750 (70.5682) lr 1.9686e-03 eta 5:29:31 +epoch [6/50] batch [225/500] time 0.872 (0.887) data 0.000 (0.003) loss 1.1250 (1.2067) acc 71.8750 (70.4583) lr 1.9686e-03 eta 5:29:23 +epoch [6/50] batch [230/500] time 0.906 (0.888) data 0.000 (0.003) loss 1.7783 (1.2134) acc 59.3750 (70.3397) lr 1.9686e-03 eta 5:29:28 +epoch [6/50] batch [235/500] time 0.926 (0.888) data 0.000 (0.003) loss 0.9980 (1.2132) acc 78.1250 (70.3723) lr 1.9686e-03 eta 5:29:22 +epoch [6/50] batch [240/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.2549 (1.2078) acc 68.7500 (70.4427) lr 1.9686e-03 eta 5:29:28 +epoch [6/50] batch [245/500] time 0.917 (0.888) data 0.000 (0.003) loss 1.4775 (1.2075) acc 65.6250 (70.4847) lr 1.9686e-03 eta 5:29:31 +epoch [6/50] batch [250/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.2178 (1.2051) acc 71.8750 (70.4375) lr 1.9686e-03 eta 5:29:34 +epoch [6/50] batch [255/500] time 0.884 (0.889) data 0.000 (0.003) loss 0.7080 (1.1991) acc 81.2500 (70.5637) lr 1.9686e-03 eta 5:29:33 +epoch [6/50] batch [260/500] time 0.893 (0.889) data 0.000 (0.003) loss 1.0527 (1.1983) acc 71.8750 (70.5288) lr 1.9686e-03 eta 5:29:32 +epoch [6/50] batch [265/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.2441 (1.2036) acc 65.6250 (70.4245) lr 1.9686e-03 eta 5:29:24 +epoch [6/50] batch 
[270/500] time 0.934 (0.889) data 0.000 (0.003) loss 1.1943 (1.2044) acc 75.0000 (70.4398) lr 1.9686e-03 eta 5:29:24 +epoch [6/50] batch [275/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.4590 (1.2021) acc 59.3750 (70.4659) lr 1.9686e-03 eta 5:29:16 +epoch [6/50] batch [280/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.1025 (1.2000) acc 71.8750 (70.5469) lr 1.9686e-03 eta 5:29:12 +epoch [6/50] batch [285/500] time 0.891 (0.889) data 0.000 (0.003) loss 0.9893 (1.1971) acc 75.0000 (70.6140) lr 1.9686e-03 eta 5:29:04 +epoch [6/50] batch [290/500] time 0.916 (0.889) data 0.000 (0.003) loss 1.1729 (1.1961) acc 68.7500 (70.5927) lr 1.9686e-03 eta 5:29:02 +epoch [6/50] batch [295/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.0117 (1.1940) acc 68.7500 (70.6356) lr 1.9686e-03 eta 5:28:54 +epoch [6/50] batch [300/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.3262 (1.1915) acc 62.5000 (70.6354) lr 1.9686e-03 eta 5:28:49 +epoch [6/50] batch [305/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.2793 (1.1919) acc 65.6250 (70.6660) lr 1.9686e-03 eta 5:28:46 +epoch [6/50] batch [310/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.1738 (1.1923) acc 68.7500 (70.6452) lr 1.9686e-03 eta 5:28:33 +epoch [6/50] batch [315/500] time 0.904 (0.888) data 0.000 (0.002) loss 1.7783 (1.1971) acc 68.7500 (70.5952) lr 1.9686e-03 eta 5:28:24 +epoch [6/50] batch [320/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.6035 (1.1965) acc 59.3750 (70.6250) lr 1.9686e-03 eta 5:28:11 +epoch [6/50] batch [325/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.3145 (1.1963) acc 68.7500 (70.6538) lr 1.9686e-03 eta 5:28:07 +epoch [6/50] batch [330/500] time 0.878 (0.888) data 0.000 (0.002) loss 1.3125 (1.1986) acc 62.5000 (70.6061) lr 1.9686e-03 eta 5:28:03 +epoch [6/50] batch [335/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6543 (1.1945) acc 84.3750 (70.7369) lr 1.9686e-03 eta 5:27:58 +epoch [6/50] batch [340/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.8438 (1.1927) acc 71.8750 
(70.7445) lr 1.9686e-03 eta 5:27:53 +epoch [6/50] batch [345/500] time 0.900 (0.888) data 0.000 (0.002) loss 1.0576 (1.1917) acc 71.8750 (70.6612) lr 1.9686e-03 eta 5:27:49 +epoch [6/50] batch [350/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.4756 (1.1881) acc 59.3750 (70.6518) lr 1.9686e-03 eta 5:27:50 +epoch [6/50] batch [355/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.7920 (1.1887) acc 62.5000 (70.5810) lr 1.9686e-03 eta 5:27:45 +epoch [6/50] batch [360/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.4219 (1.1851) acc 65.6250 (70.6163) lr 1.9686e-03 eta 5:27:45 +epoch [6/50] batch [365/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.3408 (1.1888) acc 62.5000 (70.5651) lr 1.9686e-03 eta 5:27:39 +epoch [6/50] batch [370/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.5352 (1.1910) acc 65.6250 (70.5405) lr 1.9686e-03 eta 5:27:31 +epoch [6/50] batch [375/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.7471 (1.1901) acc 75.0000 (70.5417) lr 1.9686e-03 eta 5:27:27 +epoch [6/50] batch [380/500] time 0.865 (0.888) data 0.000 (0.002) loss 1.7100 (1.1917) acc 68.7500 (70.5674) lr 1.9686e-03 eta 5:27:19 +epoch [6/50] batch [385/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.7129 (1.1934) acc 65.6250 (70.5519) lr 1.9686e-03 eta 5:27:17 +epoch [6/50] batch [390/500] time 0.975 (0.888) data 0.000 (0.002) loss 0.8242 (1.1920) acc 81.2500 (70.5288) lr 1.9686e-03 eta 5:27:22 +epoch [6/50] batch [395/500] time 0.852 (0.888) data 0.000 (0.002) loss 1.1631 (1.1901) acc 68.7500 (70.5775) lr 1.9686e-03 eta 5:27:13 +epoch [6/50] batch [400/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.1953 (1.1911) acc 71.8750 (70.5781) lr 1.9686e-03 eta 5:27:04 +epoch [6/50] batch [405/500] time 0.868 (0.888) data 0.000 (0.002) loss 2.2637 (1.1919) acc 59.3750 (70.6096) lr 1.9686e-03 eta 5:26:55 +epoch [6/50] batch [410/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.7266 (1.1954) acc 78.1250 (70.4954) lr 1.9686e-03 eta 5:26:44 +epoch [6/50] batch [415/500] time 0.893 
(0.887) data 0.000 (0.002) loss 0.7739 (1.1952) acc 78.1250 (70.5346) lr 1.9686e-03 eta 5:26:37 +epoch [6/50] batch [420/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.1846 (1.1953) acc 65.6250 (70.4985) lr 1.9686e-03 eta 5:26:29 +epoch [6/50] batch [425/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2783 (1.1947) acc 65.6250 (70.5294) lr 1.9686e-03 eta 5:26:22 +epoch [6/50] batch [430/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.5166 (1.1977) acc 59.3750 (70.5015) lr 1.9686e-03 eta 5:26:15 +epoch [6/50] batch [435/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.9023 (1.1951) acc 87.5000 (70.5675) lr 1.9686e-03 eta 5:26:09 +epoch [6/50] batch [440/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.8716 (1.1968) acc 75.0000 (70.4688) lr 1.9686e-03 eta 5:26:04 +epoch [6/50] batch [445/500] time 0.910 (0.887) data 0.000 (0.002) loss 0.7368 (1.1935) acc 75.0000 (70.5618) lr 1.9686e-03 eta 5:26:01 +epoch [6/50] batch [450/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.2148 (1.1926) acc 78.1250 (70.6042) lr 1.9686e-03 eta 5:25:55 +epoch [6/50] batch [455/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.0645 (1.1916) acc 75.0000 (70.5563) lr 1.9686e-03 eta 5:25:47 +epoch [6/50] batch [460/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.6470 (1.1894) acc 84.3750 (70.6046) lr 1.9686e-03 eta 5:25:44 +epoch [6/50] batch [465/500] time 0.914 (0.887) data 0.000 (0.002) loss 1.0781 (1.1894) acc 75.0000 (70.5914) lr 1.9686e-03 eta 5:25:43 +epoch [6/50] batch [470/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0605 (1.1909) acc 81.2500 (70.5585) lr 1.9686e-03 eta 5:25:38 +epoch [6/50] batch [475/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.4805 (1.1929) acc 68.7500 (70.5132) lr 1.9686e-03 eta 5:25:33 +epoch [6/50] batch [480/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.6074 (1.1911) acc 78.1250 (70.5273) lr 1.9686e-03 eta 5:25:28 +epoch [6/50] batch [485/500] time 0.864 (0.887) data 0.001 (0.002) loss 0.7471 (1.1902) acc 75.0000 (70.5155) lr 
1.9686e-03 eta 5:25:21 +epoch [6/50] batch [490/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.3721 (1.1911) acc 68.7500 (70.5102) lr 1.9686e-03 eta 5:25:23 +epoch [6/50] batch [495/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.8374 (1.1914) acc 78.1250 (70.4735) lr 1.9686e-03 eta 5:25:21 +epoch [6/50] batch [500/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.1348 (1.1925) acc 71.8750 (70.4313) lr 1.9511e-03 eta 5:25:20 +epoch [7/50] batch [5/500] time 0.902 (1.042) data 0.000 (0.134) loss 0.9912 (1.0775) acc 75.0000 (74.3750) lr 1.9511e-03 eta 6:22:05 +epoch [7/50] batch [10/500] time 0.859 (0.956) data 0.000 (0.067) loss 1.3350 (1.1839) acc 65.6250 (71.5625) lr 1.9511e-03 eta 5:50:31 +epoch [7/50] batch [15/500] time 0.866 (0.932) data 0.000 (0.045) loss 1.5508 (1.1809) acc 56.2500 (69.5833) lr 1.9511e-03 eta 5:41:21 +epoch [7/50] batch [20/500] time 0.866 (0.919) data 0.000 (0.034) loss 1.0732 (1.1558) acc 68.7500 (69.2188) lr 1.9511e-03 eta 5:36:37 +epoch [7/50] batch [25/500] time 0.898 (0.912) data 0.000 (0.027) loss 1.2520 (1.1521) acc 75.0000 (69.5000) lr 1.9511e-03 eta 5:34:07 +epoch [7/50] batch [30/500] time 0.866 (0.907) data 0.000 (0.023) loss 1.1934 (1.1447) acc 68.7500 (69.8958) lr 1.9511e-03 eta 5:32:14 +epoch [7/50] batch [35/500] time 0.885 (0.902) data 0.000 (0.019) loss 0.9902 (1.1873) acc 75.0000 (68.9286) lr 1.9511e-03 eta 5:30:13 +epoch [7/50] batch [40/500] time 0.909 (0.902) data 0.000 (0.017) loss 0.9194 (1.1928) acc 78.1250 (68.9062) lr 1.9511e-03 eta 5:30:15 +epoch [7/50] batch [45/500] time 0.983 (0.901) data 0.000 (0.015) loss 1.1611 (1.1943) acc 75.0000 (69.5833) lr 1.9511e-03 eta 5:29:41 +epoch [7/50] batch [50/500] time 0.883 (0.899) data 0.000 (0.014) loss 0.8989 (1.1729) acc 84.3750 (70.5625) lr 1.9511e-03 eta 5:28:57 +epoch [7/50] batch [55/500] time 0.854 (0.897) data 0.000 (0.012) loss 1.0781 (1.1710) acc 75.0000 (70.5682) lr 1.9511e-03 eta 5:28:08 +epoch [7/50] batch [60/500] time 0.893 (0.896) data 0.000 (0.011) 
loss 1.1904 (1.1600) acc 68.7500 (70.8854) lr 1.9511e-03 eta 5:27:33 +epoch [7/50] batch [65/500] time 0.883 (0.895) data 0.000 (0.011) loss 1.4043 (1.1598) acc 65.6250 (71.0096) lr 1.9511e-03 eta 5:27:04 +epoch [7/50] batch [70/500] time 0.865 (0.893) data 0.000 (0.010) loss 0.8359 (1.1756) acc 75.0000 (71.2500) lr 1.9511e-03 eta 5:26:29 +epoch [7/50] batch [75/500] time 0.873 (0.893) data 0.000 (0.009) loss 0.9165 (1.1665) acc 65.6250 (71.4583) lr 1.9511e-03 eta 5:26:12 +epoch [7/50] batch [80/500] time 0.868 (0.892) data 0.000 (0.009) loss 1.4746 (1.1650) acc 59.3750 (71.6016) lr 1.9511e-03 eta 5:25:57 +epoch [7/50] batch [85/500] time 0.890 (0.891) data 0.000 (0.008) loss 0.5830 (1.1585) acc 84.3750 (71.6176) lr 1.9511e-03 eta 5:25:30 +epoch [7/50] batch [90/500] time 0.876 (0.892) data 0.000 (0.008) loss 1.2012 (1.1787) acc 71.8750 (71.2153) lr 1.9511e-03 eta 5:25:44 +epoch [7/50] batch [95/500] time 0.882 (0.891) data 0.000 (0.007) loss 0.8809 (1.1763) acc 75.0000 (71.2500) lr 1.9511e-03 eta 5:25:26 +epoch [7/50] batch [100/500] time 0.896 (0.891) data 0.000 (0.007) loss 1.4268 (1.1909) acc 65.6250 (70.9375) lr 1.9511e-03 eta 5:25:11 +epoch [7/50] batch [105/500] time 0.905 (0.891) data 0.000 (0.007) loss 1.6904 (1.1942) acc 62.5000 (70.8631) lr 1.9511e-03 eta 5:25:02 +epoch [7/50] batch [110/500] time 0.887 (0.891) data 0.001 (0.006) loss 1.4922 (1.2015) acc 59.3750 (70.6250) lr 1.9511e-03 eta 5:25:04 +epoch [7/50] batch [115/500] time 0.916 (0.892) data 0.000 (0.006) loss 1.2549 (1.1970) acc 65.6250 (70.7609) lr 1.9511e-03 eta 5:25:11 +epoch [7/50] batch [120/500] time 0.879 (0.891) data 0.000 (0.006) loss 0.8618 (1.2009) acc 84.3750 (70.6771) lr 1.9511e-03 eta 5:24:50 +epoch [7/50] batch [125/500] time 0.855 (0.890) data 0.000 (0.006) loss 1.6367 (1.1990) acc 53.1250 (70.6000) lr 1.9511e-03 eta 5:24:36 +epoch [7/50] batch [130/500] time 0.880 (0.890) data 0.000 (0.005) loss 1.0449 (1.2009) acc 81.2500 (70.7452) lr 1.9511e-03 eta 5:24:27 +epoch [7/50] batch 
[135/500] time 0.883 (0.890) data 0.000 (0.005) loss 1.4180 (1.1993) acc 68.7500 (70.7870) lr 1.9511e-03 eta 5:24:20 +epoch [7/50] batch [140/500] time 0.901 (0.890) data 0.000 (0.005) loss 1.3945 (1.1939) acc 50.0000 (70.7589) lr 1.9511e-03 eta 5:24:09 +epoch [7/50] batch [145/500] time 0.893 (0.890) data 0.000 (0.005) loss 1.0537 (1.1878) acc 78.1250 (70.8405) lr 1.9511e-03 eta 5:24:10 +epoch [7/50] batch [150/500] time 0.893 (0.890) data 0.000 (0.005) loss 0.9424 (1.1879) acc 68.7500 (70.7917) lr 1.9511e-03 eta 5:24:02 +epoch [7/50] batch [155/500] time 0.907 (0.890) data 0.000 (0.005) loss 1.2402 (1.1897) acc 59.3750 (70.5645) lr 1.9511e-03 eta 5:23:52 +epoch [7/50] batch [160/500] time 0.899 (0.890) data 0.000 (0.004) loss 0.9141 (1.1861) acc 78.1250 (70.6836) lr 1.9511e-03 eta 5:24:03 +epoch [7/50] batch [165/500] time 0.868 (0.890) data 0.000 (0.004) loss 0.8481 (1.1847) acc 65.6250 (70.5303) lr 1.9511e-03 eta 5:23:44 +epoch [7/50] batch [170/500] time 0.893 (0.890) data 0.000 (0.004) loss 1.0420 (1.1822) acc 78.1250 (70.4963) lr 1.9511e-03 eta 5:23:50 +epoch [7/50] batch [175/500] time 0.855 (0.890) data 0.000 (0.004) loss 1.3652 (1.1842) acc 59.3750 (70.3929) lr 1.9511e-03 eta 5:23:46 +epoch [7/50] batch [180/500] time 0.887 (0.890) data 0.000 (0.004) loss 0.7725 (1.1743) acc 78.1250 (70.6250) lr 1.9511e-03 eta 5:23:31 +epoch [7/50] batch [185/500] time 1.245 (0.891) data 0.000 (0.004) loss 1.0469 (1.1787) acc 68.7500 (70.4730) lr 1.9511e-03 eta 5:24:06 +epoch [7/50] batch [190/500] time 0.872 (0.892) data 0.000 (0.004) loss 0.6528 (1.1703) acc 81.2500 (70.6250) lr 1.9511e-03 eta 5:24:09 +epoch [7/50] batch [195/500] time 0.862 (0.892) data 0.000 (0.004) loss 1.0576 (1.1700) acc 78.1250 (70.6731) lr 1.9511e-03 eta 5:24:01 +epoch [7/50] batch [200/500] time 0.890 (0.891) data 0.000 (0.004) loss 1.1855 (1.1747) acc 65.6250 (70.6406) lr 1.9511e-03 eta 5:23:53 +epoch [7/50] batch [205/500] time 0.889 (0.891) data 0.000 (0.004) loss 1.1650 (1.1771) acc 78.1250 
(70.6098) lr 1.9511e-03 eta 5:23:45 +epoch [7/50] batch [210/500] time 0.880 (0.891) data 0.000 (0.003) loss 1.2549 (1.1769) acc 75.0000 (70.7292) lr 1.9511e-03 eta 5:23:36 +epoch [7/50] batch [215/500] time 0.848 (0.891) data 0.000 (0.003) loss 1.2061 (1.1817) acc 75.0000 (70.6686) lr 1.9511e-03 eta 5:23:31 +epoch [7/50] batch [220/500] time 0.909 (0.891) data 0.000 (0.003) loss 1.0029 (1.1783) acc 71.8750 (70.7528) lr 1.9511e-03 eta 5:23:20 +epoch [7/50] batch [225/500] time 0.869 (0.890) data 0.000 (0.003) loss 0.9473 (1.1825) acc 71.8750 (70.6389) lr 1.9511e-03 eta 5:23:08 +epoch [7/50] batch [230/500] time 0.863 (0.890) data 0.000 (0.003) loss 0.8159 (1.1778) acc 81.2500 (70.7201) lr 1.9511e-03 eta 5:22:54 +epoch [7/50] batch [235/500] time 0.879 (0.890) data 0.000 (0.003) loss 1.1299 (1.1794) acc 68.7500 (70.6516) lr 1.9511e-03 eta 5:22:56 +epoch [7/50] batch [240/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.9043 (1.1840) acc 56.2500 (70.5599) lr 1.9511e-03 eta 5:22:57 +epoch [7/50] batch [245/500] time 0.918 (0.891) data 0.000 (0.003) loss 0.6597 (1.1814) acc 78.1250 (70.5740) lr 1.9511e-03 eta 5:22:59 +epoch [7/50] batch [250/500] time 0.880 (0.891) data 0.000 (0.003) loss 1.1777 (1.1824) acc 71.8750 (70.5625) lr 1.9511e-03 eta 5:22:49 +epoch [7/50] batch [255/500] time 0.875 (0.890) data 0.000 (0.003) loss 0.7856 (1.1806) acc 81.2500 (70.6495) lr 1.9511e-03 eta 5:22:35 +epoch [7/50] batch [260/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.4678 (1.1803) acc 59.3750 (70.6490) lr 1.9511e-03 eta 5:22:23 +epoch [7/50] batch [265/500] time 0.908 (0.890) data 0.000 (0.003) loss 1.4531 (1.1775) acc 56.2500 (70.7075) lr 1.9511e-03 eta 5:22:17 +epoch [7/50] batch [270/500] time 0.853 (0.890) data 0.000 (0.003) loss 1.2158 (1.1820) acc 71.8750 (70.6829) lr 1.9511e-03 eta 5:22:10 +epoch [7/50] batch [275/500] time 0.894 (0.890) data 0.000 (0.003) loss 0.9302 (1.1827) acc 78.1250 (70.7727) lr 1.9511e-03 eta 5:22:07 +epoch [7/50] batch [280/500] time 0.885 
(0.890) data 0.000 (0.003) loss 1.1221 (1.1845) acc 75.0000 (70.6808) lr 1.9511e-03 eta 5:22:05 +epoch [7/50] batch [285/500] time 0.897 (0.890) data 0.000 (0.003) loss 1.6689 (1.1844) acc 59.3750 (70.6908) lr 1.9511e-03 eta 5:22:02 +epoch [7/50] batch [290/500] time 0.925 (0.890) data 0.000 (0.003) loss 0.8262 (1.1799) acc 78.1250 (70.8297) lr 1.9511e-03 eta 5:22:04 +epoch [7/50] batch [295/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.4277 (1.1808) acc 68.7500 (70.7627) lr 1.9511e-03 eta 5:21:55 +epoch [7/50] batch [300/500] time 0.873 (0.890) data 0.000 (0.002) loss 1.3379 (1.1816) acc 62.5000 (70.7083) lr 1.9511e-03 eta 5:21:48 +epoch [7/50] batch [305/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.1904 (1.1815) acc 65.6250 (70.6865) lr 1.9511e-03 eta 5:21:42 +epoch [7/50] batch [310/500] time 0.893 (0.890) data 0.000 (0.002) loss 1.2158 (1.1819) acc 71.8750 (70.6754) lr 1.9511e-03 eta 5:21:37 +epoch [7/50] batch [315/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.7153 (1.1838) acc 78.1250 (70.6746) lr 1.9511e-03 eta 5:21:36 +epoch [7/50] batch [320/500] time 0.911 (0.890) data 0.000 (0.002) loss 1.2344 (1.1811) acc 62.5000 (70.7129) lr 1.9511e-03 eta 5:21:32 +epoch [7/50] batch [325/500] time 0.898 (0.890) data 0.000 (0.002) loss 2.0117 (1.1813) acc 65.6250 (70.7596) lr 1.9511e-03 eta 5:21:29 +epoch [7/50] batch [330/500] time 0.858 (0.890) data 0.000 (0.002) loss 0.9536 (1.1803) acc 81.2500 (70.7765) lr 1.9511e-03 eta 5:21:22 +epoch [7/50] batch [335/500] time 0.865 (0.890) data 0.000 (0.002) loss 1.6201 (1.1837) acc 62.5000 (70.7276) lr 1.9511e-03 eta 5:21:21 +epoch [7/50] batch [340/500] time 0.878 (0.890) data 0.000 (0.002) loss 1.5732 (1.1852) acc 62.5000 (70.7353) lr 1.9511e-03 eta 5:21:15 +epoch [7/50] batch [345/500] time 0.892 (0.890) data 0.000 (0.002) loss 1.2432 (1.1873) acc 62.5000 (70.6703) lr 1.9511e-03 eta 5:21:11 +epoch [7/50] batch [350/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.1279 (1.1868) acc 68.7500 (70.6786) lr 
1.9511e-03 eta 5:21:04 +epoch [7/50] batch [355/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.2891 (1.1871) acc 62.5000 (70.6690) lr 1.9511e-03 eta 5:21:00 +epoch [7/50] batch [360/500] time 0.858 (0.890) data 0.000 (0.002) loss 1.2607 (1.1856) acc 65.6250 (70.6684) lr 1.9511e-03 eta 5:20:54 +epoch [7/50] batch [365/500] time 0.868 (0.890) data 0.000 (0.002) loss 1.6562 (1.1882) acc 50.0000 (70.5908) lr 1.9511e-03 eta 5:20:47 +epoch [7/50] batch [370/500] time 0.903 (0.890) data 0.000 (0.002) loss 1.3096 (1.1902) acc 68.7500 (70.5743) lr 1.9511e-03 eta 5:20:43 +epoch [7/50] batch [375/500] time 0.958 (0.890) data 0.000 (0.002) loss 1.3545 (1.1924) acc 78.1250 (70.5583) lr 1.9511e-03 eta 5:20:42 +epoch [7/50] batch [380/500] time 0.851 (0.890) data 0.000 (0.002) loss 1.0928 (1.1917) acc 75.0000 (70.5674) lr 1.9511e-03 eta 5:20:33 +epoch [7/50] batch [385/500] time 0.849 (0.889) data 0.000 (0.002) loss 1.4863 (1.1921) acc 68.7500 (70.5195) lr 1.9511e-03 eta 5:20:22 +epoch [7/50] batch [390/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.1191 (1.1898) acc 65.6250 (70.5048) lr 1.9511e-03 eta 5:20:13 +epoch [7/50] batch [395/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.2637 (1.1862) acc 75.0000 (70.5775) lr 1.9511e-03 eta 5:20:06 +epoch [7/50] batch [400/500] time 0.880 (0.889) data 0.000 (0.002) loss 1.3477 (1.1926) acc 71.8750 (70.4766) lr 1.9511e-03 eta 5:19:57 +epoch [7/50] batch [405/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.4131 (1.1925) acc 65.6250 (70.4707) lr 1.9511e-03 eta 5:19:49 +epoch [7/50] batch [410/500] time 0.879 (0.889) data 0.000 (0.002) loss 0.8340 (1.1938) acc 78.1250 (70.4649) lr 1.9511e-03 eta 5:19:43 +epoch [7/50] batch [415/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.0518 (1.1931) acc 68.7500 (70.4443) lr 1.9511e-03 eta 5:19:40 +epoch [7/50] batch [420/500] time 0.866 (0.889) data 0.000 (0.002) loss 1.0703 (1.1923) acc 71.8750 (70.4464) lr 1.9511e-03 eta 5:19:36 +epoch [7/50] batch [425/500] time 0.856 (0.889) data 
0.000 (0.002) loss 1.0908 (1.1941) acc 71.8750 (70.3897) lr 1.9511e-03 eta 5:19:32 +epoch [7/50] batch [430/500] time 0.910 (0.889) data 0.000 (0.002) loss 1.5898 (1.1954) acc 56.2500 (70.3634) lr 1.9511e-03 eta 5:19:27 +epoch [7/50] batch [435/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.0518 (1.1937) acc 75.0000 (70.4095) lr 1.9511e-03 eta 5:19:24 +epoch [7/50] batch [440/500] time 0.875 (0.889) data 0.000 (0.002) loss 1.3633 (1.1936) acc 59.3750 (70.3977) lr 1.9511e-03 eta 5:19:17 +epoch [7/50] batch [445/500] time 0.861 (0.888) data 0.000 (0.002) loss 0.6162 (1.1945) acc 84.3750 (70.4424) lr 1.9511e-03 eta 5:19:11 +epoch [7/50] batch [450/500] time 0.879 (0.889) data 0.000 (0.002) loss 0.9028 (1.1919) acc 75.0000 (70.4861) lr 1.9511e-03 eta 5:19:08 +epoch [7/50] batch [455/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.3398 (1.1922) acc 62.5000 (70.4739) lr 1.9511e-03 eta 5:19:03 +epoch [7/50] batch [460/500] time 0.875 (0.889) data 0.000 (0.002) loss 1.8398 (1.1958) acc 56.2500 (70.3397) lr 1.9511e-03 eta 5:18:59 +epoch [7/50] batch [465/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.4717 (1.1958) acc 65.6250 (70.2890) lr 1.9511e-03 eta 5:18:56 +epoch [7/50] batch [470/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.3984 (1.1943) acc 59.3750 (70.3125) lr 1.9511e-03 eta 5:18:50 +epoch [7/50] batch [475/500] time 0.902 (0.889) data 0.000 (0.002) loss 1.1455 (1.1944) acc 75.0000 (70.3224) lr 1.9511e-03 eta 5:18:51 +epoch [7/50] batch [480/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.1943 (1.1939) acc 75.0000 (70.3385) lr 1.9511e-03 eta 5:18:46 +epoch [7/50] batch [485/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.2109 (1.1954) acc 65.6250 (70.3028) lr 1.9511e-03 eta 5:18:39 +epoch [7/50] batch [490/500] time 0.870 (0.889) data 0.000 (0.002) loss 1.5293 (1.1944) acc 65.6250 (70.3444) lr 1.9511e-03 eta 5:18:32 +epoch [7/50] batch [495/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.1719 (1.1942) acc 75.0000 (70.3725) lr 1.9511e-03 eta 
5:18:26 +epoch [7/50] batch [500/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.0752 (1.1941) acc 71.8750 (70.4125) lr 1.9298e-03 eta 5:18:21 +epoch [8/50] batch [5/500] time 0.867 (1.018) data 0.000 (0.141) loss 1.7861 (1.5096) acc 50.0000 (63.7500) lr 1.9298e-03 eta 6:04:46 +epoch [8/50] batch [10/500] time 0.846 (0.943) data 0.000 (0.071) loss 1.5908 (1.3011) acc 68.7500 (69.6875) lr 1.9298e-03 eta 5:37:42 +epoch [8/50] batch [15/500] time 0.897 (0.921) data 0.000 (0.047) loss 0.8276 (1.2100) acc 68.7500 (70.2083) lr 1.9298e-03 eta 5:29:46 +epoch [8/50] batch [20/500] time 0.892 (0.911) data 0.000 (0.035) loss 1.4443 (1.2178) acc 68.7500 (69.6875) lr 1.9298e-03 eta 5:26:11 +epoch [8/50] batch [25/500] time 1.003 (0.913) data 0.000 (0.028) loss 1.0703 (1.2219) acc 78.1250 (70.1250) lr 1.9298e-03 eta 5:26:36 +epoch [8/50] batch [30/500] time 0.877 (0.908) data 0.000 (0.024) loss 0.9482 (1.2244) acc 78.1250 (70.5208) lr 1.9298e-03 eta 5:24:58 +epoch [8/50] batch [35/500] time 0.885 (0.903) data 0.000 (0.020) loss 1.2148 (1.2146) acc 65.6250 (70.2679) lr 1.9298e-03 eta 5:23:08 +epoch [8/50] batch [40/500] time 0.872 (0.899) data 0.000 (0.018) loss 1.5723 (1.2360) acc 62.5000 (69.9219) lr 1.9298e-03 eta 5:21:36 +epoch [8/50] batch [45/500] time 0.876 (0.896) data 0.000 (0.016) loss 1.2949 (1.2139) acc 65.6250 (70.0000) lr 1.9298e-03 eta 5:20:24 +epoch [8/50] batch [50/500] time 0.872 (0.895) data 0.000 (0.014) loss 1.3369 (1.2335) acc 75.0000 (70.0000) lr 1.9298e-03 eta 5:19:51 +epoch [8/50] batch [55/500] time 0.876 (0.894) data 0.000 (0.013) loss 0.9282 (1.2304) acc 75.0000 (70.2273) lr 1.9298e-03 eta 5:19:40 +epoch [8/50] batch [60/500] time 0.905 (0.894) data 0.000 (0.012) loss 1.1709 (1.2101) acc 65.6250 (70.2604) lr 1.9298e-03 eta 5:19:29 +epoch [8/50] batch [65/500] time 0.890 (0.894) data 0.000 (0.011) loss 1.3262 (1.2301) acc 59.3750 (69.7596) lr 1.9298e-03 eta 5:19:28 +epoch [8/50] batch [70/500] time 0.906 (0.895) data 0.000 (0.010) loss 1.2441 (1.2218) 
acc 71.8750 (70.0000) lr 1.9298e-03 eta 5:19:49 +epoch [8/50] batch [75/500] time 0.883 (0.895) data 0.000 (0.010) loss 0.9502 (1.2142) acc 68.7500 (70.3333) lr 1.9298e-03 eta 5:19:28 +epoch [8/50] batch [80/500] time 0.859 (0.893) data 0.000 (0.009) loss 0.9453 (1.2014) acc 71.8750 (70.4297) lr 1.9298e-03 eta 5:18:57 +epoch [8/50] batch [85/500] time 0.872 (0.893) data 0.000 (0.009) loss 1.2217 (1.1979) acc 75.0000 (70.6250) lr 1.9298e-03 eta 5:18:52 +epoch [8/50] batch [90/500] time 0.880 (0.893) data 0.000 (0.008) loss 1.1318 (1.2081) acc 59.3750 (70.2778) lr 1.9298e-03 eta 5:18:45 +epoch [8/50] batch [95/500] time 0.891 (0.893) data 0.000 (0.008) loss 1.1006 (1.2156) acc 75.0000 (69.9671) lr 1.9298e-03 eta 5:18:32 +epoch [8/50] batch [100/500] time 0.887 (0.892) data 0.000 (0.007) loss 1.1328 (1.2128) acc 75.0000 (70.1562) lr 1.9298e-03 eta 5:18:14 +epoch [8/50] batch [105/500] time 0.845 (0.891) data 0.000 (0.007) loss 0.8638 (1.2063) acc 78.1250 (70.3869) lr 1.9298e-03 eta 5:17:41 +epoch [8/50] batch [110/500] time 0.866 (0.890) data 0.000 (0.007) loss 0.7852 (1.2027) acc 81.2500 (70.3977) lr 1.9298e-03 eta 5:17:16 +epoch [8/50] batch [115/500] time 0.884 (0.889) data 0.000 (0.006) loss 1.4785 (1.2002) acc 65.6250 (70.5163) lr 1.9298e-03 eta 5:16:59 +epoch [8/50] batch [120/500] time 0.864 (0.889) data 0.000 (0.006) loss 1.4854 (1.2011) acc 62.5000 (70.2865) lr 1.9298e-03 eta 5:16:42 +epoch [8/50] batch [125/500] time 0.897 (0.889) data 0.000 (0.006) loss 1.0029 (1.2037) acc 78.1250 (70.2250) lr 1.9298e-03 eta 5:16:36 +epoch [8/50] batch [130/500] time 0.899 (0.888) data 0.000 (0.006) loss 1.1816 (1.2094) acc 65.6250 (70.0962) lr 1.9298e-03 eta 5:16:23 +epoch [8/50] batch [135/500] time 0.849 (0.888) data 0.000 (0.005) loss 0.7432 (1.1969) acc 87.5000 (70.3935) lr 1.9298e-03 eta 5:16:06 +epoch [8/50] batch [140/500] time 0.902 (0.887) data 0.000 (0.005) loss 0.7334 (1.1897) acc 78.1250 (70.4464) lr 1.9298e-03 eta 5:15:56 +epoch [8/50] batch [145/500] time 
0.863 (0.887) data 0.000 (0.005) loss 1.3779 (1.1940) acc 75.0000 (70.3879) lr 1.9298e-03 eta 5:15:37 +epoch [8/50] batch [150/500] time 0.904 (0.887) data 0.000 (0.005) loss 0.8232 (1.1894) acc 75.0000 (70.4792) lr 1.9298e-03 eta 5:15:33 +epoch [8/50] batch [155/500] time 0.900 (0.887) data 0.000 (0.005) loss 0.5981 (1.1878) acc 87.5000 (70.5444) lr 1.9298e-03 eta 5:15:31 +epoch [8/50] batch [160/500] time 0.872 (0.887) data 0.000 (0.005) loss 1.0498 (1.1858) acc 81.2500 (70.7227) lr 1.9298e-03 eta 5:15:18 +epoch [8/50] batch [165/500] time 0.989 (0.887) data 0.000 (0.005) loss 1.0762 (1.1859) acc 78.1250 (70.7765) lr 1.9298e-03 eta 5:15:29 +epoch [8/50] batch [170/500] time 0.856 (0.887) data 0.000 (0.004) loss 0.8633 (1.1856) acc 75.0000 (70.7904) lr 1.9298e-03 eta 5:15:21 +epoch [8/50] batch [175/500] time 0.883 (0.887) data 0.000 (0.004) loss 1.2227 (1.1885) acc 75.0000 (70.7857) lr 1.9298e-03 eta 5:15:07 +epoch [8/50] batch [180/500] time 0.893 (0.887) data 0.000 (0.004) loss 1.2588 (1.1883) acc 68.7500 (70.8681) lr 1.9298e-03 eta 5:15:07 +epoch [8/50] batch [185/500] time 0.852 (0.887) data 0.001 (0.004) loss 1.0762 (1.1869) acc 71.8750 (70.9122) lr 1.9298e-03 eta 5:14:57 +epoch [8/50] batch [190/500] time 0.857 (0.886) data 0.000 (0.004) loss 1.5869 (1.1886) acc 62.5000 (70.8388) lr 1.9298e-03 eta 5:14:41 +epoch [8/50] batch [195/500] time 0.858 (0.886) data 0.000 (0.004) loss 1.3799 (1.1852) acc 68.7500 (70.8654) lr 1.9298e-03 eta 5:14:31 +epoch [8/50] batch [200/500] time 0.914 (0.886) data 0.000 (0.004) loss 2.1211 (1.1853) acc 56.2500 (70.8750) lr 1.9298e-03 eta 5:14:28 +epoch [8/50] batch [205/500] time 0.883 (0.886) data 0.000 (0.004) loss 0.6748 (1.1858) acc 78.1250 (70.8689) lr 1.9298e-03 eta 5:14:22 +epoch [8/50] batch [210/500] time 0.868 (0.886) data 0.000 (0.004) loss 1.4600 (1.1892) acc 68.7500 (70.6845) lr 1.9298e-03 eta 5:14:28 +epoch [8/50] batch [215/500] time 0.865 (0.886) data 0.000 (0.004) loss 0.8521 (1.1903) acc 84.3750 (70.6977) lr 
1.9298e-03 eta 5:14:22 +epoch [8/50] batch [220/500] time 0.904 (0.886) data 0.000 (0.003) loss 1.1475 (1.1901) acc 75.0000 (70.6818) lr 1.9298e-03 eta 5:14:20 +epoch [8/50] batch [225/500] time 0.913 (0.886) data 0.000 (0.003) loss 1.2559 (1.1936) acc 71.8750 (70.5972) lr 1.9298e-03 eta 5:14:13 +epoch [8/50] batch [230/500] time 0.886 (0.886) data 0.000 (0.003) loss 1.2266 (1.1914) acc 59.3750 (70.5978) lr 1.9298e-03 eta 5:14:04 +epoch [8/50] batch [235/500] time 0.885 (0.886) data 0.000 (0.003) loss 1.4482 (1.1941) acc 65.6250 (70.5053) lr 1.9298e-03 eta 5:13:55 +epoch [8/50] batch [240/500] time 0.900 (0.886) data 0.000 (0.003) loss 1.8223 (1.1963) acc 62.5000 (70.5339) lr 1.9298e-03 eta 5:13:56 +epoch [8/50] batch [245/500] time 0.888 (0.886) data 0.000 (0.003) loss 1.6572 (1.1957) acc 59.3750 (70.5867) lr 1.9298e-03 eta 5:13:49 +epoch [8/50] batch [250/500] time 0.859 (0.886) data 0.000 (0.003) loss 1.5254 (1.1939) acc 71.8750 (70.6375) lr 1.9298e-03 eta 5:13:42 +epoch [8/50] batch [255/500] time 0.904 (0.886) data 0.000 (0.003) loss 1.3262 (1.1942) acc 65.6250 (70.5882) lr 1.9298e-03 eta 5:13:45 +epoch [8/50] batch [260/500] time 0.897 (0.886) data 0.000 (0.003) loss 1.0586 (1.1898) acc 68.7500 (70.6611) lr 1.9298e-03 eta 5:13:43 +epoch [8/50] batch [265/500] time 0.893 (0.886) data 0.000 (0.003) loss 0.9702 (1.1943) acc 84.3750 (70.6958) lr 1.9298e-03 eta 5:13:44 +epoch [8/50] batch [270/500] time 0.880 (0.886) data 0.000 (0.003) loss 1.5762 (1.1909) acc 65.6250 (70.7870) lr 1.9298e-03 eta 5:13:37 +epoch [8/50] batch [275/500] time 0.861 (0.886) data 0.000 (0.003) loss 1.0420 (1.1912) acc 71.8750 (70.8295) lr 1.9298e-03 eta 5:13:27 +epoch [8/50] batch [280/500] time 0.897 (0.886) data 0.000 (0.003) loss 1.2148 (1.1890) acc 75.0000 (70.8929) lr 1.9298e-03 eta 5:13:18 +epoch [8/50] batch [285/500] time 0.900 (0.886) data 0.000 (0.003) loss 0.8682 (1.1860) acc 81.2500 (71.0088) lr 1.9298e-03 eta 5:13:15 +epoch [8/50] batch [290/500] time 0.887 (0.886) data 
0.000 (0.003) loss 1.6455 (1.1877) acc 65.6250 (70.9914) lr 1.9298e-03 eta 5:13:06 +epoch [8/50] batch [295/500] time 0.860 (0.886) data 0.000 (0.003) loss 1.1660 (1.1914) acc 62.5000 (70.9216) lr 1.9298e-03 eta 5:12:58 +epoch [8/50] batch [300/500] time 0.895 (0.885) data 0.000 (0.003) loss 1.6465 (1.1914) acc 62.5000 (70.9792) lr 1.9298e-03 eta 5:12:51 +epoch [8/50] batch [305/500] time 0.872 (0.885) data 0.000 (0.003) loss 1.4697 (1.1954) acc 62.5000 (70.8709) lr 1.9298e-03 eta 5:12:41 +epoch [8/50] batch [310/500] time 0.878 (0.885) data 0.000 (0.003) loss 1.1455 (1.1966) acc 71.8750 (70.8569) lr 1.9298e-03 eta 5:12:41 +epoch [8/50] batch [315/500] time 0.849 (0.885) data 0.000 (0.002) loss 1.0449 (1.1946) acc 71.8750 (70.8631) lr 1.9298e-03 eta 5:12:35 +epoch [8/50] batch [320/500] time 0.863 (0.885) data 0.000 (0.002) loss 1.2871 (1.1921) acc 65.6250 (70.8984) lr 1.9298e-03 eta 5:12:34 +epoch [8/50] batch [325/500] time 0.880 (0.885) data 0.000 (0.002) loss 1.7305 (1.1936) acc 59.3750 (70.8654) lr 1.9298e-03 eta 5:12:30 +epoch [8/50] batch [330/500] time 0.845 (0.885) data 0.000 (0.002) loss 0.9287 (1.1929) acc 68.7500 (70.8333) lr 1.9298e-03 eta 5:12:17 +epoch [8/50] batch [335/500] time 0.901 (0.885) data 0.000 (0.002) loss 0.9492 (1.1928) acc 75.0000 (70.8489) lr 1.9298e-03 eta 5:12:12 +epoch [8/50] batch [340/500] time 0.864 (0.885) data 0.000 (0.002) loss 0.4861 (1.1893) acc 90.6250 (70.9926) lr 1.9298e-03 eta 5:12:04 +epoch [8/50] batch [345/500] time 0.876 (0.885) data 0.000 (0.002) loss 1.1514 (1.1927) acc 65.6250 (70.8605) lr 1.9298e-03 eta 5:11:57 +epoch [8/50] batch [350/500] time 0.850 (0.885) data 0.000 (0.002) loss 0.9170 (1.1938) acc 81.2500 (70.8929) lr 1.9298e-03 eta 5:11:50 +epoch [8/50] batch [355/500] time 0.851 (0.885) data 0.000 (0.002) loss 0.9253 (1.1924) acc 75.0000 (70.9067) lr 1.9298e-03 eta 5:11:48 +epoch [8/50] batch [360/500] time 0.872 (0.885) data 0.000 (0.002) loss 0.9331 (1.1917) acc 75.0000 (70.9375) lr 1.9298e-03 eta 
5:11:43 +epoch [8/50] batch [365/500] time 0.846 (0.884) data 0.000 (0.002) loss 0.9912 (1.1920) acc 71.8750 (70.9161) lr 1.9298e-03 eta 5:11:33 +epoch [8/50] batch [370/500] time 0.872 (0.884) data 0.000 (0.002) loss 0.6836 (1.1899) acc 87.5000 (71.0051) lr 1.9298e-03 eta 5:11:26 +epoch [8/50] batch [375/500] time 0.918 (0.884) data 0.000 (0.002) loss 0.9419 (1.1927) acc 84.3750 (71.0333) lr 1.9298e-03 eta 5:11:24 +epoch [8/50] batch [380/500] time 0.906 (0.884) data 0.000 (0.002) loss 0.7876 (1.1929) acc 81.2500 (71.0444) lr 1.9298e-03 eta 5:11:19 +epoch [8/50] batch [385/500] time 0.862 (0.884) data 0.000 (0.002) loss 0.7524 (1.1937) acc 81.2500 (71.0390) lr 1.9298e-03 eta 5:11:14 +epoch [8/50] batch [390/500] time 0.895 (0.884) data 0.000 (0.002) loss 0.8516 (1.1948) acc 75.0000 (71.0096) lr 1.9298e-03 eta 5:11:11 +epoch [8/50] batch [395/500] time 0.907 (0.885) data 0.000 (0.002) loss 1.5811 (1.1941) acc 68.7500 (70.9810) lr 1.9298e-03 eta 5:11:08 +epoch [8/50] batch [400/500] time 0.866 (0.884) data 0.000 (0.002) loss 0.8145 (1.1916) acc 75.0000 (71.0156) lr 1.9298e-03 eta 5:11:02 +epoch [8/50] batch [405/500] time 0.874 (0.884) data 0.000 (0.002) loss 1.0117 (1.1896) acc 75.0000 (71.0802) lr 1.9298e-03 eta 5:10:54 +epoch [8/50] batch [410/500] time 0.885 (0.884) data 0.000 (0.002) loss 1.0459 (1.1889) acc 78.1250 (71.0976) lr 1.9298e-03 eta 5:10:45 +epoch [8/50] batch [415/500] time 0.909 (0.884) data 0.000 (0.002) loss 1.1406 (1.1887) acc 62.5000 (71.0693) lr 1.9298e-03 eta 5:10:44 +epoch [8/50] batch [420/500] time 0.881 (0.884) data 0.000 (0.002) loss 2.1426 (1.1935) acc 59.3750 (70.9598) lr 1.9298e-03 eta 5:10:38 +epoch [8/50] batch [425/500] time 0.883 (0.884) data 0.000 (0.002) loss 1.3320 (1.1955) acc 68.7500 (70.9485) lr 1.9298e-03 eta 5:10:35 +epoch [8/50] batch [430/500] time 0.882 (0.884) data 0.000 (0.002) loss 0.9614 (1.1949) acc 78.1250 (70.9884) lr 1.9298e-03 eta 5:10:32 +epoch [8/50] batch [435/500] time 0.883 (0.884) data 0.000 (0.002) loss 
1.7236 (1.1960) acc 62.5000 (70.9555) lr 1.9298e-03 eta 5:10:28 +epoch [8/50] batch [440/500] time 0.895 (0.884) data 0.000 (0.002) loss 1.6348 (1.1972) acc 59.3750 (70.9091) lr 1.9298e-03 eta 5:10:26 +epoch [8/50] batch [445/500] time 0.911 (0.885) data 0.000 (0.002) loss 1.2676 (1.1967) acc 68.7500 (70.9129) lr 1.9298e-03 eta 5:10:23 +epoch [8/50] batch [450/500] time 0.909 (0.885) data 0.000 (0.002) loss 0.7666 (1.1955) acc 75.0000 (70.9097) lr 1.9298e-03 eta 5:10:23 +epoch [8/50] batch [455/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.7549 (1.1969) acc 56.2500 (70.8516) lr 1.9298e-03 eta 5:10:25 +epoch [8/50] batch [460/500] time 0.915 (0.885) data 0.000 (0.002) loss 0.8120 (1.1957) acc 81.2500 (70.8899) lr 1.9298e-03 eta 5:10:22 +epoch [8/50] batch [465/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.1875 (1.1958) acc 75.0000 (70.8535) lr 1.9298e-03 eta 5:10:15 +epoch [8/50] batch [470/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.7812 (1.1993) acc 62.5000 (70.7713) lr 1.9298e-03 eta 5:10:08 +epoch [8/50] batch [475/500] time 0.893 (0.885) data 0.000 (0.002) loss 0.9590 (1.1987) acc 75.0000 (70.7566) lr 1.9298e-03 eta 5:10:03 +epoch [8/50] batch [480/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.0264 (1.1975) acc 68.7500 (70.7682) lr 1.9298e-03 eta 5:09:58 +epoch [8/50] batch [485/500] time 0.877 (0.885) data 0.000 (0.002) loss 0.6167 (1.1981) acc 75.0000 (70.7152) lr 1.9298e-03 eta 5:09:51 +epoch [8/50] batch [490/500] time 0.869 (0.885) data 0.000 (0.002) loss 1.6133 (1.1976) acc 68.7500 (70.6888) lr 1.9298e-03 eta 5:09:47 +epoch [8/50] batch [495/500] time 0.969 (0.885) data 0.000 (0.002) loss 1.1621 (1.1985) acc 71.8750 (70.6944) lr 1.9298e-03 eta 5:09:47 +epoch [8/50] batch [500/500] time 0.910 (0.885) data 0.000 (0.002) loss 1.0488 (1.1979) acc 71.8750 (70.6750) lr 1.9048e-03 eta 5:09:42 +epoch [9/50] batch [5/500] time 0.864 (1.036) data 0.000 (0.145) loss 0.7798 (1.0421) acc 75.0000 (73.7500) lr 1.9048e-03 eta 6:02:21 +epoch [9/50] batch 
[10/500] time 0.868 (0.966) data 0.000 (0.073) loss 1.6348 (1.2131) acc 75.0000 (72.8125) lr 1.9048e-03 eta 5:37:50 +epoch [9/50] batch [15/500] time 0.909 (0.942) data 0.000 (0.049) loss 1.1309 (1.2172) acc 78.1250 (72.5000) lr 1.9048e-03 eta 5:29:20 +epoch [9/50] batch [20/500] time 0.853 (0.928) data 0.000 (0.036) loss 1.5957 (1.2129) acc 59.3750 (71.5625) lr 1.9048e-03 eta 5:24:26 +epoch [9/50] batch [25/500] time 0.851 (0.916) data 0.000 (0.029) loss 0.8193 (1.2033) acc 71.8750 (71.0000) lr 1.9048e-03 eta 5:20:03 +epoch [9/50] batch [30/500] time 0.895 (0.911) data 0.000 (0.024) loss 0.8115 (1.1709) acc 78.1250 (71.5625) lr 1.9048e-03 eta 5:18:28 +epoch [9/50] batch [35/500] time 0.896 (0.906) data 0.000 (0.021) loss 1.1484 (1.1732) acc 68.7500 (71.3393) lr 1.9048e-03 eta 5:16:41 +epoch [9/50] batch [40/500] time 0.879 (0.905) data 0.000 (0.018) loss 1.1914 (1.1515) acc 68.7500 (71.7188) lr 1.9048e-03 eta 5:16:09 +epoch [9/50] batch [45/500] time 0.911 (0.904) data 0.000 (0.016) loss 1.3037 (1.1432) acc 65.6250 (71.8056) lr 1.9048e-03 eta 5:15:41 +epoch [9/50] batch [50/500] time 0.882 (0.902) data 0.000 (0.015) loss 0.6680 (1.1475) acc 78.1250 (71.7500) lr 1.9048e-03 eta 5:15:04 +epoch [9/50] batch [55/500] time 0.856 (0.899) data 0.000 (0.013) loss 1.1270 (1.1584) acc 75.0000 (71.4773) lr 1.9048e-03 eta 5:13:51 +epoch [9/50] batch [60/500] time 0.873 (0.897) data 0.000 (0.012) loss 1.1768 (1.1560) acc 65.6250 (71.3542) lr 1.9048e-03 eta 5:13:03 +epoch [9/50] batch [65/500] time 0.893 (0.896) data 0.000 (0.011) loss 1.1445 (1.1597) acc 68.7500 (71.1538) lr 1.9048e-03 eta 5:12:40 +epoch [9/50] batch [70/500] time 0.867 (0.896) data 0.000 (0.011) loss 0.8130 (1.1404) acc 81.2500 (71.2500) lr 1.9048e-03 eta 5:12:31 +epoch [9/50] batch [75/500] time 0.891 (0.895) data 0.000 (0.010) loss 1.5830 (1.1462) acc 62.5000 (71.1667) lr 1.9048e-03 eta 5:12:11 +epoch [9/50] batch [80/500] time 0.895 (0.894) data 0.000 (0.009) loss 0.6528 (1.1325) acc 87.5000 (71.3672) lr 
1.9048e-03 eta 5:11:52 +epoch [9/50] batch [85/500] time 0.858 (0.894) data 0.000 (0.009) loss 1.0332 (1.1403) acc 75.0000 (71.2500) lr 1.9048e-03 eta 5:11:32 +epoch [9/50] batch [90/500] time 0.871 (0.893) data 0.000 (0.008) loss 1.2969 (1.1358) acc 59.3750 (71.4583) lr 1.9048e-03 eta 5:11:18 +epoch [9/50] batch [95/500] time 0.892 (0.892) data 0.000 (0.008) loss 1.1533 (1.1305) acc 65.6250 (71.5789) lr 1.9048e-03 eta 5:10:53 +epoch [9/50] batch [100/500] time 0.893 (0.892) data 0.000 (0.007) loss 1.5117 (1.1399) acc 62.5000 (71.2188) lr 1.9048e-03 eta 5:10:46 +epoch [9/50] batch [105/500] time 0.861 (0.891) data 0.000 (0.007) loss 1.6680 (1.1474) acc 43.7500 (70.6845) lr 1.9048e-03 eta 5:10:22 +epoch [9/50] batch [110/500] time 0.880 (0.891) data 0.000 (0.007) loss 0.8384 (1.1426) acc 87.5000 (70.7955) lr 1.9048e-03 eta 5:10:13 +epoch [9/50] batch [115/500] time 0.898 (0.891) data 0.000 (0.007) loss 1.0479 (1.1396) acc 75.0000 (70.8424) lr 1.9048e-03 eta 5:10:03 +epoch [9/50] batch [120/500] time 0.847 (0.890) data 0.000 (0.006) loss 1.0166 (1.1370) acc 78.1250 (70.9115) lr 1.9048e-03 eta 5:09:41 +epoch [9/50] batch [125/500] time 0.867 (0.890) data 0.000 (0.006) loss 1.4355 (1.1339) acc 68.7500 (71.0250) lr 1.9048e-03 eta 5:09:34 +epoch [9/50] batch [130/500] time 0.895 (0.890) data 0.000 (0.006) loss 1.6064 (1.1444) acc 68.7500 (70.8894) lr 1.9048e-03 eta 5:09:23 +epoch [9/50] batch [135/500] time 0.918 (0.890) data 0.000 (0.006) loss 1.5752 (1.1553) acc 56.2500 (70.6944) lr 1.9048e-03 eta 5:09:21 +epoch [9/50] batch [140/500] time 0.879 (0.890) data 0.000 (0.005) loss 0.7769 (1.1519) acc 84.3750 (70.7366) lr 1.9048e-03 eta 5:09:25 +epoch [9/50] batch [145/500] time 0.874 (0.890) data 0.000 (0.005) loss 0.8345 (1.1485) acc 81.2500 (70.9698) lr 1.9048e-03 eta 5:09:22 +epoch [9/50] batch [150/500] time 0.897 (0.890) data 0.000 (0.005) loss 1.4951 (1.1510) acc 65.6250 (71.0000) lr 1.9048e-03 eta 5:09:12 +epoch [9/50] batch [155/500] time 0.897 (0.890) data 0.000 
(0.005) loss 1.5205 (1.1539) acc 56.2500 (70.8468) lr 1.9048e-03 eta 5:09:05 +epoch [9/50] batch [160/500] time 0.870 (0.889) data 0.000 (0.005) loss 1.4473 (1.1594) acc 62.5000 (70.8203) lr 1.9048e-03 eta 5:08:51 +epoch [9/50] batch [165/500] time 0.912 (0.889) data 0.000 (0.005) loss 1.4844 (1.1566) acc 56.2500 (70.7765) lr 1.9048e-03 eta 5:08:49 +epoch [9/50] batch [170/500] time 0.883 (0.889) data 0.000 (0.005) loss 1.3662 (1.1589) acc 78.1250 (70.8272) lr 1.9048e-03 eta 5:08:47 +epoch [9/50] batch [175/500] time 0.863 (0.889) data 0.000 (0.004) loss 0.9951 (1.1562) acc 65.6250 (70.8929) lr 1.9048e-03 eta 5:08:31 +epoch [9/50] batch [180/500] time 0.885 (0.889) data 0.000 (0.004) loss 1.1445 (1.1645) acc 75.0000 (70.7986) lr 1.9048e-03 eta 5:08:22 +epoch [9/50] batch [185/500] time 0.887 (0.889) data 0.000 (0.004) loss 1.0098 (1.1690) acc 75.0000 (70.6081) lr 1.9048e-03 eta 5:08:30 +epoch [9/50] batch [190/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.1445 (1.1700) acc 71.8750 (70.5757) lr 1.9048e-03 eta 5:08:22 +epoch [9/50] batch [195/500] time 0.892 (0.889) data 0.000 (0.004) loss 0.9790 (1.1692) acc 75.0000 (70.6571) lr 1.9048e-03 eta 5:08:18 +epoch [9/50] batch [200/500] time 0.901 (0.889) data 0.000 (0.004) loss 1.1562 (1.1749) acc 68.7500 (70.5469) lr 1.9048e-03 eta 5:08:13 +epoch [9/50] batch [205/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.0957 (1.1759) acc 78.1250 (70.6402) lr 1.9048e-03 eta 5:08:05 +epoch [9/50] batch [210/500] time 0.862 (0.889) data 0.000 (0.004) loss 1.4658 (1.1785) acc 65.6250 (70.5655) lr 1.9048e-03 eta 5:07:53 +epoch [9/50] batch [215/500] time 0.864 (0.888) data 0.000 (0.004) loss 1.0840 (1.1841) acc 78.1250 (70.4942) lr 1.9048e-03 eta 5:07:43 +epoch [9/50] batch [220/500] time 0.867 (0.888) data 0.000 (0.004) loss 0.6001 (1.1859) acc 78.1250 (70.5256) lr 1.9048e-03 eta 5:07:29 +epoch [9/50] batch [225/500] time 0.855 (0.888) data 0.000 (0.003) loss 0.4568 (1.1811) acc 90.6250 (70.6944) lr 1.9048e-03 eta 5:07:21 
+epoch [9/50] batch [230/500] time 0.879 (0.887) data 0.000 (0.003) loss 0.8174 (1.1747) acc 75.0000 (70.8152) lr 1.9048e-03 eta 5:07:11 +epoch [9/50] batch [235/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.9746 (1.1728) acc 68.7500 (70.7979) lr 1.9048e-03 eta 5:07:01 +epoch [9/50] batch [240/500] time 0.863 (0.887) data 0.000 (0.003) loss 1.3516 (1.1780) acc 65.6250 (70.6641) lr 1.9048e-03 eta 5:06:54 +epoch [9/50] batch [245/500] time 0.881 (0.887) data 0.000 (0.003) loss 0.6904 (1.1731) acc 84.3750 (70.7908) lr 1.9048e-03 eta 5:06:50 +epoch [9/50] batch [250/500] time 0.889 (0.887) data 0.000 (0.003) loss 1.2803 (1.1771) acc 68.7500 (70.7250) lr 1.9048e-03 eta 5:06:40 +epoch [9/50] batch [255/500] time 0.882 (0.887) data 0.000 (0.003) loss 1.2969 (1.1783) acc 71.8750 (70.7966) lr 1.9048e-03 eta 5:06:37 +epoch [9/50] batch [260/500] time 0.857 (0.887) data 0.000 (0.003) loss 2.0664 (1.1816) acc 56.2500 (70.7692) lr 1.9048e-03 eta 5:06:33 +epoch [9/50] batch [265/500] time 0.894 (0.887) data 0.000 (0.003) loss 1.2373 (1.1810) acc 68.7500 (70.7075) lr 1.9048e-03 eta 5:06:26 +epoch [9/50] batch [270/500] time 0.893 (0.886) data 0.000 (0.003) loss 1.0215 (1.1777) acc 65.6250 (70.7639) lr 1.9048e-03 eta 5:06:16 +epoch [9/50] batch [275/500] time 0.905 (0.887) data 0.000 (0.003) loss 1.3301 (1.1765) acc 62.5000 (70.7614) lr 1.9048e-03 eta 5:06:20 +epoch [9/50] batch [280/500] time 0.869 (0.887) data 0.000 (0.003) loss 1.5742 (1.1745) acc 62.5000 (70.8259) lr 1.9048e-03 eta 5:06:11 +epoch [9/50] batch [285/500] time 0.870 (0.887) data 0.000 (0.003) loss 1.2471 (1.1750) acc 68.7500 (70.8224) lr 1.9048e-03 eta 5:06:13 +epoch [9/50] batch [290/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.5596 (1.1771) acc 53.1250 (70.7543) lr 1.9048e-03 eta 5:06:12 +epoch [9/50] batch [295/500] time 0.864 (0.887) data 0.000 (0.003) loss 1.1172 (1.1774) acc 75.0000 (70.8051) lr 1.9048e-03 eta 5:06:02 +epoch [9/50] batch [300/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.1748 
(1.1723) acc 75.0000 (70.9167) lr 1.9048e-03 eta 5:05:59 +epoch [9/50] batch [305/500] time 0.879 (0.887) data 0.000 (0.003) loss 1.1338 (1.1744) acc 75.0000 (70.9529) lr 1.9048e-03 eta 5:05:49 +epoch [9/50] batch [310/500] time 0.912 (0.887) data 0.000 (0.003) loss 1.4473 (1.1757) acc 75.0000 (70.9375) lr 1.9048e-03 eta 5:05:44 +epoch [9/50] batch [315/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.3984 (1.1776) acc 65.6250 (70.9226) lr 1.9048e-03 eta 5:05:41 +epoch [9/50] batch [320/500] time 0.907 (0.887) data 0.000 (0.003) loss 0.7949 (1.1780) acc 87.5000 (70.9277) lr 1.9048e-03 eta 5:05:39 +epoch [9/50] batch [325/500] time 1.004 (0.887) data 0.000 (0.002) loss 1.0635 (1.1758) acc 65.6250 (70.9808) lr 1.9048e-03 eta 5:05:46 +epoch [9/50] batch [330/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.5215 (1.1762) acc 68.7500 (71.0038) lr 1.9048e-03 eta 5:05:39 +epoch [9/50] batch [335/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.6338 (1.1801) acc 56.2500 (70.9235) lr 1.9048e-03 eta 5:05:36 +epoch [9/50] batch [340/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.5283 (1.1794) acc 62.5000 (70.9099) lr 1.9048e-03 eta 5:05:28 +epoch [9/50] batch [345/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.0273 (1.1790) acc 78.1250 (70.8605) lr 1.9048e-03 eta 5:05:25 +epoch [9/50] batch [350/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.3105 (1.1809) acc 75.0000 (70.8839) lr 1.9048e-03 eta 5:05:19 +epoch [9/50] batch [355/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.0889 (1.1810) acc 71.8750 (70.8627) lr 1.9048e-03 eta 5:05:17 +epoch [9/50] batch [360/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7676 (1.1794) acc 62.5000 (70.8681) lr 1.9048e-03 eta 5:05:13 +epoch [9/50] batch [365/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.5566 (1.1837) acc 62.5000 (70.7620) lr 1.9048e-03 eta 5:05:09 +epoch [9/50] batch [370/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.3291 (1.1841) acc 75.0000 (70.8108) lr 1.9048e-03 eta 5:05:08 +epoch [9/50] batch 
[375/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.5400 (1.1865) acc 56.2500 (70.7667) lr 1.9048e-03 eta 5:05:02 +epoch [9/50] batch [380/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.1660 (1.1851) acc 71.8750 (70.7977) lr 1.9048e-03 eta 5:04:55 +epoch [9/50] batch [385/500] time 0.905 (0.887) data 0.000 (0.002) loss 0.9336 (1.1831) acc 68.7500 (70.8036) lr 1.9048e-03 eta 5:04:48 +epoch [9/50] batch [390/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1973 (1.1824) acc 68.7500 (70.8253) lr 1.9048e-03 eta 5:04:41 +epoch [9/50] batch [395/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.2666 (1.1815) acc 65.6250 (70.8623) lr 1.9048e-03 eta 5:04:37 +epoch [9/50] batch [400/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.0576 (1.1791) acc 71.8750 (70.9062) lr 1.9048e-03 eta 5:04:33 +epoch [9/50] batch [405/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.9766 (1.1741) acc 75.0000 (71.0340) lr 1.9048e-03 eta 5:04:27 +epoch [9/50] batch [410/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.9404 (1.1770) acc 53.1250 (70.9909) lr 1.9048e-03 eta 5:04:20 +epoch [9/50] batch [415/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.7939 (1.1749) acc 75.0000 (71.0542) lr 1.9048e-03 eta 5:04:15 +epoch [9/50] batch [420/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.3018 (1.1752) acc 65.6250 (71.0268) lr 1.9048e-03 eta 5:04:09 +epoch [9/50] batch [425/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.1875 (1.1751) acc 62.5000 (70.9853) lr 1.9048e-03 eta 5:04:10 +epoch [9/50] batch [430/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.3564 (1.1766) acc 65.6250 (70.9666) lr 1.9048e-03 eta 5:04:06 +epoch [9/50] batch [435/500] time 0.854 (0.887) data 0.000 (0.002) loss 1.0645 (1.1760) acc 65.6250 (70.9914) lr 1.9048e-03 eta 5:04:03 +epoch [9/50] batch [440/500] time 0.851 (0.887) data 0.000 (0.002) loss 0.9058 (1.1767) acc 75.0000 (70.9943) lr 1.9048e-03 eta 5:03:57 +epoch [9/50] batch [445/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.4229 (1.1770) acc 71.8750 
(71.0112) lr 1.9048e-03 eta 5:03:50 +epoch [9/50] batch [450/500] time 0.924 (0.887) data 0.000 (0.002) loss 1.2891 (1.1764) acc 68.7500 (71.0278) lr 1.9048e-03 eta 5:03:49 +epoch [9/50] batch [455/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.9639 (1.1737) acc 75.0000 (71.0577) lr 1.9048e-03 eta 5:03:46 +epoch [9/50] batch [460/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8374 (1.1727) acc 81.2500 (71.0666) lr 1.9048e-03 eta 5:03:39 +epoch [9/50] batch [465/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7246 (1.1707) acc 78.1250 (71.0820) lr 1.9048e-03 eta 5:03:35 +epoch [9/50] batch [470/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.6318 (1.1717) acc 62.5000 (71.0306) lr 1.9048e-03 eta 5:03:36 +epoch [9/50] batch [475/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.9473 (1.1684) acc 71.8750 (71.0921) lr 1.9048e-03 eta 5:03:31 +epoch [9/50] batch [480/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.0049 (1.1667) acc 68.7500 (71.1198) lr 1.9048e-03 eta 5:03:25 +epoch [9/50] batch [485/500] time 0.922 (0.887) data 0.000 (0.002) loss 1.4219 (1.1682) acc 71.8750 (71.1340) lr 1.9048e-03 eta 5:03:23 +epoch [9/50] batch [490/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.4072 (1.1678) acc 62.5000 (71.0969) lr 1.9048e-03 eta 5:03:15 +epoch [9/50] batch [495/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.6943 (1.1701) acc 65.6250 (71.0543) lr 1.9048e-03 eta 5:03:11 +epoch [9/50] batch [500/500] time 0.856 (0.887) data 0.000 (0.002) loss 1.5918 (1.1712) acc 68.7500 (71.0500) lr 1.8763e-03 eta 5:03:03 +epoch [10/50] batch [5/500] time 0.884 (1.050) data 0.000 (0.159) loss 2.1504 (1.3536) acc 56.2500 (68.7500) lr 1.8763e-03 eta 5:58:37 +epoch [10/50] batch [10/500] time 1.031 (0.985) data 0.000 (0.080) loss 1.1006 (1.1807) acc 75.0000 (72.1875) lr 1.8763e-03 eta 5:36:19 +epoch [10/50] batch [15/500] time 0.875 (0.949) data 0.000 (0.053) loss 0.8911 (1.1172) acc 81.2500 (72.7083) lr 1.8763e-03 eta 5:23:59 +epoch [10/50] batch [20/500] time 0.878 
(0.932) data 0.000 (0.040) loss 1.1172 (1.1261) acc 62.5000 (72.0312) lr 1.8763e-03 eta 5:18:12 +epoch [10/50] batch [25/500] time 0.903 (0.922) data 0.000 (0.032) loss 1.3555 (1.1230) acc 65.6250 (72.7500) lr 1.8763e-03 eta 5:14:28 +epoch [10/50] batch [30/500] time 0.914 (0.918) data 0.000 (0.027) loss 0.8447 (1.1326) acc 75.0000 (72.0833) lr 1.8763e-03 eta 5:13:06 +epoch [10/50] batch [35/500] time 0.921 (0.915) data 0.000 (0.023) loss 1.8857 (1.1250) acc 62.5000 (72.6786) lr 1.8763e-03 eta 5:12:11 +epoch [10/50] batch [40/500] time 0.911 (0.912) data 0.000 (0.020) loss 1.0498 (1.1348) acc 78.1250 (72.9688) lr 1.8763e-03 eta 5:10:54 +epoch [10/50] batch [45/500] time 0.873 (0.909) data 0.000 (0.018) loss 0.3232 (1.1299) acc 93.7500 (73.4028) lr 1.8763e-03 eta 5:09:53 +epoch [10/50] batch [50/500] time 0.893 (0.907) data 0.000 (0.016) loss 0.5410 (1.1030) acc 87.5000 (73.8125) lr 1.8763e-03 eta 5:09:11 +epoch [10/50] batch [55/500] time 0.880 (0.906) data 0.000 (0.015) loss 0.9692 (1.0811) acc 87.5000 (74.3182) lr 1.8763e-03 eta 5:08:40 +epoch [10/50] batch [60/500] time 0.882 (0.904) data 0.000 (0.013) loss 0.9370 (1.0920) acc 81.2500 (74.1667) lr 1.8763e-03 eta 5:08:00 +epoch [10/50] batch [65/500] time 0.897 (0.902) data 0.000 (0.012) loss 0.9111 (1.0849) acc 75.0000 (74.0385) lr 1.8763e-03 eta 5:07:17 +epoch [10/50] batch [70/500] time 0.893 (0.900) data 0.000 (0.012) loss 1.2900 (1.1127) acc 68.7500 (73.2589) lr 1.8763e-03 eta 5:06:34 +epoch [10/50] batch [75/500] time 0.899 (0.899) data 0.000 (0.011) loss 0.9663 (1.1037) acc 68.7500 (73.3750) lr 1.8763e-03 eta 5:05:52 +epoch [10/50] batch [80/500] time 0.859 (0.898) data 0.000 (0.010) loss 1.0654 (1.1041) acc 78.1250 (73.3203) lr 1.8763e-03 eta 5:05:32 +epoch [10/50] batch [85/500] time 0.889 (0.896) data 0.000 (0.010) loss 1.3232 (1.0931) acc 62.5000 (73.3824) lr 1.8763e-03 eta 5:04:59 +epoch [10/50] batch [90/500] time 0.857 (0.896) data 0.000 (0.009) loss 1.3809 (1.0993) acc 71.8750 (73.2292) lr 
1.8763e-03 eta 5:04:38 +epoch [10/50] batch [95/500] time 0.884 (0.895) data 0.000 (0.009) loss 0.8804 (1.0902) acc 78.1250 (73.4211) lr 1.8763e-03 eta 5:04:27 +epoch [10/50] batch [100/500] time 0.905 (0.895) data 0.000 (0.008) loss 1.1650 (1.0939) acc 65.6250 (73.4688) lr 1.8763e-03 eta 5:04:15 +epoch [10/50] batch [105/500] time 0.924 (0.894) data 0.000 (0.008) loss 0.8394 (1.1017) acc 84.3750 (73.6905) lr 1.8763e-03 eta 5:03:58 +epoch [10/50] batch [110/500] time 0.870 (0.894) data 0.000 (0.007) loss 1.0596 (1.0973) acc 68.7500 (73.6364) lr 1.8763e-03 eta 5:03:55 +epoch [10/50] batch [115/500] time 0.920 (0.894) data 0.000 (0.007) loss 1.0586 (1.0987) acc 78.1250 (73.6141) lr 1.8763e-03 eta 5:03:53 +epoch [10/50] batch [120/500] time 0.852 (0.894) data 0.000 (0.007) loss 1.1074 (1.1018) acc 81.2500 (73.6198) lr 1.8763e-03 eta 5:03:29 +epoch [10/50] batch [125/500] time 0.872 (0.893) data 0.000 (0.007) loss 1.3740 (1.1053) acc 78.1250 (73.3250) lr 1.8763e-03 eta 5:03:12 +epoch [10/50] batch [130/500] time 0.885 (0.893) data 0.000 (0.006) loss 1.4102 (1.1044) acc 65.6250 (73.2933) lr 1.8763e-03 eta 5:03:03 +epoch [10/50] batch [135/500] time 0.899 (0.893) data 0.000 (0.006) loss 0.7695 (1.1013) acc 84.3750 (73.3796) lr 1.8763e-03 eta 5:03:05 +epoch [10/50] batch [140/500] time 0.896 (0.893) data 0.000 (0.006) loss 1.0664 (1.1030) acc 62.5000 (73.1473) lr 1.8763e-03 eta 5:03:00 +epoch [10/50] batch [145/500] time 0.901 (0.893) data 0.000 (0.006) loss 1.5859 (1.0976) acc 65.6250 (73.2759) lr 1.8763e-03 eta 5:02:59 +epoch [10/50] batch [150/500] time 0.905 (0.893) data 0.000 (0.006) loss 1.0098 (1.1005) acc 68.7500 (73.1458) lr 1.8763e-03 eta 5:02:48 +epoch [10/50] batch [155/500] time 0.882 (0.893) data 0.000 (0.005) loss 1.4111 (1.1039) acc 71.8750 (73.0444) lr 1.8763e-03 eta 5:02:48 +epoch [10/50] batch [160/500] time 0.898 (0.893) data 0.000 (0.005) loss 0.9966 (1.1013) acc 75.0000 (73.0469) lr 1.8763e-03 eta 5:02:40 +epoch [10/50] batch [165/500] time 0.876 
(0.892) data 0.000 (0.005) loss 2.4551 (1.1079) acc 56.2500 (73.0303) lr 1.8763e-03 eta 5:02:26 +epoch [10/50] batch [170/500] time 0.859 (0.892) data 0.000 (0.005) loss 1.1074 (1.1095) acc 65.6250 (72.8676) lr 1.8763e-03 eta 5:02:10 +epoch [10/50] batch [175/500] time 0.859 (0.891) data 0.000 (0.005) loss 1.5557 (1.1172) acc 71.8750 (72.7857) lr 1.8763e-03 eta 5:01:57 +epoch [10/50] batch [180/500] time 0.848 (0.891) data 0.000 (0.005) loss 0.5786 (1.1164) acc 75.0000 (72.7604) lr 1.8763e-03 eta 5:01:42 +epoch [10/50] batch [185/500] time 0.885 (0.891) data 0.000 (0.005) loss 0.9282 (1.1176) acc 78.1250 (72.6689) lr 1.8763e-03 eta 5:01:34 +epoch [10/50] batch [190/500] time 0.902 (0.891) data 0.000 (0.004) loss 0.7295 (1.1254) acc 84.3750 (72.5000) lr 1.8763e-03 eta 5:01:27 +epoch [10/50] batch [195/500] time 0.863 (0.891) data 0.000 (0.004) loss 2.1543 (1.1321) acc 50.0000 (72.3397) lr 1.8763e-03 eta 5:01:23 +epoch [10/50] batch [200/500] time 0.905 (0.891) data 0.000 (0.004) loss 0.6162 (1.1289) acc 81.2500 (72.2812) lr 1.8763e-03 eta 5:01:18 +epoch [10/50] batch [205/500] time 0.878 (0.890) data 0.000 (0.004) loss 1.1445 (1.1295) acc 65.6250 (72.2561) lr 1.8763e-03 eta 5:01:06 +epoch [10/50] batch [210/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.5234 (1.1335) acc 75.0000 (72.2173) lr 1.8763e-03 eta 5:01:01 +epoch [10/50] batch [215/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.0635 (1.1316) acc 75.0000 (72.2965) lr 1.8763e-03 eta 5:00:51 +epoch [10/50] batch [220/500] time 0.878 (0.890) data 0.000 (0.004) loss 1.4346 (1.1349) acc 75.0000 (72.2443) lr 1.8763e-03 eta 5:00:46 +epoch [10/50] batch [225/500] time 0.901 (0.890) data 0.000 (0.004) loss 1.8955 (1.1333) acc 59.3750 (72.2778) lr 1.8763e-03 eta 5:00:40 +epoch [10/50] batch [230/500] time 0.868 (0.890) data 0.000 (0.004) loss 0.9829 (1.1275) acc 75.0000 (72.3098) lr 1.8763e-03 eta 5:00:30 +epoch [10/50] batch [235/500] time 0.887 (0.889) data 0.000 (0.004) loss 1.3701 (1.1300) acc 71.8750 
(72.2606) lr 1.8763e-03 eta 5:00:17 +epoch [10/50] batch [240/500] time 0.872 (0.889) data 0.000 (0.004) loss 1.3408 (1.1308) acc 71.8750 (72.2786) lr 1.8763e-03 eta 5:00:05 +epoch [10/50] batch [245/500] time 0.900 (0.889) data 0.000 (0.003) loss 1.3838 (1.1370) acc 65.6250 (72.1556) lr 1.8763e-03 eta 4:59:57 +epoch [10/50] batch [250/500] time 0.987 (0.889) data 0.000 (0.003) loss 1.3057 (1.1371) acc 75.0000 (72.2250) lr 1.8763e-03 eta 4:59:59 +epoch [10/50] batch [255/500] time 0.902 (0.889) data 0.000 (0.003) loss 1.8984 (1.1401) acc 53.1250 (72.1936) lr 1.8763e-03 eta 4:59:51 +epoch [10/50] batch [260/500] time 0.909 (0.889) data 0.000 (0.003) loss 0.8579 (1.1381) acc 81.2500 (72.1875) lr 1.8763e-03 eta 4:59:49 +epoch [10/50] batch [265/500] time 0.851 (0.889) data 0.000 (0.003) loss 1.0430 (1.1405) acc 71.8750 (72.1344) lr 1.8763e-03 eta 4:59:39 +epoch [10/50] batch [270/500] time 0.892 (0.888) data 0.000 (0.003) loss 1.0801 (1.1426) acc 65.6250 (72.0602) lr 1.8763e-03 eta 4:59:33 +epoch [10/50] batch [275/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.6685 (1.1367) acc 81.2500 (72.1932) lr 1.8763e-03 eta 4:59:26 +epoch [10/50] batch [280/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.8770 (1.1384) acc 75.0000 (72.1987) lr 1.8763e-03 eta 4:59:21 +epoch [10/50] batch [285/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.2305 (1.1374) acc 56.2500 (72.1930) lr 1.8763e-03 eta 4:59:15 +epoch [10/50] batch [290/500] time 0.846 (0.888) data 0.000 (0.003) loss 1.2500 (1.1392) acc 65.6250 (72.2198) lr 1.8763e-03 eta 4:59:06 +epoch [10/50] batch [295/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.7837 (1.1360) acc 78.1250 (72.2987) lr 1.8763e-03 eta 4:59:10 +epoch [10/50] batch [300/500] time 0.878 (0.888) data 0.000 (0.003) loss 1.3730 (1.1374) acc 59.3750 (72.2292) lr 1.8763e-03 eta 4:59:02 +epoch [10/50] batch [305/500] time 0.912 (0.888) data 0.000 (0.003) loss 1.3701 (1.1359) acc 71.8750 (72.1926) lr 1.8763e-03 eta 4:58:53 +epoch [10/50] batch [310/500] 
time 0.858 (0.888) data 0.000 (0.003) loss 1.3877 (1.1336) acc 68.7500 (72.2480) lr 1.8763e-03 eta 4:58:40 +epoch [10/50] batch [315/500] time 0.909 (0.887) data 0.000 (0.003) loss 1.3193 (1.1313) acc 78.1250 (72.3313) lr 1.8763e-03 eta 4:58:33 +epoch [10/50] batch [320/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.5586 (1.1345) acc 59.3750 (72.2949) lr 1.8763e-03 eta 4:58:29 +epoch [10/50] batch [325/500] time 0.911 (0.887) data 0.000 (0.003) loss 2.1094 (1.1402) acc 62.5000 (72.2596) lr 1.8763e-03 eta 4:58:24 +epoch [10/50] batch [330/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.8496 (1.1413) acc 84.3750 (72.2822) lr 1.8763e-03 eta 4:58:23 +epoch [10/50] batch [335/500] time 0.890 (0.888) data 0.000 (0.003) loss 1.4375 (1.1428) acc 68.7500 (72.3041) lr 1.8763e-03 eta 4:58:21 +epoch [10/50] batch [340/500] time 0.852 (0.887) data 0.000 (0.003) loss 1.3975 (1.1422) acc 59.3750 (72.2978) lr 1.8763e-03 eta 4:58:11 +epoch [10/50] batch [345/500] time 0.835 (0.887) data 0.000 (0.003) loss 0.9033 (1.1434) acc 68.7500 (72.2826) lr 1.8763e-03 eta 4:57:58 +epoch [10/50] batch [350/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.6367 (1.1416) acc 53.1250 (72.2768) lr 1.8763e-03 eta 4:57:54 +epoch [10/50] batch [355/500] time 0.875 (0.887) data 0.000 (0.002) loss 2.1426 (1.1454) acc 59.3750 (72.2183) lr 1.8763e-03 eta 4:57:48 +epoch [10/50] batch [360/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.8091 (1.1444) acc 75.0000 (72.1962) lr 1.8763e-03 eta 4:57:42 +epoch [10/50] batch [365/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.7432 (1.1458) acc 87.5000 (72.2346) lr 1.8763e-03 eta 4:57:35 +epoch [10/50] batch [370/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.2139 (1.1500) acc 68.7500 (72.1537) lr 1.8763e-03 eta 4:57:29 +epoch [10/50] batch [375/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0576 (1.1498) acc 75.0000 (72.1250) lr 1.8763e-03 eta 4:57:23 +epoch [10/50] batch [380/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9478 (1.1482) acc 
75.0000 (72.1382) lr 1.8763e-03 eta 4:57:18 +epoch [10/50] batch [385/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.1055 (1.1478) acc 71.8750 (72.1023) lr 1.8763e-03 eta 4:57:12 +epoch [10/50] batch [390/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.1826 (1.1463) acc 78.1250 (72.1314) lr 1.8763e-03 eta 4:57:06 +epoch [10/50] batch [395/500] time 0.859 (0.887) data 0.000 (0.002) loss 0.7197 (1.1448) acc 81.2500 (72.0728) lr 1.8763e-03 eta 4:57:03 +epoch [10/50] batch [400/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.4072 (1.1433) acc 65.6250 (72.0938) lr 1.8763e-03 eta 4:56:58 +epoch [10/50] batch [405/500] time 0.909 (0.886) data 0.000 (0.002) loss 1.1631 (1.1422) acc 78.1250 (72.0988) lr 1.8763e-03 eta 4:56:53 +epoch [10/50] batch [410/500] time 0.854 (0.886) data 0.000 (0.002) loss 1.2461 (1.1429) acc 65.6250 (72.0808) lr 1.8763e-03 eta 4:56:45 +epoch [10/50] batch [415/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9824 (1.1440) acc 84.3750 (72.0708) lr 1.8763e-03 eta 4:56:40 +epoch [10/50] batch [420/500] time 0.909 (0.886) data 0.000 (0.002) loss 1.0801 (1.1446) acc 75.0000 (72.0610) lr 1.8763e-03 eta 4:56:36 +epoch [10/50] batch [425/500] time 0.895 (0.886) data 0.000 (0.002) loss 0.7788 (1.1407) acc 71.8750 (72.1103) lr 1.8763e-03 eta 4:56:31 +epoch [10/50] batch [430/500] time 0.851 (0.886) data 0.000 (0.002) loss 1.2705 (1.1421) acc 62.5000 (72.0494) lr 1.8763e-03 eta 4:56:23 +epoch [10/50] batch [435/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.9888 (1.1447) acc 68.7500 (72.0115) lr 1.8763e-03 eta 4:56:17 +epoch [10/50] batch [440/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.9629 (1.1438) acc 75.0000 (72.0455) lr 1.8763e-03 eta 4:56:18 +epoch [10/50] batch [445/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.3604 (1.1451) acc 62.5000 (72.0014) lr 1.8763e-03 eta 4:56:14 +epoch [10/50] batch [450/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.5068 (1.1466) acc 56.2500 (71.9306) lr 1.8763e-03 eta 4:56:10 +epoch [10/50] batch 
[455/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.0303 (1.1475) acc 78.1250 (71.9299) lr 1.8763e-03 eta 4:56:07 +epoch [10/50] batch [460/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.1650 (1.1494) acc 71.8750 (71.9497) lr 1.8763e-03 eta 4:56:07 +epoch [10/50] batch [465/500] time 0.903 (0.887) data 0.000 (0.002) loss 0.9771 (1.1504) acc 68.7500 (71.9153) lr 1.8763e-03 eta 4:56:05 +epoch [10/50] batch [470/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.9370 (1.1522) acc 71.8750 (71.8351) lr 1.8763e-03 eta 4:56:00 +epoch [10/50] batch [475/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.3477 (1.1518) acc 96.8750 (71.8816) lr 1.8763e-03 eta 4:55:55 +epoch [10/50] batch [480/500] time 0.917 (0.887) data 0.000 (0.002) loss 1.1230 (1.1491) acc 75.0000 (71.9466) lr 1.8763e-03 eta 4:55:55 +epoch [10/50] batch [485/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.3076 (1.1506) acc 59.3750 (71.8814) lr 1.8763e-03 eta 4:55:50 +epoch [10/50] batch [490/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.6260 (1.1493) acc 81.2500 (71.9324) lr 1.8763e-03 eta 4:55:46 +epoch [10/50] batch [495/500] time 0.905 (0.887) data 0.000 (0.002) loss 1.6963 (1.1493) acc 53.1250 (71.9129) lr 1.8763e-03 eta 4:55:40 +epoch [10/50] batch [500/500] time 0.852 (0.887) data 0.000 (0.002) loss 0.3938 (1.1491) acc 90.6250 (71.9125) lr 1.8443e-03 eta 4:55:33 +epoch [11/50] batch [5/500] time 0.879 (1.035) data 0.000 (0.142) loss 0.8408 (1.0895) acc 75.0000 (71.8750) lr 1.8443e-03 eta 5:44:50 +epoch [11/50] batch [10/500] time 0.882 (0.966) data 0.000 (0.071) loss 0.8779 (1.1280) acc 84.3750 (71.8750) lr 1.8443e-03 eta 5:21:54 +epoch [11/50] batch [15/500] time 0.885 (0.938) data 0.000 (0.048) loss 0.8896 (1.1367) acc 78.1250 (72.9167) lr 1.8443e-03 eta 5:12:24 +epoch [11/50] batch [20/500] time 0.875 (0.925) data 0.000 (0.036) loss 1.0244 (1.1328) acc 71.8750 (72.6562) lr 1.8443e-03 eta 5:08:01 +epoch [11/50] batch [25/500] time 0.868 (0.922) data 0.000 (0.029) loss 1.3799 (1.1299) acc 
62.5000 (72.7500) lr 1.8443e-03 eta 5:06:56 +epoch [11/50] batch [30/500] time 0.880 (0.914) data 0.000 (0.024) loss 0.7324 (1.1589) acc 84.3750 (71.7708) lr 1.8443e-03 eta 5:04:15 +epoch [11/50] batch [35/500] time 0.862 (0.909) data 0.000 (0.021) loss 0.9478 (1.1197) acc 75.0000 (72.5893) lr 1.8443e-03 eta 5:02:37 +epoch [11/50] batch [40/500] time 0.878 (0.905) data 0.000 (0.018) loss 1.3711 (1.1432) acc 75.0000 (71.5625) lr 1.8443e-03 eta 5:01:08 +epoch [11/50] batch [45/500] time 0.885 (0.902) data 0.000 (0.016) loss 0.7939 (1.1383) acc 75.0000 (71.8056) lr 1.8443e-03 eta 5:00:01 +epoch [11/50] batch [50/500] time 0.904 (0.900) data 0.000 (0.014) loss 1.0723 (1.1322) acc 71.8750 (72.1250) lr 1.8443e-03 eta 4:59:18 +epoch [11/50] batch [55/500] time 0.871 (0.898) data 0.000 (0.013) loss 1.0107 (1.1400) acc 78.1250 (71.9886) lr 1.8443e-03 eta 4:58:39 +epoch [11/50] batch [60/500] time 0.876 (0.898) data 0.000 (0.012) loss 1.2744 (1.1441) acc 75.0000 (71.9792) lr 1.8443e-03 eta 4:58:20 +epoch [11/50] batch [65/500] time 0.873 (0.896) data 0.000 (0.011) loss 1.0498 (1.1274) acc 68.7500 (72.3077) lr 1.8443e-03 eta 4:57:32 +epoch [11/50] batch [70/500] time 0.914 (0.894) data 0.000 (0.010) loss 0.9282 (1.1164) acc 71.8750 (72.6786) lr 1.8443e-03 eta 4:57:06 +epoch [11/50] batch [75/500] time 0.876 (0.894) data 0.000 (0.010) loss 0.9380 (1.1165) acc 78.1250 (72.8333) lr 1.8443e-03 eta 4:56:52 +epoch [11/50] batch [80/500] time 0.904 (0.894) data 0.000 (0.009) loss 1.0518 (1.1334) acc 71.8750 (72.4609) lr 1.8443e-03 eta 4:56:47 +epoch [11/50] batch [85/500] time 0.880 (0.895) data 0.000 (0.009) loss 1.2812 (1.1401) acc 68.7500 (72.2794) lr 1.8443e-03 eta 4:57:00 +epoch [11/50] batch [90/500] time 0.881 (0.895) data 0.000 (0.008) loss 1.5811 (1.1383) acc 62.5000 (72.0833) lr 1.8443e-03 eta 4:57:01 +epoch [11/50] batch [95/500] time 0.892 (0.895) data 0.000 (0.008) loss 1.2705 (1.1364) acc 62.5000 (71.9737) lr 1.8443e-03 eta 4:56:57 +epoch [11/50] batch [100/500] time 
0.937 (0.896) data 0.000 (0.007) loss 1.3066 (1.1448) acc 68.7500 (71.6875) lr 1.8443e-03 eta 4:57:06 +epoch [11/50] batch [105/500] time 0.908 (0.896) data 0.000 (0.007) loss 0.6602 (1.1419) acc 81.2500 (71.4583) lr 1.8443e-03 eta 4:57:06 +epoch [11/50] batch [110/500] time 0.878 (0.896) data 0.000 (0.007) loss 1.3965 (1.1520) acc 62.5000 (71.0795) lr 1.8443e-03 eta 4:57:04 +epoch [11/50] batch [115/500] time 0.852 (0.896) data 0.000 (0.006) loss 0.8599 (1.1415) acc 68.7500 (71.4130) lr 1.8443e-03 eta 4:56:48 +epoch [11/50] batch [120/500] time 0.894 (0.895) data 0.000 (0.006) loss 1.2148 (1.1391) acc 62.5000 (71.3021) lr 1.8443e-03 eta 4:56:27 +epoch [11/50] batch [125/500] time 0.867 (0.894) data 0.000 (0.006) loss 1.5186 (1.1469) acc 65.6250 (71.1500) lr 1.8443e-03 eta 4:56:08 +epoch [11/50] batch [130/500] time 0.869 (0.895) data 0.000 (0.006) loss 1.9180 (1.1569) acc 71.8750 (71.2740) lr 1.8443e-03 eta 4:56:16 +epoch [11/50] batch [135/500] time 0.893 (0.893) data 0.000 (0.005) loss 1.0713 (1.1571) acc 78.1250 (71.3194) lr 1.8443e-03 eta 4:55:48 +epoch [11/50] batch [140/500] time 0.867 (0.893) data 0.000 (0.005) loss 1.1035 (1.1644) acc 78.1250 (71.1384) lr 1.8443e-03 eta 4:55:28 +epoch [11/50] batch [145/500] time 0.846 (0.892) data 0.000 (0.005) loss 1.5762 (1.1691) acc 59.3750 (70.8405) lr 1.8443e-03 eta 4:55:10 +epoch [11/50] batch [150/500] time 0.871 (0.892) data 0.000 (0.005) loss 0.5059 (1.1669) acc 81.2500 (70.9167) lr 1.8443e-03 eta 4:54:59 +epoch [11/50] batch [155/500] time 0.900 (0.891) data 0.000 (0.005) loss 1.0771 (1.1649) acc 68.7500 (70.9879) lr 1.8443e-03 eta 4:54:43 +epoch [11/50] batch [160/500] time 0.888 (0.891) data 0.000 (0.005) loss 1.2910 (1.1643) acc 65.6250 (70.9961) lr 1.8443e-03 eta 4:54:31 +epoch [11/50] batch [165/500] time 0.887 (0.890) data 0.000 (0.005) loss 0.8257 (1.1653) acc 81.2500 (71.0795) lr 1.8443e-03 eta 4:54:19 +epoch [11/50] batch [170/500] time 0.869 (0.890) data 0.000 (0.004) loss 1.1533 (1.1592) acc 71.8750 
(71.0478) lr 1.8443e-03 eta 4:54:17 +epoch [11/50] batch [175/500] time 0.892 (0.891) data 0.000 (0.004) loss 0.6948 (1.1544) acc 75.0000 (71.0893) lr 1.8443e-03 eta 4:54:17 +epoch [11/50] batch [180/500] time 0.903 (0.891) data 0.000 (0.004) loss 0.9258 (1.1545) acc 78.1250 (71.0069) lr 1.8443e-03 eta 4:54:14 +epoch [11/50] batch [185/500] time 0.888 (0.891) data 0.000 (0.004) loss 1.0898 (1.1499) acc 71.8750 (71.1318) lr 1.8443e-03 eta 4:54:09 +epoch [11/50] batch [190/500] time 0.882 (0.890) data 0.000 (0.004) loss 0.6807 (1.1438) acc 81.2500 (71.2829) lr 1.8443e-03 eta 4:53:58 +epoch [11/50] batch [195/500] time 0.871 (0.891) data 0.000 (0.004) loss 0.6313 (1.1401) acc 81.2500 (71.3301) lr 1.8443e-03 eta 4:53:58 +epoch [11/50] batch [200/500] time 0.895 (0.890) data 0.000 (0.004) loss 1.5674 (1.1410) acc 68.7500 (71.2656) lr 1.8443e-03 eta 4:53:48 +epoch [11/50] batch [205/500] time 0.868 (0.890) data 0.000 (0.004) loss 0.9868 (1.1470) acc 75.0000 (71.2500) lr 1.8443e-03 eta 4:53:36 +epoch [11/50] batch [210/500] time 0.888 (0.890) data 0.000 (0.004) loss 0.9771 (1.1503) acc 68.7500 (71.1607) lr 1.8443e-03 eta 4:53:27 +epoch [11/50] batch [215/500] time 0.893 (0.890) data 0.000 (0.004) loss 1.6309 (1.1465) acc 71.8750 (71.2645) lr 1.8443e-03 eta 4:53:22 +epoch [11/50] batch [220/500] time 0.892 (0.890) data 0.000 (0.003) loss 1.1855 (1.1487) acc 71.8750 (71.3210) lr 1.8443e-03 eta 4:53:16 +epoch [11/50] batch [225/500] time 0.886 (0.890) data 0.000 (0.003) loss 1.4902 (1.1466) acc 78.1250 (71.4306) lr 1.8443e-03 eta 4:53:10 +epoch [11/50] batch [230/500] time 0.874 (0.890) data 0.000 (0.003) loss 1.4326 (1.1510) acc 75.0000 (71.3723) lr 1.8443e-03 eta 4:53:12 +epoch [11/50] batch [235/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.2363 (1.1488) acc 75.0000 (71.4229) lr 1.8443e-03 eta 4:53:00 +epoch [11/50] batch [240/500] time 0.891 (0.889) data 0.000 (0.003) loss 0.9385 (1.1453) acc 78.1250 (71.4974) lr 1.8443e-03 eta 4:52:49 +epoch [11/50] batch [245/500] 
time 0.906 (0.889) data 0.000 (0.003) loss 1.2314 (1.1475) acc 78.1250 (71.5689) lr 1.8443e-03 eta 4:52:45 +epoch [11/50] batch [250/500] time 0.884 (0.889) data 0.000 (0.003) loss 0.8257 (1.1441) acc 71.8750 (71.5875) lr 1.8443e-03 eta 4:52:42 +epoch [11/50] batch [255/500] time 0.921 (0.889) data 0.000 (0.003) loss 0.6748 (1.1438) acc 81.2500 (71.5809) lr 1.8443e-03 eta 4:52:38 +epoch [11/50] batch [260/500] time 0.866 (0.889) data 0.000 (0.003) loss 1.2588 (1.1423) acc 62.5000 (71.5745) lr 1.8443e-03 eta 4:52:25 +epoch [11/50] batch [265/500] time 0.897 (0.889) data 0.000 (0.003) loss 0.7056 (1.1429) acc 81.2500 (71.5920) lr 1.8443e-03 eta 4:52:16 +epoch [11/50] batch [270/500] time 0.885 (0.888) data 0.000 (0.003) loss 0.8203 (1.1421) acc 84.3750 (71.6088) lr 1.8443e-03 eta 4:52:04 +epoch [11/50] batch [275/500] time 0.870 (0.888) data 0.000 (0.003) loss 0.6807 (1.1404) acc 81.2500 (71.6477) lr 1.8443e-03 eta 4:52:02 +epoch [11/50] batch [280/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.5068 (1.1412) acc 65.6250 (71.6406) lr 1.8443e-03 eta 4:51:56 +epoch [11/50] batch [285/500] time 0.883 (0.888) data 0.000 (0.003) loss 0.6001 (1.1431) acc 84.3750 (71.6009) lr 1.8443e-03 eta 4:51:46 +epoch [11/50] batch [290/500] time 0.865 (0.888) data 0.000 (0.003) loss 1.1748 (1.1442) acc 68.7500 (71.5625) lr 1.8443e-03 eta 4:51:40 +epoch [11/50] batch [295/500] time 0.900 (0.888) data 0.000 (0.003) loss 0.7368 (1.1431) acc 81.2500 (71.6102) lr 1.8443e-03 eta 4:51:33 +epoch [11/50] batch [300/500] time 0.902 (0.888) data 0.000 (0.003) loss 1.5898 (1.1470) acc 53.1250 (71.4896) lr 1.8443e-03 eta 4:51:31 +epoch [11/50] batch [305/500] time 0.915 (0.888) data 0.000 (0.003) loss 1.0762 (1.1492) acc 62.5000 (71.4242) lr 1.8443e-03 eta 4:51:27 +epoch [11/50] batch [310/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.0107 (1.1479) acc 75.0000 (71.4617) lr 1.8443e-03 eta 4:51:23 +epoch [11/50] batch [315/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.0234 (1.1485) acc 
68.7500 (71.3790) lr 1.8443e-03 eta 4:51:19 +epoch [11/50] batch [320/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.4775 (1.1494) acc 78.1250 (71.4160) lr 1.8443e-03 eta 4:51:15 +epoch [11/50] batch [325/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0908 (1.1510) acc 65.6250 (71.3654) lr 1.8443e-03 eta 4:51:11 +epoch [11/50] batch [330/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.0254 (1.1489) acc 71.8750 (71.4110) lr 1.8443e-03 eta 4:51:06 +epoch [11/50] batch [335/500] time 0.926 (0.888) data 0.000 (0.002) loss 0.7363 (1.1465) acc 78.1250 (71.4179) lr 1.8443e-03 eta 4:51:06 +epoch [11/50] batch [340/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2432 (1.1468) acc 75.0000 (71.3971) lr 1.8443e-03 eta 4:50:59 +epoch [11/50] batch [345/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.3154 (1.1480) acc 62.5000 (71.4040) lr 1.8443e-03 eta 4:50:52 +epoch [11/50] batch [350/500] time 0.912 (0.888) data 0.000 (0.002) loss 0.5068 (1.1470) acc 81.2500 (71.4286) lr 1.8443e-03 eta 4:50:50 +epoch [11/50] batch [355/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.0635 (1.1461) acc 71.8750 (71.4701) lr 1.8443e-03 eta 4:50:48 +epoch [11/50] batch [360/500] time 0.882 (0.888) data 0.000 (0.002) loss 1.3008 (1.1455) acc 71.8750 (71.5104) lr 1.8443e-03 eta 4:50:43 +epoch [11/50] batch [365/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3301 (1.1463) acc 71.8750 (71.4640) lr 1.8443e-03 eta 4:50:36 +epoch [11/50] batch [370/500] time 1.000 (0.888) data 0.000 (0.002) loss 0.6982 (1.1440) acc 81.2500 (71.4949) lr 1.8443e-03 eta 4:50:34 +epoch [11/50] batch [375/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.6094 (1.1456) acc 68.7500 (71.4750) lr 1.8443e-03 eta 4:50:34 +epoch [11/50] batch [380/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.8960 (1.1470) acc 78.1250 (71.4227) lr 1.8443e-03 eta 4:50:29 +epoch [11/50] batch [385/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.2939 (1.1461) acc 62.5000 (71.4205) lr 1.8443e-03 eta 4:50:21 +epoch [11/50] batch 
[390/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2764 (1.1478) acc 71.8750 (71.4103) lr 1.8443e-03 eta 4:50:16 +epoch [11/50] batch [395/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2812 (1.1471) acc 62.5000 (71.3845) lr 1.8443e-03 eta 4:50:09 +epoch [11/50] batch [400/500] time 0.884 (0.888) data 0.000 (0.002) loss 2.1133 (1.1507) acc 59.3750 (71.2891) lr 1.8443e-03 eta 4:50:05 +epoch [11/50] batch [405/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.3906 (1.1551) acc 78.1250 (71.2037) lr 1.8443e-03 eta 4:49:55 +epoch [11/50] batch [410/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.6465 (1.1572) acc 59.3750 (71.1509) lr 1.8443e-03 eta 4:49:46 +epoch [11/50] batch [415/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.8911 (1.1569) acc 81.2500 (71.1898) lr 1.8443e-03 eta 4:49:50 +epoch [11/50] batch [420/500] time 0.932 (0.888) data 0.000 (0.002) loss 0.6919 (1.1555) acc 84.3750 (71.2574) lr 1.8443e-03 eta 4:49:45 +epoch [11/50] batch [425/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.8691 (1.1545) acc 75.0000 (71.2574) lr 1.8443e-03 eta 4:49:40 +epoch [11/50] batch [430/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.4395 (1.1560) acc 62.5000 (71.2137) lr 1.8443e-03 eta 4:49:34 +epoch [11/50] batch [435/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.0713 (1.1583) acc 81.2500 (71.2141) lr 1.8443e-03 eta 4:49:30 +epoch [11/50] batch [440/500] time 0.888 (0.888) data 0.000 (0.002) loss 0.9668 (1.1561) acc 71.8750 (71.2429) lr 1.8443e-03 eta 4:49:21 +epoch [11/50] batch [445/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.3789 (1.1554) acc 68.7500 (71.2781) lr 1.8443e-03 eta 4:49:15 +epoch [11/50] batch [450/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.9121 (1.1536) acc 71.8750 (71.3264) lr 1.8443e-03 eta 4:49:07 +epoch [11/50] batch [455/500] time 0.876 (0.887) data 0.000 (0.002) loss 0.9082 (1.1527) acc 75.0000 (71.3530) lr 1.8443e-03 eta 4:48:59 +epoch [11/50] batch [460/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.4473 
(1.1547) acc 59.3750 (71.3383) lr 1.8443e-03 eta 4:48:53 +epoch [11/50] batch [465/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.6831 (1.1539) acc 78.1250 (71.3374) lr 1.8443e-03 eta 4:48:47 +epoch [11/50] batch [470/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.1650 (1.1543) acc 62.5000 (71.3298) lr 1.8443e-03 eta 4:48:43 +epoch [11/50] batch [475/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0625 (1.1536) acc 71.8750 (71.3618) lr 1.8443e-03 eta 4:48:39 +epoch [11/50] batch [480/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.1787 (1.1559) acc 65.6250 (71.3411) lr 1.8443e-03 eta 4:48:32 +epoch [11/50] batch [485/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.9238 (1.1564) acc 81.2500 (71.3595) lr 1.8443e-03 eta 4:48:27 +epoch [11/50] batch [490/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.7031 (1.1591) acc 65.6250 (71.3138) lr 1.8443e-03 eta 4:48:23 +epoch [11/50] batch [495/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.2549 (1.1582) acc 68.7500 (71.3194) lr 1.8443e-03 eta 4:48:21 +epoch [11/50] batch [500/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.8550 (1.1583) acc 87.5000 (71.3625) lr 1.8090e-03 eta 4:48:19 +epoch [12/50] batch [5/500] time 0.920 (1.051) data 0.000 (0.149) loss 1.4990 (1.2933) acc 68.7500 (73.1250) lr 1.8090e-03 eta 5:41:28 +epoch [12/50] batch [10/500] time 0.881 (0.976) data 0.000 (0.074) loss 0.8975 (1.1593) acc 78.1250 (73.7500) lr 1.8090e-03 eta 5:17:09 +epoch [12/50] batch [15/500] time 0.872 (0.941) data 0.000 (0.050) loss 1.0576 (1.1229) acc 71.8750 (72.7083) lr 1.8090e-03 eta 5:05:33 +epoch [12/50] batch [20/500] time 0.880 (0.926) data 0.000 (0.037) loss 0.8062 (1.1192) acc 78.1250 (72.3438) lr 1.8090e-03 eta 5:00:33 +epoch [12/50] batch [25/500] time 0.865 (0.917) data 0.000 (0.030) loss 1.0361 (1.1282) acc 68.7500 (73.3750) lr 1.8090e-03 eta 4:57:30 +epoch [12/50] batch [30/500] time 0.918 (0.914) data 0.000 (0.025) loss 0.9634 (1.1400) acc 78.1250 (72.9167) lr 1.8090e-03 eta 4:56:35 +epoch [12/50] 
batch [35/500] time 0.885 (0.910) data 0.000 (0.021) loss 1.0840 (1.1601) acc 75.0000 (72.1429) lr 1.8090e-03 eta 4:55:04 +epoch [12/50] batch [40/500] time 0.868 (0.906) data 0.000 (0.019) loss 0.9722 (1.1342) acc 78.1250 (72.1875) lr 1.8090e-03 eta 4:53:59 +epoch [12/50] batch [45/500] time 0.877 (0.904) data 0.000 (0.017) loss 1.4746 (1.1036) acc 59.3750 (72.8472) lr 1.8090e-03 eta 4:53:06 +epoch [12/50] batch [50/500] time 0.881 (0.903) data 0.000 (0.015) loss 1.2266 (1.1108) acc 62.5000 (72.5000) lr 1.8090e-03 eta 4:52:46 +epoch [12/50] batch [55/500] time 0.908 (0.901) data 0.000 (0.014) loss 0.8906 (1.0966) acc 65.6250 (72.7273) lr 1.8090e-03 eta 4:52:05 +epoch [12/50] batch [60/500] time 0.904 (0.900) data 0.000 (0.013) loss 1.2744 (1.1144) acc 68.7500 (72.3438) lr 1.8090e-03 eta 4:51:40 +epoch [12/50] batch [65/500] time 0.896 (0.900) data 0.000 (0.012) loss 1.0781 (1.0990) acc 68.7500 (72.5000) lr 1.8090e-03 eta 4:51:21 +epoch [12/50] batch [70/500] time 0.896 (0.900) data 0.000 (0.011) loss 1.3438 (1.1003) acc 62.5000 (72.1875) lr 1.8090e-03 eta 4:51:24 +epoch [12/50] batch [75/500] time 0.876 (0.898) data 0.000 (0.010) loss 1.5449 (1.1026) acc 75.0000 (72.2500) lr 1.8090e-03 eta 4:50:40 +epoch [12/50] batch [80/500] time 0.906 (0.898) data 0.000 (0.010) loss 1.3076 (1.1062) acc 78.1250 (72.1484) lr 1.8090e-03 eta 4:50:31 +epoch [12/50] batch [85/500] time 0.896 (0.897) data 0.000 (0.009) loss 1.2754 (1.1153) acc 68.7500 (72.1324) lr 1.8090e-03 eta 4:50:18 +epoch [12/50] batch [90/500] time 0.883 (0.896) data 0.000 (0.008) loss 1.4346 (1.1208) acc 62.5000 (72.0486) lr 1.8090e-03 eta 4:49:53 +epoch [12/50] batch [95/500] time 0.860 (0.895) data 0.000 (0.008) loss 1.2383 (1.1214) acc 71.8750 (72.0066) lr 1.8090e-03 eta 4:49:36 +epoch [12/50] batch [100/500] time 0.862 (0.895) data 0.000 (0.008) loss 1.3232 (1.1183) acc 75.0000 (72.0000) lr 1.8090e-03 eta 4:49:21 +epoch [12/50] batch [105/500] time 0.891 (0.894) data 0.000 (0.007) loss 1.2285 (1.1229) acc 
65.6250 (71.7857) lr 1.8090e-03 eta 4:49:02 +epoch [12/50] batch [110/500] time 0.890 (0.894) data 0.000 (0.007) loss 0.8657 (1.1288) acc 68.7500 (71.5057) lr 1.8090e-03 eta 4:48:51 +epoch [12/50] batch [115/500] time 0.871 (0.894) data 0.000 (0.007) loss 1.0420 (1.1275) acc 75.0000 (71.5489) lr 1.8090e-03 eta 4:48:58 +epoch [12/50] batch [120/500] time 0.886 (0.894) data 0.000 (0.006) loss 1.9326 (1.1236) acc 62.5000 (71.7188) lr 1.8090e-03 eta 4:48:47 +epoch [12/50] batch [125/500] time 0.904 (0.894) data 0.000 (0.006) loss 1.1641 (1.1285) acc 81.2500 (71.5750) lr 1.8090e-03 eta 4:48:50 +epoch [12/50] batch [130/500] time 0.909 (0.894) data 0.000 (0.006) loss 0.9219 (1.1273) acc 75.0000 (71.5385) lr 1.8090e-03 eta 4:48:45 +epoch [12/50] batch [135/500] time 0.865 (0.894) data 0.000 (0.006) loss 0.8726 (1.1212) acc 75.0000 (71.5972) lr 1.8090e-03 eta 4:48:32 +epoch [12/50] batch [140/500] time 0.885 (0.894) data 0.000 (0.006) loss 1.2881 (1.1269) acc 65.6250 (71.5402) lr 1.8090e-03 eta 4:48:22 +epoch [12/50] batch [145/500] time 0.872 (0.893) data 0.000 (0.005) loss 1.0693 (1.1231) acc 71.8750 (71.7026) lr 1.8090e-03 eta 4:47:57 +epoch [12/50] batch [150/500] time 0.903 (0.893) data 0.000 (0.005) loss 0.6704 (1.1230) acc 87.5000 (71.8125) lr 1.8090e-03 eta 4:47:53 +epoch [12/50] batch [155/500] time 0.883 (0.892) data 0.000 (0.005) loss 1.0010 (1.1283) acc 78.1250 (71.6331) lr 1.8090e-03 eta 4:47:40 +epoch [12/50] batch [160/500] time 0.865 (0.892) data 0.000 (0.005) loss 1.3516 (1.1387) acc 71.8750 (71.5039) lr 1.8090e-03 eta 4:47:36 +epoch [12/50] batch [165/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.9253 (1.1445) acc 78.1250 (71.4205) lr 1.8090e-03 eta 4:47:18 +epoch [12/50] batch [170/500] time 0.900 (0.891) data 0.000 (0.005) loss 1.1377 (1.1476) acc 68.7500 (71.3419) lr 1.8090e-03 eta 4:47:10 +epoch [12/50] batch [175/500] time 0.865 (0.891) data 0.000 (0.004) loss 1.6738 (1.1475) acc 56.2500 (71.3036) lr 1.8090e-03 eta 4:47:02 +epoch [12/50] batch 
[180/500] time 0.896 (0.891) data 0.000 (0.004) loss 1.1230 (1.1493) acc 71.8750 (71.1632) lr 1.8090e-03 eta 4:46:56 +epoch [12/50] batch [185/500] time 0.902 (0.891) data 0.000 (0.004) loss 1.2246 (1.1540) acc 68.7500 (70.9966) lr 1.8090e-03 eta 4:46:56 +epoch [12/50] batch [190/500] time 0.903 (0.891) data 0.000 (0.004) loss 1.0283 (1.1517) acc 71.8750 (70.9868) lr 1.8090e-03 eta 4:46:54 +epoch [12/50] batch [195/500] time 0.858 (0.891) data 0.000 (0.004) loss 1.2061 (1.1500) acc 68.7500 (71.0577) lr 1.8090e-03 eta 4:46:48 +epoch [12/50] batch [200/500] time 0.892 (0.891) data 0.000 (0.004) loss 2.2832 (1.1575) acc 53.1250 (70.9375) lr 1.8090e-03 eta 4:46:40 +epoch [12/50] batch [205/500] time 0.901 (0.891) data 0.000 (0.004) loss 0.7236 (1.1527) acc 78.1250 (71.0518) lr 1.8090e-03 eta 4:46:29 +epoch [12/50] batch [210/500] time 0.880 (0.891) data 0.000 (0.004) loss 1.1455 (1.1569) acc 78.1250 (71.0268) lr 1.8090e-03 eta 4:46:24 +epoch [12/50] batch [215/500] time 0.893 (0.892) data 0.001 (0.004) loss 0.8809 (1.1545) acc 75.0000 (71.1192) lr 1.8090e-03 eta 4:46:33 +epoch [12/50] batch [220/500] time 0.887 (0.891) data 0.000 (0.004) loss 1.3037 (1.1596) acc 59.3750 (70.9233) lr 1.8090e-03 eta 4:46:22 +epoch [12/50] batch [225/500] time 0.867 (0.891) data 0.000 (0.004) loss 1.1963 (1.1646) acc 62.5000 (70.8611) lr 1.8090e-03 eta 4:46:17 +epoch [12/50] batch [230/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.5596 (1.1682) acc 62.5000 (70.8424) lr 1.8090e-03 eta 4:46:09 +epoch [12/50] batch [235/500] time 0.875 (0.891) data 0.000 (0.003) loss 1.2129 (1.1717) acc 62.5000 (70.6782) lr 1.8090e-03 eta 4:45:58 +epoch [12/50] batch [240/500] time 0.902 (0.890) data 0.000 (0.003) loss 1.6260 (1.1736) acc 59.3750 (70.6641) lr 1.8090e-03 eta 4:45:50 +epoch [12/50] batch [245/500] time 0.871 (0.890) data 0.001 (0.003) loss 0.7056 (1.1726) acc 78.1250 (70.6760) lr 1.8090e-03 eta 4:45:41 +epoch [12/50] batch [250/500] time 0.866 (0.890) data 0.000 (0.003) loss 0.9751 
(1.1739) acc 78.1250 (70.6625) lr 1.8090e-03 eta 4:45:34 +epoch [12/50] batch [255/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.1914 (1.1757) acc 78.1250 (70.6495) lr 1.8090e-03 eta 4:45:28 +epoch [12/50] batch [260/500] time 0.912 (0.890) data 0.000 (0.003) loss 0.9810 (1.1738) acc 75.0000 (70.7332) lr 1.8090e-03 eta 4:45:32 +epoch [12/50] batch [265/500] time 0.862 (0.891) data 0.000 (0.003) loss 0.9912 (1.1747) acc 71.8750 (70.7075) lr 1.8090e-03 eta 4:45:29 +epoch [12/50] batch [270/500] time 0.920 (0.891) data 0.000 (0.003) loss 1.2402 (1.1724) acc 71.8750 (70.7870) lr 1.8090e-03 eta 4:45:31 +epoch [12/50] batch [275/500] time 0.905 (0.891) data 0.000 (0.003) loss 0.9214 (1.1685) acc 71.8750 (70.8977) lr 1.8090e-03 eta 4:45:25 +epoch [12/50] batch [280/500] time 0.904 (0.891) data 0.000 (0.003) loss 1.7266 (1.1689) acc 53.1250 (70.8482) lr 1.8090e-03 eta 4:45:18 +epoch [12/50] batch [285/500] time 0.906 (0.891) data 0.000 (0.003) loss 1.0479 (1.1693) acc 71.8750 (70.8662) lr 1.8090e-03 eta 4:45:15 +epoch [12/50] batch [290/500] time 0.919 (0.891) data 0.000 (0.003) loss 0.9795 (1.1654) acc 75.0000 (70.9483) lr 1.8090e-03 eta 4:45:14 +epoch [12/50] batch [295/500] time 0.892 (0.891) data 0.000 (0.003) loss 0.9453 (1.1632) acc 75.0000 (71.0169) lr 1.8090e-03 eta 4:45:11 +epoch [12/50] batch [300/500] time 0.870 (0.891) data 0.000 (0.003) loss 1.0693 (1.1652) acc 62.5000 (70.9479) lr 1.8090e-03 eta 4:45:05 +epoch [12/50] batch [305/500] time 0.858 (0.891) data 0.000 (0.003) loss 1.1787 (1.1620) acc 68.7500 (70.9836) lr 1.8090e-03 eta 4:44:57 +epoch [12/50] batch [310/500] time 0.869 (0.891) data 0.000 (0.003) loss 1.2529 (1.1641) acc 68.7500 (70.9879) lr 1.8090e-03 eta 4:44:48 +epoch [12/50] batch [315/500] time 0.885 (0.891) data 0.000 (0.003) loss 0.5615 (1.1603) acc 87.5000 (71.1111) lr 1.8090e-03 eta 4:44:45 +epoch [12/50] batch [320/500] time 0.896 (0.891) data 0.000 (0.003) loss 1.6025 (1.1613) acc 59.3750 (71.0938) lr 1.8090e-03 eta 4:44:40 +epoch 
[12/50] batch [325/500] time 0.861 (0.890) data 0.000 (0.003) loss 1.3027 (1.1605) acc 59.3750 (71.0962) lr 1.8090e-03 eta 4:44:32 +epoch [12/50] batch [330/500] time 0.869 (0.890) data 0.000 (0.002) loss 0.4177 (1.1573) acc 90.6250 (71.1932) lr 1.8090e-03 eta 4:44:23 +epoch [12/50] batch [335/500] time 0.876 (0.890) data 0.000 (0.002) loss 0.9414 (1.1548) acc 81.2500 (71.2407) lr 1.8090e-03 eta 4:44:16 +epoch [12/50] batch [340/500] time 0.895 (0.890) data 0.000 (0.002) loss 0.9424 (1.1521) acc 71.8750 (71.2684) lr 1.8090e-03 eta 4:44:11 +epoch [12/50] batch [345/500] time 0.867 (0.890) data 0.000 (0.002) loss 0.6792 (1.1514) acc 78.1250 (71.2681) lr 1.8090e-03 eta 4:44:03 +epoch [12/50] batch [350/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.0947 (1.1504) acc 59.3750 (71.2232) lr 1.8090e-03 eta 4:43:58 +epoch [12/50] batch [355/500] time 1.008 (0.890) data 0.000 (0.002) loss 1.5029 (1.1505) acc 75.0000 (71.2324) lr 1.8090e-03 eta 4:44:00 +epoch [12/50] batch [360/500] time 0.870 (0.890) data 0.000 (0.002) loss 0.8076 (1.1501) acc 81.2500 (71.2587) lr 1.8090e-03 eta 4:43:58 +epoch [12/50] batch [365/500] time 0.858 (0.890) data 0.000 (0.002) loss 1.3809 (1.1508) acc 65.6250 (71.3014) lr 1.8090e-03 eta 4:43:49 +epoch [12/50] batch [370/500] time 0.858 (0.890) data 0.000 (0.002) loss 0.9873 (1.1528) acc 75.0000 (71.2416) lr 1.8090e-03 eta 4:43:40 +epoch [12/50] batch [375/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.4111 (1.1558) acc 71.8750 (71.2000) lr 1.8090e-03 eta 4:43:37 +epoch [12/50] batch [380/500] time 0.891 (0.890) data 0.000 (0.002) loss 1.2773 (1.1566) acc 59.3750 (71.1760) lr 1.8090e-03 eta 4:43:30 +epoch [12/50] batch [385/500] time 0.876 (0.890) data 0.000 (0.002) loss 1.1611 (1.1584) acc 75.0000 (71.1120) lr 1.8090e-03 eta 4:43:26 +epoch [12/50] batch [390/500] time 0.871 (0.890) data 0.000 (0.002) loss 1.5254 (1.1630) acc 53.1250 (71.0417) lr 1.8090e-03 eta 4:43:22 +epoch [12/50] batch [395/500] time 0.879 (0.890) data 0.000 (0.002) loss 
0.6719 (1.1592) acc 84.3750 (71.1472) lr 1.8090e-03 eta 4:43:15 +epoch [12/50] batch [400/500] time 0.865 (0.890) data 0.000 (0.002) loss 0.8486 (1.1598) acc 81.2500 (71.1797) lr 1.8090e-03 eta 4:43:13 +epoch [12/50] batch [405/500] time 0.907 (0.890) data 0.000 (0.002) loss 1.0713 (1.1589) acc 78.1250 (71.2191) lr 1.8090e-03 eta 4:43:06 +epoch [12/50] batch [410/500] time 0.883 (0.890) data 0.000 (0.002) loss 1.4541 (1.1603) acc 71.8750 (71.2271) lr 1.8090e-03 eta 4:43:00 +epoch [12/50] batch [415/500] time 0.902 (0.890) data 0.000 (0.002) loss 0.5601 (1.1586) acc 81.2500 (71.2349) lr 1.8090e-03 eta 4:42:58 +epoch [12/50] batch [420/500] time 0.849 (0.890) data 0.000 (0.002) loss 1.1650 (1.1596) acc 65.6250 (71.2202) lr 1.8090e-03 eta 4:42:54 +epoch [12/50] batch [425/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.6328 (1.1617) acc 65.6250 (71.1765) lr 1.8090e-03 eta 4:42:48 +epoch [12/50] batch [430/500] time 0.909 (0.890) data 0.000 (0.002) loss 0.3923 (1.1591) acc 87.5000 (71.2282) lr 1.8090e-03 eta 4:42:47 +epoch [12/50] batch [435/500] time 0.909 (0.890) data 0.000 (0.002) loss 1.1650 (1.1586) acc 68.7500 (71.2284) lr 1.8090e-03 eta 4:42:38 +epoch [12/50] batch [440/500] time 0.888 (0.889) data 0.000 (0.002) loss 1.4297 (1.1606) acc 59.3750 (71.1577) lr 1.8090e-03 eta 4:42:32 +epoch [12/50] batch [445/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.7080 (1.1623) acc 53.1250 (71.1236) lr 1.8090e-03 eta 4:42:26 +epoch [12/50] batch [450/500] time 0.889 (0.889) data 0.000 (0.002) loss 0.9580 (1.1610) acc 78.1250 (71.1528) lr 1.8090e-03 eta 4:42:23 +epoch [12/50] batch [455/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.6475 (1.1601) acc 78.1250 (71.1607) lr 1.8090e-03 eta 4:42:18 +epoch [12/50] batch [460/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.2998 (1.1583) acc 68.7500 (71.1889) lr 1.8090e-03 eta 4:42:12 +epoch [12/50] batch [465/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.7222 (1.1584) acc 78.1250 (71.1694) lr 1.8090e-03 eta 4:42:03 
+epoch [12/50] batch [470/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.7456 (1.1561) acc 84.3750 (71.2101) lr 1.8090e-03 eta 4:41:56 +epoch [12/50] batch [475/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.8584 (1.1542) acc 75.0000 (71.2171) lr 1.8090e-03 eta 4:41:51 +epoch [12/50] batch [480/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.0850 (1.1544) acc 71.8750 (71.2044) lr 1.8090e-03 eta 4:41:46 +epoch [12/50] batch [485/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.7158 (1.1543) acc 78.1250 (71.2113) lr 1.8090e-03 eta 4:41:44 +epoch [12/50] batch [490/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.8110 (1.1533) acc 81.2500 (71.2245) lr 1.8090e-03 eta 4:41:39 +epoch [12/50] batch [495/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.3037 (1.1510) acc 62.5000 (71.2437) lr 1.8090e-03 eta 4:41:31 +epoch [12/50] batch [500/500] time 0.890 (0.889) data 0.000 (0.002) loss 1.3984 (1.1511) acc 56.2500 (71.2188) lr 1.7705e-03 eta 4:41:28 +epoch [13/50] batch [5/500] time 0.862 (1.040) data 0.000 (0.142) loss 0.6616 (1.0113) acc 87.5000 (77.5000) lr 1.7705e-03 eta 5:29:07 +epoch [13/50] batch [10/500] time 0.905 (0.961) data 0.000 (0.071) loss 1.5850 (1.0265) acc 59.3750 (75.0000) lr 1.7705e-03 eta 5:04:13 +epoch [13/50] batch [15/500] time 0.881 (0.935) data 0.000 (0.047) loss 1.6074 (1.1086) acc 68.7500 (73.5417) lr 1.7705e-03 eta 4:55:48 +epoch [13/50] batch [20/500] time 0.890 (0.923) data 0.000 (0.036) loss 0.5757 (1.0925) acc 81.2500 (73.9062) lr 1.7705e-03 eta 4:51:54 +epoch [13/50] batch [25/500] time 0.873 (0.914) data 0.000 (0.029) loss 0.7275 (1.1058) acc 75.0000 (73.1250) lr 1.7705e-03 eta 4:48:54 +epoch [13/50] batch [30/500] time 0.880 (0.906) data 0.000 (0.024) loss 0.5562 (1.0809) acc 78.1250 (73.9583) lr 1.7705e-03 eta 4:46:25 +epoch [13/50] batch [35/500] time 0.887 (0.903) data 0.000 (0.020) loss 0.9194 (1.0760) acc 71.8750 (73.5714) lr 1.7705e-03 eta 4:45:17 +epoch [13/50] batch [40/500] time 0.898 (0.901) data 0.000 (0.018) loss 
0.7949 (1.1015) acc 71.8750 (72.5781) lr 1.7705e-03 eta 4:44:50 +epoch [13/50] batch [45/500] time 0.872 (0.898) data 0.000 (0.016) loss 1.2471 (1.0847) acc 71.8750 (72.5000) lr 1.7705e-03 eta 4:43:42 +epoch [13/50] batch [50/500] time 0.881 (0.896) data 0.000 (0.014) loss 0.7378 (1.0803) acc 78.1250 (72.7500) lr 1.7705e-03 eta 4:42:58 +epoch [13/50] batch [55/500] time 0.863 (0.894) data 0.000 (0.013) loss 1.7021 (1.1130) acc 56.2500 (71.6477) lr 1.7705e-03 eta 4:42:25 +epoch [13/50] batch [60/500] time 0.903 (0.895) data 0.000 (0.012) loss 0.8994 (1.0935) acc 78.1250 (71.9271) lr 1.7705e-03 eta 4:42:35 +epoch [13/50] batch [65/500] time 0.888 (0.895) data 0.000 (0.011) loss 0.7085 (1.0845) acc 81.2500 (72.0673) lr 1.7705e-03 eta 4:42:24 +epoch [13/50] batch [70/500] time 0.856 (0.893) data 0.000 (0.010) loss 0.8701 (1.0873) acc 75.0000 (71.9643) lr 1.7705e-03 eta 4:41:44 +epoch [13/50] batch [75/500] time 0.894 (0.893) data 0.000 (0.010) loss 0.8330 (1.0946) acc 81.2500 (71.9583) lr 1.7705e-03 eta 4:41:37 +epoch [13/50] batch [80/500] time 0.876 (0.892) data 0.000 (0.009) loss 1.5205 (1.1004) acc 65.6250 (71.9922) lr 1.7705e-03 eta 4:41:21 +epoch [13/50] batch [85/500] time 0.914 (0.891) data 0.000 (0.009) loss 1.1357 (1.0999) acc 71.8750 (71.9485) lr 1.7705e-03 eta 4:40:59 +epoch [13/50] batch [90/500] time 0.877 (0.891) data 0.000 (0.008) loss 0.7007 (1.0903) acc 75.0000 (72.1528) lr 1.7705e-03 eta 4:40:48 +epoch [13/50] batch [95/500] time 0.901 (0.890) data 0.000 (0.008) loss 0.7549 (1.0923) acc 81.2500 (72.2368) lr 1.7705e-03 eta 4:40:34 +epoch [13/50] batch [100/500] time 0.884 (0.891) data 0.000 (0.007) loss 1.6807 (1.0969) acc 62.5000 (72.0938) lr 1.7705e-03 eta 4:40:31 +epoch [13/50] batch [105/500] time 0.895 (0.892) data 0.000 (0.007) loss 0.7124 (1.0952) acc 84.3750 (72.1726) lr 1.7705e-03 eta 4:40:51 +epoch [13/50] batch [110/500] time 0.888 (0.892) data 0.000 (0.007) loss 1.2490 (1.0916) acc 75.0000 (72.3011) lr 1.7705e-03 eta 4:40:46 +epoch [13/50] 
batch [115/500] time 0.901 (0.892) data 0.000 (0.006) loss 0.7441 (1.0961) acc 71.8750 (72.2554) lr 1.7705e-03 eta 4:40:45 +epoch [13/50] batch [120/500] time 0.866 (0.891) data 0.000 (0.006) loss 1.1289 (1.1041) acc 75.0000 (72.0052) lr 1.7705e-03 eta 4:40:26 +epoch [13/50] batch [125/500] time 0.862 (0.890) data 0.000 (0.006) loss 0.8721 (1.1083) acc 87.5000 (71.8500) lr 1.7705e-03 eta 4:40:04 +epoch [13/50] batch [130/500] time 0.873 (0.890) data 0.000 (0.006) loss 1.1230 (1.1116) acc 75.0000 (71.8750) lr 1.7705e-03 eta 4:40:00 +epoch [13/50] batch [135/500] time 0.898 (0.890) data 0.000 (0.005) loss 0.8403 (1.1091) acc 75.0000 (72.1296) lr 1.7705e-03 eta 4:39:57 +epoch [13/50] batch [140/500] time 0.895 (0.890) data 0.000 (0.005) loss 0.8906 (1.1100) acc 75.0000 (72.2545) lr 1.7705e-03 eta 4:39:45 +epoch [13/50] batch [145/500] time 0.888 (0.890) data 0.000 (0.005) loss 1.2441 (1.1051) acc 78.1250 (72.5216) lr 1.7705e-03 eta 4:39:43 +epoch [13/50] batch [150/500] time 0.870 (0.889) data 0.000 (0.005) loss 1.3242 (1.1051) acc 68.7500 (72.6875) lr 1.7705e-03 eta 4:39:24 +epoch [13/50] batch [155/500] time 0.899 (0.889) data 0.000 (0.005) loss 1.9121 (1.1084) acc 71.8750 (72.6008) lr 1.7705e-03 eta 4:39:12 +epoch [13/50] batch [160/500] time 0.872 (0.889) data 0.000 (0.005) loss 1.0791 (1.1112) acc 75.0000 (72.5977) lr 1.7705e-03 eta 4:39:06 +epoch [13/50] batch [165/500] time 0.869 (0.889) data 0.000 (0.005) loss 0.8340 (1.1051) acc 71.8750 (72.5000) lr 1.7705e-03 eta 4:38:56 +epoch [13/50] batch [170/500] time 0.906 (0.888) data 0.000 (0.004) loss 1.1230 (1.1059) acc 71.8750 (72.4816) lr 1.7705e-03 eta 4:38:50 +epoch [13/50] batch [175/500] time 0.853 (0.888) data 0.000 (0.004) loss 1.1689 (1.1116) acc 75.0000 (72.4643) lr 1.7705e-03 eta 4:38:40 +epoch [13/50] batch [180/500] time 0.893 (0.888) data 0.000 (0.004) loss 1.4463 (1.1094) acc 75.0000 (72.5521) lr 1.7705e-03 eta 4:38:33 +epoch [13/50] batch [185/500] time 0.872 (0.888) data 0.000 (0.004) loss 1.5498 
(1.1185) acc 68.7500 (72.3986) lr 1.7705e-03 eta 4:38:26 +epoch [13/50] batch [190/500] time 0.954 (0.888) data 0.000 (0.004) loss 1.0586 (1.1160) acc 65.6250 (72.4671) lr 1.7705e-03 eta 4:38:23 +epoch [13/50] batch [195/500] time 0.877 (0.888) data 0.000 (0.004) loss 0.9365 (1.1191) acc 68.7500 (72.3878) lr 1.7705e-03 eta 4:38:16 +epoch [13/50] batch [200/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.0977 (1.1192) acc 75.0000 (72.5156) lr 1.7705e-03 eta 4:38:16 +epoch [13/50] batch [205/500] time 0.858 (0.889) data 0.000 (0.004) loss 1.4736 (1.1200) acc 65.6250 (72.5000) lr 1.7705e-03 eta 4:38:19 +epoch [13/50] batch [210/500] time 0.860 (0.889) data 0.000 (0.004) loss 1.0420 (1.1186) acc 75.0000 (72.5149) lr 1.7705e-03 eta 4:38:15 +epoch [13/50] batch [215/500] time 0.918 (0.889) data 0.000 (0.004) loss 1.3545 (1.1212) acc 62.5000 (72.5291) lr 1.7705e-03 eta 4:38:10 +epoch [13/50] batch [220/500] time 0.910 (0.889) data 0.000 (0.003) loss 1.1289 (1.1198) acc 71.8750 (72.5568) lr 1.7705e-03 eta 4:38:07 +epoch [13/50] batch [225/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.1445 (1.1262) acc 78.1250 (72.3472) lr 1.7705e-03 eta 4:38:06 +epoch [13/50] batch [230/500] time 0.881 (0.889) data 0.000 (0.003) loss 0.9614 (1.1238) acc 75.0000 (72.3370) lr 1.7705e-03 eta 4:37:59 +epoch [13/50] batch [235/500] time 0.882 (0.889) data 0.000 (0.003) loss 0.8706 (1.1225) acc 81.2500 (72.4202) lr 1.7705e-03 eta 4:37:54 +epoch [13/50] batch [240/500] time 0.906 (0.888) data 0.000 (0.003) loss 1.2598 (1.1228) acc 75.0000 (72.4870) lr 1.7705e-03 eta 4:37:46 +epoch [13/50] batch [245/500] time 0.996 (0.889) data 0.000 (0.003) loss 1.1094 (1.1218) acc 71.8750 (72.4235) lr 1.7705e-03 eta 4:37:49 +epoch [13/50] batch [250/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.0850 (1.1286) acc 78.1250 (72.3250) lr 1.7705e-03 eta 4:37:40 +epoch [13/50] batch [255/500] time 0.885 (0.889) data 0.000 (0.003) loss 0.7651 (1.1236) acc 81.2500 (72.4755) lr 1.7705e-03 eta 4:37:38 +epoch 
[13/50] batch [260/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.0264 (1.1236) acc 84.3750 (72.4760) lr 1.7705e-03 eta 4:37:31 +epoch [13/50] batch [265/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.0879 (1.1217) acc 71.8750 (72.5118) lr 1.7705e-03 eta 4:37:26 +epoch [13/50] batch [270/500] time 0.871 (0.889) data 0.000 (0.003) loss 1.0127 (1.1159) acc 81.2500 (72.5926) lr 1.7705e-03 eta 4:37:23 +epoch [13/50] batch [275/500] time 0.869 (0.889) data 0.000 (0.003) loss 1.7949 (1.1213) acc 65.6250 (72.4318) lr 1.7705e-03 eta 4:37:17 +epoch [13/50] batch [280/500] time 0.864 (0.888) data 0.000 (0.003) loss 1.2881 (1.1257) acc 71.8750 (72.3214) lr 1.7705e-03 eta 4:37:10 +epoch [13/50] batch [285/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.5542 (1.1269) acc 81.2500 (72.3026) lr 1.7705e-03 eta 4:37:08 +epoch [13/50] batch [290/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.1953 (1.1286) acc 71.8750 (72.2522) lr 1.7705e-03 eta 4:37:07 +epoch [13/50] batch [295/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.0352 (1.1303) acc 75.0000 (72.2987) lr 1.7705e-03 eta 4:36:59 +epoch [13/50] batch [300/500] time 0.864 (0.888) data 0.000 (0.003) loss 1.5439 (1.1317) acc 78.1250 (72.2812) lr 1.7705e-03 eta 4:36:51 +epoch [13/50] batch [305/500] time 0.865 (0.888) data 0.000 (0.003) loss 1.2324 (1.1303) acc 71.8750 (72.3258) lr 1.7705e-03 eta 4:36:46 +epoch [13/50] batch [310/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.7793 (1.1319) acc 62.5000 (72.2379) lr 1.7705e-03 eta 4:36:40 +epoch [13/50] batch [315/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.0967 (1.1335) acc 71.8750 (72.1925) lr 1.7705e-03 eta 4:36:35 +epoch [13/50] batch [320/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.9478 (1.1319) acc 81.2500 (72.2070) lr 1.7705e-03 eta 4:36:32 +epoch [13/50] batch [325/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.4678 (1.1285) acc 62.5000 (72.3173) lr 1.7705e-03 eta 4:36:22 +epoch [13/50] batch [330/500] time 0.863 (0.888) data 0.000 (0.002) loss 
0.9033 (1.1250) acc 81.2500 (72.3580) lr 1.7705e-03 eta 4:36:15 +epoch [13/50] batch [335/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.2520 (1.1274) acc 65.6250 (72.3134) lr 1.7705e-03 eta 4:36:08 +epoch [13/50] batch [340/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.8623 (1.1257) acc 71.8750 (72.3805) lr 1.7705e-03 eta 4:36:01 +epoch [13/50] batch [345/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.1533 (1.1247) acc 65.6250 (72.3822) lr 1.7705e-03 eta 4:36:04 +epoch [13/50] batch [350/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.8979 (1.1255) acc 78.1250 (72.3750) lr 1.7705e-03 eta 4:35:59 +epoch [13/50] batch [355/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.3965 (1.1271) acc 71.8750 (72.3239) lr 1.7705e-03 eta 4:35:54 +epoch [13/50] batch [360/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.6079 (1.1283) acc 78.1250 (72.2830) lr 1.7705e-03 eta 4:35:49 +epoch [13/50] batch [365/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.9746 (1.1279) acc 78.1250 (72.2774) lr 1.7705e-03 eta 4:35:41 +epoch [13/50] batch [370/500] time 0.881 (0.888) data 0.000 (0.002) loss 0.9360 (1.1250) acc 75.0000 (72.3395) lr 1.7705e-03 eta 4:35:39 +epoch [13/50] batch [375/500] time 0.906 (0.888) data 0.000 (0.002) loss 1.2900 (1.1257) acc 62.5000 (72.3083) lr 1.7705e-03 eta 4:35:34 +epoch [13/50] batch [380/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.1865 (1.1225) acc 68.7500 (72.3602) lr 1.7705e-03 eta 4:35:24 +epoch [13/50] batch [385/500] time 0.859 (0.887) data 0.000 (0.002) loss 0.8921 (1.1241) acc 78.1250 (72.3133) lr 1.7705e-03 eta 4:35:17 +epoch [13/50] batch [390/500] time 0.875 (0.887) data 0.001 (0.002) loss 1.6611 (1.1269) acc 71.8750 (72.3077) lr 1.7705e-03 eta 4:35:15 +epoch [13/50] batch [395/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.4570 (1.1278) acc 62.5000 (72.2468) lr 1.7705e-03 eta 4:35:10 +epoch [13/50] batch [400/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.0117 (1.1274) acc 78.1250 (72.2891) lr 1.7705e-03 eta 4:35:04 
+epoch [13/50] batch [405/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.4932 (1.1286) acc 65.6250 (72.2685) lr 1.7705e-03 eta 4:34:59 +epoch [13/50] batch [410/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.2637 (1.1287) acc 68.7500 (72.2485) lr 1.7705e-03 eta 4:34:57 +epoch [13/50] batch [415/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.9702 (1.1290) acc 75.0000 (72.2590) lr 1.7705e-03 eta 4:34:55 +epoch [13/50] batch [420/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.1543 (1.1314) acc 59.3750 (72.1280) lr 1.7705e-03 eta 4:34:51 +epoch [13/50] batch [425/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.5742 (1.1291) acc 68.7500 (72.1838) lr 1.7705e-03 eta 4:34:43 +epoch [13/50] batch [430/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.1191 (1.1302) acc 62.5000 (72.1366) lr 1.7705e-03 eta 4:34:39 +epoch [13/50] batch [435/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.0537 (1.1350) acc 81.2500 (72.0187) lr 1.7705e-03 eta 4:34:34 +epoch [13/50] batch [440/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.5928 (1.1332) acc 84.3750 (72.0668) lr 1.7705e-03 eta 4:34:30 +epoch [13/50] batch [445/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.9663 (1.1334) acc 68.7500 (72.0576) lr 1.7705e-03 eta 4:34:24 +epoch [13/50] batch [450/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.3770 (1.1353) acc 65.6250 (72.0000) lr 1.7705e-03 eta 4:34:20 +epoch [13/50] batch [455/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.1367 (1.1337) acc 75.0000 (72.0398) lr 1.7705e-03 eta 4:34:14 +epoch [13/50] batch [460/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.8721 (1.1344) acc 56.2500 (71.9905) lr 1.7705e-03 eta 4:34:08 +epoch [13/50] batch [465/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.8159 (1.1347) acc 78.1250 (71.9624) lr 1.7705e-03 eta 4:34:01 +epoch [13/50] batch [470/500] time 0.921 (0.887) data 0.000 (0.002) loss 1.5625 (1.1369) acc 65.6250 (71.9215) lr 1.7705e-03 eta 4:33:56 +epoch [13/50] batch [475/500] time 0.876 (0.887) data 0.000 
(0.002) loss 1.1689 (1.1407) acc 65.6250 (71.8158) lr 1.7705e-03 eta 4:33:52 +epoch [13/50] batch [480/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.5840 (1.1410) acc 50.0000 (71.7578) lr 1.7705e-03 eta 4:33:49 +epoch [13/50] batch [485/500] time 0.882 (0.887) data 0.001 (0.002) loss 1.3945 (1.1410) acc 59.3750 (71.7397) lr 1.7705e-03 eta 4:33:44 +epoch [13/50] batch [490/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.1787 (1.1422) acc 62.5000 (71.7538) lr 1.7705e-03 eta 4:33:42 +epoch [13/50] batch [495/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.0439 (1.1443) acc 71.8750 (71.7298) lr 1.7705e-03 eta 4:33:40 +epoch [13/50] batch [500/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9976 (1.1451) acc 71.8750 (71.6688) lr 1.7290e-03 eta 4:33:34 +epoch [14/50] batch [5/500] time 0.870 (1.011) data 0.000 (0.131) loss 1.4463 (1.0748) acc 68.7500 (74.3750) lr 1.7290e-03 eta 5:11:34 +epoch [14/50] batch [10/500] time 0.893 (0.942) data 0.000 (0.066) loss 1.4268 (1.0068) acc 62.5000 (74.6875) lr 1.7290e-03 eta 4:50:13 +epoch [14/50] batch [15/500] time 0.899 (0.924) data 0.000 (0.044) loss 0.7417 (1.0590) acc 75.0000 (74.1667) lr 1.7290e-03 eta 4:44:31 +epoch [14/50] batch [20/500] time 0.895 (0.912) data 0.000 (0.033) loss 1.9150 (1.1523) acc 53.1250 (71.4062) lr 1.7290e-03 eta 4:40:51 +epoch [14/50] batch [25/500] time 0.896 (0.904) data 0.000 (0.026) loss 0.6372 (1.1080) acc 84.3750 (72.7500) lr 1.7290e-03 eta 4:38:26 +epoch [14/50] batch [30/500] time 0.874 (0.900) data 0.000 (0.022) loss 1.0693 (1.0857) acc 81.2500 (73.9583) lr 1.7290e-03 eta 4:37:04 +epoch [14/50] batch [35/500] time 0.874 (0.899) data 0.000 (0.019) loss 1.3311 (1.0972) acc 71.8750 (72.8571) lr 1.7290e-03 eta 4:36:36 +epoch [14/50] batch [40/500] time 0.898 (0.896) data 0.000 (0.017) loss 1.0625 (1.1205) acc 68.7500 (72.3438) lr 1.7290e-03 eta 4:35:33 +epoch [14/50] batch [45/500] time 0.890 (0.898) data 0.000 (0.015) loss 1.5947 (1.1440) acc 71.8750 (71.9444) lr 1.7290e-03 eta 4:36:08 
+epoch [14/50] batch [50/500] time 0.881 (0.896) data 0.000 (0.013) loss 0.8779 (1.1200) acc 75.0000 (72.4375) lr 1.7290e-03 eta 4:35:35 +epoch [14/50] batch [55/500] time 0.859 (0.895) data 0.000 (0.012) loss 1.4590 (1.1359) acc 65.6250 (71.9318) lr 1.7290e-03 eta 4:35:05 +epoch [14/50] batch [60/500] time 0.900 (0.894) data 0.000 (0.011) loss 0.9258 (1.1469) acc 75.0000 (71.4583) lr 1.7290e-03 eta 4:34:50 +epoch [14/50] batch [65/500] time 0.870 (0.893) data 0.000 (0.010) loss 1.3613 (1.1392) acc 65.6250 (71.6827) lr 1.7290e-03 eta 4:34:25 +epoch [14/50] batch [70/500] time 0.880 (0.892) data 0.000 (0.010) loss 1.1660 (1.1199) acc 59.3750 (71.6964) lr 1.7290e-03 eta 4:34:06 +epoch [14/50] batch [75/500] time 0.869 (0.892) data 0.000 (0.009) loss 1.3818 (1.1269) acc 75.0000 (71.6250) lr 1.7290e-03 eta 4:33:58 +epoch [14/50] batch [80/500] time 0.847 (0.891) data 0.000 (0.008) loss 1.0059 (1.1369) acc 68.7500 (71.5234) lr 1.7290e-03 eta 4:33:29 +epoch [14/50] batch [85/500] time 0.992 (0.891) data 0.000 (0.008) loss 0.6719 (1.1377) acc 87.5000 (71.6912) lr 1.7290e-03 eta 4:33:36 +epoch [14/50] batch [90/500] time 0.871 (0.891) data 0.000 (0.008) loss 0.9224 (1.1467) acc 75.0000 (71.4931) lr 1.7290e-03 eta 4:33:22 +epoch [14/50] batch [95/500] time 0.874 (0.890) data 0.000 (0.007) loss 1.2500 (1.1471) acc 71.8750 (71.4803) lr 1.7290e-03 eta 4:33:04 +epoch [14/50] batch [100/500] time 0.892 (0.890) data 0.000 (0.007) loss 0.8813 (1.1447) acc 81.2500 (71.4062) lr 1.7290e-03 eta 4:32:56 +epoch [14/50] batch [105/500] time 0.888 (0.889) data 0.000 (0.006) loss 1.3623 (1.1426) acc 68.7500 (71.3988) lr 1.7290e-03 eta 4:32:37 +epoch [14/50] batch [110/500] time 0.881 (0.890) data 0.000 (0.006) loss 0.9253 (1.1385) acc 81.2500 (71.6193) lr 1.7290e-03 eta 4:32:42 +epoch [14/50] batch [115/500] time 0.896 (0.889) data 0.000 (0.006) loss 0.7017 (1.1294) acc 71.8750 (71.7391) lr 1.7290e-03 eta 4:32:29 +epoch [14/50] batch [120/500] time 0.849 (0.889) data 0.000 (0.006) loss 
1.0146 (1.1208) acc 75.0000 (71.7708) lr 1.7290e-03 eta 4:32:11 +epoch [14/50] batch [125/500] time 0.884 (0.889) data 0.000 (0.005) loss 0.9263 (1.1274) acc 71.8750 (71.5750) lr 1.7290e-03 eta 4:32:08 +epoch [14/50] batch [130/500] time 0.900 (0.888) data 0.000 (0.005) loss 1.5498 (1.1307) acc 65.6250 (71.5385) lr 1.7290e-03 eta 4:32:00 +epoch [14/50] batch [135/500] time 0.859 (0.888) data 0.000 (0.005) loss 0.7559 (1.1179) acc 78.1250 (71.8287) lr 1.7290e-03 eta 4:31:44 +epoch [14/50] batch [140/500] time 0.863 (0.887) data 0.000 (0.005) loss 1.1250 (1.1209) acc 71.8750 (71.8527) lr 1.7290e-03 eta 4:31:28 +epoch [14/50] batch [145/500] time 0.916 (0.887) data 0.000 (0.005) loss 1.1650 (1.1200) acc 71.8750 (71.9181) lr 1.7290e-03 eta 4:31:21 +epoch [14/50] batch [150/500] time 0.910 (0.887) data 0.000 (0.005) loss 1.1787 (1.1177) acc 71.8750 (72.0417) lr 1.7290e-03 eta 4:31:24 +epoch [14/50] batch [155/500] time 0.873 (0.888) data 0.000 (0.004) loss 1.2021 (1.1160) acc 68.7500 (72.0565) lr 1.7290e-03 eta 4:31:23 +epoch [14/50] batch [160/500] time 0.876 (0.888) data 0.000 (0.004) loss 1.0957 (1.1141) acc 62.5000 (71.9922) lr 1.7290e-03 eta 4:31:25 +epoch [14/50] batch [165/500] time 0.868 (0.888) data 0.000 (0.004) loss 1.8594 (1.1165) acc 50.0000 (71.9129) lr 1.7290e-03 eta 4:31:26 +epoch [14/50] batch [170/500] time 0.898 (0.888) data 0.000 (0.004) loss 1.1006 (1.1152) acc 68.7500 (71.9485) lr 1.7290e-03 eta 4:31:19 +epoch [14/50] batch [175/500] time 0.907 (0.888) data 0.000 (0.004) loss 0.9521 (1.1162) acc 78.1250 (71.9286) lr 1.7290e-03 eta 4:31:13 +epoch [14/50] batch [180/500] time 0.908 (0.888) data 0.000 (0.004) loss 1.0684 (1.1139) acc 71.8750 (72.0833) lr 1.7290e-03 eta 4:31:12 +epoch [14/50] batch [185/500] time 0.908 (0.889) data 0.000 (0.004) loss 1.1299 (1.1143) acc 71.8750 (72.0270) lr 1.7290e-03 eta 4:31:22 +epoch [14/50] batch [190/500] time 0.894 (0.889) data 0.000 (0.004) loss 1.5264 (1.1199) acc 50.0000 (71.8586) lr 1.7290e-03 eta 4:31:20 
+epoch [14/50] batch [195/500] time 0.923 (0.889) data 0.000 (0.004) loss 1.2207 (1.1263) acc 68.7500 (71.7308) lr 1.7290e-03 eta 4:31:16 +epoch [14/50] batch [200/500] time 0.878 (0.889) data 0.000 (0.004) loss 0.5854 (1.1260) acc 78.1250 (71.8125) lr 1.7290e-03 eta 4:31:14 +epoch [14/50] batch [205/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.7896 (1.1264) acc 81.2500 (71.8445) lr 1.7290e-03 eta 4:31:14 +epoch [14/50] batch [210/500] time 0.886 (0.889) data 0.000 (0.003) loss 0.6870 (1.1199) acc 75.0000 (71.9494) lr 1.7290e-03 eta 4:31:03 +epoch [14/50] batch [215/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.1904 (1.1183) acc 68.7500 (72.0930) lr 1.7290e-03 eta 4:30:50 +epoch [14/50] batch [220/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.0508 (1.1176) acc 68.7500 (72.0739) lr 1.7290e-03 eta 4:30:40 +epoch [14/50] batch [225/500] time 0.858 (0.888) data 0.000 (0.003) loss 0.9634 (1.1150) acc 78.1250 (72.1389) lr 1.7290e-03 eta 4:30:31 +epoch [14/50] batch [230/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.5615 (1.1207) acc 50.0000 (72.0380) lr 1.7290e-03 eta 4:30:32 +epoch [14/50] batch [235/500] time 0.866 (0.888) data 0.000 (0.003) loss 0.8799 (1.1260) acc 78.1250 (71.9548) lr 1.7290e-03 eta 4:30:21 +epoch [14/50] batch [240/500] time 0.886 (0.888) data 0.000 (0.003) loss 1.3984 (1.1242) acc 71.8750 (72.0052) lr 1.7290e-03 eta 4:30:17 +epoch [14/50] batch [245/500] time 0.909 (0.888) data 0.000 (0.003) loss 1.1357 (1.1241) acc 71.8750 (72.0791) lr 1.7290e-03 eta 4:30:16 +epoch [14/50] batch [250/500] time 0.895 (0.888) data 0.000 (0.003) loss 1.0098 (1.1241) acc 75.0000 (72.0125) lr 1.7290e-03 eta 4:30:11 +epoch [14/50] batch [255/500] time 0.863 (0.888) data 0.000 (0.003) loss 1.1221 (1.1221) acc 68.7500 (72.0343) lr 1.7290e-03 eta 4:30:02 +epoch [14/50] batch [260/500] time 0.856 (0.888) data 0.000 (0.003) loss 0.8989 (1.1252) acc 78.1250 (72.1034) lr 1.7290e-03 eta 4:29:53 +epoch [14/50] batch [265/500] time 0.884 (0.888) data 0.000 
(0.003) loss 1.1338 (1.1245) acc 68.7500 (72.0283) lr 1.7290e-03 eta 4:29:47 +epoch [14/50] batch [270/500] time 0.887 (0.888) data 0.001 (0.003) loss 1.0283 (1.1262) acc 78.1250 (72.0370) lr 1.7290e-03 eta 4:29:43 +epoch [14/50] batch [275/500] time 0.904 (0.888) data 0.000 (0.003) loss 1.2764 (1.1300) acc 75.0000 (72.0114) lr 1.7290e-03 eta 4:29:40 +epoch [14/50] batch [280/500] time 0.884 (0.888) data 0.000 (0.003) loss 1.4492 (1.1293) acc 62.5000 (72.0312) lr 1.7290e-03 eta 4:29:36 +epoch [14/50] batch [285/500] time 0.914 (0.888) data 0.000 (0.003) loss 1.9854 (1.1309) acc 59.3750 (71.9627) lr 1.7290e-03 eta 4:29:35 +epoch [14/50] batch [290/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.2412 (1.1316) acc 71.8750 (72.0043) lr 1.7290e-03 eta 4:29:29 +epoch [14/50] batch [295/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.1816 (1.1344) acc 71.8750 (71.9703) lr 1.7290e-03 eta 4:29:27 +epoch [14/50] batch [300/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.1621 (1.1345) acc 75.0000 (71.9688) lr 1.7290e-03 eta 4:29:26 +epoch [14/50] batch [305/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.7051 (1.1344) acc 62.5000 (72.0082) lr 1.7290e-03 eta 4:29:20 +epoch [14/50] batch [310/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.3164 (1.1321) acc 71.8750 (72.0565) lr 1.7290e-03 eta 4:29:15 +epoch [14/50] batch [315/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.3818 (1.1322) acc 65.6250 (72.0139) lr 1.7290e-03 eta 4:29:10 +epoch [14/50] batch [320/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.8926 (1.1327) acc 81.2500 (71.9824) lr 1.7290e-03 eta 4:29:00 +epoch [14/50] batch [325/500] time 0.988 (0.888) data 0.000 (0.002) loss 1.4385 (1.1336) acc 62.5000 (71.9423) lr 1.7290e-03 eta 4:29:01 +epoch [14/50] batch [330/500] time 0.912 (0.888) data 0.000 (0.002) loss 1.2559 (1.1358) acc 78.1250 (71.9223) lr 1.7290e-03 eta 4:28:58 +epoch [14/50] batch [335/500] time 0.858 (0.888) data 0.000 (0.002) loss 0.8281 (1.1361) acc 75.0000 (71.9310) lr 1.7290e-03 
eta 4:28:48 +epoch [14/50] batch [340/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.8726 (1.1346) acc 75.0000 (71.9301) lr 1.7290e-03 eta 4:28:44 +epoch [14/50] batch [345/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.4014 (1.1348) acc 71.8750 (71.9565) lr 1.7290e-03 eta 4:28:35 +epoch [14/50] batch [350/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.7192 (1.1325) acc 78.1250 (71.9732) lr 1.7290e-03 eta 4:28:29 +epoch [14/50] batch [355/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.1592 (1.1339) acc 75.0000 (71.9278) lr 1.7290e-03 eta 4:28:22 +epoch [14/50] batch [360/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.5986 (1.1337) acc 65.6250 (71.9531) lr 1.7290e-03 eta 4:28:13 +epoch [14/50] batch [365/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.3672 (1.1375) acc 68.7500 (71.8750) lr 1.7290e-03 eta 4:28:06 +epoch [14/50] batch [370/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.0469 (1.1386) acc 78.1250 (71.8581) lr 1.7290e-03 eta 4:28:03 +epoch [14/50] batch [375/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.3311 (1.1398) acc 71.8750 (71.9000) lr 1.7290e-03 eta 4:27:55 +epoch [14/50] batch [380/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.2051 (1.1396) acc 68.7500 (71.9079) lr 1.7290e-03 eta 4:27:47 +epoch [14/50] batch [385/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0889 (1.1401) acc 75.0000 (71.9156) lr 1.7290e-03 eta 4:27:42 +epoch [14/50] batch [390/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.0127 (1.1396) acc 62.5000 (71.8590) lr 1.7290e-03 eta 4:27:39 +epoch [14/50] batch [395/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.1338 (1.1419) acc 65.6250 (71.7959) lr 1.7290e-03 eta 4:27:34 +epoch [14/50] batch [400/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.9961 (1.1417) acc 71.8750 (71.7891) lr 1.7290e-03 eta 4:27:29 +epoch [14/50] batch [405/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.5352 (1.1433) acc 65.6250 (71.7438) lr 1.7290e-03 eta 4:27:22 +epoch [14/50] batch [410/500] time 0.880 (0.887) data 
0.000 (0.002) loss 1.8066 (1.1451) acc 62.5000 (71.7149) lr 1.7290e-03 eta 4:27:18 +epoch [14/50] batch [415/500] time 0.924 (0.887) data 0.000 (0.002) loss 1.5273 (1.1473) acc 59.3750 (71.6867) lr 1.7290e-03 eta 4:27:15 +epoch [14/50] batch [420/500] time 0.848 (0.886) data 0.000 (0.002) loss 1.3867 (1.1464) acc 68.7500 (71.7113) lr 1.7290e-03 eta 4:27:07 +epoch [14/50] batch [425/500] time 0.916 (0.886) data 0.000 (0.002) loss 0.8594 (1.1447) acc 81.2500 (71.7500) lr 1.7290e-03 eta 4:27:02 +epoch [14/50] batch [430/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.0186 (1.1436) acc 78.1250 (71.7660) lr 1.7290e-03 eta 4:26:55 +epoch [14/50] batch [435/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.2305 (1.1396) acc 68.7500 (71.8319) lr 1.7290e-03 eta 4:26:48 +epoch [14/50] batch [440/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9258 (1.1389) acc 78.1250 (71.8466) lr 1.7290e-03 eta 4:26:43 +epoch [14/50] batch [445/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.9053 (1.1351) acc 75.0000 (71.8890) lr 1.7290e-03 eta 4:26:36 +epoch [14/50] batch [450/500] time 0.893 (0.886) data 0.000 (0.002) loss 0.9126 (1.1370) acc 81.2500 (71.8750) lr 1.7290e-03 eta 4:26:36 +epoch [14/50] batch [455/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.1621 (1.1396) acc 75.0000 (71.8681) lr 1.7290e-03 eta 4:26:32 +epoch [14/50] batch [460/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.4639 (1.1429) acc 68.7500 (71.8546) lr 1.7290e-03 eta 4:26:26 +epoch [14/50] batch [465/500] time 0.865 (0.886) data 0.000 (0.002) loss 0.8521 (1.1431) acc 81.2500 (71.8817) lr 1.7290e-03 eta 4:26:19 +epoch [14/50] batch [470/500] time 0.854 (0.886) data 0.000 (0.002) loss 0.9087 (1.1426) acc 75.0000 (71.8949) lr 1.7290e-03 eta 4:26:15 +epoch [14/50] batch [475/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.2197 (1.1418) acc 75.0000 (71.9145) lr 1.7290e-03 eta 4:26:11 +epoch [14/50] batch [480/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.3936 (1.1442) acc 68.7500 (71.8815) lr 
1.7290e-03 eta 4:26:08 +epoch [14/50] batch [485/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.3809 (1.1447) acc 68.7500 (71.8557) lr 1.7290e-03 eta 4:26:03 +epoch [14/50] batch [490/500] time 0.889 (0.886) data 0.000 (0.002) loss 0.9058 (1.1446) acc 75.0000 (71.8622) lr 1.7290e-03 eta 4:25:58 +epoch [14/50] batch [495/500] time 0.892 (0.886) data 0.000 (0.002) loss 1.7500 (1.1470) acc 59.3750 (71.7803) lr 1.7290e-03 eta 4:25:55 +epoch [14/50] batch [500/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.2041 (1.1488) acc 65.6250 (71.7250) lr 1.6845e-03 eta 4:25:51 +epoch [15/50] batch [5/500] time 0.865 (1.024) data 0.000 (0.140) loss 1.2451 (1.2259) acc 65.6250 (68.1250) lr 1.6845e-03 eta 5:07:03 +epoch [15/50] batch [10/500] time 0.851 (0.949) data 0.000 (0.070) loss 0.7695 (1.1850) acc 71.8750 (67.8125) lr 1.6845e-03 eta 4:44:23 +epoch [15/50] batch [15/500] time 0.884 (0.931) data 0.000 (0.047) loss 0.8750 (1.1619) acc 62.5000 (69.7917) lr 1.6845e-03 eta 4:38:55 +epoch [15/50] batch [20/500] time 0.896 (0.925) data 0.000 (0.035) loss 1.0576 (1.2293) acc 68.7500 (69.5312) lr 1.6845e-03 eta 4:37:10 +epoch [15/50] batch [25/500] time 0.916 (0.920) data 0.000 (0.028) loss 0.9185 (1.2046) acc 81.2500 (70.7500) lr 1.6845e-03 eta 4:35:38 +epoch [15/50] batch [30/500] time 0.908 (0.916) data 0.000 (0.024) loss 1.5947 (1.1916) acc 71.8750 (71.2500) lr 1.6845e-03 eta 4:34:14 +epoch [15/50] batch [35/500] time 0.884 (0.911) data 0.000 (0.020) loss 1.3115 (1.1815) acc 68.7500 (71.4286) lr 1.6845e-03 eta 4:32:54 +epoch [15/50] batch [40/500] time 0.903 (0.910) data 0.000 (0.018) loss 0.7104 (1.1444) acc 84.3750 (72.1875) lr 1.6845e-03 eta 4:32:22 +epoch [15/50] batch [45/500] time 0.873 (0.908) data 0.000 (0.016) loss 0.6782 (1.1250) acc 78.1250 (72.5694) lr 1.6845e-03 eta 4:31:52 +epoch [15/50] batch [50/500] time 0.866 (0.904) data 0.000 (0.014) loss 1.4814 (1.1344) acc 62.5000 (72.4375) lr 1.6845e-03 eta 4:30:24 +epoch [15/50] batch [55/500] time 0.883 (0.902) data 
0.000 (0.013) loss 1.9951 (1.1645) acc 50.0000 (71.8182) lr 1.6845e-03 eta 4:29:50 +epoch [15/50] batch [60/500] time 1.006 (0.903) data 0.000 (0.012) loss 0.9707 (1.1579) acc 71.8750 (71.8229) lr 1.6845e-03 eta 4:30:07 +epoch [15/50] batch [65/500] time 0.888 (0.901) data 0.000 (0.011) loss 0.7886 (1.1467) acc 84.3750 (71.9231) lr 1.6845e-03 eta 4:29:18 +epoch [15/50] batch [70/500] time 0.851 (0.899) data 0.000 (0.010) loss 1.1514 (1.1442) acc 84.3750 (72.2321) lr 1.6845e-03 eta 4:28:43 +epoch [15/50] batch [75/500] time 0.850 (0.897) data 0.000 (0.010) loss 0.8784 (1.1332) acc 68.7500 (72.4583) lr 1.6845e-03 eta 4:28:05 +epoch [15/50] batch [80/500] time 0.904 (0.896) data 0.000 (0.009) loss 1.4590 (1.1358) acc 59.3750 (72.4219) lr 1.6845e-03 eta 4:27:35 +epoch [15/50] batch [85/500] time 0.905 (0.896) data 0.000 (0.008) loss 1.0059 (1.1264) acc 78.1250 (72.6103) lr 1.6845e-03 eta 4:27:35 +epoch [15/50] batch [90/500] time 0.880 (0.896) data 0.000 (0.008) loss 1.5029 (1.1317) acc 65.6250 (72.3958) lr 1.6845e-03 eta 4:27:22 +epoch [15/50] batch [95/500] time 0.895 (0.895) data 0.000 (0.008) loss 0.9722 (1.1295) acc 71.8750 (72.5658) lr 1.6845e-03 eta 4:27:12 +epoch [15/50] batch [100/500] time 0.857 (0.894) data 0.000 (0.007) loss 0.9971 (1.1297) acc 78.1250 (72.5000) lr 1.6845e-03 eta 4:26:40 +epoch [15/50] batch [105/500] time 0.858 (0.893) data 0.000 (0.007) loss 1.1924 (1.1227) acc 65.6250 (72.6190) lr 1.6845e-03 eta 4:26:25 +epoch [15/50] batch [110/500] time 0.876 (0.893) data 0.000 (0.007) loss 1.5996 (1.1350) acc 56.2500 (72.1875) lr 1.6845e-03 eta 4:26:09 +epoch [15/50] batch [115/500] time 0.878 (0.892) data 0.000 (0.006) loss 1.5186 (1.1311) acc 65.6250 (72.2826) lr 1.6845e-03 eta 4:25:50 +epoch [15/50] batch [120/500] time 0.888 (0.892) data 0.000 (0.006) loss 1.3359 (1.1288) acc 68.7500 (72.2656) lr 1.6845e-03 eta 4:25:41 +epoch [15/50] batch [125/500] time 0.847 (0.890) data 0.000 (0.006) loss 1.3789 (1.1286) acc 65.6250 (72.2250) lr 1.6845e-03 eta 
4:25:17 +epoch [15/50] batch [130/500] time 0.896 (0.891) data 0.000 (0.006) loss 1.9531 (1.1350) acc 62.5000 (72.0433) lr 1.6845e-03 eta 4:25:22 +epoch [15/50] batch [135/500] time 0.897 (0.891) data 0.000 (0.005) loss 1.0430 (1.1350) acc 75.0000 (71.9907) lr 1.6845e-03 eta 4:25:13 +epoch [15/50] batch [140/500] time 0.878 (0.890) data 0.000 (0.005) loss 0.7632 (1.1357) acc 81.2500 (72.0312) lr 1.6845e-03 eta 4:25:00 +epoch [15/50] batch [145/500] time 0.879 (0.890) data 0.000 (0.005) loss 1.4355 (1.1446) acc 59.3750 (71.8319) lr 1.6845e-03 eta 4:24:53 +epoch [15/50] batch [150/500] time 0.895 (0.890) data 0.000 (0.005) loss 1.4795 (1.1483) acc 59.3750 (71.7083) lr 1.6845e-03 eta 4:24:49 +epoch [15/50] batch [155/500] time 0.871 (0.890) data 0.000 (0.005) loss 0.8638 (1.1471) acc 87.5000 (71.7540) lr 1.6845e-03 eta 4:24:39 +epoch [15/50] batch [160/500] time 0.892 (0.890) data 0.000 (0.005) loss 0.8394 (1.1459) acc 78.1250 (71.8359) lr 1.6845e-03 eta 4:24:44 +epoch [15/50] batch [165/500] time 0.882 (0.890) data 0.000 (0.004) loss 1.1309 (1.1436) acc 78.1250 (71.9508) lr 1.6845e-03 eta 4:24:37 +epoch [15/50] batch [170/500] time 0.899 (0.890) data 0.000 (0.004) loss 1.3662 (1.1417) acc 65.6250 (72.0588) lr 1.6845e-03 eta 4:24:21 +epoch [15/50] batch [175/500] time 0.865 (0.889) data 0.000 (0.004) loss 0.7583 (1.1371) acc 84.3750 (72.2143) lr 1.6845e-03 eta 4:24:11 +epoch [15/50] batch [180/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.6904 (1.1412) acc 59.3750 (72.0486) lr 1.6845e-03 eta 4:24:01 +epoch [15/50] batch [185/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.0752 (1.1406) acc 71.8750 (71.9426) lr 1.6845e-03 eta 4:23:53 +epoch [15/50] batch [190/500] time 0.881 (0.889) data 0.000 (0.004) loss 1.9600 (1.1456) acc 59.3750 (71.9243) lr 1.6845e-03 eta 4:23:50 +epoch [15/50] batch [195/500] time 0.904 (0.889) data 0.000 (0.004) loss 1.3672 (1.1444) acc 68.7500 (71.8910) lr 1.6845e-03 eta 4:23:48 +epoch [15/50] batch [200/500] time 0.920 (0.889) data 
0.000 (0.004) loss 1.1279 (1.1448) acc 75.0000 (71.9219) lr 1.6845e-03 eta 4:23:45 +epoch [15/50] batch [205/500] time 0.871 (0.889) data 0.000 (0.004) loss 0.9346 (1.1456) acc 71.8750 (71.9055) lr 1.6845e-03 eta 4:23:38 +epoch [15/50] batch [210/500] time 0.920 (0.889) data 0.000 (0.004) loss 0.8188 (1.1476) acc 75.0000 (71.9196) lr 1.6845e-03 eta 4:23:32 +epoch [15/50] batch [215/500] time 0.891 (0.889) data 0.000 (0.003) loss 1.3213 (1.1479) acc 68.7500 (71.9186) lr 1.6845e-03 eta 4:23:29 +epoch [15/50] batch [220/500] time 0.894 (0.889) data 0.000 (0.003) loss 1.4863 (1.1509) acc 65.6250 (71.8750) lr 1.6845e-03 eta 4:23:20 +epoch [15/50] batch [225/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.1611 (1.1512) acc 75.0000 (71.9028) lr 1.6845e-03 eta 4:23:13 +epoch [15/50] batch [230/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.2002 (1.1485) acc 65.6250 (71.9293) lr 1.6845e-03 eta 4:23:05 +epoch [15/50] batch [235/500] time 0.908 (0.888) data 0.000 (0.003) loss 0.8047 (1.1433) acc 65.6250 (71.9282) lr 1.6845e-03 eta 4:23:00 +epoch [15/50] batch [240/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.3779 (1.1461) acc 68.7500 (71.7578) lr 1.6845e-03 eta 4:22:56 +epoch [15/50] batch [245/500] time 0.892 (0.888) data 0.000 (0.003) loss 0.9922 (1.1431) acc 68.7500 (71.8495) lr 1.6845e-03 eta 4:22:47 +epoch [15/50] batch [250/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.8687 (1.1446) acc 87.5000 (71.7625) lr 1.6845e-03 eta 4:22:40 +epoch [15/50] batch [255/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.7871 (1.1474) acc 65.6250 (71.7157) lr 1.6845e-03 eta 4:22:31 +epoch [15/50] batch [260/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.6387 (1.1474) acc 71.8750 (71.6827) lr 1.6845e-03 eta 4:22:27 +epoch [15/50] batch [265/500] time 0.881 (0.888) data 0.000 (0.003) loss 1.0098 (1.1470) acc 75.0000 (71.6863) lr 1.6845e-03 eta 4:22:20 +epoch [15/50] batch [270/500] time 0.884 (0.887) data 0.000 (0.003) loss 1.5098 (1.1445) acc 68.7500 (71.6782) lr 
1.6845e-03 eta 4:22:13 +epoch [15/50] batch [275/500] time 0.889 (0.887) data 0.000 (0.003) loss 1.2363 (1.1426) acc 62.5000 (71.6591) lr 1.6845e-03 eta 4:22:04 +epoch [15/50] batch [280/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.3564 (1.1425) acc 78.1250 (71.7299) lr 1.6845e-03 eta 4:21:55 +epoch [15/50] batch [285/500] time 0.858 (0.887) data 0.000 (0.003) loss 0.7715 (1.1404) acc 84.3750 (71.7763) lr 1.6845e-03 eta 4:21:47 +epoch [15/50] batch [290/500] time 0.849 (0.886) data 0.000 (0.003) loss 1.7441 (1.1406) acc 65.6250 (71.7672) lr 1.6845e-03 eta 4:21:33 +epoch [15/50] batch [295/500] time 0.872 (0.886) data 0.000 (0.003) loss 1.0381 (1.1415) acc 68.7500 (71.7055) lr 1.6845e-03 eta 4:21:28 +epoch [15/50] batch [300/500] time 0.858 (0.886) data 0.000 (0.003) loss 1.0215 (1.1390) acc 71.8750 (71.8021) lr 1.6845e-03 eta 4:21:23 +epoch [15/50] batch [305/500] time 0.883 (0.886) data 0.000 (0.003) loss 0.7236 (1.1349) acc 81.2500 (71.9262) lr 1.6845e-03 eta 4:21:24 +epoch [15/50] batch [310/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.8120 (1.1309) acc 84.3750 (71.9859) lr 1.6845e-03 eta 4:21:18 +epoch [15/50] batch [315/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.2803 (1.1323) acc 75.0000 (71.9940) lr 1.6845e-03 eta 4:21:16 +epoch [15/50] batch [320/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.2041 (1.1328) acc 59.3750 (71.9238) lr 1.6845e-03 eta 4:21:06 +epoch [15/50] batch [325/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.5986 (1.1363) acc 65.6250 (71.8846) lr 1.6845e-03 eta 4:21:05 +epoch [15/50] batch [330/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.9814 (1.1361) acc 75.0000 (71.8750) lr 1.6845e-03 eta 4:20:58 +epoch [15/50] batch [335/500] time 0.909 (0.886) data 0.000 (0.002) loss 1.2871 (1.1386) acc 68.7500 (71.8004) lr 1.6845e-03 eta 4:20:54 +epoch [15/50] batch [340/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.1924 (1.1381) acc 75.0000 (71.7923) lr 1.6845e-03 eta 4:20:52 +epoch [15/50] batch [345/500] time 0.892 
(0.886) data 0.000 (0.002) loss 1.2920 (1.1410) acc 62.5000 (71.7120) lr 1.6845e-03 eta 4:20:48 +epoch [15/50] batch [350/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.8862 (1.1415) acc 75.0000 (71.7321) lr 1.6845e-03 eta 4:20:47 +epoch [15/50] batch [355/500] time 0.914 (0.887) data 0.000 (0.002) loss 1.1748 (1.1392) acc 71.8750 (71.7870) lr 1.6845e-03 eta 4:20:46 +epoch [15/50] batch [360/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.6670 (1.1404) acc 56.2500 (71.7101) lr 1.6845e-03 eta 4:20:45 +epoch [15/50] batch [365/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9932 (1.1377) acc 75.0000 (71.7808) lr 1.6845e-03 eta 4:20:43 +epoch [15/50] batch [370/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.1729 (1.1382) acc 71.8750 (71.7314) lr 1.6845e-03 eta 4:20:39 +epoch [15/50] batch [375/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.4990 (1.1406) acc 68.7500 (71.7083) lr 1.6845e-03 eta 4:20:36 +epoch [15/50] batch [380/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.7295 (1.1410) acc 81.2500 (71.6776) lr 1.6845e-03 eta 4:20:31 +epoch [15/50] batch [385/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.8076 (1.1404) acc 81.2500 (71.6640) lr 1.6845e-03 eta 4:20:25 +epoch [15/50] batch [390/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.2832 (1.1420) acc 71.8750 (71.6186) lr 1.6845e-03 eta 4:20:19 +epoch [15/50] batch [395/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.4756 (1.1414) acc 59.3750 (71.5981) lr 1.6845e-03 eta 4:20:14 +epoch [15/50] batch [400/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.7236 (1.1404) acc 81.2500 (71.6172) lr 1.6845e-03 eta 4:20:07 +epoch [15/50] batch [405/500] time 0.850 (0.887) data 0.000 (0.002) loss 1.7930 (1.1415) acc 62.5000 (71.5741) lr 1.6845e-03 eta 4:20:00 +epoch [15/50] batch [410/500] time 0.862 (0.887) data 0.000 (0.002) loss 1.3506 (1.1433) acc 68.7500 (71.5625) lr 1.6845e-03 eta 4:19:54 +epoch [15/50] batch [415/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2178 (1.1428) acc 75.0000 
(71.5889) lr 1.6845e-03 eta 4:19:49 +epoch [15/50] batch [420/500] time 0.884 (0.886) data 0.000 (0.002) loss 2.0195 (1.1440) acc 43.7500 (71.5327) lr 1.6845e-03 eta 4:19:42 +epoch [15/50] batch [425/500] time 0.857 (0.886) data 0.000 (0.002) loss 1.3057 (1.1448) acc 62.5000 (71.4926) lr 1.6845e-03 eta 4:19:37 +epoch [15/50] batch [430/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.7202 (1.1449) acc 81.2500 (71.4826) lr 1.6845e-03 eta 4:19:33 +epoch [15/50] batch [435/500] time 0.896 (0.886) data 0.000 (0.002) loss 1.5020 (1.1493) acc 65.6250 (71.4296) lr 1.6845e-03 eta 4:19:28 +epoch [15/50] batch [440/500] time 0.908 (0.887) data 0.000 (0.002) loss 0.9922 (1.1501) acc 78.1250 (71.3849) lr 1.6845e-03 eta 4:19:29 +epoch [15/50] batch [445/500] time 0.994 (0.887) data 0.000 (0.002) loss 0.8604 (1.1488) acc 75.0000 (71.4115) lr 1.6845e-03 eta 4:19:27 +epoch [15/50] batch [450/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.1895 (1.1476) acc 71.8750 (71.4444) lr 1.6845e-03 eta 4:19:19 +epoch [15/50] batch [455/500] time 0.931 (0.887) data 0.000 (0.002) loss 1.1221 (1.1485) acc 71.8750 (71.4354) lr 1.6845e-03 eta 4:19:15 +epoch [15/50] batch [460/500] time 0.928 (0.887) data 0.000 (0.002) loss 1.0312 (1.1468) acc 75.0000 (71.4810) lr 1.6845e-03 eta 4:19:12 +epoch [15/50] batch [465/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.2188 (1.1498) acc 68.7500 (71.4651) lr 1.6845e-03 eta 4:19:10 +epoch [15/50] batch [470/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.3379 (1.1496) acc 75.0000 (71.4827) lr 1.6845e-03 eta 4:19:06 +epoch [15/50] batch [475/500] time 0.901 (0.887) data 0.000 (0.002) loss 2.1250 (1.1494) acc 59.3750 (71.5066) lr 1.6845e-03 eta 4:19:04 +epoch [15/50] batch [480/500] time 0.914 (0.887) data 0.000 (0.002) loss 1.6582 (1.1498) acc 59.3750 (71.4909) lr 1.6845e-03 eta 4:19:01 +epoch [15/50] batch [485/500] time 0.896 (0.887) data 0.001 (0.002) loss 0.9429 (1.1490) acc 71.8750 (71.5013) lr 1.6845e-03 eta 4:18:57 +epoch [15/50] batch [490/500] 
time 0.903 (0.887) data 0.000 (0.002) loss 1.3838 (1.1496) acc 65.6250 (71.5179) lr 1.6845e-03 eta 4:18:57 +epoch [15/50] batch [495/500] time 0.906 (0.887) data 0.000 (0.002) loss 1.1162 (1.1499) acc 71.8750 (71.5025) lr 1.6845e-03 eta 4:18:53 +epoch [15/50] batch [500/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.8252 (1.1474) acc 81.2500 (71.5563) lr 1.6374e-03 eta 4:18:49 +epoch [16/50] batch [5/500] time 0.877 (1.034) data 0.000 (0.144) loss 1.5176 (1.1437) acc 68.7500 (71.8750) lr 1.6374e-03 eta 5:01:32 +epoch [16/50] batch [10/500] time 0.865 (0.958) data 0.000 (0.072) loss 1.8330 (1.3104) acc 59.3750 (69.3750) lr 1.6374e-03 eta 4:39:19 +epoch [16/50] batch [15/500] time 0.868 (0.926) data 0.000 (0.048) loss 1.1973 (1.3141) acc 65.6250 (68.1250) lr 1.6374e-03 eta 4:29:59 +epoch [16/50] batch [20/500] time 0.907 (0.916) data 0.000 (0.036) loss 0.8315 (1.2470) acc 75.0000 (70.0000) lr 1.6374e-03 eta 4:26:54 +epoch [16/50] batch [25/500] time 0.884 (0.910) data 0.000 (0.029) loss 0.7734 (1.1986) acc 84.3750 (71.5000) lr 1.6374e-03 eta 4:25:08 +epoch [16/50] batch [30/500] time 0.867 (0.906) data 0.000 (0.024) loss 1.1621 (1.1942) acc 68.7500 (71.4583) lr 1.6374e-03 eta 4:23:43 +epoch [16/50] batch [35/500] time 0.887 (0.906) data 0.000 (0.021) loss 1.6738 (1.1881) acc 59.3750 (71.6964) lr 1.6374e-03 eta 4:23:42 +epoch [16/50] batch [40/500] time 0.886 (0.902) data 0.000 (0.018) loss 0.9487 (1.2126) acc 78.1250 (71.2500) lr 1.6374e-03 eta 4:22:31 +epoch [16/50] batch [45/500] time 0.893 (0.899) data 0.000 (0.016) loss 1.7061 (1.2054) acc 50.0000 (71.4583) lr 1.6374e-03 eta 4:21:27 +epoch [16/50] batch [50/500] time 0.891 (0.897) data 0.000 (0.015) loss 0.9722 (1.1998) acc 65.6250 (71.4375) lr 1.6374e-03 eta 4:20:53 +epoch [16/50] batch [55/500] time 0.859 (0.894) data 0.000 (0.013) loss 1.0879 (1.1895) acc 71.8750 (71.5341) lr 1.6374e-03 eta 4:19:53 +epoch [16/50] batch [60/500] time 0.897 (0.893) data 0.000 (0.012) loss 1.2461 (1.1698) acc 75.0000 (71.8750) 
lr 1.6374e-03 eta 4:19:27 +epoch [16/50] batch [65/500] time 0.861 (0.892) data 0.000 (0.011) loss 1.7139 (1.1895) acc 71.8750 (71.6346) lr 1.6374e-03 eta 4:19:12 +epoch [16/50] batch [70/500] time 0.883 (0.892) data 0.000 (0.011) loss 0.4963 (1.1642) acc 81.2500 (72.0536) lr 1.6374e-03 eta 4:19:02 +epoch [16/50] batch [75/500] time 0.904 (0.892) data 0.000 (0.010) loss 0.6108 (1.1501) acc 78.1250 (72.2083) lr 1.6374e-03 eta 4:18:59 +epoch [16/50] batch [80/500] time 0.862 (0.891) data 0.000 (0.009) loss 1.3291 (1.1487) acc 71.8750 (72.3047) lr 1.6374e-03 eta 4:18:33 +epoch [16/50] batch [85/500] time 0.886 (0.890) data 0.000 (0.009) loss 0.9819 (1.1546) acc 78.1250 (72.2426) lr 1.6374e-03 eta 4:18:23 +epoch [16/50] batch [90/500] time 0.882 (0.889) data 0.000 (0.008) loss 2.0176 (1.1734) acc 59.3750 (71.9444) lr 1.6374e-03 eta 4:18:03 +epoch [16/50] batch [95/500] time 0.924 (0.889) data 0.000 (0.008) loss 1.5830 (1.1594) acc 62.5000 (72.2697) lr 1.6374e-03 eta 4:17:59 +epoch [16/50] batch [100/500] time 0.883 (0.890) data 0.000 (0.007) loss 1.0840 (1.1534) acc 81.2500 (72.2500) lr 1.6374e-03 eta 4:18:09 +epoch [16/50] batch [105/500] time 0.911 (0.890) data 0.000 (0.007) loss 1.4531 (1.1539) acc 53.1250 (72.2321) lr 1.6374e-03 eta 4:18:07 +epoch [16/50] batch [110/500] time 0.907 (0.891) data 0.000 (0.007) loss 1.6318 (1.1558) acc 68.7500 (72.3864) lr 1.6374e-03 eta 4:18:10 +epoch [16/50] batch [115/500] time 0.911 (0.891) data 0.000 (0.007) loss 0.7749 (1.1540) acc 84.3750 (72.2554) lr 1.6374e-03 eta 4:18:08 +epoch [16/50] batch [120/500] time 0.890 (0.890) data 0.000 (0.006) loss 0.9253 (1.1563) acc 68.7500 (72.0833) lr 1.6374e-03 eta 4:17:53 +epoch [16/50] batch [125/500] time 0.873 (0.890) data 0.000 (0.006) loss 1.2754 (1.1655) acc 65.6250 (71.9250) lr 1.6374e-03 eta 4:17:41 +epoch [16/50] batch [130/500] time 0.859 (0.890) data 0.000 (0.006) loss 1.1572 (1.1608) acc 75.0000 (71.8269) lr 1.6374e-03 eta 4:17:31 +epoch [16/50] batch [135/500] time 0.879 
(0.890) data 0.000 (0.006) loss 1.4121 (1.1683) acc 65.6250 (71.5278) lr 1.6374e-03 eta 4:17:40 +epoch [16/50] batch [140/500] time 0.869 (0.890) data 0.000 (0.005) loss 1.8936 (1.1711) acc 53.1250 (71.3616) lr 1.6374e-03 eta 4:17:25 +epoch [16/50] batch [145/500] time 0.896 (0.890) data 0.000 (0.005) loss 1.1318 (1.1708) acc 65.6250 (71.2716) lr 1.6374e-03 eta 4:17:19 +epoch [16/50] batch [150/500] time 0.858 (0.889) data 0.000 (0.005) loss 0.6523 (1.1588) acc 87.5000 (71.5625) lr 1.6374e-03 eta 4:16:56 +epoch [16/50] batch [155/500] time 0.881 (0.888) data 0.000 (0.005) loss 1.0791 (1.1588) acc 81.2500 (71.6129) lr 1.6374e-03 eta 4:16:45 +epoch [16/50] batch [160/500] time 0.876 (0.888) data 0.000 (0.005) loss 1.0117 (1.1558) acc 75.0000 (71.6797) lr 1.6374e-03 eta 4:16:36 +epoch [16/50] batch [165/500] time 0.895 (0.888) data 0.000 (0.005) loss 1.0225 (1.1570) acc 75.0000 (71.7235) lr 1.6374e-03 eta 4:16:32 +epoch [16/50] batch [170/500] time 0.872 (0.888) data 0.000 (0.004) loss 1.6074 (1.1580) acc 62.5000 (71.6360) lr 1.6374e-03 eta 4:16:25 +epoch [16/50] batch [175/500] time 0.895 (0.888) data 0.000 (0.004) loss 1.1006 (1.1557) acc 65.6250 (71.7143) lr 1.6374e-03 eta 4:16:19 +epoch [16/50] batch [180/500] time 0.908 (0.888) data 0.000 (0.004) loss 1.3467 (1.1588) acc 62.5000 (71.6667) lr 1.6374e-03 eta 4:16:22 +epoch [16/50] batch [185/500] time 0.864 (0.888) data 0.000 (0.004) loss 0.6069 (1.1577) acc 78.1250 (71.6554) lr 1.6374e-03 eta 4:16:19 +epoch [16/50] batch [190/500] time 0.914 (0.888) data 0.000 (0.004) loss 1.1611 (1.1552) acc 84.3750 (71.7434) lr 1.6374e-03 eta 4:16:15 +epoch [16/50] batch [195/500] time 0.931 (0.888) data 0.000 (0.004) loss 1.2607 (1.1548) acc 68.7500 (71.6827) lr 1.6374e-03 eta 4:16:11 +epoch [16/50] batch [200/500] time 0.880 (0.888) data 0.000 (0.004) loss 1.2070 (1.1536) acc 65.6250 (71.6562) lr 1.6374e-03 eta 4:16:03 +epoch [16/50] batch [205/500] time 0.871 (0.888) data 0.000 (0.004) loss 0.9736 (1.1518) acc 75.0000 
(71.6159) lr 1.6374e-03 eta 4:15:59 +epoch [16/50] batch [210/500] time 0.882 (0.888) data 0.000 (0.004) loss 1.2119 (1.1494) acc 78.1250 (71.6518) lr 1.6374e-03 eta 4:15:53 +epoch [16/50] batch [215/500] time 0.882 (0.888) data 0.000 (0.004) loss 1.5938 (1.1606) acc 53.1250 (71.4535) lr 1.6374e-03 eta 4:15:45 +epoch [16/50] batch [220/500] time 0.871 (0.888) data 0.000 (0.004) loss 0.9824 (1.1606) acc 71.8750 (71.4062) lr 1.6374e-03 eta 4:15:37 +epoch [16/50] batch [225/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.0791 (1.1577) acc 62.5000 (71.4861) lr 1.6374e-03 eta 4:15:30 +epoch [16/50] batch [230/500] time 0.860 (0.887) data 0.000 (0.003) loss 0.7959 (1.1490) acc 81.2500 (71.6984) lr 1.6374e-03 eta 4:15:24 +epoch [16/50] batch [235/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.2422 (1.1469) acc 65.6250 (71.6888) lr 1.6374e-03 eta 4:15:14 +epoch [16/50] batch [240/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.0850 (1.1418) acc 65.6250 (71.7448) lr 1.6374e-03 eta 4:15:10 +epoch [16/50] batch [245/500] time 0.864 (0.887) data 0.000 (0.003) loss 1.4639 (1.1460) acc 62.5000 (71.7092) lr 1.6374e-03 eta 4:15:04 +epoch [16/50] batch [250/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.1133 (1.1533) acc 68.7500 (71.5500) lr 1.6374e-03 eta 4:14:55 +epoch [16/50] batch [255/500] time 0.879 (0.886) data 0.000 (0.003) loss 1.1162 (1.1519) acc 56.2500 (71.5196) lr 1.6374e-03 eta 4:14:46 +epoch [16/50] batch [260/500] time 0.847 (0.886) data 0.000 (0.003) loss 0.9106 (1.1486) acc 78.1250 (71.6106) lr 1.6374e-03 eta 4:14:36 +epoch [16/50] batch [265/500] time 0.900 (0.886) data 0.000 (0.003) loss 0.7222 (1.1447) acc 75.0000 (71.6981) lr 1.6374e-03 eta 4:14:31 +epoch [16/50] batch [270/500] time 0.883 (0.886) data 0.000 (0.003) loss 1.0088 (1.1464) acc 78.1250 (71.6667) lr 1.6374e-03 eta 4:14:22 +epoch [16/50] batch [275/500] time 0.982 (0.886) data 0.000 (0.003) loss 0.7314 (1.1445) acc 78.1250 (71.7273) lr 1.6374e-03 eta 4:14:22 +epoch [16/50] batch [280/500] 
time 0.891 (0.886) data 0.000 (0.003) loss 1.5166 (1.1424) acc 53.1250 (71.6406) lr 1.6374e-03 eta 4:14:19 +epoch [16/50] batch [285/500] time 0.874 (0.886) data 0.000 (0.003) loss 1.7168 (1.1456) acc 53.1250 (71.5461) lr 1.6374e-03 eta 4:14:17 +epoch [16/50] batch [290/500] time 0.897 (0.886) data 0.000 (0.003) loss 1.5430 (1.1512) acc 56.2500 (71.3685) lr 1.6374e-03 eta 4:14:13 +epoch [16/50] batch [295/500] time 0.859 (0.886) data 0.000 (0.003) loss 0.9038 (1.1507) acc 71.8750 (71.4089) lr 1.6374e-03 eta 4:14:06 +epoch [16/50] batch [300/500] time 0.870 (0.886) data 0.000 (0.003) loss 1.3223 (1.1511) acc 81.2500 (71.4062) lr 1.6374e-03 eta 4:14:00 +epoch [16/50] batch [305/500] time 0.915 (0.886) data 0.000 (0.003) loss 1.0488 (1.1482) acc 71.8750 (71.4549) lr 1.6374e-03 eta 4:13:56 +epoch [16/50] batch [310/500] time 0.875 (0.886) data 0.000 (0.003) loss 1.5273 (1.1493) acc 65.6250 (71.4617) lr 1.6374e-03 eta 4:13:55 +epoch [16/50] batch [315/500] time 0.894 (0.886) data 0.000 (0.003) loss 0.6606 (1.1470) acc 75.0000 (71.4683) lr 1.6374e-03 eta 4:13:47 +epoch [16/50] batch [320/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.7886 (1.1432) acc 78.1250 (71.4453) lr 1.6374e-03 eta 4:13:52 +epoch [16/50] batch [325/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.1191 (1.1440) acc 81.2500 (71.3942) lr 1.6374e-03 eta 4:13:47 +epoch [16/50] batch [330/500] time 0.876 (0.887) data 0.000 (0.002) loss 1.2090 (1.1405) acc 68.7500 (71.4962) lr 1.6374e-03 eta 4:13:46 +epoch [16/50] batch [335/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.6230 (1.1412) acc 59.3750 (71.5019) lr 1.6374e-03 eta 4:13:42 +epoch [16/50] batch [340/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.4521 (1.1446) acc 62.5000 (71.5074) lr 1.6374e-03 eta 4:13:38 +epoch [16/50] batch [345/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.3115 (1.1468) acc 68.7500 (71.4764) lr 1.6374e-03 eta 4:13:37 +epoch [16/50] batch [350/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9023 (1.1452) acc 
84.3750 (71.5536) lr 1.6374e-03 eta 4:13:32 +epoch [16/50] batch [355/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.8364 (1.1420) acc 78.1250 (71.6813) lr 1.6374e-03 eta 4:13:25 +epoch [16/50] batch [360/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.9790 (1.1414) acc 75.0000 (71.7014) lr 1.6374e-03 eta 4:13:21 +epoch [16/50] batch [365/500] time 0.878 (0.887) data 0.000 (0.002) loss 0.5317 (1.1429) acc 87.5000 (71.6952) lr 1.6374e-03 eta 4:13:17 +epoch [16/50] batch [370/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.8345 (1.1438) acc 81.2500 (71.6639) lr 1.6374e-03 eta 4:13:09 +epoch [16/50] batch [375/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.7251 (1.1437) acc 84.3750 (71.6500) lr 1.6374e-03 eta 4:13:01 +epoch [16/50] batch [380/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.6738 (1.1459) acc 65.6250 (71.6612) lr 1.6374e-03 eta 4:12:53 +epoch [16/50] batch [385/500] time 0.858 (0.886) data 0.000 (0.002) loss 1.2520 (1.1448) acc 71.8750 (71.6558) lr 1.6374e-03 eta 4:12:44 +epoch [16/50] batch [390/500] time 0.892 (0.886) data 0.000 (0.002) loss 0.9443 (1.1468) acc 84.3750 (71.6186) lr 1.6374e-03 eta 4:12:41 +epoch [16/50] batch [395/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.2451 (1.1493) acc 62.5000 (71.5823) lr 1.6374e-03 eta 4:12:36 +epoch [16/50] batch [400/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.7295 (1.1511) acc 59.3750 (71.5703) lr 1.6374e-03 eta 4:12:33 +epoch [16/50] batch [405/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.5527 (1.1518) acc 65.6250 (71.5895) lr 1.6374e-03 eta 4:12:30 +epoch [16/50] batch [410/500] time 0.904 (0.886) data 0.000 (0.002) loss 1.2109 (1.1503) acc 68.7500 (71.5854) lr 1.6374e-03 eta 4:12:26 +epoch [16/50] batch [415/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.9702 (1.1500) acc 78.1250 (71.5663) lr 1.6374e-03 eta 4:12:21 +epoch [16/50] batch [420/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.6714 (1.1503) acc 81.2500 (71.5551) lr 1.6374e-03 eta 4:12:21 +epoch [16/50] batch 
[425/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.3232 (1.1530) acc 65.6250 (71.5000) lr 1.6374e-03 eta 4:12:17 +epoch [16/50] batch [430/500] time 0.920 (0.887) data 0.000 (0.002) loss 0.6035 (1.1516) acc 87.5000 (71.5625) lr 1.6374e-03 eta 4:12:15 +epoch [16/50] batch [435/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8779 (1.1531) acc 81.2500 (71.5589) lr 1.6374e-03 eta 4:12:11 +epoch [16/50] batch [440/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.1436 (1.1522) acc 68.7500 (71.5767) lr 1.6374e-03 eta 4:12:09 +epoch [16/50] batch [445/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1855 (1.1513) acc 75.0000 (71.5941) lr 1.6374e-03 eta 4:12:05 +epoch [16/50] batch [450/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.2236 (1.1534) acc 71.8750 (71.5903) lr 1.6374e-03 eta 4:12:02 +epoch [16/50] batch [455/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.4189 (1.1521) acc 62.5000 (71.5659) lr 1.6374e-03 eta 4:11:57 +epoch [16/50] batch [460/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.6006 (1.1495) acc 62.5000 (71.6372) lr 1.6374e-03 eta 4:11:52 +epoch [16/50] batch [465/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.7139 (1.1486) acc 65.6250 (71.6734) lr 1.6374e-03 eta 4:11:53 +epoch [16/50] batch [470/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.5215 (1.1477) acc 75.0000 (71.7354) lr 1.6374e-03 eta 4:11:47 +epoch [16/50] batch [475/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.7036 (1.1464) acc 81.2500 (71.8092) lr 1.6374e-03 eta 4:11:41 +epoch [16/50] batch [480/500] time 0.861 (0.887) data 0.000 (0.002) loss 0.8784 (1.1454) acc 71.8750 (71.7773) lr 1.6374e-03 eta 4:11:37 +epoch [16/50] batch [485/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.4609 (1.1462) acc 78.1250 (71.7590) lr 1.6374e-03 eta 4:11:35 +epoch [16/50] batch [490/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.3301 (1.1463) acc 56.2500 (71.7156) lr 1.6374e-03 eta 4:11:30 +epoch [16/50] batch [495/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.9854 
(1.1472) acc 71.8750 (71.7298) lr 1.6374e-03 eta 4:11:25 +epoch [16/50] batch [500/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.7080 (1.1454) acc 81.2500 (71.7500) lr 1.5878e-03 eta 4:11:21 +epoch [17/50] batch [5/500] time 0.891 (1.059) data 0.000 (0.140) loss 1.1191 (1.0931) acc 68.7500 (71.2500) lr 1.5878e-03 eta 4:59:57 +epoch [17/50] batch [10/500] time 0.878 (0.966) data 0.000 (0.070) loss 1.2061 (1.0497) acc 65.6250 (73.1250) lr 1.5878e-03 eta 4:33:28 +epoch [17/50] batch [15/500] time 0.885 (0.935) data 0.000 (0.047) loss 1.0654 (1.0718) acc 62.5000 (71.8750) lr 1.5878e-03 eta 4:24:47 +epoch [17/50] batch [20/500] time 0.895 (0.921) data 0.000 (0.035) loss 1.0537 (1.0362) acc 81.2500 (72.6562) lr 1.5878e-03 eta 4:20:32 +epoch [17/50] batch [25/500] time 0.863 (0.913) data 0.000 (0.028) loss 1.5918 (1.0682) acc 62.5000 (72.0000) lr 1.5878e-03 eta 4:18:19 +epoch [17/50] batch [30/500] time 0.865 (0.908) data 0.000 (0.023) loss 0.9448 (1.0379) acc 81.2500 (73.2292) lr 1.5878e-03 eta 4:16:40 +epoch [17/50] batch [35/500] time 0.901 (0.905) data 0.000 (0.020) loss 1.9092 (1.0948) acc 59.3750 (72.2321) lr 1.5878e-03 eta 4:16:01 +epoch [17/50] batch [40/500] time 0.917 (0.904) data 0.000 (0.018) loss 1.0449 (1.0947) acc 68.7500 (72.0312) lr 1.5878e-03 eta 4:15:39 +epoch [17/50] batch [45/500] time 0.906 (0.901) data 0.000 (0.016) loss 1.0820 (1.0981) acc 71.8750 (71.5972) lr 1.5878e-03 eta 4:14:42 +epoch [17/50] batch [50/500] time 0.893 (0.900) data 0.000 (0.014) loss 1.4229 (1.1067) acc 68.7500 (71.3750) lr 1.5878e-03 eta 4:14:07 +epoch [17/50] batch [55/500] time 0.865 (0.897) data 0.000 (0.013) loss 1.1455 (1.1042) acc 68.7500 (71.5909) lr 1.5878e-03 eta 4:13:26 +epoch [17/50] batch [60/500] time 0.873 (0.897) data 0.000 (0.012) loss 0.5815 (1.0962) acc 84.3750 (72.0833) lr 1.5878e-03 eta 4:13:07 +epoch [17/50] batch [65/500] time 0.864 (0.896) data 0.000 (0.011) loss 1.1484 (1.0901) acc 71.8750 (72.1154) lr 1.5878e-03 eta 4:13:00 +epoch [17/50] batch 
[70/500] time 0.892 (0.897) data 0.000 (0.010) loss 0.8604 (1.0870) acc 87.5000 (72.4554) lr 1.5878e-03 eta 4:13:03 +epoch [17/50] batch [75/500] time 0.857 (0.895) data 0.000 (0.010) loss 0.8687 (1.0920) acc 81.2500 (72.5417) lr 1.5878e-03 eta 4:12:33 +epoch [17/50] batch [80/500] time 0.872 (0.894) data 0.000 (0.009) loss 0.7822 (1.0812) acc 78.1250 (72.8125) lr 1.5878e-03 eta 4:12:11 +epoch [17/50] batch [85/500] time 0.847 (0.893) data 0.000 (0.008) loss 0.8267 (1.0895) acc 87.5000 (72.9044) lr 1.5878e-03 eta 4:11:45 +epoch [17/50] batch [90/500] time 0.863 (0.892) data 0.000 (0.008) loss 1.1973 (1.0939) acc 71.8750 (73.0556) lr 1.5878e-03 eta 4:11:18 +epoch [17/50] batch [95/500] time 0.922 (0.891) data 0.000 (0.008) loss 0.8867 (1.1010) acc 81.2500 (72.9605) lr 1.5878e-03 eta 4:11:09 +epoch [17/50] batch [100/500] time 0.885 (0.892) data 0.000 (0.007) loss 1.1777 (1.1032) acc 75.0000 (73.0000) lr 1.5878e-03 eta 4:11:07 +epoch [17/50] batch [105/500] time 0.887 (0.892) data 0.000 (0.007) loss 0.9878 (1.1003) acc 71.8750 (73.1250) lr 1.5878e-03 eta 4:11:17 +epoch [17/50] batch [110/500] time 0.894 (0.893) data 0.000 (0.007) loss 1.2627 (1.0980) acc 68.7500 (73.0682) lr 1.5878e-03 eta 4:11:23 +epoch [17/50] batch [115/500] time 0.859 (0.892) data 0.000 (0.006) loss 1.5889 (1.0996) acc 68.7500 (73.0707) lr 1.5878e-03 eta 4:11:04 +epoch [17/50] batch [120/500] time 0.879 (0.892) data 0.000 (0.006) loss 0.7212 (1.0985) acc 78.1250 (73.1250) lr 1.5878e-03 eta 4:10:54 +epoch [17/50] batch [125/500] time 0.895 (0.891) data 0.000 (0.006) loss 0.9199 (1.0937) acc 78.1250 (73.2000) lr 1.5878e-03 eta 4:10:37 +epoch [17/50] batch [130/500] time 0.877 (0.891) data 0.000 (0.006) loss 0.8862 (1.0999) acc 78.1250 (73.1490) lr 1.5878e-03 eta 4:10:27 +epoch [17/50] batch [135/500] time 0.875 (0.890) data 0.000 (0.005) loss 1.8262 (1.1011) acc 62.5000 (73.1944) lr 1.5878e-03 eta 4:10:11 +epoch [17/50] batch [140/500] time 0.866 (0.890) data 0.000 (0.005) loss 0.4644 (1.1005) acc 
84.3750 (72.9464) lr 1.5878e-03 eta 4:09:57 +epoch [17/50] batch [145/500] time 0.976 (0.890) data 0.000 (0.005) loss 0.8296 (1.1023) acc 65.6250 (72.8233) lr 1.5878e-03 eta 4:09:59 +epoch [17/50] batch [150/500] time 0.862 (0.890) data 0.000 (0.005) loss 1.5176 (1.1077) acc 65.6250 (72.7083) lr 1.5878e-03 eta 4:09:54 +epoch [17/50] batch [155/500] time 0.893 (0.890) data 0.000 (0.005) loss 0.9849 (1.0995) acc 78.1250 (72.9435) lr 1.5878e-03 eta 4:09:44 +epoch [17/50] batch [160/500] time 0.910 (0.889) data 0.000 (0.005) loss 1.3594 (1.1041) acc 78.1250 (73.0664) lr 1.5878e-03 eta 4:09:33 +epoch [17/50] batch [165/500] time 0.885 (0.889) data 0.000 (0.004) loss 1.1475 (1.1072) acc 68.7500 (73.1061) lr 1.5878e-03 eta 4:09:31 +epoch [17/50] batch [170/500] time 0.876 (0.889) data 0.000 (0.004) loss 0.7402 (1.1075) acc 71.8750 (72.9963) lr 1.5878e-03 eta 4:09:29 +epoch [17/50] batch [175/500] time 0.888 (0.890) data 0.000 (0.004) loss 0.7412 (1.1110) acc 81.2500 (72.9286) lr 1.5878e-03 eta 4:09:30 +epoch [17/50] batch [180/500] time 0.908 (0.890) data 0.000 (0.004) loss 1.0166 (1.1122) acc 71.8750 (72.8819) lr 1.5878e-03 eta 4:09:23 +epoch [17/50] batch [185/500] time 0.882 (0.890) data 0.000 (0.004) loss 1.1904 (1.1113) acc 75.0000 (72.8885) lr 1.5878e-03 eta 4:09:22 +epoch [17/50] batch [190/500] time 0.885 (0.890) data 0.000 (0.004) loss 1.4561 (1.1150) acc 71.8750 (72.8125) lr 1.5878e-03 eta 4:09:14 +epoch [17/50] batch [195/500] time 0.868 (0.889) data 0.000 (0.004) loss 1.7656 (1.1202) acc 53.1250 (72.6763) lr 1.5878e-03 eta 4:09:04 +epoch [17/50] batch [200/500] time 0.881 (0.889) data 0.000 (0.004) loss 1.0430 (1.1238) acc 62.5000 (72.5000) lr 1.5878e-03 eta 4:08:53 +epoch [17/50] batch [205/500] time 0.883 (0.889) data 0.000 (0.004) loss 0.5176 (1.1274) acc 84.3750 (72.4390) lr 1.5878e-03 eta 4:08:45 +epoch [17/50] batch [210/500] time 0.905 (0.889) data 0.000 (0.004) loss 1.3350 (1.1231) acc 53.1250 (72.3810) lr 1.5878e-03 eta 4:08:45 +epoch [17/50] batch 
[215/500] time 0.863 (0.889) data 0.000 (0.003) loss 1.1670 (1.1270) acc 78.1250 (72.4128) lr 1.5878e-03 eta 4:08:35 +epoch [17/50] batch [220/500] time 0.872 (0.888) data 0.000 (0.003) loss 1.6338 (1.1309) acc 68.7500 (72.4006) lr 1.5878e-03 eta 4:08:27 +epoch [17/50] batch [225/500] time 0.907 (0.888) data 0.000 (0.003) loss 0.9521 (1.1305) acc 65.6250 (72.3750) lr 1.5878e-03 eta 4:08:23 +epoch [17/50] batch [230/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.0215 (1.1323) acc 68.7500 (72.2962) lr 1.5878e-03 eta 4:08:11 +epoch [17/50] batch [235/500] time 0.905 (0.888) data 0.000 (0.003) loss 1.3252 (1.1301) acc 62.5000 (72.2872) lr 1.5878e-03 eta 4:08:05 +epoch [17/50] batch [240/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.0469 (1.1292) acc 78.1250 (72.3047) lr 1.5878e-03 eta 4:07:55 +epoch [17/50] batch [245/500] time 0.881 (0.888) data 0.001 (0.003) loss 1.1797 (1.1311) acc 71.8750 (72.3342) lr 1.5878e-03 eta 4:07:58 +epoch [17/50] batch [250/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.1035 (1.1303) acc 65.6250 (72.3000) lr 1.5878e-03 eta 4:07:48 +epoch [17/50] batch [255/500] time 0.865 (0.887) data 0.000 (0.003) loss 1.1387 (1.1274) acc 71.8750 (72.3162) lr 1.5878e-03 eta 4:07:36 +epoch [17/50] batch [260/500] time 0.908 (0.887) data 0.000 (0.003) loss 1.5020 (1.1274) acc 71.8750 (72.3798) lr 1.5878e-03 eta 4:07:30 +epoch [17/50] batch [265/500] time 0.859 (0.887) data 0.000 (0.003) loss 1.1689 (1.1277) acc 75.0000 (72.3113) lr 1.5878e-03 eta 4:07:24 +epoch [17/50] batch [270/500] time 0.875 (0.887) data 0.000 (0.003) loss 1.4590 (1.1286) acc 71.8750 (72.3727) lr 1.5878e-03 eta 4:07:17 +epoch [17/50] batch [275/500] time 0.869 (0.887) data 0.000 (0.003) loss 0.9751 (1.1317) acc 71.8750 (72.2614) lr 1.5878e-03 eta 4:07:17 +epoch [17/50] batch [280/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.1885 (1.1328) acc 75.0000 (72.2656) lr 1.5878e-03 eta 4:07:13 +epoch [17/50] batch [285/500] time 0.895 (0.887) data 0.000 (0.003) loss 0.9609 
(1.1306) acc 68.7500 (72.3026) lr 1.5878e-03 eta 4:07:05 +epoch [17/50] batch [290/500] time 0.883 (0.887) data 0.000 (0.003) loss 1.0771 (1.1295) acc 75.0000 (72.3060) lr 1.5878e-03 eta 4:07:00 +epoch [17/50] batch [295/500] time 0.888 (0.887) data 0.000 (0.003) loss 1.1221 (1.1302) acc 65.6250 (72.2140) lr 1.5878e-03 eta 4:06:51 +epoch [17/50] batch [300/500] time 0.855 (0.887) data 0.000 (0.003) loss 1.2891 (1.1267) acc 59.3750 (72.2812) lr 1.5878e-03 eta 4:06:44 +epoch [17/50] batch [305/500] time 0.872 (0.886) data 0.000 (0.003) loss 0.9136 (1.1289) acc 68.7500 (72.1311) lr 1.5878e-03 eta 4:06:34 +epoch [17/50] batch [310/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.8047 (1.1302) acc 81.2500 (72.1573) lr 1.5878e-03 eta 4:06:32 +epoch [17/50] batch [315/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.7710 (1.1272) acc 78.1250 (72.2024) lr 1.5878e-03 eta 4:06:23 +epoch [17/50] batch [320/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.8813 (1.1273) acc 81.2500 (72.1777) lr 1.5878e-03 eta 4:06:24 +epoch [17/50] batch [325/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.1387 (1.1258) acc 75.0000 (72.2596) lr 1.5878e-03 eta 4:06:17 +epoch [17/50] batch [330/500] time 0.871 (0.886) data 0.000 (0.002) loss 0.9512 (1.1234) acc 87.5000 (72.3485) lr 1.5878e-03 eta 4:06:13 +epoch [17/50] batch [335/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.2051 (1.1258) acc 75.0000 (72.3601) lr 1.5878e-03 eta 4:06:08 +epoch [17/50] batch [340/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.2441 (1.1251) acc 62.5000 (72.3805) lr 1.5878e-03 eta 4:06:00 +epoch [17/50] batch [345/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.0254 (1.1266) acc 75.0000 (72.3551) lr 1.5878e-03 eta 4:05:54 +epoch [17/50] batch [350/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.2529 (1.1263) acc 65.6250 (72.3750) lr 1.5878e-03 eta 4:05:48 +epoch [17/50] batch [355/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.2861 (1.1246) acc 62.5000 (72.3944) lr 1.5878e-03 eta 4:05:43 +epoch 
[17/50] batch [360/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.2422 (1.1244) acc 65.6250 (72.3785) lr 1.5878e-03 eta 4:05:39 +epoch [17/50] batch [365/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.1660 (1.1270) acc 71.8750 (72.3373) lr 1.5878e-03 eta 4:05:35 +epoch [17/50] batch [370/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.7510 (1.1272) acc 78.1250 (72.3142) lr 1.5878e-03 eta 4:05:31 +epoch [17/50] batch [375/500] time 0.917 (0.886) data 0.000 (0.002) loss 1.6543 (1.1292) acc 65.6250 (72.3000) lr 1.5878e-03 eta 4:05:30 +epoch [17/50] batch [380/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.8057 (1.1319) acc 59.3750 (72.2368) lr 1.5878e-03 eta 4:05:26 +epoch [17/50] batch [385/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.2188 (1.1320) acc 65.6250 (72.1916) lr 1.5878e-03 eta 4:05:19 +epoch [17/50] batch [390/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.2988 (1.1312) acc 68.7500 (72.2276) lr 1.5878e-03 eta 4:05:17 +epoch [17/50] batch [395/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.2041 (1.1315) acc 68.7500 (72.2152) lr 1.5878e-03 eta 4:05:09 +epoch [17/50] batch [400/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.9468 (1.1303) acc 75.0000 (72.2422) lr 1.5878e-03 eta 4:05:04 +epoch [17/50] batch [405/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.6523 (1.1312) acc 56.2500 (72.2377) lr 1.5878e-03 eta 4:04:59 +epoch [17/50] batch [410/500] time 0.861 (0.886) data 0.000 (0.002) loss 0.9390 (1.1311) acc 68.7500 (72.2027) lr 1.5878e-03 eta 4:04:56 +epoch [17/50] batch [415/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.1934 (1.1317) acc 62.5000 (72.1762) lr 1.5878e-03 eta 4:04:53 +epoch [17/50] batch [420/500] time 0.899 (0.886) data 0.000 (0.002) loss 2.1270 (1.1349) acc 46.8750 (72.0610) lr 1.5878e-03 eta 4:04:47 +epoch [17/50] batch [425/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.5850 (1.1356) acc 59.3750 (72.0515) lr 1.5878e-03 eta 4:04:42 +epoch [17/50] batch [430/500] time 0.896 (0.886) data 0.000 (0.002) loss 
1.1123 (1.1345) acc 68.7500 (71.9985) lr 1.5878e-03 eta 4:04:39 +epoch [17/50] batch [435/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1367 (1.1357) acc 71.8750 (71.9971) lr 1.5878e-03 eta 4:04:38 +epoch [17/50] batch [440/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.1123 (1.1372) acc 68.7500 (71.9815) lr 1.5878e-03 eta 4:04:31 +epoch [17/50] batch [445/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.0557 (1.1356) acc 65.6250 (72.0225) lr 1.5878e-03 eta 4:04:27 +epoch [17/50] batch [450/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.4424 (1.1346) acc 62.5000 (72.0764) lr 1.5878e-03 eta 4:04:21 +epoch [17/50] batch [455/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.4453 (1.1346) acc 65.6250 (72.0467) lr 1.5878e-03 eta 4:04:16 +epoch [17/50] batch [460/500] time 0.861 (0.886) data 0.000 (0.002) loss 1.3115 (1.1348) acc 71.8750 (72.0312) lr 1.5878e-03 eta 4:04:11 +epoch [17/50] batch [465/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.0557 (1.1326) acc 68.7500 (72.0901) lr 1.5878e-03 eta 4:04:06 +epoch [17/50] batch [470/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.3379 (1.1349) acc 68.7500 (72.0346) lr 1.5878e-03 eta 4:04:00 +epoch [17/50] batch [475/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.1152 (1.1333) acc 75.0000 (72.0921) lr 1.5878e-03 eta 4:03:55 +epoch [17/50] batch [480/500] time 0.911 (0.886) data 0.000 (0.002) loss 1.5771 (1.1341) acc 75.0000 (72.1159) lr 1.5878e-03 eta 4:03:52 +epoch [17/50] batch [485/500] time 0.924 (0.886) data 0.001 (0.002) loss 1.5645 (1.1355) acc 62.5000 (72.0941) lr 1.5878e-03 eta 4:03:48 +epoch [17/50] batch [490/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.6348 (1.1356) acc 62.5000 (72.1365) lr 1.5878e-03 eta 4:03:44 +epoch [17/50] batch [495/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.0283 (1.1340) acc 75.0000 (72.1528) lr 1.5878e-03 eta 4:03:40 +epoch [17/50] batch [500/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.5850 (1.1384) acc 87.5000 (72.1188) lr 1.5358e-03 eta 4:03:35 
+epoch [18/50] batch [5/500] time 0.893 (1.031) data 0.000 (0.149) loss 1.0967 (1.0416) acc 75.0000 (75.0000) lr 1.5358e-03 eta 4:43:20 +epoch [18/50] batch [10/500] time 0.874 (0.959) data 0.000 (0.075) loss 0.6133 (1.0439) acc 84.3750 (74.0625) lr 1.5358e-03 eta 4:23:35 +epoch [18/50] batch [15/500] time 0.864 (0.929) data 0.000 (0.050) loss 0.8716 (1.0582) acc 78.1250 (74.3750) lr 1.5358e-03 eta 4:15:14 +epoch [18/50] batch [20/500] time 0.900 (0.916) data 0.000 (0.037) loss 0.8877 (0.9809) acc 75.0000 (75.6250) lr 1.5358e-03 eta 4:11:32 +epoch [18/50] batch [25/500] time 0.884 (0.915) data 0.000 (0.030) loss 0.9741 (1.0430) acc 75.0000 (74.6250) lr 1.5358e-03 eta 4:11:11 +epoch [18/50] batch [30/500] time 0.860 (0.908) data 0.000 (0.025) loss 0.8159 (1.0272) acc 81.2500 (74.8958) lr 1.5358e-03 eta 4:09:14 +epoch [18/50] batch [35/500] time 0.880 (0.904) data 0.000 (0.022) loss 1.6055 (1.0764) acc 50.0000 (73.3929) lr 1.5358e-03 eta 4:08:10 +epoch [18/50] batch [40/500] time 0.859 (0.900) data 0.000 (0.019) loss 1.4170 (1.1210) acc 68.7500 (72.3438) lr 1.5358e-03 eta 4:06:57 +epoch [18/50] batch [45/500] time 0.886 (0.897) data 0.000 (0.017) loss 1.0107 (1.0942) acc 75.0000 (72.9167) lr 1.5358e-03 eta 4:05:54 +epoch [18/50] batch [50/500] time 0.871 (0.895) data 0.000 (0.015) loss 0.8486 (1.1008) acc 81.2500 (72.7500) lr 1.5358e-03 eta 4:05:24 +epoch [18/50] batch [55/500] time 0.888 (0.895) data 0.000 (0.014) loss 1.0117 (1.1041) acc 78.1250 (72.6136) lr 1.5358e-03 eta 4:05:15 +epoch [18/50] batch [60/500] time 0.865 (0.893) data 0.000 (0.013) loss 0.8867 (1.0995) acc 65.6250 (72.3958) lr 1.5358e-03 eta 4:04:46 +epoch [18/50] batch [65/500] time 0.899 (0.893) data 0.000 (0.012) loss 1.1973 (1.1041) acc 75.0000 (72.2596) lr 1.5358e-03 eta 4:04:31 +epoch [18/50] batch [70/500] time 0.911 (0.892) data 0.000 (0.011) loss 1.2266 (1.1130) acc 71.8750 (72.2321) lr 1.5358e-03 eta 4:04:21 +epoch [18/50] batch [75/500] time 0.871 (0.892) data 0.000 (0.010) loss 0.8291 
(1.1295) acc 75.0000 (72.2500) lr 1.5358e-03 eta 4:04:05 +epoch [18/50] batch [80/500] time 0.917 (0.893) data 0.000 (0.010) loss 1.3018 (1.1354) acc 59.3750 (71.9531) lr 1.5358e-03 eta 4:04:18 +epoch [18/50] batch [85/500] time 0.859 (0.892) data 0.000 (0.009) loss 0.8784 (1.1342) acc 68.7500 (71.7279) lr 1.5358e-03 eta 4:03:54 +epoch [18/50] batch [90/500] time 0.884 (0.891) data 0.000 (0.009) loss 0.7964 (1.1270) acc 75.0000 (71.6319) lr 1.5358e-03 eta 4:03:44 +epoch [18/50] batch [95/500] time 0.859 (0.891) data 0.000 (0.008) loss 1.9219 (1.1178) acc 50.0000 (71.7763) lr 1.5358e-03 eta 4:03:41 +epoch [18/50] batch [100/500] time 0.868 (0.890) data 0.000 (0.008) loss 1.5605 (1.1251) acc 68.7500 (71.5938) lr 1.5358e-03 eta 4:03:20 +epoch [18/50] batch [105/500] time 0.856 (0.889) data 0.000 (0.007) loss 1.5508 (1.1299) acc 71.8750 (71.4881) lr 1.5358e-03 eta 4:02:57 +epoch [18/50] batch [110/500] time 0.894 (0.889) data 0.000 (0.007) loss 0.6157 (1.1297) acc 84.3750 (71.5057) lr 1.5358e-03 eta 4:02:51 +epoch [18/50] batch [115/500] time 0.914 (0.889) data 0.000 (0.007) loss 0.5679 (1.1170) acc 84.3750 (71.9293) lr 1.5358e-03 eta 4:02:48 +epoch [18/50] batch [120/500] time 0.885 (0.889) data 0.000 (0.006) loss 1.1016 (1.1142) acc 75.0000 (71.8490) lr 1.5358e-03 eta 4:02:42 +epoch [18/50] batch [125/500] time 0.873 (0.889) data 0.000 (0.006) loss 1.0420 (1.1150) acc 65.6250 (71.6500) lr 1.5358e-03 eta 4:02:43 +epoch [18/50] batch [130/500] time 0.881 (0.890) data 0.000 (0.006) loss 0.9600 (1.1110) acc 71.8750 (71.5385) lr 1.5358e-03 eta 4:02:47 +epoch [18/50] batch [135/500] time 0.881 (0.890) data 0.000 (0.006) loss 1.0518 (1.1045) acc 65.6250 (71.6204) lr 1.5358e-03 eta 4:02:40 +epoch [18/50] batch [140/500] time 0.881 (0.889) data 0.000 (0.006) loss 1.5361 (1.1088) acc 65.6250 (71.5625) lr 1.5358e-03 eta 4:02:26 +epoch [18/50] batch [145/500] time 0.918 (0.889) data 0.000 (0.005) loss 1.2148 (1.1063) acc 71.8750 (71.7457) lr 1.5358e-03 eta 4:02:23 +epoch [18/50] 
batch [150/500] time 0.883 (0.889) data 0.000 (0.005) loss 1.7461 (1.1099) acc 71.8750 (71.6875) lr 1.5358e-03 eta 4:02:18 +epoch [18/50] batch [155/500] time 0.852 (0.889) data 0.000 (0.005) loss 1.0938 (1.1134) acc 75.0000 (71.6734) lr 1.5358e-03 eta 4:02:11 +epoch [18/50] batch [160/500] time 0.872 (0.889) data 0.000 (0.005) loss 0.6870 (1.1102) acc 90.6250 (71.8164) lr 1.5358e-03 eta 4:01:58 +epoch [18/50] batch [165/500] time 0.871 (0.888) data 0.000 (0.005) loss 1.4238 (1.1060) acc 71.8750 (71.9508) lr 1.5358e-03 eta 4:01:45 +epoch [18/50] batch [170/500] time 0.877 (0.888) data 0.000 (0.005) loss 1.2246 (1.1042) acc 68.7500 (71.8934) lr 1.5358e-03 eta 4:01:45 +epoch [18/50] batch [175/500] time 0.877 (0.888) data 0.000 (0.005) loss 1.3135 (1.1011) acc 71.8750 (72.0179) lr 1.5358e-03 eta 4:01:34 +epoch [18/50] batch [180/500] time 0.899 (0.888) data 0.000 (0.004) loss 0.8608 (1.0953) acc 78.1250 (72.1701) lr 1.5358e-03 eta 4:01:29 +epoch [18/50] batch [185/500] time 0.901 (0.888) data 0.000 (0.004) loss 0.8062 (1.0867) acc 78.1250 (72.4324) lr 1.5358e-03 eta 4:01:30 +epoch [18/50] batch [190/500] time 0.887 (0.888) data 0.000 (0.004) loss 1.4189 (1.0858) acc 68.7500 (72.3684) lr 1.5358e-03 eta 4:01:27 +epoch [18/50] batch [195/500] time 0.887 (0.888) data 0.000 (0.004) loss 0.7402 (1.0817) acc 87.5000 (72.4679) lr 1.5358e-03 eta 4:01:23 +epoch [18/50] batch [200/500] time 0.858 (0.888) data 0.000 (0.004) loss 1.4658 (1.0856) acc 59.3750 (72.4375) lr 1.5358e-03 eta 4:01:13 +epoch [18/50] batch [205/500] time 0.896 (0.888) data 0.000 (0.004) loss 0.7163 (1.0878) acc 84.3750 (72.5000) lr 1.5358e-03 eta 4:01:11 +epoch [18/50] batch [210/500] time 0.899 (0.888) data 0.000 (0.004) loss 1.6826 (1.0901) acc 65.6250 (72.5149) lr 1.5358e-03 eta 4:01:06 +epoch [18/50] batch [215/500] time 0.874 (0.888) data 0.000 (0.004) loss 0.6543 (1.0906) acc 84.3750 (72.5145) lr 1.5358e-03 eta 4:00:59 +epoch [18/50] batch [220/500] time 0.895 (0.888) data 0.000 (0.004) loss 1.8916 
(1.0968) acc 65.6250 (72.4290) lr 1.5358e-03 eta 4:00:52 +epoch [18/50] batch [225/500] time 0.872 (0.888) data 0.000 (0.004) loss 0.8701 (1.0919) acc 71.8750 (72.5000) lr 1.5358e-03 eta 4:00:50 +epoch [18/50] batch [230/500] time 0.862 (0.888) data 0.000 (0.003) loss 0.8271 (1.0893) acc 75.0000 (72.4864) lr 1.5358e-03 eta 4:00:40 +epoch [18/50] batch [235/500] time 0.863 (0.887) data 0.000 (0.003) loss 1.0400 (1.0926) acc 68.7500 (72.3936) lr 1.5358e-03 eta 4:00:30 +epoch [18/50] batch [240/500] time 0.873 (0.887) data 0.000 (0.003) loss 1.3613 (1.0995) acc 71.8750 (72.2786) lr 1.5358e-03 eta 4:00:29 +epoch [18/50] batch [245/500] time 0.869 (0.887) data 0.000 (0.003) loss 0.7104 (1.0988) acc 78.1250 (72.3087) lr 1.5358e-03 eta 4:00:19 +epoch [18/50] batch [250/500] time 0.878 (0.887) data 0.000 (0.003) loss 0.7412 (1.0981) acc 81.2500 (72.3625) lr 1.5358e-03 eta 4:00:10 +epoch [18/50] batch [255/500] time 0.896 (0.887) data 0.000 (0.003) loss 0.5933 (1.0977) acc 84.3750 (72.3529) lr 1.5358e-03 eta 4:00:08 +epoch [18/50] batch [260/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.5273 (1.0982) acc 65.6250 (72.3558) lr 1.5358e-03 eta 4:00:04 +epoch [18/50] batch [265/500] time 0.998 (0.887) data 0.000 (0.003) loss 1.1572 (1.0969) acc 75.0000 (72.3703) lr 1.5358e-03 eta 4:00:04 +epoch [18/50] batch [270/500] time 0.873 (0.887) data 0.000 (0.003) loss 0.5244 (1.0966) acc 84.3750 (72.3611) lr 1.5358e-03 eta 3:59:57 +epoch [18/50] batch [275/500] time 0.889 (0.887) data 0.000 (0.003) loss 0.7700 (1.0967) acc 78.1250 (72.4205) lr 1.5358e-03 eta 3:59:51 +epoch [18/50] batch [280/500] time 0.905 (0.887) data 0.000 (0.003) loss 1.3145 (1.0970) acc 71.8750 (72.4554) lr 1.5358e-03 eta 3:59:45 +epoch [18/50] batch [285/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.1631 (1.1001) acc 68.7500 (72.3465) lr 1.5358e-03 eta 3:59:37 +epoch [18/50] batch [290/500] time 0.893 (0.887) data 0.000 (0.003) loss 1.4727 (1.1010) acc 62.5000 (72.3815) lr 1.5358e-03 eta 3:59:33 +epoch 
[18/50] batch [295/500] time 0.913 (0.887) data 0.000 (0.003) loss 1.4834 (1.1043) acc 65.6250 (72.3093) lr 1.5358e-03 eta 3:59:30 +epoch [18/50] batch [300/500] time 0.858 (0.886) data 0.000 (0.003) loss 1.2061 (1.1041) acc 71.8750 (72.2708) lr 1.5358e-03 eta 3:59:20 +epoch [18/50] batch [305/500] time 0.862 (0.886) data 0.000 (0.003) loss 0.4436 (1.1018) acc 93.7500 (72.3668) lr 1.5358e-03 eta 3:59:12 +epoch [18/50] batch [310/500] time 0.875 (0.886) data 0.000 (0.003) loss 1.0684 (1.1030) acc 78.1250 (72.3488) lr 1.5358e-03 eta 3:59:07 +epoch [18/50] batch [315/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.6548 (1.1020) acc 81.2500 (72.3810) lr 1.5358e-03 eta 3:59:03 +epoch [18/50] batch [320/500] time 0.859 (0.886) data 0.000 (0.003) loss 1.9053 (1.1066) acc 59.3750 (72.3145) lr 1.5358e-03 eta 3:58:57 +epoch [18/50] batch [325/500] time 0.860 (0.886) data 0.000 (0.003) loss 1.4385 (1.1102) acc 62.5000 (72.2212) lr 1.5358e-03 eta 3:58:53 +epoch [18/50] batch [330/500] time 0.876 (0.886) data 0.000 (0.003) loss 0.7876 (1.1114) acc 75.0000 (72.1591) lr 1.5358e-03 eta 3:58:48 +epoch [18/50] batch [335/500] time 0.851 (0.886) data 0.000 (0.002) loss 1.4814 (1.1115) acc 68.7500 (72.2108) lr 1.5358e-03 eta 3:58:43 +epoch [18/50] batch [340/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.8628 (1.1092) acc 75.0000 (72.2518) lr 1.5358e-03 eta 3:58:37 +epoch [18/50] batch [345/500] time 0.859 (0.886) data 0.000 (0.002) loss 0.9683 (1.1076) acc 75.0000 (72.2645) lr 1.5358e-03 eta 3:58:28 +epoch [18/50] batch [350/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.0742 (1.1084) acc 81.2500 (72.2768) lr 1.5358e-03 eta 3:58:22 +epoch [18/50] batch [355/500] time 0.879 (0.886) data 0.000 (0.002) loss 0.7993 (1.1087) acc 81.2500 (72.2447) lr 1.5358e-03 eta 3:58:20 +epoch [18/50] batch [360/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.1230 (1.1091) acc 71.8750 (72.2222) lr 1.5358e-03 eta 3:58:13 +epoch [18/50] batch [365/500] time 0.870 (0.886) data 0.000 (0.002) loss 
1.2324 (1.1122) acc 56.2500 (72.1747) lr 1.5358e-03 eta 3:58:13 +epoch [18/50] batch [370/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.2520 (1.1107) acc 65.6250 (72.2128) lr 1.5358e-03 eta 3:58:06 +epoch [18/50] batch [375/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.0986 (1.1124) acc 78.1250 (72.2000) lr 1.5358e-03 eta 3:58:03 +epoch [18/50] batch [380/500] time 0.857 (0.886) data 0.000 (0.002) loss 0.9390 (1.1129) acc 71.8750 (72.1546) lr 1.5358e-03 eta 3:57:55 +epoch [18/50] batch [385/500] time 0.869 (0.885) data 0.000 (0.002) loss 0.7290 (1.1133) acc 84.3750 (72.1591) lr 1.5358e-03 eta 3:57:49 +epoch [18/50] batch [390/500] time 0.891 (0.885) data 0.000 (0.002) loss 1.3848 (1.1176) acc 65.6250 (72.0833) lr 1.5358e-03 eta 3:57:43 +epoch [18/50] batch [395/500] time 0.898 (0.885) data 0.000 (0.002) loss 0.8921 (1.1149) acc 75.0000 (72.1123) lr 1.5358e-03 eta 3:57:40 +epoch [18/50] batch [400/500] time 0.910 (0.885) data 0.000 (0.002) loss 1.1709 (1.1145) acc 62.5000 (72.0859) lr 1.5358e-03 eta 3:57:34 +epoch [18/50] batch [405/500] time 0.887 (0.885) data 0.000 (0.002) loss 1.0449 (1.1140) acc 78.1250 (72.1065) lr 1.5358e-03 eta 3:57:29 +epoch [18/50] batch [410/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.1367 (1.1169) acc 68.7500 (72.0198) lr 1.5358e-03 eta 3:57:25 +epoch [18/50] batch [415/500] time 0.903 (0.885) data 0.000 (0.002) loss 0.8242 (1.1160) acc 71.8750 (72.0256) lr 1.5358e-03 eta 3:57:21 +epoch [18/50] batch [420/500] time 0.902 (0.885) data 0.000 (0.002) loss 1.1152 (1.1143) acc 71.8750 (72.0610) lr 1.5358e-03 eta 3:57:18 +epoch [18/50] batch [425/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.6953 (1.1160) acc 56.2500 (72.0368) lr 1.5358e-03 eta 3:57:15 +epoch [18/50] batch [430/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.0723 (1.1131) acc 68.7500 (72.1003) lr 1.5358e-03 eta 3:57:13 +epoch [18/50] batch [435/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.0527 (1.1140) acc 68.7500 (72.0690) lr 1.5358e-03 eta 3:57:09 
+epoch [18/50] batch [440/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.5469 (1.1153) acc 65.6250 (71.9957) lr 1.5358e-03 eta 3:57:03 +epoch [18/50] batch [445/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.1416 (1.1160) acc 75.0000 (71.9663) lr 1.5358e-03 eta 3:57:01 +epoch [18/50] batch [450/500] time 0.914 (0.886) data 0.000 (0.002) loss 1.1963 (1.1163) acc 62.5000 (71.9097) lr 1.5358e-03 eta 3:56:56 +epoch [18/50] batch [455/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.7432 (1.1157) acc 75.0000 (71.9025) lr 1.5358e-03 eta 3:56:52 +epoch [18/50] batch [460/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.1289 (1.1177) acc 71.8750 (71.9090) lr 1.5358e-03 eta 3:56:50 +epoch [18/50] batch [465/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.1230 (1.1181) acc 68.7500 (71.9086) lr 1.5358e-03 eta 3:56:44 +epoch [18/50] batch [470/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.8096 (1.1165) acc 78.1250 (71.9282) lr 1.5358e-03 eta 3:56:37 +epoch [18/50] batch [475/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.0723 (1.1180) acc 78.1250 (71.9013) lr 1.5358e-03 eta 3:56:33 +epoch [18/50] batch [480/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.9331 (1.1190) acc 75.0000 (71.8620) lr 1.5358e-03 eta 3:56:27 +epoch [18/50] batch [485/500] time 0.877 (0.886) data 0.000 (0.002) loss 2.1504 (1.1218) acc 53.1250 (71.7977) lr 1.5358e-03 eta 3:56:22 +epoch [18/50] batch [490/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.2012 (1.1221) acc 71.8750 (71.8240) lr 1.5358e-03 eta 3:56:15 +epoch [18/50] batch [495/500] time 0.856 (0.885) data 0.000 (0.002) loss 1.5635 (1.1226) acc 62.5000 (71.7992) lr 1.5358e-03 eta 3:56:09 +epoch [18/50] batch [500/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.1396 (1.1238) acc 59.3750 (71.7188) lr 1.4818e-03 eta 3:56:03 +epoch [19/50] batch [5/500] time 0.864 (1.029) data 0.000 (0.139) loss 1.3359 (1.2385) acc 68.7500 (68.7500) lr 1.4818e-03 eta 4:34:12 +epoch [19/50] batch [10/500] time 0.877 (0.957) data 0.000 (0.070) 
loss 0.7744 (1.1491) acc 78.1250 (70.9375) lr 1.4818e-03 eta 4:14:57 +epoch [19/50] batch [15/500] time 0.887 (0.932) data 0.000 (0.046) loss 1.0303 (1.1187) acc 68.7500 (70.8333) lr 1.4818e-03 eta 4:08:21 +epoch [19/50] batch [20/500] time 0.881 (0.921) data 0.000 (0.035) loss 1.7842 (1.0890) acc 59.3750 (71.8750) lr 1.4818e-03 eta 4:05:18 +epoch [19/50] batch [25/500] time 0.879 (0.912) data 0.000 (0.028) loss 1.0596 (1.0735) acc 75.0000 (72.0000) lr 1.4818e-03 eta 4:02:55 +epoch [19/50] batch [30/500] time 0.871 (0.908) data 0.000 (0.023) loss 1.3984 (1.1484) acc 68.7500 (71.1458) lr 1.4818e-03 eta 4:01:38 +epoch [19/50] batch [35/500] time 0.879 (0.906) data 0.000 (0.020) loss 1.1592 (1.1294) acc 71.8750 (71.4286) lr 1.4818e-03 eta 4:01:04 +epoch [19/50] batch [40/500] time 0.870 (0.904) data 0.000 (0.018) loss 1.1680 (1.1168) acc 68.7500 (71.7188) lr 1.4818e-03 eta 4:00:20 +epoch [19/50] batch [45/500] time 0.900 (0.903) data 0.000 (0.016) loss 0.9536 (1.1354) acc 75.0000 (71.6667) lr 1.4818e-03 eta 3:59:59 +epoch [19/50] batch [50/500] time 0.892 (0.901) data 0.000 (0.014) loss 1.7520 (1.1593) acc 59.3750 (71.0000) lr 1.4818e-03 eta 3:59:24 +epoch [19/50] batch [55/500] time 0.897 (0.900) data 0.000 (0.013) loss 1.4355 (1.1462) acc 68.7500 (71.3636) lr 1.4818e-03 eta 3:59:17 +epoch [19/50] batch [60/500] time 0.870 (0.899) data 0.000 (0.012) loss 1.2344 (1.1213) acc 68.7500 (71.9792) lr 1.4818e-03 eta 3:58:46 +epoch [19/50] batch [65/500] time 0.871 (0.899) data 0.001 (0.011) loss 1.3066 (1.1235) acc 68.7500 (72.0192) lr 1.4818e-03 eta 3:58:53 +epoch [19/50] batch [70/500] time 0.900 (0.898) data 0.000 (0.010) loss 1.1689 (1.1253) acc 62.5000 (72.0982) lr 1.4818e-03 eta 3:58:26 +epoch [19/50] batch [75/500] time 0.908 (0.898) data 0.000 (0.010) loss 1.8008 (1.1468) acc 50.0000 (71.4583) lr 1.4818e-03 eta 3:58:22 +epoch [19/50] batch [80/500] time 0.869 (0.897) data 0.000 (0.009) loss 0.9263 (1.1516) acc 78.1250 (71.4453) lr 1.4818e-03 eta 3:58:07 +epoch 
[19/50] batch [85/500] time 0.924 (0.896) data 0.000 (0.008) loss 1.1904 (1.1440) acc 78.1250 (71.7647) lr 1.4818e-03 eta 3:57:45 +epoch [19/50] batch [90/500] time 0.862 (0.896) data 0.000 (0.008) loss 0.4741 (1.1319) acc 81.2500 (71.9444) lr 1.4818e-03 eta 3:57:27 +epoch [19/50] batch [95/500] time 0.905 (0.895) data 0.000 (0.008) loss 0.4631 (1.1256) acc 87.5000 (72.0395) lr 1.4818e-03 eta 3:57:14 +epoch [19/50] batch [100/500] time 0.892 (0.895) data 0.000 (0.007) loss 0.6294 (1.1165) acc 71.8750 (71.9375) lr 1.4818e-03 eta 3:57:07 +epoch [19/50] batch [105/500] time 0.866 (0.894) data 0.000 (0.007) loss 0.8716 (1.1115) acc 75.0000 (72.2619) lr 1.4818e-03 eta 3:56:48 +epoch [19/50] batch [110/500] time 0.876 (0.894) data 0.000 (0.007) loss 0.6826 (1.1110) acc 78.1250 (72.2443) lr 1.4818e-03 eta 3:56:41 +epoch [19/50] batch [115/500] time 0.851 (0.893) data 0.000 (0.006) loss 1.4971 (1.1126) acc 65.6250 (72.3098) lr 1.4818e-03 eta 3:56:22 +epoch [19/50] batch [120/500] time 0.853 (0.892) data 0.000 (0.006) loss 1.4814 (1.1062) acc 68.7500 (72.4219) lr 1.4818e-03 eta 3:56:02 +epoch [19/50] batch [125/500] time 0.875 (0.891) data 0.000 (0.006) loss 1.0400 (1.1008) acc 71.8750 (72.5500) lr 1.4818e-03 eta 3:55:41 +epoch [19/50] batch [130/500] time 0.911 (0.891) data 0.000 (0.006) loss 1.4971 (1.1034) acc 59.3750 (72.3317) lr 1.4818e-03 eta 3:55:40 +epoch [19/50] batch [135/500] time 0.883 (0.891) data 0.000 (0.005) loss 1.1963 (1.1032) acc 68.7500 (72.2685) lr 1.4818e-03 eta 3:55:33 +epoch [19/50] batch [140/500] time 0.856 (0.891) data 0.000 (0.005) loss 0.8379 (1.0965) acc 78.1250 (72.3214) lr 1.4818e-03 eta 3:55:24 +epoch [19/50] batch [145/500] time 0.918 (0.891) data 0.000 (0.005) loss 1.1865 (1.0947) acc 65.6250 (72.2629) lr 1.4818e-03 eta 3:55:20 +epoch [19/50] batch [150/500] time 0.904 (0.891) data 0.000 (0.005) loss 0.6885 (1.0923) acc 78.1250 (72.3750) lr 1.4818e-03 eta 3:55:16 +epoch [19/50] batch [155/500] time 0.900 (0.891) data 0.000 (0.005) loss 
1.2568 (1.0885) acc 78.1250 (72.5806) lr 1.4818e-03 eta 3:55:12 +epoch [19/50] batch [160/500] time 0.884 (0.890) data 0.000 (0.005) loss 0.8262 (1.0836) acc 78.1250 (72.7344) lr 1.4818e-03 eta 3:54:58 +epoch [19/50] batch [165/500] time 0.914 (0.890) data 0.000 (0.004) loss 1.1865 (1.0830) acc 75.0000 (72.7273) lr 1.4818e-03 eta 3:54:51 +epoch [19/50] batch [170/500] time 0.900 (0.890) data 0.000 (0.004) loss 1.2041 (1.0893) acc 68.7500 (72.5551) lr 1.4818e-03 eta 3:54:45 +epoch [19/50] batch [175/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.5684 (1.0916) acc 62.5000 (72.5179) lr 1.4818e-03 eta 3:54:46 +epoch [19/50] batch [180/500] time 0.853 (0.890) data 0.000 (0.004) loss 0.8345 (1.0952) acc 78.1250 (72.4479) lr 1.4818e-03 eta 3:54:36 +epoch [19/50] batch [185/500] time 0.852 (0.889) data 0.000 (0.004) loss 0.9438 (1.0918) acc 75.0000 (72.4493) lr 1.4818e-03 eta 3:54:22 +epoch [19/50] batch [190/500] time 0.868 (0.889) data 0.000 (0.004) loss 0.7666 (1.0897) acc 84.3750 (72.4342) lr 1.4818e-03 eta 3:54:11 +epoch [19/50] batch [195/500] time 0.897 (0.889) data 0.000 (0.004) loss 1.1260 (1.0916) acc 71.8750 (72.4679) lr 1.4818e-03 eta 3:54:06 +epoch [19/50] batch [200/500] time 0.850 (0.888) data 0.000 (0.004) loss 1.5566 (1.0867) acc 65.6250 (72.6250) lr 1.4818e-03 eta 3:53:55 +epoch [19/50] batch [205/500] time 0.910 (0.888) data 0.000 (0.004) loss 0.9438 (1.0835) acc 78.1250 (72.7591) lr 1.4818e-03 eta 3:53:52 +epoch [19/50] batch [210/500] time 0.903 (0.889) data 0.000 (0.004) loss 0.8462 (1.0861) acc 75.0000 (72.6637) lr 1.4818e-03 eta 3:53:53 +epoch [19/50] batch [215/500] time 0.865 (0.889) data 0.000 (0.003) loss 1.3125 (1.0855) acc 68.7500 (72.7326) lr 1.4818e-03 eta 3:53:51 +epoch [19/50] batch [220/500] time 0.882 (0.889) data 0.000 (0.003) loss 1.1729 (1.0866) acc 71.8750 (72.6989) lr 1.4818e-03 eta 3:53:43 +epoch [19/50] batch [225/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.2266 (1.0876) acc 68.7500 (72.7500) lr 1.4818e-03 eta 3:53:38 
+epoch [19/50] batch [230/500] time 0.912 (0.889) data 0.000 (0.003) loss 0.9688 (1.0903) acc 75.0000 (72.7174) lr 1.4818e-03 eta 3:53:36 +epoch [19/50] batch [235/500] time 0.888 (0.889) data 0.000 (0.003) loss 0.9517 (1.0928) acc 81.2500 (72.6729) lr 1.4818e-03 eta 3:53:28 +epoch [19/50] batch [240/500] time 0.887 (0.889) data 0.000 (0.003) loss 1.9688 (1.0975) acc 71.8750 (72.6823) lr 1.4818e-03 eta 3:53:24 +epoch [19/50] batch [245/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.8428 (1.0949) acc 78.1250 (72.7296) lr 1.4818e-03 eta 3:53:19 +epoch [19/50] batch [250/500] time 0.980 (0.889) data 0.000 (0.003) loss 1.0566 (1.0939) acc 75.0000 (72.7375) lr 1.4818e-03 eta 3:53:19 +epoch [19/50] batch [255/500] time 0.860 (0.889) data 0.000 (0.003) loss 0.8779 (1.0946) acc 75.0000 (72.7206) lr 1.4818e-03 eta 3:53:10 +epoch [19/50] batch [260/500] time 0.853 (0.888) data 0.000 (0.003) loss 0.8379 (1.0939) acc 81.2500 (72.7163) lr 1.4818e-03 eta 3:53:01 +epoch [19/50] batch [265/500] time 0.864 (0.888) data 0.000 (0.003) loss 0.9370 (1.0945) acc 84.3750 (72.7594) lr 1.4818e-03 eta 3:52:54 +epoch [19/50] batch [270/500] time 0.879 (0.888) data 0.000 (0.003) loss 1.0107 (1.0922) acc 84.3750 (72.7778) lr 1.4818e-03 eta 3:52:45 +epoch [19/50] batch [275/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.9893 (1.0961) acc 59.3750 (72.7159) lr 1.4818e-03 eta 3:52:40 +epoch [19/50] batch [280/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.4209 (1.0988) acc 71.8750 (72.6562) lr 1.4818e-03 eta 3:52:37 +epoch [19/50] batch [285/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.8105 (1.0999) acc 59.3750 (72.7303) lr 1.4818e-03 eta 3:52:30 +epoch [19/50] batch [290/500] time 0.878 (0.887) data 0.000 (0.003) loss 0.4875 (1.1001) acc 90.6250 (72.7694) lr 1.4818e-03 eta 3:52:22 +epoch [19/50] batch [295/500] time 0.891 (0.887) data 0.000 (0.003) loss 0.7231 (1.0982) acc 78.1250 (72.8178) lr 1.4818e-03 eta 3:52:15 +epoch [19/50] batch [300/500] time 0.873 (0.887) data 0.000 
(0.003) loss 0.4968 (1.0932) acc 84.3750 (72.8854) lr 1.4818e-03 eta 3:52:10 +epoch [19/50] batch [305/500] time 0.880 (0.887) data 0.000 (0.003) loss 0.5874 (1.0904) acc 84.3750 (72.9201) lr 1.4818e-03 eta 3:52:03 +epoch [19/50] batch [310/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.1689 (1.0903) acc 71.8750 (72.9536) lr 1.4818e-03 eta 3:52:01 +epoch [19/50] batch [315/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.7305 (1.0885) acc 75.0000 (72.9861) lr 1.4818e-03 eta 3:51:52 +epoch [19/50] batch [320/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8135 (1.0908) acc 84.3750 (72.9297) lr 1.4818e-03 eta 3:51:47 +epoch [19/50] batch [325/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.4912 (1.0916) acc 68.7500 (72.9327) lr 1.4818e-03 eta 3:51:41 +epoch [19/50] batch [330/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.2314 (1.0896) acc 56.2500 (72.9545) lr 1.4818e-03 eta 3:51:34 +epoch [19/50] batch [335/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.4990 (1.0927) acc 62.5000 (72.8731) lr 1.4818e-03 eta 3:51:30 +epoch [19/50] batch [340/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.0771 (1.0907) acc 65.6250 (72.8768) lr 1.4818e-03 eta 3:51:27 +epoch [19/50] batch [345/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1768 (1.0918) acc 71.8750 (72.8714) lr 1.4818e-03 eta 3:51:22 +epoch [19/50] batch [350/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.6152 (1.0963) acc 68.7500 (72.7857) lr 1.4818e-03 eta 3:51:23 +epoch [19/50] batch [355/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.1514 (1.0944) acc 71.8750 (72.8345) lr 1.4818e-03 eta 3:51:19 +epoch [19/50] batch [360/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.5430 (1.0941) acc 62.5000 (72.8993) lr 1.4818e-03 eta 3:51:10 +epoch [19/50] batch [365/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.8086 (1.0927) acc 81.2500 (72.9110) lr 1.4818e-03 eta 3:51:03 +epoch [19/50] batch [370/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.2607 (1.0942) acc 71.8750 (72.8547) lr 1.4818e-03 
eta 3:50:56 +epoch [19/50] batch [375/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.2471 (1.0954) acc 62.5000 (72.8250) lr 1.4818e-03 eta 3:50:47 +epoch [19/50] batch [380/500] time 0.876 (0.886) data 0.000 (0.002) loss 0.9111 (1.0966) acc 78.1250 (72.7632) lr 1.4818e-03 eta 3:50:40 +epoch [19/50] batch [385/500] time 0.872 (0.886) data 0.000 (0.002) loss 2.2520 (1.0979) acc 65.6250 (72.7841) lr 1.4818e-03 eta 3:50:35 +epoch [19/50] batch [390/500] time 0.907 (0.886) data 0.000 (0.002) loss 1.2451 (1.0985) acc 68.7500 (72.7724) lr 1.4818e-03 eta 3:50:30 +epoch [19/50] batch [395/500] time 0.900 (0.886) data 0.000 (0.002) loss 0.7358 (1.0958) acc 75.0000 (72.8323) lr 1.4818e-03 eta 3:50:30 +epoch [19/50] batch [400/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.5093 (1.0940) acc 87.5000 (72.8828) lr 1.4818e-03 eta 3:50:25 +epoch [19/50] batch [405/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.6523 (1.0979) acc 56.2500 (72.8009) lr 1.4818e-03 eta 3:50:19 +epoch [19/50] batch [410/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.2686 (1.0985) acc 68.7500 (72.8201) lr 1.4818e-03 eta 3:50:12 +epoch [19/50] batch [415/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.9512 (1.0991) acc 81.2500 (72.8012) lr 1.4818e-03 eta 3:50:05 +epoch [19/50] batch [420/500] time 0.890 (0.886) data 0.000 (0.002) loss 1.0146 (1.0991) acc 75.0000 (72.8348) lr 1.4818e-03 eta 3:50:00 +epoch [19/50] batch [425/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.0410 (1.0995) acc 71.8750 (72.8015) lr 1.4818e-03 eta 3:49:52 +epoch [19/50] batch [430/500] time 0.861 (0.885) data 0.000 (0.002) loss 1.6484 (1.1022) acc 62.5000 (72.7762) lr 1.4818e-03 eta 3:49:44 +epoch [19/50] batch [435/500] time 0.896 (0.885) data 0.000 (0.002) loss 0.6416 (1.1040) acc 75.0000 (72.7011) lr 1.4818e-03 eta 3:49:40 +epoch [19/50] batch [440/500] time 0.885 (0.885) data 0.000 (0.002) loss 1.5283 (1.1042) acc 62.5000 (72.6989) lr 1.4818e-03 eta 3:49:35 +epoch [19/50] batch [445/500] time 0.869 (0.885) data 
0.000 (0.002) loss 0.9268 (1.1065) acc 71.8750 (72.6545) lr 1.4818e-03 eta 3:49:30 +epoch [19/50] batch [450/500] time 0.904 (0.885) data 0.000 (0.002) loss 1.3643 (1.1063) acc 65.6250 (72.6528) lr 1.4818e-03 eta 3:49:26 +epoch [19/50] batch [455/500] time 0.849 (0.885) data 0.000 (0.002) loss 1.3730 (1.1069) acc 65.6250 (72.5962) lr 1.4818e-03 eta 3:49:21 +epoch [19/50] batch [460/500] time 0.927 (0.885) data 0.000 (0.002) loss 0.9688 (1.1082) acc 78.1250 (72.5543) lr 1.4818e-03 eta 3:49:17 +epoch [19/50] batch [465/500] time 0.870 (0.885) data 0.000 (0.002) loss 1.0693 (1.1090) acc 75.0000 (72.5470) lr 1.4818e-03 eta 3:49:11 +epoch [19/50] batch [470/500] time 0.908 (0.885) data 0.000 (0.002) loss 1.6484 (1.1129) acc 56.2500 (72.5000) lr 1.4818e-03 eta 3:49:06 +epoch [19/50] batch [475/500] time 0.884 (0.885) data 0.000 (0.002) loss 0.8701 (1.1148) acc 81.2500 (72.4276) lr 1.4818e-03 eta 3:49:01 +epoch [19/50] batch [480/500] time 0.861 (0.885) data 0.000 (0.002) loss 1.1484 (1.1171) acc 75.0000 (72.3763) lr 1.4818e-03 eta 3:48:55 +epoch [19/50] batch [485/500] time 0.868 (0.885) data 0.001 (0.002) loss 1.3760 (1.1179) acc 65.6250 (72.3518) lr 1.4818e-03 eta 3:48:49 +epoch [19/50] batch [490/500] time 0.903 (0.885) data 0.000 (0.002) loss 1.1738 (1.1195) acc 62.5000 (72.3214) lr 1.4818e-03 eta 3:48:46 +epoch [19/50] batch [495/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.3320 (1.1184) acc 65.6250 (72.3548) lr 1.4818e-03 eta 3:48:48 +epoch [19/50] batch [500/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.0146 (1.1195) acc 78.1250 (72.3438) lr 1.4258e-03 eta 3:48:43 +epoch [20/50] batch [5/500] time 0.886 (1.029) data 0.000 (0.140) loss 0.8521 (1.0187) acc 78.1250 (72.5000) lr 1.4258e-03 eta 4:25:46 +epoch [20/50] batch [10/500] time 0.885 (0.952) data 0.000 (0.070) loss 0.9219 (0.9885) acc 75.0000 (73.4375) lr 1.4258e-03 eta 4:05:42 +epoch [20/50] batch [15/500] time 0.898 (0.929) data 0.000 (0.047) loss 1.2803 (0.9687) acc 65.6250 (74.7917) lr 1.4258e-03 
eta 3:59:40 +epoch [20/50] batch [20/500] time 0.920 (0.925) data 0.000 (0.035) loss 1.3047 (1.0483) acc 71.8750 (73.9062) lr 1.4258e-03 eta 3:58:31 +epoch [20/50] batch [25/500] time 0.897 (0.917) data 0.000 (0.028) loss 0.6611 (1.0423) acc 84.3750 (73.7500) lr 1.4258e-03 eta 3:56:30 +epoch [20/50] batch [30/500] time 0.881 (0.910) data 0.000 (0.023) loss 1.3223 (1.0791) acc 59.3750 (73.0208) lr 1.4258e-03 eta 3:54:37 +epoch [20/50] batch [35/500] time 0.903 (0.906) data 0.000 (0.020) loss 1.3750 (1.0879) acc 65.6250 (72.6786) lr 1.4258e-03 eta 3:53:29 +epoch [20/50] batch [40/500] time 0.887 (0.902) data 0.000 (0.018) loss 0.7446 (1.0677) acc 71.8750 (73.1250) lr 1.4258e-03 eta 3:52:32 +epoch [20/50] batch [45/500] time 0.857 (0.899) data 0.000 (0.016) loss 0.9287 (1.0905) acc 81.2500 (72.9861) lr 1.4258e-03 eta 3:51:31 +epoch [20/50] batch [50/500] time 0.878 (0.899) data 0.000 (0.014) loss 1.5703 (1.0830) acc 65.6250 (73.5625) lr 1.4258e-03 eta 3:51:28 +epoch [20/50] batch [55/500] time 0.882 (0.897) data 0.000 (0.013) loss 1.1924 (1.0836) acc 75.0000 (73.8636) lr 1.4258e-03 eta 3:50:57 +epoch [20/50] batch [60/500] time 0.867 (0.896) data 0.000 (0.012) loss 1.2139 (1.0773) acc 65.6250 (73.8542) lr 1.4258e-03 eta 3:50:36 +epoch [20/50] batch [65/500] time 0.865 (0.895) data 0.000 (0.011) loss 0.7402 (1.0621) acc 78.1250 (74.0385) lr 1.4258e-03 eta 3:50:06 +epoch [20/50] batch [70/500] time 0.884 (0.893) data 0.000 (0.010) loss 0.5854 (1.0634) acc 84.3750 (73.9286) lr 1.4258e-03 eta 3:49:44 +epoch [20/50] batch [75/500] time 0.892 (0.892) data 0.000 (0.010) loss 0.7061 (1.0618) acc 75.0000 (73.8750) lr 1.4258e-03 eta 3:49:26 +epoch [20/50] batch [80/500] time 0.864 (0.892) data 0.000 (0.009) loss 1.4473 (1.0675) acc 65.6250 (73.5938) lr 1.4258e-03 eta 3:49:14 +epoch [20/50] batch [85/500] time 0.897 (0.892) data 0.000 (0.008) loss 0.9170 (1.0770) acc 71.8750 (73.6029) lr 1.4258e-03 eta 3:49:12 +epoch [20/50] batch [90/500] time 0.874 (0.892) data 0.000 (0.008) 
loss 1.7285 (1.0806) acc 65.6250 (73.6806) lr 1.4258e-03 eta 3:49:04 +epoch [20/50] batch [95/500] time 0.863 (0.893) data 0.000 (0.008) loss 1.3213 (1.0829) acc 56.2500 (73.5855) lr 1.4258e-03 eta 3:49:13 +epoch [20/50] batch [100/500] time 0.878 (0.892) data 0.000 (0.007) loss 0.9844 (1.0860) acc 75.0000 (73.2812) lr 1.4258e-03 eta 3:49:00 +epoch [20/50] batch [105/500] time 0.891 (0.892) data 0.000 (0.007) loss 1.0273 (1.0922) acc 78.1250 (73.0357) lr 1.4258e-03 eta 3:48:55 +epoch [20/50] batch [110/500] time 0.871 (0.891) data 0.000 (0.007) loss 1.3867 (1.1016) acc 62.5000 (72.6705) lr 1.4258e-03 eta 3:48:39 +epoch [20/50] batch [115/500] time 0.864 (0.891) data 0.000 (0.006) loss 0.8940 (1.1023) acc 81.2500 (72.6630) lr 1.4258e-03 eta 3:48:30 +epoch [20/50] batch [120/500] time 0.879 (0.890) data 0.000 (0.006) loss 1.6660 (1.1081) acc 65.6250 (72.6562) lr 1.4258e-03 eta 3:48:14 +epoch [20/50] batch [125/500] time 0.921 (0.891) data 0.000 (0.006) loss 1.2783 (1.1073) acc 62.5000 (72.6500) lr 1.4258e-03 eta 3:48:17 +epoch [20/50] batch [130/500] time 0.849 (0.891) data 0.000 (0.006) loss 0.8472 (1.1032) acc 71.8750 (72.7404) lr 1.4258e-03 eta 3:48:07 +epoch [20/50] batch [135/500] time 0.915 (0.891) data 0.000 (0.005) loss 1.2275 (1.1086) acc 68.7500 (72.5926) lr 1.4258e-03 eta 3:48:03 +epoch [20/50] batch [140/500] time 0.886 (0.890) data 0.000 (0.005) loss 1.2637 (1.1154) acc 71.8750 (72.4554) lr 1.4258e-03 eta 3:47:54 +epoch [20/50] batch [145/500] time 0.908 (0.890) data 0.000 (0.005) loss 0.8691 (1.1156) acc 78.1250 (72.4569) lr 1.4258e-03 eta 3:47:48 +epoch [20/50] batch [150/500] time 0.899 (0.890) data 0.000 (0.005) loss 1.0918 (1.1238) acc 75.0000 (72.3542) lr 1.4258e-03 eta 3:47:40 +epoch [20/50] batch [155/500] time 0.886 (0.890) data 0.000 (0.005) loss 1.0352 (1.1196) acc 68.7500 (72.3992) lr 1.4258e-03 eta 3:47:33 +epoch [20/50] batch [160/500] time 0.859 (0.890) data 0.000 (0.005) loss 0.8267 (1.1175) acc 71.8750 (72.4805) lr 1.4258e-03 eta 3:47:26 
+epoch [20/50] batch [165/500] time 0.909 (0.889) data 0.000 (0.004) loss 1.0215 (1.1188) acc 75.0000 (72.4432) lr 1.4258e-03 eta 3:47:16 +epoch [20/50] batch [170/500] time 0.885 (0.888) data 0.000 (0.004) loss 1.4531 (1.1201) acc 68.7500 (72.4449) lr 1.4258e-03 eta 3:46:57 +epoch [20/50] batch [175/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.2793 (1.1204) acc 75.0000 (72.4464) lr 1.4258e-03 eta 3:46:46 +epoch [20/50] batch [180/500] time 0.910 (0.888) data 0.000 (0.004) loss 1.3301 (1.1216) acc 71.8750 (72.5174) lr 1.4258e-03 eta 3:46:47 +epoch [20/50] batch [185/500] time 0.891 (0.888) data 0.001 (0.004) loss 1.2090 (1.1237) acc 71.8750 (72.5000) lr 1.4258e-03 eta 3:46:39 +epoch [20/50] batch [190/500] time 0.856 (0.888) data 0.000 (0.004) loss 0.7656 (1.1224) acc 81.2500 (72.4836) lr 1.4258e-03 eta 3:46:35 +epoch [20/50] batch [195/500] time 0.882 (0.888) data 0.000 (0.004) loss 0.8701 (1.1193) acc 78.1250 (72.6282) lr 1.4258e-03 eta 3:46:38 +epoch [20/50] batch [200/500] time 0.904 (0.888) data 0.000 (0.004) loss 1.2598 (1.1262) acc 59.3750 (72.4531) lr 1.4258e-03 eta 3:46:33 +epoch [20/50] batch [205/500] time 0.884 (0.888) data 0.000 (0.004) loss 0.8291 (1.1249) acc 81.2500 (72.4848) lr 1.4258e-03 eta 3:46:29 +epoch [20/50] batch [210/500] time 0.865 (0.888) data 0.000 (0.004) loss 0.8765 (1.1206) acc 78.1250 (72.4702) lr 1.4258e-03 eta 3:46:19 +epoch [20/50] batch [215/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.8374 (1.1180) acc 78.1250 (72.4855) lr 1.4258e-03 eta 3:46:07 +epoch [20/50] batch [220/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.2988 (1.1163) acc 68.7500 (72.5284) lr 1.4258e-03 eta 3:46:01 +epoch [20/50] batch [225/500] time 0.908 (0.888) data 0.000 (0.003) loss 0.9897 (1.1117) acc 71.8750 (72.5833) lr 1.4258e-03 eta 3:45:58 +epoch [20/50] batch [230/500] time 0.886 (0.887) data 0.000 (0.003) loss 1.1660 (1.1125) acc 65.6250 (72.5136) lr 1.4258e-03 eta 3:45:49 +epoch [20/50] batch [235/500] time 1.001 (0.888) data 0.000 
(0.003) loss 0.8091 (1.1086) acc 78.1250 (72.5000) lr 1.4258e-03 eta 3:45:55 +epoch [20/50] batch [240/500] time 0.853 (0.888) data 0.000 (0.003) loss 1.0488 (1.1095) acc 75.0000 (72.5130) lr 1.4258e-03 eta 3:45:48 +epoch [20/50] batch [245/500] time 0.881 (0.888) data 0.000 (0.003) loss 1.6807 (1.1115) acc 68.7500 (72.4617) lr 1.4258e-03 eta 3:45:40 +epoch [20/50] batch [250/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.9355 (1.1116) acc 81.2500 (72.3875) lr 1.4258e-03 eta 3:45:33 +epoch [20/50] batch [255/500] time 0.896 (0.887) data 0.000 (0.003) loss 0.7993 (1.1079) acc 78.1250 (72.4265) lr 1.4258e-03 eta 3:45:27 +epoch [20/50] batch [260/500] time 0.872 (0.887) data 0.000 (0.003) loss 0.7266 (1.1055) acc 75.0000 (72.4279) lr 1.4258e-03 eta 3:45:18 +epoch [20/50] batch [265/500] time 0.864 (0.887) data 0.000 (0.003) loss 1.2432 (1.1092) acc 59.3750 (72.2995) lr 1.4258e-03 eta 3:45:10 +epoch [20/50] batch [270/500] time 0.886 (0.887) data 0.000 (0.003) loss 0.5869 (1.1084) acc 81.2500 (72.3611) lr 1.4258e-03 eta 3:45:04 +epoch [20/50] batch [275/500] time 0.903 (0.887) data 0.001 (0.003) loss 1.4912 (1.1076) acc 75.0000 (72.4091) lr 1.4258e-03 eta 3:45:00 +epoch [20/50] batch [280/500] time 0.902 (0.887) data 0.000 (0.003) loss 0.7158 (1.1046) acc 75.0000 (72.4442) lr 1.4258e-03 eta 3:44:56 +epoch [20/50] batch [285/500] time 0.870 (0.887) data 0.000 (0.003) loss 0.8135 (1.1052) acc 75.0000 (72.3355) lr 1.4258e-03 eta 3:44:53 +epoch [20/50] batch [290/500] time 0.886 (0.887) data 0.000 (0.003) loss 1.1201 (1.1086) acc 81.2500 (72.2306) lr 1.4258e-03 eta 3:44:48 +epoch [20/50] batch [295/500] time 0.919 (0.887) data 0.000 (0.003) loss 0.6289 (1.1060) acc 84.3750 (72.2881) lr 1.4258e-03 eta 3:44:49 +epoch [20/50] batch [300/500] time 0.881 (0.887) data 0.000 (0.003) loss 1.1133 (1.1028) acc 59.3750 (72.3438) lr 1.4258e-03 eta 3:44:43 +epoch [20/50] batch [305/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.4670 (1.1042) acc 87.5000 (72.3566) lr 1.4258e-03 
eta 3:44:36 +epoch [20/50] batch [310/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.1816 (1.1073) acc 78.1250 (72.3286) lr 1.4258e-03 eta 3:44:30 +epoch [20/50] batch [315/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.9155 (1.1039) acc 81.2500 (72.3909) lr 1.4258e-03 eta 3:44:26 +epoch [20/50] batch [320/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.6748 (1.1085) acc 59.3750 (72.3047) lr 1.4258e-03 eta 3:44:20 +epoch [20/50] batch [325/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.1562 (1.1066) acc 65.6250 (72.3077) lr 1.4258e-03 eta 3:44:16 +epoch [20/50] batch [330/500] time 0.868 (0.887) data 0.000 (0.002) loss 2.1348 (1.1056) acc 53.1250 (72.3485) lr 1.4258e-03 eta 3:44:11 +epoch [20/50] batch [335/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.3398 (1.1086) acc 56.2500 (72.2575) lr 1.4258e-03 eta 3:44:09 +epoch [20/50] batch [340/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.6992 (1.1102) acc 75.0000 (72.2702) lr 1.4258e-03 eta 3:44:03 +epoch [20/50] batch [345/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.2500 (1.1100) acc 68.7500 (72.2554) lr 1.4258e-03 eta 3:43:57 +epoch [20/50] batch [350/500] time 0.853 (0.887) data 0.000 (0.002) loss 0.5347 (1.1077) acc 84.3750 (72.2946) lr 1.4258e-03 eta 3:43:51 +epoch [20/50] batch [355/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.1377 (1.1130) acc 78.1250 (72.2359) lr 1.4258e-03 eta 3:43:48 +epoch [20/50] batch [360/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.7480 (1.1105) acc 81.2500 (72.3090) lr 1.4258e-03 eta 3:43:43 +epoch [20/50] batch [365/500] time 0.874 (0.887) data 0.000 (0.002) loss 0.9863 (1.1115) acc 71.8750 (72.2860) lr 1.4258e-03 eta 3:43:37 +epoch [20/50] batch [370/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.6279 (1.1136) acc 65.6250 (72.2804) lr 1.4258e-03 eta 3:43:32 +epoch [20/50] batch [375/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.5537 (1.1134) acc 68.7500 (72.3167) lr 1.4258e-03 eta 3:43:29 +epoch [20/50] batch [380/500] time 0.878 (0.887) data 
0.000 (0.002) loss 0.8989 (1.1141) acc 75.0000 (72.3191) lr 1.4258e-03 eta 3:43:28 +epoch [20/50] batch [385/500] time 0.896 (0.887) data 0.001 (0.002) loss 1.1143 (1.1130) acc 71.8750 (72.3214) lr 1.4258e-03 eta 3:43:25 +epoch [20/50] batch [390/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.3779 (1.1151) acc 62.5000 (72.2676) lr 1.4258e-03 eta 3:43:20 +epoch [20/50] batch [395/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9556 (1.1144) acc 71.8750 (72.2943) lr 1.4258e-03 eta 3:43:17 +epoch [20/50] batch [400/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.2715 (1.1158) acc 78.1250 (72.2734) lr 1.4258e-03 eta 3:43:12 +epoch [20/50] batch [405/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.0107 (1.1122) acc 71.8750 (72.3611) lr 1.4258e-03 eta 3:43:09 +epoch [20/50] batch [410/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.3623 (1.1116) acc 71.8750 (72.3933) lr 1.4258e-03 eta 3:43:05 +epoch [20/50] batch [415/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.8755 (1.1099) acc 68.7500 (72.3946) lr 1.4258e-03 eta 3:43:02 +epoch [20/50] batch [420/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.7002 (1.1164) acc 59.3750 (72.2768) lr 1.4258e-03 eta 3:43:01 +epoch [20/50] batch [425/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.1836 (1.1185) acc 71.8750 (72.2279) lr 1.4258e-03 eta 3:42:58 +epoch [20/50] batch [430/500] time 0.902 (0.888) data 0.000 (0.002) loss 1.3584 (1.1208) acc 65.6250 (72.1657) lr 1.4258e-03 eta 3:42:54 +epoch [20/50] batch [435/500] time 0.858 (0.888) data 0.000 (0.002) loss 1.4453 (1.1213) acc 75.0000 (72.1624) lr 1.4258e-03 eta 3:42:50 +epoch [20/50] batch [440/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.2617 (1.1227) acc 71.8750 (72.1804) lr 1.4258e-03 eta 3:42:45 +epoch [20/50] batch [445/500] time 0.900 (0.888) data 0.000 (0.002) loss 0.7754 (1.1206) acc 78.1250 (72.2191) lr 1.4258e-03 eta 3:42:41 +epoch [20/50] batch [450/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8828 (1.1177) acc 75.0000 (72.2639) lr 
1.4258e-03 eta 3:42:39 +epoch [20/50] batch [455/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.7539 (1.1191) acc 65.6250 (72.2459) lr 1.4258e-03 eta 3:42:36 +epoch [20/50] batch [460/500] time 0.850 (0.888) data 0.000 (0.002) loss 0.5688 (1.1178) acc 81.2500 (72.2554) lr 1.4258e-03 eta 3:42:30 +epoch [20/50] batch [465/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.0830 (1.1154) acc 71.8750 (72.3118) lr 1.4258e-03 eta 3:42:23 +epoch [20/50] batch [470/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.2178 (1.1155) acc 65.6250 (72.2939) lr 1.4258e-03 eta 3:42:21 +epoch [20/50] batch [475/500] time 0.918 (0.888) data 0.000 (0.002) loss 1.2627 (1.1144) acc 78.1250 (72.3092) lr 1.4258e-03 eta 3:42:17 +epoch [20/50] batch [480/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.5332 (1.1152) acc 65.6250 (72.2721) lr 1.4258e-03 eta 3:42:14 +epoch [20/50] batch [485/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.4463 (1.1165) acc 68.7500 (72.2616) lr 1.4258e-03 eta 3:42:07 +epoch [20/50] batch [490/500] time 0.848 (0.887) data 0.000 (0.002) loss 1.1201 (1.1172) acc 68.7500 (72.2768) lr 1.4258e-03 eta 3:42:00 +epoch [20/50] batch [495/500] time 0.871 (0.887) data 0.000 (0.002) loss 2.0332 (1.1198) acc 59.3750 (72.2538) lr 1.4258e-03 eta 3:41:53 +epoch [20/50] batch [500/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.4053 (1.1207) acc 53.1250 (72.2125) lr 1.3681e-03 eta 3:41:48 +epoch [21/50] batch [5/500] time 0.849 (1.024) data 0.000 (0.135) loss 1.0078 (1.1911) acc 68.7500 (71.8750) lr 1.3681e-03 eta 4:15:49 +epoch [21/50] batch [10/500] time 0.903 (0.952) data 0.000 (0.068) loss 0.7881 (0.9700) acc 81.2500 (75.6250) lr 1.3681e-03 eta 3:57:44 +epoch [21/50] batch [15/500] time 0.882 (0.926) data 0.000 (0.045) loss 0.9780 (0.9481) acc 75.0000 (76.0417) lr 1.3681e-03 eta 3:51:23 +epoch [21/50] batch [20/500] time 0.863 (0.917) data 0.000 (0.034) loss 1.2705 (0.9182) acc 71.8750 (76.5625) lr 1.3681e-03 eta 3:48:54 +epoch [21/50] batch [25/500] time 0.879 (0.909) 
data 0.000 (0.027) loss 1.1045 (0.9630) acc 78.1250 (76.1250) lr 1.3681e-03 eta 3:46:50 +epoch [21/50] batch [30/500] time 0.882 (0.908) data 0.000 (0.023) loss 1.1455 (1.0114) acc 71.8750 (74.6875) lr 1.3681e-03 eta 3:46:33 +epoch [21/50] batch [35/500] time 0.878 (0.904) data 0.000 (0.020) loss 1.5430 (1.0439) acc 68.7500 (74.3750) lr 1.3681e-03 eta 3:45:28 +epoch [21/50] batch [40/500] time 0.877 (0.901) data 0.000 (0.017) loss 0.8433 (1.0599) acc 78.1250 (73.6719) lr 1.3681e-03 eta 3:44:36 +epoch [21/50] batch [45/500] time 0.866 (0.899) data 0.000 (0.015) loss 1.3076 (1.0725) acc 65.6250 (73.4722) lr 1.3681e-03 eta 3:44:11 +epoch [21/50] batch [50/500] time 0.861 (0.897) data 0.000 (0.014) loss 1.1680 (1.0891) acc 68.7500 (73.3125) lr 1.3681e-03 eta 3:43:34 +epoch [21/50] batch [55/500] time 0.876 (0.896) data 0.000 (0.013) loss 1.4346 (1.1010) acc 62.5000 (73.1818) lr 1.3681e-03 eta 3:43:08 +epoch [21/50] batch [60/500] time 0.909 (0.896) data 0.000 (0.011) loss 1.4658 (1.1042) acc 65.6250 (73.0729) lr 1.3681e-03 eta 3:42:59 +epoch [21/50] batch [65/500] time 0.853 (0.894) data 0.000 (0.011) loss 0.8105 (1.0859) acc 75.0000 (73.3654) lr 1.3681e-03 eta 3:42:30 +epoch [21/50] batch [70/500] time 0.878 (0.893) data 0.000 (0.010) loss 1.3857 (1.0746) acc 78.1250 (73.7500) lr 1.3681e-03 eta 3:42:08 +epoch [21/50] batch [75/500] time 0.852 (0.893) data 0.000 (0.009) loss 1.6729 (1.0766) acc 68.7500 (73.6667) lr 1.3681e-03 eta 3:42:06 +epoch [21/50] batch [80/500] time 0.881 (0.891) data 0.000 (0.009) loss 1.1973 (1.0763) acc 71.8750 (73.5938) lr 1.3681e-03 eta 3:41:38 +epoch [21/50] batch [85/500] time 0.872 (0.890) data 0.000 (0.008) loss 0.5615 (1.0761) acc 81.2500 (73.4191) lr 1.3681e-03 eta 3:41:17 +epoch [21/50] batch [90/500] time 0.882 (0.889) data 0.000 (0.008) loss 0.4934 (1.0699) acc 75.0000 (73.1597) lr 1.3681e-03 eta 3:41:02 +epoch [21/50] batch [95/500] time 0.873 (0.889) data 0.000 (0.007) loss 1.3320 (1.0792) acc 71.8750 (72.8947) lr 1.3681e-03 eta 
3:40:56 +epoch [21/50] batch [100/500] time 0.871 (0.889) data 0.000 (0.007) loss 1.3184 (1.0821) acc 75.0000 (72.9062) lr 1.3681e-03 eta 3:40:45 +epoch [21/50] batch [105/500] time 0.891 (0.889) data 0.000 (0.007) loss 1.3672 (1.0881) acc 68.7500 (72.7083) lr 1.3681e-03 eta 3:40:38 +epoch [21/50] batch [110/500] time 0.885 (0.889) data 0.000 (0.006) loss 1.3291 (1.0825) acc 71.8750 (72.9545) lr 1.3681e-03 eta 3:40:32 +epoch [21/50] batch [115/500] time 0.862 (0.888) data 0.000 (0.006) loss 1.6895 (1.0919) acc 75.0000 (72.9076) lr 1.3681e-03 eta 3:40:23 +epoch [21/50] batch [120/500] time 0.907 (0.888) data 0.000 (0.006) loss 0.7808 (1.0823) acc 78.1250 (73.2031) lr 1.3681e-03 eta 3:40:15 +epoch [21/50] batch [125/500] time 0.899 (0.888) data 0.000 (0.006) loss 1.6191 (1.0959) acc 65.6250 (73.0250) lr 1.3681e-03 eta 3:40:03 +epoch [21/50] batch [130/500] time 0.849 (0.887) data 0.000 (0.005) loss 0.8120 (1.0999) acc 75.0000 (72.9087) lr 1.3681e-03 eta 3:39:56 +epoch [21/50] batch [135/500] time 0.895 (0.888) data 0.000 (0.005) loss 1.2832 (1.0993) acc 78.1250 (72.9630) lr 1.3681e-03 eta 3:39:57 +epoch [21/50] batch [140/500] time 0.859 (0.887) data 0.000 (0.005) loss 0.8203 (1.0956) acc 78.1250 (73.0804) lr 1.3681e-03 eta 3:39:46 +epoch [21/50] batch [145/500] time 0.881 (0.887) data 0.000 (0.005) loss 0.5352 (1.0980) acc 87.5000 (73.0603) lr 1.3681e-03 eta 3:39:32 +epoch [21/50] batch [150/500] time 0.884 (0.887) data 0.000 (0.005) loss 1.0225 (1.1016) acc 65.6250 (72.7708) lr 1.3681e-03 eta 3:39:26 +epoch [21/50] batch [155/500] time 0.915 (0.887) data 0.000 (0.005) loss 1.2891 (1.1040) acc 75.0000 (72.6815) lr 1.3681e-03 eta 3:39:24 +epoch [21/50] batch [160/500] time 0.897 (0.887) data 0.000 (0.004) loss 1.7432 (1.1104) acc 56.2500 (72.4805) lr 1.3681e-03 eta 3:39:21 +epoch [21/50] batch [165/500] time 0.882 (0.887) data 0.000 (0.004) loss 1.7500 (1.1121) acc 68.7500 (72.5189) lr 1.3681e-03 eta 3:39:12 +epoch [21/50] batch [170/500] time 0.862 (0.887) data 
0.000 (0.004) loss 0.8252 (1.1109) acc 75.0000 (72.5368) lr 1.3681e-03 eta 3:39:09 +epoch [21/50] batch [175/500] time 0.897 (0.887) data 0.000 (0.004) loss 2.1738 (1.1143) acc 59.3750 (72.5000) lr 1.3681e-03 eta 3:39:14 +epoch [21/50] batch [180/500] time 0.886 (0.887) data 0.000 (0.004) loss 1.0322 (1.1114) acc 68.7500 (72.5174) lr 1.3681e-03 eta 3:39:03 +epoch [21/50] batch [185/500] time 0.889 (0.887) data 0.000 (0.004) loss 1.2217 (1.1074) acc 68.7500 (72.5338) lr 1.3681e-03 eta 3:39:00 +epoch [21/50] batch [190/500] time 0.882 (0.887) data 0.000 (0.004) loss 0.9390 (1.1010) acc 78.1250 (72.6480) lr 1.3681e-03 eta 3:38:56 +epoch [21/50] batch [195/500] time 0.858 (0.887) data 0.000 (0.004) loss 1.6768 (1.1039) acc 59.3750 (72.6442) lr 1.3681e-03 eta 3:38:46 +epoch [21/50] batch [200/500] time 0.891 (0.886) data 0.000 (0.004) loss 0.7139 (1.1008) acc 84.3750 (72.8594) lr 1.3681e-03 eta 3:38:39 +epoch [21/50] batch [205/500] time 0.871 (0.886) data 0.000 (0.004) loss 1.3691 (1.1020) acc 71.8750 (72.8049) lr 1.3681e-03 eta 3:38:35 +epoch [21/50] batch [210/500] time 0.887 (0.886) data 0.001 (0.003) loss 1.0205 (1.0989) acc 75.0000 (72.8423) lr 1.3681e-03 eta 3:38:26 +epoch [21/50] batch [215/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.7407 (1.0983) acc 81.2500 (72.8052) lr 1.3681e-03 eta 3:38:22 +epoch [21/50] batch [220/500] time 0.889 (0.887) data 0.000 (0.003) loss 0.8105 (1.0937) acc 71.8750 (72.8693) lr 1.3681e-03 eta 3:38:24 +epoch [21/50] batch [225/500] time 0.866 (0.886) data 0.000 (0.003) loss 0.7012 (1.0908) acc 87.5000 (73.0139) lr 1.3681e-03 eta 3:38:16 +epoch [21/50] batch [230/500] time 0.849 (0.886) data 0.000 (0.003) loss 1.0723 (1.0914) acc 75.0000 (72.9755) lr 1.3681e-03 eta 3:38:06 +epoch [21/50] batch [235/500] time 0.891 (0.886) data 0.000 (0.003) loss 1.4531 (1.0995) acc 68.7500 (72.8457) lr 1.3681e-03 eta 3:37:56 +epoch [21/50] batch [240/500] time 0.861 (0.885) data 0.000 (0.003) loss 1.1650 (1.0976) acc 71.8750 (72.8125) lr 
1.3681e-03 eta 3:37:49 +epoch [21/50] batch [245/500] time 0.908 (0.885) data 0.000 (0.003) loss 1.3486 (1.1013) acc 62.5000 (72.7423) lr 1.3681e-03 eta 3:37:44 +epoch [21/50] batch [250/500] time 0.874 (0.885) data 0.000 (0.003) loss 1.4219 (1.1029) acc 62.5000 (72.7000) lr 1.3681e-03 eta 3:37:37 +epoch [21/50] batch [255/500] time 0.868 (0.885) data 0.000 (0.003) loss 1.1641 (1.1011) acc 68.7500 (72.7083) lr 1.3681e-03 eta 3:37:33 +epoch [21/50] batch [260/500] time 0.897 (0.886) data 0.000 (0.003) loss 1.0479 (1.0978) acc 68.7500 (72.7524) lr 1.3681e-03 eta 3:37:33 +epoch [21/50] batch [265/500] time 0.898 (0.886) data 0.000 (0.003) loss 1.0322 (1.0964) acc 75.0000 (72.8538) lr 1.3681e-03 eta 3:37:31 +epoch [21/50] batch [270/500] time 0.900 (0.886) data 0.000 (0.003) loss 1.3555 (1.0965) acc 75.0000 (72.8935) lr 1.3681e-03 eta 3:37:28 +epoch [21/50] batch [275/500] time 0.883 (0.886) data 0.000 (0.003) loss 1.4033 (1.1017) acc 71.8750 (72.8182) lr 1.3681e-03 eta 3:37:24 +epoch [21/50] batch [280/500] time 0.877 (0.886) data 0.000 (0.003) loss 1.2422 (1.1017) acc 68.7500 (72.8125) lr 1.3681e-03 eta 3:37:16 +epoch [21/50] batch [285/500] time 0.909 (0.886) data 0.000 (0.003) loss 1.1943 (1.1022) acc 81.2500 (72.7851) lr 1.3681e-03 eta 3:37:17 +epoch [21/50] batch [290/500] time 0.865 (0.886) data 0.000 (0.003) loss 1.3066 (1.1032) acc 71.8750 (72.7155) lr 1.3681e-03 eta 3:37:10 +epoch [21/50] batch [295/500] time 0.906 (0.886) data 0.000 (0.003) loss 0.9663 (1.1048) acc 81.2500 (72.7225) lr 1.3681e-03 eta 3:37:11 +epoch [21/50] batch [300/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.2588 (1.1068) acc 71.8750 (72.6979) lr 1.3681e-03 eta 3:37:04 +epoch [21/50] batch [305/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.4863 (1.1079) acc 68.7500 (72.6639) lr 1.3681e-03 eta 3:36:58 +epoch [21/50] batch [310/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.0928 (1.1084) acc 65.6250 (72.6210) lr 1.3681e-03 eta 3:36:54 +epoch [21/50] batch [315/500] time 1.021 
(0.886) data 0.000 (0.002) loss 1.6348 (1.1054) acc 56.2500 (72.6687) lr 1.3681e-03 eta 3:36:57 +epoch [21/50] batch [320/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.0566 (1.1039) acc 78.1250 (72.6855) lr 1.3681e-03 eta 3:36:51 +epoch [21/50] batch [325/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.7974 (1.1047) acc 75.0000 (72.6538) lr 1.3681e-03 eta 3:36:43 +epoch [21/50] batch [330/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7974 (1.1052) acc 81.2500 (72.5758) lr 1.3681e-03 eta 3:36:35 +epoch [21/50] batch [335/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.0801 (1.1040) acc 84.3750 (72.6493) lr 1.3681e-03 eta 3:36:30 +epoch [21/50] batch [340/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1855 (1.1017) acc 75.0000 (72.7298) lr 1.3681e-03 eta 3:36:23 +epoch [21/50] batch [345/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.9209 (1.0986) acc 71.8750 (72.7808) lr 1.3681e-03 eta 3:36:19 +epoch [21/50] batch [350/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.1953 (1.1016) acc 81.2500 (72.7946) lr 1.3681e-03 eta 3:36:19 +epoch [21/50] batch [355/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.0244 (1.1041) acc 75.0000 (72.7113) lr 1.3681e-03 eta 3:36:12 +epoch [21/50] batch [360/500] time 0.909 (0.886) data 0.000 (0.002) loss 1.3174 (1.1046) acc 78.1250 (72.7778) lr 1.3681e-03 eta 3:36:12 +epoch [21/50] batch [365/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.1006 (1.1053) acc 68.7500 (72.8082) lr 1.3681e-03 eta 3:36:05 +epoch [21/50] batch [370/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.6021 (1.1056) acc 87.5000 (72.8294) lr 1.3681e-03 eta 3:35:59 +epoch [21/50] batch [375/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.2314 (1.1059) acc 75.0000 (72.8083) lr 1.3681e-03 eta 3:35:55 +epoch [21/50] batch [380/500] time 0.867 (0.886) data 0.001 (0.002) loss 0.6187 (1.1068) acc 81.2500 (72.7714) lr 1.3681e-03 eta 3:35:50 +epoch [21/50] batch [385/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.1367 (1.1108) acc 71.8750 
(72.7029) lr 1.3681e-03 eta 3:35:43 +epoch [21/50] batch [390/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.3760 (1.1095) acc 68.7500 (72.7244) lr 1.3681e-03 eta 3:35:39 +epoch [21/50] batch [395/500] time 0.866 (0.886) data 0.000 (0.002) loss 0.9351 (1.1088) acc 75.0000 (72.7136) lr 1.3681e-03 eta 3:35:34 +epoch [21/50] batch [400/500] time 0.900 (0.886) data 0.000 (0.002) loss 0.8418 (1.1048) acc 75.0000 (72.7891) lr 1.3681e-03 eta 3:35:31 +epoch [21/50] batch [405/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.0088 (1.1048) acc 71.8750 (72.7701) lr 1.3681e-03 eta 3:35:26 +epoch [21/50] batch [410/500] time 0.871 (0.886) data 0.000 (0.002) loss 0.8271 (1.1043) acc 75.0000 (72.6905) lr 1.3681e-03 eta 3:35:22 +epoch [21/50] batch [415/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.3232 (1.1057) acc 75.0000 (72.6657) lr 1.3681e-03 eta 3:35:17 +epoch [21/50] batch [420/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.6104 (1.1045) acc 65.6250 (72.6935) lr 1.3681e-03 eta 3:35:15 +epoch [21/50] batch [425/500] time 0.919 (0.886) data 0.000 (0.002) loss 1.4404 (1.1029) acc 62.5000 (72.7426) lr 1.3681e-03 eta 3:35:12 +epoch [21/50] batch [430/500] time 0.861 (0.886) data 0.000 (0.002) loss 1.2188 (1.1019) acc 71.8750 (72.7762) lr 1.3681e-03 eta 3:35:07 +epoch [21/50] batch [435/500] time 0.894 (0.886) data 0.000 (0.002) loss 0.7534 (1.1020) acc 81.2500 (72.7658) lr 1.3681e-03 eta 3:35:02 +epoch [21/50] batch [440/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.5576 (1.1056) acc 68.7500 (72.6847) lr 1.3681e-03 eta 3:34:59 +epoch [21/50] batch [445/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.6821 (1.1039) acc 78.1250 (72.7317) lr 1.3681e-03 eta 3:34:55 +epoch [21/50] batch [450/500] time 0.913 (0.886) data 0.000 (0.002) loss 0.7603 (1.1025) acc 78.1250 (72.7708) lr 1.3681e-03 eta 3:34:52 +epoch [21/50] batch [455/500] time 0.854 (0.886) data 0.000 (0.002) loss 1.1836 (1.1043) acc 71.8750 (72.7404) lr 1.3681e-03 eta 3:34:44 +epoch [21/50] batch [460/500] 
time 0.880 (0.886) data 0.000 (0.002) loss 1.1855 (1.1059) acc 75.0000 (72.7106) lr 1.3681e-03 eta 3:34:40 +epoch [21/50] batch [465/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.0830 (1.1059) acc 71.8750 (72.6882) lr 1.3681e-03 eta 3:34:36 +epoch [21/50] batch [470/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.6562 (1.1044) acc 71.8750 (72.7194) lr 1.3681e-03 eta 3:34:32 +epoch [21/50] batch [475/500] time 0.872 (0.886) data 0.000 (0.002) loss 0.6216 (1.1036) acc 81.2500 (72.6974) lr 1.3681e-03 eta 3:34:28 +epoch [21/50] batch [480/500] time 0.869 (0.886) data 0.000 (0.002) loss 0.7720 (1.1037) acc 75.0000 (72.7018) lr 1.3681e-03 eta 3:34:24 +epoch [21/50] batch [485/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.7422 (1.1058) acc 62.5000 (72.6740) lr 1.3681e-03 eta 3:34:21 +epoch [21/50] batch [490/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.5283 (1.1071) acc 62.5000 (72.6339) lr 1.3681e-03 eta 3:34:15 +epoch [21/50] batch [495/500] time 0.847 (0.886) data 0.000 (0.002) loss 1.0244 (1.1062) acc 75.0000 (72.6515) lr 1.3681e-03 eta 3:34:08 +epoch [21/50] batch [500/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.1426 (1.1069) acc 75.0000 (72.6625) lr 1.3090e-03 eta 3:34:04 +epoch [22/50] batch [5/500] time 0.871 (1.050) data 0.000 (0.137) loss 0.8438 (0.8977) acc 75.0000 (75.0000) lr 1.3090e-03 eta 4:13:38 +epoch [22/50] batch [10/500] time 0.899 (0.969) data 0.000 (0.068) loss 0.8853 (0.9600) acc 71.8750 (74.3750) lr 1.3090e-03 eta 3:53:57 +epoch [22/50] batch [15/500] time 0.868 (0.937) data 0.000 (0.046) loss 1.3613 (1.0512) acc 68.7500 (72.2917) lr 1.3090e-03 eta 3:46:16 +epoch [22/50] batch [20/500] time 0.882 (0.923) data 0.000 (0.034) loss 1.3506 (1.0802) acc 71.8750 (72.3438) lr 1.3090e-03 eta 3:42:47 +epoch [22/50] batch [25/500] time 0.890 (0.913) data 0.000 (0.027) loss 1.0361 (1.0589) acc 71.8750 (73.8750) lr 1.3090e-03 eta 3:40:11 +epoch [22/50] batch [30/500] time 0.875 (0.906) data 0.000 (0.023) loss 0.4836 (1.0629) acc 81.2500 
(72.9167) lr 1.3090e-03 eta 3:38:33 +epoch [22/50] batch [35/500] time 0.900 (0.903) data 0.000 (0.020) loss 1.3457 (1.1006) acc 65.6250 (72.1429) lr 1.3090e-03 eta 3:37:46 +epoch [22/50] batch [40/500] time 0.871 (0.900) data 0.000 (0.017) loss 0.8379 (1.0856) acc 71.8750 (72.5781) lr 1.3090e-03 eta 3:36:53 +epoch [22/50] batch [45/500] time 0.896 (0.898) data 0.000 (0.015) loss 1.4805 (1.1227) acc 65.6250 (71.8056) lr 1.3090e-03 eta 3:36:27 +epoch [22/50] batch [50/500] time 0.858 (0.900) data 0.000 (0.014) loss 1.5801 (1.1228) acc 59.3750 (71.8125) lr 1.3090e-03 eta 3:36:41 +epoch [22/50] batch [55/500] time 0.900 (0.898) data 0.000 (0.013) loss 1.5264 (1.1357) acc 65.6250 (71.5909) lr 1.3090e-03 eta 3:36:09 +epoch [22/50] batch [60/500] time 0.851 (0.896) data 0.000 (0.012) loss 1.2568 (1.1270) acc 75.0000 (71.9271) lr 1.3090e-03 eta 3:35:34 +epoch [22/50] batch [65/500] time 0.866 (0.894) data 0.000 (0.011) loss 1.0166 (1.1266) acc 84.3750 (72.4038) lr 1.3090e-03 eta 3:35:03 +epoch [22/50] batch [70/500] time 0.858 (0.893) data 0.000 (0.010) loss 0.8804 (1.1316) acc 81.2500 (72.1429) lr 1.3090e-03 eta 3:34:39 +epoch [22/50] batch [75/500] time 0.873 (0.892) data 0.000 (0.009) loss 1.6201 (1.1333) acc 71.8750 (72.6250) lr 1.3090e-03 eta 3:34:24 +epoch [22/50] batch [80/500] time 0.873 (0.891) data 0.000 (0.009) loss 0.9648 (1.1288) acc 75.0000 (72.7344) lr 1.3090e-03 eta 3:34:08 +epoch [22/50] batch [85/500] time 0.893 (0.891) data 0.000 (0.008) loss 0.7979 (1.1361) acc 78.1250 (72.5000) lr 1.3090e-03 eta 3:34:08 +epoch [22/50] batch [90/500] time 0.926 (0.892) data 0.000 (0.008) loss 0.6890 (1.1410) acc 78.1250 (72.5347) lr 1.3090e-03 eta 3:34:15 +epoch [22/50] batch [95/500] time 0.875 (0.893) data 0.000 (0.007) loss 1.7129 (1.1425) acc 65.6250 (72.5329) lr 1.3090e-03 eta 3:34:20 +epoch [22/50] batch [100/500] time 0.876 (0.892) data 0.000 (0.007) loss 0.6763 (1.1223) acc 81.2500 (72.8438) lr 1.3090e-03 eta 3:34:11 +epoch [22/50] batch [105/500] time 0.894 
(0.892) data 0.000 (0.007) loss 1.2324 (1.1218) acc 65.6250 (72.7679) lr 1.3090e-03 eta 3:33:58 +epoch [22/50] batch [110/500] time 0.893 (0.891) data 0.000 (0.006) loss 1.5127 (1.1218) acc 68.7500 (72.8125) lr 1.3090e-03 eta 3:33:47 +epoch [22/50] batch [115/500] time 0.863 (0.891) data 0.000 (0.006) loss 0.4099 (1.1118) acc 90.6250 (72.9348) lr 1.3090e-03 eta 3:33:39 +epoch [22/50] batch [120/500] time 0.848 (0.890) data 0.000 (0.006) loss 0.9561 (1.1035) acc 78.1250 (73.2031) lr 1.3090e-03 eta 3:33:20 +epoch [22/50] batch [125/500] time 0.912 (0.890) data 0.000 (0.006) loss 0.8706 (1.1018) acc 71.8750 (73.1250) lr 1.3090e-03 eta 3:33:13 +epoch [22/50] batch [130/500] time 0.862 (0.889) data 0.000 (0.005) loss 1.3564 (1.0988) acc 59.3750 (72.9567) lr 1.3090e-03 eta 3:33:01 +epoch [22/50] batch [135/500] time 0.893 (0.890) data 0.000 (0.005) loss 1.2646 (1.0943) acc 75.0000 (73.0093) lr 1.3090e-03 eta 3:33:03 +epoch [22/50] batch [140/500] time 0.863 (0.890) data 0.000 (0.005) loss 1.2148 (1.0925) acc 75.0000 (72.9911) lr 1.3090e-03 eta 3:33:02 +epoch [22/50] batch [145/500] time 0.990 (0.891) data 0.000 (0.005) loss 0.9761 (1.0846) acc 65.6250 (73.1250) lr 1.3090e-03 eta 3:33:09 +epoch [22/50] batch [150/500] time 0.885 (0.891) data 0.000 (0.005) loss 1.0186 (1.0864) acc 75.0000 (73.1042) lr 1.3090e-03 eta 3:33:00 +epoch [22/50] batch [155/500] time 0.877 (0.891) data 0.000 (0.005) loss 0.9746 (1.0865) acc 68.7500 (72.9435) lr 1.3090e-03 eta 3:32:54 +epoch [22/50] batch [160/500] time 0.895 (0.890) data 0.000 (0.005) loss 1.3047 (1.0930) acc 59.3750 (72.6953) lr 1.3090e-03 eta 3:32:43 +epoch [22/50] batch [165/500] time 0.878 (0.890) data 0.000 (0.004) loss 0.4846 (1.0933) acc 84.3750 (72.8220) lr 1.3090e-03 eta 3:32:33 +epoch [22/50] batch [170/500] time 0.894 (0.890) data 0.000 (0.004) loss 1.0986 (1.0916) acc 68.7500 (72.8125) lr 1.3090e-03 eta 3:32:28 +epoch [22/50] batch [175/500] time 0.873 (0.890) data 0.000 (0.004) loss 0.4424 (1.0863) acc 84.3750 
(72.9643) lr 1.3090e-03 eta 3:32:23 +epoch [22/50] batch [180/500] time 0.861 (0.889) data 0.000 (0.004) loss 0.8892 (1.0840) acc 75.0000 (72.9340) lr 1.3090e-03 eta 3:32:14 +epoch [22/50] batch [185/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.4561 (1.0904) acc 62.5000 (72.7534) lr 1.3090e-03 eta 3:32:07 +epoch [22/50] batch [190/500] time 0.878 (0.889) data 0.000 (0.004) loss 1.5049 (1.0952) acc 65.6250 (72.6316) lr 1.3090e-03 eta 3:32:04 +epoch [22/50] batch [195/500] time 0.867 (0.889) data 0.000 (0.004) loss 0.7690 (1.0948) acc 81.2500 (72.6603) lr 1.3090e-03 eta 3:31:57 +epoch [22/50] batch [200/500] time 0.905 (0.889) data 0.000 (0.004) loss 1.1914 (1.0958) acc 65.6250 (72.5781) lr 1.3090e-03 eta 3:31:55 +epoch [22/50] batch [205/500] time 0.896 (0.889) data 0.000 (0.004) loss 1.0186 (1.0998) acc 84.3750 (72.6067) lr 1.3090e-03 eta 3:31:53 +epoch [22/50] batch [210/500] time 0.905 (0.889) data 0.000 (0.003) loss 1.1494 (1.1012) acc 75.0000 (72.5744) lr 1.3090e-03 eta 3:31:48 +epoch [22/50] batch [215/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.7305 (1.0999) acc 78.1250 (72.6890) lr 1.3090e-03 eta 3:31:44 +epoch [22/50] batch [220/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.0430 (1.0992) acc 75.0000 (72.7841) lr 1.3090e-03 eta 3:31:39 +epoch [22/50] batch [225/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.4404 (1.1014) acc 81.2500 (72.8472) lr 1.3090e-03 eta 3:31:38 +epoch [22/50] batch [230/500] time 0.878 (0.890) data 0.000 (0.003) loss 1.5498 (1.1021) acc 56.2500 (72.6902) lr 1.3090e-03 eta 3:31:33 +epoch [22/50] batch [235/500] time 0.912 (0.890) data 0.000 (0.003) loss 1.4902 (1.1013) acc 65.6250 (72.6995) lr 1.3090e-03 eta 3:31:32 +epoch [22/50] batch [240/500] time 0.891 (0.890) data 0.000 (0.003) loss 1.1436 (1.0974) acc 68.7500 (72.7214) lr 1.3090e-03 eta 3:31:26 +epoch [22/50] batch [245/500] time 0.896 (0.889) data 0.000 (0.003) loss 1.5439 (1.1020) acc 62.5000 (72.5510) lr 1.3090e-03 eta 3:31:19 +epoch [22/50] batch [250/500] 
time 0.883 (0.890) data 0.000 (0.003) loss 1.1758 (1.0983) acc 71.8750 (72.6875) lr 1.3090e-03 eta 3:31:17 +epoch [22/50] batch [255/500] time 0.878 (0.889) data 0.000 (0.003) loss 1.0098 (1.1020) acc 75.0000 (72.5980) lr 1.3090e-03 eta 3:31:07 +epoch [22/50] batch [260/500] time 0.910 (0.889) data 0.000 (0.003) loss 0.9707 (1.1006) acc 68.7500 (72.5962) lr 1.3090e-03 eta 3:31:02 +epoch [22/50] batch [265/500] time 0.878 (0.889) data 0.000 (0.003) loss 2.0371 (1.1051) acc 68.7500 (72.5708) lr 1.3090e-03 eta 3:30:53 +epoch [22/50] batch [270/500] time 0.881 (0.889) data 0.000 (0.003) loss 0.8013 (1.1035) acc 81.2500 (72.6273) lr 1.3090e-03 eta 3:30:46 +epoch [22/50] batch [275/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.5630 (1.1023) acc 81.2500 (72.6364) lr 1.3090e-03 eta 3:30:41 +epoch [22/50] batch [280/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.1670 (1.1025) acc 78.1250 (72.6228) lr 1.3090e-03 eta 3:30:34 +epoch [22/50] batch [285/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.1904 (1.1012) acc 71.8750 (72.6535) lr 1.3090e-03 eta 3:30:25 +epoch [22/50] batch [290/500] time 0.899 (0.889) data 0.000 (0.003) loss 1.3164 (1.1026) acc 71.8750 (72.6509) lr 1.3090e-03 eta 3:30:28 +epoch [22/50] batch [295/500] time 0.919 (0.889) data 0.000 (0.003) loss 1.0186 (1.1004) acc 75.0000 (72.7331) lr 1.3090e-03 eta 3:30:28 +epoch [22/50] batch [300/500] time 0.915 (0.889) data 0.000 (0.003) loss 1.2930 (1.1036) acc 71.8750 (72.6875) lr 1.3090e-03 eta 3:30:25 +epoch [22/50] batch [305/500] time 0.869 (0.889) data 0.000 (0.002) loss 1.0996 (1.1018) acc 68.7500 (72.6639) lr 1.3090e-03 eta 3:30:18 +epoch [22/50] batch [310/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.2207 (1.1033) acc 71.8750 (72.5706) lr 1.3090e-03 eta 3:30:12 +epoch [22/50] batch [315/500] time 0.877 (0.889) data 0.000 (0.002) loss 1.2236 (1.1044) acc 65.6250 (72.5496) lr 1.3090e-03 eta 3:30:05 +epoch [22/50] batch [320/500] time 0.874 (0.889) data 0.000 (0.002) loss 1.2373 (1.1034) acc 
68.7500 (72.5293) lr 1.3090e-03 eta 3:30:02 +epoch [22/50] batch [325/500] time 0.866 (0.888) data 0.000 (0.002) loss 0.9507 (1.1001) acc 75.0000 (72.5769) lr 1.3090e-03 eta 3:29:54 +epoch [22/50] batch [330/500] time 0.891 (0.889) data 0.000 (0.002) loss 1.0449 (1.0997) acc 68.7500 (72.6042) lr 1.3090e-03 eta 3:29:53 +epoch [22/50] batch [335/500] time 0.910 (0.889) data 0.000 (0.002) loss 1.1826 (1.0964) acc 65.6250 (72.6586) lr 1.3090e-03 eta 3:29:53 +epoch [22/50] batch [340/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.4805 (1.0958) acc 71.8750 (72.7298) lr 1.3090e-03 eta 3:29:49 +epoch [22/50] batch [345/500] time 0.911 (0.889) data 0.000 (0.002) loss 1.1729 (1.0932) acc 65.6250 (72.7536) lr 1.3090e-03 eta 3:29:45 +epoch [22/50] batch [350/500] time 0.879 (0.889) data 0.000 (0.002) loss 0.9893 (1.0907) acc 71.8750 (72.8125) lr 1.3090e-03 eta 3:29:39 +epoch [22/50] batch [355/500] time 0.871 (0.889) data 0.000 (0.002) loss 1.3242 (1.0968) acc 65.6250 (72.7113) lr 1.3090e-03 eta 3:29:32 +epoch [22/50] batch [360/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.2529 (1.0956) acc 65.6250 (72.6476) lr 1.3090e-03 eta 3:29:25 +epoch [22/50] batch [365/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.0752 (1.0934) acc 62.5000 (72.6969) lr 1.3090e-03 eta 3:29:18 +epoch [22/50] batch [370/500] time 0.928 (0.889) data 0.000 (0.002) loss 1.6445 (1.0951) acc 50.0000 (72.6436) lr 1.3090e-03 eta 3:29:14 +epoch [22/50] batch [375/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.0449 (1.0970) acc 78.1250 (72.5750) lr 1.3090e-03 eta 3:29:06 +epoch [22/50] batch [380/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2139 (1.0967) acc 65.6250 (72.5658) lr 1.3090e-03 eta 3:28:59 +epoch [22/50] batch [385/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8232 (1.0972) acc 84.3750 (72.5487) lr 1.3090e-03 eta 3:28:53 +epoch [22/50] batch [390/500] time 0.894 (0.888) data 0.000 (0.002) loss 1.1152 (1.0974) acc 78.1250 (72.5881) lr 1.3090e-03 eta 3:28:45 +epoch [22/50] batch 
[395/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.3350 (1.0971) acc 68.7500 (72.6424) lr 1.3090e-03 eta 3:28:38 +epoch [22/50] batch [400/500] time 0.891 (0.888) data 0.000 (0.002) loss 1.2627 (1.0969) acc 78.1250 (72.6641) lr 1.3090e-03 eta 3:28:33 +epoch [22/50] batch [405/500] time 0.884 (0.888) data 0.000 (0.002) loss 0.7144 (1.0975) acc 81.2500 (72.6312) lr 1.3090e-03 eta 3:28:30 +epoch [22/50] batch [410/500] time 0.859 (0.888) data 0.000 (0.002) loss 1.4082 (1.0976) acc 65.6250 (72.6067) lr 1.3090e-03 eta 3:28:26 +epoch [22/50] batch [415/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.9849 (1.0991) acc 71.8750 (72.6054) lr 1.3090e-03 eta 3:28:20 +epoch [22/50] batch [420/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.8525 (1.0987) acc 71.8750 (72.5893) lr 1.3090e-03 eta 3:28:14 +epoch [22/50] batch [425/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2578 (1.0988) acc 71.8750 (72.5735) lr 1.3090e-03 eta 3:28:10 +epoch [22/50] batch [430/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.0098 (1.0984) acc 71.8750 (72.5581) lr 1.3090e-03 eta 3:28:05 +epoch [22/50] batch [435/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3896 (1.0999) acc 59.3750 (72.5072) lr 1.3090e-03 eta 3:28:03 +epoch [22/50] batch [440/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9092 (1.1007) acc 68.7500 (72.4645) lr 1.3090e-03 eta 3:27:58 +epoch [22/50] batch [445/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.1035 (1.1018) acc 71.8750 (72.4438) lr 1.3090e-03 eta 3:27:51 +epoch [22/50] batch [450/500] time 0.866 (0.887) data 0.000 (0.002) loss 0.9355 (1.1032) acc 68.7500 (72.3472) lr 1.3090e-03 eta 3:27:46 +epoch [22/50] batch [455/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.4414 (1.1035) acc 59.3750 (72.3558) lr 1.3090e-03 eta 3:27:42 +epoch [22/50] batch [460/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.9116 (1.1051) acc 78.1250 (72.3166) lr 1.3090e-03 eta 3:27:39 +epoch [22/50] batch [465/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.3066 
(1.1025) acc 71.8750 (72.3723) lr 1.3090e-03 eta 3:27:34 +epoch [22/50] batch [470/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.0234 (1.1034) acc 71.8750 (72.3737) lr 1.3090e-03 eta 3:27:29 +epoch [22/50] batch [475/500] time 0.955 (0.887) data 0.000 (0.002) loss 1.2959 (1.1030) acc 65.6250 (72.3816) lr 1.3090e-03 eta 3:27:24 +epoch [22/50] batch [480/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.1006 (1.1044) acc 78.1250 (72.4154) lr 1.3090e-03 eta 3:27:18 +epoch [22/50] batch [485/500] time 0.870 (0.887) data 0.001 (0.002) loss 1.2393 (1.1075) acc 71.8750 (72.3905) lr 1.3090e-03 eta 3:27:13 +epoch [22/50] batch [490/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.7305 (1.1085) acc 59.3750 (72.3980) lr 1.3090e-03 eta 3:27:08 +epoch [22/50] batch [495/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.0967 (1.1069) acc 71.8750 (72.4242) lr 1.3090e-03 eta 3:27:03 +epoch [22/50] batch [500/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.2832 (1.1056) acc 65.6250 (72.4625) lr 1.2487e-03 eta 3:26:57 +epoch [23/50] batch [5/500] time 0.906 (1.019) data 0.000 (0.129) loss 1.4150 (0.9125) acc 75.0000 (77.5000) lr 1.2487e-03 eta 3:57:46 +epoch [23/50] batch [10/500] time 0.896 (0.959) data 0.000 (0.065) loss 0.9126 (1.0421) acc 81.2500 (76.2500) lr 1.2487e-03 eta 3:43:30 +epoch [23/50] batch [15/500] time 0.868 (0.930) data 0.000 (0.043) loss 0.7891 (1.0362) acc 71.8750 (76.4583) lr 1.2487e-03 eta 3:36:47 +epoch [23/50] batch [20/500] time 0.860 (0.920) data 0.000 (0.032) loss 0.7700 (1.0215) acc 75.0000 (75.9375) lr 1.2487e-03 eta 3:34:14 +epoch [23/50] batch [25/500] time 0.892 (0.912) data 0.000 (0.026) loss 1.3867 (1.0295) acc 68.7500 (75.5000) lr 1.2487e-03 eta 3:32:26 +epoch [23/50] batch [30/500] time 0.893 (0.909) data 0.000 (0.022) loss 1.1328 (1.0579) acc 87.5000 (75.0000) lr 1.2487e-03 eta 3:31:39 +epoch [23/50] batch [35/500] time 0.866 (0.905) data 0.000 (0.019) loss 1.3467 (1.0536) acc 62.5000 (74.4643) lr 1.2487e-03 eta 3:30:34 +epoch [23/50] 
batch [40/500] time 0.892 (0.903) data 0.000 (0.016) loss 1.5703 (1.0995) acc 59.3750 (73.5156) lr 1.2487e-03 eta 3:30:01 +epoch [23/50] batch [45/500] time 0.876 (0.902) data 0.000 (0.015) loss 1.0820 (1.1009) acc 71.8750 (73.4722) lr 1.2487e-03 eta 3:29:49 +epoch [23/50] batch [50/500] time 0.881 (0.901) data 0.000 (0.013) loss 0.6729 (1.0772) acc 84.3750 (73.6250) lr 1.2487e-03 eta 3:29:27 +epoch [23/50] batch [55/500] time 0.875 (0.900) data 0.000 (0.012) loss 1.1855 (1.0731) acc 71.8750 (73.4091) lr 1.2487e-03 eta 3:29:09 +epoch [23/50] batch [60/500] time 0.871 (0.900) data 0.000 (0.011) loss 0.8501 (1.0800) acc 75.0000 (73.4375) lr 1.2487e-03 eta 3:29:09 +epoch [23/50] batch [65/500] time 0.901 (0.899) data 0.000 (0.010) loss 1.1426 (1.0808) acc 68.7500 (73.4135) lr 1.2487e-03 eta 3:28:41 +epoch [23/50] batch [70/500] time 0.868 (0.897) data 0.000 (0.009) loss 1.3320 (1.0734) acc 71.8750 (73.5714) lr 1.2487e-03 eta 3:28:20 +epoch [23/50] batch [75/500] time 0.896 (0.897) data 0.000 (0.009) loss 1.9648 (1.0772) acc 59.3750 (73.3333) lr 1.2487e-03 eta 3:28:04 +epoch [23/50] batch [80/500] time 0.866 (0.896) data 0.000 (0.008) loss 1.3877 (1.0900) acc 56.2500 (72.9297) lr 1.2487e-03 eta 3:27:54 +epoch [23/50] batch [85/500] time 0.866 (0.895) data 0.000 (0.008) loss 1.0527 (1.0845) acc 78.1250 (72.9779) lr 1.2487e-03 eta 3:27:33 +epoch [23/50] batch [90/500] time 0.875 (0.894) data 0.000 (0.007) loss 0.5752 (1.0761) acc 75.0000 (72.8472) lr 1.2487e-03 eta 3:27:13 +epoch [23/50] batch [95/500] time 0.866 (0.893) data 0.000 (0.007) loss 1.1357 (1.0749) acc 68.7500 (72.8289) lr 1.2487e-03 eta 3:26:52 +epoch [23/50] batch [100/500] time 0.898 (0.892) data 0.000 (0.007) loss 1.0732 (1.0816) acc 78.1250 (72.8438) lr 1.2487e-03 eta 3:26:40 +epoch [23/50] batch [105/500] time 0.864 (0.892) data 0.000 (0.006) loss 1.4951 (1.0892) acc 62.5000 (72.5893) lr 1.2487e-03 eta 3:26:32 +epoch [23/50] batch [110/500] time 0.874 (0.892) data 0.000 (0.006) loss 1.0547 (1.0769) acc 
75.0000 (73.0114) lr 1.2487e-03 eta 3:26:24 +epoch [23/50] batch [115/500] time 0.880 (0.891) data 0.000 (0.006) loss 1.4463 (1.0929) acc 59.3750 (72.7717) lr 1.2487e-03 eta 3:26:16 +epoch [23/50] batch [120/500] time 0.872 (0.892) data 0.000 (0.006) loss 0.9097 (1.0950) acc 62.5000 (72.5781) lr 1.2487e-03 eta 3:26:25 +epoch [23/50] batch [125/500] time 0.880 (0.891) data 0.000 (0.005) loss 1.4385 (1.0978) acc 65.6250 (72.6500) lr 1.2487e-03 eta 3:26:09 +epoch [23/50] batch [130/500] time 0.881 (0.891) data 0.000 (0.005) loss 1.8379 (1.1051) acc 56.2500 (72.5721) lr 1.2487e-03 eta 3:25:53 +epoch [23/50] batch [135/500] time 0.881 (0.890) data 0.000 (0.005) loss 1.0674 (1.0964) acc 71.8750 (72.6389) lr 1.2487e-03 eta 3:25:40 +epoch [23/50] batch [140/500] time 0.861 (0.890) data 0.000 (0.005) loss 0.9966 (1.0932) acc 75.0000 (72.7455) lr 1.2487e-03 eta 3:25:28 +epoch [23/50] batch [145/500] time 0.859 (0.889) data 0.000 (0.005) loss 0.9419 (1.0850) acc 78.1250 (72.9310) lr 1.2487e-03 eta 3:25:13 +epoch [23/50] batch [150/500] time 0.870 (0.889) data 0.000 (0.005) loss 1.6416 (1.0836) acc 71.8750 (73.1250) lr 1.2487e-03 eta 3:25:07 +epoch [23/50] batch [155/500] time 0.883 (0.888) data 0.000 (0.004) loss 1.3760 (1.0814) acc 68.7500 (73.1250) lr 1.2487e-03 eta 3:24:58 +epoch [23/50] batch [160/500] time 0.870 (0.888) data 0.000 (0.004) loss 1.3564 (1.0780) acc 62.5000 (73.1836) lr 1.2487e-03 eta 3:24:52 +epoch [23/50] batch [165/500] time 0.870 (0.888) data 0.000 (0.004) loss 0.8745 (1.0776) acc 84.3750 (73.2386) lr 1.2487e-03 eta 3:24:51 +epoch [23/50] batch [170/500] time 0.897 (0.888) data 0.000 (0.004) loss 1.2910 (1.0833) acc 65.6250 (73.1066) lr 1.2487e-03 eta 3:24:46 +epoch [23/50] batch [175/500] time 0.851 (0.888) data 0.000 (0.004) loss 1.2998 (1.0782) acc 68.7500 (73.1071) lr 1.2487e-03 eta 3:24:38 +epoch [23/50] batch [180/500] time 0.887 (0.888) data 0.000 (0.004) loss 1.5576 (1.0834) acc 65.6250 (72.9340) lr 1.2487e-03 eta 3:24:34 +epoch [23/50] batch 
[185/500] time 0.871 (0.888) data 0.000 (0.004) loss 1.5635 (1.0861) acc 56.2500 (72.8547) lr 1.2487e-03 eta 3:24:24 +epoch [23/50] batch [190/500] time 0.871 (0.887) data 0.000 (0.004) loss 1.2910 (1.0864) acc 71.8750 (72.9934) lr 1.2487e-03 eta 3:24:16 +epoch [23/50] batch [195/500] time 0.869 (0.887) data 0.000 (0.004) loss 0.6826 (1.0841) acc 78.1250 (73.0609) lr 1.2487e-03 eta 3:24:08 +epoch [23/50] batch [200/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.8174 (1.0862) acc 56.2500 (72.9844) lr 1.2487e-03 eta 3:24:04 +epoch [23/50] batch [205/500] time 0.871 (0.887) data 0.000 (0.003) loss 0.7964 (1.0861) acc 75.0000 (72.9573) lr 1.2487e-03 eta 3:24:01 +epoch [23/50] batch [210/500] time 0.870 (0.887) data 0.000 (0.003) loss 1.3857 (1.0910) acc 71.8750 (72.9018) lr 1.2487e-03 eta 3:23:51 +epoch [23/50] batch [215/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.5962 (1.0917) acc 84.3750 (72.9215) lr 1.2487e-03 eta 3:23:47 +epoch [23/50] batch [220/500] time 0.873 (0.887) data 0.000 (0.003) loss 1.2236 (1.0903) acc 68.7500 (73.0114) lr 1.2487e-03 eta 3:23:41 +epoch [23/50] batch [225/500] time 0.869 (0.887) data 0.000 (0.003) loss 1.2734 (1.0911) acc 84.3750 (73.0139) lr 1.2487e-03 eta 3:23:41 +epoch [23/50] batch [230/500] time 0.863 (0.887) data 0.000 (0.003) loss 1.5020 (1.0966) acc 68.7500 (72.8804) lr 1.2487e-03 eta 3:23:34 +epoch [23/50] batch [235/500] time 0.879 (0.887) data 0.000 (0.003) loss 1.4922 (1.0993) acc 65.6250 (72.8059) lr 1.2487e-03 eta 3:23:29 +epoch [23/50] batch [240/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.6367 (1.1020) acc 68.7500 (72.7734) lr 1.2487e-03 eta 3:23:24 +epoch [23/50] batch [245/500] time 0.860 (0.887) data 0.000 (0.003) loss 1.5723 (1.1043) acc 62.5000 (72.6913) lr 1.2487e-03 eta 3:23:16 +epoch [23/50] batch [250/500] time 0.885 (0.887) data 0.000 (0.003) loss 0.6411 (1.1058) acc 78.1250 (72.6750) lr 1.2487e-03 eta 3:23:10 +epoch [23/50] batch [255/500] time 0.898 (0.887) data 0.000 (0.003) loss 1.9023 
(1.1070) acc 62.5000 (72.5980) lr 1.2487e-03 eta 3:23:08 +epoch [23/50] batch [260/500] time 0.912 (0.887) data 0.000 (0.003) loss 0.6636 (1.1090) acc 71.8750 (72.6322) lr 1.2487e-03 eta 3:23:03 +epoch [23/50] batch [265/500] time 0.855 (0.887) data 0.000 (0.003) loss 0.8511 (1.1091) acc 78.1250 (72.6415) lr 1.2487e-03 eta 3:23:02 +epoch [23/50] batch [270/500] time 0.894 (0.887) data 0.000 (0.003) loss 1.0664 (1.1109) acc 75.0000 (72.6505) lr 1.2487e-03 eta 3:22:57 +epoch [23/50] batch [275/500] time 0.868 (0.887) data 0.000 (0.003) loss 1.0703 (1.1095) acc 78.1250 (72.6932) lr 1.2487e-03 eta 3:22:49 +epoch [23/50] batch [280/500] time 0.873 (0.886) data 0.000 (0.003) loss 1.3467 (1.1087) acc 62.5000 (72.6786) lr 1.2487e-03 eta 3:22:41 +epoch [23/50] batch [285/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.8013 (1.1103) acc 84.3750 (72.6754) lr 1.2487e-03 eta 3:22:33 +epoch [23/50] batch [290/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.3086 (1.1108) acc 68.7500 (72.6401) lr 1.2487e-03 eta 3:22:25 +epoch [23/50] batch [295/500] time 0.903 (0.886) data 0.000 (0.002) loss 0.9141 (1.1078) acc 78.1250 (72.6801) lr 1.2487e-03 eta 3:22:21 +epoch [23/50] batch [300/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.1113 (1.1107) acc 71.8750 (72.6458) lr 1.2487e-03 eta 3:22:14 +epoch [23/50] batch [305/500] time 0.974 (0.886) data 0.000 (0.002) loss 0.9883 (1.1114) acc 65.6250 (72.6332) lr 1.2487e-03 eta 3:22:14 +epoch [23/50] batch [310/500] time 0.911 (0.886) data 0.000 (0.002) loss 0.8228 (1.1122) acc 71.8750 (72.5907) lr 1.2487e-03 eta 3:22:12 +epoch [23/50] batch [315/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.1836 (1.1130) acc 62.5000 (72.5595) lr 1.2487e-03 eta 3:22:09 +epoch [23/50] batch [320/500] time 0.892 (0.886) data 0.000 (0.002) loss 0.6929 (1.1139) acc 84.3750 (72.5879) lr 1.2487e-03 eta 3:22:05 +epoch [23/50] batch [325/500] time 0.942 (0.887) data 0.000 (0.002) loss 1.3242 (1.1166) acc 62.5000 (72.5192) lr 1.2487e-03 eta 3:22:02 +epoch 
[23/50] batch [330/500] time 0.854 (0.886) data 0.000 (0.002) loss 1.1953 (1.1178) acc 62.5000 (72.4811) lr 1.2487e-03 eta 3:21:56 +epoch [23/50] batch [335/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.3789 (1.1216) acc 71.8750 (72.3507) lr 1.2487e-03 eta 3:21:47 +epoch [23/50] batch [340/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.0527 (1.1224) acc 75.0000 (72.3805) lr 1.2487e-03 eta 3:21:43 +epoch [23/50] batch [345/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.5498 (1.1201) acc 78.1250 (72.3913) lr 1.2487e-03 eta 3:21:35 +epoch [23/50] batch [350/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.1328 (1.1172) acc 71.8750 (72.4554) lr 1.2487e-03 eta 3:21:29 +epoch [23/50] batch [355/500] time 0.859 (0.886) data 0.000 (0.002) loss 0.8618 (1.1192) acc 71.8750 (72.4120) lr 1.2487e-03 eta 3:21:23 +epoch [23/50] batch [360/500] time 0.899 (0.886) data 0.000 (0.002) loss 0.9858 (1.1195) acc 75.0000 (72.4045) lr 1.2487e-03 eta 3:21:20 +epoch [23/50] batch [365/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.3584 (1.1238) acc 71.8750 (72.3373) lr 1.2487e-03 eta 3:21:16 +epoch [23/50] batch [370/500] time 0.858 (0.886) data 0.000 (0.002) loss 1.6768 (1.1232) acc 59.3750 (72.3057) lr 1.2487e-03 eta 3:21:10 +epoch [23/50] batch [375/500] time 0.920 (0.886) data 0.000 (0.002) loss 1.1924 (1.1216) acc 68.7500 (72.3083) lr 1.2487e-03 eta 3:21:07 +epoch [23/50] batch [380/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.9185 (1.1214) acc 75.0000 (72.2944) lr 1.2487e-03 eta 3:21:05 +epoch [23/50] batch [385/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.9536 (1.1261) acc 84.3750 (72.2727) lr 1.2487e-03 eta 3:20:58 +epoch [23/50] batch [390/500] time 0.852 (0.885) data 0.000 (0.002) loss 0.7993 (1.1256) acc 81.2500 (72.3397) lr 1.2487e-03 eta 3:20:50 +epoch [23/50] batch [395/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.8589 (1.1239) acc 75.0000 (72.3892) lr 1.2487e-03 eta 3:20:47 +epoch [23/50] batch [400/500] time 0.847 (0.885) data 0.000 (0.002) loss 
1.1787 (1.1256) acc 71.8750 (72.3438) lr 1.2487e-03 eta 3:20:41 +epoch [23/50] batch [405/500] time 0.865 (0.886) data 0.000 (0.002) loss 0.8599 (1.1219) acc 78.1250 (72.4228) lr 1.2487e-03 eta 3:20:40 +epoch [23/50] batch [410/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.8701 (1.1220) acc 62.5000 (72.4695) lr 1.2487e-03 eta 3:20:34 +epoch [23/50] batch [415/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.6392 (1.1217) acc 81.2500 (72.4473) lr 1.2487e-03 eta 3:20:29 +epoch [23/50] batch [420/500] time 0.890 (0.885) data 0.000 (0.002) loss 1.2549 (1.1233) acc 75.0000 (72.4256) lr 1.2487e-03 eta 3:20:24 +epoch [23/50] batch [425/500] time 0.887 (0.885) data 0.000 (0.002) loss 1.1465 (1.1220) acc 65.6250 (72.4412) lr 1.2487e-03 eta 3:20:19 +epoch [23/50] batch [430/500] time 0.885 (0.885) data 0.000 (0.002) loss 0.9028 (1.1189) acc 75.0000 (72.5073) lr 1.2487e-03 eta 3:20:14 +epoch [23/50] batch [435/500] time 0.900 (0.885) data 0.000 (0.002) loss 1.1455 (1.1211) acc 71.8750 (72.4928) lr 1.2487e-03 eta 3:20:11 +epoch [23/50] batch [440/500] time 0.908 (0.886) data 0.000 (0.002) loss 1.5654 (1.1230) acc 68.7500 (72.4929) lr 1.2487e-03 eta 3:20:07 +epoch [23/50] batch [445/500] time 0.900 (0.885) data 0.000 (0.002) loss 0.7832 (1.1250) acc 75.0000 (72.4649) lr 1.2487e-03 eta 3:20:02 +epoch [23/50] batch [450/500] time 0.858 (0.886) data 0.000 (0.002) loss 1.5459 (1.1263) acc 56.2500 (72.4306) lr 1.2487e-03 eta 3:20:01 +epoch [23/50] batch [455/500] time 0.890 (0.886) data 0.000 (0.002) loss 1.5566 (1.1271) acc 65.6250 (72.3970) lr 1.2487e-03 eta 3:19:55 +epoch [23/50] batch [460/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.2520 (1.1275) acc 75.0000 (72.4049) lr 1.2487e-03 eta 3:19:51 +epoch [23/50] batch [465/500] time 0.896 (0.886) data 0.000 (0.002) loss 1.0059 (1.1264) acc 71.8750 (72.4059) lr 1.2487e-03 eta 3:19:45 +epoch [23/50] batch [470/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.6855 (1.1258) acc 90.6250 (72.3936) lr 1.2487e-03 eta 3:19:40 
+epoch [23/50] batch [475/500] time 0.890 (0.885) data 0.000 (0.002) loss 1.5713 (1.1248) acc 56.2500 (72.4079) lr 1.2487e-03 eta 3:19:36 +epoch [23/50] batch [480/500] time 0.900 (0.885) data 0.000 (0.002) loss 1.1494 (1.1263) acc 71.8750 (72.3372) lr 1.2487e-03 eta 3:19:31 +epoch [23/50] batch [485/500] time 0.913 (0.886) data 0.000 (0.002) loss 0.4827 (1.1244) acc 87.5000 (72.3905) lr 1.2487e-03 eta 3:19:28 +epoch [23/50] batch [490/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.0059 (1.1238) acc 68.7500 (72.4043) lr 1.2487e-03 eta 3:19:24 +epoch [23/50] batch [495/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.2285 (1.1227) acc 75.0000 (72.4369) lr 1.2487e-03 eta 3:19:21 +epoch [23/50] batch [500/500] time 0.900 (0.886) data 0.000 (0.002) loss 0.5117 (1.1216) acc 87.5000 (72.4938) lr 1.1874e-03 eta 3:19:18 +epoch [24/50] batch [5/500] time 0.862 (1.028) data 0.000 (0.139) loss 0.9019 (1.0705) acc 75.0000 (71.2500) lr 1.1874e-03 eta 3:51:14 +epoch [24/50] batch [10/500] time 0.858 (0.955) data 0.000 (0.070) loss 0.8496 (0.9900) acc 78.1250 (75.0000) lr 1.1874e-03 eta 3:34:49 +epoch [24/50] batch [15/500] time 0.898 (0.931) data 0.000 (0.047) loss 0.7827 (1.0707) acc 71.8750 (72.0833) lr 1.1874e-03 eta 3:29:10 +epoch [24/50] batch [20/500] time 0.917 (0.921) data 0.000 (0.035) loss 0.8389 (1.0523) acc 78.1250 (72.3438) lr 1.1874e-03 eta 3:26:48 +epoch [24/50] batch [25/500] time 0.901 (0.914) data 0.000 (0.028) loss 0.9316 (1.0524) acc 75.0000 (72.3750) lr 1.1874e-03 eta 3:25:09 +epoch [24/50] batch [30/500] time 0.902 (0.911) data 0.000 (0.023) loss 0.8979 (1.0600) acc 65.6250 (71.8750) lr 1.1874e-03 eta 3:24:27 +epoch [24/50] batch [35/500] time 0.872 (0.907) data 0.000 (0.020) loss 0.9058 (1.0639) acc 71.8750 (71.7857) lr 1.1874e-03 eta 3:23:26 +epoch [24/50] batch [40/500] time 0.882 (0.903) data 0.000 (0.018) loss 1.4795 (1.1013) acc 65.6250 (71.4844) lr 1.1874e-03 eta 3:22:36 +epoch [24/50] batch [45/500] time 0.909 (0.902) data 0.000 (0.016) loss 
1.2539 (1.1076) acc 75.0000 (71.3889) lr 1.1874e-03 eta 3:22:11 +epoch [24/50] batch [50/500] time 0.885 (0.899) data 0.000 (0.014) loss 0.9980 (1.1059) acc 68.7500 (71.3750) lr 1.1874e-03 eta 3:21:31 +epoch [24/50] batch [55/500] time 0.878 (0.899) data 0.000 (0.013) loss 1.2559 (1.1108) acc 68.7500 (71.3068) lr 1.1874e-03 eta 3:21:22 +epoch [24/50] batch [60/500] time 0.874 (0.896) data 0.000 (0.012) loss 0.7349 (1.1032) acc 81.2500 (71.7708) lr 1.1874e-03 eta 3:20:48 +epoch [24/50] batch [65/500] time 0.869 (0.895) data 0.000 (0.011) loss 0.8081 (1.1009) acc 84.3750 (72.2115) lr 1.1874e-03 eta 3:20:26 +epoch [24/50] batch [70/500] time 0.879 (0.895) data 0.000 (0.010) loss 0.8438 (1.0934) acc 84.3750 (72.6339) lr 1.1874e-03 eta 3:20:14 +epoch [24/50] batch [75/500] time 0.902 (0.894) data 0.000 (0.009) loss 1.0713 (1.1016) acc 68.7500 (72.6667) lr 1.1874e-03 eta 3:20:03 +epoch [24/50] batch [80/500] time 0.866 (0.893) data 0.000 (0.009) loss 1.0186 (1.1039) acc 65.6250 (72.5000) lr 1.1874e-03 eta 3:19:49 +epoch [24/50] batch [85/500] time 0.895 (0.893) data 0.000 (0.008) loss 0.7461 (1.0996) acc 84.3750 (72.6838) lr 1.1874e-03 eta 3:19:38 +epoch [24/50] batch [90/500] time 0.881 (0.893) data 0.000 (0.008) loss 1.2061 (1.0982) acc 68.7500 (72.6389) lr 1.1874e-03 eta 3:19:32 +epoch [24/50] batch [95/500] time 0.913 (0.894) data 0.000 (0.008) loss 0.7305 (1.1124) acc 78.1250 (72.3684) lr 1.1874e-03 eta 3:19:44 +epoch [24/50] batch [100/500] time 0.876 (0.893) data 0.000 (0.007) loss 0.7192 (1.1034) acc 87.5000 (72.5938) lr 1.1874e-03 eta 3:19:26 +epoch [24/50] batch [105/500] time 0.899 (0.893) data 0.000 (0.007) loss 0.9233 (1.0933) acc 75.0000 (72.7381) lr 1.1874e-03 eta 3:19:15 +epoch [24/50] batch [110/500] time 0.884 (0.892) data 0.000 (0.007) loss 1.5898 (1.0945) acc 65.6250 (72.8125) lr 1.1874e-03 eta 3:19:06 +epoch [24/50] batch [115/500] time 0.908 (0.892) data 0.000 (0.006) loss 1.0332 (1.0951) acc 78.1250 (72.8804) lr 1.1874e-03 eta 3:18:56 +epoch 
[24/50] batch [120/500] time 0.894 (0.892) data 0.000 (0.006) loss 1.5928 (1.1033) acc 68.7500 (72.8125) lr 1.1874e-03 eta 3:18:55 +epoch [24/50] batch [125/500] time 0.886 (0.892) data 0.000 (0.006) loss 0.9565 (1.0986) acc 84.3750 (72.8750) lr 1.1874e-03 eta 3:18:45 +epoch [24/50] batch [130/500] time 0.867 (0.891) data 0.000 (0.006) loss 0.9946 (1.0972) acc 75.0000 (72.9087) lr 1.1874e-03 eta 3:18:33 +epoch [24/50] batch [135/500] time 0.985 (0.892) data 0.000 (0.005) loss 0.8213 (1.0949) acc 84.3750 (73.0324) lr 1.1874e-03 eta 3:18:36 +epoch [24/50] batch [140/500] time 0.911 (0.892) data 0.000 (0.005) loss 0.7979 (1.0906) acc 78.1250 (73.1027) lr 1.1874e-03 eta 3:18:32 +epoch [24/50] batch [145/500] time 0.895 (0.892) data 0.000 (0.005) loss 1.2139 (1.0889) acc 65.6250 (73.0819) lr 1.1874e-03 eta 3:18:29 +epoch [24/50] batch [150/500] time 0.887 (0.891) data 0.000 (0.005) loss 1.6289 (1.0982) acc 59.3750 (72.8542) lr 1.1874e-03 eta 3:18:19 +epoch [24/50] batch [155/500] time 0.855 (0.891) data 0.000 (0.005) loss 1.0303 (1.1028) acc 65.6250 (72.6815) lr 1.1874e-03 eta 3:18:14 +epoch [24/50] batch [160/500] time 0.875 (0.891) data 0.000 (0.005) loss 0.8184 (1.1006) acc 75.0000 (72.7148) lr 1.1874e-03 eta 3:18:04 +epoch [24/50] batch [165/500] time 0.865 (0.891) data 0.000 (0.004) loss 1.4609 (1.1010) acc 71.8750 (72.8409) lr 1.1874e-03 eta 3:17:56 +epoch [24/50] batch [170/500] time 0.880 (0.891) data 0.000 (0.004) loss 1.7783 (1.1034) acc 59.3750 (72.8125) lr 1.1874e-03 eta 3:17:50 +epoch [24/50] batch [175/500] time 0.852 (0.890) data 0.000 (0.004) loss 0.4885 (1.0993) acc 87.5000 (72.9464) lr 1.1874e-03 eta 3:17:39 +epoch [24/50] batch [180/500] time 0.865 (0.890) data 0.001 (0.004) loss 0.7920 (1.0953) acc 75.0000 (73.0382) lr 1.1874e-03 eta 3:17:32 +epoch [24/50] batch [185/500] time 0.907 (0.890) data 0.000 (0.004) loss 1.2109 (1.0966) acc 71.8750 (72.9730) lr 1.1874e-03 eta 3:17:33 +epoch [24/50] batch [190/500] time 0.892 (0.890) data 0.000 (0.004) loss 
1.1992 (1.0983) acc 68.7500 (72.8947) lr 1.1874e-03 eta 3:17:28 +epoch [24/50] batch [195/500] time 0.922 (0.890) data 0.000 (0.004) loss 1.0488 (1.0954) acc 68.7500 (72.9327) lr 1.1874e-03 eta 3:17:22 +epoch [24/50] batch [200/500] time 0.878 (0.890) data 0.000 (0.004) loss 1.1230 (1.1014) acc 71.8750 (72.7812) lr 1.1874e-03 eta 3:17:14 +epoch [24/50] batch [205/500] time 0.894 (0.889) data 0.000 (0.004) loss 0.7827 (1.0926) acc 78.1250 (72.9878) lr 1.1874e-03 eta 3:17:05 +epoch [24/50] batch [210/500] time 0.914 (0.890) data 0.000 (0.004) loss 1.3926 (1.1001) acc 62.5000 (72.9167) lr 1.1874e-03 eta 3:17:02 +epoch [24/50] batch [215/500] time 0.885 (0.890) data 0.001 (0.003) loss 0.8501 (1.0971) acc 68.7500 (72.9215) lr 1.1874e-03 eta 3:16:58 +epoch [24/50] batch [220/500] time 0.902 (0.889) data 0.000 (0.003) loss 0.7241 (1.0980) acc 78.1250 (72.8977) lr 1.1874e-03 eta 3:16:50 +epoch [24/50] batch [225/500] time 0.902 (0.889) data 0.000 (0.003) loss 1.2598 (1.1031) acc 71.8750 (72.7500) lr 1.1874e-03 eta 3:16:43 +epoch [24/50] batch [230/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.4990 (1.1045) acc 65.6250 (72.7582) lr 1.1874e-03 eta 3:16:38 +epoch [24/50] batch [235/500] time 0.908 (0.890) data 0.000 (0.003) loss 1.6777 (1.1073) acc 50.0000 (72.6729) lr 1.1874e-03 eta 3:16:40 +epoch [24/50] batch [240/500] time 0.884 (0.889) data 0.000 (0.003) loss 1.0273 (1.1044) acc 65.6250 (72.6562) lr 1.1874e-03 eta 3:16:34 +epoch [24/50] batch [245/500] time 0.918 (0.890) data 0.000 (0.003) loss 1.3672 (1.1000) acc 65.6250 (72.7679) lr 1.1874e-03 eta 3:16:32 +epoch [24/50] batch [250/500] time 0.891 (0.890) data 0.000 (0.003) loss 0.5874 (1.0994) acc 90.6250 (72.8000) lr 1.1874e-03 eta 3:16:28 +epoch [24/50] batch [255/500] time 0.873 (0.890) data 0.000 (0.003) loss 1.3848 (1.1010) acc 62.5000 (72.6838) lr 1.1874e-03 eta 3:16:24 +epoch [24/50] batch [260/500] time 0.895 (0.890) data 0.000 (0.003) loss 0.6587 (1.1003) acc 81.2500 (72.6923) lr 1.1874e-03 eta 3:16:23 
+epoch [24/50] batch [265/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.5098 (1.0991) acc 68.7500 (72.7358) lr 1.1874e-03 eta 3:16:18 +epoch [24/50] batch [270/500] time 0.866 (0.890) data 0.000 (0.003) loss 0.8877 (1.1033) acc 81.2500 (72.6620) lr 1.1874e-03 eta 3:16:13 +epoch [24/50] batch [275/500] time 0.868 (0.890) data 0.000 (0.003) loss 0.8066 (1.1018) acc 71.8750 (72.5909) lr 1.1874e-03 eta 3:16:06 +epoch [24/50] batch [280/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.8252 (1.1032) acc 81.2500 (72.5893) lr 1.1874e-03 eta 3:16:05 +epoch [24/50] batch [285/500] time 0.893 (0.890) data 0.000 (0.003) loss 1.3604 (1.1060) acc 65.6250 (72.5658) lr 1.1874e-03 eta 3:15:59 +epoch [24/50] batch [290/500] time 0.852 (0.889) data 0.000 (0.003) loss 1.1543 (1.1074) acc 71.8750 (72.5216) lr 1.1874e-03 eta 3:15:50 +epoch [24/50] batch [295/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.6616 (1.1074) acc 81.2500 (72.5000) lr 1.1874e-03 eta 3:15:43 +epoch [24/50] batch [300/500] time 0.898 (0.889) data 0.000 (0.003) loss 1.6133 (1.1140) acc 53.1250 (72.2812) lr 1.1874e-03 eta 3:15:38 +epoch [24/50] batch [305/500] time 0.861 (0.889) data 0.000 (0.003) loss 1.4717 (1.1123) acc 68.7500 (72.3053) lr 1.1874e-03 eta 3:15:30 +epoch [24/50] batch [310/500] time 0.872 (0.889) data 0.000 (0.002) loss 0.7476 (1.1125) acc 75.0000 (72.3085) lr 1.1874e-03 eta 3:15:26 +epoch [24/50] batch [315/500] time 0.911 (0.889) data 0.000 (0.002) loss 0.7207 (1.1098) acc 90.6250 (72.3710) lr 1.1874e-03 eta 3:15:23 +epoch [24/50] batch [320/500] time 0.896 (0.889) data 0.000 (0.002) loss 1.4707 (1.1105) acc 62.5000 (72.3145) lr 1.1874e-03 eta 3:15:19 +epoch [24/50] batch [325/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.3955 (1.1123) acc 68.7500 (72.3269) lr 1.1874e-03 eta 3:15:13 +epoch [24/50] batch [330/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.5449 (1.1136) acc 65.6250 (72.3011) lr 1.1874e-03 eta 3:15:04 +epoch [24/50] batch [335/500] time 0.866 (0.888) data 0.000 
(0.002) loss 0.6958 (1.1135) acc 68.7500 (72.2295) lr 1.1874e-03 eta 3:14:54 +epoch [24/50] batch [340/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.9453 (1.1125) acc 68.7500 (72.2335) lr 1.1874e-03 eta 3:14:49 +epoch [24/50] batch [345/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.2422 (1.1134) acc 78.1250 (72.2554) lr 1.1874e-03 eta 3:14:43 +epoch [24/50] batch [350/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.3203 (1.1154) acc 65.6250 (72.1875) lr 1.1874e-03 eta 3:14:40 +epoch [24/50] batch [355/500] time 0.856 (0.888) data 0.000 (0.002) loss 1.0977 (1.1172) acc 59.3750 (72.1303) lr 1.1874e-03 eta 3:14:34 +epoch [24/50] batch [360/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8081 (1.1146) acc 78.1250 (72.1354) lr 1.1874e-03 eta 3:14:26 +epoch [24/50] batch [365/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.0049 (1.1168) acc 71.8750 (72.1832) lr 1.1874e-03 eta 3:14:18 +epoch [24/50] batch [370/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.9077 (1.1152) acc 75.0000 (72.2297) lr 1.1874e-03 eta 3:14:11 +epoch [24/50] batch [375/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.8159 (1.1146) acc 81.2500 (72.2417) lr 1.1874e-03 eta 3:14:05 +epoch [24/50] batch [380/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.3926 (1.1157) acc 68.7500 (72.1875) lr 1.1874e-03 eta 3:14:01 +epoch [24/50] batch [385/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.7783 (1.1166) acc 75.0000 (72.1753) lr 1.1874e-03 eta 3:13:56 +epoch [24/50] batch [390/500] time 0.847 (0.887) data 0.000 (0.002) loss 1.0361 (1.1131) acc 68.7500 (72.2356) lr 1.1874e-03 eta 3:13:49 +epoch [24/50] batch [395/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.9561 (1.1138) acc 71.8750 (72.2785) lr 1.1874e-03 eta 3:13:45 +epoch [24/50] batch [400/500] time 0.914 (0.887) data 0.000 (0.002) loss 0.9375 (1.1117) acc 75.0000 (72.2969) lr 1.1874e-03 eta 3:13:41 +epoch [24/50] batch [405/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.0791 (1.1159) acc 84.3750 (72.2917) lr 1.1874e-03 
eta 3:13:35 +epoch [24/50] batch [410/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.2461 (1.1144) acc 68.7500 (72.2561) lr 1.1874e-03 eta 3:13:30 +epoch [24/50] batch [415/500] time 0.856 (0.887) data 0.000 (0.002) loss 1.6797 (1.1146) acc 59.3750 (72.2440) lr 1.1874e-03 eta 3:13:23 +epoch [24/50] batch [420/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.2119 (1.1139) acc 68.7500 (72.2173) lr 1.1874e-03 eta 3:13:18 +epoch [24/50] batch [425/500] time 0.866 (0.887) data 0.000 (0.002) loss 1.6562 (1.1185) acc 62.5000 (72.1618) lr 1.1874e-03 eta 3:13:15 +epoch [24/50] batch [430/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.1650 (1.1184) acc 71.8750 (72.1221) lr 1.1874e-03 eta 3:13:09 +epoch [24/50] batch [435/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.1260 (1.1173) acc 68.7500 (72.1408) lr 1.1874e-03 eta 3:13:05 +epoch [24/50] batch [440/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.5444 (1.1151) acc 84.3750 (72.1662) lr 1.1874e-03 eta 3:13:00 +epoch [24/50] batch [445/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.8638 (1.1147) acc 75.0000 (72.1980) lr 1.1874e-03 eta 3:12:55 +epoch [24/50] batch [450/500] time 0.894 (0.887) data 0.000 (0.002) loss 0.9712 (1.1143) acc 78.1250 (72.2153) lr 1.1874e-03 eta 3:12:50 +epoch [24/50] batch [455/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9771 (1.1115) acc 81.2500 (72.3077) lr 1.1874e-03 eta 3:12:45 +epoch [24/50] batch [460/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.1943 (1.1107) acc 71.8750 (72.3234) lr 1.1874e-03 eta 3:12:42 +epoch [24/50] batch [465/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.3223 (1.1124) acc 71.8750 (72.2715) lr 1.1874e-03 eta 3:12:38 +epoch [24/50] batch [470/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.0293 (1.1142) acc 78.1250 (72.2473) lr 1.1874e-03 eta 3:12:32 +epoch [24/50] batch [475/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.7285 (1.1126) acc 75.0000 (72.2566) lr 1.1874e-03 eta 3:12:26 +epoch [24/50] batch [480/500] time 0.875 (0.887) data 
0.000 (0.002) loss 0.8872 (1.1104) acc 81.2500 (72.3307) lr 1.1874e-03 eta 3:12:22 +epoch [24/50] batch [485/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.7944 (1.1089) acc 84.3750 (72.3454) lr 1.1874e-03 eta 3:12:18 +epoch [24/50] batch [490/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.4678 (1.1104) acc 75.0000 (72.3278) lr 1.1874e-03 eta 3:12:15 +epoch [24/50] batch [495/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8809 (1.1092) acc 75.0000 (72.3611) lr 1.1874e-03 eta 3:12:11 +epoch [24/50] batch [500/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.6865 (1.1098) acc 59.3750 (72.3750) lr 1.1253e-03 eta 3:12:07 +epoch [25/50] batch [5/500] time 0.865 (1.026) data 0.000 (0.147) loss 0.6421 (0.8183) acc 84.3750 (81.8750) lr 1.1253e-03 eta 3:42:10 +epoch [25/50] batch [10/500] time 0.895 (0.957) data 0.000 (0.073) loss 0.6909 (0.8730) acc 81.2500 (78.1250) lr 1.1253e-03 eta 3:27:13 +epoch [25/50] batch [15/500] time 0.905 (0.935) data 0.000 (0.049) loss 0.7446 (0.9299) acc 81.2500 (76.8750) lr 1.1253e-03 eta 3:22:27 +epoch [25/50] batch [20/500] time 0.876 (0.921) data 0.000 (0.037) loss 0.6953 (0.9647) acc 84.3750 (76.5625) lr 1.1253e-03 eta 3:19:17 +epoch [25/50] batch [25/500] time 0.894 (0.914) data 0.000 (0.030) loss 1.3701 (1.0402) acc 62.5000 (74.5000) lr 1.1253e-03 eta 3:17:42 +epoch [25/50] batch [30/500] time 0.896 (0.907) data 0.000 (0.025) loss 0.8149 (1.0275) acc 87.5000 (74.4792) lr 1.1253e-03 eta 3:16:08 +epoch [25/50] batch [35/500] time 0.872 (0.903) data 0.000 (0.021) loss 0.9619 (1.0408) acc 68.7500 (73.9286) lr 1.1253e-03 eta 3:15:02 +epoch [25/50] batch [40/500] time 0.922 (0.902) data 0.000 (0.019) loss 1.4590 (1.0698) acc 68.7500 (72.8906) lr 1.1253e-03 eta 3:14:52 +epoch [25/50] batch [45/500] time 0.884 (0.902) data 0.000 (0.017) loss 0.6982 (1.0895) acc 78.1250 (72.5694) lr 1.1253e-03 eta 3:14:40 +epoch [25/50] batch [50/500] time 0.888 (0.900) data 0.000 (0.015) loss 0.9019 (1.1017) acc 68.7500 (72.2500) lr 1.1253e-03 eta 
3:14:13 +epoch [25/50] batch [55/500] time 0.885 (0.899) data 0.000 (0.014) loss 0.8579 (1.1016) acc 75.0000 (72.5000) lr 1.1253e-03 eta 3:14:02 +epoch [25/50] batch [60/500] time 0.903 (0.899) data 0.000 (0.012) loss 0.9854 (1.0876) acc 71.8750 (73.0208) lr 1.1253e-03 eta 3:13:52 +epoch [25/50] batch [65/500] time 0.921 (0.898) data 0.000 (0.012) loss 0.9785 (1.0889) acc 71.8750 (72.9808) lr 1.1253e-03 eta 3:13:38 +epoch [25/50] batch [70/500] time 0.916 (0.897) data 0.000 (0.011) loss 0.9463 (1.0824) acc 75.0000 (73.2143) lr 1.1253e-03 eta 3:13:16 +epoch [25/50] batch [75/500] time 0.876 (0.898) data 0.000 (0.010) loss 0.8384 (1.0834) acc 81.2500 (73.3750) lr 1.1253e-03 eta 3:13:23 +epoch [25/50] batch [80/500] time 0.898 (0.898) data 0.000 (0.009) loss 1.0000 (1.0908) acc 71.8750 (73.2031) lr 1.1253e-03 eta 3:13:18 +epoch [25/50] batch [85/500] time 0.880 (0.897) data 0.000 (0.009) loss 1.4248 (1.0904) acc 71.8750 (73.2721) lr 1.1253e-03 eta 3:13:01 +epoch [25/50] batch [90/500] time 0.888 (0.896) data 0.000 (0.008) loss 1.5098 (1.0905) acc 68.7500 (73.1250) lr 1.1253e-03 eta 3:12:52 +epoch [25/50] batch [95/500] time 0.883 (0.896) data 0.000 (0.008) loss 1.1982 (1.0916) acc 71.8750 (73.0921) lr 1.1253e-03 eta 3:12:39 +epoch [25/50] batch [100/500] time 0.870 (0.895) data 0.000 (0.008) loss 1.3457 (1.0878) acc 65.6250 (73.1250) lr 1.1253e-03 eta 3:12:21 +epoch [25/50] batch [105/500] time 0.908 (0.894) data 0.000 (0.007) loss 1.2969 (1.0904) acc 75.0000 (73.0655) lr 1.1253e-03 eta 3:12:13 +epoch [25/50] batch [110/500] time 0.889 (0.893) data 0.000 (0.007) loss 0.8247 (1.0822) acc 78.1250 (73.0966) lr 1.1253e-03 eta 3:11:53 +epoch [25/50] batch [115/500] time 0.945 (0.893) data 0.000 (0.007) loss 1.3555 (1.0974) acc 75.0000 (73.0435) lr 1.1253e-03 eta 3:11:52 +epoch [25/50] batch [120/500] time 0.922 (0.895) data 0.000 (0.006) loss 1.2061 (1.0991) acc 71.8750 (72.8385) lr 1.1253e-03 eta 3:12:08 +epoch [25/50] batch [125/500] time 0.884 (0.895) data 0.000 (0.006) 
loss 1.3057 (1.0976) acc 68.7500 (73.0000) lr 1.1253e-03 eta 3:12:03 +epoch [25/50] batch [130/500] time 0.866 (0.894) data 0.000 (0.006) loss 0.8096 (1.0957) acc 78.1250 (73.0769) lr 1.1253e-03 eta 3:11:50 +epoch [25/50] batch [135/500] time 0.855 (0.894) data 0.000 (0.006) loss 0.7871 (1.0907) acc 78.1250 (73.1481) lr 1.1253e-03 eta 3:11:40 +epoch [25/50] batch [140/500] time 0.866 (0.894) data 0.000 (0.005) loss 0.7998 (1.0893) acc 81.2500 (73.2366) lr 1.1253e-03 eta 3:11:33 +epoch [25/50] batch [145/500] time 0.881 (0.894) data 0.000 (0.005) loss 1.0000 (1.0882) acc 81.2500 (73.3621) lr 1.1253e-03 eta 3:11:28 +epoch [25/50] batch [150/500] time 0.898 (0.894) data 0.000 (0.005) loss 0.5444 (1.0829) acc 81.2500 (73.4167) lr 1.1253e-03 eta 3:11:23 +epoch [25/50] batch [155/500] time 0.871 (0.893) data 0.000 (0.005) loss 1.0127 (1.0796) acc 65.6250 (73.3871) lr 1.1253e-03 eta 3:11:11 +epoch [25/50] batch [160/500] time 0.895 (0.893) data 0.000 (0.005) loss 1.5557 (1.0803) acc 56.2500 (73.4766) lr 1.1253e-03 eta 3:11:05 +epoch [25/50] batch [165/500] time 0.873 (0.893) data 0.000 (0.005) loss 1.3633 (1.0807) acc 71.8750 (73.4280) lr 1.1253e-03 eta 3:10:59 +epoch [25/50] batch [170/500] time 0.861 (0.892) data 0.000 (0.005) loss 0.8115 (1.0757) acc 75.0000 (73.6029) lr 1.1253e-03 eta 3:10:50 +epoch [25/50] batch [175/500] time 0.865 (0.892) data 0.000 (0.004) loss 0.9453 (1.0770) acc 75.0000 (73.5536) lr 1.1253e-03 eta 3:10:42 +epoch [25/50] batch [180/500] time 0.872 (0.892) data 0.000 (0.004) loss 1.6260 (1.0791) acc 65.6250 (73.4722) lr 1.1253e-03 eta 3:10:39 +epoch [25/50] batch [185/500] time 0.885 (0.892) data 0.000 (0.004) loss 1.7842 (1.0773) acc 65.6250 (73.6149) lr 1.1253e-03 eta 3:10:33 +epoch [25/50] batch [190/500] time 0.862 (0.892) data 0.000 (0.004) loss 0.7036 (1.0765) acc 78.1250 (73.6513) lr 1.1253e-03 eta 3:10:26 +epoch [25/50] batch [195/500] time 0.923 (0.892) data 0.000 (0.004) loss 1.0312 (1.0823) acc 71.8750 (73.4936) lr 1.1253e-03 eta 
3:10:21 +epoch [25/50] batch [200/500] time 0.898 (0.892) data 0.000 (0.004) loss 0.6465 (1.0836) acc 81.2500 (73.5469) lr 1.1253e-03 eta 3:10:13 +epoch [25/50] batch [205/500] time 0.882 (0.892) data 0.000 (0.004) loss 0.7339 (1.0876) acc 81.2500 (73.4756) lr 1.1253e-03 eta 3:10:12 +epoch [25/50] batch [210/500] time 0.891 (0.892) data 0.000 (0.004) loss 0.8755 (1.0913) acc 78.1250 (73.3929) lr 1.1253e-03 eta 3:10:05 +epoch [25/50] batch [215/500] time 0.984 (0.892) data 0.000 (0.004) loss 1.5977 (1.0877) acc 65.6250 (73.4012) lr 1.1253e-03 eta 3:10:07 +epoch [25/50] batch [220/500] time 0.881 (0.892) data 0.000 (0.004) loss 1.0771 (1.0899) acc 65.6250 (73.2955) lr 1.1253e-03 eta 3:10:01 +epoch [25/50] batch [225/500] time 0.896 (0.892) data 0.000 (0.003) loss 0.9077 (1.0874) acc 75.0000 (73.2361) lr 1.1253e-03 eta 3:09:57 +epoch [25/50] batch [230/500] time 0.864 (0.892) data 0.000 (0.003) loss 0.8892 (1.0874) acc 75.0000 (73.2201) lr 1.1253e-03 eta 3:09:52 +epoch [25/50] batch [235/500] time 0.873 (0.892) data 0.000 (0.003) loss 1.2764 (1.0847) acc 68.7500 (73.2580) lr 1.1253e-03 eta 3:09:43 +epoch [25/50] batch [240/500] time 0.895 (0.892) data 0.000 (0.003) loss 1.2627 (1.0860) acc 81.2500 (73.2682) lr 1.1253e-03 eta 3:09:36 +epoch [25/50] batch [245/500] time 0.857 (0.891) data 0.000 (0.003) loss 0.8877 (1.0851) acc 75.0000 (73.2653) lr 1.1253e-03 eta 3:09:26 +epoch [25/50] batch [250/500] time 0.881 (0.891) data 0.000 (0.003) loss 0.9438 (1.0842) acc 75.0000 (73.3000) lr 1.1253e-03 eta 3:09:21 +epoch [25/50] batch [255/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.3369 (1.0832) acc 65.6250 (73.2843) lr 1.1253e-03 eta 3:09:15 +epoch [25/50] batch [260/500] time 0.877 (0.891) data 0.000 (0.003) loss 1.4854 (1.0810) acc 56.2500 (73.3173) lr 1.1253e-03 eta 3:09:13 +epoch [25/50] batch [265/500] time 0.870 (0.891) data 0.000 (0.003) loss 0.8545 (1.0854) acc 78.1250 (73.2665) lr 1.1253e-03 eta 3:09:05 +epoch [25/50] batch [270/500] time 0.892 (0.891) data 
0.000 (0.003) loss 0.5220 (1.0858) acc 84.3750 (73.2407) lr 1.1253e-03 eta 3:08:58 +epoch [25/50] batch [275/500] time 0.882 (0.891) data 0.000 (0.003) loss 0.8647 (1.0873) acc 78.1250 (73.1705) lr 1.1253e-03 eta 3:08:55 +epoch [25/50] batch [280/500] time 0.864 (0.891) data 0.000 (0.003) loss 1.2793 (1.0884) acc 71.8750 (73.2031) lr 1.1253e-03 eta 3:08:47 +epoch [25/50] batch [285/500] time 0.917 (0.891) data 0.000 (0.003) loss 0.9966 (1.0891) acc 71.8750 (73.1908) lr 1.1253e-03 eta 3:08:44 +epoch [25/50] batch [290/500] time 0.859 (0.890) data 0.000 (0.003) loss 1.3701 (1.0922) acc 56.2500 (73.0711) lr 1.1253e-03 eta 3:08:37 +epoch [25/50] batch [295/500] time 0.890 (0.890) data 0.000 (0.003) loss 1.1523 (1.0943) acc 68.7500 (72.9873) lr 1.1253e-03 eta 3:08:32 +epoch [25/50] batch [300/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.2197 (1.0958) acc 68.7500 (72.9688) lr 1.1253e-03 eta 3:08:28 +epoch [25/50] batch [305/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.3096 (1.0940) acc 71.8750 (73.0020) lr 1.1253e-03 eta 3:08:20 +epoch [25/50] batch [310/500] time 0.869 (0.890) data 0.000 (0.003) loss 0.9814 (1.0965) acc 78.1250 (72.9637) lr 1.1253e-03 eta 3:08:15 +epoch [25/50] batch [315/500] time 0.877 (0.890) data 0.000 (0.003) loss 0.8184 (1.0966) acc 84.3750 (72.9563) lr 1.1253e-03 eta 3:08:09 +epoch [25/50] batch [320/500] time 0.867 (0.890) data 0.000 (0.003) loss 1.2158 (1.0957) acc 71.8750 (72.9883) lr 1.1253e-03 eta 3:08:02 +epoch [25/50] batch [325/500] time 0.889 (0.890) data 0.000 (0.002) loss 1.2705 (1.0929) acc 71.8750 (73.0865) lr 1.1253e-03 eta 3:07:56 +epoch [25/50] batch [330/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.0654 (1.0936) acc 78.1250 (73.1345) lr 1.1253e-03 eta 3:07:52 +epoch [25/50] batch [335/500] time 0.879 (0.890) data 0.000 (0.002) loss 0.6553 (1.0908) acc 71.8750 (73.1810) lr 1.1253e-03 eta 3:07:49 +epoch [25/50] batch [340/500] time 0.873 (0.890) data 0.000 (0.002) loss 1.0176 (1.0938) acc 71.8750 (73.1250) lr 
1.1253e-03 eta 3:07:42 +epoch [25/50] batch [345/500] time 0.872 (0.890) data 0.000 (0.002) loss 0.8184 (1.0927) acc 68.7500 (73.1069) lr 1.1253e-03 eta 3:07:38 +epoch [25/50] batch [350/500] time 0.883 (0.890) data 0.000 (0.002) loss 0.9946 (1.0911) acc 71.8750 (73.1250) lr 1.1253e-03 eta 3:07:33 +epoch [25/50] batch [355/500] time 0.901 (0.890) data 0.000 (0.002) loss 1.5166 (1.0911) acc 65.6250 (73.1514) lr 1.1253e-03 eta 3:07:29 +epoch [25/50] batch [360/500] time 0.857 (0.890) data 0.000 (0.002) loss 1.6514 (1.0922) acc 62.5000 (73.1424) lr 1.1253e-03 eta 3:07:28 +epoch [25/50] batch [365/500] time 0.895 (0.890) data 0.000 (0.002) loss 0.9692 (1.0934) acc 78.1250 (73.1421) lr 1.1253e-03 eta 3:07:23 +epoch [25/50] batch [370/500] time 0.909 (0.890) data 0.000 (0.002) loss 0.8047 (1.0949) acc 84.3750 (73.1503) lr 1.1253e-03 eta 3:07:22 +epoch [25/50] batch [375/500] time 0.884 (0.890) data 0.000 (0.002) loss 1.0625 (1.0959) acc 68.7500 (73.1167) lr 1.1253e-03 eta 3:07:18 +epoch [25/50] batch [380/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.1865 (1.0941) acc 81.2500 (73.1990) lr 1.1253e-03 eta 3:07:17 +epoch [25/50] batch [385/500] time 0.904 (0.890) data 0.000 (0.002) loss 0.7944 (1.0949) acc 78.1250 (73.1250) lr 1.1253e-03 eta 3:07:11 +epoch [25/50] batch [390/500] time 0.908 (0.890) data 0.000 (0.002) loss 1.2041 (1.0965) acc 71.8750 (73.1010) lr 1.1253e-03 eta 3:07:05 +epoch [25/50] batch [395/500] time 0.873 (0.890) data 0.000 (0.002) loss 1.1426 (1.0944) acc 65.6250 (73.1092) lr 1.1253e-03 eta 3:06:57 +epoch [25/50] batch [400/500] time 0.887 (0.890) data 0.000 (0.002) loss 1.1572 (1.0973) acc 75.0000 (73.0703) lr 1.1253e-03 eta 3:06:51 +epoch [25/50] batch [405/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.1641 (1.0992) acc 65.6250 (73.0401) lr 1.1253e-03 eta 3:06:51 +epoch [25/50] batch [410/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.0615 (1.0985) acc 75.0000 (73.0335) lr 1.1253e-03 eta 3:06:46 +epoch [25/50] batch [415/500] time 0.879 
(0.890) data 0.000 (0.002) loss 1.3613 (1.0989) acc 65.6250 (73.0346) lr 1.1253e-03 eta 3:06:39 +epoch [25/50] batch [420/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.3516 (1.1024) acc 62.5000 (72.9315) lr 1.1253e-03 eta 3:06:34 +epoch [25/50] batch [425/500] time 0.915 (0.890) data 0.000 (0.002) loss 1.4805 (1.1039) acc 62.5000 (72.9412) lr 1.1253e-03 eta 3:06:32 +epoch [25/50] batch [430/500] time 0.881 (0.890) data 0.000 (0.002) loss 1.9102 (1.1043) acc 59.3750 (72.8779) lr 1.1253e-03 eta 3:06:27 +epoch [25/50] batch [435/500] time 0.869 (0.890) data 0.000 (0.002) loss 0.8975 (1.1047) acc 87.5000 (72.8951) lr 1.1253e-03 eta 3:06:23 +epoch [25/50] batch [440/500] time 0.886 (0.890) data 0.000 (0.002) loss 1.1143 (1.1065) acc 71.8750 (72.8338) lr 1.1253e-03 eta 3:06:17 +epoch [25/50] batch [445/500] time 0.896 (0.890) data 0.000 (0.002) loss 0.6992 (1.1077) acc 81.2500 (72.8371) lr 1.1253e-03 eta 3:06:11 +epoch [25/50] batch [450/500] time 0.898 (0.890) data 0.000 (0.002) loss 1.1357 (1.1073) acc 65.6250 (72.8333) lr 1.1253e-03 eta 3:06:07 +epoch [25/50] batch [455/500] time 0.881 (0.890) data 0.000 (0.002) loss 1.2969 (1.1090) acc 59.3750 (72.8159) lr 1.1253e-03 eta 3:06:02 +epoch [25/50] batch [460/500] time 0.902 (0.890) data 0.000 (0.002) loss 1.0664 (1.1085) acc 78.1250 (72.8261) lr 1.1253e-03 eta 3:05:57 +epoch [25/50] batch [465/500] time 0.887 (0.890) data 0.000 (0.002) loss 0.7319 (1.1084) acc 81.2500 (72.8495) lr 1.1253e-03 eta 3:05:51 +epoch [25/50] batch [470/500] time 0.887 (0.890) data 0.000 (0.002) loss 0.8384 (1.1060) acc 87.5000 (72.8923) lr 1.1253e-03 eta 3:05:46 +epoch [25/50] batch [475/500] time 0.914 (0.890) data 0.000 (0.002) loss 1.1807 (1.1078) acc 65.6250 (72.8421) lr 1.1253e-03 eta 3:05:42 +epoch [25/50] batch [480/500] time 0.849 (0.890) data 0.000 (0.002) loss 0.4990 (1.1048) acc 84.3750 (72.8906) lr 1.1253e-03 eta 3:05:36 +epoch [25/50] batch [485/500] time 0.865 (0.889) data 0.000 (0.002) loss 0.7642 (1.1039) acc 87.5000 
(72.9059) lr 1.1253e-03 eta 3:05:31 +epoch [25/50] batch [490/500] time 0.891 (0.889) data 0.000 (0.002) loss 1.4238 (1.1037) acc 50.0000 (72.8635) lr 1.1253e-03 eta 3:05:27 +epoch [25/50] batch [495/500] time 0.874 (0.889) data 0.000 (0.002) loss 2.3008 (1.1061) acc 62.5000 (72.8157) lr 1.1253e-03 eta 3:05:20 +epoch [25/50] batch [500/500] time 0.921 (0.889) data 0.000 (0.002) loss 1.2607 (1.1057) acc 65.6250 (72.8438) lr 1.0628e-03 eta 3:05:16 +epoch [26/50] batch [5/500] time 0.889 (1.076) data 0.000 (0.151) loss 1.5537 (1.3410) acc 65.6250 (67.5000) lr 1.0628e-03 eta 3:44:07 +epoch [26/50] batch [10/500] time 0.896 (0.983) data 0.000 (0.076) loss 0.9033 (1.0958) acc 81.2500 (74.3750) lr 1.0628e-03 eta 3:24:40 +epoch [26/50] batch [15/500] time 0.884 (0.950) data 0.000 (0.051) loss 1.0439 (1.0995) acc 75.0000 (73.1250) lr 1.0628e-03 eta 3:17:46 +epoch [26/50] batch [20/500] time 0.878 (0.931) data 0.000 (0.038) loss 1.3975 (1.0858) acc 68.7500 (73.4375) lr 1.0628e-03 eta 3:13:44 +epoch [26/50] batch [25/500] time 0.885 (0.922) data 0.000 (0.030) loss 0.8311 (1.0781) acc 71.8750 (72.3750) lr 1.0628e-03 eta 3:11:38 +epoch [26/50] batch [30/500] time 0.852 (0.913) data 0.000 (0.025) loss 1.4971 (1.0872) acc 59.3750 (72.6042) lr 1.0628e-03 eta 3:09:43 +epoch [26/50] batch [35/500] time 0.880 (0.904) data 0.000 (0.022) loss 1.1445 (1.0591) acc 71.8750 (72.5893) lr 1.0628e-03 eta 3:07:54 +epoch [26/50] batch [40/500] time 0.876 (0.902) data 0.000 (0.019) loss 1.1377 (1.0556) acc 68.7500 (72.8125) lr 1.0628e-03 eta 3:07:20 +epoch [26/50] batch [45/500] time 0.872 (0.900) data 0.000 (0.017) loss 1.0195 (1.0486) acc 78.1250 (73.2639) lr 1.0628e-03 eta 3:06:44 +epoch [26/50] batch [50/500] time 0.893 (0.898) data 0.000 (0.015) loss 1.4043 (1.0412) acc 71.8750 (73.1875) lr 1.0628e-03 eta 3:06:21 +epoch [26/50] batch [55/500] time 0.885 (0.898) data 0.000 (0.014) loss 0.6455 (1.0479) acc 78.1250 (73.0682) lr 1.0628e-03 eta 3:06:19 +epoch [26/50] batch [60/500] time 0.876 
(0.897) data 0.000 (0.013) loss 0.9233 (1.0615) acc 75.0000 (73.0208) lr 1.0628e-03 eta 3:05:54 +epoch [26/50] batch [65/500] time 0.862 (0.897) data 0.000 (0.012) loss 0.9326 (1.0477) acc 78.1250 (73.4135) lr 1.0628e-03 eta 3:05:53 +epoch [26/50] batch [70/500] time 0.852 (0.894) data 0.000 (0.011) loss 0.9355 (1.0457) acc 75.0000 (73.4821) lr 1.0628e-03 eta 3:05:16 +epoch [26/50] batch [75/500] time 0.877 (0.894) data 0.000 (0.010) loss 0.8037 (1.0558) acc 75.0000 (73.1667) lr 1.0628e-03 eta 3:05:11 +epoch [26/50] batch [80/500] time 0.911 (0.895) data 0.000 (0.010) loss 1.3096 (1.0669) acc 75.0000 (72.8906) lr 1.0628e-03 eta 3:05:12 +epoch [26/50] batch [85/500] time 0.905 (0.894) data 0.000 (0.009) loss 1.6299 (1.0694) acc 53.1250 (72.6471) lr 1.0628e-03 eta 3:05:02 +epoch [26/50] batch [90/500] time 0.873 (0.894) data 0.000 (0.009) loss 0.8408 (1.0793) acc 78.1250 (72.4306) lr 1.0628e-03 eta 3:04:58 +epoch [26/50] batch [95/500] time 0.884 (0.893) data 0.000 (0.008) loss 0.8276 (1.0944) acc 78.1250 (72.1711) lr 1.0628e-03 eta 3:04:42 +epoch [26/50] batch [100/500] time 0.891 (0.893) data 0.000 (0.008) loss 0.6294 (1.0754) acc 81.2500 (72.6562) lr 1.0628e-03 eta 3:04:32 +epoch [26/50] batch [105/500] time 0.994 (0.895) data 0.000 (0.007) loss 2.2461 (1.0904) acc 71.8750 (72.7381) lr 1.0628e-03 eta 3:04:47 +epoch [26/50] batch [110/500] time 0.864 (0.893) data 0.000 (0.007) loss 0.7046 (1.0768) acc 81.2500 (73.1250) lr 1.0628e-03 eta 3:04:27 +epoch [26/50] batch [115/500] time 0.875 (0.893) data 0.000 (0.007) loss 0.9302 (1.0722) acc 81.2500 (73.2609) lr 1.0628e-03 eta 3:04:17 +epoch [26/50] batch [120/500] time 0.879 (0.892) data 0.000 (0.007) loss 1.1113 (1.0852) acc 78.1250 (73.1250) lr 1.0628e-03 eta 3:04:08 +epoch [26/50] batch [125/500] time 0.876 (0.892) data 0.000 (0.006) loss 1.0459 (1.0861) acc 78.1250 (73.0250) lr 1.0628e-03 eta 3:03:57 +epoch [26/50] batch [130/500] time 0.899 (0.892) data 0.000 (0.006) loss 1.1787 (1.0884) acc 68.7500 (72.8606) lr 
1.0628e-03 eta 3:03:54 +epoch [26/50] batch [135/500] time 0.880 (0.892) data 0.000 (0.006) loss 1.4102 (1.0897) acc 81.2500 (72.9398) lr 1.0628e-03 eta 3:03:51 +epoch [26/50] batch [140/500] time 0.887 (0.892) data 0.000 (0.006) loss 1.7500 (1.0993) acc 59.3750 (72.6786) lr 1.0628e-03 eta 3:03:46 +epoch [26/50] batch [145/500] time 0.885 (0.892) data 0.000 (0.005) loss 1.1143 (1.0966) acc 75.0000 (72.7371) lr 1.0628e-03 eta 3:03:41 +epoch [26/50] batch [150/500] time 0.868 (0.892) data 0.000 (0.005) loss 1.0840 (1.1020) acc 75.0000 (72.5625) lr 1.0628e-03 eta 3:03:30 +epoch [26/50] batch [155/500] time 0.899 (0.891) data 0.000 (0.005) loss 1.5293 (1.1031) acc 68.7500 (72.4798) lr 1.0628e-03 eta 3:03:21 +epoch [26/50] batch [160/500] time 0.880 (0.891) data 0.000 (0.005) loss 0.6040 (1.0978) acc 81.2500 (72.6953) lr 1.0628e-03 eta 3:03:17 +epoch [26/50] batch [165/500] time 0.883 (0.891) data 0.000 (0.005) loss 1.3145 (1.1033) acc 62.5000 (72.5379) lr 1.0628e-03 eta 3:03:10 +epoch [26/50] batch [170/500] time 0.869 (0.891) data 0.000 (0.005) loss 1.0713 (1.1017) acc 71.8750 (72.5000) lr 1.0628e-03 eta 3:03:08 +epoch [26/50] batch [175/500] time 0.860 (0.891) data 0.000 (0.005) loss 0.9727 (1.1023) acc 75.0000 (72.5000) lr 1.0628e-03 eta 3:02:56 +epoch [26/50] batch [180/500] time 0.862 (0.890) data 0.000 (0.004) loss 1.5254 (1.0973) acc 65.6250 (72.6389) lr 1.0628e-03 eta 3:02:49 +epoch [26/50] batch [185/500] time 0.873 (0.890) data 0.000 (0.004) loss 0.7012 (1.0948) acc 81.2500 (72.6858) lr 1.0628e-03 eta 3:02:42 +epoch [26/50] batch [190/500] time 0.834 (0.890) data 0.000 (0.004) loss 0.7510 (1.0884) acc 78.1250 (72.7467) lr 1.0628e-03 eta 3:02:35 +epoch [26/50] batch [195/500] time 0.874 (0.890) data 0.000 (0.004) loss 0.4465 (1.0929) acc 90.6250 (72.6923) lr 1.0628e-03 eta 3:02:28 +epoch [26/50] batch [200/500] time 0.872 (0.889) data 0.000 (0.004) loss 1.4062 (1.0944) acc 75.0000 (72.7656) lr 1.0628e-03 eta 3:02:18 +epoch [26/50] batch [205/500] time 0.921 
(0.890) data 0.000 (0.004) loss 1.1846 (1.0973) acc 71.8750 (72.7134) lr 1.0628e-03 eta 3:02:19 +epoch [26/50] batch [210/500] time 0.903 (0.890) data 0.000 (0.004) loss 0.9629 (1.0978) acc 81.2500 (72.7827) lr 1.0628e-03 eta 3:02:12 +epoch [26/50] batch [215/500] time 0.882 (0.890) data 0.000 (0.004) loss 0.8818 (1.0967) acc 71.8750 (72.7616) lr 1.0628e-03 eta 3:02:08 +epoch [26/50] batch [220/500] time 0.884 (0.889) data 0.000 (0.004) loss 0.7471 (1.0957) acc 78.1250 (72.6989) lr 1.0628e-03 eta 3:02:03 +epoch [26/50] batch [225/500] time 0.873 (0.889) data 0.000 (0.004) loss 1.1348 (1.0939) acc 75.0000 (72.7361) lr 1.0628e-03 eta 3:01:54 +epoch [26/50] batch [230/500] time 0.895 (0.889) data 0.000 (0.004) loss 1.4316 (1.0948) acc 68.7500 (72.6630) lr 1.0628e-03 eta 3:01:48 +epoch [26/50] batch [235/500] time 0.875 (0.889) data 0.000 (0.003) loss 0.9775 (1.0979) acc 71.8750 (72.6330) lr 1.0628e-03 eta 3:01:38 +epoch [26/50] batch [240/500] time 0.895 (0.889) data 0.000 (0.003) loss 1.1748 (1.0969) acc 68.7500 (72.6302) lr 1.0628e-03 eta 3:01:33 +epoch [26/50] batch [245/500] time 0.882 (0.889) data 0.000 (0.003) loss 1.1885 (1.0938) acc 65.6250 (72.7551) lr 1.0628e-03 eta 3:01:29 +epoch [26/50] batch [250/500] time 0.855 (0.889) data 0.000 (0.003) loss 1.2227 (1.0940) acc 71.8750 (72.7000) lr 1.0628e-03 eta 3:01:26 +epoch [26/50] batch [255/500] time 0.851 (0.888) data 0.000 (0.003) loss 1.0732 (1.0954) acc 68.7500 (72.6961) lr 1.0628e-03 eta 3:01:15 +epoch [26/50] batch [260/500] time 0.907 (0.888) data 0.000 (0.003) loss 1.0283 (1.0964) acc 71.8750 (72.6442) lr 1.0628e-03 eta 3:01:11 +epoch [26/50] batch [265/500] time 0.880 (0.888) data 0.000 (0.003) loss 1.6963 (1.1010) acc 53.1250 (72.5236) lr 1.0628e-03 eta 3:01:06 +epoch [26/50] batch [270/500] time 0.877 (0.888) data 0.000 (0.003) loss 1.3096 (1.1028) acc 68.7500 (72.5231) lr 1.0628e-03 eta 3:01:00 +epoch [26/50] batch [275/500] time 0.856 (0.888) data 0.000 (0.003) loss 0.8540 (1.1022) acc 78.1250 
(72.4886) lr 1.0628e-03 eta 3:00:51 +epoch [26/50] batch [280/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.3965 (1.1054) acc 71.8750 (72.4219) lr 1.0628e-03 eta 3:00:44 +epoch [26/50] batch [285/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.8413 (1.1041) acc 78.1250 (72.5000) lr 1.0628e-03 eta 3:00:41 +epoch [26/50] batch [290/500] time 0.897 (0.888) data 0.000 (0.003) loss 1.5371 (1.1035) acc 62.5000 (72.5647) lr 1.0628e-03 eta 3:00:38 +epoch [26/50] batch [295/500] time 0.869 (0.888) data 0.000 (0.003) loss 0.4932 (1.1058) acc 81.2500 (72.4576) lr 1.0628e-03 eta 3:00:32 +epoch [26/50] batch [300/500] time 0.847 (0.887) data 0.000 (0.003) loss 0.4153 (1.1056) acc 87.5000 (72.4792) lr 1.0628e-03 eta 3:00:23 +epoch [26/50] batch [305/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.5820 (1.1043) acc 65.6250 (72.5307) lr 1.0628e-03 eta 3:00:19 +epoch [26/50] batch [310/500] time 0.894 (0.887) data 0.000 (0.003) loss 0.3638 (1.1027) acc 93.7500 (72.6008) lr 1.0628e-03 eta 3:00:14 +epoch [26/50] batch [315/500] time 0.895 (0.887) data 0.000 (0.003) loss 0.9458 (1.1010) acc 75.0000 (72.6389) lr 1.0628e-03 eta 3:00:09 +epoch [26/50] batch [320/500] time 0.859 (0.887) data 0.000 (0.003) loss 0.4929 (1.1003) acc 84.3750 (72.6465) lr 1.0628e-03 eta 3:00:01 +epoch [26/50] batch [325/500] time 0.918 (0.887) data 0.000 (0.003) loss 1.2207 (1.1037) acc 68.7500 (72.5769) lr 1.0628e-03 eta 2:59:58 +epoch [26/50] batch [330/500] time 0.895 (0.887) data 0.000 (0.003) loss 1.2646 (1.1051) acc 75.0000 (72.5568) lr 1.0628e-03 eta 2:59:53 +epoch [26/50] batch [335/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.6890 (1.1067) acc 81.2500 (72.4720) lr 1.0628e-03 eta 2:59:47 +epoch [26/50] batch [340/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.1143 (1.1068) acc 65.6250 (72.4724) lr 1.0628e-03 eta 2:59:44 +epoch [26/50] batch [345/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.1396 (1.1045) acc 68.7500 (72.5453) lr 1.0628e-03 eta 2:59:38 +epoch [26/50] batch [350/500] 
time 0.871 (0.887) data 0.000 (0.002) loss 1.1211 (1.1067) acc 68.7500 (72.4911) lr 1.0628e-03 eta 2:59:37 +epoch [26/50] batch [355/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.9014 (1.1068) acc 78.1250 (72.5000) lr 1.0628e-03 eta 2:59:32 +epoch [26/50] batch [360/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.0801 (1.1051) acc 75.0000 (72.5521) lr 1.0628e-03 eta 2:59:30 +epoch [26/50] batch [365/500] time 0.889 (0.887) data 0.000 (0.002) loss 0.8613 (1.1041) acc 71.8750 (72.5428) lr 1.0628e-03 eta 2:59:27 +epoch [26/50] batch [370/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.6875 (1.1010) acc 81.2500 (72.6014) lr 1.0628e-03 eta 2:59:21 +epoch [26/50] batch [375/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.0889 (1.0990) acc 81.2500 (72.6667) lr 1.0628e-03 eta 2:59:14 +epoch [26/50] batch [380/500] time 0.909 (0.887) data 0.000 (0.002) loss 1.1299 (1.0987) acc 71.8750 (72.6234) lr 1.0628e-03 eta 2:59:09 +epoch [26/50] batch [385/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.0635 (1.0996) acc 71.8750 (72.6055) lr 1.0628e-03 eta 2:59:04 +epoch [26/50] batch [390/500] time 0.890 (0.887) data 0.000 (0.002) loss 0.8779 (1.0998) acc 75.0000 (72.6202) lr 1.0628e-03 eta 2:59:00 +epoch [26/50] batch [395/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.0283 (1.1005) acc 81.2500 (72.6266) lr 1.0628e-03 eta 2:58:56 +epoch [26/50] batch [400/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.0049 (1.1014) acc 78.1250 (72.6484) lr 1.0628e-03 eta 2:58:49 +epoch [26/50] batch [405/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.5879 (1.1062) acc 65.6250 (72.5231) lr 1.0628e-03 eta 2:58:44 +epoch [26/50] batch [410/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.8633 (1.1065) acc 78.1250 (72.4695) lr 1.0628e-03 eta 2:58:38 +epoch [26/50] batch [415/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.4863 (1.1113) acc 71.8750 (72.3795) lr 1.0628e-03 eta 2:58:33 +epoch [26/50] batch [420/500] time 0.893 (0.886) data 0.000 (0.002) loss 0.7964 (1.1111) acc 
81.2500 (72.3958) lr 1.0628e-03 eta 2:58:28 +epoch [26/50] batch [425/500] time 0.857 (0.886) data 0.000 (0.002) loss 1.0859 (1.1100) acc 68.7500 (72.4118) lr 1.0628e-03 eta 2:58:22 +epoch [26/50] batch [430/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.1895 (1.1112) acc 81.2500 (72.3837) lr 1.0628e-03 eta 2:58:16 +epoch [26/50] batch [435/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.3330 (1.1108) acc 68.7500 (72.4066) lr 1.0628e-03 eta 2:58:10 +epoch [26/50] batch [440/500] time 0.875 (0.886) data 0.000 (0.002) loss 1.7471 (1.1141) acc 62.5000 (72.3509) lr 1.0628e-03 eta 2:58:06 +epoch [26/50] batch [445/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.1152 (1.1147) acc 78.1250 (72.3455) lr 1.0628e-03 eta 2:58:01 +epoch [26/50] batch [450/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.2002 (1.1149) acc 56.2500 (72.3194) lr 1.0628e-03 eta 2:57:57 +epoch [26/50] batch [455/500] time 0.861 (0.886) data 0.000 (0.002) loss 0.9512 (1.1149) acc 71.8750 (72.3626) lr 1.0628e-03 eta 2:57:51 +epoch [26/50] batch [460/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.1299 (1.1153) acc 65.6250 (72.3505) lr 1.0628e-03 eta 2:57:47 +epoch [26/50] batch [465/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.5176 (1.1153) acc 59.3750 (72.3656) lr 1.0628e-03 eta 2:57:42 +epoch [26/50] batch [470/500] time 0.861 (0.886) data 0.000 (0.002) loss 0.8149 (1.1142) acc 84.3750 (72.4136) lr 1.0628e-03 eta 2:57:37 +epoch [26/50] batch [475/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.7700 (1.1139) acc 75.0000 (72.4276) lr 1.0628e-03 eta 2:57:34 +epoch [26/50] batch [480/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.0537 (1.1138) acc 78.1250 (72.4154) lr 1.0628e-03 eta 2:57:28 +epoch [26/50] batch [485/500] time 0.873 (0.886) data 0.000 (0.002) loss 0.7617 (1.1108) acc 68.7500 (72.4742) lr 1.0628e-03 eta 2:57:24 +epoch [26/50] batch [490/500] time 0.977 (0.886) data 0.000 (0.002) loss 2.2227 (1.1129) acc 53.1250 (72.4298) lr 1.0628e-03 eta 2:57:22 +epoch [26/50] batch 
[495/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.0098 (1.1140) acc 71.8750 (72.3801) lr 1.0628e-03 eta 2:57:16 +epoch [26/50] batch [500/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.4092 (1.1162) acc 68.7500 (72.3187) lr 1.0000e-03 eta 2:57:11 +epoch [27/50] batch [5/500] time 0.894 (1.040) data 0.000 (0.141) loss 0.9297 (0.9064) acc 78.1250 (80.0000) lr 1.0000e-03 eta 3:27:55 +epoch [27/50] batch [10/500] time 0.871 (0.994) data 0.000 (0.071) loss 1.0508 (1.0493) acc 71.8750 (74.6875) lr 1.0000e-03 eta 3:18:42 +epoch [27/50] batch [15/500] time 0.906 (0.961) data 0.000 (0.047) loss 1.4150 (1.1805) acc 62.5000 (71.8750) lr 1.0000e-03 eta 3:11:53 +epoch [27/50] batch [20/500] time 0.897 (0.947) data 0.000 (0.035) loss 0.8008 (1.1488) acc 81.2500 (72.5000) lr 1.0000e-03 eta 3:09:05 +epoch [27/50] batch [25/500] time 0.903 (0.937) data 0.000 (0.028) loss 1.2021 (1.1332) acc 68.7500 (72.5000) lr 1.0000e-03 eta 3:07:04 +epoch [27/50] batch [30/500] time 0.893 (0.928) data 0.000 (0.024) loss 0.8999 (1.1160) acc 81.2500 (73.1250) lr 1.0000e-03 eta 3:05:11 +epoch [27/50] batch [35/500] time 0.895 (0.922) data 0.000 (0.020) loss 1.3340 (1.1308) acc 59.3750 (71.6964) lr 1.0000e-03 eta 3:03:49 +epoch [27/50] batch [40/500] time 0.883 (0.917) data 0.000 (0.018) loss 0.6729 (1.0907) acc 78.1250 (72.5000) lr 1.0000e-03 eta 3:02:47 +epoch [27/50] batch [45/500] time 0.855 (0.913) data 0.000 (0.016) loss 0.7891 (1.1010) acc 81.2500 (72.5000) lr 1.0000e-03 eta 3:01:50 +epoch [27/50] batch [50/500] time 0.888 (0.911) data 0.000 (0.014) loss 0.7959 (1.1030) acc 75.0000 (72.8750) lr 1.0000e-03 eta 3:01:31 +epoch [27/50] batch [55/500] time 0.885 (0.909) data 0.000 (0.013) loss 1.2646 (1.1135) acc 71.8750 (72.5000) lr 1.0000e-03 eta 3:00:56 +epoch [27/50] batch [60/500] time 0.887 (0.906) data 0.000 (0.012) loss 0.8081 (1.1105) acc 75.0000 (72.3438) lr 1.0000e-03 eta 3:00:16 +epoch [27/50] batch [65/500] time 0.876 (0.904) data 0.000 (0.011) loss 0.6104 (1.1049) acc 87.5000 
(72.7885) lr 1.0000e-03 eta 2:59:45 +epoch [27/50] batch [70/500] time 0.885 (0.902) data 0.000 (0.010) loss 1.2822 (1.1095) acc 68.7500 (72.5446) lr 1.0000e-03 eta 2:59:26 +epoch [27/50] batch [75/500] time 0.900 (0.902) data 0.000 (0.010) loss 1.3252 (1.1198) acc 71.8750 (72.5000) lr 1.0000e-03 eta 2:59:13 +epoch [27/50] batch [80/500] time 0.899 (0.900) data 0.000 (0.009) loss 0.4622 (1.1244) acc 90.6250 (72.2656) lr 1.0000e-03 eta 2:58:51 +epoch [27/50] batch [85/500] time 0.857 (0.899) data 0.000 (0.009) loss 1.2695 (1.1194) acc 68.7500 (72.3897) lr 1.0000e-03 eta 2:58:30 +epoch [27/50] batch [90/500] time 0.970 (0.899) data 0.000 (0.008) loss 1.1982 (1.1249) acc 68.7500 (72.1875) lr 1.0000e-03 eta 2:58:32 +epoch [27/50] batch [95/500] time 0.869 (0.899) data 0.000 (0.008) loss 1.0361 (1.1191) acc 62.5000 (72.1382) lr 1.0000e-03 eta 2:58:23 +epoch [27/50] batch [100/500] time 0.903 (0.899) data 0.000 (0.007) loss 1.0420 (1.1120) acc 71.8750 (72.3750) lr 1.0000e-03 eta 2:58:19 +epoch [27/50] batch [105/500] time 0.899 (0.899) data 0.000 (0.007) loss 1.2617 (1.1204) acc 68.7500 (72.3512) lr 1.0000e-03 eta 2:58:16 +epoch [27/50] batch [110/500] time 0.916 (0.899) data 0.000 (0.007) loss 0.6929 (1.1076) acc 78.1250 (72.7273) lr 1.0000e-03 eta 2:58:04 +epoch [27/50] batch [115/500] time 0.888 (0.898) data 0.000 (0.006) loss 1.2920 (1.1021) acc 59.3750 (72.8261) lr 1.0000e-03 eta 2:57:51 +epoch [27/50] batch [120/500] time 0.895 (0.898) data 0.000 (0.006) loss 1.7139 (1.1139) acc 53.1250 (72.5260) lr 1.0000e-03 eta 2:57:43 +epoch [27/50] batch [125/500] time 0.902 (0.897) data 0.000 (0.006) loss 0.9604 (1.1158) acc 75.0000 (72.3000) lr 1.0000e-03 eta 2:57:32 +epoch [27/50] batch [130/500] time 0.919 (0.898) data 0.000 (0.006) loss 1.1895 (1.1217) acc 65.6250 (72.0673) lr 1.0000e-03 eta 2:57:34 +epoch [27/50] batch [135/500] time 0.891 (0.897) data 0.000 (0.005) loss 1.2959 (1.1228) acc 75.0000 (72.0370) lr 1.0000e-03 eta 2:57:28 +epoch [27/50] batch [140/500] time 
0.865 (0.897) data 0.000 (0.005) loss 0.8662 (1.1179) acc 87.5000 (72.2991) lr 1.0000e-03 eta 2:57:12 +epoch [27/50] batch [145/500] time 0.881 (0.896) data 0.000 (0.005) loss 1.2949 (1.1185) acc 68.7500 (72.1767) lr 1.0000e-03 eta 2:57:02 +epoch [27/50] batch [150/500] time 0.880 (0.896) data 0.000 (0.005) loss 0.9736 (1.1159) acc 71.8750 (72.2292) lr 1.0000e-03 eta 2:56:54 +epoch [27/50] batch [155/500] time 0.859 (0.895) data 0.000 (0.005) loss 0.7021 (1.1138) acc 78.1250 (72.2379) lr 1.0000e-03 eta 2:56:41 +epoch [27/50] batch [160/500] time 0.859 (0.895) data 0.000 (0.005) loss 0.9272 (1.1119) acc 75.0000 (72.3047) lr 1.0000e-03 eta 2:56:32 +epoch [27/50] batch [165/500] time 0.846 (0.894) data 0.000 (0.005) loss 0.6567 (1.1092) acc 87.5000 (72.4053) lr 1.0000e-03 eta 2:56:19 +epoch [27/50] batch [170/500] time 0.859 (0.893) data 0.000 (0.004) loss 1.4482 (1.1122) acc 71.8750 (72.4265) lr 1.0000e-03 eta 2:56:05 +epoch [27/50] batch [175/500] time 0.904 (0.893) data 0.000 (0.004) loss 1.4258 (1.1148) acc 65.6250 (72.3214) lr 1.0000e-03 eta 2:56:00 +epoch [27/50] batch [180/500] time 0.864 (0.893) data 0.000 (0.004) loss 1.1094 (1.1145) acc 78.1250 (72.4653) lr 1.0000e-03 eta 2:55:53 +epoch [27/50] batch [185/500] time 0.888 (0.892) data 0.000 (0.004) loss 1.5107 (1.1155) acc 65.6250 (72.4493) lr 1.0000e-03 eta 2:55:42 +epoch [27/50] batch [190/500] time 0.887 (0.893) data 0.000 (0.004) loss 1.5820 (1.1117) acc 53.1250 (72.4342) lr 1.0000e-03 eta 2:55:40 +epoch [27/50] batch [195/500] time 0.885 (0.892) data 0.000 (0.004) loss 1.3291 (1.1174) acc 62.5000 (72.3237) lr 1.0000e-03 eta 2:55:33 +epoch [27/50] batch [200/500] time 0.876 (0.892) data 0.000 (0.004) loss 0.9961 (1.1146) acc 78.1250 (72.4219) lr 1.0000e-03 eta 2:55:23 +epoch [27/50] batch [205/500] time 0.886 (0.892) data 0.000 (0.004) loss 1.2168 (1.1158) acc 68.7500 (72.3933) lr 1.0000e-03 eta 2:55:15 +epoch [27/50] batch [210/500] time 0.868 (0.891) data 0.000 (0.004) loss 0.7271 (1.1149) acc 84.3750 
(72.3958) lr 1.0000e-03 eta 2:55:08 +epoch [27/50] batch [215/500] time 0.873 (0.891) data 0.000 (0.004) loss 1.1787 (1.1155) acc 78.1250 (72.5000) lr 1.0000e-03 eta 2:55:04 +epoch [27/50] batch [220/500] time 0.876 (0.891) data 0.000 (0.003) loss 1.4512 (1.1164) acc 62.5000 (72.4290) lr 1.0000e-03 eta 2:54:58 +epoch [27/50] batch [225/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.2109 (1.1168) acc 71.8750 (72.4167) lr 1.0000e-03 eta 2:54:52 +epoch [27/50] batch [230/500] time 0.868 (0.891) data 0.000 (0.003) loss 0.6255 (1.1137) acc 81.2500 (72.4864) lr 1.0000e-03 eta 2:54:45 +epoch [27/50] batch [235/500] time 0.866 (0.891) data 0.000 (0.003) loss 1.0400 (1.1149) acc 71.8750 (72.5000) lr 1.0000e-03 eta 2:54:46 +epoch [27/50] batch [240/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.1162 (1.1139) acc 81.2500 (72.5651) lr 1.0000e-03 eta 2:54:40 +epoch [27/50] batch [245/500] time 0.876 (0.891) data 0.000 (0.003) loss 1.5918 (1.1114) acc 65.6250 (72.5893) lr 1.0000e-03 eta 2:54:33 +epoch [27/50] batch [250/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.1025 (1.1094) acc 71.8750 (72.6500) lr 1.0000e-03 eta 2:54:30 +epoch [27/50] batch [255/500] time 0.895 (0.891) data 0.000 (0.003) loss 1.1113 (1.1095) acc 78.1250 (72.6961) lr 1.0000e-03 eta 2:54:26 +epoch [27/50] batch [260/500] time 0.887 (0.891) data 0.000 (0.003) loss 0.6313 (1.1058) acc 84.3750 (72.7644) lr 1.0000e-03 eta 2:54:19 +epoch [27/50] batch [265/500] time 0.886 (0.891) data 0.000 (0.003) loss 1.2549 (1.1040) acc 65.6250 (72.7712) lr 1.0000e-03 eta 2:54:11 +epoch [27/50] batch [270/500] time 0.916 (0.891) data 0.000 (0.003) loss 0.9434 (1.1058) acc 78.1250 (72.7431) lr 1.0000e-03 eta 2:54:06 +epoch [27/50] batch [275/500] time 0.895 (0.891) data 0.000 (0.003) loss 0.5000 (1.1009) acc 84.3750 (72.8750) lr 1.0000e-03 eta 2:54:02 +epoch [27/50] batch [280/500] time 0.888 (0.891) data 0.000 (0.003) loss 0.7026 (1.1026) acc 81.2500 (72.8125) lr 1.0000e-03 eta 2:53:57 +epoch [27/50] batch [285/500] 
time 0.924 (0.891) data 0.000 (0.003) loss 0.6343 (1.1011) acc 84.3750 (72.8289) lr 1.0000e-03 eta 2:53:55 +epoch [27/50] batch [290/500] time 0.886 (0.891) data 0.000 (0.003) loss 0.8271 (1.1018) acc 84.3750 (72.8448) lr 1.0000e-03 eta 2:53:50 +epoch [27/50] batch [295/500] time 0.904 (0.891) data 0.000 (0.003) loss 0.9141 (1.1023) acc 71.8750 (72.7966) lr 1.0000e-03 eta 2:53:47 +epoch [27/50] batch [300/500] time 0.892 (0.891) data 0.000 (0.003) loss 0.7310 (1.1032) acc 78.1250 (72.7708) lr 1.0000e-03 eta 2:53:40 +epoch [27/50] batch [305/500] time 0.883 (0.891) data 0.000 (0.003) loss 0.8774 (1.1045) acc 71.8750 (72.7664) lr 1.0000e-03 eta 2:53:36 +epoch [27/50] batch [310/500] time 0.876 (0.891) data 0.000 (0.003) loss 0.4868 (1.1063) acc 84.3750 (72.7722) lr 1.0000e-03 eta 2:53:31 +epoch [27/50] batch [315/500] time 0.908 (0.891) data 0.000 (0.002) loss 1.7930 (1.1099) acc 53.1250 (72.6984) lr 1.0000e-03 eta 2:53:26 +epoch [27/50] batch [320/500] time 0.850 (0.890) data 0.000 (0.002) loss 1.1475 (1.1109) acc 65.6250 (72.6953) lr 1.0000e-03 eta 2:53:19 +epoch [27/50] batch [325/500] time 0.894 (0.890) data 0.000 (0.002) loss 0.8945 (1.1096) acc 71.8750 (72.7115) lr 1.0000e-03 eta 2:53:12 +epoch [27/50] batch [330/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.8350 (1.1096) acc 62.5000 (72.6989) lr 1.0000e-03 eta 2:53:07 +epoch [27/50] batch [335/500] time 0.868 (0.890) data 0.000 (0.002) loss 1.5400 (1.1116) acc 62.5000 (72.6959) lr 1.0000e-03 eta 2:53:07 +epoch [27/50] batch [340/500] time 0.867 (0.890) data 0.000 (0.002) loss 0.7832 (1.1095) acc 87.5000 (72.7849) lr 1.0000e-03 eta 2:53:00 +epoch [27/50] batch [345/500] time 0.872 (0.890) data 0.000 (0.002) loss 0.9155 (1.1078) acc 84.3750 (72.7536) lr 1.0000e-03 eta 2:52:52 +epoch [27/50] batch [350/500] time 0.869 (0.890) data 0.000 (0.002) loss 0.7051 (1.1063) acc 81.2500 (72.7768) lr 1.0000e-03 eta 2:52:46 +epoch [27/50] batch [355/500] time 0.895 (0.890) data 0.000 (0.002) loss 1.1113 (1.1056) acc 
71.8750 (72.7641) lr 1.0000e-03 eta 2:52:40 +epoch [27/50] batch [360/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.3281 (1.1050) acc 62.5000 (72.7517) lr 1.0000e-03 eta 2:52:33 +epoch [27/50] batch [365/500] time 0.907 (0.889) data 0.000 (0.002) loss 0.6602 (1.1025) acc 78.1250 (72.7911) lr 1.0000e-03 eta 2:52:28 +epoch [27/50] batch [370/500] time 0.903 (0.890) data 0.000 (0.002) loss 0.9292 (1.1022) acc 78.1250 (72.8041) lr 1.0000e-03 eta 2:52:25 +epoch [27/50] batch [375/500] time 0.904 (0.890) data 0.000 (0.002) loss 0.9712 (1.0991) acc 71.8750 (72.8500) lr 1.0000e-03 eta 2:52:21 +epoch [27/50] batch [380/500] time 0.908 (0.890) data 0.000 (0.002) loss 1.3760 (1.0988) acc 71.8750 (72.8947) lr 1.0000e-03 eta 2:52:19 +epoch [27/50] batch [385/500] time 0.885 (0.890) data 0.000 (0.002) loss 1.2012 (1.0997) acc 59.3750 (72.8247) lr 1.0000e-03 eta 2:52:11 +epoch [27/50] batch [390/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.0371 (1.0991) acc 65.6250 (72.8205) lr 1.0000e-03 eta 2:52:06 +epoch [27/50] batch [395/500] time 0.882 (0.890) data 0.000 (0.002) loss 1.0098 (1.0994) acc 68.7500 (72.8323) lr 1.0000e-03 eta 2:52:02 +epoch [27/50] batch [400/500] time 0.904 (0.889) data 0.000 (0.002) loss 1.2256 (1.0995) acc 62.5000 (72.7891) lr 1.0000e-03 eta 2:51:57 +epoch [27/50] batch [405/500] time 0.848 (0.889) data 0.000 (0.002) loss 1.2158 (1.0998) acc 75.0000 (72.8395) lr 1.0000e-03 eta 2:51:50 +epoch [27/50] batch [410/500] time 0.871 (0.889) data 0.000 (0.002) loss 0.9609 (1.0989) acc 78.1250 (72.8277) lr 1.0000e-03 eta 2:51:43 +epoch [27/50] batch [415/500] time 0.875 (0.889) data 0.000 (0.002) loss 1.0645 (1.1001) acc 65.6250 (72.8313) lr 1.0000e-03 eta 2:51:37 +epoch [27/50] batch [420/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.9053 (1.0988) acc 71.8750 (72.8423) lr 1.0000e-03 eta 2:51:32 +epoch [27/50] batch [425/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.3154 (1.1007) acc 68.7500 (72.7794) lr 1.0000e-03 eta 2:51:27 +epoch [27/50] batch 
[430/500] time 0.864 (0.889) data 0.000 (0.002) loss 0.8311 (1.0998) acc 81.2500 (72.8198) lr 1.0000e-03 eta 2:51:22 +epoch [27/50] batch [435/500] time 0.873 (0.889) data 0.000 (0.002) loss 1.0244 (1.0996) acc 75.0000 (72.8089) lr 1.0000e-03 eta 2:51:19 +epoch [27/50] batch [440/500] time 0.893 (0.889) data 0.000 (0.002) loss 0.7603 (1.0986) acc 75.0000 (72.8267) lr 1.0000e-03 eta 2:51:15 +epoch [27/50] batch [445/500] time 0.917 (0.889) data 0.001 (0.002) loss 1.4941 (1.0992) acc 59.3750 (72.8301) lr 1.0000e-03 eta 2:51:12 +epoch [27/50] batch [450/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.9775 (1.0982) acc 75.0000 (72.8472) lr 1.0000e-03 eta 2:51:07 +epoch [27/50] batch [455/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.0898 (1.0968) acc 75.0000 (72.9258) lr 1.0000e-03 eta 2:51:01 +epoch [27/50] batch [460/500] time 0.876 (0.889) data 0.000 (0.002) loss 1.2285 (1.0979) acc 75.0000 (72.9212) lr 1.0000e-03 eta 2:50:56 +epoch [27/50] batch [465/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.9292 (1.0998) acc 75.0000 (72.8965) lr 1.0000e-03 eta 2:50:51 +epoch [27/50] batch [470/500] time 0.931 (0.889) data 0.000 (0.002) loss 1.0645 (1.0975) acc 75.0000 (72.9521) lr 1.0000e-03 eta 2:50:46 +epoch [27/50] batch [475/500] time 0.987 (0.889) data 0.000 (0.002) loss 1.4893 (1.0968) acc 68.7500 (72.9803) lr 1.0000e-03 eta 2:50:45 +epoch [27/50] batch [480/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.2754 (1.0982) acc 62.5000 (72.9036) lr 1.0000e-03 eta 2:50:39 +epoch [27/50] batch [485/500] time 0.887 (0.889) data 0.001 (0.002) loss 1.0947 (1.0970) acc 75.0000 (72.9381) lr 1.0000e-03 eta 2:50:32 +epoch [27/50] batch [490/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.4004 (1.0965) acc 78.1250 (72.9145) lr 1.0000e-03 eta 2:50:28 +epoch [27/50] batch [495/500] time 0.912 (0.889) data 0.000 (0.002) loss 1.1738 (1.0978) acc 75.0000 (72.9040) lr 1.0000e-03 eta 2:50:22 +epoch [27/50] batch [500/500] time 0.907 (0.889) data 0.000 (0.002) loss 1.0244 
(1.0992) acc 68.7500 (72.8250) lr 9.3721e-04 eta 2:50:18 +epoch [28/50] batch [5/500] time 0.869 (1.040) data 0.000 (0.161) loss 0.7280 (0.7864) acc 75.0000 (78.7500) lr 9.3721e-04 eta 3:19:10 +epoch [28/50] batch [10/500] time 0.882 (0.957) data 0.000 (0.081) loss 0.9551 (0.8587) acc 62.5000 (75.0000) lr 9.3721e-04 eta 3:03:16 +epoch [28/50] batch [15/500] time 0.895 (0.929) data 0.000 (0.054) loss 0.8081 (0.8690) acc 75.0000 (74.7917) lr 9.3721e-04 eta 2:57:55 +epoch [28/50] batch [20/500] time 0.921 (0.918) data 0.000 (0.040) loss 1.2607 (0.9511) acc 71.8750 (73.9062) lr 9.3721e-04 eta 2:55:34 +epoch [28/50] batch [25/500] time 0.873 (0.908) data 0.000 (0.032) loss 0.8848 (0.9350) acc 81.2500 (74.2500) lr 9.3721e-04 eta 2:53:36 +epoch [28/50] batch [30/500] time 0.908 (0.909) data 0.000 (0.027) loss 0.8579 (0.9194) acc 78.1250 (74.6875) lr 9.3721e-04 eta 2:53:50 +epoch [28/50] batch [35/500] time 0.877 (0.906) data 0.000 (0.023) loss 1.0850 (0.9465) acc 75.0000 (74.4643) lr 9.3721e-04 eta 2:53:10 +epoch [28/50] batch [40/500] time 0.853 (0.903) data 0.000 (0.020) loss 1.2090 (0.9686) acc 71.8750 (74.2188) lr 9.3721e-04 eta 2:52:31 +epoch [28/50] batch [45/500] time 0.887 (0.900) data 0.000 (0.018) loss 1.5508 (0.9914) acc 62.5000 (73.5417) lr 9.3721e-04 eta 2:51:53 +epoch [28/50] batch [50/500] time 0.892 (0.897) data 0.000 (0.016) loss 1.2061 (0.9783) acc 71.8750 (74.0625) lr 9.3721e-04 eta 2:51:05 +epoch [28/50] batch [55/500] time 0.871 (0.895) data 0.000 (0.015) loss 1.4717 (0.9811) acc 65.6250 (74.3750) lr 9.3721e-04 eta 2:50:42 +epoch [28/50] batch [60/500] time 0.891 (0.895) data 0.000 (0.014) loss 1.1621 (0.9963) acc 68.7500 (73.8542) lr 9.3721e-04 eta 2:50:35 +epoch [28/50] batch [65/500] time 0.908 (0.895) data 0.000 (0.013) loss 0.9995 (1.0094) acc 78.1250 (73.7500) lr 9.3721e-04 eta 2:50:31 +epoch [28/50] batch [70/500] time 0.888 (0.895) data 0.000 (0.012) loss 1.0508 (1.0069) acc 71.8750 (73.7054) lr 9.3721e-04 eta 2:50:30 +epoch [28/50] batch 
[75/500] time 0.881 (0.894) data 0.000 (0.011) loss 1.6816 (1.0071) acc 62.5000 (73.6667) lr 9.3721e-04 eta 2:50:11 +epoch [28/50] batch [80/500] time 0.904 (0.893) data 0.000 (0.010) loss 0.9048 (1.0109) acc 78.1250 (73.5938) lr 9.3721e-04 eta 2:49:58 +epoch [28/50] batch [85/500] time 0.888 (0.893) data 0.000 (0.010) loss 0.9771 (1.0064) acc 75.0000 (73.8603) lr 9.3721e-04 eta 2:49:50 +epoch [28/50] batch [90/500] time 0.854 (0.893) data 0.000 (0.009) loss 1.5811 (1.0220) acc 56.2500 (73.5764) lr 9.3721e-04 eta 2:49:44 +epoch [28/50] batch [95/500] time 0.860 (0.892) data 0.000 (0.009) loss 1.5117 (1.0252) acc 68.7500 (73.6184) lr 9.3721e-04 eta 2:49:31 +epoch [28/50] batch [100/500] time 0.896 (0.891) data 0.000 (0.008) loss 1.0371 (1.0244) acc 62.5000 (73.5000) lr 9.3721e-04 eta 2:49:21 +epoch [28/50] batch [105/500] time 0.860 (0.891) data 0.000 (0.008) loss 0.7271 (1.0211) acc 78.1250 (73.4524) lr 9.3721e-04 eta 2:49:11 +epoch [28/50] batch [110/500] time 0.891 (0.891) data 0.000 (0.008) loss 1.3711 (1.0170) acc 68.7500 (73.4659) lr 9.3721e-04 eta 2:49:04 +epoch [28/50] batch [115/500] time 0.858 (0.889) data 0.000 (0.007) loss 1.0400 (1.0199) acc 65.6250 (73.3696) lr 9.3721e-04 eta 2:48:45 +epoch [28/50] batch [120/500] time 0.871 (0.889) data 0.000 (0.007) loss 1.6025 (1.0243) acc 65.6250 (73.3594) lr 9.3721e-04 eta 2:48:38 +epoch [28/50] batch [125/500] time 0.877 (0.889) data 0.000 (0.007) loss 0.9663 (1.0221) acc 78.1250 (73.4750) lr 9.3721e-04 eta 2:48:27 +epoch [28/50] batch [130/500] time 0.862 (0.888) data 0.000 (0.006) loss 1.0977 (1.0270) acc 81.2500 (73.4375) lr 9.3721e-04 eta 2:48:18 +epoch [28/50] batch [135/500] time 0.896 (0.888) data 0.000 (0.006) loss 1.1338 (1.0345) acc 65.6250 (73.2176) lr 9.3721e-04 eta 2:48:13 +epoch [28/50] batch [140/500] time 0.878 (0.888) data 0.000 (0.006) loss 1.1611 (1.0371) acc 68.7500 (73.1920) lr 9.3721e-04 eta 2:48:04 +epoch [28/50] batch [145/500] time 0.889 (0.887) data 0.000 (0.006) loss 0.5703 (1.0314) acc 
84.3750 (73.2759) lr 9.3721e-04 eta 2:47:52 +epoch [28/50] batch [150/500] time 0.920 (0.887) data 0.000 (0.006) loss 1.5859 (1.0287) acc 68.7500 (73.4167) lr 9.3721e-04 eta 2:47:50 +epoch [28/50] batch [155/500] time 0.890 (0.887) data 0.000 (0.005) loss 1.2588 (1.0359) acc 75.0000 (73.2460) lr 9.3721e-04 eta 2:47:46 +epoch [28/50] batch [160/500] time 0.882 (0.887) data 0.000 (0.005) loss 1.0684 (1.0362) acc 71.8750 (73.2031) lr 9.3721e-04 eta 2:47:37 +epoch [28/50] batch [165/500] time 0.839 (0.886) data 0.000 (0.005) loss 1.0996 (1.0351) acc 75.0000 (73.2955) lr 9.3721e-04 eta 2:47:25 +epoch [28/50] batch [170/500] time 0.872 (0.886) data 0.000 (0.005) loss 1.1475 (1.0361) acc 81.2500 (73.3640) lr 9.3721e-04 eta 2:47:20 +epoch [28/50] batch [175/500] time 0.868 (0.886) data 0.000 (0.005) loss 1.2617 (1.0395) acc 68.7500 (73.3750) lr 9.3721e-04 eta 2:47:13 +epoch [28/50] batch [180/500] time 0.903 (0.886) data 0.000 (0.005) loss 0.6812 (1.0421) acc 78.1250 (73.3160) lr 9.3721e-04 eta 2:47:09 +epoch [28/50] batch [185/500] time 0.898 (0.886) data 0.000 (0.005) loss 1.0859 (1.0403) acc 78.1250 (73.4628) lr 9.3721e-04 eta 2:47:03 +epoch [28/50] batch [190/500] time 0.854 (0.886) data 0.000 (0.004) loss 1.0703 (1.0415) acc 71.8750 (73.4046) lr 9.3721e-04 eta 2:46:55 +epoch [28/50] batch [195/500] time 0.853 (0.885) data 0.000 (0.004) loss 1.2646 (1.0411) acc 62.5000 (73.3654) lr 9.3721e-04 eta 2:46:46 +epoch [28/50] batch [200/500] time 0.877 (0.885) data 0.000 (0.004) loss 0.8350 (1.0390) acc 78.1250 (73.4531) lr 9.3721e-04 eta 2:46:40 +epoch [28/50] batch [205/500] time 0.869 (0.885) data 0.000 (0.004) loss 1.1074 (1.0386) acc 71.8750 (73.3689) lr 9.3721e-04 eta 2:46:33 +epoch [28/50] batch [210/500] time 0.981 (0.885) data 0.000 (0.004) loss 1.7080 (1.0447) acc 53.1250 (73.0952) lr 9.3721e-04 eta 2:46:33 +epoch [28/50] batch [215/500] time 0.889 (0.885) data 0.000 (0.004) loss 0.7910 (1.0451) acc 78.1250 (73.1831) lr 9.3721e-04 eta 2:46:25 +epoch [28/50] batch 
[220/500] time 0.874 (0.884) data 0.000 (0.004) loss 1.0889 (1.0464) acc 71.8750 (73.2102) lr 9.3721e-04 eta 2:46:16 +epoch [28/50] batch [225/500] time 0.870 (0.884) data 0.000 (0.004) loss 0.8887 (1.0513) acc 75.0000 (73.1389) lr 9.3721e-04 eta 2:46:12 +epoch [28/50] batch [230/500] time 0.906 (0.885) data 0.000 (0.004) loss 1.1045 (1.0522) acc 71.8750 (73.1658) lr 9.3721e-04 eta 2:46:10 +epoch [28/50] batch [235/500] time 0.927 (0.885) data 0.000 (0.004) loss 1.3516 (1.0526) acc 75.0000 (73.2314) lr 9.3721e-04 eta 2:46:08 +epoch [28/50] batch [240/500] time 0.896 (0.885) data 0.000 (0.004) loss 1.9951 (1.0563) acc 59.3750 (73.1901) lr 9.3721e-04 eta 2:46:03 +epoch [28/50] batch [245/500] time 0.896 (0.885) data 0.000 (0.004) loss 1.0566 (1.0645) acc 75.0000 (73.0867) lr 9.3721e-04 eta 2:45:58 +epoch [28/50] batch [250/500] time 0.868 (0.885) data 0.000 (0.003) loss 1.0850 (1.0627) acc 75.0000 (73.1625) lr 9.3721e-04 eta 2:45:52 +epoch [28/50] batch [255/500] time 0.887 (0.885) data 0.000 (0.003) loss 1.0303 (1.0643) acc 75.0000 (73.1495) lr 9.3721e-04 eta 2:45:46 +epoch [28/50] batch [260/500] time 0.876 (0.885) data 0.000 (0.003) loss 1.1816 (1.0624) acc 68.7500 (73.2091) lr 9.3721e-04 eta 2:45:41 +epoch [28/50] batch [265/500] time 0.897 (0.885) data 0.000 (0.003) loss 1.8320 (1.0656) acc 56.2500 (73.1014) lr 9.3721e-04 eta 2:45:37 +epoch [28/50] batch [270/500] time 0.855 (0.884) data 0.000 (0.003) loss 1.1982 (1.0650) acc 62.5000 (73.1019) lr 9.3721e-04 eta 2:45:31 +epoch [28/50] batch [275/500] time 0.920 (0.885) data 0.000 (0.003) loss 0.8687 (1.0678) acc 81.2500 (73.1250) lr 9.3721e-04 eta 2:45:28 +epoch [28/50] batch [280/500] time 0.864 (0.884) data 0.000 (0.003) loss 1.5830 (1.0694) acc 71.8750 (73.0915) lr 9.3721e-04 eta 2:45:22 +epoch [28/50] batch [285/500] time 0.882 (0.884) data 0.000 (0.003) loss 1.2930 (1.0708) acc 53.1250 (73.0263) lr 9.3721e-04 eta 2:45:16 +epoch [28/50] batch [290/500] time 0.919 (0.884) data 0.000 (0.003) loss 0.8423 
(1.0728) acc 71.8750 (72.9634) lr 9.3721e-04 eta 2:45:14 +epoch [28/50] batch [295/500] time 0.889 (0.885) data 0.000 (0.003) loss 1.2041 (1.0715) acc 75.0000 (73.0297) lr 9.3721e-04 eta 2:45:10 +epoch [28/50] batch [300/500] time 0.885 (0.884) data 0.000 (0.003) loss 1.7705 (1.0817) acc 65.6250 (72.8229) lr 9.3721e-04 eta 2:45:04 +epoch [28/50] batch [305/500] time 0.882 (0.884) data 0.000 (0.003) loss 0.5679 (1.0786) acc 78.1250 (72.8996) lr 9.3721e-04 eta 2:45:01 +epoch [28/50] batch [310/500] time 0.924 (0.885) data 0.000 (0.003) loss 1.3955 (1.0791) acc 68.7500 (72.8629) lr 9.3721e-04 eta 2:45:00 +epoch [28/50] batch [315/500] time 0.911 (0.885) data 0.000 (0.003) loss 1.0205 (1.0788) acc 75.0000 (72.8472) lr 9.3721e-04 eta 2:44:54 +epoch [28/50] batch [320/500] time 0.857 (0.884) data 0.000 (0.003) loss 1.1533 (1.0784) acc 62.5000 (72.8223) lr 9.3721e-04 eta 2:44:47 +epoch [28/50] batch [325/500] time 0.889 (0.884) data 0.000 (0.003) loss 0.9536 (1.0784) acc 81.2500 (72.8365) lr 9.3721e-04 eta 2:44:42 +epoch [28/50] batch [330/500] time 0.899 (0.884) data 0.000 (0.003) loss 1.0791 (1.0802) acc 68.7500 (72.8314) lr 9.3721e-04 eta 2:44:38 +epoch [28/50] batch [335/500] time 0.866 (0.884) data 0.000 (0.003) loss 0.9629 (1.0817) acc 81.2500 (72.8358) lr 9.3721e-04 eta 2:44:31 +epoch [28/50] batch [340/500] time 0.864 (0.884) data 0.000 (0.003) loss 0.5317 (1.0799) acc 90.6250 (72.9136) lr 9.3721e-04 eta 2:44:26 +epoch [28/50] batch [345/500] time 0.889 (0.884) data 0.000 (0.003) loss 0.9380 (1.0798) acc 68.7500 (72.7989) lr 9.3721e-04 eta 2:44:20 +epoch [28/50] batch [350/500] time 0.879 (0.884) data 0.000 (0.003) loss 1.4658 (1.0801) acc 59.3750 (72.7411) lr 9.3721e-04 eta 2:44:13 +epoch [28/50] batch [355/500] time 0.868 (0.884) data 0.000 (0.003) loss 0.8184 (1.0833) acc 81.2500 (72.6849) lr 9.3721e-04 eta 2:44:11 +epoch [28/50] batch [360/500] time 0.912 (0.884) data 0.000 (0.002) loss 0.7881 (1.0840) acc 78.1250 (72.6997) lr 9.3721e-04 eta 2:44:06 +epoch 
[28/50] batch [365/500] time 0.874 (0.884) data 0.000 (0.002) loss 0.9185 (1.0802) acc 78.1250 (72.7654) lr 9.3721e-04 eta 2:44:04 +epoch [28/50] batch [370/500] time 0.887 (0.884) data 0.000 (0.002) loss 0.9185 (1.0778) acc 87.5000 (72.8463) lr 9.3721e-04 eta 2:43:57 +epoch [28/50] batch [375/500] time 0.858 (0.884) data 0.000 (0.002) loss 1.7969 (1.0793) acc 59.3750 (72.8583) lr 9.3721e-04 eta 2:43:52 +epoch [28/50] batch [380/500] time 0.845 (0.884) data 0.000 (0.002) loss 1.0879 (1.0778) acc 75.0000 (72.9112) lr 9.3721e-04 eta 2:43:48 +epoch [28/50] batch [385/500] time 0.858 (0.884) data 0.000 (0.002) loss 0.6582 (1.0754) acc 84.3750 (73.0032) lr 9.3721e-04 eta 2:43:41 +epoch [28/50] batch [390/500] time 0.884 (0.883) data 0.000 (0.002) loss 1.3623 (1.0767) acc 68.7500 (72.9888) lr 9.3721e-04 eta 2:43:35 +epoch [28/50] batch [395/500] time 0.926 (0.884) data 0.000 (0.002) loss 0.4746 (1.0743) acc 78.1250 (73.0380) lr 9.3721e-04 eta 2:43:31 +epoch [28/50] batch [400/500] time 0.883 (0.884) data 0.000 (0.002) loss 1.0049 (1.0734) acc 68.7500 (73.0703) lr 9.3721e-04 eta 2:43:27 +epoch [28/50] batch [405/500] time 0.901 (0.884) data 0.000 (0.002) loss 1.2549 (1.0742) acc 71.8750 (73.0401) lr 9.3721e-04 eta 2:43:23 +epoch [28/50] batch [410/500] time 0.864 (0.884) data 0.000 (0.002) loss 1.1016 (1.0718) acc 71.8750 (73.0793) lr 9.3721e-04 eta 2:43:18 +epoch [28/50] batch [415/500] time 0.898 (0.884) data 0.000 (0.002) loss 0.9487 (1.0718) acc 75.0000 (73.0648) lr 9.3721e-04 eta 2:43:14 +epoch [28/50] batch [420/500] time 0.870 (0.884) data 0.000 (0.002) loss 0.6431 (1.0716) acc 81.2500 (73.0432) lr 9.3721e-04 eta 2:43:09 +epoch [28/50] batch [425/500] time 0.908 (0.884) data 0.000 (0.002) loss 1.1465 (1.0729) acc 71.8750 (73.0221) lr 9.3721e-04 eta 2:43:05 +epoch [28/50] batch [430/500] time 0.895 (0.884) data 0.000 (0.002) loss 1.1621 (1.0757) acc 68.7500 (72.9942) lr 9.3721e-04 eta 2:43:04 +epoch [28/50] batch [435/500] time 0.887 (0.884) data 0.000 (0.002) loss 
0.6196 (1.0747) acc 81.2500 (73.0172) lr 9.3721e-04 eta 2:42:59 +epoch [28/50] batch [440/500] time 0.918 (0.884) data 0.000 (0.002) loss 0.9414 (1.0750) acc 81.2500 (73.0256) lr 9.3721e-04 eta 2:42:58 +epoch [28/50] batch [445/500] time 0.908 (0.884) data 0.000 (0.002) loss 0.8838 (1.0731) acc 75.0000 (73.0688) lr 9.3721e-04 eta 2:42:55 +epoch [28/50] batch [450/500] time 0.863 (0.884) data 0.000 (0.002) loss 1.4727 (1.0748) acc 71.8750 (73.0486) lr 9.3721e-04 eta 2:42:51 +epoch [28/50] batch [455/500] time 0.878 (0.884) data 0.000 (0.002) loss 1.6289 (1.0747) acc 65.6250 (73.0838) lr 9.3721e-04 eta 2:42:49 +epoch [28/50] batch [460/500] time 0.881 (0.884) data 0.000 (0.002) loss 0.5396 (1.0745) acc 84.3750 (73.0842) lr 9.3721e-04 eta 2:42:43 +epoch [28/50] batch [465/500] time 0.931 (0.884) data 0.000 (0.002) loss 0.8730 (1.0750) acc 81.2500 (73.0578) lr 9.3721e-04 eta 2:42:40 +epoch [28/50] batch [470/500] time 0.904 (0.885) data 0.000 (0.002) loss 0.9307 (1.0770) acc 75.0000 (73.0186) lr 9.3721e-04 eta 2:42:38 +epoch [28/50] batch [475/500] time 0.865 (0.885) data 0.000 (0.002) loss 1.5234 (1.0772) acc 59.3750 (73.0132) lr 9.3721e-04 eta 2:42:35 +epoch [28/50] batch [480/500] time 0.887 (0.885) data 0.000 (0.002) loss 1.1309 (1.0756) acc 75.0000 (73.0664) lr 9.3721e-04 eta 2:42:29 +epoch [28/50] batch [485/500] time 0.872 (0.885) data 0.000 (0.002) loss 1.3955 (1.0778) acc 65.6250 (73.0541) lr 9.3721e-04 eta 2:42:24 +epoch [28/50] batch [490/500] time 0.891 (0.885) data 0.000 (0.002) loss 0.8267 (1.0791) acc 81.2500 (73.0485) lr 9.3721e-04 eta 2:42:19 +epoch [28/50] batch [495/500] time 0.882 (0.884) data 0.000 (0.002) loss 1.3574 (1.0803) acc 56.2500 (73.0051) lr 9.3721e-04 eta 2:42:13 +epoch [28/50] batch [500/500] time 0.858 (0.885) data 0.000 (0.002) loss 1.1846 (1.0823) acc 68.7500 (72.9625) lr 8.7467e-04 eta 2:42:09 +epoch [29/50] batch [5/500] time 0.871 (1.021) data 0.000 (0.140) loss 1.1133 (1.1431) acc 75.0000 (67.5000) lr 8.7467e-04 eta 3:07:07 
+epoch [29/50] batch [10/500] time 0.878 (0.957) data 0.000 (0.070) loss 1.8525 (1.2104) acc 59.3750 (66.2500) lr 8.7467e-04 eta 2:55:15 +epoch [29/50] batch [15/500] time 0.868 (0.932) data 0.000 (0.047) loss 1.0156 (1.2199) acc 65.6250 (65.6250) lr 8.7467e-04 eta 2:50:42 +epoch [29/50] batch [20/500] time 0.883 (0.918) data 0.000 (0.035) loss 0.5757 (1.1381) acc 84.3750 (67.9688) lr 8.7467e-04 eta 2:48:04 +epoch [29/50] batch [25/500] time 0.887 (0.912) data 0.000 (0.028) loss 0.9062 (1.1140) acc 81.2500 (69.1250) lr 8.7467e-04 eta 2:46:47 +epoch [29/50] batch [30/500] time 0.881 (0.906) data 0.000 (0.024) loss 0.4546 (1.0509) acc 87.5000 (70.5208) lr 8.7467e-04 eta 2:45:37 +epoch [29/50] batch [35/500] time 0.864 (0.901) data 0.000 (0.020) loss 1.0410 (1.0816) acc 78.1250 (70.4464) lr 8.7467e-04 eta 2:44:37 +epoch [29/50] batch [40/500] time 1.028 (0.901) data 0.000 (0.018) loss 0.7998 (1.0567) acc 87.5000 (71.5625) lr 8.7467e-04 eta 2:44:39 +epoch [29/50] batch [45/500] time 0.875 (0.899) data 0.000 (0.016) loss 0.4253 (1.0392) acc 90.6250 (72.3611) lr 8.7467e-04 eta 2:44:10 +epoch [29/50] batch [50/500] time 0.881 (0.897) data 0.000 (0.014) loss 0.9170 (1.0185) acc 71.8750 (72.9375) lr 8.7467e-04 eta 2:43:44 +epoch [29/50] batch [55/500] time 0.884 (0.895) data 0.000 (0.013) loss 1.0352 (1.0278) acc 68.7500 (72.7841) lr 8.7467e-04 eta 2:43:15 +epoch [29/50] batch [60/500] time 0.841 (0.893) data 0.000 (0.012) loss 1.7861 (1.0465) acc 65.6250 (72.6562) lr 8.7467e-04 eta 2:42:44 +epoch [29/50] batch [65/500] time 0.878 (0.890) data 0.000 (0.011) loss 0.6826 (1.0416) acc 81.2500 (72.8365) lr 8.7467e-04 eta 2:42:12 +epoch [29/50] batch [70/500] time 0.870 (0.888) data 0.000 (0.010) loss 0.8184 (1.0432) acc 90.6250 (72.9018) lr 8.7467e-04 eta 2:41:48 +epoch [29/50] batch [75/500] time 0.891 (0.888) data 0.000 (0.010) loss 0.8037 (1.0516) acc 75.0000 (72.6250) lr 8.7467e-04 eta 2:41:40 +epoch [29/50] batch [80/500] time 0.876 (0.887) data 0.000 (0.009) loss 1.1787 
(1.0682) acc 75.0000 (72.5391) lr 8.7467e-04 eta 2:41:25 +epoch [29/50] batch [85/500] time 0.858 (0.887) data 0.000 (0.008) loss 0.7476 (1.0709) acc 81.2500 (72.5000) lr 8.7467e-04 eta 2:41:22 +epoch [29/50] batch [90/500] time 0.885 (0.887) data 0.000 (0.008) loss 1.0596 (1.0733) acc 68.7500 (72.4653) lr 8.7467e-04 eta 2:41:19 +epoch [29/50] batch [95/500] time 0.872 (0.886) data 0.000 (0.008) loss 1.6465 (1.0700) acc 71.8750 (72.7632) lr 8.7467e-04 eta 2:41:06 +epoch [29/50] batch [100/500] time 0.879 (0.887) data 0.000 (0.007) loss 0.9219 (1.0755) acc 78.1250 (72.7812) lr 8.7467e-04 eta 2:41:04 +epoch [29/50] batch [105/500] time 0.882 (0.887) data 0.000 (0.007) loss 0.7261 (1.0683) acc 78.1250 (72.8571) lr 8.7467e-04 eta 2:41:01 +epoch [29/50] batch [110/500] time 0.850 (0.887) data 0.000 (0.007) loss 1.5225 (1.0769) acc 62.5000 (72.7273) lr 8.7467e-04 eta 2:40:56 +epoch [29/50] batch [115/500] time 0.900 (0.887) data 0.000 (0.006) loss 0.9771 (1.0680) acc 78.1250 (73.0163) lr 8.7467e-04 eta 2:40:50 +epoch [29/50] batch [120/500] time 0.908 (0.887) data 0.000 (0.006) loss 1.3770 (1.0633) acc 68.7500 (72.9688) lr 8.7467e-04 eta 2:40:46 +epoch [29/50] batch [125/500] time 0.860 (0.886) data 0.000 (0.006) loss 1.1992 (1.0585) acc 68.7500 (73.0250) lr 8.7467e-04 eta 2:40:33 +epoch [29/50] batch [130/500] time 0.877 (0.886) data 0.000 (0.006) loss 1.5879 (1.0612) acc 62.5000 (72.9567) lr 8.7467e-04 eta 2:40:28 +epoch [29/50] batch [135/500] time 0.896 (0.886) data 0.000 (0.005) loss 1.2861 (1.0607) acc 65.6250 (72.8241) lr 8.7467e-04 eta 2:40:26 +epoch [29/50] batch [140/500] time 0.894 (0.888) data 0.000 (0.005) loss 1.0264 (1.0618) acc 71.8750 (72.9241) lr 8.7467e-04 eta 2:40:40 +epoch [29/50] batch [145/500] time 0.866 (0.887) data 0.000 (0.005) loss 0.8682 (1.0628) acc 75.0000 (72.8879) lr 8.7467e-04 eta 2:40:32 +epoch [29/50] batch [150/500] time 0.867 (0.887) data 0.000 (0.005) loss 0.7246 (1.0565) acc 84.3750 (73.0417) lr 8.7467e-04 eta 2:40:25 +epoch 
[29/50] batch [155/500] time 0.858 (0.887) data 0.000 (0.005) loss 1.7119 (1.0604) acc 65.6250 (72.8629) lr 8.7467e-04 eta 2:40:15 +epoch [29/50] batch [160/500] time 0.889 (0.886) data 0.000 (0.005) loss 1.1807 (1.0672) acc 75.0000 (72.7539) lr 8.7467e-04 eta 2:40:07 +epoch [29/50] batch [165/500] time 0.889 (0.886) data 0.000 (0.004) loss 1.4160 (1.0659) acc 75.0000 (72.8788) lr 8.7467e-04 eta 2:39:57 +epoch [29/50] batch [170/500] time 0.861 (0.886) data 0.000 (0.004) loss 0.6611 (1.0667) acc 81.2500 (72.7941) lr 8.7467e-04 eta 2:39:50 +epoch [29/50] batch [175/500] time 0.929 (0.886) data 0.000 (0.004) loss 2.3926 (1.0732) acc 53.1250 (72.6786) lr 8.7467e-04 eta 2:39:52 +epoch [29/50] batch [180/500] time 0.914 (0.887) data 0.000 (0.004) loss 0.9663 (1.0734) acc 68.7500 (72.6042) lr 8.7467e-04 eta 2:39:52 +epoch [29/50] batch [185/500] time 0.886 (0.887) data 0.000 (0.004) loss 1.6729 (1.0802) acc 65.6250 (72.6182) lr 8.7467e-04 eta 2:39:54 +epoch [29/50] batch [190/500] time 0.845 (0.887) data 0.000 (0.004) loss 0.6064 (1.0790) acc 81.2500 (72.6316) lr 8.7467e-04 eta 2:39:46 +epoch [29/50] batch [195/500] time 0.908 (0.887) data 0.000 (0.004) loss 1.4229 (1.0865) acc 68.7500 (72.5481) lr 8.7467e-04 eta 2:39:42 +epoch [29/50] batch [200/500] time 0.885 (0.887) data 0.000 (0.004) loss 0.9258 (1.0836) acc 75.0000 (72.5625) lr 8.7467e-04 eta 2:39:34 +epoch [29/50] batch [205/500] time 0.883 (0.886) data 0.000 (0.004) loss 1.1123 (1.0838) acc 78.1250 (72.6067) lr 8.7467e-04 eta 2:39:27 +epoch [29/50] batch [210/500] time 0.877 (0.886) data 0.000 (0.004) loss 0.7705 (1.0821) acc 71.8750 (72.6042) lr 8.7467e-04 eta 2:39:20 +epoch [29/50] batch [215/500] time 0.902 (0.886) data 0.000 (0.003) loss 1.1699 (1.0814) acc 75.0000 (72.6163) lr 8.7467e-04 eta 2:39:15 +epoch [29/50] batch [220/500] time 0.855 (0.886) data 0.000 (0.003) loss 1.5742 (1.0828) acc 59.3750 (72.6136) lr 8.7467e-04 eta 2:39:10 +epoch [29/50] batch [225/500] time 0.906 (0.886) data 0.000 (0.003) loss 
1.2754 (1.0875) acc 68.7500 (72.5694) lr 8.7467e-04 eta 2:39:04 +epoch [29/50] batch [230/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.5752 (1.0916) acc 71.8750 (72.5272) lr 8.7467e-04 eta 2:39:01 +epoch [29/50] batch [235/500] time 0.905 (0.886) data 0.000 (0.003) loss 1.1982 (1.0924) acc 75.0000 (72.5399) lr 8.7467e-04 eta 2:38:59 +epoch [29/50] batch [240/500] time 0.860 (0.886) data 0.000 (0.003) loss 1.0850 (1.0911) acc 75.0000 (72.5781) lr 8.7467e-04 eta 2:38:54 +epoch [29/50] batch [245/500] time 0.873 (0.886) data 0.000 (0.003) loss 0.6689 (1.0890) acc 68.7500 (72.5765) lr 8.7467e-04 eta 2:38:52 +epoch [29/50] batch [250/500] time 0.902 (0.886) data 0.000 (0.003) loss 0.9199 (1.0870) acc 78.1250 (72.6125) lr 8.7467e-04 eta 2:38:49 +epoch [29/50] batch [255/500] time 0.885 (0.886) data 0.000 (0.003) loss 0.4158 (1.0842) acc 87.5000 (72.6838) lr 8.7467e-04 eta 2:38:44 +epoch [29/50] batch [260/500] time 0.882 (0.886) data 0.000 (0.003) loss 0.7310 (1.0841) acc 81.2500 (72.7043) lr 8.7467e-04 eta 2:38:39 +epoch [29/50] batch [265/500] time 0.881 (0.886) data 0.000 (0.003) loss 1.3027 (1.0850) acc 71.8750 (72.7712) lr 8.7467e-04 eta 2:38:34 +epoch [29/50] batch [270/500] time 0.859 (0.886) data 0.000 (0.003) loss 1.2949 (1.0846) acc 68.7500 (72.7546) lr 8.7467e-04 eta 2:38:27 +epoch [29/50] batch [275/500] time 0.885 (0.886) data 0.000 (0.003) loss 0.6758 (1.0856) acc 75.0000 (72.7500) lr 8.7467e-04 eta 2:38:23 +epoch [29/50] batch [280/500] time 0.862 (0.886) data 0.000 (0.003) loss 1.0439 (1.0830) acc 71.8750 (72.8460) lr 8.7467e-04 eta 2:38:16 +epoch [29/50] batch [285/500] time 0.875 (0.886) data 0.000 (0.003) loss 0.4417 (1.0818) acc 93.7500 (72.9386) lr 8.7467e-04 eta 2:38:16 +epoch [29/50] batch [290/500] time 0.887 (0.886) data 0.000 (0.003) loss 0.6670 (1.0780) acc 78.1250 (72.9849) lr 8.7467e-04 eta 2:38:10 +epoch [29/50] batch [295/500] time 0.877 (0.886) data 0.000 (0.003) loss 0.6973 (1.0741) acc 78.1250 (73.0508) lr 8.7467e-04 eta 2:38:06 
+epoch [29/50] batch [300/500] time 0.899 (0.886) data 0.000 (0.003) loss 1.3271 (1.0767) acc 65.6250 (73.0104) lr 8.7467e-04 eta 2:38:00 +epoch [29/50] batch [305/500] time 0.870 (0.886) data 0.000 (0.003) loss 1.6953 (1.0764) acc 75.0000 (73.0328) lr 8.7467e-04 eta 2:37:57 +epoch [29/50] batch [310/500] time 0.892 (0.886) data 0.000 (0.002) loss 1.5283 (1.0802) acc 62.5000 (72.9536) lr 8.7467e-04 eta 2:37:54 +epoch [29/50] batch [315/500] time 0.844 (0.886) data 0.000 (0.002) loss 1.1299 (1.0795) acc 62.5000 (72.9365) lr 8.7467e-04 eta 2:37:49 +epoch [29/50] batch [320/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.2471 (1.0779) acc 71.8750 (72.9590) lr 8.7467e-04 eta 2:37:43 +epoch [29/50] batch [325/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.6909 (1.0760) acc 81.2500 (73.0288) lr 8.7467e-04 eta 2:37:37 +epoch [29/50] batch [330/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.1865 (1.0718) acc 68.7500 (73.0871) lr 8.7467e-04 eta 2:37:34 +epoch [29/50] batch [335/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.5371 (1.0718) acc 71.8750 (73.1343) lr 8.7467e-04 eta 2:37:30 +epoch [29/50] batch [340/500] time 0.892 (0.886) data 0.000 (0.002) loss 0.8774 (1.0700) acc 75.0000 (73.1710) lr 8.7467e-04 eta 2:37:27 +epoch [29/50] batch [345/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.5078 (1.0718) acc 65.6250 (73.1431) lr 8.7467e-04 eta 2:37:21 +epoch [29/50] batch [350/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.1475 (1.0700) acc 71.8750 (73.2500) lr 8.7467e-04 eta 2:37:17 +epoch [29/50] batch [355/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.1934 (1.0702) acc 75.0000 (73.2130) lr 8.7467e-04 eta 2:37:12 +epoch [29/50] batch [360/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.0566 (1.0683) acc 62.5000 (73.1597) lr 8.7467e-04 eta 2:37:08 +epoch [29/50] batch [365/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4307 (1.0697) acc 65.6250 (73.1421) lr 8.7467e-04 eta 2:37:04 +epoch [29/50] batch [370/500] time 0.912 (0.886) data 0.000 
(0.002) loss 0.8608 (1.0690) acc 71.8750 (73.1503) lr 8.7467e-04 eta 2:37:00 +epoch [29/50] batch [375/500] time 0.878 (0.886) data 0.001 (0.002) loss 0.4360 (1.0687) acc 90.6250 (73.1583) lr 8.7467e-04 eta 2:36:55 +epoch [29/50] batch [380/500] time 0.905 (0.886) data 0.000 (0.002) loss 0.7969 (1.0695) acc 75.0000 (73.1497) lr 8.7467e-04 eta 2:36:52 +epoch [29/50] batch [385/500] time 0.887 (0.886) data 0.001 (0.002) loss 1.0889 (1.0710) acc 71.8750 (73.0925) lr 8.7467e-04 eta 2:36:48 +epoch [29/50] batch [390/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.2383 (1.0718) acc 71.8750 (73.0208) lr 8.7467e-04 eta 2:36:41 +epoch [29/50] batch [395/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.0264 (1.0680) acc 65.6250 (73.0934) lr 8.7467e-04 eta 2:36:34 +epoch [29/50] batch [400/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.0371 (1.0658) acc 75.0000 (73.1406) lr 8.7467e-04 eta 2:36:29 +epoch [29/50] batch [405/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.3760 (1.0634) acc 65.6250 (73.1944) lr 8.7467e-04 eta 2:36:24 +epoch [29/50] batch [410/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.3662 (1.0645) acc 65.6250 (73.1555) lr 8.7467e-04 eta 2:36:18 +epoch [29/50] batch [415/500] time 0.923 (0.886) data 0.001 (0.002) loss 1.5791 (1.0675) acc 75.0000 (73.1401) lr 8.7467e-04 eta 2:36:15 +epoch [29/50] batch [420/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.7134 (1.0674) acc 87.5000 (73.1548) lr 8.7467e-04 eta 2:36:09 +epoch [29/50] batch [425/500] time 0.987 (0.886) data 0.000 (0.002) loss 1.3350 (1.0708) acc 62.5000 (73.0662) lr 8.7467e-04 eta 2:36:07 +epoch [29/50] batch [430/500] time 0.871 (0.886) data 0.000 (0.002) loss 0.9214 (1.0699) acc 71.8750 (73.0596) lr 8.7467e-04 eta 2:36:03 +epoch [29/50] batch [435/500] time 0.899 (0.886) data 0.000 (0.002) loss 1.3828 (1.0698) acc 71.8750 (73.0963) lr 8.7467e-04 eta 2:35:59 +epoch [29/50] batch [440/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.8862 (1.0707) acc 84.3750 (73.0753) lr 8.7467e-04 
eta 2:35:54 +epoch [29/50] batch [445/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.9482 (1.0681) acc 71.8750 (73.1320) lr 8.7467e-04 eta 2:35:50 +epoch [29/50] batch [450/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.2021 (1.0663) acc 78.1250 (73.1806) lr 8.7467e-04 eta 2:35:46 +epoch [29/50] batch [455/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.5430 (1.0657) acc 68.7500 (73.2005) lr 8.7467e-04 eta 2:35:40 +epoch [29/50] batch [460/500] time 0.895 (0.886) data 0.000 (0.002) loss 1.6016 (1.0665) acc 65.6250 (73.1793) lr 8.7467e-04 eta 2:35:36 +epoch [29/50] batch [465/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.7988 (1.0681) acc 65.6250 (73.1384) lr 8.7467e-04 eta 2:35:31 +epoch [29/50] batch [470/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.5830 (1.0697) acc 68.7500 (73.1250) lr 8.7467e-04 eta 2:35:28 +epoch [29/50] batch [475/500] time 0.909 (0.886) data 0.000 (0.002) loss 0.6982 (1.0694) acc 81.2500 (73.0987) lr 8.7467e-04 eta 2:35:25 +epoch [29/50] batch [480/500] time 0.868 (0.886) data 0.000 (0.002) loss 0.4597 (1.0699) acc 84.3750 (73.0990) lr 8.7467e-04 eta 2:35:20 +epoch [29/50] batch [485/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.2715 (1.0707) acc 71.8750 (73.0863) lr 8.7467e-04 eta 2:35:15 +epoch [29/50] batch [490/500] time 0.885 (0.886) data 0.000 (0.002) loss 0.8677 (1.0705) acc 71.8750 (73.0804) lr 8.7467e-04 eta 2:35:10 +epoch [29/50] batch [495/500] time 0.872 (0.886) data 0.000 (0.002) loss 0.9844 (1.0705) acc 71.8750 (73.0934) lr 8.7467e-04 eta 2:35:05 +epoch [29/50] batch [500/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.8032 (1.0712) acc 81.2500 (73.0875) lr 8.1262e-04 eta 2:35:01 +epoch [30/50] batch [5/500] time 0.889 (1.031) data 0.000 (0.146) loss 1.8721 (1.4286) acc 59.3750 (63.7500) lr 8.1262e-04 eta 3:00:21 +epoch [30/50] batch [10/500] time 0.889 (0.953) data 0.000 (0.073) loss 0.6411 (1.1606) acc 78.1250 (70.0000) lr 8.1262e-04 eta 2:46:32 +epoch [30/50] batch [15/500] time 0.888 (0.938) data 
0.000 (0.049) loss 1.1064 (1.1802) acc 65.6250 (71.0417) lr 8.1262e-04 eta 2:43:53 +epoch [30/50] batch [20/500] time 0.908 (0.926) data 0.000 (0.037) loss 1.0674 (1.1363) acc 62.5000 (71.0938) lr 8.1262e-04 eta 2:41:41 +epoch [30/50] batch [25/500] time 0.918 (0.922) data 0.000 (0.029) loss 1.0537 (1.0912) acc 65.6250 (72.6250) lr 8.1262e-04 eta 2:40:56 +epoch [30/50] batch [30/500] time 0.889 (0.915) data 0.000 (0.024) loss 1.0459 (1.0970) acc 71.8750 (72.8125) lr 8.1262e-04 eta 2:39:36 +epoch [30/50] batch [35/500] time 0.908 (0.910) data 0.000 (0.021) loss 1.4365 (1.1515) acc 59.3750 (71.5179) lr 8.1262e-04 eta 2:38:44 +epoch [30/50] batch [40/500] time 0.879 (0.908) data 0.000 (0.018) loss 0.6113 (1.1371) acc 84.3750 (71.9531) lr 8.1262e-04 eta 2:38:14 +epoch [30/50] batch [45/500] time 0.910 (0.906) data 0.000 (0.016) loss 1.2686 (1.1314) acc 65.6250 (72.0833) lr 8.1262e-04 eta 2:37:50 +epoch [30/50] batch [50/500] time 0.914 (0.904) data 0.000 (0.015) loss 1.5098 (1.1102) acc 59.3750 (72.3750) lr 8.1262e-04 eta 2:37:31 +epoch [30/50] batch [55/500] time 0.875 (0.902) data 0.000 (0.013) loss 1.3145 (1.1081) acc 75.0000 (72.6136) lr 8.1262e-04 eta 2:37:03 +epoch [30/50] batch [60/500] time 0.894 (0.901) data 0.000 (0.012) loss 0.9565 (1.1080) acc 78.1250 (72.4479) lr 8.1262e-04 eta 2:36:49 +epoch [30/50] batch [65/500] time 0.874 (0.900) data 0.000 (0.011) loss 0.8721 (1.1087) acc 81.2500 (72.5000) lr 8.1262e-04 eta 2:36:28 +epoch [30/50] batch [70/500] time 0.901 (0.898) data 0.000 (0.011) loss 0.9380 (1.0986) acc 71.8750 (72.5893) lr 8.1262e-04 eta 2:36:09 +epoch [30/50] batch [75/500] time 0.884 (0.897) data 0.000 (0.010) loss 1.1875 (1.1040) acc 59.3750 (72.4167) lr 8.1262e-04 eta 2:35:51 +epoch [30/50] batch [80/500] time 0.878 (0.897) data 0.000 (0.009) loss 1.3457 (1.1185) acc 68.7500 (72.3828) lr 8.1262e-04 eta 2:35:47 +epoch [30/50] batch [85/500] time 0.886 (0.898) data 0.000 (0.009) loss 1.5557 (1.1226) acc 59.3750 (72.2426) lr 8.1262e-04 eta 
2:35:47 +epoch [30/50] batch [90/500] time 0.902 (0.897) data 0.000 (0.008) loss 0.9043 (1.1159) acc 78.1250 (72.5347) lr 8.1262e-04 eta 2:35:39 +epoch [30/50] batch [95/500] time 0.890 (0.897) data 0.000 (0.008) loss 1.0293 (1.1130) acc 71.8750 (72.4671) lr 8.1262e-04 eta 2:35:32 +epoch [30/50] batch [100/500] time 0.878 (0.896) data 0.000 (0.008) loss 0.9697 (1.1018) acc 81.2500 (72.6250) lr 8.1262e-04 eta 2:35:18 +epoch [30/50] batch [105/500] time 0.875 (0.896) data 0.000 (0.007) loss 0.9331 (1.1068) acc 81.2500 (72.6190) lr 8.1262e-04 eta 2:35:12 +epoch [30/50] batch [110/500] time 0.893 (0.897) data 0.000 (0.007) loss 0.5737 (1.0998) acc 90.6250 (72.7841) lr 8.1262e-04 eta 2:35:15 +epoch [30/50] batch [115/500] time 0.890 (0.896) data 0.000 (0.007) loss 0.7461 (1.0990) acc 75.0000 (72.8804) lr 8.1262e-04 eta 2:35:08 +epoch [30/50] batch [120/500] time 0.904 (0.896) data 0.000 (0.006) loss 0.7842 (1.1004) acc 81.2500 (73.0208) lr 8.1262e-04 eta 2:35:01 +epoch [30/50] batch [125/500] time 0.881 (0.895) data 0.000 (0.006) loss 1.2500 (1.1005) acc 65.6250 (73.0500) lr 8.1262e-04 eta 2:34:46 +epoch [30/50] batch [130/500] time 0.865 (0.895) data 0.000 (0.006) loss 0.9331 (1.1010) acc 68.7500 (73.0288) lr 8.1262e-04 eta 2:34:39 +epoch [30/50] batch [135/500] time 0.904 (0.895) data 0.000 (0.006) loss 1.2080 (1.0979) acc 75.0000 (73.1713) lr 8.1262e-04 eta 2:34:35 +epoch [30/50] batch [140/500] time 0.887 (0.895) data 0.000 (0.005) loss 1.0137 (1.1013) acc 59.3750 (72.9018) lr 8.1262e-04 eta 2:34:30 +epoch [30/50] batch [145/500] time 0.925 (0.895) data 0.000 (0.005) loss 1.4131 (1.1082) acc 71.8750 (72.8879) lr 8.1262e-04 eta 2:34:23 +epoch [30/50] batch [150/500] time 0.901 (0.895) data 0.000 (0.005) loss 1.2656 (1.1134) acc 71.8750 (72.8750) lr 8.1262e-04 eta 2:34:22 +epoch [30/50] batch [155/500] time 0.870 (0.895) data 0.000 (0.005) loss 0.8789 (1.1069) acc 71.8750 (72.8831) lr 8.1262e-04 eta 2:34:22 +epoch [30/50] batch [160/500] time 0.903 (0.895) data 0.000 
(0.005) loss 0.9976 (1.1052) acc 71.8750 (72.8516) lr 8.1262e-04 eta 2:34:15 +epoch [30/50] batch [165/500] time 0.875 (0.895) data 0.000 (0.005) loss 1.7529 (1.1086) acc 59.3750 (72.7652) lr 8.1262e-04 eta 2:34:05 +epoch [30/50] batch [170/500] time 0.886 (0.894) data 0.000 (0.005) loss 0.7471 (1.1027) acc 84.3750 (72.9779) lr 8.1262e-04 eta 2:34:00 +epoch [30/50] batch [175/500] time 0.895 (0.894) data 0.000 (0.004) loss 0.9375 (1.1041) acc 75.0000 (72.9643) lr 8.1262e-04 eta 2:33:48 +epoch [30/50] batch [180/500] time 0.880 (0.894) data 0.000 (0.004) loss 1.2822 (1.1037) acc 65.6250 (72.9514) lr 8.1262e-04 eta 2:33:41 +epoch [30/50] batch [185/500] time 0.874 (0.893) data 0.000 (0.004) loss 1.3076 (1.1047) acc 65.6250 (72.8716) lr 8.1262e-04 eta 2:33:30 +epoch [30/50] batch [190/500] time 0.867 (0.893) data 0.000 (0.004) loss 1.3770 (1.1057) acc 68.7500 (72.9276) lr 8.1262e-04 eta 2:33:22 +epoch [30/50] batch [195/500] time 0.871 (0.892) data 0.000 (0.004) loss 1.0586 (1.1072) acc 75.0000 (72.8846) lr 8.1262e-04 eta 2:33:16 +epoch [30/50] batch [200/500] time 0.896 (0.892) data 0.000 (0.004) loss 1.1533 (1.1068) acc 59.3750 (72.8281) lr 8.1262e-04 eta 2:33:10 +epoch [30/50] batch [205/500] time 0.858 (0.892) data 0.000 (0.004) loss 1.0205 (1.1051) acc 59.3750 (72.7439) lr 8.1262e-04 eta 2:33:00 +epoch [30/50] batch [210/500] time 0.862 (0.891) data 0.000 (0.004) loss 1.0537 (1.1040) acc 71.8750 (72.8274) lr 8.1262e-04 eta 2:32:51 +epoch [30/50] batch [215/500] time 0.904 (0.891) data 0.000 (0.004) loss 1.3496 (1.1059) acc 62.5000 (72.7180) lr 8.1262e-04 eta 2:32:43 +epoch [30/50] batch [220/500] time 0.873 (0.891) data 0.000 (0.004) loss 1.0166 (1.1042) acc 75.0000 (72.6989) lr 8.1262e-04 eta 2:32:35 +epoch [30/50] batch [225/500] time 0.896 (0.890) data 0.000 (0.003) loss 1.4072 (1.1063) acc 65.6250 (72.6806) lr 8.1262e-04 eta 2:32:28 +epoch [30/50] batch [230/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.1133 (1.1034) acc 81.2500 (72.8125) lr 8.1262e-04 
eta 2:32:22 +epoch [30/50] batch [235/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.9370 (1.1039) acc 75.0000 (72.7793) lr 8.1262e-04 eta 2:32:17 +epoch [30/50] batch [240/500] time 0.868 (0.890) data 0.000 (0.003) loss 1.0967 (1.1015) acc 68.7500 (72.6953) lr 8.1262e-04 eta 2:32:08 +epoch [30/50] batch [245/500] time 0.873 (0.890) data 0.000 (0.003) loss 1.1025 (1.0971) acc 71.8750 (72.7934) lr 8.1262e-04 eta 2:32:03 +epoch [30/50] batch [250/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.8867 (1.1032) acc 56.2500 (72.7000) lr 8.1262e-04 eta 2:31:55 +epoch [30/50] batch [255/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.9487 (1.1051) acc 71.8750 (72.6348) lr 8.1262e-04 eta 2:31:56 +epoch [30/50] batch [260/500] time 0.907 (0.890) data 0.001 (0.003) loss 1.1064 (1.1026) acc 81.2500 (72.7043) lr 8.1262e-04 eta 2:31:51 +epoch [30/50] batch [265/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.5312 (1.1002) acc 59.3750 (72.7476) lr 8.1262e-04 eta 2:31:46 +epoch [30/50] batch [270/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.3379 (1.0971) acc 90.6250 (72.8125) lr 8.1262e-04 eta 2:31:41 +epoch [30/50] batch [275/500] time 0.872 (0.889) data 0.000 (0.003) loss 1.5176 (1.0967) acc 65.6250 (72.7955) lr 8.1262e-04 eta 2:31:34 +epoch [30/50] batch [280/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.0459 (1.0943) acc 71.8750 (72.7790) lr 8.1262e-04 eta 2:31:29 +epoch [30/50] batch [285/500] time 0.896 (0.889) data 0.000 (0.003) loss 1.2451 (1.0932) acc 68.7500 (72.7412) lr 8.1262e-04 eta 2:31:23 +epoch [30/50] batch [290/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.9653 (1.0959) acc 71.8750 (72.7371) lr 8.1262e-04 eta 2:31:20 +epoch [30/50] batch [295/500] time 1.003 (0.890) data 0.000 (0.003) loss 0.6978 (1.0973) acc 87.5000 (72.7013) lr 8.1262e-04 eta 2:31:18 +epoch [30/50] batch [300/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.7148 (1.1020) acc 84.3750 (72.6250) lr 8.1262e-04 eta 2:31:12 +epoch [30/50] batch [305/500] time 0.903 (0.889) data 
0.000 (0.003) loss 0.6904 (1.1003) acc 81.2500 (72.6639) lr 8.1262e-04 eta 2:31:08 +epoch [30/50] batch [310/500] time 0.871 (0.889) data 0.000 (0.003) loss 0.6792 (1.0985) acc 84.3750 (72.7218) lr 8.1262e-04 eta 2:31:01 +epoch [30/50] batch [315/500] time 0.864 (0.889) data 0.000 (0.003) loss 0.9185 (1.0963) acc 84.3750 (72.8274) lr 8.1262e-04 eta 2:30:56 +epoch [30/50] batch [320/500] time 0.907 (0.889) data 0.000 (0.003) loss 0.9531 (1.0946) acc 81.2500 (72.9004) lr 8.1262e-04 eta 2:30:51 +epoch [30/50] batch [325/500] time 0.899 (0.889) data 0.000 (0.002) loss 0.9575 (1.0903) acc 65.6250 (72.9808) lr 8.1262e-04 eta 2:30:46 +epoch [30/50] batch [330/500] time 0.888 (0.889) data 0.000 (0.002) loss 0.9712 (1.0859) acc 75.0000 (73.0492) lr 8.1262e-04 eta 2:30:42 +epoch [30/50] batch [335/500] time 0.863 (0.889) data 0.000 (0.002) loss 0.7617 (1.0847) acc 75.0000 (73.0970) lr 8.1262e-04 eta 2:30:35 +epoch [30/50] batch [340/500] time 0.847 (0.889) data 0.000 (0.002) loss 1.1279 (1.0840) acc 75.0000 (73.1342) lr 8.1262e-04 eta 2:30:28 +epoch [30/50] batch [345/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.9946 (1.0881) acc 68.7500 (73.0344) lr 8.1262e-04 eta 2:30:25 +epoch [30/50] batch [350/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.5439 (1.0924) acc 65.6250 (72.9464) lr 8.1262e-04 eta 2:30:19 +epoch [30/50] batch [355/500] time 0.890 (0.889) data 0.000 (0.002) loss 2.1250 (1.0960) acc 50.0000 (72.8873) lr 8.1262e-04 eta 2:30:13 +epoch [30/50] batch [360/500] time 0.911 (0.889) data 0.000 (0.002) loss 1.0098 (1.0959) acc 75.0000 (72.8559) lr 8.1262e-04 eta 2:30:10 +epoch [30/50] batch [365/500] time 0.918 (0.889) data 0.000 (0.002) loss 1.1318 (1.0974) acc 75.0000 (72.8168) lr 8.1262e-04 eta 2:30:06 +epoch [30/50] batch [370/500] time 0.884 (0.889) data 0.000 (0.002) loss 1.1611 (1.0963) acc 81.2500 (72.9054) lr 8.1262e-04 eta 2:30:01 +epoch [30/50] batch [375/500] time 0.914 (0.889) data 0.000 (0.002) loss 1.4648 (1.0961) acc 65.6250 (72.9250) lr 
8.1262e-04 eta 2:29:58 +epoch [30/50] batch [380/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.1670 (1.0959) acc 71.8750 (72.9359) lr 8.1262e-04 eta 2:29:51 +epoch [30/50] batch [385/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6553 (1.0938) acc 90.6250 (73.0114) lr 8.1262e-04 eta 2:29:46 +epoch [30/50] batch [390/500] time 0.899 (0.888) data 0.000 (0.002) loss 0.9751 (1.0960) acc 71.8750 (72.9888) lr 8.1262e-04 eta 2:29:41 +epoch [30/50] batch [395/500] time 0.870 (0.889) data 0.000 (0.002) loss 0.7490 (1.0989) acc 84.3750 (72.9826) lr 8.1262e-04 eta 2:29:38 +epoch [30/50] batch [400/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.0420 (1.0971) acc 78.1250 (73.0000) lr 8.1262e-04 eta 2:29:32 +epoch [30/50] batch [405/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.8496 (1.0973) acc 65.6250 (72.9707) lr 8.1262e-04 eta 2:29:28 +epoch [30/50] batch [410/500] time 0.893 (0.888) data 0.000 (0.002) loss 1.4072 (1.0970) acc 68.7500 (72.9954) lr 8.1262e-04 eta 2:29:22 +epoch [30/50] batch [415/500] time 0.862 (0.888) data 0.000 (0.002) loss 0.9380 (1.0957) acc 81.2500 (73.0271) lr 8.1262e-04 eta 2:29:15 +epoch [30/50] batch [420/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.0068 (1.0931) acc 75.0000 (73.0580) lr 8.1262e-04 eta 2:29:11 +epoch [30/50] batch [425/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.5557 (1.0916) acc 90.6250 (73.1103) lr 8.1262e-04 eta 2:29:05 +epoch [30/50] batch [430/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9355 (1.0893) acc 71.8750 (73.1395) lr 8.1262e-04 eta 2:29:00 +epoch [30/50] batch [435/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.1816 (1.0895) acc 75.0000 (73.1394) lr 8.1262e-04 eta 2:28:56 +epoch [30/50] batch [440/500] time 0.853 (0.888) data 0.000 (0.002) loss 0.5977 (1.0871) acc 78.1250 (73.1605) lr 8.1262e-04 eta 2:28:52 +epoch [30/50] batch [445/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.2207 (1.0853) acc 75.0000 (73.2093) lr 8.1262e-04 eta 2:28:48 +epoch [30/50] batch [450/500] time 0.872 
(0.888) data 0.000 (0.002) loss 1.3008 (1.0901) acc 62.5000 (73.1528) lr 8.1262e-04 eta 2:28:42 +epoch [30/50] batch [455/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2998 (1.0914) acc 75.0000 (73.1662) lr 8.1262e-04 eta 2:28:37 +epoch [30/50] batch [460/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.1816 (1.0918) acc 65.6250 (73.1250) lr 8.1262e-04 eta 2:28:31 +epoch [30/50] batch [465/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.2275 (1.0948) acc 81.2500 (73.0847) lr 8.1262e-04 eta 2:28:25 +epoch [30/50] batch [470/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.4551 (1.0943) acc 65.6250 (73.0851) lr 8.1262e-04 eta 2:28:19 +epoch [30/50] batch [475/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.2373 (1.0946) acc 62.5000 (73.0263) lr 8.1262e-04 eta 2:28:13 +epoch [30/50] batch [480/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.3945 (1.0946) acc 62.5000 (73.0013) lr 8.1262e-04 eta 2:28:07 +epoch [30/50] batch [485/500] time 0.895 (0.887) data 0.001 (0.002) loss 1.3125 (1.0953) acc 65.6250 (72.9510) lr 8.1262e-04 eta 2:28:02 +epoch [30/50] batch [490/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.9136 (1.0953) acc 65.6250 (72.9145) lr 8.1262e-04 eta 2:27:57 +epoch [30/50] batch [495/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.7896 (1.0920) acc 78.1250 (72.9609) lr 8.1262e-04 eta 2:27:51 +epoch [30/50] batch [500/500] time 0.892 (0.887) data 0.000 (0.002) loss 0.8667 (1.0910) acc 78.1250 (72.9500) lr 7.5131e-04 eta 2:27:48 +epoch [31/50] batch [5/500] time 0.876 (1.021) data 0.000 (0.136) loss 1.1074 (1.1379) acc 65.6250 (73.7500) lr 7.5131e-04 eta 2:50:00 +epoch [31/50] batch [10/500] time 0.858 (0.944) data 0.000 (0.068) loss 0.5981 (1.0514) acc 78.1250 (75.6250) lr 7.5131e-04 eta 2:37:09 +epoch [31/50] batch [15/500] time 0.893 (0.925) data 0.000 (0.045) loss 0.9761 (0.9607) acc 78.1250 (76.8750) lr 7.5131e-04 eta 2:33:58 +epoch [31/50] batch [20/500] time 0.888 (0.915) data 0.000 (0.034) loss 1.1553 (1.0042) acc 75.0000 (76.2500) lr 
7.5131e-04 eta 2:32:14 +epoch [31/50] batch [25/500] time 0.999 (0.914) data 0.000 (0.027) loss 0.5469 (1.0362) acc 84.3750 (75.7500) lr 7.5131e-04 eta 2:31:54 +epoch [31/50] batch [30/500] time 0.875 (0.909) data 0.000 (0.023) loss 1.1855 (1.0574) acc 75.0000 (74.8958) lr 7.5131e-04 eta 2:31:00 +epoch [31/50] batch [35/500] time 0.870 (0.905) data 0.000 (0.020) loss 1.3057 (1.0890) acc 59.3750 (73.3036) lr 7.5131e-04 eta 2:30:14 +epoch [31/50] batch [40/500] time 0.893 (0.904) data 0.000 (0.017) loss 1.3936 (1.0803) acc 65.6250 (73.2031) lr 7.5131e-04 eta 2:30:05 +epoch [31/50] batch [45/500] time 0.908 (0.903) data 0.000 (0.015) loss 1.7588 (1.0844) acc 53.1250 (72.9167) lr 7.5131e-04 eta 2:29:45 +epoch [31/50] batch [50/500] time 0.866 (0.901) data 0.000 (0.014) loss 1.2559 (1.0819) acc 65.6250 (72.8750) lr 7.5131e-04 eta 2:29:26 +epoch [31/50] batch [55/500] time 0.869 (0.899) data 0.000 (0.013) loss 0.7188 (1.0688) acc 78.1250 (73.0682) lr 7.5131e-04 eta 2:29:00 +epoch [31/50] batch [60/500] time 0.848 (0.897) data 0.000 (0.012) loss 0.7251 (1.0587) acc 81.2500 (73.1250) lr 7.5131e-04 eta 2:28:34 +epoch [31/50] batch [65/500] time 0.875 (0.894) data 0.000 (0.011) loss 1.0391 (1.0520) acc 68.7500 (73.3173) lr 7.5131e-04 eta 2:28:06 +epoch [31/50] batch [70/500] time 0.904 (0.894) data 0.000 (0.010) loss 1.4121 (1.0553) acc 65.6250 (73.3929) lr 7.5131e-04 eta 2:27:59 +epoch [31/50] batch [75/500] time 0.868 (0.893) data 0.000 (0.009) loss 0.9722 (1.0652) acc 81.2500 (73.2500) lr 7.5131e-04 eta 2:27:43 +epoch [31/50] batch [80/500] time 0.883 (0.892) data 0.000 (0.009) loss 1.5830 (1.0653) acc 53.1250 (73.2422) lr 7.5131e-04 eta 2:27:32 +epoch [31/50] batch [85/500] time 0.990 (0.893) data 0.000 (0.008) loss 1.0664 (1.0672) acc 68.7500 (73.3824) lr 7.5131e-04 eta 2:27:30 +epoch [31/50] batch [90/500] time 0.856 (0.891) data 0.000 (0.008) loss 0.7476 (1.0560) acc 84.3750 (73.6111) lr 7.5131e-04 eta 2:27:13 +epoch [31/50] batch [95/500] time 0.859 (0.890) data 
0.000 (0.007) loss 0.8354 (1.0554) acc 81.2500 (73.6842) lr 7.5131e-04 eta 2:26:59 +epoch [31/50] batch [100/500] time 0.892 (0.890) data 0.000 (0.007) loss 0.5444 (1.0491) acc 81.2500 (73.8125) lr 7.5131e-04 eta 2:26:48 +epoch [31/50] batch [105/500] time 0.878 (0.889) data 0.000 (0.007) loss 0.7500 (1.0448) acc 78.1250 (73.9583) lr 7.5131e-04 eta 2:26:41 +epoch [31/50] batch [110/500] time 0.881 (0.889) data 0.000 (0.006) loss 0.7900 (1.0445) acc 84.3750 (74.0909) lr 7.5131e-04 eta 2:26:32 +epoch [31/50] batch [115/500] time 0.916 (0.889) data 0.000 (0.006) loss 1.8145 (1.0478) acc 65.6250 (74.0761) lr 7.5131e-04 eta 2:26:29 +epoch [31/50] batch [120/500] time 0.899 (0.889) data 0.000 (0.006) loss 1.4180 (1.0505) acc 68.7500 (74.1146) lr 7.5131e-04 eta 2:26:25 +epoch [31/50] batch [125/500] time 0.876 (0.889) data 0.000 (0.006) loss 0.7334 (1.0447) acc 84.3750 (74.2000) lr 7.5131e-04 eta 2:26:15 +epoch [31/50] batch [130/500] time 0.873 (0.889) data 0.000 (0.005) loss 1.2627 (1.0463) acc 59.3750 (74.0625) lr 7.5131e-04 eta 2:26:18 +epoch [31/50] batch [135/500] time 0.866 (0.889) data 0.000 (0.005) loss 1.2090 (1.0495) acc 68.7500 (73.7963) lr 7.5131e-04 eta 2:26:09 +epoch [31/50] batch [140/500] time 0.857 (0.888) data 0.000 (0.005) loss 1.7979 (1.0631) acc 56.2500 (73.4598) lr 7.5131e-04 eta 2:25:59 +epoch [31/50] batch [145/500] time 0.853 (0.888) data 0.000 (0.005) loss 1.0312 (1.0602) acc 75.0000 (73.5345) lr 7.5131e-04 eta 2:25:48 +epoch [31/50] batch [150/500] time 0.894 (0.888) data 0.000 (0.005) loss 0.6626 (1.0493) acc 78.1250 (73.7083) lr 7.5131e-04 eta 2:25:46 +epoch [31/50] batch [155/500] time 0.903 (0.888) data 0.000 (0.005) loss 1.3643 (1.0526) acc 68.7500 (73.6492) lr 7.5131e-04 eta 2:25:42 +epoch [31/50] batch [160/500] time 0.885 (0.888) data 0.000 (0.004) loss 0.9580 (1.0513) acc 81.2500 (73.6328) lr 7.5131e-04 eta 2:25:36 +epoch [31/50] batch [165/500] time 0.874 (0.887) data 0.000 (0.004) loss 1.6973 (1.0550) acc 50.0000 (73.3712) lr 
7.5131e-04 eta 2:25:26 +epoch [31/50] batch [170/500] time 0.882 (0.887) data 0.000 (0.004) loss 0.8838 (1.0565) acc 68.7500 (73.3088) lr 7.5131e-04 eta 2:25:16 +epoch [31/50] batch [175/500] time 0.868 (0.886) data 0.000 (0.004) loss 0.7339 (1.0507) acc 71.8750 (73.4464) lr 7.5131e-04 eta 2:25:09 +epoch [31/50] batch [180/500] time 0.882 (0.886) data 0.000 (0.004) loss 1.1621 (1.0552) acc 68.7500 (73.3681) lr 7.5131e-04 eta 2:24:59 +epoch [31/50] batch [185/500] time 0.870 (0.885) data 0.000 (0.004) loss 0.9106 (1.0553) acc 71.8750 (73.3615) lr 7.5131e-04 eta 2:24:49 +epoch [31/50] batch [190/500] time 0.890 (0.885) data 0.000 (0.004) loss 1.3174 (1.0583) acc 68.7500 (73.4539) lr 7.5131e-04 eta 2:24:42 +epoch [31/50] batch [195/500] time 0.873 (0.885) data 0.000 (0.004) loss 1.3633 (1.0566) acc 65.6250 (73.5096) lr 7.5131e-04 eta 2:24:36 +epoch [31/50] batch [200/500] time 0.907 (0.885) data 0.000 (0.004) loss 1.2559 (1.0608) acc 65.6250 (73.4375) lr 7.5131e-04 eta 2:24:34 +epoch [31/50] batch [205/500] time 0.865 (0.885) data 0.000 (0.004) loss 1.3750 (1.0632) acc 71.8750 (73.3232) lr 7.5131e-04 eta 2:24:31 +epoch [31/50] batch [210/500] time 0.900 (0.885) data 0.000 (0.003) loss 2.3672 (1.0755) acc 50.0000 (73.1548) lr 7.5131e-04 eta 2:24:28 +epoch [31/50] batch [215/500] time 0.892 (0.886) data 0.000 (0.003) loss 0.8237 (1.0744) acc 71.8750 (73.1686) lr 7.5131e-04 eta 2:24:25 +epoch [31/50] batch [220/500] time 0.863 (0.886) data 0.000 (0.003) loss 1.4150 (1.0781) acc 78.1250 (73.1108) lr 7.5131e-04 eta 2:24:20 +epoch [31/50] batch [225/500] time 0.877 (0.885) data 0.000 (0.003) loss 1.0586 (1.0818) acc 75.0000 (73.0972) lr 7.5131e-04 eta 2:24:13 +epoch [31/50] batch [230/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.2109 (1.0793) acc 56.2500 (73.1250) lr 7.5131e-04 eta 2:24:13 +epoch [31/50] batch [235/500] time 0.885 (0.886) data 0.000 (0.003) loss 1.1416 (1.0800) acc 71.8750 (73.1250) lr 7.5131e-04 eta 2:24:10 +epoch [31/50] batch [240/500] time 0.854 
(0.886) data 0.000 (0.003) loss 1.2725 (1.0822) acc 62.5000 (73.0729) lr 7.5131e-04 eta 2:24:06 +epoch [31/50] batch [245/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.0410 (1.0876) acc 81.2500 (72.9719) lr 7.5131e-04 eta 2:24:03 +epoch [31/50] batch [250/500] time 0.877 (0.886) data 0.000 (0.003) loss 1.0039 (1.0883) acc 71.8750 (72.9875) lr 7.5131e-04 eta 2:23:59 +epoch [31/50] batch [255/500] time 0.875 (0.886) data 0.000 (0.003) loss 0.6357 (1.0914) acc 75.0000 (72.9412) lr 7.5131e-04 eta 2:23:52 +epoch [31/50] batch [260/500] time 0.900 (0.886) data 0.000 (0.003) loss 0.8501 (1.0896) acc 75.0000 (73.0048) lr 7.5131e-04 eta 2:23:49 +epoch [31/50] batch [265/500] time 0.857 (0.886) data 0.000 (0.003) loss 0.4709 (1.0827) acc 84.3750 (73.1014) lr 7.5131e-04 eta 2:23:42 +epoch [31/50] batch [270/500] time 0.882 (0.886) data 0.000 (0.003) loss 1.5879 (1.0888) acc 65.6250 (73.0440) lr 7.5131e-04 eta 2:23:37 +epoch [31/50] batch [275/500] time 0.876 (0.886) data 0.000 (0.003) loss 2.0137 (1.0904) acc 56.2500 (73.0114) lr 7.5131e-04 eta 2:23:36 +epoch [31/50] batch [280/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.8462 (1.0902) acc 71.8750 (72.9576) lr 7.5131e-04 eta 2:23:30 +epoch [31/50] batch [285/500] time 0.892 (0.886) data 0.000 (0.003) loss 1.1914 (1.0907) acc 71.8750 (72.9276) lr 7.5131e-04 eta 2:23:27 +epoch [31/50] batch [290/500] time 0.921 (0.886) data 0.000 (0.003) loss 1.0137 (1.0918) acc 75.0000 (72.9095) lr 7.5131e-04 eta 2:23:26 +epoch [31/50] batch [295/500] time 0.877 (0.886) data 0.000 (0.003) loss 0.8159 (1.0932) acc 81.2500 (72.9131) lr 7.5131e-04 eta 2:23:21 +epoch [31/50] batch [300/500] time 0.894 (0.886) data 0.000 (0.002) loss 1.0107 (1.0914) acc 75.0000 (72.9375) lr 7.5131e-04 eta 2:23:17 +epoch [31/50] batch [305/500] time 0.901 (0.886) data 0.000 (0.002) loss 0.9307 (1.0902) acc 68.7500 (72.9303) lr 7.5131e-04 eta 2:23:14 +epoch [31/50] batch [310/500] time 0.913 (0.887) data 0.000 (0.002) loss 1.5469 (1.0909) acc 68.7500 
(72.8831) lr 7.5131e-04 eta 2:23:12 +epoch [31/50] batch [315/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.8701 (1.0895) acc 75.0000 (72.8175) lr 7.5131e-04 eta 2:23:09 +epoch [31/50] batch [320/500] time 0.931 (0.887) data 0.000 (0.002) loss 1.0312 (1.0900) acc 84.3750 (72.8906) lr 7.5131e-04 eta 2:23:06 +epoch [31/50] batch [325/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.3125 (1.0901) acc 68.7500 (72.8269) lr 7.5131e-04 eta 2:22:59 +epoch [31/50] batch [330/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.1953 (1.0905) acc 65.6250 (72.8504) lr 7.5131e-04 eta 2:22:56 +epoch [31/50] batch [335/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.4082 (1.0913) acc 71.8750 (72.7985) lr 7.5131e-04 eta 2:22:50 +epoch [31/50] batch [340/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.1719 (1.0913) acc 62.5000 (72.8309) lr 7.5131e-04 eta 2:22:45 +epoch [31/50] batch [345/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.1953 (1.0929) acc 71.8750 (72.8261) lr 7.5131e-04 eta 2:22:39 +epoch [31/50] batch [350/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.6338 (1.0917) acc 62.5000 (72.8393) lr 7.5131e-04 eta 2:22:36 +epoch [31/50] batch [355/500] time 0.921 (0.887) data 0.000 (0.002) loss 0.7656 (1.0901) acc 75.0000 (72.8785) lr 7.5131e-04 eta 2:22:32 +epoch [31/50] batch [360/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.7988 (1.0907) acc 78.1250 (72.8906) lr 7.5131e-04 eta 2:22:26 +epoch [31/50] batch [365/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.0254 (1.0897) acc 65.6250 (72.8853) lr 7.5131e-04 eta 2:22:22 +epoch [31/50] batch [370/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.8569 (1.0893) acc 78.1250 (72.8632) lr 7.5131e-04 eta 2:22:16 +epoch [31/50] batch [375/500] time 0.907 (0.887) data 0.000 (0.002) loss 1.3662 (1.0890) acc 65.6250 (72.8833) lr 7.5131e-04 eta 2:22:16 +epoch [31/50] batch [380/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.2393 (1.0941) acc 62.5000 (72.7549) lr 7.5131e-04 eta 2:22:11 +epoch [31/50] batch [385/500] 
time 0.885 (0.887) data 0.000 (0.002) loss 0.7715 (1.0924) acc 75.0000 (72.7435) lr 7.5131e-04 eta 2:22:07 +epoch [31/50] batch [390/500] time 0.861 (0.887) data 0.000 (0.002) loss 2.0918 (1.0948) acc 50.0000 (72.7003) lr 7.5131e-04 eta 2:22:00 +epoch [31/50] batch [395/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.2852 (1.0970) acc 65.6250 (72.6266) lr 7.5131e-04 eta 2:21:55 +epoch [31/50] batch [400/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.0469 (1.0978) acc 71.8750 (72.6484) lr 7.5131e-04 eta 2:21:50 +epoch [31/50] batch [405/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.3389 (1.0980) acc 68.7500 (72.6312) lr 7.5131e-04 eta 2:21:44 +epoch [31/50] batch [410/500] time 0.866 (0.886) data 0.000 (0.002) loss 0.7744 (1.0961) acc 71.8750 (72.6677) lr 7.5131e-04 eta 2:21:37 +epoch [31/50] batch [415/500] time 0.987 (0.886) data 0.000 (0.002) loss 1.0146 (1.0972) acc 62.5000 (72.5979) lr 7.5131e-04 eta 2:21:35 +epoch [31/50] batch [420/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.4541 (1.1016) acc 68.7500 (72.5298) lr 7.5131e-04 eta 2:21:28 +epoch [31/50] batch [425/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.4307 (1.1042) acc 78.1250 (72.4779) lr 7.5131e-04 eta 2:21:23 +epoch [31/50] batch [430/500] time 0.886 (0.886) data 0.000 (0.002) loss 0.9927 (1.1029) acc 78.1250 (72.5073) lr 7.5131e-04 eta 2:21:18 +epoch [31/50] batch [435/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.7812 (1.1048) acc 56.2500 (72.4713) lr 7.5131e-04 eta 2:21:13 +epoch [31/50] batch [440/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.9834 (1.1042) acc 84.3750 (72.5000) lr 7.5131e-04 eta 2:21:07 +epoch [31/50] batch [445/500] time 0.887 (0.886) data 0.000 (0.002) loss 0.8086 (1.1041) acc 75.0000 (72.4649) lr 7.5131e-04 eta 2:21:03 +epoch [31/50] batch [450/500] time 0.893 (0.886) data 0.000 (0.002) loss 0.6460 (1.1047) acc 78.1250 (72.4583) lr 7.5131e-04 eta 2:20:59 +epoch [31/50] batch [455/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.9800 (1.1062) acc 
68.7500 (72.3901) lr 7.5131e-04 eta 2:20:57 +epoch [31/50] batch [460/500] time 0.877 (0.886) data 0.000 (0.002) loss 0.8232 (1.1046) acc 81.2500 (72.4185) lr 7.5131e-04 eta 2:20:52 +epoch [31/50] batch [465/500] time 0.857 (0.886) data 0.000 (0.002) loss 1.4961 (1.1038) acc 71.8750 (72.4328) lr 7.5131e-04 eta 2:20:46 +epoch [31/50] batch [470/500] time 0.874 (0.886) data 0.000 (0.002) loss 1.4873 (1.1032) acc 62.5000 (72.4402) lr 7.5131e-04 eta 2:20:42 +epoch [31/50] batch [475/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.5098 (1.1039) acc 71.8750 (72.4605) lr 7.5131e-04 eta 2:20:37 +epoch [31/50] batch [480/500] time 0.895 (0.886) data 0.000 (0.002) loss 0.6729 (1.1043) acc 81.2500 (72.4674) lr 7.5131e-04 eta 2:20:35 +epoch [31/50] batch [485/500] time 0.912 (0.886) data 0.000 (0.002) loss 0.7051 (1.1037) acc 78.1250 (72.5064) lr 7.5131e-04 eta 2:20:30 +epoch [31/50] batch [490/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.2305 (1.1024) acc 71.8750 (72.5191) lr 7.5131e-04 eta 2:20:25 +epoch [31/50] batch [495/500] time 0.861 (0.886) data 0.000 (0.002) loss 1.3457 (1.1024) acc 71.8750 (72.4874) lr 7.5131e-04 eta 2:20:19 +epoch [31/50] batch [500/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.1143 (1.1051) acc 65.6250 (72.4313) lr 6.9098e-04 eta 2:20:14 +epoch [32/50] batch [5/500] time 0.861 (1.019) data 0.000 (0.137) loss 0.7676 (0.9273) acc 75.0000 (75.0000) lr 6.9098e-04 eta 2:41:20 +epoch [32/50] batch [10/500] time 1.005 (0.970) data 0.000 (0.069) loss 0.7720 (0.9270) acc 81.2500 (74.0625) lr 6.9098e-04 eta 2:33:26 +epoch [32/50] batch [15/500] time 0.863 (0.941) data 0.000 (0.046) loss 1.1543 (1.0014) acc 75.0000 (73.3333) lr 6.9098e-04 eta 2:28:48 +epoch [32/50] batch [20/500] time 0.888 (0.927) data 0.000 (0.034) loss 0.9170 (1.0134) acc 78.1250 (74.0625) lr 6.9098e-04 eta 2:26:32 +epoch [32/50] batch [25/500] time 0.875 (0.921) data 0.000 (0.028) loss 1.1260 (1.0326) acc 68.7500 (72.8750) lr 6.9098e-04 eta 2:25:21 +epoch [32/50] batch [30/500] 
time 0.872 (0.914) data 0.000 (0.023) loss 0.9141 (1.0113) acc 87.5000 (73.8542) lr 6.9098e-04 eta 2:24:20 +epoch [32/50] batch [35/500] time 0.857 (0.908) data 0.000 (0.020) loss 0.5459 (1.0143) acc 78.1250 (73.9286) lr 6.9098e-04 eta 2:23:18 +epoch [32/50] batch [40/500] time 0.884 (0.904) data 0.000 (0.017) loss 0.5029 (1.0210) acc 87.5000 (74.3750) lr 6.9098e-04 eta 2:22:36 +epoch [32/50] batch [45/500] time 0.864 (0.900) data 0.000 (0.015) loss 1.1973 (1.0357) acc 78.1250 (74.7222) lr 6.9098e-04 eta 2:21:53 +epoch [32/50] batch [50/500] time 0.878 (0.899) data 0.000 (0.014) loss 1.0439 (1.0454) acc 75.0000 (74.3750) lr 6.9098e-04 eta 2:21:37 +epoch [32/50] batch [55/500] time 0.885 (0.896) data 0.000 (0.013) loss 1.5605 (1.0527) acc 59.3750 (74.1477) lr 6.9098e-04 eta 2:21:06 +epoch [32/50] batch [60/500] time 0.901 (0.897) data 0.000 (0.012) loss 0.8428 (1.0469) acc 68.7500 (74.1146) lr 6.9098e-04 eta 2:21:04 +epoch [32/50] batch [65/500] time 0.901 (0.895) data 0.000 (0.011) loss 1.3691 (1.0492) acc 78.1250 (74.2788) lr 6.9098e-04 eta 2:20:49 +epoch [32/50] batch [70/500] time 0.982 (0.896) data 0.000 (0.010) loss 1.5869 (1.0513) acc 59.3750 (74.0179) lr 6.9098e-04 eta 2:20:45 +epoch [32/50] batch [75/500] time 0.915 (0.895) data 0.000 (0.009) loss 1.3369 (1.0594) acc 68.7500 (73.9167) lr 6.9098e-04 eta 2:20:35 +epoch [32/50] batch [80/500] time 0.883 (0.894) data 0.000 (0.009) loss 0.9805 (1.0588) acc 75.0000 (74.0234) lr 6.9098e-04 eta 2:20:22 +epoch [32/50] batch [85/500] time 0.875 (0.893) data 0.000 (0.008) loss 1.2148 (1.0546) acc 68.7500 (73.9338) lr 6.9098e-04 eta 2:20:11 +epoch [32/50] batch [90/500] time 0.880 (0.893) data 0.000 (0.008) loss 1.1699 (1.0615) acc 62.5000 (73.4028) lr 6.9098e-04 eta 2:20:01 +epoch [32/50] batch [95/500] time 0.893 (0.893) data 0.000 (0.007) loss 0.8311 (1.0615) acc 71.8750 (73.2566) lr 6.9098e-04 eta 2:19:58 +epoch [32/50] batch [100/500] time 0.880 (0.893) data 0.000 (0.007) loss 0.4504 (1.0478) acc 87.5000 (73.5000) 
lr 6.9098e-04 eta 2:19:51 +epoch [32/50] batch [105/500] time 0.901 (0.893) data 0.000 (0.007) loss 1.3135 (1.0525) acc 75.0000 (73.4821) lr 6.9098e-04 eta 2:19:52 +epoch [32/50] batch [110/500] time 0.875 (0.893) data 0.000 (0.006) loss 0.6074 (1.0451) acc 78.1250 (73.5795) lr 6.9098e-04 eta 2:19:47 +epoch [32/50] batch [115/500] time 0.863 (0.894) data 0.000 (0.006) loss 1.1904 (1.0560) acc 65.6250 (73.2880) lr 6.9098e-04 eta 2:19:51 +epoch [32/50] batch [120/500] time 0.902 (0.894) data 0.000 (0.006) loss 0.9131 (1.0592) acc 71.8750 (73.1510) lr 6.9098e-04 eta 2:19:41 +epoch [32/50] batch [125/500] time 0.896 (0.894) data 0.000 (0.006) loss 0.8340 (1.0631) acc 78.1250 (72.9500) lr 6.9098e-04 eta 2:19:37 +epoch [32/50] batch [130/500] time 0.888 (0.893) data 0.000 (0.005) loss 0.7124 (1.0564) acc 75.0000 (73.0769) lr 6.9098e-04 eta 2:19:26 +epoch [32/50] batch [135/500] time 0.863 (0.892) data 0.000 (0.005) loss 0.5742 (1.0522) acc 81.2500 (73.1481) lr 6.9098e-04 eta 2:19:14 +epoch [32/50] batch [140/500] time 0.866 (0.892) data 0.000 (0.005) loss 0.9502 (1.0582) acc 71.8750 (73.0580) lr 6.9098e-04 eta 2:19:07 +epoch [32/50] batch [145/500] time 0.897 (0.892) data 0.000 (0.005) loss 1.1250 (1.0595) acc 68.7500 (73.0172) lr 6.9098e-04 eta 2:19:00 +epoch [32/50] batch [150/500] time 0.895 (0.891) data 0.000 (0.005) loss 1.6182 (1.0682) acc 65.6250 (72.8750) lr 6.9098e-04 eta 2:18:52 +epoch [32/50] batch [155/500] time 0.886 (0.891) data 0.000 (0.005) loss 0.8052 (1.0755) acc 68.7500 (72.8024) lr 6.9098e-04 eta 2:18:43 +epoch [32/50] batch [160/500] time 0.871 (0.890) data 0.000 (0.004) loss 0.7993 (1.0708) acc 78.1250 (72.9102) lr 6.9098e-04 eta 2:18:33 +epoch [32/50] batch [165/500] time 0.859 (0.890) data 0.000 (0.004) loss 1.1602 (1.0775) acc 71.8750 (72.7841) lr 6.9098e-04 eta 2:18:25 +epoch [32/50] batch [170/500] time 0.892 (0.889) data 0.000 (0.004) loss 1.5811 (1.0833) acc 65.6250 (72.6287) lr 6.9098e-04 eta 2:18:15 +epoch [32/50] batch [175/500] time 0.854 
(0.888) data 0.000 (0.004) loss 1.4404 (1.0878) acc 68.7500 (72.5179) lr 6.9098e-04 eta 2:18:04 +epoch [32/50] batch [180/500] time 0.915 (0.888) data 0.000 (0.004) loss 0.6460 (1.0860) acc 75.0000 (72.4653) lr 6.9098e-04 eta 2:18:00 +epoch [32/50] batch [185/500] time 0.884 (0.888) data 0.000 (0.004) loss 1.4658 (1.0895) acc 65.6250 (72.4324) lr 6.9098e-04 eta 2:17:53 +epoch [32/50] batch [190/500] time 0.878 (0.888) data 0.000 (0.004) loss 0.6631 (1.0904) acc 78.1250 (72.3849) lr 6.9098e-04 eta 2:17:47 +epoch [32/50] batch [195/500] time 0.919 (0.888) data 0.000 (0.004) loss 1.7598 (1.0927) acc 62.5000 (72.4359) lr 6.9098e-04 eta 2:17:46 +epoch [32/50] batch [200/500] time 0.873 (0.888) data 0.000 (0.004) loss 1.3730 (1.0923) acc 62.5000 (72.5312) lr 6.9098e-04 eta 2:17:38 +epoch [32/50] batch [205/500] time 0.876 (0.888) data 0.000 (0.004) loss 1.3105 (1.0911) acc 65.6250 (72.5457) lr 6.9098e-04 eta 2:17:31 +epoch [32/50] batch [210/500] time 0.913 (0.888) data 0.000 (0.003) loss 1.0293 (1.0896) acc 75.0000 (72.6190) lr 6.9098e-04 eta 2:17:27 +epoch [32/50] batch [215/500] time 0.869 (0.888) data 0.000 (0.003) loss 1.0020 (1.0913) acc 75.0000 (72.4855) lr 6.9098e-04 eta 2:17:25 +epoch [32/50] batch [220/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.0146 (1.0870) acc 65.6250 (72.6136) lr 6.9098e-04 eta 2:17:20 +epoch [32/50] batch [225/500] time 0.938 (0.888) data 0.000 (0.003) loss 1.0781 (1.0854) acc 78.1250 (72.7083) lr 6.9098e-04 eta 2:17:19 +epoch [32/50] batch [230/500] time 0.867 (0.888) data 0.000 (0.003) loss 1.0977 (1.0831) acc 75.0000 (72.7717) lr 6.9098e-04 eta 2:17:13 +epoch [32/50] batch [235/500] time 0.894 (0.888) data 0.000 (0.003) loss 1.1504 (1.0849) acc 75.0000 (72.7128) lr 6.9098e-04 eta 2:17:09 +epoch [32/50] batch [240/500] time 0.890 (0.888) data 0.000 (0.003) loss 0.7256 (1.0817) acc 78.1250 (72.7865) lr 6.9098e-04 eta 2:17:05 +epoch [32/50] batch [245/500] time 0.865 (0.888) data 0.000 (0.003) loss 0.6426 (1.0840) acc 78.1250 
(72.7041) lr 6.9098e-04 eta 2:17:01 +epoch [32/50] batch [250/500] time 0.862 (0.888) data 0.000 (0.003) loss 1.5381 (1.0883) acc 68.7500 (72.6000) lr 6.9098e-04 eta 2:16:55 +epoch [32/50] batch [255/500] time 0.872 (0.888) data 0.000 (0.003) loss 0.6816 (1.0926) acc 78.1250 (72.6103) lr 6.9098e-04 eta 2:16:51 +epoch [32/50] batch [260/500] time 0.865 (0.889) data 0.000 (0.003) loss 1.5898 (1.0929) acc 71.8750 (72.6202) lr 6.9098e-04 eta 2:16:49 +epoch [32/50] batch [265/500] time 0.864 (0.888) data 0.000 (0.003) loss 0.8398 (1.0897) acc 78.1250 (72.6887) lr 6.9098e-04 eta 2:16:43 +epoch [32/50] batch [270/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.9014 (1.0877) acc 81.2500 (72.6736) lr 6.9098e-04 eta 2:16:38 +epoch [32/50] batch [275/500] time 0.883 (0.888) data 0.000 (0.003) loss 0.9302 (1.0894) acc 75.0000 (72.6477) lr 6.9098e-04 eta 2:16:31 +epoch [32/50] batch [280/500] time 0.906 (0.888) data 0.000 (0.003) loss 1.2188 (1.0867) acc 65.6250 (72.7009) lr 6.9098e-04 eta 2:16:25 +epoch [32/50] batch [285/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.6719 (1.0822) acc 78.1250 (72.7741) lr 6.9098e-04 eta 2:16:21 +epoch [32/50] batch [290/500] time 0.895 (0.888) data 0.000 (0.003) loss 0.8105 (1.0808) acc 78.1250 (72.8341) lr 6.9098e-04 eta 2:16:15 +epoch [32/50] batch [295/500] time 0.869 (0.888) data 0.000 (0.003) loss 1.2178 (1.0788) acc 68.7500 (72.8178) lr 6.9098e-04 eta 2:16:11 +epoch [32/50] batch [300/500] time 0.860 (0.888) data 0.000 (0.003) loss 0.9121 (1.0760) acc 71.8750 (72.8958) lr 6.9098e-04 eta 2:16:05 +epoch [32/50] batch [305/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.6084 (1.0781) acc 59.3750 (72.8074) lr 6.9098e-04 eta 2:15:58 +epoch [32/50] batch [310/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.6709 (1.0769) acc 84.3750 (72.8024) lr 6.9098e-04 eta 2:15:54 +epoch [32/50] batch [315/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.1533 (1.0762) acc 65.6250 (72.8373) lr 6.9098e-04 eta 2:15:49 +epoch [32/50] batch [320/500] 
time 0.893 (0.887) data 0.000 (0.002) loss 0.4014 (1.0759) acc 87.5000 (72.8906) lr 6.9098e-04 eta 2:15:44 +epoch [32/50] batch [325/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.8979 (1.0745) acc 81.2500 (72.9327) lr 6.9098e-04 eta 2:15:39 +epoch [32/50] batch [330/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.6191 (1.0758) acc 56.2500 (72.8504) lr 6.9098e-04 eta 2:15:34 +epoch [32/50] batch [335/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.5088 (1.0776) acc 62.5000 (72.8172) lr 6.9098e-04 eta 2:15:29 +epoch [32/50] batch [340/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0156 (1.0796) acc 81.2500 (72.7849) lr 6.9098e-04 eta 2:15:25 +epoch [32/50] batch [345/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.0195 (1.0781) acc 71.8750 (72.7808) lr 6.9098e-04 eta 2:15:22 +epoch [32/50] batch [350/500] time 0.883 (0.887) data 0.000 (0.002) loss 0.6094 (1.0765) acc 81.2500 (72.8214) lr 6.9098e-04 eta 2:15:19 +epoch [32/50] batch [355/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.2744 (1.0764) acc 71.8750 (72.8345) lr 6.9098e-04 eta 2:15:14 +epoch [32/50] batch [360/500] time 0.925 (0.888) data 0.000 (0.002) loss 0.6812 (1.0744) acc 81.2500 (72.8212) lr 6.9098e-04 eta 2:15:13 +epoch [32/50] batch [365/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.8535 (1.0741) acc 78.1250 (72.8339) lr 6.9098e-04 eta 2:15:09 +epoch [32/50] batch [370/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.1514 (1.0779) acc 75.0000 (72.7618) lr 6.9098e-04 eta 2:15:06 +epoch [32/50] batch [375/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.2373 (1.0788) acc 75.0000 (72.8167) lr 6.9098e-04 eta 2:15:00 +epoch [32/50] batch [380/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.8740 (1.0793) acc 75.0000 (72.7796) lr 6.9098e-04 eta 2:14:54 +epoch [32/50] batch [385/500] time 0.877 (0.888) data 0.000 (0.002) loss 0.7788 (1.0763) acc 81.2500 (72.8571) lr 6.9098e-04 eta 2:14:50 +epoch [32/50] batch [390/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.5054 (1.0744) acc 
87.5000 (72.9247) lr 6.9098e-04 eta 2:14:46 +epoch [32/50] batch [395/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.8110 (1.0715) acc 81.2500 (72.9826) lr 6.9098e-04 eta 2:14:42 +epoch [32/50] batch [400/500] time 1.015 (0.888) data 0.000 (0.002) loss 1.2012 (1.0726) acc 68.7500 (72.9531) lr 6.9098e-04 eta 2:14:40 +epoch [32/50] batch [405/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.4375 (1.0733) acc 71.8750 (72.9861) lr 6.9098e-04 eta 2:14:36 +epoch [32/50] batch [410/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.0723 (1.0755) acc 65.6250 (72.9573) lr 6.9098e-04 eta 2:14:31 +epoch [32/50] batch [415/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.5171 (1.0746) acc 81.2500 (73.0045) lr 6.9098e-04 eta 2:14:26 +epoch [32/50] batch [420/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.7402 (1.0748) acc 65.6250 (73.0283) lr 6.9098e-04 eta 2:14:20 +epoch [32/50] batch [425/500] time 0.901 (0.888) data 0.000 (0.002) loss 0.9780 (1.0753) acc 75.0000 (73.0074) lr 6.9098e-04 eta 2:14:15 +epoch [32/50] batch [430/500] time 0.854 (0.887) data 0.000 (0.002) loss 1.1953 (1.0788) acc 71.8750 (72.9578) lr 6.9098e-04 eta 2:14:09 +epoch [32/50] batch [435/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.3838 (1.0838) acc 71.8750 (72.8807) lr 6.9098e-04 eta 2:14:04 +epoch [32/50] batch [440/500] time 0.904 (0.887) data 0.000 (0.002) loss 0.9653 (1.0829) acc 68.7500 (72.9048) lr 6.9098e-04 eta 2:13:59 +epoch [32/50] batch [445/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.2236 (1.0810) acc 62.5000 (72.9565) lr 6.9098e-04 eta 2:13:54 +epoch [32/50] batch [450/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.0273 (1.0807) acc 81.2500 (72.9583) lr 6.9098e-04 eta 2:13:50 +epoch [32/50] batch [455/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.1289 (1.0808) acc 65.6250 (72.9327) lr 6.9098e-04 eta 2:13:45 +epoch [32/50] batch [460/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.7295 (1.0784) acc 81.2500 (72.9620) lr 6.9098e-04 eta 2:13:40 +epoch [32/50] batch 
[465/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.6309 (1.0804) acc 65.6250 (72.9301) lr 6.9098e-04 eta 2:13:35 +epoch [32/50] batch [470/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.1113 (1.0789) acc 71.8750 (72.9521) lr 6.9098e-04 eta 2:13:30 +epoch [32/50] batch [475/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.2832 (1.0775) acc 59.3750 (72.9539) lr 6.9098e-04 eta 2:13:25 +epoch [32/50] batch [480/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.1230 (1.0755) acc 75.0000 (73.0078) lr 6.9098e-04 eta 2:13:20 +epoch [32/50] batch [485/500] time 0.868 (0.887) data 0.000 (0.002) loss 2.1074 (1.0766) acc 53.1250 (72.9639) lr 6.9098e-04 eta 2:13:15 +epoch [32/50] batch [490/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.9248 (1.0774) acc 65.6250 (72.9337) lr 6.9098e-04 eta 2:13:11 +epoch [32/50] batch [495/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.6597 (1.0783) acc 81.2500 (72.8977) lr 6.9098e-04 eta 2:13:06 +epoch [32/50] batch [500/500] time 0.907 (0.887) data 0.000 (0.002) loss 0.7007 (1.0773) acc 84.3750 (72.9250) lr 6.3188e-04 eta 2:13:03 +epoch [33/50] batch [5/500] time 0.904 (1.021) data 0.000 (0.134) loss 0.8560 (0.9834) acc 81.2500 (75.0000) lr 6.3188e-04 eta 2:33:02 +epoch [33/50] batch [10/500] time 0.899 (0.957) data 0.000 (0.067) loss 0.4583 (0.9959) acc 84.3750 (74.6875) lr 6.3188e-04 eta 2:23:26 +epoch [33/50] batch [15/500] time 0.887 (0.935) data 0.000 (0.045) loss 1.2881 (1.0833) acc 75.0000 (73.7500) lr 6.3188e-04 eta 2:19:58 +epoch [33/50] batch [20/500] time 0.859 (0.921) data 0.000 (0.034) loss 0.5088 (1.0266) acc 93.7500 (75.0000) lr 6.3188e-04 eta 2:17:54 +epoch [33/50] batch [25/500] time 0.902 (0.914) data 0.000 (0.027) loss 0.5796 (0.9731) acc 78.1250 (75.6250) lr 6.3188e-04 eta 2:16:46 +epoch [33/50] batch [30/500] time 0.885 (0.909) data 0.000 (0.023) loss 1.0645 (1.0412) acc 81.2500 (74.4792) lr 6.3188e-04 eta 2:15:58 +epoch [33/50] batch [35/500] time 0.870 (0.906) data 0.000 (0.019) loss 1.3018 (1.0506) acc 
71.8750 (73.9286) lr 6.3188e-04 eta 2:15:18 +epoch [33/50] batch [40/500] time 0.862 (0.903) data 0.000 (0.017) loss 0.7500 (1.0455) acc 75.0000 (74.0625) lr 6.3188e-04 eta 2:14:47 +epoch [33/50] batch [45/500] time 0.889 (0.900) data 0.000 (0.015) loss 1.2344 (1.0571) acc 68.7500 (73.7500) lr 6.3188e-04 eta 2:14:22 +epoch [33/50] batch [50/500] time 0.872 (0.899) data 0.000 (0.014) loss 0.9399 (1.0583) acc 71.8750 (73.6875) lr 6.3188e-04 eta 2:14:08 +epoch [33/50] batch [55/500] time 0.888 (0.897) data 0.000 (0.012) loss 0.7881 (1.0590) acc 78.1250 (73.5795) lr 6.3188e-04 eta 2:13:43 +epoch [33/50] batch [60/500] time 0.896 (0.898) data 0.000 (0.011) loss 1.0449 (1.0657) acc 71.8750 (73.1250) lr 6.3188e-04 eta 2:13:48 +epoch [33/50] batch [65/500] time 0.885 (0.897) data 0.000 (0.011) loss 1.9121 (1.0767) acc 53.1250 (72.8846) lr 6.3188e-04 eta 2:13:35 +epoch [33/50] batch [70/500] time 0.848 (0.896) data 0.000 (0.010) loss 1.0566 (1.0638) acc 78.1250 (73.2143) lr 6.3188e-04 eta 2:13:18 +epoch [33/50] batch [75/500] time 0.861 (0.894) data 0.000 (0.009) loss 1.3145 (1.0625) acc 71.8750 (73.2917) lr 6.3188e-04 eta 2:13:03 +epoch [33/50] batch [80/500] time 0.877 (0.894) data 0.000 (0.009) loss 1.5195 (1.0774) acc 71.8750 (73.1641) lr 6.3188e-04 eta 2:12:53 +epoch [33/50] batch [85/500] time 0.866 (0.893) data 0.000 (0.008) loss 1.2432 (1.0802) acc 71.8750 (72.9412) lr 6.3188e-04 eta 2:12:38 +epoch [33/50] batch [90/500] time 0.906 (0.892) data 0.000 (0.008) loss 0.8389 (1.0790) acc 81.2500 (73.1250) lr 6.3188e-04 eta 2:12:29 +epoch [33/50] batch [95/500] time 0.881 (0.892) data 0.000 (0.007) loss 0.5122 (1.0652) acc 84.3750 (73.1579) lr 6.3188e-04 eta 2:12:25 +epoch [33/50] batch [100/500] time 0.885 (0.891) data 0.000 (0.007) loss 1.3379 (1.0695) acc 68.7500 (73.0938) lr 6.3188e-04 eta 2:12:11 +epoch [33/50] batch [105/500] time 0.904 (0.892) data 0.000 (0.007) loss 0.9395 (1.0745) acc 71.8750 (73.1250) lr 6.3188e-04 eta 2:12:10 +epoch [33/50] batch [110/500] time 
0.882 (0.891) data 0.000 (0.006) loss 1.5576 (1.0873) acc 71.8750 (73.0398) lr 6.3188e-04 eta 2:12:03 +epoch [33/50] batch [115/500] time 0.901 (0.891) data 0.000 (0.006) loss 0.5952 (1.0771) acc 78.1250 (73.2880) lr 6.3188e-04 eta 2:11:53 +epoch [33/50] batch [120/500] time 0.888 (0.890) data 0.000 (0.006) loss 0.5713 (1.0636) acc 81.2500 (73.4896) lr 6.3188e-04 eta 2:11:44 +epoch [33/50] batch [125/500] time 0.884 (0.890) data 0.000 (0.006) loss 0.9248 (1.0673) acc 75.0000 (73.4250) lr 6.3188e-04 eta 2:11:41 +epoch [33/50] batch [130/500] time 0.890 (0.891) data 0.000 (0.005) loss 1.1104 (1.0677) acc 62.5000 (73.3654) lr 6.3188e-04 eta 2:11:40 +epoch [33/50] batch [135/500] time 0.935 (0.891) data 0.000 (0.005) loss 0.8291 (1.0717) acc 81.2500 (73.3333) lr 6.3188e-04 eta 2:11:38 +epoch [33/50] batch [140/500] time 0.896 (0.891) data 0.000 (0.005) loss 1.5234 (1.0702) acc 71.8750 (73.4375) lr 6.3188e-04 eta 2:11:33 +epoch [33/50] batch [145/500] time 0.877 (0.891) data 0.000 (0.005) loss 1.1084 (1.0723) acc 71.8750 (73.3621) lr 6.3188e-04 eta 2:11:26 +epoch [33/50] batch [150/500] time 0.896 (0.890) data 0.000 (0.005) loss 0.7388 (1.0636) acc 75.0000 (73.4375) lr 6.3188e-04 eta 2:11:19 +epoch [33/50] batch [155/500] time 0.917 (0.890) data 0.000 (0.005) loss 1.0635 (1.0724) acc 78.1250 (73.2460) lr 6.3188e-04 eta 2:11:15 +epoch [33/50] batch [160/500] time 0.884 (0.890) data 0.000 (0.004) loss 1.1621 (1.0644) acc 78.1250 (73.5352) lr 6.3188e-04 eta 2:11:05 +epoch [33/50] batch [165/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.1631 (1.0622) acc 65.6250 (73.4848) lr 6.3188e-04 eta 2:11:02 +epoch [33/50] batch [170/500] time 0.865 (0.890) data 0.000 (0.004) loss 0.7207 (1.0658) acc 78.1250 (73.4007) lr 6.3188e-04 eta 2:10:55 +epoch [33/50] batch [175/500] time 0.905 (0.889) data 0.000 (0.004) loss 1.6152 (1.0692) acc 71.8750 (73.3393) lr 6.3188e-04 eta 2:10:49 +epoch [33/50] batch [180/500] time 0.905 (0.889) data 0.000 (0.004) loss 1.7109 (1.0715) acc 68.7500 
(73.4549) lr 6.3188e-04 eta 2:10:44 +epoch [33/50] batch [185/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.0488 (1.0727) acc 71.8750 (73.3953) lr 6.3188e-04 eta 2:10:37 +epoch [33/50] batch [190/500] time 0.898 (0.889) data 0.000 (0.004) loss 0.5322 (1.0667) acc 78.1250 (73.4704) lr 6.3188e-04 eta 2:10:31 +epoch [33/50] batch [195/500] time 0.916 (0.889) data 0.000 (0.004) loss 1.2139 (1.0658) acc 71.8750 (73.4615) lr 6.3188e-04 eta 2:10:27 +epoch [33/50] batch [200/500] time 0.891 (0.889) data 0.000 (0.004) loss 1.4893 (1.0661) acc 68.7500 (73.4688) lr 6.3188e-04 eta 2:10:25 +epoch [33/50] batch [205/500] time 0.916 (0.890) data 0.000 (0.003) loss 0.9819 (1.0604) acc 62.5000 (73.4146) lr 6.3188e-04 eta 2:10:27 +epoch [33/50] batch [210/500] time 0.888 (0.890) data 0.000 (0.003) loss 1.5264 (1.0680) acc 59.3750 (73.2738) lr 6.3188e-04 eta 2:10:22 +epoch [33/50] batch [215/500] time 0.896 (0.890) data 0.000 (0.003) loss 0.9429 (1.0700) acc 78.1250 (73.2994) lr 6.3188e-04 eta 2:10:16 +epoch [33/50] batch [220/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.9717 (1.0631) acc 71.8750 (73.4517) lr 6.3188e-04 eta 2:10:08 +epoch [33/50] batch [225/500] time 0.890 (0.889) data 0.000 (0.003) loss 1.0000 (1.0613) acc 65.6250 (73.4861) lr 6.3188e-04 eta 2:10:01 +epoch [33/50] batch [230/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.2197 (1.0616) acc 71.8750 (73.5326) lr 6.3188e-04 eta 2:09:55 +epoch [33/50] batch [235/500] time 0.874 (0.889) data 0.000 (0.003) loss 1.3496 (1.0611) acc 71.8750 (73.5239) lr 6.3188e-04 eta 2:09:49 +epoch [33/50] batch [240/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.1543 (1.0601) acc 71.8750 (73.5156) lr 6.3188e-04 eta 2:09:43 +epoch [33/50] batch [245/500] time 0.905 (0.889) data 0.000 (0.003) loss 1.3232 (1.0616) acc 53.1250 (73.3801) lr 6.3188e-04 eta 2:09:38 +epoch [33/50] batch [250/500] time 0.912 (0.889) data 0.000 (0.003) loss 1.5068 (1.0617) acc 65.6250 (73.3625) lr 6.3188e-04 eta 2:09:36 +epoch [33/50] batch [255/500] 
time 0.887 (0.889) data 0.000 (0.003) loss 1.0801 (1.0669) acc 78.1250 (73.2843) lr 6.3188e-04 eta 2:09:31 +epoch [33/50] batch [260/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.2266 (1.0706) acc 68.7500 (73.2692) lr 6.3188e-04 eta 2:09:24 +epoch [33/50] batch [265/500] time 0.854 (0.888) data 0.000 (0.003) loss 1.0771 (1.0703) acc 65.6250 (73.2783) lr 6.3188e-04 eta 2:09:15 +epoch [33/50] batch [270/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.1152 (1.0768) acc 65.6250 (73.1250) lr 6.3188e-04 eta 2:09:08 +epoch [33/50] batch [275/500] time 0.881 (0.887) data 0.000 (0.003) loss 0.8511 (1.0800) acc 75.0000 (73.0568) lr 6.3188e-04 eta 2:09:01 +epoch [33/50] batch [280/500] time 0.877 (0.887) data 0.000 (0.003) loss 1.0615 (1.0789) acc 65.6250 (73.0915) lr 6.3188e-04 eta 2:08:55 +epoch [33/50] batch [285/500] time 0.858 (0.887) data 0.000 (0.003) loss 1.1270 (1.0779) acc 71.8750 (73.1140) lr 6.3188e-04 eta 2:08:51 +epoch [33/50] batch [290/500] time 0.871 (0.887) data 0.000 (0.003) loss 0.9463 (1.0836) acc 75.0000 (72.9849) lr 6.3188e-04 eta 2:08:44 +epoch [33/50] batch [295/500] time 0.887 (0.887) data 0.000 (0.002) loss 2.4395 (1.0865) acc 46.8750 (72.9025) lr 6.3188e-04 eta 2:08:39 +epoch [33/50] batch [300/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.8408 (1.0869) acc 84.3750 (72.9375) lr 6.3188e-04 eta 2:08:32 +epoch [33/50] batch [305/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0664 (1.0861) acc 78.1250 (72.9098) lr 6.3188e-04 eta 2:08:28 +epoch [33/50] batch [310/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.2363 (1.0906) acc 68.7500 (72.8327) lr 6.3188e-04 eta 2:08:23 +epoch [33/50] batch [315/500] time 0.859 (0.886) data 0.000 (0.002) loss 1.1172 (1.0916) acc 78.1250 (72.8671) lr 6.3188e-04 eta 2:08:16 +epoch [33/50] batch [320/500] time 0.863 (0.886) data 0.000 (0.002) loss 0.7588 (1.0900) acc 75.0000 (72.9492) lr 6.3188e-04 eta 2:08:12 +epoch [33/50] batch [325/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7993 (1.0898) acc 
75.0000 (72.8750) lr 6.3188e-04 eta 2:08:06 +epoch [33/50] batch [330/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.7964 (1.0883) acc 75.0000 (72.8598) lr 6.3188e-04 eta 2:08:00 +epoch [33/50] batch [335/500] time 0.860 (0.886) data 0.000 (0.002) loss 0.5576 (1.0878) acc 84.3750 (72.9011) lr 6.3188e-04 eta 2:07:54 +epoch [33/50] batch [340/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.0312 (1.0904) acc 81.2500 (72.8401) lr 6.3188e-04 eta 2:07:46 +epoch [33/50] batch [345/500] time 0.962 (0.886) data 0.000 (0.002) loss 1.1230 (1.0906) acc 65.6250 (72.7627) lr 6.3188e-04 eta 2:07:45 +epoch [33/50] batch [350/500] time 0.859 (0.886) data 0.000 (0.002) loss 1.7100 (1.0908) acc 62.5000 (72.8214) lr 6.3188e-04 eta 2:07:40 +epoch [33/50] batch [355/500] time 0.895 (0.885) data 0.000 (0.002) loss 0.3965 (1.0906) acc 84.3750 (72.7729) lr 6.3188e-04 eta 2:07:35 +epoch [33/50] batch [360/500] time 0.863 (0.885) data 0.000 (0.002) loss 1.0430 (1.0905) acc 75.0000 (72.7951) lr 6.3188e-04 eta 2:07:29 +epoch [33/50] batch [365/500] time 0.900 (0.885) data 0.000 (0.002) loss 1.3662 (1.0916) acc 65.6250 (72.7825) lr 6.3188e-04 eta 2:07:25 +epoch [33/50] batch [370/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.3184 (1.0893) acc 53.1250 (72.7787) lr 6.3188e-04 eta 2:07:20 +epoch [33/50] batch [375/500] time 0.890 (0.885) data 0.000 (0.002) loss 0.9512 (1.0890) acc 81.2500 (72.7750) lr 6.3188e-04 eta 2:07:16 +epoch [33/50] batch [380/500] time 0.914 (0.885) data 0.000 (0.002) loss 1.5977 (1.0901) acc 68.7500 (72.7467) lr 6.3188e-04 eta 2:07:12 +epoch [33/50] batch [385/500] time 0.920 (0.886) data 0.000 (0.002) loss 0.8276 (1.0900) acc 84.3750 (72.7922) lr 6.3188e-04 eta 2:07:10 +epoch [33/50] batch [390/500] time 0.844 (0.886) data 0.000 (0.002) loss 1.3535 (1.0879) acc 71.8750 (72.8526) lr 6.3188e-04 eta 2:07:07 +epoch [33/50] batch [395/500] time 0.862 (0.886) data 0.000 (0.002) loss 0.6714 (1.0850) acc 78.1250 (72.8877) lr 6.3188e-04 eta 2:07:01 +epoch [33/50] batch 
[400/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.5059 (1.0849) acc 65.6250 (72.8750) lr 6.3188e-04 eta 2:06:57 +epoch [33/50] batch [405/500] time 0.920 (0.886) data 0.000 (0.002) loss 2.2637 (1.0867) acc 65.6250 (72.8704) lr 6.3188e-04 eta 2:06:53 +epoch [33/50] batch [410/500] time 0.850 (0.886) data 0.000 (0.002) loss 1.2354 (1.0867) acc 75.0000 (72.9192) lr 6.3188e-04 eta 2:06:49 +epoch [33/50] batch [415/500] time 0.875 (0.886) data 0.000 (0.002) loss 0.6748 (1.0853) acc 81.2500 (72.8991) lr 6.3188e-04 eta 2:06:44 +epoch [33/50] batch [420/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.0508 (1.0880) acc 65.6250 (72.8051) lr 6.3188e-04 eta 2:06:40 +epoch [33/50] batch [425/500] time 0.879 (0.886) data 0.000 (0.002) loss 2.0703 (1.0913) acc 59.3750 (72.6765) lr 6.3188e-04 eta 2:06:35 +epoch [33/50] batch [430/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.9155 (1.0926) acc 68.7500 (72.6308) lr 6.3188e-04 eta 2:06:32 +epoch [33/50] batch [435/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.9624 (1.0925) acc 81.2500 (72.6437) lr 6.3188e-04 eta 2:06:27 +epoch [33/50] batch [440/500] time 0.913 (0.886) data 0.000 (0.002) loss 0.8340 (1.0925) acc 71.8750 (72.6207) lr 6.3188e-04 eta 2:06:24 +epoch [33/50] batch [445/500] time 0.895 (0.886) data 0.000 (0.002) loss 0.9473 (1.0937) acc 78.1250 (72.6124) lr 6.3188e-04 eta 2:06:21 +epoch [33/50] batch [450/500] time 0.952 (0.886) data 0.000 (0.002) loss 1.3301 (1.0959) acc 71.8750 (72.6111) lr 6.3188e-04 eta 2:06:18 +epoch [33/50] batch [455/500] time 0.853 (0.886) data 0.000 (0.002) loss 1.1133 (1.0942) acc 71.8750 (72.6511) lr 6.3188e-04 eta 2:06:11 +epoch [33/50] batch [460/500] time 0.909 (0.886) data 0.000 (0.002) loss 0.9893 (1.0933) acc 65.6250 (72.6223) lr 6.3188e-04 eta 2:06:07 +epoch [33/50] batch [465/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.0410 (1.0924) acc 78.1250 (72.6277) lr 6.3188e-04 eta 2:06:01 +epoch [33/50] batch [470/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.1553 
(1.0933) acc 71.8750 (72.6263) lr 6.3188e-04 eta 2:05:57 +epoch [33/50] batch [475/500] time 0.852 (0.886) data 0.000 (0.002) loss 0.9761 (1.0936) acc 68.7500 (72.6118) lr 6.3188e-04 eta 2:05:52 +epoch [33/50] batch [480/500] time 0.910 (0.886) data 0.000 (0.002) loss 0.9233 (1.0924) acc 78.1250 (72.6107) lr 6.3188e-04 eta 2:05:47 +epoch [33/50] batch [485/500] time 0.899 (0.886) data 0.000 (0.002) loss 0.9863 (1.0906) acc 68.7500 (72.6482) lr 6.3188e-04 eta 2:05:43 +epoch [33/50] batch [490/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.4600 (1.0924) acc 62.5000 (72.6020) lr 6.3188e-04 eta 2:05:40 +epoch [33/50] batch [495/500] time 0.864 (0.886) data 0.000 (0.002) loss 2.0059 (1.0951) acc 43.7500 (72.5126) lr 6.3188e-04 eta 2:05:35 +epoch [33/50] batch [500/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.2129 (1.0941) acc 75.0000 (72.5312) lr 5.7422e-04 eta 2:05:31 +epoch [34/50] batch [5/500] time 0.893 (1.048) data 0.000 (0.139) loss 0.8296 (0.9346) acc 75.0000 (71.8750) lr 5.7422e-04 eta 2:28:20 +epoch [34/50] batch [10/500] time 0.889 (0.965) data 0.000 (0.070) loss 1.3643 (1.1685) acc 68.7500 (69.6875) lr 5.7422e-04 eta 2:16:30 +epoch [34/50] batch [15/500] time 0.880 (0.932) data 0.000 (0.046) loss 1.0850 (1.1464) acc 68.7500 (71.0417) lr 5.7422e-04 eta 2:11:48 +epoch [34/50] batch [20/500] time 0.879 (0.919) data 0.000 (0.035) loss 0.7778 (1.0803) acc 75.0000 (72.3438) lr 5.7422e-04 eta 2:09:50 +epoch [34/50] batch [25/500] time 0.869 (0.911) data 0.000 (0.028) loss 1.2812 (1.0513) acc 62.5000 (73.1250) lr 5.7422e-04 eta 2:08:38 +epoch [34/50] batch [30/500] time 0.883 (0.908) data 0.000 (0.023) loss 0.8843 (1.0244) acc 75.0000 (74.2708) lr 5.7422e-04 eta 2:08:08 +epoch [34/50] batch [35/500] time 0.855 (0.903) data 0.000 (0.020) loss 0.3889 (1.0050) acc 90.6250 (74.8214) lr 5.7422e-04 eta 2:07:22 +epoch [34/50] batch [40/500] time 0.890 (0.902) data 0.000 (0.018) loss 1.5850 (1.0415) acc 56.2500 (73.6719) lr 5.7422e-04 eta 2:07:07 +epoch [34/50] 
batch [45/500] time 0.892 (0.905) data 0.000 (0.016) loss 1.2246 (1.0476) acc 71.8750 (73.8194) lr 5.7422e-04 eta 2:07:27 +epoch [34/50] batch [50/500] time 0.858 (0.900) data 0.000 (0.014) loss 1.5586 (1.0804) acc 68.7500 (72.9375) lr 5.7422e-04 eta 2:06:48 +epoch [34/50] batch [55/500] time 0.898 (0.898) data 0.000 (0.013) loss 1.1768 (1.0763) acc 65.6250 (73.1250) lr 5.7422e-04 eta 2:06:24 +epoch [34/50] batch [60/500] time 0.850 (0.897) data 0.000 (0.012) loss 1.7236 (1.0950) acc 68.7500 (72.7083) lr 5.7422e-04 eta 2:06:11 +epoch [34/50] batch [65/500] time 0.877 (0.897) data 0.000 (0.011) loss 1.1299 (1.1022) acc 65.6250 (72.5481) lr 5.7422e-04 eta 2:06:02 +epoch [34/50] batch [70/500] time 0.887 (0.896) data 0.000 (0.010) loss 0.8794 (1.0991) acc 75.0000 (72.7232) lr 5.7422e-04 eta 2:05:49 +epoch [34/50] batch [75/500] time 0.888 (0.895) data 0.000 (0.009) loss 0.9058 (1.0971) acc 75.0000 (72.6667) lr 5.7422e-04 eta 2:05:38 +epoch [34/50] batch [80/500] time 0.866 (0.895) data 0.001 (0.009) loss 0.9224 (1.0967) acc 81.2500 (73.0859) lr 5.7422e-04 eta 2:05:33 +epoch [34/50] batch [85/500] time 0.891 (0.894) data 0.000 (0.008) loss 1.1270 (1.1007) acc 75.0000 (72.9412) lr 5.7422e-04 eta 2:05:24 +epoch [34/50] batch [90/500] time 0.860 (0.894) data 0.000 (0.008) loss 1.0117 (1.0880) acc 71.8750 (73.0903) lr 5.7422e-04 eta 2:05:18 +epoch [34/50] batch [95/500] time 0.878 (0.893) data 0.000 (0.008) loss 1.2236 (1.0964) acc 65.6250 (72.9276) lr 5.7422e-04 eta 2:05:04 +epoch [34/50] batch [100/500] time 0.871 (0.892) data 0.000 (0.007) loss 1.4297 (1.0941) acc 75.0000 (73.1250) lr 5.7422e-04 eta 2:04:52 +epoch [34/50] batch [105/500] time 0.886 (0.891) data 0.000 (0.007) loss 0.8931 (1.0961) acc 71.8750 (72.9762) lr 5.7422e-04 eta 2:04:42 +epoch [34/50] batch [110/500] time 0.907 (0.891) data 0.000 (0.007) loss 0.5605 (1.0884) acc 96.8750 (73.1250) lr 5.7422e-04 eta 2:04:38 +epoch [34/50] batch [115/500] time 0.875 (0.891) data 0.000 (0.006) loss 1.2266 (1.0836) acc 
68.7500 (73.0707) lr 5.7422e-04 eta 2:04:32 +epoch [34/50] batch [120/500] time 0.897 (0.891) data 0.000 (0.006) loss 1.3369 (1.0808) acc 62.5000 (73.0469) lr 5.7422e-04 eta 2:04:29 +epoch [34/50] batch [125/500] time 0.896 (0.891) data 0.000 (0.006) loss 1.5840 (1.0816) acc 65.6250 (73.1250) lr 5.7422e-04 eta 2:04:20 +epoch [34/50] batch [130/500] time 0.896 (0.891) data 0.000 (0.006) loss 0.4209 (1.0713) acc 84.3750 (73.3173) lr 5.7422e-04 eta 2:04:13 +epoch [34/50] batch [135/500] time 0.866 (0.890) data 0.000 (0.005) loss 0.9512 (1.0680) acc 71.8750 (73.3333) lr 5.7422e-04 eta 2:04:05 +epoch [34/50] batch [140/500] time 0.902 (0.890) data 0.000 (0.005) loss 0.6909 (1.0627) acc 81.2500 (73.3929) lr 5.7422e-04 eta 2:03:57 +epoch [34/50] batch [145/500] time 0.847 (0.889) data 0.000 (0.005) loss 0.7812 (1.0645) acc 87.5000 (73.4698) lr 5.7422e-04 eta 2:03:46 +epoch [34/50] batch [150/500] time 0.888 (0.888) data 0.000 (0.005) loss 1.2275 (1.0663) acc 65.6250 (73.5208) lr 5.7422e-04 eta 2:03:36 +epoch [34/50] batch [155/500] time 0.866 (0.888) data 0.000 (0.005) loss 0.8877 (1.0596) acc 68.7500 (73.6290) lr 5.7422e-04 eta 2:03:31 +epoch [34/50] batch [160/500] time 0.880 (0.888) data 0.000 (0.005) loss 1.2109 (1.0574) acc 62.5000 (73.6523) lr 5.7422e-04 eta 2:03:24 +epoch [34/50] batch [165/500] time 0.881 (0.888) data 0.000 (0.004) loss 0.6885 (1.0539) acc 81.2500 (73.7311) lr 5.7422e-04 eta 2:03:19 +epoch [34/50] batch [170/500] time 0.882 (0.888) data 0.000 (0.004) loss 0.7217 (1.0549) acc 65.6250 (73.6029) lr 5.7422e-04 eta 2:03:14 +epoch [34/50] batch [175/500] time 0.892 (0.888) data 0.000 (0.004) loss 1.0879 (1.0604) acc 71.8750 (73.3929) lr 5.7422e-04 eta 2:03:09 +epoch [34/50] batch [180/500] time 0.859 (0.887) data 0.000 (0.004) loss 0.4666 (1.0638) acc 87.5000 (73.3160) lr 5.7422e-04 eta 2:03:03 +epoch [34/50] batch [185/500] time 0.860 (0.887) data 0.000 (0.004) loss 1.2168 (1.0650) acc 68.7500 (73.2601) lr 5.7422e-04 eta 2:02:55 +epoch [34/50] batch 
[190/500] time 0.882 (0.887) data 0.000 (0.004) loss 1.0361 (1.0637) acc 71.8750 (73.2895) lr 5.7422e-04 eta 2:02:55 +epoch [34/50] batch [195/500] time 0.879 (0.887) data 0.000 (0.004) loss 1.2246 (1.0650) acc 62.5000 (73.2532) lr 5.7422e-04 eta 2:02:48 +epoch [34/50] batch [200/500] time 0.855 (0.887) data 0.000 (0.004) loss 0.5874 (1.0642) acc 84.3750 (73.2969) lr 5.7422e-04 eta 2:02:42 +epoch [34/50] batch [205/500] time 0.886 (0.887) data 0.000 (0.004) loss 1.0273 (1.0628) acc 81.2500 (73.3994) lr 5.7422e-04 eta 2:02:37 +epoch [34/50] batch [210/500] time 0.860 (0.887) data 0.000 (0.004) loss 0.8345 (1.0597) acc 71.8750 (73.3929) lr 5.7422e-04 eta 2:02:31 +epoch [34/50] batch [215/500] time 0.902 (0.887) data 0.000 (0.003) loss 1.0332 (1.0630) acc 71.8750 (73.2849) lr 5.7422e-04 eta 2:02:28 +epoch [34/50] batch [220/500] time 0.874 (0.887) data 0.000 (0.003) loss 0.9253 (1.0619) acc 78.1250 (73.2955) lr 5.7422e-04 eta 2:02:22 +epoch [34/50] batch [225/500] time 0.847 (0.887) data 0.000 (0.003) loss 1.2432 (1.0608) acc 75.0000 (73.3194) lr 5.7422e-04 eta 2:02:17 +epoch [34/50] batch [230/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.3525 (1.0655) acc 81.2500 (73.3424) lr 5.7422e-04 eta 2:02:10 +epoch [34/50] batch [235/500] time 0.893 (0.887) data 0.000 (0.003) loss 1.3916 (1.0693) acc 65.6250 (73.2580) lr 5.7422e-04 eta 2:02:09 +epoch [34/50] batch [240/500] time 0.925 (0.887) data 0.000 (0.003) loss 1.6533 (1.0709) acc 68.7500 (73.2552) lr 5.7422e-04 eta 2:02:05 +epoch [34/50] batch [245/500] time 0.874 (0.886) data 0.000 (0.003) loss 1.0664 (1.0723) acc 81.2500 (73.3163) lr 5.7422e-04 eta 2:01:57 +epoch [34/50] batch [250/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.1455 (1.0732) acc 75.0000 (73.3000) lr 5.7422e-04 eta 2:01:51 +epoch [34/50] batch [255/500] time 0.901 (0.886) data 0.000 (0.003) loss 0.5610 (1.0711) acc 87.5000 (73.3088) lr 5.7422e-04 eta 2:01:45 +epoch [34/50] batch [260/500] time 0.898 (0.886) data 0.000 (0.003) loss 1.6553 
(1.0743) acc 62.5000 (73.2812) lr 5.7422e-04 eta 2:01:42 +epoch [34/50] batch [265/500] time 0.870 (0.886) data 0.000 (0.003) loss 0.9121 (1.0780) acc 75.0000 (73.2429) lr 5.7422e-04 eta 2:01:37 +epoch [34/50] batch [270/500] time 0.855 (0.886) data 0.000 (0.003) loss 0.6289 (1.0738) acc 84.3750 (73.3796) lr 5.7422e-04 eta 2:01:33 +epoch [34/50] batch [275/500] time 0.893 (0.886) data 0.000 (0.003) loss 1.0205 (1.0741) acc 71.8750 (73.3523) lr 5.7422e-04 eta 2:01:28 +epoch [34/50] batch [280/500] time 0.885 (0.886) data 0.000 (0.003) loss 0.7930 (1.0751) acc 81.2500 (73.3705) lr 5.7422e-04 eta 2:01:24 +epoch [34/50] batch [285/500] time 0.896 (0.886) data 0.000 (0.003) loss 1.0645 (1.0762) acc 68.7500 (73.3224) lr 5.7422e-04 eta 2:01:20 +epoch [34/50] batch [290/500] time 0.882 (0.886) data 0.000 (0.003) loss 0.7705 (1.0761) acc 71.8750 (73.3297) lr 5.7422e-04 eta 2:01:14 +epoch [34/50] batch [295/500] time 0.910 (0.886) data 0.000 (0.003) loss 1.0449 (1.0759) acc 75.0000 (73.3263) lr 5.7422e-04 eta 2:01:11 +epoch [34/50] batch [300/500] time 0.884 (0.886) data 0.000 (0.003) loss 1.2656 (1.0768) acc 65.6250 (73.3021) lr 5.7422e-04 eta 2:01:07 +epoch [34/50] batch [305/500] time 0.924 (0.887) data 0.000 (0.003) loss 1.0186 (1.0771) acc 62.5000 (73.2992) lr 5.7422e-04 eta 2:01:05 +epoch [34/50] batch [310/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0107 (1.0796) acc 71.8750 (73.2056) lr 5.7422e-04 eta 2:01:01 +epoch [34/50] batch [315/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.2715 (1.0804) acc 68.7500 (73.2837) lr 5.7422e-04 eta 2:00:55 +epoch [34/50] batch [320/500] time 0.880 (0.886) data 0.000 (0.002) loss 0.8975 (1.0787) acc 81.2500 (73.2910) lr 5.7422e-04 eta 2:00:51 +epoch [34/50] batch [325/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.3252 (1.0779) acc 65.6250 (73.3077) lr 5.7422e-04 eta 2:00:47 +epoch [34/50] batch [330/500] time 0.985 (0.887) data 0.000 (0.002) loss 0.8188 (1.0764) acc 78.1250 (73.3712) lr 5.7422e-04 eta 2:00:46 +epoch 
[34/50] batch [335/500] time 0.850 (0.887) data 0.000 (0.002) loss 1.4424 (1.0763) acc 65.6250 (73.3955) lr 5.7422e-04 eta 2:00:39 +epoch [34/50] batch [340/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.8965 (1.0740) acc 71.8750 (73.4375) lr 5.7422e-04 eta 2:00:33 +epoch [34/50] batch [345/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.0449 (1.0702) acc 78.1250 (73.5417) lr 5.7422e-04 eta 2:00:27 +epoch [34/50] batch [350/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.3574 (1.0731) acc 62.5000 (73.4554) lr 5.7422e-04 eta 2:00:23 +epoch [34/50] batch [355/500] time 0.876 (0.886) data 0.000 (0.002) loss 1.2285 (1.0746) acc 62.5000 (73.3979) lr 5.7422e-04 eta 2:00:18 +epoch [34/50] batch [360/500] time 0.873 (0.886) data 0.000 (0.002) loss 0.9521 (1.0748) acc 78.1250 (73.4028) lr 5.7422e-04 eta 2:00:13 +epoch [34/50] batch [365/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.7622 (1.0760) acc 75.0000 (73.3818) lr 5.7422e-04 eta 2:00:10 +epoch [34/50] batch [370/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.0029 (1.0763) acc 71.8750 (73.3530) lr 5.7422e-04 eta 2:00:05 +epoch [34/50] batch [375/500] time 0.872 (0.887) data 0.000 (0.002) loss 0.6465 (1.0746) acc 78.1250 (73.3833) lr 5.7422e-04 eta 2:00:03 +epoch [34/50] batch [380/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.9697 (1.0766) acc 65.6250 (73.3224) lr 5.7422e-04 eta 1:59:59 +epoch [34/50] batch [385/500] time 0.870 (0.886) data 0.000 (0.002) loss 1.0244 (1.0798) acc 71.8750 (73.2630) lr 5.7422e-04 eta 1:59:53 +epoch [34/50] batch [390/500] time 0.905 (0.886) data 0.000 (0.002) loss 0.6675 (1.0768) acc 78.1250 (73.3013) lr 5.7422e-04 eta 1:59:49 +epoch [34/50] batch [395/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.9658 (1.0795) acc 68.7500 (73.2199) lr 5.7422e-04 eta 1:59:45 +epoch [34/50] batch [400/500] time 0.891 (0.886) data 0.000 (0.002) loss 1.2568 (1.0788) acc 78.1250 (73.2656) lr 5.7422e-04 eta 1:59:39 +epoch [34/50] batch [405/500] time 0.886 (0.886) data 0.000 (0.002) loss 
1.3701 (1.0773) acc 68.7500 (73.3256) lr 5.7422e-04 eta 1:59:35 +epoch [34/50] batch [410/500] time 0.870 (0.886) data 0.000 (0.002) loss 0.9385 (1.0781) acc 75.0000 (73.2927) lr 5.7422e-04 eta 1:59:31 +epoch [34/50] batch [415/500] time 0.873 (0.886) data 0.000 (0.002) loss 1.0303 (1.0793) acc 71.8750 (73.3057) lr 5.7422e-04 eta 1:59:26 +epoch [34/50] batch [420/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.0488 (1.0794) acc 65.6250 (73.2440) lr 5.7422e-04 eta 1:59:21 +epoch [34/50] batch [425/500] time 0.896 (0.886) data 0.000 (0.002) loss 1.2705 (1.0804) acc 71.8750 (73.2279) lr 5.7422e-04 eta 1:59:17 +epoch [34/50] batch [430/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.9541 (1.0801) acc 71.8750 (73.2558) lr 5.7422e-04 eta 1:59:12 +epoch [34/50] batch [435/500] time 0.880 (0.886) data 0.000 (0.002) loss 1.1768 (1.0794) acc 78.1250 (73.2830) lr 5.7422e-04 eta 1:59:06 +epoch [34/50] batch [440/500] time 0.868 (0.886) data 0.000 (0.002) loss 1.5518 (1.0811) acc 59.3750 (73.2741) lr 5.7422e-04 eta 1:59:00 +epoch [34/50] batch [445/500] time 0.907 (0.886) data 0.000 (0.002) loss 0.8237 (1.0804) acc 68.7500 (73.2795) lr 5.7422e-04 eta 1:58:55 +epoch [34/50] batch [450/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.0752 (1.0799) acc 78.1250 (73.2917) lr 5.7422e-04 eta 1:58:51 +epoch [34/50] batch [455/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.1982 (1.0770) acc 75.0000 (73.3654) lr 5.7422e-04 eta 1:58:46 +epoch [34/50] batch [460/500] time 0.864 (0.886) data 0.000 (0.002) loss 1.0908 (1.0778) acc 75.0000 (73.3220) lr 5.7422e-04 eta 1:58:41 +epoch [34/50] batch [465/500] time 0.863 (0.886) data 0.000 (0.002) loss 1.0068 (1.0777) acc 78.1250 (73.3333) lr 5.7422e-04 eta 1:58:37 +epoch [34/50] batch [470/500] time 0.930 (0.886) data 0.000 (0.002) loss 0.7495 (1.0775) acc 81.2500 (73.3378) lr 5.7422e-04 eta 1:58:34 +epoch [34/50] batch [475/500] time 0.914 (0.886) data 0.000 (0.002) loss 1.2119 (1.0785) acc 78.1250 (73.3158) lr 5.7422e-04 eta 1:58:31 
+epoch [34/50] batch [480/500] time 0.917 (0.886) data 0.000 (0.002) loss 0.8662 (1.0768) acc 84.3750 (73.3919) lr 5.7422e-04 eta 1:58:28 +epoch [34/50] batch [485/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.0957 (1.0769) acc 68.7500 (73.4021) lr 5.7422e-04 eta 1:58:24 +epoch [34/50] batch [490/500] time 0.918 (0.886) data 0.000 (0.002) loss 0.5083 (1.0751) acc 81.2500 (73.4566) lr 5.7422e-04 eta 1:58:19 +epoch [34/50] batch [495/500] time 0.898 (0.886) data 0.000 (0.002) loss 0.7827 (1.0754) acc 81.2500 (73.4470) lr 5.7422e-04 eta 1:58:15 +epoch [34/50] batch [500/500] time 0.912 (0.886) data 0.000 (0.002) loss 1.5430 (1.0748) acc 62.5000 (73.4437) lr 5.1825e-04 eta 1:58:11 +epoch [35/50] batch [5/500] time 0.887 (1.031) data 0.000 (0.141) loss 0.6831 (1.1127) acc 78.1250 (73.1250) lr 5.1825e-04 eta 2:17:20 +epoch [35/50] batch [10/500] time 0.902 (0.961) data 0.000 (0.071) loss 1.4395 (1.0976) acc 65.6250 (71.8750) lr 5.1825e-04 eta 2:07:56 +epoch [35/50] batch [15/500] time 0.889 (0.933) data 0.000 (0.047) loss 1.2471 (1.0714) acc 71.8750 (72.9167) lr 5.1825e-04 eta 2:04:07 +epoch [35/50] batch [20/500] time 0.925 (0.923) data 0.000 (0.035) loss 1.1270 (1.0735) acc 78.1250 (72.9688) lr 5.1825e-04 eta 2:02:46 +epoch [35/50] batch [25/500] time 0.859 (0.916) data 0.000 (0.028) loss 0.7520 (1.0322) acc 81.2500 (73.6250) lr 5.1825e-04 eta 2:01:43 +epoch [35/50] batch [30/500] time 0.860 (0.908) data 0.000 (0.024) loss 1.0049 (1.0326) acc 75.0000 (74.0625) lr 5.1825e-04 eta 2:00:39 +epoch [35/50] batch [35/500] time 0.882 (0.905) data 0.000 (0.020) loss 1.9756 (1.0393) acc 56.2500 (74.0179) lr 5.1825e-04 eta 2:00:04 +epoch [35/50] batch [40/500] time 0.880 (0.903) data 0.000 (0.018) loss 0.7031 (1.0059) acc 78.1250 (74.1406) lr 5.1825e-04 eta 1:59:44 +epoch [35/50] batch [45/500] time 0.897 (0.902) data 0.000 (0.016) loss 1.0645 (0.9937) acc 65.6250 (74.0278) lr 5.1825e-04 eta 1:59:34 +epoch [35/50] batch [50/500] time 0.900 (0.902) data 0.000 (0.014) loss 
0.9023 (0.9869) acc 81.2500 (74.4375) lr 5.1825e-04 eta 1:59:30 +epoch [35/50] batch [55/500] time 0.891 (0.901) data 0.000 (0.013) loss 1.7881 (1.0130) acc 59.3750 (74.0341) lr 5.1825e-04 eta 1:59:15 +epoch [35/50] batch [60/500] time 0.897 (0.900) data 0.000 (0.012) loss 0.8301 (1.0086) acc 81.2500 (73.8021) lr 5.1825e-04 eta 1:59:06 +epoch [35/50] batch [65/500] time 0.881 (0.899) data 0.000 (0.011) loss 0.7476 (0.9996) acc 84.3750 (73.9423) lr 5.1825e-04 eta 1:58:50 +epoch [35/50] batch [70/500] time 0.867 (0.900) data 0.000 (0.010) loss 0.6294 (1.0014) acc 78.1250 (74.0179) lr 5.1825e-04 eta 1:58:54 +epoch [35/50] batch [75/500] time 0.858 (0.898) data 0.000 (0.010) loss 1.1357 (1.0092) acc 71.8750 (74.0417) lr 5.1825e-04 eta 1:58:38 +epoch [35/50] batch [80/500] time 0.857 (0.897) data 0.000 (0.009) loss 0.9023 (1.0070) acc 75.0000 (74.1406) lr 5.1825e-04 eta 1:58:23 +epoch [35/50] batch [85/500] time 0.888 (0.896) data 0.000 (0.009) loss 0.9883 (1.0096) acc 81.2500 (74.0441) lr 5.1825e-04 eta 1:58:15 +epoch [35/50] batch [90/500] time 0.873 (0.895) data 0.000 (0.008) loss 1.1738 (1.0114) acc 81.2500 (74.2361) lr 5.1825e-04 eta 1:57:59 +epoch [35/50] batch [95/500] time 0.909 (0.895) data 0.000 (0.008) loss 0.7510 (1.0151) acc 87.5000 (74.2105) lr 5.1825e-04 eta 1:57:56 +epoch [35/50] batch [100/500] time 0.890 (0.894) data 0.000 (0.007) loss 0.7690 (1.0156) acc 68.7500 (74.0938) lr 5.1825e-04 eta 1:57:45 +epoch [35/50] batch [105/500] time 0.905 (0.895) data 0.000 (0.007) loss 1.0312 (1.0173) acc 68.7500 (74.0179) lr 5.1825e-04 eta 1:57:42 +epoch [35/50] batch [110/500] time 0.889 (0.894) data 0.000 (0.007) loss 1.1914 (1.0193) acc 75.0000 (73.8920) lr 5.1825e-04 eta 1:57:35 +epoch [35/50] batch [115/500] time 0.866 (0.893) data 0.000 (0.006) loss 0.6230 (1.0313) acc 81.2500 (73.7500) lr 5.1825e-04 eta 1:57:24 +epoch [35/50] batch [120/500] time 0.888 (0.893) data 0.000 (0.006) loss 1.5625 (1.0383) acc 68.7500 (73.6719) lr 5.1825e-04 eta 1:57:16 +epoch 
[35/50] batch [125/500] time 0.892 (0.892) data 0.000 (0.006) loss 1.0127 (1.0308) acc 65.6250 (73.8000) lr 5.1825e-04 eta 1:57:08 +epoch [35/50] batch [130/500] time 0.898 (0.892) data 0.000 (0.006) loss 1.3438 (1.0295) acc 75.0000 (73.9423) lr 5.1825e-04 eta 1:57:01 +epoch [35/50] batch [135/500] time 0.913 (0.892) data 0.000 (0.005) loss 1.0273 (1.0295) acc 78.1250 (73.9352) lr 5.1825e-04 eta 1:56:56 +epoch [35/50] batch [140/500] time 0.900 (0.892) data 0.000 (0.005) loss 1.4775 (1.0290) acc 65.6250 (73.8839) lr 5.1825e-04 eta 1:56:50 +epoch [35/50] batch [145/500] time 0.873 (0.892) data 0.000 (0.005) loss 1.1309 (1.0358) acc 71.8750 (73.7500) lr 5.1825e-04 eta 1:56:45 +epoch [35/50] batch [150/500] time 0.904 (0.892) data 0.000 (0.005) loss 0.9150 (1.0361) acc 78.1250 (73.8750) lr 5.1825e-04 eta 1:56:42 +epoch [35/50] batch [155/500] time 0.883 (0.892) data 0.000 (0.005) loss 1.1982 (1.0307) acc 68.7500 (74.0524) lr 5.1825e-04 eta 1:56:35 +epoch [35/50] batch [160/500] time 0.885 (0.892) data 0.000 (0.005) loss 0.9634 (1.0300) acc 75.0000 (74.1602) lr 5.1825e-04 eta 1:56:30 +epoch [35/50] batch [165/500] time 0.912 (0.892) data 0.000 (0.004) loss 1.9102 (1.0351) acc 56.2500 (74.0152) lr 5.1825e-04 eta 1:56:32 +epoch [35/50] batch [170/500] time 0.892 (0.892) data 0.000 (0.004) loss 1.7969 (1.0402) acc 53.1250 (73.9154) lr 5.1825e-04 eta 1:56:25 +epoch [35/50] batch [175/500] time 0.919 (0.892) data 0.000 (0.004) loss 0.9180 (1.0435) acc 71.8750 (73.6786) lr 5.1825e-04 eta 1:56:21 +epoch [35/50] batch [180/500] time 0.887 (0.892) data 0.000 (0.004) loss 1.4004 (1.0527) acc 75.0000 (73.6111) lr 5.1825e-04 eta 1:56:14 +epoch [35/50] batch [185/500] time 0.893 (0.892) data 0.000 (0.004) loss 1.1377 (1.0584) acc 65.6250 (73.5304) lr 5.1825e-04 eta 1:56:07 +epoch [35/50] batch [190/500] time 0.897 (0.891) data 0.000 (0.004) loss 0.5171 (1.0569) acc 81.2500 (73.4868) lr 5.1825e-04 eta 1:56:01 +epoch [35/50] batch [195/500] time 0.897 (0.891) data 0.000 (0.004) loss 
0.7891 (1.0572) acc 71.8750 (73.4776) lr 5.1825e-04 eta 1:55:56 +epoch [35/50] batch [200/500] time 0.890 (0.891) data 0.000 (0.004) loss 0.6289 (1.0547) acc 84.3750 (73.5625) lr 5.1825e-04 eta 1:55:51 +epoch [35/50] batch [205/500] time 0.884 (0.891) data 0.000 (0.004) loss 0.9604 (1.0533) acc 78.1250 (73.5976) lr 5.1825e-04 eta 1:55:45 +epoch [35/50] batch [210/500] time 0.871 (0.891) data 0.000 (0.004) loss 0.8535 (1.0549) acc 78.1250 (73.6161) lr 5.1825e-04 eta 1:55:41 +epoch [35/50] batch [215/500] time 0.909 (0.891) data 0.000 (0.004) loss 0.6396 (1.0520) acc 90.6250 (73.7064) lr 5.1825e-04 eta 1:55:34 +epoch [35/50] batch [220/500] time 0.878 (0.891) data 0.000 (0.003) loss 1.4365 (1.0529) acc 71.8750 (73.6364) lr 5.1825e-04 eta 1:55:30 +epoch [35/50] batch [225/500] time 0.875 (0.890) data 0.000 (0.003) loss 0.9858 (1.0554) acc 71.8750 (73.5139) lr 5.1825e-04 eta 1:55:23 +epoch [35/50] batch [230/500] time 0.909 (0.890) data 0.000 (0.003) loss 1.3115 (1.0559) acc 68.7500 (73.4783) lr 5.1825e-04 eta 1:55:17 +epoch [35/50] batch [235/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.8931 (1.0570) acc 65.6250 (73.4707) lr 5.1825e-04 eta 1:55:14 +epoch [35/50] batch [240/500] time 0.886 (0.890) data 0.000 (0.003) loss 0.9897 (1.0597) acc 75.0000 (73.4115) lr 5.1825e-04 eta 1:55:08 +epoch [35/50] batch [245/500] time 0.864 (0.890) data 0.000 (0.003) loss 1.4297 (1.0593) acc 71.8750 (73.4566) lr 5.1825e-04 eta 1:55:02 +epoch [35/50] batch [250/500] time 0.875 (0.890) data 0.000 (0.003) loss 1.9570 (1.0625) acc 59.3750 (73.4375) lr 5.1825e-04 eta 1:54:56 +epoch [35/50] batch [255/500] time 0.884 (0.889) data 0.000 (0.003) loss 0.9517 (1.0648) acc 81.2500 (73.3946) lr 5.1825e-04 eta 1:54:48 +epoch [35/50] batch [260/500] time 0.863 (0.889) data 0.000 (0.003) loss 1.5781 (1.0715) acc 68.7500 (73.3293) lr 5.1825e-04 eta 1:54:42 +epoch [35/50] batch [265/500] time 0.873 (0.889) data 0.000 (0.003) loss 1.4473 (1.0731) acc 71.8750 (73.3491) lr 5.1825e-04 eta 1:54:36 
+epoch [35/50] batch [270/500] time 0.879 (0.889) data 0.000 (0.003) loss 0.6934 (1.0680) acc 81.2500 (73.4606) lr 5.1825e-04 eta 1:54:31 +epoch [35/50] batch [275/500] time 0.909 (0.889) data 0.000 (0.003) loss 1.5283 (1.0675) acc 71.8750 (73.5000) lr 5.1825e-04 eta 1:54:27 +epoch [35/50] batch [280/500] time 0.876 (0.889) data 0.000 (0.003) loss 1.4619 (1.0684) acc 62.5000 (73.3705) lr 5.1825e-04 eta 1:54:24 +epoch [35/50] batch [285/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.4927 (1.0666) acc 87.5000 (73.3443) lr 5.1825e-04 eta 1:54:17 +epoch [35/50] batch [290/500] time 0.900 (0.889) data 0.000 (0.003) loss 1.3184 (1.0675) acc 68.7500 (73.3728) lr 5.1825e-04 eta 1:54:13 +epoch [35/50] batch [295/500] time 0.848 (0.888) data 0.000 (0.003) loss 1.3828 (1.0709) acc 62.5000 (73.3051) lr 5.1825e-04 eta 1:54:05 +epoch [35/50] batch [300/500] time 0.869 (0.888) data 0.000 (0.003) loss 0.5117 (1.0723) acc 75.0000 (73.2500) lr 5.1825e-04 eta 1:53:59 +epoch [35/50] batch [305/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.0498 (1.0745) acc 75.0000 (73.1762) lr 5.1825e-04 eta 1:53:54 +epoch [35/50] batch [310/500] time 0.879 (0.889) data 0.000 (0.003) loss 0.9922 (1.0726) acc 71.8750 (73.2258) lr 5.1825e-04 eta 1:53:53 +epoch [35/50] batch [315/500] time 0.909 (0.889) data 0.000 (0.002) loss 1.0146 (1.0719) acc 65.6250 (73.2440) lr 5.1825e-04 eta 1:53:50 +epoch [35/50] batch [320/500] time 0.891 (0.889) data 0.000 (0.002) loss 0.5391 (1.0680) acc 87.5000 (73.2910) lr 5.1825e-04 eta 1:53:46 +epoch [35/50] batch [325/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.2910 (1.0699) acc 75.0000 (73.3077) lr 5.1825e-04 eta 1:53:41 +epoch [35/50] batch [330/500] time 0.904 (0.889) data 0.000 (0.002) loss 0.6455 (1.0685) acc 81.2500 (73.3049) lr 5.1825e-04 eta 1:53:37 +epoch [35/50] batch [335/500] time 0.906 (0.889) data 0.000 (0.002) loss 0.8457 (1.0672) acc 75.0000 (73.3022) lr 5.1825e-04 eta 1:53:34 +epoch [35/50] batch [340/500] time 0.867 (0.889) data 0.000 
(0.002) loss 0.8853 (1.0653) acc 84.3750 (73.3548) lr 5.1825e-04 eta 1:53:27 +epoch [35/50] batch [345/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.0830 (1.0621) acc 71.8750 (73.4058) lr 5.1825e-04 eta 1:53:21 +epoch [35/50] batch [350/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2607 (1.0619) acc 71.8750 (73.4286) lr 5.1825e-04 eta 1:53:17 +epoch [35/50] batch [355/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.9634 (1.0627) acc 71.8750 (73.4155) lr 5.1825e-04 eta 1:53:13 +epoch [35/50] batch [360/500] time 0.898 (0.889) data 0.000 (0.002) loss 1.0771 (1.0645) acc 78.1250 (73.3507) lr 5.1825e-04 eta 1:53:09 +epoch [35/50] batch [365/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.2412 (1.0624) acc 65.6250 (73.4075) lr 5.1825e-04 eta 1:53:03 +epoch [35/50] batch [370/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.1426 (1.0629) acc 71.8750 (73.3868) lr 5.1825e-04 eta 1:52:58 +epoch [35/50] batch [375/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.9062 (1.0628) acc 62.5000 (73.4333) lr 5.1825e-04 eta 1:52:52 +epoch [35/50] batch [380/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.9321 (1.0630) acc 68.7500 (73.4375) lr 5.1825e-04 eta 1:52:48 +epoch [35/50] batch [385/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.9272 (1.0639) acc 78.1250 (73.4497) lr 5.1825e-04 eta 1:52:45 +epoch [35/50] batch [390/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.4893 (1.0650) acc 65.6250 (73.4856) lr 5.1825e-04 eta 1:52:41 +epoch [35/50] batch [395/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.3047 (1.0653) acc 75.0000 (73.5206) lr 5.1825e-04 eta 1:52:36 +epoch [35/50] batch [400/500] time 0.891 (0.888) data 0.000 (0.002) loss 2.0957 (1.0661) acc 56.2500 (73.4922) lr 5.1825e-04 eta 1:52:31 +epoch [35/50] batch [405/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.2432 (1.0640) acc 71.8750 (73.5494) lr 5.1825e-04 eta 1:52:27 +epoch [35/50] batch [410/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.8149 (1.0594) acc 81.2500 (73.6585) lr 5.1825e-04 
eta 1:52:21 +epoch [35/50] batch [415/500] time 0.847 (0.888) data 0.000 (0.002) loss 1.1162 (1.0588) acc 78.1250 (73.6446) lr 5.1825e-04 eta 1:52:15 +epoch [35/50] batch [420/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.9062 (1.0609) acc 59.3750 (73.5491) lr 5.1825e-04 eta 1:52:11 +epoch [35/50] batch [425/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.3242 (1.0617) acc 78.1250 (73.5956) lr 5.1825e-04 eta 1:52:06 +epoch [35/50] batch [430/500] time 0.912 (0.888) data 0.000 (0.002) loss 0.9458 (1.0632) acc 68.7500 (73.5465) lr 5.1825e-04 eta 1:52:01 +epoch [35/50] batch [435/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.3232 (1.0641) acc 65.6250 (73.5560) lr 5.1825e-04 eta 1:51:57 +epoch [35/50] batch [440/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.9199 (1.0658) acc 75.0000 (73.5298) lr 5.1825e-04 eta 1:51:52 +epoch [35/50] batch [445/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.5127 (1.0664) acc 59.3750 (73.5323) lr 5.1825e-04 eta 1:51:46 +epoch [35/50] batch [450/500] time 0.970 (0.888) data 0.000 (0.002) loss 0.6230 (1.0658) acc 84.3750 (73.5208) lr 5.1825e-04 eta 1:51:42 +epoch [35/50] batch [455/500] time 0.921 (0.888) data 0.000 (0.002) loss 1.2412 (1.0659) acc 75.0000 (73.5096) lr 5.1825e-04 eta 1:51:39 +epoch [35/50] batch [460/500] time 0.866 (0.888) data 0.000 (0.002) loss 1.0479 (1.0680) acc 65.6250 (73.4239) lr 5.1825e-04 eta 1:51:33 +epoch [35/50] batch [465/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.2710 (1.0679) acc 90.6250 (73.4476) lr 5.1825e-04 eta 1:51:28 +epoch [35/50] batch [470/500] time 0.880 (0.888) data 0.000 (0.002) loss 1.4082 (1.0711) acc 68.7500 (73.3710) lr 5.1825e-04 eta 1:51:23 +epoch [35/50] batch [475/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9141 (1.0717) acc 81.2500 (73.3816) lr 5.1825e-04 eta 1:51:20 +epoch [35/50] batch [480/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.6006 (1.0731) acc 71.8750 (73.3659) lr 5.1825e-04 eta 1:51:15 +epoch [35/50] batch [485/500] time 0.894 (0.888) data 
0.000 (0.002) loss 0.7310 (1.0731) acc 75.0000 (73.3827) lr 5.1825e-04 eta 1:51:11 +epoch [35/50] batch [490/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2314 (1.0764) acc 71.8750 (73.3163) lr 5.1825e-04 eta 1:51:06 +epoch [35/50] batch [495/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.2402 (1.0764) acc 68.7500 (73.3018) lr 5.1825e-04 eta 1:51:02 +epoch [35/50] batch [500/500] time 0.864 (0.888) data 0.000 (0.002) loss 0.6885 (1.0769) acc 81.2500 (73.2750) lr 4.6417e-04 eta 1:50:56 +epoch [36/50] batch [5/500] time 0.862 (1.008) data 0.000 (0.132) loss 1.5439 (1.1895) acc 68.7500 (71.2500) lr 4.6417e-04 eta 2:05:55 +epoch [36/50] batch [10/500] time 0.909 (0.957) data 0.000 (0.066) loss 1.1582 (1.2669) acc 68.7500 (69.6875) lr 4.6417e-04 eta 1:59:27 +epoch [36/50] batch [15/500] time 0.870 (0.930) data 0.000 (0.044) loss 1.1650 (1.2503) acc 68.7500 (70.8333) lr 4.6417e-04 eta 1:56:00 +epoch [36/50] batch [20/500] time 0.867 (0.915) data 0.000 (0.033) loss 0.8804 (1.2338) acc 81.2500 (71.7188) lr 4.6417e-04 eta 1:54:02 +epoch [36/50] batch [25/500] time 0.874 (0.909) data 0.000 (0.027) loss 1.5420 (1.1828) acc 68.7500 (72.6250) lr 4.6417e-04 eta 1:53:16 +epoch [36/50] batch [30/500] time 0.935 (0.907) data 0.000 (0.022) loss 0.9790 (1.1647) acc 71.8750 (72.9167) lr 4.6417e-04 eta 1:52:56 +epoch [36/50] batch [35/500] time 0.893 (0.904) data 0.000 (0.019) loss 0.9893 (1.1475) acc 65.6250 (72.8571) lr 4.6417e-04 eta 1:52:25 +epoch [36/50] batch [40/500] time 0.896 (0.905) data 0.000 (0.017) loss 0.7637 (1.1114) acc 78.1250 (73.3594) lr 4.6417e-04 eta 1:52:27 +epoch [36/50] batch [45/500] time 0.905 (0.904) data 0.000 (0.015) loss 0.8320 (1.0916) acc 81.2500 (73.6806) lr 4.6417e-04 eta 1:52:21 +epoch [36/50] batch [50/500] time 0.875 (0.901) data 0.000 (0.013) loss 0.9590 (1.0996) acc 68.7500 (73.2500) lr 4.6417e-04 eta 1:51:54 +epoch [36/50] batch [55/500] time 0.882 (0.899) data 0.000 (0.012) loss 1.1953 (1.0974) acc 71.8750 (73.1818) lr 4.6417e-04 eta 
1:51:32 +epoch [36/50] batch [60/500] time 0.867 (0.897) data 0.000 (0.011) loss 0.9961 (1.0863) acc 78.1250 (73.5938) lr 4.6417e-04 eta 1:51:16 +epoch [36/50] batch [65/500] time 0.886 (0.896) data 0.000 (0.010) loss 1.0527 (1.0908) acc 71.8750 (73.5577) lr 4.6417e-04 eta 1:51:00 +epoch [36/50] batch [70/500] time 0.877 (0.895) data 0.000 (0.010) loss 1.6240 (1.1023) acc 62.5000 (73.2589) lr 4.6417e-04 eta 1:50:50 +epoch [36/50] batch [75/500] time 0.904 (0.894) data 0.000 (0.009) loss 1.3389 (1.1183) acc 56.2500 (72.6667) lr 4.6417e-04 eta 1:50:40 +epoch [36/50] batch [80/500] time 0.873 (0.894) data 0.000 (0.008) loss 1.2988 (1.1224) acc 81.2500 (72.7734) lr 4.6417e-04 eta 1:50:33 +epoch [36/50] batch [85/500] time 0.885 (0.893) data 0.000 (0.008) loss 1.1016 (1.1040) acc 78.1250 (73.2721) lr 4.6417e-04 eta 1:50:22 +epoch [36/50] batch [90/500] time 0.902 (0.892) data 0.000 (0.008) loss 1.5889 (1.1020) acc 65.6250 (73.4028) lr 4.6417e-04 eta 1:50:11 +epoch [36/50] batch [95/500] time 0.880 (0.892) data 0.000 (0.007) loss 0.9497 (1.0947) acc 75.0000 (73.5197) lr 4.6417e-04 eta 1:50:04 +epoch [36/50] batch [100/500] time 0.876 (0.892) data 0.000 (0.007) loss 0.9707 (1.1035) acc 78.1250 (73.2188) lr 4.6417e-04 eta 1:49:57 +epoch [36/50] batch [105/500] time 0.862 (0.891) data 0.000 (0.006) loss 0.5703 (1.0975) acc 87.5000 (73.3333) lr 4.6417e-04 eta 1:49:51 +epoch [36/50] batch [110/500] time 0.859 (0.891) data 0.000 (0.006) loss 1.0508 (1.1067) acc 71.8750 (73.1250) lr 4.6417e-04 eta 1:49:42 +epoch [36/50] batch [115/500] time 0.884 (0.890) data 0.000 (0.006) loss 0.8657 (1.1059) acc 65.6250 (72.9348) lr 4.6417e-04 eta 1:49:33 +epoch [36/50] batch [120/500] time 0.890 (0.890) data 0.000 (0.006) loss 0.8750 (1.1019) acc 87.5000 (73.1510) lr 4.6417e-04 eta 1:49:26 +epoch [36/50] batch [125/500] time 0.871 (0.890) data 0.000 (0.005) loss 0.8701 (1.1066) acc 62.5000 (73.0500) lr 4.6417e-04 eta 1:49:20 +epoch [36/50] batch [130/500] time 0.888 (0.890) data 0.000 
(0.005) loss 1.0166 (1.1016) acc 68.7500 (73.0288) lr 4.6417e-04 eta 1:49:15 +epoch [36/50] batch [135/500] time 0.875 (0.889) data 0.000 (0.005) loss 0.4004 (1.0892) acc 87.5000 (73.2176) lr 4.6417e-04 eta 1:49:08 +epoch [36/50] batch [140/500] time 0.883 (0.890) data 0.000 (0.005) loss 0.9604 (1.0882) acc 75.0000 (73.2812) lr 4.6417e-04 eta 1:49:10 +epoch [36/50] batch [145/500] time 0.877 (0.889) data 0.000 (0.005) loss 1.0742 (1.0833) acc 71.8750 (73.2974) lr 4.6417e-04 eta 1:49:00 +epoch [36/50] batch [150/500] time 0.865 (0.889) data 0.000 (0.005) loss 1.3682 (1.0780) acc 84.3750 (73.5208) lr 4.6417e-04 eta 1:48:55 +epoch [36/50] batch [155/500] time 0.902 (0.889) data 0.000 (0.004) loss 1.3086 (1.0761) acc 71.8750 (73.5685) lr 4.6417e-04 eta 1:48:51 +epoch [36/50] batch [160/500] time 0.890 (0.889) data 0.001 (0.004) loss 0.6748 (1.0753) acc 78.1250 (73.5156) lr 4.6417e-04 eta 1:48:42 +epoch [36/50] batch [165/500] time 0.855 (0.888) data 0.000 (0.004) loss 1.0371 (1.0754) acc 71.8750 (73.4659) lr 4.6417e-04 eta 1:48:35 +epoch [36/50] batch [170/500] time 0.884 (0.888) data 0.000 (0.004) loss 0.9761 (1.0716) acc 71.8750 (73.4926) lr 4.6417e-04 eta 1:48:31 +epoch [36/50] batch [175/500] time 0.885 (0.888) data 0.000 (0.004) loss 0.6035 (1.0687) acc 84.3750 (73.6429) lr 4.6417e-04 eta 1:48:27 +epoch [36/50] batch [180/500] time 0.880 (0.888) data 0.000 (0.004) loss 1.1484 (1.0667) acc 71.8750 (73.6632) lr 4.6417e-04 eta 1:48:20 +epoch [36/50] batch [185/500] time 0.874 (0.888) data 0.000 (0.004) loss 0.9385 (1.0653) acc 78.1250 (73.7162) lr 4.6417e-04 eta 1:48:18 +epoch [36/50] batch [190/500] time 0.883 (0.888) data 0.000 (0.004) loss 1.0176 (1.0652) acc 81.2500 (73.7500) lr 4.6417e-04 eta 1:48:13 +epoch [36/50] batch [195/500] time 0.860 (0.888) data 0.000 (0.004) loss 0.7876 (1.0601) acc 68.7500 (73.7340) lr 4.6417e-04 eta 1:48:04 +epoch [36/50] batch [200/500] time 0.903 (0.888) data 0.000 (0.004) loss 0.8506 (1.0586) acc 75.0000 (73.7812) lr 4.6417e-04 
eta 1:47:59 +epoch [36/50] batch [205/500] time 0.888 (0.887) data 0.000 (0.003) loss 1.0996 (1.0614) acc 68.7500 (73.7043) lr 4.6417e-04 eta 1:47:54 +epoch [36/50] batch [210/500] time 0.890 (0.887) data 0.000 (0.003) loss 1.1631 (1.0575) acc 65.6250 (73.7798) lr 4.6417e-04 eta 1:47:48 +epoch [36/50] batch [215/500] time 0.852 (0.887) data 0.000 (0.003) loss 1.0537 (1.0636) acc 71.8750 (73.7209) lr 4.6417e-04 eta 1:47:42 +epoch [36/50] batch [220/500] time 0.880 (0.887) data 0.000 (0.003) loss 0.8516 (1.0644) acc 84.3750 (73.7074) lr 4.6417e-04 eta 1:47:36 +epoch [36/50] batch [225/500] time 0.880 (0.886) data 0.000 (0.003) loss 0.8936 (1.0612) acc 78.1250 (73.8333) lr 4.6417e-04 eta 1:47:29 +epoch [36/50] batch [230/500] time 0.883 (0.886) data 0.000 (0.003) loss 0.9253 (1.0604) acc 78.1250 (73.8587) lr 4.6417e-04 eta 1:47:22 +epoch [36/50] batch [235/500] time 0.877 (0.886) data 0.000 (0.003) loss 0.7598 (1.0570) acc 81.2500 (73.9362) lr 4.6417e-04 eta 1:47:18 +epoch [36/50] batch [240/500] time 0.894 (0.886) data 0.000 (0.003) loss 1.4736 (1.0559) acc 56.2500 (73.9323) lr 4.6417e-04 eta 1:47:14 +epoch [36/50] batch [245/500] time 0.883 (0.886) data 0.000 (0.003) loss 0.9976 (1.0580) acc 71.8750 (73.8648) lr 4.6417e-04 eta 1:47:09 +epoch [36/50] batch [250/500] time 0.877 (0.886) data 0.000 (0.003) loss 1.1709 (1.0592) acc 56.2500 (73.7000) lr 4.6417e-04 eta 1:47:02 +epoch [36/50] batch [255/500] time 0.894 (0.886) data 0.000 (0.003) loss 0.6284 (1.0626) acc 84.3750 (73.7255) lr 4.6417e-04 eta 1:46:58 +epoch [36/50] batch [260/500] time 0.904 (0.886) data 0.000 (0.003) loss 0.7158 (1.0616) acc 78.1250 (73.7139) lr 4.6417e-04 eta 1:46:55 +epoch [36/50] batch [265/500] time 0.878 (0.886) data 0.000 (0.003) loss 0.8936 (1.0634) acc 78.1250 (73.6792) lr 4.6417e-04 eta 1:46:49 +epoch [36/50] batch [270/500] time 0.902 (0.886) data 0.000 (0.003) loss 0.9424 (1.0612) acc 75.0000 (73.7500) lr 4.6417e-04 eta 1:46:44 +epoch [36/50] batch [275/500] time 0.879 (0.886) data 
0.000 (0.003) loss 1.0938 (1.0568) acc 75.0000 (73.8523) lr 4.6417e-04 eta 1:46:39 +epoch [36/50] batch [280/500] time 1.002 (0.886) data 0.000 (0.003) loss 1.3535 (1.0585) acc 71.8750 (73.8839) lr 4.6417e-04 eta 1:46:35 +epoch [36/50] batch [285/500] time 0.921 (0.886) data 0.000 (0.003) loss 0.6934 (1.0597) acc 78.1250 (73.8268) lr 4.6417e-04 eta 1:46:31 +epoch [36/50] batch [290/500] time 0.927 (0.886) data 0.000 (0.003) loss 1.7471 (1.0600) acc 62.5000 (73.7823) lr 4.6417e-04 eta 1:46:27 +epoch [36/50] batch [295/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1270 (1.0653) acc 68.7500 (73.6653) lr 4.6417e-04 eta 1:46:23 +epoch [36/50] batch [300/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.9136 (1.0676) acc 71.8750 (73.5729) lr 4.6417e-04 eta 1:46:19 +epoch [36/50] batch [305/500] time 0.862 (0.886) data 0.000 (0.002) loss 0.7148 (1.0697) acc 81.2500 (73.6168) lr 4.6417e-04 eta 1:46:13 +epoch [36/50] batch [310/500] time 0.903 (0.886) data 0.000 (0.002) loss 1.3613 (1.0691) acc 65.6250 (73.6089) lr 4.6417e-04 eta 1:46:08 +epoch [36/50] batch [315/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.1680 (1.0696) acc 62.5000 (73.5516) lr 4.6417e-04 eta 1:46:03 +epoch [36/50] batch [320/500] time 0.890 (0.886) data 0.000 (0.002) loss 0.8691 (1.0703) acc 75.0000 (73.5254) lr 4.6417e-04 eta 1:45:58 +epoch [36/50] batch [325/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.4492 (1.0749) acc 68.7500 (73.4038) lr 4.6417e-04 eta 1:45:56 +epoch [36/50] batch [330/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.1973 (1.0789) acc 75.0000 (73.2955) lr 4.6417e-04 eta 1:45:52 +epoch [36/50] batch [335/500] time 0.866 (0.886) data 0.000 (0.002) loss 1.0400 (1.0806) acc 68.7500 (73.2649) lr 4.6417e-04 eta 1:45:46 +epoch [36/50] batch [340/500] time 0.885 (0.886) data 0.000 (0.002) loss 1.2178 (1.0829) acc 65.6250 (73.2353) lr 4.6417e-04 eta 1:45:41 +epoch [36/50] batch [345/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.2656 (1.0851) acc 68.7500 (73.1612) lr 
4.6417e-04 eta 1:45:36 +epoch [36/50] batch [350/500] time 0.859 (0.885) data 0.000 (0.002) loss 1.5781 (1.0870) acc 68.7500 (73.1429) lr 4.6417e-04 eta 1:45:30 +epoch [36/50] batch [355/500] time 0.899 (0.885) data 0.000 (0.002) loss 0.9028 (1.0876) acc 78.1250 (73.1250) lr 4.6417e-04 eta 1:45:26 +epoch [36/50] batch [360/500] time 0.868 (0.885) data 0.000 (0.002) loss 0.9082 (1.0849) acc 75.0000 (73.1424) lr 4.6417e-04 eta 1:45:21 +epoch [36/50] batch [365/500] time 0.896 (0.885) data 0.000 (0.002) loss 0.8096 (1.0851) acc 75.0000 (73.1592) lr 4.6417e-04 eta 1:45:16 +epoch [36/50] batch [370/500] time 0.894 (0.885) data 0.000 (0.002) loss 0.9717 (1.0847) acc 71.8750 (73.1757) lr 4.6417e-04 eta 1:45:11 +epoch [36/50] batch [375/500] time 0.870 (0.885) data 0.000 (0.002) loss 1.2969 (1.0858) acc 68.7500 (73.1167) lr 4.6417e-04 eta 1:45:06 +epoch [36/50] batch [380/500] time 0.873 (0.885) data 0.000 (0.002) loss 0.8979 (1.0834) acc 81.2500 (73.1826) lr 4.6417e-04 eta 1:45:02 +epoch [36/50] batch [385/500] time 0.866 (0.885) data 0.000 (0.002) loss 0.7080 (1.0848) acc 71.8750 (73.0925) lr 4.6417e-04 eta 1:44:57 +epoch [36/50] batch [390/500] time 0.860 (0.885) data 0.000 (0.002) loss 1.4805 (1.0839) acc 62.5000 (73.1250) lr 4.6417e-04 eta 1:44:50 +epoch [36/50] batch [395/500] time 0.894 (0.885) data 0.000 (0.002) loss 1.3643 (1.0873) acc 59.3750 (73.0380) lr 4.6417e-04 eta 1:44:46 +epoch [36/50] batch [400/500] time 0.885 (0.885) data 0.000 (0.002) loss 1.3271 (1.0850) acc 65.6250 (73.0938) lr 4.6417e-04 eta 1:44:41 +epoch [36/50] batch [405/500] time 0.861 (0.885) data 0.000 (0.002) loss 1.1943 (1.0831) acc 62.5000 (73.1019) lr 4.6417e-04 eta 1:44:36 +epoch [36/50] batch [410/500] time 0.896 (0.885) data 0.000 (0.002) loss 1.3330 (1.0809) acc 68.7500 (73.1021) lr 4.6417e-04 eta 1:44:31 +epoch [36/50] batch [415/500] time 0.884 (0.885) data 0.000 (0.002) loss 1.1924 (1.0792) acc 75.0000 (73.1099) lr 4.6417e-04 eta 1:44:27 +epoch [36/50] batch [420/500] time 0.855 
(0.885) data 0.000 (0.002) loss 1.6133 (1.0792) acc 62.5000 (73.1250) lr 4.6417e-04 eta 1:44:22 +epoch [36/50] batch [425/500] time 0.866 (0.885) data 0.000 (0.002) loss 0.8599 (1.0786) acc 84.3750 (73.1544) lr 4.6417e-04 eta 1:44:18 +epoch [36/50] batch [430/500] time 0.869 (0.884) data 0.000 (0.002) loss 0.7383 (1.0775) acc 81.2500 (73.1831) lr 4.6417e-04 eta 1:44:13 +epoch [36/50] batch [435/500] time 0.930 (0.885) data 0.000 (0.002) loss 0.6973 (1.0764) acc 81.2500 (73.2184) lr 4.6417e-04 eta 1:44:11 +epoch [36/50] batch [440/500] time 0.892 (0.885) data 0.000 (0.002) loss 1.0635 (1.0783) acc 65.6250 (73.1463) lr 4.6417e-04 eta 1:44:07 +epoch [36/50] batch [445/500] time 0.868 (0.885) data 0.000 (0.002) loss 1.0459 (1.0787) acc 65.6250 (73.1320) lr 4.6417e-04 eta 1:44:03 +epoch [36/50] batch [450/500] time 0.877 (0.885) data 0.000 (0.002) loss 1.1729 (1.0782) acc 71.8750 (73.1389) lr 4.6417e-04 eta 1:43:58 +epoch [36/50] batch [455/500] time 0.875 (0.885) data 0.000 (0.002) loss 0.7812 (1.0760) acc 84.3750 (73.2143) lr 4.6417e-04 eta 1:43:52 +epoch [36/50] batch [460/500] time 0.875 (0.885) data 0.000 (0.002) loss 0.7324 (1.0738) acc 78.1250 (73.2609) lr 4.6417e-04 eta 1:43:48 +epoch [36/50] batch [465/500] time 0.895 (0.885) data 0.000 (0.002) loss 1.2197 (1.0727) acc 68.7500 (73.2997) lr 4.6417e-04 eta 1:43:43 +epoch [36/50] batch [470/500] time 0.891 (0.885) data 0.000 (0.002) loss 1.0830 (1.0735) acc 84.3750 (73.2979) lr 4.6417e-04 eta 1:43:39 +epoch [36/50] batch [475/500] time 0.860 (0.885) data 0.000 (0.002) loss 1.1875 (1.0708) acc 75.0000 (73.3487) lr 4.6417e-04 eta 1:43:34 +epoch [36/50] batch [480/500] time 0.858 (0.885) data 0.000 (0.002) loss 0.7974 (1.0704) acc 78.1250 (73.3659) lr 4.6417e-04 eta 1:43:29 +epoch [36/50] batch [485/500] time 0.901 (0.885) data 0.000 (0.002) loss 0.9814 (1.0720) acc 84.3750 (73.3956) lr 4.6417e-04 eta 1:43:25 +epoch [36/50] batch [490/500] time 0.887 (0.885) data 0.000 (0.002) loss 0.9282 (1.0717) acc 75.0000 
(73.4311) lr 4.6417e-04 eta 1:43:20 +epoch [36/50] batch [495/500] time 0.877 (0.885) data 0.000 (0.002) loss 1.2578 (1.0708) acc 65.6250 (73.4785) lr 4.6417e-04 eta 1:43:16 +epoch [36/50] batch [500/500] time 0.879 (0.885) data 0.000 (0.002) loss 1.2539 (1.0710) acc 62.5000 (73.4688) lr 4.1221e-04 eta 1:43:12 +epoch [37/50] batch [5/500] time 0.860 (1.032) data 0.000 (0.141) loss 1.4951 (1.0021) acc 68.7500 (73.7500) lr 4.1221e-04 eta 2:00:18 +epoch [37/50] batch [10/500] time 1.010 (0.966) data 0.000 (0.071) loss 1.4971 (1.1247) acc 71.8750 (72.5000) lr 4.1221e-04 eta 1:52:32 +epoch [37/50] batch [15/500] time 0.886 (0.937) data 0.000 (0.047) loss 0.7520 (1.0912) acc 78.1250 (73.5417) lr 4.1221e-04 eta 1:49:04 +epoch [37/50] batch [20/500] time 0.883 (0.921) data 0.000 (0.036) loss 1.2412 (1.0627) acc 65.6250 (73.1250) lr 4.1221e-04 eta 1:47:09 +epoch [37/50] batch [25/500] time 0.870 (0.911) data 0.000 (0.028) loss 1.4873 (1.0529) acc 62.5000 (73.3750) lr 4.1221e-04 eta 1:45:57 +epoch [37/50] batch [30/500] time 0.875 (0.906) data 0.000 (0.024) loss 0.9771 (1.0079) acc 75.0000 (74.7917) lr 4.1221e-04 eta 1:45:15 +epoch [37/50] batch [35/500] time 0.873 (0.903) data 0.000 (0.020) loss 1.2236 (0.9862) acc 65.6250 (75.4464) lr 4.1221e-04 eta 1:44:50 +epoch [37/50] batch [40/500] time 0.926 (0.902) data 0.000 (0.018) loss 1.2578 (1.0202) acc 65.6250 (75.0000) lr 4.1221e-04 eta 1:44:35 +epoch [37/50] batch [45/500] time 0.850 (0.900) data 0.000 (0.016) loss 0.8750 (1.0005) acc 68.7500 (74.9306) lr 4.1221e-04 eta 1:44:16 +epoch [37/50] batch [50/500] time 0.986 (0.900) data 0.000 (0.014) loss 1.0820 (1.0075) acc 71.8750 (74.6875) lr 4.1221e-04 eta 1:44:15 +epoch [37/50] batch [55/500] time 0.921 (0.900) data 0.000 (0.013) loss 0.9761 (1.0072) acc 75.0000 (74.2045) lr 4.1221e-04 eta 1:44:12 +epoch [37/50] batch [60/500] time 0.879 (0.898) data 0.000 (0.012) loss 0.9795 (0.9905) acc 71.8750 (74.6354) lr 4.1221e-04 eta 1:43:50 +epoch [37/50] batch [65/500] time 0.867 
(0.897) data 0.000 (0.011) loss 0.9336 (0.9910) acc 78.1250 (74.5673) lr 4.1221e-04 eta 1:43:37 +epoch [37/50] batch [70/500] time 0.896 (0.896) data 0.000 (0.010) loss 0.8511 (0.9864) acc 78.1250 (74.8661) lr 4.1221e-04 eta 1:43:28 +epoch [37/50] batch [75/500] time 0.906 (0.896) data 0.000 (0.010) loss 0.5562 (0.9690) acc 87.5000 (75.4167) lr 4.1221e-04 eta 1:43:27 +epoch [37/50] batch [80/500] time 0.876 (0.896) data 0.000 (0.009) loss 1.4346 (0.9919) acc 68.7500 (75.4297) lr 4.1221e-04 eta 1:43:21 +epoch [37/50] batch [85/500] time 0.913 (0.896) data 0.000 (0.009) loss 0.9883 (1.0040) acc 81.2500 (75.1471) lr 4.1221e-04 eta 1:43:14 +epoch [37/50] batch [90/500] time 0.875 (0.895) data 0.000 (0.008) loss 1.4941 (1.0281) acc 62.5000 (74.5833) lr 4.1221e-04 eta 1:43:06 +epoch [37/50] batch [95/500] time 0.883 (0.894) data 0.000 (0.008) loss 0.8096 (1.0244) acc 81.2500 (74.7368) lr 4.1221e-04 eta 1:42:56 +epoch [37/50] batch [100/500] time 0.897 (0.895) data 0.000 (0.007) loss 1.1357 (1.0311) acc 71.8750 (74.5312) lr 4.1221e-04 eta 1:42:54 +epoch [37/50] batch [105/500] time 0.871 (0.895) data 0.000 (0.007) loss 1.4023 (1.0389) acc 68.7500 (74.2857) lr 4.1221e-04 eta 1:42:48 +epoch [37/50] batch [110/500] time 0.966 (0.895) data 0.000 (0.007) loss 1.0391 (1.0521) acc 75.0000 (74.0625) lr 4.1221e-04 eta 1:42:47 +epoch [37/50] batch [115/500] time 0.899 (0.894) data 0.000 (0.006) loss 1.8428 (1.0539) acc 53.1250 (73.9402) lr 4.1221e-04 eta 1:42:36 +epoch [37/50] batch [120/500] time 0.875 (0.894) data 0.000 (0.006) loss 0.8950 (1.0611) acc 81.2500 (73.9323) lr 4.1221e-04 eta 1:42:29 +epoch [37/50] batch [125/500] time 0.934 (0.894) data 0.001 (0.006) loss 0.8096 (1.0567) acc 81.2500 (74.0000) lr 4.1221e-04 eta 1:42:24 +epoch [37/50] batch [130/500] time 0.865 (0.893) data 0.000 (0.006) loss 1.1582 (1.0579) acc 71.8750 (73.9183) lr 4.1221e-04 eta 1:42:16 +epoch [37/50] batch [135/500] time 0.901 (0.893) data 0.000 (0.005) loss 1.2490 (1.0566) acc 78.1250 (74.0509) lr 
4.1221e-04 eta 1:42:08 +epoch [37/50] batch [140/500] time 0.907 (0.893) data 0.000 (0.005) loss 0.9399 (1.0557) acc 75.0000 (74.1518) lr 4.1221e-04 eta 1:42:03 +epoch [37/50] batch [145/500] time 0.898 (0.893) data 0.000 (0.005) loss 0.8950 (1.0562) acc 75.0000 (74.0733) lr 4.1221e-04 eta 1:41:58 +epoch [37/50] batch [150/500] time 0.870 (0.892) data 0.000 (0.005) loss 0.6909 (1.0581) acc 84.3750 (74.0833) lr 4.1221e-04 eta 1:41:47 +epoch [37/50] batch [155/500] time 0.935 (0.892) data 0.000 (0.005) loss 1.0322 (1.0535) acc 59.3750 (74.0323) lr 4.1221e-04 eta 1:41:46 +epoch [37/50] batch [160/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.7622 (1.0465) acc 78.1250 (74.2383) lr 4.1221e-04 eta 1:41:38 +epoch [37/50] batch [165/500] time 0.868 (0.891) data 0.000 (0.005) loss 1.2168 (1.0489) acc 59.3750 (74.1098) lr 4.1221e-04 eta 1:41:31 +epoch [37/50] batch [170/500] time 0.895 (0.891) data 0.000 (0.004) loss 0.6646 (1.0440) acc 81.2500 (74.3566) lr 4.1221e-04 eta 1:41:25 +epoch [37/50] batch [175/500] time 0.899 (0.891) data 0.000 (0.004) loss 1.2021 (1.0434) acc 68.7500 (74.3750) lr 4.1221e-04 eta 1:41:19 +epoch [37/50] batch [180/500] time 0.908 (0.891) data 0.000 (0.004) loss 0.9297 (1.0411) acc 75.0000 (74.3924) lr 4.1221e-04 eta 1:41:14 +epoch [37/50] batch [185/500] time 0.879 (0.891) data 0.000 (0.004) loss 0.7837 (1.0408) acc 84.3750 (74.4257) lr 4.1221e-04 eta 1:41:09 +epoch [37/50] batch [190/500] time 0.871 (0.890) data 0.000 (0.004) loss 0.7207 (1.0445) acc 81.2500 (74.3914) lr 4.1221e-04 eta 1:41:03 +epoch [37/50] batch [195/500] time 0.861 (0.890) data 0.000 (0.004) loss 1.5234 (1.0508) acc 68.7500 (74.2949) lr 4.1221e-04 eta 1:40:59 +epoch [37/50] batch [200/500] time 0.875 (0.890) data 0.000 (0.004) loss 0.5854 (1.0465) acc 90.6250 (74.3906) lr 4.1221e-04 eta 1:40:53 +epoch [37/50] batch [205/500] time 0.862 (0.890) data 0.000 (0.004) loss 1.1670 (1.0493) acc 71.8750 (74.3598) lr 4.1221e-04 eta 1:40:46 +epoch [37/50] batch [210/500] time 0.913 
(0.890) data 0.000 (0.004) loss 1.4316 (1.0526) acc 62.5000 (74.2560) lr 4.1221e-04 eta 1:40:41 +epoch [37/50] batch [215/500] time 0.917 (0.890) data 0.000 (0.004) loss 1.6318 (1.0525) acc 46.8750 (74.2006) lr 4.1221e-04 eta 1:40:37 +epoch [37/50] batch [220/500] time 0.901 (0.890) data 0.000 (0.003) loss 1.1025 (1.0510) acc 65.6250 (74.1477) lr 4.1221e-04 eta 1:40:32 +epoch [37/50] batch [225/500] time 0.866 (0.889) data 0.000 (0.003) loss 1.2051 (1.0526) acc 78.1250 (74.0833) lr 4.1221e-04 eta 1:40:25 +epoch [37/50] batch [230/500] time 0.891 (0.889) data 0.000 (0.003) loss 0.8691 (1.0563) acc 71.8750 (73.9402) lr 4.1221e-04 eta 1:40:21 +epoch [37/50] batch [235/500] time 0.860 (0.889) data 0.000 (0.003) loss 1.6494 (1.0576) acc 62.5000 (73.9229) lr 4.1221e-04 eta 1:40:14 +epoch [37/50] batch [240/500] time 0.875 (0.889) data 0.000 (0.003) loss 1.1787 (1.0583) acc 78.1250 (73.9583) lr 4.1221e-04 eta 1:40:07 +epoch [37/50] batch [245/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.4062 (1.0618) acc 59.3750 (73.8776) lr 4.1221e-04 eta 1:40:01 +epoch [37/50] batch [250/500] time 0.906 (0.888) data 0.000 (0.003) loss 1.5068 (1.0694) acc 62.5000 (73.6875) lr 4.1221e-04 eta 1:39:57 +epoch [37/50] batch [255/500] time 0.876 (0.889) data 0.000 (0.003) loss 0.6890 (1.0739) acc 87.5000 (73.6275) lr 4.1221e-04 eta 1:39:54 +epoch [37/50] batch [260/500] time 0.889 (0.889) data 0.000 (0.003) loss 0.3916 (1.0689) acc 93.7500 (73.7139) lr 4.1221e-04 eta 1:39:50 +epoch [37/50] batch [265/500] time 0.908 (0.889) data 0.000 (0.003) loss 1.2588 (1.0687) acc 75.0000 (73.7146) lr 4.1221e-04 eta 1:39:45 +epoch [37/50] batch [270/500] time 0.879 (0.889) data 0.000 (0.003) loss 1.5127 (1.0689) acc 56.2500 (73.6343) lr 4.1221e-04 eta 1:39:41 +epoch [37/50] batch [275/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.7437 (1.0679) acc 84.3750 (73.6932) lr 4.1221e-04 eta 1:39:35 +epoch [37/50] batch [280/500] time 0.877 (0.889) data 0.000 (0.003) loss 0.6489 (1.0694) acc 78.1250 
(73.6049) lr 4.1221e-04 eta 1:39:30 +epoch [37/50] batch [285/500] time 0.872 (0.888) data 0.000 (0.003) loss 0.4897 (1.0696) acc 84.3750 (73.6294) lr 4.1221e-04 eta 1:39:26 +epoch [37/50] batch [290/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.9766 (1.0658) acc 78.1250 (73.6638) lr 4.1221e-04 eta 1:39:20 +epoch [37/50] batch [295/500] time 0.899 (0.888) data 0.000 (0.003) loss 1.4541 (1.0674) acc 81.2500 (73.6864) lr 4.1221e-04 eta 1:39:15 +epoch [37/50] batch [300/500] time 0.924 (0.888) data 0.000 (0.003) loss 1.5693 (1.0735) acc 62.5000 (73.6042) lr 4.1221e-04 eta 1:39:12 +epoch [37/50] batch [305/500] time 0.910 (0.889) data 0.000 (0.003) loss 0.8936 (1.0706) acc 78.1250 (73.6066) lr 4.1221e-04 eta 1:39:08 +epoch [37/50] batch [310/500] time 0.895 (0.889) data 0.000 (0.003) loss 0.8916 (1.0687) acc 75.0000 (73.6492) lr 4.1221e-04 eta 1:39:04 +epoch [37/50] batch [315/500] time 0.914 (0.889) data 0.000 (0.002) loss 2.0488 (1.0702) acc 56.2500 (73.6210) lr 4.1221e-04 eta 1:38:59 +epoch [37/50] batch [320/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.8135 (1.0681) acc 84.3750 (73.6914) lr 4.1221e-04 eta 1:38:54 +epoch [37/50] batch [325/500] time 0.865 (0.888) data 0.000 (0.002) loss 0.6606 (1.0665) acc 81.2500 (73.6827) lr 4.1221e-04 eta 1:38:49 +epoch [37/50] batch [330/500] time 0.876 (0.888) data 0.000 (0.002) loss 0.7837 (1.0645) acc 71.8750 (73.6458) lr 4.1221e-04 eta 1:38:45 +epoch [37/50] batch [335/500] time 0.870 (0.888) data 0.000 (0.002) loss 1.4561 (1.0641) acc 68.7500 (73.7220) lr 4.1221e-04 eta 1:38:38 +epoch [37/50] batch [340/500] time 0.892 (0.888) data 0.000 (0.002) loss 1.3223 (1.0663) acc 68.7500 (73.6765) lr 4.1221e-04 eta 1:38:33 +epoch [37/50] batch [345/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.0664 (1.0688) acc 75.0000 (73.5870) lr 4.1221e-04 eta 1:38:28 +epoch [37/50] batch [350/500] time 0.867 (0.888) data 0.000 (0.002) loss 1.1152 (1.0709) acc 75.0000 (73.5536) lr 4.1221e-04 eta 1:38:23 +epoch [37/50] batch [355/500] 
time 0.858 (0.888) data 0.000 (0.002) loss 1.3428 (1.0713) acc 65.6250 (73.5387) lr 4.1221e-04 eta 1:38:17 +epoch [37/50] batch [360/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.4204 (1.0721) acc 87.5000 (73.4809) lr 4.1221e-04 eta 1:38:12 +epoch [37/50] batch [365/500] time 0.867 (0.887) data 0.000 (0.002) loss 1.1885 (1.0698) acc 71.8750 (73.5531) lr 4.1221e-04 eta 1:38:07 +epoch [37/50] batch [370/500] time 0.880 (0.887) data 0.000 (0.002) loss 1.0039 (1.0691) acc 81.2500 (73.5811) lr 4.1221e-04 eta 1:38:01 +epoch [37/50] batch [375/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.5488 (1.0678) acc 81.2500 (73.5750) lr 4.1221e-04 eta 1:37:56 +epoch [37/50] batch [380/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.2812 (1.0673) acc 68.7500 (73.5691) lr 4.1221e-04 eta 1:37:51 +epoch [37/50] batch [385/500] time 0.911 (0.887) data 0.000 (0.002) loss 0.5933 (1.0634) acc 87.5000 (73.6851) lr 4.1221e-04 eta 1:37:46 +epoch [37/50] batch [390/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.4053 (1.0642) acc 81.2500 (73.6859) lr 4.1221e-04 eta 1:37:42 +epoch [37/50] batch [395/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.6240 (1.0651) acc 78.1250 (73.6946) lr 4.1221e-04 eta 1:37:36 +epoch [37/50] batch [400/500] time 0.890 (0.887) data 0.000 (0.002) loss 1.0752 (1.0651) acc 78.1250 (73.6797) lr 4.1221e-04 eta 1:37:34 +epoch [37/50] batch [405/500] time 0.886 (0.887) data 0.000 (0.002) loss 1.4365 (1.0668) acc 68.7500 (73.6651) lr 4.1221e-04 eta 1:37:30 +epoch [37/50] batch [410/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.6265 (1.0656) acc 84.3750 (73.6662) lr 4.1221e-04 eta 1:37:25 +epoch [37/50] batch [415/500] time 0.901 (0.887) data 0.000 (0.002) loss 1.4355 (1.0686) acc 68.7500 (73.5693) lr 4.1221e-04 eta 1:37:21 +epoch [37/50] batch [420/500] time 0.857 (0.887) data 0.000 (0.002) loss 1.2051 (1.0677) acc 65.6250 (73.5640) lr 4.1221e-04 eta 1:37:17 +epoch [37/50] batch [425/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.9336 (1.0683) acc 
75.0000 (73.5294) lr 4.1221e-04 eta 1:37:12 +epoch [37/50] batch [430/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.4004 (1.0678) acc 75.0000 (73.5392) lr 4.1221e-04 eta 1:37:08 +epoch [37/50] batch [435/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.8750 (1.0672) acc 78.1250 (73.5632) lr 4.1221e-04 eta 1:37:04 +epoch [37/50] batch [440/500] time 0.960 (0.887) data 0.000 (0.002) loss 0.9434 (1.0687) acc 78.1250 (73.5298) lr 4.1221e-04 eta 1:37:01 +epoch [37/50] batch [445/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.2715 (1.0680) acc 65.6250 (73.4972) lr 4.1221e-04 eta 1:36:57 +epoch [37/50] batch [450/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.1348 (1.0678) acc 62.5000 (73.4722) lr 4.1221e-04 eta 1:36:52 +epoch [37/50] batch [455/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.5928 (1.0697) acc 65.6250 (73.4341) lr 4.1221e-04 eta 1:36:47 +epoch [37/50] batch [460/500] time 0.915 (0.887) data 0.000 (0.002) loss 1.1611 (1.0710) acc 71.8750 (73.3832) lr 4.1221e-04 eta 1:36:43 +epoch [37/50] batch [465/500] time 0.908 (0.887) data 0.000 (0.002) loss 1.4502 (1.0697) acc 59.3750 (73.3871) lr 4.1221e-04 eta 1:36:39 +epoch [37/50] batch [470/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.0586 (1.0701) acc 78.1250 (73.4043) lr 4.1221e-04 eta 1:36:34 +epoch [37/50] batch [475/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.1680 (1.0715) acc 84.3750 (73.4276) lr 4.1221e-04 eta 1:36:30 +epoch [37/50] batch [480/500] time 0.889 (0.888) data 0.000 (0.002) loss 1.0811 (1.0714) acc 78.1250 (73.4570) lr 4.1221e-04 eta 1:36:26 +epoch [37/50] batch [485/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.0322 (1.0706) acc 75.0000 (73.4665) lr 4.1221e-04 eta 1:36:20 +epoch [37/50] batch [490/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.5591 (1.0707) acc 81.2500 (73.4375) lr 4.1221e-04 eta 1:36:16 +epoch [37/50] batch [495/500] time 0.847 (0.887) data 0.000 (0.002) loss 0.6655 (1.0723) acc 84.3750 (73.4091) lr 4.1221e-04 eta 1:36:11 +epoch [37/50] batch 
[500/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.2168 (1.0743) acc 78.1250 (73.4062) lr 3.6258e-04 eta 1:36:07 +epoch [38/50] batch [5/500] time 0.864 (1.020) data 0.000 (0.145) loss 0.4302 (0.7839) acc 87.5000 (78.7500) lr 3.6258e-04 eta 1:50:21 +epoch [38/50] batch [10/500] time 0.872 (0.958) data 0.000 (0.073) loss 1.3027 (0.9963) acc 65.6250 (73.7500) lr 3.6258e-04 eta 1:43:36 +epoch [38/50] batch [15/500] time 0.864 (0.931) data 0.000 (0.049) loss 0.9136 (1.0277) acc 84.3750 (73.7500) lr 3.6258e-04 eta 1:40:35 +epoch [38/50] batch [20/500] time 0.865 (0.919) data 0.000 (0.037) loss 1.5830 (1.0603) acc 59.3750 (72.9688) lr 3.6258e-04 eta 1:39:13 +epoch [38/50] batch [25/500] time 0.872 (0.909) data 0.000 (0.029) loss 0.7720 (1.1071) acc 84.3750 (72.7500) lr 3.6258e-04 eta 1:38:03 +epoch [38/50] batch [30/500] time 0.892 (0.908) data 0.000 (0.024) loss 1.5889 (1.1872) acc 68.7500 (72.2917) lr 3.6258e-04 eta 1:37:55 +epoch [38/50] batch [35/500] time 0.885 (0.904) data 0.000 (0.021) loss 0.9722 (1.1521) acc 78.1250 (72.9464) lr 3.6258e-04 eta 1:37:22 +epoch [38/50] batch [40/500] time 0.894 (0.901) data 0.000 (0.018) loss 1.3994 (1.1381) acc 68.7500 (72.8125) lr 3.6258e-04 eta 1:37:01 +epoch [38/50] batch [45/500] time 0.890 (0.901) data 0.000 (0.016) loss 0.9258 (1.1334) acc 71.8750 (72.8472) lr 3.6258e-04 eta 1:36:55 +epoch [38/50] batch [50/500] time 0.888 (0.900) data 0.000 (0.015) loss 0.5938 (1.1367) acc 87.5000 (73.2500) lr 3.6258e-04 eta 1:36:43 +epoch [38/50] batch [55/500] time 0.919 (0.899) data 0.000 (0.013) loss 1.4912 (1.1500) acc 75.0000 (73.2955) lr 3.6258e-04 eta 1:36:30 +epoch [38/50] batch [60/500] time 0.888 (0.896) data 0.000 (0.012) loss 1.0576 (1.1471) acc 81.2500 (73.2812) lr 3.6258e-04 eta 1:36:10 +epoch [38/50] batch [65/500] time 0.909 (0.895) data 0.000 (0.011) loss 1.6045 (1.1534) acc 68.7500 (73.4135) lr 3.6258e-04 eta 1:36:02 +epoch [38/50] batch [70/500] time 0.882 (0.893) data 0.000 (0.011) loss 1.3516 (1.1449) acc 71.8750 
(73.2589) lr 3.6258e-04 eta 1:35:44 +epoch [38/50] batch [75/500] time 0.883 (0.893) data 0.000 (0.010) loss 1.1582 (1.1274) acc 68.7500 (73.5000) lr 3.6258e-04 eta 1:35:34 +epoch [38/50] batch [80/500] time 0.862 (0.892) data 0.000 (0.009) loss 1.4600 (1.1236) acc 65.6250 (73.2422) lr 3.6258e-04 eta 1:35:25 +epoch [38/50] batch [85/500] time 0.909 (0.892) data 0.000 (0.009) loss 1.1113 (1.1347) acc 68.7500 (72.7941) lr 3.6258e-04 eta 1:35:21 +epoch [38/50] batch [90/500] time 0.888 (0.893) data 0.000 (0.008) loss 0.7520 (1.1293) acc 81.2500 (72.7083) lr 3.6258e-04 eta 1:35:24 +epoch [38/50] batch [95/500] time 0.880 (0.892) data 0.000 (0.008) loss 1.7539 (1.1407) acc 59.3750 (72.4342) lr 3.6258e-04 eta 1:35:15 +epoch [38/50] batch [100/500] time 0.883 (0.892) data 0.000 (0.007) loss 0.8809 (1.1369) acc 75.0000 (72.3438) lr 3.6258e-04 eta 1:35:08 +epoch [38/50] batch [105/500] time 0.884 (0.891) data 0.000 (0.007) loss 1.4775 (1.1366) acc 62.5000 (72.3512) lr 3.6258e-04 eta 1:34:59 +epoch [38/50] batch [110/500] time 0.878 (0.891) data 0.000 (0.007) loss 1.3984 (1.1371) acc 53.1250 (72.2443) lr 3.6258e-04 eta 1:34:51 +epoch [38/50] batch [115/500] time 0.866 (0.891) data 0.000 (0.007) loss 1.0557 (1.1375) acc 68.7500 (72.2011) lr 3.6258e-04 eta 1:34:46 +epoch [38/50] batch [120/500] time 0.897 (0.890) data 0.000 (0.006) loss 0.7158 (1.1293) acc 81.2500 (72.3958) lr 3.6258e-04 eta 1:34:41 +epoch [38/50] batch [125/500] time 0.890 (0.890) data 0.000 (0.006) loss 1.1006 (1.1247) acc 71.8750 (72.5000) lr 3.6258e-04 eta 1:34:33 +epoch [38/50] batch [130/500] time 0.879 (0.890) data 0.000 (0.006) loss 1.1191 (1.1192) acc 68.7500 (72.5481) lr 3.6258e-04 eta 1:34:28 +epoch [38/50] batch [135/500] time 0.889 (0.891) data 0.000 (0.006) loss 1.2754 (1.1195) acc 71.8750 (72.5926) lr 3.6258e-04 eta 1:34:31 +epoch [38/50] batch [140/500] time 0.865 (0.890) data 0.000 (0.005) loss 0.8862 (1.1194) acc 81.2500 (72.5670) lr 3.6258e-04 eta 1:34:22 +epoch [38/50] batch [145/500] time 
0.890 (0.890) data 0.000 (0.005) loss 1.4492 (1.1242) acc 59.3750 (72.3707) lr 3.6258e-04 eta 1:34:15 +epoch [38/50] batch [150/500] time 0.870 (0.890) data 0.000 (0.005) loss 1.1211 (1.1216) acc 65.6250 (72.4583) lr 3.6258e-04 eta 1:34:10 +epoch [38/50] batch [155/500] time 0.882 (0.889) data 0.000 (0.005) loss 0.9263 (1.1201) acc 81.2500 (72.5605) lr 3.6258e-04 eta 1:34:03 +epoch [38/50] batch [160/500] time 0.865 (0.889) data 0.000 (0.005) loss 0.9434 (1.1143) acc 75.0000 (72.6172) lr 3.6258e-04 eta 1:33:56 +epoch [38/50] batch [165/500] time 0.859 (0.889) data 0.000 (0.005) loss 1.9062 (1.1124) acc 59.3750 (72.5758) lr 3.6258e-04 eta 1:33:49 +epoch [38/50] batch [170/500] time 0.874 (0.888) data 0.000 (0.004) loss 1.0693 (1.1094) acc 68.7500 (72.6103) lr 3.6258e-04 eta 1:33:41 +epoch [38/50] batch [175/500] time 0.916 (0.888) data 0.000 (0.004) loss 1.2578 (1.1063) acc 78.1250 (72.7143) lr 3.6258e-04 eta 1:33:36 +epoch [38/50] batch [180/500] time 0.884 (0.888) data 0.000 (0.004) loss 1.3350 (1.1078) acc 62.5000 (72.7604) lr 3.6258e-04 eta 1:33:29 +epoch [38/50] batch [185/500] time 0.876 (0.888) data 0.000 (0.004) loss 0.5903 (1.1027) acc 87.5000 (72.9223) lr 3.6258e-04 eta 1:33:24 +epoch [38/50] batch [190/500] time 0.889 (0.887) data 0.000 (0.004) loss 0.3262 (1.1024) acc 84.3750 (72.9605) lr 3.6258e-04 eta 1:33:19 +epoch [38/50] batch [195/500] time 0.912 (0.888) data 0.000 (0.004) loss 0.5991 (1.0985) acc 84.3750 (73.0288) lr 3.6258e-04 eta 1:33:17 +epoch [38/50] batch [200/500] time 0.888 (0.888) data 0.000 (0.004) loss 1.2109 (1.1014) acc 75.0000 (72.9531) lr 3.6258e-04 eta 1:33:13 +epoch [38/50] batch [205/500] time 0.875 (0.888) data 0.000 (0.004) loss 0.6104 (1.0926) acc 84.3750 (73.0945) lr 3.6258e-04 eta 1:33:07 +epoch [38/50] batch [210/500] time 0.886 (0.888) data 0.000 (0.004) loss 1.4434 (1.0971) acc 62.5000 (73.0208) lr 3.6258e-04 eta 1:33:02 +epoch [38/50] batch [215/500] time 0.916 (0.888) data 0.000 (0.004) loss 1.0352 (1.0933) acc 68.7500 
(73.0669) lr 3.6258e-04 eta 1:32:59 +epoch [38/50] batch [220/500] time 0.863 (0.887) data 0.000 (0.004) loss 1.0059 (1.0953) acc 71.8750 (73.0398) lr 3.6258e-04 eta 1:32:51 +epoch [38/50] batch [225/500] time 0.911 (0.887) data 0.000 (0.003) loss 1.4062 (1.0943) acc 75.0000 (73.1389) lr 3.6258e-04 eta 1:32:47 +epoch [38/50] batch [230/500] time 1.028 (0.888) data 0.000 (0.003) loss 0.5249 (1.0888) acc 84.3750 (73.2201) lr 3.6258e-04 eta 1:32:46 +epoch [38/50] batch [235/500] time 0.855 (0.888) data 0.000 (0.003) loss 1.2676 (1.0888) acc 68.7500 (73.2580) lr 3.6258e-04 eta 1:32:43 +epoch [38/50] batch [240/500] time 0.910 (0.888) data 0.000 (0.003) loss 1.1318 (1.0907) acc 71.8750 (73.2031) lr 3.6258e-04 eta 1:32:38 +epoch [38/50] batch [245/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.0303 (1.0944) acc 78.1250 (73.1505) lr 3.6258e-04 eta 1:32:36 +epoch [38/50] batch [250/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.3232 (1.0891) acc 68.7500 (73.2375) lr 3.6258e-04 eta 1:32:30 +epoch [38/50] batch [255/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.1846 (1.0902) acc 65.6250 (73.1863) lr 3.6258e-04 eta 1:32:24 +epoch [38/50] batch [260/500] time 0.910 (0.888) data 0.000 (0.003) loss 0.9917 (1.0908) acc 75.0000 (73.1370) lr 3.6258e-04 eta 1:32:20 +epoch [38/50] batch [265/500] time 0.898 (0.888) data 0.000 (0.003) loss 1.3135 (1.0913) acc 56.2500 (73.0778) lr 3.6258e-04 eta 1:32:15 +epoch [38/50] batch [270/500] time 0.900 (0.888) data 0.000 (0.003) loss 1.6875 (1.0931) acc 59.3750 (73.0093) lr 3.6258e-04 eta 1:32:12 +epoch [38/50] batch [275/500] time 0.865 (0.888) data 0.000 (0.003) loss 1.0098 (1.0889) acc 71.8750 (73.0455) lr 3.6258e-04 eta 1:32:08 +epoch [38/50] batch [280/500] time 0.891 (0.888) data 0.000 (0.003) loss 1.3848 (1.0885) acc 71.8750 (73.0134) lr 3.6258e-04 eta 1:32:03 +epoch [38/50] batch [285/500] time 0.884 (0.888) data 0.000 (0.003) loss 0.8242 (1.0870) acc 78.1250 (73.0482) lr 3.6258e-04 eta 1:31:59 +epoch [38/50] batch [290/500] 
time 0.878 (0.888) data 0.000 (0.003) loss 1.5986 (1.0861) acc 62.5000 (73.0927) lr 3.6258e-04 eta 1:31:55 +epoch [38/50] batch [295/500] time 0.880 (0.888) data 0.000 (0.003) loss 1.0439 (1.0840) acc 71.8750 (73.1462) lr 3.6258e-04 eta 1:31:51 +epoch [38/50] batch [300/500] time 0.896 (0.888) data 0.000 (0.003) loss 0.7407 (1.0835) acc 71.8750 (73.1250) lr 3.6258e-04 eta 1:31:46 +epoch [38/50] batch [305/500] time 0.871 (0.888) data 0.000 (0.003) loss 1.3682 (1.0860) acc 81.2500 (73.1148) lr 3.6258e-04 eta 1:31:41 +epoch [38/50] batch [310/500] time 0.911 (0.888) data 0.000 (0.003) loss 1.0674 (1.0915) acc 68.7500 (72.9637) lr 3.6258e-04 eta 1:31:36 +epoch [38/50] batch [315/500] time 0.873 (0.888) data 0.000 (0.003) loss 1.2109 (1.0921) acc 81.2500 (73.0258) lr 3.6258e-04 eta 1:31:32 +epoch [38/50] batch [320/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.8579 (1.0894) acc 65.6250 (73.0859) lr 3.6258e-04 eta 1:31:27 +epoch [38/50] batch [325/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.3301 (1.0948) acc 68.7500 (72.9712) lr 3.6258e-04 eta 1:31:23 +epoch [38/50] batch [330/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.1230 (1.0946) acc 78.1250 (73.0303) lr 3.6258e-04 eta 1:31:17 +epoch [38/50] batch [335/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8965 (1.0905) acc 81.2500 (73.1063) lr 3.6258e-04 eta 1:31:12 +epoch [38/50] batch [340/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.0215 (1.0914) acc 75.0000 (73.1250) lr 3.6258e-04 eta 1:31:06 +epoch [38/50] batch [345/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.9702 (1.0917) acc 78.1250 (73.1522) lr 3.6258e-04 eta 1:31:00 +epoch [38/50] batch [350/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.9644 (1.0920) acc 71.8750 (73.1607) lr 3.6258e-04 eta 1:30:55 +epoch [38/50] batch [355/500] time 0.852 (0.887) data 0.000 (0.002) loss 1.0898 (1.0907) acc 78.1250 (73.2042) lr 3.6258e-04 eta 1:30:48 +epoch [38/50] batch [360/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.8765 (1.0886) acc 
68.7500 (73.2118) lr 3.6258e-04 eta 1:30:43 +epoch [38/50] batch [365/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.6519 (1.0875) acc 78.1250 (73.2192) lr 3.6258e-04 eta 1:30:38 +epoch [38/50] batch [370/500] time 0.905 (0.887) data 0.000 (0.002) loss 0.6333 (1.0860) acc 84.3750 (73.2264) lr 3.6258e-04 eta 1:30:34 +epoch [38/50] batch [375/500] time 0.922 (0.887) data 0.000 (0.002) loss 0.5933 (1.0826) acc 75.0000 (73.3083) lr 3.6258e-04 eta 1:30:32 +epoch [38/50] batch [380/500] time 0.846 (0.887) data 0.000 (0.002) loss 1.3301 (1.0835) acc 68.7500 (73.3059) lr 3.6258e-04 eta 1:30:27 +epoch [38/50] batch [385/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9775 (1.0809) acc 78.1250 (73.4010) lr 3.6258e-04 eta 1:30:22 +epoch [38/50] batch [390/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9014 (1.0814) acc 81.2500 (73.3974) lr 3.6258e-04 eta 1:30:17 +epoch [38/50] batch [395/500] time 0.869 (0.887) data 0.000 (0.002) loss 1.3770 (1.0829) acc 65.6250 (73.3703) lr 3.6258e-04 eta 1:30:13 +epoch [38/50] batch [400/500] time 0.884 (0.887) data 0.000 (0.002) loss 1.1855 (1.0822) acc 71.8750 (73.3828) lr 3.6258e-04 eta 1:30:09 +epoch [38/50] batch [405/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.6646 (1.0801) acc 81.2500 (73.3951) lr 3.6258e-04 eta 1:30:05 +epoch [38/50] batch [410/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.3457 (1.0797) acc 65.6250 (73.3994) lr 3.6258e-04 eta 1:29:59 +epoch [38/50] batch [415/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.0557 (1.0756) acc 68.7500 (73.4337) lr 3.6258e-04 eta 1:29:54 +epoch [38/50] batch [420/500] time 0.876 (0.887) data 0.000 (0.002) loss 0.7471 (1.0729) acc 75.0000 (73.4673) lr 3.6258e-04 eta 1:29:50 +epoch [38/50] batch [425/500] time 0.885 (0.886) data 0.000 (0.002) loss 0.7734 (1.0722) acc 75.0000 (73.4853) lr 3.6258e-04 eta 1:29:44 +epoch [38/50] batch [430/500] time 0.867 (0.886) data 0.000 (0.002) loss 1.9629 (1.0764) acc 62.5000 (73.4520) lr 3.6258e-04 eta 1:29:39 +epoch [38/50] batch 
[435/500] time 0.856 (0.886) data 0.000 (0.002) loss 1.0293 (1.0788) acc 81.2500 (73.4483) lr 3.6258e-04 eta 1:29:34 +epoch [38/50] batch [440/500] time 0.869 (0.886) data 0.000 (0.002) loss 0.5664 (1.0768) acc 84.3750 (73.4588) lr 3.6258e-04 eta 1:29:29 +epoch [38/50] batch [445/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.7627 (1.0768) acc 84.3750 (73.4410) lr 3.6258e-04 eta 1:29:25 +epoch [38/50] batch [450/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.1074 (1.0756) acc 71.8750 (73.4792) lr 3.6258e-04 eta 1:29:19 +epoch [38/50] batch [455/500] time 0.866 (0.886) data 0.000 (0.002) loss 0.9595 (1.0752) acc 71.8750 (73.4478) lr 3.6258e-04 eta 1:29:15 +epoch [38/50] batch [460/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7305 (1.0746) acc 78.1250 (73.4851) lr 3.6258e-04 eta 1:29:10 +epoch [38/50] batch [465/500] time 0.874 (0.886) data 0.000 (0.002) loss 0.9268 (1.0742) acc 71.8750 (73.4946) lr 3.6258e-04 eta 1:29:06 +epoch [38/50] batch [470/500] time 0.888 (0.886) data 0.001 (0.002) loss 1.6641 (1.0737) acc 59.3750 (73.4840) lr 3.6258e-04 eta 1:29:02 +epoch [38/50] batch [475/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.3398 (1.0745) acc 68.7500 (73.4671) lr 3.6258e-04 eta 1:28:58 +epoch [38/50] batch [480/500] time 0.872 (0.886) data 0.000 (0.002) loss 1.4854 (1.0765) acc 62.5000 (73.3724) lr 3.6258e-04 eta 1:28:54 +epoch [38/50] batch [485/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.0557 (1.0755) acc 81.2500 (73.4214) lr 3.6258e-04 eta 1:28:49 +epoch [38/50] batch [490/500] time 0.864 (0.886) data 0.000 (0.002) loss 0.7773 (1.0743) acc 75.0000 (73.4630) lr 3.6258e-04 eta 1:28:43 +epoch [38/50] batch [495/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.4805 (1.0735) acc 65.6250 (73.4217) lr 3.6258e-04 eta 1:28:39 +epoch [38/50] batch [500/500] time 0.891 (0.886) data 0.000 (0.002) loss 0.9395 (1.0730) acc 71.8750 (73.4000) lr 3.1545e-04 eta 1:28:35 +epoch [39/50] batch [5/500] time 0.871 (1.026) data 0.000 (0.132) loss 0.7847 (1.0099) 
acc 81.2500 (73.7500) lr 3.1545e-04 eta 1:42:32 +epoch [39/50] batch [10/500] time 0.863 (0.944) data 0.000 (0.066) loss 1.7412 (1.1196) acc 59.3750 (71.8750) lr 3.1545e-04 eta 1:34:12 +epoch [39/50] batch [15/500] time 0.878 (0.922) data 0.000 (0.044) loss 1.0264 (1.0992) acc 75.0000 (72.5000) lr 3.1545e-04 eta 1:31:57 +epoch [39/50] batch [20/500] time 0.916 (0.913) data 0.000 (0.033) loss 1.3193 (1.1361) acc 68.7500 (72.5000) lr 3.1545e-04 eta 1:30:59 +epoch [39/50] batch [25/500] time 0.885 (0.907) data 0.000 (0.027) loss 1.1504 (1.0844) acc 62.5000 (73.3750) lr 3.1545e-04 eta 1:30:18 +epoch [39/50] batch [30/500] time 0.868 (0.903) data 0.000 (0.022) loss 1.1309 (1.1096) acc 59.3750 (72.7083) lr 3.1545e-04 eta 1:29:49 +epoch [39/50] batch [35/500] time 0.884 (0.899) data 0.000 (0.019) loss 1.1885 (1.1282) acc 71.8750 (72.5893) lr 3.1545e-04 eta 1:29:23 +epoch [39/50] batch [40/500] time 0.877 (0.895) data 0.000 (0.017) loss 1.0186 (1.1141) acc 65.6250 (72.7344) lr 3.1545e-04 eta 1:28:56 +epoch [39/50] batch [45/500] time 0.856 (0.895) data 0.000 (0.015) loss 1.5166 (1.1185) acc 62.5000 (72.7083) lr 3.1545e-04 eta 1:28:51 +epoch [39/50] batch [50/500] time 0.897 (0.893) data 0.000 (0.013) loss 0.5039 (1.1061) acc 93.7500 (73.0000) lr 3.1545e-04 eta 1:28:36 +epoch [39/50] batch [55/500] time 0.866 (0.891) data 0.000 (0.012) loss 1.2451 (1.1027) acc 75.0000 (73.0114) lr 3.1545e-04 eta 1:28:17 +epoch [39/50] batch [60/500] time 0.880 (0.891) data 0.000 (0.011) loss 1.3994 (1.1110) acc 68.7500 (72.6562) lr 3.1545e-04 eta 1:28:12 +epoch [39/50] batch [65/500] time 0.917 (0.891) data 0.000 (0.010) loss 1.3691 (1.1169) acc 71.8750 (72.4519) lr 3.1545e-04 eta 1:28:06 +epoch [39/50] batch [70/500] time 0.887 (0.892) data 0.000 (0.010) loss 0.6807 (1.1059) acc 87.5000 (72.7232) lr 3.1545e-04 eta 1:28:11 +epoch [39/50] batch [75/500] time 0.900 (0.892) data 0.000 (0.009) loss 1.2324 (1.0948) acc 65.6250 (73.0833) lr 3.1545e-04 eta 1:28:04 +epoch [39/50] batch [80/500] 
time 0.868 (0.891) data 0.000 (0.008) loss 2.0078 (1.0949) acc 68.7500 (73.1250) lr 3.1545e-04 eta 1:27:53 +epoch [39/50] batch [85/500] time 0.882 (0.890) data 0.000 (0.008) loss 0.8345 (1.0890) acc 68.7500 (73.0147) lr 3.1545e-04 eta 1:27:46 +epoch [39/50] batch [90/500] time 0.866 (0.889) data 0.000 (0.008) loss 0.7285 (1.0803) acc 81.2500 (73.0903) lr 3.1545e-04 eta 1:27:36 +epoch [39/50] batch [95/500] time 0.885 (0.889) data 0.000 (0.007) loss 1.0537 (1.0733) acc 71.8750 (73.1579) lr 3.1545e-04 eta 1:27:31 +epoch [39/50] batch [100/500] time 0.910 (0.889) data 0.000 (0.007) loss 1.2754 (1.0751) acc 65.6250 (72.9688) lr 3.1545e-04 eta 1:27:25 +epoch [39/50] batch [105/500] time 0.887 (0.889) data 0.000 (0.007) loss 0.8267 (1.0689) acc 78.1250 (73.0655) lr 3.1545e-04 eta 1:27:18 +epoch [39/50] batch [110/500] time 0.901 (0.888) data 0.000 (0.006) loss 0.7334 (1.0662) acc 75.0000 (73.0966) lr 3.1545e-04 eta 1:27:11 +epoch [39/50] batch [115/500] time 0.874 (0.889) data 0.000 (0.006) loss 0.9624 (1.0681) acc 81.2500 (73.1793) lr 3.1545e-04 eta 1:27:14 +epoch [39/50] batch [120/500] time 0.877 (0.890) data 0.000 (0.006) loss 0.7202 (1.0583) acc 78.1250 (73.4896) lr 3.1545e-04 eta 1:27:12 +epoch [39/50] batch [125/500] time 0.873 (0.890) data 0.000 (0.006) loss 1.1572 (1.0545) acc 62.5000 (73.4750) lr 3.1545e-04 eta 1:27:08 +epoch [39/50] batch [130/500] time 0.878 (0.890) data 0.000 (0.005) loss 1.0439 (1.0620) acc 75.0000 (73.2933) lr 3.1545e-04 eta 1:27:03 +epoch [39/50] batch [135/500] time 0.878 (0.890) data 0.000 (0.005) loss 1.4766 (1.0695) acc 62.5000 (73.0787) lr 3.1545e-04 eta 1:26:57 +epoch [39/50] batch [140/500] time 0.878 (0.889) data 0.000 (0.005) loss 1.2861 (1.0682) acc 65.6250 (73.1920) lr 3.1545e-04 eta 1:26:48 +epoch [39/50] batch [145/500] time 0.908 (0.888) data 0.000 (0.005) loss 1.0869 (1.0671) acc 71.8750 (73.1897) lr 3.1545e-04 eta 1:26:41 +epoch [39/50] batch [150/500] time 0.914 (0.889) data 0.000 (0.005) loss 1.4541 (1.0643) acc 65.6250 
(73.3333) lr 3.1545e-04 eta 1:26:39 +epoch [39/50] batch [155/500] time 0.911 (0.889) data 0.000 (0.004) loss 0.8691 (1.0651) acc 65.6250 (73.2863) lr 3.1545e-04 eta 1:26:35 +epoch [39/50] batch [160/500] time 0.882 (0.889) data 0.000 (0.004) loss 1.1465 (1.0672) acc 71.8750 (73.2617) lr 3.1545e-04 eta 1:26:30 +epoch [39/50] batch [165/500] time 0.913 (0.889) data 0.000 (0.004) loss 0.8506 (1.0629) acc 81.2500 (73.3712) lr 3.1545e-04 eta 1:26:25 +epoch [39/50] batch [170/500] time 0.899 (0.889) data 0.000 (0.004) loss 1.3398 (1.0686) acc 71.8750 (73.3088) lr 3.1545e-04 eta 1:26:20 +epoch [39/50] batch [175/500] time 0.893 (0.889) data 0.000 (0.004) loss 1.2588 (1.0670) acc 71.8750 (73.2857) lr 3.1545e-04 eta 1:26:16 +epoch [39/50] batch [180/500] time 0.861 (0.888) data 0.000 (0.004) loss 0.8267 (1.0658) acc 75.0000 (73.2465) lr 3.1545e-04 eta 1:26:10 +epoch [39/50] batch [185/500] time 0.905 (0.888) data 0.000 (0.004) loss 1.2959 (1.0692) acc 78.1250 (73.2601) lr 3.1545e-04 eta 1:26:06 +epoch [39/50] batch [190/500] time 0.888 (0.888) data 0.000 (0.004) loss 0.8872 (1.0747) acc 81.2500 (73.2566) lr 3.1545e-04 eta 1:26:01 +epoch [39/50] batch [195/500] time 0.886 (0.889) data 0.000 (0.004) loss 1.3516 (1.0748) acc 71.8750 (73.2212) lr 3.1545e-04 eta 1:25:59 +epoch [39/50] batch [200/500] time 0.885 (0.889) data 0.000 (0.004) loss 0.6226 (1.0721) acc 81.2500 (73.2344) lr 3.1545e-04 eta 1:25:53 +epoch [39/50] batch [205/500] time 0.897 (0.889) data 0.000 (0.003) loss 1.0547 (1.0762) acc 81.2500 (73.2622) lr 3.1545e-04 eta 1:25:49 +epoch [39/50] batch [210/500] time 0.904 (0.889) data 0.000 (0.003) loss 0.8584 (1.0801) acc 75.0000 (73.2887) lr 3.1545e-04 eta 1:25:45 +epoch [39/50] batch [215/500] time 0.883 (0.889) data 0.000 (0.003) loss 0.7539 (1.0797) acc 81.2500 (73.3140) lr 3.1545e-04 eta 1:25:42 +epoch [39/50] batch [220/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.8037 (1.0725) acc 81.2500 (73.4943) lr 3.1545e-04 eta 1:25:36 +epoch [39/50] batch [225/500] 
time 0.889 (0.889) data 0.000 (0.003) loss 1.1357 (1.0728) acc 78.1250 (73.5278) lr 3.1545e-04 eta 1:25:33 +epoch [39/50] batch [230/500] time 0.865 (0.889) data 0.000 (0.003) loss 1.3604 (1.0754) acc 71.8750 (73.3832) lr 3.1545e-04 eta 1:25:30 +epoch [39/50] batch [235/500] time 0.915 (0.889) data 0.000 (0.003) loss 1.2754 (1.0748) acc 71.8750 (73.4309) lr 3.1545e-04 eta 1:25:26 +epoch [39/50] batch [240/500] time 0.878 (0.889) data 0.000 (0.003) loss 0.7466 (1.0718) acc 75.0000 (73.4896) lr 3.1545e-04 eta 1:25:21 +epoch [39/50] batch [245/500] time 0.896 (0.889) data 0.000 (0.003) loss 1.0693 (1.0741) acc 65.6250 (73.3036) lr 3.1545e-04 eta 1:25:16 +epoch [39/50] batch [250/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.2002 (1.0711) acc 65.6250 (73.3625) lr 3.1545e-04 eta 1:25:12 +epoch [39/50] batch [255/500] time 0.985 (0.890) data 0.000 (0.003) loss 0.6152 (1.0696) acc 75.0000 (73.3824) lr 3.1545e-04 eta 1:25:12 +epoch [39/50] batch [260/500] time 0.856 (0.890) data 0.000 (0.003) loss 1.5352 (1.0729) acc 62.5000 (73.2572) lr 3.1545e-04 eta 1:25:06 +epoch [39/50] batch [265/500] time 0.921 (0.890) data 0.000 (0.003) loss 1.6113 (1.0781) acc 59.3750 (73.1722) lr 3.1545e-04 eta 1:25:02 +epoch [39/50] batch [270/500] time 0.877 (0.890) data 0.000 (0.003) loss 0.9434 (1.0766) acc 68.7500 (73.1829) lr 3.1545e-04 eta 1:24:57 +epoch [39/50] batch [275/500] time 0.875 (0.889) data 0.001 (0.003) loss 0.5347 (1.0738) acc 84.3750 (73.2727) lr 3.1545e-04 eta 1:24:52 +epoch [39/50] batch [280/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.8564 (1.0749) acc 68.7500 (73.2254) lr 3.1545e-04 eta 1:24:45 +epoch [39/50] batch [285/500] time 0.860 (0.889) data 0.000 (0.003) loss 1.5156 (1.0744) acc 68.7500 (73.2566) lr 3.1545e-04 eta 1:24:40 +epoch [39/50] batch [290/500] time 0.886 (0.889) data 0.000 (0.003) loss 0.7695 (1.0736) acc 78.1250 (73.2974) lr 3.1545e-04 eta 1:24:35 +epoch [39/50] batch [295/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.8628 (1.0723) acc 
75.0000 (73.2733) lr 3.1545e-04 eta 1:24:30 +epoch [39/50] batch [300/500] time 0.878 (0.889) data 0.000 (0.002) loss 1.0625 (1.0692) acc 75.0000 (73.3125) lr 3.1545e-04 eta 1:24:26 +epoch [39/50] batch [305/500] time 0.863 (0.888) data 0.000 (0.002) loss 1.0420 (1.0690) acc 75.0000 (73.3094) lr 3.1545e-04 eta 1:24:19 +epoch [39/50] batch [310/500] time 0.885 (0.888) data 0.000 (0.002) loss 0.7217 (1.0662) acc 78.1250 (73.3569) lr 3.1545e-04 eta 1:24:14 +epoch [39/50] batch [315/500] time 0.862 (0.888) data 0.000 (0.002) loss 1.5684 (1.0708) acc 71.8750 (73.2044) lr 3.1545e-04 eta 1:24:09 +epoch [39/50] batch [320/500] time 0.859 (0.888) data 0.000 (0.002) loss 0.9448 (1.0658) acc 71.8750 (73.2910) lr 3.1545e-04 eta 1:24:04 +epoch [39/50] batch [325/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.9854 (1.0673) acc 81.2500 (73.2404) lr 3.1545e-04 eta 1:23:58 +epoch [39/50] batch [330/500] time 0.875 (0.888) data 0.000 (0.002) loss 1.0908 (1.0663) acc 75.0000 (73.2102) lr 3.1545e-04 eta 1:23:53 +epoch [39/50] batch [335/500] time 0.902 (0.888) data 0.000 (0.002) loss 2.3418 (1.0691) acc 53.1250 (73.1437) lr 3.1545e-04 eta 1:23:48 +epoch [39/50] batch [340/500] time 0.929 (0.888) data 0.000 (0.002) loss 1.0625 (1.0670) acc 81.2500 (73.2077) lr 3.1545e-04 eta 1:23:44 +epoch [39/50] batch [345/500] time 0.896 (0.888) data 0.000 (0.002) loss 1.3916 (1.0687) acc 71.8750 (73.1341) lr 3.1545e-04 eta 1:23:39 +epoch [39/50] batch [350/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.8408 (1.0668) acc 84.3750 (73.2054) lr 3.1545e-04 eta 1:23:35 +epoch [39/50] batch [355/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.8926 (1.0680) acc 71.8750 (73.1778) lr 3.1545e-04 eta 1:23:32 +epoch [39/50] batch [360/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.7090 (1.0694) acc 75.0000 (73.1771) lr 3.1545e-04 eta 1:23:28 +epoch [39/50] batch [365/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.0850 (1.0711) acc 78.1250 (73.1507) lr 3.1545e-04 eta 1:23:23 +epoch [39/50] batch 
[370/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.1748 (1.0726) acc 65.6250 (73.0997) lr 3.1545e-04 eta 1:23:19 +epoch [39/50] batch [375/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.7739 (1.0710) acc 84.3750 (73.1250) lr 3.1545e-04 eta 1:23:14 +epoch [39/50] batch [380/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.2695 (1.0732) acc 75.0000 (73.1168) lr 3.1545e-04 eta 1:23:09 +epoch [39/50] batch [385/500] time 0.899 (0.888) data 0.000 (0.002) loss 1.2715 (1.0739) acc 65.6250 (73.0925) lr 3.1545e-04 eta 1:23:04 +epoch [39/50] batch [390/500] time 0.879 (0.888) data 0.000 (0.002) loss 0.9150 (1.0735) acc 84.3750 (73.0529) lr 3.1545e-04 eta 1:22:59 +epoch [39/50] batch [395/500] time 0.886 (0.888) data 0.000 (0.002) loss 1.3799 (1.0738) acc 68.7500 (73.0459) lr 3.1545e-04 eta 1:22:55 +epoch [39/50] batch [400/500] time 0.869 (0.888) data 0.000 (0.002) loss 0.8213 (1.0732) acc 84.3750 (73.0859) lr 3.1545e-04 eta 1:22:53 +epoch [39/50] batch [405/500] time 0.925 (0.888) data 0.000 (0.002) loss 0.9043 (1.0719) acc 87.5000 (73.1559) lr 3.1545e-04 eta 1:22:48 +epoch [39/50] batch [410/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9985 (1.0725) acc 65.6250 (73.0716) lr 3.1545e-04 eta 1:22:43 +epoch [39/50] batch [415/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2852 (1.0726) acc 65.6250 (73.0648) lr 3.1545e-04 eta 1:22:38 +epoch [39/50] batch [420/500] time 0.903 (0.888) data 0.000 (0.002) loss 1.0469 (1.0713) acc 75.0000 (73.1101) lr 3.1545e-04 eta 1:22:34 +epoch [39/50] batch [425/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9463 (1.0706) acc 71.8750 (73.1397) lr 3.1545e-04 eta 1:22:31 +epoch [39/50] batch [430/500] time 0.888 (0.888) data 0.000 (0.002) loss 1.3779 (1.0704) acc 65.6250 (73.1105) lr 3.1545e-04 eta 1:22:27 +epoch [39/50] batch [435/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.9521 (1.0677) acc 71.8750 (73.1537) lr 3.1545e-04 eta 1:22:22 +epoch [39/50] batch [440/500] time 0.904 (0.888) data 0.000 (0.002) loss 1.4395 
(1.0672) acc 71.8750 (73.1747) lr 3.1545e-04 eta 1:22:18 +epoch [39/50] batch [445/500] time 0.874 (0.888) data 0.000 (0.002) loss 0.7461 (1.0671) acc 78.1250 (73.1882) lr 3.1545e-04 eta 1:22:12 +epoch [39/50] batch [450/500] time 0.890 (0.888) data 0.000 (0.002) loss 1.3730 (1.0674) acc 59.3750 (73.1806) lr 3.1545e-04 eta 1:22:07 +epoch [39/50] batch [455/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.6826 (1.0689) acc 87.5000 (73.1593) lr 3.1545e-04 eta 1:22:03 +epoch [39/50] batch [460/500] time 0.887 (0.888) data 0.000 (0.002) loss 1.0713 (1.0706) acc 81.2500 (73.1318) lr 3.1545e-04 eta 1:21:59 +epoch [39/50] batch [465/500] time 0.897 (0.888) data 0.000 (0.002) loss 1.0977 (1.0702) acc 71.8750 (73.1452) lr 3.1545e-04 eta 1:21:54 +epoch [39/50] batch [470/500] time 0.912 (0.888) data 0.000 (0.002) loss 1.1748 (1.0709) acc 59.3750 (73.1250) lr 3.1545e-04 eta 1:21:49 +epoch [39/50] batch [475/500] time 0.908 (0.888) data 0.000 (0.002) loss 1.0820 (1.0722) acc 68.7500 (73.1184) lr 3.1545e-04 eta 1:21:45 +epoch [39/50] batch [480/500] time 0.909 (0.888) data 0.000 (0.002) loss 0.9624 (1.0713) acc 71.8750 (73.1380) lr 3.1545e-04 eta 1:21:42 +epoch [39/50] batch [485/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.7734 (1.0701) acc 75.0000 (73.1186) lr 3.1545e-04 eta 1:21:37 +epoch [39/50] batch [490/500] time 0.902 (0.888) data 0.000 (0.002) loss 0.6357 (1.0689) acc 87.5000 (73.1633) lr 3.1545e-04 eta 1:21:33 +epoch [39/50] batch [495/500] time 0.918 (0.888) data 0.000 (0.002) loss 0.7827 (1.0682) acc 84.3750 (73.1944) lr 3.1545e-04 eta 1:21:28 +epoch [39/50] batch [500/500] time 0.884 (0.888) data 0.000 (0.002) loss 1.0176 (1.0681) acc 75.0000 (73.1937) lr 2.7103e-04 eta 1:21:25 +epoch [40/50] batch [5/500] time 0.851 (1.009) data 0.000 (0.144) loss 0.8892 (0.9932) acc 75.0000 (76.2500) lr 2.7103e-04 eta 1:32:26 +epoch [40/50] batch [10/500] time 0.857 (0.939) data 0.000 (0.072) loss 0.8838 (1.0284) acc 81.2500 (74.3750) lr 2.7103e-04 eta 1:25:52 +epoch 
[40/50] batch [15/500] time 0.889 (0.921) data 0.000 (0.048) loss 0.7417 (1.0437) acc 81.2500 (74.1667) lr 2.7103e-04 eta 1:24:11 +epoch [40/50] batch [20/500] time 0.859 (0.910) data 0.000 (0.036) loss 1.2314 (1.0816) acc 71.8750 (73.4375) lr 2.7103e-04 eta 1:23:08 +epoch [40/50] batch [25/500] time 0.872 (0.904) data 0.000 (0.029) loss 0.7217 (1.0304) acc 81.2500 (74.2500) lr 2.7103e-04 eta 1:22:30 +epoch [40/50] batch [30/500] time 0.913 (0.901) data 0.000 (0.024) loss 1.4209 (1.0598) acc 71.8750 (73.9583) lr 2.7103e-04 eta 1:22:10 +epoch [40/50] batch [35/500] time 0.884 (0.898) data 0.000 (0.021) loss 0.9580 (1.0212) acc 78.1250 (74.3750) lr 2.7103e-04 eta 1:21:49 +epoch [40/50] batch [40/500] time 0.899 (0.897) data 0.000 (0.018) loss 0.7305 (1.0328) acc 68.7500 (73.5156) lr 2.7103e-04 eta 1:21:40 +epoch [40/50] batch [45/500] time 0.879 (0.896) data 0.000 (0.016) loss 0.8525 (1.0137) acc 84.3750 (74.3056) lr 2.7103e-04 eta 1:21:28 +epoch [40/50] batch [50/500] time 0.881 (0.894) data 0.000 (0.015) loss 0.9741 (1.0050) acc 81.2500 (74.6250) lr 2.7103e-04 eta 1:21:14 +epoch [40/50] batch [55/500] time 0.884 (0.894) data 0.000 (0.013) loss 1.7617 (1.0257) acc 65.6250 (74.5455) lr 2.7103e-04 eta 1:21:07 +epoch [40/50] batch [60/500] time 0.879 (0.895) data 0.000 (0.012) loss 1.1855 (1.0317) acc 81.2500 (74.5833) lr 2.7103e-04 eta 1:21:08 +epoch [40/50] batch [65/500] time 0.882 (0.895) data 0.000 (0.011) loss 1.1221 (1.0330) acc 75.0000 (74.5192) lr 2.7103e-04 eta 1:21:04 +epoch [40/50] batch [70/500] time 0.882 (0.894) data 0.000 (0.010) loss 1.1045 (1.0429) acc 71.8750 (74.5982) lr 2.7103e-04 eta 1:20:53 +epoch [40/50] batch [75/500] time 0.910 (0.894) data 0.000 (0.010) loss 1.5557 (1.0715) acc 68.7500 (74.0000) lr 2.7103e-04 eta 1:20:49 +epoch [40/50] batch [80/500] time 0.847 (0.893) data 0.000 (0.009) loss 1.2656 (1.0728) acc 68.7500 (73.8672) lr 2.7103e-04 eta 1:20:38 +epoch [40/50] batch [85/500] time 0.896 (0.892) data 0.000 (0.009) loss 1.7344 (1.0785) 
acc 71.8750 (73.9706) lr 2.7103e-04 eta 1:20:27 +epoch [40/50] batch [90/500] time 0.894 (0.892) data 0.000 (0.008) loss 0.6406 (1.0765) acc 84.3750 (74.0625) lr 2.7103e-04 eta 1:20:23 +epoch [40/50] batch [95/500] time 0.883 (0.891) data 0.000 (0.008) loss 0.8394 (1.0647) acc 81.2500 (74.2434) lr 2.7103e-04 eta 1:20:17 +epoch [40/50] batch [100/500] time 0.868 (0.891) data 0.000 (0.007) loss 1.6377 (1.0779) acc 59.3750 (73.9375) lr 2.7103e-04 eta 1:20:12 +epoch [40/50] batch [105/500] time 0.872 (0.890) data 0.000 (0.007) loss 0.6060 (1.0710) acc 90.6250 (74.1667) lr 2.7103e-04 eta 1:20:03 +epoch [40/50] batch [110/500] time 0.937 (0.891) data 0.000 (0.007) loss 0.8496 (1.0710) acc 84.3750 (74.1193) lr 2.7103e-04 eta 1:20:02 +epoch [40/50] batch [115/500] time 0.864 (0.891) data 0.000 (0.006) loss 0.8008 (1.0699) acc 78.1250 (74.1304) lr 2.7103e-04 eta 1:19:58 +epoch [40/50] batch [120/500] time 0.906 (0.891) data 0.000 (0.006) loss 1.3232 (1.0708) acc 71.8750 (74.0885) lr 2.7103e-04 eta 1:19:55 +epoch [40/50] batch [125/500] time 0.874 (0.891) data 0.000 (0.006) loss 1.2734 (1.0728) acc 65.6250 (73.9750) lr 2.7103e-04 eta 1:19:50 +epoch [40/50] batch [130/500] time 0.890 (0.891) data 0.000 (0.006) loss 1.0615 (1.0724) acc 68.7500 (73.9423) lr 2.7103e-04 eta 1:19:43 +epoch [40/50] batch [135/500] time 0.863 (0.890) data 0.000 (0.006) loss 1.0420 (1.0740) acc 81.2500 (74.0046) lr 2.7103e-04 eta 1:19:35 +epoch [40/50] batch [140/500] time 0.880 (0.890) data 0.000 (0.005) loss 1.0127 (1.0602) acc 75.0000 (74.2188) lr 2.7103e-04 eta 1:19:31 +epoch [40/50] batch [145/500] time 0.860 (0.889) data 0.000 (0.005) loss 1.2295 (1.0703) acc 71.8750 (74.0086) lr 2.7103e-04 eta 1:19:22 +epoch [40/50] batch [150/500] time 0.881 (0.889) data 0.000 (0.005) loss 1.1982 (1.0723) acc 78.1250 (73.9583) lr 2.7103e-04 eta 1:19:16 +epoch [40/50] batch [155/500] time 0.880 (0.888) data 0.000 (0.005) loss 0.8960 (1.0695) acc 78.1250 (73.9919) lr 2.7103e-04 eta 1:19:07 +epoch [40/50] batch 
[160/500] time 0.886 (0.888) data 0.000 (0.005) loss 1.1230 (1.0692) acc 78.1250 (73.9844) lr 2.7103e-04 eta 1:19:02 +epoch [40/50] batch [165/500] time 0.880 (0.888) data 0.000 (0.005) loss 0.6440 (1.0588) acc 81.2500 (73.9583) lr 2.7103e-04 eta 1:18:55 +epoch [40/50] batch [170/500] time 0.897 (0.887) data 0.000 (0.004) loss 1.1836 (1.0543) acc 68.7500 (73.9522) lr 2.7103e-04 eta 1:18:50 +epoch [40/50] batch [175/500] time 0.858 (0.887) data 0.000 (0.004) loss 0.7964 (1.0587) acc 71.8750 (73.8393) lr 2.7103e-04 eta 1:18:43 +epoch [40/50] batch [180/500] time 0.865 (0.887) data 0.000 (0.004) loss 1.2490 (1.0569) acc 71.8750 (73.8194) lr 2.7103e-04 eta 1:18:39 +epoch [40/50] batch [185/500] time 0.926 (0.887) data 0.000 (0.004) loss 0.9717 (1.0566) acc 81.2500 (73.7838) lr 2.7103e-04 eta 1:18:34 +epoch [40/50] batch [190/500] time 0.870 (0.887) data 0.000 (0.004) loss 1.1270 (1.0566) acc 68.7500 (73.7829) lr 2.7103e-04 eta 1:18:29 +epoch [40/50] batch [195/500] time 0.892 (0.887) data 0.000 (0.004) loss 0.9487 (1.0519) acc 78.1250 (73.9583) lr 2.7103e-04 eta 1:18:24 +epoch [40/50] batch [200/500] time 0.892 (0.887) data 0.000 (0.004) loss 1.2871 (1.0517) acc 71.8750 (73.9531) lr 2.7103e-04 eta 1:18:22 +epoch [40/50] batch [205/500] time 0.875 (0.887) data 0.000 (0.004) loss 1.4570 (1.0520) acc 65.6250 (73.8872) lr 2.7103e-04 eta 1:18:16 +epoch [40/50] batch [210/500] time 0.863 (0.887) data 0.000 (0.004) loss 1.1826 (1.0543) acc 65.6250 (73.8244) lr 2.7103e-04 eta 1:18:10 +epoch [40/50] batch [215/500] time 0.878 (0.887) data 0.000 (0.004) loss 1.3965 (1.0620) acc 62.5000 (73.6047) lr 2.7103e-04 eta 1:18:05 +epoch [40/50] batch [220/500] time 0.897 (0.886) data 0.000 (0.003) loss 0.8691 (1.0603) acc 71.8750 (73.6080) lr 2.7103e-04 eta 1:18:00 +epoch [40/50] batch [225/500] time 0.906 (0.887) data 0.000 (0.003) loss 1.2178 (1.0638) acc 78.1250 (73.6528) lr 2.7103e-04 eta 1:17:56 +epoch [40/50] batch [230/500] time 0.872 (0.886) data 0.000 (0.003) loss 1.2559 
(1.0629) acc 71.8750 (73.5870) lr 2.7103e-04 eta 1:17:50 +epoch [40/50] batch [235/500] time 0.871 (0.886) data 0.000 (0.003) loss 0.9541 (1.0636) acc 68.7500 (73.5638) lr 2.7103e-04 eta 1:17:44 +epoch [40/50] batch [240/500] time 1.013 (0.886) data 0.000 (0.003) loss 1.2939 (1.0623) acc 71.8750 (73.6979) lr 2.7103e-04 eta 1:17:42 +epoch [40/50] batch [245/500] time 0.868 (0.886) data 0.000 (0.003) loss 0.9536 (1.0614) acc 78.1250 (73.6735) lr 2.7103e-04 eta 1:17:38 +epoch [40/50] batch [250/500] time 0.892 (0.887) data 0.000 (0.003) loss 1.7061 (1.0633) acc 62.5000 (73.6750) lr 2.7103e-04 eta 1:17:34 +epoch [40/50] batch [255/500] time 0.878 (0.886) data 0.000 (0.003) loss 0.6987 (1.0587) acc 87.5000 (73.7990) lr 2.7103e-04 eta 1:17:29 +epoch [40/50] batch [260/500] time 0.875 (0.886) data 0.000 (0.003) loss 0.8066 (1.0604) acc 71.8750 (73.7500) lr 2.7103e-04 eta 1:17:23 +epoch [40/50] batch [265/500] time 0.898 (0.886) data 0.000 (0.003) loss 0.6714 (1.0557) acc 78.1250 (73.8208) lr 2.7103e-04 eta 1:17:18 +epoch [40/50] batch [270/500] time 0.875 (0.886) data 0.000 (0.003) loss 1.0029 (1.0561) acc 68.7500 (73.7731) lr 2.7103e-04 eta 1:17:13 +epoch [40/50] batch [275/500] time 0.902 (0.886) data 0.000 (0.003) loss 0.7729 (1.0517) acc 81.2500 (73.8409) lr 2.7103e-04 eta 1:17:10 +epoch [40/50] batch [280/500] time 0.846 (0.886) data 0.001 (0.003) loss 0.4495 (1.0481) acc 93.7500 (73.8839) lr 2.7103e-04 eta 1:17:04 +epoch [40/50] batch [285/500] time 0.874 (0.886) data 0.000 (0.003) loss 1.1787 (1.0485) acc 71.8750 (73.8487) lr 2.7103e-04 eta 1:16:59 +epoch [40/50] batch [290/500] time 0.885 (0.885) data 0.000 (0.003) loss 0.9702 (1.0467) acc 68.7500 (73.8362) lr 2.7103e-04 eta 1:16:53 +epoch [40/50] batch [295/500] time 0.898 (0.886) data 0.000 (0.003) loss 0.7227 (1.0432) acc 68.7500 (73.8983) lr 2.7103e-04 eta 1:16:49 +epoch [40/50] batch [300/500] time 0.929 (0.886) data 0.000 (0.003) loss 1.4287 (1.0465) acc 56.2500 (73.7917) lr 2.7103e-04 eta 1:16:46 +epoch 
[40/50] batch [305/500] time 0.893 (0.886) data 0.000 (0.003) loss 1.2461 (1.0494) acc 62.5000 (73.7295) lr 2.7103e-04 eta 1:16:42 +epoch [40/50] batch [310/500] time 0.889 (0.886) data 0.000 (0.003) loss 0.5112 (1.0480) acc 90.6250 (73.7601) lr 2.7103e-04 eta 1:16:37 +epoch [40/50] batch [315/500] time 0.876 (0.886) data 0.000 (0.003) loss 1.3389 (1.0488) acc 62.5000 (73.7599) lr 2.7103e-04 eta 1:16:32 +epoch [40/50] batch [320/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.5205 (1.0511) acc 65.6250 (73.7402) lr 2.7103e-04 eta 1:16:27 +epoch [40/50] batch [325/500] time 0.885 (0.886) data 0.001 (0.002) loss 0.7222 (1.0521) acc 87.5000 (73.7308) lr 2.7103e-04 eta 1:16:23 +epoch [40/50] batch [330/500] time 0.878 (0.885) data 0.000 (0.002) loss 2.0176 (1.0534) acc 62.5000 (73.7216) lr 2.7103e-04 eta 1:16:17 +epoch [40/50] batch [335/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.2920 (1.0512) acc 65.6250 (73.7687) lr 2.7103e-04 eta 1:16:14 +epoch [40/50] batch [340/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.3057 (1.0495) acc 71.8750 (73.8419) lr 2.7103e-04 eta 1:16:11 +epoch [40/50] batch [345/500] time 0.881 (0.886) data 0.000 (0.002) loss 1.0391 (1.0511) acc 75.0000 (73.8315) lr 2.7103e-04 eta 1:16:06 +epoch [40/50] batch [350/500] time 0.906 (0.886) data 0.000 (0.002) loss 1.1465 (1.0536) acc 68.7500 (73.7946) lr 2.7103e-04 eta 1:16:03 +epoch [40/50] batch [355/500] time 0.877 (0.886) data 0.000 (0.002) loss 1.3027 (1.0513) acc 78.1250 (73.8556) lr 2.7103e-04 eta 1:15:59 +epoch [40/50] batch [360/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.0391 (1.0503) acc 75.0000 (73.8715) lr 2.7103e-04 eta 1:15:54 +epoch [40/50] batch [365/500] time 0.851 (0.886) data 0.000 (0.002) loss 1.0020 (1.0473) acc 75.0000 (73.9127) lr 2.7103e-04 eta 1:15:49 +epoch [40/50] batch [370/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.1523 (1.0439) acc 71.8750 (74.0203) lr 2.7103e-04 eta 1:15:45 +epoch [40/50] batch [375/500] time 0.887 (0.886) data 0.000 (0.002) loss 
1.6543 (1.0468) acc 62.5000 (74.0000) lr 2.7103e-04 eta 1:15:40 +epoch [40/50] batch [380/500] time 0.904 (0.886) data 0.000 (0.002) loss 0.6304 (1.0477) acc 81.2500 (73.9720) lr 2.7103e-04 eta 1:15:36 +epoch [40/50] batch [385/500] time 0.916 (0.886) data 0.000 (0.002) loss 1.0117 (1.0489) acc 71.8750 (73.9205) lr 2.7103e-04 eta 1:15:33 +epoch [40/50] batch [390/500] time 0.871 (0.886) data 0.000 (0.002) loss 1.6162 (1.0477) acc 68.7500 (74.0144) lr 2.7103e-04 eta 1:15:29 +epoch [40/50] batch [395/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.1240 (1.0499) acc 75.0000 (73.9953) lr 2.7103e-04 eta 1:15:25 +epoch [40/50] batch [400/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.9766 (1.0503) acc 75.0000 (73.9453) lr 2.7103e-04 eta 1:15:21 +epoch [40/50] batch [405/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.0654 (1.0488) acc 78.1250 (73.9815) lr 2.7103e-04 eta 1:15:16 +epoch [40/50] batch [410/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.0088 (1.0486) acc 81.2500 (74.0091) lr 2.7103e-04 eta 1:15:13 +epoch [40/50] batch [415/500] time 0.867 (0.887) data 0.000 (0.002) loss 0.9146 (1.0469) acc 78.1250 (74.0136) lr 2.7103e-04 eta 1:15:09 +epoch [40/50] batch [420/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.3640 (1.0448) acc 90.6250 (74.0774) lr 2.7103e-04 eta 1:15:05 +epoch [40/50] batch [425/500] time 0.887 (0.887) data 0.000 (0.002) loss 1.6094 (1.0453) acc 59.3750 (74.0441) lr 2.7103e-04 eta 1:15:00 +epoch [40/50] batch [430/500] time 0.904 (0.887) data 0.000 (0.002) loss 0.4963 (1.0444) acc 84.3750 (74.0334) lr 2.7103e-04 eta 1:14:55 +epoch [40/50] batch [435/500] time 0.874 (0.887) data 0.000 (0.002) loss 1.0068 (1.0441) acc 71.8750 (73.9583) lr 2.7103e-04 eta 1:14:50 +epoch [40/50] batch [440/500] time 0.863 (0.887) data 0.000 (0.002) loss 1.4521 (1.0457) acc 68.7500 (73.9560) lr 2.7103e-04 eta 1:14:46 +epoch [40/50] batch [445/500] time 0.911 (0.887) data 0.000 (0.002) loss 0.7217 (1.0449) acc 84.3750 (73.9396) lr 2.7103e-04 eta 1:14:41 
+epoch [40/50] batch [450/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.9902 (1.0466) acc 50.0000 (73.9028) lr 2.7103e-04 eta 1:14:38 +epoch [40/50] batch [455/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.6104 (1.0454) acc 87.5000 (73.9560) lr 2.7103e-04 eta 1:14:33 +epoch [40/50] batch [460/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.1094 (1.0449) acc 59.3750 (73.9266) lr 2.7103e-04 eta 1:14:29 +epoch [40/50] batch [465/500] time 0.902 (0.887) data 0.000 (0.002) loss 1.2100 (1.0457) acc 65.6250 (73.8978) lr 2.7103e-04 eta 1:14:24 +epoch [40/50] batch [470/500] time 0.905 (0.887) data 0.000 (0.002) loss 0.8716 (1.0468) acc 75.0000 (73.9029) lr 2.7103e-04 eta 1:14:20 +epoch [40/50] batch [475/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.5957 (1.0479) acc 62.5000 (73.8750) lr 2.7103e-04 eta 1:14:17 +epoch [40/50] batch [480/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.7954 (1.0489) acc 87.5000 (73.8802) lr 2.7103e-04 eta 1:14:12 +epoch [40/50] batch [485/500] time 0.883 (0.887) data 0.000 (0.002) loss 1.7129 (1.0530) acc 53.1250 (73.7822) lr 2.7103e-04 eta 1:14:08 +epoch [40/50] batch [490/500] time 0.906 (0.887) data 0.000 (0.002) loss 0.6455 (1.0499) acc 84.3750 (73.8648) lr 2.7103e-04 eta 1:14:04 +epoch [40/50] batch [495/500] time 0.932 (0.887) data 0.000 (0.002) loss 1.0273 (1.0483) acc 78.1250 (73.9078) lr 2.7103e-04 eta 1:14:00 +epoch [40/50] batch [500/500] time 0.878 (0.887) data 0.000 (0.002) loss 1.0166 (1.0478) acc 75.0000 (73.9062) lr 2.2949e-04 eta 1:13:56 +epoch [41/50] batch [5/500] time 0.892 (1.045) data 0.000 (0.152) loss 1.2998 (1.1821) acc 75.0000 (68.1250) lr 2.2949e-04 eta 1:26:58 +epoch [41/50] batch [10/500] time 0.869 (0.961) data 0.000 (0.076) loss 1.8867 (1.0529) acc 56.2500 (73.1250) lr 2.2949e-04 eta 1:19:53 +epoch [41/50] batch [15/500] time 0.858 (0.937) data 0.000 (0.051) loss 1.1357 (1.0827) acc 75.0000 (72.9167) lr 2.2949e-04 eta 1:17:51 +epoch [41/50] batch [20/500] time 0.868 (0.924) data 0.000 (0.038) 
loss 1.0615 (1.1328) acc 68.7500 (72.0312) lr 2.2949e-04 eta 1:16:39 +epoch [41/50] batch [25/500] time 0.864 (0.915) data 0.000 (0.031) loss 0.9580 (1.0821) acc 71.8750 (72.6250) lr 2.2949e-04 eta 1:15:53 +epoch [41/50] batch [30/500] time 0.858 (0.910) data 0.000 (0.026) loss 1.3096 (1.1004) acc 65.6250 (71.7708) lr 2.2949e-04 eta 1:15:22 +epoch [41/50] batch [35/500] time 0.882 (0.912) data 0.000 (0.022) loss 0.9092 (1.0886) acc 87.5000 (71.8750) lr 2.2949e-04 eta 1:15:28 +epoch [41/50] batch [40/500] time 0.877 (0.908) data 0.000 (0.019) loss 0.8374 (1.0530) acc 87.5000 (73.5938) lr 2.2949e-04 eta 1:15:03 +epoch [41/50] batch [45/500] time 0.902 (0.906) data 0.000 (0.017) loss 0.7734 (1.0398) acc 78.1250 (73.7500) lr 2.2949e-04 eta 1:14:49 +epoch [41/50] batch [50/500] time 0.914 (0.905) data 0.000 (0.015) loss 0.6777 (1.0306) acc 93.7500 (74.3125) lr 2.2949e-04 eta 1:14:38 +epoch [41/50] batch [55/500] time 0.901 (0.903) data 0.000 (0.014) loss 1.2295 (1.0304) acc 65.6250 (74.0909) lr 2.2949e-04 eta 1:14:24 +epoch [41/50] batch [60/500] time 0.888 (0.901) data 0.000 (0.013) loss 1.0078 (1.0198) acc 71.8750 (74.3229) lr 2.2949e-04 eta 1:14:09 +epoch [41/50] batch [65/500] time 0.896 (0.900) data 0.000 (0.012) loss 0.9507 (1.0303) acc 81.2500 (74.1827) lr 2.2949e-04 eta 1:14:00 +epoch [41/50] batch [70/500] time 0.862 (0.898) data 0.000 (0.011) loss 1.1309 (1.0218) acc 65.6250 (74.3304) lr 2.2949e-04 eta 1:13:46 +epoch [41/50] batch [75/500] time 0.886 (0.897) data 0.000 (0.010) loss 1.2383 (1.0119) acc 65.6250 (74.5833) lr 2.2949e-04 eta 1:13:37 +epoch [41/50] batch [80/500] time 0.883 (0.897) data 0.000 (0.010) loss 1.4180 (1.0259) acc 68.7500 (74.3750) lr 2.2949e-04 eta 1:13:34 +epoch [41/50] batch [85/500] time 0.873 (0.897) data 0.000 (0.009) loss 1.4834 (1.0244) acc 65.6250 (74.3750) lr 2.2949e-04 eta 1:13:29 +epoch [41/50] batch [90/500] time 0.925 (0.897) data 0.000 (0.009) loss 1.2861 (1.0395) acc 59.3750 (74.0625) lr 2.2949e-04 eta 1:13:23 +epoch 
[41/50] batch [95/500] time 0.892 (0.896) data 0.000 (0.008) loss 1.3135 (1.0500) acc 71.8750 (73.8816) lr 2.2949e-04 eta 1:13:15 +epoch [41/50] batch [100/500] time 0.878 (0.896) data 0.000 (0.008) loss 0.6797 (1.0439) acc 81.2500 (73.9375) lr 2.2949e-04 eta 1:13:10 +epoch [41/50] batch [105/500] time 0.873 (0.896) data 0.000 (0.007) loss 0.2097 (1.0501) acc 100.0000 (73.9881) lr 2.2949e-04 eta 1:13:04 +epoch [41/50] batch [110/500] time 0.891 (0.896) data 0.000 (0.007) loss 1.5762 (1.0507) acc 59.3750 (73.8352) lr 2.2949e-04 eta 1:12:59 +epoch [41/50] batch [115/500] time 0.883 (0.896) data 0.000 (0.007) loss 0.8672 (1.0423) acc 71.8750 (74.0489) lr 2.2949e-04 eta 1:12:55 +epoch [41/50] batch [120/500] time 0.873 (0.895) data 0.000 (0.007) loss 0.9351 (1.0393) acc 75.0000 (74.2448) lr 2.2949e-04 eta 1:12:49 +epoch [41/50] batch [125/500] time 0.862 (0.895) data 0.000 (0.006) loss 0.8726 (1.0367) acc 84.3750 (74.2750) lr 2.2949e-04 eta 1:12:43 +epoch [41/50] batch [130/500] time 0.864 (0.894) data 0.000 (0.006) loss 1.2070 (1.0439) acc 68.7500 (74.0385) lr 2.2949e-04 eta 1:12:34 +epoch [41/50] batch [135/500] time 0.872 (0.894) data 0.000 (0.006) loss 0.8003 (1.0363) acc 75.0000 (74.1435) lr 2.2949e-04 eta 1:12:28 +epoch [41/50] batch [140/500] time 0.917 (0.894) data 0.000 (0.006) loss 1.1738 (1.0360) acc 65.6250 (74.0625) lr 2.2949e-04 eta 1:12:24 +epoch [41/50] batch [145/500] time 0.900 (0.894) data 0.000 (0.005) loss 0.7500 (1.0345) acc 75.0000 (74.0086) lr 2.2949e-04 eta 1:12:18 +epoch [41/50] batch [150/500] time 0.894 (0.893) data 0.000 (0.005) loss 1.1328 (1.0344) acc 78.1250 (74.0625) lr 2.2949e-04 eta 1:12:11 +epoch [41/50] batch [155/500] time 0.886 (0.893) data 0.000 (0.005) loss 0.9634 (1.0343) acc 75.0000 (74.2339) lr 2.2949e-04 eta 1:12:05 +epoch [41/50] batch [160/500] time 0.881 (0.892) data 0.000 (0.005) loss 1.5098 (1.0356) acc 65.6250 (74.2383) lr 2.2949e-04 eta 1:11:59 +epoch [41/50] batch [165/500] time 0.901 (0.892) data 0.000 (0.005) loss 
1.0986 (1.0348) acc 71.8750 (74.2992) lr 2.2949e-04 eta 1:11:54 +epoch [41/50] batch [170/500] time 0.877 (0.892) data 0.000 (0.005) loss 1.1426 (1.0363) acc 75.0000 (74.1728) lr 2.2949e-04 eta 1:11:48 +epoch [41/50] batch [175/500] time 0.885 (0.892) data 0.000 (0.005) loss 0.3782 (1.0289) acc 93.7500 (74.3750) lr 2.2949e-04 eta 1:11:43 +epoch [41/50] batch [180/500] time 0.890 (0.892) data 0.000 (0.004) loss 1.3057 (1.0286) acc 62.5000 (74.3576) lr 2.2949e-04 eta 1:11:40 +epoch [41/50] batch [185/500] time 0.865 (0.892) data 0.000 (0.004) loss 0.9517 (1.0280) acc 71.8750 (74.3243) lr 2.2949e-04 eta 1:11:34 +epoch [41/50] batch [190/500] time 0.887 (0.892) data 0.000 (0.004) loss 0.7124 (1.0268) acc 71.8750 (74.3586) lr 2.2949e-04 eta 1:11:29 +epoch [41/50] batch [195/500] time 0.889 (0.892) data 0.000 (0.004) loss 0.9458 (1.0230) acc 75.0000 (74.4551) lr 2.2949e-04 eta 1:11:24 +epoch [41/50] batch [200/500] time 0.907 (0.892) data 0.000 (0.004) loss 1.1191 (1.0205) acc 78.1250 (74.4844) lr 2.2949e-04 eta 1:11:20 +epoch [41/50] batch [205/500] time 0.873 (0.892) data 0.000 (0.004) loss 0.9868 (1.0238) acc 62.5000 (74.3445) lr 2.2949e-04 eta 1:11:15 +epoch [41/50] batch [210/500] time 0.866 (0.891) data 0.000 (0.004) loss 0.8657 (1.0236) acc 78.1250 (74.4345) lr 2.2949e-04 eta 1:11:08 +epoch [41/50] batch [215/500] time 0.868 (0.891) data 0.000 (0.004) loss 0.8896 (1.0218) acc 75.0000 (74.4041) lr 2.2949e-04 eta 1:11:03 +epoch [41/50] batch [220/500] time 0.905 (0.891) data 0.000 (0.004) loss 0.7817 (1.0247) acc 84.3750 (74.4318) lr 2.2949e-04 eta 1:10:58 +epoch [41/50] batch [225/500] time 0.866 (0.891) data 0.000 (0.004) loss 1.5752 (1.0255) acc 68.7500 (74.5000) lr 2.2949e-04 eta 1:10:53 +epoch [41/50] batch [230/500] time 0.905 (0.891) data 0.000 (0.004) loss 0.8608 (1.0229) acc 78.1250 (74.6196) lr 2.2949e-04 eta 1:10:49 +epoch [41/50] batch [235/500] time 0.899 (0.891) data 0.000 (0.003) loss 1.5117 (1.0253) acc 59.3750 (74.5878) lr 2.2949e-04 eta 1:10:45 
+epoch [41/50] batch [240/500] time 0.884 (0.891) data 0.000 (0.003) loss 1.1367 (1.0254) acc 78.1250 (74.5703) lr 2.2949e-04 eta 1:10:40 +epoch [41/50] batch [245/500] time 0.863 (0.891) data 0.000 (0.003) loss 0.7422 (1.0275) acc 81.2500 (74.5153) lr 2.2949e-04 eta 1:10:34 +epoch [41/50] batch [250/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.7344 (1.0240) acc 62.5000 (74.5500) lr 2.2949e-04 eta 1:10:29 +epoch [41/50] batch [255/500] time 0.913 (0.890) data 0.000 (0.003) loss 0.9526 (1.0228) acc 71.8750 (74.6078) lr 2.2949e-04 eta 1:10:24 +epoch [41/50] batch [260/500] time 0.885 (0.890) data 0.000 (0.003) loss 1.1797 (1.0260) acc 81.2500 (74.6034) lr 2.2949e-04 eta 1:10:19 +epoch [41/50] batch [265/500] time 0.905 (0.890) data 0.000 (0.003) loss 1.5283 (1.0282) acc 68.7500 (74.5283) lr 2.2949e-04 eta 1:10:16 +epoch [41/50] batch [270/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.7588 (1.0261) acc 81.2500 (74.6065) lr 2.2949e-04 eta 1:10:11 +epoch [41/50] batch [275/500] time 0.916 (0.890) data 0.000 (0.003) loss 0.9775 (1.0264) acc 78.1250 (74.6364) lr 2.2949e-04 eta 1:10:06 +epoch [41/50] batch [280/500] time 0.847 (0.890) data 0.000 (0.003) loss 0.7554 (1.0250) acc 84.3750 (74.6205) lr 2.2949e-04 eta 1:10:00 +epoch [41/50] batch [285/500] time 0.898 (0.890) data 0.000 (0.003) loss 1.3037 (1.0252) acc 65.6250 (74.5943) lr 2.2949e-04 eta 1:09:56 +epoch [41/50] batch [290/500] time 0.889 (0.890) data 0.001 (0.003) loss 0.9819 (1.0261) acc 75.0000 (74.5259) lr 2.2949e-04 eta 1:09:51 +epoch [41/50] batch [295/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.0059 (1.0262) acc 75.0000 (74.5233) lr 2.2949e-04 eta 1:09:47 +epoch [41/50] batch [300/500] time 0.885 (0.890) data 0.000 (0.003) loss 0.8975 (1.0267) acc 81.2500 (74.5208) lr 2.2949e-04 eta 1:09:42 +epoch [41/50] batch [305/500] time 0.898 (0.890) data 0.001 (0.003) loss 0.9990 (1.0238) acc 71.8750 (74.5594) lr 2.2949e-04 eta 1:09:37 +epoch [41/50] batch [310/500] time 0.890 (0.890) data 0.000 
(0.003) loss 1.3184 (1.0257) acc 65.6250 (74.4456) lr 2.2949e-04 eta 1:09:32 +epoch [41/50] batch [315/500] time 0.877 (0.890) data 0.000 (0.003) loss 0.8696 (1.0249) acc 81.2500 (74.4444) lr 2.2949e-04 eta 1:09:27 +epoch [41/50] batch [320/500] time 1.005 (0.890) data 0.000 (0.003) loss 1.3955 (1.0288) acc 59.3750 (74.3359) lr 2.2949e-04 eta 1:09:24 +epoch [41/50] batch [325/500] time 0.910 (0.890) data 0.000 (0.003) loss 0.7002 (1.0255) acc 84.3750 (74.3846) lr 2.2949e-04 eta 1:09:20 +epoch [41/50] batch [330/500] time 0.880 (0.890) data 0.000 (0.003) loss 0.7524 (1.0275) acc 75.0000 (74.3277) lr 2.2949e-04 eta 1:09:16 +epoch [41/50] batch [335/500] time 0.895 (0.890) data 0.000 (0.003) loss 1.3359 (1.0270) acc 71.8750 (74.3563) lr 2.2949e-04 eta 1:09:11 +epoch [41/50] batch [340/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.5225 (1.0342) acc 68.7500 (74.2463) lr 2.2949e-04 eta 1:09:07 +epoch [41/50] batch [345/500] time 0.918 (0.890) data 0.000 (0.002) loss 1.1426 (1.0344) acc 71.8750 (74.2482) lr 2.2949e-04 eta 1:09:03 +epoch [41/50] batch [350/500] time 0.862 (0.890) data 0.000 (0.002) loss 1.0000 (1.0327) acc 68.7500 (74.2321) lr 2.2949e-04 eta 1:08:58 +epoch [41/50] batch [355/500] time 0.933 (0.890) data 0.000 (0.002) loss 1.2598 (1.0353) acc 65.6250 (74.1725) lr 2.2949e-04 eta 1:08:53 +epoch [41/50] batch [360/500] time 0.921 (0.890) data 0.000 (0.002) loss 0.9717 (1.0377) acc 71.8750 (74.1580) lr 2.2949e-04 eta 1:08:48 +epoch [41/50] batch [365/500] time 0.894 (0.890) data 0.000 (0.002) loss 0.8408 (1.0388) acc 90.6250 (74.1952) lr 2.2949e-04 eta 1:08:45 +epoch [41/50] batch [370/500] time 0.861 (0.890) data 0.000 (0.002) loss 0.9790 (1.0395) acc 75.0000 (74.2145) lr 2.2949e-04 eta 1:08:40 +epoch [41/50] batch [375/500] time 0.890 (0.890) data 0.000 (0.002) loss 1.3838 (1.0418) acc 68.7500 (74.1583) lr 2.2949e-04 eta 1:08:35 +epoch [41/50] batch [380/500] time 0.881 (0.890) data 0.000 (0.002) loss 0.9180 (1.0395) acc 81.2500 (74.2105) lr 2.2949e-04 
eta 1:08:30 +epoch [41/50] batch [385/500] time 0.881 (0.890) data 0.000 (0.002) loss 1.2041 (1.0395) acc 71.8750 (74.1802) lr 2.2949e-04 eta 1:08:26 +epoch [41/50] batch [390/500] time 0.938 (0.890) data 0.000 (0.002) loss 0.8296 (1.0397) acc 81.2500 (74.1506) lr 2.2949e-04 eta 1:08:22 +epoch [41/50] batch [395/500] time 0.867 (0.890) data 0.000 (0.002) loss 0.7881 (1.0365) acc 71.8750 (74.2168) lr 2.2949e-04 eta 1:08:18 +epoch [41/50] batch [400/500] time 0.895 (0.890) data 0.000 (0.002) loss 0.4783 (1.0363) acc 78.1250 (74.1953) lr 2.2949e-04 eta 1:08:14 +epoch [41/50] batch [405/500] time 0.918 (0.890) data 0.000 (0.002) loss 0.7656 (1.0378) acc 78.1250 (74.1975) lr 2.2949e-04 eta 1:08:08 +epoch [41/50] batch [410/500] time 0.916 (0.890) data 0.000 (0.002) loss 1.3438 (1.0365) acc 71.8750 (74.2149) lr 2.2949e-04 eta 1:08:04 +epoch [41/50] batch [415/500] time 0.860 (0.890) data 0.000 (0.002) loss 0.9102 (1.0355) acc 78.1250 (74.2319) lr 2.2949e-04 eta 1:07:58 +epoch [41/50] batch [420/500] time 0.876 (0.889) data 0.000 (0.002) loss 0.6382 (1.0367) acc 81.2500 (74.1964) lr 2.2949e-04 eta 1:07:53 +epoch [41/50] batch [425/500] time 0.845 (0.889) data 0.000 (0.002) loss 1.3486 (1.0394) acc 71.8750 (74.1544) lr 2.2949e-04 eta 1:07:47 +epoch [41/50] batch [430/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.1631 (1.0391) acc 65.6250 (74.1570) lr 2.2949e-04 eta 1:07:43 +epoch [41/50] batch [435/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.0791 (1.0408) acc 59.3750 (74.0948) lr 2.2949e-04 eta 1:07:38 +epoch [41/50] batch [440/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.9907 (1.0400) acc 68.7500 (74.1051) lr 2.2949e-04 eta 1:07:33 +epoch [41/50] batch [445/500] time 0.868 (0.889) data 0.000 (0.002) loss 0.8979 (1.0394) acc 78.1250 (74.1081) lr 2.2949e-04 eta 1:07:28 +epoch [41/50] batch [450/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.7549 (1.0433) acc 62.5000 (74.0347) lr 2.2949e-04 eta 1:07:23 +epoch [41/50] batch [455/500] time 0.870 (0.889) data 
0.000 (0.002) loss 0.8501 (1.0434) acc 84.3750 (74.0110) lr 2.2949e-04 eta 1:07:19 +epoch [41/50] batch [460/500] time 0.893 (0.889) data 0.000 (0.002) loss 0.7314 (1.0426) acc 81.2500 (74.0557) lr 2.2949e-04 eta 1:07:15 +epoch [41/50] batch [465/500] time 0.885 (0.889) data 0.000 (0.002) loss 1.2852 (1.0407) acc 62.5000 (74.0726) lr 2.2949e-04 eta 1:07:11 +epoch [41/50] batch [470/500] time 0.882 (0.889) data 0.000 (0.002) loss 0.7324 (1.0400) acc 81.2500 (74.0625) lr 2.2949e-04 eta 1:07:06 +epoch [41/50] batch [475/500] time 0.875 (0.889) data 0.000 (0.002) loss 0.6885 (1.0395) acc 81.2500 (74.0592) lr 2.2949e-04 eta 1:07:01 +epoch [41/50] batch [480/500] time 0.896 (0.889) data 0.000 (0.002) loss 0.6846 (1.0412) acc 78.1250 (74.0495) lr 2.2949e-04 eta 1:06:56 +epoch [41/50] batch [485/500] time 0.897 (0.889) data 0.000 (0.002) loss 1.1074 (1.0424) acc 68.7500 (74.0271) lr 2.2949e-04 eta 1:06:52 +epoch [41/50] batch [490/500] time 0.895 (0.889) data 0.000 (0.002) loss 0.7622 (1.0416) acc 78.1250 (74.0115) lr 2.2949e-04 eta 1:06:47 +epoch [41/50] batch [495/500] time 0.877 (0.889) data 0.000 (0.002) loss 0.5322 (1.0409) acc 90.6250 (74.0657) lr 2.2949e-04 eta 1:06:43 +epoch [41/50] batch [500/500] time 0.915 (0.889) data 0.000 (0.002) loss 0.5122 (1.0399) acc 78.1250 (74.0500) lr 1.9098e-04 eta 1:06:39 +epoch [42/50] batch [5/500] time 0.900 (1.044) data 0.000 (0.168) loss 1.1006 (0.9206) acc 78.1250 (79.3750) lr 1.9098e-04 eta 1:18:13 +epoch [42/50] batch [10/500] time 0.994 (0.972) data 0.000 (0.084) loss 1.4883 (1.0559) acc 65.6250 (75.3125) lr 1.9098e-04 eta 1:12:43 +epoch [42/50] batch [15/500] time 0.878 (0.946) data 0.000 (0.056) loss 1.2607 (1.0522) acc 68.7500 (74.5833) lr 1.9098e-04 eta 1:10:42 +epoch [42/50] batch [20/500] time 0.903 (0.929) data 0.000 (0.042) loss 0.7705 (1.0331) acc 81.2500 (74.6875) lr 1.9098e-04 eta 1:09:23 +epoch [42/50] batch [25/500] time 0.887 (0.921) data 0.000 (0.034) loss 0.7598 (1.0042) acc 78.1250 (75.1250) lr 1.9098e-04 
eta 1:08:42 +epoch [42/50] batch [30/500] time 0.852 (0.915) data 0.000 (0.028) loss 0.5083 (0.9887) acc 81.2500 (74.7917) lr 1.9098e-04 eta 1:08:09 +epoch [42/50] batch [35/500] time 0.892 (0.912) data 0.000 (0.024) loss 1.1240 (0.9688) acc 68.7500 (75.4464) lr 1.9098e-04 eta 1:07:52 +epoch [42/50] batch [40/500] time 0.899 (0.909) data 0.000 (0.021) loss 0.8096 (0.9385) acc 84.3750 (76.4062) lr 1.9098e-04 eta 1:07:32 +epoch [42/50] batch [45/500] time 0.887 (0.906) data 0.000 (0.019) loss 1.1943 (0.9595) acc 71.8750 (76.3194) lr 1.9098e-04 eta 1:07:16 +epoch [42/50] batch [50/500] time 0.895 (0.904) data 0.000 (0.017) loss 0.5986 (0.9513) acc 87.5000 (76.4375) lr 1.9098e-04 eta 1:07:02 +epoch [42/50] batch [55/500] time 0.882 (0.904) data 0.000 (0.016) loss 0.3254 (0.9395) acc 93.7500 (76.7614) lr 1.9098e-04 eta 1:06:56 +epoch [42/50] batch [60/500] time 0.897 (0.902) data 0.000 (0.014) loss 0.9395 (0.9500) acc 71.8750 (76.7188) lr 1.9098e-04 eta 1:06:46 +epoch [42/50] batch [65/500] time 0.865 (0.901) data 0.000 (0.013) loss 1.0938 (0.9571) acc 71.8750 (76.5385) lr 1.9098e-04 eta 1:06:36 +epoch [42/50] batch [70/500] time 0.887 (0.900) data 0.000 (0.012) loss 0.7388 (0.9503) acc 81.2500 (76.5625) lr 1.9098e-04 eta 1:06:26 +epoch [42/50] batch [75/500] time 0.911 (0.900) data 0.000 (0.011) loss 1.1016 (0.9574) acc 81.2500 (76.3750) lr 1.9098e-04 eta 1:06:20 +epoch [42/50] batch [80/500] time 0.880 (0.899) data 0.000 (0.011) loss 1.9609 (0.9723) acc 56.2500 (75.9375) lr 1.9098e-04 eta 1:06:11 +epoch [42/50] batch [85/500] time 0.881 (0.898) data 0.000 (0.010) loss 1.2871 (0.9940) acc 71.8750 (75.5515) lr 1.9098e-04 eta 1:06:05 +epoch [42/50] batch [90/500] time 0.902 (0.898) data 0.000 (0.010) loss 0.8232 (0.9890) acc 75.0000 (75.4167) lr 1.9098e-04 eta 1:06:00 +epoch [42/50] batch [95/500] time 0.847 (0.896) data 0.000 (0.009) loss 1.1670 (0.9987) acc 75.0000 (75.1645) lr 1.9098e-04 eta 1:05:48 +epoch [42/50] batch [100/500] time 0.858 (0.896) data 0.000 (0.009) 
loss 0.8843 (0.9945) acc 71.8750 (75.1875) lr 1.9098e-04 eta 1:05:41 +epoch [42/50] batch [105/500] time 0.867 (0.895) data 0.000 (0.008) loss 0.9727 (1.0131) acc 78.1250 (74.7917) lr 1.9098e-04 eta 1:05:35 +epoch [42/50] batch [110/500] time 0.884 (0.895) data 0.000 (0.008) loss 1.2666 (1.0152) acc 68.7500 (74.7159) lr 1.9098e-04 eta 1:05:27 +epoch [42/50] batch [115/500] time 0.871 (0.895) data 0.000 (0.008) loss 1.1787 (1.0148) acc 62.5000 (74.5109) lr 1.9098e-04 eta 1:05:22 +epoch [42/50] batch [120/500] time 0.871 (0.894) data 0.000 (0.007) loss 1.2969 (1.0164) acc 68.7500 (74.3750) lr 1.9098e-04 eta 1:05:14 +epoch [42/50] batch [125/500] time 0.882 (0.893) data 0.000 (0.007) loss 0.8618 (1.0189) acc 87.5000 (74.4000) lr 1.9098e-04 eta 1:05:07 +epoch [42/50] batch [130/500] time 0.893 (0.893) data 0.000 (0.007) loss 1.7266 (1.0323) acc 71.8750 (74.3750) lr 1.9098e-04 eta 1:05:02 +epoch [42/50] batch [135/500] time 0.919 (0.893) data 0.000 (0.006) loss 0.9668 (1.0353) acc 71.8750 (74.2593) lr 1.9098e-04 eta 1:04:56 +epoch [42/50] batch [140/500] time 0.903 (0.892) data 0.000 (0.006) loss 0.7969 (1.0311) acc 75.0000 (74.3080) lr 1.9098e-04 eta 1:04:49 +epoch [42/50] batch [145/500] time 0.879 (0.892) data 0.000 (0.006) loss 1.0400 (1.0343) acc 75.0000 (74.1595) lr 1.9098e-04 eta 1:04:43 +epoch [42/50] batch [150/500] time 1.014 (0.892) data 0.000 (0.006) loss 0.6191 (1.0298) acc 87.5000 (74.2292) lr 1.9098e-04 eta 1:04:40 +epoch [42/50] batch [155/500] time 0.881 (0.892) data 0.000 (0.006) loss 1.3896 (1.0336) acc 68.7500 (74.3145) lr 1.9098e-04 eta 1:04:33 +epoch [42/50] batch [160/500] time 0.888 (0.892) data 0.000 (0.005) loss 1.4102 (1.0364) acc 68.7500 (74.1992) lr 1.9098e-04 eta 1:04:29 +epoch [42/50] batch [165/500] time 0.899 (0.891) data 0.000 (0.005) loss 1.0537 (1.0363) acc 71.8750 (74.1856) lr 1.9098e-04 eta 1:04:24 +epoch [42/50] batch [170/500] time 0.890 (0.891) data 0.000 (0.005) loss 0.5693 (1.0346) acc 75.0000 (74.2647) lr 1.9098e-04 eta 
1:04:18 +epoch [42/50] batch [175/500] time 0.880 (0.891) data 0.000 (0.005) loss 0.6929 (1.0393) acc 75.0000 (74.1429) lr 1.9098e-04 eta 1:04:13 +epoch [42/50] batch [180/500] time 0.901 (0.891) data 0.000 (0.005) loss 0.8003 (1.0373) acc 78.1250 (74.1667) lr 1.9098e-04 eta 1:04:08 +epoch [42/50] batch [185/500] time 0.865 (0.890) data 0.000 (0.005) loss 1.5537 (1.0417) acc 65.6250 (74.1216) lr 1.9098e-04 eta 1:04:02 +epoch [42/50] batch [190/500] time 0.867 (0.890) data 0.000 (0.005) loss 0.7100 (1.0453) acc 84.3750 (73.9967) lr 1.9098e-04 eta 1:03:55 +epoch [42/50] batch [195/500] time 0.888 (0.891) data 0.000 (0.005) loss 1.1934 (1.0473) acc 71.8750 (73.9103) lr 1.9098e-04 eta 1:03:53 +epoch [42/50] batch [200/500] time 0.879 (0.890) data 0.000 (0.004) loss 1.4014 (1.0450) acc 65.6250 (73.9688) lr 1.9098e-04 eta 1:03:48 +epoch [42/50] batch [205/500] time 0.892 (0.890) data 0.000 (0.004) loss 1.2158 (1.0429) acc 71.8750 (74.0701) lr 1.9098e-04 eta 1:03:42 +epoch [42/50] batch [210/500] time 0.904 (0.890) data 0.000 (0.004) loss 1.1631 (1.0448) acc 78.1250 (74.0179) lr 1.9098e-04 eta 1:03:37 +epoch [42/50] batch [215/500] time 0.892 (0.890) data 0.000 (0.004) loss 0.9453 (1.0449) acc 78.1250 (73.9826) lr 1.9098e-04 eta 1:03:31 +epoch [42/50] batch [220/500] time 0.903 (0.889) data 0.000 (0.004) loss 1.3066 (1.0485) acc 71.8750 (73.9347) lr 1.9098e-04 eta 1:03:26 +epoch [42/50] batch [225/500] time 0.856 (0.889) data 0.000 (0.004) loss 0.7900 (1.0469) acc 78.1250 (73.9306) lr 1.9098e-04 eta 1:03:21 +epoch [42/50] batch [230/500] time 0.859 (0.889) data 0.000 (0.004) loss 0.9785 (1.0467) acc 75.0000 (73.9130) lr 1.9098e-04 eta 1:03:16 +epoch [42/50] batch [235/500] time 0.884 (0.889) data 0.000 (0.004) loss 1.1055 (1.0444) acc 68.7500 (73.8431) lr 1.9098e-04 eta 1:03:10 +epoch [42/50] batch [240/500] time 0.901 (0.889) data 0.000 (0.004) loss 0.8003 (1.0439) acc 78.1250 (73.8672) lr 1.9098e-04 eta 1:03:06 +epoch [42/50] batch [245/500] time 0.897 (0.889) data 
0.000 (0.004) loss 1.4160 (1.0518) acc 59.3750 (73.7628) lr 1.9098e-04 eta 1:03:02 +epoch [42/50] batch [250/500] time 0.841 (0.888) data 0.000 (0.004) loss 1.4590 (1.0522) acc 65.6250 (73.7750) lr 1.9098e-04 eta 1:02:55 +epoch [42/50] batch [255/500] time 0.872 (0.888) data 0.000 (0.004) loss 1.5361 (1.0558) acc 71.8750 (73.7745) lr 1.9098e-04 eta 1:02:49 +epoch [42/50] batch [260/500] time 0.862 (0.888) data 0.000 (0.003) loss 0.7983 (1.0551) acc 81.2500 (73.7260) lr 1.9098e-04 eta 1:02:44 +epoch [42/50] batch [265/500] time 0.894 (0.888) data 0.000 (0.003) loss 1.1094 (1.0537) acc 75.0000 (73.7854) lr 1.9098e-04 eta 1:02:39 +epoch [42/50] batch [270/500] time 0.910 (0.888) data 0.000 (0.003) loss 1.1904 (1.0555) acc 71.8750 (73.7500) lr 1.9098e-04 eta 1:02:35 +epoch [42/50] batch [275/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.8740 (1.0504) acc 78.1250 (73.8864) lr 1.9098e-04 eta 1:02:30 +epoch [42/50] batch [280/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.7158 (1.0489) acc 78.1250 (73.8839) lr 1.9098e-04 eta 1:02:26 +epoch [42/50] batch [285/500] time 0.855 (0.888) data 0.000 (0.003) loss 1.1094 (1.0532) acc 62.5000 (73.7719) lr 1.9098e-04 eta 1:02:22 +epoch [42/50] batch [290/500] time 0.852 (0.888) data 0.000 (0.003) loss 0.8184 (1.0508) acc 81.2500 (73.8578) lr 1.9098e-04 eta 1:02:16 +epoch [42/50] batch [295/500] time 0.885 (0.888) data 0.000 (0.003) loss 1.3730 (1.0553) acc 62.5000 (73.7182) lr 1.9098e-04 eta 1:02:13 +epoch [42/50] batch [300/500] time 0.888 (0.888) data 0.000 (0.003) loss 0.7134 (1.0541) acc 78.1250 (73.7188) lr 1.9098e-04 eta 1:02:09 +epoch [42/50] batch [305/500] time 0.863 (0.888) data 0.000 (0.003) loss 1.4453 (1.0552) acc 59.3750 (73.6885) lr 1.9098e-04 eta 1:02:05 +epoch [42/50] batch [310/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.1143 (1.0546) acc 65.6250 (73.6996) lr 1.9098e-04 eta 1:02:00 +epoch [42/50] batch [315/500] time 0.906 (0.888) data 0.000 (0.003) loss 0.6113 (1.0552) acc 84.3750 (73.7004) lr 
1.9098e-04 eta 1:01:56 +epoch [42/50] batch [320/500] time 0.866 (0.888) data 0.000 (0.003) loss 0.9790 (1.0566) acc 78.1250 (73.6621) lr 1.9098e-04 eta 1:01:51 +epoch [42/50] batch [325/500] time 0.873 (0.888) data 0.001 (0.003) loss 1.2900 (1.0576) acc 71.8750 (73.6058) lr 1.9098e-04 eta 1:01:47 +epoch [42/50] batch [330/500] time 0.889 (0.888) data 0.000 (0.003) loss 0.6284 (1.0578) acc 84.3750 (73.5985) lr 1.9098e-04 eta 1:01:43 +epoch [42/50] batch [335/500] time 0.891 (0.888) data 0.000 (0.003) loss 0.8848 (1.0604) acc 84.3750 (73.5541) lr 1.9098e-04 eta 1:01:38 +epoch [42/50] batch [340/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.1006 (1.0605) acc 65.6250 (73.5386) lr 1.9098e-04 eta 1:01:35 +epoch [42/50] batch [345/500] time 0.877 (0.888) data 0.000 (0.003) loss 0.7720 (1.0591) acc 78.1250 (73.5960) lr 1.9098e-04 eta 1:01:30 +epoch [42/50] batch [350/500] time 0.909 (0.888) data 0.000 (0.003) loss 1.4658 (1.0585) acc 75.0000 (73.6071) lr 1.9098e-04 eta 1:01:26 +epoch [42/50] batch [355/500] time 0.892 (0.889) data 0.000 (0.003) loss 1.3105 (1.0576) acc 71.8750 (73.6268) lr 1.9098e-04 eta 1:01:23 +epoch [42/50] batch [360/500] time 0.898 (0.888) data 0.000 (0.003) loss 1.2500 (1.0607) acc 62.5000 (73.5330) lr 1.9098e-04 eta 1:01:18 +epoch [42/50] batch [365/500] time 0.896 (0.888) data 0.000 (0.003) loss 1.4834 (1.0618) acc 78.1250 (73.5274) lr 1.9098e-04 eta 1:01:13 +epoch [42/50] batch [370/500] time 0.883 (0.888) data 0.000 (0.003) loss 1.3047 (1.0658) acc 62.5000 (73.4882) lr 1.9098e-04 eta 1:01:08 +epoch [42/50] batch [375/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.1572 (1.0661) acc 68.7500 (73.4583) lr 1.9098e-04 eta 1:01:04 +epoch [42/50] batch [380/500] time 0.874 (0.888) data 0.000 (0.002) loss 1.1523 (1.0676) acc 75.0000 (73.4539) lr 1.9098e-04 eta 1:00:59 +epoch [42/50] batch [385/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.2354 (1.0674) acc 75.0000 (73.4984) lr 1.9098e-04 eta 1:00:54 +epoch [42/50] batch [390/500] time 0.870 
(0.888) data 0.000 (0.002) loss 1.2139 (1.0652) acc 68.7500 (73.5337) lr 1.9098e-04 eta 1:00:50 +epoch [42/50] batch [395/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.7026 (1.0616) acc 78.1250 (73.5759) lr 1.9098e-04 eta 1:00:46 +epoch [42/50] batch [400/500] time 0.855 (0.888) data 0.000 (0.002) loss 1.0938 (1.0635) acc 65.6250 (73.4922) lr 1.9098e-04 eta 1:00:41 +epoch [42/50] batch [405/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.7681 (1.0636) acc 78.1250 (73.4568) lr 1.9098e-04 eta 1:00:37 +epoch [42/50] batch [410/500] time 0.876 (0.888) data 0.000 (0.002) loss 1.2715 (1.0628) acc 65.6250 (73.4604) lr 1.9098e-04 eta 1:00:32 +epoch [42/50] batch [415/500] time 0.886 (0.888) data 0.000 (0.002) loss 0.9756 (1.0630) acc 71.8750 (73.4413) lr 1.9098e-04 eta 1:00:27 +epoch [42/50] batch [420/500] time 0.871 (0.888) data 0.000 (0.002) loss 1.1416 (1.0611) acc 75.0000 (73.4524) lr 1.9098e-04 eta 1:00:22 +epoch [42/50] batch [425/500] time 0.888 (0.888) data 0.001 (0.002) loss 1.1182 (1.0615) acc 71.8750 (73.4632) lr 1.9098e-04 eta 1:00:18 +epoch [42/50] batch [430/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0234 (1.0628) acc 71.8750 (73.4520) lr 1.9098e-04 eta 1:00:14 +epoch [42/50] batch [435/500] time 0.910 (0.888) data 0.000 (0.002) loss 1.0801 (1.0641) acc 68.7500 (73.3980) lr 1.9098e-04 eta 1:00:09 +epoch [42/50] batch [440/500] time 0.871 (0.888) data 0.000 (0.002) loss 0.5347 (1.0618) acc 87.5000 (73.4801) lr 1.9098e-04 eta 1:00:05 +epoch [42/50] batch [445/500] time 0.887 (0.888) data 0.000 (0.002) loss 0.9761 (1.0606) acc 75.0000 (73.4902) lr 1.9098e-04 eta 1:00:00 +epoch [42/50] batch [450/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.7690 (1.0600) acc 84.3750 (73.5486) lr 1.9098e-04 eta 0:59:56 +epoch [42/50] batch [455/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.8530 (1.0603) acc 71.8750 (73.5508) lr 1.9098e-04 eta 0:59:52 +epoch [42/50] batch [460/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.2764 (1.0588) acc 68.7500 
(73.6345) lr 1.9098e-04 eta 0:59:46 +epoch [42/50] batch [465/500] time 0.881 (0.888) data 0.000 (0.002) loss 1.3555 (1.0599) acc 68.7500 (73.6022) lr 1.9098e-04 eta 0:59:42 +epoch [42/50] batch [470/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.5059 (1.0586) acc 81.2500 (73.6370) lr 1.9098e-04 eta 0:59:37 +epoch [42/50] batch [475/500] time 0.903 (0.888) data 0.000 (0.002) loss 0.8110 (1.0590) acc 84.3750 (73.6316) lr 1.9098e-04 eta 0:59:32 +epoch [42/50] batch [480/500] time 0.990 (0.888) data 0.000 (0.002) loss 1.3086 (1.0607) acc 62.5000 (73.6133) lr 1.9098e-04 eta 0:59:28 +epoch [42/50] batch [485/500] time 0.857 (0.888) data 0.000 (0.002) loss 1.3594 (1.0582) acc 71.8750 (73.6791) lr 1.9098e-04 eta 0:59:23 +epoch [42/50] batch [490/500] time 0.889 (0.888) data 0.000 (0.002) loss 0.7900 (1.0590) acc 78.1250 (73.6416) lr 1.9098e-04 eta 0:59:19 +epoch [42/50] batch [495/500] time 0.907 (0.888) data 0.000 (0.002) loss 0.6235 (1.0577) acc 78.1250 (73.6995) lr 1.9098e-04 eta 0:59:15 +epoch [42/50] batch [500/500] time 0.850 (0.888) data 0.000 (0.002) loss 0.9756 (1.0573) acc 71.8750 (73.6625) lr 1.5567e-04 eta 0:59:10 +epoch [43/50] batch [5/500] time 0.902 (1.044) data 0.000 (0.145) loss 1.0674 (1.2041) acc 75.0000 (67.5000) lr 1.5567e-04 eta 1:09:31 +epoch [43/50] batch [10/500] time 0.887 (0.974) data 0.000 (0.073) loss 1.0566 (1.1408) acc 71.8750 (70.3125) lr 1.5567e-04 eta 1:04:46 +epoch [43/50] batch [15/500] time 0.874 (0.941) data 0.000 (0.049) loss 1.1699 (1.1434) acc 68.7500 (70.6250) lr 1.5567e-04 eta 1:02:31 +epoch [43/50] batch [20/500] time 0.876 (0.925) data 0.000 (0.036) loss 1.1367 (1.1206) acc 68.7500 (71.0938) lr 1.5567e-04 eta 1:01:22 +epoch [43/50] batch [25/500] time 0.883 (0.924) data 0.000 (0.029) loss 0.9800 (1.0359) acc 78.1250 (73.3750) lr 1.5567e-04 eta 1:01:14 +epoch [43/50] batch [30/500] time 0.895 (0.918) data 0.000 (0.024) loss 1.4580 (1.0344) acc 62.5000 (73.4375) lr 1.5567e-04 eta 1:00:42 +epoch [43/50] batch [35/500] time 
0.912 (0.913) data 0.000 (0.021) loss 1.0801 (1.0279) acc 81.2500 (73.7500) lr 1.5567e-04 eta 1:00:19 +epoch [43/50] batch [40/500] time 0.911 (0.911) data 0.000 (0.018) loss 1.2891 (1.0433) acc 65.6250 (73.5156) lr 1.5567e-04 eta 1:00:07 +epoch [43/50] batch [45/500] time 0.878 (0.908) data 0.000 (0.016) loss 1.0605 (1.0374) acc 75.0000 (73.9583) lr 1.5567e-04 eta 0:59:50 +epoch [43/50] batch [50/500] time 0.883 (0.906) data 0.000 (0.015) loss 0.8804 (1.0405) acc 71.8750 (74.0000) lr 1.5567e-04 eta 0:59:39 +epoch [43/50] batch [55/500] time 0.887 (0.904) data 0.000 (0.013) loss 0.3848 (1.0574) acc 84.3750 (73.9205) lr 1.5567e-04 eta 0:59:26 +epoch [43/50] batch [60/500] time 0.879 (0.902) data 0.000 (0.012) loss 0.4846 (1.0482) acc 87.5000 (74.2188) lr 1.5567e-04 eta 0:59:15 +epoch [43/50] batch [65/500] time 0.871 (0.903) data 0.000 (0.011) loss 1.3564 (1.0370) acc 65.6250 (74.5192) lr 1.5567e-04 eta 0:59:13 +epoch [43/50] batch [70/500] time 0.893 (0.901) data 0.000 (0.011) loss 0.8125 (1.0283) acc 78.1250 (74.5536) lr 1.5567e-04 eta 0:59:00 +epoch [43/50] batch [75/500] time 0.901 (0.900) data 0.000 (0.010) loss 1.5947 (1.0326) acc 53.1250 (74.3333) lr 1.5567e-04 eta 0:58:53 +epoch [43/50] batch [80/500] time 0.874 (0.899) data 0.000 (0.009) loss 0.8726 (1.0445) acc 81.2500 (74.1797) lr 1.5567e-04 eta 0:58:42 +epoch [43/50] batch [85/500] time 0.896 (0.898) data 0.000 (0.009) loss 1.3125 (1.0482) acc 71.8750 (74.0441) lr 1.5567e-04 eta 0:58:35 +epoch [43/50] batch [90/500] time 0.860 (0.898) data 0.000 (0.008) loss 1.2949 (1.0598) acc 59.3750 (73.7847) lr 1.5567e-04 eta 0:58:31 +epoch [43/50] batch [95/500] time 0.884 (0.898) data 0.000 (0.008) loss 1.2676 (1.0611) acc 71.8750 (73.7829) lr 1.5567e-04 eta 0:58:26 +epoch [43/50] batch [100/500] time 0.872 (0.897) data 0.000 (0.007) loss 1.3086 (1.0669) acc 65.6250 (73.5938) lr 1.5567e-04 eta 0:58:17 +epoch [43/50] batch [105/500] time 0.888 (0.896) data 0.000 (0.007) loss 0.6167 (1.0641) acc 87.5000 (73.6905) lr 
1.5567e-04 eta 0:58:08 +epoch [43/50] batch [110/500] time 0.869 (0.896) data 0.000 (0.007) loss 1.2412 (1.0567) acc 71.8750 (73.7500) lr 1.5567e-04 eta 0:58:03 +epoch [43/50] batch [115/500] time 0.891 (0.896) data 0.000 (0.007) loss 1.4727 (1.0559) acc 68.7500 (73.5598) lr 1.5567e-04 eta 0:57:59 +epoch [43/50] batch [120/500] time 0.891 (0.895) data 0.000 (0.006) loss 1.0820 (1.0542) acc 71.8750 (73.6979) lr 1.5567e-04 eta 0:57:52 +epoch [43/50] batch [125/500] time 0.869 (0.895) data 0.000 (0.006) loss 1.0254 (1.0556) acc 68.7500 (73.6500) lr 1.5567e-04 eta 0:57:49 +epoch [43/50] batch [130/500] time 0.848 (0.894) data 0.000 (0.006) loss 0.5386 (1.0516) acc 84.3750 (73.8702) lr 1.5567e-04 eta 0:57:41 +epoch [43/50] batch [135/500] time 0.899 (0.894) data 0.000 (0.006) loss 1.0273 (1.0441) acc 78.1250 (73.9583) lr 1.5567e-04 eta 0:57:35 +epoch [43/50] batch [140/500] time 0.883 (0.893) data 0.000 (0.005) loss 0.6162 (1.0350) acc 84.3750 (74.2411) lr 1.5567e-04 eta 0:57:28 +epoch [43/50] batch [145/500] time 0.894 (0.893) data 0.000 (0.005) loss 1.0508 (1.0385) acc 71.8750 (74.2241) lr 1.5567e-04 eta 0:57:23 +epoch [43/50] batch [150/500] time 0.852 (0.893) data 0.000 (0.005) loss 0.6807 (1.0386) acc 84.3750 (74.2083) lr 1.5567e-04 eta 0:57:17 +epoch [43/50] batch [155/500] time 0.871 (0.893) data 0.000 (0.005) loss 0.5269 (1.0300) acc 81.2500 (74.4153) lr 1.5567e-04 eta 0:57:11 +epoch [43/50] batch [160/500] time 0.927 (0.893) data 0.000 (0.005) loss 1.0908 (1.0278) acc 75.0000 (74.4336) lr 1.5567e-04 eta 0:57:07 +epoch [43/50] batch [165/500] time 0.892 (0.893) data 0.000 (0.005) loss 0.8726 (1.0300) acc 75.0000 (74.4508) lr 1.5567e-04 eta 0:57:02 +epoch [43/50] batch [170/500] time 0.884 (0.893) data 0.000 (0.004) loss 0.7202 (1.0351) acc 78.1250 (74.3199) lr 1.5567e-04 eta 0:57:00 +epoch [43/50] batch [175/500] time 0.934 (0.893) data 0.001 (0.004) loss 0.7090 (1.0345) acc 84.3750 (74.3036) lr 1.5567e-04 eta 0:56:56 +epoch [43/50] batch [180/500] time 0.910 
(0.893) data 0.000 (0.004) loss 1.2510 (1.0368) acc 71.8750 (74.3403) lr 1.5567e-04 eta 0:56:52 +epoch [43/50] batch [185/500] time 0.892 (0.893) data 0.000 (0.004) loss 0.7476 (1.0312) acc 78.1250 (74.3750) lr 1.5567e-04 eta 0:56:47 +epoch [43/50] batch [190/500] time 0.878 (0.893) data 0.000 (0.004) loss 0.9111 (1.0318) acc 78.1250 (74.4408) lr 1.5567e-04 eta 0:56:40 +epoch [43/50] batch [195/500] time 0.864 (0.892) data 0.000 (0.004) loss 1.3135 (1.0346) acc 71.8750 (74.3910) lr 1.5567e-04 eta 0:56:34 +epoch [43/50] batch [200/500] time 0.898 (0.892) data 0.000 (0.004) loss 1.6143 (1.0367) acc 62.5000 (74.3594) lr 1.5567e-04 eta 0:56:29 +epoch [43/50] batch [205/500] time 0.883 (0.892) data 0.000 (0.004) loss 0.6021 (1.0377) acc 84.3750 (74.2988) lr 1.5567e-04 eta 0:56:24 +epoch [43/50] batch [210/500] time 0.862 (0.891) data 0.000 (0.004) loss 1.0303 (1.0449) acc 75.0000 (74.1667) lr 1.5567e-04 eta 0:56:17 +epoch [43/50] batch [215/500] time 0.884 (0.891) data 0.000 (0.004) loss 0.9663 (1.0508) acc 75.0000 (73.9971) lr 1.5567e-04 eta 0:56:13 +epoch [43/50] batch [220/500] time 0.846 (0.891) data 0.000 (0.004) loss 1.0225 (1.0540) acc 71.8750 (73.9347) lr 1.5567e-04 eta 0:56:08 +epoch [43/50] batch [225/500] time 0.837 (0.891) data 0.000 (0.003) loss 1.3379 (1.0498) acc 62.5000 (74.0000) lr 1.5567e-04 eta 0:56:02 +epoch [43/50] batch [230/500] time 0.864 (0.891) data 0.000 (0.003) loss 1.6006 (1.0535) acc 56.2500 (73.9538) lr 1.5567e-04 eta 0:55:58 +epoch [43/50] batch [235/500] time 0.867 (0.890) data 0.000 (0.003) loss 1.0225 (1.0496) acc 71.8750 (74.0426) lr 1.5567e-04 eta 0:55:52 +epoch [43/50] batch [240/500] time 0.898 (0.890) data 0.000 (0.003) loss 0.6060 (1.0453) acc 71.8750 (74.0234) lr 1.5567e-04 eta 0:55:47 +epoch [43/50] batch [245/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.9810 (1.0485) acc 75.0000 (73.9413) lr 1.5567e-04 eta 0:55:42 +epoch [43/50] batch [250/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.0088 (1.0472) acc 71.8750 
(74.0250) lr 1.5567e-04 eta 0:55:37 +epoch [43/50] batch [255/500] time 0.884 (0.890) data 0.000 (0.003) loss 1.0527 (1.0460) acc 75.0000 (74.0686) lr 1.5567e-04 eta 0:55:34 +epoch [43/50] batch [260/500] time 0.869 (0.890) data 0.000 (0.003) loss 1.3027 (1.0443) acc 68.7500 (74.0505) lr 1.5567e-04 eta 0:55:28 +epoch [43/50] batch [265/500] time 0.870 (0.890) data 0.000 (0.003) loss 0.9644 (1.0457) acc 75.0000 (74.0684) lr 1.5567e-04 eta 0:55:23 +epoch [43/50] batch [270/500] time 0.888 (0.890) data 0.000 (0.003) loss 0.4292 (1.0444) acc 84.3750 (74.0972) lr 1.5567e-04 eta 0:55:20 +epoch [43/50] batch [275/500] time 0.887 (0.890) data 0.000 (0.003) loss 1.6260 (1.0456) acc 62.5000 (74.0909) lr 1.5567e-04 eta 0:55:14 +epoch [43/50] batch [280/500] time 0.854 (0.890) data 0.000 (0.003) loss 0.9922 (1.0489) acc 71.8750 (73.9732) lr 1.5567e-04 eta 0:55:09 +epoch [43/50] batch [285/500] time 0.881 (0.889) data 0.000 (0.003) loss 0.9526 (1.0459) acc 75.0000 (73.9912) lr 1.5567e-04 eta 0:55:04 +epoch [43/50] batch [290/500] time 0.898 (0.889) data 0.000 (0.003) loss 1.0781 (1.0485) acc 78.1250 (73.9440) lr 1.5567e-04 eta 0:54:58 +epoch [43/50] batch [295/500] time 0.867 (0.889) data 0.000 (0.003) loss 0.9478 (1.0445) acc 71.8750 (73.9301) lr 1.5567e-04 eta 0:54:53 +epoch [43/50] batch [300/500] time 0.910 (0.889) data 0.000 (0.003) loss 1.5576 (1.0442) acc 65.6250 (73.9375) lr 1.5567e-04 eta 0:54:50 +epoch [43/50] batch [305/500] time 0.885 (0.889) data 0.000 (0.003) loss 1.7256 (1.0467) acc 59.3750 (73.8730) lr 1.5567e-04 eta 0:54:44 +epoch [43/50] batch [310/500] time 0.995 (0.889) data 0.000 (0.003) loss 1.3076 (1.0478) acc 65.6250 (73.8004) lr 1.5567e-04 eta 0:54:41 +epoch [43/50] batch [315/500] time 0.847 (0.889) data 0.000 (0.003) loss 0.8252 (1.0421) acc 78.1250 (73.9583) lr 1.5567e-04 eta 0:54:36 +epoch [43/50] batch [320/500] time 0.901 (0.889) data 0.000 (0.003) loss 1.1445 (1.0444) acc 62.5000 (73.8477) lr 1.5567e-04 eta 0:54:31 +epoch [43/50] batch [325/500] 
time 0.859 (0.889) data 0.001 (0.002) loss 1.1807 (1.0431) acc 71.8750 (73.8846) lr 1.5567e-04 eta 0:54:26 +epoch [43/50] batch [330/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.3848 (1.0428) acc 78.1250 (73.9678) lr 1.5567e-04 eta 0:54:21 +epoch [43/50] batch [335/500] time 0.889 (0.889) data 0.001 (0.002) loss 0.7319 (1.0429) acc 81.2500 (73.9179) lr 1.5567e-04 eta 0:54:17 +epoch [43/50] batch [340/500] time 0.908 (0.889) data 0.000 (0.002) loss 1.2764 (1.0466) acc 71.8750 (73.9614) lr 1.5567e-04 eta 0:54:14 +epoch [43/50] batch [345/500] time 0.848 (0.889) data 0.000 (0.002) loss 0.5811 (1.0444) acc 84.3750 (74.0127) lr 1.5567e-04 eta 0:54:09 +epoch [43/50] batch [350/500] time 0.886 (0.889) data 0.000 (0.002) loss 1.4209 (1.0450) acc 65.6250 (74.0089) lr 1.5567e-04 eta 0:54:04 +epoch [43/50] batch [355/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.7026 (1.0440) acc 78.1250 (74.0053) lr 1.5567e-04 eta 0:53:59 +epoch [43/50] batch [360/500] time 0.881 (0.889) data 0.000 (0.002) loss 1.4424 (1.0460) acc 62.5000 (73.9236) lr 1.5567e-04 eta 0:53:55 +epoch [43/50] batch [365/500] time 0.903 (0.889) data 0.000 (0.002) loss 1.6133 (1.0468) acc 68.7500 (73.9298) lr 1.5567e-04 eta 0:53:51 +epoch [43/50] batch [370/500] time 0.915 (0.889) data 0.000 (0.002) loss 1.0381 (1.0466) acc 75.0000 (73.9527) lr 1.5567e-04 eta 0:53:47 +epoch [43/50] batch [375/500] time 0.871 (0.889) data 0.000 (0.002) loss 0.9102 (1.0463) acc 78.1250 (73.9500) lr 1.5567e-04 eta 0:53:42 +epoch [43/50] batch [380/500] time 0.866 (0.889) data 0.000 (0.002) loss 0.7505 (1.0451) acc 71.8750 (73.9967) lr 1.5567e-04 eta 0:53:37 +epoch [43/50] batch [385/500] time 0.862 (0.889) data 0.000 (0.002) loss 1.5068 (1.0469) acc 65.6250 (73.9286) lr 1.5567e-04 eta 0:53:32 +epoch [43/50] batch [390/500] time 0.877 (0.888) data 0.000 (0.002) loss 1.2070 (1.0502) acc 65.6250 (73.8622) lr 1.5567e-04 eta 0:53:27 +epoch [43/50] batch [395/500] time 0.860 (0.888) data 0.000 (0.002) loss 1.2539 (1.0515) acc 
78.1250 (73.8608) lr 1.5567e-04 eta 0:53:22 +epoch [43/50] batch [400/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.6289 (1.0495) acc 81.2500 (73.9062) lr 1.5567e-04 eta 0:53:17 +epoch [43/50] batch [405/500] time 0.869 (0.888) data 0.001 (0.002) loss 1.0146 (1.0522) acc 75.0000 (73.8812) lr 1.5567e-04 eta 0:53:13 +epoch [43/50] batch [410/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.0947 (1.0544) acc 65.6250 (73.8186) lr 1.5567e-04 eta 0:53:09 +epoch [43/50] batch [415/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.4102 (1.0524) acc 78.1250 (73.8404) lr 1.5567e-04 eta 0:53:05 +epoch [43/50] batch [420/500] time 0.882 (0.888) data 0.000 (0.002) loss 0.8950 (1.0561) acc 81.2500 (73.7574) lr 1.5567e-04 eta 0:53:00 +epoch [43/50] batch [425/500] time 0.868 (0.888) data 0.000 (0.002) loss 1.3086 (1.0584) acc 78.1250 (73.7279) lr 1.5567e-04 eta 0:52:55 +epoch [43/50] batch [430/500] time 0.858 (0.888) data 0.000 (0.002) loss 0.8413 (1.0600) acc 78.1250 (73.7282) lr 1.5567e-04 eta 0:52:50 +epoch [43/50] batch [435/500] time 0.895 (0.888) data 0.000 (0.002) loss 1.0781 (1.0635) acc 71.8750 (73.6351) lr 1.5567e-04 eta 0:52:45 +epoch [43/50] batch [440/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.6050 (1.0615) acc 81.2500 (73.6151) lr 1.5567e-04 eta 0:52:41 +epoch [43/50] batch [445/500] time 0.879 (0.888) data 0.000 (0.002) loss 1.1641 (1.0629) acc 71.8750 (73.6025) lr 1.5567e-04 eta 0:52:36 +epoch [43/50] batch [450/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.0586 (1.0602) acc 71.8750 (73.6736) lr 1.5567e-04 eta 0:52:32 +epoch [43/50] batch [455/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.3682 (1.0600) acc 65.6250 (73.6951) lr 1.5567e-04 eta 0:52:28 +epoch [43/50] batch [460/500] time 0.878 (0.888) data 0.000 (0.002) loss 0.8438 (1.0580) acc 84.3750 (73.7432) lr 1.5567e-04 eta 0:52:23 +epoch [43/50] batch [465/500] time 0.908 (0.888) data 0.000 (0.002) loss 0.7881 (1.0565) acc 81.2500 (73.7836) lr 1.5567e-04 eta 0:52:19 +epoch [43/50] batch 
[470/500] time 0.905 (0.888) data 0.000 (0.002) loss 1.0449 (1.0576) acc 71.8750 (73.7699) lr 1.5567e-04 eta 0:52:15 +epoch [43/50] batch [475/500] time 0.905 (0.888) data 0.000 (0.002) loss 0.8252 (1.0565) acc 75.0000 (73.7763) lr 1.5567e-04 eta 0:52:11 +epoch [43/50] batch [480/500] time 0.896 (0.888) data 0.000 (0.002) loss 0.9468 (1.0565) acc 81.2500 (73.7695) lr 1.5567e-04 eta 0:52:07 +epoch [43/50] batch [485/500] time 0.868 (0.888) data 0.001 (0.002) loss 0.5767 (1.0545) acc 84.3750 (73.8209) lr 1.5567e-04 eta 0:52:02 +epoch [43/50] batch [490/500] time 0.908 (0.888) data 0.000 (0.002) loss 0.6494 (1.0539) acc 84.3750 (73.8138) lr 1.5567e-04 eta 0:51:58 +epoch [43/50] batch [495/500] time 0.909 (0.888) data 0.000 (0.002) loss 1.2578 (1.0536) acc 75.0000 (73.8384) lr 1.5567e-04 eta 0:51:53 +epoch [43/50] batch [500/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.8525 (1.0563) acc 62.5000 (73.7938) lr 1.2369e-04 eta 0:51:49 +epoch [44/50] batch [5/500] time 0.896 (1.023) data 0.000 (0.132) loss 0.9873 (0.9143) acc 75.0000 (76.2500) lr 1.2369e-04 eta 0:59:35 +epoch [44/50] batch [10/500] time 0.873 (0.956) data 0.000 (0.066) loss 1.1982 (1.0903) acc 71.8750 (71.5625) lr 1.2369e-04 eta 0:55:37 +epoch [44/50] batch [15/500] time 0.908 (0.932) data 0.000 (0.044) loss 1.9150 (1.2427) acc 62.5000 (68.7500) lr 1.2369e-04 eta 0:54:07 +epoch [44/50] batch [20/500] time 0.897 (0.920) data 0.000 (0.033) loss 1.0342 (1.2039) acc 78.1250 (69.6875) lr 1.2369e-04 eta 0:53:21 +epoch [44/50] batch [25/500] time 0.908 (0.913) data 0.000 (0.027) loss 1.4629 (1.1976) acc 68.7500 (70.2500) lr 1.2369e-04 eta 0:52:52 +epoch [44/50] batch [30/500] time 0.864 (0.907) data 0.000 (0.022) loss 1.1387 (1.1757) acc 75.0000 (71.6667) lr 1.2369e-04 eta 0:52:28 +epoch [44/50] batch [35/500] time 0.885 (0.904) data 0.000 (0.019) loss 0.5757 (1.1202) acc 81.2500 (72.3214) lr 1.2369e-04 eta 0:52:11 +epoch [44/50] batch [40/500] time 0.872 (0.900) data 0.001 (0.017) loss 0.9829 (1.1074) acc 
81.2500 (72.9688) lr 1.2369e-04 eta 0:51:54 +epoch [44/50] batch [45/500] time 0.852 (0.897) data 0.000 (0.015) loss 1.0371 (1.0936) acc 71.8750 (73.2639) lr 1.2369e-04 eta 0:51:39 +epoch [44/50] batch [50/500] time 0.861 (0.894) data 0.000 (0.013) loss 0.4802 (1.0542) acc 87.5000 (74.1250) lr 1.2369e-04 eta 0:51:24 +epoch [44/50] batch [55/500] time 0.896 (0.893) data 0.000 (0.012) loss 1.6396 (1.0649) acc 65.6250 (73.6932) lr 1.2369e-04 eta 0:51:17 +epoch [44/50] batch [60/500] time 0.894 (0.893) data 0.000 (0.011) loss 1.1104 (1.0778) acc 75.0000 (73.8542) lr 1.2369e-04 eta 0:51:11 +epoch [44/50] batch [65/500] time 0.906 (0.892) data 0.000 (0.010) loss 0.9604 (1.0720) acc 68.7500 (74.1346) lr 1.2369e-04 eta 0:51:04 +epoch [44/50] batch [70/500] time 0.894 (0.892) data 0.000 (0.010) loss 0.8950 (1.0665) acc 71.8750 (73.8393) lr 1.2369e-04 eta 0:50:58 +epoch [44/50] batch [75/500] time 0.900 (0.892) data 0.000 (0.009) loss 0.9570 (1.0704) acc 68.7500 (73.7083) lr 1.2369e-04 eta 0:50:53 +epoch [44/50] batch [80/500] time 0.907 (0.891) data 0.000 (0.008) loss 1.2207 (1.0777) acc 78.1250 (73.3984) lr 1.2369e-04 eta 0:50:48 +epoch [44/50] batch [85/500] time 0.892 (0.891) data 0.000 (0.008) loss 0.5610 (1.0610) acc 81.2500 (73.8235) lr 1.2369e-04 eta 0:50:42 +epoch [44/50] batch [90/500] time 0.888 (0.890) data 0.000 (0.008) loss 1.2354 (1.0649) acc 75.0000 (73.8889) lr 1.2369e-04 eta 0:50:36 +epoch [44/50] batch [95/500] time 0.897 (0.891) data 0.000 (0.007) loss 0.8374 (1.0647) acc 75.0000 (73.6842) lr 1.2369e-04 eta 0:50:34 +epoch [44/50] batch [100/500] time 0.889 (0.891) data 0.000 (0.007) loss 0.7354 (1.0664) acc 78.1250 (73.6875) lr 1.2369e-04 eta 0:50:29 +epoch [44/50] batch [105/500] time 0.894 (0.891) data 0.001 (0.006) loss 1.1094 (1.0758) acc 75.0000 (73.5417) lr 1.2369e-04 eta 0:50:23 +epoch [44/50] batch [110/500] time 0.870 (0.890) data 0.001 (0.006) loss 0.5508 (1.0735) acc 87.5000 (73.6648) lr 1.2369e-04 eta 0:50:18 +epoch [44/50] batch [115/500] 
time 0.876 (0.889) data 0.000 (0.006) loss 1.0059 (1.0773) acc 78.1250 (73.6413) lr 1.2369e-04 eta 0:50:10 +epoch [44/50] batch [120/500] time 0.902 (0.889) data 0.000 (0.006) loss 1.1865 (1.0727) acc 75.0000 (73.8281) lr 1.2369e-04 eta 0:50:06 +epoch [44/50] batch [125/500] time 0.894 (0.889) data 0.000 (0.005) loss 1.1162 (1.0769) acc 65.6250 (73.8000) lr 1.2369e-04 eta 0:50:01 +epoch [44/50] batch [130/500] time 0.903 (0.890) data 0.000 (0.005) loss 1.4971 (1.0795) acc 65.6250 (73.7260) lr 1.2369e-04 eta 0:49:57 +epoch [44/50] batch [135/500] time 0.900 (0.889) data 0.000 (0.005) loss 0.7832 (1.0673) acc 75.0000 (73.8657) lr 1.2369e-04 eta 0:49:52 +epoch [44/50] batch [140/500] time 0.882 (0.890) data 0.000 (0.005) loss 0.9141 (1.0676) acc 78.1250 (73.8393) lr 1.2369e-04 eta 0:49:50 +epoch [44/50] batch [145/500] time 0.878 (0.890) data 0.000 (0.005) loss 0.9907 (1.0719) acc 75.0000 (73.7500) lr 1.2369e-04 eta 0:49:45 +epoch [44/50] batch [150/500] time 0.893 (0.889) data 0.000 (0.005) loss 0.6470 (1.0681) acc 84.3750 (73.8958) lr 1.2369e-04 eta 0:49:39 +epoch [44/50] batch [155/500] time 0.892 (0.889) data 0.000 (0.004) loss 0.9639 (1.0746) acc 71.8750 (73.7298) lr 1.2369e-04 eta 0:49:34 +epoch [44/50] batch [160/500] time 0.882 (0.889) data 0.000 (0.004) loss 0.9902 (1.0732) acc 68.7500 (73.8477) lr 1.2369e-04 eta 0:49:28 +epoch [44/50] batch [165/500] time 0.895 (0.889) data 0.000 (0.004) loss 1.4873 (1.0726) acc 59.3750 (73.9015) lr 1.2369e-04 eta 0:49:23 +epoch [44/50] batch [170/500] time 0.895 (0.889) data 0.000 (0.004) loss 0.7310 (1.0701) acc 75.0000 (73.7684) lr 1.2369e-04 eta 0:49:18 +epoch [44/50] batch [175/500] time 0.909 (0.888) data 0.000 (0.004) loss 1.2402 (1.0720) acc 65.6250 (73.6964) lr 1.2369e-04 eta 0:49:13 +epoch [44/50] batch [180/500] time 0.882 (0.888) data 0.000 (0.004) loss 0.8203 (1.0705) acc 81.2500 (73.6979) lr 1.2369e-04 eta 0:49:08 +epoch [44/50] batch [185/500] time 0.875 (0.888) data 0.000 (0.004) loss 1.0527 (1.0680) acc 
62.5000 (73.7838) lr 1.2369e-04 eta 0:49:03 +epoch [44/50] batch [190/500] time 0.887 (0.888) data 0.000 (0.004) loss 1.5596 (1.0799) acc 62.5000 (73.6184) lr 1.2369e-04 eta 0:48:59 +epoch [44/50] batch [195/500] time 0.886 (0.888) data 0.000 (0.004) loss 1.2676 (1.0854) acc 75.0000 (73.6058) lr 1.2369e-04 eta 0:48:54 +epoch [44/50] batch [200/500] time 0.880 (0.888) data 0.001 (0.004) loss 1.0342 (1.0821) acc 81.2500 (73.6406) lr 1.2369e-04 eta 0:48:49 +epoch [44/50] batch [205/500] time 0.912 (0.888) data 0.000 (0.003) loss 1.6533 (1.0837) acc 68.7500 (73.6128) lr 1.2369e-04 eta 0:48:45 +epoch [44/50] batch [210/500] time 0.897 (0.888) data 0.000 (0.003) loss 0.9033 (1.0791) acc 68.7500 (73.6607) lr 1.2369e-04 eta 0:48:40 +epoch [44/50] batch [215/500] time 0.889 (0.887) data 0.000 (0.003) loss 0.7441 (1.0748) acc 81.2500 (73.7355) lr 1.2369e-04 eta 0:48:35 +epoch [44/50] batch [220/500] time 0.893 (0.887) data 0.000 (0.003) loss 0.9092 (1.0717) acc 75.0000 (73.7926) lr 1.2369e-04 eta 0:48:30 +epoch [44/50] batch [225/500] time 0.869 (0.887) data 0.000 (0.003) loss 1.2744 (1.0705) acc 71.8750 (73.7917) lr 1.2369e-04 eta 0:48:25 +epoch [44/50] batch [230/500] time 0.868 (0.887) data 0.000 (0.003) loss 0.9141 (1.0709) acc 78.1250 (73.7500) lr 1.2369e-04 eta 0:48:20 +epoch [44/50] batch [235/500] time 0.951 (0.887) data 0.000 (0.003) loss 1.4434 (1.0724) acc 71.8750 (73.7101) lr 1.2369e-04 eta 0:48:16 +epoch [44/50] batch [240/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.7144 (1.0683) acc 75.0000 (73.7109) lr 1.2369e-04 eta 0:48:12 +epoch [44/50] batch [245/500] time 0.884 (0.887) data 0.000 (0.003) loss 1.4219 (1.0717) acc 59.3750 (73.5332) lr 1.2369e-04 eta 0:48:07 +epoch [44/50] batch [250/500] time 0.910 (0.887) data 0.000 (0.003) loss 0.7173 (1.0700) acc 87.5000 (73.5500) lr 1.2369e-04 eta 0:48:03 +epoch [44/50] batch [255/500] time 0.902 (0.887) data 0.000 (0.003) loss 1.0410 (1.0707) acc 81.2500 (73.6152) lr 1.2369e-04 eta 0:47:59 +epoch [44/50] batch 
[260/500] time 0.876 (0.887) data 0.000 (0.003) loss 0.9668 (1.0706) acc 78.1250 (73.6538) lr 1.2369e-04 eta 0:47:54 +epoch [44/50] batch [265/500] time 0.897 (0.887) data 0.000 (0.003) loss 0.7188 (1.0692) acc 84.3750 (73.7146) lr 1.2369e-04 eta 0:47:49 +epoch [44/50] batch [270/500] time 0.897 (0.887) data 0.000 (0.003) loss 0.8506 (1.0650) acc 87.5000 (73.8310) lr 1.2369e-04 eta 0:47:44 +epoch [44/50] batch [275/500] time 0.887 (0.887) data 0.000 (0.003) loss 1.4434 (1.0676) acc 75.0000 (73.8750) lr 1.2369e-04 eta 0:47:39 +epoch [44/50] batch [280/500] time 0.873 (0.887) data 0.000 (0.003) loss 0.7720 (1.0643) acc 75.0000 (73.8951) lr 1.2369e-04 eta 0:47:36 +epoch [44/50] batch [285/500] time 0.908 (0.887) data 0.000 (0.003) loss 1.3936 (1.0650) acc 68.7500 (73.7939) lr 1.2369e-04 eta 0:47:31 +epoch [44/50] batch [290/500] time 0.887 (0.887) data 0.000 (0.002) loss 0.5830 (1.0624) acc 87.5000 (73.8147) lr 1.2369e-04 eta 0:47:27 +epoch [44/50] batch [295/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.0566 (1.0619) acc 71.8750 (73.8136) lr 1.2369e-04 eta 0:47:22 +epoch [44/50] batch [300/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.9316 (1.0623) acc 62.5000 (73.7708) lr 1.2369e-04 eta 0:47:18 +epoch [44/50] batch [305/500] time 0.872 (0.887) data 0.000 (0.002) loss 1.6582 (1.0661) acc 65.6250 (73.7193) lr 1.2369e-04 eta 0:47:13 +epoch [44/50] batch [310/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.8955 (1.0663) acc 87.5000 (73.7198) lr 1.2369e-04 eta 0:47:08 +epoch [44/50] batch [315/500] time 0.848 (0.886) data 0.000 (0.002) loss 0.8198 (1.0629) acc 75.0000 (73.7996) lr 1.2369e-04 eta 0:47:03 +epoch [44/50] batch [320/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.0254 (1.0671) acc 78.1250 (73.7500) lr 1.2369e-04 eta 0:46:58 +epoch [44/50] batch [325/500] time 0.883 (0.886) data 0.000 (0.002) loss 0.7617 (1.0646) acc 81.2500 (73.8462) lr 1.2369e-04 eta 0:46:54 +epoch [44/50] batch [330/500] time 0.867 (0.886) data 0.000 (0.002) loss 0.7617 
(1.0639) acc 75.0000 (73.8068) lr 1.2369e-04 eta 0:46:49 +epoch [44/50] batch [335/500] time 0.878 (0.886) data 0.000 (0.002) loss 1.1582 (1.0653) acc 78.1250 (73.7687) lr 1.2369e-04 eta 0:46:45 +epoch [44/50] batch [340/500] time 0.917 (0.886) data 0.000 (0.002) loss 0.5566 (1.0678) acc 81.2500 (73.7040) lr 1.2369e-04 eta 0:46:40 +epoch [44/50] batch [345/500] time 0.853 (0.886) data 0.000 (0.002) loss 1.2676 (1.0687) acc 75.0000 (73.6775) lr 1.2369e-04 eta 0:46:36 +epoch [44/50] batch [350/500] time 0.909 (0.886) data 0.000 (0.002) loss 0.8540 (1.0647) acc 71.8750 (73.6875) lr 1.2369e-04 eta 0:46:32 +epoch [44/50] batch [355/500] time 0.860 (0.886) data 0.000 (0.002) loss 1.1865 (1.0628) acc 71.8750 (73.7324) lr 1.2369e-04 eta 0:46:27 +epoch [44/50] batch [360/500] time 0.888 (0.886) data 0.000 (0.002) loss 1.0312 (1.0644) acc 71.8750 (73.6892) lr 1.2369e-04 eta 0:46:23 +epoch [44/50] batch [365/500] time 0.859 (0.886) data 0.000 (0.002) loss 0.9346 (1.0664) acc 75.0000 (73.5959) lr 1.2369e-04 eta 0:46:18 +epoch [44/50] batch [370/500] time 0.851 (0.886) data 0.000 (0.002) loss 0.9209 (1.0684) acc 75.0000 (73.5220) lr 1.2369e-04 eta 0:46:13 +epoch [44/50] batch [375/500] time 0.905 (0.886) data 0.000 (0.002) loss 0.8540 (1.0702) acc 75.0000 (73.5083) lr 1.2369e-04 eta 0:46:09 +epoch [44/50] batch [380/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.0059 (1.0665) acc 71.8750 (73.5773) lr 1.2369e-04 eta 0:46:05 +epoch [44/50] batch [385/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.0527 (1.0654) acc 81.2500 (73.5795) lr 1.2369e-04 eta 0:46:01 +epoch [44/50] batch [390/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.7554 (1.0638) acc 81.2500 (73.5978) lr 1.2369e-04 eta 0:45:57 +epoch [44/50] batch [395/500] time 0.891 (0.887) data 0.000 (0.002) loss 1.3125 (1.0649) acc 56.2500 (73.5443) lr 1.2369e-04 eta 0:45:53 +epoch [44/50] batch [400/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.8950 (1.0648) acc 75.0000 (73.5625) lr 1.2369e-04 eta 0:45:48 +epoch 
[44/50] batch [405/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.2812 (1.0646) acc 68.7500 (73.5957) lr 1.2369e-04 eta 0:45:44 +epoch [44/50] batch [410/500] time 0.903 (0.887) data 0.000 (0.002) loss 1.9980 (1.0655) acc 62.5000 (73.6280) lr 1.2369e-04 eta 0:45:39 +epoch [44/50] batch [415/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.8516 (1.0633) acc 75.0000 (73.6521) lr 1.2369e-04 eta 0:45:35 +epoch [44/50] batch [420/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.7725 (1.0623) acc 78.1250 (73.7054) lr 1.2369e-04 eta 0:45:31 +epoch [44/50] batch [425/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.7256 (1.0651) acc 62.5000 (73.6324) lr 1.2369e-04 eta 0:45:27 +epoch [44/50] batch [430/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.5630 (1.0643) acc 75.0000 (73.5828) lr 1.2369e-04 eta 0:45:23 +epoch [44/50] batch [435/500] time 0.910 (0.887) data 0.000 (0.002) loss 1.2783 (1.0641) acc 68.7500 (73.5920) lr 1.2369e-04 eta 0:45:18 +epoch [44/50] batch [440/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.0908 (1.0646) acc 65.6250 (73.5511) lr 1.2369e-04 eta 0:45:14 +epoch [44/50] batch [445/500] time 0.858 (0.887) data 0.000 (0.002) loss 1.2041 (1.0647) acc 68.7500 (73.5604) lr 1.2369e-04 eta 0:45:09 +epoch [44/50] batch [450/500] time 0.880 (0.887) data 0.000 (0.002) loss 0.9106 (1.0648) acc 84.3750 (73.5972) lr 1.2369e-04 eta 0:45:05 +epoch [44/50] batch [455/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5874 (1.0640) acc 81.2500 (73.6332) lr 1.2369e-04 eta 0:45:00 +epoch [44/50] batch [460/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.1426 (1.0655) acc 71.8750 (73.5802) lr 1.2369e-04 eta 0:44:56 +epoch [44/50] batch [465/500] time 0.881 (0.887) data 0.000 (0.002) loss 0.9644 (1.0631) acc 71.8750 (73.6223) lr 1.2369e-04 eta 0:44:51 +epoch [44/50] batch [470/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.6626 (1.0607) acc 84.3750 (73.6769) lr 1.2369e-04 eta 0:44:47 +epoch [44/50] batch [475/500] time 0.876 (0.887) data 0.000 (0.002) loss 
0.8491 (1.0632) acc 75.0000 (73.5987) lr 1.2369e-04 eta 0:44:42 +epoch [44/50] batch [480/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.7524 (1.0640) acc 78.1250 (73.5742) lr 1.2369e-04 eta 0:44:38 +epoch [44/50] batch [485/500] time 0.902 (0.887) data 0.000 (0.002) loss 0.7109 (1.0643) acc 84.3750 (73.5631) lr 1.2369e-04 eta 0:44:34 +epoch [44/50] batch [490/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.9219 (1.0623) acc 65.6250 (73.5651) lr 1.2369e-04 eta 0:44:30 +epoch [44/50] batch [495/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.9697 (1.0633) acc 81.2500 (73.5606) lr 1.2369e-04 eta 0:44:25 +epoch [44/50] batch [500/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.6523 (1.0618) acc 90.6250 (73.6188) lr 9.5173e-05 eta 0:44:21 +epoch [45/50] batch [5/500] time 0.888 (1.036) data 0.000 (0.140) loss 0.7607 (0.8660) acc 78.1250 (78.7500) lr 9.5173e-05 eta 0:51:43 +epoch [45/50] batch [10/500] time 0.893 (0.965) data 0.000 (0.070) loss 0.3862 (0.8876) acc 87.5000 (78.1250) lr 9.5173e-05 eta 0:48:06 +epoch [45/50] batch [15/500] time 0.846 (0.937) data 0.000 (0.047) loss 0.8965 (0.9765) acc 81.2500 (75.8333) lr 9.5173e-05 eta 0:46:37 +epoch [45/50] batch [20/500] time 0.873 (0.919) data 0.000 (0.035) loss 1.1455 (1.0024) acc 68.7500 (74.5312) lr 9.5173e-05 eta 0:45:40 +epoch [45/50] batch [25/500] time 0.902 (0.914) data 0.000 (0.028) loss 1.1914 (1.0436) acc 75.0000 (74.3750) lr 9.5173e-05 eta 0:45:19 +epoch [45/50] batch [30/500] time 0.885 (0.911) data 0.000 (0.023) loss 1.2402 (1.0502) acc 71.8750 (73.9583) lr 9.5173e-05 eta 0:45:04 +epoch [45/50] batch [35/500] time 0.887 (0.908) data 0.000 (0.020) loss 1.4072 (1.0196) acc 68.7500 (74.3750) lr 9.5173e-05 eta 0:44:52 +epoch [45/50] batch [40/500] time 0.879 (0.905) data 0.000 (0.018) loss 0.7495 (1.0335) acc 78.1250 (74.2969) lr 9.5173e-05 eta 0:44:38 +epoch [45/50] batch [45/500] time 0.871 (0.902) data 0.000 (0.016) loss 0.8760 (1.0139) acc 75.0000 (75.1389) lr 9.5173e-05 eta 0:44:24 +epoch 
[45/50] batch [50/500] time 0.856 (0.901) data 0.000 (0.014) loss 0.8945 (1.0196) acc 78.1250 (74.8125) lr 9.5173e-05 eta 0:44:18 +epoch [45/50] batch [55/500] time 0.891 (0.899) data 0.001 (0.013) loss 0.7217 (1.0155) acc 84.3750 (75.0000) lr 9.5173e-05 eta 0:44:07 +epoch [45/50] batch [60/500] time 0.896 (0.898) data 0.000 (0.012) loss 1.7109 (1.0371) acc 59.3750 (74.7396) lr 9.5173e-05 eta 0:44:01 +epoch [45/50] batch [65/500] time 0.872 (0.897) data 0.000 (0.011) loss 1.0166 (1.0350) acc 71.8750 (74.6154) lr 9.5173e-05 eta 0:43:53 +epoch [45/50] batch [70/500] time 0.885 (0.896) data 0.000 (0.010) loss 1.6846 (1.0379) acc 65.6250 (74.5982) lr 9.5173e-05 eta 0:43:45 +epoch [45/50] batch [75/500] time 0.870 (0.897) data 0.000 (0.010) loss 1.2617 (1.0393) acc 71.8750 (74.8750) lr 9.5173e-05 eta 0:43:43 +epoch [45/50] batch [80/500] time 0.908 (0.896) data 0.000 (0.009) loss 1.0068 (1.0283) acc 68.7500 (75.0000) lr 9.5173e-05 eta 0:43:37 +epoch [45/50] batch [85/500] time 0.884 (0.896) data 0.000 (0.008) loss 0.8647 (1.0319) acc 78.1250 (74.8897) lr 9.5173e-05 eta 0:43:32 +epoch [45/50] batch [90/500] time 0.847 (0.895) data 0.000 (0.008) loss 0.8491 (1.0331) acc 84.3750 (74.8611) lr 9.5173e-05 eta 0:43:24 +epoch [45/50] batch [95/500] time 0.896 (0.895) data 0.000 (0.008) loss 1.5654 (1.0409) acc 68.7500 (74.7039) lr 9.5173e-05 eta 0:43:19 +epoch [45/50] batch [100/500] time 0.884 (0.894) data 0.000 (0.007) loss 1.1748 (1.0478) acc 75.0000 (74.6250) lr 9.5173e-05 eta 0:43:12 +epoch [45/50] batch [105/500] time 0.884 (0.893) data 0.000 (0.007) loss 1.3271 (1.0519) acc 62.5000 (74.3155) lr 9.5173e-05 eta 0:43:06 +epoch [45/50] batch [110/500] time 0.883 (0.893) data 0.000 (0.007) loss 1.0986 (1.0469) acc 68.7500 (74.2614) lr 9.5173e-05 eta 0:43:01 +epoch [45/50] batch [115/500] time 0.881 (0.893) data 0.000 (0.006) loss 1.4717 (1.0449) acc 59.3750 (74.2120) lr 9.5173e-05 eta 0:42:55 +epoch [45/50] batch [120/500] time 0.896 (0.893) data 0.000 (0.006) loss 1.4189 
(1.0480) acc 65.6250 (74.1667) lr 9.5173e-05 eta 0:42:52 +epoch [45/50] batch [125/500] time 0.923 (0.893) data 0.000 (0.006) loss 0.7373 (1.0542) acc 75.0000 (73.9750) lr 9.5173e-05 eta 0:42:48 +epoch [45/50] batch [130/500] time 0.867 (0.893) data 0.000 (0.006) loss 1.1162 (1.0563) acc 71.8750 (73.9423) lr 9.5173e-05 eta 0:42:42 +epoch [45/50] batch [135/500] time 0.850 (0.893) data 0.000 (0.005) loss 1.2451 (1.0537) acc 71.8750 (73.9352) lr 9.5173e-05 eta 0:42:37 +epoch [45/50] batch [140/500] time 0.903 (0.892) data 0.000 (0.005) loss 1.5391 (1.0590) acc 53.1250 (73.8393) lr 9.5173e-05 eta 0:42:31 +epoch [45/50] batch [145/500] time 0.917 (0.891) data 0.000 (0.005) loss 1.7666 (1.0552) acc 59.3750 (73.9009) lr 9.5173e-05 eta 0:42:24 +epoch [45/50] batch [150/500] time 0.860 (0.891) data 0.000 (0.005) loss 1.7842 (1.0601) acc 62.5000 (73.8542) lr 9.5173e-05 eta 0:42:17 +epoch [45/50] batch [155/500] time 0.860 (0.890) data 0.000 (0.005) loss 1.0732 (1.0577) acc 62.5000 (73.8911) lr 9.5173e-05 eta 0:42:12 +epoch [45/50] batch [160/500] time 0.877 (0.890) data 0.000 (0.005) loss 1.0195 (1.0530) acc 71.8750 (74.0039) lr 9.5173e-05 eta 0:42:06 +epoch [45/50] batch [165/500] time 0.880 (0.889) data 0.000 (0.004) loss 1.2314 (1.0486) acc 75.0000 (74.1477) lr 9.5173e-05 eta 0:42:01 +epoch [45/50] batch [170/500] time 0.888 (0.889) data 0.000 (0.004) loss 0.6123 (1.0479) acc 78.1250 (74.1728) lr 9.5173e-05 eta 0:41:56 +epoch [45/50] batch [175/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.3027 (1.0458) acc 65.6250 (74.2321) lr 9.5173e-05 eta 0:41:53 +epoch [45/50] batch [180/500] time 0.883 (0.889) data 0.000 (0.004) loss 0.7046 (1.0448) acc 75.0000 (74.1840) lr 9.5173e-05 eta 0:41:48 +epoch [45/50] batch [185/500] time 0.883 (0.889) data 0.000 (0.004) loss 1.7422 (1.0491) acc 56.2500 (74.0203) lr 9.5173e-05 eta 0:41:43 +epoch [45/50] batch [190/500] time 0.878 (0.889) data 0.000 (0.004) loss 0.9668 (1.0424) acc 81.2500 (74.1612) lr 9.5173e-05 eta 0:41:38 +epoch 
[45/50] batch [195/500] time 0.876 (0.889) data 0.000 (0.004) loss 1.3418 (1.0429) acc 56.2500 (74.1827) lr 9.5173e-05 eta 0:41:33 +epoch [45/50] batch [200/500] time 0.862 (0.888) data 0.000 (0.004) loss 1.5430 (1.0438) acc 71.8750 (74.2344) lr 9.5173e-05 eta 0:41:27 +epoch [45/50] batch [205/500] time 0.888 (0.888) data 0.000 (0.004) loss 0.5488 (1.0394) acc 93.7500 (74.3598) lr 9.5173e-05 eta 0:41:21 +epoch [45/50] batch [210/500] time 0.895 (0.888) data 0.000 (0.004) loss 1.3779 (1.0404) acc 65.6250 (74.3155) lr 9.5173e-05 eta 0:41:17 +epoch [45/50] batch [215/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.1318 (1.0435) acc 78.1250 (74.2878) lr 9.5173e-05 eta 0:41:13 +epoch [45/50] batch [220/500] time 0.864 (0.888) data 0.000 (0.003) loss 0.8247 (1.0447) acc 78.1250 (74.2898) lr 9.5173e-05 eta 0:41:09 +epoch [45/50] batch [225/500] time 0.919 (0.888) data 0.000 (0.003) loss 1.0684 (1.0450) acc 65.6250 (74.2500) lr 9.5173e-05 eta 0:41:05 +epoch [45/50] batch [230/500] time 0.859 (0.888) data 0.000 (0.003) loss 1.5771 (1.0512) acc 62.5000 (74.1304) lr 9.5173e-05 eta 0:41:01 +epoch [45/50] batch [235/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.5615 (1.0556) acc 65.6250 (74.0160) lr 9.5173e-05 eta 0:40:56 +epoch [45/50] batch [240/500] time 0.875 (0.888) data 0.000 (0.003) loss 1.9307 (1.0589) acc 56.2500 (73.9583) lr 9.5173e-05 eta 0:40:50 +epoch [45/50] batch [245/500] time 0.876 (0.888) data 0.000 (0.003) loss 1.1396 (1.0588) acc 78.1250 (73.9413) lr 9.5173e-05 eta 0:40:46 +epoch [45/50] batch [250/500] time 0.887 (0.888) data 0.000 (0.003) loss 1.3760 (1.0581) acc 68.7500 (73.9875) lr 9.5173e-05 eta 0:40:42 +epoch [45/50] batch [255/500] time 0.927 (0.888) data 0.000 (0.003) loss 1.0244 (1.0572) acc 78.1250 (73.9706) lr 9.5173e-05 eta 0:40:38 +epoch [45/50] batch [260/500] time 1.000 (0.889) data 0.000 (0.003) loss 0.9346 (1.0552) acc 75.0000 (74.0144) lr 9.5173e-05 eta 0:40:35 +epoch [45/50] batch [265/500] time 0.874 (0.889) data 0.000 (0.003) loss 
1.6885 (1.0565) acc 62.5000 (73.9623) lr 9.5173e-05 eta 0:40:30 +epoch [45/50] batch [270/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.7246 (1.0567) acc 87.5000 (74.0162) lr 9.5173e-05 eta 0:40:25 +epoch [45/50] batch [275/500] time 0.888 (0.888) data 0.000 (0.003) loss 1.1455 (1.0556) acc 75.0000 (74.0455) lr 9.5173e-05 eta 0:40:19 +epoch [45/50] batch [280/500] time 0.881 (0.888) data 0.000 (0.003) loss 0.6421 (1.0557) acc 81.2500 (74.0625) lr 9.5173e-05 eta 0:40:14 +epoch [45/50] batch [285/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.2588 (1.0563) acc 78.1250 (74.1118) lr 9.5173e-05 eta 0:40:09 +epoch [45/50] batch [290/500] time 0.871 (0.888) data 0.000 (0.003) loss 0.9648 (1.0557) acc 75.0000 (74.0302) lr 9.5173e-05 eta 0:40:05 +epoch [45/50] batch [295/500] time 0.913 (0.888) data 0.000 (0.003) loss 0.8970 (1.0563) acc 78.1250 (74.0254) lr 9.5173e-05 eta 0:40:01 +epoch [45/50] batch [300/500] time 0.916 (0.888) data 0.000 (0.003) loss 1.5605 (1.0564) acc 71.8750 (74.0417) lr 9.5173e-05 eta 0:39:57 +epoch [45/50] batch [305/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.7012 (1.0608) acc 50.0000 (73.9652) lr 9.5173e-05 eta 0:39:52 +epoch [45/50] batch [310/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.7256 (1.0613) acc 84.3750 (73.9415) lr 9.5173e-05 eta 0:39:47 +epoch [45/50] batch [315/500] time 0.873 (0.888) data 0.000 (0.002) loss 0.3284 (1.0591) acc 93.7500 (73.9980) lr 9.5173e-05 eta 0:39:43 +epoch [45/50] batch [320/500] time 0.907 (0.888) data 0.000 (0.002) loss 1.4414 (1.0612) acc 68.7500 (73.9062) lr 9.5173e-05 eta 0:39:38 +epoch [45/50] batch [325/500] time 0.875 (0.888) data 0.000 (0.002) loss 0.7500 (1.0577) acc 81.2500 (73.9327) lr 9.5173e-05 eta 0:39:34 +epoch [45/50] batch [330/500] time 0.863 (0.888) data 0.000 (0.002) loss 0.9565 (1.0558) acc 78.1250 (73.9583) lr 9.5173e-05 eta 0:39:29 +epoch [45/50] batch [335/500] time 0.891 (0.888) data 0.000 (0.002) loss 0.9731 (1.0567) acc 71.8750 (73.8806) lr 9.5173e-05 eta 0:39:25 
+epoch [45/50] batch [340/500] time 0.885 (0.887) data 0.000 (0.002) loss 0.4053 (1.0563) acc 87.5000 (73.8879) lr 9.5173e-05 eta 0:39:20 +epoch [45/50] batch [345/500] time 0.877 (0.887) data 0.000 (0.002) loss 0.5420 (1.0547) acc 87.5000 (73.8678) lr 9.5173e-05 eta 0:39:15 +epoch [45/50] batch [350/500] time 0.863 (0.887) data 0.000 (0.002) loss 0.4856 (1.0523) acc 87.5000 (73.8839) lr 9.5173e-05 eta 0:39:10 +epoch [45/50] batch [355/500] time 0.899 (0.887) data 0.000 (0.002) loss 1.0312 (1.0491) acc 68.7500 (73.9437) lr 9.5173e-05 eta 0:39:06 +epoch [45/50] batch [360/500] time 0.863 (0.887) data 0.001 (0.002) loss 1.2559 (1.0522) acc 65.6250 (73.9062) lr 9.5173e-05 eta 0:39:02 +epoch [45/50] batch [365/500] time 0.865 (0.887) data 0.000 (0.002) loss 1.5498 (1.0535) acc 68.7500 (73.8955) lr 9.5173e-05 eta 0:38:57 +epoch [45/50] batch [370/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.0371 (1.0537) acc 75.0000 (73.8851) lr 9.5173e-05 eta 0:38:53 +epoch [45/50] batch [375/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.7305 (1.0525) acc 81.2500 (73.9000) lr 9.5173e-05 eta 0:38:49 +epoch [45/50] batch [380/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.5586 (1.0525) acc 59.3750 (73.9062) lr 9.5173e-05 eta 0:38:44 +epoch [45/50] batch [385/500] time 0.888 (0.887) data 0.000 (0.002) loss 0.5039 (1.0477) acc 78.1250 (74.0097) lr 9.5173e-05 eta 0:38:40 +epoch [45/50] batch [390/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.0645 (1.0495) acc 68.7500 (73.9183) lr 9.5173e-05 eta 0:38:35 +epoch [45/50] batch [395/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.2646 (1.0499) acc 71.8750 (73.8845) lr 9.5173e-05 eta 0:38:31 +epoch [45/50] batch [400/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.1025 (1.0496) acc 65.6250 (73.8594) lr 9.5173e-05 eta 0:38:26 +epoch [45/50] batch [405/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.5498 (1.0523) acc 71.8750 (73.8580) lr 9.5173e-05 eta 0:38:23 +epoch [45/50] batch [410/500] time 0.902 (0.887) data 0.000 
(0.002) loss 0.8828 (1.0504) acc 75.0000 (73.8567) lr 9.5173e-05 eta 0:38:18 +epoch [45/50] batch [415/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.8213 (1.0504) acc 75.0000 (73.8630) lr 9.5173e-05 eta 0:38:13 +epoch [45/50] batch [420/500] time 0.912 (0.887) data 0.000 (0.002) loss 1.1826 (1.0531) acc 68.7500 (73.7872) lr 9.5173e-05 eta 0:38:08 +epoch [45/50] batch [425/500] time 0.879 (0.887) data 0.000 (0.002) loss 0.7012 (1.0516) acc 81.2500 (73.8088) lr 9.5173e-05 eta 0:38:04 +epoch [45/50] batch [430/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.9028 (1.0488) acc 81.2500 (73.8445) lr 9.5173e-05 eta 0:37:59 +epoch [45/50] batch [435/500] time 0.896 (0.887) data 0.000 (0.002) loss 1.4688 (1.0497) acc 75.0000 (73.8003) lr 9.5173e-05 eta 0:37:55 +epoch [45/50] batch [440/500] time 0.897 (0.887) data 0.000 (0.002) loss 1.2617 (1.0499) acc 75.0000 (73.8423) lr 9.5173e-05 eta 0:37:50 +epoch [45/50] batch [445/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.3975 (1.0518) acc 68.7500 (73.8132) lr 9.5173e-05 eta 0:37:46 +epoch [45/50] batch [450/500] time 0.860 (0.887) data 0.000 (0.002) loss 0.9785 (1.0525) acc 75.0000 (73.7708) lr 9.5173e-05 eta 0:37:41 +epoch [45/50] batch [455/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.8804 (1.0527) acc 75.0000 (73.7569) lr 9.5173e-05 eta 0:37:36 +epoch [45/50] batch [460/500] time 0.896 (0.887) data 0.000 (0.002) loss 0.9468 (1.0550) acc 78.1250 (73.7296) lr 9.5173e-05 eta 0:37:32 +epoch [45/50] batch [465/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.5537 (1.0557) acc 87.5000 (73.7433) lr 9.5173e-05 eta 0:37:27 +epoch [45/50] batch [470/500] time 0.898 (0.887) data 0.000 (0.002) loss 1.0225 (1.0554) acc 68.7500 (73.7766) lr 9.5173e-05 eta 0:37:23 +epoch [45/50] batch [475/500] time 0.873 (0.887) data 0.000 (0.002) loss 0.6431 (1.0551) acc 75.0000 (73.7434) lr 9.5173e-05 eta 0:37:18 +epoch [45/50] batch [480/500] time 0.850 (0.887) data 0.000 (0.002) loss 0.9922 (1.0544) acc 78.1250 (73.7565) lr 9.5173e-05 
eta 0:37:14 +epoch [45/50] batch [485/500] time 0.884 (0.887) data 0.001 (0.002) loss 1.3867 (1.0554) acc 62.5000 (73.7113) lr 9.5173e-05 eta 0:37:09 +epoch [45/50] batch [490/500] time 0.865 (0.887) data 0.000 (0.002) loss 0.4382 (1.0526) acc 90.6250 (73.8138) lr 9.5173e-05 eta 0:37:05 +epoch [45/50] batch [495/500] time 0.900 (0.887) data 0.000 (0.002) loss 0.9604 (1.0530) acc 78.1250 (73.8005) lr 9.5173e-05 eta 0:37:00 +epoch [45/50] batch [500/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.7744 (1.0525) acc 62.5000 (73.8312) lr 7.0224e-05 eta 0:36:56 +epoch [46/50] batch [5/500] time 0.882 (1.057) data 0.000 (0.144) loss 1.7617 (1.3197) acc 59.3750 (70.6250) lr 7.0224e-05 eta 0:43:56 +epoch [46/50] batch [10/500] time 0.863 (0.964) data 0.000 (0.072) loss 0.6685 (1.0809) acc 78.1250 (74.3750) lr 7.0224e-05 eta 0:39:59 +epoch [46/50] batch [15/500] time 0.881 (0.936) data 0.000 (0.048) loss 0.9814 (1.0551) acc 71.8750 (74.5833) lr 7.0224e-05 eta 0:38:45 +epoch [46/50] batch [20/500] time 0.895 (0.923) data 0.000 (0.036) loss 0.7793 (1.0264) acc 75.0000 (74.5312) lr 7.0224e-05 eta 0:38:07 +epoch [46/50] batch [25/500] time 0.871 (0.913) data 0.000 (0.029) loss 1.0029 (1.0857) acc 81.2500 (73.6250) lr 7.0224e-05 eta 0:37:40 +epoch [46/50] batch [30/500] time 0.881 (0.907) data 0.000 (0.024) loss 1.0752 (1.0991) acc 75.0000 (73.4375) lr 7.0224e-05 eta 0:37:20 +epoch [46/50] batch [35/500] time 0.898 (0.904) data 0.000 (0.021) loss 0.8306 (1.1065) acc 78.1250 (73.3036) lr 7.0224e-05 eta 0:37:08 +epoch [46/50] batch [40/500] time 0.879 (0.901) data 0.000 (0.018) loss 1.0537 (1.0708) acc 65.6250 (73.7500) lr 7.0224e-05 eta 0:36:56 +epoch [46/50] batch [45/500] time 0.888 (0.900) data 0.000 (0.016) loss 0.7271 (1.0544) acc 81.2500 (74.0278) lr 7.0224e-05 eta 0:36:49 +epoch [46/50] batch [50/500] time 0.934 (0.900) data 0.000 (0.015) loss 0.8540 (1.0658) acc 71.8750 (73.5625) lr 7.0224e-05 eta 0:36:44 +epoch [46/50] batch [55/500] time 0.874 (0.898) data 0.000 
(0.013) loss 1.1699 (1.0785) acc 71.8750 (73.2386) lr 7.0224e-05 eta 0:36:34 +epoch [46/50] batch [60/500] time 0.885 (0.896) data 0.000 (0.012) loss 0.8823 (1.0494) acc 78.1250 (73.6979) lr 7.0224e-05 eta 0:36:26 +epoch [46/50] batch [65/500] time 0.918 (0.898) data 0.000 (0.011) loss 0.6543 (1.0524) acc 81.2500 (73.7981) lr 7.0224e-05 eta 0:36:25 +epoch [46/50] batch [70/500] time 0.882 (0.896) data 0.000 (0.011) loss 0.8657 (1.0374) acc 78.1250 (74.0625) lr 7.0224e-05 eta 0:36:18 +epoch [46/50] batch [75/500] time 0.913 (0.897) data 0.000 (0.010) loss 0.7339 (1.0442) acc 75.0000 (74.1250) lr 7.0224e-05 eta 0:36:14 +epoch [46/50] batch [80/500] time 0.885 (0.896) data 0.000 (0.009) loss 1.0352 (1.0459) acc 78.1250 (74.2188) lr 7.0224e-05 eta 0:36:07 +epoch [46/50] batch [85/500] time 0.909 (0.895) data 0.000 (0.009) loss 1.0664 (1.0376) acc 68.7500 (74.1912) lr 7.0224e-05 eta 0:36:00 +epoch [46/50] batch [90/500] time 0.913 (0.895) data 0.000 (0.008) loss 1.6562 (1.0422) acc 71.8750 (74.1667) lr 7.0224e-05 eta 0:35:55 +epoch [46/50] batch [95/500] time 0.911 (0.895) data 0.000 (0.008) loss 0.5425 (1.0389) acc 81.2500 (74.2434) lr 7.0224e-05 eta 0:35:52 +epoch [46/50] batch [100/500] time 0.873 (0.894) data 0.000 (0.007) loss 1.1553 (1.0430) acc 71.8750 (74.1562) lr 7.0224e-05 eta 0:35:44 +epoch [46/50] batch [105/500] time 0.918 (0.894) data 0.000 (0.007) loss 1.0371 (1.0371) acc 65.6250 (74.2560) lr 7.0224e-05 eta 0:35:41 +epoch [46/50] batch [110/500] time 0.884 (0.894) data 0.000 (0.007) loss 1.2441 (1.0398) acc 71.8750 (74.2045) lr 7.0224e-05 eta 0:35:37 +epoch [46/50] batch [115/500] time 0.908 (0.894) data 0.000 (0.007) loss 0.7949 (1.0373) acc 78.1250 (74.2391) lr 7.0224e-05 eta 0:35:32 +epoch [46/50] batch [120/500] time 0.869 (0.894) data 0.000 (0.006) loss 1.1992 (1.0343) acc 68.7500 (74.2188) lr 7.0224e-05 eta 0:35:28 +epoch [46/50] batch [125/500] time 0.894 (0.894) data 0.000 (0.006) loss 0.9766 (1.0372) acc 75.0000 (74.1000) lr 7.0224e-05 eta 
0:35:23 +epoch [46/50] batch [130/500] time 0.897 (0.893) data 0.000 (0.006) loss 1.0713 (1.0362) acc 87.5000 (74.3029) lr 7.0224e-05 eta 0:35:17 +epoch [46/50] batch [135/500] time 0.912 (0.894) data 0.000 (0.006) loss 1.3232 (1.0360) acc 71.8750 (74.1667) lr 7.0224e-05 eta 0:35:13 +epoch [46/50] batch [140/500] time 0.892 (0.894) data 0.000 (0.005) loss 0.9053 (1.0369) acc 78.1250 (74.1964) lr 7.0224e-05 eta 0:35:08 +epoch [46/50] batch [145/500] time 0.889 (0.893) data 0.000 (0.005) loss 1.2168 (1.0378) acc 75.0000 (74.1164) lr 7.0224e-05 eta 0:35:04 +epoch [46/50] batch [150/500] time 0.924 (0.894) data 0.000 (0.005) loss 0.7705 (1.0320) acc 90.6250 (74.3125) lr 7.0224e-05 eta 0:35:00 +epoch [46/50] batch [155/500] time 0.911 (0.894) data 0.000 (0.005) loss 0.6489 (1.0238) acc 78.1250 (74.5565) lr 7.0224e-05 eta 0:34:55 +epoch [46/50] batch [160/500] time 0.891 (0.894) data 0.000 (0.005) loss 1.5684 (1.0308) acc 62.5000 (74.3750) lr 7.0224e-05 eta 0:34:50 +epoch [46/50] batch [165/500] time 0.885 (0.893) data 0.000 (0.005) loss 1.0215 (1.0290) acc 75.0000 (74.3939) lr 7.0224e-05 eta 0:34:46 +epoch [46/50] batch [170/500] time 0.921 (0.894) data 0.000 (0.004) loss 1.1973 (1.0297) acc 81.2500 (74.4485) lr 7.0224e-05 eta 0:34:41 +epoch [46/50] batch [175/500] time 0.905 (0.894) data 0.000 (0.004) loss 1.1611 (1.0356) acc 75.0000 (74.2857) lr 7.0224e-05 eta 0:34:37 +epoch [46/50] batch [180/500] time 0.901 (0.894) data 0.000 (0.004) loss 0.9038 (1.0369) acc 84.3750 (74.3229) lr 7.0224e-05 eta 0:34:33 +epoch [46/50] batch [185/500] time 0.887 (0.894) data 0.000 (0.004) loss 0.9160 (1.0364) acc 78.1250 (74.3750) lr 7.0224e-05 eta 0:34:28 +epoch [46/50] batch [190/500] time 0.880 (0.893) data 0.000 (0.004) loss 0.3621 (1.0349) acc 90.6250 (74.3750) lr 7.0224e-05 eta 0:34:23 +epoch [46/50] batch [195/500] time 0.883 (0.893) data 0.000 (0.004) loss 0.6558 (1.0396) acc 78.1250 (74.2949) lr 7.0224e-05 eta 0:34:18 +epoch [46/50] batch [200/500] time 0.917 (0.893) data 
0.000 (0.004) loss 1.6543 (1.0428) acc 59.3750 (74.2656) lr 7.0224e-05 eta 0:34:14 +epoch [46/50] batch [205/500] time 1.003 (0.894) data 0.000 (0.004) loss 1.0635 (1.0404) acc 78.1250 (74.3293) lr 7.0224e-05 eta 0:34:10 +epoch [46/50] batch [210/500] time 0.875 (0.893) data 0.000 (0.004) loss 1.6709 (1.0417) acc 62.5000 (74.3006) lr 7.0224e-05 eta 0:34:05 +epoch [46/50] batch [215/500] time 0.859 (0.893) data 0.000 (0.004) loss 1.7861 (1.0473) acc 59.3750 (74.2297) lr 7.0224e-05 eta 0:33:59 +epoch [46/50] batch [220/500] time 0.877 (0.892) data 0.000 (0.004) loss 1.0391 (1.0463) acc 78.1250 (74.3182) lr 7.0224e-05 eta 0:33:54 +epoch [46/50] batch [225/500] time 0.887 (0.892) data 0.000 (0.003) loss 0.9863 (1.0506) acc 71.8750 (74.2222) lr 7.0224e-05 eta 0:33:50 +epoch [46/50] batch [230/500] time 0.882 (0.892) data 0.000 (0.003) loss 0.7622 (1.0540) acc 75.0000 (74.0761) lr 7.0224e-05 eta 0:33:45 +epoch [46/50] batch [235/500] time 0.884 (0.892) data 0.000 (0.003) loss 0.6753 (1.0500) acc 87.5000 (74.0957) lr 7.0224e-05 eta 0:33:40 +epoch [46/50] batch [240/500] time 0.870 (0.892) data 0.000 (0.003) loss 1.2480 (1.0499) acc 68.7500 (74.1016) lr 7.0224e-05 eta 0:33:35 +epoch [46/50] batch [245/500] time 0.869 (0.892) data 0.000 (0.003) loss 0.7974 (1.0474) acc 75.0000 (74.1964) lr 7.0224e-05 eta 0:33:30 +epoch [46/50] batch [250/500] time 0.902 (0.892) data 0.000 (0.003) loss 0.6357 (1.0468) acc 84.3750 (74.2000) lr 7.0224e-05 eta 0:33:26 +epoch [46/50] batch [255/500] time 0.896 (0.892) data 0.000 (0.003) loss 1.4502 (1.0487) acc 75.0000 (74.2402) lr 7.0224e-05 eta 0:33:21 +epoch [46/50] batch [260/500] time 0.885 (0.891) data 0.000 (0.003) loss 1.1045 (1.0479) acc 68.7500 (74.2548) lr 7.0224e-05 eta 0:33:16 +epoch [46/50] batch [265/500] time 0.890 (0.891) data 0.000 (0.003) loss 1.3643 (1.0480) acc 62.5000 (74.2453) lr 7.0224e-05 eta 0:33:12 +epoch [46/50] batch [270/500] time 0.869 (0.891) data 0.000 (0.003) loss 1.2676 (1.0489) acc 65.6250 (74.2014) lr 
7.0224e-05 eta 0:33:07 +epoch [46/50] batch [275/500] time 0.895 (0.891) data 0.000 (0.003) loss 0.5684 (1.0494) acc 81.2500 (74.2159) lr 7.0224e-05 eta 0:33:02 +epoch [46/50] batch [280/500] time 0.865 (0.891) data 0.000 (0.003) loss 1.1230 (1.0513) acc 68.7500 (74.1741) lr 7.0224e-05 eta 0:32:57 +epoch [46/50] batch [285/500] time 0.871 (0.890) data 0.000 (0.003) loss 1.2832 (1.0522) acc 59.3750 (74.1118) lr 7.0224e-05 eta 0:32:52 +epoch [46/50] batch [290/500] time 0.851 (0.890) data 0.000 (0.003) loss 0.7666 (1.0541) acc 78.1250 (74.0409) lr 7.0224e-05 eta 0:32:47 +epoch [46/50] batch [295/500] time 0.894 (0.890) data 0.000 (0.003) loss 1.3672 (1.0512) acc 65.6250 (74.1102) lr 7.0224e-05 eta 0:32:42 +epoch [46/50] batch [300/500] time 0.881 (0.890) data 0.000 (0.003) loss 1.1016 (1.0524) acc 71.8750 (74.0521) lr 7.0224e-05 eta 0:32:37 +epoch [46/50] batch [305/500] time 0.864 (0.890) data 0.000 (0.003) loss 1.4863 (1.0550) acc 71.8750 (74.0266) lr 7.0224e-05 eta 0:32:33 +epoch [46/50] batch [310/500] time 0.913 (0.890) data 0.000 (0.003) loss 0.9956 (1.0515) acc 75.0000 (74.0726) lr 7.0224e-05 eta 0:32:29 +epoch [46/50] batch [315/500] time 0.909 (0.890) data 0.000 (0.003) loss 0.7778 (1.0525) acc 78.1250 (74.0179) lr 7.0224e-05 eta 0:32:24 +epoch [46/50] batch [320/500] time 0.884 (0.890) data 0.000 (0.002) loss 0.9443 (1.0515) acc 84.3750 (74.0918) lr 7.0224e-05 eta 0:32:20 +epoch [46/50] batch [325/500] time 0.906 (0.890) data 0.000 (0.002) loss 1.0400 (1.0552) acc 75.0000 (74.0288) lr 7.0224e-05 eta 0:32:16 +epoch [46/50] batch [330/500] time 0.900 (0.890) data 0.000 (0.002) loss 0.5239 (1.0566) acc 84.3750 (74.0436) lr 7.0224e-05 eta 0:32:11 +epoch [46/50] batch [335/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.2803 (1.0553) acc 68.7500 (74.1045) lr 7.0224e-05 eta 0:32:07 +epoch [46/50] batch [340/500] time 0.861 (0.890) data 0.000 (0.002) loss 1.2939 (1.0540) acc 65.6250 (74.1360) lr 7.0224e-05 eta 0:32:02 +epoch [46/50] batch [345/500] time 0.861 
(0.890) data 0.000 (0.002) loss 0.5474 (1.0533) acc 84.3750 (74.1123) lr 7.0224e-05 eta 0:31:57 +epoch [46/50] batch [350/500] time 0.897 (0.890) data 0.000 (0.002) loss 1.1025 (1.0531) acc 68.7500 (74.0982) lr 7.0224e-05 eta 0:31:53 +epoch [46/50] batch [355/500] time 0.853 (0.890) data 0.000 (0.002) loss 1.0684 (1.0559) acc 75.0000 (74.0493) lr 7.0224e-05 eta 0:31:48 +epoch [46/50] batch [360/500] time 0.891 (0.890) data 0.000 (0.002) loss 0.9351 (1.0571) acc 71.8750 (74.0365) lr 7.0224e-05 eta 0:31:43 +epoch [46/50] batch [365/500] time 0.885 (0.890) data 0.000 (0.002) loss 0.5605 (1.0534) acc 81.2500 (74.1438) lr 7.0224e-05 eta 0:31:39 +epoch [46/50] batch [370/500] time 0.908 (0.890) data 0.000 (0.002) loss 0.7241 (1.0534) acc 71.8750 (74.1554) lr 7.0224e-05 eta 0:31:35 +epoch [46/50] batch [375/500] time 0.858 (0.890) data 0.000 (0.002) loss 1.1309 (1.0527) acc 78.1250 (74.1917) lr 7.0224e-05 eta 0:31:30 +epoch [46/50] batch [380/500] time 0.899 (0.890) data 0.000 (0.002) loss 1.7168 (1.0547) acc 62.5000 (74.1530) lr 7.0224e-05 eta 0:31:25 +epoch [46/50] batch [385/500] time 0.886 (0.889) data 0.000 (0.002) loss 0.9229 (1.0535) acc 75.0000 (74.1802) lr 7.0224e-05 eta 0:31:21 +epoch [46/50] batch [390/500] time 0.854 (0.889) data 0.000 (0.002) loss 1.6533 (1.0573) acc 65.6250 (74.0946) lr 7.0224e-05 eta 0:31:16 +epoch [46/50] batch [395/500] time 0.852 (0.889) data 0.000 (0.002) loss 1.1328 (1.0540) acc 75.0000 (74.1693) lr 7.0224e-05 eta 0:31:12 +epoch [46/50] batch [400/500] time 0.883 (0.889) data 0.000 (0.002) loss 1.2715 (1.0580) acc 71.8750 (74.1250) lr 7.0224e-05 eta 0:31:07 +epoch [46/50] batch [405/500] time 0.894 (0.889) data 0.000 (0.002) loss 0.8887 (1.0557) acc 71.8750 (74.1512) lr 7.0224e-05 eta 0:31:02 +epoch [46/50] batch [410/500] time 0.872 (0.889) data 0.000 (0.002) loss 1.2412 (1.0547) acc 75.0000 (74.1768) lr 7.0224e-05 eta 0:30:58 +epoch [46/50] batch [415/500] time 0.880 (0.889) data 0.000 (0.002) loss 0.7915 (1.0537) acc 81.2500 
(74.1491) lr 7.0224e-05 eta 0:30:53 +epoch [46/50] batch [420/500] time 0.881 (0.889) data 0.000 (0.002) loss 0.8511 (1.0543) acc 81.2500 (74.1443) lr 7.0224e-05 eta 0:30:49 +epoch [46/50] batch [425/500] time 0.868 (0.889) data 0.000 (0.002) loss 1.2773 (1.0538) acc 71.8750 (74.1618) lr 7.0224e-05 eta 0:30:44 +epoch [46/50] batch [430/500] time 0.867 (0.889) data 0.000 (0.002) loss 1.4688 (1.0540) acc 65.6250 (74.1279) lr 7.0224e-05 eta 0:30:40 +epoch [46/50] batch [435/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9775 (1.0534) acc 75.0000 (74.1379) lr 7.0224e-05 eta 0:30:35 +epoch [46/50] batch [440/500] time 0.906 (0.889) data 0.000 (0.002) loss 1.0889 (1.0537) acc 68.7500 (74.0767) lr 7.0224e-05 eta 0:30:31 +epoch [46/50] batch [445/500] time 0.873 (0.889) data 0.000 (0.002) loss 0.9741 (1.0510) acc 75.0000 (74.1713) lr 7.0224e-05 eta 0:30:26 +epoch [46/50] batch [450/500] time 0.895 (0.889) data 0.000 (0.002) loss 1.1367 (1.0504) acc 81.2500 (74.1875) lr 7.0224e-05 eta 0:30:22 +epoch [46/50] batch [455/500] time 0.867 (0.889) data 0.000 (0.002) loss 0.8477 (1.0480) acc 84.3750 (74.2376) lr 7.0224e-05 eta 0:30:18 +epoch [46/50] batch [460/500] time 0.884 (0.889) data 0.000 (0.002) loss 0.8047 (1.0462) acc 78.1250 (74.2527) lr 7.0224e-05 eta 0:30:13 +epoch [46/50] batch [465/500] time 0.898 (0.889) data 0.000 (0.002) loss 0.5322 (1.0431) acc 90.6250 (74.3280) lr 7.0224e-05 eta 0:30:09 +epoch [46/50] batch [470/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.6196 (1.0425) acc 84.3750 (74.3351) lr 7.0224e-05 eta 0:30:04 +epoch [46/50] batch [475/500] time 0.887 (0.889) data 0.000 (0.002) loss 1.1523 (1.0435) acc 65.6250 (74.2632) lr 7.0224e-05 eta 0:30:00 +epoch [46/50] batch [480/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.2012 (1.0432) acc 75.0000 (74.2839) lr 7.0224e-05 eta 0:29:55 +epoch [46/50] batch [485/500] time 0.900 (0.889) data 0.000 (0.002) loss 1.4170 (1.0428) acc 65.6250 (74.2848) lr 7.0224e-05 eta 0:29:51 +epoch [46/50] batch [490/500] 
time 0.896 (0.889) data 0.000 (0.002) loss 0.9736 (1.0427) acc 71.8750 (74.2730) lr 7.0224e-05 eta 0:29:47 +epoch [46/50] batch [495/500] time 0.885 (0.889) data 0.000 (0.002) loss 0.5425 (1.0411) acc 84.3750 (74.3245) lr 7.0224e-05 eta 0:29:43 +epoch [46/50] batch [500/500] time 0.878 (0.889) data 0.000 (0.002) loss 0.8057 (1.0403) acc 75.0000 (74.3375) lr 4.8943e-05 eta 0:29:38 +epoch [47/50] batch [5/500] time 0.868 (1.014) data 0.000 (0.144) loss 0.6484 (1.2520) acc 81.2500 (69.3750) lr 4.8943e-05 eta 0:33:43 +epoch [47/50] batch [10/500] time 0.861 (0.943) data 0.000 (0.072) loss 0.9180 (1.0841) acc 71.8750 (71.5625) lr 4.8943e-05 eta 0:31:16 +epoch [47/50] batch [15/500] time 0.901 (0.922) data 0.000 (0.048) loss 0.9878 (1.0787) acc 71.8750 (73.1250) lr 4.8943e-05 eta 0:30:30 +epoch [47/50] batch [20/500] time 0.877 (0.909) data 0.000 (0.036) loss 0.5649 (1.0591) acc 87.5000 (73.4375) lr 4.8943e-05 eta 0:30:00 +epoch [47/50] batch [25/500] time 0.868 (0.901) data 0.000 (0.029) loss 1.2051 (1.0573) acc 62.5000 (73.2500) lr 4.8943e-05 eta 0:29:40 +epoch [47/50] batch [30/500] time 0.879 (0.899) data 0.000 (0.024) loss 0.9653 (1.0322) acc 71.8750 (73.5417) lr 4.8943e-05 eta 0:29:31 +epoch [47/50] batch [35/500] time 0.871 (0.897) data 0.000 (0.021) loss 0.8174 (1.0168) acc 75.0000 (74.3750) lr 4.8943e-05 eta 0:29:22 +epoch [47/50] batch [40/500] time 0.883 (0.895) data 0.000 (0.018) loss 1.1582 (1.0556) acc 78.1250 (73.9062) lr 4.8943e-05 eta 0:29:14 +epoch [47/50] batch [45/500] time 0.857 (0.892) data 0.000 (0.016) loss 0.8960 (1.0602) acc 81.2500 (74.0972) lr 4.8943e-05 eta 0:29:04 +epoch [47/50] batch [50/500] time 0.850 (0.895) data 0.000 (0.015) loss 1.3428 (1.0691) acc 59.3750 (73.6250) lr 4.8943e-05 eta 0:29:04 +epoch [47/50] batch [55/500] time 0.885 (0.893) data 0.000 (0.013) loss 0.9727 (1.0617) acc 71.8750 (73.4659) lr 4.8943e-05 eta 0:28:57 +epoch [47/50] batch [60/500] time 0.857 (0.892) data 0.000 (0.012) loss 0.5449 (1.0450) acc 84.3750 (73.7500) 
lr 4.8943e-05 eta 0:28:49 +epoch [47/50] batch [65/500] time 0.873 (0.892) data 0.000 (0.011) loss 0.9600 (1.0430) acc 71.8750 (73.7500) lr 4.8943e-05 eta 0:28:45 +epoch [47/50] batch [70/500] time 0.847 (0.891) data 0.000 (0.011) loss 0.5693 (1.0271) acc 84.3750 (73.8839) lr 4.8943e-05 eta 0:28:39 +epoch [47/50] batch [75/500] time 0.883 (0.890) data 0.000 (0.010) loss 0.8750 (1.0380) acc 75.0000 (73.7917) lr 4.8943e-05 eta 0:28:34 +epoch [47/50] batch [80/500] time 0.868 (0.890) data 0.000 (0.009) loss 1.0068 (1.0426) acc 62.5000 (73.4375) lr 4.8943e-05 eta 0:28:28 +epoch [47/50] batch [85/500] time 0.886 (0.889) data 0.000 (0.009) loss 0.7651 (1.0314) acc 78.1250 (73.6765) lr 4.8943e-05 eta 0:28:23 +epoch [47/50] batch [90/500] time 0.877 (0.889) data 0.000 (0.008) loss 0.7041 (1.0300) acc 84.3750 (73.7847) lr 4.8943e-05 eta 0:28:18 +epoch [47/50] batch [95/500] time 0.872 (0.890) data 0.000 (0.008) loss 0.8872 (1.0252) acc 75.0000 (73.9474) lr 4.8943e-05 eta 0:28:15 +epoch [47/50] batch [100/500] time 0.857 (0.889) data 0.000 (0.007) loss 0.9219 (1.0337) acc 78.1250 (74.0625) lr 4.8943e-05 eta 0:28:08 +epoch [47/50] batch [105/500] time 0.872 (0.888) data 0.000 (0.007) loss 0.8447 (1.0412) acc 75.0000 (74.0179) lr 4.8943e-05 eta 0:28:03 +epoch [47/50] batch [110/500] time 0.915 (0.888) data 0.000 (0.007) loss 1.6367 (1.0364) acc 59.3750 (74.1477) lr 4.8943e-05 eta 0:27:58 +epoch [47/50] batch [115/500] time 0.855 (0.888) data 0.000 (0.006) loss 1.4131 (1.0497) acc 62.5000 (73.9130) lr 4.8943e-05 eta 0:27:54 +epoch [47/50] batch [120/500] time 0.866 (0.888) data 0.000 (0.006) loss 0.7603 (1.0495) acc 75.0000 (73.9062) lr 4.8943e-05 eta 0:27:48 +epoch [47/50] batch [125/500] time 0.890 (0.888) data 0.000 (0.006) loss 0.9663 (1.0387) acc 81.2500 (74.0750) lr 4.8943e-05 eta 0:27:44 +epoch [47/50] batch [130/500] time 0.872 (0.887) data 0.000 (0.006) loss 1.4365 (1.0449) acc 65.6250 (73.9423) lr 4.8943e-05 eta 0:27:39 +epoch [47/50] batch [135/500] time 0.885 
(0.887) data 0.000 (0.006) loss 1.0635 (1.0395) acc 75.0000 (73.9583) lr 4.8943e-05 eta 0:27:34 +epoch [47/50] batch [140/500] time 0.874 (0.887) data 0.000 (0.005) loss 0.9282 (1.0379) acc 81.2500 (73.9955) lr 4.8943e-05 eta 0:27:29 +epoch [47/50] batch [145/500] time 0.901 (0.887) data 0.000 (0.005) loss 0.7705 (1.0333) acc 75.0000 (74.0302) lr 4.8943e-05 eta 0:27:24 +epoch [47/50] batch [150/500] time 0.901 (0.887) data 0.000 (0.005) loss 1.8340 (1.0371) acc 65.6250 (74.0000) lr 4.8943e-05 eta 0:27:20 +epoch [47/50] batch [155/500] time 0.871 (0.887) data 0.000 (0.005) loss 0.7197 (1.0295) acc 78.1250 (74.2137) lr 4.8943e-05 eta 0:27:16 +epoch [47/50] batch [160/500] time 0.910 (0.887) data 0.000 (0.005) loss 0.7144 (1.0301) acc 78.1250 (74.1602) lr 4.8943e-05 eta 0:27:11 +epoch [47/50] batch [165/500] time 0.907 (0.887) data 0.000 (0.005) loss 0.8818 (1.0350) acc 78.1250 (74.0909) lr 4.8943e-05 eta 0:27:07 +epoch [47/50] batch [170/500] time 0.874 (0.887) data 0.000 (0.004) loss 0.8052 (1.0326) acc 75.0000 (74.0441) lr 4.8943e-05 eta 0:27:03 +epoch [47/50] batch [175/500] time 0.878 (0.887) data 0.000 (0.004) loss 1.1123 (1.0322) acc 62.5000 (74.0000) lr 4.8943e-05 eta 0:26:58 +epoch [47/50] batch [180/500] time 0.875 (0.887) data 0.000 (0.004) loss 0.7979 (1.0374) acc 84.3750 (73.9062) lr 4.8943e-05 eta 0:26:54 +epoch [47/50] batch [185/500] time 0.877 (0.887) data 0.000 (0.004) loss 1.3359 (1.0401) acc 71.8750 (73.7331) lr 4.8943e-05 eta 0:26:49 +epoch [47/50] batch [190/500] time 0.984 (0.888) data 0.000 (0.004) loss 0.9395 (1.0381) acc 78.1250 (73.8322) lr 4.8943e-05 eta 0:26:46 +epoch [47/50] batch [195/500] time 0.848 (0.887) data 0.000 (0.004) loss 1.4814 (1.0443) acc 56.2500 (73.6218) lr 4.8943e-05 eta 0:26:41 +epoch [47/50] batch [200/500] time 0.889 (0.888) data 0.000 (0.004) loss 1.0000 (1.0454) acc 71.8750 (73.6562) lr 4.8943e-05 eta 0:26:37 +epoch [47/50] batch [205/500] time 0.916 (0.888) data 0.000 (0.004) loss 0.8018 (1.0475) acc 75.0000 
(73.6433) lr 4.8943e-05 eta 0:26:33 +epoch [47/50] batch [210/500] time 0.940 (0.888) data 0.000 (0.004) loss 0.9336 (1.0477) acc 78.1250 (73.6458) lr 4.8943e-05 eta 0:26:29 +epoch [47/50] batch [215/500] time 0.863 (0.887) data 0.000 (0.004) loss 0.8984 (1.0534) acc 68.7500 (73.4593) lr 4.8943e-05 eta 0:26:24 +epoch [47/50] batch [220/500] time 0.894 (0.887) data 0.000 (0.004) loss 0.8813 (1.0508) acc 75.0000 (73.5511) lr 4.8943e-05 eta 0:26:19 +epoch [47/50] batch [225/500] time 0.884 (0.887) data 0.000 (0.003) loss 0.5166 (1.0499) acc 84.3750 (73.6528) lr 4.8943e-05 eta 0:26:14 +epoch [47/50] batch [230/500] time 0.869 (0.887) data 0.000 (0.003) loss 1.4102 (1.0496) acc 68.7500 (73.6957) lr 4.8943e-05 eta 0:26:09 +epoch [47/50] batch [235/500] time 0.893 (0.887) data 0.000 (0.003) loss 0.9849 (1.0513) acc 68.7500 (73.6702) lr 4.8943e-05 eta 0:26:06 +epoch [47/50] batch [240/500] time 0.872 (0.887) data 0.000 (0.003) loss 0.9165 (1.0514) acc 71.8750 (73.6589) lr 4.8943e-05 eta 0:26:01 +epoch [47/50] batch [245/500] time 0.873 (0.887) data 0.000 (0.003) loss 1.5801 (1.0535) acc 59.3750 (73.5842) lr 4.8943e-05 eta 0:25:56 +epoch [47/50] batch [250/500] time 0.891 (0.887) data 0.000 (0.003) loss 1.5586 (1.0529) acc 59.3750 (73.5500) lr 4.8943e-05 eta 0:25:51 +epoch [47/50] batch [255/500] time 0.883 (0.887) data 0.000 (0.003) loss 0.9810 (1.0471) acc 71.8750 (73.6520) lr 4.8943e-05 eta 0:25:47 +epoch [47/50] batch [260/500] time 0.900 (0.886) data 0.000 (0.003) loss 0.7515 (1.0421) acc 75.0000 (73.7740) lr 4.8943e-05 eta 0:25:42 +epoch [47/50] batch [265/500] time 0.927 (0.886) data 0.000 (0.003) loss 0.9873 (1.0392) acc 81.2500 (73.8443) lr 4.8943e-05 eta 0:25:37 +epoch [47/50] batch [270/500] time 0.871 (0.886) data 0.000 (0.003) loss 1.0166 (1.0384) acc 81.2500 (73.8773) lr 4.8943e-05 eta 0:25:33 +epoch [47/50] batch [275/500] time 0.869 (0.886) data 0.000 (0.003) loss 1.1240 (1.0444) acc 71.8750 (73.8068) lr 4.8943e-05 eta 0:25:28 +epoch [47/50] batch [280/500] 
time 0.884 (0.886) data 0.000 (0.003) loss 0.9043 (1.0413) acc 75.0000 (73.8728) lr 4.8943e-05 eta 0:25:23 +epoch [47/50] batch [285/500] time 0.896 (0.886) data 0.000 (0.003) loss 1.3037 (1.0416) acc 56.2500 (73.8048) lr 4.8943e-05 eta 0:25:18 +epoch [47/50] batch [290/500] time 0.895 (0.885) data 0.000 (0.003) loss 1.6309 (1.0415) acc 71.8750 (73.8685) lr 4.8943e-05 eta 0:25:14 +epoch [47/50] batch [295/500] time 0.870 (0.885) data 0.000 (0.003) loss 0.6455 (1.0403) acc 87.5000 (73.9513) lr 4.8943e-05 eta 0:25:09 +epoch [47/50] batch [300/500] time 0.863 (0.885) data 0.000 (0.003) loss 1.2520 (1.0417) acc 68.7500 (73.9688) lr 4.8943e-05 eta 0:25:05 +epoch [47/50] batch [305/500] time 0.878 (0.885) data 0.000 (0.003) loss 0.9966 (1.0403) acc 68.7500 (73.9652) lr 4.8943e-05 eta 0:25:00 +epoch [47/50] batch [310/500] time 0.881 (0.886) data 0.000 (0.003) loss 1.3818 (1.0426) acc 71.8750 (73.9315) lr 4.8943e-05 eta 0:24:56 +epoch [47/50] batch [315/500] time 0.876 (0.886) data 0.000 (0.003) loss 1.4463 (1.0414) acc 65.6250 (73.9782) lr 4.8943e-05 eta 0:24:52 +epoch [47/50] batch [320/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.9658 (1.0396) acc 78.1250 (74.0527) lr 4.8943e-05 eta 0:24:47 +epoch [47/50] batch [325/500] time 0.879 (0.885) data 0.000 (0.002) loss 0.9736 (1.0397) acc 75.0000 (74.0385) lr 4.8943e-05 eta 0:24:43 +epoch [47/50] batch [330/500] time 0.916 (0.886) data 0.000 (0.002) loss 1.2969 (1.0396) acc 68.7500 (74.0341) lr 4.8943e-05 eta 0:24:38 +epoch [47/50] batch [335/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.2568 (1.0398) acc 68.7500 (74.0299) lr 4.8943e-05 eta 0:24:35 +epoch [47/50] batch [340/500] time 0.850 (0.886) data 0.000 (0.002) loss 1.4199 (1.0409) acc 68.7500 (73.9982) lr 4.8943e-05 eta 0:24:30 +epoch [47/50] batch [345/500] time 0.866 (0.885) data 0.000 (0.002) loss 0.4253 (1.0420) acc 87.5000 (73.9493) lr 4.8943e-05 eta 0:24:25 +epoch [47/50] batch [350/500] time 0.878 (0.885) data 0.000 (0.002) loss 1.1807 (1.0379) acc 
62.5000 (73.9643) lr 4.8943e-05 eta 0:24:20 +epoch [47/50] batch [355/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.9453 (1.0371) acc 75.0000 (73.9965) lr 4.8943e-05 eta 0:24:16 +epoch [47/50] batch [360/500] time 0.883 (0.885) data 0.000 (0.002) loss 1.1191 (1.0388) acc 75.0000 (74.0017) lr 4.8943e-05 eta 0:24:12 +epoch [47/50] batch [365/500] time 0.921 (0.886) data 0.000 (0.002) loss 1.4141 (1.0409) acc 71.8750 (73.9897) lr 4.8943e-05 eta 0:24:07 +epoch [47/50] batch [370/500] time 0.875 (0.886) data 0.000 (0.002) loss 0.5269 (1.0410) acc 81.2500 (73.9865) lr 4.8943e-05 eta 0:24:03 +epoch [47/50] batch [375/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.3242 (1.0399) acc 71.8750 (74.0250) lr 4.8943e-05 eta 0:23:59 +epoch [47/50] batch [380/500] time 0.883 (0.886) data 0.000 (0.002) loss 1.4170 (1.0392) acc 62.5000 (73.9885) lr 4.8943e-05 eta 0:23:55 +epoch [47/50] batch [385/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.0430 (1.0388) acc 68.7500 (73.9935) lr 4.8943e-05 eta 0:23:50 +epoch [47/50] batch [390/500] time 0.897 (0.886) data 0.000 (0.002) loss 0.4329 (1.0420) acc 87.5000 (74.0064) lr 4.8943e-05 eta 0:23:46 +epoch [47/50] batch [395/500] time 0.902 (0.886) data 0.000 (0.002) loss 0.8989 (1.0390) acc 78.1250 (74.0823) lr 4.8943e-05 eta 0:23:41 +epoch [47/50] batch [400/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.4871 (1.0376) acc 87.5000 (74.1094) lr 4.8943e-05 eta 0:23:37 +epoch [47/50] batch [405/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.3359 (1.0391) acc 71.8750 (74.0741) lr 4.8943e-05 eta 0:23:32 +epoch [47/50] batch [410/500] time 0.902 (0.886) data 0.000 (0.002) loss 1.0918 (1.0387) acc 75.0000 (74.0701) lr 4.8943e-05 eta 0:23:28 +epoch [47/50] batch [415/500] time 0.882 (0.886) data 0.000 (0.002) loss 0.9033 (1.0374) acc 75.0000 (74.1190) lr 4.8943e-05 eta 0:23:23 +epoch [47/50] batch [420/500] time 0.852 (0.886) data 0.000 (0.002) loss 1.0205 (1.0387) acc 75.0000 (74.0699) lr 4.8943e-05 eta 0:23:19 +epoch [47/50] batch 
[425/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.0850 (1.0390) acc 68.7500 (74.0515) lr 4.8943e-05 eta 0:23:15 +epoch [47/50] batch [430/500] time 0.927 (0.886) data 0.000 (0.002) loss 1.0234 (1.0386) acc 62.5000 (74.0480) lr 4.8943e-05 eta 0:23:10 +epoch [47/50] batch [435/500] time 0.899 (0.886) data 0.000 (0.002) loss 0.9019 (1.0387) acc 78.1250 (74.0805) lr 4.8943e-05 eta 0:23:06 +epoch [47/50] batch [440/500] time 0.911 (0.886) data 0.000 (0.002) loss 0.8501 (1.0410) acc 84.3750 (74.0483) lr 4.8943e-05 eta 0:23:01 +epoch [47/50] batch [445/500] time 0.879 (0.886) data 0.000 (0.002) loss 1.2422 (1.0405) acc 65.6250 (74.0660) lr 4.8943e-05 eta 0:22:57 +epoch [47/50] batch [450/500] time 0.884 (0.886) data 0.000 (0.002) loss 1.1611 (1.0417) acc 59.3750 (74.0069) lr 4.8943e-05 eta 0:22:52 +epoch [47/50] batch [455/500] time 0.865 (0.886) data 0.000 (0.002) loss 1.2715 (1.0424) acc 71.8750 (73.9766) lr 4.8943e-05 eta 0:22:48 +epoch [47/50] batch [460/500] time 0.901 (0.886) data 0.000 (0.002) loss 1.2412 (1.0448) acc 78.1250 (73.9674) lr 4.8943e-05 eta 0:22:43 +epoch [47/50] batch [465/500] time 0.869 (0.886) data 0.000 (0.002) loss 1.4277 (1.0451) acc 65.6250 (73.9583) lr 4.8943e-05 eta 0:22:39 +epoch [47/50] batch [470/500] time 0.878 (0.885) data 0.000 (0.002) loss 1.1660 (1.0469) acc 68.7500 (73.9029) lr 4.8943e-05 eta 0:22:34 +epoch [47/50] batch [475/500] time 0.866 (0.885) data 0.000 (0.002) loss 1.1719 (1.0451) acc 68.7500 (73.9211) lr 4.8943e-05 eta 0:22:30 +epoch [47/50] batch [480/500] time 0.897 (0.886) data 0.000 (0.002) loss 1.5439 (1.0444) acc 78.1250 (73.9583) lr 4.8943e-05 eta 0:22:25 +epoch [47/50] batch [485/500] time 0.863 (0.885) data 0.001 (0.002) loss 1.1367 (1.0451) acc 78.1250 (73.9820) lr 4.8943e-05 eta 0:22:21 +epoch [47/50] batch [490/500] time 0.867 (0.885) data 0.000 (0.002) loss 0.9868 (1.0424) acc 71.8750 (74.0115) lr 4.8943e-05 eta 0:22:16 +epoch [47/50] batch [495/500] time 0.902 (0.885) data 0.000 (0.002) loss 0.8203 
(1.0413) acc 87.5000 (74.0720) lr 4.8943e-05 eta 0:22:12 +epoch [47/50] batch [500/500] time 0.881 (0.886) data 0.000 (0.002) loss 0.9199 (1.0411) acc 78.1250 (74.0938) lr 3.1417e-05 eta 0:22:08 +epoch [48/50] batch [5/500] time 0.895 (1.018) data 0.000 (0.129) loss 1.1367 (1.0168) acc 71.8750 (71.2500) lr 3.1417e-05 eta 0:25:22 +epoch [48/50] batch [10/500] time 0.883 (0.952) data 0.000 (0.065) loss 0.4167 (0.9229) acc 87.5000 (73.7500) lr 3.1417e-05 eta 0:23:38 +epoch [48/50] batch [15/500] time 0.860 (0.929) data 0.000 (0.043) loss 1.2422 (0.9997) acc 71.8750 (72.7083) lr 3.1417e-05 eta 0:23:00 +epoch [48/50] batch [20/500] time 0.869 (0.918) data 0.000 (0.032) loss 1.4473 (1.0823) acc 71.8750 (72.0312) lr 3.1417e-05 eta 0:22:38 +epoch [48/50] batch [25/500] time 0.888 (0.910) data 0.000 (0.026) loss 1.1211 (1.0772) acc 81.2500 (72.5000) lr 3.1417e-05 eta 0:22:22 +epoch [48/50] batch [30/500] time 0.873 (0.908) data 0.000 (0.022) loss 1.1787 (1.0386) acc 78.1250 (72.9167) lr 3.1417e-05 eta 0:22:14 +epoch [48/50] batch [35/500] time 0.873 (0.902) data 0.000 (0.019) loss 1.3955 (1.0494) acc 50.0000 (72.5000) lr 3.1417e-05 eta 0:22:01 +epoch [48/50] batch [40/500] time 0.872 (0.899) data 0.000 (0.016) loss 1.1982 (1.0264) acc 65.6250 (72.9688) lr 3.1417e-05 eta 0:21:52 +epoch [48/50] batch [45/500] time 0.856 (0.897) data 0.000 (0.015) loss 0.5288 (1.0173) acc 78.1250 (73.4722) lr 3.1417e-05 eta 0:21:44 +epoch [48/50] batch [50/500] time 0.888 (0.896) data 0.000 (0.013) loss 0.7280 (1.0196) acc 84.3750 (73.3125) lr 3.1417e-05 eta 0:21:39 +epoch [48/50] batch [55/500] time 0.887 (0.894) data 0.000 (0.012) loss 0.5645 (1.0120) acc 81.2500 (73.4659) lr 3.1417e-05 eta 0:21:32 +epoch [48/50] batch [60/500] time 0.859 (0.893) data 0.001 (0.011) loss 0.5449 (0.9996) acc 84.3750 (73.7500) lr 3.1417e-05 eta 0:21:25 +epoch [48/50] batch [65/500] time 0.837 (0.891) data 0.000 (0.010) loss 1.4541 (1.0116) acc 65.6250 (73.3654) lr 3.1417e-05 eta 0:21:18 +epoch [48/50] batch 
[70/500] time 0.870 (0.891) data 0.000 (0.009) loss 1.2832 (1.0288) acc 71.8750 (73.1696) lr 3.1417e-05 eta 0:21:13 +epoch [48/50] batch [75/500] time 0.874 (0.891) data 0.000 (0.009) loss 0.9595 (1.0287) acc 78.1250 (72.8333) lr 3.1417e-05 eta 0:21:09 +epoch [48/50] batch [80/500] time 0.888 (0.891) data 0.001 (0.008) loss 0.6582 (1.0333) acc 81.2500 (72.5781) lr 3.1417e-05 eta 0:21:04 +epoch [48/50] batch [85/500] time 0.876 (0.890) data 0.000 (0.008) loss 1.2637 (1.0395) acc 71.8750 (72.5735) lr 3.1417e-05 eta 0:20:59 +epoch [48/50] batch [90/500] time 0.895 (0.889) data 0.000 (0.007) loss 1.4209 (1.0444) acc 75.0000 (72.5347) lr 3.1417e-05 eta 0:20:54 +epoch [48/50] batch [95/500] time 0.861 (0.889) data 0.000 (0.007) loss 1.2246 (1.0475) acc 78.1250 (72.5987) lr 3.1417e-05 eta 0:20:48 +epoch [48/50] batch [100/500] time 0.888 (0.889) data 0.000 (0.007) loss 1.8105 (1.0587) acc 56.2500 (72.3125) lr 3.1417e-05 eta 0:20:44 +epoch [48/50] batch [105/500] time 0.882 (0.888) data 0.000 (0.006) loss 1.4072 (1.0600) acc 68.7500 (72.3810) lr 3.1417e-05 eta 0:20:39 +epoch [48/50] batch [110/500] time 0.888 (0.888) data 0.000 (0.006) loss 1.1475 (1.0657) acc 68.7500 (72.3295) lr 3.1417e-05 eta 0:20:34 +epoch [48/50] batch [115/500] time 0.880 (0.888) data 0.000 (0.006) loss 1.1611 (1.0571) acc 75.0000 (72.4728) lr 3.1417e-05 eta 0:20:29 +epoch [48/50] batch [120/500] time 0.877 (0.888) data 0.000 (0.006) loss 1.5596 (1.0645) acc 53.1250 (72.3698) lr 3.1417e-05 eta 0:20:25 +epoch [48/50] batch [125/500] time 0.884 (0.888) data 0.000 (0.005) loss 1.3271 (1.0608) acc 75.0000 (72.5500) lr 3.1417e-05 eta 0:20:20 +epoch [48/50] batch [130/500] time 0.893 (0.888) data 0.000 (0.005) loss 1.3320 (1.0645) acc 71.8750 (72.5481) lr 3.1417e-05 eta 0:20:16 +epoch [48/50] batch [135/500] time 0.878 (0.887) data 0.000 (0.005) loss 0.7871 (1.0582) acc 68.7500 (72.6157) lr 3.1417e-05 eta 0:20:11 +epoch [48/50] batch [140/500] time 0.889 (0.888) data 0.000 (0.005) loss 1.6016 (1.0602) acc 
59.3750 (72.5223) lr 3.1417e-05 eta 0:20:07 +epoch [48/50] batch [145/500] time 0.899 (0.887) data 0.000 (0.005) loss 1.0654 (1.0576) acc 78.1250 (72.6293) lr 3.1417e-05 eta 0:20:02 +epoch [48/50] batch [150/500] time 0.881 (0.887) data 0.001 (0.005) loss 1.6484 (1.0580) acc 65.6250 (72.7500) lr 3.1417e-05 eta 0:19:57 +epoch [48/50] batch [155/500] time 0.890 (0.887) data 0.000 (0.004) loss 0.9585 (1.0548) acc 75.0000 (72.8629) lr 3.1417e-05 eta 0:19:53 +epoch [48/50] batch [160/500] time 0.869 (0.887) data 0.000 (0.004) loss 1.0801 (1.0498) acc 71.8750 (72.9688) lr 3.1417e-05 eta 0:19:47 +epoch [48/50] batch [165/500] time 0.895 (0.886) data 0.000 (0.004) loss 0.6621 (1.0467) acc 81.2500 (73.0871) lr 3.1417e-05 eta 0:19:43 +epoch [48/50] batch [170/500] time 0.870 (0.886) data 0.000 (0.004) loss 0.8335 (1.0438) acc 65.6250 (73.0331) lr 3.1417e-05 eta 0:19:38 +epoch [48/50] batch [175/500] time 0.878 (0.887) data 0.000 (0.004) loss 1.3008 (1.0406) acc 62.5000 (73.1607) lr 3.1417e-05 eta 0:19:34 +epoch [48/50] batch [180/500] time 0.875 (0.887) data 0.000 (0.004) loss 0.7471 (1.0380) acc 84.3750 (73.3507) lr 3.1417e-05 eta 0:19:30 +epoch [48/50] batch [185/500] time 0.863 (0.887) data 0.000 (0.004) loss 0.3909 (1.0386) acc 84.3750 (73.3108) lr 3.1417e-05 eta 0:19:26 +epoch [48/50] batch [190/500] time 0.892 (0.887) data 0.000 (0.004) loss 0.8076 (1.0381) acc 75.0000 (73.3553) lr 3.1417e-05 eta 0:19:21 +epoch [48/50] batch [195/500] time 0.862 (0.887) data 0.000 (0.004) loss 0.6333 (1.0365) acc 81.2500 (73.3494) lr 3.1417e-05 eta 0:19:16 +epoch [48/50] batch [200/500] time 0.924 (0.887) data 0.000 (0.003) loss 1.0684 (1.0401) acc 78.1250 (73.2500) lr 3.1417e-05 eta 0:19:12 +epoch [48/50] batch [205/500] time 0.875 (0.887) data 0.000 (0.003) loss 1.4033 (1.0451) acc 68.7500 (73.2470) lr 3.1417e-05 eta 0:19:08 +epoch [48/50] batch [210/500] time 0.897 (0.887) data 0.000 (0.003) loss 0.6396 (1.0438) acc 81.2500 (73.3185) lr 3.1417e-05 eta 0:19:04 +epoch [48/50] batch 
[215/500] time 0.977 (0.887) data 0.000 (0.003) loss 0.9482 (1.0403) acc 78.1250 (73.4448) lr 3.1417e-05 eta 0:18:59 +epoch [48/50] batch [220/500] time 0.896 (0.887) data 0.000 (0.003) loss 1.2012 (1.0414) acc 75.0000 (73.4659) lr 3.1417e-05 eta 0:18:55 +epoch [48/50] batch [225/500] time 0.874 (0.887) data 0.000 (0.003) loss 1.0869 (1.0427) acc 71.8750 (73.4444) lr 3.1417e-05 eta 0:18:51 +epoch [48/50] batch [230/500] time 0.888 (0.887) data 0.000 (0.003) loss 1.5547 (1.0467) acc 62.5000 (73.3832) lr 3.1417e-05 eta 0:18:46 +epoch [48/50] batch [235/500] time 0.913 (0.887) data 0.000 (0.003) loss 0.5957 (1.0476) acc 81.2500 (73.3644) lr 3.1417e-05 eta 0:18:42 +epoch [48/50] batch [240/500] time 0.875 (0.887) data 0.000 (0.003) loss 0.8804 (1.0488) acc 87.5000 (73.4245) lr 3.1417e-05 eta 0:18:37 +epoch [48/50] batch [245/500] time 0.879 (0.887) data 0.000 (0.003) loss 0.9922 (1.0458) acc 71.8750 (73.4566) lr 3.1417e-05 eta 0:18:33 +epoch [48/50] batch [250/500] time 0.906 (0.887) data 0.000 (0.003) loss 1.0439 (1.0455) acc 78.1250 (73.5500) lr 3.1417e-05 eta 0:18:28 +epoch [48/50] batch [255/500] time 0.885 (0.887) data 0.000 (0.003) loss 1.0303 (1.0435) acc 71.8750 (73.6029) lr 3.1417e-05 eta 0:18:24 +epoch [48/50] batch [260/500] time 0.874 (0.887) data 0.000 (0.003) loss 1.2764 (1.0405) acc 71.8750 (73.7139) lr 3.1417e-05 eta 0:18:19 +epoch [48/50] batch [265/500] time 0.872 (0.887) data 0.000 (0.003) loss 0.9331 (1.0393) acc 78.1250 (73.7264) lr 3.1417e-05 eta 0:18:15 +epoch [48/50] batch [270/500] time 0.890 (0.887) data 0.000 (0.003) loss 0.5620 (1.0452) acc 87.5000 (73.7037) lr 3.1417e-05 eta 0:18:10 +epoch [48/50] batch [275/500] time 0.914 (0.887) data 0.001 (0.003) loss 1.4014 (1.0518) acc 65.6250 (73.5909) lr 3.1417e-05 eta 0:18:06 +epoch [48/50] batch [280/500] time 0.880 (0.887) data 0.000 (0.003) loss 1.1836 (1.0502) acc 81.2500 (73.6049) lr 3.1417e-05 eta 0:18:01 +epoch [48/50] batch [285/500] time 0.863 (0.886) data 0.000 (0.003) loss 1.6318 
(1.0498) acc 56.2500 (73.5965) lr 3.1417e-05 eta 0:17:57 +epoch [48/50] batch [290/500] time 0.913 (0.887) data 0.000 (0.002) loss 1.2236 (1.0518) acc 71.8750 (73.5776) lr 3.1417e-05 eta 0:17:52 +epoch [48/50] batch [295/500] time 0.869 (0.887) data 0.000 (0.002) loss 0.9624 (1.0553) acc 87.5000 (73.5593) lr 3.1417e-05 eta 0:17:48 +epoch [48/50] batch [300/500] time 0.877 (0.887) data 0.000 (0.002) loss 1.1074 (1.0575) acc 71.8750 (73.5938) lr 3.1417e-05 eta 0:17:44 +epoch [48/50] batch [305/500] time 0.879 (0.887) data 0.000 (0.002) loss 1.0908 (1.0569) acc 78.1250 (73.5963) lr 3.1417e-05 eta 0:17:39 +epoch [48/50] batch [310/500] time 0.900 (0.886) data 0.000 (0.002) loss 1.4023 (1.0570) acc 65.6250 (73.5786) lr 3.1417e-05 eta 0:17:34 +epoch [48/50] batch [315/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.3994 (1.0559) acc 71.8750 (73.5813) lr 3.1417e-05 eta 0:17:30 +epoch [48/50] batch [320/500] time 0.882 (0.886) data 0.000 (0.002) loss 1.1924 (1.0563) acc 68.7500 (73.5547) lr 3.1417e-05 eta 0:17:26 +epoch [48/50] batch [325/500] time 0.933 (0.887) data 0.000 (0.002) loss 1.1553 (1.0565) acc 75.0000 (73.5385) lr 3.1417e-05 eta 0:17:21 +epoch [48/50] batch [330/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.5996 (1.0546) acc 87.5000 (73.5701) lr 3.1417e-05 eta 0:17:17 +epoch [48/50] batch [335/500] time 0.906 (0.887) data 0.000 (0.002) loss 1.3086 (1.0542) acc 68.7500 (73.6567) lr 3.1417e-05 eta 0:17:12 +epoch [48/50] batch [340/500] time 0.884 (0.886) data 0.000 (0.002) loss 0.4604 (1.0551) acc 87.5000 (73.7224) lr 3.1417e-05 eta 0:17:08 +epoch [48/50] batch [345/500] time 0.893 (0.886) data 0.000 (0.002) loss 1.1875 (1.0561) acc 68.7500 (73.7319) lr 3.1417e-05 eta 0:17:03 +epoch [48/50] batch [350/500] time 0.898 (0.887) data 0.000 (0.002) loss 0.8677 (1.0557) acc 75.0000 (73.7411) lr 3.1417e-05 eta 0:16:59 +epoch [48/50] batch [355/500] time 0.882 (0.887) data 0.000 (0.002) loss 0.8232 (1.0579) acc 71.8750 (73.7060) lr 3.1417e-05 eta 0:16:55 +epoch 
[48/50] batch [360/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.9028 (1.0553) acc 71.8750 (73.7326) lr 3.1417e-05 eta 0:16:51 +epoch [48/50] batch [365/500] time 0.925 (0.887) data 0.000 (0.002) loss 1.1289 (1.0541) acc 81.2500 (73.7243) lr 3.1417e-05 eta 0:16:46 +epoch [48/50] batch [370/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.7158 (1.0548) acc 84.3750 (73.6909) lr 3.1417e-05 eta 0:16:42 +epoch [48/50] batch [375/500] time 0.897 (0.887) data 0.000 (0.002) loss 0.5879 (1.0521) acc 78.1250 (73.7167) lr 3.1417e-05 eta 0:16:37 +epoch [48/50] batch [380/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.1328 (1.0525) acc 75.0000 (73.7253) lr 3.1417e-05 eta 0:16:33 +epoch [48/50] batch [385/500] time 0.893 (0.887) data 0.000 (0.002) loss 1.0752 (1.0514) acc 62.5000 (73.7175) lr 3.1417e-05 eta 0:16:28 +epoch [48/50] batch [390/500] time 0.895 (0.887) data 0.000 (0.002) loss 1.3994 (1.0523) acc 59.3750 (73.6619) lr 3.1417e-05 eta 0:16:24 +epoch [48/50] batch [395/500] time 0.892 (0.887) data 0.000 (0.002) loss 1.2354 (1.0517) acc 75.0000 (73.7263) lr 3.1417e-05 eta 0:16:19 +epoch [48/50] batch [400/500] time 0.860 (0.887) data 0.000 (0.002) loss 1.2559 (1.0499) acc 78.1250 (73.7812) lr 3.1417e-05 eta 0:16:15 +epoch [48/50] batch [405/500] time 0.862 (0.887) data 0.000 (0.002) loss 0.7065 (1.0507) acc 81.2500 (73.7500) lr 3.1417e-05 eta 0:16:11 +epoch [48/50] batch [410/500] time 0.871 (0.887) data 0.000 (0.002) loss 0.7378 (1.0489) acc 81.2500 (73.7957) lr 3.1417e-05 eta 0:16:06 +epoch [48/50] batch [415/500] time 0.900 (0.887) data 0.000 (0.002) loss 1.3906 (1.0462) acc 65.6250 (73.8253) lr 3.1417e-05 eta 0:16:01 +epoch [48/50] batch [420/500] time 0.864 (0.887) data 0.000 (0.002) loss 1.0332 (1.0470) acc 78.1250 (73.8021) lr 3.1417e-05 eta 0:15:57 +epoch [48/50] batch [425/500] time 0.896 (0.886) data 0.000 (0.002) loss 0.7412 (1.0454) acc 71.8750 (73.7941) lr 3.1417e-05 eta 0:15:52 +epoch [48/50] batch [430/500] time 0.884 (0.886) data 0.000 (0.002) loss 
1.0732 (1.0471) acc 65.6250 (73.7427) lr 3.1417e-05 eta 0:15:48 +epoch [48/50] batch [435/500] time 0.892 (0.886) data 0.000 (0.002) loss 2.3652 (1.0525) acc 62.5000 (73.6853) lr 3.1417e-05 eta 0:15:43 +epoch [48/50] batch [440/500] time 0.889 (0.886) data 0.000 (0.002) loss 1.0518 (1.0497) acc 62.5000 (73.7003) lr 3.1417e-05 eta 0:15:39 +epoch [48/50] batch [445/500] time 0.887 (0.886) data 0.000 (0.002) loss 1.0020 (1.0499) acc 81.2500 (73.7430) lr 3.1417e-05 eta 0:15:34 +epoch [48/50] batch [450/500] time 0.856 (0.886) data 0.000 (0.002) loss 0.7661 (1.0477) acc 78.1250 (73.7917) lr 3.1417e-05 eta 0:15:30 +epoch [48/50] batch [455/500] time 0.886 (0.886) data 0.000 (0.002) loss 1.3516 (1.0487) acc 71.8750 (73.8187) lr 3.1417e-05 eta 0:15:26 +epoch [48/50] batch [460/500] time 0.888 (0.886) data 0.000 (0.002) loss 0.8579 (1.0486) acc 87.5000 (73.8247) lr 3.1417e-05 eta 0:15:21 +epoch [48/50] batch [465/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.5547 (1.0477) acc 81.2500 (73.8441) lr 3.1417e-05 eta 0:15:17 +epoch [48/50] batch [470/500] time 0.913 (0.887) data 0.000 (0.002) loss 0.6211 (1.0475) acc 78.1250 (73.8298) lr 3.1417e-05 eta 0:15:13 +epoch [48/50] batch [475/500] time 0.899 (0.887) data 0.000 (0.002) loss 0.6089 (1.0452) acc 84.3750 (73.9013) lr 3.1417e-05 eta 0:15:08 +epoch [48/50] batch [480/500] time 0.925 (0.887) data 0.000 (0.002) loss 0.8950 (1.0462) acc 84.3750 (73.9258) lr 3.1417e-05 eta 0:15:04 +epoch [48/50] batch [485/500] time 0.911 (0.887) data 0.000 (0.002) loss 1.4551 (1.0467) acc 78.1250 (73.9626) lr 3.1417e-05 eta 0:15:00 +epoch [48/50] batch [490/500] time 0.857 (0.887) data 0.000 (0.002) loss 0.7256 (1.0469) acc 87.5000 (73.9477) lr 3.1417e-05 eta 0:14:55 +epoch [48/50] batch [495/500] time 0.882 (0.887) data 0.000 (0.002) loss 1.2471 (1.0452) acc 78.1250 (73.9773) lr 3.1417e-05 eta 0:14:51 +epoch [48/50] batch [500/500] time 0.898 (0.886) data 0.000 (0.002) loss 1.0557 (1.0441) acc 56.2500 (73.9938) lr 1.7713e-05 eta 0:14:46 
+epoch [49/50] batch [5/500] time 0.881 (1.045) data 0.000 (0.146) loss 0.8208 (1.0228) acc 71.8750 (75.0000) lr 1.7713e-05 eta 0:17:20 +epoch [49/50] batch [10/500] time 0.862 (0.968) data 0.000 (0.073) loss 0.7944 (0.8980) acc 81.2500 (76.2500) lr 1.7713e-05 eta 0:15:57 +epoch [49/50] batch [15/500] time 0.893 (0.945) data 0.000 (0.049) loss 0.9487 (0.8679) acc 68.7500 (77.2917) lr 1.7713e-05 eta 0:15:30 +epoch [49/50] batch [20/500] time 0.916 (0.935) data 0.000 (0.037) loss 0.7842 (0.9473) acc 78.1250 (75.9375) lr 1.7713e-05 eta 0:15:16 +epoch [49/50] batch [25/500] time 0.862 (0.926) data 0.000 (0.029) loss 0.9121 (0.9822) acc 75.0000 (75.7500) lr 1.7713e-05 eta 0:15:03 +epoch [49/50] batch [30/500] time 0.906 (0.919) data 0.000 (0.024) loss 1.0889 (1.0058) acc 78.1250 (75.5208) lr 1.7713e-05 eta 0:14:51 +epoch [49/50] batch [35/500] time 0.875 (0.913) data 0.000 (0.021) loss 0.9854 (0.9935) acc 71.8750 (75.9821) lr 1.7713e-05 eta 0:14:41 +epoch [49/50] batch [40/500] time 0.883 (0.911) data 0.000 (0.018) loss 0.9604 (0.9785) acc 78.1250 (76.1719) lr 1.7713e-05 eta 0:14:34 +epoch [49/50] batch [45/500] time 0.991 (0.910) data 0.000 (0.016) loss 1.1250 (0.9920) acc 71.8750 (76.3889) lr 1.7713e-05 eta 0:14:28 +epoch [49/50] batch [50/500] time 0.892 (0.907) data 0.000 (0.015) loss 1.1816 (1.0072) acc 78.1250 (75.9375) lr 1.7713e-05 eta 0:14:22 +epoch [49/50] batch [55/500] time 0.865 (0.907) data 0.000 (0.013) loss 1.2734 (1.0169) acc 75.0000 (75.6250) lr 1.7713e-05 eta 0:14:16 +epoch [49/50] batch [60/500] time 0.896 (0.905) data 0.000 (0.012) loss 0.8306 (1.0239) acc 71.8750 (75.4688) lr 1.7713e-05 eta 0:14:10 +epoch [49/50] batch [65/500] time 0.868 (0.903) data 0.000 (0.011) loss 1.0039 (1.0207) acc 78.1250 (75.7692) lr 1.7713e-05 eta 0:14:04 +epoch [49/50] batch [70/500] time 0.917 (0.902) data 0.000 (0.011) loss 0.8452 (1.0448) acc 71.8750 (75.3125) lr 1.7713e-05 eta 0:13:59 +epoch [49/50] batch [75/500] time 0.871 (0.901) data 0.000 (0.010) loss 1.8584 
(1.0392) acc 62.5000 (75.2083) lr 1.7713e-05 eta 0:13:53 +epoch [49/50] batch [80/500] time 0.860 (0.900) data 0.000 (0.009) loss 2.0176 (1.0403) acc 53.1250 (75.0781) lr 1.7713e-05 eta 0:13:47 +epoch [49/50] batch [85/500] time 0.900 (0.899) data 0.000 (0.009) loss 0.8271 (1.0435) acc 75.0000 (74.9632) lr 1.7713e-05 eta 0:13:42 +epoch [49/50] batch [90/500] time 0.873 (0.898) data 0.000 (0.008) loss 0.9292 (1.0343) acc 71.8750 (75.0000) lr 1.7713e-05 eta 0:13:37 +epoch [49/50] batch [95/500] time 0.875 (0.897) data 0.000 (0.008) loss 1.4873 (1.0452) acc 62.5000 (74.6382) lr 1.7713e-05 eta 0:13:31 +epoch [49/50] batch [100/500] time 0.866 (0.896) data 0.000 (0.007) loss 2.2754 (1.0583) acc 62.5000 (74.5312) lr 1.7713e-05 eta 0:13:26 +epoch [49/50] batch [105/500] time 0.881 (0.896) data 0.000 (0.007) loss 1.3350 (1.0629) acc 59.3750 (74.1667) lr 1.7713e-05 eta 0:13:21 +epoch [49/50] batch [110/500] time 0.883 (0.895) data 0.000 (0.007) loss 1.3057 (1.0684) acc 81.2500 (74.1193) lr 1.7713e-05 eta 0:13:16 +epoch [49/50] batch [115/500] time 0.916 (0.895) data 0.000 (0.007) loss 0.9653 (1.0650) acc 81.2500 (74.3207) lr 1.7713e-05 eta 0:13:12 +epoch [49/50] batch [120/500] time 0.898 (0.895) data 0.000 (0.006) loss 0.8120 (1.0568) acc 81.2500 (74.6094) lr 1.7713e-05 eta 0:13:07 +epoch [49/50] batch [125/500] time 0.880 (0.895) data 0.000 (0.006) loss 1.2100 (1.0621) acc 75.0000 (74.5500) lr 1.7713e-05 eta 0:13:02 +epoch [49/50] batch [130/500] time 0.895 (0.894) data 0.000 (0.006) loss 1.3145 (1.0681) acc 71.8750 (74.4471) lr 1.7713e-05 eta 0:12:58 +epoch [49/50] batch [135/500] time 0.897 (0.894) data 0.000 (0.006) loss 1.8965 (1.0792) acc 62.5000 (74.1898) lr 1.7713e-05 eta 0:12:53 +epoch [49/50] batch [140/500] time 0.867 (0.894) data 0.000 (0.005) loss 1.0225 (1.0730) acc 78.1250 (74.2188) lr 1.7713e-05 eta 0:12:49 +epoch [49/50] batch [145/500] time 0.894 (0.895) data 0.000 (0.005) loss 0.9023 (1.0757) acc 78.1250 (74.0948) lr 1.7713e-05 eta 0:12:44 +epoch [49/50] 
batch [150/500] time 0.889 (0.894) data 0.000 (0.005) loss 0.8115 (1.0759) acc 84.3750 (74.1667) lr 1.7713e-05 eta 0:12:39 +epoch [49/50] batch [155/500] time 0.907 (0.894) data 0.000 (0.005) loss 1.5596 (1.0715) acc 50.0000 (74.0524) lr 1.7713e-05 eta 0:12:35 +epoch [49/50] batch [160/500] time 0.869 (0.894) data 0.000 (0.005) loss 0.7012 (1.0636) acc 71.8750 (74.0820) lr 1.7713e-05 eta 0:12:30 +epoch [49/50] batch [165/500] time 0.857 (0.894) data 0.000 (0.005) loss 1.1924 (1.0654) acc 68.7500 (74.1098) lr 1.7713e-05 eta 0:12:26 +epoch [49/50] batch [170/500] time 0.917 (0.894) data 0.000 (0.005) loss 0.9067 (1.0644) acc 75.0000 (74.0993) lr 1.7713e-05 eta 0:12:21 +epoch [49/50] batch [175/500] time 0.878 (0.894) data 0.000 (0.004) loss 0.8569 (1.0606) acc 71.8750 (74.1071) lr 1.7713e-05 eta 0:12:17 +epoch [49/50] batch [180/500] time 0.855 (0.893) data 0.000 (0.004) loss 1.0537 (1.0600) acc 71.8750 (74.0451) lr 1.7713e-05 eta 0:12:12 +epoch [49/50] batch [185/500] time 0.894 (0.892) data 0.000 (0.004) loss 1.4375 (1.0571) acc 62.5000 (74.1723) lr 1.7713e-05 eta 0:12:07 +epoch [49/50] batch [190/500] time 0.900 (0.893) data 0.000 (0.004) loss 0.5801 (1.0537) acc 84.3750 (74.2105) lr 1.7713e-05 eta 0:12:03 +epoch [49/50] batch [195/500] time 0.868 (0.893) data 0.000 (0.004) loss 2.0820 (1.0579) acc 53.1250 (74.1346) lr 1.7713e-05 eta 0:11:58 +epoch [49/50] batch [200/500] time 0.871 (0.892) data 0.000 (0.004) loss 0.8975 (1.0520) acc 78.1250 (74.1719) lr 1.7713e-05 eta 0:11:53 +epoch [49/50] batch [205/500] time 0.900 (0.892) data 0.000 (0.004) loss 0.8413 (1.0570) acc 78.1250 (74.0549) lr 1.7713e-05 eta 0:11:48 +epoch [49/50] batch [210/500] time 0.878 (0.891) data 0.000 (0.004) loss 0.5845 (1.0563) acc 84.3750 (74.1667) lr 1.7713e-05 eta 0:11:44 +epoch [49/50] batch [215/500] time 0.906 (0.891) data 0.000 (0.004) loss 1.2783 (1.0576) acc 65.6250 (74.0843) lr 1.7713e-05 eta 0:11:39 +epoch [49/50] batch [220/500] time 0.872 (0.891) data 0.000 (0.004) loss 0.5557 
(1.0572) acc 81.2500 (74.0483) lr 1.7713e-05 eta 0:11:34 +epoch [49/50] batch [225/500] time 0.867 (0.890) data 0.000 (0.003) loss 1.0596 (1.0587) acc 78.1250 (74.0694) lr 1.7713e-05 eta 0:11:29 +epoch [49/50] batch [230/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.7129 (1.0554) acc 81.2500 (74.1168) lr 1.7713e-05 eta 0:11:25 +epoch [49/50] batch [235/500] time 0.872 (0.890) data 0.000 (0.003) loss 1.3750 (1.0539) acc 78.1250 (74.2021) lr 1.7713e-05 eta 0:11:20 +epoch [49/50] batch [240/500] time 0.876 (0.890) data 0.000 (0.003) loss 0.6382 (1.0503) acc 81.2500 (74.2057) lr 1.7713e-05 eta 0:11:16 +epoch [49/50] batch [245/500] time 0.911 (0.890) data 0.000 (0.003) loss 1.5176 (1.0540) acc 68.7500 (73.9923) lr 1.7713e-05 eta 0:11:11 +epoch [49/50] batch [250/500] time 0.903 (0.890) data 0.000 (0.003) loss 0.7471 (1.0535) acc 75.0000 (73.9750) lr 1.7713e-05 eta 0:11:07 +epoch [49/50] batch [255/500] time 0.883 (0.890) data 0.000 (0.003) loss 1.5225 (1.0584) acc 71.8750 (73.8235) lr 1.7713e-05 eta 0:11:03 +epoch [49/50] batch [260/500] time 0.873 (0.890) data 0.000 (0.003) loss 1.0488 (1.0577) acc 71.8750 (73.7981) lr 1.7713e-05 eta 0:10:58 +epoch [49/50] batch [265/500] time 0.867 (0.890) data 0.000 (0.003) loss 2.0059 (1.0606) acc 53.1250 (73.7146) lr 1.7713e-05 eta 0:10:54 +epoch [49/50] batch [270/500] time 0.882 (0.890) data 0.000 (0.003) loss 0.7979 (1.0638) acc 71.8750 (73.6111) lr 1.7713e-05 eta 0:10:49 +epoch [49/50] batch [275/500] time 0.868 (0.890) data 0.000 (0.003) loss 0.9019 (1.0622) acc 81.2500 (73.6023) lr 1.7713e-05 eta 0:10:44 +epoch [49/50] batch [280/500] time 0.886 (0.890) data 0.000 (0.003) loss 0.8364 (1.0644) acc 78.1250 (73.5826) lr 1.7713e-05 eta 0:10:40 +epoch [49/50] batch [285/500] time 0.898 (0.889) data 0.000 (0.003) loss 1.1348 (1.0651) acc 65.6250 (73.5197) lr 1.7713e-05 eta 0:10:35 +epoch [49/50] batch [290/500] time 0.879 (0.890) data 0.000 (0.003) loss 0.6182 (1.0658) acc 87.5000 (73.5237) lr 1.7713e-05 eta 0:10:31 +epoch 
[49/50] batch [295/500] time 0.889 (0.890) data 0.000 (0.003) loss 0.9819 (1.0642) acc 75.0000 (73.5699) lr 1.7713e-05 eta 0:10:27 +epoch [49/50] batch [300/500] time 0.903 (0.890) data 0.000 (0.003) loss 1.2051 (1.0620) acc 68.7500 (73.5625) lr 1.7713e-05 eta 0:10:22 +epoch [49/50] batch [305/500] time 0.907 (0.890) data 0.000 (0.003) loss 0.9834 (1.0617) acc 75.0000 (73.5656) lr 1.7713e-05 eta 0:10:18 +epoch [49/50] batch [310/500] time 0.914 (0.890) data 0.000 (0.003) loss 0.9731 (1.0630) acc 75.0000 (73.5282) lr 1.7713e-05 eta 0:10:14 +epoch [49/50] batch [315/500] time 0.910 (0.890) data 0.000 (0.003) loss 1.3350 (1.0629) acc 56.2500 (73.4325) lr 1.7713e-05 eta 0:10:09 +epoch [49/50] batch [320/500] time 0.884 (0.890) data 0.000 (0.003) loss 0.6831 (1.0622) acc 84.3750 (73.4570) lr 1.7713e-05 eta 0:10:05 +epoch [49/50] batch [325/500] time 0.870 (0.890) data 0.000 (0.002) loss 1.4014 (1.0628) acc 71.8750 (73.4423) lr 1.7713e-05 eta 0:10:00 +epoch [49/50] batch [330/500] time 0.880 (0.890) data 0.000 (0.002) loss 1.0752 (1.0630) acc 75.0000 (73.3996) lr 1.7713e-05 eta 0:09:56 +epoch [49/50] batch [335/500] time 0.919 (0.890) data 0.000 (0.002) loss 1.1260 (1.0651) acc 71.8750 (73.3675) lr 1.7713e-05 eta 0:09:52 +epoch [49/50] batch [340/500] time 0.875 (0.890) data 0.000 (0.002) loss 1.4316 (1.0679) acc 59.3750 (73.3180) lr 1.7713e-05 eta 0:09:47 +epoch [49/50] batch [345/500] time 0.877 (0.890) data 0.000 (0.002) loss 0.8276 (1.0665) acc 78.1250 (73.3605) lr 1.7713e-05 eta 0:09:42 +epoch [49/50] batch [350/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.0264 (1.0661) acc 81.2500 (73.4018) lr 1.7713e-05 eta 0:09:38 +epoch [49/50] batch [355/500] time 0.870 (0.890) data 0.000 (0.002) loss 0.8252 (1.0641) acc 81.2500 (73.4683) lr 1.7713e-05 eta 0:09:33 +epoch [49/50] batch [360/500] time 0.877 (0.890) data 0.000 (0.002) loss 1.2793 (1.0630) acc 71.8750 (73.5243) lr 1.7713e-05 eta 0:09:29 +epoch [49/50] batch [365/500] time 0.883 (0.889) data 0.000 (0.002) loss 
0.6895 (1.0653) acc 81.2500 (73.4760) lr 1.7713e-05 eta 0:09:24 +epoch [49/50] batch [370/500] time 0.879 (0.889) data 0.000 (0.002) loss 1.1309 (1.0646) acc 65.6250 (73.4459) lr 1.7713e-05 eta 0:09:20 +epoch [49/50] batch [375/500] time 0.894 (0.889) data 0.000 (0.002) loss 0.6133 (1.0618) acc 81.2500 (73.4583) lr 1.7713e-05 eta 0:09:15 +epoch [49/50] batch [380/500] time 0.864 (0.889) data 0.000 (0.002) loss 0.8833 (1.0591) acc 75.0000 (73.4951) lr 1.7713e-05 eta 0:09:11 +epoch [49/50] batch [385/500] time 0.882 (0.889) data 0.000 (0.002) loss 1.0811 (1.0598) acc 75.0000 (73.4821) lr 1.7713e-05 eta 0:09:06 +epoch [49/50] batch [390/500] time 0.859 (0.889) data 0.000 (0.002) loss 0.6821 (1.0584) acc 84.3750 (73.5016) lr 1.7713e-05 eta 0:09:02 +epoch [49/50] batch [395/500] time 0.883 (0.889) data 0.000 (0.002) loss 0.9082 (1.0572) acc 78.1250 (73.5206) lr 1.7713e-05 eta 0:08:57 +epoch [49/50] batch [400/500] time 0.852 (0.889) data 0.000 (0.002) loss 0.4871 (1.0580) acc 78.1250 (73.5078) lr 1.7713e-05 eta 0:08:53 +epoch [49/50] batch [405/500] time 0.872 (0.888) data 0.000 (0.002) loss 1.2129 (1.0563) acc 75.0000 (73.5340) lr 1.7713e-05 eta 0:08:48 +epoch [49/50] batch [410/500] time 0.860 (0.888) data 0.000 (0.002) loss 0.6353 (1.0547) acc 81.2500 (73.6204) lr 1.7713e-05 eta 0:08:43 +epoch [49/50] batch [415/500] time 0.851 (0.888) data 0.000 (0.002) loss 0.4954 (1.0539) acc 84.3750 (73.6370) lr 1.7713e-05 eta 0:08:39 +epoch [49/50] batch [420/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.9277 (1.0507) acc 78.1250 (73.6905) lr 1.7713e-05 eta 0:08:34 +epoch [49/50] batch [425/500] time 0.894 (0.888) data 0.000 (0.002) loss 0.5723 (1.0464) acc 84.3750 (73.8088) lr 1.7713e-05 eta 0:08:30 +epoch [49/50] batch [430/500] time 0.996 (0.888) data 0.000 (0.002) loss 1.0439 (1.0464) acc 81.2500 (73.8227) lr 1.7713e-05 eta 0:08:26 +epoch [49/50] batch [435/500] time 0.895 (0.888) data 0.000 (0.002) loss 0.8247 (1.0457) acc 78.1250 (73.8793) lr 1.7713e-05 eta 0:08:21 
+epoch [49/50] batch [440/500] time 0.883 (0.888) data 0.000 (0.002) loss 1.7881 (1.0464) acc 65.6250 (73.9062) lr 1.7713e-05 eta 0:08:17 +epoch [49/50] batch [445/500] time 0.870 (0.888) data 0.000 (0.002) loss 0.8301 (1.0474) acc 75.0000 (73.8413) lr 1.7713e-05 eta 0:08:12 +epoch [49/50] batch [450/500] time 0.868 (0.888) data 0.000 (0.002) loss 0.9893 (1.0462) acc 71.8750 (73.8681) lr 1.7713e-05 eta 0:08:08 +epoch [49/50] batch [455/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.8252 (1.0445) acc 81.2500 (73.9354) lr 1.7713e-05 eta 0:08:03 +epoch [49/50] batch [460/500] time 0.890 (0.888) data 0.000 (0.002) loss 0.8638 (1.0441) acc 87.5000 (73.9742) lr 1.7713e-05 eta 0:07:59 +epoch [49/50] batch [465/500] time 0.854 (0.888) data 0.000 (0.002) loss 0.8770 (1.0421) acc 84.3750 (74.0390) lr 1.7713e-05 eta 0:07:54 +epoch [49/50] batch [470/500] time 0.883 (0.888) data 0.000 (0.002) loss 0.8823 (1.0427) acc 75.0000 (74.0226) lr 1.7713e-05 eta 0:07:50 +epoch [49/50] batch [475/500] time 0.861 (0.888) data 0.000 (0.002) loss 1.0479 (1.0421) acc 78.1250 (74.0263) lr 1.7713e-05 eta 0:07:46 +epoch [49/50] batch [480/500] time 0.892 (0.888) data 0.000 (0.002) loss 0.8467 (1.0409) acc 84.3750 (74.0560) lr 1.7713e-05 eta 0:07:41 +epoch [49/50] batch [485/500] time 0.863 (0.888) data 0.001 (0.002) loss 0.8721 (1.0416) acc 71.8750 (74.0528) lr 1.7713e-05 eta 0:07:37 +epoch [49/50] batch [490/500] time 0.888 (0.887) data 0.000 (0.002) loss 1.0107 (1.0407) acc 75.0000 (74.0497) lr 1.7713e-05 eta 0:07:32 +epoch [49/50] batch [495/500] time 0.870 (0.887) data 0.000 (0.002) loss 1.4727 (1.0429) acc 68.7500 (74.0341) lr 1.7713e-05 eta 0:07:28 +epoch [49/50] batch [500/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.8745 (1.0415) acc 78.1250 (74.0563) lr 7.8853e-06 eta 0:07:23 +epoch [50/50] batch [5/500] time 0.871 (1.053) data 0.000 (0.158) loss 1.1934 (1.1330) acc 68.7500 (71.8750) lr 7.8853e-06 eta 0:08:41 +epoch [50/50] batch [10/500] time 0.864 (0.974) data 0.000 (0.079) 
loss 0.6318 (1.1857) acc 84.3750 (72.1875) lr 7.8853e-06 eta 0:07:57 +epoch [50/50] batch [15/500] time 0.897 (0.946) data 0.000 (0.053) loss 1.0459 (1.1751) acc 84.3750 (72.5000) lr 7.8853e-06 eta 0:07:38 +epoch [50/50] batch [20/500] time 0.882 (0.936) data 0.000 (0.040) loss 0.9585 (1.1016) acc 71.8750 (73.2812) lr 7.8853e-06 eta 0:07:29 +epoch [50/50] batch [25/500] time 0.848 (0.926) data 0.000 (0.032) loss 1.4287 (1.0790) acc 56.2500 (73.2500) lr 7.8853e-06 eta 0:07:19 +epoch [50/50] batch [30/500] time 0.909 (0.918) data 0.000 (0.027) loss 1.1719 (1.0780) acc 75.0000 (73.0208) lr 7.8853e-06 eta 0:07:11 +epoch [50/50] batch [35/500] time 0.866 (0.914) data 0.000 (0.023) loss 0.9482 (1.0681) acc 81.2500 (73.4821) lr 7.8853e-06 eta 0:07:05 +epoch [50/50] batch [40/500] time 0.869 (0.909) data 0.000 (0.020) loss 0.7896 (1.0822) acc 81.2500 (72.8125) lr 7.8853e-06 eta 0:06:58 +epoch [50/50] batch [45/500] time 0.921 (0.909) data 0.000 (0.018) loss 1.4580 (1.0893) acc 59.3750 (72.7083) lr 7.8853e-06 eta 0:06:53 +epoch [50/50] batch [50/500] time 0.881 (0.905) data 0.000 (0.016) loss 1.5918 (1.0908) acc 71.8750 (72.9375) lr 7.8853e-06 eta 0:06:47 +epoch [50/50] batch [55/500] time 0.857 (0.903) data 0.000 (0.015) loss 1.0713 (1.0704) acc 65.6250 (72.9545) lr 7.8853e-06 eta 0:06:41 +epoch [50/50] batch [60/500] time 0.887 (0.903) data 0.000 (0.013) loss 1.5195 (1.0832) acc 62.5000 (72.6042) lr 7.8853e-06 eta 0:06:37 +epoch [50/50] batch [65/500] time 0.903 (0.901) data 0.000 (0.012) loss 0.9712 (1.0704) acc 68.7500 (73.0288) lr 7.8853e-06 eta 0:06:32 +epoch [50/50] batch [70/500] time 0.874 (0.899) data 0.000 (0.012) loss 1.6104 (1.0859) acc 59.3750 (72.6339) lr 7.8853e-06 eta 0:06:26 +epoch [50/50] batch [75/500] time 0.885 (0.898) data 0.000 (0.011) loss 1.3174 (1.0831) acc 65.6250 (72.8333) lr 7.8853e-06 eta 0:06:21 +epoch [50/50] batch [80/500] time 0.905 (0.898) data 0.000 (0.010) loss 0.9580 (1.0794) acc 62.5000 (72.8125) lr 7.8853e-06 eta 0:06:16 +epoch 
[50/50] batch [85/500] time 0.883 (0.897) data 0.000 (0.010) loss 1.2021 (1.0827) acc 75.0000 (72.8309) lr 7.8853e-06 eta 0:06:12 +epoch [50/50] batch [90/500] time 0.853 (0.896) data 0.000 (0.009) loss 0.8755 (1.0787) acc 78.1250 (72.9861) lr 7.8853e-06 eta 0:06:07 +epoch [50/50] batch [95/500] time 0.857 (0.895) data 0.000 (0.009) loss 0.8696 (1.0722) acc 71.8750 (73.0263) lr 7.8853e-06 eta 0:06:02 +epoch [50/50] batch [100/500] time 0.874 (0.895) data 0.000 (0.008) loss 0.7588 (1.0632) acc 84.3750 (73.3125) lr 7.8853e-06 eta 0:05:57 +epoch [50/50] batch [105/500] time 0.880 (0.894) data 0.000 (0.008) loss 0.9155 (1.0506) acc 78.1250 (73.6012) lr 7.8853e-06 eta 0:05:53 +epoch [50/50] batch [110/500] time 0.879 (0.894) data 0.000 (0.007) loss 0.6250 (1.0570) acc 84.3750 (73.5795) lr 7.8853e-06 eta 0:05:48 +epoch [50/50] batch [115/500] time 0.885 (0.894) data 0.000 (0.007) loss 0.9614 (1.0554) acc 78.1250 (73.6413) lr 7.8853e-06 eta 0:05:44 +epoch [50/50] batch [120/500] time 0.880 (0.895) data 0.000 (0.007) loss 1.7510 (1.0748) acc 56.2500 (73.2552) lr 7.8853e-06 eta 0:05:40 +epoch [50/50] batch [125/500] time 0.884 (0.894) data 0.000 (0.007) loss 0.8071 (1.0787) acc 75.0000 (73.3500) lr 7.8853e-06 eta 0:05:35 +epoch [50/50] batch [130/500] time 0.887 (0.894) data 0.000 (0.006) loss 1.2637 (1.0698) acc 71.8750 (73.5096) lr 7.8853e-06 eta 0:05:30 +epoch [50/50] batch [135/500] time 0.910 (0.894) data 0.000 (0.006) loss 0.8799 (1.0654) acc 81.2500 (73.5880) lr 7.8853e-06 eta 0:05:26 +epoch [50/50] batch [140/500] time 0.876 (0.893) data 0.000 (0.006) loss 1.1260 (1.0613) acc 71.8750 (73.7277) lr 7.8853e-06 eta 0:05:21 +epoch [50/50] batch [145/500] time 0.900 (0.893) data 0.000 (0.006) loss 0.4546 (1.0541) acc 90.6250 (73.9871) lr 7.8853e-06 eta 0:05:16 +epoch [50/50] batch [150/500] time 0.861 (0.892) data 0.000 (0.006) loss 1.3994 (1.0530) acc 65.6250 (74.0000) lr 7.8853e-06 eta 0:05:12 +epoch [50/50] batch [155/500] time 0.888 (0.892) data 0.000 (0.005) loss 
1.2061 (1.0468) acc 56.2500 (74.0524) lr 7.8853e-06 eta 0:05:07 +epoch [50/50] batch [160/500] time 0.885 (0.892) data 0.000 (0.005) loss 0.7603 (1.0431) acc 87.5000 (74.1016) lr 7.8853e-06 eta 0:05:03 +epoch [50/50] batch [165/500] time 0.874 (0.892) data 0.000 (0.005) loss 0.7456 (1.0329) acc 78.1250 (74.2614) lr 7.8853e-06 eta 0:04:58 +epoch [50/50] batch [170/500] time 0.900 (0.893) data 0.000 (0.005) loss 0.7754 (1.0291) acc 78.1250 (74.3199) lr 7.8853e-06 eta 0:04:54 +epoch [50/50] batch [175/500] time 0.909 (0.893) data 0.000 (0.005) loss 1.0664 (1.0286) acc 71.8750 (74.3214) lr 7.8853e-06 eta 0:04:50 +epoch [50/50] batch [180/500] time 0.872 (0.892) data 0.000 (0.005) loss 0.8892 (1.0235) acc 75.0000 (74.3403) lr 7.8853e-06 eta 0:04:45 +epoch [50/50] batch [185/500] time 0.871 (0.892) data 0.000 (0.005) loss 1.1406 (1.0225) acc 59.3750 (74.2736) lr 7.8853e-06 eta 0:04:41 +epoch [50/50] batch [190/500] time 0.870 (0.892) data 0.000 (0.004) loss 1.0879 (1.0243) acc 62.5000 (74.2270) lr 7.8853e-06 eta 0:04:36 +epoch [50/50] batch [195/500] time 0.861 (0.892) data 0.000 (0.004) loss 0.8062 (1.0247) acc 81.2500 (74.3109) lr 7.8853e-06 eta 0:04:31 +epoch [50/50] batch [200/500] time 0.880 (0.891) data 0.000 (0.004) loss 0.7769 (1.0244) acc 75.0000 (74.3438) lr 7.8853e-06 eta 0:04:27 +epoch [50/50] batch [205/500] time 0.859 (0.891) data 0.000 (0.004) loss 0.9521 (1.0256) acc 71.8750 (74.3293) lr 7.8853e-06 eta 0:04:22 +epoch [50/50] batch [210/500] time 0.864 (0.891) data 0.000 (0.004) loss 0.9180 (1.0225) acc 78.1250 (74.4345) lr 7.8853e-06 eta 0:04:18 +epoch [50/50] batch [215/500] time 0.883 (0.890) data 0.000 (0.004) loss 1.1445 (1.0220) acc 81.2500 (74.4477) lr 7.8853e-06 eta 0:04:13 +epoch [50/50] batch [220/500] time 0.872 (0.890) data 0.000 (0.004) loss 0.9736 (1.0216) acc 78.1250 (74.4744) lr 7.8853e-06 eta 0:04:09 +epoch [50/50] batch [225/500] time 0.888 (0.890) data 0.000 (0.004) loss 1.0918 (1.0223) acc 71.8750 (74.4444) lr 7.8853e-06 eta 0:04:04 
+epoch [50/50] batch [230/500] time 0.869 (0.890) data 0.000 (0.004) loss 0.5869 (1.0213) acc 78.1250 (74.4429) lr 7.8853e-06 eta 0:04:00 +epoch [50/50] batch [235/500] time 0.844 (0.889) data 0.000 (0.004) loss 0.9683 (1.0237) acc 78.1250 (74.4548) lr 7.8853e-06 eta 0:03:55 +epoch [50/50] batch [240/500] time 0.883 (0.889) data 0.000 (0.004) loss 0.9966 (1.0230) acc 68.7500 (74.4141) lr 7.8853e-06 eta 0:03:51 +epoch [50/50] batch [245/500] time 0.885 (0.889) data 0.000 (0.003) loss 0.8672 (1.0270) acc 75.0000 (74.2602) lr 7.8853e-06 eta 0:03:46 +epoch [50/50] batch [250/500] time 0.899 (0.889) data 0.000 (0.003) loss 0.9111 (1.0292) acc 71.8750 (74.1875) lr 7.8853e-06 eta 0:03:42 +epoch [50/50] batch [255/500] time 0.880 (0.888) data 0.000 (0.003) loss 0.5938 (1.0275) acc 90.6250 (74.1912) lr 7.8853e-06 eta 0:03:37 +epoch [50/50] batch [260/500] time 0.976 (0.889) data 0.000 (0.003) loss 0.5479 (1.0235) acc 81.2500 (74.3029) lr 7.8853e-06 eta 0:03:33 +epoch [50/50] batch [265/500] time 0.886 (0.888) data 0.000 (0.003) loss 0.7329 (1.0197) acc 81.2500 (74.4222) lr 7.8853e-06 eta 0:03:28 +epoch [50/50] batch [270/500] time 0.852 (0.888) data 0.000 (0.003) loss 0.6299 (1.0176) acc 84.3750 (74.5602) lr 7.8853e-06 eta 0:03:24 +epoch [50/50] batch [275/500] time 0.906 (0.889) data 0.000 (0.003) loss 1.2500 (1.0187) acc 65.6250 (74.5000) lr 7.8853e-06 eta 0:03:19 +epoch [50/50] batch [280/500] time 0.859 (0.889) data 0.000 (0.003) loss 0.8413 (1.0204) acc 78.1250 (74.4420) lr 7.8853e-06 eta 0:03:15 +epoch [50/50] batch [285/500] time 0.900 (0.889) data 0.000 (0.003) loss 0.9761 (1.0197) acc 68.7500 (74.4298) lr 7.8853e-06 eta 0:03:11 +epoch [50/50] batch [290/500] time 0.881 (0.889) data 0.000 (0.003) loss 1.2617 (1.0203) acc 75.0000 (74.4720) lr 7.8853e-06 eta 0:03:06 +epoch [50/50] batch [295/500] time 0.888 (0.889) data 0.000 (0.003) loss 0.7168 (1.0192) acc 84.3750 (74.4809) lr 7.8853e-06 eta 0:03:02 +epoch [50/50] batch [300/500] time 0.878 (0.888) data 0.000 
(0.003) loss 0.3928 (1.0183) acc 93.7500 (74.5417) lr 7.8853e-06 eta 0:02:57 +epoch [50/50] batch [305/500] time 0.865 (0.889) data 0.000 (0.003) loss 1.2617 (1.0221) acc 62.5000 (74.4057) lr 7.8853e-06 eta 0:02:53 +epoch [50/50] batch [310/500] time 0.878 (0.889) data 0.000 (0.003) loss 1.2236 (1.0242) acc 59.3750 (74.2944) lr 7.8853e-06 eta 0:02:48 +epoch [50/50] batch [315/500] time 0.880 (0.889) data 0.000 (0.003) loss 0.6465 (1.0224) acc 84.3750 (74.3254) lr 7.8853e-06 eta 0:02:44 +epoch [50/50] batch [320/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.1729 (1.0246) acc 75.0000 (74.3164) lr 7.8853e-06 eta 0:02:39 +epoch [50/50] batch [325/500] time 0.910 (0.888) data 0.000 (0.003) loss 1.4521 (1.0266) acc 75.0000 (74.2885) lr 7.8853e-06 eta 0:02:35 +epoch [50/50] batch [330/500] time 0.866 (0.888) data 0.000 (0.003) loss 1.4150 (1.0259) acc 71.8750 (74.2992) lr 7.8853e-06 eta 0:02:30 +epoch [50/50] batch [335/500] time 0.868 (0.888) data 0.000 (0.003) loss 1.0039 (1.0296) acc 71.8750 (74.2444) lr 7.8853e-06 eta 0:02:26 +epoch [50/50] batch [340/500] time 0.869 (0.888) data 0.000 (0.003) loss 0.7695 (1.0300) acc 84.3750 (74.2647) lr 7.8853e-06 eta 0:02:22 +epoch [50/50] batch [345/500] time 0.868 (0.888) data 0.000 (0.003) loss 0.8403 (1.0309) acc 75.0000 (74.1848) lr 7.8853e-06 eta 0:02:17 +epoch [50/50] batch [350/500] time 0.861 (0.887) data 0.000 (0.002) loss 1.0820 (1.0318) acc 75.0000 (74.1696) lr 7.8853e-06 eta 0:02:13 +epoch [50/50] batch [355/500] time 0.868 (0.887) data 0.000 (0.002) loss 1.2852 (1.0345) acc 71.8750 (74.0493) lr 7.8853e-06 eta 0:02:08 +epoch [50/50] batch [360/500] time 0.909 (0.887) data 0.000 (0.002) loss 0.8677 (1.0345) acc 78.1250 (74.0799) lr 7.8853e-06 eta 0:02:04 +epoch [50/50] batch [365/500] time 0.885 (0.887) data 0.000 (0.002) loss 1.1846 (1.0339) acc 62.5000 (74.0582) lr 7.8853e-06 eta 0:01:59 +epoch [50/50] batch [370/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.9053 (1.0332) acc 81.2500 (74.1132) lr 7.8853e-06 
eta 0:01:55 +epoch [50/50] batch [375/500] time 0.909 (0.888) data 0.000 (0.002) loss 0.7832 (1.0316) acc 81.2500 (74.1333) lr 7.8853e-06 eta 0:01:50 +epoch [50/50] batch [380/500] time 0.880 (0.888) data 0.000 (0.002) loss 0.9209 (1.0281) acc 78.1250 (74.2188) lr 7.8853e-06 eta 0:01:46 +epoch [50/50] batch [385/500] time 0.873 (0.888) data 0.000 (0.002) loss 1.0312 (1.0258) acc 68.7500 (74.2614) lr 7.8853e-06 eta 0:01:42 +epoch [50/50] batch [390/500] time 0.849 (0.887) data 0.001 (0.002) loss 1.1807 (1.0234) acc 71.8750 (74.3189) lr 7.8853e-06 eta 0:01:37 +epoch [50/50] batch [395/500] time 0.875 (0.887) data 0.000 (0.002) loss 1.1494 (1.0261) acc 68.7500 (74.2405) lr 7.8853e-06 eta 0:01:33 +epoch [50/50] batch [400/500] time 0.894 (0.887) data 0.000 (0.002) loss 1.0176 (1.0271) acc 78.1250 (74.2422) lr 7.8853e-06 eta 0:01:28 +epoch [50/50] batch [405/500] time 0.885 (0.888) data 0.000 (0.002) loss 1.4971 (1.0259) acc 65.6250 (74.3133) lr 7.8853e-06 eta 0:01:24 +epoch [50/50] batch [410/500] time 0.872 (0.888) data 0.000 (0.002) loss 0.7031 (1.0242) acc 75.0000 (74.3521) lr 7.8853e-06 eta 0:01:19 +epoch [50/50] batch [415/500] time 0.898 (0.888) data 0.000 (0.002) loss 1.0791 (1.0258) acc 65.6250 (74.3072) lr 7.8853e-06 eta 0:01:15 +epoch [50/50] batch [420/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.9976 (1.0279) acc 62.5000 (74.3080) lr 7.8853e-06 eta 0:01:10 +epoch [50/50] batch [425/500] time 0.886 (0.887) data 0.000 (0.002) loss 0.4375 (1.0277) acc 90.6250 (74.3088) lr 7.8853e-06 eta 0:01:06 +epoch [50/50] batch [430/500] time 0.881 (0.887) data 0.000 (0.002) loss 1.4160 (1.0291) acc 65.6250 (74.2587) lr 7.8853e-06 eta 0:01:02 +epoch [50/50] batch [435/500] time 0.891 (0.887) data 0.000 (0.002) loss 0.9116 (1.0286) acc 78.1250 (74.2672) lr 7.8853e-06 eta 0:00:57 +epoch [50/50] batch [440/500] time 0.873 (0.887) data 0.000 (0.002) loss 1.0273 (1.0294) acc 71.8750 (74.2116) lr 7.8853e-06 eta 0:00:53 +epoch [50/50] batch [445/500] time 0.891 (0.887) data 
0.000 (0.002) loss 1.0371 (1.0290) acc 68.7500 (74.2135) lr 7.8853e-06 eta 0:00:48 +epoch [50/50] batch [450/500] time 0.868 (0.887) data 0.000 (0.002) loss 0.5454 (1.0298) acc 84.3750 (74.2083) lr 7.8853e-06 eta 0:00:44 +epoch [50/50] batch [455/500] time 0.889 (0.887) data 0.000 (0.002) loss 1.7773 (1.0320) acc 62.5000 (74.1690) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [460/500] time 0.870 (0.887) data 0.000 (0.002) loss 0.9399 (1.0316) acc 68.7500 (74.1644) lr 7.8853e-06 eta 0:00:35 +epoch [50/50] batch [465/500] time 0.859 (0.887) data 0.000 (0.002) loss 1.4922 (1.0343) acc 65.6250 (74.0927) lr 7.8853e-06 eta 0:00:31 +epoch [50/50] batch [470/500] time 0.875 (0.887) data 0.000 (0.002) loss 0.8403 (1.0355) acc 78.1250 (74.1090) lr 7.8853e-06 eta 0:00:26 +epoch [50/50] batch [475/500] time 0.905 (0.887) data 0.000 (0.002) loss 1.0254 (1.0343) acc 78.1250 (74.1053) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [480/500] time 0.912 (0.887) data 0.000 (0.002) loss 0.9800 (1.0332) acc 68.7500 (74.1081) lr 7.8853e-06 eta 0:00:17 +epoch [50/50] batch [485/500] time 0.871 (0.887) data 0.000 (0.002) loss 1.5898 (1.0337) acc 65.6250 (74.0915) lr 7.8853e-06 eta 0:00:13 +epoch [50/50] batch [490/500] time 0.884 (0.887) data 0.000 (0.002) loss 0.6924 (1.0333) acc 84.3750 (74.1071) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [495/500] time 0.895 (0.887) data 0.000 (0.002) loss 0.6357 (1.0333) acc 84.3750 (74.1162) lr 7.8853e-06 eta 0:00:04 +epoch [50/50] batch [500/500] time 0.893 (0.887) data 0.000 (0.002) loss 0.9790 (1.0338) acc 78.1250 (74.0563) lr 1.9733e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 39,082 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 77.7% +Elapsed: 6:12:38 diff --git 
a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..da6abaad Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698993062.ckb-gpu-a.1686533.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698993062.ckb-gpu-a.1686533.0 new file mode 100644 index 00000000..241652aa Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698993062.ckb-gpu-a.1686533.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..30b32f66 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** 
+backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + 
SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.012 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/31] time 1.340 (2.159) data 0.000 (0.165) loss 3.2520 (3.2875) acc 34.3750 (34.3750) lr 1.0000e-05 eta 0:55:36 +epoch [1/50] batch [10/31] time 1.344 (1.752) data 0.000 (0.082) loss 3.3301 (3.0771) acc 34.3750 (36.2500) lr 1.0000e-05 eta 0:44:57 +epoch [1/50] batch [15/31] time 1.354 (1.618) data 0.000 (0.055) loss 2.5000 (2.8858) acc 50.0000 (40.2083) lr 1.0000e-05 eta 0:41:23 +epoch [1/50] batch [20/31] time 1.355 (1.552) data 0.000 (0.041) loss 3.4648 (2.8345) acc 34.3750 (42.1875) lr 1.0000e-05 eta 0:39:35 +epoch [1/50] batch [25/31] time 1.377 (1.515) data 0.000 (0.033) loss 2.0664 (2.6438) acc 65.6250 (45.3750) lr 1.0000e-05 eta 0:38:30 +epoch [1/50] batch [30/31] time 1.363 (1.489) data 0.000 (0.028) loss 2.5957 (2.5393) acc 43.7500 (46.8750) lr 1.0000e-05 eta 0:37:43 +epoch [2/50] batch [5/31] time 1.366 (1.539) data 0.000 (0.176) loss 1.4180 (1.7137) acc 62.5000 (62.5000) lr 2.0000e-03 eta 0:38:50 +epoch [2/50] batch [10/31] time 1.358 (1.450) data 0.000 (0.088) loss 1.3145 (1.6289) acc 62.5000 (62.5000) lr 2.0000e-03 eta 0:36:27 +epoch [2/50] batch [15/31] time 1.347 (1.421) data 0.000 (0.059) loss 1.9111 (1.5553) acc 68.7500 (64.7917) lr 2.0000e-03 eta 0:35:36 +epoch [2/50] batch [20/31] time 1.360 (1.403) data 0.000 (0.044) loss 1.3291 (1.5576) acc 62.5000 (64.5312) lr 2.0000e-03 eta 0:35:03 +epoch [2/50] batch [25/31] time 1.362 (1.394) data 0.000 (0.035) loss 1.2266 (1.5649) acc 65.6250 (64.5000) lr 2.0000e-03 eta 0:34:42 +epoch [2/50] batch [30/31] time 1.355 (1.386) data 0.000 (0.030) loss 1.4561 (1.5499) acc 71.8750 (64.8958) lr 2.0000e-03 eta 0:34:23 +epoch [3/50] batch [5/31] time 1.358 (1.533) data 0.000 (0.176) loss 1.3398 (1.4328) acc 78.1250 (68.1250) lr 1.9980e-03 eta 0:37:53 +epoch [3/50] batch [10/31] time 1.345 (1.444) data 
0.000 (0.088) loss 1.1631 (1.4499) acc 68.7500 (67.8125) lr 1.9980e-03 eta 0:35:34 +epoch [3/50] batch [15/31] time 1.350 (1.414) data 0.000 (0.059) loss 0.9927 (1.3484) acc 68.7500 (67.5000) lr 1.9980e-03 eta 0:34:42 +epoch [3/50] batch [20/31] time 1.343 (1.401) data 0.000 (0.044) loss 1.4072 (1.3630) acc 68.7500 (67.3438) lr 1.9980e-03 eta 0:34:15 +epoch [3/50] batch [25/31] time 1.369 (1.393) data 0.000 (0.036) loss 1.1680 (1.3121) acc 75.0000 (68.1250) lr 1.9980e-03 eta 0:33:57 +epoch [3/50] batch [30/31] time 1.360 (1.386) data 0.000 (0.030) loss 1.7451 (1.3339) acc 46.8750 (67.6042) lr 1.9980e-03 eta 0:33:41 +epoch [4/50] batch [5/31] time 1.366 (1.546) data 0.000 (0.182) loss 0.8345 (1.1864) acc 75.0000 (73.1250) lr 1.9921e-03 eta 0:37:24 +epoch [4/50] batch [10/31] time 1.371 (1.453) data 0.001 (0.091) loss 0.8857 (1.1329) acc 65.6250 (71.8750) lr 1.9921e-03 eta 0:35:01 +epoch [4/50] batch [15/31] time 1.358 (1.420) data 0.000 (0.061) loss 0.9395 (1.1826) acc 81.2500 (71.2500) lr 1.9921e-03 eta 0:34:07 +epoch [4/50] batch [20/31] time 1.361 (1.405) data 0.000 (0.046) loss 1.1533 (1.2199) acc 71.8750 (70.9375) lr 1.9921e-03 eta 0:33:39 +epoch [4/50] batch [25/31] time 1.372 (1.395) data 0.001 (0.037) loss 1.0127 (1.2429) acc 75.0000 (69.6250) lr 1.9921e-03 eta 0:33:18 +epoch [4/50] batch [30/31] time 1.371 (1.390) data 0.000 (0.031) loss 1.0830 (1.2102) acc 75.0000 (69.8958) lr 1.9921e-03 eta 0:33:03 +epoch [5/50] batch [5/31] time 1.338 (1.608) data 0.000 (0.211) loss 1.0117 (0.9483) acc 78.1250 (75.6250) lr 1.9823e-03 eta 0:38:05 +epoch [5/50] batch [10/31] time 1.360 (1.482) data 0.000 (0.106) loss 1.1328 (1.0974) acc 68.7500 (73.7500) lr 1.9823e-03 eta 0:34:58 +epoch [5/50] batch [15/31] time 1.393 (1.444) data 0.000 (0.071) loss 0.5415 (1.0721) acc 81.2500 (73.5417) lr 1.9823e-03 eta 0:33:57 +epoch [5/50] batch [20/31] time 1.356 (1.424) data 0.000 (0.053) loss 0.9604 (1.1032) acc 81.2500 (73.4375) lr 1.9823e-03 eta 0:33:22 +epoch [5/50] batch [25/31] 
time 1.364 (1.413) data 0.000 (0.042) loss 1.7881 (1.1727) acc 68.7500 (72.7500) lr 1.9823e-03 eta 0:32:59 +epoch [5/50] batch [30/31] time 1.352 (1.404) data 0.000 (0.035) loss 1.3682 (1.2318) acc 68.7500 (70.9375) lr 1.9823e-03 eta 0:32:40 +epoch [6/50] batch [5/31] time 1.374 (1.561) data 0.000 (0.190) loss 1.1152 (1.0459) acc 78.1250 (73.7500) lr 1.9686e-03 eta 0:36:10 +epoch [6/50] batch [10/31] time 1.363 (1.459) data 0.000 (0.095) loss 1.8027 (1.1712) acc 59.3750 (73.7500) lr 1.9686e-03 eta 0:33:41 +epoch [6/50] batch [15/31] time 1.367 (1.425) data 0.000 (0.064) loss 1.0078 (1.1298) acc 78.1250 (73.9583) lr 1.9686e-03 eta 0:32:47 +epoch [6/50] batch [20/31] time 1.346 (1.416) data 0.000 (0.048) loss 0.8770 (1.1262) acc 81.2500 (72.9688) lr 1.9686e-03 eta 0:32:26 +epoch [6/50] batch [25/31] time 1.370 (1.405) data 0.000 (0.038) loss 1.1260 (1.1283) acc 71.8750 (72.3750) lr 1.9686e-03 eta 0:32:05 +epoch [6/50] batch [30/31] time 1.362 (1.397) data 0.000 (0.032) loss 1.1230 (1.1538) acc 71.8750 (72.3958) lr 1.9686e-03 eta 0:31:47 +epoch [7/50] batch [5/31] time 1.360 (1.553) data 0.000 (0.196) loss 1.2998 (1.3525) acc 71.8750 (66.2500) lr 1.9511e-03 eta 0:35:11 +epoch [7/50] batch [10/31] time 1.361 (1.461) data 0.000 (0.098) loss 0.8594 (1.1873) acc 71.8750 (70.3125) lr 1.9511e-03 eta 0:32:58 +epoch [7/50] batch [15/31] time 1.364 (1.428) data 0.000 (0.066) loss 0.8711 (1.1880) acc 71.8750 (70.2083) lr 1.9511e-03 eta 0:32:05 +epoch [7/50] batch [20/31] time 1.386 (1.413) data 0.000 (0.049) loss 1.2910 (1.1432) acc 68.7500 (71.0938) lr 1.9511e-03 eta 0:31:39 +epoch [7/50] batch [25/31] time 1.365 (1.405) data 0.000 (0.040) loss 1.2490 (1.1700) acc 68.7500 (70.2500) lr 1.9511e-03 eta 0:31:21 +epoch [7/50] batch [30/31] time 1.341 (1.402) data 0.000 (0.033) loss 1.9756 (1.1959) acc 59.3750 (70.1042) lr 1.9511e-03 eta 0:31:09 +epoch [8/50] batch [5/31] time 1.369 (1.539) data 0.000 (0.179) loss 1.3301 (1.0374) acc 65.6250 (71.8750) lr 1.9298e-03 eta 0:34:03 
+epoch [8/50] batch [10/31] time 1.346 (1.446) data 0.000 (0.090) loss 1.2617 (1.1095) acc 71.8750 (70.3125) lr 1.9298e-03 eta 0:31:53 +epoch [8/50] batch [15/31] time 1.375 (1.416) data 0.000 (0.060) loss 0.8193 (1.0007) acc 75.0000 (72.9167) lr 1.9298e-03 eta 0:31:06 +epoch [8/50] batch [20/31] time 1.370 (1.401) data 0.000 (0.045) loss 0.6870 (1.0486) acc 78.1250 (72.5000) lr 1.9298e-03 eta 0:30:39 +epoch [8/50] batch [25/31] time 1.343 (1.392) data 0.000 (0.036) loss 1.0088 (1.0666) acc 78.1250 (71.8750) lr 1.9298e-03 eta 0:30:20 +epoch [8/50] batch [30/31] time 1.361 (1.387) data 0.000 (0.030) loss 0.8423 (1.0700) acc 78.1250 (72.0833) lr 1.9298e-03 eta 0:30:07 +epoch [9/50] batch [5/31] time 1.371 (1.546) data 0.000 (0.179) loss 1.7012 (1.0435) acc 62.5000 (73.7500) lr 1.9048e-03 eta 0:33:25 +epoch [9/50] batch [10/31] time 1.348 (1.466) data 0.000 (0.090) loss 1.9766 (1.1021) acc 65.6250 (74.0625) lr 1.9048e-03 eta 0:31:34 +epoch [9/50] batch [15/31] time 1.369 (1.430) data 0.000 (0.060) loss 1.6387 (1.1207) acc 62.5000 (72.9167) lr 1.9048e-03 eta 0:30:40 +epoch [9/50] batch [20/31] time 1.349 (1.413) data 0.000 (0.045) loss 1.4365 (1.1682) acc 53.1250 (70.6250) lr 1.9048e-03 eta 0:30:10 +epoch [9/50] batch [25/31] time 1.354 (1.402) data 0.000 (0.036) loss 1.3086 (1.1999) acc 68.7500 (70.0000) lr 1.9048e-03 eta 0:29:49 +epoch [9/50] batch [30/31] time 1.386 (1.396) data 0.000 (0.030) loss 1.9121 (1.2061) acc 56.2500 (70.1042) lr 1.9048e-03 eta 0:29:35 +epoch [10/50] batch [5/31] time 1.351 (1.549) data 0.001 (0.190) loss 0.9009 (1.0769) acc 81.2500 (73.1250) lr 1.8763e-03 eta 0:32:41 +epoch [10/50] batch [10/31] time 1.353 (1.451) data 0.000 (0.095) loss 1.5010 (1.0623) acc 62.5000 (73.7500) lr 1.8763e-03 eta 0:30:30 +epoch [10/50] batch [15/31] time 1.480 (1.428) data 0.000 (0.064) loss 1.5811 (1.1082) acc 65.6250 (72.9167) lr 1.8763e-03 eta 0:29:53 +epoch [10/50] batch [20/31] time 1.370 (1.411) data 0.001 (0.048) loss 1.0186 (1.0976) acc 78.1250 
(73.1250) lr 1.8763e-03 eta 0:29:25 +epoch [10/50] batch [25/31] time 1.374 (1.403) data 0.000 (0.038) loss 1.0605 (1.0710) acc 78.1250 (73.5000) lr 1.8763e-03 eta 0:29:08 +epoch [10/50] batch [30/31] time 1.347 (1.396) data 0.000 (0.032) loss 0.9409 (1.0455) acc 81.2500 (73.3333) lr 1.8763e-03 eta 0:28:52 +epoch [11/50] batch [5/31] time 1.355 (1.553) data 0.000 (0.190) loss 1.1709 (1.1305) acc 62.5000 (68.1250) lr 1.8443e-03 eta 0:31:58 +epoch [11/50] batch [10/31] time 1.374 (1.457) data 0.000 (0.095) loss 1.6191 (1.1599) acc 65.6250 (69.3750) lr 1.8443e-03 eta 0:29:52 +epoch [11/50] batch [15/31] time 1.359 (1.423) data 0.000 (0.064) loss 1.0225 (1.0258) acc 84.3750 (73.9583) lr 1.8443e-03 eta 0:29:03 +epoch [11/50] batch [20/31] time 1.363 (1.409) data 0.000 (0.048) loss 0.9629 (1.0050) acc 65.6250 (74.8438) lr 1.8443e-03 eta 0:28:38 +epoch [11/50] batch [25/31] time 1.340 (1.404) data 0.000 (0.038) loss 1.5127 (1.0279) acc 62.5000 (74.2500) lr 1.8443e-03 eta 0:28:25 +epoch [11/50] batch [30/31] time 1.347 (1.397) data 0.000 (0.032) loss 1.1797 (1.0381) acc 81.2500 (73.7500) lr 1.8443e-03 eta 0:28:10 +epoch [12/50] batch [5/31] time 1.371 (1.539) data 0.000 (0.171) loss 1.4658 (1.2498) acc 78.1250 (71.2500) lr 1.8090e-03 eta 0:30:52 +epoch [12/50] batch [10/31] time 1.378 (1.451) data 0.000 (0.086) loss 1.0674 (1.0673) acc 71.8750 (74.0625) lr 1.8090e-03 eta 0:29:00 +epoch [12/50] batch [15/31] time 1.350 (1.420) data 0.000 (0.057) loss 0.8467 (1.0803) acc 71.8750 (73.3333) lr 1.8090e-03 eta 0:28:15 +epoch [12/50] batch [20/31] time 1.352 (1.402) data 0.000 (0.043) loss 0.8022 (1.0629) acc 81.2500 (74.5312) lr 1.8090e-03 eta 0:27:46 +epoch [12/50] batch [25/31] time 1.346 (1.393) data 0.000 (0.035) loss 1.0439 (1.1255) acc 78.1250 (72.7500) lr 1.8090e-03 eta 0:27:29 +epoch [12/50] batch [30/31] time 1.349 (1.387) data 0.000 (0.029) loss 0.7979 (1.1042) acc 71.8750 (72.3958) lr 1.8090e-03 eta 0:27:14 +epoch [13/50] batch [5/31] time 1.372 (1.576) data 0.000 
(0.181) loss 1.2158 (1.2566) acc 78.1250 (69.3750) lr 1.7705e-03 eta 0:30:48 +epoch [13/50] batch [10/31] time 1.367 (1.472) data 0.000 (0.091) loss 1.1592 (1.1075) acc 68.7500 (71.8750) lr 1.7705e-03 eta 0:28:38 +epoch [13/50] batch [15/31] time 1.353 (1.436) data 0.000 (0.061) loss 0.8374 (1.0870) acc 75.0000 (73.7500) lr 1.7705e-03 eta 0:27:49 +epoch [13/50] batch [20/31] time 1.370 (1.419) data 0.000 (0.046) loss 0.9287 (1.0919) acc 75.0000 (72.8125) lr 1.7705e-03 eta 0:27:23 +epoch [13/50] batch [25/31] time 1.361 (1.407) data 0.000 (0.037) loss 2.1445 (1.1528) acc 62.5000 (71.5000) lr 1.7705e-03 eta 0:27:02 +epoch [13/50] batch [30/31] time 1.366 (1.399) data 0.000 (0.030) loss 0.6733 (1.1225) acc 84.3750 (72.0833) lr 1.7705e-03 eta 0:26:46 +epoch [14/50] batch [5/31] time 1.349 (1.618) data 0.000 (0.254) loss 0.7236 (0.8346) acc 68.7500 (75.0000) lr 1.7290e-03 eta 0:30:47 +epoch [14/50] batch [10/31] time 1.370 (1.487) data 0.000 (0.127) loss 0.6982 (0.8976) acc 81.2500 (76.5625) lr 1.7290e-03 eta 0:28:11 +epoch [14/50] batch [15/31] time 1.372 (1.447) data 0.000 (0.085) loss 1.0635 (1.0551) acc 75.0000 (74.5833) lr 1.7290e-03 eta 0:27:18 +epoch [14/50] batch [20/31] time 1.354 (1.435) data 0.000 (0.064) loss 0.9604 (1.0096) acc 68.7500 (74.6875) lr 1.7290e-03 eta 0:26:56 +epoch [14/50] batch [25/31] time 1.357 (1.420) data 0.000 (0.051) loss 0.4807 (1.0377) acc 87.5000 (73.8750) lr 1.7290e-03 eta 0:26:33 +epoch [14/50] batch [30/31] time 1.361 (1.411) data 0.000 (0.043) loss 0.6240 (1.0655) acc 81.2500 (73.7500) lr 1.7290e-03 eta 0:26:16 +epoch [15/50] batch [5/31] time 1.375 (1.539) data 0.000 (0.172) loss 0.5845 (0.8232) acc 81.2500 (78.7500) lr 1.6845e-03 eta 0:28:30 +epoch [15/50] batch [10/31] time 1.357 (1.451) data 0.000 (0.086) loss 0.8228 (0.9502) acc 65.6250 (75.6250) lr 1.6845e-03 eta 0:26:44 +epoch [15/50] batch [15/31] time 1.373 (1.423) data 0.000 (0.058) loss 1.1836 (0.9904) acc 68.7500 (73.9583) lr 1.6845e-03 eta 0:26:06 +epoch [15/50] batch 
[20/31] time 1.360 (1.405) data 0.000 (0.043) loss 0.6201 (0.9986) acc 87.5000 (73.9062) lr 1.6845e-03 eta 0:25:39 +epoch [15/50] batch [25/31] time 1.374 (1.398) data 0.000 (0.035) loss 1.1504 (0.9979) acc 78.1250 (73.8750) lr 1.6845e-03 eta 0:25:25 +epoch [15/50] batch [30/31] time 1.378 (1.394) data 0.000 (0.029) loss 1.1641 (1.0080) acc 84.3750 (74.0625) lr 1.6845e-03 eta 0:25:13 +epoch [16/50] batch [5/31] time 1.370 (1.545) data 0.000 (0.179) loss 1.0693 (0.8639) acc 68.7500 (76.2500) lr 1.6374e-03 eta 0:27:48 +epoch [16/50] batch [10/31] time 1.360 (1.454) data 0.000 (0.090) loss 0.9648 (0.9537) acc 81.2500 (75.0000) lr 1.6374e-03 eta 0:26:02 +epoch [16/50] batch [15/31] time 1.358 (1.421) data 0.000 (0.060) loss 1.2783 (0.9713) acc 65.6250 (75.2083) lr 1.6374e-03 eta 0:25:20 +epoch [16/50] batch [20/31] time 1.341 (1.405) data 0.000 (0.045) loss 0.8901 (0.9452) acc 62.5000 (75.0000) lr 1.6374e-03 eta 0:24:55 +epoch [16/50] batch [25/31] time 1.376 (1.398) data 0.000 (0.036) loss 0.7554 (0.9539) acc 78.1250 (75.1250) lr 1.6374e-03 eta 0:24:41 +epoch [16/50] batch [30/31] time 1.356 (1.391) data 0.000 (0.030) loss 0.5093 (0.9569) acc 84.3750 (75.3125) lr 1.6374e-03 eta 0:24:27 +epoch [17/50] batch [5/31] time 1.355 (1.556) data 0.000 (0.195) loss 1.4717 (1.3037) acc 65.6250 (68.7500) lr 1.5878e-03 eta 0:27:12 +epoch [17/50] batch [10/31] time 1.341 (1.456) data 0.000 (0.098) loss 0.7422 (1.0970) acc 84.3750 (73.4375) lr 1.5878e-03 eta 0:25:19 +epoch [17/50] batch [15/31] time 1.359 (1.427) data 0.000 (0.065) loss 1.0020 (1.0431) acc 75.0000 (73.9583) lr 1.5878e-03 eta 0:24:42 +epoch [17/50] batch [20/31] time 1.350 (1.416) data 0.000 (0.049) loss 0.6973 (1.0076) acc 81.2500 (75.7812) lr 1.5878e-03 eta 0:24:24 +epoch [17/50] batch [25/31] time 1.379 (1.406) data 0.000 (0.039) loss 0.9199 (1.0103) acc 71.8750 (75.5000) lr 1.5878e-03 eta 0:24:06 +epoch [17/50] batch [30/31] time 1.380 (1.398) data 0.000 (0.033) loss 0.9917 (0.9973) acc 84.3750 (75.8333) lr 
1.5878e-03 eta 0:23:51 +epoch [18/50] batch [5/31] time 1.385 (1.557) data 0.000 (0.185) loss 1.0215 (0.9950) acc 71.8750 (75.0000) lr 1.5358e-03 eta 0:26:24 +epoch [18/50] batch [10/31] time 1.358 (1.462) data 0.000 (0.093) loss 1.4775 (1.0028) acc 65.6250 (75.9375) lr 1.5358e-03 eta 0:24:40 +epoch [18/50] batch [15/31] time 1.347 (1.431) data 0.000 (0.062) loss 1.0713 (0.9793) acc 71.8750 (76.8750) lr 1.5358e-03 eta 0:24:02 +epoch [18/50] batch [20/31] time 1.376 (1.414) data 0.000 (0.047) loss 1.1797 (0.9684) acc 71.8750 (76.2500) lr 1.5358e-03 eta 0:23:38 +epoch [18/50] batch [25/31] time 1.359 (1.403) data 0.000 (0.037) loss 1.0479 (0.9471) acc 75.0000 (76.8750) lr 1.5358e-03 eta 0:23:20 +epoch [18/50] batch [30/31] time 1.379 (1.401) data 0.000 (0.031) loss 0.5200 (0.9206) acc 87.5000 (77.3958) lr 1.5358e-03 eta 0:23:10 +epoch [19/50] batch [5/31] time 1.361 (1.553) data 0.000 (0.180) loss 0.9570 (0.9212) acc 71.8750 (76.8750) lr 1.4818e-03 eta 0:25:33 +epoch [19/50] batch [10/31] time 1.358 (1.459) data 0.000 (0.090) loss 0.5601 (0.9072) acc 90.6250 (77.1875) lr 1.4818e-03 eta 0:23:53 +epoch [19/50] batch [15/31] time 1.381 (1.428) data 0.000 (0.060) loss 0.9253 (0.9428) acc 78.1250 (76.4583) lr 1.4818e-03 eta 0:23:15 +epoch [19/50] batch [20/31] time 1.368 (1.414) data 0.000 (0.045) loss 0.5879 (0.9166) acc 87.5000 (76.7188) lr 1.4818e-03 eta 0:22:54 +epoch [19/50] batch [25/31] time 1.352 (1.402) data 0.000 (0.036) loss 0.8252 (0.9171) acc 84.3750 (76.8750) lr 1.4818e-03 eta 0:22:35 +epoch [19/50] batch [30/31] time 1.359 (1.395) data 0.000 (0.030) loss 1.0918 (0.8926) acc 68.7500 (77.0833) lr 1.4818e-03 eta 0:22:22 +epoch [20/50] batch [5/31] time 1.348 (1.546) data 0.001 (0.182) loss 0.2410 (0.9290) acc 93.7500 (75.0000) lr 1.4258e-03 eta 0:24:38 +epoch [20/50] batch [10/31] time 1.374 (1.470) data 0.000 (0.091) loss 0.5854 (0.8712) acc 81.2500 (75.6250) lr 1.4258e-03 eta 0:23:18 +epoch [20/50] batch [15/31] time 1.359 (1.435) data 0.000 (0.061) loss 
0.9731 (0.8810) acc 71.8750 (75.2083) lr 1.4258e-03 eta 0:22:37 +epoch [20/50] batch [20/31] time 1.352 (1.414) data 0.000 (0.046) loss 1.2559 (0.9617) acc 81.2500 (74.2188) lr 1.4258e-03 eta 0:22:10 +epoch [20/50] batch [25/31] time 1.359 (1.401) data 0.000 (0.037) loss 1.2373 (0.9850) acc 68.7500 (74.1250) lr 1.4258e-03 eta 0:21:51 +epoch [20/50] batch [30/31] time 1.356 (1.395) data 0.000 (0.031) loss 1.3750 (0.9774) acc 71.8750 (74.4792) lr 1.4258e-03 eta 0:21:38 +epoch [21/50] batch [5/31] time 1.372 (1.555) data 0.000 (0.188) loss 0.4458 (0.9664) acc 90.6250 (77.5000) lr 1.3681e-03 eta 0:23:58 +epoch [21/50] batch [10/31] time 1.360 (1.457) data 0.000 (0.094) loss 0.8979 (1.0485) acc 81.2500 (76.8750) lr 1.3681e-03 eta 0:22:20 +epoch [21/50] batch [15/31] time 1.510 (1.434) data 0.000 (0.063) loss 0.9692 (1.0340) acc 78.1250 (75.6250) lr 1.3681e-03 eta 0:21:52 +epoch [21/50] batch [20/31] time 1.337 (1.415) data 0.000 (0.047) loss 0.9380 (0.9472) acc 81.2500 (77.1875) lr 1.3681e-03 eta 0:21:27 +epoch [21/50] batch [25/31] time 1.348 (1.404) data 0.000 (0.038) loss 0.8374 (0.8997) acc 71.8750 (78.1250) lr 1.3681e-03 eta 0:21:10 +epoch [21/50] batch [30/31] time 1.385 (1.396) data 0.000 (0.032) loss 1.1162 (0.9031) acc 75.0000 (77.8125) lr 1.3681e-03 eta 0:20:56 +epoch [22/50] batch [5/31] time 1.366 (1.559) data 0.000 (0.187) loss 0.4868 (0.6910) acc 87.5000 (80.6250) lr 1.3090e-03 eta 0:23:13 +epoch [22/50] batch [10/31] time 1.360 (1.460) data 0.000 (0.094) loss 0.7803 (0.7487) acc 75.0000 (77.8125) lr 1.3090e-03 eta 0:21:38 +epoch [22/50] batch [15/31] time 1.366 (1.428) data 0.001 (0.063) loss 0.8833 (0.8010) acc 71.8750 (76.6667) lr 1.3090e-03 eta 0:21:02 +epoch [22/50] batch [20/31] time 1.354 (1.411) data 0.000 (0.047) loss 0.8022 (0.7742) acc 78.1250 (78.4375) lr 1.3090e-03 eta 0:20:40 +epoch [22/50] batch [25/31] time 1.339 (1.404) data 0.000 (0.038) loss 1.2402 (0.8609) acc 68.7500 (77.1250) lr 1.3090e-03 eta 0:20:27 +epoch [22/50] batch [30/31] time 
1.354 (1.398) data 0.000 (0.032) loss 1.2090 (0.8535) acc 71.8750 (77.6042) lr 1.3090e-03 eta 0:20:14 +epoch [23/50] batch [5/31] time 1.359 (1.521) data 0.000 (0.158) loss 0.3806 (0.5893) acc 93.7500 (88.1250) lr 1.2487e-03 eta 0:21:52 +epoch [23/50] batch [10/31] time 1.353 (1.442) data 0.000 (0.079) loss 0.8110 (0.7038) acc 81.2500 (84.3750) lr 1.2487e-03 eta 0:20:37 +epoch [23/50] batch [15/31] time 1.371 (1.420) data 0.000 (0.053) loss 0.5278 (0.8309) acc 87.5000 (81.4583) lr 1.2487e-03 eta 0:20:11 +epoch [23/50] batch [20/31] time 1.355 (1.405) data 0.000 (0.040) loss 0.4036 (0.8274) acc 87.5000 (80.9375) lr 1.2487e-03 eta 0:19:51 +epoch [23/50] batch [25/31] time 1.353 (1.396) data 0.000 (0.032) loss 0.9336 (0.8760) acc 75.0000 (79.8750) lr 1.2487e-03 eta 0:19:36 +epoch [23/50] batch [30/31] time 1.368 (1.391) data 0.000 (0.027) loss 0.6494 (0.8807) acc 84.3750 (79.4792) lr 1.2487e-03 eta 0:19:25 +epoch [24/50] batch [5/31] time 1.386 (1.567) data 0.000 (0.174) loss 0.5854 (0.9708) acc 90.6250 (80.0000) lr 1.1874e-03 eta 0:21:43 +epoch [24/50] batch [10/31] time 1.362 (1.467) data 0.000 (0.087) loss 0.9443 (0.8794) acc 78.1250 (80.3125) lr 1.1874e-03 eta 0:20:13 +epoch [24/50] batch [15/31] time 1.351 (1.432) data 0.000 (0.058) loss 1.3330 (0.9184) acc 62.5000 (78.1250) lr 1.1874e-03 eta 0:19:36 +epoch [24/50] batch [20/31] time 1.341 (1.412) data 0.000 (0.044) loss 0.5908 (0.9120) acc 81.2500 (77.9688) lr 1.1874e-03 eta 0:19:13 +epoch [24/50] batch [25/31] time 1.393 (1.405) data 0.000 (0.035) loss 1.0635 (0.9215) acc 78.1250 (77.1250) lr 1.1874e-03 eta 0:19:00 +epoch [24/50] batch [30/31] time 1.364 (1.397) data 0.000 (0.029) loss 0.6406 (0.8803) acc 78.1250 (78.1250) lr 1.1874e-03 eta 0:18:47 +epoch [25/50] batch [5/31] time 1.341 (1.528) data 0.000 (0.156) loss 0.7012 (0.6740) acc 81.2500 (82.5000) lr 1.1253e-03 eta 0:20:24 +epoch [25/50] batch [10/31] time 1.369 (1.449) data 0.000 (0.078) loss 0.9473 (0.7000) acc 75.0000 (81.5625) lr 1.1253e-03 eta 
0:19:13 +epoch [25/50] batch [15/31] time 1.376 (1.422) data 0.001 (0.052) loss 0.4917 (0.8016) acc 87.5000 (80.2083) lr 1.1253e-03 eta 0:18:44 +epoch [25/50] batch [20/31] time 1.372 (1.416) data 0.000 (0.039) loss 1.3740 (0.8989) acc 75.0000 (78.4375) lr 1.1253e-03 eta 0:18:33 +epoch [25/50] batch [25/31] time 1.362 (1.405) data 0.000 (0.032) loss 1.2988 (0.9084) acc 75.0000 (78.5000) lr 1.1253e-03 eta 0:18:17 +epoch [25/50] batch [30/31] time 1.352 (1.399) data 0.000 (0.026) loss 1.0723 (0.8962) acc 75.0000 (78.7500) lr 1.1253e-03 eta 0:18:05 +epoch [26/50] batch [5/31] time 1.380 (1.542) data 0.000 (0.170) loss 0.5488 (0.8932) acc 78.1250 (79.3750) lr 1.0628e-03 eta 0:19:47 +epoch [26/50] batch [10/31] time 1.350 (1.449) data 0.000 (0.085) loss 0.7188 (0.7878) acc 71.8750 (80.6250) lr 1.0628e-03 eta 0:18:28 +epoch [26/50] batch [15/31] time 1.338 (1.422) data 0.000 (0.057) loss 0.9844 (0.7162) acc 71.8750 (81.4583) lr 1.0628e-03 eta 0:18:00 +epoch [26/50] batch [20/31] time 1.370 (1.407) data 0.000 (0.043) loss 0.7192 (0.7443) acc 90.6250 (81.8750) lr 1.0628e-03 eta 0:17:42 +epoch [26/50] batch [25/31] time 1.366 (1.400) data 0.000 (0.034) loss 0.7983 (0.7930) acc 84.3750 (81.0000) lr 1.0628e-03 eta 0:17:29 +epoch [26/50] batch [30/31] time 1.366 (1.394) data 0.000 (0.029) loss 0.8730 (0.7763) acc 68.7500 (80.4167) lr 1.0628e-03 eta 0:17:18 +epoch [27/50] batch [5/31] time 1.368 (1.524) data 0.000 (0.156) loss 0.2568 (0.6050) acc 100.0000 (85.0000) lr 1.0000e-03 eta 0:18:46 +epoch [27/50] batch [10/31] time 1.353 (1.446) data 0.000 (0.078) loss 0.7100 (0.7033) acc 78.1250 (82.5000) lr 1.0000e-03 eta 0:17:41 +epoch [27/50] batch [15/31] time 1.341 (1.419) data 0.000 (0.052) loss 0.6992 (0.7201) acc 84.3750 (82.5000) lr 1.0000e-03 eta 0:17:14 +epoch [27/50] batch [20/31] time 1.351 (1.404) data 0.000 (0.039) loss 0.4177 (0.7230) acc 87.5000 (82.0312) lr 1.0000e-03 eta 0:16:56 +epoch [27/50] batch [25/31] time 1.347 (1.395) data 0.000 (0.031) loss 0.4500 (0.7614) 
acc 87.5000 (81.1250) lr 1.0000e-03 eta 0:16:42 +epoch [27/50] batch [30/31] time 1.370 (1.391) data 0.000 (0.026) loss 0.4106 (0.7839) acc 93.7500 (80.8333) lr 1.0000e-03 eta 0:16:33 +epoch [28/50] batch [5/31] time 1.352 (1.519) data 0.000 (0.158) loss 0.4041 (0.5725) acc 90.6250 (85.0000) lr 9.3721e-04 eta 0:17:55 +epoch [28/50] batch [10/31] time 1.355 (1.442) data 0.000 (0.079) loss 0.4878 (0.7801) acc 90.6250 (82.5000) lr 9.3721e-04 eta 0:16:53 +epoch [28/50] batch [15/31] time 1.358 (1.415) data 0.000 (0.053) loss 0.6880 (0.7337) acc 87.5000 (83.3333) lr 9.3721e-04 eta 0:16:27 +epoch [28/50] batch [20/31] time 1.379 (1.409) data 0.000 (0.040) loss 0.7412 (0.7310) acc 81.2500 (82.9688) lr 9.3721e-04 eta 0:16:16 +epoch [28/50] batch [25/31] time 1.351 (1.399) data 0.000 (0.032) loss 1.3350 (0.7550) acc 81.2500 (82.7500) lr 9.3721e-04 eta 0:16:02 +epoch [28/50] batch [30/31] time 1.358 (1.391) data 0.000 (0.027) loss 1.0928 (0.8041) acc 68.7500 (80.9375) lr 9.3721e-04 eta 0:15:49 +epoch [29/50] batch [5/31] time 1.354 (1.532) data 0.000 (0.176) loss 0.5854 (0.9002) acc 84.3750 (79.3750) lr 8.7467e-04 eta 0:17:17 +epoch [29/50] batch [10/31] time 1.356 (1.441) data 0.000 (0.088) loss 0.7280 (0.7303) acc 81.2500 (81.2500) lr 8.7467e-04 eta 0:16:08 +epoch [29/50] batch [15/31] time 1.358 (1.417) data 0.000 (0.059) loss 0.9443 (0.7629) acc 78.1250 (82.0833) lr 8.7467e-04 eta 0:15:45 +epoch [29/50] batch [20/31] time 1.350 (1.403) data 0.000 (0.044) loss 0.9956 (0.8131) acc 81.2500 (80.9375) lr 8.7467e-04 eta 0:15:28 +epoch [29/50] batch [25/31] time 1.374 (1.395) data 0.000 (0.035) loss 0.3628 (0.7844) acc 87.5000 (81.6250) lr 8.7467e-04 eta 0:15:16 +epoch [29/50] batch [30/31] time 1.371 (1.394) data 0.000 (0.030) loss 0.8682 (0.8135) acc 78.1250 (81.3542) lr 8.7467e-04 eta 0:15:08 +epoch [30/50] batch [5/31] time 1.368 (1.547) data 0.000 (0.173) loss 0.8262 (0.8155) acc 78.1250 (80.6250) lr 8.1262e-04 eta 0:16:39 +epoch [30/50] batch [10/31] time 1.377 (1.453) 
data 0.000 (0.087) loss 0.5957 (0.8096) acc 84.3750 (80.0000) lr 8.1262e-04 eta 0:15:31 +epoch [30/50] batch [15/31] time 1.367 (1.421) data 0.000 (0.058) loss 0.8037 (0.7957) acc 84.3750 (81.2500) lr 8.1262e-04 eta 0:15:03 +epoch [30/50] batch [20/31] time 1.376 (1.403) data 0.000 (0.044) loss 0.9360 (0.8244) acc 81.2500 (81.0938) lr 8.1262e-04 eta 0:14:45 +epoch [30/50] batch [25/31] time 1.383 (1.394) data 0.000 (0.035) loss 0.6313 (0.8080) acc 84.3750 (81.3750) lr 8.1262e-04 eta 0:14:32 +epoch [30/50] batch [30/31] time 1.351 (1.387) data 0.000 (0.029) loss 1.0420 (0.8408) acc 75.0000 (80.2083) lr 8.1262e-04 eta 0:14:21 +epoch [31/50] batch [5/31] time 1.372 (1.537) data 0.000 (0.177) loss 0.7627 (0.5999) acc 84.3750 (86.8750) lr 7.5131e-04 eta 0:15:45 +epoch [31/50] batch [10/31] time 1.350 (1.457) data 0.000 (0.089) loss 0.8643 (0.7649) acc 87.5000 (84.0625) lr 7.5131e-04 eta 0:14:48 +epoch [31/50] batch [15/31] time 1.394 (1.428) data 0.000 (0.059) loss 0.4229 (0.8081) acc 90.6250 (81.4583) lr 7.5131e-04 eta 0:14:23 +epoch [31/50] batch [20/31] time 1.360 (1.408) data 0.000 (0.045) loss 0.9180 (0.7726) acc 78.1250 (82.3438) lr 7.5131e-04 eta 0:14:04 +epoch [31/50] batch [25/31] time 1.363 (1.400) data 0.000 (0.036) loss 0.5547 (0.7903) acc 84.3750 (81.5000) lr 7.5131e-04 eta 0:13:52 +epoch [31/50] batch [30/31] time 1.369 (1.393) data 0.000 (0.030) loss 0.8379 (0.7622) acc 81.2500 (82.0833) lr 7.5131e-04 eta 0:13:41 +epoch [32/50] batch [5/31] time 1.361 (1.531) data 0.000 (0.163) loss 0.8901 (0.6834) acc 75.0000 (81.2500) lr 6.9098e-04 eta 0:14:53 +epoch [32/50] batch [10/31] time 1.370 (1.443) data 0.000 (0.082) loss 0.6758 (0.8104) acc 87.5000 (80.6250) lr 6.9098e-04 eta 0:13:55 +epoch [32/50] batch [15/31] time 1.481 (1.423) data 0.001 (0.055) loss 0.8145 (0.7890) acc 75.0000 (81.2500) lr 6.9098e-04 eta 0:13:37 +epoch [32/50] batch [20/31] time 1.355 (1.407) data 0.000 (0.041) loss 0.7925 (0.7986) acc 84.3750 (81.7188) lr 6.9098e-04 eta 0:13:20 +epoch 
[32/50] batch [25/31] time 1.362 (1.397) data 0.000 (0.033) loss 0.3628 (0.7465) acc 87.5000 (82.2500) lr 6.9098e-04 eta 0:13:07 +epoch [32/50] batch [30/31] time 1.372 (1.392) data 0.000 (0.028) loss 1.1992 (0.7569) acc 71.8750 (81.3542) lr 6.9098e-04 eta 0:12:57 +epoch [33/50] batch [5/31] time 1.351 (1.550) data 0.000 (0.183) loss 0.7964 (0.8046) acc 84.3750 (84.3750) lr 6.3188e-04 eta 0:14:17 +epoch [33/50] batch [10/31] time 1.361 (1.452) data 0.000 (0.092) loss 0.6265 (0.7440) acc 84.3750 (81.8750) lr 6.3188e-04 eta 0:13:15 +epoch [33/50] batch [15/31] time 1.370 (1.421) data 0.001 (0.061) loss 0.7549 (0.7663) acc 78.1250 (82.0833) lr 6.3188e-04 eta 0:12:51 +epoch [33/50] batch [20/31] time 1.352 (1.407) data 0.000 (0.046) loss 0.6885 (0.7874) acc 84.3750 (82.1875) lr 6.3188e-04 eta 0:12:37 +epoch [33/50] batch [25/31] time 1.393 (1.405) data 0.000 (0.037) loss 0.8408 (0.7877) acc 81.2500 (82.6250) lr 6.3188e-04 eta 0:12:28 +epoch [33/50] batch [30/31] time 1.352 (1.399) data 0.000 (0.031) loss 0.5127 (0.7540) acc 84.3750 (83.4375) lr 6.3188e-04 eta 0:12:18 +epoch [34/50] batch [5/31] time 1.361 (1.535) data 0.000 (0.175) loss 0.6685 (0.6446) acc 87.5000 (85.6250) lr 5.7422e-04 eta 0:13:21 +epoch [34/50] batch [10/31] time 1.362 (1.451) data 0.000 (0.088) loss 0.6577 (0.5820) acc 87.5000 (86.8750) lr 5.7422e-04 eta 0:12:30 +epoch [34/50] batch [15/31] time 1.374 (1.425) data 0.000 (0.059) loss 0.6138 (0.6034) acc 75.0000 (85.4167) lr 5.7422e-04 eta 0:12:09 +epoch [34/50] batch [20/31] time 1.353 (1.410) data 0.000 (0.044) loss 1.3799 (0.7137) acc 75.0000 (83.9062) lr 5.7422e-04 eta 0:11:55 +epoch [34/50] batch [25/31] time 1.360 (1.400) data 0.000 (0.035) loss 0.4187 (0.7303) acc 90.6250 (83.5000) lr 5.7422e-04 eta 0:11:42 +epoch [34/50] batch [30/31] time 1.359 (1.394) data 0.000 (0.029) loss 0.8438 (0.7268) acc 84.3750 (83.3333) lr 5.7422e-04 eta 0:11:33 +epoch [35/50] batch [5/31] time 1.378 (1.557) data 0.000 (0.158) loss 0.6763 (0.6388) acc 87.5000 
(84.3750) lr 5.1825e-04 eta 0:12:44 +epoch [35/50] batch [10/31] time 1.374 (1.462) data 0.001 (0.079) loss 0.5815 (0.6822) acc 84.3750 (85.0000) lr 5.1825e-04 eta 0:11:50 +epoch [35/50] batch [15/31] time 1.364 (1.429) data 0.000 (0.053) loss 0.5532 (0.6790) acc 90.6250 (84.7917) lr 5.1825e-04 eta 0:11:27 +epoch [35/50] batch [20/31] time 1.374 (1.413) data 0.000 (0.040) loss 0.4209 (0.6829) acc 90.6250 (85.0000) lr 5.1825e-04 eta 0:11:12 +epoch [35/50] batch [25/31] time 1.347 (1.402) data 0.000 (0.032) loss 0.7856 (0.6900) acc 78.1250 (84.1250) lr 5.1825e-04 eta 0:11:00 +epoch [35/50] batch [30/31] time 1.352 (1.396) data 0.000 (0.027) loss 0.7393 (0.7072) acc 75.0000 (83.5417) lr 5.1825e-04 eta 0:10:50 +epoch [36/50] batch [5/31] time 1.386 (1.530) data 0.000 (0.160) loss 0.6338 (0.7847) acc 84.3750 (81.8750) lr 4.6417e-04 eta 0:11:43 +epoch [36/50] batch [10/31] time 1.364 (1.446) data 0.000 (0.080) loss 0.2192 (0.6029) acc 96.8750 (86.2500) lr 4.6417e-04 eta 0:10:58 +epoch [36/50] batch [15/31] time 1.352 (1.415) data 0.000 (0.054) loss 0.8115 (0.7403) acc 87.5000 (83.7500) lr 4.6417e-04 eta 0:10:36 +epoch [36/50] batch [20/31] time 1.358 (1.410) data 0.000 (0.040) loss 0.9043 (0.7901) acc 75.0000 (81.7188) lr 4.6417e-04 eta 0:10:27 +epoch [36/50] batch [25/31] time 1.343 (1.399) data 0.000 (0.032) loss 0.5605 (0.8048) acc 87.5000 (81.2500) lr 4.6417e-04 eta 0:10:15 +epoch [36/50] batch [30/31] time 1.340 (1.392) data 0.000 (0.027) loss 0.7505 (0.8019) acc 78.1250 (81.3542) lr 4.6417e-04 eta 0:10:05 +epoch [37/50] batch [5/31] time 1.343 (1.517) data 0.000 (0.154) loss 0.9321 (0.6248) acc 75.0000 (86.8750) lr 4.1221e-04 eta 0:10:50 +epoch [37/50] batch [10/31] time 1.360 (1.435) data 0.000 (0.077) loss 0.6934 (0.6264) acc 87.5000 (87.8125) lr 4.1221e-04 eta 0:10:08 +epoch [37/50] batch [15/31] time 1.366 (1.410) data 0.000 (0.052) loss 0.2822 (0.6181) acc 93.7500 (87.2917) lr 4.1221e-04 eta 0:09:50 +epoch [37/50] batch [20/31] time 1.350 (1.395) data 0.000 
(0.039) loss 1.2031 (0.6996) acc 75.0000 (85.3125) lr 4.1221e-04 eta 0:09:37 +epoch [37/50] batch [25/31] time 1.362 (1.388) data 0.000 (0.031) loss 0.4360 (0.6718) acc 87.5000 (85.6250) lr 4.1221e-04 eta 0:09:27 +epoch [37/50] batch [30/31] time 1.358 (1.385) data 0.000 (0.026) loss 0.6929 (0.6753) acc 84.3750 (85.0000) lr 4.1221e-04 eta 0:09:19 +epoch [38/50] batch [5/31] time 1.357 (1.534) data 0.000 (0.166) loss 0.4871 (0.5136) acc 84.3750 (84.3750) lr 3.6258e-04 eta 0:10:10 +epoch [38/50] batch [10/31] time 1.342 (1.443) data 0.000 (0.083) loss 0.5322 (0.5161) acc 84.3750 (85.0000) lr 3.6258e-04 eta 0:09:27 +epoch [38/50] batch [15/31] time 1.354 (1.414) data 0.000 (0.056) loss 0.3340 (0.6272) acc 93.7500 (84.3750) lr 3.6258e-04 eta 0:09:08 +epoch [38/50] batch [20/31] time 1.367 (1.400) data 0.000 (0.042) loss 0.5889 (0.6158) acc 81.2500 (84.3750) lr 3.6258e-04 eta 0:08:56 +epoch [38/50] batch [25/31] time 1.365 (1.392) data 0.000 (0.034) loss 0.6172 (0.6375) acc 87.5000 (85.0000) lr 3.6258e-04 eta 0:08:46 +epoch [38/50] batch [30/31] time 1.370 (1.387) data 0.000 (0.028) loss 0.4006 (0.6727) acc 93.7500 (84.4792) lr 3.6258e-04 eta 0:08:37 +epoch [39/50] batch [5/31] time 1.354 (1.596) data 0.000 (0.237) loss 0.4221 (0.6093) acc 81.2500 (83.7500) lr 3.1545e-04 eta 0:09:45 +epoch [39/50] batch [10/31] time 1.372 (1.480) data 0.000 (0.118) loss 0.9253 (0.6603) acc 78.1250 (82.8125) lr 3.1545e-04 eta 0:08:55 +epoch [39/50] batch [15/31] time 1.359 (1.439) data 0.000 (0.079) loss 0.5771 (0.6698) acc 84.3750 (82.5000) lr 3.1545e-04 eta 0:08:33 +epoch [39/50] batch [20/31] time 1.520 (1.425) data 0.000 (0.059) loss 0.3345 (0.6379) acc 90.6250 (83.4375) lr 3.1545e-04 eta 0:08:21 +epoch [39/50] batch [25/31] time 1.360 (1.413) data 0.000 (0.048) loss 0.4636 (0.6119) acc 84.3750 (83.7500) lr 3.1545e-04 eta 0:08:10 +epoch [39/50] batch [30/31] time 1.386 (1.406) data 0.000 (0.040) loss 0.4927 (0.6208) acc 87.5000 (84.0625) lr 3.1545e-04 eta 0:08:00 +epoch [40/50] batch 
[5/31] time 1.356 (1.531) data 0.000 (0.162) loss 0.7192 (0.6792) acc 87.5000 (85.0000) lr 2.7103e-04 eta 0:08:34 +epoch [40/50] batch [10/31] time 1.365 (1.446) data 0.001 (0.081) loss 1.5010 (0.8282) acc 71.8750 (81.8750) lr 2.7103e-04 eta 0:07:58 +epoch [40/50] batch [15/31] time 1.353 (1.416) data 0.000 (0.054) loss 0.7271 (0.7403) acc 90.6250 (83.5417) lr 2.7103e-04 eta 0:07:41 +epoch [40/50] batch [20/31] time 1.356 (1.402) data 0.000 (0.041) loss 0.4612 (0.7264) acc 87.5000 (83.1250) lr 2.7103e-04 eta 0:07:30 +epoch [40/50] batch [25/31] time 1.350 (1.395) data 0.000 (0.033) loss 0.6265 (0.7287) acc 87.5000 (83.2500) lr 2.7103e-04 eta 0:07:20 +epoch [40/50] batch [30/31] time 1.364 (1.388) data 0.000 (0.027) loss 1.0703 (0.7686) acc 75.0000 (82.7083) lr 2.7103e-04 eta 0:07:11 +epoch [41/50] batch [5/31] time 1.353 (1.523) data 0.000 (0.156) loss 0.4106 (0.5667) acc 93.7500 (83.1250) lr 2.2949e-04 eta 0:07:44 +epoch [41/50] batch [10/31] time 1.340 (1.432) data 0.000 (0.078) loss 0.3313 (0.5844) acc 90.6250 (83.1250) lr 2.2949e-04 eta 0:07:09 +epoch [41/50] batch [15/31] time 1.351 (1.408) data 0.000 (0.052) loss 0.3828 (0.5737) acc 96.8750 (85.2083) lr 2.2949e-04 eta 0:06:55 +epoch [41/50] batch [20/31] time 1.364 (1.397) data 0.000 (0.039) loss 0.7964 (0.6286) acc 87.5000 (85.1562) lr 2.2949e-04 eta 0:06:45 +epoch [41/50] batch [25/31] time 1.354 (1.388) data 0.000 (0.032) loss 0.7129 (0.6551) acc 84.3750 (85.2500) lr 2.2949e-04 eta 0:06:35 +epoch [41/50] batch [30/31] time 1.373 (1.384) data 0.000 (0.026) loss 0.4146 (0.6632) acc 84.3750 (84.6875) lr 2.2949e-04 eta 0:06:27 +epoch [42/50] batch [5/31] time 1.363 (1.528) data 0.000 (0.160) loss 1.2539 (0.8131) acc 65.6250 (83.1250) lr 1.9098e-04 eta 0:06:58 +epoch [42/50] batch [10/31] time 1.344 (1.452) data 0.000 (0.080) loss 0.7490 (0.8107) acc 81.2500 (83.4375) lr 1.9098e-04 eta 0:06:30 +epoch [42/50] batch [15/31] time 1.363 (1.419) data 0.000 (0.054) loss 0.9121 (0.7101) acc 84.3750 (84.7917) lr 
1.9098e-04 eta 0:06:14 +epoch [42/50] batch [20/31] time 1.353 (1.403) data 0.000 (0.040) loss 0.4937 (0.6584) acc 93.7500 (85.4688) lr 1.9098e-04 eta 0:06:03 +epoch [42/50] batch [25/31] time 1.359 (1.394) data 0.000 (0.032) loss 0.7808 (0.6880) acc 78.1250 (84.6250) lr 1.9098e-04 eta 0:05:54 +epoch [42/50] batch [30/31] time 1.348 (1.387) data 0.000 (0.027) loss 1.0010 (0.6984) acc 75.0000 (84.3750) lr 1.9098e-04 eta 0:05:45 +epoch [43/50] batch [5/31] time 1.353 (1.545) data 0.000 (0.185) loss 0.8589 (0.5351) acc 84.3750 (84.3750) lr 1.5567e-04 eta 0:06:15 +epoch [43/50] batch [10/31] time 1.370 (1.451) data 0.001 (0.093) loss 0.9268 (0.5684) acc 81.2500 (84.6875) lr 1.5567e-04 eta 0:05:45 +epoch [43/50] batch [15/31] time 1.492 (1.429) data 0.000 (0.062) loss 0.6475 (0.5674) acc 84.3750 (85.4167) lr 1.5567e-04 eta 0:05:32 +epoch [43/50] batch [20/31] time 1.351 (1.413) data 0.001 (0.047) loss 0.4697 (0.5759) acc 90.6250 (85.6250) lr 1.5567e-04 eta 0:05:22 +epoch [43/50] batch [25/31] time 1.354 (1.400) data 0.000 (0.037) loss 0.9736 (0.6313) acc 81.2500 (85.5000) lr 1.5567e-04 eta 0:05:12 +epoch [43/50] batch [30/31] time 1.360 (1.393) data 0.000 (0.031) loss 0.7437 (0.6604) acc 81.2500 (84.2708) lr 1.5567e-04 eta 0:05:03 +epoch [44/50] batch [5/31] time 1.347 (1.527) data 0.000 (0.164) loss 0.5854 (0.5700) acc 87.5000 (86.2500) lr 1.2369e-04 eta 0:05:23 +epoch [44/50] batch [10/31] time 1.384 (1.446) data 0.000 (0.082) loss 0.3213 (0.5784) acc 87.5000 (86.8750) lr 1.2369e-04 eta 0:04:59 +epoch [44/50] batch [15/31] time 1.355 (1.416) data 0.000 (0.055) loss 0.5913 (0.5830) acc 84.3750 (86.8750) lr 1.2369e-04 eta 0:04:45 +epoch [44/50] batch [20/31] time 1.358 (1.402) data 0.000 (0.041) loss 0.6777 (0.6566) acc 84.3750 (85.1562) lr 1.2369e-04 eta 0:04:36 +epoch [44/50] batch [25/31] time 1.346 (1.397) data 0.000 (0.033) loss 0.5444 (0.6493) acc 90.6250 (85.3750) lr 1.2369e-04 eta 0:04:28 +epoch [44/50] batch [30/31] time 1.360 (1.391) data 0.000 (0.028) loss 
0.5508 (0.6347) acc 93.7500 (85.9375) lr 1.2369e-04 eta 0:04:20 +epoch [45/50] batch [5/31] time 1.365 (1.543) data 0.000 (0.174) loss 1.0723 (0.8200) acc 81.2500 (80.0000) lr 9.5173e-05 eta 0:04:39 +epoch [45/50] batch [10/31] time 1.394 (1.454) data 0.000 (0.087) loss 0.5918 (0.8144) acc 87.5000 (81.5625) lr 9.5173e-05 eta 0:04:15 +epoch [45/50] batch [15/31] time 1.360 (1.424) data 0.000 (0.058) loss 0.6689 (0.7740) acc 84.3750 (82.2917) lr 9.5173e-05 eta 0:04:03 +epoch [45/50] batch [20/31] time 1.346 (1.405) data 0.000 (0.044) loss 0.3132 (0.7056) acc 93.7500 (83.9062) lr 9.5173e-05 eta 0:03:53 +epoch [45/50] batch [25/31] time 1.353 (1.396) data 0.000 (0.035) loss 0.7993 (0.7100) acc 84.3750 (84.1250) lr 9.5173e-05 eta 0:03:44 +epoch [45/50] batch [30/31] time 1.371 (1.390) data 0.000 (0.029) loss 0.7197 (0.7463) acc 71.8750 (83.2292) lr 9.5173e-05 eta 0:03:36 +epoch [46/50] batch [5/31] time 1.393 (1.566) data 0.001 (0.169) loss 0.6909 (0.6827) acc 87.5000 (85.6250) lr 7.0224e-05 eta 0:03:54 +epoch [46/50] batch [10/31] time 1.363 (1.464) data 0.000 (0.085) loss 0.4412 (0.5646) acc 81.2500 (87.8125) lr 7.0224e-05 eta 0:03:32 +epoch [46/50] batch [15/31] time 1.359 (1.428) data 0.001 (0.057) loss 0.4761 (0.5907) acc 90.6250 (87.7083) lr 7.0224e-05 eta 0:03:19 +epoch [46/50] batch [20/31] time 1.373 (1.410) data 0.000 (0.043) loss 1.0176 (0.6998) acc 81.2500 (85.6250) lr 7.0224e-05 eta 0:03:10 +epoch [46/50] batch [25/31] time 1.387 (1.399) data 0.000 (0.034) loss 0.8896 (0.7250) acc 84.3750 (85.0000) lr 7.0224e-05 eta 0:03:01 +epoch [46/50] batch [30/31] time 1.371 (1.393) data 0.000 (0.028) loss 1.0137 (0.7039) acc 71.8750 (84.1667) lr 7.0224e-05 eta 0:02:54 +epoch [47/50] batch [5/31] time 1.353 (1.543) data 0.000 (0.180) loss 0.3164 (0.5798) acc 96.8750 (87.5000) lr 4.8943e-05 eta 0:03:03 +epoch [47/50] batch [10/31] time 1.365 (1.457) data 0.000 (0.090) loss 0.3396 (0.6585) acc 100.0000 (87.8125) lr 4.8943e-05 eta 0:02:46 +epoch [47/50] batch [15/31] time 
1.357 (1.423) data 0.000 (0.060) loss 0.5327 (0.6760) acc 93.7500 (86.4583) lr 4.8943e-05 eta 0:02:35 +epoch [47/50] batch [20/31] time 1.361 (1.415) data 0.000 (0.045) loss 1.8213 (0.7415) acc 71.8750 (85.0000) lr 4.8943e-05 eta 0:02:27 +epoch [47/50] batch [25/31] time 1.378 (1.406) data 0.000 (0.036) loss 0.4988 (0.7645) acc 87.5000 (84.0000) lr 4.8943e-05 eta 0:02:19 +epoch [47/50] batch [30/31] time 1.376 (1.398) data 0.000 (0.030) loss 1.0781 (0.7441) acc 78.1250 (84.4792) lr 4.8943e-05 eta 0:02:11 +epoch [48/50] batch [5/31] time 1.348 (1.544) data 0.000 (0.171) loss 1.1416 (0.7943) acc 81.2500 (82.5000) lr 3.1417e-05 eta 0:02:15 +epoch [48/50] batch [10/31] time 1.344 (1.450) data 0.000 (0.086) loss 0.4219 (0.6777) acc 81.2500 (82.1875) lr 3.1417e-05 eta 0:02:00 +epoch [48/50] batch [15/31] time 1.360 (1.420) data 0.000 (0.057) loss 0.5894 (0.6531) acc 90.6250 (83.5417) lr 3.1417e-05 eta 0:01:50 +epoch [48/50] batch [20/31] time 1.371 (1.406) data 0.000 (0.043) loss 0.3237 (0.6431) acc 93.7500 (84.2188) lr 3.1417e-05 eta 0:01:42 +epoch [48/50] batch [25/31] time 1.360 (1.397) data 0.000 (0.035) loss 0.8301 (0.6769) acc 84.3750 (83.0000) lr 3.1417e-05 eta 0:01:34 +epoch [48/50] batch [30/31] time 1.368 (1.391) data 0.000 (0.029) loss 0.5938 (0.6646) acc 90.6250 (83.7500) lr 3.1417e-05 eta 0:01:27 +epoch [49/50] batch [5/31] time 1.347 (1.504) data 0.001 (0.156) loss 0.6641 (0.7141) acc 93.7500 (86.2500) lr 1.7713e-05 eta 0:01:25 +epoch [49/50] batch [10/31] time 1.362 (1.435) data 0.000 (0.078) loss 0.9292 (0.6986) acc 87.5000 (86.2500) lr 1.7713e-05 eta 0:01:14 +epoch [49/50] batch [15/31] time 1.374 (1.407) data 0.000 (0.052) loss 0.4053 (0.6653) acc 87.5000 (86.2500) lr 1.7713e-05 eta 0:01:06 +epoch [49/50] batch [20/31] time 1.354 (1.394) data 0.000 (0.039) loss 0.6465 (0.6729) acc 87.5000 (85.9375) lr 1.7713e-05 eta 0:00:58 +epoch [49/50] batch [25/31] time 1.362 (1.388) data 0.000 (0.031) loss 0.6372 (0.6985) acc 78.1250 (85.0000) lr 1.7713e-05 eta 
0:00:51 +epoch [49/50] batch [30/31] time 1.350 (1.383) data 0.000 (0.026) loss 0.3113 (0.6837) acc 93.7500 (84.8958) lr 1.7713e-05 eta 0:00:44 +epoch [50/50] batch [5/31] time 1.375 (1.554) data 0.000 (0.183) loss 0.7314 (0.6158) acc 81.2500 (83.1250) lr 7.8853e-06 eta 0:00:40 +epoch [50/50] batch [10/31] time 1.367 (1.460) data 0.000 (0.092) loss 1.0088 (0.6608) acc 81.2500 (83.7500) lr 7.8853e-06 eta 0:00:30 +epoch [50/50] batch [15/31] time 1.390 (1.429) data 0.000 (0.061) loss 0.8169 (0.6978) acc 84.3750 (83.9583) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [20/31] time 1.359 (1.420) data 0.000 (0.046) loss 0.3306 (0.6946) acc 87.5000 (84.0625) lr 7.8853e-06 eta 0:00:15 +epoch [50/50] batch [25/31] time 1.369 (1.408) data 0.000 (0.037) loss 0.6626 (0.7117) acc 81.2500 (83.5000) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [30/31] time 1.372 (1.400) data 0.000 (0.031) loss 1.1152 (0.7395) acc 81.2500 (83.3333) lr 7.8853e-06 eta 0:00:01 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,855 +* accuracy: 71.7% +* error: 28.3% +* macro_f1: 70.9% +Elapsed: 0:41:39 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..f007721a Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698414340.ckb-gpu-lambda.180810.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698414340.ckb-gpu-lambda.180810.0 new file mode 100644 index 00000000..197ddd09 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698414340.ckb-gpu-lambda.180810.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..8283fa79 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + 
NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + 
STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 
+/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1788.053 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp 
ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading 
CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! +Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/31] time 1.339 (2.197) data 0.000 (0.177) loss 2.6309 (3.1812) acc 50.0000 (38.7500) lr 1.0000e-05 eta 0:56:33 +epoch [1/50] batch [10/31] time 1.348 (1.774) data 0.000 (0.089) loss 2.1016 (2.9586) acc 53.1250 (42.1875) lr 1.0000e-05 eta 0:45:32 +epoch [1/50] batch [15/31] time 1.364 (1.635) data 0.000 (0.059) loss 2.1426 (2.7659) acc 68.7500 (45.4167) lr 1.0000e-05 eta 0:41:50 +epoch [1/50] batch [20/31] time 1.353 (1.567) data 0.000 (0.045) loss 1.6680 (2.6183) acc 50.0000 (46.5625) lr 1.0000e-05 eta 0:39:58 +epoch [1/50] batch [25/31] time 1.357 (1.525) data 0.000 (0.036) loss 2.6172 (2.5639) acc 50.0000 (48.1250) lr 1.0000e-05 eta 0:38:45 +epoch [1/50] batch [30/31] time 1.371 (1.497) data 0.000 (0.030) loss 1.8232 (2.4429) acc 59.3750 (50.1042) lr 1.0000e-05 eta 0:37:55 +epoch [2/50] batch [5/31] time 1.364 (1.541) data 0.001 (0.176) loss 1.2686 (1.7197) acc 65.6250 (61.8750) lr 2.0000e-03 eta 0:38:53 +epoch [2/50] batch [10/31] time 1.363 (1.453) data 0.000 (0.088) loss 1.6875 (1.6651) acc 59.3750 (63.1250) lr 2.0000e-03 eta 0:36:33 +epoch [2/50] batch [15/31] time 1.384 (1.424) data 0.000 (0.059) loss 2.5840 (1.7930) acc 46.8750 (61.2500) lr 2.0000e-03 eta 0:35:41 +epoch [2/50] batch [20/31] time 1.365 (1.408) data 0.000 (0.044) loss 2.2246 (1.7135) acc 59.3750 (62.3438) lr 2.0000e-03 eta 0:35:10 +epoch [2/50] batch [25/31] time 1.360 (1.399) data 0.000 (0.035) loss 2.1699 (1.6595) acc 53.1250 (63.5000) lr 2.0000e-03 eta 0:34:50 +epoch [2/50] batch [30/31] time 1.361 
(1.394) data 0.000 (0.030) loss 1.2598 (1.5757) acc 68.7500 (64.6875) lr 2.0000e-03 eta 0:34:35 +epoch [3/50] batch [5/31] time 1.375 (1.552) data 0.000 (0.179) loss 1.3213 (1.3980) acc 68.7500 (61.8750) lr 1.9980e-03 eta 0:38:20 +epoch [3/50] batch [10/31] time 1.384 (1.463) data 0.000 (0.090) loss 0.8296 (1.1829) acc 75.0000 (67.8125) lr 1.9980e-03 eta 0:36:02 +epoch [3/50] batch [15/31] time 1.349 (1.429) data 0.000 (0.060) loss 1.1035 (1.1757) acc 75.0000 (68.9583) lr 1.9980e-03 eta 0:35:04 +epoch [3/50] batch [20/31] time 1.359 (1.411) data 0.000 (0.045) loss 0.8706 (1.2156) acc 78.1250 (68.7500) lr 1.9980e-03 eta 0:34:31 +epoch [3/50] batch [25/31] time 1.362 (1.401) data 0.000 (0.036) loss 1.2256 (1.2496) acc 68.7500 (68.2500) lr 1.9980e-03 eta 0:34:09 +epoch [3/50] batch [30/31] time 1.354 (1.394) data 0.000 (0.030) loss 1.5312 (1.3330) acc 65.6250 (67.1875) lr 1.9980e-03 eta 0:33:52 +epoch [4/50] batch [5/31] time 1.348 (1.546) data 0.001 (0.180) loss 1.2080 (1.3455) acc 62.5000 (65.0000) lr 1.9921e-03 eta 0:37:25 +epoch [4/50] batch [10/31] time 1.354 (1.452) data 0.000 (0.090) loss 1.5068 (1.2898) acc 59.3750 (65.9375) lr 1.9921e-03 eta 0:35:01 +epoch [4/50] batch [15/31] time 1.363 (1.423) data 0.000 (0.060) loss 1.3164 (1.2254) acc 68.7500 (68.1250) lr 1.9921e-03 eta 0:34:11 +epoch [4/50] batch [20/31] time 1.374 (1.408) data 0.000 (0.045) loss 1.0312 (1.1816) acc 59.3750 (67.8125) lr 1.9921e-03 eta 0:33:43 +epoch [4/50] batch [25/31] time 1.364 (1.399) data 0.000 (0.036) loss 1.1445 (1.2537) acc 65.6250 (66.7500) lr 1.9921e-03 eta 0:33:22 +epoch [4/50] batch [30/31] time 1.376 (1.393) data 0.000 (0.030) loss 1.1035 (1.2443) acc 71.8750 (67.7083) lr 1.9921e-03 eta 0:33:07 +epoch [5/50] batch [5/31] time 1.354 (1.570) data 0.000 (0.176) loss 0.8760 (1.0461) acc 78.1250 (75.0000) lr 1.9823e-03 eta 0:37:10 +epoch [5/50] batch [10/31] time 1.371 (1.467) data 0.000 (0.088) loss 1.2490 (1.1023) acc 65.6250 (73.4375) lr 1.9823e-03 eta 0:34:37 +epoch [5/50] 
batch [15/31] time 1.354 (1.431) data 0.000 (0.059) loss 1.3379 (1.1615) acc 71.8750 (72.0833) lr 1.9823e-03 eta 0:33:39 +epoch [5/50] batch [20/31] time 1.363 (1.414) data 0.000 (0.044) loss 0.7480 (1.1119) acc 87.5000 (72.5000) lr 1.9823e-03 eta 0:33:07 +epoch [5/50] batch [25/31] time 1.377 (1.403) data 0.000 (0.036) loss 1.1807 (1.1258) acc 71.8750 (71.8750) lr 1.9823e-03 eta 0:32:45 +epoch [5/50] batch [30/31] time 1.373 (1.397) data 0.000 (0.030) loss 0.7158 (1.1297) acc 81.2500 (71.8750) lr 1.9823e-03 eta 0:32:30 +epoch [6/50] batch [5/31] time 1.362 (1.564) data 0.000 (0.190) loss 1.4688 (1.2365) acc 65.6250 (71.8750) lr 1.9686e-03 eta 0:36:13 +epoch [6/50] batch [10/31] time 1.363 (1.459) data 0.001 (0.095) loss 1.1426 (1.1428) acc 68.7500 (71.2500) lr 1.9686e-03 eta 0:33:40 +epoch [6/50] batch [15/31] time 1.360 (1.424) data 0.000 (0.064) loss 1.0586 (1.1300) acc 81.2500 (72.2917) lr 1.9686e-03 eta 0:32:44 +epoch [6/50] batch [20/31] time 1.488 (1.414) data 0.000 (0.048) loss 1.0439 (1.1024) acc 68.7500 (72.1875) lr 1.9686e-03 eta 0:32:24 +epoch [6/50] batch [25/31] time 1.373 (1.404) data 0.000 (0.038) loss 1.0908 (1.1245) acc 75.0000 (72.2500) lr 1.9686e-03 eta 0:32:03 +epoch [6/50] batch [30/31] time 1.349 (1.395) data 0.000 (0.032) loss 1.6514 (1.1931) acc 75.0000 (71.2500) lr 1.9686e-03 eta 0:31:44 +epoch [7/50] batch [5/31] time 1.364 (1.537) data 0.000 (0.180) loss 0.8696 (1.1160) acc 75.0000 (71.2500) lr 1.9511e-03 eta 0:34:49 +epoch [7/50] batch [10/31] time 1.374 (1.455) data 0.000 (0.090) loss 0.2893 (0.9391) acc 90.6250 (74.3750) lr 1.9511e-03 eta 0:32:49 +epoch [7/50] batch [15/31] time 1.361 (1.421) data 0.001 (0.060) loss 1.1846 (1.0136) acc 65.6250 (73.9583) lr 1.9511e-03 eta 0:31:57 +epoch [7/50] batch [20/31] time 1.360 (1.406) data 0.000 (0.045) loss 0.6582 (1.0456) acc 81.2500 (73.1250) lr 1.9511e-03 eta 0:31:30 +epoch [7/50] batch [25/31] time 1.350 (1.396) data 0.000 (0.036) loss 1.1006 (1.0727) acc 81.2500 (73.0000) lr 1.9511e-03 
eta 0:31:09 +epoch [7/50] batch [30/31] time 1.341 (1.390) data 0.000 (0.030) loss 1.6680 (1.1117) acc 68.7500 (72.1875) lr 1.9511e-03 eta 0:30:54 +epoch [8/50] batch [5/31] time 1.383 (1.547) data 0.000 (0.175) loss 0.8936 (1.2207) acc 75.0000 (68.7500) lr 1.9298e-03 eta 0:34:14 +epoch [8/50] batch [10/31] time 1.360 (1.455) data 0.000 (0.088) loss 1.4834 (1.1402) acc 65.6250 (70.9375) lr 1.9298e-03 eta 0:32:05 +epoch [8/50] batch [15/31] time 1.363 (1.426) data 0.000 (0.059) loss 1.2197 (1.0936) acc 68.7500 (71.8750) lr 1.9298e-03 eta 0:31:19 +epoch [8/50] batch [20/31] time 1.390 (1.413) data 0.000 (0.044) loss 0.8135 (1.0998) acc 75.0000 (72.1875) lr 1.9298e-03 eta 0:30:54 +epoch [8/50] batch [25/31] time 1.375 (1.405) data 0.000 (0.035) loss 0.7212 (1.1034) acc 87.5000 (72.2500) lr 1.9298e-03 eta 0:30:37 +epoch [8/50] batch [30/31] time 1.367 (1.397) data 0.000 (0.030) loss 1.2295 (1.1330) acc 65.6250 (71.6667) lr 1.9298e-03 eta 0:30:19 +epoch [9/50] batch [5/31] time 1.361 (1.554) data 0.001 (0.179) loss 1.8262 (1.5525) acc 62.5000 (70.0000) lr 1.9048e-03 eta 0:33:34 +epoch [9/50] batch [10/31] time 1.372 (1.463) data 0.000 (0.090) loss 0.8940 (1.3555) acc 81.2500 (70.3125) lr 1.9048e-03 eta 0:31:29 +epoch [9/50] batch [15/31] time 1.359 (1.431) data 0.001 (0.060) loss 0.6133 (1.2098) acc 81.2500 (72.7083) lr 1.9048e-03 eta 0:30:41 +epoch [9/50] batch [20/31] time 1.354 (1.416) data 0.000 (0.045) loss 1.5742 (1.1588) acc 59.3750 (73.1250) lr 1.9048e-03 eta 0:30:14 +epoch [9/50] batch [25/31] time 1.349 (1.409) data 0.000 (0.036) loss 0.9971 (1.1587) acc 75.0000 (72.3750) lr 1.9048e-03 eta 0:29:58 +epoch [9/50] batch [30/31] time 1.352 (1.401) data 0.000 (0.030) loss 0.6372 (1.1231) acc 84.3750 (73.0208) lr 1.9048e-03 eta 0:29:41 +epoch [10/50] batch [5/31] time 1.353 (1.543) data 0.000 (0.175) loss 1.1992 (0.9582) acc 78.1250 (78.1250) lr 1.8763e-03 eta 0:32:34 +epoch [10/50] batch [10/31] time 1.343 (1.448) data 0.000 (0.088) loss 1.0674 (1.0365) acc 71.8750 
(74.0625) lr 1.8763e-03 eta 0:30:26 +epoch [10/50] batch [15/31] time 1.353 (1.420) data 0.000 (0.059) loss 1.1221 (1.0673) acc 68.7500 (72.9167) lr 1.8763e-03 eta 0:29:43 +epoch [10/50] batch [20/31] time 1.375 (1.406) data 0.000 (0.044) loss 1.1514 (1.0994) acc 75.0000 (72.5000) lr 1.8763e-03 eta 0:29:19 +epoch [10/50] batch [25/31] time 1.344 (1.398) data 0.000 (0.035) loss 0.9282 (1.0593) acc 78.1250 (73.8750) lr 1.8763e-03 eta 0:29:01 +epoch [10/50] batch [30/31] time 1.359 (1.392) data 0.000 (0.029) loss 1.2744 (1.0554) acc 65.6250 (73.8542) lr 1.8763e-03 eta 0:28:47 +epoch [11/50] batch [5/31] time 1.354 (1.582) data 0.000 (0.185) loss 1.1963 (1.1809) acc 68.7500 (70.6250) lr 1.8443e-03 eta 0:32:34 +epoch [11/50] batch [10/31] time 1.355 (1.473) data 0.000 (0.093) loss 0.8555 (1.1093) acc 68.7500 (72.5000) lr 1.8443e-03 eta 0:30:12 +epoch [11/50] batch [15/31] time 1.372 (1.437) data 0.001 (0.062) loss 0.9307 (1.0801) acc 78.1250 (73.1250) lr 1.8443e-03 eta 0:29:20 +epoch [11/50] batch [20/31] time 1.341 (1.418) data 0.000 (0.047) loss 0.7671 (1.0963) acc 81.2500 (72.8125) lr 1.8443e-03 eta 0:28:49 +epoch [11/50] batch [25/31] time 1.371 (1.406) data 0.000 (0.037) loss 1.0352 (1.0634) acc 71.8750 (73.2500) lr 1.8443e-03 eta 0:28:27 +epoch [11/50] batch [30/31] time 1.373 (1.399) data 0.000 (0.031) loss 2.1387 (1.1130) acc 62.5000 (73.3333) lr 1.8443e-03 eta 0:28:12 +epoch [12/50] batch [5/31] time 1.349 (1.552) data 0.000 (0.176) loss 0.7222 (0.9814) acc 84.3750 (79.3750) lr 1.8090e-03 eta 0:31:09 +epoch [12/50] batch [10/31] time 1.359 (1.456) data 0.000 (0.088) loss 1.3135 (1.1145) acc 75.0000 (74.0625) lr 1.8090e-03 eta 0:29:05 +epoch [12/50] batch [15/31] time 1.379 (1.435) data 0.000 (0.059) loss 0.6489 (1.1010) acc 78.1250 (73.9583) lr 1.8090e-03 eta 0:28:33 +epoch [12/50] batch [20/31] time 1.365 (1.416) data 0.000 (0.044) loss 1.2988 (1.1057) acc 65.6250 (73.4375) lr 1.8090e-03 eta 0:28:03 +epoch [12/50] batch [25/31] time 1.358 (1.406) data 0.000 
(0.035) loss 1.2012 (1.0861) acc 71.8750 (73.8750) lr 1.8090e-03 eta 0:27:45 +epoch [12/50] batch [30/31] time 1.372 (1.399) data 0.000 (0.030) loss 0.4780 (1.0821) acc 87.5000 (74.0625) lr 1.8090e-03 eta 0:27:29 +epoch [13/50] batch [5/31] time 1.380 (1.550) data 0.000 (0.172) loss 0.4683 (0.7515) acc 84.3750 (80.6250) lr 1.7705e-03 eta 0:30:18 +epoch [13/50] batch [10/31] time 1.359 (1.456) data 0.000 (0.086) loss 0.5483 (0.9109) acc 84.3750 (77.5000) lr 1.7705e-03 eta 0:28:20 +epoch [13/50] batch [15/31] time 1.367 (1.428) data 0.000 (0.058) loss 0.7412 (0.9807) acc 87.5000 (76.2500) lr 1.7705e-03 eta 0:27:41 +epoch [13/50] batch [20/31] time 1.488 (1.418) data 0.000 (0.043) loss 1.2109 (1.0094) acc 68.7500 (75.3125) lr 1.7705e-03 eta 0:27:21 +epoch [13/50] batch [25/31] time 1.356 (1.407) data 0.000 (0.035) loss 0.3787 (1.0319) acc 93.7500 (75.3750) lr 1.7705e-03 eta 0:27:02 +epoch [13/50] batch [30/31] time 1.361 (1.401) data 0.000 (0.029) loss 0.8931 (1.0260) acc 78.1250 (75.1042) lr 1.7705e-03 eta 0:26:48 +epoch [14/50] batch [5/31] time 1.360 (1.556) data 0.000 (0.189) loss 0.7227 (0.7650) acc 81.2500 (79.3750) lr 1.7290e-03 eta 0:29:36 +epoch [14/50] batch [10/31] time 1.373 (1.462) data 0.000 (0.095) loss 1.1211 (0.7201) acc 71.8750 (82.5000) lr 1.7290e-03 eta 0:27:42 +epoch [14/50] batch [15/31] time 1.371 (1.429) data 0.000 (0.063) loss 1.5332 (0.8187) acc 68.7500 (80.6250) lr 1.7290e-03 eta 0:26:57 +epoch [14/50] batch [20/31] time 1.372 (1.413) data 0.000 (0.048) loss 0.5063 (0.8612) acc 87.5000 (79.0625) lr 1.7290e-03 eta 0:26:32 +epoch [14/50] batch [25/31] time 1.398 (1.406) data 0.000 (0.038) loss 0.8613 (0.8611) acc 78.1250 (78.8750) lr 1.7290e-03 eta 0:26:17 +epoch [14/50] batch [30/31] time 1.380 (1.399) data 0.000 (0.032) loss 1.1797 (0.9074) acc 75.0000 (77.3958) lr 1.7290e-03 eta 0:26:03 +epoch [15/50] batch [5/31] time 1.389 (1.535) data 0.000 (0.167) loss 0.9385 (0.8726) acc 81.2500 (82.5000) lr 1.6845e-03 eta 0:28:25 +epoch [15/50] batch 
[10/31] time 1.400 (1.454) data 0.000 (0.084) loss 1.4658 (1.0495) acc 75.0000 (79.0625) lr 1.6845e-03 eta 0:26:48 +epoch [15/50] batch [15/31] time 1.360 (1.424) data 0.000 (0.056) loss 0.9355 (1.0586) acc 75.0000 (78.3333) lr 1.6845e-03 eta 0:26:07 +epoch [15/50] batch [20/31] time 1.365 (1.410) data 0.000 (0.042) loss 1.5098 (1.0480) acc 59.3750 (76.5625) lr 1.6845e-03 eta 0:25:44 +epoch [15/50] batch [25/31] time 1.382 (1.403) data 0.000 (0.034) loss 1.2734 (1.0088) acc 65.6250 (76.3750) lr 1.6845e-03 eta 0:25:30 +epoch [15/50] batch [30/31] time 1.370 (1.397) data 0.000 (0.028) loss 0.8994 (1.0047) acc 78.1250 (76.3542) lr 1.6845e-03 eta 0:25:17 +epoch [16/50] batch [5/31] time 1.364 (1.553) data 0.000 (0.184) loss 1.3789 (1.2171) acc 68.7500 (76.2500) lr 1.6374e-03 eta 0:27:57 +epoch [16/50] batch [10/31] time 1.371 (1.464) data 0.000 (0.092) loss 0.6958 (0.9961) acc 87.5000 (78.7500) lr 1.6374e-03 eta 0:26:14 +epoch [16/50] batch [15/31] time 1.360 (1.444) data 0.001 (0.062) loss 0.5381 (0.9211) acc 84.3750 (77.9167) lr 1.6374e-03 eta 0:25:44 +epoch [16/50] batch [20/31] time 1.339 (1.424) data 0.000 (0.046) loss 1.0293 (0.9518) acc 78.1250 (76.7188) lr 1.6374e-03 eta 0:25:16 +epoch [16/50] batch [25/31] time 1.373 (1.411) data 0.000 (0.037) loss 1.4551 (0.9493) acc 65.6250 (77.1250) lr 1.6374e-03 eta 0:24:55 +epoch [16/50] batch [30/31] time 1.352 (1.403) data 0.000 (0.031) loss 0.8003 (0.9575) acc 78.1250 (77.2917) lr 1.6374e-03 eta 0:24:40 +epoch [17/50] batch [5/31] time 1.367 (1.566) data 0.000 (0.197) loss 0.8853 (0.9742) acc 84.3750 (78.7500) lr 1.5878e-03 eta 0:27:22 +epoch [17/50] batch [10/31] time 1.351 (1.463) data 0.000 (0.099) loss 0.5142 (0.8353) acc 90.6250 (81.5625) lr 1.5878e-03 eta 0:25:26 +epoch [17/50] batch [15/31] time 1.380 (1.432) data 0.000 (0.066) loss 1.2100 (0.9032) acc 68.7500 (78.5417) lr 1.5878e-03 eta 0:24:47 +epoch [17/50] batch [20/31] time 1.370 (1.413) data 0.000 (0.050) loss 1.2129 (0.9579) acc 78.1250 (76.8750) lr 
1.5878e-03 eta 0:24:21 +epoch [17/50] batch [25/31] time 1.354 (1.401) data 0.000 (0.040) loss 1.2207 (1.0020) acc 71.8750 (76.3750) lr 1.5878e-03 eta 0:24:01 +epoch [17/50] batch [30/31] time 1.362 (1.395) data 0.000 (0.033) loss 1.1748 (1.0117) acc 78.1250 (76.8750) lr 1.5878e-03 eta 0:23:48 +epoch [18/50] batch [5/31] time 1.355 (1.544) data 0.000 (0.177) loss 0.6372 (0.9460) acc 84.3750 (79.3750) lr 1.5358e-03 eta 0:26:11 +epoch [18/50] batch [10/31] time 1.342 (1.446) data 0.000 (0.089) loss 0.8662 (0.9426) acc 84.3750 (78.7500) lr 1.5358e-03 eta 0:24:25 +epoch [18/50] batch [15/31] time 1.353 (1.415) data 0.000 (0.059) loss 0.8633 (1.0043) acc 71.8750 (77.7083) lr 1.5358e-03 eta 0:23:46 +epoch [18/50] batch [20/31] time 1.364 (1.402) data 0.000 (0.044) loss 0.9492 (0.9782) acc 81.2500 (77.8125) lr 1.5358e-03 eta 0:23:26 +epoch [18/50] batch [25/31] time 1.370 (1.393) data 0.000 (0.036) loss 1.0264 (0.9946) acc 71.8750 (77.3750) lr 1.5358e-03 eta 0:23:10 +epoch [18/50] batch [30/31] time 1.374 (1.388) data 0.000 (0.030) loss 1.2178 (1.0288) acc 78.1250 (77.2917) lr 1.5358e-03 eta 0:22:57 +epoch [19/50] batch [5/31] time 1.367 (1.544) data 0.000 (0.181) loss 0.5332 (0.7462) acc 84.3750 (81.8750) lr 1.4818e-03 eta 0:25:24 +epoch [19/50] batch [10/31] time 1.380 (1.455) data 0.000 (0.091) loss 0.4927 (0.7446) acc 90.6250 (83.7500) lr 1.4818e-03 eta 0:23:48 +epoch [19/50] batch [15/31] time 1.366 (1.425) data 0.000 (0.061) loss 0.9512 (0.8083) acc 81.2500 (82.0833) lr 1.4818e-03 eta 0:23:12 +epoch [19/50] batch [20/31] time 1.368 (1.410) data 0.000 (0.046) loss 1.1025 (0.8643) acc 68.7500 (80.3125) lr 1.4818e-03 eta 0:22:50 +epoch [19/50] batch [25/31] time 1.377 (1.401) data 0.000 (0.036) loss 1.1289 (0.8601) acc 75.0000 (80.5000) lr 1.4818e-03 eta 0:22:34 +epoch [19/50] batch [30/31] time 1.360 (1.393) data 0.000 (0.030) loss 1.5068 (0.8909) acc 65.6250 (79.6875) lr 1.4818e-03 eta 0:22:19 +epoch [20/50] batch [5/31] time 1.348 (1.545) data 0.000 (0.183) loss 
0.8291 (0.7737) acc 78.1250 (82.5000) lr 1.4258e-03 eta 0:24:36 +epoch [20/50] batch [10/31] time 1.364 (1.452) data 0.000 (0.092) loss 0.6997 (0.7899) acc 87.5000 (82.8125) lr 1.4258e-03 eta 0:23:01 +epoch [20/50] batch [15/31] time 1.369 (1.422) data 0.000 (0.061) loss 0.7983 (0.8037) acc 84.3750 (82.0833) lr 1.4258e-03 eta 0:22:25 +epoch [20/50] batch [20/31] time 1.350 (1.403) data 0.000 (0.046) loss 0.9424 (0.7940) acc 81.2500 (80.9375) lr 1.4258e-03 eta 0:22:00 +epoch [20/50] batch [25/31] time 1.364 (1.394) data 0.000 (0.037) loss 1.2256 (0.8161) acc 75.0000 (79.8750) lr 1.4258e-03 eta 0:21:45 +epoch [20/50] batch [30/31] time 1.363 (1.389) data 0.000 (0.031) loss 1.4102 (0.8352) acc 65.6250 (79.5833) lr 1.4258e-03 eta 0:21:33 +epoch [21/50] batch [5/31] time 1.356 (1.563) data 0.000 (0.204) loss 0.7344 (0.9291) acc 84.3750 (82.5000) lr 1.3681e-03 eta 0:24:06 +epoch [21/50] batch [10/31] time 1.373 (1.465) data 0.000 (0.102) loss 1.3613 (0.9051) acc 68.7500 (80.3125) lr 1.3681e-03 eta 0:22:27 +epoch [21/50] batch [15/31] time 1.364 (1.433) data 0.001 (0.068) loss 0.9033 (0.9716) acc 71.8750 (78.1250) lr 1.3681e-03 eta 0:21:50 +epoch [21/50] batch [20/31] time 1.371 (1.415) data 0.000 (0.051) loss 1.1006 (0.9459) acc 81.2500 (78.7500) lr 1.3681e-03 eta 0:21:27 +epoch [21/50] batch [25/31] time 1.370 (1.406) data 0.000 (0.041) loss 0.9390 (0.9330) acc 71.8750 (78.0000) lr 1.3681e-03 eta 0:21:12 +epoch [21/50] batch [30/31] time 1.375 (1.400) data 0.000 (0.034) loss 0.7534 (0.9385) acc 81.2500 (78.3333) lr 1.3681e-03 eta 0:21:00 +epoch [22/50] batch [5/31] time 1.370 (1.584) data 0.000 (0.172) loss 0.8057 (0.8736) acc 78.1250 (78.1250) lr 1.3090e-03 eta 0:23:35 +epoch [22/50] batch [10/31] time 1.364 (1.473) data 0.000 (0.086) loss 0.3984 (0.7127) acc 87.5000 (80.6250) lr 1.3090e-03 eta 0:21:49 +epoch [22/50] batch [15/31] time 1.361 (1.437) data 0.000 (0.058) loss 0.6348 (0.7132) acc 87.5000 (81.0417) lr 1.3090e-03 eta 0:21:10 +epoch [22/50] batch [20/31] time 
1.361 (1.418) data 0.000 (0.043) loss 0.8789 (0.7317) acc 78.1250 (80.9375) lr 1.3090e-03 eta 0:20:46 +epoch [22/50] batch [25/31] time 1.366 (1.407) data 0.000 (0.035) loss 1.2910 (0.7760) acc 71.8750 (80.5000) lr 1.3090e-03 eta 0:20:29 +epoch [22/50] batch [30/31] time 1.352 (1.398) data 0.000 (0.029) loss 0.8911 (0.7979) acc 84.3750 (80.4167) lr 1.3090e-03 eta 0:20:14 +epoch [23/50] batch [5/31] time 1.386 (1.520) data 0.000 (0.157) loss 0.6367 (0.9304) acc 84.3750 (80.6250) lr 1.2487e-03 eta 0:21:51 +epoch [23/50] batch [10/31] time 1.371 (1.440) data 0.000 (0.079) loss 0.6206 (0.8460) acc 81.2500 (81.8750) lr 1.2487e-03 eta 0:20:35 +epoch [23/50] batch [15/31] time 1.382 (1.425) data 0.000 (0.053) loss 0.4351 (0.7469) acc 87.5000 (83.3333) lr 1.2487e-03 eta 0:20:15 +epoch [23/50] batch [20/31] time 1.355 (1.410) data 0.000 (0.040) loss 0.7832 (0.7764) acc 78.1250 (81.7188) lr 1.2487e-03 eta 0:19:55 +epoch [23/50] batch [25/31] time 1.336 (1.400) data 0.000 (0.032) loss 0.8726 (0.7915) acc 78.1250 (81.1250) lr 1.2487e-03 eta 0:19:40 +epoch [23/50] batch [30/31] time 1.380 (1.396) data 0.000 (0.026) loss 0.8940 (0.8372) acc 78.1250 (80.2083) lr 1.2487e-03 eta 0:19:29 +epoch [24/50] batch [5/31] time 1.349 (1.530) data 0.000 (0.164) loss 0.8105 (0.5885) acc 75.0000 (83.7500) lr 1.1874e-03 eta 0:21:13 +epoch [24/50] batch [10/31] time 1.366 (1.443) data 0.000 (0.082) loss 0.5649 (0.7113) acc 87.5000 (83.7500) lr 1.1874e-03 eta 0:19:53 +epoch [24/50] batch [15/31] time 1.371 (1.417) data 0.000 (0.055) loss 1.3018 (0.7355) acc 68.7500 (82.2917) lr 1.1874e-03 eta 0:19:24 +epoch [24/50] batch [20/31] time 1.486 (1.411) data 0.000 (0.041) loss 0.8604 (0.8207) acc 78.1250 (79.8438) lr 1.1874e-03 eta 0:19:12 +epoch [24/50] batch [25/31] time 1.363 (1.403) data 0.000 (0.033) loss 0.7803 (0.7747) acc 81.2500 (80.3750) lr 1.1874e-03 eta 0:18:58 +epoch [24/50] batch [30/31] time 1.352 (1.395) data 0.000 (0.028) loss 1.0264 (0.8143) acc 75.0000 (79.7917) lr 1.1874e-03 eta 
0:18:45 +epoch [25/50] batch [5/31] time 1.368 (1.534) data 0.000 (0.165) loss 1.4531 (0.9454) acc 68.7500 (76.8750) lr 1.1253e-03 eta 0:20:28 +epoch [25/50] batch [10/31] time 1.347 (1.448) data 0.000 (0.083) loss 0.3738 (0.9524) acc 90.6250 (77.8125) lr 1.1253e-03 eta 0:19:12 +epoch [25/50] batch [15/31] time 1.348 (1.424) data 0.000 (0.055) loss 1.3818 (0.9931) acc 78.1250 (76.2500) lr 1.1253e-03 eta 0:18:46 +epoch [25/50] batch [20/31] time 1.373 (1.408) data 0.000 (0.042) loss 0.8511 (0.9481) acc 84.3750 (76.8750) lr 1.1253e-03 eta 0:18:26 +epoch [25/50] batch [25/31] time 1.355 (1.400) data 0.000 (0.033) loss 0.7529 (0.9019) acc 78.1250 (78.1250) lr 1.1253e-03 eta 0:18:13 +epoch [25/50] batch [30/31] time 1.362 (1.392) data 0.000 (0.028) loss 0.5581 (0.8817) acc 87.5000 (78.9583) lr 1.1253e-03 eta 0:18:00 +epoch [26/50] batch [5/31] time 1.367 (1.529) data 0.000 (0.161) loss 0.9248 (0.8644) acc 71.8750 (79.3750) lr 1.0628e-03 eta 0:19:37 +epoch [26/50] batch [10/31] time 1.351 (1.444) data 0.000 (0.081) loss 0.8379 (0.8039) acc 81.2500 (81.2500) lr 1.0628e-03 eta 0:18:24 +epoch [26/50] batch [15/31] time 1.355 (1.418) data 0.000 (0.054) loss 0.4382 (0.7557) acc 84.3750 (80.4167) lr 1.0628e-03 eta 0:17:57 +epoch [26/50] batch [20/31] time 1.370 (1.404) data 0.000 (0.041) loss 0.6362 (0.7535) acc 84.3750 (80.6250) lr 1.0628e-03 eta 0:17:40 +epoch [26/50] batch [25/31] time 1.370 (1.398) data 0.000 (0.033) loss 1.2988 (0.7812) acc 78.1250 (81.0000) lr 1.0628e-03 eta 0:17:28 +epoch [26/50] batch [30/31] time 1.349 (1.392) data 0.000 (0.027) loss 0.4097 (0.7494) acc 87.5000 (82.5000) lr 1.0628e-03 eta 0:17:16 +epoch [27/50] batch [5/31] time 1.375 (1.542) data 0.000 (0.164) loss 0.5742 (0.7003) acc 84.3750 (85.6250) lr 1.0000e-03 eta 0:18:59 +epoch [27/50] batch [10/31] time 1.383 (1.456) data 0.000 (0.082) loss 0.4355 (0.6444) acc 87.5000 (85.6250) lr 1.0000e-03 eta 0:17:48 +epoch [27/50] batch [15/31] time 1.381 (1.436) data 0.000 (0.055) loss 1.0342 (0.7106) 
acc 71.8750 (84.1667) lr 1.0000e-03 eta 0:17:26 +epoch [27/50] batch [20/31] time 1.347 (1.416) data 0.000 (0.041) loss 0.8208 (0.7064) acc 81.2500 (83.7500) lr 1.0000e-03 eta 0:17:05 +epoch [27/50] batch [25/31] time 1.340 (1.403) data 0.000 (0.033) loss 0.8120 (0.7107) acc 75.0000 (83.1250) lr 1.0000e-03 eta 0:16:48 +epoch [27/50] batch [30/31] time 1.355 (1.395) data 0.000 (0.028) loss 0.9478 (0.7113) acc 84.3750 (83.4375) lr 1.0000e-03 eta 0:16:35 +epoch [28/50] batch [5/31] time 1.372 (1.547) data 0.000 (0.176) loss 0.6484 (0.6508) acc 78.1250 (81.8750) lr 9.3721e-04 eta 0:18:15 +epoch [28/50] batch [10/31] time 1.362 (1.456) data 0.000 (0.088) loss 0.8818 (0.7413) acc 75.0000 (80.6250) lr 9.3721e-04 eta 0:17:03 +epoch [28/50] batch [15/31] time 1.376 (1.425) data 0.000 (0.059) loss 0.4780 (0.7417) acc 90.6250 (81.6667) lr 9.3721e-04 eta 0:16:34 +epoch [28/50] batch [20/31] time 1.353 (1.409) data 0.000 (0.044) loss 1.1035 (0.8083) acc 78.1250 (81.0938) lr 9.3721e-04 eta 0:16:16 +epoch [28/50] batch [25/31] time 1.362 (1.399) data 0.000 (0.035) loss 0.6855 (0.7882) acc 87.5000 (81.1250) lr 9.3721e-04 eta 0:16:02 +epoch [28/50] batch [30/31] time 1.379 (1.394) data 0.000 (0.030) loss 0.7729 (0.7868) acc 75.0000 (81.0417) lr 9.3721e-04 eta 0:15:52 +epoch [29/50] batch [5/31] time 1.367 (1.547) data 0.000 (0.178) loss 0.5488 (0.8607) acc 87.5000 (80.0000) lr 8.7467e-04 eta 0:17:27 +epoch [29/50] batch [10/31] time 1.370 (1.455) data 0.000 (0.089) loss 1.4629 (0.8281) acc 71.8750 (81.8750) lr 8.7467e-04 eta 0:16:18 +epoch [29/50] batch [15/31] time 1.364 (1.425) data 0.000 (0.059) loss 0.8525 (0.8508) acc 87.5000 (81.6667) lr 8.7467e-04 eta 0:15:50 +epoch [29/50] batch [20/31] time 1.349 (1.410) data 0.000 (0.045) loss 0.7134 (0.8552) acc 78.1250 (80.3125) lr 8.7467e-04 eta 0:15:33 +epoch [29/50] batch [25/31] time 1.371 (1.401) data 0.000 (0.036) loss 0.4089 (0.8650) acc 93.7500 (79.8750) lr 8.7467e-04 eta 0:15:20 +epoch [29/50] batch [30/31] time 1.351 (1.395) 
data 0.000 (0.030) loss 0.2087 (0.8269) acc 96.8750 (80.5208) lr 8.7467e-04 eta 0:15:09 +epoch [30/50] batch [5/31] time 1.349 (1.538) data 0.000 (0.170) loss 0.6987 (0.6703) acc 75.0000 (82.5000) lr 8.1262e-04 eta 0:16:33 +epoch [30/50] batch [10/31] time 1.358 (1.447) data 0.000 (0.085) loss 0.7964 (0.7357) acc 78.1250 (82.5000) lr 8.1262e-04 eta 0:15:27 +epoch [30/50] batch [15/31] time 1.354 (1.418) data 0.000 (0.057) loss 0.6274 (0.7095) acc 81.2500 (82.9167) lr 8.1262e-04 eta 0:15:01 +epoch [30/50] batch [20/31] time 1.369 (1.405) data 0.000 (0.043) loss 0.8921 (0.7403) acc 78.1250 (82.9688) lr 8.1262e-04 eta 0:14:46 +epoch [30/50] batch [25/31] time 1.347 (1.395) data 0.000 (0.034) loss 1.0879 (0.7754) acc 65.6250 (81.8750) lr 8.1262e-04 eta 0:14:32 +epoch [30/50] batch [30/31] time 1.358 (1.388) data 0.000 (0.029) loss 0.5986 (0.7686) acc 90.6250 (82.1875) lr 8.1262e-04 eta 0:14:22 +epoch [31/50] batch [5/31] time 1.351 (1.523) data 0.000 (0.158) loss 0.7334 (0.6019) acc 84.3750 (85.6250) lr 7.5131e-04 eta 0:15:36 +epoch [31/50] batch [10/31] time 1.364 (1.441) data 0.000 (0.079) loss 0.5107 (0.7018) acc 90.6250 (84.3750) lr 7.5131e-04 eta 0:14:39 +epoch [31/50] batch [15/31] time 1.369 (1.418) data 0.000 (0.053) loss 0.3875 (0.7172) acc 87.5000 (83.7500) lr 7.5131e-04 eta 0:14:17 +epoch [31/50] batch [20/31] time 1.351 (1.403) data 0.000 (0.040) loss 1.2119 (0.7237) acc 75.0000 (83.7500) lr 7.5131e-04 eta 0:14:01 +epoch [31/50] batch [25/31] time 1.359 (1.395) data 0.000 (0.032) loss 0.6621 (0.7070) acc 84.3750 (83.8750) lr 7.5131e-04 eta 0:13:50 +epoch [31/50] batch [30/31] time 1.365 (1.389) data 0.000 (0.027) loss 0.8442 (0.7184) acc 75.0000 (83.2292) lr 7.5131e-04 eta 0:13:39 +epoch [32/50] batch [5/31] time 1.365 (1.544) data 0.000 (0.175) loss 0.5547 (0.7110) acc 84.3750 (82.5000) lr 6.9098e-04 eta 0:15:01 +epoch [32/50] batch [10/31] time 1.360 (1.451) data 0.000 (0.087) loss 0.4326 (0.6441) acc 87.5000 (85.0000) lr 6.9098e-04 eta 0:14:00 +epoch 
[32/50] batch [15/31] time 1.340 (1.419) data 0.000 (0.058) loss 0.5171 (0.6277) acc 84.3750 (86.2500) lr 6.9098e-04 eta 0:13:34 +epoch [32/50] batch [20/31] time 1.361 (1.404) data 0.000 (0.044) loss 0.7271 (0.6657) acc 87.5000 (85.1562) lr 6.9098e-04 eta 0:13:18 +epoch [32/50] batch [25/31] time 1.342 (1.396) data 0.000 (0.035) loss 0.7246 (0.6816) acc 84.3750 (84.0000) lr 6.9098e-04 eta 0:13:07 +epoch [32/50] batch [30/31] time 1.373 (1.390) data 0.000 (0.029) loss 1.8223 (0.7671) acc 68.7500 (82.9167) lr 6.9098e-04 eta 0:12:57 +epoch [33/50] batch [5/31] time 1.356 (1.550) data 0.000 (0.151) loss 0.7568 (0.6984) acc 71.8750 (79.3750) lr 6.3188e-04 eta 0:14:17 +epoch [33/50] batch [10/31] time 1.390 (1.462) data 0.000 (0.076) loss 0.8638 (0.6344) acc 75.0000 (81.2500) lr 6.3188e-04 eta 0:13:21 +epoch [33/50] batch [15/31] time 1.366 (1.431) data 0.000 (0.051) loss 1.0781 (0.6829) acc 78.1250 (80.6250) lr 6.3188e-04 eta 0:12:57 +epoch [33/50] batch [20/31] time 1.381 (1.412) data 0.000 (0.038) loss 0.7764 (0.6729) acc 87.5000 (81.8750) lr 6.3188e-04 eta 0:12:39 +epoch [33/50] batch [25/31] time 1.375 (1.404) data 0.000 (0.031) loss 1.5029 (0.7137) acc 75.0000 (81.7500) lr 6.3188e-04 eta 0:12:28 +epoch [33/50] batch [30/31] time 1.362 (1.396) data 0.000 (0.026) loss 1.0342 (0.7179) acc 75.0000 (82.1875) lr 6.3188e-04 eta 0:12:17 +epoch [34/50] batch [5/31] time 1.376 (1.530) data 0.001 (0.166) loss 0.6533 (0.8899) acc 87.5000 (82.5000) lr 5.7422e-04 eta 0:13:18 +epoch [34/50] batch [10/31] time 1.341 (1.442) data 0.001 (0.083) loss 0.8364 (0.9657) acc 81.2500 (80.6250) lr 5.7422e-04 eta 0:12:25 +epoch [34/50] batch [15/31] time 1.347 (1.423) data 0.000 (0.056) loss 0.6606 (0.8641) acc 87.5000 (81.2500) lr 5.7422e-04 eta 0:12:08 +epoch [34/50] batch [20/31] time 1.375 (1.408) data 0.001 (0.042) loss 0.4944 (0.8094) acc 87.5000 (82.0312) lr 5.7422e-04 eta 0:11:53 +epoch [34/50] batch [25/31] time 1.364 (1.400) data 0.000 (0.033) loss 0.3835 (0.7860) acc 87.5000 
(82.1250) lr 5.7422e-04 eta 0:11:42 +epoch [34/50] batch [30/31] time 1.381 (1.394) data 0.000 (0.028) loss 0.3323 (0.7922) acc 93.7500 (81.8750) lr 5.7422e-04 eta 0:11:32 +epoch [35/50] batch [5/31] time 1.362 (1.546) data 0.001 (0.178) loss 0.7188 (0.7843) acc 84.3750 (83.1250) lr 5.1825e-04 eta 0:12:39 +epoch [35/50] batch [10/31] time 1.357 (1.455) data 0.000 (0.089) loss 0.5786 (0.8573) acc 87.5000 (82.1875) lr 5.1825e-04 eta 0:11:47 +epoch [35/50] batch [15/31] time 1.375 (1.426) data 0.000 (0.060) loss 0.4961 (0.8034) acc 87.5000 (81.8750) lr 5.1825e-04 eta 0:11:25 +epoch [35/50] batch [20/31] time 1.507 (1.414) data 0.000 (0.045) loss 0.3538 (0.7639) acc 90.6250 (82.0312) lr 5.1825e-04 eta 0:11:13 +epoch [35/50] batch [25/31] time 1.381 (1.406) data 0.000 (0.036) loss 0.7935 (0.7921) acc 75.0000 (81.6250) lr 5.1825e-04 eta 0:11:02 +epoch [35/50] batch [30/31] time 1.358 (1.397) data 0.000 (0.030) loss 0.6265 (0.7452) acc 87.5000 (82.8125) lr 5.1825e-04 eta 0:10:51 +epoch [36/50] batch [5/31] time 1.372 (1.538) data 0.000 (0.170) loss 0.7769 (0.5996) acc 78.1250 (83.1250) lr 4.6417e-04 eta 0:11:47 +epoch [36/50] batch [10/31] time 1.373 (1.455) data 0.000 (0.085) loss 0.7432 (0.5945) acc 84.3750 (85.6250) lr 4.6417e-04 eta 0:11:02 +epoch [36/50] batch [15/31] time 1.345 (1.422) data 0.000 (0.057) loss 0.4368 (0.5892) acc 87.5000 (85.0000) lr 4.6417e-04 eta 0:10:39 +epoch [36/50] batch [20/31] time 1.376 (1.407) data 0.000 (0.043) loss 0.7031 (0.6033) acc 78.1250 (85.1562) lr 4.6417e-04 eta 0:10:26 +epoch [36/50] batch [25/31] time 1.371 (1.399) data 0.000 (0.034) loss 0.3713 (0.6034) acc 93.7500 (85.5000) lr 4.6417e-04 eta 0:10:15 +epoch [36/50] batch [30/31] time 1.365 (1.394) data 0.000 (0.029) loss 0.9082 (0.6258) acc 81.2500 (85.3125) lr 4.6417e-04 eta 0:10:06 +epoch [37/50] batch [5/31] time 1.373 (1.523) data 0.000 (0.157) loss 0.7734 (0.8388) acc 78.1250 (81.8750) lr 4.1221e-04 eta 0:10:53 +epoch [37/50] batch [10/31] time 1.356 (1.443) data 0.000 
(0.079) loss 0.6279 (0.7868) acc 78.1250 (80.6250) lr 4.1221e-04 eta 0:10:11 +epoch [37/50] batch [15/31] time 1.358 (1.416) data 0.000 (0.053) loss 0.7383 (0.7390) acc 84.3750 (82.5000) lr 4.1221e-04 eta 0:09:53 +epoch [37/50] batch [20/31] time 1.363 (1.404) data 0.000 (0.040) loss 0.7188 (0.7381) acc 71.8750 (82.1875) lr 4.1221e-04 eta 0:09:41 +epoch [37/50] batch [25/31] time 1.373 (1.397) data 0.000 (0.032) loss 0.6240 (0.7329) acc 81.2500 (81.7500) lr 4.1221e-04 eta 0:09:31 +epoch [37/50] batch [30/31] time 1.351 (1.392) data 0.000 (0.026) loss 0.7437 (0.7424) acc 81.2500 (81.8750) lr 4.1221e-04 eta 0:09:22 +epoch [38/50] batch [5/31] time 1.374 (1.538) data 0.000 (0.166) loss 0.5059 (0.5985) acc 84.3750 (87.5000) lr 3.6258e-04 eta 0:10:12 +epoch [38/50] batch [10/31] time 1.381 (1.454) data 0.000 (0.083) loss 0.4006 (0.6747) acc 96.8750 (85.3125) lr 3.6258e-04 eta 0:09:31 +epoch [38/50] batch [15/31] time 1.374 (1.434) data 0.000 (0.056) loss 0.5933 (0.6061) acc 87.5000 (86.4583) lr 3.6258e-04 eta 0:09:16 +epoch [38/50] batch [20/31] time 1.390 (1.417) data 0.000 (0.042) loss 0.2998 (0.6390) acc 90.6250 (85.7812) lr 3.6258e-04 eta 0:09:02 +epoch [38/50] batch [25/31] time 1.372 (1.407) data 0.000 (0.034) loss 0.9204 (0.6555) acc 81.2500 (84.7500) lr 3.6258e-04 eta 0:08:51 +epoch [38/50] batch [30/31] time 1.365 (1.400) data 0.000 (0.028) loss 0.6484 (0.6298) acc 87.5000 (85.2083) lr 3.6258e-04 eta 0:08:42 +epoch [39/50] batch [5/31] time 1.363 (1.562) data 0.000 (0.199) loss 0.6216 (0.7093) acc 84.3750 (81.8750) lr 3.1545e-04 eta 0:09:33 +epoch [39/50] batch [10/31] time 1.359 (1.459) data 0.000 (0.100) loss 0.3289 (0.6390) acc 87.5000 (82.8125) lr 3.1545e-04 eta 0:08:48 +epoch [39/50] batch [15/31] time 1.364 (1.425) data 0.000 (0.067) loss 0.3794 (0.6206) acc 93.7500 (84.3750) lr 3.1545e-04 eta 0:08:28 +epoch [39/50] batch [20/31] time 1.344 (1.409) data 0.000 (0.050) loss 0.8828 (0.6505) acc 78.1250 (83.5938) lr 3.1545e-04 eta 0:08:16 +epoch [39/50] batch 
[25/31] time 1.361 (1.400) data 0.000 (0.040) loss 0.6206 (0.6392) acc 87.5000 (83.6250) lr 3.1545e-04 eta 0:08:05 +epoch [39/50] batch [30/31] time 1.367 (1.394) data 0.000 (0.033) loss 0.3845 (0.6519) acc 84.3750 (83.7500) lr 3.1545e-04 eta 0:07:56 +epoch [40/50] batch [5/31] time 1.361 (1.555) data 0.000 (0.189) loss 0.3818 (0.7612) acc 93.7500 (85.0000) lr 2.7103e-04 eta 0:08:42 +epoch [40/50] batch [10/31] time 1.374 (1.464) data 0.000 (0.095) loss 0.8569 (0.6901) acc 75.0000 (85.0000) lr 2.7103e-04 eta 0:08:04 +epoch [40/50] batch [15/31] time 1.371 (1.432) data 0.000 (0.063) loss 0.4783 (0.6936) acc 87.5000 (84.1667) lr 2.7103e-04 eta 0:07:46 +epoch [40/50] batch [20/31] time 1.361 (1.416) data 0.000 (0.048) loss 0.5166 (0.7121) acc 84.3750 (84.3750) lr 2.7103e-04 eta 0:07:34 +epoch [40/50] batch [25/31] time 1.360 (1.404) data 0.000 (0.038) loss 1.1533 (0.7025) acc 81.2500 (84.2500) lr 2.7103e-04 eta 0:07:23 +epoch [40/50] batch [30/31] time 1.386 (1.399) data 0.000 (0.032) loss 0.8770 (0.6815) acc 81.2500 (84.6875) lr 2.7103e-04 eta 0:07:15 +epoch [41/50] batch [5/31] time 1.379 (1.552) data 0.001 (0.174) loss 0.5151 (0.6281) acc 84.3750 (85.0000) lr 2.2949e-04 eta 0:07:53 +epoch [41/50] batch [10/31] time 1.381 (1.458) data 0.000 (0.087) loss 0.8999 (0.6392) acc 78.1250 (83.4375) lr 2.2949e-04 eta 0:07:17 +epoch [41/50] batch [15/31] time 1.348 (1.426) data 0.000 (0.058) loss 0.7646 (0.6542) acc 75.0000 (82.7083) lr 2.2949e-04 eta 0:07:00 +epoch [41/50] batch [20/31] time 1.384 (1.409) data 0.000 (0.044) loss 0.6372 (0.6645) acc 84.3750 (83.2812) lr 2.2949e-04 eta 0:06:48 +epoch [41/50] batch [25/31] time 1.356 (1.400) data 0.000 (0.035) loss 0.7119 (0.6842) acc 90.6250 (83.3750) lr 2.2949e-04 eta 0:06:38 +epoch [41/50] batch [30/31] time 1.353 (1.393) data 0.000 (0.029) loss 0.7344 (0.6997) acc 84.3750 (83.6458) lr 2.2949e-04 eta 0:06:29 +epoch [42/50] batch [5/31] time 1.338 (1.536) data 0.001 (0.167) loss 0.4834 (0.6315) acc 87.5000 (85.0000) lr 
1.9098e-04 eta 0:07:00 +epoch [42/50] batch [10/31] time 1.347 (1.448) data 0.000 (0.084) loss 0.7905 (0.6166) acc 87.5000 (85.6250) lr 1.9098e-04 eta 0:06:29 +epoch [42/50] batch [15/31] time 1.349 (1.420) data 0.000 (0.056) loss 0.9141 (0.6548) acc 87.5000 (84.7917) lr 1.9098e-04 eta 0:06:14 +epoch [42/50] batch [20/31] time 1.360 (1.407) data 0.000 (0.042) loss 1.0020 (0.7151) acc 81.2500 (84.5312) lr 1.9098e-04 eta 0:06:04 +epoch [42/50] batch [25/31] time 1.337 (1.396) data 0.000 (0.034) loss 0.4656 (0.6904) acc 87.5000 (84.8750) lr 1.9098e-04 eta 0:05:54 +epoch [42/50] batch [30/31] time 1.352 (1.390) data 0.000 (0.028) loss 0.7227 (0.7144) acc 84.3750 (84.3750) lr 1.9098e-04 eta 0:05:46 +epoch [43/50] batch [5/31] time 1.358 (1.532) data 0.000 (0.165) loss 0.8823 (0.6604) acc 81.2500 (83.7500) lr 1.5567e-04 eta 0:06:12 +epoch [43/50] batch [10/31] time 1.371 (1.449) data 0.000 (0.083) loss 0.6055 (0.5907) acc 81.2500 (85.9375) lr 1.5567e-04 eta 0:05:44 +epoch [43/50] batch [15/31] time 1.373 (1.420) data 0.000 (0.055) loss 0.9219 (0.6269) acc 78.1250 (85.0000) lr 1.5567e-04 eta 0:05:30 +epoch [43/50] batch [20/31] time 1.352 (1.404) data 0.000 (0.041) loss 0.7466 (0.6634) acc 84.3750 (84.8438) lr 1.5567e-04 eta 0:05:20 +epoch [43/50] batch [25/31] time 1.364 (1.396) data 0.000 (0.033) loss 0.8345 (0.6974) acc 87.5000 (84.6250) lr 1.5567e-04 eta 0:05:11 +epoch [43/50] batch [30/31] time 1.364 (1.392) data 0.000 (0.028) loss 0.5537 (0.6874) acc 84.3750 (84.5833) lr 1.5567e-04 eta 0:05:03 +epoch [44/50] batch [5/31] time 1.353 (1.558) data 0.001 (0.166) loss 0.6255 (0.6684) acc 84.3750 (83.7500) lr 1.2369e-04 eta 0:05:30 +epoch [44/50] batch [10/31] time 1.354 (1.465) data 0.000 (0.084) loss 0.3760 (0.7240) acc 90.6250 (81.8750) lr 1.2369e-04 eta 0:05:03 +epoch [44/50] batch [15/31] time 1.362 (1.432) data 0.001 (0.056) loss 0.4844 (0.6286) acc 90.6250 (84.1667) lr 1.2369e-04 eta 0:04:49 +epoch [44/50] batch [20/31] time 1.376 (1.416) data 0.000 (0.042) loss 
1.0391 (0.6150) acc 81.2500 (85.0000) lr 1.2369e-04 eta 0:04:38 +epoch [44/50] batch [25/31] time 1.360 (1.404) data 0.000 (0.034) loss 0.3862 (0.5955) acc 93.7500 (85.5000) lr 1.2369e-04 eta 0:04:29 +epoch [44/50] batch [30/31] time 1.374 (1.397) data 0.000 (0.028) loss 0.6484 (0.6309) acc 84.3750 (85.0000) lr 1.2369e-04 eta 0:04:21 +epoch [45/50] batch [5/31] time 1.353 (1.545) data 0.001 (0.181) loss 0.5874 (0.7931) acc 84.3750 (81.8750) lr 9.5173e-05 eta 0:04:39 +epoch [45/50] batch [10/31] time 1.361 (1.456) data 0.001 (0.091) loss 0.5981 (0.6588) acc 78.1250 (85.3125) lr 9.5173e-05 eta 0:04:16 +epoch [45/50] batch [15/31] time 1.373 (1.434) data 0.000 (0.061) loss 0.7192 (0.6642) acc 78.1250 (85.8333) lr 9.5173e-05 eta 0:04:05 +epoch [45/50] batch [20/31] time 1.351 (1.416) data 0.000 (0.046) loss 0.6274 (0.6595) acc 87.5000 (85.4688) lr 9.5173e-05 eta 0:03:55 +epoch [45/50] batch [25/31] time 1.346 (1.405) data 0.000 (0.037) loss 0.7437 (0.6399) acc 81.2500 (86.1250) lr 9.5173e-05 eta 0:03:46 +epoch [45/50] batch [30/31] time 1.364 (1.397) data 0.000 (0.031) loss 0.6357 (0.6369) acc 87.5000 (86.0417) lr 9.5173e-05 eta 0:03:37 +epoch [46/50] batch [5/31] time 1.363 (1.551) data 0.000 (0.183) loss 0.6479 (0.6948) acc 84.3750 (80.6250) lr 7.0224e-05 eta 0:03:52 +epoch [46/50] batch [10/31] time 1.365 (1.459) data 0.000 (0.092) loss 0.6245 (0.5827) acc 90.6250 (84.6875) lr 7.0224e-05 eta 0:03:31 +epoch [46/50] batch [15/31] time 1.379 (1.427) data 0.000 (0.061) loss 1.1338 (0.7064) acc 78.1250 (83.3333) lr 7.0224e-05 eta 0:03:19 +epoch [46/50] batch [20/31] time 1.524 (1.420) data 0.000 (0.046) loss 1.2520 (0.7294) acc 78.1250 (82.5000) lr 7.0224e-05 eta 0:03:11 +epoch [46/50] batch [25/31] time 1.364 (1.410) data 0.000 (0.037) loss 0.3203 (0.6868) acc 93.7500 (83.3750) lr 7.0224e-05 eta 0:03:03 +epoch [46/50] batch [30/31] time 1.361 (1.402) data 0.000 (0.031) loss 0.4644 (0.6704) acc 87.5000 (84.0625) lr 7.0224e-05 eta 0:02:55 +epoch [47/50] batch [5/31] time 
1.350 (1.547) data 0.000 (0.171) loss 0.4895 (0.5539) acc 87.5000 (86.2500) lr 4.8943e-05 eta 0:03:04 +epoch [47/50] batch [10/31] time 1.355 (1.461) data 0.001 (0.086) loss 0.4473 (0.6357) acc 90.6250 (85.0000) lr 4.8943e-05 eta 0:02:46 +epoch [47/50] batch [15/31] time 1.367 (1.427) data 0.000 (0.057) loss 0.6655 (0.6403) acc 84.3750 (85.4167) lr 4.8943e-05 eta 0:02:35 +epoch [47/50] batch [20/31] time 1.394 (1.414) data 0.000 (0.043) loss 0.2094 (0.6658) acc 100.0000 (85.4688) lr 4.8943e-05 eta 0:02:27 +epoch [47/50] batch [25/31] time 1.353 (1.404) data 0.000 (0.034) loss 0.6538 (0.6742) acc 84.3750 (84.7500) lr 4.8943e-05 eta 0:02:19 +epoch [47/50] batch [30/31] time 1.357 (1.397) data 0.000 (0.029) loss 1.0996 (0.6986) acc 81.2500 (84.4792) lr 4.8943e-05 eta 0:02:11 +epoch [48/50] batch [5/31] time 1.380 (1.542) data 0.000 (0.162) loss 0.6299 (0.6461) acc 87.5000 (83.1250) lr 3.1417e-05 eta 0:02:15 +epoch [48/50] batch [10/31] time 1.371 (1.453) data 0.001 (0.081) loss 0.5820 (0.6527) acc 84.3750 (84.3750) lr 3.1417e-05 eta 0:02:00 +epoch [48/50] batch [15/31] time 1.379 (1.425) data 0.000 (0.054) loss 1.1318 (0.6449) acc 68.7500 (83.9583) lr 3.1417e-05 eta 0:01:51 +epoch [48/50] batch [20/31] time 1.358 (1.411) data 0.000 (0.041) loss 0.4568 (0.6140) acc 87.5000 (85.3125) lr 3.1417e-05 eta 0:01:42 +epoch [48/50] batch [25/31] time 1.356 (1.403) data 0.000 (0.033) loss 0.7852 (0.6475) acc 87.5000 (85.0000) lr 3.1417e-05 eta 0:01:35 +epoch [48/50] batch [30/31] time 1.348 (1.395) data 0.000 (0.027) loss 0.3081 (0.6465) acc 93.7500 (85.0000) lr 3.1417e-05 eta 0:01:27 +epoch [49/50] batch [5/31] time 1.365 (1.545) data 0.000 (0.170) loss 0.9312 (0.6191) acc 78.1250 (85.6250) lr 1.7713e-05 eta 0:01:28 +epoch [49/50] batch [10/31] time 1.361 (1.459) data 0.000 (0.085) loss 0.5273 (0.5782) acc 84.3750 (86.8750) lr 1.7713e-05 eta 0:01:15 +epoch [49/50] batch [15/31] time 1.374 (1.441) data 0.000 (0.057) loss 0.3965 (0.6275) acc 90.6250 (85.8333) lr 1.7713e-05 eta 
0:01:07 +epoch [49/50] batch [20/31] time 1.364 (1.423) data 0.000 (0.043) loss 0.7236 (0.6272) acc 87.5000 (85.6250) lr 1.7713e-05 eta 0:00:59 +epoch [49/50] batch [25/31] time 1.373 (1.412) data 0.000 (0.034) loss 0.7075 (0.6190) acc 78.1250 (85.7500) lr 1.7713e-05 eta 0:00:52 +epoch [49/50] batch [30/31] time 1.359 (1.403) data 0.000 (0.029) loss 1.0098 (0.6578) acc 75.0000 (84.8958) lr 1.7713e-05 eta 0:00:44 +epoch [50/50] batch [5/31] time 1.357 (1.525) data 0.000 (0.162) loss 0.7188 (0.7777) acc 84.3750 (80.6250) lr 7.8853e-06 eta 0:00:39 +epoch [50/50] batch [10/31] time 1.380 (1.445) data 0.001 (0.081) loss 0.4192 (0.6395) acc 90.6250 (85.6250) lr 7.8853e-06 eta 0:00:30 +epoch [50/50] batch [15/31] time 1.384 (1.420) data 0.000 (0.054) loss 0.4712 (0.6426) acc 93.7500 (85.0000) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [20/31] time 1.349 (1.402) data 0.000 (0.041) loss 0.8394 (0.6361) acc 87.5000 (85.7812) lr 7.8853e-06 eta 0:00:15 +epoch [50/50] batch [25/31] time 1.393 (1.398) data 0.001 (0.033) loss 0.4070 (0.6094) acc 90.6250 (86.5000) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [30/31] time 1.368 (1.393) data 0.000 (0.027) loss 0.8589 (0.5958) acc 84.3750 (86.6667) lr 7.8853e-06 eta 0:00:01 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 36,000 +* accuracy: 72.0% +* error: 28.0% +* macro_f1: 71.2% +Elapsed: 0:41:43 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/checkpoint @@ 
-0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..3396ba43 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698416867.ckb-gpu-lambda.218936.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698416867.ckb-gpu-lambda.218936.0 new file mode 100644 index 00000000..cc27fa89 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698416867.ckb-gpu-lambda.218936.0 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt new file mode 100644 index 00000000..d88af7b8 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/log.txt @@ -0,0 +1,639 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '1'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3 +resume: +root: 
/ckb-nfs/home/zcafego/ +seed: 3 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 1 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False 
+ SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.018 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Loading preprocessed few-shot data from /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_1-seed_3.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 1,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard) +epoch [1/50] batch [5/31] time 1.344 (2.171) data 0.000 (0.164) loss 2.2090 (2.8641) acc 53.1250 (50.6250) lr 1.0000e-05 eta 0:55:54 +epoch [1/50] batch [10/31] time 1.355 (1.761) data 0.000 (0.082) loss 1.9395 (2.6137) acc 56.2500 (53.7500) lr 1.0000e-05 eta 0:45:12 +epoch [1/50] batch [15/31] time 1.350 (1.627) data 0.000 (0.055) loss 2.8848 (2.5189) acc 46.8750 (52.0833) lr 1.0000e-05 eta 0:41:37 +epoch [1/50] batch [20/31] time 1.360 (1.559) data 0.000 (0.041) loss 1.7744 (2.4144) acc 71.8750 (53.5938) lr 1.0000e-05 eta 0:39:45 +epoch [1/50] batch [25/31] time 1.356 (1.518) data 0.000 (0.033) loss 1.8799 (2.2971) acc 59.3750 (54.0000) lr 1.0000e-05 eta 0:38:34 +epoch [1/50] batch [30/31] time 1.361 (1.493) data 0.000 (0.028) loss 1.9287 (2.2614) acc 46.8750 (53.6458) lr 1.0000e-05 eta 0:37:49 +epoch [2/50] batch [5/31] time 1.375 (1.541) data 0.000 (0.173) loss 1.5967 (1.4787) acc 65.6250 (66.8750) lr 2.0000e-03 eta 0:38:53 +epoch [2/50] batch [10/31] time 1.348 (1.451) data 0.001 (0.087) loss 2.0352 (1.5800) acc 56.2500 (64.3750) lr 2.0000e-03 eta 0:36:29 +epoch [2/50] batch [15/31] time 1.351 (1.424) data 0.000 (0.058) loss 1.1982 (1.4487) acc 71.8750 (66.4583) lr 2.0000e-03 eta 0:35:41 +epoch [2/50] batch [20/31] time 1.363 (1.410) data 0.000 (0.044) loss 1.2295 (1.4687) acc 71.8750 (65.6250) lr 2.0000e-03 eta 0:35:13 +epoch [2/50] batch [25/31] time 1.363 (1.401) data 0.000 (0.035) loss 0.4741 (1.4165) acc 84.3750 (66.6250) lr 2.0000e-03 eta 0:34:53 +epoch [2/50] batch [30/31] time 1.338 (1.394) data 0.000 (0.029) loss 1.0176 (1.3847) acc 78.1250 (66.4583) lr 2.0000e-03 eta 0:34:36 +epoch [3/50] batch [5/31] time 1.341 (1.554) data 0.001 (0.192) loss 1.0811 (1.4348) acc 62.5000 (63.7500) lr 1.9980e-03 eta 0:38:24 +epoch [3/50] batch [10/31] time 1.367 (1.453) data 
0.000 (0.096) loss 0.6226 (1.2843) acc 78.1250 (66.8750) lr 1.9980e-03 eta 0:35:47 +epoch [3/50] batch [15/31] time 1.369 (1.420) data 0.000 (0.064) loss 1.5938 (1.3120) acc 56.2500 (65.2083) lr 1.9980e-03 eta 0:34:52 +epoch [3/50] batch [20/31] time 1.349 (1.403) data 0.000 (0.048) loss 1.0977 (1.3025) acc 75.0000 (65.7812) lr 1.9980e-03 eta 0:34:19 +epoch [3/50] batch [25/31] time 1.359 (1.394) data 0.000 (0.039) loss 1.1699 (1.2904) acc 75.0000 (66.8750) lr 1.9980e-03 eta 0:33:59 +epoch [3/50] batch [30/31] time 1.347 (1.387) data 0.000 (0.032) loss 1.4697 (1.2837) acc 56.2500 (66.8750) lr 1.9980e-03 eta 0:33:42 +epoch [4/50] batch [5/31] time 1.355 (1.544) data 0.001 (0.182) loss 2.0645 (1.4373) acc 53.1250 (65.0000) lr 1.9921e-03 eta 0:37:22 +epoch [4/50] batch [10/31] time 1.365 (1.454) data 0.001 (0.091) loss 1.3145 (1.2322) acc 68.7500 (70.0000) lr 1.9921e-03 eta 0:35:03 +epoch [4/50] batch [15/31] time 1.379 (1.425) data 0.000 (0.061) loss 1.1045 (1.2187) acc 68.7500 (69.3750) lr 1.9921e-03 eta 0:34:15 +epoch [4/50] batch [20/31] time 1.376 (1.408) data 0.000 (0.046) loss 0.9609 (1.1899) acc 75.0000 (70.3125) lr 1.9921e-03 eta 0:33:43 +epoch [4/50] batch [25/31] time 1.362 (1.399) data 0.000 (0.037) loss 1.5029 (1.2535) acc 62.5000 (69.3750) lr 1.9921e-03 eta 0:33:23 +epoch [4/50] batch [30/31] time 1.355 (1.391) data 0.000 (0.031) loss 1.4102 (1.2611) acc 62.5000 (68.7500) lr 1.9921e-03 eta 0:33:05 +epoch [5/50] batch [5/31] time 1.355 (1.546) data 0.000 (0.184) loss 1.5635 (1.4598) acc 65.6250 (69.3750) lr 1.9823e-03 eta 0:36:36 +epoch [5/50] batch [10/31] time 1.363 (1.451) data 0.000 (0.092) loss 1.7539 (1.3289) acc 59.3750 (70.6250) lr 1.9823e-03 eta 0:34:15 +epoch [5/50] batch [15/31] time 1.348 (1.430) data 0.001 (0.062) loss 0.6753 (1.2113) acc 81.2500 (72.7083) lr 1.9823e-03 eta 0:33:37 +epoch [5/50] batch [20/31] time 1.350 (1.414) data 0.000 (0.046) loss 1.3369 (1.2137) acc 68.7500 (71.5625) lr 1.9823e-03 eta 0:33:07 +epoch [5/50] batch [25/31] 
time 1.363 (1.406) data 0.000 (0.037) loss 1.4434 (1.2101) acc 71.8750 (71.2500) lr 1.9823e-03 eta 0:32:50 +epoch [5/50] batch [30/31] time 1.363 (1.399) data 0.000 (0.031) loss 1.2871 (1.1869) acc 62.5000 (71.1458) lr 1.9823e-03 eta 0:32:33 +epoch [6/50] batch [5/31] time 1.371 (1.563) data 0.000 (0.200) loss 1.1270 (1.1187) acc 68.7500 (72.5000) lr 1.9686e-03 eta 0:36:13 +epoch [6/50] batch [10/31] time 1.367 (1.465) data 0.001 (0.100) loss 1.4834 (1.1713) acc 59.3750 (69.3750) lr 1.9686e-03 eta 0:33:49 +epoch [6/50] batch [15/31] time 1.376 (1.433) data 0.000 (0.067) loss 1.3848 (1.2168) acc 71.8750 (68.9583) lr 1.9686e-03 eta 0:32:57 +epoch [6/50] batch [20/31] time 1.371 (1.415) data 0.000 (0.050) loss 1.3281 (1.2443) acc 65.6250 (69.0625) lr 1.9686e-03 eta 0:32:25 +epoch [6/50] batch [25/31] time 1.350 (1.404) data 0.000 (0.040) loss 0.7837 (1.1878) acc 75.0000 (69.3750) lr 1.9686e-03 eta 0:32:03 +epoch [6/50] batch [30/31] time 1.362 (1.396) data 0.000 (0.034) loss 1.3799 (1.1509) acc 71.8750 (70.2083) lr 1.9686e-03 eta 0:31:45 +epoch [7/50] batch [5/31] time 1.359 (1.576) data 0.000 (0.204) loss 1.9414 (1.2611) acc 59.3750 (68.1250) lr 1.9511e-03 eta 0:35:42 +epoch [7/50] batch [10/31] time 1.362 (1.465) data 0.000 (0.102) loss 1.3740 (1.2158) acc 68.7500 (69.6875) lr 1.9511e-03 eta 0:33:03 +epoch [7/50] batch [15/31] time 1.369 (1.436) data 0.001 (0.068) loss 0.7339 (1.1883) acc 81.2500 (70.8333) lr 1.9511e-03 eta 0:32:16 +epoch [7/50] batch [20/31] time 1.366 (1.415) data 0.000 (0.051) loss 1.3164 (1.2076) acc 65.6250 (68.9062) lr 1.9511e-03 eta 0:31:42 +epoch [7/50] batch [25/31] time 1.363 (1.404) data 0.000 (0.041) loss 0.9185 (1.2005) acc 71.8750 (68.8750) lr 1.9511e-03 eta 0:31:19 +epoch [7/50] batch [30/31] time 1.365 (1.397) data 0.000 (0.034) loss 0.8936 (1.1622) acc 68.7500 (69.2708) lr 1.9511e-03 eta 0:31:04 +epoch [8/50] batch [5/31] time 1.376 (1.541) data 0.000 (0.182) loss 0.6943 (0.7115) acc 84.3750 (82.5000) lr 1.9298e-03 eta 0:34:06 
+epoch [8/50] batch [10/31] time 1.370 (1.452) data 0.000 (0.091) loss 1.2188 (0.8557) acc 68.7500 (77.8125) lr 1.9298e-03 eta 0:32:01 +epoch [8/50] batch [15/31] time 1.359 (1.422) data 0.001 (0.061) loss 2.0625 (1.0053) acc 53.1250 (73.1250) lr 1.9298e-03 eta 0:31:14 +epoch [8/50] batch [20/31] time 1.361 (1.407) data 0.000 (0.046) loss 1.0908 (1.0870) acc 78.1250 (72.1875) lr 1.9298e-03 eta 0:30:47 +epoch [8/50] batch [25/31] time 1.382 (1.400) data 0.000 (0.037) loss 1.4600 (1.1130) acc 65.6250 (72.0000) lr 1.9298e-03 eta 0:30:31 +epoch [8/50] batch [30/31] time 1.346 (1.393) data 0.000 (0.031) loss 1.1943 (1.1276) acc 65.6250 (71.3542) lr 1.9298e-03 eta 0:30:14 +epoch [9/50] batch [5/31] time 1.348 (1.590) data 0.000 (0.219) loss 0.8813 (1.0277) acc 75.0000 (74.3750) lr 1.9048e-03 eta 0:34:22 +epoch [9/50] batch [10/31] time 1.366 (1.479) data 0.000 (0.110) loss 1.9629 (1.0732) acc 62.5000 (73.1250) lr 1.9048e-03 eta 0:31:51 +epoch [9/50] batch [15/31] time 1.362 (1.443) data 0.000 (0.073) loss 1.1455 (1.0668) acc 65.6250 (72.7083) lr 1.9048e-03 eta 0:30:57 +epoch [9/50] batch [20/31] time 1.354 (1.422) data 0.000 (0.055) loss 1.0801 (1.0720) acc 71.8750 (72.0312) lr 1.9048e-03 eta 0:30:22 +epoch [9/50] batch [25/31] time 1.360 (1.409) data 0.000 (0.044) loss 1.2598 (1.1489) acc 65.6250 (71.0000) lr 1.9048e-03 eta 0:29:58 +epoch [9/50] batch [30/31] time 1.364 (1.402) data 0.000 (0.037) loss 1.2695 (1.1401) acc 62.5000 (71.1458) lr 1.9048e-03 eta 0:29:42 +epoch [10/50] batch [5/31] time 1.374 (1.565) data 0.001 (0.195) loss 0.9224 (1.2187) acc 81.2500 (70.0000) lr 1.8763e-03 eta 0:33:01 +epoch [10/50] batch [10/31] time 1.363 (1.465) data 0.000 (0.098) loss 1.6562 (1.2197) acc 65.6250 (70.3125) lr 1.8763e-03 eta 0:30:47 +epoch [10/50] batch [15/31] time 1.358 (1.430) data 0.000 (0.065) loss 1.4297 (1.1538) acc 68.7500 (71.4583) lr 1.8763e-03 eta 0:29:55 +epoch [10/50] batch [20/31] time 1.372 (1.416) data 0.000 (0.049) loss 0.7461 (1.1064) acc 75.0000 
(72.9688) lr 1.8763e-03 eta 0:29:30 +epoch [10/50] batch [25/31] time 1.386 (1.407) data 0.000 (0.039) loss 0.8535 (1.0455) acc 78.1250 (74.0000) lr 1.8763e-03 eta 0:29:13 +epoch [10/50] batch [30/31] time 1.376 (1.402) data 0.001 (0.033) loss 0.9009 (1.0392) acc 62.5000 (73.7500) lr 1.8763e-03 eta 0:28:59 +epoch [11/50] batch [5/31] time 1.355 (1.639) data 0.001 (0.222) loss 0.9551 (0.9437) acc 75.0000 (76.8750) lr 1.8443e-03 eta 0:33:43 +epoch [11/50] batch [10/31] time 1.371 (1.503) data 0.001 (0.111) loss 1.0889 (0.9377) acc 78.1250 (76.8750) lr 1.8443e-03 eta 0:30:48 +epoch [11/50] batch [15/31] time 1.377 (1.459) data 0.000 (0.074) loss 0.8184 (0.9035) acc 62.5000 (76.2500) lr 1.8443e-03 eta 0:29:46 +epoch [11/50] batch [20/31] time 1.361 (1.436) data 0.000 (0.056) loss 0.6880 (0.9488) acc 84.3750 (75.9375) lr 1.8443e-03 eta 0:29:12 +epoch [11/50] batch [25/31] time 1.342 (1.421) data 0.000 (0.045) loss 1.1289 (0.9870) acc 78.1250 (75.3750) lr 1.8443e-03 eta 0:28:46 +epoch [11/50] batch [30/31] time 1.357 (1.411) data 0.000 (0.037) loss 1.0938 (1.0068) acc 65.6250 (74.6875) lr 1.8443e-03 eta 0:28:27 +epoch [12/50] batch [5/31] time 1.352 (1.545) data 0.000 (0.178) loss 1.1260 (0.8168) acc 68.7500 (81.8750) lr 1.8090e-03 eta 0:31:00 +epoch [12/50] batch [10/31] time 1.382 (1.456) data 0.000 (0.089) loss 0.9819 (0.9537) acc 78.1250 (79.3750) lr 1.8090e-03 eta 0:29:06 +epoch [12/50] batch [15/31] time 1.358 (1.433) data 0.000 (0.060) loss 0.7466 (0.9906) acc 81.2500 (77.9167) lr 1.8090e-03 eta 0:28:31 +epoch [12/50] batch [20/31] time 1.352 (1.414) data 0.000 (0.045) loss 1.3936 (1.0233) acc 71.8750 (76.2500) lr 1.8090e-03 eta 0:28:01 +epoch [12/50] batch [25/31] time 1.368 (1.405) data 0.000 (0.036) loss 0.5493 (0.9898) acc 84.3750 (76.5000) lr 1.8090e-03 eta 0:27:43 +epoch [12/50] batch [30/31] time 1.383 (1.397) data 0.000 (0.030) loss 0.6719 (1.0318) acc 78.1250 (75.6250) lr 1.8090e-03 eta 0:27:27 +epoch [13/50] batch [5/31] time 1.360 (1.562) data 0.000 
(0.191) loss 0.8198 (0.8534) acc 90.6250 (81.2500) lr 1.7705e-03 eta 0:30:32 +epoch [13/50] batch [10/31] time 1.375 (1.464) data 0.000 (0.096) loss 1.0938 (0.8853) acc 68.7500 (77.1875) lr 1.7705e-03 eta 0:28:30 +epoch [13/50] batch [15/31] time 1.344 (1.428) data 0.001 (0.064) loss 0.4758 (0.9076) acc 87.5000 (76.6667) lr 1.7705e-03 eta 0:27:40 +epoch [13/50] batch [20/31] time 1.499 (1.418) data 0.000 (0.048) loss 0.8745 (0.9401) acc 81.2500 (76.2500) lr 1.7705e-03 eta 0:27:22 +epoch [13/50] batch [25/31] time 1.349 (1.408) data 0.000 (0.039) loss 1.2744 (0.9911) acc 62.5000 (74.1250) lr 1.7705e-03 eta 0:27:03 +epoch [13/50] batch [30/31] time 1.357 (1.401) data 0.000 (0.032) loss 1.1152 (1.0079) acc 71.8750 (73.8542) lr 1.7705e-03 eta 0:26:47 +epoch [14/50] batch [5/31] time 1.350 (1.541) data 0.000 (0.179) loss 0.8564 (0.8718) acc 81.2500 (78.7500) lr 1.7290e-03 eta 0:29:19 +epoch [14/50] batch [10/31] time 1.373 (1.452) data 0.000 (0.090) loss 0.6309 (0.8835) acc 90.6250 (77.8125) lr 1.7290e-03 eta 0:27:31 +epoch [14/50] batch [15/31] time 1.373 (1.420) data 0.000 (0.060) loss 1.3496 (0.9000) acc 62.5000 (76.4583) lr 1.7290e-03 eta 0:26:47 +epoch [14/50] batch [20/31] time 1.346 (1.405) data 0.000 (0.045) loss 1.5801 (0.9418) acc 68.7500 (75.4688) lr 1.7290e-03 eta 0:26:23 +epoch [14/50] batch [25/31] time 1.360 (1.395) data 0.000 (0.036) loss 0.9517 (1.0132) acc 71.8750 (74.0000) lr 1.7290e-03 eta 0:26:05 +epoch [14/50] batch [30/31] time 1.363 (1.392) data 0.000 (0.030) loss 0.9663 (1.0144) acc 71.8750 (73.9583) lr 1.7290e-03 eta 0:25:54 +epoch [15/50] batch [5/31] time 1.376 (1.558) data 0.000 (0.194) loss 1.1436 (0.9313) acc 68.7500 (73.7500) lr 1.6845e-03 eta 0:28:51 +epoch [15/50] batch [10/31] time 1.367 (1.463) data 0.001 (0.097) loss 0.9746 (0.8819) acc 81.2500 (73.7500) lr 1.6845e-03 eta 0:26:57 +epoch [15/50] batch [15/31] time 1.351 (1.428) data 0.000 (0.065) loss 1.1797 (0.9610) acc 75.0000 (73.3333) lr 1.6845e-03 eta 0:26:12 +epoch [15/50] batch 
[20/31] time 1.355 (1.411) data 0.000 (0.049) loss 0.9292 (0.9507) acc 68.7500 (73.7500) lr 1.6845e-03 eta 0:25:46 +epoch [15/50] batch [25/31] time 1.340 (1.400) data 0.000 (0.039) loss 0.7627 (0.9696) acc 75.0000 (73.8750) lr 1.6845e-03 eta 0:25:27 +epoch [15/50] batch [30/31] time 1.362 (1.393) data 0.000 (0.033) loss 0.4795 (0.9818) acc 90.6250 (73.7500) lr 1.6845e-03 eta 0:25:12 +epoch [16/50] batch [5/31] time 1.337 (1.543) data 0.001 (0.192) loss 1.7070 (0.9654) acc 65.6250 (74.3750) lr 1.6374e-03 eta 0:27:46 +epoch [16/50] batch [10/31] time 1.360 (1.451) data 0.001 (0.096) loss 0.6021 (0.9190) acc 78.1250 (76.5625) lr 1.6374e-03 eta 0:25:59 +epoch [16/50] batch [15/31] time 1.374 (1.432) data 0.000 (0.064) loss 0.6348 (0.9849) acc 90.6250 (75.8333) lr 1.6374e-03 eta 0:25:31 +epoch [16/50] batch [20/31] time 1.362 (1.414) data 0.000 (0.048) loss 1.5391 (1.0742) acc 75.0000 (75.6250) lr 1.6374e-03 eta 0:25:06 +epoch [16/50] batch [25/31] time 1.358 (1.403) data 0.000 (0.039) loss 0.7310 (1.0913) acc 75.0000 (75.1250) lr 1.6374e-03 eta 0:24:46 +epoch [16/50] batch [30/31] time 1.356 (1.395) data 0.000 (0.032) loss 1.0547 (1.0997) acc 75.0000 (73.7500) lr 1.6374e-03 eta 0:24:31 +epoch [17/50] batch [5/31] time 1.354 (1.534) data 0.000 (0.181) loss 0.7803 (0.9734) acc 78.1250 (79.3750) lr 1.5878e-03 eta 0:26:49 +epoch [17/50] batch [10/31] time 1.347 (1.445) data 0.000 (0.091) loss 1.2070 (1.0242) acc 71.8750 (78.1250) lr 1.5878e-03 eta 0:25:09 +epoch [17/50] batch [15/31] time 1.346 (1.417) data 0.000 (0.061) loss 0.8418 (1.0136) acc 84.3750 (77.2917) lr 1.5878e-03 eta 0:24:32 +epoch [17/50] batch [20/31] time 1.362 (1.402) data 0.000 (0.045) loss 0.4880 (0.9854) acc 90.6250 (76.4062) lr 1.5878e-03 eta 0:24:09 +epoch [17/50] batch [25/31] time 1.348 (1.392) data 0.000 (0.036) loss 1.0645 (0.9854) acc 62.5000 (75.8750) lr 1.5878e-03 eta 0:23:52 +epoch [17/50] batch [30/31] time 1.388 (1.387) data 0.000 (0.030) loss 0.9419 (0.9723) acc 75.0000 (75.8333) lr 
1.5878e-03 eta 0:23:40 +epoch [18/50] batch [5/31] time 1.366 (1.571) data 0.001 (0.201) loss 0.8511 (0.9091) acc 75.0000 (76.2500) lr 1.5358e-03 eta 0:26:39 +epoch [18/50] batch [10/31] time 1.377 (1.469) data 0.000 (0.101) loss 1.0439 (0.9220) acc 75.0000 (75.9375) lr 1.5358e-03 eta 0:24:47 +epoch [18/50] batch [15/31] time 1.361 (1.434) data 0.000 (0.067) loss 1.4365 (0.9169) acc 68.7500 (76.4583) lr 1.5358e-03 eta 0:24:05 +epoch [18/50] batch [20/31] time 1.365 (1.416) data 0.000 (0.050) loss 0.4116 (0.8891) acc 84.3750 (77.3438) lr 1.5358e-03 eta 0:23:40 +epoch [18/50] batch [25/31] time 1.380 (1.406) data 0.000 (0.040) loss 1.5889 (0.9571) acc 62.5000 (76.7500) lr 1.5358e-03 eta 0:23:23 +epoch [18/50] batch [30/31] time 1.385 (1.399) data 0.000 (0.034) loss 1.4287 (0.9919) acc 65.6250 (75.9375) lr 1.5358e-03 eta 0:23:09 +epoch [19/50] batch [5/31] time 1.356 (1.556) data 0.000 (0.194) loss 1.3906 (0.9702) acc 65.6250 (76.2500) lr 1.4818e-03 eta 0:25:35 +epoch [19/50] batch [10/31] time 1.375 (1.457) data 0.000 (0.097) loss 0.8101 (0.8678) acc 78.1250 (77.8125) lr 1.4818e-03 eta 0:23:50 +epoch [19/50] batch [15/31] time 1.369 (1.425) data 0.000 (0.065) loss 0.8892 (0.9222) acc 84.3750 (77.2917) lr 1.4818e-03 eta 0:23:12 +epoch [19/50] batch [20/31] time 1.363 (1.409) data 0.000 (0.049) loss 1.1885 (0.9660) acc 75.0000 (75.9375) lr 1.4818e-03 eta 0:22:49 +epoch [19/50] batch [25/31] time 1.374 (1.399) data 0.000 (0.039) loss 0.8472 (0.9753) acc 75.0000 (76.1250) lr 1.4818e-03 eta 0:22:32 +epoch [19/50] batch [30/31] time 1.361 (1.393) data 0.000 (0.033) loss 0.8706 (0.9523) acc 65.6250 (76.4583) lr 1.4818e-03 eta 0:22:20 +epoch [20/50] batch [5/31] time 1.359 (1.579) data 0.000 (0.201) loss 1.0156 (1.1243) acc 78.1250 (73.7500) lr 1.4258e-03 eta 0:25:09 +epoch [20/50] batch [10/31] time 1.374 (1.475) data 0.000 (0.101) loss 1.0352 (0.9884) acc 75.0000 (74.6875) lr 1.4258e-03 eta 0:23:22 +epoch [20/50] batch [15/31] time 1.384 (1.439) data 0.000 (0.067) loss 
1.0391 (1.0142) acc 75.0000 (74.1667) lr 1.4258e-03 eta 0:22:40 +epoch [20/50] batch [20/31] time 1.371 (1.418) data 0.000 (0.050) loss 0.9897 (0.9600) acc 75.0000 (75.4688) lr 1.4258e-03 eta 0:22:14 +epoch [20/50] batch [25/31] time 1.345 (1.406) data 0.000 (0.040) loss 0.9365 (0.9455) acc 81.2500 (76.2500) lr 1.4258e-03 eta 0:21:56 +epoch [20/50] batch [30/31] time 1.377 (1.398) data 0.000 (0.034) loss 1.1504 (0.9326) acc 75.0000 (76.6667) lr 1.4258e-03 eta 0:21:41 +epoch [21/50] batch [5/31] time 1.359 (1.549) data 0.000 (0.185) loss 0.3423 (0.6707) acc 87.5000 (84.3750) lr 1.3681e-03 eta 0:23:53 +epoch [21/50] batch [10/31] time 1.373 (1.454) data 0.000 (0.093) loss 0.5986 (0.7677) acc 84.3750 (80.6250) lr 1.3681e-03 eta 0:22:17 +epoch [21/50] batch [15/31] time 1.376 (1.424) data 0.000 (0.062) loss 0.6997 (0.8298) acc 75.0000 (78.7500) lr 1.3681e-03 eta 0:21:42 +epoch [21/50] batch [20/31] time 1.357 (1.409) data 0.000 (0.046) loss 1.4590 (0.8997) acc 75.0000 (77.3438) lr 1.3681e-03 eta 0:21:22 +epoch [21/50] batch [25/31] time 1.358 (1.399) data 0.000 (0.037) loss 0.4397 (0.9040) acc 90.6250 (76.8750) lr 1.3681e-03 eta 0:21:05 +epoch [21/50] batch [30/31] time 1.377 (1.394) data 0.000 (0.031) loss 1.0293 (0.9009) acc 65.6250 (77.1875) lr 1.3681e-03 eta 0:20:54 +epoch [22/50] batch [5/31] time 1.390 (1.582) data 0.001 (0.180) loss 0.9355 (0.8396) acc 75.0000 (81.2500) lr 1.3090e-03 eta 0:23:34 +epoch [22/50] batch [10/31] time 1.374 (1.477) data 0.001 (0.090) loss 0.5669 (0.8169) acc 84.3750 (81.2500) lr 1.3090e-03 eta 0:21:53 +epoch [22/50] batch [15/31] time 1.382 (1.442) data 0.000 (0.060) loss 0.9028 (0.8554) acc 84.3750 (80.2083) lr 1.3090e-03 eta 0:21:15 +epoch [22/50] batch [20/31] time 1.344 (1.421) data 0.000 (0.045) loss 0.4634 (0.8603) acc 81.2500 (79.3750) lr 1.3090e-03 eta 0:20:49 +epoch [22/50] batch [25/31] time 1.382 (1.410) data 0.000 (0.036) loss 0.7896 (0.8932) acc 84.3750 (79.5000) lr 1.3090e-03 eta 0:20:31 +epoch [22/50] batch [30/31] time 
1.373 (1.402) data 0.000 (0.030) loss 0.8491 (0.8805) acc 75.0000 (78.9583) lr 1.3090e-03 eta 0:20:18 +epoch [23/50] batch [5/31] time 1.352 (1.518) data 0.000 (0.157) loss 0.5923 (0.8858) acc 84.3750 (78.7500) lr 1.2487e-03 eta 0:21:49 +epoch [23/50] batch [10/31] time 1.348 (1.433) data 0.000 (0.079) loss 0.6733 (0.9796) acc 75.0000 (78.1250) lr 1.2487e-03 eta 0:20:29 +epoch [23/50] batch [15/31] time 1.372 (1.419) data 0.001 (0.053) loss 1.0518 (0.9943) acc 81.2500 (79.1667) lr 1.2487e-03 eta 0:20:10 +epoch [23/50] batch [20/31] time 1.342 (1.402) data 0.001 (0.040) loss 0.8892 (0.9595) acc 78.1250 (78.9062) lr 1.2487e-03 eta 0:19:48 +epoch [23/50] batch [25/31] time 1.361 (1.394) data 0.000 (0.032) loss 0.7715 (0.9306) acc 75.0000 (78.7500) lr 1.2487e-03 eta 0:19:35 +epoch [23/50] batch [30/31] time 1.356 (1.389) data 0.000 (0.027) loss 0.9756 (0.9260) acc 84.3750 (78.8542) lr 1.2487e-03 eta 0:19:23 +epoch [24/50] batch [5/31] time 1.379 (1.549) data 0.000 (0.167) loss 1.3594 (1.0318) acc 68.7500 (75.0000) lr 1.1874e-03 eta 0:21:29 +epoch [24/50] batch [10/31] time 1.376 (1.457) data 0.000 (0.084) loss 0.7734 (1.0979) acc 75.0000 (74.0625) lr 1.1874e-03 eta 0:20:04 +epoch [24/50] batch [15/31] time 1.364 (1.426) data 0.000 (0.056) loss 1.1074 (1.0117) acc 71.8750 (76.2500) lr 1.1874e-03 eta 0:19:31 +epoch [24/50] batch [20/31] time 1.516 (1.417) data 0.000 (0.042) loss 1.4678 (0.9860) acc 65.6250 (76.5625) lr 1.1874e-03 eta 0:19:17 +epoch [24/50] batch [25/31] time 1.368 (1.407) data 0.001 (0.034) loss 1.2217 (0.9663) acc 81.2500 (77.5000) lr 1.1874e-03 eta 0:19:02 +epoch [24/50] batch [30/31] time 1.348 (1.401) data 0.000 (0.028) loss 0.8438 (0.9767) acc 81.2500 (77.2917) lr 1.1874e-03 eta 0:18:50 +epoch [25/50] batch [5/31] time 1.371 (1.581) data 0.000 (0.206) loss 1.0439 (0.9533) acc 75.0000 (80.0000) lr 1.1253e-03 eta 0:21:06 +epoch [25/50] batch [10/31] time 1.349 (1.471) data 0.000 (0.103) loss 1.4092 (0.9952) acc 65.6250 (77.1875) lr 1.1253e-03 eta 
0:19:30 +epoch [25/50] batch [15/31] time 1.349 (1.432) data 0.001 (0.069) loss 0.9653 (0.9823) acc 81.2500 (77.9167) lr 1.1253e-03 eta 0:18:53 +epoch [25/50] batch [20/31] time 1.357 (1.413) data 0.000 (0.052) loss 0.6953 (0.9615) acc 84.3750 (77.8125) lr 1.1253e-03 eta 0:18:30 +epoch [25/50] batch [25/31] time 1.385 (1.405) data 0.000 (0.042) loss 0.6997 (0.9647) acc 84.3750 (77.1250) lr 1.1253e-03 eta 0:18:17 +epoch [25/50] batch [30/31] time 1.355 (1.398) data 0.000 (0.035) loss 1.0225 (0.9514) acc 68.7500 (77.2917) lr 1.1253e-03 eta 0:18:04 +epoch [26/50] batch [5/31] time 1.361 (1.537) data 0.001 (0.166) loss 0.5825 (0.8684) acc 87.5000 (80.0000) lr 1.0628e-03 eta 0:19:43 +epoch [26/50] batch [10/31] time 1.356 (1.449) data 0.001 (0.083) loss 1.1592 (0.9965) acc 78.1250 (76.5625) lr 1.0628e-03 eta 0:18:28 +epoch [26/50] batch [15/31] time 1.363 (1.422) data 0.000 (0.056) loss 0.9565 (0.9533) acc 81.2500 (77.7083) lr 1.0628e-03 eta 0:18:00 +epoch [26/50] batch [20/31] time 1.370 (1.407) data 0.000 (0.042) loss 1.2998 (0.9741) acc 65.6250 (77.1875) lr 1.0628e-03 eta 0:17:42 +epoch [26/50] batch [25/31] time 1.361 (1.398) data 0.000 (0.034) loss 0.6221 (0.9233) acc 84.3750 (78.2500) lr 1.0628e-03 eta 0:17:28 +epoch [26/50] batch [30/31] time 1.355 (1.391) data 0.000 (0.028) loss 0.8198 (0.9348) acc 87.5000 (77.8125) lr 1.0628e-03 eta 0:17:16 +epoch [27/50] batch [5/31] time 1.354 (1.537) data 0.001 (0.174) loss 0.2998 (1.1063) acc 90.6250 (77.5000) lr 1.0000e-03 eta 0:18:55 +epoch [27/50] batch [10/31] time 1.363 (1.447) data 0.000 (0.087) loss 0.6597 (0.9926) acc 78.1250 (79.3750) lr 1.0000e-03 eta 0:17:42 +epoch [27/50] batch [15/31] time 1.361 (1.427) data 0.000 (0.058) loss 1.4385 (0.9410) acc 78.1250 (80.2083) lr 1.0000e-03 eta 0:17:20 +epoch [27/50] batch [20/31] time 1.366 (1.409) data 0.000 (0.044) loss 0.9722 (0.9299) acc 81.2500 (80.4688) lr 1.0000e-03 eta 0:17:00 +epoch [27/50] batch [25/31] time 1.354 (1.398) data 0.000 (0.035) loss 0.4392 (0.8876) 
acc 93.7500 (81.0000) lr 1.0000e-03 eta 0:16:45 +epoch [27/50] batch [30/31] time 1.365 (1.393) data 0.000 (0.029) loss 0.9893 (0.8975) acc 68.7500 (79.1667) lr 1.0000e-03 eta 0:16:34 +epoch [28/50] batch [5/31] time 1.382 (1.543) data 0.001 (0.158) loss 0.5430 (0.8247) acc 81.2500 (81.8750) lr 9.3721e-04 eta 0:18:12 +epoch [28/50] batch [10/31] time 1.372 (1.459) data 0.000 (0.079) loss 0.7754 (0.9812) acc 78.1250 (78.7500) lr 9.3721e-04 eta 0:17:05 +epoch [28/50] batch [15/31] time 1.347 (1.425) data 0.000 (0.053) loss 0.4255 (0.9052) acc 90.6250 (79.3750) lr 9.3721e-04 eta 0:16:34 +epoch [28/50] batch [20/31] time 1.367 (1.407) data 0.000 (0.040) loss 0.8735 (0.8570) acc 81.2500 (80.6250) lr 9.3721e-04 eta 0:16:14 +epoch [28/50] batch [25/31] time 1.374 (1.398) data 0.000 (0.032) loss 0.5327 (0.8394) acc 87.5000 (81.1250) lr 9.3721e-04 eta 0:16:01 +epoch [28/50] batch [30/31] time 1.345 (1.392) data 0.000 (0.027) loss 1.1123 (0.8259) acc 68.7500 (81.5625) lr 9.3721e-04 eta 0:15:50 +epoch [29/50] batch [5/31] time 1.379 (1.551) data 0.000 (0.172) loss 0.4854 (0.6497) acc 84.3750 (81.8750) lr 8.7467e-04 eta 0:17:30 +epoch [29/50] batch [10/31] time 1.381 (1.458) data 0.001 (0.086) loss 0.8750 (0.7554) acc 81.2500 (78.7500) lr 8.7467e-04 eta 0:16:19 +epoch [29/50] batch [15/31] time 1.381 (1.429) data 0.001 (0.058) loss 1.0840 (0.8131) acc 84.3750 (79.3750) lr 8.7467e-04 eta 0:15:53 +epoch [29/50] batch [20/31] time 1.353 (1.414) data 0.000 (0.043) loss 1.2852 (0.8243) acc 71.8750 (79.5312) lr 8.7467e-04 eta 0:15:35 +epoch [29/50] batch [25/31] time 1.377 (1.405) data 0.000 (0.035) loss 0.9521 (0.8640) acc 78.1250 (78.7500) lr 8.7467e-04 eta 0:15:22 +epoch [29/50] batch [30/31] time 1.350 (1.397) data 0.000 (0.029) loss 0.5435 (0.8417) acc 84.3750 (79.5833) lr 8.7467e-04 eta 0:15:10 +epoch [30/50] batch [5/31] time 1.366 (1.519) data 0.000 (0.158) loss 0.7451 (0.9563) acc 78.1250 (75.6250) lr 8.1262e-04 eta 0:16:21 +epoch [30/50] batch [10/31] time 1.343 (1.438) 
data 0.000 (0.079) loss 0.8613 (0.8417) acc 75.0000 (79.0625) lr 8.1262e-04 eta 0:15:21 +epoch [30/50] batch [15/31] time 1.350 (1.411) data 0.000 (0.053) loss 1.1758 (0.9116) acc 78.1250 (78.1250) lr 8.1262e-04 eta 0:14:57 +epoch [30/50] batch [20/31] time 1.362 (1.398) data 0.000 (0.040) loss 1.0068 (0.8380) acc 75.0000 (79.6875) lr 8.1262e-04 eta 0:14:42 +epoch [30/50] batch [25/31] time 1.347 (1.392) data 0.000 (0.032) loss 0.7744 (0.8055) acc 75.0000 (80.3750) lr 8.1262e-04 eta 0:14:31 +epoch [30/50] batch [30/31] time 1.361 (1.387) data 0.000 (0.027) loss 0.6860 (0.8100) acc 71.8750 (79.7917) lr 8.1262e-04 eta 0:14:21 +epoch [31/50] batch [5/31] time 1.368 (1.538) data 0.000 (0.163) loss 1.0244 (0.7375) acc 81.2500 (83.7500) lr 7.5131e-04 eta 0:15:45 +epoch [31/50] batch [10/31] time 1.353 (1.450) data 0.000 (0.082) loss 0.9980 (0.8767) acc 81.2500 (82.1875) lr 7.5131e-04 eta 0:14:44 +epoch [31/50] batch [15/31] time 1.378 (1.424) data 0.000 (0.055) loss 0.9717 (0.8434) acc 71.8750 (81.2500) lr 7.5131e-04 eta 0:14:21 +epoch [31/50] batch [20/31] time 1.400 (1.410) data 0.000 (0.041) loss 1.4316 (0.8599) acc 68.7500 (80.3125) lr 7.5131e-04 eta 0:14:05 +epoch [31/50] batch [25/31] time 1.374 (1.403) data 0.000 (0.033) loss 0.4392 (0.8023) acc 87.5000 (81.5000) lr 7.5131e-04 eta 0:13:54 +epoch [31/50] batch [30/31] time 1.358 (1.394) data 0.000 (0.028) loss 0.5654 (0.7928) acc 84.3750 (81.6667) lr 7.5131e-04 eta 0:13:42 +epoch [32/50] batch [5/31] time 1.356 (1.545) data 0.000 (0.179) loss 0.5825 (0.7062) acc 84.3750 (84.3750) lr 6.9098e-04 eta 0:15:02 +epoch [32/50] batch [10/31] time 1.375 (1.455) data 0.000 (0.090) loss 1.3379 (0.8576) acc 75.0000 (81.2500) lr 6.9098e-04 eta 0:14:02 +epoch [32/50] batch [15/31] time 1.342 (1.423) data 0.000 (0.060) loss 0.4670 (0.7964) acc 90.6250 (82.7083) lr 6.9098e-04 eta 0:13:36 +epoch [32/50] batch [20/31] time 1.363 (1.407) data 0.000 (0.045) loss 0.6299 (0.8385) acc 81.2500 (80.6250) lr 6.9098e-04 eta 0:13:20 +epoch 
[32/50] batch [25/31] time 1.361 (1.398) data 0.000 (0.036) loss 0.7910 (0.8194) acc 81.2500 (80.3750) lr 6.9098e-04 eta 0:13:08 +epoch [32/50] batch [30/31] time 1.368 (1.391) data 0.000 (0.030) loss 0.9937 (0.8383) acc 75.0000 (79.8958) lr 6.9098e-04 eta 0:12:57 +epoch [33/50] batch [5/31] time 1.370 (1.560) data 0.001 (0.169) loss 0.7402 (0.7306) acc 78.1250 (81.8750) lr 6.3188e-04 eta 0:14:22 +epoch [33/50] batch [10/31] time 1.362 (1.462) data 0.000 (0.085) loss 0.2800 (0.6944) acc 87.5000 (82.5000) lr 6.3188e-04 eta 0:13:21 +epoch [33/50] batch [15/31] time 1.368 (1.429) data 0.000 (0.057) loss 0.7847 (0.7208) acc 87.5000 (82.2917) lr 6.3188e-04 eta 0:12:55 +epoch [33/50] batch [20/31] time 1.365 (1.414) data 0.000 (0.043) loss 1.0059 (0.7542) acc 78.1250 (81.7188) lr 6.3188e-04 eta 0:12:40 +epoch [33/50] batch [25/31] time 1.376 (1.404) data 0.000 (0.034) loss 1.0742 (0.7548) acc 75.0000 (81.3750) lr 6.3188e-04 eta 0:12:28 +epoch [33/50] batch [30/31] time 1.359 (1.396) data 0.000 (0.028) loss 0.8643 (0.7709) acc 71.8750 (81.3542) lr 6.3188e-04 eta 0:12:17 +epoch [34/50] batch [5/31] time 1.366 (1.520) data 0.000 (0.161) loss 0.6567 (1.1291) acc 84.3750 (78.1250) lr 5.7422e-04 eta 0:13:13 +epoch [34/50] batch [10/31] time 1.375 (1.439) data 0.000 (0.081) loss 0.7334 (0.9408) acc 84.3750 (81.5625) lr 5.7422e-04 eta 0:12:23 +epoch [34/50] batch [15/31] time 1.364 (1.424) data 0.000 (0.054) loss 0.8354 (0.9220) acc 81.2500 (80.8333) lr 5.7422e-04 eta 0:12:09 +epoch [34/50] batch [20/31] time 1.363 (1.407) data 0.000 (0.041) loss 0.5142 (0.8208) acc 87.5000 (82.1875) lr 5.7422e-04 eta 0:11:53 +epoch [34/50] batch [25/31] time 1.358 (1.398) data 0.000 (0.032) loss 1.1064 (0.8122) acc 81.2500 (82.1250) lr 5.7422e-04 eta 0:11:41 +epoch [34/50] batch [30/31] time 1.363 (1.392) data 0.000 (0.027) loss 0.6597 (0.8360) acc 75.0000 (80.9375) lr 5.7422e-04 eta 0:11:31 +epoch [35/50] batch [5/31] time 1.357 (1.535) data 0.000 (0.163) loss 0.8623 (0.7092) acc 81.2500 
(85.6250) lr 5.1825e-04 eta 0:12:33 +epoch [35/50] batch [10/31] time 1.352 (1.447) data 0.000 (0.082) loss 0.6948 (0.7908) acc 84.3750 (83.1250) lr 5.1825e-04 eta 0:11:43 +epoch [35/50] batch [15/31] time 1.349 (1.417) data 0.000 (0.055) loss 1.0869 (0.8430) acc 71.8750 (81.0417) lr 5.1825e-04 eta 0:11:21 +epoch [35/50] batch [20/31] time 1.503 (1.410) data 0.000 (0.041) loss 1.1211 (0.8485) acc 62.5000 (80.3125) lr 5.1825e-04 eta 0:11:11 +epoch [35/50] batch [25/31] time 1.350 (1.402) data 0.000 (0.033) loss 0.5239 (0.8221) acc 93.7500 (80.7500) lr 5.1825e-04 eta 0:11:00 +epoch [35/50] batch [30/31] time 1.381 (1.397) data 0.000 (0.027) loss 0.6870 (0.8141) acc 84.3750 (81.0417) lr 5.1825e-04 eta 0:10:50 +epoch [36/50] batch [5/31] time 1.374 (1.543) data 0.000 (0.180) loss 1.0449 (0.9161) acc 71.8750 (77.5000) lr 4.6417e-04 eta 0:11:49 +epoch [36/50] batch [10/31] time 1.381 (1.454) data 0.000 (0.090) loss 0.6631 (0.7962) acc 84.3750 (81.5625) lr 4.6417e-04 eta 0:11:01 +epoch [36/50] batch [15/31] time 1.338 (1.422) data 0.000 (0.060) loss 0.4495 (0.7560) acc 81.2500 (81.8750) lr 4.6417e-04 eta 0:10:40 +epoch [36/50] batch [20/31] time 1.347 (1.406) data 0.000 (0.045) loss 0.7339 (0.7481) acc 75.0000 (81.7188) lr 4.6417e-04 eta 0:10:25 +epoch [36/50] batch [25/31] time 1.351 (1.398) data 0.000 (0.036) loss 0.6318 (0.7395) acc 90.6250 (82.5000) lr 4.6417e-04 eta 0:10:14 +epoch [36/50] batch [30/31] time 1.359 (1.392) data 0.000 (0.030) loss 0.9883 (0.7472) acc 75.0000 (81.8750) lr 4.6417e-04 eta 0:10:05 +epoch [37/50] batch [5/31] time 1.354 (1.522) data 0.001 (0.164) loss 0.4236 (0.8015) acc 93.7500 (81.8750) lr 4.1221e-04 eta 0:10:52 +epoch [37/50] batch [10/31] time 1.351 (1.439) data 0.001 (0.082) loss 0.8120 (0.8090) acc 78.1250 (81.5625) lr 4.1221e-04 eta 0:10:10 +epoch [37/50] batch [15/31] time 1.349 (1.413) data 0.001 (0.055) loss 0.8271 (0.7500) acc 75.0000 (81.8750) lr 4.1221e-04 eta 0:09:51 +epoch [37/50] batch [20/31] time 1.350 (1.399) data 0.000 
(0.041) loss 0.7256 (0.7631) acc 75.0000 (81.2500) lr 4.1221e-04 eta 0:09:39 +epoch [37/50] batch [25/31] time 1.367 (1.390) data 0.000 (0.033) loss 0.8335 (0.7629) acc 78.1250 (81.2500) lr 4.1221e-04 eta 0:09:28 +epoch [37/50] batch [30/31] time 1.372 (1.385) data 0.000 (0.028) loss 0.6538 (0.7392) acc 84.3750 (81.7708) lr 4.1221e-04 eta 0:09:19 +epoch [38/50] batch [5/31] time 1.375 (1.534) data 0.000 (0.175) loss 0.6138 (0.6263) acc 87.5000 (86.2500) lr 3.6258e-04 eta 0:10:10 +epoch [38/50] batch [10/31] time 1.361 (1.450) data 0.000 (0.088) loss 0.4949 (0.6746) acc 90.6250 (84.3750) lr 3.6258e-04 eta 0:09:29 +epoch [38/50] batch [15/31] time 1.369 (1.436) data 0.000 (0.059) loss 1.0635 (0.6936) acc 81.2500 (84.1667) lr 3.6258e-04 eta 0:09:17 +epoch [38/50] batch [20/31] time 1.381 (1.420) data 0.000 (0.044) loss 0.4934 (0.6847) acc 87.5000 (84.2188) lr 3.6258e-04 eta 0:09:03 +epoch [38/50] batch [25/31] time 1.359 (1.409) data 0.000 (0.035) loss 0.6123 (0.6870) acc 84.3750 (83.8750) lr 3.6258e-04 eta 0:08:52 +epoch [38/50] batch [30/31] time 1.352 (1.403) data 0.000 (0.030) loss 0.9077 (0.6899) acc 84.3750 (83.9583) lr 3.6258e-04 eta 0:08:43 +epoch [39/50] batch [5/31] time 1.358 (1.536) data 0.000 (0.172) loss 0.6846 (0.7354) acc 78.1250 (81.8750) lr 3.1545e-04 eta 0:09:23 +epoch [39/50] batch [10/31] time 1.359 (1.453) data 0.001 (0.086) loss 0.4294 (0.7089) acc 90.6250 (81.8750) lr 3.1545e-04 eta 0:08:45 +epoch [39/50] batch [15/31] time 1.352 (1.424) data 0.000 (0.058) loss 0.8242 (0.7614) acc 81.2500 (81.8750) lr 3.1545e-04 eta 0:08:28 +epoch [39/50] batch [20/31] time 1.344 (1.408) data 0.000 (0.043) loss 1.0127 (0.7722) acc 87.5000 (82.9688) lr 3.1545e-04 eta 0:08:15 +epoch [39/50] batch [25/31] time 1.371 (1.400) data 0.000 (0.035) loss 1.3613 (0.7588) acc 71.8750 (83.0000) lr 3.1545e-04 eta 0:08:05 +epoch [39/50] batch [30/31] time 1.350 (1.394) data 0.000 (0.029) loss 0.5239 (0.7412) acc 84.3750 (82.9167) lr 3.1545e-04 eta 0:07:56 +epoch [40/50] batch 
[5/31] time 1.361 (1.544) data 0.000 (0.173) loss 0.9658 (0.6842) acc 84.3750 (84.3750) lr 2.7103e-04 eta 0:08:38 +epoch [40/50] batch [10/31] time 1.382 (1.459) data 0.000 (0.087) loss 0.3757 (0.7178) acc 87.5000 (84.0625) lr 2.7103e-04 eta 0:08:02 +epoch [40/50] batch [15/31] time 1.347 (1.421) data 0.000 (0.058) loss 1.2822 (0.7696) acc 71.8750 (82.9167) lr 2.7103e-04 eta 0:07:43 +epoch [40/50] batch [20/31] time 1.366 (1.405) data 0.000 (0.044) loss 0.4683 (0.7388) acc 90.6250 (83.7500) lr 2.7103e-04 eta 0:07:31 +epoch [40/50] batch [25/31] time 1.370 (1.398) data 0.000 (0.035) loss 0.6025 (0.7200) acc 78.1250 (84.0000) lr 2.7103e-04 eta 0:07:21 +epoch [40/50] batch [30/31] time 1.396 (1.394) data 0.000 (0.029) loss 0.5200 (0.7135) acc 90.6250 (83.6458) lr 2.7103e-04 eta 0:07:13 +epoch [41/50] batch [5/31] time 1.368 (1.542) data 0.000 (0.154) loss 0.9458 (0.7002) acc 78.1250 (83.1250) lr 2.2949e-04 eta 0:07:50 +epoch [41/50] batch [10/31] time 1.370 (1.452) data 0.000 (0.077) loss 0.9302 (0.7808) acc 71.8750 (81.2500) lr 2.2949e-04 eta 0:07:15 +epoch [41/50] batch [15/31] time 1.359 (1.422) data 0.001 (0.052) loss 0.2471 (0.6696) acc 96.8750 (83.7500) lr 2.2949e-04 eta 0:06:59 +epoch [41/50] batch [20/31] time 1.362 (1.406) data 0.000 (0.039) loss 0.5269 (0.6434) acc 84.3750 (84.2188) lr 2.2949e-04 eta 0:06:47 +epoch [41/50] batch [25/31] time 1.368 (1.394) data 0.000 (0.031) loss 0.8066 (0.6244) acc 78.1250 (84.5000) lr 2.2949e-04 eta 0:06:37 +epoch [41/50] batch [30/31] time 1.348 (1.389) data 0.001 (0.026) loss 0.9160 (0.6512) acc 75.0000 (84.3750) lr 2.2949e-04 eta 0:06:28 +epoch [42/50] batch [5/31] time 1.362 (1.537) data 0.001 (0.174) loss 1.0625 (0.7685) acc 75.0000 (81.2500) lr 1.9098e-04 eta 0:07:01 +epoch [42/50] batch [10/31] time 1.374 (1.448) data 0.000 (0.087) loss 0.7002 (0.6819) acc 84.3750 (83.1250) lr 1.9098e-04 eta 0:06:29 +epoch [42/50] batch [15/31] time 1.363 (1.417) data 0.001 (0.058) loss 0.9146 (0.7701) acc 84.3750 (81.2500) lr 
1.9098e-04 eta 0:06:14 +epoch [42/50] batch [20/31] time 1.360 (1.404) data 0.000 (0.044) loss 0.3889 (0.7077) acc 87.5000 (82.9688) lr 1.9098e-04 eta 0:06:03 +epoch [42/50] batch [25/31] time 1.370 (1.397) data 0.000 (0.035) loss 1.0879 (0.7479) acc 71.8750 (82.3750) lr 1.9098e-04 eta 0:05:54 +epoch [42/50] batch [30/31] time 1.360 (1.392) data 0.000 (0.029) loss 0.6860 (0.7446) acc 87.5000 (82.6042) lr 1.9098e-04 eta 0:05:46 +epoch [43/50] batch [5/31] time 1.364 (1.537) data 0.000 (0.169) loss 0.7168 (0.5835) acc 87.5000 (88.1250) lr 1.5567e-04 eta 0:06:13 +epoch [43/50] batch [10/31] time 1.365 (1.450) data 0.001 (0.085) loss 0.3188 (0.6941) acc 90.6250 (86.5625) lr 1.5567e-04 eta 0:05:44 +epoch [43/50] batch [15/31] time 1.353 (1.422) data 0.001 (0.057) loss 0.4817 (0.6191) acc 87.5000 (87.2917) lr 1.5567e-04 eta 0:05:31 +epoch [43/50] batch [20/31] time 1.387 (1.406) data 0.000 (0.043) loss 0.1838 (0.6278) acc 96.8750 (86.0938) lr 1.5567e-04 eta 0:05:20 +epoch [43/50] batch [25/31] time 1.341 (1.399) data 0.000 (0.034) loss 0.9404 (0.6569) acc 87.5000 (85.2500) lr 1.5567e-04 eta 0:05:11 +epoch [43/50] batch [30/31] time 1.371 (1.393) data 0.000 (0.029) loss 0.2668 (0.6654) acc 96.8750 (85.1042) lr 1.5567e-04 eta 0:05:03 +epoch [44/50] batch [5/31] time 1.374 (1.573) data 0.000 (0.172) loss 1.2715 (0.8109) acc 81.2500 (84.3750) lr 1.2369e-04 eta 0:05:33 +epoch [44/50] batch [10/31] time 1.367 (1.469) data 0.001 (0.086) loss 1.1387 (0.8833) acc 78.1250 (81.5625) lr 1.2369e-04 eta 0:05:04 +epoch [44/50] batch [15/31] time 1.385 (1.435) data 0.000 (0.058) loss 0.6387 (0.8218) acc 87.5000 (83.3333) lr 1.2369e-04 eta 0:04:49 +epoch [44/50] batch [20/31] time 1.359 (1.419) data 0.000 (0.043) loss 0.7095 (0.8076) acc 81.2500 (82.8125) lr 1.2369e-04 eta 0:04:39 +epoch [44/50] batch [25/31] time 1.379 (1.410) data 0.000 (0.035) loss 0.7524 (0.8098) acc 87.5000 (82.7500) lr 1.2369e-04 eta 0:04:30 +epoch [44/50] batch [30/31] time 1.357 (1.403) data 0.000 (0.029) loss 
0.7153 (0.7745) acc 81.2500 (83.5417) lr 1.2369e-04 eta 0:04:22 +epoch [45/50] batch [5/31] time 1.378 (1.546) data 0.001 (0.168) loss 0.3740 (0.5265) acc 96.8750 (91.2500) lr 9.5173e-05 eta 0:04:39 +epoch [45/50] batch [10/31] time 1.361 (1.453) data 0.001 (0.084) loss 0.7480 (0.6542) acc 81.2500 (86.2500) lr 9.5173e-05 eta 0:04:15 +epoch [45/50] batch [15/31] time 1.355 (1.430) data 0.000 (0.056) loss 0.7222 (0.6557) acc 75.0000 (85.0000) lr 9.5173e-05 eta 0:04:04 +epoch [45/50] batch [20/31] time 1.348 (1.413) data 0.000 (0.042) loss 0.6523 (0.7049) acc 81.2500 (83.4375) lr 9.5173e-05 eta 0:03:54 +epoch [45/50] batch [25/31] time 1.353 (1.400) data 0.000 (0.034) loss 0.6812 (0.6774) acc 75.0000 (83.6250) lr 9.5173e-05 eta 0:03:45 +epoch [45/50] batch [30/31] time 1.360 (1.393) data 0.000 (0.028) loss 1.1201 (0.7064) acc 71.8750 (82.7083) lr 9.5173e-05 eta 0:03:37 +epoch [46/50] batch [5/31] time 1.382 (1.548) data 0.001 (0.173) loss 0.6895 (0.7261) acc 87.5000 (81.8750) lr 7.0224e-05 eta 0:03:52 +epoch [46/50] batch [10/31] time 1.356 (1.452) data 0.000 (0.087) loss 0.5161 (0.6446) acc 87.5000 (84.6875) lr 7.0224e-05 eta 0:03:30 +epoch [46/50] batch [15/31] time 1.349 (1.423) data 0.001 (0.058) loss 0.6035 (0.6144) acc 84.3750 (86.2500) lr 7.0224e-05 eta 0:03:19 +epoch [46/50] batch [20/31] time 1.496 (1.415) data 0.000 (0.044) loss 0.5181 (0.6703) acc 84.3750 (84.8438) lr 7.0224e-05 eta 0:03:11 +epoch [46/50] batch [25/31] time 1.372 (1.403) data 0.000 (0.035) loss 1.1426 (0.7124) acc 84.3750 (84.1250) lr 7.0224e-05 eta 0:03:02 +epoch [46/50] batch [30/31] time 1.364 (1.395) data 0.000 (0.029) loss 0.3047 (0.7047) acc 90.6250 (83.9583) lr 7.0224e-05 eta 0:02:54 +epoch [47/50] batch [5/31] time 1.378 (1.533) data 0.000 (0.162) loss 0.4790 (0.7105) acc 90.6250 (83.1250) lr 4.8943e-05 eta 0:03:02 +epoch [47/50] batch [10/31] time 1.372 (1.452) data 0.000 (0.081) loss 0.9678 (0.7792) acc 84.3750 (82.1875) lr 4.8943e-05 eta 0:02:45 +epoch [47/50] batch [15/31] time 
1.361 (1.421) data 0.001 (0.054) loss 0.7559 (0.7316) acc 84.3750 (83.3333) lr 4.8943e-05 eta 0:02:34 +epoch [47/50] batch [20/31] time 1.378 (1.407) data 0.000 (0.041) loss 1.3027 (0.7734) acc 75.0000 (82.8125) lr 4.8943e-05 eta 0:02:26 +epoch [47/50] batch [25/31] time 1.372 (1.400) data 0.000 (0.033) loss 0.6958 (0.7290) acc 84.3750 (83.5000) lr 4.8943e-05 eta 0:02:18 +epoch [47/50] batch [30/31] time 1.370 (1.394) data 0.000 (0.027) loss 0.8804 (0.7330) acc 78.1250 (83.4375) lr 4.8943e-05 eta 0:02:11 +epoch [48/50] batch [5/31] time 1.351 (1.530) data 0.000 (0.166) loss 1.1260 (0.8375) acc 71.8750 (80.6250) lr 3.1417e-05 eta 0:02:14 +epoch [48/50] batch [10/31] time 1.361 (1.448) data 0.000 (0.083) loss 0.9106 (0.7042) acc 78.1250 (82.8125) lr 3.1417e-05 eta 0:02:00 +epoch [48/50] batch [15/31] time 1.364 (1.422) data 0.000 (0.056) loss 0.8066 (0.6822) acc 84.3750 (83.9583) lr 3.1417e-05 eta 0:01:50 +epoch [48/50] batch [20/31] time 1.369 (1.408) data 0.000 (0.042) loss 0.2722 (0.6889) acc 93.7500 (83.2812) lr 3.1417e-05 eta 0:01:42 +epoch [48/50] batch [25/31] time 1.367 (1.399) data 0.000 (0.034) loss 0.7773 (0.6799) acc 87.5000 (84.0000) lr 3.1417e-05 eta 0:01:35 +epoch [48/50] batch [30/31] time 1.372 (1.394) data 0.000 (0.028) loss 0.4861 (0.6694) acc 90.6250 (84.0625) lr 3.1417e-05 eta 0:01:27 +epoch [49/50] batch [5/31] time 1.349 (1.533) data 0.000 (0.161) loss 0.8228 (0.7031) acc 84.3750 (87.5000) lr 1.7713e-05 eta 0:01:27 +epoch [49/50] batch [10/31] time 1.367 (1.447) data 0.001 (0.081) loss 0.5732 (0.6880) acc 87.5000 (85.9375) lr 1.7713e-05 eta 0:01:15 +epoch [49/50] batch [15/31] time 1.362 (1.427) data 0.000 (0.054) loss 0.6060 (0.6962) acc 84.3750 (85.0000) lr 1.7713e-05 eta 0:01:07 +epoch [49/50] batch [20/31] time 1.351 (1.409) data 0.000 (0.041) loss 0.2756 (0.6428) acc 93.7500 (86.2500) lr 1.7713e-05 eta 0:00:59 +epoch [49/50] batch [25/31] time 1.367 (1.399) data 0.000 (0.032) loss 0.4299 (0.6455) acc 87.5000 (85.6250) lr 1.7713e-05 eta 
0:00:51 +epoch [49/50] batch [30/31] time 1.353 (1.391) data 0.000 (0.027) loss 0.9780 (0.6742) acc 71.8750 (84.7917) lr 1.7713e-05 eta 0:00:44 +epoch [50/50] batch [5/31] time 1.386 (1.542) data 0.000 (0.171) loss 0.4329 (0.5537) acc 96.8750 (88.7500) lr 7.8853e-06 eta 0:00:40 +epoch [50/50] batch [10/31] time 1.344 (1.448) data 0.001 (0.086) loss 0.5361 (0.6722) acc 84.3750 (85.0000) lr 7.8853e-06 eta 0:00:30 +epoch [50/50] batch [15/31] time 1.346 (1.415) data 0.000 (0.057) loss 0.8652 (0.7430) acc 81.2500 (83.7500) lr 7.8853e-06 eta 0:00:22 +epoch [50/50] batch [20/31] time 1.355 (1.399) data 0.000 (0.043) loss 0.4609 (0.7021) acc 90.6250 (84.2188) lr 7.8853e-06 eta 0:00:15 +epoch [50/50] batch [25/31] time 1.348 (1.392) data 0.000 (0.034) loss 0.4346 (0.7164) acc 87.5000 (83.7500) lr 7.8853e-06 eta 0:00:08 +epoch [50/50] batch [30/31] time 1.364 (1.387) data 0.000 (0.029) loss 1.1328 (0.7366) acc 78.1250 (83.3333) lr 7.8853e-06 eta 0:00:01 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 36,046 +* accuracy: 72.1% +* error: 27.9% +* macro_f1: 71.4% +Elapsed: 0:41:42 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 
b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..bc6c62d3 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698419398.ckb-gpu-lambda.257100.0 b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698419398.ckb-gpu-lambda.257100.0 new file mode 100644 index 00000000..03cb6f49 Binary files /dev/null and b/python/ClipDetection/CoOp/output/imagenet/CoOp/vit_l14_ep50_1shots/nctx16_cscFalse_ctpend/seed3/tensorboard/events.out.tfevents.1698419398.ckb-gpu-lambda.257100.0 differ diff --git a/python/ClipDetection/CoOp/requirements.txt b/python/ClipDetection/CoOp/requirements.txt new file mode 100644 index 00000000..a7a7778b --- /dev/null +++ b/python/ClipDetection/CoOp/requirements.txt @@ -0,0 +1,3 @@ +ftfy +regex +tqdm diff --git a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt new file mode 100644 index 00000000..9b268b8e --- /dev/null +++ b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/log.txt @@ -0,0 +1,5340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: 
output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 1 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + 
EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.024 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Creating a 16-shot dataset +Saving preprocessed few-shot data to /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_1.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard) +epoch [1/50] batch [5/500] time 1.336 (2.296) data 0.000 (0.324) loss 2.5957 (3.2949) acc 37.5000 (35.6250) lr 1.0000e-05 eta 15:56:16 +epoch [1/50] batch [10/500] time 1.355 (1.822) data 0.000 (0.162) loss 2.7363 (3.0840) acc 43.7500 (40.0000) lr 1.0000e-05 eta 12:38:55 +epoch [1/50] batch [15/500] time 1.371 (1.668) data 0.000 (0.108) loss 2.2109 (2.7854) acc 50.0000 (44.5833) lr 1.0000e-05 eta 11:34:39 +epoch [1/50] batch [20/500] time 1.348 (1.589) data 0.001 (0.081) loss 2.5312 (2.6285) acc 50.0000 (48.4375) lr 1.0000e-05 eta 11:01:38 +epoch [1/50] batch [25/500] time 1.348 (1.541) data 0.000 (0.065) loss 1.8896 (2.5264) acc 56.2500 (49.1250) lr 1.0000e-05 eta 10:41:23 +epoch [1/50] batch [30/500] time 1.363 (1.510) data 0.000 (0.054) loss 1.5879 (2.4069) acc 65.6250 (51.1458) lr 1.0000e-05 eta 10:28:19 +epoch [1/50] batch [35/500] time 1.369 (1.488) data 0.000 (0.047) loss 1.3916 (2.3061) acc 59.3750 (51.7857) lr 1.0000e-05 eta 10:19:17 +epoch [1/50] batch [40/500] time 1.343 (1.471) data 0.000 (0.041) loss 2.0996 (2.2664) acc 56.2500 (52.4219) lr 1.0000e-05 eta 10:11:56 +epoch [1/50] batch [45/500] time 1.362 (1.458) data 0.000 (0.036) loss 2.7988 (2.2370) acc 43.7500 (52.7778) lr 1.0000e-05 eta 10:06:18 +epoch [1/50] batch [50/500] time 1.348 (1.447) data 0.000 (0.033) loss 1.8828 (2.1898) acc 59.3750 (53.0625) lr 1.0000e-05 eta 10:01:41 +epoch [1/50] batch [55/500] time 1.352 (1.439) data 0.000 (0.030) loss 1.3057 (2.1259) acc 78.1250 (54.3182) lr 1.0000e-05 eta 9:58:03 +epoch [1/50] batch [60/500] time 1.350 (1.432) data 0.000 (0.027) loss 1.0781 (2.0803) acc 68.7500 (54.7396) lr 1.0000e-05 eta 9:55:06 +epoch [1/50] batch [65/500] time 1.357 (1.427) data 0.000 (0.025) loss 1.3174 (2.0519) acc 59.3750 (54.9519) lr 1.0000e-05 eta 9:52:54 +epoch [1/50] batch [70/500] 
time 1.327 (1.422) data 0.000 (0.023) loss 2.0801 (2.0319) acc 62.5000 (55.1786) lr 1.0000e-05 eta 9:50:42 +epoch [1/50] batch [75/500] time 1.330 (1.417) data 0.000 (0.022) loss 1.3330 (1.9988) acc 65.6250 (55.7083) lr 1.0000e-05 eta 9:48:30 +epoch [1/50] batch [80/500] time 1.368 (1.413) data 0.000 (0.021) loss 1.0049 (1.9743) acc 68.7500 (56.4062) lr 1.0000e-05 eta 9:46:59 +epoch [1/50] batch [85/500] time 1.374 (1.411) data 0.000 (0.019) loss 1.5674 (1.9595) acc 71.8750 (56.8015) lr 1.0000e-05 eta 9:45:53 +epoch [1/50] batch [90/500] time 1.365 (1.408) data 0.000 (0.018) loss 2.5020 (1.9502) acc 50.0000 (56.7708) lr 1.0000e-05 eta 9:44:37 +epoch [1/50] batch [95/500] time 1.364 (1.406) data 0.000 (0.017) loss 1.5625 (1.9372) acc 65.6250 (57.2368) lr 1.0000e-05 eta 9:43:29 +epoch [1/50] batch [100/500] time 1.372 (1.403) data 0.000 (0.017) loss 1.6943 (1.9139) acc 62.5000 (57.6562) lr 1.0000e-05 eta 9:42:26 +epoch [1/50] batch [105/500] time 1.351 (1.401) data 0.000 (0.016) loss 1.8271 (1.9145) acc 68.7500 (57.5595) lr 1.0000e-05 eta 9:41:27 +epoch [1/50] batch [110/500] time 1.353 (1.399) data 0.000 (0.015) loss 1.5078 (1.8877) acc 68.7500 (58.3239) lr 1.0000e-05 eta 9:40:32 +epoch [1/50] batch [115/500] time 1.367 (1.398) data 0.000 (0.014) loss 1.9453 (1.8722) acc 53.1250 (58.5598) lr 1.0000e-05 eta 9:39:53 +epoch [1/50] batch [120/500] time 1.363 (1.397) data 0.001 (0.014) loss 2.4883 (1.8671) acc 40.6250 (58.5677) lr 1.0000e-05 eta 9:39:08 +epoch [1/50] batch [125/500] time 1.336 (1.395) data 0.000 (0.013) loss 2.1230 (1.8599) acc 46.8750 (58.7000) lr 1.0000e-05 eta 9:38:09 +epoch [1/50] batch [130/500] time 1.361 (1.395) data 0.000 (0.013) loss 1.8857 (1.8599) acc 62.5000 (58.6298) lr 1.0000e-05 eta 9:38:03 +epoch [1/50] batch [135/500] time 1.362 (1.393) data 0.000 (0.012) loss 1.8574 (1.8511) acc 56.2500 (58.7037) lr 1.0000e-05 eta 9:37:26 +epoch [1/50] batch [140/500] time 1.350 (1.392) data 0.000 (0.012) loss 1.6787 (1.8529) acc 56.2500 (58.5268) lr 
1.0000e-05 eta 9:36:41 +epoch [1/50] batch [145/500] time 1.371 (1.391) data 0.000 (0.012) loss 1.5439 (1.8515) acc 62.5000 (58.4052) lr 1.0000e-05 eta 9:36:10 +epoch [1/50] batch [150/500] time 1.363 (1.390) data 0.000 (0.011) loss 2.3809 (1.8468) acc 43.7500 (58.5625) lr 1.0000e-05 eta 9:35:41 +epoch [1/50] batch [155/500] time 1.364 (1.389) data 0.000 (0.011) loss 1.3672 (1.8343) acc 59.3750 (58.6492) lr 1.0000e-05 eta 9:35:07 +epoch [1/50] batch [160/500] time 1.355 (1.388) data 0.000 (0.010) loss 0.8252 (1.8258) acc 81.2500 (58.7305) lr 1.0000e-05 eta 9:34:35 +epoch [1/50] batch [165/500] time 1.359 (1.387) data 0.000 (0.010) loss 1.6670 (1.8149) acc 53.1250 (58.8826) lr 1.0000e-05 eta 9:33:57 +epoch [1/50] batch [170/500] time 1.372 (1.386) data 0.000 (0.010) loss 1.0439 (1.8017) acc 71.8750 (59.1360) lr 1.0000e-05 eta 9:33:34 +epoch [1/50] batch [175/500] time 1.367 (1.385) data 0.000 (0.010) loss 1.9434 (1.7958) acc 59.3750 (59.1786) lr 1.0000e-05 eta 9:33:14 +epoch [1/50] batch [180/500] time 1.355 (1.385) data 0.000 (0.009) loss 1.8701 (1.7892) acc 59.3750 (59.1840) lr 1.0000e-05 eta 9:32:45 +epoch [1/50] batch [185/500] time 1.348 (1.384) data 0.000 (0.009) loss 1.3232 (1.7843) acc 65.6250 (59.2568) lr 1.0000e-05 eta 9:32:15 +epoch [1/50] batch [190/500] time 1.358 (1.383) data 0.000 (0.009) loss 1.3926 (1.7733) acc 68.7500 (59.5230) lr 1.0000e-05 eta 9:31:57 +epoch [1/50] batch [195/500] time 1.360 (1.383) data 0.000 (0.009) loss 1.6104 (1.7688) acc 59.3750 (59.6314) lr 1.0000e-05 eta 9:31:33 +epoch [1/50] batch [200/500] time 1.355 (1.382) data 0.000 (0.008) loss 1.1621 (1.7606) acc 68.7500 (59.7656) lr 1.0000e-05 eta 9:31:09 +epoch [1/50] batch [205/500] time 1.343 (1.381) data 0.000 (0.008) loss 1.0078 (1.7479) acc 68.7500 (59.9390) lr 1.0000e-05 eta 9:30:40 +epoch [1/50] batch [210/500] time 1.366 (1.380) data 0.000 (0.008) loss 1.6758 (1.7415) acc 68.7500 (60.1190) lr 1.0000e-05 eta 9:30:16 +epoch [1/50] batch [215/500] time 1.350 (1.379) data 
0.000 (0.008) loss 1.3613 (1.7380) acc 75.0000 (60.2471) lr 1.0000e-05 eta 9:29:49 +epoch [1/50] batch [220/500] time 1.368 (1.379) data 0.000 (0.008) loss 1.2627 (1.7310) acc 65.6250 (60.4261) lr 1.0000e-05 eta 9:29:29 +epoch [1/50] batch [225/500] time 1.370 (1.379) data 0.000 (0.008) loss 2.0684 (1.7332) acc 59.3750 (60.4167) lr 1.0000e-05 eta 9:29:18 +epoch [1/50] batch [230/500] time 1.373 (1.379) data 0.000 (0.007) loss 1.6084 (1.7276) acc 56.2500 (60.4348) lr 1.0000e-05 eta 9:29:14 +epoch [1/50] batch [235/500] time 1.360 (1.379) data 0.000 (0.007) loss 1.5430 (1.7261) acc 62.5000 (60.4654) lr 1.0000e-05 eta 9:29:00 +epoch [1/50] batch [240/500] time 1.354 (1.378) data 0.000 (0.007) loss 1.5811 (1.7237) acc 65.6250 (60.5859) lr 1.0000e-05 eta 9:28:35 +epoch [1/50] batch [245/500] time 1.342 (1.377) data 0.000 (0.007) loss 1.4033 (1.7219) acc 68.7500 (60.6378) lr 1.0000e-05 eta 9:28:16 +epoch [1/50] batch [250/500] time 1.359 (1.377) data 0.000 (0.007) loss 1.4551 (1.7234) acc 68.7500 (60.6125) lr 1.0000e-05 eta 9:27:59 +epoch [1/50] batch [255/500] time 1.352 (1.377) data 0.000 (0.007) loss 1.5400 (1.7234) acc 59.3750 (60.6618) lr 1.0000e-05 eta 9:27:43 +epoch [1/50] batch [260/500] time 1.352 (1.376) data 0.000 (0.007) loss 2.3906 (1.7175) acc 53.1250 (60.7933) lr 1.0000e-05 eta 9:27:31 +epoch [1/50] batch [265/500] time 1.369 (1.376) data 0.000 (0.006) loss 1.7559 (1.7127) acc 65.6250 (60.9552) lr 1.0000e-05 eta 9:27:16 +epoch [1/50] batch [270/500] time 1.353 (1.376) data 0.000 (0.006) loss 1.3486 (1.7093) acc 65.6250 (61.0301) lr 1.0000e-05 eta 9:27:01 +epoch [1/50] batch [275/500] time 1.356 (1.376) data 0.000 (0.006) loss 1.8271 (1.7078) acc 71.8750 (61.0682) lr 1.0000e-05 eta 9:26:55 +epoch [1/50] batch [280/500] time 1.370 (1.376) data 0.000 (0.006) loss 2.8652 (1.7116) acc 46.8750 (61.0491) lr 1.0000e-05 eta 9:26:45 +epoch [1/50] batch [285/500] time 1.365 (1.375) data 0.001 (0.006) loss 1.1494 (1.7097) acc 75.0000 (61.1294) lr 1.0000e-05 eta 
9:26:34 +epoch [1/50] batch [290/500] time 1.358 (1.375) data 0.000 (0.006) loss 2.1191 (1.7120) acc 59.3750 (61.1422) lr 1.0000e-05 eta 9:26:20 +epoch [1/50] batch [295/500] time 1.377 (1.375) data 0.000 (0.006) loss 2.1055 (1.7135) acc 46.8750 (61.1017) lr 1.0000e-05 eta 9:26:12 +epoch [1/50] batch [300/500] time 1.366 (1.375) data 0.000 (0.006) loss 1.2188 (1.7096) acc 68.7500 (61.1354) lr 1.0000e-05 eta 9:26:00 +epoch [1/50] batch [305/500] time 1.377 (1.375) data 0.000 (0.006) loss 1.2275 (1.7028) acc 68.7500 (61.2500) lr 1.0000e-05 eta 9:25:49 +epoch [1/50] batch [310/500] time 1.351 (1.375) data 0.000 (0.006) loss 1.5947 (1.6971) acc 65.6250 (61.3105) lr 1.0000e-05 eta 9:25:39 +epoch [1/50] batch [315/500] time 1.351 (1.374) data 0.000 (0.006) loss 2.0039 (1.6965) acc 59.3750 (61.3591) lr 1.0000e-05 eta 9:25:27 +epoch [1/50] batch [320/500] time 1.361 (1.374) data 0.000 (0.005) loss 1.1045 (1.6944) acc 78.1250 (61.4453) lr 1.0000e-05 eta 9:25:13 +epoch [1/50] batch [325/500] time 1.368 (1.374) data 0.001 (0.005) loss 1.3682 (1.6912) acc 71.8750 (61.5192) lr 1.0000e-05 eta 9:25:01 +epoch [1/50] batch [330/500] time 1.346 (1.374) data 0.000 (0.005) loss 2.4453 (1.6857) acc 46.8750 (61.6383) lr 1.0000e-05 eta 9:24:46 +epoch [1/50] batch [335/500] time 1.364 (1.373) data 0.000 (0.005) loss 1.8232 (1.6861) acc 65.6250 (61.6698) lr 1.0000e-05 eta 9:24:33 +epoch [1/50] batch [340/500] time 1.375 (1.373) data 0.000 (0.005) loss 0.9780 (1.6801) acc 78.1250 (61.7647) lr 1.0000e-05 eta 9:24:24 +epoch [1/50] batch [345/500] time 1.352 (1.374) data 0.000 (0.005) loss 1.6279 (1.6789) acc 59.3750 (61.7663) lr 1.0000e-05 eta 9:24:25 +epoch [1/50] batch [350/500] time 1.356 (1.373) data 0.000 (0.005) loss 1.7861 (1.6800) acc 65.6250 (61.8036) lr 1.0000e-05 eta 9:24:13 +epoch [1/50] batch [355/500] time 1.372 (1.373) data 0.001 (0.005) loss 1.4678 (1.6768) acc 62.5000 (61.8310) lr 1.0000e-05 eta 9:24:00 +epoch [1/50] batch [360/500] time 1.368 (1.373) data 0.000 (0.005) loss 
1.8447 (1.6742) acc 56.2500 (61.8576) lr 1.0000e-05 eta 9:23:48 +epoch [1/50] batch [365/500] time 1.370 (1.373) data 0.000 (0.005) loss 1.0889 (1.6704) acc 68.7500 (61.8921) lr 1.0000e-05 eta 9:23:37 +epoch [1/50] batch [370/500] time 1.368 (1.373) data 0.000 (0.005) loss 1.4863 (1.6713) acc 59.3750 (61.8666) lr 1.0000e-05 eta 9:23:28 +epoch [1/50] batch [375/500] time 1.353 (1.373) data 0.000 (0.005) loss 1.3115 (1.6675) acc 65.6250 (61.9250) lr 1.0000e-05 eta 9:23:24 +epoch [1/50] batch [380/500] time 1.364 (1.372) data 0.000 (0.005) loss 1.2100 (1.6659) acc 71.8750 (61.8914) lr 1.0000e-05 eta 9:23:10 +epoch [1/50] batch [385/500] time 1.484 (1.373) data 0.000 (0.005) loss 1.1992 (1.6654) acc 62.5000 (61.9075) lr 1.0000e-05 eta 9:23:06 +epoch [1/50] batch [390/500] time 1.349 (1.373) data 0.000 (0.005) loss 2.3633 (1.6621) acc 53.1250 (62.0353) lr 1.0000e-05 eta 9:23:00 +epoch [1/50] batch [395/500] time 1.380 (1.373) data 0.000 (0.004) loss 0.9912 (1.6581) acc 75.0000 (62.1282) lr 1.0000e-05 eta 9:22:52 +epoch [1/50] batch [400/500] time 1.352 (1.372) data 0.000 (0.004) loss 1.9746 (1.6596) acc 46.8750 (62.0938) lr 1.0000e-05 eta 9:22:43 +epoch [1/50] batch [405/500] time 1.358 (1.372) data 0.000 (0.004) loss 1.0908 (1.6587) acc 68.7500 (62.0988) lr 1.0000e-05 eta 9:22:34 +epoch [1/50] batch [410/500] time 1.364 (1.372) data 0.000 (0.004) loss 1.1006 (1.6530) acc 75.0000 (62.2332) lr 1.0000e-05 eta 9:22:25 +epoch [1/50] batch [415/500] time 1.485 (1.373) data 0.000 (0.004) loss 1.3232 (1.6466) acc 65.6250 (62.3494) lr 1.0000e-05 eta 9:22:23 +epoch [1/50] batch [420/500] time 1.351 (1.373) data 0.000 (0.004) loss 2.1602 (1.6463) acc 56.2500 (62.3438) lr 1.0000e-05 eta 9:22:16 +epoch [1/50] batch [425/500] time 1.377 (1.372) data 0.000 (0.004) loss 1.2725 (1.6437) acc 75.0000 (62.3897) lr 1.0000e-05 eta 9:22:05 +epoch [1/50] batch [430/500] time 1.577 (1.373) data 0.000 (0.004) loss 1.1377 (1.6406) acc 53.1250 (62.4273) lr 1.0000e-05 eta 9:22:15 +epoch [1/50] 
batch [435/500] time 1.366 (1.373) data 0.000 (0.004) loss 2.1758 (1.6393) acc 59.3750 (62.4641) lr 1.0000e-05 eta 9:22:13 +epoch [1/50] batch [440/500] time 1.353 (1.373) data 0.000 (0.004) loss 1.5996 (1.6367) acc 71.8750 (62.5213) lr 1.0000e-05 eta 9:22:04 +epoch [1/50] batch [445/500] time 1.359 (1.373) data 0.000 (0.004) loss 1.3076 (1.6349) acc 71.8750 (62.5351) lr 1.0000e-05 eta 9:21:55 +epoch [1/50] batch [450/500] time 1.361 (1.373) data 0.000 (0.004) loss 2.0469 (1.6356) acc 68.7500 (62.5556) lr 1.0000e-05 eta 9:21:43 +epoch [1/50] batch [455/500] time 1.372 (1.373) data 0.000 (0.004) loss 2.0859 (1.6344) acc 56.2500 (62.5618) lr 1.0000e-05 eta 9:21:34 +epoch [1/50] batch [460/500] time 1.379 (1.373) data 0.000 (0.004) loss 1.6621 (1.6325) acc 71.8750 (62.6155) lr 1.0000e-05 eta 9:21:27 +epoch [1/50] batch [465/500] time 1.382 (1.373) data 0.000 (0.004) loss 1.7051 (1.6333) acc 65.6250 (62.6142) lr 1.0000e-05 eta 9:21:21 +epoch [1/50] batch [470/500] time 1.362 (1.373) data 0.000 (0.004) loss 1.7656 (1.6313) acc 71.8750 (62.6662) lr 1.0000e-05 eta 9:21:12 +epoch [1/50] batch [475/500] time 1.374 (1.374) data 0.000 (0.004) loss 1.2695 (1.6325) acc 62.5000 (62.6447) lr 1.0000e-05 eta 9:21:29 +epoch [1/50] batch [480/500] time 1.351 (1.374) data 0.000 (0.004) loss 1.6318 (1.6317) acc 62.5000 (62.6432) lr 1.0000e-05 eta 9:21:22 +epoch [1/50] batch [485/500] time 1.362 (1.374) data 0.001 (0.004) loss 1.5332 (1.6318) acc 53.1250 (62.6289) lr 1.0000e-05 eta 9:21:15 +epoch [1/50] batch [490/500] time 1.327 (1.373) data 0.000 (0.004) loss 1.7109 (1.6316) acc 62.5000 (62.6722) lr 1.0000e-05 eta 9:21:01 +epoch [1/50] batch [495/500] time 1.339 (1.373) data 0.000 (0.004) loss 1.1582 (1.6314) acc 75.0000 (62.6957) lr 1.0000e-05 eta 9:20:49 +epoch [1/50] batch [500/500] time 1.324 (1.373) data 0.000 (0.004) loss 1.7373 (1.6320) acc 59.3750 (62.6812) lr 2.0000e-03 eta 9:20:36 +epoch [2/50] batch [5/500] time 1.367 (2.641) data 0.000 (1.263) loss 1.3633 (1.6576) acc 
65.6250 (63.1250) lr 2.0000e-03 eta 17:58:20 +epoch [2/50] batch [10/500] time 1.595 (2.022) data 0.007 (0.632) loss 2.1133 (1.6169) acc 56.2500 (62.8125) lr 2.0000e-03 eta 13:45:28 +epoch [2/50] batch [15/500] time 1.444 (1.836) data 0.000 (0.422) loss 1.0791 (1.5046) acc 75.0000 (65.0000) lr 2.0000e-03 eta 12:29:04 +epoch [2/50] batch [20/500] time 1.341 (1.714) data 0.000 (0.316) loss 1.2920 (1.4637) acc 68.7500 (65.1562) lr 2.0000e-03 eta 11:39:22 +epoch [2/50] batch [25/500] time 1.339 (1.640) data 0.000 (0.253) loss 1.5088 (1.4666) acc 56.2500 (64.7500) lr 2.0000e-03 eta 11:09:05 +epoch [2/50] batch [30/500] time 1.352 (1.592) data 0.000 (0.211) loss 0.6338 (1.4656) acc 78.1250 (64.3750) lr 2.0000e-03 eta 10:49:22 +epoch [2/50] batch [35/500] time 1.343 (1.559) data 0.000 (0.181) loss 1.1807 (1.4606) acc 68.7500 (64.4643) lr 2.0000e-03 eta 10:35:39 +epoch [2/50] batch [40/500] time 1.374 (1.534) data 0.000 (0.158) loss 1.3232 (1.4303) acc 75.0000 (65.2344) lr 2.0000e-03 eta 10:25:29 +epoch [2/50] batch [45/500] time 1.359 (1.514) data 0.000 (0.141) loss 1.5684 (1.4009) acc 65.6250 (66.1806) lr 2.0000e-03 eta 10:17:15 +epoch [2/50] batch [50/500] time 1.337 (1.499) data 0.000 (0.127) loss 1.3037 (1.3983) acc 71.8750 (66.5000) lr 2.0000e-03 eta 10:10:53 +epoch [2/50] batch [55/500] time 1.377 (1.486) data 0.000 (0.115) loss 1.8350 (1.4262) acc 59.3750 (65.7386) lr 2.0000e-03 eta 10:05:28 +epoch [2/50] batch [60/500] time 1.375 (1.487) data 0.000 (0.106) loss 1.8643 (1.4344) acc 56.2500 (65.2604) lr 2.0000e-03 eta 10:05:53 +epoch [2/50] batch [65/500] time 1.359 (1.478) data 0.000 (0.098) loss 2.1445 (1.4404) acc 59.3750 (65.8173) lr 2.0000e-03 eta 10:01:46 +epoch [2/50] batch [70/500] time 1.490 (1.471) data 0.000 (0.091) loss 1.2617 (1.4396) acc 71.8750 (65.9821) lr 2.0000e-03 eta 9:59:08 +epoch [2/50] batch [75/500] time 1.376 (1.465) data 0.000 (0.085) loss 0.8750 (1.4210) acc 71.8750 (66.2917) lr 2.0000e-03 eta 9:56:29 +epoch [2/50] batch [80/500] time 
1.378 (1.459) data 0.000 (0.079) loss 2.1074 (1.4152) acc 59.3750 (66.5625) lr 2.0000e-03 eta 9:53:39 +epoch [2/50] batch [85/500] time 1.372 (1.453) data 0.000 (0.075) loss 1.5352 (1.4188) acc 65.6250 (66.1765) lr 2.0000e-03 eta 9:51:07 +epoch [2/50] batch [90/500] time 1.374 (1.448) data 0.000 (0.071) loss 1.1748 (1.4201) acc 56.2500 (66.0069) lr 2.0000e-03 eta 9:49:03 +epoch [2/50] batch [95/500] time 1.359 (1.443) data 0.000 (0.067) loss 1.5703 (1.4118) acc 78.1250 (66.3816) lr 2.0000e-03 eta 9:47:01 +epoch [2/50] batch [100/500] time 1.391 (1.439) data 0.000 (0.064) loss 1.7461 (1.4074) acc 62.5000 (66.5625) lr 2.0000e-03 eta 9:45:17 +epoch [2/50] batch [105/500] time 1.360 (1.436) data 0.000 (0.061) loss 1.5508 (1.3973) acc 62.5000 (66.6369) lr 2.0000e-03 eta 9:43:43 +epoch [2/50] batch [110/500] time 1.372 (1.432) data 0.000 (0.058) loss 1.0303 (1.3903) acc 75.0000 (66.7614) lr 2.0000e-03 eta 9:42:11 +epoch [2/50] batch [115/500] time 1.363 (1.430) data 0.000 (0.055) loss 1.7100 (1.3875) acc 68.7500 (66.8750) lr 2.0000e-03 eta 9:41:16 +epoch [2/50] batch [120/500] time 1.371 (1.428) data 0.000 (0.053) loss 1.3340 (1.3865) acc 71.8750 (66.8229) lr 2.0000e-03 eta 9:40:03 +epoch [2/50] batch [125/500] time 1.369 (1.425) data 0.000 (0.051) loss 1.5537 (1.3890) acc 59.3750 (66.9000) lr 2.0000e-03 eta 9:38:48 +epoch [2/50] batch [130/500] time 1.367 (1.422) data 0.000 (0.049) loss 1.9326 (1.3899) acc 56.2500 (66.8510) lr 2.0000e-03 eta 9:37:36 +epoch [2/50] batch [135/500] time 1.373 (1.420) data 0.000 (0.047) loss 1.4355 (1.3928) acc 71.8750 (66.8287) lr 2.0000e-03 eta 9:36:31 +epoch [2/50] batch [140/500] time 1.343 (1.417) data 0.000 (0.046) loss 1.4541 (1.3928) acc 68.7500 (66.7634) lr 2.0000e-03 eta 9:35:24 +epoch [2/50] batch [145/500] time 1.353 (1.415) data 0.000 (0.044) loss 1.1816 (1.3948) acc 75.0000 (66.7457) lr 2.0000e-03 eta 9:34:29 +epoch [2/50] batch [150/500] time 1.350 (1.413) data 0.001 (0.043) loss 1.4922 (1.3919) acc 59.3750 (66.6875) lr 
2.0000e-03 eta 9:33:30 +epoch [2/50] batch [155/500] time 1.360 (1.411) data 0.000 (0.041) loss 0.8330 (1.3843) acc 71.8750 (66.7339) lr 2.0000e-03 eta 9:32:39 +epoch [2/50] batch [160/500] time 1.343 (1.410) data 0.000 (0.040) loss 1.6191 (1.3791) acc 65.6250 (66.8359) lr 2.0000e-03 eta 9:31:48 +epoch [2/50] batch [165/500] time 1.351 (1.408) data 0.000 (0.039) loss 1.6602 (1.3755) acc 62.5000 (66.8750) lr 2.0000e-03 eta 9:31:00 +epoch [2/50] batch [170/500] time 1.349 (1.406) data 0.000 (0.038) loss 1.4395 (1.3780) acc 68.7500 (66.8199) lr 2.0000e-03 eta 9:30:17 +epoch [2/50] batch [175/500] time 1.359 (1.405) data 0.000 (0.036) loss 1.1006 (1.3700) acc 62.5000 (66.9464) lr 2.0000e-03 eta 9:29:33 +epoch [2/50] batch [180/500] time 1.363 (1.404) data 0.000 (0.035) loss 1.4150 (1.3652) acc 68.7500 (67.0312) lr 2.0000e-03 eta 9:28:58 +epoch [2/50] batch [185/500] time 1.376 (1.403) data 0.000 (0.035) loss 0.9092 (1.3595) acc 78.1250 (67.1115) lr 2.0000e-03 eta 9:28:23 +epoch [2/50] batch [190/500] time 1.349 (1.401) data 0.000 (0.034) loss 1.2373 (1.3583) acc 59.3750 (67.0395) lr 2.0000e-03 eta 9:27:45 +epoch [2/50] batch [195/500] time 1.351 (1.400) data 0.000 (0.033) loss 1.1270 (1.3544) acc 62.5000 (67.1154) lr 2.0000e-03 eta 9:27:09 +epoch [2/50] batch [200/500] time 1.354 (1.399) data 0.000 (0.032) loss 0.9917 (1.3501) acc 71.8750 (67.1562) lr 2.0000e-03 eta 9:26:38 +epoch [2/50] batch [205/500] time 1.357 (1.398) data 0.000 (0.031) loss 2.8984 (1.3572) acc 53.1250 (67.0884) lr 2.0000e-03 eta 9:26:02 +epoch [2/50] batch [210/500] time 1.361 (1.397) data 0.000 (0.030) loss 1.2012 (1.3555) acc 75.0000 (67.1280) lr 2.0000e-03 eta 9:25:27 +epoch [2/50] batch [215/500] time 1.355 (1.396) data 0.000 (0.030) loss 1.1680 (1.3556) acc 59.3750 (67.0349) lr 2.0000e-03 eta 9:25:12 +epoch [2/50] batch [220/500] time 1.361 (1.396) data 0.000 (0.029) loss 0.9897 (1.3554) acc 78.1250 (66.9886) lr 2.0000e-03 eta 9:24:46 +epoch [2/50] batch [225/500] time 1.339 (1.395) data 
0.000 (0.028) loss 1.2471 (1.3512) acc 75.0000 (67.1667) lr 2.0000e-03 eta 9:24:12 +epoch [2/50] batch [230/500] time 1.333 (1.393) data 0.000 (0.028) loss 0.9287 (1.3510) acc 75.0000 (67.2011) lr 2.0000e-03 eta 9:23:38 +epoch [2/50] batch [235/500] time 1.351 (1.393) data 0.000 (0.027) loss 1.0303 (1.3490) acc 68.7500 (67.2074) lr 2.0000e-03 eta 9:23:10 +epoch [2/50] batch [240/500] time 1.349 (1.392) data 0.001 (0.027) loss 2.1055 (1.3474) acc 62.5000 (67.2135) lr 2.0000e-03 eta 9:22:45 +epoch [2/50] batch [245/500] time 1.348 (1.391) data 0.000 (0.026) loss 1.8135 (1.3497) acc 68.7500 (67.2959) lr 2.0000e-03 eta 9:22:22 +epoch [2/50] batch [250/500] time 1.350 (1.390) data 0.000 (0.026) loss 0.8511 (1.3487) acc 78.1250 (67.3250) lr 2.0000e-03 eta 9:21:54 +epoch [2/50] batch [255/500] time 1.356 (1.390) data 0.000 (0.025) loss 1.1797 (1.3481) acc 71.8750 (67.3897) lr 2.0000e-03 eta 9:21:31 +epoch [2/50] batch [260/500] time 1.339 (1.389) data 0.000 (0.025) loss 1.4189 (1.3454) acc 53.1250 (67.3798) lr 2.0000e-03 eta 9:21:15 +epoch [2/50] batch [265/500] time 1.365 (1.389) data 0.000 (0.024) loss 1.8525 (1.3472) acc 62.5000 (67.3821) lr 2.0000e-03 eta 9:20:56 +epoch [2/50] batch [270/500] time 1.357 (1.388) data 0.000 (0.024) loss 1.0342 (1.3401) acc 71.8750 (67.5231) lr 2.0000e-03 eta 9:20:39 +epoch [2/50] batch [275/500] time 1.342 (1.388) data 0.000 (0.023) loss 1.7002 (1.3379) acc 62.5000 (67.5455) lr 2.0000e-03 eta 9:20:16 +epoch [2/50] batch [280/500] time 1.373 (1.387) data 0.000 (0.023) loss 1.1797 (1.3357) acc 71.8750 (67.6004) lr 2.0000e-03 eta 9:19:59 +epoch [2/50] batch [285/500] time 1.361 (1.387) data 0.000 (0.023) loss 2.1641 (1.3352) acc 62.5000 (67.6425) lr 2.0000e-03 eta 9:19:42 +epoch [2/50] batch [290/500] time 1.342 (1.386) data 0.000 (0.022) loss 1.1074 (1.3350) acc 75.0000 (67.7155) lr 2.0000e-03 eta 9:19:22 +epoch [2/50] batch [295/500] time 1.372 (1.386) data 0.000 (0.022) loss 1.0908 (1.3313) acc 75.0000 (67.8602) lr 2.0000e-03 eta 
9:19:01 +epoch [2/50] batch [300/500] time 1.356 (1.385) data 0.000 (0.021) loss 1.0156 (1.3269) acc 78.1250 (67.9271) lr 2.0000e-03 eta 9:18:46 +epoch [2/50] batch [305/500] time 1.364 (1.385) data 0.000 (0.021) loss 1.0576 (1.3278) acc 81.2500 (67.9508) lr 2.0000e-03 eta 9:18:30 +epoch [2/50] batch [310/500] time 1.373 (1.385) data 0.000 (0.021) loss 1.2822 (1.3267) acc 71.8750 (67.9335) lr 2.0000e-03 eta 9:18:15 +epoch [2/50] batch [315/500] time 1.347 (1.384) data 0.000 (0.020) loss 1.1201 (1.3282) acc 71.8750 (67.9067) lr 2.0000e-03 eta 9:18:00 +epoch [2/50] batch [320/500] time 1.359 (1.384) data 0.000 (0.020) loss 1.3281 (1.3282) acc 68.7500 (67.9492) lr 2.0000e-03 eta 9:17:44 +epoch [2/50] batch [325/500] time 1.353 (1.384) data 0.000 (0.020) loss 1.3477 (1.3316) acc 65.6250 (67.8558) lr 2.0000e-03 eta 9:17:27 +epoch [2/50] batch [330/500] time 1.353 (1.383) data 0.000 (0.020) loss 1.1533 (1.3344) acc 75.0000 (67.8220) lr 2.0000e-03 eta 9:17:12 +epoch [2/50] batch [335/500] time 1.351 (1.383) data 0.000 (0.019) loss 1.3555 (1.3326) acc 65.6250 (67.8451) lr 2.0000e-03 eta 9:16:56 +epoch [2/50] batch [340/500] time 1.360 (1.382) data 0.000 (0.019) loss 1.4795 (1.3316) acc 65.6250 (67.8125) lr 2.0000e-03 eta 9:16:38 +epoch [2/50] batch [345/500] time 1.364 (1.382) data 0.000 (0.019) loss 0.9141 (1.3312) acc 81.2500 (67.8533) lr 2.0000e-03 eta 9:16:22 +epoch [2/50] batch [350/500] time 1.356 (1.382) data 0.000 (0.018) loss 0.8252 (1.3303) acc 71.8750 (67.8482) lr 2.0000e-03 eta 9:16:05 +epoch [2/50] batch [355/500] time 1.357 (1.381) data 0.000 (0.018) loss 0.9546 (1.3303) acc 78.1250 (67.8873) lr 2.0000e-03 eta 9:15:48 +epoch [2/50] batch [360/500] time 1.359 (1.382) data 0.000 (0.018) loss 0.9019 (1.3263) acc 71.8750 (67.9080) lr 2.0000e-03 eta 9:15:49 +epoch [2/50] batch [365/500] time 1.355 (1.381) data 0.000 (0.018) loss 1.2334 (1.3242) acc 56.2500 (67.9024) lr 2.0000e-03 eta 9:15:34 +epoch [2/50] batch [370/500] time 1.358 (1.381) data 0.000 (0.017) loss 
1.9756 (1.3249) acc 59.3750 (67.9223) lr 2.0000e-03 eta 9:15:18 +epoch [2/50] batch [375/500] time 1.369 (1.381) data 0.000 (0.017) loss 0.8955 (1.3240) acc 71.8750 (67.9417) lr 2.0000e-03 eta 9:15:04 +epoch [2/50] batch [380/500] time 1.353 (1.380) data 0.000 (0.017) loss 0.7451 (1.3247) acc 87.5000 (67.9605) lr 2.0000e-03 eta 9:14:51 +epoch [2/50] batch [385/500] time 1.371 (1.380) data 0.000 (0.017) loss 1.5947 (1.3257) acc 71.8750 (67.9545) lr 2.0000e-03 eta 9:14:38 +epoch [2/50] batch [390/500] time 1.373 (1.380) data 0.000 (0.017) loss 1.2705 (1.3230) acc 78.1250 (68.0449) lr 2.0000e-03 eta 9:14:24 +epoch [2/50] batch [395/500] time 1.352 (1.379) data 0.000 (0.016) loss 1.1865 (1.3210) acc 62.5000 (68.0459) lr 2.0000e-03 eta 9:14:10 +epoch [2/50] batch [400/500] time 1.523 (1.380) data 0.000 (0.016) loss 1.5771 (1.3175) acc 65.6250 (68.1094) lr 2.0000e-03 eta 9:14:06 +epoch [2/50] batch [405/500] time 1.333 (1.379) data 0.000 (0.016) loss 1.0811 (1.3145) acc 65.6250 (68.1404) lr 2.0000e-03 eta 9:13:53 +epoch [2/50] batch [410/500] time 1.340 (1.379) data 0.000 (0.016) loss 2.0254 (1.3147) acc 65.6250 (68.1860) lr 2.0000e-03 eta 9:13:39 +epoch [2/50] batch [415/500] time 1.361 (1.379) data 0.000 (0.016) loss 0.8057 (1.3130) acc 81.2500 (68.1852) lr 2.0000e-03 eta 9:13:27 +epoch [2/50] batch [420/500] time 1.356 (1.378) data 0.000 (0.015) loss 1.0908 (1.3128) acc 68.7500 (68.1845) lr 2.0000e-03 eta 9:13:13 +epoch [2/50] batch [425/500] time 1.376 (1.378) data 0.000 (0.015) loss 1.1494 (1.3129) acc 65.6250 (68.1912) lr 2.0000e-03 eta 9:13:00 +epoch [2/50] batch [430/500] time 1.369 (1.378) data 0.000 (0.015) loss 1.1875 (1.3106) acc 65.6250 (68.2485) lr 2.0000e-03 eta 9:12:49 +epoch [2/50] batch [435/500] time 1.366 (1.378) data 0.000 (0.015) loss 0.7266 (1.3089) acc 81.2500 (68.2830) lr 2.0000e-03 eta 9:12:39 +epoch [2/50] batch [440/500] time 1.358 (1.378) data 0.000 (0.015) loss 1.6221 (1.3109) acc 68.7500 (68.2528) lr 2.0000e-03 eta 9:12:27 +epoch [2/50] 
batch [445/500] time 1.354 (1.377) data 0.000 (0.015) loss 1.3330 (1.3089) acc 65.6250 (68.2374) lr 2.0000e-03 eta 9:12:14 +epoch [2/50] batch [450/500] time 1.342 (1.377) data 0.000 (0.014) loss 1.1387 (1.3068) acc 71.8750 (68.2639) lr 2.0000e-03 eta 9:12:00 +epoch [2/50] batch [455/500] time 1.355 (1.377) data 0.000 (0.014) loss 1.0615 (1.3071) acc 71.8750 (68.2349) lr 2.0000e-03 eta 9:11:48 +epoch [2/50] batch [460/500] time 1.331 (1.377) data 0.000 (0.014) loss 1.4766 (1.3085) acc 68.7500 (68.2473) lr 2.0000e-03 eta 9:11:34 +epoch [2/50] batch [465/500] time 1.348 (1.376) data 0.000 (0.014) loss 1.2676 (1.3098) acc 71.8750 (68.2661) lr 2.0000e-03 eta 9:11:20 +epoch [2/50] batch [470/500] time 1.368 (1.376) data 0.000 (0.014) loss 1.1113 (1.3105) acc 68.7500 (68.2314) lr 2.0000e-03 eta 9:11:07 +epoch [2/50] batch [475/500] time 1.361 (1.376) data 0.000 (0.014) loss 1.4258 (1.3111) acc 75.0000 (68.2434) lr 2.0000e-03 eta 9:10:58 +epoch [2/50] batch [480/500] time 1.348 (1.376) data 0.000 (0.014) loss 0.9766 (1.3084) acc 62.5000 (68.2878) lr 2.0000e-03 eta 9:10:46 +epoch [2/50] batch [485/500] time 1.355 (1.376) data 0.001 (0.013) loss 1.0234 (1.3072) acc 62.5000 (68.2990) lr 2.0000e-03 eta 9:10:34 +epoch [2/50] batch [490/500] time 1.344 (1.375) data 0.000 (0.013) loss 1.3164 (1.3059) acc 71.8750 (68.3227) lr 2.0000e-03 eta 9:10:21 +epoch [2/50] batch [495/500] time 1.348 (1.375) data 0.000 (0.013) loss 1.8662 (1.3069) acc 59.3750 (68.2765) lr 2.0000e-03 eta 9:10:07 +epoch [2/50] batch [500/500] time 1.349 (1.375) data 0.000 (0.013) loss 1.1055 (1.3090) acc 65.6250 (68.2000) lr 1.9980e-03 eta 9:10:00 +epoch [3/50] batch [5/500] time 1.361 (1.528) data 0.000 (0.168) loss 1.0830 (1.4080) acc 78.1250 (70.0000) lr 1.9980e-03 eta 10:10:55 +epoch [3/50] batch [10/500] time 1.345 (1.441) data 0.000 (0.084) loss 0.9473 (1.2813) acc 81.2500 (69.3750) lr 1.9980e-03 eta 9:36:13 +epoch [3/50] batch [15/500] time 1.343 (1.414) data 0.000 (0.056) loss 1.0928 (1.3155) acc 
62.5000 (68.5417) lr 1.9980e-03 eta 9:25:20 +epoch [3/50] batch [20/500] time 1.355 (1.400) data 0.000 (0.042) loss 1.0898 (1.3185) acc 68.7500 (68.5938) lr 1.9980e-03 eta 9:19:31 +epoch [3/50] batch [25/500] time 1.353 (1.391) data 0.000 (0.034) loss 1.3115 (1.3565) acc 68.7500 (68.8750) lr 1.9980e-03 eta 9:16:00 +epoch [3/50] batch [30/500] time 1.343 (1.386) data 0.000 (0.028) loss 1.3135 (1.3612) acc 65.6250 (68.9583) lr 1.9980e-03 eta 9:13:44 +epoch [3/50] batch [35/500] time 1.370 (1.382) data 0.000 (0.024) loss 0.5830 (1.2937) acc 84.3750 (70.3571) lr 1.9980e-03 eta 9:12:08 +epoch [3/50] batch [40/500] time 1.374 (1.379) data 0.000 (0.021) loss 1.3086 (1.3160) acc 65.6250 (69.8438) lr 1.9980e-03 eta 9:10:42 +epoch [3/50] batch [45/500] time 1.360 (1.377) data 0.000 (0.019) loss 1.2188 (1.3084) acc 59.3750 (69.7222) lr 1.9980e-03 eta 9:09:55 +epoch [3/50] batch [50/500] time 1.344 (1.376) data 0.000 (0.017) loss 1.4395 (1.3030) acc 68.7500 (70.0000) lr 1.9980e-03 eta 9:09:21 +epoch [3/50] batch [55/500] time 1.363 (1.375) data 0.000 (0.016) loss 0.7666 (1.2902) acc 81.2500 (70.1136) lr 1.9980e-03 eta 9:08:43 +epoch [3/50] batch [60/500] time 1.375 (1.377) data 0.000 (0.014) loss 1.4854 (1.2982) acc 65.6250 (69.8438) lr 1.9980e-03 eta 9:09:19 +epoch [3/50] batch [65/500] time 1.366 (1.375) data 0.000 (0.013) loss 1.2500 (1.3009) acc 71.8750 (69.8077) lr 1.9980e-03 eta 9:08:40 +epoch [3/50] batch [70/500] time 1.363 (1.374) data 0.000 (0.012) loss 1.3789 (1.2940) acc 65.6250 (69.8661) lr 1.9980e-03 eta 9:08:08 +epoch [3/50] batch [75/500] time 1.364 (1.373) data 0.000 (0.012) loss 1.0869 (1.2900) acc 71.8750 (69.8750) lr 1.9980e-03 eta 9:07:38 +epoch [3/50] batch [80/500] time 1.356 (1.372) data 0.000 (0.011) loss 0.9634 (1.2772) acc 78.1250 (70.0781) lr 1.9980e-03 eta 9:07:02 +epoch [3/50] batch [85/500] time 1.339 (1.372) data 0.000 (0.010) loss 1.7920 (1.3042) acc 62.5000 (69.4485) lr 1.9980e-03 eta 9:06:44 +epoch [3/50] batch [90/500] time 1.366 (1.371) 
data 0.000 (0.010) loss 1.4736 (1.2960) acc 59.3750 (69.4444) lr 1.9980e-03 eta 9:06:21 +epoch [3/50] batch [95/500] time 1.368 (1.371) data 0.000 (0.009) loss 0.8228 (1.2889) acc 75.0000 (69.7697) lr 1.9980e-03 eta 9:06:04 +epoch [3/50] batch [100/500] time 1.363 (1.370) data 0.000 (0.009) loss 1.3311 (1.3023) acc 65.6250 (69.4062) lr 1.9980e-03 eta 9:05:43 +epoch [3/50] batch [105/500] time 1.351 (1.371) data 0.000 (0.008) loss 1.0518 (1.3053) acc 68.7500 (69.3452) lr 1.9980e-03 eta 9:05:57 +epoch [3/50] batch [110/500] time 1.368 (1.370) data 0.000 (0.008) loss 1.4844 (1.3001) acc 59.3750 (69.4318) lr 1.9980e-03 eta 9:05:35 +epoch [3/50] batch [115/500] time 1.343 (1.370) data 0.000 (0.008) loss 1.0928 (1.2921) acc 71.8750 (69.5924) lr 1.9980e-03 eta 9:05:19 +epoch [3/50] batch [120/500] time 1.358 (1.370) data 0.000 (0.007) loss 0.6426 (1.2823) acc 81.2500 (69.5573) lr 1.9980e-03 eta 9:05:04 +epoch [3/50] batch [125/500] time 1.355 (1.369) data 0.000 (0.007) loss 0.9678 (1.2784) acc 71.8750 (69.6500) lr 1.9980e-03 eta 9:04:39 +epoch [3/50] batch [130/500] time 1.349 (1.369) data 0.000 (0.007) loss 0.9614 (1.2675) acc 78.1250 (69.8077) lr 1.9980e-03 eta 9:04:26 +epoch [3/50] batch [135/500] time 1.354 (1.368) data 0.001 (0.007) loss 1.3906 (1.2671) acc 68.7500 (69.7685) lr 1.9980e-03 eta 9:04:14 +epoch [3/50] batch [140/500] time 1.390 (1.368) data 0.000 (0.006) loss 1.2988 (1.2658) acc 65.6250 (69.7545) lr 1.9980e-03 eta 9:04:05 +epoch [3/50] batch [145/500] time 1.400 (1.368) data 0.000 (0.006) loss 1.3213 (1.2584) acc 65.6250 (69.8922) lr 1.9980e-03 eta 9:03:57 +epoch [3/50] batch [150/500] time 1.378 (1.368) data 0.000 (0.006) loss 0.9941 (1.2502) acc 78.1250 (70.0208) lr 1.9980e-03 eta 9:03:47 +epoch [3/50] batch [155/500] time 1.400 (1.368) data 0.000 (0.006) loss 1.0010 (1.2519) acc 68.7500 (69.7984) lr 1.9980e-03 eta 9:03:45 +epoch [3/50] batch [160/500] time 1.388 (1.369) data 0.000 (0.006) loss 1.3398 (1.2537) acc 62.5000 (69.7461) lr 1.9980e-03 eta 
9:03:45 +epoch [3/50] batch [165/500] time 1.367 (1.368) data 0.000 (0.005) loss 0.7588 (1.2505) acc 84.3750 (69.6970) lr 1.9980e-03 eta 9:03:35 +epoch [3/50] batch [170/500] time 1.355 (1.368) data 0.000 (0.005) loss 0.9868 (1.2514) acc 78.1250 (69.6691) lr 1.9980e-03 eta 9:03:21 +epoch [3/50] batch [175/500] time 1.365 (1.368) data 0.001 (0.005) loss 0.9780 (1.2471) acc 71.8750 (69.7143) lr 1.9980e-03 eta 9:03:11 +epoch [3/50] batch [180/500] time 1.365 (1.368) data 0.000 (0.005) loss 0.9868 (1.2395) acc 78.1250 (69.8611) lr 1.9980e-03 eta 9:03:03 +epoch [3/50] batch [185/500] time 1.364 (1.367) data 0.000 (0.005) loss 1.3730 (1.2430) acc 68.7500 (69.7973) lr 1.9980e-03 eta 9:02:46 +epoch [3/50] batch [190/500] time 1.352 (1.367) data 0.000 (0.005) loss 1.2861 (1.2398) acc 75.0000 (69.9342) lr 1.9980e-03 eta 9:02:29 +epoch [3/50] batch [195/500] time 1.362 (1.367) data 0.000 (0.005) loss 1.2549 (1.2431) acc 81.2500 (69.9038) lr 1.9980e-03 eta 9:02:12 +epoch [3/50] batch [200/500] time 1.354 (1.366) data 0.000 (0.005) loss 1.5352 (1.2461) acc 53.1250 (69.8125) lr 1.9980e-03 eta 9:02:00 +epoch [3/50] batch [205/500] time 1.365 (1.367) data 0.000 (0.004) loss 0.8447 (1.2432) acc 78.1250 (69.8323) lr 1.9980e-03 eta 9:01:58 +epoch [3/50] batch [210/500] time 1.371 (1.366) data 0.000 (0.004) loss 1.4277 (1.2428) acc 59.3750 (69.7173) lr 1.9980e-03 eta 9:01:47 +epoch [3/50] batch [215/500] time 1.324 (1.366) data 0.001 (0.004) loss 0.8403 (1.2444) acc 81.2500 (69.6512) lr 1.9980e-03 eta 9:01:32 +epoch [3/50] batch [220/500] time 1.346 (1.366) data 0.000 (0.004) loss 1.0459 (1.2387) acc 75.0000 (69.7443) lr 1.9980e-03 eta 9:01:20 +epoch [3/50] batch [225/500] time 1.362 (1.366) data 0.000 (0.004) loss 1.2373 (1.2414) acc 50.0000 (69.4583) lr 1.9980e-03 eta 9:01:11 +epoch [3/50] batch [230/500] time 1.360 (1.366) data 0.000 (0.004) loss 1.2764 (1.2367) acc 65.6250 (69.4701) lr 1.9980e-03 eta 9:01:03 +epoch [3/50] batch [235/500] time 1.353 (1.366) data 0.000 (0.004) loss 
1.6621 (1.2391) acc 68.7500 (69.5213) lr 1.9980e-03 eta 9:00:52 +epoch [3/50] batch [240/500] time 1.368 (1.365) data 0.000 (0.004) loss 1.7549 (1.2441) acc 62.5000 (69.4141) lr 1.9980e-03 eta 9:00:41 +epoch [3/50] batch [245/500] time 1.365 (1.365) data 0.000 (0.004) loss 1.2686 (1.2472) acc 71.8750 (69.3112) lr 1.9980e-03 eta 9:00:30 +epoch [3/50] batch [250/500] time 1.367 (1.366) data 0.000 (0.004) loss 0.7148 (1.2466) acc 78.1250 (69.3250) lr 1.9980e-03 eta 9:00:35 +epoch [3/50] batch [255/500] time 1.366 (1.365) data 0.000 (0.004) loss 0.7046 (1.2460) acc 81.2500 (69.3995) lr 1.9980e-03 eta 9:00:21 +epoch [3/50] batch [260/500] time 1.344 (1.365) data 0.000 (0.004) loss 1.4717 (1.2499) acc 62.5000 (69.3029) lr 1.9980e-03 eta 9:00:11 +epoch [3/50] batch [265/500] time 1.348 (1.365) data 0.000 (0.004) loss 0.9707 (1.2484) acc 78.1250 (69.2807) lr 1.9980e-03 eta 9:00:01 +epoch [3/50] batch [270/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.3926 (1.2478) acc 68.7500 (69.3056) lr 1.9980e-03 eta 8:59:51 +epoch [3/50] batch [275/500] time 1.360 (1.365) data 0.000 (0.003) loss 0.9468 (1.2483) acc 68.7500 (69.2045) lr 1.9980e-03 eta 8:59:40 +epoch [3/50] batch [280/500] time 1.350 (1.365) data 0.000 (0.003) loss 1.2598 (1.2470) acc 71.8750 (69.2076) lr 1.9980e-03 eta 8:59:26 +epoch [3/50] batch [285/500] time 1.332 (1.364) data 0.000 (0.003) loss 1.4980 (1.2522) acc 65.6250 (69.1338) lr 1.9980e-03 eta 8:59:14 +epoch [3/50] batch [290/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.5645 (1.2501) acc 65.6250 (69.2241) lr 1.9980e-03 eta 8:59:03 +epoch [3/50] batch [295/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.8584 (1.2536) acc 56.2500 (69.1631) lr 1.9980e-03 eta 8:58:53 +epoch [3/50] batch [300/500] time 1.370 (1.364) data 0.000 (0.003) loss 1.7148 (1.2554) acc 62.5000 (69.1458) lr 1.9980e-03 eta 8:58:43 +epoch [3/50] batch [305/500] time 1.350 (1.364) data 0.000 (0.003) loss 1.3984 (1.2553) acc 59.3750 (69.1496) lr 1.9980e-03 eta 8:58:34 +epoch [3/50] 
batch [310/500] time 1.350 (1.364) data 0.000 (0.003) loss 0.6860 (1.2539) acc 81.2500 (69.1431) lr 1.9980e-03 eta 8:58:27 +epoch [3/50] batch [315/500] time 1.344 (1.364) data 0.000 (0.003) loss 1.2344 (1.2547) acc 68.7500 (69.1270) lr 1.9980e-03 eta 8:58:17 +epoch [3/50] batch [320/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.9360 (1.2541) acc 78.1250 (69.1602) lr 1.9980e-03 eta 8:58:13 +epoch [3/50] batch [325/500] time 1.359 (1.364) data 0.000 (0.003) loss 1.4189 (1.2517) acc 53.1250 (69.1635) lr 1.9980e-03 eta 8:58:03 +epoch [3/50] batch [330/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.2227 (1.2538) acc 59.3750 (69.1193) lr 1.9980e-03 eta 8:57:55 +epoch [3/50] batch [335/500] time 1.376 (1.364) data 0.000 (0.003) loss 1.2441 (1.2506) acc 71.8750 (69.1978) lr 1.9980e-03 eta 8:57:49 +epoch [3/50] batch [340/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.2686 (1.2494) acc 71.8750 (69.2279) lr 1.9980e-03 eta 8:57:41 +epoch [3/50] batch [345/500] time 1.493 (1.364) data 0.000 (0.003) loss 0.8911 (1.2481) acc 81.2500 (69.2482) lr 1.9980e-03 eta 8:57:45 +epoch [3/50] batch [350/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.0176 (1.2470) acc 62.5000 (69.2143) lr 1.9980e-03 eta 8:57:38 +epoch [3/50] batch [355/500] time 1.345 (1.364) data 0.000 (0.003) loss 1.5400 (1.2457) acc 68.7500 (69.2430) lr 1.9980e-03 eta 8:57:30 +epoch [3/50] batch [360/500] time 1.369 (1.364) data 0.000 (0.003) loss 1.5254 (1.2468) acc 68.7500 (69.2188) lr 1.9980e-03 eta 8:57:25 +epoch [3/50] batch [365/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.4482 (1.2475) acc 68.7500 (69.1695) lr 1.9980e-03 eta 8:57:17 +epoch [3/50] batch [370/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.0850 (1.2456) acc 75.0000 (69.1554) lr 1.9980e-03 eta 8:57:10 +epoch [3/50] batch [375/500] time 1.346 (1.364) data 0.000 (0.003) loss 1.0469 (1.2462) acc 71.8750 (69.1583) lr 1.9980e-03 eta 8:57:04 +epoch [3/50] batch [380/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.3545 (1.2448) acc 
62.5000 (69.1447) lr 1.9980e-03 eta 8:56:59 +epoch [3/50] batch [385/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.1963 (1.2461) acc 71.8750 (69.1721) lr 1.9980e-03 eta 8:56:53 +epoch [3/50] batch [390/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.1367 (1.2470) acc 71.8750 (69.1426) lr 1.9980e-03 eta 8:56:52 +epoch [3/50] batch [395/500] time 1.375 (1.364) data 0.000 (0.002) loss 1.3486 (1.2463) acc 68.7500 (69.1377) lr 1.9980e-03 eta 8:56:42 +epoch [3/50] batch [400/500] time 1.346 (1.364) data 0.000 (0.002) loss 1.0420 (1.2465) acc 68.7500 (69.1484) lr 1.9980e-03 eta 8:56:34 +epoch [3/50] batch [405/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.2422 (1.2462) acc 75.0000 (69.1667) lr 1.9980e-03 eta 8:56:24 +epoch [3/50] batch [410/500] time 1.394 (1.364) data 0.000 (0.002) loss 1.3330 (1.2454) acc 68.7500 (69.2073) lr 1.9980e-03 eta 8:56:18 +epoch [3/50] batch [415/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.4082 (1.2447) acc 71.8750 (69.2395) lr 1.9980e-03 eta 8:56:12 +epoch [3/50] batch [420/500] time 1.371 (1.364) data 0.000 (0.002) loss 1.1650 (1.2451) acc 71.8750 (69.2411) lr 1.9980e-03 eta 8:56:06 +epoch [3/50] batch [425/500] time 1.351 (1.364) data 0.000 (0.002) loss 1.2217 (1.2453) acc 75.0000 (69.2574) lr 1.9980e-03 eta 8:55:56 +epoch [3/50] batch [430/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.1445 (1.2474) acc 65.6250 (69.2587) lr 1.9980e-03 eta 8:55:48 +epoch [3/50] batch [435/500] time 1.352 (1.364) data 0.000 (0.002) loss 0.9316 (1.2448) acc 87.5000 (69.3103) lr 1.9980e-03 eta 8:55:38 +epoch [3/50] batch [440/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.1035 (1.2450) acc 78.1250 (69.3466) lr 1.9980e-03 eta 8:55:28 +epoch [3/50] batch [445/500] time 1.375 (1.364) data 0.000 (0.002) loss 0.8999 (1.2417) acc 81.2500 (69.4031) lr 1.9980e-03 eta 8:55:22 +epoch [3/50] batch [450/500] time 1.373 (1.364) data 0.000 (0.002) loss 0.5747 (1.2383) acc 78.1250 (69.4653) lr 1.9980e-03 eta 8:55:19 +epoch [3/50] batch [455/500] time 
1.376 (1.364) data 0.000 (0.002) loss 1.3770 (1.2419) acc 71.8750 (69.4574) lr 1.9980e-03 eta 8:55:13 +epoch [3/50] batch [460/500] time 1.351 (1.364) data 0.000 (0.002) loss 0.8047 (1.2415) acc 78.1250 (69.5177) lr 1.9980e-03 eta 8:55:05 +epoch [3/50] batch [465/500] time 1.363 (1.364) data 0.000 (0.002) loss 0.8359 (1.2398) acc 75.0000 (69.5430) lr 1.9980e-03 eta 8:54:57 +epoch [3/50] batch [470/500] time 1.346 (1.364) data 0.000 (0.002) loss 1.0654 (1.2394) acc 75.0000 (69.5346) lr 1.9980e-03 eta 8:54:49 +epoch [3/50] batch [475/500] time 1.353 (1.364) data 0.000 (0.002) loss 0.5825 (1.2386) acc 84.3750 (69.5197) lr 1.9980e-03 eta 8:54:40 +epoch [3/50] batch [480/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.2910 (1.2413) acc 59.3750 (69.4466) lr 1.9980e-03 eta 8:54:29 +epoch [3/50] batch [485/500] time 1.362 (1.363) data 0.001 (0.002) loss 1.1445 (1.2404) acc 68.7500 (69.4394) lr 1.9980e-03 eta 8:54:21 +epoch [3/50] batch [490/500] time 1.352 (1.364) data 0.000 (0.002) loss 1.0273 (1.2391) acc 75.0000 (69.4707) lr 1.9980e-03 eta 8:54:17 +epoch [3/50] batch [495/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.4658 (1.2386) acc 65.6250 (69.4823) lr 1.9980e-03 eta 8:54:07 +epoch [3/50] batch [500/500] time 1.344 (1.363) data 0.000 (0.002) loss 1.5986 (1.2378) acc 65.6250 (69.4938) lr 1.9921e-03 eta 8:53:59 +epoch [4/50] batch [5/500] time 1.348 (1.517) data 0.001 (0.159) loss 0.8760 (1.1238) acc 81.2500 (71.8750) lr 1.9921e-03 eta 9:54:09 +epoch [4/50] batch [10/500] time 1.352 (1.434) data 0.000 (0.080) loss 1.3760 (1.1768) acc 43.7500 (68.1250) lr 1.9921e-03 eta 9:21:36 +epoch [4/50] batch [15/500] time 1.377 (1.409) data 0.001 (0.053) loss 1.2363 (1.1448) acc 71.8750 (68.7500) lr 1.9921e-03 eta 9:11:34 +epoch [4/50] batch [20/500] time 1.351 (1.398) data 0.001 (0.040) loss 1.1426 (1.1568) acc 71.8750 (69.3750) lr 1.9921e-03 eta 9:06:55 +epoch [4/50] batch [25/500] time 1.351 (1.390) data 0.001 (0.032) loss 0.8750 (1.1478) acc 75.0000 (69.5000) lr 
1.9921e-03 eta 9:03:42 +epoch [4/50] batch [30/500] time 1.345 (1.383) data 0.000 (0.027) loss 1.3770 (1.1682) acc 65.6250 (69.3750) lr 1.9921e-03 eta 9:00:50 +epoch [4/50] batch [35/500] time 1.366 (1.380) data 0.000 (0.023) loss 1.3633 (1.1949) acc 68.7500 (69.1964) lr 1.9921e-03 eta 8:59:30 +epoch [4/50] batch [40/500] time 1.358 (1.377) data 0.000 (0.020) loss 1.7490 (1.2250) acc 65.6250 (68.6719) lr 1.9921e-03 eta 8:58:18 +epoch [4/50] batch [45/500] time 1.402 (1.381) data 0.000 (0.018) loss 1.1172 (1.2429) acc 68.7500 (68.3333) lr 1.9921e-03 eta 8:59:50 +epoch [4/50] batch [50/500] time 1.353 (1.379) data 0.000 (0.016) loss 1.4404 (1.2427) acc 50.0000 (67.6250) lr 1.9921e-03 eta 8:58:54 +epoch [4/50] batch [55/500] time 1.367 (1.377) data 0.000 (0.015) loss 1.2705 (1.2497) acc 65.6250 (67.6705) lr 1.9921e-03 eta 8:58:08 +epoch [4/50] batch [60/500] time 1.331 (1.375) data 0.001 (0.014) loss 1.4883 (1.2700) acc 59.3750 (67.6042) lr 1.9921e-03 eta 8:56:59 +epoch [4/50] batch [65/500] time 1.377 (1.373) data 0.000 (0.013) loss 1.4727 (1.2813) acc 62.5000 (67.3558) lr 1.9921e-03 eta 8:56:26 +epoch [4/50] batch [70/500] time 1.361 (1.372) data 0.000 (0.012) loss 1.1865 (1.2900) acc 65.6250 (67.4107) lr 1.9921e-03 eta 8:55:56 +epoch [4/50] batch [75/500] time 1.361 (1.372) data 0.000 (0.011) loss 1.6680 (1.2977) acc 53.1250 (67.4583) lr 1.9921e-03 eta 8:55:29 +epoch [4/50] batch [80/500] time 1.343 (1.371) data 0.000 (0.010) loss 1.0430 (1.3036) acc 62.5000 (67.3828) lr 1.9921e-03 eta 8:55:03 +epoch [4/50] batch [85/500] time 1.376 (1.371) data 0.000 (0.010) loss 1.0000 (1.3128) acc 65.6250 (67.2059) lr 1.9921e-03 eta 8:54:59 +epoch [4/50] batch [90/500] time 1.355 (1.372) data 0.000 (0.009) loss 0.6758 (1.3061) acc 71.8750 (67.3611) lr 1.9921e-03 eta 8:55:07 +epoch [4/50] batch [95/500] time 1.349 (1.370) data 0.000 (0.009) loss 1.5791 (1.3078) acc 59.3750 (67.2697) lr 1.9921e-03 eta 8:54:36 +epoch [4/50] batch [100/500] time 1.364 (1.371) data 0.000 (0.008) loss 
1.7236 (1.3072) acc 62.5000 (67.3438) lr 1.9921e-03 eta 8:54:33 +epoch [4/50] batch [105/500] time 1.363 (1.370) data 0.000 (0.008) loss 1.2246 (1.2953) acc 71.8750 (67.6488) lr 1.9921e-03 eta 8:54:09 +epoch [4/50] batch [110/500] time 1.362 (1.370) data 0.000 (0.008) loss 0.9277 (1.2861) acc 71.8750 (67.8977) lr 1.9921e-03 eta 8:53:57 +epoch [4/50] batch [115/500] time 1.367 (1.369) data 0.000 (0.007) loss 1.0176 (1.2868) acc 75.0000 (67.8804) lr 1.9921e-03 eta 8:53:42 +epoch [4/50] batch [120/500] time 1.365 (1.369) data 0.000 (0.007) loss 0.8091 (1.2841) acc 81.2500 (68.0469) lr 1.9921e-03 eta 8:53:24 +epoch [4/50] batch [125/500] time 1.370 (1.369) data 0.000 (0.007) loss 0.9800 (1.2893) acc 68.7500 (67.9000) lr 1.9921e-03 eta 8:53:11 +epoch [4/50] batch [130/500] time 1.375 (1.368) data 0.000 (0.006) loss 0.8057 (1.2771) acc 81.2500 (68.2212) lr 1.9921e-03 eta 8:52:59 +epoch [4/50] batch [135/500] time 1.348 (1.368) data 0.000 (0.006) loss 1.1055 (1.2751) acc 78.1250 (68.2407) lr 1.9921e-03 eta 8:52:35 +epoch [4/50] batch [140/500] time 1.363 (1.367) data 0.000 (0.006) loss 1.3496 (1.2664) acc 62.5000 (68.4375) lr 1.9921e-03 eta 8:52:17 +epoch [4/50] batch [145/500] time 1.369 (1.367) data 0.000 (0.006) loss 1.1299 (1.2633) acc 65.6250 (68.4914) lr 1.9921e-03 eta 8:52:05 +epoch [4/50] batch [150/500] time 1.380 (1.367) data 0.000 (0.006) loss 1.3584 (1.2656) acc 65.6250 (68.4792) lr 1.9921e-03 eta 8:51:49 +epoch [4/50] batch [155/500] time 1.356 (1.367) data 0.000 (0.006) loss 0.9458 (1.2554) acc 71.8750 (68.6290) lr 1.9921e-03 eta 8:51:47 +epoch [4/50] batch [160/500] time 1.347 (1.367) data 0.000 (0.005) loss 0.6299 (1.2460) acc 75.0000 (68.7891) lr 1.9921e-03 eta 8:51:34 +epoch [4/50] batch [165/500] time 1.355 (1.366) data 0.000 (0.005) loss 0.8262 (1.2409) acc 81.2500 (68.8258) lr 1.9921e-03 eta 8:51:23 +epoch [4/50] batch [170/500] time 1.369 (1.366) data 0.000 (0.005) loss 1.2695 (1.2435) acc 71.8750 (68.8051) lr 1.9921e-03 eta 8:51:13 +epoch [4/50] 
batch [175/500] time 1.359 (1.366) data 0.000 (0.005) loss 0.8862 (1.2414) acc 78.1250 (68.7679) lr 1.9921e-03 eta 8:51:04 +epoch [4/50] batch [180/500] time 1.344 (1.366) data 0.000 (0.005) loss 1.2480 (1.2455) acc 75.0000 (68.6979) lr 1.9921e-03 eta 8:50:46 +epoch [4/50] batch [185/500] time 1.360 (1.366) data 0.000 (0.005) loss 1.7354 (1.2400) acc 53.1250 (68.8345) lr 1.9921e-03 eta 8:50:38 +epoch [4/50] batch [190/500] time 1.370 (1.366) data 0.000 (0.005) loss 1.0420 (1.2344) acc 78.1250 (68.9309) lr 1.9921e-03 eta 8:50:46 +epoch [4/50] batch [195/500] time 1.361 (1.366) data 0.000 (0.004) loss 0.8213 (1.2379) acc 81.2500 (68.8942) lr 1.9921e-03 eta 8:50:39 +epoch [4/50] batch [200/500] time 1.363 (1.366) data 0.000 (0.004) loss 1.9883 (1.2439) acc 43.7500 (68.7500) lr 1.9921e-03 eta 8:50:33 +epoch [4/50] batch [205/500] time 1.345 (1.366) data 0.000 (0.004) loss 0.6719 (1.2345) acc 87.5000 (69.0091) lr 1.9921e-03 eta 8:50:21 +epoch [4/50] batch [210/500] time 1.360 (1.366) data 0.000 (0.004) loss 1.3643 (1.2299) acc 75.0000 (69.2708) lr 1.9921e-03 eta 8:50:12 +epoch [4/50] batch [215/500] time 1.373 (1.366) data 0.000 (0.004) loss 0.8359 (1.2302) acc 81.2500 (69.3023) lr 1.9921e-03 eta 8:50:03 +epoch [4/50] batch [220/500] time 1.349 (1.365) data 0.000 (0.004) loss 1.2373 (1.2297) acc 71.8750 (69.3182) lr 1.9921e-03 eta 8:49:47 +epoch [4/50] batch [225/500] time 1.359 (1.365) data 0.000 (0.004) loss 1.7188 (1.2347) acc 53.1250 (69.2361) lr 1.9921e-03 eta 8:49:35 +epoch [4/50] batch [230/500] time 1.342 (1.365) data 0.000 (0.004) loss 0.8511 (1.2375) acc 87.5000 (69.2120) lr 1.9921e-03 eta 8:49:17 +epoch [4/50] batch [235/500] time 1.365 (1.365) data 0.000 (0.004) loss 1.2998 (1.2400) acc 65.6250 (69.1622) lr 1.9921e-03 eta 8:49:21 +epoch [4/50] batch [240/500] time 1.377 (1.365) data 0.000 (0.004) loss 0.8755 (1.2330) acc 75.0000 (69.2969) lr 1.9921e-03 eta 8:49:09 +epoch [4/50] batch [245/500] time 1.343 (1.365) data 0.000 (0.004) loss 0.8398 (1.2271) acc 
78.1250 (69.4133) lr 1.9921e-03 eta 8:48:58 +epoch [4/50] batch [250/500] time 1.355 (1.365) data 0.000 (0.004) loss 1.9268 (1.2323) acc 59.3750 (69.3875) lr 1.9921e-03 eta 8:48:48 +epoch [4/50] batch [255/500] time 1.355 (1.365) data 0.000 (0.003) loss 0.8560 (1.2298) acc 71.8750 (69.3995) lr 1.9921e-03 eta 8:48:41 +epoch [4/50] batch [260/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.2510 (1.2288) acc 62.5000 (69.3870) lr 1.9921e-03 eta 8:48:36 +epoch [4/50] batch [265/500] time 1.364 (1.365) data 0.000 (0.003) loss 1.6992 (1.2302) acc 62.5000 (69.3986) lr 1.9921e-03 eta 8:48:27 +epoch [4/50] batch [270/500] time 1.370 (1.365) data 0.000 (0.003) loss 1.7119 (1.2312) acc 68.7500 (69.3750) lr 1.9921e-03 eta 8:48:21 +epoch [4/50] batch [275/500] time 1.363 (1.365) data 0.001 (0.003) loss 0.8477 (1.2263) acc 75.0000 (69.3977) lr 1.9921e-03 eta 8:48:13 +epoch [4/50] batch [280/500] time 1.372 (1.365) data 0.001 (0.003) loss 1.3369 (1.2256) acc 62.5000 (69.3862) lr 1.9921e-03 eta 8:48:03 +epoch [4/50] batch [285/500] time 1.403 (1.365) data 0.000 (0.003) loss 1.0029 (1.2254) acc 68.7500 (69.3969) lr 1.9921e-03 eta 8:48:02 +epoch [4/50] batch [290/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.2598 (1.2259) acc 65.6250 (69.3750) lr 1.9921e-03 eta 8:47:56 +epoch [4/50] batch [295/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.6631 (1.2273) acc 71.8750 (69.4492) lr 1.9921e-03 eta 8:47:52 +epoch [4/50] batch [300/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.2979 (1.2279) acc 78.1250 (69.5208) lr 1.9921e-03 eta 8:47:46 +epoch [4/50] batch [305/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.6143 (1.2292) acc 43.7500 (69.4057) lr 1.9921e-03 eta 8:47:36 +epoch [4/50] batch [310/500] time 1.364 (1.365) data 0.000 (0.003) loss 1.2920 (1.2305) acc 68.7500 (69.3548) lr 1.9921e-03 eta 8:47:27 +epoch [4/50] batch [315/500] time 1.348 (1.364) data 0.000 (0.003) loss 1.4590 (1.2287) acc 71.8750 (69.4345) lr 1.9921e-03 eta 8:47:14 +epoch [4/50] batch [320/500] time 
1.341 (1.364) data 0.000 (0.003) loss 1.2695 (1.2280) acc 62.5000 (69.4043) lr 1.9921e-03 eta 8:47:05 +epoch [4/50] batch [325/500] time 1.363 (1.364) data 0.000 (0.003) loss 0.9360 (1.2277) acc 62.5000 (69.3846) lr 1.9921e-03 eta 8:46:55 +epoch [4/50] batch [330/500] time 1.489 (1.364) data 0.000 (0.003) loss 0.9106 (1.2291) acc 75.0000 (69.3182) lr 1.9921e-03 eta 8:46:54 +epoch [4/50] batch [335/500] time 1.350 (1.364) data 0.000 (0.003) loss 1.2891 (1.2287) acc 65.6250 (69.3004) lr 1.9921e-03 eta 8:46:46 +epoch [4/50] batch [340/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.3848 (1.2328) acc 65.6250 (69.2923) lr 1.9921e-03 eta 8:46:33 +epoch [4/50] batch [345/500] time 1.344 (1.364) data 0.000 (0.003) loss 1.2832 (1.2349) acc 65.6250 (69.2844) lr 1.9921e-03 eta 8:46:21 +epoch [4/50] batch [350/500] time 1.377 (1.364) data 0.000 (0.003) loss 1.0537 (1.2369) acc 71.8750 (69.2321) lr 1.9921e-03 eta 8:46:12 +epoch [4/50] batch [355/500] time 1.359 (1.364) data 0.000 (0.003) loss 1.3926 (1.2381) acc 71.8750 (69.1989) lr 1.9921e-03 eta 8:46:04 +epoch [4/50] batch [360/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.9395 (1.2427) acc 62.5000 (69.1840) lr 1.9921e-03 eta 8:45:59 +epoch [4/50] batch [365/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.2812 (1.2418) acc 75.0000 (69.1781) lr 1.9921e-03 eta 8:45:53 +epoch [4/50] batch [370/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.9482 (1.2413) acc 75.0000 (69.1892) lr 1.9921e-03 eta 8:45:44 +epoch [4/50] batch [375/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.6338 (1.2435) acc 59.3750 (69.1417) lr 1.9921e-03 eta 8:45:43 +epoch [4/50] batch [380/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.1475 (1.2449) acc 71.8750 (69.0954) lr 1.9921e-03 eta 8:45:39 +epoch [4/50] batch [385/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.6309 (1.2469) acc 65.6250 (69.0990) lr 1.9921e-03 eta 8:45:32 +epoch [4/50] batch [390/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.4277 (1.2467) acc 65.6250 (69.0785) lr 
1.9921e-03 eta 8:45:23 +epoch [4/50] batch [395/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.3428 (1.2479) acc 75.0000 (69.1218) lr 1.9921e-03 eta 8:45:14 +epoch [4/50] batch [400/500] time 1.337 (1.364) data 0.000 (0.002) loss 0.7896 (1.2475) acc 75.0000 (69.1250) lr 1.9921e-03 eta 8:45:04 +epoch [4/50] batch [405/500] time 1.346 (1.364) data 0.000 (0.002) loss 1.3281 (1.2505) acc 68.7500 (69.0741) lr 1.9921e-03 eta 8:44:58 +epoch [4/50] batch [410/500] time 1.340 (1.364) data 0.000 (0.002) loss 1.0801 (1.2485) acc 78.1250 (69.1082) lr 1.9921e-03 eta 8:44:52 +epoch [4/50] batch [415/500] time 1.367 (1.364) data 0.000 (0.002) loss 0.7183 (1.2457) acc 78.1250 (69.1792) lr 1.9921e-03 eta 8:44:46 +epoch [4/50] batch [420/500] time 1.350 (1.364) data 0.000 (0.002) loss 1.5186 (1.2457) acc 65.6250 (69.1815) lr 1.9921e-03 eta 8:44:36 +epoch [4/50] batch [425/500] time 1.383 (1.364) data 0.000 (0.002) loss 1.5088 (1.2444) acc 68.7500 (69.2059) lr 1.9921e-03 eta 8:44:28 +epoch [4/50] batch [430/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.3125 (1.2472) acc 71.8750 (69.1860) lr 1.9921e-03 eta 8:44:20 +epoch [4/50] batch [435/500] time 1.351 (1.364) data 0.000 (0.002) loss 2.1094 (1.2500) acc 34.3750 (69.0948) lr 1.9921e-03 eta 8:44:12 +epoch [4/50] batch [440/500] time 1.377 (1.364) data 0.000 (0.002) loss 2.1562 (1.2531) acc 46.8750 (69.0270) lr 1.9921e-03 eta 8:44:04 +epoch [4/50] batch [445/500] time 1.355 (1.363) data 0.000 (0.002) loss 0.9126 (1.2517) acc 75.0000 (69.0730) lr 1.9921e-03 eta 8:43:55 +epoch [4/50] batch [450/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.2266 (1.2552) acc 75.0000 (68.9931) lr 1.9921e-03 eta 8:43:46 +epoch [4/50] batch [455/500] time 1.349 (1.363) data 0.000 (0.002) loss 0.8188 (1.2536) acc 75.0000 (69.0385) lr 1.9921e-03 eta 8:43:38 +epoch [4/50] batch [460/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.4980 (1.2522) acc 68.7500 (69.1033) lr 1.9921e-03 eta 8:43:31 +epoch [4/50] batch [465/500] time 1.355 (1.363) data 
0.000 (0.002) loss 1.4404 (1.2524) acc 68.7500 (69.1196) lr 1.9921e-03 eta 8:43:23 +epoch [4/50] batch [470/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.0254 (1.2499) acc 59.3750 (69.0691) lr 1.9921e-03 eta 8:43:14 +epoch [4/50] batch [475/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.1504 (1.2500) acc 59.3750 (69.0395) lr 1.9921e-03 eta 8:43:13 +epoch [4/50] batch [480/500] time 1.346 (1.363) data 0.000 (0.002) loss 0.9863 (1.2499) acc 68.7500 (69.0104) lr 1.9921e-03 eta 8:43:04 +epoch [4/50] batch [485/500] time 1.362 (1.363) data 0.001 (0.002) loss 1.2041 (1.2502) acc 68.7500 (68.9948) lr 1.9921e-03 eta 8:42:56 +epoch [4/50] batch [490/500] time 1.370 (1.363) data 0.000 (0.002) loss 1.5391 (1.2490) acc 56.2500 (68.9987) lr 1.9921e-03 eta 8:42:47 +epoch [4/50] batch [495/500] time 1.386 (1.363) data 0.000 (0.002) loss 1.6240 (1.2481) acc 59.3750 (69.0341) lr 1.9921e-03 eta 8:42:39 +epoch [4/50] batch [500/500] time 1.358 (1.363) data 0.000 (0.002) loss 1.1211 (1.2465) acc 71.8750 (69.0563) lr 1.9823e-03 eta 8:42:30 +epoch [5/50] batch [5/500] time 1.343 (1.504) data 0.000 (0.151) loss 1.1758 (1.2017) acc 71.8750 (70.6250) lr 1.9823e-03 eta 9:36:13 +epoch [5/50] batch [10/500] time 1.343 (1.429) data 0.000 (0.075) loss 1.3066 (1.1625) acc 78.1250 (70.9375) lr 1.9823e-03 eta 9:07:22 +epoch [5/50] batch [15/500] time 1.330 (1.402) data 0.000 (0.050) loss 1.3838 (1.2286) acc 75.0000 (70.2083) lr 1.9823e-03 eta 8:56:59 +epoch [5/50] batch [20/500] time 1.361 (1.392) data 0.000 (0.038) loss 1.6680 (1.2592) acc 53.1250 (68.4375) lr 1.9823e-03 eta 8:53:04 +epoch [5/50] batch [25/500] time 1.351 (1.384) data 0.000 (0.030) loss 1.5176 (1.2458) acc 65.6250 (69.3750) lr 1.9823e-03 eta 8:49:57 +epoch [5/50] batch [30/500] time 1.360 (1.380) data 0.000 (0.025) loss 1.2021 (1.2624) acc 62.5000 (68.1250) lr 1.9823e-03 eta 8:48:16 +epoch [5/50] batch [35/500] time 1.356 (1.378) data 0.000 (0.022) loss 1.1797 (1.2388) acc 68.7500 (68.7500) lr 1.9823e-03 eta 8:47:26 +epoch 
[5/50] batch [40/500] time 1.367 (1.375) data 0.000 (0.019) loss 1.8486 (1.2471) acc 56.2500 (68.6719) lr 1.9823e-03 eta 8:46:12 +epoch [5/50] batch [45/500] time 1.344 (1.372) data 0.000 (0.017) loss 1.0742 (1.2228) acc 75.0000 (69.3056) lr 1.9823e-03 eta 8:44:56 +epoch [5/50] batch [50/500] time 1.340 (1.371) data 0.000 (0.015) loss 1.3242 (1.2364) acc 59.3750 (69.0000) lr 1.9823e-03 eta 8:44:17 +epoch [5/50] batch [55/500] time 1.368 (1.370) data 0.001 (0.014) loss 1.7305 (1.2470) acc 56.2500 (68.8636) lr 1.9823e-03 eta 8:43:52 +epoch [5/50] batch [60/500] time 1.376 (1.370) data 0.000 (0.013) loss 1.7383 (1.2393) acc 50.0000 (68.8021) lr 1.9823e-03 eta 8:43:51 +epoch [5/50] batch [65/500] time 1.479 (1.371) data 0.000 (0.012) loss 1.3818 (1.2310) acc 65.6250 (69.1346) lr 1.9823e-03 eta 8:44:14 +epoch [5/50] batch [70/500] time 1.358 (1.370) data 0.001 (0.011) loss 1.0957 (1.2317) acc 71.8750 (69.0179) lr 1.9823e-03 eta 8:43:42 +epoch [5/50] batch [75/500] time 1.356 (1.369) data 0.000 (0.010) loss 1.4854 (1.2323) acc 62.5000 (68.9583) lr 1.9823e-03 eta 8:43:07 +epoch [5/50] batch [80/500] time 1.361 (1.368) data 0.000 (0.010) loss 1.2754 (1.2299) acc 68.7500 (69.1016) lr 1.9823e-03 eta 8:42:39 +epoch [5/50] batch [85/500] time 1.370 (1.368) data 0.000 (0.009) loss 1.0820 (1.2252) acc 65.6250 (69.0441) lr 1.9823e-03 eta 8:42:31 +epoch [5/50] batch [90/500] time 1.358 (1.368) data 0.000 (0.009) loss 1.2949 (1.2179) acc 56.2500 (69.0278) lr 1.9823e-03 eta 8:42:24 +epoch [5/50] batch [95/500] time 1.375 (1.368) data 0.000 (0.008) loss 0.6816 (1.2184) acc 81.2500 (69.1118) lr 1.9823e-03 eta 8:42:09 +epoch [5/50] batch [100/500] time 1.353 (1.367) data 0.000 (0.008) loss 1.5869 (1.2211) acc 59.3750 (69.0625) lr 1.9823e-03 eta 8:41:42 +epoch [5/50] batch [105/500] time 1.349 (1.367) data 0.001 (0.008) loss 0.8608 (1.2179) acc 75.0000 (69.1071) lr 1.9823e-03 eta 8:41:30 +epoch [5/50] batch [110/500] time 1.371 (1.366) data 0.001 (0.007) loss 0.9932 (1.2121) acc 71.8750 
(69.1761) lr 1.9823e-03 eta 8:41:12 +epoch [5/50] batch [115/500] time 1.373 (1.366) data 0.000 (0.007) loss 1.5059 (1.2111) acc 59.3750 (69.1576) lr 1.9823e-03 eta 8:41:03 +epoch [5/50] batch [120/500] time 1.371 (1.366) data 0.000 (0.007) loss 1.2236 (1.2135) acc 71.8750 (69.2708) lr 1.9823e-03 eta 8:41:03 +epoch [5/50] batch [125/500] time 1.345 (1.366) data 0.000 (0.006) loss 1.2939 (1.2113) acc 71.8750 (69.3250) lr 1.9823e-03 eta 8:40:45 +epoch [5/50] batch [130/500] time 1.367 (1.366) data 0.000 (0.006) loss 1.3584 (1.2198) acc 65.6250 (69.1587) lr 1.9823e-03 eta 8:40:36 +epoch [5/50] batch [135/500] time 1.362 (1.366) data 0.000 (0.006) loss 1.3047 (1.2234) acc 71.8750 (69.2824) lr 1.9823e-03 eta 8:40:25 +epoch [5/50] batch [140/500] time 1.370 (1.365) data 0.000 (0.006) loss 1.4883 (1.2304) acc 62.5000 (69.0848) lr 1.9823e-03 eta 8:40:10 +epoch [5/50] batch [145/500] time 1.348 (1.365) data 0.000 (0.006) loss 0.8755 (1.2301) acc 78.1250 (69.0948) lr 1.9823e-03 eta 8:39:53 +epoch [5/50] batch [150/500] time 1.386 (1.365) data 0.000 (0.005) loss 0.9922 (1.2379) acc 71.8750 (69.0208) lr 1.9823e-03 eta 8:39:45 +epoch [5/50] batch [155/500] time 1.366 (1.364) data 0.000 (0.005) loss 1.9062 (1.2501) acc 59.3750 (68.8105) lr 1.9823e-03 eta 8:39:30 +epoch [5/50] batch [160/500] time 1.357 (1.364) data 0.000 (0.005) loss 1.2119 (1.2518) acc 71.8750 (68.8086) lr 1.9823e-03 eta 8:39:22 +epoch [5/50] batch [165/500] time 1.364 (1.365) data 0.000 (0.005) loss 1.0918 (1.2496) acc 71.8750 (68.8636) lr 1.9823e-03 eta 8:39:26 +epoch [5/50] batch [170/500] time 1.340 (1.365) data 0.000 (0.005) loss 1.2354 (1.2419) acc 56.2500 (68.8971) lr 1.9823e-03 eta 8:39:14 +epoch [5/50] batch [175/500] time 1.365 (1.364) data 0.001 (0.005) loss 0.9658 (1.2350) acc 75.0000 (68.9821) lr 1.9823e-03 eta 8:39:01 +epoch [5/50] batch [180/500] time 1.391 (1.365) data 0.000 (0.005) loss 1.4492 (1.2384) acc 68.7500 (68.8889) lr 1.9823e-03 eta 8:39:04 +epoch [5/50] batch [185/500] time 1.365 
(1.365) data 0.000 (0.004) loss 1.5586 (1.2431) acc 65.6250 (68.6993) lr 1.9823e-03 eta 8:39:02 +epoch [5/50] batch [190/500] time 1.358 (1.365) data 0.000 (0.004) loss 1.0996 (1.2458) acc 68.7500 (68.7500) lr 1.9823e-03 eta 8:38:47 +epoch [5/50] batch [195/500] time 1.365 (1.364) data 0.001 (0.004) loss 1.1045 (1.2459) acc 68.7500 (68.7821) lr 1.9823e-03 eta 8:38:36 +epoch [5/50] batch [200/500] time 1.360 (1.364) data 0.000 (0.004) loss 0.9893 (1.2447) acc 75.0000 (68.8281) lr 1.9823e-03 eta 8:38:30 +epoch [5/50] batch [205/500] time 1.358 (1.364) data 0.000 (0.004) loss 0.8936 (1.2411) acc 78.1250 (68.8720) lr 1.9823e-03 eta 8:38:18 +epoch [5/50] batch [210/500] time 1.353 (1.365) data 0.000 (0.004) loss 1.1514 (1.2415) acc 62.5000 (68.7798) lr 1.9823e-03 eta 8:38:23 +epoch [5/50] batch [215/500] time 1.353 (1.365) data 0.000 (0.004) loss 1.6738 (1.2529) acc 59.3750 (68.6047) lr 1.9823e-03 eta 8:38:14 +epoch [5/50] batch [220/500] time 1.362 (1.365) data 0.000 (0.004) loss 1.2168 (1.2562) acc 68.7500 (68.4943) lr 1.9823e-03 eta 8:38:11 +epoch [5/50] batch [225/500] time 1.360 (1.365) data 0.000 (0.004) loss 1.6396 (1.2573) acc 56.2500 (68.5556) lr 1.9823e-03 eta 8:38:00 +epoch [5/50] batch [230/500] time 1.376 (1.365) data 0.000 (0.004) loss 0.8789 (1.2530) acc 75.0000 (68.6549) lr 1.9823e-03 eta 8:37:54 +epoch [5/50] batch [235/500] time 1.365 (1.365) data 0.000 (0.004) loss 0.8433 (1.2499) acc 81.2500 (68.7101) lr 1.9823e-03 eta 8:37:49 +epoch [5/50] batch [240/500] time 1.362 (1.365) data 0.000 (0.004) loss 1.3203 (1.2501) acc 68.7500 (68.6589) lr 1.9823e-03 eta 8:37:37 +epoch [5/50] batch [245/500] time 1.344 (1.364) data 0.000 (0.003) loss 0.9263 (1.2472) acc 71.8750 (68.7372) lr 1.9823e-03 eta 8:37:28 +epoch [5/50] batch [250/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.5615 (1.2473) acc 68.7500 (68.7625) lr 1.9823e-03 eta 8:37:17 +epoch [5/50] batch [255/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.0352 (1.2444) acc 71.8750 (68.8113) lr 
1.9823e-03 eta 8:37:06 +epoch [5/50] batch [260/500] time 1.389 (1.364) data 0.000 (0.003) loss 1.5488 (1.2521) acc 65.6250 (68.7500) lr 1.9823e-03 eta 8:37:00 +epoch [5/50] batch [265/500] time 1.345 (1.364) data 0.001 (0.003) loss 1.5771 (1.2495) acc 65.6250 (68.8090) lr 1.9823e-03 eta 8:36:51 +epoch [5/50] batch [270/500] time 1.371 (1.364) data 0.000 (0.003) loss 1.4385 (1.2481) acc 68.7500 (68.8657) lr 1.9823e-03 eta 8:36:41 +epoch [5/50] batch [275/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.3916 (1.2472) acc 53.1250 (68.8068) lr 1.9823e-03 eta 8:36:32 +epoch [5/50] batch [280/500] time 1.345 (1.364) data 0.000 (0.003) loss 0.9126 (1.2487) acc 75.0000 (68.8170) lr 1.9823e-03 eta 8:36:23 +epoch [5/50] batch [285/500] time 1.365 (1.364) data 0.000 (0.003) loss 1.2510 (1.2474) acc 71.8750 (68.8158) lr 1.9823e-03 eta 8:36:14 +epoch [5/50] batch [290/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.0615 (1.2449) acc 71.8750 (68.9440) lr 1.9823e-03 eta 8:36:05 +epoch [5/50] batch [295/500] time 1.355 (1.363) data 0.001 (0.003) loss 1.8740 (1.2465) acc 59.3750 (68.8665) lr 1.9823e-03 eta 8:35:57 +epoch [5/50] batch [300/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.8438 (1.2469) acc 71.8750 (68.8125) lr 1.9823e-03 eta 8:35:45 +epoch [5/50] batch [305/500] time 1.371 (1.363) data 0.000 (0.003) loss 1.4082 (1.2418) acc 68.7500 (68.9549) lr 1.9823e-03 eta 8:35:36 +epoch [5/50] batch [310/500] time 1.360 (1.364) data 0.000 (0.003) loss 0.7939 (1.2394) acc 75.0000 (68.9617) lr 1.9823e-03 eta 8:35:39 +epoch [5/50] batch [315/500] time 1.380 (1.363) data 0.000 (0.003) loss 1.3271 (1.2405) acc 62.5000 (68.9683) lr 1.9823e-03 eta 8:35:28 +epoch [5/50] batch [320/500] time 1.348 (1.363) data 0.000 (0.003) loss 1.5781 (1.2443) acc 68.7500 (68.9355) lr 1.9823e-03 eta 8:35:20 +epoch [5/50] batch [325/500] time 1.369 (1.363) data 0.001 (0.003) loss 0.5532 (1.2403) acc 84.3750 (69.0096) lr 1.9823e-03 eta 8:35:12 +epoch [5/50] batch [330/500] time 1.360 (1.363) data 
0.001 (0.003) loss 1.3770 (1.2373) acc 65.6250 (69.0814) lr 1.9823e-03 eta 8:35:03 +epoch [5/50] batch [335/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.9468 (1.2318) acc 78.1250 (69.2164) lr 1.9823e-03 eta 8:34:56 +epoch [5/50] batch [340/500] time 1.373 (1.363) data 0.000 (0.003) loss 2.2344 (1.2342) acc 56.2500 (69.2096) lr 1.9823e-03 eta 8:34:51 +epoch [5/50] batch [345/500] time 1.353 (1.363) data 0.000 (0.003) loss 1.2627 (1.2337) acc 71.8750 (69.2482) lr 1.9823e-03 eta 8:34:44 +epoch [5/50] batch [350/500] time 1.355 (1.363) data 0.000 (0.003) loss 2.0098 (1.2337) acc 53.1250 (69.2411) lr 1.9823e-03 eta 8:34:37 +epoch [5/50] batch [355/500] time 1.344 (1.364) data 0.000 (0.002) loss 1.1123 (1.2345) acc 75.0000 (69.2694) lr 1.9823e-03 eta 8:34:37 +epoch [5/50] batch [360/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.3477 (1.2339) acc 68.7500 (69.2882) lr 1.9823e-03 eta 8:34:27 +epoch [5/50] batch [365/500] time 1.354 (1.363) data 0.000 (0.002) loss 0.3330 (1.2344) acc 93.7500 (69.3408) lr 1.9823e-03 eta 8:34:15 +epoch [5/50] batch [370/500] time 1.350 (1.363) data 0.000 (0.002) loss 1.1562 (1.2332) acc 75.0000 (69.3666) lr 1.9823e-03 eta 8:34:06 +epoch [5/50] batch [375/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.3506 (1.2328) acc 68.7500 (69.4000) lr 1.9823e-03 eta 8:33:55 +epoch [5/50] batch [380/500] time 1.366 (1.363) data 0.000 (0.002) loss 1.3516 (1.2316) acc 65.6250 (69.4079) lr 1.9823e-03 eta 8:33:46 +epoch [5/50] batch [385/500] time 1.372 (1.363) data 0.000 (0.002) loss 1.4639 (1.2308) acc 71.8750 (69.4968) lr 1.9823e-03 eta 8:33:40 +epoch [5/50] batch [390/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.3340 (1.2325) acc 71.8750 (69.4792) lr 1.9823e-03 eta 8:33:31 +epoch [5/50] batch [395/500] time 1.349 (1.363) data 0.000 (0.002) loss 0.8696 (1.2287) acc 68.7500 (69.5411) lr 1.9823e-03 eta 8:33:19 +epoch [5/50] batch [400/500] time 1.368 (1.362) data 0.000 (0.002) loss 1.1123 (1.2257) acc 65.6250 (69.6016) lr 1.9823e-03 eta 
8:33:11 +epoch [5/50] batch [405/500] time 1.357 (1.362) data 0.000 (0.002) loss 0.6694 (1.2214) acc 81.2500 (69.6682) lr 1.9823e-03 eta 8:33:01 +epoch [5/50] batch [410/500] time 1.359 (1.362) data 0.000 (0.002) loss 0.8223 (1.2200) acc 75.0000 (69.7104) lr 1.9823e-03 eta 8:32:49 +epoch [5/50] batch [415/500] time 1.364 (1.362) data 0.000 (0.002) loss 1.4316 (1.2199) acc 56.2500 (69.6687) lr 1.9823e-03 eta 8:32:44 +epoch [5/50] batch [420/500] time 1.353 (1.362) data 0.000 (0.002) loss 1.4268 (1.2179) acc 65.6250 (69.6875) lr 1.9823e-03 eta 8:32:38 +epoch [5/50] batch [425/500] time 1.359 (1.362) data 0.000 (0.002) loss 1.2539 (1.2163) acc 68.7500 (69.7500) lr 1.9823e-03 eta 8:32:31 +epoch [5/50] batch [430/500] time 1.353 (1.362) data 0.000 (0.002) loss 1.4199 (1.2149) acc 62.5000 (69.7456) lr 1.9823e-03 eta 8:32:23 +epoch [5/50] batch [435/500] time 1.365 (1.362) data 0.000 (0.002) loss 0.9209 (1.2157) acc 75.0000 (69.7342) lr 1.9823e-03 eta 8:32:15 +epoch [5/50] batch [440/500] time 1.345 (1.362) data 0.000 (0.002) loss 1.4043 (1.2161) acc 68.7500 (69.6946) lr 1.9823e-03 eta 8:32:06 +epoch [5/50] batch [445/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.2705 (1.2159) acc 65.6250 (69.6489) lr 1.9823e-03 eta 8:31:59 +epoch [5/50] batch [450/500] time 1.465 (1.362) data 0.000 (0.002) loss 1.9268 (1.2199) acc 53.1250 (69.5833) lr 1.9823e-03 eta 8:31:58 +epoch [5/50] batch [455/500] time 1.373 (1.362) data 0.000 (0.002) loss 1.4512 (1.2206) acc 62.5000 (69.6154) lr 1.9823e-03 eta 8:31:53 +epoch [5/50] batch [460/500] time 1.366 (1.362) data 0.000 (0.002) loss 1.8711 (1.2228) acc 50.0000 (69.5245) lr 1.9823e-03 eta 8:31:48 +epoch [5/50] batch [465/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.6543 (1.2219) acc 59.3750 (69.5094) lr 1.9823e-03 eta 8:31:42 +epoch [5/50] batch [470/500] time 1.368 (1.362) data 0.000 (0.002) loss 1.2559 (1.2211) acc 71.8750 (69.5213) lr 1.9823e-03 eta 8:31:35 +epoch [5/50] batch [475/500] time 1.361 (1.362) data 0.000 (0.002) loss 
1.0088 (1.2201) acc 78.1250 (69.5395) lr 1.9823e-03 eta 8:31:28 +epoch [5/50] batch [480/500] time 1.350 (1.362) data 0.000 (0.002) loss 1.1924 (1.2201) acc 62.5000 (69.5247) lr 1.9823e-03 eta 8:31:19 +epoch [5/50] batch [485/500] time 1.374 (1.362) data 0.001 (0.002) loss 1.9199 (1.2226) acc 40.6250 (69.4523) lr 1.9823e-03 eta 8:31:14 +epoch [5/50] batch [490/500] time 1.338 (1.362) data 0.000 (0.002) loss 1.4531 (1.2226) acc 56.2500 (69.4388) lr 1.9823e-03 eta 8:31:06 +epoch [5/50] batch [495/500] time 1.370 (1.363) data 0.000 (0.002) loss 1.7812 (1.2246) acc 56.2500 (69.4003) lr 1.9823e-03 eta 8:31:04 +epoch [5/50] batch [500/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.1533 (1.2253) acc 68.7500 (69.3750) lr 1.9686e-03 eta 8:30:59 +epoch [6/50] batch [5/500] time 1.367 (1.580) data 0.000 (0.209) loss 1.6748 (1.2891) acc 56.2500 (66.2500) lr 1.9686e-03 eta 9:52:19 +epoch [6/50] batch [10/500] time 1.363 (1.472) data 0.000 (0.104) loss 0.9614 (1.1633) acc 78.1250 (69.0625) lr 1.9686e-03 eta 9:11:45 +epoch [6/50] batch [15/500] time 1.360 (1.438) data 0.000 (0.070) loss 1.5586 (1.2190) acc 68.7500 (68.9583) lr 1.9686e-03 eta 8:58:44 +epoch [6/50] batch [20/500] time 1.342 (1.416) data 0.000 (0.052) loss 1.1113 (1.2056) acc 71.8750 (69.0625) lr 1.9686e-03 eta 8:50:41 +epoch [6/50] batch [25/500] time 1.354 (1.404) data 0.000 (0.042) loss 1.0176 (1.1884) acc 75.0000 (68.7500) lr 1.9686e-03 eta 8:45:49 +epoch [6/50] batch [30/500] time 1.329 (1.395) data 0.000 (0.035) loss 1.5518 (1.1955) acc 62.5000 (69.2708) lr 1.9686e-03 eta 8:42:24 +epoch [6/50] batch [35/500] time 1.360 (1.389) data 0.000 (0.030) loss 1.5195 (1.2249) acc 62.5000 (68.9286) lr 1.9686e-03 eta 8:40:08 +epoch [6/50] batch [40/500] time 1.361 (1.389) data 0.000 (0.026) loss 0.9932 (1.2107) acc 65.6250 (68.8281) lr 1.9686e-03 eta 8:40:00 +epoch [6/50] batch [45/500] time 1.343 (1.385) data 0.000 (0.023) loss 0.6792 (1.1794) acc 81.2500 (69.6528) lr 1.9686e-03 eta 8:38:27 +epoch [6/50] batch 
[50/500] time 1.353 (1.382) data 0.000 (0.021) loss 1.3643 (1.2006) acc 71.8750 (69.2500) lr 1.9686e-03 eta 8:37:12 +epoch [6/50] batch [55/500] time 1.349 (1.380) data 0.000 (0.019) loss 1.4062 (1.1916) acc 62.5000 (69.4318) lr 1.9686e-03 eta 8:36:08 +epoch [6/50] batch [60/500] time 1.363 (1.378) data 0.001 (0.018) loss 1.4180 (1.1869) acc 56.2500 (69.3750) lr 1.9686e-03 eta 8:35:25 +epoch [6/50] batch [65/500] time 1.358 (1.377) data 0.001 (0.016) loss 0.7554 (1.1855) acc 71.8750 (69.4231) lr 1.9686e-03 eta 8:34:56 +epoch [6/50] batch [70/500] time 1.346 (1.375) data 0.000 (0.015) loss 0.8774 (1.1780) acc 75.0000 (69.5089) lr 1.9686e-03 eta 8:34:05 +epoch [6/50] batch [75/500] time 1.355 (1.374) data 0.000 (0.014) loss 1.1836 (1.1813) acc 68.7500 (69.3750) lr 1.9686e-03 eta 8:33:21 +epoch [6/50] batch [80/500] time 1.352 (1.373) data 0.000 (0.013) loss 0.9023 (1.1859) acc 68.7500 (69.2578) lr 1.9686e-03 eta 8:32:53 +epoch [6/50] batch [85/500] time 1.366 (1.372) data 0.000 (0.013) loss 0.7515 (1.1723) acc 78.1250 (69.4485) lr 1.9686e-03 eta 8:32:22 +epoch [6/50] batch [90/500] time 1.358 (1.371) data 0.000 (0.012) loss 1.5830 (1.1802) acc 56.2500 (69.2708) lr 1.9686e-03 eta 8:32:02 +epoch [6/50] batch [95/500] time 1.375 (1.370) data 0.000 (0.011) loss 1.2432 (1.1853) acc 62.5000 (69.0789) lr 1.9686e-03 eta 8:31:44 +epoch [6/50] batch [100/500] time 1.369 (1.370) data 0.001 (0.011) loss 1.5479 (1.1886) acc 59.3750 (68.9375) lr 1.9686e-03 eta 8:31:24 +epoch [6/50] batch [105/500] time 1.353 (1.370) data 0.001 (0.010) loss 0.8599 (1.1825) acc 78.1250 (69.1071) lr 1.9686e-03 eta 8:31:10 +epoch [6/50] batch [110/500] time 1.392 (1.369) data 0.000 (0.010) loss 1.0635 (1.1869) acc 75.0000 (69.1477) lr 1.9686e-03 eta 8:30:52 +epoch [6/50] batch [115/500] time 1.360 (1.369) data 0.000 (0.009) loss 0.9316 (1.1789) acc 81.2500 (69.2935) lr 1.9686e-03 eta 8:30:42 +epoch [6/50] batch [120/500] time 1.383 (1.369) data 0.001 (0.009) loss 1.1641 (1.1928) acc 68.7500 (69.2708) 
lr 1.9686e-03 eta 8:30:44 +epoch [6/50] batch [125/500] time 1.364 (1.369) data 0.000 (0.009) loss 1.4648 (1.1969) acc 59.3750 (69.1500) lr 1.9686e-03 eta 8:30:29 +epoch [6/50] batch [130/500] time 1.350 (1.369) data 0.000 (0.008) loss 1.6494 (1.2025) acc 65.6250 (69.1106) lr 1.9686e-03 eta 8:30:14 +epoch [6/50] batch [135/500] time 1.353 (1.368) data 0.000 (0.008) loss 0.8862 (1.1978) acc 78.1250 (69.2824) lr 1.9686e-03 eta 8:30:02 +epoch [6/50] batch [140/500] time 1.377 (1.369) data 0.000 (0.008) loss 1.6260 (1.2052) acc 50.0000 (69.1964) lr 1.9686e-03 eta 8:30:17 +epoch [6/50] batch [145/500] time 1.372 (1.369) data 0.001 (0.008) loss 1.4023 (1.2041) acc 50.0000 (69.2026) lr 1.9686e-03 eta 8:30:10 +epoch [6/50] batch [150/500] time 1.373 (1.370) data 0.000 (0.007) loss 1.3662 (1.2044) acc 65.6250 (69.2292) lr 1.9686e-03 eta 8:30:09 +epoch [6/50] batch [155/500] time 1.365 (1.369) data 0.001 (0.007) loss 1.9707 (1.2113) acc 62.5000 (69.2137) lr 1.9686e-03 eta 8:29:52 +epoch [6/50] batch [160/500] time 1.357 (1.369) data 0.000 (0.007) loss 1.2764 (1.2086) acc 68.7500 (69.4141) lr 1.9686e-03 eta 8:29:43 +epoch [6/50] batch [165/500] time 1.348 (1.369) data 0.000 (0.007) loss 1.0264 (1.1993) acc 68.7500 (69.6212) lr 1.9686e-03 eta 8:29:28 +epoch [6/50] batch [170/500] time 1.359 (1.368) data 0.000 (0.007) loss 1.2979 (1.2024) acc 62.5000 (69.4485) lr 1.9686e-03 eta 8:29:13 +epoch [6/50] batch [175/500] time 1.376 (1.368) data 0.000 (0.006) loss 1.0449 (1.2042) acc 78.1250 (69.4643) lr 1.9686e-03 eta 8:29:01 +epoch [6/50] batch [180/500] time 1.372 (1.368) data 0.000 (0.006) loss 1.6191 (1.2076) acc 65.6250 (69.3750) lr 1.9686e-03 eta 8:28:55 +epoch [6/50] batch [185/500] time 1.360 (1.369) data 0.000 (0.006) loss 1.3936 (1.2143) acc 50.0000 (69.1047) lr 1.9686e-03 eta 8:29:05 +epoch [6/50] batch [190/500] time 1.374 (1.369) data 0.001 (0.006) loss 1.1963 (1.2129) acc 68.7500 (69.1118) lr 1.9686e-03 eta 8:28:57 +epoch [6/50] batch [195/500] time 1.365 (1.369) data 
0.000 (0.006) loss 1.0156 (1.2167) acc 68.7500 (69.0865) lr 1.9686e-03 eta 8:28:45 +epoch [6/50] batch [200/500] time 1.376 (1.369) data 0.000 (0.006) loss 1.1904 (1.2144) acc 71.8750 (69.3125) lr 1.9686e-03 eta 8:28:42 +epoch [6/50] batch [205/500] time 1.353 (1.368) data 0.000 (0.005) loss 1.2363 (1.2195) acc 71.8750 (69.1006) lr 1.9686e-03 eta 8:28:29 +epoch [6/50] batch [210/500] time 1.370 (1.368) data 0.000 (0.005) loss 1.2021 (1.2192) acc 71.8750 (69.0476) lr 1.9686e-03 eta 8:28:18 +epoch [6/50] batch [215/500] time 1.371 (1.368) data 0.000 (0.005) loss 1.8037 (1.2227) acc 53.1250 (68.9244) lr 1.9686e-03 eta 8:28:08 +epoch [6/50] batch [220/500] time 1.364 (1.368) data 0.000 (0.005) loss 1.7256 (1.2283) acc 46.8750 (68.8068) lr 1.9686e-03 eta 8:27:55 +epoch [6/50] batch [225/500] time 1.352 (1.368) data 0.000 (0.005) loss 1.5449 (1.2293) acc 59.3750 (68.7778) lr 1.9686e-03 eta 8:27:45 +epoch [6/50] batch [230/500] time 1.364 (1.368) data 0.000 (0.005) loss 1.3545 (1.2308) acc 68.7500 (68.7636) lr 1.9686e-03 eta 8:27:36 +epoch [6/50] batch [235/500] time 1.374 (1.368) data 0.000 (0.005) loss 0.9468 (1.2238) acc 75.0000 (68.8564) lr 1.9686e-03 eta 8:27:27 +epoch [6/50] batch [240/500] time 1.379 (1.367) data 0.000 (0.005) loss 1.7139 (1.2201) acc 62.5000 (68.9453) lr 1.9686e-03 eta 8:27:20 +epoch [6/50] batch [245/500] time 1.360 (1.367) data 0.000 (0.005) loss 1.0557 (1.2215) acc 78.1250 (68.9031) lr 1.9686e-03 eta 8:27:08 +epoch [6/50] batch [250/500] time 1.353 (1.367) data 0.000 (0.005) loss 1.1113 (1.2171) acc 78.1250 (69.0000) lr 1.9686e-03 eta 8:26:55 +epoch [6/50] batch [255/500] time 1.360 (1.367) data 0.000 (0.004) loss 1.1016 (1.2195) acc 68.7500 (68.9828) lr 1.9686e-03 eta 8:26:45 +epoch [6/50] batch [260/500] time 1.366 (1.367) data 0.000 (0.004) loss 1.6914 (1.2184) acc 68.7500 (69.0264) lr 1.9686e-03 eta 8:26:38 +epoch [6/50] batch [265/500] time 1.362 (1.367) data 0.000 (0.004) loss 1.2500 (1.2198) acc 71.8750 (68.9976) lr 1.9686e-03 eta 
8:26:33 +epoch [6/50] batch [270/500] time 1.357 (1.367) data 0.000 (0.004) loss 0.4658 (1.2206) acc 93.7500 (69.0741) lr 1.9686e-03 eta 8:26:26 +epoch [6/50] batch [275/500] time 1.362 (1.367) data 0.000 (0.004) loss 1.3613 (1.2180) acc 81.2500 (69.2159) lr 1.9686e-03 eta 8:26:20 +epoch [6/50] batch [280/500] time 1.494 (1.367) data 0.000 (0.004) loss 1.7188 (1.2156) acc 65.6250 (69.3638) lr 1.9686e-03 eta 8:26:20 +epoch [6/50] batch [285/500] time 1.368 (1.367) data 0.000 (0.004) loss 1.1025 (1.2115) acc 84.3750 (69.5504) lr 1.9686e-03 eta 8:26:13 +epoch [6/50] batch [290/500] time 1.369 (1.367) data 0.000 (0.004) loss 0.9238 (1.2125) acc 78.1250 (69.5043) lr 1.9686e-03 eta 8:26:03 +epoch [6/50] batch [295/500] time 1.352 (1.367) data 0.000 (0.004) loss 0.7832 (1.2125) acc 87.5000 (69.4597) lr 1.9686e-03 eta 8:25:54 +epoch [6/50] batch [300/500] time 1.366 (1.367) data 0.000 (0.004) loss 0.9902 (1.2104) acc 71.8750 (69.4688) lr 1.9686e-03 eta 8:25:45 +epoch [6/50] batch [305/500] time 1.374 (1.367) data 0.000 (0.004) loss 0.7695 (1.2104) acc 84.3750 (69.4877) lr 1.9686e-03 eta 8:25:38 +epoch [6/50] batch [310/500] time 1.341 (1.367) data 0.000 (0.004) loss 0.9688 (1.2095) acc 71.8750 (69.4456) lr 1.9686e-03 eta 8:25:26 +epoch [6/50] batch [315/500] time 1.372 (1.367) data 0.000 (0.004) loss 0.5962 (1.2087) acc 81.2500 (69.4444) lr 1.9686e-03 eta 8:25:18 +epoch [6/50] batch [320/500] time 1.362 (1.367) data 0.000 (0.004) loss 0.9614 (1.2100) acc 78.1250 (69.4141) lr 1.9686e-03 eta 8:25:09 +epoch [6/50] batch [325/500] time 1.374 (1.367) data 0.000 (0.004) loss 1.2051 (1.2127) acc 62.5000 (69.4231) lr 1.9686e-03 eta 8:25:09 +epoch [6/50] batch [330/500] time 1.357 (1.367) data 0.000 (0.004) loss 1.2178 (1.2104) acc 71.8750 (69.4602) lr 1.9686e-03 eta 8:25:00 +epoch [6/50] batch [335/500] time 1.368 (1.367) data 0.000 (0.003) loss 1.3438 (1.2109) acc 65.6250 (69.4869) lr 1.9686e-03 eta 8:24:53 +epoch [6/50] batch [340/500] time 1.366 (1.367) data 0.000 (0.003) loss 
1.4297 (1.2124) acc 62.5000 (69.4393) lr 1.9686e-03 eta 8:24:44 +epoch [6/50] batch [345/500] time 1.376 (1.367) data 0.000 (0.003) loss 1.3418 (1.2123) acc 68.7500 (69.4384) lr 1.9686e-03 eta 8:24:37 +epoch [6/50] batch [350/500] time 1.363 (1.367) data 0.000 (0.003) loss 1.1670 (1.2107) acc 68.7500 (69.4911) lr 1.9686e-03 eta 8:24:29 +epoch [6/50] batch [355/500] time 1.348 (1.367) data 0.000 (0.003) loss 1.3291 (1.2117) acc 75.0000 (69.5070) lr 1.9686e-03 eta 8:24:21 +epoch [6/50] batch [360/500] time 1.332 (1.366) data 0.000 (0.003) loss 1.3760 (1.2171) acc 68.7500 (69.4878) lr 1.9686e-03 eta 8:24:11 +epoch [6/50] batch [365/500] time 1.371 (1.366) data 0.001 (0.003) loss 1.4131 (1.2193) acc 56.2500 (69.4092) lr 1.9686e-03 eta 8:24:04 +epoch [6/50] batch [370/500] time 1.344 (1.366) data 0.000 (0.003) loss 0.9092 (1.2183) acc 87.5000 (69.4172) lr 1.9686e-03 eta 8:23:57 +epoch [6/50] batch [375/500] time 1.346 (1.366) data 0.000 (0.003) loss 1.0146 (1.2152) acc 75.0000 (69.4500) lr 1.9686e-03 eta 8:23:44 +epoch [6/50] batch [380/500] time 1.374 (1.366) data 0.000 (0.003) loss 0.8569 (1.2156) acc 75.0000 (69.4572) lr 1.9686e-03 eta 8:23:33 +epoch [6/50] batch [385/500] time 1.372 (1.366) data 0.000 (0.003) loss 0.9888 (1.2144) acc 68.7500 (69.4643) lr 1.9686e-03 eta 8:23:24 +epoch [6/50] batch [390/500] time 1.370 (1.366) data 0.001 (0.003) loss 0.6074 (1.2092) acc 75.0000 (69.5353) lr 1.9686e-03 eta 8:23:17 +epoch [6/50] batch [395/500] time 1.368 (1.366) data 0.000 (0.003) loss 0.8560 (1.2079) acc 78.1250 (69.5491) lr 1.9686e-03 eta 8:23:07 +epoch [6/50] batch [400/500] time 1.332 (1.365) data 0.000 (0.003) loss 1.0664 (1.2068) acc 78.1250 (69.6016) lr 1.9686e-03 eta 8:22:57 +epoch [6/50] batch [405/500] time 1.356 (1.365) data 0.000 (0.003) loss 1.3584 (1.2048) acc 68.7500 (69.6682) lr 1.9686e-03 eta 8:22:47 +epoch [6/50] batch [410/500] time 1.355 (1.365) data 0.000 (0.003) loss 1.6328 (1.2064) acc 71.8750 (69.6723) lr 1.9686e-03 eta 8:22:37 +epoch [6/50] 
batch [415/500] time 1.344 (1.365) data 0.000 (0.003) loss 0.5283 (1.2022) acc 84.3750 (69.7892) lr 1.9686e-03 eta 8:22:26 +epoch [6/50] batch [420/500] time 1.347 (1.365) data 0.000 (0.003) loss 0.8872 (1.2034) acc 71.8750 (69.7321) lr 1.9686e-03 eta 8:22:19 +epoch [6/50] batch [425/500] time 1.336 (1.365) data 0.000 (0.003) loss 1.4521 (1.2061) acc 59.3750 (69.6765) lr 1.9686e-03 eta 8:22:16 +epoch [6/50] batch [430/500] time 1.364 (1.365) data 0.000 (0.003) loss 0.8545 (1.2029) acc 78.1250 (69.7602) lr 1.9686e-03 eta 8:22:05 +epoch [6/50] batch [435/500] time 1.360 (1.365) data 0.000 (0.003) loss 0.9883 (1.2043) acc 78.1250 (69.7845) lr 1.9686e-03 eta 8:21:55 +epoch [6/50] batch [440/500] time 1.363 (1.365) data 0.000 (0.003) loss 0.7139 (1.2059) acc 84.3750 (69.7727) lr 1.9686e-03 eta 8:21:49 +epoch [6/50] batch [445/500] time 1.377 (1.365) data 0.000 (0.003) loss 1.4395 (1.2077) acc 71.8750 (69.7121) lr 1.9686e-03 eta 8:21:42 +epoch [6/50] batch [450/500] time 1.371 (1.365) data 0.000 (0.003) loss 1.2764 (1.2101) acc 68.7500 (69.7014) lr 1.9686e-03 eta 8:21:37 +epoch [6/50] batch [455/500] time 1.353 (1.365) data 0.000 (0.003) loss 0.8726 (1.2087) acc 68.7500 (69.6841) lr 1.9686e-03 eta 8:21:28 +epoch [6/50] batch [460/500] time 1.346 (1.365) data 0.000 (0.003) loss 0.9829 (1.2067) acc 78.1250 (69.7486) lr 1.9686e-03 eta 8:21:18 +epoch [6/50] batch [465/500] time 1.352 (1.365) data 0.001 (0.003) loss 0.8301 (1.2063) acc 78.1250 (69.7379) lr 1.9686e-03 eta 8:21:11 +epoch [6/50] batch [470/500] time 1.354 (1.365) data 0.000 (0.003) loss 0.6162 (1.2077) acc 84.3750 (69.7008) lr 1.9686e-03 eta 8:21:11 +epoch [6/50] batch [475/500] time 1.358 (1.365) data 0.000 (0.003) loss 0.6333 (1.2040) acc 84.3750 (69.8026) lr 1.9686e-03 eta 8:21:03 +epoch [6/50] batch [480/500] time 1.355 (1.365) data 0.000 (0.003) loss 0.8911 (1.2048) acc 71.8750 (69.7656) lr 1.9686e-03 eta 8:20:55 +epoch [6/50] batch [485/500] time 1.358 (1.365) data 0.001 (0.003) loss 0.8887 (1.2043) acc 
71.8750 (69.7358) lr 1.9686e-03 eta 8:20:47 +epoch [6/50] batch [490/500] time 1.364 (1.365) data 0.000 (0.003) loss 1.0176 (1.2057) acc 68.7500 (69.7130) lr 1.9686e-03 eta 8:20:37 +epoch [6/50] batch [495/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.5684 (1.2072) acc 71.8750 (69.6970) lr 1.9686e-03 eta 8:20:30 +epoch [6/50] batch [500/500] time 1.370 (1.365) data 0.000 (0.002) loss 1.5098 (1.2062) acc 65.6250 (69.7062) lr 1.9511e-03 eta 8:20:23 +epoch [7/50] batch [5/500] time 1.349 (1.504) data 0.000 (0.152) loss 0.8555 (1.1247) acc 78.1250 (70.0000) lr 1.9511e-03 eta 9:11:12 +epoch [7/50] batch [10/500] time 1.364 (1.450) data 0.000 (0.076) loss 1.3320 (1.1199) acc 68.7500 (70.0000) lr 1.9511e-03 eta 8:51:24 +epoch [7/50] batch [15/500] time 1.363 (1.419) data 0.000 (0.051) loss 1.3008 (1.1356) acc 81.2500 (70.8333) lr 1.9511e-03 eta 8:39:50 +epoch [7/50] batch [20/500] time 1.345 (1.404) data 0.000 (0.038) loss 0.6084 (1.1318) acc 84.3750 (70.9375) lr 1.9511e-03 eta 8:34:11 +epoch [7/50] batch [25/500] time 1.353 (1.394) data 0.000 (0.031) loss 1.4707 (1.1420) acc 62.5000 (71.6250) lr 1.9511e-03 eta 8:30:23 +epoch [7/50] batch [30/500] time 1.358 (1.386) data 0.000 (0.026) loss 1.3643 (1.1616) acc 68.7500 (70.8333) lr 1.9511e-03 eta 8:27:25 +epoch [7/50] batch [35/500] time 1.377 (1.382) data 0.000 (0.022) loss 0.6958 (1.1753) acc 84.3750 (70.7143) lr 1.9511e-03 eta 8:25:54 +epoch [7/50] batch [40/500] time 1.360 (1.380) data 0.000 (0.019) loss 1.8076 (1.1727) acc 59.3750 (71.0938) lr 1.9511e-03 eta 8:24:56 +epoch [7/50] batch [45/500] time 1.362 (1.378) data 0.000 (0.017) loss 0.8730 (1.1924) acc 68.7500 (70.6944) lr 1.9511e-03 eta 8:24:14 +epoch [7/50] batch [50/500] time 1.370 (1.377) data 0.000 (0.016) loss 0.7480 (1.1776) acc 87.5000 (71.1875) lr 1.9511e-03 eta 8:23:50 +epoch [7/50] batch [55/500] time 1.342 (1.375) data 0.000 (0.014) loss 1.6680 (1.1881) acc 65.6250 (70.9091) lr 1.9511e-03 eta 8:23:05 +epoch [7/50] batch [60/500] time 1.355 (1.374) 
data 0.000 (0.013) loss 1.3398 (1.2061) acc 56.2500 (70.5208) lr 1.9511e-03 eta 8:22:16 +epoch [7/50] batch [65/500] time 1.346 (1.371) data 0.000 (0.012) loss 1.5527 (1.2015) acc 65.6250 (70.3846) lr 1.9511e-03 eta 8:21:23 +epoch [7/50] batch [70/500] time 1.367 (1.370) data 0.001 (0.011) loss 1.0713 (1.2070) acc 65.6250 (70.1786) lr 1.9511e-03 eta 8:20:50 +epoch [7/50] batch [75/500] time 1.365 (1.369) data 0.000 (0.010) loss 1.1602 (1.2079) acc 68.7500 (70.0833) lr 1.9511e-03 eta 8:20:22 +epoch [7/50] batch [80/500] time 1.351 (1.369) data 0.001 (0.010) loss 1.0957 (1.2166) acc 68.7500 (69.5703) lr 1.9511e-03 eta 8:20:05 +epoch [7/50] batch [85/500] time 1.368 (1.368) data 0.000 (0.009) loss 1.2559 (1.2055) acc 68.7500 (69.7794) lr 1.9511e-03 eta 8:19:41 +epoch [7/50] batch [90/500] time 1.353 (1.368) data 0.000 (0.009) loss 1.5781 (1.2165) acc 62.5000 (69.3750) lr 1.9511e-03 eta 8:19:22 +epoch [7/50] batch [95/500] time 1.351 (1.367) data 0.000 (0.008) loss 0.9238 (1.2158) acc 75.0000 (69.3092) lr 1.9511e-03 eta 8:19:12 +epoch [7/50] batch [100/500] time 1.368 (1.367) data 0.000 (0.008) loss 1.0898 (1.2203) acc 65.6250 (69.3125) lr 1.9511e-03 eta 8:19:02 +epoch [7/50] batch [105/500] time 1.371 (1.367) data 0.000 (0.008) loss 0.8877 (1.2233) acc 71.8750 (69.3155) lr 1.9511e-03 eta 8:18:56 +epoch [7/50] batch [110/500] time 1.358 (1.369) data 0.000 (0.007) loss 0.8252 (1.2132) acc 81.2500 (69.6023) lr 1.9511e-03 eta 8:19:18 +epoch [7/50] batch [115/500] time 1.351 (1.368) data 0.000 (0.007) loss 1.0586 (1.2213) acc 71.8750 (69.3207) lr 1.9511e-03 eta 8:19:07 +epoch [7/50] batch [120/500] time 1.367 (1.368) data 0.000 (0.007) loss 0.6797 (1.2158) acc 81.2500 (69.5573) lr 1.9511e-03 eta 8:18:55 +epoch [7/50] batch [125/500] time 1.353 (1.368) data 0.000 (0.006) loss 1.7197 (1.2232) acc 59.3750 (69.4750) lr 1.9511e-03 eta 8:18:34 +epoch [7/50] batch [130/500] time 1.354 (1.367) data 0.000 (0.006) loss 1.2676 (1.2133) acc 59.3750 (69.5673) lr 1.9511e-03 eta 8:18:25 
+epoch [7/50] batch [135/500] time 1.366 (1.367) data 0.000 (0.006) loss 1.1045 (1.2099) acc 65.6250 (69.5833) lr 1.9511e-03 eta 8:18:13 +epoch [7/50] batch [140/500] time 1.358 (1.367) data 0.000 (0.006) loss 0.7363 (1.2072) acc 81.2500 (69.7321) lr 1.9511e-03 eta 8:18:06 +epoch [7/50] batch [145/500] time 1.360 (1.367) data 0.000 (0.006) loss 1.2363 (1.1982) acc 68.7500 (69.8060) lr 1.9511e-03 eta 8:17:55 +epoch [7/50] batch [150/500] time 1.507 (1.368) data 0.008 (0.006) loss 1.2002 (1.1986) acc 62.5000 (69.6667) lr 1.9511e-03 eta 8:18:06 +epoch [7/50] batch [155/500] time 1.365 (1.368) data 0.000 (0.005) loss 0.7090 (1.1977) acc 90.6250 (69.7984) lr 1.9511e-03 eta 8:17:55 +epoch [7/50] batch [160/500] time 1.358 (1.368) data 0.000 (0.005) loss 1.2422 (1.1943) acc 78.1250 (69.9023) lr 1.9511e-03 eta 8:17:54 +epoch [7/50] batch [165/500] time 1.353 (1.368) data 0.000 (0.005) loss 0.9160 (1.1947) acc 84.3750 (70.0947) lr 1.9511e-03 eta 8:17:46 +epoch [7/50] batch [170/500] time 1.362 (1.368) data 0.000 (0.005) loss 1.7520 (1.1932) acc 53.1250 (70.0000) lr 1.9511e-03 eta 8:17:37 +epoch [7/50] batch [175/500] time 1.357 (1.368) data 0.000 (0.005) loss 1.5801 (1.1992) acc 68.7500 (69.9107) lr 1.9511e-03 eta 8:17:26 +epoch [7/50] batch [180/500] time 1.351 (1.367) data 0.000 (0.005) loss 1.6230 (1.2061) acc 65.6250 (69.8785) lr 1.9511e-03 eta 8:17:12 +epoch [7/50] batch [185/500] time 1.357 (1.367) data 0.000 (0.005) loss 0.5474 (1.2052) acc 84.3750 (69.9324) lr 1.9511e-03 eta 8:16:57 +epoch [7/50] batch [190/500] time 1.363 (1.367) data 0.000 (0.004) loss 1.3545 (1.2040) acc 65.6250 (69.8849) lr 1.9511e-03 eta 8:16:45 +epoch [7/50] batch [195/500] time 1.360 (1.366) data 0.000 (0.004) loss 1.2871 (1.2023) acc 65.6250 (69.7596) lr 1.9511e-03 eta 8:16:32 +epoch [7/50] batch [200/500] time 1.374 (1.366) data 0.000 (0.004) loss 1.4697 (1.2011) acc 62.5000 (69.7344) lr 1.9511e-03 eta 8:16:22 +epoch [7/50] batch [205/500] time 1.342 (1.366) data 0.000 (0.004) loss 0.9912 
(1.2043) acc 71.8750 (69.5884) lr 1.9511e-03 eta 8:16:12 +epoch [7/50] batch [210/500] time 1.375 (1.366) data 0.000 (0.004) loss 0.5664 (1.1933) acc 81.2500 (69.8065) lr 1.9511e-03 eta 8:16:02 +epoch [7/50] batch [215/500] time 1.355 (1.365) data 0.000 (0.004) loss 0.8833 (1.1892) acc 68.7500 (69.8983) lr 1.9511e-03 eta 8:15:43 +epoch [7/50] batch [220/500] time 1.376 (1.365) data 0.000 (0.004) loss 1.4014 (1.1833) acc 68.7500 (69.9858) lr 1.9511e-03 eta 8:15:29 +epoch [7/50] batch [225/500] time 1.349 (1.365) data 0.000 (0.004) loss 0.9058 (1.1917) acc 75.0000 (69.8472) lr 1.9511e-03 eta 8:15:14 +epoch [7/50] batch [230/500] time 1.374 (1.364) data 0.000 (0.004) loss 0.9106 (1.1883) acc 78.1250 (69.9185) lr 1.9511e-03 eta 8:15:04 +epoch [7/50] batch [235/500] time 1.376 (1.364) data 0.000 (0.004) loss 1.0801 (1.1917) acc 68.7500 (69.8670) lr 1.9511e-03 eta 8:14:53 +epoch [7/50] batch [240/500] time 1.367 (1.364) data 0.000 (0.004) loss 1.5020 (1.1943) acc 65.6250 (69.7917) lr 1.9511e-03 eta 8:14:43 +epoch [7/50] batch [245/500] time 1.349 (1.364) data 0.000 (0.004) loss 1.4375 (1.1963) acc 68.7500 (69.7577) lr 1.9511e-03 eta 8:14:33 +epoch [7/50] batch [250/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.7642 (1.1920) acc 81.2500 (69.7750) lr 1.9511e-03 eta 8:14:38 +epoch [7/50] batch [255/500] time 1.339 (1.364) data 0.001 (0.003) loss 1.1865 (1.1926) acc 65.6250 (69.6691) lr 1.9511e-03 eta 8:14:27 +epoch [7/50] batch [260/500] time 1.382 (1.364) data 0.000 (0.003) loss 1.4512 (1.1903) acc 71.8750 (69.7837) lr 1.9511e-03 eta 8:14:20 +epoch [7/50] batch [265/500] time 1.350 (1.364) data 0.000 (0.003) loss 0.9834 (1.1909) acc 78.1250 (69.7759) lr 1.9511e-03 eta 8:14:06 +epoch [7/50] batch [270/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.2881 (1.1864) acc 68.7500 (69.8958) lr 1.9511e-03 eta 8:13:59 +epoch [7/50] batch [275/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.0312 (1.1893) acc 78.1250 (69.8636) lr 1.9511e-03 eta 8:13:51 +epoch [7/50] batch 
[280/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.4248 (1.1899) acc 62.5000 (69.8661) lr 1.9511e-03 eta 8:13:43 +epoch [7/50] batch [285/500] time 1.365 (1.364) data 0.000 (0.003) loss 1.3584 (1.1915) acc 68.7500 (69.9123) lr 1.9511e-03 eta 8:13:36 +epoch [7/50] batch [290/500] time 1.376 (1.364) data 0.000 (0.003) loss 1.5830 (1.1955) acc 62.5000 (69.8491) lr 1.9511e-03 eta 8:13:29 +epoch [7/50] batch [295/500] time 1.381 (1.364) data 0.000 (0.003) loss 0.6460 (1.1928) acc 81.2500 (69.9682) lr 1.9511e-03 eta 8:13:34 +epoch [7/50] batch [300/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.1602 (1.1921) acc 78.1250 (70.0208) lr 1.9511e-03 eta 8:13:26 +epoch [7/50] batch [305/500] time 1.349 (1.364) data 0.000 (0.003) loss 1.2471 (1.1948) acc 68.7500 (69.9693) lr 1.9511e-03 eta 8:13:17 +epoch [7/50] batch [310/500] time 1.350 (1.364) data 0.000 (0.003) loss 1.3555 (1.1971) acc 65.6250 (69.9194) lr 1.9511e-03 eta 8:13:06 +epoch [7/50] batch [315/500] time 1.338 (1.364) data 0.000 (0.003) loss 1.2119 (1.1964) acc 75.0000 (69.9306) lr 1.9511e-03 eta 8:12:56 +epoch [7/50] batch [320/500] time 1.339 (1.364) data 0.001 (0.003) loss 1.2021 (1.1975) acc 78.1250 (69.9512) lr 1.9511e-03 eta 8:12:45 +epoch [7/50] batch [325/500] time 1.334 (1.364) data 0.000 (0.003) loss 1.2227 (1.1983) acc 78.1250 (69.9712) lr 1.9511e-03 eta 8:12:34 +epoch [7/50] batch [330/500] time 1.354 (1.363) data 0.000 (0.003) loss 0.9858 (1.1950) acc 81.2500 (70.0568) lr 1.9511e-03 eta 8:12:21 +epoch [7/50] batch [335/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.1777 (1.1926) acc 62.5000 (70.0933) lr 1.9511e-03 eta 8:12:16 +epoch [7/50] batch [340/500] time 1.371 (1.363) data 0.000 (0.003) loss 1.0186 (1.1902) acc 71.8750 (70.2022) lr 1.9511e-03 eta 8:12:11 +epoch [7/50] batch [345/500] time 1.351 (1.363) data 0.000 (0.003) loss 1.1904 (1.1897) acc 68.7500 (70.2536) lr 1.9511e-03 eta 8:12:06 +epoch [7/50] batch [350/500] time 1.362 (1.364) data 0.001 (0.003) loss 1.3262 (1.1886) acc 68.7500 
(70.2857) lr 1.9511e-03 eta 8:12:00 +epoch [7/50] batch [355/500] time 1.363 (1.364) data 0.000 (0.003) loss 0.9751 (1.1846) acc 78.1250 (70.3697) lr 1.9511e-03 eta 8:11:53 +epoch [7/50] batch [360/500] time 1.366 (1.363) data 0.000 (0.003) loss 0.7827 (1.1819) acc 71.8750 (70.3733) lr 1.9511e-03 eta 8:11:46 +epoch [7/50] batch [365/500] time 1.361 (1.363) data 0.000 (0.003) loss 1.4307 (1.1826) acc 62.5000 (70.3510) lr 1.9511e-03 eta 8:11:36 +epoch [7/50] batch [370/500] time 1.341 (1.363) data 0.000 (0.002) loss 0.8740 (1.1852) acc 78.1250 (70.3378) lr 1.9511e-03 eta 8:11:28 +epoch [7/50] batch [375/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.2969 (1.1855) acc 71.8750 (70.3500) lr 1.9511e-03 eta 8:11:19 +epoch [7/50] batch [380/500] time 1.347 (1.363) data 0.000 (0.002) loss 0.9541 (1.1831) acc 78.1250 (70.4112) lr 1.9511e-03 eta 8:11:09 +epoch [7/50] batch [385/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.1982 (1.1832) acc 65.6250 (70.3653) lr 1.9511e-03 eta 8:11:00 +epoch [7/50] batch [390/500] time 1.351 (1.363) data 0.000 (0.002) loss 1.2666 (1.1850) acc 71.8750 (70.3205) lr 1.9511e-03 eta 8:10:52 +epoch [7/50] batch [395/500] time 1.350 (1.363) data 0.000 (0.002) loss 1.4990 (1.1850) acc 62.5000 (70.2927) lr 1.9511e-03 eta 8:10:51 +epoch [7/50] batch [400/500] time 1.371 (1.363) data 0.000 (0.002) loss 1.4912 (1.1869) acc 68.7500 (70.2578) lr 1.9511e-03 eta 8:10:43 +epoch [7/50] batch [405/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.2607 (1.1841) acc 65.6250 (70.2855) lr 1.9511e-03 eta 8:10:34 +epoch [7/50] batch [410/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.3916 (1.1830) acc 68.7500 (70.2896) lr 1.9511e-03 eta 8:10:25 +epoch [7/50] batch [415/500] time 1.359 (1.363) data 0.000 (0.002) loss 1.8184 (1.1859) acc 59.3750 (70.1958) lr 1.9511e-03 eta 8:10:18 +epoch [7/50] batch [420/500] time 1.339 (1.363) data 0.000 (0.002) loss 2.4375 (1.1893) acc 50.0000 (70.1488) lr 1.9511e-03 eta 8:10:06 +epoch [7/50] batch [425/500] time 1.361 
(1.363) data 0.000 (0.002) loss 1.1787 (1.1881) acc 75.0000 (70.1618) lr 1.9511e-03 eta 8:09:56 +epoch [7/50] batch [430/500] time 1.360 (1.363) data 0.000 (0.002) loss 0.8623 (1.1866) acc 71.8750 (70.1817) lr 1.9511e-03 eta 8:09:49 +epoch [7/50] batch [435/500] time 1.381 (1.363) data 0.000 (0.002) loss 1.6895 (1.1867) acc 59.3750 (70.2083) lr 1.9511e-03 eta 8:09:42 +epoch [7/50] batch [440/500] time 1.378 (1.363) data 0.000 (0.002) loss 0.5308 (1.1844) acc 90.6250 (70.2486) lr 1.9511e-03 eta 8:09:39 +epoch [7/50] batch [445/500] time 1.376 (1.363) data 0.000 (0.002) loss 1.9678 (1.1869) acc 53.1250 (70.1896) lr 1.9511e-03 eta 8:09:32 +epoch [7/50] batch [450/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.0459 (1.1857) acc 75.0000 (70.2153) lr 1.9511e-03 eta 8:09:22 +epoch [7/50] batch [455/500] time 1.378 (1.363) data 0.000 (0.002) loss 0.9707 (1.1867) acc 68.7500 (70.1786) lr 1.9511e-03 eta 8:09:16 +epoch [7/50] batch [460/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.5029 (1.1893) acc 75.0000 (70.1495) lr 1.9511e-03 eta 8:09:10 +epoch [7/50] batch [465/500] time 1.341 (1.363) data 0.000 (0.002) loss 0.9951 (1.1874) acc 78.1250 (70.1882) lr 1.9511e-03 eta 8:09:02 +epoch [7/50] batch [470/500] time 1.365 (1.362) data 0.001 (0.002) loss 1.3438 (1.1859) acc 68.7500 (70.2593) lr 1.9511e-03 eta 8:08:53 +epoch [7/50] batch [475/500] time 1.346 (1.362) data 0.000 (0.002) loss 0.7158 (1.1852) acc 81.2500 (70.2895) lr 1.9511e-03 eta 8:08:45 +epoch [7/50] batch [480/500] time 1.355 (1.362) data 0.000 (0.002) loss 1.1562 (1.1839) acc 71.8750 (70.3060) lr 1.9511e-03 eta 8:08:37 +epoch [7/50] batch [485/500] time 1.376 (1.362) data 0.001 (0.002) loss 1.6211 (1.1844) acc 68.7500 (70.3028) lr 1.9511e-03 eta 8:08:32 +epoch [7/50] batch [490/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.4639 (1.1837) acc 68.7500 (70.3125) lr 1.9511e-03 eta 8:08:22 +epoch [7/50] batch [495/500] time 1.385 (1.362) data 0.000 (0.002) loss 1.0781 (1.1834) acc 68.7500 (70.3093) lr 
1.9511e-03 eta 8:08:16 +epoch [7/50] batch [500/500] time 1.354 (1.362) data 0.000 (0.002) loss 1.0596 (1.1871) acc 71.8750 (70.2875) lr 1.9298e-03 eta 8:08:08 +epoch [8/50] batch [5/500] time 1.387 (1.544) data 0.000 (0.166) loss 0.8721 (1.1436) acc 71.8750 (69.3750) lr 1.9298e-03 eta 9:13:00 +epoch [8/50] batch [10/500] time 1.337 (1.446) data 0.000 (0.083) loss 1.8770 (1.3601) acc 53.1250 (67.1875) lr 1.9298e-03 eta 8:38:04 +epoch [8/50] batch [15/500] time 1.365 (1.418) data 0.000 (0.056) loss 1.1855 (1.3075) acc 71.8750 (67.9167) lr 1.9298e-03 eta 8:27:43 +epoch [8/50] batch [20/500] time 1.391 (1.404) data 0.000 (0.042) loss 0.5347 (1.2263) acc 87.5000 (70.6250) lr 1.9298e-03 eta 8:22:41 +epoch [8/50] batch [25/500] time 1.376 (1.398) data 0.000 (0.033) loss 1.4883 (1.1684) acc 68.7500 (72.0000) lr 1.9298e-03 eta 8:20:18 +epoch [8/50] batch [30/500] time 1.360 (1.392) data 0.000 (0.028) loss 0.9976 (1.2011) acc 81.2500 (70.8333) lr 1.9298e-03 eta 8:18:00 +epoch [8/50] batch [35/500] time 1.353 (1.388) data 0.000 (0.024) loss 0.6865 (1.1692) acc 71.8750 (71.2500) lr 1.9298e-03 eta 8:16:29 +epoch [8/50] batch [40/500] time 1.351 (1.383) data 0.000 (0.021) loss 0.8887 (1.1348) acc 81.2500 (72.0312) lr 1.9298e-03 eta 8:14:49 +epoch [8/50] batch [45/500] time 1.353 (1.379) data 0.000 (0.019) loss 0.6689 (1.1357) acc 78.1250 (72.0139) lr 1.9298e-03 eta 8:13:17 +epoch [8/50] batch [50/500] time 1.360 (1.378) data 0.001 (0.017) loss 1.1250 (1.1402) acc 71.8750 (71.7500) lr 1.9298e-03 eta 8:12:30 +epoch [8/50] batch [55/500] time 1.351 (1.376) data 0.000 (0.015) loss 1.3066 (1.1397) acc 65.6250 (71.5909) lr 1.9298e-03 eta 8:11:46 +epoch [8/50] batch [60/500] time 1.349 (1.375) data 0.000 (0.014) loss 1.1816 (1.1580) acc 65.6250 (71.1458) lr 1.9298e-03 eta 8:11:28 +epoch [8/50] batch [65/500] time 1.374 (1.375) data 0.000 (0.013) loss 0.5879 (1.1536) acc 78.1250 (71.3462) lr 1.9298e-03 eta 8:11:11 +epoch [8/50] batch [70/500] time 1.370 (1.374) data 0.000 (0.012) loss 
1.6377 (1.1443) acc 68.7500 (71.9196) lr 1.9298e-03 eta 8:10:42 +epoch [8/50] batch [75/500] time 1.359 (1.373) data 0.000 (0.011) loss 1.2100 (1.1357) acc 65.6250 (72.0417) lr 1.9298e-03 eta 8:10:22 +epoch [8/50] batch [80/500] time 1.343 (1.373) data 0.000 (0.011) loss 1.1719 (1.1316) acc 78.1250 (72.2656) lr 1.9298e-03 eta 8:10:01 +epoch [8/50] batch [85/500] time 1.377 (1.373) data 0.000 (0.010) loss 1.4805 (1.1314) acc 46.8750 (72.0588) lr 1.9298e-03 eta 8:10:02 +epoch [8/50] batch [90/500] time 1.345 (1.372) data 0.001 (0.010) loss 0.7334 (1.1327) acc 81.2500 (71.9792) lr 1.9298e-03 eta 8:09:41 +epoch [8/50] batch [95/500] time 1.377 (1.372) data 0.000 (0.009) loss 1.2822 (1.1410) acc 84.3750 (71.8092) lr 1.9298e-03 eta 8:09:19 +epoch [8/50] batch [100/500] time 1.352 (1.371) data 0.000 (0.009) loss 1.3320 (1.1414) acc 71.8750 (71.8750) lr 1.9298e-03 eta 8:09:02 +epoch [8/50] batch [105/500] time 1.341 (1.370) data 0.000 (0.008) loss 0.8623 (1.1442) acc 75.0000 (71.9345) lr 1.9298e-03 eta 8:08:40 +epoch [8/50] batch [110/500] time 1.370 (1.370) data 0.000 (0.008) loss 0.8784 (1.1437) acc 78.1250 (72.0170) lr 1.9298e-03 eta 8:08:25 +epoch [8/50] batch [115/500] time 1.378 (1.370) data 0.000 (0.008) loss 1.7969 (1.1493) acc 65.6250 (71.9293) lr 1.9298e-03 eta 8:08:18 +epoch [8/50] batch [120/500] time 1.375 (1.370) data 0.000 (0.007) loss 0.9966 (1.1512) acc 81.2500 (72.0052) lr 1.9298e-03 eta 8:08:10 +epoch [8/50] batch [125/500] time 1.356 (1.370) data 0.000 (0.007) loss 1.2666 (1.1527) acc 71.8750 (72.1000) lr 1.9298e-03 eta 8:07:59 +epoch [8/50] batch [130/500] time 1.384 (1.370) data 0.000 (0.007) loss 1.0127 (1.1532) acc 68.7500 (72.0192) lr 1.9298e-03 eta 8:08:06 +epoch [8/50] batch [135/500] time 1.342 (1.370) data 0.000 (0.006) loss 1.0049 (1.1527) acc 65.6250 (71.9213) lr 1.9298e-03 eta 8:07:50 +epoch [8/50] batch [140/500] time 1.365 (1.370) data 0.000 (0.006) loss 1.0469 (1.1489) acc 75.0000 (71.8527) lr 1.9298e-03 eta 8:07:39 +epoch [8/50] batch 
[145/500] time 1.376 (1.370) data 0.000 (0.006) loss 0.7227 (1.1416) acc 78.1250 (71.8966) lr 1.9298e-03 eta 8:07:29 +epoch [8/50] batch [150/500] time 1.372 (1.370) data 0.000 (0.006) loss 0.8799 (1.1482) acc 75.0000 (71.8125) lr 1.9298e-03 eta 8:07:24 +epoch [8/50] batch [155/500] time 1.382 (1.370) data 0.000 (0.006) loss 1.1143 (1.1519) acc 71.8750 (71.6935) lr 1.9298e-03 eta 8:07:16 +epoch [8/50] batch [160/500] time 1.349 (1.369) data 0.000 (0.006) loss 0.9395 (1.1528) acc 87.5000 (71.7383) lr 1.9298e-03 eta 8:06:58 +epoch [8/50] batch [165/500] time 1.372 (1.369) data 0.000 (0.005) loss 1.5361 (1.1548) acc 56.2500 (71.6667) lr 1.9298e-03 eta 8:06:53 +epoch [8/50] batch [170/500] time 1.377 (1.369) data 0.000 (0.005) loss 1.2021 (1.1572) acc 75.0000 (71.5809) lr 1.9298e-03 eta 8:06:45 +epoch [8/50] batch [175/500] time 1.359 (1.369) data 0.000 (0.005) loss 1.0254 (1.1566) acc 81.2500 (71.4821) lr 1.9298e-03 eta 8:06:28 +epoch [8/50] batch [180/500] time 1.353 (1.369) data 0.000 (0.005) loss 1.5127 (1.1562) acc 56.2500 (71.4062) lr 1.9298e-03 eta 8:06:17 +epoch [8/50] batch [185/500] time 1.350 (1.368) data 0.000 (0.005) loss 1.8496 (1.1626) acc 62.5000 (71.2500) lr 1.9298e-03 eta 8:06:06 +epoch [8/50] batch [190/500] time 1.359 (1.368) data 0.000 (0.005) loss 0.7949 (1.1547) acc 84.3750 (71.4803) lr 1.9298e-03 eta 8:05:53 +epoch [8/50] batch [195/500] time 1.380 (1.368) data 0.000 (0.005) loss 0.7881 (1.1541) acc 75.0000 (71.4103) lr 1.9298e-03 eta 8:05:49 +epoch [8/50] batch [200/500] time 1.363 (1.368) data 0.000 (0.004) loss 1.5332 (1.1574) acc 62.5000 (71.3281) lr 1.9298e-03 eta 8:05:38 +epoch [8/50] batch [205/500] time 1.353 (1.368) data 0.000 (0.004) loss 1.1006 (1.1623) acc 71.8750 (71.1738) lr 1.9298e-03 eta 8:05:29 +epoch [8/50] batch [210/500] time 1.371 (1.368) data 0.000 (0.004) loss 1.7617 (1.1639) acc 65.6250 (71.0565) lr 1.9298e-03 eta 8:05:18 +epoch [8/50] batch [215/500] time 1.354 (1.367) data 0.000 (0.004) loss 0.6377 (1.1640) acc 84.3750 
(71.0901) lr 1.9298e-03 eta 8:05:06 +epoch [8/50] batch [220/500] time 1.348 (1.367) data 0.000 (0.004) loss 1.4082 (1.1616) acc 68.7500 (71.1364) lr 1.9298e-03 eta 8:04:50 +epoch [8/50] batch [225/500] time 1.375 (1.367) data 0.000 (0.004) loss 0.6138 (1.1580) acc 84.3750 (71.2778) lr 1.9298e-03 eta 8:04:46 +epoch [8/50] batch [230/500] time 1.360 (1.367) data 0.000 (0.004) loss 1.9824 (1.1629) acc 62.5000 (71.2092) lr 1.9298e-03 eta 8:04:43 +epoch [8/50] batch [235/500] time 1.359 (1.367) data 0.000 (0.004) loss 1.5547 (1.1651) acc 59.3750 (71.0904) lr 1.9298e-03 eta 8:04:32 +epoch [8/50] batch [240/500] time 1.349 (1.367) data 0.000 (0.004) loss 0.7554 (1.1632) acc 84.3750 (71.1458) lr 1.9298e-03 eta 8:04:17 +epoch [8/50] batch [245/500] time 1.362 (1.366) data 0.000 (0.004) loss 0.6743 (1.1635) acc 87.5000 (71.2117) lr 1.9298e-03 eta 8:04:02 +epoch [8/50] batch [250/500] time 1.360 (1.366) data 0.000 (0.004) loss 1.2129 (1.1671) acc 59.3750 (71.1500) lr 1.9298e-03 eta 8:03:49 +epoch [8/50] batch [255/500] time 1.330 (1.366) data 0.000 (0.004) loss 1.1797 (1.1680) acc 68.7500 (71.1765) lr 1.9298e-03 eta 8:03:35 +epoch [8/50] batch [260/500] time 1.371 (1.366) data 0.000 (0.004) loss 1.4141 (1.1662) acc 59.3750 (71.2139) lr 1.9298e-03 eta 8:03:23 +epoch [8/50] batch [265/500] time 1.334 (1.365) data 0.000 (0.003) loss 0.4778 (1.1636) acc 84.3750 (71.2736) lr 1.9298e-03 eta 8:03:12 +epoch [8/50] batch [270/500] time 1.498 (1.366) data 0.000 (0.003) loss 1.6465 (1.1654) acc 68.7500 (71.2384) lr 1.9298e-03 eta 8:03:14 +epoch [8/50] batch [275/500] time 1.345 (1.366) data 0.000 (0.003) loss 0.7798 (1.1640) acc 75.0000 (71.2614) lr 1.9298e-03 eta 8:03:03 +epoch [8/50] batch [280/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.1543 (1.1645) acc 68.7500 (71.2612) lr 1.9298e-03 eta 8:02:51 +epoch [8/50] batch [285/500] time 1.355 (1.365) data 0.000 (0.003) loss 1.2246 (1.1659) acc 71.8750 (71.2719) lr 1.9298e-03 eta 8:02:41 +epoch [8/50] batch [290/500] time 1.360 
(1.365) data 0.000 (0.003) loss 2.2520 (1.1678) acc 59.3750 (71.2500) lr 1.9298e-03 eta 8:02:34 +epoch [8/50] batch [295/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.0107 (1.1661) acc 75.0000 (71.3242) lr 1.9298e-03 eta 8:02:27 +epoch [8/50] batch [300/500] time 1.348 (1.365) data 0.000 (0.003) loss 1.0859 (1.1660) acc 71.8750 (71.3021) lr 1.9298e-03 eta 8:02:16 +epoch [8/50] batch [305/500] time 1.348 (1.365) data 0.000 (0.003) loss 1.0273 (1.1627) acc 71.8750 (71.3730) lr 1.9298e-03 eta 8:02:09 +epoch [8/50] batch [310/500] time 1.342 (1.365) data 0.000 (0.003) loss 1.5176 (1.1619) acc 56.2500 (71.3710) lr 1.9298e-03 eta 8:01:59 +epoch [8/50] batch [315/500] time 1.354 (1.365) data 0.000 (0.003) loss 1.5928 (1.1643) acc 59.3750 (71.2996) lr 1.9298e-03 eta 8:01:51 +epoch [8/50] batch [320/500] time 1.356 (1.365) data 0.000 (0.003) loss 1.0547 (1.1647) acc 65.6250 (71.2500) lr 1.9298e-03 eta 8:01:40 +epoch [8/50] batch [325/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.1338 (1.1641) acc 75.0000 (71.2885) lr 1.9298e-03 eta 8:01:32 +epoch [8/50] batch [330/500] time 1.360 (1.364) data 0.001 (0.003) loss 1.0830 (1.1652) acc 71.8750 (71.2027) lr 1.9298e-03 eta 8:01:23 +epoch [8/50] batch [335/500] time 1.339 (1.364) data 0.000 (0.003) loss 1.3672 (1.1630) acc 68.7500 (71.2780) lr 1.9298e-03 eta 8:01:12 +epoch [8/50] batch [340/500] time 1.350 (1.364) data 0.000 (0.003) loss 1.5352 (1.1621) acc 65.6250 (71.2868) lr 1.9298e-03 eta 8:01:00 +epoch [8/50] batch [345/500] time 1.339 (1.364) data 0.000 (0.003) loss 1.5791 (1.1639) acc 62.5000 (71.2500) lr 1.9298e-03 eta 8:00:47 +epoch [8/50] batch [350/500] time 1.343 (1.363) data 0.000 (0.003) loss 0.9375 (1.1586) acc 71.8750 (71.2946) lr 1.9298e-03 eta 8:00:36 +epoch [8/50] batch [355/500] time 1.356 (1.363) data 0.000 (0.003) loss 1.2490 (1.1582) acc 75.0000 (71.3028) lr 1.9298e-03 eta 8:00:27 +epoch [8/50] batch [360/500] time 1.372 (1.363) data 0.000 (0.003) loss 1.2129 (1.1589) acc 68.7500 (71.2587) lr 
1.9298e-03 eta 8:00:19 +epoch [8/50] batch [365/500] time 1.342 (1.363) data 0.000 (0.003) loss 1.6709 (1.1628) acc 59.3750 (71.1216) lr 1.9298e-03 eta 8:00:09 +epoch [8/50] batch [370/500] time 1.340 (1.363) data 0.000 (0.003) loss 1.1885 (1.1619) acc 68.7500 (71.1402) lr 1.9298e-03 eta 8:00:05 +epoch [8/50] batch [375/500] time 1.364 (1.363) data 0.000 (0.003) loss 0.7427 (1.1601) acc 81.2500 (71.1667) lr 1.9298e-03 eta 7:59:57 +epoch [8/50] batch [380/500] time 1.378 (1.363) data 0.000 (0.003) loss 1.5137 (1.1595) acc 65.6250 (71.1678) lr 1.9298e-03 eta 7:59:48 +epoch [8/50] batch [385/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.2666 (1.1580) acc 71.8750 (71.2175) lr 1.9298e-03 eta 7:59:40 +epoch [8/50] batch [390/500] time 1.343 (1.363) data 0.000 (0.002) loss 1.1650 (1.1595) acc 75.0000 (71.1619) lr 1.9298e-03 eta 7:59:29 +epoch [8/50] batch [395/500] time 1.347 (1.363) data 0.000 (0.002) loss 1.2031 (1.1593) acc 68.7500 (71.2025) lr 1.9298e-03 eta 7:59:21 +epoch [8/50] batch [400/500] time 1.333 (1.363) data 0.000 (0.002) loss 1.0449 (1.1592) acc 71.8750 (71.1953) lr 1.9298e-03 eta 7:59:12 +epoch [8/50] batch [405/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.0459 (1.1587) acc 68.7500 (71.1497) lr 1.9298e-03 eta 7:59:04 +epoch [8/50] batch [410/500] time 1.351 (1.362) data 0.000 (0.002) loss 1.0107 (1.1584) acc 78.1250 (71.1738) lr 1.9298e-03 eta 7:58:54 +epoch [8/50] batch [415/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.5488 (1.1578) acc 71.8750 (71.2048) lr 1.9298e-03 eta 7:58:54 +epoch [8/50] batch [420/500] time 1.380 (1.363) data 0.000 (0.002) loss 1.0781 (1.1579) acc 71.8750 (71.1979) lr 1.9298e-03 eta 7:58:50 +epoch [8/50] batch [425/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.4414 (1.1607) acc 59.3750 (71.1176) lr 1.9298e-03 eta 7:58:42 +epoch [8/50] batch [430/500] time 1.367 (1.363) data 0.000 (0.002) loss 0.7378 (1.1596) acc 75.0000 (71.1119) lr 1.9298e-03 eta 7:58:34 +epoch [8/50] batch [435/500] time 1.349 (1.363) data 
0.000 (0.002) loss 0.7949 (1.1574) acc 81.2500 (71.1710) lr 1.9298e-03 eta 7:58:26 +epoch [8/50] batch [440/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.1416 (1.1580) acc 78.1250 (71.2003) lr 1.9298e-03 eta 7:58:20 +epoch [8/50] batch [445/500] time 1.361 (1.363) data 0.000 (0.002) loss 0.9238 (1.1581) acc 84.3750 (71.2079) lr 1.9298e-03 eta 7:58:13 +epoch [8/50] batch [450/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.6650 (1.1595) acc 56.2500 (71.1875) lr 1.9298e-03 eta 7:58:06 +epoch [8/50] batch [455/500] time 1.368 (1.363) data 0.000 (0.002) loss 1.2139 (1.1596) acc 71.8750 (71.1882) lr 1.9298e-03 eta 7:58:00 +epoch [8/50] batch [460/500] time 1.385 (1.363) data 0.000 (0.002) loss 1.5488 (1.1609) acc 59.3750 (71.1141) lr 1.9298e-03 eta 7:57:53 +epoch [8/50] batch [465/500] time 1.342 (1.363) data 0.000 (0.002) loss 0.9546 (1.1615) acc 84.3750 (71.0954) lr 1.9298e-03 eta 7:57:44 +epoch [8/50] batch [470/500] time 1.370 (1.363) data 0.000 (0.002) loss 1.1279 (1.1592) acc 68.7500 (71.0904) lr 1.9298e-03 eta 7:57:38 +epoch [8/50] batch [475/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.5723 (1.1632) acc 71.8750 (71.0000) lr 1.9298e-03 eta 7:57:30 +epoch [8/50] batch [480/500] time 1.377 (1.363) data 0.000 (0.002) loss 1.9238 (1.1657) acc 56.2500 (70.9049) lr 1.9298e-03 eta 7:57:24 +epoch [8/50] batch [485/500] time 1.360 (1.363) data 0.001 (0.002) loss 1.4727 (1.1655) acc 59.3750 (70.8892) lr 1.9298e-03 eta 7:57:18 +epoch [8/50] batch [490/500] time 1.357 (1.363) data 0.000 (0.002) loss 1.0068 (1.1655) acc 75.0000 (70.9120) lr 1.9298e-03 eta 7:57:10 +epoch [8/50] batch [495/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.8320 (1.1674) acc 65.6250 (70.9091) lr 1.9298e-03 eta 7:57:01 +epoch [8/50] batch [500/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.0391 (1.1696) acc 75.0000 (70.8688) lr 1.9048e-03 eta 7:56:54 +epoch [9/50] batch [5/500] time 1.349 (1.515) data 0.000 (0.151) loss 1.1719 (1.2445) acc 71.8750 (66.2500) lr 1.9048e-03 eta 8:50:12 
+epoch [9/50] batch [10/500] time 1.367 (1.456) data 0.001 (0.076) loss 0.8496 (1.1868) acc 71.8750 (68.1250) lr 1.9048e-03 eta 8:29:24 +epoch [9/50] batch [15/500] time 1.356 (1.427) data 0.000 (0.051) loss 0.9443 (1.1190) acc 75.0000 (69.1667) lr 1.9048e-03 eta 8:18:59 +epoch [9/50] batch [20/500] time 1.352 (1.410) data 0.000 (0.038) loss 0.8608 (1.0629) acc 78.1250 (71.5625) lr 1.9048e-03 eta 8:13:04 +epoch [9/50] batch [25/500] time 1.375 (1.402) data 0.000 (0.031) loss 0.8730 (1.1050) acc 71.8750 (70.7500) lr 1.9048e-03 eta 8:10:06 +epoch [9/50] batch [30/500] time 1.374 (1.395) data 0.000 (0.026) loss 0.8589 (1.0971) acc 81.2500 (71.1458) lr 1.9048e-03 eta 8:07:25 +epoch [9/50] batch [35/500] time 1.367 (1.390) data 0.000 (0.022) loss 1.4023 (1.1372) acc 62.5000 (70.3571) lr 1.9048e-03 eta 8:05:31 +epoch [9/50] batch [40/500] time 1.356 (1.385) data 0.000 (0.019) loss 1.1094 (1.1387) acc 75.0000 (70.6250) lr 1.9048e-03 eta 8:03:43 +epoch [9/50] batch [45/500] time 1.344 (1.381) data 0.000 (0.017) loss 1.1221 (1.1161) acc 68.7500 (71.4583) lr 1.9048e-03 eta 8:02:25 +epoch [9/50] batch [50/500] time 1.347 (1.378) data 0.001 (0.015) loss 2.0039 (1.1376) acc 50.0000 (70.8750) lr 1.9048e-03 eta 8:01:07 +epoch [9/50] batch [55/500] time 1.358 (1.376) data 0.000 (0.014) loss 2.1484 (1.1477) acc 62.5000 (71.3068) lr 1.9048e-03 eta 8:00:28 +epoch [9/50] batch [60/500] time 1.367 (1.375) data 0.000 (0.013) loss 0.7603 (1.1263) acc 71.8750 (71.4583) lr 1.9048e-03 eta 7:59:55 +epoch [9/50] batch [65/500] time 1.364 (1.374) data 0.001 (0.012) loss 1.1572 (1.1370) acc 78.1250 (71.2019) lr 1.9048e-03 eta 7:59:26 +epoch [9/50] batch [70/500] time 1.354 (1.375) data 0.000 (0.011) loss 0.8335 (1.1266) acc 78.1250 (71.3839) lr 1.9048e-03 eta 7:59:35 +epoch [9/50] batch [75/500] time 1.366 (1.374) data 0.000 (0.010) loss 1.6133 (1.1349) acc 56.2500 (71.2083) lr 1.9048e-03 eta 7:59:08 +epoch [9/50] batch [80/500] time 1.357 (1.373) data 0.000 (0.010) loss 1.1279 (1.1304) acc 
68.7500 (71.0547) lr 1.9048e-03 eta 7:58:44 +epoch [9/50] batch [85/500] time 1.362 (1.372) data 0.000 (0.009) loss 1.6074 (1.1407) acc 71.8750 (70.7721) lr 1.9048e-03 eta 7:58:24 +epoch [9/50] batch [90/500] time 1.366 (1.371) data 0.000 (0.009) loss 0.8052 (1.1527) acc 75.0000 (70.6597) lr 1.9048e-03 eta 7:57:56 +epoch [9/50] batch [95/500] time 1.377 (1.371) data 0.000 (0.008) loss 0.9331 (1.1450) acc 71.8750 (70.8224) lr 1.9048e-03 eta 7:57:45 +epoch [9/50] batch [100/500] time 1.368 (1.371) data 0.000 (0.008) loss 0.8081 (1.1565) acc 75.0000 (70.7812) lr 1.9048e-03 eta 7:57:35 +epoch [9/50] batch [105/500] time 1.362 (1.370) data 0.000 (0.008) loss 1.3262 (1.1595) acc 68.7500 (70.8333) lr 1.9048e-03 eta 7:57:15 +epoch [9/50] batch [110/500] time 1.361 (1.370) data 0.000 (0.007) loss 1.4688 (1.1622) acc 68.7500 (70.7955) lr 1.9048e-03 eta 7:57:00 +epoch [9/50] batch [115/500] time 1.359 (1.371) data 0.000 (0.007) loss 1.1953 (1.1603) acc 65.6250 (70.8696) lr 1.9048e-03 eta 7:57:05 +epoch [9/50] batch [120/500] time 1.368 (1.370) data 0.000 (0.007) loss 0.6924 (1.1523) acc 81.2500 (71.1198) lr 1.9048e-03 eta 7:56:45 +epoch [9/50] batch [125/500] time 1.362 (1.369) data 0.000 (0.006) loss 0.9854 (1.1457) acc 71.8750 (71.1750) lr 1.9048e-03 eta 7:56:24 +epoch [9/50] batch [130/500] time 1.348 (1.369) data 0.000 (0.006) loss 0.7114 (1.1374) acc 78.1250 (71.4663) lr 1.9048e-03 eta 7:56:01 +epoch [9/50] batch [135/500] time 1.371 (1.368) data 0.000 (0.006) loss 1.0713 (1.1384) acc 65.6250 (71.3657) lr 1.9048e-03 eta 7:55:50 +epoch [9/50] batch [140/500] time 1.360 (1.368) data 0.000 (0.006) loss 0.8013 (1.1452) acc 78.1250 (71.3170) lr 1.9048e-03 eta 7:55:38 +epoch [9/50] batch [145/500] time 1.365 (1.368) data 0.000 (0.006) loss 2.0059 (1.1460) acc 53.1250 (71.3147) lr 1.9048e-03 eta 7:55:20 +epoch [9/50] batch [150/500] time 1.358 (1.367) data 0.000 (0.005) loss 0.9380 (1.1354) acc 71.8750 (71.5625) lr 1.9048e-03 eta 7:55:05 +epoch [9/50] batch [155/500] time 1.361 
(1.367) data 0.000 (0.005) loss 1.3096 (1.1325) acc 65.6250 (71.5726) lr 1.9048e-03 eta 7:54:50 +epoch [9/50] batch [160/500] time 1.362 (1.367) data 0.000 (0.005) loss 1.1436 (1.1323) acc 68.7500 (71.5039) lr 1.9048e-03 eta 7:54:43 +epoch [9/50] batch [165/500] time 1.354 (1.367) data 0.000 (0.005) loss 1.1748 (1.1283) acc 75.0000 (71.5720) lr 1.9048e-03 eta 7:54:38 +epoch [9/50] batch [170/500] time 1.357 (1.367) data 0.000 (0.005) loss 1.1758 (1.1274) acc 59.3750 (71.5441) lr 1.9048e-03 eta 7:54:24 +epoch [9/50] batch [175/500] time 1.355 (1.366) data 0.001 (0.005) loss 0.9961 (1.1332) acc 78.1250 (71.6071) lr 1.9048e-03 eta 7:54:02 +epoch [9/50] batch [180/500] time 1.372 (1.366) data 0.000 (0.005) loss 1.2070 (1.1356) acc 59.3750 (71.4062) lr 1.9048e-03 eta 7:53:53 +epoch [9/50] batch [185/500] time 1.381 (1.366) data 0.000 (0.004) loss 1.3516 (1.1363) acc 75.0000 (71.4527) lr 1.9048e-03 eta 7:53:48 +epoch [9/50] batch [190/500] time 1.353 (1.366) data 0.004 (0.004) loss 1.2627 (1.1364) acc 75.0000 (71.5296) lr 1.9048e-03 eta 7:53:41 +epoch [9/50] batch [195/500] time 1.375 (1.366) data 0.000 (0.004) loss 0.9028 (1.1340) acc 81.2500 (71.6346) lr 1.9048e-03 eta 7:53:36 +epoch [9/50] batch [200/500] time 1.371 (1.366) data 0.000 (0.004) loss 0.6211 (1.1324) acc 75.0000 (71.6562) lr 1.9048e-03 eta 7:53:31 +epoch [9/50] batch [205/500] time 1.357 (1.366) data 0.000 (0.004) loss 0.8755 (1.1277) acc 75.0000 (71.7683) lr 1.9048e-03 eta 7:53:21 +epoch [9/50] batch [210/500] time 1.362 (1.365) data 0.000 (0.004) loss 1.2979 (1.1268) acc 56.2500 (71.7262) lr 1.9048e-03 eta 7:53:08 +epoch [9/50] batch [215/500] time 1.346 (1.366) data 0.000 (0.004) loss 1.1680 (1.1292) acc 68.7500 (71.7006) lr 1.9048e-03 eta 7:53:11 +epoch [9/50] batch [220/500] time 1.373 (1.366) data 0.000 (0.004) loss 1.3525 (1.1320) acc 65.6250 (71.5909) lr 1.9048e-03 eta 7:53:04 +epoch [9/50] batch [225/500] time 1.360 (1.366) data 0.000 (0.004) loss 1.8652 (1.1332) acc 53.1250 (71.5833) lr 
1.9048e-03 eta 7:52:56 +epoch [9/50] batch [230/500] time 1.362 (1.366) data 0.000 (0.004) loss 1.6328 (1.1344) acc 56.2500 (71.5489) lr 1.9048e-03 eta 7:52:46 +epoch [9/50] batch [235/500] time 1.373 (1.366) data 0.000 (0.004) loss 0.8931 (1.1315) acc 78.1250 (71.5824) lr 1.9048e-03 eta 7:52:40 +epoch [9/50] batch [240/500] time 1.379 (1.366) data 0.000 (0.004) loss 1.0996 (1.1374) acc 71.8750 (71.5104) lr 1.9048e-03 eta 7:52:32 +epoch [9/50] batch [245/500] time 1.371 (1.366) data 0.000 (0.003) loss 0.8315 (1.1384) acc 81.2500 (71.4286) lr 1.9048e-03 eta 7:52:21 +epoch [9/50] batch [250/500] time 1.376 (1.365) data 0.001 (0.003) loss 1.3398 (1.1415) acc 75.0000 (71.3875) lr 1.9048e-03 eta 7:52:08 +epoch [9/50] batch [255/500] time 1.492 (1.366) data 0.001 (0.003) loss 1.2715 (1.1435) acc 71.8750 (71.3725) lr 1.9048e-03 eta 7:52:11 +epoch [9/50] batch [260/500] time 1.369 (1.365) data 0.000 (0.003) loss 0.9761 (1.1419) acc 68.7500 (71.4062) lr 1.9048e-03 eta 7:52:00 +epoch [9/50] batch [265/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.0596 (1.1461) acc 81.2500 (71.3797) lr 1.9048e-03 eta 7:51:47 +epoch [9/50] batch [270/500] time 1.352 (1.365) data 0.000 (0.003) loss 1.0439 (1.1428) acc 65.6250 (71.3194) lr 1.9048e-03 eta 7:51:40 +epoch [9/50] batch [275/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.1201 (1.1454) acc 78.1250 (71.3409) lr 1.9048e-03 eta 7:51:27 +epoch [9/50] batch [280/500] time 1.341 (1.365) data 0.000 (0.003) loss 0.8843 (1.1458) acc 78.1250 (71.3393) lr 1.9048e-03 eta 7:51:14 +epoch [9/50] batch [285/500] time 1.378 (1.365) data 0.000 (0.003) loss 1.4385 (1.1482) acc 68.7500 (71.2610) lr 1.9048e-03 eta 7:51:07 +epoch [9/50] batch [290/500] time 1.367 (1.365) data 0.000 (0.003) loss 1.3633 (1.1487) acc 62.5000 (71.2500) lr 1.9048e-03 eta 7:51:01 +epoch [9/50] batch [295/500] time 1.379 (1.365) data 0.000 (0.003) loss 1.3223 (1.1496) acc 78.1250 (71.2288) lr 1.9048e-03 eta 7:50:57 +epoch [9/50] batch [300/500] time 1.355 (1.365) data 
0.000 (0.003) loss 1.0479 (1.1488) acc 71.8750 (71.2708) lr 1.9048e-03 eta 7:50:51 +epoch [9/50] batch [305/500] time 1.356 (1.365) data 0.000 (0.003) loss 1.3857 (1.1531) acc 59.3750 (71.1578) lr 1.9048e-03 eta 7:50:41 +epoch [9/50] batch [310/500] time 1.350 (1.365) data 0.000 (0.003) loss 1.4365 (1.1544) acc 65.6250 (71.1290) lr 1.9048e-03 eta 7:50:32 +epoch [9/50] batch [315/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.7275 (1.1518) acc 78.1250 (71.2103) lr 1.9048e-03 eta 7:50:23 +epoch [9/50] batch [320/500] time 1.347 (1.364) data 0.000 (0.003) loss 1.8779 (1.1544) acc 40.6250 (71.1230) lr 1.9048e-03 eta 7:50:15 +epoch [9/50] batch [325/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.3076 (1.1535) acc 75.0000 (71.2212) lr 1.9048e-03 eta 7:50:10 +epoch [9/50] batch [330/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.3848 (1.1537) acc 65.6250 (71.2216) lr 1.9048e-03 eta 7:50:03 +epoch [9/50] batch [335/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.4805 (1.1540) acc 75.0000 (71.2500) lr 1.9048e-03 eta 7:49:53 +epoch [9/50] batch [340/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.1660 (1.1537) acc 71.8750 (71.2684) lr 1.9048e-03 eta 7:49:44 +epoch [9/50] batch [345/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.2412 (1.1542) acc 65.6250 (71.3315) lr 1.9048e-03 eta 7:49:35 +epoch [9/50] batch [350/500] time 1.378 (1.364) data 0.000 (0.003) loss 0.8364 (1.1549) acc 71.8750 (71.2857) lr 1.9048e-03 eta 7:49:30 +epoch [9/50] batch [355/500] time 1.340 (1.365) data 0.000 (0.003) loss 1.2129 (1.1558) acc 71.8750 (71.2676) lr 1.9048e-03 eta 7:49:31 +epoch [9/50] batch [360/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.1123 (1.1538) acc 68.7500 (71.3194) lr 1.9048e-03 eta 7:49:22 +epoch [9/50] batch [365/500] time 1.371 (1.365) data 0.000 (0.002) loss 1.0928 (1.1526) acc 65.6250 (71.3185) lr 1.9048e-03 eta 7:49:16 +epoch [9/50] batch [370/500] time 1.371 (1.365) data 0.001 (0.002) loss 0.9585 (1.1534) acc 75.0000 (71.3007) lr 1.9048e-03 eta 
7:49:10 +epoch [9/50] batch [375/500] time 1.371 (1.365) data 0.000 (0.002) loss 1.3867 (1.1536) acc 68.7500 (71.2750) lr 1.9048e-03 eta 7:49:04 +epoch [9/50] batch [380/500] time 1.373 (1.365) data 0.000 (0.002) loss 1.3096 (1.1527) acc 62.5000 (71.2829) lr 1.9048e-03 eta 7:48:58 +epoch [9/50] batch [385/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.2002 (1.1540) acc 78.1250 (71.2906) lr 1.9048e-03 eta 7:48:49 +epoch [9/50] batch [390/500] time 1.361 (1.364) data 0.000 (0.002) loss 0.8374 (1.1523) acc 75.0000 (71.3301) lr 1.9048e-03 eta 7:48:40 +epoch [9/50] batch [395/500] time 1.338 (1.364) data 0.000 (0.002) loss 1.2666 (1.1532) acc 71.8750 (71.3054) lr 1.9048e-03 eta 7:48:29 +epoch [9/50] batch [400/500] time 1.360 (1.364) data 0.000 (0.002) loss 1.1543 (1.1542) acc 68.7500 (71.3281) lr 1.9048e-03 eta 7:48:28 +epoch [9/50] batch [405/500] time 1.352 (1.364) data 0.000 (0.002) loss 1.1162 (1.1530) acc 68.7500 (71.3657) lr 1.9048e-03 eta 7:48:18 +epoch [9/50] batch [410/500] time 1.386 (1.364) data 0.000 (0.002) loss 1.4434 (1.1532) acc 62.5000 (71.3186) lr 1.9048e-03 eta 7:48:14 +epoch [9/50] batch [415/500] time 1.361 (1.364) data 0.000 (0.002) loss 0.6704 (1.1526) acc 81.2500 (71.3102) lr 1.9048e-03 eta 7:48:05 +epoch [9/50] batch [420/500] time 1.376 (1.364) data 0.000 (0.002) loss 1.1104 (1.1521) acc 65.6250 (71.3021) lr 1.9048e-03 eta 7:47:57 +epoch [9/50] batch [425/500] time 1.356 (1.364) data 0.000 (0.002) loss 0.9375 (1.1531) acc 81.2500 (71.2721) lr 1.9048e-03 eta 7:47:49 +epoch [9/50] batch [430/500] time 1.356 (1.364) data 0.000 (0.002) loss 1.4971 (1.1541) acc 68.7500 (71.2791) lr 1.9048e-03 eta 7:47:42 +epoch [9/50] batch [435/500] time 1.363 (1.364) data 0.001 (0.002) loss 1.1221 (1.1526) acc 68.7500 (71.3290) lr 1.9048e-03 eta 7:47:37 +epoch [9/50] batch [440/500] time 1.350 (1.364) data 0.000 (0.002) loss 1.9580 (1.1519) acc 59.3750 (71.3849) lr 1.9048e-03 eta 7:47:29 +epoch [9/50] batch [445/500] time 1.390 (1.364) data 0.000 (0.002) loss 
1.7666 (1.1534) acc 59.3750 (71.3553) lr 1.9048e-03 eta 7:47:21 +epoch [9/50] batch [450/500] time 1.340 (1.364) data 0.000 (0.002) loss 1.1816 (1.1538) acc 65.6250 (71.3611) lr 1.9048e-03 eta 7:47:12 +epoch [9/50] batch [455/500] time 1.349 (1.364) data 0.000 (0.002) loss 1.1777 (1.1517) acc 65.6250 (71.4148) lr 1.9048e-03 eta 7:47:05 +epoch [9/50] batch [460/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.4619 (1.1524) acc 75.0000 (71.4130) lr 1.9048e-03 eta 7:46:57 +epoch [9/50] batch [465/500] time 1.355 (1.364) data 0.000 (0.002) loss 1.0264 (1.1524) acc 68.7500 (71.4180) lr 1.9048e-03 eta 7:46:49 +epoch [9/50] batch [470/500] time 1.352 (1.364) data 0.000 (0.002) loss 1.1875 (1.1550) acc 71.8750 (71.3763) lr 1.9048e-03 eta 7:46:42 +epoch [9/50] batch [475/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.2646 (1.1581) acc 62.5000 (71.2961) lr 1.9048e-03 eta 7:46:32 +epoch [9/50] batch [480/500] time 1.371 (1.364) data 0.000 (0.002) loss 1.1016 (1.1574) acc 65.6250 (71.2695) lr 1.9048e-03 eta 7:46:25 +epoch [9/50] batch [485/500] time 1.361 (1.364) data 0.001 (0.002) loss 1.0928 (1.1577) acc 75.0000 (71.2436) lr 1.9048e-03 eta 7:46:15 +epoch [9/50] batch [490/500] time 1.364 (1.364) data 0.000 (0.002) loss 1.0547 (1.1566) acc 75.0000 (71.2628) lr 1.9048e-03 eta 7:46:08 +epoch [9/50] batch [495/500] time 1.355 (1.364) data 0.000 (0.002) loss 1.2139 (1.1561) acc 65.6250 (71.2311) lr 1.9048e-03 eta 7:45:59 +epoch [9/50] batch [500/500] time 1.374 (1.364) data 0.000 (0.002) loss 1.0020 (1.1575) acc 68.7500 (71.1937) lr 1.8763e-03 eta 7:45:57 +epoch [10/50] batch [5/500] time 1.378 (1.547) data 0.001 (0.180) loss 0.7549 (0.9256) acc 84.3750 (77.5000) lr 1.8763e-03 eta 8:48:35 +epoch [10/50] batch [10/500] time 1.357 (1.454) data 0.000 (0.090) loss 0.9609 (1.0463) acc 78.1250 (75.3125) lr 1.8763e-03 eta 8:16:38 +epoch [10/50] batch [15/500] time 1.352 (1.420) data 0.000 (0.060) loss 1.0566 (1.0711) acc 75.0000 (74.3750) lr 1.8763e-03 eta 8:04:41 +epoch [10/50] 
batch [20/500] time 1.368 (1.405) data 0.001 (0.045) loss 0.9590 (1.1387) acc 65.6250 (71.4062) lr 1.8763e-03 eta 7:59:36 +epoch [10/50] batch [25/500] time 1.349 (1.396) data 0.000 (0.036) loss 0.8833 (1.0930) acc 62.5000 (71.5000) lr 1.8763e-03 eta 7:56:14 +epoch [10/50] batch [30/500] time 1.368 (1.389) data 0.001 (0.030) loss 1.1904 (1.1002) acc 78.1250 (72.3958) lr 1.8763e-03 eta 7:53:49 +epoch [10/50] batch [35/500] time 1.361 (1.383) data 0.000 (0.026) loss 1.0381 (1.1039) acc 78.1250 (72.4107) lr 1.8763e-03 eta 7:51:45 +epoch [10/50] batch [40/500] time 1.348 (1.380) data 0.000 (0.023) loss 0.7173 (1.1030) acc 75.0000 (72.4219) lr 1.8763e-03 eta 7:50:39 +epoch [10/50] batch [45/500] time 1.344 (1.377) data 0.000 (0.020) loss 0.8765 (1.1165) acc 81.2500 (72.5000) lr 1.8763e-03 eta 7:49:32 +epoch [10/50] batch [50/500] time 1.355 (1.375) data 0.000 (0.018) loss 0.7109 (1.1104) acc 78.1250 (72.3750) lr 1.8763e-03 eta 7:48:46 +epoch [10/50] batch [55/500] time 1.372 (1.374) data 0.000 (0.017) loss 1.3926 (1.1206) acc 68.7500 (72.4432) lr 1.8763e-03 eta 7:48:06 +epoch [10/50] batch [60/500] time 1.360 (1.375) data 0.000 (0.015) loss 1.0068 (1.1248) acc 68.7500 (72.1875) lr 1.8763e-03 eta 7:48:17 +epoch [10/50] batch [65/500] time 1.368 (1.373) data 0.000 (0.014) loss 1.3389 (1.1361) acc 65.6250 (71.7788) lr 1.8763e-03 eta 7:47:45 +epoch [10/50] batch [70/500] time 1.360 (1.372) data 0.000 (0.013) loss 1.4912 (1.1279) acc 62.5000 (71.8750) lr 1.8763e-03 eta 7:47:16 +epoch [10/50] batch [75/500] time 1.370 (1.371) data 0.000 (0.012) loss 1.4863 (1.1314) acc 68.7500 (71.7500) lr 1.8763e-03 eta 7:46:36 +epoch [10/50] batch [80/500] time 1.375 (1.371) data 0.000 (0.012) loss 1.0859 (1.1342) acc 78.1250 (71.7188) lr 1.8763e-03 eta 7:46:28 +epoch [10/50] batch [85/500] time 1.378 (1.370) data 0.000 (0.011) loss 1.1426 (1.1376) acc 75.0000 (71.5809) lr 1.8763e-03 eta 7:46:09 +epoch [10/50] batch [90/500] time 1.350 (1.369) data 0.000 (0.010) loss 0.9824 (1.1463) acc 
71.8750 (71.5278) lr 1.8763e-03 eta 7:45:48 +epoch [10/50] batch [95/500] time 1.336 (1.369) data 0.000 (0.010) loss 1.5186 (1.1538) acc 75.0000 (71.4474) lr 1.8763e-03 eta 7:45:27 +epoch [10/50] batch [100/500] time 1.342 (1.369) data 0.000 (0.009) loss 0.9312 (1.1540) acc 75.0000 (71.4688) lr 1.8763e-03 eta 7:45:33 +epoch [10/50] batch [105/500] time 1.360 (1.369) data 0.000 (0.009) loss 1.4414 (1.1549) acc 62.5000 (71.3095) lr 1.8763e-03 eta 7:45:16 +epoch [10/50] batch [110/500] time 1.386 (1.369) data 0.000 (0.009) loss 1.8193 (1.1588) acc 59.3750 (71.1932) lr 1.8763e-03 eta 7:45:06 +epoch [10/50] batch [115/500] time 1.356 (1.368) data 0.000 (0.008) loss 1.0068 (1.1559) acc 68.7500 (71.2228) lr 1.8763e-03 eta 7:44:54 +epoch [10/50] batch [120/500] time 1.362 (1.368) data 0.000 (0.008) loss 1.3105 (1.1684) acc 71.8750 (71.1458) lr 1.8763e-03 eta 7:44:38 +epoch [10/50] batch [125/500] time 1.362 (1.367) data 0.000 (0.008) loss 0.5796 (1.1651) acc 81.2500 (71.1750) lr 1.8763e-03 eta 7:44:14 +epoch [10/50] batch [130/500] time 1.355 (1.367) data 0.000 (0.007) loss 1.6494 (1.1684) acc 56.2500 (70.9856) lr 1.8763e-03 eta 7:43:58 +epoch [10/50] batch [135/500] time 1.349 (1.366) data 0.000 (0.007) loss 1.0352 (1.1766) acc 81.2500 (70.8565) lr 1.8763e-03 eta 7:43:43 +epoch [10/50] batch [140/500] time 1.368 (1.366) data 0.000 (0.007) loss 1.4199 (1.1757) acc 59.3750 (70.7589) lr 1.8763e-03 eta 7:43:32 +epoch [10/50] batch [145/500] time 1.347 (1.366) data 0.000 (0.007) loss 1.2549 (1.1751) acc 78.1250 (70.9267) lr 1.8763e-03 eta 7:43:15 +epoch [10/50] batch [150/500] time 1.385 (1.365) data 0.000 (0.006) loss 1.6045 (1.1766) acc 68.7500 (70.8958) lr 1.8763e-03 eta 7:43:07 +epoch [10/50] batch [155/500] time 1.349 (1.365) data 0.000 (0.006) loss 0.7754 (1.1807) acc 78.1250 (70.8065) lr 1.8763e-03 eta 7:42:52 +epoch [10/50] batch [160/500] time 1.363 (1.365) data 0.000 (0.006) loss 1.2061 (1.1798) acc 68.7500 (70.8984) lr 1.8763e-03 eta 7:42:50 +epoch [10/50] batch 
[165/500] time 1.365 (1.365) data 0.000 (0.006) loss 2.0039 (1.1878) acc 56.2500 (70.7765) lr 1.8763e-03 eta 7:42:37 +epoch [10/50] batch [170/500] time 1.362 (1.365) data 0.001 (0.006) loss 0.7134 (1.1861) acc 87.5000 (70.7904) lr 1.8763e-03 eta 7:42:32 +epoch [10/50] batch [175/500] time 1.361 (1.365) data 0.000 (0.005) loss 0.7593 (1.1858) acc 78.1250 (70.7500) lr 1.8763e-03 eta 7:42:22 +epoch [10/50] batch [180/500] time 1.351 (1.365) data 0.000 (0.005) loss 0.7036 (1.1800) acc 78.1250 (70.7639) lr 1.8763e-03 eta 7:42:13 +epoch [10/50] batch [185/500] time 1.376 (1.365) data 0.000 (0.005) loss 1.1562 (1.1799) acc 75.0000 (70.7095) lr 1.8763e-03 eta 7:42:06 +epoch [10/50] batch [190/500] time 1.372 (1.365) data 0.000 (0.005) loss 1.2549 (1.1778) acc 78.1250 (70.7401) lr 1.8763e-03 eta 7:42:01 +epoch [10/50] batch [195/500] time 1.352 (1.365) data 0.000 (0.005) loss 1.5742 (1.1765) acc 50.0000 (70.6731) lr 1.8763e-03 eta 7:41:53 +epoch [10/50] batch [200/500] time 1.360 (1.366) data 0.000 (0.005) loss 0.9868 (1.1744) acc 71.8750 (70.7031) lr 1.8763e-03 eta 7:42:01 +epoch [10/50] batch [205/500] time 1.361 (1.366) data 0.000 (0.005) loss 1.0244 (1.1722) acc 68.7500 (70.6250) lr 1.8763e-03 eta 7:41:53 +epoch [10/50] batch [210/500] time 1.357 (1.365) data 0.000 (0.005) loss 1.2461 (1.1757) acc 71.8750 (70.6101) lr 1.8763e-03 eta 7:41:40 +epoch [10/50] batch [215/500] time 1.364 (1.365) data 0.000 (0.005) loss 1.2637 (1.1738) acc 62.5000 (70.5378) lr 1.8763e-03 eta 7:41:31 +epoch [10/50] batch [220/500] time 1.354 (1.365) data 0.000 (0.004) loss 1.2822 (1.1745) acc 62.5000 (70.5114) lr 1.8763e-03 eta 7:41:21 +epoch [10/50] batch [225/500] time 1.382 (1.365) data 0.000 (0.004) loss 0.6968 (1.1709) acc 81.2500 (70.6111) lr 1.8763e-03 eta 7:41:12 +epoch [10/50] batch [230/500] time 1.355 (1.365) data 0.000 (0.004) loss 1.4150 (1.1662) acc 65.6250 (70.6658) lr 1.8763e-03 eta 7:41:05 +epoch [10/50] batch [235/500] time 1.358 (1.365) data 0.000 (0.004) loss 1.0459 
(1.1715) acc 78.1250 (70.5851) lr 1.8763e-03 eta 7:40:53 +epoch [10/50] batch [240/500] time 1.504 (1.365) data 0.000 (0.004) loss 1.7578 (1.1773) acc 71.8750 (70.5599) lr 1.8763e-03 eta 7:40:55 +epoch [10/50] batch [245/500] time 1.355 (1.365) data 0.000 (0.004) loss 2.2324 (1.1840) acc 62.5000 (70.4719) lr 1.8763e-03 eta 7:40:47 +epoch [10/50] batch [250/500] time 1.368 (1.365) data 0.000 (0.004) loss 1.2295 (1.1790) acc 62.5000 (70.4750) lr 1.8763e-03 eta 7:40:37 +epoch [10/50] batch [255/500] time 1.349 (1.365) data 0.000 (0.004) loss 1.5254 (1.1798) acc 75.0000 (70.5392) lr 1.8763e-03 eta 7:40:28 +epoch [10/50] batch [260/500] time 1.363 (1.365) data 0.000 (0.004) loss 1.2158 (1.1815) acc 71.8750 (70.5889) lr 1.8763e-03 eta 7:40:19 +epoch [10/50] batch [265/500] time 1.372 (1.365) data 0.000 (0.004) loss 1.1689 (1.1801) acc 71.8750 (70.6604) lr 1.8763e-03 eta 7:40:10 +epoch [10/50] batch [270/500] time 1.373 (1.364) data 0.000 (0.004) loss 1.5605 (1.1782) acc 59.3750 (70.6481) lr 1.8763e-03 eta 7:40:03 +epoch [10/50] batch [275/500] time 1.344 (1.364) data 0.000 (0.004) loss 1.0146 (1.1773) acc 75.0000 (70.7159) lr 1.8763e-03 eta 7:39:54 +epoch [10/50] batch [280/500] time 1.372 (1.364) data 0.000 (0.004) loss 1.6924 (1.1813) acc 59.3750 (70.6473) lr 1.8763e-03 eta 7:39:44 +epoch [10/50] batch [285/500] time 1.352 (1.364) data 0.000 (0.004) loss 0.8423 (1.1788) acc 78.1250 (70.7127) lr 1.8763e-03 eta 7:39:36 +epoch [10/50] batch [290/500] time 1.360 (1.364) data 0.000 (0.003) loss 0.8560 (1.1767) acc 75.0000 (70.7543) lr 1.8763e-03 eta 7:39:26 +epoch [10/50] batch [295/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.2246 (1.1780) acc 75.0000 (70.7521) lr 1.8763e-03 eta 7:39:17 +epoch [10/50] batch [300/500] time 1.374 (1.364) data 0.000 (0.003) loss 1.3223 (1.1783) acc 62.5000 (70.7500) lr 1.8763e-03 eta 7:39:10 +epoch [10/50] batch [305/500] time 1.332 (1.364) data 0.000 (0.003) loss 1.3770 (1.1823) acc 68.7500 (70.7172) lr 1.8763e-03 eta 7:39:01 +epoch 
[10/50] batch [310/500] time 1.357 (1.364) data 0.000 (0.003) loss 1.0234 (1.1793) acc 68.7500 (70.7460) lr 1.8763e-03 eta 7:38:50 +epoch [10/50] batch [315/500] time 1.354 (1.363) data 0.001 (0.003) loss 1.2578 (1.1776) acc 65.6250 (70.7639) lr 1.8763e-03 eta 7:38:41 +epoch [10/50] batch [320/500] time 1.353 (1.363) data 0.000 (0.003) loss 1.1396 (1.1797) acc 68.7500 (70.6934) lr 1.8763e-03 eta 7:38:34 +epoch [10/50] batch [325/500] time 1.351 (1.363) data 0.000 (0.003) loss 0.9058 (1.1780) acc 71.8750 (70.7212) lr 1.8763e-03 eta 7:38:22 +epoch [10/50] batch [330/500] time 1.345 (1.363) data 0.000 (0.003) loss 0.5195 (1.1731) acc 93.7500 (70.8333) lr 1.8763e-03 eta 7:38:11 +epoch [10/50] batch [335/500] time 1.336 (1.363) data 0.000 (0.003) loss 1.0137 (1.1726) acc 78.1250 (70.8862) lr 1.8763e-03 eta 7:38:04 +epoch [10/50] batch [340/500] time 1.378 (1.364) data 0.000 (0.003) loss 1.3711 (1.1759) acc 75.0000 (70.8732) lr 1.8763e-03 eta 7:38:10 +epoch [10/50] batch [345/500] time 1.354 (1.364) data 0.000 (0.003) loss 0.8755 (1.1750) acc 81.2500 (70.8877) lr 1.8763e-03 eta 7:38:03 +epoch [10/50] batch [350/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.3477 (1.1769) acc 62.5000 (70.8839) lr 1.8763e-03 eta 7:37:56 +epoch [10/50] batch [355/500] time 1.376 (1.364) data 0.000 (0.003) loss 1.1230 (1.1775) acc 71.8750 (70.9419) lr 1.8763e-03 eta 7:37:49 +epoch [10/50] batch [360/500] time 1.349 (1.363) data 0.000 (0.003) loss 0.9771 (1.1785) acc 78.1250 (70.9201) lr 1.8763e-03 eta 7:37:38 +epoch [10/50] batch [365/500] time 1.356 (1.363) data 0.000 (0.003) loss 1.0967 (1.1793) acc 71.8750 (70.9760) lr 1.8763e-03 eta 7:37:30 +epoch [10/50] batch [370/500] time 1.375 (1.363) data 0.000 (0.003) loss 1.2168 (1.1773) acc 75.0000 (71.0304) lr 1.8763e-03 eta 7:37:22 +epoch [10/50] batch [375/500] time 1.360 (1.363) data 0.000 (0.003) loss 1.3125 (1.1800) acc 75.0000 (71.0250) lr 1.8763e-03 eta 7:37:13 +epoch [10/50] batch [380/500] time 1.356 (1.363) data 0.000 (0.003) loss 
1.0840 (1.1792) acc 75.0000 (71.0197) lr 1.8763e-03 eta 7:37:07 +epoch [10/50] batch [385/500] time 1.358 (1.363) data 0.000 (0.003) loss 1.0215 (1.1800) acc 68.7500 (70.9578) lr 1.8763e-03 eta 7:37:06 +epoch [10/50] batch [390/500] time 1.354 (1.363) data 0.000 (0.003) loss 0.6157 (1.1779) acc 84.3750 (71.0256) lr 1.8763e-03 eta 7:36:58 +epoch [10/50] batch [395/500] time 1.373 (1.363) data 0.000 (0.003) loss 0.6416 (1.1734) acc 81.2500 (71.0839) lr 1.8763e-03 eta 7:36:51 +epoch [10/50] batch [400/500] time 1.337 (1.363) data 0.000 (0.003) loss 0.6763 (1.1694) acc 78.1250 (71.1484) lr 1.8763e-03 eta 7:36:42 +epoch [10/50] batch [405/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.1006 (1.1698) acc 78.1250 (71.1343) lr 1.8763e-03 eta 7:36:34 +epoch [10/50] batch [410/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.6763 (1.1696) acc 78.1250 (71.1357) lr 1.8763e-03 eta 7:36:30 +epoch [10/50] batch [415/500] time 1.352 (1.363) data 0.000 (0.003) loss 1.1211 (1.1686) acc 71.8750 (71.1596) lr 1.8763e-03 eta 7:36:21 +epoch [10/50] batch [420/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.1240 (1.1675) acc 81.2500 (71.1979) lr 1.8763e-03 eta 7:36:13 +epoch [10/50] batch [425/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8350 (1.1672) acc 71.8750 (71.1912) lr 1.8763e-03 eta 7:36:06 +epoch [10/50] batch [430/500] time 1.363 (1.363) data 0.000 (0.002) loss 0.9243 (1.1671) acc 78.1250 (71.2209) lr 1.8763e-03 eta 7:35:56 +epoch [10/50] batch [435/500] time 1.346 (1.363) data 0.000 (0.002) loss 1.2324 (1.1662) acc 68.7500 (71.1997) lr 1.8763e-03 eta 7:35:47 +epoch [10/50] batch [440/500] time 1.344 (1.363) data 0.000 (0.002) loss 1.0801 (1.1672) acc 75.0000 (71.2216) lr 1.8763e-03 eta 7:35:37 +epoch [10/50] batch [445/500] time 1.339 (1.363) data 0.000 (0.002) loss 1.0273 (1.1636) acc 75.0000 (71.2711) lr 1.8763e-03 eta 7:35:28 +epoch [10/50] batch [450/500] time 1.372 (1.363) data 0.001 (0.002) loss 1.7412 (1.1629) acc 59.3750 (71.2639) lr 1.8763e-03 eta 7:35:21 
+epoch [10/50] batch [455/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.4082 (1.1662) acc 71.8750 (71.1882) lr 1.8763e-03 eta 7:35:13 +epoch [10/50] batch [460/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.0479 (1.1679) acc 68.7500 (71.1481) lr 1.8763e-03 eta 7:35:07 +epoch [10/50] batch [465/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.3740 (1.1671) acc 71.8750 (71.1559) lr 1.8763e-03 eta 7:35:02 +epoch [10/50] batch [470/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.3994 (1.1700) acc 65.6250 (71.0838) lr 1.8763e-03 eta 7:34:55 +epoch [10/50] batch [475/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.3477 (1.1725) acc 71.8750 (71.0526) lr 1.8763e-03 eta 7:34:48 +epoch [10/50] batch [480/500] time 1.368 (1.363) data 0.001 (0.002) loss 0.8608 (1.1732) acc 78.1250 (71.0547) lr 1.8763e-03 eta 7:34:40 +epoch [10/50] batch [485/500] time 1.349 (1.363) data 0.001 (0.002) loss 1.5840 (1.1737) acc 53.1250 (70.9923) lr 1.8763e-03 eta 7:34:36 +epoch [10/50] batch [490/500] time 1.373 (1.363) data 0.000 (0.002) loss 1.3643 (1.1733) acc 78.1250 (71.0077) lr 1.8763e-03 eta 7:34:30 +epoch [10/50] batch [495/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.2822 (1.1740) acc 68.7500 (70.9848) lr 1.8763e-03 eta 7:34:23 +epoch [10/50] batch [500/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.0234 (1.1755) acc 75.0000 (70.9875) lr 1.8443e-03 eta 7:34:13 +epoch [11/50] batch [5/500] time 1.370 (1.530) data 0.001 (0.173) loss 0.8193 (0.9979) acc 84.3750 (76.8750) lr 1.8443e-03 eta 8:30:00 +epoch [11/50] batch [10/500] time 1.363 (1.444) data 0.000 (0.087) loss 1.4785 (1.0641) acc 59.3750 (73.4375) lr 1.8443e-03 eta 8:01:00 +epoch [11/50] batch [15/500] time 1.355 (1.415) data 0.000 (0.058) loss 0.7803 (1.0170) acc 84.3750 (73.5417) lr 1.8443e-03 eta 7:51:22 +epoch [11/50] batch [20/500] time 1.366 (1.405) data 0.000 (0.043) loss 0.9707 (1.0533) acc 71.8750 (73.1250) lr 1.8443e-03 eta 7:47:55 +epoch [11/50] batch [25/500] time 1.354 (1.398) data 0.000 (0.035) loss 
1.3057 (1.0742) acc 65.6250 (72.1250) lr 1.8443e-03 eta 7:45:28 +epoch [11/50] batch [30/500] time 1.360 (1.393) data 0.000 (0.029) loss 1.2041 (1.0725) acc 56.2500 (71.4583) lr 1.8443e-03 eta 7:43:28 +epoch [11/50] batch [35/500] time 1.345 (1.393) data 0.000 (0.025) loss 1.3184 (1.0775) acc 65.6250 (71.5179) lr 1.8443e-03 eta 7:43:29 +epoch [11/50] batch [40/500] time 1.370 (1.390) data 0.000 (0.022) loss 0.7788 (1.0591) acc 81.2500 (72.1094) lr 1.8443e-03 eta 7:42:14 +epoch [11/50] batch [45/500] time 1.383 (1.386) data 0.000 (0.019) loss 1.0410 (1.0535) acc 75.0000 (72.2222) lr 1.8443e-03 eta 7:41:01 +epoch [11/50] batch [50/500] time 1.353 (1.385) data 0.000 (0.018) loss 1.0215 (1.0791) acc 75.0000 (72.0000) lr 1.8443e-03 eta 7:40:21 +epoch [11/50] batch [55/500] time 1.350 (1.383) data 0.000 (0.016) loss 1.0352 (1.0658) acc 78.1250 (72.2159) lr 1.8443e-03 eta 7:39:39 +epoch [11/50] batch [60/500] time 1.349 (1.381) data 0.000 (0.015) loss 1.0244 (1.0603) acc 68.7500 (72.2917) lr 1.8443e-03 eta 7:38:56 +epoch [11/50] batch [65/500] time 1.364 (1.380) data 0.000 (0.014) loss 0.7588 (1.0540) acc 78.1250 (72.5000) lr 1.8443e-03 eta 7:38:37 +epoch [11/50] batch [70/500] time 1.357 (1.379) data 0.000 (0.013) loss 1.3730 (1.0566) acc 62.5000 (72.0536) lr 1.8443e-03 eta 7:38:12 +epoch [11/50] batch [75/500] time 1.363 (1.378) data 0.000 (0.012) loss 1.9492 (1.0681) acc 59.3750 (71.9167) lr 1.8443e-03 eta 7:37:33 +epoch [11/50] batch [80/500] time 1.394 (1.379) data 0.016 (0.011) loss 1.5215 (1.0817) acc 59.3750 (71.7969) lr 1.8443e-03 eta 7:37:54 +epoch [11/50] batch [85/500] time 1.361 (1.378) data 0.001 (0.011) loss 1.7910 (1.0975) acc 56.2500 (71.5441) lr 1.8443e-03 eta 7:37:16 +epoch [11/50] batch [90/500] time 1.364 (1.377) data 0.000 (0.010) loss 0.9312 (1.0860) acc 68.7500 (71.6667) lr 1.8443e-03 eta 7:36:48 +epoch [11/50] batch [95/500] time 1.337 (1.376) data 0.000 (0.010) loss 1.3037 (1.1007) acc 65.6250 (71.5132) lr 1.8443e-03 eta 7:36:28 +epoch [11/50] 
batch [100/500] time 1.346 (1.375) data 0.000 (0.009) loss 1.0947 (1.1117) acc 71.8750 (71.3750) lr 1.8443e-03 eta 7:35:56 +epoch [11/50] batch [105/500] time 1.360 (1.374) data 0.000 (0.009) loss 1.1768 (1.1102) acc 68.7500 (71.3690) lr 1.8443e-03 eta 7:35:34 +epoch [11/50] batch [110/500] time 1.361 (1.373) data 0.000 (0.008) loss 1.1475 (1.1155) acc 68.7500 (71.3352) lr 1.8443e-03 eta 7:35:16 +epoch [11/50] batch [115/500] time 1.346 (1.373) data 0.000 (0.008) loss 0.7734 (1.1145) acc 84.3750 (71.3315) lr 1.8443e-03 eta 7:34:54 +epoch [11/50] batch [120/500] time 1.349 (1.372) data 0.000 (0.008) loss 1.1289 (1.1164) acc 78.1250 (71.1979) lr 1.8443e-03 eta 7:34:33 +epoch [11/50] batch [125/500] time 1.360 (1.371) data 0.000 (0.007) loss 1.3965 (1.1166) acc 71.8750 (71.2250) lr 1.8443e-03 eta 7:34:15 +epoch [11/50] batch [130/500] time 1.345 (1.370) data 0.000 (0.007) loss 1.0244 (1.1239) acc 78.1250 (71.1779) lr 1.8443e-03 eta 7:33:50 +epoch [11/50] batch [135/500] time 1.377 (1.370) data 0.000 (0.007) loss 0.9346 (1.1250) acc 75.0000 (71.2269) lr 1.8443e-03 eta 7:33:41 +epoch [11/50] batch [140/500] time 1.375 (1.370) data 0.000 (0.007) loss 1.3301 (1.1284) acc 62.5000 (71.0714) lr 1.8443e-03 eta 7:33:25 +epoch [11/50] batch [145/500] time 1.352 (1.369) data 0.000 (0.006) loss 1.3232 (1.1280) acc 62.5000 (71.0776) lr 1.8443e-03 eta 7:33:10 +epoch [11/50] batch [150/500] time 1.358 (1.369) data 0.000 (0.006) loss 0.8604 (1.1220) acc 71.8750 (71.2292) lr 1.8443e-03 eta 7:32:52 +epoch [11/50] batch [155/500] time 1.365 (1.369) data 0.000 (0.006) loss 0.8115 (1.1170) acc 84.3750 (71.3710) lr 1.8443e-03 eta 7:32:41 +epoch [11/50] batch [160/500] time 1.354 (1.369) data 0.000 (0.006) loss 0.8809 (1.1135) acc 71.8750 (71.5039) lr 1.8443e-03 eta 7:32:31 +epoch [11/50] batch [165/500] time 1.374 (1.368) data 0.001 (0.006) loss 0.9175 (1.1138) acc 75.0000 (71.4773) lr 1.8443e-03 eta 7:32:17 +epoch [11/50] batch [170/500] time 1.371 (1.368) data 0.000 (0.005) loss 0.9741 
(1.1153) acc 81.2500 (71.5257) lr 1.8443e-03 eta 7:32:05 +epoch [11/50] batch [175/500] time 1.343 (1.368) data 0.000 (0.005) loss 0.8965 (1.1113) acc 59.3750 (71.4643) lr 1.8443e-03 eta 7:31:51 +epoch [11/50] batch [180/500] time 1.347 (1.368) data 0.000 (0.005) loss 1.4590 (1.1127) acc 65.6250 (71.3715) lr 1.8443e-03 eta 7:31:59 +epoch [11/50] batch [185/500] time 1.366 (1.368) data 0.000 (0.005) loss 1.6016 (1.1110) acc 68.7500 (71.4527) lr 1.8443e-03 eta 7:31:46 +epoch [11/50] batch [190/500] time 1.348 (1.368) data 0.000 (0.005) loss 1.7393 (1.1158) acc 68.7500 (71.4967) lr 1.8443e-03 eta 7:31:36 +epoch [11/50] batch [195/500] time 1.357 (1.367) data 0.000 (0.005) loss 0.8188 (1.1134) acc 75.0000 (71.5705) lr 1.8443e-03 eta 7:31:22 +epoch [11/50] batch [200/500] time 1.375 (1.367) data 0.000 (0.005) loss 1.6777 (1.1208) acc 59.3750 (71.4531) lr 1.8443e-03 eta 7:31:14 +epoch [11/50] batch [205/500] time 1.366 (1.368) data 0.000 (0.005) loss 0.8228 (1.1215) acc 75.0000 (71.4634) lr 1.8443e-03 eta 7:31:10 +epoch [11/50] batch [210/500] time 1.357 (1.367) data 0.000 (0.005) loss 1.1387 (1.1281) acc 75.0000 (71.3988) lr 1.8443e-03 eta 7:30:56 +epoch [11/50] batch [215/500] time 1.335 (1.367) data 0.000 (0.004) loss 0.8706 (1.1250) acc 75.0000 (71.4535) lr 1.8443e-03 eta 7:30:43 +epoch [11/50] batch [220/500] time 1.364 (1.367) data 0.000 (0.004) loss 1.0391 (1.1269) acc 75.0000 (71.4489) lr 1.8443e-03 eta 7:30:32 +epoch [11/50] batch [225/500] time 1.352 (1.367) data 0.000 (0.004) loss 1.2959 (1.1301) acc 68.7500 (71.4722) lr 1.8443e-03 eta 7:30:30 +epoch [11/50] batch [230/500] time 1.347 (1.367) data 0.000 (0.004) loss 1.0752 (1.1300) acc 71.8750 (71.5353) lr 1.8443e-03 eta 7:30:22 +epoch [11/50] batch [235/500] time 1.388 (1.367) data 0.000 (0.004) loss 0.9453 (1.1280) acc 71.8750 (71.4894) lr 1.8443e-03 eta 7:30:12 +epoch [11/50] batch [240/500] time 1.354 (1.366) data 0.000 (0.004) loss 1.0381 (1.1299) acc 71.8750 (71.4062) lr 1.8443e-03 eta 7:30:01 +epoch 
[11/50] batch [245/500] time 1.337 (1.366) data 0.000 (0.004) loss 1.1016 (1.1304) acc 65.6250 (71.3903) lr 1.8443e-03 eta 7:29:51 +epoch [11/50] batch [250/500] time 1.371 (1.366) data 0.000 (0.004) loss 0.9956 (1.1345) acc 62.5000 (71.2750) lr 1.8443e-03 eta 7:29:39 +epoch [11/50] batch [255/500] time 1.374 (1.366) data 0.000 (0.004) loss 0.7939 (1.1354) acc 78.1250 (71.3113) lr 1.8443e-03 eta 7:29:31 +epoch [11/50] batch [260/500] time 1.379 (1.366) data 0.000 (0.004) loss 0.7417 (1.1327) acc 84.3750 (71.4423) lr 1.8443e-03 eta 7:29:21 +epoch [11/50] batch [265/500] time 1.379 (1.366) data 0.000 (0.004) loss 0.9102 (1.1316) acc 81.2500 (71.5448) lr 1.8443e-03 eta 7:29:15 +epoch [11/50] batch [270/500] time 1.348 (1.366) data 0.000 (0.004) loss 1.2217 (1.1307) acc 71.8750 (71.5625) lr 1.8443e-03 eta 7:29:04 +epoch [11/50] batch [275/500] time 1.370 (1.366) data 0.000 (0.004) loss 0.9873 (1.1284) acc 68.7500 (71.5568) lr 1.8443e-03 eta 7:28:57 +epoch [11/50] batch [280/500] time 1.368 (1.366) data 0.000 (0.003) loss 0.9111 (1.1314) acc 71.8750 (71.4732) lr 1.8443e-03 eta 7:28:49 +epoch [11/50] batch [285/500] time 1.341 (1.365) data 0.000 (0.003) loss 1.4590 (1.1337) acc 65.6250 (71.4803) lr 1.8443e-03 eta 7:28:39 +epoch [11/50] batch [290/500] time 1.397 (1.365) data 0.000 (0.003) loss 0.8188 (1.1320) acc 78.1250 (71.4655) lr 1.8443e-03 eta 7:28:33 +epoch [11/50] batch [295/500] time 1.367 (1.365) data 0.000 (0.003) loss 1.5928 (1.1402) acc 59.3750 (71.2818) lr 1.8443e-03 eta 7:28:23 +epoch [11/50] batch [300/500] time 1.339 (1.365) data 0.000 (0.003) loss 0.9829 (1.1404) acc 78.1250 (71.3333) lr 1.8443e-03 eta 7:28:11 +epoch [11/50] batch [305/500] time 1.363 (1.365) data 0.000 (0.003) loss 1.2852 (1.1403) acc 65.6250 (71.3115) lr 1.8443e-03 eta 7:28:03 +epoch [11/50] batch [310/500] time 1.366 (1.365) data 0.000 (0.003) loss 0.9951 (1.1402) acc 75.0000 (71.3508) lr 1.8443e-03 eta 7:27:53 +epoch [11/50] batch [315/500] time 1.355 (1.365) data 0.000 (0.003) loss 
0.8418 (1.1391) acc 75.0000 (71.2897) lr 1.8443e-03 eta 7:27:42 +epoch [11/50] batch [320/500] time 1.460 (1.365) data 0.000 (0.003) loss 1.2471 (1.1335) acc 59.3750 (71.4258) lr 1.8443e-03 eta 7:27:40 +epoch [11/50] batch [325/500] time 1.352 (1.365) data 0.000 (0.003) loss 0.9355 (1.1331) acc 78.1250 (71.3846) lr 1.8443e-03 eta 7:27:31 +epoch [11/50] batch [330/500] time 1.363 (1.365) data 0.000 (0.003) loss 1.1074 (1.1315) acc 78.1250 (71.4110) lr 1.8443e-03 eta 7:27:22 +epoch [11/50] batch [335/500] time 1.355 (1.365) data 0.000 (0.003) loss 1.2539 (1.1329) acc 62.5000 (71.3713) lr 1.8443e-03 eta 7:27:13 +epoch [11/50] batch [340/500] time 1.343 (1.364) data 0.000 (0.003) loss 0.8198 (1.1320) acc 71.8750 (71.3051) lr 1.8443e-03 eta 7:27:03 +epoch [11/50] batch [345/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.4854 (1.1304) acc 68.7500 (71.3587) lr 1.8443e-03 eta 7:26:54 +epoch [11/50] batch [350/500] time 1.370 (1.364) data 0.001 (0.003) loss 1.2031 (1.1316) acc 68.7500 (71.3214) lr 1.8443e-03 eta 7:26:44 +epoch [11/50] batch [355/500] time 1.340 (1.364) data 0.000 (0.003) loss 1.4277 (1.1339) acc 71.8750 (71.3028) lr 1.8443e-03 eta 7:26:34 +epoch [11/50] batch [360/500] time 1.372 (1.364) data 0.000 (0.003) loss 1.0469 (1.1338) acc 68.7500 (71.3108) lr 1.8443e-03 eta 7:26:26 +epoch [11/50] batch [365/500] time 1.377 (1.364) data 0.000 (0.003) loss 1.3877 (1.1344) acc 59.3750 (71.2928) lr 1.8443e-03 eta 7:26:24 +epoch [11/50] batch [370/500] time 1.390 (1.364) data 0.000 (0.003) loss 0.7490 (1.1359) acc 87.5000 (71.2922) lr 1.8443e-03 eta 7:26:17 +epoch [11/50] batch [375/500] time 1.378 (1.364) data 0.001 (0.003) loss 1.2881 (1.1351) acc 71.8750 (71.3000) lr 1.8443e-03 eta 7:26:10 +epoch [11/50] batch [380/500] time 1.358 (1.364) data 0.000 (0.003) loss 0.7373 (1.1382) acc 84.3750 (71.2911) lr 1.8443e-03 eta 7:26:02 +epoch [11/50] batch [385/500] time 1.346 (1.364) data 0.000 (0.003) loss 1.4775 (1.1382) acc 65.6250 (71.2906) lr 1.8443e-03 eta 7:25:53 
+epoch [11/50] batch [390/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.2539 (1.1395) acc 56.2500 (71.2500) lr 1.8443e-03 eta 7:25:46 +epoch [11/50] batch [395/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.4922 (1.1397) acc 71.8750 (71.2500) lr 1.8443e-03 eta 7:25:36 +epoch [11/50] batch [400/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.2598 (1.1416) acc 62.5000 (71.1875) lr 1.8443e-03 eta 7:25:28 +epoch [11/50] batch [405/500] time 1.368 (1.364) data 0.000 (0.002) loss 1.6543 (1.1436) acc 68.7500 (71.2114) lr 1.8443e-03 eta 7:25:22 +epoch [11/50] batch [410/500] time 1.357 (1.364) data 0.001 (0.002) loss 1.1377 (1.1440) acc 71.8750 (71.1814) lr 1.8443e-03 eta 7:25:14 +epoch [11/50] batch [415/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.3047 (1.1438) acc 71.8750 (71.2349) lr 1.8443e-03 eta 7:25:06 +epoch [11/50] batch [420/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.5205 (1.1445) acc 65.6250 (71.2202) lr 1.8443e-03 eta 7:24:58 +epoch [11/50] batch [425/500] time 1.381 (1.364) data 0.000 (0.002) loss 1.1846 (1.1425) acc 68.7500 (71.2574) lr 1.8443e-03 eta 7:24:51 +epoch [11/50] batch [430/500] time 1.380 (1.364) data 0.000 (0.002) loss 1.0234 (1.1383) acc 81.2500 (71.4462) lr 1.8443e-03 eta 7:24:44 +epoch [11/50] batch [435/500] time 1.328 (1.364) data 0.000 (0.002) loss 1.1826 (1.1392) acc 75.0000 (71.4511) lr 1.8443e-03 eta 7:24:37 +epoch [11/50] batch [440/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.9609 (1.1386) acc 71.8750 (71.4915) lr 1.8443e-03 eta 7:24:27 +epoch [11/50] batch [445/500] time 1.380 (1.363) data 0.000 (0.002) loss 1.5215 (1.1395) acc 68.7500 (71.4747) lr 1.8443e-03 eta 7:24:21 +epoch [11/50] batch [450/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.4258 (1.1405) acc 62.5000 (71.4514) lr 1.8443e-03 eta 7:24:15 +epoch [11/50] batch [455/500] time 1.372 (1.364) data 0.000 (0.002) loss 1.3555 (1.1431) acc 56.2500 (71.3255) lr 1.8443e-03 eta 7:24:12 +epoch [11/50] batch [460/500] time 1.362 (1.364) data 0.000 
(0.002) loss 0.8716 (1.1403) acc 75.0000 (71.3587) lr 1.8443e-03 eta 7:24:06 +epoch [11/50] batch [465/500] time 1.364 (1.364) data 0.000 (0.002) loss 1.3965 (1.1430) acc 62.5000 (71.3306) lr 1.8443e-03 eta 7:24:07 +epoch [11/50] batch [470/500] time 1.339 (1.364) data 0.001 (0.002) loss 1.3594 (1.1441) acc 71.8750 (71.2899) lr 1.8443e-03 eta 7:24:00 +epoch [11/50] batch [475/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.0664 (1.1467) acc 68.7500 (71.2237) lr 1.8443e-03 eta 7:23:52 +epoch [11/50] batch [480/500] time 1.356 (1.364) data 0.000 (0.002) loss 0.7979 (1.1453) acc 78.1250 (71.2240) lr 1.8443e-03 eta 7:23:46 +epoch [11/50] batch [485/500] time 1.360 (1.364) data 0.001 (0.002) loss 1.2676 (1.1458) acc 71.8750 (71.2242) lr 1.8443e-03 eta 7:23:36 +epoch [11/50] batch [490/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.0303 (1.1447) acc 78.1250 (71.2564) lr 1.8443e-03 eta 7:23:27 +epoch [11/50] batch [495/500] time 1.359 (1.364) data 0.000 (0.002) loss 1.6523 (1.1469) acc 59.3750 (71.1806) lr 1.8443e-03 eta 7:23:18 +epoch [11/50] batch [500/500] time 1.341 (1.364) data 0.000 (0.002) loss 1.2754 (1.1480) acc 68.7500 (71.1500) lr 1.8090e-03 eta 7:23:11 +epoch [12/50] batch [5/500] time 1.347 (1.532) data 0.000 (0.163) loss 1.1230 (1.0367) acc 75.0000 (73.1250) lr 1.8090e-03 eta 8:17:37 +epoch [12/50] batch [10/500] time 1.566 (1.468) data 0.000 (0.082) loss 1.2236 (1.0264) acc 65.6250 (70.9375) lr 1.8090e-03 eta 7:56:53 +epoch [12/50] batch [15/500] time 1.360 (1.431) data 0.001 (0.055) loss 1.3379 (1.1494) acc 68.7500 (69.1667) lr 1.8090e-03 eta 7:44:40 +epoch [12/50] batch [20/500] time 1.352 (1.412) data 0.000 (0.041) loss 1.0898 (1.1296) acc 75.0000 (70.3125) lr 1.8090e-03 eta 7:38:20 +epoch [12/50] batch [25/500] time 1.358 (1.398) data 0.000 (0.033) loss 0.9863 (1.1824) acc 81.2500 (69.8750) lr 1.8090e-03 eta 7:33:50 +epoch [12/50] batch [30/500] time 1.374 (1.393) data 0.000 (0.028) loss 0.7881 (1.1545) acc 78.1250 (71.0417) lr 1.8090e-03 eta 
7:32:01 +epoch [12/50] batch [35/500] time 1.357 (1.388) data 0.000 (0.024) loss 2.1797 (1.2045) acc 50.0000 (70.1786) lr 1.8090e-03 eta 7:30:13 +epoch [12/50] batch [40/500] time 1.354 (1.384) data 0.000 (0.021) loss 1.4531 (1.1873) acc 68.7500 (70.5469) lr 1.8090e-03 eta 7:28:57 +epoch [12/50] batch [45/500] time 1.366 (1.382) data 0.000 (0.018) loss 0.5986 (1.1727) acc 87.5000 (70.4861) lr 1.8090e-03 eta 7:27:59 +epoch [12/50] batch [50/500] time 1.362 (1.380) data 0.000 (0.017) loss 0.9517 (1.1503) acc 81.2500 (70.8750) lr 1.8090e-03 eta 7:27:14 +epoch [12/50] batch [55/500] time 1.375 (1.381) data 0.000 (0.015) loss 0.9736 (1.1602) acc 75.0000 (71.1364) lr 1.8090e-03 eta 7:27:35 +epoch [12/50] batch [60/500] time 1.347 (1.379) data 0.001 (0.014) loss 0.7256 (1.1395) acc 87.5000 (71.5104) lr 1.8090e-03 eta 7:26:56 +epoch [12/50] batch [65/500] time 1.371 (1.378) data 0.000 (0.013) loss 1.6172 (1.1574) acc 68.7500 (71.0096) lr 1.8090e-03 eta 7:26:17 +epoch [12/50] batch [70/500] time 1.367 (1.376) data 0.000 (0.012) loss 1.2217 (1.1714) acc 68.7500 (70.7589) lr 1.8090e-03 eta 7:25:42 +epoch [12/50] batch [75/500] time 1.365 (1.375) data 0.000 (0.011) loss 0.8809 (1.1619) acc 78.1250 (70.6250) lr 1.8090e-03 eta 7:25:12 +epoch [12/50] batch [80/500] time 1.343 (1.374) data 0.000 (0.011) loss 0.7363 (1.1582) acc 81.2500 (70.5469) lr 1.8090e-03 eta 7:24:40 +epoch [12/50] batch [85/500] time 1.336 (1.372) data 0.000 (0.010) loss 1.1162 (1.1492) acc 75.0000 (70.6250) lr 1.8090e-03 eta 7:24:04 +epoch [12/50] batch [90/500] time 1.361 (1.372) data 0.000 (0.009) loss 0.8584 (1.1390) acc 68.7500 (70.9028) lr 1.8090e-03 eta 7:23:43 +epoch [12/50] batch [95/500] time 1.362 (1.371) data 0.000 (0.009) loss 1.3213 (1.1432) acc 56.2500 (70.7237) lr 1.8090e-03 eta 7:23:20 +epoch [12/50] batch [100/500] time 1.371 (1.370) data 0.000 (0.009) loss 0.9453 (1.1385) acc 75.0000 (70.8125) lr 1.8090e-03 eta 7:23:03 +epoch [12/50] batch [105/500] time 1.354 (1.369) data 0.000 (0.008) 
loss 1.4766 (1.1483) acc 59.3750 (70.7143) lr 1.8090e-03 eta 7:22:37 +epoch [12/50] batch [110/500] time 1.345 (1.369) data 0.000 (0.008) loss 1.3018 (1.1513) acc 65.6250 (70.5682) lr 1.8090e-03 eta 7:22:17 +epoch [12/50] batch [115/500] time 1.346 (1.368) data 0.000 (0.007) loss 0.7642 (1.1404) acc 81.2500 (70.8424) lr 1.8090e-03 eta 7:21:51 +epoch [12/50] batch [120/500] time 1.348 (1.367) data 0.000 (0.007) loss 1.0605 (1.1467) acc 78.1250 (70.8594) lr 1.8090e-03 eta 7:21:33 +epoch [12/50] batch [125/500] time 1.346 (1.367) data 0.000 (0.007) loss 0.4712 (1.1348) acc 87.5000 (71.1250) lr 1.8090e-03 eta 7:21:22 +epoch [12/50] batch [130/500] time 1.362 (1.366) data 0.000 (0.007) loss 1.0225 (1.1277) acc 68.7500 (71.2740) lr 1.8090e-03 eta 7:21:06 +epoch [12/50] batch [135/500] time 1.340 (1.366) data 0.000 (0.006) loss 1.3271 (1.1306) acc 65.6250 (71.0880) lr 1.8090e-03 eta 7:20:51 +epoch [12/50] batch [140/500] time 1.337 (1.366) data 0.000 (0.006) loss 0.7573 (1.1321) acc 84.3750 (71.1607) lr 1.8090e-03 eta 7:20:42 +epoch [12/50] batch [145/500] time 1.371 (1.366) data 0.000 (0.006) loss 1.2324 (1.1350) acc 65.6250 (71.1207) lr 1.8090e-03 eta 7:20:29 +epoch [12/50] batch [150/500] time 1.480 (1.366) data 0.000 (0.006) loss 1.3291 (1.1337) acc 78.1250 (71.2500) lr 1.8090e-03 eta 7:20:36 +epoch [12/50] batch [155/500] time 1.358 (1.366) data 0.000 (0.006) loss 1.0586 (1.1358) acc 65.6250 (71.1089) lr 1.8090e-03 eta 7:20:22 +epoch [12/50] batch [160/500] time 1.340 (1.366) data 0.000 (0.005) loss 0.8999 (1.1334) acc 71.8750 (71.0156) lr 1.8090e-03 eta 7:20:09 +epoch [12/50] batch [165/500] time 1.368 (1.365) data 0.000 (0.005) loss 1.2471 (1.1374) acc 68.7500 (70.8144) lr 1.8090e-03 eta 7:20:01 +epoch [12/50] batch [170/500] time 1.363 (1.365) data 0.000 (0.005) loss 0.7896 (1.1353) acc 78.1250 (70.9007) lr 1.8090e-03 eta 7:19:53 +epoch [12/50] batch [175/500] time 1.348 (1.365) data 0.000 (0.005) loss 1.1826 (1.1368) acc 75.0000 (70.9464) lr 1.8090e-03 eta 
7:19:42 +epoch [12/50] batch [180/500] time 1.372 (1.365) data 0.000 (0.005) loss 1.0000 (1.1347) acc 68.7500 (71.0069) lr 1.8090e-03 eta 7:19:37 +epoch [12/50] batch [185/500] time 1.349 (1.365) data 0.000 (0.005) loss 1.1719 (1.1307) acc 62.5000 (71.1318) lr 1.8090e-03 eta 7:19:26 +epoch [12/50] batch [190/500] time 1.353 (1.365) data 0.000 (0.005) loss 1.3818 (1.1302) acc 68.7500 (71.1020) lr 1.8090e-03 eta 7:19:12 +epoch [12/50] batch [195/500] time 1.346 (1.365) data 0.000 (0.005) loss 0.7720 (1.1296) acc 71.8750 (71.0256) lr 1.8090e-03 eta 7:19:13 +epoch [12/50] batch [200/500] time 1.341 (1.365) data 0.000 (0.004) loss 0.7334 (1.1267) acc 84.3750 (71.1094) lr 1.8090e-03 eta 7:18:58 +epoch [12/50] batch [205/500] time 1.348 (1.364) data 0.000 (0.004) loss 1.1484 (1.1336) acc 68.7500 (71.0518) lr 1.8090e-03 eta 7:18:46 +epoch [12/50] batch [210/500] time 1.356 (1.364) data 0.000 (0.004) loss 0.8979 (1.1312) acc 75.0000 (71.1161) lr 1.8090e-03 eta 7:18:37 +epoch [12/50] batch [215/500] time 1.359 (1.364) data 0.000 (0.004) loss 0.7534 (1.1287) acc 75.0000 (71.1483) lr 1.8090e-03 eta 7:18:24 +epoch [12/50] batch [220/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.2930 (1.1293) acc 68.7500 (71.1364) lr 1.8090e-03 eta 7:18:12 +epoch [12/50] batch [225/500] time 1.373 (1.364) data 0.000 (0.004) loss 0.8799 (1.1269) acc 78.1250 (71.2500) lr 1.8090e-03 eta 7:18:04 +epoch [12/50] batch [230/500] time 1.353 (1.363) data 0.000 (0.004) loss 0.6875 (1.1264) acc 81.2500 (71.2500) lr 1.8090e-03 eta 7:17:50 +epoch [12/50] batch [235/500] time 1.361 (1.363) data 0.000 (0.004) loss 1.4297 (1.1300) acc 71.8750 (71.0372) lr 1.8090e-03 eta 7:17:41 +epoch [12/50] batch [240/500] time 1.352 (1.363) data 0.000 (0.004) loss 1.3945 (1.1363) acc 62.5000 (70.9766) lr 1.8090e-03 eta 7:17:31 +epoch [12/50] batch [245/500] time 1.399 (1.363) data 0.000 (0.004) loss 0.7578 (1.1416) acc 84.3750 (70.8801) lr 1.8090e-03 eta 7:17:24 +epoch [12/50] batch [250/500] time 1.359 (1.363) data 
0.000 (0.004) loss 0.6548 (1.1395) acc 78.1250 (70.9375) lr 1.8090e-03 eta 7:17:15 +epoch [12/50] batch [255/500] time 1.344 (1.363) data 0.000 (0.004) loss 0.6377 (1.1382) acc 84.3750 (71.0417) lr 1.8090e-03 eta 7:17:07 +epoch [12/50] batch [260/500] time 1.351 (1.363) data 0.000 (0.003) loss 0.9678 (1.1372) acc 75.0000 (71.0817) lr 1.8090e-03 eta 7:16:57 +epoch [12/50] batch [265/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.5186 (1.1397) acc 59.3750 (71.0259) lr 1.8090e-03 eta 7:16:50 +epoch [12/50] batch [270/500] time 1.386 (1.363) data 0.000 (0.003) loss 0.7915 (1.1391) acc 68.7500 (71.0301) lr 1.8090e-03 eta 7:16:49 +epoch [12/50] batch [275/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.2266 (1.1417) acc 75.0000 (71.0227) lr 1.8090e-03 eta 7:16:45 +epoch [12/50] batch [280/500] time 1.365 (1.363) data 0.000 (0.003) loss 0.9424 (1.1387) acc 81.2500 (71.0714) lr 1.8090e-03 eta 7:16:37 +epoch [12/50] batch [285/500] time 1.347 (1.363) data 0.000 (0.003) loss 1.4902 (1.1421) acc 71.8750 (70.9978) lr 1.8090e-03 eta 7:16:30 +epoch [12/50] batch [290/500] time 1.350 (1.363) data 0.000 (0.003) loss 0.9688 (1.1393) acc 78.1250 (71.1315) lr 1.8090e-03 eta 7:16:22 +epoch [12/50] batch [295/500] time 1.353 (1.363) data 0.000 (0.003) loss 0.5864 (1.1387) acc 78.1250 (71.0911) lr 1.8090e-03 eta 7:16:22 +epoch [12/50] batch [300/500] time 1.361 (1.363) data 0.000 (0.003) loss 0.9663 (1.1387) acc 68.7500 (71.0625) lr 1.8090e-03 eta 7:16:16 +epoch [12/50] batch [305/500] time 1.344 (1.363) data 0.000 (0.003) loss 0.9512 (1.1366) acc 65.6250 (71.1066) lr 1.8090e-03 eta 7:16:08 +epoch [12/50] batch [310/500] time 1.353 (1.363) data 0.000 (0.003) loss 1.1523 (1.1383) acc 75.0000 (71.0383) lr 1.8090e-03 eta 7:16:03 +epoch [12/50] batch [315/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.4885 (1.1384) acc 87.5000 (71.0714) lr 1.8090e-03 eta 7:15:54 +epoch [12/50] batch [320/500] time 1.390 (1.363) data 0.000 (0.003) loss 1.1748 (1.1372) acc 71.8750 (71.0938) lr 
1.8090e-03 eta 7:15:50 +epoch [12/50] batch [325/500] time 1.385 (1.364) data 0.000 (0.003) loss 1.3223 (1.1316) acc 65.6250 (71.1923) lr 1.8090e-03 eta 7:15:48 +epoch [12/50] batch [330/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.3252 (1.1308) acc 59.3750 (71.1458) lr 1.8090e-03 eta 7:15:42 +epoch [12/50] batch [335/500] time 1.368 (1.364) data 0.000 (0.003) loss 0.6465 (1.1318) acc 81.2500 (71.0821) lr 1.8090e-03 eta 7:15:37 +epoch [12/50] batch [340/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.7886 (1.1296) acc 81.2500 (71.1489) lr 1.8090e-03 eta 7:15:33 +epoch [12/50] batch [345/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.3818 (1.1291) acc 68.7500 (71.1504) lr 1.8090e-03 eta 7:15:26 +epoch [12/50] batch [350/500] time 1.347 (1.364) data 0.000 (0.003) loss 1.0703 (1.1273) acc 81.2500 (71.1875) lr 1.8090e-03 eta 7:15:18 +epoch [12/50] batch [355/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.4473 (1.1257) acc 68.7500 (71.2148) lr 1.8090e-03 eta 7:15:08 +epoch [12/50] batch [360/500] time 1.336 (1.364) data 0.000 (0.003) loss 0.6860 (1.1243) acc 84.3750 (71.2240) lr 1.8090e-03 eta 7:14:57 +epoch [12/50] batch [365/500] time 1.360 (1.363) data 0.000 (0.003) loss 1.3066 (1.1288) acc 68.7500 (71.1130) lr 1.8090e-03 eta 7:14:48 +epoch [12/50] batch [370/500] time 1.372 (1.363) data 0.000 (0.003) loss 0.8032 (1.1288) acc 84.3750 (71.1149) lr 1.8090e-03 eta 7:14:42 +epoch [12/50] batch [375/500] time 1.362 (1.363) data 0.000 (0.003) loss 0.8198 (1.1276) acc 65.6250 (71.0833) lr 1.8090e-03 eta 7:14:34 +epoch [12/50] batch [380/500] time 1.374 (1.363) data 0.000 (0.002) loss 0.7183 (1.1307) acc 87.5000 (70.9868) lr 1.8090e-03 eta 7:14:26 +epoch [12/50] batch [385/500] time 1.355 (1.363) data 0.000 (0.002) loss 0.7910 (1.1287) acc 78.1250 (71.0390) lr 1.8090e-03 eta 7:14:19 +epoch [12/50] batch [390/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.8867 (1.1267) acc 78.1250 (71.0897) lr 1.8090e-03 eta 7:14:12 +epoch [12/50] batch [395/500] time 1.347 
(1.363) data 0.000 (0.002) loss 1.6035 (1.1315) acc 43.7500 (70.8861) lr 1.8090e-03 eta 7:14:05 +epoch [12/50] batch [400/500] time 1.339 (1.363) data 0.000 (0.002) loss 1.0225 (1.1285) acc 75.0000 (70.9531) lr 1.8090e-03 eta 7:13:55 +epoch [12/50] batch [405/500] time 1.340 (1.363) data 0.000 (0.002) loss 0.9810 (1.1271) acc 68.7500 (71.0031) lr 1.8090e-03 eta 7:13:48 +epoch [12/50] batch [410/500] time 1.341 (1.363) data 0.000 (0.002) loss 0.7671 (1.1264) acc 75.0000 (71.0213) lr 1.8090e-03 eta 7:13:36 +epoch [12/50] batch [415/500] time 1.351 (1.363) data 0.000 (0.002) loss 1.2510 (1.1287) acc 75.0000 (71.0392) lr 1.8090e-03 eta 7:13:28 +epoch [12/50] batch [420/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.3672 (1.1302) acc 68.7500 (71.0342) lr 1.8090e-03 eta 7:13:20 +epoch [12/50] batch [425/500] time 1.361 (1.363) data 0.000 (0.002) loss 0.7998 (1.1277) acc 75.0000 (71.0588) lr 1.8090e-03 eta 7:13:14 +epoch [12/50] batch [430/500] time 1.326 (1.363) data 0.000 (0.002) loss 1.0127 (1.1255) acc 68.7500 (71.0901) lr 1.8090e-03 eta 7:13:03 +epoch [12/50] batch [435/500] time 1.343 (1.363) data 0.000 (0.002) loss 1.4258 (1.1265) acc 68.7500 (70.9986) lr 1.8090e-03 eta 7:12:56 +epoch [12/50] batch [440/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.5820 (1.1288) acc 59.3750 (70.9801) lr 1.8090e-03 eta 7:12:53 +epoch [12/50] batch [445/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.2520 (1.1287) acc 71.8750 (70.9831) lr 1.8090e-03 eta 7:12:46 +epoch [12/50] batch [450/500] time 1.368 (1.363) data 0.000 (0.002) loss 0.6265 (1.1268) acc 78.1250 (70.9722) lr 1.8090e-03 eta 7:12:40 +epoch [12/50] batch [455/500] time 1.342 (1.363) data 0.000 (0.002) loss 1.4971 (1.1281) acc 62.5000 (70.9066) lr 1.8090e-03 eta 7:12:31 +epoch [12/50] batch [460/500] time 1.375 (1.363) data 0.000 (0.002) loss 1.0791 (1.1257) acc 75.0000 (70.9375) lr 1.8090e-03 eta 7:12:23 +epoch [12/50] batch [465/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.0791 (1.1282) acc 62.5000 
(70.8602) lr 1.8090e-03 eta 7:12:19 +epoch [12/50] batch [470/500] time 1.357 (1.363) data 0.000 (0.002) loss 0.6934 (1.1297) acc 78.1250 (70.8444) lr 1.8090e-03 eta 7:12:11 +epoch [12/50] batch [475/500] time 1.342 (1.362) data 0.000 (0.002) loss 1.5322 (1.1307) acc 53.1250 (70.7895) lr 1.8090e-03 eta 7:12:00 +epoch [12/50] batch [480/500] time 1.491 (1.363) data 0.000 (0.002) loss 1.3809 (1.1309) acc 78.1250 (70.8268) lr 1.8090e-03 eta 7:11:58 +epoch [12/50] batch [485/500] time 1.379 (1.363) data 0.001 (0.002) loss 1.1455 (1.1314) acc 84.3750 (70.8505) lr 1.8090e-03 eta 7:11:53 +epoch [12/50] batch [490/500] time 1.366 (1.363) data 0.000 (0.002) loss 0.8726 (1.1289) acc 75.0000 (70.8801) lr 1.8090e-03 eta 7:11:46 +epoch [12/50] batch [495/500] time 1.350 (1.363) data 0.000 (0.002) loss 0.5723 (1.1300) acc 90.6250 (70.8838) lr 1.8090e-03 eta 7:11:38 +epoch [12/50] batch [500/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.7832 (1.1333) acc 56.2500 (70.8125) lr 1.7705e-03 eta 7:11:31 +epoch [13/50] batch [5/500] time 1.366 (1.532) data 0.000 (0.162) loss 1.0518 (0.9411) acc 78.1250 (75.0000) lr 1.7705e-03 eta 8:05:03 +epoch [13/50] batch [10/500] time 1.365 (1.453) data 0.000 (0.081) loss 1.2031 (1.1398) acc 68.7500 (70.9375) lr 1.7705e-03 eta 7:39:43 +epoch [13/50] batch [15/500] time 1.367 (1.423) data 0.000 (0.054) loss 1.0684 (1.1714) acc 71.8750 (70.2083) lr 1.7705e-03 eta 7:30:13 +epoch [13/50] batch [20/500] time 1.361 (1.408) data 0.000 (0.041) loss 1.4082 (1.1497) acc 75.0000 (70.9375) lr 1.7705e-03 eta 7:25:21 +epoch [13/50] batch [25/500] time 1.379 (1.409) data 0.001 (0.033) loss 1.1426 (1.1346) acc 68.7500 (71.5000) lr 1.7705e-03 eta 7:25:29 +epoch [13/50] batch [30/500] time 1.351 (1.402) data 0.001 (0.027) loss 0.9092 (1.1103) acc 81.2500 (72.1875) lr 1.7705e-03 eta 7:23:07 +epoch [13/50] batch [35/500] time 1.351 (1.395) data 0.000 (0.023) loss 1.2998 (1.1115) acc 71.8750 (72.0536) lr 1.7705e-03 eta 7:20:56 +epoch [13/50] batch [40/500] time 
1.340 (1.390) data 0.000 (0.021) loss 1.8779 (1.1269) acc 62.5000 (71.7188) lr 1.7705e-03 eta 7:19:05 +epoch [13/50] batch [45/500] time 1.376 (1.386) data 0.000 (0.018) loss 1.3867 (1.1243) acc 68.7500 (71.9444) lr 1.7705e-03 eta 7:17:58 +epoch [13/50] batch [50/500] time 1.351 (1.384) data 0.000 (0.016) loss 1.4141 (1.1656) acc 65.6250 (70.7500) lr 1.7705e-03 eta 7:17:00 +epoch [13/50] batch [55/500] time 1.322 (1.381) data 0.000 (0.015) loss 1.4844 (1.1683) acc 65.6250 (70.7955) lr 1.7705e-03 eta 7:16:03 +epoch [13/50] batch [60/500] time 1.353 (1.379) data 0.001 (0.014) loss 1.6250 (1.1744) acc 65.6250 (70.6250) lr 1.7705e-03 eta 7:15:10 +epoch [13/50] batch [65/500] time 1.359 (1.379) data 0.000 (0.013) loss 1.4404 (1.1637) acc 59.3750 (70.8173) lr 1.7705e-03 eta 7:15:14 +epoch [13/50] batch [70/500] time 1.352 (1.378) data 0.000 (0.012) loss 1.5332 (1.1755) acc 59.3750 (70.5357) lr 1.7705e-03 eta 7:14:37 +epoch [13/50] batch [75/500] time 1.372 (1.376) data 0.000 (0.011) loss 0.9434 (1.1803) acc 71.8750 (70.1667) lr 1.7705e-03 eta 7:14:05 +epoch [13/50] batch [80/500] time 1.359 (1.375) data 0.000 (0.010) loss 0.8521 (1.1762) acc 78.1250 (70.1953) lr 1.7705e-03 eta 7:13:36 +epoch [13/50] batch [85/500] time 1.359 (1.374) data 0.001 (0.010) loss 0.9697 (1.1749) acc 75.0000 (70.1838) lr 1.7705e-03 eta 7:13:01 +epoch [13/50] batch [90/500] time 1.355 (1.373) data 0.000 (0.009) loss 1.3623 (1.1767) acc 68.7500 (70.1042) lr 1.7705e-03 eta 7:12:37 +epoch [13/50] batch [95/500] time 1.366 (1.372) data 0.000 (0.009) loss 0.8916 (1.1733) acc 75.0000 (70.1974) lr 1.7705e-03 eta 7:12:22 +epoch [13/50] batch [100/500] time 1.351 (1.371) data 0.000 (0.008) loss 1.4854 (1.1712) acc 65.6250 (70.2812) lr 1.7705e-03 eta 7:12:00 +epoch [13/50] batch [105/500] time 1.363 (1.371) data 0.000 (0.008) loss 1.0010 (1.1840) acc 71.8750 (70.1786) lr 1.7705e-03 eta 7:11:42 +epoch [13/50] batch [110/500] time 1.338 (1.370) data 0.000 (0.008) loss 0.6040 (1.1751) acc 84.3750 (70.1989) lr 
1.7705e-03 eta 7:11:24 +epoch [13/50] batch [115/500] time 1.363 (1.370) data 0.000 (0.007) loss 1.3330 (1.1772) acc 62.5000 (70.2717) lr 1.7705e-03 eta 7:11:06 +epoch [13/50] batch [120/500] time 1.359 (1.369) data 0.000 (0.007) loss 1.4658 (1.1867) acc 75.0000 (70.1042) lr 1.7705e-03 eta 7:10:54 +epoch [13/50] batch [125/500] time 1.373 (1.370) data 0.000 (0.007) loss 1.0127 (1.1862) acc 71.8750 (70.2750) lr 1.7705e-03 eta 7:11:05 +epoch [13/50] batch [130/500] time 1.350 (1.370) data 0.000 (0.007) loss 0.9556 (1.1795) acc 75.0000 (70.6010) lr 1.7705e-03 eta 7:10:50 +epoch [13/50] batch [135/500] time 1.374 (1.370) data 0.000 (0.006) loss 0.6167 (1.1780) acc 81.2500 (70.6944) lr 1.7705e-03 eta 7:10:37 +epoch [13/50] batch [140/500] time 1.348 (1.369) data 0.000 (0.006) loss 1.0156 (1.1783) acc 71.8750 (70.8259) lr 1.7705e-03 eta 7:10:19 +epoch [13/50] batch [145/500] time 1.351 (1.368) data 0.000 (0.006) loss 1.0186 (1.1728) acc 75.0000 (70.8836) lr 1.7705e-03 eta 7:10:02 +epoch [13/50] batch [150/500] time 1.364 (1.368) data 0.000 (0.006) loss 1.3125 (1.1705) acc 62.5000 (71.0000) lr 1.7705e-03 eta 7:09:43 +epoch [13/50] batch [155/500] time 1.358 (1.368) data 0.000 (0.006) loss 1.2129 (1.1686) acc 65.6250 (70.9677) lr 1.7705e-03 eta 7:09:33 +epoch [13/50] batch [160/500] time 1.383 (1.367) data 0.000 (0.005) loss 0.7778 (1.1711) acc 84.3750 (70.7227) lr 1.7705e-03 eta 7:09:20 +epoch [13/50] batch [165/500] time 1.354 (1.367) data 0.000 (0.005) loss 1.3193 (1.1764) acc 62.5000 (70.5682) lr 1.7705e-03 eta 7:09:02 +epoch [13/50] batch [170/500] time 1.353 (1.367) data 0.000 (0.005) loss 1.4502 (1.1834) acc 62.5000 (70.4412) lr 1.7705e-03 eta 7:08:59 +epoch [13/50] batch [175/500] time 1.364 (1.367) data 0.000 (0.005) loss 0.9487 (1.1819) acc 78.1250 (70.4107) lr 1.7705e-03 eta 7:08:45 +epoch [13/50] batch [180/500] time 1.358 (1.366) data 0.000 (0.005) loss 0.6421 (1.1736) acc 81.2500 (70.6076) lr 1.7705e-03 eta 7:08:32 +epoch [13/50] batch [185/500] time 1.377 
(1.366) data 0.000 (0.005) loss 1.4658 (1.1738) acc 68.7500 (70.6588) lr 1.7705e-03 eta 7:08:24 +epoch [13/50] batch [190/500] time 1.365 (1.366) data 0.000 (0.005) loss 0.9287 (1.1766) acc 81.2500 (70.6579) lr 1.7705e-03 eta 7:08:13 +epoch [13/50] batch [195/500] time 1.327 (1.366) data 0.001 (0.005) loss 1.2373 (1.1814) acc 75.0000 (70.5929) lr 1.7705e-03 eta 7:08:01 +epoch [13/50] batch [200/500] time 1.375 (1.366) data 0.000 (0.004) loss 1.0518 (1.1856) acc 71.8750 (70.5000) lr 1.7705e-03 eta 7:07:59 +epoch [13/50] batch [205/500] time 1.345 (1.366) data 0.000 (0.004) loss 0.9351 (1.1869) acc 75.0000 (70.4268) lr 1.7705e-03 eta 7:07:47 +epoch [13/50] batch [210/500] time 1.357 (1.365) data 0.000 (0.004) loss 0.8467 (1.1857) acc 71.8750 (70.3869) lr 1.7705e-03 eta 7:07:36 +epoch [13/50] batch [215/500] time 1.348 (1.365) data 0.000 (0.004) loss 0.8271 (1.1839) acc 78.1250 (70.3924) lr 1.7705e-03 eta 7:07:26 +epoch [13/50] batch [220/500] time 1.369 (1.365) data 0.000 (0.004) loss 1.2754 (1.1817) acc 75.0000 (70.4119) lr 1.7705e-03 eta 7:07:23 +epoch [13/50] batch [225/500] time 1.340 (1.365) data 0.000 (0.004) loss 1.1396 (1.1820) acc 59.3750 (70.2778) lr 1.7705e-03 eta 7:07:12 +epoch [13/50] batch [230/500] time 1.371 (1.365) data 0.000 (0.004) loss 1.8379 (1.1864) acc 65.6250 (70.2446) lr 1.7705e-03 eta 7:07:08 +epoch [13/50] batch [235/500] time 1.360 (1.365) data 0.000 (0.004) loss 0.9312 (1.1841) acc 78.1250 (70.3457) lr 1.7705e-03 eta 7:07:00 +epoch [13/50] batch [240/500] time 1.357 (1.365) data 0.000 (0.004) loss 0.6338 (1.1789) acc 84.3750 (70.4948) lr 1.7705e-03 eta 7:06:49 +epoch [13/50] batch [245/500] time 1.335 (1.365) data 0.000 (0.004) loss 1.2432 (1.1790) acc 65.6250 (70.5485) lr 1.7705e-03 eta 7:06:35 +epoch [13/50] batch [250/500] time 1.350 (1.364) data 0.000 (0.004) loss 1.4277 (1.1814) acc 65.6250 (70.5500) lr 1.7705e-03 eta 7:06:23 +epoch [13/50] batch [255/500] time 1.353 (1.364) data 0.000 (0.004) loss 0.9229 (1.1812) acc 81.2500 
(70.5147) lr 1.7705e-03 eta 7:06:16 +epoch [13/50] batch [260/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.8535 (1.1770) acc 84.3750 (70.5889) lr 1.7705e-03 eta 7:06:04 +epoch [13/50] batch [265/500] time 1.341 (1.364) data 0.000 (0.003) loss 1.1855 (1.1756) acc 59.3750 (70.5660) lr 1.7705e-03 eta 7:05:54 +epoch [13/50] batch [270/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.2402 (1.1751) acc 78.1250 (70.6250) lr 1.7705e-03 eta 7:05:53 +epoch [13/50] batch [275/500] time 1.349 (1.364) data 0.000 (0.003) loss 1.3965 (1.1752) acc 75.0000 (70.5795) lr 1.7705e-03 eta 7:05:43 +epoch [13/50] batch [280/500] time 1.345 (1.364) data 0.000 (0.003) loss 1.6260 (1.1749) acc 46.8750 (70.5246) lr 1.7705e-03 eta 7:05:33 +epoch [13/50] batch [285/500] time 1.349 (1.364) data 0.000 (0.003) loss 0.7803 (1.1755) acc 75.0000 (70.5373) lr 1.7705e-03 eta 7:05:23 +epoch [13/50] batch [290/500] time 1.384 (1.364) data 0.000 (0.003) loss 1.0127 (1.1747) acc 78.1250 (70.6466) lr 1.7705e-03 eta 7:05:16 +epoch [13/50] batch [295/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.8433 (1.1748) acc 75.0000 (70.6250) lr 1.7705e-03 eta 7:05:11 +epoch [13/50] batch [300/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.2012 (1.1746) acc 68.7500 (70.5729) lr 1.7705e-03 eta 7:05:04 +epoch [13/50] batch [305/500] time 1.382 (1.364) data 0.000 (0.003) loss 1.3916 (1.1752) acc 59.3750 (70.5738) lr 1.7705e-03 eta 7:04:59 +epoch [13/50] batch [310/500] time 1.493 (1.364) data 0.000 (0.003) loss 1.3291 (1.1770) acc 71.8750 (70.5847) lr 1.7705e-03 eta 7:04:58 +epoch [13/50] batch [315/500] time 1.328 (1.364) data 0.000 (0.003) loss 1.4170 (1.1774) acc 71.8750 (70.5853) lr 1.7705e-03 eta 7:04:47 +epoch [13/50] batch [320/500] time 1.358 (1.364) data 0.000 (0.003) loss 1.3447 (1.1727) acc 68.7500 (70.7227) lr 1.7705e-03 eta 7:04:38 +epoch [13/50] batch [325/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.7090 (1.1737) acc 62.5000 (70.6923) lr 1.7705e-03 eta 7:04:31 +epoch [13/50] batch [330/500] 
time 1.362 (1.364) data 0.000 (0.003) loss 1.4355 (1.1728) acc 59.3750 (70.7292) lr 1.7705e-03 eta 7:04:22 +epoch [13/50] batch [335/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.3086 (1.1723) acc 68.7500 (70.7463) lr 1.7705e-03 eta 7:04:15 +epoch [13/50] batch [340/500] time 1.342 (1.364) data 0.000 (0.003) loss 1.3799 (1.1702) acc 71.8750 (70.7904) lr 1.7705e-03 eta 7:04:05 +epoch [13/50] batch [345/500] time 1.362 (1.364) data 0.001 (0.003) loss 1.0732 (1.1691) acc 71.8750 (70.8243) lr 1.7705e-03 eta 7:03:56 +epoch [13/50] batch [350/500] time 1.359 (1.363) data 0.000 (0.003) loss 1.2002 (1.1686) acc 62.5000 (70.7589) lr 1.7705e-03 eta 7:03:46 +epoch [13/50] batch [355/500] time 1.366 (1.363) data 0.000 (0.003) loss 1.3379 (1.1677) acc 68.7500 (70.7923) lr 1.7705e-03 eta 7:03:39 +epoch [13/50] batch [360/500] time 1.364 (1.363) data 0.000 (0.003) loss 1.1582 (1.1656) acc 71.8750 (70.8767) lr 1.7705e-03 eta 7:03:29 +epoch [13/50] batch [365/500] time 1.388 (1.363) data 0.000 (0.003) loss 0.8320 (1.1686) acc 81.2500 (70.8219) lr 1.7705e-03 eta 7:03:21 +epoch [13/50] batch [370/500] time 1.329 (1.363) data 0.000 (0.003) loss 1.3408 (1.1713) acc 65.6250 (70.7348) lr 1.7705e-03 eta 7:03:14 +epoch [13/50] batch [375/500] time 1.351 (1.363) data 0.000 (0.003) loss 1.2412 (1.1742) acc 68.7500 (70.6667) lr 1.7705e-03 eta 7:03:05 +epoch [13/50] batch [380/500] time 1.341 (1.363) data 0.000 (0.003) loss 1.3594 (1.1713) acc 62.5000 (70.6908) lr 1.7705e-03 eta 7:02:56 +epoch [13/50] batch [385/500] time 1.343 (1.363) data 0.000 (0.003) loss 0.8389 (1.1720) acc 75.0000 (70.6169) lr 1.7705e-03 eta 7:02:46 +epoch [13/50] batch [390/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.7017 (1.1713) acc 78.1250 (70.6250) lr 1.7705e-03 eta 7:02:38 +epoch [13/50] batch [395/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.8599 (1.1646) acc 84.3750 (70.8149) lr 1.7705e-03 eta 7:02:32 +epoch [13/50] batch [400/500] time 1.353 (1.363) data 0.000 (0.002) loss 0.7656 (1.1621) acc 
78.1250 (70.8750) lr 1.7705e-03 eta 7:02:25 +epoch [13/50] batch [405/500] time 1.337 (1.363) data 0.000 (0.002) loss 1.1152 (1.1604) acc 78.1250 (70.9568) lr 1.7705e-03 eta 7:02:16 +epoch [13/50] batch [410/500] time 1.338 (1.363) data 0.000 (0.002) loss 1.0068 (1.1583) acc 62.5000 (70.9604) lr 1.7705e-03 eta 7:02:12 +epoch [13/50] batch [415/500] time 1.360 (1.363) data 0.000 (0.002) loss 0.9961 (1.1576) acc 78.1250 (70.9940) lr 1.7705e-03 eta 7:02:03 +epoch [13/50] batch [420/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.1494 (1.1576) acc 68.7500 (70.9896) lr 1.7705e-03 eta 7:01:56 +epoch [13/50] batch [425/500] time 1.337 (1.362) data 0.000 (0.002) loss 1.2158 (1.1599) acc 71.8750 (71.0074) lr 1.7705e-03 eta 7:01:46 +epoch [13/50] batch [430/500] time 1.380 (1.362) data 0.000 (0.002) loss 0.7871 (1.1578) acc 81.2500 (71.0465) lr 1.7705e-03 eta 7:01:39 +epoch [13/50] batch [435/500] time 1.361 (1.362) data 0.000 (0.002) loss 1.4854 (1.1594) acc 71.8750 (71.0201) lr 1.7705e-03 eta 7:01:32 +epoch [13/50] batch [440/500] time 1.338 (1.362) data 0.001 (0.002) loss 1.2412 (1.1573) acc 62.5000 (71.0369) lr 1.7705e-03 eta 7:01:25 +epoch [13/50] batch [445/500] time 1.372 (1.362) data 0.000 (0.002) loss 1.3262 (1.1579) acc 75.0000 (71.0534) lr 1.7705e-03 eta 7:01:20 +epoch [13/50] batch [450/500] time 1.370 (1.363) data 0.000 (0.002) loss 1.6230 (1.1579) acc 75.0000 (71.1111) lr 1.7705e-03 eta 7:01:15 +epoch [13/50] batch [455/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.3594 (1.1586) acc 65.6250 (71.0989) lr 1.7705e-03 eta 7:01:14 +epoch [13/50] batch [460/500] time 1.345 (1.363) data 0.000 (0.002) loss 1.0811 (1.1610) acc 81.2500 (71.0258) lr 1.7705e-03 eta 7:01:04 +epoch [13/50] batch [465/500] time 1.371 (1.363) data 0.000 (0.002) loss 0.9829 (1.1600) acc 71.8750 (71.0215) lr 1.7705e-03 eta 7:00:59 +epoch [13/50] batch [470/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.6733 (1.1582) acc 78.1250 (71.0372) lr 1.7705e-03 eta 7:00:52 +epoch [13/50] batch 
[475/500] time 1.351 (1.363) data 0.001 (0.002) loss 1.3477 (1.1577) acc 59.3750 (71.0000) lr 1.7705e-03 eta 7:00:43 +epoch [13/50] batch [480/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.0264 (1.1588) acc 71.8750 (70.9896) lr 1.7705e-03 eta 7:00:37 +epoch [13/50] batch [485/500] time 1.356 (1.363) data 0.001 (0.002) loss 1.0439 (1.1565) acc 68.7500 (71.0245) lr 1.7705e-03 eta 7:00:31 +epoch [13/50] batch [490/500] time 1.353 (1.363) data 0.000 (0.002) loss 0.9468 (1.1574) acc 78.1250 (71.0013) lr 1.7705e-03 eta 7:00:23 +epoch [13/50] batch [495/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.9082 (1.1567) acc 78.1250 (71.0101) lr 1.7705e-03 eta 7:00:17 +epoch [13/50] batch [500/500] time 1.345 (1.363) data 0.000 (0.002) loss 1.6299 (1.1568) acc 65.6250 (71.0375) lr 1.7290e-03 eta 7:00:08 +epoch [14/50] batch [5/500] time 1.342 (1.532) data 0.000 (0.171) loss 0.6768 (1.0694) acc 81.2500 (68.7500) lr 1.7290e-03 eta 7:52:12 +epoch [14/50] batch [10/500] time 1.344 (1.441) data 0.000 (0.086) loss 0.9653 (1.1501) acc 71.8750 (69.3750) lr 1.7290e-03 eta 7:24:12 +epoch [14/50] batch [15/500] time 1.360 (1.414) data 0.000 (0.057) loss 1.2920 (1.1634) acc 65.6250 (69.1667) lr 1.7290e-03 eta 7:15:43 +epoch [14/50] batch [20/500] time 1.341 (1.400) data 0.000 (0.043) loss 1.5527 (1.1520) acc 62.5000 (70.3125) lr 1.7290e-03 eta 7:11:18 +epoch [14/50] batch [25/500] time 1.345 (1.392) data 0.000 (0.034) loss 1.0186 (1.1622) acc 68.7500 (70.3750) lr 1.7290e-03 eta 7:08:37 +epoch [14/50] batch [30/500] time 1.358 (1.386) data 0.000 (0.029) loss 1.1768 (1.1747) acc 75.0000 (69.7917) lr 1.7290e-03 eta 7:06:41 +epoch [14/50] batch [35/500] time 1.367 (1.383) data 0.000 (0.025) loss 0.8545 (1.1630) acc 75.0000 (70.0893) lr 1.7290e-03 eta 7:05:28 +epoch [14/50] batch [40/500] time 1.332 (1.377) data 0.000 (0.022) loss 0.9849 (1.1484) acc 78.1250 (70.6250) lr 1.7290e-03 eta 7:03:36 +epoch [14/50] batch [45/500] time 1.333 (1.374) data 0.000 (0.019) loss 0.9951 (1.1483) acc 
68.7500 (70.3472) lr 1.7290e-03 eta 7:02:39 +epoch [14/50] batch [50/500] time 1.383 (1.372) data 0.000 (0.017) loss 0.9604 (1.1232) acc 75.0000 (70.4375) lr 1.7290e-03 eta 7:01:50 +epoch [14/50] batch [55/500] time 1.335 (1.371) data 0.000 (0.016) loss 1.4717 (1.1386) acc 75.0000 (70.5114) lr 1.7290e-03 eta 7:01:23 +epoch [14/50] batch [60/500] time 1.379 (1.370) data 0.000 (0.015) loss 1.6436 (1.1180) acc 59.3750 (71.0417) lr 1.7290e-03 eta 7:01:09 +epoch [14/50] batch [65/500] time 1.402 (1.369) data 0.000 (0.013) loss 1.0273 (1.1202) acc 75.0000 (71.0577) lr 1.7290e-03 eta 7:00:44 +epoch [14/50] batch [70/500] time 1.360 (1.369) data 0.000 (0.013) loss 1.1416 (1.1113) acc 71.8750 (71.4286) lr 1.7290e-03 eta 7:00:28 +epoch [14/50] batch [75/500] time 1.356 (1.368) data 0.000 (0.012) loss 1.2344 (1.1064) acc 68.7500 (71.5417) lr 1.7290e-03 eta 7:00:08 +epoch [14/50] batch [80/500] time 1.353 (1.367) data 0.000 (0.011) loss 1.3164 (1.1070) acc 71.8750 (71.4844) lr 1.7290e-03 eta 6:59:38 +epoch [14/50] batch [85/500] time 1.353 (1.366) data 0.000 (0.010) loss 1.0371 (1.1032) acc 71.8750 (71.4706) lr 1.7290e-03 eta 6:59:15 +epoch [14/50] batch [90/500] time 1.350 (1.365) data 0.000 (0.010) loss 1.3711 (1.1058) acc 68.7500 (71.5972) lr 1.7290e-03 eta 6:58:46 +epoch [14/50] batch [95/500] time 1.360 (1.366) data 0.000 (0.009) loss 1.4590 (1.1127) acc 62.5000 (71.6447) lr 1.7290e-03 eta 6:58:56 +epoch [14/50] batch [100/500] time 1.371 (1.366) data 0.000 (0.009) loss 1.4883 (1.1261) acc 53.1250 (71.3438) lr 1.7290e-03 eta 6:58:47 +epoch [14/50] batch [105/500] time 1.371 (1.365) data 0.000 (0.008) loss 0.7422 (1.1188) acc 81.2500 (71.5476) lr 1.7290e-03 eta 6:58:37 +epoch [14/50] batch [110/500] time 1.358 (1.365) data 0.001 (0.008) loss 0.9941 (1.1229) acc 78.1250 (71.5057) lr 1.7290e-03 eta 6:58:27 +epoch [14/50] batch [115/500] time 1.378 (1.366) data 0.001 (0.008) loss 1.1475 (1.1228) acc 75.0000 (71.5217) lr 1.7290e-03 eta 6:58:24 +epoch [14/50] batch [120/500] 
time 1.374 (1.365) data 0.000 (0.007) loss 1.0654 (1.1169) acc 68.7500 (71.5365) lr 1.7290e-03 eta 6:58:09 +epoch [14/50] batch [125/500] time 1.355 (1.365) data 0.000 (0.007) loss 1.0723 (1.1129) acc 71.8750 (71.5750) lr 1.7290e-03 eta 6:57:59 +epoch [14/50] batch [130/500] time 1.367 (1.365) data 0.000 (0.007) loss 0.8511 (1.1119) acc 75.0000 (71.5625) lr 1.7290e-03 eta 6:57:57 +epoch [14/50] batch [135/500] time 1.357 (1.365) data 0.000 (0.007) loss 1.5088 (1.1144) acc 56.2500 (71.5509) lr 1.7290e-03 eta 6:57:42 +epoch [14/50] batch [140/500] time 1.353 (1.366) data 0.000 (0.006) loss 0.7266 (1.1126) acc 68.7500 (71.5402) lr 1.7290e-03 eta 6:57:53 +epoch [14/50] batch [145/500] time 1.349 (1.365) data 0.000 (0.006) loss 1.2900 (1.1103) acc 71.8750 (71.8103) lr 1.7290e-03 eta 6:57:43 +epoch [14/50] batch [150/500] time 1.349 (1.365) data 0.000 (0.006) loss 1.3340 (1.1089) acc 65.6250 (71.8958) lr 1.7290e-03 eta 6:57:35 +epoch [14/50] batch [155/500] time 1.358 (1.366) data 0.000 (0.006) loss 1.0117 (1.1066) acc 71.8750 (72.0363) lr 1.7290e-03 eta 6:57:31 +epoch [14/50] batch [160/500] time 1.381 (1.365) data 0.000 (0.006) loss 0.8862 (1.1029) acc 78.1250 (72.1289) lr 1.7290e-03 eta 6:57:22 +epoch [14/50] batch [165/500] time 1.374 (1.365) data 0.000 (0.006) loss 1.3877 (1.1020) acc 71.8750 (72.2538) lr 1.7290e-03 eta 6:57:09 +epoch [14/50] batch [170/500] time 1.378 (1.365) data 0.000 (0.005) loss 1.3994 (1.1051) acc 71.8750 (72.1324) lr 1.7290e-03 eta 6:57:02 +epoch [14/50] batch [175/500] time 1.364 (1.365) data 0.000 (0.005) loss 0.8364 (1.1052) acc 81.2500 (72.1607) lr 1.7290e-03 eta 6:56:54 +epoch [14/50] batch [180/500] time 1.378 (1.365) data 0.000 (0.005) loss 1.6064 (1.1074) acc 68.7500 (72.2396) lr 1.7290e-03 eta 6:56:47 +epoch [14/50] batch [185/500] time 1.375 (1.365) data 0.000 (0.005) loss 0.9722 (1.1084) acc 65.6250 (72.1453) lr 1.7290e-03 eta 6:56:38 +epoch [14/50] batch [190/500] time 1.365 (1.365) data 0.000 (0.005) loss 1.0342 (1.1100) acc 
71.8750 (72.0888) lr 1.7290e-03 eta 6:56:31 +epoch [14/50] batch [195/500] time 1.365 (1.365) data 0.000 (0.005) loss 1.4316 (1.1143) acc 65.6250 (71.9712) lr 1.7290e-03 eta 6:56:24 +epoch [14/50] batch [200/500] time 1.351 (1.365) data 0.000 (0.005) loss 1.0361 (1.1063) acc 71.8750 (72.0625) lr 1.7290e-03 eta 6:56:16 +epoch [14/50] batch [205/500] time 1.390 (1.365) data 0.000 (0.005) loss 1.6992 (1.1062) acc 59.3750 (72.0732) lr 1.7290e-03 eta 6:56:15 +epoch [14/50] batch [210/500] time 1.372 (1.365) data 0.000 (0.004) loss 1.5742 (1.1056) acc 65.6250 (72.0685) lr 1.7290e-03 eta 6:56:12 +epoch [14/50] batch [215/500] time 1.366 (1.365) data 0.000 (0.004) loss 1.2627 (1.1068) acc 68.7500 (72.0349) lr 1.7290e-03 eta 6:56:01 +epoch [14/50] batch [220/500] time 1.350 (1.365) data 0.000 (0.004) loss 1.0586 (1.1057) acc 71.8750 (71.9886) lr 1.7290e-03 eta 6:55:52 +epoch [14/50] batch [225/500] time 1.354 (1.365) data 0.000 (0.004) loss 1.4795 (1.1070) acc 71.8750 (71.9444) lr 1.7290e-03 eta 6:55:45 +epoch [14/50] batch [230/500] time 1.351 (1.365) data 0.000 (0.004) loss 1.1074 (1.1113) acc 62.5000 (71.7799) lr 1.7290e-03 eta 6:55:33 +epoch [14/50] batch [235/500] time 1.487 (1.365) data 0.000 (0.004) loss 0.6909 (1.1113) acc 81.2500 (71.7819) lr 1.7290e-03 eta 6:55:35 +epoch [14/50] batch [240/500] time 1.359 (1.365) data 0.000 (0.004) loss 1.4912 (1.1069) acc 59.3750 (71.8620) lr 1.7290e-03 eta 6:55:24 +epoch [14/50] batch [245/500] time 1.363 (1.365) data 0.000 (0.004) loss 0.8760 (1.1060) acc 75.0000 (71.8622) lr 1.7290e-03 eta 6:55:18 +epoch [14/50] batch [250/500] time 1.370 (1.365) data 0.000 (0.004) loss 1.5205 (1.1158) acc 65.6250 (71.7000) lr 1.7290e-03 eta 6:55:09 +epoch [14/50] batch [255/500] time 1.388 (1.365) data 0.000 (0.004) loss 1.2568 (1.1132) acc 71.8750 (71.6912) lr 1.7290e-03 eta 6:55:05 +epoch [14/50] batch [260/500] time 1.356 (1.365) data 0.000 (0.004) loss 1.6631 (1.1143) acc 68.7500 (71.6947) lr 1.7290e-03 eta 6:55:01 +epoch [14/50] batch 
[265/500] time 1.367 (1.365) data 0.000 (0.004) loss 1.5723 (1.1123) acc 53.1250 (71.6509) lr 1.7290e-03 eta 6:54:53 +epoch [14/50] batch [270/500] time 1.343 (1.365) data 0.000 (0.004) loss 0.7944 (1.1090) acc 75.0000 (71.7245) lr 1.7290e-03 eta 6:54:43 +epoch [14/50] batch [275/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.2705 (1.1111) acc 75.0000 (71.7045) lr 1.7290e-03 eta 6:54:32 +epoch [14/50] batch [280/500] time 1.353 (1.365) data 0.000 (0.003) loss 1.2051 (1.1120) acc 65.6250 (71.7076) lr 1.7290e-03 eta 6:54:30 +epoch [14/50] batch [285/500] time 1.353 (1.365) data 0.001 (0.003) loss 1.2588 (1.1118) acc 78.1250 (71.6996) lr 1.7290e-03 eta 6:54:21 +epoch [14/50] batch [290/500] time 1.364 (1.365) data 0.001 (0.003) loss 1.2275 (1.1140) acc 65.6250 (71.6056) lr 1.7290e-03 eta 6:54:12 +epoch [14/50] batch [295/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.4414 (1.1128) acc 71.8750 (71.6419) lr 1.7290e-03 eta 6:54:02 +epoch [14/50] batch [300/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.5918 (1.1167) acc 59.3750 (71.5521) lr 1.7290e-03 eta 6:53:53 +epoch [14/50] batch [305/500] time 1.357 (1.364) data 0.000 (0.003) loss 1.1650 (1.1165) acc 68.7500 (71.5676) lr 1.7290e-03 eta 6:53:46 +epoch [14/50] batch [310/500] time 1.341 (1.364) data 0.000 (0.003) loss 0.8530 (1.1141) acc 78.1250 (71.6532) lr 1.7290e-03 eta 6:53:37 +epoch [14/50] batch [315/500] time 1.370 (1.364) data 0.000 (0.003) loss 1.1553 (1.1160) acc 68.7500 (71.6468) lr 1.7290e-03 eta 6:53:32 +epoch [14/50] batch [320/500] time 1.395 (1.364) data 0.000 (0.003) loss 1.1582 (1.1191) acc 71.8750 (71.6113) lr 1.7290e-03 eta 6:53:24 +epoch [14/50] batch [325/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.1934 (1.1225) acc 68.7500 (71.5385) lr 1.7290e-03 eta 6:53:19 +epoch [14/50] batch [330/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.4658 (1.1227) acc 65.6250 (71.5530) lr 1.7290e-03 eta 6:53:10 +epoch [14/50] batch [335/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.0283 
(1.1275) acc 75.0000 (71.4366) lr 1.7290e-03 eta 6:53:01 +epoch [14/50] batch [340/500] time 1.351 (1.364) data 0.000 (0.003) loss 1.1523 (1.1279) acc 71.8750 (71.4706) lr 1.7290e-03 eta 6:52:51 +epoch [14/50] batch [345/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.1309 (1.1264) acc 65.6250 (71.4493) lr 1.7290e-03 eta 6:52:43 +epoch [14/50] batch [350/500] time 1.394 (1.364) data 0.000 (0.003) loss 0.9775 (1.1228) acc 75.0000 (71.5446) lr 1.7290e-03 eta 6:52:37 +epoch [14/50] batch [355/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.5215 (1.1198) acc 62.5000 (71.6197) lr 1.7290e-03 eta 6:52:27 +epoch [14/50] batch [360/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.6255 (1.1226) acc 87.5000 (71.5712) lr 1.7290e-03 eta 6:52:21 +epoch [14/50] batch [365/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.5566 (1.1269) acc 53.1250 (71.5068) lr 1.7290e-03 eta 6:52:14 +epoch [14/50] batch [370/500] time 1.376 (1.364) data 0.000 (0.003) loss 1.0811 (1.1271) acc 65.6250 (71.4865) lr 1.7290e-03 eta 6:52:06 +epoch [14/50] batch [375/500] time 1.335 (1.364) data 0.000 (0.003) loss 1.8545 (1.1256) acc 56.2500 (71.5333) lr 1.7290e-03 eta 6:51:57 +epoch [14/50] batch [380/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.0547 (1.1253) acc 62.5000 (71.4967) lr 1.7290e-03 eta 6:51:52 +epoch [14/50] batch [385/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.3027 (1.1245) acc 78.1250 (71.5179) lr 1.7290e-03 eta 6:51:43 +epoch [14/50] batch [390/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.0947 (1.1230) acc 75.0000 (71.5224) lr 1.7290e-03 eta 6:51:36 +epoch [14/50] batch [395/500] time 1.366 (1.364) data 0.000 (0.003) loss 0.7480 (1.1221) acc 81.2500 (71.4953) lr 1.7290e-03 eta 6:51:27 +epoch [14/50] batch [400/500] time 1.356 (1.364) data 0.000 (0.002) loss 1.5557 (1.1221) acc 59.3750 (71.4844) lr 1.7290e-03 eta 6:51:20 +epoch [14/50] batch [405/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.8535 (1.1242) acc 68.7500 (71.4429) lr 1.7290e-03 eta 6:51:12 +epoch 
[14/50] batch [410/500] time 1.355 (1.363) data 0.000 (0.002) loss 0.7417 (1.1249) acc 75.0000 (71.4634) lr 1.7290e-03 eta 6:51:04 +epoch [14/50] batch [415/500] time 1.337 (1.363) data 0.000 (0.002) loss 1.5215 (1.1240) acc 56.2500 (71.5060) lr 1.7290e-03 eta 6:50:54 +epoch [14/50] batch [420/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.6865 (1.1208) acc 75.0000 (71.5030) lr 1.7290e-03 eta 6:50:44 +epoch [14/50] batch [425/500] time 1.345 (1.363) data 0.000 (0.002) loss 0.9922 (1.1252) acc 56.2500 (71.4118) lr 1.7290e-03 eta 6:50:39 +epoch [14/50] batch [430/500] time 1.345 (1.363) data 0.000 (0.002) loss 0.5884 (1.1260) acc 84.3750 (71.3953) lr 1.7290e-03 eta 6:50:31 +epoch [14/50] batch [435/500] time 1.332 (1.363) data 0.000 (0.002) loss 1.1670 (1.1248) acc 65.6250 (71.4296) lr 1.7290e-03 eta 6:50:24 +epoch [14/50] batch [440/500] time 1.376 (1.363) data 0.000 (0.002) loss 1.0479 (1.1245) acc 75.0000 (71.4276) lr 1.7290e-03 eta 6:50:17 +epoch [14/50] batch [445/500] time 1.364 (1.363) data 0.000 (0.002) loss 0.9199 (1.1256) acc 71.8750 (71.3834) lr 1.7290e-03 eta 6:50:09 +epoch [14/50] batch [450/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.6309 (1.1263) acc 62.5000 (71.3681) lr 1.7290e-03 eta 6:50:01 +epoch [14/50] batch [455/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.4424 (1.1259) acc 71.8750 (71.4148) lr 1.7290e-03 eta 6:49:53 +epoch [14/50] batch [460/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.3057 (1.1259) acc 65.6250 (71.3927) lr 1.7290e-03 eta 6:49:46 +epoch [14/50] batch [465/500] time 1.347 (1.363) data 0.000 (0.002) loss 1.3965 (1.1285) acc 59.3750 (71.3105) lr 1.7290e-03 eta 6:49:39 +epoch [14/50] batch [470/500] time 1.343 (1.363) data 0.000 (0.002) loss 1.3750 (1.1304) acc 75.0000 (71.2699) lr 1.7290e-03 eta 6:49:31 +epoch [14/50] batch [475/500] time 1.345 (1.363) data 0.000 (0.002) loss 0.9082 (1.1283) acc 78.1250 (71.3092) lr 1.7290e-03 eta 6:49:23 +epoch [14/50] batch [480/500] time 1.358 (1.363) data 0.000 (0.002) loss 
1.2920 (1.1285) acc 65.6250 (71.3021) lr 1.7290e-03 eta 6:49:16 +epoch [14/50] batch [485/500] time 1.376 (1.363) data 0.001 (0.002) loss 1.3232 (1.1286) acc 65.6250 (71.3080) lr 1.7290e-03 eta 6:49:10 +epoch [14/50] batch [490/500] time 1.377 (1.363) data 0.000 (0.002) loss 1.0674 (1.1268) acc 71.8750 (71.3457) lr 1.7290e-03 eta 6:49:04 +epoch [14/50] batch [495/500] time 1.385 (1.363) data 0.000 (0.002) loss 0.9473 (1.1264) acc 68.7500 (71.3889) lr 1.7290e-03 eta 6:49:00 +epoch [14/50] batch [500/500] time 1.376 (1.363) data 0.000 (0.002) loss 1.2090 (1.1288) acc 68.7500 (71.3250) lr 1.6845e-03 eta 6:48:55 +epoch [15/50] batch [5/500] time 1.344 (1.520) data 0.001 (0.151) loss 1.0039 (0.9524) acc 78.1250 (78.1250) lr 1.6845e-03 eta 7:35:53 +epoch [15/50] batch [10/500] time 1.372 (1.443) data 0.000 (0.076) loss 0.5327 (0.9862) acc 93.7500 (78.1250) lr 1.6845e-03 eta 7:12:34 +epoch [15/50] batch [15/500] time 1.362 (1.417) data 0.000 (0.051) loss 0.7510 (0.9539) acc 87.5000 (78.1250) lr 1.6845e-03 eta 7:04:51 +epoch [15/50] batch [20/500] time 1.354 (1.403) data 0.000 (0.038) loss 1.3701 (0.9931) acc 62.5000 (76.7188) lr 1.6845e-03 eta 7:00:19 +epoch [15/50] batch [25/500] time 1.359 (1.394) data 0.000 (0.031) loss 1.0898 (0.9988) acc 65.6250 (75.5000) lr 1.6845e-03 eta 6:57:33 +epoch [15/50] batch [30/500] time 1.352 (1.388) data 0.000 (0.026) loss 1.2822 (1.0542) acc 62.5000 (74.0625) lr 1.6845e-03 eta 6:55:39 +epoch [15/50] batch [35/500] time 1.366 (1.384) data 0.000 (0.022) loss 1.5312 (1.0828) acc 65.6250 (73.3036) lr 1.6845e-03 eta 6:54:21 +epoch [15/50] batch [40/500] time 1.353 (1.380) data 0.000 (0.019) loss 1.4023 (1.0852) acc 62.5000 (72.8125) lr 1.6845e-03 eta 6:53:12 +epoch [15/50] batch [45/500] time 1.355 (1.377) data 0.000 (0.017) loss 1.1611 (1.1054) acc 75.0000 (72.2222) lr 1.6845e-03 eta 6:52:08 +epoch [15/50] batch [50/500] time 1.375 (1.377) data 0.000 (0.015) loss 0.5796 (1.0906) acc 84.3750 (72.2500) lr 1.6845e-03 eta 6:51:57 +epoch [15/50] 
batch [55/500] time 1.368 (1.377) data 0.000 (0.014) loss 1.2041 (1.1047) acc 68.7500 (72.2727) lr 1.6845e-03 eta 6:51:47 +epoch [15/50] batch [60/500] time 1.380 (1.376) data 0.000 (0.013) loss 0.9897 (1.1062) acc 62.5000 (72.2396) lr 1.6845e-03 eta 6:51:17 +epoch [15/50] batch [65/500] time 1.343 (1.373) data 0.000 (0.012) loss 1.1162 (1.1067) acc 68.7500 (71.9712) lr 1.6845e-03 eta 6:50:32 +epoch [15/50] batch [70/500] time 1.362 (1.372) data 0.000 (0.011) loss 0.7070 (1.1017) acc 71.8750 (72.0982) lr 1.6845e-03 eta 6:50:08 +epoch [15/50] batch [75/500] time 1.355 (1.373) data 0.000 (0.010) loss 1.6318 (1.0933) acc 56.2500 (72.1250) lr 1.6845e-03 eta 6:50:15 +epoch [15/50] batch [80/500] time 1.347 (1.372) data 0.000 (0.010) loss 0.9268 (1.0961) acc 78.1250 (71.7188) lr 1.6845e-03 eta 6:49:44 +epoch [15/50] batch [85/500] time 1.334 (1.370) data 0.000 (0.009) loss 0.7642 (1.0931) acc 90.6250 (71.9118) lr 1.6845e-03 eta 6:49:10 +epoch [15/50] batch [90/500] time 1.355 (1.369) data 0.000 (0.009) loss 0.7861 (1.0915) acc 71.8750 (71.9444) lr 1.6845e-03 eta 6:48:44 +epoch [15/50] batch [95/500] time 1.361 (1.369) data 0.000 (0.008) loss 1.0947 (1.0984) acc 75.0000 (71.8750) lr 1.6845e-03 eta 6:48:32 +epoch [15/50] batch [100/500] time 1.340 (1.368) data 0.000 (0.008) loss 0.9722 (1.1021) acc 75.0000 (71.8750) lr 1.6845e-03 eta 6:48:02 +epoch [15/50] batch [105/500] time 1.364 (1.367) data 0.000 (0.008) loss 1.5537 (1.1091) acc 75.0000 (71.7560) lr 1.6845e-03 eta 6:47:48 +epoch [15/50] batch [110/500] time 1.352 (1.367) data 0.000 (0.007) loss 0.6660 (1.1094) acc 75.0000 (71.8182) lr 1.6845e-03 eta 6:47:29 +epoch [15/50] batch [115/500] time 1.371 (1.367) data 0.000 (0.007) loss 1.0840 (1.1052) acc 81.2500 (72.0380) lr 1.6845e-03 eta 6:47:23 +epoch [15/50] batch [120/500] time 1.340 (1.367) data 0.000 (0.007) loss 1.4316 (1.1133) acc 68.7500 (71.8229) lr 1.6845e-03 eta 6:47:23 +epoch [15/50] batch [125/500] time 1.363 (1.367) data 0.000 (0.006) loss 0.9727 (1.1063) 
acc 78.1250 (72.0000) lr 1.6845e-03 eta 6:47:18 +epoch [15/50] batch [130/500] time 1.381 (1.367) data 0.001 (0.006) loss 0.7583 (1.1092) acc 78.1250 (71.9952) lr 1.6845e-03 eta 6:47:12 +epoch [15/50] batch [135/500] time 1.363 (1.367) data 0.000 (0.006) loss 1.2578 (1.1132) acc 62.5000 (71.8750) lr 1.6845e-03 eta 6:47:07 +epoch [15/50] batch [140/500] time 1.355 (1.367) data 0.000 (0.006) loss 1.4277 (1.1163) acc 75.0000 (71.8750) lr 1.6845e-03 eta 6:47:00 +epoch [15/50] batch [145/500] time 1.371 (1.367) data 0.000 (0.006) loss 0.9673 (1.1127) acc 71.8750 (71.7888) lr 1.6845e-03 eta 6:46:47 +epoch [15/50] batch [150/500] time 1.361 (1.367) data 0.000 (0.005) loss 1.3730 (1.1130) acc 62.5000 (71.6875) lr 1.6845e-03 eta 6:46:42 +epoch [15/50] batch [155/500] time 1.344 (1.367) data 0.000 (0.005) loss 1.3877 (1.1268) acc 65.6250 (71.5121) lr 1.6845e-03 eta 6:46:36 +epoch [15/50] batch [160/500] time 1.380 (1.367) data 0.000 (0.005) loss 0.8423 (1.1249) acc 75.0000 (71.5430) lr 1.6845e-03 eta 6:46:23 +epoch [15/50] batch [165/500] time 1.365 (1.367) data 0.000 (0.005) loss 1.2900 (1.1258) acc 59.3750 (71.4583) lr 1.6845e-03 eta 6:46:15 +epoch [15/50] batch [170/500] time 1.359 (1.366) data 0.000 (0.005) loss 0.9512 (1.1257) acc 81.2500 (71.3971) lr 1.6845e-03 eta 6:46:02 +epoch [15/50] batch [175/500] time 1.354 (1.366) data 0.000 (0.005) loss 1.0645 (1.1219) acc 71.8750 (71.4821) lr 1.6845e-03 eta 6:45:47 +epoch [15/50] batch [180/500] time 1.340 (1.366) data 0.000 (0.005) loss 1.6709 (1.1337) acc 53.1250 (71.3194) lr 1.6845e-03 eta 6:45:33 +epoch [15/50] batch [185/500] time 1.374 (1.366) data 0.000 (0.004) loss 0.7563 (1.1296) acc 84.3750 (71.4020) lr 1.6845e-03 eta 6:45:28 +epoch [15/50] batch [190/500] time 1.372 (1.366) data 0.000 (0.004) loss 1.4170 (1.1330) acc 59.3750 (71.3487) lr 1.6845e-03 eta 6:45:20 +epoch [15/50] batch [195/500] time 1.353 (1.365) data 0.000 (0.004) loss 1.4277 (1.1319) acc 68.7500 (71.4744) lr 1.6845e-03 eta 6:45:10 +epoch [15/50] 
batch [200/500] time 1.338 (1.365) data 0.000 (0.004) loss 1.5322 (1.1404) acc 68.7500 (71.3594) lr 1.6845e-03 eta 6:44:54 +epoch [15/50] batch [205/500] time 1.340 (1.365) data 0.000 (0.004) loss 0.8833 (1.1350) acc 75.0000 (71.5244) lr 1.6845e-03 eta 6:44:43 +epoch [15/50] batch [210/500] time 1.337 (1.364) data 0.000 (0.004) loss 0.9312 (1.1399) acc 81.2500 (71.4881) lr 1.6845e-03 eta 6:44:28 +epoch [15/50] batch [215/500] time 1.354 (1.364) data 0.000 (0.004) loss 1.3887 (1.1398) acc 65.6250 (71.5116) lr 1.6845e-03 eta 6:44:17 +epoch [15/50] batch [220/500] time 1.373 (1.364) data 0.000 (0.004) loss 1.1689 (1.1386) acc 71.8750 (71.5057) lr 1.6845e-03 eta 6:44:18 +epoch [15/50] batch [225/500] time 1.349 (1.364) data 0.000 (0.004) loss 1.4697 (1.1389) acc 65.6250 (71.5417) lr 1.6845e-03 eta 6:44:07 +epoch [15/50] batch [230/500] time 1.363 (1.364) data 0.000 (0.004) loss 0.7861 (1.1423) acc 78.1250 (71.5489) lr 1.6845e-03 eta 6:43:59 +epoch [15/50] batch [235/500] time 1.361 (1.364) data 0.000 (0.004) loss 1.2949 (1.1415) acc 71.8750 (71.6489) lr 1.6845e-03 eta 6:43:50 +epoch [15/50] batch [240/500] time 1.348 (1.364) data 0.001 (0.004) loss 0.7930 (1.1385) acc 81.2500 (71.6536) lr 1.6845e-03 eta 6:43:39 +epoch [15/50] batch [245/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.9014 (1.1427) acc 62.5000 (71.6582) lr 1.6845e-03 eta 6:43:35 +epoch [15/50] batch [250/500] time 1.347 (1.364) data 0.000 (0.003) loss 1.1543 (1.1417) acc 68.7500 (71.6250) lr 1.6845e-03 eta 6:43:28 +epoch [15/50] batch [255/500] time 1.370 (1.364) data 0.000 (0.003) loss 1.4336 (1.1411) acc 65.6250 (71.6299) lr 1.6845e-03 eta 6:43:21 +epoch [15/50] batch [260/500] time 1.483 (1.364) data 0.000 (0.003) loss 1.1445 (1.1379) acc 78.1250 (71.7428) lr 1.6845e-03 eta 6:43:24 +epoch [15/50] batch [265/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.0107 (1.1390) acc 81.2500 (71.7335) lr 1.6845e-03 eta 6:43:16 +epoch [15/50] batch [270/500] time 1.351 (1.364) data 0.000 (0.003) loss 1.7529 
(1.1412) acc 68.7500 (71.6551) lr 1.6845e-03 eta 6:43:09 +epoch [15/50] batch [275/500] time 1.368 (1.364) data 0.000 (0.003) loss 0.8887 (1.1410) acc 71.8750 (71.6705) lr 1.6845e-03 eta 6:43:03 +epoch [15/50] batch [280/500] time 1.356 (1.364) data 0.000 (0.003) loss 1.5410 (1.1411) acc 62.5000 (71.6741) lr 1.6845e-03 eta 6:42:52 +epoch [15/50] batch [285/500] time 1.366 (1.364) data 0.000 (0.003) loss 0.8389 (1.1397) acc 75.0000 (71.7434) lr 1.6845e-03 eta 6:42:44 +epoch [15/50] batch [290/500] time 1.364 (1.364) data 0.000 (0.003) loss 0.6230 (1.1353) acc 84.3750 (71.7780) lr 1.6845e-03 eta 6:42:39 +epoch [15/50] batch [295/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.2676 (1.1381) acc 75.0000 (71.7161) lr 1.6845e-03 eta 6:42:32 +epoch [15/50] batch [300/500] time 1.363 (1.364) data 0.000 (0.003) loss 0.8252 (1.1400) acc 68.7500 (71.6771) lr 1.6845e-03 eta 6:42:28 +epoch [15/50] batch [305/500] time 1.392 (1.364) data 0.000 (0.003) loss 1.1064 (1.1368) acc 71.8750 (71.7008) lr 1.6845e-03 eta 6:42:22 +epoch [15/50] batch [310/500] time 1.356 (1.364) data 0.000 (0.003) loss 0.7856 (1.1351) acc 75.0000 (71.6431) lr 1.6845e-03 eta 6:42:12 +epoch [15/50] batch [315/500] time 1.377 (1.364) data 0.000 (0.003) loss 0.5552 (1.1319) acc 84.3750 (71.7262) lr 1.6845e-03 eta 6:42:03 +epoch [15/50] batch [320/500] time 1.342 (1.364) data 0.000 (0.003) loss 2.2656 (1.1354) acc 59.3750 (71.6504) lr 1.6845e-03 eta 6:41:54 +epoch [15/50] batch [325/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.4229 (1.1365) acc 65.6250 (71.6058) lr 1.6845e-03 eta 6:41:49 +epoch [15/50] batch [330/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.2002 (1.1348) acc 68.7500 (71.6383) lr 1.6845e-03 eta 6:41:42 +epoch [15/50] batch [335/500] time 1.373 (1.364) data 0.000 (0.003) loss 0.9238 (1.1368) acc 71.8750 (71.5672) lr 1.6845e-03 eta 6:41:35 +epoch [15/50] batch [340/500] time 1.358 (1.364) data 0.000 (0.003) loss 0.7803 (1.1380) acc 68.7500 (71.5257) lr 1.6845e-03 eta 6:41:26 +epoch 
[15/50] batch [345/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.3867 (1.1360) acc 71.8750 (71.5761) lr 1.6845e-03 eta 6:41:16 +epoch [15/50] batch [350/500] time 1.340 (1.364) data 0.000 (0.003) loss 1.3271 (1.1363) acc 68.7500 (71.5625) lr 1.6845e-03 eta 6:41:06 +epoch [15/50] batch [355/500] time 1.365 (1.363) data 0.000 (0.002) loss 0.6924 (1.1348) acc 81.2500 (71.5581) lr 1.6845e-03 eta 6:40:58 +epoch [15/50] batch [360/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.8550 (1.1338) acc 84.3750 (71.6146) lr 1.6845e-03 eta 6:41:00 +epoch [15/50] batch [365/500] time 1.353 (1.364) data 0.000 (0.002) loss 0.6895 (1.1316) acc 87.5000 (71.6952) lr 1.6845e-03 eta 6:40:53 +epoch [15/50] batch [370/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.2139 (1.1317) acc 75.0000 (71.7483) lr 1.6845e-03 eta 6:40:43 +epoch [15/50] batch [375/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.1445 (1.1339) acc 68.7500 (71.6917) lr 1.6845e-03 eta 6:40:35 +epoch [15/50] batch [380/500] time 1.362 (1.364) data 0.000 (0.002) loss 0.6543 (1.1339) acc 78.1250 (71.7105) lr 1.6845e-03 eta 6:40:26 +epoch [15/50] batch [385/500] time 1.352 (1.363) data 0.001 (0.002) loss 1.6895 (1.1364) acc 56.2500 (71.6558) lr 1.6845e-03 eta 6:40:17 +epoch [15/50] batch [390/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.1152 (1.1336) acc 62.5000 (71.6987) lr 1.6845e-03 eta 6:40:09 +epoch [15/50] batch [395/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.5996 (1.1346) acc 65.6250 (71.7009) lr 1.6845e-03 eta 6:40:00 +epoch [15/50] batch [400/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.0410 (1.1329) acc 65.6250 (71.7500) lr 1.6845e-03 eta 6:39:51 +epoch [15/50] batch [405/500] time 1.388 (1.364) data 0.000 (0.002) loss 0.7437 (1.1329) acc 75.0000 (71.6744) lr 1.6845e-03 eta 6:39:50 +epoch [15/50] batch [410/500] time 1.357 (1.364) data 0.000 (0.002) loss 1.9492 (1.1348) acc 59.3750 (71.6235) lr 1.6845e-03 eta 6:39:45 +epoch [15/50] batch [415/500] time 1.357 (1.364) data 0.000 (0.002) loss 
1.2568 (1.1346) acc 71.8750 (71.6190) lr 1.6845e-03 eta 6:39:38 +epoch [15/50] batch [420/500] time 1.377 (1.364) data 0.000 (0.002) loss 1.5059 (1.1331) acc 65.6250 (71.6667) lr 1.6845e-03 eta 6:39:31 +epoch [15/50] batch [425/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.0322 (1.1348) acc 59.3750 (71.6029) lr 1.6845e-03 eta 6:39:22 +epoch [15/50] batch [430/500] time 1.342 (1.363) data 0.000 (0.002) loss 1.2354 (1.1360) acc 65.6250 (71.5552) lr 1.6845e-03 eta 6:39:12 +epoch [15/50] batch [435/500] time 1.351 (1.363) data 0.000 (0.002) loss 1.3838 (1.1391) acc 65.6250 (71.5014) lr 1.6845e-03 eta 6:39:05 +epoch [15/50] batch [440/500] time 1.345 (1.363) data 0.000 (0.002) loss 1.3643 (1.1403) acc 65.6250 (71.4489) lr 1.6845e-03 eta 6:38:57 +epoch [15/50] batch [445/500] time 1.332 (1.363) data 0.000 (0.002) loss 0.9683 (1.1386) acc 78.1250 (71.5239) lr 1.6845e-03 eta 6:38:47 +epoch [15/50] batch [450/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.3164 (1.1402) acc 68.7500 (71.4583) lr 1.6845e-03 eta 6:38:41 +epoch [15/50] batch [455/500] time 1.340 (1.363) data 0.000 (0.002) loss 1.3887 (1.1409) acc 71.8750 (71.4698) lr 1.6845e-03 eta 6:38:32 +epoch [15/50] batch [460/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.0850 (1.1400) acc 75.0000 (71.4878) lr 1.6845e-03 eta 6:38:24 +epoch [15/50] batch [465/500] time 1.341 (1.363) data 0.000 (0.002) loss 0.6953 (1.1388) acc 81.2500 (71.5255) lr 1.6845e-03 eta 6:38:17 +epoch [15/50] batch [470/500] time 1.358 (1.363) data 0.000 (0.002) loss 1.3975 (1.1394) acc 59.3750 (71.4894) lr 1.6845e-03 eta 6:38:11 +epoch [15/50] batch [475/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.3691 (1.1406) acc 68.7500 (71.4408) lr 1.6845e-03 eta 6:38:04 +epoch [15/50] batch [480/500] time 1.351 (1.363) data 0.000 (0.002) loss 0.9590 (1.1392) acc 84.3750 (71.5039) lr 1.6845e-03 eta 6:37:56 +epoch [15/50] batch [485/500] time 1.354 (1.363) data 0.001 (0.002) loss 0.7754 (1.1365) acc 81.2500 (71.5851) lr 1.6845e-03 eta 6:37:49 
+epoch [15/50] batch [490/500] time 1.385 (1.363) data 0.000 (0.002) loss 0.8550 (1.1343) acc 78.1250 (71.6263) lr 1.6845e-03 eta 6:37:43 +epoch [15/50] batch [495/500] time 1.378 (1.363) data 0.000 (0.002) loss 0.9746 (1.1345) acc 75.0000 (71.6225) lr 1.6845e-03 eta 6:37:36 +epoch [15/50] batch [500/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.0596 (1.1339) acc 78.1250 (71.6625) lr 1.6374e-03 eta 6:37:29 +epoch [16/50] batch [5/500] time 1.345 (1.569) data 0.000 (0.169) loss 1.3291 (1.0121) acc 62.5000 (73.1250) lr 1.6374e-03 eta 7:37:32 +epoch [16/50] batch [10/500] time 1.361 (1.464) data 0.000 (0.085) loss 1.0264 (1.1113) acc 75.0000 (70.9375) lr 1.6374e-03 eta 7:06:40 +epoch [16/50] batch [15/500] time 1.368 (1.431) data 0.000 (0.057) loss 1.3564 (1.1447) acc 62.5000 (72.0833) lr 1.6374e-03 eta 6:57:02 +epoch [16/50] batch [20/500] time 1.400 (1.418) data 0.001 (0.043) loss 1.1924 (1.1229) acc 71.8750 (72.0312) lr 1.6374e-03 eta 6:53:12 +epoch [16/50] batch [25/500] time 1.367 (1.409) data 0.001 (0.034) loss 1.1182 (1.0797) acc 75.0000 (72.7500) lr 1.6374e-03 eta 6:50:17 +epoch [16/50] batch [30/500] time 1.374 (1.403) data 0.001 (0.029) loss 1.3721 (1.1238) acc 68.7500 (71.8750) lr 1.6374e-03 eta 6:48:21 +epoch [16/50] batch [35/500] time 1.357 (1.396) data 0.000 (0.025) loss 1.9600 (1.1236) acc 65.6250 (72.2321) lr 1.6374e-03 eta 6:46:21 +epoch [16/50] batch [40/500] time 1.348 (1.391) data 0.000 (0.022) loss 0.7520 (1.1453) acc 78.1250 (71.4062) lr 1.6374e-03 eta 6:44:39 +epoch [16/50] batch [45/500] time 1.356 (1.387) data 0.000 (0.019) loss 0.8696 (1.1213) acc 68.7500 (72.1528) lr 1.6374e-03 eta 6:43:31 +epoch [16/50] batch [50/500] time 1.360 (1.384) data 0.000 (0.017) loss 1.3584 (1.1284) acc 65.6250 (72.0000) lr 1.6374e-03 eta 6:42:22 +epoch [16/50] batch [55/500] time 1.364 (1.381) data 0.000 (0.016) loss 0.8691 (1.1078) acc 78.1250 (72.2159) lr 1.6374e-03 eta 6:41:39 +epoch [16/50] batch [60/500] time 1.357 (1.380) data 0.000 (0.014) loss 1.4287 
(1.1123) acc 59.3750 (72.1354) lr 1.6374e-03 eta 6:41:00 +epoch [16/50] batch [65/500] time 1.352 (1.379) data 0.000 (0.013) loss 0.7510 (1.1131) acc 84.3750 (72.0192) lr 1.6374e-03 eta 6:40:49 +epoch [16/50] batch [70/500] time 1.375 (1.378) data 0.000 (0.012) loss 1.6279 (1.1307) acc 62.5000 (71.6071) lr 1.6374e-03 eta 6:40:15 +epoch [16/50] batch [75/500] time 1.353 (1.377) data 0.001 (0.012) loss 1.0654 (1.1447) acc 75.0000 (71.2917) lr 1.6374e-03 eta 6:39:49 +epoch [16/50] batch [80/500] time 1.349 (1.375) data 0.000 (0.011) loss 0.9907 (1.1423) acc 71.8750 (71.2500) lr 1.6374e-03 eta 6:39:14 +epoch [16/50] batch [85/500] time 1.355 (1.374) data 0.000 (0.010) loss 1.4092 (1.1568) acc 71.8750 (71.1765) lr 1.6374e-03 eta 6:38:47 +epoch [16/50] batch [90/500] time 1.358 (1.373) data 0.000 (0.010) loss 1.3281 (1.1516) acc 68.7500 (71.2847) lr 1.6374e-03 eta 6:38:28 +epoch [16/50] batch [95/500] time 1.356 (1.373) data 0.000 (0.009) loss 0.7959 (1.1521) acc 81.2500 (71.4145) lr 1.6374e-03 eta 6:38:09 +epoch [16/50] batch [100/500] time 1.360 (1.372) data 0.000 (0.009) loss 1.1172 (1.1444) acc 75.0000 (71.4375) lr 1.6374e-03 eta 6:37:53 +epoch [16/50] batch [105/500] time 1.362 (1.372) data 0.000 (0.008) loss 0.8784 (1.1274) acc 68.7500 (71.6667) lr 1.6374e-03 eta 6:37:46 +epoch [16/50] batch [110/500] time 1.346 (1.373) data 0.000 (0.008) loss 0.8076 (1.1190) acc 78.1250 (71.9602) lr 1.6374e-03 eta 6:37:49 +epoch [16/50] batch [115/500] time 1.349 (1.372) data 0.000 (0.008) loss 1.0469 (1.1212) acc 81.2500 (71.9022) lr 1.6374e-03 eta 6:37:36 +epoch [16/50] batch [120/500] time 1.344 (1.372) data 0.000 (0.007) loss 1.1885 (1.1206) acc 68.7500 (71.9010) lr 1.6374e-03 eta 6:37:19 +epoch [16/50] batch [125/500] time 1.347 (1.371) data 0.000 (0.007) loss 0.4958 (1.1216) acc 87.5000 (71.8250) lr 1.6374e-03 eta 6:36:57 +epoch [16/50] batch [130/500] time 1.374 (1.371) data 0.000 (0.007) loss 1.2363 (1.1246) acc 71.8750 (71.8750) lr 1.6374e-03 eta 6:36:54 +epoch [16/50] 
batch [135/500] time 1.350 (1.370) data 0.000 (0.007) loss 1.4229 (1.1317) acc 62.5000 (71.8287) lr 1.6374e-03 eta 6:36:36 +epoch [16/50] batch [140/500] time 1.359 (1.370) data 0.001 (0.006) loss 0.6494 (1.1289) acc 81.2500 (71.8973) lr 1.6374e-03 eta 6:36:19 +epoch [16/50] batch [145/500] time 1.357 (1.370) data 0.000 (0.006) loss 0.9854 (1.1268) acc 75.0000 (71.8966) lr 1.6374e-03 eta 6:36:14 +epoch [16/50] batch [150/500] time 1.365 (1.370) data 0.000 (0.006) loss 1.1064 (1.1225) acc 75.0000 (71.9583) lr 1.6374e-03 eta 6:36:02 +epoch [16/50] batch [155/500] time 1.374 (1.370) data 0.000 (0.006) loss 1.0146 (1.1241) acc 78.1250 (71.9556) lr 1.6374e-03 eta 6:35:57 +epoch [16/50] batch [160/500] time 1.350 (1.370) data 0.000 (0.006) loss 0.9409 (1.1221) acc 81.2500 (72.1094) lr 1.6374e-03 eta 6:35:47 +epoch [16/50] batch [165/500] time 1.370 (1.369) data 0.000 (0.006) loss 1.1611 (1.1246) acc 62.5000 (71.9886) lr 1.6374e-03 eta 6:35:39 +epoch [16/50] batch [170/500] time 1.348 (1.369) data 0.000 (0.005) loss 0.6958 (1.1228) acc 84.3750 (72.1324) lr 1.6374e-03 eta 6:35:23 +epoch [16/50] batch [175/500] time 1.352 (1.369) data 0.000 (0.005) loss 0.8540 (1.1241) acc 81.2500 (72.1607) lr 1.6374e-03 eta 6:35:12 +epoch [16/50] batch [180/500] time 1.345 (1.368) data 0.000 (0.005) loss 1.0684 (1.1251) acc 78.1250 (72.1354) lr 1.6374e-03 eta 6:34:57 +epoch [16/50] batch [185/500] time 1.354 (1.368) data 0.000 (0.005) loss 0.3447 (1.1253) acc 87.5000 (72.1959) lr 1.6374e-03 eta 6:34:48 +epoch [16/50] batch [190/500] time 1.367 (1.368) data 0.001 (0.005) loss 1.2861 (1.1260) acc 56.2500 (71.9737) lr 1.6374e-03 eta 6:34:38 +epoch [16/50] batch [195/500] time 1.357 (1.367) data 0.000 (0.005) loss 1.2861 (1.1321) acc 68.7500 (71.8429) lr 1.6374e-03 eta 6:34:23 +epoch [16/50] batch [200/500] time 1.362 (1.367) data 0.000 (0.005) loss 1.4893 (1.1314) acc 75.0000 (71.9062) lr 1.6374e-03 eta 6:34:15 +epoch [16/50] batch [205/500] time 1.494 (1.368) data 0.000 (0.005) loss 0.8354 
(1.1353) acc 78.1250 (71.8598) lr 1.6374e-03 eta 6:34:19 +epoch [16/50] batch [210/500] time 1.370 (1.368) data 0.000 (0.004) loss 1.4697 (1.1388) acc 59.3750 (71.8006) lr 1.6374e-03 eta 6:34:11 +epoch [16/50] batch [215/500] time 1.355 (1.368) data 0.000 (0.004) loss 0.8838 (1.1330) acc 71.8750 (71.9186) lr 1.6374e-03 eta 6:33:59 +epoch [16/50] batch [220/500] time 1.360 (1.367) data 0.000 (0.004) loss 1.3145 (1.1336) acc 59.3750 (71.9602) lr 1.6374e-03 eta 6:33:47 +epoch [16/50] batch [225/500] time 1.340 (1.367) data 0.000 (0.004) loss 1.3154 (1.1291) acc 56.2500 (72.0417) lr 1.6374e-03 eta 6:33:31 +epoch [16/50] batch [230/500] time 1.338 (1.366) data 0.000 (0.004) loss 1.6250 (1.1342) acc 62.5000 (71.9429) lr 1.6374e-03 eta 6:33:18 +epoch [16/50] batch [235/500] time 1.354 (1.366) data 0.000 (0.004) loss 1.7393 (1.1395) acc 62.5000 (71.8750) lr 1.6374e-03 eta 6:33:06 +epoch [16/50] batch [240/500] time 1.345 (1.366) data 0.000 (0.004) loss 1.0098 (1.1411) acc 71.8750 (71.9141) lr 1.6374e-03 eta 6:32:55 +epoch [16/50] batch [245/500] time 1.369 (1.366) data 0.000 (0.004) loss 1.3809 (1.1447) acc 68.7500 (71.8878) lr 1.6374e-03 eta 6:32:46 +epoch [16/50] batch [250/500] time 1.372 (1.366) data 0.000 (0.004) loss 0.9771 (1.1447) acc 68.7500 (71.8875) lr 1.6374e-03 eta 6:32:45 +epoch [16/50] batch [255/500] time 1.375 (1.366) data 0.000 (0.004) loss 1.2559 (1.1452) acc 68.7500 (71.8260) lr 1.6374e-03 eta 6:32:36 +epoch [16/50] batch [260/500] time 1.356 (1.366) data 0.000 (0.004) loss 0.9663 (1.1464) acc 78.1250 (71.8510) lr 1.6374e-03 eta 6:32:24 +epoch [16/50] batch [265/500] time 1.374 (1.366) data 0.000 (0.004) loss 0.8828 (1.1465) acc 84.3750 (71.8750) lr 1.6374e-03 eta 6:32:14 +epoch [16/50] batch [270/500] time 1.384 (1.365) data 0.000 (0.004) loss 1.7031 (1.1487) acc 68.7500 (71.8171) lr 1.6374e-03 eta 6:32:07 +epoch [16/50] batch [275/500] time 1.408 (1.366) data 0.001 (0.003) loss 1.0771 (1.1482) acc 75.0000 (71.8068) lr 1.6374e-03 eta 6:32:03 +epoch 
[16/50] batch [280/500] time 1.380 (1.366) data 0.000 (0.003) loss 0.7021 (1.1454) acc 87.5000 (71.8750) lr 1.6374e-03 eta 6:31:58 +epoch [16/50] batch [285/500] time 1.330 (1.365) data 0.000 (0.003) loss 1.0645 (1.1481) acc 68.7500 (71.7434) lr 1.6374e-03 eta 6:31:46 +epoch [16/50] batch [290/500] time 1.364 (1.365) data 0.000 (0.003) loss 0.6309 (1.1438) acc 84.3750 (71.7672) lr 1.6374e-03 eta 6:31:37 +epoch [16/50] batch [295/500] time 1.340 (1.365) data 0.000 (0.003) loss 1.4043 (1.1417) acc 65.6250 (71.8114) lr 1.6374e-03 eta 6:31:28 +epoch [16/50] batch [300/500] time 1.377 (1.365) data 0.000 (0.003) loss 0.7109 (1.1380) acc 78.1250 (71.8958) lr 1.6374e-03 eta 6:31:19 +epoch [16/50] batch [305/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.5215 (1.1377) acc 59.3750 (71.9262) lr 1.6374e-03 eta 6:31:14 +epoch [16/50] batch [310/500] time 1.354 (1.365) data 0.000 (0.003) loss 0.7026 (1.1356) acc 84.3750 (71.9052) lr 1.6374e-03 eta 6:31:06 +epoch [16/50] batch [315/500] time 1.366 (1.365) data 0.000 (0.003) loss 0.8691 (1.1323) acc 81.2500 (71.9841) lr 1.6374e-03 eta 6:30:57 +epoch [16/50] batch [320/500] time 1.339 (1.365) data 0.001 (0.003) loss 1.5361 (1.1313) acc 65.6250 (71.9629) lr 1.6374e-03 eta 6:30:47 +epoch [16/50] batch [325/500] time 1.355 (1.365) data 0.001 (0.003) loss 1.6895 (1.1337) acc 71.8750 (71.9327) lr 1.6374e-03 eta 6:30:38 +epoch [16/50] batch [330/500] time 1.349 (1.364) data 0.001 (0.003) loss 1.1338 (1.1368) acc 68.7500 (71.8466) lr 1.6374e-03 eta 6:30:28 +epoch [16/50] batch [335/500] time 1.375 (1.364) data 0.001 (0.003) loss 1.1211 (1.1375) acc 65.6250 (71.7724) lr 1.6374e-03 eta 6:30:21 +epoch [16/50] batch [340/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.3164 (1.1391) acc 75.0000 (71.7279) lr 1.6374e-03 eta 6:30:09 +epoch [16/50] batch [345/500] time 1.344 (1.364) data 0.000 (0.003) loss 1.3018 (1.1362) acc 65.6250 (71.7844) lr 1.6374e-03 eta 6:30:01 +epoch [16/50] batch [350/500] time 1.380 (1.364) data 0.000 (0.003) loss 
1.0459 (1.1380) acc 78.1250 (71.8036) lr 1.6374e-03 eta 6:30:01 +epoch [16/50] batch [355/500] time 1.366 (1.365) data 0.000 (0.003) loss 1.0303 (1.1376) acc 78.1250 (71.8310) lr 1.6374e-03 eta 6:29:55 +epoch [16/50] batch [360/500] time 1.355 (1.364) data 0.001 (0.003) loss 0.9380 (1.1373) acc 84.3750 (71.8490) lr 1.6374e-03 eta 6:29:45 +epoch [16/50] batch [365/500] time 1.376 (1.364) data 0.000 (0.003) loss 0.7114 (1.1339) acc 78.1250 (71.9178) lr 1.6374e-03 eta 6:29:37 +epoch [16/50] batch [370/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.9839 (1.1363) acc 78.1250 (71.9172) lr 1.6374e-03 eta 6:29:27 +epoch [16/50] batch [375/500] time 1.351 (1.364) data 0.000 (0.003) loss 1.0527 (1.1360) acc 75.0000 (71.9083) lr 1.6374e-03 eta 6:29:20 +epoch [16/50] batch [380/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.3799 (1.1355) acc 62.5000 (71.8421) lr 1.6374e-03 eta 6:29:13 +epoch [16/50] batch [385/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.3311 (1.1364) acc 59.3750 (71.8263) lr 1.6374e-03 eta 6:29:05 +epoch [16/50] batch [390/500] time 1.336 (1.364) data 0.000 (0.003) loss 1.2637 (1.1364) acc 56.2500 (71.7869) lr 1.6374e-03 eta 6:28:56 +epoch [16/50] batch [395/500] time 1.358 (1.364) data 0.000 (0.003) loss 1.4766 (1.1362) acc 59.3750 (71.7326) lr 1.6374e-03 eta 6:28:54 +epoch [16/50] batch [400/500] time 1.347 (1.364) data 0.000 (0.003) loss 0.8960 (1.1338) acc 71.8750 (71.7344) lr 1.6374e-03 eta 6:28:45 +epoch [16/50] batch [405/500] time 1.349 (1.364) data 0.001 (0.002) loss 1.2246 (1.1311) acc 62.5000 (71.7747) lr 1.6374e-03 eta 6:28:37 +epoch [16/50] batch [410/500] time 1.362 (1.364) data 0.000 (0.002) loss 0.7314 (1.1278) acc 75.0000 (71.8293) lr 1.6374e-03 eta 6:28:30 +epoch [16/50] batch [415/500] time 1.364 (1.364) data 0.000 (0.002) loss 1.0146 (1.1270) acc 71.8750 (71.8599) lr 1.6374e-03 eta 6:28:22 +epoch [16/50] batch [420/500] time 1.349 (1.364) data 0.000 (0.002) loss 1.1650 (1.1281) acc 75.0000 (71.8304) lr 1.6374e-03 eta 6:28:14 
+epoch [16/50] batch [425/500] time 1.365 (1.364) data 0.001 (0.002) loss 1.2939 (1.1300) acc 53.1250 (71.7426) lr 1.6374e-03 eta 6:28:08 +epoch [16/50] batch [430/500] time 1.379 (1.364) data 0.000 (0.002) loss 1.2031 (1.1308) acc 71.8750 (71.7587) lr 1.6374e-03 eta 6:28:02 +epoch [16/50] batch [435/500] time 1.362 (1.364) data 0.001 (0.002) loss 1.4160 (1.1302) acc 71.8750 (71.7744) lr 1.6374e-03 eta 6:27:53 +epoch [16/50] batch [440/500] time 1.373 (1.364) data 0.000 (0.002) loss 1.4873 (1.1319) acc 50.0000 (71.7330) lr 1.6374e-03 eta 6:27:48 +epoch [16/50] batch [445/500] time 1.342 (1.364) data 0.000 (0.002) loss 1.4121 (1.1330) acc 62.5000 (71.6994) lr 1.6374e-03 eta 6:27:41 +epoch [16/50] batch [450/500] time 1.365 (1.364) data 0.001 (0.002) loss 1.0430 (1.1340) acc 78.1250 (71.6597) lr 1.6374e-03 eta 6:27:33 +epoch [16/50] batch [455/500] time 1.347 (1.364) data 0.000 (0.002) loss 1.2812 (1.1334) acc 59.3750 (71.6690) lr 1.6374e-03 eta 6:27:26 +epoch [16/50] batch [460/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.4893 (1.1373) acc 65.6250 (71.5761) lr 1.6374e-03 eta 6:27:18 +epoch [16/50] batch [465/500] time 1.374 (1.364) data 0.000 (0.002) loss 1.1064 (1.1412) acc 75.0000 (71.4718) lr 1.6374e-03 eta 6:27:11 +epoch [16/50] batch [470/500] time 1.374 (1.364) data 0.000 (0.002) loss 1.1406 (1.1423) acc 65.6250 (71.4694) lr 1.6374e-03 eta 6:27:04 +epoch [16/50] batch [475/500] time 1.373 (1.364) data 0.000 (0.002) loss 0.9424 (1.1400) acc 75.0000 (71.5000) lr 1.6374e-03 eta 6:26:57 +epoch [16/50] batch [480/500] time 1.341 (1.364) data 0.000 (0.002) loss 0.8003 (1.1395) acc 78.1250 (71.4648) lr 1.6374e-03 eta 6:26:48 +epoch [16/50] batch [485/500] time 1.378 (1.364) data 0.001 (0.002) loss 1.7686 (1.1414) acc 65.6250 (71.4369) lr 1.6374e-03 eta 6:26:42 +epoch [16/50] batch [490/500] time 1.381 (1.364) data 0.000 (0.002) loss 0.7056 (1.1409) acc 84.3750 (71.4796) lr 1.6374e-03 eta 6:26:36 +epoch [16/50] batch [495/500] time 1.360 (1.364) data 0.000 
(0.002) loss 1.3047 (1.1378) acc 68.7500 (71.5530) lr 1.6374e-03 eta 6:26:33 +epoch [16/50] batch [500/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.8247 (1.1370) acc 75.0000 (71.5750) lr 1.5878e-03 eta 6:26:28 +epoch [17/50] batch [5/500] time 1.366 (1.537) data 0.000 (0.169) loss 1.2705 (0.9274) acc 78.1250 (74.3750) lr 1.5878e-03 eta 7:15:24 +epoch [17/50] batch [10/500] time 1.381 (1.447) data 0.000 (0.085) loss 1.1768 (1.0543) acc 75.0000 (73.7500) lr 1.5878e-03 eta 6:49:47 +epoch [17/50] batch [15/500] time 1.348 (1.418) data 0.000 (0.057) loss 1.0664 (1.0238) acc 68.7500 (73.5417) lr 1.5878e-03 eta 6:41:30 +epoch [17/50] batch [20/500] time 1.362 (1.405) data 0.001 (0.042) loss 1.4844 (1.0318) acc 62.5000 (73.5938) lr 1.5878e-03 eta 6:37:29 +epoch [17/50] batch [25/500] time 1.338 (1.396) data 0.000 (0.034) loss 1.1240 (1.0573) acc 78.1250 (73.0000) lr 1.5878e-03 eta 6:34:49 +epoch [17/50] batch [30/500] time 1.374 (1.390) data 0.000 (0.028) loss 1.4033 (1.0825) acc 65.6250 (71.7708) lr 1.5878e-03 eta 6:33:02 +epoch [17/50] batch [35/500] time 1.375 (1.387) data 0.000 (0.024) loss 1.7188 (1.1046) acc 62.5000 (71.6071) lr 1.5878e-03 eta 6:32:07 +epoch [17/50] batch [40/500] time 1.381 (1.384) data 0.001 (0.021) loss 1.2578 (1.0953) acc 65.6250 (71.0938) lr 1.5878e-03 eta 6:31:18 +epoch [17/50] batch [45/500] time 1.382 (1.383) data 0.000 (0.019) loss 1.2324 (1.0863) acc 75.0000 (71.3194) lr 1.5878e-03 eta 6:30:43 +epoch [17/50] batch [50/500] time 1.375 (1.386) data 0.000 (0.017) loss 0.6362 (1.0732) acc 75.0000 (71.8125) lr 1.5878e-03 eta 6:31:27 +epoch [17/50] batch [55/500] time 1.374 (1.383) data 0.001 (0.016) loss 1.4375 (1.0569) acc 71.8750 (72.8409) lr 1.5878e-03 eta 6:30:36 +epoch [17/50] batch [60/500] time 1.370 (1.382) data 0.000 (0.014) loss 0.9443 (1.0674) acc 78.1250 (72.7083) lr 1.5878e-03 eta 6:30:07 +epoch [17/50] batch [65/500] time 1.370 (1.380) data 0.000 (0.013) loss 0.8706 (1.0681) acc 78.1250 (72.5962) lr 1.5878e-03 eta 6:29:38 
+epoch [17/50] batch [70/500] time 1.348 (1.379) data 0.000 (0.012) loss 1.3721 (1.0643) acc 68.7500 (72.7232) lr 1.5878e-03 eta 6:29:08 +epoch [17/50] batch [75/500] time 1.370 (1.377) data 0.000 (0.012) loss 1.5615 (1.0623) acc 56.2500 (72.7083) lr 1.5878e-03 eta 6:28:32 +epoch [17/50] batch [80/500] time 1.357 (1.377) data 0.000 (0.011) loss 0.5156 (1.0709) acc 81.2500 (72.5781) lr 1.5878e-03 eta 6:28:15 +epoch [17/50] batch [85/500] time 1.334 (1.376) data 0.001 (0.010) loss 1.6963 (1.0882) acc 68.7500 (72.2426) lr 1.5878e-03 eta 6:27:59 +epoch [17/50] batch [90/500] time 1.377 (1.376) data 0.001 (0.010) loss 0.7520 (1.0941) acc 84.3750 (72.2917) lr 1.5878e-03 eta 6:27:48 +epoch [17/50] batch [95/500] time 1.348 (1.377) data 0.000 (0.009) loss 1.4736 (1.0934) acc 65.6250 (72.4013) lr 1.5878e-03 eta 6:27:53 +epoch [17/50] batch [100/500] time 1.390 (1.376) data 0.000 (0.009) loss 1.2568 (1.1048) acc 59.3750 (72.1250) lr 1.5878e-03 eta 6:27:37 +epoch [17/50] batch [105/500] time 1.379 (1.376) data 0.000 (0.008) loss 1.1562 (1.1047) acc 65.6250 (72.0833) lr 1.5878e-03 eta 6:27:26 +epoch [17/50] batch [110/500] time 1.343 (1.375) data 0.001 (0.008) loss 0.6143 (1.1009) acc 81.2500 (72.1023) lr 1.5878e-03 eta 6:27:10 +epoch [17/50] batch [115/500] time 1.367 (1.375) data 0.000 (0.008) loss 0.9741 (1.0948) acc 71.8750 (72.1739) lr 1.5878e-03 eta 6:26:56 +epoch [17/50] batch [120/500] time 1.378 (1.375) data 0.001 (0.007) loss 0.7515 (1.0923) acc 81.2500 (72.3698) lr 1.5878e-03 eta 6:26:47 +epoch [17/50] batch [125/500] time 1.361 (1.374) data 0.000 (0.007) loss 1.3623 (1.1020) acc 65.6250 (72.1250) lr 1.5878e-03 eta 6:26:30 +epoch [17/50] batch [130/500] time 1.344 (1.373) data 0.000 (0.007) loss 1.0107 (1.0951) acc 75.0000 (72.2837) lr 1.5878e-03 eta 6:26:09 +epoch [17/50] batch [135/500] time 1.357 (1.373) data 0.001 (0.007) loss 0.9165 (1.0883) acc 78.1250 (72.3843) lr 1.5878e-03 eta 6:25:53 +epoch [17/50] batch [140/500] time 1.373 (1.373) data 0.000 (0.006) loss 
0.6865 (1.0904) acc 84.3750 (72.3661) lr 1.5878e-03 eta 6:25:43 +epoch [17/50] batch [145/500] time 1.355 (1.372) data 0.001 (0.006) loss 0.8550 (1.0925) acc 78.1250 (72.1336) lr 1.5878e-03 eta 6:25:25 +epoch [17/50] batch [150/500] time 1.356 (1.372) data 0.000 (0.006) loss 1.5967 (1.0957) acc 59.3750 (72.1042) lr 1.5878e-03 eta 6:25:12 +epoch [17/50] batch [155/500] time 1.352 (1.371) data 0.000 (0.006) loss 0.8896 (1.1006) acc 71.8750 (72.0161) lr 1.5878e-03 eta 6:24:59 +epoch [17/50] batch [160/500] time 1.368 (1.371) data 0.001 (0.006) loss 1.4580 (1.1080) acc 56.2500 (71.7383) lr 1.5878e-03 eta 6:24:46 +epoch [17/50] batch [165/500] time 1.351 (1.371) data 0.000 (0.006) loss 0.9731 (1.1040) acc 75.0000 (71.8182) lr 1.5878e-03 eta 6:24:36 +epoch [17/50] batch [170/500] time 1.360 (1.370) data 0.000 (0.005) loss 1.1797 (1.0996) acc 75.0000 (71.9669) lr 1.5878e-03 eta 6:24:23 +epoch [17/50] batch [175/500] time 1.362 (1.370) data 0.001 (0.005) loss 1.7100 (1.1012) acc 62.5000 (71.9464) lr 1.5878e-03 eta 6:24:14 +epoch [17/50] batch [180/500] time 1.371 (1.370) data 0.000 (0.005) loss 1.2656 (1.1019) acc 65.6250 (71.9271) lr 1.5878e-03 eta 6:24:04 +epoch [17/50] batch [185/500] time 1.341 (1.369) data 0.000 (0.005) loss 0.7607 (1.1026) acc 78.1250 (71.9595) lr 1.5878e-03 eta 6:23:46 +epoch [17/50] batch [190/500] time 1.507 (1.370) data 0.000 (0.005) loss 1.7520 (1.1041) acc 68.7500 (72.0230) lr 1.5878e-03 eta 6:23:43 +epoch [17/50] batch [195/500] time 1.369 (1.369) data 0.000 (0.005) loss 0.5225 (1.0974) acc 87.5000 (72.1635) lr 1.5878e-03 eta 6:23:32 +epoch [17/50] batch [200/500] time 1.357 (1.369) data 0.000 (0.005) loss 1.0439 (1.0994) acc 71.8750 (72.1250) lr 1.5878e-03 eta 6:23:18 +epoch [17/50] batch [205/500] time 1.353 (1.369) data 0.000 (0.005) loss 1.2510 (1.0992) acc 71.8750 (72.0732) lr 1.5878e-03 eta 6:23:05 +epoch [17/50] batch [210/500] time 1.341 (1.368) data 0.000 (0.004) loss 1.1836 (1.1018) acc 62.5000 (72.0238) lr 1.5878e-03 eta 6:22:52 
+epoch [17/50] batch [215/500] time 1.365 (1.368) data 0.000 (0.004) loss 1.4014 (1.1071) acc 68.7500 (71.8314) lr 1.5878e-03 eta 6:22:42 +epoch [17/50] batch [220/500] time 1.369 (1.368) data 0.000 (0.004) loss 0.8047 (1.1015) acc 81.2500 (72.0028) lr 1.5878e-03 eta 6:22:34 +epoch [17/50] batch [225/500] time 1.363 (1.368) data 0.000 (0.004) loss 1.7471 (1.1020) acc 68.7500 (72.0833) lr 1.5878e-03 eta 6:22:28 +epoch [17/50] batch [230/500] time 1.363 (1.368) data 0.000 (0.004) loss 1.3369 (1.1013) acc 59.3750 (72.0924) lr 1.5878e-03 eta 6:22:23 +epoch [17/50] batch [235/500] time 1.361 (1.369) data 0.000 (0.004) loss 0.6577 (1.0986) acc 81.2500 (72.1543) lr 1.5878e-03 eta 6:22:23 +epoch [17/50] batch [240/500] time 1.346 (1.368) data 0.000 (0.004) loss 0.9673 (1.0993) acc 78.1250 (72.0964) lr 1.5878e-03 eta 6:22:11 +epoch [17/50] batch [245/500] time 1.374 (1.368) data 0.000 (0.004) loss 1.6484 (1.0992) acc 65.6250 (72.0918) lr 1.5878e-03 eta 6:22:01 +epoch [17/50] batch [250/500] time 1.380 (1.368) data 0.000 (0.004) loss 1.1592 (1.1023) acc 71.8750 (72.0375) lr 1.5878e-03 eta 6:21:53 +epoch [17/50] batch [255/500] time 1.366 (1.368) data 0.000 (0.004) loss 0.9810 (1.1055) acc 71.8750 (72.0221) lr 1.5878e-03 eta 6:21:47 +epoch [17/50] batch [260/500] time 1.346 (1.368) data 0.000 (0.004) loss 0.6177 (1.1072) acc 81.2500 (72.0192) lr 1.5878e-03 eta 6:21:36 +epoch [17/50] batch [265/500] time 1.385 (1.368) data 0.000 (0.004) loss 1.1270 (1.1126) acc 71.8750 (71.9693) lr 1.5878e-03 eta 6:21:27 +epoch [17/50] batch [270/500] time 1.354 (1.367) data 0.000 (0.004) loss 0.8794 (1.1105) acc 75.0000 (72.0255) lr 1.5878e-03 eta 6:21:17 +epoch [17/50] batch [275/500] time 1.343 (1.367) data 0.000 (0.003) loss 1.6650 (1.1180) acc 68.7500 (71.9091) lr 1.5878e-03 eta 6:21:04 +epoch [17/50] batch [280/500] time 1.351 (1.367) data 0.000 (0.003) loss 1.7432 (1.1229) acc 56.2500 (71.8080) lr 1.5878e-03 eta 6:20:50 +epoch [17/50] batch [285/500] time 1.365 (1.366) data 0.000 
(0.003) loss 1.1025 (1.1234) acc 75.0000 (71.8640) lr 1.5878e-03 eta 6:20:40 +epoch [17/50] batch [290/500] time 1.367 (1.366) data 0.000 (0.003) loss 1.1641 (1.1245) acc 71.8750 (71.8642) lr 1.5878e-03 eta 6:20:32 +epoch [17/50] batch [295/500] time 1.345 (1.366) data 0.000 (0.003) loss 1.5029 (1.1311) acc 71.8750 (71.7691) lr 1.5878e-03 eta 6:20:22 +epoch [17/50] batch [300/500] time 1.341 (1.366) data 0.000 (0.003) loss 1.6641 (1.1318) acc 53.1250 (71.7917) lr 1.5878e-03 eta 6:20:13 +epoch [17/50] batch [305/500] time 1.340 (1.366) data 0.000 (0.003) loss 0.8135 (1.1326) acc 68.7500 (71.7316) lr 1.5878e-03 eta 6:20:05 +epoch [17/50] batch [310/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.2021 (1.1304) acc 68.7500 (71.7540) lr 1.5878e-03 eta 6:19:55 +epoch [17/50] batch [315/500] time 1.369 (1.366) data 0.000 (0.003) loss 1.1865 (1.1319) acc 65.6250 (71.7163) lr 1.5878e-03 eta 6:19:49 +epoch [17/50] batch [320/500] time 1.377 (1.366) data 0.000 (0.003) loss 1.0342 (1.1271) acc 75.0000 (71.8262) lr 1.5878e-03 eta 6:19:40 +epoch [17/50] batch [325/500] time 1.348 (1.365) data 0.000 (0.003) loss 0.9170 (1.1276) acc 75.0000 (71.7885) lr 1.5878e-03 eta 6:19:29 +epoch [17/50] batch [330/500] time 1.377 (1.365) data 0.000 (0.003) loss 0.5396 (1.1242) acc 81.2500 (71.8750) lr 1.5878e-03 eta 6:19:19 +epoch [17/50] batch [335/500] time 1.343 (1.366) data 0.000 (0.003) loss 1.2139 (1.1237) acc 59.3750 (71.8563) lr 1.5878e-03 eta 6:19:16 +epoch [17/50] batch [340/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.2949 (1.1301) acc 62.5000 (71.7188) lr 1.5878e-03 eta 6:19:07 +epoch [17/50] batch [345/500] time 1.339 (1.365) data 0.000 (0.003) loss 1.0322 (1.1266) acc 78.1250 (71.7935) lr 1.5878e-03 eta 6:18:56 +epoch [17/50] batch [350/500] time 1.372 (1.365) data 0.000 (0.003) loss 1.0322 (1.1227) acc 65.6250 (71.8482) lr 1.5878e-03 eta 6:18:47 +epoch [17/50] batch [355/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.1133 (1.1240) acc 71.8750 (71.8134) lr 1.5878e-03 
eta 6:18:39 +epoch [17/50] batch [360/500] time 1.338 (1.365) data 0.000 (0.003) loss 0.7695 (1.1236) acc 78.1250 (71.8229) lr 1.5878e-03 eta 6:18:28 +epoch [17/50] batch [365/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.0732 (1.1223) acc 78.1250 (71.8493) lr 1.5878e-03 eta 6:18:20 +epoch [17/50] batch [370/500] time 1.370 (1.365) data 0.000 (0.003) loss 0.8750 (1.1200) acc 75.0000 (71.8834) lr 1.5878e-03 eta 6:18:12 +epoch [17/50] batch [375/500] time 1.328 (1.364) data 0.000 (0.003) loss 1.3105 (1.1205) acc 53.1250 (71.7917) lr 1.5878e-03 eta 6:18:03 +epoch [17/50] batch [380/500] time 1.356 (1.365) data 0.000 (0.003) loss 0.6533 (1.1219) acc 84.3750 (71.7681) lr 1.5878e-03 eta 6:18:00 +epoch [17/50] batch [385/500] time 1.386 (1.365) data 0.000 (0.003) loss 0.6289 (1.1219) acc 81.2500 (71.7695) lr 1.5878e-03 eta 6:17:51 +epoch [17/50] batch [390/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.0488 (1.1224) acc 81.2500 (71.7067) lr 1.5878e-03 eta 6:17:43 +epoch [17/50] batch [395/500] time 1.348 (1.364) data 0.000 (0.003) loss 1.0928 (1.1201) acc 81.2500 (71.7722) lr 1.5878e-03 eta 6:17:36 +epoch [17/50] batch [400/500] time 1.354 (1.364) data 0.000 (0.003) loss 0.7100 (1.1183) acc 81.2500 (71.8203) lr 1.5878e-03 eta 6:17:25 +epoch [17/50] batch [405/500] time 1.354 (1.364) data 0.000 (0.002) loss 0.6343 (1.1191) acc 84.3750 (71.8519) lr 1.5878e-03 eta 6:17:16 +epoch [17/50] batch [410/500] time 1.365 (1.364) data 0.000 (0.002) loss 0.9463 (1.1184) acc 75.0000 (71.8674) lr 1.5878e-03 eta 6:17:09 +epoch [17/50] batch [415/500] time 1.349 (1.364) data 0.000 (0.002) loss 1.0811 (1.1167) acc 75.0000 (71.8901) lr 1.5878e-03 eta 6:17:01 +epoch [17/50] batch [420/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.1885 (1.1193) acc 75.0000 (71.8378) lr 1.5878e-03 eta 6:16:54 +epoch [17/50] batch [425/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.3096 (1.1203) acc 71.8750 (71.8309) lr 1.5878e-03 eta 6:16:45 +epoch [17/50] batch [430/500] time 1.377 (1.364) data 
0.000 (0.002) loss 1.2139 (1.1178) acc 62.5000 (71.8314) lr 1.5878e-03 eta 6:16:36 +epoch [17/50] batch [435/500] time 1.359 (1.364) data 0.000 (0.002) loss 1.4121 (1.1211) acc 68.7500 (71.7960) lr 1.5878e-03 eta 6:16:31 +epoch [17/50] batch [440/500] time 1.369 (1.364) data 0.000 (0.002) loss 0.9800 (1.1214) acc 68.7500 (71.8182) lr 1.5878e-03 eta 6:16:25 +epoch [17/50] batch [445/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.2910 (1.1204) acc 68.7500 (71.8329) lr 1.5878e-03 eta 6:16:18 +epoch [17/50] batch [450/500] time 1.388 (1.364) data 0.000 (0.002) loss 1.3320 (1.1261) acc 68.7500 (71.7431) lr 1.5878e-03 eta 6:16:12 +epoch [17/50] batch [455/500] time 1.345 (1.364) data 0.000 (0.002) loss 1.1719 (1.1268) acc 71.8750 (71.6964) lr 1.5878e-03 eta 6:16:06 +epoch [17/50] batch [460/500] time 1.387 (1.364) data 0.000 (0.002) loss 0.8867 (1.1253) acc 62.5000 (71.6848) lr 1.5878e-03 eta 6:16:01 +epoch [17/50] batch [465/500] time 1.357 (1.364) data 0.000 (0.002) loss 0.9160 (1.1256) acc 75.0000 (71.7003) lr 1.5878e-03 eta 6:15:54 +epoch [17/50] batch [470/500] time 1.375 (1.364) data 0.000 (0.002) loss 1.2617 (1.1248) acc 75.0000 (71.7553) lr 1.5878e-03 eta 6:15:48 +epoch [17/50] batch [475/500] time 1.367 (1.364) data 0.000 (0.002) loss 1.1143 (1.1246) acc 71.8750 (71.7763) lr 1.5878e-03 eta 6:15:41 +epoch [17/50] batch [480/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.0312 (1.1222) acc 71.8750 (71.8099) lr 1.5878e-03 eta 6:15:39 +epoch [17/50] batch [485/500] time 1.366 (1.364) data 0.001 (0.002) loss 1.4268 (1.1225) acc 65.6250 (71.7977) lr 1.5878e-03 eta 6:15:33 +epoch [17/50] batch [490/500] time 1.356 (1.364) data 0.000 (0.002) loss 0.4888 (1.1205) acc 81.2500 (71.8431) lr 1.5878e-03 eta 6:15:26 +epoch [17/50] batch [495/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.4199 (1.1207) acc 65.6250 (71.8497) lr 1.5878e-03 eta 6:15:18 +epoch [17/50] batch [500/500] time 1.341 (1.364) data 0.000 (0.002) loss 1.0293 (1.1201) acc 75.0000 (71.8875) lr 
1.5358e-03 eta 6:15:09 +epoch [18/50] batch [5/500] time 1.337 (1.531) data 0.001 (0.173) loss 0.8643 (1.1965) acc 81.2500 (72.5000) lr 1.5358e-03 eta 7:00:46 +epoch [18/50] batch [10/500] time 1.358 (1.447) data 0.000 (0.087) loss 0.5210 (1.0563) acc 84.3750 (73.7500) lr 1.5358e-03 eta 6:37:40 +epoch [18/50] batch [15/500] time 1.352 (1.415) data 0.000 (0.058) loss 1.1348 (1.0751) acc 75.0000 (73.7500) lr 1.5358e-03 eta 6:28:53 +epoch [18/50] batch [20/500] time 1.349 (1.400) data 0.000 (0.044) loss 1.0479 (1.0694) acc 71.8750 (72.6562) lr 1.5358e-03 eta 6:24:37 +epoch [18/50] batch [25/500] time 1.364 (1.392) data 0.000 (0.035) loss 0.6265 (1.0809) acc 81.2500 (73.3750) lr 1.5358e-03 eta 6:22:14 +epoch [18/50] batch [30/500] time 1.364 (1.392) data 0.000 (0.029) loss 0.7925 (1.0576) acc 81.2500 (74.0625) lr 1.5358e-03 eta 6:22:08 +epoch [18/50] batch [35/500] time 1.351 (1.387) data 0.000 (0.025) loss 1.2480 (1.0813) acc 68.7500 (73.4821) lr 1.5358e-03 eta 6:20:36 +epoch [18/50] batch [40/500] time 1.352 (1.383) data 0.000 (0.022) loss 1.0771 (1.0711) acc 62.5000 (73.5156) lr 1.5358e-03 eta 6:19:31 +epoch [18/50] batch [45/500] time 1.344 (1.380) data 0.001 (0.020) loss 1.4629 (1.0605) acc 62.5000 (73.6111) lr 1.5358e-03 eta 6:18:31 +epoch [18/50] batch [50/500] time 1.361 (1.378) data 0.000 (0.018) loss 1.3984 (1.0543) acc 68.7500 (73.7500) lr 1.5358e-03 eta 6:17:43 +epoch [18/50] batch [55/500] time 1.366 (1.376) data 0.000 (0.016) loss 0.7715 (1.0289) acc 81.2500 (74.5455) lr 1.5358e-03 eta 6:17:03 +epoch [18/50] batch [60/500] time 1.362 (1.374) data 0.001 (0.015) loss 0.8179 (1.0283) acc 65.6250 (74.2708) lr 1.5358e-03 eta 6:16:28 +epoch [18/50] batch [65/500] time 1.357 (1.373) data 0.000 (0.014) loss 1.3398 (1.0304) acc 62.5000 (74.1346) lr 1.5358e-03 eta 6:16:05 +epoch [18/50] batch [70/500] time 1.351 (1.373) data 0.000 (0.013) loss 1.2559 (1.0315) acc 71.8750 (74.0625) lr 1.5358e-03 eta 6:15:50 +epoch [18/50] batch [75/500] time 1.376 (1.374) data 0.000 
(0.012) loss 0.9619 (1.0294) acc 78.1250 (74.3333) lr 1.5358e-03 eta 6:16:09 +epoch [18/50] batch [80/500] time 1.372 (1.373) data 0.001 (0.011) loss 0.6914 (1.0329) acc 78.1250 (74.1016) lr 1.5358e-03 eta 6:15:52 +epoch [18/50] batch [85/500] time 1.366 (1.373) data 0.000 (0.011) loss 1.3545 (1.0517) acc 68.7500 (73.7132) lr 1.5358e-03 eta 6:15:41 +epoch [18/50] batch [90/500] time 1.363 (1.373) data 0.000 (0.010) loss 1.7334 (1.0675) acc 59.3750 (73.4028) lr 1.5358e-03 eta 6:15:34 +epoch [18/50] batch [95/500] time 1.387 (1.373) data 0.000 (0.010) loss 1.1045 (1.0745) acc 78.1250 (73.2566) lr 1.5358e-03 eta 6:15:16 +epoch [18/50] batch [100/500] time 1.372 (1.373) data 0.001 (0.009) loss 1.4922 (1.0806) acc 65.6250 (72.9375) lr 1.5358e-03 eta 6:15:11 +epoch [18/50] batch [105/500] time 1.387 (1.372) data 0.001 (0.009) loss 0.8193 (1.0753) acc 78.1250 (73.0655) lr 1.5358e-03 eta 6:14:56 +epoch [18/50] batch [110/500] time 1.373 (1.372) data 0.001 (0.008) loss 1.4365 (1.0916) acc 68.7500 (72.7557) lr 1.5358e-03 eta 6:14:44 +epoch [18/50] batch [115/500] time 1.359 (1.371) data 0.000 (0.008) loss 0.7139 (1.0900) acc 81.2500 (72.7717) lr 1.5358e-03 eta 6:14:26 +epoch [18/50] batch [120/500] time 1.340 (1.371) data 0.000 (0.008) loss 1.3398 (1.1003) acc 65.6250 (72.4219) lr 1.5358e-03 eta 6:14:12 +epoch [18/50] batch [125/500] time 1.358 (1.370) data 0.000 (0.007) loss 0.9883 (1.0960) acc 71.8750 (72.5250) lr 1.5358e-03 eta 6:13:52 +epoch [18/50] batch [130/500] time 1.355 (1.369) data 0.000 (0.007) loss 0.7993 (1.0887) acc 81.2500 (72.5721) lr 1.5358e-03 eta 6:13:35 +epoch [18/50] batch [135/500] time 1.351 (1.369) data 0.000 (0.007) loss 1.1865 (1.0905) acc 65.6250 (72.3611) lr 1.5358e-03 eta 6:13:22 +epoch [18/50] batch [140/500] time 1.369 (1.368) data 0.000 (0.007) loss 1.5840 (1.0869) acc 65.6250 (72.3438) lr 1.5358e-03 eta 6:13:07 +epoch [18/50] batch [145/500] time 1.354 (1.368) data 0.000 (0.006) loss 0.9424 (1.0884) acc 78.1250 (72.3707) lr 1.5358e-03 eta 
6:12:50 +epoch [18/50] batch [150/500] time 1.356 (1.368) data 0.000 (0.006) loss 1.8730 (1.0987) acc 43.7500 (72.1667) lr 1.5358e-03 eta 6:12:40 +epoch [18/50] batch [155/500] time 1.367 (1.367) data 0.000 (0.006) loss 1.2275 (1.1088) acc 56.2500 (71.8952) lr 1.5358e-03 eta 6:12:30 +epoch [18/50] batch [160/500] time 1.364 (1.367) data 0.000 (0.006) loss 1.2793 (1.1092) acc 68.7500 (71.8164) lr 1.5358e-03 eta 6:12:19 +epoch [18/50] batch [165/500] time 1.341 (1.367) data 0.000 (0.006) loss 0.4946 (1.1094) acc 87.5000 (71.9318) lr 1.5358e-03 eta 6:12:06 +epoch [18/50] batch [170/500] time 1.345 (1.366) data 0.000 (0.006) loss 1.2939 (1.1113) acc 68.7500 (71.9485) lr 1.5358e-03 eta 6:11:52 +epoch [18/50] batch [175/500] time 1.351 (1.367) data 0.000 (0.005) loss 1.0537 (1.1140) acc 81.2500 (71.9464) lr 1.5358e-03 eta 6:11:53 +epoch [18/50] batch [180/500] time 1.328 (1.366) data 0.000 (0.005) loss 1.2031 (1.1134) acc 65.6250 (71.9965) lr 1.5358e-03 eta 6:11:38 +epoch [18/50] batch [185/500] time 1.352 (1.366) data 0.000 (0.005) loss 0.7070 (1.1036) acc 87.5000 (72.2804) lr 1.5358e-03 eta 6:11:26 +epoch [18/50] batch [190/500] time 1.351 (1.366) data 0.000 (0.005) loss 1.3135 (1.1017) acc 75.0000 (72.3520) lr 1.5358e-03 eta 6:11:20 +epoch [18/50] batch [195/500] time 1.363 (1.366) data 0.001 (0.005) loss 1.2969 (1.1025) acc 65.6250 (72.3237) lr 1.5358e-03 eta 6:11:11 +epoch [18/50] batch [200/500] time 1.359 (1.366) data 0.000 (0.005) loss 0.9336 (1.1031) acc 75.0000 (72.4062) lr 1.5358e-03 eta 6:11:03 +epoch [18/50] batch [205/500] time 1.360 (1.365) data 0.000 (0.005) loss 0.6904 (1.0976) acc 78.1250 (72.4390) lr 1.5358e-03 eta 6:10:50 +epoch [18/50] batch [210/500] time 1.376 (1.366) data 0.000 (0.005) loss 0.7383 (1.0975) acc 75.0000 (72.4405) lr 1.5358e-03 eta 6:10:44 +epoch [18/50] batch [215/500] time 1.483 (1.366) data 0.000 (0.004) loss 1.1250 (1.0940) acc 78.1250 (72.5291) lr 1.5358e-03 eta 6:10:43 +epoch [18/50] batch [220/500] time 1.388 (1.366) data 
0.000 (0.004) loss 1.5596 (1.0936) acc 65.6250 (72.5000) lr 1.5358e-03 eta 6:10:40 +epoch [18/50] batch [225/500] time 1.351 (1.366) data 0.000 (0.004) loss 1.0928 (1.0925) acc 71.8750 (72.5972) lr 1.5358e-03 eta 6:10:27 +epoch [18/50] batch [230/500] time 1.338 (1.365) data 0.000 (0.004) loss 0.9302 (1.0915) acc 71.8750 (72.5815) lr 1.5358e-03 eta 6:10:16 +epoch [18/50] batch [235/500] time 1.348 (1.365) data 0.000 (0.004) loss 0.9590 (1.0913) acc 75.0000 (72.5931) lr 1.5358e-03 eta 6:10:05 +epoch [18/50] batch [240/500] time 1.370 (1.365) data 0.000 (0.004) loss 1.2188 (1.0913) acc 68.7500 (72.5781) lr 1.5358e-03 eta 6:09:57 +epoch [18/50] batch [245/500] time 1.369 (1.365) data 0.000 (0.004) loss 1.0850 (1.0937) acc 62.5000 (72.5128) lr 1.5358e-03 eta 6:09:51 +epoch [18/50] batch [250/500] time 1.344 (1.365) data 0.000 (0.004) loss 0.9971 (1.0930) acc 75.0000 (72.5875) lr 1.5358e-03 eta 6:09:40 +epoch [18/50] batch [255/500] time 1.359 (1.365) data 0.000 (0.004) loss 1.0264 (1.0954) acc 75.0000 (72.6103) lr 1.5358e-03 eta 6:09:34 +epoch [18/50] batch [260/500] time 1.357 (1.365) data 0.000 (0.004) loss 1.3115 (1.1036) acc 68.7500 (72.4399) lr 1.5358e-03 eta 6:09:26 +epoch [18/50] batch [265/500] time 1.368 (1.365) data 0.000 (0.004) loss 2.3965 (1.1048) acc 56.2500 (72.4528) lr 1.5358e-03 eta 6:09:16 +epoch [18/50] batch [270/500] time 1.369 (1.365) data 0.000 (0.004) loss 1.0312 (1.1046) acc 75.0000 (72.5231) lr 1.5358e-03 eta 6:09:09 +epoch [18/50] batch [275/500] time 1.363 (1.365) data 0.000 (0.004) loss 1.3721 (1.1057) acc 68.7500 (72.5227) lr 1.5358e-03 eta 6:09:00 +epoch [18/50] batch [280/500] time 1.380 (1.365) data 0.000 (0.003) loss 0.6172 (1.1044) acc 71.8750 (72.5781) lr 1.5358e-03 eta 6:08:54 +epoch [18/50] batch [285/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.1270 (1.1042) acc 68.7500 (72.5219) lr 1.5358e-03 eta 6:08:48 +epoch [18/50] batch [290/500] time 1.367 (1.365) data 0.000 (0.003) loss 0.8481 (1.1015) acc 75.0000 (72.5754) lr 
1.5358e-03 eta 6:08:41 +epoch [18/50] batch [295/500] time 1.339 (1.364) data 0.001 (0.003) loss 0.8584 (1.1005) acc 65.6250 (72.5212) lr 1.5358e-03 eta 6:08:31 +epoch [18/50] batch [300/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.5801 (1.1049) acc 65.6250 (72.3646) lr 1.5358e-03 eta 6:08:22 +epoch [18/50] batch [305/500] time 1.354 (1.364) data 0.001 (0.003) loss 1.0293 (1.1069) acc 62.5000 (72.2951) lr 1.5358e-03 eta 6:08:15 +epoch [18/50] batch [310/500] time 1.345 (1.364) data 0.000 (0.003) loss 0.6968 (1.1047) acc 81.2500 (72.2782) lr 1.5358e-03 eta 6:08:07 +epoch [18/50] batch [315/500] time 1.343 (1.365) data 0.000 (0.003) loss 1.3027 (1.1065) acc 78.1250 (72.2718) lr 1.5358e-03 eta 6:08:07 +epoch [18/50] batch [320/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.6362 (1.1053) acc 78.1250 (72.2461) lr 1.5358e-03 eta 6:07:59 +epoch [18/50] batch [325/500] time 1.349 (1.364) data 0.000 (0.003) loss 1.3057 (1.1031) acc 75.0000 (72.3269) lr 1.5358e-03 eta 6:07:50 +epoch [18/50] batch [330/500] time 1.349 (1.364) data 0.001 (0.003) loss 0.8999 (1.1050) acc 81.2500 (72.3011) lr 1.5358e-03 eta 6:07:39 +epoch [18/50] batch [335/500] time 1.392 (1.364) data 0.000 (0.003) loss 1.4229 (1.1068) acc 62.5000 (72.2015) lr 1.5358e-03 eta 6:07:33 +epoch [18/50] batch [340/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.1113 (1.1072) acc 65.6250 (72.1507) lr 1.5358e-03 eta 6:07:27 +epoch [18/50] batch [345/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.0596 (1.1066) acc 71.8750 (72.1377) lr 1.5358e-03 eta 6:07:20 +epoch [18/50] batch [350/500] time 1.357 (1.364) data 0.000 (0.003) loss 1.5107 (1.1070) acc 65.6250 (72.1339) lr 1.5358e-03 eta 6:07:13 +epoch [18/50] batch [355/500] time 1.355 (1.364) data 0.000 (0.003) loss 2.2266 (1.1127) acc 46.8750 (72.0335) lr 1.5358e-03 eta 6:07:05 +epoch [18/50] batch [360/500] time 1.342 (1.364) data 0.000 (0.003) loss 0.7075 (1.1104) acc 71.8750 (72.0747) lr 1.5358e-03 eta 6:07:01 +epoch [18/50] batch [365/500] time 1.333 
(1.364) data 0.000 (0.003) loss 1.7803 (1.1132) acc 50.0000 (72.0377) lr 1.5358e-03 eta 6:06:52 +epoch [18/50] batch [370/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.5332 (1.1150) acc 56.2500 (71.9848) lr 1.5358e-03 eta 6:06:44 +epoch [18/50] batch [375/500] time 1.349 (1.364) data 0.000 (0.003) loss 1.0938 (1.1144) acc 68.7500 (72.0000) lr 1.5358e-03 eta 6:06:35 +epoch [18/50] batch [380/500] time 1.343 (1.364) data 0.000 (0.003) loss 0.9077 (1.1125) acc 71.8750 (72.0066) lr 1.5358e-03 eta 6:06:26 +epoch [18/50] batch [385/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.1924 (1.1138) acc 68.7500 (71.9399) lr 1.5358e-03 eta 6:06:18 +epoch [18/50] batch [390/500] time 1.364 (1.364) data 0.001 (0.003) loss 0.9536 (1.1135) acc 81.2500 (71.9311) lr 1.5358e-03 eta 6:06:09 +epoch [18/50] batch [395/500] time 1.371 (1.364) data 0.000 (0.003) loss 0.9287 (1.1108) acc 78.1250 (71.9699) lr 1.5358e-03 eta 6:06:03 +epoch [18/50] batch [400/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.5967 (1.1100) acc 62.5000 (72.0156) lr 1.5358e-03 eta 6:05:55 +epoch [18/50] batch [405/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.5801 (1.1116) acc 62.5000 (72.0525) lr 1.5358e-03 eta 6:05:48 +epoch [18/50] batch [410/500] time 1.379 (1.364) data 0.000 (0.003) loss 1.0254 (1.1120) acc 65.6250 (72.0198) lr 1.5358e-03 eta 6:05:42 +epoch [18/50] batch [415/500] time 1.342 (1.364) data 0.000 (0.002) loss 0.9995 (1.1110) acc 71.8750 (72.0181) lr 1.5358e-03 eta 6:05:33 +epoch [18/50] batch [420/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.1680 (1.1151) acc 68.7500 (71.9494) lr 1.5358e-03 eta 6:05:25 +epoch [18/50] batch [425/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.0410 (1.1146) acc 78.1250 (72.0000) lr 1.5358e-03 eta 6:05:17 +epoch [18/50] batch [430/500] time 1.364 (1.363) data 0.000 (0.002) loss 0.6533 (1.1146) acc 78.1250 (71.9913) lr 1.5358e-03 eta 6:05:10 +epoch [18/50] batch [435/500] time 1.365 (1.363) data 0.000 (0.002) loss 0.8345 (1.1136) acc 84.3750 
(72.0474) lr 1.5358e-03 eta 6:05:03 +epoch [18/50] batch [440/500] time 1.339 (1.363) data 0.000 (0.002) loss 1.3174 (1.1147) acc 75.0000 (72.0170) lr 1.5358e-03 eta 6:04:53 +epoch [18/50] batch [445/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.4980 (1.1161) acc 65.6250 (72.0014) lr 1.5358e-03 eta 6:04:44 +epoch [18/50] batch [450/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.3047 (1.1177) acc 59.3750 (71.9722) lr 1.5358e-03 eta 6:04:36 +epoch [18/50] batch [455/500] time 1.347 (1.363) data 0.000 (0.002) loss 1.1006 (1.1192) acc 71.8750 (71.9299) lr 1.5358e-03 eta 6:04:29 +epoch [18/50] batch [460/500] time 1.345 (1.363) data 0.000 (0.002) loss 1.0508 (1.1215) acc 75.0000 (71.8750) lr 1.5358e-03 eta 6:04:24 +epoch [18/50] batch [465/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.0020 (1.1243) acc 75.0000 (71.8616) lr 1.5358e-03 eta 6:04:17 +epoch [18/50] batch [470/500] time 1.339 (1.363) data 0.000 (0.002) loss 1.1514 (1.1229) acc 78.1250 (71.8816) lr 1.5358e-03 eta 6:04:08 +epoch [18/50] batch [475/500] time 1.387 (1.363) data 0.000 (0.002) loss 1.1787 (1.1232) acc 75.0000 (71.8882) lr 1.5358e-03 eta 6:04:03 +epoch [18/50] batch [480/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.4951 (1.1238) acc 62.5000 (71.8815) lr 1.5358e-03 eta 6:03:56 +epoch [18/50] batch [485/500] time 1.376 (1.363) data 0.001 (0.002) loss 0.8096 (1.1258) acc 84.3750 (71.8299) lr 1.5358e-03 eta 6:03:48 +epoch [18/50] batch [490/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8135 (1.1261) acc 87.5000 (71.8240) lr 1.5358e-03 eta 6:03:41 +epoch [18/50] batch [495/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.9229 (1.1255) acc 75.0000 (71.8497) lr 1.5358e-03 eta 6:03:34 +epoch [18/50] batch [500/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.0312 (1.1263) acc 71.8750 (71.7687) lr 1.4818e-03 eta 6:03:27 +epoch [19/50] batch [5/500] time 1.355 (1.565) data 0.000 (0.153) loss 0.8677 (1.2450) acc 78.1250 (68.1250) lr 1.4818e-03 eta 6:57:09 +epoch [19/50] batch [10/500] 
time 1.361 (1.464) data 0.000 (0.077) loss 1.0967 (1.1798) acc 59.3750 (69.6875) lr 1.4818e-03 eta 6:30:02 +epoch [19/50] batch [15/500] time 1.351 (1.428) data 0.001 (0.051) loss 1.1631 (1.1978) acc 71.8750 (69.7917) lr 1.4818e-03 eta 6:20:31 +epoch [19/50] batch [20/500] time 1.378 (1.412) data 0.000 (0.039) loss 0.5312 (1.1727) acc 81.2500 (70.0000) lr 1.4818e-03 eta 6:16:08 +epoch [19/50] batch [25/500] time 1.367 (1.402) data 0.000 (0.031) loss 0.7798 (1.1456) acc 75.0000 (70.8750) lr 1.4818e-03 eta 6:13:17 +epoch [19/50] batch [30/500] time 1.359 (1.395) data 0.000 (0.026) loss 1.3262 (1.1332) acc 68.7500 (71.5625) lr 1.4818e-03 eta 6:11:13 +epoch [19/50] batch [35/500] time 1.345 (1.388) data 0.001 (0.022) loss 0.6558 (1.0981) acc 81.2500 (72.5893) lr 1.4818e-03 eta 6:09:26 +epoch [19/50] batch [40/500] time 1.370 (1.383) data 0.000 (0.019) loss 1.7344 (1.1473) acc 62.5000 (71.7188) lr 1.4818e-03 eta 6:07:59 +epoch [19/50] batch [45/500] time 1.477 (1.383) data 0.000 (0.017) loss 1.3057 (1.1484) acc 75.0000 (72.0139) lr 1.4818e-03 eta 6:07:51 +epoch [19/50] batch [50/500] time 1.341 (1.381) data 0.000 (0.016) loss 0.5708 (1.1233) acc 78.1250 (72.3750) lr 1.4818e-03 eta 6:07:05 +epoch [19/50] batch [55/500] time 1.374 (1.379) data 0.000 (0.014) loss 1.4922 (1.1194) acc 62.5000 (72.3864) lr 1.4818e-03 eta 6:06:26 +epoch [19/50] batch [60/500] time 1.343 (1.377) data 0.000 (0.013) loss 1.0820 (1.1005) acc 71.8750 (72.8125) lr 1.4818e-03 eta 6:05:50 +epoch [19/50] batch [65/500] time 1.357 (1.376) data 0.000 (0.012) loss 1.2627 (1.0964) acc 68.7500 (72.8846) lr 1.4818e-03 eta 6:05:18 +epoch [19/50] batch [70/500] time 1.343 (1.374) data 0.000 (0.011) loss 0.9526 (1.1025) acc 71.8750 (72.5446) lr 1.4818e-03 eta 6:04:49 +epoch [19/50] batch [75/500] time 1.345 (1.373) data 0.000 (0.011) loss 1.7363 (1.1006) acc 65.6250 (72.7917) lr 1.4818e-03 eta 6:04:22 +epoch [19/50] batch [80/500] time 1.349 (1.372) data 0.000 (0.010) loss 1.4404 (1.1009) acc 75.0000 (72.8125) 
lr 1.4818e-03 eta 6:03:54 +epoch [19/50] batch [85/500] time 1.365 (1.371) data 0.001 (0.009) loss 1.1543 (1.0939) acc 78.1250 (73.1250) lr 1.4818e-03 eta 6:03:35 +epoch [19/50] batch [90/500] time 1.343 (1.370) data 0.000 (0.009) loss 0.4878 (1.0916) acc 81.2500 (73.2986) lr 1.4818e-03 eta 6:03:17 +epoch [19/50] batch [95/500] time 1.356 (1.369) data 0.000 (0.008) loss 0.8359 (1.0881) acc 78.1250 (73.4868) lr 1.4818e-03 eta 6:02:57 +epoch [19/50] batch [100/500] time 1.372 (1.368) data 0.000 (0.008) loss 1.4463 (1.0909) acc 71.8750 (73.3438) lr 1.4818e-03 eta 6:02:39 +epoch [19/50] batch [105/500] time 1.347 (1.368) data 0.000 (0.008) loss 1.1729 (1.0992) acc 68.7500 (73.0060) lr 1.4818e-03 eta 6:02:23 +epoch [19/50] batch [110/500] time 1.366 (1.367) data 0.000 (0.007) loss 0.7812 (1.1106) acc 75.0000 (72.8693) lr 1.4818e-03 eta 6:02:08 +epoch [19/50] batch [115/500] time 1.360 (1.367) data 0.000 (0.007) loss 0.8208 (1.1086) acc 68.7500 (72.8533) lr 1.4818e-03 eta 6:01:57 +epoch [19/50] batch [120/500] time 1.367 (1.367) data 0.000 (0.007) loss 0.7603 (1.1079) acc 81.2500 (72.8906) lr 1.4818e-03 eta 6:01:47 +epoch [19/50] batch [125/500] time 1.372 (1.367) data 0.000 (0.006) loss 0.9106 (1.1121) acc 78.1250 (72.8000) lr 1.4818e-03 eta 6:01:39 +epoch [19/50] batch [130/500] time 1.352 (1.366) data 0.000 (0.006) loss 1.2930 (1.1209) acc 65.6250 (72.5962) lr 1.4818e-03 eta 6:01:25 +epoch [19/50] batch [135/500] time 1.376 (1.366) data 0.000 (0.006) loss 0.9385 (1.1147) acc 75.0000 (72.7315) lr 1.4818e-03 eta 6:01:15 +epoch [19/50] batch [140/500] time 1.362 (1.366) data 0.000 (0.006) loss 0.9458 (1.1137) acc 78.1250 (72.7679) lr 1.4818e-03 eta 6:01:05 +epoch [19/50] batch [145/500] time 1.372 (1.367) data 0.000 (0.006) loss 1.0674 (1.1037) acc 71.8750 (72.9526) lr 1.4818e-03 eta 6:01:13 +epoch [19/50] batch [150/500] time 1.358 (1.367) data 0.000 (0.005) loss 1.1094 (1.0957) acc 78.1250 (73.1250) lr 1.4818e-03 eta 6:01:04 +epoch [19/50] batch [155/500] time 1.363 
(1.367) data 0.000 (0.005) loss 0.6172 (1.0938) acc 90.6250 (73.1250) lr 1.4818e-03 eta 6:00:55 +epoch [19/50] batch [160/500] time 1.363 (1.366) data 0.000 (0.005) loss 0.8721 (1.0952) acc 78.1250 (73.2031) lr 1.4818e-03 eta 6:00:44 +epoch [19/50] batch [165/500] time 1.351 (1.366) data 0.000 (0.005) loss 1.4727 (1.0983) acc 75.0000 (73.1439) lr 1.4818e-03 eta 6:00:34 +epoch [19/50] batch [170/500] time 1.362 (1.366) data 0.000 (0.005) loss 0.7109 (1.1003) acc 87.5000 (73.1985) lr 1.4818e-03 eta 6:00:27 +epoch [19/50] batch [175/500] time 1.346 (1.366) data 0.000 (0.005) loss 0.8521 (1.1047) acc 81.2500 (73.1607) lr 1.4818e-03 eta 6:00:14 +epoch [19/50] batch [180/500] time 1.374 (1.366) data 0.000 (0.005) loss 1.6494 (1.1057) acc 65.6250 (73.0556) lr 1.4818e-03 eta 6:00:05 +epoch [19/50] batch [185/500] time 1.344 (1.366) data 0.000 (0.004) loss 0.7783 (1.1051) acc 78.1250 (73.1081) lr 1.4818e-03 eta 5:59:55 +epoch [19/50] batch [190/500] time 1.343 (1.366) data 0.000 (0.004) loss 0.7051 (1.1040) acc 87.5000 (73.0592) lr 1.4818e-03 eta 5:59:54 +epoch [19/50] batch [195/500] time 1.372 (1.366) data 0.000 (0.004) loss 0.7876 (1.1044) acc 81.2500 (73.0449) lr 1.4818e-03 eta 5:59:50 +epoch [19/50] batch [200/500] time 1.359 (1.366) data 0.000 (0.004) loss 2.1504 (1.1100) acc 50.0000 (72.8125) lr 1.4818e-03 eta 5:59:40 +epoch [19/50] batch [205/500] time 1.349 (1.366) data 0.000 (0.004) loss 0.8823 (1.1073) acc 68.7500 (72.8049) lr 1.4818e-03 eta 5:59:32 +epoch [19/50] batch [210/500] time 1.362 (1.366) data 0.000 (0.004) loss 0.5542 (1.1069) acc 87.5000 (72.9167) lr 1.4818e-03 eta 5:59:24 +epoch [19/50] batch [215/500] time 1.378 (1.366) data 0.000 (0.004) loss 1.0225 (1.1142) acc 71.8750 (72.7907) lr 1.4818e-03 eta 5:59:17 +epoch [19/50] batch [220/500] time 1.359 (1.366) data 0.000 (0.004) loss 1.1162 (1.1158) acc 78.1250 (72.7841) lr 1.4818e-03 eta 5:59:12 +epoch [19/50] batch [225/500] time 1.341 (1.366) data 0.000 (0.004) loss 1.1221 (1.1170) acc 59.3750 
(72.6667) lr 1.4818e-03 eta 5:59:02 +epoch [19/50] batch [230/500] time 1.382 (1.366) data 0.000 (0.004) loss 0.8159 (1.1160) acc 84.3750 (72.6902) lr 1.4818e-03 eta 5:58:57 +epoch [19/50] batch [235/500] time 1.341 (1.366) data 0.000 (0.004) loss 0.9570 (1.1161) acc 75.0000 (72.6995) lr 1.4818e-03 eta 5:58:49 +epoch [19/50] batch [240/500] time 1.354 (1.366) data 0.000 (0.004) loss 1.4922 (1.1171) acc 68.7500 (72.6953) lr 1.4818e-03 eta 5:58:41 +epoch [19/50] batch [245/500] time 1.379 (1.365) data 0.000 (0.003) loss 1.4629 (1.1196) acc 56.2500 (72.6403) lr 1.4818e-03 eta 5:58:30 +epoch [19/50] batch [250/500] time 1.369 (1.366) data 0.000 (0.003) loss 0.6680 (1.1195) acc 78.1250 (72.6125) lr 1.4818e-03 eta 5:58:27 +epoch [19/50] batch [255/500] time 1.366 (1.365) data 0.000 (0.003) loss 1.1641 (1.1175) acc 75.0000 (72.6103) lr 1.4818e-03 eta 5:58:19 +epoch [19/50] batch [260/500] time 1.369 (1.365) data 0.000 (0.003) loss 1.1650 (1.1186) acc 75.0000 (72.5601) lr 1.4818e-03 eta 5:58:10 +epoch [19/50] batch [265/500] time 1.371 (1.365) data 0.000 (0.003) loss 1.5146 (1.1217) acc 68.7500 (72.5825) lr 1.4818e-03 eta 5:58:05 +epoch [19/50] batch [270/500] time 1.367 (1.365) data 0.000 (0.003) loss 1.2607 (1.1219) acc 71.8750 (72.5810) lr 1.4818e-03 eta 5:57:54 +epoch [19/50] batch [275/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.1992 (1.1243) acc 68.7500 (72.4773) lr 1.4818e-03 eta 5:57:47 +epoch [19/50] batch [280/500] time 1.365 (1.365) data 0.000 (0.003) loss 1.2236 (1.1232) acc 78.1250 (72.5335) lr 1.4818e-03 eta 5:57:35 +epoch [19/50] batch [285/500] time 1.350 (1.365) data 0.000 (0.003) loss 1.0361 (1.1234) acc 81.2500 (72.5000) lr 1.4818e-03 eta 5:57:27 +epoch [19/50] batch [290/500] time 1.353 (1.365) data 0.000 (0.003) loss 0.8369 (1.1192) acc 75.0000 (72.5754) lr 1.4818e-03 eta 5:57:23 +epoch [19/50] batch [295/500] time 1.363 (1.365) data 0.000 (0.003) loss 0.9209 (1.1176) acc 68.7500 (72.5742) lr 1.4818e-03 eta 5:57:14 +epoch [19/50] batch [300/500] 
time 1.367 (1.365) data 0.000 (0.003) loss 0.9575 (1.1156) acc 81.2500 (72.6875) lr 1.4818e-03 eta 5:57:06 +epoch [19/50] batch [305/500] time 1.372 (1.365) data 0.000 (0.003) loss 1.0303 (1.1129) acc 62.5000 (72.6639) lr 1.4818e-03 eta 5:56:58 +epoch [19/50] batch [310/500] time 1.339 (1.364) data 0.000 (0.003) loss 0.9614 (1.1152) acc 78.1250 (72.6109) lr 1.4818e-03 eta 5:56:48 +epoch [19/50] batch [315/500] time 1.363 (1.364) data 0.000 (0.003) loss 0.7593 (1.1117) acc 84.3750 (72.6687) lr 1.4818e-03 eta 5:56:39 +epoch [19/50] batch [320/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.7485 (1.1103) acc 81.2500 (72.7246) lr 1.4818e-03 eta 5:56:31 +epoch [19/50] batch [325/500] time 1.372 (1.364) data 0.000 (0.003) loss 0.9819 (1.1113) acc 68.7500 (72.6538) lr 1.4818e-03 eta 5:56:24 +epoch [19/50] batch [330/500] time 1.370 (1.364) data 0.000 (0.003) loss 0.6797 (1.1103) acc 78.1250 (72.6610) lr 1.4818e-03 eta 5:56:19 +epoch [19/50] batch [335/500] time 1.371 (1.365) data 0.000 (0.003) loss 0.9995 (1.1137) acc 78.1250 (72.5746) lr 1.4818e-03 eta 5:56:18 +epoch [19/50] batch [340/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.0186 (1.1140) acc 78.1250 (72.6103) lr 1.4818e-03 eta 5:56:11 +epoch [19/50] batch [345/500] time 1.373 (1.365) data 0.000 (0.003) loss 1.3320 (1.1153) acc 71.8750 (72.5725) lr 1.4818e-03 eta 5:56:07 +epoch [19/50] batch [350/500] time 1.369 (1.365) data 0.000 (0.003) loss 1.4619 (1.1179) acc 65.6250 (72.5268) lr 1.4818e-03 eta 5:56:00 +epoch [19/50] batch [355/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.0029 (1.1193) acc 71.8750 (72.4736) lr 1.4818e-03 eta 5:55:54 +epoch [19/50] batch [360/500] time 1.351 (1.365) data 0.000 (0.002) loss 1.4346 (1.1190) acc 59.3750 (72.4566) lr 1.4818e-03 eta 5:55:47 +epoch [19/50] batch [365/500] time 1.361 (1.365) data 0.000 (0.002) loss 0.8511 (1.1213) acc 71.8750 (72.4229) lr 1.4818e-03 eta 5:55:38 +epoch [19/50] batch [370/500] time 1.359 (1.365) data 0.000 (0.002) loss 1.5527 (1.1239) acc 
65.6250 (72.4071) lr 1.4818e-03 eta 5:55:29 +epoch [19/50] batch [375/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.3701 (1.1263) acc 62.5000 (72.3583) lr 1.4818e-03 eta 5:55:22 +epoch [19/50] batch [380/500] time 1.370 (1.365) data 0.000 (0.002) loss 1.3525 (1.1276) acc 56.2500 (72.2862) lr 1.4818e-03 eta 5:55:14 +epoch [19/50] batch [385/500] time 1.374 (1.364) data 0.000 (0.002) loss 0.9800 (1.1294) acc 68.7500 (72.2078) lr 1.4818e-03 eta 5:55:06 +epoch [19/50] batch [390/500] time 1.342 (1.364) data 0.000 (0.002) loss 1.4326 (1.1339) acc 59.3750 (72.1554) lr 1.4818e-03 eta 5:54:58 +epoch [19/50] batch [395/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.0889 (1.1324) acc 65.6250 (72.1440) lr 1.4818e-03 eta 5:54:49 +epoch [19/50] batch [400/500] time 1.344 (1.364) data 0.000 (0.002) loss 1.3447 (1.1325) acc 65.6250 (72.1641) lr 1.4818e-03 eta 5:54:40 +epoch [19/50] batch [405/500] time 1.347 (1.364) data 0.001 (0.002) loss 0.7192 (1.1303) acc 75.0000 (72.2068) lr 1.4818e-03 eta 5:54:32 +epoch [19/50] batch [410/500] time 1.356 (1.364) data 0.000 (0.002) loss 0.8164 (1.1311) acc 81.2500 (72.1570) lr 1.4818e-03 eta 5:54:25 +epoch [19/50] batch [415/500] time 1.376 (1.364) data 0.000 (0.002) loss 1.0127 (1.1292) acc 68.7500 (72.1913) lr 1.4818e-03 eta 5:54:17 +epoch [19/50] batch [420/500] time 1.356 (1.364) data 0.001 (0.002) loss 1.0107 (1.1284) acc 71.8750 (72.1726) lr 1.4818e-03 eta 5:54:09 +epoch [19/50] batch [425/500] time 1.350 (1.364) data 0.000 (0.002) loss 0.4922 (1.1262) acc 90.6250 (72.1985) lr 1.4818e-03 eta 5:54:02 +epoch [19/50] batch [430/500] time 1.507 (1.364) data 0.000 (0.002) loss 1.0439 (1.1242) acc 68.7500 (72.2456) lr 1.4818e-03 eta 5:53:58 +epoch [19/50] batch [435/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.1689 (1.1263) acc 65.6250 (72.2055) lr 1.4818e-03 eta 5:53:49 +epoch [19/50] batch [440/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.4639 (1.1263) acc 65.6250 (72.2088) lr 1.4818e-03 eta 5:53:39 +epoch [19/50] batch 
[445/500] time 1.362 (1.364) data 0.000 (0.002) loss 0.7427 (1.1229) acc 84.3750 (72.2963) lr 1.4818e-03 eta 5:53:31 +epoch [19/50] batch [450/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.2539 (1.1239) acc 59.3750 (72.2153) lr 1.4818e-03 eta 5:53:22 +epoch [19/50] batch [455/500] time 1.374 (1.363) data 0.000 (0.002) loss 0.7222 (1.1237) acc 84.3750 (72.2184) lr 1.4818e-03 eta 5:53:15 +epoch [19/50] batch [460/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.9468 (1.1211) acc 78.1250 (72.2894) lr 1.4818e-03 eta 5:53:07 +epoch [19/50] batch [465/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.0449 (1.1211) acc 65.6250 (72.3185) lr 1.4818e-03 eta 5:53:00 +epoch [19/50] batch [470/500] time 1.360 (1.363) data 0.000 (0.002) loss 0.3945 (1.1241) acc 87.5000 (72.2340) lr 1.4818e-03 eta 5:52:51 +epoch [19/50] batch [475/500] time 1.364 (1.364) data 0.000 (0.002) loss 0.9238 (1.1231) acc 71.8750 (72.2039) lr 1.4818e-03 eta 5:52:48 +epoch [19/50] batch [480/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.1602 (1.1248) acc 68.7500 (72.1875) lr 1.4818e-03 eta 5:52:40 +epoch [19/50] batch [485/500] time 1.342 (1.363) data 0.001 (0.002) loss 1.3496 (1.1248) acc 71.8750 (72.1907) lr 1.4818e-03 eta 5:52:31 +epoch [19/50] batch [490/500] time 1.375 (1.363) data 0.000 (0.002) loss 0.9443 (1.1242) acc 81.2500 (72.2003) lr 1.4818e-03 eta 5:52:23 +epoch [19/50] batch [495/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.4287 (1.1259) acc 71.8750 (72.1275) lr 1.4818e-03 eta 5:52:15 +epoch [19/50] batch [500/500] time 1.376 (1.363) data 0.000 (0.002) loss 0.6421 (1.1258) acc 84.3750 (72.1437) lr 1.4258e-03 eta 5:52:07 +epoch [20/50] batch [5/500] time 1.356 (1.526) data 0.001 (0.156) loss 1.1484 (1.2323) acc 68.7500 (68.1250) lr 1.4258e-03 eta 6:34:06 +epoch [20/50] batch [10/500] time 1.363 (1.442) data 0.000 (0.078) loss 1.3164 (1.2891) acc 62.5000 (66.5625) lr 1.4258e-03 eta 6:12:09 +epoch [20/50] batch [15/500] time 1.356 (1.415) data 0.000 (0.052) loss 0.8579 (1.1994) 
acc 75.0000 (68.7500) lr 1.4258e-03 eta 6:05:17 +epoch [20/50] batch [20/500] time 1.347 (1.408) data 0.000 (0.039) loss 0.9370 (1.1341) acc 78.1250 (70.3125) lr 1.4258e-03 eta 6:03:09 +epoch [20/50] batch [25/500] time 1.359 (1.397) data 0.000 (0.031) loss 1.8564 (1.1339) acc 62.5000 (70.3750) lr 1.4258e-03 eta 6:00:21 +epoch [20/50] batch [30/500] time 1.369 (1.389) data 0.000 (0.026) loss 1.0059 (1.1243) acc 65.6250 (70.6250) lr 1.4258e-03 eta 5:58:10 +epoch [20/50] batch [35/500] time 1.352 (1.384) data 0.000 (0.023) loss 1.4268 (1.1110) acc 62.5000 (70.8036) lr 1.4258e-03 eta 5:56:44 +epoch [20/50] batch [40/500] time 1.367 (1.382) data 0.001 (0.020) loss 1.3936 (1.1103) acc 62.5000 (70.6250) lr 1.4258e-03 eta 5:56:11 +epoch [20/50] batch [45/500] time 1.379 (1.380) data 0.000 (0.018) loss 0.8301 (1.1109) acc 71.8750 (70.4861) lr 1.4258e-03 eta 5:55:32 +epoch [20/50] batch [50/500] time 1.373 (1.378) data 0.000 (0.016) loss 1.2119 (1.1193) acc 75.0000 (70.3125) lr 1.4258e-03 eta 5:54:56 +epoch [20/50] batch [55/500] time 1.366 (1.377) data 0.000 (0.014) loss 1.4648 (1.1046) acc 68.7500 (70.6250) lr 1.4258e-03 eta 5:54:24 +epoch [20/50] batch [60/500] time 1.355 (1.377) data 0.000 (0.013) loss 1.3672 (1.0980) acc 71.8750 (71.1458) lr 1.4258e-03 eta 5:54:24 +epoch [20/50] batch [65/500] time 1.360 (1.376) data 0.000 (0.012) loss 1.1230 (1.0882) acc 59.3750 (71.2500) lr 1.4258e-03 eta 5:54:04 +epoch [20/50] batch [70/500] time 1.353 (1.375) data 0.000 (0.011) loss 1.0303 (1.0829) acc 68.7500 (71.1607) lr 1.4258e-03 eta 5:53:36 +epoch [20/50] batch [75/500] time 1.354 (1.374) data 0.000 (0.011) loss 0.7373 (1.0940) acc 78.1250 (70.8750) lr 1.4258e-03 eta 5:53:07 +epoch [20/50] batch [80/500] time 1.343 (1.372) data 0.000 (0.010) loss 0.9844 (1.0982) acc 75.0000 (70.8984) lr 1.4258e-03 eta 5:52:43 +epoch [20/50] batch [85/500] time 1.346 (1.372) data 0.000 (0.009) loss 1.1328 (1.0919) acc 78.1250 (70.9559) lr 1.4258e-03 eta 5:52:22 +epoch [20/50] batch [90/500] 
time 1.367 (1.371) data 0.000 (0.009) loss 1.0957 (1.1010) acc 78.1250 (70.8333) lr 1.4258e-03 eta 5:52:03 +epoch [20/50] batch [95/500] time 1.373 (1.371) data 0.000 (0.009) loss 1.0439 (1.0939) acc 71.8750 (70.8882) lr 1.4258e-03 eta 5:51:52 +epoch [20/50] batch [100/500] time 1.348 (1.370) data 0.000 (0.008) loss 0.8691 (1.0867) acc 75.0000 (71.1562) lr 1.4258e-03 eta 5:51:38 +epoch [20/50] batch [105/500] time 1.360 (1.369) data 0.000 (0.008) loss 1.6230 (1.0962) acc 65.6250 (71.2202) lr 1.4258e-03 eta 5:51:21 +epoch [20/50] batch [110/500] time 1.355 (1.369) data 0.000 (0.007) loss 1.7432 (1.1066) acc 68.7500 (71.2500) lr 1.4258e-03 eta 5:51:16 +epoch [20/50] batch [115/500] time 1.366 (1.369) data 0.001 (0.007) loss 1.2061 (1.0977) acc 68.7500 (71.6033) lr 1.4258e-03 eta 5:51:09 +epoch [20/50] batch [120/500] time 1.365 (1.370) data 0.000 (0.007) loss 0.9102 (1.0913) acc 65.6250 (71.7188) lr 1.4258e-03 eta 5:51:06 +epoch [20/50] batch [125/500] time 1.361 (1.369) data 0.000 (0.007) loss 1.4512 (1.0944) acc 56.2500 (71.6250) lr 1.4258e-03 eta 5:50:55 +epoch [20/50] batch [130/500] time 1.366 (1.369) data 0.000 (0.006) loss 1.3398 (1.0891) acc 68.7500 (71.8990) lr 1.4258e-03 eta 5:50:40 +epoch [20/50] batch [135/500] time 1.371 (1.369) data 0.000 (0.006) loss 1.0547 (1.0850) acc 75.0000 (71.8750) lr 1.4258e-03 eta 5:50:27 +epoch [20/50] batch [140/500] time 1.360 (1.368) data 0.000 (0.006) loss 1.9219 (1.0858) acc 53.1250 (72.0089) lr 1.4258e-03 eta 5:50:17 +epoch [20/50] batch [145/500] time 1.341 (1.368) data 0.000 (0.006) loss 1.1523 (1.0796) acc 71.8750 (72.0043) lr 1.4258e-03 eta 5:50:02 +epoch [20/50] batch [150/500] time 1.354 (1.368) data 0.000 (0.006) loss 0.8481 (1.0810) acc 71.8750 (71.9792) lr 1.4258e-03 eta 5:49:52 +epoch [20/50] batch [155/500] time 1.355 (1.367) data 0.000 (0.005) loss 0.9512 (1.0820) acc 68.7500 (71.9355) lr 1.4258e-03 eta 5:49:40 +epoch [20/50] batch [160/500] time 1.351 (1.367) data 0.000 (0.005) loss 0.9717 (1.0820) acc 
81.2500 (72.0703) lr 1.4258e-03 eta 5:49:24 +epoch [20/50] batch [165/500] time 1.364 (1.367) data 0.000 (0.005) loss 1.4297 (1.0826) acc 68.7500 (71.9697) lr 1.4258e-03 eta 5:49:25 +epoch [20/50] batch [170/500] time 1.376 (1.367) data 0.000 (0.005) loss 0.9272 (1.0804) acc 81.2500 (72.0037) lr 1.4258e-03 eta 5:49:21 +epoch [20/50] batch [175/500] time 1.365 (1.367) data 0.000 (0.005) loss 1.2803 (1.0846) acc 65.6250 (71.8036) lr 1.4258e-03 eta 5:49:10 +epoch [20/50] batch [180/500] time 1.380 (1.367) data 0.000 (0.005) loss 1.2100 (1.0843) acc 68.7500 (71.9097) lr 1.4258e-03 eta 5:49:06 +epoch [20/50] batch [185/500] time 1.343 (1.367) data 0.000 (0.005) loss 1.2588 (1.0895) acc 62.5000 (71.7568) lr 1.4258e-03 eta 5:48:56 +epoch [20/50] batch [190/500] time 1.368 (1.367) data 0.000 (0.004) loss 1.0352 (1.0889) acc 75.0000 (71.8750) lr 1.4258e-03 eta 5:48:47 +epoch [20/50] batch [195/500] time 1.353 (1.367) data 0.000 (0.004) loss 0.7646 (1.0948) acc 84.3750 (71.7628) lr 1.4258e-03 eta 5:48:42 +epoch [20/50] batch [200/500] time 1.370 (1.367) data 0.000 (0.004) loss 0.9639 (1.0908) acc 81.2500 (71.8906) lr 1.4258e-03 eta 5:48:32 +epoch [20/50] batch [205/500] time 1.363 (1.367) data 0.000 (0.004) loss 1.3525 (1.0950) acc 59.3750 (71.7835) lr 1.4258e-03 eta 5:48:23 +epoch [20/50] batch [210/500] time 1.348 (1.367) data 0.000 (0.004) loss 1.5273 (1.1019) acc 78.1250 (71.6369) lr 1.4258e-03 eta 5:48:16 +epoch [20/50] batch [215/500] time 1.365 (1.367) data 0.000 (0.004) loss 1.7207 (1.1057) acc 62.5000 (71.5988) lr 1.4258e-03 eta 5:48:07 +epoch [20/50] batch [220/500] time 1.378 (1.366) data 0.000 (0.004) loss 1.0420 (1.0994) acc 75.0000 (71.7614) lr 1.4258e-03 eta 5:47:59 +epoch [20/50] batch [225/500] time 1.371 (1.366) data 0.000 (0.004) loss 0.9653 (1.0990) acc 81.2500 (71.8750) lr 1.4258e-03 eta 5:47:49 +epoch [20/50] batch [230/500] time 1.330 (1.366) data 0.000 (0.004) loss 0.9839 (1.1004) acc 75.0000 (71.8750) lr 1.4258e-03 eta 5:47:38 +epoch [20/50] batch 
[235/500] time 1.361 (1.366) data 0.000 (0.004) loss 1.2559 (1.1045) acc 71.8750 (71.8085) lr 1.4258e-03 eta 5:47:31 +epoch [20/50] batch [240/500] time 1.359 (1.366) data 0.000 (0.004) loss 1.3633 (1.1064) acc 50.0000 (71.7057) lr 1.4258e-03 eta 5:47:22 +epoch [20/50] batch [245/500] time 1.362 (1.366) data 0.001 (0.004) loss 1.1162 (1.1100) acc 65.6250 (71.6327) lr 1.4258e-03 eta 5:47:14 +epoch [20/50] batch [250/500] time 1.371 (1.366) data 0.000 (0.003) loss 1.0312 (1.1098) acc 75.0000 (71.6000) lr 1.4258e-03 eta 5:47:09 +epoch [20/50] batch [255/500] time 1.371 (1.366) data 0.001 (0.003) loss 1.7461 (1.1147) acc 65.6250 (71.5074) lr 1.4258e-03 eta 5:46:59 +epoch [20/50] batch [260/500] time 1.488 (1.366) data 0.001 (0.003) loss 1.1553 (1.1105) acc 62.5000 (71.6106) lr 1.4258e-03 eta 5:46:56 +epoch [20/50] batch [265/500] time 1.347 (1.366) data 0.001 (0.003) loss 0.9731 (1.1082) acc 78.1250 (71.6038) lr 1.4258e-03 eta 5:46:45 +epoch [20/50] batch [270/500] time 1.348 (1.365) data 0.000 (0.003) loss 0.7153 (1.1087) acc 68.7500 (71.5394) lr 1.4258e-03 eta 5:46:36 +epoch [20/50] batch [275/500] time 1.354 (1.365) data 0.000 (0.003) loss 1.6602 (1.1086) acc 68.7500 (71.6136) lr 1.4258e-03 eta 5:46:26 +epoch [20/50] batch [280/500] time 1.371 (1.365) data 0.000 (0.003) loss 0.7051 (1.1082) acc 81.2500 (71.7188) lr 1.4258e-03 eta 5:46:18 +epoch [20/50] batch [285/500] time 1.363 (1.365) data 0.000 (0.003) loss 1.1104 (1.1120) acc 75.0000 (71.6996) lr 1.4258e-03 eta 5:46:09 +epoch [20/50] batch [290/500] time 1.374 (1.365) data 0.000 (0.003) loss 1.2607 (1.1128) acc 65.6250 (71.6487) lr 1.4258e-03 eta 5:46:02 +epoch [20/50] batch [295/500] time 1.358 (1.365) data 0.000 (0.003) loss 0.5630 (1.1073) acc 84.3750 (71.7797) lr 1.4258e-03 eta 5:45:56 +epoch [20/50] batch [300/500] time 1.378 (1.365) data 0.000 (0.003) loss 1.9053 (1.1101) acc 59.3750 (71.7708) lr 1.4258e-03 eta 5:45:47 +epoch [20/50] batch [305/500] time 1.348 (1.365) data 0.000 (0.003) loss 0.9180 
(1.1118) acc 75.0000 (71.7725) lr 1.4258e-03 eta 5:45:44 +epoch [20/50] batch [310/500] time 1.373 (1.365) data 0.000 (0.003) loss 1.6943 (1.1138) acc 62.5000 (71.7339) lr 1.4258e-03 eta 5:45:34 +epoch [20/50] batch [315/500] time 1.339 (1.365) data 0.000 (0.003) loss 0.9668 (1.1101) acc 68.7500 (71.7560) lr 1.4258e-03 eta 5:45:24 +epoch [20/50] batch [320/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.3105 (1.1134) acc 65.6250 (71.6797) lr 1.4258e-03 eta 5:45:15 +epoch [20/50] batch [325/500] time 1.371 (1.365) data 0.000 (0.003) loss 1.1914 (1.1100) acc 68.7500 (71.7500) lr 1.4258e-03 eta 5:45:07 +epoch [20/50] batch [330/500] time 1.370 (1.365) data 0.000 (0.003) loss 1.3809 (1.1110) acc 50.0000 (71.6856) lr 1.4258e-03 eta 5:45:00 +epoch [20/50] batch [335/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.1855 (1.1128) acc 68.7500 (71.7071) lr 1.4258e-03 eta 5:44:51 +epoch [20/50] batch [340/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.3887 (1.1160) acc 68.7500 (71.6728) lr 1.4258e-03 eta 5:44:44 +epoch [20/50] batch [345/500] time 1.359 (1.364) data 0.000 (0.003) loss 1.1592 (1.1181) acc 71.8750 (71.6757) lr 1.4258e-03 eta 5:44:35 +epoch [20/50] batch [350/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.4834 (1.1199) acc 75.0000 (71.6518) lr 1.4258e-03 eta 5:44:26 +epoch [20/50] batch [355/500] time 1.340 (1.364) data 0.000 (0.003) loss 0.8770 (1.1211) acc 68.7500 (71.6021) lr 1.4258e-03 eta 5:44:18 +epoch [20/50] batch [360/500] time 1.370 (1.364) data 0.000 (0.003) loss 1.3438 (1.1215) acc 68.7500 (71.6319) lr 1.4258e-03 eta 5:44:09 +epoch [20/50] batch [365/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.1807 (1.1231) acc 68.7500 (71.5668) lr 1.4258e-03 eta 5:44:01 +epoch [20/50] batch [370/500] time 1.383 (1.364) data 0.000 (0.002) loss 0.9771 (1.1247) acc 78.1250 (71.5963) lr 1.4258e-03 eta 5:43:55 +epoch [20/50] batch [375/500] time 1.370 (1.364) data 0.000 (0.002) loss 1.2393 (1.1258) acc 71.8750 (71.5750) lr 1.4258e-03 eta 5:43:48 +epoch 
[20/50] batch [380/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.0068 (1.1226) acc 68.7500 (71.6201) lr 1.4258e-03 eta 5:43:39 +epoch [20/50] batch [385/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.2520 (1.1206) acc 68.7500 (71.6153) lr 1.4258e-03 eta 5:43:30 +epoch [20/50] batch [390/500] time 1.361 (1.363) data 0.000 (0.002) loss 0.6387 (1.1211) acc 81.2500 (71.6026) lr 1.4258e-03 eta 5:43:21 +epoch [20/50] batch [395/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.6958 (1.1194) acc 78.1250 (71.6614) lr 1.4258e-03 eta 5:43:12 +epoch [20/50] batch [400/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.2871 (1.1186) acc 68.7500 (71.6953) lr 1.4258e-03 eta 5:43:05 +epoch [20/50] batch [405/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.5703 (1.1181) acc 59.3750 (71.7438) lr 1.4258e-03 eta 5:43:04 +epoch [20/50] batch [410/500] time 1.333 (1.364) data 0.000 (0.002) loss 0.9502 (1.1161) acc 71.8750 (71.7988) lr 1.4258e-03 eta 5:42:55 +epoch [20/50] batch [415/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.4570 (1.1149) acc 65.6250 (71.8298) lr 1.4258e-03 eta 5:42:47 +epoch [20/50] batch [420/500] time 1.343 (1.363) data 0.001 (0.002) loss 0.7500 (1.1169) acc 81.2500 (71.7783) lr 1.4258e-03 eta 5:42:39 +epoch [20/50] batch [425/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.0898 (1.1166) acc 71.8750 (71.7868) lr 1.4258e-03 eta 5:42:32 +epoch [20/50] batch [430/500] time 1.371 (1.363) data 0.000 (0.002) loss 1.8027 (1.1198) acc 62.5000 (71.7587) lr 1.4258e-03 eta 5:42:25 +epoch [20/50] batch [435/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.2891 (1.1187) acc 71.8750 (71.7960) lr 1.4258e-03 eta 5:42:17 +epoch [20/50] batch [440/500] time 1.344 (1.363) data 0.000 (0.002) loss 1.6816 (1.1216) acc 59.3750 (71.7614) lr 1.4258e-03 eta 5:42:07 +epoch [20/50] batch [445/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.3242 (1.1197) acc 75.0000 (71.8258) lr 1.4258e-03 eta 5:41:59 +epoch [20/50] batch [450/500] time 1.348 (1.363) data 0.000 (0.002) loss 
0.7700 (1.1154) acc 78.1250 (71.9236) lr 1.4258e-03 eta 5:41:56 +epoch [20/50] batch [455/500] time 1.340 (1.363) data 0.001 (0.002) loss 1.5850 (1.1151) acc 62.5000 (71.9505) lr 1.4258e-03 eta 5:41:48 +epoch [20/50] batch [460/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.0176 (1.1147) acc 75.0000 (71.9497) lr 1.4258e-03 eta 5:41:39 +epoch [20/50] batch [465/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8091 (1.1143) acc 81.2500 (71.9624) lr 1.4258e-03 eta 5:41:29 +epoch [20/50] batch [470/500] time 1.345 (1.363) data 0.000 (0.002) loss 1.1826 (1.1143) acc 71.8750 (71.9614) lr 1.4258e-03 eta 5:41:20 +epoch [20/50] batch [475/500] time 1.375 (1.363) data 0.000 (0.002) loss 1.5762 (1.1193) acc 56.2500 (71.9013) lr 1.4258e-03 eta 5:41:12 +epoch [20/50] batch [480/500] time 1.326 (1.362) data 0.000 (0.002) loss 0.8013 (1.1182) acc 71.8750 (71.9206) lr 1.4258e-03 eta 5:41:04 +epoch [20/50] batch [485/500] time 1.347 (1.362) data 0.001 (0.002) loss 1.1035 (1.1181) acc 68.7500 (71.9072) lr 1.4258e-03 eta 5:40:55 +epoch [20/50] batch [490/500] time 1.352 (1.362) data 0.000 (0.002) loss 1.1367 (1.1174) acc 81.2500 (71.9388) lr 1.4258e-03 eta 5:40:46 +epoch [20/50] batch [495/500] time 1.335 (1.362) data 0.000 (0.002) loss 1.3506 (1.1190) acc 81.2500 (71.9255) lr 1.4258e-03 eta 5:40:38 +epoch [20/50] batch [500/500] time 1.357 (1.362) data 0.000 (0.002) loss 0.8628 (1.1183) acc 81.2500 (71.9625) lr 1.3681e-03 eta 5:40:31 +epoch [21/50] batch [5/500] time 1.348 (1.512) data 0.000 (0.150) loss 1.2129 (1.1723) acc 71.8750 (71.2500) lr 1.3681e-03 eta 6:17:56 +epoch [21/50] batch [10/500] time 1.375 (1.441) data 0.000 (0.075) loss 0.6167 (0.9591) acc 81.2500 (73.7500) lr 1.3681e-03 eta 6:00:01 +epoch [21/50] batch [15/500] time 1.369 (1.416) data 0.000 (0.050) loss 1.1045 (1.1557) acc 65.6250 (70.6250) lr 1.3681e-03 eta 5:53:42 +epoch [21/50] batch [20/500] time 1.372 (1.401) data 0.000 (0.038) loss 0.7617 (1.1667) acc 87.5000 (71.7188) lr 1.3681e-03 eta 5:49:52 +epoch 
[21/50] batch [25/500] time 1.348 (1.391) data 0.000 (0.030) loss 1.0049 (1.1713) acc 81.2500 (71.7500) lr 1.3681e-03 eta 5:47:12 +epoch [21/50] batch [30/500] time 1.361 (1.387) data 0.000 (0.025) loss 1.3193 (1.1976) acc 71.8750 (71.5625) lr 1.3681e-03 eta 5:46:00 +epoch [21/50] batch [35/500] time 1.350 (1.384) data 0.000 (0.022) loss 0.8838 (1.1518) acc 71.8750 (72.2321) lr 1.3681e-03 eta 5:45:17 +epoch [21/50] batch [40/500] time 1.371 (1.381) data 0.000 (0.019) loss 1.4834 (1.1496) acc 59.3750 (72.0312) lr 1.3681e-03 eta 5:44:21 +epoch [21/50] batch [45/500] time 1.369 (1.379) data 0.000 (0.017) loss 0.7227 (1.1521) acc 84.3750 (71.8750) lr 1.3681e-03 eta 5:43:43 +epoch [21/50] batch [50/500] time 1.353 (1.377) data 0.000 (0.015) loss 1.1514 (1.1419) acc 65.6250 (71.8125) lr 1.3681e-03 eta 5:43:00 +epoch [21/50] batch [55/500] time 1.351 (1.374) data 0.000 (0.014) loss 0.9565 (1.1302) acc 78.1250 (72.0455) lr 1.3681e-03 eta 5:42:20 +epoch [21/50] batch [60/500] time 1.359 (1.373) data 0.000 (0.013) loss 1.1279 (1.1203) acc 84.3750 (72.4479) lr 1.3681e-03 eta 5:41:54 +epoch [21/50] batch [65/500] time 1.339 (1.372) data 0.000 (0.012) loss 0.9639 (1.1248) acc 78.1250 (72.7885) lr 1.3681e-03 eta 5:41:29 +epoch [21/50] batch [70/500] time 1.343 (1.371) data 0.001 (0.011) loss 0.9453 (1.1135) acc 71.8750 (72.9911) lr 1.3681e-03 eta 5:41:03 +epoch [21/50] batch [75/500] time 1.347 (1.369) data 0.000 (0.010) loss 1.6318 (1.1351) acc 71.8750 (72.5000) lr 1.3681e-03 eta 5:40:38 +epoch [21/50] batch [80/500] time 1.366 (1.369) data 0.000 (0.010) loss 1.0820 (1.1303) acc 68.7500 (72.4219) lr 1.3681e-03 eta 5:40:30 +epoch [21/50] batch [85/500] time 1.368 (1.369) data 0.000 (0.009) loss 1.1357 (1.1255) acc 78.1250 (72.3897) lr 1.3681e-03 eta 5:40:25 +epoch [21/50] batch [90/500] time 1.504 (1.370) data 0.001 (0.009) loss 1.0459 (1.1300) acc 71.8750 (72.2917) lr 1.3681e-03 eta 5:40:28 +epoch [21/50] batch [95/500] time 1.365 (1.370) data 0.001 (0.008) loss 1.5264 (1.1360) 
acc 62.5000 (71.9737) lr 1.3681e-03 eta 5:40:17 +epoch [21/50] batch [100/500] time 1.356 (1.369) data 0.000 (0.008) loss 1.4062 (1.1379) acc 71.8750 (72.0938) lr 1.3681e-03 eta 5:39:54 +epoch [21/50] batch [105/500] time 1.366 (1.368) data 0.000 (0.008) loss 0.9209 (1.1329) acc 78.1250 (72.1429) lr 1.3681e-03 eta 5:39:40 +epoch [21/50] batch [110/500] time 1.362 (1.367) data 0.000 (0.007) loss 1.1934 (1.1315) acc 71.8750 (72.2727) lr 1.3681e-03 eta 5:39:21 +epoch [21/50] batch [115/500] time 1.341 (1.367) data 0.000 (0.007) loss 1.0889 (1.1322) acc 68.7500 (72.2283) lr 1.3681e-03 eta 5:39:10 +epoch [21/50] batch [120/500] time 1.351 (1.367) data 0.000 (0.007) loss 0.7358 (1.1286) acc 81.2500 (72.2656) lr 1.3681e-03 eta 5:38:58 +epoch [21/50] batch [125/500] time 1.367 (1.367) data 0.000 (0.006) loss 1.0742 (1.1307) acc 75.0000 (72.2250) lr 1.3681e-03 eta 5:38:51 +epoch [21/50] batch [130/500] time 1.360 (1.367) data 0.000 (0.006) loss 0.9448 (1.1272) acc 75.0000 (72.1875) lr 1.3681e-03 eta 5:38:43 +epoch [21/50] batch [135/500] time 1.345 (1.367) data 0.000 (0.006) loss 0.8521 (1.1268) acc 75.0000 (72.1759) lr 1.3681e-03 eta 5:38:45 +epoch [21/50] batch [140/500] time 1.367 (1.367) data 0.000 (0.006) loss 2.0254 (1.1345) acc 56.2500 (71.9196) lr 1.3681e-03 eta 5:38:39 +epoch [21/50] batch [145/500] time 1.376 (1.367) data 0.000 (0.006) loss 0.6968 (1.1388) acc 81.2500 (71.7888) lr 1.3681e-03 eta 5:38:27 +epoch [21/50] batch [150/500] time 1.330 (1.367) data 0.000 (0.005) loss 0.5464 (1.1378) acc 90.6250 (71.9375) lr 1.3681e-03 eta 5:38:18 +epoch [21/50] batch [155/500] time 1.366 (1.366) data 0.000 (0.005) loss 1.3633 (1.1376) acc 71.8750 (71.8750) lr 1.3681e-03 eta 5:38:05 +epoch [21/50] batch [160/500] time 1.356 (1.366) data 0.001 (0.005) loss 0.6421 (1.1393) acc 81.2500 (71.7578) lr 1.3681e-03 eta 5:37:54 +epoch [21/50] batch [165/500] time 1.382 (1.366) data 0.000 (0.005) loss 0.6650 (1.1390) acc 87.5000 (71.9318) lr 1.3681e-03 eta 5:37:50 +epoch [21/50] 
batch [170/500] time 1.376 (1.366) data 0.000 (0.005) loss 1.6484 (1.1374) acc 71.8750 (71.9669) lr 1.3681e-03 eta 5:37:41 +epoch [21/50] batch [175/500] time 1.378 (1.366) data 0.000 (0.005) loss 0.9619 (1.1378) acc 68.7500 (71.8929) lr 1.3681e-03 eta 5:37:35 +epoch [21/50] batch [180/500] time 1.362 (1.366) data 0.000 (0.005) loss 1.0420 (1.1334) acc 81.2500 (72.0139) lr 1.3681e-03 eta 5:37:26 +epoch [21/50] batch [185/500] time 1.354 (1.366) data 0.000 (0.004) loss 1.1826 (1.1328) acc 68.7500 (71.9764) lr 1.3681e-03 eta 5:37:16 +epoch [21/50] batch [190/500] time 1.363 (1.366) data 0.000 (0.004) loss 0.6318 (1.1331) acc 87.5000 (71.9737) lr 1.3681e-03 eta 5:37:06 +epoch [21/50] batch [195/500] time 1.350 (1.366) data 0.000 (0.004) loss 1.2363 (1.1364) acc 68.7500 (71.8750) lr 1.3681e-03 eta 5:36:59 +epoch [21/50] batch [200/500] time 1.351 (1.365) data 0.000 (0.004) loss 1.4551 (1.1410) acc 68.7500 (71.8281) lr 1.3681e-03 eta 5:36:46 +epoch [21/50] batch [205/500] time 1.349 (1.365) data 0.000 (0.004) loss 1.4316 (1.1456) acc 62.5000 (71.7530) lr 1.3681e-03 eta 5:36:34 +epoch [21/50] batch [210/500] time 1.349 (1.365) data 0.000 (0.004) loss 0.6016 (1.1435) acc 81.2500 (71.7857) lr 1.3681e-03 eta 5:36:25 +epoch [21/50] batch [215/500] time 1.349 (1.365) data 0.000 (0.004) loss 0.9180 (1.1425) acc 71.8750 (71.7006) lr 1.3681e-03 eta 5:36:15 +epoch [21/50] batch [220/500] time 1.349 (1.364) data 0.000 (0.004) loss 1.1621 (1.1409) acc 75.0000 (71.6477) lr 1.3681e-03 eta 5:36:03 +epoch [21/50] batch [225/500] time 1.356 (1.364) data 0.000 (0.004) loss 1.0869 (1.1414) acc 68.7500 (71.5833) lr 1.3681e-03 eta 5:35:54 +epoch [21/50] batch [230/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.3438 (1.1412) acc 62.5000 (71.5761) lr 1.3681e-03 eta 5:35:43 +epoch [21/50] batch [235/500] time 1.369 (1.364) data 0.000 (0.004) loss 0.6030 (1.1363) acc 81.2500 (71.6888) lr 1.3681e-03 eta 5:35:42 +epoch [21/50] batch [240/500] time 1.330 (1.364) data 0.000 (0.004) loss 0.9487 
(1.1369) acc 65.6250 (71.5755) lr 1.3681e-03 eta 5:35:28 +epoch [21/50] batch [245/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.1201 (1.1394) acc 65.6250 (71.4541) lr 1.3681e-03 eta 5:35:19 +epoch [21/50] batch [250/500] time 1.345 (1.363) data 0.000 (0.003) loss 0.8643 (1.1366) acc 78.1250 (71.5500) lr 1.3681e-03 eta 5:35:10 +epoch [21/50] batch [255/500] time 1.345 (1.363) data 0.000 (0.003) loss 1.1455 (1.1326) acc 75.0000 (71.6176) lr 1.3681e-03 eta 5:35:01 +epoch [21/50] batch [260/500] time 1.374 (1.363) data 0.001 (0.003) loss 0.7798 (1.1296) acc 78.1250 (71.6587) lr 1.3681e-03 eta 5:34:54 +epoch [21/50] batch [265/500] time 1.367 (1.363) data 0.000 (0.003) loss 0.9019 (1.1277) acc 81.2500 (71.7335) lr 1.3681e-03 eta 5:34:45 +epoch [21/50] batch [270/500] time 1.341 (1.363) data 0.001 (0.003) loss 0.9272 (1.1288) acc 78.1250 (71.6898) lr 1.3681e-03 eta 5:34:34 +epoch [21/50] batch [275/500] time 1.368 (1.363) data 0.000 (0.003) loss 1.2393 (1.1305) acc 75.0000 (71.6477) lr 1.3681e-03 eta 5:34:27 +epoch [21/50] batch [280/500] time 1.363 (1.363) data 0.000 (0.003) loss 1.3799 (1.1263) acc 68.7500 (71.6518) lr 1.3681e-03 eta 5:34:28 +epoch [21/50] batch [285/500] time 1.341 (1.363) data 0.000 (0.003) loss 1.0596 (1.1277) acc 78.1250 (71.6996) lr 1.3681e-03 eta 5:34:20 +epoch [21/50] batch [290/500] time 1.365 (1.363) data 0.000 (0.003) loss 1.1592 (1.1262) acc 65.6250 (71.7241) lr 1.3681e-03 eta 5:34:11 +epoch [21/50] batch [295/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.0986 (1.1241) acc 68.7500 (71.8220) lr 1.3681e-03 eta 5:34:03 +epoch [21/50] batch [300/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.1465 (1.1252) acc 75.0000 (71.7812) lr 1.3681e-03 eta 5:33:54 +epoch [21/50] batch [305/500] time 1.382 (1.363) data 0.000 (0.003) loss 1.6104 (1.1284) acc 65.6250 (71.6906) lr 1.3681e-03 eta 5:33:47 +epoch [21/50] batch [310/500] time 1.357 (1.363) data 0.000 (0.003) loss 1.1270 (1.1308) acc 71.8750 (71.6230) lr 1.3681e-03 eta 5:33:39 +epoch 
[21/50] batch [315/500] time 1.358 (1.363) data 0.000 (0.003) loss 0.7612 (1.1302) acc 78.1250 (71.7361) lr 1.3681e-03 eta 5:33:34 +epoch [21/50] batch [320/500] time 1.368 (1.363) data 0.000 (0.003) loss 1.1055 (1.1288) acc 71.8750 (71.7285) lr 1.3681e-03 eta 5:33:28 +epoch [21/50] batch [325/500] time 1.377 (1.363) data 0.000 (0.003) loss 1.4912 (1.1297) acc 62.5000 (71.6635) lr 1.3681e-03 eta 5:33:22 +epoch [21/50] batch [330/500] time 1.373 (1.363) data 0.000 (0.003) loss 0.7339 (1.1259) acc 81.2500 (71.7045) lr 1.3681e-03 eta 5:33:15 +epoch [21/50] batch [335/500] time 1.387 (1.363) data 0.000 (0.003) loss 0.8979 (1.1252) acc 75.0000 (71.6791) lr 1.3681e-03 eta 5:33:09 +epoch [21/50] batch [340/500] time 1.392 (1.363) data 0.000 (0.003) loss 1.0449 (1.1232) acc 78.1250 (71.7371) lr 1.3681e-03 eta 5:33:02 +epoch [21/50] batch [345/500] time 1.356 (1.363) data 0.000 (0.003) loss 0.8037 (1.1216) acc 75.0000 (71.7482) lr 1.3681e-03 eta 5:32:55 +epoch [21/50] batch [350/500] time 1.335 (1.363) data 0.000 (0.003) loss 0.8828 (1.1201) acc 75.0000 (71.7679) lr 1.3681e-03 eta 5:32:46 +epoch [21/50] batch [355/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.1562 (1.1200) acc 71.8750 (71.8046) lr 1.3681e-03 eta 5:32:36 +epoch [21/50] batch [360/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.2178 (1.1185) acc 65.6250 (71.8056) lr 1.3681e-03 eta 5:32:29 +epoch [21/50] batch [365/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.4414 (1.1180) acc 56.2500 (71.7979) lr 1.3681e-03 eta 5:32:24 +epoch [21/50] batch [370/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.8555 (1.1158) acc 78.1250 (71.8666) lr 1.3681e-03 eta 5:32:15 +epoch [21/50] batch [375/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.2529 (1.1156) acc 68.7500 (71.9083) lr 1.3681e-03 eta 5:32:07 +epoch [21/50] batch [380/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.2559 (1.1138) acc 62.5000 (71.9572) lr 1.3681e-03 eta 5:32:06 +epoch [21/50] batch [385/500] time 1.330 (1.363) data 0.000 (0.002) loss 
1.0303 (1.1149) acc 71.8750 (71.8994) lr 1.3681e-03 eta 5:31:58 +epoch [21/50] batch [390/500] time 1.366 (1.363) data 0.001 (0.002) loss 1.1553 (1.1134) acc 71.8750 (71.9391) lr 1.3681e-03 eta 5:31:51 +epoch [21/50] batch [395/500] time 1.379 (1.363) data 0.000 (0.002) loss 0.6860 (1.1117) acc 84.3750 (71.9699) lr 1.3681e-03 eta 5:31:43 +epoch [21/50] batch [400/500] time 1.377 (1.363) data 0.000 (0.002) loss 1.4111 (1.1138) acc 65.6250 (71.9609) lr 1.3681e-03 eta 5:31:36 +epoch [21/50] batch [405/500] time 1.335 (1.363) data 0.000 (0.002) loss 1.3633 (1.1151) acc 75.0000 (71.9290) lr 1.3681e-03 eta 5:31:28 +epoch [21/50] batch [410/500] time 1.392 (1.363) data 0.000 (0.002) loss 1.0107 (1.1139) acc 75.0000 (71.9436) lr 1.3681e-03 eta 5:31:19 +epoch [21/50] batch [415/500] time 1.358 (1.362) data 0.000 (0.002) loss 1.1016 (1.1133) acc 68.7500 (71.9277) lr 1.3681e-03 eta 5:31:11 +epoch [21/50] batch [420/500] time 1.481 (1.363) data 0.000 (0.002) loss 1.2402 (1.1118) acc 68.7500 (71.8973) lr 1.3681e-03 eta 5:31:09 +epoch [21/50] batch [425/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8149 (1.1127) acc 84.3750 (71.9265) lr 1.3681e-03 eta 5:31:01 +epoch [21/50] batch [430/500] time 1.357 (1.363) data 0.000 (0.002) loss 1.6504 (1.1115) acc 62.5000 (71.8968) lr 1.3681e-03 eta 5:30:54 +epoch [21/50] batch [435/500] time 1.378 (1.363) data 0.000 (0.002) loss 1.3789 (1.1114) acc 78.1250 (71.8822) lr 1.3681e-03 eta 5:30:48 +epoch [21/50] batch [440/500] time 1.373 (1.363) data 0.000 (0.002) loss 1.1943 (1.1115) acc 71.8750 (71.8892) lr 1.3681e-03 eta 5:30:42 +epoch [21/50] batch [445/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.0996 (1.1119) acc 75.0000 (71.8820) lr 1.3681e-03 eta 5:30:35 +epoch [21/50] batch [450/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8833 (1.1115) acc 75.0000 (71.9028) lr 1.3681e-03 eta 5:30:28 +epoch [21/50] batch [455/500] time 1.367 (1.363) data 0.000 (0.002) loss 1.0400 (1.1119) acc 81.2500 (71.9299) lr 1.3681e-03 eta 5:30:19 
+epoch [21/50] batch [460/500] time 1.379 (1.363) data 0.000 (0.002) loss 1.4590 (1.1148) acc 68.7500 (71.8750) lr 1.3681e-03 eta 5:30:13 +epoch [21/50] batch [465/500] time 1.359 (1.363) data 0.000 (0.002) loss 1.0059 (1.1156) acc 62.5000 (71.7876) lr 1.3681e-03 eta 5:30:05 +epoch [21/50] batch [470/500] time 1.369 (1.363) data 0.000 (0.002) loss 0.7271 (1.1180) acc 81.2500 (71.7620) lr 1.3681e-03 eta 5:29:58 +epoch [21/50] batch [475/500] time 1.377 (1.363) data 0.001 (0.002) loss 1.1709 (1.1176) acc 75.0000 (71.8026) lr 1.3681e-03 eta 5:29:53 +epoch [21/50] batch [480/500] time 1.382 (1.363) data 0.000 (0.002) loss 0.8726 (1.1147) acc 78.1250 (71.8685) lr 1.3681e-03 eta 5:29:46 +epoch [21/50] batch [485/500] time 1.346 (1.363) data 0.001 (0.002) loss 0.6226 (1.1138) acc 81.2500 (71.9201) lr 1.3681e-03 eta 5:29:38 +epoch [21/50] batch [490/500] time 1.377 (1.363) data 0.000 (0.002) loss 0.9214 (1.1159) acc 71.8750 (71.8622) lr 1.3681e-03 eta 5:29:30 +epoch [21/50] batch [495/500] time 1.339 (1.362) data 0.000 (0.002) loss 0.5830 (1.1159) acc 78.1250 (71.8561) lr 1.3681e-03 eta 5:29:22 +epoch [21/50] batch [500/500] time 1.354 (1.362) data 0.000 (0.002) loss 0.7573 (1.1146) acc 84.3750 (71.9437) lr 1.3090e-03 eta 5:29:14 +epoch [22/50] batch [5/500] time 1.350 (1.512) data 0.000 (0.160) loss 1.3076 (1.0071) acc 65.6250 (76.8750) lr 1.3090e-03 eta 6:05:15 +epoch [22/50] batch [10/500] time 1.381 (1.436) data 0.000 (0.080) loss 0.8438 (0.9802) acc 68.7500 (74.0625) lr 1.3090e-03 eta 5:46:48 +epoch [22/50] batch [15/500] time 1.363 (1.426) data 0.000 (0.054) loss 0.9927 (1.0298) acc 68.7500 (73.3333) lr 1.3090e-03 eta 5:44:19 +epoch [22/50] batch [20/500] time 1.372 (1.412) data 0.001 (0.040) loss 1.4805 (1.0634) acc 62.5000 (73.4375) lr 1.3090e-03 eta 5:40:48 +epoch [22/50] batch [25/500] time 1.352 (1.401) data 0.000 (0.032) loss 1.2783 (1.0773) acc 68.7500 (73.5000) lr 1.3090e-03 eta 5:38:03 +epoch [22/50] batch [30/500] time 1.372 (1.395) data 0.000 (0.027) loss 
0.8867 (1.0817) acc 71.8750 (72.5000) lr 1.3090e-03 eta 5:36:29 +epoch [22/50] batch [35/500] time 1.360 (1.390) data 0.000 (0.023) loss 1.1807 (1.0921) acc 81.2500 (72.8571) lr 1.3090e-03 eta 5:35:12 +epoch [22/50] batch [40/500] time 1.381 (1.387) data 0.000 (0.020) loss 0.8296 (1.0921) acc 68.7500 (72.2656) lr 1.3090e-03 eta 5:34:09 +epoch [22/50] batch [45/500] time 1.344 (1.383) data 0.000 (0.018) loss 0.6519 (1.0807) acc 75.0000 (72.2222) lr 1.3090e-03 eta 5:33:11 +epoch [22/50] batch [50/500] time 1.378 (1.381) data 0.001 (0.016) loss 0.9341 (1.0893) acc 78.1250 (71.8750) lr 1.3090e-03 eta 5:32:36 +epoch [22/50] batch [55/500] time 1.389 (1.380) data 0.000 (0.015) loss 1.1436 (1.0945) acc 71.8750 (71.5909) lr 1.3090e-03 eta 5:32:16 +epoch [22/50] batch [60/500] time 1.360 (1.379) data 0.001 (0.014) loss 0.7173 (1.0779) acc 87.5000 (72.3958) lr 1.3090e-03 eta 5:31:58 +epoch [22/50] batch [65/500] time 1.360 (1.378) data 0.000 (0.013) loss 1.0820 (1.0837) acc 71.8750 (72.1635) lr 1.3090e-03 eta 5:31:25 +epoch [22/50] batch [70/500] time 1.358 (1.376) data 0.000 (0.012) loss 1.4531 (1.0927) acc 68.7500 (72.0089) lr 1.3090e-03 eta 5:30:57 +epoch [22/50] batch [75/500] time 1.348 (1.377) data 0.000 (0.011) loss 0.7925 (1.0877) acc 78.1250 (72.2917) lr 1.3090e-03 eta 5:31:01 +epoch [22/50] batch [80/500] time 1.369 (1.376) data 0.001 (0.010) loss 0.7163 (1.0977) acc 71.8750 (71.9141) lr 1.3090e-03 eta 5:30:40 +epoch [22/50] batch [85/500] time 1.376 (1.375) data 0.000 (0.010) loss 0.8872 (1.0913) acc 71.8750 (72.0221) lr 1.3090e-03 eta 5:30:15 +epoch [22/50] batch [90/500] time 1.389 (1.374) data 0.000 (0.009) loss 1.5352 (1.0906) acc 65.6250 (72.1875) lr 1.3090e-03 eta 5:30:03 +epoch [22/50] batch [95/500] time 1.386 (1.374) data 0.000 (0.009) loss 1.2891 (1.0885) acc 71.8750 (72.5000) lr 1.3090e-03 eta 5:29:49 +epoch [22/50] batch [100/500] time 1.346 (1.373) data 0.000 (0.008) loss 0.8511 (1.0832) acc 71.8750 (72.5312) lr 1.3090e-03 eta 5:29:35 +epoch [22/50] 
batch [105/500] time 1.363 (1.373) data 0.000 (0.008) loss 1.1006 (1.0967) acc 71.8750 (72.3512) lr 1.3090e-03 eta 5:29:20 +epoch [22/50] batch [110/500] time 1.351 (1.372) data 0.000 (0.008) loss 1.0596 (1.0869) acc 71.8750 (72.5852) lr 1.3090e-03 eta 5:29:07 +epoch [22/50] batch [115/500] time 1.486 (1.373) data 0.000 (0.007) loss 1.3027 (1.0849) acc 81.2500 (72.6359) lr 1.3090e-03 eta 5:29:06 +epoch [22/50] batch [120/500] time 1.341 (1.372) data 0.000 (0.007) loss 1.2607 (1.0882) acc 75.0000 (72.7083) lr 1.3090e-03 eta 5:28:47 +epoch [22/50] batch [125/500] time 1.376 (1.372) data 0.000 (0.007) loss 1.3906 (1.0926) acc 59.3750 (72.6500) lr 1.3090e-03 eta 5:28:41 +epoch [22/50] batch [130/500] time 1.363 (1.371) data 0.000 (0.007) loss 0.7661 (1.0901) acc 81.2500 (72.7644) lr 1.3090e-03 eta 5:28:25 +epoch [22/50] batch [135/500] time 1.382 (1.371) data 0.001 (0.006) loss 0.8560 (1.0838) acc 75.0000 (72.8009) lr 1.3090e-03 eta 5:28:15 +epoch [22/50] batch [140/500] time 1.350 (1.371) data 0.000 (0.006) loss 1.4160 (1.0792) acc 65.6250 (72.8795) lr 1.3090e-03 eta 5:28:06 +epoch [22/50] batch [145/500] time 1.353 (1.371) data 0.000 (0.006) loss 1.5371 (1.0777) acc 65.6250 (72.8879) lr 1.3090e-03 eta 5:27:54 +epoch [22/50] batch [150/500] time 1.353 (1.370) data 0.000 (0.006) loss 1.4033 (1.0805) acc 65.6250 (72.8333) lr 1.3090e-03 eta 5:27:40 +epoch [22/50] batch [155/500] time 1.382 (1.370) data 0.000 (0.006) loss 0.9888 (1.0720) acc 68.7500 (72.9839) lr 1.3090e-03 eta 5:27:34 +epoch [22/50] batch [160/500] time 1.356 (1.370) data 0.000 (0.005) loss 1.0439 (1.0716) acc 78.1250 (72.9492) lr 1.3090e-03 eta 5:27:19 +epoch [22/50] batch [165/500] time 1.352 (1.369) data 0.000 (0.005) loss 1.1875 (1.0737) acc 81.2500 (72.9735) lr 1.3090e-03 eta 5:27:05 +epoch [22/50] batch [170/500] time 1.377 (1.369) data 0.000 (0.005) loss 0.7021 (1.0649) acc 75.0000 (73.0882) lr 1.3090e-03 eta 5:26:56 +epoch [22/50] batch [175/500] time 1.374 (1.369) data 0.000 (0.005) loss 0.9404 
(1.0687) acc 75.0000 (72.9821) lr 1.3090e-03 eta 5:26:50 +epoch [22/50] batch [180/500] time 1.373 (1.369) data 0.000 (0.005) loss 0.7524 (1.0662) acc 78.1250 (73.0729) lr 1.3090e-03 eta 5:26:40 +epoch [22/50] batch [185/500] time 1.363 (1.369) data 0.001 (0.005) loss 1.4473 (1.0761) acc 62.5000 (72.9054) lr 1.3090e-03 eta 5:26:34 +epoch [22/50] batch [190/500] time 1.360 (1.369) data 0.000 (0.005) loss 1.1943 (1.0748) acc 75.0000 (72.9770) lr 1.3090e-03 eta 5:26:25 +epoch [22/50] batch [195/500] time 1.361 (1.368) data 0.000 (0.004) loss 1.8574 (1.0776) acc 59.3750 (72.9487) lr 1.3090e-03 eta 5:26:13 +epoch [22/50] batch [200/500] time 1.362 (1.368) data 0.000 (0.004) loss 0.6611 (1.0779) acc 78.1250 (72.9375) lr 1.3090e-03 eta 5:26:04 +epoch [22/50] batch [205/500] time 1.351 (1.368) data 0.000 (0.004) loss 1.5527 (1.0806) acc 56.2500 (72.9726) lr 1.3090e-03 eta 5:25:50 +epoch [22/50] batch [210/500] time 1.356 (1.367) data 0.000 (0.004) loss 1.1562 (1.0787) acc 71.8750 (72.9464) lr 1.3090e-03 eta 5:25:41 +epoch [22/50] batch [215/500] time 1.343 (1.368) data 0.000 (0.004) loss 1.4385 (1.0795) acc 59.3750 (72.8634) lr 1.3090e-03 eta 5:25:38 +epoch [22/50] batch [220/500] time 1.339 (1.368) data 0.000 (0.004) loss 0.7910 (1.0798) acc 78.1250 (72.8409) lr 1.3090e-03 eta 5:25:29 +epoch [22/50] batch [225/500] time 1.356 (1.367) data 0.000 (0.004) loss 1.2627 (1.0767) acc 68.7500 (72.9167) lr 1.3090e-03 eta 5:25:16 +epoch [22/50] batch [230/500] time 1.360 (1.367) data 0.000 (0.004) loss 0.9917 (1.0785) acc 71.8750 (72.9076) lr 1.3090e-03 eta 5:25:06 +epoch [22/50] batch [235/500] time 1.341 (1.367) data 0.000 (0.004) loss 1.0605 (1.0793) acc 65.6250 (72.8457) lr 1.3090e-03 eta 5:24:55 +epoch [22/50] batch [240/500] time 1.364 (1.367) data 0.000 (0.004) loss 0.8745 (1.0760) acc 75.0000 (72.9557) lr 1.3090e-03 eta 5:24:50 +epoch [22/50] batch [245/500] time 1.348 (1.367) data 0.000 (0.004) loss 0.8955 (1.0731) acc 78.1250 (72.9719) lr 1.3090e-03 eta 5:24:40 +epoch 
[22/50] batch [250/500] time 1.357 (1.366) data 0.000 (0.004) loss 1.1543 (1.0797) acc 68.7500 (72.8000) lr 1.3090e-03 eta 5:24:29 +epoch [22/50] batch [255/500] time 1.358 (1.366) data 0.000 (0.003) loss 1.5693 (1.0834) acc 65.6250 (72.7819) lr 1.3090e-03 eta 5:24:21 +epoch [22/50] batch [260/500] time 1.343 (1.367) data 0.000 (0.003) loss 1.5156 (1.0898) acc 56.2500 (72.5601) lr 1.3090e-03 eta 5:24:19 +epoch [22/50] batch [265/500] time 1.337 (1.366) data 0.000 (0.003) loss 1.1807 (1.0901) acc 68.7500 (72.5472) lr 1.3090e-03 eta 5:24:09 +epoch [22/50] batch [270/500] time 1.371 (1.366) data 0.000 (0.003) loss 0.6802 (1.0862) acc 78.1250 (72.5926) lr 1.3090e-03 eta 5:24:01 +epoch [22/50] batch [275/500] time 1.339 (1.366) data 0.000 (0.003) loss 1.0273 (1.0896) acc 78.1250 (72.5341) lr 1.3090e-03 eta 5:23:50 +epoch [22/50] batch [280/500] time 1.364 (1.366) data 0.000 (0.003) loss 1.2266 (1.0890) acc 75.0000 (72.5446) lr 1.3090e-03 eta 5:23:43 +epoch [22/50] batch [285/500] time 1.374 (1.366) data 0.000 (0.003) loss 1.8203 (1.0903) acc 65.6250 (72.5219) lr 1.3090e-03 eta 5:23:36 +epoch [22/50] batch [290/500] time 1.381 (1.366) data 0.000 (0.003) loss 1.5850 (1.0936) acc 59.3750 (72.4138) lr 1.3090e-03 eta 5:23:29 +epoch [22/50] batch [295/500] time 1.369 (1.366) data 0.000 (0.003) loss 0.8545 (1.0931) acc 75.0000 (72.3729) lr 1.3090e-03 eta 5:23:21 +epoch [22/50] batch [300/500] time 1.354 (1.366) data 0.000 (0.003) loss 1.0469 (1.0925) acc 68.7500 (72.3438) lr 1.3090e-03 eta 5:23:14 +epoch [22/50] batch [305/500] time 1.372 (1.366) data 0.000 (0.003) loss 1.3887 (1.0955) acc 75.0000 (72.3053) lr 1.3090e-03 eta 5:23:05 +epoch [22/50] batch [310/500] time 1.377 (1.366) data 0.000 (0.003) loss 1.0381 (1.0941) acc 71.8750 (72.3387) lr 1.3090e-03 eta 5:22:59 +epoch [22/50] batch [315/500] time 1.368 (1.366) data 0.000 (0.003) loss 1.0576 (1.0956) acc 81.2500 (72.3413) lr 1.3090e-03 eta 5:22:53 +epoch [22/50] batch [320/500] time 1.379 (1.366) data 0.000 (0.003) loss 
1.0195 (1.0981) acc 75.0000 (72.3047) lr 1.3090e-03 eta 5:22:49 +epoch [22/50] batch [325/500] time 1.351 (1.366) data 0.000 (0.003) loss 1.4551 (1.1000) acc 59.3750 (72.2115) lr 1.3090e-03 eta 5:22:42 +epoch [22/50] batch [330/500] time 1.367 (1.366) data 0.001 (0.003) loss 0.8818 (1.0991) acc 84.3750 (72.2822) lr 1.3090e-03 eta 5:22:34 +epoch [22/50] batch [335/500] time 1.350 (1.366) data 0.000 (0.003) loss 1.2441 (1.0974) acc 78.1250 (72.3414) lr 1.3090e-03 eta 5:22:26 +epoch [22/50] batch [340/500] time 1.382 (1.366) data 0.000 (0.003) loss 1.1162 (1.1011) acc 68.7500 (72.2978) lr 1.3090e-03 eta 5:22:20 +epoch [22/50] batch [345/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.2441 (1.1013) acc 62.5000 (72.2283) lr 1.3090e-03 eta 5:22:14 +epoch [22/50] batch [350/500] time 1.376 (1.366) data 0.000 (0.003) loss 0.7686 (1.1004) acc 84.3750 (72.2768) lr 1.3090e-03 eta 5:22:06 +epoch [22/50] batch [355/500] time 1.398 (1.366) data 0.000 (0.003) loss 1.6738 (1.1009) acc 59.3750 (72.2359) lr 1.3090e-03 eta 5:22:01 +epoch [22/50] batch [360/500] time 1.371 (1.366) data 0.000 (0.003) loss 1.1816 (1.1000) acc 78.1250 (72.2743) lr 1.3090e-03 eta 5:22:00 +epoch [22/50] batch [365/500] time 1.368 (1.366) data 0.000 (0.003) loss 1.1641 (1.0996) acc 68.7500 (72.2603) lr 1.3090e-03 eta 5:21:52 +epoch [22/50] batch [370/500] time 1.352 (1.366) data 0.000 (0.003) loss 1.2061 (1.0961) acc 68.7500 (72.3226) lr 1.3090e-03 eta 5:21:45 +epoch [22/50] batch [375/500] time 1.340 (1.366) data 0.000 (0.002) loss 0.7520 (1.0943) acc 78.1250 (72.3667) lr 1.3090e-03 eta 5:21:37 +epoch [22/50] batch [380/500] time 1.357 (1.366) data 0.000 (0.002) loss 0.8726 (1.0981) acc 78.1250 (72.2862) lr 1.3090e-03 eta 5:21:28 +epoch [22/50] batch [385/500] time 1.373 (1.366) data 0.000 (0.002) loss 1.7559 (1.0991) acc 65.6250 (72.3133) lr 1.3090e-03 eta 5:21:21 +epoch [22/50] batch [390/500] time 1.357 (1.366) data 0.000 (0.002) loss 0.9609 (1.0975) acc 71.8750 (72.3157) lr 1.3090e-03 eta 5:21:15 
+epoch [22/50] batch [395/500] time 1.366 (1.366) data 0.000 (0.002) loss 1.0898 (1.0995) acc 68.7500 (72.2389) lr 1.3090e-03 eta 5:21:07 +epoch [22/50] batch [400/500] time 1.355 (1.366) data 0.001 (0.002) loss 1.5029 (1.1012) acc 56.2500 (72.1797) lr 1.3090e-03 eta 5:20:57 +epoch [22/50] batch [405/500] time 1.355 (1.366) data 0.000 (0.002) loss 1.1572 (1.1001) acc 65.6250 (72.1914) lr 1.3090e-03 eta 5:20:52 +epoch [22/50] batch [410/500] time 1.344 (1.366) data 0.000 (0.002) loss 1.3799 (1.1007) acc 65.6250 (72.1723) lr 1.3090e-03 eta 5:20:44 +epoch [22/50] batch [415/500] time 1.345 (1.366) data 0.000 (0.002) loss 0.7036 (1.1010) acc 78.1250 (72.1611) lr 1.3090e-03 eta 5:20:35 +epoch [22/50] batch [420/500] time 1.338 (1.366) data 0.000 (0.002) loss 1.2471 (1.0998) acc 75.0000 (72.2024) lr 1.3090e-03 eta 5:20:26 +epoch [22/50] batch [425/500] time 1.347 (1.365) data 0.000 (0.002) loss 0.8760 (1.0965) acc 78.1250 (72.2868) lr 1.3090e-03 eta 5:20:17 +epoch [22/50] batch [430/500] time 1.358 (1.365) data 0.001 (0.002) loss 1.1719 (1.0959) acc 68.7500 (72.2965) lr 1.3090e-03 eta 5:20:09 +epoch [22/50] batch [435/500] time 1.341 (1.365) data 0.000 (0.002) loss 1.4414 (1.0984) acc 65.6250 (72.2198) lr 1.3090e-03 eta 5:19:59 +epoch [22/50] batch [440/500] time 1.337 (1.365) data 0.000 (0.002) loss 1.0498 (1.0983) acc 71.8750 (72.2017) lr 1.3090e-03 eta 5:19:50 +epoch [22/50] batch [445/500] time 1.368 (1.365) data 0.000 (0.002) loss 1.8672 (1.0986) acc 62.5000 (72.1980) lr 1.3090e-03 eta 5:19:43 +epoch [22/50] batch [450/500] time 1.361 (1.365) data 0.000 (0.002) loss 0.9985 (1.0973) acc 78.1250 (72.2292) lr 1.3090e-03 eta 5:19:34 +epoch [22/50] batch [455/500] time 1.349 (1.365) data 0.000 (0.002) loss 0.7861 (1.0970) acc 78.1250 (72.2665) lr 1.3090e-03 eta 5:19:26 +epoch [22/50] batch [460/500] time 1.366 (1.365) data 0.001 (0.002) loss 1.2266 (1.0979) acc 62.5000 (72.2351) lr 1.3090e-03 eta 5:19:19 +epoch [22/50] batch [465/500] time 1.362 (1.365) data 0.000 
(0.002) loss 1.3418 (1.0975) acc 78.1250 (72.2715) lr 1.3090e-03 eta 5:19:11 +epoch [22/50] batch [470/500] time 1.381 (1.365) data 0.000 (0.002) loss 1.7812 (1.1007) acc 59.3750 (72.2207) lr 1.3090e-03 eta 5:19:04 +epoch [22/50] batch [475/500] time 1.354 (1.365) data 0.000 (0.002) loss 0.7163 (1.1014) acc 81.2500 (72.2171) lr 1.3090e-03 eta 5:18:57 +epoch [22/50] batch [480/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.9248 (1.1032) acc 56.2500 (72.1549) lr 1.3090e-03 eta 5:18:47 +epoch [22/50] batch [485/500] time 1.378 (1.364) data 0.001 (0.002) loss 1.8838 (1.1068) acc 65.6250 (72.1263) lr 1.3090e-03 eta 5:18:40 +epoch [22/50] batch [490/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.1934 (1.1069) acc 75.0000 (72.1365) lr 1.3090e-03 eta 5:18:33 +epoch [22/50] batch [495/500] time 1.374 (1.364) data 0.000 (0.002) loss 2.3359 (1.1098) acc 53.1250 (72.0833) lr 1.3090e-03 eta 5:18:26 +epoch [22/50] batch [500/500] time 1.495 (1.365) data 0.000 (0.002) loss 2.1504 (1.1127) acc 59.3750 (72.0563) lr 1.2487e-03 eta 5:18:23 +epoch [23/50] batch [5/500] time 1.362 (1.535) data 0.000 (0.169) loss 1.2207 (1.1931) acc 65.6250 (71.2500) lr 1.2487e-03 eta 5:57:55 +epoch [23/50] batch [10/500] time 1.379 (1.449) data 0.000 (0.084) loss 1.7744 (1.2527) acc 65.6250 (68.1250) lr 1.2487e-03 eta 5:37:46 +epoch [23/50] batch [15/500] time 1.360 (1.418) data 0.000 (0.056) loss 1.1865 (1.2487) acc 78.1250 (69.3750) lr 1.2487e-03 eta 5:30:34 +epoch [23/50] batch [20/500] time 1.353 (1.400) data 0.000 (0.042) loss 0.6343 (1.1138) acc 84.3750 (72.1875) lr 1.2487e-03 eta 5:26:14 +epoch [23/50] batch [25/500] time 1.364 (1.393) data 0.000 (0.034) loss 1.4229 (1.1267) acc 59.3750 (71.6250) lr 1.2487e-03 eta 5:24:25 +epoch [23/50] batch [30/500] time 1.359 (1.386) data 0.000 (0.028) loss 0.6216 (1.0747) acc 87.5000 (72.8125) lr 1.2487e-03 eta 5:22:47 +epoch [23/50] batch [35/500] time 1.355 (1.383) data 0.000 (0.024) loss 1.8096 (1.0781) acc 56.2500 (73.3036) lr 1.2487e-03 eta 
5:21:47 +epoch [23/50] batch [40/500] time 1.348 (1.379) data 0.000 (0.021) loss 1.0713 (1.1070) acc 65.6250 (72.5781) lr 1.2487e-03 eta 5:20:55 +epoch [23/50] batch [45/500] time 1.354 (1.377) data 0.000 (0.019) loss 0.8330 (1.1128) acc 84.3750 (72.5694) lr 1.2487e-03 eta 5:20:09 +epoch [23/50] batch [50/500] time 1.358 (1.374) data 0.000 (0.017) loss 0.9873 (1.1268) acc 71.8750 (72.0000) lr 1.2487e-03 eta 5:19:31 +epoch [23/50] batch [55/500] time 1.342 (1.373) data 0.000 (0.016) loss 0.8599 (1.1331) acc 78.1250 (71.7045) lr 1.2487e-03 eta 5:19:04 +epoch [23/50] batch [60/500] time 1.546 (1.375) data 0.000 (0.014) loss 0.4460 (1.1040) acc 87.5000 (72.2396) lr 1.2487e-03 eta 5:19:27 +epoch [23/50] batch [65/500] time 1.344 (1.373) data 0.000 (0.013) loss 1.0215 (1.0969) acc 75.0000 (72.5000) lr 1.2487e-03 eta 5:18:59 +epoch [23/50] batch [70/500] time 1.353 (1.372) data 0.000 (0.012) loss 1.4463 (1.1034) acc 71.8750 (72.2768) lr 1.2487e-03 eta 5:18:25 +epoch [23/50] batch [75/500] time 1.342 (1.371) data 0.000 (0.012) loss 1.1221 (1.0974) acc 68.7500 (72.5417) lr 1.2487e-03 eta 5:18:06 +epoch [23/50] batch [80/500] time 1.355 (1.370) data 0.000 (0.011) loss 0.9438 (1.1053) acc 65.6250 (72.1094) lr 1.2487e-03 eta 5:17:55 +epoch [23/50] batch [85/500] time 1.361 (1.370) data 0.000 (0.010) loss 0.6543 (1.0893) acc 71.8750 (72.1691) lr 1.2487e-03 eta 5:17:40 +epoch [23/50] batch [90/500] time 1.362 (1.368) data 0.000 (0.010) loss 0.8477 (1.0879) acc 87.5000 (72.2917) lr 1.2487e-03 eta 5:17:14 +epoch [23/50] batch [95/500] time 1.369 (1.368) data 0.000 (0.009) loss 0.7739 (1.0834) acc 78.1250 (72.3684) lr 1.2487e-03 eta 5:17:04 +epoch [23/50] batch [100/500] time 1.348 (1.368) data 0.000 (0.009) loss 0.5796 (1.0755) acc 84.3750 (72.6875) lr 1.2487e-03 eta 5:16:48 +epoch [23/50] batch [105/500] time 1.370 (1.368) data 0.000 (0.008) loss 1.5264 (1.0834) acc 62.5000 (72.5893) lr 1.2487e-03 eta 5:16:52 +epoch [23/50] batch [110/500] time 1.377 (1.368) data 0.000 (0.008) 
loss 1.0371 (1.0850) acc 71.8750 (72.5852) lr 1.2487e-03 eta 5:16:45 +epoch [23/50] batch [115/500] time 1.355 (1.368) data 0.000 (0.008) loss 1.6270 (1.0907) acc 62.5000 (72.5272) lr 1.2487e-03 eta 5:16:27 +epoch [23/50] batch [120/500] time 1.342 (1.367) data 0.000 (0.007) loss 1.4014 (1.0875) acc 62.5000 (72.5521) lr 1.2487e-03 eta 5:16:16 +epoch [23/50] batch [125/500] time 1.357 (1.367) data 0.000 (0.007) loss 1.2490 (1.0909) acc 75.0000 (72.6750) lr 1.2487e-03 eta 5:16:01 +epoch [23/50] batch [130/500] time 1.354 (1.366) data 0.000 (0.007) loss 1.0381 (1.0932) acc 75.0000 (72.5000) lr 1.2487e-03 eta 5:15:51 +epoch [23/50] batch [135/500] time 1.352 (1.366) data 0.000 (0.007) loss 1.0479 (1.0923) acc 71.8750 (72.5000) lr 1.2487e-03 eta 5:15:39 +epoch [23/50] batch [140/500] time 1.360 (1.366) data 0.000 (0.006) loss 1.1719 (1.1024) acc 71.8750 (72.2098) lr 1.2487e-03 eta 5:15:30 +epoch [23/50] batch [145/500] time 1.357 (1.366) data 0.000 (0.006) loss 1.0088 (1.0954) acc 71.8750 (72.3707) lr 1.2487e-03 eta 5:15:20 +epoch [23/50] batch [150/500] time 1.348 (1.365) data 0.000 (0.006) loss 1.2139 (1.0926) acc 75.0000 (72.4583) lr 1.2487e-03 eta 5:15:07 +epoch [23/50] batch [155/500] time 1.345 (1.365) data 0.000 (0.006) loss 1.5244 (1.0923) acc 62.5000 (72.4194) lr 1.2487e-03 eta 5:14:59 +epoch [23/50] batch [160/500] time 1.353 (1.365) data 0.000 (0.006) loss 0.7515 (1.0813) acc 87.5000 (72.6562) lr 1.2487e-03 eta 5:14:50 +epoch [23/50] batch [165/500] time 1.346 (1.365) data 0.000 (0.005) loss 0.9531 (1.0785) acc 71.8750 (72.6705) lr 1.2487e-03 eta 5:14:42 +epoch [23/50] batch [170/500] time 1.358 (1.365) data 0.000 (0.005) loss 1.0020 (1.0794) acc 71.8750 (72.7390) lr 1.2487e-03 eta 5:14:31 +epoch [23/50] batch [175/500] time 1.342 (1.364) data 0.000 (0.005) loss 1.3242 (1.0753) acc 65.6250 (72.8036) lr 1.2487e-03 eta 5:14:22 +epoch [23/50] batch [180/500] time 1.358 (1.364) data 0.000 (0.005) loss 1.1670 (1.0727) acc 68.7500 (72.8299) lr 1.2487e-03 eta 
5:14:10 +epoch [23/50] batch [185/500] time 1.344 (1.364) data 0.000 (0.005) loss 1.4482 (1.0765) acc 75.0000 (72.7872) lr 1.2487e-03 eta 5:14:00 +epoch [23/50] batch [190/500] time 1.373 (1.363) data 0.000 (0.005) loss 1.4570 (1.0819) acc 62.5000 (72.5987) lr 1.2487e-03 eta 5:13:49 +epoch [23/50] batch [195/500] time 1.354 (1.363) data 0.000 (0.005) loss 0.8813 (1.0937) acc 78.1250 (72.4519) lr 1.2487e-03 eta 5:13:39 +epoch [23/50] batch [200/500] time 1.364 (1.363) data 0.000 (0.005) loss 1.2617 (1.1012) acc 68.7500 (72.3125) lr 1.2487e-03 eta 5:13:30 +epoch [23/50] batch [205/500] time 1.352 (1.363) data 0.000 (0.004) loss 1.0977 (1.0988) acc 68.7500 (72.3323) lr 1.2487e-03 eta 5:13:29 +epoch [23/50] batch [210/500] time 1.358 (1.363) data 0.000 (0.004) loss 1.4443 (1.1020) acc 68.7500 (72.2173) lr 1.2487e-03 eta 5:13:18 +epoch [23/50] batch [215/500] time 1.342 (1.363) data 0.000 (0.004) loss 1.0498 (1.0996) acc 75.0000 (72.2820) lr 1.2487e-03 eta 5:13:08 +epoch [23/50] batch [220/500] time 1.376 (1.363) data 0.000 (0.004) loss 1.1240 (1.0995) acc 81.2500 (72.2443) lr 1.2487e-03 eta 5:12:57 +epoch [23/50] batch [225/500] time 1.359 (1.363) data 0.000 (0.004) loss 1.3076 (1.0999) acc 62.5000 (72.2222) lr 1.2487e-03 eta 5:12:49 +epoch [23/50] batch [230/500] time 1.342 (1.362) data 0.000 (0.004) loss 1.6963 (1.1047) acc 56.2500 (72.1467) lr 1.2487e-03 eta 5:12:37 +epoch [23/50] batch [235/500] time 1.384 (1.362) data 0.000 (0.004) loss 0.8809 (1.1050) acc 75.0000 (72.1809) lr 1.2487e-03 eta 5:12:31 +epoch [23/50] batch [240/500] time 1.354 (1.362) data 0.000 (0.004) loss 1.3613 (1.1058) acc 68.7500 (72.1484) lr 1.2487e-03 eta 5:12:22 +epoch [23/50] batch [245/500] time 1.341 (1.362) data 0.000 (0.004) loss 1.1641 (1.1078) acc 78.1250 (72.1684) lr 1.2487e-03 eta 5:12:17 +epoch [23/50] batch [250/500] time 1.337 (1.363) data 0.000 (0.004) loss 1.0957 (1.1069) acc 65.6250 (72.1750) lr 1.2487e-03 eta 5:12:16 +epoch [23/50] batch [255/500] time 1.337 (1.362) data 
0.000 (0.004) loss 1.5664 (1.1097) acc 62.5000 (72.1078) lr 1.2487e-03 eta 5:12:07 +epoch [23/50] batch [260/500] time 1.346 (1.362) data 0.000 (0.004) loss 1.2764 (1.1072) acc 59.3750 (72.1514) lr 1.2487e-03 eta 5:11:59 +epoch [23/50] batch [265/500] time 1.366 (1.362) data 0.000 (0.004) loss 1.2256 (1.1077) acc 71.8750 (72.1226) lr 1.2487e-03 eta 5:11:51 +epoch [23/50] batch [270/500] time 1.353 (1.362) data 0.000 (0.003) loss 1.4600 (1.1094) acc 50.0000 (72.0370) lr 1.2487e-03 eta 5:11:44 +epoch [23/50] batch [275/500] time 1.346 (1.362) data 0.000 (0.003) loss 1.6797 (1.1143) acc 62.5000 (72.0000) lr 1.2487e-03 eta 5:11:36 +epoch [23/50] batch [280/500] time 1.355 (1.362) data 0.000 (0.003) loss 1.6934 (1.1173) acc 56.2500 (71.8638) lr 1.2487e-03 eta 5:11:28 +epoch [23/50] batch [285/500] time 1.338 (1.362) data 0.000 (0.003) loss 1.0391 (1.1149) acc 78.1250 (71.9737) lr 1.2487e-03 eta 5:11:20 +epoch [23/50] batch [290/500] time 1.362 (1.362) data 0.000 (0.003) loss 1.2314 (1.1168) acc 75.0000 (71.9181) lr 1.2487e-03 eta 5:11:13 +epoch [23/50] batch [295/500] time 1.368 (1.362) data 0.000 (0.003) loss 0.9268 (1.1142) acc 75.0000 (71.9492) lr 1.2487e-03 eta 5:11:06 +epoch [23/50] batch [300/500] time 1.356 (1.362) data 0.000 (0.003) loss 0.9790 (1.1120) acc 75.0000 (72.0417) lr 1.2487e-03 eta 5:10:56 +epoch [23/50] batch [305/500] time 1.357 (1.362) data 0.000 (0.003) loss 0.7236 (1.1118) acc 81.2500 (72.0799) lr 1.2487e-03 eta 5:10:47 +epoch [23/50] batch [310/500] time 1.370 (1.361) data 0.000 (0.003) loss 0.6499 (1.1103) acc 84.3750 (72.1069) lr 1.2487e-03 eta 5:10:37 +epoch [23/50] batch [315/500] time 1.350 (1.361) data 0.000 (0.003) loss 1.3896 (1.1098) acc 62.5000 (72.0536) lr 1.2487e-03 eta 5:10:30 +epoch [23/50] batch [320/500] time 1.339 (1.361) data 0.000 (0.003) loss 0.9600 (1.1070) acc 71.8750 (72.1094) lr 1.2487e-03 eta 5:10:21 +epoch [23/50] batch [325/500] time 1.360 (1.361) data 0.000 (0.003) loss 0.5933 (1.1069) acc 78.1250 (72.1346) lr 
1.2487e-03 eta 5:10:12 +epoch [23/50] batch [330/500] time 1.367 (1.361) data 0.000 (0.003) loss 1.0498 (1.1073) acc 75.0000 (72.0739) lr 1.2487e-03 eta 5:10:06 +epoch [23/50] batch [335/500] time 1.350 (1.361) data 0.000 (0.003) loss 1.3760 (1.1078) acc 56.2500 (72.0056) lr 1.2487e-03 eta 5:09:59 +epoch [23/50] batch [340/500] time 1.374 (1.361) data 0.001 (0.003) loss 0.9268 (1.1091) acc 78.1250 (72.0404) lr 1.2487e-03 eta 5:09:52 +epoch [23/50] batch [345/500] time 1.339 (1.361) data 0.000 (0.003) loss 1.0635 (1.1083) acc 75.0000 (72.0562) lr 1.2487e-03 eta 5:09:44 +epoch [23/50] batch [350/500] time 1.352 (1.361) data 0.000 (0.003) loss 1.5312 (1.1101) acc 71.8750 (72.0893) lr 1.2487e-03 eta 5:09:40 +epoch [23/50] batch [355/500] time 1.351 (1.361) data 0.000 (0.003) loss 0.8120 (1.1102) acc 71.8750 (72.0775) lr 1.2487e-03 eta 5:09:31 +epoch [23/50] batch [360/500] time 1.366 (1.361) data 0.000 (0.003) loss 0.6973 (1.1073) acc 81.2500 (72.1615) lr 1.2487e-03 eta 5:09:22 +epoch [23/50] batch [365/500] time 1.338 (1.361) data 0.000 (0.003) loss 1.8594 (1.1050) acc 56.2500 (72.2089) lr 1.2487e-03 eta 5:09:13 +epoch [23/50] batch [370/500] time 1.363 (1.361) data 0.000 (0.003) loss 1.9814 (1.1091) acc 62.5000 (72.1537) lr 1.2487e-03 eta 5:09:06 +epoch [23/50] batch [375/500] time 1.372 (1.361) data 0.000 (0.003) loss 0.8379 (1.1077) acc 71.8750 (72.1917) lr 1.2487e-03 eta 5:08:59 +epoch [23/50] batch [380/500] time 1.375 (1.361) data 0.000 (0.003) loss 1.0752 (1.1057) acc 75.0000 (72.2204) lr 1.2487e-03 eta 5:08:54 +epoch [23/50] batch [385/500] time 1.346 (1.361) data 0.000 (0.003) loss 1.6562 (1.1093) acc 59.3750 (72.1104) lr 1.2487e-03 eta 5:08:45 +epoch [23/50] batch [390/500] time 1.505 (1.361) data 0.000 (0.002) loss 0.8252 (1.1118) acc 81.2500 (72.0673) lr 1.2487e-03 eta 5:08:41 +epoch [23/50] batch [395/500] time 1.356 (1.361) data 0.000 (0.002) loss 1.4834 (1.1141) acc 59.3750 (71.9778) lr 1.2487e-03 eta 5:08:34 +epoch [23/50] batch [400/500] time 1.383 
(1.361) data 0.000 (0.002) loss 1.2861 (1.1150) acc 68.7500 (71.9844) lr 1.2487e-03 eta 5:08:27 +epoch [23/50] batch [405/500] time 1.349 (1.361) data 0.000 (0.002) loss 0.7988 (1.1111) acc 75.0000 (72.0448) lr 1.2487e-03 eta 5:08:20 +epoch [23/50] batch [410/500] time 1.359 (1.361) data 0.000 (0.002) loss 1.2451 (1.1114) acc 75.0000 (72.0198) lr 1.2487e-03 eta 5:08:15 +epoch [23/50] batch [415/500] time 1.351 (1.361) data 0.000 (0.002) loss 1.2910 (1.1151) acc 68.7500 (71.9503) lr 1.2487e-03 eta 5:08:08 +epoch [23/50] batch [420/500] time 1.353 (1.361) data 0.000 (0.002) loss 1.2324 (1.1161) acc 65.6250 (71.9420) lr 1.2487e-03 eta 5:08:02 +epoch [23/50] batch [425/500] time 1.348 (1.361) data 0.000 (0.002) loss 0.7476 (1.1152) acc 68.7500 (71.9265) lr 1.2487e-03 eta 5:07:55 +epoch [23/50] batch [430/500] time 1.353 (1.361) data 0.000 (0.002) loss 1.6699 (1.1171) acc 62.5000 (71.8968) lr 1.2487e-03 eta 5:07:45 +epoch [23/50] batch [435/500] time 1.378 (1.361) data 0.000 (0.002) loss 1.4902 (1.1203) acc 71.8750 (71.8463) lr 1.2487e-03 eta 5:07:40 +epoch [23/50] batch [440/500] time 1.351 (1.361) data 0.000 (0.002) loss 1.3672 (1.1195) acc 68.7500 (71.8395) lr 1.2487e-03 eta 5:07:31 +epoch [23/50] batch [445/500] time 1.344 (1.361) data 0.000 (0.002) loss 1.2578 (1.1208) acc 71.8750 (71.8258) lr 1.2487e-03 eta 5:07:22 +epoch [23/50] batch [450/500] time 1.372 (1.361) data 0.000 (0.002) loss 0.9199 (1.1188) acc 78.1250 (71.8611) lr 1.2487e-03 eta 5:07:16 +epoch [23/50] batch [455/500] time 1.369 (1.361) data 0.000 (0.002) loss 1.0850 (1.1201) acc 75.0000 (71.8681) lr 1.2487e-03 eta 5:07:10 +epoch [23/50] batch [460/500] time 1.357 (1.361) data 0.000 (0.002) loss 0.6948 (1.1177) acc 84.3750 (71.9226) lr 1.2487e-03 eta 5:07:03 +epoch [23/50] batch [465/500] time 1.363 (1.361) data 0.000 (0.002) loss 1.4873 (1.1163) acc 59.3750 (71.9825) lr 1.2487e-03 eta 5:06:55 +epoch [23/50] batch [470/500] time 1.355 (1.361) data 0.000 (0.002) loss 0.6704 (1.1146) acc 81.2500 
(71.9880) lr 1.2487e-03 eta 5:06:49 +epoch [23/50] batch [475/500] time 1.349 (1.361) data 0.000 (0.002) loss 0.9224 (1.1141) acc 65.6250 (71.9934) lr 1.2487e-03 eta 5:06:41 +epoch [23/50] batch [480/500] time 1.364 (1.361) data 0.000 (0.002) loss 0.7778 (1.1128) acc 84.3750 (72.0247) lr 1.2487e-03 eta 5:06:35 +epoch [23/50] batch [485/500] time 1.377 (1.361) data 0.001 (0.002) loss 1.1641 (1.1123) acc 71.8750 (72.0232) lr 1.2487e-03 eta 5:06:30 +epoch [23/50] batch [490/500] time 1.343 (1.361) data 0.000 (0.002) loss 1.0547 (1.1139) acc 68.7500 (71.9962) lr 1.2487e-03 eta 5:06:26 +epoch [23/50] batch [495/500] time 1.366 (1.361) data 0.000 (0.002) loss 0.9033 (1.1141) acc 78.1250 (72.0013) lr 1.2487e-03 eta 5:06:20 +epoch [23/50] batch [500/500] time 1.364 (1.361) data 0.000 (0.002) loss 1.3291 (1.1145) acc 78.1250 (72.0062) lr 1.1874e-03 eta 5:06:14 +epoch [24/50] batch [5/500] time 1.347 (1.538) data 0.000 (0.167) loss 1.8467 (1.2152) acc 56.2500 (68.1250) lr 1.1874e-03 eta 5:46:00 +epoch [24/50] batch [10/500] time 1.362 (1.441) data 0.000 (0.084) loss 0.7876 (1.2078) acc 78.1250 (68.4375) lr 1.1874e-03 eta 5:24:05 +epoch [24/50] batch [15/500] time 1.350 (1.413) data 0.000 (0.056) loss 1.1865 (1.1874) acc 68.7500 (69.1667) lr 1.1874e-03 eta 5:17:33 +epoch [24/50] batch [20/500] time 1.368 (1.402) data 0.000 (0.042) loss 1.1484 (1.1468) acc 81.2500 (70.9375) lr 1.1874e-03 eta 5:15:00 +epoch [24/50] batch [25/500] time 1.354 (1.393) data 0.000 (0.034) loss 1.3135 (1.1902) acc 71.8750 (70.8750) lr 1.1874e-03 eta 5:12:47 +epoch [24/50] batch [30/500] time 1.356 (1.386) data 0.000 (0.028) loss 0.9434 (1.1655) acc 71.8750 (71.1458) lr 1.1874e-03 eta 5:11:12 +epoch [24/50] batch [35/500] time 1.367 (1.382) data 0.000 (0.024) loss 1.0312 (1.1700) acc 68.7500 (71.3393) lr 1.1874e-03 eta 5:10:04 +epoch [24/50] batch [40/500] time 1.344 (1.378) data 0.000 (0.021) loss 1.2461 (1.1542) acc 71.8750 (71.5625) lr 1.1874e-03 eta 5:09:08 +epoch [24/50] batch [45/500] time 1.541 
(1.381) data 0.000 (0.019) loss 0.7134 (1.1227) acc 78.1250 (72.0139) lr 1.1874e-03 eta 5:09:44 +epoch [24/50] batch [50/500] time 1.367 (1.378) data 0.000 (0.017) loss 1.2471 (1.1246) acc 65.6250 (71.9375) lr 1.1874e-03 eta 5:08:56 +epoch [24/50] batch [55/500] time 1.375 (1.376) data 0.000 (0.016) loss 1.5137 (1.1273) acc 56.2500 (71.4773) lr 1.1874e-03 eta 5:08:18 +epoch [24/50] batch [60/500] time 1.366 (1.374) data 0.000 (0.014) loss 0.7900 (1.1207) acc 78.1250 (71.7708) lr 1.1874e-03 eta 5:07:52 +epoch [24/50] batch [65/500] time 1.357 (1.373) data 0.000 (0.013) loss 0.7119 (1.0938) acc 84.3750 (72.5962) lr 1.1874e-03 eta 5:07:28 +epoch [24/50] batch [70/500] time 1.333 (1.371) data 0.000 (0.012) loss 1.1318 (1.0857) acc 75.0000 (72.5893) lr 1.1874e-03 eta 5:06:52 +epoch [24/50] batch [75/500] time 1.343 (1.369) data 0.000 (0.011) loss 0.9766 (1.0927) acc 68.7500 (72.4583) lr 1.1874e-03 eta 5:06:22 +epoch [24/50] batch [80/500] time 1.355 (1.368) data 0.000 (0.011) loss 1.1484 (1.1006) acc 68.7500 (72.3438) lr 1.1874e-03 eta 5:06:00 +epoch [24/50] batch [85/500] time 1.364 (1.368) data 0.000 (0.010) loss 1.1846 (1.1144) acc 62.5000 (72.2059) lr 1.1874e-03 eta 5:05:47 +epoch [24/50] batch [90/500] time 1.337 (1.368) data 0.000 (0.010) loss 1.3418 (1.1097) acc 65.6250 (72.3264) lr 1.1874e-03 eta 5:05:46 +epoch [24/50] batch [95/500] time 1.360 (1.368) data 0.000 (0.009) loss 0.8848 (1.1000) acc 75.0000 (72.4342) lr 1.1874e-03 eta 5:05:32 +epoch [24/50] batch [100/500] time 1.376 (1.367) data 0.000 (0.009) loss 1.1592 (1.0991) acc 78.1250 (72.4375) lr 1.1874e-03 eta 5:05:19 +epoch [24/50] batch [105/500] time 1.358 (1.367) data 0.000 (0.008) loss 1.6533 (1.1083) acc 62.5000 (72.3512) lr 1.1874e-03 eta 5:05:07 +epoch [24/50] batch [110/500] time 1.358 (1.366) data 0.000 (0.008) loss 1.2168 (1.1175) acc 78.1250 (72.3011) lr 1.1874e-03 eta 5:04:48 +epoch [24/50] batch [115/500] time 1.352 (1.365) data 0.000 (0.008) loss 1.0430 (1.1147) acc 68.7500 (72.3098) lr 
1.1874e-03 eta 5:04:35 +epoch [24/50] batch [120/500] time 1.369 (1.365) data 0.000 (0.007) loss 1.0273 (1.1167) acc 78.1250 (72.2656) lr 1.1874e-03 eta 5:04:24 +epoch [24/50] batch [125/500] time 1.336 (1.365) data 0.000 (0.007) loss 1.5420 (1.1061) acc 68.7500 (72.5250) lr 1.1874e-03 eta 5:04:10 +epoch [24/50] batch [130/500] time 1.359 (1.364) data 0.001 (0.007) loss 1.8027 (1.1157) acc 56.2500 (72.1635) lr 1.1874e-03 eta 5:04:01 +epoch [24/50] batch [135/500] time 1.374 (1.364) data 0.000 (0.007) loss 1.1230 (1.1202) acc 65.6250 (72.0602) lr 1.1874e-03 eta 5:03:51 +epoch [24/50] batch [140/500] time 1.365 (1.364) data 0.000 (0.006) loss 1.0381 (1.1136) acc 75.0000 (72.3214) lr 1.1874e-03 eta 5:03:43 +epoch [24/50] batch [145/500] time 1.348 (1.364) data 0.000 (0.006) loss 0.7593 (1.1096) acc 71.8750 (72.4138) lr 1.1874e-03 eta 5:03:32 +epoch [24/50] batch [150/500] time 1.361 (1.364) data 0.000 (0.006) loss 0.7847 (1.1013) acc 75.0000 (72.4792) lr 1.1874e-03 eta 5:03:23 +epoch [24/50] batch [155/500] time 1.384 (1.364) data 0.000 (0.006) loss 1.5049 (1.1039) acc 68.7500 (72.3387) lr 1.1874e-03 eta 5:03:19 +epoch [24/50] batch [160/500] time 1.356 (1.364) data 0.001 (0.006) loss 0.7300 (1.1087) acc 78.1250 (72.3438) lr 1.1874e-03 eta 5:03:09 +epoch [24/50] batch [165/500] time 1.347 (1.363) data 0.000 (0.005) loss 1.1318 (1.1155) acc 71.8750 (72.1212) lr 1.1874e-03 eta 5:02:56 +epoch [24/50] batch [170/500] time 1.359 (1.363) data 0.000 (0.005) loss 1.1162 (1.1121) acc 78.1250 (72.0588) lr 1.1874e-03 eta 5:02:47 +epoch [24/50] batch [175/500] time 1.334 (1.362) data 0.000 (0.005) loss 1.5596 (1.1203) acc 62.5000 (71.7679) lr 1.1874e-03 eta 5:02:34 +epoch [24/50] batch [180/500] time 1.374 (1.362) data 0.000 (0.005) loss 1.4668 (1.1223) acc 62.5000 (71.7882) lr 1.1874e-03 eta 5:02:26 +epoch [24/50] batch [185/500] time 1.356 (1.362) data 0.000 (0.005) loss 0.9346 (1.1199) acc 71.8750 (71.8919) lr 1.1874e-03 eta 5:02:17 +epoch [24/50] batch [190/500] time 1.372 
(1.363) data 0.000 (0.005) loss 0.6372 (1.1149) acc 81.2500 (71.9079) lr 1.1874e-03 eta 5:02:19 +epoch [24/50] batch [195/500] time 1.367 (1.363) data 0.001 (0.005) loss 1.6338 (1.1206) acc 59.3750 (71.8109) lr 1.1874e-03 eta 5:02:11 +epoch [24/50] batch [200/500] time 1.361 (1.363) data 0.000 (0.005) loss 1.1719 (1.1189) acc 59.3750 (71.7188) lr 1.1874e-03 eta 5:02:04 +epoch [24/50] batch [205/500] time 1.379 (1.363) data 0.001 (0.004) loss 1.2520 (1.1177) acc 68.7500 (71.8140) lr 1.1874e-03 eta 5:02:00 +epoch [24/50] batch [210/500] time 1.345 (1.363) data 0.000 (0.004) loss 1.3203 (1.1197) acc 68.7500 (71.7113) lr 1.1874e-03 eta 5:01:48 +epoch [24/50] batch [215/500] time 1.339 (1.362) data 0.000 (0.004) loss 1.3105 (1.1226) acc 68.7500 (71.6570) lr 1.1874e-03 eta 5:01:38 +epoch [24/50] batch [220/500] time 1.364 (1.362) data 0.000 (0.004) loss 0.8286 (1.1199) acc 81.2500 (71.7045) lr 1.1874e-03 eta 5:01:31 +epoch [24/50] batch [225/500] time 1.356 (1.362) data 0.000 (0.004) loss 0.9541 (1.1176) acc 75.0000 (71.7500) lr 1.1874e-03 eta 5:01:25 +epoch [24/50] batch [230/500] time 1.383 (1.362) data 0.000 (0.004) loss 1.3799 (1.1191) acc 56.2500 (71.7799) lr 1.1874e-03 eta 5:01:17 +epoch [24/50] batch [235/500] time 1.322 (1.363) data 0.001 (0.004) loss 0.7539 (1.1158) acc 81.2500 (71.9282) lr 1.1874e-03 eta 5:01:15 +epoch [24/50] batch [240/500] time 1.366 (1.363) data 0.000 (0.004) loss 1.0117 (1.1135) acc 78.1250 (72.0182) lr 1.1874e-03 eta 5:01:07 +epoch [24/50] batch [245/500] time 1.336 (1.362) data 0.000 (0.004) loss 1.2236 (1.1090) acc 75.0000 (72.1811) lr 1.1874e-03 eta 5:00:58 +epoch [24/50] batch [250/500] time 1.348 (1.362) data 0.000 (0.004) loss 1.1621 (1.1063) acc 62.5000 (72.2000) lr 1.1874e-03 eta 5:00:50 +epoch [24/50] batch [255/500] time 1.378 (1.362) data 0.000 (0.004) loss 0.9565 (1.1063) acc 78.1250 (72.2672) lr 1.1874e-03 eta 5:00:42 +epoch [24/50] batch [260/500] time 1.360 (1.362) data 0.001 (0.004) loss 2.0801 (1.1124) acc 50.0000 
(72.1755) lr 1.1874e-03 eta 5:00:34 +epoch [24/50] batch [265/500] time 1.355 (1.362) data 0.000 (0.004) loss 1.0576 (1.1131) acc 62.5000 (72.1226) lr 1.1874e-03 eta 5:00:27 +epoch [24/50] batch [270/500] time 1.362 (1.362) data 0.000 (0.003) loss 0.6875 (1.1156) acc 81.2500 (72.1065) lr 1.1874e-03 eta 5:00:19 +epoch [24/50] batch [275/500] time 1.343 (1.362) data 0.000 (0.003) loss 1.1240 (1.1195) acc 71.8750 (72.0000) lr 1.1874e-03 eta 5:00:11 +epoch [24/50] batch [280/500] time 1.338 (1.362) data 0.000 (0.003) loss 1.1943 (1.1213) acc 65.6250 (71.9085) lr 1.1874e-03 eta 5:00:02 +epoch [24/50] batch [285/500] time 1.361 (1.362) data 0.000 (0.003) loss 1.0273 (1.1224) acc 78.1250 (71.9298) lr 1.1874e-03 eta 4:59:53 +epoch [24/50] batch [290/500] time 1.345 (1.362) data 0.000 (0.003) loss 1.2305 (1.1187) acc 71.8750 (71.9720) lr 1.1874e-03 eta 4:59:47 +epoch [24/50] batch [295/500] time 1.350 (1.362) data 0.000 (0.003) loss 1.3330 (1.1227) acc 71.8750 (71.8856) lr 1.1874e-03 eta 4:59:40 +epoch [24/50] batch [300/500] time 1.378 (1.362) data 0.000 (0.003) loss 0.8115 (1.1228) acc 75.0000 (71.8438) lr 1.1874e-03 eta 4:59:33 +epoch [24/50] batch [305/500] time 1.377 (1.362) data 0.000 (0.003) loss 0.5601 (1.1187) acc 81.2500 (71.9160) lr 1.1874e-03 eta 4:59:27 +epoch [24/50] batch [310/500] time 1.372 (1.362) data 0.000 (0.003) loss 0.5186 (1.1136) acc 84.3750 (72.0060) lr 1.1874e-03 eta 4:59:19 +epoch [24/50] batch [315/500] time 1.384 (1.362) data 0.000 (0.003) loss 0.8472 (1.1099) acc 78.1250 (72.0437) lr 1.1874e-03 eta 4:59:15 +epoch [24/50] batch [320/500] time 1.344 (1.362) data 0.000 (0.003) loss 1.8311 (1.1114) acc 68.7500 (72.0898) lr 1.1874e-03 eta 4:59:08 +epoch [24/50] batch [325/500] time 1.342 (1.362) data 0.000 (0.003) loss 0.7397 (1.1106) acc 81.2500 (72.1442) lr 1.1874e-03 eta 4:59:00 +epoch [24/50] batch [330/500] time 1.342 (1.362) data 0.000 (0.003) loss 0.9932 (1.1111) acc 84.3750 (72.1212) lr 1.1874e-03 eta 4:58:51 +epoch [24/50] batch [335/500] 
time 1.345 (1.362) data 0.000 (0.003) loss 1.4805 (1.1081) acc 68.7500 (72.1735) lr 1.1874e-03 eta 4:58:49 +epoch [24/50] batch [340/500] time 1.335 (1.362) data 0.000 (0.003) loss 1.3721 (1.1081) acc 53.1250 (72.1048) lr 1.1874e-03 eta 4:58:42 +epoch [24/50] batch [345/500] time 1.347 (1.362) data 0.000 (0.003) loss 1.0752 (1.1089) acc 68.7500 (72.0652) lr 1.1874e-03 eta 4:58:34 +epoch [24/50] batch [350/500] time 1.376 (1.362) data 0.000 (0.003) loss 1.3164 (1.1093) acc 65.6250 (72.0268) lr 1.1874e-03 eta 4:58:28 +epoch [24/50] batch [355/500] time 1.361 (1.362) data 0.000 (0.003) loss 0.9629 (1.1101) acc 78.1250 (72.0511) lr 1.1874e-03 eta 4:58:22 +epoch [24/50] batch [360/500] time 1.374 (1.362) data 0.000 (0.003) loss 1.2256 (1.1109) acc 68.7500 (72.0747) lr 1.1874e-03 eta 4:58:15 +epoch [24/50] batch [365/500] time 1.387 (1.362) data 0.000 (0.003) loss 0.9722 (1.1110) acc 65.6250 (72.0634) lr 1.1874e-03 eta 4:58:10 +epoch [24/50] batch [370/500] time 1.339 (1.362) data 0.000 (0.003) loss 0.9004 (1.1088) acc 75.0000 (72.1199) lr 1.1874e-03 eta 4:58:03 +epoch [24/50] batch [375/500] time 1.470 (1.362) data 0.000 (0.003) loss 1.1270 (1.1080) acc 65.6250 (72.1417) lr 1.1874e-03 eta 4:57:59 +epoch [24/50] batch [380/500] time 1.361 (1.362) data 0.000 (0.003) loss 0.8091 (1.1085) acc 81.2500 (72.1711) lr 1.1874e-03 eta 4:57:53 +epoch [24/50] batch [385/500] time 1.373 (1.362) data 0.001 (0.003) loss 1.3740 (1.1099) acc 62.5000 (72.1510) lr 1.1874e-03 eta 4:57:46 +epoch [24/50] batch [390/500] time 1.360 (1.362) data 0.000 (0.003) loss 1.1387 (1.1086) acc 71.8750 (72.1554) lr 1.1874e-03 eta 4:57:40 +epoch [24/50] batch [395/500] time 1.359 (1.362) data 0.000 (0.002) loss 2.2754 (1.1115) acc 53.1250 (72.1440) lr 1.1874e-03 eta 4:57:32 +epoch [24/50] batch [400/500] time 1.369 (1.362) data 0.000 (0.002) loss 0.9805 (1.1101) acc 68.7500 (72.1172) lr 1.1874e-03 eta 4:57:25 +epoch [24/50] batch [405/500] time 1.353 (1.362) data 0.000 (0.002) loss 1.0117 (1.1100) acc 
71.8750 (72.0988) lr 1.1874e-03 eta 4:57:17 +epoch [24/50] batch [410/500] time 1.375 (1.362) data 0.000 (0.002) loss 0.6680 (1.1106) acc 87.5000 (72.1494) lr 1.1874e-03 eta 4:57:11 +epoch [24/50] batch [415/500] time 1.371 (1.362) data 0.000 (0.002) loss 1.3770 (1.1125) acc 56.2500 (72.0934) lr 1.1874e-03 eta 4:57:07 +epoch [24/50] batch [420/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.4268 (1.1133) acc 65.6250 (72.0685) lr 1.1874e-03 eta 4:57:01 +epoch [24/50] batch [425/500] time 1.359 (1.363) data 0.000 (0.002) loss 1.1279 (1.1129) acc 71.8750 (72.0809) lr 1.1874e-03 eta 4:56:55 +epoch [24/50] batch [430/500] time 1.362 (1.363) data 0.001 (0.002) loss 1.2764 (1.1146) acc 59.3750 (71.9985) lr 1.1874e-03 eta 4:56:49 +epoch [24/50] batch [435/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.6152 (1.1135) acc 53.1250 (72.0043) lr 1.1874e-03 eta 4:56:43 +epoch [24/50] batch [440/500] time 1.370 (1.363) data 0.000 (0.002) loss 0.8760 (1.1162) acc 68.7500 (71.9389) lr 1.1874e-03 eta 4:56:35 +epoch [24/50] batch [445/500] time 1.347 (1.363) data 0.000 (0.002) loss 0.7979 (1.1154) acc 81.2500 (71.9522) lr 1.1874e-03 eta 4:56:28 +epoch [24/50] batch [450/500] time 1.353 (1.362) data 0.001 (0.002) loss 0.7681 (1.1149) acc 75.0000 (71.9097) lr 1.1874e-03 eta 4:56:20 +epoch [24/50] batch [455/500] time 1.349 (1.362) data 0.000 (0.002) loss 0.8711 (1.1141) acc 78.1250 (71.9231) lr 1.1874e-03 eta 4:56:11 +epoch [24/50] batch [460/500] time 1.357 (1.362) data 0.000 (0.002) loss 1.2529 (1.1145) acc 71.8750 (71.9158) lr 1.1874e-03 eta 4:56:04 +epoch [24/50] batch [465/500] time 1.349 (1.362) data 0.000 (0.002) loss 1.0762 (1.1149) acc 71.8750 (71.9220) lr 1.1874e-03 eta 4:55:56 +epoch [24/50] batch [470/500] time 1.365 (1.362) data 0.000 (0.002) loss 1.4697 (1.1167) acc 65.6250 (71.8617) lr 1.1874e-03 eta 4:55:50 +epoch [24/50] batch [475/500] time 1.369 (1.363) data 0.000 (0.002) loss 0.8682 (1.1145) acc 78.1250 (71.8947) lr 1.1874e-03 eta 4:55:47 +epoch [24/50] batch 
[480/500] time 1.375 (1.363) data 0.000 (0.002) loss 0.7271 (1.1131) acc 78.1250 (71.9141) lr 1.1874e-03 eta 4:55:41 +epoch [24/50] batch [485/500] time 1.398 (1.363) data 0.001 (0.002) loss 0.8594 (1.1144) acc 78.1250 (71.9265) lr 1.1874e-03 eta 4:55:37 +epoch [24/50] batch [490/500] time 1.358 (1.363) data 0.000 (0.002) loss 1.3320 (1.1165) acc 75.0000 (71.8878) lr 1.1874e-03 eta 4:55:31 +epoch [24/50] batch [495/500] time 1.344 (1.363) data 0.000 (0.002) loss 0.9238 (1.1181) acc 68.7500 (71.8750) lr 1.1874e-03 eta 4:55:23 +epoch [24/50] batch [500/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.0029 (1.1160) acc 71.8750 (71.9250) lr 1.1253e-03 eta 4:55:15 +epoch [25/50] batch [5/500] time 1.353 (1.515) data 0.000 (0.147) loss 1.3252 (0.9857) acc 68.7500 (71.8750) lr 1.1253e-03 eta 5:28:03 +epoch [25/50] batch [10/500] time 1.353 (1.440) data 0.000 (0.074) loss 0.7593 (1.0239) acc 75.0000 (70.0000) lr 1.1253e-03 eta 5:11:47 +epoch [25/50] batch [15/500] time 1.359 (1.415) data 0.000 (0.049) loss 0.4866 (0.9795) acc 84.3750 (72.2917) lr 1.1253e-03 eta 5:06:12 +epoch [25/50] batch [20/500] time 1.364 (1.402) data 0.000 (0.037) loss 1.0723 (1.0388) acc 68.7500 (71.2500) lr 1.1253e-03 eta 5:03:21 +epoch [25/50] batch [25/500] time 1.543 (1.401) data 0.000 (0.030) loss 1.7832 (1.0645) acc 59.3750 (70.3750) lr 1.1253e-03 eta 5:02:56 +epoch [25/50] batch [30/500] time 1.354 (1.394) data 0.000 (0.025) loss 0.8540 (1.0559) acc 78.1250 (71.0417) lr 1.1253e-03 eta 5:01:14 +epoch [25/50] batch [35/500] time 1.372 (1.389) data 0.000 (0.021) loss 0.9517 (1.0459) acc 81.2500 (71.6964) lr 1.1253e-03 eta 5:00:08 +epoch [25/50] batch [40/500] time 1.365 (1.386) data 0.000 (0.019) loss 1.2334 (1.0517) acc 65.6250 (71.7969) lr 1.1253e-03 eta 4:59:23 +epoch [25/50] batch [45/500] time 1.362 (1.383) data 0.000 (0.017) loss 0.7979 (1.0393) acc 78.1250 (72.5000) lr 1.1253e-03 eta 4:58:34 +epoch [25/50] batch [50/500] time 1.386 (1.381) data 0.000 (0.015) loss 1.1074 (1.0296) acc 
68.7500 (73.0000) lr 1.1253e-03 eta 4:58:00 +epoch [25/50] batch [55/500] time 1.363 (1.379) data 0.000 (0.014) loss 0.6147 (1.0434) acc 84.3750 (72.6705) lr 1.1253e-03 eta 4:57:34 +epoch [25/50] batch [60/500] time 1.347 (1.377) data 0.000 (0.013) loss 0.9497 (1.0511) acc 65.6250 (72.4479) lr 1.1253e-03 eta 4:57:03 +epoch [25/50] batch [65/500] time 1.358 (1.376) data 0.000 (0.012) loss 0.9131 (1.0435) acc 68.7500 (72.2596) lr 1.1253e-03 eta 4:56:35 +epoch [25/50] batch [70/500] time 1.355 (1.377) data 0.000 (0.011) loss 1.1055 (1.0540) acc 71.8750 (72.1429) lr 1.1253e-03 eta 4:56:41 +epoch [25/50] batch [75/500] time 1.361 (1.376) data 0.000 (0.010) loss 1.3184 (1.0584) acc 78.1250 (72.2083) lr 1.1253e-03 eta 4:56:20 +epoch [25/50] batch [80/500] time 1.357 (1.375) data 0.000 (0.010) loss 1.3975 (1.0526) acc 68.7500 (72.4609) lr 1.1253e-03 eta 4:56:01 +epoch [25/50] batch [85/500] time 1.355 (1.374) data 0.000 (0.009) loss 1.4570 (1.0621) acc 71.8750 (72.4265) lr 1.1253e-03 eta 4:55:40 +epoch [25/50] batch [90/500] time 1.355 (1.373) data 0.000 (0.008) loss 1.6777 (1.0685) acc 65.6250 (72.3958) lr 1.1253e-03 eta 4:55:21 +epoch [25/50] batch [95/500] time 1.366 (1.372) data 0.000 (0.008) loss 1.4277 (1.0810) acc 68.7500 (72.2039) lr 1.1253e-03 eta 4:55:00 +epoch [25/50] batch [100/500] time 1.374 (1.371) data 0.000 (0.008) loss 1.3984 (1.0894) acc 68.7500 (72.1250) lr 1.1253e-03 eta 4:54:47 +epoch [25/50] batch [105/500] time 1.355 (1.370) data 0.000 (0.007) loss 1.7412 (1.1052) acc 65.6250 (71.9048) lr 1.1253e-03 eta 4:54:32 +epoch [25/50] batch [110/500] time 1.349 (1.370) data 0.000 (0.007) loss 0.9209 (1.1040) acc 75.0000 (71.9034) lr 1.1253e-03 eta 4:54:20 +epoch [25/50] batch [115/500] time 1.363 (1.370) data 0.000 (0.007) loss 0.9380 (1.0972) acc 71.8750 (71.9293) lr 1.1253e-03 eta 4:54:12 +epoch [25/50] batch [120/500] time 1.364 (1.370) data 0.000 (0.006) loss 1.3730 (1.1022) acc 68.7500 (71.9010) lr 1.1253e-03 eta 4:54:00 +epoch [25/50] batch [125/500] 
time 1.370 (1.369) data 0.000 (0.006) loss 1.5195 (1.1015) acc 65.6250 (71.9000) lr 1.1253e-03 eta 4:53:46 +epoch [25/50] batch [130/500] time 1.340 (1.369) data 0.000 (0.006) loss 0.9678 (1.1034) acc 71.8750 (71.7308) lr 1.1253e-03 eta 4:53:35 +epoch [25/50] batch [135/500] time 1.350 (1.368) data 0.000 (0.006) loss 1.2285 (1.0989) acc 68.7500 (71.8750) lr 1.1253e-03 eta 4:53:23 +epoch [25/50] batch [140/500] time 1.356 (1.368) data 0.000 (0.006) loss 1.2236 (1.0961) acc 71.8750 (71.8973) lr 1.1253e-03 eta 4:53:16 +epoch [25/50] batch [145/500] time 1.368 (1.368) data 0.000 (0.005) loss 1.1436 (1.1041) acc 75.0000 (71.8534) lr 1.1253e-03 eta 4:53:10 +epoch [25/50] batch [150/500] time 1.364 (1.368) data 0.000 (0.005) loss 0.9634 (1.0972) acc 62.5000 (71.9792) lr 1.1253e-03 eta 4:53:03 +epoch [25/50] batch [155/500] time 1.375 (1.369) data 0.000 (0.005) loss 0.7197 (1.0904) acc 75.0000 (72.0766) lr 1.1253e-03 eta 4:52:59 +epoch [25/50] batch [160/500] time 1.369 (1.369) data 0.000 (0.005) loss 1.2686 (1.0965) acc 68.7500 (71.9727) lr 1.1253e-03 eta 4:52:52 +epoch [25/50] batch [165/500] time 1.504 (1.369) data 0.000 (0.005) loss 1.5020 (1.1020) acc 68.7500 (71.9508) lr 1.1253e-03 eta 4:52:53 +epoch [25/50] batch [170/500] time 1.371 (1.369) data 0.000 (0.005) loss 0.8198 (1.1050) acc 75.0000 (71.8750) lr 1.1253e-03 eta 4:52:47 +epoch [25/50] batch [175/500] time 1.381 (1.369) data 0.000 (0.005) loss 0.9756 (1.1071) acc 81.2500 (71.9821) lr 1.1253e-03 eta 4:52:43 +epoch [25/50] batch [180/500] time 1.365 (1.369) data 0.000 (0.004) loss 1.5488 (1.1128) acc 59.3750 (71.8576) lr 1.1253e-03 eta 4:52:32 +epoch [25/50] batch [185/500] time 1.355 (1.369) data 0.000 (0.004) loss 0.8018 (1.1070) acc 75.0000 (71.9426) lr 1.1253e-03 eta 4:52:18 +epoch [25/50] batch [190/500] time 1.360 (1.368) data 0.000 (0.004) loss 1.9980 (1.1141) acc 59.3750 (71.7928) lr 1.1253e-03 eta 4:52:06 +epoch [25/50] batch [195/500] time 1.349 (1.368) data 0.000 (0.004) loss 0.5894 (1.1105) acc 
75.0000 (71.8910) lr 1.1253e-03 eta 4:51:56 +epoch [25/50] batch [200/500] time 1.342 (1.367) data 0.000 (0.004) loss 0.8745 (1.1115) acc 78.1250 (71.8750) lr 1.1253e-03 eta 4:51:43 +epoch [25/50] batch [205/500] time 1.379 (1.367) data 0.000 (0.004) loss 1.7891 (1.1152) acc 65.6250 (71.7835) lr 1.1253e-03 eta 4:51:35 +epoch [25/50] batch [210/500] time 1.346 (1.368) data 0.000 (0.004) loss 1.1514 (1.1174) acc 68.7500 (71.7113) lr 1.1253e-03 eta 4:51:34 +epoch [25/50] batch [215/500] time 1.355 (1.368) data 0.000 (0.004) loss 0.5317 (1.1179) acc 87.5000 (71.7587) lr 1.1253e-03 eta 4:51:23 +epoch [25/50] batch [220/500] time 1.357 (1.367) data 0.000 (0.004) loss 1.1982 (1.1237) acc 71.8750 (71.6619) lr 1.1253e-03 eta 4:51:12 +epoch [25/50] batch [225/500] time 1.360 (1.367) data 0.000 (0.004) loss 0.8086 (1.1225) acc 75.0000 (71.6389) lr 1.1253e-03 eta 4:51:04 +epoch [25/50] batch [230/500] time 1.356 (1.367) data 0.000 (0.004) loss 0.8140 (1.1228) acc 75.0000 (71.6440) lr 1.1253e-03 eta 4:50:54 +epoch [25/50] batch [235/500] time 1.343 (1.367) data 0.000 (0.003) loss 1.0029 (1.1233) acc 68.7500 (71.6622) lr 1.1253e-03 eta 4:50:47 +epoch [25/50] batch [240/500] time 1.342 (1.367) data 0.000 (0.003) loss 1.5742 (1.1269) acc 68.7500 (71.6146) lr 1.1253e-03 eta 4:50:39 +epoch [25/50] batch [245/500] time 1.378 (1.367) data 0.000 (0.003) loss 0.9126 (1.1260) acc 75.0000 (71.6071) lr 1.1253e-03 eta 4:50:30 +epoch [25/50] batch [250/500] time 1.333 (1.366) data 0.000 (0.003) loss 0.6797 (1.1229) acc 81.2500 (71.6500) lr 1.1253e-03 eta 4:50:19 +epoch [25/50] batch [255/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.6108 (1.1174) acc 81.2500 (71.6912) lr 1.1253e-03 eta 4:50:10 +epoch [25/50] batch [260/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.3350 (1.1169) acc 75.0000 (71.7788) lr 1.1253e-03 eta 4:50:02 +epoch [25/50] batch [265/500] time 1.359 (1.366) data 0.000 (0.003) loss 1.1553 (1.1165) acc 71.8750 (71.7453) lr 1.1253e-03 eta 4:49:53 +epoch [25/50] batch 
[270/500] time 1.368 (1.366) data 0.000 (0.003) loss 1.2129 (1.1183) acc 62.5000 (71.7361) lr 1.1253e-03 eta 4:49:45 +epoch [25/50] batch [275/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.9370 (1.1182) acc 75.0000 (71.7273) lr 1.1253e-03 eta 4:49:37 +epoch [25/50] batch [280/500] time 1.357 (1.365) data 0.000 (0.003) loss 0.7427 (1.1172) acc 84.3750 (71.7522) lr 1.1253e-03 eta 4:49:27 +epoch [25/50] batch [285/500] time 1.372 (1.365) data 0.000 (0.003) loss 1.2373 (1.1169) acc 81.2500 (71.8202) lr 1.1253e-03 eta 4:49:20 +epoch [25/50] batch [290/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.4551 (1.1102) acc 84.3750 (71.9397) lr 1.1253e-03 eta 4:49:12 +epoch [25/50] batch [295/500] time 1.376 (1.365) data 0.000 (0.003) loss 0.6997 (1.1103) acc 81.2500 (71.9597) lr 1.1253e-03 eta 4:49:07 +epoch [25/50] batch [300/500] time 1.330 (1.365) data 0.000 (0.003) loss 1.1914 (1.1099) acc 59.3750 (71.9792) lr 1.1253e-03 eta 4:48:56 +epoch [25/50] batch [305/500] time 1.366 (1.365) data 0.000 (0.003) loss 0.5649 (1.1082) acc 84.3750 (71.9672) lr 1.1253e-03 eta 4:48:47 +epoch [25/50] batch [310/500] time 1.351 (1.365) data 0.000 (0.003) loss 1.4521 (1.1092) acc 68.7500 (71.9657) lr 1.1253e-03 eta 4:48:45 +epoch [25/50] batch [315/500] time 1.351 (1.365) data 0.001 (0.003) loss 1.4512 (1.1142) acc 68.7500 (71.9147) lr 1.1253e-03 eta 4:48:38 +epoch [25/50] batch [320/500] time 1.385 (1.365) data 0.000 (0.003) loss 1.0742 (1.1162) acc 71.8750 (71.9043) lr 1.1253e-03 eta 4:48:32 +epoch [25/50] batch [325/500] time 1.383 (1.365) data 0.000 (0.003) loss 0.8911 (1.1147) acc 78.1250 (71.9327) lr 1.1253e-03 eta 4:48:24 +epoch [25/50] batch [330/500] time 1.354 (1.365) data 0.000 (0.003) loss 0.7974 (1.1153) acc 81.2500 (71.9034) lr 1.1253e-03 eta 4:48:16 +epoch [25/50] batch [335/500] time 1.376 (1.365) data 0.000 (0.003) loss 1.6445 (1.1177) acc 62.5000 (71.8377) lr 1.1253e-03 eta 4:48:11 +epoch [25/50] batch [340/500] time 1.373 (1.365) data 0.000 (0.002) loss 1.7178 
(1.1179) acc 62.5000 (71.8107) lr 1.1253e-03 eta 4:48:04 +epoch [25/50] batch [345/500] time 1.350 (1.365) data 0.000 (0.002) loss 0.8550 (1.1204) acc 81.2500 (71.7935) lr 1.1253e-03 eta 4:47:55 +epoch [25/50] batch [350/500] time 1.347 (1.365) data 0.000 (0.002) loss 1.8945 (1.1241) acc 68.7500 (71.8036) lr 1.1253e-03 eta 4:47:47 +epoch [25/50] batch [355/500] time 1.365 (1.365) data 0.000 (0.002) loss 1.1797 (1.1262) acc 75.0000 (71.7782) lr 1.1253e-03 eta 4:47:43 +epoch [25/50] batch [360/500] time 1.379 (1.365) data 0.000 (0.002) loss 2.3613 (1.1269) acc 65.6250 (71.7795) lr 1.1253e-03 eta 4:47:35 +epoch [25/50] batch [365/500] time 1.359 (1.365) data 0.000 (0.002) loss 0.9849 (1.1254) acc 75.0000 (71.7551) lr 1.1253e-03 eta 4:47:26 +epoch [25/50] batch [370/500] time 1.376 (1.365) data 0.000 (0.002) loss 0.6021 (1.1244) acc 81.2500 (71.7652) lr 1.1253e-03 eta 4:47:17 +epoch [25/50] batch [375/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.5996 (1.1248) acc 62.5000 (71.7750) lr 1.1253e-03 eta 4:47:09 +epoch [25/50] batch [380/500] time 1.363 (1.365) data 0.000 (0.002) loss 0.7065 (1.1225) acc 87.5000 (71.8421) lr 1.1253e-03 eta 4:47:01 +epoch [25/50] batch [385/500] time 1.380 (1.365) data 0.000 (0.002) loss 1.1240 (1.1227) acc 68.7500 (71.8101) lr 1.1253e-03 eta 4:46:55 +epoch [25/50] batch [390/500] time 1.356 (1.365) data 0.000 (0.002) loss 1.2100 (1.1230) acc 68.7500 (71.8269) lr 1.1253e-03 eta 4:46:48 +epoch [25/50] batch [395/500] time 1.362 (1.365) data 0.000 (0.002) loss 1.2539 (1.1211) acc 75.0000 (71.8592) lr 1.1253e-03 eta 4:46:40 +epoch [25/50] batch [400/500] time 1.358 (1.365) data 0.000 (0.002) loss 0.9688 (1.1180) acc 71.8750 (71.9062) lr 1.1253e-03 eta 4:46:34 +epoch [25/50] batch [405/500] time 1.362 (1.365) data 0.000 (0.002) loss 0.8203 (1.1218) acc 75.0000 (71.8364) lr 1.1253e-03 eta 4:46:27 +epoch [25/50] batch [410/500] time 1.360 (1.365) data 0.000 (0.002) loss 1.5830 (1.1200) acc 62.5000 (71.8750) lr 1.1253e-03 eta 4:46:20 +epoch 
[25/50] batch [415/500] time 1.365 (1.365) data 0.000 (0.002) loss 0.7012 (1.1204) acc 81.2500 (71.8901) lr 1.1253e-03 eta 4:46:13 +epoch [25/50] batch [420/500] time 1.360 (1.365) data 0.000 (0.002) loss 0.6553 (1.1208) acc 81.2500 (71.8750) lr 1.1253e-03 eta 4:46:07 +epoch [25/50] batch [425/500] time 1.372 (1.365) data 0.000 (0.002) loss 0.8838 (1.1180) acc 81.2500 (71.9485) lr 1.1253e-03 eta 4:46:00 +epoch [25/50] batch [430/500] time 1.345 (1.365) data 0.000 (0.002) loss 0.9629 (1.1164) acc 81.2500 (71.9695) lr 1.1253e-03 eta 4:45:52 +epoch [25/50] batch [435/500] time 1.366 (1.364) data 0.000 (0.002) loss 0.7822 (1.1144) acc 81.2500 (71.9828) lr 1.1253e-03 eta 4:45:44 +epoch [25/50] batch [440/500] time 1.374 (1.364) data 0.000 (0.002) loss 1.0781 (1.1127) acc 68.7500 (72.0099) lr 1.1253e-03 eta 4:45:37 +epoch [25/50] batch [445/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.2363 (1.1158) acc 75.0000 (71.9663) lr 1.1253e-03 eta 4:45:28 +epoch [25/50] batch [450/500] time 1.371 (1.364) data 0.000 (0.002) loss 1.4287 (1.1182) acc 59.3750 (71.8958) lr 1.1253e-03 eta 4:45:21 +epoch [25/50] batch [455/500] time 1.363 (1.364) data 0.000 (0.002) loss 0.6738 (1.1164) acc 78.1250 (71.9368) lr 1.1253e-03 eta 4:45:17 +epoch [25/50] batch [460/500] time 1.338 (1.364) data 0.000 (0.002) loss 0.8784 (1.1177) acc 78.1250 (71.9429) lr 1.1253e-03 eta 4:45:09 +epoch [25/50] batch [465/500] time 1.377 (1.364) data 0.000 (0.002) loss 1.5488 (1.1197) acc 65.6250 (71.9086) lr 1.1253e-03 eta 4:45:02 +epoch [25/50] batch [470/500] time 1.349 (1.364) data 0.000 (0.002) loss 1.1162 (1.1180) acc 68.7500 (71.9747) lr 1.1253e-03 eta 4:44:54 +epoch [25/50] batch [475/500] time 1.352 (1.364) data 0.000 (0.002) loss 0.6089 (1.1148) acc 84.3750 (72.0724) lr 1.1253e-03 eta 4:44:47 +epoch [25/50] batch [480/500] time 1.394 (1.364) data 0.000 (0.002) loss 1.0859 (1.1147) acc 65.6250 (72.0833) lr 1.1253e-03 eta 4:44:39 +epoch [25/50] batch [485/500] time 1.378 (1.364) data 0.001 (0.002) loss 
0.7456 (1.1125) acc 71.8750 (72.1134) lr 1.1253e-03 eta 4:44:32 +epoch [25/50] batch [490/500] time 1.365 (1.364) data 0.000 (0.002) loss 0.7793 (1.1103) acc 75.0000 (72.1747) lr 1.1253e-03 eta 4:44:24 +epoch [25/50] batch [495/500] time 1.469 (1.364) data 0.000 (0.002) loss 1.4551 (1.1103) acc 65.6250 (72.1528) lr 1.1253e-03 eta 4:44:19 +epoch [25/50] batch [500/500] time 1.337 (1.364) data 0.000 (0.002) loss 1.4229 (1.1117) acc 62.5000 (72.1375) lr 1.0628e-03 eta 4:44:10 +epoch [26/50] batch [5/500] time 1.356 (1.531) data 0.000 (0.178) loss 0.8428 (1.1877) acc 71.8750 (68.7500) lr 1.0628e-03 eta 5:18:52 +epoch [26/50] batch [10/500] time 1.345 (1.441) data 0.000 (0.089) loss 0.7363 (1.1746) acc 78.1250 (70.0000) lr 1.0628e-03 eta 4:59:59 +epoch [26/50] batch [15/500] time 1.353 (1.410) data 0.000 (0.060) loss 1.6309 (1.1123) acc 71.8750 (72.2917) lr 1.0628e-03 eta 4:53:29 +epoch [26/50] batch [20/500] time 1.354 (1.399) data 0.000 (0.045) loss 1.1270 (1.1227) acc 75.0000 (73.4375) lr 1.0628e-03 eta 4:51:00 +epoch [26/50] batch [25/500] time 1.356 (1.390) data 0.000 (0.036) loss 1.1094 (1.1157) acc 78.1250 (74.0000) lr 1.0628e-03 eta 4:48:54 +epoch [26/50] batch [30/500] time 1.359 (1.384) data 0.000 (0.030) loss 1.3955 (1.1180) acc 59.3750 (72.9167) lr 1.0628e-03 eta 4:47:44 +epoch [26/50] batch [35/500] time 1.358 (1.381) data 0.000 (0.026) loss 0.9536 (1.1170) acc 81.2500 (73.4821) lr 1.0628e-03 eta 4:46:48 +epoch [26/50] batch [40/500] time 1.357 (1.381) data 0.000 (0.023) loss 1.5469 (1.1195) acc 65.6250 (73.2031) lr 1.0628e-03 eta 4:46:44 +epoch [26/50] batch [45/500] time 1.344 (1.378) data 0.000 (0.020) loss 1.0596 (1.0974) acc 78.1250 (73.5417) lr 1.0628e-03 eta 4:46:00 +epoch [26/50] batch [50/500] time 1.362 (1.377) data 0.000 (0.018) loss 1.3594 (1.0957) acc 71.8750 (73.4375) lr 1.0628e-03 eta 4:45:41 +epoch [26/50] batch [55/500] time 1.365 (1.375) data 0.000 (0.017) loss 0.9297 (1.0993) acc 65.6250 (73.4091) lr 1.0628e-03 eta 4:45:06 +epoch [26/50] 
batch [60/500] time 1.364 (1.373) data 0.000 (0.015) loss 1.1836 (1.1054) acc 68.7500 (73.1250) lr 1.0628e-03 eta 4:44:42 +epoch [26/50] batch [65/500] time 1.344 (1.371) data 0.000 (0.014) loss 1.0820 (1.0984) acc 75.0000 (72.9808) lr 1.0628e-03 eta 4:44:09 +epoch [26/50] batch [70/500] time 1.374 (1.370) data 0.001 (0.013) loss 1.0400 (1.1150) acc 71.8750 (72.5000) lr 1.0628e-03 eta 4:43:47 +epoch [26/50] batch [75/500] time 1.344 (1.368) data 0.000 (0.012) loss 1.2588 (1.1146) acc 68.7500 (72.3333) lr 1.0628e-03 eta 4:43:21 +epoch [26/50] batch [80/500] time 1.364 (1.367) data 0.000 (0.011) loss 1.0928 (1.1113) acc 68.7500 (72.2656) lr 1.0628e-03 eta 4:43:01 +epoch [26/50] batch [85/500] time 1.350 (1.366) data 0.000 (0.011) loss 0.6631 (1.1197) acc 87.5000 (71.9485) lr 1.0628e-03 eta 4:42:44 +epoch [26/50] batch [90/500] time 1.346 (1.366) data 0.000 (0.010) loss 1.0410 (1.1237) acc 78.1250 (71.9097) lr 1.0628e-03 eta 4:42:28 +epoch [26/50] batch [95/500] time 1.369 (1.365) data 0.000 (0.010) loss 0.9336 (1.1143) acc 71.8750 (71.9079) lr 1.0628e-03 eta 4:42:14 +epoch [26/50] batch [100/500] time 1.339 (1.365) data 0.000 (0.009) loss 0.6709 (1.1119) acc 87.5000 (71.9375) lr 1.0628e-03 eta 4:42:08 +epoch [26/50] batch [105/500] time 1.342 (1.365) data 0.000 (0.009) loss 1.3486 (1.1034) acc 65.6250 (72.0238) lr 1.0628e-03 eta 4:41:54 +epoch [26/50] batch [110/500] time 1.364 (1.364) data 0.000 (0.008) loss 1.0527 (1.0978) acc 68.7500 (72.1875) lr 1.0628e-03 eta 4:41:40 +epoch [26/50] batch [115/500] time 1.356 (1.364) data 0.000 (0.008) loss 1.0039 (1.0973) acc 62.5000 (72.2011) lr 1.0628e-03 eta 4:41:30 +epoch [26/50] batch [120/500] time 1.341 (1.364) data 0.000 (0.008) loss 0.9019 (1.0831) acc 78.1250 (72.4479) lr 1.0628e-03 eta 4:41:21 +epoch [26/50] batch [125/500] time 1.375 (1.363) data 0.000 (0.007) loss 0.7988 (1.0835) acc 75.0000 (72.4250) lr 1.0628e-03 eta 4:41:12 +epoch [26/50] batch [130/500] time 1.369 (1.363) data 0.000 (0.007) loss 1.0146 (1.0861) 
acc 75.0000 (72.4519) lr 1.0628e-03 eta 4:41:04 +epoch [26/50] batch [135/500] time 1.341 (1.363) data 0.000 (0.007) loss 0.6240 (1.0770) acc 78.1250 (72.6157) lr 1.0628e-03 eta 4:40:51 +epoch [26/50] batch [140/500] time 1.375 (1.364) data 0.000 (0.007) loss 0.9556 (1.0763) acc 71.8750 (72.7679) lr 1.0628e-03 eta 4:40:58 +epoch [26/50] batch [145/500] time 1.375 (1.364) data 0.001 (0.006) loss 1.1201 (1.0682) acc 68.7500 (72.9095) lr 1.0628e-03 eta 4:40:53 +epoch [26/50] batch [150/500] time 1.379 (1.364) data 0.000 (0.006) loss 0.7783 (1.0634) acc 78.1250 (72.8125) lr 1.0628e-03 eta 4:40:48 +epoch [26/50] batch [155/500] time 1.353 (1.364) data 0.000 (0.006) loss 1.4971 (1.0661) acc 68.7500 (72.7823) lr 1.0628e-03 eta 4:40:40 +epoch [26/50] batch [160/500] time 1.363 (1.364) data 0.000 (0.006) loss 0.9316 (1.0689) acc 75.0000 (72.6367) lr 1.0628e-03 eta 4:40:30 +epoch [26/50] batch [165/500] time 1.347 (1.363) data 0.000 (0.006) loss 1.7598 (1.0764) acc 59.3750 (72.4811) lr 1.0628e-03 eta 4:40:17 +epoch [26/50] batch [170/500] time 1.346 (1.363) data 0.000 (0.006) loss 1.4561 (1.0748) acc 59.3750 (72.4632) lr 1.0628e-03 eta 4:40:07 +epoch [26/50] batch [175/500] time 1.348 (1.363) data 0.000 (0.005) loss 1.0000 (1.0789) acc 71.8750 (72.3929) lr 1.0628e-03 eta 4:39:55 +epoch [26/50] batch [180/500] time 1.346 (1.362) data 0.000 (0.005) loss 1.3262 (1.0804) acc 65.6250 (72.4306) lr 1.0628e-03 eta 4:39:43 +epoch [26/50] batch [185/500] time 1.361 (1.363) data 0.000 (0.005) loss 1.0449 (1.0787) acc 75.0000 (72.4493) lr 1.0628e-03 eta 4:39:40 +epoch [26/50] batch [190/500] time 1.358 (1.362) data 0.000 (0.005) loss 1.3262 (1.0798) acc 62.5000 (72.4342) lr 1.0628e-03 eta 4:39:31 +epoch [26/50] batch [195/500] time 1.351 (1.362) data 0.000 (0.005) loss 1.1699 (1.0796) acc 71.8750 (72.4359) lr 1.0628e-03 eta 4:39:22 +epoch [26/50] batch [200/500] time 1.365 (1.362) data 0.000 (0.005) loss 1.5107 (1.0849) acc 68.7500 (72.3750) lr 1.0628e-03 eta 4:39:16 +epoch [26/50] 
batch [205/500] time 1.374 (1.362) data 0.000 (0.005) loss 0.8159 (1.0837) acc 78.1250 (72.4543) lr 1.0628e-03 eta 4:39:09 +epoch [26/50] batch [210/500] time 1.364 (1.362) data 0.000 (0.005) loss 0.9248 (1.0786) acc 65.6250 (72.5000) lr 1.0628e-03 eta 4:39:01 +epoch [26/50] batch [215/500] time 1.373 (1.362) data 0.000 (0.004) loss 0.7031 (1.0731) acc 84.3750 (72.6453) lr 1.0628e-03 eta 4:38:54 +epoch [26/50] batch [220/500] time 1.367 (1.362) data 0.000 (0.004) loss 1.0381 (1.0771) acc 71.8750 (72.5568) lr 1.0628e-03 eta 4:38:48 +epoch [26/50] batch [225/500] time 1.354 (1.362) data 0.000 (0.004) loss 1.2061 (1.0783) acc 71.8750 (72.6111) lr 1.0628e-03 eta 4:38:40 +epoch [26/50] batch [230/500] time 1.389 (1.363) data 0.000 (0.004) loss 1.3027 (1.0798) acc 65.6250 (72.6359) lr 1.0628e-03 eta 4:38:38 +epoch [26/50] batch [235/500] time 1.365 (1.362) data 0.000 (0.004) loss 1.6357 (1.0846) acc 62.5000 (72.5665) lr 1.0628e-03 eta 4:38:29 +epoch [26/50] batch [240/500] time 1.361 (1.362) data 0.000 (0.004) loss 1.3477 (1.0878) acc 65.6250 (72.5391) lr 1.0628e-03 eta 4:38:21 +epoch [26/50] batch [245/500] time 1.412 (1.363) data 0.000 (0.004) loss 1.1855 (1.0830) acc 65.6250 (72.6276) lr 1.0628e-03 eta 4:38:17 +epoch [26/50] batch [250/500] time 1.355 (1.363) data 0.000 (0.004) loss 1.5576 (1.0830) acc 56.2500 (72.6500) lr 1.0628e-03 eta 4:38:11 +epoch [26/50] batch [255/500] time 1.383 (1.363) data 0.000 (0.004) loss 1.6680 (1.0877) acc 65.6250 (72.4265) lr 1.0628e-03 eta 4:38:06 +epoch [26/50] batch [260/500] time 1.372 (1.363) data 0.000 (0.004) loss 0.8657 (1.0847) acc 78.1250 (72.5120) lr 1.0628e-03 eta 4:38:02 +epoch [26/50] batch [265/500] time 1.374 (1.363) data 0.000 (0.004) loss 1.2812 (1.0857) acc 65.6250 (72.4646) lr 1.0628e-03 eta 4:37:54 +epoch [26/50] batch [270/500] time 1.372 (1.363) data 0.000 (0.004) loss 0.7866 (1.0808) acc 81.2500 (72.6042) lr 1.0628e-03 eta 4:37:49 +epoch [26/50] batch [275/500] time 1.360 (1.363) data 0.000 (0.004) loss 0.9351 
(1.0756) acc 75.0000 (72.7386) lr 1.0628e-03 eta 4:37:42 +epoch [26/50] batch [280/500] time 1.364 (1.363) data 0.000 (0.004) loss 0.9702 (1.0747) acc 65.6250 (72.7232) lr 1.0628e-03 eta 4:37:36 +epoch [26/50] batch [285/500] time 1.370 (1.363) data 0.000 (0.003) loss 0.9780 (1.0748) acc 65.6250 (72.6645) lr 1.0628e-03 eta 4:37:35 +epoch [26/50] batch [290/500] time 1.363 (1.363) data 0.000 (0.003) loss 1.0146 (1.0711) acc 81.2500 (72.6724) lr 1.0628e-03 eta 4:37:27 +epoch [26/50] batch [295/500] time 1.367 (1.363) data 0.000 (0.003) loss 1.0908 (1.0751) acc 78.1250 (72.5953) lr 1.0628e-03 eta 4:37:19 +epoch [26/50] batch [300/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.6797 (1.0687) acc 81.2500 (72.7812) lr 1.0628e-03 eta 4:37:14 +epoch [26/50] batch [305/500] time 1.348 (1.363) data 0.000 (0.003) loss 1.0332 (1.0711) acc 75.0000 (72.7766) lr 1.0628e-03 eta 4:37:07 +epoch [26/50] batch [310/500] time 1.372 (1.363) data 0.000 (0.003) loss 1.5811 (1.0758) acc 65.6250 (72.6613) lr 1.0628e-03 eta 4:36:59 +epoch [26/50] batch [315/500] time 1.345 (1.363) data 0.000 (0.003) loss 0.5361 (1.0744) acc 81.2500 (72.6687) lr 1.0628e-03 eta 4:36:52 +epoch [26/50] batch [320/500] time 1.361 (1.363) data 0.001 (0.003) loss 1.2324 (1.0739) acc 71.8750 (72.6270) lr 1.0628e-03 eta 4:36:44 +epoch [26/50] batch [325/500] time 1.498 (1.364) data 0.000 (0.003) loss 1.2178 (1.0744) acc 71.8750 (72.6538) lr 1.0628e-03 eta 4:36:43 +epoch [26/50] batch [330/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.2656 (1.0809) acc 65.6250 (72.6326) lr 1.0628e-03 eta 4:36:38 +epoch [26/50] batch [335/500] time 1.356 (1.364) data 0.001 (0.003) loss 1.1523 (1.0802) acc 62.5000 (72.6679) lr 1.0628e-03 eta 4:36:32 +epoch [26/50] batch [340/500] time 1.341 (1.364) data 0.000 (0.003) loss 1.5186 (1.0810) acc 59.3750 (72.6654) lr 1.0628e-03 eta 4:36:25 +epoch [26/50] batch [345/500] time 1.342 (1.364) data 0.000 (0.003) loss 0.5986 (1.0790) acc 81.2500 (72.6359) lr 1.0628e-03 eta 4:36:19 +epoch 
[26/50] batch [350/500] time 1.346 (1.364) data 0.000 (0.003) loss 1.8408 (1.0813) acc 62.5000 (72.6071) lr 1.0628e-03 eta 4:36:10 +epoch [26/50] batch [355/500] time 1.350 (1.364) data 0.000 (0.003) loss 0.9888 (1.0816) acc 75.0000 (72.5704) lr 1.0628e-03 eta 4:36:02 +epoch [26/50] batch [360/500] time 1.341 (1.364) data 0.001 (0.003) loss 1.4766 (1.0855) acc 68.7500 (72.4913) lr 1.0628e-03 eta 4:35:54 +epoch [26/50] batch [365/500] time 1.371 (1.364) data 0.000 (0.003) loss 1.1494 (1.0827) acc 71.8750 (72.5685) lr 1.0628e-03 eta 4:35:47 +epoch [26/50] batch [370/500] time 1.379 (1.364) data 0.000 (0.003) loss 1.0312 (1.0821) acc 68.7500 (72.5676) lr 1.0628e-03 eta 4:35:40 +epoch [26/50] batch [375/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.4336 (1.0809) acc 71.8750 (72.6083) lr 1.0628e-03 eta 4:35:34 +epoch [26/50] batch [380/500] time 1.346 (1.364) data 0.000 (0.003) loss 0.6694 (1.0779) acc 81.2500 (72.6727) lr 1.0628e-03 eta 4:35:26 +epoch [26/50] batch [385/500] time 1.374 (1.364) data 0.000 (0.003) loss 1.0693 (1.0790) acc 71.8750 (72.6218) lr 1.0628e-03 eta 4:35:19 +epoch [26/50] batch [390/500] time 1.385 (1.364) data 0.001 (0.003) loss 1.1182 (1.0806) acc 71.8750 (72.5721) lr 1.0628e-03 eta 4:35:12 +epoch [26/50] batch [395/500] time 1.363 (1.363) data 0.000 (0.003) loss 0.4873 (1.0791) acc 84.3750 (72.5870) lr 1.0628e-03 eta 4:35:03 +epoch [26/50] batch [400/500] time 1.334 (1.363) data 0.000 (0.003) loss 1.5000 (1.0808) acc 68.7500 (72.5547) lr 1.0628e-03 eta 4:34:54 +epoch [26/50] batch [405/500] time 1.346 (1.363) data 0.000 (0.003) loss 0.9331 (1.0807) acc 68.7500 (72.5463) lr 1.0628e-03 eta 4:34:47 +epoch [26/50] batch [410/500] time 1.333 (1.363) data 0.000 (0.003) loss 1.0605 (1.0832) acc 71.8750 (72.4695) lr 1.0628e-03 eta 4:34:39 +epoch [26/50] batch [415/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.4795 (1.0845) acc 71.8750 (72.4774) lr 1.0628e-03 eta 4:34:31 +epoch [26/50] batch [420/500] time 1.354 (1.363) data 0.000 (0.002) loss 
0.9180 (1.0865) acc 68.7500 (72.3958) lr 1.0628e-03 eta 4:34:23 +epoch [26/50] batch [425/500] time 1.359 (1.363) data 0.000 (0.002) loss 1.1074 (1.0866) acc 68.7500 (72.4118) lr 1.0628e-03 eta 4:34:20 +epoch [26/50] batch [430/500] time 1.349 (1.363) data 0.000 (0.002) loss 0.7168 (1.0855) acc 84.3750 (72.4201) lr 1.0628e-03 eta 4:34:12 +epoch [26/50] batch [435/500] time 1.343 (1.363) data 0.000 (0.002) loss 1.1846 (1.0871) acc 65.6250 (72.3994) lr 1.0628e-03 eta 4:34:04 +epoch [26/50] batch [440/500] time 1.365 (1.363) data 0.000 (0.002) loss 1.9814 (1.0877) acc 56.2500 (72.3793) lr 1.0628e-03 eta 4:33:57 +epoch [26/50] batch [445/500] time 1.376 (1.363) data 0.000 (0.002) loss 0.9282 (1.0867) acc 81.2500 (72.3947) lr 1.0628e-03 eta 4:33:50 +epoch [26/50] batch [450/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.0439 (1.0860) acc 78.1250 (72.4444) lr 1.0628e-03 eta 4:33:43 +epoch [26/50] batch [455/500] time 1.393 (1.363) data 0.000 (0.002) loss 0.9478 (1.0856) acc 71.8750 (72.4588) lr 1.0628e-03 eta 4:33:38 +epoch [26/50] batch [460/500] time 1.383 (1.363) data 0.000 (0.002) loss 0.5513 (1.0858) acc 84.3750 (72.4524) lr 1.0628e-03 eta 4:33:32 +epoch [26/50] batch [465/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.0254 (1.0869) acc 78.1250 (72.4395) lr 1.0628e-03 eta 4:33:24 +epoch [26/50] batch [470/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8843 (1.0863) acc 78.1250 (72.4468) lr 1.0628e-03 eta 4:33:19 +epoch [26/50] batch [475/500] time 1.333 (1.363) data 0.000 (0.002) loss 0.9897 (1.0872) acc 71.8750 (72.4079) lr 1.0628e-03 eta 4:33:09 +epoch [26/50] batch [480/500] time 1.375 (1.363) data 0.000 (0.002) loss 1.9092 (1.0920) acc 56.2500 (72.3763) lr 1.0628e-03 eta 4:33:02 +epoch [26/50] batch [485/500] time 1.340 (1.363) data 0.001 (0.002) loss 1.1436 (1.0937) acc 84.3750 (72.4034) lr 1.0628e-03 eta 4:32:55 +epoch [26/50] batch [490/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.0205 (1.0919) acc 62.5000 (72.4298) lr 1.0628e-03 eta 4:32:48 
+epoch [26/50] batch [495/500] time 1.357 (1.363) data 0.000 (0.002) loss 1.1768 (1.0928) acc 68.7500 (72.3611) lr 1.0628e-03 eta 4:32:40 +epoch [26/50] batch [500/500] time 1.344 (1.363) data 0.000 (0.002) loss 0.7944 (1.0912) acc 71.8750 (72.3563) lr 1.0000e-03 eta 4:32:33 +epoch [27/50] batch [5/500] time 1.341 (1.507) data 0.000 (0.151) loss 0.6025 (1.1316) acc 90.6250 (74.3750) lr 1.0000e-03 eta 5:01:12 +epoch [27/50] batch [10/500] time 1.528 (1.452) data 0.000 (0.076) loss 0.9546 (1.0901) acc 71.8750 (76.5625) lr 1.0000e-03 eta 4:50:05 +epoch [27/50] batch [15/500] time 1.355 (1.420) data 0.000 (0.051) loss 0.7256 (1.1072) acc 81.2500 (74.7917) lr 1.0000e-03 eta 4:43:34 +epoch [27/50] batch [20/500] time 1.348 (1.404) data 0.000 (0.038) loss 0.5239 (1.0829) acc 81.2500 (75.3125) lr 1.0000e-03 eta 4:40:14 +epoch [27/50] batch [25/500] time 1.362 (1.394) data 0.000 (0.030) loss 1.5049 (1.1612) acc 56.2500 (73.3750) lr 1.0000e-03 eta 4:38:18 +epoch [27/50] batch [30/500] time 1.371 (1.388) data 0.000 (0.025) loss 1.3555 (1.1633) acc 62.5000 (73.4375) lr 1.0000e-03 eta 4:36:52 +epoch [27/50] batch [35/500] time 1.355 (1.383) data 0.000 (0.022) loss 0.9766 (1.1330) acc 71.8750 (73.5714) lr 1.0000e-03 eta 4:35:50 +epoch [27/50] batch [40/500] time 1.348 (1.379) data 0.000 (0.019) loss 1.0801 (1.1059) acc 81.2500 (74.2188) lr 1.0000e-03 eta 4:34:58 +epoch [27/50] batch [45/500] time 1.347 (1.377) data 0.000 (0.017) loss 0.9351 (1.0898) acc 75.0000 (74.5139) lr 1.0000e-03 eta 4:34:21 +epoch [27/50] batch [50/500] time 1.364 (1.375) data 0.000 (0.015) loss 0.9805 (1.0777) acc 75.0000 (74.3750) lr 1.0000e-03 eta 4:33:50 +epoch [27/50] batch [55/500] time 1.346 (1.373) data 0.000 (0.014) loss 0.5132 (1.0510) acc 78.1250 (74.8295) lr 1.0000e-03 eta 4:33:20 +epoch [27/50] batch [60/500] time 1.369 (1.371) data 0.000 (0.013) loss 1.0967 (1.0694) acc 75.0000 (74.1146) lr 1.0000e-03 eta 4:32:54 +epoch [27/50] batch [65/500] time 1.370 (1.371) data 0.001 (0.012) loss 1.1035 
(1.0607) acc 75.0000 (74.5192) lr 1.0000e-03 eta 4:32:37 +epoch [27/50] batch [70/500] time 1.352 (1.370) data 0.001 (0.011) loss 1.2832 (1.0727) acc 75.0000 (74.1964) lr 1.0000e-03 eta 4:32:25 +epoch [27/50] batch [75/500] time 1.362 (1.369) data 0.000 (0.010) loss 0.4734 (1.0639) acc 81.2500 (74.2083) lr 1.0000e-03 eta 4:32:10 +epoch [27/50] batch [80/500] time 1.351 (1.368) data 0.000 (0.010) loss 1.2422 (1.0560) acc 71.8750 (74.1797) lr 1.0000e-03 eta 4:31:45 +epoch [27/50] batch [85/500] time 1.365 (1.367) data 0.000 (0.009) loss 1.0078 (1.0477) acc 78.1250 (74.3750) lr 1.0000e-03 eta 4:31:31 +epoch [27/50] batch [90/500] time 1.361 (1.366) data 0.000 (0.009) loss 0.8667 (1.0514) acc 75.0000 (74.4444) lr 1.0000e-03 eta 4:31:14 +epoch [27/50] batch [95/500] time 1.370 (1.366) data 0.000 (0.008) loss 1.1240 (1.0518) acc 71.8750 (74.2763) lr 1.0000e-03 eta 4:31:05 +epoch [27/50] batch [100/500] time 1.359 (1.366) data 0.000 (0.008) loss 0.7363 (1.0690) acc 81.2500 (73.7188) lr 1.0000e-03 eta 4:30:52 +epoch [27/50] batch [105/500] time 1.360 (1.365) data 0.000 (0.008) loss 1.1777 (1.0715) acc 75.0000 (73.8393) lr 1.0000e-03 eta 4:30:40 +epoch [27/50] batch [110/500] time 1.354 (1.366) data 0.000 (0.007) loss 0.6177 (1.0692) acc 81.2500 (73.9205) lr 1.0000e-03 eta 4:30:37 +epoch [27/50] batch [115/500] time 1.353 (1.365) data 0.000 (0.007) loss 0.9819 (1.0634) acc 75.0000 (74.0217) lr 1.0000e-03 eta 4:30:22 +epoch [27/50] batch [120/500] time 1.352 (1.364) data 0.000 (0.007) loss 0.8516 (1.0542) acc 75.0000 (74.1146) lr 1.0000e-03 eta 4:30:06 +epoch [27/50] batch [125/500] time 1.365 (1.364) data 0.000 (0.006) loss 1.8047 (1.0581) acc 59.3750 (74.1250) lr 1.0000e-03 eta 4:29:55 +epoch [27/50] batch [130/500] time 1.343 (1.363) data 0.000 (0.006) loss 0.5576 (1.0573) acc 87.5000 (74.0865) lr 1.0000e-03 eta 4:29:42 +epoch [27/50] batch [135/500] time 1.352 (1.363) data 0.000 (0.006) loss 0.9863 (1.0585) acc 68.7500 (74.0509) lr 1.0000e-03 eta 4:29:30 +epoch [27/50] 
batch [140/500] time 1.351 (1.363) data 0.000 (0.006) loss 1.1924 (1.0599) acc 68.7500 (74.0848) lr 1.0000e-03 eta 4:29:23 +epoch [27/50] batch [145/500] time 1.361 (1.363) data 0.000 (0.006) loss 0.6665 (1.0533) acc 78.1250 (74.1810) lr 1.0000e-03 eta 4:29:17 +epoch [27/50] batch [150/500] time 1.368 (1.363) data 0.000 (0.005) loss 1.2100 (1.0583) acc 71.8750 (74.1042) lr 1.0000e-03 eta 4:29:06 +epoch [27/50] batch [155/500] time 1.350 (1.363) data 0.000 (0.005) loss 0.5879 (1.0601) acc 84.3750 (74.0323) lr 1.0000e-03 eta 4:29:09 +epoch [27/50] batch [160/500] time 1.370 (1.363) data 0.000 (0.005) loss 1.5371 (1.0676) acc 68.7500 (73.8867) lr 1.0000e-03 eta 4:29:02 +epoch [27/50] batch [165/500] time 1.368 (1.363) data 0.000 (0.005) loss 1.2959 (1.0728) acc 68.7500 (73.6742) lr 1.0000e-03 eta 4:28:55 +epoch [27/50] batch [170/500] time 1.362 (1.363) data 0.000 (0.005) loss 0.6226 (1.0692) acc 78.1250 (73.6029) lr 1.0000e-03 eta 4:28:47 +epoch [27/50] batch [175/500] time 1.370 (1.363) data 0.000 (0.005) loss 1.2793 (1.0716) acc 56.2500 (73.5357) lr 1.0000e-03 eta 4:28:37 +epoch [27/50] batch [180/500] time 1.376 (1.363) data 0.000 (0.005) loss 0.9487 (1.0715) acc 68.7500 (73.5069) lr 1.0000e-03 eta 4:28:29 +epoch [27/50] batch [185/500] time 1.369 (1.363) data 0.000 (0.004) loss 1.4238 (1.0796) acc 65.6250 (73.3446) lr 1.0000e-03 eta 4:28:22 +epoch [27/50] batch [190/500] time 1.360 (1.363) data 0.000 (0.004) loss 0.9556 (1.0774) acc 68.7500 (73.3882) lr 1.0000e-03 eta 4:28:14 +epoch [27/50] batch [195/500] time 1.371 (1.363) data 0.000 (0.004) loss 1.3945 (1.0818) acc 68.7500 (73.3654) lr 1.0000e-03 eta 4:28:07 +epoch [27/50] batch [200/500] time 1.368 (1.363) data 0.000 (0.004) loss 1.4385 (1.0827) acc 65.6250 (73.3750) lr 1.0000e-03 eta 4:28:01 +epoch [27/50] batch [205/500] time 1.350 (1.363) data 0.001 (0.004) loss 1.4160 (1.0858) acc 65.6250 (73.2927) lr 1.0000e-03 eta 4:27:54 +epoch [27/50] batch [210/500] time 1.328 (1.363) data 0.000 (0.004) loss 0.6646 
(1.0830) acc 84.3750 (73.3631) lr 1.0000e-03 eta 4:27:44 +epoch [27/50] batch [215/500] time 1.350 (1.362) data 0.000 (0.004) loss 0.8428 (1.0828) acc 75.0000 (73.3721) lr 1.0000e-03 eta 4:27:35 +epoch [27/50] batch [220/500] time 1.359 (1.362) data 0.000 (0.004) loss 1.1592 (1.0806) acc 71.8750 (73.4375) lr 1.0000e-03 eta 4:27:27 +epoch [27/50] batch [225/500] time 1.352 (1.362) data 0.000 (0.004) loss 0.8140 (1.0821) acc 87.5000 (73.4306) lr 1.0000e-03 eta 4:27:16 +epoch [27/50] batch [230/500] time 1.341 (1.362) data 0.000 (0.004) loss 1.0068 (1.0839) acc 75.0000 (73.4103) lr 1.0000e-03 eta 4:27:05 +epoch [27/50] batch [235/500] time 1.364 (1.361) data 0.000 (0.004) loss 0.6543 (1.0872) acc 84.3750 (73.3511) lr 1.0000e-03 eta 4:26:56 +epoch [27/50] batch [240/500] time 1.381 (1.361) data 0.000 (0.003) loss 1.0137 (1.0833) acc 71.8750 (73.4115) lr 1.0000e-03 eta 4:26:49 +epoch [27/50] batch [245/500] time 1.352 (1.361) data 0.000 (0.003) loss 1.0039 (1.0804) acc 71.8750 (73.4311) lr 1.0000e-03 eta 4:26:42 +epoch [27/50] batch [250/500] time 1.492 (1.362) data 0.000 (0.003) loss 1.2334 (1.0791) acc 62.5000 (73.4500) lr 1.0000e-03 eta 4:26:40 +epoch [27/50] batch [255/500] time 1.338 (1.362) data 0.000 (0.003) loss 0.9585 (1.0784) acc 84.3750 (73.5172) lr 1.0000e-03 eta 4:26:30 +epoch [27/50] batch [260/500] time 1.352 (1.361) data 0.001 (0.003) loss 1.2246 (1.0805) acc 53.1250 (73.3654) lr 1.0000e-03 eta 4:26:22 +epoch [27/50] batch [265/500] time 1.351 (1.361) data 0.000 (0.003) loss 1.6533 (1.0808) acc 56.2500 (73.3726) lr 1.0000e-03 eta 4:26:14 +epoch [27/50] batch [270/500] time 1.368 (1.361) data 0.000 (0.003) loss 0.9248 (1.0804) acc 68.7500 (73.2986) lr 1.0000e-03 eta 4:26:09 +epoch [27/50] batch [275/500] time 1.356 (1.361) data 0.000 (0.003) loss 0.6807 (1.0821) acc 81.2500 (73.2386) lr 1.0000e-03 eta 4:26:00 +epoch [27/50] batch [280/500] time 1.362 (1.361) data 0.000 (0.003) loss 1.4219 (1.0846) acc 75.0000 (73.2589) lr 1.0000e-03 eta 4:25:53 +epoch 
[27/50] batch [285/500] time 1.357 (1.361) data 0.000 (0.003) loss 0.9341 (1.0821) acc 84.3750 (73.3333) lr 1.0000e-03 eta 4:25:45 +epoch [27/50] batch [290/500] time 1.362 (1.361) data 0.000 (0.003) loss 1.3008 (1.0869) acc 71.8750 (73.2220) lr 1.0000e-03 eta 4:25:37 +epoch [27/50] batch [295/500] time 1.370 (1.362) data 0.001 (0.003) loss 0.8716 (1.0882) acc 75.0000 (73.1780) lr 1.0000e-03 eta 4:25:37 +epoch [27/50] batch [300/500] time 1.378 (1.362) data 0.000 (0.003) loss 1.6182 (1.0883) acc 65.6250 (73.1458) lr 1.0000e-03 eta 4:25:30 +epoch [27/50] batch [305/500] time 1.367 (1.362) data 0.000 (0.003) loss 1.1270 (1.0893) acc 81.2500 (73.1352) lr 1.0000e-03 eta 4:25:23 +epoch [27/50] batch [310/500] time 1.354 (1.361) data 0.000 (0.003) loss 0.8418 (1.0915) acc 81.2500 (73.0746) lr 1.0000e-03 eta 4:25:14 +epoch [27/50] batch [315/500] time 1.363 (1.361) data 0.000 (0.003) loss 1.4404 (1.0915) acc 65.6250 (73.0258) lr 1.0000e-03 eta 4:25:08 +epoch [27/50] batch [320/500] time 1.355 (1.361) data 0.000 (0.003) loss 0.6670 (1.0908) acc 81.2500 (73.0078) lr 1.0000e-03 eta 4:25:00 +epoch [27/50] batch [325/500] time 1.361 (1.361) data 0.000 (0.003) loss 0.9438 (1.0952) acc 75.0000 (72.9327) lr 1.0000e-03 eta 4:24:52 +epoch [27/50] batch [330/500] time 1.374 (1.361) data 0.001 (0.003) loss 0.7202 (1.0960) acc 78.1250 (72.8788) lr 1.0000e-03 eta 4:24:46 +epoch [27/50] batch [335/500] time 1.378 (1.361) data 0.000 (0.003) loss 1.1475 (1.0966) acc 65.6250 (72.8358) lr 1.0000e-03 eta 4:24:39 +epoch [27/50] batch [340/500] time 1.394 (1.362) data 0.000 (0.003) loss 1.2461 (1.0966) acc 75.0000 (72.7757) lr 1.0000e-03 eta 4:24:35 +epoch [27/50] batch [345/500] time 1.384 (1.362) data 0.000 (0.003) loss 1.0195 (1.0936) acc 71.8750 (72.8080) lr 1.0000e-03 eta 4:24:31 +epoch [27/50] batch [350/500] time 1.379 (1.362) data 0.000 (0.002) loss 1.6113 (1.0938) acc 62.5000 (72.8304) lr 1.0000e-03 eta 4:24:24 +epoch [27/50] batch [355/500] time 1.371 (1.362) data 0.000 (0.002) loss 
0.7856 (1.0976) acc 84.3750 (72.8433) lr 1.0000e-03 eta 4:24:18 +epoch [27/50] batch [360/500] time 1.375 (1.362) data 0.000 (0.002) loss 1.4043 (1.0999) acc 71.8750 (72.8125) lr 1.0000e-03 eta 4:24:12 +epoch [27/50] batch [365/500] time 1.367 (1.362) data 0.000 (0.002) loss 1.1191 (1.1003) acc 71.8750 (72.8168) lr 1.0000e-03 eta 4:24:07 +epoch [27/50] batch [370/500] time 1.348 (1.362) data 0.000 (0.002) loss 1.1465 (1.0987) acc 75.0000 (72.8885) lr 1.0000e-03 eta 4:24:00 +epoch [27/50] batch [375/500] time 1.353 (1.362) data 0.000 (0.002) loss 1.2559 (1.1000) acc 71.8750 (72.8083) lr 1.0000e-03 eta 4:23:55 +epoch [27/50] batch [380/500] time 1.389 (1.362) data 0.000 (0.002) loss 1.2510 (1.1001) acc 75.0000 (72.8125) lr 1.0000e-03 eta 4:23:49 +epoch [27/50] batch [385/500] time 1.360 (1.362) data 0.000 (0.002) loss 0.9077 (1.1003) acc 75.0000 (72.8003) lr 1.0000e-03 eta 4:23:41 +epoch [27/50] batch [390/500] time 1.341 (1.362) data 0.000 (0.002) loss 1.0801 (1.1003) acc 75.0000 (72.8045) lr 1.0000e-03 eta 4:23:33 +epoch [27/50] batch [395/500] time 1.346 (1.362) data 0.000 (0.002) loss 0.6870 (1.0976) acc 78.1250 (72.8481) lr 1.0000e-03 eta 4:23:29 +epoch [27/50] batch [400/500] time 1.343 (1.362) data 0.000 (0.002) loss 0.9644 (1.0973) acc 71.8750 (72.8750) lr 1.0000e-03 eta 4:23:21 +epoch [27/50] batch [405/500] time 1.338 (1.362) data 0.000 (0.002) loss 1.3936 (1.0973) acc 65.6250 (72.8858) lr 1.0000e-03 eta 4:23:11 +epoch [27/50] batch [410/500] time 1.383 (1.362) data 0.000 (0.002) loss 1.5469 (1.0965) acc 56.2500 (72.8659) lr 1.0000e-03 eta 4:23:05 +epoch [27/50] batch [415/500] time 1.350 (1.362) data 0.000 (0.002) loss 1.0557 (1.0995) acc 68.7500 (72.7786) lr 1.0000e-03 eta 4:22:58 +epoch [27/50] batch [420/500] time 1.364 (1.362) data 0.000 (0.002) loss 0.9644 (1.1005) acc 71.8750 (72.7530) lr 1.0000e-03 eta 4:22:51 +epoch [27/50] batch [425/500] time 1.345 (1.362) data 0.000 (0.002) loss 1.5703 (1.1022) acc 68.7500 (72.6838) lr 1.0000e-03 eta 4:22:44 
+epoch [27/50] batch [430/500] time 1.344 (1.362) data 0.000 (0.002) loss 1.4854 (1.1014) acc 59.3750 (72.6890) lr 1.0000e-03 eta 4:22:36 +epoch [27/50] batch [435/500] time 1.374 (1.362) data 0.000 (0.002) loss 1.5605 (1.1039) acc 65.6250 (72.6868) lr 1.0000e-03 eta 4:22:29 +epoch [27/50] batch [440/500] time 1.361 (1.362) data 0.000 (0.002) loss 0.6821 (1.1038) acc 81.2500 (72.6918) lr 1.0000e-03 eta 4:22:25 +epoch [27/50] batch [445/500] time 1.356 (1.362) data 0.000 (0.002) loss 0.9219 (1.1057) acc 68.7500 (72.6194) lr 1.0000e-03 eta 4:22:17 +epoch [27/50] batch [450/500] time 1.346 (1.362) data 0.001 (0.002) loss 1.1787 (1.1066) acc 65.6250 (72.5625) lr 1.0000e-03 eta 4:22:10 +epoch [27/50] batch [455/500] time 1.364 (1.362) data 0.000 (0.002) loss 0.8164 (1.1069) acc 78.1250 (72.5206) lr 1.0000e-03 eta 4:22:03 +epoch [27/50] batch [460/500] time 1.363 (1.362) data 0.000 (0.002) loss 1.2334 (1.1046) acc 71.8750 (72.5747) lr 1.0000e-03 eta 4:21:55 +epoch [27/50] batch [465/500] time 1.351 (1.362) data 0.001 (0.002) loss 1.0459 (1.1066) acc 65.6250 (72.5134) lr 1.0000e-03 eta 4:21:47 +epoch [27/50] batch [470/500] time 1.344 (1.362) data 0.000 (0.002) loss 1.1514 (1.1065) acc 68.7500 (72.4934) lr 1.0000e-03 eta 4:21:39 +epoch [27/50] batch [475/500] time 1.360 (1.362) data 0.000 (0.002) loss 0.9497 (1.1064) acc 71.8750 (72.4868) lr 1.0000e-03 eta 4:21:33 +epoch [27/50] batch [480/500] time 1.346 (1.362) data 0.000 (0.002) loss 0.5972 (1.1047) acc 75.0000 (72.5260) lr 1.0000e-03 eta 4:21:26 +epoch [27/50] batch [485/500] time 1.364 (1.362) data 0.001 (0.002) loss 0.8145 (1.1039) acc 84.3750 (72.5258) lr 1.0000e-03 eta 4:21:19 +epoch [27/50] batch [490/500] time 1.350 (1.362) data 0.000 (0.002) loss 0.8511 (1.1051) acc 78.1250 (72.5064) lr 1.0000e-03 eta 4:21:11 +epoch [27/50] batch [495/500] time 1.354 (1.361) data 0.000 (0.002) loss 1.3389 (1.1079) acc 71.8750 (72.4495) lr 1.0000e-03 eta 4:21:03 +epoch [27/50] batch [500/500] time 1.339 (1.361) data 0.000 
(0.002) loss 1.4512 (1.1081) acc 68.7500 (72.4437) lr 9.3721e-04 eta 4:20:54 +epoch [28/50] batch [5/500] time 1.362 (1.531) data 0.000 (0.164) loss 0.9038 (0.8771) acc 78.1250 (77.5000) lr 9.3721e-04 eta 4:53:19 +epoch [28/50] batch [10/500] time 1.353 (1.448) data 0.000 (0.082) loss 1.4824 (0.9555) acc 62.5000 (74.0625) lr 9.3721e-04 eta 4:37:12 +epoch [28/50] batch [15/500] time 1.359 (1.417) data 0.000 (0.055) loss 1.2021 (0.9955) acc 65.6250 (73.1250) lr 9.3721e-04 eta 4:31:15 +epoch [28/50] batch [20/500] time 1.376 (1.405) data 0.000 (0.041) loss 0.9253 (0.9901) acc 78.1250 (72.9688) lr 9.3721e-04 eta 4:28:44 +epoch [28/50] batch [25/500] time 1.374 (1.404) data 0.000 (0.033) loss 1.2578 (0.9980) acc 68.7500 (72.7500) lr 9.3721e-04 eta 4:28:25 +epoch [28/50] batch [30/500] time 1.337 (1.396) data 0.000 (0.028) loss 0.5098 (1.0029) acc 90.6250 (72.6042) lr 9.3721e-04 eta 4:26:57 +epoch [28/50] batch [35/500] time 1.350 (1.392) data 0.000 (0.024) loss 0.4636 (0.9931) acc 87.5000 (73.0357) lr 9.3721e-04 eta 4:25:56 +epoch [28/50] batch [40/500] time 1.353 (1.388) data 0.000 (0.021) loss 0.7461 (1.0035) acc 75.0000 (73.0469) lr 9.3721e-04 eta 4:25:03 +epoch [28/50] batch [45/500] time 1.339 (1.385) data 0.000 (0.019) loss 1.6865 (1.0234) acc 62.5000 (72.7778) lr 9.3721e-04 eta 4:24:30 +epoch [28/50] batch [50/500] time 1.370 (1.384) data 0.000 (0.017) loss 0.8867 (1.0140) acc 78.1250 (73.2500) lr 9.3721e-04 eta 4:24:04 +epoch [28/50] batch [55/500] time 1.353 (1.381) data 0.000 (0.015) loss 1.0039 (1.0055) acc 68.7500 (73.4091) lr 9.3721e-04 eta 4:23:26 +epoch [28/50] batch [60/500] time 1.384 (1.380) data 0.001 (0.014) loss 0.4717 (1.0032) acc 87.5000 (73.8021) lr 9.3721e-04 eta 4:23:11 +epoch [28/50] batch [65/500] time 1.379 (1.380) data 0.000 (0.013) loss 0.7412 (1.0010) acc 78.1250 (73.8942) lr 9.3721e-04 eta 4:22:59 +epoch [28/50] batch [70/500] time 1.341 (1.379) data 0.000 (0.012) loss 0.8979 (1.0000) acc 71.8750 (73.8839) lr 9.3721e-04 eta 4:22:37 
+epoch [28/50] batch [75/500] time 1.354 (1.377) data 0.000 (0.011) loss 1.0742 (0.9975) acc 68.7500 (74.0417) lr 9.3721e-04 eta 4:22:13 +epoch [28/50] batch [80/500] time 1.369 (1.376) data 0.000 (0.011) loss 0.9473 (1.0021) acc 71.8750 (73.9062) lr 9.3721e-04 eta 4:21:57 +epoch [28/50] batch [85/500] time 1.378 (1.377) data 0.000 (0.010) loss 1.0342 (1.0162) acc 78.1250 (73.7868) lr 9.3721e-04 eta 4:22:02 +epoch [28/50] batch [90/500] time 1.360 (1.377) data 0.000 (0.009) loss 0.9136 (1.0144) acc 75.0000 (73.9236) lr 9.3721e-04 eta 4:21:46 +epoch [28/50] batch [95/500] time 1.385 (1.376) data 0.000 (0.009) loss 0.9800 (1.0171) acc 84.3750 (73.8816) lr 9.3721e-04 eta 4:21:31 +epoch [28/50] batch [100/500] time 1.368 (1.375) data 0.000 (0.009) loss 0.5820 (1.0255) acc 81.2500 (73.6562) lr 9.3721e-04 eta 4:21:18 +epoch [28/50] batch [105/500] time 1.361 (1.374) data 0.000 (0.008) loss 0.8511 (1.0221) acc 71.8750 (73.6012) lr 9.3721e-04 eta 4:21:01 +epoch [28/50] batch [110/500] time 1.366 (1.374) data 0.000 (0.008) loss 1.6074 (1.0293) acc 53.1250 (73.4091) lr 9.3721e-04 eta 4:20:46 +epoch [28/50] batch [115/500] time 1.349 (1.373) data 0.000 (0.007) loss 1.4258 (1.0321) acc 62.5000 (73.3424) lr 9.3721e-04 eta 4:20:32 +epoch [28/50] batch [120/500] time 1.360 (1.373) data 0.000 (0.007) loss 0.8403 (1.0341) acc 75.0000 (73.3073) lr 9.3721e-04 eta 4:20:20 +epoch [28/50] batch [125/500] time 1.384 (1.373) data 0.000 (0.007) loss 1.0176 (1.0347) acc 71.8750 (73.3250) lr 9.3721e-04 eta 4:20:12 +epoch [28/50] batch [130/500] time 1.363 (1.373) data 0.000 (0.007) loss 1.2324 (1.0383) acc 68.7500 (73.1971) lr 9.3721e-04 eta 4:20:13 +epoch [28/50] batch [135/500] time 1.347 (1.373) data 0.000 (0.006) loss 1.1240 (1.0421) acc 78.1250 (73.2407) lr 9.3721e-04 eta 4:20:00 +epoch [28/50] batch [140/500] time 1.369 (1.373) data 0.000 (0.006) loss 1.5430 (1.0472) acc 59.3750 (73.1473) lr 9.3721e-04 eta 4:19:52 +epoch [28/50] batch [145/500] time 1.361 (1.372) data 0.000 (0.006) 
loss 1.4512 (1.0516) acc 68.7500 (73.1681) lr 9.3721e-04 eta 4:19:39 +epoch [28/50] batch [150/500] time 1.358 (1.371) data 0.000 (0.006) loss 1.0674 (1.0600) acc 75.0000 (73.1250) lr 9.3721e-04 eta 4:19:25 +epoch [28/50] batch [155/500] time 1.363 (1.371) data 0.000 (0.006) loss 1.1445 (1.0657) acc 78.1250 (73.1250) lr 9.3721e-04 eta 4:19:11 +epoch [28/50] batch [160/500] time 1.367 (1.370) data 0.000 (0.005) loss 0.6982 (1.0662) acc 81.2500 (73.0469) lr 9.3721e-04 eta 4:18:58 +epoch [28/50] batch [165/500] time 1.354 (1.370) data 0.000 (0.005) loss 1.2217 (1.0717) acc 68.7500 (72.9167) lr 9.3721e-04 eta 4:18:49 +epoch [28/50] batch [170/500] time 1.377 (1.370) data 0.000 (0.005) loss 0.9868 (1.0698) acc 71.8750 (72.8676) lr 9.3721e-04 eta 4:18:39 +epoch [28/50] batch [175/500] time 1.370 (1.370) data 0.000 (0.005) loss 0.8789 (1.0655) acc 78.1250 (73.0000) lr 9.3721e-04 eta 4:18:31 +epoch [28/50] batch [180/500] time 1.355 (1.369) data 0.000 (0.005) loss 1.1123 (1.0626) acc 65.6250 (73.0208) lr 9.3721e-04 eta 4:18:19 +epoch [28/50] batch [185/500] time 1.367 (1.369) data 0.000 (0.005) loss 0.9321 (1.0630) acc 81.2500 (72.9899) lr 9.3721e-04 eta 4:18:09 +epoch [28/50] batch [190/500] time 1.344 (1.369) data 0.000 (0.005) loss 1.0596 (1.0699) acc 68.7500 (72.7961) lr 9.3721e-04 eta 4:17:57 +epoch [28/50] batch [195/500] time 1.343 (1.368) data 0.000 (0.005) loss 0.9194 (1.0693) acc 65.6250 (72.6603) lr 9.3721e-04 eta 4:17:48 +epoch [28/50] batch [200/500] time 1.359 (1.368) data 0.000 (0.004) loss 0.6865 (1.0669) acc 78.1250 (72.6562) lr 9.3721e-04 eta 4:17:38 +epoch [28/50] batch [205/500] time 1.346 (1.368) data 0.000 (0.004) loss 0.9370 (1.0632) acc 78.1250 (72.6829) lr 9.3721e-04 eta 4:17:27 +epoch [28/50] batch [210/500] time 1.376 (1.368) data 0.000 (0.004) loss 1.4873 (1.0618) acc 65.6250 (72.6786) lr 9.3721e-04 eta 4:17:20 +epoch [28/50] batch [215/500] time 1.360 (1.368) data 0.000 (0.004) loss 0.8110 (1.0662) acc 71.8750 (72.6017) lr 9.3721e-04 eta 
4:17:14 +epoch [28/50] batch [220/500] time 1.378 (1.368) data 0.000 (0.004) loss 1.0234 (1.0667) acc 71.8750 (72.6136) lr 9.3721e-04 eta 4:17:08 +epoch [28/50] batch [225/500] time 1.368 (1.367) data 0.000 (0.004) loss 0.7456 (1.0625) acc 81.2500 (72.6667) lr 9.3721e-04 eta 4:16:57 +epoch [28/50] batch [230/500] time 1.365 (1.368) data 0.000 (0.004) loss 1.4463 (1.0630) acc 56.2500 (72.6223) lr 9.3721e-04 eta 4:16:56 +epoch [28/50] batch [235/500] time 1.345 (1.368) data 0.000 (0.004) loss 0.5942 (1.0693) acc 87.5000 (72.5399) lr 9.3721e-04 eta 4:16:48 +epoch [28/50] batch [240/500] time 1.373 (1.368) data 0.000 (0.004) loss 1.5801 (1.0686) acc 68.7500 (72.5521) lr 9.3721e-04 eta 4:16:39 +epoch [28/50] batch [245/500] time 1.352 (1.367) data 0.000 (0.004) loss 1.1592 (1.0669) acc 65.6250 (72.5893) lr 9.3721e-04 eta 4:16:30 +epoch [28/50] batch [250/500] time 1.353 (1.367) data 0.000 (0.004) loss 1.0703 (1.0628) acc 65.6250 (72.6000) lr 9.3721e-04 eta 4:16:21 +epoch [28/50] batch [255/500] time 1.351 (1.367) data 0.000 (0.004) loss 1.3516 (1.0651) acc 78.1250 (72.6103) lr 9.3721e-04 eta 4:16:11 +epoch [28/50] batch [260/500] time 1.362 (1.367) data 0.000 (0.004) loss 1.5146 (1.0672) acc 68.7500 (72.6322) lr 9.3721e-04 eta 4:16:03 +epoch [28/50] batch [265/500] time 1.385 (1.367) data 0.000 (0.003) loss 0.7666 (1.0627) acc 75.0000 (72.6179) lr 9.3721e-04 eta 4:15:57 +epoch [28/50] batch [270/500] time 1.370 (1.367) data 0.000 (0.003) loss 1.1416 (1.0638) acc 68.7500 (72.5810) lr 9.3721e-04 eta 4:15:51 +epoch [28/50] batch [275/500] time 1.340 (1.367) data 0.000 (0.003) loss 0.9688 (1.0653) acc 81.2500 (72.6364) lr 9.3721e-04 eta 4:15:47 +epoch [28/50] batch [280/500] time 1.372 (1.367) data 0.000 (0.003) loss 0.7500 (1.0673) acc 78.1250 (72.6562) lr 9.3721e-04 eta 4:15:40 +epoch [28/50] batch [285/500] time 1.351 (1.367) data 0.000 (0.003) loss 0.7676 (1.0666) acc 75.0000 (72.6425) lr 9.3721e-04 eta 4:15:32 +epoch [28/50] batch [290/500] time 1.360 (1.367) data 
0.000 (0.003) loss 0.7681 (1.0653) acc 71.8750 (72.6401) lr 9.3721e-04 eta 4:15:24 +epoch [28/50] batch [295/500] time 1.346 (1.367) data 0.001 (0.003) loss 0.8901 (1.0640) acc 71.8750 (72.6271) lr 9.3721e-04 eta 4:15:14 +epoch [28/50] batch [300/500] time 1.336 (1.367) data 0.000 (0.003) loss 1.2119 (1.0635) acc 71.8750 (72.7083) lr 9.3721e-04 eta 4:15:05 +epoch [28/50] batch [305/500] time 1.368 (1.367) data 0.000 (0.003) loss 1.3848 (1.0679) acc 62.5000 (72.6332) lr 9.3721e-04 eta 4:14:58 +epoch [28/50] batch [310/500] time 1.355 (1.366) data 0.000 (0.003) loss 1.4082 (1.0669) acc 68.7500 (72.6815) lr 9.3721e-04 eta 4:14:48 +epoch [28/50] batch [315/500] time 1.352 (1.366) data 0.000 (0.003) loss 1.2041 (1.0687) acc 65.6250 (72.6091) lr 9.3721e-04 eta 4:14:39 +epoch [28/50] batch [320/500] time 1.347 (1.366) data 0.000 (0.003) loss 0.8745 (1.0686) acc 84.3750 (72.7051) lr 9.3721e-04 eta 4:14:30 +epoch [28/50] batch [325/500] time 1.375 (1.366) data 0.000 (0.003) loss 1.3652 (1.0697) acc 65.6250 (72.7115) lr 9.3721e-04 eta 4:14:22 +epoch [28/50] batch [330/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.1689 (1.0693) acc 68.7500 (72.7273) lr 9.3721e-04 eta 4:14:14 +epoch [28/50] batch [335/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.2480 (1.0717) acc 65.6250 (72.6119) lr 9.3721e-04 eta 4:14:07 +epoch [28/50] batch [340/500] time 1.358 (1.366) data 0.000 (0.003) loss 1.2334 (1.0754) acc 75.0000 (72.5460) lr 9.3721e-04 eta 4:14:00 +epoch [28/50] batch [345/500] time 1.348 (1.366) data 0.001 (0.003) loss 1.2109 (1.0750) acc 56.2500 (72.5272) lr 9.3721e-04 eta 4:13:52 +epoch [28/50] batch [350/500] time 1.358 (1.366) data 0.001 (0.003) loss 1.1191 (1.0768) acc 84.3750 (72.5268) lr 9.3721e-04 eta 4:13:46 +epoch [28/50] batch [355/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.9355 (1.0797) acc 59.3750 (72.4736) lr 9.3721e-04 eta 4:13:40 +epoch [28/50] batch [360/500] time 1.374 (1.366) data 0.001 (0.003) loss 0.8652 (1.0768) acc 78.1250 (72.5608) lr 
9.3721e-04 eta 4:13:32 +epoch [28/50] batch [365/500] time 1.363 (1.366) data 0.000 (0.003) loss 0.9512 (1.0759) acc 78.1250 (72.6027) lr 9.3721e-04 eta 4:13:25 +epoch [28/50] batch [370/500] time 1.502 (1.366) data 0.000 (0.003) loss 1.3662 (1.0772) acc 62.5000 (72.5507) lr 9.3721e-04 eta 4:13:21 +epoch [28/50] batch [375/500] time 1.370 (1.366) data 0.000 (0.003) loss 1.4043 (1.0789) acc 65.6250 (72.4917) lr 9.3721e-04 eta 4:13:14 +epoch [28/50] batch [380/500] time 1.383 (1.366) data 0.000 (0.003) loss 1.1035 (1.0786) acc 71.8750 (72.4753) lr 9.3721e-04 eta 4:13:08 +epoch [28/50] batch [385/500] time 1.353 (1.366) data 0.000 (0.002) loss 0.9429 (1.0794) acc 75.0000 (72.4269) lr 9.3721e-04 eta 4:13:01 +epoch [28/50] batch [390/500] time 1.362 (1.366) data 0.001 (0.002) loss 0.9727 (1.0784) acc 78.1250 (72.4439) lr 9.3721e-04 eta 4:12:54 +epoch [28/50] batch [395/500] time 1.345 (1.366) data 0.000 (0.002) loss 0.9316 (1.0770) acc 75.0000 (72.5000) lr 9.3721e-04 eta 4:12:46 +epoch [28/50] batch [400/500] time 1.388 (1.366) data 0.000 (0.002) loss 0.4641 (1.0753) acc 93.7500 (72.5469) lr 9.3721e-04 eta 4:12:40 +epoch [28/50] batch [405/500] time 1.358 (1.366) data 0.001 (0.002) loss 1.0137 (1.0762) acc 71.8750 (72.5231) lr 9.3721e-04 eta 4:12:31 +epoch [28/50] batch [410/500] time 1.375 (1.366) data 0.001 (0.002) loss 0.8438 (1.0759) acc 71.8750 (72.5229) lr 9.3721e-04 eta 4:12:24 +epoch [28/50] batch [415/500] time 1.349 (1.366) data 0.001 (0.002) loss 1.2178 (1.0773) acc 65.6250 (72.4699) lr 9.3721e-04 eta 4:12:21 +epoch [28/50] batch [420/500] time 1.346 (1.366) data 0.000 (0.002) loss 1.0625 (1.0765) acc 75.0000 (72.4777) lr 9.3721e-04 eta 4:12:12 +epoch [28/50] batch [425/500] time 1.350 (1.366) data 0.000 (0.002) loss 0.9907 (1.0772) acc 75.0000 (72.4559) lr 9.3721e-04 eta 4:12:03 +epoch [28/50] batch [430/500] time 1.362 (1.366) data 0.000 (0.002) loss 1.0957 (1.0775) acc 68.7500 (72.4709) lr 9.3721e-04 eta 4:11:56 +epoch [28/50] batch [435/500] time 1.357 
(1.365) data 0.000 (0.002) loss 1.3555 (1.0787) acc 68.7500 (72.4425) lr 9.3721e-04 eta 4:11:49 +epoch [28/50] batch [440/500] time 1.365 (1.365) data 0.000 (0.002) loss 1.1963 (1.0778) acc 59.3750 (72.4716) lr 9.3721e-04 eta 4:11:40 +epoch [28/50] batch [445/500] time 1.366 (1.365) data 0.000 (0.002) loss 1.1221 (1.0763) acc 78.1250 (72.5140) lr 9.3721e-04 eta 4:11:32 +epoch [28/50] batch [450/500] time 1.340 (1.365) data 0.000 (0.002) loss 0.9097 (1.0773) acc 78.1250 (72.4722) lr 9.3721e-04 eta 4:11:23 +epoch [28/50] batch [455/500] time 1.348 (1.365) data 0.000 (0.002) loss 1.4277 (1.0774) acc 71.8750 (72.4863) lr 9.3721e-04 eta 4:11:16 +epoch [28/50] batch [460/500] time 1.361 (1.365) data 0.000 (0.002) loss 0.9209 (1.0771) acc 68.7500 (72.4728) lr 9.3721e-04 eta 4:11:09 +epoch [28/50] batch [465/500] time 1.354 (1.365) data 0.000 (0.002) loss 0.6074 (1.0765) acc 84.3750 (72.4664) lr 9.3721e-04 eta 4:11:00 +epoch [28/50] batch [470/500] time 1.362 (1.365) data 0.000 (0.002) loss 0.8892 (1.0770) acc 65.6250 (72.4734) lr 9.3721e-04 eta 4:10:52 +epoch [28/50] batch [475/500] time 1.354 (1.365) data 0.000 (0.002) loss 1.5303 (1.0781) acc 56.2500 (72.4671) lr 9.3721e-04 eta 4:10:44 +epoch [28/50] batch [480/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.5840 (1.0825) acc 56.2500 (72.3633) lr 9.3721e-04 eta 4:10:36 +epoch [28/50] batch [485/500] time 1.341 (1.364) data 0.001 (0.002) loss 1.1299 (1.0831) acc 81.2500 (72.3711) lr 9.3721e-04 eta 4:10:28 +epoch [28/50] batch [490/500] time 1.342 (1.364) data 0.000 (0.002) loss 1.2373 (1.0831) acc 71.8750 (72.4043) lr 9.3721e-04 eta 4:10:20 +epoch [28/50] batch [495/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.1201 (1.0826) acc 65.6250 (72.4116) lr 9.3721e-04 eta 4:10:11 +epoch [28/50] batch [500/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.0703 (1.0838) acc 78.1250 (72.3750) lr 8.7467e-04 eta 4:10:03 +epoch [29/50] batch [5/500] time 1.351 (1.538) data 0.000 (0.168) loss 0.9004 (1.0697) acc 78.1250 (70.6250) 
lr 8.7467e-04 eta 4:41:54 +epoch [29/50] batch [10/500] time 1.340 (1.464) data 0.000 (0.084) loss 1.5361 (1.1233) acc 62.5000 (72.5000) lr 8.7467e-04 eta 4:28:07 +epoch [29/50] batch [15/500] time 1.349 (1.426) data 0.000 (0.056) loss 1.3984 (1.1696) acc 62.5000 (70.8333) lr 8.7467e-04 eta 4:21:10 +epoch [29/50] batch [20/500] time 1.365 (1.410) data 0.000 (0.042) loss 1.4609 (1.1926) acc 78.1250 (71.4062) lr 8.7467e-04 eta 4:17:57 +epoch [29/50] batch [25/500] time 1.366 (1.398) data 0.001 (0.034) loss 0.7402 (1.1296) acc 68.7500 (71.6250) lr 8.7467e-04 eta 4:15:43 +epoch [29/50] batch [30/500] time 1.346 (1.391) data 0.000 (0.028) loss 0.9995 (1.1005) acc 71.8750 (71.8750) lr 8.7467e-04 eta 4:14:23 +epoch [29/50] batch [35/500] time 1.350 (1.386) data 0.000 (0.024) loss 1.5381 (1.1090) acc 71.8750 (72.3214) lr 8.7467e-04 eta 4:13:14 +epoch [29/50] batch [40/500] time 1.365 (1.381) data 0.000 (0.021) loss 1.0928 (1.0658) acc 71.8750 (73.0469) lr 8.7467e-04 eta 4:12:15 +epoch [29/50] batch [45/500] time 1.363 (1.379) data 0.000 (0.019) loss 1.5859 (1.0874) acc 62.5000 (72.6389) lr 8.7467e-04 eta 4:11:44 +epoch [29/50] batch [50/500] time 1.376 (1.376) data 0.000 (0.017) loss 0.7378 (1.0704) acc 75.0000 (73.0000) lr 8.7467e-04 eta 4:11:08 +epoch [29/50] batch [55/500] time 1.366 (1.375) data 0.000 (0.016) loss 1.2402 (1.0988) acc 71.8750 (72.8977) lr 8.7467e-04 eta 4:10:46 +epoch [29/50] batch [60/500] time 1.362 (1.373) data 0.000 (0.014) loss 0.8037 (1.1230) acc 75.0000 (72.5000) lr 8.7467e-04 eta 4:10:15 +epoch [29/50] batch [65/500] time 1.340 (1.371) data 0.000 (0.013) loss 1.6484 (1.1441) acc 59.3750 (72.0673) lr 8.7467e-04 eta 4:09:55 +epoch [29/50] batch [70/500] time 1.369 (1.373) data 0.000 (0.012) loss 1.1162 (1.1439) acc 78.1250 (72.0536) lr 8.7467e-04 eta 4:10:02 +epoch [29/50] batch [75/500] time 1.355 (1.372) data 0.000 (0.012) loss 0.8604 (1.1399) acc 75.0000 (72.3750) lr 8.7467e-04 eta 4:09:44 +epoch [29/50] batch [80/500] time 1.342 (1.371) data 
0.000 (0.011) loss 0.6982 (1.1294) acc 81.2500 (72.7344) lr 8.7467e-04 eta 4:09:29 +epoch [29/50] batch [85/500] time 1.354 (1.369) data 0.000 (0.010) loss 0.9814 (1.1353) acc 81.2500 (72.7941) lr 8.7467e-04 eta 4:09:03 +epoch [29/50] batch [90/500] time 1.357 (1.368) data 0.000 (0.010) loss 1.1387 (1.1238) acc 75.0000 (72.9514) lr 8.7467e-04 eta 4:08:47 +epoch [29/50] batch [95/500] time 1.343 (1.367) data 0.000 (0.009) loss 0.9658 (1.1177) acc 71.8750 (73.0592) lr 8.7467e-04 eta 4:08:31 +epoch [29/50] batch [100/500] time 1.352 (1.367) data 0.000 (0.009) loss 1.0049 (1.1074) acc 71.8750 (73.0938) lr 8.7467e-04 eta 4:08:17 +epoch [29/50] batch [105/500] time 1.363 (1.366) data 0.000 (0.008) loss 1.4229 (1.1091) acc 68.7500 (73.0655) lr 8.7467e-04 eta 4:08:03 +epoch [29/50] batch [110/500] time 1.373 (1.366) data 0.000 (0.008) loss 0.7769 (1.1057) acc 75.0000 (73.0966) lr 8.7467e-04 eta 4:07:51 +epoch [29/50] batch [115/500] time 1.365 (1.367) data 0.000 (0.008) loss 1.1523 (1.1055) acc 68.7500 (73.1250) lr 8.7467e-04 eta 4:08:01 +epoch [29/50] batch [120/500] time 1.364 (1.367) data 0.000 (0.007) loss 1.7598 (1.1082) acc 53.1250 (73.0208) lr 8.7467e-04 eta 4:07:55 +epoch [29/50] batch [125/500] time 1.374 (1.367) data 0.000 (0.007) loss 0.5938 (1.0975) acc 81.2500 (73.3500) lr 8.7467e-04 eta 4:07:49 +epoch [29/50] batch [130/500] time 1.375 (1.367) data 0.001 (0.007) loss 1.3867 (1.0966) acc 78.1250 (73.3654) lr 8.7467e-04 eta 4:07:41 +epoch [29/50] batch [135/500] time 1.351 (1.367) data 0.000 (0.007) loss 0.9570 (1.0935) acc 71.8750 (73.3796) lr 8.7467e-04 eta 4:07:29 +epoch [29/50] batch [140/500] time 1.361 (1.366) data 0.001 (0.006) loss 1.0527 (1.0981) acc 68.7500 (73.0580) lr 8.7467e-04 eta 4:07:18 +epoch [29/50] batch [145/500] time 1.374 (1.366) data 0.000 (0.006) loss 0.6357 (1.0947) acc 78.1250 (73.0388) lr 8.7467e-04 eta 4:07:10 +epoch [29/50] batch [150/500] time 1.377 (1.366) data 0.000 (0.006) loss 1.2578 (1.0936) acc 65.6250 (73.0208) lr 8.7467e-04 
eta 4:07:04 +epoch [29/50] batch [155/500] time 1.379 (1.366) data 0.000 (0.006) loss 0.7485 (1.0920) acc 78.1250 (73.0242) lr 8.7467e-04 eta 4:06:55 +epoch [29/50] batch [160/500] time 1.369 (1.366) data 0.000 (0.006) loss 1.5195 (1.0983) acc 68.7500 (72.9297) lr 8.7467e-04 eta 4:06:47 +epoch [29/50] batch [165/500] time 1.360 (1.366) data 0.000 (0.005) loss 0.6196 (1.1001) acc 90.6250 (72.9924) lr 8.7467e-04 eta 4:06:37 +epoch [29/50] batch [170/500] time 1.354 (1.365) data 0.001 (0.005) loss 1.4199 (1.0960) acc 62.5000 (72.9044) lr 8.7467e-04 eta 4:06:27 +epoch [29/50] batch [175/500] time 1.341 (1.365) data 0.000 (0.005) loss 0.9009 (1.0976) acc 75.0000 (72.8571) lr 8.7467e-04 eta 4:06:19 +epoch [29/50] batch [180/500] time 1.373 (1.365) data 0.000 (0.005) loss 0.8276 (1.0958) acc 84.3750 (72.8993) lr 8.7467e-04 eta 4:06:10 +epoch [29/50] batch [185/500] time 1.359 (1.365) data 0.000 (0.005) loss 1.3740 (1.0902) acc 62.5000 (72.9561) lr 8.7467e-04 eta 4:06:01 +epoch [29/50] batch [190/500] time 1.370 (1.365) data 0.000 (0.005) loss 1.3301 (1.0896) acc 75.0000 (73.0099) lr 8.7467e-04 eta 4:05:51 +epoch [29/50] batch [195/500] time 1.360 (1.365) data 0.000 (0.005) loss 1.2510 (1.0838) acc 71.8750 (73.0609) lr 8.7467e-04 eta 4:05:43 +epoch [29/50] batch [200/500] time 1.343 (1.364) data 0.000 (0.005) loss 0.7979 (1.0840) acc 84.3750 (73.0469) lr 8.7467e-04 eta 4:05:35 +epoch [29/50] batch [205/500] time 1.358 (1.364) data 0.000 (0.004) loss 1.0898 (1.0838) acc 75.0000 (73.0488) lr 8.7467e-04 eta 4:05:28 +epoch [29/50] batch [210/500] time 1.358 (1.364) data 0.000 (0.004) loss 1.8164 (1.0870) acc 56.2500 (72.9167) lr 8.7467e-04 eta 4:05:19 +epoch [29/50] batch [215/500] time 1.375 (1.365) data 0.000 (0.004) loss 0.7412 (1.0806) acc 75.0000 (72.9797) lr 8.7467e-04 eta 4:05:20 +epoch [29/50] batch [220/500] time 1.363 (1.365) data 0.001 (0.004) loss 0.8813 (1.0841) acc 71.8750 (72.8409) lr 8.7467e-04 eta 4:05:11 +epoch [29/50] batch [225/500] time 1.350 (1.365) data 
0.000 (0.004) loss 1.0225 (1.0849) acc 75.0000 (72.8194) lr 8.7467e-04 eta 4:05:03 +epoch [29/50] batch [230/500] time 1.371 (1.364) data 0.000 (0.004) loss 1.0938 (1.0855) acc 75.0000 (72.8261) lr 8.7467e-04 eta 4:04:54 +epoch [29/50] batch [235/500] time 1.356 (1.364) data 0.000 (0.004) loss 0.9570 (1.0844) acc 81.2500 (72.9521) lr 8.7467e-04 eta 4:04:44 +epoch [29/50] batch [240/500] time 1.341 (1.364) data 0.000 (0.004) loss 0.9658 (1.0814) acc 75.0000 (72.9688) lr 8.7467e-04 eta 4:04:35 +epoch [29/50] batch [245/500] time 1.356 (1.364) data 0.000 (0.004) loss 0.6968 (1.0791) acc 75.0000 (72.9719) lr 8.7467e-04 eta 4:04:26 +epoch [29/50] batch [250/500] time 1.367 (1.364) data 0.000 (0.004) loss 0.9751 (1.0798) acc 75.0000 (72.9125) lr 8.7467e-04 eta 4:04:20 +epoch [29/50] batch [255/500] time 1.362 (1.364) data 0.000 (0.004) loss 1.3311 (1.0789) acc 71.8750 (72.9412) lr 8.7467e-04 eta 4:04:11 +epoch [29/50] batch [260/500] time 1.359 (1.364) data 0.000 (0.004) loss 1.1270 (1.0799) acc 81.2500 (72.9808) lr 8.7467e-04 eta 4:04:11 +epoch [29/50] batch [265/500] time 1.370 (1.364) data 0.000 (0.004) loss 0.9419 (1.0814) acc 78.1250 (72.9599) lr 8.7467e-04 eta 4:04:04 +epoch [29/50] batch [270/500] time 1.348 (1.364) data 0.000 (0.004) loss 1.4131 (1.0819) acc 65.6250 (72.8819) lr 8.7467e-04 eta 4:03:55 +epoch [29/50] batch [275/500] time 1.346 (1.364) data 0.000 (0.003) loss 0.6611 (1.0810) acc 75.0000 (72.8750) lr 8.7467e-04 eta 4:03:46 +epoch [29/50] batch [280/500] time 1.367 (1.364) data 0.000 (0.003) loss 0.9019 (1.0792) acc 71.8750 (72.8237) lr 8.7467e-04 eta 4:03:40 +epoch [29/50] batch [285/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.7549 (1.0767) acc 65.6250 (72.8070) lr 8.7467e-04 eta 4:03:33 +epoch [29/50] batch [290/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.0635 (1.0749) acc 75.0000 (72.8448) lr 8.7467e-04 eta 4:03:23 +epoch [29/50] batch [295/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.7969 (1.0752) acc 81.2500 (72.8814) lr 
8.7467e-04 eta 4:03:15 +epoch [29/50] batch [300/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.0713 (1.0776) acc 71.8750 (72.8125) lr 8.7467e-04 eta 4:03:07 +epoch [29/50] batch [305/500] time 1.351 (1.363) data 0.000 (0.003) loss 0.9790 (1.0767) acc 81.2500 (72.8586) lr 8.7467e-04 eta 4:02:59 +epoch [29/50] batch [310/500] time 1.352 (1.363) data 0.000 (0.003) loss 1.4053 (1.0788) acc 65.6250 (72.8730) lr 8.7467e-04 eta 4:02:51 +epoch [29/50] batch [315/500] time 1.350 (1.363) data 0.000 (0.003) loss 0.5981 (1.0782) acc 84.3750 (72.9067) lr 8.7467e-04 eta 4:02:42 +epoch [29/50] batch [320/500] time 1.348 (1.363) data 0.000 (0.003) loss 0.9683 (1.0771) acc 71.8750 (72.9199) lr 8.7467e-04 eta 4:02:34 +epoch [29/50] batch [325/500] time 1.363 (1.363) data 0.000 (0.003) loss 0.9131 (1.0776) acc 78.1250 (72.9327) lr 8.7467e-04 eta 4:02:26 +epoch [29/50] batch [330/500] time 1.370 (1.363) data 0.000 (0.003) loss 0.7915 (1.0742) acc 84.3750 (73.0114) lr 8.7467e-04 eta 4:02:20 +epoch [29/50] batch [335/500] time 1.357 (1.363) data 0.000 (0.003) loss 0.6602 (1.0713) acc 84.3750 (73.0037) lr 8.7467e-04 eta 4:02:13 +epoch [29/50] batch [340/500] time 1.340 (1.362) data 0.000 (0.003) loss 1.2812 (1.0740) acc 65.6250 (72.8768) lr 8.7467e-04 eta 4:02:03 +epoch [29/50] batch [345/500] time 1.332 (1.362) data 0.000 (0.003) loss 0.5542 (1.0732) acc 78.1250 (72.9076) lr 8.7467e-04 eta 4:01:55 +epoch [29/50] batch [350/500] time 1.368 (1.362) data 0.000 (0.003) loss 0.8447 (1.0718) acc 75.0000 (72.9196) lr 8.7467e-04 eta 4:01:49 +epoch [29/50] batch [355/500] time 1.491 (1.363) data 0.000 (0.003) loss 0.9683 (1.0725) acc 65.6250 (72.8521) lr 8.7467e-04 eta 4:01:45 +epoch [29/50] batch [360/500] time 1.341 (1.362) data 0.000 (0.003) loss 0.4995 (1.0704) acc 84.3750 (72.8819) lr 8.7467e-04 eta 4:01:36 +epoch [29/50] batch [365/500] time 1.375 (1.362) data 0.000 (0.003) loss 1.1875 (1.0718) acc 68.7500 (72.8596) lr 8.7467e-04 eta 4:01:28 +epoch [29/50] batch [370/500] time 1.345 
(1.362) data 0.000 (0.003) loss 0.8501 (1.0735) acc 75.0000 (72.7956) lr 8.7467e-04 eta 4:01:19 +epoch [29/50] batch [375/500] time 1.363 (1.362) data 0.000 (0.003) loss 0.7744 (1.0715) acc 68.7500 (72.8167) lr 8.7467e-04 eta 4:01:11 +epoch [29/50] batch [380/500] time 1.348 (1.362) data 0.000 (0.003) loss 1.1963 (1.0718) acc 71.8750 (72.8043) lr 8.7467e-04 eta 4:01:03 +epoch [29/50] batch [385/500] time 1.359 (1.362) data 0.000 (0.003) loss 1.5039 (1.0735) acc 68.7500 (72.8003) lr 8.7467e-04 eta 4:00:54 +epoch [29/50] batch [390/500] time 1.370 (1.362) data 0.001 (0.003) loss 1.2305 (1.0744) acc 75.0000 (72.8125) lr 8.7467e-04 eta 4:00:46 +epoch [29/50] batch [395/500] time 1.343 (1.362) data 0.000 (0.003) loss 0.8770 (1.0718) acc 71.8750 (72.7927) lr 8.7467e-04 eta 4:00:39 +epoch [29/50] batch [400/500] time 1.336 (1.362) data 0.000 (0.002) loss 1.5420 (1.0765) acc 62.5000 (72.7344) lr 8.7467e-04 eta 4:00:34 +epoch [29/50] batch [405/500] time 1.369 (1.362) data 0.000 (0.002) loss 0.9038 (1.0735) acc 71.8750 (72.7701) lr 8.7467e-04 eta 4:00:27 +epoch [29/50] batch [410/500] time 1.342 (1.362) data 0.000 (0.002) loss 0.8672 (1.0710) acc 75.0000 (72.8277) lr 8.7467e-04 eta 4:00:19 +epoch [29/50] batch [415/500] time 1.351 (1.362) data 0.000 (0.002) loss 1.0469 (1.0711) acc 75.0000 (72.8313) lr 8.7467e-04 eta 4:00:11 +epoch [29/50] batch [420/500] time 1.358 (1.362) data 0.000 (0.002) loss 0.9443 (1.0704) acc 81.2500 (72.8646) lr 8.7467e-04 eta 4:00:05 +epoch [29/50] batch [425/500] time 1.353 (1.362) data 0.000 (0.002) loss 0.6807 (1.0739) acc 84.3750 (72.7721) lr 8.7467e-04 eta 3:59:58 +epoch [29/50] batch [430/500] time 1.363 (1.362) data 0.000 (0.002) loss 0.9854 (1.0742) acc 75.0000 (72.7471) lr 8.7467e-04 eta 3:59:51 +epoch [29/50] batch [435/500] time 1.355 (1.362) data 0.000 (0.002) loss 0.8984 (1.0749) acc 75.0000 (72.7155) lr 8.7467e-04 eta 3:59:44 +epoch [29/50] batch [440/500] time 1.358 (1.361) data 0.000 (0.002) loss 1.6123 (1.0776) acc 65.6250 
(72.6989) lr 8.7467e-04 eta 3:59:37 +epoch [29/50] batch [445/500] time 1.351 (1.361) data 0.001 (0.002) loss 0.7344 (1.0751) acc 71.8750 (72.7037) lr 8.7467e-04 eta 3:59:28 +epoch [29/50] batch [450/500] time 1.372 (1.361) data 0.000 (0.002) loss 1.2363 (1.0771) acc 78.1250 (72.7500) lr 8.7467e-04 eta 3:59:21 +epoch [29/50] batch [455/500] time 1.355 (1.361) data 0.000 (0.002) loss 1.0518 (1.0767) acc 65.6250 (72.7266) lr 8.7467e-04 eta 3:59:13 +epoch [29/50] batch [460/500] time 1.395 (1.361) data 0.000 (0.002) loss 1.2285 (1.0736) acc 65.6250 (72.7989) lr 8.7467e-04 eta 3:59:06 +epoch [29/50] batch [465/500] time 1.364 (1.361) data 0.000 (0.002) loss 0.9180 (1.0712) acc 78.1250 (72.8360) lr 8.7467e-04 eta 3:59:00 +epoch [29/50] batch [470/500] time 1.372 (1.361) data 0.000 (0.002) loss 1.2285 (1.0731) acc 65.6250 (72.8125) lr 8.7467e-04 eta 3:58:53 +epoch [29/50] batch [475/500] time 1.353 (1.361) data 0.000 (0.002) loss 1.5117 (1.0755) acc 59.3750 (72.7500) lr 8.7467e-04 eta 3:58:46 +epoch [29/50] batch [480/500] time 1.374 (1.361) data 0.000 (0.002) loss 1.2520 (1.0749) acc 68.7500 (72.7279) lr 8.7467e-04 eta 3:58:39 +epoch [29/50] batch [485/500] time 1.340 (1.361) data 0.001 (0.002) loss 0.8643 (1.0733) acc 81.2500 (72.7706) lr 8.7467e-04 eta 3:58:31 +epoch [29/50] batch [490/500] time 1.365 (1.361) data 0.000 (0.002) loss 1.1670 (1.0726) acc 68.7500 (72.7934) lr 8.7467e-04 eta 3:58:24 +epoch [29/50] batch [495/500] time 1.362 (1.361) data 0.000 (0.002) loss 0.8838 (1.0742) acc 62.5000 (72.7336) lr 8.7467e-04 eta 3:58:17 +epoch [29/50] batch [500/500] time 1.341 (1.361) data 0.000 (0.002) loss 1.0947 (1.0728) acc 68.7500 (72.7313) lr 8.1262e-04 eta 3:58:11 +epoch [30/50] batch [5/500] time 1.348 (1.525) data 0.000 (0.161) loss 0.7637 (0.9962) acc 90.6250 (75.0000) lr 8.1262e-04 eta 4:26:45 +epoch [30/50] batch [10/500] time 1.361 (1.444) data 0.000 (0.081) loss 1.3389 (1.0848) acc 68.7500 (73.4375) lr 8.1262e-04 eta 4:12:26 +epoch [30/50] batch [15/500] time 
1.362 (1.415) data 0.000 (0.054) loss 1.3584 (1.0495) acc 59.3750 (73.5417) lr 8.1262e-04 eta 4:07:19 +epoch [30/50] batch [20/500] time 1.361 (1.403) data 0.000 (0.041) loss 0.8320 (1.0568) acc 81.2500 (74.0625) lr 8.1262e-04 eta 4:05:01 +epoch [30/50] batch [25/500] time 1.361 (1.395) data 0.000 (0.033) loss 0.7725 (1.0000) acc 75.0000 (74.7500) lr 8.1262e-04 eta 4:03:28 +epoch [30/50] batch [30/500] time 1.345 (1.386) data 0.000 (0.027) loss 1.1514 (1.0157) acc 75.0000 (74.6875) lr 8.1262e-04 eta 4:01:56 +epoch [30/50] batch [35/500] time 1.355 (1.382) data 0.000 (0.023) loss 1.3438 (1.0352) acc 65.6250 (74.1071) lr 8.1262e-04 eta 4:01:04 +epoch [30/50] batch [40/500] time 1.348 (1.378) data 0.000 (0.020) loss 1.2529 (1.0365) acc 62.5000 (74.2188) lr 8.1262e-04 eta 4:00:10 +epoch [30/50] batch [45/500] time 1.348 (1.376) data 0.000 (0.018) loss 1.2275 (1.0396) acc 75.0000 (74.5139) lr 8.1262e-04 eta 3:59:41 +epoch [30/50] batch [50/500] time 1.363 (1.373) data 0.000 (0.016) loss 0.3538 (1.0430) acc 87.5000 (74.5000) lr 8.1262e-04 eta 3:59:12 +epoch [30/50] batch [55/500] time 1.349 (1.372) data 0.000 (0.015) loss 1.1885 (1.0579) acc 75.0000 (74.2045) lr 8.1262e-04 eta 3:58:47 +epoch [30/50] batch [60/500] time 1.345 (1.374) data 0.001 (0.014) loss 0.6680 (1.0466) acc 84.3750 (74.3750) lr 8.1262e-04 eta 3:59:03 +epoch [30/50] batch [65/500] time 1.370 (1.373) data 0.000 (0.013) loss 1.0176 (1.0474) acc 68.7500 (73.9904) lr 8.1262e-04 eta 3:58:51 +epoch [30/50] batch [70/500] time 1.373 (1.373) data 0.000 (0.012) loss 0.7925 (1.0492) acc 71.8750 (74.1071) lr 8.1262e-04 eta 3:58:39 +epoch [30/50] batch [75/500] time 1.383 (1.372) data 0.000 (0.011) loss 1.0342 (1.0525) acc 84.3750 (74.2083) lr 8.1262e-04 eta 3:58:24 +epoch [30/50] batch [80/500] time 1.344 (1.371) data 0.000 (0.010) loss 0.6768 (1.0488) acc 78.1250 (74.2188) lr 8.1262e-04 eta 3:58:07 +epoch [30/50] batch [85/500] time 1.352 (1.370) data 0.000 (0.010) loss 1.8604 (1.0646) acc 59.3750 (73.8603) lr 
8.1262e-04 eta 3:57:50 +epoch [30/50] batch [90/500] time 1.350 (1.369) data 0.000 (0.009) loss 1.8311 (1.0711) acc 59.3750 (73.7153) lr 8.1262e-04 eta 3:57:35 +epoch [30/50] batch [95/500] time 1.329 (1.369) data 0.000 (0.009) loss 1.3281 (1.0776) acc 68.7500 (73.7171) lr 8.1262e-04 eta 3:57:23 +epoch [30/50] batch [100/500] time 1.360 (1.368) data 0.000 (0.008) loss 1.0586 (1.0807) acc 71.8750 (73.6875) lr 8.1262e-04 eta 3:57:09 +epoch [30/50] batch [105/500] time 1.355 (1.370) data 0.000 (0.008) loss 1.1016 (1.0838) acc 81.2500 (73.6012) lr 8.1262e-04 eta 3:57:16 +epoch [30/50] batch [110/500] time 1.365 (1.369) data 0.000 (0.008) loss 1.2480 (1.0869) acc 75.0000 (73.5511) lr 8.1262e-04 eta 3:57:08 +epoch [30/50] batch [115/500] time 1.364 (1.369) data 0.000 (0.007) loss 1.2158 (1.0883) acc 62.5000 (73.4783) lr 8.1262e-04 eta 3:56:57 +epoch [30/50] batch [120/500] time 1.350 (1.369) data 0.000 (0.007) loss 0.9868 (1.0795) acc 75.0000 (73.6458) lr 8.1262e-04 eta 3:56:46 +epoch [30/50] batch [125/500] time 1.368 (1.369) data 0.000 (0.007) loss 1.0791 (1.0848) acc 65.6250 (73.4000) lr 8.1262e-04 eta 3:56:41 +epoch [30/50] batch [130/500] time 1.362 (1.368) data 0.000 (0.007) loss 0.7856 (1.0789) acc 78.1250 (73.4375) lr 8.1262e-04 eta 3:56:28 +epoch [30/50] batch [135/500] time 1.352 (1.368) data 0.000 (0.006) loss 0.9331 (1.0750) acc 75.0000 (73.5648) lr 8.1262e-04 eta 3:56:16 +epoch [30/50] batch [140/500] time 1.346 (1.367) data 0.000 (0.006) loss 0.9478 (1.0729) acc 81.2500 (73.6161) lr 8.1262e-04 eta 3:56:01 +epoch [30/50] batch [145/500] time 1.358 (1.366) data 0.000 (0.006) loss 0.6694 (1.0671) acc 81.2500 (73.5776) lr 8.1262e-04 eta 3:55:47 +epoch [30/50] batch [150/500] time 1.372 (1.366) data 0.000 (0.006) loss 1.6934 (1.0768) acc 65.6250 (73.4375) lr 8.1262e-04 eta 3:55:39 +epoch [30/50] batch [155/500] time 1.375 (1.366) data 0.000 (0.006) loss 0.9365 (1.0743) acc 78.1250 (73.3871) lr 8.1262e-04 eta 3:55:32 +epoch [30/50] batch [160/500] time 1.375 
(1.366) data 0.001 (0.005) loss 1.0127 (1.0810) acc 65.6250 (73.2812) lr 8.1262e-04 eta 3:55:22 +epoch [30/50] batch [165/500] time 1.362 (1.366) data 0.000 (0.005) loss 0.8203 (1.0764) acc 81.2500 (73.3712) lr 8.1262e-04 eta 3:55:13 +epoch [30/50] batch [170/500] time 1.379 (1.366) data 0.000 (0.005) loss 0.9482 (1.0755) acc 78.1250 (73.4375) lr 8.1262e-04 eta 3:55:09 +epoch [30/50] batch [175/500] time 1.369 (1.366) data 0.000 (0.005) loss 1.4590 (1.0761) acc 71.8750 (73.4643) lr 8.1262e-04 eta 3:54:59 +epoch [30/50] batch [180/500] time 1.372 (1.366) data 0.000 (0.005) loss 1.2168 (1.0720) acc 68.7500 (73.5069) lr 8.1262e-04 eta 3:54:51 +epoch [30/50] batch [185/500] time 1.351 (1.365) data 0.000 (0.005) loss 0.8486 (1.0691) acc 71.8750 (73.5473) lr 8.1262e-04 eta 3:54:42 +epoch [30/50] batch [190/500] time 1.356 (1.365) data 0.000 (0.005) loss 0.9307 (1.0668) acc 81.2500 (73.5691) lr 8.1262e-04 eta 3:54:34 +epoch [30/50] batch [195/500] time 1.354 (1.365) data 0.000 (0.004) loss 1.1504 (1.0695) acc 65.6250 (73.5256) lr 8.1262e-04 eta 3:54:26 +epoch [30/50] batch [200/500] time 1.342 (1.365) data 0.000 (0.004) loss 1.1533 (1.0680) acc 75.0000 (73.5469) lr 8.1262e-04 eta 3:54:15 +epoch [30/50] batch [205/500] time 1.375 (1.365) data 0.000 (0.004) loss 0.6650 (1.0650) acc 84.3750 (73.6738) lr 8.1262e-04 eta 3:54:14 +epoch [30/50] batch [210/500] time 1.354 (1.365) data 0.000 (0.004) loss 1.1113 (1.0658) acc 78.1250 (73.6756) lr 8.1262e-04 eta 3:54:05 +epoch [30/50] batch [215/500] time 1.365 (1.365) data 0.000 (0.004) loss 1.5986 (1.0686) acc 68.7500 (73.5756) lr 8.1262e-04 eta 3:53:55 +epoch [30/50] batch [220/500] time 1.356 (1.364) data 0.000 (0.004) loss 1.0107 (1.0665) acc 78.1250 (73.5653) lr 8.1262e-04 eta 3:53:46 +epoch [30/50] batch [225/500] time 1.368 (1.364) data 0.000 (0.004) loss 1.1787 (1.0655) acc 62.5000 (73.5556) lr 8.1262e-04 eta 3:53:38 +epoch [30/50] batch [230/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.7969 (1.0640) acc 59.3750 
(73.6549) lr 8.1262e-04 eta 3:53:27 +epoch [30/50] batch [235/500] time 1.348 (1.364) data 0.000 (0.004) loss 0.8716 (1.0618) acc 71.8750 (73.6037) lr 8.1262e-04 eta 3:53:17 +epoch [30/50] batch [240/500] time 1.343 (1.363) data 0.001 (0.004) loss 1.1670 (1.0605) acc 68.7500 (73.5677) lr 8.1262e-04 eta 3:53:08 +epoch [30/50] batch [245/500] time 1.469 (1.364) data 0.000 (0.004) loss 0.4651 (1.0571) acc 84.3750 (73.6352) lr 8.1262e-04 eta 3:53:05 +epoch [30/50] batch [250/500] time 1.354 (1.363) data 0.000 (0.004) loss 0.8242 (1.0575) acc 78.1250 (73.6250) lr 8.1262e-04 eta 3:52:54 +epoch [30/50] batch [255/500] time 1.353 (1.363) data 0.000 (0.004) loss 0.7173 (1.0569) acc 75.0000 (73.5049) lr 8.1262e-04 eta 3:52:46 +epoch [30/50] batch [260/500] time 1.371 (1.363) data 0.000 (0.003) loss 1.5312 (1.0590) acc 62.5000 (73.4976) lr 8.1262e-04 eta 3:52:38 +epoch [30/50] batch [265/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.2051 (1.0630) acc 68.7500 (73.3491) lr 8.1262e-04 eta 3:52:30 +epoch [30/50] batch [270/500] time 1.342 (1.363) data 0.000 (0.003) loss 1.2998 (1.0625) acc 65.6250 (73.3681) lr 8.1262e-04 eta 3:52:20 +epoch [30/50] batch [275/500] time 1.373 (1.363) data 0.000 (0.003) loss 1.8154 (1.0653) acc 56.2500 (73.3636) lr 8.1262e-04 eta 3:52:14 +epoch [30/50] batch [280/500] time 1.363 (1.363) data 0.000 (0.003) loss 1.1006 (1.0685) acc 78.1250 (73.3259) lr 8.1262e-04 eta 3:52:08 +epoch [30/50] batch [285/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.3125 (1.0716) acc 65.6250 (73.2237) lr 8.1262e-04 eta 3:52:02 +epoch [30/50] batch [290/500] time 1.360 (1.363) data 0.000 (0.003) loss 0.7896 (1.0715) acc 81.2500 (73.2328) lr 8.1262e-04 eta 3:51:55 +epoch [30/50] batch [295/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.8257 (1.0693) acc 78.1250 (73.2945) lr 8.1262e-04 eta 3:51:45 +epoch [30/50] batch [300/500] time 1.362 (1.363) data 0.000 (0.003) loss 0.9736 (1.0693) acc 71.8750 (73.2292) lr 8.1262e-04 eta 3:51:39 +epoch [30/50] batch [305/500] 
time 1.369 (1.363) data 0.000 (0.003) loss 0.7388 (1.0669) acc 81.2500 (73.2684) lr 8.1262e-04 eta 3:51:33 +epoch [30/50] batch [310/500] time 1.388 (1.363) data 0.000 (0.003) loss 0.7949 (1.0676) acc 78.1250 (73.2157) lr 8.1262e-04 eta 3:51:28 +epoch [30/50] batch [315/500] time 1.354 (1.363) data 0.000 (0.003) loss 1.0479 (1.0677) acc 71.8750 (73.1746) lr 8.1262e-04 eta 3:51:21 +epoch [30/50] batch [320/500] time 1.361 (1.363) data 0.000 (0.003) loss 0.6035 (1.0674) acc 84.3750 (73.1543) lr 8.1262e-04 eta 3:51:14 +epoch [30/50] batch [325/500] time 1.365 (1.363) data 0.001 (0.003) loss 0.9810 (1.0679) acc 71.8750 (73.0962) lr 8.1262e-04 eta 3:51:08 +epoch [30/50] batch [330/500] time 1.378 (1.363) data 0.000 (0.003) loss 1.3984 (1.0655) acc 75.0000 (73.1818) lr 8.1262e-04 eta 3:51:02 +epoch [30/50] batch [335/500] time 1.371 (1.363) data 0.000 (0.003) loss 1.4395 (1.0662) acc 71.8750 (73.1530) lr 8.1262e-04 eta 3:50:56 +epoch [30/50] batch [340/500] time 1.381 (1.363) data 0.000 (0.003) loss 1.1680 (1.0684) acc 68.7500 (73.1158) lr 8.1262e-04 eta 3:50:50 +epoch [30/50] batch [345/500] time 1.372 (1.364) data 0.000 (0.003) loss 1.0205 (1.0671) acc 65.6250 (73.1522) lr 8.1262e-04 eta 3:50:47 +epoch [30/50] batch [350/500] time 1.362 (1.364) data 0.001 (0.003) loss 1.4121 (1.0682) acc 68.7500 (73.1429) lr 8.1262e-04 eta 3:50:41 +epoch [30/50] batch [355/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.0137 (1.0659) acc 81.2500 (73.2130) lr 8.1262e-04 eta 3:50:33 +epoch [30/50] batch [360/500] time 1.347 (1.364) data 0.001 (0.003) loss 1.0742 (1.0666) acc 78.1250 (73.2031) lr 8.1262e-04 eta 3:50:26 +epoch [30/50] batch [365/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.3977 (1.0627) acc 87.5000 (73.2620) lr 8.1262e-04 eta 3:50:19 +epoch [30/50] batch [370/500] time 1.365 (1.364) data 0.000 (0.003) loss 0.6670 (1.0620) acc 81.2500 (73.2264) lr 8.1262e-04 eta 3:50:13 +epoch [30/50] batch [375/500] time 1.347 (1.364) data 0.000 (0.003) loss 1.0674 (1.0634) acc 
71.8750 (73.1500) lr 8.1262e-04 eta 3:50:06 +epoch [30/50] batch [380/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.5469 (1.0661) acc 62.5000 (73.1168) lr 8.1262e-04 eta 3:49:58 +epoch [30/50] batch [385/500] time 1.377 (1.364) data 0.001 (0.002) loss 1.2285 (1.0678) acc 68.7500 (73.0682) lr 8.1262e-04 eta 3:49:51 +epoch [30/50] batch [390/500] time 1.350 (1.364) data 0.000 (0.002) loss 0.5615 (1.0667) acc 81.2500 (73.0849) lr 8.1262e-04 eta 3:49:48 +epoch [30/50] batch [395/500] time 1.356 (1.364) data 0.000 (0.002) loss 1.4346 (1.0691) acc 59.3750 (73.0063) lr 8.1262e-04 eta 3:49:40 +epoch [30/50] batch [400/500] time 1.378 (1.364) data 0.000 (0.002) loss 1.8516 (1.0726) acc 65.6250 (73.0000) lr 8.1262e-04 eta 3:49:33 +epoch [30/50] batch [405/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.3301 (1.0742) acc 75.0000 (73.0015) lr 8.1262e-04 eta 3:49:26 +epoch [30/50] batch [410/500] time 1.348 (1.364) data 0.000 (0.002) loss 1.0664 (1.0741) acc 71.8750 (73.0030) lr 8.1262e-04 eta 3:49:20 +epoch [30/50] batch [415/500] time 1.354 (1.364) data 0.000 (0.002) loss 0.6938 (1.0715) acc 75.0000 (73.0271) lr 8.1262e-04 eta 3:49:13 +epoch [30/50] batch [420/500] time 1.373 (1.364) data 0.000 (0.002) loss 1.1162 (1.0722) acc 68.7500 (72.9762) lr 8.1262e-04 eta 3:49:06 +epoch [30/50] batch [425/500] time 1.364 (1.364) data 0.000 (0.002) loss 0.6914 (1.0744) acc 87.5000 (72.9926) lr 8.1262e-04 eta 3:49:00 +epoch [30/50] batch [430/500] time 1.364 (1.364) data 0.001 (0.002) loss 0.7139 (1.0726) acc 78.1250 (73.0305) lr 8.1262e-04 eta 3:48:53 +epoch [30/50] batch [435/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.8911 (1.0727) acc 78.1250 (73.0963) lr 8.1262e-04 eta 3:48:45 +epoch [30/50] batch [440/500] time 1.368 (1.364) data 0.000 (0.002) loss 0.8896 (1.0734) acc 71.8750 (73.0682) lr 8.1262e-04 eta 3:48:39 +epoch [30/50] batch [445/500] time 1.370 (1.364) data 0.000 (0.002) loss 1.0225 (1.0727) acc 71.8750 (73.0688) lr 8.1262e-04 eta 3:48:32 +epoch [30/50] batch 
[450/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.1855 (1.0715) acc 78.1250 (73.1319) lr 8.1262e-04 eta 3:48:25 +epoch [30/50] batch [455/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.7793 (1.0707) acc 75.0000 (73.1113) lr 8.1262e-04 eta 3:48:18 +epoch [30/50] batch [460/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.9463 (1.0693) acc 75.0000 (73.1182) lr 8.1262e-04 eta 3:48:11 +epoch [30/50] batch [465/500] time 1.393 (1.364) data 0.000 (0.002) loss 1.1260 (1.0702) acc 68.7500 (73.0712) lr 8.1262e-04 eta 3:48:06 +epoch [30/50] batch [470/500] time 1.368 (1.364) data 0.000 (0.002) loss 1.3359 (1.0691) acc 65.6250 (73.1117) lr 8.1262e-04 eta 3:48:00 +epoch [30/50] batch [475/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.6367 (1.0702) acc 59.3750 (73.0658) lr 8.1262e-04 eta 3:47:54 +epoch [30/50] batch [480/500] time 1.352 (1.364) data 0.000 (0.002) loss 1.0293 (1.0698) acc 65.6250 (73.0794) lr 8.1262e-04 eta 3:47:47 +epoch [30/50] batch [485/500] time 1.343 (1.364) data 0.001 (0.002) loss 1.2129 (1.0683) acc 59.3750 (73.1250) lr 8.1262e-04 eta 3:47:39 +epoch [30/50] batch [490/500] time 1.356 (1.364) data 0.000 (0.002) loss 1.0996 (1.0713) acc 75.0000 (73.0102) lr 8.1262e-04 eta 3:47:35 +epoch [30/50] batch [495/500] time 1.352 (1.364) data 0.000 (0.002) loss 0.6094 (1.0722) acc 84.3750 (72.9924) lr 8.1262e-04 eta 3:47:27 +epoch [30/50] batch [500/500] time 1.378 (1.364) data 0.000 (0.002) loss 1.4238 (1.0735) acc 75.0000 (72.9313) lr 7.5131e-04 eta 3:47:20 +epoch [31/50] batch [5/500] time 1.377 (1.532) data 0.000 (0.166) loss 1.7695 (1.1137) acc 68.7500 (73.1250) lr 7.5131e-04 eta 4:15:08 +epoch [31/50] batch [10/500] time 1.345 (1.448) data 0.000 (0.083) loss 1.0205 (1.0380) acc 71.8750 (74.0625) lr 7.5131e-04 eta 4:01:06 +epoch [31/50] batch [15/500] time 1.365 (1.417) data 0.000 (0.056) loss 1.0957 (1.1368) acc 75.0000 (72.5000) lr 7.5131e-04 eta 3:55:51 +epoch [31/50] batch [20/500] time 1.353 (1.400) data 0.000 (0.042) loss 0.7007 (1.1699) acc 
78.1250 (71.2500) lr 7.5131e-04 eta 3:52:47 +epoch [31/50] batch [25/500] time 1.357 (1.390) data 0.000 (0.034) loss 0.6411 (1.1616) acc 78.1250 (71.0000) lr 7.5131e-04 eta 3:51:08 +epoch [31/50] batch [30/500] time 1.365 (1.386) data 0.000 (0.028) loss 0.8252 (1.1160) acc 81.2500 (72.8125) lr 7.5131e-04 eta 3:50:18 +epoch [31/50] batch [35/500] time 1.333 (1.381) data 0.000 (0.024) loss 0.9204 (1.1298) acc 78.1250 (72.4107) lr 7.5131e-04 eta 3:49:26 +epoch [31/50] batch [40/500] time 1.363 (1.379) data 0.000 (0.021) loss 1.0459 (1.1169) acc 68.7500 (72.5781) lr 7.5131e-04 eta 3:48:54 +epoch [31/50] batch [45/500] time 1.382 (1.381) data 0.000 (0.019) loss 0.6045 (1.1115) acc 75.0000 (72.4306) lr 7.5131e-04 eta 3:49:08 +epoch [31/50] batch [50/500] time 1.363 (1.379) data 0.000 (0.017) loss 1.4561 (1.1101) acc 65.6250 (72.2500) lr 7.5131e-04 eta 3:48:43 +epoch [31/50] batch [55/500] time 1.354 (1.378) data 0.000 (0.015) loss 1.0928 (1.1070) acc 75.0000 (72.3864) lr 7.5131e-04 eta 3:48:20 +epoch [31/50] batch [60/500] time 1.376 (1.377) data 0.000 (0.014) loss 0.9321 (1.0947) acc 75.0000 (72.6562) lr 7.5131e-04 eta 3:48:03 +epoch [31/50] batch [65/500] time 1.339 (1.374) data 0.000 (0.013) loss 1.1416 (1.0988) acc 71.8750 (72.7404) lr 7.5131e-04 eta 3:47:34 +epoch [31/50] batch [70/500] time 1.344 (1.372) data 0.000 (0.012) loss 0.4670 (1.0833) acc 81.2500 (72.7679) lr 7.5131e-04 eta 3:47:08 +epoch [31/50] batch [75/500] time 1.346 (1.372) data 0.000 (0.011) loss 0.8101 (1.0939) acc 75.0000 (72.4583) lr 7.5131e-04 eta 3:46:54 +epoch [31/50] batch [80/500] time 1.364 (1.371) data 0.000 (0.011) loss 1.2979 (1.0973) acc 62.5000 (72.1094) lr 7.5131e-04 eta 3:46:37 +epoch [31/50] batch [85/500] time 1.481 (1.371) data 0.000 (0.010) loss 1.5830 (1.1091) acc 59.3750 (71.9118) lr 7.5131e-04 eta 3:46:37 +epoch [31/50] batch [90/500] time 1.369 (1.370) data 0.000 (0.010) loss 0.7969 (1.1096) acc 78.1250 (71.9097) lr 7.5131e-04 eta 3:46:20 +epoch [31/50] batch [95/500] time 
1.377 (1.370) data 0.000 (0.009) loss 0.6724 (1.1043) acc 75.0000 (71.8421) lr 7.5131e-04 eta 3:46:07 +epoch [31/50] batch [100/500] time 1.350 (1.369) data 0.000 (0.009) loss 1.0479 (1.1000) acc 71.8750 (71.9062) lr 7.5131e-04 eta 3:45:56 +epoch [31/50] batch [105/500] time 1.368 (1.369) data 0.000 (0.008) loss 0.8770 (1.0944) acc 81.2500 (71.9345) lr 7.5131e-04 eta 3:45:42 +epoch [31/50] batch [110/500] time 1.363 (1.369) data 0.000 (0.008) loss 1.0635 (1.0911) acc 75.0000 (72.0739) lr 7.5131e-04 eta 3:45:34 +epoch [31/50] batch [115/500] time 1.366 (1.369) data 0.000 (0.008) loss 0.8804 (1.0960) acc 81.2500 (72.1196) lr 7.5131e-04 eta 3:45:28 +epoch [31/50] batch [120/500] time 1.386 (1.369) data 0.000 (0.007) loss 1.1191 (1.1027) acc 78.1250 (72.0052) lr 7.5131e-04 eta 3:45:23 +epoch [31/50] batch [125/500] time 1.357 (1.369) data 0.000 (0.007) loss 1.1182 (1.0943) acc 75.0000 (72.1750) lr 7.5131e-04 eta 3:45:14 +epoch [31/50] batch [130/500] time 1.366 (1.368) data 0.000 (0.007) loss 0.9800 (1.0933) acc 71.8750 (72.2356) lr 7.5131e-04 eta 3:45:03 +epoch [31/50] batch [135/500] time 1.369 (1.368) data 0.000 (0.006) loss 0.7446 (1.0958) acc 81.2500 (72.1991) lr 7.5131e-04 eta 3:44:57 +epoch [31/50] batch [140/500] time 1.375 (1.368) data 0.000 (0.006) loss 1.2324 (1.1008) acc 71.8750 (71.9866) lr 7.5131e-04 eta 3:44:45 +epoch [31/50] batch [145/500] time 1.345 (1.367) data 0.000 (0.006) loss 1.5664 (1.1005) acc 65.6250 (72.0905) lr 7.5131e-04 eta 3:44:33 +epoch [31/50] batch [150/500] time 1.364 (1.367) data 0.000 (0.006) loss 0.8438 (1.0999) acc 81.2500 (72.2292) lr 7.5131e-04 eta 3:44:25 +epoch [31/50] batch [155/500] time 1.356 (1.367) data 0.000 (0.006) loss 0.6797 (1.0961) acc 84.3750 (72.2379) lr 7.5131e-04 eta 3:44:16 +epoch [31/50] batch [160/500] time 1.357 (1.367) data 0.000 (0.006) loss 1.5576 (1.0989) acc 65.6250 (72.1094) lr 7.5131e-04 eta 3:44:07 +epoch [31/50] batch [165/500] time 1.388 (1.367) data 0.000 (0.005) loss 0.9253 (1.0916) acc 68.7500 
(72.2917) lr 7.5131e-04 eta 3:44:01 +epoch [31/50] batch [170/500] time 1.357 (1.366) data 0.001 (0.005) loss 0.8154 (1.0841) acc 78.1250 (72.3897) lr 7.5131e-04 eta 3:43:52 +epoch [31/50] batch [175/500] time 1.355 (1.366) data 0.000 (0.005) loss 1.5166 (1.0832) acc 62.5000 (72.4643) lr 7.5131e-04 eta 3:43:42 +epoch [31/50] batch [180/500] time 1.356 (1.366) data 0.000 (0.005) loss 1.4883 (1.0879) acc 68.7500 (72.4653) lr 7.5131e-04 eta 3:43:33 +epoch [31/50] batch [185/500] time 1.376 (1.367) data 0.000 (0.005) loss 1.6172 (1.0914) acc 56.2500 (72.3480) lr 7.5131e-04 eta 3:43:32 +epoch [31/50] batch [190/500] time 1.349 (1.366) data 0.001 (0.005) loss 1.0225 (1.0890) acc 68.7500 (72.3520) lr 7.5131e-04 eta 3:43:24 +epoch [31/50] batch [195/500] time 1.377 (1.367) data 0.000 (0.005) loss 1.0635 (1.0945) acc 68.7500 (72.2756) lr 7.5131e-04 eta 3:43:18 +epoch [31/50] batch [200/500] time 1.372 (1.367) data 0.001 (0.004) loss 1.5127 (1.0973) acc 65.6250 (72.2500) lr 7.5131e-04 eta 3:43:12 +epoch [31/50] batch [205/500] time 1.388 (1.367) data 0.000 (0.004) loss 1.1973 (1.0989) acc 71.8750 (72.3171) lr 7.5131e-04 eta 3:43:09 +epoch [31/50] batch [210/500] time 1.368 (1.367) data 0.001 (0.004) loss 0.9551 (1.0996) acc 65.6250 (72.3363) lr 7.5131e-04 eta 3:43:02 +epoch [31/50] batch [215/500] time 1.360 (1.367) data 0.000 (0.004) loss 1.2285 (1.1014) acc 68.7500 (72.3401) lr 7.5131e-04 eta 3:42:57 +epoch [31/50] batch [220/500] time 1.373 (1.367) data 0.000 (0.004) loss 0.8379 (1.1003) acc 81.2500 (72.3722) lr 7.5131e-04 eta 3:42:49 +epoch [31/50] batch [225/500] time 1.376 (1.367) data 0.000 (0.004) loss 0.8662 (1.0972) acc 75.0000 (72.4167) lr 7.5131e-04 eta 3:42:42 +epoch [31/50] batch [230/500] time 1.361 (1.368) data 0.000 (0.004) loss 1.2246 (1.0974) acc 56.2500 (72.3913) lr 7.5131e-04 eta 3:42:40 +epoch [31/50] batch [235/500] time 1.366 (1.367) data 0.000 (0.004) loss 1.1094 (1.0952) acc 68.7500 (72.4734) lr 7.5131e-04 eta 3:42:30 +epoch [31/50] batch [240/500] 
time 1.372 (1.367) data 0.000 (0.004) loss 2.0410 (1.0967) acc 37.5000 (72.3307) lr 7.5131e-04 eta 3:42:21 +epoch [31/50] batch [245/500] time 1.342 (1.367) data 0.000 (0.004) loss 0.8247 (1.1028) acc 75.0000 (72.2832) lr 7.5131e-04 eta 3:42:12 +epoch [31/50] batch [250/500] time 1.368 (1.367) data 0.000 (0.004) loss 0.9253 (1.1018) acc 71.8750 (72.3250) lr 7.5131e-04 eta 3:42:05 +epoch [31/50] batch [255/500] time 1.343 (1.366) data 0.000 (0.004) loss 0.7241 (1.1051) acc 81.2500 (72.2794) lr 7.5131e-04 eta 3:41:56 +epoch [31/50] batch [260/500] time 1.388 (1.366) data 0.001 (0.004) loss 1.3447 (1.1066) acc 65.6250 (72.2957) lr 7.5131e-04 eta 3:41:49 +epoch [31/50] batch [265/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.3779 (1.1107) acc 71.8750 (72.2524) lr 7.5131e-04 eta 3:41:41 +epoch [31/50] batch [270/500] time 1.358 (1.366) data 0.000 (0.003) loss 1.0830 (1.1108) acc 75.0000 (72.2569) lr 7.5131e-04 eta 3:41:34 +epoch [31/50] batch [275/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.8584 (1.1097) acc 75.0000 (72.2273) lr 7.5131e-04 eta 3:41:26 +epoch [31/50] batch [280/500] time 1.351 (1.366) data 0.000 (0.003) loss 0.8003 (1.1090) acc 81.2500 (72.2210) lr 7.5131e-04 eta 3:41:19 +epoch [31/50] batch [285/500] time 1.371 (1.366) data 0.000 (0.003) loss 1.3477 (1.1125) acc 68.7500 (72.0943) lr 7.5131e-04 eta 3:41:11 +epoch [31/50] batch [290/500] time 1.356 (1.366) data 0.000 (0.003) loss 1.0801 (1.1146) acc 75.0000 (72.0366) lr 7.5131e-04 eta 3:41:03 +epoch [31/50] batch [295/500] time 1.354 (1.366) data 0.000 (0.003) loss 1.2559 (1.1147) acc 68.7500 (72.0869) lr 7.5131e-04 eta 3:40:56 +epoch [31/50] batch [300/500] time 1.347 (1.366) data 0.000 (0.003) loss 1.0479 (1.1121) acc 75.0000 (72.1667) lr 7.5131e-04 eta 3:40:48 +epoch [31/50] batch [305/500] time 1.379 (1.366) data 0.000 (0.003) loss 0.5298 (1.1092) acc 90.6250 (72.2234) lr 7.5131e-04 eta 3:40:41 +epoch [31/50] batch [310/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.4336 (1.1144) acc 
53.1250 (71.9859) lr 7.5131e-04 eta 3:40:34 +epoch [31/50] batch [315/500] time 1.388 (1.366) data 0.000 (0.003) loss 1.1113 (1.1145) acc 65.6250 (72.0139) lr 7.5131e-04 eta 3:40:29 +epoch [31/50] batch [320/500] time 1.357 (1.366) data 0.000 (0.003) loss 1.6348 (1.1131) acc 59.3750 (71.9922) lr 7.5131e-04 eta 3:40:22 +epoch [31/50] batch [325/500] time 1.488 (1.366) data 0.000 (0.003) loss 0.8340 (1.1070) acc 75.0000 (72.1250) lr 7.5131e-04 eta 3:40:18 +epoch [31/50] batch [330/500] time 1.376 (1.366) data 0.000 (0.003) loss 0.8428 (1.1045) acc 75.0000 (72.1875) lr 7.5131e-04 eta 3:40:10 +epoch [31/50] batch [335/500] time 1.333 (1.366) data 0.000 (0.003) loss 1.4639 (1.1031) acc 68.7500 (72.2015) lr 7.5131e-04 eta 3:40:01 +epoch [31/50] batch [340/500] time 1.360 (1.366) data 0.000 (0.003) loss 1.3525 (1.1086) acc 68.7500 (72.0956) lr 7.5131e-04 eta 3:39:55 +epoch [31/50] batch [345/500] time 1.339 (1.366) data 0.000 (0.003) loss 0.9136 (1.1075) acc 84.3750 (72.1739) lr 7.5131e-04 eta 3:39:47 +epoch [31/50] batch [350/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.9307 (1.1108) acc 65.6250 (72.0804) lr 7.5131e-04 eta 3:39:41 +epoch [31/50] batch [355/500] time 1.384 (1.366) data 0.000 (0.003) loss 0.9321 (1.1095) acc 71.8750 (72.0511) lr 7.5131e-04 eta 3:39:34 +epoch [31/50] batch [360/500] time 1.369 (1.366) data 0.000 (0.003) loss 1.5850 (1.1101) acc 65.6250 (72.0573) lr 7.5131e-04 eta 3:39:27 +epoch [31/50] batch [365/500] time 1.359 (1.366) data 0.000 (0.003) loss 0.9365 (1.1121) acc 75.0000 (72.0120) lr 7.5131e-04 eta 3:39:20 +epoch [31/50] batch [370/500] time 1.357 (1.366) data 0.000 (0.003) loss 1.3750 (1.1131) acc 68.7500 (72.0355) lr 7.5131e-04 eta 3:39:17 +epoch [31/50] batch [375/500] time 1.375 (1.366) data 0.000 (0.003) loss 0.9995 (1.1129) acc 81.2500 (72.0583) lr 7.5131e-04 eta 3:39:09 +epoch [31/50] batch [380/500] time 1.385 (1.366) data 0.000 (0.003) loss 0.8242 (1.1124) acc 71.8750 (72.1299) lr 7.5131e-04 eta 3:39:01 +epoch [31/50] batch 
[385/500] time 1.373 (1.366) data 0.000 (0.003) loss 1.5508 (1.1108) acc 62.5000 (72.1753) lr 7.5131e-04 eta 3:38:54 +epoch [31/50] batch [390/500] time 1.391 (1.366) data 0.000 (0.002) loss 0.8750 (1.1095) acc 81.2500 (72.2035) lr 7.5131e-04 eta 3:38:48 +epoch [31/50] batch [395/500] time 1.364 (1.366) data 0.000 (0.002) loss 2.0586 (1.1120) acc 62.5000 (72.2389) lr 7.5131e-04 eta 3:38:41 +epoch [31/50] batch [400/500] time 1.355 (1.366) data 0.000 (0.002) loss 0.5513 (1.1079) acc 87.5000 (72.2734) lr 7.5131e-04 eta 3:38:34 +epoch [31/50] batch [405/500] time 1.374 (1.366) data 0.000 (0.002) loss 1.0635 (1.1092) acc 65.6250 (72.2222) lr 7.5131e-04 eta 3:38:27 +epoch [31/50] batch [410/500] time 1.360 (1.366) data 0.000 (0.002) loss 0.9141 (1.1088) acc 84.3750 (72.2713) lr 7.5131e-04 eta 3:38:19 +epoch [31/50] batch [415/500] time 1.357 (1.366) data 0.000 (0.002) loss 1.1494 (1.1104) acc 65.6250 (72.2590) lr 7.5131e-04 eta 3:38:12 +epoch [31/50] batch [420/500] time 1.392 (1.366) data 0.000 (0.002) loss 0.7227 (1.1097) acc 75.0000 (72.2470) lr 7.5131e-04 eta 3:38:06 +epoch [31/50] batch [425/500] time 1.360 (1.366) data 0.000 (0.002) loss 0.8340 (1.1073) acc 81.2500 (72.2721) lr 7.5131e-04 eta 3:37:59 +epoch [31/50] batch [430/500] time 1.373 (1.366) data 0.000 (0.002) loss 0.9199 (1.1064) acc 84.3750 (72.2820) lr 7.5131e-04 eta 3:37:52 +epoch [31/50] batch [435/500] time 1.372 (1.366) data 0.000 (0.002) loss 0.8999 (1.1026) acc 75.0000 (72.3491) lr 7.5131e-04 eta 3:37:45 +epoch [31/50] batch [440/500] time 1.365 (1.366) data 0.000 (0.002) loss 1.3633 (1.1031) acc 62.5000 (72.3295) lr 7.5131e-04 eta 3:37:37 +epoch [31/50] batch [445/500] time 1.342 (1.366) data 0.000 (0.002) loss 0.9805 (1.1036) acc 71.8750 (72.3244) lr 7.5131e-04 eta 3:37:28 +epoch [31/50] batch [450/500] time 1.363 (1.366) data 0.000 (0.002) loss 1.1914 (1.1024) acc 71.8750 (72.3333) lr 7.5131e-04 eta 3:37:21 +epoch [31/50] batch [455/500] time 1.374 (1.366) data 0.000 (0.002) loss 1.2607 
(1.1017) acc 68.7500 (72.3489) lr 7.5131e-04 eta 3:37:14 +epoch [31/50] batch [460/500] time 1.371 (1.365) data 0.001 (0.002) loss 1.0615 (1.1047) acc 75.0000 (72.2894) lr 7.5131e-04 eta 3:37:05 +epoch [31/50] batch [465/500] time 1.362 (1.365) data 0.000 (0.002) loss 1.3955 (1.1046) acc 65.6250 (72.3051) lr 7.5131e-04 eta 3:36:58 +epoch [31/50] batch [470/500] time 1.355 (1.365) data 0.000 (0.002) loss 1.4053 (1.1051) acc 62.5000 (72.3138) lr 7.5131e-04 eta 3:36:51 +epoch [31/50] batch [475/500] time 1.353 (1.365) data 0.000 (0.002) loss 0.2898 (1.1025) acc 93.7500 (72.3816) lr 7.5131e-04 eta 3:36:44 +epoch [31/50] batch [480/500] time 1.369 (1.365) data 0.000 (0.002) loss 0.7979 (1.1015) acc 78.1250 (72.4349) lr 7.5131e-04 eta 3:36:37 +epoch [31/50] batch [485/500] time 1.376 (1.365) data 0.001 (0.002) loss 1.6992 (1.1040) acc 56.2500 (72.3776) lr 7.5131e-04 eta 3:36:31 +epoch [31/50] batch [490/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.0664 (1.1043) acc 71.8750 (72.3916) lr 7.5131e-04 eta 3:36:24 +epoch [31/50] batch [495/500] time 1.369 (1.365) data 0.000 (0.002) loss 1.1016 (1.1024) acc 71.8750 (72.4495) lr 7.5131e-04 eta 3:36:17 +epoch [31/50] batch [500/500] time 1.360 (1.365) data 0.000 (0.002) loss 1.1699 (1.1012) acc 65.6250 (72.4688) lr 6.9098e-04 eta 3:36:10 +epoch [32/50] batch [5/500] time 1.380 (1.533) data 0.000 (0.161) loss 1.0244 (0.9857) acc 71.8750 (75.6250) lr 6.9098e-04 eta 4:02:32 +epoch [32/50] batch [10/500] time 1.354 (1.447) data 0.000 (0.080) loss 0.9399 (1.0604) acc 71.8750 (72.5000) lr 6.9098e-04 eta 3:48:50 +epoch [32/50] batch [15/500] time 1.345 (1.415) data 0.000 (0.054) loss 1.6631 (1.1360) acc 56.2500 (70.4167) lr 6.9098e-04 eta 3:43:37 +epoch [32/50] batch [20/500] time 1.368 (1.409) data 0.000 (0.040) loss 1.5342 (1.1108) acc 62.5000 (70.9375) lr 6.9098e-04 eta 3:42:35 +epoch [32/50] batch [25/500] time 1.362 (1.400) data 0.000 (0.032) loss 1.0332 (1.0583) acc 75.0000 (72.0000) lr 6.9098e-04 eta 3:41:03 +epoch [32/50] 
batch [30/500] time 1.363 (1.392) data 0.000 (0.027) loss 1.3701 (1.0727) acc 62.5000 (72.2917) lr 6.9098e-04 eta 3:39:39 +epoch [32/50] batch [35/500] time 1.366 (1.387) data 0.000 (0.023) loss 0.6460 (1.0416) acc 87.5000 (73.1250) lr 6.9098e-04 eta 3:38:50 +epoch [32/50] batch [40/500] time 1.361 (1.385) data 0.000 (0.020) loss 0.8374 (1.0286) acc 84.3750 (73.6719) lr 6.9098e-04 eta 3:38:20 +epoch [32/50] batch [45/500] time 1.367 (1.383) data 0.000 (0.018) loss 1.3926 (1.0411) acc 84.3750 (73.6111) lr 6.9098e-04 eta 3:37:53 +epoch [32/50] batch [50/500] time 1.375 (1.380) data 0.000 (0.016) loss 1.3154 (1.0551) acc 78.1250 (73.4375) lr 6.9098e-04 eta 3:37:25 +epoch [32/50] batch [55/500] time 1.374 (1.379) data 0.000 (0.015) loss 0.7480 (1.0592) acc 78.1250 (73.1250) lr 6.9098e-04 eta 3:37:01 +epoch [32/50] batch [60/500] time 1.477 (1.380) data 0.001 (0.014) loss 1.2637 (1.0686) acc 71.8750 (72.9167) lr 6.9098e-04 eta 3:37:04 +epoch [32/50] batch [65/500] time 1.390 (1.379) data 0.001 (0.013) loss 1.7422 (1.0689) acc 65.6250 (73.0288) lr 6.9098e-04 eta 3:36:49 +epoch [32/50] batch [70/500] time 1.397 (1.379) data 0.000 (0.012) loss 0.8501 (1.0780) acc 81.2500 (72.9464) lr 6.9098e-04 eta 3:36:48 +epoch [32/50] batch [75/500] time 1.370 (1.379) data 0.000 (0.011) loss 0.4600 (1.0757) acc 78.1250 (72.6667) lr 6.9098e-04 eta 3:36:32 +epoch [32/50] batch [80/500] time 1.352 (1.377) data 0.000 (0.010) loss 1.7588 (1.0833) acc 50.0000 (72.4219) lr 6.9098e-04 eta 3:36:14 +epoch [32/50] batch [85/500] time 1.371 (1.376) data 0.000 (0.010) loss 1.2812 (1.0943) acc 71.8750 (72.3529) lr 6.9098e-04 eta 3:35:59 +epoch [32/50] batch [90/500] time 1.364 (1.376) data 0.000 (0.009) loss 1.1729 (1.0978) acc 75.0000 (72.3958) lr 6.9098e-04 eta 3:35:44 +epoch [32/50] batch [95/500] time 1.354 (1.374) data 0.000 (0.009) loss 0.9111 (1.1015) acc 75.0000 (72.1053) lr 6.9098e-04 eta 3:35:26 +epoch [32/50] batch [100/500] time 1.349 (1.374) data 0.000 (0.008) loss 1.1758 (1.1057) acc 
68.7500 (72.0625) lr 6.9098e-04 eta 3:35:16 +epoch [32/50] batch [105/500] time 1.357 (1.374) data 0.000 (0.008) loss 0.8740 (1.1009) acc 78.1250 (72.0833) lr 6.9098e-04 eta 3:35:05 +epoch [32/50] batch [110/500] time 1.351 (1.373) data 0.000 (0.008) loss 1.0615 (1.0973) acc 75.0000 (72.1591) lr 6.9098e-04 eta 3:34:52 +epoch [32/50] batch [115/500] time 1.356 (1.372) data 0.000 (0.007) loss 1.9453 (1.1074) acc 68.7500 (72.0380) lr 6.9098e-04 eta 3:34:39 +epoch [32/50] batch [120/500] time 1.344 (1.372) data 0.000 (0.007) loss 1.3340 (1.1095) acc 68.7500 (72.0052) lr 6.9098e-04 eta 3:34:24 +epoch [32/50] batch [125/500] time 1.373 (1.372) data 0.000 (0.007) loss 1.4551 (1.1121) acc 56.2500 (71.8500) lr 6.9098e-04 eta 3:34:19 +epoch [32/50] batch [130/500] time 1.347 (1.371) data 0.001 (0.007) loss 0.8813 (1.1016) acc 81.2500 (72.0673) lr 6.9098e-04 eta 3:34:05 +epoch [32/50] batch [135/500] time 1.351 (1.370) data 0.000 (0.006) loss 0.6016 (1.0966) acc 84.3750 (72.0602) lr 6.9098e-04 eta 3:33:53 +epoch [32/50] batch [140/500] time 1.372 (1.370) data 0.000 (0.006) loss 1.3535 (1.0967) acc 65.6250 (72.0536) lr 6.9098e-04 eta 3:33:44 +epoch [32/50] batch [145/500] time 1.380 (1.370) data 0.000 (0.006) loss 1.6768 (1.0984) acc 68.7500 (71.9612) lr 6.9098e-04 eta 3:33:37 +epoch [32/50] batch [150/500] time 1.362 (1.370) data 0.000 (0.006) loss 0.7466 (1.0881) acc 84.3750 (72.2083) lr 6.9098e-04 eta 3:33:29 +epoch [32/50] batch [155/500] time 1.373 (1.370) data 0.000 (0.006) loss 1.5713 (1.0970) acc 62.5000 (72.0363) lr 6.9098e-04 eta 3:33:25 +epoch [32/50] batch [160/500] time 1.371 (1.371) data 0.000 (0.005) loss 0.9443 (1.1012) acc 65.6250 (71.7969) lr 6.9098e-04 eta 3:33:26 +epoch [32/50] batch [165/500] time 1.362 (1.371) data 0.000 (0.005) loss 0.8901 (1.0961) acc 68.7500 (71.9318) lr 6.9098e-04 eta 3:33:21 +epoch [32/50] batch [170/500] time 1.361 (1.371) data 0.000 (0.005) loss 0.9922 (1.0976) acc 68.7500 (71.8750) lr 6.9098e-04 eta 3:33:15 +epoch [32/50] batch 
[175/500] time 1.373 (1.372) data 0.000 (0.005) loss 1.1338 (1.0947) acc 68.7500 (71.9643) lr 6.9098e-04 eta 3:33:10 +epoch [32/50] batch [180/500] time 1.378 (1.371) data 0.000 (0.005) loss 1.1016 (1.0919) acc 75.0000 (72.0833) lr 6.9098e-04 eta 3:33:01 +epoch [32/50] batch [185/500] time 1.349 (1.371) data 0.000 (0.005) loss 1.0986 (1.0912) acc 75.0000 (71.9932) lr 6.9098e-04 eta 3:32:49 +epoch [32/50] batch [190/500] time 1.350 (1.371) data 0.000 (0.005) loss 0.8691 (1.0885) acc 78.1250 (72.0559) lr 6.9098e-04 eta 3:32:41 +epoch [32/50] batch [195/500] time 1.367 (1.370) data 0.000 (0.004) loss 1.1719 (1.0874) acc 59.3750 (71.9872) lr 6.9098e-04 eta 3:32:31 +epoch [32/50] batch [200/500] time 1.345 (1.370) data 0.000 (0.004) loss 0.8809 (1.0837) acc 68.7500 (71.9688) lr 6.9098e-04 eta 3:32:20 +epoch [32/50] batch [205/500] time 1.369 (1.370) data 0.000 (0.004) loss 1.2812 (1.0833) acc 71.8750 (71.9970) lr 6.9098e-04 eta 3:32:16 +epoch [32/50] batch [210/500] time 1.359 (1.370) data 0.000 (0.004) loss 1.3398 (1.0862) acc 71.8750 (71.9494) lr 6.9098e-04 eta 3:32:07 +epoch [32/50] batch [215/500] time 1.352 (1.370) data 0.000 (0.004) loss 0.8486 (1.0851) acc 71.8750 (71.9331) lr 6.9098e-04 eta 3:31:56 +epoch [32/50] batch [220/500] time 1.329 (1.369) data 0.000 (0.004) loss 1.8262 (1.0893) acc 56.2500 (71.9176) lr 6.9098e-04 eta 3:31:44 +epoch [32/50] batch [225/500] time 1.376 (1.369) data 0.000 (0.004) loss 2.0273 (1.0902) acc 59.3750 (71.8472) lr 6.9098e-04 eta 3:31:34 +epoch [32/50] batch [230/500] time 1.329 (1.368) data 0.000 (0.004) loss 1.6523 (1.0924) acc 59.3750 (71.7935) lr 6.9098e-04 eta 3:31:23 +epoch [32/50] batch [235/500] time 1.342 (1.368) data 0.000 (0.004) loss 0.8433 (1.0854) acc 71.8750 (71.9548) lr 6.9098e-04 eta 3:31:12 +epoch [32/50] batch [240/500] time 1.370 (1.368) data 0.000 (0.004) loss 1.5605 (1.0870) acc 65.6250 (71.9401) lr 6.9098e-04 eta 3:31:04 +epoch [32/50] batch [245/500] time 1.373 (1.368) data 0.000 (0.004) loss 1.6680 
(1.0928) acc 68.7500 (71.8750) lr 6.9098e-04 eta 3:30:56 +epoch [32/50] batch [250/500] time 1.368 (1.367) data 0.001 (0.004) loss 0.7021 (1.0879) acc 75.0000 (71.9250) lr 6.9098e-04 eta 3:30:48 +epoch [32/50] batch [255/500] time 1.387 (1.367) data 0.000 (0.003) loss 1.1250 (1.0875) acc 68.7500 (71.9363) lr 6.9098e-04 eta 3:30:42 +epoch [32/50] batch [260/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.1562 (1.0905) acc 68.7500 (71.7909) lr 6.9098e-04 eta 3:30:36 +epoch [32/50] batch [265/500] time 1.375 (1.368) data 0.000 (0.003) loss 0.7866 (1.0890) acc 81.2500 (71.8986) lr 6.9098e-04 eta 3:30:30 +epoch [32/50] batch [270/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.0664 (1.0860) acc 81.2500 (71.9792) lr 6.9098e-04 eta 3:30:22 +epoch [32/50] batch [275/500] time 1.367 (1.367) data 0.000 (0.003) loss 0.8647 (1.0833) acc 75.0000 (71.9886) lr 6.9098e-04 eta 3:30:13 +epoch [32/50] batch [280/500] time 1.367 (1.367) data 0.000 (0.003) loss 1.2012 (1.0858) acc 71.8750 (71.9308) lr 6.9098e-04 eta 3:30:06 +epoch [32/50] batch [285/500] time 1.357 (1.367) data 0.000 (0.003) loss 0.7397 (1.0796) acc 75.0000 (72.0833) lr 6.9098e-04 eta 3:29:58 +epoch [32/50] batch [290/500] time 1.350 (1.367) data 0.000 (0.003) loss 1.8760 (1.0881) acc 59.3750 (71.9289) lr 6.9098e-04 eta 3:29:49 +epoch [32/50] batch [295/500] time 1.377 (1.367) data 0.000 (0.003) loss 1.1758 (1.0908) acc 65.6250 (71.9174) lr 6.9098e-04 eta 3:29:41 +epoch [32/50] batch [300/500] time 1.346 (1.367) data 0.000 (0.003) loss 1.1191 (1.0911) acc 78.1250 (71.9375) lr 6.9098e-04 eta 3:29:33 +epoch [32/50] batch [305/500] time 1.372 (1.367) data 0.000 (0.003) loss 0.9917 (1.0904) acc 78.1250 (71.9877) lr 6.9098e-04 eta 3:29:29 +epoch [32/50] batch [310/500] time 1.352 (1.367) data 0.000 (0.003) loss 1.4443 (1.0942) acc 75.0000 (71.9657) lr 6.9098e-04 eta 3:29:20 +epoch [32/50] batch [315/500] time 1.341 (1.367) data 0.000 (0.003) loss 0.6445 (1.0893) acc 78.1250 (72.0337) lr 6.9098e-04 eta 3:29:12 +epoch 
[32/50] batch [320/500] time 1.358 (1.367) data 0.000 (0.003) loss 1.1309 (1.0933) acc 78.1250 (71.9531) lr 6.9098e-04 eta 3:29:04 +epoch [32/50] batch [325/500] time 1.341 (1.366) data 0.000 (0.003) loss 0.6411 (1.0910) acc 78.1250 (72.0000) lr 6.9098e-04 eta 3:28:55 +epoch [32/50] batch [330/500] time 1.338 (1.366) data 0.000 (0.003) loss 0.7109 (1.0924) acc 78.1250 (72.0076) lr 6.9098e-04 eta 3:28:45 +epoch [32/50] batch [335/500] time 1.336 (1.366) data 0.000 (0.003) loss 0.8013 (1.0912) acc 81.2500 (72.0522) lr 6.9098e-04 eta 3:28:37 +epoch [32/50] batch [340/500] time 1.343 (1.366) data 0.000 (0.003) loss 1.4277 (1.0910) acc 71.8750 (72.1048) lr 6.9098e-04 eta 3:28:28 +epoch [32/50] batch [345/500] time 1.371 (1.366) data 0.000 (0.003) loss 0.9946 (1.0886) acc 75.0000 (72.0924) lr 6.9098e-04 eta 3:28:21 +epoch [32/50] batch [350/500] time 1.379 (1.366) data 0.000 (0.003) loss 1.6162 (1.0903) acc 71.8750 (72.0625) lr 6.9098e-04 eta 3:28:17 +epoch [32/50] batch [355/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.0479 (1.0876) acc 71.8750 (72.1303) lr 6.9098e-04 eta 3:28:11 +epoch [32/50] batch [360/500] time 1.373 (1.366) data 0.000 (0.003) loss 1.1543 (1.0885) acc 59.3750 (72.0399) lr 6.9098e-04 eta 3:28:04 +epoch [32/50] batch [365/500] time 1.371 (1.366) data 0.000 (0.003) loss 1.1387 (1.0901) acc 62.5000 (72.0377) lr 6.9098e-04 eta 3:27:56 +epoch [32/50] batch [370/500] time 1.361 (1.366) data 0.000 (0.003) loss 0.9707 (1.0878) acc 81.2500 (72.1115) lr 6.9098e-04 eta 3:27:49 +epoch [32/50] batch [375/500] time 1.338 (1.366) data 0.000 (0.002) loss 1.0322 (1.0877) acc 68.7500 (72.1000) lr 6.9098e-04 eta 3:27:41 +epoch [32/50] batch [380/500] time 1.371 (1.366) data 0.000 (0.002) loss 1.7510 (1.0905) acc 53.1250 (72.0148) lr 6.9098e-04 eta 3:27:34 +epoch [32/50] batch [385/500] time 1.377 (1.366) data 0.000 (0.002) loss 1.1680 (1.0874) acc 71.8750 (72.1185) lr 6.9098e-04 eta 3:27:27 +epoch [32/50] batch [390/500] time 1.353 (1.366) data 0.000 (0.002) loss 
0.7671 (1.0847) acc 78.1250 (72.1955) lr 6.9098e-04 eta 3:27:19 +epoch [32/50] batch [395/500] time 1.370 (1.365) data 0.000 (0.002) loss 0.6885 (1.0846) acc 78.1250 (72.1519) lr 6.9098e-04 eta 3:27:11 +epoch [32/50] batch [400/500] time 1.349 (1.365) data 0.000 (0.002) loss 0.9897 (1.0858) acc 75.0000 (72.1484) lr 6.9098e-04 eta 3:27:03 +epoch [32/50] batch [405/500] time 1.346 (1.365) data 0.000 (0.002) loss 0.8652 (1.0857) acc 78.1250 (72.1605) lr 6.9098e-04 eta 3:26:56 +epoch [32/50] batch [410/500] time 1.356 (1.365) data 0.000 (0.002) loss 1.2324 (1.0831) acc 68.7500 (72.2104) lr 6.9098e-04 eta 3:26:48 +epoch [32/50] batch [415/500] time 1.342 (1.365) data 0.000 (0.002) loss 0.9644 (1.0814) acc 78.1250 (72.2666) lr 6.9098e-04 eta 3:26:40 +epoch [32/50] batch [420/500] time 1.346 (1.365) data 0.000 (0.002) loss 1.8359 (1.0803) acc 53.1250 (72.2470) lr 6.9098e-04 eta 3:26:33 +epoch [32/50] batch [425/500] time 1.376 (1.365) data 0.000 (0.002) loss 1.1289 (1.0826) acc 62.5000 (72.2353) lr 6.9098e-04 eta 3:26:25 +epoch [32/50] batch [430/500] time 1.360 (1.365) data 0.000 (0.002) loss 1.9102 (1.0840) acc 59.3750 (72.2093) lr 6.9098e-04 eta 3:26:19 +epoch [32/50] batch [435/500] time 1.375 (1.365) data 0.000 (0.002) loss 1.7842 (1.0862) acc 68.7500 (72.1839) lr 6.9098e-04 eta 3:26:13 +epoch [32/50] batch [440/500] time 1.354 (1.365) data 0.000 (0.002) loss 0.7041 (1.0867) acc 81.2500 (72.2088) lr 6.9098e-04 eta 3:26:05 +epoch [32/50] batch [445/500] time 1.492 (1.365) data 0.000 (0.002) loss 1.1875 (1.0851) acc 84.3750 (72.2683) lr 6.9098e-04 eta 3:26:00 +epoch [32/50] batch [450/500] time 1.377 (1.365) data 0.000 (0.002) loss 0.7905 (1.0842) acc 90.6250 (72.3472) lr 6.9098e-04 eta 3:25:53 +epoch [32/50] batch [455/500] time 1.380 (1.365) data 0.000 (0.002) loss 1.4473 (1.0845) acc 68.7500 (72.3489) lr 6.9098e-04 eta 3:25:47 +epoch [32/50] batch [460/500] time 1.348 (1.365) data 0.000 (0.002) loss 1.3496 (1.0826) acc 62.5000 (72.3709) lr 6.9098e-04 eta 3:25:40 
+epoch [32/50] batch [465/500] time 1.354 (1.365) data 0.001 (0.002) loss 1.0957 (1.0841) acc 71.8750 (72.3656) lr 6.9098e-04 eta 3:25:32 +epoch [32/50] batch [470/500] time 1.382 (1.365) data 0.000 (0.002) loss 1.1709 (1.0857) acc 75.0000 (72.3404) lr 6.9098e-04 eta 3:25:26 +epoch [32/50] batch [475/500] time 1.365 (1.365) data 0.000 (0.002) loss 1.2461 (1.0856) acc 65.6250 (72.3092) lr 6.9098e-04 eta 3:25:18 +epoch [32/50] batch [480/500] time 1.355 (1.365) data 0.000 (0.002) loss 1.9756 (1.0873) acc 62.5000 (72.3112) lr 6.9098e-04 eta 3:25:10 +epoch [32/50] batch [485/500] time 1.367 (1.365) data 0.001 (0.002) loss 1.3623 (1.0889) acc 68.7500 (72.3003) lr 6.9098e-04 eta 3:25:03 +epoch [32/50] batch [490/500] time 1.359 (1.365) data 0.000 (0.002) loss 1.1582 (1.0878) acc 68.7500 (72.3151) lr 6.9098e-04 eta 3:24:58 +epoch [32/50] batch [495/500] time 1.395 (1.365) data 0.000 (0.002) loss 0.9312 (1.0875) acc 78.1250 (72.3232) lr 6.9098e-04 eta 3:24:51 +epoch [32/50] batch [500/500] time 1.354 (1.365) data 0.000 (0.002) loss 1.0537 (1.0868) acc 75.0000 (72.3187) lr 6.3188e-04 eta 3:24:44 +epoch [33/50] batch [5/500] time 1.372 (1.542) data 0.000 (0.167) loss 0.9033 (0.9895) acc 81.2500 (76.2500) lr 6.3188e-04 eta 3:51:10 +epoch [33/50] batch [10/500] time 1.371 (1.453) data 0.001 (0.084) loss 1.2559 (1.0363) acc 75.0000 (74.6875) lr 6.3188e-04 eta 3:37:41 +epoch [33/50] batch [15/500] time 1.361 (1.418) data 0.000 (0.056) loss 0.8208 (1.0995) acc 75.0000 (73.1250) lr 6.3188e-04 eta 3:32:22 +epoch [33/50] batch [20/500] time 1.366 (1.402) data 0.000 (0.042) loss 1.1846 (1.0630) acc 71.8750 (73.2812) lr 6.3188e-04 eta 3:29:51 +epoch [33/50] batch [25/500] time 1.350 (1.396) data 0.001 (0.034) loss 0.8599 (1.0536) acc 78.1250 (73.1250) lr 6.3188e-04 eta 3:28:53 +epoch [33/50] batch [30/500] time 1.367 (1.391) data 0.000 (0.028) loss 0.8667 (1.0641) acc 71.8750 (73.0208) lr 6.3188e-04 eta 3:28:01 +epoch [33/50] batch [35/500] time 1.361 (1.393) data 0.000 (0.024) loss 
1.2969 (1.0864) acc 78.1250 (73.0357) lr 6.3188e-04 eta 3:28:06 +epoch [33/50] batch [40/500] time 1.355 (1.389) data 0.000 (0.021) loss 1.5645 (1.0951) acc 59.3750 (72.5781) lr 6.3188e-04 eta 3:27:28 +epoch [33/50] batch [45/500] time 1.352 (1.386) data 0.000 (0.019) loss 0.9966 (1.0909) acc 75.0000 (72.4306) lr 6.3188e-04 eta 3:26:53 +epoch [33/50] batch [50/500] time 1.354 (1.383) data 0.000 (0.017) loss 0.6763 (1.0962) acc 84.3750 (72.4375) lr 6.3188e-04 eta 3:26:19 +epoch [33/50] batch [55/500] time 1.343 (1.381) data 0.000 (0.016) loss 0.5845 (1.0748) acc 78.1250 (72.8409) lr 6.3188e-04 eta 3:25:54 +epoch [33/50] batch [60/500] time 1.359 (1.379) data 0.000 (0.014) loss 0.9272 (1.0751) acc 75.0000 (73.1250) lr 6.3188e-04 eta 3:25:32 +epoch [33/50] batch [65/500] time 1.345 (1.378) data 0.000 (0.013) loss 0.8975 (1.0484) acc 81.2500 (73.7500) lr 6.3188e-04 eta 3:25:11 +epoch [33/50] batch [70/500] time 1.370 (1.377) data 0.000 (0.012) loss 1.3047 (1.0701) acc 65.6250 (73.5714) lr 6.3188e-04 eta 3:24:55 +epoch [33/50] batch [75/500] time 1.371 (1.376) data 0.000 (0.012) loss 1.0879 (1.0643) acc 68.7500 (73.7500) lr 6.3188e-04 eta 3:24:36 +epoch [33/50] batch [80/500] time 1.374 (1.375) data 0.000 (0.011) loss 1.6172 (1.0720) acc 68.7500 (73.7500) lr 6.3188e-04 eta 3:24:28 +epoch [33/50] batch [85/500] time 1.352 (1.375) data 0.000 (0.010) loss 1.2148 (1.0706) acc 71.8750 (73.8235) lr 6.3188e-04 eta 3:24:14 +epoch [33/50] batch [90/500] time 1.352 (1.374) data 0.000 (0.010) loss 1.1543 (1.0805) acc 68.7500 (73.5069) lr 6.3188e-04 eta 3:23:58 +epoch [33/50] batch [95/500] time 1.362 (1.373) data 0.000 (0.009) loss 1.0889 (1.0773) acc 71.8750 (73.4868) lr 6.3188e-04 eta 3:23:45 +epoch [33/50] batch [100/500] time 1.353 (1.372) data 0.000 (0.009) loss 1.7900 (1.0784) acc 59.3750 (73.4062) lr 6.3188e-04 eta 3:23:31 +epoch [33/50] batch [105/500] time 1.377 (1.372) data 0.000 (0.008) loss 0.8481 (1.0802) acc 81.2500 (73.1548) lr 6.3188e-04 eta 3:23:20 +epoch [33/50] 
batch [110/500] time 1.355 (1.371) data 0.000 (0.008) loss 1.0693 (1.0805) acc 78.1250 (73.1818) lr 6.3188e-04 eta 3:23:08 +epoch [33/50] batch [115/500] time 1.375 (1.371) data 0.001 (0.008) loss 1.0332 (1.0780) acc 71.8750 (73.1250) lr 6.3188e-04 eta 3:23:01 +epoch [33/50] batch [120/500] time 1.350 (1.371) data 0.001 (0.007) loss 1.1787 (1.0833) acc 71.8750 (73.1250) lr 6.3188e-04 eta 3:22:51 +epoch [33/50] batch [125/500] time 1.355 (1.370) data 0.000 (0.007) loss 0.5508 (1.0838) acc 81.2500 (73.0750) lr 6.3188e-04 eta 3:22:41 +epoch [33/50] batch [130/500] time 1.365 (1.370) data 0.001 (0.007) loss 0.9497 (1.0836) acc 75.0000 (72.9087) lr 6.3188e-04 eta 3:22:35 +epoch [33/50] batch [135/500] time 1.373 (1.371) data 0.000 (0.007) loss 1.6582 (1.0988) acc 62.5000 (72.6157) lr 6.3188e-04 eta 3:22:32 +epoch [33/50] batch [140/500] time 1.350 (1.371) data 0.000 (0.006) loss 1.4980 (1.0963) acc 65.6250 (72.5670) lr 6.3188e-04 eta 3:22:25 +epoch [33/50] batch [145/500] time 1.344 (1.370) data 0.000 (0.006) loss 1.3281 (1.1003) acc 56.2500 (72.4138) lr 6.3188e-04 eta 3:22:13 +epoch [33/50] batch [150/500] time 1.366 (1.370) data 0.000 (0.006) loss 1.1133 (1.1030) acc 71.8750 (72.3958) lr 6.3188e-04 eta 3:22:02 +epoch [33/50] batch [155/500] time 1.387 (1.370) data 0.000 (0.006) loss 1.0801 (1.1086) acc 71.8750 (72.2581) lr 6.3188e-04 eta 3:21:54 +epoch [33/50] batch [160/500] time 1.366 (1.369) data 0.000 (0.006) loss 0.8218 (1.1013) acc 78.1250 (72.4805) lr 6.3188e-04 eta 3:21:45 +epoch [33/50] batch [165/500] time 1.366 (1.369) data 0.000 (0.005) loss 0.5962 (1.1027) acc 87.5000 (72.4811) lr 6.3188e-04 eta 3:21:37 +epoch [33/50] batch [170/500] time 1.367 (1.369) data 0.000 (0.005) loss 1.1914 (1.1050) acc 75.0000 (72.4632) lr 6.3188e-04 eta 3:21:30 +epoch [33/50] batch [175/500] time 1.364 (1.369) data 0.000 (0.005) loss 0.7920 (1.0995) acc 78.1250 (72.5179) lr 6.3188e-04 eta 3:21:19 +epoch [33/50] batch [180/500] time 1.348 (1.369) data 0.000 (0.005) loss 1.3848 
(1.0944) acc 68.7500 (72.7257) lr 6.3188e-04 eta 3:21:14 +epoch [33/50] batch [185/500] time 1.344 (1.369) data 0.000 (0.005) loss 1.2832 (1.0951) acc 71.8750 (72.7027) lr 6.3188e-04 eta 3:21:03 +epoch [33/50] batch [190/500] time 1.365 (1.368) data 0.000 (0.005) loss 1.3408 (1.0961) acc 78.1250 (72.7632) lr 6.3188e-04 eta 3:20:54 +epoch [33/50] batch [195/500] time 1.355 (1.368) data 0.000 (0.005) loss 0.6958 (1.0897) acc 75.0000 (72.7885) lr 6.3188e-04 eta 3:20:46 +epoch [33/50] batch [200/500] time 1.376 (1.368) data 0.000 (0.005) loss 1.0098 (1.0913) acc 71.8750 (72.7344) lr 6.3188e-04 eta 3:20:40 +epoch [33/50] batch [205/500] time 1.364 (1.368) data 0.000 (0.004) loss 0.8062 (1.0890) acc 75.0000 (72.7287) lr 6.3188e-04 eta 3:20:29 +epoch [33/50] batch [210/500] time 1.378 (1.368) data 0.000 (0.004) loss 1.4414 (1.0874) acc 68.7500 (72.7083) lr 6.3188e-04 eta 3:20:23 +epoch [33/50] batch [215/500] time 1.373 (1.368) data 0.000 (0.004) loss 1.3213 (1.0920) acc 59.3750 (72.5581) lr 6.3188e-04 eta 3:20:15 +epoch [33/50] batch [220/500] time 1.353 (1.368) data 0.000 (0.004) loss 1.4756 (1.0897) acc 65.6250 (72.6562) lr 6.3188e-04 eta 3:20:08 +epoch [33/50] batch [225/500] time 1.358 (1.368) data 0.000 (0.004) loss 1.4404 (1.0903) acc 65.6250 (72.5833) lr 6.3188e-04 eta 3:20:01 +epoch [33/50] batch [230/500] time 1.351 (1.368) data 0.000 (0.004) loss 0.8369 (1.0906) acc 78.1250 (72.6087) lr 6.3188e-04 eta 3:19:53 +epoch [33/50] batch [235/500] time 1.352 (1.367) data 0.000 (0.004) loss 0.8931 (1.0953) acc 78.1250 (72.5665) lr 6.3188e-04 eta 3:19:43 +epoch [33/50] batch [240/500] time 1.348 (1.367) data 0.000 (0.004) loss 1.1328 (1.0969) acc 62.5000 (72.5130) lr 6.3188e-04 eta 3:19:35 +epoch [33/50] batch [245/500] time 1.363 (1.367) data 0.000 (0.004) loss 0.8867 (1.0980) acc 75.0000 (72.5510) lr 6.3188e-04 eta 3:19:27 +epoch [33/50] batch [250/500] time 1.376 (1.367) data 0.000 (0.004) loss 0.7920 (1.0945) acc 75.0000 (72.5875) lr 6.3188e-04 eta 3:19:19 +epoch 
[33/50] batch [255/500] time 1.377 (1.367) data 0.000 (0.004) loss 1.4492 (1.0974) acc 71.8750 (72.5245) lr 6.3188e-04 eta 3:19:13 +epoch [33/50] batch [260/500] time 1.367 (1.367) data 0.000 (0.004) loss 1.0166 (1.0960) acc 78.1250 (72.5721) lr 6.3188e-04 eta 3:19:05 +epoch [33/50] batch [265/500] time 1.360 (1.367) data 0.001 (0.004) loss 1.4590 (1.0932) acc 71.8750 (72.6651) lr 6.3188e-04 eta 3:18:58 +epoch [33/50] batch [270/500] time 1.353 (1.367) data 0.000 (0.003) loss 1.1963 (1.0932) acc 75.0000 (72.6505) lr 6.3188e-04 eta 3:18:49 +epoch [33/50] batch [275/500] time 1.509 (1.367) data 0.000 (0.003) loss 1.1299 (1.0953) acc 62.5000 (72.5795) lr 6.3188e-04 eta 3:18:48 +epoch [33/50] batch [280/500] time 1.371 (1.367) data 0.001 (0.003) loss 0.8726 (1.0962) acc 78.1250 (72.5781) lr 6.3188e-04 eta 3:18:42 +epoch [33/50] batch [285/500] time 1.382 (1.367) data 0.000 (0.003) loss 0.6201 (1.0952) acc 81.2500 (72.5987) lr 6.3188e-04 eta 3:18:37 +epoch [33/50] batch [290/500] time 1.356 (1.367) data 0.000 (0.003) loss 1.0244 (1.0917) acc 75.0000 (72.6509) lr 6.3188e-04 eta 3:18:29 +epoch [33/50] batch [295/500] time 1.376 (1.367) data 0.000 (0.003) loss 1.0205 (1.0904) acc 62.5000 (72.6695) lr 6.3188e-04 eta 3:18:22 +epoch [33/50] batch [300/500] time 1.357 (1.367) data 0.000 (0.003) loss 1.3223 (1.0905) acc 65.6250 (72.6250) lr 6.3188e-04 eta 3:18:14 +epoch [33/50] batch [305/500] time 1.352 (1.367) data 0.001 (0.003) loss 1.1963 (1.0918) acc 75.0000 (72.6127) lr 6.3188e-04 eta 3:18:06 +epoch [33/50] batch [310/500] time 1.355 (1.367) data 0.000 (0.003) loss 0.8584 (1.0898) acc 68.7500 (72.5907) lr 6.3188e-04 eta 3:17:59 +epoch [33/50] batch [315/500] time 1.351 (1.367) data 0.000 (0.003) loss 0.6416 (1.0867) acc 75.0000 (72.6091) lr 6.3188e-04 eta 3:17:49 +epoch [33/50] batch [320/500] time 1.367 (1.367) data 0.000 (0.003) loss 0.8940 (1.0861) acc 84.3750 (72.6367) lr 6.3188e-04 eta 3:17:46 +epoch [33/50] batch [325/500] time 1.369 (1.367) data 0.000 (0.003) loss 
0.9688 (1.0863) acc 75.0000 (72.6346) lr 6.3188e-04 eta 3:17:37 +epoch [33/50] batch [330/500] time 1.354 (1.367) data 0.000 (0.003) loss 0.8936 (1.0836) acc 71.8750 (72.6799) lr 6.3188e-04 eta 3:17:29 +epoch [33/50] batch [335/500] time 1.360 (1.367) data 0.000 (0.003) loss 0.9268 (1.0820) acc 78.1250 (72.7146) lr 6.3188e-04 eta 3:17:22 +epoch [33/50] batch [340/500] time 1.345 (1.367) data 0.000 (0.003) loss 0.7163 (1.0781) acc 84.3750 (72.8309) lr 6.3188e-04 eta 3:17:13 +epoch [33/50] batch [345/500] time 1.389 (1.367) data 0.000 (0.003) loss 1.1377 (1.0780) acc 75.0000 (72.8804) lr 6.3188e-04 eta 3:17:07 +epoch [33/50] batch [350/500] time 1.337 (1.366) data 0.000 (0.003) loss 0.9937 (1.0795) acc 78.1250 (72.8214) lr 6.3188e-04 eta 3:17:00 +epoch [33/50] batch [355/500] time 1.356 (1.366) data 0.000 (0.003) loss 1.2520 (1.0779) acc 62.5000 (72.8169) lr 6.3188e-04 eta 3:16:52 +epoch [33/50] batch [360/500] time 1.356 (1.366) data 0.000 (0.003) loss 1.2969 (1.0779) acc 65.6250 (72.8038) lr 6.3188e-04 eta 3:16:44 +epoch [33/50] batch [365/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.0703 (1.0774) acc 78.1250 (72.7740) lr 6.3188e-04 eta 3:16:35 +epoch [33/50] batch [370/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.6504 (1.0818) acc 65.6250 (72.6605) lr 6.3188e-04 eta 3:16:28 +epoch [33/50] batch [375/500] time 1.364 (1.366) data 0.000 (0.003) loss 0.9717 (1.0835) acc 75.0000 (72.6000) lr 6.3188e-04 eta 3:16:21 +epoch [33/50] batch [380/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.2539 (1.0814) acc 71.8750 (72.6398) lr 6.3188e-04 eta 3:16:12 +epoch [33/50] batch [385/500] time 1.375 (1.366) data 0.000 (0.003) loss 0.4482 (1.0818) acc 87.5000 (72.5731) lr 6.3188e-04 eta 3:16:06 +epoch [33/50] batch [390/500] time 1.385 (1.366) data 0.000 (0.003) loss 1.0264 (1.0810) acc 71.8750 (72.6122) lr 6.3188e-04 eta 3:15:58 +epoch [33/50] batch [395/500] time 1.340 (1.366) data 0.000 (0.002) loss 0.9028 (1.0801) acc 81.2500 (72.6266) lr 6.3188e-04 eta 3:15:50 
+epoch [33/50] batch [400/500] time 1.342 (1.365) data 0.000 (0.002) loss 1.2480 (1.0807) acc 71.8750 (72.6484) lr 6.3188e-04 eta 3:15:40 +epoch [33/50] batch [405/500] time 1.360 (1.365) data 0.000 (0.002) loss 0.8276 (1.0781) acc 78.1250 (72.6775) lr 6.3188e-04 eta 3:15:32 +epoch [33/50] batch [410/500] time 1.357 (1.365) data 0.000 (0.002) loss 1.6338 (1.0793) acc 65.6250 (72.6296) lr 6.3188e-04 eta 3:15:24 +epoch [33/50] batch [415/500] time 1.361 (1.365) data 0.000 (0.002) loss 1.3047 (1.0773) acc 59.3750 (72.6355) lr 6.3188e-04 eta 3:15:17 +epoch [33/50] batch [420/500] time 1.379 (1.365) data 0.000 (0.002) loss 0.8765 (1.0758) acc 78.1250 (72.6488) lr 6.3188e-04 eta 3:15:12 +epoch [33/50] batch [425/500] time 1.354 (1.365) data 0.000 (0.002) loss 0.7207 (1.0748) acc 68.7500 (72.6397) lr 6.3188e-04 eta 3:15:04 +epoch [33/50] batch [430/500] time 1.375 (1.365) data 0.001 (0.002) loss 1.3994 (1.0755) acc 56.2500 (72.5654) lr 6.3188e-04 eta 3:14:57 +epoch [33/50] batch [435/500] time 1.324 (1.365) data 0.000 (0.002) loss 0.8071 (1.0739) acc 75.0000 (72.5647) lr 6.3188e-04 eta 3:14:48 +epoch [33/50] batch [440/500] time 1.352 (1.365) data 0.000 (0.002) loss 1.1143 (1.0753) acc 78.1250 (72.5639) lr 6.3188e-04 eta 3:14:40 +epoch [33/50] batch [445/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.3252 (1.0756) acc 68.7500 (72.5562) lr 6.3188e-04 eta 3:14:32 +epoch [33/50] batch [450/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.0615 (1.0753) acc 75.0000 (72.5764) lr 6.3188e-04 eta 3:14:25 +epoch [33/50] batch [455/500] time 1.357 (1.364) data 0.000 (0.002) loss 1.6543 (1.0745) acc 68.7500 (72.6374) lr 6.3188e-04 eta 3:14:17 +epoch [33/50] batch [460/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.6626 (1.0728) acc 84.3750 (72.6834) lr 6.3188e-04 eta 3:14:09 +epoch [33/50] batch [465/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.2344 (1.0731) acc 65.6250 (72.6344) lr 6.3188e-04 eta 3:14:04 +epoch [33/50] batch [470/500] time 1.365 (1.364) data 0.000 
(0.002) loss 1.2480 (1.0745) acc 84.3750 (72.6862) lr 6.3188e-04 eta 3:13:57 +epoch [33/50] batch [475/500] time 1.346 (1.364) data 0.000 (0.002) loss 1.0889 (1.0747) acc 75.0000 (72.6579) lr 6.3188e-04 eta 3:13:48 +epoch [33/50] batch [480/500] time 1.368 (1.364) data 0.000 (0.002) loss 1.6338 (1.0752) acc 65.6250 (72.7214) lr 6.3188e-04 eta 3:13:42 +epoch [33/50] batch [485/500] time 1.354 (1.364) data 0.001 (0.002) loss 0.8740 (1.0728) acc 75.0000 (72.7513) lr 6.3188e-04 eta 3:13:34 +epoch [33/50] batch [490/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.5723 (1.0714) acc 68.7500 (72.7934) lr 6.3188e-04 eta 3:13:26 +epoch [33/50] batch [495/500] time 1.352 (1.364) data 0.000 (0.002) loss 0.8833 (1.0714) acc 75.0000 (72.7904) lr 6.3188e-04 eta 3:13:18 +epoch [33/50] batch [500/500] time 1.372 (1.364) data 0.000 (0.002) loss 0.8779 (1.0699) acc 75.0000 (72.8250) lr 5.7422e-04 eta 3:13:11 +epoch [34/50] batch [5/500] time 1.379 (1.580) data 0.000 (0.169) loss 0.8701 (0.9671) acc 78.1250 (72.5000) lr 5.7422e-04 eta 3:43:38 +epoch [34/50] batch [10/500] time 1.382 (1.468) data 0.000 (0.085) loss 0.9233 (0.9614) acc 68.7500 (73.7500) lr 5.7422e-04 eta 3:27:41 +epoch [34/50] batch [15/500] time 1.357 (1.431) data 0.001 (0.056) loss 1.1543 (0.9732) acc 71.8750 (73.7500) lr 5.7422e-04 eta 3:22:18 +epoch [34/50] batch [20/500] time 1.342 (1.411) data 0.000 (0.042) loss 1.1709 (1.0115) acc 71.8750 (73.1250) lr 5.7422e-04 eta 3:19:27 +epoch [34/50] batch [25/500] time 1.356 (1.400) data 0.000 (0.034) loss 0.9692 (1.0301) acc 75.0000 (73.3750) lr 5.7422e-04 eta 3:17:41 +epoch [34/50] batch [30/500] time 1.382 (1.395) data 0.001 (0.028) loss 0.9321 (1.0729) acc 68.7500 (72.9167) lr 5.7422e-04 eta 3:16:57 +epoch [34/50] batch [35/500] time 1.376 (1.390) data 0.000 (0.024) loss 1.2422 (1.0991) acc 68.7500 (72.5893) lr 5.7422e-04 eta 3:16:07 +epoch [34/50] batch [40/500] time 1.353 (1.387) data 0.000 (0.021) loss 1.0977 (1.0982) acc 71.8750 (72.4219) lr 5.7422e-04 eta 3:15:29 
+epoch [34/50] batch [45/500] time 1.374 (1.385) data 0.000 (0.019) loss 1.2920 (1.0825) acc 78.1250 (72.7778) lr 5.7422e-04 eta 3:15:06 +epoch [34/50] batch [50/500] time 1.374 (1.382) data 0.000 (0.017) loss 0.7266 (1.0842) acc 78.1250 (72.7500) lr 5.7422e-04 eta 3:14:41 +epoch [34/50] batch [55/500] time 1.360 (1.380) data 0.000 (0.016) loss 1.1309 (1.0879) acc 71.8750 (72.7841) lr 5.7422e-04 eta 3:14:14 +epoch [34/50] batch [60/500] time 1.362 (1.378) data 0.001 (0.014) loss 1.0605 (1.0831) acc 75.0000 (72.8125) lr 5.7422e-04 eta 3:13:52 +epoch [34/50] batch [65/500] time 1.356 (1.377) data 0.000 (0.013) loss 1.0771 (1.0710) acc 68.7500 (72.7885) lr 5.7422e-04 eta 3:13:36 +epoch [34/50] batch [70/500] time 1.341 (1.376) data 0.000 (0.012) loss 0.7358 (1.0548) acc 84.3750 (73.3036) lr 5.7422e-04 eta 3:13:20 +epoch [34/50] batch [75/500] time 1.363 (1.375) data 0.000 (0.012) loss 0.5664 (1.0475) acc 90.6250 (73.2917) lr 5.7422e-04 eta 3:13:03 +epoch [34/50] batch [80/500] time 1.365 (1.374) data 0.000 (0.011) loss 0.7061 (1.0433) acc 78.1250 (73.2812) lr 5.7422e-04 eta 3:12:48 +epoch [34/50] batch [85/500] time 1.353 (1.373) data 0.000 (0.010) loss 0.6978 (1.0554) acc 81.2500 (73.0882) lr 5.7422e-04 eta 3:12:35 +epoch [34/50] batch [90/500] time 1.368 (1.373) data 0.000 (0.010) loss 1.2959 (1.0677) acc 68.7500 (72.7431) lr 5.7422e-04 eta 3:12:24 +epoch [34/50] batch [95/500] time 1.363 (1.372) data 0.000 (0.009) loss 0.9492 (1.0623) acc 71.8750 (72.8947) lr 5.7422e-04 eta 3:12:12 +epoch [34/50] batch [100/500] time 1.351 (1.372) data 0.000 (0.009) loss 0.6978 (1.0518) acc 84.3750 (73.1562) lr 5.7422e-04 eta 3:12:01 +epoch [34/50] batch [105/500] time 1.358 (1.372) data 0.000 (0.008) loss 0.9497 (1.0472) acc 71.8750 (73.2440) lr 5.7422e-04 eta 3:11:57 +epoch [34/50] batch [110/500] time 1.359 (1.372) data 0.000 (0.008) loss 0.7212 (1.0509) acc 78.1250 (73.2102) lr 5.7422e-04 eta 3:11:48 +epoch [34/50] batch [115/500] time 1.347 (1.371) data 0.000 (0.008) loss 
0.8398 (1.0535) acc 75.0000 (73.2337) lr 5.7422e-04 eta 3:11:37 +epoch [34/50] batch [120/500] time 1.368 (1.371) data 0.000 (0.007) loss 0.8989 (1.0535) acc 68.7500 (73.2031) lr 5.7422e-04 eta 3:11:27 +epoch [34/50] batch [125/500] time 1.365 (1.370) data 0.000 (0.007) loss 0.7012 (1.0585) acc 75.0000 (73.0750) lr 5.7422e-04 eta 3:11:16 +epoch [34/50] batch [130/500] time 1.376 (1.370) data 0.000 (0.007) loss 0.6362 (1.0527) acc 84.3750 (73.1731) lr 5.7422e-04 eta 3:11:08 +epoch [34/50] batch [135/500] time 1.346 (1.369) data 0.000 (0.007) loss 0.6919 (1.0503) acc 84.3750 (73.3565) lr 5.7422e-04 eta 3:10:55 +epoch [34/50] batch [140/500] time 1.339 (1.369) data 0.000 (0.006) loss 1.0283 (1.0502) acc 78.1250 (73.4152) lr 5.7422e-04 eta 3:10:43 +epoch [34/50] batch [145/500] time 1.490 (1.370) data 0.000 (0.006) loss 0.8018 (1.0415) acc 81.2500 (73.6638) lr 5.7422e-04 eta 3:10:42 +epoch [34/50] batch [150/500] time 1.344 (1.369) data 0.000 (0.006) loss 1.6250 (1.0544) acc 53.1250 (73.3542) lr 5.7422e-04 eta 3:10:32 +epoch [34/50] batch [155/500] time 1.342 (1.369) data 0.000 (0.006) loss 0.9360 (1.0578) acc 71.8750 (73.2661) lr 5.7422e-04 eta 3:10:21 +epoch [34/50] batch [160/500] time 1.363 (1.368) data 0.000 (0.006) loss 0.6982 (1.0560) acc 78.1250 (73.2422) lr 5.7422e-04 eta 3:10:11 +epoch [34/50] batch [165/500] time 1.330 (1.368) data 0.000 (0.005) loss 1.2881 (1.0572) acc 65.6250 (73.1818) lr 5.7422e-04 eta 3:10:00 +epoch [34/50] batch [170/500] time 1.378 (1.368) data 0.000 (0.005) loss 1.3242 (1.0547) acc 68.7500 (73.1618) lr 5.7422e-04 eta 3:09:51 +epoch [34/50] batch [175/500] time 1.356 (1.367) data 0.000 (0.005) loss 0.9297 (1.0515) acc 78.1250 (73.2321) lr 5.7422e-04 eta 3:09:43 +epoch [34/50] batch [180/500] time 1.362 (1.367) data 0.000 (0.005) loss 1.2256 (1.0582) acc 75.0000 (73.1944) lr 5.7422e-04 eta 3:09:35 +epoch [34/50] batch [185/500] time 1.352 (1.367) data 0.000 (0.005) loss 0.6538 (1.0539) acc 81.2500 (73.2770) lr 5.7422e-04 eta 3:09:27 
+epoch [34/50] batch [190/500] time 1.375 (1.367) data 0.000 (0.005) loss 0.8789 (1.0613) acc 81.2500 (73.2237) lr 5.7422e-04 eta 3:09:23 +epoch [34/50] batch [195/500] time 1.354 (1.367) data 0.000 (0.005) loss 1.2822 (1.0664) acc 75.0000 (73.1731) lr 5.7422e-04 eta 3:09:14 +epoch [34/50] batch [200/500] time 1.363 (1.367) data 0.000 (0.005) loss 0.7231 (1.0640) acc 84.3750 (73.2031) lr 5.7422e-04 eta 3:09:06 +epoch [34/50] batch [205/500] time 1.363 (1.367) data 0.000 (0.004) loss 1.1816 (1.0619) acc 71.8750 (73.2622) lr 5.7422e-04 eta 3:08:56 +epoch [34/50] batch [210/500] time 1.355 (1.366) data 0.000 (0.004) loss 0.7051 (1.0574) acc 81.2500 (73.3631) lr 5.7422e-04 eta 3:08:48 +epoch [34/50] batch [215/500] time 1.352 (1.366) data 0.000 (0.004) loss 1.4199 (1.0575) acc 62.5000 (73.4157) lr 5.7422e-04 eta 3:08:40 +epoch [34/50] batch [220/500] time 1.347 (1.366) data 0.000 (0.004) loss 0.6968 (1.0586) acc 84.3750 (73.3949) lr 5.7422e-04 eta 3:08:30 +epoch [34/50] batch [225/500] time 1.353 (1.366) data 0.000 (0.004) loss 0.8843 (1.0575) acc 71.8750 (73.3750) lr 5.7422e-04 eta 3:08:21 +epoch [34/50] batch [230/500] time 1.339 (1.365) data 0.000 (0.004) loss 0.6963 (1.0542) acc 75.0000 (73.3832) lr 5.7422e-04 eta 3:08:12 +epoch [34/50] batch [235/500] time 1.358 (1.365) data 0.000 (0.004) loss 1.0986 (1.0510) acc 71.8750 (73.4176) lr 5.7422e-04 eta 3:08:03 +epoch [34/50] batch [240/500] time 1.365 (1.365) data 0.000 (0.004) loss 1.1172 (1.0501) acc 71.8750 (73.4505) lr 5.7422e-04 eta 3:07:55 +epoch [34/50] batch [245/500] time 1.363 (1.365) data 0.000 (0.004) loss 1.0156 (1.0528) acc 81.2500 (73.4056) lr 5.7422e-04 eta 3:07:50 +epoch [34/50] batch [250/500] time 1.350 (1.365) data 0.000 (0.004) loss 0.9053 (1.0578) acc 75.0000 (73.2875) lr 5.7422e-04 eta 3:07:41 +epoch [34/50] batch [255/500] time 1.380 (1.365) data 0.000 (0.004) loss 1.0283 (1.0567) acc 78.1250 (73.2598) lr 5.7422e-04 eta 3:07:31 +epoch [34/50] batch [260/500] time 1.360 (1.364) data 0.000 
(0.004) loss 1.4033 (1.0586) acc 59.3750 (73.1611) lr 5.7422e-04 eta 3:07:22 +epoch [34/50] batch [265/500] time 1.339 (1.364) data 0.000 (0.004) loss 0.8926 (1.0614) acc 78.1250 (73.1132) lr 5.7422e-04 eta 3:07:13 +epoch [34/50] batch [270/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.2188 (1.0612) acc 75.0000 (73.1134) lr 5.7422e-04 eta 3:07:06 +epoch [34/50] batch [275/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.0410 (1.0634) acc 59.3750 (72.9886) lr 5.7422e-04 eta 3:06:58 +epoch [34/50] batch [280/500] time 1.377 (1.364) data 0.000 (0.003) loss 1.3584 (1.0644) acc 62.5000 (72.8906) lr 5.7422e-04 eta 3:06:52 +epoch [34/50] batch [285/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.5234 (1.0665) acc 68.7500 (72.8399) lr 5.7422e-04 eta 3:06:44 +epoch [34/50] batch [290/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.3789 (1.0689) acc 68.7500 (72.8664) lr 5.7422e-04 eta 3:06:39 +epoch [34/50] batch [295/500] time 1.401 (1.364) data 0.000 (0.003) loss 1.5244 (1.0710) acc 75.0000 (72.8496) lr 5.7422e-04 eta 3:06:32 +epoch [34/50] batch [300/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.6328 (1.0723) acc 68.7500 (72.8958) lr 5.7422e-04 eta 3:06:24 +epoch [34/50] batch [305/500] time 1.353 (1.364) data 0.001 (0.003) loss 1.0615 (1.0713) acc 71.8750 (72.9098) lr 5.7422e-04 eta 3:06:16 +epoch [34/50] batch [310/500] time 1.363 (1.364) data 0.001 (0.003) loss 1.2646 (1.0733) acc 68.7500 (72.9032) lr 5.7422e-04 eta 3:06:09 +epoch [34/50] batch [315/500] time 1.367 (1.364) data 0.001 (0.003) loss 0.8521 (1.0735) acc 81.2500 (72.9464) lr 5.7422e-04 eta 3:06:01 +epoch [34/50] batch [320/500] time 1.352 (1.364) data 0.001 (0.003) loss 1.4551 (1.0737) acc 59.3750 (72.9297) lr 5.7422e-04 eta 3:05:53 +epoch [34/50] batch [325/500] time 1.357 (1.363) data 0.000 (0.003) loss 0.7705 (1.0776) acc 78.1250 (72.9231) lr 5.7422e-04 eta 3:05:45 +epoch [34/50] batch [330/500] time 1.358 (1.363) data 0.000 (0.003) loss 1.3145 (1.0796) acc 75.0000 (72.8883) lr 5.7422e-04 
eta 3:05:37 +epoch [34/50] batch [335/500] time 1.358 (1.363) data 0.000 (0.003) loss 0.9922 (1.0799) acc 75.0000 (72.9011) lr 5.7422e-04 eta 3:05:29 +epoch [34/50] batch [340/500] time 1.349 (1.363) data 0.000 (0.003) loss 0.7476 (1.0798) acc 75.0000 (72.8860) lr 5.7422e-04 eta 3:05:23 +epoch [34/50] batch [345/500] time 1.357 (1.363) data 0.000 (0.003) loss 1.0645 (1.0802) acc 62.5000 (72.8170) lr 5.7422e-04 eta 3:05:16 +epoch [34/50] batch [350/500] time 1.335 (1.363) data 0.000 (0.003) loss 0.6519 (1.0804) acc 84.3750 (72.8482) lr 5.7422e-04 eta 3:05:07 +epoch [34/50] batch [355/500] time 1.353 (1.363) data 0.001 (0.003) loss 1.4561 (1.0808) acc 65.6250 (72.8345) lr 5.7422e-04 eta 3:05:01 +epoch [34/50] batch [360/500] time 1.359 (1.363) data 0.000 (0.003) loss 0.7437 (1.0795) acc 78.1250 (72.8212) lr 5.7422e-04 eta 3:04:54 +epoch [34/50] batch [365/500] time 1.379 (1.363) data 0.000 (0.003) loss 0.9829 (1.0793) acc 68.7500 (72.7825) lr 5.7422e-04 eta 3:04:47 +epoch [34/50] batch [370/500] time 1.377 (1.363) data 0.001 (0.003) loss 1.3115 (1.0789) acc 68.7500 (72.7787) lr 5.7422e-04 eta 3:04:41 +epoch [34/50] batch [375/500] time 1.345 (1.363) data 0.000 (0.003) loss 0.7368 (1.0786) acc 84.3750 (72.8000) lr 5.7422e-04 eta 3:04:34 +epoch [34/50] batch [380/500] time 1.371 (1.363) data 0.000 (0.003) loss 1.0605 (1.0773) acc 71.8750 (72.8372) lr 5.7422e-04 eta 3:04:27 +epoch [34/50] batch [385/500] time 1.358 (1.363) data 0.000 (0.003) loss 0.9634 (1.0761) acc 68.7500 (72.8896) lr 5.7422e-04 eta 3:04:20 +epoch [34/50] batch [390/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.2070 (1.0758) acc 62.5000 (72.8846) lr 5.7422e-04 eta 3:04:15 +epoch [34/50] batch [395/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.8862 (1.0740) acc 78.1250 (72.9589) lr 5.7422e-04 eta 3:04:08 +epoch [34/50] batch [400/500] time 1.376 (1.363) data 0.000 (0.002) loss 0.9009 (1.0739) acc 75.0000 (72.9531) lr 5.7422e-04 eta 3:04:02 +epoch [34/50] batch [405/500] time 1.371 (1.363) data 
0.000 (0.002) loss 0.6943 (1.0728) acc 84.3750 (73.0093) lr 5.7422e-04 eta 3:03:55 +epoch [34/50] batch [410/500] time 1.357 (1.363) data 0.000 (0.002) loss 1.3838 (1.0754) acc 71.8750 (72.9954) lr 5.7422e-04 eta 3:03:48 +epoch [34/50] batch [415/500] time 1.368 (1.363) data 0.000 (0.002) loss 0.8931 (1.0768) acc 81.2500 (72.9593) lr 5.7422e-04 eta 3:03:42 +epoch [34/50] batch [420/500] time 1.355 (1.363) data 0.000 (0.002) loss 0.8613 (1.0746) acc 78.1250 (73.0208) lr 5.7422e-04 eta 3:03:35 +epoch [34/50] batch [425/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.0410 (1.0742) acc 62.5000 (72.9926) lr 5.7422e-04 eta 3:03:28 +epoch [34/50] batch [430/500] time 1.354 (1.363) data 0.000 (0.002) loss 0.4622 (1.0721) acc 90.6250 (72.9869) lr 5.7422e-04 eta 3:03:21 +epoch [34/50] batch [435/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.4004 (1.0734) acc 68.7500 (72.9598) lr 5.7422e-04 eta 3:03:16 +epoch [34/50] batch [440/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.8906 (1.0733) acc 53.1250 (72.9048) lr 5.7422e-04 eta 3:03:09 +epoch [34/50] batch [445/500] time 1.378 (1.363) data 0.000 (0.002) loss 1.1709 (1.0721) acc 56.2500 (72.9003) lr 5.7422e-04 eta 3:03:02 +epoch [34/50] batch [450/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.0186 (1.0723) acc 68.7500 (72.8750) lr 5.7422e-04 eta 3:02:54 +epoch [34/50] batch [455/500] time 1.372 (1.363) data 0.000 (0.002) loss 0.4722 (1.0724) acc 90.6250 (72.9327) lr 5.7422e-04 eta 3:02:48 +epoch [34/50] batch [460/500] time 1.387 (1.364) data 0.000 (0.002) loss 1.6230 (1.0722) acc 56.2500 (72.9416) lr 5.7422e-04 eta 3:02:42 +epoch [34/50] batch [465/500] time 1.367 (1.364) data 0.000 (0.002) loss 0.8931 (1.0720) acc 78.1250 (72.9704) lr 5.7422e-04 eta 3:02:35 +epoch [34/50] batch [470/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.1475 (1.0732) acc 71.8750 (72.9654) lr 5.7422e-04 eta 3:02:28 +epoch [34/50] batch [475/500] time 1.366 (1.363) data 0.000 (0.002) loss 0.9419 (1.0725) acc 78.1250 (73.0197) lr 
5.7422e-04 eta 3:02:20 +epoch [34/50] batch [480/500] time 1.350 (1.363) data 0.000 (0.002) loss 1.6006 (1.0731) acc 62.5000 (72.9688) lr 5.7422e-04 eta 3:02:13 +epoch [34/50] batch [485/500] time 1.350 (1.363) data 0.001 (0.002) loss 1.5049 (1.0735) acc 62.5000 (72.9768) lr 5.7422e-04 eta 3:02:05 +epoch [34/50] batch [490/500] time 1.339 (1.363) data 0.000 (0.002) loss 0.5757 (1.0710) acc 78.1250 (73.0357) lr 5.7422e-04 eta 3:01:57 +epoch [34/50] batch [495/500] time 1.348 (1.363) data 0.000 (0.002) loss 1.2090 (1.0733) acc 75.0000 (72.9987) lr 5.7422e-04 eta 3:01:50 +epoch [34/50] batch [500/500] time 1.366 (1.363) data 0.000 (0.002) loss 1.0615 (1.0727) acc 65.6250 (72.9938) lr 5.1825e-04 eta 3:01:43 +epoch [35/50] batch [5/500] time 1.387 (1.532) data 0.001 (0.155) loss 0.8403 (0.9844) acc 81.2500 (75.6250) lr 5.1825e-04 eta 3:24:11 +epoch [35/50] batch [10/500] time 1.362 (1.449) data 0.000 (0.078) loss 0.8433 (0.9358) acc 62.5000 (74.0625) lr 5.1825e-04 eta 3:12:59 +epoch [35/50] batch [15/500] time 1.357 (1.423) data 0.000 (0.052) loss 1.2041 (0.9488) acc 65.6250 (75.4167) lr 5.1825e-04 eta 3:09:21 +epoch [35/50] batch [20/500] time 1.357 (1.406) data 0.000 (0.039) loss 0.8096 (0.9824) acc 78.1250 (74.5312) lr 5.1825e-04 eta 3:06:58 +epoch [35/50] batch [25/500] time 1.387 (1.404) data 0.000 (0.031) loss 1.0000 (0.9999) acc 78.1250 (74.1250) lr 5.1825e-04 eta 3:06:39 +epoch [35/50] batch [30/500] time 1.363 (1.398) data 0.000 (0.026) loss 1.2881 (1.0316) acc 75.0000 (74.2708) lr 5.1825e-04 eta 3:05:44 +epoch [35/50] batch [35/500] time 1.368 (1.394) data 0.000 (0.022) loss 1.3652 (1.0478) acc 62.5000 (74.1071) lr 5.1825e-04 eta 3:05:06 +epoch [35/50] batch [40/500] time 1.357 (1.390) data 0.001 (0.020) loss 0.6143 (1.0291) acc 81.2500 (74.1406) lr 5.1825e-04 eta 3:04:27 +epoch [35/50] batch [45/500] time 1.330 (1.387) data 0.001 (0.018) loss 0.5811 (1.0286) acc 81.2500 (74.3056) lr 5.1825e-04 eta 3:03:53 +epoch [35/50] batch [50/500] time 1.359 (1.384) data 
0.000 (0.016) loss 0.7280 (1.0143) acc 84.3750 (74.8125) lr 5.1825e-04 eta 3:03:22 +epoch [35/50] batch [55/500] time 1.342 (1.381) data 0.000 (0.014) loss 1.5381 (1.0328) acc 59.3750 (74.3182) lr 5.1825e-04 eta 3:02:52 +epoch [35/50] batch [60/500] time 1.368 (1.380) data 0.000 (0.013) loss 1.2354 (1.0291) acc 62.5000 (74.0625) lr 5.1825e-04 eta 3:02:39 +epoch [35/50] batch [65/500] time 1.400 (1.381) data 0.000 (0.012) loss 1.6074 (1.0306) acc 65.6250 (73.9904) lr 5.1825e-04 eta 3:02:35 +epoch [35/50] batch [70/500] time 1.362 (1.381) data 0.000 (0.011) loss 0.8262 (1.0076) acc 68.7500 (74.1518) lr 5.1825e-04 eta 3:02:31 +epoch [35/50] batch [75/500] time 1.367 (1.380) data 0.001 (0.011) loss 1.2705 (1.0230) acc 78.1250 (74.0833) lr 5.1825e-04 eta 3:02:17 +epoch [35/50] batch [80/500] time 1.352 (1.381) data 0.000 (0.010) loss 0.7554 (1.0242) acc 78.1250 (74.1406) lr 5.1825e-04 eta 3:02:18 +epoch [35/50] batch [85/500] time 1.354 (1.380) data 0.000 (0.009) loss 0.7339 (1.0102) acc 81.2500 (74.3382) lr 5.1825e-04 eta 3:02:01 +epoch [35/50] batch [90/500] time 1.368 (1.378) data 0.000 (0.009) loss 0.8550 (1.0077) acc 75.0000 (74.3750) lr 5.1825e-04 eta 3:01:43 +epoch [35/50] batch [95/500] time 1.343 (1.377) data 0.000 (0.009) loss 1.5312 (1.0243) acc 53.1250 (74.0132) lr 5.1825e-04 eta 3:01:22 +epoch [35/50] batch [100/500] time 1.359 (1.376) data 0.000 (0.008) loss 0.8696 (1.0275) acc 78.1250 (73.9062) lr 5.1825e-04 eta 3:01:08 +epoch [35/50] batch [105/500] time 1.341 (1.375) data 0.000 (0.008) loss 1.0410 (1.0265) acc 71.8750 (73.8690) lr 5.1825e-04 eta 3:00:54 +epoch [35/50] batch [110/500] time 1.344 (1.374) data 0.000 (0.007) loss 2.0723 (1.0362) acc 56.2500 (73.7784) lr 5.1825e-04 eta 3:00:41 +epoch [35/50] batch [115/500] time 1.356 (1.373) data 0.000 (0.007) loss 0.8213 (1.0309) acc 75.0000 (73.8315) lr 5.1825e-04 eta 3:00:26 +epoch [35/50] batch [120/500] time 1.361 (1.373) data 0.000 (0.007) loss 0.8799 (1.0252) acc 75.0000 (73.8802) lr 5.1825e-04 eta 
3:00:16 +epoch [35/50] batch [125/500] time 1.377 (1.374) data 0.000 (0.007) loss 1.2939 (1.0226) acc 75.0000 (74.0250) lr 5.1825e-04 eta 3:00:16 +epoch [35/50] batch [130/500] time 1.369 (1.373) data 0.000 (0.006) loss 0.5820 (1.0253) acc 81.2500 (73.9663) lr 5.1825e-04 eta 3:00:04 +epoch [35/50] batch [135/500] time 1.357 (1.372) data 0.000 (0.006) loss 1.2256 (1.0373) acc 75.0000 (73.8657) lr 5.1825e-04 eta 2:59:53 +epoch [35/50] batch [140/500] time 1.391 (1.373) data 0.000 (0.006) loss 0.8853 (1.0319) acc 78.1250 (74.0179) lr 5.1825e-04 eta 2:59:48 +epoch [35/50] batch [145/500] time 1.369 (1.372) data 0.000 (0.006) loss 0.7139 (1.0289) acc 78.1250 (74.0302) lr 5.1825e-04 eta 2:59:36 +epoch [35/50] batch [150/500] time 1.350 (1.371) data 0.000 (0.006) loss 0.9028 (1.0310) acc 78.1250 (74.0208) lr 5.1825e-04 eta 2:59:25 +epoch [35/50] batch [155/500] time 1.373 (1.371) data 0.000 (0.005) loss 2.1992 (1.0354) acc 62.5000 (73.9315) lr 5.1825e-04 eta 2:59:17 +epoch [35/50] batch [160/500] time 1.346 (1.371) data 0.000 (0.005) loss 0.7876 (1.0334) acc 78.1250 (73.9648) lr 5.1825e-04 eta 2:59:05 +epoch [35/50] batch [165/500] time 1.362 (1.370) data 0.000 (0.005) loss 1.3809 (1.0368) acc 75.0000 (73.9205) lr 5.1825e-04 eta 2:58:57 +epoch [35/50] batch [170/500] time 1.362 (1.370) data 0.000 (0.005) loss 0.7129 (1.0344) acc 84.3750 (73.9706) lr 5.1825e-04 eta 2:58:49 +epoch [35/50] batch [175/500] time 1.369 (1.370) data 0.000 (0.005) loss 1.4170 (1.0314) acc 71.8750 (74.0357) lr 5.1825e-04 eta 2:58:40 +epoch [35/50] batch [180/500] time 1.358 (1.370) data 0.000 (0.005) loss 1.2305 (1.0326) acc 68.7500 (73.9062) lr 5.1825e-04 eta 2:58:32 +epoch [35/50] batch [185/500] time 1.368 (1.370) data 0.000 (0.005) loss 1.0605 (1.0354) acc 68.7500 (73.7838) lr 5.1825e-04 eta 2:58:23 +epoch [35/50] batch [190/500] time 1.344 (1.369) data 0.000 (0.004) loss 1.3955 (1.0413) acc 65.6250 (73.6513) lr 5.1825e-04 eta 2:58:14 +epoch [35/50] batch [195/500] time 1.368 (1.369) data 
0.000 (0.004) loss 1.4834 (1.0487) acc 78.1250 (73.5737) lr 5.1825e-04 eta 2:58:04 +epoch [35/50] batch [200/500] time 1.339 (1.368) data 0.000 (0.004) loss 1.0107 (1.0474) acc 84.3750 (73.6719) lr 5.1825e-04 eta 2:57:52 +epoch [35/50] batch [205/500] time 1.331 (1.368) data 0.000 (0.004) loss 1.1992 (1.0527) acc 71.8750 (73.5213) lr 5.1825e-04 eta 2:57:43 +epoch [35/50] batch [210/500] time 1.370 (1.368) data 0.001 (0.004) loss 0.9897 (1.0500) acc 75.0000 (73.4821) lr 5.1825e-04 eta 2:57:36 +epoch [35/50] batch [215/500] time 1.341 (1.368) data 0.000 (0.004) loss 0.8281 (1.0504) acc 78.1250 (73.4157) lr 5.1825e-04 eta 2:57:27 +epoch [35/50] batch [220/500] time 1.361 (1.367) data 0.000 (0.004) loss 0.6611 (1.0442) acc 75.0000 (73.5511) lr 5.1825e-04 eta 2:57:18 +epoch [35/50] batch [225/500] time 1.365 (1.368) data 0.000 (0.004) loss 0.8647 (1.0426) acc 78.1250 (73.5556) lr 5.1825e-04 eta 2:57:15 +epoch [35/50] batch [230/500] time 1.355 (1.367) data 0.000 (0.004) loss 1.0879 (1.0452) acc 75.0000 (73.5190) lr 5.1825e-04 eta 2:57:05 +epoch [35/50] batch [235/500] time 1.359 (1.367) data 0.000 (0.004) loss 0.8608 (1.0411) acc 78.1250 (73.6170) lr 5.1825e-04 eta 2:56:55 +epoch [35/50] batch [240/500] time 1.345 (1.367) data 0.000 (0.004) loss 1.2041 (1.0462) acc 75.0000 (73.4896) lr 5.1825e-04 eta 2:56:47 +epoch [35/50] batch [245/500] time 1.351 (1.367) data 0.000 (0.004) loss 0.9263 (1.0425) acc 75.0000 (73.5459) lr 5.1825e-04 eta 2:56:37 +epoch [35/50] batch [250/500] time 1.376 (1.366) data 0.000 (0.003) loss 1.0869 (1.0417) acc 81.2500 (73.6000) lr 5.1825e-04 eta 2:56:29 +epoch [35/50] batch [255/500] time 1.341 (1.366) data 0.000 (0.003) loss 0.8032 (1.0432) acc 78.1250 (73.5172) lr 5.1825e-04 eta 2:56:21 +epoch [35/50] batch [260/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.1650 (1.0492) acc 71.8750 (73.4615) lr 5.1825e-04 eta 2:56:13 +epoch [35/50] batch [265/500] time 1.500 (1.366) data 0.000 (0.003) loss 0.5737 (1.0449) acc 81.2500 (73.5613) lr 
5.1825e-04 eta 2:56:09 +epoch [35/50] batch [270/500] time 1.364 (1.366) data 0.000 (0.003) loss 1.2139 (1.0473) acc 56.2500 (73.4375) lr 5.1825e-04 eta 2:56:01 +epoch [35/50] batch [275/500] time 1.359 (1.366) data 0.000 (0.003) loss 1.2412 (1.0462) acc 65.6250 (73.4318) lr 5.1825e-04 eta 2:55:54 +epoch [35/50] batch [280/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.3320 (1.0522) acc 68.7500 (73.3705) lr 5.1825e-04 eta 2:55:47 +epoch [35/50] batch [285/500] time 1.337 (1.366) data 0.000 (0.003) loss 0.8604 (1.0518) acc 75.0000 (73.3224) lr 5.1825e-04 eta 2:55:37 +epoch [35/50] batch [290/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.3564 (1.0526) acc 65.6250 (73.2651) lr 5.1825e-04 eta 2:55:31 +epoch [35/50] batch [295/500] time 1.343 (1.366) data 0.001 (0.003) loss 1.2285 (1.0504) acc 65.6250 (73.2839) lr 5.1825e-04 eta 2:55:22 +epoch [35/50] batch [300/500] time 1.340 (1.365) data 0.000 (0.003) loss 1.2285 (1.0522) acc 75.0000 (73.2604) lr 5.1825e-04 eta 2:55:13 +epoch [35/50] batch [305/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.4258 (1.0528) acc 65.6250 (73.2480) lr 5.1825e-04 eta 2:55:06 +epoch [35/50] batch [310/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.8096 (1.0556) acc 71.8750 (73.1653) lr 5.1825e-04 eta 2:54:58 +epoch [35/50] batch [315/500] time 1.354 (1.365) data 0.000 (0.003) loss 1.0039 (1.0579) acc 78.1250 (73.1746) lr 5.1825e-04 eta 2:54:51 +epoch [35/50] batch [320/500] time 1.368 (1.365) data 0.000 (0.003) loss 0.9761 (1.0569) acc 75.0000 (73.2422) lr 5.1825e-04 eta 2:54:44 +epoch [35/50] batch [325/500] time 1.352 (1.365) data 0.000 (0.003) loss 0.6260 (1.0575) acc 81.2500 (73.2596) lr 5.1825e-04 eta 2:54:37 +epoch [35/50] batch [330/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.2656 (1.0573) acc 56.2500 (73.2481) lr 5.1825e-04 eta 2:54:31 +epoch [35/50] batch [335/500] time 1.338 (1.365) data 0.001 (0.003) loss 1.3447 (1.0588) acc 59.3750 (73.2090) lr 5.1825e-04 eta 2:54:23 +epoch [35/50] batch [340/500] time 1.352 
(1.365) data 0.000 (0.003) loss 1.2236 (1.0617) acc 62.5000 (73.1158) lr 5.1825e-04 eta 2:54:15 +epoch [35/50] batch [345/500] time 1.354 (1.365) data 0.000 (0.003) loss 0.4390 (1.0596) acc 87.5000 (73.1703) lr 5.1825e-04 eta 2:54:08 +epoch [35/50] batch [350/500] time 1.336 (1.365) data 0.000 (0.003) loss 0.9946 (1.0569) acc 81.2500 (73.2679) lr 5.1825e-04 eta 2:53:59 +epoch [35/50] batch [355/500] time 1.343 (1.364) data 0.000 (0.003) loss 1.2256 (1.0583) acc 65.6250 (73.2306) lr 5.1825e-04 eta 2:53:50 +epoch [35/50] batch [360/500] time 1.355 (1.364) data 0.001 (0.003) loss 0.7554 (1.0555) acc 87.5000 (73.3507) lr 5.1825e-04 eta 2:53:42 +epoch [35/50] batch [365/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.1006 (1.0551) acc 78.1250 (73.3990) lr 5.1825e-04 eta 2:53:38 +epoch [35/50] batch [370/500] time 1.406 (1.365) data 0.001 (0.002) loss 1.2363 (1.0515) acc 71.8750 (73.4628) lr 5.1825e-04 eta 2:53:32 +epoch [35/50] batch [375/500] time 1.368 (1.365) data 0.000 (0.002) loss 1.3730 (1.0538) acc 56.2500 (73.3667) lr 5.1825e-04 eta 2:53:26 +epoch [35/50] batch [380/500] time 1.341 (1.365) data 0.000 (0.002) loss 0.8379 (1.0535) acc 75.0000 (73.3224) lr 5.1825e-04 eta 2:53:18 +epoch [35/50] batch [385/500] time 1.366 (1.365) data 0.000 (0.002) loss 0.4031 (1.0511) acc 90.6250 (73.4010) lr 5.1825e-04 eta 2:53:10 +epoch [35/50] batch [390/500] time 1.345 (1.364) data 0.000 (0.002) loss 0.9658 (1.0477) acc 84.3750 (73.5016) lr 5.1825e-04 eta 2:53:02 +epoch [35/50] batch [395/500] time 1.369 (1.364) data 0.000 (0.002) loss 1.4863 (1.0494) acc 75.0000 (73.4968) lr 5.1825e-04 eta 2:52:55 +epoch [35/50] batch [400/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.1953 (1.0494) acc 62.5000 (73.4688) lr 5.1825e-04 eta 2:52:48 +epoch [35/50] batch [405/500] time 1.351 (1.364) data 0.000 (0.002) loss 1.1680 (1.0514) acc 62.5000 (73.4336) lr 5.1825e-04 eta 2:52:40 +epoch [35/50] batch [410/500] time 1.353 (1.364) data 0.000 (0.002) loss 0.8599 (1.0525) acc 84.3750 
(73.4680) lr 5.1825e-04 eta 2:52:36 +epoch [35/50] batch [415/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.6494 (1.0524) acc 65.6250 (73.4639) lr 5.1825e-04 eta 2:52:28 +epoch [35/50] batch [420/500] time 1.339 (1.364) data 0.001 (0.002) loss 1.0557 (1.0515) acc 71.8750 (73.4747) lr 5.1825e-04 eta 2:52:21 +epoch [35/50] batch [425/500] time 1.358 (1.364) data 0.000 (0.002) loss 0.6504 (1.0523) acc 87.5000 (73.4779) lr 5.1825e-04 eta 2:52:14 +epoch [35/50] batch [430/500] time 1.352 (1.364) data 0.000 (0.002) loss 1.1172 (1.0537) acc 71.8750 (73.4157) lr 5.1825e-04 eta 2:52:06 +epoch [35/50] batch [435/500] time 1.379 (1.364) data 0.000 (0.002) loss 1.4961 (1.0520) acc 68.7500 (73.4698) lr 5.1825e-04 eta 2:52:00 +epoch [35/50] batch [440/500] time 1.361 (1.364) data 0.000 (0.002) loss 0.8320 (1.0496) acc 65.6250 (73.4588) lr 5.1825e-04 eta 2:51:53 +epoch [35/50] batch [445/500] time 1.349 (1.364) data 0.000 (0.002) loss 1.1543 (1.0510) acc 78.1250 (73.4059) lr 5.1825e-04 eta 2:51:45 +epoch [35/50] batch [450/500] time 1.350 (1.364) data 0.000 (0.002) loss 0.8818 (1.0524) acc 71.8750 (73.3750) lr 5.1825e-04 eta 2:51:37 +epoch [35/50] batch [455/500] time 1.362 (1.364) data 0.000 (0.002) loss 0.7886 (1.0508) acc 81.2500 (73.3929) lr 5.1825e-04 eta 2:51:30 +epoch [35/50] batch [460/500] time 1.332 (1.364) data 0.000 (0.002) loss 1.3398 (1.0518) acc 59.3750 (73.3628) lr 5.1825e-04 eta 2:51:22 +epoch [35/50] batch [465/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.0068 (1.0518) acc 59.3750 (73.3065) lr 5.1825e-04 eta 2:51:14 +epoch [35/50] batch [470/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.3682 (1.0534) acc 75.0000 (73.2447) lr 5.1825e-04 eta 2:51:07 +epoch [35/50] batch [475/500] time 1.368 (1.363) data 0.000 (0.002) loss 1.5820 (1.0540) acc 62.5000 (73.2237) lr 5.1825e-04 eta 2:50:59 +epoch [35/50] batch [480/500] time 1.356 (1.363) data 0.000 (0.002) loss 0.8384 (1.0527) acc 78.1250 (73.2422) lr 5.1825e-04 eta 2:50:52 +epoch [35/50] batch [485/500] 
time 1.352 (1.363) data 0.001 (0.002) loss 1.1670 (1.0526) acc 75.0000 (73.2345) lr 5.1825e-04 eta 2:50:44 +epoch [35/50] batch [490/500] time 1.347 (1.363) data 0.000 (0.002) loss 1.4180 (1.0545) acc 75.0000 (73.2143) lr 5.1825e-04 eta 2:50:36 +epoch [35/50] batch [495/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.3076 (1.0555) acc 71.8750 (73.2386) lr 5.1825e-04 eta 2:50:29 +epoch [35/50] batch [500/500] time 1.327 (1.363) data 0.000 (0.002) loss 1.1699 (1.0539) acc 78.1250 (73.2625) lr 4.6417e-04 eta 2:50:21 +epoch [36/50] batch [5/500] time 1.354 (1.575) data 0.000 (0.209) loss 1.0146 (1.0461) acc 71.8750 (74.3750) lr 4.6417e-04 eta 3:16:41 +epoch [36/50] batch [10/500] time 1.356 (1.465) data 0.001 (0.105) loss 0.6724 (1.0539) acc 81.2500 (76.8750) lr 4.6417e-04 eta 3:02:50 +epoch [36/50] batch [15/500] time 1.341 (1.429) data 0.000 (0.070) loss 1.2471 (1.0610) acc 65.6250 (75.4167) lr 4.6417e-04 eta 2:58:14 +epoch [36/50] batch [20/500] time 1.340 (1.410) data 0.000 (0.053) loss 0.9888 (1.0952) acc 65.6250 (73.4375) lr 4.6417e-04 eta 2:55:47 +epoch [36/50] batch [25/500] time 1.360 (1.400) data 0.000 (0.042) loss 1.1689 (1.0642) acc 71.8750 (73.1250) lr 4.6417e-04 eta 2:54:24 +epoch [36/50] batch [30/500] time 1.363 (1.394) data 0.000 (0.035) loss 1.0703 (1.0340) acc 68.7500 (73.6458) lr 4.6417e-04 eta 2:53:33 +epoch [36/50] batch [35/500] time 1.375 (1.390) data 0.000 (0.030) loss 1.0752 (1.0279) acc 65.6250 (73.3929) lr 4.6417e-04 eta 2:52:59 +epoch [36/50] batch [40/500] time 1.351 (1.387) data 0.000 (0.026) loss 1.0020 (1.0189) acc 71.8750 (73.4375) lr 4.6417e-04 eta 2:52:30 +epoch [36/50] batch [45/500] time 1.349 (1.384) data 0.000 (0.024) loss 0.6245 (1.0155) acc 84.3750 (73.5417) lr 4.6417e-04 eta 2:51:56 +epoch [36/50] batch [50/500] time 1.360 (1.381) data 0.000 (0.021) loss 1.4502 (1.0348) acc 68.7500 (73.3750) lr 4.6417e-04 eta 2:51:28 +epoch [36/50] batch [55/500] time 1.379 (1.379) data 0.000 (0.019) loss 0.9077 (1.0281) acc 71.8750 
(73.6364) lr 4.6417e-04 eta 2:51:06 +epoch [36/50] batch [60/500] time 1.369 (1.377) data 0.000 (0.018) loss 0.7437 (1.0078) acc 75.0000 (74.1146) lr 4.6417e-04 eta 2:50:44 +epoch [36/50] batch [65/500] time 1.353 (1.378) data 0.000 (0.016) loss 1.1191 (1.0035) acc 68.7500 (74.4231) lr 4.6417e-04 eta 2:50:42 +epoch [36/50] batch [70/500] time 1.366 (1.377) data 0.000 (0.015) loss 0.8940 (0.9965) acc 71.8750 (74.5089) lr 4.6417e-04 eta 2:50:29 +epoch [36/50] batch [75/500] time 1.368 (1.376) data 0.000 (0.014) loss 0.7617 (0.9959) acc 78.1250 (74.8333) lr 4.6417e-04 eta 2:50:19 +epoch [36/50] batch [80/500] time 1.362 (1.376) data 0.001 (0.013) loss 1.7500 (1.0034) acc 62.5000 (74.7266) lr 4.6417e-04 eta 2:50:07 +epoch [36/50] batch [85/500] time 1.364 (1.374) data 0.000 (0.013) loss 1.0225 (1.0035) acc 62.5000 (74.6324) lr 4.6417e-04 eta 2:49:49 +epoch [36/50] batch [90/500] time 1.377 (1.373) data 0.000 (0.012) loss 1.2002 (1.0127) acc 75.0000 (74.5139) lr 4.6417e-04 eta 2:49:34 +epoch [36/50] batch [95/500] time 1.374 (1.373) data 0.000 (0.011) loss 1.2500 (1.0179) acc 78.1250 (74.4079) lr 4.6417e-04 eta 2:49:25 +epoch [36/50] batch [100/500] time 1.360 (1.372) data 0.000 (0.011) loss 0.9722 (1.0230) acc 78.1250 (74.2500) lr 4.6417e-04 eta 2:49:13 +epoch [36/50] batch [105/500] time 1.364 (1.372) data 0.000 (0.010) loss 1.0508 (1.0129) acc 75.0000 (74.3750) lr 4.6417e-04 eta 2:49:02 +epoch [36/50] batch [110/500] time 1.351 (1.372) data 0.000 (0.010) loss 0.9248 (1.0247) acc 81.2500 (74.3466) lr 4.6417e-04 eta 2:48:59 +epoch [36/50] batch [115/500] time 1.343 (1.371) data 0.000 (0.009) loss 1.1191 (1.0250) acc 68.7500 (74.2935) lr 4.6417e-04 eta 2:48:43 +epoch [36/50] batch [120/500] time 1.362 (1.371) data 0.000 (0.009) loss 0.7681 (1.0185) acc 78.1250 (74.4792) lr 4.6417e-04 eta 2:48:34 +epoch [36/50] batch [125/500] time 1.376 (1.370) data 0.000 (0.009) loss 0.7339 (1.0204) acc 75.0000 (74.4250) lr 4.6417e-04 eta 2:48:24 +epoch [36/50] batch [130/500] time 
1.374 (1.370) data 0.000 (0.008) loss 0.7383 (1.0237) acc 75.0000 (74.5192) lr 4.6417e-04 eta 2:48:13 +epoch [36/50] batch [135/500] time 1.377 (1.370) data 0.000 (0.008) loss 0.5122 (1.0228) acc 87.5000 (74.4213) lr 4.6417e-04 eta 2:48:07 +epoch [36/50] batch [140/500] time 1.360 (1.369) data 0.000 (0.008) loss 1.0723 (1.0246) acc 71.8750 (74.3973) lr 4.6417e-04 eta 2:47:57 +epoch [36/50] batch [145/500] time 1.362 (1.369) data 0.000 (0.008) loss 1.5400 (1.0280) acc 68.7500 (74.2026) lr 4.6417e-04 eta 2:47:48 +epoch [36/50] batch [150/500] time 1.344 (1.368) data 0.000 (0.007) loss 0.8550 (1.0337) acc 71.8750 (74.1042) lr 4.6417e-04 eta 2:47:36 +epoch [36/50] batch [155/500] time 1.354 (1.368) data 0.000 (0.007) loss 0.9614 (1.0369) acc 75.0000 (74.0726) lr 4.6417e-04 eta 2:47:27 +epoch [36/50] batch [160/500] time 1.331 (1.368) data 0.000 (0.007) loss 0.9634 (1.0320) acc 71.8750 (74.1406) lr 4.6417e-04 eta 2:47:18 +epoch [36/50] batch [165/500] time 1.355 (1.367) data 0.000 (0.007) loss 0.7534 (1.0256) acc 71.8750 (74.2992) lr 4.6417e-04 eta 2:47:09 +epoch [36/50] batch [170/500] time 1.381 (1.367) data 0.000 (0.007) loss 1.5664 (1.0311) acc 62.5000 (74.1912) lr 4.6417e-04 eta 2:47:02 +epoch [36/50] batch [175/500] time 1.359 (1.367) data 0.001 (0.006) loss 1.3154 (1.0350) acc 75.0000 (74.2679) lr 4.6417e-04 eta 2:46:53 +epoch [36/50] batch [180/500] time 1.348 (1.367) data 0.000 (0.006) loss 1.1729 (1.0396) acc 75.0000 (74.2188) lr 4.6417e-04 eta 2:46:44 +epoch [36/50] batch [185/500] time 1.365 (1.367) data 0.000 (0.006) loss 0.8804 (1.0415) acc 75.0000 (74.0541) lr 4.6417e-04 eta 2:46:36 +epoch [36/50] batch [190/500] time 1.371 (1.366) data 0.000 (0.006) loss 0.8755 (1.0436) acc 71.8750 (73.9309) lr 4.6417e-04 eta 2:46:28 +epoch [36/50] batch [195/500] time 1.363 (1.367) data 0.000 (0.006) loss 0.9160 (1.0401) acc 71.8750 (73.9904) lr 4.6417e-04 eta 2:46:22 +epoch [36/50] batch [200/500] time 1.350 (1.366) data 0.000 (0.006) loss 0.6621 (1.0361) acc 84.3750 
(74.1094) lr 4.6417e-04 eta 2:46:12 +epoch [36/50] batch [205/500] time 1.375 (1.366) data 0.000 (0.005) loss 0.8999 (1.0403) acc 75.0000 (74.0549) lr 4.6417e-04 eta 2:46:05 +epoch [36/50] batch [210/500] time 1.362 (1.367) data 0.000 (0.005) loss 1.0498 (1.0451) acc 59.3750 (73.8988) lr 4.6417e-04 eta 2:46:02 +epoch [36/50] batch [215/500] time 1.346 (1.366) data 0.000 (0.005) loss 0.6660 (1.0448) acc 75.0000 (73.8953) lr 4.6417e-04 eta 2:45:53 +epoch [36/50] batch [220/500] time 1.349 (1.366) data 0.000 (0.005) loss 1.0635 (1.0443) acc 87.5000 (73.9489) lr 4.6417e-04 eta 2:45:46 +epoch [36/50] batch [225/500] time 1.351 (1.366) data 0.000 (0.005) loss 1.0684 (1.0439) acc 71.8750 (73.9306) lr 4.6417e-04 eta 2:45:39 +epoch [36/50] batch [230/500] time 1.357 (1.366) data 0.000 (0.005) loss 1.1133 (1.0487) acc 75.0000 (73.8995) lr 4.6417e-04 eta 2:45:29 +epoch [36/50] batch [235/500] time 1.352 (1.366) data 0.000 (0.005) loss 1.2168 (1.0462) acc 68.7500 (73.9761) lr 4.6417e-04 eta 2:45:20 +epoch [36/50] batch [240/500] time 1.368 (1.365) data 0.000 (0.005) loss 0.6006 (1.0422) acc 87.5000 (74.0885) lr 4.6417e-04 eta 2:45:12 +epoch [36/50] batch [245/500] time 1.355 (1.366) data 0.000 (0.005) loss 1.6621 (1.0436) acc 59.3750 (74.0306) lr 4.6417e-04 eta 2:45:08 +epoch [36/50] batch [250/500] time 1.491 (1.366) data 0.000 (0.005) loss 1.2246 (1.0499) acc 78.1250 (73.8500) lr 4.6417e-04 eta 2:45:04 +epoch [36/50] batch [255/500] time 1.363 (1.366) data 0.000 (0.004) loss 0.6187 (1.0464) acc 75.0000 (73.9093) lr 4.6417e-04 eta 2:44:57 +epoch [36/50] batch [260/500] time 1.364 (1.366) data 0.000 (0.004) loss 0.8418 (1.0429) acc 81.2500 (73.9784) lr 4.6417e-04 eta 2:44:49 +epoch [36/50] batch [265/500] time 1.357 (1.366) data 0.000 (0.004) loss 1.3584 (1.0465) acc 75.0000 (73.9505) lr 4.6417e-04 eta 2:44:42 +epoch [36/50] batch [270/500] time 1.371 (1.366) data 0.000 (0.004) loss 1.0566 (1.0443) acc 71.8750 (73.9352) lr 4.6417e-04 eta 2:44:35 +epoch [36/50] batch [275/500] 
time 1.369 (1.366) data 0.000 (0.004) loss 0.7290 (1.0454) acc 84.3750 (73.8977) lr 4.6417e-04 eta 2:44:27 +epoch [36/50] batch [280/500] time 1.383 (1.366) data 0.000 (0.004) loss 0.9155 (1.0461) acc 78.1250 (73.9174) lr 4.6417e-04 eta 2:44:21 +epoch [36/50] batch [285/500] time 1.374 (1.366) data 0.000 (0.004) loss 1.2129 (1.0459) acc 71.8750 (73.9693) lr 4.6417e-04 eta 2:44:14 +epoch [36/50] batch [290/500] time 1.365 (1.366) data 0.000 (0.004) loss 0.2268 (1.0449) acc 96.8750 (73.9871) lr 4.6417e-04 eta 2:44:07 +epoch [36/50] batch [295/500] time 1.369 (1.366) data 0.000 (0.004) loss 1.4756 (1.0458) acc 75.0000 (74.0572) lr 4.6417e-04 eta 2:43:59 +epoch [36/50] batch [300/500] time 1.335 (1.366) data 0.000 (0.004) loss 0.8955 (1.0461) acc 78.1250 (74.0417) lr 4.6417e-04 eta 2:43:52 +epoch [36/50] batch [305/500] time 1.346 (1.365) data 0.000 (0.004) loss 1.4434 (1.0464) acc 65.6250 (73.9857) lr 4.6417e-04 eta 2:43:44 +epoch [36/50] batch [310/500] time 1.373 (1.365) data 0.000 (0.004) loss 2.0195 (1.0492) acc 59.3750 (73.9819) lr 4.6417e-04 eta 2:43:36 +epoch [36/50] batch [315/500] time 1.372 (1.365) data 0.000 (0.004) loss 0.7085 (1.0475) acc 84.3750 (74.0675) lr 4.6417e-04 eta 2:43:30 +epoch [36/50] batch [320/500] time 1.376 (1.365) data 0.000 (0.004) loss 1.1035 (1.0455) acc 78.1250 (74.1016) lr 4.6417e-04 eta 2:43:23 +epoch [36/50] batch [325/500] time 1.335 (1.365) data 0.000 (0.004) loss 0.9009 (1.0457) acc 75.0000 (74.1058) lr 4.6417e-04 eta 2:43:15 +epoch [36/50] batch [330/500] time 1.332 (1.365) data 0.000 (0.004) loss 0.8799 (1.0453) acc 71.8750 (74.0625) lr 4.6417e-04 eta 2:43:06 +epoch [36/50] batch [335/500] time 1.372 (1.365) data 0.000 (0.003) loss 0.9341 (1.0456) acc 78.1250 (74.0858) lr 4.6417e-04 eta 2:42:59 +epoch [36/50] batch [340/500] time 1.376 (1.365) data 0.000 (0.003) loss 0.6221 (1.0454) acc 78.1250 (74.0533) lr 4.6417e-04 eta 2:42:53 +epoch [36/50] batch [345/500] time 1.358 (1.365) data 0.000 (0.003) loss 0.8193 (1.0449) acc 
81.2500 (74.0399) lr 4.6417e-04 eta 2:42:45 +epoch [36/50] batch [350/500] time 1.391 (1.365) data 0.000 (0.003) loss 0.8848 (1.0447) acc 71.8750 (74.0179) lr 4.6417e-04 eta 2:42:42 +epoch [36/50] batch [355/500] time 1.388 (1.366) data 0.000 (0.003) loss 1.9629 (1.0472) acc 65.6250 (73.9701) lr 4.6417e-04 eta 2:42:37 +epoch [36/50] batch [360/500] time 1.370 (1.366) data 0.000 (0.003) loss 0.8008 (1.0477) acc 68.7500 (73.8976) lr 4.6417e-04 eta 2:42:30 +epoch [36/50] batch [365/500] time 1.364 (1.366) data 0.000 (0.003) loss 1.1016 (1.0444) acc 71.8750 (73.9983) lr 4.6417e-04 eta 2:42:23 +epoch [36/50] batch [370/500] time 1.390 (1.366) data 0.000 (0.003) loss 0.5435 (1.0435) acc 87.5000 (74.0372) lr 4.6417e-04 eta 2:42:17 +epoch [36/50] batch [375/500] time 1.349 (1.366) data 0.000 (0.003) loss 0.7266 (1.0412) acc 81.2500 (74.0667) lr 4.6417e-04 eta 2:42:10 +epoch [36/50] batch [380/500] time 1.373 (1.366) data 0.001 (0.003) loss 1.0410 (1.0398) acc 71.8750 (74.0954) lr 4.6417e-04 eta 2:42:02 +epoch [36/50] batch [385/500] time 1.397 (1.366) data 0.001 (0.003) loss 0.6587 (1.0386) acc 78.1250 (74.0828) lr 4.6417e-04 eta 2:41:56 +epoch [36/50] batch [390/500] time 1.375 (1.365) data 0.000 (0.003) loss 0.8096 (1.0400) acc 68.7500 (74.0064) lr 4.6417e-04 eta 2:41:48 +epoch [36/50] batch [395/500] time 1.360 (1.366) data 0.000 (0.003) loss 1.0713 (1.0391) acc 59.3750 (74.0190) lr 4.6417e-04 eta 2:41:43 +epoch [36/50] batch [400/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.2842 (1.0420) acc 65.6250 (74.0000) lr 4.6417e-04 eta 2:41:36 +epoch [36/50] batch [405/500] time 1.360 (1.365) data 0.000 (0.003) loss 0.9741 (1.0412) acc 75.0000 (74.0509) lr 4.6417e-04 eta 2:41:27 +epoch [36/50] batch [410/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.9502 (1.0423) acc 59.3750 (74.0244) lr 4.6417e-04 eta 2:41:19 +epoch [36/50] batch [415/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.0498 (1.0420) acc 68.7500 (74.0286) lr 4.6417e-04 eta 2:41:11 +epoch [36/50] batch 
[420/500] time 1.352 (1.365) data 0.000 (0.003) loss 1.0938 (1.0454) acc 68.7500 (73.9583) lr 4.6417e-04 eta 2:41:03 +epoch [36/50] batch [425/500] time 1.348 (1.365) data 0.000 (0.003) loss 1.1152 (1.0469) acc 78.1250 (73.9191) lr 4.6417e-04 eta 2:40:57 +epoch [36/50] batch [430/500] time 1.367 (1.365) data 0.000 (0.003) loss 2.0020 (1.0512) acc 56.2500 (73.8009) lr 4.6417e-04 eta 2:40:50 +epoch [36/50] batch [435/500] time 1.351 (1.365) data 0.000 (0.003) loss 1.4277 (1.0517) acc 68.7500 (73.8075) lr 4.6417e-04 eta 2:40:42 +epoch [36/50] batch [440/500] time 1.348 (1.365) data 0.000 (0.003) loss 1.1260 (1.0493) acc 68.7500 (73.8068) lr 4.6417e-04 eta 2:40:35 +epoch [36/50] batch [445/500] time 1.360 (1.365) data 0.000 (0.003) loss 0.7246 (1.0490) acc 81.2500 (73.8202) lr 4.6417e-04 eta 2:40:27 +epoch [36/50] batch [450/500] time 1.372 (1.365) data 0.000 (0.003) loss 0.9761 (1.0501) acc 81.2500 (73.8333) lr 4.6417e-04 eta 2:40:20 +epoch [36/50] batch [455/500] time 1.349 (1.365) data 0.000 (0.003) loss 1.4912 (1.0515) acc 68.7500 (73.8187) lr 4.6417e-04 eta 2:40:13 +epoch [36/50] batch [460/500] time 1.383 (1.365) data 0.000 (0.003) loss 1.0264 (1.0495) acc 65.6250 (73.8587) lr 4.6417e-04 eta 2:40:06 +epoch [36/50] batch [465/500] time 1.367 (1.365) data 0.000 (0.003) loss 0.8149 (1.0494) acc 81.2500 (73.8710) lr 4.6417e-04 eta 2:40:00 +epoch [36/50] batch [470/500] time 1.371 (1.365) data 0.000 (0.003) loss 0.9805 (1.0503) acc 59.3750 (73.7766) lr 4.6417e-04 eta 2:39:53 +epoch [36/50] batch [475/500] time 1.358 (1.365) data 0.001 (0.003) loss 0.6436 (1.0483) acc 81.2500 (73.8026) lr 4.6417e-04 eta 2:39:46 +epoch [36/50] batch [480/500] time 1.350 (1.365) data 0.000 (0.003) loss 1.2246 (1.0501) acc 68.7500 (73.7826) lr 4.6417e-04 eta 2:39:39 +epoch [36/50] batch [485/500] time 1.379 (1.365) data 0.001 (0.003) loss 1.0146 (1.0512) acc 68.7500 (73.7629) lr 4.6417e-04 eta 2:39:32 +epoch [36/50] batch [490/500] time 1.377 (1.365) data 0.000 (0.003) loss 1.1611 
(1.0512) acc 68.7500 (73.7500) lr 4.6417e-04 eta 2:39:25 +epoch [36/50] batch [495/500] time 1.338 (1.365) data 0.000 (0.002) loss 1.0547 (1.0523) acc 75.0000 (73.7247) lr 4.6417e-04 eta 2:39:20 +epoch [36/50] batch [500/500] time 1.358 (1.365) data 0.000 (0.002) loss 1.3438 (1.0526) acc 68.7500 (73.7125) lr 4.1221e-04 eta 2:39:13 +epoch [37/50] batch [5/500] time 1.373 (1.524) data 0.001 (0.155) loss 1.1191 (0.9126) acc 78.1250 (77.5000) lr 4.1221e-04 eta 2:57:37 +epoch [37/50] batch [10/500] time 1.362 (1.442) data 0.001 (0.078) loss 1.5635 (1.0203) acc 62.5000 (74.0625) lr 4.1221e-04 eta 2:47:57 +epoch [37/50] batch [15/500] time 1.368 (1.415) data 0.000 (0.052) loss 0.9106 (0.9857) acc 81.2500 (75.4167) lr 4.1221e-04 eta 2:44:45 +epoch [37/50] batch [20/500] time 1.351 (1.401) data 0.000 (0.039) loss 0.9956 (0.9884) acc 75.0000 (73.9062) lr 4.1221e-04 eta 2:42:58 +epoch [37/50] batch [25/500] time 1.357 (1.393) data 0.000 (0.031) loss 1.1172 (1.0047) acc 71.8750 (73.3750) lr 4.1221e-04 eta 2:41:58 +epoch [37/50] batch [30/500] time 1.345 (1.388) data 0.000 (0.026) loss 1.4834 (1.0497) acc 62.5000 (72.6042) lr 4.1221e-04 eta 2:41:11 +epoch [37/50] batch [35/500] time 1.350 (1.381) data 0.000 (0.023) loss 1.0400 (1.0576) acc 71.8750 (72.3214) lr 4.1221e-04 eta 2:40:21 +epoch [37/50] batch [40/500] time 1.359 (1.379) data 0.000 (0.020) loss 0.7402 (1.0650) acc 81.2500 (72.1094) lr 4.1221e-04 eta 2:39:58 +epoch [37/50] batch [45/500] time 1.368 (1.377) data 0.000 (0.018) loss 0.6143 (1.0382) acc 78.1250 (72.5694) lr 4.1221e-04 eta 2:39:34 +epoch [37/50] batch [50/500] time 1.352 (1.378) data 0.000 (0.016) loss 0.9468 (1.0194) acc 71.8750 (72.7500) lr 4.1221e-04 eta 2:39:33 +epoch [37/50] batch [55/500] time 1.357 (1.375) data 0.000 (0.014) loss 1.3574 (1.0271) acc 68.7500 (72.7273) lr 4.1221e-04 eta 2:39:11 +epoch [37/50] batch [60/500] time 1.347 (1.373) data 0.000 (0.013) loss 1.1260 (1.0286) acc 68.7500 (72.8125) lr 4.1221e-04 eta 2:38:49 +epoch [37/50] batch 
[65/500] time 1.343 (1.371) data 0.000 (0.012) loss 0.8140 (1.0274) acc 78.1250 (72.7885) lr 4.1221e-04 eta 2:38:29 +epoch [37/50] batch [70/500] time 1.349 (1.370) data 0.000 (0.011) loss 0.6401 (1.0184) acc 81.2500 (73.1250) lr 4.1221e-04 eta 2:38:15 +epoch [37/50] batch [75/500] time 1.356 (1.370) data 0.000 (0.011) loss 0.9014 (1.0189) acc 65.6250 (73.1250) lr 4.1221e-04 eta 2:38:04 +epoch [37/50] batch [80/500] time 1.354 (1.368) data 0.000 (0.010) loss 0.8491 (1.0284) acc 78.1250 (72.8906) lr 4.1221e-04 eta 2:37:49 +epoch [37/50] batch [85/500] time 1.353 (1.367) data 0.000 (0.010) loss 0.8779 (1.0262) acc 78.1250 (73.1618) lr 4.1221e-04 eta 2:37:34 +epoch [37/50] batch [90/500] time 1.358 (1.366) data 0.000 (0.009) loss 0.7695 (1.0258) acc 75.0000 (73.1944) lr 4.1221e-04 eta 2:37:21 +epoch [37/50] batch [95/500] time 1.371 (1.367) data 0.000 (0.009) loss 1.3623 (1.0220) acc 71.8750 (73.4868) lr 4.1221e-04 eta 2:37:18 +epoch [37/50] batch [100/500] time 1.373 (1.367) data 0.000 (0.008) loss 1.3057 (1.0315) acc 65.6250 (73.2812) lr 4.1221e-04 eta 2:37:09 +epoch [37/50] batch [105/500] time 1.358 (1.366) data 0.000 (0.008) loss 0.8022 (1.0291) acc 78.1250 (73.3333) lr 4.1221e-04 eta 2:36:59 +epoch [37/50] batch [110/500] time 1.357 (1.366) data 0.000 (0.007) loss 1.1816 (1.0159) acc 62.5000 (73.6932) lr 4.1221e-04 eta 2:36:50 +epoch [37/50] batch [115/500] time 1.358 (1.365) data 0.000 (0.007) loss 1.0986 (1.0248) acc 81.2500 (73.7228) lr 4.1221e-04 eta 2:36:40 +epoch [37/50] batch [120/500] time 1.351 (1.365) data 0.000 (0.007) loss 0.7603 (1.0208) acc 90.6250 (73.9062) lr 4.1221e-04 eta 2:36:29 +epoch [37/50] batch [125/500] time 1.347 (1.365) data 0.000 (0.007) loss 0.6196 (1.0211) acc 81.2500 (73.7250) lr 4.1221e-04 eta 2:36:21 +epoch [37/50] batch [130/500] time 1.345 (1.364) data 0.000 (0.006) loss 1.0615 (1.0179) acc 75.0000 (73.8221) lr 4.1221e-04 eta 2:36:12 +epoch [37/50] batch [135/500] time 1.362 (1.364) data 0.000 (0.006) loss 1.7490 (1.0152) acc 
50.0000 (73.8889) lr 4.1221e-04 eta 2:36:02 +epoch [37/50] batch [140/500] time 1.368 (1.364) data 0.000 (0.006) loss 1.0840 (1.0189) acc 68.7500 (73.7946) lr 4.1221e-04 eta 2:35:56 +epoch [37/50] batch [145/500] time 1.361 (1.364) data 0.001 (0.006) loss 1.2969 (1.0184) acc 62.5000 (73.7500) lr 4.1221e-04 eta 2:35:49 +epoch [37/50] batch [150/500] time 1.347 (1.364) data 0.000 (0.006) loss 0.8213 (1.0170) acc 78.1250 (73.7708) lr 4.1221e-04 eta 2:35:41 +epoch [37/50] batch [155/500] time 1.364 (1.364) data 0.000 (0.005) loss 1.0820 (1.0150) acc 75.0000 (73.7500) lr 4.1221e-04 eta 2:35:33 +epoch [37/50] batch [160/500] time 1.374 (1.363) data 0.000 (0.005) loss 0.9766 (1.0207) acc 75.0000 (73.6523) lr 4.1221e-04 eta 2:35:25 +epoch [37/50] batch [165/500] time 1.354 (1.363) data 0.000 (0.005) loss 1.4424 (1.0222) acc 59.3750 (73.6364) lr 4.1221e-04 eta 2:35:17 +epoch [37/50] batch [170/500] time 1.340 (1.363) data 0.000 (0.005) loss 0.8618 (1.0230) acc 81.2500 (73.7316) lr 4.1221e-04 eta 2:35:10 +epoch [37/50] batch [175/500] time 1.351 (1.363) data 0.000 (0.005) loss 1.6221 (1.0267) acc 71.8750 (73.7321) lr 4.1221e-04 eta 2:35:02 +epoch [37/50] batch [180/500] time 1.359 (1.363) data 0.001 (0.005) loss 1.5049 (1.0209) acc 62.5000 (73.8368) lr 4.1221e-04 eta 2:34:54 +epoch [37/50] batch [185/500] time 1.333 (1.362) data 0.000 (0.005) loss 1.4482 (1.0233) acc 62.5000 (73.8851) lr 4.1221e-04 eta 2:34:45 +epoch [37/50] batch [190/500] time 1.353 (1.362) data 0.000 (0.004) loss 1.0674 (1.0261) acc 68.7500 (73.7829) lr 4.1221e-04 eta 2:34:37 +epoch [37/50] batch [195/500] time 1.363 (1.363) data 0.000 (0.004) loss 0.5962 (1.0225) acc 78.1250 (73.8462) lr 4.1221e-04 eta 2:34:34 +epoch [37/50] batch [200/500] time 1.377 (1.363) data 0.000 (0.004) loss 1.1162 (1.0214) acc 71.8750 (73.8906) lr 4.1221e-04 eta 2:34:26 +epoch [37/50] batch [205/500] time 1.372 (1.363) data 0.000 (0.004) loss 1.4248 (1.0266) acc 56.2500 (73.7348) lr 4.1221e-04 eta 2:34:19 +epoch [37/50] batch 
[210/500] time 1.371 (1.363) data 0.000 (0.004) loss 1.9199 (1.0327) acc 56.2500 (73.5863) lr 4.1221e-04 eta 2:34:13 +epoch [37/50] batch [215/500] time 1.359 (1.363) data 0.000 (0.004) loss 1.5547 (1.0396) acc 68.7500 (73.5320) lr 4.1221e-04 eta 2:34:05 +epoch [37/50] batch [220/500] time 1.352 (1.363) data 0.000 (0.004) loss 1.6475 (1.0381) acc 68.7500 (73.6364) lr 4.1221e-04 eta 2:33:58 +epoch [37/50] batch [225/500] time 1.354 (1.362) data 0.000 (0.004) loss 1.1982 (1.0354) acc 71.8750 (73.7222) lr 4.1221e-04 eta 2:33:49 +epoch [37/50] batch [230/500] time 1.370 (1.362) data 0.000 (0.004) loss 1.7725 (1.0390) acc 68.7500 (73.6957) lr 4.1221e-04 eta 2:33:42 +epoch [37/50] batch [235/500] time 1.494 (1.363) data 0.000 (0.004) loss 1.3506 (1.0411) acc 71.8750 (73.6702) lr 4.1221e-04 eta 2:33:38 +epoch [37/50] batch [240/500] time 1.360 (1.363) data 0.000 (0.004) loss 1.4043 (1.0408) acc 68.7500 (73.7109) lr 4.1221e-04 eta 2:33:32 +epoch [37/50] batch [245/500] time 1.334 (1.362) data 0.000 (0.004) loss 1.6992 (1.0438) acc 59.3750 (73.6480) lr 4.1221e-04 eta 2:33:23 +epoch [37/50] batch [250/500] time 1.369 (1.363) data 0.000 (0.003) loss 1.1436 (1.0427) acc 78.1250 (73.7125) lr 4.1221e-04 eta 2:33:16 +epoch [37/50] batch [255/500] time 1.365 (1.363) data 0.001 (0.003) loss 1.1484 (1.0463) acc 65.6250 (73.6152) lr 4.1221e-04 eta 2:33:10 +epoch [37/50] batch [260/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.9336 (1.0507) acc 75.0000 (73.5216) lr 4.1221e-04 eta 2:33:03 +epoch [37/50] batch [265/500] time 1.357 (1.362) data 0.000 (0.003) loss 1.4004 (1.0497) acc 68.7500 (73.5613) lr 4.1221e-04 eta 2:32:56 +epoch [37/50] batch [270/500] time 1.344 (1.362) data 0.001 (0.003) loss 0.6445 (1.0526) acc 90.6250 (73.5995) lr 4.1221e-04 eta 2:32:48 +epoch [37/50] batch [275/500] time 1.362 (1.362) data 0.000 (0.003) loss 0.5181 (1.0518) acc 90.6250 (73.5909) lr 4.1221e-04 eta 2:32:40 +epoch [37/50] batch [280/500] time 1.349 (1.362) data 0.000 (0.003) loss 1.3818 
(1.0541) acc 75.0000 (73.6272) lr 4.1221e-04 eta 2:32:35 +epoch [37/50] batch [285/500] time 1.388 (1.363) data 0.000 (0.003) loss 0.8198 (1.0559) acc 75.0000 (73.6294) lr 4.1221e-04 eta 2:32:29 +epoch [37/50] batch [290/500] time 1.366 (1.363) data 0.000 (0.003) loss 0.7158 (1.0540) acc 84.3750 (73.6853) lr 4.1221e-04 eta 2:32:22 +epoch [37/50] batch [295/500] time 1.372 (1.363) data 0.001 (0.003) loss 0.7383 (1.0544) acc 81.2500 (73.6441) lr 4.1221e-04 eta 2:32:16 +epoch [37/50] batch [300/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.0576 (1.0559) acc 65.6250 (73.6562) lr 4.1221e-04 eta 2:32:09 +epoch [37/50] batch [305/500] time 1.354 (1.363) data 0.000 (0.003) loss 1.0342 (1.0563) acc 75.0000 (73.6783) lr 4.1221e-04 eta 2:32:03 +epoch [37/50] batch [310/500] time 1.358 (1.363) data 0.000 (0.003) loss 1.1279 (1.0542) acc 81.2500 (73.7399) lr 4.1221e-04 eta 2:31:56 +epoch [37/50] batch [315/500] time 1.360 (1.363) data 0.001 (0.003) loss 1.0400 (1.0526) acc 68.7500 (73.7401) lr 4.1221e-04 eta 2:31:50 +epoch [37/50] batch [320/500] time 1.374 (1.363) data 0.000 (0.003) loss 1.2725 (1.0510) acc 68.7500 (73.7598) lr 4.1221e-04 eta 2:31:44 +epoch [37/50] batch [325/500] time 1.378 (1.363) data 0.001 (0.003) loss 1.6279 (1.0530) acc 65.6250 (73.7308) lr 4.1221e-04 eta 2:31:38 +epoch [37/50] batch [330/500] time 1.378 (1.363) data 0.000 (0.003) loss 0.7505 (1.0517) acc 78.1250 (73.6742) lr 4.1221e-04 eta 2:31:33 +epoch [37/50] batch [335/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.2900 (1.0509) acc 65.6250 (73.6101) lr 4.1221e-04 eta 2:31:30 +epoch [37/50] batch [340/500] time 1.376 (1.364) data 0.000 (0.003) loss 0.5859 (1.0492) acc 78.1250 (73.6121) lr 4.1221e-04 eta 2:31:25 +epoch [37/50] batch [345/500] time 1.384 (1.364) data 0.000 (0.003) loss 1.2520 (1.0507) acc 71.8750 (73.5960) lr 4.1221e-04 eta 2:31:18 +epoch [37/50] batch [350/500] time 1.356 (1.364) data 0.000 (0.003) loss 1.7373 (1.0520) acc 53.1250 (73.5446) lr 4.1221e-04 eta 2:31:11 +epoch 
[37/50] batch [355/500] time 1.372 (1.364) data 0.001 (0.003) loss 0.9878 (1.0534) acc 68.7500 (73.4595) lr 4.1221e-04 eta 2:31:04 +epoch [37/50] batch [360/500] time 1.342 (1.364) data 0.001 (0.003) loss 1.1729 (1.0539) acc 65.6250 (73.4462) lr 4.1221e-04 eta 2:30:56 +epoch [37/50] batch [365/500] time 1.334 (1.364) data 0.000 (0.003) loss 1.2578 (1.0558) acc 78.1250 (73.4418) lr 4.1221e-04 eta 2:30:49 +epoch [37/50] batch [370/500] time 1.384 (1.364) data 0.000 (0.003) loss 1.2959 (1.0540) acc 68.7500 (73.4797) lr 4.1221e-04 eta 2:30:42 +epoch [37/50] batch [375/500] time 1.353 (1.364) data 0.000 (0.002) loss 1.4277 (1.0569) acc 65.6250 (73.4667) lr 4.1221e-04 eta 2:30:35 +epoch [37/50] batch [380/500] time 1.359 (1.364) data 0.000 (0.002) loss 1.2852 (1.0593) acc 62.5000 (73.3799) lr 4.1221e-04 eta 2:30:30 +epoch [37/50] batch [385/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.2383 (1.0592) acc 71.8750 (73.4010) lr 4.1221e-04 eta 2:30:22 +epoch [37/50] batch [390/500] time 1.339 (1.364) data 0.000 (0.002) loss 1.4805 (1.0590) acc 65.6250 (73.4215) lr 4.1221e-04 eta 2:30:15 +epoch [37/50] batch [395/500] time 1.358 (1.364) data 0.001 (0.002) loss 0.9326 (1.0565) acc 78.1250 (73.4968) lr 4.1221e-04 eta 2:30:07 +epoch [37/50] batch [400/500] time 1.369 (1.364) data 0.000 (0.002) loss 0.7754 (1.0563) acc 78.1250 (73.4531) lr 4.1221e-04 eta 2:30:01 +epoch [37/50] batch [405/500] time 1.373 (1.364) data 0.001 (0.002) loss 1.0215 (1.0586) acc 65.6250 (73.4105) lr 4.1221e-04 eta 2:29:54 +epoch [37/50] batch [410/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.6821 (1.0571) acc 78.1250 (73.4527) lr 4.1221e-04 eta 2:29:47 +epoch [37/50] batch [415/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.7925 (1.0547) acc 81.2500 (73.5090) lr 4.1221e-04 eta 2:29:41 +epoch [37/50] batch [420/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.4619 (1.0553) acc 71.8750 (73.4747) lr 4.1221e-04 eta 2:29:34 +epoch [37/50] batch [425/500] time 1.363 (1.364) data 0.000 (0.002) loss 
1.2578 (1.0575) acc 71.8750 (73.4485) lr 4.1221e-04 eta 2:29:27 +epoch [37/50] batch [430/500] time 1.342 (1.364) data 0.000 (0.002) loss 1.3467 (1.0583) acc 71.8750 (73.4012) lr 4.1221e-04 eta 2:29:18 +epoch [37/50] batch [435/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.8838 (1.0588) acc 78.1250 (73.3836) lr 4.1221e-04 eta 2:29:11 +epoch [37/50] batch [440/500] time 1.344 (1.363) data 0.000 (0.002) loss 1.3232 (1.0587) acc 62.5000 (73.3594) lr 4.1221e-04 eta 2:29:04 +epoch [37/50] batch [445/500] time 1.369 (1.363) data 0.000 (0.002) loss 1.5156 (1.0575) acc 59.3750 (73.3708) lr 4.1221e-04 eta 2:28:57 +epoch [37/50] batch [450/500] time 1.363 (1.364) data 0.000 (0.002) loss 0.7524 (1.0566) acc 87.5000 (73.3681) lr 4.1221e-04 eta 2:28:51 +epoch [37/50] batch [455/500] time 1.345 (1.363) data 0.000 (0.002) loss 0.8569 (1.0554) acc 75.0000 (73.3997) lr 4.1221e-04 eta 2:28:43 +epoch [37/50] batch [460/500] time 1.386 (1.364) data 0.000 (0.002) loss 1.2949 (1.0567) acc 75.0000 (73.3967) lr 4.1221e-04 eta 2:28:37 +epoch [37/50] batch [465/500] time 1.364 (1.364) data 0.000 (0.002) loss 1.0703 (1.0567) acc 71.8750 (73.3333) lr 4.1221e-04 eta 2:28:30 +epoch [37/50] batch [470/500] time 1.373 (1.363) data 0.000 (0.002) loss 1.3682 (1.0592) acc 65.6250 (73.2912) lr 4.1221e-04 eta 2:28:23 +epoch [37/50] batch [475/500] time 1.350 (1.363) data 0.000 (0.002) loss 1.6689 (1.0606) acc 59.3750 (73.2961) lr 4.1221e-04 eta 2:28:16 +epoch [37/50] batch [480/500] time 1.347 (1.364) data 0.000 (0.002) loss 1.2139 (1.0606) acc 71.8750 (73.3073) lr 4.1221e-04 eta 2:28:10 +epoch [37/50] batch [485/500] time 1.359 (1.364) data 0.001 (0.002) loss 1.4717 (1.0633) acc 59.3750 (73.2474) lr 4.1221e-04 eta 2:28:03 +epoch [37/50] batch [490/500] time 1.365 (1.363) data 0.000 (0.002) loss 0.5972 (1.0632) acc 90.6250 (73.2334) lr 4.1221e-04 eta 2:27:55 +epoch [37/50] batch [495/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.8257 (1.0627) acc 75.0000 (73.2449) lr 4.1221e-04 eta 2:27:48 
+epoch [37/50] batch [500/500] time 1.368 (1.363) data 0.000 (0.002) loss 1.4131 (1.0620) acc 75.0000 (73.3000) lr 3.6258e-04 eta 2:27:42 +epoch [38/50] batch [5/500] time 1.376 (1.521) data 0.000 (0.151) loss 1.8984 (1.1250) acc 65.6250 (74.3750) lr 3.6258e-04 eta 2:44:38 +epoch [38/50] batch [10/500] time 1.350 (1.439) data 0.000 (0.076) loss 0.9062 (0.9882) acc 81.2500 (77.1875) lr 3.6258e-04 eta 2:35:39 +epoch [38/50] batch [15/500] time 1.371 (1.414) data 0.000 (0.051) loss 0.9512 (0.9800) acc 84.3750 (77.7083) lr 3.6258e-04 eta 2:32:47 +epoch [38/50] batch [20/500] time 1.337 (1.400) data 0.000 (0.038) loss 1.0312 (0.9787) acc 78.1250 (77.1875) lr 3.6258e-04 eta 2:31:09 +epoch [38/50] batch [25/500] time 1.387 (1.392) data 0.000 (0.031) loss 0.9019 (0.9825) acc 71.8750 (76.7500) lr 3.6258e-04 eta 2:30:15 +epoch [38/50] batch [30/500] time 1.364 (1.393) data 0.000 (0.026) loss 0.4424 (0.9867) acc 84.3750 (76.8750) lr 3.6258e-04 eta 2:30:14 +epoch [38/50] batch [35/500] time 1.359 (1.388) data 0.000 (0.022) loss 0.9873 (1.0073) acc 75.0000 (76.0714) lr 3.6258e-04 eta 2:29:35 +epoch [38/50] batch [40/500] time 1.330 (1.383) data 0.000 (0.019) loss 1.2344 (1.0255) acc 71.8750 (75.5469) lr 3.6258e-04 eta 2:28:52 +epoch [38/50] batch [45/500] time 1.366 (1.381) data 0.000 (0.017) loss 1.2432 (1.0400) acc 68.7500 (75.1389) lr 3.6258e-04 eta 2:28:33 +epoch [38/50] batch [50/500] time 1.370 (1.378) data 0.000 (0.015) loss 0.6577 (1.0334) acc 84.3750 (75.0000) lr 3.6258e-04 eta 2:28:07 +epoch [38/50] batch [55/500] time 1.358 (1.377) data 0.000 (0.014) loss 0.7417 (1.0222) acc 81.2500 (75.1136) lr 3.6258e-04 eta 2:27:52 +epoch [38/50] batch [60/500] time 1.365 (1.375) data 0.000 (0.013) loss 0.8594 (1.0277) acc 75.0000 (74.9479) lr 3.6258e-04 eta 2:27:36 +epoch [38/50] batch [65/500] time 1.353 (1.374) data 0.000 (0.012) loss 0.9502 (1.0198) acc 78.1250 (75.0962) lr 3.6258e-04 eta 2:27:21 +epoch [38/50] batch [70/500] time 1.347 (1.373) data 0.000 (0.011) loss 1.2432 
(1.0105) acc 75.0000 (75.2232) lr 3.6258e-04 eta 2:27:09 +epoch [38/50] batch [75/500] time 1.359 (1.374) data 0.000 (0.010) loss 1.3740 (1.0295) acc 75.0000 (74.7500) lr 3.6258e-04 eta 2:27:05 +epoch [38/50] batch [80/500] time 1.349 (1.373) data 0.000 (0.010) loss 1.0908 (1.0299) acc 75.0000 (74.6875) lr 3.6258e-04 eta 2:26:52 +epoch [38/50] batch [85/500] time 1.352 (1.372) data 0.000 (0.009) loss 1.6377 (1.0392) acc 68.7500 (74.7794) lr 3.6258e-04 eta 2:26:40 +epoch [38/50] batch [90/500] time 1.362 (1.371) data 0.000 (0.009) loss 1.2559 (1.0363) acc 65.6250 (74.9653) lr 3.6258e-04 eta 2:26:30 +epoch [38/50] batch [95/500] time 1.343 (1.371) data 0.000 (0.008) loss 0.6846 (1.0399) acc 78.1250 (74.7039) lr 3.6258e-04 eta 2:26:19 +epoch [38/50] batch [100/500] time 1.373 (1.370) data 0.000 (0.008) loss 1.2139 (1.0440) acc 71.8750 (74.6562) lr 3.6258e-04 eta 2:26:10 +epoch [38/50] batch [105/500] time 1.400 (1.370) data 0.000 (0.008) loss 1.3740 (1.0465) acc 53.1250 (74.4048) lr 3.6258e-04 eta 2:26:03 +epoch [38/50] batch [110/500] time 1.369 (1.371) data 0.000 (0.007) loss 0.7886 (1.0376) acc 84.3750 (74.5455) lr 3.6258e-04 eta 2:25:58 +epoch [38/50] batch [115/500] time 1.358 (1.370) data 0.000 (0.007) loss 1.6826 (1.0375) acc 59.3750 (74.5109) lr 3.6258e-04 eta 2:25:47 +epoch [38/50] batch [120/500] time 1.351 (1.370) data 0.000 (0.007) loss 1.2656 (1.0448) acc 68.7500 (74.4271) lr 3.6258e-04 eta 2:25:37 +epoch [38/50] batch [125/500] time 1.375 (1.369) data 0.000 (0.006) loss 1.1445 (1.0509) acc 59.3750 (74.1750) lr 3.6258e-04 eta 2:25:30 +epoch [38/50] batch [130/500] time 1.363 (1.369) data 0.000 (0.006) loss 0.6685 (1.0501) acc 78.1250 (74.1827) lr 3.6258e-04 eta 2:25:20 +epoch [38/50] batch [135/500] time 1.356 (1.368) data 0.000 (0.006) loss 0.9331 (1.0422) acc 75.0000 (74.3287) lr 3.6258e-04 eta 2:25:10 +epoch [38/50] batch [140/500] time 1.372 (1.368) data 0.000 (0.006) loss 0.9287 (1.0439) acc 81.2500 (74.2411) lr 3.6258e-04 eta 2:25:01 +epoch [38/50] 
batch [145/500] time 1.368 (1.368) data 0.000 (0.006) loss 1.4209 (1.0480) acc 71.8750 (74.1164) lr 3.6258e-04 eta 2:24:52 +epoch [38/50] batch [150/500] time 1.350 (1.368) data 0.000 (0.005) loss 0.9629 (1.0488) acc 71.8750 (74.0417) lr 3.6258e-04 eta 2:24:44 +epoch [38/50] batch [155/500] time 1.342 (1.367) data 0.000 (0.005) loss 1.5664 (1.0532) acc 59.3750 (74.0121) lr 3.6258e-04 eta 2:24:36 +epoch [38/50] batch [160/500] time 1.364 (1.367) data 0.000 (0.005) loss 1.0273 (1.0557) acc 68.7500 (74.0625) lr 3.6258e-04 eta 2:24:28 +epoch [38/50] batch [165/500] time 1.350 (1.367) data 0.000 (0.005) loss 2.4512 (1.0625) acc 56.2500 (73.9773) lr 3.6258e-04 eta 2:24:21 +epoch [38/50] batch [170/500] time 1.373 (1.367) data 0.000 (0.005) loss 1.0244 (1.0633) acc 75.0000 (73.8787) lr 3.6258e-04 eta 2:24:15 +epoch [38/50] batch [175/500] time 1.356 (1.368) data 0.001 (0.005) loss 1.5557 (1.0655) acc 56.2500 (73.6964) lr 3.6258e-04 eta 2:24:10 +epoch [38/50] batch [180/500] time 1.373 (1.368) data 0.000 (0.005) loss 0.7759 (1.0600) acc 75.0000 (73.6806) lr 3.6258e-04 eta 2:24:04 +epoch [38/50] batch [185/500] time 1.363 (1.368) data 0.000 (0.004) loss 1.9854 (1.0678) acc 65.6250 (73.5473) lr 3.6258e-04 eta 2:23:57 +epoch [38/50] batch [190/500] time 1.347 (1.367) data 0.000 (0.004) loss 1.2061 (1.0718) acc 59.3750 (73.3553) lr 3.6258e-04 eta 2:23:47 +epoch [38/50] batch [195/500] time 1.353 (1.367) data 0.000 (0.004) loss 1.2949 (1.0673) acc 62.5000 (73.3974) lr 3.6258e-04 eta 2:23:36 +epoch [38/50] batch [200/500] time 1.346 (1.366) data 0.000 (0.004) loss 0.8774 (1.0626) acc 84.3750 (73.5781) lr 3.6258e-04 eta 2:23:26 +epoch [38/50] batch [205/500] time 1.366 (1.366) data 0.000 (0.004) loss 0.7329 (1.0600) acc 78.1250 (73.6128) lr 3.6258e-04 eta 2:23:18 +epoch [38/50] batch [210/500] time 1.381 (1.366) data 0.000 (0.004) loss 0.7358 (1.0532) acc 87.5000 (73.7649) lr 3.6258e-04 eta 2:23:11 +epoch [38/50] batch [215/500] time 1.366 (1.366) data 0.000 (0.004) loss 0.5400 
(1.0507) acc 90.6250 (73.8081) lr 3.6258e-04 eta 2:23:03 +epoch [38/50] batch [220/500] time 1.369 (1.366) data 0.000 (0.004) loss 0.9937 (1.0500) acc 78.1250 (73.8068) lr 3.6258e-04 eta 2:22:59 +epoch [38/50] batch [225/500] time 1.364 (1.366) data 0.000 (0.004) loss 2.1035 (1.0555) acc 68.7500 (73.7917) lr 3.6258e-04 eta 2:22:51 +epoch [38/50] batch [230/500] time 1.380 (1.366) data 0.000 (0.004) loss 0.7012 (1.0511) acc 65.6250 (73.8451) lr 3.6258e-04 eta 2:22:43 +epoch [38/50] batch [235/500] time 1.345 (1.366) data 0.000 (0.004) loss 1.6543 (1.0543) acc 50.0000 (73.7766) lr 3.6258e-04 eta 2:22:36 +epoch [38/50] batch [240/500] time 1.390 (1.366) data 0.000 (0.004) loss 1.5293 (1.0573) acc 62.5000 (73.6589) lr 3.6258e-04 eta 2:22:29 +epoch [38/50] batch [245/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.8506 (1.0564) acc 84.3750 (73.7245) lr 3.6258e-04 eta 2:22:22 +epoch [38/50] batch [250/500] time 1.362 (1.366) data 0.000 (0.003) loss 0.7354 (1.0574) acc 87.5000 (73.7375) lr 3.6258e-04 eta 2:22:15 +epoch [38/50] batch [255/500] time 1.366 (1.366) data 0.000 (0.003) loss 1.1221 (1.0570) acc 62.5000 (73.6642) lr 3.6258e-04 eta 2:22:08 +epoch [38/50] batch [260/500] time 1.346 (1.365) data 0.000 (0.003) loss 0.9497 (1.0563) acc 68.7500 (73.5577) lr 3.6258e-04 eta 2:22:00 +epoch [38/50] batch [265/500] time 1.369 (1.365) data 0.000 (0.003) loss 1.0098 (1.0576) acc 75.0000 (73.5142) lr 3.6258e-04 eta 2:21:52 +epoch [38/50] batch [270/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.9507 (1.0580) acc 75.0000 (73.4954) lr 3.6258e-04 eta 2:21:45 +epoch [38/50] batch [275/500] time 1.347 (1.365) data 0.000 (0.003) loss 1.4912 (1.0589) acc 62.5000 (73.4545) lr 3.6258e-04 eta 2:21:39 +epoch [38/50] batch [280/500] time 1.350 (1.365) data 0.000 (0.003) loss 0.5811 (1.0587) acc 81.2500 (73.4040) lr 3.6258e-04 eta 2:21:31 +epoch [38/50] batch [285/500] time 1.334 (1.365) data 0.000 (0.003) loss 1.2588 (1.0616) acc 71.8750 (73.3224) lr 3.6258e-04 eta 2:21:22 +epoch 
[38/50] batch [290/500] time 1.357 (1.365) data 0.000 (0.003) loss 0.5903 (1.0610) acc 81.2500 (73.3728) lr 3.6258e-04 eta 2:21:13 +epoch [38/50] batch [295/500] time 1.348 (1.364) data 0.000 (0.003) loss 1.0000 (1.0613) acc 71.8750 (73.2627) lr 3.6258e-04 eta 2:21:06 +epoch [38/50] batch [300/500] time 1.367 (1.364) data 0.000 (0.003) loss 1.1426 (1.0635) acc 65.6250 (73.1875) lr 3.6258e-04 eta 2:20:59 +epoch [38/50] batch [305/500] time 1.340 (1.364) data 0.000 (0.003) loss 0.7837 (1.0596) acc 81.2500 (73.2480) lr 3.6258e-04 eta 2:20:51 +epoch [38/50] batch [310/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.2363 (1.0566) acc 59.3750 (73.2560) lr 3.6258e-04 eta 2:20:44 +epoch [38/50] batch [315/500] time 1.487 (1.364) data 0.000 (0.003) loss 1.0713 (1.0577) acc 78.1250 (73.2639) lr 3.6258e-04 eta 2:20:39 +epoch [38/50] batch [320/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.2207 (1.0588) acc 75.0000 (73.2324) lr 3.6258e-04 eta 2:20:31 +epoch [38/50] batch [325/500] time 1.337 (1.364) data 0.000 (0.003) loss 0.9429 (1.0607) acc 71.8750 (73.1827) lr 3.6258e-04 eta 2:20:24 +epoch [38/50] batch [330/500] time 1.367 (1.364) data 0.000 (0.003) loss 0.8599 (1.0628) acc 75.0000 (73.1534) lr 3.6258e-04 eta 2:20:16 +epoch [38/50] batch [335/500] time 1.339 (1.364) data 0.000 (0.003) loss 1.0537 (1.0611) acc 68.7500 (73.1530) lr 3.6258e-04 eta 2:20:07 +epoch [38/50] batch [340/500] time 1.367 (1.364) data 0.001 (0.003) loss 0.9834 (1.0621) acc 78.1250 (73.1250) lr 3.6258e-04 eta 2:20:01 +epoch [38/50] batch [345/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.1318 (1.0612) acc 78.1250 (73.1884) lr 3.6258e-04 eta 2:19:53 +epoch [38/50] batch [350/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.9985 (1.0623) acc 78.1250 (73.1786) lr 3.6258e-04 eta 2:19:46 +epoch [38/50] batch [355/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.6772 (1.0601) acc 71.8750 (73.1690) lr 3.6258e-04 eta 2:19:39 +epoch [38/50] batch [360/500] time 1.386 (1.364) data 0.000 (0.002) loss 
1.1504 (1.0608) acc 71.8750 (73.1424) lr 3.6258e-04 eta 2:19:35 +epoch [38/50] batch [365/500] time 1.356 (1.364) data 0.000 (0.002) loss 0.9800 (1.0594) acc 65.6250 (73.1421) lr 3.6258e-04 eta 2:19:27 +epoch [38/50] batch [370/500] time 1.340 (1.364) data 0.000 (0.002) loss 0.9150 (1.0598) acc 68.7500 (73.0912) lr 3.6258e-04 eta 2:19:20 +epoch [38/50] batch [375/500] time 1.357 (1.364) data 0.000 (0.002) loss 1.3555 (1.0615) acc 65.6250 (73.0417) lr 3.6258e-04 eta 2:19:12 +epoch [38/50] batch [380/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.7119 (1.0629) acc 75.0000 (73.0016) lr 3.6258e-04 eta 2:19:05 +epoch [38/50] batch [385/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.2500 (1.0643) acc 81.2500 (73.0114) lr 3.6258e-04 eta 2:18:58 +epoch [38/50] batch [390/500] time 1.343 (1.364) data 0.000 (0.002) loss 0.7412 (1.0635) acc 84.3750 (73.0369) lr 3.6258e-04 eta 2:18:51 +epoch [38/50] batch [395/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.5293 (1.0659) acc 71.8750 (73.0301) lr 3.6258e-04 eta 2:18:44 +epoch [38/50] batch [400/500] time 1.363 (1.363) data 0.000 (0.002) loss 1.0420 (1.0636) acc 68.7500 (73.0703) lr 3.6258e-04 eta 2:18:37 +epoch [38/50] batch [405/500] time 1.389 (1.363) data 0.000 (0.002) loss 1.7383 (1.0665) acc 65.6250 (73.0324) lr 3.6258e-04 eta 2:18:29 +epoch [38/50] batch [410/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.1836 (1.0666) acc 68.7500 (73.0107) lr 3.6258e-04 eta 2:18:23 +epoch [38/50] batch [415/500] time 1.373 (1.363) data 0.000 (0.002) loss 0.9131 (1.0642) acc 71.8750 (73.0120) lr 3.6258e-04 eta 2:18:16 +epoch [38/50] batch [420/500] time 1.368 (1.363) data 0.000 (0.002) loss 0.7598 (1.0652) acc 75.0000 (72.9762) lr 3.6258e-04 eta 2:18:09 +epoch [38/50] batch [425/500] time 1.368 (1.363) data 0.000 (0.002) loss 0.6548 (1.0658) acc 81.2500 (72.9485) lr 3.6258e-04 eta 2:18:01 +epoch [38/50] batch [430/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.1055 (1.0656) acc 75.0000 (72.9724) lr 3.6258e-04 eta 2:17:54 
+epoch [38/50] batch [435/500] time 1.392 (1.363) data 0.000 (0.002) loss 0.6309 (1.0636) acc 87.5000 (73.0172) lr 3.6258e-04 eta 2:17:48 +epoch [38/50] batch [440/500] time 1.350 (1.363) data 0.000 (0.002) loss 1.3291 (1.0655) acc 68.7500 (72.9616) lr 3.6258e-04 eta 2:17:42 +epoch [38/50] batch [445/500] time 1.339 (1.363) data 0.000 (0.002) loss 0.5000 (1.0633) acc 90.6250 (73.0126) lr 3.6258e-04 eta 2:17:35 +epoch [38/50] batch [450/500] time 1.358 (1.363) data 0.000 (0.002) loss 1.4561 (1.0647) acc 71.8750 (73.0208) lr 3.6258e-04 eta 2:17:28 +epoch [38/50] batch [455/500] time 1.366 (1.363) data 0.000 (0.002) loss 1.2861 (1.0669) acc 68.7500 (72.9945) lr 3.6258e-04 eta 2:17:20 +epoch [38/50] batch [460/500] time 1.388 (1.364) data 0.000 (0.002) loss 1.2246 (1.0655) acc 68.7500 (73.0299) lr 3.6258e-04 eta 2:17:15 +epoch [38/50] batch [465/500] time 1.362 (1.364) data 0.001 (0.002) loss 1.2422 (1.0640) acc 65.6250 (73.0511) lr 3.6258e-04 eta 2:17:09 +epoch [38/50] batch [470/500] time 1.375 (1.364) data 0.000 (0.002) loss 1.3740 (1.0654) acc 62.5000 (72.9920) lr 3.6258e-04 eta 2:17:03 +epoch [38/50] batch [475/500] time 1.366 (1.364) data 0.000 (0.002) loss 0.6978 (1.0663) acc 81.2500 (72.9934) lr 3.6258e-04 eta 2:16:56 +epoch [38/50] batch [480/500] time 1.361 (1.364) data 0.000 (0.002) loss 0.6206 (1.0638) acc 81.2500 (73.0794) lr 3.6258e-04 eta 2:16:49 +epoch [38/50] batch [485/500] time 1.364 (1.364) data 0.001 (0.002) loss 0.9907 (1.0632) acc 75.0000 (73.0992) lr 3.6258e-04 eta 2:16:42 +epoch [38/50] batch [490/500] time 1.389 (1.364) data 0.000 (0.002) loss 1.1055 (1.0642) acc 62.5000 (73.0485) lr 3.6258e-04 eta 2:16:36 +epoch [38/50] batch [495/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.6753 (1.0628) acc 81.2500 (73.0871) lr 3.6258e-04 eta 2:16:30 +epoch [38/50] batch [500/500] time 1.341 (1.364) data 0.000 (0.002) loss 1.6973 (1.0625) acc 68.7500 (73.1125) lr 3.1545e-04 eta 2:16:23 +epoch [39/50] batch [5/500] time 1.352 (1.575) data 0.000 (0.172) 
loss 0.4158 (1.0646) acc 87.5000 (75.6250) lr 3.1545e-04 eta 2:37:23 +epoch [39/50] batch [10/500] time 1.357 (1.468) data 0.000 (0.086) loss 1.0898 (1.0346) acc 71.8750 (75.6250) lr 3.1545e-04 eta 2:26:33 +epoch [39/50] batch [15/500] time 1.377 (1.430) data 0.000 (0.057) loss 0.5664 (1.0373) acc 84.3750 (75.4167) lr 3.1545e-04 eta 2:22:40 +epoch [39/50] batch [20/500] time 1.387 (1.413) data 0.000 (0.043) loss 0.9160 (1.0738) acc 75.0000 (74.3750) lr 3.1545e-04 eta 2:20:47 +epoch [39/50] batch [25/500] time 1.350 (1.403) data 0.000 (0.035) loss 0.9639 (1.0515) acc 78.1250 (74.0000) lr 3.1545e-04 eta 2:19:40 +epoch [39/50] batch [30/500] time 1.370 (1.396) data 0.000 (0.029) loss 0.5996 (1.0376) acc 75.0000 (73.8542) lr 3.1545e-04 eta 2:18:52 +epoch [39/50] batch [35/500] time 1.365 (1.392) data 0.000 (0.025) loss 0.7427 (1.0308) acc 84.3750 (73.9286) lr 3.1545e-04 eta 2:18:20 +epoch [39/50] batch [40/500] time 1.372 (1.388) data 0.000 (0.022) loss 1.5283 (1.0520) acc 65.6250 (73.2031) lr 3.1545e-04 eta 2:17:50 +epoch [39/50] batch [45/500] time 1.356 (1.384) data 0.000 (0.019) loss 0.9302 (1.0231) acc 62.5000 (73.6111) lr 3.1545e-04 eta 2:17:24 +epoch [39/50] batch [50/500] time 1.359 (1.385) data 0.000 (0.017) loss 1.0391 (1.0247) acc 75.0000 (73.7500) lr 3.1545e-04 eta 2:17:18 +epoch [39/50] batch [55/500] time 1.348 (1.382) data 0.000 (0.016) loss 1.7383 (1.0439) acc 68.7500 (73.3523) lr 3.1545e-04 eta 2:16:54 +epoch [39/50] batch [60/500] time 1.339 (1.379) data 0.000 (0.015) loss 0.9028 (1.0348) acc 81.2500 (73.4896) lr 3.1545e-04 eta 2:16:30 +epoch [39/50] batch [65/500] time 1.361 (1.377) data 0.000 (0.014) loss 0.6265 (1.0216) acc 71.8750 (73.7500) lr 3.1545e-04 eta 2:16:09 +epoch [39/50] batch [70/500] time 1.353 (1.376) data 0.000 (0.013) loss 0.5889 (1.0241) acc 75.0000 (73.8393) lr 3.1545e-04 eta 2:15:58 +epoch [39/50] batch [75/500] time 1.345 (1.374) data 0.000 (0.012) loss 0.8906 (1.0133) acc 75.0000 (74.0417) lr 3.1545e-04 eta 2:15:41 +epoch 
[39/50] batch [80/500] time 1.355 (1.373) data 0.000 (0.011) loss 0.6958 (1.0145) acc 87.5000 (74.2578) lr 3.1545e-04 eta 2:15:27 +epoch [39/50] batch [85/500] time 1.346 (1.372) data 0.001 (0.010) loss 0.6084 (1.0121) acc 81.2500 (74.3382) lr 3.1545e-04 eta 2:15:14 +epoch [39/50] batch [90/500] time 1.366 (1.371) data 0.000 (0.010) loss 0.7368 (1.0164) acc 68.7500 (74.1667) lr 3.1545e-04 eta 2:15:01 +epoch [39/50] batch [95/500] time 1.365 (1.371) data 0.000 (0.009) loss 0.8198 (1.0161) acc 75.0000 (74.1118) lr 3.1545e-04 eta 2:14:56 +epoch [39/50] batch [100/500] time 1.360 (1.371) data 0.000 (0.009) loss 0.5850 (1.0117) acc 84.3750 (74.1562) lr 3.1545e-04 eta 2:14:46 +epoch [39/50] batch [105/500] time 1.369 (1.370) data 0.000 (0.009) loss 1.0625 (1.0231) acc 65.6250 (73.8393) lr 3.1545e-04 eta 2:14:38 +epoch [39/50] batch [110/500] time 1.362 (1.370) data 0.000 (0.008) loss 1.3818 (1.0192) acc 65.6250 (74.0341) lr 3.1545e-04 eta 2:14:28 +epoch [39/50] batch [115/500] time 1.378 (1.369) data 0.000 (0.008) loss 0.6133 (1.0170) acc 87.5000 (74.1576) lr 3.1545e-04 eta 2:14:18 +epoch [39/50] batch [120/500] time 1.378 (1.369) data 0.000 (0.008) loss 1.1074 (1.0176) acc 68.7500 (74.0625) lr 3.1545e-04 eta 2:14:09 +epoch [39/50] batch [125/500] time 1.363 (1.369) data 0.000 (0.007) loss 1.2480 (1.0172) acc 68.7500 (74.1000) lr 3.1545e-04 eta 2:14:01 +epoch [39/50] batch [130/500] time 1.363 (1.368) data 0.000 (0.007) loss 0.9048 (1.0249) acc 78.1250 (74.0625) lr 3.1545e-04 eta 2:13:52 +epoch [39/50] batch [135/500] time 1.331 (1.368) data 0.000 (0.007) loss 0.8701 (1.0262) acc 81.2500 (74.1204) lr 3.1545e-04 eta 2:13:43 +epoch [39/50] batch [140/500] time 1.363 (1.368) data 0.000 (0.006) loss 0.7314 (1.0185) acc 75.0000 (74.3527) lr 3.1545e-04 eta 2:13:34 +epoch [39/50] batch [145/500] time 1.488 (1.368) data 0.000 (0.006) loss 0.8403 (1.0123) acc 78.1250 (74.5905) lr 3.1545e-04 eta 2:13:32 +epoch [39/50] batch [150/500] time 1.386 (1.368) data 0.000 (0.006) loss 
0.9126 (1.0087) acc 65.6250 (74.4583) lr 3.1545e-04 eta 2:13:23 +epoch [39/50] batch [155/500] time 1.367 (1.368) data 0.000 (0.006) loss 0.9658 (1.0157) acc 68.7500 (74.3145) lr 3.1545e-04 eta 2:13:15 +epoch [39/50] batch [160/500] time 1.355 (1.368) data 0.000 (0.006) loss 0.8853 (1.0110) acc 75.0000 (74.3359) lr 3.1545e-04 eta 2:13:08 +epoch [39/50] batch [165/500] time 1.342 (1.367) data 0.001 (0.006) loss 1.7666 (1.0199) acc 78.1250 (74.4129) lr 3.1545e-04 eta 2:12:57 +epoch [39/50] batch [170/500] time 1.366 (1.367) data 0.000 (0.005) loss 1.0332 (1.0162) acc 71.8750 (74.3934) lr 3.1545e-04 eta 2:12:50 +epoch [39/50] batch [175/500] time 1.372 (1.367) data 0.000 (0.005) loss 1.2422 (1.0174) acc 78.1250 (74.3750) lr 3.1545e-04 eta 2:12:45 +epoch [39/50] batch [180/500] time 1.358 (1.367) data 0.000 (0.005) loss 1.0566 (1.0252) acc 71.8750 (74.2188) lr 3.1545e-04 eta 2:12:36 +epoch [39/50] batch [185/500] time 1.354 (1.367) data 0.001 (0.005) loss 1.7158 (1.0290) acc 53.1250 (74.1892) lr 3.1545e-04 eta 2:12:28 +epoch [39/50] batch [190/500] time 1.350 (1.367) data 0.000 (0.005) loss 1.2480 (1.0296) acc 75.0000 (74.1776) lr 3.1545e-04 eta 2:12:24 +epoch [39/50] batch [195/500] time 1.354 (1.367) data 0.000 (0.005) loss 1.0605 (1.0332) acc 71.8750 (74.2308) lr 3.1545e-04 eta 2:12:17 +epoch [39/50] batch [200/500] time 1.361 (1.367) data 0.000 (0.005) loss 0.8306 (1.0321) acc 81.2500 (74.2656) lr 3.1545e-04 eta 2:12:09 +epoch [39/50] batch [205/500] time 1.366 (1.367) data 0.000 (0.005) loss 1.0400 (1.0383) acc 71.8750 (74.1311) lr 3.1545e-04 eta 2:12:02 +epoch [39/50] batch [210/500] time 1.356 (1.367) data 0.000 (0.004) loss 1.3320 (1.0410) acc 62.5000 (74.0030) lr 3.1545e-04 eta 2:11:53 +epoch [39/50] batch [215/500] time 1.367 (1.367) data 0.000 (0.004) loss 0.7256 (1.0391) acc 84.3750 (74.0843) lr 3.1545e-04 eta 2:11:45 +epoch [39/50] batch [220/500] time 1.348 (1.366) data 0.000 (0.004) loss 0.6470 (1.0394) acc 81.2500 (74.0199) lr 3.1545e-04 eta 2:11:37 
+epoch [39/50] batch [225/500] time 1.366 (1.366) data 0.000 (0.004) loss 0.9834 (1.0373) acc 68.7500 (74.0417) lr 3.1545e-04 eta 2:11:30 +epoch [39/50] batch [230/500] time 1.368 (1.366) data 0.000 (0.004) loss 0.7236 (1.0356) acc 75.0000 (74.0625) lr 3.1545e-04 eta 2:11:21 +epoch [39/50] batch [235/500] time 1.364 (1.366) data 0.000 (0.004) loss 1.3203 (1.0381) acc 75.0000 (74.0293) lr 3.1545e-04 eta 2:11:13 +epoch [39/50] batch [240/500] time 1.346 (1.365) data 0.000 (0.004) loss 0.6899 (1.0313) acc 84.3750 (74.1927) lr 3.1545e-04 eta 2:11:04 +epoch [39/50] batch [245/500] time 1.350 (1.365) data 0.000 (0.004) loss 1.2119 (1.0321) acc 62.5000 (74.1837) lr 3.1545e-04 eta 2:10:57 +epoch [39/50] batch [250/500] time 1.352 (1.365) data 0.000 (0.004) loss 0.8916 (1.0322) acc 78.1250 (74.1500) lr 3.1545e-04 eta 2:10:51 +epoch [39/50] batch [255/500] time 1.367 (1.365) data 0.000 (0.004) loss 0.9038 (1.0297) acc 65.6250 (74.1176) lr 3.1545e-04 eta 2:10:43 +epoch [39/50] batch [260/500] time 1.339 (1.365) data 0.000 (0.004) loss 1.1230 (1.0284) acc 56.2500 (74.0986) lr 3.1545e-04 eta 2:10:36 +epoch [39/50] batch [265/500] time 1.341 (1.365) data 0.000 (0.004) loss 0.8311 (1.0302) acc 71.8750 (74.0330) lr 3.1545e-04 eta 2:10:28 +epoch [39/50] batch [270/500] time 1.359 (1.365) data 0.000 (0.004) loss 1.0459 (1.0302) acc 75.0000 (74.0162) lr 3.1545e-04 eta 2:10:20 +epoch [39/50] batch [275/500] time 1.385 (1.365) data 0.000 (0.003) loss 0.9028 (1.0284) acc 75.0000 (74.1250) lr 3.1545e-04 eta 2:10:13 +epoch [39/50] batch [280/500] time 1.345 (1.365) data 0.000 (0.003) loss 0.7017 (1.0298) acc 78.1250 (74.0737) lr 3.1545e-04 eta 2:10:05 +epoch [39/50] batch [285/500] time 1.347 (1.364) data 0.000 (0.003) loss 1.3896 (1.0333) acc 65.6250 (73.9693) lr 3.1545e-04 eta 2:09:57 +epoch [39/50] batch [290/500] time 1.350 (1.365) data 0.000 (0.003) loss 1.2754 (1.0367) acc 53.1250 (73.9116) lr 3.1545e-04 eta 2:09:52 +epoch [39/50] batch [295/500] time 1.384 (1.365) data 0.000 
(0.003) loss 0.9990 (1.0354) acc 75.0000 (73.9725) lr 3.1545e-04 eta 2:09:45 +epoch [39/50] batch [300/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.8560 (1.0357) acc 81.2500 (73.9583) lr 3.1545e-04 eta 2:09:38 +epoch [39/50] batch [305/500] time 1.351 (1.365) data 0.000 (0.003) loss 1.1318 (1.0347) acc 78.1250 (73.9959) lr 3.1545e-04 eta 2:09:31 +epoch [39/50] batch [310/500] time 1.369 (1.365) data 0.000 (0.003) loss 0.7856 (1.0349) acc 62.5000 (73.8508) lr 3.1545e-04 eta 2:09:24 +epoch [39/50] batch [315/500] time 1.365 (1.364) data 0.000 (0.003) loss 1.0742 (1.0329) acc 75.0000 (73.8690) lr 3.1545e-04 eta 2:09:17 +epoch [39/50] batch [320/500] time 1.360 (1.364) data 0.000 (0.003) loss 0.7275 (1.0351) acc 75.0000 (73.8184) lr 3.1545e-04 eta 2:09:09 +epoch [39/50] batch [325/500] time 1.372 (1.364) data 0.000 (0.003) loss 1.1406 (1.0354) acc 75.0000 (73.8269) lr 3.1545e-04 eta 2:09:03 +epoch [39/50] batch [330/500] time 1.363 (1.364) data 0.000 (0.003) loss 1.5537 (1.0388) acc 68.7500 (73.7784) lr 3.1545e-04 eta 2:08:56 +epoch [39/50] batch [335/500] time 1.368 (1.365) data 0.001 (0.003) loss 1.1963 (1.0386) acc 75.0000 (73.7500) lr 3.1545e-04 eta 2:08:51 +epoch [39/50] batch [340/500] time 1.350 (1.365) data 0.000 (0.003) loss 0.5151 (1.0378) acc 87.5000 (73.7500) lr 3.1545e-04 eta 2:08:44 +epoch [39/50] batch [345/500] time 1.353 (1.365) data 0.000 (0.003) loss 0.7915 (1.0380) acc 81.2500 (73.7500) lr 3.1545e-04 eta 2:08:37 +epoch [39/50] batch [350/500] time 1.351 (1.365) data 0.000 (0.003) loss 0.6606 (1.0392) acc 81.2500 (73.7500) lr 3.1545e-04 eta 2:08:30 +epoch [39/50] batch [355/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.0527 (1.0454) acc 81.2500 (73.6796) lr 3.1545e-04 eta 2:08:22 +epoch [39/50] batch [360/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.2148 (1.0421) acc 62.5000 (73.7066) lr 3.1545e-04 eta 2:08:14 +epoch [39/50] batch [365/500] time 1.365 (1.364) data 0.000 (0.003) loss 1.2891 (1.0406) acc 71.8750 (73.7671) lr 3.1545e-04 
eta 2:08:06 +epoch [39/50] batch [370/500] time 1.380 (1.364) data 0.001 (0.003) loss 0.8091 (1.0415) acc 78.1250 (73.7753) lr 3.1545e-04 eta 2:08:00 +epoch [39/50] batch [375/500] time 1.364 (1.364) data 0.000 (0.003) loss 0.8462 (1.0390) acc 81.2500 (73.8417) lr 3.1545e-04 eta 2:07:53 +epoch [39/50] batch [380/500] time 1.383 (1.364) data 0.000 (0.003) loss 0.7476 (1.0363) acc 81.2500 (73.9227) lr 3.1545e-04 eta 2:07:47 +epoch [39/50] batch [385/500] time 1.372 (1.364) data 0.000 (0.003) loss 1.1797 (1.0324) acc 59.3750 (73.9529) lr 3.1545e-04 eta 2:07:41 +epoch [39/50] batch [390/500] time 1.382 (1.364) data 0.000 (0.003) loss 0.8062 (1.0299) acc 75.0000 (73.9744) lr 3.1545e-04 eta 2:07:33 +epoch [39/50] batch [395/500] time 1.389 (1.364) data 0.000 (0.003) loss 0.6890 (1.0297) acc 84.3750 (74.0032) lr 3.1545e-04 eta 2:07:27 +epoch [39/50] batch [400/500] time 1.389 (1.365) data 0.001 (0.003) loss 1.3115 (1.0319) acc 62.5000 (73.9062) lr 3.1545e-04 eta 2:07:21 +epoch [39/50] batch [405/500] time 1.353 (1.365) data 0.001 (0.002) loss 0.7480 (1.0319) acc 75.0000 (73.8657) lr 3.1545e-04 eta 2:07:14 +epoch [39/50] batch [410/500] time 1.355 (1.365) data 0.000 (0.002) loss 0.9111 (1.0313) acc 75.0000 (73.8567) lr 3.1545e-04 eta 2:07:07 +epoch [39/50] batch [415/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.0654 (1.0342) acc 71.8750 (73.8253) lr 3.1545e-04 eta 2:07:00 +epoch [39/50] batch [420/500] time 1.334 (1.364) data 0.000 (0.002) loss 0.4834 (1.0328) acc 87.5000 (73.8467) lr 3.1545e-04 eta 2:06:53 +epoch [39/50] batch [425/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.2852 (1.0336) acc 71.8750 (73.8235) lr 3.1545e-04 eta 2:06:45 +epoch [39/50] batch [430/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.8447 (1.0341) acc 75.0000 (73.8227) lr 3.1545e-04 eta 2:06:38 +epoch [39/50] batch [435/500] time 1.348 (1.364) data 0.000 (0.002) loss 0.8564 (1.0334) acc 75.0000 (73.8218) lr 3.1545e-04 eta 2:06:32 +epoch [39/50] batch [440/500] time 1.370 (1.364) data 
0.000 (0.002) loss 0.8276 (1.0347) acc 75.0000 (73.7784) lr 3.1545e-04 eta 2:06:25 +epoch [39/50] batch [445/500] time 1.375 (1.364) data 0.000 (0.002) loss 0.9023 (1.0335) acc 78.1250 (73.8272) lr 3.1545e-04 eta 2:06:18 +epoch [39/50] batch [450/500] time 1.349 (1.364) data 0.000 (0.002) loss 0.7065 (1.0313) acc 81.2500 (73.8264) lr 3.1545e-04 eta 2:06:11 +epoch [39/50] batch [455/500] time 1.342 (1.364) data 0.000 (0.002) loss 1.1895 (1.0340) acc 75.0000 (73.7775) lr 3.1545e-04 eta 2:06:03 +epoch [39/50] batch [460/500] time 1.343 (1.364) data 0.000 (0.002) loss 1.5088 (1.0370) acc 68.7500 (73.7568) lr 3.1545e-04 eta 2:05:55 +epoch [39/50] batch [465/500] time 1.373 (1.364) data 0.000 (0.002) loss 1.0244 (1.0340) acc 71.8750 (73.8374) lr 3.1545e-04 eta 2:05:48 +epoch [39/50] batch [470/500] time 1.355 (1.364) data 0.000 (0.002) loss 1.2529 (1.0358) acc 68.7500 (73.8364) lr 3.1545e-04 eta 2:05:41 +epoch [39/50] batch [475/500] time 1.462 (1.364) data 0.000 (0.002) loss 0.7739 (1.0340) acc 81.2500 (73.8816) lr 3.1545e-04 eta 2:05:35 +epoch [39/50] batch [480/500] time 1.360 (1.364) data 0.000 (0.002) loss 1.1768 (1.0356) acc 71.8750 (73.8281) lr 3.1545e-04 eta 2:05:28 +epoch [39/50] batch [485/500] time 1.350 (1.364) data 0.001 (0.002) loss 0.8159 (1.0366) acc 81.2500 (73.8015) lr 3.1545e-04 eta 2:05:21 +epoch [39/50] batch [490/500] time 1.360 (1.364) data 0.000 (0.002) loss 0.9546 (1.0361) acc 65.6250 (73.7628) lr 3.1545e-04 eta 2:05:14 +epoch [39/50] batch [495/500] time 1.355 (1.364) data 0.000 (0.002) loss 0.8555 (1.0387) acc 71.8750 (73.6932) lr 3.1545e-04 eta 2:05:07 +epoch [39/50] batch [500/500] time 1.350 (1.364) data 0.000 (0.002) loss 1.3486 (1.0397) acc 62.5000 (73.6750) lr 2.7103e-04 eta 2:05:00 +epoch [40/50] batch [5/500] time 1.350 (1.516) data 0.000 (0.164) loss 1.1533 (1.1640) acc 78.1250 (72.5000) lr 2.7103e-04 eta 2:18:51 +epoch [40/50] batch [10/500] time 1.377 (1.437) data 0.000 (0.082) loss 1.0137 (1.0077) acc 75.0000 (73.7500) lr 2.7103e-04 
eta 2:11:26 +epoch [40/50] batch [15/500] time 1.354 (1.409) data 0.000 (0.055) loss 1.2393 (1.0431) acc 68.7500 (74.1667) lr 2.7103e-04 eta 2:08:48 +epoch [40/50] batch [20/500] time 1.354 (1.405) data 0.000 (0.041) loss 1.5312 (1.0784) acc 62.5000 (73.9062) lr 2.7103e-04 eta 2:08:16 +epoch [40/50] batch [25/500] time 1.376 (1.398) data 0.000 (0.033) loss 1.0840 (1.0808) acc 75.0000 (74.3750) lr 2.7103e-04 eta 2:07:36 +epoch [40/50] batch [30/500] time 1.348 (1.391) data 0.000 (0.028) loss 1.1934 (1.1129) acc 71.8750 (73.6458) lr 2.7103e-04 eta 2:06:49 +epoch [40/50] batch [35/500] time 1.381 (1.387) data 0.000 (0.024) loss 1.2803 (1.0949) acc 71.8750 (73.3929) lr 2.7103e-04 eta 2:06:17 +epoch [40/50] batch [40/500] time 1.355 (1.382) data 0.000 (0.021) loss 0.7847 (1.0642) acc 90.6250 (74.2969) lr 2.7103e-04 eta 2:05:45 +epoch [40/50] batch [45/500] time 1.362 (1.380) data 0.000 (0.018) loss 1.4229 (1.1044) acc 68.7500 (73.4722) lr 2.7103e-04 eta 2:05:28 +epoch [40/50] batch [50/500] time 1.373 (1.378) data 0.000 (0.017) loss 0.7427 (1.0909) acc 81.2500 (73.1875) lr 2.7103e-04 eta 2:05:10 +epoch [40/50] batch [55/500] time 1.361 (1.377) data 0.000 (0.015) loss 0.4158 (1.0846) acc 93.7500 (73.6932) lr 2.7103e-04 eta 2:04:55 +epoch [40/50] batch [60/500] time 1.374 (1.378) data 0.000 (0.014) loss 1.1240 (1.0707) acc 75.0000 (74.0104) lr 2.7103e-04 eta 2:04:55 +epoch [40/50] batch [65/500] time 1.354 (1.376) data 0.000 (0.013) loss 1.6299 (1.0752) acc 65.6250 (73.9904) lr 2.7103e-04 eta 2:04:39 +epoch [40/50] batch [70/500] time 1.375 (1.376) data 0.000 (0.012) loss 1.0713 (1.0537) acc 71.8750 (74.1964) lr 2.7103e-04 eta 2:04:30 +epoch [40/50] batch [75/500] time 1.343 (1.374) data 0.000 (0.011) loss 0.6055 (1.0476) acc 78.1250 (74.4167) lr 2.7103e-04 eta 2:04:15 +epoch [40/50] batch [80/500] time 1.368 (1.373) data 0.000 (0.010) loss 0.6504 (1.0454) acc 78.1250 (74.3750) lr 2.7103e-04 eta 2:04:02 +epoch [40/50] batch [85/500] time 1.357 (1.372) data 0.000 (0.010) 
loss 1.9131 (1.0623) acc 68.7500 (74.3750) lr 2.7103e-04 eta 2:03:50 +epoch [40/50] batch [90/500] time 1.373 (1.372) data 0.000 (0.009) loss 1.2607 (1.0618) acc 68.7500 (74.4097) lr 2.7103e-04 eta 2:03:44 +epoch [40/50] batch [95/500] time 1.366 (1.372) data 0.000 (0.009) loss 0.6763 (1.0507) acc 75.0000 (74.4079) lr 2.7103e-04 eta 2:03:33 +epoch [40/50] batch [100/500] time 1.361 (1.371) data 0.000 (0.008) loss 0.8374 (1.0501) acc 78.1250 (74.4688) lr 2.7103e-04 eta 2:03:26 +epoch [40/50] batch [105/500] time 1.361 (1.371) data 0.000 (0.008) loss 0.7417 (1.0493) acc 87.5000 (74.4940) lr 2.7103e-04 eta 2:03:16 +epoch [40/50] batch [110/500] time 1.370 (1.370) data 0.000 (0.008) loss 0.8003 (1.0515) acc 68.7500 (74.3750) lr 2.7103e-04 eta 2:03:05 +epoch [40/50] batch [115/500] time 1.341 (1.369) data 0.000 (0.007) loss 1.0088 (1.0481) acc 68.7500 (74.3750) lr 2.7103e-04 eta 2:02:53 +epoch [40/50] batch [120/500] time 1.349 (1.370) data 0.000 (0.007) loss 1.0195 (1.0482) acc 75.0000 (74.4271) lr 2.7103e-04 eta 2:02:48 +epoch [40/50] batch [125/500] time 1.361 (1.369) data 0.000 (0.007) loss 1.0723 (1.0488) acc 62.5000 (74.2000) lr 2.7103e-04 eta 2:02:38 +epoch [40/50] batch [130/500] time 1.365 (1.369) data 0.000 (0.007) loss 1.1035 (1.0478) acc 75.0000 (74.1346) lr 2.7103e-04 eta 2:02:29 +epoch [40/50] batch [135/500] time 1.364 (1.368) data 0.001 (0.006) loss 0.6333 (1.0479) acc 84.3750 (74.0278) lr 2.7103e-04 eta 2:02:21 +epoch [40/50] batch [140/500] time 1.371 (1.368) data 0.000 (0.006) loss 1.9834 (1.0444) acc 56.2500 (74.1071) lr 2.7103e-04 eta 2:02:14 +epoch [40/50] batch [145/500] time 1.351 (1.369) data 0.001 (0.006) loss 0.6567 (1.0393) acc 78.1250 (74.1164) lr 2.7103e-04 eta 2:02:08 +epoch [40/50] batch [150/500] time 1.355 (1.368) data 0.000 (0.006) loss 0.9048 (1.0380) acc 84.3750 (74.2917) lr 2.7103e-04 eta 2:02:00 +epoch [40/50] batch [155/500] time 1.358 (1.368) data 0.000 (0.006) loss 1.1982 (1.0509) acc 71.8750 (74.0323) lr 2.7103e-04 eta 2:01:53 
+epoch [40/50] batch [160/500] time 1.352 (1.368) data 0.000 (0.005) loss 0.9478 (1.0448) acc 71.8750 (74.1211) lr 2.7103e-04 eta 2:01:45 +epoch [40/50] batch [165/500] time 1.360 (1.369) data 0.000 (0.005) loss 0.9595 (1.0530) acc 78.1250 (73.9583) lr 2.7103e-04 eta 2:01:42 +epoch [40/50] batch [170/500] time 1.358 (1.369) data 0.000 (0.005) loss 0.9780 (1.0627) acc 68.7500 (73.7868) lr 2.7103e-04 eta 2:01:34 +epoch [40/50] batch [175/500] time 1.353 (1.368) data 0.000 (0.005) loss 1.2568 (1.0625) acc 65.6250 (73.6964) lr 2.7103e-04 eta 2:01:25 +epoch [40/50] batch [180/500] time 1.357 (1.368) data 0.000 (0.005) loss 0.9219 (1.0587) acc 68.7500 (73.8194) lr 2.7103e-04 eta 2:01:16 +epoch [40/50] batch [185/500] time 1.355 (1.368) data 0.000 (0.005) loss 1.6816 (1.0584) acc 56.2500 (73.7500) lr 2.7103e-04 eta 2:01:09 +epoch [40/50] batch [190/500] time 1.347 (1.367) data 0.000 (0.005) loss 0.5063 (1.0558) acc 84.3750 (73.8158) lr 2.7103e-04 eta 2:01:00 +epoch [40/50] batch [195/500] time 1.344 (1.367) data 0.000 (0.004) loss 1.0186 (1.0554) acc 68.7500 (73.8301) lr 2.7103e-04 eta 2:00:50 +epoch [40/50] batch [200/500] time 1.352 (1.367) data 0.000 (0.004) loss 0.4404 (1.0592) acc 84.3750 (73.7969) lr 2.7103e-04 eta 2:00:42 +epoch [40/50] batch [205/500] time 1.355 (1.366) data 0.000 (0.004) loss 0.6836 (1.0603) acc 84.3750 (73.7043) lr 2.7103e-04 eta 2:00:34 +epoch [40/50] batch [210/500] time 1.350 (1.366) data 0.000 (0.004) loss 1.2246 (1.0611) acc 78.1250 (73.7054) lr 2.7103e-04 eta 2:00:25 +epoch [40/50] batch [215/500] time 1.379 (1.366) data 0.000 (0.004) loss 1.0449 (1.0642) acc 78.1250 (73.6773) lr 2.7103e-04 eta 2:00:18 +epoch [40/50] batch [220/500] time 1.352 (1.365) data 0.000 (0.004) loss 0.9883 (1.0658) acc 78.1250 (73.6222) lr 2.7103e-04 eta 2:00:09 +epoch [40/50] batch [225/500] time 1.331 (1.365) data 0.000 (0.004) loss 0.7607 (1.0718) acc 78.1250 (73.4583) lr 2.7103e-04 eta 2:00:01 +epoch [40/50] batch [230/500] time 1.355 (1.365) data 0.000 
(0.004) loss 0.8789 (1.0696) acc 71.8750 (73.4647) lr 2.7103e-04 eta 1:59:52 +epoch [40/50] batch [235/500] time 1.345 (1.364) data 0.000 (0.004) loss 0.8975 (1.0700) acc 75.0000 (73.4176) lr 2.7103e-04 eta 1:59:43 +epoch [40/50] batch [240/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.4678 (1.0682) acc 62.5000 (73.5156) lr 2.7103e-04 eta 1:59:36 +epoch [40/50] batch [245/500] time 1.363 (1.364) data 0.000 (0.004) loss 0.7603 (1.0664) acc 75.0000 (73.5204) lr 2.7103e-04 eta 1:59:29 +epoch [40/50] batch [250/500] time 1.344 (1.364) data 0.000 (0.004) loss 0.8320 (1.0626) acc 75.0000 (73.5875) lr 2.7103e-04 eta 1:59:21 +epoch [40/50] batch [255/500] time 1.373 (1.364) data 0.000 (0.004) loss 0.9829 (1.0652) acc 71.8750 (73.5294) lr 2.7103e-04 eta 1:59:15 +epoch [40/50] batch [260/500] time 1.380 (1.364) data 0.000 (0.003) loss 1.0684 (1.0665) acc 62.5000 (73.5096) lr 2.7103e-04 eta 1:59:07 +epoch [40/50] batch [265/500] time 1.363 (1.365) data 0.000 (0.003) loss 1.3291 (1.0669) acc 68.7500 (73.5377) lr 2.7103e-04 eta 1:59:04 +epoch [40/50] batch [270/500] time 1.363 (1.365) data 0.000 (0.003) loss 0.8945 (1.0651) acc 75.0000 (73.4606) lr 2.7103e-04 eta 1:58:56 +epoch [40/50] batch [275/500] time 1.360 (1.364) data 0.000 (0.003) loss 0.8638 (1.0654) acc 87.5000 (73.5227) lr 2.7103e-04 eta 1:58:49 +epoch [40/50] batch [280/500] time 1.388 (1.365) data 0.000 (0.003) loss 0.5356 (1.0675) acc 87.5000 (73.5268) lr 2.7103e-04 eta 1:58:42 +epoch [40/50] batch [285/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.5449 (1.0673) acc 81.2500 (73.5746) lr 2.7103e-04 eta 1:58:36 +epoch [40/50] batch [290/500] time 1.376 (1.365) data 0.000 (0.003) loss 1.1777 (1.0667) acc 68.7500 (73.4806) lr 2.7103e-04 eta 1:58:29 +epoch [40/50] batch [295/500] time 1.366 (1.365) data 0.000 (0.003) loss 0.9082 (1.0697) acc 78.1250 (73.4640) lr 2.7103e-04 eta 1:58:22 +epoch [40/50] batch [300/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.0938 (1.0735) acc 71.8750 (73.4583) lr 2.7103e-04 
eta 1:58:15 +epoch [40/50] batch [305/500] time 1.492 (1.365) data 0.000 (0.003) loss 1.2549 (1.0757) acc 71.8750 (73.4324) lr 2.7103e-04 eta 1:58:12 +epoch [40/50] batch [310/500] time 1.372 (1.365) data 0.000 (0.003) loss 0.6431 (1.0725) acc 87.5000 (73.5383) lr 2.7103e-04 eta 1:58:04 +epoch [40/50] batch [315/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.6177 (1.0756) acc 81.2500 (73.4524) lr 2.7103e-04 eta 1:57:58 +epoch [40/50] batch [320/500] time 1.354 (1.365) data 0.000 (0.003) loss 1.2100 (1.0766) acc 68.7500 (73.3691) lr 2.7103e-04 eta 1:57:50 +epoch [40/50] batch [325/500] time 1.353 (1.365) data 0.000 (0.003) loss 0.8066 (1.0783) acc 75.0000 (73.3462) lr 2.7103e-04 eta 1:57:43 +epoch [40/50] batch [330/500] time 1.364 (1.365) data 0.001 (0.003) loss 1.1758 (1.0794) acc 68.7500 (73.3239) lr 2.7103e-04 eta 1:57:35 +epoch [40/50] batch [335/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.7378 (1.0796) acc 78.1250 (73.2836) lr 2.7103e-04 eta 1:57:28 +epoch [40/50] batch [340/500] time 1.369 (1.365) data 0.000 (0.003) loss 0.6011 (1.0795) acc 87.5000 (73.2996) lr 2.7103e-04 eta 1:57:21 +epoch [40/50] batch [345/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.6768 (1.0756) acc 75.0000 (73.2971) lr 2.7103e-04 eta 1:57:13 +epoch [40/50] batch [350/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.0156 (1.0742) acc 78.1250 (73.3393) lr 2.7103e-04 eta 1:57:06 +epoch [40/50] batch [355/500] time 1.351 (1.364) data 0.000 (0.003) loss 0.8921 (1.0732) acc 81.2500 (73.3363) lr 2.7103e-04 eta 1:56:58 +epoch [40/50] batch [360/500] time 1.364 (1.364) data 0.000 (0.003) loss 0.7275 (1.0686) acc 81.2500 (73.4201) lr 2.7103e-04 eta 1:56:51 +epoch [40/50] batch [365/500] time 1.352 (1.364) data 0.000 (0.003) loss 0.7910 (1.0681) acc 78.1250 (73.3818) lr 2.7103e-04 eta 1:56:43 +epoch [40/50] batch [370/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.5000 (1.0680) acc 68.7500 (73.4122) lr 2.7103e-04 eta 1:56:36 +epoch [40/50] batch [375/500] time 1.378 (1.364) data 
0.000 (0.002) loss 1.3916 (1.0693) acc 68.7500 (73.3833) lr 2.7103e-04 eta 1:56:29 +epoch [40/50] batch [380/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.4023 (1.0690) acc 78.1250 (73.4046) lr 2.7103e-04 eta 1:56:22 +epoch [40/50] batch [385/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.7461 (1.0723) acc 81.2500 (73.3604) lr 2.7103e-04 eta 1:56:16 +epoch [40/50] batch [390/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.0723 (1.0736) acc 75.0000 (73.3253) lr 2.7103e-04 eta 1:56:08 +epoch [40/50] batch [395/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.0830 (1.0731) acc 68.7500 (73.3149) lr 2.7103e-04 eta 1:56:02 +epoch [40/50] batch [400/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.1807 (1.0722) acc 68.7500 (73.3594) lr 2.7103e-04 eta 1:55:55 +epoch [40/50] batch [405/500] time 1.347 (1.364) data 0.000 (0.002) loss 0.7192 (1.0721) acc 84.3750 (73.3488) lr 2.7103e-04 eta 1:55:49 +epoch [40/50] batch [410/500] time 1.360 (1.364) data 0.000 (0.002) loss 1.3213 (1.0736) acc 62.5000 (73.2546) lr 2.7103e-04 eta 1:55:42 +epoch [40/50] batch [415/500] time 1.345 (1.364) data 0.000 (0.002) loss 0.7896 (1.0713) acc 81.2500 (73.2907) lr 2.7103e-04 eta 1:55:34 +epoch [40/50] batch [420/500] time 1.373 (1.364) data 0.000 (0.002) loss 0.8081 (1.0714) acc 81.2500 (73.2738) lr 2.7103e-04 eta 1:55:27 +epoch [40/50] batch [425/500] time 1.349 (1.364) data 0.000 (0.002) loss 0.7676 (1.0707) acc 75.0000 (73.2353) lr 2.7103e-04 eta 1:55:21 +epoch [40/50] batch [430/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.6748 (1.0704) acc 87.5000 (73.2776) lr 2.7103e-04 eta 1:55:12 +epoch [40/50] batch [435/500] time 1.334 (1.363) data 0.000 (0.002) loss 1.0332 (1.0715) acc 71.8750 (73.2615) lr 2.7103e-04 eta 1:55:05 +epoch [40/50] batch [440/500] time 1.375 (1.363) data 0.001 (0.002) loss 1.7617 (1.0746) acc 62.5000 (73.1960) lr 2.7103e-04 eta 1:54:57 +epoch [40/50] batch [445/500] time 1.373 (1.363) data 0.000 (0.002) loss 1.2393 (1.0763) acc 78.1250 (73.1601) lr 
2.7103e-04 eta 1:54:51 +epoch [40/50] batch [450/500] time 1.359 (1.364) data 0.000 (0.002) loss 0.8320 (1.0758) acc 75.0000 (73.1736) lr 2.7103e-04 eta 1:54:45 +epoch [40/50] batch [455/500] time 1.374 (1.364) data 0.000 (0.002) loss 2.0293 (1.0787) acc 53.1250 (73.1525) lr 2.7103e-04 eta 1:54:39 +epoch [40/50] batch [460/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.1152 (1.0778) acc 81.2500 (73.2065) lr 2.7103e-04 eta 1:54:32 +epoch [40/50] batch [465/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.5898 (1.0764) acc 78.1250 (73.2056) lr 2.7103e-04 eta 1:54:25 +epoch [40/50] batch [470/500] time 1.340 (1.364) data 0.000 (0.002) loss 1.2939 (1.0771) acc 71.8750 (73.2181) lr 2.7103e-04 eta 1:54:18 +epoch [40/50] batch [475/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.8813 (1.0759) acc 81.2500 (73.2566) lr 2.7103e-04 eta 1:54:12 +epoch [40/50] batch [480/500] time 1.340 (1.364) data 0.001 (0.002) loss 1.1777 (1.0740) acc 84.3750 (73.3203) lr 2.7103e-04 eta 1:54:05 +epoch [40/50] batch [485/500] time 1.372 (1.364) data 0.001 (0.002) loss 1.2295 (1.0755) acc 68.7500 (73.2474) lr 2.7103e-04 eta 1:53:58 +epoch [40/50] batch [490/500] time 1.369 (1.364) data 0.000 (0.002) loss 1.4609 (1.0750) acc 71.8750 (73.3227) lr 2.7103e-04 eta 1:53:51 +epoch [40/50] batch [495/500] time 1.367 (1.364) data 0.000 (0.002) loss 1.0000 (1.0737) acc 68.7500 (73.3270) lr 2.7103e-04 eta 1:53:44 +epoch [40/50] batch [500/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.1738 (1.0747) acc 81.2500 (73.3187) lr 2.2949e-04 eta 1:53:37 +epoch [41/50] batch [5/500] time 1.360 (1.523) data 0.000 (0.156) loss 0.8813 (0.9584) acc 71.8750 (71.2500) lr 2.2949e-04 eta 2:06:45 +epoch [41/50] batch [10/500] time 1.365 (1.441) data 0.001 (0.078) loss 0.5039 (0.9731) acc 87.5000 (73.4375) lr 2.2949e-04 eta 1:59:49 +epoch [41/50] batch [15/500] time 1.352 (1.416) data 0.000 (0.052) loss 1.0391 (0.9888) acc 71.8750 (73.3333) lr 2.2949e-04 eta 1:57:40 +epoch [41/50] batch [20/500] time 1.349 (1.402) 
data 0.000 (0.039) loss 1.0166 (1.0083) acc 75.0000 (73.1250) lr 2.2949e-04 eta 1:56:22 +epoch [41/50] batch [25/500] time 1.353 (1.393) data 0.000 (0.032) loss 1.2324 (0.9834) acc 75.0000 (73.2500) lr 2.2949e-04 eta 1:55:29 +epoch [41/50] batch [30/500] time 1.357 (1.387) data 0.000 (0.026) loss 1.3145 (1.0101) acc 65.6250 (73.2292) lr 2.2949e-04 eta 1:54:53 +epoch [41/50] batch [35/500] time 1.354 (1.382) data 0.001 (0.023) loss 1.1738 (1.0494) acc 75.0000 (72.5893) lr 2.2949e-04 eta 1:54:22 +epoch [41/50] batch [40/500] time 1.350 (1.379) data 0.000 (0.020) loss 2.0820 (1.0828) acc 68.7500 (72.6562) lr 2.2949e-04 eta 1:53:58 +epoch [41/50] batch [45/500] time 1.372 (1.377) data 0.000 (0.018) loss 0.8286 (1.0699) acc 78.1250 (72.8472) lr 2.2949e-04 eta 1:53:43 +epoch [41/50] batch [50/500] time 1.374 (1.377) data 0.000 (0.016) loss 1.4971 (1.0900) acc 68.7500 (72.5000) lr 2.2949e-04 eta 1:53:35 +epoch [41/50] batch [55/500] time 1.359 (1.375) data 0.000 (0.015) loss 1.0967 (1.0908) acc 65.6250 (72.7273) lr 2.2949e-04 eta 1:53:21 +epoch [41/50] batch [60/500] time 1.363 (1.375) data 0.000 (0.013) loss 1.3740 (1.0943) acc 75.0000 (72.9167) lr 2.2949e-04 eta 1:53:12 +epoch [41/50] batch [65/500] time 1.369 (1.373) data 0.000 (0.012) loss 0.9863 (1.0864) acc 81.2500 (73.1731) lr 2.2949e-04 eta 1:52:58 +epoch [41/50] batch [70/500] time 1.376 (1.373) data 0.000 (0.012) loss 0.8652 (1.0673) acc 65.6250 (73.4375) lr 2.2949e-04 eta 1:52:46 +epoch [41/50] batch [75/500] time 1.344 (1.372) data 0.000 (0.011) loss 0.6992 (1.0555) acc 87.5000 (73.7917) lr 2.2949e-04 eta 1:52:34 +epoch [41/50] batch [80/500] time 1.368 (1.371) data 0.000 (0.010) loss 0.9829 (1.0641) acc 75.0000 (73.3984) lr 2.2949e-04 eta 1:52:24 +epoch [41/50] batch [85/500] time 1.343 (1.370) data 0.000 (0.010) loss 1.1133 (1.0658) acc 71.8750 (73.3088) lr 2.2949e-04 eta 1:52:13 +epoch [41/50] batch [90/500] time 1.383 (1.369) data 0.000 (0.009) loss 1.2861 (1.0663) acc 75.0000 (73.6111) lr 2.2949e-04 eta 
1:52:03 +epoch [41/50] batch [95/500] time 1.352 (1.370) data 0.000 (0.009) loss 0.9082 (1.0608) acc 68.7500 (73.5197) lr 2.2949e-04 eta 1:52:02 +epoch [41/50] batch [100/500] time 1.347 (1.370) data 0.001 (0.008) loss 0.5728 (1.0475) acc 90.6250 (73.9062) lr 2.2949e-04 eta 1:51:51 +epoch [41/50] batch [105/500] time 1.342 (1.369) data 0.000 (0.008) loss 1.1553 (1.0651) acc 71.8750 (73.4226) lr 2.2949e-04 eta 1:51:41 +epoch [41/50] batch [110/500] time 1.346 (1.368) data 0.001 (0.008) loss 1.2275 (1.0659) acc 62.5000 (73.3523) lr 2.2949e-04 eta 1:51:31 +epoch [41/50] batch [115/500] time 1.348 (1.368) data 0.000 (0.007) loss 1.5889 (1.0744) acc 56.2500 (73.2337) lr 2.2949e-04 eta 1:51:21 +epoch [41/50] batch [120/500] time 1.337 (1.367) data 0.000 (0.007) loss 1.0068 (1.0690) acc 68.7500 (73.3333) lr 2.2949e-04 eta 1:51:11 +epoch [41/50] batch [125/500] time 1.357 (1.367) data 0.000 (0.007) loss 0.6177 (1.0564) acc 71.8750 (73.5000) lr 2.2949e-04 eta 1:51:02 +epoch [41/50] batch [130/500] time 1.354 (1.367) data 0.000 (0.006) loss 0.6631 (1.0551) acc 84.3750 (73.4615) lr 2.2949e-04 eta 1:50:55 +epoch [41/50] batch [135/500] time 1.504 (1.367) data 0.000 (0.006) loss 1.5527 (1.0616) acc 62.5000 (73.4028) lr 2.2949e-04 eta 1:50:52 +epoch [41/50] batch [140/500] time 1.360 (1.367) data 0.000 (0.006) loss 0.7275 (1.0510) acc 78.1250 (73.5491) lr 2.2949e-04 eta 1:50:44 +epoch [41/50] batch [145/500] time 1.331 (1.366) data 0.000 (0.006) loss 1.1133 (1.0468) acc 75.0000 (73.6853) lr 2.2949e-04 eta 1:50:33 +epoch [41/50] batch [150/500] time 1.370 (1.366) data 0.000 (0.006) loss 1.2402 (1.0519) acc 75.0000 (73.5208) lr 2.2949e-04 eta 1:50:23 +epoch [41/50] batch [155/500] time 1.362 (1.366) data 0.000 (0.005) loss 0.6948 (1.0531) acc 84.3750 (73.5282) lr 2.2949e-04 eta 1:50:16 +epoch [41/50] batch [160/500] time 1.364 (1.365) data 0.000 (0.005) loss 1.1318 (1.0502) acc 71.8750 (73.4766) lr 2.2949e-04 eta 1:50:07 +epoch [41/50] batch [165/500] time 1.375 (1.365) data 0.000 
(0.005) loss 1.2041 (1.0489) acc 62.5000 (73.5795) lr 2.2949e-04 eta 1:49:59 +epoch [41/50] batch [170/500] time 1.355 (1.365) data 0.000 (0.005) loss 1.0381 (1.0469) acc 75.0000 (73.6397) lr 2.2949e-04 eta 1:49:52 +epoch [41/50] batch [175/500] time 1.362 (1.365) data 0.001 (0.005) loss 1.2432 (1.0553) acc 75.0000 (73.5179) lr 2.2949e-04 eta 1:49:44 +epoch [41/50] batch [180/500] time 1.339 (1.364) data 0.000 (0.005) loss 1.2412 (1.0551) acc 68.7500 (73.5069) lr 2.2949e-04 eta 1:49:35 +epoch [41/50] batch [185/500] time 1.355 (1.364) data 0.000 (0.005) loss 1.3262 (1.0567) acc 78.1250 (73.5135) lr 2.2949e-04 eta 1:49:27 +epoch [41/50] batch [190/500] time 1.341 (1.364) data 0.000 (0.004) loss 1.2754 (1.0560) acc 62.5000 (73.4539) lr 2.2949e-04 eta 1:49:19 +epoch [41/50] batch [195/500] time 1.347 (1.363) data 0.000 (0.004) loss 1.3848 (1.0576) acc 65.6250 (73.4455) lr 2.2949e-04 eta 1:49:10 +epoch [41/50] batch [200/500] time 1.359 (1.363) data 0.000 (0.004) loss 0.9961 (1.0594) acc 75.0000 (73.4375) lr 2.2949e-04 eta 1:49:03 +epoch [41/50] batch [205/500] time 1.339 (1.363) data 0.001 (0.004) loss 0.9712 (1.0611) acc 71.8750 (73.4451) lr 2.2949e-04 eta 1:48:55 +epoch [41/50] batch [210/500] time 1.367 (1.363) data 0.000 (0.004) loss 0.7632 (1.0593) acc 78.1250 (73.4226) lr 2.2949e-04 eta 1:48:48 +epoch [41/50] batch [215/500] time 1.360 (1.363) data 0.000 (0.004) loss 1.2139 (1.0603) acc 53.1250 (73.3866) lr 2.2949e-04 eta 1:48:41 +epoch [41/50] batch [220/500] time 1.335 (1.363) data 0.000 (0.004) loss 1.1055 (1.0626) acc 71.8750 (73.3239) lr 2.2949e-04 eta 1:48:33 +epoch [41/50] batch [225/500] time 1.368 (1.363) data 0.000 (0.004) loss 1.1436 (1.0628) acc 71.8750 (73.3056) lr 2.2949e-04 eta 1:48:26 +epoch [41/50] batch [230/500] time 1.369 (1.363) data 0.000 (0.004) loss 1.3682 (1.0641) acc 78.1250 (73.3016) lr 2.2949e-04 eta 1:48:19 +epoch [41/50] batch [235/500] time 1.345 (1.363) data 0.000 (0.004) loss 0.6519 (1.0627) acc 81.2500 (73.3777) lr 2.2949e-04 
eta 1:48:15 +epoch [41/50] batch [240/500] time 1.355 (1.363) data 0.000 (0.004) loss 0.8691 (1.0608) acc 81.2500 (73.4896) lr 2.2949e-04 eta 1:48:08 +epoch [41/50] batch [245/500] time 1.363 (1.363) data 0.000 (0.004) loss 0.6812 (1.0624) acc 78.1250 (73.4439) lr 2.2949e-04 eta 1:48:00 +epoch [41/50] batch [250/500] time 1.350 (1.363) data 0.000 (0.004) loss 0.4883 (1.0589) acc 78.1250 (73.4500) lr 2.2949e-04 eta 1:47:52 +epoch [41/50] batch [255/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.0771 (1.0584) acc 65.6250 (73.4436) lr 2.2949e-04 eta 1:47:45 +epoch [41/50] batch [260/500] time 1.348 (1.363) data 0.000 (0.003) loss 1.6729 (1.0609) acc 75.0000 (73.4014) lr 2.2949e-04 eta 1:47:38 +epoch [41/50] batch [265/500] time 1.366 (1.363) data 0.000 (0.003) loss 1.1562 (1.0643) acc 71.8750 (73.3373) lr 2.2949e-04 eta 1:47:31 +epoch [41/50] batch [270/500] time 1.351 (1.363) data 0.000 (0.003) loss 1.0225 (1.0633) acc 65.6250 (73.3449) lr 2.2949e-04 eta 1:47:25 +epoch [41/50] batch [275/500] time 1.362 (1.363) data 0.000 (0.003) loss 0.7407 (1.0638) acc 75.0000 (73.3068) lr 2.2949e-04 eta 1:47:17 +epoch [41/50] batch [280/500] time 1.332 (1.363) data 0.000 (0.003) loss 0.7466 (1.0595) acc 81.2500 (73.4152) lr 2.2949e-04 eta 1:47:11 +epoch [41/50] batch [285/500] time 1.366 (1.363) data 0.000 (0.003) loss 0.6753 (1.0611) acc 81.2500 (73.3333) lr 2.2949e-04 eta 1:47:04 +epoch [41/50] batch [290/500] time 1.366 (1.363) data 0.000 (0.003) loss 1.0381 (1.0607) acc 75.0000 (73.3297) lr 2.2949e-04 eta 1:46:57 +epoch [41/50] batch [295/500] time 1.373 (1.363) data 0.001 (0.003) loss 1.4580 (1.0632) acc 59.3750 (73.2733) lr 2.2949e-04 eta 1:46:51 +epoch [41/50] batch [300/500] time 1.355 (1.363) data 0.000 (0.003) loss 1.1309 (1.0602) acc 78.1250 (73.3542) lr 2.2949e-04 eta 1:46:44 +epoch [41/50] batch [305/500] time 1.357 (1.363) data 0.000 (0.003) loss 1.2539 (1.0588) acc 65.6250 (73.3402) lr 2.2949e-04 eta 1:46:37 +epoch [41/50] batch [310/500] time 1.332 (1.362) data 
0.000 (0.003) loss 1.1377 (1.0599) acc 75.0000 (73.3065) lr 2.2949e-04 eta 1:46:29 +epoch [41/50] batch [315/500] time 1.375 (1.362) data 0.000 (0.003) loss 1.3086 (1.0618) acc 65.6250 (73.2143) lr 2.2949e-04 eta 1:46:22 +epoch [41/50] batch [320/500] time 1.354 (1.362) data 0.000 (0.003) loss 1.3926 (1.0620) acc 62.5000 (73.2031) lr 2.2949e-04 eta 1:46:15 +epoch [41/50] batch [325/500] time 1.362 (1.362) data 0.000 (0.003) loss 1.2617 (1.0590) acc 62.5000 (73.2308) lr 2.2949e-04 eta 1:46:07 +epoch [41/50] batch [330/500] time 1.341 (1.362) data 0.000 (0.003) loss 1.0879 (1.0612) acc 68.7500 (73.1723) lr 2.2949e-04 eta 1:45:59 +epoch [41/50] batch [335/500] time 1.362 (1.362) data 0.000 (0.003) loss 1.1123 (1.0609) acc 71.8750 (73.1996) lr 2.2949e-04 eta 1:45:52 +epoch [41/50] batch [340/500] time 1.357 (1.362) data 0.000 (0.003) loss 0.7695 (1.0570) acc 78.1250 (73.2996) lr 2.2949e-04 eta 1:45:45 +epoch [41/50] batch [345/500] time 1.352 (1.362) data 0.000 (0.003) loss 0.8682 (1.0545) acc 84.3750 (73.3967) lr 2.2949e-04 eta 1:45:38 +epoch [41/50] batch [350/500] time 1.352 (1.362) data 0.000 (0.003) loss 0.9399 (1.0555) acc 71.8750 (73.4018) lr 2.2949e-04 eta 1:45:31 +epoch [41/50] batch [355/500] time 1.370 (1.362) data 0.000 (0.003) loss 0.4111 (1.0522) acc 87.5000 (73.5035) lr 2.2949e-04 eta 1:45:25 +epoch [41/50] batch [360/500] time 1.364 (1.362) data 0.000 (0.003) loss 0.8096 (1.0538) acc 78.1250 (73.5243) lr 2.2949e-04 eta 1:45:18 +epoch [41/50] batch [365/500] time 1.369 (1.362) data 0.000 (0.003) loss 1.2920 (1.0563) acc 68.7500 (73.5531) lr 2.2949e-04 eta 1:45:11 +epoch [41/50] batch [370/500] time 1.378 (1.362) data 0.000 (0.002) loss 1.2393 (1.0588) acc 71.8750 (73.4966) lr 2.2949e-04 eta 1:45:04 +epoch [41/50] batch [375/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.5557 (1.0564) acc 53.1250 (73.5333) lr 2.2949e-04 eta 1:44:57 +epoch [41/50] batch [380/500] time 1.342 (1.362) data 0.000 (0.002) loss 1.1543 (1.0552) acc 71.8750 (73.5773) lr 
2.2949e-04 eta 1:44:51 +epoch [41/50] batch [385/500] time 1.346 (1.362) data 0.000 (0.002) loss 1.4629 (1.0570) acc 56.2500 (73.5390) lr 2.2949e-04 eta 1:44:44 +epoch [41/50] batch [390/500] time 1.370 (1.362) data 0.000 (0.002) loss 0.7358 (1.0560) acc 75.0000 (73.5737) lr 2.2949e-04 eta 1:44:37 +epoch [41/50] batch [395/500] time 1.345 (1.362) data 0.000 (0.002) loss 1.0850 (1.0555) acc 65.6250 (73.5364) lr 2.2949e-04 eta 1:44:31 +epoch [41/50] batch [400/500] time 1.382 (1.362) data 0.000 (0.002) loss 0.7153 (1.0542) acc 75.0000 (73.5859) lr 2.2949e-04 eta 1:44:25 +epoch [41/50] batch [405/500] time 1.365 (1.362) data 0.000 (0.002) loss 1.0479 (1.0547) acc 81.2500 (73.5880) lr 2.2949e-04 eta 1:44:18 +epoch [41/50] batch [410/500] time 1.351 (1.362) data 0.000 (0.002) loss 0.9385 (1.0566) acc 81.2500 (73.6204) lr 2.2949e-04 eta 1:44:11 +epoch [41/50] batch [415/500] time 1.367 (1.362) data 0.000 (0.002) loss 1.3262 (1.0571) acc 65.6250 (73.6220) lr 2.2949e-04 eta 1:44:05 +epoch [41/50] batch [420/500] time 1.379 (1.362) data 0.000 (0.002) loss 0.7788 (1.0553) acc 84.3750 (73.6979) lr 2.2949e-04 eta 1:43:58 +epoch [41/50] batch [425/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.3242 (1.0555) acc 71.8750 (73.7132) lr 2.2949e-04 eta 1:43:53 +epoch [41/50] batch [430/500] time 1.367 (1.363) data 0.000 (0.002) loss 1.2852 (1.0556) acc 78.1250 (73.6991) lr 2.2949e-04 eta 1:43:46 +epoch [41/50] batch [435/500] time 1.345 (1.363) data 0.000 (0.002) loss 0.4663 (1.0557) acc 84.3750 (73.6997) lr 2.2949e-04 eta 1:43:39 +epoch [41/50] batch [440/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.9946 (1.0546) acc 78.1250 (73.7429) lr 2.2949e-04 eta 1:43:33 +epoch [41/50] batch [445/500] time 1.339 (1.362) data 0.000 (0.002) loss 0.9292 (1.0537) acc 75.0000 (73.7851) lr 2.2949e-04 eta 1:43:25 +epoch [41/50] batch [450/500] time 1.344 (1.362) data 0.000 (0.002) loss 1.2041 (1.0542) acc 75.0000 (73.7847) lr 2.2949e-04 eta 1:43:18 +epoch [41/50] batch [455/500] time 1.368 
(1.362) data 0.000 (0.002) loss 0.9526 (1.0548) acc 78.1250 (73.7775) lr 2.2949e-04 eta 1:43:11 +epoch [41/50] batch [460/500] time 1.337 (1.362) data 0.000 (0.002) loss 1.0225 (1.0570) acc 84.3750 (73.7908) lr 2.2949e-04 eta 1:43:04 +epoch [41/50] batch [465/500] time 1.361 (1.362) data 0.000 (0.002) loss 0.9990 (1.0564) acc 68.7500 (73.7702) lr 2.2949e-04 eta 1:42:56 +epoch [41/50] batch [470/500] time 1.378 (1.362) data 0.000 (0.002) loss 1.1680 (1.0569) acc 68.7500 (73.7434) lr 2.2949e-04 eta 1:42:49 +epoch [41/50] batch [475/500] time 1.373 (1.362) data 0.000 (0.002) loss 1.3545 (1.0571) acc 71.8750 (73.7500) lr 2.2949e-04 eta 1:42:42 +epoch [41/50] batch [480/500] time 1.364 (1.362) data 0.000 (0.002) loss 1.0830 (1.0577) acc 65.6250 (73.7174) lr 2.2949e-04 eta 1:42:35 +epoch [41/50] batch [485/500] time 1.368 (1.362) data 0.001 (0.002) loss 0.8511 (1.0559) acc 78.1250 (73.7822) lr 2.2949e-04 eta 1:42:28 +epoch [41/50] batch [490/500] time 1.377 (1.362) data 0.000 (0.002) loss 0.8145 (1.0570) acc 78.1250 (73.7372) lr 2.2949e-04 eta 1:42:22 +epoch [41/50] batch [495/500] time 1.359 (1.362) data 0.000 (0.002) loss 0.8228 (1.0556) acc 78.1250 (73.7058) lr 2.2949e-04 eta 1:42:15 +epoch [41/50] batch [500/500] time 1.382 (1.362) data 0.000 (0.002) loss 1.0752 (1.0538) acc 78.1250 (73.7375) lr 1.9098e-04 eta 1:42:08 +epoch [42/50] batch [5/500] time 1.359 (1.534) data 0.000 (0.166) loss 0.6636 (0.9208) acc 81.2500 (75.6250) lr 1.9098e-04 eta 1:54:54 +epoch [42/50] batch [10/500] time 1.338 (1.447) data 0.000 (0.083) loss 1.3965 (1.0671) acc 62.5000 (74.0625) lr 1.9098e-04 eta 1:48:19 +epoch [42/50] batch [15/500] time 1.377 (1.419) data 0.000 (0.056) loss 1.1748 (1.0640) acc 78.1250 (73.1250) lr 1.9098e-04 eta 1:46:03 +epoch [42/50] batch [20/500] time 1.352 (1.407) data 0.000 (0.042) loss 0.7119 (1.0252) acc 75.0000 (73.7500) lr 1.9098e-04 eta 1:45:04 +epoch [42/50] batch [25/500] time 1.360 (1.397) data 0.000 (0.034) loss 1.1553 (1.0090) acc 75.0000 (74.2500) lr 
1.9098e-04 eta 1:44:11 +epoch [42/50] batch [30/500] time 1.331 (1.389) data 0.000 (0.028) loss 0.7168 (1.0239) acc 81.2500 (74.3750) lr 1.9098e-04 eta 1:43:29 +epoch [42/50] batch [35/500] time 1.354 (1.384) data 0.000 (0.024) loss 0.4255 (0.9877) acc 87.5000 (75.1786) lr 1.9098e-04 eta 1:42:58 +epoch [42/50] batch [40/500] time 1.353 (1.380) data 0.000 (0.021) loss 1.0020 (1.0062) acc 78.1250 (74.5312) lr 1.9098e-04 eta 1:42:35 +epoch [42/50] batch [45/500] time 1.350 (1.378) data 0.000 (0.019) loss 0.7310 (0.9926) acc 75.0000 (75.0000) lr 1.9098e-04 eta 1:42:19 +epoch [42/50] batch [50/500] time 1.352 (1.376) data 0.000 (0.017) loss 0.6191 (0.9798) acc 81.2500 (75.3750) lr 1.9098e-04 eta 1:42:01 +epoch [42/50] batch [55/500] time 1.373 (1.375) data 0.000 (0.015) loss 1.1982 (0.9874) acc 75.0000 (74.8295) lr 1.9098e-04 eta 1:41:52 +epoch [42/50] batch [60/500] time 1.338 (1.374) data 0.000 (0.014) loss 0.7900 (1.0053) acc 90.6250 (74.7917) lr 1.9098e-04 eta 1:41:38 +epoch [42/50] batch [65/500] time 1.363 (1.372) data 0.000 (0.013) loss 1.2764 (1.0066) acc 65.6250 (74.4231) lr 1.9098e-04 eta 1:41:25 +epoch [42/50] batch [70/500] time 1.340 (1.371) data 0.000 (0.012) loss 1.3232 (1.0205) acc 75.0000 (74.0625) lr 1.9098e-04 eta 1:41:13 +epoch [42/50] batch [75/500] time 1.373 (1.372) data 0.000 (0.011) loss 1.0771 (1.0232) acc 68.7500 (73.9167) lr 1.9098e-04 eta 1:41:09 +epoch [42/50] batch [80/500] time 1.350 (1.370) data 0.000 (0.011) loss 1.7539 (1.0383) acc 65.6250 (73.5938) lr 1.9098e-04 eta 1:40:55 +epoch [42/50] batch [85/500] time 1.374 (1.370) data 0.000 (0.010) loss 1.0840 (1.0507) acc 71.8750 (73.4191) lr 1.9098e-04 eta 1:40:46 +epoch [42/50] batch [90/500] time 1.328 (1.368) data 0.000 (0.010) loss 1.3682 (1.0555) acc 68.7500 (73.1944) lr 1.9098e-04 eta 1:40:34 +epoch [42/50] batch [95/500] time 1.351 (1.368) data 0.000 (0.009) loss 1.3516 (1.0554) acc 68.7500 (73.3224) lr 1.9098e-04 eta 1:40:24 +epoch [42/50] batch [100/500] time 1.368 (1.368) data 
0.000 (0.009) loss 1.0459 (1.0542) acc 71.8750 (73.4688) lr 1.9098e-04 eta 1:40:17 +epoch [42/50] batch [105/500] time 1.354 (1.367) data 0.000 (0.008) loss 1.4170 (1.0711) acc 68.7500 (73.0655) lr 1.9098e-04 eta 1:40:08 +epoch [42/50] batch [110/500] time 1.336 (1.367) data 0.000 (0.008) loss 1.0029 (1.0744) acc 71.8750 (73.0114) lr 1.9098e-04 eta 1:39:59 +epoch [42/50] batch [115/500] time 1.374 (1.366) data 0.000 (0.008) loss 1.4775 (1.0664) acc 71.8750 (73.1522) lr 1.9098e-04 eta 1:39:50 +epoch [42/50] batch [120/500] time 1.354 (1.367) data 0.000 (0.007) loss 0.8208 (1.0692) acc 78.1250 (73.2031) lr 1.9098e-04 eta 1:39:46 +epoch [42/50] batch [125/500] time 1.361 (1.367) data 0.000 (0.007) loss 0.6602 (1.0658) acc 78.1250 (73.1500) lr 1.9098e-04 eta 1:39:38 +epoch [42/50] batch [130/500] time 1.357 (1.366) data 0.000 (0.007) loss 0.4458 (1.0627) acc 90.6250 (73.1971) lr 1.9098e-04 eta 1:39:30 +epoch [42/50] batch [135/500] time 1.363 (1.366) data 0.000 (0.006) loss 0.6611 (1.0555) acc 71.8750 (73.3333) lr 1.9098e-04 eta 1:39:21 +epoch [42/50] batch [140/500] time 1.362 (1.366) data 0.000 (0.006) loss 0.5557 (1.0474) acc 78.1250 (73.4152) lr 1.9098e-04 eta 1:39:14 +epoch [42/50] batch [145/500] time 1.366 (1.366) data 0.000 (0.006) loss 0.5820 (1.0429) acc 84.3750 (73.5776) lr 1.9098e-04 eta 1:39:07 +epoch [42/50] batch [150/500] time 1.340 (1.365) data 0.000 (0.006) loss 0.7485 (1.0421) acc 75.0000 (73.5833) lr 1.9098e-04 eta 1:38:59 +epoch [42/50] batch [155/500] time 1.371 (1.365) data 0.000 (0.006) loss 0.5747 (1.0413) acc 84.3750 (73.4476) lr 1.9098e-04 eta 1:38:52 +epoch [42/50] batch [160/500] time 1.338 (1.365) data 0.000 (0.006) loss 1.2627 (1.0450) acc 68.7500 (73.4570) lr 1.9098e-04 eta 1:38:42 +epoch [42/50] batch [165/500] time 1.339 (1.364) data 0.000 (0.005) loss 1.6709 (1.0467) acc 62.5000 (73.4848) lr 1.9098e-04 eta 1:38:33 +epoch [42/50] batch [170/500] time 1.372 (1.364) data 0.000 (0.005) loss 0.9297 (1.0513) acc 75.0000 (73.3824) lr 
1.9098e-04 eta 1:38:25 +epoch [42/50] batch [175/500] time 1.362 (1.364) data 0.000 (0.005) loss 0.9165 (1.0466) acc 68.7500 (73.3750) lr 1.9098e-04 eta 1:38:17 +epoch [42/50] batch [180/500] time 1.365 (1.364) data 0.000 (0.005) loss 1.8926 (1.0464) acc 59.3750 (73.3854) lr 1.9098e-04 eta 1:38:10 +epoch [42/50] batch [185/500] time 1.367 (1.363) data 0.000 (0.005) loss 0.8809 (1.0461) acc 75.0000 (73.3446) lr 1.9098e-04 eta 1:38:03 +epoch [42/50] batch [190/500] time 1.364 (1.363) data 0.000 (0.005) loss 0.8154 (1.0423) acc 84.3750 (73.4868) lr 1.9098e-04 eta 1:37:56 +epoch [42/50] batch [195/500] time 1.364 (1.363) data 0.000 (0.005) loss 0.8960 (1.0418) acc 78.1250 (73.5577) lr 1.9098e-04 eta 1:37:49 +epoch [42/50] batch [200/500] time 1.366 (1.364) data 0.000 (0.004) loss 0.7549 (1.0405) acc 71.8750 (73.5625) lr 1.9098e-04 eta 1:37:43 +epoch [42/50] batch [205/500] time 1.372 (1.364) data 0.001 (0.004) loss 0.8823 (1.0435) acc 75.0000 (73.5671) lr 1.9098e-04 eta 1:37:37 +epoch [42/50] batch [210/500] time 1.386 (1.364) data 0.000 (0.004) loss 0.9663 (1.0428) acc 71.8750 (73.5417) lr 1.9098e-04 eta 1:37:31 +epoch [42/50] batch [215/500] time 1.485 (1.365) data 0.000 (0.004) loss 1.5527 (1.0471) acc 62.5000 (73.5320) lr 1.9098e-04 eta 1:37:27 +epoch [42/50] batch [220/500] time 1.350 (1.364) data 0.000 (0.004) loss 0.5513 (1.0429) acc 78.1250 (73.6222) lr 1.9098e-04 eta 1:37:19 +epoch [42/50] batch [225/500] time 1.365 (1.364) data 0.000 (0.004) loss 0.8306 (1.0355) acc 75.0000 (73.8333) lr 1.9098e-04 eta 1:37:11 +epoch [42/50] batch [230/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.0713 (1.0409) acc 71.8750 (73.7364) lr 1.9098e-04 eta 1:37:04 +epoch [42/50] batch [235/500] time 1.346 (1.364) data 0.000 (0.004) loss 0.6094 (1.0402) acc 75.0000 (73.7500) lr 1.9098e-04 eta 1:36:57 +epoch [42/50] batch [240/500] time 1.348 (1.364) data 0.000 (0.004) loss 1.6426 (1.0418) acc 65.6250 (73.7500) lr 1.9098e-04 eta 1:36:49 +epoch [42/50] batch [245/500] time 1.378 
(1.364) data 0.000 (0.004) loss 1.1572 (1.0401) acc 84.3750 (73.8265) lr 1.9098e-04 eta 1:36:42 +epoch [42/50] batch [250/500] time 1.360 (1.364) data 0.000 (0.004) loss 0.5361 (1.0368) acc 81.2500 (73.8375) lr 1.9098e-04 eta 1:36:35 +epoch [42/50] batch [255/500] time 1.372 (1.364) data 0.000 (0.004) loss 0.8589 (1.0361) acc 78.1250 (73.8113) lr 1.9098e-04 eta 1:36:29 +epoch [42/50] batch [260/500] time 1.352 (1.364) data 0.000 (0.004) loss 1.1885 (1.0374) acc 68.7500 (73.7500) lr 1.9098e-04 eta 1:36:24 +epoch [42/50] batch [265/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.9395 (1.0397) acc 75.0000 (73.6557) lr 1.9098e-04 eta 1:36:17 +epoch [42/50] batch [270/500] time 1.359 (1.364) data 0.000 (0.003) loss 1.0781 (1.0399) acc 68.7500 (73.5880) lr 1.9098e-04 eta 1:36:10 +epoch [42/50] batch [275/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.8604 (1.0463) acc 50.0000 (73.4318) lr 1.9098e-04 eta 1:36:03 +epoch [42/50] batch [280/500] time 1.339 (1.364) data 0.000 (0.003) loss 1.1777 (1.0513) acc 71.8750 (73.3594) lr 1.9098e-04 eta 1:35:55 +epoch [42/50] batch [285/500] time 1.367 (1.364) data 0.000 (0.003) loss 0.5811 (1.0522) acc 84.3750 (73.3553) lr 1.9098e-04 eta 1:35:48 +epoch [42/50] batch [290/500] time 1.342 (1.364) data 0.000 (0.003) loss 0.8813 (1.0480) acc 81.2500 (73.5129) lr 1.9098e-04 eta 1:35:41 +epoch [42/50] batch [295/500] time 1.338 (1.363) data 0.000 (0.003) loss 0.8867 (1.0470) acc 81.2500 (73.5699) lr 1.9098e-04 eta 1:35:33 +epoch [42/50] batch [300/500] time 1.348 (1.363) data 0.000 (0.003) loss 1.3975 (1.0475) acc 71.8750 (73.5521) lr 1.9098e-04 eta 1:35:25 +epoch [42/50] batch [305/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.7798 (1.0469) acc 81.2500 (73.5348) lr 1.9098e-04 eta 1:35:19 +epoch [42/50] batch [310/500] time 1.357 (1.363) data 0.000 (0.003) loss 1.0586 (1.0483) acc 78.1250 (73.4980) lr 1.9098e-04 eta 1:35:11 +epoch [42/50] batch [315/500] time 1.356 (1.363) data 0.000 (0.003) loss 0.8550 (1.0465) acc 75.0000 
(73.5020) lr 1.9098e-04 eta 1:35:05 +epoch [42/50] batch [320/500] time 1.343 (1.363) data 0.000 (0.003) loss 0.9487 (1.0448) acc 68.7500 (73.5352) lr 1.9098e-04 eta 1:34:57 +epoch [42/50] batch [325/500] time 1.343 (1.363) data 0.000 (0.003) loss 1.3857 (1.0467) acc 59.3750 (73.4904) lr 1.9098e-04 eta 1:34:50 +epoch [42/50] batch [330/500] time 1.341 (1.363) data 0.000 (0.003) loss 0.8501 (1.0479) acc 71.8750 (73.4470) lr 1.9098e-04 eta 1:34:43 +epoch [42/50] batch [335/500] time 1.350 (1.363) data 0.000 (0.003) loss 1.2656 (1.0490) acc 75.0000 (73.4422) lr 1.9098e-04 eta 1:34:35 +epoch [42/50] batch [340/500] time 1.340 (1.363) data 0.001 (0.003) loss 0.8989 (1.0447) acc 78.1250 (73.5202) lr 1.9098e-04 eta 1:34:28 +epoch [42/50] batch [345/500] time 1.353 (1.363) data 0.000 (0.003) loss 1.5791 (1.0472) acc 65.6250 (73.4330) lr 1.9098e-04 eta 1:34:21 +epoch [42/50] batch [350/500] time 1.376 (1.363) data 0.000 (0.003) loss 0.6704 (1.0454) acc 78.1250 (73.4643) lr 1.9098e-04 eta 1:34:15 +epoch [42/50] batch [355/500] time 1.364 (1.363) data 0.000 (0.003) loss 1.0449 (1.0463) acc 71.8750 (73.4507) lr 1.9098e-04 eta 1:34:08 +epoch [42/50] batch [360/500] time 1.351 (1.363) data 0.000 (0.003) loss 0.9653 (1.0455) acc 81.2500 (73.4896) lr 1.9098e-04 eta 1:34:03 +epoch [42/50] batch [365/500] time 1.374 (1.363) data 0.000 (0.003) loss 1.3027 (1.0467) acc 78.1250 (73.5188) lr 1.9098e-04 eta 1:33:57 +epoch [42/50] batch [370/500] time 1.360 (1.363) data 0.000 (0.003) loss 1.0117 (1.0453) acc 78.1250 (73.5811) lr 1.9098e-04 eta 1:33:50 +epoch [42/50] batch [375/500] time 1.374 (1.363) data 0.000 (0.003) loss 1.2441 (1.0443) acc 75.0000 (73.6333) lr 1.9098e-04 eta 1:33:43 +epoch [42/50] batch [380/500] time 1.356 (1.363) data 0.000 (0.003) loss 0.7969 (1.0412) acc 81.2500 (73.7253) lr 1.9098e-04 eta 1:33:35 +epoch [42/50] batch [385/500] time 1.358 (1.363) data 0.000 (0.003) loss 1.0586 (1.0404) acc 78.1250 (73.7906) lr 1.9098e-04 eta 1:33:28 +epoch [42/50] batch [390/500] 
time 1.370 (1.363) data 0.000 (0.002) loss 1.2021 (1.0411) acc 71.8750 (73.7500) lr 1.9098e-04 eta 1:33:21 +epoch [42/50] batch [395/500] time 1.368 (1.363) data 0.000 (0.002) loss 0.6440 (1.0403) acc 71.8750 (73.7658) lr 1.9098e-04 eta 1:33:14 +epoch [42/50] batch [400/500] time 1.371 (1.363) data 0.000 (0.002) loss 0.4836 (1.0380) acc 90.6250 (73.8281) lr 1.9098e-04 eta 1:33:07 +epoch [42/50] batch [405/500] time 1.371 (1.363) data 0.000 (0.002) loss 0.9678 (1.0396) acc 71.8750 (73.7886) lr 1.9098e-04 eta 1:33:01 +epoch [42/50] batch [410/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.3955 (1.0375) acc 78.1250 (73.7881) lr 1.9098e-04 eta 1:32:54 +epoch [42/50] batch [415/500] time 1.372 (1.363) data 0.000 (0.002) loss 0.6128 (1.0349) acc 87.5000 (73.8705) lr 1.9098e-04 eta 1:32:47 +epoch [42/50] batch [420/500] time 1.341 (1.363) data 0.000 (0.002) loss 0.7314 (1.0359) acc 84.3750 (73.8542) lr 1.9098e-04 eta 1:32:40 +epoch [42/50] batch [425/500] time 1.323 (1.363) data 0.000 (0.002) loss 1.0010 (1.0360) acc 75.0000 (73.8088) lr 1.9098e-04 eta 1:32:32 +epoch [42/50] batch [430/500] time 1.348 (1.362) data 0.000 (0.002) loss 0.4644 (1.0356) acc 87.5000 (73.8081) lr 1.9098e-04 eta 1:32:25 +epoch [42/50] batch [435/500] time 1.343 (1.362) data 0.000 (0.002) loss 1.2686 (1.0354) acc 68.7500 (73.7931) lr 1.9098e-04 eta 1:32:18 +epoch [42/50] batch [440/500] time 1.365 (1.362) data 0.000 (0.002) loss 1.0430 (1.0362) acc 68.7500 (73.7784) lr 1.9098e-04 eta 1:32:11 +epoch [42/50] batch [445/500] time 1.366 (1.362) data 0.000 (0.002) loss 0.8623 (1.0371) acc 75.0000 (73.7640) lr 1.9098e-04 eta 1:32:04 +epoch [42/50] batch [450/500] time 1.348 (1.362) data 0.000 (0.002) loss 1.1445 (1.0357) acc 78.1250 (73.8056) lr 1.9098e-04 eta 1:31:56 +epoch [42/50] batch [455/500] time 1.352 (1.362) data 0.000 (0.002) loss 1.0273 (1.0369) acc 71.8750 (73.8049) lr 1.9098e-04 eta 1:31:49 +epoch [42/50] batch [460/500] time 1.369 (1.362) data 0.000 (0.002) loss 1.5234 (1.0388) acc 
71.8750 (73.7840) lr 1.9098e-04 eta 1:31:42 +epoch [42/50] batch [465/500] time 1.344 (1.362) data 0.000 (0.002) loss 1.0586 (1.0393) acc 71.8750 (73.7433) lr 1.9098e-04 eta 1:31:35 +epoch [42/50] batch [470/500] time 1.348 (1.362) data 0.000 (0.002) loss 0.6851 (1.0384) acc 84.3750 (73.7832) lr 1.9098e-04 eta 1:31:28 +epoch [42/50] batch [475/500] time 1.355 (1.362) data 0.000 (0.002) loss 1.1660 (1.0385) acc 71.8750 (73.7697) lr 1.9098e-04 eta 1:31:20 +epoch [42/50] batch [480/500] time 1.350 (1.362) data 0.000 (0.002) loss 1.0273 (1.0376) acc 71.8750 (73.7891) lr 1.9098e-04 eta 1:31:13 +epoch [42/50] batch [485/500] time 1.359 (1.361) data 0.000 (0.002) loss 0.6167 (1.0383) acc 84.3750 (73.7822) lr 1.9098e-04 eta 1:31:06 +epoch [42/50] batch [490/500] time 1.367 (1.361) data 0.000 (0.002) loss 1.0361 (1.0374) acc 68.7500 (73.8074) lr 1.9098e-04 eta 1:30:59 +epoch [42/50] batch [495/500] time 1.339 (1.361) data 0.000 (0.002) loss 1.5166 (1.0399) acc 62.5000 (73.7626) lr 1.9098e-04 eta 1:30:51 +epoch [42/50] batch [500/500] time 1.362 (1.361) data 0.000 (0.002) loss 0.6670 (1.0394) acc 87.5000 (73.7500) lr 1.5567e-04 eta 1:30:44 +epoch [43/50] batch [5/500] time 1.356 (1.569) data 0.000 (0.164) loss 1.6699 (1.1179) acc 68.7500 (76.8750) lr 1.5567e-04 eta 1:44:27 +epoch [43/50] batch [10/500] time 1.365 (1.464) data 0.000 (0.082) loss 1.6572 (1.1003) acc 62.5000 (74.3750) lr 1.5567e-04 eta 1:37:19 +epoch [43/50] batch [15/500] time 1.366 (1.428) data 0.000 (0.055) loss 1.2393 (1.1312) acc 68.7500 (73.1250) lr 1.5567e-04 eta 1:34:50 +epoch [43/50] batch [20/500] time 1.367 (1.410) data 0.001 (0.041) loss 0.7695 (1.1244) acc 84.3750 (73.7500) lr 1.5567e-04 eta 1:33:33 +epoch [43/50] batch [25/500] time 1.353 (1.398) data 0.000 (0.033) loss 1.2891 (1.1262) acc 65.6250 (73.8750) lr 1.5567e-04 eta 1:32:36 +epoch [43/50] batch [30/500] time 1.359 (1.391) data 0.000 (0.028) loss 1.3721 (1.1544) acc 62.5000 (72.9167) lr 1.5567e-04 eta 1:32:02 +epoch [43/50] batch [35/500] 
time 1.383 (1.388) data 0.001 (0.024) loss 0.7949 (1.1611) acc 84.3750 (72.0536) lr 1.5567e-04 eta 1:31:41 +epoch [43/50] batch [40/500] time 1.361 (1.384) data 0.000 (0.021) loss 1.0820 (1.1543) acc 75.0000 (72.1094) lr 1.5567e-04 eta 1:31:20 +epoch [43/50] batch [45/500] time 1.353 (1.382) data 0.000 (0.019) loss 1.2861 (1.1468) acc 75.0000 (72.3611) lr 1.5567e-04 eta 1:31:05 +epoch [43/50] batch [50/500] time 1.391 (1.381) data 0.000 (0.017) loss 0.7603 (1.1329) acc 75.0000 (72.5625) lr 1.5567e-04 eta 1:30:53 +epoch [43/50] batch [55/500] time 1.348 (1.378) data 0.000 (0.015) loss 1.6045 (1.1346) acc 65.6250 (72.6705) lr 1.5567e-04 eta 1:30:36 +epoch [43/50] batch [60/500] time 1.354 (1.376) data 0.000 (0.014) loss 0.7373 (1.1378) acc 78.1250 (72.3958) lr 1.5567e-04 eta 1:30:22 +epoch [43/50] batch [65/500] time 1.363 (1.377) data 0.000 (0.013) loss 0.8076 (1.1173) acc 75.0000 (72.7404) lr 1.5567e-04 eta 1:30:16 +epoch [43/50] batch [70/500] time 1.373 (1.375) data 0.000 (0.012) loss 0.7666 (1.1018) acc 81.2500 (72.8571) lr 1.5567e-04 eta 1:30:03 +epoch [43/50] batch [75/500] time 1.333 (1.374) data 0.000 (0.011) loss 1.1592 (1.1062) acc 71.8750 (72.6250) lr 1.5567e-04 eta 1:29:51 +epoch [43/50] batch [80/500] time 1.355 (1.373) data 0.000 (0.011) loss 0.9492 (1.1136) acc 71.8750 (72.4609) lr 1.5567e-04 eta 1:29:41 +epoch [43/50] batch [85/500] time 1.355 (1.372) data 0.000 (0.010) loss 0.4810 (1.1139) acc 81.2500 (72.3897) lr 1.5567e-04 eta 1:29:31 +epoch [43/50] batch [90/500] time 1.354 (1.371) data 0.000 (0.009) loss 0.9287 (1.1132) acc 65.6250 (72.3264) lr 1.5567e-04 eta 1:29:21 +epoch [43/50] batch [95/500] time 1.356 (1.370) data 0.001 (0.009) loss 1.2100 (1.1027) acc 71.8750 (72.5329) lr 1.5567e-04 eta 1:29:11 +epoch [43/50] batch [100/500] time 1.376 (1.370) data 0.000 (0.009) loss 0.3054 (1.0992) acc 96.8750 (72.6250) lr 1.5567e-04 eta 1:29:03 +epoch [43/50] batch [105/500] time 1.500 (1.371) data 0.000 (0.008) loss 0.9697 (1.1061) acc 68.7500 
(72.4702) lr 1.5567e-04 eta 1:29:00 +epoch [43/50] batch [110/500] time 1.358 (1.371) data 0.000 (0.008) loss 1.0762 (1.1103) acc 75.0000 (72.4716) lr 1.5567e-04 eta 1:28:51 +epoch [43/50] batch [115/500] time 1.359 (1.370) data 0.000 (0.007) loss 0.8994 (1.1045) acc 78.1250 (72.6087) lr 1.5567e-04 eta 1:28:43 +epoch [43/50] batch [120/500] time 1.336 (1.370) data 0.000 (0.007) loss 0.8823 (1.1051) acc 71.8750 (72.6562) lr 1.5567e-04 eta 1:28:34 +epoch [43/50] batch [125/500] time 1.362 (1.370) data 0.000 (0.007) loss 0.7334 (1.1046) acc 78.1250 (72.7000) lr 1.5567e-04 eta 1:28:27 +epoch [43/50] batch [130/500] time 1.362 (1.370) data 0.000 (0.007) loss 1.1992 (1.1017) acc 78.1250 (72.9327) lr 1.5567e-04 eta 1:28:19 +epoch [43/50] batch [135/500] time 1.361 (1.369) data 0.000 (0.006) loss 0.7314 (1.0892) acc 78.1250 (73.2407) lr 1.5567e-04 eta 1:28:12 +epoch [43/50] batch [140/500] time 1.356 (1.369) data 0.000 (0.006) loss 1.1523 (1.0871) acc 71.8750 (73.4152) lr 1.5567e-04 eta 1:28:04 +epoch [43/50] batch [145/500] time 1.361 (1.369) data 0.000 (0.006) loss 0.7773 (1.0830) acc 81.2500 (73.5776) lr 1.5567e-04 eta 1:27:56 +epoch [43/50] batch [150/500] time 1.357 (1.368) data 0.000 (0.006) loss 0.9937 (1.0850) acc 71.8750 (73.5208) lr 1.5567e-04 eta 1:27:47 +epoch [43/50] batch [155/500] time 1.352 (1.368) data 0.000 (0.006) loss 1.1113 (1.0800) acc 71.8750 (73.5282) lr 1.5567e-04 eta 1:27:38 +epoch [43/50] batch [160/500] time 1.354 (1.367) data 0.000 (0.005) loss 0.8052 (1.0722) acc 78.1250 (73.6719) lr 1.5567e-04 eta 1:27:29 +epoch [43/50] batch [165/500] time 1.363 (1.367) data 0.000 (0.005) loss 1.2949 (1.0760) acc 59.3750 (73.5227) lr 1.5567e-04 eta 1:27:21 +epoch [43/50] batch [170/500] time 1.355 (1.366) data 0.001 (0.005) loss 1.2920 (1.0739) acc 65.6250 (73.5294) lr 1.5567e-04 eta 1:27:13 +epoch [43/50] batch [175/500] time 1.358 (1.366) data 0.000 (0.005) loss 1.0186 (1.0699) acc 68.7500 (73.4464) lr 1.5567e-04 eta 1:27:06 +epoch [43/50] batch [180/500] 
time 1.375 (1.366) data 0.000 (0.005) loss 1.3545 (1.0693) acc 68.7500 (73.4375) lr 1.5567e-04 eta 1:26:58 +epoch [43/50] batch [185/500] time 1.351 (1.366) data 0.000 (0.005) loss 0.8755 (1.0620) acc 78.1250 (73.6655) lr 1.5567e-04 eta 1:26:49 +epoch [43/50] batch [190/500] time 1.356 (1.366) data 0.000 (0.005) loss 0.7744 (1.0623) acc 81.2500 (73.7336) lr 1.5567e-04 eta 1:26:42 +epoch [43/50] batch [195/500] time 1.360 (1.366) data 0.001 (0.005) loss 1.1436 (1.0645) acc 78.1250 (73.7019) lr 1.5567e-04 eta 1:26:36 +epoch [43/50] batch [200/500] time 1.363 (1.366) data 0.000 (0.004) loss 0.8965 (1.0630) acc 75.0000 (73.7812) lr 1.5567e-04 eta 1:26:28 +epoch [43/50] batch [205/500] time 1.377 (1.366) data 0.000 (0.004) loss 1.1318 (1.0657) acc 62.5000 (73.6280) lr 1.5567e-04 eta 1:26:24 +epoch [43/50] batch [210/500] time 1.357 (1.366) data 0.000 (0.004) loss 1.8232 (1.0690) acc 59.3750 (73.5863) lr 1.5567e-04 eta 1:26:17 +epoch [43/50] batch [215/500] time 1.365 (1.366) data 0.000 (0.004) loss 1.4023 (1.0722) acc 59.3750 (73.5174) lr 1.5567e-04 eta 1:26:09 +epoch [43/50] batch [220/500] time 1.364 (1.366) data 0.000 (0.004) loss 1.1309 (1.0691) acc 81.2500 (73.6506) lr 1.5567e-04 eta 1:26:02 +epoch [43/50] batch [225/500] time 1.361 (1.365) data 0.000 (0.004) loss 1.3369 (1.0702) acc 68.7500 (73.5972) lr 1.5567e-04 eta 1:25:54 +epoch [43/50] batch [230/500] time 1.362 (1.365) data 0.000 (0.004) loss 0.7939 (1.0657) acc 71.8750 (73.6413) lr 1.5567e-04 eta 1:25:46 +epoch [43/50] batch [235/500] time 1.348 (1.365) data 0.000 (0.004) loss 1.4502 (1.0677) acc 75.0000 (73.6303) lr 1.5567e-04 eta 1:25:39 +epoch [43/50] batch [240/500] time 1.362 (1.365) data 0.000 (0.004) loss 1.0273 (1.0649) acc 78.1250 (73.6719) lr 1.5567e-04 eta 1:25:32 +epoch [43/50] batch [245/500] time 1.342 (1.365) data 0.000 (0.004) loss 0.7451 (1.0607) acc 87.5000 (73.7500) lr 1.5567e-04 eta 1:25:24 +epoch [43/50] batch [250/500] time 1.348 (1.365) data 0.000 (0.004) loss 0.6948 (1.0599) acc 
87.5000 (73.8375) lr 1.5567e-04 eta 1:25:19 +epoch [43/50] batch [255/500] time 1.343 (1.365) data 0.000 (0.004) loss 0.6206 (1.0593) acc 84.3750 (73.8480) lr 1.5567e-04 eta 1:25:13 +epoch [43/50] batch [260/500] time 1.353 (1.365) data 0.000 (0.004) loss 0.5967 (1.0575) acc 75.0000 (73.8702) lr 1.5567e-04 eta 1:25:05 +epoch [43/50] batch [265/500] time 1.374 (1.365) data 0.000 (0.003) loss 1.1035 (1.0556) acc 81.2500 (73.9387) lr 1.5567e-04 eta 1:24:58 +epoch [43/50] batch [270/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.3408 (1.0542) acc 62.5000 (73.9699) lr 1.5567e-04 eta 1:24:51 +epoch [43/50] batch [275/500] time 1.353 (1.365) data 0.000 (0.003) loss 1.0137 (1.0532) acc 71.8750 (73.9773) lr 1.5567e-04 eta 1:24:44 +epoch [43/50] batch [280/500] time 1.371 (1.365) data 0.000 (0.003) loss 0.8223 (1.0511) acc 78.1250 (74.0625) lr 1.5567e-04 eta 1:24:38 +epoch [43/50] batch [285/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.3076 (1.0497) acc 71.8750 (74.0899) lr 1.5567e-04 eta 1:24:30 +epoch [43/50] batch [290/500] time 1.367 (1.365) data 0.000 (0.003) loss 0.9478 (1.0505) acc 78.1250 (74.0733) lr 1.5567e-04 eta 1:24:23 +epoch [43/50] batch [295/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.1729 (1.0509) acc 75.0000 (74.0466) lr 1.5567e-04 eta 1:24:16 +epoch [43/50] batch [300/500] time 1.353 (1.365) data 0.000 (0.003) loss 1.0898 (1.0522) acc 68.7500 (73.9896) lr 1.5567e-04 eta 1:24:08 +epoch [43/50] batch [305/500] time 1.340 (1.364) data 0.000 (0.003) loss 0.8223 (1.0522) acc 78.1250 (73.9652) lr 1.5567e-04 eta 1:24:01 +epoch [43/50] batch [310/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.4885 (1.0495) acc 84.3750 (74.0222) lr 1.5567e-04 eta 1:23:53 +epoch [43/50] batch [315/500] time 1.361 (1.364) data 0.000 (0.003) loss 0.6318 (1.0495) acc 78.1250 (74.0079) lr 1.5567e-04 eta 1:23:46 +epoch [43/50] batch [320/500] time 1.352 (1.364) data 0.000 (0.003) loss 0.9170 (1.0490) acc 78.1250 (74.0039) lr 1.5567e-04 eta 1:23:39 +epoch [43/50] batch 
[325/500] time 1.364 (1.364) data 0.000 (0.003) loss 1.4375 (1.0479) acc 71.8750 (74.0385) lr 1.5567e-04 eta 1:23:32 +epoch [43/50] batch [330/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.9082 (1.0503) acc 71.8750 (73.9773) lr 1.5567e-04 eta 1:23:25 +epoch [43/50] batch [335/500] time 1.370 (1.364) data 0.000 (0.003) loss 2.0273 (1.0521) acc 68.7500 (74.0019) lr 1.5567e-04 eta 1:23:18 +epoch [43/50] batch [340/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.7852 (1.0538) acc 62.5000 (73.9798) lr 1.5567e-04 eta 1:23:11 +epoch [43/50] batch [345/500] time 1.381 (1.364) data 0.000 (0.003) loss 1.9678 (1.0559) acc 56.2500 (73.9130) lr 1.5567e-04 eta 1:23:04 +epoch [43/50] batch [350/500] time 1.369 (1.364) data 0.000 (0.003) loss 0.7715 (1.0528) acc 78.1250 (73.9732) lr 1.5567e-04 eta 1:22:59 +epoch [43/50] batch [355/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.0010 (1.0541) acc 71.8750 (73.9173) lr 1.5567e-04 eta 1:22:53 +epoch [43/50] batch [360/500] time 1.373 (1.364) data 0.000 (0.003) loss 1.2783 (1.0550) acc 62.5000 (73.9062) lr 1.5567e-04 eta 1:22:45 +epoch [43/50] batch [365/500] time 1.338 (1.364) data 0.000 (0.003) loss 0.9927 (1.0521) acc 68.7500 (73.9469) lr 1.5567e-04 eta 1:22:38 +epoch [43/50] batch [370/500] time 1.353 (1.364) data 0.001 (0.003) loss 1.3525 (1.0533) acc 59.3750 (73.9189) lr 1.5567e-04 eta 1:22:31 +epoch [43/50] batch [375/500] time 1.378 (1.364) data 0.000 (0.003) loss 0.8135 (1.0507) acc 78.1250 (73.9667) lr 1.5567e-04 eta 1:22:24 +epoch [43/50] batch [380/500] time 1.374 (1.364) data 0.000 (0.003) loss 0.9648 (1.0531) acc 75.0000 (73.9474) lr 1.5567e-04 eta 1:22:17 +epoch [43/50] batch [385/500] time 1.342 (1.364) data 0.000 (0.002) loss 0.7119 (1.0517) acc 81.2500 (73.9529) lr 1.5567e-04 eta 1:22:10 +epoch [43/50] batch [390/500] time 1.350 (1.364) data 0.000 (0.002) loss 0.7983 (1.0500) acc 81.2500 (74.0144) lr 1.5567e-04 eta 1:22:02 +epoch [43/50] batch [395/500] time 1.379 (1.364) data 0.000 (0.002) loss 1.0586 
(1.0486) acc 65.6250 (73.9794) lr 1.5567e-04 eta 1:21:57 +epoch [43/50] batch [400/500] time 1.364 (1.364) data 0.000 (0.002) loss 0.8286 (1.0490) acc 87.5000 (74.0078) lr 1.5567e-04 eta 1:21:50 +epoch [43/50] batch [405/500] time 1.376 (1.364) data 0.000 (0.002) loss 0.9233 (1.0489) acc 81.2500 (74.0432) lr 1.5567e-04 eta 1:21:43 +epoch [43/50] batch [410/500] time 1.357 (1.364) data 0.000 (0.002) loss 1.9883 (1.0531) acc 59.3750 (73.9939) lr 1.5567e-04 eta 1:21:36 +epoch [43/50] batch [415/500] time 1.341 (1.364) data 0.000 (0.002) loss 0.8726 (1.0537) acc 75.0000 (74.0060) lr 1.5567e-04 eta 1:21:29 +epoch [43/50] batch [420/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.6074 (1.0529) acc 62.5000 (74.0625) lr 1.5567e-04 eta 1:21:22 +epoch [43/50] batch [425/500] time 1.357 (1.364) data 0.000 (0.002) loss 0.7295 (1.0549) acc 81.2500 (74.0441) lr 1.5567e-04 eta 1:21:15 +epoch [43/50] batch [430/500] time 1.361 (1.364) data 0.000 (0.002) loss 0.6694 (1.0546) acc 78.1250 (74.0116) lr 1.5567e-04 eta 1:21:08 +epoch [43/50] batch [435/500] time 1.358 (1.364) data 0.000 (0.002) loss 1.4805 (1.0538) acc 75.0000 (74.0230) lr 1.5567e-04 eta 1:21:01 +epoch [43/50] batch [440/500] time 1.369 (1.364) data 0.000 (0.002) loss 1.0195 (1.0527) acc 78.1250 (74.0838) lr 1.5567e-04 eta 1:20:54 +epoch [43/50] batch [445/500] time 1.340 (1.364) data 0.000 (0.002) loss 0.8877 (1.0531) acc 78.1250 (74.0590) lr 1.5567e-04 eta 1:20:47 +epoch [43/50] batch [450/500] time 1.326 (1.363) data 0.000 (0.002) loss 0.7056 (1.0545) acc 78.1250 (74.0278) lr 1.5567e-04 eta 1:20:40 +epoch [43/50] batch [455/500] time 1.335 (1.363) data 0.000 (0.002) loss 1.5576 (1.0547) acc 56.2500 (74.0110) lr 1.5567e-04 eta 1:20:32 +epoch [43/50] batch [460/500] time 1.367 (1.363) data 0.000 (0.002) loss 1.3252 (1.0561) acc 68.7500 (73.9810) lr 1.5567e-04 eta 1:20:26 +epoch [43/50] batch [465/500] time 1.380 (1.363) data 0.000 (0.002) loss 1.3604 (1.0567) acc 59.3750 (73.9449) lr 1.5567e-04 eta 1:20:18 +epoch 
[43/50] batch [470/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.3701 (1.0584) acc 62.5000 (73.9096) lr 1.5567e-04 eta 1:20:12 +epoch [43/50] batch [475/500] time 1.367 (1.363) data 0.000 (0.002) loss 0.8618 (1.0581) acc 78.1250 (73.9145) lr 1.5567e-04 eta 1:20:05 +epoch [43/50] batch [480/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.9141 (1.0598) acc 78.1250 (73.8867) lr 1.5567e-04 eta 1:19:58 +epoch [43/50] batch [485/500] time 1.356 (1.363) data 0.001 (0.002) loss 1.0254 (1.0596) acc 71.8750 (73.8595) lr 1.5567e-04 eta 1:19:51 +epoch [43/50] batch [490/500] time 1.519 (1.364) data 0.000 (0.002) loss 1.1514 (1.0599) acc 71.8750 (73.8457) lr 1.5567e-04 eta 1:19:46 +epoch [43/50] batch [495/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.3926 (1.0595) acc 71.8750 (73.8636) lr 1.5567e-04 eta 1:19:39 +epoch [43/50] batch [500/500] time 1.352 (1.364) data 0.000 (0.002) loss 0.6504 (1.0569) acc 81.2500 (73.8937) lr 1.2369e-04 eta 1:19:32 +epoch [44/50] batch [5/500] time 1.361 (1.522) data 0.000 (0.157) loss 1.1572 (0.7880) acc 68.7500 (80.0000) lr 1.2369e-04 eta 1:28:38 +epoch [44/50] batch [10/500] time 1.362 (1.438) data 0.000 (0.079) loss 0.9263 (0.9802) acc 78.1250 (73.1250) lr 1.2369e-04 eta 1:23:37 +epoch [44/50] batch [15/500] time 1.354 (1.410) data 0.000 (0.053) loss 1.0186 (0.9727) acc 65.6250 (72.7083) lr 1.2369e-04 eta 1:21:52 +epoch [44/50] batch [20/500] time 1.357 (1.394) data 0.000 (0.040) loss 1.0264 (0.9894) acc 65.6250 (72.0312) lr 1.2369e-04 eta 1:20:49 +epoch [44/50] batch [25/500] time 1.372 (1.387) data 0.000 (0.032) loss 1.1680 (0.9761) acc 71.8750 (72.8750) lr 1.2369e-04 eta 1:20:19 +epoch [44/50] batch [30/500] time 1.382 (1.383) data 0.000 (0.026) loss 0.9775 (0.9579) acc 78.1250 (73.8542) lr 1.2369e-04 eta 1:20:00 +epoch [44/50] batch [35/500] time 1.376 (1.381) data 0.000 (0.023) loss 1.0928 (0.9585) acc 75.0000 (73.4821) lr 1.2369e-04 eta 1:19:45 +epoch [44/50] batch [40/500] time 1.354 (1.379) data 0.000 (0.020) loss 0.5991 
(0.9343) acc 81.2500 (74.2969) lr 1.2369e-04 eta 1:19:31 +epoch [44/50] batch [45/500] time 1.361 (1.377) data 0.000 (0.018) loss 1.1475 (0.9376) acc 75.0000 (74.1667) lr 1.2369e-04 eta 1:19:16 +epoch [44/50] batch [50/500] time 1.377 (1.379) data 0.000 (0.016) loss 1.2646 (0.9569) acc 78.1250 (74.0000) lr 1.2369e-04 eta 1:19:17 +epoch [44/50] batch [55/500] time 1.341 (1.376) data 0.000 (0.015) loss 1.0586 (0.9591) acc 78.1250 (74.2045) lr 1.2369e-04 eta 1:19:00 +epoch [44/50] batch [60/500] time 1.363 (1.374) data 0.000 (0.013) loss 0.6621 (0.9613) acc 81.2500 (74.3229) lr 1.2369e-04 eta 1:18:47 +epoch [44/50] batch [65/500] time 1.367 (1.373) data 0.000 (0.012) loss 1.1113 (0.9633) acc 75.0000 (74.1827) lr 1.2369e-04 eta 1:18:37 +epoch [44/50] batch [70/500] time 1.376 (1.374) data 0.000 (0.012) loss 1.4248 (0.9786) acc 68.7500 (73.8393) lr 1.2369e-04 eta 1:18:31 +epoch [44/50] batch [75/500] time 1.370 (1.373) data 0.000 (0.011) loss 0.8472 (0.9759) acc 78.1250 (74.0000) lr 1.2369e-04 eta 1:18:22 +epoch [44/50] batch [80/500] time 1.359 (1.372) data 0.000 (0.010) loss 0.9487 (0.9946) acc 87.5000 (73.9062) lr 1.2369e-04 eta 1:18:12 +epoch [44/50] batch [85/500] time 1.375 (1.372) data 0.000 (0.010) loss 0.7271 (0.9908) acc 84.3750 (73.8971) lr 1.2369e-04 eta 1:18:04 +epoch [44/50] batch [90/500] time 1.507 (1.373) data 0.000 (0.009) loss 1.5195 (1.0099) acc 65.6250 (73.7500) lr 1.2369e-04 eta 1:18:02 +epoch [44/50] batch [95/500] time 1.350 (1.372) data 0.000 (0.009) loss 0.8779 (1.0089) acc 68.7500 (73.9145) lr 1.2369e-04 eta 1:17:52 +epoch [44/50] batch [100/500] time 1.362 (1.371) data 0.001 (0.008) loss 1.1699 (1.0250) acc 78.1250 (73.8125) lr 1.2369e-04 eta 1:17:42 +epoch [44/50] batch [105/500] time 1.351 (1.370) data 0.000 (0.008) loss 1.5801 (1.0299) acc 65.6250 (73.8690) lr 1.2369e-04 eta 1:17:32 +epoch [44/50] batch [110/500] time 1.363 (1.370) data 0.000 (0.007) loss 0.9810 (1.0315) acc 78.1250 (73.9489) lr 1.2369e-04 eta 1:17:24 +epoch [44/50] batch 
[115/500] time 1.361 (1.370) data 0.000 (0.007) loss 0.8745 (1.0265) acc 68.7500 (73.9946) lr 1.2369e-04 eta 1:17:15 +epoch [44/50] batch [120/500] time 1.342 (1.369) data 0.000 (0.007) loss 0.8130 (1.0249) acc 78.1250 (74.0625) lr 1.2369e-04 eta 1:17:07 +epoch [44/50] batch [125/500] time 1.354 (1.368) data 0.000 (0.007) loss 1.4229 (1.0152) acc 62.5000 (74.1750) lr 1.2369e-04 eta 1:16:58 +epoch [44/50] batch [130/500] time 1.380 (1.368) data 0.000 (0.006) loss 0.9800 (1.0129) acc 71.8750 (74.1827) lr 1.2369e-04 eta 1:16:50 +epoch [44/50] batch [135/500] time 1.373 (1.368) data 0.000 (0.006) loss 1.5352 (1.0172) acc 71.8750 (74.0972) lr 1.2369e-04 eta 1:16:42 +epoch [44/50] batch [140/500] time 1.355 (1.368) data 0.000 (0.006) loss 1.0176 (1.0151) acc 68.7500 (74.2857) lr 1.2369e-04 eta 1:16:35 +epoch [44/50] batch [145/500] time 1.388 (1.367) data 0.000 (0.006) loss 0.9409 (1.0150) acc 81.2500 (74.4181) lr 1.2369e-04 eta 1:16:27 +epoch [44/50] batch [150/500] time 1.375 (1.367) data 0.000 (0.006) loss 0.8882 (1.0216) acc 75.0000 (74.2292) lr 1.2369e-04 eta 1:16:19 +epoch [44/50] batch [155/500] time 1.376 (1.367) data 0.000 (0.005) loss 0.7979 (1.0234) acc 84.3750 (74.2944) lr 1.2369e-04 eta 1:16:11 +epoch [44/50] batch [160/500] time 1.359 (1.366) data 0.000 (0.005) loss 0.5381 (1.0160) acc 87.5000 (74.4531) lr 1.2369e-04 eta 1:16:03 +epoch [44/50] batch [165/500] time 1.352 (1.366) data 0.000 (0.005) loss 0.6782 (1.0133) acc 81.2500 (74.5455) lr 1.2369e-04 eta 1:15:55 +epoch [44/50] batch [170/500] time 1.358 (1.366) data 0.000 (0.005) loss 1.5449 (1.0186) acc 68.7500 (74.3750) lr 1.2369e-04 eta 1:15:48 +epoch [44/50] batch [175/500] time 1.354 (1.366) data 0.000 (0.005) loss 0.3936 (1.0139) acc 87.5000 (74.4107) lr 1.2369e-04 eta 1:15:40 +epoch [44/50] batch [180/500] time 1.375 (1.366) data 0.000 (0.005) loss 0.7363 (1.0143) acc 84.3750 (74.4618) lr 1.2369e-04 eta 1:15:33 +epoch [44/50] batch [185/500] time 1.351 (1.365) data 0.000 (0.005) loss 1.0918 
(1.0131) acc 68.7500 (74.4257) lr 1.2369e-04 eta 1:15:25 +epoch [44/50] batch [190/500] time 1.361 (1.366) data 0.000 (0.004) loss 1.3330 (1.0122) acc 68.7500 (74.4243) lr 1.2369e-04 eta 1:15:19 +epoch [44/50] batch [195/500] time 1.364 (1.365) data 0.000 (0.004) loss 1.0957 (1.0107) acc 65.6250 (74.4231) lr 1.2369e-04 eta 1:15:12 +epoch [44/50] batch [200/500] time 1.356 (1.365) data 0.000 (0.004) loss 0.9023 (1.0101) acc 65.6250 (74.3594) lr 1.2369e-04 eta 1:15:04 +epoch [44/50] batch [205/500] time 1.376 (1.365) data 0.000 (0.004) loss 0.8965 (1.0124) acc 75.0000 (74.2683) lr 1.2369e-04 eta 1:14:57 +epoch [44/50] batch [210/500] time 1.384 (1.365) data 0.000 (0.004) loss 1.1709 (1.0120) acc 65.6250 (74.2708) lr 1.2369e-04 eta 1:14:50 +epoch [44/50] batch [215/500] time 1.381 (1.365) data 0.000 (0.004) loss 1.0254 (1.0169) acc 78.1250 (74.1715) lr 1.2369e-04 eta 1:14:44 +epoch [44/50] batch [220/500] time 1.377 (1.365) data 0.000 (0.004) loss 0.4639 (1.0125) acc 96.8750 (74.3040) lr 1.2369e-04 eta 1:14:38 +epoch [44/50] batch [225/500] time 1.369 (1.365) data 0.000 (0.004) loss 1.7881 (1.0142) acc 62.5000 (74.3333) lr 1.2369e-04 eta 1:14:31 +epoch [44/50] batch [230/500] time 1.392 (1.366) data 0.000 (0.004) loss 0.4348 (1.0090) acc 90.6250 (74.4429) lr 1.2369e-04 eta 1:14:25 +epoch [44/50] batch [235/500] time 1.351 (1.366) data 0.000 (0.004) loss 1.0820 (1.0046) acc 71.8750 (74.6144) lr 1.2369e-04 eta 1:14:20 +epoch [44/50] batch [240/500] time 1.358 (1.366) data 0.000 (0.004) loss 0.5659 (1.0025) acc 78.1250 (74.6354) lr 1.2369e-04 eta 1:14:12 +epoch [44/50] batch [245/500] time 1.361 (1.366) data 0.000 (0.004) loss 1.0020 (0.9985) acc 75.0000 (74.7194) lr 1.2369e-04 eta 1:14:04 +epoch [44/50] batch [250/500] time 1.352 (1.366) data 0.000 (0.003) loss 1.2451 (1.0014) acc 56.2500 (74.6375) lr 1.2369e-04 eta 1:13:57 +epoch [44/50] batch [255/500] time 1.343 (1.365) data 0.000 (0.003) loss 1.3330 (1.0052) acc 65.6250 (74.5221) lr 1.2369e-04 eta 1:13:50 +epoch 
[44/50] batch [260/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.2861 (1.0111) acc 75.0000 (74.4471) lr 1.2369e-04 eta 1:13:43 +epoch [44/50] batch [265/500] time 1.368 (1.365) data 0.000 (0.003) loss 0.7969 (1.0080) acc 81.2500 (74.5755) lr 1.2369e-04 eta 1:13:36 +epoch [44/50] batch [270/500] time 1.343 (1.365) data 0.000 (0.003) loss 1.2529 (1.0125) acc 75.0000 (74.5255) lr 1.2369e-04 eta 1:13:28 +epoch [44/50] batch [275/500] time 1.340 (1.365) data 0.000 (0.003) loss 0.7300 (1.0087) acc 75.0000 (74.6364) lr 1.2369e-04 eta 1:13:21 +epoch [44/50] batch [280/500] time 1.374 (1.365) data 0.000 (0.003) loss 1.2324 (1.0100) acc 71.8750 (74.6205) lr 1.2369e-04 eta 1:13:14 +epoch [44/50] batch [285/500] time 1.349 (1.365) data 0.000 (0.003) loss 0.8315 (1.0097) acc 71.8750 (74.6162) lr 1.2369e-04 eta 1:13:07 +epoch [44/50] batch [290/500] time 1.352 (1.364) data 0.000 (0.003) loss 1.2686 (1.0107) acc 78.1250 (74.6013) lr 1.2369e-04 eta 1:12:59 +epoch [44/50] batch [295/500] time 1.351 (1.364) data 0.000 (0.003) loss 0.5591 (1.0117) acc 90.6250 (74.5657) lr 1.2369e-04 eta 1:12:52 +epoch [44/50] batch [300/500] time 1.342 (1.364) data 0.000 (0.003) loss 0.9927 (1.0092) acc 75.0000 (74.6771) lr 1.2369e-04 eta 1:12:44 +epoch [44/50] batch [305/500] time 1.343 (1.364) data 0.000 (0.003) loss 0.8599 (1.0087) acc 75.0000 (74.6721) lr 1.2369e-04 eta 1:12:38 +epoch [44/50] batch [310/500] time 1.368 (1.364) data 0.000 (0.003) loss 1.0742 (1.0080) acc 71.8750 (74.6976) lr 1.2369e-04 eta 1:12:31 +epoch [44/50] batch [315/500] time 1.356 (1.364) data 0.000 (0.003) loss 1.7373 (1.0094) acc 62.5000 (74.6925) lr 1.2369e-04 eta 1:12:24 +epoch [44/50] batch [320/500] time 1.357 (1.364) data 0.000 (0.003) loss 0.7090 (1.0083) acc 71.8750 (74.7363) lr 1.2369e-04 eta 1:12:17 +epoch [44/50] batch [325/500] time 1.339 (1.364) data 0.000 (0.003) loss 1.0439 (1.0077) acc 75.0000 (74.7115) lr 1.2369e-04 eta 1:12:10 +epoch [44/50] batch [330/500] time 1.376 (1.364) data 0.000 (0.003) loss 
1.2266 (1.0092) acc 75.0000 (74.6875) lr 1.2369e-04 eta 1:12:02 +epoch [44/50] batch [335/500] time 1.346 (1.364) data 0.000 (0.003) loss 0.5581 (1.0090) acc 93.7500 (74.6922) lr 1.2369e-04 eta 1:11:56 +epoch [44/50] batch [340/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.4834 (1.0143) acc 71.8750 (74.6048) lr 1.2369e-04 eta 1:11:49 +epoch [44/50] batch [345/500] time 1.354 (1.364) data 0.000 (0.003) loss 1.2090 (1.0152) acc 75.0000 (74.6377) lr 1.2369e-04 eta 1:11:42 +epoch [44/50] batch [350/500] time 1.347 (1.363) data 0.000 (0.003) loss 1.3174 (1.0170) acc 71.8750 (74.5982) lr 1.2369e-04 eta 1:11:34 +epoch [44/50] batch [355/500] time 1.357 (1.363) data 0.000 (0.003) loss 1.2793 (1.0159) acc 68.7500 (74.6215) lr 1.2369e-04 eta 1:11:27 +epoch [44/50] batch [360/500] time 1.359 (1.363) data 0.000 (0.003) loss 0.9082 (1.0202) acc 75.0000 (74.5660) lr 1.2369e-04 eta 1:11:20 +epoch [44/50] batch [365/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.2637 (1.0226) acc 78.1250 (74.5719) lr 1.2369e-04 eta 1:11:13 +epoch [44/50] batch [370/500] time 1.362 (1.363) data 0.000 (0.002) loss 0.8433 (1.0228) acc 87.5000 (74.5861) lr 1.2369e-04 eta 1:11:06 +epoch [44/50] batch [375/500] time 1.343 (1.363) data 0.000 (0.002) loss 0.9194 (1.0238) acc 71.8750 (74.5083) lr 1.2369e-04 eta 1:10:58 +epoch [44/50] batch [380/500] time 1.356 (1.363) data 0.000 (0.002) loss 1.0293 (1.0271) acc 75.0000 (74.4655) lr 1.2369e-04 eta 1:10:53 +epoch [44/50] batch [385/500] time 1.384 (1.363) data 0.000 (0.002) loss 1.0947 (1.0299) acc 75.0000 (74.3912) lr 1.2369e-04 eta 1:10:46 +epoch [44/50] batch [390/500] time 1.355 (1.363) data 0.000 (0.002) loss 1.1787 (1.0300) acc 71.8750 (74.3670) lr 1.2369e-04 eta 1:10:39 +epoch [44/50] batch [395/500] time 1.354 (1.363) data 0.000 (0.002) loss 1.1123 (1.0316) acc 71.8750 (74.3117) lr 1.2369e-04 eta 1:10:32 +epoch [44/50] batch [400/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.1123 (1.0318) acc 65.6250 (74.2969) lr 1.2369e-04 eta 1:10:26 
+epoch [44/50] batch [405/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.3906 (1.0329) acc 68.7500 (74.2824) lr 1.2369e-04 eta 1:10:19 +epoch [44/50] batch [410/500] time 1.342 (1.363) data 0.000 (0.002) loss 1.0137 (1.0342) acc 65.6250 (74.2378) lr 1.2369e-04 eta 1:10:12 +epoch [44/50] batch [415/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.3008 (1.0356) acc 75.0000 (74.2244) lr 1.2369e-04 eta 1:10:05 +epoch [44/50] batch [420/500] time 1.360 (1.363) data 0.000 (0.002) loss 0.9463 (1.0366) acc 68.7500 (74.2113) lr 1.2369e-04 eta 1:09:58 +epoch [44/50] batch [425/500] time 1.373 (1.363) data 0.000 (0.002) loss 0.7334 (1.0340) acc 78.1250 (74.2647) lr 1.2369e-04 eta 1:09:51 +epoch [44/50] batch [430/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.8730 (1.0392) acc 56.2500 (74.1497) lr 1.2369e-04 eta 1:09:44 +epoch [44/50] batch [435/500] time 1.346 (1.363) data 0.000 (0.002) loss 1.2949 (1.0401) acc 68.7500 (74.1236) lr 1.2369e-04 eta 1:09:37 +epoch [44/50] batch [440/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.1201 (1.0399) acc 71.8750 (74.1193) lr 1.2369e-04 eta 1:09:30 +epoch [44/50] batch [445/500] time 1.351 (1.363) data 0.000 (0.002) loss 0.4424 (1.0382) acc 90.6250 (74.1713) lr 1.2369e-04 eta 1:09:23 +epoch [44/50] batch [450/500] time 1.379 (1.363) data 0.000 (0.002) loss 0.8105 (1.0373) acc 78.1250 (74.1528) lr 1.2369e-04 eta 1:09:16 +epoch [44/50] batch [455/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.2324 (1.0380) acc 71.8750 (74.1346) lr 1.2369e-04 eta 1:09:09 +epoch [44/50] batch [460/500] time 1.364 (1.363) data 0.000 (0.002) loss 0.9058 (1.0389) acc 81.2500 (74.0761) lr 1.2369e-04 eta 1:09:02 +epoch [44/50] batch [465/500] time 1.341 (1.363) data 0.000 (0.002) loss 1.2305 (1.0372) acc 68.7500 (74.1331) lr 1.2369e-04 eta 1:08:55 +epoch [44/50] batch [470/500] time 1.347 (1.363) data 0.000 (0.002) loss 1.0410 (1.0372) acc 78.1250 (74.1489) lr 1.2369e-04 eta 1:08:48 +epoch [44/50] batch [475/500] time 1.489 (1.363) data 0.000 
(0.002) loss 1.2480 (1.0392) acc 75.0000 (74.1250) lr 1.2369e-04 eta 1:08:42 +epoch [44/50] batch [480/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.0889 (1.0391) acc 65.6250 (74.0951) lr 1.2369e-04 eta 1:08:34 +epoch [44/50] batch [485/500] time 1.357 (1.363) data 0.001 (0.002) loss 1.3994 (1.0397) acc 71.8750 (74.0722) lr 1.2369e-04 eta 1:08:28 +epoch [44/50] batch [490/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.9775 (1.0381) acc 65.6250 (74.0689) lr 1.2369e-04 eta 1:08:21 +epoch [44/50] batch [495/500] time 1.382 (1.363) data 0.000 (0.002) loss 0.4041 (1.0368) acc 87.5000 (74.1035) lr 1.2369e-04 eta 1:08:14 +epoch [44/50] batch [500/500] time 1.372 (1.363) data 0.000 (0.002) loss 0.8052 (1.0364) acc 78.1250 (74.1250) lr 9.5173e-05 eta 1:08:07 +epoch [45/50] batch [5/500] time 1.364 (1.508) data 0.000 (0.148) loss 0.9941 (1.1719) acc 75.0000 (68.7500) lr 9.5173e-05 eta 1:15:16 +epoch [45/50] batch [10/500] time 1.361 (1.434) data 0.000 (0.074) loss 0.7158 (1.0164) acc 84.3750 (73.1250) lr 9.5173e-05 eta 1:11:27 +epoch [45/50] batch [15/500] time 1.368 (1.407) data 0.000 (0.050) loss 1.4229 (1.0346) acc 71.8750 (72.5000) lr 9.5173e-05 eta 1:09:59 +epoch [45/50] batch [20/500] time 1.362 (1.397) data 0.000 (0.037) loss 0.9614 (1.0145) acc 75.0000 (73.1250) lr 9.5173e-05 eta 1:09:23 +epoch [45/50] batch [25/500] time 1.352 (1.389) data 0.000 (0.030) loss 1.0830 (1.0172) acc 78.1250 (74.0000) lr 9.5173e-05 eta 1:08:52 +epoch [45/50] batch [30/500] time 1.359 (1.389) data 0.000 (0.025) loss 1.1543 (1.0412) acc 78.1250 (73.6458) lr 9.5173e-05 eta 1:08:44 +epoch [45/50] batch [35/500] time 1.334 (1.384) data 0.000 (0.022) loss 1.1631 (1.0679) acc 71.8750 (73.0357) lr 9.5173e-05 eta 1:08:23 +epoch [45/50] batch [40/500] time 1.356 (1.381) data 0.000 (0.019) loss 1.0547 (1.0482) acc 65.6250 (73.2812) lr 9.5173e-05 eta 1:08:07 +epoch [45/50] batch [45/500] time 1.366 (1.379) data 0.000 (0.017) loss 1.6543 (1.0803) acc 59.3750 (72.9167) lr 9.5173e-05 eta 1:07:56 
+epoch [45/50] batch [50/500] time 1.364 (1.377) data 0.000 (0.015) loss 0.8159 (1.0857) acc 75.0000 (72.8750) lr 9.5173e-05 eta 1:07:41 +epoch [45/50] batch [55/500] time 1.397 (1.376) data 0.000 (0.014) loss 0.4761 (1.1013) acc 93.7500 (72.8409) lr 9.5173e-05 eta 1:07:33 +epoch [45/50] batch [60/500] time 1.390 (1.375) data 0.000 (0.013) loss 0.4583 (1.0993) acc 93.7500 (73.0729) lr 9.5173e-05 eta 1:07:23 +epoch [45/50] batch [65/500] time 1.350 (1.375) data 0.000 (0.012) loss 1.1084 (1.0878) acc 75.0000 (73.1731) lr 9.5173e-05 eta 1:07:14 +epoch [45/50] batch [70/500] time 1.349 (1.375) data 0.000 (0.011) loss 0.8159 (1.0954) acc 84.3750 (73.3929) lr 9.5173e-05 eta 1:07:09 +epoch [45/50] batch [75/500] time 1.401 (1.375) data 0.000 (0.010) loss 0.8105 (1.0890) acc 81.2500 (73.5833) lr 9.5173e-05 eta 1:07:02 +epoch [45/50] batch [80/500] time 1.352 (1.374) data 0.000 (0.010) loss 0.8472 (1.0857) acc 75.0000 (73.5156) lr 9.5173e-05 eta 1:06:52 +epoch [45/50] batch [85/500] time 1.381 (1.373) data 0.000 (0.009) loss 1.3428 (1.0838) acc 75.0000 (73.6765) lr 9.5173e-05 eta 1:06:42 +epoch [45/50] batch [90/500] time 1.367 (1.373) data 0.000 (0.009) loss 1.8779 (1.1031) acc 50.0000 (73.2292) lr 9.5173e-05 eta 1:06:34 +epoch [45/50] batch [95/500] time 1.375 (1.372) data 0.000 (0.008) loss 1.3730 (1.1011) acc 75.0000 (73.2566) lr 9.5173e-05 eta 1:06:25 +epoch [45/50] batch [100/500] time 1.363 (1.371) data 0.000 (0.008) loss 0.9644 (1.1035) acc 75.0000 (73.3125) lr 9.5173e-05 eta 1:06:17 +epoch [45/50] batch [105/500] time 1.376 (1.371) data 0.000 (0.007) loss 0.6616 (1.0942) acc 78.1250 (73.4821) lr 9.5173e-05 eta 1:06:09 +epoch [45/50] batch [110/500] time 1.370 (1.371) data 0.000 (0.007) loss 0.8618 (1.0837) acc 71.8750 (73.5795) lr 9.5173e-05 eta 1:06:02 +epoch [45/50] batch [115/500] time 1.361 (1.371) data 0.000 (0.007) loss 1.2607 (1.0830) acc 75.0000 (73.5870) lr 9.5173e-05 eta 1:05:55 +epoch [45/50] batch [120/500] time 1.367 (1.371) data 0.000 (0.007) loss 
0.8584 (1.0752) acc 78.1250 (73.6979) lr 9.5173e-05 eta 1:05:47 +epoch [45/50] batch [125/500] time 1.352 (1.370) data 0.000 (0.006) loss 1.0518 (1.0768) acc 78.1250 (73.6500) lr 9.5173e-05 eta 1:05:39 +epoch [45/50] batch [130/500] time 1.353 (1.370) data 0.000 (0.006) loss 0.8140 (1.0785) acc 75.0000 (73.5577) lr 9.5173e-05 eta 1:05:31 +epoch [45/50] batch [135/500] time 1.372 (1.370) data 0.000 (0.006) loss 1.5381 (1.0785) acc 75.0000 (73.5880) lr 9.5173e-05 eta 1:05:23 +epoch [45/50] batch [140/500] time 1.366 (1.369) data 0.000 (0.006) loss 0.9165 (1.0760) acc 81.2500 (73.6161) lr 9.5173e-05 eta 1:05:15 +epoch [45/50] batch [145/500] time 1.350 (1.369) data 0.000 (0.005) loss 0.5869 (1.0733) acc 81.2500 (73.6638) lr 9.5173e-05 eta 1:05:07 +epoch [45/50] batch [150/500] time 1.349 (1.368) data 0.000 (0.005) loss 0.6421 (1.0789) acc 84.3750 (73.5625) lr 9.5173e-05 eta 1:04:59 +epoch [45/50] batch [155/500] time 1.378 (1.368) data 0.000 (0.005) loss 0.7676 (1.0827) acc 81.2500 (73.5282) lr 9.5173e-05 eta 1:04:51 +epoch [45/50] batch [160/500] time 1.342 (1.367) data 0.000 (0.005) loss 0.6045 (1.0851) acc 87.5000 (73.5156) lr 9.5173e-05 eta 1:04:43 +epoch [45/50] batch [165/500] time 1.351 (1.367) data 0.000 (0.005) loss 1.2246 (1.0909) acc 59.3750 (73.2765) lr 9.5173e-05 eta 1:04:36 +epoch [45/50] batch [170/500] time 1.349 (1.368) data 0.000 (0.005) loss 1.2812 (1.0866) acc 62.5000 (73.3456) lr 9.5173e-05 eta 1:04:31 +epoch [45/50] batch [175/500] time 1.373 (1.368) data 0.000 (0.005) loss 2.0410 (1.0885) acc 56.2500 (73.3214) lr 9.5173e-05 eta 1:04:23 +epoch [45/50] batch [180/500] time 1.376 (1.368) data 0.000 (0.004) loss 0.9775 (1.0882) acc 75.0000 (73.4028) lr 9.5173e-05 eta 1:04:16 +epoch [45/50] batch [185/500] time 1.349 (1.367) data 0.000 (0.004) loss 0.9346 (1.0856) acc 75.0000 (73.4459) lr 9.5173e-05 eta 1:04:08 +epoch [45/50] batch [190/500] time 1.346 (1.367) data 0.000 (0.004) loss 1.0381 (1.0833) acc 75.0000 (73.5033) lr 9.5173e-05 eta 1:04:01 
+epoch [45/50] batch [195/500] time 1.348 (1.367) data 0.000 (0.004) loss 0.7573 (1.0803) acc 75.0000 (73.5417) lr 9.5173e-05 eta 1:03:53 +epoch [45/50] batch [200/500] time 1.374 (1.367) data 0.000 (0.004) loss 1.0146 (1.0793) acc 81.2500 (73.5469) lr 9.5173e-05 eta 1:03:47 +epoch [45/50] batch [205/500] time 1.365 (1.367) data 0.000 (0.004) loss 1.0801 (1.0811) acc 78.1250 (73.5366) lr 9.5173e-05 eta 1:03:39 +epoch [45/50] batch [210/500] time 1.476 (1.367) data 0.000 (0.004) loss 1.4277 (1.0774) acc 59.3750 (73.5417) lr 9.5173e-05 eta 1:03:33 +epoch [45/50] batch [215/500] time 1.377 (1.367) data 0.000 (0.004) loss 0.8481 (1.0791) acc 78.1250 (73.5320) lr 9.5173e-05 eta 1:03:26 +epoch [45/50] batch [220/500] time 1.364 (1.367) data 0.001 (0.004) loss 0.6245 (1.0753) acc 84.3750 (73.5795) lr 9.5173e-05 eta 1:03:19 +epoch [45/50] batch [225/500] time 1.358 (1.367) data 0.000 (0.004) loss 1.1006 (1.0753) acc 68.7500 (73.6389) lr 9.5173e-05 eta 1:03:12 +epoch [45/50] batch [230/500] time 1.368 (1.367) data 0.000 (0.004) loss 1.2051 (1.0725) acc 62.5000 (73.5598) lr 9.5173e-05 eta 1:03:05 +epoch [45/50] batch [235/500] time 1.373 (1.367) data 0.000 (0.004) loss 0.6357 (1.0724) acc 78.1250 (73.5239) lr 9.5173e-05 eta 1:02:58 +epoch [45/50] batch [240/500] time 1.384 (1.367) data 0.000 (0.003) loss 1.7725 (1.0766) acc 62.5000 (73.4375) lr 9.5173e-05 eta 1:02:52 +epoch [45/50] batch [245/500] time 1.371 (1.367) data 0.000 (0.003) loss 0.4365 (1.0752) acc 84.3750 (73.3673) lr 9.5173e-05 eta 1:02:45 +epoch [45/50] batch [250/500] time 1.378 (1.367) data 0.000 (0.003) loss 1.1533 (1.0718) acc 68.7500 (73.3750) lr 9.5173e-05 eta 1:02:38 +epoch [45/50] batch [255/500] time 1.354 (1.367) data 0.000 (0.003) loss 0.9087 (1.0757) acc 75.0000 (73.3333) lr 9.5173e-05 eta 1:02:31 +epoch [45/50] batch [260/500] time 1.361 (1.367) data 0.000 (0.003) loss 1.2109 (1.0739) acc 71.8750 (73.3293) lr 9.5173e-05 eta 1:02:24 +epoch [45/50] batch [265/500] time 1.355 (1.367) data 0.000 
(0.003) loss 0.9092 (1.0714) acc 84.3750 (73.4080) lr 9.5173e-05 eta 1:02:17 +epoch [45/50] batch [270/500] time 1.354 (1.366) data 0.000 (0.003) loss 0.9355 (1.0721) acc 71.8750 (73.3681) lr 9.5173e-05 eta 1:02:10 +epoch [45/50] batch [275/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.7280 (1.0699) acc 81.2500 (73.4432) lr 9.5173e-05 eta 1:02:02 +epoch [45/50] batch [280/500] time 1.352 (1.366) data 0.000 (0.003) loss 1.2139 (1.0716) acc 62.5000 (73.3817) lr 9.5173e-05 eta 1:01:55 +epoch [45/50] batch [285/500] time 1.352 (1.366) data 0.000 (0.003) loss 0.5894 (1.0696) acc 81.2500 (73.3333) lr 9.5173e-05 eta 1:01:48 +epoch [45/50] batch [290/500] time 1.347 (1.366) data 0.000 (0.003) loss 0.6655 (1.0665) acc 75.0000 (73.3621) lr 9.5173e-05 eta 1:01:40 +epoch [45/50] batch [295/500] time 1.350 (1.365) data 0.000 (0.003) loss 0.9019 (1.0657) acc 81.2500 (73.3475) lr 9.5173e-05 eta 1:01:33 +epoch [45/50] batch [300/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.3828 (1.0682) acc 75.0000 (73.2917) lr 9.5173e-05 eta 1:01:26 +epoch [45/50] batch [305/500] time 1.339 (1.365) data 0.000 (0.003) loss 1.0039 (1.0662) acc 71.8750 (73.3197) lr 9.5173e-05 eta 1:01:19 +epoch [45/50] batch [310/500] time 1.358 (1.366) data 0.000 (0.003) loss 0.7671 (1.0623) acc 81.2500 (73.3871) lr 9.5173e-05 eta 1:01:13 +epoch [45/50] batch [315/500] time 1.333 (1.366) data 0.000 (0.003) loss 0.6875 (1.0593) acc 87.5000 (73.4623) lr 9.5173e-05 eta 1:01:06 +epoch [45/50] batch [320/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.3154 (1.0600) acc 71.8750 (73.4961) lr 9.5173e-05 eta 1:00:59 +epoch [45/50] batch [325/500] time 1.368 (1.365) data 0.000 (0.003) loss 1.5420 (1.0614) acc 65.6250 (73.4712) lr 9.5173e-05 eta 1:00:52 +epoch [45/50] batch [330/500] time 1.354 (1.365) data 0.000 (0.003) loss 0.6782 (1.0607) acc 81.2500 (73.4470) lr 9.5173e-05 eta 1:00:45 +epoch [45/50] batch [335/500] time 1.348 (1.365) data 0.000 (0.003) loss 0.6797 (1.0554) acc 78.1250 (73.5634) lr 9.5173e-05 
eta 1:00:38 +epoch [45/50] batch [340/500] time 1.377 (1.365) data 0.000 (0.003) loss 1.1719 (1.0565) acc 62.5000 (73.4926) lr 9.5173e-05 eta 1:00:31 +epoch [45/50] batch [345/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.8853 (1.0586) acc 78.1250 (73.5054) lr 9.5173e-05 eta 1:00:24 +epoch [45/50] batch [350/500] time 1.370 (1.365) data 0.000 (0.002) loss 0.5859 (1.0552) acc 84.3750 (73.5714) lr 9.5173e-05 eta 1:00:18 +epoch [45/50] batch [355/500] time 1.374 (1.366) data 0.000 (0.002) loss 1.4531 (1.0592) acc 56.2500 (73.4507) lr 9.5173e-05 eta 1:00:12 +epoch [45/50] batch [360/500] time 1.372 (1.366) data 0.000 (0.002) loss 1.4082 (1.0601) acc 56.2500 (73.4115) lr 9.5173e-05 eta 1:00:05 +epoch [45/50] batch [365/500] time 1.362 (1.366) data 0.000 (0.002) loss 0.7300 (1.0630) acc 87.5000 (73.3647) lr 9.5173e-05 eta 0:59:58 +epoch [45/50] batch [370/500] time 1.349 (1.365) data 0.000 (0.002) loss 0.5840 (1.0614) acc 81.2500 (73.4122) lr 9.5173e-05 eta 0:59:51 +epoch [45/50] batch [375/500] time 1.387 (1.366) data 0.000 (0.002) loss 0.6812 (1.0608) acc 75.0000 (73.4250) lr 9.5173e-05 eta 0:59:44 +epoch [45/50] batch [380/500] time 1.353 (1.366) data 0.000 (0.002) loss 1.2383 (1.0615) acc 62.5000 (73.4128) lr 9.5173e-05 eta 0:59:37 +epoch [45/50] batch [385/500] time 1.372 (1.365) data 0.000 (0.002) loss 0.7798 (1.0607) acc 84.3750 (73.4416) lr 9.5173e-05 eta 0:59:30 +epoch [45/50] batch [390/500] time 1.357 (1.365) data 0.000 (0.002) loss 1.0283 (1.0636) acc 75.0000 (73.3894) lr 9.5173e-05 eta 0:59:23 +epoch [45/50] batch [395/500] time 1.381 (1.365) data 0.000 (0.002) loss 0.8496 (1.0616) acc 71.8750 (73.3861) lr 9.5173e-05 eta 0:59:16 +epoch [45/50] batch [400/500] time 1.392 (1.365) data 0.001 (0.002) loss 0.7339 (1.0594) acc 78.1250 (73.3828) lr 9.5173e-05 eta 0:59:10 +epoch [45/50] batch [405/500] time 1.363 (1.365) data 0.000 (0.002) loss 0.8291 (1.0578) acc 75.0000 (73.3873) lr 9.5173e-05 eta 0:59:03 +epoch [45/50] batch [410/500] time 1.356 (1.365) data 
0.000 (0.002) loss 1.2617 (1.0583) acc 68.7500 (73.3994) lr 9.5173e-05 eta 0:58:56 +epoch [45/50] batch [415/500] time 1.342 (1.365) data 0.000 (0.002) loss 0.9009 (1.0559) acc 75.0000 (73.4488) lr 9.5173e-05 eta 0:58:48 +epoch [45/50] batch [420/500] time 1.371 (1.365) data 0.000 (0.002) loss 1.0977 (1.0557) acc 65.6250 (73.4077) lr 9.5173e-05 eta 0:58:41 +epoch [45/50] batch [425/500] time 1.380 (1.365) data 0.000 (0.002) loss 0.9111 (1.0546) acc 78.1250 (73.4191) lr 9.5173e-05 eta 0:58:34 +epoch [45/50] batch [430/500] time 1.351 (1.365) data 0.000 (0.002) loss 1.1719 (1.0542) acc 75.0000 (73.4593) lr 9.5173e-05 eta 0:58:27 +epoch [45/50] batch [435/500] time 1.342 (1.365) data 0.000 (0.002) loss 1.4775 (1.0533) acc 59.3750 (73.4626) lr 9.5173e-05 eta 0:58:20 +epoch [45/50] batch [440/500] time 1.365 (1.365) data 0.000 (0.002) loss 1.5166 (1.0526) acc 65.6250 (73.4588) lr 9.5173e-05 eta 0:58:13 +epoch [45/50] batch [445/500] time 1.356 (1.365) data 0.000 (0.002) loss 0.5493 (1.0505) acc 90.6250 (73.4691) lr 9.5173e-05 eta 0:58:06 +epoch [45/50] batch [450/500] time 1.369 (1.365) data 0.000 (0.002) loss 0.6255 (1.0486) acc 78.1250 (73.4861) lr 9.5173e-05 eta 0:57:59 +epoch [45/50] batch [455/500] time 1.363 (1.365) data 0.000 (0.002) loss 1.2158 (1.0453) acc 75.0000 (73.5783) lr 9.5173e-05 eta 0:57:53 +epoch [45/50] batch [460/500] time 1.369 (1.365) data 0.000 (0.002) loss 1.0469 (1.0462) acc 78.1250 (73.6005) lr 9.5173e-05 eta 0:57:46 +epoch [45/50] batch [465/500] time 1.355 (1.365) data 0.000 (0.002) loss 1.8037 (1.0457) acc 56.2500 (73.5685) lr 9.5173e-05 eta 0:57:39 +epoch [45/50] batch [470/500] time 1.355 (1.365) data 0.000 (0.002) loss 0.9419 (1.0436) acc 75.0000 (73.6237) lr 9.5173e-05 eta 0:57:32 +epoch [45/50] batch [475/500] time 1.335 (1.365) data 0.000 (0.002) loss 0.9180 (1.0419) acc 71.8750 (73.6579) lr 9.5173e-05 eta 0:57:25 +epoch [45/50] batch [480/500] time 1.377 (1.365) data 0.000 (0.002) loss 1.0928 (1.0427) acc 65.6250 (73.6328) lr 
9.5173e-05 eta 0:57:18 +epoch [45/50] batch [485/500] time 1.360 (1.364) data 0.001 (0.002) loss 1.2393 (1.0435) acc 71.8750 (73.6340) lr 9.5173e-05 eta 0:57:11 +epoch [45/50] batch [490/500] time 1.360 (1.364) data 0.000 (0.002) loss 1.4395 (1.0456) acc 50.0000 (73.5332) lr 9.5173e-05 eta 0:57:04 +epoch [45/50] batch [495/500] time 1.350 (1.364) data 0.000 (0.002) loss 1.2676 (1.0451) acc 75.0000 (73.5480) lr 9.5173e-05 eta 0:56:57 +epoch [45/50] batch [500/500] time 1.362 (1.364) data 0.000 (0.002) loss 1.9668 (1.0498) acc 56.2500 (73.4437) lr 7.0224e-05 eta 0:56:50 +epoch [46/50] batch [5/500] time 1.376 (1.534) data 0.000 (0.166) loss 0.7539 (0.7671) acc 75.0000 (79.3750) lr 7.0224e-05 eta 1:03:48 +epoch [46/50] batch [10/500] time 1.359 (1.451) data 0.000 (0.083) loss 1.0703 (0.9014) acc 81.2500 (78.1250) lr 7.0224e-05 eta 1:00:12 +epoch [46/50] batch [15/500] time 1.358 (1.421) data 0.000 (0.056) loss 0.8843 (0.9082) acc 71.8750 (75.8333) lr 7.0224e-05 eta 0:58:52 +epoch [46/50] batch [20/500] time 1.373 (1.407) data 0.000 (0.042) loss 1.0957 (0.9629) acc 84.3750 (75.1562) lr 7.0224e-05 eta 0:58:10 +epoch [46/50] batch [25/500] time 1.356 (1.398) data 0.000 (0.033) loss 1.0264 (1.0286) acc 75.0000 (74.6250) lr 7.0224e-05 eta 0:57:40 +epoch [46/50] batch [30/500] time 1.360 (1.393) data 0.000 (0.028) loss 0.8335 (1.0546) acc 71.8750 (73.6458) lr 7.0224e-05 eta 0:57:20 +epoch [46/50] batch [35/500] time 1.362 (1.388) data 0.000 (0.024) loss 0.9473 (1.0326) acc 78.1250 (73.7500) lr 7.0224e-05 eta 0:57:00 +epoch [46/50] batch [40/500] time 1.521 (1.387) data 0.000 (0.021) loss 1.0283 (1.0585) acc 71.8750 (72.8125) lr 7.0224e-05 eta 0:56:51 +epoch [46/50] batch [45/500] time 1.353 (1.383) data 0.000 (0.019) loss 1.5205 (1.0494) acc 68.7500 (73.3333) lr 7.0224e-05 eta 0:56:36 +epoch [46/50] batch [50/500] time 1.346 (1.380) data 0.000 (0.017) loss 0.4912 (1.0425) acc 84.3750 (73.6250) lr 7.0224e-05 eta 0:56:21 +epoch [46/50] batch [55/500] time 1.358 (1.379) data 
0.000 (0.015) loss 0.6499 (1.0335) acc 84.3750 (74.0341) lr 7.0224e-05 eta 0:56:10 +epoch [46/50] batch [60/500] time 1.343 (1.377) data 0.001 (0.014) loss 1.1309 (1.0399) acc 68.7500 (73.5417) lr 7.0224e-05 eta 0:55:59 +epoch [46/50] batch [65/500] time 1.384 (1.377) data 0.000 (0.013) loss 1.5322 (1.0668) acc 62.5000 (73.2692) lr 7.0224e-05 eta 0:55:52 +epoch [46/50] batch [70/500] time 1.354 (1.376) data 0.000 (0.012) loss 0.8813 (1.0591) acc 75.0000 (73.0357) lr 7.0224e-05 eta 0:55:42 +epoch [46/50] batch [75/500] time 1.361 (1.375) data 0.000 (0.011) loss 1.2764 (1.0590) acc 71.8750 (73.0000) lr 7.0224e-05 eta 0:55:34 +epoch [46/50] batch [80/500] time 1.349 (1.374) data 0.000 (0.011) loss 0.6514 (1.0545) acc 68.7500 (72.9688) lr 7.0224e-05 eta 0:55:25 +epoch [46/50] batch [85/500] time 1.348 (1.373) data 0.000 (0.010) loss 0.6572 (1.0510) acc 84.3750 (73.0515) lr 7.0224e-05 eta 0:55:16 +epoch [46/50] batch [90/500] time 1.370 (1.373) data 0.000 (0.010) loss 0.7822 (1.0511) acc 78.1250 (73.1597) lr 7.0224e-05 eta 0:55:09 +epoch [46/50] batch [95/500] time 1.343 (1.372) data 0.000 (0.009) loss 1.2344 (1.0673) acc 71.8750 (72.8289) lr 7.0224e-05 eta 0:55:00 +epoch [46/50] batch [100/500] time 1.362 (1.371) data 0.000 (0.009) loss 1.1699 (1.0756) acc 65.6250 (72.5625) lr 7.0224e-05 eta 0:54:51 +epoch [46/50] batch [105/500] time 1.359 (1.371) data 0.000 (0.008) loss 1.0283 (1.0661) acc 68.7500 (72.8869) lr 7.0224e-05 eta 0:54:43 +epoch [46/50] batch [110/500] time 1.372 (1.371) data 0.000 (0.008) loss 0.5781 (1.0557) acc 81.2500 (73.0966) lr 7.0224e-05 eta 0:54:36 +epoch [46/50] batch [115/500] time 1.382 (1.371) data 0.000 (0.008) loss 1.4189 (1.0512) acc 56.2500 (73.0163) lr 7.0224e-05 eta 0:54:28 +epoch [46/50] batch [120/500] time 1.346 (1.370) data 0.000 (0.007) loss 0.9580 (1.0574) acc 71.8750 (72.9167) lr 7.0224e-05 eta 0:54:21 +epoch [46/50] batch [125/500] time 1.375 (1.370) data 0.000 (0.007) loss 0.6343 (1.0537) acc 87.5000 (73.0500) lr 7.0224e-05 eta 
0:54:14 +epoch [46/50] batch [130/500] time 1.353 (1.370) data 0.000 (0.007) loss 1.0381 (1.0412) acc 68.7500 (73.2692) lr 7.0224e-05 eta 0:54:06 +epoch [46/50] batch [135/500] time 1.343 (1.369) data 0.000 (0.006) loss 1.0039 (1.0469) acc 84.3750 (73.1944) lr 7.0224e-05 eta 0:53:58 +epoch [46/50] batch [140/500] time 1.341 (1.369) data 0.000 (0.006) loss 1.1396 (1.0459) acc 65.6250 (73.0357) lr 7.0224e-05 eta 0:53:51 +epoch [46/50] batch [145/500] time 1.361 (1.369) data 0.000 (0.006) loss 0.5894 (1.0420) acc 84.3750 (73.1681) lr 7.0224e-05 eta 0:53:43 +epoch [46/50] batch [150/500] time 1.361 (1.369) data 0.000 (0.006) loss 0.6201 (1.0365) acc 78.1250 (73.2708) lr 7.0224e-05 eta 0:53:36 +epoch [46/50] batch [155/500] time 1.369 (1.368) data 0.000 (0.006) loss 0.9028 (1.0329) acc 75.0000 (73.3266) lr 7.0224e-05 eta 0:53:29 +epoch [46/50] batch [160/500] time 1.350 (1.368) data 0.001 (0.006) loss 1.3770 (1.0398) acc 62.5000 (73.2422) lr 7.0224e-05 eta 0:53:21 +epoch [46/50] batch [165/500] time 1.340 (1.368) data 0.000 (0.005) loss 0.7661 (1.0384) acc 81.2500 (73.2765) lr 7.0224e-05 eta 0:53:13 +epoch [46/50] batch [170/500] time 1.356 (1.367) data 0.000 (0.005) loss 1.0049 (1.0351) acc 81.2500 (73.3088) lr 7.0224e-05 eta 0:53:05 +epoch [46/50] batch [175/500] time 1.366 (1.367) data 0.000 (0.005) loss 1.1562 (1.0343) acc 75.0000 (73.3750) lr 7.0224e-05 eta 0:52:58 +epoch [46/50] batch [180/500] time 1.368 (1.367) data 0.000 (0.005) loss 0.8203 (1.0341) acc 81.2500 (73.4722) lr 7.0224e-05 eta 0:52:51 +epoch [46/50] batch [185/500] time 1.353 (1.367) data 0.000 (0.005) loss 0.8115 (1.0336) acc 81.2500 (73.4797) lr 7.0224e-05 eta 0:52:45 +epoch [46/50] batch [190/500] time 1.363 (1.367) data 0.000 (0.005) loss 0.7524 (1.0341) acc 75.0000 (73.4375) lr 7.0224e-05 eta 0:52:38 +epoch [46/50] batch [195/500] time 1.335 (1.367) data 0.000 (0.005) loss 1.1680 (1.0367) acc 71.8750 (73.3494) lr 7.0224e-05 eta 0:52:30 +epoch [46/50] batch [200/500] time 1.370 (1.367) data 
0.000 (0.004) loss 1.6279 (1.0448) acc 62.5000 (73.1562) lr 7.0224e-05 eta 0:52:23 +epoch [46/50] batch [205/500] time 1.347 (1.366) data 0.000 (0.004) loss 0.9199 (1.0443) acc 81.2500 (73.2165) lr 7.0224e-05 eta 0:52:15 +epoch [46/50] batch [210/500] time 1.379 (1.366) data 0.000 (0.004) loss 0.9199 (1.0445) acc 65.6250 (73.1250) lr 7.0224e-05 eta 0:52:08 +epoch [46/50] batch [215/500] time 1.367 (1.366) data 0.000 (0.004) loss 0.7139 (1.0411) acc 78.1250 (73.1831) lr 7.0224e-05 eta 0:52:01 +epoch [46/50] batch [220/500] time 1.373 (1.366) data 0.000 (0.004) loss 0.4272 (1.0409) acc 93.7500 (73.3239) lr 7.0224e-05 eta 0:51:54 +epoch [46/50] batch [225/500] time 1.361 (1.366) data 0.000 (0.004) loss 0.8711 (1.0373) acc 78.1250 (73.4028) lr 7.0224e-05 eta 0:51:47 +epoch [46/50] batch [230/500] time 1.357 (1.366) data 0.000 (0.004) loss 0.9180 (1.0345) acc 71.8750 (73.3832) lr 7.0224e-05 eta 0:51:40 +epoch [46/50] batch [235/500] time 1.340 (1.366) data 0.000 (0.004) loss 1.0713 (1.0351) acc 65.6250 (73.2979) lr 7.0224e-05 eta 0:51:33 +epoch [46/50] batch [240/500] time 1.343 (1.365) data 0.000 (0.004) loss 0.5718 (1.0320) acc 84.3750 (73.3333) lr 7.0224e-05 eta 0:51:25 +epoch [46/50] batch [245/500] time 1.361 (1.365) data 0.000 (0.004) loss 0.7524 (1.0303) acc 78.1250 (73.3546) lr 7.0224e-05 eta 0:51:18 +epoch [46/50] batch [250/500] time 1.342 (1.365) data 0.000 (0.004) loss 0.7759 (1.0278) acc 84.3750 (73.4500) lr 7.0224e-05 eta 0:51:10 +epoch [46/50] batch [255/500] time 1.360 (1.364) data 0.000 (0.004) loss 1.1611 (1.0290) acc 71.8750 (73.4681) lr 7.0224e-05 eta 0:51:02 +epoch [46/50] batch [260/500] time 1.355 (1.364) data 0.000 (0.004) loss 0.8408 (1.0280) acc 81.2500 (73.4615) lr 7.0224e-05 eta 0:50:55 +epoch [46/50] batch [265/500] time 1.356 (1.364) data 0.000 (0.003) loss 1.2939 (1.0266) acc 71.8750 (73.5613) lr 7.0224e-05 eta 0:50:48 +epoch [46/50] batch [270/500] time 1.353 (1.364) data 0.000 (0.003) loss 0.9380 (1.0237) acc 81.2500 (73.6458) lr 
7.0224e-05 eta 0:50:41 +epoch [46/50] batch [275/500] time 1.358 (1.364) data 0.000 (0.003) loss 0.7969 (1.0200) acc 78.1250 (73.7727) lr 7.0224e-05 eta 0:50:33 +epoch [46/50] batch [280/500] time 1.362 (1.364) data 0.000 (0.003) loss 0.8721 (1.0171) acc 71.8750 (73.8058) lr 7.0224e-05 eta 0:50:27 +epoch [46/50] batch [285/500] time 1.335 (1.364) data 0.000 (0.003) loss 0.7339 (1.0155) acc 87.5000 (73.8377) lr 7.0224e-05 eta 0:50:20 +epoch [46/50] batch [290/500] time 1.353 (1.364) data 0.000 (0.003) loss 1.2031 (1.0170) acc 65.6250 (73.8039) lr 7.0224e-05 eta 0:50:13 +epoch [46/50] batch [295/500] time 1.372 (1.364) data 0.000 (0.003) loss 0.9238 (1.0166) acc 71.8750 (73.8347) lr 7.0224e-05 eta 0:50:07 +epoch [46/50] batch [300/500] time 1.346 (1.364) data 0.000 (0.003) loss 1.0645 (1.0209) acc 71.8750 (73.6979) lr 7.0224e-05 eta 0:50:00 +epoch [46/50] batch [305/500] time 1.366 (1.364) data 0.000 (0.003) loss 1.0293 (1.0233) acc 78.1250 (73.6066) lr 7.0224e-05 eta 0:49:53 +epoch [46/50] batch [310/500] time 1.351 (1.363) data 0.000 (0.003) loss 0.5400 (1.0206) acc 81.2500 (73.7298) lr 7.0224e-05 eta 0:49:45 +epoch [46/50] batch [315/500] time 1.357 (1.363) data 0.000 (0.003) loss 0.5713 (1.0217) acc 75.0000 (73.6806) lr 7.0224e-05 eta 0:49:38 +epoch [46/50] batch [320/500] time 1.348 (1.363) data 0.000 (0.003) loss 0.8965 (1.0212) acc 75.0000 (73.7109) lr 7.0224e-05 eta 0:49:31 +epoch [46/50] batch [325/500] time 1.374 (1.363) data 0.000 (0.003) loss 1.1748 (1.0177) acc 65.6250 (73.7981) lr 7.0224e-05 eta 0:49:24 +epoch [46/50] batch [330/500] time 1.340 (1.363) data 0.000 (0.003) loss 0.6025 (1.0159) acc 84.3750 (73.8352) lr 7.0224e-05 eta 0:49:18 +epoch [46/50] batch [335/500] time 1.345 (1.363) data 0.000 (0.003) loss 0.8403 (1.0157) acc 81.2500 (73.8433) lr 7.0224e-05 eta 0:49:11 +epoch [46/50] batch [340/500] time 1.359 (1.363) data 0.000 (0.003) loss 0.9507 (1.0180) acc 71.8750 (73.8603) lr 7.0224e-05 eta 0:49:04 +epoch [46/50] batch [345/500] time 1.362 
(1.363) data 0.000 (0.003) loss 1.2451 (1.0175) acc 68.7500 (73.8225) lr 7.0224e-05 eta 0:48:57 +epoch [46/50] batch [350/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.9336 (1.0160) acc 78.1250 (73.8304) lr 7.0224e-05 eta 0:48:50 +epoch [46/50] batch [355/500] time 1.365 (1.363) data 0.000 (0.003) loss 0.6787 (1.0164) acc 84.3750 (73.8732) lr 7.0224e-05 eta 0:48:43 +epoch [46/50] batch [360/500] time 1.342 (1.363) data 0.000 (0.003) loss 1.3418 (1.0157) acc 68.7500 (73.9323) lr 7.0224e-05 eta 0:48:35 +epoch [46/50] batch [365/500] time 1.363 (1.362) data 0.000 (0.003) loss 1.0977 (1.0143) acc 71.8750 (74.0240) lr 7.0224e-05 eta 0:48:28 +epoch [46/50] batch [370/500] time 1.361 (1.362) data 0.000 (0.003) loss 0.9121 (1.0135) acc 65.6250 (74.0118) lr 7.0224e-05 eta 0:48:21 +epoch [46/50] batch [375/500] time 1.350 (1.362) data 0.000 (0.003) loss 0.8516 (1.0136) acc 75.0000 (73.9667) lr 7.0224e-05 eta 0:48:15 +epoch [46/50] batch [380/500] time 1.360 (1.362) data 0.000 (0.003) loss 1.2285 (1.0138) acc 75.0000 (73.9885) lr 7.0224e-05 eta 0:48:08 +epoch [46/50] batch [385/500] time 1.354 (1.362) data 0.000 (0.003) loss 0.5518 (1.0098) acc 81.2500 (74.0747) lr 7.0224e-05 eta 0:48:00 +epoch [46/50] batch [390/500] time 1.353 (1.362) data 0.000 (0.002) loss 0.9219 (1.0096) acc 81.2500 (74.0465) lr 7.0224e-05 eta 0:47:54 +epoch [46/50] batch [395/500] time 1.355 (1.362) data 0.000 (0.002) loss 1.2588 (1.0097) acc 71.8750 (74.0506) lr 7.0224e-05 eta 0:47:47 +epoch [46/50] batch [400/500] time 1.365 (1.362) data 0.000 (0.002) loss 1.1367 (1.0099) acc 78.1250 (74.0859) lr 7.0224e-05 eta 0:47:40 +epoch [46/50] batch [405/500] time 1.364 (1.362) data 0.000 (0.002) loss 0.6421 (1.0095) acc 78.1250 (74.1049) lr 7.0224e-05 eta 0:47:33 +epoch [46/50] batch [410/500] time 1.380 (1.362) data 0.000 (0.002) loss 1.1562 (1.0118) acc 75.0000 (74.0854) lr 7.0224e-05 eta 0:47:26 +epoch [46/50] batch [415/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.1279 (1.0109) acc 78.1250 
(74.1491) lr 7.0224e-05 eta 0:47:20 +epoch [46/50] batch [420/500] time 1.366 (1.362) data 0.000 (0.002) loss 1.1973 (1.0105) acc 78.1250 (74.1741) lr 7.0224e-05 eta 0:47:13 +epoch [46/50] batch [425/500] time 1.474 (1.362) data 0.000 (0.002) loss 1.1494 (1.0126) acc 71.8750 (74.1250) lr 7.0224e-05 eta 0:47:06 +epoch [46/50] batch [430/500] time 1.343 (1.362) data 0.000 (0.002) loss 1.0928 (1.0157) acc 78.1250 (74.0552) lr 7.0224e-05 eta 0:46:59 +epoch [46/50] batch [435/500] time 1.339 (1.362) data 0.000 (0.002) loss 1.1338 (1.0177) acc 71.8750 (74.0086) lr 7.0224e-05 eta 0:46:52 +epoch [46/50] batch [440/500] time 1.378 (1.362) data 0.000 (0.002) loss 1.1025 (1.0203) acc 78.1250 (73.9489) lr 7.0224e-05 eta 0:46:46 +epoch [46/50] batch [445/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.4658 (1.0207) acc 53.1250 (73.8834) lr 7.0224e-05 eta 0:46:39 +epoch [46/50] batch [450/500] time 1.374 (1.362) data 0.000 (0.002) loss 1.1387 (1.0201) acc 71.8750 (73.9306) lr 7.0224e-05 eta 0:46:32 +epoch [46/50] batch [455/500] time 1.366 (1.362) data 0.000 (0.002) loss 1.0332 (1.0212) acc 71.8750 (73.9354) lr 7.0224e-05 eta 0:46:25 +epoch [46/50] batch [460/500] time 1.368 (1.362) data 0.000 (0.002) loss 1.5254 (1.0208) acc 59.3750 (73.9470) lr 7.0224e-05 eta 0:46:18 +epoch [46/50] batch [465/500] time 1.364 (1.362) data 0.000 (0.002) loss 0.8843 (1.0223) acc 87.5000 (73.9315) lr 7.0224e-05 eta 0:46:11 +epoch [46/50] batch [470/500] time 1.347 (1.362) data 0.000 (0.002) loss 1.0752 (1.0219) acc 68.7500 (73.9561) lr 7.0224e-05 eta 0:46:05 +epoch [46/50] batch [475/500] time 1.375 (1.362) data 0.000 (0.002) loss 0.9854 (1.0218) acc 81.2500 (73.9671) lr 7.0224e-05 eta 0:45:58 +epoch [46/50] batch [480/500] time 1.361 (1.362) data 0.000 (0.002) loss 0.8198 (1.0215) acc 75.0000 (73.9583) lr 7.0224e-05 eta 0:45:51 +epoch [46/50] batch [485/500] time 1.330 (1.362) data 0.001 (0.002) loss 0.4321 (1.0214) acc 87.5000 (73.9691) lr 7.0224e-05 eta 0:45:44 +epoch [46/50] batch [490/500] 
time 1.361 (1.362) data 0.000 (0.002) loss 1.1953 (1.0244) acc 78.1250 (73.9349) lr 7.0224e-05 eta 0:45:37 +epoch [46/50] batch [495/500] time 1.362 (1.362) data 0.000 (0.002) loss 1.0557 (1.0244) acc 75.0000 (73.9331) lr 7.0224e-05 eta 0:45:30 +epoch [46/50] batch [500/500] time 1.331 (1.362) data 0.000 (0.002) loss 1.4004 (1.0266) acc 65.6250 (73.8563) lr 4.8943e-05 eta 0:45:23 +epoch [47/50] batch [5/500] time 1.384 (1.520) data 0.000 (0.155) loss 0.7734 (0.8260) acc 78.1250 (77.5000) lr 4.8943e-05 eta 0:50:33 +epoch [47/50] batch [10/500] time 1.343 (1.437) data 0.000 (0.078) loss 1.1348 (0.8596) acc 71.8750 (77.8125) lr 4.8943e-05 eta 0:47:38 +epoch [47/50] batch [15/500] time 1.380 (1.427) data 0.000 (0.052) loss 1.3398 (0.8846) acc 71.8750 (77.5000) lr 4.8943e-05 eta 0:47:11 +epoch [47/50] batch [20/500] time 1.355 (1.411) data 0.000 (0.039) loss 1.0732 (0.9054) acc 81.2500 (77.5000) lr 4.8943e-05 eta 0:46:33 +epoch [47/50] batch [25/500] time 1.374 (1.401) data 0.000 (0.031) loss 1.1230 (0.9606) acc 71.8750 (76.2500) lr 4.8943e-05 eta 0:46:07 +epoch [47/50] batch [30/500] time 1.350 (1.395) data 0.000 (0.026) loss 1.2783 (0.9855) acc 75.0000 (76.0417) lr 4.8943e-05 eta 0:45:48 +epoch [47/50] batch [35/500] time 1.364 (1.390) data 0.000 (0.022) loss 0.7598 (1.0140) acc 81.2500 (75.1786) lr 4.8943e-05 eta 0:45:31 +epoch [47/50] batch [40/500] time 1.365 (1.386) data 0.000 (0.020) loss 1.2441 (1.0389) acc 68.7500 (74.6094) lr 4.8943e-05 eta 0:45:16 +epoch [47/50] batch [45/500] time 1.366 (1.382) data 0.000 (0.018) loss 1.1025 (1.0400) acc 68.7500 (74.2361) lr 4.8943e-05 eta 0:45:02 +epoch [47/50] batch [50/500] time 1.355 (1.380) data 0.000 (0.016) loss 0.9336 (1.0489) acc 68.7500 (74.0625) lr 4.8943e-05 eta 0:44:51 +epoch [47/50] batch [55/500] time 1.361 (1.379) data 0.000 (0.014) loss 0.4941 (1.0465) acc 84.3750 (73.8636) lr 4.8943e-05 eta 0:44:41 +epoch [47/50] batch [60/500] time 1.357 (1.377) data 0.000 (0.013) loss 1.1992 (1.0628) acc 65.6250 (73.4896) 
lr 4.8943e-05 eta 0:44:30 +epoch [47/50] batch [65/500] time 1.369 (1.376) data 0.001 (0.012) loss 0.7197 (1.0581) acc 71.8750 (73.5096) lr 4.8943e-05 eta 0:44:21 +epoch [47/50] batch [70/500] time 1.356 (1.375) data 0.000 (0.011) loss 0.6929 (1.0523) acc 71.8750 (73.3482) lr 4.8943e-05 eta 0:44:13 +epoch [47/50] batch [75/500] time 1.369 (1.373) data 0.000 (0.011) loss 0.5874 (1.0475) acc 84.3750 (73.7500) lr 4.8943e-05 eta 0:44:03 +epoch [47/50] batch [80/500] time 1.361 (1.372) data 0.000 (0.010) loss 1.0293 (1.0257) acc 65.6250 (74.1016) lr 4.8943e-05 eta 0:43:54 +epoch [47/50] batch [85/500] time 1.359 (1.371) data 0.000 (0.009) loss 0.9585 (1.0270) acc 81.2500 (74.0074) lr 4.8943e-05 eta 0:43:45 +epoch [47/50] batch [90/500] time 1.341 (1.370) data 0.000 (0.009) loss 1.6143 (1.0268) acc 68.7500 (74.2361) lr 4.8943e-05 eta 0:43:35 +epoch [47/50] batch [95/500] time 1.355 (1.368) data 0.000 (0.009) loss 0.7632 (1.0193) acc 81.2500 (74.1776) lr 4.8943e-05 eta 0:43:26 +epoch [47/50] batch [100/500] time 1.349 (1.368) data 0.000 (0.008) loss 1.2832 (1.0130) acc 65.6250 (74.2812) lr 4.8943e-05 eta 0:43:18 +epoch [47/50] batch [105/500] time 1.339 (1.367) data 0.000 (0.008) loss 1.1377 (1.0152) acc 71.8750 (74.2560) lr 4.8943e-05 eta 0:43:10 +epoch [47/50] batch [110/500] time 1.342 (1.367) data 0.000 (0.007) loss 1.0791 (1.0128) acc 68.7500 (74.2614) lr 4.8943e-05 eta 0:43:04 +epoch [47/50] batch [115/500] time 1.352 (1.367) data 0.000 (0.007) loss 1.0391 (1.0213) acc 75.0000 (74.0761) lr 4.8943e-05 eta 0:42:56 +epoch [47/50] batch [120/500] time 1.341 (1.367) data 0.000 (0.007) loss 1.3682 (1.0224) acc 62.5000 (74.0625) lr 4.8943e-05 eta 0:42:49 +epoch [47/50] batch [125/500] time 1.357 (1.366) data 0.000 (0.007) loss 1.1729 (1.0228) acc 71.8750 (74.1500) lr 4.8943e-05 eta 0:42:41 +epoch [47/50] batch [130/500] time 1.382 (1.367) data 0.000 (0.006) loss 0.9863 (1.0211) acc 75.0000 (74.1346) lr 4.8943e-05 eta 0:42:35 +epoch [47/50] batch [135/500] time 1.358 
(1.366) data 0.000 (0.006) loss 0.8140 (1.0267) acc 75.0000 (74.0741) lr 4.8943e-05 eta 0:42:28 +epoch [47/50] batch [140/500] time 1.351 (1.366) data 0.000 (0.006) loss 1.0723 (1.0288) acc 75.0000 (73.9732) lr 4.8943e-05 eta 0:42:21 +epoch [47/50] batch [145/500] time 1.364 (1.366) data 0.001 (0.006) loss 1.1240 (1.0286) acc 71.8750 (73.9009) lr 4.8943e-05 eta 0:42:13 +epoch [47/50] batch [150/500] time 1.362 (1.366) data 0.000 (0.006) loss 1.0586 (1.0367) acc 84.3750 (73.8750) lr 4.8943e-05 eta 0:42:06 +epoch [47/50] batch [155/500] time 1.341 (1.366) data 0.001 (0.005) loss 0.4888 (1.0344) acc 84.3750 (73.8911) lr 4.8943e-05 eta 0:42:00 +epoch [47/50] batch [160/500] time 1.357 (1.366) data 0.000 (0.005) loss 1.1328 (1.0400) acc 53.1250 (73.6719) lr 4.8943e-05 eta 0:41:53 +epoch [47/50] batch [165/500] time 1.366 (1.366) data 0.000 (0.005) loss 1.3916 (1.0463) acc 71.8750 (73.5038) lr 4.8943e-05 eta 0:41:45 +epoch [47/50] batch [170/500] time 1.347 (1.365) data 0.000 (0.005) loss 1.0654 (1.0459) acc 71.8750 (73.5846) lr 4.8943e-05 eta 0:41:38 +epoch [47/50] batch [175/500] time 1.359 (1.365) data 0.000 (0.005) loss 0.9873 (1.0487) acc 65.6250 (73.5000) lr 4.8943e-05 eta 0:41:31 +epoch [47/50] batch [180/500] time 1.363 (1.365) data 0.000 (0.005) loss 1.6406 (1.0448) acc 53.1250 (73.4722) lr 4.8943e-05 eta 0:41:24 +epoch [47/50] batch [185/500] time 1.332 (1.365) data 0.000 (0.005) loss 0.7705 (1.0413) acc 84.3750 (73.5980) lr 4.8943e-05 eta 0:41:16 +epoch [47/50] batch [190/500] time 1.360 (1.364) data 0.000 (0.004) loss 1.0137 (1.0356) acc 62.5000 (73.6678) lr 4.8943e-05 eta 0:41:09 +epoch [47/50] batch [195/500] time 1.354 (1.364) data 0.000 (0.004) loss 0.9385 (1.0343) acc 75.0000 (73.7019) lr 4.8943e-05 eta 0:41:02 +epoch [47/50] batch [200/500] time 1.358 (1.364) data 0.000 (0.004) loss 0.8896 (1.0317) acc 81.2500 (73.7188) lr 4.8943e-05 eta 0:40:55 +epoch [47/50] batch [205/500] time 1.343 (1.364) data 0.000 (0.004) loss 0.5103 (1.0290) acc 84.3750 
(73.7805) lr 4.8943e-05 eta 0:40:49 +epoch [47/50] batch [210/500] time 1.352 (1.364) data 0.000 (0.004) loss 1.6348 (1.0316) acc 68.7500 (73.7351) lr 4.8943e-05 eta 0:40:42 +epoch [47/50] batch [215/500] time 1.365 (1.364) data 0.000 (0.004) loss 0.8643 (1.0309) acc 71.8750 (73.7791) lr 4.8943e-05 eta 0:40:35 +epoch [47/50] batch [220/500] time 1.360 (1.364) data 0.000 (0.004) loss 0.6987 (1.0290) acc 81.2500 (73.7926) lr 4.8943e-05 eta 0:40:27 +epoch [47/50] batch [225/500] time 1.365 (1.364) data 0.000 (0.004) loss 0.6450 (1.0301) acc 78.1250 (73.7361) lr 4.8943e-05 eta 0:40:20 +epoch [47/50] batch [230/500] time 1.370 (1.364) data 0.000 (0.004) loss 0.9805 (1.0296) acc 78.1250 (73.7908) lr 4.8943e-05 eta 0:40:14 +epoch [47/50] batch [235/500] time 1.341 (1.364) data 0.000 (0.004) loss 0.6455 (1.0236) acc 81.2500 (73.9761) lr 4.8943e-05 eta 0:40:07 +epoch [47/50] batch [240/500] time 1.360 (1.364) data 0.000 (0.004) loss 0.7666 (1.0284) acc 78.1250 (73.8802) lr 4.8943e-05 eta 0:40:00 +epoch [47/50] batch [245/500] time 1.363 (1.364) data 0.000 (0.004) loss 1.3486 (1.0318) acc 71.8750 (73.8393) lr 4.8943e-05 eta 0:39:53 +epoch [47/50] batch [250/500] time 1.349 (1.364) data 0.000 (0.003) loss 1.4092 (1.0320) acc 59.3750 (73.8125) lr 4.8943e-05 eta 0:39:46 +epoch [47/50] batch [255/500] time 1.372 (1.364) data 0.000 (0.003) loss 0.6870 (1.0342) acc 71.8750 (73.7623) lr 4.8943e-05 eta 0:39:40 +epoch [47/50] batch [260/500] time 1.341 (1.364) data 0.000 (0.003) loss 1.0010 (1.0326) acc 71.8750 (73.8221) lr 4.8943e-05 eta 0:39:33 +epoch [47/50] batch [265/500] time 1.357 (1.364) data 0.000 (0.003) loss 0.7720 (1.0334) acc 75.0000 (73.7736) lr 4.8943e-05 eta 0:39:26 +epoch [47/50] batch [270/500] time 1.375 (1.364) data 0.000 (0.003) loss 1.1162 (1.0285) acc 71.8750 (73.9236) lr 4.8943e-05 eta 0:39:19 +epoch [47/50] batch [275/500] time 1.371 (1.364) data 0.000 (0.003) loss 0.8784 (1.0270) acc 68.7500 (73.9545) lr 4.8943e-05 eta 0:39:13 +epoch [47/50] batch [280/500] 
time 1.341 (1.364) data 0.000 (0.003) loss 1.4043 (1.0276) acc 68.7500 (73.9286) lr 4.8943e-05 eta 0:39:05 +epoch [47/50] batch [285/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.7295 (1.0271) acc 78.1250 (73.8596) lr 4.8943e-05 eta 0:38:58 +epoch [47/50] batch [290/500] time 1.364 (1.363) data 0.000 (0.003) loss 1.4111 (1.0302) acc 71.8750 (73.8685) lr 4.8943e-05 eta 0:38:51 +epoch [47/50] batch [295/500] time 1.489 (1.364) data 0.000 (0.003) loss 1.0801 (1.0292) acc 68.7500 (73.8771) lr 4.8943e-05 eta 0:38:45 +epoch [47/50] batch [300/500] time 1.361 (1.364) data 0.000 (0.003) loss 1.1494 (1.0299) acc 75.0000 (73.8958) lr 4.8943e-05 eta 0:38:38 +epoch [47/50] batch [305/500] time 1.366 (1.364) data 0.001 (0.003) loss 0.9253 (1.0311) acc 78.1250 (73.8320) lr 4.8943e-05 eta 0:38:31 +epoch [47/50] batch [310/500] time 1.346 (1.364) data 0.000 (0.003) loss 1.7236 (1.0326) acc 65.6250 (73.8105) lr 4.8943e-05 eta 0:38:24 +epoch [47/50] batch [315/500] time 1.336 (1.363) data 0.000 (0.003) loss 1.8936 (1.0373) acc 68.7500 (73.7302) lr 4.8943e-05 eta 0:38:17 +epoch [47/50] batch [320/500] time 1.370 (1.363) data 0.000 (0.003) loss 0.9985 (1.0362) acc 71.8750 (73.7109) lr 4.8943e-05 eta 0:38:10 +epoch [47/50] batch [325/500] time 1.367 (1.363) data 0.000 (0.003) loss 1.2363 (1.0375) acc 71.8750 (73.7308) lr 4.8943e-05 eta 0:38:03 +epoch [47/50] batch [330/500] time 1.348 (1.363) data 0.000 (0.003) loss 1.3613 (1.0400) acc 68.7500 (73.7500) lr 4.8943e-05 eta 0:37:56 +epoch [47/50] batch [335/500] time 1.352 (1.363) data 0.000 (0.003) loss 1.0850 (1.0389) acc 68.7500 (73.7500) lr 4.8943e-05 eta 0:37:49 +epoch [47/50] batch [340/500] time 1.367 (1.363) data 0.000 (0.003) loss 1.5615 (1.0392) acc 59.3750 (73.7224) lr 4.8943e-05 eta 0:37:42 +epoch [47/50] batch [345/500] time 1.362 (1.363) data 0.000 (0.003) loss 1.0781 (1.0394) acc 71.8750 (73.7319) lr 4.8943e-05 eta 0:37:35 +epoch [47/50] batch [350/500] time 1.332 (1.363) data 0.000 (0.003) loss 0.7920 (1.0394) acc 
75.0000 (73.6964) lr 4.8943e-05 eta 0:37:29 +epoch [47/50] batch [355/500] time 1.372 (1.363) data 0.000 (0.003) loss 0.8320 (1.0387) acc 78.1250 (73.6708) lr 4.8943e-05 eta 0:37:22 +epoch [47/50] batch [360/500] time 1.365 (1.363) data 0.000 (0.003) loss 1.0996 (1.0399) acc 75.0000 (73.6372) lr 4.8943e-05 eta 0:37:15 +epoch [47/50] batch [365/500] time 1.364 (1.363) data 0.000 (0.002) loss 1.2061 (1.0400) acc 75.0000 (73.6901) lr 4.8943e-05 eta 0:37:08 +epoch [47/50] batch [370/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.5762 (1.0386) acc 56.2500 (73.6824) lr 4.8943e-05 eta 0:37:01 +epoch [47/50] batch [375/500] time 1.362 (1.363) data 0.000 (0.002) loss 1.7461 (1.0381) acc 59.3750 (73.6833) lr 4.8943e-05 eta 0:36:55 +epoch [47/50] batch [380/500] time 1.358 (1.363) data 0.000 (0.002) loss 0.9478 (1.0396) acc 75.0000 (73.6595) lr 4.8943e-05 eta 0:36:48 +epoch [47/50] batch [385/500] time 1.369 (1.363) data 0.000 (0.002) loss 0.7935 (1.0380) acc 78.1250 (73.7175) lr 4.8943e-05 eta 0:36:41 +epoch [47/50] batch [390/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.9531 (1.0371) acc 53.1250 (73.7500) lr 4.8943e-05 eta 0:36:34 +epoch [47/50] batch [395/500] time 1.369 (1.363) data 0.001 (0.002) loss 1.3154 (1.0381) acc 68.7500 (73.7025) lr 4.8943e-05 eta 0:36:27 +epoch [47/50] batch [400/500] time 1.351 (1.363) data 0.000 (0.002) loss 0.6089 (1.0379) acc 87.5000 (73.6719) lr 4.8943e-05 eta 0:36:20 +epoch [47/50] batch [405/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.3213 (1.0381) acc 68.7500 (73.6728) lr 4.8943e-05 eta 0:36:14 +epoch [47/50] batch [410/500] time 1.366 (1.363) data 0.000 (0.002) loss 0.8203 (1.0390) acc 75.0000 (73.6662) lr 4.8943e-05 eta 0:36:06 +epoch [47/50] batch [415/500] time 1.359 (1.363) data 0.000 (0.002) loss 1.2373 (1.0393) acc 84.3750 (73.7123) lr 4.8943e-05 eta 0:36:00 +epoch [47/50] batch [420/500] time 1.339 (1.363) data 0.000 (0.002) loss 1.0088 (1.0411) acc 68.7500 (73.6682) lr 4.8943e-05 eta 0:35:53 +epoch [47/50] batch 
[425/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.4072 (1.0403) acc 75.0000 (73.7206) lr 4.8943e-05 eta 0:35:46 +epoch [47/50] batch [430/500] time 1.366 (1.363) data 0.000 (0.002) loss 1.7070 (1.0419) acc 56.2500 (73.6773) lr 4.8943e-05 eta 0:35:39 +epoch [47/50] batch [435/500] time 1.380 (1.363) data 0.000 (0.002) loss 0.8706 (1.0405) acc 81.2500 (73.7141) lr 4.8943e-05 eta 0:35:32 +epoch [47/50] batch [440/500] time 1.366 (1.363) data 0.000 (0.002) loss 0.9663 (1.0395) acc 62.5000 (73.6932) lr 4.8943e-05 eta 0:35:26 +epoch [47/50] batch [445/500] time 1.349 (1.363) data 0.000 (0.002) loss 1.0137 (1.0421) acc 71.8750 (73.6447) lr 4.8943e-05 eta 0:35:19 +epoch [47/50] batch [450/500] time 1.332 (1.363) data 0.000 (0.002) loss 0.9624 (1.0394) acc 75.0000 (73.7014) lr 4.8943e-05 eta 0:35:12 +epoch [47/50] batch [455/500] time 1.361 (1.363) data 0.000 (0.002) loss 1.1621 (1.0400) acc 68.7500 (73.6882) lr 4.8943e-05 eta 0:35:05 +epoch [47/50] batch [460/500] time 1.352 (1.363) data 0.000 (0.002) loss 0.8403 (1.0380) acc 75.0000 (73.7364) lr 4.8943e-05 eta 0:34:58 +epoch [47/50] batch [465/500] time 1.341 (1.362) data 0.000 (0.002) loss 0.8838 (1.0370) acc 81.2500 (73.7903) lr 4.8943e-05 eta 0:34:51 +epoch [47/50] batch [470/500] time 1.339 (1.362) data 0.000 (0.002) loss 0.9883 (1.0361) acc 71.8750 (73.8165) lr 4.8943e-05 eta 0:34:44 +epoch [47/50] batch [475/500] time 1.349 (1.362) data 0.000 (0.002) loss 1.3730 (1.0368) acc 62.5000 (73.8092) lr 4.8943e-05 eta 0:34:37 +epoch [47/50] batch [480/500] time 1.356 (1.362) data 0.000 (0.002) loss 1.5498 (1.0374) acc 62.5000 (73.8151) lr 4.8943e-05 eta 0:34:30 +epoch [47/50] batch [485/500] time 1.360 (1.362) data 0.001 (0.002) loss 1.2510 (1.0384) acc 65.6250 (73.7500) lr 4.8943e-05 eta 0:34:23 +epoch [47/50] batch [490/500] time 1.351 (1.362) data 0.000 (0.002) loss 1.2715 (1.0401) acc 71.8750 (73.7309) lr 4.8943e-05 eta 0:34:16 +epoch [47/50] batch [495/500] time 1.351 (1.362) data 0.000 (0.002) loss 1.5508 
(1.0403) acc 59.3750 (73.7247) lr 4.8943e-05 eta 0:34:09 +epoch [47/50] batch [500/500] time 1.327 (1.362) data 0.000 (0.002) loss 1.0527 (1.0407) acc 68.7500 (73.7062) lr 3.1417e-05 eta 0:34:02 +epoch [48/50] batch [5/500] time 1.372 (1.531) data 0.000 (0.163) loss 1.3066 (1.2250) acc 68.7500 (70.0000) lr 3.1417e-05 eta 0:38:09 +epoch [48/50] batch [10/500] time 1.367 (1.444) data 0.000 (0.082) loss 1.0068 (1.0924) acc 75.0000 (71.5625) lr 3.1417e-05 eta 0:35:51 +epoch [48/50] batch [15/500] time 1.370 (1.417) data 0.000 (0.055) loss 1.1836 (1.0464) acc 65.6250 (72.2917) lr 3.1417e-05 eta 0:35:04 +epoch [48/50] batch [20/500] time 1.363 (1.405) data 0.000 (0.041) loss 0.9312 (1.0310) acc 81.2500 (73.2812) lr 3.1417e-05 eta 0:34:39 +epoch [48/50] batch [25/500] time 1.591 (1.406) data 0.001 (0.033) loss 0.8281 (0.9867) acc 78.1250 (74.0000) lr 3.1417e-05 eta 0:34:33 +epoch [48/50] batch [30/500] time 1.366 (1.399) data 0.000 (0.028) loss 0.9507 (1.0246) acc 78.1250 (73.7500) lr 3.1417e-05 eta 0:34:16 +epoch [48/50] batch [35/500] time 1.349 (1.393) data 0.000 (0.024) loss 0.9263 (1.0104) acc 75.0000 (74.4643) lr 3.1417e-05 eta 0:34:01 +epoch [48/50] batch [40/500] time 1.344 (1.389) data 0.000 (0.021) loss 1.4775 (1.0066) acc 65.6250 (74.5312) lr 3.1417e-05 eta 0:33:47 +epoch [48/50] batch [45/500] time 1.353 (1.386) data 0.000 (0.019) loss 1.0645 (1.0216) acc 75.0000 (74.0972) lr 3.1417e-05 eta 0:33:36 +epoch [48/50] batch [50/500] time 1.362 (1.383) data 0.000 (0.017) loss 0.4514 (1.0222) acc 90.6250 (73.8125) lr 3.1417e-05 eta 0:33:25 +epoch [48/50] batch [55/500] time 1.355 (1.381) data 0.001 (0.015) loss 1.0732 (1.0191) acc 75.0000 (73.5795) lr 3.1417e-05 eta 0:33:15 +epoch [48/50] batch [60/500] time 1.350 (1.379) data 0.000 (0.014) loss 0.7173 (1.0204) acc 81.2500 (73.8021) lr 3.1417e-05 eta 0:33:06 +epoch [48/50] batch [65/500] time 1.350 (1.378) data 0.000 (0.013) loss 0.9097 (1.0205) acc 65.6250 (73.6538) lr 3.1417e-05 eta 0:32:56 +epoch [48/50] batch 
[70/500] time 1.364 (1.376) data 0.000 (0.012) loss 1.1533 (1.0158) acc 71.8750 (73.9286) lr 3.1417e-05 eta 0:32:48 +epoch [48/50] batch [75/500] time 1.357 (1.375) data 0.000 (0.011) loss 0.4963 (1.0028) acc 87.5000 (74.3333) lr 3.1417e-05 eta 0:32:39 +epoch [48/50] batch [80/500] time 1.365 (1.374) data 0.000 (0.011) loss 1.2988 (1.0062) acc 62.5000 (74.0625) lr 3.1417e-05 eta 0:32:31 +epoch [48/50] batch [85/500] time 1.593 (1.376) data 0.000 (0.010) loss 1.2236 (1.0025) acc 68.7500 (74.0809) lr 3.1417e-05 eta 0:32:26 +epoch [48/50] batch [90/500] time 1.363 (1.375) data 0.000 (0.009) loss 1.4248 (1.0168) acc 65.6250 (73.8194) lr 3.1417e-05 eta 0:32:18 +epoch [48/50] batch [95/500] time 1.356 (1.375) data 0.000 (0.009) loss 1.0000 (1.0167) acc 81.2500 (73.8487) lr 3.1417e-05 eta 0:32:11 +epoch [48/50] batch [100/500] time 1.353 (1.374) data 0.000 (0.009) loss 1.0908 (1.0119) acc 78.1250 (73.9688) lr 3.1417e-05 eta 0:32:03 +epoch [48/50] batch [105/500] time 1.381 (1.374) data 0.000 (0.008) loss 1.1904 (1.0065) acc 68.7500 (74.2857) lr 3.1417e-05 eta 0:31:56 +epoch [48/50] batch [110/500] time 1.371 (1.373) data 0.000 (0.008) loss 0.8564 (0.9993) acc 81.2500 (74.5739) lr 3.1417e-05 eta 0:31:48 +epoch [48/50] batch [115/500] time 1.350 (1.372) data 0.000 (0.007) loss 0.6387 (0.9975) acc 78.1250 (74.7826) lr 3.1417e-05 eta 0:31:39 +epoch [48/50] batch [120/500] time 1.364 (1.371) data 0.000 (0.007) loss 1.1816 (0.9949) acc 78.1250 (74.8438) lr 3.1417e-05 eta 0:31:32 +epoch [48/50] batch [125/500] time 1.384 (1.372) data 0.000 (0.007) loss 1.2002 (1.0002) acc 65.6250 (74.7750) lr 3.1417e-05 eta 0:31:25 +epoch [48/50] batch [130/500] time 1.361 (1.372) data 0.000 (0.007) loss 1.0352 (1.0081) acc 71.8750 (74.6875) lr 3.1417e-05 eta 0:31:19 +epoch [48/50] batch [135/500] time 1.340 (1.371) data 0.000 (0.006) loss 0.9316 (1.0138) acc 84.3750 (74.5602) lr 3.1417e-05 eta 0:31:11 +epoch [48/50] batch [140/500] time 1.374 (1.371) data 0.001 (0.006) loss 0.8638 (1.0170) acc 
78.1250 (74.4643) lr 3.1417e-05 eta 0:31:04 +epoch [48/50] batch [145/500] time 1.369 (1.371) data 0.000 (0.006) loss 0.7832 (1.0101) acc 78.1250 (74.6336) lr 3.1417e-05 eta 0:30:57 +epoch [48/50] batch [150/500] time 1.352 (1.370) data 0.000 (0.006) loss 0.8301 (1.0072) acc 75.0000 (74.6250) lr 3.1417e-05 eta 0:30:49 +epoch [48/50] batch [155/500] time 1.376 (1.370) data 0.000 (0.006) loss 0.7002 (1.0040) acc 84.3750 (74.7581) lr 3.1417e-05 eta 0:30:42 +epoch [48/50] batch [160/500] time 1.360 (1.369) data 0.000 (0.005) loss 0.6509 (0.9998) acc 81.2500 (74.9219) lr 3.1417e-05 eta 0:30:35 +epoch [48/50] batch [165/500] time 1.371 (1.369) data 0.001 (0.005) loss 1.1289 (0.9990) acc 75.0000 (74.9432) lr 3.1417e-05 eta 0:30:27 +epoch [48/50] batch [170/500] time 1.363 (1.369) data 0.000 (0.005) loss 0.8403 (0.9986) acc 68.7500 (74.8897) lr 3.1417e-05 eta 0:30:20 +epoch [48/50] batch [175/500] time 1.360 (1.369) data 0.000 (0.005) loss 1.1914 (0.9933) acc 71.8750 (75.0536) lr 3.1417e-05 eta 0:30:13 +epoch [48/50] batch [180/500] time 1.360 (1.368) data 0.000 (0.005) loss 1.7568 (0.9989) acc 62.5000 (74.9132) lr 3.1417e-05 eta 0:30:06 +epoch [48/50] batch [185/500] time 1.335 (1.368) data 0.000 (0.005) loss 1.2988 (1.0062) acc 71.8750 (74.6284) lr 3.1417e-05 eta 0:29:58 +epoch [48/50] batch [190/500] time 1.351 (1.368) data 0.000 (0.005) loss 1.2070 (1.0054) acc 65.6250 (74.5724) lr 3.1417e-05 eta 0:29:51 +epoch [48/50] batch [195/500] time 1.373 (1.367) data 0.000 (0.005) loss 1.2354 (1.0104) acc 68.7500 (74.5192) lr 3.1417e-05 eta 0:29:44 +epoch [48/50] batch [200/500] time 1.356 (1.367) data 0.000 (0.004) loss 0.8486 (1.0081) acc 78.1250 (74.5781) lr 3.1417e-05 eta 0:29:37 +epoch [48/50] batch [205/500] time 1.351 (1.367) data 0.000 (0.004) loss 1.5430 (1.0123) acc 68.7500 (74.5274) lr 3.1417e-05 eta 0:29:30 +epoch [48/50] batch [210/500] time 1.340 (1.367) data 0.001 (0.004) loss 1.1982 (1.0138) acc 62.5000 (74.4494) lr 3.1417e-05 eta 0:29:22 +epoch [48/50] batch 
[215/500] time 1.361 (1.367) data 0.000 (0.004) loss 0.6602 (1.0140) acc 84.3750 (74.5203) lr 3.1417e-05 eta 0:29:15 +epoch [48/50] batch [220/500] time 1.337 (1.366) data 0.000 (0.004) loss 1.1162 (1.0141) acc 75.0000 (74.5455) lr 3.1417e-05 eta 0:29:08 +epoch [48/50] batch [225/500] time 1.365 (1.366) data 0.000 (0.004) loss 0.8340 (1.0179) acc 78.1250 (74.4861) lr 3.1417e-05 eta 0:29:01 +epoch [48/50] batch [230/500] time 1.342 (1.367) data 0.000 (0.004) loss 1.2754 (1.0178) acc 62.5000 (74.4837) lr 3.1417e-05 eta 0:28:55 +epoch [48/50] batch [235/500] time 1.375 (1.367) data 0.000 (0.004) loss 0.5547 (1.0178) acc 93.7500 (74.4947) lr 3.1417e-05 eta 0:28:48 +epoch [48/50] batch [240/500] time 1.349 (1.366) data 0.000 (0.004) loss 1.0459 (1.0160) acc 68.7500 (74.4792) lr 3.1417e-05 eta 0:28:41 +epoch [48/50] batch [245/500] time 1.361 (1.366) data 0.000 (0.004) loss 1.1152 (1.0172) acc 68.7500 (74.3878) lr 3.1417e-05 eta 0:28:34 +epoch [48/50] batch [250/500] time 1.340 (1.366) data 0.000 (0.004) loss 1.0107 (1.0204) acc 75.0000 (74.2500) lr 3.1417e-05 eta 0:28:27 +epoch [48/50] batch [255/500] time 1.371 (1.366) data 0.000 (0.004) loss 1.4746 (1.0251) acc 62.5000 (74.1299) lr 3.1417e-05 eta 0:28:20 +epoch [48/50] batch [260/500] time 1.379 (1.366) data 0.000 (0.003) loss 0.7065 (1.0213) acc 81.2500 (74.2308) lr 3.1417e-05 eta 0:28:13 +epoch [48/50] batch [265/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.3174 (1.0239) acc 62.5000 (74.1627) lr 3.1417e-05 eta 0:28:06 +epoch [48/50] batch [270/500] time 1.355 (1.365) data 0.001 (0.003) loss 0.7910 (1.0248) acc 78.1250 (74.1319) lr 3.1417e-05 eta 0:27:59 +epoch [48/50] batch [275/500] time 1.378 (1.366) data 0.000 (0.003) loss 0.5762 (1.0257) acc 87.5000 (74.1364) lr 3.1417e-05 eta 0:27:52 +epoch [48/50] batch [280/500] time 1.358 (1.366) data 0.000 (0.003) loss 1.0127 (1.0276) acc 75.0000 (74.1071) lr 3.1417e-05 eta 0:27:46 +epoch [48/50] batch [285/500] time 1.340 (1.365) data 0.000 (0.003) loss 1.0566 
(1.0284) acc 65.6250 (74.0680) lr 3.1417e-05 eta 0:27:38 +epoch [48/50] batch [290/500] time 1.379 (1.365) data 0.000 (0.003) loss 1.4316 (1.0289) acc 59.3750 (74.0517) lr 3.1417e-05 eta 0:27:32 +epoch [48/50] batch [295/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.0859 (1.0286) acc 75.0000 (74.0572) lr 3.1417e-05 eta 0:27:25 +epoch [48/50] batch [300/500] time 1.372 (1.365) data 0.000 (0.003) loss 0.6187 (1.0251) acc 81.2500 (74.1458) lr 3.1417e-05 eta 0:27:18 +epoch [48/50] batch [305/500] time 1.352 (1.365) data 0.000 (0.003) loss 1.3369 (1.0276) acc 59.3750 (74.0984) lr 3.1417e-05 eta 0:27:11 +epoch [48/50] batch [310/500] time 1.355 (1.365) data 0.000 (0.003) loss 0.6685 (1.0266) acc 78.1250 (74.1230) lr 3.1417e-05 eta 0:27:04 +epoch [48/50] batch [315/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.0732 (1.0295) acc 78.1250 (74.0972) lr 3.1417e-05 eta 0:26:57 +epoch [48/50] batch [320/500] time 1.333 (1.364) data 0.000 (0.003) loss 1.1318 (1.0314) acc 78.1250 (74.0723) lr 3.1417e-05 eta 0:26:50 +epoch [48/50] batch [325/500] time 1.354 (1.364) data 0.000 (0.003) loss 0.8945 (1.0293) acc 75.0000 (74.0865) lr 3.1417e-05 eta 0:26:43 +epoch [48/50] batch [330/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.0713 (1.0277) acc 78.1250 (74.1193) lr 3.1417e-05 eta 0:26:36 +epoch [48/50] batch [335/500] time 1.359 (1.364) data 0.000 (0.003) loss 1.2793 (1.0280) acc 62.5000 (74.0578) lr 3.1417e-05 eta 0:26:29 +epoch [48/50] batch [340/500] time 1.356 (1.364) data 0.000 (0.003) loss 0.9097 (1.0280) acc 75.0000 (74.0717) lr 3.1417e-05 eta 0:26:22 +epoch [48/50] batch [345/500] time 1.342 (1.364) data 0.000 (0.003) loss 1.8828 (1.0295) acc 50.0000 (74.0036) lr 3.1417e-05 eta 0:26:15 +epoch [48/50] batch [350/500] time 1.355 (1.364) data 0.001 (0.003) loss 0.9858 (1.0303) acc 71.8750 (73.9643) lr 3.1417e-05 eta 0:26:08 +epoch [48/50] batch [355/500] time 1.346 (1.364) data 0.000 (0.003) loss 0.6689 (1.0308) acc 87.5000 (73.9965) lr 3.1417e-05 eta 0:26:01 +epoch 
[48/50] batch [360/500] time 1.368 (1.364) data 0.000 (0.003) loss 0.4932 (1.0276) acc 87.5000 (74.0885) lr 3.1417e-05 eta 0:25:54 +epoch [48/50] batch [365/500] time 1.369 (1.364) data 0.001 (0.003) loss 0.8042 (1.0273) acc 81.2500 (74.1182) lr 3.1417e-05 eta 0:25:47 +epoch [48/50] batch [370/500] time 1.353 (1.363) data 0.001 (0.003) loss 1.2715 (1.0291) acc 78.1250 (74.1047) lr 3.1417e-05 eta 0:25:40 +epoch [48/50] batch [375/500] time 1.355 (1.364) data 0.000 (0.003) loss 0.9043 (1.0270) acc 75.0000 (74.1500) lr 3.1417e-05 eta 0:25:34 +epoch [48/50] batch [380/500] time 1.362 (1.364) data 0.000 (0.003) loss 1.3633 (1.0278) acc 56.2500 (74.1201) lr 3.1417e-05 eta 0:25:27 +epoch [48/50] batch [385/500] time 1.364 (1.364) data 0.000 (0.002) loss 0.9272 (1.0306) acc 78.1250 (74.0909) lr 3.1417e-05 eta 0:25:20 +epoch [48/50] batch [390/500] time 1.384 (1.364) data 0.000 (0.002) loss 0.9575 (1.0299) acc 62.5000 (74.0224) lr 3.1417e-05 eta 0:25:13 +epoch [48/50] batch [395/500] time 1.375 (1.364) data 0.000 (0.002) loss 1.3760 (1.0307) acc 75.0000 (73.9873) lr 3.1417e-05 eta 0:25:07 +epoch [48/50] batch [400/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.8276 (1.0297) acc 78.1250 (74.0234) lr 3.1417e-05 eta 0:25:00 +epoch [48/50] batch [405/500] time 1.365 (1.364) data 0.000 (0.002) loss 0.6553 (1.0293) acc 78.1250 (74.0509) lr 3.1417e-05 eta 0:24:53 +epoch [48/50] batch [410/500] time 1.363 (1.364) data 0.000 (0.002) loss 0.6528 (1.0301) acc 84.3750 (74.0244) lr 3.1417e-05 eta 0:24:46 +epoch [48/50] batch [415/500] time 1.459 (1.364) data 0.000 (0.002) loss 0.8618 (1.0308) acc 68.7500 (74.0136) lr 3.1417e-05 eta 0:24:40 +epoch [48/50] batch [420/500] time 1.365 (1.364) data 0.000 (0.002) loss 0.6616 (1.0307) acc 81.2500 (74.0253) lr 3.1417e-05 eta 0:24:33 +epoch [48/50] batch [425/500] time 1.367 (1.364) data 0.000 (0.002) loss 1.0137 (1.0290) acc 75.0000 (74.0735) lr 3.1417e-05 eta 0:24:26 +epoch [48/50] batch [430/500] time 1.357 (1.364) data 0.001 (0.002) loss 
1.5322 (1.0309) acc 65.6250 (74.0625) lr 3.1417e-05 eta 0:24:19 +epoch [48/50] batch [435/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.5264 (1.0352) acc 59.3750 (73.9655) lr 3.1417e-05 eta 0:24:12 +epoch [48/50] batch [440/500] time 1.373 (1.364) data 0.000 (0.002) loss 0.6704 (1.0329) acc 81.2500 (73.9560) lr 3.1417e-05 eta 0:24:06 +epoch [48/50] batch [445/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.4893 (1.0352) acc 68.7500 (73.8904) lr 3.1417e-05 eta 0:23:59 +epoch [48/50] batch [450/500] time 1.376 (1.364) data 0.000 (0.002) loss 1.0918 (1.0351) acc 81.2500 (73.8889) lr 3.1417e-05 eta 0:23:52 +epoch [48/50] batch [455/500] time 1.366 (1.364) data 0.000 (0.002) loss 1.2217 (1.0376) acc 78.1250 (73.9011) lr 3.1417e-05 eta 0:23:45 +epoch [48/50] batch [460/500] time 1.384 (1.364) data 0.000 (0.002) loss 0.8159 (1.0359) acc 78.1250 (73.9062) lr 3.1417e-05 eta 0:23:38 +epoch [48/50] batch [465/500] time 1.368 (1.364) data 0.000 (0.002) loss 1.4404 (1.0383) acc 75.0000 (73.8710) lr 3.1417e-05 eta 0:23:31 +epoch [48/50] batch [470/500] time 1.346 (1.364) data 0.001 (0.002) loss 0.8442 (1.0395) acc 78.1250 (73.8697) lr 3.1417e-05 eta 0:23:25 +epoch [48/50] batch [475/500] time 1.360 (1.364) data 0.000 (0.002) loss 1.2832 (1.0402) acc 68.7500 (73.8553) lr 3.1417e-05 eta 0:23:18 +epoch [48/50] batch [480/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.0723 (1.0413) acc 78.1250 (73.8346) lr 3.1417e-05 eta 0:23:11 +epoch [48/50] batch [485/500] time 1.358 (1.364) data 0.001 (0.002) loss 0.9702 (1.0407) acc 71.8750 (73.8338) lr 3.1417e-05 eta 0:23:04 +epoch [48/50] batch [490/500] time 1.365 (1.364) data 0.000 (0.002) loss 1.0732 (1.0408) acc 68.7500 (73.8648) lr 3.1417e-05 eta 0:22:57 +epoch [48/50] batch [495/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.2314 (1.0424) acc 68.7500 (73.8510) lr 3.1417e-05 eta 0:22:50 +epoch [48/50] batch [500/500] time 1.351 (1.364) data 0.000 (0.002) loss 0.9785 (1.0432) acc 75.0000 (73.8063) lr 1.7713e-05 eta 0:22:43 
+epoch [49/50] batch [5/500] time 1.354 (1.536) data 0.000 (0.159) loss 1.0938 (0.9551) acc 71.8750 (75.0000) lr 1.7713e-05 eta 0:25:28 +epoch [49/50] batch [10/500] time 1.570 (1.472) data 0.000 (0.080) loss 1.1260 (0.9415) acc 75.0000 (75.0000) lr 1.7713e-05 eta 0:24:16 +epoch [49/50] batch [15/500] time 1.348 (1.432) data 0.000 (0.053) loss 0.8750 (1.0357) acc 75.0000 (73.1250) lr 1.7713e-05 eta 0:23:30 +epoch [49/50] batch [20/500] time 1.370 (1.412) data 0.000 (0.040) loss 1.2080 (1.0337) acc 71.8750 (73.7500) lr 1.7713e-05 eta 0:23:04 +epoch [49/50] batch [25/500] time 1.356 (1.401) data 0.000 (0.032) loss 0.9009 (1.0319) acc 75.0000 (73.6250) lr 1.7713e-05 eta 0:22:46 +epoch [49/50] batch [30/500] time 1.373 (1.394) data 0.000 (0.027) loss 1.0225 (1.0448) acc 71.8750 (73.3333) lr 1.7713e-05 eta 0:22:32 +epoch [49/50] batch [35/500] time 1.360 (1.389) data 0.000 (0.023) loss 1.0273 (1.0499) acc 78.1250 (72.6786) lr 1.7713e-05 eta 0:22:20 +epoch [49/50] batch [40/500] time 1.352 (1.385) data 0.000 (0.020) loss 0.7271 (1.0349) acc 81.2500 (73.2031) lr 1.7713e-05 eta 0:22:10 +epoch [49/50] batch [45/500] time 1.362 (1.382) data 0.000 (0.018) loss 0.9824 (1.0160) acc 68.7500 (73.7500) lr 1.7713e-05 eta 0:21:59 +epoch [49/50] batch [50/500] time 1.361 (1.380) data 0.000 (0.016) loss 0.6943 (0.9992) acc 84.3750 (74.4375) lr 1.7713e-05 eta 0:21:51 +epoch [49/50] batch [55/500] time 1.363 (1.379) data 0.000 (0.015) loss 1.2764 (1.0173) acc 62.5000 (73.9205) lr 1.7713e-05 eta 0:21:43 +epoch [49/50] batch [60/500] time 1.354 (1.378) data 0.000 (0.014) loss 1.3398 (1.0335) acc 56.2500 (73.6458) lr 1.7713e-05 eta 0:21:34 +epoch [49/50] batch [65/500] time 1.345 (1.377) data 0.000 (0.013) loss 0.9702 (1.0455) acc 78.1250 (73.3654) lr 1.7713e-05 eta 0:21:27 +epoch [49/50] batch [70/500] time 1.498 (1.376) data 0.000 (0.012) loss 1.4014 (1.0475) acc 65.6250 (73.3482) lr 1.7713e-05 eta 0:21:20 +epoch [49/50] batch [75/500] time 1.346 (1.375) data 0.000 (0.011) loss 0.7109 
(1.0464) acc 75.0000 (73.2917) lr 1.7713e-05 eta 0:21:11 +epoch [49/50] batch [80/500] time 1.348 (1.374) data 0.000 (0.010) loss 0.9390 (1.0394) acc 75.0000 (73.3594) lr 1.7713e-05 eta 0:21:04 +epoch [49/50] batch [85/500] time 1.342 (1.373) data 0.000 (0.010) loss 1.1318 (1.0365) acc 75.0000 (73.6397) lr 1.7713e-05 eta 0:20:56 +epoch [49/50] batch [90/500] time 1.350 (1.372) data 0.000 (0.009) loss 0.8169 (1.0361) acc 68.7500 (73.4722) lr 1.7713e-05 eta 0:20:48 +epoch [49/50] batch [95/500] time 1.353 (1.371) data 0.000 (0.009) loss 1.0352 (1.0327) acc 68.7500 (73.4211) lr 1.7713e-05 eta 0:20:41 +epoch [49/50] batch [100/500] time 1.352 (1.371) data 0.000 (0.008) loss 0.5142 (1.0360) acc 87.5000 (73.4062) lr 1.7713e-05 eta 0:20:33 +epoch [49/50] batch [105/500] time 1.342 (1.370) data 0.000 (0.008) loss 1.0166 (1.0370) acc 78.1250 (73.3631) lr 1.7713e-05 eta 0:20:25 +epoch [49/50] batch [110/500] time 1.347 (1.369) data 0.000 (0.008) loss 0.6465 (1.0360) acc 87.5000 (73.4091) lr 1.7713e-05 eta 0:20:18 +epoch [49/50] batch [115/500] time 1.378 (1.369) data 0.000 (0.007) loss 0.6235 (1.0375) acc 84.3750 (73.4239) lr 1.7713e-05 eta 0:20:11 +epoch [49/50] batch [120/500] time 1.349 (1.368) data 0.000 (0.007) loss 0.8008 (1.0397) acc 81.2500 (73.3854) lr 1.7713e-05 eta 0:20:04 +epoch [49/50] batch [125/500] time 1.344 (1.368) data 0.000 (0.007) loss 1.6299 (1.0514) acc 59.3750 (73.2000) lr 1.7713e-05 eta 0:19:56 +epoch [49/50] batch [130/500] time 1.357 (1.368) data 0.000 (0.006) loss 0.8184 (1.0569) acc 81.2500 (73.1250) lr 1.7713e-05 eta 0:19:49 +epoch [49/50] batch [135/500] time 1.356 (1.367) data 0.000 (0.006) loss 1.4961 (1.0531) acc 62.5000 (73.3102) lr 1.7713e-05 eta 0:19:42 +epoch [49/50] batch [140/500] time 1.354 (1.366) data 0.000 (0.006) loss 1.5244 (1.0604) acc 65.6250 (73.2143) lr 1.7713e-05 eta 0:19:35 +epoch [49/50] batch [145/500] time 1.362 (1.366) data 0.000 (0.006) loss 1.4512 (1.0597) acc 68.7500 (73.2543) lr 1.7713e-05 eta 0:19:27 +epoch [49/50] 
batch [150/500] time 1.351 (1.365) data 0.000 (0.006) loss 0.9038 (1.0590) acc 78.1250 (73.3125) lr 1.7713e-05 eta 0:19:20 +epoch [49/50] batch [155/500] time 1.365 (1.365) data 0.000 (0.005) loss 0.5137 (1.0523) acc 84.3750 (73.4274) lr 1.7713e-05 eta 0:19:13 +epoch [49/50] batch [160/500] time 1.330 (1.365) data 0.000 (0.005) loss 1.3467 (1.0517) acc 68.7500 (73.3203) lr 1.7713e-05 eta 0:19:06 +epoch [49/50] batch [165/500] time 1.368 (1.364) data 0.000 (0.005) loss 1.2568 (1.0586) acc 62.5000 (73.2008) lr 1.7713e-05 eta 0:18:59 +epoch [49/50] batch [170/500] time 1.369 (1.364) data 0.000 (0.005) loss 1.1885 (1.0648) acc 68.7500 (73.0699) lr 1.7713e-05 eta 0:18:52 +epoch [49/50] batch [175/500] time 1.342 (1.364) data 0.000 (0.005) loss 1.2256 (1.0627) acc 68.7500 (73.0714) lr 1.7713e-05 eta 0:18:45 +epoch [49/50] batch [180/500] time 1.364 (1.364) data 0.000 (0.005) loss 0.7168 (1.0557) acc 75.0000 (73.2292) lr 1.7713e-05 eta 0:18:38 +epoch [49/50] batch [185/500] time 1.358 (1.364) data 0.000 (0.005) loss 0.9375 (1.0560) acc 78.1250 (73.1419) lr 1.7713e-05 eta 0:18:31 +epoch [49/50] batch [190/500] time 1.349 (1.364) data 0.000 (0.005) loss 0.4949 (1.0519) acc 84.3750 (73.2566) lr 1.7713e-05 eta 0:18:24 +epoch [49/50] batch [195/500] time 1.368 (1.364) data 0.000 (0.004) loss 1.1504 (1.0467) acc 71.8750 (73.2692) lr 1.7713e-05 eta 0:18:17 +epoch [49/50] batch [200/500] time 1.351 (1.364) data 0.000 (0.004) loss 1.1377 (1.0460) acc 62.5000 (73.3281) lr 1.7713e-05 eta 0:18:10 +epoch [49/50] batch [205/500] time 1.354 (1.363) data 0.000 (0.004) loss 1.1279 (1.0438) acc 71.8750 (73.4146) lr 1.7713e-05 eta 0:18:03 +epoch [49/50] batch [210/500] time 1.354 (1.363) data 0.000 (0.004) loss 1.2578 (1.0384) acc 75.0000 (73.5119) lr 1.7713e-05 eta 0:17:56 +epoch [49/50] batch [215/500] time 1.341 (1.363) data 0.000 (0.004) loss 0.8589 (1.0389) acc 78.1250 (73.5610) lr 1.7713e-05 eta 0:17:50 +epoch [49/50] batch [220/500] time 1.378 (1.363) data 0.000 (0.004) loss 0.5459 
(1.0370) acc 84.3750 (73.5795) lr 1.7713e-05 eta 0:17:43 +epoch [49/50] batch [225/500] time 1.385 (1.364) data 0.000 (0.004) loss 1.3213 (1.0420) acc 62.5000 (73.5139) lr 1.7713e-05 eta 0:17:36 +epoch [49/50] batch [230/500] time 1.372 (1.364) data 0.000 (0.004) loss 1.5674 (1.0473) acc 62.5000 (73.5326) lr 1.7713e-05 eta 0:17:30 +epoch [49/50] batch [235/500] time 1.338 (1.363) data 0.000 (0.004) loss 1.2412 (1.0480) acc 62.5000 (73.4574) lr 1.7713e-05 eta 0:17:23 +epoch [49/50] batch [240/500] time 1.361 (1.363) data 0.000 (0.004) loss 1.1006 (1.0450) acc 68.7500 (73.4896) lr 1.7713e-05 eta 0:17:16 +epoch [49/50] batch [245/500] time 1.358 (1.363) data 0.000 (0.004) loss 0.8506 (1.0437) acc 71.8750 (73.4694) lr 1.7713e-05 eta 0:17:09 +epoch [49/50] batch [250/500] time 1.361 (1.363) data 0.000 (0.004) loss 0.6274 (1.0439) acc 90.6250 (73.5625) lr 1.7713e-05 eta 0:17:02 +epoch [49/50] batch [255/500] time 1.352 (1.363) data 0.000 (0.003) loss 1.1641 (1.0460) acc 68.7500 (73.5539) lr 1.7713e-05 eta 0:16:55 +epoch [49/50] batch [260/500] time 1.340 (1.363) data 0.000 (0.003) loss 0.9131 (1.0439) acc 78.1250 (73.6058) lr 1.7713e-05 eta 0:16:48 +epoch [49/50] batch [265/500] time 1.349 (1.363) data 0.000 (0.003) loss 0.7129 (1.0394) acc 81.2500 (73.7146) lr 1.7713e-05 eta 0:16:42 +epoch [49/50] batch [270/500] time 1.366 (1.363) data 0.000 (0.003) loss 1.0273 (1.0372) acc 71.8750 (73.6690) lr 1.7713e-05 eta 0:16:35 +epoch [49/50] batch [275/500] time 1.347 (1.363) data 0.000 (0.003) loss 0.8403 (1.0373) acc 78.1250 (73.6136) lr 1.7713e-05 eta 0:16:28 +epoch [49/50] batch [280/500] time 1.352 (1.363) data 0.000 (0.003) loss 1.0059 (1.0393) acc 68.7500 (73.5379) lr 1.7713e-05 eta 0:16:21 +epoch [49/50] batch [285/500] time 1.355 (1.363) data 0.000 (0.003) loss 0.7886 (1.0410) acc 75.0000 (73.4320) lr 1.7713e-05 eta 0:16:14 +epoch [49/50] batch [290/500] time 1.357 (1.363) data 0.000 (0.003) loss 0.7881 (1.0376) acc 75.0000 (73.5237) lr 1.7713e-05 eta 0:16:07 +epoch 
[49/50] batch [295/500] time 1.353 (1.363) data 0.000 (0.003) loss 1.6719 (1.0418) acc 71.8750 (73.4746) lr 1.7713e-05 eta 0:16:00 +epoch [49/50] batch [300/500] time 1.362 (1.363) data 0.001 (0.003) loss 0.6938 (1.0408) acc 84.3750 (73.4479) lr 1.7713e-05 eta 0:15:53 +epoch [49/50] batch [305/500] time 1.362 (1.363) data 0.000 (0.003) loss 0.8916 (1.0366) acc 71.8750 (73.5758) lr 1.7713e-05 eta 0:15:47 +epoch [49/50] batch [310/500] time 1.339 (1.362) data 0.000 (0.003) loss 1.7578 (1.0416) acc 56.2500 (73.5081) lr 1.7713e-05 eta 0:15:40 +epoch [49/50] batch [315/500] time 1.377 (1.362) data 0.000 (0.003) loss 0.8989 (1.0393) acc 81.2500 (73.5516) lr 1.7713e-05 eta 0:15:33 +epoch [49/50] batch [320/500] time 1.373 (1.363) data 0.000 (0.003) loss 1.1826 (1.0404) acc 78.1250 (73.5938) lr 1.7713e-05 eta 0:15:26 +epoch [49/50] batch [325/500] time 1.369 (1.363) data 0.000 (0.003) loss 1.1670 (1.0390) acc 78.1250 (73.6058) lr 1.7713e-05 eta 0:15:19 +epoch [49/50] batch [330/500] time 1.365 (1.363) data 0.000 (0.003) loss 0.8535 (1.0406) acc 75.0000 (73.6080) lr 1.7713e-05 eta 0:15:12 +epoch [49/50] batch [335/500] time 1.349 (1.363) data 0.000 (0.003) loss 0.7090 (1.0392) acc 75.0000 (73.6194) lr 1.7713e-05 eta 0:15:06 +epoch [49/50] batch [340/500] time 1.376 (1.363) data 0.000 (0.003) loss 0.9849 (1.0375) acc 75.0000 (73.6305) lr 1.7713e-05 eta 0:14:59 +epoch [49/50] batch [345/500] time 1.345 (1.362) data 0.000 (0.003) loss 1.0996 (1.0408) acc 68.7500 (73.5779) lr 1.7713e-05 eta 0:14:52 +epoch [49/50] batch [350/500] time 1.390 (1.362) data 0.000 (0.003) loss 1.4141 (1.0406) acc 62.5000 (73.5357) lr 1.7713e-05 eta 0:14:45 +epoch [49/50] batch [355/500] time 1.371 (1.362) data 0.001 (0.003) loss 0.6973 (1.0385) acc 81.2500 (73.5739) lr 1.7713e-05 eta 0:14:38 +epoch [49/50] batch [360/500] time 1.352 (1.363) data 0.000 (0.003) loss 0.5562 (1.0381) acc 84.3750 (73.5503) lr 1.7713e-05 eta 0:14:32 +epoch [49/50] batch [365/500] time 1.357 (1.363) data 0.000 (0.003) loss 
0.7002 (1.0385) acc 87.5000 (73.5274) lr 1.7713e-05 eta 0:14:25 +epoch [49/50] batch [370/500] time 1.354 (1.363) data 0.000 (0.002) loss 0.9170 (1.0380) acc 75.0000 (73.5220) lr 1.7713e-05 eta 0:14:18 +epoch [49/50] batch [375/500] time 1.353 (1.362) data 0.001 (0.002) loss 0.7227 (1.0381) acc 78.1250 (73.5417) lr 1.7713e-05 eta 0:14:11 +epoch [49/50] batch [380/500] time 1.371 (1.363) data 0.000 (0.002) loss 0.6914 (1.0395) acc 81.2500 (73.4786) lr 1.7713e-05 eta 0:14:04 +epoch [49/50] batch [385/500] time 1.347 (1.363) data 0.000 (0.002) loss 0.8091 (1.0386) acc 81.2500 (73.4821) lr 1.7713e-05 eta 0:13:58 +epoch [49/50] batch [390/500] time 1.374 (1.363) data 0.000 (0.002) loss 1.3262 (1.0411) acc 65.6250 (73.4455) lr 1.7713e-05 eta 0:13:51 +epoch [49/50] batch [395/500] time 1.365 (1.363) data 0.000 (0.002) loss 0.8652 (1.0406) acc 75.0000 (73.5206) lr 1.7713e-05 eta 0:13:44 +epoch [49/50] batch [400/500] time 1.482 (1.363) data 0.000 (0.002) loss 0.6543 (1.0377) acc 84.3750 (73.5781) lr 1.7713e-05 eta 0:13:37 +epoch [49/50] batch [405/500] time 1.346 (1.363) data 0.000 (0.002) loss 0.7109 (1.0369) acc 71.8750 (73.5185) lr 1.7713e-05 eta 0:13:30 +epoch [49/50] batch [410/500] time 1.347 (1.363) data 0.000 (0.002) loss 0.6304 (1.0373) acc 81.2500 (73.5137) lr 1.7713e-05 eta 0:13:24 +epoch [49/50] batch [415/500] time 1.359 (1.363) data 0.000 (0.002) loss 0.6196 (1.0382) acc 78.1250 (73.5166) lr 1.7713e-05 eta 0:13:17 +epoch [49/50] batch [420/500] time 1.360 (1.363) data 0.000 (0.002) loss 1.3291 (1.0382) acc 75.0000 (73.5342) lr 1.7713e-05 eta 0:13:10 +epoch [49/50] batch [425/500] time 1.375 (1.363) data 0.000 (0.002) loss 0.9810 (1.0383) acc 75.0000 (73.4926) lr 1.7713e-05 eta 0:13:03 +epoch [49/50] batch [430/500] time 1.366 (1.363) data 0.000 (0.002) loss 1.1279 (1.0374) acc 71.8750 (73.5102) lr 1.7713e-05 eta 0:12:56 +epoch [49/50] batch [435/500] time 1.357 (1.363) data 0.000 (0.002) loss 0.7144 (1.0376) acc 78.1250 (73.5057) lr 1.7713e-05 eta 0:12:50 
+epoch [49/50] batch [440/500] time 1.352 (1.363) data 0.000 (0.002) loss 1.6680 (1.0408) acc 59.3750 (73.4588) lr 1.7713e-05 eta 0:12:43 +epoch [49/50] batch [445/500] time 1.353 (1.363) data 0.000 (0.002) loss 1.2715 (1.0431) acc 65.6250 (73.4270) lr 1.7713e-05 eta 0:12:36 +epoch [49/50] batch [450/500] time 1.370 (1.363) data 0.000 (0.002) loss 1.0869 (1.0430) acc 78.1250 (73.4444) lr 1.7713e-05 eta 0:12:29 +epoch [49/50] batch [455/500] time 1.366 (1.363) data 0.000 (0.002) loss 0.8638 (1.0437) acc 75.0000 (73.4478) lr 1.7713e-05 eta 0:12:22 +epoch [49/50] batch [460/500] time 1.337 (1.363) data 0.000 (0.002) loss 1.2949 (1.0455) acc 71.8750 (73.4307) lr 1.7713e-05 eta 0:12:15 +epoch [49/50] batch [465/500] time 1.338 (1.362) data 0.000 (0.002) loss 0.9565 (1.0464) acc 75.0000 (73.4207) lr 1.7713e-05 eta 0:12:08 +epoch [49/50] batch [470/500] time 1.366 (1.362) data 0.000 (0.002) loss 1.0312 (1.0462) acc 81.2500 (73.4309) lr 1.7713e-05 eta 0:12:02 +epoch [49/50] batch [475/500] time 1.362 (1.362) data 0.000 (0.002) loss 1.4688 (1.0468) acc 71.8750 (73.4145) lr 1.7713e-05 eta 0:11:55 +epoch [49/50] batch [480/500] time 1.355 (1.362) data 0.000 (0.002) loss 0.5869 (1.0464) acc 84.3750 (73.4440) lr 1.7713e-05 eta 0:11:48 +epoch [49/50] batch [485/500] time 1.364 (1.362) data 0.001 (0.002) loss 1.5449 (1.0469) acc 65.6250 (73.4536) lr 1.7713e-05 eta 0:11:41 +epoch [49/50] batch [490/500] time 1.366 (1.362) data 0.000 (0.002) loss 0.8013 (1.0471) acc 78.1250 (73.4247) lr 1.7713e-05 eta 0:11:34 +epoch [49/50] batch [495/500] time 1.382 (1.362) data 0.000 (0.002) loss 1.4189 (1.0477) acc 68.7500 (73.4407) lr 1.7713e-05 eta 0:11:28 +epoch [49/50] batch [500/500] time 1.364 (1.363) data 0.000 (0.002) loss 0.4385 (1.0453) acc 81.2500 (73.4625) lr 7.8853e-06 eta 0:11:21 +epoch [50/50] batch [5/500] time 1.382 (1.533) data 0.000 (0.166) loss 1.0312 (0.8382) acc 75.0000 (77.5000) lr 7.8853e-06 eta 0:12:39 +epoch [50/50] batch [10/500] time 1.346 (1.443) data 0.000 (0.083) 
loss 0.8794 (1.0037) acc 81.2500 (72.1875) lr 7.8853e-06 eta 0:11:46 +epoch [50/50] batch [15/500] time 1.354 (1.412) data 0.000 (0.056) loss 1.3516 (0.9821) acc 65.6250 (72.9167) lr 7.8853e-06 eta 0:11:24 +epoch [50/50] batch [20/500] time 1.342 (1.398) data 0.000 (0.042) loss 0.7046 (1.0124) acc 71.8750 (72.3438) lr 7.8853e-06 eta 0:11:11 +epoch [50/50] batch [25/500] time 1.345 (1.390) data 0.000 (0.034) loss 0.9751 (1.0036) acc 71.8750 (71.7500) lr 7.8853e-06 eta 0:11:00 +epoch [50/50] batch [30/500] time 1.383 (1.386) data 0.000 (0.028) loss 1.4023 (1.0186) acc 68.7500 (72.0833) lr 7.8853e-06 eta 0:10:51 +epoch [50/50] batch [35/500] time 1.348 (1.382) data 0.000 (0.024) loss 1.7012 (1.0178) acc 65.6250 (72.5893) lr 7.8853e-06 eta 0:10:42 +epoch [50/50] batch [40/500] time 1.342 (1.378) data 0.000 (0.021) loss 1.4336 (1.0197) acc 68.7500 (72.8125) lr 7.8853e-06 eta 0:10:33 +epoch [50/50] batch [45/500] time 1.356 (1.377) data 0.000 (0.019) loss 0.8169 (1.0425) acc 84.3750 (72.6389) lr 7.8853e-06 eta 0:10:26 +epoch [50/50] batch [50/500] time 1.343 (1.375) data 0.000 (0.017) loss 0.6675 (1.0107) acc 93.7500 (73.8125) lr 7.8853e-06 eta 0:10:18 +epoch [50/50] batch [55/500] time 1.373 (1.375) data 0.000 (0.015) loss 0.7480 (1.0058) acc 81.2500 (74.0341) lr 7.8853e-06 eta 0:10:11 +epoch [50/50] batch [60/500] time 1.369 (1.377) data 0.000 (0.014) loss 0.8838 (0.9989) acc 78.1250 (74.4271) lr 7.8853e-06 eta 0:10:05 +epoch [50/50] batch [65/500] time 1.357 (1.375) data 0.000 (0.013) loss 1.0020 (1.0090) acc 81.2500 (74.2308) lr 7.8853e-06 eta 0:09:58 +epoch [50/50] batch [70/500] time 1.345 (1.374) data 0.000 (0.012) loss 1.3975 (1.0163) acc 71.8750 (74.1964) lr 7.8853e-06 eta 0:09:50 +epoch [50/50] batch [75/500] time 1.342 (1.373) data 0.000 (0.011) loss 1.3643 (1.0172) acc 59.3750 (73.8750) lr 7.8853e-06 eta 0:09:43 +epoch [50/50] batch [80/500] time 1.347 (1.372) data 0.000 (0.011) loss 1.3770 (1.0219) acc 68.7500 (73.7109) lr 7.8853e-06 eta 0:09:36 +epoch 
[50/50] batch [85/500] time 1.373 (1.371) data 0.000 (0.010) loss 1.2051 (1.0297) acc 65.6250 (73.5294) lr 7.8853e-06 eta 0:09:29 +epoch [50/50] batch [90/500] time 1.356 (1.370) data 0.000 (0.010) loss 1.2246 (1.0403) acc 68.7500 (73.4722) lr 7.8853e-06 eta 0:09:21 +epoch [50/50] batch [95/500] time 1.371 (1.370) data 0.000 (0.009) loss 1.4697 (1.0535) acc 56.2500 (73.0921) lr 7.8853e-06 eta 0:09:14 +epoch [50/50] batch [100/500] time 1.373 (1.370) data 0.000 (0.009) loss 0.9453 (1.0608) acc 68.7500 (72.7500) lr 7.8853e-06 eta 0:09:07 +epoch [50/50] batch [105/500] time 1.353 (1.371) data 0.000 (0.008) loss 1.2617 (1.0680) acc 65.6250 (72.6488) lr 7.8853e-06 eta 0:09:01 +epoch [50/50] batch [110/500] time 1.342 (1.370) data 0.000 (0.008) loss 1.5879 (1.0613) acc 68.7500 (72.8693) lr 7.8853e-06 eta 0:08:54 +epoch [50/50] batch [115/500] time 1.350 (1.370) data 0.000 (0.008) loss 0.7173 (1.0630) acc 81.2500 (73.0707) lr 7.8853e-06 eta 0:08:47 +epoch [50/50] batch [120/500] time 1.345 (1.369) data 0.000 (0.007) loss 0.8218 (1.0542) acc 75.0000 (73.0469) lr 7.8853e-06 eta 0:08:40 +epoch [50/50] batch [125/500] time 1.365 (1.369) data 0.000 (0.007) loss 0.8896 (1.0536) acc 71.8750 (73.0250) lr 7.8853e-06 eta 0:08:33 +epoch [50/50] batch [130/500] time 1.369 (1.369) data 0.000 (0.007) loss 0.7524 (1.0476) acc 75.0000 (73.1010) lr 7.8853e-06 eta 0:08:26 +epoch [50/50] batch [135/500] time 1.360 (1.368) data 0.000 (0.006) loss 1.2197 (1.0472) acc 59.3750 (73.1481) lr 7.8853e-06 eta 0:08:19 +epoch [50/50] batch [140/500] time 1.363 (1.368) data 0.000 (0.006) loss 0.7334 (1.0417) acc 75.0000 (73.3259) lr 7.8853e-06 eta 0:08:12 +epoch [50/50] batch [145/500] time 1.348 (1.368) data 0.000 (0.006) loss 0.7153 (1.0384) acc 84.3750 (73.4483) lr 7.8853e-06 eta 0:08:05 +epoch [50/50] batch [150/500] time 1.393 (1.368) data 0.000 (0.006) loss 1.1064 (1.0406) acc 68.7500 (73.4583) lr 7.8853e-06 eta 0:07:58 +epoch [50/50] batch [155/500] time 1.370 (1.368) data 0.000 (0.006) loss 
0.8516 (1.0465) acc 78.1250 (73.3468) lr 7.8853e-06 eta 0:07:52 +epoch [50/50] batch [160/500] time 1.355 (1.368) data 0.000 (0.006) loss 0.7715 (1.0471) acc 75.0000 (73.3594) lr 7.8853e-06 eta 0:07:45 +epoch [50/50] batch [165/500] time 1.347 (1.368) data 0.000 (0.005) loss 0.4839 (1.0450) acc 87.5000 (73.4659) lr 7.8853e-06 eta 0:07:38 +epoch [50/50] batch [170/500] time 1.365 (1.368) data 0.001 (0.005) loss 1.1631 (1.0412) acc 75.0000 (73.5662) lr 7.8853e-06 eta 0:07:31 +epoch [50/50] batch [175/500] time 1.362 (1.367) data 0.000 (0.005) loss 0.6245 (1.0380) acc 71.8750 (73.5357) lr 7.8853e-06 eta 0:07:24 +epoch [50/50] batch [180/500] time 1.349 (1.367) data 0.000 (0.005) loss 0.6211 (1.0297) acc 84.3750 (73.7153) lr 7.8853e-06 eta 0:07:17 +epoch [50/50] batch [185/500] time 1.361 (1.367) data 0.000 (0.005) loss 1.2949 (1.0344) acc 68.7500 (73.5980) lr 7.8853e-06 eta 0:07:10 +epoch [50/50] batch [190/500] time 1.371 (1.366) data 0.000 (0.005) loss 0.6489 (1.0338) acc 84.3750 (73.5526) lr 7.8853e-06 eta 0:07:03 +epoch [50/50] batch [195/500] time 1.359 (1.367) data 0.000 (0.005) loss 1.3213 (1.0368) acc 71.8750 (73.5417) lr 7.8853e-06 eta 0:06:56 +epoch [50/50] batch [200/500] time 1.368 (1.367) data 0.000 (0.004) loss 1.2061 (1.0434) acc 75.0000 (73.2656) lr 7.8853e-06 eta 0:06:50 +epoch [50/50] batch [205/500] time 1.386 (1.368) data 0.000 (0.004) loss 1.1338 (1.0435) acc 81.2500 (73.2927) lr 7.8853e-06 eta 0:06:43 +epoch [50/50] batch [210/500] time 1.342 (1.367) data 0.000 (0.004) loss 1.0293 (1.0429) acc 78.1250 (73.2887) lr 7.8853e-06 eta 0:06:36 +epoch [50/50] batch [215/500] time 1.336 (1.367) data 0.000 (0.004) loss 0.5586 (1.0398) acc 84.3750 (73.3721) lr 7.8853e-06 eta 0:06:29 +epoch [50/50] batch [220/500] time 1.364 (1.367) data 0.000 (0.004) loss 0.9473 (1.0380) acc 75.0000 (73.4233) lr 7.8853e-06 eta 0:06:22 +epoch [50/50] batch [225/500] time 1.381 (1.367) data 0.000 (0.004) loss 1.4551 (1.0358) acc 59.3750 (73.4306) lr 7.8853e-06 eta 0:06:15 
+epoch [50/50] batch [230/500] time 1.341 (1.366) data 0.000 (0.004) loss 0.8125 (1.0353) acc 84.3750 (73.4375) lr 7.8853e-06 eta 0:06:08 +epoch [50/50] batch [235/500] time 1.354 (1.366) data 0.000 (0.004) loss 0.5781 (1.0379) acc 87.5000 (73.4574) lr 7.8853e-06 eta 0:06:02 +epoch [50/50] batch [240/500] time 1.364 (1.366) data 0.000 (0.004) loss 0.7192 (1.0366) acc 81.2500 (73.5026) lr 7.8853e-06 eta 0:05:55 +epoch [50/50] batch [245/500] time 1.348 (1.366) data 0.000 (0.004) loss 1.0508 (1.0361) acc 68.7500 (73.4949) lr 7.8853e-06 eta 0:05:48 +epoch [50/50] batch [250/500] time 1.391 (1.366) data 0.000 (0.004) loss 0.6147 (1.0377) acc 81.2500 (73.4500) lr 7.8853e-06 eta 0:05:41 +epoch [50/50] batch [255/500] time 1.351 (1.366) data 0.000 (0.004) loss 1.0508 (1.0374) acc 71.8750 (73.4559) lr 7.8853e-06 eta 0:05:34 +epoch [50/50] batch [260/500] time 1.355 (1.366) data 0.000 (0.004) loss 1.0215 (1.0384) acc 62.5000 (73.3894) lr 7.8853e-06 eta 0:05:27 +epoch [50/50] batch [265/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.2666 (1.0370) acc 78.1250 (73.4316) lr 7.8853e-06 eta 0:05:21 +epoch [50/50] batch [270/500] time 1.359 (1.366) data 0.000 (0.003) loss 0.6768 (1.0379) acc 90.6250 (73.4722) lr 7.8853e-06 eta 0:05:14 +epoch [50/50] batch [275/500] time 1.354 (1.366) data 0.000 (0.003) loss 1.0977 (1.0372) acc 78.1250 (73.5568) lr 7.8853e-06 eta 0:05:07 +epoch [50/50] batch [280/500] time 1.387 (1.366) data 0.000 (0.003) loss 0.8521 (1.0356) acc 75.0000 (73.5826) lr 7.8853e-06 eta 0:05:00 +epoch [50/50] batch [285/500] time 1.368 (1.366) data 0.000 (0.003) loss 0.8364 (1.0380) acc 68.7500 (73.4320) lr 7.8853e-06 eta 0:04:53 +epoch [50/50] batch [290/500] time 1.361 (1.366) data 0.000 (0.003) loss 0.8350 (1.0380) acc 78.1250 (73.3944) lr 7.8853e-06 eta 0:04:46 +epoch [50/50] batch [295/500] time 1.333 (1.366) data 0.000 (0.003) loss 1.3760 (1.0395) acc 65.6250 (73.3792) lr 7.8853e-06 eta 0:04:40 +epoch [50/50] batch [300/500] time 1.368 (1.366) data 0.000 
(0.003) loss 0.6738 (1.0351) acc 87.5000 (73.4688) lr 7.8853e-06 eta 0:04:33 +epoch [50/50] batch [305/500] time 1.348 (1.366) data 0.000 (0.003) loss 1.0322 (1.0370) acc 75.0000 (73.5041) lr 7.8853e-06 eta 0:04:26 +epoch [50/50] batch [310/500] time 1.355 (1.366) data 0.000 (0.003) loss 1.0186 (1.0387) acc 71.8750 (73.4677) lr 7.8853e-06 eta 0:04:19 +epoch [50/50] batch [315/500] time 1.345 (1.366) data 0.000 (0.003) loss 1.6377 (1.0389) acc 65.6250 (73.4722) lr 7.8853e-06 eta 0:04:12 +epoch [50/50] batch [320/500] time 1.365 (1.366) data 0.000 (0.003) loss 0.7979 (1.0388) acc 84.3750 (73.5254) lr 7.8853e-06 eta 0:04:05 +epoch [50/50] batch [325/500] time 1.347 (1.366) data 0.000 (0.003) loss 1.8965 (1.0439) acc 62.5000 (73.4615) lr 7.8853e-06 eta 0:03:58 +epoch [50/50] batch [330/500] time 1.363 (1.365) data 0.000 (0.003) loss 0.6958 (1.0464) acc 84.3750 (73.4375) lr 7.8853e-06 eta 0:03:52 +epoch [50/50] batch [335/500] time 1.341 (1.365) data 0.000 (0.003) loss 0.3496 (1.0450) acc 87.5000 (73.5168) lr 7.8853e-06 eta 0:03:45 +epoch [50/50] batch [340/500] time 1.387 (1.365) data 0.000 (0.003) loss 0.7085 (1.0465) acc 84.3750 (73.4651) lr 7.8853e-06 eta 0:03:38 +epoch [50/50] batch [345/500] time 1.489 (1.366) data 0.000 (0.003) loss 1.0693 (1.0477) acc 71.8750 (73.3786) lr 7.8853e-06 eta 0:03:31 +epoch [50/50] batch [350/500] time 1.361 (1.365) data 0.000 (0.003) loss 0.7822 (1.0465) acc 75.0000 (73.3929) lr 7.8853e-06 eta 0:03:24 +epoch [50/50] batch [355/500] time 1.364 (1.365) data 0.000 (0.003) loss 0.9878 (1.0472) acc 78.1250 (73.4155) lr 7.8853e-06 eta 0:03:17 +epoch [50/50] batch [360/500] time 1.361 (1.365) data 0.000 (0.003) loss 1.0059 (1.0464) acc 78.1250 (73.4549) lr 7.8853e-06 eta 0:03:11 +epoch [50/50] batch [365/500] time 1.366 (1.365) data 0.000 (0.003) loss 1.5508 (1.0467) acc 62.5000 (73.4161) lr 7.8853e-06 eta 0:03:04 +epoch [50/50] batch [370/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.0508 (1.0432) acc 65.6250 (73.4459) lr 7.8853e-06 
eta 0:02:57 +epoch [50/50] batch [375/500] time 1.362 (1.365) data 0.000 (0.003) loss 2.0977 (1.0444) acc 59.3750 (73.4667) lr 7.8853e-06 eta 0:02:50 +epoch [50/50] batch [380/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.5547 (1.0453) acc 68.7500 (73.4868) lr 7.8853e-06 eta 0:02:43 +epoch [50/50] batch [385/500] time 1.352 (1.365) data 0.000 (0.003) loss 0.6372 (1.0470) acc 78.1250 (73.4740) lr 7.8853e-06 eta 0:02:36 +epoch [50/50] batch [390/500] time 1.356 (1.365) data 0.000 (0.002) loss 0.4758 (1.0456) acc 84.3750 (73.4856) lr 7.8853e-06 eta 0:02:30 +epoch [50/50] batch [395/500] time 1.344 (1.365) data 0.000 (0.002) loss 0.8574 (1.0458) acc 78.1250 (73.4731) lr 7.8853e-06 eta 0:02:23 +epoch [50/50] batch [400/500] time 1.344 (1.365) data 0.000 (0.002) loss 1.2051 (1.0461) acc 68.7500 (73.5000) lr 7.8853e-06 eta 0:02:16 +epoch [50/50] batch [405/500] time 1.351 (1.365) data 0.000 (0.002) loss 1.4756 (1.0458) acc 62.5000 (73.5417) lr 7.8853e-06 eta 0:02:09 +epoch [50/50] batch [410/500] time 1.374 (1.365) data 0.000 (0.002) loss 0.6509 (1.0433) acc 71.8750 (73.6052) lr 7.8853e-06 eta 0:02:02 +epoch [50/50] batch [415/500] time 1.360 (1.365) data 0.000 (0.002) loss 1.1953 (1.0426) acc 78.1250 (73.6596) lr 7.8853e-06 eta 0:01:55 +epoch [50/50] batch [420/500] time 1.368 (1.365) data 0.000 (0.002) loss 0.5513 (1.0415) acc 81.2500 (73.6830) lr 7.8853e-06 eta 0:01:49 +epoch [50/50] batch [425/500] time 1.352 (1.365) data 0.000 (0.002) loss 0.3328 (1.0389) acc 90.6250 (73.7794) lr 7.8853e-06 eta 0:01:42 +epoch [50/50] batch [430/500] time 1.359 (1.365) data 0.000 (0.002) loss 1.3740 (1.0399) acc 71.8750 (73.7791) lr 7.8853e-06 eta 0:01:35 +epoch [50/50] batch [435/500] time 1.347 (1.364) data 0.000 (0.002) loss 1.2969 (1.0393) acc 59.3750 (73.7859) lr 7.8853e-06 eta 0:01:28 +epoch [50/50] batch [440/500] time 1.358 (1.364) data 0.000 (0.002) loss 0.6968 (1.0403) acc 75.0000 (73.7642) lr 7.8853e-06 eta 0:01:21 +epoch [50/50] batch [445/500] time 1.347 (1.364) data 
0.000 (0.002) loss 1.2129 (1.0389) acc 71.8750 (73.7992) lr 7.8853e-06 eta 0:01:15 +epoch [50/50] batch [450/500] time 1.355 (1.364) data 0.000 (0.002) loss 1.3301 (1.0414) acc 68.7500 (73.7778) lr 7.8853e-06 eta 0:01:08 +epoch [50/50] batch [455/500] time 1.357 (1.364) data 0.000 (0.002) loss 0.7915 (1.0410) acc 78.1250 (73.8049) lr 7.8853e-06 eta 0:01:01 +epoch [50/50] batch [460/500] time 1.351 (1.364) data 0.000 (0.002) loss 1.1152 (1.0409) acc 68.7500 (73.7704) lr 7.8853e-06 eta 0:00:54 +epoch [50/50] batch [465/500] time 1.373 (1.364) data 0.000 (0.002) loss 0.6621 (1.0388) acc 81.2500 (73.8105) lr 7.8853e-06 eta 0:00:47 +epoch [50/50] batch [470/500] time 1.341 (1.364) data 0.000 (0.002) loss 1.6699 (1.0416) acc 68.7500 (73.7500) lr 7.8853e-06 eta 0:00:40 +epoch [50/50] batch [475/500] time 1.369 (1.364) data 0.000 (0.002) loss 1.1982 (1.0413) acc 71.8750 (73.7697) lr 7.8853e-06 eta 0:00:34 +epoch [50/50] batch [480/500] time 1.378 (1.364) data 0.000 (0.002) loss 1.0557 (1.0426) acc 81.2500 (73.7826) lr 7.8853e-06 eta 0:00:27 +epoch [50/50] batch [485/500] time 1.377 (1.364) data 0.001 (0.002) loss 0.7363 (1.0414) acc 75.0000 (73.8338) lr 7.8853e-06 eta 0:00:20 +epoch [50/50] batch [490/500] time 1.371 (1.364) data 0.000 (0.002) loss 0.8311 (1.0422) acc 75.0000 (73.8138) lr 7.8853e-06 eta 0:00:13 +epoch [50/50] batch [495/500] time 1.369 (1.364) data 0.000 (0.002) loss 0.9443 (1.0414) acc 84.3750 (73.8699) lr 7.8853e-06 eta 0:00:06 +epoch [50/50] batch [500/500] time 1.372 (1.364) data 0.000 (0.002) loss 0.8145 (1.0416) acc 84.3750 (73.8500) lr 1.9733e-06 eta 0:00:00 +Checkpoint saved to output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 +Finish training +Deploy the last-epoch model +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 38,940 +* accuracy: 77.9% +* error: 22.1% +* macro_f1: 77.4% +Elapsed: 9:33:52 diff --git 
a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint new file mode 100644 index 00000000..a9d493d3 --- /dev/null +++ b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/checkpoint @@ -0,0 +1 @@ +model.pth.tar-50 diff --git a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 new file mode 100644 index 00000000..4a2eaf00 Binary files /dev/null and b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/prompt_learner/model.pth.tar-50 differ diff --git a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698431659.ckb-gpu-lambda.380665.0 b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698431659.ckb-gpu-lambda.380665.0 new file mode 100644 index 00000000..1af5c2aa Binary files /dev/null and b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed1/tensorboard/events.out.tfevents.1698431659.ckb-gpu-lambda.380665.0 differ diff --git a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt new file mode 100644 index 00000000..23ed3056 --- /dev/null +++ b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/log.txt @@ -0,0 +1,2029 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/CoOp/vit_l14_ep50.yaml 
+dataset_config_file: configs/datasets/imagenet.yaml +eval_only: False +head: +load_epoch: None +model_dir: +no_train: False +opts: ['TRAINER.COOP.N_CTX', '16', 'TRAINER.COOP.CSC', 'False', 'TRAINER.COOP.CLASS_TOKEN_POSITION', 'end', 'DATASET.NUM_SHOTS', '16'] +output_dir: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +resume: +root: /ckb-nfs/home/zcafego/ +seed: 2 +source_domains: None +target_domains: None +trainer: CoOp +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: 16 + ROOT: /ckb-nfs/home/zcafego/ + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-L/14 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.002 + LR_SCHEDULER: cosine + MAX_EPOCH: 50 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: () + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: 
(-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 5 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: CoOp + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 2.1.0 +Is debug build: False +CUDA used to build PyTorch: 11.8 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 20.04.6 LTS (x86_64) +GCC version: (Ubuntu 8.4.0-3ubuntu2) 8.4.0 +Clang version: 10.0.0-4ubuntu1 +CMake version: version 3.23.2 +Libc version: glibc-2.31 + +Python version: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-5.4.0-113-generic-x86_64-with-glibc2.17 +Is CUDA available: True +CUDA runtime version: Could not collect +CUDA_MODULE_LOADING set to: LAZY +GPU models and configuration: +GPU 0: Tesla V100-SXM2-32GB +GPU 1: Tesla V100-SXM2-32GB +GPU 2: Tesla V100-SXM2-32GB +GPU 3: Tesla V100-SXM2-32GB + +Nvidia driver version: 510.73.05 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 +/usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.1 +/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.1 +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +Address sizes: 46 bits physical, 48 bits virtual +CPU(s): 64 +On-line CPU(s) list: 0-63 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz +Stepping: 7 +CPU MHz: 1200.106 +CPU max MHz: 3900.0000 +CPU min MHz: 1200.0000 +BogoMIPS: 5600.00 +Virtualization: VT-x +L1d cache: 1 MiB +L1i cache: 1 MiB +L2 cache: 32 MiB +L3 cache: 44 MiB +NUMA node0 CPU(s): 0-15,32-47 +NUMA node1 CPU(s): 16-31,48-63 +Vulnerability Itlb multihit: KVM: Mitigation: Split huge pages +Vulnerability L1tf: Not 
affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Mitigation; TSX disabled +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb intel_pt avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts pku ospke avx512_vnni md_clear flush_l1d arch_capabilities + +Versions of relevant libraries: +[pip3] flake8==3.7.9 +[pip3] numpy==1.24.3 +[pip3] torch==2.1.0 +[pip3] torchvision==0.8.2 +[pip3] triton==2.1.0 +[pip3] tritonclient==2.33.0 +[conda] blas 1.0 mkl +[conda] cudatoolkit 11.8.0 h6a678d5_0 +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] libjpeg-turbo 2.0.0 h9bf148f_0 pytorch +[conda] mkl 2023.1.0 h213fc3f_46343 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.8 py38h5eee18b_0 +[conda] mkl_random 1.2.4 py38hdb19cb5_0 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] pytorch 2.1.0 py3.8_cuda11.8_cudnn8.7.0_0 pytorch +[conda] pytorch-cuda 
11.8 h7e8668a_5 pytorch +[conda] pytorch-mutex 1.0 cuda pytorch +[conda] torch 2.1.0 pypi_0 pypi +[conda] torchtriton 2.1.0 py38 pytorch +[conda] torchvision 0.16.0 py38_cu118 pytorch +[conda] triton 2.1.0 pypi_0 pypi + Pillow (10.0.1) + +Loading trainer: CoOp +Loading dataset: ImageNet +Creating a 16-shot dataset +Saving preprocessed few-shot data to /ckb-nfs/home/zcafego/imagenet/split_fewshot/shot_16-seed_2.pkl +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 1,000 +# train_x 16,000 +# val 50,000 +# test 50,000 +--------- -------- +Loading CLIP (backbone: ViT-L/14) +Building custom CLIP +Initializing a generic context +Initial context: "X X X X X X X X X X X X X X X X" +Number of context words (tokens): 16 +Turning off gradients in both the image and the text encoder +Multiple GPUs detected (n_gpus=2), use all of them! 
+Loading evaluator: Classification +No checkpoint found, train from scratch +Initialize tensorboard (log_dir=output/imagenet/CoOp/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard) +epoch [1/50] batch [5/500] time 1.353 (2.380) data 0.000 (0.337) loss 2.6680 (3.0004) acc 46.8750 (43.1250) lr 1.0000e-05 eta 16:31:27 +epoch [1/50] batch [10/500] time 1.367 (1.874) data 0.000 (0.169) loss 2.1602 (2.6768) acc 53.1250 (46.8750) lr 1.0000e-05 eta 13:00:24 +epoch [1/50] batch [15/500] time 1.348 (1.701) data 0.000 (0.113) loss 2.3926 (2.6326) acc 53.1250 (47.0833) lr 1.0000e-05 eta 11:48:27 +epoch [1/50] batch [20/500] time 1.366 (1.617) data 0.000 (0.085) loss 2.6055 (2.5407) acc 40.6250 (48.1250) lr 1.0000e-05 eta 11:13:22 +epoch [1/50] batch [25/500] time 1.369 (1.567) data 0.000 (0.068) loss 2.2500 (2.4505) acc 46.8750 (49.3750) lr 1.0000e-05 eta 10:52:18 +epoch [1/50] batch [30/500] time 1.358 (1.532) data 0.000 (0.056) loss 1.9404 (2.4199) acc 56.2500 (49.6875) lr 1.0000e-05 eta 10:37:40 +epoch [1/50] batch [35/500] time 1.362 (1.508) data 0.000 (0.048) loss 1.7510 (2.3313) acc 56.2500 (51.0714) lr 1.0000e-05 eta 10:27:25 +epoch [1/50] batch [40/500] time 1.365 (1.490) data 0.000 (0.042) loss 2.2148 (2.2764) acc 50.0000 (51.7188) lr 1.0000e-05 eta 10:19:54 +epoch [1/50] batch [45/500] time 1.361 (1.476) data 0.000 (0.038) loss 1.5869 (2.2606) acc 59.3750 (52.1528) lr 1.0000e-05 eta 10:13:52 +epoch [1/50] batch [50/500] time 1.354 (1.464) data 0.001 (0.034) loss 1.3623 (2.2209) acc 71.8750 (53.0000) lr 1.0000e-05 eta 10:08:47 +epoch [1/50] batch [55/500] time 1.376 (1.455) data 0.000 (0.031) loss 2.5352 (2.1800) acc 40.6250 (53.6932) lr 1.0000e-05 eta 10:05:03 +epoch [1/50] batch [60/500] time 1.364 (1.447) data 0.000 (0.028) loss 1.6846 (2.1436) acc 59.3750 (54.3750) lr 1.0000e-05 eta 10:01:32 +epoch [1/50] batch [65/500] time 1.365 (1.441) data 0.000 (0.026) loss 1.3691 (2.1060) acc 68.7500 (55.0962) lr 1.0000e-05 eta 9:58:45 +epoch [1/50] batch 
[70/500] time 1.363 (1.435) data 0.000 (0.024) loss 1.3213 (2.0912) acc 68.7500 (55.4464) lr 1.0000e-05 eta 9:56:22 +epoch [1/50] batch [75/500] time 1.371 (1.430) data 0.000 (0.023) loss 1.7529 (2.0628) acc 56.2500 (55.6667) lr 1.0000e-05 eta 9:54:10 +epoch [1/50] batch [80/500] time 1.375 (1.426) data 0.000 (0.021) loss 1.4932 (2.0367) acc 68.7500 (56.3281) lr 1.0000e-05 eta 9:52:14 +epoch [1/50] batch [85/500] time 1.364 (1.422) data 0.001 (0.020) loss 2.0371 (2.0133) acc 40.6250 (56.4706) lr 1.0000e-05 eta 9:50:25 +epoch [1/50] batch [90/500] time 1.364 (1.419) data 0.000 (0.019) loss 1.4316 (2.0048) acc 59.3750 (56.3194) lr 1.0000e-05 eta 9:49:02 +epoch [1/50] batch [95/500] time 1.370 (1.416) data 0.000 (0.018) loss 0.9624 (1.9725) acc 75.0000 (56.8092) lr 1.0000e-05 eta 9:47:41 +epoch [1/50] batch [100/500] time 1.384 (1.414) data 0.000 (0.017) loss 1.8203 (1.9715) acc 59.3750 (56.9688) lr 1.0000e-05 eta 9:46:44 +epoch [1/50] batch [105/500] time 1.364 (1.411) data 0.000 (0.016) loss 1.8516 (1.9651) acc 59.3750 (57.3214) lr 1.0000e-05 eta 9:45:35 +epoch [1/50] batch [110/500] time 1.361 (1.410) data 0.000 (0.016) loss 1.3242 (1.9543) acc 75.0000 (57.5000) lr 1.0000e-05 eta 9:44:42 +epoch [1/50] batch [115/500] time 1.373 (1.408) data 0.000 (0.015) loss 2.3379 (1.9418) acc 56.2500 (57.6902) lr 1.0000e-05 eta 9:43:45 +epoch [1/50] batch [120/500] time 1.380 (1.406) data 0.000 (0.014) loss 2.0215 (1.9298) acc 46.8750 (57.9688) lr 1.0000e-05 eta 9:42:59 +epoch [1/50] batch [125/500] time 1.351 (1.404) data 0.000 (0.014) loss 1.2373 (1.9107) acc 71.8750 (58.3500) lr 1.0000e-05 eta 9:42:15 +epoch [1/50] batch [130/500] time 1.364 (1.404) data 0.000 (0.013) loss 1.9961 (1.9038) acc 59.3750 (58.5096) lr 1.0000e-05 eta 9:42:08 +epoch [1/50] batch [135/500] time 1.373 (1.403) data 0.000 (0.013) loss 1.7256 (1.8903) acc 56.2500 (58.7269) lr 1.0000e-05 eta 9:41:27 +epoch [1/50] batch [140/500] time 1.394 (1.402) data 0.000 (0.012) loss 2.3574 (1.8775) acc 53.1250 
(58.8839) lr 1.0000e-05 eta 9:40:59 +epoch [1/50] batch [145/500] time 1.377 (1.401) data 0.000 (0.012) loss 1.8555 (1.8715) acc 56.2500 (58.8362) lr 1.0000e-05 eta 9:40:27 +epoch [1/50] batch [150/500] time 1.366 (1.400) data 0.000 (0.012) loss 1.9990 (1.8628) acc 53.1250 (59.1458) lr 1.0000e-05 eta 9:39:54 +epoch [1/50] batch [155/500] time 1.363 (1.399) data 0.000 (0.011) loss 2.0996 (1.8558) acc 59.3750 (59.3347) lr 1.0000e-05 eta 9:39:21 +epoch [1/50] batch [160/500] time 1.370 (1.398) data 0.000 (0.011) loss 1.4707 (1.8541) acc 56.2500 (59.1797) lr 1.0000e-05 eta 9:38:51 +epoch [1/50] batch [165/500] time 1.350 (1.397) data 0.000 (0.011) loss 1.7148 (1.8492) acc 53.1250 (59.1856) lr 1.0000e-05 eta 9:38:21 +epoch [1/50] batch [170/500] time 1.380 (1.396) data 0.000 (0.010) loss 1.7578 (1.8375) acc 62.5000 (59.3566) lr 1.0000e-05 eta 9:37:53 +epoch [1/50] batch [175/500] time 1.369 (1.395) data 0.000 (0.010) loss 2.0742 (1.8386) acc 59.3750 (59.4107) lr 1.0000e-05 eta 9:37:20 +epoch [1/50] batch [180/500] time 1.369 (1.395) data 0.000 (0.010) loss 0.9971 (1.8301) acc 75.0000 (59.5486) lr 1.0000e-05 eta 9:36:52 +epoch [1/50] batch [185/500] time 1.367 (1.394) data 0.000 (0.009) loss 1.1113 (1.8254) acc 65.6250 (59.6284) lr 1.0000e-05 eta 9:36:24 +epoch [1/50] batch [190/500] time 1.377 (1.393) data 0.000 (0.009) loss 1.1191 (1.8157) acc 71.8750 (59.7862) lr 1.0000e-05 eta 9:35:56 +epoch [1/50] batch [195/500] time 1.354 (1.392) data 0.000 (0.009) loss 2.0312 (1.8152) acc 62.5000 (59.8397) lr 1.0000e-05 eta 9:35:31 +epoch [1/50] batch [200/500] time 1.365 (1.392) data 0.000 (0.009) loss 1.7812 (1.8054) acc 65.6250 (60.0156) lr 1.0000e-05 eta 9:35:09 +epoch [1/50] batch [205/500] time 1.339 (1.391) data 0.000 (0.009) loss 1.7959 (1.7951) acc 62.5000 (60.1067) lr 1.0000e-05 eta 9:34:42 +epoch [1/50] batch [210/500] time 1.360 (1.390) data 0.000 (0.008) loss 1.6230 (1.7956) acc 62.5000 (60.0744) lr 1.0000e-05 eta 9:34:17 +epoch [1/50] batch [215/500] time 1.352 
(1.389) data 0.000 (0.008) loss 1.2314 (1.7844) acc 65.6250 (60.2907) lr 1.0000e-05 eta 9:33:55 +epoch [1/50] batch [220/500] time 1.392 (1.389) data 0.000 (0.008) loss 2.3105 (1.7787) acc 56.2500 (60.4545) lr 1.0000e-05 eta 9:33:42 +epoch [1/50] batch [225/500] time 1.374 (1.389) data 0.000 (0.008) loss 1.5068 (1.7711) acc 65.6250 (60.5694) lr 1.0000e-05 eta 9:33:27 +epoch [1/50] batch [230/500] time 1.362 (1.389) data 0.000 (0.008) loss 1.1777 (1.7646) acc 75.0000 (60.7473) lr 1.0000e-05 eta 9:33:16 +epoch [1/50] batch [235/500] time 1.365 (1.388) data 0.000 (0.008) loss 1.4883 (1.7569) acc 62.5000 (60.8245) lr 1.0000e-05 eta 9:32:53 +epoch [1/50] batch [240/500] time 1.381 (1.388) data 0.000 (0.007) loss 1.3818 (1.7569) acc 71.8750 (60.9245) lr 1.0000e-05 eta 9:32:38 +epoch [1/50] batch [245/500] time 1.351 (1.387) data 0.000 (0.007) loss 1.0498 (1.7544) acc 75.0000 (60.9566) lr 1.0000e-05 eta 9:32:20 +epoch [1/50] batch [250/500] time 1.389 (1.387) data 0.000 (0.007) loss 1.2861 (1.7467) acc 59.3750 (61.0375) lr 1.0000e-05 eta 9:32:06 +epoch [1/50] batch [255/500] time 1.377 (1.387) data 0.000 (0.007) loss 1.6689 (1.7407) acc 68.7500 (61.1275) lr 1.0000e-05 eta 9:31:51 +epoch [1/50] batch [260/500] time 1.372 (1.386) data 0.000 (0.007) loss 1.8477 (1.7376) acc 68.7500 (61.2620) lr 1.0000e-05 eta 9:31:31 +epoch [1/50] batch [265/500] time 1.349 (1.386) data 0.000 (0.007) loss 2.2305 (1.7313) acc 46.8750 (61.4033) lr 1.0000e-05 eta 9:31:13 +epoch [1/50] batch [270/500] time 1.387 (1.385) data 0.000 (0.007) loss 1.3428 (1.7195) acc 71.8750 (61.6551) lr 1.0000e-05 eta 9:30:58 +epoch [1/50] batch [275/500] time 1.343 (1.385) data 0.000 (0.006) loss 1.4639 (1.7159) acc 68.7500 (61.6932) lr 1.0000e-05 eta 9:30:55 +epoch [1/50] batch [280/500] time 1.375 (1.385) data 0.000 (0.006) loss 1.2197 (1.7089) acc 65.6250 (61.8304) lr 1.0000e-05 eta 9:30:37 +epoch [1/50] batch [285/500] time 1.374 (1.385) data 0.000 (0.006) loss 1.1084 (1.7031) acc 71.8750 (61.9737) lr 
1.0000e-05 eta 9:30:22 +epoch [1/50] batch [290/500] time 1.384 (1.384) data 0.000 (0.006) loss 1.0654 (1.6992) acc 68.7500 (62.0043) lr 1.0000e-05 eta 9:30:07 +epoch [1/50] batch [295/500] time 1.356 (1.384) data 0.000 (0.006) loss 1.1455 (1.6939) acc 65.6250 (62.0763) lr 1.0000e-05 eta 9:29:50 +epoch [1/50] batch [300/500] time 1.377 (1.384) data 0.000 (0.006) loss 1.6162 (1.6871) acc 56.2500 (62.0625) lr 1.0000e-05 eta 9:29:37 +epoch [1/50] batch [305/500] time 1.371 (1.383) data 0.000 (0.006) loss 1.3760 (1.6856) acc 71.8750 (62.0902) lr 1.0000e-05 eta 9:29:23 +epoch [1/50] batch [310/500] time 1.335 (1.383) data 0.000 (0.006) loss 0.8809 (1.6797) acc 78.1250 (62.2177) lr 1.0000e-05 eta 9:29:08 +epoch [1/50] batch [315/500] time 1.336 (1.383) data 0.000 (0.006) loss 1.0000 (1.6795) acc 65.6250 (62.2123) lr 1.0000e-05 eta 9:28:51 +epoch [1/50] batch [320/500] time 1.359 (1.382) data 0.000 (0.006) loss 1.0859 (1.6703) acc 65.6250 (62.3633) lr 1.0000e-05 eta 9:28:31 +epoch [1/50] batch [325/500] time 1.346 (1.382) data 0.000 (0.006) loss 1.5293 (1.6671) acc 65.6250 (62.4327) lr 1.0000e-05 eta 9:28:13 +epoch [1/50] batch [330/500] time 1.351 (1.381) data 0.001 (0.005) loss 1.6631 (1.6666) acc 59.3750 (62.4053) lr 1.0000e-05 eta 9:27:58 +epoch [1/50] batch [335/500] time 1.365 (1.381) data 0.000 (0.005) loss 1.8867 (1.6611) acc 65.6250 (62.4720) lr 1.0000e-05 eta 9:27:43 +epoch [1/50] batch [340/500] time 1.366 (1.381) data 0.000 (0.005) loss 0.6108 (1.6590) acc 87.5000 (62.5551) lr 1.0000e-05 eta 9:27:27 +epoch [1/50] batch [345/500] time 1.355 (1.381) data 0.000 (0.005) loss 1.0752 (1.6564) acc 71.8750 (62.5996) lr 1.0000e-05 eta 9:27:16 +epoch [1/50] batch [350/500] time 1.361 (1.380) data 0.000 (0.005) loss 1.5283 (1.6548) acc 65.6250 (62.6250) lr 1.0000e-05 eta 9:27:00 +epoch [1/50] batch [355/500] time 1.354 (1.380) data 0.000 (0.005) loss 1.1992 (1.6517) acc 71.8750 (62.6673) lr 1.0000e-05 eta 9:26:46 +epoch [1/50] batch [360/500] time 1.363 (1.380) data 
0.000 (0.005) loss 1.1680 (1.6477) acc 71.8750 (62.7083) lr 1.0000e-05 eta 9:26:35 +epoch [1/50] batch [365/500] time 1.364 (1.379) data 0.000 (0.005) loss 1.4873 (1.6436) acc 59.3750 (62.7140) lr 1.0000e-05 eta 9:26:23 +epoch [1/50] batch [370/500] time 1.375 (1.379) data 0.000 (0.005) loss 1.4668 (1.6414) acc 59.3750 (62.7703) lr 1.0000e-05 eta 9:26:12 +epoch [1/50] batch [375/500] time 1.368 (1.379) data 0.000 (0.005) loss 2.1230 (1.6374) acc 53.1250 (62.8000) lr 1.0000e-05 eta 9:26:08 +epoch [1/50] batch [380/500] time 1.373 (1.379) data 0.000 (0.005) loss 1.3418 (1.6347) acc 75.0000 (62.8125) lr 1.0000e-05 eta 9:26:00 +epoch [1/50] batch [385/500] time 1.384 (1.379) data 0.000 (0.005) loss 1.7363 (1.6326) acc 62.5000 (62.8571) lr 1.0000e-05 eta 9:25:50 +epoch [1/50] batch [390/500] time 1.373 (1.379) data 0.000 (0.005) loss 1.1152 (1.6298) acc 71.8750 (62.9808) lr 1.0000e-05 eta 9:25:39 +epoch [1/50] batch [395/500] time 1.360 (1.379) data 0.000 (0.005) loss 0.9424 (1.6234) acc 71.8750 (63.1013) lr 1.0000e-05 eta 9:25:30 +epoch [1/50] batch [400/500] time 1.356 (1.379) data 0.000 (0.005) loss 1.6758 (1.6199) acc 59.3750 (63.2188) lr 1.0000e-05 eta 9:25:19 +epoch [1/50] batch [405/500] time 1.363 (1.379) data 0.000 (0.005) loss 0.9937 (1.6166) acc 59.3750 (63.2330) lr 1.0000e-05 eta 9:25:08 +epoch [1/50] batch [410/500] time 1.357 (1.379) data 0.000 (0.004) loss 1.7041 (1.6145) acc 62.5000 (63.2470) lr 1.0000e-05 eta 9:24:57 +epoch [1/50] batch [415/500] time 1.506 (1.379) data 0.000 (0.004) loss 1.7549 (1.6136) acc 62.5000 (63.3133) lr 1.0000e-05 eta 9:24:56 +epoch [1/50] batch [420/500] time 1.345 (1.379) data 0.000 (0.004) loss 1.8330 (1.6125) acc 53.1250 (63.3185) lr 1.0000e-05 eta 9:24:44 +epoch [1/50] batch [425/500] time 1.364 (1.378) data 0.000 (0.004) loss 1.9277 (1.6128) acc 62.5000 (63.3235) lr 1.0000e-05 eta 9:24:32 +epoch [1/50] batch [430/500] time 1.367 (1.378) data 0.001 (0.004) loss 1.2998 (1.6113) acc 75.0000 (63.3648) lr 1.0000e-05 eta 
9:24:20 +epoch [1/50] batch [435/500] time 1.364 (1.378) data 0.000 (0.004) loss 1.1543 (1.6075) acc 71.8750 (63.4483) lr 1.0000e-05 eta 9:24:09 +epoch [1/50] batch [440/500] time 1.351 (1.378) data 0.000 (0.004) loss 1.1318 (1.6046) acc 71.8750 (63.4943) lr 1.0000e-05 eta 9:23:54 +epoch [1/50] batch [445/500] time 1.355 (1.377) data 0.000 (0.004) loss 1.1436 (1.6021) acc 75.0000 (63.5604) lr 1.0000e-05 eta 9:23:38 +epoch [1/50] batch [450/500] time 1.369 (1.377) data 0.000 (0.004) loss 2.3691 (1.6010) acc 53.1250 (63.5556) lr 1.0000e-05 eta 9:23:26 +epoch [1/50] batch [455/500] time 1.366 (1.377) data 0.000 (0.004) loss 1.4902 (1.5982) acc 56.2500 (63.6195) lr 1.0000e-05 eta 9:23:16 +epoch [1/50] batch [460/500] time 1.350 (1.377) data 0.000 (0.004) loss 1.8545 (1.5993) acc 56.2500 (63.6209) lr 1.0000e-05 eta 9:23:04 +epoch [1/50] batch [465/500] time 1.362 (1.377) data 0.001 (0.004) loss 0.8286 (1.5967) acc 75.0000 (63.6492) lr 1.0000e-05 eta 9:22:53 +epoch [1/50] batch [470/500] time 1.374 (1.376) data 0.000 (0.004) loss 1.4180 (1.5934) acc 71.8750 (63.6968) lr 1.0000e-05 eta 9:22:43 +epoch [1/50] batch [475/500] time 1.363 (1.376) data 0.000 (0.004) loss 1.5449 (1.5898) acc 71.8750 (63.7763) lr 1.0000e-05 eta 9:22:35 +epoch [1/50] batch [480/500] time 1.362 (1.376) data 0.000 (0.004) loss 2.2305 (1.5881) acc 56.2500 (63.8021) lr 1.0000e-05 eta 9:22:26 +epoch [1/50] batch [485/500] time 1.365 (1.376) data 0.001 (0.004) loss 1.3672 (1.5830) acc 78.1250 (63.8982) lr 1.0000e-05 eta 9:22:16 +epoch [1/50] batch [490/500] time 1.382 (1.376) data 0.000 (0.004) loss 1.2090 (1.5812) acc 68.7500 (63.9158) lr 1.0000e-05 eta 9:22:08 +epoch [1/50] batch [495/500] time 1.355 (1.376) data 0.000 (0.004) loss 1.5117 (1.5812) acc 65.6250 (63.8699) lr 1.0000e-05 eta 9:21:59 +epoch [1/50] batch [500/500] time 1.392 (1.376) data 0.000 (0.004) loss 1.5713 (1.5795) acc 65.6250 (63.8937) lr 2.0000e-03 eta 9:21:53 +epoch [2/50] batch [5/500] time 1.360 (1.556) data 0.000 (0.185) loss 
1.4053 (1.6859) acc 65.6250 (61.8750) lr 2.0000e-03 eta 10:35:07 +epoch [2/50] batch [10/500] time 1.561 (1.477) data 0.001 (0.093) loss 1.4248 (1.5491) acc 56.2500 (62.8125) lr 2.0000e-03 eta 10:02:44 +epoch [2/50] batch [15/500] time 1.355 (1.440) data 0.000 (0.062) loss 1.7383 (1.5878) acc 62.5000 (63.5417) lr 2.0000e-03 eta 9:47:41 +epoch [2/50] batch [20/500] time 1.395 (1.424) data 0.000 (0.047) loss 1.6543 (1.4983) acc 68.7500 (65.1562) lr 2.0000e-03 eta 9:40:55 +epoch [2/50] batch [25/500] time 1.358 (1.411) data 0.000 (0.038) loss 0.9419 (1.4554) acc 75.0000 (66.0000) lr 2.0000e-03 eta 9:35:36 +epoch [2/50] batch [30/500] time 1.369 (1.403) data 0.001 (0.031) loss 2.0371 (1.4713) acc 59.3750 (65.1042) lr 2.0000e-03 eta 9:32:03 +epoch [2/50] batch [35/500] time 1.382 (1.396) data 0.000 (0.027) loss 1.6855 (1.4893) acc 59.3750 (64.3750) lr 2.0000e-03 eta 9:29:12 +epoch [2/50] batch [40/500] time 1.364 (1.392) data 0.000 (0.024) loss 1.1738 (1.4632) acc 81.2500 (65.6250) lr 2.0000e-03 eta 9:27:18 +epoch [2/50] batch [45/500] time 1.353 (1.388) data 0.000 (0.021) loss 1.1260 (1.4604) acc 65.6250 (65.4167) lr 2.0000e-03 eta 9:25:49 +epoch [2/50] batch [50/500] time 1.354 (1.385) data 0.000 (0.019) loss 1.3105 (1.4309) acc 62.5000 (65.6250) lr 2.0000e-03 eta 9:24:29 +epoch [2/50] batch [55/500] time 1.349 (1.383) data 0.000 (0.017) loss 1.7734 (1.4297) acc 59.3750 (65.0568) lr 2.0000e-03 eta 9:23:17 +epoch [2/50] batch [60/500] time 1.362 (1.381) data 0.000 (0.016) loss 1.1660 (1.4269) acc 65.6250 (65.4167) lr 2.0000e-03 eta 9:22:29 +epoch [2/50] batch [65/500] time 1.385 (1.379) data 0.001 (0.015) loss 1.0068 (1.4051) acc 84.3750 (66.0577) lr 2.0000e-03 eta 9:21:42 +epoch [2/50] batch [70/500] time 1.502 (1.380) data 0.000 (0.014) loss 1.5020 (1.3881) acc 62.5000 (66.3393) lr 2.0000e-03 eta 9:21:43 +epoch [2/50] batch [75/500] time 1.361 (1.378) data 0.001 (0.013) loss 0.9395 (1.3524) acc 75.0000 (67.0833) lr 2.0000e-03 eta 9:21:01 +epoch [2/50] batch [80/500] 
time 1.364 (1.377) data 0.000 (0.012) loss 1.7109 (1.3711) acc 56.2500 (66.6016) lr 2.0000e-03 eta 9:20:22 +epoch [2/50] batch [85/500] time 1.358 (1.376) data 0.000 (0.011) loss 0.9800 (1.3569) acc 78.1250 (66.9118) lr 2.0000e-03 eta 9:19:59 +epoch [2/50] batch [90/500] time 1.373 (1.375) data 0.001 (0.011) loss 0.5405 (1.3499) acc 87.5000 (66.7361) lr 2.0000e-03 eta 9:19:32 +epoch [2/50] batch [95/500] time 1.364 (1.375) data 0.000 (0.010) loss 0.7271 (1.3471) acc 75.0000 (66.8092) lr 2.0000e-03 eta 9:19:13 +epoch [2/50] batch [100/500] time 1.364 (1.375) data 0.001 (0.010) loss 0.6807 (1.3421) acc 90.6250 (67.2188) lr 2.0000e-03 eta 9:18:58 +epoch [2/50] batch [105/500] time 1.381 (1.374) data 0.000 (0.009) loss 1.6592 (1.3438) acc 62.5000 (67.2917) lr 2.0000e-03 eta 9:18:44 +epoch [2/50] batch [110/500] time 1.353 (1.374) data 0.000 (0.009) loss 1.3711 (1.3541) acc 65.6250 (67.0170) lr 2.0000e-03 eta 9:18:30 +epoch [2/50] batch [115/500] time 1.362 (1.375) data 0.000 (0.008) loss 1.1729 (1.3478) acc 59.3750 (67.0109) lr 2.0000e-03 eta 9:18:44 +epoch [2/50] batch [120/500] time 1.371 (1.374) data 0.000 (0.008) loss 0.9062 (1.3445) acc 68.7500 (67.0312) lr 2.0000e-03 eta 9:18:27 +epoch [2/50] batch [125/500] time 1.352 (1.374) data 0.000 (0.008) loss 1.0703 (1.3382) acc 78.1250 (67.1500) lr 2.0000e-03 eta 9:18:08 +epoch [2/50] batch [130/500] time 1.371 (1.374) data 0.000 (0.008) loss 1.4053 (1.3354) acc 65.6250 (67.1154) lr 2.0000e-03 eta 9:17:55 +epoch [2/50] batch [135/500] time 1.365 (1.373) data 0.000 (0.007) loss 0.8887 (1.3302) acc 78.1250 (67.1991) lr 2.0000e-03 eta 9:17:42 +epoch [2/50] batch [140/500] time 1.343 (1.373) data 0.000 (0.007) loss 1.1309 (1.3248) acc 68.7500 (67.3438) lr 2.0000e-03 eta 9:17:31 +epoch [2/50] batch [145/500] time 1.339 (1.373) data 0.000 (0.007) loss 1.1562 (1.3213) acc 65.6250 (67.4353) lr 2.0000e-03 eta 9:17:10 +epoch [2/50] batch [150/500] time 1.349 (1.372) data 0.000 (0.007) loss 1.1416 (1.3113) acc 65.6250 (67.5833) lr 
2.0000e-03 eta 9:16:46 +epoch [2/50] batch [155/500] time 1.364 (1.371) data 0.001 (0.006) loss 0.8706 (1.3001) acc 81.2500 (67.8427) lr 2.0000e-03 eta 9:16:29 +epoch [2/50] batch [160/500] time 1.357 (1.372) data 0.000 (0.006) loss 1.3311 (1.3089) acc 71.8750 (67.7344) lr 2.0000e-03 eta 9:16:23 +epoch [2/50] batch [165/500] time 1.377 (1.371) data 0.000 (0.006) loss 1.1055 (1.3172) acc 65.6250 (67.5000) lr 2.0000e-03 eta 9:16:11 +epoch [2/50] batch [170/500] time 1.366 (1.371) data 0.000 (0.006) loss 1.0479 (1.3148) acc 65.6250 (67.5184) lr 2.0000e-03 eta 9:15:59 +epoch [2/50] batch [175/500] time 1.374 (1.371) data 0.001 (0.006) loss 1.7627 (1.3206) acc 62.5000 (67.4821) lr 2.0000e-03 eta 9:15:48 +epoch [2/50] batch [180/500] time 1.366 (1.370) data 0.001 (0.006) loss 1.5010 (1.3118) acc 62.5000 (67.6389) lr 2.0000e-03 eta 9:15:29 +epoch [2/50] batch [185/500] time 1.372 (1.370) data 0.000 (0.005) loss 1.1572 (1.3052) acc 71.8750 (67.7872) lr 2.0000e-03 eta 9:15:17 +epoch [2/50] batch [190/500] time 1.356 (1.370) data 0.000 (0.005) loss 0.8174 (1.3018) acc 68.7500 (67.7138) lr 2.0000e-03 eta 9:15:01 +epoch [2/50] batch [195/500] time 1.379 (1.370) data 0.000 (0.005) loss 1.3809 (1.3046) acc 65.6250 (67.7083) lr 2.0000e-03 eta 9:14:57 +epoch [2/50] batch [200/500] time 1.360 (1.370) data 0.000 (0.005) loss 1.5869 (1.2994) acc 65.6250 (67.7969) lr 2.0000e-03 eta 9:14:51 +epoch [2/50] batch [205/500] time 1.367 (1.370) data 0.000 (0.005) loss 0.9736 (1.2938) acc 81.2500 (68.0335) lr 2.0000e-03 eta 9:14:41 +epoch [2/50] batch [210/500] time 1.344 (1.370) data 0.000 (0.005) loss 1.0078 (1.2903) acc 71.8750 (68.0952) lr 2.0000e-03 eta 9:14:28 +epoch [2/50] batch [215/500] time 1.347 (1.370) data 0.000 (0.005) loss 1.6670 (1.2873) acc 62.5000 (68.1831) lr 2.0000e-03 eta 9:14:33 +epoch [2/50] batch [220/500] time 1.362 (1.370) data 0.000 (0.005) loss 1.1201 (1.2873) acc 68.7500 (68.0824) lr 2.0000e-03 eta 9:14:16 +epoch [2/50] batch [225/500] time 1.385 (1.370) data 
0.000 (0.005) loss 1.3184 (1.2855) acc 71.8750 (68.1389) lr 2.0000e-03 eta 9:14:12 +epoch [2/50] batch [230/500] time 1.381 (1.370) data 0.000 (0.004) loss 1.7441 (1.2913) acc 68.7500 (68.0842) lr 2.0000e-03 eta 9:14:05 +epoch [2/50] batch [235/500] time 1.362 (1.370) data 0.000 (0.004) loss 0.8525 (1.2852) acc 71.8750 (68.1117) lr 2.0000e-03 eta 9:13:57 +epoch [2/50] batch [240/500] time 1.375 (1.370) data 0.000 (0.004) loss 1.4434 (1.2792) acc 59.3750 (68.2552) lr 2.0000e-03 eta 9:13:45 +epoch [2/50] batch [245/500] time 1.365 (1.369) data 0.000 (0.004) loss 1.0586 (1.2764) acc 71.8750 (68.2781) lr 2.0000e-03 eta 9:13:32 +epoch [2/50] batch [250/500] time 1.364 (1.369) data 0.000 (0.004) loss 1.8262 (1.2789) acc 53.1250 (68.2375) lr 2.0000e-03 eta 9:13:18 +epoch [2/50] batch [255/500] time 1.377 (1.369) data 0.000 (0.004) loss 1.9043 (1.2774) acc 53.1250 (68.2966) lr 2.0000e-03 eta 9:13:10 +epoch [2/50] batch [260/500] time 1.363 (1.370) data 0.000 (0.004) loss 1.5215 (1.2784) acc 53.1250 (68.2091) lr 2.0000e-03 eta 9:13:16 +epoch [2/50] batch [265/500] time 1.357 (1.369) data 0.000 (0.004) loss 1.2070 (1.2766) acc 68.7500 (68.2547) lr 2.0000e-03 eta 9:13:09 +epoch [2/50] batch [270/500] time 1.357 (1.369) data 0.000 (0.004) loss 1.2393 (1.2794) acc 62.5000 (68.2176) lr 2.0000e-03 eta 9:12:58 +epoch [2/50] batch [275/500] time 1.362 (1.369) data 0.000 (0.004) loss 0.8125 (1.2759) acc 62.5000 (68.1932) lr 2.0000e-03 eta 9:12:48 +epoch [2/50] batch [280/500] time 1.365 (1.369) data 0.000 (0.004) loss 1.8984 (1.2758) acc 62.5000 (68.2589) lr 2.0000e-03 eta 9:12:34 +epoch [2/50] batch [285/500] time 1.361 (1.369) data 0.001 (0.004) loss 1.3506 (1.2761) acc 62.5000 (68.2456) lr 2.0000e-03 eta 9:12:27 +epoch [2/50] batch [290/500] time 1.370 (1.369) data 0.000 (0.004) loss 1.3281 (1.2729) acc 75.0000 (68.3082) lr 2.0000e-03 eta 9:12:22 +epoch [2/50] batch [295/500] time 1.375 (1.369) data 0.000 (0.004) loss 0.7417 (1.2700) acc 87.5000 (68.4110) lr 2.0000e-03 eta 
9:12:14 +epoch [2/50] batch [300/500] time 1.365 (1.369) data 0.000 (0.003) loss 1.4355 (1.2736) acc 68.7500 (68.3333) lr 2.0000e-03 eta 9:12:07 +epoch [2/50] batch [305/500] time 1.364 (1.369) data 0.000 (0.003) loss 1.1602 (1.2732) acc 75.0000 (68.4119) lr 2.0000e-03 eta 9:11:55 +epoch [2/50] batch [310/500] time 1.357 (1.369) data 0.000 (0.003) loss 1.5479 (1.2742) acc 65.6250 (68.4173) lr 2.0000e-03 eta 9:11:49 +epoch [2/50] batch [315/500] time 1.342 (1.368) data 0.000 (0.003) loss 1.1592 (1.2742) acc 68.7500 (68.4127) lr 2.0000e-03 eta 9:11:36 +epoch [2/50] batch [320/500] time 1.374 (1.368) data 0.000 (0.003) loss 0.7983 (1.2733) acc 81.2500 (68.4570) lr 2.0000e-03 eta 9:11:27 +epoch [2/50] batch [325/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.3174 (1.2735) acc 65.6250 (68.4423) lr 2.0000e-03 eta 9:11:14 +epoch [2/50] batch [330/500] time 1.390 (1.368) data 0.002 (0.003) loss 1.8877 (1.2728) acc 53.1250 (68.4754) lr 2.0000e-03 eta 9:11:07 +epoch [2/50] batch [335/500] time 1.352 (1.368) data 0.000 (0.003) loss 1.1611 (1.2708) acc 78.1250 (68.5448) lr 2.0000e-03 eta 9:10:59 +epoch [2/50] batch [340/500] time 1.342 (1.368) data 0.001 (0.003) loss 1.2100 (1.2705) acc 71.8750 (68.5386) lr 2.0000e-03 eta 9:10:48 +epoch [2/50] batch [345/500] time 1.361 (1.368) data 0.001 (0.003) loss 0.5332 (1.2664) acc 81.2500 (68.6413) lr 2.0000e-03 eta 9:10:40 +epoch [2/50] batch [350/500] time 1.370 (1.368) data 0.000 (0.003) loss 0.6992 (1.2625) acc 78.1250 (68.7143) lr 2.0000e-03 eta 9:10:31 +epoch [2/50] batch [355/500] time 1.360 (1.368) data 0.000 (0.003) loss 1.2363 (1.2622) acc 65.6250 (68.7324) lr 2.0000e-03 eta 9:10:22 +epoch [2/50] batch [360/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.4199 (1.2634) acc 65.6250 (68.7760) lr 2.0000e-03 eta 9:10:23 +epoch [2/50] batch [365/500] time 1.369 (1.368) data 0.001 (0.003) loss 0.7856 (1.2643) acc 71.8750 (68.7500) lr 2.0000e-03 eta 9:10:15 +epoch [2/50] batch [370/500] time 1.358 (1.368) data 0.000 (0.003) loss 
1.2236 (1.2645) acc 71.8750 (68.8091) lr 2.0000e-03 eta 9:10:09 +epoch [2/50] batch [375/500] time 1.387 (1.368) data 0.000 (0.003) loss 1.3965 (1.2650) acc 68.7500 (68.8167) lr 2.0000e-03 eta 9:10:02 +epoch [2/50] batch [380/500] time 1.350 (1.368) data 0.000 (0.003) loss 1.2402 (1.2640) acc 71.8750 (68.8240) lr 2.0000e-03 eta 9:09:51 +epoch [2/50] batch [385/500] time 1.363 (1.368) data 0.000 (0.003) loss 1.5889 (1.2639) acc 68.7500 (68.8799) lr 2.0000e-03 eta 9:09:42 +epoch [2/50] batch [390/500] time 1.345 (1.368) data 0.000 (0.003) loss 1.3213 (1.2638) acc 71.8750 (68.9183) lr 2.0000e-03 eta 9:09:33 +epoch [2/50] batch [395/500] time 1.352 (1.367) data 0.001 (0.003) loss 1.2920 (1.2618) acc 71.8750 (68.9873) lr 2.0000e-03 eta 9:09:21 +epoch [2/50] batch [400/500] time 1.498 (1.368) data 0.000 (0.003) loss 1.4443 (1.2608) acc 68.7500 (69.0391) lr 2.0000e-03 eta 9:09:20 +epoch [2/50] batch [405/500] time 1.363 (1.368) data 0.000 (0.003) loss 1.0762 (1.2595) acc 78.1250 (69.0895) lr 2.0000e-03 eta 9:09:12 +epoch [2/50] batch [410/500] time 1.359 (1.368) data 0.000 (0.003) loss 1.6279 (1.2572) acc 68.7500 (69.1311) lr 2.0000e-03 eta 9:09:03 +epoch [2/50] batch [415/500] time 1.350 (1.367) data 0.000 (0.003) loss 1.1611 (1.2549) acc 75.0000 (69.1792) lr 2.0000e-03 eta 9:08:54 +epoch [2/50] batch [420/500] time 1.350 (1.367) data 0.000 (0.003) loss 1.9873 (1.2568) acc 53.1250 (69.1220) lr 2.0000e-03 eta 9:08:44 +epoch [2/50] batch [425/500] time 1.355 (1.367) data 0.000 (0.003) loss 1.9697 (1.2591) acc 62.5000 (69.1103) lr 2.0000e-03 eta 9:08:33 +epoch [2/50] batch [430/500] time 1.353 (1.367) data 0.000 (0.003) loss 1.1445 (1.2595) acc 68.7500 (69.1206) lr 2.0000e-03 eta 9:08:22 +epoch [2/50] batch [435/500] time 1.364 (1.367) data 0.000 (0.003) loss 1.5254 (1.2595) acc 56.2500 (69.1164) lr 2.0000e-03 eta 9:08:14 +epoch [2/50] batch [440/500] time 1.366 (1.367) data 0.000 (0.003) loss 0.6440 (1.2595) acc 81.2500 (69.0909) lr 2.0000e-03 eta 9:08:06 +epoch [2/50] 
batch [445/500] time 1.372 (1.367) data 0.000 (0.002) loss 1.9980 (1.2612) acc 62.5000 (69.0730) lr 2.0000e-03 eta 9:07:57 +epoch [2/50] batch [450/500] time 1.339 (1.367) data 0.000 (0.002) loss 1.9062 (1.2605) acc 62.5000 (69.1181) lr 2.0000e-03 eta 9:07:52 +epoch [2/50] batch [455/500] time 1.363 (1.367) data 0.001 (0.002) loss 0.9355 (1.2558) acc 78.1250 (69.1964) lr 2.0000e-03 eta 9:07:45 +epoch [2/50] batch [460/500] time 1.337 (1.367) data 0.001 (0.002) loss 1.4209 (1.2555) acc 71.8750 (69.2595) lr 2.0000e-03 eta 9:07:37 +epoch [2/50] batch [465/500] time 1.360 (1.367) data 0.000 (0.002) loss 1.2129 (1.2578) acc 71.8750 (69.2137) lr 2.0000e-03 eta 9:07:29 +epoch [2/50] batch [470/500] time 1.387 (1.367) data 0.001 (0.002) loss 1.1318 (1.2554) acc 71.8750 (69.2753) lr 2.0000e-03 eta 9:07:21 +epoch [2/50] batch [475/500] time 1.349 (1.367) data 0.001 (0.002) loss 1.4287 (1.2546) acc 62.5000 (69.2697) lr 2.0000e-03 eta 9:07:12 +epoch [2/50] batch [480/500] time 1.374 (1.367) data 0.000 (0.002) loss 1.0303 (1.2567) acc 75.0000 (69.2253) lr 2.0000e-03 eta 9:07:05 +epoch [2/50] batch [485/500] time 1.360 (1.367) data 0.001 (0.002) loss 0.8003 (1.2569) acc 78.1250 (69.2268) lr 2.0000e-03 eta 9:06:58 +epoch [2/50] batch [490/500] time 1.400 (1.367) data 0.000 (0.002) loss 1.7256 (1.2569) acc 75.0000 (69.2921) lr 2.0000e-03 eta 9:06:53 +epoch [2/50] batch [495/500] time 1.349 (1.367) data 0.000 (0.002) loss 0.8784 (1.2580) acc 75.0000 (69.2677) lr 2.0000e-03 eta 9:06:44 +epoch [2/50] batch [500/500] time 1.361 (1.367) data 0.000 (0.002) loss 1.3203 (1.2581) acc 56.2500 (69.2375) lr 1.9980e-03 eta 9:06:41 +epoch [3/50] batch [5/500] time 1.367 (1.526) data 0.000 (0.164) loss 1.3965 (1.3100) acc 75.0000 (66.8750) lr 1.9980e-03 eta 10:10:21 +epoch [3/50] batch [10/500] time 1.355 (1.445) data 0.000 (0.082) loss 1.2119 (1.2591) acc 71.8750 (67.5000) lr 1.9980e-03 eta 9:37:50 +epoch [3/50] batch [15/500] time 1.375 (1.420) data 0.000 (0.055) loss 0.8970 (1.2281) acc 
75.0000 (68.1250) lr 1.9980e-03 eta 9:27:40 +epoch [3/50] batch [20/500] time 1.366 (1.406) data 0.001 (0.041) loss 1.4326 (1.2584) acc 65.6250 (68.1250) lr 1.9980e-03 eta 9:21:45 +epoch [3/50] batch [25/500] time 1.351 (1.397) data 0.000 (0.033) loss 1.1221 (1.2494) acc 78.1250 (67.8750) lr 1.9980e-03 eta 9:18:04 +epoch [3/50] batch [30/500] time 1.368 (1.391) data 0.000 (0.028) loss 1.2021 (1.2620) acc 65.6250 (67.0833) lr 1.9980e-03 eta 9:15:40 +epoch [3/50] batch [35/500] time 1.350 (1.387) data 0.000 (0.024) loss 0.9160 (1.2170) acc 84.3750 (68.3929) lr 1.9980e-03 eta 9:14:01 +epoch [3/50] batch [40/500] time 1.377 (1.384) data 0.000 (0.021) loss 1.1143 (1.1854) acc 68.7500 (69.2969) lr 1.9980e-03 eta 9:12:47 +epoch [3/50] batch [45/500] time 1.366 (1.383) data 0.000 (0.019) loss 1.3701 (1.1966) acc 65.6250 (69.7222) lr 1.9980e-03 eta 9:12:15 +epoch [3/50] batch [50/500] time 1.363 (1.382) data 0.000 (0.017) loss 0.9849 (1.2197) acc 78.1250 (69.5000) lr 1.9980e-03 eta 9:11:40 +epoch [3/50] batch [55/500] time 1.347 (1.380) data 0.000 (0.015) loss 1.2959 (1.2008) acc 71.8750 (70.2273) lr 1.9980e-03 eta 9:10:44 +epoch [3/50] batch [60/500] time 1.351 (1.381) data 0.000 (0.014) loss 1.1777 (1.2303) acc 71.8750 (69.7396) lr 1.9980e-03 eta 9:11:02 +epoch [3/50] batch [65/500] time 1.365 (1.380) data 0.000 (0.013) loss 1.2510 (1.2264) acc 71.8750 (69.9038) lr 1.9980e-03 eta 9:10:20 +epoch [3/50] batch [70/500] time 1.367 (1.378) data 0.000 (0.012) loss 0.8843 (1.2124) acc 78.1250 (70.0893) lr 1.9980e-03 eta 9:09:37 +epoch [3/50] batch [75/500] time 1.363 (1.377) data 0.000 (0.011) loss 1.5859 (1.2154) acc 50.0000 (69.5417) lr 1.9980e-03 eta 9:09:11 +epoch [3/50] batch [80/500] time 1.370 (1.377) data 0.000 (0.011) loss 1.3760 (1.2224) acc 71.8750 (69.4531) lr 1.9980e-03 eta 9:09:07 +epoch [3/50] batch [85/500] time 1.361 (1.376) data 0.000 (0.010) loss 0.7153 (1.2115) acc 81.2500 (69.7794) lr 1.9980e-03 eta 9:08:35 +epoch [3/50] batch [90/500] time 1.362 (1.376) 
data 0.000 (0.009) loss 1.1611 (1.2206) acc 75.0000 (69.5486) lr 1.9980e-03 eta 9:08:17 +epoch [3/50] batch [95/500] time 1.366 (1.375) data 0.000 (0.009) loss 1.3623 (1.2208) acc 59.3750 (69.4737) lr 1.9980e-03 eta 9:07:57 +epoch [3/50] batch [100/500] time 1.354 (1.375) data 0.000 (0.009) loss 1.4170 (1.2178) acc 65.6250 (69.4062) lr 1.9980e-03 eta 9:07:40 +epoch [3/50] batch [105/500] time 1.347 (1.375) data 0.000 (0.008) loss 1.1504 (1.2160) acc 75.0000 (69.3750) lr 1.9980e-03 eta 9:07:43 +epoch [3/50] batch [110/500] time 1.368 (1.375) data 0.000 (0.008) loss 1.1045 (1.2269) acc 65.6250 (69.2330) lr 1.9980e-03 eta 9:07:21 +epoch [3/50] batch [115/500] time 1.370 (1.374) data 0.000 (0.007) loss 1.1250 (1.2154) acc 65.6250 (69.4293) lr 1.9980e-03 eta 9:07:00 +epoch [3/50] batch [120/500] time 1.362 (1.374) data 0.000 (0.007) loss 1.2324 (1.2053) acc 68.7500 (69.7396) lr 1.9980e-03 eta 9:06:49 +epoch [3/50] batch [125/500] time 1.382 (1.374) data 0.000 (0.007) loss 1.3672 (1.2077) acc 62.5000 (69.7500) lr 1.9980e-03 eta 9:06:38 +epoch [3/50] batch [130/500] time 1.363 (1.374) data 0.000 (0.007) loss 1.4092 (1.2048) acc 62.5000 (69.7115) lr 1.9980e-03 eta 9:06:27 +epoch [3/50] batch [135/500] time 1.370 (1.374) data 0.000 (0.006) loss 1.6357 (1.1996) acc 53.1250 (69.8148) lr 1.9980e-03 eta 9:06:21 +epoch [3/50] batch [140/500] time 1.356 (1.374) data 0.000 (0.006) loss 1.9775 (1.2024) acc 65.6250 (69.9107) lr 1.9980e-03 eta 9:06:11 +epoch [3/50] batch [145/500] time 1.356 (1.373) data 0.000 (0.006) loss 1.4160 (1.2030) acc 71.8750 (69.8491) lr 1.9980e-03 eta 9:05:52 +epoch [3/50] batch [150/500] time 1.346 (1.372) data 0.000 (0.006) loss 1.9150 (1.2012) acc 62.5000 (70.0417) lr 1.9980e-03 eta 9:05:29 +epoch [3/50] batch [155/500] time 1.372 (1.372) data 0.000 (0.006) loss 0.6841 (1.1987) acc 81.2500 (70.1210) lr 1.9980e-03 eta 9:05:23 +epoch [3/50] batch [160/500] time 1.360 (1.372) data 0.000 (0.005) loss 0.6108 (1.1946) acc 78.1250 (70.1953) lr 1.9980e-03 eta 
9:04:59 +epoch [3/50] batch [165/500] time 1.348 (1.371) data 0.000 (0.005) loss 1.8906 (1.2041) acc 56.2500 (70.0189) lr 1.9980e-03 eta 9:04:42 +epoch [3/50] batch [170/500] time 1.369 (1.371) data 0.000 (0.005) loss 1.1846 (1.2066) acc 68.7500 (69.9816) lr 1.9980e-03 eta 9:04:33 +epoch [3/50] batch [175/500] time 1.359 (1.371) data 0.000 (0.005) loss 1.7412 (1.2126) acc 71.8750 (69.9107) lr 1.9980e-03 eta 9:04:21 +epoch [3/50] batch [180/500] time 1.346 (1.371) data 0.000 (0.005) loss 1.5527 (1.2138) acc 59.3750 (69.9306) lr 1.9980e-03 eta 9:04:09 +epoch [3/50] batch [185/500] time 1.353 (1.370) data 0.000 (0.005) loss 1.1865 (1.2159) acc 62.5000 (69.7973) lr 1.9980e-03 eta 9:03:54 +epoch [3/50] batch [190/500] time 1.359 (1.370) data 0.001 (0.005) loss 1.5918 (1.2169) acc 59.3750 (69.7039) lr 1.9980e-03 eta 9:03:43 +epoch [3/50] batch [195/500] time 1.372 (1.370) data 0.001 (0.005) loss 1.4062 (1.2163) acc 68.7500 (69.7436) lr 1.9980e-03 eta 9:03:35 +epoch [3/50] batch [200/500] time 1.366 (1.370) data 0.000 (0.004) loss 1.5264 (1.2168) acc 65.6250 (69.6562) lr 1.9980e-03 eta 9:03:25 +epoch [3/50] batch [205/500] time 1.347 (1.370) data 0.001 (0.004) loss 0.8989 (1.2164) acc 81.2500 (69.7409) lr 1.9980e-03 eta 9:03:29 +epoch [3/50] batch [210/500] time 1.341 (1.370) data 0.001 (0.004) loss 1.4277 (1.2179) acc 71.8750 (69.6577) lr 1.9980e-03 eta 9:03:20 +epoch [3/50] batch [215/500] time 1.363 (1.370) data 0.000 (0.004) loss 0.7559 (1.2186) acc 78.1250 (69.6366) lr 1.9980e-03 eta 9:03:07 +epoch [3/50] batch [220/500] time 1.360 (1.370) data 0.000 (0.004) loss 1.2773 (1.2178) acc 65.6250 (69.5597) lr 1.9980e-03 eta 9:02:56 +epoch [3/50] batch [225/500] time 1.370 (1.370) data 0.000 (0.004) loss 1.2832 (1.2201) acc 78.1250 (69.5833) lr 1.9980e-03 eta 9:02:47 +epoch [3/50] batch [230/500] time 1.368 (1.370) data 0.000 (0.004) loss 1.4629 (1.2227) acc 59.3750 (69.5516) lr 1.9980e-03 eta 9:02:39 +epoch [3/50] batch [235/500] time 1.371 (1.370) data 0.000 (0.004) loss 
0.8120 (1.2267) acc 87.5000 (69.5612) lr 1.9980e-03 eta 9:02:28 +epoch [3/50] batch [240/500] time 1.355 (1.369) data 0.000 (0.004) loss 1.2852 (1.2287) acc 68.7500 (69.5443) lr 1.9980e-03 eta 9:02:13 +epoch [3/50] batch [245/500] time 1.377 (1.369) data 0.000 (0.004) loss 1.0088 (1.2248) acc 75.0000 (69.6173) lr 1.9980e-03 eta 9:02:04 +epoch [3/50] batch [250/500] time 1.368 (1.370) data 0.000 (0.004) loss 1.4209 (1.2242) acc 62.5000 (69.5375) lr 1.9980e-03 eta 9:02:08 +epoch [3/50] batch [255/500] time 1.357 (1.369) data 0.000 (0.004) loss 1.4893 (1.2219) acc 59.3750 (69.5098) lr 1.9980e-03 eta 9:01:55 +epoch [3/50] batch [260/500] time 1.330 (1.369) data 0.000 (0.004) loss 1.1230 (1.2221) acc 65.6250 (69.4351) lr 1.9980e-03 eta 9:01:41 +epoch [3/50] batch [265/500] time 1.359 (1.369) data 0.000 (0.003) loss 1.3643 (1.2223) acc 62.5000 (69.4222) lr 1.9980e-03 eta 9:01:33 +epoch [3/50] batch [270/500] time 1.357 (1.369) data 0.000 (0.003) loss 1.3320 (1.2272) acc 62.5000 (69.2361) lr 1.9980e-03 eta 9:01:27 +epoch [3/50] batch [275/500] time 1.358 (1.369) data 0.000 (0.003) loss 1.7129 (1.2294) acc 59.3750 (69.2273) lr 1.9980e-03 eta 9:01:13 +epoch [3/50] batch [280/500] time 1.373 (1.369) data 0.000 (0.003) loss 0.9673 (1.2292) acc 78.1250 (69.2522) lr 1.9980e-03 eta 9:01:01 +epoch [3/50] batch [285/500] time 1.360 (1.368) data 0.001 (0.003) loss 1.1660 (1.2296) acc 68.7500 (69.2215) lr 1.9980e-03 eta 9:00:53 +epoch [3/50] batch [290/500] time 1.352 (1.368) data 0.000 (0.003) loss 1.4316 (1.2274) acc 56.2500 (69.1703) lr 1.9980e-03 eta 9:00:46 +epoch [3/50] batch [295/500] time 1.368 (1.369) data 0.000 (0.003) loss 1.0322 (1.2261) acc 68.7500 (69.1949) lr 1.9980e-03 eta 9:00:41 +epoch [3/50] batch [300/500] time 1.346 (1.368) data 0.000 (0.003) loss 1.0352 (1.2250) acc 65.6250 (69.1979) lr 1.9980e-03 eta 9:00:28 +epoch [3/50] batch [305/500] time 1.372 (1.368) data 0.000 (0.003) loss 1.0850 (1.2253) acc 78.1250 (69.1803) lr 1.9980e-03 eta 9:00:15 +epoch [3/50] 
batch [310/500] time 1.338 (1.368) data 0.000 (0.003) loss 1.4893 (1.2256) acc 68.7500 (69.1734) lr 1.9980e-03 eta 9:00:04 +epoch [3/50] batch [315/500] time 1.373 (1.368) data 0.000 (0.003) loss 0.8833 (1.2282) acc 68.7500 (69.1270) lr 1.9980e-03 eta 8:59:52 +epoch [3/50] batch [320/500] time 1.361 (1.367) data 0.001 (0.003) loss 0.9141 (1.2251) acc 75.0000 (69.1309) lr 1.9980e-03 eta 8:59:41 +epoch [3/50] batch [325/500] time 1.362 (1.368) data 0.000 (0.003) loss 1.3154 (1.2278) acc 65.6250 (69.0481) lr 1.9980e-03 eta 8:59:36 +epoch [3/50] batch [330/500] time 1.358 (1.367) data 0.000 (0.003) loss 1.2266 (1.2261) acc 53.1250 (69.0625) lr 1.9980e-03 eta 8:59:28 +epoch [3/50] batch [335/500] time 1.334 (1.367) data 0.000 (0.003) loss 1.7158 (1.2292) acc 50.0000 (68.9739) lr 1.9980e-03 eta 8:59:19 +epoch [3/50] batch [340/500] time 1.379 (1.367) data 0.001 (0.003) loss 1.5186 (1.2312) acc 59.3750 (68.9614) lr 1.9980e-03 eta 8:59:13 +epoch [3/50] batch [345/500] time 1.491 (1.368) data 0.000 (0.003) loss 1.0449 (1.2307) acc 71.8750 (68.9493) lr 1.9980e-03 eta 8:59:12 +epoch [3/50] batch [350/500] time 1.371 (1.368) data 0.000 (0.003) loss 1.2959 (1.2316) acc 65.6250 (68.9643) lr 1.9980e-03 eta 8:59:07 +epoch [3/50] batch [355/500] time 1.379 (1.368) data 0.000 (0.003) loss 1.1777 (1.2299) acc 65.6250 (68.9173) lr 1.9980e-03 eta 8:59:01 +epoch [3/50] batch [360/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.4180 (1.2266) acc 62.5000 (68.9931) lr 1.9980e-03 eta 8:58:52 +epoch [3/50] batch [365/500] time 1.359 (1.368) data 0.000 (0.003) loss 1.1406 (1.2281) acc 59.3750 (68.9983) lr 1.9980e-03 eta 8:58:42 +epoch [3/50] batch [370/500] time 1.364 (1.367) data 0.001 (0.003) loss 1.0615 (1.2282) acc 75.0000 (69.0287) lr 1.9980e-03 eta 8:58:33 +epoch [3/50] batch [375/500] time 1.376 (1.367) data 0.000 (0.003) loss 1.6006 (1.2316) acc 65.6250 (68.9667) lr 1.9980e-03 eta 8:58:25 +epoch [3/50] batch [380/500] time 1.384 (1.367) data 0.000 (0.003) loss 1.3271 (1.2326) acc 
65.6250 (68.9391) lr 1.9980e-03 eta 8:58:19 +epoch [3/50] batch [385/500] time 1.358 (1.367) data 0.001 (0.003) loss 0.8999 (1.2326) acc 68.7500 (68.9367) lr 1.9980e-03 eta 8:58:12 +epoch [3/50] batch [390/500] time 1.353 (1.368) data 0.000 (0.003) loss 1.4990 (1.2359) acc 65.6250 (68.8542) lr 1.9980e-03 eta 8:58:10 +epoch [3/50] batch [395/500] time 1.357 (1.368) data 0.000 (0.002) loss 1.0674 (1.2349) acc 75.0000 (68.9003) lr 1.9980e-03 eta 8:58:00 +epoch [3/50] batch [400/500] time 1.353 (1.367) data 0.001 (0.002) loss 1.5459 (1.2344) acc 53.1250 (68.8984) lr 1.9980e-03 eta 8:57:50 +epoch [3/50] batch [405/500] time 1.359 (1.367) data 0.001 (0.002) loss 0.8926 (1.2310) acc 75.0000 (68.9815) lr 1.9980e-03 eta 8:57:42 +epoch [3/50] batch [410/500] time 1.375 (1.367) data 0.000 (0.002) loss 1.3770 (1.2323) acc 59.3750 (68.9177) lr 1.9980e-03 eta 8:57:35 +epoch [3/50] batch [415/500] time 1.350 (1.367) data 0.000 (0.002) loss 0.7705 (1.2309) acc 75.0000 (68.9759) lr 1.9980e-03 eta 8:57:28 +epoch [3/50] batch [420/500] time 1.360 (1.367) data 0.000 (0.002) loss 1.3496 (1.2336) acc 62.5000 (68.9435) lr 1.9980e-03 eta 8:57:20 +epoch [3/50] batch [425/500] time 1.361 (1.367) data 0.000 (0.002) loss 2.0098 (1.2356) acc 62.5000 (68.8971) lr 1.9980e-03 eta 8:57:11 +epoch [3/50] batch [430/500] time 1.377 (1.367) data 0.000 (0.002) loss 1.1934 (1.2352) acc 59.3750 (68.9026) lr 1.9980e-03 eta 8:57:05 +epoch [3/50] batch [435/500] time 1.371 (1.367) data 0.000 (0.002) loss 0.7139 (1.2350) acc 75.0000 (68.9152) lr 1.9980e-03 eta 8:56:59 +epoch [3/50] batch [440/500] time 1.365 (1.367) data 0.000 (0.002) loss 1.2715 (1.2332) acc 65.6250 (68.9347) lr 1.9980e-03 eta 8:56:51 +epoch [3/50] batch [445/500] time 1.355 (1.367) data 0.000 (0.002) loss 1.1357 (1.2309) acc 65.6250 (68.9537) lr 1.9980e-03 eta 8:56:44 +epoch [3/50] batch [450/500] time 1.362 (1.367) data 0.000 (0.002) loss 1.3027 (1.2300) acc 59.3750 (68.9653) lr 1.9980e-03 eta 8:56:36 +epoch [3/50] batch [455/500] time 
1.372 (1.367) data 0.001 (0.002) loss 0.9302 (1.2319) acc 81.2500 (68.9080) lr 1.9980e-03 eta 8:56:26 +epoch [3/50] batch [460/500] time 1.348 (1.367) data 0.000 (0.002) loss 0.9902 (1.2300) acc 78.1250 (68.9130) lr 1.9980e-03 eta 8:56:16 +epoch [3/50] batch [465/500] time 1.375 (1.367) data 0.000 (0.002) loss 1.0908 (1.2271) acc 71.8750 (68.9852) lr 1.9980e-03 eta 8:56:08 +epoch [3/50] batch [470/500] time 1.362 (1.367) data 0.000 (0.002) loss 1.5586 (1.2269) acc 65.6250 (69.0027) lr 1.9980e-03 eta 8:55:58 +epoch [3/50] batch [475/500] time 1.361 (1.367) data 0.000 (0.002) loss 0.8652 (1.2247) acc 75.0000 (69.0789) lr 1.9980e-03 eta 8:55:52 +epoch [3/50] batch [480/500] time 1.369 (1.367) data 0.000 (0.002) loss 0.5410 (1.2260) acc 81.2500 (69.0560) lr 1.9980e-03 eta 8:55:44 +epoch [3/50] batch [485/500] time 1.370 (1.367) data 0.001 (0.002) loss 1.0000 (1.2259) acc 75.0000 (69.0593) lr 1.9980e-03 eta 8:55:37 +epoch [3/50] batch [490/500] time 1.361 (1.367) data 0.000 (0.002) loss 1.3896 (1.2264) acc 65.6250 (69.0816) lr 1.9980e-03 eta 8:55:35 +epoch [3/50] batch [495/500] time 1.337 (1.367) data 0.000 (0.002) loss 1.7930 (1.2280) acc 59.3750 (69.0530) lr 1.9980e-03 eta 8:55:24 +epoch [3/50] batch [500/500] time 1.369 (1.367) data 0.000 (0.002) loss 1.6221 (1.2307) acc 59.3750 (69.0438) lr 1.9921e-03 eta 8:55:14 +epoch [4/50] batch [5/500] time 1.352 (1.547) data 0.000 (0.180) loss 1.9072 (1.5724) acc 50.0000 (59.3750) lr 1.9921e-03 eta 10:05:39 +epoch [4/50] batch [10/500] time 1.355 (1.450) data 0.000 (0.090) loss 0.9976 (1.3535) acc 78.1250 (64.6875) lr 1.9921e-03 eta 9:27:52 +epoch [4/50] batch [15/500] time 1.375 (1.424) data 0.000 (0.060) loss 1.3496 (1.2838) acc 71.8750 (66.4583) lr 1.9921e-03 eta 9:17:33 +epoch [4/50] batch [20/500] time 1.383 (1.413) data 0.000 (0.045) loss 1.1094 (1.2851) acc 68.7500 (67.3438) lr 1.9921e-03 eta 9:12:50 +epoch [4/50] batch [25/500] time 1.360 (1.404) data 0.000 (0.036) loss 0.9048 (1.2370) acc 75.0000 (68.7500) lr 
1.9921e-03 eta 9:09:15 +epoch [4/50] batch [30/500] time 1.373 (1.399) data 0.000 (0.030) loss 1.8848 (1.2425) acc 62.5000 (68.6458) lr 1.9921e-03 eta 9:07:09 +epoch [4/50] batch [35/500] time 1.372 (1.394) data 0.000 (0.026) loss 0.9092 (1.2032) acc 78.1250 (69.2857) lr 1.9921e-03 eta 9:04:59 +epoch [4/50] batch [40/500] time 1.351 (1.391) data 0.000 (0.023) loss 1.4287 (1.2081) acc 59.3750 (68.9844) lr 1.9921e-03 eta 9:03:52 +epoch [4/50] batch [45/500] time 1.390 (1.393) data 0.000 (0.020) loss 1.2656 (1.2152) acc 75.0000 (68.9583) lr 1.9921e-03 eta 9:04:26 +epoch [4/50] batch [50/500] time 1.351 (1.390) data 0.000 (0.018) loss 1.0693 (1.2178) acc 78.1250 (68.8750) lr 1.9921e-03 eta 9:03:13 +epoch [4/50] batch [55/500] time 1.371 (1.388) data 0.000 (0.017) loss 1.5664 (1.2163) acc 56.2500 (68.5795) lr 1.9921e-03 eta 9:02:14 +epoch [4/50] batch [60/500] time 1.352 (1.385) data 0.000 (0.015) loss 1.2764 (1.2279) acc 59.3750 (68.5417) lr 1.9921e-03 eta 9:01:07 +epoch [4/50] batch [65/500] time 1.383 (1.383) data 0.000 (0.014) loss 1.6162 (1.2349) acc 62.5000 (68.5577) lr 1.9921e-03 eta 9:00:22 +epoch [4/50] batch [70/500] time 1.359 (1.382) data 0.000 (0.013) loss 1.1641 (1.2330) acc 68.7500 (68.4821) lr 1.9921e-03 eta 8:59:29 +epoch [4/50] batch [75/500] time 1.363 (1.380) data 0.000 (0.012) loss 1.2041 (1.2402) acc 71.8750 (68.5417) lr 1.9921e-03 eta 8:58:43 +epoch [4/50] batch [80/500] time 1.360 (1.379) data 0.001 (0.012) loss 1.2988 (1.2405) acc 65.6250 (68.7109) lr 1.9921e-03 eta 8:58:08 +epoch [4/50] batch [85/500] time 1.349 (1.378) data 0.000 (0.011) loss 1.3779 (1.2387) acc 71.8750 (69.0441) lr 1.9921e-03 eta 8:57:34 +epoch [4/50] batch [90/500] time 1.378 (1.378) data 0.000 (0.010) loss 1.1699 (1.2246) acc 78.1250 (69.5833) lr 1.9921e-03 eta 8:57:39 +epoch [4/50] batch [95/500] time 1.376 (1.378) data 0.000 (0.010) loss 1.3535 (1.2140) acc 62.5000 (69.6711) lr 1.9921e-03 eta 8:57:25 +epoch [4/50] batch [100/500] time 1.361 (1.377) data 0.000 (0.009) loss 
1.2705 (1.2072) acc 65.6250 (69.8438) lr 1.9921e-03 eta 8:56:55 +epoch [4/50] batch [105/500] time 1.364 (1.376) data 0.000 (0.009) loss 1.1035 (1.2107) acc 78.1250 (69.9702) lr 1.9921e-03 eta 8:56:36 +epoch [4/50] batch [110/500] time 1.350 (1.375) data 0.000 (0.009) loss 0.7671 (1.1996) acc 81.2500 (70.1136) lr 1.9921e-03 eta 8:56:07 +epoch [4/50] batch [115/500] time 1.352 (1.374) data 0.000 (0.008) loss 1.1318 (1.2004) acc 62.5000 (70.2174) lr 1.9921e-03 eta 8:55:34 +epoch [4/50] batch [120/500] time 1.352 (1.373) data 0.000 (0.008) loss 1.0303 (1.1956) acc 75.0000 (70.2865) lr 1.9921e-03 eta 8:55:12 +epoch [4/50] batch [125/500] time 1.339 (1.373) data 0.000 (0.008) loss 1.7617 (1.2028) acc 56.2500 (70.1000) lr 1.9921e-03 eta 8:54:52 +epoch [4/50] batch [130/500] time 1.373 (1.373) data 0.000 (0.007) loss 1.0166 (1.2000) acc 78.1250 (70.2404) lr 1.9921e-03 eta 8:54:42 +epoch [4/50] batch [135/500] time 1.375 (1.373) data 0.000 (0.007) loss 1.3496 (1.1920) acc 68.7500 (70.5324) lr 1.9921e-03 eta 8:54:30 +epoch [4/50] batch [140/500] time 1.356 (1.372) data 0.000 (0.007) loss 0.9253 (1.1948) acc 75.0000 (70.4464) lr 1.9921e-03 eta 8:54:15 +epoch [4/50] batch [145/500] time 1.372 (1.372) data 0.006 (0.007) loss 0.8379 (1.1911) acc 81.2500 (70.5603) lr 1.9921e-03 eta 8:53:59 +epoch [4/50] batch [150/500] time 1.384 (1.372) data 0.001 (0.006) loss 1.7061 (1.1909) acc 65.6250 (70.7083) lr 1.9921e-03 eta 8:53:53 +epoch [4/50] batch [155/500] time 1.363 (1.372) data 0.000 (0.006) loss 1.3623 (1.1864) acc 65.6250 (70.7460) lr 1.9921e-03 eta 8:53:41 +epoch [4/50] batch [160/500] time 1.349 (1.371) data 0.000 (0.006) loss 0.9429 (1.1863) acc 75.0000 (70.6445) lr 1.9921e-03 eta 8:53:27 +epoch [4/50] batch [165/500] time 1.391 (1.371) data 0.000 (0.006) loss 1.3779 (1.1869) acc 65.6250 (70.4924) lr 1.9921e-03 eta 8:53:15 +epoch [4/50] batch [170/500] time 1.378 (1.371) data 0.000 (0.006) loss 1.8555 (1.1891) acc 56.2500 (70.4779) lr 1.9921e-03 eta 8:53:10 +epoch [4/50] 
batch [175/500] time 1.370 (1.371) data 0.001 (0.006) loss 1.3213 (1.1874) acc 65.6250 (70.4821) lr 1.9921e-03 eta 8:52:58 +epoch [4/50] batch [180/500] time 1.378 (1.371) data 0.000 (0.005) loss 0.6221 (1.1948) acc 81.2500 (70.3993) lr 1.9921e-03 eta 8:52:54 +epoch [4/50] batch [185/500] time 1.369 (1.371) data 0.000 (0.005) loss 1.6963 (1.1984) acc 59.3750 (70.2365) lr 1.9921e-03 eta 8:52:46 +epoch [4/50] batch [190/500] time 1.354 (1.372) data 0.001 (0.005) loss 1.0176 (1.1977) acc 78.1250 (70.3125) lr 1.9921e-03 eta 8:52:51 +epoch [4/50] batch [195/500] time 1.361 (1.371) data 0.001 (0.005) loss 1.4512 (1.1940) acc 71.8750 (70.4647) lr 1.9921e-03 eta 8:52:39 +epoch [4/50] batch [200/500] time 1.357 (1.371) data 0.000 (0.005) loss 1.3740 (1.1899) acc 68.7500 (70.5000) lr 1.9921e-03 eta 8:52:21 +epoch [4/50] batch [205/500] time 1.363 (1.371) data 0.000 (0.005) loss 1.0508 (1.1856) acc 71.8750 (70.5793) lr 1.9921e-03 eta 8:52:09 +epoch [4/50] batch [210/500] time 1.351 (1.370) data 0.000 (0.005) loss 1.4043 (1.1901) acc 68.7500 (70.5804) lr 1.9921e-03 eta 8:51:53 +epoch [4/50] batch [215/500] time 1.358 (1.370) data 0.001 (0.005) loss 1.2686 (1.1962) acc 59.3750 (70.4651) lr 1.9921e-03 eta 8:51:37 +epoch [4/50] batch [220/500] time 1.358 (1.370) data 0.000 (0.004) loss 1.3740 (1.2005) acc 62.5000 (70.3409) lr 1.9921e-03 eta 8:51:26 +epoch [4/50] batch [225/500] time 1.373 (1.370) data 0.001 (0.004) loss 1.2246 (1.2061) acc 62.5000 (70.2917) lr 1.9921e-03 eta 8:51:17 +epoch [4/50] batch [230/500] time 1.347 (1.369) data 0.000 (0.004) loss 1.6982 (1.2091) acc 65.6250 (70.2446) lr 1.9921e-03 eta 8:51:03 +epoch [4/50] batch [235/500] time 1.380 (1.370) data 0.000 (0.004) loss 1.2656 (1.2096) acc 62.5000 (70.2660) lr 1.9921e-03 eta 8:51:07 +epoch [4/50] batch [240/500] time 1.353 (1.370) data 0.000 (0.004) loss 1.4619 (1.2095) acc 65.6250 (70.1953) lr 1.9921e-03 eta 8:50:57 +epoch [4/50] batch [245/500] time 1.341 (1.370) data 0.000 (0.004) loss 1.3721 (1.2111) acc 
53.1250 (70.1148) lr 1.9921e-03 eta 8:50:47 +epoch [4/50] batch [250/500] time 1.362 (1.369) data 0.000 (0.004) loss 0.8164 (1.2086) acc 81.2500 (70.2000) lr 1.9921e-03 eta 8:50:39 +epoch [4/50] batch [255/500] time 1.344 (1.369) data 0.000 (0.004) loss 1.2197 (1.2065) acc 68.7500 (70.1838) lr 1.9921e-03 eta 8:50:30 +epoch [4/50] batch [260/500] time 1.363 (1.369) data 0.000 (0.004) loss 0.9243 (1.2043) acc 78.1250 (70.1683) lr 1.9921e-03 eta 8:50:25 +epoch [4/50] batch [265/500] time 1.341 (1.369) data 0.000 (0.004) loss 1.0537 (1.2043) acc 62.5000 (70.0943) lr 1.9921e-03 eta 8:50:11 +epoch [4/50] batch [270/500] time 1.361 (1.369) data 0.000 (0.004) loss 1.5186 (1.2053) acc 65.6250 (70.0231) lr 1.9921e-03 eta 8:49:57 +epoch [4/50] batch [275/500] time 1.369 (1.369) data 0.000 (0.004) loss 1.4678 (1.2051) acc 59.3750 (70.0455) lr 1.9921e-03 eta 8:49:50 +epoch [4/50] batch [280/500] time 1.356 (1.369) data 0.000 (0.004) loss 0.5225 (1.2023) acc 93.7500 (70.1004) lr 1.9921e-03 eta 8:49:41 +epoch [4/50] batch [285/500] time 1.352 (1.369) data 0.000 (0.004) loss 1.3311 (1.2035) acc 65.6250 (70.0768) lr 1.9921e-03 eta 8:49:32 +epoch [4/50] batch [290/500] time 1.391 (1.369) data 0.000 (0.003) loss 1.1992 (1.2051) acc 62.5000 (70.0000) lr 1.9921e-03 eta 8:49:25 +epoch [4/50] batch [295/500] time 1.375 (1.369) data 0.000 (0.003) loss 1.1289 (1.2051) acc 65.6250 (69.9258) lr 1.9921e-03 eta 8:49:23 +epoch [4/50] batch [300/500] time 1.373 (1.369) data 0.000 (0.003) loss 0.8823 (1.2049) acc 71.8750 (69.9271) lr 1.9921e-03 eta 8:49:19 +epoch [4/50] batch [305/500] time 1.361 (1.369) data 0.000 (0.003) loss 1.5352 (1.2099) acc 62.5000 (69.8361) lr 1.9921e-03 eta 8:49:09 +epoch [4/50] batch [310/500] time 1.370 (1.369) data 0.000 (0.003) loss 0.6265 (1.2029) acc 84.3750 (70.0504) lr 1.9921e-03 eta 8:49:00 +epoch [4/50] batch [315/500] time 1.372 (1.369) data 0.000 (0.003) loss 2.1016 (1.2080) acc 46.8750 (69.9405) lr 1.9921e-03 eta 8:48:54 +epoch [4/50] batch [320/500] time 
1.348 (1.369) data 0.000 (0.003) loss 1.3740 (1.2108) acc 65.6250 (69.8926) lr 1.9921e-03 eta 8:48:45 +epoch [4/50] batch [325/500] time 1.350 (1.368) data 0.000 (0.003) loss 1.1699 (1.2088) acc 75.0000 (69.9904) lr 1.9921e-03 eta 8:48:34 +epoch [4/50] batch [330/500] time 1.486 (1.369) data 0.000 (0.003) loss 1.8174 (1.2121) acc 62.5000 (69.9053) lr 1.9921e-03 eta 8:48:32 +epoch [4/50] batch [335/500] time 1.375 (1.369) data 0.001 (0.003) loss 0.7378 (1.2087) acc 81.2500 (70.0000) lr 1.9921e-03 eta 8:48:25 +epoch [4/50] batch [340/500] time 1.341 (1.368) data 0.000 (0.003) loss 0.7314 (1.2071) acc 75.0000 (70.0184) lr 1.9921e-03 eta 8:48:14 +epoch [4/50] batch [345/500] time 1.371 (1.368) data 0.000 (0.003) loss 1.3242 (1.2094) acc 62.5000 (69.9366) lr 1.9921e-03 eta 8:48:04 +epoch [4/50] batch [350/500] time 1.348 (1.368) data 0.000 (0.003) loss 1.6553 (1.2092) acc 59.3750 (69.9554) lr 1.9921e-03 eta 8:47:54 +epoch [4/50] batch [355/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.1865 (1.2114) acc 59.3750 (69.8944) lr 1.9921e-03 eta 8:47:44 +epoch [4/50] batch [360/500] time 1.365 (1.368) data 0.000 (0.003) loss 1.5430 (1.2131) acc 68.7500 (69.8524) lr 1.9921e-03 eta 8:47:33 +epoch [4/50] batch [365/500] time 1.374 (1.368) data 0.000 (0.003) loss 1.0684 (1.2131) acc 62.5000 (69.8373) lr 1.9921e-03 eta 8:47:26 +epoch [4/50] batch [370/500] time 1.362 (1.368) data 0.000 (0.003) loss 1.1953 (1.2125) acc 75.0000 (69.8226) lr 1.9921e-03 eta 8:47:16 +epoch [4/50] batch [375/500] time 1.367 (1.368) data 0.000 (0.003) loss 1.1045 (1.2123) acc 71.8750 (69.8000) lr 1.9921e-03 eta 8:47:13 +epoch [4/50] batch [380/500] time 1.351 (1.368) data 0.000 (0.003) loss 1.2949 (1.2140) acc 65.6250 (69.7451) lr 1.9921e-03 eta 8:47:01 +epoch [4/50] batch [385/500] time 1.364 (1.368) data 0.000 (0.003) loss 0.6543 (1.2116) acc 84.3750 (69.7971) lr 1.9921e-03 eta 8:46:53 +epoch [4/50] batch [390/500] time 1.382 (1.368) data 0.000 (0.003) loss 1.4971 (1.2109) acc 62.5000 (69.7997) lr 
1.9921e-03 eta 8:46:43 +epoch [4/50] batch [395/500] time 1.370 (1.367) data 0.000 (0.003) loss 1.4199 (1.2113) acc 68.7500 (69.8497) lr 1.9921e-03 eta 8:46:34 +epoch [4/50] batch [400/500] time 1.393 (1.367) data 0.000 (0.003) loss 1.3467 (1.2132) acc 68.7500 (69.8125) lr 1.9921e-03 eta 8:46:28 +epoch [4/50] batch [405/500] time 1.347 (1.367) data 0.000 (0.003) loss 0.9302 (1.2108) acc 81.2500 (69.8380) lr 1.9921e-03 eta 8:46:20 +epoch [4/50] batch [410/500] time 1.358 (1.367) data 0.000 (0.003) loss 1.4658 (1.2106) acc 59.3750 (69.8476) lr 1.9921e-03 eta 8:46:09 +epoch [4/50] batch [415/500] time 1.350 (1.367) data 0.000 (0.003) loss 1.9023 (1.2142) acc 56.2500 (69.7666) lr 1.9921e-03 eta 8:46:01 +epoch [4/50] batch [420/500] time 1.359 (1.367) data 0.000 (0.003) loss 1.2539 (1.2140) acc 62.5000 (69.7470) lr 1.9921e-03 eta 8:45:50 +epoch [4/50] batch [425/500] time 1.344 (1.367) data 0.000 (0.002) loss 1.3154 (1.2141) acc 65.6250 (69.7574) lr 1.9921e-03 eta 8:45:40 +epoch [4/50] batch [430/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.7041 (1.2128) acc 62.5000 (69.8183) lr 1.9921e-03 eta 8:45:31 +epoch [4/50] batch [435/500] time 1.375 (1.367) data 0.000 (0.002) loss 1.3115 (1.2127) acc 59.3750 (69.7917) lr 1.9921e-03 eta 8:45:25 +epoch [4/50] batch [440/500] time 1.356 (1.367) data 0.000 (0.002) loss 1.0898 (1.2128) acc 65.6250 (69.7656) lr 1.9921e-03 eta 8:45:18 +epoch [4/50] batch [445/500] time 1.374 (1.367) data 0.000 (0.002) loss 0.8540 (1.2138) acc 78.1250 (69.7542) lr 1.9921e-03 eta 8:45:10 +epoch [4/50] batch [450/500] time 1.378 (1.367) data 0.000 (0.002) loss 0.9565 (1.2129) acc 78.1250 (69.7778) lr 1.9921e-03 eta 8:45:03 +epoch [4/50] batch [455/500] time 1.343 (1.367) data 0.000 (0.002) loss 0.9810 (1.2138) acc 68.7500 (69.7527) lr 1.9921e-03 eta 8:44:54 +epoch [4/50] batch [460/500] time 1.370 (1.367) data 0.000 (0.002) loss 0.8657 (1.2121) acc 78.1250 (69.7758) lr 1.9921e-03 eta 8:44:46 +epoch [4/50] batch [465/500] time 1.384 (1.367) data 
0.000 (0.002) loss 1.2539 (1.2131) acc 68.7500 (69.7849) lr 1.9921e-03 eta 8:44:40 +epoch [4/50] batch [470/500] time 1.360 (1.367) data 0.000 (0.002) loss 1.0879 (1.2107) acc 71.8750 (69.8271) lr 1.9921e-03 eta 8:44:30 +epoch [4/50] batch [475/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.3408 (1.2107) acc 68.7500 (69.8618) lr 1.9921e-03 eta 8:44:30 +epoch [4/50] batch [480/500] time 1.337 (1.367) data 0.000 (0.002) loss 1.5771 (1.2114) acc 62.5000 (69.8763) lr 1.9921e-03 eta 8:44:20 +epoch [4/50] batch [485/500] time 1.360 (1.367) data 0.001 (0.002) loss 1.0186 (1.2101) acc 75.0000 (69.8969) lr 1.9921e-03 eta 8:44:12 +epoch [4/50] batch [490/500] time 1.352 (1.367) data 0.000 (0.002) loss 1.1846 (1.2100) acc 65.6250 (69.8788) lr 1.9921e-03 eta 8:44:04 +epoch [4/50] batch [495/500] time 1.342 (1.366) data 0.000 (0.002) loss 1.1279 (1.2109) acc 65.6250 (69.8043) lr 1.9921e-03 eta 8:43:54 +epoch [4/50] batch [500/500] time 1.374 (1.366) data 0.000 (0.002) loss 1.2891 (1.2104) acc 71.8750 (69.7938) lr 1.9823e-03 eta 8:43:48 +epoch [5/50] batch [5/500] time 1.369 (1.576) data 0.000 (0.202) loss 1.3057 (1.2356) acc 65.6250 (64.3750) lr 1.9823e-03 eta 10:03:49 +epoch [5/50] batch [10/500] time 1.354 (1.469) data 0.000 (0.101) loss 1.4434 (1.2426) acc 68.7500 (67.1875) lr 1.9823e-03 eta 9:23:03 +epoch [5/50] batch [15/500] time 1.349 (1.434) data 0.000 (0.068) loss 1.7832 (1.2808) acc 62.5000 (67.2917) lr 1.9823e-03 eta 9:09:30 +epoch [5/50] batch [20/500] time 1.380 (1.419) data 0.001 (0.051) loss 1.6123 (1.2983) acc 56.2500 (66.7188) lr 1.9823e-03 eta 9:03:24 +epoch [5/50] batch [25/500] time 1.373 (1.417) data 0.000 (0.041) loss 1.3164 (1.2549) acc 75.0000 (67.6250) lr 1.9823e-03 eta 9:02:40 +epoch [5/50] batch [30/500] time 1.362 (1.409) data 0.000 (0.034) loss 0.6875 (1.2068) acc 78.1250 (69.2708) lr 1.9823e-03 eta 8:59:31 +epoch [5/50] batch [35/500] time 1.371 (1.403) data 0.000 (0.029) loss 1.2441 (1.2007) acc 62.5000 (69.7321) lr 1.9823e-03 eta 8:57:08 
+epoch [5/50] batch [40/500] time 1.375 (1.399) data 0.000 (0.026) loss 0.8394 (1.1882) acc 75.0000 (69.9219) lr 1.9823e-03 eta 8:55:30 +epoch [5/50] batch [45/500] time 1.372 (1.397) data 0.000 (0.023) loss 0.4075 (1.1695) acc 84.3750 (70.4861) lr 1.9823e-03 eta 8:54:18 +epoch [5/50] batch [50/500] time 1.370 (1.393) data 0.000 (0.021) loss 1.3789 (1.1487) acc 68.7500 (70.8750) lr 1.9823e-03 eta 8:52:58 +epoch [5/50] batch [55/500] time 1.368 (1.391) data 0.000 (0.019) loss 0.8037 (1.1379) acc 84.3750 (71.5909) lr 1.9823e-03 eta 8:52:02 +epoch [5/50] batch [60/500] time 1.352 (1.388) data 0.000 (0.017) loss 0.7754 (1.1415) acc 78.1250 (71.4583) lr 1.9823e-03 eta 8:50:46 +epoch [5/50] batch [65/500] time 1.375 (1.387) data 0.000 (0.016) loss 0.8633 (1.1402) acc 78.1250 (71.6346) lr 1.9823e-03 eta 8:50:06 +epoch [5/50] batch [70/500] time 1.372 (1.387) data 0.000 (0.015) loss 0.9795 (1.1516) acc 68.7500 (71.4732) lr 1.9823e-03 eta 8:49:54 +epoch [5/50] batch [75/500] time 1.389 (1.385) data 0.000 (0.014) loss 1.5244 (1.1618) acc 56.2500 (70.8750) lr 1.9823e-03 eta 8:49:20 +epoch [5/50] batch [80/500] time 1.367 (1.384) data 0.000 (0.013) loss 1.1611 (1.1710) acc 71.8750 (70.5859) lr 1.9823e-03 eta 8:48:41 +epoch [5/50] batch [85/500] time 1.375 (1.383) data 0.000 (0.012) loss 1.5049 (1.1627) acc 65.6250 (70.7721) lr 1.9823e-03 eta 8:48:16 +epoch [5/50] batch [90/500] time 1.368 (1.382) data 0.001 (0.012) loss 1.3945 (1.1692) acc 71.8750 (70.7639) lr 1.9823e-03 eta 8:47:46 +epoch [5/50] batch [95/500] time 1.368 (1.381) data 0.000 (0.011) loss 0.8286 (1.1730) acc 78.1250 (70.8553) lr 1.9823e-03 eta 8:47:02 +epoch [5/50] batch [100/500] time 1.367 (1.380) data 0.000 (0.010) loss 1.0107 (1.1611) acc 81.2500 (71.1250) lr 1.9823e-03 eta 8:46:37 +epoch [5/50] batch [105/500] time 1.375 (1.379) data 0.001 (0.010) loss 1.3359 (1.1534) acc 75.0000 (71.4583) lr 1.9823e-03 eta 8:46:16 +epoch [5/50] batch [110/500] time 1.338 (1.378) data 0.000 (0.010) loss 0.9019 (1.1614) acc 
75.0000 (71.1364) lr 1.9823e-03 eta 8:45:44 +epoch [5/50] batch [115/500] time 1.351 (1.377) data 0.000 (0.009) loss 1.1846 (1.1715) acc 65.6250 (70.9511) lr 1.9823e-03 eta 8:45:09 +epoch [5/50] batch [120/500] time 1.327 (1.376) data 0.000 (0.009) loss 1.3291 (1.1758) acc 68.7500 (70.7812) lr 1.9823e-03 eta 8:44:45 +epoch [5/50] batch [125/500] time 1.360 (1.376) data 0.000 (0.008) loss 1.3457 (1.1747) acc 65.6250 (70.9250) lr 1.9823e-03 eta 8:44:28 +epoch [5/50] batch [130/500] time 1.330 (1.375) data 0.000 (0.008) loss 0.9824 (1.1660) acc 75.0000 (71.0577) lr 1.9823e-03 eta 8:44:11 +epoch [5/50] batch [135/500] time 1.340 (1.375) data 0.000 (0.008) loss 0.7021 (1.1641) acc 75.0000 (70.8796) lr 1.9823e-03 eta 8:43:52 +epoch [5/50] batch [140/500] time 1.365 (1.374) data 0.000 (0.008) loss 1.1094 (1.1647) acc 75.0000 (70.8929) lr 1.9823e-03 eta 8:43:38 +epoch [5/50] batch [145/500] time 1.371 (1.374) data 0.000 (0.007) loss 1.6934 (1.1667) acc 59.3750 (70.7759) lr 1.9823e-03 eta 8:43:17 +epoch [5/50] batch [150/500] time 1.353 (1.373) data 0.000 (0.007) loss 1.1611 (1.1637) acc 62.5000 (70.7917) lr 1.9823e-03 eta 8:42:59 +epoch [5/50] batch [155/500] time 1.376 (1.373) data 0.001 (0.007) loss 1.0947 (1.1671) acc 71.8750 (70.5847) lr 1.9823e-03 eta 8:42:42 +epoch [5/50] batch [160/500] time 1.378 (1.373) data 0.000 (0.007) loss 1.5400 (1.1780) acc 59.3750 (70.4102) lr 1.9823e-03 eta 8:42:36 +epoch [5/50] batch [165/500] time 1.347 (1.373) data 0.000 (0.006) loss 1.0098 (1.1835) acc 65.6250 (70.2841) lr 1.9823e-03 eta 8:42:37 +epoch [5/50] batch [170/500] time 1.372 (1.373) data 0.000 (0.006) loss 0.8081 (1.1783) acc 68.7500 (70.3676) lr 1.9823e-03 eta 8:42:24 +epoch [5/50] batch [175/500] time 1.363 (1.373) data 0.000 (0.006) loss 1.2734 (1.1743) acc 68.7500 (70.4643) lr 1.9823e-03 eta 8:42:13 +epoch [5/50] batch [180/500] time 1.372 (1.373) data 0.000 (0.006) loss 1.0195 (1.1724) acc 62.5000 (70.4688) lr 1.9823e-03 eta 8:42:04 +epoch [5/50] batch [185/500] time 
1.368 (1.372) data 0.001 (0.006) loss 0.9883 (1.1718) acc 68.7500 (70.5068) lr 1.9823e-03 eta 8:41:48 +epoch [5/50] batch [190/500] time 1.354 (1.372) data 0.000 (0.006) loss 1.1758 (1.1743) acc 71.8750 (70.5757) lr 1.9823e-03 eta 8:41:37 +epoch [5/50] batch [195/500] time 1.368 (1.372) data 0.000 (0.006) loss 0.6362 (1.1746) acc 87.5000 (70.6731) lr 1.9823e-03 eta 8:41:24 +epoch [5/50] batch [200/500] time 1.349 (1.372) data 0.000 (0.005) loss 1.2012 (1.1773) acc 65.6250 (70.6250) lr 1.9823e-03 eta 8:41:12 +epoch [5/50] batch [205/500] time 1.376 (1.372) data 0.000 (0.005) loss 1.0107 (1.1768) acc 68.7500 (70.6707) lr 1.9823e-03 eta 8:41:04 +epoch [5/50] batch [210/500] time 1.375 (1.372) data 0.000 (0.005) loss 1.0850 (1.1826) acc 71.8750 (70.5655) lr 1.9823e-03 eta 8:41:08 +epoch [5/50] batch [215/500] time 1.363 (1.372) data 0.000 (0.005) loss 0.9038 (1.1823) acc 81.2500 (70.5959) lr 1.9823e-03 eta 8:40:58 +epoch [5/50] batch [220/500] time 1.367 (1.372) data 0.000 (0.005) loss 1.2100 (1.1842) acc 65.6250 (70.5540) lr 1.9823e-03 eta 8:40:47 +epoch [5/50] batch [225/500] time 1.346 (1.371) data 0.000 (0.005) loss 1.7256 (1.1794) acc 53.1250 (70.6389) lr 1.9823e-03 eta 8:40:34 +epoch [5/50] batch [230/500] time 1.376 (1.371) data 0.000 (0.005) loss 1.3008 (1.1760) acc 62.5000 (70.7337) lr 1.9823e-03 eta 8:40:26 +epoch [5/50] batch [235/500] time 1.360 (1.371) data 0.000 (0.005) loss 0.8198 (1.1733) acc 84.3750 (70.7713) lr 1.9823e-03 eta 8:40:11 +epoch [5/50] batch [240/500] time 1.361 (1.371) data 0.000 (0.005) loss 1.3096 (1.1748) acc 68.7500 (70.6901) lr 1.9823e-03 eta 8:40:01 +epoch [5/50] batch [245/500] time 1.365 (1.371) data 0.000 (0.005) loss 1.4043 (1.1737) acc 65.6250 (70.7398) lr 1.9823e-03 eta 8:39:51 +epoch [5/50] batch [250/500] time 1.371 (1.371) data 0.000 (0.004) loss 0.7686 (1.1683) acc 75.0000 (70.8625) lr 1.9823e-03 eta 8:39:45 +epoch [5/50] batch [255/500] time 1.363 (1.371) data 0.000 (0.004) loss 1.1035 (1.1694) acc 78.1250 (70.8578) lr 
1.9823e-03 eta 8:39:36 +epoch [5/50] batch [260/500] time 1.353 (1.370) data 0.000 (0.004) loss 1.4131 (1.1704) acc 68.7500 (70.8293) lr 1.9823e-03 eta 8:39:23 +epoch [5/50] batch [265/500] time 1.356 (1.370) data 0.001 (0.004) loss 1.2656 (1.1722) acc 62.5000 (70.7901) lr 1.9823e-03 eta 8:39:11 +epoch [5/50] batch [270/500] time 1.357 (1.370) data 0.000 (0.004) loss 1.2275 (1.1733) acc 71.8750 (70.8333) lr 1.9823e-03 eta 8:39:00 +epoch [5/50] batch [275/500] time 1.367 (1.370) data 0.000 (0.004) loss 1.5166 (1.1716) acc 75.0000 (70.9545) lr 1.9823e-03 eta 8:38:47 +epoch [5/50] batch [280/500] time 1.356 (1.370) data 0.000 (0.004) loss 1.2080 (1.1723) acc 62.5000 (70.9152) lr 1.9823e-03 eta 8:38:42 +epoch [5/50] batch [285/500] time 1.360 (1.370) data 0.000 (0.004) loss 0.8892 (1.1675) acc 81.2500 (71.0307) lr 1.9823e-03 eta 8:38:32 +epoch [5/50] batch [290/500] time 1.350 (1.370) data 0.000 (0.004) loss 0.9507 (1.1628) acc 75.0000 (71.1853) lr 1.9823e-03 eta 8:38:23 +epoch [5/50] batch [295/500] time 1.367 (1.369) data 0.000 (0.004) loss 1.0732 (1.1632) acc 65.6250 (71.1441) lr 1.9823e-03 eta 8:38:11 +epoch [5/50] batch [300/500] time 1.363 (1.369) data 0.000 (0.004) loss 1.3555 (1.1649) acc 62.5000 (71.0938) lr 1.9823e-03 eta 8:37:59 +epoch [5/50] batch [305/500] time 1.373 (1.369) data 0.000 (0.004) loss 1.0684 (1.1619) acc 78.1250 (71.1475) lr 1.9823e-03 eta 8:37:51 +epoch [5/50] batch [310/500] time 1.363 (1.369) data 0.000 (0.004) loss 1.3799 (1.1616) acc 62.5000 (71.1593) lr 1.9823e-03 eta 8:37:53 +epoch [5/50] batch [315/500] time 1.355 (1.369) data 0.000 (0.004) loss 0.8481 (1.1611) acc 81.2500 (71.1706) lr 1.9823e-03 eta 8:37:39 +epoch [5/50] batch [320/500] time 1.368 (1.369) data 0.000 (0.004) loss 1.1475 (1.1617) acc 65.6250 (71.0742) lr 1.9823e-03 eta 8:37:31 +epoch [5/50] batch [325/500] time 1.364 (1.369) data 0.000 (0.003) loss 1.4971 (1.1636) acc 62.5000 (71.0385) lr 1.9823e-03 eta 8:37:19 +epoch [5/50] batch [330/500] time 1.361 (1.369) data 
0.000 (0.003) loss 1.5264 (1.1665) acc 62.5000 (70.9659) lr 1.9823e-03 eta 8:37:08 +epoch [5/50] batch [335/500] time 1.352 (1.368) data 0.000 (0.003) loss 0.6880 (1.1656) acc 84.3750 (71.0075) lr 1.9823e-03 eta 8:36:56 +epoch [5/50] batch [340/500] time 1.360 (1.368) data 0.000 (0.003) loss 0.9697 (1.1651) acc 75.0000 (71.0018) lr 1.9823e-03 eta 8:36:48 +epoch [5/50] batch [345/500] time 1.352 (1.368) data 0.000 (0.003) loss 1.4688 (1.1669) acc 65.6250 (70.9511) lr 1.9823e-03 eta 8:36:34 +epoch [5/50] batch [350/500] time 1.348 (1.368) data 0.001 (0.003) loss 1.4121 (1.1651) acc 62.5000 (70.9643) lr 1.9823e-03 eta 8:36:22 +epoch [5/50] batch [355/500] time 1.374 (1.368) data 0.000 (0.003) loss 1.1377 (1.1701) acc 75.0000 (70.8715) lr 1.9823e-03 eta 8:36:27 +epoch [5/50] batch [360/500] time 1.352 (1.368) data 0.000 (0.003) loss 1.0410 (1.1689) acc 81.2500 (70.9028) lr 1.9823e-03 eta 8:36:18 +epoch [5/50] batch [365/500] time 1.350 (1.368) data 0.000 (0.003) loss 1.2412 (1.1720) acc 71.8750 (70.8390) lr 1.9823e-03 eta 8:36:08 +epoch [5/50] batch [370/500] time 1.366 (1.368) data 0.000 (0.003) loss 0.7959 (1.1690) acc 78.1250 (70.9375) lr 1.9823e-03 eta 8:36:02 +epoch [5/50] batch [375/500] time 1.360 (1.368) data 0.001 (0.003) loss 1.0801 (1.1661) acc 78.1250 (70.9833) lr 1.9823e-03 eta 8:35:53 +epoch [5/50] batch [380/500] time 1.348 (1.368) data 0.000 (0.003) loss 0.8696 (1.1634) acc 75.0000 (71.0362) lr 1.9823e-03 eta 8:35:46 +epoch [5/50] batch [385/500] time 1.374 (1.368) data 0.000 (0.003) loss 1.0156 (1.1619) acc 68.7500 (71.0795) lr 1.9823e-03 eta 8:35:37 +epoch [5/50] batch [390/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.4287 (1.1657) acc 71.8750 (71.0897) lr 1.9823e-03 eta 8:35:30 +epoch [5/50] batch [395/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.1514 (1.1665) acc 75.0000 (71.0839) lr 1.9823e-03 eta 8:35:22 +epoch [5/50] batch [400/500] time 1.380 (1.368) data 0.000 (0.003) loss 0.9199 (1.1639) acc 68.7500 (71.0781) lr 1.9823e-03 eta 
8:35:13 +epoch [5/50] batch [405/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.7090 (1.1636) acc 62.5000 (71.0417) lr 1.9823e-03 eta 8:35:03 +epoch [5/50] batch [410/500] time 1.360 (1.368) data 0.000 (0.003) loss 0.8296 (1.1652) acc 71.8750 (70.9375) lr 1.9823e-03 eta 8:34:57 +epoch [5/50] batch [415/500] time 1.354 (1.368) data 0.000 (0.003) loss 1.8838 (1.1685) acc 65.6250 (70.9111) lr 1.9823e-03 eta 8:34:49 +epoch [5/50] batch [420/500] time 1.373 (1.368) data 0.000 (0.003) loss 0.7759 (1.1671) acc 84.3750 (70.9226) lr 1.9823e-03 eta 8:34:38 +epoch [5/50] batch [425/500] time 1.372 (1.368) data 0.000 (0.003) loss 1.2197 (1.1669) acc 68.7500 (70.9265) lr 1.9823e-03 eta 8:34:31 +epoch [5/50] batch [430/500] time 1.357 (1.367) data 0.000 (0.003) loss 0.9966 (1.1662) acc 78.1250 (70.9593) lr 1.9823e-03 eta 8:34:23 +epoch [5/50] batch [435/500] time 1.378 (1.368) data 0.000 (0.003) loss 1.0303 (1.1631) acc 68.7500 (70.9626) lr 1.9823e-03 eta 8:34:18 +epoch [5/50] batch [440/500] time 1.339 (1.367) data 0.000 (0.003) loss 1.1143 (1.1630) acc 75.0000 (70.9446) lr 1.9823e-03 eta 8:34:09 +epoch [5/50] batch [445/500] time 1.362 (1.367) data 0.000 (0.003) loss 1.0410 (1.1662) acc 75.0000 (70.8848) lr 1.9823e-03 eta 8:34:01 +epoch [5/50] batch [450/500] time 1.508 (1.368) data 0.000 (0.003) loss 1.1211 (1.1667) acc 81.2500 (70.9028) lr 1.9823e-03 eta 8:34:00 +epoch [5/50] batch [455/500] time 1.362 (1.368) data 0.000 (0.003) loss 1.4160 (1.1665) acc 68.7500 (70.9135) lr 1.9823e-03 eta 8:33:50 +epoch [5/50] batch [460/500] time 1.372 (1.367) data 0.000 (0.003) loss 1.2441 (1.1677) acc 65.6250 (70.9035) lr 1.9823e-03 eta 8:33:41 +epoch [5/50] batch [465/500] time 1.356 (1.367) data 0.001 (0.003) loss 0.7437 (1.1650) acc 84.3750 (70.9879) lr 1.9823e-03 eta 8:33:32 +epoch [5/50] batch [470/500] time 1.355 (1.367) data 0.000 (0.003) loss 1.0664 (1.1642) acc 84.3750 (70.9973) lr 1.9823e-03 eta 8:33:24 +epoch [5/50] batch [475/500] time 1.333 (1.367) data 0.000 (0.003) loss 
1.1924 (1.1654) acc 65.6250 (70.9868) lr 1.9823e-03 eta 8:33:16 +epoch [5/50] batch [480/500] time 1.354 (1.367) data 0.000 (0.002) loss 0.7373 (1.1649) acc 78.1250 (70.9831) lr 1.9823e-03 eta 8:33:07 +epoch [5/50] batch [485/500] time 1.371 (1.367) data 0.001 (0.002) loss 1.7305 (1.1664) acc 56.2500 (70.9601) lr 1.9823e-03 eta 8:32:57 +epoch [5/50] batch [490/500] time 1.366 (1.367) data 0.000 (0.002) loss 1.9844 (1.1671) acc 53.1250 (70.9566) lr 1.9823e-03 eta 8:32:47 +epoch [5/50] batch [495/500] time 1.349 (1.367) data 0.000 (0.002) loss 1.2090 (1.1682) acc 78.1250 (70.9533) lr 1.9823e-03 eta 8:32:42 +epoch [5/50] batch [500/500] time 1.360 (1.367) data 0.000 (0.002) loss 1.4639 (1.1698) acc 59.3750 (70.9062) lr 1.9686e-03 eta 8:32:33 +epoch [6/50] batch [5/500] time 1.363 (1.550) data 0.000 (0.174) loss 1.1025 (0.9729) acc 62.5000 (73.1250) lr 1.9686e-03 eta 9:41:09 +epoch [6/50] batch [10/500] time 1.367 (1.462) data 0.001 (0.087) loss 0.6235 (1.0377) acc 81.2500 (74.0625) lr 1.9686e-03 eta 9:08:00 +epoch [6/50] batch [15/500] time 1.377 (1.428) data 0.000 (0.058) loss 1.6924 (1.0965) acc 71.8750 (72.9167) lr 1.9686e-03 eta 8:55:04 +epoch [6/50] batch [20/500] time 1.351 (1.411) data 0.000 (0.044) loss 1.3037 (1.1763) acc 68.7500 (71.5625) lr 1.9686e-03 eta 8:48:39 +epoch [6/50] batch [25/500] time 1.363 (1.399) data 0.001 (0.035) loss 1.0293 (1.2121) acc 71.8750 (71.0000) lr 1.9686e-03 eta 8:44:12 +epoch [6/50] batch [30/500] time 1.392 (1.395) data 0.000 (0.029) loss 2.1270 (1.2396) acc 46.8750 (70.6250) lr 1.9686e-03 eta 8:42:17 +epoch [6/50] batch [35/500] time 1.364 (1.390) data 0.001 (0.025) loss 0.5630 (1.2199) acc 81.2500 (71.0714) lr 1.9686e-03 eta 8:40:30 +epoch [6/50] batch [40/500] time 1.357 (1.393) data 0.000 (0.022) loss 1.1768 (1.2113) acc 71.8750 (70.9375) lr 1.9686e-03 eta 8:41:25 +epoch [6/50] batch [45/500] time 1.378 (1.388) data 0.001 (0.020) loss 1.0107 (1.2088) acc 84.3750 (71.0417) lr 1.9686e-03 eta 8:39:36 +epoch [6/50] batch 
[50/500] time 1.365 (1.385) data 0.000 (0.018) loss 1.7646 (1.2059) acc 65.6250 (71.1875) lr 1.9686e-03 eta 8:38:22 +epoch [6/50] batch [55/500] time 1.360 (1.383) data 0.000 (0.016) loss 1.1045 (1.1906) acc 81.2500 (71.8182) lr 1.9686e-03 eta 8:37:22 +epoch [6/50] batch [60/500] time 1.344 (1.381) data 0.000 (0.015) loss 0.7998 (1.1748) acc 71.8750 (72.0312) lr 1.9686e-03 eta 8:36:23 +epoch [6/50] batch [65/500] time 1.349 (1.379) data 0.001 (0.014) loss 0.7051 (1.1903) acc 84.3750 (71.9712) lr 1.9686e-03 eta 8:35:37 +epoch [6/50] batch [70/500] time 1.371 (1.378) data 0.001 (0.013) loss 1.0566 (1.2028) acc 71.8750 (71.5625) lr 1.9686e-03 eta 8:35:02 +epoch [6/50] batch [75/500] time 1.361 (1.377) data 0.000 (0.012) loss 1.6992 (1.2001) acc 56.2500 (71.3333) lr 1.9686e-03 eta 8:34:30 +epoch [6/50] batch [80/500] time 1.362 (1.375) data 0.001 (0.011) loss 1.0986 (1.1971) acc 71.8750 (71.2500) lr 1.9686e-03 eta 8:33:51 +epoch [6/50] batch [85/500] time 1.356 (1.375) data 0.000 (0.011) loss 1.4736 (1.2062) acc 65.6250 (70.9926) lr 1.9686e-03 eta 8:33:41 +epoch [6/50] batch [90/500] time 1.340 (1.374) data 0.000 (0.010) loss 0.7925 (1.1913) acc 78.1250 (71.2500) lr 1.9686e-03 eta 8:33:12 +epoch [6/50] batch [95/500] time 1.340 (1.373) data 0.000 (0.010) loss 1.1299 (1.1843) acc 68.7500 (71.5461) lr 1.9686e-03 eta 8:32:40 +epoch [6/50] batch [100/500] time 1.342 (1.372) data 0.000 (0.009) loss 1.0674 (1.1898) acc 75.0000 (71.3750) lr 1.9686e-03 eta 8:32:19 +epoch [6/50] batch [105/500] time 1.343 (1.371) data 0.000 (0.009) loss 0.3960 (1.1822) acc 87.5000 (71.6667) lr 1.9686e-03 eta 8:31:52 +epoch [6/50] batch [110/500] time 1.361 (1.371) data 0.000 (0.008) loss 1.4951 (1.1849) acc 59.3750 (71.5057) lr 1.9686e-03 eta 8:31:28 +epoch [6/50] batch [115/500] time 1.348 (1.370) data 0.000 (0.008) loss 0.6533 (1.1865) acc 81.2500 (71.5217) lr 1.9686e-03 eta 8:31:04 +epoch [6/50] batch [120/500] time 1.360 (1.370) data 0.000 (0.008) loss 1.0781 (1.1863) acc 71.8750 (71.4583) 
lr 1.9686e-03 eta 8:30:52 +epoch [6/50] batch [125/500] time 1.374 (1.370) data 0.000 (0.007) loss 1.1504 (1.1883) acc 65.6250 (71.4250) lr 1.9686e-03 eta 8:30:47 +epoch [6/50] batch [130/500] time 1.373 (1.370) data 0.000 (0.007) loss 1.4258 (1.1896) acc 68.7500 (71.3462) lr 1.9686e-03 eta 8:30:37 +epoch [6/50] batch [135/500] time 1.355 (1.369) data 0.000 (0.007) loss 0.9561 (1.1845) acc 78.1250 (71.6204) lr 1.9686e-03 eta 8:30:14 +epoch [6/50] batch [140/500] time 1.359 (1.369) data 0.000 (0.007) loss 1.2422 (1.1805) acc 71.8750 (71.7188) lr 1.9686e-03 eta 8:30:20 +epoch [6/50] batch [145/500] time 1.351 (1.369) data 0.000 (0.006) loss 1.5293 (1.1872) acc 65.6250 (71.5948) lr 1.9686e-03 eta 8:29:52 +epoch [6/50] batch [150/500] time 1.377 (1.368) data 0.000 (0.006) loss 1.2734 (1.1859) acc 65.6250 (71.5417) lr 1.9686e-03 eta 8:29:40 +epoch [6/50] batch [155/500] time 1.372 (1.368) data 0.000 (0.006) loss 0.9819 (1.1779) acc 68.7500 (71.5726) lr 1.9686e-03 eta 8:29:28 +epoch [6/50] batch [160/500] time 1.349 (1.368) data 0.000 (0.006) loss 1.0889 (1.1788) acc 75.0000 (71.6211) lr 1.9686e-03 eta 8:29:21 +epoch [6/50] batch [165/500] time 1.365 (1.368) data 0.001 (0.006) loss 0.9995 (1.1698) acc 75.0000 (71.7992) lr 1.9686e-03 eta 8:29:06 +epoch [6/50] batch [170/500] time 1.346 (1.367) data 0.001 (0.005) loss 1.3066 (1.1733) acc 75.0000 (71.7831) lr 1.9686e-03 eta 8:28:51 +epoch [6/50] batch [175/500] time 1.357 (1.367) data 0.000 (0.005) loss 0.9326 (1.1742) acc 75.0000 (71.6786) lr 1.9686e-03 eta 8:28:37 +epoch [6/50] batch [180/500] time 1.363 (1.367) data 0.000 (0.005) loss 0.9634 (1.1783) acc 71.8750 (71.4757) lr 1.9686e-03 eta 8:28:23 +epoch [6/50] batch [185/500] time 1.353 (1.367) data 0.000 (0.005) loss 1.0752 (1.1758) acc 68.7500 (71.5203) lr 1.9686e-03 eta 8:28:33 +epoch [6/50] batch [190/500] time 1.361 (1.367) data 0.000 (0.005) loss 0.7446 (1.1762) acc 78.1250 (71.4967) lr 1.9686e-03 eta 8:28:18 +epoch [6/50] batch [195/500] time 1.374 (1.367) data 
0.000 (0.005) loss 0.8159 (1.1732) acc 75.0000 (71.4744) lr 1.9686e-03 eta 8:28:10 +epoch [6/50] batch [200/500] time 1.353 (1.367) data 0.000 (0.005) loss 1.3730 (1.1748) acc 68.7500 (71.5000) lr 1.9686e-03 eta 8:27:56 +epoch [6/50] batch [205/500] time 1.369 (1.367) data 0.000 (0.005) loss 1.5283 (1.1782) acc 65.6250 (71.3872) lr 1.9686e-03 eta 8:27:54 +epoch [6/50] batch [210/500] time 1.365 (1.367) data 0.001 (0.005) loss 1.0674 (1.1719) acc 71.8750 (71.4732) lr 1.9686e-03 eta 8:27:45 +epoch [6/50] batch [215/500] time 1.372 (1.367) data 0.000 (0.004) loss 0.8706 (1.1667) acc 71.8750 (71.6134) lr 1.9686e-03 eta 8:27:36 +epoch [6/50] batch [220/500] time 1.369 (1.367) data 0.000 (0.004) loss 1.4541 (1.1665) acc 71.8750 (71.7188) lr 1.9686e-03 eta 8:27:27 +epoch [6/50] batch [225/500] time 1.368 (1.367) data 0.000 (0.004) loss 1.1221 (1.1671) acc 65.6250 (71.6250) lr 1.9686e-03 eta 8:27:18 +epoch [6/50] batch [230/500] time 1.364 (1.366) data 0.001 (0.004) loss 0.7476 (1.1667) acc 75.0000 (71.4810) lr 1.9686e-03 eta 8:27:10 +epoch [6/50] batch [235/500] time 1.374 (1.366) data 0.000 (0.004) loss 1.2656 (1.1682) acc 68.7500 (71.4894) lr 1.9686e-03 eta 8:27:01 +epoch [6/50] batch [240/500] time 1.353 (1.366) data 0.000 (0.004) loss 1.5029 (1.1721) acc 65.6250 (71.4453) lr 1.9686e-03 eta 8:26:49 +epoch [6/50] batch [245/500] time 1.359 (1.366) data 0.000 (0.004) loss 1.3018 (1.1728) acc 62.5000 (71.3648) lr 1.9686e-03 eta 8:26:39 +epoch [6/50] batch [250/500] time 1.364 (1.366) data 0.000 (0.004) loss 0.7749 (1.1763) acc 75.0000 (71.3000) lr 1.9686e-03 eta 8:26:28 +epoch [6/50] batch [255/500] time 1.352 (1.366) data 0.000 (0.004) loss 1.5020 (1.1706) acc 62.5000 (71.3848) lr 1.9686e-03 eta 8:26:17 +epoch [6/50] batch [260/500] time 1.385 (1.366) data 0.000 (0.004) loss 1.6084 (1.1749) acc 65.6250 (71.3101) lr 1.9686e-03 eta 8:26:09 +epoch [6/50] batch [265/500] time 1.362 (1.365) data 0.000 (0.004) loss 1.3506 (1.1730) acc 65.6250 (71.3090) lr 1.9686e-03 eta 
8:26:00 +epoch [6/50] batch [270/500] time 1.373 (1.365) data 0.000 (0.004) loss 0.9097 (1.1700) acc 71.8750 (71.3542) lr 1.9686e-03 eta 8:25:54 +epoch [6/50] batch [275/500] time 1.365 (1.365) data 0.000 (0.004) loss 1.2432 (1.1665) acc 75.0000 (71.5000) lr 1.9686e-03 eta 8:25:47 +epoch [6/50] batch [280/500] time 1.491 (1.366) data 0.000 (0.004) loss 0.6104 (1.1669) acc 84.3750 (71.4955) lr 1.9686e-03 eta 8:25:47 +epoch [6/50] batch [285/500] time 1.338 (1.366) data 0.000 (0.003) loss 0.9531 (1.1678) acc 75.0000 (71.4693) lr 1.9686e-03 eta 8:25:37 +epoch [6/50] batch [290/500] time 1.347 (1.365) data 0.000 (0.003) loss 0.7812 (1.1608) acc 75.0000 (71.6272) lr 1.9686e-03 eta 8:25:26 +epoch [6/50] batch [295/500] time 1.370 (1.365) data 0.000 (0.003) loss 0.8340 (1.1613) acc 75.0000 (71.5466) lr 1.9686e-03 eta 8:25:20 +epoch [6/50] batch [300/500] time 1.351 (1.365) data 0.000 (0.003) loss 1.4531 (1.1602) acc 59.3750 (71.5521) lr 1.9686e-03 eta 8:25:08 +epoch [6/50] batch [305/500] time 1.334 (1.365) data 0.000 (0.003) loss 0.8862 (1.1606) acc 78.1250 (71.6393) lr 1.9686e-03 eta 8:24:59 +epoch [6/50] batch [310/500] time 1.362 (1.365) data 0.000 (0.003) loss 1.0635 (1.1568) acc 78.1250 (71.6734) lr 1.9686e-03 eta 8:24:49 +epoch [6/50] batch [315/500] time 1.344 (1.365) data 0.000 (0.003) loss 1.1055 (1.1572) acc 71.8750 (71.6667) lr 1.9686e-03 eta 8:24:39 +epoch [6/50] batch [320/500] time 1.342 (1.365) data 0.000 (0.003) loss 0.7222 (1.1568) acc 78.1250 (71.6992) lr 1.9686e-03 eta 8:24:26 +epoch [6/50] batch [325/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.9795 (1.1585) acc 75.0000 (71.7019) lr 1.9686e-03 eta 8:24:28 +epoch [6/50] batch [330/500] time 1.365 (1.365) data 0.000 (0.003) loss 1.1650 (1.1571) acc 65.6250 (71.6667) lr 1.9686e-03 eta 8:24:19 +epoch [6/50] batch [335/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.9634 (1.1610) acc 78.1250 (71.6604) lr 1.9686e-03 eta 8:24:12 +epoch [6/50] batch [340/500] time 1.359 (1.365) data 0.001 (0.003) loss 
0.8813 (1.1583) acc 68.7500 (71.7096) lr 1.9686e-03 eta 8:24:05 +epoch [6/50] batch [345/500] time 1.360 (1.365) data 0.000 (0.003) loss 1.1982 (1.1569) acc 59.3750 (71.6486) lr 1.9686e-03 eta 8:23:56 +epoch [6/50] batch [350/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.5576 (1.1551) acc 81.2500 (71.6607) lr 1.9686e-03 eta 8:23:48 +epoch [6/50] batch [355/500] time 1.352 (1.365) data 0.000 (0.003) loss 1.3701 (1.1551) acc 68.7500 (71.6285) lr 1.9686e-03 eta 8:23:38 +epoch [6/50] batch [360/500] time 1.375 (1.365) data 0.001 (0.003) loss 0.5801 (1.1543) acc 81.2500 (71.6319) lr 1.9686e-03 eta 8:23:33 +epoch [6/50] batch [365/500] time 1.373 (1.365) data 0.000 (0.003) loss 1.2314 (1.1558) acc 68.7500 (71.6010) lr 1.9686e-03 eta 8:23:26 +epoch [6/50] batch [370/500] time 1.349 (1.365) data 0.000 (0.003) loss 0.6265 (1.1543) acc 84.3750 (71.6639) lr 1.9686e-03 eta 8:23:20 +epoch [6/50] batch [375/500] time 1.354 (1.365) data 0.000 (0.003) loss 1.5273 (1.1572) acc 71.8750 (71.6417) lr 1.9686e-03 eta 8:23:11 +epoch [6/50] batch [380/500] time 1.364 (1.365) data 0.000 (0.003) loss 1.7529 (1.1583) acc 53.1250 (71.5954) lr 1.9686e-03 eta 8:23:07 +epoch [6/50] batch [385/500] time 1.349 (1.365) data 0.000 (0.003) loss 0.7988 (1.1567) acc 75.0000 (71.6640) lr 1.9686e-03 eta 8:22:57 +epoch [6/50] batch [390/500] time 1.355 (1.364) data 0.000 (0.003) loss 1.2090 (1.1566) acc 62.5000 (71.6186) lr 1.9686e-03 eta 8:22:48 +epoch [6/50] batch [395/500] time 1.360 (1.364) data 0.000 (0.003) loss 1.0293 (1.1551) acc 81.2500 (71.6614) lr 1.9686e-03 eta 8:22:39 +epoch [6/50] batch [400/500] time 1.358 (1.364) data 0.000 (0.003) loss 1.1992 (1.1572) acc 71.8750 (71.6094) lr 1.9686e-03 eta 8:22:31 +epoch [6/50] batch [405/500] time 1.356 (1.364) data 0.000 (0.003) loss 1.1914 (1.1554) acc 71.8750 (71.5818) lr 1.9686e-03 eta 8:22:22 +epoch [6/50] batch [410/500] time 1.336 (1.364) data 0.000 (0.003) loss 1.3389 (1.1557) acc 71.8750 (71.6006) lr 1.9686e-03 eta 8:22:11 +epoch [6/50] 
batch [415/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.0771 (1.1552) acc 71.8750 (71.5813) lr 1.9686e-03 eta 8:22:03 +epoch [6/50] batch [420/500] time 1.354 (1.364) data 0.000 (0.002) loss 1.2578 (1.1559) acc 68.7500 (71.5327) lr 1.9686e-03 eta 8:21:54 +epoch [6/50] batch [425/500] time 1.350 (1.364) data 0.000 (0.002) loss 0.9814 (1.1594) acc 68.7500 (71.4044) lr 1.9686e-03 eta 8:21:51 +epoch [6/50] batch [430/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.2266 (1.1617) acc 68.7500 (71.3663) lr 1.9686e-03 eta 8:21:42 +epoch [6/50] batch [435/500] time 1.367 (1.364) data 0.000 (0.002) loss 1.0508 (1.1603) acc 68.7500 (71.3865) lr 1.9686e-03 eta 8:21:35 +epoch [6/50] batch [440/500] time 1.373 (1.364) data 0.000 (0.002) loss 2.2227 (1.1626) acc 43.7500 (71.3494) lr 1.9686e-03 eta 8:21:26 +epoch [6/50] batch [445/500] time 1.373 (1.364) data 0.000 (0.002) loss 1.4199 (1.1633) acc 59.3750 (71.3483) lr 1.9686e-03 eta 8:21:21 +epoch [6/50] batch [450/500] time 1.345 (1.364) data 0.000 (0.002) loss 1.5449 (1.1643) acc 59.3750 (71.3333) lr 1.9686e-03 eta 8:21:12 +epoch [6/50] batch [455/500] time 1.363 (1.364) data 0.001 (0.002) loss 1.2617 (1.1649) acc 65.6250 (71.2637) lr 1.9686e-03 eta 8:21:03 +epoch [6/50] batch [460/500] time 1.361 (1.364) data 0.000 (0.002) loss 1.4922 (1.1656) acc 59.3750 (71.1957) lr 1.9686e-03 eta 8:20:55 +epoch [6/50] batch [465/500] time 1.377 (1.364) data 0.000 (0.002) loss 1.0693 (1.1647) acc 68.7500 (71.1962) lr 1.9686e-03 eta 8:20:49 +epoch [6/50] batch [470/500] time 1.377 (1.364) data 0.000 (0.002) loss 1.7588 (1.1659) acc 59.3750 (71.1436) lr 1.9686e-03 eta 8:20:48 +epoch [6/50] batch [475/500] time 1.365 (1.364) data 0.000 (0.002) loss 0.7280 (1.1648) acc 81.2500 (71.1447) lr 1.9686e-03 eta 8:20:39 +epoch [6/50] batch [480/500] time 1.359 (1.364) data 0.000 (0.002) loss 1.4824 (1.1658) acc 68.7500 (71.1523) lr 1.9686e-03 eta 8:20:30 +epoch [6/50] batch [485/500] time 1.382 (1.364) data 0.001 (0.002) loss 1.0703 (1.1638) acc 
71.8750 (71.1985) lr 1.9686e-03 eta 8:20:23 +epoch [6/50] batch [490/500] time 1.372 (1.364) data 0.000 (0.002) loss 0.9204 (1.1634) acc 84.3750 (71.2628) lr 1.9686e-03 eta 8:20:15 +epoch [6/50] batch [495/500] time 1.363 (1.364) data 0.000 (0.002) loss 1.0244 (1.1652) acc 68.7500 (71.2247) lr 1.9686e-03 eta 8:20:07 +epoch [6/50] batch [500/500] time 1.359 (1.364) data 0.000 (0.002) loss 1.6836 (1.1684) acc 59.3750 (71.1688) lr 1.9511e-03 eta 8:20:01 +epoch [7/50] batch [5/500] time 1.365 (1.541) data 0.000 (0.168) loss 1.0840 (1.2078) acc 78.1250 (69.3750) lr 1.9511e-03 eta 9:24:52 +epoch [7/50] batch [10/500] time 1.394 (1.475) data 0.000 (0.084) loss 1.3223 (1.2299) acc 68.7500 (68.7500) lr 1.9511e-03 eta 9:00:26 +epoch [7/50] batch [15/500] time 1.340 (1.434) data 0.000 (0.056) loss 1.4238 (1.2465) acc 62.5000 (68.5417) lr 1.9511e-03 eta 8:45:33 +epoch [7/50] batch [20/500] time 1.353 (1.418) data 0.000 (0.042) loss 1.3154 (1.1948) acc 75.0000 (70.0000) lr 1.9511e-03 eta 8:39:36 +epoch [7/50] batch [25/500] time 1.342 (1.406) data 0.000 (0.034) loss 0.6357 (1.1493) acc 87.5000 (70.6250) lr 1.9511e-03 eta 8:34:59 +epoch [7/50] batch [30/500] time 1.360 (1.399) data 0.000 (0.028) loss 0.7021 (1.1367) acc 78.1250 (70.9375) lr 1.9511e-03 eta 8:32:08 +epoch [7/50] batch [35/500] time 1.358 (1.394) data 0.001 (0.024) loss 1.0264 (1.1274) acc 71.8750 (70.8036) lr 1.9511e-03 eta 8:30:19 +epoch [7/50] batch [40/500] time 1.372 (1.389) data 0.000 (0.021) loss 1.8447 (1.1553) acc 68.7500 (70.7031) lr 1.9511e-03 eta 8:28:30 +epoch [7/50] batch [45/500] time 1.356 (1.386) data 0.000 (0.019) loss 1.0879 (1.1509) acc 81.2500 (71.0417) lr 1.9511e-03 eta 8:27:15 +epoch [7/50] batch [50/500] time 1.383 (1.385) data 0.000 (0.017) loss 1.5684 (1.1540) acc 59.3750 (70.8750) lr 1.9511e-03 eta 8:26:36 +epoch [7/50] batch [55/500] time 1.371 (1.384) data 0.000 (0.016) loss 1.2139 (1.1649) acc 56.2500 (69.9432) lr 1.9511e-03 eta 8:26:21 +epoch [7/50] batch [60/500] time 1.386 (1.384) 
data 0.000 (0.014) loss 1.1162 (1.1829) acc 75.0000 (69.6875) lr 1.9511e-03 eta 8:26:00 +epoch [7/50] batch [65/500] time 1.365 (1.382) data 0.000 (0.013) loss 1.3359 (1.2015) acc 62.5000 (69.2308) lr 1.9511e-03 eta 8:25:09 +epoch [7/50] batch [70/500] time 1.364 (1.380) data 0.000 (0.012) loss 1.0967 (1.2141) acc 56.2500 (68.4375) lr 1.9511e-03 eta 8:24:30 +epoch [7/50] batch [75/500] time 1.375 (1.379) data 0.000 (0.012) loss 1.4150 (1.2211) acc 62.5000 (68.3333) lr 1.9511e-03 eta 8:23:59 +epoch [7/50] batch [80/500] time 1.366 (1.378) data 0.000 (0.011) loss 0.6182 (1.2036) acc 84.3750 (68.7891) lr 1.9511e-03 eta 8:23:31 +epoch [7/50] batch [85/500] time 1.384 (1.378) data 0.000 (0.010) loss 1.0068 (1.2035) acc 65.6250 (68.8235) lr 1.9511e-03 eta 8:23:17 +epoch [7/50] batch [90/500] time 1.346 (1.377) data 0.000 (0.010) loss 1.6064 (1.2006) acc 68.7500 (69.2014) lr 1.9511e-03 eta 8:22:47 +epoch [7/50] batch [95/500] time 1.373 (1.376) data 0.000 (0.009) loss 1.2051 (1.2070) acc 81.2500 (69.4079) lr 1.9511e-03 eta 8:22:24 +epoch [7/50] batch [100/500] time 1.360 (1.375) data 0.000 (0.009) loss 1.5146 (1.2071) acc 65.6250 (69.6562) lr 1.9511e-03 eta 8:21:59 +epoch [7/50] batch [105/500] time 1.361 (1.375) data 0.000 (0.008) loss 1.0205 (1.2009) acc 75.0000 (69.6726) lr 1.9511e-03 eta 8:21:35 +epoch [7/50] batch [110/500] time 1.352 (1.375) data 0.000 (0.008) loss 1.4600 (1.1979) acc 65.6250 (69.6875) lr 1.9511e-03 eta 8:21:44 +epoch [7/50] batch [115/500] time 1.359 (1.374) data 0.000 (0.008) loss 0.7749 (1.1887) acc 75.0000 (70.0543) lr 1.9511e-03 eta 8:21:19 +epoch [7/50] batch [120/500] time 1.349 (1.373) data 0.000 (0.007) loss 1.4082 (1.1908) acc 65.6250 (70.0781) lr 1.9511e-03 eta 8:20:50 +epoch [7/50] batch [125/500] time 1.361 (1.373) data 0.000 (0.007) loss 0.4717 (1.1833) acc 84.3750 (70.2250) lr 1.9511e-03 eta 8:20:29 +epoch [7/50] batch [130/500] time 1.362 (1.372) data 0.000 (0.007) loss 1.9824 (1.1917) acc 50.0000 (69.9519) lr 1.9511e-03 eta 8:20:14 
+epoch [7/50] batch [135/500] time 1.380 (1.372) data 0.000 (0.007) loss 0.8564 (1.1917) acc 71.8750 (69.9074) lr 1.9511e-03 eta 8:20:08 +epoch [7/50] batch [140/500] time 1.377 (1.372) data 0.000 (0.006) loss 1.0654 (1.1902) acc 75.0000 (70.0000) lr 1.9511e-03 eta 8:19:58 +epoch [7/50] batch [145/500] time 1.366 (1.372) data 0.000 (0.006) loss 0.8828 (1.1816) acc 81.2500 (70.2586) lr 1.9511e-03 eta 8:19:51 +epoch [7/50] batch [150/500] time 1.456 (1.372) data 0.000 (0.006) loss 0.6538 (1.1760) acc 81.2500 (70.3958) lr 1.9511e-03 eta 8:19:45 +epoch [7/50] batch [155/500] time 1.364 (1.372) data 0.000 (0.006) loss 1.5928 (1.1798) acc 62.5000 (70.4637) lr 1.9511e-03 eta 8:19:33 +epoch [7/50] batch [160/500] time 1.363 (1.372) data 0.000 (0.006) loss 1.1650 (1.1772) acc 59.3750 (70.3516) lr 1.9511e-03 eta 8:19:15 +epoch [7/50] batch [165/500] time 1.360 (1.371) data 0.000 (0.005) loss 1.0322 (1.1768) acc 75.0000 (70.4167) lr 1.9511e-03 eta 8:19:01 +epoch [7/50] batch [170/500] time 1.364 (1.371) data 0.000 (0.005) loss 1.0986 (1.1747) acc 65.6250 (70.4412) lr 1.9511e-03 eta 8:18:53 +epoch [7/50] batch [175/500] time 1.357 (1.371) data 0.000 (0.005) loss 1.4248 (1.1728) acc 68.7500 (70.4464) lr 1.9511e-03 eta 8:18:37 +epoch [7/50] batch [180/500] time 1.350 (1.370) data 0.000 (0.005) loss 1.2910 (1.1696) acc 71.8750 (70.4861) lr 1.9511e-03 eta 8:18:23 +epoch [7/50] batch [185/500] time 1.376 (1.370) data 0.000 (0.005) loss 0.9663 (1.1628) acc 81.2500 (70.6757) lr 1.9511e-03 eta 8:18:13 +epoch [7/50] batch [190/500] time 1.351 (1.370) data 0.000 (0.005) loss 1.2021 (1.1690) acc 81.2500 (70.5757) lr 1.9511e-03 eta 8:18:03 +epoch [7/50] batch [195/500] time 1.374 (1.370) data 0.000 (0.005) loss 0.7979 (1.1753) acc 81.2500 (70.6090) lr 1.9511e-03 eta 8:17:53 +epoch [7/50] batch [200/500] time 1.361 (1.370) data 0.000 (0.005) loss 1.2393 (1.1781) acc 59.3750 (70.4219) lr 1.9511e-03 eta 8:17:46 +epoch [7/50] batch [205/500] time 1.349 (1.370) data 0.000 (0.004) loss 2.0801 
(1.1839) acc 59.3750 (70.3201) lr 1.9511e-03 eta 8:17:37 +epoch [7/50] batch [210/500] time 1.374 (1.370) data 0.000 (0.004) loss 1.1895 (1.1874) acc 62.5000 (70.2232) lr 1.9511e-03 eta 8:17:26 +epoch [7/50] batch [215/500] time 1.350 (1.369) data 0.000 (0.004) loss 0.6333 (1.1855) acc 87.5000 (70.2471) lr 1.9511e-03 eta 8:17:12 +epoch [7/50] batch [220/500] time 1.342 (1.369) data 0.000 (0.004) loss 1.4941 (1.1905) acc 75.0000 (70.2557) lr 1.9511e-03 eta 8:16:57 +epoch [7/50] batch [225/500] time 1.360 (1.369) data 0.000 (0.004) loss 0.8735 (1.1840) acc 78.1250 (70.4028) lr 1.9511e-03 eta 8:16:43 +epoch [7/50] batch [230/500] time 1.375 (1.369) data 0.000 (0.004) loss 1.0107 (1.1828) acc 78.1250 (70.5571) lr 1.9511e-03 eta 8:16:32 +epoch [7/50] batch [235/500] time 1.347 (1.368) data 0.000 (0.004) loss 1.2793 (1.1800) acc 56.2500 (70.4920) lr 1.9511e-03 eta 8:16:21 +epoch [7/50] batch [240/500] time 1.368 (1.368) data 0.000 (0.004) loss 1.8057 (1.1768) acc 65.6250 (70.5469) lr 1.9511e-03 eta 8:16:13 +epoch [7/50] batch [245/500] time 1.360 (1.368) data 0.000 (0.004) loss 1.0547 (1.1828) acc 71.8750 (70.4464) lr 1.9511e-03 eta 8:16:03 +epoch [7/50] batch [250/500] time 1.379 (1.369) data 0.001 (0.004) loss 1.2031 (1.1866) acc 68.7500 (70.3625) lr 1.9511e-03 eta 8:16:05 +epoch [7/50] batch [255/500] time 1.385 (1.369) data 0.000 (0.004) loss 1.1406 (1.1845) acc 65.6250 (70.3186) lr 1.9511e-03 eta 8:16:03 +epoch [7/50] batch [260/500] time 1.376 (1.369) data 0.001 (0.004) loss 1.3672 (1.1879) acc 65.6250 (70.3365) lr 1.9511e-03 eta 8:16:02 +epoch [7/50] batch [265/500] time 1.377 (1.369) data 0.000 (0.004) loss 0.8809 (1.1850) acc 78.1250 (70.4009) lr 1.9511e-03 eta 8:15:55 +epoch [7/50] batch [270/500] time 1.364 (1.369) data 0.000 (0.003) loss 1.3369 (1.1888) acc 71.8750 (70.3935) lr 1.9511e-03 eta 8:15:47 +epoch [7/50] batch [275/500] time 1.361 (1.369) data 0.000 (0.003) loss 1.1445 (1.1851) acc 65.6250 (70.4659) lr 1.9511e-03 eta 8:15:37 +epoch [7/50] batch 
[280/500] time 1.373 (1.369) data 0.001 (0.003) loss 1.0137 (1.1826) acc 65.6250 (70.4911) lr 1.9511e-03 eta 8:15:32 +epoch [7/50] batch [285/500] time 1.383 (1.369) data 0.000 (0.003) loss 1.0420 (1.1848) acc 65.6250 (70.4934) lr 1.9511e-03 eta 8:15:24 +epoch [7/50] batch [290/500] time 1.349 (1.369) data 0.000 (0.003) loss 1.2686 (1.1815) acc 68.7500 (70.4957) lr 1.9511e-03 eta 8:15:14 +epoch [7/50] batch [295/500] time 1.359 (1.369) data 0.000 (0.003) loss 0.7778 (1.1771) acc 81.2500 (70.6038) lr 1.9511e-03 eta 8:15:12 +epoch [7/50] batch [300/500] time 1.363 (1.369) data 0.000 (0.003) loss 1.0879 (1.1766) acc 71.8750 (70.6562) lr 1.9511e-03 eta 8:15:03 +epoch [7/50] batch [305/500] time 1.374 (1.369) data 0.001 (0.003) loss 1.1777 (1.1720) acc 71.8750 (70.7480) lr 1.9511e-03 eta 8:14:53 +epoch [7/50] batch [310/500] time 1.374 (1.369) data 0.000 (0.003) loss 1.1865 (1.1716) acc 78.1250 (70.7359) lr 1.9511e-03 eta 8:14:47 +epoch [7/50] batch [315/500] time 1.353 (1.369) data 0.000 (0.003) loss 0.8892 (1.1678) acc 81.2500 (70.8234) lr 1.9511e-03 eta 8:14:39 +epoch [7/50] batch [320/500] time 1.379 (1.369) data 0.000 (0.003) loss 0.8164 (1.1658) acc 81.2500 (70.8984) lr 1.9511e-03 eta 8:14:33 +epoch [7/50] batch [325/500] time 1.369 (1.369) data 0.000 (0.003) loss 1.8525 (1.1661) acc 46.8750 (70.8558) lr 1.9511e-03 eta 8:14:23 +epoch [7/50] batch [330/500] time 1.348 (1.368) data 0.000 (0.003) loss 1.4551 (1.1647) acc 65.6250 (70.8996) lr 1.9511e-03 eta 8:14:11 +epoch [7/50] batch [335/500] time 1.368 (1.368) data 0.000 (0.003) loss 1.7998 (1.1678) acc 59.3750 (70.8489) lr 1.9511e-03 eta 8:14:05 +epoch [7/50] batch [340/500] time 1.348 (1.368) data 0.000 (0.003) loss 1.1904 (1.1684) acc 71.8750 (70.7629) lr 1.9511e-03 eta 8:13:54 +epoch [7/50] batch [345/500] time 1.369 (1.368) data 0.000 (0.003) loss 0.9292 (1.1703) acc 84.3750 (70.7518) lr 1.9511e-03 eta 8:13:44 +epoch [7/50] batch [350/500] time 1.384 (1.368) data 0.000 (0.003) loss 1.4941 (1.1693) acc 65.6250 
(70.7946) lr 1.9511e-03 eta 8:13:36 +epoch [7/50] batch [355/500] time 1.364 (1.368) data 0.000 (0.003) loss 0.8535 (1.1678) acc 75.0000 (70.7746) lr 1.9511e-03 eta 8:13:26 +epoch [7/50] batch [360/500] time 1.378 (1.368) data 0.000 (0.003) loss 1.5146 (1.1698) acc 53.1250 (70.6337) lr 1.9511e-03 eta 8:13:19 +epoch [7/50] batch [365/500] time 1.373 (1.368) data 0.000 (0.003) loss 0.9160 (1.1663) acc 68.7500 (70.6849) lr 1.9511e-03 eta 8:13:12 +epoch [7/50] batch [370/500] time 1.355 (1.368) data 0.000 (0.003) loss 0.8389 (1.1663) acc 84.3750 (70.6672) lr 1.9511e-03 eta 8:13:02 +epoch [7/50] batch [375/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.2461 (1.1672) acc 65.6250 (70.6833) lr 1.9511e-03 eta 8:12:54 +epoch [7/50] batch [380/500] time 1.357 (1.367) data 0.001 (0.003) loss 1.1865 (1.1705) acc 65.6250 (70.5921) lr 1.9511e-03 eta 8:12:44 +epoch [7/50] batch [385/500] time 1.362 (1.367) data 0.000 (0.003) loss 1.0518 (1.1696) acc 68.7500 (70.5925) lr 1.9511e-03 eta 8:12:38 +epoch [7/50] batch [390/500] time 1.384 (1.368) data 0.000 (0.003) loss 1.0342 (1.1710) acc 71.8750 (70.5929) lr 1.9511e-03 eta 8:12:31 +epoch [7/50] batch [395/500] time 1.364 (1.368) data 0.000 (0.002) loss 0.4795 (1.1682) acc 87.5000 (70.6329) lr 1.9511e-03 eta 8:12:30 +epoch [7/50] batch [400/500] time 1.355 (1.368) data 0.000 (0.002) loss 0.7876 (1.1687) acc 71.8750 (70.5625) lr 1.9511e-03 eta 8:12:20 +epoch [7/50] batch [405/500] time 1.359 (1.368) data 0.001 (0.002) loss 1.7861 (1.1726) acc 59.3750 (70.5170) lr 1.9511e-03 eta 8:12:11 +epoch [7/50] batch [410/500] time 1.366 (1.368) data 0.000 (0.002) loss 0.9360 (1.1732) acc 75.0000 (70.5412) lr 1.9511e-03 eta 8:12:04 +epoch [7/50] batch [415/500] time 1.366 (1.368) data 0.000 (0.002) loss 0.6562 (1.1710) acc 78.1250 (70.6024) lr 1.9511e-03 eta 8:11:57 +epoch [7/50] batch [420/500] time 1.351 (1.367) data 0.000 (0.002) loss 1.3867 (1.1717) acc 56.2500 (70.5878) lr 1.9511e-03 eta 8:11:49 +epoch [7/50] batch [425/500] time 1.362 
(1.367) data 0.000 (0.002) loss 0.7241 (1.1718) acc 78.1250 (70.5956) lr 1.9511e-03 eta 8:11:41 +epoch [7/50] batch [430/500] time 1.361 (1.367) data 0.000 (0.002) loss 1.1816 (1.1742) acc 71.8750 (70.5814) lr 1.9511e-03 eta 8:11:35 +epoch [7/50] batch [435/500] time 1.358 (1.367) data 0.000 (0.002) loss 0.9126 (1.1731) acc 78.1250 (70.6178) lr 1.9511e-03 eta 8:11:26 +epoch [7/50] batch [440/500] time 1.357 (1.368) data 0.000 (0.002) loss 1.3359 (1.1746) acc 81.2500 (70.6605) lr 1.9511e-03 eta 8:11:24 +epoch [7/50] batch [445/500] time 1.382 (1.367) data 0.000 (0.002) loss 0.9116 (1.1712) acc 75.0000 (70.7303) lr 1.9511e-03 eta 8:11:16 +epoch [7/50] batch [450/500] time 1.364 (1.368) data 0.000 (0.002) loss 1.1357 (1.1701) acc 65.6250 (70.7153) lr 1.9511e-03 eta 8:11:10 +epoch [7/50] batch [455/500] time 1.357 (1.367) data 0.000 (0.002) loss 1.3389 (1.1691) acc 71.8750 (70.7624) lr 1.9511e-03 eta 8:11:01 +epoch [7/50] batch [460/500] time 1.365 (1.367) data 0.000 (0.002) loss 0.6040 (1.1700) acc 78.1250 (70.7405) lr 1.9511e-03 eta 8:10:53 +epoch [7/50] batch [465/500] time 1.370 (1.367) data 0.000 (0.002) loss 0.9419 (1.1710) acc 78.1250 (70.7258) lr 1.9511e-03 eta 8:10:45 +epoch [7/50] batch [470/500] time 1.368 (1.367) data 0.000 (0.002) loss 0.9775 (1.1705) acc 75.0000 (70.7447) lr 1.9511e-03 eta 8:10:37 +epoch [7/50] batch [475/500] time 1.384 (1.367) data 0.000 (0.002) loss 1.6396 (1.1726) acc 65.6250 (70.7039) lr 1.9511e-03 eta 8:10:33 +epoch [7/50] batch [480/500] time 1.375 (1.367) data 0.000 (0.002) loss 0.8223 (1.1715) acc 75.0000 (70.7031) lr 1.9511e-03 eta 8:10:23 +epoch [7/50] batch [485/500] time 1.368 (1.367) data 0.001 (0.002) loss 1.3066 (1.1696) acc 65.6250 (70.7023) lr 1.9511e-03 eta 8:10:17 +epoch [7/50] batch [490/500] time 1.356 (1.367) data 0.000 (0.002) loss 1.5576 (1.1683) acc 59.3750 (70.7398) lr 1.9511e-03 eta 8:10:09 +epoch [7/50] batch [495/500] time 1.386 (1.367) data 0.000 (0.002) loss 1.3408 (1.1695) acc 65.6250 (70.7323) lr 
1.9511e-03 eta 8:10:01 +epoch [7/50] batch [500/500] time 1.357 (1.367) data 0.000 (0.002) loss 0.4719 (1.1684) acc 90.6250 (70.7687) lr 1.9298e-03 eta 8:09:53 +epoch [8/50] batch [5/500] time 1.356 (1.535) data 0.000 (0.177) loss 1.6553 (1.4426) acc 71.8750 (66.8750) lr 1.9298e-03 eta 9:10:03 +epoch [8/50] batch [10/500] time 1.356 (1.449) data 0.000 (0.089) loss 1.0615 (1.2605) acc 65.6250 (67.5000) lr 1.9298e-03 eta 8:38:54 +epoch [8/50] batch [15/500] time 1.348 (1.418) data 0.001 (0.059) loss 0.8975 (1.1567) acc 78.1250 (70.0000) lr 1.9298e-03 eta 8:27:51 +epoch [8/50] batch [20/500] time 1.371 (1.405) data 0.000 (0.045) loss 1.4971 (1.2039) acc 65.6250 (69.3750) lr 1.9298e-03 eta 8:22:49 +epoch [8/50] batch [25/500] time 1.359 (1.397) data 0.000 (0.036) loss 1.2949 (1.1753) acc 71.8750 (69.8750) lr 1.9298e-03 eta 8:19:51 +epoch [8/50] batch [30/500] time 1.359 (1.391) data 0.000 (0.030) loss 1.4688 (1.1672) acc 65.6250 (70.1042) lr 1.9298e-03 eta 8:17:47 +epoch [8/50] batch [35/500] time 1.377 (1.388) data 0.000 (0.026) loss 1.2432 (1.1798) acc 62.5000 (70.2679) lr 1.9298e-03 eta 8:16:41 +epoch [8/50] batch [40/500] time 1.359 (1.386) data 0.000 (0.022) loss 1.2881 (1.1628) acc 75.0000 (71.3281) lr 1.9298e-03 eta 8:15:37 +epoch [8/50] batch [45/500] time 1.394 (1.384) data 0.000 (0.020) loss 0.7329 (1.1377) acc 81.2500 (71.6667) lr 1.9298e-03 eta 8:14:50 +epoch [8/50] batch [50/500] time 1.369 (1.382) data 0.000 (0.018) loss 1.5244 (1.1376) acc 53.1250 (70.9375) lr 1.9298e-03 eta 8:14:07 +epoch [8/50] batch [55/500] time 1.377 (1.381) data 0.008 (0.017) loss 1.2363 (1.1241) acc 71.8750 (71.0227) lr 1.9298e-03 eta 8:13:40 +epoch [8/50] batch [60/500] time 1.369 (1.379) data 0.000 (0.015) loss 1.2979 (1.1487) acc 75.0000 (70.5208) lr 1.9298e-03 eta 8:12:46 +epoch [8/50] batch [65/500] time 1.344 (1.378) data 0.000 (0.014) loss 1.1348 (1.1453) acc 75.0000 (70.7692) lr 1.9298e-03 eta 8:12:08 +epoch [8/50] batch [70/500] time 1.350 (1.376) data 0.000 (0.013) loss 
0.8931 (1.1352) acc 81.2500 (71.2054) lr 1.9298e-03 eta 8:11:35 +epoch [8/50] batch [75/500] time 1.363 (1.375) data 0.000 (0.012) loss 1.8516 (1.1449) acc 62.5000 (71.1667) lr 1.9298e-03 eta 8:11:08 +epoch [8/50] batch [80/500] time 1.368 (1.374) data 0.000 (0.012) loss 1.7197 (1.1559) acc 46.8750 (70.7812) lr 1.9298e-03 eta 8:10:41 +epoch [8/50] batch [85/500] time 1.364 (1.375) data 0.000 (0.011) loss 2.0215 (1.1715) acc 50.0000 (70.5515) lr 1.9298e-03 eta 8:10:46 +epoch [8/50] batch [90/500] time 1.366 (1.374) data 0.000 (0.010) loss 1.5664 (1.1726) acc 62.5000 (70.4167) lr 1.9298e-03 eta 8:10:24 +epoch [8/50] batch [95/500] time 1.346 (1.374) data 0.000 (0.010) loss 0.4370 (1.1643) acc 93.7500 (70.7566) lr 1.9298e-03 eta 8:10:05 +epoch [8/50] batch [100/500] time 1.362 (1.373) data 0.000 (0.009) loss 1.1279 (1.1603) acc 75.0000 (70.6250) lr 1.9298e-03 eta 8:09:47 +epoch [8/50] batch [105/500] time 1.357 (1.373) data 0.000 (0.009) loss 0.9888 (1.1626) acc 78.1250 (70.6250) lr 1.9298e-03 eta 8:09:30 +epoch [8/50] batch [110/500] time 1.358 (1.372) data 0.000 (0.008) loss 1.9941 (1.1708) acc 62.5000 (70.5682) lr 1.9298e-03 eta 8:09:07 +epoch [8/50] batch [115/500] time 1.364 (1.372) data 0.000 (0.008) loss 1.5986 (1.1722) acc 65.6250 (70.6793) lr 1.9298e-03 eta 8:08:54 +epoch [8/50] batch [120/500] time 1.379 (1.371) data 0.000 (0.008) loss 1.1191 (1.1634) acc 75.0000 (70.8594) lr 1.9298e-03 eta 8:08:34 +epoch [8/50] batch [125/500] time 1.344 (1.371) data 0.000 (0.008) loss 1.3340 (1.1629) acc 65.6250 (70.9000) lr 1.9298e-03 eta 8:08:22 +epoch [8/50] batch [130/500] time 1.363 (1.371) data 0.000 (0.007) loss 0.9663 (1.1579) acc 75.0000 (71.0577) lr 1.9298e-03 eta 8:08:20 +epoch [8/50] batch [135/500] time 1.354 (1.370) data 0.001 (0.007) loss 1.0957 (1.1399) acc 71.8750 (71.5046) lr 1.9298e-03 eta 8:07:54 +epoch [8/50] batch [140/500] time 1.346 (1.370) data 0.000 (0.007) loss 1.2969 (1.1373) acc 68.7500 (71.5848) lr 1.9298e-03 eta 8:07:35 +epoch [8/50] batch 
[145/500] time 1.373 (1.369) data 0.000 (0.007) loss 0.9536 (1.1379) acc 59.3750 (71.2931) lr 1.9298e-03 eta 8:07:23 +epoch [8/50] batch [150/500] time 1.345 (1.369) data 0.000 (0.006) loss 0.3591 (1.1386) acc 90.6250 (71.2292) lr 1.9298e-03 eta 8:07:08 +epoch [8/50] batch [155/500] time 1.355 (1.369) data 0.000 (0.006) loss 0.9966 (1.1407) acc 78.1250 (71.3105) lr 1.9298e-03 eta 8:06:57 +epoch [8/50] batch [160/500] time 1.363 (1.368) data 0.000 (0.006) loss 0.7539 (1.1345) acc 75.0000 (71.4453) lr 1.9298e-03 eta 8:06:39 +epoch [8/50] batch [165/500] time 1.351 (1.368) data 0.001 (0.006) loss 1.2402 (1.1389) acc 65.6250 (71.3258) lr 1.9298e-03 eta 8:06:21 +epoch [8/50] batch [170/500] time 1.337 (1.367) data 0.000 (0.006) loss 0.9429 (1.1404) acc 68.7500 (71.3603) lr 1.9298e-03 eta 8:06:04 +epoch [8/50] batch [175/500] time 1.347 (1.367) data 0.000 (0.005) loss 1.4121 (1.1440) acc 62.5000 (71.3571) lr 1.9298e-03 eta 8:05:54 +epoch [8/50] batch [180/500] time 1.365 (1.367) data 0.000 (0.005) loss 1.6309 (1.1499) acc 65.6250 (71.2326) lr 1.9298e-03 eta 8:05:45 +epoch [8/50] batch [185/500] time 1.360 (1.367) data 0.001 (0.005) loss 1.4395 (1.1549) acc 65.6250 (71.0642) lr 1.9298e-03 eta 8:05:30 +epoch [8/50] batch [190/500] time 1.340 (1.366) data 0.000 (0.005) loss 1.0361 (1.1539) acc 75.0000 (71.0033) lr 1.9298e-03 eta 8:05:14 +epoch [8/50] batch [195/500] time 1.365 (1.366) data 0.000 (0.005) loss 1.0254 (1.1506) acc 68.7500 (71.0256) lr 1.9298e-03 eta 8:05:06 +epoch [8/50] batch [200/500] time 1.370 (1.366) data 0.000 (0.005) loss 0.8491 (1.1454) acc 78.1250 (70.9844) lr 1.9298e-03 eta 8:04:57 +epoch [8/50] batch [205/500] time 1.354 (1.366) data 0.001 (0.005) loss 0.5400 (1.1441) acc 87.5000 (71.0366) lr 1.9298e-03 eta 8:04:47 +epoch [8/50] batch [210/500] time 1.376 (1.366) data 0.000 (0.005) loss 1.2471 (1.1396) acc 65.6250 (71.0863) lr 1.9298e-03 eta 8:04:38 +epoch [8/50] batch [215/500] time 1.364 (1.366) data 0.001 (0.005) loss 1.4746 (1.1421) acc 65.6250 
(70.9593) lr 1.9298e-03 eta 8:04:30 +epoch [8/50] batch [220/500] time 1.376 (1.366) data 0.000 (0.004) loss 1.6279 (1.1428) acc 59.3750 (70.9091) lr 1.9298e-03 eta 8:04:24 +epoch [8/50] batch [225/500] time 1.342 (1.366) data 0.000 (0.004) loss 2.1133 (1.1451) acc 62.5000 (70.9583) lr 1.9298e-03 eta 8:04:11 +epoch [8/50] batch [230/500] time 1.360 (1.366) data 0.000 (0.004) loss 0.7534 (1.1387) acc 81.2500 (71.1141) lr 1.9298e-03 eta 8:04:15 +epoch [8/50] batch [235/500] time 1.367 (1.366) data 0.000 (0.004) loss 0.8135 (1.1366) acc 81.2500 (71.1569) lr 1.9298e-03 eta 8:04:04 +epoch [8/50] batch [240/500] time 1.384 (1.366) data 0.000 (0.004) loss 1.2412 (1.1356) acc 75.0000 (71.1849) lr 1.9298e-03 eta 8:03:59 +epoch [8/50] batch [245/500] time 1.361 (1.366) data 0.001 (0.004) loss 1.4658 (1.1337) acc 59.3750 (71.1862) lr 1.9298e-03 eta 8:03:51 +epoch [8/50] batch [250/500] time 1.348 (1.366) data 0.001 (0.004) loss 1.2393 (1.1371) acc 78.1250 (71.1625) lr 1.9298e-03 eta 8:03:39 +epoch [8/50] batch [255/500] time 1.370 (1.366) data 0.000 (0.004) loss 0.7456 (1.1350) acc 75.0000 (71.2623) lr 1.9298e-03 eta 8:03:30 +epoch [8/50] batch [260/500] time 1.373 (1.365) data 0.001 (0.004) loss 0.7759 (1.1318) acc 78.1250 (71.3582) lr 1.9298e-03 eta 8:03:21 +epoch [8/50] batch [265/500] time 1.378 (1.365) data 0.000 (0.004) loss 1.2588 (1.1333) acc 68.7500 (71.3208) lr 1.9298e-03 eta 8:03:13 +epoch [8/50] batch [270/500] time 1.493 (1.366) data 0.000 (0.004) loss 0.9180 (1.1305) acc 71.8750 (71.3310) lr 1.9298e-03 eta 8:03:15 +epoch [8/50] batch [275/500] time 1.378 (1.366) data 0.000 (0.004) loss 0.7246 (1.1283) acc 71.8750 (71.3182) lr 1.9298e-03 eta 8:03:10 +epoch [8/50] batch [280/500] time 1.348 (1.366) data 0.000 (0.004) loss 1.3828 (1.1271) acc 59.3750 (71.3616) lr 1.9298e-03 eta 8:03:00 +epoch [8/50] batch [285/500] time 1.357 (1.366) data 0.000 (0.004) loss 1.0840 (1.1283) acc 71.8750 (71.3487) lr 1.9298e-03 eta 8:02:49 +epoch [8/50] batch [290/500] time 1.395 
(1.366) data 0.000 (0.003) loss 1.5195 (1.1315) acc 65.6250 (71.3147) lr 1.9298e-03 eta 8:02:47 +epoch [8/50] batch [295/500] time 1.370 (1.366) data 0.000 (0.003) loss 1.4844 (1.1337) acc 62.5000 (71.3030) lr 1.9298e-03 eta 8:02:39 +epoch [8/50] batch [300/500] time 1.369 (1.366) data 0.000 (0.003) loss 1.0293 (1.1285) acc 68.7500 (71.3958) lr 1.9298e-03 eta 8:02:31 +epoch [8/50] batch [305/500] time 1.360 (1.366) data 0.000 (0.003) loss 1.6768 (1.1313) acc 62.5000 (71.4037) lr 1.9298e-03 eta 8:02:23 +epoch [8/50] batch [310/500] time 1.369 (1.366) data 0.002 (0.003) loss 1.2646 (1.1318) acc 59.3750 (71.4012) lr 1.9298e-03 eta 8:02:15 +epoch [8/50] batch [315/500] time 1.359 (1.365) data 0.000 (0.003) loss 0.8677 (1.1308) acc 71.8750 (71.4087) lr 1.9298e-03 eta 8:02:06 +epoch [8/50] batch [320/500] time 1.351 (1.365) data 0.000 (0.003) loss 2.0234 (1.1323) acc 65.6250 (71.4160) lr 1.9298e-03 eta 8:01:59 +epoch [8/50] batch [325/500] time 1.359 (1.365) data 0.001 (0.003) loss 1.6621 (1.1328) acc 59.3750 (71.4038) lr 1.9298e-03 eta 8:01:52 +epoch [8/50] batch [330/500] time 1.360 (1.365) data 0.001 (0.003) loss 0.7300 (1.1321) acc 78.1250 (71.4015) lr 1.9298e-03 eta 8:01:46 +epoch [8/50] batch [335/500] time 1.367 (1.365) data 0.000 (0.003) loss 0.9468 (1.1334) acc 75.0000 (71.3526) lr 1.9298e-03 eta 8:01:38 +epoch [8/50] batch [340/500] time 1.353 (1.365) data 0.000 (0.003) loss 0.9893 (1.1337) acc 71.8750 (71.3511) lr 1.9298e-03 eta 8:01:29 +epoch [8/50] batch [345/500] time 1.353 (1.365) data 0.001 (0.003) loss 1.7910 (1.1372) acc 62.5000 (71.2772) lr 1.9298e-03 eta 8:01:23 +epoch [8/50] batch [350/500] time 1.374 (1.365) data 0.000 (0.003) loss 1.1094 (1.1384) acc 68.7500 (71.2500) lr 1.9298e-03 eta 8:01:16 +epoch [8/50] batch [355/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.7236 (1.1408) acc 62.5000 (71.2588) lr 1.9298e-03 eta 8:01:05 +epoch [8/50] batch [360/500] time 1.362 (1.365) data 0.000 (0.003) loss 0.8965 (1.1406) acc 75.0000 (71.2847) lr 
1.9298e-03 eta 8:00:55 +epoch [8/50] batch [365/500] time 1.352 (1.365) data 0.001 (0.003) loss 0.9468 (1.1427) acc 71.8750 (71.2243) lr 1.9298e-03 eta 8:00:43 +epoch [8/50] batch [370/500] time 1.386 (1.365) data 0.000 (0.003) loss 1.3467 (1.1402) acc 68.7500 (71.2753) lr 1.9298e-03 eta 8:00:46 +epoch [8/50] batch [375/500] time 1.351 (1.365) data 0.000 (0.003) loss 1.0889 (1.1428) acc 81.2500 (71.2750) lr 1.9298e-03 eta 8:00:38 +epoch [8/50] batch [380/500] time 1.366 (1.365) data 0.001 (0.003) loss 1.2471 (1.1434) acc 78.1250 (71.3076) lr 1.9298e-03 eta 8:00:31 +epoch [8/50] batch [385/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.7930 (1.1478) acc 56.2500 (71.2419) lr 1.9298e-03 eta 8:00:26 +epoch [8/50] batch [390/500] time 1.353 (1.365) data 0.001 (0.003) loss 1.5645 (1.1473) acc 59.3750 (71.2660) lr 1.9298e-03 eta 8:00:17 +epoch [8/50] batch [395/500] time 1.358 (1.365) data 0.001 (0.003) loss 1.2402 (1.1458) acc 65.6250 (71.3133) lr 1.9298e-03 eta 8:00:09 +epoch [8/50] batch [400/500] time 1.360 (1.365) data 0.001 (0.003) loss 0.9248 (1.1425) acc 75.0000 (71.3750) lr 1.9298e-03 eta 8:00:00 +epoch [8/50] batch [405/500] time 1.357 (1.365) data 0.000 (0.003) loss 1.1631 (1.1437) acc 71.8750 (71.3889) lr 1.9298e-03 eta 7:59:52 +epoch [8/50] batch [410/500] time 1.348 (1.365) data 0.000 (0.003) loss 1.2578 (1.1441) acc 71.8750 (71.4101) lr 1.9298e-03 eta 7:59:45 +epoch [8/50] batch [415/500] time 1.358 (1.365) data 0.000 (0.003) loss 0.9985 (1.1435) acc 71.8750 (71.3855) lr 1.9298e-03 eta 7:59:45 +epoch [8/50] batch [420/500] time 1.358 (1.365) data 0.000 (0.003) loss 1.6797 (1.1458) acc 59.3750 (71.3839) lr 1.9298e-03 eta 7:59:38 +epoch [8/50] batch [425/500] time 1.359 (1.365) data 0.000 (0.003) loss 1.2266 (1.1460) acc 75.0000 (71.3897) lr 1.9298e-03 eta 7:59:30 +epoch [8/50] batch [430/500] time 1.364 (1.365) data 0.000 (0.002) loss 0.9248 (1.1478) acc 78.1250 (71.3808) lr 1.9298e-03 eta 7:59:24 +epoch [8/50] batch [435/500] time 1.345 (1.365) data 
0.000 (0.002) loss 1.5391 (1.1472) acc 68.7500 (71.3937) lr 1.9298e-03 eta 7:59:16 +epoch [8/50] batch [440/500] time 1.353 (1.365) data 0.000 (0.002) loss 0.8882 (1.1458) acc 71.8750 (71.3991) lr 1.9298e-03 eta 7:59:06 +epoch [8/50] batch [445/500] time 1.350 (1.365) data 0.000 (0.002) loss 0.8154 (1.1442) acc 71.8750 (71.4256) lr 1.9298e-03 eta 7:58:56 +epoch [8/50] batch [450/500] time 1.364 (1.365) data 0.000 (0.002) loss 1.1621 (1.1429) acc 65.6250 (71.4514) lr 1.9298e-03 eta 7:58:51 +epoch [8/50] batch [455/500] time 1.366 (1.365) data 0.000 (0.002) loss 1.1934 (1.1428) acc 75.0000 (71.4286) lr 1.9298e-03 eta 7:58:42 +epoch [8/50] batch [460/500] time 1.353 (1.365) data 0.000 (0.002) loss 1.1709 (1.1425) acc 68.7500 (71.3927) lr 1.9298e-03 eta 7:58:34 +epoch [8/50] batch [465/500] time 1.363 (1.365) data 0.000 (0.002) loss 0.9214 (1.1420) acc 78.1250 (71.4113) lr 1.9298e-03 eta 7:58:27 +epoch [8/50] batch [470/500] time 1.351 (1.365) data 0.000 (0.002) loss 0.4817 (1.1409) acc 84.3750 (71.4894) lr 1.9298e-03 eta 7:58:19 +epoch [8/50] batch [475/500] time 1.376 (1.365) data 0.000 (0.002) loss 0.7246 (1.1417) acc 81.2500 (71.5000) lr 1.9298e-03 eta 7:58:12 +epoch [8/50] batch [480/500] time 1.373 (1.365) data 0.000 (0.002) loss 0.9126 (1.1419) acc 78.1250 (71.5169) lr 1.9298e-03 eta 7:58:07 +epoch [8/50] batch [485/500] time 1.361 (1.365) data 0.001 (0.002) loss 1.5762 (1.1432) acc 65.6250 (71.4820) lr 1.9298e-03 eta 7:57:59 +epoch [8/50] batch [490/500] time 1.340 (1.365) data 0.000 (0.002) loss 1.4912 (1.1451) acc 56.2500 (71.4413) lr 1.9298e-03 eta 7:57:50 +epoch [8/50] batch [495/500] time 1.368 (1.365) data 0.000 (0.002) loss 1.2812 (1.1463) acc 71.8750 (71.4710) lr 1.9298e-03 eta 7:57:42 +epoch [8/50] batch [500/500] time 1.357 (1.365) data 0.000 (0.002) loss 1.3008 (1.1475) acc 68.7500 (71.4375) lr 1.9048e-03 eta 7:57:35 +epoch [9/50] batch [5/500] time 1.365 (1.536) data 0.000 (0.166) loss 1.2090 (1.2406) acc 62.5000 (66.8750) lr 1.9048e-03 eta 8:57:20 
+epoch [9/50] batch [10/500] time 1.358 (1.463) data 0.000 (0.083) loss 1.3271 (1.2690) acc 59.3750 (65.9375) lr 1.9048e-03 eta 8:31:57 +epoch [9/50] batch [15/500] time 1.367 (1.427) data 0.000 (0.055) loss 0.8154 (1.2499) acc 71.8750 (66.4583) lr 1.9048e-03 eta 8:19:10 +epoch [9/50] batch [20/500] time 1.367 (1.413) data 0.000 (0.042) loss 1.0166 (1.2537) acc 81.2500 (66.5625) lr 1.9048e-03 eta 8:13:59 +epoch [9/50] batch [25/500] time 1.366 (1.405) data 0.000 (0.033) loss 0.8799 (1.2021) acc 75.0000 (68.0000) lr 1.9048e-03 eta 8:11:08 +epoch [9/50] batch [30/500] time 1.380 (1.400) data 0.001 (0.028) loss 0.7842 (1.1587) acc 75.0000 (69.4792) lr 1.9048e-03 eta 8:09:13 +epoch [9/50] batch [35/500] time 1.385 (1.396) data 0.000 (0.024) loss 0.8813 (1.1496) acc 78.1250 (69.8214) lr 1.9048e-03 eta 8:07:48 +epoch [9/50] batch [40/500] time 1.375 (1.394) data 0.000 (0.021) loss 1.2979 (1.1691) acc 68.7500 (69.2188) lr 1.9048e-03 eta 8:06:52 +epoch [9/50] batch [45/500] time 1.357 (1.391) data 0.001 (0.019) loss 1.0635 (1.1701) acc 75.0000 (69.4444) lr 1.9048e-03 eta 8:05:43 +epoch [9/50] batch [50/500] time 1.367 (1.387) data 0.001 (0.017) loss 1.4609 (1.1807) acc 62.5000 (68.9375) lr 1.9048e-03 eta 8:04:20 +epoch [9/50] batch [55/500] time 1.352 (1.385) data 0.000 (0.015) loss 1.7246 (1.1993) acc 65.6250 (69.0909) lr 1.9048e-03 eta 8:03:20 +epoch [9/50] batch [60/500] time 1.351 (1.383) data 0.000 (0.014) loss 1.5625 (1.2124) acc 56.2500 (68.6979) lr 1.9048e-03 eta 8:02:34 +epoch [9/50] batch [65/500] time 1.354 (1.381) data 0.000 (0.013) loss 1.1074 (1.2167) acc 75.0000 (68.7500) lr 1.9048e-03 eta 8:01:47 +epoch [9/50] batch [70/500] time 1.355 (1.381) data 0.001 (0.012) loss 1.4453 (1.2006) acc 75.0000 (69.4643) lr 1.9048e-03 eta 8:01:52 +epoch [9/50] batch [75/500] time 1.360 (1.380) data 0.000 (0.011) loss 1.0947 (1.1969) acc 65.6250 (69.5417) lr 1.9048e-03 eta 8:01:14 +epoch [9/50] batch [80/500] time 1.355 (1.378) data 0.001 (0.011) loss 1.3984 (1.2201) acc 
53.1250 (69.1016) lr 1.9048e-03 eta 8:00:32 +epoch [9/50] batch [85/500] time 1.385 (1.378) data 0.001 (0.010) loss 1.0146 (1.2260) acc 59.3750 (68.7132) lr 1.9048e-03 eta 8:00:11 +epoch [9/50] batch [90/500] time 1.346 (1.377) data 0.001 (0.010) loss 0.9707 (1.2137) acc 78.1250 (69.0278) lr 1.9048e-03 eta 7:59:45 +epoch [9/50] batch [95/500] time 1.350 (1.375) data 0.000 (0.009) loss 0.9668 (1.2069) acc 68.7500 (69.1118) lr 1.9048e-03 eta 7:59:05 +epoch [9/50] batch [100/500] time 1.386 (1.374) data 0.000 (0.009) loss 2.1016 (1.2106) acc 56.2500 (69.0938) lr 1.9048e-03 eta 7:58:42 +epoch [9/50] batch [105/500] time 1.364 (1.374) data 0.000 (0.008) loss 1.2998 (1.2031) acc 71.8750 (69.4643) lr 1.9048e-03 eta 7:58:25 +epoch [9/50] batch [110/500] time 1.340 (1.373) data 0.000 (0.008) loss 0.6660 (1.1996) acc 78.1250 (69.6023) lr 1.9048e-03 eta 7:57:59 +epoch [9/50] batch [115/500] time 1.363 (1.374) data 0.001 (0.008) loss 0.8130 (1.1927) acc 75.0000 (69.6467) lr 1.9048e-03 eta 7:58:13 +epoch [9/50] batch [120/500] time 1.368 (1.373) data 0.001 (0.007) loss 1.1309 (1.1931) acc 71.8750 (69.8698) lr 1.9048e-03 eta 7:57:45 +epoch [9/50] batch [125/500] time 1.358 (1.373) data 0.000 (0.007) loss 0.9639 (1.1867) acc 71.8750 (70.0500) lr 1.9048e-03 eta 7:57:37 +epoch [9/50] batch [130/500] time 1.373 (1.372) data 0.000 (0.007) loss 1.2930 (1.1878) acc 75.0000 (70.1923) lr 1.9048e-03 eta 7:57:22 +epoch [9/50] batch [135/500] time 1.361 (1.372) data 0.000 (0.007) loss 1.2207 (1.1859) acc 62.5000 (70.1620) lr 1.9048e-03 eta 7:57:09 +epoch [9/50] batch [140/500] time 1.362 (1.372) data 0.000 (0.006) loss 0.9805 (1.1803) acc 71.8750 (70.1339) lr 1.9048e-03 eta 7:56:59 +epoch [9/50] batch [145/500] time 1.361 (1.372) data 0.000 (0.006) loss 0.9067 (1.1713) acc 87.5000 (70.2802) lr 1.9048e-03 eta 7:56:48 +epoch [9/50] batch [150/500] time 1.348 (1.372) data 0.001 (0.006) loss 0.7383 (1.1628) acc 75.0000 (70.3750) lr 1.9048e-03 eta 7:56:36 +epoch [9/50] batch [155/500] time 1.353 
(1.371) data 0.000 (0.006) loss 0.8784 (1.1580) acc 81.2500 (70.4234) lr 1.9048e-03 eta 7:56:23 +epoch [9/50] batch [160/500] time 1.385 (1.371) data 0.000 (0.006) loss 1.3467 (1.1598) acc 59.3750 (70.4883) lr 1.9048e-03 eta 7:56:14 +epoch [9/50] batch [165/500] time 1.369 (1.371) data 0.000 (0.005) loss 1.8350 (1.1622) acc 62.5000 (70.4356) lr 1.9048e-03 eta 7:56:00 +epoch [9/50] batch [170/500] time 1.373 (1.371) data 0.000 (0.005) loss 1.0303 (1.1613) acc 71.8750 (70.4228) lr 1.9048e-03 eta 7:55:53 +epoch [9/50] batch [175/500] time 1.366 (1.371) data 0.000 (0.005) loss 1.0547 (1.1574) acc 71.8750 (70.5357) lr 1.9048e-03 eta 7:55:41 +epoch [9/50] batch [180/500] time 1.364 (1.370) data 0.001 (0.005) loss 0.9673 (1.1571) acc 78.1250 (70.5035) lr 1.9048e-03 eta 7:55:32 +epoch [9/50] batch [185/500] time 1.355 (1.370) data 0.000 (0.005) loss 1.6152 (1.1586) acc 62.5000 (70.5912) lr 1.9048e-03 eta 7:55:24 +epoch [9/50] batch [190/500] time 1.357 (1.370) data 0.000 (0.005) loss 1.0986 (1.1585) acc 68.7500 (70.5428) lr 1.9048e-03 eta 7:55:15 +epoch [9/50] batch [195/500] time 1.379 (1.370) data 0.000 (0.005) loss 1.5928 (1.1605) acc 56.2500 (70.4487) lr 1.9048e-03 eta 7:55:05 +epoch [9/50] batch [200/500] time 1.356 (1.370) data 0.000 (0.005) loss 0.9912 (1.1582) acc 71.8750 (70.5156) lr 1.9048e-03 eta 7:54:55 +epoch [9/50] batch [205/500] time 1.339 (1.369) data 0.000 (0.004) loss 0.7622 (1.1574) acc 75.0000 (70.5030) lr 1.9048e-03 eta 7:54:37 +epoch [9/50] batch [210/500] time 1.339 (1.369) data 0.000 (0.004) loss 1.3154 (1.1625) acc 68.7500 (70.4911) lr 1.9048e-03 eta 7:54:19 +epoch [9/50] batch [215/500] time 1.369 (1.369) data 0.000 (0.004) loss 1.9951 (1.1658) acc 59.3750 (70.4797) lr 1.9048e-03 eta 7:54:22 +epoch [9/50] batch [220/500] time 1.374 (1.369) data 0.000 (0.004) loss 0.8984 (1.1623) acc 68.7500 (70.4119) lr 1.9048e-03 eta 7:54:14 +epoch [9/50] batch [225/500] time 1.357 (1.369) data 0.000 (0.004) loss 0.8931 (1.1603) acc 71.8750 (70.4583) lr 
1.9048e-03 eta 7:54:09 +epoch [9/50] batch [230/500] time 1.362 (1.369) data 0.000 (0.004) loss 1.3428 (1.1679) acc 65.6250 (70.2582) lr 1.9048e-03 eta 7:53:58 +epoch [9/50] batch [235/500] time 1.365 (1.369) data 0.000 (0.004) loss 0.6377 (1.1624) acc 75.0000 (70.3590) lr 1.9048e-03 eta 7:53:53 +epoch [9/50] batch [240/500] time 1.381 (1.369) data 0.001 (0.004) loss 1.0449 (1.1662) acc 75.0000 (70.3906) lr 1.9048e-03 eta 7:53:44 +epoch [9/50] batch [245/500] time 1.351 (1.369) data 0.000 (0.004) loss 0.4644 (1.1628) acc 81.2500 (70.4974) lr 1.9048e-03 eta 7:53:32 +epoch [9/50] batch [250/500] time 1.357 (1.369) data 0.000 (0.004) loss 1.3799 (1.1612) acc 62.5000 (70.5000) lr 1.9048e-03 eta 7:53:19 +epoch [9/50] batch [255/500] time 1.481 (1.369) data 0.001 (0.004) loss 0.9595 (1.1622) acc 75.0000 (70.4412) lr 1.9048e-03 eta 7:53:20 +epoch [9/50] batch [260/500] time 1.366 (1.369) data 0.000 (0.004) loss 0.6309 (1.1610) acc 81.2500 (70.4808) lr 1.9048e-03 eta 7:53:08 +epoch [9/50] batch [265/500] time 1.359 (1.369) data 0.000 (0.004) loss 1.1768 (1.1627) acc 71.8750 (70.4481) lr 1.9048e-03 eta 7:53:00 +epoch [9/50] batch [270/500] time 1.363 (1.369) data 0.000 (0.003) loss 1.2930 (1.1619) acc 59.3750 (70.4398) lr 1.9048e-03 eta 7:52:51 +epoch [9/50] batch [275/500] time 1.382 (1.369) data 0.000 (0.003) loss 0.4639 (1.1598) acc 81.2500 (70.5000) lr 1.9048e-03 eta 7:52:44 +epoch [9/50] batch [280/500] time 1.373 (1.369) data 0.000 (0.003) loss 0.7627 (1.1584) acc 81.2500 (70.5804) lr 1.9048e-03 eta 7:52:36 +epoch [9/50] batch [285/500] time 1.381 (1.368) data 0.000 (0.003) loss 0.5171 (1.1606) acc 81.2500 (70.4934) lr 1.9048e-03 eta 7:52:27 +epoch [9/50] batch [290/500] time 1.375 (1.369) data 0.000 (0.003) loss 0.8486 (1.1607) acc 78.1250 (70.4634) lr 1.9048e-03 eta 7:52:22 +epoch [9/50] batch [295/500] time 1.353 (1.368) data 0.000 (0.003) loss 1.5527 (1.1608) acc 65.6250 (70.4767) lr 1.9048e-03 eta 7:52:14 +epoch [9/50] batch [300/500] time 1.364 (1.368) data 
0.000 (0.003) loss 0.7695 (1.1569) acc 78.1250 (70.5312) lr 1.9048e-03 eta 7:52:06 +epoch [9/50] batch [305/500] time 1.371 (1.368) data 0.001 (0.003) loss 1.1377 (1.1526) acc 75.0000 (70.6250) lr 1.9048e-03 eta 7:51:57 +epoch [9/50] batch [310/500] time 1.356 (1.368) data 0.000 (0.003) loss 1.3525 (1.1534) acc 65.6250 (70.5343) lr 1.9048e-03 eta 7:51:45 +epoch [9/50] batch [315/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.2402 (1.1527) acc 75.0000 (70.5754) lr 1.9048e-03 eta 7:51:36 +epoch [9/50] batch [320/500] time 1.376 (1.368) data 0.000 (0.003) loss 1.7666 (1.1534) acc 59.3750 (70.5469) lr 1.9048e-03 eta 7:51:26 +epoch [9/50] batch [325/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.1592 (1.1543) acc 75.0000 (70.5577) lr 1.9048e-03 eta 7:51:18 +epoch [9/50] batch [330/500] time 1.353 (1.368) data 0.000 (0.003) loss 0.7168 (1.1528) acc 75.0000 (70.6439) lr 1.9048e-03 eta 7:51:10 +epoch [9/50] batch [335/500] time 1.372 (1.368) data 0.000 (0.003) loss 0.7163 (1.1506) acc 81.2500 (70.6810) lr 1.9048e-03 eta 7:51:02 +epoch [9/50] batch [340/500] time 1.361 (1.368) data 0.001 (0.003) loss 0.8906 (1.1491) acc 71.8750 (70.7169) lr 1.9048e-03 eta 7:50:54 +epoch [9/50] batch [345/500] time 1.371 (1.368) data 0.000 (0.003) loss 1.0195 (1.1487) acc 75.0000 (70.7246) lr 1.9048e-03 eta 7:50:47 +epoch [9/50] batch [350/500] time 1.340 (1.367) data 0.000 (0.003) loss 1.2686 (1.1463) acc 59.3750 (70.7321) lr 1.9048e-03 eta 7:50:38 +epoch [9/50] batch [355/500] time 1.343 (1.368) data 0.000 (0.003) loss 1.0947 (1.1419) acc 68.7500 (70.7658) lr 1.9048e-03 eta 7:50:35 +epoch [9/50] batch [360/500] time 1.381 (1.368) data 0.000 (0.003) loss 1.2500 (1.1388) acc 78.1250 (70.8681) lr 1.9048e-03 eta 7:50:31 +epoch [9/50] batch [365/500] time 1.353 (1.368) data 0.000 (0.003) loss 1.6709 (1.1419) acc 59.3750 (70.8818) lr 1.9048e-03 eta 7:50:22 +epoch [9/50] batch [370/500] time 1.341 (1.368) data 0.000 (0.003) loss 0.8115 (1.1450) acc 71.8750 (70.8361) lr 1.9048e-03 eta 
7:50:13 +epoch [9/50] batch [375/500] time 1.364 (1.367) data 0.000 (0.003) loss 1.1611 (1.1488) acc 71.8750 (70.8000) lr 1.9048e-03 eta 7:50:04 +epoch [9/50] batch [380/500] time 1.365 (1.367) data 0.000 (0.003) loss 0.6924 (1.1475) acc 84.3750 (70.8635) lr 1.9048e-03 eta 7:49:57 +epoch [9/50] batch [385/500] time 1.353 (1.367) data 0.000 (0.003) loss 1.2559 (1.1503) acc 59.3750 (70.7873) lr 1.9048e-03 eta 7:49:46 +epoch [9/50] batch [390/500] time 1.359 (1.367) data 0.000 (0.003) loss 0.5454 (1.1470) acc 81.2500 (70.8734) lr 1.9048e-03 eta 7:49:36 +epoch [9/50] batch [395/500] time 1.339 (1.367) data 0.000 (0.002) loss 1.5547 (1.1490) acc 65.6250 (70.8386) lr 1.9048e-03 eta 7:49:24 +epoch [9/50] batch [400/500] time 1.351 (1.367) data 0.001 (0.002) loss 1.4971 (1.1483) acc 65.6250 (70.8828) lr 1.9048e-03 eta 7:49:20 +epoch [9/50] batch [405/500] time 1.350 (1.367) data 0.000 (0.002) loss 1.3379 (1.1493) acc 68.7500 (70.8719) lr 1.9048e-03 eta 7:49:11 +epoch [9/50] batch [410/500] time 1.377 (1.367) data 0.000 (0.002) loss 1.2275 (1.1482) acc 75.0000 (70.8689) lr 1.9048e-03 eta 7:49:03 +epoch [9/50] batch [415/500] time 1.362 (1.367) data 0.000 (0.002) loss 1.4160 (1.1489) acc 71.8750 (70.9413) lr 1.9048e-03 eta 7:48:53 +epoch [9/50] batch [420/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.1289 (1.1507) acc 75.0000 (70.9226) lr 1.9048e-03 eta 7:48:47 +epoch [9/50] batch [425/500] time 1.381 (1.367) data 0.000 (0.002) loss 1.1426 (1.1499) acc 78.1250 (70.9412) lr 1.9048e-03 eta 7:48:38 +epoch [9/50] batch [430/500] time 1.358 (1.366) data 0.000 (0.002) loss 0.6396 (1.1492) acc 75.0000 (70.9666) lr 1.9048e-03 eta 7:48:28 +epoch [9/50] batch [435/500] time 1.380 (1.367) data 0.000 (0.002) loss 0.9829 (1.1484) acc 71.8750 (70.9483) lr 1.9048e-03 eta 7:48:22 +epoch [9/50] batch [440/500] time 1.357 (1.366) data 0.000 (0.002) loss 0.7041 (1.1447) acc 81.2500 (71.0014) lr 1.9048e-03 eta 7:48:13 +epoch [9/50] batch [445/500] time 1.351 (1.366) data 0.000 (0.002) loss 
1.5566 (1.1454) acc 62.5000 (70.9972) lr 1.9048e-03 eta 7:48:03 +epoch [9/50] batch [450/500] time 1.349 (1.366) data 0.000 (0.002) loss 0.6597 (1.1451) acc 81.2500 (71.0000) lr 1.9048e-03 eta 7:47:55 +epoch [9/50] batch [455/500] time 1.337 (1.366) data 0.000 (0.002) loss 0.9468 (1.1431) acc 68.7500 (71.0371) lr 1.9048e-03 eta 7:47:44 +epoch [9/50] batch [460/500] time 1.375 (1.366) data 0.001 (0.002) loss 0.9106 (1.1408) acc 84.3750 (71.0870) lr 1.9048e-03 eta 7:47:36 +epoch [9/50] batch [465/500] time 1.364 (1.366) data 0.000 (0.002) loss 0.6274 (1.1396) acc 81.2500 (71.1290) lr 1.9048e-03 eta 7:47:31 +epoch [9/50] batch [470/500] time 1.358 (1.366) data 0.000 (0.002) loss 1.2930 (1.1399) acc 71.8750 (71.1237) lr 1.9048e-03 eta 7:47:21 +epoch [9/50] batch [475/500] time 1.353 (1.366) data 0.000 (0.002) loss 1.4570 (1.1437) acc 71.8750 (71.1382) lr 1.9048e-03 eta 7:47:14 +epoch [9/50] batch [480/500] time 1.384 (1.366) data 0.000 (0.002) loss 1.3125 (1.1453) acc 75.0000 (71.1263) lr 1.9048e-03 eta 7:47:05 +epoch [9/50] batch [485/500] time 1.342 (1.366) data 0.001 (0.002) loss 1.3057 (1.1449) acc 65.6250 (71.1082) lr 1.9048e-03 eta 7:46:55 +epoch [9/50] batch [490/500] time 1.364 (1.365) data 0.000 (0.002) loss 0.8857 (1.1429) acc 65.6250 (71.1288) lr 1.9048e-03 eta 7:46:45 +epoch [9/50] batch [495/500] time 1.344 (1.365) data 0.000 (0.002) loss 1.9150 (1.1446) acc 68.7500 (71.1301) lr 1.9048e-03 eta 7:46:36 +epoch [9/50] batch [500/500] time 1.340 (1.365) data 0.000 (0.002) loss 1.7529 (1.1440) acc 59.3750 (71.1312) lr 1.8763e-03 eta 7:46:31 +epoch [10/50] batch [5/500] time 1.360 (1.557) data 0.000 (0.189) loss 0.7515 (0.9112) acc 78.1250 (76.2500) lr 1.8763e-03 eta 8:51:51 +epoch [10/50] batch [10/500] time 1.340 (1.454) data 0.000 (0.095) loss 1.0908 (0.9469) acc 81.2500 (76.2500) lr 1.8763e-03 eta 8:16:29 +epoch [10/50] batch [15/500] time 1.350 (1.419) data 0.000 (0.063) loss 1.0225 (0.9617) acc 68.7500 (74.3750) lr 1.8763e-03 eta 8:04:31 +epoch [10/50] 
batch [20/500] time 1.347 (1.404) data 0.000 (0.048) loss 0.9297 (0.9701) acc 81.2500 (74.0625) lr 1.8763e-03 eta 7:59:16 +epoch [10/50] batch [25/500] time 1.342 (1.395) data 0.000 (0.038) loss 0.7642 (1.0242) acc 84.3750 (72.8750) lr 1.8763e-03 eta 7:55:59 +epoch [10/50] batch [30/500] time 1.351 (1.388) data 0.000 (0.032) loss 0.8389 (1.0391) acc 78.1250 (72.6042) lr 1.8763e-03 eta 7:53:41 +epoch [10/50] batch [35/500] time 1.376 (1.385) data 0.000 (0.027) loss 1.1045 (1.0290) acc 65.6250 (72.6786) lr 1.8763e-03 eta 7:52:15 +epoch [10/50] batch [40/500] time 1.364 (1.382) data 0.000 (0.024) loss 0.9702 (1.0184) acc 81.2500 (73.3594) lr 1.8763e-03 eta 7:51:13 +epoch [10/50] batch [45/500] time 1.359 (1.380) data 0.000 (0.021) loss 1.3203 (1.0370) acc 65.6250 (73.0556) lr 1.8763e-03 eta 7:50:27 +epoch [10/50] batch [50/500] time 1.374 (1.378) data 0.000 (0.019) loss 1.1045 (1.0517) acc 71.8750 (72.4375) lr 1.8763e-03 eta 7:49:49 +epoch [10/50] batch [55/500] time 1.360 (1.377) data 0.000 (0.018) loss 1.1533 (1.0685) acc 75.0000 (72.1591) lr 1.8763e-03 eta 7:49:14 +epoch [10/50] batch [60/500] time 1.352 (1.378) data 0.000 (0.016) loss 0.9673 (1.0726) acc 62.5000 (71.9271) lr 1.8763e-03 eta 7:49:19 +epoch [10/50] batch [65/500] time 1.343 (1.376) data 0.000 (0.015) loss 1.4619 (1.0934) acc 62.5000 (71.6827) lr 1.8763e-03 eta 7:48:44 +epoch [10/50] batch [70/500] time 1.366 (1.375) data 0.000 (0.014) loss 1.6699 (1.1093) acc 56.2500 (71.5179) lr 1.8763e-03 eta 7:48:05 +epoch [10/50] batch [75/500] time 1.344 (1.374) data 0.000 (0.013) loss 0.9746 (1.1100) acc 68.7500 (71.5417) lr 1.8763e-03 eta 7:47:43 +epoch [10/50] batch [80/500] time 1.370 (1.373) data 0.000 (0.012) loss 1.0391 (1.1397) acc 65.6250 (71.0547) lr 1.8763e-03 eta 7:47:24 +epoch [10/50] batch [85/500] time 1.372 (1.373) data 0.001 (0.011) loss 1.5752 (1.1454) acc 65.6250 (70.9191) lr 1.8763e-03 eta 7:46:59 +epoch [10/50] batch [90/500] time 1.353 (1.372) data 0.001 (0.011) loss 1.4473 (1.1450) acc 
62.5000 (70.8681) lr 1.8763e-03 eta 7:46:50 +epoch [10/50] batch [95/500] time 1.348 (1.372) data 0.000 (0.010) loss 1.0557 (1.1463) acc 75.0000 (70.7566) lr 1.8763e-03 eta 7:46:29 +epoch [10/50] batch [100/500] time 1.347 (1.372) data 0.000 (0.010) loss 1.3184 (1.1463) acc 68.7500 (70.5938) lr 1.8763e-03 eta 7:46:32 +epoch [10/50] batch [105/500] time 1.353 (1.372) data 0.000 (0.009) loss 1.6953 (1.1500) acc 53.1250 (70.4167) lr 1.8763e-03 eta 7:46:11 +epoch [10/50] batch [110/500] time 1.364 (1.371) data 0.000 (0.009) loss 0.8037 (1.1375) acc 81.2500 (70.6818) lr 1.8763e-03 eta 7:45:55 +epoch [10/50] batch [115/500] time 1.364 (1.371) data 0.000 (0.009) loss 1.1299 (1.1379) acc 65.6250 (70.7065) lr 1.8763e-03 eta 7:45:38 +epoch [10/50] batch [120/500] time 1.365 (1.370) data 0.000 (0.008) loss 0.6777 (1.1299) acc 81.2500 (70.8073) lr 1.8763e-03 eta 7:45:24 +epoch [10/50] batch [125/500] time 1.361 (1.370) data 0.000 (0.008) loss 1.4355 (1.1397) acc 78.1250 (70.7500) lr 1.8763e-03 eta 7:45:03 +epoch [10/50] batch [130/500] time 1.363 (1.369) data 0.000 (0.008) loss 0.8521 (1.1373) acc 75.0000 (70.8654) lr 1.8763e-03 eta 7:44:53 +epoch [10/50] batch [135/500] time 1.352 (1.369) data 0.000 (0.007) loss 1.2295 (1.1353) acc 75.0000 (70.9259) lr 1.8763e-03 eta 7:44:36 +epoch [10/50] batch [140/500] time 1.374 (1.369) data 0.000 (0.007) loss 1.8027 (1.1434) acc 56.2500 (70.8705) lr 1.8763e-03 eta 7:44:27 +epoch [10/50] batch [145/500] time 1.340 (1.368) data 0.000 (0.007) loss 1.0957 (1.1372) acc 75.0000 (70.9052) lr 1.8763e-03 eta 7:44:12 +epoch [10/50] batch [150/500] time 1.361 (1.368) data 0.001 (0.007) loss 0.7139 (1.1411) acc 78.1250 (70.7708) lr 1.8763e-03 eta 7:43:56 +epoch [10/50] batch [155/500] time 1.343 (1.367) data 0.000 (0.007) loss 0.8057 (1.1298) acc 75.0000 (71.0282) lr 1.8763e-03 eta 7:43:40 +epoch [10/50] batch [160/500] time 1.356 (1.367) data 0.000 (0.006) loss 0.7705 (1.1328) acc 75.0000 (71.0938) lr 1.8763e-03 eta 7:43:27 +epoch [10/50] batch 
[165/500] time 1.355 (1.367) data 0.000 (0.006) loss 1.5312 (1.1416) acc 65.6250 (71.0038) lr 1.8763e-03 eta 7:43:18 +epoch [10/50] batch [170/500] time 1.361 (1.367) data 0.000 (0.006) loss 1.2803 (1.1375) acc 65.6250 (71.1029) lr 1.8763e-03 eta 7:43:04 +epoch [10/50] batch [175/500] time 1.375 (1.367) data 0.000 (0.006) loss 1.2754 (1.1397) acc 65.6250 (71.0000) lr 1.8763e-03 eta 7:42:55 +epoch [10/50] batch [180/500] time 1.348 (1.366) data 0.000 (0.006) loss 1.3066 (1.1389) acc 71.8750 (71.0417) lr 1.8763e-03 eta 7:42:42 +epoch [10/50] batch [185/500] time 1.359 (1.366) data 0.000 (0.006) loss 1.3906 (1.1413) acc 78.1250 (71.0811) lr 1.8763e-03 eta 7:42:33 +epoch [10/50] batch [190/500] time 1.388 (1.366) data 0.000 (0.005) loss 1.2510 (1.1453) acc 75.0000 (71.1513) lr 1.8763e-03 eta 7:42:27 +epoch [10/50] batch [195/500] time 1.371 (1.366) data 0.000 (0.005) loss 1.3613 (1.1484) acc 56.2500 (71.0256) lr 1.8763e-03 eta 7:42:19 +epoch [10/50] batch [200/500] time 1.380 (1.367) data 0.000 (0.005) loss 0.6396 (1.1479) acc 81.2500 (71.0938) lr 1.8763e-03 eta 7:42:29 +epoch [10/50] batch [205/500] time 1.352 (1.367) data 0.000 (0.005) loss 1.0088 (1.1445) acc 68.7500 (71.1280) lr 1.8763e-03 eta 7:42:23 +epoch [10/50] batch [210/500] time 1.364 (1.367) data 0.000 (0.005) loss 1.8281 (1.1464) acc 59.3750 (71.1161) lr 1.8763e-03 eta 7:42:13 +epoch [10/50] batch [215/500] time 1.353 (1.367) data 0.000 (0.005) loss 1.3057 (1.1544) acc 56.2500 (70.9738) lr 1.8763e-03 eta 7:42:02 +epoch [10/50] batch [220/500] time 1.345 (1.367) data 0.000 (0.005) loss 0.7207 (1.1544) acc 75.0000 (70.8381) lr 1.8763e-03 eta 7:41:53 +epoch [10/50] batch [225/500] time 1.375 (1.367) data 0.000 (0.005) loss 1.2695 (1.1555) acc 75.0000 (70.8194) lr 1.8763e-03 eta 7:41:49 +epoch [10/50] batch [230/500] time 1.366 (1.367) data 0.000 (0.005) loss 1.1953 (1.1545) acc 65.6250 (70.7609) lr 1.8763e-03 eta 7:41:44 +epoch [10/50] batch [235/500] time 1.362 (1.367) data 0.000 (0.004) loss 0.7183 
(1.1574) acc 84.3750 (70.7447) lr 1.8763e-03 eta 7:41:37 +epoch [10/50] batch [240/500] time 1.488 (1.367) data 0.000 (0.004) loss 1.4072 (1.1620) acc 71.8750 (70.6641) lr 1.8763e-03 eta 7:41:41 +epoch [10/50] batch [245/500] time 1.355 (1.367) data 0.000 (0.004) loss 1.8672 (1.1713) acc 56.2500 (70.4592) lr 1.8763e-03 eta 7:41:28 +epoch [10/50] batch [250/500] time 1.374 (1.367) data 0.001 (0.004) loss 0.4590 (1.1691) acc 87.5000 (70.5000) lr 1.8763e-03 eta 7:41:20 +epoch [10/50] batch [255/500] time 1.357 (1.367) data 0.000 (0.004) loss 1.4961 (1.1678) acc 56.2500 (70.4902) lr 1.8763e-03 eta 7:41:11 +epoch [10/50] batch [260/500] time 1.354 (1.367) data 0.000 (0.004) loss 1.2119 (1.1664) acc 71.8750 (70.5288) lr 1.8763e-03 eta 7:41:02 +epoch [10/50] batch [265/500] time 1.352 (1.367) data 0.000 (0.004) loss 1.4922 (1.1681) acc 59.3750 (70.4363) lr 1.8763e-03 eta 7:40:53 +epoch [10/50] batch [270/500] time 1.377 (1.367) data 0.001 (0.004) loss 1.2383 (1.1652) acc 62.5000 (70.5208) lr 1.8763e-03 eta 7:40:46 +epoch [10/50] batch [275/500] time 1.355 (1.366) data 0.000 (0.004) loss 1.0664 (1.1685) acc 71.8750 (70.5341) lr 1.8763e-03 eta 7:40:34 +epoch [10/50] batch [280/500] time 1.353 (1.366) data 0.001 (0.004) loss 0.8989 (1.1667) acc 71.8750 (70.5692) lr 1.8763e-03 eta 7:40:26 +epoch [10/50] batch [285/500] time 1.374 (1.366) data 0.001 (0.004) loss 1.0791 (1.1650) acc 65.6250 (70.6031) lr 1.8763e-03 eta 7:40:18 +epoch [10/50] batch [290/500] time 1.361 (1.366) data 0.001 (0.004) loss 1.1279 (1.1638) acc 75.0000 (70.6250) lr 1.8763e-03 eta 7:40:10 +epoch [10/50] batch [295/500] time 1.359 (1.366) data 0.000 (0.004) loss 0.8564 (1.1624) acc 75.0000 (70.6674) lr 1.8763e-03 eta 7:40:01 +epoch [10/50] batch [300/500] time 1.365 (1.366) data 0.000 (0.004) loss 0.6006 (1.1613) acc 84.3750 (70.6875) lr 1.8763e-03 eta 7:39:54 +epoch [10/50] batch [305/500] time 1.363 (1.366) data 0.000 (0.004) loss 1.1016 (1.1615) acc 78.1250 (70.7787) lr 1.8763e-03 eta 7:39:46 +epoch 
[10/50] batch [310/500] time 1.368 (1.366) data 0.000 (0.003) loss 1.0947 (1.1605) acc 68.7500 (70.7359) lr 1.8763e-03 eta 7:39:36 +epoch [10/50] batch [315/500] time 1.337 (1.366) data 0.000 (0.003) loss 1.8379 (1.1647) acc 62.5000 (70.7440) lr 1.8763e-03 eta 7:39:27 +epoch [10/50] batch [320/500] time 1.352 (1.366) data 0.000 (0.003) loss 1.0166 (1.1602) acc 75.0000 (70.8105) lr 1.8763e-03 eta 7:39:21 +epoch [10/50] batch [325/500] time 1.352 (1.366) data 0.001 (0.003) loss 0.6104 (1.1580) acc 87.5000 (70.8846) lr 1.8763e-03 eta 7:39:14 +epoch [10/50] batch [330/500] time 1.360 (1.366) data 0.000 (0.003) loss 0.7925 (1.1535) acc 78.1250 (71.0038) lr 1.8763e-03 eta 7:39:06 +epoch [10/50] batch [335/500] time 1.362 (1.366) data 0.000 (0.003) loss 0.9351 (1.1527) acc 84.3750 (71.1194) lr 1.8763e-03 eta 7:38:56 +epoch [10/50] batch [340/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.0996 (1.1539) acc 75.0000 (71.0662) lr 1.8763e-03 eta 7:38:57 +epoch [10/50] batch [345/500] time 1.361 (1.366) data 0.000 (0.003) loss 1.4053 (1.1543) acc 65.6250 (71.0870) lr 1.8763e-03 eta 7:38:50 +epoch [10/50] batch [350/500] time 1.361 (1.366) data 0.001 (0.003) loss 1.6270 (1.1555) acc 59.3750 (71.0625) lr 1.8763e-03 eta 7:38:40 +epoch [10/50] batch [355/500] time 1.372 (1.366) data 0.001 (0.003) loss 0.8945 (1.1544) acc 75.0000 (71.0563) lr 1.8763e-03 eta 7:38:32 +epoch [10/50] batch [360/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.1484 (1.1550) acc 71.8750 (71.0243) lr 1.8763e-03 eta 7:38:23 +epoch [10/50] batch [365/500] time 1.375 (1.366) data 0.000 (0.003) loss 0.8125 (1.1571) acc 81.2500 (71.0788) lr 1.8763e-03 eta 7:38:17 +epoch [10/50] batch [370/500] time 1.378 (1.366) data 0.000 (0.003) loss 0.6611 (1.1557) acc 87.5000 (71.1318) lr 1.8763e-03 eta 7:38:11 +epoch [10/50] batch [375/500] time 1.371 (1.366) data 0.000 (0.003) loss 0.8750 (1.1559) acc 75.0000 (71.0917) lr 1.8763e-03 eta 7:38:01 +epoch [10/50] batch [380/500] time 1.359 (1.366) data 0.000 (0.003) loss 
1.5918 (1.1573) acc 68.7500 (71.0609) lr 1.8763e-03 eta 7:37:54 +epoch [10/50] batch [385/500] time 1.335 (1.366) data 0.000 (0.003) loss 1.0771 (1.1565) acc 68.7500 (71.1120) lr 1.8763e-03 eta 7:37:54 +epoch [10/50] batch [390/500] time 1.371 (1.366) data 0.000 (0.003) loss 0.4116 (1.1591) acc 84.3750 (71.0337) lr 1.8763e-03 eta 7:37:50 +epoch [10/50] batch [395/500] time 1.350 (1.366) data 0.000 (0.003) loss 1.1006 (1.1569) acc 68.7500 (71.0839) lr 1.8763e-03 eta 7:37:40 +epoch [10/50] batch [400/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.3281 (1.1564) acc 78.1250 (71.0703) lr 1.8763e-03 eta 7:37:33 +epoch [10/50] batch [405/500] time 1.340 (1.366) data 0.000 (0.003) loss 0.9785 (1.1530) acc 71.8750 (71.1574) lr 1.8763e-03 eta 7:37:25 +epoch [10/50] batch [410/500] time 1.378 (1.366) data 0.000 (0.003) loss 0.8184 (1.1516) acc 75.0000 (71.1890) lr 1.8763e-03 eta 7:37:19 +epoch [10/50] batch [415/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.1367 (1.1498) acc 78.1250 (71.2274) lr 1.8763e-03 eta 7:37:10 +epoch [10/50] batch [420/500] time 1.380 (1.366) data 0.001 (0.003) loss 0.8794 (1.1529) acc 75.0000 (71.2054) lr 1.8763e-03 eta 7:37:05 +epoch [10/50] batch [425/500] time 1.363 (1.366) data 0.000 (0.003) loss 0.8608 (1.1518) acc 75.0000 (71.2132) lr 1.8763e-03 eta 7:36:58 +epoch [10/50] batch [430/500] time 1.352 (1.366) data 0.000 (0.003) loss 0.6685 (1.1511) acc 75.0000 (71.2500) lr 1.8763e-03 eta 7:36:50 +epoch [10/50] batch [435/500] time 1.350 (1.366) data 0.001 (0.003) loss 1.1553 (1.1506) acc 71.8750 (71.2931) lr 1.8763e-03 eta 7:36:43 +epoch [10/50] batch [440/500] time 1.366 (1.366) data 0.001 (0.003) loss 1.0312 (1.1494) acc 75.0000 (71.3423) lr 1.8763e-03 eta 7:36:34 +epoch [10/50] batch [445/500] time 1.362 (1.366) data 0.001 (0.003) loss 0.7305 (1.1473) acc 78.1250 (71.3834) lr 1.8763e-03 eta 7:36:27 +epoch [10/50] batch [450/500] time 1.356 (1.366) data 0.001 (0.003) loss 1.1035 (1.1472) acc 68.7500 (71.3542) lr 1.8763e-03 eta 7:36:19 
+epoch [10/50] batch [455/500] time 1.347 (1.365) data 0.001 (0.003) loss 1.2383 (1.1469) acc 78.1250 (71.4080) lr 1.8763e-03 eta 7:36:10 +epoch [10/50] batch [460/500] time 1.367 (1.365) data 0.001 (0.002) loss 0.4490 (1.1467) acc 90.6250 (71.4538) lr 1.8763e-03 eta 7:36:04 +epoch [10/50] batch [465/500] time 1.354 (1.365) data 0.000 (0.002) loss 0.8164 (1.1462) acc 68.7500 (71.4718) lr 1.8763e-03 eta 7:35:57 +epoch [10/50] batch [470/500] time 1.356 (1.365) data 0.000 (0.002) loss 0.8945 (1.1469) acc 75.0000 (71.4495) lr 1.8763e-03 eta 7:35:49 +epoch [10/50] batch [475/500] time 1.366 (1.365) data 0.001 (0.002) loss 1.5166 (1.1492) acc 68.7500 (71.4276) lr 1.8763e-03 eta 7:35:42 +epoch [10/50] batch [480/500] time 1.366 (1.365) data 0.000 (0.002) loss 0.5498 (1.1471) acc 87.5000 (71.4844) lr 1.8763e-03 eta 7:35:32 +epoch [10/50] batch [485/500] time 1.357 (1.366) data 0.001 (0.002) loss 1.0771 (1.1472) acc 59.3750 (71.3982) lr 1.8763e-03 eta 7:35:31 +epoch [10/50] batch [490/500] time 1.365 (1.366) data 0.000 (0.002) loss 1.4033 (1.1495) acc 56.2500 (71.3202) lr 1.8763e-03 eta 7:35:24 +epoch [10/50] batch [495/500] time 1.376 (1.366) data 0.000 (0.002) loss 1.1172 (1.1466) acc 75.0000 (71.3763) lr 1.8763e-03 eta 7:35:18 +epoch [10/50] batch [500/500] time 1.374 (1.366) data 0.000 (0.002) loss 0.9155 (1.1473) acc 81.2500 (71.3937) lr 1.8443e-03 eta 7:35:11 +epoch [11/50] batch [5/500] time 1.366 (1.546) data 0.000 (0.168) loss 1.4062 (1.0720) acc 53.1250 (71.8750) lr 1.8443e-03 eta 8:35:11 +epoch [11/50] batch [10/500] time 1.369 (1.451) data 0.000 (0.084) loss 0.7588 (1.0511) acc 75.0000 (70.9375) lr 1.8443e-03 eta 8:03:27 +epoch [11/50] batch [15/500] time 1.374 (1.422) data 0.000 (0.056) loss 0.8335 (1.0536) acc 78.1250 (71.4583) lr 1.8443e-03 eta 7:53:31 +epoch [11/50] batch [20/500] time 1.371 (1.409) data 0.000 (0.042) loss 0.9341 (1.0533) acc 78.1250 (72.5000) lr 1.8443e-03 eta 7:49:10 +epoch [11/50] batch [25/500] time 1.389 (1.401) data 0.000 (0.034) loss 
0.6670 (1.0684) acc 81.2500 (73.2500) lr 1.8443e-03 eta 7:46:26 +epoch [11/50] batch [30/500] time 1.351 (1.393) data 0.000 (0.028) loss 1.1260 (1.0854) acc 75.0000 (72.5000) lr 1.8443e-03 eta 7:43:42 +epoch [11/50] batch [35/500] time 1.352 (1.394) data 0.000 (0.024) loss 1.2178 (1.0717) acc 65.6250 (72.6786) lr 1.8443e-03 eta 7:43:46 +epoch [11/50] batch [40/500] time 1.353 (1.389) data 0.000 (0.021) loss 1.2334 (1.0514) acc 71.8750 (73.4375) lr 1.8443e-03 eta 7:42:11 +epoch [11/50] batch [45/500] time 1.348 (1.385) data 0.000 (0.019) loss 1.3076 (1.0861) acc 75.0000 (72.9861) lr 1.8443e-03 eta 7:40:45 +epoch [11/50] batch [50/500] time 1.366 (1.383) data 0.001 (0.017) loss 1.5771 (1.0934) acc 68.7500 (73.0625) lr 1.8443e-03 eta 7:39:56 +epoch [11/50] batch [55/500] time 1.360 (1.382) data 0.000 (0.016) loss 0.7661 (1.0962) acc 81.2500 (72.8977) lr 1.8443e-03 eta 7:39:16 +epoch [11/50] batch [60/500] time 1.368 (1.380) data 0.000 (0.014) loss 0.9277 (1.0810) acc 71.8750 (73.0729) lr 1.8443e-03 eta 7:38:46 +epoch [11/50] batch [65/500] time 1.364 (1.380) data 0.000 (0.013) loss 1.0752 (1.0806) acc 71.8750 (72.9327) lr 1.8443e-03 eta 7:38:29 +epoch [11/50] batch [70/500] time 1.354 (1.378) data 0.001 (0.012) loss 1.3945 (1.1036) acc 65.6250 (72.4554) lr 1.8443e-03 eta 7:37:44 +epoch [11/50] batch [75/500] time 1.374 (1.378) data 0.000 (0.012) loss 1.1270 (1.1094) acc 62.5000 (72.0000) lr 1.8443e-03 eta 7:37:30 +epoch [11/50] batch [80/500] time 1.363 (1.378) data 0.001 (0.011) loss 1.1748 (1.1369) acc 65.6250 (71.2500) lr 1.8443e-03 eta 7:37:35 +epoch [11/50] batch [85/500] time 1.364 (1.377) data 0.000 (0.010) loss 1.3867 (1.1326) acc 68.7500 (71.4706) lr 1.8443e-03 eta 7:37:02 +epoch [11/50] batch [90/500] time 1.361 (1.376) data 0.000 (0.010) loss 1.3506 (1.1273) acc 75.0000 (71.6319) lr 1.8443e-03 eta 7:36:39 +epoch [11/50] batch [95/500] time 1.340 (1.376) data 0.000 (0.009) loss 0.9424 (1.1201) acc 81.2500 (71.7105) lr 1.8443e-03 eta 7:36:22 +epoch [11/50] 
batch [100/500] time 1.386 (1.375) data 0.001 (0.009) loss 1.0361 (1.1217) acc 75.0000 (71.8125) lr 1.8443e-03 eta 7:36:09 +epoch [11/50] batch [105/500] time 1.358 (1.374) data 0.000 (0.008) loss 1.9658 (1.1305) acc 56.2500 (71.6667) lr 1.8443e-03 eta 7:35:43 +epoch [11/50] batch [110/500] time 1.376 (1.374) data 0.000 (0.008) loss 1.1387 (1.1198) acc 68.7500 (71.7898) lr 1.8443e-03 eta 7:35:24 +epoch [11/50] batch [115/500] time 1.344 (1.374) data 0.000 (0.008) loss 1.8213 (1.1267) acc 50.0000 (71.5489) lr 1.8443e-03 eta 7:35:12 +epoch [11/50] batch [120/500] time 1.338 (1.373) data 0.000 (0.007) loss 0.7944 (1.1215) acc 75.0000 (71.6406) lr 1.8443e-03 eta 7:34:47 +epoch [11/50] batch [125/500] time 1.346 (1.372) data 0.001 (0.007) loss 1.5146 (1.1277) acc 68.7500 (71.6250) lr 1.8443e-03 eta 7:34:30 +epoch [11/50] batch [130/500] time 1.374 (1.372) data 0.000 (0.007) loss 1.5752 (1.1238) acc 68.7500 (71.6587) lr 1.8443e-03 eta 7:34:12 +epoch [11/50] batch [135/500] time 1.366 (1.371) data 0.001 (0.007) loss 0.7251 (1.1156) acc 81.2500 (71.8519) lr 1.8443e-03 eta 7:34:00 +epoch [11/50] batch [140/500] time 1.375 (1.371) data 0.000 (0.006) loss 1.3936 (1.1175) acc 65.6250 (71.7188) lr 1.8443e-03 eta 7:33:47 +epoch [11/50] batch [145/500] time 1.346 (1.370) data 0.000 (0.006) loss 1.2168 (1.1188) acc 75.0000 (71.6595) lr 1.8443e-03 eta 7:33:30 +epoch [11/50] batch [150/500] time 1.362 (1.371) data 0.000 (0.006) loss 1.4385 (1.1195) acc 65.6250 (71.6667) lr 1.8443e-03 eta 7:33:24 +epoch [11/50] batch [155/500] time 1.368 (1.370) data 0.000 (0.006) loss 1.4170 (1.1195) acc 75.0000 (71.7540) lr 1.8443e-03 eta 7:33:13 +epoch [11/50] batch [160/500] time 1.370 (1.370) data 0.000 (0.006) loss 1.5127 (1.1189) acc 59.3750 (71.7188) lr 1.8443e-03 eta 7:33:00 +epoch [11/50] batch [165/500] time 1.371 (1.370) data 0.000 (0.006) loss 0.9395 (1.1135) acc 78.1250 (71.9129) lr 1.8443e-03 eta 7:32:51 +epoch [11/50] batch [170/500] time 1.374 (1.370) data 0.000 (0.005) loss 1.4590 
(1.1135) acc 56.2500 (71.8566) lr 1.8443e-03 eta 7:32:45 +epoch [11/50] batch [175/500] time 1.365 (1.370) data 0.000 (0.005) loss 1.5312 (1.1123) acc 68.7500 (71.9643) lr 1.8443e-03 eta 7:32:37 +epoch [11/50] batch [180/500] time 1.350 (1.370) data 0.000 (0.005) loss 1.2490 (1.1071) acc 68.7500 (72.0139) lr 1.8443e-03 eta 7:32:37 +epoch [11/50] batch [185/500] time 1.365 (1.370) data 0.000 (0.005) loss 1.7773 (1.1122) acc 65.6250 (71.8581) lr 1.8443e-03 eta 7:32:21 +epoch [11/50] batch [190/500] time 1.374 (1.370) data 0.000 (0.005) loss 0.9067 (1.1137) acc 65.6250 (71.9243) lr 1.8443e-03 eta 7:32:14 +epoch [11/50] batch [195/500] time 1.358 (1.369) data 0.000 (0.005) loss 1.3145 (1.1171) acc 68.7500 (71.9391) lr 1.8443e-03 eta 7:32:01 +epoch [11/50] batch [200/500] time 1.363 (1.369) data 0.000 (0.005) loss 1.1465 (1.1181) acc 68.7500 (71.9688) lr 1.8443e-03 eta 7:31:49 +epoch [11/50] batch [205/500] time 1.368 (1.369) data 0.000 (0.005) loss 1.1514 (1.1246) acc 62.5000 (71.8750) lr 1.8443e-03 eta 7:31:38 +epoch [11/50] batch [210/500] time 1.359 (1.369) data 0.000 (0.004) loss 0.8555 (1.1251) acc 78.1250 (71.9048) lr 1.8443e-03 eta 7:31:30 +epoch [11/50] batch [215/500] time 1.360 (1.368) data 0.000 (0.004) loss 1.2031 (1.1226) acc 68.7500 (71.9622) lr 1.8443e-03 eta 7:31:14 +epoch [11/50] batch [220/500] time 1.360 (1.368) data 0.000 (0.004) loss 1.6201 (1.1230) acc 75.0000 (72.0170) lr 1.8443e-03 eta 7:31:07 +epoch [11/50] batch [225/500] time 1.347 (1.369) data 0.000 (0.004) loss 0.9932 (1.1195) acc 78.1250 (72.1111) lr 1.8443e-03 eta 7:31:06 +epoch [11/50] batch [230/500] time 1.364 (1.369) data 0.000 (0.004) loss 1.3799 (1.1240) acc 65.6250 (72.0380) lr 1.8443e-03 eta 7:30:58 +epoch [11/50] batch [235/500] time 1.378 (1.368) data 0.000 (0.004) loss 1.0703 (1.1265) acc 75.0000 (72.0612) lr 1.8443e-03 eta 7:30:47 +epoch [11/50] batch [240/500] time 1.370 (1.368) data 0.000 (0.004) loss 0.8530 (1.1288) acc 78.1250 (72.0703) lr 1.8443e-03 eta 7:30:37 +epoch 
[11/50] batch [245/500] time 1.365 (1.368) data 0.001 (0.004) loss 1.4219 (1.1322) acc 71.8750 (72.0536) lr 1.8443e-03 eta 7:30:30 +epoch [11/50] batch [250/500] time 1.370 (1.368) data 0.000 (0.004) loss 1.4346 (1.1305) acc 65.6250 (72.0875) lr 1.8443e-03 eta 7:30:22 +epoch [11/50] batch [255/500] time 1.369 (1.368) data 0.000 (0.004) loss 0.6943 (1.1298) acc 78.1250 (71.9853) lr 1.8443e-03 eta 7:30:14 +epoch [11/50] batch [260/500] time 1.361 (1.368) data 0.000 (0.004) loss 1.1191 (1.1280) acc 68.7500 (72.0072) lr 1.8443e-03 eta 7:30:05 +epoch [11/50] batch [265/500] time 1.376 (1.368) data 0.000 (0.004) loss 1.0312 (1.1271) acc 75.0000 (72.0283) lr 1.8443e-03 eta 7:29:58 +epoch [11/50] batch [270/500] time 1.363 (1.368) data 0.001 (0.004) loss 1.2988 (1.1304) acc 68.7500 (71.9792) lr 1.8443e-03 eta 7:29:51 +epoch [11/50] batch [275/500] time 1.347 (1.368) data 0.001 (0.003) loss 0.6831 (1.1295) acc 78.1250 (72.0000) lr 1.8443e-03 eta 7:29:42 +epoch [11/50] batch [280/500] time 1.369 (1.368) data 0.000 (0.003) loss 1.2334 (1.1315) acc 65.6250 (71.9308) lr 1.8443e-03 eta 7:29:35 +epoch [11/50] batch [285/500] time 1.360 (1.368) data 0.000 (0.003) loss 1.6084 (1.1346) acc 65.6250 (71.8969) lr 1.8443e-03 eta 7:29:25 +epoch [11/50] batch [290/500] time 1.341 (1.367) data 0.000 (0.003) loss 1.1602 (1.1316) acc 59.3750 (71.8966) lr 1.8443e-03 eta 7:29:12 +epoch [11/50] batch [295/500] time 1.355 (1.367) data 0.000 (0.003) loss 1.4697 (1.1293) acc 65.6250 (71.9280) lr 1.8443e-03 eta 7:29:04 +epoch [11/50] batch [300/500] time 1.355 (1.367) data 0.001 (0.003) loss 1.3594 (1.1266) acc 71.8750 (71.9375) lr 1.8443e-03 eta 7:28:50 +epoch [11/50] batch [305/500] time 1.348 (1.367) data 0.000 (0.003) loss 0.7578 (1.1255) acc 81.2500 (71.9467) lr 1.8443e-03 eta 7:28:38 +epoch [11/50] batch [310/500] time 1.346 (1.367) data 0.000 (0.003) loss 1.1172 (1.1238) acc 71.8750 (71.9456) lr 1.8443e-03 eta 7:28:30 +epoch [11/50] batch [315/500] time 1.339 (1.367) data 0.000 (0.003) loss 
1.3145 (1.1235) acc 65.6250 (71.8849) lr 1.8443e-03 eta 7:28:20 +epoch [11/50] batch [320/500] time 1.499 (1.367) data 0.000 (0.003) loss 1.3525 (1.1214) acc 65.6250 (71.9141) lr 1.8443e-03 eta 7:28:19 +epoch [11/50] batch [325/500] time 1.355 (1.367) data 0.000 (0.003) loss 1.2676 (1.1233) acc 59.3750 (71.7981) lr 1.8443e-03 eta 7:28:09 +epoch [11/50] batch [330/500] time 1.354 (1.367) data 0.000 (0.003) loss 1.4229 (1.1255) acc 78.1250 (71.8087) lr 1.8443e-03 eta 7:27:59 +epoch [11/50] batch [335/500] time 1.355 (1.366) data 0.000 (0.003) loss 0.7295 (1.1249) acc 78.1250 (71.8004) lr 1.8443e-03 eta 7:27:51 +epoch [11/50] batch [340/500] time 1.371 (1.366) data 0.000 (0.003) loss 1.2188 (1.1262) acc 62.5000 (71.7371) lr 1.8443e-03 eta 7:27:42 +epoch [11/50] batch [345/500] time 1.363 (1.366) data 0.000 (0.003) loss 1.7158 (1.1268) acc 62.5000 (71.7663) lr 1.8443e-03 eta 7:27:34 +epoch [11/50] batch [350/500] time 1.351 (1.366) data 0.000 (0.003) loss 1.3096 (1.1250) acc 68.7500 (71.8036) lr 1.8443e-03 eta 7:27:23 +epoch [11/50] batch [355/500] time 1.358 (1.366) data 0.000 (0.003) loss 1.1641 (1.1267) acc 75.0000 (71.8222) lr 1.8443e-03 eta 7:27:12 +epoch [11/50] batch [360/500] time 1.366 (1.366) data 0.000 (0.003) loss 1.4766 (1.1273) acc 68.7500 (71.8056) lr 1.8443e-03 eta 7:27:02 +epoch [11/50] batch [365/500] time 1.381 (1.366) data 0.001 (0.003) loss 1.2793 (1.1283) acc 62.5000 (71.7894) lr 1.8443e-03 eta 7:27:06 +epoch [11/50] batch [370/500] time 1.363 (1.366) data 0.000 (0.003) loss 0.8813 (1.1309) acc 71.8750 (71.7230) lr 1.8443e-03 eta 7:26:58 +epoch [11/50] batch [375/500] time 1.370 (1.366) data 0.001 (0.003) loss 1.4492 (1.1320) acc 71.8750 (71.7583) lr 1.8443e-03 eta 7:26:47 +epoch [11/50] batch [380/500] time 1.357 (1.366) data 0.000 (0.003) loss 1.2051 (1.1318) acc 68.7500 (71.7188) lr 1.8443e-03 eta 7:26:41 +epoch [11/50] batch [385/500] time 1.339 (1.366) data 0.000 (0.003) loss 1.0615 (1.1305) acc 68.7500 (71.7208) lr 1.8443e-03 eta 7:26:33 
+epoch [11/50] batch [390/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.4326 (1.1306) acc 65.6250 (71.7067) lr 1.8443e-03 eta 7:26:25 +epoch [11/50] batch [395/500] time 1.364 (1.366) data 0.000 (0.003) loss 0.5483 (1.1273) acc 84.3750 (71.7722) lr 1.8443e-03 eta 7:26:17 +epoch [11/50] batch [400/500] time 1.359 (1.366) data 0.000 (0.003) loss 1.1797 (1.1254) acc 68.7500 (71.7969) lr 1.8443e-03 eta 7:26:09 +epoch [11/50] batch [405/500] time 1.376 (1.366) data 0.000 (0.002) loss 1.3750 (1.1278) acc 68.7500 (71.7284) lr 1.8443e-03 eta 7:26:02 +epoch [11/50] batch [410/500] time 1.358 (1.366) data 0.000 (0.002) loss 1.0801 (1.1267) acc 68.7500 (71.7530) lr 1.8443e-03 eta 7:25:54 +epoch [11/50] batch [415/500] time 1.352 (1.366) data 0.000 (0.002) loss 0.3984 (1.1239) acc 90.6250 (71.8750) lr 1.8443e-03 eta 7:25:46 +epoch [11/50] batch [420/500] time 1.355 (1.366) data 0.000 (0.002) loss 1.1289 (1.1246) acc 81.2500 (71.8750) lr 1.8443e-03 eta 7:25:37 +epoch [11/50] batch [425/500] time 1.371 (1.365) data 0.000 (0.002) loss 2.2734 (1.1276) acc 50.0000 (71.7794) lr 1.8443e-03 eta 7:25:28 +epoch [11/50] batch [430/500] time 1.350 (1.365) data 0.001 (0.002) loss 0.9126 (1.1287) acc 81.2500 (71.8023) lr 1.8443e-03 eta 7:25:18 +epoch [11/50] batch [435/500] time 1.365 (1.365) data 0.001 (0.002) loss 1.2197 (1.1296) acc 68.7500 (71.7601) lr 1.8443e-03 eta 7:25:10 +epoch [11/50] batch [440/500] time 1.377 (1.365) data 0.000 (0.002) loss 1.0225 (1.1298) acc 68.7500 (71.7259) lr 1.8443e-03 eta 7:25:03 +epoch [11/50] batch [445/500] time 1.364 (1.365) data 0.000 (0.002) loss 0.9229 (1.1314) acc 78.1250 (71.6854) lr 1.8443e-03 eta 7:24:55 +epoch [11/50] batch [450/500] time 1.356 (1.365) data 0.000 (0.002) loss 1.0947 (1.1322) acc 68.7500 (71.6806) lr 1.8443e-03 eta 7:24:47 +epoch [11/50] batch [455/500] time 1.353 (1.365) data 0.000 (0.002) loss 0.8701 (1.1333) acc 78.1250 (71.6690) lr 1.8443e-03 eta 7:24:37 +epoch [11/50] batch [460/500] time 1.354 (1.365) data 0.000 
(0.002) loss 1.0195 (1.1321) acc 78.1250 (71.6780) lr 1.8443e-03 eta 7:24:29 +epoch [11/50] batch [465/500] time 1.363 (1.365) data 0.000 (0.002) loss 1.0146 (1.1326) acc 75.0000 (71.6801) lr 1.8443e-03 eta 7:24:28 +epoch [11/50] batch [470/500] time 1.362 (1.365) data 0.000 (0.002) loss 0.6816 (1.1302) acc 84.3750 (71.7487) lr 1.8443e-03 eta 7:24:21 +epoch [11/50] batch [475/500] time 1.385 (1.365) data 0.000 (0.002) loss 0.7231 (1.1289) acc 81.2500 (71.8026) lr 1.8443e-03 eta 7:24:16 +epoch [11/50] batch [480/500] time 1.371 (1.365) data 0.000 (0.002) loss 1.4443 (1.1306) acc 50.0000 (71.7643) lr 1.8443e-03 eta 7:24:10 +epoch [11/50] batch [485/500] time 1.371 (1.365) data 0.001 (0.002) loss 1.5439 (1.1315) acc 59.3750 (71.7139) lr 1.8443e-03 eta 7:24:02 +epoch [11/50] batch [490/500] time 1.362 (1.365) data 0.000 (0.002) loss 0.9219 (1.1315) acc 71.8750 (71.7219) lr 1.8443e-03 eta 7:23:55 +epoch [11/50] batch [495/500] time 1.357 (1.365) data 0.000 (0.002) loss 1.2324 (1.1350) acc 68.7500 (71.6540) lr 1.8443e-03 eta 7:23:47 +epoch [11/50] batch [500/500] time 1.379 (1.365) data 0.000 (0.002) loss 1.3887 (1.1365) acc 59.3750 (71.6063) lr 1.8090e-03 eta 7:23:42 +epoch [12/50] batch [5/500] time 1.384 (1.540) data 0.000 (0.174) loss 0.6631 (0.8917) acc 71.8750 (73.7500) lr 1.8090e-03 eta 8:20:13 +epoch [12/50] batch [10/500] time 1.547 (1.471) data 0.000 (0.087) loss 0.8623 (0.9059) acc 71.8750 (75.0000) lr 1.8090e-03 eta 7:57:46 +epoch [12/50] batch [15/500] time 1.364 (1.434) data 0.000 (0.058) loss 1.0479 (0.9668) acc 75.0000 (73.9583) lr 1.8090e-03 eta 7:45:35 +epoch [12/50] batch [20/500] time 1.367 (1.415) data 0.000 (0.044) loss 0.8706 (0.9829) acc 81.2500 (73.9062) lr 1.8090e-03 eta 7:39:25 +epoch [12/50] batch [25/500] time 1.366 (1.404) data 0.000 (0.035) loss 0.8374 (0.9832) acc 81.2500 (74.2500) lr 1.8090e-03 eta 7:35:40 +epoch [12/50] batch [30/500] time 1.371 (1.398) data 0.000 (0.029) loss 1.1416 (1.0039) acc 65.6250 (73.7500) lr 1.8090e-03 eta 
7:33:41 +epoch [12/50] batch [35/500] time 1.364 (1.395) data 0.000 (0.025) loss 1.3730 (1.0419) acc 68.7500 (72.9464) lr 1.8090e-03 eta 7:32:24 +epoch [12/50] batch [40/500] time 1.364 (1.390) data 0.000 (0.022) loss 1.1455 (1.0658) acc 68.7500 (72.5000) lr 1.8090e-03 eta 7:30:57 +epoch [12/50] batch [45/500] time 1.357 (1.388) data 0.001 (0.020) loss 1.6514 (1.0684) acc 62.5000 (72.0833) lr 1.8090e-03 eta 7:30:01 +epoch [12/50] batch [50/500] time 1.365 (1.386) data 0.000 (0.018) loss 0.9170 (1.0726) acc 75.0000 (72.1250) lr 1.8090e-03 eta 7:29:19 +epoch [12/50] batch [55/500] time 1.361 (1.386) data 0.000 (0.016) loss 1.7676 (1.0734) acc 56.2500 (72.1591) lr 1.8090e-03 eta 7:29:11 +epoch [12/50] batch [60/500] time 1.364 (1.385) data 0.000 (0.015) loss 1.8125 (1.0886) acc 68.7500 (72.2917) lr 1.8090e-03 eta 7:28:35 +epoch [12/50] batch [65/500] time 1.366 (1.383) data 0.000 (0.014) loss 1.3340 (1.0993) acc 78.1250 (72.4519) lr 1.8090e-03 eta 7:27:51 +epoch [12/50] batch [70/500] time 1.353 (1.381) data 0.000 (0.013) loss 1.4697 (1.1114) acc 65.6250 (72.4554) lr 1.8090e-03 eta 7:27:16 +epoch [12/50] batch [75/500] time 1.354 (1.380) data 0.000 (0.012) loss 0.9292 (1.1118) acc 62.5000 (72.1250) lr 1.8090e-03 eta 7:26:40 +epoch [12/50] batch [80/500] time 1.361 (1.378) data 0.000 (0.011) loss 0.7847 (1.1245) acc 84.3750 (71.9531) lr 1.8090e-03 eta 7:26:03 +epoch [12/50] batch [85/500] time 1.355 (1.378) data 0.000 (0.011) loss 1.0918 (1.1283) acc 59.3750 (71.6912) lr 1.8090e-03 eta 7:25:46 +epoch [12/50] batch [90/500] time 1.346 (1.377) data 0.000 (0.010) loss 0.8833 (1.1233) acc 81.2500 (71.8403) lr 1.8090e-03 eta 7:25:21 +epoch [12/50] batch [95/500] time 1.368 (1.376) data 0.000 (0.009) loss 1.0342 (1.1359) acc 68.7500 (71.8750) lr 1.8090e-03 eta 7:24:59 +epoch [12/50] batch [100/500] time 1.358 (1.376) data 0.000 (0.009) loss 0.8872 (1.1362) acc 84.3750 (72.0000) lr 1.8090e-03 eta 7:24:48 +epoch [12/50] batch [105/500] time 1.364 (1.375) data 0.001 (0.009) 
loss 2.4609 (1.1572) acc 46.8750 (71.5476) lr 1.8090e-03 eta 7:24:35 +epoch [12/50] batch [110/500] time 1.367 (1.375) data 0.000 (0.008) loss 0.9224 (1.1537) acc 71.8750 (71.7045) lr 1.8090e-03 eta 7:24:23 +epoch [12/50] batch [115/500] time 1.374 (1.375) data 0.000 (0.008) loss 1.3965 (1.1565) acc 62.5000 (71.5761) lr 1.8090e-03 eta 7:24:13 +epoch [12/50] batch [120/500] time 1.374 (1.375) data 0.000 (0.008) loss 1.5801 (1.1568) acc 62.5000 (71.6406) lr 1.8090e-03 eta 7:24:08 +epoch [12/50] batch [125/500] time 1.389 (1.375) data 0.000 (0.007) loss 1.3379 (1.1561) acc 71.8750 (71.6250) lr 1.8090e-03 eta 7:23:57 +epoch [12/50] batch [130/500] time 1.363 (1.375) data 0.000 (0.007) loss 1.4209 (1.1540) acc 40.6250 (71.4183) lr 1.8090e-03 eta 7:23:46 +epoch [12/50] batch [135/500] time 1.353 (1.374) data 0.000 (0.007) loss 0.9302 (1.1502) acc 75.0000 (71.5046) lr 1.8090e-03 eta 7:23:26 +epoch [12/50] batch [140/500] time 1.368 (1.374) data 0.000 (0.007) loss 1.3496 (1.1500) acc 75.0000 (71.6071) lr 1.8090e-03 eta 7:23:13 +epoch [12/50] batch [145/500] time 1.361 (1.373) data 0.010 (0.006) loss 0.8667 (1.1483) acc 75.0000 (71.6164) lr 1.8090e-03 eta 7:23:04 +epoch [12/50] batch [150/500] time 1.481 (1.374) data 0.001 (0.006) loss 1.4541 (1.1500) acc 68.7500 (71.5625) lr 1.8090e-03 eta 7:23:04 +epoch [12/50] batch [155/500] time 1.372 (1.374) data 0.000 (0.006) loss 1.6191 (1.1582) acc 53.1250 (71.2903) lr 1.8090e-03 eta 7:22:53 +epoch [12/50] batch [160/500] time 1.356 (1.373) data 0.000 (0.006) loss 1.1084 (1.1574) acc 78.1250 (71.2695) lr 1.8090e-03 eta 7:22:42 +epoch [12/50] batch [165/500] time 1.363 (1.373) data 0.000 (0.006) loss 1.1885 (1.1539) acc 75.0000 (71.3826) lr 1.8090e-03 eta 7:22:33 +epoch [12/50] batch [170/500] time 1.382 (1.373) data 0.000 (0.006) loss 0.8999 (1.1560) acc 81.2500 (71.4154) lr 1.8090e-03 eta 7:22:22 +epoch [12/50] batch [175/500] time 1.358 (1.373) data 0.000 (0.005) loss 1.5566 (1.1620) acc 59.3750 (71.1607) lr 1.8090e-03 eta 
7:22:11 +epoch [12/50] batch [180/500] time 1.364 (1.373) data 0.000 (0.005) loss 2.1172 (1.1702) acc 65.6250 (71.1111) lr 1.8090e-03 eta 7:21:58 +epoch [12/50] batch [185/500] time 1.381 (1.372) data 0.000 (0.005) loss 0.9487 (1.1677) acc 68.7500 (71.1318) lr 1.8090e-03 eta 7:21:48 +epoch [12/50] batch [190/500] time 1.376 (1.372) data 0.000 (0.005) loss 1.7461 (1.1750) acc 62.5000 (71.0526) lr 1.8090e-03 eta 7:21:36 +epoch [12/50] batch [195/500] time 1.392 (1.373) data 0.000 (0.005) loss 1.3799 (1.1759) acc 65.6250 (70.9776) lr 1.8090e-03 eta 7:21:41 +epoch [12/50] batch [200/500] time 1.354 (1.372) data 0.000 (0.005) loss 1.8193 (1.1813) acc 65.6250 (70.9531) lr 1.8090e-03 eta 7:21:28 +epoch [12/50] batch [205/500] time 1.342 (1.372) data 0.000 (0.005) loss 1.6367 (1.1798) acc 65.6250 (71.0518) lr 1.8090e-03 eta 7:21:16 +epoch [12/50] batch [210/500] time 1.357 (1.372) data 0.000 (0.005) loss 1.0146 (1.1792) acc 78.1250 (70.9524) lr 1.8090e-03 eta 7:20:59 +epoch [12/50] batch [215/500] time 1.353 (1.372) data 0.000 (0.004) loss 0.7881 (1.1722) acc 78.1250 (71.1192) lr 1.8090e-03 eta 7:20:51 +epoch [12/50] batch [220/500] time 1.373 (1.371) data 0.000 (0.004) loss 1.4131 (1.1656) acc 75.0000 (71.2784) lr 1.8090e-03 eta 7:20:38 +epoch [12/50] batch [225/500] time 1.364 (1.371) data 0.000 (0.004) loss 1.2852 (1.1643) acc 65.6250 (71.2361) lr 1.8090e-03 eta 7:20:25 +epoch [12/50] batch [230/500] time 1.366 (1.371) data 0.000 (0.004) loss 0.4143 (1.1600) acc 87.5000 (71.3451) lr 1.8090e-03 eta 7:20:17 +epoch [12/50] batch [235/500] time 1.391 (1.371) data 0.000 (0.004) loss 1.3008 (1.1571) acc 68.7500 (71.3830) lr 1.8090e-03 eta 7:20:14 +epoch [12/50] batch [240/500] time 1.348 (1.371) data 0.000 (0.004) loss 0.8066 (1.1569) acc 78.1250 (71.4062) lr 1.8090e-03 eta 7:20:04 +epoch [12/50] batch [245/500] time 1.351 (1.371) data 0.000 (0.004) loss 1.0986 (1.1568) acc 71.8750 (71.3520) lr 1.8090e-03 eta 7:19:51 +epoch [12/50] batch [250/500] time 1.354 (1.370) data 
0.000 (0.004) loss 1.0996 (1.1527) acc 81.2500 (71.4375) lr 1.8090e-03 eta 7:19:38 +epoch [12/50] batch [255/500] time 1.355 (1.370) data 0.000 (0.004) loss 1.5859 (1.1500) acc 62.5000 (71.5196) lr 1.8090e-03 eta 7:19:23 +epoch [12/50] batch [260/500] time 1.355 (1.369) data 0.000 (0.004) loss 0.9268 (1.1503) acc 75.0000 (71.4663) lr 1.8090e-03 eta 7:19:08 +epoch [12/50] batch [265/500] time 1.360 (1.369) data 0.000 (0.004) loss 1.2285 (1.1496) acc 78.1250 (71.5684) lr 1.8090e-03 eta 7:18:56 +epoch [12/50] batch [270/500] time 1.361 (1.369) data 0.000 (0.004) loss 0.7598 (1.1507) acc 75.0000 (71.5625) lr 1.8090e-03 eta 7:18:47 +epoch [12/50] batch [275/500] time 1.350 (1.369) data 0.000 (0.004) loss 1.1045 (1.1504) acc 71.8750 (71.4773) lr 1.8090e-03 eta 7:18:35 +epoch [12/50] batch [280/500] time 1.355 (1.369) data 0.000 (0.003) loss 1.2422 (1.1493) acc 68.7500 (71.4397) lr 1.8090e-03 eta 7:18:23 +epoch [12/50] batch [285/500] time 1.349 (1.368) data 0.000 (0.003) loss 1.1035 (1.1509) acc 65.6250 (71.3925) lr 1.8090e-03 eta 7:18:12 +epoch [12/50] batch [290/500] time 1.356 (1.368) data 0.000 (0.003) loss 1.3760 (1.1522) acc 65.6250 (71.3793) lr 1.8090e-03 eta 7:18:02 +epoch [12/50] batch [295/500] time 1.370 (1.369) data 0.000 (0.003) loss 1.1426 (1.1525) acc 75.0000 (71.3347) lr 1.8090e-03 eta 7:18:02 +epoch [12/50] batch [300/500] time 1.350 (1.368) data 0.000 (0.003) loss 1.1279 (1.1570) acc 71.8750 (71.2188) lr 1.8090e-03 eta 7:17:52 +epoch [12/50] batch [305/500] time 1.373 (1.368) data 0.000 (0.003) loss 1.0068 (1.1563) acc 75.0000 (71.2090) lr 1.8090e-03 eta 7:17:46 +epoch [12/50] batch [310/500] time 1.368 (1.368) data 0.001 (0.003) loss 1.0596 (1.1526) acc 62.5000 (71.2298) lr 1.8090e-03 eta 7:17:39 +epoch [12/50] batch [315/500] time 1.355 (1.368) data 0.000 (0.003) loss 1.4717 (1.1557) acc 68.7500 (71.1508) lr 1.8090e-03 eta 7:17:33 +epoch [12/50] batch [320/500] time 1.365 (1.368) data 0.000 (0.003) loss 0.9473 (1.1541) acc 71.8750 (71.1621) lr 
1.8090e-03 eta 7:17:26 +epoch [12/50] batch [325/500] time 1.392 (1.368) data 0.000 (0.003) loss 1.1738 (1.1535) acc 71.8750 (71.1058) lr 1.8090e-03 eta 7:17:20 +epoch [12/50] batch [330/500] time 1.372 (1.368) data 0.000 (0.003) loss 0.8730 (1.1550) acc 71.8750 (71.0322) lr 1.8090e-03 eta 7:17:10 +epoch [12/50] batch [335/500] time 1.379 (1.368) data 0.000 (0.003) loss 0.8481 (1.1533) acc 75.0000 (71.0634) lr 1.8090e-03 eta 7:17:02 +epoch [12/50] batch [340/500] time 1.375 (1.369) data 0.000 (0.003) loss 1.2725 (1.1552) acc 71.8750 (71.0202) lr 1.8090e-03 eta 7:17:03 +epoch [12/50] batch [345/500] time 1.363 (1.369) data 0.001 (0.003) loss 0.8750 (1.1556) acc 81.2500 (71.0236) lr 1.8090e-03 eta 7:16:54 +epoch [12/50] batch [350/500] time 1.350 (1.368) data 0.000 (0.003) loss 1.3818 (1.1554) acc 68.7500 (71.0714) lr 1.8090e-03 eta 7:16:44 +epoch [12/50] batch [355/500] time 1.344 (1.368) data 0.000 (0.003) loss 1.0225 (1.1576) acc 78.1250 (71.0563) lr 1.8090e-03 eta 7:16:34 +epoch [12/50] batch [360/500] time 1.371 (1.368) data 0.001 (0.003) loss 0.7661 (1.1533) acc 81.2500 (71.1632) lr 1.8090e-03 eta 7:16:29 +epoch [12/50] batch [365/500] time 1.349 (1.368) data 0.000 (0.003) loss 0.7920 (1.1507) acc 75.0000 (71.1901) lr 1.8090e-03 eta 7:16:19 +epoch [12/50] batch [370/500] time 1.333 (1.368) data 0.000 (0.003) loss 1.1270 (1.1525) acc 71.8750 (71.1402) lr 1.8090e-03 eta 7:16:09 +epoch [12/50] batch [375/500] time 1.343 (1.368) data 0.000 (0.003) loss 0.9829 (1.1520) acc 68.7500 (71.1167) lr 1.8090e-03 eta 7:15:59 +epoch [12/50] batch [380/500] time 1.362 (1.368) data 0.000 (0.003) loss 1.0059 (1.1535) acc 75.0000 (71.0444) lr 1.8090e-03 eta 7:15:51 +epoch [12/50] batch [385/500] time 1.338 (1.368) data 0.000 (0.003) loss 0.5586 (1.1518) acc 78.1250 (71.0390) lr 1.8090e-03 eta 7:15:42 +epoch [12/50] batch [390/500] time 1.376 (1.368) data 0.000 (0.003) loss 1.0488 (1.1531) acc 81.2500 (70.9936) lr 1.8090e-03 eta 7:15:33 +epoch [12/50] batch [395/500] time 1.393 
(1.368) data 0.000 (0.003) loss 1.2090 (1.1526) acc 62.5000 (70.9731) lr 1.8090e-03 eta 7:15:28 +epoch [12/50] batch [400/500] time 1.364 (1.368) data 0.001 (0.003) loss 1.4512 (1.1528) acc 68.7500 (70.9844) lr 1.8090e-03 eta 7:15:22 +epoch [12/50] batch [405/500] time 1.355 (1.368) data 0.001 (0.003) loss 1.0732 (1.1526) acc 68.7500 (70.9645) lr 1.8090e-03 eta 7:15:12 +epoch [12/50] batch [410/500] time 1.361 (1.367) data 0.000 (0.003) loss 1.0391 (1.1533) acc 68.7500 (70.9375) lr 1.8090e-03 eta 7:15:04 +epoch [12/50] batch [415/500] time 1.362 (1.367) data 0.000 (0.002) loss 1.1738 (1.1509) acc 71.8750 (71.0015) lr 1.8090e-03 eta 7:14:55 +epoch [12/50] batch [420/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.0752 (1.1540) acc 78.1250 (71.0045) lr 1.8090e-03 eta 7:14:49 +epoch [12/50] batch [425/500] time 1.374 (1.367) data 0.000 (0.002) loss 1.0186 (1.1551) acc 68.7500 (71.0074) lr 1.8090e-03 eta 7:14:40 +epoch [12/50] batch [430/500] time 1.349 (1.367) data 0.000 (0.002) loss 0.8887 (1.1540) acc 71.8750 (71.0320) lr 1.8090e-03 eta 7:14:32 +epoch [12/50] batch [435/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.2217 (1.1539) acc 71.8750 (70.9986) lr 1.8090e-03 eta 7:14:26 +epoch [12/50] batch [440/500] time 1.366 (1.367) data 0.000 (0.002) loss 0.7920 (1.1529) acc 68.7500 (71.0156) lr 1.8090e-03 eta 7:14:24 +epoch [12/50] batch [445/500] time 1.357 (1.367) data 0.000 (0.002) loss 1.4082 (1.1544) acc 68.7500 (71.0183) lr 1.8090e-03 eta 7:14:16 +epoch [12/50] batch [450/500] time 1.346 (1.367) data 0.000 (0.002) loss 0.8071 (1.1530) acc 78.1250 (71.0000) lr 1.8090e-03 eta 7:14:08 +epoch [12/50] batch [455/500] time 1.376 (1.367) data 0.001 (0.002) loss 0.8633 (1.1508) acc 78.1250 (71.0508) lr 1.8090e-03 eta 7:14:01 +epoch [12/50] batch [460/500] time 1.350 (1.367) data 0.000 (0.002) loss 0.8877 (1.1479) acc 84.3750 (71.1685) lr 1.8090e-03 eta 7:13:52 +epoch [12/50] batch [465/500] time 1.366 (1.367) data 0.000 (0.002) loss 0.8643 (1.1485) acc 78.1250 
(71.2231) lr 1.8090e-03 eta 7:13:43 +epoch [12/50] batch [470/500] time 1.366 (1.367) data 0.000 (0.002) loss 0.8799 (1.1510) acc 81.2500 (71.2500) lr 1.8090e-03 eta 7:13:36 +epoch [12/50] batch [475/500] time 1.381 (1.367) data 0.000 (0.002) loss 0.6504 (1.1495) acc 81.2500 (71.2961) lr 1.8090e-03 eta 7:13:27 +epoch [12/50] batch [480/500] time 1.528 (1.367) data 0.000 (0.002) loss 1.1816 (1.1482) acc 75.0000 (71.3411) lr 1.8090e-03 eta 7:13:27 +epoch [12/50] batch [485/500] time 1.375 (1.367) data 0.001 (0.002) loss 0.7085 (1.1477) acc 81.2500 (71.3466) lr 1.8090e-03 eta 7:13:22 +epoch [12/50] batch [490/500] time 1.399 (1.368) data 0.000 (0.002) loss 1.0625 (1.1462) acc 78.1250 (71.3648) lr 1.8090e-03 eta 7:13:16 +epoch [12/50] batch [495/500] time 1.350 (1.367) data 0.000 (0.002) loss 1.0303 (1.1467) acc 75.0000 (71.3194) lr 1.8090e-03 eta 7:13:07 +epoch [12/50] batch [500/500] time 1.338 (1.367) data 0.000 (0.002) loss 0.6465 (1.1461) acc 78.1250 (71.3438) lr 1.7705e-03 eta 7:12:57 +epoch [13/50] batch [5/500] time 1.388 (1.542) data 0.001 (0.163) loss 1.0977 (1.3258) acc 75.0000 (64.3750) lr 1.7705e-03 eta 8:08:17 +epoch [13/50] batch [10/500] time 1.350 (1.455) data 0.000 (0.082) loss 1.6836 (1.2070) acc 62.5000 (67.5000) lr 1.7705e-03 eta 7:40:22 +epoch [13/50] batch [15/500] time 1.365 (1.423) data 0.000 (0.054) loss 1.1113 (1.1735) acc 71.8750 (67.9167) lr 1.7705e-03 eta 7:30:24 +epoch [13/50] batch [20/500] time 1.360 (1.407) data 0.000 (0.041) loss 1.4805 (1.1886) acc 62.5000 (68.4375) lr 1.7705e-03 eta 7:25:11 +epoch [13/50] batch [25/500] time 1.355 (1.406) data 0.000 (0.033) loss 1.2041 (1.1858) acc 68.7500 (68.5000) lr 1.7705e-03 eta 7:24:36 +epoch [13/50] batch [30/500] time 1.342 (1.398) data 0.000 (0.027) loss 1.0391 (1.1747) acc 84.3750 (69.3750) lr 1.7705e-03 eta 7:22:02 +epoch [13/50] batch [35/500] time 1.340 (1.394) data 0.000 (0.024) loss 1.6816 (1.1916) acc 59.3750 (69.1071) lr 1.7705e-03 eta 7:20:40 +epoch [13/50] batch [40/500] time 
1.366 (1.392) data 0.001 (0.021) loss 1.3730 (1.1930) acc 71.8750 (69.4531) lr 1.7705e-03 eta 7:19:57 +epoch [13/50] batch [45/500] time 1.372 (1.390) data 0.000 (0.018) loss 0.9673 (1.1443) acc 75.0000 (70.4861) lr 1.7705e-03 eta 7:19:03 +epoch [13/50] batch [50/500] time 1.356 (1.388) data 0.000 (0.017) loss 0.6055 (1.1402) acc 84.3750 (70.6875) lr 1.7705e-03 eta 7:18:23 +epoch [13/50] batch [55/500] time 1.356 (1.385) data 0.000 (0.015) loss 0.9941 (1.1169) acc 65.6250 (70.9659) lr 1.7705e-03 eta 7:17:19 +epoch [13/50] batch [60/500] time 1.379 (1.383) data 0.000 (0.014) loss 1.3711 (1.1183) acc 65.6250 (71.1458) lr 1.7705e-03 eta 7:16:42 +epoch [13/50] batch [65/500] time 1.376 (1.384) data 0.000 (0.013) loss 0.7363 (1.0926) acc 71.8750 (71.3942) lr 1.7705e-03 eta 7:16:54 +epoch [13/50] batch [70/500] time 1.348 (1.382) data 0.000 (0.012) loss 1.3311 (1.0918) acc 71.8750 (71.4732) lr 1.7705e-03 eta 7:16:07 +epoch [13/50] batch [75/500] time 1.362 (1.381) data 0.001 (0.011) loss 1.2188 (1.0915) acc 65.6250 (71.5417) lr 1.7705e-03 eta 7:15:37 +epoch [13/50] batch [80/500] time 1.363 (1.380) data 0.001 (0.011) loss 1.4873 (1.0910) acc 65.6250 (71.6406) lr 1.7705e-03 eta 7:15:06 +epoch [13/50] batch [85/500] time 1.361 (1.379) data 0.000 (0.010) loss 0.8291 (1.0839) acc 78.1250 (71.8750) lr 1.7705e-03 eta 7:14:49 +epoch [13/50] batch [90/500] time 1.349 (1.378) data 0.000 (0.009) loss 0.8755 (1.0969) acc 71.8750 (71.7014) lr 1.7705e-03 eta 7:14:23 +epoch [13/50] batch [95/500] time 1.347 (1.377) data 0.000 (0.009) loss 0.5410 (1.0980) acc 87.5000 (71.8750) lr 1.7705e-03 eta 7:13:58 +epoch [13/50] batch [100/500] time 1.349 (1.377) data 0.001 (0.009) loss 1.0879 (1.1004) acc 68.7500 (71.9062) lr 1.7705e-03 eta 7:13:36 +epoch [13/50] batch [105/500] time 1.366 (1.376) data 0.001 (0.008) loss 1.0244 (1.1120) acc 75.0000 (71.8750) lr 1.7705e-03 eta 7:13:17 +epoch [13/50] batch [110/500] time 1.359 (1.375) data 0.001 (0.008) loss 1.2920 (1.1170) acc 62.5000 (71.7898) lr 
1.7705e-03 eta 7:12:57 +epoch [13/50] batch [115/500] time 1.340 (1.375) data 0.000 (0.007) loss 0.9136 (1.1138) acc 68.7500 (71.7935) lr 1.7705e-03 eta 7:12:38 +epoch [13/50] batch [120/500] time 1.368 (1.374) data 0.000 (0.007) loss 0.8652 (1.1163) acc 71.8750 (71.6667) lr 1.7705e-03 eta 7:12:28 +epoch [13/50] batch [125/500] time 1.347 (1.375) data 0.000 (0.007) loss 0.7681 (1.1059) acc 84.3750 (71.8750) lr 1.7705e-03 eta 7:12:32 +epoch [13/50] batch [130/500] time 1.366 (1.375) data 0.000 (0.007) loss 1.2119 (1.1101) acc 68.7500 (71.7548) lr 1.7705e-03 eta 7:12:18 +epoch [13/50] batch [135/500] time 1.351 (1.374) data 0.000 (0.006) loss 1.8926 (1.1229) acc 65.6250 (71.6898) lr 1.7705e-03 eta 7:12:02 +epoch [13/50] batch [140/500] time 1.355 (1.374) data 0.000 (0.006) loss 0.7563 (1.1126) acc 81.2500 (71.8750) lr 1.7705e-03 eta 7:11:48 +epoch [13/50] batch [145/500] time 1.362 (1.374) data 0.000 (0.006) loss 1.1406 (1.1060) acc 59.3750 (71.8534) lr 1.7705e-03 eta 7:11:40 +epoch [13/50] batch [150/500] time 1.350 (1.373) data 0.000 (0.006) loss 0.8379 (1.1114) acc 78.1250 (71.8750) lr 1.7705e-03 eta 7:11:25 +epoch [13/50] batch [155/500] time 1.356 (1.373) data 0.001 (0.006) loss 1.3057 (1.1037) acc 65.6250 (72.0766) lr 1.7705e-03 eta 7:11:13 +epoch [13/50] batch [160/500] time 1.368 (1.373) data 0.000 (0.005) loss 1.1387 (1.1136) acc 71.8750 (71.8359) lr 1.7705e-03 eta 7:11:02 +epoch [13/50] batch [165/500] time 1.362 (1.373) data 0.000 (0.005) loss 1.4678 (1.1105) acc 62.5000 (71.8939) lr 1.7705e-03 eta 7:10:52 +epoch [13/50] batch [170/500] time 1.350 (1.373) data 0.000 (0.005) loss 1.5479 (1.1107) acc 68.7500 (71.8199) lr 1.7705e-03 eta 7:10:52 +epoch [13/50] batch [175/500] time 1.364 (1.373) data 0.000 (0.005) loss 1.5645 (1.1099) acc 59.3750 (71.8036) lr 1.7705e-03 eta 7:10:40 +epoch [13/50] batch [180/500] time 1.378 (1.372) data 0.001 (0.005) loss 1.8174 (1.1068) acc 62.5000 (71.8750) lr 1.7705e-03 eta 7:10:29 +epoch [13/50] batch [185/500] time 1.383 
(1.372) data 0.000 (0.005) loss 0.7837 (1.1089) acc 84.3750 (71.9088) lr 1.7705e-03 eta 7:10:19 +epoch [13/50] batch [190/500] time 1.370 (1.372) data 0.000 (0.005) loss 1.4785 (1.1118) acc 56.2500 (71.6941) lr 1.7705e-03 eta 7:10:05 +epoch [13/50] batch [195/500] time 1.368 (1.372) data 0.001 (0.005) loss 0.9795 (1.1047) acc 75.0000 (71.8429) lr 1.7705e-03 eta 7:09:52 +epoch [13/50] batch [200/500] time 1.365 (1.371) data 0.000 (0.004) loss 0.7212 (1.1091) acc 75.0000 (71.6875) lr 1.7705e-03 eta 7:09:39 +epoch [13/50] batch [205/500] time 1.375 (1.371) data 0.000 (0.004) loss 0.5269 (1.1108) acc 84.3750 (71.7835) lr 1.7705e-03 eta 7:09:31 +epoch [13/50] batch [210/500] time 1.360 (1.371) data 0.000 (0.004) loss 0.7891 (1.1037) acc 78.1250 (71.8750) lr 1.7705e-03 eta 7:09:18 +epoch [13/50] batch [215/500] time 1.361 (1.371) data 0.000 (0.004) loss 0.5483 (1.0972) acc 81.2500 (71.9913) lr 1.7705e-03 eta 7:09:06 +epoch [13/50] batch [220/500] time 1.371 (1.370) data 0.000 (0.004) loss 1.0049 (1.0988) acc 75.0000 (72.0170) lr 1.7705e-03 eta 7:08:57 +epoch [13/50] batch [225/500] time 1.372 (1.370) data 0.000 (0.004) loss 1.7080 (1.1010) acc 65.6250 (72.0139) lr 1.7705e-03 eta 7:08:51 +epoch [13/50] batch [230/500] time 1.387 (1.371) data 0.000 (0.004) loss 1.7236 (1.1049) acc 65.6250 (71.8886) lr 1.7705e-03 eta 7:08:47 +epoch [13/50] batch [235/500] time 1.350 (1.371) data 0.000 (0.004) loss 1.0166 (1.1033) acc 75.0000 (71.9681) lr 1.7705e-03 eta 7:08:37 +epoch [13/50] batch [240/500] time 1.364 (1.370) data 0.000 (0.004) loss 0.9717 (1.1027) acc 65.6250 (71.9661) lr 1.7705e-03 eta 7:08:28 +epoch [13/50] batch [245/500] time 1.357 (1.370) data 0.000 (0.004) loss 0.9404 (1.1050) acc 81.2500 (71.9005) lr 1.7705e-03 eta 7:08:18 +epoch [13/50] batch [250/500] time 1.385 (1.370) data 0.000 (0.004) loss 0.8921 (1.1069) acc 68.7500 (71.8500) lr 1.7705e-03 eta 7:08:10 +epoch [13/50] batch [255/500] time 1.354 (1.370) data 0.000 (0.004) loss 0.5371 (1.1061) acc 84.3750 
(71.8260) lr 1.7705e-03 eta 7:08:00 +epoch [13/50] batch [260/500] time 1.382 (1.370) data 0.000 (0.004) loss 0.9189 (1.1057) acc 62.5000 (71.7067) lr 1.7705e-03 eta 7:07:51 +epoch [13/50] batch [265/500] time 1.376 (1.370) data 0.001 (0.003) loss 0.9985 (1.1046) acc 71.8750 (71.8042) lr 1.7705e-03 eta 7:07:47 +epoch [13/50] batch [270/500] time 1.383 (1.370) data 0.000 (0.003) loss 1.3506 (1.1070) acc 71.8750 (71.7593) lr 1.7705e-03 eta 7:07:48 +epoch [13/50] batch [275/500] time 1.349 (1.370) data 0.000 (0.003) loss 1.5801 (1.1088) acc 71.8750 (71.8068) lr 1.7705e-03 eta 7:07:38 +epoch [13/50] batch [280/500] time 1.351 (1.370) data 0.000 (0.003) loss 1.0879 (1.1042) acc 84.3750 (71.9420) lr 1.7705e-03 eta 7:07:29 +epoch [13/50] batch [285/500] time 1.349 (1.370) data 0.000 (0.003) loss 1.9385 (1.1070) acc 62.5000 (71.9408) lr 1.7705e-03 eta 7:07:19 +epoch [13/50] batch [290/500] time 1.364 (1.370) data 0.000 (0.003) loss 1.0059 (1.1073) acc 71.8750 (71.8858) lr 1.7705e-03 eta 7:07:11 +epoch [13/50] batch [295/500] time 1.365 (1.370) data 0.000 (0.003) loss 0.9580 (1.1063) acc 75.0000 (71.9386) lr 1.7705e-03 eta 7:07:07 +epoch [13/50] batch [300/500] time 1.363 (1.370) data 0.000 (0.003) loss 0.7466 (1.1057) acc 81.2500 (71.9688) lr 1.7705e-03 eta 7:06:59 +epoch [13/50] batch [305/500] time 1.362 (1.370) data 0.000 (0.003) loss 1.1416 (1.1089) acc 65.6250 (71.9057) lr 1.7705e-03 eta 7:06:51 +epoch [13/50] batch [310/500] time 1.474 (1.370) data 0.000 (0.003) loss 0.9878 (1.1097) acc 84.3750 (71.8246) lr 1.7705e-03 eta 7:06:49 +epoch [13/50] batch [315/500] time 1.357 (1.370) data 0.000 (0.003) loss 1.2197 (1.1143) acc 65.6250 (71.7063) lr 1.7705e-03 eta 7:06:40 +epoch [13/50] batch [320/500] time 1.380 (1.370) data 0.000 (0.003) loss 0.6221 (1.1164) acc 87.5000 (71.6699) lr 1.7705e-03 eta 7:06:35 +epoch [13/50] batch [325/500] time 1.360 (1.370) data 0.000 (0.003) loss 0.7305 (1.1157) acc 75.0000 (71.6827) lr 1.7705e-03 eta 7:06:26 +epoch [13/50] batch [330/500] 
time 1.379 (1.370) data 0.000 (0.003) loss 1.4385 (1.1159) acc 65.6250 (71.7140) lr 1.7705e-03 eta 7:06:16 +epoch [13/50] batch [335/500] time 1.371 (1.370) data 0.000 (0.003) loss 1.4814 (1.1169) acc 65.6250 (71.6698) lr 1.7705e-03 eta 7:06:09 +epoch [13/50] batch [340/500] time 1.358 (1.370) data 0.000 (0.003) loss 1.0264 (1.1185) acc 71.8750 (71.6728) lr 1.7705e-03 eta 7:06:00 +epoch [13/50] batch [345/500] time 1.365 (1.370) data 0.000 (0.003) loss 1.5137 (1.1218) acc 68.7500 (71.6304) lr 1.7705e-03 eta 7:05:53 +epoch [13/50] batch [350/500] time 1.348 (1.370) data 0.000 (0.003) loss 0.9941 (1.1233) acc 71.8750 (71.5893) lr 1.7705e-03 eta 7:05:45 +epoch [13/50] batch [355/500] time 1.366 (1.369) data 0.000 (0.003) loss 1.3408 (1.1206) acc 71.8750 (71.6461) lr 1.7705e-03 eta 7:05:33 +epoch [13/50] batch [360/500] time 1.379 (1.369) data 0.000 (0.003) loss 1.0479 (1.1212) acc 78.1250 (71.7188) lr 1.7705e-03 eta 7:05:26 +epoch [13/50] batch [365/500] time 1.378 (1.369) data 0.000 (0.003) loss 1.2705 (1.1225) acc 65.6250 (71.6866) lr 1.7705e-03 eta 7:05:20 +epoch [13/50] batch [370/500] time 1.366 (1.369) data 0.000 (0.003) loss 1.5908 (1.1264) acc 68.7500 (71.6385) lr 1.7705e-03 eta 7:05:11 +epoch [13/50] batch [375/500] time 1.364 (1.369) data 0.000 (0.003) loss 1.0869 (1.1284) acc 65.6250 (71.5833) lr 1.7705e-03 eta 7:05:03 +epoch [13/50] batch [380/500] time 1.361 (1.369) data 0.000 (0.003) loss 1.2832 (1.1293) acc 65.6250 (71.5625) lr 1.7705e-03 eta 7:04:53 +epoch [13/50] batch [385/500] time 1.369 (1.369) data 0.000 (0.003) loss 1.6455 (1.1295) acc 65.6250 (71.5260) lr 1.7705e-03 eta 7:04:45 +epoch [13/50] batch [390/500] time 1.361 (1.369) data 0.000 (0.002) loss 1.0000 (1.1284) acc 78.1250 (71.5385) lr 1.7705e-03 eta 7:04:34 +epoch [13/50] batch [395/500] time 1.361 (1.369) data 0.000 (0.002) loss 1.1709 (1.1296) acc 75.0000 (71.5506) lr 1.7705e-03 eta 7:04:24 +epoch [13/50] batch [400/500] time 1.373 (1.369) data 0.000 (0.002) loss 1.1211 (1.1285) acc 
78.1250 (71.6328) lr 1.7705e-03 eta 7:04:15 +epoch [13/50] batch [405/500] time 1.366 (1.368) data 0.000 (0.002) loss 0.8306 (1.1305) acc 78.1250 (71.6049) lr 1.7705e-03 eta 7:04:06 +epoch [13/50] batch [410/500] time 1.353 (1.369) data 0.000 (0.002) loss 0.6021 (1.1274) acc 87.5000 (71.6921) lr 1.7705e-03 eta 7:04:04 +epoch [13/50] batch [415/500] time 1.390 (1.369) data 0.001 (0.002) loss 0.8965 (1.1249) acc 81.2500 (71.7922) lr 1.7705e-03 eta 7:03:58 +epoch [13/50] batch [420/500] time 1.362 (1.369) data 0.000 (0.002) loss 1.1250 (1.1243) acc 75.0000 (71.8378) lr 1.7705e-03 eta 7:03:49 +epoch [13/50] batch [425/500] time 1.341 (1.369) data 0.000 (0.002) loss 0.6699 (1.1225) acc 78.1250 (71.8676) lr 1.7705e-03 eta 7:03:42 +epoch [13/50] batch [430/500] time 1.378 (1.369) data 0.000 (0.002) loss 0.9912 (1.1222) acc 68.7500 (71.8823) lr 1.7705e-03 eta 7:03:35 +epoch [13/50] batch [435/500] time 1.333 (1.368) data 0.000 (0.002) loss 0.9219 (1.1250) acc 75.0000 (71.8175) lr 1.7705e-03 eta 7:03:24 +epoch [13/50] batch [440/500] time 1.363 (1.368) data 0.000 (0.002) loss 0.7549 (1.1237) acc 78.1250 (71.7827) lr 1.7705e-03 eta 7:03:16 +epoch [13/50] batch [445/500] time 1.373 (1.368) data 0.000 (0.002) loss 1.2725 (1.1239) acc 65.6250 (71.7416) lr 1.7705e-03 eta 7:03:09 +epoch [13/50] batch [450/500] time 1.367 (1.368) data 0.000 (0.002) loss 0.9106 (1.1225) acc 84.3750 (71.8056) lr 1.7705e-03 eta 7:03:02 +epoch [13/50] batch [455/500] time 1.364 (1.369) data 0.000 (0.002) loss 1.2451 (1.1215) acc 65.6250 (71.7857) lr 1.7705e-03 eta 7:02:59 +epoch [13/50] batch [460/500] time 1.365 (1.368) data 0.000 (0.002) loss 0.8599 (1.1218) acc 75.0000 (71.8274) lr 1.7705e-03 eta 7:02:49 +epoch [13/50] batch [465/500] time 1.360 (1.368) data 0.000 (0.002) loss 1.4111 (1.1214) acc 75.0000 (71.8616) lr 1.7705e-03 eta 7:02:41 +epoch [13/50] batch [470/500] time 1.375 (1.368) data 0.000 (0.002) loss 1.3135 (1.1206) acc 59.3750 (71.8816) lr 1.7705e-03 eta 7:02:34 +epoch [13/50] batch 
[475/500] time 1.371 (1.368) data 0.000 (0.002) loss 0.8926 (1.1203) acc 75.0000 (71.8816) lr 1.7705e-03 eta 7:02:26 +epoch [13/50] batch [480/500] time 1.382 (1.368) data 0.000 (0.002) loss 1.2402 (1.1207) acc 75.0000 (71.9336) lr 1.7705e-03 eta 7:02:19 +epoch [13/50] batch [485/500] time 1.345 (1.368) data 0.001 (0.002) loss 0.9893 (1.1230) acc 81.2500 (71.9008) lr 1.7705e-03 eta 7:02:11 +epoch [13/50] batch [490/500] time 1.363 (1.368) data 0.000 (0.002) loss 1.7920 (1.1252) acc 56.2500 (71.8431) lr 1.7705e-03 eta 7:02:04 +epoch [13/50] batch [495/500] time 1.352 (1.368) data 0.000 (0.002) loss 1.3477 (1.1251) acc 68.7500 (71.8434) lr 1.7705e-03 eta 7:01:57 +epoch [13/50] batch [500/500] time 1.362 (1.368) data 0.000 (0.002) loss 0.5400 (1.1247) acc 87.5000 (71.8563) lr 1.7290e-03 eta 7:01:51 +epoch [14/50] batch [5/500] time 1.366 (1.519) data 0.000 (0.159) loss 1.2412 (0.9236) acc 75.0000 (76.8750) lr 1.7290e-03 eta 7:48:13 +epoch [14/50] batch [10/500] time 1.373 (1.443) data 0.000 (0.080) loss 0.8481 (1.0081) acc 81.2500 (74.6875) lr 1.7290e-03 eta 7:24:40 +epoch [14/50] batch [15/500] time 1.354 (1.415) data 0.000 (0.053) loss 1.1631 (1.0782) acc 78.1250 (72.2917) lr 1.7290e-03 eta 7:15:55 +epoch [14/50] batch [20/500] time 1.338 (1.402) data 0.000 (0.040) loss 1.0264 (1.1142) acc 75.0000 (71.7188) lr 1.7290e-03 eta 7:11:56 +epoch [14/50] batch [25/500] time 1.368 (1.394) data 0.000 (0.032) loss 1.0186 (1.0957) acc 75.0000 (72.1250) lr 1.7290e-03 eta 7:09:13 +epoch [14/50] batch [30/500] time 1.354 (1.390) data 0.000 (0.027) loss 0.7134 (1.0928) acc 84.3750 (72.5000) lr 1.7290e-03 eta 7:07:52 +epoch [14/50] batch [35/500] time 1.383 (1.387) data 0.000 (0.023) loss 1.0322 (1.0858) acc 81.2500 (73.4821) lr 1.7290e-03 eta 7:06:45 +epoch [14/50] batch [40/500] time 1.361 (1.384) data 0.000 (0.020) loss 0.9429 (1.0768) acc 81.2500 (73.6719) lr 1.7290e-03 eta 7:05:43 +epoch [14/50] batch [45/500] time 1.362 (1.381) data 0.000 (0.018) loss 1.3965 (1.0994) acc 
68.7500 (73.1250) lr 1.7290e-03 eta 7:04:53 +epoch [14/50] batch [50/500] time 1.375 (1.379) data 0.000 (0.016) loss 1.1787 (1.1024) acc 65.6250 (72.4375) lr 1.7290e-03 eta 7:04:10 +epoch [14/50] batch [55/500] time 1.365 (1.378) data 0.000 (0.015) loss 1.0273 (1.0998) acc 68.7500 (72.1591) lr 1.7290e-03 eta 7:03:32 +epoch [14/50] batch [60/500] time 1.352 (1.376) data 0.000 (0.014) loss 1.1826 (1.1009) acc 71.8750 (72.1354) lr 1.7290e-03 eta 7:02:48 +epoch [14/50] batch [65/500] time 1.342 (1.374) data 0.000 (0.013) loss 0.9185 (1.1144) acc 75.0000 (71.6827) lr 1.7290e-03 eta 7:02:11 +epoch [14/50] batch [70/500] time 1.365 (1.373) data 0.000 (0.012) loss 0.7290 (1.1064) acc 87.5000 (72.0536) lr 1.7290e-03 eta 7:01:46 +epoch [14/50] batch [75/500] time 1.364 (1.373) data 0.000 (0.011) loss 1.5586 (1.1146) acc 65.6250 (71.7917) lr 1.7290e-03 eta 7:01:32 +epoch [14/50] batch [80/500] time 1.364 (1.372) data 0.000 (0.010) loss 0.7319 (1.1113) acc 78.1250 (71.8750) lr 1.7290e-03 eta 7:01:10 +epoch [14/50] batch [85/500] time 1.384 (1.372) data 0.000 (0.010) loss 1.4121 (1.1182) acc 62.5000 (71.6176) lr 1.7290e-03 eta 7:00:57 +epoch [14/50] batch [90/500] time 1.359 (1.371) data 0.000 (0.009) loss 0.5928 (1.1092) acc 78.1250 (71.7014) lr 1.7290e-03 eta 7:00:37 +epoch [14/50] batch [95/500] time 1.350 (1.372) data 0.001 (0.009) loss 1.1279 (1.1066) acc 71.8750 (71.7105) lr 1.7290e-03 eta 7:00:48 +epoch [14/50] batch [100/500] time 1.363 (1.371) data 0.000 (0.008) loss 1.2734 (1.1068) acc 75.0000 (71.8125) lr 1.7290e-03 eta 7:00:29 +epoch [14/50] batch [105/500] time 1.369 (1.371) data 0.000 (0.008) loss 0.6748 (1.0988) acc 78.1250 (71.9345) lr 1.7290e-03 eta 7:00:22 +epoch [14/50] batch [110/500] time 1.394 (1.372) data 0.000 (0.008) loss 1.0205 (1.0997) acc 78.1250 (72.0455) lr 1.7290e-03 eta 7:00:29 +epoch [14/50] batch [115/500] time 1.349 (1.372) data 0.000 (0.007) loss 1.2891 (1.1048) acc 62.5000 (71.9293) lr 1.7290e-03 eta 7:00:19 +epoch [14/50] batch [120/500] 
time 1.351 (1.371) data 0.000 (0.007) loss 1.1816 (1.1054) acc 78.1250 (71.8229) lr 1.7290e-03 eta 7:00:04 +epoch [14/50] batch [125/500] time 1.369 (1.371) data 0.000 (0.007) loss 0.5991 (1.1055) acc 71.8750 (71.8750) lr 1.7290e-03 eta 6:59:48 +epoch [14/50] batch [130/500] time 1.376 (1.371) data 0.000 (0.007) loss 1.0449 (1.1074) acc 75.0000 (71.6827) lr 1.7290e-03 eta 6:59:41 +epoch [14/50] batch [135/500] time 1.386 (1.371) data 0.000 (0.006) loss 1.1553 (1.1036) acc 71.8750 (71.6435) lr 1.7290e-03 eta 6:59:34 +epoch [14/50] batch [140/500] time 1.374 (1.372) data 0.000 (0.006) loss 1.3691 (1.1054) acc 68.7500 (71.5402) lr 1.7290e-03 eta 6:59:53 +epoch [14/50] batch [145/500] time 1.367 (1.372) data 0.000 (0.006) loss 0.7832 (1.1182) acc 84.3750 (71.5086) lr 1.7290e-03 eta 6:59:42 +epoch [14/50] batch [150/500] time 1.365 (1.372) data 0.000 (0.006) loss 1.1611 (1.1178) acc 68.7500 (71.4167) lr 1.7290e-03 eta 6:59:28 +epoch [14/50] batch [155/500] time 1.363 (1.372) data 0.000 (0.006) loss 1.0410 (1.1126) acc 75.0000 (71.5524) lr 1.7290e-03 eta 6:59:20 +epoch [14/50] batch [160/500] time 1.386 (1.371) data 0.000 (0.005) loss 0.7212 (1.1091) acc 78.1250 (71.6992) lr 1.7290e-03 eta 6:59:09 +epoch [14/50] batch [165/500] time 1.374 (1.371) data 0.000 (0.005) loss 1.0635 (1.1066) acc 65.6250 (71.6856) lr 1.7290e-03 eta 6:59:02 +epoch [14/50] batch [170/500] time 1.409 (1.371) data 0.000 (0.005) loss 0.7842 (1.1041) acc 78.1250 (71.7279) lr 1.7290e-03 eta 6:58:54 +epoch [14/50] batch [175/500] time 1.367 (1.371) data 0.000 (0.005) loss 1.0371 (1.1018) acc 68.7500 (71.8750) lr 1.7290e-03 eta 6:58:46 +epoch [14/50] batch [180/500] time 1.375 (1.371) data 0.000 (0.005) loss 1.3770 (1.0975) acc 68.7500 (71.9444) lr 1.7290e-03 eta 6:58:30 +epoch [14/50] batch [185/500] time 1.377 (1.371) data 0.000 (0.005) loss 1.1113 (1.1012) acc 68.7500 (71.8581) lr 1.7290e-03 eta 6:58:22 +epoch [14/50] batch [190/500] time 1.375 (1.371) data 0.001 (0.005) loss 1.2871 (1.0993) acc 
68.7500 (71.8914) lr 1.7290e-03 eta 6:58:15 +epoch [14/50] batch [195/500] time 1.349 (1.370) data 0.000 (0.004) loss 0.7490 (1.0914) acc 84.3750 (72.0353) lr 1.7290e-03 eta 6:58:04 +epoch [14/50] batch [200/500] time 1.343 (1.370) data 0.000 (0.004) loss 0.8608 (1.0938) acc 75.0000 (71.9531) lr 1.7290e-03 eta 6:57:50 +epoch [14/50] batch [205/500] time 1.370 (1.370) data 0.000 (0.004) loss 1.3867 (1.0992) acc 65.6250 (71.8445) lr 1.7290e-03 eta 6:57:43 +epoch [14/50] batch [210/500] time 1.352 (1.370) data 0.001 (0.004) loss 0.4761 (1.0931) acc 87.5000 (71.9494) lr 1.7290e-03 eta 6:57:34 +epoch [14/50] batch [215/500] time 1.362 (1.370) data 0.000 (0.004) loss 0.4875 (1.0949) acc 84.3750 (71.9041) lr 1.7290e-03 eta 6:57:23 +epoch [14/50] batch [220/500] time 1.387 (1.370) data 0.000 (0.004) loss 0.9619 (1.0968) acc 75.0000 (71.9460) lr 1.7290e-03 eta 6:57:17 +epoch [14/50] batch [225/500] time 1.361 (1.369) data 0.000 (0.004) loss 1.2090 (1.0994) acc 68.7500 (71.8889) lr 1.7290e-03 eta 6:57:06 +epoch [14/50] batch [230/500] time 1.362 (1.369) data 0.000 (0.004) loss 1.1846 (1.0998) acc 71.8750 (71.9429) lr 1.7290e-03 eta 6:56:58 +epoch [14/50] batch [235/500] time 1.513 (1.370) data 0.000 (0.004) loss 0.8267 (1.1057) acc 71.8750 (71.8218) lr 1.7290e-03 eta 6:56:59 +epoch [14/50] batch [240/500] time 1.354 (1.370) data 0.000 (0.004) loss 1.5723 (1.1033) acc 59.3750 (71.8229) lr 1.7290e-03 eta 6:56:53 +epoch [14/50] batch [245/500] time 1.359 (1.370) data 0.000 (0.004) loss 1.4395 (1.1030) acc 71.8750 (71.8495) lr 1.7290e-03 eta 6:56:41 +epoch [14/50] batch [250/500] time 1.360 (1.369) data 0.000 (0.004) loss 1.7207 (1.1050) acc 56.2500 (71.7500) lr 1.7290e-03 eta 6:56:30 +epoch [14/50] batch [255/500] time 1.365 (1.369) data 0.001 (0.004) loss 0.9336 (1.1014) acc 81.2500 (71.8995) lr 1.7290e-03 eta 6:56:21 +epoch [14/50] batch [260/500] time 1.366 (1.369) data 0.000 (0.003) loss 1.0000 (1.1000) acc 68.7500 (71.8750) lr 1.7290e-03 eta 6:56:09 +epoch [14/50] batch 
[265/500] time 1.345 (1.369) data 0.000 (0.003) loss 1.4219 (1.1002) acc 71.8750 (71.9693) lr 1.7290e-03 eta 6:55:58 +epoch [14/50] batch [270/500] time 1.365 (1.369) data 0.000 (0.003) loss 0.7354 (1.1032) acc 84.3750 (71.9444) lr 1.7290e-03 eta 6:55:47 +epoch [14/50] batch [275/500] time 1.362 (1.369) data 0.000 (0.003) loss 1.4551 (1.1049) acc 71.8750 (71.9886) lr 1.7290e-03 eta 6:55:41 +epoch [14/50] batch [280/500] time 1.356 (1.369) data 0.000 (0.003) loss 0.6348 (1.1065) acc 81.2500 (71.9308) lr 1.7290e-03 eta 6:55:43 +epoch [14/50] batch [285/500] time 1.369 (1.369) data 0.000 (0.003) loss 1.0635 (1.1078) acc 75.0000 (71.9189) lr 1.7290e-03 eta 6:55:33 +epoch [14/50] batch [290/500] time 1.360 (1.369) data 0.000 (0.003) loss 1.8086 (1.1139) acc 65.6250 (71.8534) lr 1.7290e-03 eta 6:55:27 +epoch [14/50] batch [295/500] time 1.359 (1.369) data 0.001 (0.003) loss 1.1914 (1.1118) acc 71.8750 (71.9174) lr 1.7290e-03 eta 6:55:17 +epoch [14/50] batch [300/500] time 1.367 (1.369) data 0.000 (0.003) loss 2.2207 (1.1142) acc 56.2500 (71.8750) lr 1.7290e-03 eta 6:55:07 +epoch [14/50] batch [305/500] time 1.340 (1.368) data 0.000 (0.003) loss 0.9053 (1.1142) acc 68.7500 (71.8852) lr 1.7290e-03 eta 6:54:57 +epoch [14/50] batch [310/500] time 1.367 (1.368) data 0.001 (0.003) loss 1.1406 (1.1119) acc 68.7500 (71.8851) lr 1.7290e-03 eta 6:54:49 +epoch [14/50] batch [315/500] time 1.366 (1.368) data 0.000 (0.003) loss 1.2412 (1.1085) acc 68.7500 (71.9940) lr 1.7290e-03 eta 6:54:43 +epoch [14/50] batch [320/500] time 1.350 (1.368) data 0.001 (0.003) loss 0.9775 (1.1062) acc 78.1250 (71.9629) lr 1.7290e-03 eta 6:54:34 +epoch [14/50] batch [325/500] time 1.367 (1.368) data 0.000 (0.003) loss 1.2031 (1.1051) acc 68.7500 (71.9712) lr 1.7290e-03 eta 6:54:25 +epoch [14/50] batch [330/500] time 1.363 (1.368) data 0.001 (0.003) loss 0.8521 (1.1043) acc 81.2500 (72.0360) lr 1.7290e-03 eta 6:54:16 +epoch [14/50] batch [335/500] time 1.342 (1.368) data 0.000 (0.003) loss 1.2266 
(1.1045) acc 62.5000 (71.9869) lr 1.7290e-03 eta 6:54:08 +epoch [14/50] batch [340/500] time 1.377 (1.368) data 0.000 (0.003) loss 1.1133 (1.1070) acc 71.8750 (71.8934) lr 1.7290e-03 eta 6:54:00 +epoch [14/50] batch [345/500] time 1.394 (1.368) data 0.000 (0.003) loss 0.9727 (1.1087) acc 78.1250 (71.8931) lr 1.7290e-03 eta 6:53:55 +epoch [14/50] batch [350/500] time 1.373 (1.368) data 0.001 (0.003) loss 1.0889 (1.1091) acc 65.6250 (71.8482) lr 1.7290e-03 eta 6:53:49 +epoch [14/50] batch [355/500] time 1.364 (1.368) data 0.000 (0.003) loss 0.6079 (1.1077) acc 84.3750 (71.9366) lr 1.7290e-03 eta 6:53:42 +epoch [14/50] batch [360/500] time 1.372 (1.368) data 0.000 (0.003) loss 1.2695 (1.1070) acc 71.8750 (71.9705) lr 1.7290e-03 eta 6:53:35 +epoch [14/50] batch [365/500] time 1.369 (1.368) data 0.000 (0.003) loss 0.7295 (1.1062) acc 90.6250 (72.0548) lr 1.7290e-03 eta 6:53:28 +epoch [14/50] batch [370/500] time 1.361 (1.368) data 0.001 (0.003) loss 0.8350 (1.1077) acc 75.0000 (72.0101) lr 1.7290e-03 eta 6:53:19 +epoch [14/50] batch [375/500] time 1.353 (1.368) data 0.001 (0.003) loss 0.8115 (1.1083) acc 87.5000 (72.1000) lr 1.7290e-03 eta 6:53:10 +epoch [14/50] batch [380/500] time 1.370 (1.368) data 0.000 (0.003) loss 0.7173 (1.1059) acc 84.3750 (72.1382) lr 1.7290e-03 eta 6:53:09 +epoch [14/50] batch [385/500] time 1.364 (1.368) data 0.000 (0.003) loss 1.0400 (1.1076) acc 78.1250 (72.0698) lr 1.7290e-03 eta 6:53:00 +epoch [14/50] batch [390/500] time 1.358 (1.368) data 0.001 (0.002) loss 0.9580 (1.1051) acc 81.2500 (72.1394) lr 1.7290e-03 eta 6:52:53 +epoch [14/50] batch [395/500] time 1.357 (1.368) data 0.001 (0.002) loss 1.2207 (1.1071) acc 71.8750 (72.0965) lr 1.7290e-03 eta 6:52:46 +epoch [14/50] batch [400/500] time 1.365 (1.368) data 0.000 (0.002) loss 1.6436 (1.1070) acc 59.3750 (72.1094) lr 1.7290e-03 eta 6:52:39 +epoch [14/50] batch [405/500] time 1.375 (1.368) data 0.001 (0.002) loss 1.3418 (1.1059) acc 59.3750 (72.1451) lr 1.7290e-03 eta 6:52:33 +epoch 
[14/50] batch [410/500] time 1.354 (1.368) data 0.001 (0.002) loss 1.2754 (1.1095) acc 71.8750 (72.0655) lr 1.7290e-03 eta 6:52:26 +epoch [14/50] batch [415/500] time 1.324 (1.368) data 0.000 (0.002) loss 1.2910 (1.1134) acc 71.8750 (71.9804) lr 1.7290e-03 eta 6:52:19 +epoch [14/50] batch [420/500] time 1.361 (1.368) data 0.000 (0.002) loss 1.2529 (1.1129) acc 68.7500 (72.0015) lr 1.7290e-03 eta 6:52:10 +epoch [14/50] batch [425/500] time 1.349 (1.368) data 0.001 (0.002) loss 1.1123 (1.1169) acc 68.7500 (71.8897) lr 1.7290e-03 eta 6:52:05 +epoch [14/50] batch [430/500] time 1.353 (1.368) data 0.000 (0.002) loss 0.6646 (1.1178) acc 81.2500 (71.8968) lr 1.7290e-03 eta 6:51:57 +epoch [14/50] batch [435/500] time 1.361 (1.368) data 0.000 (0.002) loss 1.3506 (1.1202) acc 65.6250 (71.8175) lr 1.7290e-03 eta 6:51:49 +epoch [14/50] batch [440/500] time 1.383 (1.368) data 0.000 (0.002) loss 1.4912 (1.1206) acc 65.6250 (71.7969) lr 1.7290e-03 eta 6:51:43 +epoch [14/50] batch [445/500] time 1.353 (1.368) data 0.000 (0.002) loss 1.0898 (1.1194) acc 75.0000 (71.8329) lr 1.7290e-03 eta 6:51:34 +epoch [14/50] batch [450/500] time 1.358 (1.368) data 0.001 (0.002) loss 1.2031 (1.1196) acc 71.8750 (71.8125) lr 1.7290e-03 eta 6:51:25 +epoch [14/50] batch [455/500] time 1.376 (1.368) data 0.000 (0.002) loss 1.2051 (1.1216) acc 62.5000 (71.7239) lr 1.7290e-03 eta 6:51:20 +epoch [14/50] batch [460/500] time 1.364 (1.368) data 0.000 (0.002) loss 1.3857 (1.1230) acc 65.6250 (71.7459) lr 1.7290e-03 eta 6:51:11 +epoch [14/50] batch [465/500] time 1.373 (1.368) data 0.001 (0.002) loss 0.9697 (1.1221) acc 71.8750 (71.7742) lr 1.7290e-03 eta 6:51:03 +epoch [14/50] batch [470/500] time 1.358 (1.367) data 0.000 (0.002) loss 1.8799 (1.1252) acc 59.3750 (71.7221) lr 1.7290e-03 eta 6:50:55 +epoch [14/50] batch [475/500] time 1.363 (1.367) data 0.000 (0.002) loss 0.6255 (1.1235) acc 81.2500 (71.7434) lr 1.7290e-03 eta 6:50:47 +epoch [14/50] batch [480/500] time 1.366 (1.367) data 0.000 (0.002) loss 
1.3818 (1.1244) acc 56.2500 (71.7122) lr 1.7290e-03 eta 6:50:41 +epoch [14/50] batch [485/500] time 1.389 (1.367) data 0.001 (0.002) loss 1.2158 (1.1229) acc 75.0000 (71.7784) lr 1.7290e-03 eta 6:50:34 +epoch [14/50] batch [490/500] time 1.365 (1.367) data 0.000 (0.002) loss 1.2666 (1.1235) acc 71.8750 (71.7538) lr 1.7290e-03 eta 6:50:27 +epoch [14/50] batch [495/500] time 1.365 (1.368) data 0.000 (0.002) loss 1.4590 (1.1226) acc 68.7500 (71.7677) lr 1.7290e-03 eta 6:50:21 +epoch [14/50] batch [500/500] time 1.369 (1.367) data 0.000 (0.002) loss 1.3242 (1.1247) acc 68.7500 (71.7625) lr 1.6845e-03 eta 6:50:14 +epoch [15/50] batch [5/500] time 1.359 (1.545) data 0.000 (0.181) loss 0.6572 (1.0036) acc 78.1250 (72.5000) lr 1.6845e-03 eta 7:43:26 +epoch [15/50] batch [10/500] time 1.349 (1.451) data 0.000 (0.091) loss 1.1221 (1.0043) acc 78.1250 (73.1250) lr 1.6845e-03 eta 7:14:59 +epoch [15/50] batch [15/500] time 1.367 (1.422) data 0.000 (0.061) loss 0.7383 (1.0520) acc 78.1250 (72.0833) lr 1.6845e-03 eta 7:06:09 +epoch [15/50] batch [20/500] time 1.379 (1.406) data 0.000 (0.046) loss 1.3506 (1.1121) acc 59.3750 (70.6250) lr 1.6845e-03 eta 7:01:27 +epoch [15/50] batch [25/500] time 1.360 (1.399) data 0.000 (0.037) loss 1.1279 (1.0989) acc 65.6250 (71.1250) lr 1.6845e-03 eta 6:59:05 +epoch [15/50] batch [30/500] time 1.357 (1.393) data 0.001 (0.031) loss 1.0215 (1.0559) acc 84.3750 (72.7083) lr 1.6845e-03 eta 6:57:14 +epoch [15/50] batch [35/500] time 1.344 (1.388) data 0.000 (0.026) loss 0.9922 (1.0568) acc 84.3750 (73.3929) lr 1.6845e-03 eta 6:55:29 +epoch [15/50] batch [40/500] time 1.353 (1.385) data 0.000 (0.023) loss 1.4609 (1.0552) acc 78.1250 (73.9844) lr 1.6845e-03 eta 6:54:39 +epoch [15/50] batch [45/500] time 1.358 (1.382) data 0.000 (0.020) loss 1.7969 (1.0910) acc 59.3750 (73.1250) lr 1.6845e-03 eta 6:53:39 +epoch [15/50] batch [50/500] time 1.344 (1.380) data 0.000 (0.018) loss 0.7793 (1.0763) acc 78.1250 (73.2500) lr 1.6845e-03 eta 6:52:44 +epoch [15/50] 
batch [55/500] time 1.360 (1.378) data 0.000 (0.017) loss 1.1729 (1.0785) acc 71.8750 (73.0682) lr 1.6845e-03 eta 6:52:07 +epoch [15/50] batch [60/500] time 1.355 (1.376) data 0.000 (0.015) loss 1.3887 (1.1094) acc 68.7500 (72.5521) lr 1.6845e-03 eta 6:51:27 +epoch [15/50] batch [65/500] time 1.349 (1.375) data 0.000 (0.014) loss 1.3604 (1.1148) acc 68.7500 (72.3077) lr 1.6845e-03 eta 6:51:02 +epoch [15/50] batch [70/500] time 1.357 (1.374) data 0.000 (0.013) loss 1.2070 (1.1063) acc 56.2500 (72.5893) lr 1.6845e-03 eta 6:50:40 +epoch [15/50] batch [75/500] time 1.378 (1.376) data 0.000 (0.012) loss 0.9434 (1.0909) acc 71.8750 (72.7917) lr 1.6845e-03 eta 6:50:58 +epoch [15/50] batch [80/500] time 1.355 (1.375) data 0.001 (0.012) loss 0.9648 (1.0930) acc 78.1250 (72.8516) lr 1.6845e-03 eta 6:50:39 +epoch [15/50] batch [85/500] time 1.389 (1.375) data 0.000 (0.011) loss 0.9897 (1.0901) acc 84.3750 (73.0147) lr 1.6845e-03 eta 6:50:31 +epoch [15/50] batch [90/500] time 1.350 (1.374) data 0.000 (0.010) loss 0.8110 (1.0942) acc 68.7500 (72.9861) lr 1.6845e-03 eta 6:50:07 +epoch [15/50] batch [95/500] time 1.365 (1.374) data 0.000 (0.010) loss 1.4355 (1.0927) acc 65.6250 (72.9934) lr 1.6845e-03 eta 6:49:56 +epoch [15/50] batch [100/500] time 1.367 (1.374) data 0.000 (0.009) loss 0.4133 (1.0904) acc 84.3750 (72.8125) lr 1.6845e-03 eta 6:49:48 +epoch [15/50] batch [105/500] time 1.341 (1.373) data 0.000 (0.009) loss 1.1426 (1.0887) acc 75.0000 (72.9762) lr 1.6845e-03 eta 6:49:28 +epoch [15/50] batch [110/500] time 1.363 (1.372) data 0.000 (0.009) loss 1.2422 (1.0924) acc 71.8750 (72.6989) lr 1.6845e-03 eta 6:49:09 +epoch [15/50] batch [115/500] time 1.349 (1.372) data 0.000 (0.008) loss 0.9121 (1.0841) acc 78.1250 (72.9620) lr 1.6845e-03 eta 6:48:53 +epoch [15/50] batch [120/500] time 1.382 (1.373) data 0.000 (0.008) loss 1.1104 (1.0818) acc 75.0000 (73.0990) lr 1.6845e-03 eta 6:49:04 +epoch [15/50] batch [125/500] time 1.363 (1.372) data 0.000 (0.008) loss 0.6748 (1.0791) 
acc 81.2500 (73.0000) lr 1.6845e-03 eta 6:48:45 +epoch [15/50] batch [130/500] time 1.375 (1.372) data 0.001 (0.007) loss 0.8613 (1.0870) acc 75.0000 (72.8365) lr 1.6845e-03 eta 6:48:33 +epoch [15/50] batch [135/500] time 1.372 (1.372) data 0.000 (0.007) loss 1.1152 (1.0937) acc 81.2500 (72.8704) lr 1.6845e-03 eta 6:48:21 +epoch [15/50] batch [140/500] time 1.359 (1.371) data 0.000 (0.007) loss 0.4348 (1.0848) acc 87.5000 (73.0804) lr 1.6845e-03 eta 6:48:09 +epoch [15/50] batch [145/500] time 1.355 (1.371) data 0.000 (0.007) loss 0.9009 (1.0806) acc 65.6250 (73.0603) lr 1.6845e-03 eta 6:47:58 +epoch [15/50] batch [150/500] time 1.355 (1.370) data 0.000 (0.006) loss 0.7524 (1.0873) acc 75.0000 (73.0000) lr 1.6845e-03 eta 6:47:36 +epoch [15/50] batch [155/500] time 1.369 (1.370) data 0.001 (0.006) loss 1.2695 (1.0860) acc 75.0000 (73.0242) lr 1.6845e-03 eta 6:47:27 +epoch [15/50] batch [160/500] time 1.373 (1.370) data 0.000 (0.006) loss 0.9722 (1.0851) acc 71.8750 (73.0664) lr 1.6845e-03 eta 6:47:21 +epoch [15/50] batch [165/500] time 1.361 (1.370) data 0.001 (0.006) loss 1.1904 (1.0842) acc 68.7500 (73.1061) lr 1.6845e-03 eta 6:47:12 +epoch [15/50] batch [170/500] time 1.386 (1.370) data 0.000 (0.006) loss 1.0088 (1.0912) acc 84.3750 (73.0882) lr 1.6845e-03 eta 6:47:05 +epoch [15/50] batch [175/500] time 1.364 (1.370) data 0.001 (0.006) loss 1.1758 (1.0852) acc 71.8750 (73.1429) lr 1.6845e-03 eta 6:47:03 +epoch [15/50] batch [180/500] time 1.358 (1.370) data 0.000 (0.005) loss 1.5996 (1.0897) acc 78.1250 (73.0903) lr 1.6845e-03 eta 6:46:53 +epoch [15/50] batch [185/500] time 1.352 (1.370) data 0.000 (0.005) loss 0.9712 (1.0907) acc 81.2500 (73.0068) lr 1.6845e-03 eta 6:46:40 +epoch [15/50] batch [190/500] time 1.366 (1.370) data 0.000 (0.005) loss 1.1562 (1.0957) acc 65.6250 (72.8454) lr 1.6845e-03 eta 6:46:30 +epoch [15/50] batch [195/500] time 1.354 (1.369) data 0.000 (0.005) loss 1.5039 (1.0990) acc 56.2500 (72.7724) lr 1.6845e-03 eta 6:46:16 +epoch [15/50] 
batch [200/500] time 1.362 (1.369) data 0.001 (0.005) loss 0.6890 (1.1026) acc 75.0000 (72.6875) lr 1.6845e-03 eta 6:46:07 +epoch [15/50] batch [205/500] time 1.347 (1.369) data 0.001 (0.005) loss 0.5933 (1.0979) acc 87.5000 (72.7439) lr 1.6845e-03 eta 6:45:57 +epoch [15/50] batch [210/500] time 1.368 (1.369) data 0.001 (0.005) loss 1.3604 (1.0947) acc 71.8750 (72.8125) lr 1.6845e-03 eta 6:45:46 +epoch [15/50] batch [215/500] time 1.370 (1.368) data 0.000 (0.005) loss 1.0576 (1.0985) acc 81.2500 (72.7180) lr 1.6845e-03 eta 6:45:38 +epoch [15/50] batch [220/500] time 1.351 (1.369) data 0.000 (0.005) loss 1.3662 (1.0975) acc 68.7500 (72.7699) lr 1.6845e-03 eta 6:45:39 +epoch [15/50] batch [225/500] time 1.370 (1.369) data 0.000 (0.004) loss 0.9019 (1.0955) acc 68.7500 (72.7361) lr 1.6845e-03 eta 6:45:28 +epoch [15/50] batch [230/500] time 1.357 (1.368) data 0.000 (0.004) loss 1.3477 (1.0995) acc 59.3750 (72.6766) lr 1.6845e-03 eta 6:45:17 +epoch [15/50] batch [235/500] time 1.352 (1.368) data 0.000 (0.004) loss 1.1221 (1.0968) acc 71.8750 (72.7660) lr 1.6845e-03 eta 6:45:09 +epoch [15/50] batch [240/500] time 1.372 (1.368) data 0.000 (0.004) loss 0.7041 (1.0930) acc 78.1250 (72.8125) lr 1.6845e-03 eta 6:45:03 +epoch [15/50] batch [245/500] time 1.361 (1.368) data 0.000 (0.004) loss 0.9326 (1.0905) acc 81.2500 (72.8571) lr 1.6845e-03 eta 6:44:56 +epoch [15/50] batch [250/500] time 1.340 (1.368) data 0.000 (0.004) loss 1.3086 (1.0943) acc 78.1250 (72.7500) lr 1.6845e-03 eta 6:44:46 +epoch [15/50] batch [255/500] time 1.361 (1.368) data 0.000 (0.004) loss 1.3027 (1.0991) acc 68.7500 (72.7083) lr 1.6845e-03 eta 6:44:38 +epoch [15/50] batch [260/500] time 1.533 (1.369) data 0.000 (0.004) loss 0.8057 (1.0994) acc 75.0000 (72.6082) lr 1.6845e-03 eta 6:44:41 +epoch [15/50] batch [265/500] time 1.361 (1.369) data 0.000 (0.004) loss 1.3477 (1.1006) acc 75.0000 (72.5825) lr 1.6845e-03 eta 6:44:32 +epoch [15/50] batch [270/500] time 1.378 (1.368) data 0.000 (0.004) loss 1.6182 
(1.1037) acc 59.3750 (72.5810) lr 1.6845e-03 eta 6:44:22 +epoch [15/50] batch [275/500] time 1.377 (1.368) data 0.000 (0.004) loss 0.9014 (1.1012) acc 68.7500 (72.5909) lr 1.6845e-03 eta 6:44:15 +epoch [15/50] batch [280/500] time 1.352 (1.368) data 0.000 (0.004) loss 1.3711 (1.1006) acc 68.7500 (72.6228) lr 1.6845e-03 eta 6:44:01 +epoch [15/50] batch [285/500] time 1.373 (1.368) data 0.001 (0.004) loss 1.4482 (1.0985) acc 68.7500 (72.6535) lr 1.6845e-03 eta 6:43:53 +epoch [15/50] batch [290/500] time 1.344 (1.368) data 0.000 (0.004) loss 0.9873 (1.0954) acc 68.7500 (72.6940) lr 1.6845e-03 eta 6:43:43 +epoch [15/50] batch [295/500] time 1.385 (1.368) data 0.000 (0.004) loss 1.2363 (1.0952) acc 62.5000 (72.6377) lr 1.6845e-03 eta 6:43:37 +epoch [15/50] batch [300/500] time 1.372 (1.368) data 0.000 (0.003) loss 0.9170 (1.0948) acc 75.0000 (72.6458) lr 1.6845e-03 eta 6:43:26 +epoch [15/50] batch [305/500] time 1.372 (1.368) data 0.000 (0.003) loss 1.2539 (1.0940) acc 68.7500 (72.6230) lr 1.6845e-03 eta 6:43:18 +epoch [15/50] batch [310/500] time 1.352 (1.367) data 0.000 (0.003) loss 1.6816 (1.0971) acc 59.3750 (72.5403) lr 1.6845e-03 eta 6:43:07 +epoch [15/50] batch [315/500] time 1.352 (1.367) data 0.000 (0.003) loss 1.1006 (1.0949) acc 78.1250 (72.5992) lr 1.6845e-03 eta 6:42:58 +epoch [15/50] batch [320/500] time 1.362 (1.367) data 0.000 (0.003) loss 1.3213 (1.0951) acc 78.1250 (72.5977) lr 1.6845e-03 eta 6:42:50 +epoch [15/50] batch [325/500] time 1.369 (1.367) data 0.000 (0.003) loss 1.1943 (1.0962) acc 75.0000 (72.5962) lr 1.6845e-03 eta 6:42:43 +epoch [15/50] batch [330/500] time 1.357 (1.367) data 0.000 (0.003) loss 1.3525 (1.0950) acc 62.5000 (72.5758) lr 1.6845e-03 eta 6:42:34 +epoch [15/50] batch [335/500] time 1.359 (1.367) data 0.000 (0.003) loss 1.5889 (1.0985) acc 59.3750 (72.4440) lr 1.6845e-03 eta 6:42:27 +epoch [15/50] batch [340/500] time 1.351 (1.367) data 0.000 (0.003) loss 1.0088 (1.0971) acc 71.8750 (72.4449) lr 1.6845e-03 eta 6:42:19 +epoch 
[15/50] batch [345/500] time 1.365 (1.367) data 0.000 (0.003) loss 1.0742 (1.0959) acc 75.0000 (72.5091) lr 1.6845e-03 eta 6:42:14 +epoch [15/50] batch [350/500] time 1.360 (1.367) data 0.000 (0.003) loss 0.7354 (1.0973) acc 71.8750 (72.3929) lr 1.6845e-03 eta 6:42:06 +epoch [15/50] batch [355/500] time 1.350 (1.367) data 0.000 (0.003) loss 1.0977 (1.0986) acc 75.0000 (72.4120) lr 1.6845e-03 eta 6:41:55 +epoch [15/50] batch [360/500] time 1.358 (1.367) data 0.000 (0.003) loss 1.1826 (1.0973) acc 65.6250 (72.4392) lr 1.6845e-03 eta 6:41:54 +epoch [15/50] batch [365/500] time 1.371 (1.367) data 0.001 (0.003) loss 1.3086 (1.1003) acc 71.8750 (72.3288) lr 1.6845e-03 eta 6:41:45 +epoch [15/50] batch [370/500] time 1.363 (1.367) data 0.000 (0.003) loss 1.1748 (1.0995) acc 75.0000 (72.4155) lr 1.6845e-03 eta 6:41:36 +epoch [15/50] batch [375/500] time 1.362 (1.367) data 0.000 (0.003) loss 1.0215 (1.1006) acc 78.1250 (72.4000) lr 1.6845e-03 eta 6:41:27 +epoch [15/50] batch [380/500] time 1.358 (1.367) data 0.000 (0.003) loss 0.5908 (1.0974) acc 84.3750 (72.4836) lr 1.6845e-03 eta 6:41:20 +epoch [15/50] batch [385/500] time 1.375 (1.367) data 0.000 (0.003) loss 1.5156 (1.1006) acc 65.6250 (72.3945) lr 1.6845e-03 eta 6:41:13 +epoch [15/50] batch [390/500] time 1.352 (1.367) data 0.000 (0.003) loss 0.7930 (1.0998) acc 71.8750 (72.3638) lr 1.6845e-03 eta 6:41:04 +epoch [15/50] batch [395/500] time 1.328 (1.366) data 0.000 (0.003) loss 1.4766 (1.1024) acc 65.6250 (72.3101) lr 1.6845e-03 eta 6:40:54 +epoch [15/50] batch [400/500] time 1.373 (1.366) data 0.000 (0.003) loss 0.9736 (1.1030) acc 71.8750 (72.2891) lr 1.6845e-03 eta 6:40:46 +epoch [15/50] batch [405/500] time 1.351 (1.367) data 0.001 (0.003) loss 0.8110 (1.1016) acc 81.2500 (72.3380) lr 1.6845e-03 eta 6:40:44 +epoch [15/50] batch [410/500] time 1.355 (1.367) data 0.001 (0.003) loss 1.1562 (1.1019) acc 62.5000 (72.3323) lr 1.6845e-03 eta 6:40:37 +epoch [15/50] batch [415/500] time 1.376 (1.366) data 0.000 (0.003) loss 
0.8901 (1.1006) acc 78.1250 (72.4172) lr 1.6845e-03 eta 6:40:29 +epoch [15/50] batch [420/500] time 1.350 (1.366) data 0.000 (0.003) loss 1.4551 (1.1028) acc 68.7500 (72.3363) lr 1.6845e-03 eta 6:40:21 +epoch [15/50] batch [425/500] time 1.356 (1.366) data 0.000 (0.003) loss 2.3477 (1.1047) acc 46.8750 (72.2941) lr 1.6845e-03 eta 6:40:12 +epoch [15/50] batch [430/500] time 1.353 (1.366) data 0.000 (0.003) loss 1.6201 (1.1068) acc 62.5000 (72.2892) lr 1.6845e-03 eta 6:40:02 +epoch [15/50] batch [435/500] time 1.364 (1.366) data 0.000 (0.003) loss 0.8506 (1.1068) acc 71.8750 (72.2557) lr 1.6845e-03 eta 6:39:56 +epoch [15/50] batch [440/500] time 1.367 (1.366) data 0.000 (0.002) loss 0.8623 (1.1052) acc 78.1250 (72.2869) lr 1.6845e-03 eta 6:39:47 +epoch [15/50] batch [445/500] time 1.353 (1.366) data 0.000 (0.002) loss 1.0186 (1.1036) acc 81.2500 (72.3315) lr 1.6845e-03 eta 6:39:40 +epoch [15/50] batch [450/500] time 1.359 (1.366) data 0.000 (0.002) loss 2.0664 (1.1085) acc 65.6250 (72.3264) lr 1.6845e-03 eta 6:39:30 +epoch [15/50] batch [455/500] time 1.380 (1.366) data 0.000 (0.002) loss 0.6997 (1.1078) acc 81.2500 (72.3626) lr 1.6845e-03 eta 6:39:23 +epoch [15/50] batch [460/500] time 1.373 (1.366) data 0.000 (0.002) loss 0.8242 (1.1073) acc 71.8750 (72.3370) lr 1.6845e-03 eta 6:39:17 +epoch [15/50] batch [465/500] time 1.359 (1.366) data 0.000 (0.002) loss 1.4414 (1.1075) acc 62.5000 (72.3118) lr 1.6845e-03 eta 6:39:08 +epoch [15/50] batch [470/500] time 1.363 (1.366) data 0.000 (0.002) loss 0.9106 (1.1053) acc 75.0000 (72.3404) lr 1.6845e-03 eta 6:39:03 +epoch [15/50] batch [475/500] time 1.381 (1.366) data 0.000 (0.002) loss 0.7583 (1.1062) acc 84.3750 (72.3553) lr 1.6845e-03 eta 6:38:56 +epoch [15/50] batch [480/500] time 1.331 (1.366) data 0.000 (0.002) loss 1.6533 (1.1059) acc 68.7500 (72.3763) lr 1.6845e-03 eta 6:38:48 +epoch [15/50] batch [485/500] time 1.367 (1.366) data 0.001 (0.002) loss 0.4866 (1.1092) acc 84.3750 (72.3454) lr 1.6845e-03 eta 6:38:40 
+epoch [15/50] batch [490/500] time 1.340 (1.365) data 0.000 (0.002) loss 1.0342 (1.1083) acc 78.1250 (72.3724) lr 1.6845e-03 eta 6:38:29 +epoch [15/50] batch [495/500] time 1.354 (1.365) data 0.000 (0.002) loss 1.0215 (1.1067) acc 81.2500 (72.4432) lr 1.6845e-03 eta 6:38:21 +epoch [15/50] batch [500/500] time 1.395 (1.365) data 0.000 (0.002) loss 1.0645 (1.1071) acc 65.6250 (72.4062) lr 1.6374e-03 eta 6:38:16 +epoch [16/50] batch [5/500] time 1.359 (1.560) data 0.000 (0.159) loss 1.1816 (1.3155) acc 68.7500 (68.7500) lr 1.6374e-03 eta 7:34:54 +epoch [16/50] batch [10/500] time 1.381 (1.466) data 0.000 (0.080) loss 1.1963 (1.2021) acc 68.7500 (69.6875) lr 1.6374e-03 eta 7:07:18 +epoch [16/50] batch [15/500] time 1.378 (1.430) data 0.000 (0.053) loss 1.3018 (1.1663) acc 71.8750 (70.4167) lr 1.6374e-03 eta 6:56:45 +epoch [16/50] batch [20/500] time 1.355 (1.412) data 0.001 (0.040) loss 1.2812 (1.1815) acc 62.5000 (71.0938) lr 1.6374e-03 eta 6:51:23 +epoch [16/50] batch [25/500] time 1.361 (1.403) data 0.000 (0.032) loss 1.6162 (1.1692) acc 65.6250 (70.8750) lr 1.6374e-03 eta 6:48:31 +epoch [16/50] batch [30/500] time 1.377 (1.397) data 0.000 (0.027) loss 0.6533 (1.1381) acc 75.0000 (71.0417) lr 1.6374e-03 eta 6:46:41 +epoch [16/50] batch [35/500] time 1.378 (1.392) data 0.000 (0.023) loss 1.0371 (1.1505) acc 68.7500 (71.4286) lr 1.6374e-03 eta 6:45:03 +epoch [16/50] batch [40/500] time 1.359 (1.388) data 0.000 (0.020) loss 0.5146 (1.1097) acc 81.2500 (72.0312) lr 1.6374e-03 eta 6:43:52 +epoch [16/50] batch [45/500] time 1.355 (1.386) data 0.000 (0.018) loss 0.9604 (1.0975) acc 75.0000 (71.9444) lr 1.6374e-03 eta 6:43:12 +epoch [16/50] batch [50/500] time 1.349 (1.383) data 0.000 (0.016) loss 0.7944 (1.1082) acc 68.7500 (71.8125) lr 1.6374e-03 eta 6:42:21 +epoch [16/50] batch [55/500] time 1.377 (1.382) data 0.000 (0.015) loss 1.7314 (1.1117) acc 59.3750 (71.9886) lr 1.6374e-03 eta 6:41:41 +epoch [16/50] batch [60/500] time 1.373 (1.381) data 0.000 (0.014) loss 1.2529 
(1.1219) acc 68.7500 (71.7188) lr 1.6374e-03 eta 6:41:23 +epoch [16/50] batch [65/500] time 1.341 (1.381) data 0.000 (0.013) loss 1.0488 (1.1140) acc 68.7500 (71.7788) lr 1.6374e-03 eta 6:41:17 +epoch [16/50] batch [70/500] time 1.353 (1.379) data 0.001 (0.012) loss 0.6821 (1.1079) acc 84.3750 (71.9196) lr 1.6374e-03 eta 6:40:40 +epoch [16/50] batch [75/500] time 1.336 (1.378) data 0.000 (0.011) loss 0.7969 (1.0923) acc 84.3750 (72.3333) lr 1.6374e-03 eta 6:40:04 +epoch [16/50] batch [80/500] time 1.377 (1.377) data 0.000 (0.010) loss 1.2617 (1.0923) acc 75.0000 (72.2656) lr 1.6374e-03 eta 6:39:42 +epoch [16/50] batch [85/500] time 1.350 (1.376) data 0.000 (0.010) loss 1.1123 (1.0880) acc 78.1250 (72.5368) lr 1.6374e-03 eta 6:39:22 +epoch [16/50] batch [90/500] time 1.348 (1.376) data 0.000 (0.009) loss 1.5137 (1.0906) acc 62.5000 (72.3264) lr 1.6374e-03 eta 6:39:08 +epoch [16/50] batch [95/500] time 1.373 (1.375) data 0.000 (0.009) loss 0.6782 (1.0942) acc 78.1250 (72.3026) lr 1.6374e-03 eta 6:38:53 +epoch [16/50] batch [100/500] time 1.369 (1.375) data 0.000 (0.008) loss 1.3232 (1.0899) acc 62.5000 (72.2188) lr 1.6374e-03 eta 6:38:42 +epoch [16/50] batch [105/500] time 1.384 (1.375) data 0.000 (0.008) loss 0.9502 (1.0943) acc 78.1250 (72.1726) lr 1.6374e-03 eta 6:38:30 +epoch [16/50] batch [110/500] time 1.376 (1.376) data 0.001 (0.008) loss 0.9692 (1.0933) acc 75.0000 (72.3011) lr 1.6374e-03 eta 6:38:40 +epoch [16/50] batch [115/500] time 1.360 (1.375) data 0.000 (0.007) loss 1.0801 (1.0949) acc 68.7500 (72.2011) lr 1.6374e-03 eta 6:38:20 +epoch [16/50] batch [120/500] time 1.364 (1.374) data 0.000 (0.007) loss 0.8091 (1.0858) acc 75.0000 (72.3958) lr 1.6374e-03 eta 6:38:08 +epoch [16/50] batch [125/500] time 1.349 (1.374) data 0.000 (0.007) loss 1.1670 (1.0895) acc 71.8750 (72.3500) lr 1.6374e-03 eta 6:37:46 +epoch [16/50] batch [130/500] time 1.348 (1.373) data 0.001 (0.007) loss 1.2852 (1.0978) acc 56.2500 (72.2596) lr 1.6374e-03 eta 6:37:25 +epoch [16/50] 
batch [135/500] time 1.352 (1.372) data 0.000 (0.006) loss 0.6011 (1.0954) acc 81.2500 (72.1528) lr 1.6374e-03 eta 6:37:05 +epoch [16/50] batch [140/500] time 1.357 (1.372) data 0.000 (0.006) loss 0.8325 (1.0989) acc 75.0000 (72.0759) lr 1.6374e-03 eta 6:36:51 +epoch [16/50] batch [145/500] time 1.367 (1.371) data 0.000 (0.006) loss 1.3008 (1.1039) acc 75.0000 (72.1552) lr 1.6374e-03 eta 6:36:38 +epoch [16/50] batch [150/500] time 1.359 (1.371) data 0.001 (0.006) loss 1.3594 (1.1039) acc 65.6250 (72.0000) lr 1.6374e-03 eta 6:36:22 +epoch [16/50] batch [155/500] time 1.358 (1.370) data 0.000 (0.006) loss 1.7549 (1.1090) acc 59.3750 (71.8347) lr 1.6374e-03 eta 6:36:04 +epoch [16/50] batch [160/500] time 1.376 (1.370) data 0.000 (0.005) loss 1.3330 (1.1140) acc 62.5000 (71.7188) lr 1.6374e-03 eta 6:35:58 +epoch [16/50] batch [165/500] time 1.350 (1.370) data 0.000 (0.005) loss 1.4326 (1.1230) acc 65.6250 (71.7045) lr 1.6374e-03 eta 6:35:48 +epoch [16/50] batch [170/500] time 1.363 (1.370) data 0.000 (0.005) loss 1.3076 (1.1247) acc 59.3750 (71.6176) lr 1.6374e-03 eta 6:35:38 +epoch [16/50] batch [175/500] time 1.342 (1.369) data 0.000 (0.005) loss 1.3936 (1.1277) acc 62.5000 (71.4107) lr 1.6374e-03 eta 6:35:26 +epoch [16/50] batch [180/500] time 1.372 (1.369) data 0.000 (0.005) loss 0.8047 (1.1264) acc 68.7500 (71.3889) lr 1.6374e-03 eta 6:35:18 +epoch [16/50] batch [185/500] time 1.363 (1.369) data 0.000 (0.005) loss 0.9136 (1.1269) acc 71.8750 (71.3345) lr 1.6374e-03 eta 6:35:10 +epoch [16/50] batch [190/500] time 1.363 (1.369) data 0.000 (0.005) loss 0.7744 (1.1237) acc 68.7500 (71.2007) lr 1.6374e-03 eta 6:34:58 +epoch [16/50] batch [195/500] time 1.361 (1.369) data 0.000 (0.005) loss 1.1289 (1.1277) acc 68.7500 (71.0737) lr 1.6374e-03 eta 6:34:45 +epoch [16/50] batch [200/500] time 1.376 (1.368) data 0.000 (0.004) loss 1.4131 (1.1270) acc 68.7500 (71.0469) lr 1.6374e-03 eta 6:34:34 +epoch [16/50] batch [205/500] time 1.489 (1.369) data 0.000 (0.004) loss 1.1660 
(1.1278) acc 68.7500 (71.1128) lr 1.6374e-03 eta 6:34:33 +epoch [16/50] batch [210/500] time 1.341 (1.368) data 0.000 (0.004) loss 1.4707 (1.1297) acc 59.3750 (71.0863) lr 1.6374e-03 eta 6:34:20 +epoch [16/50] batch [215/500] time 1.352 (1.368) data 0.000 (0.004) loss 0.7695 (1.1293) acc 81.2500 (71.1047) lr 1.6374e-03 eta 6:34:07 +epoch [16/50] batch [220/500] time 1.372 (1.368) data 0.000 (0.004) loss 1.1562 (1.1296) acc 65.6250 (71.0369) lr 1.6374e-03 eta 6:33:57 +epoch [16/50] batch [225/500] time 1.361 (1.368) data 0.000 (0.004) loss 0.8262 (1.1243) acc 75.0000 (71.1389) lr 1.6374e-03 eta 6:33:48 +epoch [16/50] batch [230/500] time 1.338 (1.368) data 0.000 (0.004) loss 0.8286 (1.1198) acc 84.3750 (71.2092) lr 1.6374e-03 eta 6:33:38 +epoch [16/50] batch [235/500] time 1.358 (1.367) data 0.000 (0.004) loss 1.2910 (1.1276) acc 68.7500 (71.0771) lr 1.6374e-03 eta 6:33:28 +epoch [16/50] batch [240/500] time 1.346 (1.367) data 0.000 (0.004) loss 1.5439 (1.1289) acc 62.5000 (71.1068) lr 1.6374e-03 eta 6:33:20 +epoch [16/50] batch [245/500] time 1.365 (1.367) data 0.000 (0.004) loss 1.3818 (1.1303) acc 68.7500 (71.1352) lr 1.6374e-03 eta 6:33:11 +epoch [16/50] batch [250/500] time 1.369 (1.368) data 0.000 (0.004) loss 0.7749 (1.1267) acc 78.1250 (71.2000) lr 1.6374e-03 eta 6:33:11 +epoch [16/50] batch [255/500] time 1.337 (1.367) data 0.000 (0.004) loss 0.9780 (1.1300) acc 75.0000 (71.1765) lr 1.6374e-03 eta 6:33:00 +epoch [16/50] batch [260/500] time 1.361 (1.367) data 0.001 (0.004) loss 0.9146 (1.1287) acc 65.6250 (71.1418) lr 1.6374e-03 eta 6:32:55 +epoch [16/50] batch [265/500] time 1.371 (1.367) data 0.000 (0.003) loss 1.2129 (1.1293) acc 65.6250 (71.1203) lr 1.6374e-03 eta 6:32:44 +epoch [16/50] batch [270/500] time 1.388 (1.367) data 0.000 (0.003) loss 0.6831 (1.1261) acc 81.2500 (71.2153) lr 1.6374e-03 eta 6:32:40 +epoch [16/50] batch [275/500] time 1.382 (1.367) data 0.000 (0.003) loss 0.6616 (1.1266) acc 84.3750 (71.2955) lr 1.6374e-03 eta 6:32:32 +epoch 
[16/50] batch [280/500] time 1.366 (1.367) data 0.000 (0.003) loss 1.3633 (1.1272) acc 68.7500 (71.2835) lr 1.6374e-03 eta 6:32:24 +epoch [16/50] batch [285/500] time 1.343 (1.367) data 0.000 (0.003) loss 1.1719 (1.1270) acc 78.1250 (71.3706) lr 1.6374e-03 eta 6:32:13 +epoch [16/50] batch [290/500] time 1.387 (1.367) data 0.000 (0.003) loss 1.0781 (1.1275) acc 78.1250 (71.3362) lr 1.6374e-03 eta 6:32:08 +epoch [16/50] batch [295/500] time 1.367 (1.367) data 0.000 (0.003) loss 1.4248 (1.1259) acc 65.6250 (71.3559) lr 1.6374e-03 eta 6:32:00 +epoch [16/50] batch [300/500] time 1.362 (1.367) data 0.000 (0.003) loss 1.3535 (1.1247) acc 71.8750 (71.4167) lr 1.6374e-03 eta 6:31:52 +epoch [16/50] batch [305/500] time 1.361 (1.367) data 0.001 (0.003) loss 0.8418 (1.1205) acc 71.8750 (71.5574) lr 1.6374e-03 eta 6:31:48 +epoch [16/50] batch [310/500] time 1.386 (1.367) data 0.000 (0.003) loss 1.3291 (1.1234) acc 75.0000 (71.5423) lr 1.6374e-03 eta 6:31:42 +epoch [16/50] batch [315/500] time 1.370 (1.367) data 0.000 (0.003) loss 1.0371 (1.1233) acc 78.1250 (71.5774) lr 1.6374e-03 eta 6:31:36 +epoch [16/50] batch [320/500] time 1.382 (1.367) data 0.000 (0.003) loss 1.8027 (1.1195) acc 59.3750 (71.6992) lr 1.6374e-03 eta 6:31:30 +epoch [16/50] batch [325/500] time 1.359 (1.367) data 0.000 (0.003) loss 0.4717 (1.1191) acc 87.5000 (71.7115) lr 1.6374e-03 eta 6:31:21 +epoch [16/50] batch [330/500] time 1.361 (1.367) data 0.000 (0.003) loss 0.9048 (1.1204) acc 68.7500 (71.6572) lr 1.6374e-03 eta 6:31:16 +epoch [16/50] batch [335/500] time 1.381 (1.367) data 0.000 (0.003) loss 0.7524 (1.1213) acc 84.3750 (71.6978) lr 1.6374e-03 eta 6:31:10 +epoch [16/50] batch [340/500] time 1.345 (1.367) data 0.000 (0.003) loss 1.6943 (1.1224) acc 62.5000 (71.6544) lr 1.6374e-03 eta 6:30:59 +epoch [16/50] batch [345/500] time 1.351 (1.367) data 0.000 (0.003) loss 1.3301 (1.1214) acc 75.0000 (71.6033) lr 1.6374e-03 eta 6:30:49 +epoch [16/50] batch [350/500] time 1.351 (1.367) data 0.000 (0.003) loss 
1.5195 (1.1220) acc 68.7500 (71.6250) lr 1.6374e-03 eta 6:30:45 +epoch [16/50] batch [355/500] time 1.375 (1.367) data 0.001 (0.003) loss 1.9697 (1.1248) acc 50.0000 (71.5317) lr 1.6374e-03 eta 6:30:37 +epoch [16/50] batch [360/500] time 1.368 (1.367) data 0.000 (0.003) loss 1.1240 (1.1256) acc 71.8750 (71.5017) lr 1.6374e-03 eta 6:30:28 +epoch [16/50] batch [365/500] time 1.355 (1.367) data 0.000 (0.003) loss 0.9268 (1.1257) acc 78.1250 (71.5497) lr 1.6374e-03 eta 6:30:17 +epoch [16/50] batch [370/500] time 1.353 (1.366) data 0.000 (0.003) loss 0.8198 (1.1232) acc 71.8750 (71.5709) lr 1.6374e-03 eta 6:30:07 +epoch [16/50] batch [375/500] time 1.362 (1.366) data 0.000 (0.003) loss 1.0361 (1.1247) acc 78.1250 (71.5417) lr 1.6374e-03 eta 6:29:57 +epoch [16/50] batch [380/500] time 1.351 (1.366) data 0.000 (0.003) loss 0.7788 (1.1218) acc 84.3750 (71.6365) lr 1.6374e-03 eta 6:29:49 +epoch [16/50] batch [385/500] time 1.339 (1.366) data 0.000 (0.002) loss 1.0342 (1.1234) acc 81.2500 (71.6964) lr 1.6374e-03 eta 6:29:39 +epoch [16/50] batch [390/500] time 1.341 (1.366) data 0.000 (0.002) loss 0.7842 (1.1251) acc 78.1250 (71.7147) lr 1.6374e-03 eta 6:29:32 +epoch [16/50] batch [395/500] time 1.364 (1.366) data 0.000 (0.002) loss 1.0781 (1.1259) acc 62.5000 (71.6851) lr 1.6374e-03 eta 6:29:31 +epoch [16/50] batch [400/500] time 1.390 (1.367) data 0.000 (0.002) loss 0.9780 (1.1248) acc 68.7500 (71.7266) lr 1.6374e-03 eta 6:29:27 +epoch [16/50] batch [405/500] time 1.378 (1.367) data 0.000 (0.002) loss 1.5859 (1.1255) acc 68.7500 (71.7515) lr 1.6374e-03 eta 6:29:20 +epoch [16/50] batch [410/500] time 1.366 (1.366) data 0.000 (0.002) loss 1.3955 (1.1264) acc 62.5000 (71.7149) lr 1.6374e-03 eta 6:29:12 +epoch [16/50] batch [415/500] time 1.352 (1.366) data 0.000 (0.002) loss 0.8462 (1.1255) acc 68.7500 (71.7395) lr 1.6374e-03 eta 6:29:03 +epoch [16/50] batch [420/500] time 1.353 (1.366) data 0.000 (0.002) loss 1.2627 (1.1263) acc 68.7500 (71.7411) lr 1.6374e-03 eta 6:28:54 
+epoch [16/50] batch [425/500] time 1.346 (1.366) data 0.000 (0.002) loss 1.0098 (1.1235) acc 68.7500 (71.8162) lr 1.6374e-03 eta 6:28:43 +epoch [16/50] batch [430/500] time 1.349 (1.366) data 0.000 (0.002) loss 0.9414 (1.1214) acc 71.8750 (71.8677) lr 1.6374e-03 eta 6:28:35 +epoch [16/50] batch [435/500] time 1.357 (1.366) data 0.000 (0.002) loss 0.6860 (1.1211) acc 71.8750 (71.8678) lr 1.6374e-03 eta 6:28:27 +epoch [16/50] batch [440/500] time 1.354 (1.366) data 0.000 (0.002) loss 1.3496 (1.1221) acc 65.6250 (71.8111) lr 1.6374e-03 eta 6:28:19 +epoch [16/50] batch [445/500] time 1.388 (1.366) data 0.000 (0.002) loss 0.7612 (1.1224) acc 84.3750 (71.8258) lr 1.6374e-03 eta 6:28:14 +epoch [16/50] batch [450/500] time 1.346 (1.366) data 0.000 (0.002) loss 1.2275 (1.1209) acc 71.8750 (71.8681) lr 1.6374e-03 eta 6:28:06 +epoch [16/50] batch [455/500] time 1.377 (1.366) data 0.000 (0.002) loss 1.1650 (1.1211) acc 71.8750 (71.9025) lr 1.6374e-03 eta 6:28:00 +epoch [16/50] batch [460/500] time 1.373 (1.366) data 0.000 (0.002) loss 1.7598 (1.1241) acc 62.5000 (71.8750) lr 1.6374e-03 eta 6:27:53 +epoch [16/50] batch [465/500] time 1.363 (1.366) data 0.000 (0.002) loss 0.7666 (1.1231) acc 78.1250 (71.8952) lr 1.6374e-03 eta 6:27:45 +epoch [16/50] batch [470/500] time 1.355 (1.366) data 0.000 (0.002) loss 1.3682 (1.1256) acc 62.5000 (71.8085) lr 1.6374e-03 eta 6:27:38 +epoch [16/50] batch [475/500] time 1.357 (1.366) data 0.001 (0.002) loss 1.8604 (1.1262) acc 53.1250 (71.8026) lr 1.6374e-03 eta 6:27:30 +epoch [16/50] batch [480/500] time 1.362 (1.366) data 0.000 (0.002) loss 0.9976 (1.1272) acc 71.8750 (71.7839) lr 1.6374e-03 eta 6:27:24 +epoch [16/50] batch [485/500] time 1.380 (1.366) data 0.001 (0.002) loss 1.2051 (1.1270) acc 71.8750 (71.7848) lr 1.6374e-03 eta 6:27:16 +epoch [16/50] batch [490/500] time 1.377 (1.366) data 0.000 (0.002) loss 1.0947 (1.1251) acc 65.6250 (71.8048) lr 1.6374e-03 eta 6:27:10 +epoch [16/50] batch [495/500] time 1.357 (1.366) data 0.000 
(0.002) loss 1.1426 (1.1236) acc 75.0000 (71.8182) lr 1.6374e-03 eta 6:27:07 +epoch [16/50] batch [500/500] time 1.348 (1.366) data 0.000 (0.002) loss 1.5625 (1.1231) acc 65.6250 (71.8250) lr 1.5878e-03 eta 6:27:00 +epoch [17/50] batch [5/500] time 1.347 (1.538) data 0.001 (0.175) loss 0.8286 (1.0229) acc 78.1250 (71.8750) lr 1.5878e-03 eta 7:15:44 +epoch [17/50] batch [10/500] time 1.357 (1.449) data 0.000 (0.087) loss 0.9331 (0.9630) acc 75.0000 (74.0625) lr 1.5878e-03 eta 6:50:18 +epoch [17/50] batch [15/500] time 1.363 (1.418) data 0.000 (0.058) loss 0.5518 (0.9810) acc 87.5000 (73.9583) lr 1.5878e-03 eta 6:41:16 +epoch [17/50] batch [20/500] time 1.362 (1.404) data 0.000 (0.044) loss 1.1846 (1.0191) acc 78.1250 (74.0625) lr 1.5878e-03 eta 6:37:18 +epoch [17/50] batch [25/500] time 1.348 (1.393) data 0.000 (0.035) loss 0.7432 (1.0270) acc 78.1250 (73.1250) lr 1.5878e-03 eta 6:34:04 +epoch [17/50] batch [30/500] time 1.361 (1.386) data 0.000 (0.029) loss 1.6797 (1.0701) acc 65.6250 (72.5000) lr 1.5878e-03 eta 6:32:03 +epoch [17/50] batch [35/500] time 1.373 (1.382) data 0.001 (0.025) loss 1.3750 (1.0926) acc 65.6250 (71.6071) lr 1.5878e-03 eta 6:30:49 +epoch [17/50] batch [40/500] time 1.354 (1.379) data 0.001 (0.022) loss 1.5254 (1.1016) acc 75.0000 (72.0312) lr 1.5878e-03 eta 6:29:51 +epoch [17/50] batch [45/500] time 1.356 (1.377) data 0.000 (0.020) loss 0.6934 (1.0945) acc 75.0000 (71.8750) lr 1.5878e-03 eta 6:29:07 +epoch [17/50] batch [50/500] time 1.351 (1.379) data 0.000 (0.018) loss 1.0479 (1.1058) acc 78.1250 (72.1875) lr 1.5878e-03 eta 6:29:37 +epoch [17/50] batch [55/500] time 1.378 (1.377) data 0.000 (0.016) loss 1.1318 (1.1044) acc 65.6250 (72.1023) lr 1.5878e-03 eta 6:28:53 +epoch [17/50] batch [60/500] time 1.342 (1.376) data 0.000 (0.015) loss 0.5415 (1.0999) acc 87.5000 (72.3438) lr 1.5878e-03 eta 6:28:26 +epoch [17/50] batch [65/500] time 1.372 (1.375) data 0.000 (0.014) loss 1.0244 (1.0936) acc 68.7500 (72.4519) lr 1.5878e-03 eta 6:27:57 
+epoch [17/50] batch [70/500] time 1.358 (1.374) data 0.000 (0.013) loss 1.2373 (1.0962) acc 65.6250 (72.2321) lr 1.5878e-03 eta 6:27:48 +epoch [17/50] batch [75/500] time 1.391 (1.375) data 0.000 (0.012) loss 0.9360 (1.0902) acc 65.6250 (72.0833) lr 1.5878e-03 eta 6:27:43 +epoch [17/50] batch [80/500] time 1.352 (1.374) data 0.000 (0.011) loss 1.3193 (1.0995) acc 71.8750 (71.8359) lr 1.5878e-03 eta 6:27:35 +epoch [17/50] batch [85/500] time 1.382 (1.374) data 0.001 (0.011) loss 1.3193 (1.1018) acc 75.0000 (72.0221) lr 1.5878e-03 eta 6:27:16 +epoch [17/50] batch [90/500] time 1.378 (1.374) data 0.001 (0.010) loss 1.0469 (1.0889) acc 78.1250 (72.2222) lr 1.5878e-03 eta 6:27:12 +epoch [17/50] batch [95/500] time 1.369 (1.375) data 0.000 (0.010) loss 1.2256 (1.0911) acc 71.8750 (72.2039) lr 1.5878e-03 eta 6:27:22 +epoch [17/50] batch [100/500] time 1.384 (1.375) data 0.000 (0.009) loss 1.2275 (1.0891) acc 75.0000 (72.1875) lr 1.5878e-03 eta 6:27:15 +epoch [17/50] batch [105/500] time 1.367 (1.375) data 0.000 (0.009) loss 1.0156 (1.0858) acc 81.2500 (72.2321) lr 1.5878e-03 eta 6:27:05 +epoch [17/50] batch [110/500] time 1.370 (1.374) data 0.000 (0.008) loss 1.3740 (1.0916) acc 59.3750 (72.1875) lr 1.5878e-03 eta 6:26:43 +epoch [17/50] batch [115/500] time 1.364 (1.374) data 0.001 (0.008) loss 1.2432 (1.0984) acc 65.6250 (72.1467) lr 1.5878e-03 eta 6:26:33 +epoch [17/50] batch [120/500] time 1.378 (1.373) data 0.001 (0.008) loss 1.0518 (1.1004) acc 71.8750 (72.1094) lr 1.5878e-03 eta 6:26:16 +epoch [17/50] batch [125/500] time 1.379 (1.373) data 0.000 (0.007) loss 1.7881 (1.1120) acc 68.7500 (71.9000) lr 1.5878e-03 eta 6:26:04 +epoch [17/50] batch [130/500] time 1.363 (1.373) data 0.001 (0.007) loss 1.2285 (1.1084) acc 68.7500 (71.8510) lr 1.5878e-03 eta 6:25:55 +epoch [17/50] batch [135/500] time 1.352 (1.372) data 0.000 (0.007) loss 1.0752 (1.1049) acc 75.0000 (71.7593) lr 1.5878e-03 eta 6:25:39 +epoch [17/50] batch [140/500] time 1.380 (1.372) data 0.000 (0.007) loss 
1.0723 (1.1065) acc 75.0000 (71.9420) lr 1.5878e-03 eta 6:25:28 +epoch [17/50] batch [145/500] time 1.373 (1.372) data 0.001 (0.006) loss 0.7935 (1.1001) acc 71.8750 (72.0905) lr 1.5878e-03 eta 6:25:19 +epoch [17/50] batch [150/500] time 1.354 (1.372) data 0.001 (0.006) loss 0.9170 (1.0984) acc 84.3750 (72.2500) lr 1.5878e-03 eta 6:25:12 +epoch [17/50] batch [155/500] time 1.370 (1.371) data 0.000 (0.006) loss 1.5674 (1.0948) acc 68.7500 (72.3589) lr 1.5878e-03 eta 6:25:02 +epoch [17/50] batch [160/500] time 1.369 (1.371) data 0.000 (0.006) loss 0.7935 (1.0943) acc 75.0000 (72.4023) lr 1.5878e-03 eta 6:24:49 +epoch [17/50] batch [165/500] time 1.369 (1.371) data 0.000 (0.006) loss 1.0342 (1.0987) acc 65.6250 (72.2727) lr 1.5878e-03 eta 6:24:36 +epoch [17/50] batch [170/500] time 1.366 (1.370) data 0.001 (0.006) loss 1.2266 (1.0927) acc 65.6250 (72.3713) lr 1.5878e-03 eta 6:24:23 +epoch [17/50] batch [175/500] time 1.366 (1.370) data 0.000 (0.005) loss 0.7690 (1.0931) acc 78.1250 (72.2857) lr 1.5878e-03 eta 6:24:14 +epoch [17/50] batch [180/500] time 1.368 (1.371) data 0.000 (0.005) loss 0.7910 (1.0869) acc 75.0000 (72.3264) lr 1.5878e-03 eta 6:24:11 +epoch [17/50] batch [185/500] time 1.362 (1.370) data 0.001 (0.005) loss 1.1572 (1.0845) acc 65.6250 (72.2973) lr 1.5878e-03 eta 6:24:03 +epoch [17/50] batch [190/500] time 1.519 (1.371) data 0.001 (0.005) loss 1.1660 (1.0822) acc 62.5000 (72.2697) lr 1.5878e-03 eta 6:24:05 +epoch [17/50] batch [195/500] time 1.344 (1.370) data 0.001 (0.005) loss 1.0400 (1.0847) acc 75.0000 (72.2276) lr 1.5878e-03 eta 6:23:50 +epoch [17/50] batch [200/500] time 1.364 (1.370) data 0.001 (0.005) loss 1.1260 (1.0862) acc 62.5000 (72.2188) lr 1.5878e-03 eta 6:23:43 +epoch [17/50] batch [205/500] time 1.362 (1.370) data 0.001 (0.005) loss 1.4717 (1.0903) acc 71.8750 (72.1951) lr 1.5878e-03 eta 6:23:36 +epoch [17/50] batch [210/500] time 1.353 (1.370) data 0.000 (0.005) loss 1.4531 (1.0977) acc 75.0000 (72.0387) lr 1.5878e-03 eta 6:23:27 
+epoch [17/50] batch [215/500] time 1.374 (1.370) data 0.001 (0.005) loss 0.8711 (1.0939) acc 78.1250 (72.0785) lr 1.5878e-03 eta 6:23:19 +epoch [17/50] batch [220/500] time 1.363 (1.370) data 0.000 (0.004) loss 1.1328 (1.0929) acc 75.0000 (72.0597) lr 1.5878e-03 eta 6:23:09 +epoch [17/50] batch [225/500] time 1.360 (1.370) data 0.000 (0.004) loss 1.2070 (1.0928) acc 68.7500 (72.1250) lr 1.5878e-03 eta 6:23:01 +epoch [17/50] batch [230/500] time 1.356 (1.370) data 0.000 (0.004) loss 1.5508 (1.0992) acc 59.3750 (72.0109) lr 1.5878e-03 eta 6:22:47 +epoch [17/50] batch [235/500] time 1.353 (1.370) data 0.001 (0.004) loss 0.5430 (1.0957) acc 81.2500 (72.1144) lr 1.5878e-03 eta 6:22:44 +epoch [17/50] batch [240/500] time 1.370 (1.370) data 0.000 (0.004) loss 1.0811 (1.0955) acc 71.8750 (72.1615) lr 1.5878e-03 eta 6:22:39 +epoch [17/50] batch [245/500] time 1.372 (1.370) data 0.000 (0.004) loss 1.2471 (1.0947) acc 65.6250 (72.1684) lr 1.5878e-03 eta 6:22:33 +epoch [17/50] batch [250/500] time 1.350 (1.370) data 0.000 (0.004) loss 0.8711 (1.0921) acc 81.2500 (72.1875) lr 1.5878e-03 eta 6:22:22 +epoch [17/50] batch [255/500] time 1.354 (1.369) data 0.000 (0.004) loss 0.9106 (1.0884) acc 84.3750 (72.2794) lr 1.5878e-03 eta 6:22:10 +epoch [17/50] batch [260/500] time 1.381 (1.369) data 0.000 (0.004) loss 0.6816 (1.0880) acc 75.0000 (72.2957) lr 1.5878e-03 eta 6:22:01 +epoch [17/50] batch [265/500] time 1.364 (1.369) data 0.000 (0.004) loss 1.0635 (1.0843) acc 75.0000 (72.3821) lr 1.5878e-03 eta 6:21:50 +epoch [17/50] batch [270/500] time 1.368 (1.369) data 0.000 (0.004) loss 1.6201 (1.0865) acc 62.5000 (72.3727) lr 1.5878e-03 eta 6:21:41 +epoch [17/50] batch [275/500] time 1.380 (1.369) data 0.000 (0.004) loss 0.8726 (1.0808) acc 71.8750 (72.5000) lr 1.5878e-03 eta 6:21:35 +epoch [17/50] batch [280/500] time 1.369 (1.369) data 0.000 (0.004) loss 0.8901 (1.0798) acc 84.3750 (72.5781) lr 1.5878e-03 eta 6:21:29 +epoch [17/50] batch [285/500] time 1.359 (1.369) data 0.000 
(0.004) loss 0.8813 (1.0813) acc 81.2500 (72.5439) lr 1.5878e-03 eta 6:21:18 +epoch [17/50] batch [290/500] time 1.365 (1.369) data 0.000 (0.003) loss 0.7954 (1.0819) acc 87.5000 (72.5323) lr 1.5878e-03 eta 6:21:09 +epoch [17/50] batch [295/500] time 1.344 (1.368) data 0.000 (0.003) loss 1.2217 (1.0822) acc 62.5000 (72.5106) lr 1.5878e-03 eta 6:20:59 +epoch [17/50] batch [300/500] time 1.353 (1.368) data 0.000 (0.003) loss 1.0010 (1.0828) acc 78.1250 (72.5208) lr 1.5878e-03 eta 6:20:49 +epoch [17/50] batch [305/500] time 1.366 (1.368) data 0.001 (0.003) loss 0.8823 (1.0819) acc 78.1250 (72.5922) lr 1.5878e-03 eta 6:20:42 +epoch [17/50] batch [310/500] time 1.371 (1.368) data 0.000 (0.003) loss 1.1475 (1.0805) acc 81.2500 (72.6613) lr 1.5878e-03 eta 6:20:37 +epoch [17/50] batch [315/500] time 1.361 (1.368) data 0.000 (0.003) loss 0.5098 (1.0770) acc 90.6250 (72.7778) lr 1.5878e-03 eta 6:20:30 +epoch [17/50] batch [320/500] time 1.371 (1.368) data 0.000 (0.003) loss 0.5874 (1.0791) acc 78.1250 (72.6758) lr 1.5878e-03 eta 6:20:22 +epoch [17/50] batch [325/500] time 1.366 (1.368) data 0.001 (0.003) loss 0.8667 (1.0792) acc 84.3750 (72.7308) lr 1.5878e-03 eta 6:20:14 +epoch [17/50] batch [330/500] time 1.369 (1.368) data 0.001 (0.003) loss 0.9224 (1.0785) acc 65.6250 (72.6705) lr 1.5878e-03 eta 6:20:05 +epoch [17/50] batch [335/500] time 1.351 (1.368) data 0.000 (0.003) loss 1.4775 (1.0789) acc 65.6250 (72.6772) lr 1.5878e-03 eta 6:20:04 +epoch [17/50] batch [340/500] time 1.367 (1.368) data 0.000 (0.003) loss 0.8564 (1.0793) acc 75.0000 (72.6195) lr 1.5878e-03 eta 6:19:55 +epoch [17/50] batch [345/500] time 1.366 (1.368) data 0.000 (0.003) loss 0.9966 (1.0772) acc 71.8750 (72.6721) lr 1.5878e-03 eta 6:19:47 +epoch [17/50] batch [350/500] time 1.362 (1.368) data 0.000 (0.003) loss 0.9062 (1.0800) acc 78.1250 (72.5893) lr 1.5878e-03 eta 6:19:39 +epoch [17/50] batch [355/500] time 1.363 (1.368) data 0.000 (0.003) loss 1.5879 (1.0824) acc 62.5000 (72.5792) lr 1.5878e-03 
eta 6:19:30 +epoch [17/50] batch [360/500] time 1.388 (1.368) data 0.000 (0.003) loss 1.2656 (1.0849) acc 75.0000 (72.5694) lr 1.5878e-03 eta 6:19:24 +epoch [17/50] batch [365/500] time 1.350 (1.368) data 0.000 (0.003) loss 0.8149 (1.0847) acc 87.5000 (72.5685) lr 1.5878e-03 eta 6:19:14 +epoch [17/50] batch [370/500] time 1.339 (1.368) data 0.000 (0.003) loss 0.7573 (1.0846) acc 78.1250 (72.5929) lr 1.5878e-03 eta 6:19:03 +epoch [17/50] batch [375/500] time 1.363 (1.368) data 0.000 (0.003) loss 0.6636 (1.0817) acc 78.1250 (72.6333) lr 1.5878e-03 eta 6:18:55 +epoch [17/50] batch [380/500] time 1.352 (1.368) data 0.000 (0.003) loss 0.8472 (1.0798) acc 75.0000 (72.6316) lr 1.5878e-03 eta 6:18:52 +epoch [17/50] batch [385/500] time 1.365 (1.368) data 0.001 (0.003) loss 1.2412 (1.0798) acc 59.3750 (72.5649) lr 1.5878e-03 eta 6:18:45 +epoch [17/50] batch [390/500] time 1.372 (1.368) data 0.000 (0.003) loss 1.7988 (1.0868) acc 53.1250 (72.4439) lr 1.5878e-03 eta 6:18:37 +epoch [17/50] batch [395/500] time 1.384 (1.368) data 0.001 (0.003) loss 0.6279 (1.0855) acc 75.0000 (72.4842) lr 1.5878e-03 eta 6:18:32 +epoch [17/50] batch [400/500] time 1.366 (1.368) data 0.001 (0.003) loss 1.1670 (1.0870) acc 71.8750 (72.4609) lr 1.5878e-03 eta 6:18:24 +epoch [17/50] batch [405/500] time 1.352 (1.368) data 0.001 (0.003) loss 1.5029 (1.0880) acc 62.5000 (72.4228) lr 1.5878e-03 eta 6:18:16 +epoch [17/50] batch [410/500] time 1.360 (1.368) data 0.000 (0.003) loss 0.6812 (1.0881) acc 81.2500 (72.4543) lr 1.5878e-03 eta 6:18:08 +epoch [17/50] batch [415/500] time 1.358 (1.368) data 0.000 (0.003) loss 0.9517 (1.0894) acc 68.7500 (72.4473) lr 1.5878e-03 eta 6:18:01 +epoch [17/50] batch [420/500] time 1.361 (1.368) data 0.000 (0.003) loss 0.6875 (1.0900) acc 78.1250 (72.4628) lr 1.5878e-03 eta 6:17:56 +epoch [17/50] batch [425/500] time 1.361 (1.368) data 0.000 (0.003) loss 1.5469 (1.0920) acc 62.5000 (72.4265) lr 1.5878e-03 eta 6:17:48 +epoch [17/50] batch [430/500] time 1.349 (1.368) data 
0.000 (0.003) loss 1.8291 (1.0934) acc 59.3750 (72.4346) lr 1.5878e-03 eta 6:17:39 +epoch [17/50] batch [435/500] time 1.374 (1.367) data 0.000 (0.002) loss 1.1230 (1.0927) acc 81.2500 (72.4928) lr 1.5878e-03 eta 6:17:32 +epoch [17/50] batch [440/500] time 1.367 (1.367) data 0.000 (0.002) loss 1.0283 (1.0899) acc 81.2500 (72.5639) lr 1.5878e-03 eta 6:17:23 +epoch [17/50] batch [445/500] time 1.351 (1.367) data 0.000 (0.002) loss 1.0879 (1.0890) acc 78.1250 (72.5913) lr 1.5878e-03 eta 6:17:15 +epoch [17/50] batch [450/500] time 1.357 (1.367) data 0.000 (0.002) loss 0.8413 (1.0914) acc 68.7500 (72.5417) lr 1.5878e-03 eta 6:17:05 +epoch [17/50] batch [455/500] time 1.345 (1.367) data 0.000 (0.002) loss 1.0967 (1.0940) acc 78.1250 (72.4657) lr 1.5878e-03 eta 6:16:56 +epoch [17/50] batch [460/500] time 1.353 (1.367) data 0.000 (0.002) loss 1.0225 (1.0932) acc 75.0000 (72.4864) lr 1.5878e-03 eta 6:16:48 +epoch [17/50] batch [465/500] time 1.356 (1.367) data 0.000 (0.002) loss 1.2480 (1.0934) acc 68.7500 (72.5134) lr 1.5878e-03 eta 6:16:40 +epoch [17/50] batch [470/500] time 1.361 (1.367) data 0.000 (0.002) loss 1.6299 (1.0918) acc 62.5000 (72.5532) lr 1.5878e-03 eta 6:16:31 +epoch [17/50] batch [475/500] time 1.370 (1.367) data 0.000 (0.002) loss 1.4355 (1.0935) acc 68.7500 (72.5066) lr 1.5878e-03 eta 6:16:23 +epoch [17/50] batch [480/500] time 1.363 (1.367) data 0.000 (0.002) loss 0.7661 (1.0938) acc 81.2500 (72.5326) lr 1.5878e-03 eta 6:16:19 +epoch [17/50] batch [485/500] time 1.353 (1.367) data 0.001 (0.002) loss 0.5933 (1.0942) acc 84.3750 (72.5451) lr 1.5878e-03 eta 6:16:11 +epoch [17/50] batch [490/500] time 1.377 (1.367) data 0.000 (0.002) loss 1.2627 (1.0941) acc 56.2500 (72.5383) lr 1.5878e-03 eta 6:16:04 +epoch [17/50] batch [495/500] time 1.345 (1.367) data 0.000 (0.002) loss 1.0967 (1.0953) acc 68.7500 (72.5000) lr 1.5878e-03 eta 6:15:56 +epoch [17/50] batch [500/500] time 1.363 (1.367) data 0.000 (0.002) loss 1.4971 (1.0976) acc 62.5000 (72.4313) lr 
1.5358e-03 eta 6:15:50 diff --git a/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698466111.ckb-gpu-lambda.856979.0 b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698466111.ckb-gpu-lambda.856979.0 new file mode 100644 index 00000000..a8a58611 Binary files /dev/null and b/python/ClipDetection/CoOp/saved_outputs/vit_l14_ep50_16shots/nctx16_cscFalse_ctpend/seed2/tensorboard/events.out.tfevents.1698466111.ckb-gpu-lambda.856979.0 differ diff --git a/python/ClipDetection/CoOp/train.py b/python/ClipDetection/CoOp/train.py new file mode 100644 index 00000000..cc939fb5 --- /dev/null +++ b/python/ClipDetection/CoOp/train.py @@ -0,0 +1,238 @@ +################################################################ +# CHANGES MADE TO FILE # +# ------------------------------------------------------------ # +# Created get_trainer() function. # +# - Functions like main() but doesn't initialize logger and # +# returns trainer object from build_trainer(cfg) call # +# # +# - trainer.load_model() and trainer.train() are called in # +# clip_component.py # +# # +# - extend_cfg() has added cuda parameter to set model # +# precision to fp32 or fp16 # +################################################################ + + +import argparse +import torch + +from dassl.utils import setup_logger, set_random_seed, collect_env_info +from dassl.config import get_cfg_default +from dassl.engine import build_trainer + +# custom +# import CoOp.datasets.oxford_pets +# import CoOp.datasets.oxford_flowers +# import CoOp.datasets.fgvc_aircraft +# import CoOp.datasets.dtd +# import CoOp.datasets.eurosat +# import CoOp.datasets.stanford_cars +# import CoOp.datasets.food101 +# import CoOp.datasets.sun397 +# import CoOp.datasets.caltech101 +# import CoOp.datasets.ucf101 +# import CoOp.datasets.cococrops +# import CoOp.datasets.imagenet + +# import 
CoOp.datasets.imagenet_sketch +# import CoOp.datasets.imagenetv2 +# import CoOp.datasets.imagenet_a +# import CoOp.datasets.imagenet_r + +import CoOp.trainers.coop +# import CoOp.trainers.cocoop +# import CoOp.trainers.zsclip + +import os + +# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' + + +def print_args(args, cfg): + print("***************") + print("** Arguments **") + print("***************") + optkeys = list(args.__dict__.keys()) + optkeys.sort() + for key in optkeys: + print("{}: {}".format(key, args.__dict__[key])) + print("************") + print("** Config **") + print("************") + print(cfg) + + +def reset_cfg(cfg, args): + if args.root: + cfg.DATASET.ROOT = args.root + + if args.output_dir: + cfg.OUTPUT_DIR = args.output_dir + + if args.resume: + cfg.RESUME = args.resume + + if args.seed: + cfg.SEED = args.seed + + if args.source_domains: + cfg.DATASET.SOURCE_DOMAINS = args.source_domains + + if args.target_domains: + cfg.DATASET.TARGET_DOMAINS = args.target_domains + + if args.transforms: + cfg.INPUT.TRANSFORMS = args.transforms + + if args.trainer: + cfg.TRAINER.NAME = args.trainer + + if args.backbone: + cfg.MODEL.BACKBONE.NAME = args.backbone + + if args.head: + cfg.MODEL.HEAD.NAME = args.head + + +def extend_cfg(cfg, cuda=True): + """ + Add new config variables. + + E.g. + from yacs.config import CfgNode as CN + cfg.TRAINER.MY_MODEL = CN() + cfg.TRAINER.MY_MODEL.PARAM_A = 1. 
+ cfg.TRAINER.MY_MODEL.PARAM_B = 0.5 + cfg.TRAINER.MY_MODEL.PARAM_C = False + """ + from yacs.config import CfgNode as CN + + cfg.TRAINER.COOP = CN() + cfg.TRAINER.COOP.N_CTX = 16 # number of context vectors + cfg.TRAINER.COOP.CSC = False # class-specific context + cfg.TRAINER.COOP.CTX_INIT = "" # initialization words + if cuda: + cfg.TRAINER.COOP.PREC = "fp16" # fp16, fp32, amp + else: + cfg.TRAINER.COOP.PREC = "fp32" + cfg.TRAINER.COOP.CLASS_TOKEN_POSITION = "end" # 'middle' or 'end' or 'front' + + cfg.TRAINER.COCOOP = CN() + cfg.TRAINER.COCOOP.N_CTX = 16 # number of context vectors + cfg.TRAINER.COCOOP.CTX_INIT = "" # initialization words + cfg.TRAINER.COCOOP.PREC = "fp16" # fp16, fp32, amp + + cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new + + +def setup_cfg(args): + cfg = get_cfg_default() + extend_cfg(cfg, args.cuda) + + # 1. From the dataset config file + if args.dataset_config_file: + cfg.merge_from_file(args.dataset_config_file) + + # 2. From the method config file + if args.config_file: + cfg.merge_from_file(args.config_file) + + # 3. From input arguments + reset_cfg(cfg, args) + + # 4. 
From optional input arguments + cfg.merge_from_list(args.opts) + + cfg.freeze() + + return cfg + +def get_trainer(args, classnames=[], device_id=-1): + cfg = setup_cfg(args) + if cfg.SEED >= 0: + set_random_seed(cfg.SEED) + + if torch.cuda.is_available() and cfg.USE_CUDA: + torch.backends.cudnn.benchmark = True + + return build_trainer(cfg, classnames, device_id) + +def main(args, image=None): + cfg = setup_cfg(args) + if cfg.SEED >= 0: + print("Setting fixed seed: {}".format(cfg.SEED)) + set_random_seed(cfg.SEED) + # setup_logger(cfg.OUTPUT_DIR) + + if torch.cuda.is_available() and cfg.USE_CUDA: + torch.backends.cudnn.benchmark = True + + print_args(args, cfg) + print("Collecting env info ...") + print("** System info **\n{}\n".format(collect_env_info())) + + trainer = build_trainer(cfg) + + if args.eval_only: + trainer.load_model(args.model_dir, epoch=args.load_epoch) + return trainer.test(image=image) + + if not args.no_train: + trainer.train() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="", help="path to dataset") + parser.add_argument("--output-dir", type=str, default="", help="output directory") + parser.add_argument( + "--resume", + type=str, + default="", + help="checkpoint directory (from which the training resumes)", + ) + parser.add_argument( + "--seed", type=int, default=-1, help="only positive value enables a fixed seed" + ) + parser.add_argument( + "--source-domains", type=str, nargs="+", help="source domains for DA/DG" + ) + parser.add_argument( + "--target-domains", type=str, nargs="+", help="target domains for DA/DG" + ) + parser.add_argument( + "--transforms", type=str, nargs="+", help="data augmentation methods" + ) + parser.add_argument( + "--config-file", type=str, default="", help="path to config file" + ) + parser.add_argument( + "--dataset-config-file", + type=str, + default="", + help="path to config file for dataset setup", + ) + parser.add_argument("--trainer", 
type=str, default="", help="name of trainer") + parser.add_argument("--backbone", type=str, default="", help="name of CNN backbone") + parser.add_argument("--head", type=str, default="", help="name of head") + parser.add_argument("--eval-only", action="store_true", help="evaluation only") + parser.add_argument( + "--model-dir", + type=str, + default="", + help="load model from this directory for eval-only mode", + ) + parser.add_argument( + "--load-epoch", type=int, help="load model weights at this epoch for evaluation" + ) + parser.add_argument( + "--no-train", action="store_true", help="do not call trainer.train()" + ) + parser.add_argument( + "opts", + default=None, + nargs=argparse.REMAINDER, + help="modify config options using the command-line", + ) + args = parser.parse_args() + main(args) diff --git a/python/ClipDetection/CoOp/trainers/__init__.py b/python/ClipDetection/CoOp/trainers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/ClipDetection/CoOp/trainers/cocoop.py b/python/ClipDetection/CoOp/trainers/cocoop.py new file mode 100644 index 00000000..51508c88 --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/cocoop.py @@ -0,0 +1,315 @@ +import os.path as osp +from collections import OrderedDict +import math + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = 
None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + + model = clip.build_model(state_dict or model.state_dict()) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.COCOOP.N_CTX + ctx_init = cfg.TRAINER.COCOOP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + vis_dim = clip_model.visual.output_dim + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + prompt_prefix = ctx_init + else: + # random initialization + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " 
".join(["X"] * n_ctx) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + self.ctx = nn.Parameter(ctx_vectors) + + self.meta_net = nn.Sequential(OrderedDict([ + ("linear1", nn.Linear(vis_dim, vis_dim // 16)), + ("relu", nn.ReLU(inplace=True)), + ("linear2", nn.Linear(vis_dim // 16, ctx_dim)) + ])) + + if cfg.TRAINER.COCOOP.PREC == "fp16": + self.meta_net.half() + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) # (n_cls, n_tkn) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + prompts = torch.cat( + [ + prefix, # (dim0, 1, dim) + ctx, # (dim0, n_ctx, dim) + suffix, # (dim0, *, dim) + ], + dim=1, + ) + + return prompts + + def forward(self, im_features): + prefix = self.token_prefix + suffix = self.token_suffix + ctx = self.ctx # (n_ctx, ctx_dim) + bias = self.meta_net(im_features) # (batch, ctx_dim) + 
bias = bias.unsqueeze(1) # (batch, 1, ctx_dim) + ctx = ctx.unsqueeze(0) # (1, n_ctx, ctx_dim) + ctx_shifted = ctx + bias # (batch, n_ctx, ctx_dim) + + # Use instance-conditioned context tokens for all classes + prompts = [] + for ctx_shifted_i in ctx_shifted: + ctx_i = ctx_shifted_i.unsqueeze(0).expand(self.n_cls, -1, -1) + pts_i = self.construct_prompts(ctx_i, prefix, suffix) # (n_cls, n_tkn, ctx_dim) + prompts.append(pts_i) + prompts = torch.stack(prompts) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + + prompts = self.prompt_learner(image_features) + + logits = [] + for pts_i, imf_i in zip(prompts, image_features): + text_features = self.text_encoder(pts_i, tokenized_prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + l_i = logit_scale * imf_i @ text_features.t() + logits.append(l_i) + logits = torch.stack(logits) + + if self.prompt_learner.training: + return F.cross_entropy(logits, label) + + return logits + + +@TRAINER_REGISTRY.register() +class CoCoOp(TrainerX): + def check_cfg(self, cfg): + assert cfg.TRAINER.COCOOP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.COCOOP.PREC == "fp32" or 
cfg.TRAINER.COCOOP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.COCOOP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.COCOOP.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss = model(image, label) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def 
parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del state_dict["token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) diff --git a/python/ClipDetection/CoOp/trainers/coop.py b/python/ClipDetection/CoOp/trainers/coop.py new file mode 100644 index 00000000..d0bca081 --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/coop.py @@ -0,0 +1,326 @@ +################################################################ +# CHANGES MADE TO FILE # +# ------------------------------------------------------------ # +# Parameter classnames=[] added to CoOp class __init__. # +# - Used to bypass need for DataManager object. 
# +# # +################################################################ + +import os.path as osp +import random +import numpy as np + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url, "/models") + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + + model = clip.build_model(state_dict or model.state_dict()) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + 
n_ctx = cfg.TRAINER.COOP.N_CTX + ctx_init = cfg.TRAINER.COOP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + prompt_prefix = ctx_init + + else: + # random initialization + if cfg.TRAINER.COOP.CSC: + ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) + else: + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + + self.ctx = nn.Parameter(ctx_vectors) # to be optimized + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + self.class_token_position = cfg.TRAINER.COOP.CLASS_TOKEN_POSITION + + def forward(self): + ctx = self.ctx + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + + if self.class_token_position == "end": + prompts = torch.cat( + [ + prefix, # (n_cls, 1, dim) + ctx, # (n_cls, n_ctx, dim) + suffix, # (n_cls, *, dim) + ], + dim=1, + ) + + elif self.class_token_position == "middle": + half_n_ctx = self.n_ctx // 2 + prompts = [] + for i in range(self.n_cls): + name_len = self.name_lens[i] + prefix_i = prefix[i : i + 1, :, :] + class_i = suffix[i : i + 1, :name_len, :] + suffix_i = suffix[i : i + 1, name_len:, :] + ctx_i_half1 = ctx[i : i + 1, :half_n_ctx, :] + ctx_i_half2 = ctx[i : i + 1, half_n_ctx:, :] + prompt = torch.cat( + [ + prefix_i, # (1, 1, dim) + ctx_i_half1, # (1, n_ctx//2, dim) + class_i, # (1, name_len, dim) + ctx_i_half2, # (1, n_ctx//2, dim) + suffix_i, # (1, *, dim) + ], + dim=1, + ) + prompts.append(prompt) + prompts = torch.cat(prompts, dim=0) + + elif self.class_token_position == "front": + prompts = [] + for i in range(self.n_cls): + name_len = self.name_lens[i] + prefix_i = prefix[i : i + 1, :, :] + class_i = suffix[i : i + 1, :name_len, :] + suffix_i = suffix[i : i + 1, name_len:, :] + ctx_i = ctx[i : i + 1, :, :] + prompt = torch.cat( 
+ [ + prefix_i, # (1, 1, dim) + class_i, # (1, name_len, dim) + ctx_i, # (1, n_ctx, dim) + suffix_i, # (1, *, dim) + ], + dim=1, + ) + prompts.append(prompt) + prompts = torch.cat(prompts, dim=0) + + else: + raise ValueError + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image): + image_features = self.image_encoder(image.type(self.dtype)) + + prompts = self.prompt_learner() + tokenized_prompts = self.tokenized_prompts + text_features = self.text_encoder(prompts, tokenized_prompts) + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + logit_scale = self.logit_scale.exp() + logits = logit_scale * image_features @ text_features.t() + + return logits, image_features + + +@TRAINER_REGISTRY.register() +class CoOp(TrainerX): + """Context Optimization (CoOp). 
+ + Learning to Prompt for Vision-Language Models + https://arxiv.org/abs/2109.01134 + """ + + def check_cfg(self, cfg): + assert cfg.TRAINER.COOP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self, classnames=[]): + cfg = self.cfg + classnames = classnames + + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.COOP.PREC == "fp32" or cfg.TRAINER.COOP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + self.model = CustomCLIP(cfg, classnames, clip_model) + + for name, param in self.model.named_parameters(): + if "prompt_learner" not in name: + param.requires_grad_(False) + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.COOP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + # device_count = torch.cuda.device_count() + # if device_count > 1: + # self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + prec = self.cfg.TRAINER.COOP.PREC + if prec == "amp": + with autocast(): + output = self.model(image) + loss = F.cross_entropy(output, label) + self.optim.zero_grad() + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + output = self.model(image) + loss = F.cross_entropy(output, label) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + 
+ def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del state_dict["token_suffix"] + + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) diff --git a/python/ClipDetection/CoOp/trainers/custom_generator.txt b/python/ClipDetection/CoOp/trainers/custom_generator.txt new file mode 100644 index 00000000..5558e084 --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/custom_generator.txt @@ -0,0 +1,11275 @@ +Parameter containing: +tensor(4.6052)Parameter containing: +tensor([[-0.0260, -0.0138, -0.0155, ..., 0.0092, 0.0369, 0.0030], + [ 0.0144, -0.0042, -0.0056, ..., 0.0071, -0.0027, 0.0038], + [-0.0149, 0.0385, 0.0246, ..., -0.0042, 0.0160, 0.0317], + ..., + [-0.0068, 0.0134, -0.0277, ..., -0.0073, 0.0196, -0.0100], + [-0.0111, 0.0476, 0.0128, ..., -0.0119, -0.0163, -0.0131], + [ 0.0333, -0.0335, 0.0196, ..., 0.0075, -0.0094, -0.0114]])Parameter containing: +tensor([ 0.0138, 0.2357, -0.1285, ..., 0.0171, -0.3332, -0.2366])Parameter containing: +tensor([[ 0.0019, 0.0479, -0.0149, ..., 0.0005, -0.0558, -0.0460], + [ 0.0114, -0.0413, 0.0357, ..., 0.0271, 
-0.0313, -0.0383], + [-0.0026, -0.0340, -0.0006, ..., 0.0216, -0.0294, -0.0423], + ..., + [-0.0038, -0.0350, -0.0048, ..., -0.0228, -0.0328, -0.0412], + [-0.0046, -0.0360, -0.0026, ..., -0.0350, -0.0355, -0.0353], + [-0.0073, -0.0287, -0.0144, ..., -0.0202, -0.0272, -0.0360]])Parameter containing: +tensor([[ 0.0224, -0.0139, -0.0072, ..., -0.0058, -0.0078, 0.0139], + [ 0.0186, 0.0084, 0.0400, ..., -0.0149, -0.0241, -0.0003], + [ 0.0075, -0.0007, 0.0195, ..., -0.0062, -0.0083, 0.0156], + ..., + [ 0.0121, -0.0165, -0.0144, ..., -0.0066, 0.0088, 0.0027], + [-0.0164, -0.0100, -0.0053, ..., -0.0005, -0.0001, -0.0075], + [ 0.0092, 0.0048, 0.0069, ..., 0.0054, -0.0162, 0.0262]])Parameter containing: +tensor([[[[ 2.5284e-02, 1.0597e-02, 7.1678e-03, ..., 2.3422e-02, + 2.1683e-02, 4.8637e-03], + [ 1.3748e-02, -6.2103e-03, -4.8103e-03, ..., 1.6418e-02, + 7.0114e-03, -1.3161e-02], + [ 1.0048e-02, 2.1286e-03, 2.2945e-03, ..., 5.5695e-03, + 5.0468e-03, -1.2604e-02], + ..., + [-1.0101e-02, -2.3854e-04, -5.4588e-03, ..., -1.9226e-02, + -2.4017e-02, -2.4765e-02], + [-3.4752e-03, -1.0979e-02, -1.3603e-02, ..., -7.6408e-03, + 1.5583e-03, -4.4365e-03], + [-2.1469e-02, -4.3182e-02, -3.0121e-02, ..., -5.2147e-03, + 3.7346e-03, -6.8016e-03]], + + [[ 1.5930e-02, -4.9095e-03, -1.2283e-02, ..., 2.5879e-02, + 2.4048e-02, 5.6458e-03], + [ 2.1019e-03, -2.4185e-02, -2.6337e-02, ..., 1.5297e-02, + 5.2605e-03, -1.5121e-02], + [ 5.1956e-03, -7.2556e-03, -9.4376e-03, ..., 7.9193e-03, + 5.4703e-03, -1.2398e-02], + ..., + [-4.2267e-03, 5.9624e-03, -6.2656e-04, ..., 3.8528e-03, + 4.2963e-04, -5.4207e-03], + [-2.8496e-03, -1.1482e-02, -1.3680e-02, ..., 1.5129e-02, + 2.3285e-02, 1.2856e-02], + [-2.7740e-02, -4.9561e-02, -3.1158e-02, ..., 1.2787e-02, + 1.7975e-02, 6.4516e-04]], + + [[ 1.6403e-02, -2.0084e-03, -4.8714e-03, ..., 1.6159e-02, + 1.1337e-02, 5.2719e-03], + [ 1.8549e-03, -2.1622e-02, -2.4734e-02, ..., 6.0081e-03, + -4.9477e-03, -8.3389e-03], + [ 4.8523e-03, -1.0818e-02, -1.5015e-02, ..., 
6.0272e-04, + -2.3615e-04, -7.6065e-03], + ..., + [ 2.4033e-03, 2.6741e-03, -8.2016e-03, ..., -1.0231e-02, + -1.0254e-02, -7.4234e-03], + [ 8.2626e-03, -3.1586e-03, -9.0256e-03, ..., -3.5248e-03, + 6.7329e-03, 5.1842e-03], + [-1.0529e-02, -2.6947e-02, -1.5656e-02, ..., 1.6518e-03, + 6.4774e-03, 2.7132e-04]]], + + + [[[ 1.5366e-02, 2.6184e-02, 5.8479e-03, ..., 8.4534e-03, + -9.0027e-03, 2.0325e-02], + [-1.8753e-02, -7.4615e-03, -1.6830e-02, ..., 2.9640e-03, + -1.9193e-05, 1.5640e-02], + [-2.4765e-02, -1.2184e-02, 1.7405e-03, ..., -2.6291e-02, + -2.8641e-02, -3.6869e-03], + ..., + [ 7.4539e-03, -6.8169e-03, 1.4931e-02, ..., 1.4824e-02, + -5.6839e-03, -6.2599e-03], + [ 6.2408e-03, -8.2016e-03, 4.1229e-02, ..., -5.0664e-06, + -2.8336e-02, -1.9409e-02], + [ 1.7120e-02, -1.1139e-02, 6.1279e-02, ..., -4.5490e-04, + 7.2899e-03, 4.6967e-02]], + + [[ 2.1149e-02, 3.3386e-02, 1.0483e-02, ..., 6.6109e-03, + -1.1864e-02, 1.7838e-02], + [-1.5022e-02, -8.8882e-04, -9.4604e-03, ..., 4.7722e-03, + 3.3522e-04, 1.4709e-02], + [-2.3026e-02, -6.3400e-03, 1.1215e-02, ..., -2.9251e-02, + -3.2776e-02, -7.0419e-03], + ..., + [ 5.5275e-03, -1.1826e-02, 7.7248e-03, ..., 1.1215e-02, + -1.1208e-02, -9.9030e-03], + [ 2.2125e-03, -1.5572e-02, 3.5980e-02, ..., -4.5929e-03, + -3.7567e-02, -2.6779e-02], + [ 1.0384e-02, -2.4033e-02, 5.2917e-02, ..., -1.1375e-02, + -4.0016e-03, 4.0253e-02]], + + [[ 1.0483e-02, 2.2339e-02, 8.9121e-04, ..., 5.2719e-03, + -1.2917e-02, 1.7471e-02], + [-2.5070e-02, -1.1597e-02, -1.9104e-02, ..., 4.4594e-03, + 4.0364e-04, 1.5610e-02], + [-3.3417e-02, -1.8112e-02, -1.3227e-03, ..., -2.8519e-02, + -3.0121e-02, -6.7444e-03], + ..., + [ 4.9820e-03, -1.0445e-02, 1.0681e-02, ..., 1.3405e-02, + -8.7509e-03, -8.8196e-03], + [ 2.5711e-03, -1.3268e-02, 4.1168e-02, ..., 9.7275e-04, + -3.0792e-02, -2.5375e-02], + [ 8.9951e-03, -2.1439e-02, 5.3528e-02, ..., -6.4163e-03, + -4.1795e-04, 3.9398e-02]]], + + + [[[ 7.2441e-03, 3.7231e-03, -2.4662e-03, ..., 1.0353e-02, + 1.4267e-02, 
1.9363e-02], + [-3.0270e-03, -3.2539e-03, -1.2878e-02, ..., 9.7847e-04, + 5.2299e-03, 6.8626e-03], + [-4.3182e-03, 5.6915e-03, -3.1910e-03, ..., 8.4114e-04, + 2.2297e-03, 7.1373e-03], + ..., + [ 4.4632e-03, 3.8757e-03, -2.0063e-04, ..., 1.5976e-02, + 1.4221e-02, 1.2756e-02], + [ 2.5146e-02, 1.4793e-02, 5.1003e-03, ..., 2.2858e-02, + 2.2186e-02, 2.3026e-02], + [ 3.0807e-02, 2.6031e-02, 1.4259e-02, ..., 2.5116e-02, + 2.1759e-02, 2.4887e-02]], + + [[ 6.9695e-03, 5.0888e-03, -2.8915e-03, ..., 1.7868e-02, + 1.9669e-02, 2.9037e-02], + [-2.8973e-03, -1.2035e-03, -1.1116e-02, ..., 5.5542e-03, + 5.9547e-03, 1.3420e-02], + [-9.8190e-03, 4.3716e-03, 2.3806e-04, ..., 1.1253e-03, + -8.7976e-04, 9.4681e-03], + ..., + [ 6.1417e-03, 5.1804e-03, 2.1095e-03, ..., 2.4979e-02, + 2.5146e-02, 2.7710e-02], + [ 3.1128e-02, 2.0096e-02, 8.0948e-03, ..., 3.3722e-02, + 3.3295e-02, 4.0405e-02], + [ 3.7659e-02, 3.2166e-02, 1.8311e-02, ..., 4.2542e-02, + 3.9429e-02, 4.6356e-02]], + + [[ 1.7014e-02, 1.5358e-02, 1.1269e-02, ..., 2.1378e-02, + 2.1317e-02, 3.0075e-02], + [ 7.4120e-03, 7.8087e-03, 1.1091e-03, ..., 7.4654e-03, + 7.7209e-03, 1.2947e-02], + [-5.4646e-04, 1.1208e-02, 6.4545e-03, ..., 4.1313e-03, + 3.2539e-03, 9.7275e-03], + ..., + [ 3.3531e-03, 2.0325e-04, 1.3704e-03, ..., 7.8087e-03, + 7.9422e-03, 1.4809e-02], + [ 1.6571e-02, 2.9163e-03, 4.2105e-04, ..., 1.1787e-02, + 1.1337e-02, 1.8753e-02], + [ 1.9714e-02, 1.0704e-02, 2.9335e-03, ..., 2.1042e-02, + 1.5457e-02, 2.2263e-02]]], + + + ..., + + + [[[-3.1614e-04, -6.5041e-04, -6.0844e-04, ..., 6.5207e-05, + 2.8062e-04, -5.1928e-04], + [-5.2452e-06, -9.8610e-04, -9.5367e-04, ..., 1.9908e-05, + -1.0675e-04, -8.3148e-05], + [-9.5606e-04, -6.4993e-04, -1.2035e-03, ..., -6.1035e-04, + -4.2439e-04, 6.3181e-04], + ..., + [-7.1907e-04, -6.2132e-04, 1.0270e-04, ..., -3.2485e-05, + -7.7963e-04, -7.9155e-04], + [-9.8991e-04, 6.4433e-05, -1.2598e-03, ..., -8.0490e-04, + -1.2980e-03, -1.2064e-03], + [-2.8110e-04, -5.8031e-04, -2.4199e-04, ..., 
-5.1558e-05, + 4.4203e-04, 1.4377e-04]], + + [[ 5.6839e-04, 1.9491e-05, 2.8157e-04, ..., 1.6952e-04, + 9.6035e-04, -5.6601e-04], + [ 9.8038e-04, 2.3961e-05, 4.3941e-04, ..., 3.5739e-04, + 7.8630e-04, -6.2466e-04], + [-2.5654e-04, 3.8624e-04, 1.7090e-03, ..., 6.6614e-04, + 6.1607e-04, 7.3719e-04], + ..., + [ 5.9319e-04, 4.7755e-04, 4.7016e-04, ..., 1.0605e-03, + 6.6137e-04, 3.1066e-04], + [ 8.3494e-04, 4.7708e-04, -1.0042e-03, ..., 6.4945e-04, + -2.4092e-04, 3.6502e-04], + [ 4.7803e-04, -3.4690e-04, 6.3467e-04, ..., 2.3830e-04, + 1.9407e-04, 4.0698e-04]], + + [[ 2.0623e-04, -7.5936e-05, -6.9094e-04, ..., -2.5582e-04, + -5.5313e-04, -5.7125e-04], + [-9.0122e-05, 3.5214e-04, 2.0063e-04, ..., -2.6512e-04, + 1.1653e-04, 5.8317e-04], + [-9.5224e-04, -3.9577e-04, -3.9458e-04, ..., 2.1636e-04, + 6.0797e-05, 1.7786e-04], + ..., + [ 4.9019e-04, -1.6594e-04, 5.3120e-04, ..., 3.1352e-04, + 9.8825e-05, 5.7650e-04], + [ 7.5400e-05, 4.0960e-04, -6.8998e-04, ..., 1.8597e-04, + 1.9622e-04, -3.3689e-04], + [-1.4269e-04, -2.5558e-04, 2.9540e-04, ..., 2.1315e-04, + -2.9826e-04, 4.0221e-04]]], + + + [[[ 1.2306e-02, 1.8921e-02, 5.3024e-03, ..., 1.1612e-02, + 6.5956e-03, 2.7069e-02], + [ 1.1261e-02, 2.9709e-02, 1.3695e-02, ..., -8.9722e-03, + -1.7639e-02, -3.2501e-03], + [ 2.1103e-02, 3.1342e-02, 1.7731e-02, ..., -1.1185e-02, + -2.7451e-02, -5.5275e-03], + ..., + [ 3.7292e-02, 2.5757e-02, 6.7863e-03, ..., 1.8631e-02, + 2.8793e-02, 3.6560e-02], + [ 1.9577e-02, -5.3711e-03, -2.1255e-02, ..., -1.6953e-02, + -2.3621e-02, 4.6463e-03], + [ 1.3992e-02, -2.7130e-02, -5.1117e-02, ..., -1.2520e-02, + -4.0009e-02, 1.3618e-02]], + + [[ 1.7109e-03, 9.4223e-03, -2.4147e-03, ..., 8.3694e-03, + 3.3112e-03, 2.3117e-02], + [ 1.1692e-03, 2.3514e-02, 1.1520e-02, ..., -8.2321e-03, + -1.8555e-02, -6.4278e-03], + [ 1.0735e-02, 2.6749e-02, 1.8997e-02, ..., -1.1795e-02, + -3.0396e-02, -9.2773e-03], + ..., + [ 3.4821e-02, 2.1423e-02, 8.1253e-04, ..., 1.6235e-02, + 2.6367e-02, 3.4302e-02], + [ 1.4656e-02, 
-1.1101e-02, -2.7344e-02, ..., -2.0676e-02, + -3.1250e-02, -1.2932e-03], + [ 5.8136e-03, -3.8971e-02, -6.3354e-02, ..., -2.1881e-02, + -5.2307e-02, 4.1885e-03]], + + [[-1.0658e-02, -1.8530e-03, -8.5220e-03, ..., 4.6959e-03, + -1.9407e-03, 1.7426e-02], + [-1.3008e-02, 1.1108e-02, 5.3177e-03, ..., -8.9722e-03, + -2.1408e-02, -9.2850e-03], + [-3.2902e-03, 1.4580e-02, 1.3863e-02, ..., -1.2299e-02, + -2.9846e-02, -1.2985e-02], + ..., + [ 3.2806e-02, 2.2476e-02, 6.9771e-03, ..., 1.0704e-02, + 1.9516e-02, 2.4567e-02], + [ 1.3817e-02, -6.0501e-03, -1.4580e-02, ..., -2.2476e-02, + -3.2013e-02, -9.6893e-03], + [ 5.8556e-03, -3.2196e-02, -5.1910e-02, ..., -2.4429e-02, + -5.2979e-02, -3.0937e-03]]], + + + [[[ 2.2598e-02, -7.3586e-03, -2.9099e-02, ..., -2.2873e-02, + 8.5068e-03, -4.8706e-02], + [ 1.7410e-02, -3.1433e-02, -4.2816e-02, ..., -6.2675e-03, + 9.4528e-03, -3.8910e-02], + [ 2.2125e-02, -1.5839e-02, -4.1351e-02, ..., 4.6021e-02, + 2.4017e-02, -1.1345e-02], + ..., + [ 2.8290e-02, 3.7964e-02, 4.1656e-02, ..., 2.4734e-02, + -2.2011e-03, -1.9989e-02], + [-1.5671e-02, -2.0996e-02, -2.9182e-03, ..., 2.0828e-02, + 7.9803e-03, 1.4175e-02], + [-3.1624e-03, -9.1400e-03, 7.2937e-03, ..., 1.6663e-02, + 1.3590e-03, 1.6647e-02]], + + [[ 2.2675e-02, -8.0872e-03, -3.0746e-02, ..., -1.9989e-02, + 1.6220e-02, -4.3518e-02], + [ 1.6678e-02, -3.2532e-02, -4.2694e-02, ..., -6.4468e-04, + 1.8555e-02, -3.2135e-02], + [ 2.0767e-02, -1.6098e-02, -3.9978e-02, ..., 5.0598e-02, + 2.9999e-02, -5.6038e-03], + ..., + [ 4.2328e-02, 5.0476e-02, 4.9988e-02, ..., 2.2064e-02, + -1.8721e-03, -1.5190e-02], + [-4.8981e-03, -1.0933e-02, 6.5994e-03, ..., 1.9073e-02, + 7.9498e-03, 2.0065e-02], + [ 4.9896e-03, -1.7853e-03, 1.5068e-02, ..., 1.0445e-02, + -2.7905e-03, 1.9196e-02]], + + [[ 7.0305e-03, -1.8372e-02, -3.5797e-02, ..., -1.5244e-02, + 2.1683e-02, -3.0380e-02], + [-1.7321e-04, -4.1534e-02, -4.5563e-02, ..., 4.7989e-03, + 2.4796e-02, -1.7990e-02], + [ 2.1000e-03, -2.8732e-02, -4.5746e-02, ..., 5.0171e-02, 
+ 3.4485e-02, 4.2267e-03], + ..., + [ 3.7415e-02, 4.6143e-02, 4.9500e-02, ..., 2.0111e-02, + 4.0741e-03, -6.3667e-03], + [-5.8479e-03, -9.4757e-03, 1.2398e-02, ..., 2.1317e-02, + 1.5762e-02, 2.5894e-02], + [ 2.3136e-03, -7.0858e-04, 1.7914e-02, ..., 1.1047e-02, + 2.1496e-03, 2.2278e-02]]]])Parameter containing: +tensor([0.3311, 0.0032, 0.1610, ..., 2.1922, 0.0050, 0.0039])Parameter containing: +tensor([-0.0045, -0.0452, -0.0475, ..., 0.0402, -0.1402, -0.0132])Parameter containing: +tensor([[-7.0632e-05, -1.6510e-04, -7.0930e-05, ..., 4.5090e-03, + -2.9160e-02, -7.8201e-05], + [-1.3733e-04, 1.2165e-04, 4.2319e-05, ..., -1.6594e-03, + 3.1433e-02, 7.4446e-05], + [ 4.8018e-04, 7.7963e-04, -1.0991e-04, ..., -1.6846e-02, + 4.2999e-02, 1.5199e-04], + ..., + [ 2.1267e-04, 4.1032e-04, -7.2420e-05, ..., 4.8027e-03, + -1.7338e-03, -6.6102e-05], + [ 3.0518e-04, -4.4405e-05, -2.2709e-04, ..., 1.1551e-02, + 3.3436e-03, 7.4685e-05], + [-2.8849e-05, 4.5919e-04, 9.3341e-05, ..., -1.1314e-02, + 3.7670e-03, -7.7844e-05]])Parameter containing: +tensor([ 1.5674, -1.6143, -0.8208, ..., 0.0115, 0.0107, -0.0043])Parameter containing: +tensor([[-6.7596e-03, 8.8043e-03, -7.9422e-03, ..., -8.6441e-03, + -8.7433e-03, 3.5553e-03], + [ 1.2077e-02, 5.8784e-03, 1.1253e-02, ..., -3.7060e-03, + 2.0008e-03, 3.8319e-03], + [-5.2032e-03, 2.6913e-03, 1.2894e-02, ..., 6.4812e-03, + -3.0398e-05, -4.2796e-04], + ..., + [-4.5037e-04, -2.5063e-03, -3.2768e-03, ..., -3.2768e-03, + -1.9409e-02, 9.2545e-03], + [-7.3624e-03, 2.8419e-03, -7.9193e-03, ..., 4.0627e-04, + -1.3866e-03, -6.7186e-04], + [ 9.0408e-03, 1.5287e-03, 1.6737e-03, ..., 2.4242e-03, + -3.7575e-03, 4.9667e-03]])Parameter containing: +tensor([-0.0262, -0.0654, 0.0032, ..., 0.1761, -0.0446, 0.0023])Parameter containing: +tensor([6.1186e-04, 2.0990e-03, 3.0166e-05, ..., 6.9025e-01, 3.5588e-01, + 1.4703e-04])Parameter containing: +tensor([ 1.3605e-04, 8.3127e-04, -2.0098e-05, ..., -3.6831e-01, + 1.7861e-01, 7.4003e-05])Parameter containing: 
+tensor([[ 3.6597e-04, 2.6047e-05, 1.1921e-07, ..., -6.6109e-03, + -1.1740e-03, -6.4468e-04], + [ 5.3291e-03, 1.3710e-02, -3.5620e-04, ..., -3.8052e-03, + -2.5225e-04, 6.0730e-03], + [ 1.3428e-03, 1.2884e-03, -1.9073e-06, ..., -2.8549e-02, + -1.1930e-03, 1.4906e-03], + ..., + [-2.4994e-02, -1.0262e-02, 2.3067e-04, ..., -2.0103e-03, + -1.2665e-02, 6.2332e-03], + [ 3.2401e-04, 9.3758e-05, -5.9605e-08, ..., -6.0234e-03, + -7.3862e-04, -6.4611e-04], + [ 1.1129e-03, -2.3117e-02, -2.7061e-04, ..., -4.4365e-03, + 3.5744e-03, -7.4997e-03]])Parameter containing: +tensor([-0.6826, -0.3132, -0.8076, ..., -0.2167, -0.6543, -0.3040])Parameter containing: +tensor([[-0.0043, -0.0023, 0.0041, ..., 0.0116, -0.0049, -0.0073], + [ 0.0018, 0.0191, -0.0102, ..., -0.0261, 0.0026, 0.0206], + [ 0.0039, -0.0002, -0.0028, ..., 0.0029, 0.0038, -0.0151], + ..., + [ 0.0021, -0.0003, -0.0034, ..., 0.0033, 0.0015, 0.0089], + [-0.0059, 0.0078, 0.0069, ..., -0.0005, -0.0060, 0.0020], + [ 0.0003, -0.0039, -0.0022, ..., -0.0094, 0.0005, 0.0039]])Parameter containing: +tensor([-0.0184, -0.1008, 0.0398, ..., -0.0965, -0.1080, -0.0237])Parameter containing: +tensor([2.8353e-01, 5.9135e-01, 3.3711e-06, ..., 2.0198e+00, 7.7565e-01, + 2.9745e-01])Parameter containing: +tensor([2.1628e-02, 2.1650e-01, 2.3350e-04, ..., 2.6387e-01, 4.4878e-01, + 5.1503e-02])Parameter containing: +tensor([[-0.0033, 0.0007, -0.0002, ..., 0.0078, -0.0240, 0.0078], + [ 0.0070, 0.0033, 0.0170, ..., -0.0062, 0.0080, 0.0055], + [ 0.0102, -0.0102, -0.0003, ..., 0.0024, 0.0164, 0.0043], + ..., + [ 0.0113, 0.0003, -0.0048, ..., 0.0002, 0.0042, -0.0065], + [-0.0144, -0.0119, 0.0076, ..., -0.0037, 0.0036, 0.0072], + [-0.0012, -0.0020, 0.0010, ..., -0.0066, -0.0222, -0.0007]])Parameter containing: +tensor([-1.0248e-01, 8.7988e-01, 1.4414e+00, ..., -1.0862e-03, + -4.0474e-03, 2.2471e-04])Parameter containing: +tensor([[-4.9829e-04, -2.1194e-02, -1.9908e-05, ..., -1.1253e-02, + 1.1993e-02, 1.0979e-04], + [-5.0068e-04, 7.5417e-03, 
-4.4131e-04, ..., -2.8553e-03, + 1.1459e-02, -3.0899e-03], + [ 2.7752e-03, -5.4703e-03, -1.1978e-02, ..., -3.8319e-03, + -1.0222e-04, -5.6686e-03], + ..., + [ 5.3825e-03, -1.8539e-02, 8.3313e-02, ..., -2.1317e-02, + -9.7198e-03, 1.5419e-02], + [-9.5062e-03, -2.0390e-03, 5.9166e-03, ..., 8.5144e-03, + -4.4022e-03, 6.3820e-03], + [-2.8553e-03, 6.8321e-03, -9.3508e-04, ..., 5.5199e-03, + 4.7264e-03, -4.1389e-03]])Parameter containing: +tensor([-0.0203, -0.0213, 0.0256, ..., -0.0386, -0.0219, -0.0045])Parameter containing: +tensor([0.3628, 0.7462, 0.0949, ..., 0.8510, 0.4239, 0.2627])Parameter containing: +tensor([-0.0484, -0.1306, 0.0199, ..., -0.1032, -0.0533, 0.0084])Parameter containing: +tensor([[ 0.0025, 0.0060, 0.0152, ..., -0.0063, 0.0478, -0.0812], + [ 0.0002, -0.0075, 0.0009, ..., -0.0011, -0.0030, 0.0037], + [ 0.0053, -0.0222, 0.0008, ..., -0.0101, 0.0178, -0.0035], + ..., + [ 0.0026, -0.0111, 0.0018, ..., -0.0058, -0.0008, 0.0039], + [-0.0072, 0.0112, 0.0018, ..., 0.0027, -0.0154, -0.0180], + [ 0.0151, 0.0001, 0.0326, ..., -0.0002, -0.0062, -0.0225]])Parameter containing: +tensor([-0.1219, -0.6836, -0.5273, ..., -0.7568, -0.0984, -0.3079])Parameter containing: +tensor([[ 1.8396e-03, -9.6321e-04, -1.6800e-02, ..., -3.0613e-03, + -9.5901e-03, -3.4103e-03], + [-1.5350e-02, 6.2675e-03, 1.4854e-02, ..., 6.7291e-03, + -9.3937e-05, -6.2218e-03], + [ 5.9891e-03, -4.2915e-04, 1.0605e-02, ..., -5.6076e-03, + -2.0447e-03, 5.9662e-03], + ..., + [ 2.9125e-03, -2.3937e-03, 4.5738e-03, ..., 1.6699e-03, + 6.7043e-04, 5.3139e-03], + [ 9.0456e-04, -1.3828e-03, 1.1587e-03, ..., -1.1549e-03, + 4.4975e-03, -5.7945e-03], + [ 3.0212e-02, 3.7136e-03, 1.1283e-04, ..., 4.8065e-03, + 1.2444e-02, 5.4054e-03]])Parameter containing: +tensor([ 0.0242, -0.0644, 0.0790, ..., -0.0809, -0.1028, -0.0834])Parameter containing: +tensor([0.4126, 0.9839, 0.1912, ..., 0.7707, 0.5578, 0.5130])Parameter containing: +tensor([-0.0194, -0.1082, -0.0194, ..., 0.0886, 0.1335, 0.0285])Parameter 
containing: +tensor([[-6.4240e-03, -2.0248e-02, -2.0676e-02, ..., 1.1930e-03, + 2.8778e-02, -5.5267e-02], + [-2.8038e-03, 1.3485e-03, -1.9196e-02, ..., 9.4748e-04, + -1.9562e-02, -2.9373e-03], + [-5.8861e-03, -4.8141e-03, 5.3825e-03, ..., -1.8219e-02, + -2.0416e-02, -9.6283e-03], + ..., + [-2.5009e-02, 1.1108e-02, 1.0498e-02, ..., 4.8447e-03, + 1.2636e-05, 2.5177e-03], + [ 1.0887e-02, 1.1696e-02, 1.1856e-02, ..., 2.7962e-03, + -4.8447e-03, -6.4964e-03], + [-8.6746e-03, 2.5177e-03, -4.9591e-03, ..., 2.8553e-03, + -8.6136e-03, 4.2229e-03]])Parameter containing: +tensor([ 0.1929, -0.0773, -0.0911, ..., 0.1083, 0.0064, 0.0453])Parameter containing: +tensor([[-2.2720e-02, 2.8229e-03, -3.1710e-04, ..., 2.3804e-02, + -2.3819e-02, 1.4458e-02], + [-1.9178e-03, 6.7055e-05, -1.0406e-02, ..., -7.9041e-03, + -4.2076e-03, -6.3286e-03], + [-1.2703e-02, -6.1874e-03, -1.0422e-02, ..., -1.6769e-02, + -6.2981e-03, -1.8578e-03], + ..., + [-1.6136e-03, 9.8228e-04, -7.8888e-03, ..., -6.7940e-03, + -2.7447e-03, -2.1706e-03], + [ 5.8823e-03, -3.4351e-03, 1.2810e-02, ..., -1.3399e-03, + 1.7090e-03, 7.6027e-03], + [ 1.0025e-02, 6.5842e-03, 1.1444e-02, ..., -5.9242e-03, + -1.4353e-03, -3.4161e-03]])Parameter containing: +tensor([ 0.0180, -0.0964, 0.0243, ..., -0.0159, -0.0454, -0.0301])Parameter containing: +tensor([0.5055, 0.3092, 0.3977, ..., 1.4209, 0.4980, 0.3574])Parameter containing: +tensor([-0.0747, -0.0122, 0.0623, ..., -0.0418, 0.0183, -0.0493])Parameter containing: +tensor([[-0.0067, -0.0268, 0.0142, ..., -0.0009, 0.0202, -0.0156], + [ 0.0101, 0.0105, -0.0008, ..., 0.0012, -0.0004, 0.0251], + [-0.0059, 0.0096, 0.0011, ..., -0.0045, 0.0005, 0.0131], + ..., + [ 0.0016, -0.0027, -0.0004, ..., 0.0003, -0.0022, -0.0065], + [ 0.0005, 0.0114, 0.0169, ..., 0.0032, 0.0011, 0.0202], + [-0.0124, 0.0076, -0.0112, ..., 0.0046, -0.0065, -0.0068]])Parameter containing: +tensor([-0.2451, -0.3083, -0.4565, ..., -0.1675, -0.2117, -0.5532])Parameter containing: +tensor([[ 6.9380e-04, -4.9591e-03, 
3.3998e-04, ..., -5.6534e-03, + 4.0131e-03, 1.9054e-03], + [ 2.4014e-03, -1.7365e-02, -4.1771e-03, ..., -9.8419e-04, + 1.3916e-02, -2.5787e-03], + [-2.0340e-02, 7.0419e-03, 4.9667e-03, ..., 9.6846e-04, + -1.9730e-02, 7.8964e-04], + ..., + [-7.2746e-03, 9.3412e-04, 2.4259e-04, ..., -6.7294e-05, + 1.0061e-03, 3.1109e-03], + [-1.4820e-03, -6.7673e-03, -1.0185e-03, ..., 3.6182e-03, + -1.1826e-02, 2.4719e-02], + [ 6.9389e-03, 3.9864e-03, -3.3212e-04, ..., 1.5701e-02, + 7.3318e-03, 7.0572e-03]])Parameter containing: +tensor([ 0.0455, -0.0800, -0.0028, ..., -0.0156, -0.1378, -0.0312])Parameter containing: +tensor([0.6342, 0.5487, 0.3780, ..., 1.3511, 0.4005, 0.4882])Parameter containing: +tensor([-0.0342, -0.0825, -0.0966, ..., -0.0490, 0.0846, -0.2136])Parameter containing: +tensor([[ 2.7573e-02, 1.0155e-02, 5.2223e-03, ..., 6.3057e-03, + -8.5449e-03, -1.4496e-02], + [ 4.0741e-03, 1.8341e-02, -4.6654e-03, ..., -6.0539e-03, + -2.0538e-02, 5.9052e-03], + [ 6.1989e-05, -9.3613e-03, 4.7445e-04, ..., 1.0582e-02, + 9.0256e-03, -1.5945e-02], + ..., + [ 2.3632e-03, 1.7147e-03, 1.2856e-02, ..., 1.9665e-03, + 1.4906e-03, -5.8441e-03], + [-1.9121e-03, 1.6052e-02, 7.6561e-03, ..., 2.6722e-03, + -5.3329e-03, -3.0499e-03], + [-1.9257e-02, -6.6910e-03, 1.0643e-02, ..., -2.6035e-03, + 6.3744e-03, 3.3646e-03]])Parameter containing: +tensor([-0.1843, -0.5454, -0.1458, ..., -0.0142, 0.0038, 0.0057])Parameter containing: +tensor([[-2.3544e-02, 3.3112e-03, -1.4915e-03, ..., -7.5264e-03, + 1.7456e-02, 1.1635e-02], + [ 6.1531e-03, -3.9154e-02, -3.7251e-03, ..., -2.8820e-03, + -2.1454e-02, 1.2619e-02], + [ 5.9624e-03, -9.4299e-03, 1.4954e-02, ..., -1.4839e-02, + 7.3280e-03, -1.1848e-02], + ..., + [ 4.7982e-05, 4.2915e-03, -1.1238e-02, ..., -1.1238e-02, + 1.3962e-03, -1.3695e-03], + [-7.3586e-03, -1.0338e-02, -1.3638e-04, ..., 2.1240e-02, + 1.3512e-02, -2.4395e-03], + [-1.8524e-02, -1.1511e-03, -6.6681e-03, ..., -3.1424e-04, + -3.4256e-03, 3.2120e-03]])Parameter containing: +tensor([-0.0142, 
-0.0583, 0.0198, ..., 0.0195, -0.1207, 0.0172])Parameter containing: +tensor([0.7431, 0.3526, 0.6107, ..., 2.2615, 0.5052, 0.3920])Parameter containing: +tensor([-0.0007, 0.0244, 0.0183, ..., -0.1535, -0.0343, 0.0142])Parameter containing: +tensor([[ 2.4109e-03, 3.2253e-03, -4.0948e-05, ..., -2.7924e-03, + -6.2485e-03, 6.4964e-03], + [ 1.8692e-03, -1.9464e-03, -2.5692e-03, ..., -8.8959e-03, + -5.5275e-03, -7.2517e-03], + [-4.8370e-03, -2.5986e-02, -6.8359e-03, ..., -7.1068e-03, + 1.1925e-02, 3.2806e-03], + ..., + [ 6.9885e-03, 3.9635e-03, -1.4124e-03, ..., -4.8065e-03, + -1.8377e-03, 8.5258e-04], + [-2.0752e-02, -1.8066e-02, -5.8937e-03, ..., -8.4991e-03, + -1.3115e-02, -6.9733e-03], + [-2.3849e-02, 7.0190e-03, -5.0430e-03, ..., -1.0780e-02, + -5.9013e-03, -8.5068e-03]])Parameter containing: +tensor([-0.0958, -0.1884, -0.1593, ..., -0.2017, -0.3232, -0.3743])Parameter containing: +tensor([[ 0.0077, -0.0076, 0.0042, ..., 0.0185, 0.0244, 0.0145], + [ 0.0102, 0.0012, -0.0082, ..., -0.0322, -0.0016, 0.0077], + [-0.0055, -0.0099, -0.0081, ..., 0.0024, 0.0082, 0.0235], + ..., + [ 0.0053, 0.0035, 0.0003, ..., -0.0044, -0.0019, -0.0058], + [-0.0026, 0.0178, 0.0062, ..., 0.0020, -0.0052, -0.0042], + [-0.0041, 0.0082, 0.0150, ..., -0.0024, 0.0150, -0.0076]])Parameter containing: +tensor([ 0.0141, -0.0796, 0.0049, ..., 0.0710, -0.1786, 0.0413])Parameter containing: +tensor([0.9759, 0.5307, 0.6513, ..., 0.0107, 0.5041, 0.5372])Parameter containing: +tensor([ 0.0589, 0.0290, 0.0214, ..., 0.3877, -0.0775, -0.1199])Parameter containing: +tensor([[ 0.0074, 0.0006, -0.0156, ..., -0.0098, -0.0449, -0.0081], + [-0.0049, -0.0510, 0.0051, ..., -0.0058, -0.0286, 0.0074], + [ 0.0070, 0.0021, 0.0135, ..., 0.0238, 0.0070, -0.0351], + ..., + [ 0.0131, -0.0296, -0.0192, ..., -0.0009, 0.0007, 0.0007], + [ 0.0140, 0.0147, -0.0112, ..., 0.0018, 0.0341, -0.0212], + [ 0.0168, -0.0181, -0.0056, ..., 0.0013, -0.0197, -0.0118]])Parameter containing: +tensor([-0.4622, -0.0086, 0.2756, ..., 0.0269, 
0.0068, -0.0022])Parameter containing: +tensor([[-0.0027, -0.0052, 0.0031, ..., -0.0015, -0.0176, -0.0188], + [-0.0074, 0.0147, 0.0008, ..., 0.0245, -0.0242, 0.0237], + [ 0.0041, 0.0037, 0.0004, ..., 0.0177, 0.0124, 0.0109], + ..., + [-0.0073, 0.0058, -0.0050, ..., -0.0073, -0.0063, 0.0020], + [ 0.0157, -0.0413, 0.0109, ..., 0.0118, -0.0392, 0.0283], + [ 0.0064, 0.0013, -0.0097, ..., 0.0003, 0.0149, 0.0117]])Parameter containing: +tensor([ 0.0089, -0.0779, 0.0223, ..., -0.0115, -0.1759, 0.0235])Parameter containing: +tensor([0.7499, 0.4987, 0.7858, ..., 1.1598, 0.6024, 0.5770])Parameter containing: +tensor([ 0.0150, -0.0119, 0.0050, ..., -0.1037, 0.0333, -0.0361])Parameter containing: +tensor([[ 0.0077, -0.0109, -0.0022, ..., -0.0063, 0.0133, 0.0150], + [-0.0096, 0.0191, 0.0149, ..., -0.0078, 0.0161, 0.0103], + [-0.0020, 0.0116, 0.0042, ..., -0.0045, 0.0149, 0.0007], + ..., + [ 0.0186, 0.0082, 0.0246, ..., -0.0084, 0.0029, -0.0158], + [ 0.0175, -0.0043, 0.0002, ..., -0.0078, 0.0047, -0.0143], + [-0.0011, -0.0010, 0.0262, ..., -0.0082, -0.0047, -0.0202]])Parameter containing: +tensor([-0.2437, -0.3796, -0.5195, ..., -0.2163, -0.4231, -0.2202])Parameter containing: +tensor([[-0.0169, 0.0065, -0.0019, ..., -0.0216, 0.0189, 0.0012], + [ 0.0048, -0.0130, 0.0052, ..., -0.0211, -0.0036, -0.0101], + [ 0.0040, -0.0051, -0.0002, ..., -0.0073, -0.0107, -0.0037], + ..., + [-0.0058, -0.0022, 0.0002, ..., -0.0073, -0.0007, 0.0026], + [-0.0098, -0.0155, 0.0002, ..., 0.0191, 0.0043, 0.0222], + [-0.0067, 0.0011, 0.0009, ..., 0.0020, -0.0060, 0.0049]])Parameter containing: +tensor([-0.0154, -0.0523, -0.0401, ..., 0.1025, -0.1436, -0.0176])Parameter containing: +tensor([1.1220, 0.5171, 1.0746, ..., 0.0111, 0.6744, 0.7526])Parameter containing: +tensor([ 0.0830, -0.0924, -0.0048, ..., 0.1117, -0.0385, -0.0674])Parameter containing: +tensor([[-1.6815e-02, 3.0869e-02, 9.5444e-03, ..., -7.8125e-03, + 1.6342e-02, -1.0109e-02], + [-1.2794e-02, 7.7095e-03, -3.9101e-03, ..., -1.1053e-03, + 
-5.3482e-03, -1.1383e-02], + [ 1.6956e-03, -1.2161e-02, -4.4136e-03, ..., -1.5106e-03, + -1.3565e-02, 6.5117e-03], + ..., + [-1.2596e-02, 1.0803e-02, 5.0116e-04, ..., -3.5954e-04, + -3.2578e-03, -5.4300e-05], + [-1.4236e-02, -4.2572e-03, 1.3161e-02, ..., 1.7285e-05, + -3.1860e-02, -1.3054e-02], + [ 1.2398e-02, 5.1737e-05, 2.3148e-02, ..., -2.7866e-03, + -4.3144e-03, -2.5146e-02]])Parameter containing: +tensor([-0.0057, -1.4600, 0.3438, ..., -0.0042, -0.0107, -0.0046])Parameter containing: +tensor([[ 6.3477e-03, 1.2695e-02, -6.6872e-03, ..., 1.6868e-05, + 1.6006e-02, -1.3306e-02], + [-5.9090e-03, 8.4877e-04, -1.6708e-02, ..., -2.0477e-02, + -1.0666e-02, -1.1078e-02], + [ 2.4246e-02, 1.2558e-02, -1.6769e-02, ..., 6.4697e-03, + 1.2642e-02, -1.8021e-02], + ..., + [ 1.6754e-02, 5.8670e-03, -1.1282e-03, ..., -1.5726e-03, + 1.8406e-03, 1.1803e-02], + [ 5.7335e-03, 7.0724e-03, 1.3092e-02, ..., -1.1902e-02, + 1.6022e-02, -1.1311e-03], + [ 2.0809e-03, -2.6493e-03, -3.7041e-03, ..., -4.9400e-03, + 6.8893e-03, 1.5732e-02]])Parameter containing: +tensor([-0.0091, -0.0477, -0.0098, ..., -0.0483, -0.1364, 0.0059])Parameter containing: +tensor([0.8486, 0.6437, 0.8933, ..., 1.2490, 0.7166, 0.8544])Parameter containing: +tensor([-0.0071, 0.0669, -0.0529, ..., -0.1688, -0.0491, 0.0438])Parameter containing: +tensor([[ 5.3062e-03, 1.2688e-02, 1.0406e-02, ..., -2.4281e-03, + -4.9362e-03, 1.8988e-03], + [-2.9793e-03, -1.5764e-03, 1.6724e-02, ..., 1.1415e-03, + -2.2034e-02, -9.2392e-03], + [-3.9673e-02, -1.2040e-05, 2.4188e-04, ..., 2.8877e-03, + -3.9101e-03, -2.3239e-02], + ..., + [ 8.1329e-03, 1.3290e-02, 2.1637e-02, ..., -6.3057e-03, + -4.2686e-03, -1.4544e-03], + [ 2.7435e-02, 6.1798e-03, 1.0468e-02, ..., 3.2425e-05, + 4.9400e-03, -9.4604e-03], + [ 1.3458e-02, 7.5836e-03, -1.2062e-02, ..., 4.9925e-04, + -9.8419e-03, -1.8356e-02]])Parameter containing: +tensor([-0.4192, -0.2394, -0.3069, ..., -0.3665, -0.2556, -0.1316])Parameter containing: +tensor([[ 0.0160, -0.0017, -0.0088, ..., 
0.0165, -0.0056, 0.0135], + [ 0.0050, 0.0226, 0.0044, ..., 0.0111, 0.0021, 0.0038], + [ 0.0148, -0.0083, 0.0003, ..., 0.0085, 0.0015, -0.0004], + ..., + [-0.0031, -0.0009, -0.0014, ..., 0.0004, -0.0025, -0.0012], + [-0.0146, -0.0036, 0.0007, ..., 0.0108, -0.0012, -0.0406], + [ 0.0060, 0.0041, -0.0141, ..., -0.0118, 0.0065, -0.0112]])Parameter containing: +tensor([-0.0417, 0.0127, -0.0229, ..., 0.0725, -0.0144, -0.0360])Parameter containing: +tensor([1.1527, 0.7259, 1.1281, ..., 1.0935, 0.8785, 1.1066])Parameter containing: +tensor([ 0.0413, -0.1707, -0.0308, ..., 0.0418, -0.2141, -0.0075])Parameter containing: +tensor([[ 0.0110, -0.0251, -0.0094, ..., 0.0972, 0.0119, -0.0125], + [-0.0140, -0.0233, 0.0010, ..., -0.1094, -0.0082, -0.0113], + [-0.0004, 0.0120, 0.0042, ..., 0.0518, 0.0182, 0.0130], + ..., + [-0.0162, 0.0175, -0.0176, ..., 0.0016, -0.0075, 0.0305], + [ 0.0311, -0.0070, -0.0240, ..., -0.0003, -0.0044, -0.0165], + [-0.0102, -0.0211, 0.0222, ..., -0.0004, -0.0292, -0.0076]])Parameter containing: +tensor([ 0.2430, 0.3398, -0.1389, ..., 0.0082, -0.0049, 0.0183])Parameter containing: +tensor([[-0.0019, 0.0045, 0.0063, ..., 0.0163, -0.0224, 0.0068], + [-0.0041, 0.0050, -0.0084, ..., -0.0048, -0.0007, 0.0138], + [-0.0173, 0.0011, 0.0050, ..., 0.0098, 0.0256, -0.0074], + ..., + [ 0.0102, -0.0012, -0.0010, ..., -0.0045, 0.0045, 0.0039], + [ 0.0035, -0.0075, 0.0118, ..., -0.0043, -0.0048, 0.0198], + [ 0.0175, -0.0080, -0.0061, ..., -0.0388, -0.0020, 0.0159]])Parameter containing: +tensor([-0.0218, 0.0022, 0.0177, ..., 0.0566, -0.0418, -0.0156])Parameter containing: +tensor([0.8877, 0.8024, 1.0279, ..., 2.0427, 0.9536, 0.9729])Parameter containing: +tensor([-0.1278, -0.0779, -0.0511, ..., -0.2266, -0.0554, 0.0418])Parameter containing: +tensor([[ 0.0282, 0.0064, 0.0033, ..., -0.0064, 0.0038, 0.0184], + [ 0.0007, 0.0181, 0.0098, ..., -0.0025, -0.0096, 0.0182], + [-0.0056, 0.0077, 0.0009, ..., -0.0078, -0.0058, -0.0323], + ..., + [-0.0288, 0.0303, 0.0033, ..., 
0.0016, -0.0074, 0.0192], + [-0.0051, -0.0323, -0.0066, ..., -0.0045, 0.0333, 0.0005], + [ 0.0188, 0.0207, 0.0077, ..., -0.0080, -0.0315, -0.0182]])Parameter containing: +tensor([-0.1138, -0.1492, -0.4014, ..., -0.2352, -0.3323, -0.2046])Parameter containing: +tensor([[-1.9287e-02, 2.2621e-03, 6.0921e-03, ..., -3.4676e-03, + -8.6060e-03, -2.0447e-03], + [-6.7711e-05, -3.6869e-03, -5.7602e-03, ..., -1.7380e-02, + 3.8025e-02, -1.9588e-03], + [-1.2627e-02, -4.6158e-03, -6.8207e-03, ..., 1.4572e-03, + -6.2037e-04, -1.3741e-02], + ..., + [ 3.5152e-03, 2.8687e-03, -8.9417e-03, ..., -8.0633e-04, + 7.1335e-03, 3.8662e-03], + [ 1.1139e-02, 7.1411e-03, 7.1297e-03, ..., -1.9293e-03, + -5.5265e-04, 3.8330e-02], + [-8.9264e-03, -5.6114e-03, 2.1210e-03, ..., -1.2589e-02, + -8.9493e-03, 8.3389e-03]])Parameter containing: +tensor([-0.0068, 0.0181, -0.0552, ..., 0.1211, -0.0751, -0.1089])Parameter containing: +tensor([1.1916, 0.9694, 1.2653, ..., 0.1731, 0.9097, 1.1966])Parameter containing: +tensor([-0.0955, -0.1844, -0.0546, ..., 0.2570, -0.0544, 0.0379])Parameter containing: +tensor([[ 0.0093, -0.0152, -0.0200, ..., 0.0346, -0.0043, -0.0287], + [ 0.0025, -0.0165, -0.0050, ..., -0.0740, -0.0039, -0.0172], + [-0.0126, 0.0090, 0.0117, ..., -0.0017, 0.0034, 0.0126], + ..., + [-0.0276, -0.0107, -0.0004, ..., -0.0036, 0.0028, 0.0067], + [ 0.0289, -0.0022, -0.0177, ..., -0.0029, 0.0003, -0.0052], + [-0.0118, 0.0090, 0.0049, ..., -0.0104, 0.0250, 0.0115]])Parameter containing: +tensor([-1.8496, 0.1801, 2.3359, ..., 0.0398, -0.0217, -0.1345])Parameter containing: +tensor([[ 0.0018, -0.0264, 0.0078, ..., 0.0173, -0.0076, -0.0041], + [ 0.0135, -0.0083, 0.0026, ..., 0.0076, 0.0072, -0.0242], + [ 0.0054, 0.0058, -0.0234, ..., -0.0210, -0.0069, 0.0223], + ..., + [-0.0025, 0.0097, -0.0013, ..., 0.0089, 0.0019, 0.0197], + [ 0.0045, -0.0037, 0.0037, ..., 0.0008, 0.0096, -0.0237], + [-0.0100, 0.0123, 0.0061, ..., -0.0153, -0.0145, 0.0152]])Parameter containing: +tensor([ 0.0037, 0.0258, -0.0091, 
..., -0.0498, -0.0065, -0.0458])Parameter containing: +tensor([1.1915, 1.0973, 1.2475, ..., 1.4018, 1.1544, 1.1824])Parameter containing: +tensor([ 0.0225, -0.0913, 0.0974, ..., -0.2996, -0.0410, 0.0070])Parameter containing: +tensor([[-0.0168, 0.0096, 0.0042, ..., -0.0020, -0.0020, 0.0089], + [-0.0215, 0.0329, 0.0130, ..., 0.0041, 0.0160, 0.0035], + [-0.0176, -0.0188, 0.0220, ..., 0.0037, -0.0368, 0.0167], + ..., + [ 0.0086, -0.0059, -0.0079, ..., 0.0015, -0.0030, -0.0178], + [-0.0288, -0.0067, 0.0123, ..., -0.0054, -0.0138, -0.0072], + [-0.0190, 0.0143, -0.0290, ..., -0.0286, -0.0196, -0.0011]])Parameter containing: +tensor([-0.1221, -0.2141, -0.4116, ..., -0.1118, -0.1777, -0.3623])Parameter containing: +tensor([[-0.0010, 0.0136, -0.0347, ..., -0.0224, 0.0056, -0.0153], + [-0.0027, -0.0350, 0.0204, ..., 0.0106, -0.0202, -0.0021], + [ 0.0073, -0.0276, -0.0020, ..., 0.0134, 0.0046, -0.0155], + ..., + [-0.0004, -0.0072, 0.0019, ..., -0.0059, 0.0039, 0.0084], + [ 0.0117, -0.0049, -0.0148, ..., -0.0053, 0.0066, -0.0098], + [ 0.0130, 0.0172, 0.0037, ..., 0.0183, -0.0211, -0.0070]])Parameter containing: +tensor([-0.0200, 0.0231, -0.0658, ..., 0.1027, -0.0781, -0.1132])Parameter containing: +tensor([1.2565, 1.1066, 1.2045, ..., 0.5890, 1.0264, 1.2907])Parameter containing: +tensor([-0.0063, -0.0652, 0.0874, ..., 0.1717, 0.1017, -0.0355])Parameter containing: +tensor([[-0.0141, 0.0224, 0.0066, ..., -0.0503, -0.0308, -0.0002], + [-0.0106, -0.0263, 0.0116, ..., -0.0196, -0.0080, 0.0482], + [ 0.0088, -0.0083, 0.0067, ..., 0.0172, -0.0225, 0.0448], + ..., + [ 0.0117, 0.0198, 0.0119, ..., 0.0007, 0.0045, -0.0206], + [ 0.0123, -0.0125, 0.0020, ..., 0.0034, 0.0106, -0.0007], + [ 0.0226, -0.0011, -0.0222, ..., 0.0048, -0.0005, -0.0066]])Parameter containing: +tensor([ 0.0543, -0.0735, 0.2413, ..., -0.0484, -0.1190, 0.0173])Parameter containing: +tensor([[-0.0229, 0.0144, -0.0111, ..., -0.0034, 0.0119, -0.0192], + [-0.0048, -0.0063, 0.0103, ..., -0.0061, 0.0145, 0.0030], + [ 
0.0077, 0.0203, 0.0148, ..., -0.0084, -0.0068, 0.0304], + ..., + [-0.0003, 0.0012, 0.0053, ..., 0.0082, -0.0035, 0.0224], + [-0.0080, 0.0013, 0.0045, ..., 0.0091, -0.0064, -0.0116], + [-0.0171, 0.0154, -0.0227, ..., -0.0176, 0.0146, -0.0069]])Parameter containing: +tensor([-0.0177, 0.0686, -0.0156, ..., -0.0817, 0.0255, 0.0177])Parameter containing: +tensor([1.1690, 1.1532, 1.1559, ..., 1.5800, 1.1703, 1.2291])Parameter containing: +tensor([-0.0074, 0.0918, -0.0353, ..., -0.3273, -0.1143, -0.0546])Parameter containing: +tensor([[-0.0204, -0.0119, -0.0051, ..., -0.0030, -0.0053, 0.0117], + [-0.0035, -0.0211, 0.0029, ..., -0.0038, 0.0121, 0.0023], + [ 0.0126, -0.0055, 0.0038, ..., 0.0006, 0.0247, 0.0077], + ..., + [ 0.0121, 0.0132, -0.0259, ..., 0.0031, 0.0226, 0.0040], + [-0.0022, 0.0106, -0.0208, ..., -0.0026, 0.0163, -0.0018], + [-0.0326, 0.0187, 0.0123, ..., -0.0007, -0.0089, 0.0122]])Parameter containing: +tensor([-0.3628, -0.2209, -0.1646, ..., -0.2522, -0.2683, -0.2517])Parameter containing: +tensor([[-0.0055, 0.0073, 0.0131, ..., -0.0030, -0.0204, -0.0067], + [-0.0153, 0.0021, 0.0061, ..., 0.0199, -0.0058, -0.0234], + [-0.0043, 0.0070, 0.0054, ..., 0.0016, 0.0075, -0.0185], + ..., + [-0.0034, -0.0019, 0.0044, ..., -0.0031, -0.0046, 0.0004], + [-0.0026, -0.0267, -0.0127, ..., 0.0038, -0.0151, 0.0075], + [-0.0029, -0.0212, -0.0195, ..., 0.0119, 0.0086, -0.0139]])Parameter containing: +tensor([-0.0283, 0.0284, -0.0328, ..., 0.0670, -0.0050, -0.0489])Parameter containing: +tensor([1.2720, 1.2516, 1.2042, ..., 0.7531, 1.0650, 1.2413])Parameter containing: +tensor([ 0.0019, -0.0140, 0.0240, ..., 0.2147, -0.1253, -0.2114])Parameter containing: +tensor([[-0.0038, -0.0111, 0.0217, ..., -0.0341, 0.0049, 0.0043], + [ 0.0020, 0.0047, 0.0051, ..., -0.0009, -0.0141, 0.0165], + [-0.0086, 0.0055, 0.0177, ..., 0.0030, -0.0044, -0.0111], + ..., + [ 0.0037, 0.0199, -0.0006, ..., -0.0081, 0.0196, -0.0002], + [-0.0116, 0.0020, -0.0122, ..., 0.0042, -0.0016, -0.0110], + [-0.0201, 
0.0025, -0.0230, ..., -0.0041, 0.0287, 0.0105]])Parameter containing: +tensor([-0.8447, 0.0093, -1.0840, ..., -0.0142, 0.0109, 0.0013])Parameter containing: +tensor([[-0.0003, 0.0259, 0.0041, ..., 0.0052, -0.0108, 0.0274], + [ 0.0064, 0.0003, -0.0086, ..., -0.0271, 0.0063, 0.0018], + [-0.0234, 0.0012, 0.0170, ..., 0.0238, 0.0096, 0.0125], + ..., + [ 0.0077, 0.0320, 0.0242, ..., 0.0052, 0.0005, 0.0047], + [ 0.0064, 0.0084, 0.0002, ..., -0.0008, 0.0042, -0.0140], + [ 0.0204, -0.0061, -0.0246, ..., 0.0237, -0.0045, 0.0173]])Parameter containing: +tensor([-0.0204, 0.0182, -0.0022, ..., -0.0782, 0.0405, -0.0199])Parameter containing: +tensor([1.1486, 1.2062, 1.1745, ..., 1.6290, 1.1674, 1.2157])Parameter containing: +tensor([-0.0552, 0.0993, -0.0013, ..., -0.1784, -0.0515, -0.0148])Parameter containing: +tensor([[-0.0086, -0.0217, 0.0128, ..., -0.0079, -0.0053, 0.0027], + [-0.0070, 0.0067, 0.0020, ..., -0.0125, -0.0191, -0.0117], + [ 0.0062, 0.0227, 0.0108, ..., 0.0066, 0.0004, 0.0018], + ..., + [ 0.0228, -0.0078, 0.0063, ..., 0.0002, 0.0019, -0.0005], + [-0.0191, 0.0253, 0.0069, ..., -0.0109, -0.0114, -0.0081], + [ 0.0292, -0.0316, -0.0293, ..., -0.0048, 0.0165, -0.0164]])Parameter containing: +tensor([-0.4792, -0.1467, -0.1043, ..., -0.2996, -0.2251, -0.3262])Parameter containing: +tensor([[-0.0207, 0.0158, -0.0021, ..., 0.0083, 0.0042, 0.0273], + [ 0.0063, 0.0024, -0.0061, ..., 0.0069, -0.0269, 0.0042], + [ 0.0299, -0.0060, -0.0002, ..., -0.0130, 0.0070, -0.0297], + ..., + [-0.0122, 0.0011, -0.0082, ..., -0.0026, 0.0038, -0.0006], + [-0.0204, 0.0085, 0.0057, ..., 0.0096, -0.0105, 0.0216], + [-0.0023, 0.0328, 0.0013, ..., -0.0099, -0.0044, 0.0145]])Parameter containing: +tensor([ 0.0411, -0.0040, -0.0516, ..., 0.1114, 0.0086, -0.0609])Parameter containing: +tensor([1.3836, 1.2857, 1.2323, ..., 0.6118, 1.1779, 1.2560])Parameter containing: +tensor([-0.2367, 0.0575, 0.1226, ..., 0.2404, 0.0237, -0.0258])Parameter containing: +tensor([[ 0.0172, -0.0115, -0.0176, ..., 
0.0113, -0.0038, 0.0092], + [-0.0095, 0.0104, 0.0087, ..., 0.0204, -0.0187, -0.0189], + [-0.0259, -0.0040, 0.0021, ..., 0.0106, 0.0068, -0.0149], + ..., + [ 0.0015, 0.0207, -0.0071, ..., -0.0045, -0.0049, 0.0017], + [ 0.0381, 0.0040, -0.0079, ..., 0.0003, -0.0011, 0.0140], + [ 0.0094, -0.0019, -0.0035, ..., -0.0018, 0.0271, -0.0058]])Parameter containing: +tensor([ 0.0180, 0.2048, -0.1954, ..., 0.0674, -0.0071, 0.0122])Parameter containing: +tensor([[ 0.0127, -0.0093, -0.0233, ..., -0.0062, -0.0268, -0.0001], + [ 0.0243, -0.0188, 0.0209, ..., -0.0062, -0.0029, -0.0023], + [-0.0073, 0.0052, -0.0273, ..., 0.0022, 0.0090, 0.0104], + ..., + [-0.0117, 0.0100, 0.0137, ..., 0.0083, -0.0002, 0.0034], + [ 0.0061, 0.0166, 0.0235, ..., 0.0010, 0.0024, -0.0336], + [-0.0002, -0.0062, -0.0147, ..., 0.0020, -0.0193, -0.0020]])Parameter containing: +tensor([-0.0233, 0.0391, -0.0073, ..., -0.0649, 0.0291, 0.0002])Parameter containing: +tensor([1.1998, 1.2310, 1.1488, ..., 1.5903, 1.2228, 1.3022])Parameter containing: +tensor([-0.0585, 0.0091, -0.0592, ..., -0.2559, -0.1667, -0.0673])Parameter containing: +tensor([[-1.1040e-02, -1.6754e-02, -2.7451e-02, ..., -4.3964e-04, + 6.0501e-03, 5.5552e-04], + [ 2.4002e-02, -2.4567e-02, 7.3128e-03, ..., -3.2883e-03, + 1.0437e-02, -2.3246e-05], + [ 8.8272e-03, 8.2474e-03, 3.9597e-03, ..., 4.3845e-04, + -7.0724e-03, 2.1725e-03], + ..., + [-1.2598e-03, -9.5901e-03, 1.6785e-02, ..., -1.8721e-03, + -4.9057e-03, 7.3891e-03], + [ 2.6455e-03, 1.1078e-02, -1.6968e-02, ..., -1.1044e-03, + -6.0959e-03, -3.0914e-02], + [ 1.2146e-02, 2.3819e-02, 5.0545e-04, ..., 2.2030e-03, + -4.2877e-03, 1.9012e-02]])Parameter containing: +tensor([-0.1323, -0.2241, -0.0570, ..., -0.2708, -0.3240, -0.0825])Parameter containing: +tensor([[ 9.5673e-03, -1.0620e-02, -2.4261e-02, ..., 3.1433e-02, + -1.2718e-02, -1.2207e-02], + [ 8.1787e-03, 6.4707e-04, 3.0732e-04, ..., -1.3092e-02, + 5.6446e-05, -1.8692e-02], + [ 6.8626e-03, 1.8829e-02, 1.0094e-02, ..., -3.1891e-03, + 
9.3765e-03, -3.5896e-03], + ..., + [ 5.6763e-03, 3.4389e-03, -8.8310e-04, ..., 5.5847e-03, + -3.6240e-03, -4.8103e-03], + [ 2.6627e-03, -1.5274e-02, -6.7186e-04, ..., -2.0081e-02, + 1.1981e-04, 1.0040e-02], + [-7.2365e-03, -5.4207e-03, -3.8395e-03, ..., 5.3978e-03, + -2.5177e-02, -2.7252e-02]])Parameter containing: +tensor([ 0.0388, 0.0069, -0.0129, ..., 0.0417, 0.0218, 0.0082])Parameter containing: +tensor([1.4280, 1.3614, 1.2954, ..., 1.0131, 1.1817, 1.3209])Parameter containing: +tensor([-0.1139, 0.0275, 0.0677, ..., 0.1797, 0.0199, -0.2525])Parameter containing: +tensor([[-0.0128, 0.0002, 0.0248, ..., -0.0053, 0.0151, -0.0208], + [-0.0050, 0.0376, -0.0262, ..., 0.0018, 0.0168, 0.0039], + [-0.0110, -0.0084, 0.0369, ..., -0.0102, 0.0071, 0.0117], + ..., + [ 0.0205, -0.0055, -0.0127, ..., -0.0057, -0.0044, 0.0095], + [-0.0154, -0.0017, -0.0012, ..., 0.0026, -0.0132, 0.0012], + [ 0.0082, 0.0055, 0.0048, ..., -0.0060, -0.0069, 0.0101]])Parameter containing: +tensor([-0.0482, -0.0384, 0.5059, ..., -0.0175, -0.0147, 0.0126])Parameter containing: +tensor([[ 0.0060, 0.0181, 0.0104, ..., -0.0065, 0.0091, 0.0008], + [-0.0115, 0.0102, 0.0212, ..., 0.0144, 0.0276, -0.0077], + [ 0.0390, 0.0029, 0.0083, ..., -0.0165, 0.0036, -0.0177], + ..., + [ 0.0054, -0.0068, -0.0049, ..., 0.0166, -0.0177, 0.0042], + [ 0.0016, -0.0031, 0.0076, ..., 0.0091, 0.0008, 0.0024], + [ 0.0034, -0.0059, 0.0107, ..., -0.0199, 0.0139, -0.0083]])Parameter containing: +tensor([ 0.0060, 0.0679, 0.0352, ..., -0.0554, 0.0134, 0.0558])Parameter containing: +tensor([1.2294, 1.2056, 1.1645, ..., 1.8344, 1.1523, 1.2639])Parameter containing: +tensor([-0.0175, -0.0714, -0.1254, ..., -0.2901, -0.1457, 0.1501])Parameter containing: +tensor([[-4.8256e-03, -3.2368e-03, 1.3252e-02, ..., -1.7681e-03, + 2.2354e-02, -1.1436e-02], + [ 4.0253e-02, 3.1097e-02, 1.9760e-02, ..., 5.3787e-03, + -2.2949e-02, 1.4923e-02], + [-1.9789e-05, -2.5848e-02, -1.0681e-02, ..., 1.1975e-04, + 1.0056e-02, 9.3384e-03], + ..., + [ 
1.9211e-02, -1.5373e-02, 5.6839e-03, ..., -1.1314e-02, + -4.1748e-02, 1.5808e-02], + [ 2.8934e-03, -1.8179e-04, 8.8425e-03, ..., -2.5787e-03, + -1.7517e-02, -6.8169e-03], + [ 1.7838e-02, -6.3019e-03, -3.8700e-03, ..., 3.0651e-03, + -3.5019e-03, 1.3748e-02]])Parameter containing: +tensor([-0.2314, -0.3215, -0.0737, ..., -0.3018, -0.1614, -0.3069])Parameter containing: +tensor([[ 0.0107, 0.0326, 0.0092, ..., -0.0069, -0.0051, -0.0002], + [-0.0118, 0.0024, 0.0258, ..., 0.0087, -0.0250, -0.0087], + [ 0.0156, 0.0077, 0.0071, ..., -0.0158, -0.0195, 0.0202], + ..., + [-0.0062, 0.0010, 0.0041, ..., -0.0098, 0.0120, 0.0015], + [-0.0022, 0.0381, -0.0009, ..., 0.0051, 0.0093, 0.0135], + [ 0.0090, -0.0229, -0.0135, ..., 0.0130, 0.0066, 0.0037]])Parameter containing: +tensor([0.0371, 0.0197, 0.0018, ..., 0.0558, 0.0674, 0.0106])Parameter containing: +tensor([1.4988, 1.4287, 1.3546, ..., 0.9505, 1.1804, 1.4063])Parameter containing: +tensor([-0.1733, -0.0029, 0.0176, ..., 0.2492, 0.0635, -0.1153])Parameter containing: +tensor([[ 0.0109, -0.0005, 0.0278, ..., 0.0017, -0.0215, 0.0092], + [ 0.0046, -0.0021, 0.0013, ..., 0.0017, -0.0017, 0.0037], + [-0.0275, 0.0318, 0.0133, ..., 0.0012, 0.0040, -0.0225], + ..., + [ 0.0179, 0.0136, -0.0099, ..., 0.0038, 0.0117, 0.0026], + [ 0.0067, 0.0052, -0.0031, ..., -0.0122, 0.0013, -0.0083], + [ 0.0012, -0.0281, -0.0114, ..., -0.0038, -0.0005, -0.0021]])Parameter containing: +tensor([ 0.2450, 1.7920, 0.0699, ..., 0.0484, -0.0464, 0.0208])Parameter containing: +tensor([[ 0.0059, 0.0109, -0.0109, ..., -0.0163, 0.0007, -0.0298], + [-0.0065, -0.0061, -0.0041, ..., -0.0236, -0.0090, 0.0271], + [ 0.0104, 0.0079, 0.0172, ..., -0.0066, 0.0009, -0.0156], + ..., + [ 0.0096, -0.0068, 0.0006, ..., -0.0150, 0.0118, 0.0032], + [ 0.0230, 0.0083, 0.0289, ..., -0.0219, 0.0005, 0.0038], + [ 0.0172, 0.0228, 0.0059, ..., 0.0067, 0.0123, -0.0109]])Parameter containing: +tensor([ 0.0267, -0.0056, -0.0026, ..., -0.0532, -0.0267, 0.0485])Parameter containing: 
+tensor([1.2355, 1.2508, 1.2161, ..., 1.8124, 1.1440, 1.3011])Parameter containing: +tensor([ 0.1252, -0.0353, 0.1171, ..., -0.1227, -0.0330, 0.1001])Parameter containing: +tensor([[-6.1083e-04, -1.1574e-02, 9.1705e-03, ..., -1.0834e-03, + 8.1482e-03, 3.4065e-03], + [ 1.2016e-02, -1.7960e-02, 3.3379e-03, ..., -8.6365e-03, + -1.7424e-03, -1.5541e-02], + [ 8.0948e-03, -1.1383e-02, -2.7039e-02, ..., 4.2725e-03, + 4.9667e-03, -2.5375e-02], + ..., + [-2.1606e-02, -2.0233e-02, -3.5381e-03, ..., 2.8253e-05, + -1.3222e-02, 7.2975e-03], + [ 2.8515e-03, 1.3855e-02, 1.0794e-04, ..., -3.3092e-03, + -1.4519e-02, 1.1742e-02], + [-1.1467e-02, 1.2001e-02, 1.0672e-03, ..., -3.9520e-03, + -5.1178e-02, 5.3864e-03]])Parameter containing: +tensor([-0.3506, -0.3098, -0.0694, ..., -0.3074, -0.2494, -0.4229])Parameter containing: +tensor([[-0.0110, 0.0193, -0.0116, ..., 0.0081, -0.0065, -0.0187], + [ 0.0120, 0.0100, 0.0045, ..., 0.0063, -0.0106, -0.0092], + [-0.0012, 0.0172, 0.0223, ..., 0.0052, 0.0394, 0.0099], + ..., + [-0.0025, 0.0068, -0.0081, ..., 0.0005, -0.0055, 0.0065], + [ 0.0112, -0.0010, -0.0028, ..., -0.0172, -0.0041, -0.0017], + [ 0.0086, -0.0009, 0.0137, ..., -0.0030, 0.0077, -0.0112]])Parameter containing: +tensor([ 0.0374, -0.0088, -0.0429, ..., 0.0653, -0.0126, -0.0252])Parameter containing: +tensor([1.5180, 1.3799, 1.3971, ..., 0.8399, 1.2614, 1.5007])Parameter containing: +tensor([-0.1863, 0.1162, 0.4045, ..., 0.2292, 0.4198, -0.0957])Parameter containing: +tensor([[-0.0160, 0.0014, -0.0089, ..., 0.0042, 0.0289, 0.0184], + [-0.0010, 0.0089, -0.0017, ..., -0.0152, -0.0108, -0.0008], + [-0.0141, -0.0294, 0.0109, ..., -0.0025, 0.0298, 0.0266], + ..., + [ 0.0245, 0.0203, 0.0064, ..., -0.0128, 0.0092, -0.0031], + [ 0.0211, 0.0237, -0.0171, ..., -0.0165, -0.0070, 0.0133], + [-0.0094, 0.0021, 0.0247, ..., -0.0004, 0.0047, -0.0201]])Parameter containing: +tensor([-0.2849, -0.3125, -0.2026, ..., -0.0510, -0.0885, 0.0077])Parameter containing: +tensor([[ 0.0026, -0.0256, 0.0400, 
..., -0.0214, -0.0089, 0.0125], + [-0.0129, -0.0088, 0.0240, ..., -0.0082, -0.0172, -0.0212], + [-0.0200, -0.0017, 0.0146, ..., -0.0077, 0.0185, -0.0176], + ..., + [-0.0275, 0.0012, -0.0196, ..., 0.0564, 0.0609, -0.0008], + [ 0.0161, 0.0054, -0.0016, ..., -0.0032, 0.0063, 0.0044], + [-0.0055, -0.0287, -0.0144, ..., 0.0080, -0.0101, 0.0110]])Parameter containing: +tensor([ 0.0077, 0.0031, 0.0012, ..., -0.0704, 0.0297, 0.0082])Parameter containing: +tensor([1.2799, 1.2512, 1.2956, ..., 2.2034, 1.1719, 1.3681])Parameter containing: +tensor([ 0.1496, -0.0801, -0.0724, ..., -0.1659, -0.0900, 0.0350])Parameter containing: +tensor([[ 0.0067, 0.0413, 0.0378, ..., 0.0076, 0.0142, 0.0230], + [ 0.0130, 0.0156, 0.0179, ..., 0.0018, 0.0109, 0.0043], + [-0.0078, -0.0166, 0.0107, ..., -0.0015, -0.0044, 0.0111], + ..., + [ 0.0138, 0.0290, 0.0173, ..., -0.0042, 0.0035, 0.0030], + [ 0.0160, 0.0144, 0.0156, ..., -0.0038, -0.0043, 0.0115], + [-0.0084, 0.0176, 0.0231, ..., 0.0015, -0.0203, -0.0239]])Parameter containing: +tensor([-0.2788, -0.1956, -0.3853, ..., -0.3225, -0.2610, -0.0354])Parameter containing: +tensor([[-7.0419e-03, -1.1139e-02, -1.7349e-02, ..., 1.0239e-02, + -1.8906e-02, 2.0798e-02], + [ 1.1635e-02, -8.4457e-03, -7.3700e-03, ..., 1.1932e-02, + -1.0246e-02, -1.2451e-02], + [ 1.3153e-02, -2.9282e-02, -1.9894e-03, ..., 4.7760e-03, + -2.7866e-03, -1.2886e-02], + ..., + [-3.2005e-03, 1.4801e-02, -3.5763e-03, ..., 6.4313e-05, + 1.2386e-04, 2.5702e-04], + [-1.8677e-02, -8.7967e-03, 9.5978e-03, ..., 4.4403e-03, + -1.1940e-02, 2.3422e-02], + [-5.8060e-03, -2.0889e-02, -1.2917e-02, ..., -4.3907e-03, + -7.6561e-03, 2.6611e-02]])Parameter containing: +tensor([ 0.0329, 0.0113, -0.0181, ..., 0.0332, 0.0061, -0.0410])Parameter containing: +tensor([1.6208, 1.4989, 1.4207, ..., 0.7640, 1.2692, 1.4951])Parameter containing: +tensor([-0.1374, -0.0238, -0.0117, ..., 0.3347, 0.1457, -0.0975])Parameter containing: +tensor([[ 0.0034, -0.0225, -0.0031, ..., 0.0137, -0.0365, -0.0123], + 
[-0.0254, -0.0124, 0.0016, ..., -0.0371, 0.0126, -0.0146], + [ 0.0078, -0.0007, -0.0157, ..., 0.0026, -0.0030, 0.0202], + ..., + [ 0.0041, -0.0135, 0.0168, ..., 0.0034, -0.0226, -0.0060], + [-0.0034, -0.0090, 0.0047, ..., -0.0018, 0.0212, -0.0074], + [ 0.0030, 0.0077, -0.0017, ..., -0.0023, -0.0013, 0.0052]])Parameter containing: +tensor([ 0.2683, -0.1324, 0.1324, ..., 0.0209, 0.0130, 0.0207])Parameter containing: +tensor([[ 0.0029, 0.0157, 0.0060, ..., 0.0021, 0.0078, -0.0089], + [ 0.0114, -0.0065, 0.0057, ..., 0.0285, 0.0281, 0.0063], + [-0.0123, -0.0026, 0.0062, ..., 0.0232, -0.0135, -0.0089], + ..., + [-0.0011, 0.0021, -0.0047, ..., -0.0017, -0.0325, -0.0199], + [-0.0076, -0.0072, -0.0037, ..., -0.0192, -0.0359, -0.0052], + [-0.0138, -0.0226, 0.0044, ..., 0.0032, 0.0111, -0.0124]])Parameter containing: +tensor([-0.0062, -0.0194, -0.0133, ..., 0.0403, 0.0331, 0.0198])Parameter containing: +tensor([1.3324, 1.2243, 1.2615, ..., 1.9267, 1.1337, 1.3792])Parameter containing: +tensor([ 0.1416, 0.0005, 0.0165, ..., 0.0163, -0.0729, 0.0122])Parameter containing: +tensor([[ 0.0125, -0.0005, -0.0153, ..., -0.0146, -0.0185, -0.0108], + [-0.0015, 0.0045, 0.0178, ..., -0.0033, 0.0042, -0.0107], + [-0.0038, 0.0046, 0.0096, ..., -0.0217, 0.0142, 0.0295], + ..., + [ 0.0055, -0.0117, -0.0156, ..., -0.0233, -0.0058, 0.0149], + [ 0.0230, -0.0006, 0.0009, ..., 0.0129, 0.0170, 0.0101], + [ 0.0109, 0.0066, -0.0111, ..., 0.0038, 0.0099, -0.0238]])Parameter containing: +tensor([-0.2153, -0.2781, -0.3320, ..., -0.1223, -0.1307, -0.2898])Parameter containing: +tensor([[-0.0037, -0.0316, 0.0164, ..., -0.0018, -0.0181, 0.0028], + [ 0.0187, 0.0093, 0.0047, ..., 0.0109, 0.0137, -0.0057], + [-0.0071, -0.0045, 0.0287, ..., 0.0160, -0.0103, 0.0014], + ..., + [-0.0109, 0.0038, -0.0134, ..., 0.0115, -0.0042, -0.0035], + [-0.0127, 0.0039, 0.0083, ..., 0.0040, -0.0111, 0.0036], + [-0.0054, -0.0070, -0.0073, ..., -0.0131, -0.0262, 0.0085]])Parameter containing: +tensor([ 0.0253, -0.0024, -0.0242, 
..., 0.0956, 0.0208, -0.0150])Parameter containing: +tensor([1.6542, 1.5471, 1.5496, ..., 0.4089, 1.3961, 1.6685])Parameter containing: +tensor([-0.2147, 0.1279, 0.3980, ..., 0.3844, 0.3855, -0.2151])Parameter containing: +tensor([[-0.0031, 0.0022, -0.0024, ..., 0.0046, -0.0105, -0.0062], + [-0.0263, 0.0304, 0.0018, ..., -0.0321, 0.0226, 0.0128], + [ 0.0247, 0.0204, 0.0079, ..., -0.0015, 0.0224, -0.0038], + ..., + [-0.0034, 0.0151, -0.0085, ..., 0.0086, -0.0034, 0.0134], + [ 0.0203, -0.0166, 0.0061, ..., 0.0006, -0.0226, -0.0220], + [ 0.0190, -0.0124, -0.0086, ..., -0.0007, -0.0131, -0.0075]])Parameter containing: +tensor([-2.5195, 0.2338, -0.3826, ..., -0.0098, -0.0044, 0.0407])Parameter containing: +tensor([[-0.0303, -0.0037, 0.0023, ..., 0.0084, 0.0081, -0.0136], + [-0.0157, -0.0159, 0.0211, ..., 0.0249, 0.0152, 0.0164], + [-0.0068, -0.0159, 0.0054, ..., 0.0163, 0.0173, 0.0204], + ..., + [-0.0006, 0.0029, -0.0114, ..., -0.0071, 0.0085, 0.0100], + [-0.0033, -0.0080, -0.0203, ..., -0.0054, 0.0173, 0.0025], + [ 0.0112, -0.0027, 0.0068, ..., 0.0061, -0.0080, 0.0069]])Parameter containing: +tensor([ 0.0108, -0.0551, 0.0180, ..., 0.0265, 0.0322, -0.0401])Parameter containing: +tensor([1.2997, 1.2493, 1.3251, ..., 1.4770, 1.1521, 1.3748])Parameter containing: +tensor([ 0.1529, 0.0496, -0.0569, ..., -0.0100, -0.0241, -0.0367])Parameter containing: +tensor([[ 0.0036, 0.0120, 0.0108, ..., -0.0241, -0.0065, -0.0097], + [-0.0012, -0.0218, -0.0014, ..., -0.0027, -0.0102, 0.0113], + [ 0.0043, 0.0098, -0.0035, ..., -0.0025, -0.0057, 0.0186], + ..., + [-0.0025, -0.0322, -0.0135, ..., -0.0106, -0.0206, -0.0133], + [-0.0045, 0.0262, -0.0152, ..., -0.0032, -0.0122, -0.0036], + [-0.0104, 0.0072, -0.0117, ..., -0.0020, 0.0015, -0.0004]])Parameter containing: +tensor([-0.2783, -0.2571, -0.3367, ..., -0.3469, -0.2042, -0.0554])Parameter containing: +tensor([[ 0.0010, -0.0118, 0.0168, ..., 0.0077, -0.0058, 0.0077], + [ 0.0135, -0.0157, -0.0040, ..., -0.0081, 0.0145, -0.0026], + [ 
0.0177, -0.0216, 0.0124, ..., 0.0067, -0.0014, -0.0008], + ..., + [-0.0105, 0.0073, -0.0061, ..., 0.0008, 0.0031, -0.0009], + [-0.0176, 0.0078, -0.0194, ..., -0.0242, -0.0090, -0.0050], + [ 0.0074, 0.0026, 0.0208, ..., 0.0214, -0.0106, 0.0054]])Parameter containing: +tensor([ 0.0367, -0.0981, -0.0668, ..., 0.0355, 0.0193, -0.0256])Parameter containing: +tensor([1.7683, 1.6419, 1.7225, ..., 0.6681, 1.5146, 1.7884])Parameter containing: +tensor([-0.1888, 0.3456, 0.1489, ..., 0.4134, 0.4308, -0.1223])Parameter containing: +tensor([[-0.0099, -0.0102, 0.0307, ..., 0.0022, -0.0095, 0.0271], + [-0.0269, -0.0032, -0.0007, ..., -0.0077, -0.0161, -0.0114], + [-0.0016, 0.0004, 0.0387, ..., -0.0108, 0.0132, -0.0004], + ..., + [-0.0021, -0.0089, 0.0141, ..., 0.0007, -0.0084, 0.0113], + [ 0.0048, -0.0126, -0.0221, ..., -0.0024, -0.0106, 0.0105], + [-0.0116, -0.0002, -0.0007, ..., -0.0022, -0.0071, -0.0038]])Parameter containing: +tensor([-0.2299, 0.2384, -0.0945, ..., 0.0501, -0.0047, -0.0003])Parameter containing: +tensor([[-0.0283, -0.0055, 0.0025, ..., -0.0084, 0.0048, -0.0027], + [ 0.0133, -0.0057, -0.0082, ..., -0.0007, -0.0018, -0.0043], + [-0.0129, 0.0132, 0.0077, ..., -0.0180, 0.0022, -0.0036], + ..., + [ 0.0090, 0.0050, -0.0102, ..., -0.0129, -0.0157, -0.0071], + [ 0.0154, -0.0024, -0.0170, ..., 0.0126, -0.0098, 0.0101], + [ 0.0222, 0.0012, -0.0260, ..., -0.0199, -0.0145, 0.0066]])Parameter containing: +tensor([-0.0646, -0.0642, 0.0045, ..., -0.0348, -0.0156, -0.0321])Parameter containing: +tensor([1.3934, 1.3553, 1.4854, ..., 1.8728, 1.3167, 1.4949])Parameter containing: +tensor([ 0.0756, -0.1134, -0.0586, ..., -0.0262, -0.0903, -0.1063])Parameter containing: +tensor([[-7.9727e-03, 4.2458e-03, 1.8478e-02, ..., -1.1673e-02, + 1.3763e-02, -6.2256e-03], + [-1.7181e-02, 1.0742e-02, -4.7760e-03, ..., -3.7718e-04, + -2.2888e-02, -8.6594e-03], + [-4.2701e-04, 2.2446e-02, 1.0483e-02, ..., -4.0817e-03, + -1.7151e-02, -2.6047e-02], + ..., + [-2.9617e-02, -7.5722e-03, 8.4043e-06, 
..., -1.1253e-02, + 1.6479e-02, 2.7222e-02], + [-1.2772e-02, 6.8283e-03, 2.5269e-02, ..., -7.0038e-03, + -7.5645e-03, 9.1019e-03], + [ 4.2176e-04, -4.2152e-03, 4.3335e-02, ..., -3.2711e-03, + -1.3786e-02, 1.5656e-02]])Parameter containing: +tensor([-0.3418, -0.2771, -0.3467, ..., -0.3989, -0.2386, -0.2927])Parameter containing: +tensor([[-0.0106, 0.0111, 0.0187, ..., -0.0266, 0.0003, -0.0147], + [ 0.0346, 0.0015, -0.0025, ..., -0.0093, 0.0119, -0.0310], + [-0.0043, -0.0276, 0.0013, ..., -0.0066, 0.0263, 0.0338], + ..., + [-0.0045, 0.0165, 0.0076, ..., 0.0143, -0.0025, -0.0003], + [ 0.0067, -0.0164, 0.0050, ..., 0.0121, -0.0008, -0.0172], + [-0.0008, -0.0125, -0.0156, ..., 0.0319, 0.0113, -0.0105]])Parameter containing: +tensor([-0.0361, -0.0391, 0.0151, ..., -0.0164, 0.0040, -0.0078])Parameter containing: +tensor([2.1112, 2.0118, 2.0347, ..., 0.7085, 1.8153, 2.2010])Parameter containing: +tensor([-0.1631, -0.1508, 0.1484, ..., 0.4431, 0.6810, -0.3282])Parameter containing: +tensor([[-0.0117, -0.0245, -0.0218, ..., 0.0068, -0.0019, -0.0032], + [ 0.0243, 0.0013, -0.0101, ..., 0.0473, -0.0216, 0.0135], + [ 0.0109, -0.0224, -0.0055, ..., -0.0055, -0.0267, -0.0187], + ..., + [-0.0167, 0.0103, -0.0058, ..., -0.0046, -0.0215, 0.0237], + [ 0.0119, -0.0105, 0.0158, ..., 0.0023, -0.0127, -0.0004], + [-0.0119, -0.0276, 0.0225, ..., -0.0024, -0.0047, -0.0064]])Parameter containing: +tensor([ 0.2433, -0.1136, 0.0888, ..., -0.0050, -0.0137, 0.0093])Parameter containing: +tensor([[-0.0302, 0.0129, -0.0099, ..., 0.0201, 0.0093, -0.0045], + [-0.0376, -0.0102, -0.0002, ..., -0.0104, 0.0078, -0.0009], + [ 0.0196, -0.0219, 0.0057, ..., 0.0070, -0.0059, -0.0075], + ..., + [-0.0068, -0.0123, 0.0011, ..., 0.0024, -0.0069, -0.0181], + [ 0.0018, -0.0121, -0.0095, ..., -0.0199, 0.0067, -0.0080], + [-0.0084, 0.0186, 0.0111, ..., -0.0047, 0.0052, 0.0088]])Parameter containing: +tensor([-7.3486e-02, 2.0966e-02, 2.3758e-02, ..., 9.0637e-03, + 1.1623e-05, -1.4076e-02])Parameter containing: 
+tensor([1.3803, 1.3287, 1.4781, ..., 1.5120, 1.3130, 1.4137])Parameter containing: +tensor([ 0.0475, -0.0935, -0.0597, ..., 0.0320, 0.0142, -0.0661])Parameter containing: +tensor([[ 1.6296e-02, -1.0147e-02, 2.2263e-02, ..., 1.8875e-02, + -4.6844e-03, -1.2360e-02], + [ 1.4534e-02, -1.0414e-02, -2.5024e-02, ..., -1.7578e-02, + -3.4729e-02, -2.3346e-02], + [-2.1347e-02, 2.1301e-02, 3.8509e-03, ..., 8.6441e-03, + 1.4847e-02, -6.3400e-03], + ..., + [-4.7302e-03, -2.5574e-02, 7.4959e-03, ..., 3.6087e-03, + 1.5732e-02, -2.2202e-02], + [-6.3324e-04, 8.2550e-03, -1.3161e-02, ..., 5.1918e-03, + 2.1324e-03, 1.3359e-02], + [-5.0240e-03, 4.4479e-03, -1.5625e-02, ..., 1.8707e-02, + -4.8995e-05, 1.2718e-02]])Parameter containing: +tensor([-0.2683, -0.3921, -0.3276, ..., -0.3716, -0.2025, -0.3127])Parameter containing: +tensor([[-0.0052, -0.0143, 0.0135, ..., 0.0038, 0.0296, -0.0021], + [ 0.0280, -0.0070, -0.0129, ..., -0.0207, 0.0208, 0.0257], + [ 0.0018, -0.0234, 0.0009, ..., 0.0089, -0.0099, -0.0107], + ..., + [ 0.0084, -0.0017, 0.0058, ..., -0.0016, -0.0057, -0.0010], + [ 0.0047, -0.0170, -0.0032, ..., 0.0134, -0.0184, 0.0449], + [-0.0015, -0.0398, -0.0143, ..., -0.0135, 0.0247, 0.0222]])Parameter containing: +tensor([ 0.0358, 0.0342, 0.0543, ..., 0.0743, -0.0069, 0.0033])Parameter containing: +tensor([2.5167, 2.3499, 2.4777, ..., 0.5123, 2.0356, 2.4509])Parameter containing: +tensor([-0.2338, -0.0299, 0.1534, ..., 0.4063, 0.7359, -0.2059])Parameter containing: +tensor([[ 0.0072, -0.0024, -0.0093, ..., 0.0206, -0.0025, -0.0194], + [ 0.0068, 0.0083, 0.0218, ..., -0.0244, -0.0298, 0.0023], + [ 0.0273, 0.0015, -0.0178, ..., -0.0870, 0.0066, -0.0008], + ..., + [ 0.0076, -0.0371, -0.0106, ..., 0.0053, -0.0015, -0.0093], + [-0.0015, 0.0093, -0.0339, ..., -0.0024, 0.0004, -0.0021], + [ 0.0035, -0.0088, 0.0025, ..., -0.0005, 0.0056, -0.0153]])Parameter containing: +tensor([ 0.0615, 0.0997, -0.5298, ..., 0.0029, -0.0045, -0.0547])Parameter containing: +tensor([[ 0.0042, -0.0015, 
0.0274, ..., 0.0097, -0.0203, 0.0125], + [-0.0057, 0.0277, 0.0067, ..., 0.0058, -0.0193, 0.0007], + [-0.0005, 0.0042, 0.0217, ..., 0.0109, 0.0060, 0.0009], + ..., + [ 0.0093, -0.0028, -0.0129, ..., 0.0005, 0.0210, -0.0072], + [ 0.0155, 0.0005, 0.0134, ..., -0.0217, -0.0046, 0.0098], + [ 0.0043, -0.0210, -0.0279, ..., -0.0082, -0.0022, 0.0044]])Parameter containing: +tensor([ 0.0168, 0.0077, -0.0467, ..., 0.0064, -0.0126, -0.0271])Parameter containing: +tensor([1.4813, 1.5299, 1.5828, ..., 1.5154, 1.4352, 1.5897])Parameter containing: +tensor([ 0.1628, -0.0896, -0.0374, ..., -0.0098, -0.0610, -0.1625])Parameter containing: +tensor([[-0.0061, -0.0101, 0.0220, ..., 0.0076, -0.0179, -0.0062], + [ 0.0287, 0.0189, 0.0143, ..., -0.0079, 0.0128, -0.0096], + [-0.0176, 0.0025, -0.0220, ..., -0.0191, -0.0070, -0.0005], + ..., + [ 0.0012, -0.0170, -0.0051, ..., -0.0094, -0.0273, 0.0126], + [ 0.0056, -0.0026, 0.0170, ..., 0.0264, -0.0188, -0.0084], + [ 0.0042, 0.0020, 0.0170, ..., -0.0107, -0.0194, -0.0005]])Parameter containing: +tensor([-0.2793, -0.3450, -0.2959, ..., -0.1840, -0.1981, -0.2493])Parameter containing: +tensor([[-1.0500e-03, -1.4481e-02, 9.7084e-04, ..., 3.5362e-03, + -7.4148e-04, 2.0218e-02], + [ 1.8265e-02, -1.7059e-02, -6.9523e-04, ..., -7.4577e-04, + -8.8272e-03, -1.0271e-03], + [ 6.8474e-03, 8.5602e-03, -2.2079e-02, ..., 1.6556e-02, + -1.0653e-03, -2.1194e-02], + ..., + [ 2.0035e-02, -9.2239e-03, 1.4229e-02, ..., -6.3858e-03, + -7.1640e-03, -2.1927e-02], + [ 1.5144e-02, -9.1791e-06, 7.7324e-03, ..., -7.3395e-03, + 3.1433e-03, 9.2697e-03], + [ 7.2021e-03, 2.0950e-02, 8.4610e-03, ..., 9.9106e-03, + -2.2316e-03, -6.6261e-03]])Parameter containing: +tensor([0.0066, 0.0104, 0.0044, ..., 0.0064, 0.0797, 0.0699])Parameter containing: +tensor([2.6710, 2.5634, 2.7691, ..., 0.6788, 2.2533, 2.7433])Parameter containing: +tensor([-0.0355, 0.2774, 0.4173, ..., 0.5667, 0.5320, -0.4676])Parameter containing: +tensor([[ 0.0145, -0.0110, 0.0256, ..., -0.0594, -0.0049, 
0.0130], + [-0.0188, -0.0083, -0.0112, ..., 0.0231, -0.0353, 0.0120], + [ 0.0106, 0.0018, -0.0003, ..., 0.0022, 0.0061, 0.0072], + ..., + [ 0.0017, 0.0005, 0.0002, ..., 0.0049, -0.0219, -0.0394], + [ 0.0120, 0.0053, -0.0002, ..., -0.0002, 0.0002, -0.0100], + [-0.0057, 0.0138, 0.0124, ..., -0.0036, -0.0128, 0.0019]])Parameter containing: +tensor([-0.2327, 0.0399, -0.0326, ..., -0.0056, 0.0197, 0.0396])Parameter containing: +tensor([[ 0.0253, 0.0045, 0.0054, ..., 0.0114, -0.0120, 0.0098], + [ 0.0248, -0.0201, -0.0091, ..., -0.0043, -0.0027, 0.0147], + [ 0.0056, 0.0186, -0.0143, ..., -0.0139, -0.0035, -0.0077], + ..., + [ 0.0012, -0.0047, -0.0184, ..., 0.0032, -0.0123, 0.0104], + [ 0.0084, -0.0137, 0.0252, ..., 0.0189, -0.0143, 0.0102], + [-0.0266, -0.0148, -0.0076, ..., 0.0242, -0.0059, 0.0166]])Parameter containing: +tensor([-0.0083, 0.0090, -0.0957, ..., -0.0067, 0.0007, -0.0046])Parameter containing: +tensor([1.5726, 1.4541, 1.5816, ..., 1.7312, 1.4169, 1.5937])Parameter containing: +tensor([ 0.1270, -0.2203, -0.0099, ..., -0.0846, -0.0867, -0.1574])Parameter containing: +tensor([[-0.0049, 0.0010, -0.0008, ..., 0.0092, -0.0068, -0.0089], + [-0.0189, 0.0112, -0.0008, ..., -0.0095, -0.0150, 0.0131], + [-0.0017, -0.0340, 0.0049, ..., -0.0096, 0.0049, -0.0091], + ..., + [ 0.0015, -0.0103, -0.0238, ..., -0.0044, -0.0164, -0.0042], + [ 0.0059, -0.0020, -0.0025, ..., 0.0057, 0.0186, 0.0068], + [ 0.0137, 0.0040, -0.0026, ..., -0.0155, 0.0179, -0.0174]])Parameter containing: +tensor([-0.1598, -0.3298, -0.3064, ..., -0.3005, -0.3159, -0.1328])Parameter containing: +tensor([[-5.0659e-03, -1.4755e-02, 2.9678e-02, ..., -1.4786e-02, + 3.0472e-02, 1.2962e-02], + [ 8.6517e-03, -2.0859e-02, -1.2672e-02, ..., 1.1673e-02, + -2.9373e-02, 4.4823e-03], + [-1.6266e-02, 4.0253e-02, -6.0081e-03, ..., 2.8193e-05, + -1.5068e-02, -1.5480e-02], + ..., + [-1.2833e-02, 1.1993e-02, -5.6553e-04, ..., 5.5046e-03, + 1.6586e-02, -7.4272e-03], + [ 2.9144e-02, 8.1482e-03, -1.4267e-02, ..., 2.8549e-02, 
+ 3.4962e-03, -7.6218e-03], + [ 2.3270e-02, -1.7654e-02, -1.4374e-02, ..., -3.7155e-03, + -1.8509e-02, -3.0289e-03]])Parameter containing: +tensor([ 0.0115, 0.0317, -0.0131, ..., -0.0563, -0.0150, 0.0325])Parameter containing: +tensor([2.7785, 2.7386, 2.7390, ..., 0.8678, 2.4946, 2.8710])Parameter containing: +tensor([-0.1475, -0.0199, 0.2092, ..., 0.4587, 0.5408, -0.2745])Parameter containing: +tensor([[-0.0120, -0.0190, -0.0403, ..., -0.0117, 0.0023, 0.0093], + [ 0.0185, 0.0156, 0.0064, ..., -0.0211, 0.0304, 0.0128], + [-0.0041, -0.0232, -0.0050, ..., -0.0144, -0.0013, 0.0115], + ..., + [-0.0279, -0.0333, 0.0062, ..., -0.0130, -0.0025, 0.0134], + [ 0.0011, 0.0101, 0.0281, ..., -0.0020, 0.0121, 0.0017], + [-0.0162, 0.0049, -0.0176, ..., 0.0049, 0.0010, -0.0232]])Parameter containing: +tensor([-0.0367, -0.0407, -0.0178, ..., 0.0190, 0.0422, 0.0333])Parameter containing: +tensor([[-0.0003, -0.0005, -0.0012, ..., 0.0035, 0.0030, -0.0334], + [-0.0137, -0.0057, 0.0201, ..., -0.0077, -0.0303, -0.0032], + [-0.0225, -0.0056, 0.0133, ..., 0.0120, -0.0059, -0.0108], + ..., + [ 0.0131, 0.0180, -0.0046, ..., 0.0325, -0.0201, -0.0211], + [-0.0181, 0.0204, -0.0102, ..., -0.0033, 0.0038, -0.0071], + [-0.0171, -0.0187, 0.0197, ..., 0.0140, -0.0235, -0.0155]])Parameter containing: +tensor([-0.0430, 0.0264, -0.0948, ..., -0.0741, -0.0225, -0.0398])Parameter containing: +tensor([1.6421, 1.5236, 1.6723, ..., 1.8496, 1.4900, 1.6385])Parameter containing: +tensor([-0.0308, -0.3167, -0.0299, ..., -0.0403, -0.0753, -0.2397])Parameter containing: +tensor([[-0.0262, -0.0157, 0.0032, ..., -0.0235, -0.0293, -0.0457], + [ 0.0032, -0.0053, 0.0187, ..., 0.0109, 0.0186, 0.0035], + [ 0.0222, -0.0173, 0.0017, ..., -0.0083, 0.0020, 0.0136], + ..., + [ 0.0419, 0.0184, -0.0148, ..., 0.0049, 0.0197, -0.0116], + [ 0.0028, -0.0195, 0.0098, ..., 0.0172, -0.0162, -0.0341], + [ 0.0033, -0.0130, -0.0162, ..., 0.0044, -0.0219, -0.0079]])Parameter containing: +tensor([-0.3088, -0.1615, -0.2668, ..., -0.2515, 
-0.2261, -0.2349])Parameter containing: +tensor([[-0.0020, -0.0129, -0.0182, ..., 0.0192, -0.0327, -0.0179], + [ 0.0028, -0.0038, -0.0319, ..., -0.0060, 0.0162, 0.0273], + [ 0.0273, -0.0120, -0.0101, ..., -0.0013, -0.0036, -0.0072], + ..., + [ 0.0023, -0.0175, -0.0372, ..., 0.0134, 0.0053, 0.0087], + [ 0.0148, -0.0124, -0.0037, ..., 0.0165, -0.0067, 0.0085], + [-0.0011, 0.0023, 0.0334, ..., -0.0055, -0.0134, 0.0073]])Parameter containing: +tensor([ 0.1197, -0.0195, 0.1592, ..., -0.0289, -0.0276, -0.0573])Parameter containing: +tensor([3.1758, 3.2323, 3.2080, ..., 1.1438, 2.6186, 3.2341])Parameter containing: +tensor([-0.3706, 0.0532, -0.3175, ..., 0.1710, 0.3347, -0.2024])Parameter containing: +tensor([[ 2.1271e-02, 1.1780e-02, -6.7997e-04, ..., 6.0921e-03, + -3.5896e-03, -7.0524e-04], + [ 5.5809e-03, -1.0429e-02, -1.2751e-03, ..., 2.9259e-03, + 3.8683e-05, 2.1801e-03], + [-2.0950e-02, 3.2496e-04, -2.4063e-02, ..., -6.4819e-02, + -3.2349e-03, -4.4746e-03], + ..., + [ 1.0742e-02, -3.3913e-03, -1.1414e-02, ..., -2.3003e-03, + 2.1942e-02, 2.1652e-02], + [-1.3786e-02, 1.0185e-02, -4.3068e-03, ..., 9.9850e-04, + 7.5111e-03, 2.2797e-02], + [ 4.1842e-05, 1.5434e-02, -5.5361e-04, ..., 8.8730e-03, + 1.2108e-02, 1.5915e-02]])Parameter containing: +tensor([ 0.0933, -0.0383, -0.0063, ..., 0.0316, 0.0111, -0.0547])Parameter containing: +tensor([[ 0.0098, -0.0170, 0.0142, ..., -0.0043, -0.0189, -0.0120], + [-0.0104, 0.0156, -0.0006, ..., -0.0124, 0.0124, -0.0015], + [ 0.0062, -0.0193, 0.0082, ..., 0.0038, 0.0258, -0.0131], + ..., + [ 0.0200, 0.0005, -0.0168, ..., -0.0123, -0.0038, 0.0042], + [ 0.0073, 0.0172, -0.0105, ..., 0.0243, 0.0107, 0.0142], + [ 0.0281, 0.0236, 0.0068, ..., -0.0044, -0.0162, -0.0067]])Parameter containing: +tensor([ 0.1116, -0.0343, -0.0087, ..., -0.0480, -0.0558, -0.0237])Parameter containing: +tensor([1.5671, 1.5268, 1.5659, ..., 0.7991, 1.4337, 1.7072])Parameter containing: +tensor([ 0.0451, -0.2257, -0.1079, ..., 0.0651, -0.1020, -0.2032])Parameter 
containing: +tensor([[-0.0205, -0.0153, 0.0058, ..., 0.0221, 0.0021, -0.0075], + [-0.0245, 0.0084, 0.0146, ..., -0.0019, 0.0069, -0.0050], + [ 0.0138, 0.0011, 0.0013, ..., 0.0371, 0.0046, 0.0199], + ..., + [ 0.0202, 0.0034, 0.0115, ..., -0.0043, -0.0131, -0.0177], + [ 0.0054, 0.0023, 0.0239, ..., -0.0273, 0.0084, -0.0098], + [ 0.0170, 0.0049, -0.0137, ..., -0.0036, -0.0199, 0.0079]])Parameter containing: +tensor([-0.2671, -0.2556, -0.2236, ..., -0.2886, -0.2778, -0.0958])Parameter containing: +tensor([[-0.0092, -0.0169, -0.0232, ..., 0.0134, -0.0171, -0.0194], + [-0.0089, 0.0153, -0.0176, ..., -0.0238, 0.0060, -0.0118], + [ 0.0023, -0.0021, 0.0147, ..., -0.0046, 0.0072, 0.0077], + ..., + [-0.0114, -0.0102, 0.0014, ..., 0.0213, 0.0042, -0.0110], + [ 0.0055, -0.0080, -0.0157, ..., -0.0074, -0.0366, 0.0046], + [ 0.0316, 0.0016, 0.0092, ..., -0.0057, -0.0119, -0.0157]])Parameter containing: +tensor([-0.0195, -0.0715, 0.1113, ..., -0.1528, 0.0856, -0.0403])Parameter containing: +tensor([3.1596, 3.0366, 2.9180, ..., 1.5081, 2.5528, 3.1860])Parameter containing: +tensor([ 0.2221, 0.6211, -0.4953, ..., 0.2567, 0.0674, -0.4556])Parameter containing: +tensor([[ 0.0018, 0.0040, -0.0024, ..., 0.0355, -0.0060, -0.0008], + [ 0.0084, 0.0003, -0.0255, ..., 0.0059, 0.0073, 0.0152], + [-0.0014, -0.0324, -0.0615, ..., 0.0012, -0.0250, 0.0104], + ..., + [ 0.0085, -0.0075, 0.0259, ..., -0.0010, 0.0261, -0.0071], + [-0.0081, 0.0082, 0.0233, ..., 0.0083, -0.0072, 0.0106], + [-0.0114, 0.0072, 0.0143, ..., -0.0098, -0.0095, 0.0079]])Parameter containing: +tensor([-0.4348, -0.0032, -0.1772, ..., -0.0727, 0.0069, -0.1202])Parameter containing: +tensor([[ 1.5135e-03, 2.1210e-02, -7.4272e-03, ..., -6.0768e-03, + -2.6584e-04, 1.1932e-02], + [ 1.6260e-04, 3.7441e-03, -1.4343e-02, ..., 1.8158e-02, + -1.2825e-02, -8.9407e-07], + [ 7.9193e-03, -1.8036e-02, 1.1223e-02, ..., -3.8147e-02, + -2.9087e-03, -5.4131e-03], + ..., + [-1.4465e-02, 1.2436e-02, -1.3103e-03, ..., -8.3694e-03, + 1.7273e-02, 
-9.1934e-04], + [-3.6774e-03, 3.2272e-03, 2.3682e-02, ..., -4.9706e-03, + 7.7705e-03, -1.4359e-02], + [ 1.0548e-03, -1.9181e-04, -1.6556e-02, ..., -1.1215e-02, + -6.9504e-03, -1.4145e-02]])Parameter containing: +tensor([-0.0270, -0.0575, 0.0640, ..., -0.1500, 0.0551, -0.1482])Parameter containing: +tensor([1.5692, 1.8371, 1.9328, ..., 0.8499, 1.4697, 1.7667])Parameter containing: +tensor([0.3892, 0.2379, 0.1540, ..., 0.6268, 0.2169, 0.1550])Parameter containing: +tensor([[-0.0026, 0.0020, -0.0059, ..., 0.0052, -0.0039, -0.0026], + [ 0.0157, -0.0013, -0.0331, ..., 0.0240, -0.0025, -0.0044], + [ 0.0204, 0.0067, -0.0038, ..., -0.0049, 0.0252, -0.0015], + ..., + [ 0.0077, -0.0031, 0.0343, ..., -0.0061, 0.0099, -0.0152], + [-0.0005, 0.0071, -0.0140, ..., -0.0133, -0.0071, 0.0009], + [-0.0012, 0.0004, 0.0090, ..., -0.0095, -0.0076, -0.0047]])Parameter containing: +tensor([-0.3181, -0.2350, -0.2834, ..., -0.2498, -0.1847, -0.2732])Parameter containing: +tensor([[ 0.0366, 0.0088, -0.0167, ..., -0.0147, -0.0033, 0.0076], + [ 0.0050, 0.0103, 0.0122, ..., 0.0073, -0.0067, -0.0138], + [-0.0014, 0.0265, 0.0125, ..., -0.0273, 0.0040, -0.0028], + ..., + [ 0.0138, 0.0236, -0.0157, ..., 0.0255, -0.0269, -0.0320], + [ 0.0021, -0.0071, 0.0076, ..., -0.0042, 0.0137, -0.0034], + [-0.0111, 0.0175, -0.0121, ..., -0.0294, -0.0013, -0.0084]])Parameter containing: +tensor([-0.1592, -0.0064, 0.1971, ..., 0.0551, -0.0191, 0.0068])Parameter containing: +tensor([2.5678, 2.3816, 2.5756, ..., 1.8250, 2.4113, 2.7505])Parameter containing: +tensor([ 0.1836, 0.3324, -0.2291, ..., -0.1089, 0.5930, -0.2813])Parameter containing: +tensor([[ 0.0123, -0.0058, 0.0023, ..., 0.0007, 0.0222, -0.0039], + [-0.0170, 0.0098, 0.0225, ..., -0.0176, -0.0123, -0.0115], + [-0.0039, -0.0008, 0.0077, ..., 0.0050, 0.0103, -0.0090], + ..., + [ 0.0026, 0.0136, -0.0086, ..., 0.0001, -0.0151, -0.0018], + [ 0.0049, 0.0023, -0.0202, ..., 0.0277, 0.0162, -0.0295], + [-0.0040, 0.0099, -0.0184, ..., -0.0405, -0.0316, 
-0.0159]])Parameter containing: +tensor([ 0.0484, -0.0519, -1.9238, ..., 0.1216, -0.0272, 0.0114])Parameter containing: +tensor([[-1.9836e-02, 4.5563e-02, -5.3253e-03, ..., 7.5989e-03, + 7.5607e-03, 2.2369e-02], + [ 2.1725e-03, 1.5160e-02, -1.8494e-02, ..., 6.2218e-03, + -9.6977e-05, 1.4214e-02], + [-7.4997e-03, 1.7151e-02, -1.4481e-02, ..., -2.2156e-02, + 1.0445e-02, 9.1171e-03], + ..., + [-2.1515e-02, -1.4336e-02, -3.9558e-03, ..., 2.7351e-03, + -3.2997e-03, 2.3087e-02], + [ 1.9348e-02, 1.7441e-02, 4.3488e-03, ..., 6.1913e-03, + -1.8509e-02, 2.2385e-02], + [ 1.6891e-02, 7.5951e-03, -1.6037e-02, ..., -4.5509e-03, + 6.0081e-03, 1.7471e-02]])Parameter containing: +tensor([-0.2128, 0.1381, 0.1891, ..., 0.0071, 0.0607, -0.0499])Parameter containing: +tensor([1.5486, 1.4414, 1.4516, ..., 0.8932, 1.4491, 1.5433])Parameter containing: +tensor([-0.0080, -0.0616, -0.0676, ..., -0.0960, -0.1752, -0.1096])Parameter containing: +tensor([[ 0.0178, -0.0028, -0.0424, ..., -0.0160, -0.0282, 0.0149], + [ 0.0117, 0.0109, -0.0199, ..., 0.0012, 0.0099, -0.0078], + [-0.0045, -0.0047, -0.0147, ..., -0.0005, -0.0079, -0.0103], + ..., + [-0.0171, -0.0022, -0.0138, ..., 0.0246, -0.0203, -0.0171], + [ 0.0086, 0.0179, -0.0107, ..., -0.0160, -0.0177, -0.0097], + [ 0.0428, -0.0029, -0.0069, ..., -0.0147, 0.0129, 0.0242]])Parameter containing: +tensor([-0.2034, -0.6177, -0.2632, ..., -0.2834, -0.4905, -0.3958])Parameter containing: +tensor([[-0.0127, -0.0107, 0.0178, ..., -0.0069, -0.0052, 0.0050], + [ 0.0019, -0.0100, -0.0021, ..., 0.0078, 0.0061, -0.0132], + [-0.0086, -0.0003, 0.0180, ..., 0.0017, 0.0049, 0.0218], + ..., + [-0.0206, -0.0111, -0.0025, ..., -0.0035, 0.0097, 0.0248], + [ 0.0121, -0.0078, 0.0101, ..., -0.0093, 0.0092, -0.0375], + [ 0.0031, -0.0039, 0.0001, ..., -0.0069, 0.0013, 0.0023]])Parameter containing: +tensor([ 0.0218, -0.1331, -0.1234, ..., -0.1169, 0.0630, 0.0916])Parameter containing: +tensor([1.6224, 1.6137, 1.6369, ..., 1.4513, 1.7169, 1.8505])Parameter containing: 
+tensor([-0.0204, -0.0891, 0.0739, ..., 0.0297, 0.1517, -0.2596])Parameter containing: +tensor([0.9374, 1.0217, 0.9349, ..., 0.8221, 1.0595, 1.0507])Parameter containing: +tensor([-0.0061, 0.1510, -0.0549, ..., 0.2748, 0.0765, 0.0091])Parameter containing: +tensor([[ 0.0016, 0.0020, 0.0002, ..., -0.0013, 0.0008, 0.0015], + [ 0.0042, 0.0029, 0.0002, ..., 0.0010, 0.0015, -0.0012], + [ 0.0018, 0.0007, -0.0012, ..., -0.0029, -0.0009, 0.0026], + ..., + [ 0.0216, 0.0055, -0.0101, ..., -0.0065, -0.0029, 0.0037], + [ 0.0188, 0.0073, -0.0077, ..., -0.0025, -0.0009, 0.0057], + [ 0.0330, 0.0281, 0.0289, ..., 0.0160, 0.0102, -0.0310]])Parameter containing: +tensor([[-0.0109, 0.0096, -0.0035, ..., -0.0010, 0.0115, -0.0039], + [-0.0054, -0.0049, 0.0055, ..., 0.0239, 0.0171, -0.0071], + [ 0.0032, 0.0101, -0.0155, ..., 0.0070, -0.0119, -0.0098], + ..., + [-0.0112, 0.0009, 0.0023, ..., -0.0169, -0.0096, -0.0147], + [ 0.0080, 0.0086, 0.0201, ..., -0.0108, -0.0191, 0.0043], + [-0.0168, -0.0018, -0.0156, ..., 0.0095, 0.0383, 0.0007]])Parameter containing: +tensor([[ 0.0139, 0.0147, -0.0089, ..., -0.0349, -0.0042, -0.0188], + [-0.0586, -0.0059, -0.0179, ..., 0.0012, -0.0068, 0.0254], + [-0.0211, -0.0321, 0.0308, ..., -0.0189, 0.0091, 0.0066], + ..., + [-0.0217, -0.0089, -0.0143, ..., -0.0153, 0.0053, 0.0016], + [-0.0086, -0.0083, -0.0049, ..., 0.0208, -0.0048, -0.0041], + [-0.0087, -0.0024, 0.0105, ..., -0.0037, -0.0148, 0.0030]])Parameter containing: +tensor([-0.2406, 0.1490, 0.4639, ..., -0.0241, 0.0349, -0.0144])Parameter containing: +tensor([[ 3.2120e-03, 5.4474e-03, -1.3733e-02, ..., -8.3351e-04, + -6.3782e-03, 1.3786e-02], + [-9.3231e-03, 9.2888e-04, -1.3893e-02, ..., -1.1345e-02, + 1.7748e-03, -8.9569e-03], + [ 5.6648e-04, 1.0345e-02, 8.1718e-05, ..., 1.3893e-02, + 7.3791e-05, 6.1369e-04], + ..., + [-3.0212e-02, 3.7193e-03, 1.2009e-02, ..., 7.0229e-03, + 8.0566e-03, 1.4572e-02], + [ 6.4421e-04, -1.0941e-02, -6.3133e-03, ..., 5.6953e-03, + -7.6637e-03, -2.9297e-03], + 
[-4.3526e-03, 4.7607e-03, -6.6528e-03, ..., 7.3853e-03, + 4.3716e-03, 7.4348e-03]])Parameter containing: +tensor([-6.8054e-02, -3.2990e-02, 4.1809e-02, -1.0217e-01, -5.4932e-03, + 4.2877e-03, -3.8681e-03, -4.8370e-02, -2.2751e-02, -2.1248e-03, + 1.4526e-02, -3.3607e-03, 4.7058e-02, 1.4565e-02, -6.0730e-02, + -1.1035e-01, -2.3251e-03, 2.5635e-02, -6.1523e-02, -1.0469e+00, + -3.0869e-02, 6.7078e-02, -1.4503e-02, 2.6855e-02, -7.4646e-02, + 9.9121e-02, 1.1932e-02, -3.7598e-02, -2.9980e-01, 7.1533e-02, + -2.9648e-02, -1.3227e-03, 2.5940e-02, 6.6650e-02, 5.8899e-02, + -2.0615e-02, -3.5889e-02, 1.6830e-02, 1.0229e-01, 5.3040e-02, + 3.8280e-03, -2.3972e-02, 1.2512e-01, -4.3579e-02, 4.3335e-02, + 2.9175e-02, -2.9160e-02, -3.9093e-02, -2.2247e-02, 9.2850e-03, + -9.8511e-02, 3.3478e-02, -4.2023e-02, -3.2043e-02, -4.7394e-02, + 1.1938e-01, 5.7709e-02, 1.2903e-01, -2.9327e-02, -1.1314e-02, + 7.3090e-03, 3.3844e-02, 2.8290e-02, -1.6266e-02, 7.0740e-02, + -7.3486e-02, 5.4413e-02, -6.0120e-03, 5.1651e-03, -1.4600e-01, + -3.6896e-02, 2.1088e-02, 3.6914e-01, 9.0393e-02, -5.8517e-03, + 8.8318e-02, -8.3847e-03, 9.6512e-03, -3.3783e-02, -4.0710e-02, + 3.9703e-02, -2.7776e-04, -3.7262e-02, 8.9539e-02, -7.3853e-02, + -3.6743e-02, 1.1426e-01, 5.7335e-03, 6.6589e-02, 3.0502e-02, + 2.4170e-02, 6.7017e-02, -4.7363e-02, 1.1696e-02, -5.0568e-02, + 3.9001e-02, 1.2695e-02, -3.1647e-02, -4.1016e-02, -6.2683e-02, + -2.7084e-02, -2.3511e-01, 2.4002e-02, 1.0413e-01, 1.2520e-02, + 1.4908e-02, -8.5693e-02, -6.4575e-02, -2.4414e-02, -4.9408e-02, + 3.0045e-02, 2.5436e-02, -4.7333e-02, -3.4576e-02, -2.5772e-02, + 2.5345e-02, 7.3669e-02, 2.6398e-02, -1.2976e-01, 5.1544e-02, + 6.9199e-03, -6.0028e-02, -8.6792e-02, 1.3252e-02, 1.9196e-02, + -1.3283e-02, 1.0910e-02, 3.8025e-02, 7.4120e-03, -2.3865e-02, + -3.4882e-02, 4.7731e-04, -7.3059e-02, -1.1017e-02, -5.8685e-02, + -2.5238e-02, -2.3773e-02, 5.0201e-02, -2.6428e-02, -5.1361e-02, + -7.4219e-02, 4.5624e-02, 5.3192e-02, 1.3208e-01, 4.1931e-02, + 1.5083e-02, 
-1.1676e-01, 7.9895e-02, 6.4209e-02, 1.0178e-02, + 6.6681e-03, 8.0490e-03, -2.8870e-02, -6.2790e-03, -4.5357e-03, + -7.2266e-02, -6.2744e-02, -4.0955e-02, 1.5533e-02, -2.6749e-02, + 2.5845e-03, -8.1787e-02, -1.1185e-02, 1.2634e-01, -2.3071e-02, + -2.1301e-02, 5.0415e-02, 1.6006e-02, 1.5850e-03, -7.7362e-03, + -2.8809e-02, -1.2871e-02, -1.6708e-02, -1.0777e-03, 2.6367e-02, + -7.3395e-03, -1.2238e-02, 2.3804e-02, -1.8433e-02, 5.7640e-03, + -3.2379e-02, -2.2598e-02, 1.7105e-02, 2.0096e-02, -6.7871e-02, + 3.6926e-02, -3.5248e-02, 1.6699e-01, 4.4495e-02, 1.0643e-02, + 6.1829e-02, -5.8960e-02, -2.0401e-02, 1.4259e-02, 1.8372e-02, + 1.3344e-02, 1.5945e-02, 1.5259e-02, 2.3511e-01, 5.3436e-02, + -2.8702e-02, -3.5767e-02, -7.1533e-02, 2.8320e-02, -3.3447e-02, + -4.7516e-02, -2.3035e-01, -9.4757e-03, 1.0022e-01, -4.8004e-02, + 4.8248e-02, 5.3329e-03, -1.3863e-02, 2.1835e-02, -1.0999e-01, + 4.5776e-02, 6.6772e-02, 1.4633e-02, -7.9956e-02, -2.9129e-02, + 7.8003e-02, 8.9050e-02, 1.2866e-01, 2.3392e-02, 3.8666e-02, + -1.2779e-02, 1.0010e-01, 5.1361e-02, 2.8229e-02, -2.4048e-02, + -8.2031e-02, 2.1572e-03, 1.1932e-02, 4.0558e-02, 1.6785e-02, + -5.2948e-02, 1.3023e-02, -3.5431e-02, 6.7215e-03, 5.4291e-02, + 1.8219e-02, 6.2439e-02, 9.8724e-03, 1.6693e-02, 3.9520e-02, + 1.7761e-02, -6.9952e-04, -7.2388e-02, -2.2934e-02, -3.5400e-02, + -5.9448e-02, -6.7566e-02, 1.5945e-02, -4.6814e-02, 1.3969e-02, + -1.1818e-02, -3.3112e-02, 1.5488e-03, 8.3618e-02, 2.3468e-02, + 2.1317e-02, -1.1615e-01, 7.6752e-03, 1.2589e-02, 2.4185e-02, + 4.6021e-02, 7.3662e-03, 4.4403e-02, -4.4785e-03, -5.6610e-03, + 4.7180e-02, 8.5144e-03, 2.9205e-02, -3.5370e-02, 6.6956e-02, + -5.9204e-03, 1.6235e-02, 5.1819e-02, -3.0167e-02, -4.4739e-02, + -3.5229e-03, -1.7444e-01, 1.3969e-02, 9.8343e-03, 1.5022e-02, + 2.7435e-02, 1.0309e-01, 6.2141e-03, 6.8848e-02, -3.3936e-02, + -8.7036e-02, -4.8737e-02, -6.1218e-02, -3.6224e-02, -5.8651e-04, + -2.9316e-03, 2.5574e-02, 4.9934e-03, -6.0394e-02, -1.1604e-02, + 6.9641e-02, 
-2.2621e-03, -3.5405e-04, 9.9121e-02, -5.4199e-02, + 5.6976e-02, 7.6782e-02, 3.0914e-02, 5.4718e-02, 8.3374e-02, + -5.9776e-03, 1.4679e-02, -8.5449e-02, -2.2125e-02, 1.2827e-03, + 2.2003e-02, -1.9577e-02, 1.0840e-01, 1.0445e-02, -3.1006e-02, + -1.0608e-01, -2.5063e-03, -3.9398e-02, 4.1473e-02, 9.4531e-01, + -1.7480e-01, 2.6947e-02, 9.5459e-02, -9.0866e-03, -1.5762e-02, + 9.8022e-02, 4.6143e-02, 2.5925e-02, -1.1609e-01, -2.6764e-02, + -3.0731e-02, -6.2469e-02, -2.6154e-02, 1.9211e-02, -1.2093e-02, + 2.5696e-02, 2.4597e-02, 3.1036e-02, -3.3356e-02, -3.4210e-02, + -6.2656e-04, 1.9779e-03, 3.5645e-02, 1.4175e-02, 6.9763e-02, + 3.1395e-03, 1.4519e-02, -7.8506e-03, 1.6876e-02, 1.1185e-02, + -2.8137e-02, -8.9233e-02, 7.1899e-02, 9.4528e-03, 1.0254e-02, + -2.3453e-02, -4.9194e-02, 5.1880e-02, -3.7750e-02, -2.7008e-03, + -2.6794e-02, 3.9001e-02, 2.9114e-02, 9.4360e-02, -3.6469e-02, + 8.0322e-02, -2.8580e-02, -7.2327e-02, 7.2632e-02, 6.1340e-02, + -9.0576e-02, -5.8823e-03, 3.3722e-02, -1.2524e-01, -2.5284e-02, + -5.0812e-03, -5.8228e-02, -1.0323e-02, -5.1971e-02, 1.4854e-02, + 6.3660e-02, -2.4357e-03, 6.1676e-02, 1.5701e-02, -6.9763e-02, + -5.1918e-03, 1.1115e-01, 9.1370e-02, -3.6392e-03, -5.7648e-02, + 3.8075e-04, 1.0559e-02, 6.3477e-02, -3.2623e-02, -3.0762e-02, + -5.0697e-03, -5.0171e-02, 7.5806e-02, 3.7018e-02, 1.3802e-02, + 6.2317e-02, 7.5317e-02, 6.1707e-02, 9.2346e-02, -1.6541e-02, + -4.5349e-02, -1.5888e-03, 8.4412e-02, -1.1151e-01, 1.6040e-01, + -4.4531e-01, -2.8351e-02, 7.9498e-03, 6.2927e-02, 3.2043e-02, + 6.7627e-02, 6.7810e-02, -2.2354e-02, -6.8848e-02, -2.7359e-02, + -1.2466e-02, -1.7288e-02, 3.1219e-02, -1.2909e-02, -7.4768e-02, + -8.1726e-02, 6.1676e-02, -2.0020e-02, -3.9597e-03, 1.9165e-01, + -5.9296e-02, 1.4763e-02, 2.3895e-02, 9.0332e-03, 1.3268e-02, + -2.5528e-02, 3.5217e-02, -2.2583e-02, 1.4984e-02, 1.0956e-02, + -3.8223e-03, -3.0579e-02, 2.8114e-03, -5.1056e-02, 8.6426e-02, + -3.9795e-02, -1.8005e-02, -9.5886e-02, 1.1017e-02, -6.2225e-02, + -3.1982e-02, 
5.2765e-02, 3.7811e-02, 3.0155e-03, -3.3447e-02, + 2.3098e-03, 3.8300e-02, -7.0724e-03, 2.4094e-02, 3.9856e-02, + 5.1003e-03, -2.6169e-02, 1.3672e-02, 1.8967e-02, -7.4829e-02, + -2.9785e-02, -1.1353e-01, 8.1787e-03, -1.0760e-01, 1.2680e-02, + -9.7733e-03, -7.5684e-02, 4.1504e-03, 1.5175e-02, -5.3925e-02, + -6.9885e-03, -8.6731e-02, -1.7380e-02, -4.2175e-02, -3.3630e-02, + -7.9041e-02, -9.5886e-02, 9.3384e-02, -1.5327e-02, 1.0315e-02, + 7.7896e-03, -2.7298e-02, -3.5278e-02, -2.7573e-02, -6.9214e-02, + 1.7685e-02, 7.1106e-03, 1.8295e-02, 3.5522e-02, 3.6438e-02, + 3.7842e-02, -8.8959e-03, 3.0457e-02, -2.2018e-02, 6.5918e-02, + 1.4091e-02, -8.5144e-02, 3.9093e-02, -3.1250e-02, 3.2898e-02, + 4.5349e-02, -4.8981e-03, -2.3346e-02, -1.4252e-02, -3.4973e-02, + -7.4959e-03, 1.8967e-02, 8.8043e-03, 1.5701e-02, -3.9612e-02, + -8.4610e-03, -7.1411e-02, -1.5762e-02, 1.0944e-01, -4.9042e-02, + 1.1520e-02, -5.0964e-02, -5.5511e-02, 7.5245e-04, -2.2736e-02, + 1.3863e-02, -9.8938e-02, -5.9631e-02, -1.8616e-02, 1.4084e-02, + 2.2812e-02, 3.1342e-02, 2.8580e-02, 2.3155e-03, -5.0201e-02, + 2.8488e-02, 3.7354e-02, 2.1378e-02, -3.1708e-02, 9.5703e-02, + -7.6050e-02, 5.0781e-02, 7.0915e-03, 5.2368e-02, -2.5894e-02, + -2.5925e-02, -3.4943e-02, 2.7786e-02, 1.8723e-02, 5.9296e-02, + 1.9211e-02, 4.8889e-02, 6.6772e-02, -4.4952e-02, -2.7298e-02, + 3.8567e-03, -1.3252e-02, -6.6467e-02, 1.8066e-02, -3.2288e-02, + 5.1239e-02, -5.8075e-02, -5.9509e-02, 1.2122e-01, 2.5482e-02, + 9.1003e-02, 6.6467e-02, -3.9154e-02, -2.9022e-02, -4.0100e-02, + 3.3295e-02, -3.6469e-02, -5.4413e-02, 4.5258e-02, -4.5929e-02, + -1.8219e-02, -6.0638e-02, 1.9638e-02, 5.7602e-03, -3.3234e-02, + -2.8839e-03, 3.9215e-02, -4.5990e-02, 4.1229e-02, 6.1951e-03, + 5.2734e-02, 4.0497e-02, -7.1594e-02, -6.1554e-02, -4.0253e-02, + -6.9199e-03, 2.4307e-02, 4.0863e-02, -9.3307e-03, -4.0527e-02, + 3.4088e-02, 1.7509e-03, 2.4307e-02, -4.4983e-02, 4.6875e-02, + -7.0496e-02, 8.0872e-02, 3.1891e-02, -3.9825e-02, 3.0853e-02, + 6.2195e-02, 
6.9885e-02, 7.8430e-02, 5.4398e-03, -7.7209e-02, + -2.5879e-02, 5.3528e-02, 1.0399e-02, 2.3384e-03, 1.1877e-01, + 7.5500e-02, -1.1719e-02, -3.9795e-02, -3.1143e-02, -1.6998e-02, + -3.6163e-02, -2.7847e-02, 5.0812e-03, -2.8656e-02, 3.3203e-02, + 6.3049e-02, -2.5665e-02, -5.2490e-02, 4.9500e-02, 5.5054e-02, + -2.0462e-02, 3.9825e-02, 5.5908e-02, 3.5583e-02, 6.1066e-02, + 1.4175e-02, -5.0751e-02, -6.8848e-02, -1.0551e-02, -5.2551e-02, + 6.3538e-02, 4.2419e-02, 1.3580e-03, -6.4880e-02, 2.7115e-02, + 2.2491e-02, -1.0284e-01, -5.1208e-02, -9.2468e-03, 3.0960e-02, + 4.0161e-02, 4.1107e-02, 4.2908e-02, -4.8920e-02, -6.6757e-03, + -1.2726e-02, 5.1849e-02, 3.6041e-02, -2.0264e-02, -3.6285e-02, + -2.1423e-02, 4.6692e-03, -1.8871e-04, -3.7018e-02, 4.8615e-02, + -3.8452e-02, -2.3209e-02, 8.7585e-02, -3.8757e-03, 4.6265e-02, + -3.4790e-03, -1.3857e-03, -3.9612e-02, 7.3608e-02, 3.4370e-03, + 3.3798e-03, 3.3374e-01, -1.3329e-02, -1.2596e-02, 1.2451e-02, + 4.9706e-03, -7.4585e-02, -5.8105e-02, 3.9215e-03, 5.8823e-03, + 4.3610e-02, 6.9275e-02, -5.4535e-02, -2.2919e-02, 3.4271e-02, + 8.8013e-02, 1.5236e-02, 1.9028e-02, 2.9572e-02, -4.9362e-03, + -1.6998e-02, 2.4063e-02, -6.8359e-02, -4.0710e-02, -8.0750e-02, + -2.1484e-02, -4.3976e-02, -5.2521e-02, 1.6144e-02, -1.3771e-02, + -7.4615e-03, -8.8318e-02, 3.7750e-02, 2.3937e-03, 4.7668e-02, + 4.7363e-02, 3.9520e-02, -2.2736e-02, 1.9348e-02, 2.7359e-02, + 1.1086e-02, -7.9163e-02, -3.7262e-02, -3.1525e-02, -4.9591e-02, + -5.1056e-02, -3.1830e-02, -5.1575e-02, 6.5491e-02, 2.6031e-02, + 1.2321e-03, 5.7800e-02, 1.0864e-02, 4.7241e-02, 4.1290e-02, + -6.8665e-02, -3.8471e-03, 5.8838e-02, -9.4986e-03, 2.6894e-03, + 5.6854e-02, 7.0862e-02, -3.1311e-02, 1.4397e-02, -7.6065e-03, + -5.2429e-02, -3.7018e-02, -1.4549e-02, 2.1553e-03, 1.0292e-02, + -2.3651e-02, -5.5809e-03, 5.0774e-03, -4.6051e-02, 1.0658e-02, + 8.3847e-03, 2.6440e-01, -9.6741e-02, 8.6365e-02, -1.8860e-02, + 1.4420e-02, 1.4282e-02, -1.6235e-02, -3.0167e-02, 4.7363e-02, + -5.3741e-02, 
3.7170e-02, 1.0132e-02])Parameter containing: +tensor([1.8401, 1.6507, 1.7948, 1.7800, 1.7653, 1.7676, 1.8600, 1.8236, 1.7518, + 1.7366, 1.7352, 1.7177, 1.7873, 1.7211, 1.6886, 1.7202, 1.7312, 1.7573, + 1.6900, 1.7321, 1.8321, 1.7103, 1.7793, 1.7726, 1.8100, 1.7619, 1.7042, + 1.7339, 0.9977, 1.7922, 1.6763, 1.8729, 1.7377, 1.7911, 1.7570, 3.3400, + 1.7528, 1.8010, 1.7648, 1.7912, 1.8502, 1.7852, 1.8444, 1.7257, 1.8150, + 1.8192, 1.7501, 1.7457, 1.6584, 1.8334, 1.7475, 1.7645, 1.7709, 1.7155, + 1.7535, 1.7625, 1.7556, 1.5602, 1.7653, 1.7121, 1.8275, 1.8177, 1.7712, + 1.8210, 1.7243, 1.6933, 1.7744, 1.6863, 1.8974, 1.7603, 1.5926, 1.7837, + 1.7453, 1.6028, 1.7849, 1.7217, 1.8000, 1.7972, 1.6751, 1.7951, 1.7870, + 1.7679, 1.7635, 1.7367, 1.7646, 1.7491, 1.8091, 1.8008, 1.7609, 1.6659, + 1.7907, 1.7056, 1.8515, 1.8243, 1.7269, 1.7128, 1.7645, 1.8122, 1.8631, + 1.7797, 1.7646, 0.7897, 1.6948, 1.7812, 1.6742, 1.7120, 1.6592, 1.7276, + 1.7695, 1.7760, 1.7505, 1.7188, 1.7696, 1.7601, 1.8497, 1.7613, 1.7714, + 1.7569, 1.6808, 1.7755, 1.7906, 1.7683, 1.7978, 1.7816, 1.7833, 1.7623, + 1.7604, 1.7623, 1.6551, 1.7792, 1.7867, 1.7288, 1.7727, 1.6616, 1.8281, + 1.8197, 1.7334, 1.7790, 1.7913, 1.7861, 1.7655, 1.8017, 1.7967, 1.8031, + 1.7339, 1.7511, 1.7536, 1.7109, 1.8131, 1.7464, 1.7341, 1.8073, 1.8755, + 1.7642, 1.7574, 1.7459, 1.7882, 1.6927, 1.7318, 1.7242, 1.7431, 1.7230, + 1.7328, 1.7952, 1.7462, 1.7525, 1.7464, 1.7703, 1.7193, 1.8042, 1.8187, + 1.7647, 1.7742, 1.6977, 1.7334, 1.7333, 1.7343, 1.7863, 1.6834, 1.7995, + 1.6882, 1.7592, 1.7986, 1.7490, 1.7527, 1.7209, 1.7338, 1.7539, 1.8268, + 1.8150, 1.7164, 1.7983, 1.6627, 1.7415, 1.6635, 1.6967, 1.7464, 1.8372, + 1.7354, 1.7487, 1.7584, 1.7058, 1.6908, 1.7519, 1.7761, 1.5736, 1.2228, + 1.7728, 1.7046, 1.7144, 1.7717, 1.8086, 1.7980, 1.7459, 1.8135, 1.8093, + 1.7349, 1.7571, 1.7937, 1.7435, 1.7194, 1.7525, 1.7763, 1.7040, 1.7347, + 1.8036, 1.7626, 1.7879, 1.7484, 1.7946, 1.7265, 1.7901, 1.7673, 1.7644, + 1.7181, 1.7174, 
1.8284, 1.7356, 1.8238, 1.6911, 1.7599, 1.7935, 1.7056, + 1.7744, 1.8074, 1.6990, 1.7394, 0.8243, 1.6952, 1.8240, 1.8201, 1.7702, + 1.8529, 1.4891, 1.8102, 1.7815, 1.7122, 1.7379, 1.7450, 1.7833, 1.7664, + 1.6962, 1.7362, 1.6765, 1.7450, 1.6934, 1.7679, 1.7699, 1.7879, 1.7363, + 1.7685, 1.6439, 1.7475, 1.7023, 1.7775, 1.8394, 1.7594, 1.7362, 1.7523, + 1.7827, 1.7071, 1.8144, 1.6561, 1.7740, 1.7412, 1.6781, 1.7144, 1.7361, + 1.7804, 1.8236, 1.9153, 1.7353, 1.7440, 1.7159, 1.7715, 1.7451, 1.8004, + 1.8078, 1.7116, 1.7427, 1.8237, 1.8078, 1.7338, 1.6746, 1.7068, 1.8114, + 1.7397, 1.7601, 1.7596, 1.6326, 1.5706, 1.8009, 1.6958, 1.7471, 1.7363, + 1.8414, 1.7399, 1.7021, 1.6456, 1.7571, 1.6864, 1.7856, 1.8161, 1.7776, + 1.7265, 1.6645, 1.5918, 1.7744, 1.7140, 1.6622, 1.7617, 1.8093, 1.8087, + 1.7346, 1.7258, 1.6232, 1.7583, 1.8145, 1.7119, 1.7761, 1.7462, 1.6483, + 1.7339, 1.7611, 1.6985, 1.8347, 1.6987, 1.7323, 1.7389, 1.7299, 1.7391, + 1.7672, 1.7488, 1.7839, 1.7665, 1.7631, 1.7777, 1.6339, 1.6938, 1.8157, + 1.7464, 1.6534, 1.7763, 1.7792, 1.7784, 1.7034, 1.6523, 1.7126, 1.7317, + 1.7333, 1.7193, 1.7667, 1.7845, 1.7588, 1.7258, 1.7824, 1.7037, 1.6128, + 1.7279, 1.7288, 1.7944, 1.6676, 1.7848, 1.7242, 1.7464, 1.7064, 1.7405, + 1.7550, 1.7883, 1.6747, 1.8077, 0.6920, 1.7930, 1.6939, 1.5853, 1.7549, + 1.7464, 1.7373, 1.7662, 1.7668, 1.7981, 1.7842, 1.7398, 1.8111, 1.6967, + 1.6228, 1.7450, 1.6924, 1.7361, 1.8465, 1.6907, 1.7601, 1.7693, 1.7728, + 1.7786, 1.9575, 1.7447, 1.7650, 1.7740, 1.7779, 1.7729, 1.6935, 1.8035, + 1.8343, 1.7179, 1.8117, 1.7885, 1.7588, 1.6829, 1.7721, 1.7336, 1.6843, + 1.7179, 1.6835, 1.7717, 1.6974, 1.7616, 1.7698, 1.7382, 1.7673, 1.7327, + 1.7272, 1.7410, 1.8681, 1.7535, 1.6609, 1.7775, 1.7975, 1.6045, 1.6706, + 1.6781, 1.7280, 1.7294, 1.7144, 1.7807, 1.7379, 1.6965, 1.6218, 1.7630, + 1.7662, 1.7186, 1.6750, 1.7716, 1.8142, 1.7681, 1.7514, 1.7375, 1.7470, + 1.6814, 1.7563, 1.7806, 1.6861, 1.7027, 1.7264, 1.7811, 1.7573, 1.6235, + 1.7568, 1.7646, 
1.7491, 1.7098, 1.7414, 1.7587, 1.7533, 1.7227, 1.6833, + 1.7702, 1.7296, 1.7516, 1.7460, 1.6894, 1.7191, 1.7573, 1.7020, 1.7415, + 1.7898, 1.7779, 1.7721, 1.6386, 1.7686, 1.7720, 0.9429, 1.7820, 1.7034, + 1.7651, 1.7667, 1.7587, 1.8170, 1.6245, 1.7726, 1.7918, 1.7986, 1.6991, + 1.7649, 1.6397, 1.7385, 1.6653, 1.7968, 1.6853, 1.6544, 1.7029, 1.7351, + 0.9336, 1.7428, 1.7244, 1.7644, 1.7481, 1.7767, 1.8191, 1.7117, 1.7716, + 1.5829, 2.0531, 1.7985, 1.7173, 1.7007, 1.7612, 1.7118, 1.7456, 1.7821, + 1.7097, 1.7723, 1.5083, 1.5076, 1.6904, 1.7302, 1.8118, 1.7102, 1.7321, + 1.7028, 1.7465, 1.8067, 1.8033, 1.6827, 1.3774, 1.7701, 1.7287, 1.6279, + 1.7535, 1.6996, 1.7529, 1.7969, 1.7698, 1.7735, 1.7865, 1.7989, 1.8025, + 1.7413, 1.7794, 1.7462, 1.8258, 1.5884, 1.7895, 1.7249, 1.7411, 1.7231, + 1.7466, 1.7880, 1.8221, 1.7345, 1.5172, 1.5861, 1.7265, 1.1932, 1.8105, + 1.7271, 1.7999, 1.7737, 1.7159, 1.7391, 1.5650, 1.0842, 1.6609, 1.7516, + 1.6644, 1.6834, 1.7752, 1.7820, 1.7207, 1.7830, 1.7473, 1.6890, 1.7324, + 1.7598, 1.8085, 1.7870, 1.6808, 1.7300, 1.6913, 1.8049, 1.6898, 1.7118, + 1.5790, 1.7330, 1.7928, 1.7111, 1.7586, 1.7335, 1.7591, 0.8876, 1.7496, + 1.6044, 1.8343, 1.7631, 1.8049, 1.7443, 1.6705, 1.8017, 1.7247, 1.7761, + 1.7494, 1.6933, 1.7185, 1.7567, 1.7555, 1.3987, 1.8214, 1.7133, 1.7056, + 1.7376, 1.6929, 1.6529, 1.7626, 1.7944, 1.7649, 1.7880, 1.8361, 1.7461, + 1.6423, 1.7435, 1.7214, 1.7357, 1.7379, 1.7925, 0.8717, 1.7399, 1.7446, + 1.7784, 1.6280, 1.7529, 1.8482, 1.8067, 1.7008, 1.7553, 1.7262, 1.6551, + 1.8407, 1.7429, 1.7241, 1.7173, 1.6979, 1.6624, 1.7336, 1.7440, 1.7721, + 1.7816, 1.8321, 1.8003, 1.6417, 1.7096, 1.7309, 1.7556, 1.7408, 1.7249, + 1.7821, 1.8816, 1.7762, 1.7795, 1.7538, 1.7687, 1.8382, 1.7207, 1.7429, + 1.7809, 1.7654, 1.7727, 1.7137, 1.7231, 1.7254, 1.7581, 1.9079, 1.7838, + 1.8562, 1.7747, 1.7307, 1.7510, 1.6652, 1.7962, 1.8084, 1.6713, 1.7405, + 1.7645, 1.7221, 1.7829, 1.5737, 1.6718, 1.8124, 1.7877, 1.7250, 1.8048, + 1.7633, 1.6581, 
1.7901, 1.8515, 1.7576, 1.7432, 1.7884, 1.6867, 1.6636, + 1.7865, 1.7576, 1.7160, 1.7054, 1.7541, 1.7233, 1.7555, 1.7447, 1.7340, + 1.9150, 1.5423, 1.6653, 1.7190, 1.7784, 1.7623, 1.7703, 1.7078, 1.7105, + 1.7365, 1.8146, 1.7623, 1.7655, 1.6682, 1.8041, 1.7280, 1.7973, 1.7215, + 0.8671, 1.7118, 1.7316, 1.8002, 1.7620, 1.7976, 1.7504, 1.7343, 1.7921, + 1.7943, 1.8092, 1.7626])Parameter containing: +tensor([-6.3961e-02, -1.8907e-01, -6.6267e-02, 1.1582e-01, -1.9398e-02, + -7.6774e-02, -9.2488e-02, 1.5014e-01, -1.5402e-01, 5.3630e-02, + -2.6058e-01, 1.6292e-01, -6.2387e-02, -2.1546e-01, -2.1034e-02, + 1.8510e-01, 2.2416e-01, -8.3732e-02, 6.8939e-02, -1.7281e-01, + 6.4749e-02, -1.6842e-02, -1.8385e-02, -2.3905e-01, -6.9280e-02, + -4.7777e-02, 2.9941e-01, 1.0106e-01, -7.4397e-01, -5.8690e-02, + -1.4871e-01, 1.6635e-01, 1.8614e-01, 5.2980e-02, 1.5625e-03, + 1.1102e+00, 1.6345e-01, 2.9891e-01, 5.7778e-02, 8.6953e-02, + 1.0396e-02, 3.3394e-02, -1.1215e-03, -2.0671e-01, 3.9431e-02, + 3.0618e-01, -9.5164e-02, -1.0811e-01, 3.2754e-01, 2.4226e-01, + -5.1117e-02, 7.2014e-02, 5.2677e-02, 5.2326e-02, 4.5423e-01, + 9.4081e-03, 1.0807e-01, -3.1810e-01, 1.6348e-01, -1.6129e-01, + 6.9611e-02, 9.1917e-02, -1.9035e-02, 1.1163e-01, -2.0498e-01, + 5.3379e-02, 2.6206e-01, -5.0303e-01, 1.6840e-02, 2.6704e-01, + 2.5346e-01, 9.7269e-02, 1.9748e-02, 7.3897e-02, 1.1860e-01, + 4.0665e-02, 1.0623e-03, -7.4162e-02, 7.3179e-02, 3.3942e-01, + -9.5987e-03, 8.2016e-03, -1.7550e-01, 3.1015e-01, -2.2220e-02, + 1.7016e-02, -2.7397e-02, 1.7581e-01, 5.6219e-03, 2.9484e-01, + -1.8801e-01, 4.0724e-01, -9.6560e-02, 5.7297e-02, -9.1629e-03, + -4.6879e-01, 3.3328e-02, -9.2649e-02, -9.6031e-03, 1.9044e-01, + 1.5786e-01, 9.0443e-01, -1.8836e-01, 1.7412e-01, 5.5520e-02, + -1.3535e-01, -4.3605e-02, 7.9157e-02, 1.8799e-01, 6.7221e-02, + -7.6727e-03, -1.9825e-02, -1.3374e-01, 2.9653e-02, -1.9720e-01, + -6.4861e-02, 5.6295e-02, 1.8078e-01, 3.5850e-01, -2.8073e-02, + 1.3744e-01, -3.7472e-02, -1.4722e-01, 3.6151e-02, 
2.5569e-02, + -2.2046e-02, -1.2437e-01, 1.0499e-01, 1.7835e-01, 7.1363e-02, + 4.1597e-03, -1.4217e-01, -2.4751e-01, 1.5900e-01, -7.1130e-02, + -1.9061e-01, 2.4493e-01, 3.5116e-02, 1.5471e-02, 2.2332e-02, + 8.9153e-02, 3.0060e-01, 3.8098e-02, -6.2623e-02, 2.9371e-01, + 1.8181e-01, 5.7994e-02, 5.8664e-02, 2.6419e-02, -1.2824e-01, + -1.0993e-01, -4.0017e-02, -3.9351e-02, -1.4422e-01, 8.7471e-02, + 8.4085e-02, -1.9828e-01, -1.3173e-01, 1.4072e-01, 1.5565e-01, + 2.0243e-02, -4.6226e-02, 1.5923e-01, 4.5654e-02, -2.3367e-01, + 1.2978e-01, 2.1358e-01, -8.2719e-02, 1.0171e-01, -2.2423e-01, + 5.3051e-03, -4.1902e-02, 3.8611e-01, 1.2323e-01, -1.4924e-01, + 2.8467e-01, 1.9509e-01, -1.3619e-01, 2.4334e-01, -8.0153e-02, + -6.7516e-02, -6.7610e-02, -1.0731e-01, 2.4883e-02, 2.8200e-01, + 1.2155e-01, 3.6426e-01, 2.3474e-02, -2.8293e-01, 2.2408e-01, + 3.6100e-01, -1.0994e-01, 3.4722e-02, 7.4121e-02, 2.7931e-01, + -1.4990e-01, 2.5177e-01, -1.8813e-02, 8.3328e-02, 8.4020e-02, + -9.8448e-02, 2.7714e-01, 7.4243e-02, 8.0239e-02, 2.0202e-01, + 8.1008e-02, 4.3401e-01, 1.8625e-02, 8.4404e-02, 2.4402e-01, + -1.6691e-01, -8.8845e-02, -1.5939e-01, -2.0858e-01, 2.5397e-01, + 6.0879e-02, 2.0121e-02, -7.9232e-02, -1.6508e-02, -1.3059e-01, + -6.2701e-02, -1.7213e-01, 3.4699e-01, 1.5253e-01, -1.4908e-01, + 1.9412e-01, 1.3451e-01, 2.2582e-01, -9.5174e-02, 2.2037e-01, + 1.0803e-01, 5.3375e-02, 2.2560e-03, 2.7359e-02, -1.8865e-01, + 8.4389e-02, 5.1940e-02, 2.0583e-01, -1.5264e-01, -8.4608e-02, + 9.0049e-02, -8.0713e-02, 1.4706e-01, -1.6154e-02, -4.8605e-02, + 2.0150e-01, -1.8314e-01, 2.6463e-01, -3.1260e-01, 9.1237e-02, + 2.0335e-01, 1.1086e-01, -1.5599e-02, 3.6003e-01, 2.5739e-02, + -2.1332e-01, -7.0601e-03, -1.2192e-01, 1.9431e-01, 1.4852e-01, + 1.2345e-01, 1.8613e-01, 2.5473e-02, 3.4473e-02, 1.1012e-01, + 1.4641e-01, 2.2227e-02, -5.9297e-02, -1.3266e-01, 3.4497e-01, + -3.0586e-01, 7.9368e-01, 3.9487e-02, -1.4621e-01, 6.5543e-02, + 1.7141e-02, -2.0179e-01, 2.3884e-02, -4.0460e-02, -4.6599e-02, + 
2.1172e-01, -8.7700e-02, -1.6130e-01, 6.9310e-02, -2.5059e-01, + 5.2468e-02, -2.3732e-01, 1.8512e-01, 7.2659e-02, 4.7407e-02, + -4.3530e-02, -1.4774e-03, 2.0449e-01, 2.7766e-02, -2.9549e-02, + -2.0292e-01, -1.0574e-02, -1.1197e-01, -3.3860e-01, 5.2191e-02, + -5.6128e-02, -2.4438e-01, 1.7825e-01, -3.4640e-01, -2.2049e-01, + 3.1959e-01, -1.0011e-01, -1.3569e-01, -1.9682e-01, -1.7635e-02, + 3.4345e-01, 1.9722e-02, 3.3036e-02, -3.2205e-01, 1.0287e-01, + -1.5752e-01, 1.2438e-01, 3.6095e-01, 1.3326e-01, 2.4184e-01, + 1.2926e-01, -2.5102e-02, -7.7222e-02, 2.3458e-01, -5.3531e-02, + 2.6737e-01, -1.2668e-02, -1.9242e-01, -2.9800e-01, 3.0770e-01, + 9.5187e-02, 1.2686e-01, -6.7057e-02, 9.6127e-02, -1.0121e-01, + -3.1650e-01, 2.8864e-01, 5.3428e-02, -1.3012e-01, -1.6185e-01, + 2.7272e-01, -1.0872e-01, -6.3737e-02, 1.7065e-01, -4.9168e-02, + -7.3678e-03, -3.3221e-01, -3.0027e-01, 1.6127e-01, -2.2458e-01, + 2.3008e-02, 2.1806e-02, 2.3908e-02, -1.7871e-02, 2.8359e-01, + 5.8141e-02, -3.5832e-02, 3.2671e-01, -1.0304e-01, 1.5364e-02, + -4.7144e-02, 2.1080e-01, -6.6814e-02, -1.3583e-02, 2.7550e-01, + -1.3174e-01, 3.0048e-01, -3.1258e-01, 9.8796e-02, 9.0674e-02, + 1.2171e-01, -2.1045e-01, 4.1959e-01, 1.4062e-01, 1.7522e-01, + -1.5909e-01, -3.8485e-01, 3.5587e-01, 6.1791e-02, 2.9728e-01, + -3.2739e-03, 2.4748e-01, -2.4907e-02, -5.3884e-01, 7.2989e-02, + 3.4384e-02, -1.1574e-01, -2.2263e-01, -1.0675e-01, 9.7501e-02, + 3.4671e-02, -1.4328e+00, -1.1062e-02, 1.9438e-01, 1.0939e-01, + -3.7655e-01, -1.6929e-01, 3.2262e-01, 1.3196e-01, 2.6441e-01, + 3.8306e-01, 6.4132e-02, 1.5814e-01, 1.8378e-02, -2.0566e-03, + 1.6024e-02, -4.8503e-02, -1.0331e-01, 1.5740e-01, -1.9693e-02, + -9.4622e-02, 3.1541e-02, -4.3777e-01, -3.6881e-02, -3.7918e-02, + 7.2006e-02, 7.6161e-02, -1.3037e-01, -1.9686e-01, -1.7425e-01, + 1.3822e-01, 1.5573e-01, 3.1976e-03, -3.8504e-02, -1.8306e-01, + 4.7375e-02, -4.3621e-02, -6.6590e-02, 4.0533e-02, 1.7594e-01, + -4.2811e-02, -8.4193e-02, 2.3910e-01, 1.7940e-01, 2.1909e-01, + 
-9.5213e-02, 8.6281e-03, 1.5524e-01, -6.5299e-02, -6.1806e-02, + 5.6195e-02, -2.5332e-01, 2.3617e-01, 9.1083e-02, 1.9428e-01, + 3.3102e-01, -8.5299e-02, -1.2100e-01, -4.5219e-01, -2.7028e-01, + 2.8839e-01, 2.4645e-01, -1.1720e-01, 3.4503e-01, 1.4733e-01, + 4.0482e-02, -4.2903e-02, -7.8322e-01, 3.0277e-02, -1.6751e-01, + 1.4465e-02, 3.2363e-01, 2.3789e-01, -1.0457e-01, 6.3565e-02, + 2.0327e-01, 4.7821e-02, -4.9042e-01, 7.9319e-02, 9.3845e-02, + -7.2455e-02, 1.6316e-02, -3.3306e-01, -3.0791e-02, -1.1575e-01, + -4.9676e-03, 4.2699e-01, -1.5469e-01, -1.4036e-01, 1.1827e-01, + -2.2014e-01, 2.5985e-01, 4.4695e-02, 2.5353e-01, -1.0868e-02, + -1.2368e-01, -8.1046e-02, -8.4880e-02, 2.9189e-01, 1.9647e-01, + -1.4869e-01, 2.1447e-02, -7.9529e-02, -5.2323e-02, -5.4333e-02, + 5.1668e-02, -8.1463e-02, -1.1637e-02, -4.8212e-01, -2.3931e-02, + 5.3116e-03, -1.9904e+00, 1.3696e-02, 1.9362e-01, -1.3809e-01, + -1.9681e-01, 4.8385e-02, 3.8303e-02, 5.6370e-01, 2.7663e-01, + 1.9251e-02, 2.1590e-01, 4.1623e-02, 2.3325e-01, -8.7494e-02, + -2.3375e-01, -6.6074e-02, -1.8052e-02, -2.0547e-01, -4.7173e-01, + 1.2608e-01, 2.6116e-02, -3.2520e-01, 3.5194e-01, -2.5267e-01, + -2.0281e-02, -2.0063e-02, -3.7879e-02, 1.0518e-01, 7.7329e-02, + 8.1078e-02, -7.6093e-02, -3.4030e-01, 7.6515e-02, 1.1958e-01, + 9.5325e-02, -1.0395e-01, -6.4384e-02, 2.3667e-01, -4.1264e-02, + 3.1444e-01, 8.5630e-02, -2.8794e-01, 6.7791e-01, 1.5426e-01, + -4.1703e-02, -1.1656e-02, -2.8440e-01, -1.0204e-01, 3.4792e-02, + 1.3765e-01, -6.2092e-02, 7.4738e-02, 2.0551e-01, 1.2253e+00, + -1.0391e-01, 1.1737e-01, -3.6290e-01, 1.1110e-01, 6.8894e-02, + 1.8224e-01, -9.1156e-02, -1.1810e-01, -3.2335e-02, 1.4059e-01, + -3.5351e-01, -1.7719e-01, -2.7841e-02, 1.7805e-01, -2.4260e-02, + 3.5481e-01, -1.5195e-01, -2.2427e-02, 2.1070e-01, 9.3560e-02, + 9.1329e-02, -5.9248e-02, 7.8027e-02, 7.1357e-02, 4.7972e-02, + 1.7722e-01, 7.8401e-01, 1.8207e-01, 1.1456e+00, 8.1968e-02, + 9.6185e-02, -8.7804e-02, 1.2846e-01, 1.7551e-01, 3.3738e-02, + 
6.4693e-01, -1.2015e+00, 6.1865e-02, 9.1654e-02, 3.5655e-02, + 1.7562e-01, -6.8616e-02, 1.1576e-02, 1.3995e-01, 4.5674e-01, + 1.2882e-01, 1.1601e-01, -8.0007e-02, -8.9488e-02, 1.2392e-01, + 1.6591e-01, 3.0822e-03, 2.0161e-01, -2.3949e-01, -5.5038e-02, + 2.8561e-02, -1.4854e-02, -1.7609e-01, -1.6273e-01, -4.8114e-02, + 2.9822e-02, -1.2119e-01, 5.6389e-01, 1.4921e-02, -5.0786e-01, + 2.0637e-01, -3.2344e-01, 1.2871e-01, 4.8221e-02, 1.8008e-01, + -7.6118e-02, -1.9054e-01, -7.6769e-02, 1.0610e-01, 1.8803e-01, + 1.5988e-01, 2.3421e-01, -2.0432e-01, -2.6052e-03, -8.2586e-02, + 5.4137e-01, 5.5920e-02, -1.1482e-01, 1.9908e-01, 1.6705e-01, + -1.6597e-01, -3.2076e-01, 4.1043e-02, -2.0394e-01, 5.2273e-02, + 8.0126e-02, 4.4827e-02, 3.0543e-01, 3.2901e-01, -2.1709e-01, + 1.5603e-01, 8.3254e-02, 9.2027e-02, -3.0384e-02, -9.9128e-01, + -1.1573e-01, -1.3390e-01, -1.3821e-02, 4.8324e-01, 9.9974e-02, + 1.6134e-01, -1.5294e-02, -1.8115e-01, 3.7491e-02, -3.6812e-01, + 7.3616e-03, 1.5063e-01, -1.2603e-01, 1.6043e-01, -2.3366e-01, + -9.9087e-03, -1.3720e-01, -7.6003e-02, 3.6387e-01, 2.8363e-01, + 6.2078e-02, 4.2387e-02, 1.4645e-01, -7.1626e-01, 8.1430e-03, + 2.1640e-01, -1.5572e-01, -2.6465e-01, -1.9105e-02, 1.0089e-01, + 5.4242e-02, -3.5611e-02, -2.2510e-01, -1.3874e-01, -1.0381e-01, + 2.2531e-01, -5.0153e-03, -1.4030e-01, -2.1619e-02, 5.9003e-02, + -1.5007e-01, 1.0948e-01, -1.1252e-01, 1.0077e-03, 6.4135e-02, + -3.1705e-01, 1.3072e-01, -1.6811e-01, 5.1108e-02, 3.9745e-02, + 4.1817e-01, 3.0233e-01, -4.0726e-02, -8.3054e-02, -1.3071e-01, + 9.7965e-02, -5.9167e-02, -8.3690e-02, 7.1241e-02, 9.2697e-01, + -2.0566e-01, -8.5988e-02, 8.9921e-02, -2.9561e-03, -5.8907e-03, + -4.3587e-02, 3.7168e-01, -1.4915e-03, -8.9582e-02, -3.2550e-01, + -8.3132e-03, -1.0678e-02, -5.6811e-03, 6.3972e-01, 1.1402e-01, + 2.9503e-03, 7.1307e-02, 1.7079e-01, -1.1014e-01, 1.3895e-01, + 1.7041e-02, -3.6806e-02, 2.5082e-01, -2.0396e-02, 2.8433e-01, + 1.4611e-01, -6.2930e-02, -1.2271e-01, -1.3978e-01, 1.3857e-01, + 
2.8862e-01, 1.7088e-01, 1.3886e-01, 4.9825e-02, -4.3962e-02, + 3.7045e-01, 2.8376e-01, 2.2356e-01, -2.3949e-01, 1.1278e-01, + 1.0264e-01, -6.9168e-01, -1.2107e-01, -2.2057e-01, 1.6424e-04, + -3.3768e-02, -4.2022e-03, -3.1471e-01, -2.8823e-01, -1.1201e-01, + 9.9413e-02, -6.2378e-02, -8.8212e-02])Parameter containing: +tensor([[ 0.0402, 0.0049, 0.0031, ..., 0.0076, -0.0040, -0.0004], + [ 0.0320, -0.0247, 0.0270, ..., 0.0014, -0.0266, -0.0196], + [-0.0072, 0.0229, 0.0050, ..., -0.0068, -0.0446, -0.0313], + ..., + [ 0.0280, -0.0149, 0.0136, ..., 0.0182, -0.0120, -0.0161], + [ 0.0343, -0.0128, -0.0234, ..., 0.0229, -0.0218, 0.0272], + [ 0.0184, 0.0124, 0.0135, ..., -0.0094, 0.0302, -0.0117]])Parameter containing: +tensor([-0.3799, -0.4065, -0.2979, ..., -0.4219, -0.3420, -0.1925])Parameter containing: +tensor([[-0.0009, 0.0018, 0.0037, ..., -0.0094, 0.0236, 0.0011], + [ 0.0007, 0.0022, -0.0113, ..., -0.0333, 0.0027, 0.0064], + [ 0.0013, -0.0087, 0.0208, ..., 0.0051, 0.0020, 0.0045], + ..., + [ 0.0153, -0.0221, 0.0076, ..., -0.0112, 0.0199, -0.0161], + [-0.0092, -0.0176, 0.0055, ..., -0.0182, 0.0059, 0.0039], + [-0.0012, -0.0012, -0.0088, ..., -0.0243, 0.0233, -0.0009]])Parameter containing: +tensor([ 7.7896e-03, -3.2593e-02, 2.3365e-03, -2.7428e-03, -1.7853e-02, + -3.6957e-02, 4.3549e-02, -8.7357e-03, 5.9662e-03, -6.9542e-03, + -3.0121e-02, -2.0676e-02, -3.7842e-02, -2.5616e-03, -6.9946e-02, + 2.5620e-02, -7.1594e-02, -6.8237e-02, -3.5339e-02, -9.3457e-01, + -2.2919e-02, 5.1079e-03, -3.7384e-02, -1.2177e-02, -2.2659e-02, + -1.8784e-02, -2.9144e-02, -5.5885e-03, 2.5854e-01, 1.8723e-02, + -3.0411e-02, -4.7424e-02, -1.3741e-02, -1.0742e-01, 7.4577e-03, + 4.5700e-03, -1.2428e-02, -2.6245e-02, 2.5635e-02, 5.9547e-03, + -1.3794e-02, -1.9180e-02, -3.3325e-02, 1.5533e-02, 7.5111e-03, + -5.0751e-02, 1.2350e-03, 2.8946e-02, -4.1290e-02, 6.7322e-02, + -1.2611e-02, -2.2552e-02, -7.4959e-04, -8.0261e-03, 1.1475e-02, + 1.5778e-02, 5.5237e-02, -9.5642e-02, -4.5258e-02, -2.1877e-03, + 
4.0497e-02, 4.9530e-02, -3.8269e-02, 1.1391e-02, 2.6230e-02, + 3.4790e-02, 3.3569e-02, -6.8054e-03, 3.3379e-03, -7.9102e-02, + 6.3599e-02, 2.4536e-02, 1.4441e-01, 7.4692e-03, 3.0319e-02, + -4.6082e-02, 1.3298e-02, 1.6617e-02, -2.3682e-02, 5.1819e-02, + -9.1064e-02, -1.2390e-02, 7.6416e-02, 9.4070e-03, 1.4420e-02, + -3.0380e-02, -6.9275e-02, -2.1866e-02, -2.3148e-02, 1.5701e-02, + 7.9041e-02, -3.3283e-03, 5.7297e-03, 9.4528e-03, -1.0757e-02, + -5.7259e-03, 1.5404e-02, -4.6921e-03, 4.6196e-03, 7.0251e-02, + -5.9280e-03, -4.4861e-02, -7.4097e-02, -8.2245e-03, 1.6342e-02, + -3.1137e-04, -5.6488e-02, -3.1647e-02, 2.1072e-02, 2.0782e-02, + 2.6672e-02, -3.5950e-02, 1.0429e-02, 2.1378e-02, 3.7811e-02, + -6.6833e-02, 4.2816e-02, 3.4454e-02, 7.8857e-02, -4.6906e-02, + 2.8946e-02, -3.7567e-02, 1.2917e-02, -8.6060e-03, -3.8818e-02, + -1.3573e-02, 7.1678e-03, -8.2626e-03, 1.2360e-02, 8.5388e-02, + -2.6917e-02, -6.2042e-02, -6.1646e-02, -2.9877e-02, 2.4567e-02, + -2.3895e-02, 1.2718e-02, -1.0208e-02, -6.9885e-02, -7.5836e-03, + 6.6376e-03, -2.5208e-02, 3.9795e-02, -1.8814e-02, -5.6244e-02, + 2.3483e-02, 3.8574e-02, -6.3049e-02, 8.9569e-03, -3.6530e-02, + -7.8506e-03, 1.0474e-01, -4.3106e-03, 1.0887e-02, 3.1494e-02, + 2.9190e-02, 4.7821e-02, 9.3842e-03, -2.3926e-02, 7.8796e-02, + -4.9805e-02, 1.7578e-02, -2.2507e-02, 7.4158e-02, -2.3041e-02, + -1.1719e-02, -3.8025e-02, -5.7526e-03, -6.7017e-02, 3.8086e-02, + -1.1129e-03, 3.1799e-02, -8.0261e-03, 7.7454e-02, 2.8320e-02, + -5.6519e-02, 2.7100e-02, -5.0354e-02, -1.8875e-02, 1.0706e-01, + -2.4261e-03, -2.6855e-03, 1.7792e-02, -8.1329e-03, 4.4098e-02, + 5.7098e-02, 3.6163e-02, 9.7107e-02, -9.0881e-02, -3.4599e-03, + 3.5919e-02, 4.2419e-03, -3.9215e-03, 9.3384e-02, 9.3520e-05, + -2.1439e-02, -1.8682e-03, 2.5467e-02, -1.5388e-02, 1.1948e-02, + 5.7190e-02, -4.8492e-02, 4.6692e-02, -5.6610e-02, 2.5425e-03, + -6.0387e-03, 1.8164e-01, 7.9468e-02, 4.1008e-03, 6.8848e-02, + -1.2781e-01, -2.3010e-02, -4.4128e-02, -3.4180e-02, -1.7395e-02, + 
-3.9032e-02, -9.7595e-02, 1.1002e-02, 1.1032e-02, 3.1113e-02, + 1.8723e-02, -1.2213e-01, -5.0751e-02, 7.5439e-02, -7.0534e-03, + 2.4872e-02, -1.1040e-02, 2.1629e-03, 4.5349e-02, 5.8022e-03, + 4.3869e-03, -1.2917e-02, 5.5298e-02, 5.0316e-03, 5.2612e-02, + 1.2054e-02, -1.5526e-02, 4.3671e-02, -7.1640e-03, -2.7786e-02, + 7.3967e-03, -1.1696e-02, -2.1484e-02, 4.0070e-02, -1.5297e-02, + -2.8000e-02, -2.4567e-02, 1.0895e-01, 4.0283e-03, 1.1200e-02, + 1.0144e-01, 1.0040e-01, 2.3743e-02, 2.2659e-02, -2.0370e-02, + -3.0762e-02, -1.6815e-02, 6.6589e-02, -1.3390e-02, 6.6528e-02, + -4.3526e-03, 3.2227e-02, -9.1858e-02, 6.4575e-02, 3.1433e-02, + -6.7322e-02, 6.6772e-02, 8.4076e-03, -5.8044e-02, 1.6556e-02, + -1.1096e-01, 9.0698e-02, -8.8928e-02, -3.3752e-02, -1.2222e-02, + -4.0100e-02, -4.0192e-02, -6.0806e-03, 1.7136e-02, 1.6037e-02, + 4.7211e-02, -6.3477e-02, -1.1551e-02, 7.1167e-02, -1.0651e-02, + 7.0496e-02, 1.0674e-02, 1.5163e-03, -2.8168e-02, 4.5357e-03, + 2.4887e-02, -1.7761e-02, 5.0659e-02, 6.1684e-03, 4.4952e-02, + 5.1819e-02, -3.4058e-02, 7.0740e-02, 7.2510e-02, 2.2812e-02, + 9.7580e-03, 1.0696e-02, 2.7405e-02, -8.8989e-02, -8.4656e-02, + 5.2765e-02, -4.8157e-02, 4.5593e-02, -3.9398e-02, 3.8422e-02, + 4.9591e-02, -3.3630e-02, -6.8237e-02, -4.3701e-02, 1.0796e-02, + 1.4038e-02, -2.5513e-02, -1.3586e-01, -3.3905e-02, 5.3558e-03, + 7.0801e-03, -9.6207e-03, 4.8859e-02, -4.0649e-02, 1.7197e+00, + -7.1899e-02, -4.9255e-02, -8.5144e-02, -1.7181e-02, -5.9166e-03, + -6.5918e-02, -9.2041e-02, -1.9272e-02, 6.4270e-02, 3.2104e-02, + 6.1035e-03, -1.0696e-02, 2.7679e-02, 8.5999e-02, 2.9144e-02, + 7.7820e-02, -7.6866e-03, 2.7557e-02, 8.9569e-03, 2.2308e-02, + 3.9363e-04, 9.1858e-03, -4.3121e-02, -3.4180e-02, -5.8441e-02, + -3.6621e-02, -2.0874e-02, 1.6830e-02, 4.0558e-02, 7.3486e-02, + 1.2581e-02, -2.3155e-03, 3.6377e-02, 3.4729e-02, -5.3284e-02, + 3.3386e-02, 2.8122e-02, 3.8452e-02, 3.3295e-02, 9.4910e-02, + -2.0279e-02, -2.8763e-02, -3.8879e-02, 1.0704e-02, 9.4604e-02, + 
2.8931e-02, 2.0630e-02, -2.4460e-02, -4.9988e-02, -1.1375e-02, + 1.4992e-02, 1.4893e-02, 1.0114e-01, -2.1744e-02, 2.1774e-02, + 2.3193e-02, -1.8646e-02, -3.1097e-02, -3.3112e-02, 7.0129e-02, + -4.4899e-03, -6.8542e-02, 3.4851e-02, -5.6000e-02, 5.3223e-02, + 2.8259e-02, -1.0693e-01, -9.2834e-02, 1.0010e-01, 1.0895e-01, + -3.4119e-02, -1.5152e-02, 4.3427e-02, 2.1225e-02, -1.8845e-02, + -1.8906e-02, 1.1780e-02, 9.1003e-02, -1.2894e-02, 5.9113e-02, + -2.0584e-02, -1.4336e-02, -1.5182e-02, -5.4871e-02, -3.9795e-02, + 1.6586e-02, -1.6113e-02, 6.4049e-03, 1.1810e-01, 2.5757e-02, + -4.8853e-01, 2.2644e-02, -3.5156e-02, 2.0920e-02, 7.0877e-03, + -2.1194e-02, 3.3264e-02, -1.4381e-02, 6.1310e-02, 5.0240e-03, + 3.7750e-02, -1.0406e-02, -4.6906e-02, -4.7546e-02, -1.2230e-02, + 1.3031e-02, -3.0441e-02, 1.9775e-02, -5.4688e-02, 3.0609e-02, + 8.6441e-03, -3.6377e-02, 2.6531e-03, -2.9816e-02, -3.4882e-02, + -4.2908e-02, -4.8096e-02, -6.5613e-02, -8.2703e-02, 2.9678e-02, + -1.5717e-03, 7.2144e-02, -6.6284e-02, 9.9304e-02, -3.2654e-02, + 2.4963e-02, 3.5583e-02, 1.3904e-01, -7.3425e-02, 4.6326e-02, + 3.0396e-02, 2.2018e-02, -3.3569e-02, 1.6571e-02, 5.6976e-02, + -4.1962e-02, 1.7609e-02, -4.1084e-03, -2.6169e-02, 3.2959e-02, + -7.1106e-02, 1.0414e-02, 1.9646e-03, -1.7776e-02, 8.6670e-03, + 6.2988e-02, -1.1330e-03, -4.1321e-02, 2.8610e-03, -4.0955e-02, + -2.3392e-02, 6.1005e-02, -6.8115e-02, -3.0579e-02, 8.0994e-02, + 6.4880e-02, 6.8115e-02, -2.4460e-02, 5.5481e-02, 2.8992e-02, + 5.9418e-02, 3.9673e-02, -1.6281e-02, 4.6509e-02, -9.4604e-03, + 1.6022e-02, -8.6899e-03, 3.6835e-02, 1.5167e-02, -1.0231e-02, + -5.1727e-02, -6.4240e-03, 7.4768e-02, 1.0760e-01, -2.0065e-02, + 1.0735e-02, -1.2622e-01, -2.9022e-02, 1.1845e-03, -1.2917e-02, + -6.3904e-02, 1.0358e-01, -3.5431e-02, 2.2186e-02, 5.6877e-03, + -1.1574e-02, -1.6083e-02, -1.5587e-02, 5.2910e-03, 8.2016e-03, + 7.8659e-03, -1.8631e-02, -1.3077e-02, -1.5572e-02, 1.1108e-02, + -3.5461e-02, 6.5063e-02, -8.1299e-02, -4.1747e-04, -3.7506e-02, 
+ -3.2845e-03, 7.9346e-02, 1.0022e-01, 5.5313e-03, -3.0167e-02, + 1.0490e-02, 1.9470e-02, 6.1340e-02, -1.8845e-02, 1.0582e-02, + -2.5696e-02, -3.5763e-03, 6.7322e-02, -4.8599e-03, -1.3565e-02, + -1.5327e-02, 9.2529e-02, -3.4241e-02, 7.4341e-02, -3.1982e-02, + 2.3422e-02, -2.3132e-02, -2.0050e-02, -5.5237e-02, -1.7120e-02, + -3.2867e-02, 1.7761e-02, -2.9556e-02, -4.5837e-02, -1.6769e-02, + -4.5074e-02, -2.3026e-02, -7.0724e-03, 9.4986e-03, 3.2318e-02, + -1.4465e-02, 3.3173e-02, -4.5959e-02, 4.3831e-03, -7.1030e-03, + -8.2642e-02, -9.9106e-03, -3.1700e-03, -6.5857e-02, -4.4861e-03, + 6.1111e-03, -5.8055e-05, -4.1199e-02, -6.6406e-02, 7.2021e-02, + 6.1737e-02, 6.9618e-04, 4.6234e-02, -8.0490e-04, 6.5552e-02, + 7.3364e-02, 1.7883e-02, 7.0572e-03, 7.6294e-02, 3.6583e-03, + -5.5847e-02, 5.3467e-02, 8.1482e-03, 5.0629e-02, 2.6779e-02, + -6.9214e-02, -3.7201e-02, 3.1067e-02, 4.1779e-02, 8.6288e-03, + -4.6120e-03, -1.1877e-01, 9.7107e-02, -5.1666e-02, 3.1400e-04, + 2.7237e-02, -3.2593e-02, -1.4381e-02, -2.4063e-02, 3.7842e-03, + -1.3321e-02, 4.0344e-02, 5.2277e-02, 2.5543e-02, -5.7983e-03, + 7.0238e-04, -4.8981e-02, -6.4331e-02, 1.4488e-02, 3.2928e-02, + -1.6449e-02, -1.3496e-02, 3.2883e-03, -2.8275e-02, -1.0394e-01, + 2.3224e-02, -2.0599e-02, 2.9724e-02, 5.1514e-02, -4.6661e-02, + 5.1697e-02, -6.7871e-02, 1.5778e-02, 3.7354e-02, 7.0229e-03, + -7.0648e-03, 5.5206e-02, -7.0923e-02, 9.9487e-03, -3.8086e-02, + -1.1192e-02, 4.6997e-02, 1.8250e-02, 4.7211e-02, -3.3386e-02, + 4.4739e-02, 9.4971e-02, 1.0327e-01, 4.6265e-02, -2.7390e-02, + -6.7078e-02, -4.1290e-02, 5.1003e-03, 1.1597e-02, -6.9885e-02, + -7.3891e-03, 4.4800e-02, -6.8115e-02, 1.7685e-02, -1.0590e-01, + 3.1647e-02, -2.8763e-02, 3.7491e-05, 1.1330e-02, 5.8197e-02, + 3.0060e-02, 2.5848e-02, -8.6426e-02, 4.6021e-02, -9.3231e-03, + -5.4382e-02, 4.2633e-02, -2.2324e-02, 1.1444e-02, 4.3793e-02, + 2.4780e-02, -3.5339e-02, -6.7200e-02, 5.4291e-02, 2.4307e-02, + -7.8186e-02, -3.1677e-02, -1.6647e-02, 2.8702e-02, -7.6056e-05, + 
-4.3549e-02, 1.2598e-01, -8.8623e-02, 1.2978e-02, -9.8999e-02, + 6.3721e-02, -4.3182e-02, 8.4351e-02, 7.4463e-03, 2.1729e-02, + -2.9724e-02, -8.7585e-02, 2.6855e-02, 5.1270e-02, 4.6654e-03, + 3.4424e-02, 2.9800e-02, 3.1799e-02, -1.7502e-02, -7.0679e-02, + 6.9702e-02, -1.2619e-02, 1.7288e-02, 4.2305e-03, 8.4351e-02, + -2.0050e-02, 7.8125e-02, -1.5099e-02, 2.4475e-02, -9.9487e-03, + 1.4198e-02, -5.9998e-02, -3.9185e-02, -9.6970e-03, -2.3727e-02, + -4.7943e-02, -3.1342e-02, 2.7161e-02, -7.4081e-03, 1.9302e-02, + -8.1909e-02, 1.9035e-03, 3.7018e-02, 9.1019e-03, 2.8320e-02, + -2.6093e-02, 1.7487e-02, 3.4119e-02, -2.0996e-02, -3.7659e-02, + -3.4302e-02, -1.2791e-04, -2.4811e-02, -4.5319e-02, 4.1077e-02, + -6.8604e-02, 4.7882e-02, 1.4091e-02, 4.0771e-02, -5.1069e-04, + -4.9500e-02, -9.9976e-02, 2.6245e-02, -4.6753e-02, -9.5596e-03, + 2.7557e-02, 2.5330e-02, -2.4536e-02, 4.0222e-02, -1.1444e-02, + 8.3862e-02, 4.6661e-02, -8.7433e-03, -5.4749e-02, -2.3438e-02, + 1.2192e-02, -2.8833e-01, 8.6212e-03, -3.7537e-02, -2.2629e-02, + -2.6428e-02, 6.6566e-03, -1.2238e-02, 8.9645e-03, 2.0905e-02, + -5.8098e-03, -7.1899e-02, -1.3962e-02])Parameter containing: +tensor([1.5466, 1.6287, 1.4620, 1.5152, 1.6963, 1.5352, 1.7028, 1.6495, 1.6754, + 1.5446, 1.7198, 1.6647, 1.7217, 1.7013, 1.5661, 1.5963, 1.7138, 1.6526, + 1.5437, 2.5590, 1.5243, 1.6118, 1.5728, 1.5510, 1.5395, 1.6284, 1.5794, + 1.6840, 2.6548, 1.5297, 1.5815, 1.5742, 1.6377, 1.6970, 1.5335, 1.6825, + 1.6767, 1.7161, 1.6149, 1.5676, 1.5750, 1.4493, 1.5736, 1.5713, 1.6910, + 1.4318, 1.4671, 1.7247, 1.6311, 1.6464, 1.5485, 1.6081, 1.4676, 1.8106, + 1.5993, 1.4245, 1.6667, 1.6925, 1.5395, 1.7395, 1.5219, 1.5653, 1.7521, + 1.5926, 1.7473, 1.5702, 1.4933, 1.6499, 1.6647, 1.6324, 1.7135, 1.5787, + 1.7618, 1.5647, 1.7105, 1.8888, 1.5787, 1.4775, 1.5883, 1.4983, 1.6657, + 1.5485, 1.7070, 1.4726, 1.5059, 1.7675, 1.7143, 1.6613, 1.5308, 1.6046, + 1.5477, 1.6365, 1.6244, 1.7674, 1.6128, 1.6021, 1.6882, 1.5759, 1.6254, + 1.7811, 1.5736, 
2.2523, 1.7263, 1.7387, 1.5957, 1.5393, 1.4164, 1.7550, + 1.5276, 1.5929, 1.5954, 1.5568, 1.4611, 1.6622, 1.6308, 1.6464, 1.6043, + 1.6075, 1.8606, 1.6691, 1.7086, 1.2814, 1.5904, 1.5482, 1.7599, 1.7228, + 1.5460, 1.6140, 1.5800, 1.7483, 1.5361, 1.6983, 1.6441, 1.7168, 1.6573, + 1.6766, 1.8008, 1.6312, 1.4994, 1.5217, 1.5370, 1.5518, 1.5621, 1.6829, + 1.7218, 1.6218, 1.8181, 1.6869, 1.6930, 1.6609, 1.6902, 1.7114, 1.7895, + 1.7047, 1.9185, 1.4508, 1.6689, 1.5640, 1.7115, 1.6192, 1.6506, 1.5862, + 1.5534, 1.2080, 1.4693, 1.5915, 1.5049, 1.6141, 1.6000, 1.6080, 1.5011, + 1.7052, 1.6523, 1.6762, 1.7173, 1.5287, 1.7796, 1.9163, 1.6103, 1.6111, + 1.6848, 1.5284, 1.5299, 1.6179, 1.7410, 1.6436, 1.6795, 0.5891, 1.8123, + 1.6114, 1.4928, 1.5903, 1.7130, 1.5604, 1.6891, 1.6415, 1.5553, 1.5920, + 0.8378, 1.5091, 1.4806, 1.6040, 1.8015, 1.6475, 1.6319, 1.5780, 2.0262, + 1.6141, 1.6232, 1.7835, 1.8310, 1.7591, 1.6236, 1.7798, 1.8485, 1.5591, + 1.8435, 1.6892, 1.7282, 1.7013, 1.6079, 1.8256, 1.5861, 1.5172, 1.6814, + 1.6223, 1.5769, 1.6768, 1.6941, 1.5766, 1.7174, 1.6628, 1.6466, 1.7017, + 1.5979, 1.5386, 1.6890, 1.6619, 1.6857, 1.5417, 1.4578, 1.7095, 1.6859, + 1.6036, 1.6458, 1.6928, 1.5617, 2.0336, 1.5748, 1.6816, 1.7615, 1.4831, + 1.5995, 1.6715, 1.6472, 1.8099, 1.7742, 1.7084, 1.6539, 1.4784, 1.6935, + 1.5482, 1.4752, 1.5757, 1.4937, 1.7429, 1.6930, 1.5716, 1.6499, 1.6268, + 1.8494, 1.6066, 1.6520, 1.2203, 1.6006, 1.5320, 1.6687, 1.6458, 1.5665, + 1.5904, 1.5120, 1.7532, 1.8606, 1.7307, 1.6165, 1.5873, 1.8569, 1.5411, + 1.8052, 1.4410, 1.7376, 1.6775, 1.5768, 1.4959, 1.7195, 1.5293, 1.5665, + 1.5442, 1.7254, 1.6832, 1.7262, 1.6449, 1.6497, 1.8472, 1.6109, 1.5323, + 1.4856, 1.6461, 1.6573, 1.4211, 1.7107, 1.6706, 1.4615, 1.6185, 1.6502, + 1.5425, 1.7448, 1.8702, 1.6209, 1.6842, 1.4225, 1.6420, 1.6030, 1.7335, + 1.4414, 1.3507, 1.6482, 1.7484, 1.4666, 1.4885, 1.4834, 1.6085, 1.7071, + 1.6288, 1.6031, 1.7478, 1.5366, 1.7877, 1.5961, 1.7320, 1.7167, 1.7011, + 1.5680, 1.6012, 
1.6675, 1.7850, 1.6508, 1.5850, 1.7151, 1.7315, 1.6190, + 1.6217, 1.6529, 1.4600, 1.6130, 1.5748, 1.4341, 1.4692, 1.6451, 1.8027, + 1.6968, 1.6237, 1.5114, 1.6602, 1.6563, 1.6236, 1.7618, 1.5643, 1.6583, + 1.7570, 1.5363, 1.6690, 1.6809, 1.6877, 1.6174, 1.7092, 1.6972, 1.5257, + 1.7533, 1.5202, 1.5563, 1.2858, 1.7206, 1.6467, 1.7424, 1.5450, 1.6139, + 1.6049, 1.6431, 1.7720, 1.7395, 2.3183, 1.8012, 1.5125, 1.7654, 1.7184, + 1.5203, 1.7012, 1.6321, 1.7103, 1.5438, 1.6525, 1.4328, 1.5309, 1.6249, + 1.6052, 1.7142, 1.5251, 1.9031, 1.5546, 1.8293, 1.6872, 1.5897, 1.6912, + 1.2554, 1.8850, 1.5435, 1.6176, 1.5993, 1.6400, 1.6315, 1.5535, 1.7377, + 1.7238, 1.5547, 1.6823, 1.6606, 1.7936, 1.5944, 1.5299, 1.6171, 1.6747, + 1.7157, 1.5132, 1.4544, 1.6506, 1.6495, 1.6812, 1.6820, 1.6619, 1.6851, + 1.7541, 1.5222, 1.2278, 1.6402, 1.5469, 1.6362, 1.6224, 1.7885, 1.6684, + 1.6326, 1.7323, 1.8737, 1.5761, 1.5595, 1.5265, 1.6326, 1.7348, 1.6750, + 1.7892, 1.7402, 1.6306, 1.6256, 1.6073, 1.5526, 1.6445, 1.6588, 1.7198, + 1.7170, 1.4939, 1.5734, 1.4749, 1.4609, 1.4621, 1.6425, 1.6142, 1.6705, + 1.8619, 1.6656, 1.7224, 1.5495, 1.7225, 1.6603, 1.5858, 1.7914, 1.6040, + 1.6189, 1.7769, 1.5258, 1.5773, 1.5919, 1.5266, 1.6045, 1.7294, 1.5485, + 1.8233, 1.6096, 1.5402, 1.6347, 1.6637, 1.6981, 1.6240, 1.7442, 1.4918, + 1.7090, 1.6694, 1.6764, 1.5901, 1.5481, 1.5938, 1.6709, 1.6144, 1.5924, + 1.6912, 1.6638, 1.6623, 1.5187, 1.6067, 1.5926, 1.6297, 1.5314, 1.4781, + 1.9022, 1.6165, 1.5046, 1.5899, 1.6815, 1.5027, 1.6304, 1.6635, 1.6648, + 1.5609, 1.7298, 1.6707, 1.6422, 1.7757, 1.5534, 1.6288, 1.7241, 1.5718, + 1.7018, 1.6467, 1.6198, 1.8366, 1.6966, 1.6463, 1.5737, 1.8146, 1.8558, + 1.6408, 1.7075, 1.6411, 1.7522, 1.7252, 1.8811, 1.5192, 1.5296, 1.6011, + 1.6077, 1.7297, 1.5673, 1.4351, 1.3742, 1.6096, 1.6247, 1.8658, 1.6591, + 1.6070, 1.7442, 1.7427, 1.5629, 1.7752, 1.7397, 1.5951, 1.5998, 1.6218, + 1.6390, 1.6102, 1.6870, 1.6590, 1.6500, 1.6725, 1.5687, 1.8237, 1.7162, + 1.6969, 1.7778, 
1.3082, 1.5581, 1.5859, 1.6857, 8.3553, 1.6262, 1.6234, + 1.6098, 1.6806, 1.6735, 1.5222, 1.5401, 1.6517, 1.5861, 1.6949, 1.6505, + 1.5054, 1.5938, 1.6957, 1.4191, 1.5484, 1.5930, 1.5796, 1.8634, 1.6881, + 1.6730, 1.8082, 1.7221, 1.7040, 1.8423, 1.5473, 1.6795, 2.0259, 1.5601, + 1.7674, 1.6799, 1.5424, 1.7275, 1.4993, 1.6710, 1.6352, 1.6293, 1.6699, + 1.5402, 1.5156, 1.5385, 1.5834, 1.7811, 1.6085, 1.4844, 1.5949, 1.6252, + 1.6161, 1.5671, 1.7266, 1.8965, 1.5759, 1.3900, 1.7972, 1.6964, 1.6084, + 1.7021, 1.7420, 1.6757, 1.8554, 1.6910, 1.7198, 1.8348, 1.6739, 1.6898, + 1.7649, 1.6479, 1.6634, 1.5334, 1.5332, 1.6019, 1.5840, 1.6878, 1.6768, + 1.7021, 1.6632, 1.5904, 1.5411, 1.7415, 1.5871, 1.6146, 1.4035, 1.7149, + 1.7371, 1.6836, 1.6548, 1.7445, 1.5904, 1.6646, 2.1296, 1.5593, 1.5757, + 1.7328, 1.5722, 1.5387, 1.8659, 1.5409, 1.7209, 1.5629, 1.8777, 1.8350, + 1.7118, 1.5701, 1.4138, 1.6292, 1.6653, 1.5712, 1.6760, 1.6174, 1.5633, + 1.5570, 1.6512, 1.6422, 1.4815, 1.8044, 1.6046, 1.6314, 1.5283, 1.4404, + 1.4932, 1.5224, 1.7141, 1.6128, 1.7097, 1.6152, 1.7305, 1.6738, 1.6606, + 1.6788, 1.6342, 1.7643, 1.5878, 1.5240, 1.6807, 1.4941, 1.7150, 1.6930, + 1.8136, 1.6147, 1.6509, 1.6260, 1.8039, 1.4894, 1.4515, 1.5909, 1.6387, + 1.6184, 1.6230, 1.6293, 1.7180, 1.6287, 1.6429, 1.6987, 1.8735, 1.8367, + 1.6882, 1.5426, 1.4757, 1.7773, 1.6352, 1.5406, 1.6494, 1.6965, 1.5874, + 2.3065, 1.5113, 1.3640, 1.5659, 1.5924, 1.5288, 1.6494, 1.6579, 1.7771, + 1.5794, 1.6293, 1.6200])Parameter containing: +tensor([-2.8834e-01, 6.7875e-01, -2.3783e-01, 7.2542e-01, 5.0365e-01, + -1.0250e-01, -3.9273e-01, 7.6085e-01, 2.8215e-01, 2.7522e-01, + 6.3696e-01, 3.5317e-02, 5.8776e-01, -8.9201e-02, -2.4734e-01, + 2.3635e-01, 6.3772e-01, -6.2483e-02, 8.0917e-01, -1.8566e+00, + 1.5441e-01, 2.7425e-01, 3.3344e-01, 1.9197e-01, 6.2422e-01, + -8.6932e-02, 8.8230e-02, 6.7806e-01, -6.2292e-01, 6.2146e-02, + 8.7037e-02, 4.9910e-01, 2.2075e-01, -1.1974e-01, -3.7742e-01, + -2.0259e-01, 1.1265e-01, 
-2.0129e-01, 3.3812e-01, -9.1389e-03, + -5.3591e-02, 3.6583e-01, -1.5610e-01, 4.3336e-01, -2.5268e-01, + 3.1972e-01, 1.0743e-01, -5.0063e-02, 3.2510e-01, -5.9961e-01, + 4.2868e-01, -2.2210e-01, 2.1076e-01, -1.3806e-01, -9.8569e-02, + -1.9991e-01, 2.0285e-01, -2.1988e-01, 1.2357e-01, 1.6476e-01, + 1.3403e-01, -2.3772e-01, 7.5840e-01, 4.1628e-01, 4.2165e-02, + 3.0577e-01, -4.3873e-01, -3.7216e-02, -1.1519e-01, -7.0516e-01, + -2.2037e-02, -5.4402e-01, 3.3992e-01, -1.9507e-01, -9.1473e-01, + 8.3870e-01, 4.1670e-01, -1.4147e-01, -1.6440e-01, 2.3323e-01, + 7.2164e-01, -6.6401e-02, -6.7743e-02, -5.1050e-01, 4.1789e-01, + 2.6773e-02, -8.0283e-02, 9.3218e-02, -4.2062e-01, -9.3459e-01, + 1.4858e-01, 1.4147e-01, 1.4865e-01, -3.5721e-01, 4.3898e-01, + -2.5401e-01, -2.9791e-01, 2.2632e-01, -4.6250e-01, -5.8804e-01, + 1.4498e-01, -8.5878e-01, 9.4009e-01, 3.6177e-01, 1.4910e-01, + 3.1275e-01, 5.7658e-01, 7.6616e-01, 8.8667e-02, 8.2457e-02, + -3.0712e-01, -1.1602e-01, 2.3959e-01, -2.5542e-01, 1.0640e-01, + -1.2604e-01, -1.1610e-01, -1.9213e-01, -3.2760e-01, -3.4821e-01, + -6.4125e-01, 3.7060e-01, 4.4295e-01, -1.2739e-01, 7.0215e-01, + 2.1715e-01, -1.5359e-01, -5.8012e-01, -3.6162e-02, -9.7989e-01, + 1.4529e-01, 7.3714e-01, 2.3299e-01, 8.8725e-01, -2.6198e-01, + 2.6619e-01, -1.0543e-01, -4.9063e-01, 5.4750e-02, 2.5521e-01, + 1.1565e-01, -1.9963e-01, 4.9922e-03, 3.8230e-01, 6.4455e-01, + 1.7291e-01, -4.4966e-01, 3.1876e-02, -2.6282e-01, -5.4284e-01, + -7.9918e-01, -1.7874e-01, 1.0447e+00, 2.4432e-01, -8.6598e-02, + -5.1222e-01, 1.6548e-02, 6.6075e-02, -1.4376e-01, 1.7247e-01, + 1.8433e-01, 2.0584e-01, 2.7407e-01, -7.5933e-02, 3.4370e-02, + 3.1634e-01, -4.4396e-01, -4.0123e-01, 2.0246e-01, 2.2675e-01, + 7.3490e-01, -6.0053e-02, 7.2985e-01, -2.9349e-01, -4.5335e-02, + 2.3239e-01, -3.9335e-01, -1.4336e-01, -9.0372e-02, -2.1095e-01, + 5.9891e-01, -1.2639e-01, -2.8169e-01, 3.9472e-01, -3.3182e-01, + 3.5402e-01, -4.3384e-01, -6.2287e-01, 1.0957e+00, 3.1057e-01, + 5.0804e-02, -4.2299e-02, 
4.6876e-01, 2.7346e-01, 4.2902e-02, + 1.6550e-01, 7.2328e-02, -4.3305e-02, 6.2557e-01, -7.6766e-01, + -5.2920e-02, 6.5177e-01, -1.5345e-01, 3.7510e-01, 3.6323e-01, + 6.0274e-01, -2.2344e-01, 9.2344e-02, -2.7282e-01, 2.9311e-01, + 9.2230e-01, 9.3975e-02, 8.5421e-01, -6.6581e-01, -4.6468e-02, + -3.2008e-01, 6.3147e-01, -9.3217e-01, 4.6425e-03, 5.7329e-02, + -8.6024e-02, 3.8421e-01, 4.3569e-01, -8.3151e-02, -5.1909e-01, + -2.5093e-01, -1.6634e-01, -3.0899e-01, -4.7583e-01, 6.6807e-01, + 4.9516e-02, 2.1708e-01, -5.3353e-02, 3.1828e-01, 1.2399e-01, + 5.4984e-01, 8.3559e-02, -3.9472e-01, -3.6320e-02, -5.7928e-01, + -1.9020e-01, -2.7261e-01, -4.0178e-01, -7.3205e-02, -1.8186e-02, + -3.6264e-01, -3.8185e-02, -6.0324e-01, 3.9541e-01, 2.4023e-01, + -5.1377e-01, -2.2388e-01, 9.4389e-02, -4.5919e-01, -1.6421e-01, + 8.8032e-01, -8.8406e-02, -8.6900e-01, -2.0488e-01, -3.9097e-01, + 7.3720e-01, 4.7167e-01, 4.7704e-01, -4.1401e-01, -6.6253e-01, + -5.8830e-01, -2.9232e-01, -1.3623e-01, 3.8029e-01, -1.6702e-01, + 9.0093e-01, -6.3571e-01, 2.2402e-01, 2.1330e-01, -3.2586e-01, + -5.9196e-02, 5.7648e-02, -1.0256e+00, 1.1868e-01, 2.8840e-01, + -1.7680e-01, -1.5109e-01, 9.8570e-01, -3.1493e-01, 1.3815e-01, + -6.8671e-01, 4.4593e-01, -2.5520e-01, 5.3355e-01, -1.8861e-01, + -1.9878e-01, 7.3772e-01, -1.4706e-01, 2.5136e-01, -6.4688e-01, + -5.8466e-01, 1.0780e-01, -2.1876e-01, -2.6948e-01, -3.7949e-02, + 1.6250e-01, -6.8278e-02, -1.8151e-01, 5.4320e-01, 6.2123e-01, + -5.9731e-01, -1.4939e-01, 8.1843e-02, -5.4720e-02, -6.0758e-01, + 5.5542e-01, 5.7590e-01, 3.9174e-01, 1.5900e-01, -3.9861e-01, + -1.9958e-02, 1.1678e-01, 5.5287e-01, -1.1611e-01, 1.8584e-01, + 5.0934e-01, -1.3474e-01, 4.3069e-01, 1.6100e-01, 1.6451e+00, + 3.0039e-01, -2.2575e-01, 3.3777e-01, -2.8258e-01, 2.4884e-02, + -3.2205e-01, 5.5202e-02, 4.3987e-02, -1.9287e-01, -5.9822e-01, + -9.0719e-01, -4.0716e-01, -6.1424e-01, -5.6892e-01, 4.0417e-02, + -8.3440e-01, 3.1679e-01, -2.0134e-01, 6.8621e-01, 4.4656e-01, + -1.7529e-01, 
1.1721e-01, -1.9876e-01, -3.1152e-01, 5.6802e-01, + 3.2369e-01, 6.3882e-02, 1.6577e-02, 4.6248e-02, 2.0744e-01, + 2.0860e-01, 1.0150e+00, -3.7986e-01, 9.3586e-02, 3.7668e-01, + -6.6433e-01, 1.0799e+00, -2.7410e-01, -1.8421e-01, -1.7920e-01, + -2.0334e-02, 3.3188e-01, 5.9764e-03, -2.9039e-01, -7.2283e-02, + -8.0281e-01, -1.1115e-01, 1.4681e-01, -1.6251e-01, 2.9035e-01, + -3.0332e-01, -7.9299e-01, 4.0145e-01, -3.1173e-01, -3.3306e-01, + -1.5556e-01, 8.4179e-01, 6.8650e-01, 4.9622e-01, -6.3749e-01, + -3.2113e-01, 5.1397e-01, -3.7570e-01, 4.3522e-01, -3.2065e-01, + 1.0057e-01, 1.7216e-01, 7.9660e-01, 3.2077e-01, 4.9979e-01, + 4.9811e-01, 5.9768e-02, -8.0808e-02, -1.3962e-01, 6.0569e-02, + 1.5873e-01, 1.0248e+00, -7.9364e-01, -5.7126e-01, -4.9706e-01, + -2.4641e-01, 1.4800e-01, -3.5420e-01, 2.5300e-01, 3.1038e-01, + -3.8231e-01, -2.1472e-01, -4.8862e-01, 1.2613e-01, -4.8684e-01, + -1.1218e+00, 4.5354e-01, 7.3050e-02, -7.5414e-01, 6.2755e-03, + 1.9597e-01, -1.7957e-01, -9.1913e-01, -9.3182e-02, 5.8686e-01, + -4.8479e-01, 2.5590e-01, 1.0595e+00, 2.7002e-01, 7.2647e-01, + 5.1018e-01, 1.9252e-01, 8.9410e-03, 3.9983e-01, -9.9736e-02, + -4.6141e-01, 1.7405e-01, 2.8290e-01, 6.3410e-01, 6.8365e-01, + 8.1244e-01, 5.7984e-01, 4.0560e-01, 3.8197e-02, -4.1310e-01, + 4.8830e-01, -5.7483e-01, 4.4491e-01, -5.7979e-01, -5.8200e-01, + -7.5095e-02, 4.5962e-01, -7.1681e-01, 9.5295e-02, 6.2722e-01, + 2.8345e-01, 2.8936e-01, 2.2348e-01, 2.3233e-01, -8.2202e-02, + 1.9226e-01, 1.9503e-02, 4.2562e-01, -5.9108e-01, -2.3255e-01, + 4.6791e-01, -4.6673e-01, 6.5900e-01, -6.2156e-01, 1.9633e-01, + 1.7809e-01, 5.3349e-01, -6.9414e-02, 9.1718e-01, 6.4407e-01, + 5.0983e-01, 2.0029e-02, 7.9499e-01, 2.8185e-02, -1.5870e-01, + -6.7845e-02, 4.3046e-01, 1.2244e-01, -7.2143e-01, 4.3565e-01, + -2.2752e-01, 8.3008e-02, 7.6862e-02, 3.0861e-01, 3.5815e-01, + 1.2071e-02, -2.8832e-01, -5.2904e-01, 6.2650e-02, 1.1320e+00, + -2.3723e-01, -2.9065e-01, -2.4751e-01, -9.2235e-01, -6.6487e-02, + 1.7130e-01, 3.2095e-02, 
3.2017e-01, -2.9495e-01, 1.4542e-01, + -4.9789e-01, -4.8924e-01, -1.7171e-01, 2.4293e-01, -3.5534e-01, + 1.9945e-01, 4.8569e-02, 6.2335e-01, 4.7964e-01, 2.9458e-01, + 1.5449e-01, -6.4109e-01, 1.3690e-01, 3.9368e-01, 1.4828e-01, + 1.4641e-01, 4.3600e-01, 1.0510e-01, -5.8869e-02, 6.5924e-01, + 3.5055e-02, -5.6399e-01, 4.5772e-01, 2.2873e-01, -2.3484e-01, + -3.7558e-01, 8.9041e-01, -2.2871e-01, -3.6462e-01, -1.2101e-01, + -2.2578e-01, -1.3908e-01, 7.4223e-02, 3.0451e-01, 1.8741e-02, + 3.2846e-01, -4.0961e-01, -3.4544e-01, -6.2249e-01, 2.4417e-01, + -2.0547e-01, 1.7231e-01, 3.1831e-01, 6.4965e-01, 3.9325e-01, + -8.2503e-01, 6.0124e-01, 2.8477e-01, -1.9307e-01, 5.5779e-01, + -9.2765e-02, 1.7029e-02, -4.8561e-01, 2.5786e-01, -2.5821e-01, + 3.1338e-01, -1.2206e-01, 6.9799e-01, -3.6036e-01, 4.3696e-01, + 9.1485e-01, 1.1678e+00, 1.5267e-01, 6.1679e-01, 4.5000e-04, + -8.2172e-01, -8.6353e-01, -8.9458e-02, 7.0973e-01, -1.7341e-01, + -6.2408e-01, 1.0639e-01, -6.4188e-02, -1.0023e-01, 4.2501e-01, + 2.7844e-01, -1.3440e-01, -4.0398e-01, -7.8034e-01, -2.8684e-01, + 2.7194e-01, 1.2444e-01, 4.0351e-01, -7.6288e-02, -1.9768e-01, + -4.2178e-01, 4.4424e-01, 1.3352e-01, -9.9392e-02, 6.9779e-01, + 1.6888e-01, 1.0123e+00, -6.2880e-01, 4.5987e-01, 4.4244e-01, + -5.3807e-01, 5.4973e-01, -1.0677e-01, -4.1490e-02, 2.9562e-01, + 5.8947e-02, 2.2215e-02, -6.4694e-01, 7.8909e-02, -2.7255e-01, + 3.1557e-01, 6.9443e-01, -5.6689e-01, -5.3598e-01, 1.7104e-01, + -2.7369e-01, 3.5588e-01, 3.2884e-01, -1.1530e-03, 4.7344e-01, + -2.1759e-01, -5.2551e-01, -3.0974e-02, 1.6733e-01, 3.1815e-01, + -1.3519e-01, 1.9444e-01, 5.8109e-01, -5.5995e-02, 1.9733e-01, + -4.8083e-01, 1.2766e-01, 3.8752e-01, -6.7796e-01, 2.5224e-01, + 2.2199e-01, -2.1854e-01, -5.9393e-03, 3.2313e-01, 2.8912e-01, + 9.3591e-02, 9.0599e-02, -5.3521e-01, 1.5640e-01, -1.7252e-01, + 3.9477e-01, 3.5321e-01, 8.6974e-02, -2.4951e-01, 3.8474e-01, + 1.4969e-01, -4.9545e-01, 8.4216e-01, 7.0573e-02, -3.3233e-01, + 2.1082e-01, 3.2282e-01, 1.4043e-01, 
-2.4110e-01, -2.8888e-01, + 2.5481e-01, -6.9019e-01, 2.9674e-01, 1.1068e-01, -2.5481e-01, + 1.7578e-01, -1.7300e-01, -2.6870e-01, -1.2870e-01, -3.5365e-01, + 4.1577e-02, 9.9443e-02, -1.6780e-01, -1.1617e-01, 1.6097e-01, + 4.3591e-01, 6.7614e-01, -3.5089e-02, 2.8122e-02, 2.7661e-01, + 3.7887e-01, 1.0609e+00, 7.0093e-01, -7.1668e-03, 7.7913e-01, + -4.2864e-02, 4.7460e-01, -1.0076e+00, 1.0556e-01, -1.8160e-01, + -7.2417e-02, 1.0140e+00, -3.5696e-01, -7.3666e-01, -2.3020e-01, + -3.2148e-01, 5.2983e-01, -3.2121e-01, 1.4286e-02, 6.7987e-01, + -6.5973e-01, -2.0075e-01, 3.7226e-01, 1.6172e-01, -3.3971e-01, + -1.9281e-01, -5.0010e-02, 9.4485e-01, -1.0869e-01, 6.2613e-01, + -1.2510e-01, 1.0464e+00, -5.2931e-01, 6.5617e-02, 1.1157e-01, + 2.1925e-01, 7.1122e-02, -5.7915e-01, -2.9074e-01, -1.2102e-01, + -1.3839e-01, -1.4838e-01, 4.2684e-02, -8.2936e-02, -1.4279e-01, + -1.2063e-02, 3.2719e-01, -4.2521e-01, -2.4905e-01, -4.9499e-02, + -5.2031e-02, -1.7989e-01, -4.2499e-02, 6.8209e-01, -1.5275e-01, + 4.2563e-01, 6.6697e-02, 1.5147e-01, 1.8749e-01, 8.2221e-01, + -2.5794e-01, 8.3046e-01, 2.4105e-01, 6.1676e-01, 3.9542e-01, + -2.3539e-01, -3.6405e-01, -6.6711e-02, -2.9740e-01, 9.6638e-02, + -4.3095e-01, 1.6043e-01, 8.7313e-02, 5.2172e-01, -1.8327e-01, + -8.9197e-02, -1.9214e-01, 2.0663e-01, -3.8714e-01, 3.1245e-01, + -5.1035e-02, -1.3826e-01, 2.8931e-01, 8.6621e-01, 3.4779e-01, + 6.2924e-01, 5.0303e-01, -1.4128e-02])Parameter containing: +tensor([[ 0.0033, -0.0180, 0.0063, ..., 0.0171, 0.0053, 0.0176], + [ 0.0050, 0.0288, 0.0542, ..., 0.0377, 0.0121, -0.0257], + [ 0.0002, -0.0528, 0.0353, ..., 0.0037, 0.0121, 0.0060], + ..., + [ 0.0066, 0.0045, 0.0136, ..., 0.0031, 0.0118, -0.0052], + [-0.0037, 0.0018, -0.0075, ..., 0.0004, -0.0041, 0.0008], + [-0.0034, 0.0127, -0.0073, ..., 0.0064, -0.0214, -0.0094]])Parameter containing: +tensor([ 0.3430, -0.0836, 0.0424, ..., -0.0128, -0.0226, 0.0145])Parameter containing: +tensor([[ 0.0056, -0.0199, 0.0132, ..., -0.0029, 0.0242, -0.0021], + [ 
0.0218, 0.0037, 0.0028, ..., -0.0120, 0.0019, 0.0024], + [ 0.0007, -0.0039, -0.0249, ..., -0.0206, 0.0182, 0.0032], + ..., + [-0.0161, -0.0262, -0.0194, ..., 0.0123, 0.0224, 0.0002], + [-0.0050, -0.0197, 0.0011, ..., -0.0005, 0.0069, 0.0091], + [ 0.0174, -0.0362, -0.0287, ..., -0.0099, -0.0143, 0.0133]])Parameter containing: +tensor([ 3.0563e-02, 1.5022e-02, -2.1133e-02, 1.6357e-02, -6.0081e-03, + -1.1963e-02, 3.7994e-02, 4.7791e-02, -1.2894e-02, 1.1734e-02, + -2.5070e-02, 1.4830e-04, 5.0049e-03, 1.2337e-02, -3.2715e-02, + 4.9316e-02, 1.1742e-02, 3.3302e-03, 5.0724e-05, 4.1748e-01, + -2.4628e-02, -1.8173e-02, -3.1830e-02, -2.8961e-02, -3.6240e-03, + -9.0256e-03, 1.6846e-02, 2.3712e-02, 5.6335e-02, -2.2003e-02, + 2.4048e-02, -2.3163e-02, -1.5518e-02, -1.2665e-02, -7.5340e-03, + 2.7512e-02, 7.3128e-03, 1.2070e-02, -2.4948e-02, -2.6566e-02, + -3.5828e-02, 2.5146e-02, -5.1208e-02, 2.1088e-02, -4.1779e-02, + 6.1913e-03, 5.8746e-02, -4.7882e-02, -1.5106e-03, 4.1168e-02, + 8.9417e-02, -3.4241e-02, 6.4354e-03, 2.9251e-02, 6.4049e-03, + 4.6082e-02, 1.7426e-02, -8.8654e-03, 2.8778e-02, -1.1444e-02, + -2.6718e-02, 6.9618e-03, -1.6754e-02, 4.9713e-02, -6.6467e-02, + -6.0081e-03, -9.1858e-03, 1.0658e-02, -1.7029e-02, -1.0211e-01, + -1.3550e-02, 7.3013e-03, 3.1830e-02, -5.3711e-02, -2.3651e-03, + -4.2297e-02, 2.1667e-02, 1.8265e-02, -7.1167e-02, -1.6129e-02, + -3.0380e-02, 4.4189e-02, 1.9302e-02, -2.7542e-02, 2.7588e-02, + 1.2352e-02, -6.1981e-02, 1.2680e-02, -2.4841e-02, -8.7976e-04, + 2.7847e-02, -8.7280e-03, -2.4399e-02, 4.6173e-02, 3.9154e-02, + -2.4811e-02, 2.4765e-02, -3.8788e-02, 2.2308e-02, 4.7363e-02, + 5.9845e-02, -1.8115e-01, -3.4698e-02, -2.8183e-02, -1.3344e-02, + 8.3303e-04, 2.7466e-02, 8.1558e-03, -9.5081e-04, 3.9001e-02, + -2.9678e-03, -1.5572e-02, 1.2978e-02, 2.1011e-02, 1.3420e-02, + -3.3478e-02, 2.9922e-02, -1.8646e-02, 6.6467e-02, -3.5191e-04, + 4.0405e-02, -2.4429e-02, -7.8430e-03, 3.7262e-02, -4.1382e-02, + 9.2163e-03, 3.3844e-02, 2.5925e-02, -3.6041e-02, 
3.5614e-02, + -1.3481e-02, 1.8936e-02, 1.7715e-02, -1.8723e-02, 4.6417e-02, + 2.0935e-02, -7.8659e-03, 3.4393e-02, 4.5288e-02, -3.3875e-02, + -1.4618e-02, -2.1305e-03, -8.7891e-03, -7.1411e-02, -3.6316e-02, + -2.5311e-03, 3.0899e-02, -2.4307e-02, -1.3008e-02, 5.2917e-02, + 3.5004e-02, 4.5197e-02, 1.9623e-02, -1.0841e-02, -2.4529e-03, + 3.7201e-02, 3.5339e-02, 1.3718e-02, 1.1375e-02, -2.6093e-02, + -3.7518e-03, -1.6266e-02, 1.4168e-02, -6.6345e-02, -2.2766e-02, + -1.7738e-04, -9.1370e-02, -1.2688e-02, -1.9135e-02, -2.5482e-02, + -2.1301e-02, -3.9154e-02, -1.4015e-02, 5.5115e-02, 1.1574e-02, + -4.7646e-03, 1.2192e-02, 2.0466e-03, -7.8430e-03, -2.8397e-02, + 6.3667e-03, 5.3024e-03, -6.4392e-03, -3.0655e-02, 3.6640e-03, + 3.9307e-02, 3.6133e-02, -7.0984e-02, 3.3264e-02, -2.8553e-03, + 3.6430e-03, 2.4078e-02, -3.6438e-02, 4.3945e-03, -5.9891e-03, + -6.0608e-02, 6.8283e-03, -2.0050e-02, -1.4502e-01, -5.0232e-02, + -2.8580e-02, 5.2216e-02, 3.1952e-02, -1.9638e-02, -1.4832e-02, + 1.3763e-02, 9.1980e-02, 3.2330e-03, 2.6703e-02, 1.0635e-02, + -2.0584e-02, -9.4528e-03, 1.0681e-02, 2.6474e-02, 5.6305e-03, + 8.0414e-03, 4.0321e-03, 4.0771e-02, 2.0218e-02, 2.9612e-04, + 2.7405e-02, -1.2093e-02, 1.5450e-02, 6.4888e-03, 1.6556e-02, + -2.0569e-02, -2.1271e-02, -9.4910e-03, 7.6942e-03, 1.9348e-02, + 7.6714e-03, 1.1742e-02, 9.8877e-03, -1.1467e-02, -4.6204e-02, + 2.9160e-02, 1.8372e-02, 2.3438e-02, 2.8290e-02, -3.2745e-02, + 1.2901e-02, -2.0935e-02, 3.3894e-03, -1.1246e-02, -2.3041e-02, + 1.7990e-02, 3.5400e-02, -1.7273e-02, 1.7090e-02, -4.0674e-04, + -1.7609e-02, 5.5328e-02, 3.3512e-03, -8.0795e-03, -2.8442e-02, + -4.3907e-03, -1.1787e-02, 6.7329e-03, 1.2016e-02, 7.5836e-03, + -2.9266e-02, 5.1147e-02, 4.1656e-02, -1.0422e-02, -1.1787e-03, + 6.7322e-02, -4.6326e-02, 3.3722e-02, 2.8801e-03, -4.9500e-02, + 3.8391e-02, -5.7297e-03, -4.5227e-02, -1.4793e-02, -3.0579e-02, + -9.2926e-03, 1.5287e-03, -1.3947e-02, -2.8183e-02, 1.8951e-02, + -9.3555e-04, -8.1177e-03, -3.7781e-02, 9.2087e-03, 
-2.1896e-02, + 9.0256e-03, 2.8885e-02, 1.8280e-02, -1.7990e-02, -3.0457e-02, + 1.5869e-02, 6.1417e-03, 2.1851e-02, -1.4830e-03, 5.3619e-02, + 1.2711e-02, 3.5950e-02, 1.3435e-02, 1.3573e-02, -1.9135e-02, + -1.1078e-02, 1.0445e-02, 2.7786e-02, -3.2501e-02, 2.6264e-03, + -1.0414e-02, -2.8107e-02, -5.7465e-02, -1.2390e-02, -1.8738e-02, + -9.8190e-03, -4.6570e-02, 4.6875e-02, -1.9806e-02, -4.6082e-03, + -6.2683e-02, -2.6245e-02, -2.0050e-02, -1.5404e-02, 4.7302e-03, + 5.3375e-02, 4.2847e-02, -4.5746e-02, 8.3160e-03, 1.4453e-01, + 2.1973e-02, 5.1758e-02, -6.7200e-02, -3.7098e-03, 3.9825e-02, + 7.1449e-03, -7.8354e-03, -2.8580e-02, 4.2755e-02, 5.2719e-03, + 9.1705e-03, -5.1117e-03, 1.6525e-02, 1.8982e-02, 9.0332e-03, + -2.6703e-02, -1.9089e-02, -2.3727e-02, 1.7212e-02, -9.4757e-03, + -3.8849e-02, -2.0828e-02, -8.3313e-03, -1.8707e-02, -3.2562e-02, + 9.6970e-03, 6.1150e-03, 2.6825e-02, 8.3740e-02, -2.2720e-02, + -3.9642e-02, -4.4365e-03, -2.4857e-02, 3.0396e-02, 8.9111e-03, + 5.9875e-02, 2.7298e-02, -3.1921e-02, -6.9160e-03, 1.5610e-02, + 3.0518e-02, 3.3295e-02, -3.5187e-02, -4.2023e-02, -8.5297e-03, + -3.5583e-02, -1.6861e-02, 4.6265e-02, -7.0557e-02, -3.6499e-02, + 2.5085e-02, 2.0981e-02, 3.0685e-02, 2.8503e-02, -7.3586e-03, + 3.8544e-02, 5.4199e-02, 2.5284e-02, -1.6983e-02, 4.5074e-02, + 5.7831e-03, -2.1332e-02, 6.0654e-03, 6.9771e-03, -6.9389e-03, + -2.3346e-02, -3.6377e-02, -2.4643e-02, -3.4943e-02, 5.5267e-02, + -2.8488e-02, 4.8889e-02, 5.4207e-03, 3.5492e-02, -1.7471e-02, + 1.9699e-02, -3.4058e-02, -1.1841e-02, 3.0594e-02, -1.9592e-02, + 9.1324e-03, 1.2169e-02, -4.5105e-02, -6.2622e-02, -2.1561e-02, + -2.1648e-03, -1.4114e-02, -2.3193e-02, 2.5360e-02, 1.1749e-01, + 1.9470e-01, 1.6983e-02, 3.8239e-02, -5.1300e-02, -1.4488e-02, + -8.2169e-03, -2.2324e-02, 2.3209e-02, 5.4199e-02, -1.3283e-02, + 2.9816e-02, 1.4275e-02, -1.6251e-02, 5.5504e-03, -2.1973e-03, + 8.6136e-03, -1.1513e-02, 2.7313e-02, 3.3966e-02, -3.6926e-02, + 1.5045e-02, 4.6631e-02, 4.9591e-03, -5.6152e-02, 
-1.8768e-02, + 8.4076e-03, -6.5308e-03, 2.0355e-02, -1.8872e-01, -2.5024e-02, + -3.5156e-02, 8.7128e-03, 3.6736e-03, 2.4811e-02, -3.6407e-02, + 2.1194e-02, -7.6599e-03, 4.4098e-02, -2.0309e-02, 2.6226e-03, + 1.0490e-03, 3.2990e-02, -7.0435e-02, -2.0275e-03, -2.9583e-03, + -3.7823e-03, 7.5531e-03, 2.1912e-02, -3.1834e-03, -3.5736e-02, + 1.2581e-02, 3.1799e-02, -2.8992e-02, 1.6189e-04, 4.4220e-02, + -7.6485e-03, 6.2294e-03, -2.1684e-04, 1.3824e-02, 3.4607e-02, + 1.7349e-02, 2.7435e-02, 2.9144e-02, 1.6586e-02, 4.3182e-02, + 1.6373e-02, 3.4088e-02, -2.6337e-02, -2.5360e-02, -2.1591e-02, + 1.2138e-02, 3.3966e-02, -1.6785e-02, 3.3016e-03, 4.3030e-02, + 5.4352e-02, -1.6661e-03, -2.3773e-02, 6.8779e-03, -2.1927e-02, + -1.1436e-02, 2.0020e-02, -8.5449e-04, -3.5370e-02, -8.2445e-04, + 1.9207e-03, -5.0293e-02, -2.7145e-02, 2.3254e-02, -6.4125e-03, + -6.0425e-02, 4.0527e-02, -1.0811e-02, -4.1313e-03, 9.3842e-03, + 1.8646e-02, 3.0502e-02, -1.2569e-03, 9.6436e-03, -6.1464e-04, + 1.0376e-02, -8.2855e-03, 5.0323e-02, 3.0655e-02, -3.9795e-02, + -2.5692e-03, 1.4099e-02, -1.9882e-02, 2.7069e-02, -3.4363e-02, + -1.5350e-02, 2.1240e-02, -3.4088e-02, 1.7563e-02, 2.0416e-02, + -4.5433e-03, -1.6800e-02, 2.2903e-02, 1.9836e-02, 3.5645e-02, + -2.2095e-02, -2.8473e-02, 3.1586e-02, 3.9978e-03, 1.6434e-02, + -2.5055e-02, -2.1393e-02, -3.7048e-02, 3.2928e-02, -7.7576e-02, + 1.9775e-02, -3.0182e-02, -3.0579e-02, -2.3712e-02, 4.1718e-02, + -5.3955e-02, -1.2665e-02, 2.3529e-02, 1.5541e-02, 3.0777e-02, + -3.0563e-02, 2.6566e-02, 3.1586e-02, 9.0561e-03, -1.9897e-02, + -4.0474e-03, 4.6295e-02, 2.7832e-02, -2.1439e-02, 6.4575e-02, + -1.9226e-02, 1.3092e-02, 1.3664e-02, -4.5532e-02, 2.2354e-02, + -2.8320e-02, 1.1826e-02, 4.0649e-02, 1.7624e-02, 3.6804e-02, + 1.0056e-02, 1.4320e-02, 8.7585e-03, -3.9673e-03, 9.0714e-03, + 1.8906e-02, 5.5115e-02, 8.3923e-03, 1.2184e-02, -6.7062e-03, + 1.6266e-02, 5.0659e-02, 1.7426e-02, 4.1351e-02, 1.8711e-03, + 6.3362e-03, -2.0966e-02, 2.3327e-03, 4.7058e-02, 3.5553e-02, 
+ -1.1131e-02, -4.3274e-02, -2.3331e-02, -6.9618e-03, -1.7822e-02, + -3.7415e-02, -2.8198e-02, -2.1149e-02, -6.0141e-05, -5.2704e-02, + 5.2567e-03, -5.4382e-02, 1.0933e-02, -5.8807e-02, 1.4450e-02, + 6.4926e-03, -5.9418e-02, 2.5940e-02, 2.9053e-02, 2.6855e-03, + 2.2385e-02, 5.7793e-03, 1.7624e-02, -9.3613e-03, -1.0327e-01, + -9.1858e-03, 2.5253e-02, 2.8381e-02, 2.8564e-02, -1.3519e-02, + 2.2125e-02, -8.2245e-03, 3.4882e-02, -2.5787e-02, -1.0826e-02, + 1.3245e-02, -1.4206e-02, -3.7445e-02, -4.8218e-02, -4.0039e-02, + 4.6753e-02, -5.1069e-04, -2.1774e-02, 3.0930e-02, -6.6406e-02, + -2.5925e-02, -3.2410e-02, 7.2266e-02, -4.3671e-02, 9.2010e-03, + 3.1555e-02, 2.4765e-02, -6.3229e-04, -5.9891e-03, 2.8934e-03, + -2.4963e-02, -4.7607e-03, 7.7515e-03, -2.6108e-02, 7.8430e-03, + -2.4933e-02, -4.3518e-02, 3.5839e-03, 2.2598e-02, 1.8845e-02, + -1.0582e-02, -1.5945e-02, 3.8834e-03, 2.5909e-02, 6.1249e-02, + 2.2537e-02, 1.1930e-03, 4.0375e-02, -1.0376e-02, -1.4725e-02, + 2.5299e-02, 3.1250e-02, -9.2773e-03, -7.6294e-03, -3.4058e-02, + 3.1647e-02, -3.0411e-02, 1.0216e-02, -4.1870e-02, -2.3346e-03, + -1.7715e-02, 1.6321e-01, -1.7548e-02, 1.6037e-02, -5.9052e-02, + 8.2474e-03, 4.6967e-02, 1.0948e-02, -2.3087e-02, -3.6835e-02, + -2.7222e-02, -6.8115e-02, 1.6403e-02, 1.6785e-02, -1.6510e-02, + -5.9547e-03, 2.3270e-02, -5.1849e-02, -2.9083e-02, -5.3223e-02, + 1.1513e-02, 1.6281e-02, 6.8741e-03, 3.1982e-02, -2.7908e-02, + 2.6627e-02, 4.6051e-02, -1.7532e-02, -5.6534e-03, -9.1782e-03, + 2.6505e-02, 2.7390e-02, -1.2978e-02, -1.3390e-02, -2.7512e-02, + -4.7852e-02, -3.6346e-02, 1.0300e-02, -1.5511e-02, -3.6133e-02, + -4.7821e-02, -1.3428e-02, -6.1493e-03, -8.7051e-03, 2.0996e-02, + 1.2634e-02, 2.6264e-03, 2.4139e-02, 9.1267e-04, -3.4637e-03, + 8.2321e-03, -4.6997e-02, -3.5278e-02, 6.2523e-03, 2.5005e-03, + 1.4305e-02, -1.4420e-02, -7.4816e-04, 1.2001e-02, 4.5166e-03, + -4.9194e-02, 1.4847e-02, 2.1591e-03, -1.6916e-04, 1.1780e-02, + 4.7058e-02, 4.8462e-02, -1.8280e-02, -3.1776e-03, 
-3.6560e-02, + -5.1239e-02, 1.7960e-02, 9.1019e-03, -3.3142e-02, -1.5480e-02, + -1.3832e-02, -1.0956e-01, -3.7975e-03, -5.7343e-02, -1.4809e-02, + -9.6893e-03, 4.9866e-02, -5.1880e-02, 4.2175e-02, -2.3911e-02, + 1.9703e-03, -2.2034e-02, -3.8261e-03])Parameter containing: +tensor([1.0895, 1.1491, 1.1280, 1.0222, 1.1739, 1.0663, 1.1026, 1.1196, 1.0610, + 1.0362, 1.0344, 1.1302, 1.1394, 1.1145, 0.9793, 1.2097, 1.0334, 1.2245, + 1.1241, 0.2753, 1.0395, 1.0472, 1.1671, 1.0659, 1.0802, 1.1507, 1.0881, + 1.0900, 1.1905, 1.2015, 1.1597, 1.0814, 1.0713, 1.1395, 1.1696, 1.3321, + 1.0179, 1.0746, 1.1628, 1.0975, 1.0572, 0.9831, 1.1066, 1.0873, 1.2887, + 1.0379, 1.0448, 1.2154, 1.0990, 1.0652, 1.1241, 1.1402, 1.1011, 1.1263, + 1.1139, 0.9162, 1.0919, 1.2903, 1.1040, 1.1012, 1.1183, 1.0331, 1.1440, + 1.1119, 1.1228, 1.1293, 0.9695, 1.2707, 1.1637, 1.2276, 1.0280, 1.0591, + 2.1889, 1.0572, 1.1401, 1.0796, 1.1198, 1.0126, 1.1814, 1.0254, 1.1224, + 1.1836, 1.1246, 1.0903, 1.2133, 1.1445, 1.1274, 1.1002, 1.1227, 1.1008, + 1.1070, 1.1492, 1.1207, 1.1143, 1.1237, 1.0610, 1.0389, 1.1207, 1.0998, + 1.1228, 1.1145, 1.0209, 1.0081, 1.0739, 1.1013, 1.1098, 1.0723, 1.2898, + 1.1376, 1.1546, 1.0668, 1.0175, 0.9574, 1.0351, 1.1479, 1.0740, 1.1223, + 1.1233, 1.1137, 1.1355, 1.1567, 1.0250, 1.1973, 1.0578, 1.1464, 1.1832, + 1.0966, 1.0225, 1.1626, 1.0823, 1.0742, 1.0992, 1.0003, 1.0753, 1.1261, + 1.1817, 1.2336, 1.2113, 1.0655, 1.0400, 1.0588, 1.0387, 1.1333, 1.0340, + 1.0439, 1.0065, 1.0902, 1.1669, 1.0910, 1.1070, 1.1481, 1.1906, 1.0798, + 1.1427, 1.1982, 0.9619, 1.2056, 1.1212, 1.1140, 1.0979, 1.1636, 1.1301, + 1.0640, 1.0920, 1.1086, 1.0510, 1.1392, 1.1131, 1.1167, 1.1602, 1.0656, + 1.1571, 1.0212, 1.1582, 1.1673, 1.1256, 1.0929, 1.2417, 1.1797, 1.0986, + 1.1346, 1.2516, 1.1074, 1.0885, 1.2958, 1.1755, 1.0924, 1.5735, 1.1752, + 1.1061, 1.2396, 0.9582, 1.1471, 1.0999, 1.1809, 1.0361, 1.0556, 1.1514, + 4.4042, 1.1847, 0.9946, 1.0696, 1.2486, 1.2341, 1.0585, 1.1061, 1.1108, + 1.0926, 1.1026, 
1.2013, 1.1338, 1.1785, 1.0487, 1.1234, 1.1309, 1.1504, + 1.0983, 1.0937, 1.1015, 1.1060, 1.1004, 1.1597, 1.0480, 1.0293, 1.1476, + 1.1468, 1.0856, 1.1552, 1.1258, 1.1367, 1.0792, 1.1552, 1.2199, 1.0825, + 1.0273, 1.0731, 1.1292, 1.0727, 1.1044, 1.0763, 1.1227, 1.1270, 1.2005, + 1.1391, 1.2601, 1.0759, 0.9800, 1.0601, 1.0598, 1.1195, 1.1037, 1.1359, + 0.9948, 1.0792, 1.1656, 1.0665, 1.0727, 1.0877, 1.0512, 0.9722, 1.0822, + 0.9968, 1.0633, 1.0913, 1.1195, 1.1480, 1.0806, 1.1148, 1.1943, 1.1331, + 1.2171, 1.1391, 1.0036, 1.0460, 1.0607, 1.0629, 1.1052, 1.1523, 1.1389, + 1.0923, 1.0779, 1.2070, 1.1827, 1.1731, 1.1759, 1.0740, 1.1694, 1.0501, + 1.1126, 1.0367, 1.0730, 1.1285, 1.0258, 1.1576, 1.1315, 1.0176, 1.0823, + 1.1763, 1.2783, 1.0968, 1.2560, 1.1037, 1.1538, 1.1152, 1.0765, 1.0244, + 1.0208, 1.0763, 1.1061, 0.9937, 1.1177, 1.1626, 1.0887, 1.0125, 1.1349, + 1.1448, 1.1865, 1.0696, 1.1315, 1.2748, 1.0957, 1.2077, 1.0925, 1.0961, + 0.4362, 0.8269, 1.1157, 1.1701, 1.0544, 1.1217, 1.1581, 0.9604, 1.1525, + 1.0654, 1.0816, 1.0749, 0.9441, 1.0297, 1.1223, 1.1353, 1.1195, 1.0887, + 1.1435, 1.1358, 1.1909, 1.1669, 1.1307, 1.0546, 1.1769, 1.1563, 1.1995, + 1.0511, 1.0491, 1.0806, 0.9967, 1.1176, 1.0901, 1.0295, 1.1788, 1.1354, + 1.0126, 1.0746, 1.0824, 1.0353, 1.2085, 1.1083, 1.0764, 1.0563, 1.0498, + 1.1317, 1.1567, 1.1507, 1.0341, 1.1843, 1.0600, 1.0345, 1.1261, 1.1050, + 1.0773, 1.0134, 1.1439, 1.3424, 1.1876, 1.0634, 1.1512, 1.0344, 1.0040, + 1.1639, 1.0634, 1.1018, 1.2177, 1.0125, 1.1937, 0.8632, 1.2447, 1.1296, + 1.0644, 1.1010, 1.1356, 1.1632, 1.0518, 1.1655, 1.0770, 1.1398, 1.1057, + 1.2451, 1.1343, 1.1630, 1.1435, 1.0383, 1.1496, 1.1279, 1.2263, 1.0600, + 0.9016, 1.7528, 1.1242, 1.1495, 1.1949, 1.1088, 1.0201, 1.1023, 1.0593, + 1.1124, 1.0015, 0.9765, 1.1217, 1.0880, 1.0674, 1.0305, 1.1003, 1.0986, + 1.1196, 1.0115, 0.9515, 1.0412, 1.0447, 1.1111, 1.2276, 1.0558, 1.0263, + 1.1577, 1.1810, 1.1464, 1.1950, 1.1154, 1.1469, 1.0784, 1.1425, 1.1325, + 0.9748, 1.1008, 
1.2164, 1.1759, 1.1185, 1.0715, 1.0538, 1.1341, 1.1335, + 1.1417, 1.0734, 1.1603, 1.1504, 1.0499, 1.0453, 1.1896, 1.0659, 1.1221, + 1.0744, 1.0764, 1.1328, 1.0917, 0.9529, 1.1041, 1.1418, 1.0918, 1.0488, + 1.0744, 1.1292, 1.1925, 1.0147, 1.1584, 1.1822, 1.0020, 1.1705, 1.0952, + 1.1119, 1.1396, 1.0830, 1.0932, 1.0332, 1.1715, 1.1709, 1.2496, 1.0912, + 1.1014, 1.0597, 0.9969, 1.1519, 1.0397, 1.1136, 1.1051, 1.2365, 1.0718, + 0.9613, 1.0971, 0.9209, 1.2023, 1.0763, 1.1217, 1.0613, 1.1238, 1.1573, + 1.1401, 1.2079, 1.0841, 1.1320, 1.0662, 1.0452, 1.0370, 1.1594, 1.1060, + 1.2394, 1.0779, 1.1139, 1.0556, 1.0827, 1.0480, 1.1069, 0.9860, 1.2285, + 1.1220, 1.2044, 1.0099, 1.1480, 1.2758, 1.1016, 1.0456, 1.0680, 1.0806, + 1.1920, 0.9602, 1.1260, 1.0967, 1.0894, 1.0856, 1.0547, 1.1553, 1.1625, + 1.1529, 1.0368, 1.1202, 1.2378, 1.0886, 1.2828, 1.1100, 1.0849, 1.1190, + 1.0355, 1.1792, 1.1415, 1.1741, 1.1189, 1.1158, 1.0776, 1.2510, 1.2408, + 1.0956, 0.9678, 1.2549, 1.1548, 1.1318, 1.1330, 1.0840, 1.0474, 1.0956, + 1.1239, 1.0163, 1.1235, 1.1863, 1.1862, 1.1465, 1.0752, 1.0207, 1.1893, + 1.2338, 1.1984, 1.0556, 1.0000, 1.1678, 1.0845, 1.6301, 1.1264, 1.0290, + 1.0311, 1.0892, 1.0607, 1.0999, 1.0946, 1.2030, 1.0882, 1.1124, 1.1719, + 1.1915, 1.0641, 1.0739, 1.0830, 1.1107, 1.1630, 1.1085, 1.1904, 1.0162, + 1.1530, 1.0941, 1.2059, 1.0574, 1.1017, 1.0775, 1.2459, 1.0807, 1.0607, + 1.2741, 1.1496, 1.2021, 1.0923, 1.0298, 1.1785, 1.0046, 1.0668, 1.1532, + 1.1207, 1.1642, 1.0606, 1.1673, 1.1646, 1.0249, 0.9916, 1.0798, 0.9993, + 0.9868, 1.0063, 1.1514, 1.3287, 1.0684, 1.0735, 1.1959, 1.1412, 1.2029, + 1.2507, 1.0586, 1.0936, 1.1616, 1.2078, 1.1429, 1.1928, 1.0335, 1.1191, + 1.1077, 1.0536, 1.1241, 1.2090, 1.1281, 1.1716, 1.1917, 1.0936, 1.2262, + 1.2455, 1.1533, 1.1363, 1.1134, 1.2211, 1.0558, 1.0892, 0.9781, 1.0907, + 1.0923, 1.1208, 1.1447, 1.0259, 1.0367, 1.2340, 0.3527, 1.1055, 1.0547, + 1.1139, 1.0485, 1.1752, 1.1343, 1.1493, 1.1050, 1.0833, 1.0571, 1.1405, + 1.1860, 1.1253, 
1.0112, 1.0885, 1.1551, 1.0193, 1.0367, 1.0937, 1.2202, + 1.1137, 1.0781, 1.0423, 1.0075, 1.1260, 1.1049, 1.1311, 1.0788, 1.1301, + 1.1468, 1.0984, 1.0819, 1.0745, 1.1147, 1.0569, 1.1257, 1.1980, 1.2418, + 1.1304, 1.0243, 1.1301, 1.1706, 1.1504, 1.0834, 1.0783, 1.1954, 1.1526, + 1.1944, 1.0511, 1.1842, 1.1555, 1.0770, 1.0528, 1.0551, 1.1226, 1.1294, + 1.0864, 1.1444, 1.0887, 1.0853, 1.2392, 1.0416, 1.0600, 1.1678, 1.1766, + 1.1449, 1.1450, 1.2752, 1.1177, 1.1140, 1.1532, 1.1915, 1.1941, 1.0838, + 1.1238, 0.9961, 1.0554, 1.1288, 1.1269, 1.1024, 1.1493, 1.1472, 1.1067, + 1.1309, 1.0237, 1.1736])Parameter containing: +tensor([ 1.2977e-01, 3.4533e-02, -1.8398e-01, 1.2173e-02, 5.4290e-02, + 7.0416e-02, 6.9125e-02, 6.3581e-02, -6.3971e-02, 2.2436e-02, + 3.0890e-02, -2.6753e-02, -7.2576e-02, -1.1489e-02, -3.9083e-03, + -2.5749e-02, -4.7372e-03, 3.9891e-02, -4.1990e-02, 2.9299e+00, + -6.0782e-02, 1.1301e-03, 2.0187e-02, 3.1748e-03, -4.5030e-02, + -2.1860e-02, -1.3268e-02, 1.2331e-02, 1.7767e-01, -1.1032e-01, + 6.1784e-02, 1.7215e-02, 2.1386e-03, 3.7935e-02, 1.9422e-02, + -5.7784e-02, 1.7874e-02, -5.3821e-02, -1.6726e-01, -1.2773e-04, + 2.2345e-02, 3.6149e-02, -9.7282e-03, -2.8852e-03, -1.4355e-02, + 7.7343e-03, 5.9046e-02, -6.3718e-02, 3.3771e-02, 5.9758e-02, + 5.2067e-02, -2.4261e-02, 6.2351e-02, 2.6544e-02, 6.7204e-02, + 5.9771e-03, -8.1034e-02, -3.6046e-02, 7.0604e-02, -1.8416e-02, + 1.0626e-02, -3.5582e-02, 2.1963e-02, 3.6199e-02, 3.9028e-02, + 1.8032e-02, 4.5988e-02, 1.1960e-01, -4.5342e-02, 8.4323e-02, + -2.4700e-03, 2.0580e-02, 1.5001e-01, -4.6368e-02, -8.9924e-02, + 3.1414e-02, 4.3287e-04, -6.4596e-02, -3.9071e-02, 3.8661e-02, + -9.9305e-03, 3.0970e-03, -1.4900e-02, 5.3587e-02, -1.1936e-02, + 9.2194e-02, 3.4322e-02, 6.0066e-02, -1.2689e-02, -1.6213e-03, + -3.5178e-02, 2.7978e-02, 2.1645e-02, -3.3493e-02, -9.2206e-03, + -8.8815e-03, -1.5690e-03, -5.0180e-02, -8.9474e-03, 4.4740e-02, + 5.0755e-02, 1.8484e-01, 3.5222e-02, -6.9127e-02, 2.0810e-02, + 2.0733e-02, 
1.6405e-02, 3.1962e-04, 6.9797e-03, -2.8417e-02, + -5.4037e-02, -3.7544e-02, 6.0598e-02, 3.6609e-02, 6.1370e-02, + -2.5203e-02, -3.8131e-02, -5.4255e-02, 3.5124e-02, 5.2478e-02, + -7.8859e-02, -1.1023e-01, -5.7933e-02, -2.1626e-02, -1.1320e-01, + 1.1809e-02, -7.8021e-02, 6.3488e-03, -1.6868e-02, -5.0843e-02, + -7.9675e-02, 7.3526e-02, 5.4632e-02, 3.0480e-03, -4.9357e-02, + -7.2188e-03, 4.4292e-03, -5.3419e-03, -2.6700e-02, -3.8512e-02, + 2.4960e-02, 1.5461e-02, 2.2325e-03, -7.1685e-02, 2.8958e-02, + -1.4240e-02, 3.5438e-02, -4.7604e-02, -2.5415e-02, 6.7599e-02, + 1.6583e-01, -4.5225e-02, -7.2389e-03, -1.5869e-02, -4.5997e-02, + -1.9443e-02, 1.2420e-02, -8.4762e-02, 3.5484e-03, -1.5639e-02, + 9.2677e-02, -1.1708e-01, -1.6392e-02, -2.0767e-01, 3.2551e-02, + 9.6369e-03, -7.5081e-02, 4.5103e-02, -5.6623e-02, 5.0668e-03, + -1.5549e-02, 2.9004e-02, 8.0657e-02, 1.4856e-02, 2.5327e-02, + -3.2253e-02, 2.4493e-02, 2.5677e-02, -2.6763e-02, -3.0572e-02, + 2.1989e-02, -6.9528e-02, 8.1583e-02, -2.7302e-02, 2.2262e-02, + -1.3821e-01, 2.7482e-02, 1.3539e-01, -2.5331e-02, -9.1998e-02, + 3.9943e-02, 2.4111e-02, 3.1964e-02, 4.0118e-02, -6.6720e-02, + -2.8206e-02, -1.7699e-02, -1.0273e-01, -7.2117e-01, 2.6851e-02, + -1.3055e-01, 2.8005e-02, 2.8573e-04, 4.3966e-02, 4.6126e-02, + 1.1302e-01, 1.7980e-02, 6.0565e-02, 7.2146e-02, 2.3062e-02, + 3.7915e-03, 1.7058e-01, -1.2183e-02, 1.1138e-02, -3.0831e-02, + -1.5247e-02, -2.0833e-02, 5.8323e-02, -5.0730e-02, -9.2447e-02, + 8.0505e-03, 3.1202e-02, -6.0900e-02, -6.7741e-02, 4.0749e-02, + -6.8300e-02, 3.5966e-03, -4.5552e-02, 6.6899e-02, -1.6725e-02, + 5.5230e-03, 1.6007e-02, 4.6567e-02, 1.9058e-02, -4.2146e-02, + 4.0774e-02, -3.5646e-02, 6.9086e-02, 6.9511e-02, 1.3817e-02, + 3.6827e-02, 5.5455e-02, 3.5128e-02, 2.2640e-02, 2.5301e-02, + 3.7564e-02, 6.2913e-02, 1.1184e-01, 4.4412e-02, -4.5695e-02, + 3.3184e-02, 9.0154e-02, 9.1126e-02, -6.9666e-02, 1.1205e-01, + -3.3497e-02, 2.1879e-02, -1.0857e-01, -5.0966e-03, -2.1120e-02, + -1.6438e-02, 
1.3116e-02, 1.7569e-02, -4.8693e-02, -3.2089e-02, + 2.1517e-02, -8.8201e-02, 1.0595e-02, -1.0550e-02, -6.0092e-02, + 8.4827e-03, 4.6030e-02, 5.5492e-02, -3.3597e-02, 5.8026e-04, + 6.9805e-02, -7.8620e-02, -5.8343e-03, 4.6980e-02, -5.9122e-02, + 5.9284e-02, 1.1318e-01, -5.5452e-02, 4.8808e-02, 2.4126e-02, + 2.5154e-02, 5.0583e-03, 2.2864e-04, 6.1358e-02, -8.5796e-02, + -9.8663e-03, -1.4949e-02, 4.5410e-02, 2.3115e-02, 2.4214e-02, + -2.1457e-02, 6.9584e-02, 1.6610e-02, 7.6730e-03, 1.4294e-02, + 1.4369e-02, 1.2196e-02, -1.2681e-02, 6.1613e-03, 1.9278e-02, + -1.4352e-02, 4.2145e-02, -1.1340e-01, 6.2743e-02, -6.3384e-02, + 1.6000e-02, -2.0184e-02, 2.0241e-02, -6.9196e-02, 3.1063e-02, + -1.2378e-01, -3.9613e-03, 1.5876e-02, 7.3869e-02, -3.9551e-02, + -1.2366e-02, 1.5304e-01, -5.1567e-02, 2.8741e-03, -4.6380e-01, + -1.3505e-05, 5.2405e-02, -1.0747e-02, 8.6793e-03, -5.3858e-02, + 1.2035e-01, -7.1143e-02, 4.2165e-02, 4.5190e-04, 4.4525e-02, + 7.1364e-02, -1.9798e-02, 1.1052e-02, -5.2190e-02, 7.9460e-03, + -6.8076e-02, -1.7711e-02, -6.1290e-02, -1.2022e-02, 4.6646e-02, + -1.8673e-02, 6.7254e-02, 4.9145e-02, -1.0084e-03, -4.2727e-02, + 1.5223e-02, 1.3736e-02, 2.6416e-02, 4.2571e-02, -9.0382e-03, + -5.3650e-02, -3.2191e-02, -2.2234e-02, 2.5080e-02, -4.0795e-02, + 4.0941e-02, -1.2991e-02, -2.1576e-02, -5.1419e-02, 1.0806e-02, + -2.5446e-02, 5.2107e-02, 4.3128e-02, -3.1123e-02, 6.3914e-03, + 6.4913e-03, -5.8185e-03, 1.7926e-03, -2.6158e-03, -2.5587e-02, + 5.5986e-02, 8.1868e-02, 2.9394e-02, 1.6433e-02, -2.4356e-02, + -4.0253e-02, 7.8868e-02, 1.7375e-02, 1.4129e-02, -4.3368e-02, + -4.3876e-02, -1.3003e-02, -4.8500e-02, -3.6869e-02, -4.2772e-02, + -7.1692e-03, -1.4630e-01, -3.6647e-02, -7.6028e-02, 9.2405e-02, + 5.4635e-03, 3.8962e-02, 3.1438e-02, 2.9575e-03, 9.4233e-03, + 7.2004e-02, -3.4432e-02, 4.5634e-02, -8.9947e-03, -7.8658e-02, + -9.7364e-02, -7.2371e-02, -3.9383e-02, -3.5334e-02, -1.1011e-02, + -1.1150e-02, 8.6494e-03, 5.0095e-02, -2.7079e-02, -7.5610e-02, + -2.1079e-01, 
2.8796e-02, 1.3110e-02, -3.3392e-02, -4.5108e-02, + 4.0402e-04, -2.8353e-02, 3.7843e-02, 1.1853e-02, 2.7821e-02, + 4.5922e-02, -1.2891e-01, -7.2979e-02, 2.3094e-02, 1.4984e-02, + 1.5402e-02, -7.1183e-02, -4.1936e-03, 3.6530e-02, -4.5188e-02, + 2.7603e-02, 2.5460e-02, 5.8202e-02, 8.3031e-02, -4.8382e-02, + 3.7847e-02, -6.2521e-02, -2.2624e-04, -7.8755e-02, 8.5045e-02, + -9.0725e-02, 3.5782e-02, -3.7057e-02, 1.0535e-01, 2.0793e-03, + 1.2219e-02, 8.5230e-03, -7.6473e-02, 1.6504e-02, 3.2562e-02, + -4.8595e-02, 5.7389e-02, 1.1120e-02, -3.3858e-02, -1.0108e-01, + 2.3178e-02, 4.0618e-02, -2.3842e-02, 2.9851e-02, -4.7033e-02, + -5.0536e-02, 4.2567e-02, -7.8787e-02, 3.5106e-02, 1.1993e-03, + -1.4537e-02, -5.1278e-02, -2.4576e-02, 4.4611e-02, -5.3965e-02, + 3.4502e-02, 7.4910e-02, 3.1964e-02, -3.4592e-02, 8.4762e-02, + 6.6939e-02, -7.5200e-02, 4.3292e-02, 1.6985e-01, -6.6695e-02, + 2.4553e-02, -7.5039e-03, 5.2509e-02, 8.2030e-02, 2.4376e-02, + 4.2058e-03, 2.3543e-02, 6.1570e-03, -1.6743e-02, -9.7259e-02, + 2.4244e-02, -2.2547e-02, 2.2843e-02, -4.1545e-02, -1.6013e-02, + 8.3000e-03, -8.5513e-02, 4.4356e-02, 5.1328e-03, -1.9995e-02, + 7.8072e-02, 2.3570e-02, 2.9447e-02, 7.6935e-02, 9.6576e-03, + -6.4136e-02, 3.2867e-02, -4.5160e-02, 3.3648e-02, -1.7392e-02, + 5.8089e-03, 2.4739e-02, 1.0378e-01, 9.3410e-03, -5.3627e-02, + -3.4343e-02, -2.6351e-02, -2.9343e-02, -2.2353e-02, -4.7763e-02, + -1.1366e-02, 5.2010e-04, -4.6743e-03, 1.6199e-02, -7.8063e-02, + 1.9897e-04, 2.7052e-02, 4.3959e-02, -2.7493e-02, 1.4008e-02, + -4.9083e-02, -6.6418e-02, -1.7566e-02, -6.1558e-02, -3.6589e-03, + -3.0492e-02, 2.8641e-02, -5.1314e-02, 3.1308e-02, -4.2798e-02, + 4.9607e-02, 1.6081e-02, 8.1581e-03, 6.2736e-02, 4.4093e-02, + 3.3409e-02, -6.3776e-03, -1.1995e-02, 5.1635e-02, 2.1120e-02, + -1.7519e-02, 7.2731e-02, -1.4197e-02, 3.4120e-02, 8.4901e-02, + -7.1668e-02, 4.6460e-02, 2.7229e-02, 1.6466e-02, 1.3243e-01, + 6.2105e-02, 2.1720e-02, 3.3344e-03, -1.4666e-02, 3.0143e-02, + 4.9758e-02, 3.8548e-02, 
1.2586e-02, -1.0586e-02, 4.7062e-02, + 1.7897e-02, 1.9226e-02, 2.5062e-02, 4.5023e-02, -3.5047e-02, + -1.0945e-01, 1.2781e-02, 3.8052e-02, 8.2197e-02, 2.9414e-02, + 1.9449e-02, 3.5261e-02, -6.2046e-02, -2.6565e-02, -2.5523e-02, + -6.0374e-03, 7.5064e-02, 5.9772e-02, 7.9446e-03, 5.8369e-02, + -2.9503e-03, 1.9437e-02, -1.6286e-02, -1.0209e-02, 4.0471e-02, + -5.8021e-02, -1.2354e-01, 2.2840e-02, -2.4538e-02, -3.2923e-02, + 1.1068e-03, -4.9417e-02, -2.2733e-01, 2.4994e-02, -3.4969e-02, + 6.0766e-02, -5.1679e-03, -2.4937e-02, -6.0451e-03, -3.3841e-02, + -5.4637e-02, 4.4074e-02, 8.5792e-02, 4.7601e-02, -1.5414e-01, + 2.0098e-02, 1.3737e-04, 3.5567e-02, 2.6228e-02, 1.7048e-02, + 2.6366e-03, 1.0959e-02, 1.1537e-03, 3.4626e-02, -2.3026e-02, + 6.3907e-02, 4.3722e-02, 7.6725e-02, 9.1198e-02, 2.5575e-02, + 4.8980e-02, -6.5434e-02, -1.2302e-02, 1.1802e-02, -7.0467e-02, + -4.3942e-02, -6.8340e-02, 6.6216e-02, -3.8507e-02, 2.3285e-02, + 4.4583e-02, 4.8039e-02, 6.5735e-02, -1.2380e-02, 2.7694e-02, + 6.3951e-02, -2.0262e-02, 6.0663e-03, 4.8698e-02, 2.6361e-03, + 1.9225e-02, -8.2180e-02, -4.2692e-02, 1.9436e-02, 6.2999e-02, + -9.6810e-02, -2.8198e-02, -3.4410e-02, -3.2170e-03, 5.5013e-02, + 8.0327e-03, -6.7895e-02, -6.1232e-02, 4.2554e-02, -2.2628e-02, + -1.9596e-01, 2.4340e-02, -3.6419e-03, -1.3354e-02, -4.5575e-02, + 6.4358e-02, -6.6430e-02, 1.7512e-02, 1.6014e-03, 4.6003e-02, + -3.3494e-02, -3.2714e+00, -1.9003e-02, 1.7152e-02, -2.3923e-02, + -8.8490e-02, -1.3093e-02, -1.1635e-02, -4.9877e-03, -2.2200e-02, + -5.4845e-02, -2.5154e-02, -2.6730e-02, 4.0258e-03, -6.4070e-02, + 5.7675e-03, -5.2886e-02, -1.0505e-01, 2.4540e-02, -7.0383e-02, + 6.6775e-02, 7.7122e-02, 7.9000e-03, -1.3582e-02, -1.0646e-01, + -1.6877e-03, 7.0112e-02, -8.1721e-03, 3.1691e-02, 1.2633e-02, + -1.3226e-02, 1.1874e-01, 6.4461e-02, 2.3030e-02, -1.8555e-03, + -7.8534e-03, -9.0699e-03, 4.2436e-02, -4.7123e-02, 4.4143e-03, + 4.1099e-02, -1.3332e-02, 2.2844e-02, -8.3630e-02, 3.2066e-02, + 5.8189e-02, -1.0272e-01, 
9.2019e-02, 1.7425e-03, 6.0141e-02, + 4.8672e-02, 3.7271e-02, 1.6245e-02, 5.3098e-02, 2.1186e-03, + 2.7736e-02, -4.9052e-02, -4.4594e-02, -2.6920e-02, -3.5221e-02, + 8.8737e-02, -8.3154e-02, -7.8560e-02, 4.0014e-02, -3.7013e-03, + 5.5303e-02, -3.8455e-02, 1.6727e-02, 4.0027e-02, -7.6386e-02, + -2.0709e-02, -1.9373e-02, -6.6888e-02, -2.6983e-02, 5.3923e-02, + 2.7105e-02, -4.4220e-02, 8.9578e-02, -4.0661e-02, -6.4093e-02, + -1.7248e-02, 5.8892e-02, 5.3766e-02, -9.9480e-02, -6.1721e-02, + -4.2307e-02, 1.8172e-02, 2.4058e-03])Parameter containing: +tensor([[ 0.0344, 0.0046, 0.0019, ..., -0.0018, 0.0054, -0.0178], + [-0.0100, 0.0007, 0.0120, ..., -0.0170, 0.0012, -0.0188], + [-0.0169, 0.0147, -0.0102, ..., 0.0031, -0.0298, 0.0021], + ..., + [ 0.0024, 0.0114, 0.0381, ..., 0.0197, -0.0068, 0.0028], + [-0.0170, -0.0138, 0.0048, ..., 0.0125, -0.0223, 0.0095], + [-0.0003, -0.0298, -0.0086, ..., -0.0083, 0.0122, -0.0196]])Parameter containing: +tensor([-0.2842, -0.3364, 0.0483, ..., -0.4465, -0.3184, -0.2751])Parameter containing: +tensor([[ 0.0024, 0.0274, 0.0246, ..., 0.0208, 0.0061, 0.0094], + [-0.0033, 0.0003, -0.0214, ..., 0.0064, 0.0232, 0.0025], + [-0.0203, 0.0050, -0.0124, ..., 0.0002, -0.0194, -0.0300], + ..., + [-0.0107, 0.0104, 0.0014, ..., 0.0129, -0.0087, 0.0057], + [-0.0105, -0.0092, 0.0100, ..., 0.0361, -0.0151, -0.0012], + [ 0.0143, 0.0048, 0.0093, ..., 0.0324, -0.0147, -0.0111]])Parameter containing: +tensor([ 2.6382e-02, 3.8788e-02, -3.9490e-02, 1.4725e-02, 9.3918e-03, + 1.0445e-02, -6.1737e-02, 3.3844e-02, 5.9265e-02, -1.2260e-02, + 4.4891e-02, -4.0771e-02, -6.4583e-03, -2.7496e-02, 3.0258e-02, + -5.0850e-03, 7.0000e-03, -2.2263e-02, -2.8976e-02, -1.4990e-01, + 1.4435e-02, -6.6566e-03, 8.3923e-02, 1.2138e-02, 3.6194e-02, + 9.5062e-03, 1.7868e-02, -9.9373e-04, 1.6174e-01, -1.4526e-02, + 3.1921e-02, 3.1769e-02, 1.8433e-02, 2.9099e-02, 2.5482e-02, + -5.1941e-02, 6.6162e-02, -1.1358e-03, -9.9945e-03, -2.1439e-02, + -3.4546e-02, -2.1957e-02, -5.6396e-02, 
7.5836e-03, -2.0172e-02, + 4.1412e-02, 3.6560e-02, 2.5604e-02, 2.8915e-02, -2.9007e-02, + 5.4413e-02, -5.9128e-03, 2.4986e-03, 3.2684e-02, 4.9347e-02, + -2.2034e-02, -3.1586e-02, -8.4534e-03, 2.2095e-02, 1.0857e-02, + -2.1271e-02, -3.2949e-04, 4.3144e-03, -2.1332e-02, 1.5564e-02, + -2.1744e-02, 1.0406e-02, 6.1798e-02, 1.3367e-02, -2.4567e-02, + -2.5330e-03, 1.4069e-02, 4.8737e-02, -9.3994e-03, -6.3843e-02, + 1.5808e-02, 1.1917e-02, 2.9114e-02, -3.3813e-02, -6.1531e-03, + 3.9368e-03, -9.1614e-02, -2.0386e-02, -4.3488e-02, 8.3389e-03, + 3.0151e-02, 1.6449e-02, -4.2114e-03, -3.4271e-02, -2.9683e-04, + -1.4572e-02, -1.3908e-02, -6.4812e-03, -1.8646e-02, 7.5035e-03, + 3.2166e-02, 1.9150e-02, -1.2856e-02, 3.4241e-02, -7.5806e-02, + -2.2964e-02, 1.0046e-01, -3.9246e-02, -3.3386e-02, -2.4776e-03, + -5.4626e-03, 3.8849e-02, 6.4453e-02, 1.2329e-02, -9.5415e-04, + -4.9858e-03, -3.2730e-03, 5.3215e-03, 1.9150e-02, 6.1554e-02, + -3.2684e-02, -5.8594e-02, -3.1555e-02, 2.5665e-02, -4.1260e-02, + 4.7668e-02, 2.7023e-02, 1.1060e-01, 6.0425e-02, 1.0864e-02, + 1.5717e-02, 9.3155e-03, -3.3112e-02, 2.0508e-02, -7.3471e-03, + 3.4924e-03, 3.2562e-02, 5.3375e-02, 2.5955e-02, 3.5339e-02, + 4.8676e-02, 3.1464e-02, 2.3834e-02, -3.1647e-02, 7.5607e-03, + -2.0996e-02, -1.4824e-02, -1.5793e-02, -4.7180e-02, -1.9836e-03, + 9.1410e-04, 2.6779e-02, -1.4099e-02, -2.5208e-02, -1.8509e-02, + -4.6844e-03, -5.9700e-03, 1.4229e-02, -8.0872e-03, -2.7222e-02, + 2.1942e-02, 1.4473e-02, 2.4582e-02, 1.5747e-02, 1.8356e-02, + -3.4668e-02, 9.4376e-03, -1.3405e-02, -7.5912e-03, -2.1240e-02, + -1.2489e-02, -3.8452e-02, -4.0710e-02, -4.2297e-02, -2.2614e-02, + -7.8613e-02, 1.4809e-02, 3.0228e-02, -5.7037e-02, 5.4565e-02, + 3.1738e-02, 2.1687e-03, 5.0659e-02, 3.1261e-03, 5.6114e-03, + 4.1138e-02, 4.2633e-02, -3.4027e-02, -4.6692e-02, 1.8311e-02, + -1.1124e-02, 2.0264e-02, -1.9775e-02, 2.8580e-02, 5.5170e-04, + 6.7568e-04, 4.3907e-03, 3.4088e-02, -4.0771e-02, 3.2318e-02, + -3.0090e-02, -4.9683e-02, -9.3918e-03, 
-9.6130e-02, -3.5370e-02, + -2.3651e-02, 8.7585e-03, -1.2131e-02, 8.1711e-03, -5.9113e-02, + 2.9007e-02, -1.7853e-03, -1.6495e-02, -1.4244e-02, -4.5654e-02, + 5.2551e-02, -5.3528e-02, 2.9312e-02, -2.1629e-03, -8.3557e-02, + -4.4922e-02, 4.2023e-02, -1.6342e-02, 1.0269e-02, -9.5139e-03, + 1.3206e-02, 3.4515e-02, -3.6621e-02, -1.7471e-02, 3.8605e-02, + 1.9806e-02, -3.6652e-02, -4.8248e-02, -3.5614e-02, -1.7563e-02, + 4.3823e-02, 2.7390e-02, -1.6220e-02, -3.8483e-02, -7.6355e-02, + 2.3300e-02, 1.8158e-02, -1.2329e-02, 1.6434e-02, -1.2016e-02, + -6.6566e-03, -6.5735e-02, 6.1569e-03, 1.7715e-02, 1.4160e-02, + -2.5757e-02, 7.2365e-03, 1.5454e-01, -5.0735e-03, -5.3375e-02, + 4.7577e-02, 8.2855e-03, -4.5197e-02, 1.9684e-02, 1.8723e-02, + 3.1006e-02, 2.0599e-02, 4.3243e-02, -4.3823e-02, -4.9011e-02, + -1.9409e-02, 1.7181e-02, 3.3356e-02, -6.6711e-02, -5.2277e-02, + 4.2908e-02, -4.3373e-03, -1.8585e-02, -1.3092e-02, 9.3384e-03, + -3.2135e-02, -3.2898e-02, 6.1615e-02, -2.2934e-02, -3.6865e-02, + -2.4246e-02, 6.7200e-02, -3.8330e-02, -2.7725e-02, -2.3865e-02, + 2.1072e-02, 4.9530e-02, 6.6986e-03, -1.8402e-02, 1.9516e-02, + -1.5631e-03, 4.5868e-02, -2.7618e-02, -3.1555e-02, -4.6005e-03, + -1.1055e-02, 2.8976e-02, -1.4557e-02, -1.5350e-02, -1.6739e-02, + 2.7725e-02, -4.0466e-02, 2.0466e-03, -5.3833e-02, 2.7725e-02, + -4.4983e-02, -6.2675e-03, -8.6594e-03, 1.0460e-02, 6.8542e-02, + -5.1361e-02, 3.8727e-02, -1.6266e-02, -6.6757e-03, -6.0089e-02, + -3.4119e-02, 4.9377e-02, -4.2633e-02, 2.1225e-02, -4.9255e-02, + -1.8417e-02, -2.4338e-02, -2.1500e-02, 1.9897e-02, -2.2430e-02, + 1.3367e-01, 1.7975e-02, -3.0258e-02, 1.3741e-02, 1.2725e+00, + 3.6407e-02, -6.1523e-02, -2.0874e-02, 1.5045e-02, 9.4652e-04, + 4.9347e-02, 2.6367e-02, 3.5522e-02, -6.5613e-03, 4.9011e-02, + 3.9398e-02, 3.3051e-02, -1.3000e-02, 1.8005e-02, 7.5134e-02, + -5.1331e-02, 3.3447e-02, -3.4363e-02, -8.3084e-03, -5.2399e-02, + -1.8814e-02, 1.7899e-02, -7.4585e-02, 3.5217e-02, -4.7340e-03, + 3.0380e-02, -6.2042e-02, 
-2.7985e-02, -1.7410e-02, -2.3575e-02, + -3.9558e-03, 4.7760e-03, 2.8534e-02, -2.9129e-02, -3.7231e-02, + 1.6571e-02, 4.0680e-02, 3.4760e-02, -8.8501e-03, -8.6129e-05, + 3.5896e-03, 5.3406e-03, -4.6844e-03, -1.2199e-02, -6.5857e-02, + 4.1695e-03, -3.2959e-02, 1.4992e-02, -2.4643e-02, -6.1279e-02, + 6.1859e-02, 2.4109e-02, -4.8798e-02, 5.0812e-02, 3.2471e-02, + -2.4582e-02, 5.6427e-02, -2.0203e-02, 3.5309e-02, -3.2562e-02, + -1.9012e-02, -1.5717e-02, -3.3875e-02, 1.2756e-02, 6.0081e-03, + 1.4915e-02, -1.3599e-01, -3.0975e-02, -2.1469e-02, 9.8953e-03, + -2.6417e-03, -4.9162e-04, -3.3325e-02, 1.4587e-02, 1.0902e-02, + 6.6833e-02, -3.6591e-02, -3.1342e-02, 2.7847e-02, -2.5330e-02, + 9.3460e-03, 2.7695e-02, -2.6413e-02, -3.3264e-02, 1.6281e-02, + 7.5722e-03, 2.1164e-02, -4.0619e-02, 1.8555e-02, 2.2858e-02, + -9.9548e-02, -1.0910e-02, -1.8167e-03, 1.4282e-02, -3.1647e-02, + -3.6804e-02, -1.3138e-02, 2.3308e-03, 2.8671e-02, 3.0426e-02, + -5.7922e-02, 2.6340e-03, 1.5732e-02, 2.9373e-02, -2.4246e-02, + -1.2260e-02, 3.4607e-02, 1.7868e-02, 3.2825e-03, -3.2928e-02, + 2.4643e-02, 5.6610e-02, 6.8779e-03, -6.8909e-02, 8.5571e-02, + 8.1718e-05, 2.9480e-02, 4.8370e-02, -1.5967e-01, -1.3496e-02, + 5.0781e-02, 1.3247e-03, -1.0094e-02, 1.5640e-02, -6.2332e-03, + 1.5610e-02, 9.1858e-03, -1.7349e-02, 1.2405e-02, 3.3447e-02, + 1.5305e-02, 2.1362e-02, -3.0045e-02, 7.9575e-03, -5.5878e-02, + -2.7985e-02, -1.7838e-02, -2.4002e-02, -8.2779e-03, -5.5504e-03, + 4.4189e-02, 1.4275e-02, 4.2419e-02, -1.3809e-02, 2.4109e-02, + 4.2381e-03, -1.3628e-03, 2.9755e-02, -1.9045e-03, 5.3711e-02, + 8.7509e-03, -1.8759e-03, 6.0394e-02, -6.9702e-02, 1.0878e-04, + -3.5405e-04, -2.3518e-03, -2.9907e-03, 2.0813e-02, 6.1768e-02, + -5.4871e-02, 7.3120e-02, 6.2317e-02, -1.2886e-02, -7.6675e-03, + 6.4636e-02, 4.2725e-02, 2.7344e-02, 2.7428e-03, 2.3163e-02, + -5.4932e-02, 2.2068e-03, 8.9798e-03, -1.8127e-02, -1.5007e-02, + -2.4323e-02, -3.9337e-02, -2.9999e-02, -2.2545e-03, -3.8853e-03, + 5.4962e-02, -2.0828e-02, 
-2.2125e-02, 1.5736e-03, 3.0167e-02, + -2.5673e-03, -7.9163e-02, 4.0924e-02, 3.6883e-04, 3.6804e-02, + -1.3237e-02, 2.9697e-03, 1.9150e-02, -4.1016e-02, -4.3701e-02, + 2.6581e-02, -2.0020e-02, -1.6772e-01, -4.7394e-02, 2.4277e-02, + 6.9008e-03, -1.0155e-02, 1.4351e-02, 3.6438e-02, -4.9377e-02, + -4.8409e-03, 3.4149e-02, 2.7313e-02, -2.9572e-02, -3.3112e-02, + -5.9845e-02, 2.7313e-02, -3.5461e-02, 2.0905e-02, 5.1270e-02, + -3.5248e-02, -5.9723e-02, 3.2158e-03, 3.7109e-02, -1.0297e-01, + 6.2439e-02, -1.8906e-02, 1.1749e-02, 5.1544e-02, -2.0386e-02, + 5.8563e-02, 4.2877e-02, -3.7689e-02, 4.1199e-03, -5.0964e-02, + 2.1423e-02, -5.1697e-02, -9.2468e-02, 5.4016e-02, 3.0823e-02, + -2.9541e-02, 2.8229e-02, 4.6661e-02, -2.6871e-02, 1.9348e-02, + -1.6891e-02, 2.2781e-02, -5.3444e-03, -2.2461e-02, 2.5009e-02, + -3.4332e-02, -9.6863e-02, -3.2379e-02, 1.2527e-02, -4.8737e-02, + -4.1260e-02, -1.0605e-02, -2.6459e-02, 1.7792e-02, 1.6296e-02, + -3.8452e-03, 4.3274e-02, -5.2582e-02, -2.5513e-02, 2.2491e-02, + -1.9272e-02, -4.9652e-02, -3.3142e-02, -2.8183e-02, -7.0992e-03, + -4.8752e-03, -1.1902e-02, -3.9062e-03, 2.5681e-02, -1.3130e-02, + -2.2751e-02, 7.4402e-02, 7.3662e-03, -4.3488e-03, 2.6505e-02, + -5.1178e-02, -2.8885e-02, -2.3651e-02, 3.8483e-02, 6.0501e-03, + -2.3956e-02, -6.5727e-03, -1.4000e-03, 2.5726e-02, -2.3315e-02, + -1.4435e-02, 8.8348e-03, -2.9083e-02, -5.4871e-02, 1.7960e-02, + 5.2765e-02, 1.8509e-02, 1.7014e-02, 6.7566e-02, -1.2280e-01, + 1.5450e-02, 1.2482e-02, 4.9622e-02, -6.4392e-02, -4.1229e-02, + -2.8473e-02, 1.1497e-02, -1.0080e-03, 4.4098e-02, -7.1716e-03, + -9.2468e-03, -1.5945e-02, 5.2429e-02, -2.1103e-02, -2.1332e-02, + 1.1314e-02, -2.3132e-02, 1.9638e-02, 9.2468e-03, 2.2602e-03, + -2.0538e-02, -6.8169e-03, -5.4047e-02, -5.6366e-02, 6.0959e-03, + 3.7567e-02, 6.7444e-03, 7.8735e-03, 7.1899e-02, 1.2878e-02, + -5.0140e-02, -1.4999e-02, 8.0566e-03, -2.7252e-02, -9.1400e-03, + 2.3514e-02, -2.5391e-02, 2.5883e-03, -9.8114e-03, 1.1627e-02, + 1.1917e-02, 
-2.7130e-02, -5.7892e-02, 1.5907e-03, 8.6487e-02, + 9.9564e-03, -2.2736e-02, -1.4168e-02, 1.7029e-02, 4.1260e-02, + -6.8054e-03, 1.3733e-03, 1.6312e-02, 1.1803e-02, -2.8137e-02, + 6.4453e-02, 3.0670e-02, -1.2177e-02, -3.4973e-02, -4.0779e-03, + -2.6901e-02, -7.7477e-03, 5.8411e-02, 1.8631e-02, 6.4758e-02, + 2.3270e-02, 8.6060e-02, -3.5370e-02, -2.8580e-02, -2.3117e-02, + -1.4046e-02, -1.1757e-02, 7.2876e-02, -2.0523e-02, -4.0344e-02, + -6.5186e-02, 4.2206e-02, 8.0795e-03, -1.8448e-02, -2.6596e-02, + -2.4967e-03, 2.4490e-03, 2.6062e-02, -1.8570e-02, -2.8748e-02, + 1.5305e-02, 1.9318e-02, 2.8839e-02, 3.0956e-03, -1.1917e-02, + -6.3972e-03, 5.5084e-02, 8.3542e-03, -4.0070e-02, 2.5085e-02, + -3.1372e-02, 1.6403e-02, -2.3087e-02, -3.0289e-03, -4.4281e-02, + 5.6946e-02, 3.4912e-02, -2.2324e-02, -3.3627e-03, 9.8705e-04, + 2.9251e-02, 1.0345e-02, -1.3954e-02, -2.2217e-02, -1.3290e-02, + 3.2288e-02, -2.3560e-02, -2.2995e-02, 5.7709e-02, -1.8967e-02, + -1.1040e-02, -3.9490e-02, 1.2451e-02, -1.3283e-02, 2.0813e-02, + -1.8377e-03, -1.5808e-02, 2.8564e-02, -3.5839e-03, -2.7176e-02, + -2.0081e-02, -2.1815e-04, -2.1378e-02, -9.3317e-04, 3.3234e-02, + -3.9917e-02, -3.8116e-02, -3.2074e-02, -8.1406e-03, 1.5671e-02, + -2.3865e-02, -1.2421e-01, 3.7231e-02, -1.7975e-02, 4.6875e-02, + 2.1973e-02, -2.2369e-02, 3.1281e-02, 3.8300e-03, 1.0979e-02, + -2.8793e-02, -1.0155e-02, -1.5823e-02])Parameter containing: +tensor([1.8660, 1.7784, 1.8131, 1.9032, 1.8201, 1.8358, 1.9317, 1.9030, 2.0080, + 1.7371, 1.9171, 1.7857, 1.9481, 1.8647, 1.9881, 1.9470, 1.9653, 1.8904, + 1.9133, 2.0480, 1.8218, 1.8158, 1.8040, 1.7625, 1.8326, 1.8251, 1.8112, + 1.8591, 2.3300, 1.8212, 1.8430, 1.8733, 1.8304, 1.9012, 2.0362, 1.8383, + 1.9599, 1.7815, 1.6626, 1.9035, 1.8171, 1.7817, 1.9178, 1.8311, 2.0381, + 1.8704, 1.8629, 1.8840, 1.8549, 1.8668, 1.7745, 1.8497, 1.9727, 1.9021, + 1.9105, 1.8380, 1.9000, 1.8313, 1.9636, 1.8302, 1.8879, 1.9270, 1.8041, + 1.8294, 1.8350, 1.6809, 1.7347, 1.9509, 1.8550, 2.2307, 1.8262, 
1.8292, + 2.1996, 1.8380, 1.7901, 1.9650, 1.9438, 1.7403, 2.0721, 1.7517, 1.9311, + 1.8478, 1.9061, 1.8099, 1.8356, 2.1220, 1.8942, 1.8027, 1.8589, 1.8872, + 1.9714, 1.8759, 1.8909, 1.8603, 1.8456, 1.7588, 1.8874, 2.0046, 1.7945, + 1.8890, 1.9014, 3.0165, 1.8195, 1.8984, 1.8578, 1.9651, 1.9261, 1.9477, + 1.7389, 1.9035, 1.9816, 1.8913, 1.8726, 1.7762, 1.8504, 2.0517, 2.0671, + 1.8934, 1.7774, 1.9831, 1.8847, 1.8045, 1.9016, 1.8355, 1.9538, 1.8974, + 1.8131, 1.8118, 1.8685, 1.9706, 1.9202, 1.7229, 1.9807, 1.8030, 1.8698, + 1.8555, 2.0546, 1.9435, 1.8757, 1.8427, 1.7255, 1.7774, 1.9838, 1.8948, + 1.8492, 1.8438, 1.7917, 1.9375, 1.9346, 1.9279, 1.8923, 2.0421, 1.8791, + 1.8829, 1.8148, 1.5478, 1.8369, 1.9054, 1.8246, 1.8708, 1.9425, 1.9012, + 1.8279, 1.5597, 1.7954, 1.8119, 1.8879, 1.8401, 1.9324, 1.8507, 1.7592, + 1.9193, 2.0096, 2.0844, 1.8840, 1.9689, 1.9379, 1.9933, 1.8427, 1.7667, + 2.0733, 1.8381, 1.7551, 1.7741, 1.8745, 2.0625, 1.8530, 0.6309, 1.8452, + 1.8917, 1.9017, 1.8086, 1.9182, 1.9565, 1.9748, 1.8523, 1.7366, 1.9107, + 0.8677, 1.8278, 1.8390, 1.9599, 1.8796, 1.8701, 1.7979, 1.9388, 1.7285, + 1.9202, 1.9327, 1.8762, 1.8762, 1.9806, 2.0151, 1.9425, 1.9414, 1.8690, + 1.9037, 1.9074, 1.8259, 1.8553, 2.0224, 1.9358, 1.9235, 1.9911, 1.9111, + 1.8020, 1.8777, 1.9436, 1.9082, 1.9094, 1.9674, 1.8680, 1.7909, 1.9865, + 1.9500, 1.7919, 1.9436, 1.7509, 1.8738, 1.8205, 1.9345, 2.0570, 1.9744, + 1.8709, 1.9073, 1.9121, 1.9625, 2.4609, 1.8394, 2.0095, 1.8269, 1.9627, + 1.7241, 1.9025, 1.7354, 1.8871, 1.9433, 1.7587, 1.9468, 1.9635, 1.8410, + 1.8963, 1.7291, 1.8991, 1.7483, 1.8443, 1.8082, 1.9849, 2.0076, 1.7678, + 1.9225, 1.8132, 2.2230, 1.6371, 1.8443, 1.9185, 1.8186, 1.8701, 1.9011, + 1.8227, 1.9548, 2.0720, 1.8787, 1.8453, 1.9034, 1.8937, 2.1512, 1.7902, + 1.9223, 1.7259, 1.9361, 1.8650, 1.8079, 1.8986, 1.9573, 1.7364, 1.9630, + 1.8175, 1.8065, 1.8854, 1.9771, 1.7975, 1.8899, 1.8159, 2.0447, 1.7576, + 1.8268, 1.9224, 1.9002, 1.8067, 2.0081, 1.9586, 1.6762, 1.9101, 
1.8413, + 1.7823, 1.9550, 1.9823, 1.7837, 1.8970, 2.0231, 1.8636, 1.7673, 1.9067, + 2.3982, 2.0804, 1.8810, 1.8703, 1.8265, 1.7580, 1.8966, 1.8586, 1.8388, + 1.9037, 1.8352, 1.8316, 1.8336, 1.8934, 2.0168, 1.9769, 2.0078, 1.8481, + 1.9293, 1.8833, 1.8839, 1.8644, 1.8012, 1.8637, 1.9363, 1.8971, 1.8371, + 1.9506, 1.8396, 2.2401, 1.8631, 1.9060, 1.7227, 1.9123, 1.8348, 1.8611, + 1.8895, 1.8920, 1.6990, 1.8529, 2.0385, 1.8732, 1.8881, 1.7646, 1.8885, + 2.1197, 1.8467, 1.7632, 1.8907, 2.0283, 1.9200, 1.7656, 1.8061, 1.9139, + 1.9146, 1.7305, 1.8498, 1.6375, 1.7809, 1.9367, 2.0803, 1.9106, 1.9903, + 1.9454, 1.8341, 1.9366, 2.0857, 2.1631, 1.8223, 1.8999, 1.8695, 1.8775, + 1.7864, 1.9755, 1.9165, 1.8118, 1.8268, 1.9509, 1.8592, 1.8168, 1.8380, + 1.8159, 1.8603, 1.9215, 1.8509, 1.7259, 1.8962, 1.8724, 1.9343, 1.8903, + 1.8474, 1.6385, 1.8949, 1.8523, 1.9570, 1.8141, 1.8389, 1.8728, 1.7764, + 1.9630, 1.8911, 1.8530, 1.8837, 1.8320, 2.0132, 1.8928, 1.7913, 1.8610, + 1.8055, 1.8332, 1.8728, 1.8835, 1.9436, 1.8447, 1.8912, 1.9659, 1.8275, + 2.0092, 1.9458, 1.8524, 1.8043, 1.8004, 1.7571, 1.7796, 1.8534, 1.9481, + 1.8991, 1.8947, 1.8967, 1.9415, 1.8822, 1.9627, 1.8128, 1.9307, 1.9071, + 1.8921, 1.8939, 1.7313, 1.8759, 1.8561, 1.7990, 1.7605, 1.7864, 1.8529, + 1.8590, 1.8761, 1.9702, 1.8165, 1.9052, 1.8691, 1.8814, 1.8573, 1.9790, + 1.8433, 1.8886, 1.9109, 1.8462, 2.0010, 1.8940, 1.6905, 1.8911, 1.9256, + 1.9097, 1.9327, 1.8287, 1.8924, 1.9177, 1.7493, 1.7674, 1.8636, 1.7492, + 1.9836, 1.8146, 1.8620, 1.7825, 2.0120, 1.8672, 1.8315, 1.8449, 1.7789, + 1.8832, 2.1938, 1.9455, 1.8791, 1.8934, 1.7710, 1.8357, 1.7524, 1.8833, + 1.9098, 1.9889, 2.0089, 1.7989, 1.9659, 1.9234, 1.8230, 1.8224, 1.8910, + 2.3350, 2.0102, 1.9047, 1.9591, 2.0173, 1.8123, 1.8544, 1.8656, 1.8224, + 1.9651, 1.9880, 1.8107, 1.9073, 1.9978, 1.8600, 2.0117, 1.8631, 2.0721, + 1.9352, 1.9215, 1.9140, 1.8218, 1.7605, 1.9906, 1.9540, 1.9666, 1.9012, + 1.9246, 2.2353, 1.8372, 1.7955, 1.8482, 2.2460, 1.8172, 1.7272, 
1.9246, + 1.8234, 1.9038, 1.7496, 1.7468, 1.8788, 1.7610, 1.8758, 1.7716, 1.8399, + 1.8478, 1.8214, 1.9384, 1.7953, 1.8796, 1.9437, 1.8393, 1.8481, 1.9724, + 1.9443, 1.8271, 1.9000, 1.9068, 1.9761, 2.0626, 1.8821, 1.8797, 1.8821, + 1.9393, 2.0845, 1.7724, 2.2779, 1.7483, 1.8708, 2.3283, 1.8016, 1.7931, + 1.7968, 1.9056, 1.9454, 1.8709, 1.8524, 1.8509, 1.8794, 1.8774, 1.9060, + 1.9310, 1.8138, 2.0550, 1.7575, 1.9538, 2.1155, 1.8975, 1.9170, 1.8156, + 1.8810, 1.7680, 1.8986, 1.9357, 1.9116, 1.7712, 1.8008, 1.9611, 1.7758, + 1.8260, 1.8118, 1.8727, 1.8630, 1.8199, 1.9040, 2.1573, 1.9015, 1.8741, + 1.9744, 1.8355, 1.9861, 1.9444, 1.9224, 1.9344, 1.9126, 1.8261, 1.9051, + 1.7932, 1.9132, 1.8560, 1.9965, 1.8377, 1.8254, 1.8571, 1.8778, 1.8591, + 1.8667, 1.8168, 2.0227, 1.8521, 2.0372, 1.9477, 2.0108, 1.8808, 1.8310, + 1.9081, 1.7974, 1.9341, 1.7954, 1.8657, 2.0381, 1.8839, 1.8975, 1.9008, + 1.8652, 1.7003, 1.8053, 2.0298, 1.9783, 1.8093, 1.8550, 1.8205, 1.9117, + 2.0263, 1.9597, 1.8719, 1.9367, 1.8217, 1.9005, 3.9133, 1.9469, 1.7762, + 2.0920, 1.8573, 1.9510, 1.9302, 1.8653, 1.9560, 1.9251, 1.8615, 1.9694, + 1.7484, 1.7944, 1.8781, 1.8119, 1.8498, 1.7536, 1.8787, 1.9861, 1.8066, + 1.9102, 1.9123, 1.8142, 1.7973, 1.9175, 1.9055, 1.8494, 1.8617, 1.8569, + 1.9394, 1.9115, 1.9110, 1.7258, 1.8709, 1.8557, 1.7597, 1.9841, 1.9129, + 2.0609, 1.8807, 1.7975, 1.8461, 2.0206, 1.8714, 1.7895, 1.9192, 1.7968, + 1.8826, 1.7698, 1.9447, 1.8479, 1.8068, 1.9118, 1.8883, 1.9746, 1.8796, + 1.8511, 1.9405, 2.0582, 1.7386, 1.9657, 1.8745, 1.9578, 1.9301, 1.9415, + 1.9653, 1.9379, 1.8646, 1.9669, 1.9559, 1.8806, 2.0812, 1.8663, 1.9106, + 2.0192, 1.9615, 1.8668, 1.8325, 1.8629, 1.8094, 1.9444, 1.9607, 1.8693, + 1.7771, 1.7942, 1.8408])Parameter containing: +tensor([ 1.0001e-01, 3.2639e-01, -2.1039e-01, -5.0999e-01, -4.2389e-02, + -2.6753e-01, 9.4194e-01, -2.7457e-01, -5.2690e-01, -5.5466e-02, + -2.7905e-01, 1.6774e-01, -3.8277e-01, -2.5514e-01, -6.1334e-01, + -5.7646e-01, -2.2728e-01, 
-6.7665e-02, -5.3467e-01, -2.3960e+00, + -5.1092e-01, 3.8211e-01, -2.0304e-01, -4.5887e-01, -6.1576e-01, + 7.0121e-02, -4.9523e-01, -3.8478e-01, -1.0415e+00, 5.6320e-01, + -2.0277e-01, -7.7615e-02, 3.5587e-01, -5.4729e-01, 2.5944e-01, + -2.1211e-01, -9.3042e-01, -3.8996e-01, 1.8186e-03, 2.1777e-01, + -2.0130e-01, -2.2191e-01, 1.0911e+00, -4.2226e-01, 1.1266e-01, + -4.3535e-01, -7.5518e-01, -1.5334e-01, -3.4593e-01, -1.9087e-02, + -4.6673e-01, -3.2443e-01, -4.1880e-01, -6.7491e-01, -2.2719e-01, + 1.1494e+00, 6.2432e-01, 4.0111e-02, -5.6136e-01, 2.8486e-01, + 4.2190e-01, 2.0334e-01, 1.1443e-01, 9.1238e-02, 3.3551e-01, + -4.9760e-02, 8.9777e-02, -6.3372e-01, 1.2032e-01, -1.1987e+00, + 2.0360e-01, 3.1116e-01, 6.7021e-01, 3.3498e-01, -3.2979e-01, + -5.0852e-01, 2.5790e-01, 4.6133e-03, -1.0485e+00, 7.3088e-02, + -5.5104e-01, -1.3722e-01, -1.4933e-01, 5.5035e-01, 2.8101e-01, + -6.5381e-01, 6.2764e-01, 7.8082e-02, -1.6087e-01, -1.8105e-01, + 7.7904e-01, 3.9496e-01, -1.0841e-01, 5.5589e-02, -5.2023e-01, + -2.7022e-02, -9.2938e-02, 8.5328e-01, -3.3161e-01, 7.2774e-01, + -1.6572e-02, -2.6193e+00, 3.1187e-01, 5.7197e-01, 4.1484e-01, + 8.9408e-01, -8.1853e-01, -3.2315e-01, -7.6765e-02, 2.0524e-01, + 7.7114e-01, 4.7549e-01, -6.9258e-01, -2.9009e-03, 1.7830e-01, + -7.2715e-01, 9.8283e-01, 4.1635e-01, 1.7894e-01, 3.5228e-02, + 3.7125e-01, -3.4730e-01, -6.6480e-01, -2.2598e-01, -2.7736e-01, + -4.1118e-01, -2.1612e-01, 3.9944e-01, -4.8386e-01, 5.5842e-01, + -1.0969e+00, -4.2645e-01, -9.7789e-01, -1.5478e-01, 3.4732e-01, + 1.5894e-01, -5.3756e-01, -2.1893e-01, -7.5276e-02, -5.2727e-01, + 1.4501e-03, -1.1514e-01, 5.3553e-01, 4.6137e-01, -1.4240e-01, + -4.5386e-01, -4.4217e-01, 2.2191e-01, 1.7962e-01, 7.0115e-01, + 6.0106e-01, 7.0242e-01, 4.8614e-03, 1.2692e-01, -4.2740e-01, + -7.7894e-02, -2.7168e-01, -4.2341e-01, -5.9347e-01, 6.1331e-02, + -1.6603e-01, -8.8191e-01, 1.5898e-01, -3.9610e-01, -3.6995e-01, + 1.3130e-01, -7.6066e-01, 4.9251e-01, -5.2796e-01, 2.7328e-01, + 4.2783e-01, 
-2.3592e-01, -8.5147e-01, 5.8858e-01, -2.6978e-01, + -6.8101e-01, -3.1506e-02, -6.8128e-01, -4.4942e-01, 3.5423e-01, + -8.0526e-01, -5.8796e-01, -4.6747e-02, -8.0376e-02, 1.9648e-01, + 9.7790e-01, 9.9415e-02, -1.5476e+00, 7.2616e-02, -8.8570e-01, + -1.3258e-01, -2.2147e-01, 2.3140e-01, 7.3908e-01, 2.1519e-01, + 5.9967e-01, 3.8346e-01, -2.5702e-02, 6.3070e-01, 2.4397e-01, + -3.7191e-01, -2.8234e-01, -3.5167e-03, -1.6815e-01, 3.0992e-01, + 1.2480e-01, -2.3985e-01, 7.4806e-01, 6.7826e-01, 2.2846e-01, + 2.1610e-01, -2.8469e-01, -6.4048e-01, -3.1248e-01, -6.3940e-01, + 8.4760e-02, -1.8882e-01, -2.1436e-01, -3.5960e-01, 2.6390e-02, + 1.3632e+00, -3.4175e-01, 8.5690e-01, 8.1155e-01, -5.4218e-01, + -2.9913e-01, 1.1988e-01, 3.5309e-01, 4.1926e-01, 5.5749e-01, + -6.3115e-01, -5.2563e-01, 1.7902e-01, 4.0641e-01, 6.2101e-01, + -2.2727e-02, -2.9388e-01, -1.3951e-01, -1.7952e-01, 4.4467e-01, + 6.6099e-01, 6.1575e-01, -6.6078e-01, 5.1975e-01, -3.6814e-01, + -5.6789e-01, 5.1790e-01, -1.5656e+00, 3.2540e-01, 8.2552e-01, + 3.7043e-02, 6.1390e-01, 1.6771e-01, -1.2280e+00, -2.3813e-01, + -1.2629e-01, -3.6776e-01, -1.0564e-01, 6.0078e-01, 7.3554e-01, + -2.2202e-01, -7.2251e-01, -3.9098e-01, 6.6057e-01, 1.7973e-01, + -6.7612e-01, -1.0394e-01, 9.9293e-01, -5.2571e-01, -2.6658e-01, + -4.0639e-01, 2.9404e-01, -8.6945e-01, -6.0640e-01, 2.2846e-01, + -3.4287e-01, -5.3146e-02, 5.7531e-01, -8.1353e-03, 3.5567e-01, + 5.9598e-01, -5.8310e-01, -8.2484e-03, 2.3647e-01, -2.1022e-01, + 5.7921e-01, 9.6365e-01, 1.0753e-02, 5.1279e-01, -2.9078e-01, + -4.8622e-01, -3.3165e-01, 4.2497e-01, -3.1292e-01, 3.9897e-01, + 1.2795e-01, 3.1480e-01, 1.4672e-01, 1.8182e-01, -1.5524e-01, + 6.5747e-01, 3.4742e-01, -3.8288e-01, -5.8985e-01, -5.5828e-01, + 2.3804e-01, -4.3328e-01, 2.1676e-01, -2.7809e-01, 5.9206e-01, + 5.3341e-01, -1.0016e+00, 4.7881e-01, -7.3726e-01, 2.7895e-02, + -4.1925e-01, -3.7070e-01, -4.0917e-01, 2.7832e-01, 5.5444e-02, + -1.5275e+00, -3.5360e-01, 1.6569e-01, -3.6858e-01, 2.1436e+00, + 
-1.5858e+00, 2.6521e-01, -4.9049e-01, -1.3124e-01, -2.1085e-01, + -2.3881e-01, -7.6808e-01, -1.3304e-01, -3.7610e-01, -3.0138e-01, + -4.9982e-01, -8.1163e-01, 1.7125e-02, 5.6042e-01, -3.6971e-01, + 3.8646e-01, 1.5706e-01, 2.9716e-01, 8.9005e-01, 3.0204e-02, + 2.1164e-01, 1.1384e-01, 1.8581e-01, -4.4801e-02, 3.3047e-01, + -8.8904e-02, 2.5982e-01, 3.3461e-01, 9.7410e-01, -1.0836e-01, + -2.2746e-02, -2.8105e-01, 5.6986e-01, 4.0678e-01, -3.3759e-02, + -6.6359e-01, 1.6146e-01, 8.9127e-02, -4.1533e-01, 4.9490e-01, + -8.3370e-02, -2.3326e-01, 3.0722e-01, 2.6059e-01, 7.1568e-01, + 1.9166e-01, 8.7676e-02, -5.6655e-01, -5.7934e-01, 5.5463e-01, + -7.6014e-01, -1.5674e-01, 5.9477e-01, -4.9040e-01, -3.2710e-02, + 8.4659e-02, 3.3333e-02, 2.0411e-01, -5.1136e-01, 5.6339e-01, + 3.8002e-01, -6.0355e-01, 5.3952e-01, -7.2400e-01, -8.2425e-01, + 1.3842e-01, 1.2815e+00, 4.2212e-01, 7.7251e-01, 5.8582e-01, + -2.2402e-01, 1.3369e-01, 1.1079e+00, 2.1883e-01, -2.8498e-01, + 3.8395e-01, -1.3351e-02, 2.1286e-01, -2.2877e-01, 4.7099e-01, + -1.4595e-01, 3.4667e-01, 8.2163e-01, -6.9097e-02, -3.3457e-01, + -6.7919e-02, -4.7688e-01, 6.6409e-01, 3.8889e-01, 8.7833e-01, + -5.7702e-01, 5.9352e-03, -3.6324e-01, 6.8234e-02, -4.7906e-02, + 2.0999e-01, 9.8040e-02, -6.9085e-01, -3.6914e-01, -6.4030e-01, + 2.5372e-01, 4.0410e-01, -1.3079e-01, -5.6092e-01, -1.8909e-01, + -3.2377e-01, -2.1252e-01, -2.2387e-01, 8.7467e-02, 5.0728e-01, + -1.1403e-01, -5.4595e-01, 3.9349e-01, 5.9183e-01, -9.6032e-01, + 8.2167e-02, -5.1443e-01, -9.4190e-01, -9.3388e-01, 9.4551e-02, + -2.4728e-01, 2.9515e-01, -4.8768e-01, -7.5996e-02, -5.9243e-01, + -4.0558e-01, 2.2324e-01, -9.0361e-01, -5.1321e-01, -1.3411e-01, + 5.6479e-01, -9.0425e-03, -5.5796e-01, 1.7802e-01, 2.9690e-01, + -3.8099e-01, 2.4017e-02, -3.0891e-01, -3.2830e-01, 4.2963e-02, + -6.2601e-01, -2.5049e-01, -7.4859e-03, 1.1617e-01, -1.3194e-01, + 4.7443e-01, -1.3437e-02, -7.4380e-01, -1.2107e-01, -4.5423e-01, + -4.6178e-01, 3.5232e-01, -5.6029e-01, 7.6554e-01, 
7.5839e-01, + 4.9742e-01, -1.8662e-01, 1.2774e-01, -3.0275e-01, 4.3990e-01, + -6.9867e-01, -3.8308e-01, 1.2532e-01, 1.5111e-01, -4.7295e-01, + 2.0581e-01, 3.9599e-02, -4.5042e-01, -4.7772e-01, -1.2960e-01, + -2.4035e-02, -8.1313e-01, 7.2255e-01, 2.5972e-01, 8.9410e-01, + 7.9167e-02, -1.3100e-01, -1.8255e-01, -1.3246e-02, 1.7391e-01, + -1.0780e+00, 5.4875e-01, 2.4150e-02, -3.4517e-01, -5.1029e-01, + -7.0280e-01, 1.6151e-02, -6.9358e-01, -1.9686e-01, 1.8666e-01, + -3.3663e-02, 2.8270e-01, 4.3943e-01, 7.5781e-01, -8.4487e-02, + 1.6837e-01, 5.1663e-01, 1.0567e+00, 1.0433e+00, -4.2754e-01, + -4.3495e-01, 6.6799e-01, 2.8105e-01, 2.9122e-01, 1.0039e-01, + -8.5692e-02, -6.0030e-01, 2.7274e-01, -1.2726e-01, 3.2528e-01, + -4.7566e-01, -8.0034e-02, 9.9471e-01, 2.6183e-01, -5.3645e-01, + 2.7173e-01, 9.5450e-01, -5.9020e-01, 1.5703e-01, 4.1426e-01, + -8.9005e-01, 6.8087e-01, -6.4571e-01, -4.4648e-01, -2.6380e-01, + -1.5983e+00, -3.5993e-01, 2.2042e-01, -6.5626e-01, 1.1699e+00, + -2.0258e-01, 3.4444e-01, 5.6699e-01, -6.5344e-01, 5.0124e-01, + 3.2623e-01, 2.0384e-01, -3.2459e-01, 8.5185e-03, 1.3471e-01, + 5.1772e-01, 1.4214e-01, 4.1939e-02, 6.7896e-01, 8.3246e-02, + -6.1466e-02, 4.5889e-02, -7.6077e-01, 2.5588e-01, 5.4054e-01, + 9.3660e-01, -4.6980e-01, -1.7507e-01, -1.4851e-01, 1.8071e-01, + 4.1031e-02, 1.4749e-02, 6.5961e-01, -7.0903e-02, 5.9657e-02, + -1.9495e-01, 8.3158e-01, -1.5257e-01, 1.2255e+00, -1.2356e-01, + -2.0381e-01, 4.8242e-01, -1.6604e-01, -8.8528e-02, 1.0697e-01, + 1.9617e-01, -7.0234e-01, 4.4242e-01, -5.7977e-02, 3.9103e-01, + 1.8679e-01, -4.9163e-01, 4.2347e-03, -6.8967e-01, 1.6035e-01, + -8.5094e-01, -7.4418e-02, 6.7336e-02, -8.3224e-01, 2.6062e-02, + 4.9192e-01, 5.2151e-01, -1.6844e-01, 2.4388e-01, -3.7647e-02, + -4.8822e-01, 4.6222e-03, 1.2678e-01, -3.0084e-01, 6.3812e-01, + 7.1659e-01, 1.7261e-01, -8.0721e-01, 6.4520e-01, -1.2987e-01, + -1.7628e-01, -3.8206e-01, 9.2294e-01, 3.3497e-01, 3.1547e-01, + 3.1440e-01, 4.2543e-01, -8.7846e-01, 5.0277e-01, 
-1.7191e-01, + -4.8866e-01, 6.1943e-01, 4.8681e-01, 3.1232e-01, -2.7119e-01, + 5.9825e-01, -3.5353e-01, 3.5124e-01, 2.8750e-02, -6.5004e-01, + 2.6290e-01, -2.8577e-01, 2.7292e-01, -3.7203e-02, 2.6134e-01, + 8.5432e-01, -5.0034e-01, -5.1107e-01, -3.8058e-01, -2.5238e-01, + 4.3307e-01, 3.9068e-01, 7.7398e-01, -5.3277e-01, 4.4721e-01, + 1.1372e-01, 5.0360e-01, -1.7603e-01, 2.4101e-01, 2.2365e-01, + -1.8078e-01, 2.7429e-01, -2.1549e-01, 1.4268e-01, 3.3917e-01, + -6.9570e-01, -4.9179e-01, -1.5029e-01, 6.5821e-01, 8.5738e-01, + -8.3008e-01, -3.8017e-01, -3.3007e-01, 5.9142e-01, 5.3897e-01, + 1.3393e-01, 2.6619e+00, -8.4140e-01, 7.2939e-02, -1.0110e+00, + 4.0343e-01, -7.9010e-01, -5.8433e-01, 5.6104e-01, 1.6495e-01, + -6.4035e-01, -2.1644e-01, -1.1082e+00, -2.0013e-01, -2.8716e-01, + 9.0775e-01, 4.8080e-01, 1.1360e-01, -5.3904e-02, -3.1460e-01, + 4.1571e-01, 8.1695e-02, -1.6502e-01, -1.2634e-01, 6.4734e-01, + 4.4058e-01, 6.9211e-03, -3.5066e-01, -2.4901e-01, 9.0255e-01, + -1.1789e-01, -6.6065e-01, -1.2066e-01, -1.7871e-02, 6.4703e-01, + -2.7888e-01, 3.9891e-01, -1.8701e-01, -5.6675e-01, 5.2930e-01, + -1.1687e+00, -3.6498e-01, 1.4806e-01, -4.1063e-01, 1.0374e+00, + -4.3072e-01, -2.1022e-02, 4.1391e-01, -3.7349e-02, 2.4009e-01, + -7.7916e-02, -6.8427e-01, -8.1858e-03, 1.2579e-01, 2.1868e-01, + -6.8004e-01, 7.4477e-01, 3.0428e-01, 4.4441e-02, -5.2122e-01, + -8.5566e-01, -7.7845e-02, -4.6011e-01, -4.8875e-01, -6.0817e-01, + -4.2802e-01, -1.0616e-01, -6.4764e-01, 5.8157e-01, 9.6697e-02, + 4.5654e-01, 3.5184e-02, -6.4207e-01, -6.6000e-01, 1.6382e-02, + 3.3541e-01, 4.0628e-01, -6.7456e-01, 8.9355e-01, -5.9333e-02, + 7.9808e-02, 2.2350e-01, -3.6857e-01, 9.5804e-01, 1.1220e-01, + -4.0588e-02, 6.0938e-01, 3.0536e-03])Parameter containing: +tensor([[-0.0334, 0.0285, 0.0295, ..., 0.0152, -0.0162, -0.0086], + [-0.0067, 0.0443, -0.0088, ..., 0.0052, 0.0228, -0.0394], + [-0.0080, -0.0090, -0.0276, ..., -0.0169, -0.0212, 0.0219], + ..., + [-0.0186, 0.0049, -0.0079, ..., -0.0045, 0.0192, 
-0.0271], + [ 0.0003, 0.0048, -0.0178, ..., -0.0006, -0.0150, 0.0126], + [ 0.0191, -0.0278, 0.0059, ..., 0.0222, 0.0142, 0.0118]])Parameter containing: +tensor([-0.4333, 0.1654, -0.0519, ..., -0.0249, 0.0006, 0.0306])Parameter containing: +tensor([[ 0.0058, 0.0067, 0.0045, ..., -0.0050, 0.0077, -0.0065], + [ 0.0011, -0.0229, -0.0150, ..., 0.0013, 0.0057, 0.0196], + [-0.0234, 0.0136, -0.0235, ..., 0.0329, -0.0069, 0.0318], + ..., + [ 0.0086, 0.0077, -0.0036, ..., -0.0093, -0.0244, 0.0068], + [ 0.0283, 0.0173, 0.0116, ..., -0.0010, 0.0039, -0.0024], + [ 0.0225, 0.0120, 0.0018, ..., -0.0170, 0.0129, -0.0031]])Parameter containing: +tensor([ 4.6478e-02, 4.1260e-02, -5.7587e-02, 4.2152e-03, -1.5381e-02, + -7.1945e-03, 7.1960e-02, 3.3722e-03, 1.2032e-02, -1.7223e-03, + -4.3213e-02, -2.2369e-02, 2.6031e-02, 2.9510e-02, -1.9821e-02, + 1.1589e-02, 3.1555e-02, 5.9738e-03, 2.5436e-02, 1.7834e-01, + -1.2909e-02, -1.7929e-02, 1.7914e-02, -7.0419e-03, 7.2670e-03, + -1.1589e-02, -2.1347e-02, 3.9612e-02, 6.9214e-02, -3.1097e-02, + 3.6774e-02, 3.3020e-02, 3.6297e-03, -3.4698e-02, -1.0223e-02, + 3.4210e-02, 4.7668e-02, -3.0304e-02, -3.7567e-02, -4.2389e-02, + -3.6896e-02, 5.4779e-02, -5.8228e-02, 4.9225e-02, -6.8420e-02, + -1.0086e-02, -1.3298e-02, 1.2856e-02, 7.1899e-02, 4.1229e-02, + 7.8369e-02, -2.3773e-02, 7.1411e-02, 3.9337e-02, -5.9624e-03, + 5.9235e-02, 5.2368e-02, -7.6790e-03, 1.0061e-03, 1.7975e-02, + 1.2077e-02, 1.7365e-02, 2.3132e-02, -3.7811e-02, -1.7868e-02, + 2.1454e-02, -1.1116e-02, -9.3155e-03, 5.4260e-02, -1.4148e-01, + 5.0934e-02, 2.6646e-03, -4.3518e-02, 1.6846e-02, -2.5818e-02, + -3.9856e-02, 1.7212e-02, -3.2196e-03, -5.2166e-04, -5.6641e-02, + -1.0824e-03, -2.0920e-02, 4.7874e-03, -1.6281e-02, 4.0894e-02, + 4.4708e-02, -2.2568e-02, 3.1982e-02, -2.4689e-02, -2.5970e-02, + -4.4434e-02, 3.2562e-02, 5.6458e-02, -1.5976e-02, 1.6129e-02, + -1.6968e-02, 3.2330e-03, 4.4403e-02, -3.6438e-02, 3.1891e-02, + -1.8921e-03, -1.9669e-02, 3.1342e-02, -5.9235e-02, 1.8967e-02, + 
2.7206e-02, 7.2021e-02, 2.2812e-02, 2.5635e-02, 2.3743e-02, + 2.0905e-02, 8.2458e-02, -1.6117e-03, 5.7190e-02, -3.2349e-02, + -3.7994e-02, 1.8845e-02, 1.8845e-03, 5.3284e-02, 2.1801e-03, + 2.8900e-02, -5.6824e-02, -4.2610e-03, -2.7145e-02, -3.4515e-02, + -2.0996e-02, 2.9221e-02, -8.8501e-02, 5.2567e-03, 5.8441e-02, + -2.4704e-02, -2.6581e-02, -1.5053e-02, -1.8555e-02, 3.0090e-02, + 1.1121e-01, -2.7542e-03, 1.1032e-02, -2.6871e-02, 6.1913e-03, + -1.2978e-02, -2.2446e-02, 2.0050e-02, 3.3478e-02, -5.1308e-03, + -4.4769e-02, 4.5868e-02, 1.2375e-02, 1.7563e-02, 5.1239e-02, + 2.5131e-02, -2.2842e-02, -1.8188e-02, -1.9974e-02, -4.7913e-02, + -2.3041e-02, 6.3705e-03, -3.4695e-03, -2.0279e-02, 3.1616e-02, + 2.6306e-02, 1.2650e-02, 3.2104e-02, -4.3823e-02, -1.5366e-02, + 1.1955e-02, -1.1835e-01, -1.2444e-02, 2.3632e-03, -8.0872e-03, + -3.7575e-03, -4.6234e-02, -4.8340e-02, 2.8748e-02, 3.4576e-02, + 3.8208e-02, 6.4636e-02, -5.4962e-02, 4.1351e-03, -4.1866e-04, + 1.9073e-02, -6.0310e-03, -2.4857e-02, -3.9795e-02, 5.2948e-02, + 3.6041e-02, -3.3447e-02, 7.5500e-02, -4.7493e-03, -2.7298e-02, + -2.9510e-02, -6.9695e-03, 8.8806e-03, 1.5434e-02, -4.9011e-02, + 8.0948e-03, -9.0485e-03, -2.1530e-02, -9.2407e-02, 8.7585e-03, + -9.7717e-02, 2.3956e-02, 6.7871e-02, 4.1351e-02, 1.3680e-02, + 1.4982e-03, 1.0400e-01, 2.1118e-02, -1.5480e-02, 3.6591e-02, + 1.4511e-02, -5.4169e-03, 4.0321e-03, 6.5552e-02, 6.5842e-03, + 1.4305e-02, 4.5586e-03, -1.3863e-02, 1.2848e-02, -5.1727e-02, + 8.5144e-02, 3.9581e-02, -9.8114e-03, 2.3163e-02, -1.4214e-02, + 8.0032e-03, -2.9968e-02, -1.5175e-02, -3.9558e-03, -1.3176e-02, + 5.9692e-02, 1.1299e-02, -3.5736e-02, 1.2413e-02, -6.2469e-02, + 2.5238e-02, 1.4099e-02, 2.2583e-02, 4.8035e-02, 2.4109e-02, + 1.8890e-02, 2.1240e-02, 1.5610e-02, 1.5671e-02, -2.6047e-02, + 1.4259e-02, -3.4103e-03, -1.4206e-02, -3.4271e-02, 2.3422e-02, + -5.7800e-02, 3.7781e-02, -2.2018e-02, -3.3508e-02, -1.1887e-02, + -1.6037e-02, 2.2068e-03, 1.6724e-02, -1.4359e-02, 6.9962e-03, + 
-5.2460e-02, 4.4952e-02, 6.4087e-02, -4.5502e-02, 1.1162e-02, + 5.1422e-02, -2.1790e-02, 3.6346e-02, 1.4618e-02, -2.3529e-02, + 2.8046e-02, -3.3173e-02, 8.0795e-03, -3.8574e-02, -9.4299e-02, + 1.6769e-02, 2.6413e-02, -4.2328e-02, 9.2010e-03, 3.3386e-02, + -2.6817e-03, -5.0995e-02, -2.8839e-02, 5.7106e-03, -1.2684e-03, + 8.8806e-03, -4.4434e-02, 8.3494e-04, 6.4545e-03, -7.7576e-02, + -4.2206e-02, 3.7933e-02, 2.3224e-02, -1.9028e-02, 2.0508e-02, + 1.7151e-02, -4.0627e-03, 2.8107e-02, 1.7044e-02, 7.8979e-02, + -2.2781e-02, 1.3550e-02, -1.7624e-02, -3.5736e-02, 2.3972e-02, + -1.8845e-02, 2.1347e-02, 4.6234e-03, -5.3711e-02, 2.4399e-02, + 5.7259e-03, 6.3095e-03, 3.8414e-03, 1.4709e-02, 7.2899e-03, + 8.6517e-03, 2.8656e-02, -5.5122e-04, 3.4008e-03, 2.0630e-02, + 2.3346e-02, 1.2459e-02, -6.3293e-02, -5.6732e-02, 1.0559e-01, + 9.0759e-02, 1.4839e-02, -9.9411e-03, 2.0111e-02, 5.5511e-02, + -2.4433e-03, 2.4063e-02, -6.2561e-02, -3.9558e-03, -1.1353e-02, + 3.7445e-02, -3.1490e-03, -9.2468e-03, -6.5269e-03, 2.7252e-02, + -3.0548e-02, -4.4556e-02, -3.9642e-02, -1.2064e-03, -3.4332e-02, + 4.6959e-03, -4.1321e-02, -4.5990e-02, 1.6342e-02, -4.4708e-02, + 3.6255e-02, 2.2034e-02, 6.4583e-03, 7.3364e-02, -3.8513e-02, + 1.5930e-02, 6.1096e-02, -2.4246e-02, -2.3575e-02, -2.4662e-03, + 1.0941e-02, -1.9211e-02, -2.1301e-02, -1.3809e-02, -2.5665e-02, + 3.1921e-02, -1.8806e-03, 2.7420e-02, -4.3671e-02, 2.3239e-02, + -9.8572e-03, 1.0803e-02, -3.4607e-02, -4.7791e-02, -6.9275e-03, + -1.7075e-02, 6.0455e-02, 3.6407e-02, 5.3436e-02, 5.7190e-02, + -3.1952e-02, 7.1564e-03, 1.6922e-02, 7.2937e-03, -2.6302e-03, + -5.5359e-02, -1.1360e-02, -2.0859e-02, 1.6296e-02, 1.3933e-03, + 3.3905e-02, -7.8857e-02, 1.4816e-02, 1.6174e-02, -9.2602e-04, + 1.1911e-03, 4.7226e-03, -1.4030e-02, -1.4473e-02, -5.9753e-02, + 6.2988e-02, 1.1841e-02, -4.2389e-02, 6.6284e-02, -1.9791e-02, + -2.5463e-04, -1.6571e-02, -2.0081e-02, -5.6793e-02, 2.2049e-02, + 3.3112e-02, 5.5351e-03, -1.9684e-02, 1.1755e-01, 9.5825e-02, + 
9.2834e-02, 3.2043e-02, 2.7237e-02, -2.0081e-02, 1.8646e-02, + -7.5134e-02, -6.1676e-02, 3.1097e-02, -2.4750e-02, -5.6580e-02, + -1.4442e-02, -2.0447e-02, -2.1805e-02, 5.7526e-02, -1.5678e-03, + -4.7516e-02, -7.0923e-02, 1.8921e-03, 2.2583e-02, 4.3732e-02, + 7.9250e-04, 1.4275e-02, -1.4984e-02, 2.1400e-03, -4.6387e-02, + 6.9275e-03, -8.8120e-03, 8.7646e-02, -2.3669e-01, 1.8951e-02, + -7.2174e-03, 1.7166e-02, 3.0014e-02, -3.4973e-02, -4.0680e-02, + -5.1605e-02, -2.1088e-04, -1.8845e-02, 2.7466e-02, -1.8356e-02, + 5.8075e-02, -4.3304e-02, -7.6355e-02, -8.0200e-02, 2.8427e-02, + -3.8055e-02, 5.0201e-02, -1.3794e-02, 2.6474e-02, 1.4137e-02, + 5.3833e-02, 6.0516e-02, -1.1345e-02, 1.4664e-02, 1.6464e-02, + -6.2943e-04, -4.0474e-03, 7.6790e-03, 1.5160e-02, -1.4793e-02, + -5.9776e-03, 3.4607e-02, -6.1646e-03, 2.8290e-02, 9.4971e-02, + 2.9160e-02, 3.3264e-02, -1.1536e-02, -6.0310e-03, -3.4882e-02, + -3.3630e-02, 6.9763e-02, 1.7105e-02, -1.3481e-02, 2.9449e-02, + 1.6327e-02, 6.6162e-02, 2.2415e-02, -8.2703e-03, -6.4148e-02, + -5.0354e-02, 1.4214e-02, 4.9286e-02, -5.5450e-02, 1.5182e-02, + -2.8336e-02, -5.8960e-02, 1.5823e-02, 5.8838e-02, -2.4063e-02, + -6.9214e-02, 6.2347e-02, 2.8259e-02, -2.0462e-02, -6.0883e-02, + -3.7575e-03, -1.3939e-02, -3.4607e-02, 5.4893e-03, -2.7962e-03, + 9.5596e-03, 1.1261e-02, 1.1215e-02, 9.5215e-02, -4.4250e-02, + 2.5970e-02, -2.8351e-02, -4.4067e-02, 2.9282e-02, -2.4261e-02, + -2.1484e-02, 3.8361e-02, 3.0853e-02, 2.7756e-02, 3.4485e-02, + -4.2114e-02, 2.5909e-02, 3.6377e-02, -1.0086e-02, 2.8992e-02, + -2.3880e-02, 2.3697e-02, -2.1713e-02, 2.9678e-02, -3.4027e-02, + -3.5889e-02, -2.0065e-02, -9.5154e-02, 2.8702e-02, -6.5796e-02, + 8.2626e-03, -1.5060e-02, -9.7885e-03, -5.0781e-02, 4.5471e-02, + -7.2937e-02, 2.3361e-02, -3.3508e-02, 9.9121e-02, 5.3635e-03, + -2.2293e-02, -2.3758e-02, 3.4515e-02, 1.3947e-02, 2.8976e-02, + -3.5797e-02, -6.5155e-03, 4.8462e-02, -2.6798e-03, 1.6922e-02, + 1.1887e-02, 8.4734e-04, 4.1199e-02, 1.4353e-03, -2.1286e-03, + 
-8.1055e-02, -1.9135e-02, 3.3295e-02, -2.5391e-02, 2.9572e-02, + 1.7868e-02, -1.6785e-02, 1.2001e-02, -2.9846e-02, 1.4832e-02, + -1.7410e-02, 2.0630e-02, -3.1250e-02, -4.4678e-02, 2.4891e-03, + 3.7567e-02, -5.1300e-02, -1.3695e-02, 2.1839e-03, -2.0966e-02, + -1.4938e-02, -2.3300e-02, 4.5013e-02, 3.4424e-02, -2.3087e-02, + -2.7634e-02, 2.2018e-02, -7.2289e-03, 6.2805e-02, 3.6621e-02, + -4.5441e-02, -1.8112e-02, -4.7821e-02, -4.7760e-02, -1.0996e-03, + -6.2195e-02, -8.7585e-03, -7.2998e-02, -1.1948e-02, 1.0933e-02, + -2.5726e-02, -3.3302e-03, 6.8893e-03, -1.3519e-02, 1.8280e-02, + -2.1790e-02, 1.0498e-02, -1.2772e-02, -3.2562e-02, -4.5715e-02, + 5.7220e-03, -1.0307e-02, -2.2064e-02, 1.6388e-02, -1.0399e-02, + 3.2444e-03, 2.5192e-02, -5.9776e-03, 1.2680e-02, -4.5410e-02, + 9.0313e-04, -5.9143e-02, -2.4826e-02, 2.3422e-02, -3.2166e-02, + -5.6496e-03, -2.2919e-02, 1.9104e-02, -3.8338e-03, -8.0139e-02, + 6.6147e-03, -2.8091e-02, -3.6469e-02, -3.9337e-02, -3.2867e-02, + 5.4443e-02, 2.1149e-02, 4.6082e-02, 5.1208e-02, 2.8030e-02, + -3.9062e-03, 1.5053e-02, 4.1351e-02, -6.8331e-04, -2.8183e-02, + -8.4763e-03, 1.0704e-02, 6.5491e-02, 1.5533e-02, 1.3351e-02, + 5.8289e-02, -3.4088e-02, 3.9291e-03, -2.9129e-02, 5.6244e-02, + -1.0910e-02, 4.7150e-02, 4.7241e-02, 2.1912e-02, -4.8920e-02, + -4.7058e-02, 3.5858e-02, 2.0325e-02, 1.2009e-02, -6.0028e-02, + 6.2988e-02, 3.8177e-02, 3.8666e-02, -3.3478e-02, -1.9241e-02, + -2.5696e-02, 3.0444e-01, -2.0157e-02, -1.5320e-02, -4.1443e-02, + -6.3324e-03, 7.6538e-02, -4.5624e-02, -4.3793e-02, -6.3293e-02, + -2.5650e-02, -3.1708e-02, -6.9733e-03, -4.4128e-02, -1.2222e-02, + -2.8656e-02, 1.4755e-02, 4.5410e-02, -1.4847e-02, -6.1127e-02, + -1.8982e-02, 7.3357e-03, 6.2828e-03, 3.9154e-02, 3.3508e-02, + 3.6377e-02, 6.2675e-03, 1.7609e-02, 2.0752e-03, 8.6060e-03, + -1.2619e-02, 3.3264e-02, -3.6682e-02, -1.8784e-02, -4.0344e-02, + -5.5054e-02, -3.8025e-02, 4.7729e-02, 4.1840e-02, -7.7881e-02, + -2.6321e-02, 6.2073e-02, -6.0150e-02, -4.1046e-02, 
6.2904e-03, + -2.1759e-02, 3.9856e-02, -3.1281e-02, 2.5539e-03, 3.5739e-04, + -4.9286e-02, -1.1353e-02, 2.0035e-02, 3.0548e-02, -6.3515e-03, + 3.6804e-02, 6.1401e-02, -6.7322e-02, 2.9926e-03, -6.9458e-02, + -6.5735e-02, -2.2568e-02, 1.6052e-02, -3.5065e-02, -4.1840e-02, + -3.0746e-03, 1.9791e-02, 3.3997e-02, -2.9160e-02, -4.9469e-02, + -5.8655e-02, -1.8463e-02, -8.1482e-03, -3.5339e-02, 2.3788e-02, + -2.3300e-02, -1.3672e-01, -6.4636e-02, -1.6983e-02, 1.0239e-02, + 2.0599e-02, -1.8295e-02, -2.2568e-02, 8.8684e-02, 2.7573e-02, + 2.5513e-02, 4.2145e-02, 7.9422e-03])Parameter containing: +tensor([1.2778, 1.4128, 1.3243, 1.2121, 1.4286, 1.2285, 1.2714, 1.2086, 1.2280, + 1.2027, 1.3056, 1.3433, 1.3571, 1.2871, 1.2546, 1.3233, 1.2960, 1.4360, + 1.3255, 0.3545, 1.3113, 1.2376, 1.3394, 1.3552, 1.3275, 1.3286, 1.2904, + 1.1890, 1.4459, 1.3469, 1.3475, 1.2710, 1.3147, 1.2767, 1.3758, 1.4789, + 1.2862, 1.3537, 1.4560, 1.3338, 1.3595, 1.2610, 1.3219, 1.3039, 1.3483, + 1.3257, 1.2723, 1.3911, 1.2703, 1.2903, 1.3294, 1.3034, 1.3718, 1.3183, + 1.4007, 1.4945, 1.1526, 1.5551, 1.4231, 1.4085, 1.3184, 1.3429, 1.2638, + 1.3687, 1.2802, 1.3492, 1.2815, 1.3761, 1.4529, 1.5865, 1.2352, 1.3187, + 2.4101, 1.2996, 1.5029, 1.2912, 1.3043, 1.2830, 1.3575, 1.4108, 1.2408, + 1.4397, 1.2768, 1.3065, 1.3802, 1.3398, 1.3298, 1.2797, 1.2675, 1.3241, + 1.2821, 1.3869, 1.3612, 1.3836, 1.4122, 1.2181, 1.3843, 1.2839, 1.3655, + 1.4379, 1.2846, 1.1037, 1.3041, 1.2195, 1.2868, 1.2898, 1.2872, 1.3970, + 1.2838, 1.2924, 1.2720, 1.3200, 1.2509, 1.2536, 1.3131, 1.3219, 1.3353, + 1.3341, 1.2996, 1.3819, 1.2691, 1.3806, 1.3662, 1.4026, 1.3948, 1.3118, + 1.3203, 1.2443, 1.4601, 1.3088, 1.3087, 1.2799, 1.1752, 1.3434, 1.2705, + 1.2702, 1.3879, 1.3780, 1.2694, 1.1935, 1.3127, 1.2161, 1.3522, 1.2309, + 1.2594, 1.2788, 1.3216, 1.3680, 1.3151, 1.3007, 1.2686, 1.3322, 1.2459, + 1.3076, 1.4006, 1.1240, 1.4359, 1.3258, 1.3004, 1.2717, 1.3467, 1.2417, + 1.2638, 1.4253, 1.3500, 1.2859, 1.3707, 1.3677, 1.3646, 1.3057, 
1.2892, + 1.2587, 1.1566, 1.2520, 1.3977, 1.3951, 1.2995, 1.5024, 1.3988, 1.2421, + 1.2630, 1.5120, 1.4118, 1.3496, 1.4099, 1.3526, 1.2130, 1.8183, 1.4380, + 1.5001, 1.3415, 1.2570, 1.3791, 1.4268, 1.4050, 1.2840, 1.2843, 1.2593, + 4.8400, 1.2843, 1.1926, 1.3561, 1.3769, 1.3333, 1.2853, 1.2566, 1.4406, + 1.2301, 1.3254, 1.4186, 1.2946, 1.2669, 1.2523, 1.2704, 1.2703, 1.3087, + 1.4050, 1.3373, 1.3108, 1.3078, 1.2544, 1.3056, 1.2784, 1.2104, 1.4433, + 1.2860, 1.4168, 1.3535, 1.2526, 1.3556, 1.2621, 1.3858, 1.4261, 1.3332, + 1.2681, 1.2991, 1.3661, 1.2210, 1.2892, 1.2889, 1.3536, 1.2801, 1.3803, + 1.3746, 1.4893, 1.2750, 1.3595, 1.2911, 1.2272, 1.3971, 1.3142, 1.2904, + 1.2525, 1.2875, 1.3555, 1.3670, 1.3386, 1.2144, 1.2527, 1.1820, 1.3643, + 1.2478, 1.3274, 1.2426, 1.2595, 1.3444, 1.2624, 1.3189, 1.3534, 1.3567, + 1.3505, 1.2788, 1.2749, 1.2946, 1.2299, 1.2255, 1.3519, 1.2815, 1.3201, + 1.2997, 1.4289, 1.4691, 1.3291, 1.3569, 1.3914, 1.3111, 1.4085, 1.2841, + 1.2588, 1.2578, 1.2939, 1.3514, 1.2254, 1.3247, 1.3362, 1.2644, 1.2673, + 1.3170, 1.4680, 1.3108, 1.4813, 1.2732, 1.3297, 1.3588, 1.3602, 1.3319, + 1.2410, 1.2551, 1.2761, 1.2361, 1.2865, 1.3475, 1.2561, 1.2721, 1.2840, + 1.3149, 1.2915, 1.2586, 1.4006, 1.4374, 1.4045, 1.3576, 1.3122, 1.2363, + 0.7507, 1.1181, 1.3389, 1.3652, 1.2037, 1.4117, 1.2621, 1.2860, 1.3316, + 1.2805, 1.2710, 1.2435, 1.1944, 1.3598, 1.3444, 1.4724, 1.3790, 1.3487, + 1.3941, 1.3124, 1.4063, 1.4142, 1.3284, 1.3665, 1.3139, 1.3566, 1.4895, + 1.2782, 1.2546, 1.4901, 1.1898, 1.3537, 1.2926, 1.2563, 1.3956, 1.3219, + 1.2015, 1.2747, 1.2112, 1.3383, 1.4631, 1.2327, 1.3151, 1.2368, 1.3727, + 1.2585, 1.2890, 1.4346, 1.2018, 1.2840, 1.3254, 1.2897, 1.3935, 1.3739, + 1.2381, 1.2642, 1.3081, 1.6554, 1.3431, 1.2500, 1.2942, 1.2410, 1.2230, + 1.3736, 1.3201, 1.3532, 1.4267, 1.2428, 1.3280, 1.2035, 1.3420, 1.3727, + 1.2243, 1.3602, 1.3100, 1.3725, 1.2488, 1.3750, 1.2510, 1.2722, 1.3553, + 1.3276, 1.2686, 1.2770, 1.3718, 1.2138, 1.2707, 1.3313, 1.3915, 
1.2798, + 1.2845, 1.8782, 1.3366, 1.3609, 1.3501, 1.3094, 1.2649, 1.4126, 1.2749, + 1.3580, 1.2331, 1.2871, 1.3615, 1.3453, 1.3761, 1.3402, 1.3949, 1.3075, + 1.3821, 1.2335, 1.1219, 1.2089, 1.3412, 1.3147, 1.4133, 1.2842, 1.2501, + 1.3594, 1.4005, 1.2015, 1.2951, 1.3273, 1.3984, 1.3136, 1.3617, 1.2494, + 1.2750, 1.3031, 1.4086, 1.2963, 1.3859, 1.3196, 1.2856, 1.3284, 1.3102, + 1.3510, 1.2589, 1.2849, 1.2815, 1.2839, 1.3223, 1.3948, 1.2715, 1.2000, + 1.3363, 1.3016, 1.4022, 1.2559, 1.2914, 1.3016, 1.2621, 1.3012, 1.2922, + 1.3268, 1.3100, 1.4105, 1.2209, 1.2655, 1.4514, 1.1279, 1.3305, 1.3242, + 1.3918, 1.3349, 1.3387, 1.3374, 1.2754, 1.3770, 1.3840, 1.4277, 1.2821, + 1.3066, 1.2942, 1.2172, 1.3147, 1.3135, 1.2541, 1.3755, 1.4096, 1.3069, + 1.2051, 1.2896, 1.1959, 1.3449, 1.3062, 1.4289, 1.2718, 1.3988, 1.3705, + 1.3853, 1.4339, 1.3522, 1.3735, 1.3411, 1.2900, 1.2655, 1.4412, 1.2839, + 1.4373, 1.2894, 1.3883, 1.2792, 1.3214, 1.2820, 1.3570, 1.2802, 1.3953, + 1.3076, 1.3642, 1.2259, 1.3210, 1.5030, 1.3443, 1.2598, 1.2860, 1.2877, + 1.3718, 1.3292, 1.4056, 1.3185, 1.3182, 1.3692, 1.3035, 1.3890, 1.2688, + 1.2993, 1.2494, 1.3429, 1.3417, 1.3072, 1.5261, 1.1908, 1.2241, 1.3305, + 1.3312, 1.3154, 1.3555, 1.3268, 1.3306, 1.3453, 1.2817, 1.3936, 1.3599, + 1.3008, 1.2335, 1.4052, 1.3677, 1.3380, 1.3518, 1.2657, 1.4209, 1.3038, + 1.2496, 1.2492, 1.3114, 1.3554, 1.4546, 1.2551, 1.3230, 1.3162, 1.3046, + 1.3480, 1.3656, 1.4054, 1.2291, 1.4179, 1.4216, 1.6285, 1.2478, 1.2996, + 1.2502, 1.3330, 1.2784, 1.2079, 1.2718, 1.3187, 1.3079, 1.3155, 1.4294, + 1.3614, 1.2806, 1.2023, 1.4037, 1.4959, 1.3305, 1.3032, 1.3336, 1.2883, + 1.3871, 1.2994, 1.3879, 1.2965, 1.2655, 1.3263, 1.3945, 1.4027, 1.2427, + 1.4667, 1.3111, 1.3269, 1.2957, 1.4106, 1.2785, 1.2503, 1.4278, 1.3792, + 1.4130, 1.4359, 1.2090, 1.2858, 1.3821, 1.3316, 1.2425, 1.2517, 1.1997, + 1.2026, 1.2181, 1.2937, 1.4291, 1.2833, 1.2416, 1.3274, 1.4065, 1.4087, + 1.4433, 1.2292, 1.3580, 1.3133, 1.3483, 1.3444, 1.4458, 1.3265, 
1.3614, + 1.3026, 1.2615, 1.2933, 1.5180, 1.2559, 1.4251, 1.3611, 1.3697, 1.3070, + 1.3921, 1.2738, 1.3158, 1.2808, 1.3083, 1.2928, 1.3000, 1.2510, 1.3136, + 1.3668, 1.2892, 1.4194, 1.1994, 1.2050, 1.3569, 0.4170, 1.3689, 1.2871, + 1.2851, 1.2127, 1.2463, 1.3796, 1.3585, 1.3439, 1.3009, 1.3167, 1.4009, + 1.3422, 1.4012, 1.2848, 1.3174, 1.3833, 1.3165, 1.2696, 1.2573, 1.3488, + 1.3744, 1.2530, 1.2258, 1.2341, 1.4300, 1.4465, 1.4464, 1.2995, 1.2796, + 1.2561, 1.3208, 1.2107, 1.2561, 1.3227, 1.3021, 1.2849, 1.2951, 1.3540, + 1.3733, 1.2993, 1.2819, 1.3679, 1.3642, 1.3118, 1.3858, 1.3867, 1.5010, + 1.3779, 1.2740, 1.3771, 1.3687, 1.3323, 1.2218, 1.2367, 1.2822, 1.3661, + 1.3749, 1.2645, 1.3220, 1.2099, 1.3023, 1.2323, 1.3248, 1.3039, 1.3085, + 1.3062, 1.3787, 1.6302, 1.3617, 1.2350, 1.2862, 1.3350, 1.3497, 1.3331, + 1.3821, 1.2707, 1.3288, 1.3331, 1.3864, 1.3344, 1.3385, 1.2354, 1.3073, + 1.4564, 1.2089, 1.3711])Parameter containing: +tensor([ 1.1249e-01, 6.6530e-02, -1.5252e-01, -5.8736e-04, 4.5505e-02, + 9.3366e-02, 8.9381e-02, 5.9666e-02, -9.5938e-02, 3.6553e-02, + 6.7269e-02, -1.9519e-02, -1.0083e-01, -2.3823e-02, -3.5535e-02, + -6.4250e-02, -1.6845e-02, 1.2477e-02, -5.8642e-02, 4.4019e+00, + -4.9843e-02, 2.5174e-02, 3.9088e-02, -8.4199e-04, -2.0867e-02, + -5.4807e-04, -2.0647e-03, -1.7433e-02, 1.0351e-01, -9.0810e-02, + 3.9953e-02, 2.3238e-02, 1.4501e-02, 4.8113e-02, 5.7156e-02, + -6.4520e-02, 2.7001e-02, -4.0983e-02, -1.1597e-01, 3.8330e-02, + 2.3931e-02, 3.4684e-03, 8.6621e-03, 1.2125e-02, -4.2395e-02, + 6.7561e-03, 4.1510e-02, -4.2593e-02, 2.6011e-02, 5.0339e-02, + 4.2213e-02, -1.4566e-02, 2.7324e-02, 4.6538e-02, 5.4109e-02, + 5.0325e-02, -6.1837e-02, -4.9468e-02, 2.5524e-02, 1.0648e-02, + 4.9119e-02, -4.5645e-02, 4.1377e-02, 1.9581e-02, 9.3057e-02, + 1.7621e-02, 8.7060e-02, 9.3680e-02, -4.0777e-02, 2.2552e-01, + 3.7641e-02, 2.7666e-02, 3.4943e-01, 5.6216e-02, -1.4487e-01, + 6.4672e-02, 3.3319e-02, -9.7215e-02, -3.7396e-02, 3.0751e-02, + 1.8282e-02, 
-1.1652e-02, -1.9703e-02, 1.1703e-01, -2.4701e-02, + 4.9298e-02, 1.2135e-01, 8.3444e-02, -2.4657e-02, 1.2175e-02, + -1.6799e-02, 2.4807e-02, 2.8444e-02, -5.0449e-02, -5.5266e-02, + 1.3274e-02, -2.0715e-02, -5.3437e-02, -6.7125e-02, -5.9439e-03, + 2.9591e-02, 1.3338e-01, 3.7340e-02, -7.2558e-02, 5.8032e-02, + 2.6419e-02, -4.8896e-03, -3.1441e-02, 3.4037e-02, -5.9380e-02, + -3.7031e-02, -1.5587e-03, 2.8165e-02, 3.1967e-02, 6.8637e-02, + -2.4927e-02, 1.0872e-02, -5.9152e-02, 1.8055e-02, 7.8582e-02, + -6.0901e-02, -1.3762e-01, -6.6338e-02, -2.4902e-02, -1.3091e-01, + -1.7644e-02, -1.1633e-01, 1.2645e-02, 9.5415e-03, -5.4999e-02, + -9.1769e-02, 3.0410e-02, -4.1700e-03, -3.8699e-03, -4.4601e-02, + -3.4956e-03, -8.8458e-03, 1.3220e-02, -4.3815e-02, -3.4442e-02, + -8.5615e-03, 3.0837e-02, 3.9464e-02, 1.0106e-02, 5.6561e-02, + -1.6560e-02, -7.9464e-04, -2.5703e-02, -1.4476e-02, 8.4910e-02, + 1.9942e-01, -5.2275e-02, -6.9300e-02, 5.5513e-03, -7.2165e-02, + -1.4982e-01, -1.9055e-02, -7.8602e-02, 6.0158e-03, 1.1233e-03, + 1.1562e-01, -1.1397e-01, -2.8403e-02, -4.9939e-02, 3.1589e-02, + 1.2695e-03, -7.5428e-02, 4.5873e-02, -5.3041e-02, 3.1856e-02, + -4.4322e-02, 5.4903e-02, 5.8381e-02, 1.6829e-02, 3.3597e-02, + -8.3475e-02, -1.2639e-02, 2.3891e-02, -5.2602e-02, 2.8142e-02, + 4.5836e-04, -8.0072e-02, 5.3213e-02, -2.0927e-03, 5.6944e-03, + -1.6532e-01, 2.7191e-02, 1.2540e-01, -4.0220e-02, -5.6549e-02, + 1.9608e-02, -4.7420e-02, 4.8768e-02, 5.2063e-02, -7.7537e-02, + 8.8836e-03, -2.1279e-02, -1.0774e-01, -5.9231e-01, 4.3655e-02, + -1.2093e-01, -1.1290e-02, -1.6956e-02, 4.7933e-02, 4.8477e-02, + 1.0526e-01, -8.1703e-02, 8.6334e-02, 8.1503e-02, -8.2884e-03, + 1.9980e-03, 1.6998e-01, -2.9055e-03, -2.3385e-02, -5.2070e-02, + -8.9273e-03, -2.7549e-02, 4.1074e-02, -8.2153e-02, -1.1626e-01, + 1.8730e-02, 5.3543e-03, -6.0731e-03, -2.9544e-02, 2.4765e-02, + -4.7577e-02, 3.2469e-02, -4.7903e-03, 7.3674e-02, -3.6682e-02, + 6.6302e-03, 2.0081e-02, -1.1133e-02, 5.2496e-02, -2.5489e-02, + 
3.2781e-02, -7.1077e-02, 5.0036e-02, 5.3721e-02, 3.5437e-02, + 6.2628e-02, 8.2153e-02, 4.1833e-02, 2.3117e-02, 2.5839e-02, + 2.7974e-02, 8.7044e-02, 3.6748e-02, 7.4573e-03, -1.1560e-02, + 4.0989e-02, 1.7920e-02, 8.9429e-02, -7.5363e-02, 1.1239e-01, + -9.7433e-02, -5.9373e-03, -9.0286e-02, 2.9564e-02, 1.9275e-02, + -4.6716e-02, -3.4492e-02, 1.2544e-02, -4.6450e-02, -4.1636e-02, + 2.8558e-02, -4.4963e-02, 1.0954e-02, -6.3352e-02, -3.6041e-02, + 1.6229e-02, 3.7611e-02, 8.6765e-02, -7.4588e-02, 1.9078e-02, + 5.8601e-02, -9.5907e-02, 3.2584e-02, 4.2996e-02, -6.7500e-02, + 1.5511e-01, 1.4252e-01, -4.6072e-02, 1.9960e-02, 4.7929e-03, + 6.0026e-02, 6.9157e-02, -4.8183e-02, 1.1191e-01, -1.1622e-01, + -3.2258e-02, -4.7785e-02, 6.6812e-02, 2.3133e-02, 3.6284e-02, + -1.1379e-02, 6.7204e-02, 2.7096e-02, 1.2316e-02, 4.3819e-02, + 1.6182e-02, 1.0396e-02, -3.4605e-03, 5.9977e-02, 3.0063e-02, + 6.0782e-03, 6.6088e-02, -5.5381e-02, 8.2836e-02, -3.4805e-02, + 3.3898e-02, 1.2261e-02, -4.6789e-02, -6.2023e-02, 1.2293e-02, + -7.1054e-02, 3.4515e-02, 4.1708e-02, 1.0877e-01, -2.4782e-02, + -7.4042e-02, 1.9112e-01, -3.6649e-02, -2.9619e-02, -8.1513e-02, + -1.1837e-01, 9.0077e-03, 1.3000e-02, 3.9411e-02, -9.5251e-02, + 1.3697e-01, -3.9842e-02, 1.0547e-01, -3.3850e-02, 1.7734e-02, + 8.9571e-02, -7.6323e-03, -2.8654e-02, -1.9298e-02, 6.9649e-03, + -9.7311e-02, 1.5954e-02, -5.8039e-02, -2.2428e-02, 7.0552e-02, + 1.0890e-02, 7.9111e-02, 7.5176e-02, 1.7303e-02, -2.0276e-02, + -3.1886e-02, 4.3340e-02, -2.2585e-02, -1.7488e-02, -9.9096e-03, + -5.1280e-02, -4.2994e-02, 1.3468e-02, 7.8899e-03, -6.5327e-02, + 2.5238e-02, -1.2104e-02, 1.9794e-02, -8.7755e-02, 1.2272e-02, + -3.8113e-02, 4.8278e-02, 8.7076e-02, 2.7675e-02, 3.3508e-02, + 4.7161e-02, -4.2115e-02, -6.3795e-02, 9.3564e-03, -2.6794e-02, + -3.8587e-04, 7.3805e-02, 5.9713e-02, 3.5639e-04, -2.9299e-02, + -8.8966e-02, 1.2821e-01, 3.4049e-03, 4.1277e-03, -4.2586e-02, + -2.8642e-02, -7.6255e-03, -6.1095e-02, -5.1197e-02, -6.0758e-02, + 2.7188e-02, 
-9.9969e-02, 3.1856e-03, -3.5005e-02, 6.6075e-02, + 8.8329e-03, 2.1645e-02, 4.5789e-03, -2.4657e-02, 4.3128e-02, + 1.1682e-01, -3.2732e-02, 7.7938e-02, 2.1887e-02, -6.3995e-02, + -1.1791e-01, -6.2589e-02, 7.7237e-03, -1.2817e-02, -1.6943e-02, + -4.3070e-02, 2.5946e-02, 9.0340e-02, -3.8835e-02, -1.0102e-01, + -1.3493e+00, 3.1324e-02, 2.9935e-02, -3.3154e-02, -1.0369e-02, + -4.0507e-03, -1.2769e-02, 4.4127e-03, -2.8645e-02, -1.8434e-02, + 2.3528e-02, -1.3385e-01, -9.2718e-02, 3.5029e-02, 1.4013e-02, + -4.1471e-02, -7.4307e-02, -1.2050e-02, 7.5646e-02, 3.5991e-02, + -7.2910e-03, 1.7712e-02, 5.8498e-02, 1.1845e-01, -7.3238e-02, + -5.0888e-03, -8.3001e-02, -2.9860e-02, 1.0127e-02, 1.1152e-01, + -9.6156e-02, 2.8515e-02, -6.0384e-02, 1.1800e-01, 2.3654e-02, + -1.3667e-02, 5.0921e-02, -9.9970e-02, 3.6478e-02, 3.3720e-02, + -7.5835e-03, 2.4045e-02, 2.6867e-02, -1.9072e-02, -1.0716e-01, + 1.3416e-02, 3.4972e-02, -8.7127e-02, 4.2388e-02, -1.9562e-02, + -3.1595e-02, 2.2393e-02, -6.9860e-02, 4.5873e-02, -1.3502e-02, + -2.1857e-02, -8.8041e-02, -1.2840e-02, 2.1885e-02, -6.6175e-02, + 2.2215e-02, 1.1226e-01, 6.1672e-02, -2.8236e-02, 9.6886e-02, + 9.5079e-02, -8.6334e-02, 3.8164e-02, 1.8307e-01, -6.8546e-02, + -4.5854e-02, -2.1095e-02, 8.4351e-02, 7.5503e-02, 1.1180e-02, + 1.8422e-02, 1.6019e-02, 2.6321e-02, -3.6733e-02, -1.2587e-01, + 1.7723e-02, -8.1332e-02, 3.5607e-02, -2.6955e-02, 2.2358e-04, + 2.8571e-02, -5.5601e-02, 5.2749e-02, -1.8398e-02, -2.9063e-02, + 6.0163e-02, 4.2427e-02, 4.9609e-02, 3.5119e-02, 3.5887e-02, + -8.4040e-02, -3.5546e-03, -8.2419e-02, 4.4913e-02, -2.8065e-02, + -8.6208e-03, 6.7511e-02, 9.9593e-02, 6.3995e-02, -6.3132e-02, + -4.9890e-02, -2.7507e-02, 5.0936e-02, -2.7012e-02, -3.6912e-02, + -2.4369e-02, 1.5135e-02, -2.4008e-02, 5.1538e-03, -9.2522e-02, + 2.4081e-02, 3.6717e-02, -1.6280e-04, -4.9823e-02, 5.6712e-04, + -2.6802e-02, -3.0708e-02, -2.1814e-02, -5.9829e-02, -6.6685e-02, + -1.4319e-03, 1.2471e-02, -2.3993e-02, 6.8387e-02, 2.0728e-02, + 
3.0708e-02, 4.8047e-02, 1.0205e-02, 6.6293e-02, 2.8431e-02, + 6.7335e-02, 9.5118e-03, -4.3140e-02, 5.8522e-02, 4.3582e-03, + -2.7967e-02, 5.7708e-02, -3.5698e-02, 2.9260e-02, 1.0412e-01, + -6.7268e-02, 7.3698e-02, 3.2205e-02, -5.8821e-03, 1.3816e-01, + 6.3438e-02, 3.3773e-02, -9.4991e-03, 1.9717e-02, 3.7347e-02, + 5.1135e-02, 1.2657e-02, -5.9936e-02, -3.0121e-02, 7.1307e-02, + 3.1290e-02, 1.0219e-02, 2.0115e-02, 2.6330e-02, -2.9001e-02, + -1.0041e-01, -4.3682e-03, 5.1741e-02, 8.8398e-02, 4.9760e-02, + 2.4417e-02, 5.8507e-02, -8.0023e-02, -1.5147e-02, -4.0268e-03, + -3.7588e-02, 1.0212e-01, 6.3257e-02, -4.1025e-02, 8.4955e-03, + -2.6449e-02, 4.9546e-02, 1.7144e-02, -6.9164e-03, 6.3603e-02, + -6.4019e-02, -1.0373e-01, 2.1895e-02, -1.5778e-02, 3.1353e-03, + -3.9587e-02, 3.7480e-02, -2.3462e-01, 4.4842e-02, -4.6786e-02, + 5.5548e-02, 5.3524e-02, -1.7518e-02, -3.4734e-02, -2.3288e-03, + -9.6355e-02, 8.0766e-02, 8.5742e-02, 2.9556e-02, -6.0110e-02, + 3.5366e-02, 3.0754e-04, 2.1479e-02, 6.5357e-03, 2.3472e-02, + -2.9868e-02, -1.4617e-02, -1.1686e-02, 4.1814e-02, -2.7550e-02, + 1.0170e-01, 1.5005e-02, 8.5918e-02, 1.5293e-01, 3.6819e-02, + 2.8971e-02, -4.9363e-02, 2.3617e-03, 2.9390e-02, -5.3829e-02, + -2.8364e-03, -5.2993e-02, 3.4160e-02, -3.5602e-02, 7.5076e-03, + 1.7979e-02, 2.6363e-02, 6.1626e-02, 1.4246e-02, 9.0429e-02, + 9.7307e-02, -1.8644e-02, -1.0435e-02, 5.2864e-02, -3.1784e-02, + 3.9848e-02, -7.4252e-02, -1.6382e-02, 2.7518e-02, 6.7591e-02, + -1.0744e-01, -9.2538e-03, -7.7432e-02, 3.2136e-03, 8.4539e-02, + -9.5115e-04, -6.7152e-02, -1.0466e-01, 2.8688e-02, 1.1688e-02, + -2.1216e-01, 2.4527e-02, 7.5086e-03, 5.9728e-03, -5.0438e-02, + 4.5686e-02, -2.3327e-02, -2.7498e-02, 1.3780e-02, 6.8096e-02, + -4.4034e-02, -5.3989e+00, -1.8472e-02, 2.5993e-02, 1.1153e-02, + -7.6305e-02, -4.6629e-02, -4.5762e-02, -1.2894e-02, 2.3282e-02, + -5.1168e-02, -1.6560e-02, -1.8486e-02, -4.1046e-02, -4.4457e-02, + 4.0430e-02, -3.8813e-02, -7.7700e-02, 2.9239e-02, -4.2895e-02, + 
7.3962e-02, 7.1423e-02, 1.0706e-02, -2.8681e-02, -1.1633e-01, + -2.8290e-02, 3.7359e-02, 2.3484e-02, 3.1901e-02, 4.8001e-02, + -3.1100e-02, 8.7397e-02, 9.8772e-02, 1.7629e-02, 3.6501e-03, + 9.3748e-03, -3.4984e-03, 3.5952e-02, -7.3531e-02, -7.0477e-03, + 6.7023e-03, -2.4618e-02, 2.3288e-02, -9.6989e-02, 3.1015e-02, + 1.3645e-02, -2.0102e-01, 8.9683e-02, 3.0963e-03, 7.7379e-02, + 5.2203e-02, 4.2260e-02, 5.4637e-02, 7.2847e-02, 2.5497e-02, + -2.4353e-03, -2.0962e-02, -1.2389e-02, -3.2438e-02, -7.5809e-02, + 9.4790e-02, -1.0083e-01, -1.0935e-01, 3.2097e-02, -2.6656e-02, + -1.8654e-02, -8.5054e-02, 1.4421e-02, 6.8078e-02, -5.8056e-02, + -4.4167e-02, -1.4837e-02, -7.9102e-02, -4.1090e-02, 7.0457e-02, + 1.0067e-02, 2.5603e-02, 9.8748e-02, 4.7162e-02, -5.7733e-02, + 2.2399e-02, 3.9574e-02, 9.2868e-02, -7.5548e-02, -5.1467e-02, + -4.1965e-02, 8.5480e-02, -2.1664e-03])Parameter containing: +tensor([[ 0.0202, 0.0051, -0.0099, ..., 0.0150, -0.0118, -0.0252], + [ 0.0279, -0.0216, -0.0171, ..., -0.0218, 0.0069, 0.0059], + [-0.0100, 0.0249, 0.0076, ..., 0.0068, -0.0119, 0.0081], + ..., + [ 0.0039, -0.0400, -0.0170, ..., 0.0191, -0.0038, 0.0145], + [ 0.0186, -0.0087, 0.0062, ..., 0.0125, -0.0135, -0.0063], + [ 0.0241, -0.0023, -0.0027, ..., 0.0083, 0.0031, 0.0143]])Parameter containing: +tensor([-0.1687, -0.1522, -0.1874, ..., -0.3894, -0.2622, -0.3452])Parameter containing: +tensor([[-0.0104, 0.0026, -0.0121, ..., 0.0043, -0.0197, -0.0084], + [-0.0017, -0.0006, 0.0162, ..., -0.0014, 0.0029, 0.0020], + [-0.0115, 0.0131, -0.0065, ..., -0.0152, 0.0059, 0.0125], + ..., + [-0.0005, 0.0133, -0.0086, ..., 0.0159, -0.0166, 0.0221], + [-0.0144, -0.0031, 0.0204, ..., 0.0199, 0.0079, -0.0012], + [-0.0021, -0.0152, -0.0143, ..., 0.0090, -0.0025, -0.0068]])Parameter containing: +tensor([-1.4450e-02, -1.9440e-02, -5.2460e-02, -3.1021e-02, -5.1300e-02, + 3.6041e-02, -4.9820e-03, 3.0884e-02, -6.0791e-02, -3.9024e-03, + 6.5979e-02, -5.0690e-02, -1.5404e-02, 1.2665e-02, 1.0269e-02, + -2.3239e-02, 
1.9638e-02, 4.1351e-02, -9.3155e-03, 4.5898e-02, + -1.4809e-02, 3.9795e-02, 5.4199e-02, 2.2781e-02, -1.1696e-02, + 4.7455e-02, -3.1342e-02, -5.1666e-02, 2.0187e-02, -4.6753e-02, + 1.4305e-02, 2.4292e-02, 4.6883e-03, 4.9622e-02, 1.7075e-02, + -7.3669e-02, 4.7668e-02, -3.4607e-02, 5.6549e-02, -2.3193e-02, + -1.6907e-02, -4.4983e-02, 1.2787e-02, 3.8055e-02, -4.3793e-02, + 7.4951e-02, -2.7313e-03, 2.3071e-02, -2.5681e-02, -2.4292e-02, + 3.4485e-02, 3.3752e-02, 1.5030e-02, 1.4137e-02, 5.1941e-02, + -3.1830e-02, 4.2801e-03, 8.0322e-02, 2.6215e-02, 4.4525e-02, + 2.6276e-02, -3.3325e-02, 2.9175e-02, -5.9875e-02, 6.1890e-02, + 4.4678e-02, 3.4008e-03, 6.0394e-02, 3.0575e-03, 2.4216e-02, + -1.5656e-02, 2.8706e-03, 1.1194e-01, 1.2585e-01, -6.3538e-02, + 5.7129e-02, -8.5678e-03, -2.8549e-02, 3.8269e-02, -6.1859e-02, + -2.5925e-02, -7.7637e-02, 1.9104e-02, 1.2222e-02, 3.5248e-02, + 1.2161e-02, 6.0577e-02, 3.3173e-02, -2.0538e-02, 4.6310e-03, + -5.7648e-02, 1.1932e-02, 5.6274e-02, -1.1154e-02, -6.1523e-02, + 9.4299e-02, 4.9988e-02, -2.1072e-02, -5.7259e-03, -4.9530e-02, + -8.6914e-02, 1.1816e-01, 3.1860e-02, -1.7868e-02, -5.5618e-03, + 1.4656e-02, -1.0147e-03, 6.7505e-02, 2.8412e-02, -5.7129e-02, + -2.4433e-03, 2.9312e-02, -5.8250e-03, -2.6993e-02, -1.4832e-02, + 1.0757e-02, -4.6600e-02, -1.3153e-02, -5.3589e-02, -4.4342e-02, + -1.4353e-03, -4.3793e-03, 1.9211e-02, 1.8112e-02, -8.9294e-02, + -2.7573e-02, -5.5733e-03, -8.1238e-02, 5.9601e-02, -1.1169e-01, + -2.1286e-02, -4.9652e-02, -1.4503e-02, -3.3081e-02, -2.4017e-02, + -8.7662e-03, -2.5314e-02, -9.2926e-03, -4.5837e-02, 3.4424e-02, + -3.4454e-02, 4.5349e-02, 5.2155e-02, 4.0588e-02, 6.9824e-02, + -2.4170e-02, -2.2324e-02, 7.3547e-02, 3.6987e-02, 1.9012e-02, + 3.7231e-02, -7.6904e-02, 5.1880e-02, 1.5228e-02, -3.5583e-02, + -5.7495e-02, -1.0101e-01, -2.4506e-02, 3.2330e-03, 6.7200e-02, + 2.8725e-03, -1.1665e-02, -7.1144e-03, -1.0231e-02, -9.5825e-03, + 5.4260e-02, -5.0262e-02, -5.2551e-02, -1.3290e-02, 8.1116e-02, + -3.0533e-02, 
7.3776e-03, 2.4551e-02, -8.6975e-02, -4.4342e-02, + 4.7516e-02, -1.4748e-02, 4.2114e-02, -4.8584e-02, 4.8492e-02, + -1.2657e-02, -4.6936e-02, -3.9429e-02, -2.0401e-02, 1.7181e-02, + 2.9335e-03, 4.5242e-03, 1.4160e-01, 6.3515e-03, -2.2491e-02, + -3.1643e-03, 1.0612e-02, 8.1970e-02, -1.3794e-02, -8.8043e-03, + 4.9561e-02, -5.0964e-03, -3.1311e-02, -8.0383e-02, 2.4078e-02, + 3.2063e-03, -5.1544e-02, 6.0616e-03, -8.0185e-03, 4.0283e-02, + -1.9867e-02, -7.0129e-02, -1.7181e-02, 1.0101e-02, -3.5614e-02, + -9.1705e-03, 1.0254e-02, 1.8930e-03, 5.0964e-02, -4.9713e-02, + -2.0294e-03, 3.4821e-02, -4.1473e-02, -1.2077e-02, -8.0444e-02, + 4.1412e-02, 7.0496e-02, -2.3071e-02, -1.7212e-02, 6.1874e-03, + 2.6428e-02, -2.0203e-02, -2.5589e-02, 6.1890e-02, 3.3478e-02, + 4.9957e-02, -2.1423e-02, -7.5928e-02, -4.8828e-02, 2.5978e-03, + 7.7820e-02, -9.0881e-02, -2.9709e-02, 1.0559e-02, 4.3030e-02, + 6.5918e-02, 1.6846e-02, -8.6365e-03, 4.7394e-02, 3.0457e-02, + -2.7008e-02, -3.8513e-02, 5.5298e-02, -3.8269e-02, -7.2861e-03, + 1.2192e-02, -3.8666e-02, -1.1482e-02, -7.8552e-02, 9.7733e-03, + 6.8176e-02, 4.8340e-02, 2.0966e-02, 2.2064e-02, -3.8528e-03, + 2.3636e-02, 1.2711e-02, -3.0380e-02, -8.6182e-02, -4.6173e-02, + -4.6783e-02, -3.2623e-02, -8.6060e-03, 3.2520e-03, -1.2886e-02, + -3.9673e-02, 3.4027e-03, 1.0046e-01, 3.3478e-02, -1.5030e-02, + 1.1162e-02, 3.0350e-02, 4.8279e-02, 2.0081e-02, -9.4528e-03, + 2.7328e-02, -1.1261e-02, 5.4970e-03, 1.3132e-03, 3.6163e-02, + 7.7362e-03, -3.6469e-02, -5.6915e-02, 4.0619e-02, 6.8474e-03, + 6.9580e-02, 2.7710e-02, -3.1494e-02, 2.1805e-02, 3.1281e-02, + -8.8348e-03, -3.2898e-02, 2.7069e-02, -3.9940e-03, 9.3445e-02, + -3.4088e-02, -5.5313e-03, -3.7689e-02, 1.6724e-02, 4.7028e-02, + -2.2858e-02, 4.0100e-02, 5.3650e-02, -3.3752e-02, 4.2076e-03, + 3.9124e-02, 5.7983e-02, -3.1143e-02, 3.8513e-02, 1.0323e-02, + -5.5481e-02, -4.6356e-02, -4.0039e-02, 7.4219e-02, -7.2021e-02, + -1.0323e-02, 3.8483e-02, -5.9570e-02, -2.7084e-02, 9.9170e-01, + -2.5467e-02, 
6.9702e-02, 2.7435e-02, 4.4373e-02, -2.9785e-02, + 5.3497e-02, -6.6071e-03, -9.4528e-03, -2.0615e-02, 1.1345e-02, + 4.7150e-02, 5.0354e-02, -1.2062e-02, -2.1973e-02, 2.1774e-02, + -4.2786e-02, -8.9340e-03, 4.1382e-02, 3.9864e-03, 2.9648e-02, + -2.7267e-02, -3.4580e-03, -5.5664e-02, 8.5907e-03, -2.2476e-02, + 7.6172e-02, 1.4473e-02, -1.6937e-02, -6.1584e-02, 3.5591e-03, + 1.6144e-02, 8.6670e-03, 2.4853e-03, -5.0018e-02, 1.6235e-02, + 1.9775e-02, -3.4027e-02, 8.4473e-02, -1.0803e-02, -5.3528e-02, + 3.7861e-03, -2.1576e-02, 3.7415e-02, -2.8839e-02, -8.6731e-02, + -4.9744e-03, 7.8552e-02, -6.4270e-02, 1.1009e-02, -2.1423e-02, + -2.4216e-02, 8.6243e-02, -5.7251e-02, 5.2826e-02, 7.1472e-02, + -2.5269e-02, 2.3468e-02, 1.5396e-02, 6.8130e-03, 1.3374e-02, + -3.2043e-02, 8.0933e-02, -4.6783e-02, -4.3091e-02, -4.8950e-02, + 2.8801e-03, -6.0234e-03, -6.4964e-03, 2.0203e-02, -2.5589e-02, + 5.3864e-03, -5.1697e-02, -8.9722e-02, 8.4656e-02, 1.2188e-03, + 6.9336e-02, -1.9135e-02, -1.4641e-02, -2.3636e-02, -2.4597e-02, + -9.3689e-02, -2.0050e-02, 3.0487e-02, 3.6621e-02, 3.4027e-02, + -8.9874e-03, 3.9703e-02, 2.5654e-03, 2.7573e-02, -9.3445e-02, + -6.0944e-02, -9.6283e-03, -1.6403e-03, 1.9331e-03, 3.9856e-02, + -6.8726e-02, -2.6672e-02, -1.5839e-02, -3.2684e-02, 1.5198e-02, + 4.4403e-03, -5.0293e-02, -3.5645e-02, 2.1896e-02, -4.4067e-02, + -3.1982e-02, -2.1866e-02, -3.6621e-02, -1.9531e-02, -9.9106e-03, + -2.4673e-02, -4.5288e-02, 3.9764e-02, 1.0429e-02, 3.6957e-02, + 1.9943e-02, -2.9251e-02, -2.3087e-02, -1.0883e-01, 2.7267e-02, + -1.7349e-02, -2.8488e-02, 4.4922e-02, 2.2797e-02, 2.3148e-02, + -3.6836e-04, -5.5725e-02, -6.5918e-02, 3.3142e-02, 1.2970e-02, + -9.5520e-03, -8.7433e-03, 2.8717e-02, -4.6478e-02, -3.7994e-02, + 7.0618e-02, -2.2369e-02, -3.4119e-02, -2.9465e-02, 1.0574e-04, + -2.7817e-02, 3.8849e-02, -2.4629e-04, 5.5115e-02, -1.7776e-02, + 6.0692e-03, -2.2720e-02, 5.4230e-02, -3.9154e-02, -3.1891e-02, + 9.6436e-03, 8.9722e-02, 5.8990e-02, -1.1063e-03, -7.3242e-03, + 
3.8300e-02, -2.6031e-02, -3.2593e-02, 6.5979e-02, -3.9490e-02, + -6.1890e-02, -1.2589e-03, 7.5256e-02, 5.3223e-02, -2.5833e-02, + -2.1667e-02, 6.6467e-02, 4.8584e-02, -1.9684e-02, -8.9417e-03, + -4.0710e-02, -1.8967e-02, 4.2511e-02, 4.4708e-02, 2.3956e-02, + -2.4551e-02, -2.4567e-02, -2.1851e-02, -6.6650e-02, -2.4673e-02, + 2.7588e-02, -3.9673e-02, 6.9153e-02, 1.9150e-02, -3.3131e-03, + -5.8594e-02, -1.2550e-02, -5.0690e-02, 3.5004e-02, -9.9957e-05, + -2.9907e-02, 4.1138e-02, 4.6959e-03, -5.2605e-03, -4.1016e-02, + -3.0014e-02, -1.3374e-02, -2.0096e-02, -5.9624e-03, -1.9089e-02, + 2.3899e-03, 1.4854e-02, 1.8494e-02, -1.0391e-02, -1.6373e-02, + -2.6657e-02, 3.0426e-02, -2.9388e-02, -1.7258e-02, 8.3801e-02, + 4.2511e-02, 1.4603e-02, 6.9962e-03, -2.7267e-02, -4.2053e-02, + 2.4109e-03, 7.2250e-03, -1.8051e-02, 1.2123e-02, -6.6772e-02, + -4.9011e-02, 3.3325e-02, -9.6083e-04, -1.6739e-02, 3.7323e-02, + 1.9653e-02, -3.6469e-02, -6.5063e-02, -4.2664e-02, 2.1423e-02, + 1.4153e-02, -6.0944e-02, -1.6418e-02, -2.0462e-02, 5.4108e-02, + -6.2439e-02, -1.2047e-02, 2.0615e-02, -8.3313e-03, 1.9150e-02, + -3.3020e-02, -5.9052e-02, -3.5362e-03, 4.8279e-02, 7.7581e-04, + -2.9388e-02, 7.5626e-04, -7.7026e-02, 3.2291e-03, -2.3895e-02, + -2.0447e-02, -5.3040e-02, 4.3091e-02, 1.8417e-02, 3.7323e-02, + -7.1777e-02, 2.2614e-02, -2.3453e-02, 9.3918e-03, 2.1072e-02, + -6.3599e-02, -1.2827e-03, -3.2990e-02, -4.0710e-02, 1.2779e-03, + 1.5045e-02, 3.6285e-02, 2.3239e-02, -3.0701e-02, -3.3905e-02, + 2.8732e-02, 2.5955e-02, -6.2561e-03, -6.4209e-02, 3.1113e-02, + -5.2368e-02, -3.9337e-02, 2.4933e-02, 4.9820e-03, -5.0995e-02, + 1.7792e-02, -5.0774e-03, -5.2155e-02, 6.5002e-03, -6.1684e-03, + -7.5317e-02, 3.0960e-02, -3.4119e-02, -2.2980e-02, -1.0941e-02, + -6.3171e-02, 2.3773e-02, 7.6050e-02, 3.6392e-03, 5.1117e-03, + 1.1864e-02, -5.1880e-02, -1.0663e-01, -3.6285e-02, 1.0872e-02, + 9.3689e-03, 1.1284e-02, -1.1948e-02, 6.7078e-02, -1.3687e-02, + 3.9734e-02, -4.2725e-03, 5.1804e-03, 6.2439e-02, 
2.9617e-02, + 2.8687e-03, -2.0889e-02, 4.2908e-02, -1.4122e-02, -4.1161e-03, + 3.6255e-02, 1.6693e-02, 2.4048e-02, -2.6428e-02, -4.6814e-02, + 1.6907e-02, 1.1269e-02, -6.5186e-02, 1.1847e-01, 4.9774e-02, + 3.2444e-03, -2.2018e-02, -2.2964e-02, -2.3331e-02, 8.9264e-03, + 2.5665e-02, -4.0833e-02, 3.0457e-02, 1.6918e-03, -4.4708e-02, + -5.4321e-02, 5.0995e-02, -6.7139e-02, 1.6037e-02, 4.4373e-02, + -3.3600e-02, -5.5328e-02, -3.9978e-02, 6.6223e-02, 4.3121e-02, + -9.3262e-02, -1.2390e-02, 1.1337e-02, 1.2619e-02, 4.5204e-03, + 3.6682e-02, 3.8422e-02, -1.2413e-02, 4.9194e-02, 9.0210e-02, + 5.9937e-02, -1.3379e-01, 4.9400e-03, 5.2452e-03, -1.5884e-02, + -4.0619e-02, 3.6201e-03, -4.7150e-02, -1.3702e-02, 5.2399e-02, + -2.6215e-02, 3.8147e-02, -7.9155e-04, 2.6855e-02, -2.1927e-02, + -1.8539e-02, 1.3641e-02, -2.2598e-02, -4.0955e-02, -5.6610e-02, + 7.6782e-02, -3.4695e-03, 4.1008e-03, 2.9449e-02, -1.4961e-02, + -2.8824e-02, 7.7698e-02, 4.5837e-02, -2.8061e-02, -8.8120e-03, + -3.3844e-02, 2.0676e-02, 7.3624e-04, -3.8818e-02, 1.3878e-02, + 8.1940e-03, 9.8495e-03, -1.2484e-03, 2.6016e-02, 8.2397e-03, + 2.2873e-02, 3.6804e-02, 1.8997e-03, 1.3168e-02, 5.0316e-03, + -2.5654e-03, 3.4851e-02, -5.0201e-03, 1.0445e-02, 3.4485e-02, + 3.4668e-02, 3.4515e-02, 6.4026e-02, 9.1248e-02, -7.8247e-02, + -7.4844e-03, 6.0272e-02, -2.4399e-02, 3.3691e-02, 1.4244e-02, + 1.6068e-02, -5.9448e-02, -4.2877e-02, -8.6746e-03, -1.0736e-01, + -3.5461e-02, -2.6962e-02, -8.1787e-02, -2.7451e-02, 1.2024e-02, + -4.1992e-02, 8.1635e-03, 6.4430e-03, -3.7689e-02, 5.9601e-02, + -2.4536e-02, 6.6589e-02, 7.8552e-02, 8.3130e-02, 1.6815e-02, + -1.2283e-02, 3.2074e-02, 1.6693e-02, -8.3008e-02, -1.6525e-02, + -4.8920e-02, 1.9150e-02, -4.1748e-02])Parameter containing: +tensor([2.0257, 2.0344, 1.9511, 1.9693, 1.9585, 1.9219, 2.0683, 1.9555, 1.9977, + 2.0715, 2.0045, 2.0027, 2.0430, 2.0124, 2.0187, 1.8801, 1.9952, 2.1130, + 1.9557, 1.2675, 1.8754, 1.8764, 1.9653, 1.9503, 2.0653, 1.9437, 1.9732, + 2.0050, 2.0532, 1.8522, 
2.0196, 1.9858, 1.9741, 1.9016, 2.0161, 2.0116, + 1.9765, 2.0106, 1.8425, 1.9446, 1.9371, 2.0979, 1.9926, 2.0592, 2.1984, + 2.0775, 1.9381, 1.9745, 1.9952, 1.9890, 1.8479, 2.0970, 2.0300, 2.0011, + 2.0767, 2.1105, 1.9994, 1.9672, 2.0051, 2.1348, 2.0109, 2.0383, 1.8306, + 2.0998, 1.9471, 1.9370, 1.9396, 1.9878, 2.0677, 2.3015, 2.1528, 1.9999, + 1.8284, 2.0095, 1.9911, 2.0213, 2.0314, 1.9826, 1.9751, 1.9446, 2.0328, + 1.9656, 1.9499, 1.9528, 2.0021, 1.9421, 2.0539, 2.0090, 2.0005, 2.0296, + 1.8981, 2.0537, 1.9623, 1.9398, 1.9628, 1.9870, 2.0913, 2.0724, 1.9533, + 2.1124, 1.9853, 2.6650, 1.9589, 2.0429, 1.8930, 1.9923, 2.0287, 2.0133, + 2.0124, 2.0338, 2.0284, 2.0974, 2.0387, 2.1974, 1.9633, 2.1018, 1.8175, + 1.9394, 2.0572, 2.0080, 2.0954, 1.9050, 2.1441, 1.9543, 1.9805, 2.0541, + 1.9790, 1.9280, 1.9894, 2.1866, 1.9792, 1.8995, 1.9158, 1.9590, 2.1155, + 2.5199, 2.0179, 2.0004, 1.9195, 2.0193, 1.9890, 1.9006, 1.9797, 2.0440, + 2.0788, 2.0407, 2.0304, 2.1029, 2.0203, 1.9760, 1.9394, 2.0054, 1.9847, + 2.0356, 2.1139, 1.5719, 1.9705, 1.9917, 2.0091, 2.0040, 1.9577, 1.9719, + 2.0472, 1.7621, 2.0073, 1.9903, 2.1363, 1.9998, 2.0051, 1.9888, 1.9687, + 1.9345, 2.1511, 2.1393, 2.0262, 1.9638, 2.0399, 2.0069, 1.9389, 1.9667, + 2.0143, 2.0119, 1.9335, 1.9657, 2.1081, 2.0954, 1.9555, 0.4295, 1.9307, + 2.0535, 2.0774, 2.0535, 2.1234, 1.9335, 2.0603, 2.0513, 1.9627, 1.9375, + 0.6353, 2.0395, 2.2153, 2.1222, 2.0615, 2.0297, 2.0519, 2.0836, 2.0057, + 2.0021, 2.0199, 1.9734, 1.9511, 2.1163, 2.0859, 2.0537, 2.0991, 2.0374, + 1.9356, 2.0284, 2.0152, 2.0300, 2.1570, 1.9596, 1.8716, 2.0989, 1.9362, + 2.0595, 1.9272, 1.9588, 2.0182, 1.9944, 2.0247, 2.0196, 1.9151, 1.9654, + 2.0398, 1.9895, 2.1065, 2.0104, 1.9463, 1.9615, 1.9753, 2.0335, 1.9698, + 1.9619, 1.9918, 1.9939, 1.9441, 2.3460, 1.9763, 1.9905, 2.1296, 1.9593, + 2.0462, 1.9803, 1.8739, 1.9475, 1.9598, 2.0517, 1.9284, 1.9119, 2.0556, + 1.9551, 1.8316, 1.9645, 1.9500, 2.0806, 2.0543, 1.9594, 1.9842, 1.9619, + 1.9587, 1.8544, 2.0395, 
1.9717, 2.2640, 1.9691, 2.0282, 1.9032, 1.9770, + 1.9332, 1.9511, 1.8922, 1.9482, 1.9790, 2.0446, 2.0257, 2.0371, 1.9969, + 2.0323, 1.9707, 2.1331, 2.0598, 1.9312, 2.0447, 2.0934, 1.8701, 2.0536, + 1.9339, 1.9815, 2.0502, 1.8652, 1.9644, 1.9713, 2.0664, 2.0611, 1.8879, + 1.9392, 1.9859, 2.0829, 2.0115, 1.9858, 1.9979, 1.8491, 1.9454, 1.9416, + 1.9871, 2.0103, 1.9462, 2.1577, 2.0422, 1.8648, 1.9684, 1.9651, 1.9931, + 2.4453, 1.7836, 1.9797, 2.0477, 2.0374, 1.8115, 1.9577, 1.9438, 1.9010, + 1.9335, 2.0134, 2.0297, 2.0084, 1.9193, 1.9962, 2.1257, 1.9753, 1.9826, + 2.0457, 1.9393, 2.1329, 1.9958, 2.0344, 2.0395, 2.0875, 2.0308, 1.9300, + 2.0481, 1.9178, 2.3537, 2.0540, 1.9842, 1.8567, 1.9846, 2.1611, 2.0759, + 2.1378, 1.9157, 1.8941, 2.0725, 2.0011, 2.0108, 2.0496, 2.0581, 2.0146, + 2.0212, 1.9948, 1.9491, 1.9728, 2.1604, 2.1109, 1.9235, 2.0226, 2.0845, + 1.9894, 1.9106, 1.9809, 1.7731, 2.0163, 1.9498, 2.0033, 1.9945, 2.1344, + 2.0049, 2.0276, 1.9931, 2.1364, 1.9570, 1.9764, 1.9729, 2.0930, 1.9910, + 1.9975, 2.0646, 2.0498, 1.9523, 2.1627, 2.0006, 1.8691, 2.0954, 1.9561, + 1.9464, 2.0225, 2.0438, 2.0626, 1.8734, 2.0497, 1.9712, 2.0253, 2.2255, + 1.9269, 1.5977, 1.9628, 1.9559, 2.0419, 2.0557, 2.1421, 1.9419, 1.9496, + 2.1094, 2.1491, 1.9859, 2.0079, 1.8498, 2.0724, 1.9499, 2.0346, 2.0262, + 1.9633, 2.0354, 1.9472, 1.9602, 2.0195, 2.0032, 2.0036, 1.9063, 2.0086, + 2.0016, 2.0250, 2.2570, 1.9673, 1.9707, 1.8847, 1.9078, 2.0152, 2.0952, + 2.0862, 1.9537, 2.0682, 1.9151, 2.0110, 2.2095, 1.9940, 2.1294, 2.0299, + 1.9963, 2.1305, 1.9429, 2.0693, 1.9541, 1.9645, 1.9386, 2.0793, 2.0342, + 1.9821, 2.0205, 1.8914, 1.9734, 2.0128, 1.9509, 2.0054, 2.0226, 2.0904, + 2.0363, 1.9614, 2.1894, 1.8249, 2.1139, 2.0987, 1.7479, 2.0491, 2.2605, + 2.0351, 2.0034, 1.8794, 2.0154, 2.0932, 2.0017, 1.8794, 2.0726, 1.8610, + 1.9683, 1.9647, 1.9156, 1.8042, 1.9973, 2.0662, 2.0574, 2.0067, 2.0059, + 1.9640, 2.1439, 2.0814, 2.0323, 2.0262, 1.9617, 1.9233, 2.0654, 1.9877, + 2.0460, 2.0286, 2.0341, 
2.0739, 1.9845, 2.0807, 2.0348, 1.9746, 1.9962, + 2.0325, 1.9365, 1.9254, 1.9935, 2.0833, 1.9138, 2.0450, 2.1419, 2.0006, + 1.9513, 2.1702, 1.9652, 1.9930, 2.1020, 2.1332, 1.9315, 2.1528, 1.9875, + 2.0101, 1.9131, 2.1410, 1.9704, 1.9231, 1.9808, 1.9090, 2.0045, 2.0392, + 1.9050, 2.1369, 1.9591, 2.0033, 2.1322, 2.0857, 1.9056, 2.0861, 2.0624, + 1.9077, 2.1703, 1.9176, 1.8142, 1.9912, 1.8790, 1.9754, 1.9995, 1.9791, + 2.0240, 1.8256, 2.0508, 1.9844, 2.1159, 1.9374, 1.9971, 1.9893, 2.0586, + 2.0790, 1.9968, 2.0608, 2.0390, 2.0590, 2.0581, 1.9986, 2.0023, 1.9103, + 2.0122, 2.0208, 2.0332, 2.0699, 1.9136, 2.0321, 2.3401, 1.9774, 1.9934, + 2.0168, 2.0446, 1.8847, 1.9775, 2.1485, 2.0622, 1.9779, 2.0434, 2.1604, + 1.9566, 1.9872, 2.2087, 2.0353, 1.8128, 1.9966, 1.9910, 2.0200, 1.9513, + 2.0511, 1.9924, 2.0438, 2.0259, 2.0082, 2.0246, 2.0088, 1.9980, 1.9457, + 1.9530, 1.9194, 2.0609, 1.9816, 1.9832, 2.0252, 2.0323, 1.9594, 2.0305, + 2.0356, 2.2375, 1.9345, 2.0042, 2.1218, 2.1397, 1.9727, 2.1261, 1.9762, + 2.0960, 1.9011, 1.9572, 2.0197, 2.0102, 1.8579, 2.0108, 1.9747, 2.0413, + 2.1950, 2.0268, 1.9291, 2.0363, 1.9824, 1.9842, 2.0567, 1.9963, 1.9368, + 2.0931, 1.9622, 1.9801, 2.1398, 1.8515, 2.0383, 2.1540, 2.0991, 2.0238, + 2.0954, 1.8529, 2.0306, 1.9698, 2.0138, 2.0344, 2.1097, 2.0003, 2.0347, + 1.9435, 2.0138, 2.0070, 2.0207, 1.9699, 1.9808, 3.9111, 1.9489, 1.9240, + 1.9906, 1.9973, 2.1468, 2.0961, 2.0055, 2.0753, 2.1031, 1.9375, 2.0882, + 2.0503, 2.0181, 1.8466, 2.0694, 2.1705, 1.8652, 2.0717, 1.9540, 1.9346, + 1.9093, 1.9651, 1.9621, 2.0628, 2.0070, 1.9060, 1.9553, 1.8944, 2.0086, + 1.9623, 2.0737, 1.9730, 1.9533, 2.1365, 2.0299, 1.8659, 1.9766, 2.1460, + 1.9863, 2.1535, 1.9489, 2.1114, 1.9806, 1.9694, 1.8617, 1.8932, 1.9100, + 1.9833, 2.0381, 1.9284, 1.9857, 2.0655, 1.9793, 1.9920, 2.0654, 1.9590, + 1.9620, 2.0547, 2.1570, 1.9327, 1.9948, 1.9445, 1.9828, 2.0383, 2.0391, + 1.9883, 1.9772, 2.0274, 1.9669, 2.0183, 2.0465, 2.0411, 1.9694, 2.0264, + 2.2126, 2.3055, 1.8294, 
1.9801, 2.0337, 2.0238, 2.0515, 2.3617, 2.0011, + 1.9341, 1.9198, 2.0020])Parameter containing: +tensor([ 2.1501e-01, 7.4704e-01, -4.3778e-01, -1.9189e-01, -1.9360e-01, + 4.1040e-01, 4.4803e-01, -1.0623e-01, -1.4964e-01, -3.4063e-01, + -2.0855e-01, 3.2643e-01, -8.2681e-01, -6.0881e-01, -5.0489e-01, + -3.7201e-01, -2.5095e-01, -4.9668e-01, -5.6372e-01, -1.3344e+00, + -1.8790e-01, 1.3055e-01, 5.6959e-01, -5.4303e-01, 7.8711e-01, + 1.1111e-01, 1.4635e-01, -1.4257e-01, -6.8712e-01, 3.8435e-01, + 2.1803e-01, 4.9633e-01, 1.4904e-01, 1.6454e-02, 3.2819e-01, + -1.7535e-01, -2.5352e-01, -1.2546e-01, 1.0994e-01, 9.3778e-02, + -6.8792e-01, -1.2614e-01, 2.4639e-01, 1.6165e-01, -3.7067e-01, + -2.1706e-01, -1.0287e-01, -1.8020e-01, 2.9180e-01, 2.3912e-02, + 3.4678e-02, -7.0900e-01, 2.1722e-01, 5.3221e-01, -2.7258e-01, + 1.5429e+00, 2.2216e-01, 1.3756e-01, -4.0713e-01, 4.5913e-01, + -8.6891e-02, 5.5913e-01, 1.7959e-01, -4.4340e-01, 2.4813e-01, + 3.0107e-01, 6.3924e-01, -6.1232e-02, 1.8421e-02, -1.9119e+00, + 7.7772e-01, 2.1262e-01, 3.9979e-01, 3.5545e-01, -6.8527e-01, + -4.8011e-01, 6.0286e-01, 3.7308e-01, -1.8340e-01, -2.6823e-01, + -1.1094e-01, -5.5771e-01, -4.4588e-01, 5.1400e-01, 1.3997e-01, + -4.0948e-01, 6.4643e-01, 6.4235e-02, -4.0403e-01, -5.4217e-01, + 2.7304e-01, 5.2554e-01, -9.3850e-02, -3.3140e-02, -2.6708e-01, + 5.6201e-01, 5.5971e-01, 4.5284e-01, -3.5819e-01, 1.7672e-01, + 1.0297e-01, -2.2664e+00, 7.2437e-02, -2.4392e-01, 2.2132e-01, + 6.1708e-01, 7.2010e-02, -1.7920e-01, 4.3395e-01, -3.4397e-01, + 6.1327e-01, 5.4794e-01, -5.3735e-01, 5.9204e-01, 5.3248e-01, + -9.2280e-01, 5.4920e-01, 2.2522e-01, 5.7106e-01, 3.3792e-01, + 4.6288e-01, -8.5729e-02, 5.4681e-01, 1.2722e-01, -4.7660e-02, + -5.9782e-01, -4.5898e-01, 2.3266e-01, -1.9421e-02, 8.9277e-01, + -6.2295e-01, -5.0095e-01, -2.8544e-01, -7.9341e-02, 5.1180e-01, + 1.2812e+00, 2.5656e-01, 3.7108e-01, 1.0246e-01, 1.0855e-01, + -4.4166e-01, -3.6916e-02, 1.6275e-01, 3.0883e-01, -7.6207e-01, + -3.2294e-01, 4.3284e-01, 
-3.2373e-01, -3.9448e-01, 3.8939e-01, + 4.7772e-01, 1.6648e-01, 5.8841e-02, 2.4477e-02, -8.8626e-01, + 4.9069e-01, 2.7631e-01, 1.3535e-01, -4.4725e-02, 1.2324e-01, + -2.0032e-01, -8.9303e-02, 4.2760e-01, 2.0125e-01, -4.4736e-01, + 2.2468e-01, -1.0505e+00, 2.4208e-01, -7.1624e-01, 1.1178e-01, + -1.1839e-01, -9.6857e-02, -7.3601e-01, 2.8437e-01, 5.4252e-01, + 6.5352e-02, 7.0957e-02, 1.3129e-01, 7.4641e-02, 4.4224e-01, + -4.6053e-01, -2.3242e-01, -3.5033e-01, 6.2616e-02, 5.2954e-01, + 1.0180e+00, 1.4960e-01, -1.3682e+00, -1.4817e-01, -6.3487e-01, + -4.4761e-01, -5.2614e-01, 6.5118e-01, 5.0969e-01, 3.5756e-01, + 4.0856e-01, 1.4308e-01, -1.2530e-01, 9.7227e-01, 3.4538e-01, + -7.7295e-01, 2.7611e-01, 2.6164e-01, 3.8689e-01, -2.0480e-01, + 5.2936e-01, 4.2684e-01, 4.6405e-01, -1.6827e-01, -9.7458e-02, + 5.0772e-01, -5.6984e-01, -3.9931e-01, 2.7341e-01, -7.1261e-01, + -5.7750e-01, 5.6267e-02, -6.3205e-02, -3.5059e-01, -2.6100e-02, + 1.0959e+00, -1.2912e-01, 2.6108e-01, 6.5407e-01, 1.9582e-01, + 3.5861e-01, -3.8938e-01, -5.9331e-03, -1.6254e-02, -1.5344e-01, + -2.4446e-01, 4.5050e-01, 2.4633e-01, 1.3749e-01, -2.3003e-01, + -2.6949e-01, 5.4330e-01, 1.7525e-01, 2.4688e-02, 3.6464e-01, + 1.1125e-01, -1.7916e-01, -3.5670e-01, 1.4557e-01, -3.3076e-01, + -3.5905e-01, 1.9406e-01, -1.2120e+00, -2.5908e-01, 2.6289e-01, + -4.2772e-01, -7.2619e-02, -2.7620e-01, -5.6160e-01, -2.3287e-02, + -2.4912e-02, -5.6602e-02, 3.3388e-01, 1.9100e-01, 5.3561e-02, + -4.4234e-01, -2.6088e-01, 7.4349e-02, -2.6779e-01, -4.8274e-01, + 5.6909e-01, 5.0762e-01, 1.4416e-01, -3.3702e-01, 8.1444e-02, + -9.1124e-04, -5.0530e-02, -5.5433e-02, -1.2218e+00, -7.1735e-01, + -2.9989e-01, 2.1273e-01, 3.5223e-01, -1.0045e-01, -1.4589e-01, + 6.6810e-01, 6.6347e-03, -2.8485e-01, -3.9527e-02, -6.1962e-01, + 5.7725e-01, 6.0066e-01, -1.6726e-01, 3.5993e-01, -4.4027e-01, + -6.4637e-01, 3.6673e-01, 3.9305e-01, -6.9421e-01, 3.8688e-01, + -3.5038e-01, -3.3405e-01, -2.0529e-01, 2.3280e-01, -1.2050e-02, + 5.0844e-02, 3.1180e-01, 
-1.4231e-01, -4.1143e-01, -6.3243e-02, + 2.3677e-01, 1.7883e-01, 2.9399e-01, -4.0658e-01, -1.1772e-02, + 6.7914e-02, -8.2298e-02, -3.7087e-01, -1.3104e-01, -2.7620e-01, + 1.0077e-01, 2.6525e-01, 2.4830e-01, 5.9466e-01, 2.8895e-01, + -4.5527e-01, 7.1790e-02, 2.2942e-01, -2.6116e-01, 3.9986e+00, + -9.8789e-01, -1.2329e-01, -6.7903e-01, 3.8148e-02, -2.7855e-01, + 3.8224e-01, -5.2978e-01, 1.4232e-01, -2.4229e-01, 1.7414e-01, + 3.4799e-01, -4.5234e-01, -1.0896e-02, 1.0077e+00, 3.6756e-01, + -3.3123e-01, 2.9891e-01, -5.0165e-01, 5.9525e-01, -5.5131e-01, + 2.8175e-01, -3.0794e-02, -3.3171e-01, 4.8648e-01, 2.5715e-01, + -9.2639e-02, 1.4169e-01, -2.7374e-01, 1.1780e+00, -5.9521e-01, + -1.4061e-01, -3.1326e-01, 5.3595e-01, -3.8327e-01, -6.1277e-01, + -4.3154e-01, 1.4341e-01, -4.2311e-03, -5.5929e-01, 3.7290e-01, + 1.3575e-01, -5.8564e-01, 5.0063e-01, 3.2996e-01, 4.3748e-01, + 1.2429e-01, -1.8594e-01, -2.0776e-01, -8.4629e-01, -7.2942e-01, + -4.6600e-01, 2.0105e-01, 8.7055e-01, -1.0882e-02, -6.5407e-02, + -2.6198e-01, 1.3003e-01, -3.6015e-01, -2.0504e-01, -2.0976e-01, + -3.5524e-01, -7.5813e-01, 4.4625e-01, 3.2435e-01, -4.2963e-01, + 1.4965e-01, 3.3192e-01, 6.0794e-03, 5.7775e-01, -5.5045e-01, + -7.7025e-02, -1.2794e-01, 4.5363e-01, -2.2398e-01, 1.5045e-01, + 9.8628e-01, -4.1569e-04, 4.0888e-02, 2.2238e-01, 4.5657e-01, + -1.7871e-01, 1.6835e-02, 5.9045e-01, -3.5778e-01, 2.6057e-01, + 1.5641e-01, -2.0078e-02, 2.4277e-01, 5.2386e-01, 1.1039e+00, + -1.1327e+00, -1.1502e-01, -7.1413e-02, 3.7271e-01, 1.6588e-01, + -5.0401e-01, -2.7928e-01, 2.3201e-01, -4.3780e-01, -7.2960e-01, + -9.1982e-02, 3.7177e-01, 2.3800e-01, 1.7524e-01, -1.7606e-02, + -6.8586e-01, -1.7550e-01, 1.6649e-01, 9.6858e-01, 6.1731e-01, + -6.7386e-02, 1.5299e-01, 3.5824e-01, -2.3525e-02, -1.2447e-01, + -2.2176e-01, 3.9829e-01, 2.9128e-01, -2.1872e+00, -3.9490e-02, + 2.0953e-01, 2.1769e-02, 3.2698e-01, 4.3849e-01, -5.8048e-01, + -6.7511e-01, 3.8794e-01, -7.0737e-01, 7.1450e-02, -4.0589e-01, + 9.3709e-01, 
-6.9863e-03, -8.9709e-01, 2.8178e-01, -1.5175e-01, + -7.1227e-01, 1.7312e-02, -5.7876e-01, 7.2203e-02, -3.7204e-01, + 5.4562e-01, 4.1058e-01, 1.8340e-01, 1.3698e-01, 2.2252e-01, + 1.3383e-03, -2.1874e-01, -3.6462e-01, 4.9029e-02, -3.7906e-02, + -6.1232e-01, 2.8946e-01, -4.1292e-01, -2.4855e-02, 5.5976e-01, + 1.6030e-01, 1.2595e-01, 6.2577e-01, -4.8489e-01, 9.7715e-01, + -1.3700e+00, 3.9636e-01, 8.1092e-01, -1.7226e-01, -4.9420e-01, + 1.0185e+00, 4.1705e-01, -9.1852e-02, -3.6709e-01, -4.4797e-01, + -2.9262e-01, -7.5866e-01, 2.7837e-01, -1.8833e-01, 4.7308e-01, + -1.2365e-01, 8.6737e-02, -3.4702e-01, 4.5654e-01, -1.7387e-01, + -4.3082e-01, 3.6552e-01, 1.1090e-01, -4.9035e-01, -5.4686e-02, + -4.0094e-01, -7.3754e-01, -5.6454e-01, -2.7902e-01, 6.3375e-01, + 7.6849e-02, 7.6877e-01, 3.7509e-01, 5.1973e-01, -2.5887e-01, + 7.4737e-01, 2.5756e-01, 5.4741e-01, 3.5443e-01, 3.5458e-01, + -3.8656e-01, 9.1460e-01, -8.0961e-02, 7.5145e-01, 3.3231e-01, + -6.3823e-02, -3.9819e-01, -2.4886e-01, -1.5665e-01, -3.0294e-01, + -1.0763e+00, 5.0080e-01, 1.1450e-01, 3.9202e-01, -1.3330e-01, + -1.5818e-01, 1.7867e-01, -3.6898e-01, 6.2401e-01, 1.3509e-01, + 2.8328e-01, 1.8425e-01, -6.9642e-01, -2.0965e-01, -5.9807e-02, + -6.0111e-01, 5.6218e-02, -5.8171e-02, 2.7017e-01, 2.5399e-01, + -1.6934e-01, -2.4471e-01, -5.6285e-01, -2.3438e-02, 8.7225e-01, + 5.8195e-01, 1.6643e-01, 3.0911e-01, -1.2398e-01, 1.0637e-01, + 3.0400e-01, 3.8059e-01, 3.0698e-01, 5.2002e-01, 3.1318e-01, + -1.1297e-01, -6.4791e-01, -1.2858e-01, -3.3301e-02, 2.8349e-01, + 4.7307e-01, -6.4352e-01, -3.7952e-01, -4.6233e-01, 5.4687e-01, + 3.8553e-01, 1.8468e-01, 5.6490e-01, -5.7524e-01, 8.1843e-02, + 5.2448e-02, 2.1670e-01, -7.9810e-01, 7.6422e-01, 3.6075e-01, + -4.6884e-01, -3.0259e-01, -1.0189e-01, -1.9926e-01, -5.9249e-01, + -4.1673e-01, -1.7293e-01, 4.8049e-01, 5.6171e-01, 5.4314e-01, + -4.5473e-01, 3.3866e-02, -3.8822e-01, -2.2157e-01, 3.9036e-01, + -1.0263e+00, 5.0080e-01, 2.9338e-03, 1.6282e-01, -6.7238e-02, + -1.5358e-01, 
7.7335e-01, -6.0466e-01, -7.1113e-03, 4.5572e-01, + 2.6197e-01, -7.7756e-02, -1.7559e-01, 1.2412e-01, 4.5016e-01, + 4.4452e-01, 4.5932e-01, -1.5143e-01, 1.7155e-01, -1.5590e-01, + -6.7263e-01, -7.4638e-02, 6.8537e-01, 3.1193e-01, -1.8483e-01, + 2.6559e-01, -5.7226e-01, -4.2480e-02, 4.6850e-01, -4.0256e-01, + -7.3821e-01, 3.3337e-01, 7.9803e-01, 5.2502e-01, -2.7037e-01, + 4.2035e-01, -4.6351e-01, -5.0869e-01, -3.4929e-01, -3.2073e-01, + 2.7673e-01, -3.4714e-01, 5.5583e-01, 5.3885e-01, 5.8890e-01, + 2.3125e-01, -6.4483e-02, 2.6723e-01, -3.3570e-01, -7.4513e-02, + 7.5238e-01, 2.7637e-01, 8.6769e-01, -1.4806e-01, 2.2444e-01, + 8.2832e-01, 2.1911e-01, 4.5670e-02, -5.8405e-01, 7.1401e-01, + -1.7179e-02, 2.1653e-01, -3.9713e-01, 5.5353e-01, 5.1918e-01, + -3.4896e-01, 3.0754e-01, 4.5784e-01, 9.4302e-01, 6.3974e-01, + -2.9996e-01, 3.2503e-01, 1.7531e-01, 1.9067e-01, 2.9472e-01, + 1.0802e-01, 2.8037e+00, -2.4171e-01, 1.0029e-01, -6.1159e-02, + 8.6921e-01, 4.2050e-01, -8.0290e-01, -2.8834e-01, -5.9433e-01, + -4.8745e-01, -2.3467e-01, -2.1206e-01, -6.2243e-01, -5.9644e-01, + 9.0945e-02, 6.1859e-01, 5.3241e-01, -2.0134e-01, -3.0877e-01, + 1.2689e-01, 4.3569e-01, 1.7475e-01, -2.3127e-01, 2.0306e-01, + 6.0639e-01, -3.0360e-01, 1.2595e-01, -5.7503e-01, 4.5264e-01, + -3.8956e-01, -4.0904e-01, -1.0199e-01, -1.8171e-01, 2.0400e-02, + -5.6246e-01, 4.5533e-01, 8.9785e-02, -8.7525e-01, -5.9002e-01, + -7.0129e-01, -2.6266e-01, -2.6017e-01, -7.0782e-01, 4.3546e-01, + -5.1003e-01, 1.1640e-01, -1.5618e-01, 2.6692e-01, 1.1821e-01, + -4.6812e-01, -5.0343e-01, 2.3759e-01, 8.6180e-01, 1.1069e-01, + -1.8956e-01, 2.4460e-01, 5.6212e-01, -1.0961e-01, -6.8296e-01, + -7.2474e-01, -2.1875e-01, 1.6066e-01, -4.0349e-01, -3.5831e-01, + -6.3780e-01, -5.2782e-01, -2.7818e-01, 3.5333e-01, 8.6573e-01, + 1.4529e-01, -5.4409e-01, -8.4162e-01, -1.4071e-01, -3.4625e-02, + 6.1403e-02, -4.3922e-01, -1.0181e+00, 5.3744e-01, 6.9962e-01, + -2.0664e-01, -3.6145e-01, -3.0133e-01, 1.7422e+00, 2.1885e-01, + 1.6592e-01, 
3.7474e-01, 4.0835e-01])Parameter containing: +tensor([[-1.8555e-02, 1.2207e-02, -1.6556e-02, ..., 9.8953e-03, + 1.6815e-02, -1.8707e-02], + [ 3.0487e-02, 3.2715e-02, -3.0022e-03, ..., 3.5187e-02, + 3.5980e-02, -5.8136e-03], + [ 2.0390e-03, -2.0386e-02, 1.7670e-02, ..., 2.3132e-02, + 4.0550e-03, 1.1375e-02], + ..., + [-5.3101e-03, 2.4445e-02, -1.9531e-02, ..., -1.0094e-02, + -1.0544e-02, 2.3727e-03], + [-1.3418e-03, 4.7874e-03, 1.2207e-02, ..., 7.7553e-03, + -6.1214e-05, -1.3153e-02], + [ 9.2850e-03, 7.7629e-03, -1.5533e-02, ..., 1.3306e-02, + 5.0316e-03, 2.2507e-03]])Parameter containing: +tensor([ 0.1748, -0.0695, -0.2499, ..., -0.0291, 0.0082, 0.0654])Parameter containing: +tensor([[-0.0072, 0.0056, 0.0144, ..., 0.0010, 0.0068, -0.0195], + [-0.0204, 0.0330, -0.0089, ..., -0.0183, 0.0075, 0.0104], + [-0.0120, -0.0031, 0.0017, ..., -0.0134, 0.0086, -0.0073], + ..., + [-0.0075, -0.0018, -0.0179, ..., -0.0070, -0.0049, -0.0116], + [-0.0321, -0.0037, 0.0038, ..., 0.0033, -0.0005, 0.0031], + [ 0.0155, 0.0208, -0.0011, ..., 0.0005, -0.0043, -0.0342]])Parameter containing: +tensor([-4.6196e-03, -1.0712e-02, -4.2458e-03, -1.1658e-02, -7.2266e-02, + 1.1406e-02, 6.5796e-02, -3.1586e-02, 2.2736e-02, 5.1697e-02, + -8.3847e-03, -7.9575e-03, 5.0659e-02, 4.4937e-03, -1.5732e-02, + 1.4328e-02, 1.7075e-02, 1.4191e-03, 2.4689e-02, -5.8716e-02, + -2.6588e-03, -3.6438e-02, 3.8147e-02, -5.0079e-02, 1.9745e-02, + -2.9221e-02, -3.1464e-02, 5.1300e-02, 1.1711e-02, -1.5007e-02, + 1.4740e-02, 5.8861e-03, 1.6403e-02, -1.9547e-02, -1.9958e-02, + 4.1595e-02, 4.2992e-03, -4.0375e-02, 1.0658e-02, 5.4626e-03, + -3.0426e-02, 4.9438e-02, 2.3499e-02, -7.7400e-03, -5.2277e-02, + -4.4212e-03, -6.8092e-04, -2.4185e-02, -1.4219e-03, 1.4244e-02, + 5.7617e-02, 4.7150e-02, -1.1452e-02, 9.1476e-03, 1.4732e-02, + 5.9967e-02, -5.5115e-02, 1.6342e-02, 3.3722e-02, -3.9520e-03, + -3.9001e-02, 1.2306e-02, 4.2480e-02, -3.2883e-03, 2.6760e-03, + -1.4351e-02, -2.8259e-02, 6.0692e-03, -1.1177e-02, -6.8604e-02, + 
1.3428e-02, -6.4819e-02, -1.2286e-01, 7.9274e-06, -3.5583e-02, + 1.1482e-02, -4.8126e-02, 1.4811e-03, -1.1566e-02, -6.2012e-02, + 3.0136e-04, 4.3732e-02, -1.6174e-02, -2.2690e-02, 1.5366e-02, + -1.0666e-02, -1.3145e-02, 4.5563e-02, 6.5918e-03, 3.3140e-05, + 2.3926e-02, 7.9834e-02, 7.0129e-02, -2.4094e-02, 2.3895e-02, + -1.3113e-03, 1.4229e-02, -3.1082e-02, -2.2018e-02, 7.6790e-03, + 4.1992e-02, 1.0077e-01, 1.5175e-02, -8.6288e-03, 4.8447e-03, + 3.2257e-02, 8.7524e-02, 1.0815e-03, -3.4912e-02, 2.1362e-02, + -3.5095e-02, 3.9185e-02, 5.9166e-03, -1.3496e-02, -1.1681e-02, + -1.9058e-02, 2.8458e-03, -1.9516e-02, 4.3884e-02, -4.3030e-02, + -2.0966e-02, -1.8967e-02, -4.4952e-02, -3.3417e-02, 6.1951e-03, + -1.6769e-02, -4.3716e-03, -7.8552e-02, 2.9556e-02, 6.4201e-03, + -1.1986e-02, -2.5497e-02, 6.0844e-03, -1.0658e-02, -9.0179e-03, + -7.1335e-03, -4.7073e-03, 1.8127e-02, 1.9989e-02, -5.7144e-03, + -2.5070e-02, -4.1565e-02, -3.6255e-02, -5.9631e-02, 6.5193e-03, + -6.2866e-02, 4.8828e-03, -3.1647e-02, 3.4790e-02, 2.1637e-02, + 6.2866e-03, -6.7749e-02, 1.6113e-02, 4.7119e-02, 9.9487e-03, + -7.0312e-02, 5.3711e-02, -1.9897e-02, -7.0763e-03, -2.9709e-02, + 2.7542e-02, 1.3306e-02, 4.8889e-02, 2.1591e-02, 1.9302e-02, + 3.9978e-02, -8.5693e-02, 1.8982e-02, -2.0050e-02, -3.4576e-02, + 3.7933e-02, 3.1464e-02, -3.8574e-02, -3.8055e-02, 8.1635e-03, + -2.9007e-02, 5.3482e-03, -2.7390e-02, -5.9998e-02, -5.9906e-02, + -1.9699e-02, -1.5625e-02, -1.7685e-02, -1.1206e-03, 2.2335e-03, + 3.8391e-02, 9.6130e-03, 2.2751e-02, -4.9316e-02, -3.1586e-02, + 2.0065e-02, -1.5106e-02, -1.6907e-02, -1.1854e-03, -3.2482e-03, + -9.5010e-05, 2.9404e-02, 2.9221e-02, 4.6229e-04, -2.8015e-02, + 4.4373e-02, 2.6062e-02, 1.8219e-02, -2.9648e-02, 1.0002e-02, + 2.0706e-02, 7.3669e-02, 1.1726e-02, 3.6804e-02, 4.1138e-02, + -2.9392e-03, 3.0106e-02, 3.0594e-03, 2.5574e-02, 5.3772e-02, + 1.3222e-02, -3.6072e-02, -1.5572e-02, -2.4384e-02, 1.3962e-02, + 1.6357e-02, -1.3443e-02, -3.6407e-02, -6.2218e-03, 1.8494e-02, + 
-4.0588e-02, -4.0131e-02, -6.3721e-02, 3.6278e-03, 2.1362e-02, + 9.0179e-03, -3.0411e-02, 1.4854e-02, 4.0283e-02, -5.5359e-02, + 2.6428e-02, 4.8409e-03, -3.4363e-02, 5.3772e-02, -3.8116e-02, + 2.8229e-04, 5.4199e-02, 7.9956e-03, -1.5869e-03, 2.0340e-02, + 1.7532e-02, 8.2016e-03, -2.4738e-03, -1.4038e-02, 1.6041e-03, + 2.3544e-02, 2.3026e-02, -2.4399e-02, -1.1978e-02, -3.0777e-02, + -3.2196e-02, -1.2001e-02, -5.4230e-02, -3.0792e-02, 7.1602e-03, + -5.3444e-03, -1.8097e-02, 9.2840e-04, 1.1177e-02, -1.2733e-02, + 2.0081e-02, 4.1008e-03, 2.3697e-02, 1.7761e-02, -2.0035e-02, + 7.1297e-03, -2.9938e-02, -1.2978e-02, -1.1086e-02, -3.1143e-02, + -8.3847e-03, -6.1264e-03, -1.1139e-02, 2.1729e-02, 4.6173e-02, + -5.5878e-02, -2.1790e-02, -1.8921e-02, -4.9957e-02, 3.6530e-02, + 5.3215e-03, -6.5735e-02, 1.1475e-02, 4.1107e-02, 9.8190e-03, + -7.7248e-03, 2.1011e-02, 1.1238e-02, 5.5275e-03, 3.5217e-02, + 3.1921e-02, 3.1471e-04, 1.8646e-02, 4.5090e-03, 4.9591e-02, + -3.6896e-02, -1.0056e-02, 1.2040e-04, -2.0615e-02, 8.1921e-04, + -5.4779e-03, 1.6571e-02, -2.8305e-02, -1.2932e-02, -1.3977e-02, + 4.3762e-02, -2.3575e-02, 2.0401e-02, 3.3188e-03, -2.0508e-02, + 6.5247e-02, -6.1111e-03, -4.0588e-03, 1.2978e-02, -1.4982e-03, + -1.1116e-02, 1.9665e-03, 1.6739e-02, -2.3193e-02, -4.3304e-02, + 7.2144e-02, 6.3049e-02, -2.1835e-02, 2.0233e-02, 5.1971e-02, + 1.8845e-02, 1.3985e-02, -1.6785e-02, 8.1253e-03, -5.7892e-02, + 4.6783e-02, 2.7817e-02, -1.4709e-02, -3.4466e-03, -3.8940e-02, + -2.7637e-03, -1.5007e-02, -3.5725e-03, 3.4668e-02, -4.6692e-02, + -2.7222e-02, -1.6083e-02, -2.4200e-02, -4.6631e-02, -4.3945e-02, + 9.1019e-03, 3.4607e-02, 1.2703e-02, 7.4219e-02, -5.6076e-03, + -1.3451e-02, -2.6188e-03, -3.5919e-02, 4.5776e-03, -1.3481e-02, + 4.0344e-02, 3.7117e-03, -4.0894e-02, 1.1891e-04, -1.5007e-02, + 5.7800e-02, -4.8248e-02, 5.7587e-02, -3.7460e-03, 1.0727e-02, + 1.9699e-02, -5.8502e-02, 2.1652e-02, -4.3488e-03, 1.2045e-03, + -5.0201e-02, -7.4280e-02, -2.1118e-02, -3.7789e-04, 1.8112e-02, 
+ 1.3870e-02, 1.9989e-02, 6.8703e-03, -2.6817e-03, 2.9099e-02, + -3.0422e-03, 4.6844e-02, -5.0415e-02, -8.2169e-03, -2.6760e-03, + -9.0103e-03, -3.5534e-03, 3.1677e-02, 1.9333e-02, 1.1230e-02, + 1.1559e-02, 7.1106e-03, -6.8176e-02, 3.5614e-02, -2.6535e-02, + 1.2856e-02, 6.5041e-03, -3.2410e-02, 6.0150e-02, -2.4094e-02, + -1.0971e-02, -1.1093e-02, 4.0924e-02, -3.3447e-02, -7.6447e-03, + 1.2741e-02, 2.2171e-02, -2.0844e-02, 5.2490e-02, -8.1558e-03, + -3.0365e-03, 4.1992e-02, 5.0323e-02, -4.7943e-02, -1.9531e-02, + 8.1024e-03, -4.1046e-02, -1.6327e-02, 8.3237e-03, -1.3824e-02, + -2.3102e-02, 3.6297e-03, -6.4201e-03, 3.0975e-03, 8.0490e-03, + 6.9771e-03, -1.5625e-02, 3.1605e-03, 1.0498e-01, 1.4465e-02, + -2.4002e-02, -1.4000e-02, -3.5858e-02, -4.4022e-03, -3.5400e-02, + 2.5058e-04, 1.4572e-02, -1.9503e-04, -1.6553e-01, -5.6076e-03, + -3.9185e-02, -2.0935e-02, -2.3209e-02, -3.5065e-02, -1.6373e-02, + -3.5019e-03, 1.6205e-02, -1.2405e-02, 7.7148e-02, -1.0551e-02, + 1.8539e-02, 2.3849e-02, -8.8272e-03, 5.6419e-03, 1.2901e-02, + 1.2619e-02, 1.4397e-02, -1.6449e-02, -4.7493e-03, 8.8654e-03, + -2.2186e-02, 2.1393e-02, 1.0635e-02, 7.3486e-02, 8.9111e-03, + -3.8116e-02, -2.6825e-02, -1.0338e-02, 1.1879e-02, 1.2970e-02, + -2.2797e-02, -1.1337e-02, -2.0004e-02, 5.1788e-02, 4.2419e-02, + -4.0512e-03, -7.1640e-03, -3.3142e-02, -4.9408e-02, -2.0264e-02, + 3.2227e-02, -3.5919e-02, -1.1429e-02, -4.4250e-03, 7.1754e-03, + 1.4107e-02, 3.7415e-02, 1.3893e-02, 4.4632e-03, -2.7084e-02, + -1.7120e-02, -1.3641e-02, 1.9226e-02, -2.9694e-02, 1.8661e-02, + -6.7635e-03, -1.4336e-02, -3.3569e-02, -3.7872e-02, 4.1687e-02, + 3.1555e-02, -1.9958e-02, 1.0811e-02, -8.8501e-03, -4.3915e-02, + -4.4670e-03, 1.8295e-02, 3.5095e-03, 8.9050e-02, -1.4282e-02, + -8.9722e-03, -4.6417e-02, -3.7018e-02, 3.1738e-02, -1.7609e-02, + -8.5602e-03, 1.7380e-02, -1.8646e-02, -1.8311e-02, 9.8343e-03, + 4.1107e-02, -1.1806e-03, -9.9335e-03, -4.8218e-02, 1.5572e-02, + 2.7008e-02, 2.3209e-02, 1.8616e-02, 1.9852e-02, 
3.3264e-02, + 9.2773e-03, -6.4331e-02, 1.1345e-02, 1.7595e-03, 7.8812e-03, + -2.2522e-02, 3.4760e-02, -9.6664e-03, 8.0414e-03, -1.6663e-02, + 4.7791e-02, 2.0355e-02, 1.7181e-02, -3.5583e-02, 1.1414e-02, + -5.7709e-02, 8.1909e-02, -2.3651e-02, 2.2297e-03, -2.9358e-02, + -1.8143e-02, 3.0792e-02, 6.0028e-02, 3.3051e-02, -2.4551e-02, + 2.2156e-02, 2.2324e-02, 4.0039e-02, 9.3918e-03, 7.6721e-02, + 3.5187e-02, 7.9803e-03, 3.7270e-03, 1.7441e-02, -4.0779e-03, + -3.5248e-02, -5.6122e-02, 1.3580e-02, 8.0688e-02, 1.2466e-02, + 3.6255e-02, 3.0151e-02, 2.1271e-02, -1.0519e-03, -4.3182e-02, + -2.2385e-02, 3.5339e-02, -3.5309e-02, -4.9042e-02, 3.5583e-02, + 5.3314e-02, -6.4754e-04, 4.6600e-02, 4.5807e-02, -2.1439e-02, + 6.3896e-03, 2.0065e-03, 1.4320e-02, -4.1847e-03, 2.8961e-02, + 8.5907e-03, -9.6283e-03, -5.2277e-02, -3.6377e-02, -2.6459e-02, + 2.4681e-03, 2.7679e-02, 4.6356e-02, -2.1744e-02, -3.1036e-02, + 5.9242e-03, -2.1622e-02, -8.7204e-03, 3.6736e-03, 2.0370e-03, + -3.5614e-02, 3.6652e-02, 5.8556e-03, 1.6708e-03, 3.4088e-02, + -8.9455e-04, 5.4245e-03, -3.2959e-02, 1.3819e-03, -9.0637e-02, + -2.3117e-02, -3.3188e-03, 3.7811e-02, 2.0157e-02, -1.2825e-02, + -3.1586e-02, -1.6785e-02, -1.5823e-02, -3.2349e-02, 2.0962e-03, + 6.4507e-03, -8.2321e-03, 8.1177e-03, 5.0781e-02, 5.9814e-02, + 4.4586e-02, 1.5503e-02, 1.2657e-02, -7.0534e-03, -2.7283e-02, + -1.1047e-02, -2.2400e-02, -2.6230e-02, -4.2114e-02, -1.2314e-02, + 1.1765e-02, -5.0476e-02, 2.1629e-03, -9.9182e-03, 2.2842e-02, + 2.0645e-02, 5.9143e-02, 4.4556e-02, 3.4302e-02, -1.0101e-02, + 4.0558e-02, 1.6373e-02, 6.6223e-03, -6.2790e-03, 1.3374e-02, + 2.3575e-02, 3.0731e-02, 1.7929e-02, -7.4730e-03, 1.1337e-02, + 2.5925e-02, 2.8839e-02, 1.0468e-02, -1.0818e-02, -8.6975e-03, + 1.9302e-02, -1.2520e-02, 3.6812e-03, -9.1476e-03, -4.1595e-02, + -1.5030e-03, -1.1528e-02, 1.8463e-02, 2.7023e-02, -2.2446e-02, + 3.1799e-02, 2.0935e-01, 8.7585e-03, -3.9459e-02, -1.2238e-02, + -2.6062e-02, -2.3911e-02, -7.2432e-04, 2.0691e-02, 
-5.9540e-02, + -9.0714e-03, -4.3762e-02, 6.6589e-02, -4.0817e-03, 2.6199e-02, + -8.0872e-03, 1.1955e-02, 3.5248e-02, -7.0435e-02, -1.8265e-02, + -5.5786e-02, 3.5797e-02, -1.3924e-03, -3.1494e-02, 1.8585e-02, + 2.5314e-02, 1.3420e-02, 8.4915e-03, -3.8300e-02, 1.6418e-02, + -4.0579e-04, 2.9266e-02, 5.0354e-02, -3.2898e-02, -4.4739e-02, + 4.3068e-03, -5.1208e-02, 2.0233e-02, 4.6356e-02, -8.3069e-02, + -3.3203e-02, 3.1647e-02, -6.6872e-03, 1.0475e-02, -3.0251e-03, + 1.3290e-02, 2.4323e-02, -7.9422e-03, 8.2397e-03, 3.5645e-02, + -2.4994e-02, -3.4698e-02, -3.8971e-02, 1.4175e-02, 7.0610e-03, + 3.1555e-02, -5.7144e-03, 2.8351e-02, 4.3396e-02, -2.6337e-02, + 5.5145e-02, 2.4689e-02, 1.3634e-02, -3.8452e-02, -6.9857e-04, + 1.5114e-02, -2.6245e-02, 1.5961e-02, 4.2694e-02, -2.4857e-02, + -3.3752e-02, 2.1805e-02, 1.4648e-02, -2.0737e-02, 5.3406e-02, + 1.7761e-02, -3.2959e-02, 1.1215e-02, 6.9046e-03, -1.5572e-02, + 6.2180e-03, -1.9760e-02, -7.9803e-03, 3.3684e-03, 5.7983e-02, + 6.1951e-03, 1.7899e-02, 1.8234e-02])Parameter containing: +tensor([1.2872, 1.3902, 1.3280, 1.3746, 1.3808, 1.3310, 1.2905, 1.3744, 1.3278, + 1.2770, 1.3421, 1.3515, 1.2648, 1.4362, 1.3084, 1.3631, 1.3041, 1.3989, + 1.2531, 0.3754, 1.3145, 1.3037, 1.3856, 1.3438, 1.3401, 1.2750, 1.2746, + 1.3092, 1.5337, 1.3190, 1.2867, 1.3100, 1.3887, 1.3135, 1.4317, 1.4281, + 1.2585, 1.3812, 1.4312, 1.3558, 1.3686, 1.2785, 1.3244, 1.3191, 1.3727, + 1.3274, 1.3551, 1.3741, 1.2872, 1.3436, 1.4240, 1.3185, 1.3188, 1.2834, + 1.3673, 1.4366, 1.2857, 1.4752, 1.3431, 1.2975, 1.3674, 1.3349, 1.2434, + 1.3561, 1.3934, 1.3163, 1.2964, 1.3656, 1.3091, 1.7060, 1.2112, 1.3438, + 2.4524, 1.3774, 1.3413, 1.2583, 1.2963, 1.3005, 1.3575, 1.4152, 1.2581, + 1.4116, 1.3328, 1.3162, 1.3791, 1.3734, 1.3793, 1.2855, 1.3213, 1.3572, + 1.3486, 1.3470, 1.3550, 1.3349, 1.4237, 1.3012, 1.3963, 1.2965, 1.2334, + 1.3769, 1.3145, 1.1864, 1.2911, 1.2162, 1.2641, 1.3571, 1.3028, 1.3323, + 1.2926, 1.3058, 1.3455, 1.3841, 1.2867, 1.2912, 1.2599, 1.3620, 
1.3224, + 1.2191, 1.2997, 1.3315, 1.3464, 1.4175, 1.3383, 1.4195, 1.3479, 1.3867, + 1.3359, 1.3288, 1.4146, 1.3789, 1.3210, 1.3271, 1.2555, 1.2443, 1.3080, + 1.3088, 1.4416, 1.3641, 1.3288, 1.3230, 1.3284, 1.3592, 1.3514, 1.2883, + 1.2990, 1.3497, 1.3354, 1.3534, 1.2966, 1.3139, 1.3233, 1.3309, 1.3223, + 1.3252, 1.3250, 1.1075, 1.3933, 1.3489, 1.3420, 1.2268, 1.4483, 1.2636, + 1.3052, 1.4442, 1.3708, 1.3202, 1.2663, 1.3475, 1.2657, 1.3688, 1.3033, + 1.3369, 1.3094, 1.4049, 1.4070, 1.3336, 1.3666, 1.4053, 1.3935, 1.2258, + 1.3650, 1.4078, 1.3205, 1.2375, 1.3886, 1.3380, 1.3228, 1.8635, 1.3402, + 1.4189, 1.3341, 1.2720, 1.3730, 1.4405, 1.3683, 1.3268, 1.3239, 1.4251, + 3.8598, 1.3067, 1.2033, 1.3676, 1.4208, 1.4274, 1.3379, 1.2837, 1.3651, + 1.3117, 1.3239, 1.4408, 1.3410, 1.2964, 1.2914, 1.2973, 1.3181, 1.3553, + 1.4354, 1.2757, 1.2851, 1.3733, 1.2667, 1.2805, 1.3078, 1.2954, 1.3843, + 1.2732, 1.4498, 1.4749, 1.3324, 1.4228, 1.2985, 1.3570, 1.4567, 1.4490, + 1.3023, 1.3295, 1.4018, 1.2917, 1.2907, 1.2989, 1.3706, 1.3021, 1.4164, + 1.2990, 1.3597, 1.3136, 1.2466, 1.2832, 1.3223, 1.3688, 1.3477, 1.3461, + 1.2518, 1.2632, 1.3714, 1.3059, 1.3299, 1.3516, 1.2935, 1.2013, 1.3700, + 1.3071, 1.2896, 1.3473, 1.2983, 1.3207, 1.3504, 1.3353, 1.2847, 1.3485, + 1.3610, 1.3010, 1.3774, 1.3308, 1.2954, 1.2410, 1.3406, 1.3067, 1.3201, + 1.3046, 1.3123, 1.5712, 1.3965, 1.3870, 1.3659, 1.3075, 1.3338, 1.3484, + 1.3848, 1.3058, 1.3570, 1.3049, 1.3742, 1.2756, 1.3889, 1.2953, 1.3125, + 1.3678, 1.4067, 1.4245, 1.5097, 1.2939, 1.4008, 1.3100, 1.2898, 1.3111, + 1.2741, 1.2701, 1.2899, 1.2631, 1.3375, 1.3803, 1.3075, 1.2911, 1.4013, + 1.3300, 1.3336, 1.2717, 1.3298, 1.4376, 1.2984, 1.3539, 1.3718, 1.3205, + 0.9002, 1.1273, 1.3590, 1.3239, 1.3377, 1.4179, 1.3039, 1.2933, 1.2394, + 1.3289, 1.3309, 1.2902, 1.2587, 1.3533, 1.2726, 1.3677, 1.3686, 1.2982, + 1.3758, 1.2909, 1.3539, 1.3848, 1.3587, 1.2797, 1.3354, 1.3630, 1.4622, + 1.2987, 1.2724, 1.4872, 1.2995, 1.4829, 1.3287, 1.3487, 1.4115, 
1.3740, + 1.2543, 1.2981, 1.3168, 1.3157, 1.4478, 1.3249, 1.3100, 1.2767, 1.3550, + 1.3242, 1.3166, 1.3973, 1.2298, 1.3345, 1.2938, 1.2998, 1.4123, 1.3963, + 1.2704, 1.3450, 1.3796, 1.5035, 1.3848, 1.3198, 1.2972, 1.3515, 1.2375, + 1.4217, 1.2988, 1.3277, 1.4822, 1.2519, 1.3244, 1.2602, 1.3856, 1.3670, + 1.2955, 1.3744, 1.3669, 1.3714, 1.3356, 1.3683, 1.2582, 1.3579, 1.3028, + 1.3691, 1.3240, 1.3866, 1.3637, 1.2497, 1.3819, 1.3386, 1.3314, 1.2769, + 1.3428, 1.9735, 1.3798, 1.3468, 1.3407, 1.3285, 1.3599, 1.4341, 1.2609, + 1.3951, 1.2966, 1.2580, 1.3596, 1.3417, 1.4204, 1.3244, 1.4308, 1.3175, + 1.3006, 1.2515, 1.2395, 1.2493, 1.3272, 1.3005, 1.3845, 1.2743, 1.3190, + 1.3370, 1.3416, 1.1472, 1.2675, 1.3340, 1.3503, 1.3742, 1.3713, 1.3525, + 1.3118, 1.3306, 1.4060, 1.2554, 1.3448, 1.3657, 1.3274, 1.3503, 1.3757, + 1.3110, 1.3053, 1.3492, 1.3277, 1.3186, 1.3125, 1.4098, 1.3111, 1.3407, + 1.4183, 1.3285, 1.3850, 1.3056, 1.3465, 1.3493, 1.3270, 1.2958, 1.3944, + 1.3085, 1.3012, 1.3702, 1.2813, 1.2915, 1.3575, 1.1952, 1.3088, 1.3576, + 1.3346, 1.3577, 1.2858, 1.3787, 1.2965, 1.3709, 1.3449, 1.4087, 1.3293, + 1.3631, 1.3237, 1.2341, 1.3424, 1.3324, 1.2798, 1.3931, 1.3897, 1.2935, + 1.2785, 1.3657, 1.2583, 1.4487, 1.2702, 1.3525, 1.2965, 1.3752, 1.4298, + 1.3788, 1.3413, 1.3038, 1.4992, 1.3144, 1.3067, 1.2532, 1.3159, 1.2683, + 1.3746, 1.2443, 1.3016, 1.2842, 1.2961, 1.2805, 1.3825, 1.2896, 1.3415, + 1.2923, 1.3563, 1.2550, 1.3741, 1.4044, 1.4208, 1.3354, 1.2533, 1.3894, + 1.3589, 1.2489, 1.3956, 1.2572, 1.3543, 1.3533, 1.3309, 1.3372, 1.3268, + 1.3941, 1.2826, 1.3982, 1.4003, 1.2890, 1.4476, 1.2573, 1.3132, 1.3348, + 1.3615, 1.3087, 1.2843, 1.3003, 1.3694, 1.3762, 1.2734, 1.3903, 1.4113, + 1.3063, 1.2960, 1.3405, 1.3407, 1.3494, 1.3724, 1.3081, 1.2372, 1.3109, + 1.3138, 1.3427, 1.3206, 1.3542, 1.3643, 1.3706, 1.3337, 1.3657, 1.3878, + 1.3569, 1.3834, 1.3598, 1.2711, 1.4078, 1.3747, 1.5675, 1.2749, 1.2390, + 1.2538, 1.2943, 1.2955, 1.2661, 1.2821, 1.3547, 1.3100, 1.3714, 
1.3571, + 1.3821, 1.2715, 1.2951, 1.3517, 1.5251, 1.3422, 1.3360, 1.3600, 1.2926, + 1.3325, 1.3143, 1.3297, 1.3354, 1.3274, 1.3914, 1.4117, 1.4397, 1.2755, + 1.4176, 1.3882, 1.3252, 1.2717, 1.3697, 1.3725, 1.2981, 1.3732, 1.3615, + 1.3712, 1.3910, 1.2379, 1.3230, 1.3764, 1.3150, 1.2980, 1.2872, 1.2196, + 1.2480, 1.3109, 1.4561, 1.4493, 1.2962, 1.2464, 1.3213, 1.3355, 1.3883, + 1.5240, 1.2383, 1.3756, 1.3613, 1.3726, 1.3346, 1.4441, 1.3882, 1.3590, + 1.3018, 1.2842, 1.2935, 1.3592, 1.2867, 1.2710, 1.3668, 1.3812, 1.3530, + 1.4434, 1.2979, 1.3567, 1.3358, 1.3525, 1.3261, 1.3334, 1.2530, 1.3479, + 1.3575, 1.3310, 1.4809, 1.2589, 1.2816, 1.3649, 0.4186, 1.4145, 1.2535, + 1.3430, 1.3093, 1.3702, 1.3583, 1.3723, 1.3647, 1.3383, 1.2710, 1.4596, + 1.3527, 1.3731, 1.2643, 1.3753, 1.3268, 1.2178, 1.3426, 1.3283, 1.3710, + 1.3677, 1.2947, 1.2627, 1.2397, 1.3351, 1.4022, 1.4032, 1.3174, 1.3353, + 1.2170, 1.2888, 1.3188, 1.3385, 1.3833, 1.2715, 1.3615, 1.3580, 1.3459, + 1.3332, 1.3058, 1.3122, 1.3450, 1.3347, 1.2283, 1.2703, 1.3616, 1.4082, + 1.3126, 1.2626, 1.2945, 1.4637, 1.3127, 1.2583, 1.3960, 1.3580, 1.2940, + 1.3000, 1.3408, 1.3034, 1.2603, 1.3945, 1.2683, 1.2457, 1.4037, 1.3055, + 1.3822, 1.3478, 1.5217, 1.3705, 1.2843, 1.2831, 1.3864, 1.3480, 1.3994, + 1.3649, 1.2519, 1.3327, 1.3546, 1.4664, 1.2941, 1.3896, 1.3242, 1.3849, + 1.4253, 1.2813, 1.3836])Parameter containing: +tensor([ 8.7339e-02, 6.1186e-02, -1.1393e-01, -2.3187e-02, 6.6224e-02, + 1.0484e-01, 6.1928e-02, 6.6969e-02, -8.2951e-02, 2.9519e-02, + 5.2148e-02, -2.8967e-02, -1.1092e-01, -1.5207e-02, -2.4184e-02, + -5.6119e-02, -1.0727e-02, 3.7562e-04, -2.8220e-02, 4.4238e+00, + -6.0840e-02, 2.7884e-02, 3.6516e-02, -1.5820e-02, -5.4270e-02, + -6.5341e-03, -2.6812e-02, -6.5928e-03, 2.9693e-02, -9.4959e-02, + 1.1967e-02, -8.0892e-03, 5.1503e-03, 3.4587e-02, 8.9452e-02, + -6.5282e-02, 2.3060e-02, -5.3270e-02, -1.2999e-01, 3.5249e-02, + 2.4790e-02, -5.1246e-04, 1.9823e-02, -1.1152e-02, -3.1151e-02, + 2.7093e-03, 
3.5210e-02, -3.5405e-02, 1.7274e-02, 2.7853e-02, + 4.4344e-02, 1.2647e-02, -2.1005e-02, 3.0567e-02, 1.0525e-01, + 2.3741e-02, -8.9789e-02, -3.4534e-02, 4.5046e-02, 1.6473e-02, + 6.1683e-02, -6.3831e-02, 4.1221e-02, 1.8472e-02, 7.8758e-02, + -2.1876e-02, 6.5841e-02, 1.0714e-01, -3.7961e-02, 2.8308e-01, + 2.0154e-02, 3.1277e-02, 4.4453e-01, 7.3221e-02, -1.2682e-01, + 8.9078e-02, 1.6569e-02, -1.2624e-01, -8.9929e-02, 6.2439e-02, + 5.7802e-03, -5.0506e-03, -3.1454e-02, 1.1621e-01, 9.1203e-03, + 4.3324e-02, 1.0606e-01, 1.0382e-01, -8.6284e-03, 2.2634e-02, + 3.4288e-02, 2.8976e-02, 4.6591e-02, -4.4971e-02, -4.7039e-02, + 2.8868e-02, -4.0962e-02, -4.7151e-02, -5.6912e-02, -5.0799e-02, + 2.0091e-02, 6.3718e-02, 5.2493e-03, -4.9528e-02, 5.4862e-02, + 5.7750e-03, -3.8043e-02, -4.7645e-02, 2.5038e-02, -6.7444e-02, + -4.9929e-02, -8.9348e-03, -2.2372e-04, 1.7883e-02, 6.9704e-02, + -3.3632e-02, 7.6718e-03, -6.9778e-02, 1.3104e-02, 8.7781e-02, + -6.2585e-02, -1.3901e-01, -5.5338e-02, -7.6090e-03, -1.1955e-01, + 1.5045e-02, -8.3363e-02, -3.8883e-03, 9.6386e-03, -3.3875e-02, + -8.4132e-02, 3.9896e-02, -3.4697e-02, -3.9018e-03, -6.4938e-02, + -1.0833e-02, -2.7428e-02, 4.2242e-03, -2.9702e-02, -4.0883e-02, + -1.6351e-03, -3.6239e-03, 4.0830e-02, 3.1907e-02, 4.9882e-02, + -2.4862e-02, -3.1561e-02, -4.6421e-02, 1.7076e-02, 8.8081e-02, + 1.8782e-01, -5.4576e-02, -9.4501e-02, 5.3017e-03, -3.8913e-02, + -1.3871e-01, -7.6044e-04, -9.9700e-02, 4.5043e-02, -4.9313e-03, + 1.4418e-01, -1.1649e-01, 4.5746e-03, 2.1470e-02, 7.0424e-03, + -1.6105e-02, -4.2264e-02, 3.2586e-02, -5.0255e-02, 3.5865e-02, + -4.6770e-02, 7.1799e-02, 2.7991e-02, 6.2034e-02, 6.3537e-02, + -7.6811e-02, -3.2816e-02, 5.1421e-02, -3.5653e-02, -2.8211e-03, + -1.0634e-02, -5.3496e-02, 4.2309e-02, -6.4404e-03, -1.2252e-02, + -1.5379e-01, 4.8189e-02, 7.4657e-02, -3.1758e-02, -3.2122e-02, + -1.1376e-02, -3.7663e-02, 3.5552e-02, 4.2712e-02, -7.2979e-02, + -6.1913e-03, -4.6449e-02, -1.0435e-01, -1.1276e-01, 2.3868e-02, + 
-1.0795e-01, -1.5822e-02, -4.6000e-02, 2.2899e-02, 4.1231e-02, + 9.4707e-02, -7.7341e-02, 5.8019e-02, 9.9886e-02, 8.9785e-03, + 9.3099e-03, 1.4751e-01, -6.3596e-03, -4.8528e-02, -6.2385e-02, + -8.9581e-04, -2.4941e-03, 3.8993e-02, -7.8228e-02, -1.4844e-01, + -9.6491e-03, -3.7275e-02, 6.1649e-02, -4.6889e-02, 1.6001e-02, + -5.7545e-02, 5.0802e-02, -3.0250e-02, 7.6051e-02, -5.7192e-02, + 1.7285e-03, -2.3828e-03, -1.6417e-02, 9.5145e-02, -2.6237e-02, + 9.2725e-03, -7.9587e-02, 4.6451e-02, 5.6558e-02, 9.2277e-03, + 6.6372e-02, 8.8247e-02, 4.5625e-02, 2.6280e-02, 2.6710e-02, + 3.2452e-02, 9.7308e-02, 6.8142e-03, 1.4230e-02, -3.9746e-02, + 5.8876e-02, 7.5144e-03, 5.6110e-02, -6.9678e-02, 8.3911e-02, + -1.1423e-01, -3.4860e-02, -8.7782e-02, 2.4758e-02, 5.3315e-03, + -4.8554e-02, -6.1523e-02, 7.4342e-03, -3.1822e-02, -4.5801e-02, + 4.5603e-02, -2.8560e-02, 1.5172e-02, -6.5890e-02, -2.5550e-02, + -1.1881e-02, 1.0670e-02, 6.7133e-02, -3.5807e-02, 3.2999e-02, + 3.9318e-02, -1.1167e-01, 1.9786e-03, 3.6381e-02, -6.4092e-02, + 1.5747e-01, 1.4422e-01, 8.4585e-04, 1.2694e-02, 1.9729e-02, + 3.4082e-02, 1.0148e-01, -3.5446e-02, 1.4398e-01, -1.1190e-01, + -2.0517e-02, -4.3020e-02, 9.5896e-02, 4.6603e-02, 5.5142e-02, + 9.3104e-03, 9.9086e-02, 2.6252e-02, -1.0458e-02, 4.9047e-02, + 4.6309e-02, -1.9033e-02, 7.5156e-03, 7.0244e-02, 2.1686e-02, + -9.7619e-03, 6.5970e-02, -8.2329e-02, 9.5006e-02, -4.9631e-02, + 3.9037e-02, 2.9221e-03, -5.2377e-02, -3.3832e-02, -8.8777e-03, + -6.0936e-02, -8.4812e-03, 4.0256e-02, 1.2562e-01, -5.0265e-02, + -5.2797e-02, 1.9654e-01, -4.9036e-02, -2.3278e-02, 9.1080e-02, + -1.1720e-01, 1.3250e-02, 5.1713e-02, 4.0155e-02, -9.8394e-02, + 1.4361e-01, -9.7763e-03, 1.2028e-01, -3.9207e-02, 2.3117e-02, + 8.6042e-02, -8.9835e-03, -5.6574e-02, -1.4509e-02, 3.6862e-03, + -1.0871e-01, 1.5602e-02, -3.5644e-02, -3.3046e-02, 7.6870e-02, + 2.7187e-02, 8.6666e-02, 6.3289e-02, 2.9537e-02, 2.4505e-02, + -3.2169e-02, 3.7398e-02, 9.9083e-03, 1.2819e-02, -4.2381e-02, + 
-3.4945e-02, -4.1356e-02, 9.5625e-03, -1.0108e-02, -5.6753e-02, + 3.8378e-02, -1.0612e-03, 2.3003e-02, -8.1679e-02, -1.6081e-02, + -2.5599e-02, 3.0458e-02, 9.6022e-02, 2.4029e-02, 2.1840e-02, + 4.9550e-02, -4.1764e-02, -5.5127e-02, -6.5640e-03, -2.8241e-02, + 3.8417e-03, 7.3980e-02, 3.6628e-02, -1.5850e-02, -1.6176e-02, + -9.0316e-02, 1.1332e-01, 2.6261e-02, 6.8534e-03, -1.0242e-02, + -1.7797e-02, -1.3862e-02, -7.3380e-02, -4.9812e-02, -6.4270e-02, + 2.7561e-02, -4.8994e-02, -2.0447e-02, -1.3643e-02, 6.1894e-02, + 6.2277e-03, 4.1400e-02, 2.3730e-02, 1.8632e-02, 7.0347e-02, + 1.1018e-01, -6.8689e-02, 5.4930e-02, 2.8267e-02, -8.7133e-02, + -1.2398e-01, -6.2607e-02, 3.4140e-03, 5.9502e-03, -2.9806e-02, + -5.7428e-02, 2.2081e-02, 8.3703e-02, -6.1587e-02, -6.7964e-02, + -9.1609e-01, 1.5850e-02, 2.8917e-02, -3.4535e-02, -1.1373e-02, + 3.3038e-02, -1.5638e-02, -1.9283e-02, -4.7749e-02, -4.2693e-02, + 4.2583e-03, -1.3398e-01, -8.5306e-02, 1.2055e-02, -3.7024e-02, + -5.7687e-02, -4.9867e-02, -2.1062e-02, 6.0778e-02, 6.8634e-02, + -2.2010e-02, 3.8530e-03, 3.4686e-02, 8.6580e-02, -6.1052e-02, + -1.2155e-02, -8.3256e-02, -4.8036e-02, 8.6155e-02, 7.9283e-02, + -1.1840e-01, 5.3046e-05, -8.5889e-02, 1.0105e-01, 1.0565e-02, + -3.0992e-03, 4.3394e-02, -9.1592e-02, 3.0969e-02, 3.9235e-02, + -3.1675e-02, 2.2725e-04, 4.3952e-02, -1.6739e-02, -9.4257e-02, + 2.3025e-02, 3.2887e-02, -6.3583e-02, 1.7878e-02, 1.0523e-02, + -4.1162e-02, 1.2117e-02, -5.3632e-02, 4.9440e-02, 2.1264e-02, + -3.3455e-04, -9.0725e-02, -3.2049e-02, -8.2630e-03, -4.3023e-02, + 4.5941e-03, 1.0333e-01, 6.9964e-02, 2.6214e-03, 7.1494e-02, + 7.0434e-02, -1.2217e-01, 2.8510e-02, 1.3535e-01, -6.5110e-02, + -5.2724e-03, -2.8958e-02, 8.3868e-02, 6.3884e-02, 3.0647e-02, + 3.0163e-02, -6.8748e-03, 3.6247e-02, -2.0434e-02, -1.1683e-01, + 2.4959e-02, -6.3642e-02, 4.0930e-02, -4.0984e-02, 2.4238e-02, + 9.8886e-03, -3.7878e-02, 4.9394e-02, -2.7229e-02, -3.0365e-02, + 3.3135e-02, 5.1902e-02, 6.7655e-02, 3.0071e-02, 1.9174e-02, + 
-7.3900e-02, 3.5665e-02, -5.8163e-02, 7.0575e-02, -6.0976e-02, + 5.5363e-03, 2.2391e-02, 1.0395e-01, 7.7417e-02, -8.4300e-02, + -6.5664e-02, -2.2830e-02, 6.6328e-02, -2.2972e-02, -4.1334e-02, + -1.4479e-02, -7.1433e-03, -2.0187e-02, -1.0832e-02, -8.9342e-02, + 2.1364e-02, 3.7668e-02, -2.2352e-02, -5.8539e-02, -2.8948e-03, + -3.9876e-03, -4.2716e-02, 2.3490e-02, -6.3561e-02, -4.4949e-02, + -7.5358e-03, 1.4734e-02, -1.6480e-02, 4.3481e-02, 1.9900e-02, + -2.1993e-02, 3.7201e-02, 1.4117e-02, 8.6074e-02, 1.4078e-02, + 6.8319e-02, 1.7246e-02, -5.1239e-02, 1.0810e-02, -1.1514e-02, + -8.7157e-03, 2.3565e-02, -9.8027e-03, 2.3457e-02, 7.3346e-02, + -5.4916e-02, 8.1404e-02, 2.9883e-02, 2.6833e-02, 1.1656e-01, + 6.4959e-02, 3.9964e-03, 7.9622e-03, 2.5413e-02, 3.7862e-02, + 5.7556e-02, -1.7377e-03, -7.8044e-02, 3.9611e-03, 5.0649e-02, + 2.1132e-02, 3.8345e-02, 4.0324e-02, 3.7830e-02, -2.6392e-02, + -7.8393e-02, -1.0003e-02, 5.9539e-02, 1.0001e-01, 6.2470e-02, + 4.5374e-03, 4.8765e-02, -5.7036e-02, 2.1272e-02, 1.7698e-02, + 8.3991e-03, 1.1972e-01, 5.4382e-02, -2.7853e-02, -2.0012e-03, + -1.9343e-02, 6.0604e-02, 8.5150e-03, -3.5723e-02, 4.2000e-02, + -4.4597e-02, -1.0483e-01, 2.3521e-02, -1.0434e-02, 2.4817e-03, + -2.6558e-02, 6.5472e-02, -1.9289e-01, 3.2543e-02, -5.5323e-02, + 4.4021e-02, 3.7345e-02, -6.6249e-03, -3.5713e-02, -2.2755e-02, + -1.0394e-01, 6.1514e-02, 8.5493e-02, 3.4580e-02, -4.7714e-02, + 1.2638e-02, 3.7360e-03, 1.3561e-02, 2.2447e-02, 1.5395e-02, + -3.0915e-02, 8.6202e-03, -1.8053e-02, 4.4125e-02, -1.6347e-02, + 8.7838e-02, 2.2420e-02, 8.3271e-02, 1.3798e-01, 2.5618e-02, + 3.2791e-02, -4.0671e-02, -4.8187e-03, 5.1011e-02, -4.7374e-02, + 1.3102e-02, -4.2113e-02, 8.1951e-02, -4.8852e-02, 3.5464e-02, + -3.0721e-03, 1.8094e-02, 7.1526e-02, 3.9374e-02, 9.8026e-02, + 8.5014e-02, -5.5595e-03, -4.8160e-02, 6.0500e-02, -3.8437e-02, + 2.5993e-02, -7.7234e-02, -4.8771e-02, 2.8361e-02, 6.6123e-02, + -7.5233e-02, 6.7676e-03, -8.9858e-02, 5.8787e-03, 8.0563e-02, + 2.5858e-02, 
-8.7248e-02, -8.6673e-02, 2.9948e-02, 2.5570e-02, + -1.9238e-01, 5.0784e-03, 1.4281e-02, 7.2192e-04, -3.1842e-02, + 3.3397e-02, -5.5607e-02, -6.8789e-02, -1.0273e-03, 6.6684e-02, + -4.6887e-02, -5.2200e+00, -1.7528e-02, 4.8796e-02, -1.3987e-02, + -9.0133e-02, -2.4560e-02, -3.0890e-02, -1.3240e-02, 3.3220e-02, + -2.1894e-02, -5.0840e-02, 3.4118e-02, -1.8583e-02, -4.1701e-02, + 2.2649e-02, -2.8920e-02, -9.6992e-02, 2.9297e-02, -4.1082e-02, + 8.9242e-02, 6.1757e-02, -3.6107e-02, -1.3546e-02, -1.2672e-01, + -2.2187e-02, 4.5807e-02, 4.1666e-02, 4.1213e-03, 3.8655e-02, + -1.4283e-02, 6.7196e-02, 7.3769e-02, 1.5050e-02, 2.3629e-02, + 2.7161e-02, 7.9100e-03, 1.6354e-02, -8.4922e-02, 2.0299e-02, + 8.6790e-03, 3.9312e-03, 2.8671e-02, -7.6458e-02, 3.5176e-02, + 9.5686e-03, -1.8447e-01, 9.5421e-02, 1.8604e-02, 9.2838e-02, + 1.0337e-01, 4.1986e-02, 4.9914e-02, 4.2715e-02, 3.8483e-03, + -1.6496e-02, -2.5456e-02, -1.3371e-02, -1.5350e-02, -3.9309e-02, + 8.4016e-02, -9.3841e-02, -9.3237e-02, 6.4717e-02, -1.0674e-02, + -7.5973e-03, -1.0846e-01, -3.1029e-03, 5.0573e-02, -5.4467e-02, + -4.5049e-02, -1.6193e-02, -4.6469e-02, -5.5992e-02, 4.7240e-02, + -3.7008e-03, 1.1970e-01, 1.0794e-01, 4.3454e-02, -7.2692e-02, + 1.9468e-02, 4.4535e-02, 5.7215e-02, -6.2108e-02, -7.0441e-02, + -7.9148e-02, 8.5440e-02, 1.7515e-04])Parameter containing: +tensor([[ 0.0181, -0.0053, 0.0376, ..., 0.0159, 0.0007, -0.0079], + [-0.0007, 0.0249, 0.0235, ..., -0.0029, -0.0186, 0.0282], + [-0.0062, 0.0307, 0.0169, ..., -0.0004, -0.0213, -0.0294], + ..., + [-0.0041, -0.0177, 0.0085, ..., 0.0139, -0.0345, 0.0094], + [-0.0312, -0.0024, 0.0006, ..., -0.0078, 0.0215, -0.0030], + [ 0.0234, -0.0085, -0.0076, ..., 0.0165, -0.0413, 0.0310]])Parameter containing: +tensor([-0.3335, -0.4612, -0.1525, ..., -0.2974, -0.4580, -0.3103])Parameter containing: +tensor([[ 0.0056, 0.0015, -0.0063, ..., -0.0112, -0.0193, -0.0046], + [ 0.0023, 0.0101, 0.0025, ..., 0.0093, 0.0138, 0.0203], + [-0.0024, 0.0070, -0.0107, ..., 0.0100, 
-0.0153, 0.0128], + ..., + [-0.0022, -0.0275, 0.0059, ..., 0.0156, 0.0155, 0.0103], + [ 0.0172, 0.0090, 0.0030, ..., -0.0214, -0.0010, -0.0127], + [-0.0244, 0.0601, 0.0131, ..., 0.0105, -0.0049, 0.0170]])Parameter containing: +tensor([-1.4061e-02, 5.6396e-02, -6.3660e-02, -1.5656e-02, -7.0129e-02, + 2.0599e-02, 4.9377e-02, -3.9101e-03, -9.5215e-02, -2.7008e-02, + -6.2294e-03, -5.4504e-02, -3.6987e-02, 1.1467e-02, -6.0364e-02, + 1.1948e-02, -2.9373e-02, -8.7402e-02, 8.5144e-03, 5.3802e-02, + -3.0930e-02, 1.3420e-02, 2.3880e-02, -3.1372e-02, 5.2948e-02, + 5.3467e-02, -4.9561e-02, 1.4877e-02, -1.1041e-01, -2.2690e-02, + 1.9684e-02, 5.5275e-03, 1.0254e-02, 7.5806e-02, -9.7046e-03, + -1.0480e-01, 8.2474e-03, -3.4485e-02, 3.0014e-02, 3.0609e-02, + -3.0533e-02, 8.3801e-02, 2.3941e-02, 5.3024e-03, -6.8481e-02, + 1.1032e-02, 2.2781e-02, 1.8511e-03, -8.6548e-02, -1.5198e-02, + 6.7505e-02, 1.4053e-02, 3.4424e-02, 6.8176e-02, 7.7271e-02, + -2.4109e-02, -8.1421e-02, 2.7557e-02, -3.1052e-02, 6.6662e-04, + -4.0070e-02, 3.4027e-02, 1.1841e-02, -6.9618e-03, 3.4424e-02, + -3.0487e-02, 7.7844e-05, -7.1220e-03, -1.5717e-02, 4.3060e-02, + -4.1138e-02, -8.2642e-02, 1.1102e-01, 1.3599e-01, -4.3335e-02, + 3.8269e-02, -1.6346e-03, 4.3030e-02, -2.4582e-02, -5.9174e-02, + -2.5162e-02, -3.9001e-02, -4.8790e-03, 1.1086e-02, 1.3870e-02, + -6.6833e-02, 3.4424e-02, 4.6539e-02, 6.1760e-03, -3.5339e-02, + 1.6159e-02, 3.7079e-02, 4.8370e-02, -2.2202e-02, -1.4000e-02, + 1.0925e-01, 1.7685e-02, -5.4413e-02, -7.2205e-02, -1.6632e-02, + 2.9182e-03, -1.9666e-01, 5.5756e-02, -5.1514e-02, 4.4647e-02, + 6.9946e-02, 2.4979e-02, -1.4046e-02, -3.4027e-02, 2.9282e-02, + -1.1826e-02, 2.2263e-02, 9.3933e-02, 3.9429e-02, -3.7781e-02, + -5.6458e-02, -3.6621e-02, -5.4779e-02, -3.7169e-04, -1.4061e-02, + -3.3966e-02, -2.3285e-02, -4.4281e-02, -1.0368e-02, -3.7384e-02, + -3.3112e-02, -7.8613e-02, -3.0914e-02, 5.9021e-02, -4.4769e-02, + -7.5195e-02, -4.1779e-02, 4.3030e-03, -6.2866e-02, 2.7435e-02, + 1.2720e-01, 
2.9449e-03, 6.2408e-02, 5.4245e-03, 3.5034e-02, + 3.0960e-02, -3.8177e-02, 4.6463e-03, 8.0109e-03, -1.8127e-02, + -8.8684e-02, -1.0429e-02, -3.1433e-02, 6.5674e-02, 6.5735e-02, + 8.7036e-02, -8.7524e-02, 2.6718e-02, 8.1909e-02, -9.2102e-02, + -1.7593e-02, -5.4932e-02, 1.5839e-02, 1.3710e-02, -3.4409e-03, + 1.6577e-01, -2.3468e-02, 2.4734e-02, 1.5747e-02, -2.8046e-02, + 4.0802e-02, -5.1392e-02, -9.7122e-03, 1.8723e-02, 5.7281e-02, + -2.8458e-02, 2.8839e-02, -2.7649e-02, -4.4220e-02, 1.2222e-02, + -2.3956e-02, 1.4236e-02, -1.0651e-02, -7.6904e-02, -6.7322e-02, + -5.0262e-02, -5.9509e-02, -7.0618e-02, -4.3396e-02, 8.0078e-02, + -3.3325e-02, 3.0624e-02, 2.4512e-01, -1.0144e-01, -6.1249e-02, + 2.6138e-02, -8.4610e-03, 7.6843e-02, 1.1192e-02, -8.0505e-02, + 5.1453e-02, 6.0364e-02, -4.6722e-02, -3.1586e-02, -3.1311e-02, + 5.7953e-02, -6.7997e-04, -3.5973e-03, -7.8125e-02, 4.6936e-02, + 3.1021e-02, -6.1127e-02, 2.0584e-02, 1.0339e-01, 5.8472e-02, + -1.3710e-02, 4.4250e-02, 3.9154e-02, 1.7075e-02, 3.4424e-02, + -7.4158e-02, -1.0986e-02, -2.1469e-02, -8.9050e-02, 9.3689e-03, + 2.7557e-02, -4.6021e-02, 3.1769e-02, 1.6556e-02, 5.2643e-02, + -1.8143e-02, -6.4270e-02, -2.6123e-02, 6.5247e-02, 3.2104e-02, + -5.7434e-02, 5.7869e-03, -1.4633e-02, 8.5510e-02, -4.4556e-02, + -2.1534e-03, -4.9896e-02, -3.9917e-02, 5.9479e-02, -1.2680e-02, + 1.0901e-01, -2.4338e-02, -2.4429e-02, 4.2297e-02, 5.8594e-02, + -2.1072e-02, -1.8875e-02, -1.3257e-01, -6.5979e-02, 3.3356e-02, + 1.7443e-03, -1.4175e-02, -5.2338e-02, -7.3364e-02, 1.6251e-02, + -1.5802e-03, -3.9459e-02, -2.7374e-02, 2.8000e-02, 2.1164e-02, + 4.5654e-02, -7.5378e-02, -7.2815e-02, -4.2236e-02, -3.5889e-02, + -3.0624e-02, 3.6987e-02, -2.8732e-02, -7.6828e-03, 4.5319e-03, + -1.9806e-02, -9.3937e-04, 2.4857e-02, -2.2259e-03, -3.6591e-02, + 3.7323e-02, 2.0370e-03, 7.9834e-02, 3.7231e-02, 1.0002e-02, + 1.7105e-02, 4.5837e-02, -4.6692e-02, -4.8523e-02, 3.3722e-02, + 8.3435e-02, -2.1683e-02, -6.1798e-02, 2.3483e-02, 3.5919e-02, + 
2.0370e-02, -9.9869e-03, 1.8478e-02, -2.6886e-02, 6.9946e-02, + 3.6621e-02, -2.6321e-03, 9.5947e-02, 1.6586e-02, 5.2338e-02, + -4.2084e-02, 1.9943e-02, -5.6305e-02, 6.4026e-02, -6.0211e-02, + 3.3630e-02, 5.9448e-02, 3.2410e-02, 2.9678e-02, -2.2797e-02, + 3.8696e-02, -1.2733e-02, -1.3748e-02, -5.0018e-02, 1.8646e-02, + -1.9653e-02, -2.2293e-02, -3.8242e-03, 7.0740e-02, -4.0710e-02, + -5.0720e-02, -4.9248e-03, -7.8369e-02, -4.6570e-02, 6.0693e-01, + -1.6943e-01, 1.2054e-01, -2.4719e-02, 1.8860e-02, -1.4612e-01, + 6.7749e-02, -3.7811e-02, 4.3427e-02, -2.1225e-02, -2.4750e-02, + 7.8979e-02, -1.7532e-02, -4.8798e-02, 1.6388e-02, -4.3549e-02, + -5.3711e-02, 3.9062e-02, 2.7985e-02, 5.9631e-02, 2.8320e-02, + 4.2267e-02, 3.2401e-04, 2.4475e-02, 1.0384e-02, -4.2480e-02, + -3.9856e-02, 1.0321e-01, -5.8563e-02, 2.4902e-02, -1.4107e-02, + -5.2368e-02, -2.2842e-02, 2.3376e-02, -3.6469e-02, -3.9429e-02, + 6.1401e-02, -6.1646e-03, 5.8746e-02, -9.4528e-03, -1.2421e-02, + 6.7322e-02, -1.9302e-02, 5.4230e-02, 9.0485e-03, -4.9255e-02, + 1.1572e-01, 1.6220e-02, -7.0435e-02, 2.1118e-02, -4.2145e-02, + -4.5593e-02, -4.9774e-02, 6.0913e-02, 3.0304e-02, 2.1534e-03, + -1.0452e-03, 1.9501e-02, -1.0841e-02, -5.1544e-02, 4.0253e-02, + -7.6721e-02, 4.4434e-02, -3.3722e-02, -3.0746e-02, -1.0907e-01, + -6.8787e-02, 8.5571e-02, -3.6743e-02, -2.6855e-02, -4.3762e-02, + -3.9215e-02, -1.0818e-02, -8.3313e-02, 5.0842e-02, 6.1035e-02, + 3.1311e-02, 8.9741e-04, -1.7532e-02, 7.4341e-02, -1.5465e-02, + -6.9519e-02, -5.2002e-02, 1.0608e-01, -7.9163e-02, -9.5062e-03, + -3.2921e-03, 7.5989e-02, -4.3907e-03, -2.6840e-02, -6.5536e-03, + 3.3234e-02, 1.0811e-02, 5.7983e-03, -4.9042e-02, -3.9093e-02, + -3.9581e-02, 1.7822e-02, -6.1096e-02, -5.0720e-02, -4.8157e-02, + 4.1428e-03, -4.1016e-02, -3.2959e-02, 3.9154e-02, -2.9526e-02, + -1.2360e-01, 9.9030e-03, -2.6550e-02, 2.6505e-02, 3.7750e-02, + -3.5858e-02, 8.9951e-03, 4.8637e-03, -3.0960e-02, -8.2764e-02, + -3.8242e-03, 3.0319e-02, -4.2175e-02, -8.8318e-02, 
2.1057e-02, + -9.5276e-02, -8.1665e-02, -3.4393e-02, 4.5532e-02, -1.3542e-02, + -1.3756e-02, -6.5308e-02, -8.9355e-02, 1.4160e-01, -4.8645e-02, + 4.9713e-02, -1.3733e-02, 3.3447e-02, 2.5406e-02, -6.4964e-03, + 5.6702e-02, 1.6342e-02, 1.6785e-02, -1.6998e-02, 5.7487e-03, + -4.2175e-02, 7.4036e-02, 1.0849e-02, 1.0529e-01, 3.6804e-02, + -2.8107e-02, -7.0068e-02, -4.9713e-02, 1.8188e-02, -4.3671e-02, + 3.5004e-02, 9.6313e-02, 4.2450e-02, 6.0059e-02, 5.2704e-02, + 3.7018e-02, -8.0109e-03, -8.6594e-03, 4.1779e-02, 2.6550e-02, + -6.3110e-02, -7.6843e-02, 8.2886e-02, 4.2206e-02, -7.9285e-02, + 2.0248e-02, 4.6539e-02, 1.3840e-02, -1.7807e-02, -6.7871e-02, + 9.4070e-03, -5.5573e-02, 5.7770e-02, 1.0849e-02, 9.2163e-02, + 6.7139e-02, 1.6388e-02, -2.5635e-03, -1.1945e-01, 8.4763e-03, + 5.6488e-02, -4.8637e-03, 5.6702e-02, -9.3140e-02, -2.0325e-02, + -9.3994e-02, -8.2275e-02, -1.3657e-02, 4.4708e-02, -5.2551e-02, + 2.5864e-02, -3.2562e-02, 1.1322e-01, 7.3181e-02, -6.3171e-02, + 6.0608e-02, -6.4545e-03, 1.1572e-01, -2.9510e-02, 8.9233e-02, + 6.3721e-02, 1.4725e-02, 1.8356e-02, -2.4994e-02, -2.6642e-02, + 2.1530e-02, -1.0132e-02, -6.9214e-02, 2.2064e-02, -3.3997e-02, + -1.0612e-02, -2.5970e-02, 2.8091e-02, -2.3727e-02, -5.9692e-02, + -7.8796e-02, 8.3862e-02, 3.1769e-02, -6.2500e-02, 5.5450e-02, + -1.8280e-02, 7.4524e-02, -3.0426e-02, -5.5359e-02, 7.1373e-03, + -3.7903e-02, 1.7685e-02, -7.1106e-02, -3.8385e-05, -5.7373e-02, + -2.8854e-02, -9.9258e-03, -5.6152e-02, -2.2678e-03, 8.7341e-02, + 2.7756e-02, 3.2654e-02, -9.6817e-03, -2.3178e-02, 1.3306e-01, + 4.1626e-02, -5.2765e-02, 2.1393e-02, 7.0129e-02, 4.1016e-02, + 4.2558e-04, -7.3730e-02, -6.8359e-02, 2.1774e-02, 8.2520e-02, + 2.7802e-02, -8.4019e-04, 8.5083e-02, 4.2603e-02, -3.7598e-02, + -1.0718e-01, -1.2264e-03, 9.1476e-03, -8.7509e-03, 7.0923e-02, + 6.0547e-02, 6.1073e-03, 1.0052e-03, 9.3689e-02, -2.9633e-02, + -2.0767e-02, 5.7449e-03, -1.1909e-02, -9.0942e-02, 1.8463e-02, + 7.7629e-03, -4.3640e-02, -4.4861e-02, -6.5918e-02, 
-2.2125e-02, + -8.6853e-02, 3.0991e-02, 1.1780e-02, 3.6835e-02, -3.6163e-02, + -1.8982e-02, 4.1443e-02, -6.8237e-02, -4.1016e-02, -1.4877e-02, + 4.0817e-03, 7.3624e-03, -1.0017e-02, 1.6495e-02, 4.3091e-02, + 3.0319e-02, -2.2293e-02, 6.9351e-03, -4.2633e-02, -6.1218e-02, + 4.9255e-02, -2.5101e-02, 3.8818e-02, -3.5439e-03, 1.9394e-02, + 1.0309e-03, 1.2541e-03, 1.5793e-02, 2.5757e-02, -4.0253e-02, + 1.6455e-01, -1.1420e-01, -4.3030e-03, 1.1755e-01, 3.1464e-02, + -4.3945e-02, 1.9547e-02, 7.6050e-02, -8.4229e-03, -5.1544e-02, + 4.4250e-02, -1.4275e-02, 6.0883e-03, -3.5278e-02, -3.5004e-02, + -2.5192e-02, 2.6306e-02, 4.5135e-02, 6.1646e-02, 5.8777e-02, + 5.5580e-03, 6.5857e-02, -1.1185e-02, 1.6357e-02, -8.2092e-03, + 9.3262e-02, -7.9346e-02, -2.1301e-02, 1.0548e-03, 2.6718e-02, + -3.2349e-03, 9.4727e-02, -2.4918e-02, -5.5542e-03, 5.9204e-02, + 2.4399e-02, -9.6985e-02, -4.7668e-02, 6.7558e-03, -1.0597e-02, + -4.4739e-02, -1.3306e-02, -2.6443e-02, 5.0629e-02, 5.0392e-03, + 1.5549e-02, -1.6632e-02, -2.2964e-03, 5.2368e-02, 6.4941e-02, + 8.4412e-02, -6.3416e-02, -5.0537e-02, 4.8157e-02, -6.9336e-02, + -8.6212e-04, -1.1627e-01, -4.7943e-02, -1.5190e-02, 3.7415e-02, + -2.2430e-02, -3.0457e-02, 8.8196e-02, -2.6581e-02, 8.4534e-03, + -9.7961e-03, 4.3427e-02, 5.0446e-02, -8.2214e-02, -4.3976e-02, + -2.3193e-02, 6.0791e-02, 1.6357e-02, -6.8298e-02, -2.3895e-02, + -4.2816e-02, -1.3687e-02, 5.8929e-02, -1.0574e-02, 3.6560e-02, + -8.7891e-03, -3.1872e-03, 3.5370e-02, -2.9800e-02, -2.0485e-03, + 8.1406e-03, 1.9684e-02, 5.2216e-02, 1.5137e-02, -4.5471e-02, + -6.7444e-02, 1.6251e-02, 4.4495e-02, 4.0558e-02, -2.4445e-02, + -2.1790e-02, -6.0501e-03, 3.4210e-02, 4.8065e-02, 9.5886e-02, + -1.2589e-02, -5.8167e-02, -6.6467e-02, 3.1158e-02, -2.4628e-02, + 1.0902e-02, 5.6488e-02, 6.9763e-02, 2.7252e-02, -4.3304e-02, + 1.3428e-01, 7.2327e-03, -3.0807e-02, 4.0741e-02, -5.2032e-02, + -7.6294e-02, -8.7402e-02, -1.6678e-02, 4.8584e-02, -1.5306e-03, + 8.0719e-03, 1.9730e-02, -1.1078e-01, 
-3.7415e-02, 1.0144e-01, + -8.3237e-03, 9.0271e-02, 3.2623e-02, 1.0938e-01, 3.7140e-02, + 2.9648e-02, -7.9269e-03, -4.7699e-02, -2.3422e-02, 4.2999e-02, + -2.6901e-02, 2.6611e-02, 2.6657e-02])Parameter containing: +tensor([2.0307, 1.9726, 2.0683, 2.0922, 2.0904, 2.1595, 2.1940, 2.0945, 2.1829, + 2.0632, 2.0069, 2.0619, 2.0249, 2.1153, 2.0952, 2.0311, 2.0945, 1.9873, + 2.0693, 1.3209, 1.9580, 2.0676, 2.0866, 2.1143, 2.0650, 2.1496, 2.0865, + 1.9528, 2.1317, 2.0063, 2.1316, 2.1745, 2.0471, 2.0919, 2.0855, 1.9931, + 2.0886, 2.1438, 1.9753, 2.0601, 2.0038, 2.0082, 2.1160, 2.1505, 2.2815, + 2.0785, 2.2053, 2.0123, 2.0239, 2.0069, 2.0493, 2.0066, 2.1710, 2.0697, + 2.2646, 2.1356, 2.0977, 2.0522, 2.0962, 2.0822, 2.1202, 2.0967, 2.1053, + 2.1302, 2.0645, 2.0926, 1.9923, 2.0041, 2.0004, 2.0571, 2.2438, 2.0124, + 2.0061, 1.9758, 2.1365, 2.1637, 2.0070, 2.0505, 2.1504, 2.0160, 2.0619, + 1.9575, 2.0646, 2.0421, 2.1013, 2.0887, 2.2091, 2.0112, 1.9810, 2.0051, + 2.0335, 2.1576, 2.1458, 2.0246, 1.9795, 2.0867, 2.2490, 2.0376, 2.1731, + 2.0989, 1.9751, 1.8038, 2.0467, 2.1660, 2.0282, 1.9610, 2.3143, 2.1453, + 2.0223, 2.0440, 1.9253, 2.2580, 2.0914, 2.1201, 2.0345, 1.9917, 1.9536, + 2.0809, 2.0473, 2.0467, 2.0435, 1.9806, 2.1231, 2.1840, 2.0553, 2.1431, + 1.9885, 2.3494, 2.0862, 2.0096, 2.0571, 2.0646, 2.0548, 2.0386, 2.2171, + 2.1465, 2.1068, 1.9592, 2.0894, 2.1193, 2.1046, 2.0338, 2.0214, 2.0104, + 1.9678, 2.0540, 2.1080, 2.1480, 2.1495, 2.0671, 2.0314, 2.2913, 2.2099, + 2.1134, 2.0601, 1.4066, 2.0700, 2.0636, 2.1496, 2.0385, 2.0866, 2.0064, + 2.1332, 1.8232, 2.0438, 2.2630, 2.2799, 2.0261, 2.1469, 2.0730, 2.0680, + 2.0978, 1.9948, 2.0685, 2.0162, 2.1138, 2.2317, 2.0896, 2.1131, 2.1128, + 2.2830, 2.1778, 1.9381, 1.9642, 2.1773, 2.2628, 1.9100, 0.7467, 2.0003, + 2.2190, 2.1667, 2.1351, 2.2362, 2.1190, 2.1051, 2.3376, 2.1806, 2.0938, + 0.9265, 1.9867, 2.1207, 2.0960, 2.2167, 2.1010, 2.0326, 2.1115, 2.1153, + 2.2205, 2.0719, 2.0205, 2.0097, 2.0012, 2.2113, 2.1965, 2.1255, 1.9719, + 
2.1534, 2.0961, 2.1630, 2.1540, 2.2609, 2.1181, 2.0263, 2.1285, 1.9925, + 2.0507, 2.0578, 2.0307, 2.1078, 2.0729, 2.0448, 2.1018, 2.0279, 2.0320, + 1.9764, 2.0579, 2.1533, 2.0485, 2.0464, 2.0832, 2.0751, 2.2815, 2.0456, + 1.9951, 2.0517, 2.1280, 1.9985, 2.1785, 2.0958, 2.1668, 2.0606, 2.1180, + 2.0829, 2.0670, 1.9896, 2.0625, 2.0223, 1.9520, 2.1020, 1.9954, 2.0594, + 1.9704, 1.9320, 2.0810, 2.0772, 1.9758, 2.0288, 2.0715, 2.0910, 1.9785, + 2.0258, 2.0914, 2.0275, 2.0663, 2.1614, 2.0998, 1.9857, 2.1790, 2.1992, + 2.0393, 2.0261, 1.9987, 2.0452, 2.0599, 2.0318, 2.0960, 2.2425, 2.1013, + 2.1393, 2.0122, 2.1288, 2.1729, 2.0566, 2.1189, 2.0796, 2.0585, 2.1067, + 2.0435, 2.1635, 2.2792, 2.1824, 2.0506, 2.0928, 2.2066, 2.0765, 1.9179, + 2.0876, 2.1804, 2.1673, 2.1951, 2.2373, 1.9701, 1.9493, 2.0041, 1.9992, + 2.2302, 2.0406, 2.0838, 2.1606, 2.0498, 2.1176, 2.0158, 2.1376, 2.1164, + 1.2271, 1.7720, 2.1755, 2.0930, 2.1836, 2.0538, 2.0824, 2.0939, 2.1511, + 2.0509, 2.0580, 2.0198, 1.9866, 1.9962, 2.0523, 2.0944, 2.1456, 2.1053, + 2.0300, 2.0940, 2.3120, 1.9547, 2.0700, 2.1009, 2.0682, 2.0459, 2.0006, + 2.2175, 1.9863, 2.2981, 2.0458, 2.0225, 1.9890, 2.0384, 2.2434, 2.0457, + 2.1980, 2.0483, 2.0478, 2.0192, 2.1184, 2.0825, 2.2390, 2.1722, 1.9870, + 2.1458, 2.0283, 1.9633, 2.0250, 2.1574, 1.9837, 2.1228, 2.0989, 2.0857, + 2.0165, 1.9334, 2.0708, 1.9785, 2.0866, 2.0367, 2.0997, 2.0162, 2.0879, + 2.0782, 2.0978, 2.0613, 2.1383, 2.0852, 2.0643, 2.2999, 2.2209, 2.0422, + 2.2045, 2.0467, 2.1153, 2.3100, 2.0153, 2.2065, 2.0402, 2.0260, 2.0171, + 2.1041, 2.1454, 2.0819, 2.0804, 2.1649, 2.1010, 2.0213, 2.0817, 2.2861, + 1.5624, 2.4553, 2.0714, 2.0337, 2.0162, 2.1285, 2.2080, 2.0755, 2.0775, + 2.0191, 2.0924, 2.0059, 2.0281, 2.0560, 2.2036, 2.1356, 2.0119, 2.0982, + 1.9969, 2.2523, 2.0878, 2.0347, 2.0952, 2.0964, 2.0247, 2.0063, 1.9721, + 2.1730, 2.1007, 1.8965, 2.0519, 2.0051, 2.1024, 2.0860, 2.0207, 2.0678, + 2.0972, 2.0524, 2.2369, 2.1333, 2.0543, 2.2793, 1.9811, 2.1158, 2.1302, + 
2.0558, 2.1161, 2.0195, 2.2593, 2.0156, 2.0946, 1.8880, 2.0206, 2.1074, + 2.2498, 2.0933, 2.0475, 2.0456, 2.0136, 1.9998, 2.2418, 2.1298, 2.1151, + 2.2087, 2.0441, 2.0162, 2.0065, 2.2207, 2.0723, 1.5891, 2.0869, 2.0881, + 2.0180, 2.0379, 1.9723, 2.1504, 2.0642, 2.2587, 2.0276, 2.0433, 2.0237, + 2.1352, 2.0410, 1.9913, 1.8999, 2.1760, 2.1910, 2.2422, 2.0412, 1.9200, + 2.0412, 2.0715, 2.0873, 2.1982, 2.0002, 2.1190, 1.9513, 2.0818, 2.1398, + 2.0467, 2.1935, 2.0150, 2.1526, 2.2373, 2.0407, 2.0075, 1.9397, 2.0824, + 1.9908, 2.0283, 2.0259, 2.0223, 2.1736, 1.9523, 1.9705, 2.2646, 2.0516, + 2.0430, 2.0424, 2.0742, 2.1556, 2.0510, 2.0165, 1.9642, 2.1213, 2.0721, + 2.0460, 2.0685, 2.2526, 2.0811, 2.0153, 2.0851, 2.0620, 2.0626, 2.1310, + 2.0389, 2.2499, 2.0059, 2.0056, 2.1145, 2.1155, 1.9914, 1.9843, 2.0977, + 2.0383, 1.9961, 2.0099, 1.8014, 2.0623, 2.1228, 2.1464, 2.0297, 2.0503, + 2.1106, 2.0956, 2.1274, 1.9793, 2.1071, 2.1342, 2.0167, 1.9905, 2.1575, + 2.0514, 2.0519, 2.1893, 2.0108, 2.0159, 2.0668, 2.0728, 2.1197, 2.1696, + 1.9989, 2.2020, 2.0833, 2.3113, 2.0717, 2.0181, 2.2653, 2.1990, 2.0044, + 2.0979, 1.9749, 2.0083, 2.0437, 2.1062, 2.0969, 1.9639, 1.9803, 2.0559, + 2.1292, 1.9763, 2.0597, 2.0988, 1.8016, 2.0659, 2.1028, 2.1028, 2.2098, + 1.9922, 2.1306, 2.1300, 2.1369, 2.0740, 2.0127, 2.0872, 2.0136, 2.1810, + 1.9943, 2.0467, 2.1332, 1.9401, 1.9783, 2.0138, 2.1217, 2.0784, 2.0202, + 2.0787, 2.0651, 2.0199, 2.2011, 2.1647, 2.0053, 1.9973, 2.1077, 2.1093, + 2.1253, 2.0120, 2.0643, 2.1226, 1.9848, 1.9670, 2.1012, 1.9518, 2.0922, + 2.0492, 2.2058, 2.2500, 2.2433, 2.2611, 1.9797, 2.0808, 2.1515, 2.1605, + 2.0636, 2.0649, 2.1260, 1.9303, 2.0484, 2.2103, 2.1352, 2.0834, 2.0316, + 2.1334, 2.0103, 2.1443, 2.0279, 2.1024, 2.1013, 2.2352, 2.0015, 2.1543, + 2.0345, 2.1090, 2.2991, 2.1148, 2.0197, 2.1404, 2.9376, 2.0328, 2.0857, + 2.0746, 2.0915, 2.0639, 2.0736, 2.0799, 2.2220, 2.2648, 1.9870, 2.1855, + 2.0254, 2.0939, 1.9822, 1.9375, 2.1553, 2.1249, 2.1436, 2.0041, 2.0035, + 
2.0451, 1.9603, 2.1117, 2.2223, 2.1545, 2.0233, 2.1566, 1.9525, 1.9624, + 2.0093, 2.0608, 2.1441, 2.0819, 2.0554, 2.0715, 2.0770, 2.0988, 2.3386, + 2.0327, 2.0376, 2.1202, 2.1253, 2.0856, 2.0563, 1.7493, 2.0553, 1.9948, + 2.0059, 2.2029, 2.0470, 2.1513, 2.2328, 2.0887, 2.1264, 2.0788, 1.9652, + 2.0717, 2.2406, 2.0654, 1.9399, 2.0947, 2.1596, 2.1316, 2.0074, 2.1920, + 2.0630, 2.0547, 2.1203, 2.0279, 2.0755, 2.1415, 1.9674, 2.0430, 2.1068, + 2.3021, 2.1641, 2.0975, 2.0427, 2.1529, 2.0259, 2.0686, 1.9664, 2.1563, + 2.0207, 1.9886, 2.1628])Parameter containing: +tensor([-0.1634, 0.3904, -0.6572, 0.0778, -0.3645, 0.8040, 0.6369, -0.0867, + -0.3817, -0.1613, -0.3118, 0.5072, -0.3457, -0.5480, -0.3993, -0.0154, + 0.1044, 0.0733, -0.2292, -1.6394, 0.1039, 0.4882, 0.6659, -0.5731, + 0.4084, 0.1744, -0.1819, -0.0776, -0.3350, 0.4749, 0.0453, 0.3498, + 0.0599, -0.5911, 0.0388, -0.2646, -0.3983, -0.4301, 0.5977, -0.0765, + -0.3520, -0.0992, 0.5881, 0.6972, -0.6566, -0.0760, -0.6735, -0.2202, + 0.4286, -0.1231, -0.5646, -0.3618, 0.5786, 0.4331, -0.7112, 1.5154, + 0.4482, 0.2446, -0.1697, 0.3791, 0.4552, 0.2399, 0.4756, -0.8407, + 0.2955, 0.6822, 0.5503, -0.2047, -0.2023, -1.5053, 0.9010, 0.5263, + 0.8808, 0.3741, -0.7068, -0.3806, 0.2776, 0.0435, 0.4804, -0.4777, + -0.0481, -0.3290, -0.1130, 0.5107, 0.1467, -0.3193, 0.7629, 0.2585, + -0.1659, -0.3165, -0.4949, 0.4801, 0.5671, -0.1125, -0.7146, 0.5034, + 0.7717, 0.4830, -0.5553, 0.4703, -0.1743, -0.2755, 0.3085, -0.4850, + -0.2644, 0.5301, 0.5706, 0.4006, 0.5181, -0.6555, 0.4549, 0.8119, + -0.8537, 0.5428, 0.4085, -0.2830, 0.6755, 0.5459, 0.1751, 0.0993, + 0.0719, -0.1542, -0.3314, -0.6055, -0.3381, -0.4999, -0.0787, -0.9931, + -0.0120, 0.3317, -0.3046, -0.6244, -0.6682, -0.2793, 0.4557, 0.4783, + 0.4025, -0.2780, -0.4007, 0.2884, -0.5973, 0.3956, 0.0334, 0.6921, + -0.2348, -0.1816, 0.3306, 0.4644, -0.4059, 0.2303, 0.3054, -0.5736, + 0.3553, 0.0930, -0.6138, 0.3553, -0.4840, -0.3348, -0.5161, 0.3380, + -0.5832, 0.0614, 0.5337, 
0.1841, -0.1116, 0.5732, -0.9823, -0.0027, + -0.3231, 0.5131, 0.4541, -0.0448, -0.4895, 0.0861, -0.0614, 0.4743, + 0.6420, -0.0692, -0.0172, 0.6431, -0.6864, -0.6511, 0.2636, 0.1945, + 0.7223, 1.1545, -0.2667, 0.9053, -0.0978, -0.7616, -0.9102, -0.3463, + 0.6792, 0.6667, 0.1881, 0.8544, 0.5431, 0.2004, 1.1457, 0.4141, + -0.9264, -0.1053, 0.6978, 0.6861, 0.3049, 0.3733, 0.8130, 0.7921, + -0.4449, -0.0137, 0.2515, -0.5731, -0.6839, 0.7414, 0.0611, 0.0709, + -0.2592, -0.0999, -0.5076, -0.6549, 0.9194, 0.4722, -0.4555, 0.5878, + -0.0519, 0.3215, -0.2202, 0.0980, 0.5063, -0.3292, 0.2484, 0.5109, + -0.2138, 0.2214, -0.2958, -0.2670, 0.5005, 0.6784, 0.2350, 0.7536, + 0.3887, 0.7270, 0.0442, 0.0527, 0.0210, -0.5180, 0.0711, -0.5699, + -0.3708, 0.6118, -0.6395, 0.2085, 0.0284, -0.6505, 0.2182, 0.3107, + 0.3407, -0.2664, -0.3857, -0.2913, -0.6433, 0.1765, 0.1000, -0.4661, + -0.3760, -0.0993, 0.2911, 0.3472, 0.3607, 0.1198, -0.0327, 0.1065, + 0.4254, -0.4536, -0.4919, -0.3662, -0.0898, 0.5787, 0.5154, 0.0449, + 0.1505, -0.7936, -0.4062, 0.4328, -0.2997, 0.4559, -0.2560, -0.0027, + 0.4216, -0.8116, -0.2374, 0.4854, 0.5336, -0.3588, 0.4325, -0.8042, + -0.4591, -0.4535, 0.6669, 0.6368, -0.5369, 0.5936, -0.4235, -0.6996, + 0.0087, 0.2624, 0.5607, 0.6638, -0.6091, 0.2427, 0.4354, 0.0214, + -0.2835, 0.0946, -0.1392, 0.5883, 0.1297, 0.4315, 0.7669, 0.1802, + -0.5833, -0.1598, 0.4056, -0.2835, 2.4506, -0.0757, 0.6105, -0.2125, + 0.6271, -0.2360, 0.4260, -0.4225, -0.4409, -0.3974, -0.0852, 0.5301, + -0.5151, 0.3451, 0.6331, -0.0027, 0.3818, -0.2817, -0.4590, 0.6107, + -0.6834, 0.0148, -0.3686, -0.6853, 0.1435, -0.0316, 0.7759, 0.4233, + -0.0109, 0.9338, -0.3701, 0.0630, -0.1912, -0.1769, -0.5478, -0.2753, + -0.8406, -0.5571, 0.0083, -0.0551, 0.2625, 0.2223, -0.6563, 0.7482, + 0.0779, 0.5153, -0.1369, 0.1232, -0.0049, -0.9306, -0.2408, -0.5681, + 0.6130, 0.5066, 0.3246, 0.2921, -0.5615, -0.5330, -0.3602, 0.1486, + 0.1352, -0.4357, -0.2049, 0.5760, 0.2577, -0.4218, 0.6486, -0.2831, + 
0.6872, 1.1954, -0.8105, 0.3491, -0.5672, -0.1094, -0.3430, -0.7002, + 0.6166, 0.5148, -0.4035, -0.2608, 0.4024, -0.5864, -0.2557, 0.2989, + 0.0739, 0.7871, 0.4411, -0.1270, 0.5860, 0.8288, 0.7889, -1.7029, + -0.2622, -0.2401, 0.4067, 0.6801, -0.5707, -0.6888, 0.3839, -0.2854, + -0.6706, 0.2172, 0.2885, -0.1456, 0.4580, 0.3836, -0.4238, -0.6184, + -0.0139, 0.5807, 0.6330, -0.3544, -0.4924, 0.5683, 0.4040, -0.2675, + 0.1704, 0.3875, 0.3749, -1.7340, 0.0225, 0.1601, -0.1486, 0.8069, + 0.4392, -0.5474, -0.5232, -0.0288, -0.5448, 0.3483, -0.2150, 0.6120, + -0.0751, -0.5416, -0.7117, 0.3526, -0.3994, -0.0146, -1.1012, -0.0727, + -0.4971, 0.3637, 0.2655, 0.4278, 0.6634, -0.0713, -0.0099, -0.1449, + -0.4338, -0.1937, -0.6498, -0.7250, 0.2271, -0.6612, 0.1220, 0.3243, + 0.1165, 0.2658, 0.6581, -0.2702, 0.1820, -0.8709, 0.3657, 0.6121, + 0.2829, -0.6148, 0.3372, 0.9084, 0.3225, -0.3238, -0.5079, -0.4441, + -0.5670, 0.3065, -0.0379, 0.6865, -0.7325, -0.6104, -0.6129, 0.3126, + 0.0805, -0.4352, 0.5677, 0.5351, -0.2273, -0.5317, -0.4505, -0.1210, + -0.8835, -0.3817, 0.8097, -0.0851, 1.0992, -0.4803, 0.3294, -0.1898, + 0.3222, 0.3128, 0.1108, 0.1788, -0.4207, -0.2907, 0.8966, -0.0309, + 0.4949, 0.6861, -0.4014, -0.2843, -0.1982, -0.0093, 0.6738, -0.6007, + 0.3616, -0.1017, 0.3762, -0.3327, 0.1810, 0.3345, -1.0147, 0.9662, + -0.1385, 0.2843, 0.4133, -0.6416, -0.2800, 0.3751, -0.6579, -0.4096, + 0.0644, 0.3496, 0.6392, -0.0599, -0.2617, -0.1823, -0.6173, 0.5599, + 0.2138, -0.0549, 0.2094, 0.6796, -0.5302, -0.1417, -0.1526, 0.5912, + 0.6554, -0.2012, -0.3520, 0.2011, 0.4550, -0.4031, 0.2536, 0.7203, + -0.5432, -0.1476, -0.7711, 0.6847, 0.2413, 0.2893, 0.7736, -0.4954, + 0.5358, -0.4769, -0.3864, -0.5988, 0.7091, 0.6538, -0.0866, -0.1456, + 0.8250, -0.0113, -0.4833, -0.0904, -0.2416, 0.2671, 0.7442, 0.7193, + -0.2374, -0.0454, -0.0685, -0.5255, 0.1050, -0.1814, 0.0709, -0.4543, + 0.4718, 0.5187, -0.6497, 0.8609, -0.3123, 0.3958, 0.0808, -0.2275, + 0.0566, 0.4994, -0.0844, 0.3117, 
0.5139, -0.1189, -0.8420, 0.7102, + 0.1490, -0.1751, -0.0938, 0.5447, 0.7600, 0.1141, 0.0118, -0.0166, + -0.4014, 0.5753, 0.3237, -0.0531, -0.2725, 0.4342, 0.4947, -0.5230, + 0.7061, -0.5629, -0.5246, 0.1271, -0.6878, 0.3458, -0.0280, 0.2749, + 0.3727, 0.7445, 0.7685, 0.2294, 0.6002, -0.2364, -0.0361, 0.8746, + 0.5983, 0.7713, -0.0492, 0.0072, 0.3269, 0.4310, 0.1407, -0.4384, + -0.1729, -0.3326, 0.4758, -0.4218, 0.8020, -0.0350, -0.7215, 0.4019, + 0.9356, 0.5021, 0.5510, -0.3667, 0.4088, 0.5787, 0.5401, 0.5139, + 0.2937, 2.5777, -0.4828, -0.2321, -0.1922, 0.7759, 0.4836, -0.4644, + 0.1484, -0.9385, -0.7380, -0.1577, -0.8349, -0.3073, -0.5824, 0.5791, + -0.0707, 0.2943, -0.5056, -0.0182, 0.1451, 0.0616, 0.4046, 0.0973, + 0.4898, 0.7257, -0.2495, -0.0735, -0.8476, 0.2016, -0.0924, -0.2625, + -0.2927, -0.5473, -0.0670, -0.2398, -0.1042, 0.2342, -0.6123, -0.7766, + -0.2891, -0.1510, -0.3887, -0.8447, 0.3705, -0.3663, 0.3751, -0.3333, + 0.2739, -0.1393, -0.5013, -0.0648, 0.8232, 1.0130, -0.2885, 0.4336, + 0.4970, -0.2387, 0.1380, -0.7007, -0.7673, -0.3192, 0.2573, -0.4747, + -0.4189, -0.1628, -0.6715, 0.0745, 0.3072, 0.8509, -0.2102, -0.5690, + -0.4548, -0.0489, -0.0070, -0.0870, -0.2811, -0.6708, 0.6954, 0.4769, + -0.4482, -0.5612, -0.2993, 0.9320, 0.3225, -0.2130, 0.4533, 0.4963])Parameter containing: +tensor([[-0.0083, -0.0071, -0.0021, ..., -0.0010, 0.0090, 0.0052], + [ 0.0555, -0.0439, -0.0148, ..., 0.0150, -0.0063, 0.0137], + [ 0.0143, 0.0434, 0.0090, ..., -0.0173, 0.0094, -0.0070], + ..., + [-0.0136, -0.0252, 0.0119, ..., -0.0044, 0.0303, 0.0039], + [ 0.0051, -0.0150, -0.0075, ..., -0.0273, -0.0061, -0.0200], + [ 0.0049, -0.0019, 0.0238, ..., 0.0028, -0.0135, -0.0199]])Parameter containing: +tensor([ 1.1367, -0.1210, 0.1425, ..., -0.0217, -0.0073, -0.0175])Parameter containing: +tensor([[ 0.0033, 0.0186, -0.0174, ..., -0.0052, 0.0126, 0.0130], + [-0.0088, 0.0030, 0.0068, ..., 0.0046, -0.0482, 0.0263], + [-0.0032, 0.0138, -0.0109, ..., 0.0132, 0.0237, 0.0135], + 
..., + [ 0.0033, -0.0081, -0.0091, ..., 0.0204, -0.0066, 0.0058], + [-0.0299, 0.0074, -0.0033, ..., -0.0114, 0.0144, 0.0044], + [ 0.0031, 0.0146, -0.0029, ..., 0.0022, -0.0339, -0.0151]])Parameter containing: +tensor([-4.5128e-03, 1.0891e-03, -3.3478e-02, -1.2718e-02, -6.7078e-02, + 6.7139e-02, 6.8726e-02, -1.5411e-02, 4.0192e-02, 3.3264e-02, + 2.8030e-02, -1.7822e-02, -5.0354e-02, -3.2593e-02, 8.6975e-03, + 1.7487e-02, -1.7090e-03, 4.8798e-02, -1.4435e-02, -4.2175e-02, + 2.5803e-02, 2.9007e-02, 2.6627e-03, -1.9150e-02, 3.9734e-02, + -5.0354e-02, -2.0618e-03, 2.3621e-02, 1.5732e-02, 2.6825e-02, + 1.3304e-04, 1.1345e-02, -7.6218e-03, -5.8632e-03, 2.7752e-03, + 3.4485e-02, -7.0251e-02, -5.3925e-02, -1.8692e-02, 4.6722e-02, + -7.8659e-03, -8.9264e-03, 2.0721e-02, -5.7182e-03, 1.3725e-02, + -4.4556e-03, 7.2784e-03, -1.8707e-02, -3.1403e-02, -2.4948e-03, + 1.8265e-02, 2.4246e-02, -1.9287e-02, 4.3732e-02, 1.9684e-02, + 4.5807e-02, -4.0497e-02, -1.6037e-02, 5.4291e-02, -1.3718e-02, + 8.4763e-03, -8.7585e-03, 1.9363e-02, 2.0309e-02, -3.0838e-02, + -1.6861e-02, 3.9398e-02, 2.0432e-02, -3.1796e-03, -1.1469e-01, + -2.4979e-02, -2.7313e-02, -2.8641e-02, -1.3489e-02, -1.9913e-02, + 1.3153e-02, 3.0518e-03, -2.1744e-03, 2.5864e-02, 6.5804e-03, + 2.2602e-03, 1.9440e-02, -2.1469e-02, -3.9215e-03, -1.3847e-02, + 1.6678e-02, -1.3870e-02, 3.2616e-03, -8.3466e-03, -2.1759e-02, + 2.8610e-02, 3.9215e-02, 3.1799e-02, 6.3629e-03, 3.6030e-03, + 2.7679e-02, 4.0527e-02, 5.1346e-03, -8.5144e-03, 1.0765e-02, + 6.1127e-02, 4.1870e-02, 6.3965e-02, -2.3132e-02, -4.7226e-03, + 6.6650e-02, 1.6022e-02, 2.7573e-02, 8.3084e-03, 1.3275e-02, + 2.3880e-02, 4.5654e-02, 2.8748e-02, -2.9831e-03, 4.4518e-03, + -2.8244e-02, 4.4220e-02, -5.7716e-03, -4.3762e-02, 4.9973e-03, + -1.5388e-02, -1.6312e-02, -6.7688e-02, -3.3905e-02, 5.9662e-03, + -4.7882e-02, -1.3046e-02, -3.3630e-02, -9.1324e-03, 3.1097e-02, + -1.4183e-02, 6.0081e-03, -2.5864e-02, -7.4768e-03, 2.9739e-02, + 3.4393e-02, 2.4811e-02, 2.9816e-02, 
-1.0612e-02, 1.4702e-02, + -4.1229e-02, 1.1559e-03, 1.6479e-02, -2.8671e-02, 1.7166e-02, + -6.2103e-02, -1.5732e-02, -6.9618e-03, 1.0216e-02, 6.4880e-02, + 3.8422e-02, 1.3800e-03, 1.3947e-02, 4.2786e-02, -8.7738e-03, + 2.3666e-02, -3.6224e-02, 6.2866e-03, 2.3880e-02, -4.7668e-02, + 1.2398e-02, -2.8549e-02, 5.6854e-02, -1.1093e-02, 4.1809e-03, + 1.5900e-02, -7.0923e-02, -1.3222e-02, -2.2984e-03, 1.9363e-02, + 1.1263e-03, 5.5176e-02, 1.3657e-03, -4.2725e-02, 2.6398e-02, + -6.1798e-02, 5.4810e-02, -6.8909e-02, -1.5808e-02, -2.3163e-02, + -2.2049e-02, -4.7668e-02, -9.6436e-03, 5.6000e-02, -1.8188e-02, + 1.4977e-02, -2.4994e-02, -3.9642e-02, 3.3295e-02, -2.5375e-02, + 1.7929e-02, 1.1589e-02, 1.4969e-02, 4.7546e-02, -3.4332e-02, + 1.5305e-02, 3.7201e-02, -3.9032e-02, -6.9641e-02, 4.6417e-02, + -1.0063e-02, -5.7144e-03, 7.8125e-03, 2.7008e-02, 2.4460e-02, + 6.2317e-02, 2.3178e-02, 2.5406e-02, 2.8976e-02, 2.5299e-02, + 1.0063e-02, 3.1708e-02, -1.2169e-02, 4.7729e-02, 1.2070e-02, + -2.1988e-02, -4.1870e-02, 4.9103e-02, -6.7749e-03, -3.1281e-02, + 1.7853e-02, 1.0002e-02, -6.2988e-02, 4.6509e-02, -3.8671e-04, + -6.1859e-02, -4.1595e-02, -9.8038e-03, 2.6657e-02, 1.9226e-02, + -1.1948e-02, 2.3865e-02, -2.6520e-02, 1.3962e-03, -2.2705e-02, + -3.2440e-02, -1.9623e-02, 1.5236e-02, -8.2779e-03, 2.0691e-02, + 1.8906e-02, 3.3112e-02, -9.6970e-03, -2.9480e-02, 1.0132e-02, + -3.9597e-03, 2.8870e-02, 1.1482e-02, -6.5346e-03, -1.7975e-02, + 6.3858e-03, 1.6129e-02, -1.6724e-02, -3.8696e-02, -1.1530e-03, + -4.0863e-02, -9.4147e-03, -1.8814e-02, -2.6245e-02, -4.2694e-02, + -7.2327e-02, 2.4548e-03, 2.0828e-02, 2.7802e-02, -1.8738e-02, + -1.2159e-03, 1.9043e-02, 3.7689e-02, -1.7334e-02, 3.0060e-02, + 1.4259e-02, -1.1581e-02, -6.1531e-03, -2.6794e-02, -4.4403e-02, + 4.0894e-02, 4.2572e-02, -2.1118e-02, 4.7028e-02, -2.6306e-02, + 1.8692e-02, -7.3303e-02, -3.6011e-02, -2.3788e-02, 6.3362e-03, + 7.7209e-03, 1.2604e-02, 4.7340e-03, 5.2307e-02, -3.3630e-02, + 3.0457e-02, -1.6266e-02, 2.8824e-02, 
1.3283e-02, 1.4275e-02, + 9.7961e-03, 2.0966e-02, -1.2543e-02, -3.7262e-02, 1.6418e-02, + -2.0790e-03, -7.4997e-03, -2.3788e-02, 3.4912e-02, -1.4687e-02, + 1.8326e-02, 3.1006e-02, 6.1569e-03, 3.8177e-02, 5.0259e-04, + -2.3861e-03, -5.2299e-03, -4.9133e-03, 5.3101e-03, 2.4094e-02, + -4.6806e-03, 3.0090e-02, 1.5926e-03, 1.7303e-02, -1.1276e-02, + -4.8027e-03, -6.8245e-03, -2.7206e-02, 4.4342e-02, -1.5527e-01, + -6.2744e-02, 5.3139e-03, -4.3526e-03, 3.7323e-02, 1.3878e-02, + -7.0610e-03, -3.3478e-02, 1.8244e-03, -3.2501e-02, -5.3528e-02, + -4.8645e-02, -4.3755e-03, -1.1063e-02, 3.5114e-03, -4.6387e-03, + 1.9562e-02, 2.8488e-02, -3.0243e-02, 1.0025e-02, -1.1663e-03, + 4.0771e-02, -8.8501e-03, 4.4647e-02, -3.5858e-02, 8.3160e-04, + -2.9831e-02, 3.0079e-03, 7.6660e-02, 9.9548e-02, -2.1179e-02, + -9.8572e-03, 6.5231e-04, -4.4128e-02, 2.3819e-02, 3.6888e-03, + -1.6006e-02, 1.4183e-02, -3.2013e-02, -1.5976e-02, 1.1642e-02, + -2.1935e-03, 1.7029e-02, 2.9251e-02, 1.3626e-02, 1.7410e-02, + 4.1077e-02, -6.7749e-02, -1.6632e-02, -2.3499e-02, 1.6891e-02, + 4.2000e-03, 1.7807e-02, 5.8197e-02, -4.5959e-02, -3.3722e-02, + -4.2496e-03, 3.6102e-02, -6.9580e-02, -2.7054e-02, -3.7903e-02, + -1.0242e-03, 1.3550e-02, 3.8177e-02, 1.6830e-02, -3.2187e-05, + -2.9358e-02, -1.7502e-02, 2.5772e-02, 4.2343e-03, 3.1281e-02, + 2.5223e-02, 2.3315e-02, 4.6356e-02, -1.2512e-02, -4.5837e-02, + -2.6962e-02, 5.2765e-02, -4.8103e-03, 2.2003e-02, 3.4356e-04, + -6.4392e-03, -6.1893e-04, -1.8494e-02, -2.4506e-02, 7.8344e-04, + -1.0498e-02, -2.3361e-02, -2.0096e-02, -2.3056e-02, 2.1317e-02, + -1.8481e-01, 3.2349e-02, 1.8555e-02, 4.7546e-02, -2.4323e-02, + -1.4732e-02, -1.5671e-02, 8.3237e-03, -3.6041e-02, -1.2108e-02, + 2.9030e-03, 3.0716e-02, -9.8648e-03, 3.7292e-02, 2.7710e-02, + -6.7566e-02, 3.8910e-02, 2.6230e-02, 3.8422e-02, 4.9011e-02, + -3.3150e-03, 4.1809e-03, 5.4588e-03, 5.0079e-02, -3.3508e-02, + 1.4694e-02, -7.9956e-03, 2.1408e-02, -1.5918e-01, 3.6438e-02, + -3.8239e-02, -4.7455e-03, 2.0096e-02, 
2.3666e-02, -1.4549e-02, + -5.8556e-03, 2.7847e-02, -6.0303e-02, 1.0347e-03, 1.9241e-02, + 1.4847e-02, 1.0292e-02, 2.9163e-03, 3.0258e-02, -6.1615e-02, + 1.5434e-02, 1.9226e-02, -1.7914e-02, 1.5945e-02, -3.0228e-02, + -1.3977e-02, -7.9203e-04, -6.0852e-02, 2.8381e-02, 2.0416e-02, + -3.9154e-02, -4.3396e-02, -2.5238e-02, 1.8539e-02, -2.5787e-02, + 5.3528e-02, 3.0365e-03, 1.9028e-02, 1.6724e-02, 6.2347e-02, + -8.5373e-03, 3.2593e-02, -2.9129e-02, 2.1988e-02, -3.9215e-02, + 2.8473e-02, -9.6130e-03, -1.7288e-02, -1.4145e-02, -1.7014e-02, + 2.1423e-02, 1.2009e-02, -4.2877e-02, 5.3902e-03, -7.6477e-02, + 2.2354e-03, 5.6343e-03, 1.1192e-02, -5.4413e-02, 2.8519e-02, + 5.8022e-03, -1.9897e-02, -2.0561e-03, 6.8474e-04, 2.5528e-02, + 1.5205e-02, 2.7374e-02, 4.5074e-02, -3.3783e-02, -8.5815e-02, + -8.3237e-03, 7.4768e-03, -1.8661e-02, -1.6632e-02, 3.5461e-02, + -3.4943e-02, 3.0994e-06, 1.9165e-02, 7.0610e-03, -4.0588e-02, + 9.1457e-04, -3.0479e-03, -7.0801e-03, 8.1558e-03, -2.4689e-02, + 8.8272e-03, -1.7319e-02, -2.9465e-02, 3.0746e-02, -2.2110e-02, + 6.3110e-02, -4.1626e-02, 1.1772e-02, -4.8615e-02, 4.4708e-02, + 1.3359e-02, -3.3569e-02, -2.0752e-02, 7.0839e-03, 2.4223e-03, + -4.3549e-02, 3.9551e-02, -2.1988e-02, -1.8936e-02, 1.8784e-02, + 1.0193e-02, 1.0780e-02, 4.6616e-03, 3.2104e-02, 3.0472e-02, + -9.6130e-02, -7.0381e-03, -2.5883e-03, -5.7335e-03, -4.1382e-02, + -1.6769e-02, 6.8542e-02, -1.2489e-02, -3.9902e-03, 1.3103e-03, + -1.9669e-02, 5.8403e-03, -2.5345e-02, -3.3539e-02, 4.8340e-02, + 4.8187e-02, 4.1718e-02, 2.8198e-02, 3.8879e-02, -1.8631e-02, + 4.6814e-02, 1.6891e-02, -9.8953e-03, 4.2084e-02, 6.9122e-03, + 4.2725e-02, -6.0501e-03, 4.5624e-03, 1.1833e-02, -5.6824e-02, + 2.8095e-03, 2.9083e-02, 1.3138e-02, 1.4244e-02, 6.1798e-03, + 3.9795e-02, -2.0386e-02, 2.1988e-02, 5.4169e-02, 4.3373e-03, + -2.5040e-02, -3.0088e-04, 1.6937e-02, 7.5340e-04, 3.2135e-02, + -4.7913e-02, -2.1027e-02, -6.6040e-02, 4.8103e-03, -1.8738e-02, + -3.2158e-03, 2.6688e-02, -2.9816e-02, 
-1.7090e-02, 9.7198e-03, + 1.9272e-02, -5.0011e-03, -4.9103e-02, 1.7654e-02, 2.5436e-02, + 1.2337e-02, 4.6387e-03, 2.5879e-02, 2.0157e-02, 1.0941e-02, + -2.4048e-02, -6.4850e-03, 1.8524e-02, -2.9507e-03, -2.2602e-03, + 4.8157e-02, 1.4801e-02, -1.5381e-02, 1.4893e-02, 5.7697e-04, + -4.5807e-02, 9.9487e-03, -1.2199e-02, 5.0140e-02, -1.4999e-02, + 5.7709e-02, -2.1133e-02, 1.0757e-02, 2.0966e-02, 3.1281e-02, + -3.9024e-03, 4.7989e-03, 5.2704e-02, 2.0828e-02, -1.0963e-02, + -1.3313e-02, -2.4689e-02, -6.2927e-02, 5.5275e-03, 5.9128e-04, + 2.9037e-02, -4.2847e-02, 1.0155e-02, 5.3329e-03, 1.4275e-02, + 2.9663e-02, -2.4902e-02, 1.9085e-04, 3.0334e-02, 4.3396e-02, + 1.7212e-02, -4.9927e-02, -3.1921e-02, 2.3834e-02, 8.0505e-02, + -1.4977e-02, -2.0355e-02, -2.7008e-02, -2.5299e-02, -4.6272e-03, + 6.6772e-02, 1.6083e-02, -2.0142e-03, 2.4948e-02, -1.7349e-02, + -4.0398e-03, -1.9791e-02, 3.9917e-02, -1.6754e-02, 1.6830e-02, + 2.8000e-03, -1.3115e-02, -2.3819e-02, -9.0866e-03, 1.6571e-02, + -1.1642e-02, 1.4746e-01, -4.8126e-02, -2.1484e-02, -7.6637e-03, + 5.6030e-02, -3.9917e-02, 1.5289e-02, 1.1955e-02, 1.0445e-02, + 1.8097e-02, -5.3894e-02, -6.5651e-03, 1.0391e-02, 2.6566e-02, + 5.8350e-02, 2.1637e-02, 3.3936e-02, 1.3794e-02, 2.1118e-02, + -1.4984e-02, 1.7990e-02, 5.3619e-02, -1.8127e-02, 6.2378e-02, + -2.3651e-02, -8.1177e-03, 1.4168e-02, -3.0956e-03, 5.5237e-03, + 3.3783e-02, 2.7252e-02, 2.4902e-02, -2.5757e-02, -1.6632e-02, + -4.6021e-02, -2.4963e-02, 6.0028e-02, 2.2552e-02, -1.0608e-01, + -1.7654e-02, 1.4448e-03, -3.8483e-02, -3.5461e-02, -4.3976e-02, + -9.4376e-03, 5.6549e-02, -4.8645e-02, -1.4091e-02, 6.5002e-03, + -1.1063e-02, -2.4597e-02, 3.9886e-02, 1.5762e-02, 3.9307e-02, + 1.3527e-02, 2.9892e-02, -1.1322e-02, 1.5945e-02, -2.3911e-02, + 1.8356e-02, -1.0468e-02, -1.6129e-02, -8.1863e-03, 1.3138e-02, + 3.9581e-02, -5.4169e-02, 4.7333e-02, 3.4821e-02, -1.4366e-02, + 1.0414e-03, -1.4893e-02, -5.1544e-02, 3.5156e-02, 3.6072e-02, + -1.8501e-03, 8.4000e-03, -1.7441e-02, 
-9.5215e-03, -1.2230e-02, + -1.0429e-02, 1.2161e-02, -1.2169e-02, -2.6215e-02, -9.1374e-05, + -8.3847e-03, 1.2932e-02, 3.9246e-02])Parameter containing: +tensor([1.4201, 1.5553, 1.4466, 1.4176, 1.4067, 1.4397, 1.3864, 1.3708, 1.4152, + 1.3614, 1.4378, 1.3843, 1.3444, 1.4626, 1.3758, 1.4693, 1.4095, 1.5620, + 1.4866, 0.4077, 1.4047, 1.3871, 1.4473, 1.4040, 1.5196, 1.4607, 1.4180, + 1.3462, 1.5646, 1.3797, 1.4029, 1.3888, 1.4918, 1.4661, 1.4651, 1.4938, + 1.3918, 1.4911, 1.4575, 1.4223, 1.4761, 1.4732, 1.4389, 1.4287, 1.4739, + 1.3806, 1.3968, 1.4439, 1.3657, 1.4691, 1.4194, 1.4101, 1.4352, 1.4645, + 1.4784, 1.4505, 1.3830, 1.6047, 1.4694, 1.4865, 1.4252, 1.3965, 1.4197, + 1.3777, 1.3788, 1.4218, 1.3675, 1.4547, 1.4370, 1.6006, 1.4216, 1.4492, + 2.2403, 1.3783, 1.4818, 1.4228, 1.4079, 1.4081, 1.5440, 1.5037, 1.3940, + 1.4326, 1.3924, 1.4796, 1.4971, 1.3687, 1.5182, 1.3814, 1.3834, 1.4392, + 1.4455, 1.4210, 1.3747, 1.4317, 1.4766, 1.3185, 1.4140, 1.3892, 1.3706, + 1.5666, 1.4581, 1.3999, 1.3989, 1.3766, 1.4173, 1.4575, 1.4027, 1.4818, + 1.3969, 1.4545, 1.4167, 1.4052, 1.2946, 1.3471, 1.4722, 1.3740, 1.4010, + 1.3432, 1.4888, 1.4517, 1.4854, 1.4591, 1.4632, 1.4699, 1.3971, 1.4651, + 1.4098, 1.4119, 1.4666, 1.4620, 1.4262, 1.3439, 1.3321, 1.4214, 1.4017, + 1.4997, 1.5333, 1.4508, 1.4351, 1.3742, 1.4386, 1.4162, 1.4854, 1.4447, + 1.3915, 1.3794, 1.4169, 1.4852, 1.4739, 1.4276, 1.3996, 1.3800, 1.4374, + 1.4500, 1.3956, 1.1616, 1.4410, 1.5182, 1.4307, 1.4148, 1.4508, 1.3891, + 1.4194, 1.5416, 1.4519, 1.4224, 1.4381, 1.4547, 1.3615, 1.4564, 1.4398, + 1.3899, 1.3217, 1.4564, 1.5274, 1.4199, 1.4670, 1.5066, 1.5204, 1.3846, + 1.3979, 1.4862, 1.4596, 1.4462, 1.5558, 1.4122, 1.3897, 1.8632, 1.5444, + 1.5091, 1.4304, 1.3688, 1.4861, 1.4807, 1.4606, 1.4240, 1.4124, 1.4525, + 3.8975, 1.3556, 1.2548, 1.4661, 1.4985, 1.4950, 1.4275, 1.4643, 1.3548, + 1.3323, 1.4001, 1.4308, 1.4286, 1.3500, 1.3509, 1.3783, 1.4309, 1.5165, + 1.4920, 1.3666, 1.3648, 1.4776, 1.4400, 1.4439, 1.4176, 
1.3889, 1.4822, + 1.3767, 1.4823, 1.5177, 1.3962, 1.4796, 1.4227, 1.4678, 1.4844, 1.4738, + 1.4363, 1.4863, 1.3920, 1.3446, 1.4588, 1.4765, 1.4450, 1.3699, 1.4560, + 1.4734, 1.5286, 1.4649, 1.4107, 1.4684, 1.4413, 1.4883, 1.3877, 1.4846, + 1.3845, 1.3414, 1.5221, 1.4007, 1.4440, 1.4092, 1.3943, 1.3575, 1.4469, + 1.3563, 1.3385, 1.4539, 1.4622, 1.4092, 1.4660, 1.4254, 1.4688, 1.3586, + 1.4422, 1.4098, 1.5499, 1.3933, 1.4258, 1.2855, 1.4232, 1.3333, 1.4687, + 1.3916, 1.4269, 1.5470, 1.5170, 1.4956, 1.5049, 1.4062, 1.4651, 1.5575, + 1.4794, 1.3587, 1.4639, 1.5001, 1.5203, 1.3703, 1.4702, 1.3974, 1.4340, + 1.4630, 1.5828, 1.4600, 1.4888, 1.4105, 1.5150, 1.4381, 1.3933, 1.3613, + 1.3687, 1.3838, 1.3728, 1.4708, 1.4796, 1.4643, 1.3512, 1.3890, 1.5210, + 1.5169, 1.3263, 1.3799, 1.4009, 1.5255, 1.4198, 1.4169, 1.3880, 1.3731, + 0.8802, 1.2407, 1.5166, 1.4331, 1.4277, 1.4972, 1.3723, 1.4263, 1.3627, + 1.3381, 1.4444, 1.3821, 1.3215, 1.3868, 1.3506, 1.4783, 1.4441, 1.3980, + 1.4687, 1.4535, 1.5312, 1.4153, 1.4329, 1.4587, 1.4845, 1.4896, 1.4546, + 1.4669, 1.3970, 1.6142, 1.5081, 1.4518, 1.4433, 1.4726, 1.5298, 1.4506, + 1.4577, 1.3823, 1.4399, 1.3515, 1.5149, 1.4061, 1.3733, 1.4142, 1.4033, + 1.4039, 1.4585, 1.5243, 1.2887, 1.4220, 1.3885, 1.3685, 1.5446, 1.4967, + 1.3723, 1.3086, 1.4316, 1.5888, 1.4014, 1.4572, 1.4306, 1.3706, 1.3952, + 1.4537, 1.3702, 1.3470, 1.4911, 1.3435, 1.4427, 1.3334, 1.4723, 1.4442, + 1.4298, 1.4902, 1.5190, 1.5378, 1.3713, 1.4796, 1.3502, 1.3679, 1.4412, + 1.4243, 1.4387, 1.4598, 1.4295, 1.3602, 1.4251, 1.3908, 1.4348, 1.3431, + 1.2924, 2.2007, 1.4737, 1.5134, 1.4332, 1.4222, 1.4245, 1.4097, 1.4176, + 1.4686, 1.4317, 1.3341, 1.4361, 1.4114, 1.4940, 1.4381, 1.4572, 1.4983, + 1.4450, 1.3799, 1.2985, 1.3759, 1.4010, 1.3972, 1.3854, 1.4735, 1.4090, + 1.4359, 1.4176, 1.1470, 1.3816, 1.4783, 1.4746, 1.4164, 1.4836, 1.3951, + 1.4659, 1.3982, 1.4574, 1.4124, 1.4343, 1.4261, 1.3920, 1.4123, 1.4116, + 1.4015, 1.4180, 1.4316, 1.3637, 1.3466, 1.3692, 1.5015, 
1.4733, 1.4815, + 1.4506, 1.4205, 1.5465, 1.3860, 1.4599, 1.4610, 1.4319, 1.3867, 1.4277, + 1.4058, 1.5038, 1.3254, 1.4081, 1.4177, 1.5158, 1.1846, 1.4519, 1.4454, + 1.4294, 1.4399, 1.4583, 1.4504, 1.3279, 1.4267, 1.4588, 1.5297, 1.4256, + 1.4696, 1.3555, 1.3443, 1.4276, 1.4016, 1.4142, 1.5254, 1.4633, 1.4104, + 1.3277, 1.4271, 1.3069, 1.5239, 1.3506, 1.4531, 1.3500, 1.5408, 1.4959, + 1.4397, 1.4760, 1.4411, 1.4869, 1.3990, 1.3856, 1.3908, 1.4489, 1.3610, + 1.4872, 1.4184, 1.4228, 1.4071, 1.3704, 1.3442, 1.4610, 1.3600, 1.4608, + 1.3470, 1.4855, 1.3853, 1.4027, 1.4836, 1.5157, 1.4284, 1.4205, 1.5115, + 1.5605, 1.3542, 1.3765, 1.3660, 1.4906, 1.3845, 1.4881, 1.5181, 1.4794, + 1.3891, 1.3431, 1.3882, 1.5148, 1.4450, 1.4678, 1.3192, 1.3673, 1.4189, + 1.4483, 1.4242, 1.3967, 1.3293, 1.4184, 1.4218, 1.4289, 1.5523, 1.4417, + 1.3730, 1.4459, 1.4928, 1.4107, 1.4603, 1.4857, 1.4236, 1.4498, 1.4567, + 1.4311, 1.4568, 1.3501, 1.4661, 1.5013, 1.4657, 1.4661, 1.4130, 1.4475, + 1.4720, 1.4931, 1.4820, 1.4778, 1.5414, 1.5453, 1.6388, 1.3688, 1.4117, + 1.4119, 1.4349, 1.4636, 1.4066, 1.4177, 1.4517, 1.3720, 1.5074, 1.5475, + 1.4340, 1.3701, 1.4308, 1.4670, 1.6579, 1.4371, 1.4845, 1.4797, 1.3362, + 1.3730, 1.4448, 1.4329, 1.5366, 1.3915, 1.3983, 1.5381, 1.5082, 1.4285, + 1.4790, 1.4179, 1.4467, 1.3747, 1.4305, 1.4426, 1.3684, 1.4587, 1.5358, + 1.5565, 1.4905, 1.3312, 1.3485, 1.5212, 1.4619, 1.3691, 1.4415, 1.3194, + 1.3221, 1.3496, 1.4874, 1.5061, 1.3922, 1.2938, 1.4972, 1.4190, 1.4368, + 1.5890, 1.3690, 1.4215, 1.4396, 1.5313, 1.4531, 1.5123, 1.4330, 1.4697, + 1.3048, 1.3517, 1.4730, 1.5835, 1.3724, 1.4596, 1.4266, 1.4551, 1.4752, + 1.5286, 1.3820, 1.4288, 1.5007, 1.4162, 1.3557, 1.4200, 1.3324, 1.3406, + 1.4550, 1.4577, 1.5262, 1.3809, 1.3025, 1.4433, 0.3834, 1.5315, 1.4227, + 1.4417, 1.3479, 1.3955, 1.4660, 1.4970, 1.4552, 1.3603, 1.4358, 1.5342, + 1.4399, 1.4024, 1.3257, 1.4345, 1.4759, 1.3737, 1.3923, 1.3939, 1.4668, + 1.5472, 1.3777, 1.2935, 1.3241, 1.5129, 1.4817, 1.4110, 
1.3857, 1.4226, + 1.4402, 1.4090, 1.3844, 1.4178, 1.4924, 1.3791, 1.4072, 1.4571, 1.4994, + 1.4460, 1.4087, 1.4387, 1.4671, 1.4868, 1.3603, 1.2742, 1.4785, 1.4911, + 1.5177, 1.3701, 1.4761, 1.4919, 1.4244, 1.3797, 1.4817, 1.3420, 1.5247, + 1.4447, 1.4205, 1.4528, 1.2608, 1.4320, 1.3855, 1.4680, 1.4616, 1.4392, + 1.4488, 1.5083, 1.6081, 1.4801, 1.4033, 1.4737, 1.5151, 1.4830, 1.4994, + 1.4744, 1.3404, 1.4201, 1.3889, 1.5779, 1.3445, 1.4092, 1.3881, 1.4861, + 1.4624, 1.2567, 1.4012])Parameter containing: +tensor([ 7.1926e-02, 9.8689e-02, -1.2303e-01, -2.5298e-02, 3.4331e-02, + 1.2279e-01, 7.0546e-02, 7.1920e-02, -1.2536e-01, 1.0271e-02, + 5.1308e-02, -2.0179e-02, -9.3063e-02, -2.3841e-02, -6.1843e-02, + -5.7950e-02, -2.1823e-02, -4.0402e-02, -4.8366e-02, 4.5095e+00, + -8.1055e-02, 3.5031e-02, 5.1826e-03, -8.3012e-03, -2.8866e-02, + 2.0101e-02, -1.2298e-02, 2.3551e-03, 2.6563e-02, -9.2846e-02, + 3.9427e-02, 7.3944e-03, 3.6174e-02, 7.0540e-02, 1.0127e-01, + -7.3228e-02, 1.6834e-02, -4.9488e-02, -1.2337e-01, 3.9531e-02, + 2.1712e-02, -6.2757e-03, -1.2862e-02, 2.7663e-02, -4.1635e-02, + 1.9606e-03, 4.4968e-02, -2.3781e-02, 7.2327e-03, 4.9955e-02, + 4.4026e-02, -1.3647e-02, 2.4914e-02, 4.7459e-02, 6.1771e-02, + 4.8846e-02, -9.5908e-02, -5.7176e-02, 3.2771e-02, 2.4921e-02, + 4.3089e-02, -5.9890e-02, 4.0627e-02, 3.5825e-02, 8.3920e-02, + -7.3116e-03, 6.8076e-02, 9.7614e-02, -3.1050e-03, 1.9702e-01, + 2.8901e-02, 3.4745e-02, 2.8137e-01, 1.2861e-01, -8.9629e-02, + 7.5863e-02, 5.6941e-02, -1.0249e-01, -5.3639e-02, 4.9301e-02, + 4.0120e-03, -3.8715e-02, -1.4343e-02, 1.1497e-01, 2.3780e-02, + 4.0012e-02, 1.2210e-01, 7.2994e-02, -1.2727e-02, 1.7420e-02, + 6.5306e-02, 7.4932e-03, 4.0247e-02, -5.5433e-02, -2.5412e-02, + 4.9268e-02, -1.1561e-02, -6.6962e-02, -5.6268e-02, -2.7171e-02, + 3.4363e-02, -1.0745e-01, 3.3880e-02, -6.8456e-02, 7.3153e-02, + 1.5285e-02, -1.0217e-02, -6.7162e-02, 2.9075e-02, -7.0045e-02, + -4.0472e-02, -2.5368e-02, 4.4549e-02, 5.6120e-02, 3.2623e-02, + 
-5.4795e-02, 2.6527e-02, -5.5896e-02, -1.6000e-02, 1.0200e-01, + -4.4206e-02, -1.1714e-01, -6.5153e-02, -1.2306e-02, -1.4154e-01, + 2.1956e-02, -1.1985e-01, -1.2669e-02, 2.0597e-02, -4.3890e-02, + -1.2176e-01, 4.0412e-02, -5.9347e-02, -2.5230e-02, -5.3073e-02, + 4.3119e-02, -3.1720e-02, -9.3149e-04, -4.3995e-02, -9.7696e-03, + 3.2038e-02, 1.4291e-03, 4.9957e-02, 4.6637e-02, 5.3679e-02, + -1.2209e-02, -1.9658e-02, -2.5437e-02, 5.6141e-03, 1.0341e-01, + 2.0419e-01, -5.7293e-02, -1.0318e-01, -1.5183e-03, -5.2509e-02, + -1.9216e-01, -4.7395e-02, -5.7462e-02, 4.7400e-02, 7.5755e-03, + 1.4958e-01, -1.2968e-01, 3.6259e-03, 2.0252e-02, 1.0480e-02, + 1.7640e-02, -4.4658e-02, 2.1554e-02, -4.3620e-02, 4.1143e-02, + -4.8213e-02, 6.6094e-02, 4.4460e-02, 4.8178e-02, 2.9246e-02, + -7.9401e-02, 1.5524e-02, 5.2811e-02, -5.4107e-02, 7.2643e-03, + -2.3653e-02, -6.1912e-02, 4.2795e-02, -3.0012e-02, 1.7330e-02, + -1.7558e-01, 3.3922e-02, -2.2670e-01, -3.1409e-02, -7.9016e-02, + 1.7179e-03, -1.7622e-02, 9.6026e-02, 5.8238e-02, -1.0110e-01, + 2.8476e-02, -1.9550e-02, -1.1975e-01, -3.5531e-02, 5.4143e-02, + -1.2903e-01, -2.2033e-02, -3.3625e-02, 2.7479e-02, 3.7191e-02, + 1.0223e-01, -1.0936e-01, 8.2673e-02, 1.2283e-01, 2.0124e-02, + 3.4005e-03, 1.8433e-01, -1.0718e-02, -4.5526e-02, -5.3880e-02, + -3.0684e-02, -3.0079e-02, 6.1851e-02, -1.0747e-01, -1.5114e-01, + -2.1672e-02, -3.4361e-02, 7.0627e-02, -1.8793e-02, 2.9893e-02, + -3.4514e-02, 8.0450e-02, -1.5180e-03, 1.1799e-01, -3.8760e-02, + -3.0789e-03, 1.6182e-02, -3.7646e-02, 7.8771e-02, -2.8594e-02, + 4.2433e-02, -6.7604e-02, 5.3816e-02, 6.7646e-02, 1.8357e-02, + 8.8243e-02, 6.2018e-02, 3.7490e-02, 5.1684e-02, 3.0710e-02, + 4.0863e-02, 7.8782e-02, -5.9085e-02, -4.1544e-03, -1.7084e-02, + 4.6805e-02, 4.1792e-04, 6.9342e-02, -7.3616e-02, 9.0404e-02, + -6.6740e-02, -6.1122e-02, -8.2978e-02, 3.7532e-02, 6.6200e-03, + -7.2245e-02, -5.4751e-02, -7.9822e-03, -1.0696e-01, -1.4899e-02, + 4.4273e-02, -2.2897e-02, -1.2173e-02, -7.6203e-02, 
-1.0068e-02, + -2.9959e-02, 4.3258e-02, 5.1004e-02, -4.1525e-02, 4.9007e-03, + 3.9915e-02, -1.2598e-01, 2.5546e-02, 3.7653e-02, -8.7241e-02, + 1.8375e-01, 1.8118e-01, -4.7049e-04, 2.6604e-02, 2.1197e-02, + 4.7143e-02, 8.6612e-02, -5.3318e-02, 1.3800e-01, -9.7973e-02, + -9.6148e-03, -2.5540e-02, 1.1848e-01, 3.4482e-02, 4.3729e-02, + 2.8187e-03, 1.2743e-01, 5.8442e-02, 7.1207e-03, 3.7814e-02, + 3.9278e-02, 2.5866e-02, -2.0183e-02, 1.0628e-01, 6.9019e-03, + 6.3032e-03, 8.7620e-02, -6.0641e-02, 8.8681e-02, -3.2896e-02, + 4.7297e-02, 1.7168e-02, -5.5006e-02, -6.5152e-02, 3.4500e-04, + -9.7153e-02, 1.8775e-03, 4.4805e-02, 1.4104e-01, -4.3080e-02, + -8.6769e-02, 1.9695e-01, -8.2901e-02, -2.9552e-02, 1.0799e-01, + -2.1573e-01, 3.3186e-02, 4.6826e-02, 6.3347e-02, -1.4726e-01, + 1.4905e-01, -3.3903e-02, 1.3529e-01, -4.4907e-02, 3.0754e-02, + 8.4923e-02, -1.1759e-02, -5.3177e-02, 1.8132e-03, 1.7820e-03, + -7.6560e-02, 3.6070e-02, -1.5777e-02, -3.2811e-02, 9.8611e-02, + 4.6915e-02, 6.5758e-02, 6.3504e-02, 3.6216e-02, 1.3813e-02, + -6.7687e-02, 4.9432e-02, 1.5750e-02, 2.6505e-04, -4.4287e-02, + -4.7489e-02, -4.3150e-02, 2.3563e-02, 7.1404e-03, -7.6061e-02, + 4.5241e-02, 8.6304e-03, 3.8552e-02, -7.6021e-02, -1.8180e-02, + -4.3500e-02, 4.0450e-02, 1.1016e-01, -7.8001e-03, 3.5547e-02, + 8.0275e-02, -4.0487e-02, -7.4191e-02, -9.4242e-03, -2.3875e-02, + 1.7037e-02, 8.3679e-02, 7.0398e-02, -1.3054e-02, -1.6851e-02, + -1.0129e-01, 7.7011e-02, 1.8186e-02, -1.5612e-02, -2.3688e-02, + -1.5667e-02, -3.0407e-02, -6.1633e-02, -7.1531e-02, -5.1729e-02, + 2.9326e-02, -3.0786e-02, -5.9341e-02, -1.3668e-03, 3.4147e-02, + 8.1444e-04, 1.3175e-02, 2.3305e-02, -8.5761e-03, 6.6633e-02, + 1.0857e-01, -4.4033e-02, 6.4443e-02, 4.3963e-02, -1.0024e-01, + -1.2691e-01, -6.5507e-02, 3.2509e-02, -1.7542e-02, -2.8070e-02, + -6.9461e-02, 5.4981e-02, 1.0495e-01, -7.2139e-02, -4.5150e-02, + -1.3194e+00, -2.1716e-02, 1.6033e-02, -2.0474e-02, -1.4458e-02, + -1.7178e-02, 5.0754e-03, 4.1188e-03, -5.8843e-02, 
-6.4931e-02, + 3.5215e-02, -1.2734e-01, -1.0457e-01, 3.7082e-02, -3.0661e-02, + -8.1785e-02, -6.2548e-02, -2.9710e-02, 6.4331e-02, 7.1621e-02, + -2.1610e-02, 2.6836e-02, 5.8310e-02, 1.0695e-01, -7.6603e-02, + -2.8282e-02, -5.2149e-02, -5.0183e-02, 1.3031e-01, 1.0361e-01, + -1.1657e-01, -1.3109e-03, -7.0763e-02, 1.3312e-01, 1.7073e-02, + -6.3133e-03, 7.3985e-03, -1.0757e-01, 4.0777e-02, 5.6490e-02, + 1.1158e-02, 1.7426e-02, 3.9671e-02, -1.4881e-02, -8.9648e-02, + 3.4174e-02, 4.1025e-02, -8.3607e-02, 1.4398e-02, -3.9054e-03, + -1.1896e-02, 3.6092e-02, -6.1125e-02, 7.3290e-02, 2.2875e-02, + 2.2129e-02, -1.0867e-01, -5.2400e-02, 5.9108e-03, -4.7872e-02, + 4.4676e-02, 1.6246e-01, 8.7665e-02, 7.1226e-03, 9.3367e-02, + 1.0172e-01, -1.0781e-01, 2.2360e-02, 1.4627e-01, -7.5511e-02, + -3.8149e-02, -1.0550e-02, 1.2566e-01, 8.7934e-02, 2.7694e-02, + 4.6231e-02, 9.8059e-03, 1.1057e-02, -1.2549e-02, -1.2867e-01, + 3.2761e-02, -6.6936e-02, 5.8947e-02, -3.9928e-02, 2.3036e-02, + 2.4065e-02, -3.8741e-02, 5.7200e-02, -4.8267e-02, -5.7932e-02, + 4.2671e-02, 1.0091e-01, 6.2769e-02, 1.2278e-02, 4.0642e-02, + -9.1265e-02, -1.0523e-02, -4.5142e-02, 5.2563e-02, -7.1983e-02, + 4.8271e-03, 5.9733e-02, 1.5306e-01, 1.2165e-01, -7.3451e-02, + -4.4362e-02, -5.2985e-03, 1.2389e-01, -1.8968e-02, -4.3705e-02, + -7.5750e-03, 1.7417e-03, -1.3828e-02, 6.1293e-03, -6.8898e-02, + 2.5053e-02, 3.2660e-02, -2.2871e-02, -6.5013e-02, -3.3281e-02, + 1.6938e-02, 5.2703e-03, 1.7473e-02, -4.0848e-02, -6.3855e-02, + -4.4330e-02, 1.2664e-02, -1.8142e-02, -3.0737e-03, 3.5341e-04, + -3.6272e-02, 6.4434e-02, 8.5610e-03, 5.5707e-02, 3.1020e-02, + 5.4425e-02, -1.0614e-02, -6.8428e-02, 5.0324e-02, -2.5363e-02, + -1.7314e-02, 2.0876e-02, -3.0069e-02, -9.7373e-03, 8.5768e-02, + -6.7738e-02, 1.0936e-01, 2.3543e-02, 2.8994e-02, 1.1516e-01, + 3.2016e-02, 2.9510e-02, 1.9065e-02, 5.2079e-02, 4.7913e-02, + 6.7791e-02, 1.1473e-02, -8.5581e-02, -3.5063e-02, 7.8980e-02, + 2.3897e-02, 8.8964e-03, 8.0085e-02, 4.4945e-02, 
-2.8048e-02, + -1.1211e-01, 1.1271e-03, 6.7566e-02, 1.0283e-01, 8.2337e-02, + 1.4015e-02, 3.6052e-02, -8.8143e-02, 3.8836e-02, -1.3229e-03, + 2.0368e-02, 1.0822e-01, 1.0346e-01, -4.8797e-02, 7.9168e-03, + -3.0540e-02, 4.3786e-02, 3.0310e-02, -2.8381e-02, 4.6088e-02, + -8.1171e-02, -9.7811e-02, -4.9416e-03, 2.1832e-02, 1.2438e-02, + -4.1425e-02, 7.7462e-02, -7.7581e-02, 4.9547e-02, -4.6468e-02, + 7.3378e-02, 1.7769e-02, 3.7301e-04, -2.8760e-02, -3.4329e-02, + -9.3701e-02, 2.8186e-02, 7.1374e-02, 2.5266e-02, -1.4843e-02, + 6.4047e-02, 1.9724e-04, 2.2431e-02, 2.5484e-02, 2.5640e-02, + -3.9437e-02, 5.5480e-03, -2.8331e-02, 5.9693e-02, -2.6961e-02, + 1.3684e-01, -7.8911e-03, 8.4968e-02, 1.5320e-01, 3.1721e-02, + 2.1825e-02, -2.4057e-02, -9.8858e-03, 6.1091e-02, -6.6224e-02, + 1.3962e-02, -3.9338e-02, 5.9961e-02, -3.8697e-02, 3.3387e-02, + 7.4055e-03, 3.7146e-02, 7.8271e-02, 5.5880e-02, 1.4089e-01, + 9.3110e-02, -1.4280e-02, -4.0142e-02, 6.0164e-02, -7.6656e-02, + 4.5689e-02, -1.0814e-01, -3.5491e-02, 4.5264e-02, 9.2623e-02, + -7.1723e-02, 1.7291e-02, -8.7315e-02, -4.0728e-03, 1.0796e-01, + 3.3997e-02, -9.8876e-02, -8.9155e-02, 3.8369e-02, 1.9530e-02, + -1.9117e-01, -2.0658e-03, 6.6779e-03, 8.1699e-03, -9.0752e-03, + 4.4134e-02, -3.9304e-02, -5.1453e-02, -1.0062e-02, 6.5434e-02, + -4.0342e-02, -4.5351e+00, -2.3207e-02, 5.0064e-02, -2.5201e-02, + -7.1243e-02, -4.5385e-02, -2.5975e-02, -3.4780e-02, 4.8646e-02, + -2.3921e-02, -2.9032e-02, 1.5368e-02, -8.8112e-03, -2.5050e-02, + 5.5610e-02, -1.1592e-02, -6.9792e-02, 3.3603e-02, -4.6731e-02, + 1.1831e-01, 8.9346e-02, -1.9387e-02, -1.4286e-02, -1.2412e-01, + -3.5152e-02, 4.5515e-02, 5.9119e-02, 1.7479e-02, 6.7957e-02, + -1.4066e-02, 7.6105e-02, 6.1042e-02, 4.2792e-02, 2.3038e-02, + 1.0391e-02, 1.9895e-02, 8.4799e-03, -8.5925e-02, 1.9332e-02, + 4.1033e-03, -1.8517e-02, 5.1354e-02, -8.4071e-02, 5.3309e-02, + 3.8044e-03, -2.0082e-01, 9.7535e-02, 4.2757e-02, 8.7049e-02, + 9.1052e-02, 3.9801e-02, 6.2336e-02, 7.5325e-02, 
-3.0500e-03, + -2.3021e-02, 1.4764e-02, -6.1970e-03, 2.1045e-03, -5.7605e-02, + 8.1457e-02, -1.2928e-01, -1.1268e-01, 7.0443e-02, -3.6264e-02, + -3.0411e-02, -1.2364e-01, -4.2091e-02, 7.2788e-03, -5.1890e-02, + -4.4106e-02, -4.7509e-03, -6.6056e-02, -4.5752e-02, 7.1066e-02, + -1.1775e-02, 1.1221e-01, 1.0023e-01, 5.5698e-02, -4.2404e-02, + 3.4032e-03, 3.7870e-02, 7.1071e-02, -6.0725e-02, -7.0095e-02, + -7.1618e-02, 8.7326e-02, -1.1954e-04])Parameter containing: +tensor([[-0.0018, -0.0035, 0.0047, ..., -0.0104, 0.0071, -0.0345], + [-0.0061, -0.0015, -0.0073, ..., -0.0022, -0.0246, -0.0080], + [-0.0041, -0.0014, -0.0101, ..., -0.0074, 0.0381, -0.0051], + ..., + [ 0.0136, 0.0115, -0.0352, ..., -0.0105, 0.0295, 0.0026], + [ 0.0275, 0.0076, -0.0009, ..., 0.0037, -0.0102, -0.0048], + [-0.0325, -0.0094, -0.0329, ..., 0.0203, 0.0127, 0.0209]])Parameter containing: +tensor([-0.3206, 0.0299, -0.2832, ..., -0.3774, -0.0878, -0.3206])Parameter containing: +tensor([[ 0.0119, 0.0111, -0.0122, ..., 0.0184, 0.0008, 0.0128], + [ 0.0061, 0.0051, 0.0010, ..., -0.0092, -0.0083, -0.0078], + [ 0.0028, 0.0019, -0.0141, ..., -0.0223, 0.0200, -0.0158], + ..., + [-0.0324, 0.0039, 0.0036, ..., -0.0004, 0.0073, -0.0153], + [-0.0048, 0.0068, 0.0207, ..., 0.0054, 0.0054, 0.0070], + [-0.0170, 0.0005, 0.0025, ..., -0.0155, 0.0081, 0.0126]])Parameter containing: +tensor([ 4.6204e-02, 5.8441e-02, -5.1392e-02, 1.8127e-02, -1.4503e-02, + 1.1530e-01, 7.2876e-02, -1.4977e-02, 5.1849e-02, -2.3407e-02, + 6.6528e-02, 4.4891e-02, -9.1919e-02, -5.7007e-02, 5.9967e-02, + 2.0325e-02, -2.2049e-02, 2.5101e-02, -2.0569e-02, -7.3242e-02, + 3.3478e-02, 5.7281e-02, -9.8419e-03, -8.0017e-02, 6.3232e-02, + -4.4342e-02, -1.3260e-02, 3.1464e-02, -6.7932e-02, 1.5965e-03, + -2.5314e-02, 5.3833e-02, -5.8044e-02, 5.3284e-02, 3.6621e-02, + 1.6907e-02, -5.3741e-02, -6.6589e-02, 2.3026e-02, 9.1675e-02, + -2.7908e-02, -1.7334e-02, 5.2734e-02, 2.9926e-03, -2.6947e-02, + 1.2520e-02, 3.7933e-02, 4.8340e-02, -1.3135e-01, 
-1.7252e-03, + -3.8872e-03, 3.7537e-02, -1.6800e-02, 4.6600e-02, 5.3894e-02, + 1.0391e-02, -7.0740e-02, -1.1070e-02, 1.3390e-02, -1.5808e-02, + 4.9286e-02, -1.4046e-02, -6.4545e-03, -4.1962e-02, -2.0508e-02, + 2.2232e-02, 6.1859e-02, 2.9922e-02, 9.7885e-03, -4.4525e-02, + -7.2002e-04, 1.0872e-02, 3.9642e-02, -1.8005e-03, -9.3567e-02, + -3.4668e-02, 1.4938e-02, -5.6992e-03, 8.4915e-03, -1.9348e-02, + -2.7542e-02, 5.0850e-03, -5.3589e-02, -5.9319e-03, -4.7569e-03, + -8.8272e-03, 3.0289e-02, -5.6458e-03, 2.9011e-03, -6.5063e-02, + -7.7454e-02, 7.1289e-02, 8.5754e-03, -3.6087e-03, -1.3557e-02, + 6.9153e-02, 7.7087e-02, 2.4429e-02, -2.4673e-02, -1.0025e-02, + 2.5177e-02, -1.5161e-01, 4.2419e-02, -8.7402e-02, -5.4535e-02, + 2.6260e-02, 2.1763e-03, 3.0106e-02, 4.6501e-03, -1.1963e-02, + 7.1983e-03, 1.1719e-01, 5.6671e-02, 5.1758e-02, 2.3102e-02, + -4.7180e-02, 2.0309e-02, -7.3090e-03, -1.1462e-01, -7.7858e-03, + -1.0674e-02, -4.4342e-02, -4.5593e-02, -1.9928e-02, -5.2673e-02, + -6.5918e-02, -5.4749e-02, -5.9418e-02, 1.8753e-02, 5.8167e-02, + 1.4366e-02, 7.4890e-02, -9.4482e-02, -5.0049e-02, 5.6946e-02, + 7.3914e-02, 2.0340e-02, -3.3627e-03, -9.0515e-02, 1.0352e-01, + -2.6215e-02, 3.2440e-02, 5.1544e-02, 2.4673e-02, 6.7282e-04, + -2.1332e-02, -2.5415e-04, 2.0615e-02, 2.0390e-03, 8.2825e-02, + 3.0243e-02, -4.2558e-04, 4.9683e-02, 4.6326e-02, -8.1970e-02, + 9.2316e-03, -8.6487e-02, 6.5247e-02, 1.1981e-04, -2.8229e-03, + 4.2542e-02, -1.0938e-01, -1.5732e-02, 8.0872e-03, -2.2781e-02, + 2.0920e-02, -1.6833e-01, -2.4734e-02, -1.9852e-02, 8.9783e-02, + -2.3376e-02, 7.5623e-02, -4.9133e-02, -3.3386e-02, -3.0334e-02, + -8.6182e-02, 2.4261e-02, -4.0039e-02, 2.0599e-02, 1.3298e-02, + 2.4902e-02, -6.8665e-02, -1.3580e-02, 2.7344e-02, 3.2837e-02, + -4.9210e-03, -5.4077e-02, 2.8174e-01, 5.2887e-02, -6.7444e-02, + 6.1150e-03, 2.0996e-02, 1.3037e-01, 1.1176e-01, -9.5520e-02, + 5.8838e-02, -1.4839e-03, -6.2347e-02, 1.8814e-02, 1.4618e-02, + -4.5815e-03, -5.8136e-02, 1.5396e-02, 5.5634e-02, 
5.2002e-02, + 9.8694e-02, -6.1981e-02, 4.0710e-02, -1.0803e-02, 2.3403e-03, + 1.3390e-02, 2.5005e-03, 3.7354e-02, 5.5450e-02, -5.9357e-02, + -7.2571e-02, -2.1042e-02, 8.5083e-02, -5.7129e-02, -1.6895e-01, + 9.1003e-02, 3.0670e-03, 4.8370e-03, 4.2877e-02, 5.7983e-03, + -1.3132e-03, -6.2561e-02, -1.9104e-02, 6.4270e-02, 2.6672e-02, + 1.8631e-02, 5.1361e-02, -5.6244e-02, -1.5594e-02, -3.5858e-02, + -4.8737e-02, -2.8534e-02, 1.6953e-02, -4.9957e-02, 1.0632e-01, + -5.7411e-03, 2.0050e-02, -3.8849e-02, 1.4210e-03, -6.3972e-03, + -2.9724e-02, 3.4271e-02, -8.4290e-02, -1.4397e-02, 1.6800e-02, + 2.3911e-02, -3.8422e-02, -4.4861e-03, -1.2952e-01, 1.7334e-02, + 1.6861e-03, -4.2175e-02, 9.4177e-02, -5.9204e-02, -3.0533e-02, + -1.4380e-01, -3.2959e-02, -1.0811e-02, -1.7792e-02, -6.7139e-02, + -3.5126e-02, 1.6022e-02, -2.6794e-02, 1.9394e-02, 3.3203e-02, + -1.4549e-02, -1.9501e-02, 1.5244e-02, -1.7181e-02, -7.9041e-02, + 9.0271e-02, 4.2633e-02, 7.7637e-02, 6.4087e-02, -8.1421e-02, + 8.1482e-02, 1.4046e-02, -4.6661e-02, -4.1412e-02, -1.3275e-02, + 4.4342e-02, 2.3346e-02, -5.0262e-02, 5.8746e-02, -7.6050e-02, + 5.1758e-02, -3.5553e-02, 8.8739e-04, -9.7351e-02, 2.1912e-02, + 1.2993e-02, 7.8613e-02, -1.4999e-02, 5.1422e-02, 5.5115e-02, + 4.3526e-03, -4.4830e-02, -9.6863e-02, 5.5969e-02, -4.8706e-02, + 5.9776e-03, 5.9906e-02, -1.4717e-02, 3.5278e-02, -2.7069e-02, + 2.9343e-02, -2.4090e-03, -4.6539e-02, -3.1799e-02, 6.6162e-02, + -1.7212e-02, 3.7781e-02, 1.9287e-02, 5.4016e-02, -3.1948e-03, + -9.8572e-03, -2.2186e-02, -3.3600e-02, 1.7838e-02, 2.9468e-01, + -2.1094e-01, 7.1411e-02, -2.4445e-02, 3.2501e-02, -1.4282e-01, + 6.5369e-02, -4.2450e-02, 3.3142e-02, -1.7761e-02, -5.8380e-02, + -2.8343e-03, -4.4708e-03, -8.4839e-03, 3.1647e-02, -2.3941e-02, + -4.8035e-02, 5.1910e-02, 2.4338e-02, 8.9264e-03, 3.7872e-02, + 6.0303e-02, -6.6162e-02, 5.1239e-02, 3.4943e-03, -7.2632e-02, + 1.5488e-02, 4.9622e-02, 4.0375e-02, -8.3313e-03, -5.2582e-02, + -2.9663e-02, -9.0027e-03, 1.3245e-02, 
-2.8702e-02, -8.6823e-03, + -3.5461e-02, -5.5450e-02, -1.6113e-02, -2.6245e-02, -5.1788e-02, + 2.9434e-02, -3.0579e-02, 7.8979e-02, 8.5571e-02, -1.0269e-02, + 4.1046e-02, -3.1204e-02, -6.1523e-02, -2.6520e-02, -8.5831e-03, + -8.5602e-03, 7.6660e-02, 5.7404e-02, -1.4267e-02, -4.5052e-03, + 1.1078e-02, 4.4556e-02, -5.9906e-02, -2.7466e-02, -8.3801e-02, + 9.2125e-04, -2.0523e-02, 8.5526e-03, 9.3889e-04, -1.0284e-01, + -8.7891e-02, 3.8971e-02, 3.6591e-02, 1.4114e-02, -3.1128e-02, + 6.0028e-02, -2.4994e-02, 6.2561e-02, 3.0350e-02, -5.2307e-02, + 1.6602e-02, 7.7576e-02, -2.0691e-02, 2.5986e-02, -6.1218e-02, + -3.5736e-02, 2.9236e-02, 5.2155e-02, -2.7573e-02, 3.2166e-02, + -3.6865e-02, 2.3148e-02, -5.7159e-02, -3.4546e-02, 3.0212e-03, + -9.9976e-02, -3.1021e-02, -1.0002e-02, 5.6213e-02, 1.6800e-02, + -7.2327e-02, -2.2507e-03, 2.1133e-02, -8.6853e-02, -5.5176e-02, + 5.8289e-02, 2.2949e-02, -4.5532e-02, 8.5938e-02, 6.1768e-02, + -1.0492e-01, 7.0435e-02, 8.5205e-02, 3.3020e-02, 8.1909e-02, + -9.2163e-03, 3.7292e-02, 1.9791e-02, 6.0577e-02, -6.4697e-02, + -9.0256e-03, -2.2507e-02, 1.0323e-02, -6.9092e-02, 3.5706e-02, + -3.5492e-02, 2.2415e-02, -2.1896e-02, 5.7678e-02, -2.4536e-02, + 2.3712e-02, -2.2385e-02, -8.9661e-02, 7.8064e-02, 5.8807e-02, + 3.7903e-02, -3.4760e-02, -1.6693e-02, -1.4915e-02, -5.1544e-02, + -3.3741e-03, -1.2665e-02, -8.1055e-02, -2.2537e-02, -1.7977e-03, + 1.5495e-02, 5.0507e-02, -5.6885e-02, 2.3560e-02, 3.1830e-02, + -2.3315e-02, -7.5134e-02, -7.4585e-02, 3.1281e-02, -7.7576e-02, + 1.1047e-01, 3.3142e-02, 6.2469e-02, 2.0046e-03, 3.7506e-02, + 2.1027e-02, 1.6602e-02, 5.1849e-02, 4.5807e-02, -6.7871e-02, + -1.8753e-02, 9.8495e-03, 4.0741e-02, -1.4687e-02, -1.0059e-01, + 4.0466e-02, 4.2847e-02, -3.2776e-02, 4.2419e-03, -7.5134e-02, + 5.6854e-02, -2.2797e-02, 5.1208e-02, -1.0303e-01, 2.5650e-02, + -3.6530e-02, -2.0542e-03, 2.3987e-02, -2.2217e-02, -2.7466e-02, + -4.6082e-02, 6.6467e-02, 7.7393e-02, -3.6652e-02, -1.0638e-01, + -6.0242e-02, -8.0200e-02, 
-5.3223e-02, 1.3268e-02, 5.2376e-03, + -4.8187e-02, -2.8870e-02, 4.0070e-02, -2.7695e-02, -6.3599e-02, + 3.1166e-03, 5.7678e-02, 3.4088e-02, -9.8324e-04, 1.9257e-02, + -1.5821e-03, -1.0948e-02, -5.5023e-02, -1.3626e-02, -5.9387e-02, + 3.8574e-02, -4.7852e-02, 3.1872e-03, -7.1655e-02, 5.0323e-02, + 5.2277e-02, -3.2776e-02, -3.9124e-02, -4.0955e-02, -4.5532e-02, + -4.1504e-02, 1.0522e-01, 2.8778e-02, 1.4038e-02, -1.4252e-02, + 6.8970e-03, 2.5620e-02, -2.8400e-03, 5.7617e-02, 5.7129e-02, + -8.1909e-02, -1.7700e-02, -3.0426e-02, -3.9337e-02, -5.2673e-02, + 2.1652e-02, 1.8539e-02, -7.2632e-02, -4.7180e-02, 6.1279e-02, + -5.0629e-02, 3.3264e-02, -5.4626e-02, -5.1727e-02, 6.0120e-02, + 6.1401e-02, -1.6281e-02, 5.2338e-02, 6.8115e-02, -3.2837e-02, + 8.8806e-02, -7.4120e-03, -4.5929e-02, 2.6871e-02, 2.0828e-02, + 2.6703e-02, 6.1493e-03, 2.7985e-02, -2.2278e-03, -2.0447e-02, + -4.0802e-02, 3.4790e-02, 7.5623e-02, 8.7524e-02, 8.7219e-02, + 3.7327e-03, 1.9341e-03, 6.0692e-03, 6.4331e-02, 7.9346e-02, + -2.7557e-02, -1.8860e-02, 1.7929e-02, -4.6539e-03, 2.0950e-02, + -1.1307e-02, -8.0383e-02, -9.9548e-02, -1.2985e-02, 1.4694e-02, + -1.4982e-03, -1.2039e-02, -4.5990e-02, 3.1403e-02, 3.7720e-02, + 8.4000e-03, 1.8600e-02, -2.5253e-02, 2.2232e-02, 2.4734e-02, + 5.6877e-03, 1.8463e-02, 3.3722e-02, 7.6294e-02, -1.8784e-02, + -4.1504e-02, -3.3386e-02, 7.5989e-02, -3.5034e-02, 6.9641e-02, + 4.2786e-02, 2.3697e-02, 2.8458e-02, 1.9760e-02, 4.0222e-02, + -7.4646e-02, -1.6129e-02, 2.0325e-02, 1.0529e-01, -1.8158e-02, + 1.5137e-01, 9.3384e-03, 1.5114e-02, 6.5552e-02, 9.2010e-03, + 6.3782e-03, 6.5880e-03, 1.6492e-01, 4.3976e-02, 4.2206e-02, + 3.6652e-02, -3.6621e-02, -1.8311e-02, 1.0323e-02, 5.4817e-03, + 1.4816e-02, -9.3445e-02, 3.4729e-02, 4.1595e-02, 2.5360e-02, + 3.9337e-02, 7.1167e-02, 3.4821e-02, 5.8441e-02, 1.0474e-01, + 6.0272e-02, -9.9792e-02, -7.5195e-02, 7.1655e-02, 1.0089e-01, + -5.3192e-02, 5.8441e-02, -1.1896e-01, -4.9805e-02, 9.4833e-03, + 9.4116e-02, -1.4209e-01, -5.2246e-02, 
1.5942e-01, 4.7852e-02, + -4.1351e-03, 5.0598e-02, 7.8796e-02, 3.9520e-02, -5.8517e-03, + -2.4399e-02, -3.5370e-02, -8.7219e-02, -2.7390e-02, 9.0210e-02, + 5.2094e-02, -6.2622e-02, -2.3300e-02, 3.0960e-02, -6.3416e-02, + 8.2886e-02, -1.1761e-01, -4.1412e-02, 1.4107e-02, 3.2227e-02, + -1.3374e-02, -9.9121e-02, -2.8076e-03, 7.0557e-02, -2.4384e-02, + 1.1920e-01, 6.6650e-02, -6.0059e-02, 3.6041e-02, -3.1235e-02, + 5.3528e-02, -3.6621e-02, 7.0068e-02, -3.5736e-02, -1.0880e-02, + -6.6467e-02, 2.5543e-02, -8.5402e-04, -3.6346e-02, -9.3384e-03, + 1.8234e-02, -5.7404e-02, -3.2501e-03, -5.9448e-02, -3.1738e-02, + -9.0637e-02, -5.0476e-02, 7.0862e-02, 2.3178e-02, -1.5820e-01, + -1.4290e-02, -3.4904e-04, 1.1505e-02, -2.6077e-02, -9.8343e-03, + -3.0182e-02, -2.0691e-02, -1.1168e-03, -4.4800e-02, 5.0751e-02, + -2.8564e-02, 1.4658e-03, 7.2083e-02, 4.0649e-02, 2.2446e-02, + 3.7415e-02, 3.5400e-02, -3.0701e-02, 2.8976e-02, -5.6671e-02, + -1.3855e-02, -5.4016e-02, -6.5552e-02, 1.2970e-02, -6.0730e-02, + -1.8860e-02, -1.0938e-01, 9.8114e-03, 2.0390e-03, 3.4576e-02, + 3.3207e-03, -5.9319e-03, -1.3916e-01, 6.0791e-02, 6.5186e-02, + 1.5007e-02, 5.7281e-02, -1.7487e-02, 6.9824e-02, 2.9663e-02, + -5.3375e-02, 1.5266e-02, -7.2510e-02, -2.8275e-02, -1.1635e-02, + -2.3514e-02, 9.7885e-03, 4.0588e-02])Parameter containing: +tensor([2.1482, 2.0761, 2.1921, 2.0451, 2.0384, 2.1436, 2.2704, 2.1227, 2.1465, + 2.1898, 2.1549, 2.0869, 2.1795, 2.0904, 2.1714, 2.0272, 2.1504, 2.0925, + 2.1178, 1.2856, 2.1856, 2.1374, 2.2333, 2.0119, 2.2004, 2.1451, 2.0892, + 2.0631, 2.2006, 2.0803, 2.1321, 2.0761, 2.1056, 2.0743, 2.1243, 2.1250, + 2.3369, 2.1272, 2.0290, 2.1546, 2.0374, 2.3274, 2.2738, 2.1767, 2.0974, + 2.1418, 2.1897, 2.0832, 2.0704, 2.1172, 2.1430, 2.1572, 2.1444, 2.1279, + 2.2470, 2.0093, 2.0840, 1.9981, 2.0446, 2.0698, 2.1134, 2.1441, 2.1823, + 2.1272, 2.0367, 2.0682, 2.1908, 2.1000, 2.0956, 1.7824, 2.1168, 2.1184, + 2.3859, 2.0491, 2.1741, 2.1228, 2.0877, 2.1016, 2.2688, 2.0134, 2.1942, + 
2.1419, 2.0865, 2.0894, 2.1286, 2.2371, 2.1443, 2.0431, 2.0828, 2.1831, + 2.0309, 2.0924, 2.0745, 2.1408, 2.1419, 2.1016, 2.2139, 2.1779, 2.1671, + 2.1297, 2.2277, 2.0503, 2.1542, 2.1528, 2.0703, 2.1439, 2.2227, 2.1981, + 2.0713, 2.0392, 2.0976, 2.1198, 2.0589, 2.1613, 2.1716, 2.0782, 2.0527, + 2.0557, 2.0537, 2.1340, 2.1217, 2.0609, 2.3008, 2.1750, 2.1099, 2.2135, + 2.1483, 2.1244, 2.0733, 2.0288, 2.1141, 2.1998, 2.0138, 2.1505, 2.3035, + 2.1225, 2.2076, 2.0718, 2.0643, 2.1111, 2.1190, 2.0362, 2.1451, 2.1716, + 2.0588, 2.1940, 2.1055, 2.1166, 2.1894, 2.0712, 2.1387, 2.1944, 2.2454, + 2.2819, 2.1488, 1.3095, 2.1511, 2.0310, 2.0929, 2.1536, 1.9961, 2.1454, + 2.2574, 2.0555, 2.1161, 2.2521, 2.1017, 2.1226, 2.0426, 2.0752, 2.1164, + 2.1192, 1.8822, 2.1522, 2.0504, 2.1629, 2.1790, 2.3046, 2.1391, 2.1411, + 2.4045, 2.2437, 2.0769, 2.1762, 2.0932, 2.0806, 2.0046, 0.9834, 2.0615, + 2.1874, 2.0295, 2.1896, 2.1249, 2.1741, 2.1469, 2.2090, 2.3059, 2.0889, + 0.8082, 2.0433, 2.0004, 2.0083, 2.1260, 2.0278, 2.1430, 2.1549, 2.2318, + 2.2448, 1.9585, 2.0712, 2.0533, 2.0480, 2.1845, 2.0333, 2.2601, 2.1605, + 2.1659, 2.1481, 2.2122, 2.1820, 2.1369, 2.1664, 1.9730, 2.2248, 2.0468, + 2.2257, 2.2819, 2.1141, 2.1859, 2.0820, 2.1872, 2.1288, 2.1822, 2.1780, + 2.0478, 2.2415, 2.1621, 2.1453, 2.0071, 2.1323, 2.1880, 2.3021, 2.1457, + 2.2072, 2.1449, 2.1183, 2.0404, 2.2815, 2.2627, 2.1645, 2.2278, 2.1534, + 2.0938, 2.1091, 2.1209, 2.0492, 2.1297, 2.1816, 2.1060, 2.2206, 2.0955, + 2.1179, 2.1007, 2.2277, 2.1673, 2.0951, 2.2219, 2.1363, 2.0325, 2.1666, + 2.0490, 2.0837, 2.0539, 2.1483, 2.1213, 2.0532, 2.1494, 2.2184, 2.3024, + 2.1308, 2.1234, 1.9768, 2.2508, 2.1202, 2.0864, 2.1375, 2.2378, 2.1481, + 2.1342, 1.9919, 2.1629, 2.1637, 2.2153, 2.1035, 2.1337, 2.0895, 2.1282, + 2.1316, 1.9982, 2.0586, 2.1836, 2.1714, 2.1560, 2.1255, 2.2053, 2.1329, + 2.1358, 2.1552, 2.1542, 2.1948, 2.1878, 2.1202, 2.0789, 2.1483, 2.0329, + 2.2126, 2.1098, 2.2082, 2.0599, 2.2092, 2.3170, 2.2484, 2.1793, 2.0508, + 
0.5618, 2.0714, 2.1515, 2.1989, 2.1049, 2.0814, 2.0987, 2.2290, 2.1709, + 2.2567, 2.1159, 2.0255, 2.2227, 2.0735, 2.0707, 2.0808, 2.3608, 2.1141, + 2.1577, 2.2603, 2.2280, 2.0188, 2.2263, 2.0251, 2.3628, 2.1262, 1.9121, + 2.3010, 2.1140, 2.3798, 2.0306, 2.1233, 2.0576, 2.1311, 2.1088, 2.0206, + 2.1556, 2.0028, 2.1734, 2.0158, 2.1590, 2.0562, 2.1899, 2.1982, 2.1236, + 2.0886, 2.2811, 2.1128, 2.1131, 2.1598, 2.0060, 2.0154, 2.1563, 2.2502, + 2.1250, 2.0527, 2.0428, 1.9846, 2.1653, 2.1090, 2.1451, 2.2003, 2.1499, + 2.2526, 2.2141, 2.0655, 2.0515, 2.1719, 2.1375, 2.1248, 2.0989, 2.0976, + 2.0913, 2.1493, 2.2613, 2.2130, 2.0378, 2.2520, 2.1286, 2.1573, 2.1093, + 2.0292, 2.1074, 2.0239, 2.1475, 2.1301, 2.1112, 2.1901, 2.0577, 2.1609, + 1.4774, 2.2200, 2.2076, 2.0265, 2.1652, 2.0973, 2.2043, 2.0817, 2.2479, + 2.1600, 2.0770, 2.0933, 2.0445, 2.0840, 2.1304, 2.1265, 2.1881, 2.0445, + 2.1241, 2.1748, 2.2234, 2.0541, 2.2300, 2.1778, 2.0552, 2.2086, 2.0418, + 2.0995, 2.0420, 1.7627, 2.0249, 2.0225, 2.1123, 2.2335, 2.1247, 2.0972, + 2.0920, 2.1411, 2.2878, 2.1152, 2.0844, 2.1943, 2.1201, 2.1608, 2.0665, + 2.0956, 2.1365, 2.1061, 2.1110, 2.1114, 2.1407, 2.0190, 2.1246, 1.9944, + 2.2389, 2.1340, 2.1952, 2.0701, 2.1126, 2.1158, 2.3171, 2.0932, 2.1448, + 2.2950, 2.0981, 2.1204, 2.1288, 2.2271, 2.0780, 1.4625, 2.2277, 2.1814, + 2.1414, 2.0223, 2.0474, 2.2824, 2.1625, 2.0740, 2.0709, 2.1278, 2.2366, + 2.0778, 2.1140, 1.9411, 1.9732, 2.2932, 2.1656, 2.2058, 2.1473, 2.0715, + 2.1679, 2.1443, 2.0592, 2.1594, 2.1587, 2.2206, 2.0880, 2.0784, 2.1948, + 2.1170, 2.3211, 2.1703, 2.1656, 2.1462, 2.0380, 2.1215, 2.2290, 2.1520, + 2.2105, 2.0404, 2.0373, 2.1508, 2.1466, 2.0975, 2.1173, 2.2048, 2.0858, + 2.1657, 2.1193, 2.1691, 2.1388, 2.2129, 2.0910, 2.0813, 2.2194, 2.0774, + 2.1017, 2.2044, 2.1490, 2.1446, 2.3251, 2.2174, 2.1969, 2.1872, 2.2394, + 2.0567, 2.2579, 1.9658, 2.0598, 2.1581, 2.0006, 2.0074, 2.2322, 2.1349, + 2.1261, 2.0935, 2.2362, 1.5636, 2.0990, 2.0930, 2.2388, 2.1674, 2.0623, + 
2.1734, 2.1535, 2.0805, 2.0350, 2.1338, 2.1778, 2.1319, 2.2243, 2.2794, + 2.1139, 2.2005, 2.1549, 2.0232, 2.0834, 2.0726, 2.2218, 2.0496, 2.1193, + 2.0845, 2.1427, 2.0182, 2.3726, 2.0258, 2.1590, 2.2770, 2.1841, 2.2523, + 2.1467, 1.9906, 2.0301, 2.1457, 2.1497, 2.0760, 2.0481, 2.1327, 2.1243, + 2.2556, 2.1431, 2.1105, 2.1889, 1.7540, 2.1039, 2.1062, 2.0953, 2.1475, + 2.1238, 2.1812, 2.2103, 2.1678, 2.1608, 2.0309, 2.1152, 2.1129, 2.2841, + 2.0309, 2.0734, 2.1454, 2.2304, 2.0570, 2.1209, 2.2711, 2.1286, 2.1101, + 2.0775, 2.0758, 2.0920, 2.2181, 2.2613, 2.1191, 2.1776, 2.1671, 2.1099, + 2.2056, 2.1635, 2.0298, 2.2926, 2.1715, 1.9039, 2.0213, 2.0237, 2.1132, + 2.0098, 2.1750, 2.2415, 2.1563, 2.1910, 2.0880, 2.1238, 2.2616, 2.1590, + 2.0323, 2.0179, 2.1277, 1.8232, 2.1558, 2.2524, 2.2178, 2.1622, 2.0681, + 2.1036, 2.1641, 2.1510, 2.2235, 2.0144, 2.0947, 2.0730, 1.9855, 2.2212, + 2.1229, 2.1985, 2.2897, 2.1890, 2.0744, 2.0369, 2.3364, 2.1039, 2.1677, + 2.1267, 2.2754, 2.2022, 2.0778, 2.1968, 2.0938, 2.2225, 1.9913, 2.1763, + 2.2320, 2.0717, 2.1027, 2.1216, 2.2761, 2.2161, 2.1315, 2.0953, 2.0402, + 2.3112, 2.1639, 2.3642, 2.1207, 2.2583, 2.0666, 2.0418, 2.1478, 2.1192, + 2.0907, 2.1516, 2.1368, 2.1209, 2.2247, 2.0873, 2.2266, 2.1265, 2.4356, + 2.1106, 2.2100, 2.3024, 2.1876, 2.2123, 2.0296, 1.6260, 2.0849, 2.0714, + 1.9995, 2.2396, 2.2255, 2.0709, 2.1770, 2.1812, 2.1527, 2.0534, 2.0879, + 2.1128, 2.3070, 2.1067, 2.1437, 2.0932, 2.1928, 2.0556, 2.0341, 2.2084, + 2.2778, 2.3196, 2.1044, 2.0765, 2.2309, 2.3135, 2.2170, 2.0869, 2.1127, + 2.2343, 2.2393, 2.2178, 2.0545, 2.0631, 2.1401, 2.0484, 1.9397, 2.1904, + 2.0942, 2.1266, 2.2787])Parameter containing: +tensor([-4.0474e-01, 5.0409e-01, -6.2095e-01, 1.9046e-01, -9.3106e-01, + 4.3668e-01, 6.2783e-01, -7.8965e-03, -8.5588e-01, -3.0263e-01, + -6.0073e-01, 7.6827e-02, -4.7316e-01, -8.7310e-02, -6.6791e-01, + -3.7563e-03, 1.8902e-01, -2.1686e-01, -6.6348e-01, -1.3281e+00, + 1.0986e-01, 3.3849e-01, 7.3085e-01, -3.8962e-01, 
5.8070e-01, + 1.0180e-02, -3.1196e-01, 6.9664e-03, -8.0248e-01, 6.6610e-01, + 3.4659e-01, 1.4230e-01, 5.1505e-01, -2.6895e-01, -3.3652e-01, + -5.5124e-01, -5.4981e-01, -2.2390e-01, 7.7914e-01, 4.4125e-01, + -2.5739e-01, 6.8878e-01, 8.3604e-01, 3.8809e-01, -5.0993e-01, + -5.5098e-01, -8.5939e-01, -4.3050e-01, 3.6157e-02, -2.1150e-01, + -4.7758e-01, -6.4244e-01, 7.0543e-01, 4.7260e-01, -7.1362e-01, + 1.1644e+00, 2.3792e-01, -1.3141e-01, -1.7051e-01, -2.5365e-02, + -5.8489e-01, 6.5227e-01, 5.4440e-01, -3.2898e-01, 1.4602e-01, + 3.5652e-01, 5.1635e-01, -5.1022e-01, -4.3654e-01, -1.4454e+00, + 2.9734e-01, 9.4037e-02, 1.0461e+00, 2.1383e-01, -3.6251e-01, + -2.5680e-01, 5.1107e-01, 7.0932e-01, 4.6205e-01, -2.5158e-01, + 1.7436e-01, 1.6938e-01, -2.8672e-01, 7.2591e-01, -2.8540e-01, + -7.8770e-01, 4.9259e-01, 2.1408e-01, 1.3567e-01, -5.2772e-01, + -6.7128e-02, 4.5592e-01, 7.4472e-02, -7.0644e-02, -5.8231e-01, + 2.4495e-01, 4.7405e-01, -1.7291e-01, -6.3612e-01, 6.2670e-01, + 4.6047e-01, -6.5939e-01, 3.9048e-01, -3.0125e-01, 1.1632e-01, + 1.0360e+00, 7.5709e-01, 4.1743e-01, 1.6902e-01, -3.8525e-01, + 8.5621e-01, 3.0732e-01, -2.1138e-01, 2.7196e-01, 3.5186e-01, + -3.2635e-01, 7.3482e-01, 4.2488e-01, 7.4492e-01, 1.4549e-01, + -2.1434e-01, -3.5687e-01, -7.2550e-01, -6.6136e-01, 1.2367e-01, + -4.1577e-01, -5.3205e-01, -6.2603e-02, -1.9586e-01, -3.7930e-01, + -4.2071e-01, -9.0787e-01, 4.8806e-02, -4.6018e-01, 7.3300e-01, + 5.1992e-01, 7.1370e-01, 1.6186e-01, 2.5509e-01, -8.3998e-02, + -1.7691e-01, 4.6683e-01, -4.6551e-01, -3.3841e-01, -4.4711e-01, + -1.8539e-01, 2.9682e-01, -2.4576e-01, -2.1410e-01, -3.5070e-03, + 6.3886e-01, -7.4631e-01, 6.3523e-01, 5.2012e-01, -7.1093e-01, + 1.0273e+00, -5.6964e-02, -2.7703e-01, -2.2630e-01, -3.6590e-01, + -4.7491e-02, 5.2619e-01, 7.8483e-01, -3.9002e-01, 5.5755e-02, + 6.2525e-01, -3.9933e-01, 3.6984e-01, -4.2966e-02, -4.1367e-02, + 4.2210e-01, 1.4972e-01, -2.6309e-01, -2.4747e-01, -1.9582e-01, + 4.4209e-01, 3.6193e-01, -4.4070e-01, -5.1436e-01, 
1.3340e-01, + -1.2178e+00, -7.4455e-01, 1.2202e-01, 4.9822e-01, 5.3379e-01, + 7.0351e-01, -1.7865e-01, 1.1071e+00, -5.1895e-01, -6.5553e-01, + -3.6015e-01, -4.4857e-01, 2.3724e-01, 5.4664e-01, 4.1513e-01, + 4.0819e-01, 1.0514e+00, 2.3382e-01, 9.6579e-01, 1.0408e-01, + -1.6920e-01, 1.0829e-01, 5.4694e-01, 1.5864e-02, 8.3642e-02, + 2.5477e-01, 8.8037e-01, 8.7462e-01, -8.6983e-02, 1.2242e-01, + -1.5298e-01, 6.3101e-02, -6.2515e-01, 5.4305e-01, 6.4796e-01, + -3.3765e-01, -7.2310e-01, -1.3832e-01, -6.2350e-01, 7.3444e-01, + 4.6106e-01, -2.6191e-01, -7.5841e-01, 4.4694e-01, 3.0497e-01, + -2.4138e-01, -5.6582e-01, -2.6223e-01, 2.7859e-01, -6.5636e-02, + -4.4158e-01, 3.0139e-01, 6.6060e-01, 8.8885e-01, -5.2302e-01, + -6.2983e-01, 3.8733e-01, 3.4346e-01, 2.3819e-01, -2.2995e-03, + 9.1729e-01, 7.6446e-01, 1.9985e-01, -4.2040e-01, 4.0301e-01, + -3.7577e-01, -3.5067e-01, -8.2791e-01, -8.2184e-01, 8.2123e-01, + -5.7524e-01, 3.0567e-01, 4.3731e-01, -1.4166e-01, 6.3945e-01, + -2.2943e-02, 5.7065e-01, -1.1292e+00, -2.9721e-01, -5.3010e-02, + 4.5299e-02, -2.3854e-01, -4.6227e-01, 3.1692e-01, -3.5330e-01, + -2.6094e-01, 8.2341e-01, 2.6003e-01, 5.0662e-03, 2.1076e-01, + 2.9852e-01, 4.6640e-01, 9.2121e-02, -7.7412e-01, -3.7900e-01, + -1.8495e-01, -1.1545e-01, 7.3789e-01, 3.2838e-01, 4.5115e-01, + -3.8806e-01, -1.0646e+00, -7.5262e-01, 2.5062e-01, -2.5364e-01, + 6.6377e-01, -5.8219e-01, 3.3624e-01, 2.8649e-01, 3.9229e-01, + -6.2032e-01, 4.4817e-01, 1.0572e+00, -2.2192e-01, 1.5616e-01, + -4.2898e-01, -5.9908e-01, -3.8231e-01, 4.8874e-01, 1.5247e-01, + -8.2282e-01, 8.2276e-01, -4.6885e-01, -2.2112e-01, -3.1448e-01, + 6.1385e-01, 6.0871e-02, 6.5478e-01, -6.8870e-01, -1.8069e-01, + 6.0595e-01, -2.1569e-01, -6.8886e-02, -2.1633e-01, 2.6627e-02, + 9.1286e-01, 3.7838e-01, 4.4088e-01, -1.9911e-02, 5.2531e-01, + -1.0447e+00, -7.8353e-01, 6.1983e-01, -3.1507e-01, 6.4922e-01, + -6.7502e-01, 4.6014e-01, -4.8093e-01, 4.9097e-01, -5.0731e-01, + 3.2845e-01, -4.6035e-01, 5.1567e-02, -7.1382e-01, 
-1.1038e-02, + 5.6548e-01, -8.6373e-01, 3.8686e-01, 4.7987e-01, -2.6022e-01, + 1.0529e+00, -3.4670e-01, -5.7703e-01, 9.2351e-01, -1.0424e+00, + 3.6434e-01, -5.1920e-01, -2.0621e-01, -5.7194e-01, -1.3815e-01, + -1.5458e-01, 7.8982e-01, -2.1564e-01, 1.4026e+00, -9.9102e-02, + -4.7951e-01, -1.3557e-01, -3.1940e-01, -4.9115e-01, -1.7228e-01, + -4.8218e-01, -2.9346e-01, -9.5089e-02, -1.2865e-01, 7.9052e-01, + -2.9325e-02, -3.4700e-01, 7.1176e-01, -1.4633e-01, 4.0358e-01, + 6.3262e-01, -1.9452e-01, 1.9849e-01, -5.0373e-01, -4.7863e-01, + -4.8630e-01, -5.2785e-01, 7.3041e-01, 3.3524e-01, -3.8826e-01, + -3.6228e-01, -6.5662e-01, -2.4258e-01, -6.7619e-02, 2.2568e-01, + -8.4740e-01, -5.3083e-02, 1.0252e+00, 3.3464e-01, -1.1910e-01, + 3.3992e-01, -2.7331e-01, 5.2138e-01, 7.7297e-01, -6.8481e-01, + 9.7702e-02, -5.2441e-01, -6.2938e-02, -7.9591e-01, -4.0645e-01, + 2.8333e-01, 3.2968e-01, -4.2421e-01, -4.1324e-01, 8.1571e-01, + -4.2995e-01, -3.8089e-01, 4.6637e-01, -4.0341e-01, 6.3807e-01, + 6.2462e-01, 5.8128e-02, 3.2726e-01, 4.4414e-01, 1.1882e+00, + -2.5828e+00, 2.7348e-01, -3.0446e-01, 1.2585e-01, 1.3078e-01, + -3.7640e-01, -4.2410e-01, 5.0738e-01, -2.4688e-01, -2.9159e-01, + 2.6960e-01, 4.7148e-01, -4.0152e-01, 2.0195e-01, 4.5138e-01, + -7.5467e-01, -8.1379e-01, -8.9726e-02, 4.4020e-01, 6.0406e-01, + -1.6593e-01, -6.8744e-01, 8.2714e-01, 2.3347e-03, -4.2319e-01, + 8.7532e-02, 3.5544e-01, -2.7449e-01, -2.0140e+00, 4.4710e-02, + -8.1419e-02, -4.1267e-01, 7.1687e-01, 4.5125e-01, -4.8160e-01, + -3.1118e-01, -1.5726e-01, -6.9149e-01, 7.0766e-01, -2.8900e-01, + 6.8034e-01, -3.8809e-01, -5.0950e-01, 1.0280e-01, 3.8500e-01, + -3.3726e-01, 4.1365e-01, -4.9350e-01, -3.2800e-02, -5.9575e-01, + 1.5721e-01, 1.9718e-01, 1.7395e-01, 8.4402e-01, -4.4557e-01, + -2.3550e-01, 1.6015e-01, -4.7043e-01, 1.8386e-01, -5.2999e-01, + -3.7753e-01, 9.7203e-04, -9.8730e-01, 2.4090e-01, 3.2854e-01, + -3.6399e-03, 5.6957e-01, 2.5796e-01, 3.6204e-01, 7.4543e-01, + -8.7224e-01, -2.4659e-01, 2.6866e-01, 
4.1437e-01, -6.1573e-01, + -2.1760e-02, 2.4079e-01, 2.4883e-01, -1.2891e-01, -5.7589e-01, + -1.9098e-01, -5.2836e-01, 8.7724e-02, 8.1355e-01, 1.0105e+00, + 2.3312e-01, -5.4144e-01, -6.5276e-01, -2.7310e-01, 4.0889e-01, + 6.8733e-01, 2.3444e-01, 2.6451e-01, -4.0372e-01, -4.0228e-01, + -6.9071e-01, -9.1914e-02, -7.1768e-01, -3.9160e-01, 9.2464e-01, + 1.1736e-01, 9.9555e-01, -3.4335e-01, 6.8837e-02, 1.0661e-01, + 9.6046e-01, 1.9288e-01, 7.9258e-01, 5.5334e-03, 2.6709e-01, + -3.5906e-01, 1.0510e+00, 4.9558e-01, 1.5815e-01, 7.9056e-01, + -2.9611e-02, -4.7788e-01, -7.4477e-02, 7.0278e-01, 2.0944e-01, + -9.9172e-01, 2.9514e-02, -3.4982e-01, 5.8798e-01, 1.0294e-01, + -7.4728e-02, 4.0436e-01, -7.3965e-01, 1.1608e-01, 9.7668e-01, + 5.6120e-01, 6.9853e-01, -1.1240e+00, -8.3312e-01, 2.2103e-02, + -6.2014e-01, -1.0602e-01, -1.7596e-01, 5.0554e-01, -5.1151e-02, + -1.7238e-01, 6.7374e-01, -3.0443e-01, -4.7714e-01, 6.0438e-02, + 5.7751e-01, 2.2188e-01, 5.4365e-02, 6.0409e-01, 2.7528e-01, + 1.4016e-01, -3.6410e-01, 2.8936e-01, 7.7458e-01, 1.7155e-01, + -3.6958e-01, 4.1315e-01, 5.7424e-01, -3.4738e-01, 9.6110e-01, + 1.0386e+00, -2.5240e-01, 6.7732e-02, -3.1303e-01, 2.3925e-01, + 2.4488e-01, -2.8734e-01, 6.3480e-01, -2.5157e-01, 2.4225e-01, + -4.3591e-02, -2.9242e-01, -7.2588e-01, 1.2189e+00, 1.8182e-01, + -3.0529e-01, -2.6477e-01, 8.6763e-01, -8.6587e-01, -4.1271e-01, + 6.5369e-02, -2.0068e-01, 2.9647e-02, 5.5062e-01, 1.5341e-01, + -3.3696e-01, 1.0672e-01, 9.7665e-02, -8.4515e-01, -9.6087e-02, + 1.6451e-01, 6.5034e-03, -8.0948e-01, 1.7525e-01, 1.7383e-01, + -3.1217e-01, 5.1740e-01, -4.0527e-01, 2.9478e-01, 6.3920e-01, + 5.8840e-01, 4.5157e-02, 2.6356e-01, -5.4925e-02, -4.1065e-01, + 7.8009e-01, -1.6714e-01, -2.3860e-01, 7.1367e-01, 4.4418e-01, + 3.1620e-01, -2.0515e-01, 8.4948e-01, 3.2021e-01, 7.0564e-02, + 1.7485e-02, -2.4201e-01, -6.2400e-01, 7.2354e-01, 6.4737e-01, + -3.9444e-01, -4.5600e-01, 1.8392e-01, 1.3942e-01, -6.9550e-01, + 5.6519e-01, -3.4485e-01, -1.0811e+00, 2.2609e-03, 
-7.7264e-01, + -1.7520e-01, 2.9658e-01, 3.4381e-01, -2.5195e-01, 8.1881e-01, + 6.2122e-01, 1.8847e-01, 6.6781e-01, -2.9861e-01, -1.6733e-01, + 8.1848e-01, 5.7276e-01, 2.9802e-01, -5.0510e-01, 1.6306e-01, + 2.1710e-01, 8.4910e-01, 8.8421e-01, -4.5161e-01, -4.3119e-01, + -6.4349e-01, 4.1669e-01, -6.1655e-01, 1.6923e-01, -6.1267e-01, + -7.0699e-02, 4.5768e-01, 4.9780e-02, 5.7231e-01, 9.1744e-01, + -4.6267e-01, -1.1711e-01, 1.1242e+00, 8.5179e-01, -9.9988e-02, + 9.1609e-02, 2.5796e+00, -6.0888e-01, -2.4259e-01, -1.4821e-01, + 6.9903e-01, -4.4285e-01, -1.1718e-01, 4.0595e-01, -4.6315e-01, + -6.0281e-01, 2.5561e-01, -6.3263e-01, -8.4082e-01, -2.2050e-01, + 3.4140e-01, -5.1675e-01, 9.8693e-01, -9.4655e-01, 2.5957e-01, + -3.4720e-01, 5.9071e-01, 6.8600e-01, -3.4691e-01, 8.6725e-01, + 8.1034e-01, -6.1649e-01, -1.9221e-01, -6.0154e-01, 4.3298e-01, + 2.1760e-01, -5.0489e-02, 2.7929e-01, -3.3724e-01, 3.8897e-03, + -2.4199e-01, 7.9453e-02, 6.1785e-01, -4.8319e-01, -6.2845e-01, + -2.7386e-01, -4.2604e-01, -4.6069e-01, -6.7830e-01, -2.9960e-01, + -1.0792e-01, 9.2764e-01, -1.7870e-01, 4.6413e-01, -2.7450e-01, + -5.8849e-01, -4.7641e-01, -1.3057e-01, 5.1836e-01, 2.7529e-02, + 4.1431e-01, 1.7077e-01, 5.5491e-01, -9.9315e-02, -9.0158e-01, + 4.0775e-01, 2.8138e-01, 5.2781e-01, -2.2875e-01, 3.6168e-01, + 6.1371e-02, -3.9472e-01, 6.2913e-01, 9.8502e-01, 6.8541e-01, + 3.2804e-01, -4.7059e-01, -7.8782e-01, 4.4955e-01, -5.3380e-02, + 1.4579e-01, -5.4171e-01, -5.6868e-01, 9.9710e-01, 5.0651e-01, + 1.2785e-02, -6.9887e-01, 1.9614e-01, 4.3504e-01, 8.8311e-01, + 3.1585e-01, 2.0645e-01, 6.1529e-01])Parameter containing: +tensor([[-0.0039, -0.0192, -0.0103, ..., 0.0052, 0.0099, 0.0056], + [-0.0152, -0.0052, -0.0067, ..., -0.0012, -0.0065, -0.0135], + [ 0.0042, 0.0065, 0.0006, ..., 0.0062, 0.0171, 0.0149], + ..., + [-0.0145, -0.0176, 0.0174, ..., -0.0138, -0.0058, -0.0098], + [ 0.0048, 0.0245, 0.0091, ..., -0.0059, 0.0057, -0.0145], + [-0.0083, 0.0048, -0.0003, ..., -0.0104, 0.0195, 
0.0123]])Parameter containing: +tensor([-0.3210, 0.2307, -0.0475, ..., 0.0169, -0.0356, -0.0140])Parameter containing: +tensor([[ 5.7831e-03, -8.9035e-03, 5.7757e-05, ..., 6.5651e-03, + -8.6365e-03, -6.7825e-03], + [-1.5414e-04, -4.9782e-03, 2.2430e-02, ..., -2.0050e-02, + -1.9369e-03, 7.0000e-03], + [-1.2711e-02, -7.1526e-03, -3.1647e-02, ..., 6.2637e-03, + -2.0340e-02, 1.4626e-02], + ..., + [-6.7062e-03, 5.0068e-04, -8.3008e-03, ..., 3.5477e-03, + 2.7447e-03, -2.1606e-02], + [ 2.0172e-02, -1.5497e-03, -1.4412e-02, ..., 5.5504e-04, + -1.2497e-02, 7.7095e-03], + [-5.1003e-03, 1.3168e-02, -4.6082e-03, ..., -8.7051e-03, + -2.3022e-03, 1.5236e-02]])Parameter containing: +tensor([ 2.8961e-02, 1.2787e-02, 9.3918e-03, -4.4594e-03, -2.9327e-02, + -1.2665e-02, -1.4824e-02, -5.9021e-02, -2.2614e-02, 4.1718e-02, + 1.3733e-02, -3.5095e-02, -3.7689e-02, -3.9642e-02, -2.7161e-03, + -2.5986e-02, 9.3460e-03, 2.6489e-02, -1.2917e-02, 4.2389e-02, + -2.0542e-03, 3.3264e-02, -7.0419e-03, -2.8320e-02, -4.1840e-02, + 5.9853e-03, 2.1957e-02, 2.8915e-02, 5.2856e-02, 2.3895e-02, + 6.1655e-04, 3.9032e-02, 1.3557e-02, 1.1131e-02, -1.9007e-03, + 4.7516e-02, 1.2726e-02, 3.2593e-02, 2.2568e-02, 3.3508e-02, + 2.0615e-02, -2.6016e-02, 1.1467e-02, -4.1313e-03, 2.7771e-02, + 6.9702e-02, 3.0533e-02, 8.4305e-03, 2.9709e-02, 1.2260e-02, + -1.7212e-02, -1.8600e-02, 2.7863e-02, 1.4679e-02, 6.6833e-02, + 6.7566e-02, -6.8817e-03, 8.4152e-03, 1.9699e-02, 9.3155e-03, + 2.4200e-02, -1.6876e-02, 1.3756e-02, 3.3142e-02, 4.0710e-02, + 1.9028e-02, 1.0834e-02, -1.3260e-02, -2.3842e-03, -6.6589e-02, + -2.7679e-02, 1.1187e-03, -5.0049e-02, -3.3020e-02, -1.6129e-02, + 2.2812e-02, 8.4839e-03, 2.3689e-03, -6.3354e-02, -3.5706e-03, + -1.9455e-02, -3.4142e-03, 3.6530e-02, -1.0811e-02, 1.4145e-02, + 2.9541e-02, 2.4918e-02, 2.1378e-02, -2.0828e-02, 3.6011e-02, + -5.6458e-04, 1.9064e-03, 1.2054e-02, -1.9730e-02, 2.1027e-02, + -1.3870e-02, -2.9053e-02, 6.4049e-03, -4.4525e-02, -2.8152e-02, + 1.3695e-03, 3.7842e-02, 
-3.5675e-02, 1.4519e-02, 3.6182e-03, + -7.3280e-03, 3.5858e-02, 2.4017e-02, 4.1351e-02, 3.2776e-02, + -6.4011e-03, 1.7242e-02, -1.5612e-03, 3.1311e-02, 2.5665e-02, + -6.5651e-03, 1.8250e-02, 1.1436e-02, 1.9958e-02, 2.0828e-02, + 3.8849e-02, -3.1082e-02, -4.8798e-02, -5.4993e-02, 3.0411e-02, + 3.0441e-02, 2.0538e-02, -3.9581e-02, -8.4534e-03, 4.3335e-03, + 6.3629e-03, -1.4992e-02, 5.9776e-03, 1.7700e-02, -2.4979e-02, + 5.0690e-02, 3.2063e-03, -3.2990e-02, -2.0859e-02, -6.4880e-02, + 1.0536e-02, 1.4133e-03, -2.2858e-02, 6.4468e-03, 2.6779e-03, + -1.7410e-02, -2.5543e-02, 5.3329e-03, 2.0813e-02, 3.3905e-02, + 6.7177e-03, 2.1088e-02, 4.3091e-02, 4.9164e-02, 2.2106e-03, + 8.6914e-02, -4.0970e-03, -1.0353e-02, 2.5604e-02, -2.4490e-02, + 8.5754e-03, 4.3945e-03, 3.2013e-02, 2.2629e-02, -1.7807e-02, + -1.9806e-02, -1.8753e-02, -1.6403e-02, -4.0649e-02, 6.3934e-03, + -2.0584e-02, 2.7649e-02, -3.3600e-02, 1.2939e-02, 2.8732e-02, + 5.0888e-03, 8.4915e-03, -3.0380e-02, -7.9193e-03, 2.8717e-02, + 4.5135e-02, -2.2354e-02, -2.2537e-02, 1.6815e-02, 1.0452e-02, + 5.0087e-03, -3.0308e-03, 4.1924e-03, -1.5198e-02, -2.9793e-03, + 7.6660e-02, -1.6541e-02, -2.8168e-02, -7.1220e-03, -3.7689e-02, + -2.5192e-02, -4.6310e-03, -4.5624e-02, -4.1260e-02, 5.1842e-03, + -1.3863e-02, 1.6113e-02, 2.9465e-02, -3.0327e-03, 1.0544e-02, + -1.9348e-02, 4.7394e-02, -1.9409e-02, -6.8054e-03, -3.7689e-02, + -1.0872e-02, 2.6123e-02, 2.0203e-02, 2.8961e-02, 4.1656e-03, + 2.5513e-02, 2.0142e-02, 7.9498e-03, 2.9739e-02, -7.3624e-03, + -1.0452e-02, -5.0507e-03, -2.3865e-02, 2.8076e-02, 1.7365e-02, + -5.5206e-02, 2.0554e-02, -4.1870e-02, 1.3680e-02, 1.2794e-02, + 1.3763e-02, 3.4698e-02, 1.2520e-02, -1.4160e-02, -2.3804e-02, + -5.1636e-02, 2.7771e-02, -2.7023e-02, 3.0563e-02, -1.5541e-02, + 2.4582e-02, -2.0844e-02, 2.2354e-02, -2.4078e-02, 4.7951e-03, + 5.1514e-02, 2.8427e-02, 6.7383e-02, -1.3474e-02, -3.5065e-02, + 3.7212e-03, 5.2277e-02, 1.4748e-02, -3.3264e-02, 2.5116e-02, + 5.3215e-03, -1.2360e-02, 
-9.1782e-03, -2.6123e-02, -1.5993e-03, + -3.6469e-02, 2.7084e-02, 9.0027e-03, -7.9498e-03, -4.9805e-02, + 2.5620e-02, 5.5027e-04, 4.5715e-02, -2.8946e-02, 3.6697e-03, + -1.2688e-02, 1.5526e-02, -5.7869e-03, -5.4413e-02, -1.0658e-02, + -7.0076e-03, 4.3427e-02, -5.3406e-02, 3.8666e-02, 1.4244e-02, + 3.2837e-02, 1.4427e-02, 1.3985e-02, -1.3618e-02, 6.5460e-03, + -2.6169e-02, -9.1858e-03, 2.1744e-02, 3.4790e-02, -4.5868e-02, + 3.6983e-03, -3.9520e-02, 5.7869e-03, 1.7136e-02, -7.0524e-04, + 3.6072e-02, -2.9099e-02, 1.3741e-02, -5.5786e-02, 1.8707e-02, + 1.7776e-02, 1.4793e-02, 3.0563e-02, -1.1759e-03, -7.0343e-03, + -1.8616e-02, -7.0477e-04, -3.0151e-02, 2.7039e-02, -1.0996e-03, + 4.2839e-03, 1.9363e-02, 1.9821e-02, -7.0686e-03, 2.9221e-02, + -2.9312e-02, -4.6692e-02, -4.4708e-02, 1.7822e-02, 3.0777e-02, + 2.0050e-02, 3.2135e-02, -1.4893e-02, 2.1439e-02, 3.8623e-01, + -7.0129e-02, -9.4788e-02, -5.1849e-02, 1.3657e-02, -5.4817e-03, + 2.8809e-02, 1.1253e-02, -9.3842e-03, 2.5925e-02, -2.6611e-02, + 9.3231e-03, -2.2339e-02, -1.2421e-02, 2.6398e-02, 1.1768e-03, + -4.1199e-02, 9.4757e-03, -2.1606e-02, -3.9093e-02, 3.1113e-02, + -4.8279e-02, 5.3986e-02, 4.7264e-03, -4.7668e-02, 2.0275e-03, + -4.2648e-03, -5.8990e-02, 5.2063e-02, 1.6937e-02, 7.5951e-03, + -9.3536e-03, 1.3672e-02, -2.8553e-03, -1.3069e-02, 3.7628e-02, + -2.0187e-02, 1.5152e-02, -5.2734e-02, 1.3647e-03, 4.1290e-02, + 1.5991e-02, 1.1473e-03, 6.7253e-03, -1.7715e-02, 4.7516e-02, + -1.0345e-02, -3.4058e-02, -1.2016e-02, -4.4861e-02, -1.4091e-02, + 3.7933e-02, -3.1647e-02, -4.6814e-02, 7.5798e-03, 7.0000e-03, + -6.3660e-02, 4.2480e-02, -4.9011e-02, -3.1586e-02, -1.9226e-02, + 1.3418e-03, -4.6692e-03, -3.4027e-02, 1.7273e-02, 6.9275e-03, + -1.3649e-02, 7.4654e-03, 2.4170e-02, 1.0431e-04, 6.6223e-02, + -2.2598e-02, 1.8219e-02, 5.1147e-02, 1.7303e-02, -4.0405e-02, + -6.0730e-03, -3.3691e-02, -1.9012e-02, 7.7019e-03, 2.1496e-03, + -6.3591e-03, -3.7003e-03, -2.0294e-02, 9.7179e-04, -1.9089e-02, + 4.1931e-02, -2.8214e-02, 
1.6708e-02, -2.8549e-02, 3.8177e-02, + -1.2573e-01, 1.0773e-02, 1.9058e-02, 2.6188e-03, 2.8412e-02, + 2.8366e-02, 2.4776e-03, 1.2312e-03, -1.3908e-02, 1.8539e-02, + -3.2440e-02, 1.6312e-02, 1.5778e-02, 1.7212e-02, -6.2637e-03, + 1.9257e-02, 1.8768e-02, -1.8646e-02, 4.2877e-02, 7.7332e-02, + 6.6872e-03, -2.4597e-02, -1.7700e-02, 2.2888e-02, -2.9144e-02, + 3.9032e-02, -1.5167e-02, 1.4137e-02, -1.0522e-01, -3.0396e-02, + -2.9755e-02, 4.9553e-03, -4.7302e-03, 6.8359e-03, 2.0172e-02, + -1.7578e-02, -1.6190e-02, -6.3820e-03, 1.4679e-02, 2.4231e-02, + -2.3590e-02, 1.4351e-02, -4.9667e-03, 3.7964e-02, -2.4857e-02, + -4.9713e-02, 5.2002e-02, -1.8738e-02, -1.4105e-03, -1.1841e-02, + -4.4464e-02, -2.4124e-02, -6.8909e-02, -3.9978e-02, 3.4285e-04, + -1.1314e-02, 3.5262e-04, -7.2365e-03, -3.4210e-02, -1.7502e-02, + -9.1629e-03, 6.1005e-02, 4.6967e-02, 8.7662e-03, 4.1840e-02, + 4.6783e-02, -1.5045e-02, -5.5725e-02, -1.1307e-02, 9.8953e-03, + 2.8076e-02, -3.3783e-02, -1.9240e-04, -6.7101e-03, 6.9094e-04, + 2.4429e-02, 1.6830e-02, -3.5492e-02, 5.9624e-03, -1.6205e-02, + -2.9663e-02, -8.1100e-03, 1.1604e-02, 1.9363e-02, -1.1749e-02, + 7.3910e-05, -4.3793e-02, -3.8910e-02, 7.4005e-03, 1.4145e-02, + -8.0109e-03, 1.2535e-02, 2.8973e-03, 1.5526e-02, -3.7476e-02, + 8.3313e-03, 3.2196e-02, 4.2084e-02, 1.5343e-02, 3.4149e-02, + -6.2141e-03, -1.6312e-02, 2.6016e-02, 3.3051e-02, -4.0771e-02, + 2.5650e-02, 6.4964e-03, -2.9785e-02, -3.1403e-02, -3.9703e-02, + -1.0674e-02, -5.3101e-02, -3.0121e-02, 6.3972e-03, -2.8152e-02, + 6.0394e-02, -5.6427e-02, 4.2358e-02, -1.8692e-02, 2.8595e-02, + 2.9587e-02, -5.4512e-03, 3.2257e-02, -2.7817e-02, 7.8125e-03, + -1.3382e-02, 3.1891e-03, 7.9117e-03, 3.7537e-03, -3.0460e-03, + -5.0964e-02, 1.0399e-02, 2.2995e-02, -5.2109e-03, -6.6986e-03, + -4.9561e-02, 6.8932e-03, 6.7139e-03, 2.4902e-02, 1.6083e-02, + -8.6975e-04, -2.6413e-02, 2.0828e-02, 2.1286e-02, -1.0796e-02, + -1.5564e-03, -5.8655e-02, 1.8890e-02, -7.1907e-03, -2.8702e-02, + -6.8626e-03, 1.5297e-02, 
-3.2135e-02, -4.1931e-02, -1.3748e-02, + 4.0131e-03, -5.4779e-03, -7.3509e-03, 5.8258e-02, -3.4515e-02, + -2.0142e-02, -8.4763e-03, -4.4922e-02, 2.3926e-02, -2.6932e-02, + 2.0294e-02, 4.2084e-02, 1.6983e-02, -5.3467e-02, -2.2415e-02, + -4.5052e-03, -1.5137e-02, 1.5381e-02, 3.7231e-02, -1.3514e-03, + 3.6163e-03, 2.1103e-02, 2.0142e-02, 5.3040e-02, 3.6377e-02, + -7.5989e-03, 5.5008e-03, -1.7075e-02, -2.9892e-02, 1.0551e-02, + -3.6163e-02, -1.8738e-02, -5.9433e-03, -2.8381e-02, -2.8671e-02, + 1.4938e-02, 2.2995e-02, -5.3589e-02, 2.4155e-02, -2.2415e-02, + 6.8359e-03, -1.9028e-02, -3.9902e-03, -3.0121e-02, -2.0981e-03, + -6.4453e-02, 2.8778e-02, -3.8483e-02, 7.0419e-03, -4.4006e-02, + -4.5593e-02, 1.8921e-03, 2.4109e-02, -3.8391e-02, -5.6992e-03, + -2.4826e-02, 1.6586e-02, -3.5309e-02, -3.6430e-03, 1.2115e-02, + 5.4810e-02, -3.6560e-02, 2.7771e-02, -5.2643e-02, -3.8505e-04, + 2.2278e-02, 1.0681e-02, 4.1351e-02, 1.7939e-03, -1.9012e-02, + -1.8173e-02, -3.4882e-02, 5.5199e-03, -4.0497e-02, 6.7177e-03, + 2.4681e-03, 1.6891e-02, 2.5146e-02, 4.0070e-02, 7.3891e-03, + -2.0142e-02, -6.4125e-03, -7.8659e-03, -1.4868e-03, 7.1220e-03, + -2.9434e-02, -1.7654e-02, 1.4297e-02, 1.4519e-02, 1.7502e-02, + 1.0345e-02, -2.4170e-02, 2.0309e-02, 3.1097e-02, -1.1818e-02, + 3.6072e-02, 2.1362e-02, -3.7050e-04, -2.3483e-02, -3.8681e-03, + 2.2079e-02, -3.0991e-02, -3.6835e-02, -1.7578e-02, 5.9700e-03, + -2.9816e-02, -6.9122e-03, -8.8257e-02, -4.4594e-03, -1.8082e-02, + -3.7323e-02, 1.7603e-01, -8.0948e-03, -1.6403e-02, 1.6113e-02, + -7.1449e-03, 9.5444e-03, 4.4212e-03, 4.0619e-02, -2.9068e-02, + 2.0874e-02, -3.3356e-02, 5.0323e-02, 1.2749e-02, -4.8637e-03, + -5.3101e-03, 8.6594e-03, 8.5754e-03, 3.8567e-03, 4.4769e-02, + 1.1604e-02, 5.2551e-02, -2.6474e-03, -4.3091e-02, 8.9951e-03, + -3.3569e-02, 4.7951e-03, 1.8875e-02, -2.7618e-02, 5.1666e-02, + 3.7720e-02, 1.8768e-02, 3.1189e-02, -7.6103e-03, 1.9409e-02, + -2.0828e-02, -2.7084e-02, 3.2440e-02, 7.1182e-03, -6.8665e-02, + -4.0913e-04, 
1.2344e-02, 1.5488e-02, 4.6229e-04, -4.8828e-02, + -2.3132e-02, 5.8136e-02, -1.5335e-02, 7.5226e-03, 2.4719e-02, + 2.4231e-02, 8.5754e-03, 2.9892e-02, 7.0534e-03, 3.0396e-02, + -4.3030e-03, 1.9150e-02, 1.6823e-03, 1.2360e-02, 2.5757e-02, + 1.2772e-02, -1.3840e-02, -4.5959e-02, -6.2256e-02, 4.0016e-03, + 1.6203e-03, -2.7069e-02, -6.0883e-03, -6.6284e-02, -2.9633e-02, + -3.8719e-03, 1.0887e-02, 2.1530e-02, -3.0304e-02, 2.3060e-03, + 3.8727e-02, -1.4484e-04, -1.0635e-02, -4.5395e-03, 6.2561e-03, + 4.6356e-02, 5.3864e-02, 6.8550e-03, 2.3193e-03, -2.4506e-02, + -1.9852e-02, 5.5122e-03, 8.6746e-03])Parameter containing: +tensor([1.4389, 1.5492, 1.4444, 1.4369, 1.4696, 1.5982, 1.4798, 1.4962, 1.5059, + 1.4201, 1.5556, 1.4056, 1.4634, 1.5500, 1.3880, 1.4895, 1.4535, 1.6242, + 1.4925, 0.4319, 1.4099, 1.5599, 1.5428, 1.4731, 1.5599, 1.4931, 1.5788, + 1.4391, 1.6308, 1.4246, 1.5024, 1.5215, 1.4943, 1.5267, 1.5500, 1.5011, + 1.4709, 1.5334, 1.5236, 1.4504, 1.5937, 1.4595, 1.5022, 1.5378, 1.5704, + 1.5227, 1.5181, 1.4495, 1.5018, 1.4275, 1.4919, 1.5308, 1.4562, 1.5681, + 1.5308, 1.4581, 1.4151, 1.5746, 1.5263, 1.5096, 1.5268, 1.5130, 1.3768, + 1.5480, 1.4876, 1.5399, 1.4630, 1.4655, 1.5274, 1.4453, 1.4500, 1.5840, + 2.3376, 1.5532, 1.5260, 1.4554, 1.4595, 1.5422, 1.4586, 1.5038, 1.5498, + 1.4491, 1.4934, 1.5165, 1.5430, 1.4469, 1.4588, 1.4770, 1.4158, 1.4627, + 1.5009, 1.4705, 1.5292, 1.5613, 1.5451, 1.4401, 1.5326, 1.4154, 1.4870, + 1.5448, 1.5034, 1.5059, 1.4505, 1.5611, 1.4406, 1.4348, 1.4452, 1.4387, + 1.4832, 1.5420, 1.5072, 1.5112, 1.4903, 1.4850, 1.4967, 1.5254, 1.4557, + 1.4718, 1.5248, 1.4829, 1.5204, 1.4817, 1.4837, 1.5054, 1.5230, 1.5211, + 1.4579, 1.5346, 1.5020, 1.4881, 1.4620, 1.4641, 1.4179, 1.4865, 1.3921, + 1.5071, 1.5182, 1.5576, 1.4620, 1.4443, 1.5125, 1.4419, 1.5722, 1.4472, + 1.4750, 1.5109, 1.4204, 1.5067, 1.5041, 1.4859, 1.5257, 1.4809, 1.4586, + 1.5638, 1.4914, 1.2267, 1.5272, 1.5182, 1.5851, 1.5238, 1.4617, 1.4648, + 1.4793, 1.5202, 1.4991, 1.5037, 
1.4156, 1.5364, 1.4462, 1.5589, 1.5135, + 1.5042, 1.4492, 1.4746, 1.5703, 1.5694, 1.5920, 1.4744, 1.5622, 1.4728, + 1.4683, 1.5139, 1.4662, 1.4459, 1.5067, 1.4089, 1.4473, 1.9656, 1.4960, + 1.4923, 1.4494, 1.4368, 1.5290, 1.6168, 1.4908, 1.4451, 1.5191, 1.5695, + 2.9963, 1.4412, 1.4245, 1.5307, 1.5404, 1.6068, 1.4556, 1.5329, 1.3620, + 1.5110, 1.4351, 1.4680, 1.4922, 1.4024, 1.4582, 1.3978, 1.5052, 1.5299, + 1.4814, 1.4928, 1.4387, 1.6266, 1.4509, 1.5028, 1.3828, 1.4767, 1.5010, + 1.4637, 1.5415, 1.5347, 1.4912, 1.5763, 1.4288, 1.5410, 1.5556, 1.5185, + 1.4852, 1.5436, 1.5161, 1.5171, 1.4735, 1.5772, 1.5198, 1.4617, 1.5643, + 1.5262, 1.5619, 1.4639, 1.4431, 1.4988, 1.4283, 1.5170, 1.5242, 1.5298, + 1.3956, 1.4130, 1.5891, 1.5797, 1.4557, 1.4643, 1.4404, 1.4528, 1.5400, + 1.4213, 1.4825, 1.4541, 1.5162, 1.4745, 1.5409, 1.4437, 1.5563, 1.5220, + 1.4978, 1.4322, 1.5253, 1.4922, 1.5099, 1.4052, 1.4925, 1.4202, 1.4800, + 1.5443, 1.4790, 1.5203, 1.5604, 1.5245, 1.5537, 1.4405, 1.4779, 1.5587, + 1.5019, 1.4787, 1.4604, 1.5525, 1.6145, 1.4254, 1.4455, 1.4977, 1.4991, + 1.4780, 1.5286, 1.5118, 1.4985, 1.4406, 1.4756, 1.4888, 1.5407, 1.4589, + 1.4604, 1.5004, 1.4316, 1.5032, 1.5052, 1.5273, 1.4469, 1.4871, 1.4798, + 1.5064, 1.4596, 1.4211, 1.5157, 1.4834, 1.4306, 1.4624, 1.5115, 1.5144, + 1.0832, 1.4433, 1.5137, 1.4210, 1.4692, 1.4682, 1.5019, 1.5391, 1.4631, + 1.5099, 1.5117, 1.4275, 1.4380, 1.4603, 1.5109, 1.4955, 1.4468, 1.4753, + 1.4881, 1.5287, 1.4950, 1.5418, 1.4729, 1.5359, 1.5092, 1.5260, 1.5131, + 1.5075, 1.4478, 1.5727, 1.5024, 1.4902, 1.4793, 1.5357, 1.4492, 1.5143, + 1.4284, 1.4360, 1.5019, 1.5772, 1.5235, 1.4726, 1.5132, 1.4744, 1.5066, + 1.4778, 1.5114, 1.5889, 1.5091, 1.5202, 1.4418, 1.4208, 1.4492, 1.5511, + 1.5025, 1.4133, 1.5739, 1.5198, 1.4711, 1.5452, 1.5289, 1.4598, 1.5236, + 1.4898, 1.4469, 1.4270, 1.5182, 1.4165, 1.5311, 1.4409, 1.4210, 1.5046, + 1.4362, 1.5257, 1.6063, 1.5472, 1.4609, 1.6293, 1.4389, 1.4431, 1.4479, + 1.4984, 1.4706, 1.4638, 1.4930, 
1.4133, 1.4526, 1.4260, 1.4582, 1.4404, + 1.2730, 2.3238, 1.5373, 1.5353, 1.4663, 1.5103, 1.4978, 1.4814, 1.4722, + 1.4676, 1.5279, 1.5444, 1.4845, 1.4893, 1.5243, 1.4853, 1.6193, 1.5047, + 1.5249, 1.4747, 1.4290, 1.4990, 1.5122, 1.4788, 1.4516, 1.4248, 1.4096, + 1.5029, 1.4956, 1.1624, 1.4618, 1.5129, 1.5083, 1.4871, 1.5393, 1.5344, + 1.4575, 1.5062, 1.5657, 1.4710, 1.4620, 1.4261, 1.4575, 1.4434, 1.5555, + 1.4455, 1.4711, 1.5682, 1.4642, 1.4363, 1.4250, 1.5141, 1.5018, 1.4898, + 1.4853, 1.5390, 1.5320, 1.4198, 1.4610, 1.4487, 1.5688, 1.5402, 1.5826, + 1.5324, 1.5616, 1.5575, 1.4719, 1.4533, 1.5122, 1.2552, 1.4833, 1.4171, + 1.4476, 1.4818, 1.4313, 1.5298, 1.5112, 1.5557, 1.4849, 1.5036, 1.4919, + 1.5493, 1.5096, 1.4353, 1.4918, 1.5246, 1.4687, 1.5117, 1.4312, 1.4720, + 1.4250, 1.5285, 1.4419, 1.5280, 1.4315, 1.4631, 1.4872, 1.5459, 1.6067, + 1.4754, 1.4919, 1.5278, 1.5539, 1.4711, 1.4978, 1.4965, 1.4713, 1.4707, + 1.5351, 1.4653, 1.4548, 1.4723, 1.4779, 1.4827, 1.5278, 1.5125, 1.4794, + 1.5375, 1.5805, 1.5112, 1.5454, 1.4380, 1.5259, 1.4707, 1.4437, 1.5549, + 1.6015, 1.4510, 1.4803, 1.3817, 1.5958, 1.4488, 1.5055, 1.5146, 1.5294, + 1.5361, 1.4402, 1.4803, 1.6003, 1.5086, 1.5265, 1.4244, 1.4718, 1.5720, + 1.5039, 1.4664, 1.4566, 1.3609, 1.4783, 1.4991, 1.4960, 1.6116, 1.5236, + 1.4866, 1.5166, 1.5421, 1.4833, 1.4550, 1.5109, 1.4517, 1.4751, 1.4535, + 1.4929, 1.5054, 1.4567, 1.4902, 1.4949, 1.4979, 1.5163, 1.5112, 1.4693, + 1.5772, 1.5449, 1.4362, 1.4490, 1.6147, 1.4914, 1.6548, 1.4735, 1.4384, + 1.4582, 1.4245, 1.5034, 1.4928, 1.4265, 1.5251, 1.5082, 1.5644, 1.5962, + 1.4326, 1.5006, 1.5278, 1.6330, 1.7276, 1.5964, 1.5586, 1.4862, 1.5098, + 1.5259, 1.5411, 1.5315, 1.5077, 1.5071, 1.5442, 1.6036, 1.5586, 1.4586, + 1.5649, 1.4736, 1.4905, 1.4643, 1.5023, 1.5050, 1.4885, 1.4790, 1.5414, + 1.5510, 1.4828, 1.3514, 1.4503, 1.5604, 1.5021, 1.5154, 1.5488, 1.4103, + 1.4800, 1.4122, 1.6422, 1.5825, 1.4450, 1.3146, 1.5293, 1.5225, 1.5205, + 1.5006, 1.4796, 1.5416, 1.5103, 
1.5915, 1.4845, 1.5031, 1.5782, 1.5030, + 1.4565, 1.4765, 1.4934, 1.5769, 1.4670, 1.5313, 1.5007, 1.4586, 1.5791, + 1.5857, 1.4441, 1.5946, 1.4943, 1.4492, 1.4897, 1.5223, 1.4263, 1.4230, + 1.5314, 1.5365, 1.6068, 1.3709, 1.4172, 1.4926, 0.3475, 1.5345, 1.4760, + 1.4426, 1.4324, 1.5224, 1.5465, 1.5081, 1.4960, 1.5665, 1.5345, 1.5173, + 1.5048, 1.4816, 1.5093, 1.5004, 1.5425, 1.5010, 1.5042, 1.4334, 1.5301, + 1.4441, 1.4909, 1.3882, 1.5067, 1.5217, 1.5300, 1.4790, 1.4483, 1.5049, + 1.4574, 1.4995, 1.4635, 1.4108, 1.4958, 1.4742, 1.4559, 1.4804, 1.5102, + 1.4960, 1.4830, 1.5441, 1.4797, 1.4928, 1.4054, 1.2281, 1.5226, 1.5581, + 1.5337, 1.4610, 1.5058, 1.5363, 1.5828, 1.4924, 1.5631, 1.4330, 1.4567, + 1.5843, 1.4818, 1.4862, 1.4260, 1.5339, 1.4564, 1.5012, 1.4777, 1.4922, + 1.4950, 1.5238, 1.5327, 1.5206, 1.4421, 1.5222, 1.5461, 1.5083, 1.5667, + 1.5199, 1.4212, 1.5084, 1.5419, 1.5771, 1.4018, 1.5208, 1.3893, 1.5437, + 1.5531, 1.4182, 1.4350])Parameter containing: +tensor([ 9.5040e-02, 6.2264e-02, -1.1090e-01, -1.8604e-02, 8.0698e-02, + 1.3616e-01, 6.9032e-02, 3.6952e-02, -1.3561e-01, -2.6607e-03, + 5.8487e-02, -3.3613e-02, -8.9103e-02, -1.5545e-02, -9.2796e-02, + -5.0928e-02, -1.2088e-02, -2.7953e-02, -1.5458e-02, 4.8689e+00, + -9.4001e-02, 4.3498e-02, -1.5978e-02, 2.9513e-03, -5.3258e-02, + 1.4907e-02, -1.7444e-02, 5.4565e-03, 5.2729e-02, -8.5186e-02, + 4.1780e-02, 2.2448e-02, 2.6540e-02, 7.7438e-02, 5.7399e-02, + -3.7843e-02, 3.1810e-02, -3.5782e-02, -1.3307e-01, 4.2217e-02, + 4.0042e-02, 3.2673e-02, -1.7885e-03, 1.6705e-02, -4.0591e-02, + 3.4436e-03, 6.0339e-02, -2.1147e-02, 2.8313e-02, 5.5186e-02, + 1.9493e-02, -1.3385e-02, -7.6236e-03, 2.4183e-02, 9.9285e-02, + 8.9684e-02, -1.0770e-01, -3.9934e-02, 3.3857e-02, 1.4410e-02, + 6.5640e-02, -8.2179e-02, 4.2060e-02, 3.6734e-02, 9.0417e-02, + -3.1195e-02, 7.3319e-02, 9.6951e-02, 1.6255e-02, 2.2244e-01, + 4.1289e-02, 5.4411e-02, 4.8112e-01, 8.8067e-02, -1.0920e-01, + 9.3275e-02, 2.4059e-02, -1.5016e-01, -8.2098e-02, 
1.9829e-02, + -2.0126e-02, -5.1567e-02, -2.6316e-02, 1.0021e-01, 4.6702e-02, + 5.0399e-02, 1.4458e-01, 1.1531e-01, -2.2371e-02, 2.3077e-02, + 2.5645e-03, 1.2902e-02, 2.5156e-02, -5.4277e-02, -2.7611e-02, + 6.6039e-02, -7.8203e-02, -2.1121e-02, -6.1290e-02, -5.3134e-02, + -3.1687e-03, -7.9709e-02, 8.7348e-03, -6.4941e-02, 4.9401e-02, + -8.7840e-03, -5.3413e-03, -8.4726e-02, 2.7666e-02, -3.4226e-02, + -6.2891e-02, -3.7197e-02, 3.3137e-02, 8.0032e-02, 1.5725e-02, + -4.9238e-02, -3.9217e-02, -7.3844e-02, -2.2711e-02, 1.0376e-01, + -2.0982e-02, -1.8312e-01, -7.5614e-02, -4.5955e-02, -1.4845e-01, + 3.5872e-04, -1.3807e-01, -6.7375e-03, 5.1624e-02, -4.4845e-02, + -1.1063e-01, 7.7368e-02, -4.5386e-02, -3.4162e-02, -6.7513e-02, + 5.0009e-02, -3.4471e-02, 1.9803e-02, -2.7813e-02, -2.6261e-02, + 2.8243e-02, -5.6916e-02, 7.3801e-02, 3.9814e-02, 6.2373e-02, + -1.2426e-02, -2.7946e-02, -1.9696e-02, 1.6410e-02, 1.0248e-01, + 1.8027e-01, -6.1605e-02, -9.9918e-02, -5.3417e-03, -3.2127e-02, + -2.8592e-01, -2.2483e-02, -7.7138e-02, 4.1591e-02, -7.1350e-04, + 1.4369e-01, -1.2360e-01, -2.4565e-02, 6.2265e-02, 1.0753e-02, + 4.7586e-03, -2.7731e-02, 4.4703e-02, -3.8944e-02, 2.0401e-02, + -5.4677e-02, 6.2356e-02, 3.8168e-02, 5.0544e-02, 6.1430e-02, + -8.3724e-02, 1.9396e-02, 6.3526e-02, -2.8940e-02, 2.4832e-02, + 7.3654e-03, -7.3348e-02, 5.7481e-02, -3.2183e-02, 1.5765e-02, + -1.6311e-01, 3.8052e-02, 3.4197e-04, -3.0377e-02, -7.3608e-02, + 3.8378e-02, -2.0785e-02, 9.1732e-02, 6.0364e-02, -1.0010e-01, + 3.6494e-02, -5.0895e-02, -1.4789e-01, 8.7741e-02, 2.5649e-02, + -1.3797e-01, -3.8446e-02, -3.6134e-02, 1.7410e-02, 4.8862e-02, + 1.1463e-01, -9.7149e-02, 6.3472e-02, 1.2722e-01, 2.0616e-02, + 1.0777e-02, 1.9428e-01, -2.0818e-02, -5.5920e-02, -9.7729e-02, + -1.9469e-02, -2.6490e-02, 3.9081e-02, -1.1816e-01, -2.0222e-01, + -1.7587e-02, -3.8713e-02, 8.3494e-02, -2.2572e-02, 3.5223e-02, + -3.0260e-02, 9.7330e-02, -1.5827e-02, 1.2929e-01, -2.1608e-02, + -1.2937e-02, 2.7590e-02, -1.8385e-02, 
6.9238e-02, 1.2346e-02, + 2.1508e-02, -8.5435e-02, 3.3443e-02, 7.3312e-02, 1.8266e-02, + 8.2059e-02, 8.2763e-02, 4.0149e-02, 3.6304e-02, 3.5831e-02, + 6.1176e-02, 7.7396e-02, -4.7230e-02, 3.2182e-02, -3.1280e-03, + 4.4441e-02, 4.4095e-02, 5.9928e-02, -7.2963e-02, 7.7081e-02, + -7.7831e-02, -6.3993e-02, -7.7120e-02, 1.9159e-02, 3.0940e-02, + -4.3952e-02, -6.4650e-02, -3.4125e-02, -5.8668e-02, -2.7959e-02, + 4.8970e-02, -5.8047e-02, -8.7014e-03, -7.8959e-02, -2.2804e-02, + -3.4480e-02, 9.5613e-03, 7.5918e-02, -6.5928e-02, 1.3879e-02, + 4.7323e-02, -1.1317e-01, -2.2184e-03, 6.7904e-02, -1.0016e-01, + 1.8474e-01, 1.4786e-01, 7.0485e-03, 2.2563e-02, 5.1969e-02, + 4.1149e-02, 8.6695e-02, -6.1285e-02, 1.0310e-01, -1.0924e-01, + -2.6213e-02, -1.9226e-02, 1.0561e-01, 4.9150e-02, 6.3478e-02, + 1.8294e-02, 1.6091e-01, 4.7983e-02, 4.8363e-02, 7.2620e-02, + 5.2564e-02, 3.9703e-03, -4.5733e-02, 1.0324e-01, -9.4249e-03, + 1.3086e-02, 1.1960e-01, -8.4199e-02, 6.5710e-02, -5.5719e-02, + 3.2044e-02, -4.8804e-03, -7.7953e-02, -7.9623e-02, -8.9439e-03, + -7.2873e-02, 7.2873e-04, 1.8083e-02, 1.4896e-01, -4.6932e-02, + -5.7677e-02, 2.1030e-01, -8.1739e-02, -4.1775e-02, 1.2972e-01, + -1.9701e-01, 4.4433e-02, 1.9681e-02, 3.2872e-02, -1.5593e-01, + 1.5408e-01, -2.0397e-02, 1.2069e-01, -3.3059e-02, 2.9468e-02, + 1.2204e-01, -3.2076e-03, -5.6352e-02, 9.4064e-03, -3.4305e-02, + -1.5335e-01, 2.1700e-02, -2.2277e-02, -6.3310e-02, 9.7901e-02, + 8.1202e-03, 8.5831e-02, 9.2371e-02, 5.5351e-02, -2.6530e-02, + -6.1205e-02, 3.0950e-02, -2.3011e-02, 4.4549e-02, -2.0652e-02, + -5.5406e-02, -3.3195e-02, 5.0550e-02, -1.2650e-02, -9.6969e-02, + 1.5714e-02, 1.8129e-02, 9.1402e-03, -9.1455e-02, -4.2209e-02, + -5.1691e-03, 3.0798e-02, 8.4623e-02, 4.9172e-02, 2.8383e-02, + 9.3418e-02, -2.3945e-02, -7.1348e-02, 2.9395e-03, -3.3130e-02, + 1.6025e-02, 7.9770e-02, 7.0224e-02, -2.8055e-02, 2.4734e-02, + -9.9713e-02, 9.2963e-02, 1.6934e-02, -4.6360e-03, -2.3557e-02, + -4.9357e-03, -2.3687e-02, -7.7652e-02, 
-8.3567e-02, -1.0004e-01, + 3.3868e-02, -5.1590e-04, -8.0284e-02, -1.6152e-02, 4.4573e-02, + -3.8656e-03, 1.1956e-02, 5.6796e-02, 2.0969e-02, 7.8286e-02, + 1.1165e-01, -6.3958e-02, 7.6898e-02, 4.6967e-02, -1.0099e-01, + -1.3392e-01, -5.2005e-02, 3.4906e-02, 1.6815e-02, -3.4399e-02, + -7.8110e-02, 5.6494e-02, 6.3118e-02, -5.4425e-02, -1.3512e-02, + -1.3473e+00, -3.9723e-02, 2.4948e-02, -5.1947e-02, -7.7290e-03, + -1.5215e-02, 2.3884e-02, -1.6945e-02, -8.0580e-02, -5.4007e-02, + 2.1998e-02, -1.8827e-01, -8.7725e-02, 5.0141e-02, -6.8379e-02, + -2.7706e-02, -1.7930e-02, -3.4555e-02, 6.4826e-02, 9.8114e-02, + -1.2598e-02, 3.6801e-02, 3.4150e-02, 1.1018e-01, -7.9386e-02, + -4.1934e-02, -8.4118e-02, -1.8066e-02, 1.6404e-01, 1.1735e-01, + -1.3154e-01, -1.0468e-02, -7.2428e-02, 1.0006e-01, 2.2076e-02, + 3.2061e-02, 2.0222e-02, -1.0035e-01, 5.2576e-02, 2.7860e-02, + -2.0873e-02, 3.7349e-02, 2.0231e-02, -4.2276e-02, -7.7922e-02, + 3.1693e-02, 3.0677e-02, -9.3555e-02, 3.2186e-02, 4.0509e-02, + 1.8245e-02, 4.8887e-02, -7.9230e-02, 7.1811e-02, 2.9829e-02, + 1.9192e-02, -1.1067e-01, -8.1617e-02, 2.2437e-02, -2.6069e-02, + 4.3656e-02, 1.6310e-01, 9.6680e-02, 1.0566e-02, 9.1838e-02, + 1.1688e-01, -1.2390e-01, 3.2972e-02, 1.2624e-01, -9.4054e-02, + -4.5634e-03, -1.5043e-02, 1.2885e-01, 9.9710e-02, 3.7776e-03, + 4.5239e-02, -9.0217e-03, 4.9700e-03, -2.4734e-02, -1.2538e-01, + 4.6608e-02, -8.2679e-02, 9.4270e-02, -7.2957e-02, 5.8114e-03, + 5.3164e-04, -4.3835e-02, 3.8315e-02, -4.7578e-02, -8.0964e-02, + 3.8714e-02, 1.0235e-01, 7.0860e-02, 3.7793e-02, 2.9038e-02, + -7.7706e-02, -1.6005e-02, -4.0638e-02, 7.4940e-02, -9.7327e-02, + 1.2264e-03, 1.6966e-02, 1.9095e-01, 1.3447e-01, -1.0382e-01, + -6.7178e-02, -1.4883e-02, 6.4356e-02, -3.1611e-02, -3.0012e-02, + -2.2480e-02, -1.9876e-02, -2.2346e-02, 7.6023e-03, -7.9597e-02, + 2.8261e-02, 5.0167e-02, -1.8636e-02, -1.0160e-01, -3.2626e-02, + 3.7352e-02, -1.4968e-02, 2.9950e-02, -5.7603e-02, -7.1363e-02, + -3.4022e-02, 3.4849e-02, 3.3342e-02, 
3.8673e-03, -1.7286e-02, + -4.4154e-02, 6.3981e-02, 2.6404e-02, 7.4898e-02, 3.6818e-03, + 3.2078e-02, 1.3596e-02, -1.0983e-01, 4.5060e-02, -3.5186e-03, + -5.2234e-03, -1.4830e-02, -4.7432e-02, -1.6053e-03, 1.2008e-01, + -6.6264e-02, 1.1926e-01, 2.6320e-02, 2.2969e-02, 1.5679e-01, + 3.5541e-02, -2.1966e-02, 3.5414e-02, 1.3858e-02, 2.8476e-02, + 7.5914e-02, -1.7632e-02, -1.0996e-01, -2.1218e-02, 4.4041e-02, + -1.6149e-02, -7.5358e-03, 6.0889e-02, 4.7087e-02, -1.8710e-02, + -1.2684e-01, -2.2078e-02, 2.9795e-02, 1.0746e-01, 8.8426e-02, + 2.8203e-02, 4.2237e-02, -5.8773e-02, 4.3712e-02, -5.0027e-03, + 1.5752e-02, 6.0129e-02, 1.2071e-01, -2.2050e-02, 1.1836e-02, + -5.5813e-02, 4.6402e-02, 9.4760e-03, -7.1275e-03, 5.8639e-02, + -9.9549e-02, -1.0721e-01, 5.5813e-04, -1.2840e-04, 2.6538e-02, + -8.8939e-02, 7.2796e-02, -8.9444e-02, 1.8669e-02, -2.8006e-02, + 7.6746e-02, 3.4517e-02, -2.7705e-02, -2.0928e-02, -6.2216e-02, + -1.2193e-01, 4.9761e-02, 6.8638e-02, -7.1114e-03, -9.1348e-02, + 5.0051e-02, -4.5998e-04, 4.2629e-02, 3.8347e-02, 5.8768e-03, + -3.8492e-02, -8.8272e-04, 3.2111e-02, 9.0239e-02, -3.9676e-02, + 1.2868e-01, -2.6537e-02, 1.0337e-01, 1.7039e-01, 3.4185e-02, + -5.4864e-03, -6.1429e-02, 4.7363e-02, 5.8705e-02, -4.7458e-02, + 3.6429e-02, -2.7022e-02, 1.0612e-01, -5.0205e-02, 3.3695e-02, + 2.0430e-02, 3.3421e-02, 9.1960e-02, 5.0020e-02, 1.0917e-01, + 1.3181e-01, 2.9231e-02, -6.8652e-02, 7.7897e-02, -9.2801e-02, + 4.2380e-02, -1.3279e-01, -2.2904e-02, 5.7570e-02, 8.7368e-02, + -4.0879e-02, 6.1447e-03, -1.1034e-01, -1.8915e-02, 1.1407e-01, + 3.7383e-02, -1.3176e-01, -8.9242e-02, 5.5264e-02, 9.7983e-03, + -2.2093e-01, 1.1690e-02, 2.6277e-02, -2.1914e-04, -2.7651e-02, + 9.1245e-03, -1.7339e-02, -4.2814e-02, -5.7284e-03, 7.8398e-02, + 5.2745e-03, -4.2589e+00, -6.7703e-04, 3.3652e-02, -2.4619e-02, + -9.9009e-02, -3.9888e-02, -4.6650e-02, -4.1620e-02, 3.5950e-02, + -8.1256e-02, -2.8324e-02, 4.3199e-02, -9.3154e-03, -3.0566e-02, + 5.8488e-02, 3.1941e-02, -9.1054e-02, 
4.4687e-02, -7.7019e-02, + 7.2983e-02, 4.5990e-02, -4.4156e-02, 1.3127e-02, -1.6789e-01, + -2.4004e-02, 7.0575e-02, 5.2642e-02, 3.7899e-03, 4.3697e-02, + -2.6423e-02, 8.3382e-02, 5.5484e-02, 3.6222e-02, 3.1565e-02, + 6.2682e-03, 1.0727e-03, -1.8432e-02, -7.0222e-02, -4.8251e-04, + 2.5970e-02, 2.0257e-02, 5.5975e-02, -5.8172e-02, 6.7692e-02, + -9.9726e-03, -1.9012e-01, 1.2354e-01, 2.4521e-02, 1.1634e-01, + 1.0389e-01, 3.1179e-02, 7.4662e-02, 6.8046e-02, -2.9729e-02, + 2.2585e-03, 1.1500e-02, 1.7597e-03, -1.8727e-02, -4.7353e-02, + 1.0485e-01, -1.6798e-01, -6.5276e-02, 7.2302e-02, -4.9853e-02, + -3.5061e-02, -1.2070e-01, -5.1736e-02, 7.9296e-03, -5.0152e-02, + -3.6957e-02, -5.1987e-03, -5.0143e-02, -5.8339e-02, 7.5306e-02, + -2.9533e-02, 7.2287e-02, 9.0682e-02, 2.2872e-02, -5.3548e-02, + 2.0132e-02, 6.3997e-02, 6.5266e-02, -6.9182e-02, -1.0422e-01, + -7.9938e-02, 1.0629e-01, -3.4558e-02])Parameter containing: +tensor([[-2.1896e-02, 2.0416e-02, 2.3441e-03, ..., 1.0908e-04, + 6.9656e-03, 1.4427e-02], + [ 3.4351e-03, -6.1264e-03, 2.0737e-02, ..., -1.4359e-02, + -2.4033e-02, -1.4053e-02], + [ 1.8631e-02, -2.3880e-02, -5.8861e-03, ..., 5.5122e-03, + -3.2663e-04, -2.0580e-03], + ..., + [-7.4425e-03, -9.9869e-03, 1.6281e-02, ..., 2.2583e-02, + 1.5378e-05, 1.3184e-02], + [ 7.1678e-03, -2.3453e-02, 1.8890e-02, ..., 1.1101e-02, + 3.5305e-03, 3.5629e-03], + [-1.4053e-02, -1.7029e-02, -9.1400e-03, ..., -1.0704e-02, + 1.3428e-02, 3.3951e-04]])Parameter containing: +tensor([-0.3743, -0.4089, -0.3171, ..., -0.2976, -0.0021, -0.3103])Parameter containing: +tensor([[-0.0040, 0.0537, 0.0006, ..., -0.0110, 0.0033, 0.0026], + [ 0.0102, -0.0116, -0.0276, ..., -0.0249, -0.0013, 0.0136], + [-0.0247, 0.0174, -0.0146, ..., -0.0083, -0.0184, -0.0121], + ..., + [-0.0018, -0.0283, 0.0097, ..., 0.0254, -0.0131, 0.0048], + [ 0.0004, 0.0063, -0.0341, ..., -0.0153, 0.0024, 0.0111], + [-0.0007, 0.0055, -0.0035, ..., -0.0027, -0.0048, -0.0002]])Parameter containing: +tensor([ 5.4077e-02, 1.3013e-01, 
-5.7983e-02, 1.5717e-02, 3.2715e-02, + 9.5520e-02, 1.2794e-02, -6.7444e-02, -1.2537e-01, 6.1417e-03, + -2.1545e-02, -5.6244e-02, -1.3098e-01, -8.5876e-02, -1.1154e-02, + -6.3354e-02, -1.7914e-02, -1.3092e-02, -3.7479e-03, -9.3323e-02, + -6.9122e-03, 1.2047e-02, 4.1595e-02, -1.2341e-01, 5.6030e-02, + 1.6724e-02, 6.0059e-02, 8.7769e-02, -5.6396e-02, 4.5135e-02, + 4.3274e-02, 9.0881e-02, -3.3398e-03, 6.0730e-02, 5.9242e-03, + -5.7678e-03, -8.1558e-03, -3.1082e-02, 5.3253e-02, 7.5500e-02, + 3.5614e-02, 2.6764e-02, 2.6077e-02, 2.8214e-02, -2.6535e-02, + 7.6233e-02, 4.6722e-02, 8.8577e-03, 2.0466e-03, 3.2928e-02, + -1.6464e-02, -3.5736e-02, 1.1829e-01, 2.7435e-02, 8.1360e-02, + 1.0065e-01, -2.0920e-02, -6.6109e-03, 3.3813e-02, -1.5533e-02, + 4.5837e-02, -7.1869e-03, 7.8430e-03, -3.3783e-02, 6.3721e-02, + 7.6782e-02, 4.5715e-02, -4.1016e-02, 4.4861e-02, 1.8143e-02, + -4.2053e-02, -3.8757e-02, 3.9459e-02, 4.6356e-02, -7.9651e-02, + -2.8778e-02, 1.2016e-03, 2.1317e-02, -1.1902e-01, -4.4128e-02, + -1.7929e-02, 3.0701e-02, 2.0523e-03, 3.7689e-02, 5.8594e-03, + -4.1199e-02, 4.6661e-02, 6.6071e-03, -5.0049e-02, 8.2016e-03, + -3.7476e-02, 6.2943e-03, -7.1754e-03, -8.7402e-02, -9.7504e-03, + 5.4092e-03, 1.8677e-02, -2.2797e-02, -1.3171e-01, -5.3009e-02, + -7.5150e-03, -5.6610e-02, -5.5656e-03, 4.2267e-03, -1.4664e-02, + -2.2949e-02, 1.0779e-01, 5.8380e-02, 3.7518e-03, 1.9089e-02, + -1.2642e-02, 4.3121e-02, 6.2134e-02, 9.3506e-02, 1.0779e-01, + -2.4292e-02, -4.2381e-03, -6.3324e-04, 8.0627e-02, 7.5951e-03, + 2.9419e-02, -1.2256e-01, -1.1737e-01, -3.6621e-02, 4.0970e-03, + 3.1769e-02, -3.0045e-02, 2.9526e-02, 5.0812e-02, -1.9028e-02, + 1.6342e-02, -1.6373e-02, -3.4607e-02, -5.0140e-02, 5.9967e-02, + 1.6113e-01, 2.9709e-02, 3.1261e-03, -4.0771e-02, -4.9377e-02, + -9.1324e-03, 1.8524e-02, -4.6753e-02, 7.7759e-02, -2.4277e-02, + 1.5205e-02, -5.0873e-02, 3.6407e-02, 1.1772e-02, 6.4819e-02, + 6.5430e-02, -7.9773e-02, 3.2196e-02, 7.5806e-02, -2.5482e-02, + -7.5073e-03, -7.5684e-02, 
4.1138e-02, 2.2705e-02, -3.2776e-02, + 3.8452e-02, 6.3801e-04, 6.2866e-03, -1.5839e-02, -3.5675e-02, + 1.3695e-02, -4.1656e-02, 8.6441e-03, -6.6895e-02, 2.8839e-02, + 1.4000e-03, 2.5787e-02, -1.7883e-02, 1.1032e-02, -1.0094e-02, + -2.7054e-02, 1.2245e-02, -2.2964e-02, -3.4332e-02, 5.9326e-02, + 9.2239e-03, -1.3293e-01, -7.0312e-02, -3.7109e-02, 1.0480e-01, + -2.2598e-02, -1.0231e-02, 3.6597e-01, -3.4851e-02, 4.1275e-03, + 5.7281e-02, -8.4778e-02, 2.5238e-02, 1.2749e-02, -2.2781e-02, + 4.7028e-02, 7.9575e-03, -1.0938e-01, 1.2695e-01, 2.5497e-02, + -7.2571e-02, 6.2744e-02, 4.2053e-02, 9.9335e-03, 3.8574e-02, + 4.9225e-02, 5.7495e-02, 3.7506e-02, -3.7750e-02, -9.9609e-02, + 5.7495e-02, 1.9272e-02, 2.8351e-02, 3.5667e-03, 3.9697e-04, + 2.4910e-03, 9.3384e-03, 5.7556e-02, -8.8989e-02, -5.3741e-02, + -1.5190e-02, -3.2867e-02, 3.6682e-02, 9.9060e-02, 2.8717e-02, + -1.0742e-01, -4.7791e-02, -2.6199e-02, 5.9601e-02, 6.9618e-03, + 2.2018e-02, 1.8509e-02, -1.4549e-02, -1.3170e-03, -3.5217e-02, + -5.4840e-02, -4.6570e-02, 2.1164e-02, 6.7932e-02, 3.7109e-02, + 3.8696e-02, -2.5635e-02, 9.4543e-02, -2.4261e-02, 1.5930e-02, + 4.0863e-02, 4.0527e-02, -1.0506e-02, -5.0842e-02, 7.0114e-03, + 2.9755e-02, 2.1561e-02, 8.7524e-02, -1.0522e-01, 1.0016e-01, + 5.6305e-02, -4.6234e-02, -1.5442e-02, -2.8503e-02, -1.6332e-04, + -5.5054e-02, -2.4738e-03, -4.9896e-02, -1.0181e-01, -1.2988e-01, + -3.0304e-02, 1.6689e-03, 1.8372e-02, -3.8300e-02, -1.8911e-03, + -8.8654e-03, 8.3740e-02, 5.7800e-02, -1.5884e-02, -7.7576e-02, + 5.2673e-02, 3.2654e-02, 9.7046e-03, 4.5502e-02, -5.1422e-02, + 1.2772e-02, 4.8584e-02, -4.2969e-02, 1.9821e-02, -2.5436e-02, + -2.6970e-03, -9.7885e-03, -2.3636e-02, 3.4607e-02, -6.4636e-02, + 6.8481e-02, -3.0792e-02, 3.5858e-02, -6.0699e-02, 3.0121e-02, + 4.3793e-02, 5.9090e-03, 6.3232e-02, 9.8190e-03, 5.9937e-02, + -4.1847e-03, 6.5125e-02, -3.9856e-02, 2.0767e-02, -6.0730e-02, + -3.4851e-02, 7.5874e-03, -7.2937e-02, -4.6158e-03, -1.9928e-02, + 9.6497e-02, 7.0251e-02, 
3.7750e-02, -3.4973e-02, 5.8075e-02, + -3.2928e-02, -6.7566e-02, -2.8946e-02, 8.9600e-02, 7.0740e-02, + -1.3000e-01, 3.0487e-02, -4.3457e-02, -2.4277e-02, 3.4985e-01, + -1.3965e-01, -6.5369e-02, -9.9548e-02, 3.9764e-02, -1.6895e-01, + 8.6853e-02, -6.1554e-02, 6.6452e-03, -5.3589e-02, -3.1342e-02, + 4.0321e-03, -5.3223e-02, 5.7159e-02, 2.5040e-02, -4.1748e-02, + -4.8035e-02, -1.7853e-02, -1.1917e-02, 1.6266e-02, 1.2947e-02, + -2.1835e-02, 8.0872e-03, 4.2450e-02, -1.1212e-01, -7.9285e-02, + 7.2083e-02, 3.1769e-02, 3.6835e-02, -1.0254e-02, -5.7716e-03, + -5.8929e-02, -4.0039e-02, -2.1820e-02, -2.5192e-02, 7.8430e-03, + -1.0059e-01, -6.3782e-02, -8.7280e-02, 1.7120e-02, 4.0039e-02, + -1.1322e-02, -1.0315e-01, 1.0278e-01, -7.9468e-02, 4.2999e-02, + -1.4030e-02, -6.9885e-02, -2.0172e-02, -6.4087e-02, -2.1561e-02, + 2.7435e-02, 2.4155e-02, 3.0685e-02, 4.2053e-02, -4.9347e-02, + -6.3416e-02, 3.6438e-02, -6.3232e-02, -8.7097e-02, -2.8366e-02, + -2.8931e-02, -7.0435e-02, -3.1204e-02, 3.8055e-02, -3.4760e-02, + -3.0624e-02, 7.2327e-02, 7.9041e-02, 5.4359e-03, -1.2573e-02, + -8.9905e-02, -2.4567e-02, 2.7084e-02, -2.0187e-02, 4.4556e-03, + 7.3730e-02, 6.7825e-03, 2.8248e-03, 6.6986e-03, 7.2746e-03, + -2.2919e-02, -9.0182e-05, -8.2169e-03, -5.8716e-02, 1.3069e-02, + -1.4679e-02, 4.9530e-02, 2.9053e-02, -4.5685e-02, 3.2463e-03, + -1.0907e-01, -3.2288e-02, -1.1620e-02, -3.1311e-02, 8.2031e-02, + 1.0834e-02, -3.6526e-03, 2.4414e-02, -1.3501e-01, 9.6207e-03, + 1.9760e-02, 1.4168e-02, -4.5715e-02, 8.3679e-02, 1.8021e-02, + 2.8198e-02, 2.4445e-02, 5.9471e-03, 9.4849e-02, 1.0663e-01, + 1.8204e-02, -5.3467e-02, 4.1718e-02, 7.1960e-02, -1.2573e-01, + 6.1646e-02, -2.7527e-02, -5.3162e-02, 2.3518e-03, -3.5828e-02, + -6.7505e-02, -8.8787e-04, 9.1410e-04, 3.1921e-02, 3.0121e-02, + -3.2654e-02, -6.5002e-02, -6.7810e-02, 1.0950e-01, 3.9642e-02, + 6.2180e-03, -2.6733e-02, 2.5520e-03, -1.6983e-02, -8.4595e-02, + -1.6708e-02, 5.6396e-02, -7.7637e-02, 1.9348e-02, -3.6469e-02, + -7.5317e-02, 
1.0544e-02, -4.8218e-02, 4.8027e-03, -6.1371e-02, + -5.4138e-02, -4.2664e-02, -7.4280e-02, -5.8807e-02, -5.2368e-02, + 2.7435e-02, 1.2366e-01, 3.3997e-02, 5.6122e-02, 2.0584e-02, + 8.3618e-02, -1.7443e-03, -3.7270e-03, -1.6403e-02, -2.6951e-03, + -1.7853e-02, -5.8105e-02, 9.6252e-02, 2.5726e-02, -6.2744e-02, + 5.7373e-03, 5.4840e-02, -1.0388e-01, 5.1575e-02, -4.5685e-02, + 2.1408e-02, -3.2715e-02, 6.8665e-02, 1.9913e-02, 3.2898e-02, + 3.3325e-02, -2.5101e-02, 1.5900e-02, -2.3422e-02, 4.2328e-02, + -3.1830e-02, 8.2031e-02, 6.8665e-02, -2.9129e-02, -8.8684e-02, + -4.8981e-02, -5.0171e-02, 1.8295e-02, 4.5891e-03, 4.2511e-02, + -2.2690e-02, 3.3752e-02, 8.6914e-02, 8.7463e-02, -9.0942e-02, + 3.5004e-02, 1.8845e-02, 3.2898e-02, -5.1819e-02, -4.3121e-02, + 1.3115e-02, 2.9495e-02, 1.5961e-02, -9.1095e-03, -4.0741e-02, + 6.4758e-02, -1.4880e-01, 4.6204e-02, -2.0798e-02, 1.3634e-02, + 4.1473e-02, -2.6871e-02, 1.2810e-02, -5.9021e-02, -4.3793e-02, + -1.5961e-02, 1.0028e-01, -2.3091e-04, 8.6670e-03, 1.3039e-02, + 1.0872e-02, 1.1694e-01, -1.8494e-02, 1.4246e-04, -3.5439e-03, + -9.9792e-02, -5.7251e-02, -3.1311e-02, 7.8857e-02, -1.2369e-03, + -4.5319e-02, -8.3740e-02, -1.8402e-02, -4.1321e-02, 6.1646e-02, + 1.0004e-03, -8.3237e-03, -8.2321e-03, 1.5274e-02, -6.9031e-02, + 1.0010e-02, 7.0992e-03, -7.8735e-02, -4.0985e-02, -1.7624e-02, + 5.8502e-02, 5.5878e-02, -3.2593e-02, 1.1520e-02, 2.4986e-03, + 8.0017e-02, -3.4580e-03, -3.2471e-02, -1.1490e-02, -1.7075e-02, + -7.3914e-02, 4.4220e-02, 1.0443e-01, -1.5915e-02, -1.0651e-02, + -1.4099e-02, -4.8737e-02, 4.7363e-02, 6.4575e-02, 1.1938e-01, + 1.6205e-02, 4.7150e-02, 2.2629e-02, 2.3514e-02, -5.4436e-03, + -3.2735e-04, -1.6495e-02, -7.3853e-02, 3.0045e-02, 5.2551e-02, + -5.2246e-02, -1.0040e-01, 2.6993e-02, -5.7159e-02, 3.2616e-03, + 3.1372e-02, 6.9275e-02, -7.4280e-02, 4.3976e-02, -3.7323e-02, + -5.0018e-02, 6.4575e-02, -1.2062e-02, 3.6926e-02, 3.2349e-02, + -3.4088e-02, 6.3293e-02, 1.2466e-02, -5.5664e-02, -2.7481e-02, + -4.1656e-03, 
-9.3412e-04, 4.5753e-04, -2.2842e-02, 2.7985e-02, + -9.8724e-03, 5.6267e-04, 2.1423e-02, 4.3091e-02, -1.7273e-02, + 1.1279e-01, -9.0759e-02, 5.3864e-03, 5.6061e-02, 5.5939e-02, + -2.3941e-02, -3.7170e-02, 5.0903e-02, 1.8997e-02, -4.5532e-02, + 1.2476e-01, 3.7060e-03, -1.0094e-02, -7.6111e-02, -3.5706e-02, + -4.8462e-02, -3.5919e-02, 5.0659e-02, 2.3468e-02, 9.0698e-02, + 3.3264e-02, 1.0193e-01, 6.3599e-02, -1.6205e-02, 7.1602e-03, + 4.4983e-02, -1.2030e-01, 3.1219e-02, -4.3671e-02, 5.3314e-02, + -5.5122e-03, 5.2277e-02, 1.9272e-02, 1.6037e-02, 7.8430e-02, + 2.0462e-02, -1.3867e-01, -7.6477e-02, 8.5632e-02, 1.6220e-02, + 7.0679e-02, 2.4048e-02, -4.8584e-02, 1.1574e-02, 6.0913e-02, + -5.9174e-02, 4.8065e-03, -3.1204e-02, 9.5139e-03, 1.3557e-02, + -7.6050e-02, 2.8015e-02, -5.7678e-02, 1.2917e-02, -2.8717e-02, + 2.3331e-02, -8.3801e-02, -4.6539e-02, -6.9847e-03, -1.4511e-02, + 6.0844e-03, -4.9347e-02, 2.1454e-02, 8.4381e-03, -6.3965e-02, + 9.4482e-02, -5.5725e-02, 2.3880e-02, -1.3222e-02, 2.0313e-03, + 3.6682e-02, 3.2166e-02, 9.2697e-03, -8.5693e-02, 8.2626e-03, + -3.2410e-02, -6.1340e-02, 4.4312e-02, -7.8308e-02, 6.9031e-02, + 2.3438e-02, -1.8707e-02, 3.7842e-02, 1.0576e-03, 1.6190e-02, + -1.0370e-01, -6.0181e-02, 6.3232e-02, 3.1796e-03, -1.2854e-01, + 1.2077e-02, -8.4839e-02, -5.0385e-02, -3.2532e-02, -4.2191e-03, + -1.1971e-02, -5.3650e-02, -1.0857e-02, 4.0588e-02, 4.0741e-02, + 6.7940e-03, -1.4281e-04, 6.3416e-02, 1.0284e-01, 5.7800e-02, + 4.9408e-02, 3.4637e-02, -3.8300e-02, 6.1218e-02, -3.0823e-02, + 8.1940e-03, -8.8730e-03, -4.8096e-02, -8.6975e-02, 8.5449e-03, + -2.7332e-03, -1.3794e-01, 5.2872e-03, -1.0187e-01, 4.6387e-02, + -1.6403e-02, 4.8035e-02, -6.8787e-02, 2.8000e-02, 3.2806e-02, + 6.9336e-02, 1.6815e-02, -2.1332e-02, 1.0016e-01, 5.1849e-02, + -3.4760e-02, 1.0941e-02, -3.6896e-02, 5.2338e-02, 1.2726e-02, + -2.6688e-02, 3.6530e-02, 9.5215e-02])Parameter containing: +tensor([2.1306, 2.0981, 2.1663, 2.0829, 1.7951, 2.1608, 2.2010, 2.0909, 2.2347, + 2.1553, 
2.0846, 2.2494, 2.2396, 2.1285, 2.1372, 2.0650, 2.1098, 2.1325, + 2.2690, 1.6700, 2.2613, 2.1879, 2.1590, 2.0855, 2.1393, 2.1793, 2.0697, + 2.0706, 2.0371, 2.0495, 2.1932, 2.1432, 2.1056, 2.1178, 2.0806, 2.0542, + 2.3682, 2.0192, 2.0318, 2.2403, 2.0286, 2.0738, 2.1933, 2.1454, 2.0341, + 2.0834, 2.1870, 1.9817, 2.0910, 2.0121, 2.2396, 2.1091, 2.0853, 2.0440, + 2.2116, 2.0897, 2.0882, 1.9730, 2.0929, 2.1411, 2.1363, 2.1475, 2.0930, + 2.2281, 2.0509, 2.1295, 2.2263, 1.9539, 2.2303, 1.9053, 2.2407, 2.2046, + 2.2446, 2.3049, 2.2940, 2.2953, 2.1716, 2.0946, 2.2047, 1.9443, 2.0475, + 2.0535, 2.1913, 2.1422, 2.2416, 2.0748, 2.1369, 2.0475, 2.1431, 2.0515, + 2.2646, 2.1117, 2.0603, 2.0523, 2.0881, 2.1070, 2.1603, 2.0667, 2.2130, + 2.0835, 2.1439, 2.0512, 2.1125, 2.1444, 2.0461, 1.9122, 2.1266, 2.1854, + 2.0578, 2.0436, 2.0820, 2.3937, 2.1196, 2.2232, 2.2387, 2.0481, 2.1932, + 2.1360, 2.0806, 2.0098, 2.1164, 1.9950, 2.2060, 2.2159, 2.0401, 2.1459, + 2.0444, 2.1784, 2.2300, 2.1567, 2.1416, 2.0632, 2.1148, 2.0197, 2.1419, + 2.1275, 2.1502, 2.1169, 2.0210, 2.1767, 2.2734, 2.0312, 2.1906, 2.0491, + 2.1156, 2.0720, 2.1628, 2.0191, 2.1939, 2.1975, 2.1269, 1.9829, 2.2568, + 2.1802, 2.2635, 1.4847, 2.0943, 2.0363, 2.1120, 2.2034, 2.0194, 2.2002, + 2.1097, 2.0688, 2.0510, 2.1756, 2.1981, 2.0952, 2.0924, 2.0707, 2.0671, + 2.1223, 2.0485, 2.1029, 2.0321, 2.1441, 2.1635, 2.3610, 2.0506, 2.1866, + 2.2366, 2.1572, 2.1832, 2.2729, 2.0558, 2.0246, 2.1644, 1.3161, 2.0913, + 2.1085, 2.2300, 2.1219, 2.1403, 2.2661, 2.0771, 2.1381, 2.2069, 2.1151, + 0.9310, 2.0980, 2.2353, 2.0280, 2.1296, 2.0789, 2.1014, 2.0711, 2.1582, + 2.2039, 2.1553, 2.0854, 2.1312, 1.9870, 2.0954, 2.0356, 2.0757, 2.1471, + 2.0934, 2.1607, 2.0877, 2.1701, 2.2665, 2.1066, 1.8172, 2.1928, 2.1054, + 2.1729, 2.2115, 2.0668, 2.1303, 1.9412, 2.2088, 2.2902, 2.1197, 2.0195, + 1.9842, 2.3541, 2.2992, 2.1719, 2.1185, 2.3326, 2.0597, 2.2121, 2.0990, + 2.1920, 2.0137, 2.1492, 2.0695, 2.0967, 2.0664, 2.2442, 2.1980, 2.0537, + 2.1239, 
2.2239, 2.1115, 2.0760, 2.1666, 2.0412, 2.3497, 2.2904, 2.3730, + 2.0719, 2.2055, 2.1495, 2.2552, 2.1622, 2.1594, 2.1742, 2.1108, 2.1134, + 2.0495, 2.1505, 2.1692, 2.3587, 2.0808, 2.1928, 2.1677, 2.1319, 2.2773, + 2.1028, 2.1036, 1.9044, 2.1900, 2.0670, 2.1364, 2.0927, 2.0784, 2.0589, + 2.1780, 2.0068, 2.0723, 2.0721, 2.1785, 2.1709, 2.0420, 2.0222, 2.0584, + 2.3348, 2.0092, 2.1329, 2.0837, 2.1588, 2.1440, 2.2351, 2.1050, 2.0425, + 2.1074, 2.0984, 2.1500, 2.1535, 2.1695, 2.1366, 2.0727, 2.1510, 2.1025, + 2.0234, 2.1057, 2.1375, 2.0724, 2.1730, 2.1779, 2.1295, 2.1127, 2.0601, + 0.4807, 2.3870, 2.1398, 2.2474, 2.2304, 2.2226, 2.0155, 2.0902, 2.2085, + 2.1103, 2.0833, 2.0171, 2.1137, 2.1066, 2.1834, 2.1329, 2.2222, 2.0514, + 2.1268, 2.2083, 2.0987, 2.1011, 2.3077, 2.1029, 2.2446, 2.0978, 1.8194, + 2.1265, 2.1962, 2.2371, 2.1642, 2.0771, 2.0407, 2.2704, 2.2185, 2.0338, + 2.3705, 2.1359, 2.1662, 2.2103, 2.0700, 2.1263, 2.1311, 2.1644, 2.2601, + 2.1973, 2.1671, 2.1047, 2.1370, 2.3222, 2.0790, 2.0729, 2.1185, 2.1862, + 2.0944, 2.1344, 2.0862, 1.9719, 2.2108, 2.1109, 2.2341, 2.1641, 2.1254, + 2.3113, 2.1620, 2.1923, 2.1841, 2.1487, 2.2404, 2.1601, 2.0500, 2.0576, + 2.1119, 2.2564, 2.1640, 2.3131, 2.1320, 2.2965, 2.2485, 2.2046, 2.0289, + 1.9622, 2.0788, 2.0702, 2.1659, 2.1610, 2.1714, 2.1783, 2.1003, 2.1383, + 1.5933, 2.3478, 2.1246, 1.9951, 2.1622, 2.1895, 2.1283, 2.0383, 2.3347, + 2.2002, 2.1307, 2.0826, 2.0998, 2.0361, 2.0912, 2.2533, 2.1869, 2.0681, + 2.2428, 2.1953, 2.2473, 2.0651, 2.1201, 2.1390, 2.2267, 2.1952, 2.1608, + 2.0552, 2.1135, 1.6425, 2.0805, 2.0025, 2.1075, 2.1361, 2.2242, 2.1469, + 2.1484, 2.1327, 2.3234, 2.1158, 2.0765, 2.1809, 2.1726, 2.2022, 2.0506, + 2.1052, 2.1520, 2.0688, 2.2633, 2.1049, 2.1441, 2.0107, 2.1586, 2.0242, + 2.0553, 2.1383, 2.1448, 2.1316, 2.1199, 2.1036, 2.4179, 2.0712, 2.0769, + 2.2432, 2.1385, 2.0033, 2.1947, 2.1739, 2.1416, 1.4240, 2.0872, 2.1168, + 2.0558, 2.0731, 2.0900, 2.3486, 2.1286, 2.1595, 2.1331, 2.0985, 2.2349, + 2.0733, 
2.2393, 2.0185, 1.9720, 2.1799, 2.2404, 2.2769, 2.1214, 2.1365, + 2.1539, 1.9940, 2.0823, 2.0511, 2.1764, 2.2886, 2.0483, 2.1611, 2.1046, + 2.0842, 2.3037, 2.1485, 2.2134, 2.2764, 2.1331, 2.1390, 2.2793, 2.1325, + 2.0913, 2.0291, 2.0483, 2.2171, 2.0498, 2.0716, 2.0527, 2.0389, 2.0519, + 2.2236, 2.1044, 2.1652, 2.2721, 2.1719, 2.0850, 2.1445, 2.2287, 2.0876, + 2.0748, 2.2805, 2.2230, 2.1957, 2.1366, 2.1051, 2.0608, 2.1175, 2.0909, + 2.0974, 2.3656, 2.0853, 1.9904, 2.1722, 2.0264, 2.1505, 2.2116, 2.1064, + 2.3324, 2.0612, 2.0759, 1.5095, 2.1135, 2.0918, 2.1234, 2.1231, 2.0909, + 2.1507, 2.1431, 2.0905, 2.1110, 2.1229, 2.0950, 2.0726, 2.1123, 2.2312, + 2.1390, 2.2622, 2.2702, 2.1241, 2.0290, 2.1137, 2.1211, 2.0897, 2.1186, + 2.1072, 2.1211, 2.0325, 2.3841, 2.1235, 2.1657, 2.2012, 2.1920, 2.0612, + 2.1856, 1.9654, 2.0802, 2.2121, 2.1295, 2.1085, 2.0468, 2.1225, 2.0937, + 2.2090, 2.1711, 2.2392, 2.2404, 1.9501, 2.2067, 2.1174, 2.1059, 2.0687, + 2.0720, 2.2781, 2.0699, 2.0782, 2.1167, 2.1556, 2.1573, 2.0769, 2.1428, + 2.0000, 2.0540, 2.1057, 2.3078, 2.1170, 2.0252, 2.2114, 2.0516, 2.0569, + 2.1436, 2.1945, 2.1005, 2.0620, 2.1000, 1.9946, 2.0742, 2.2715, 2.1476, + 2.0796, 2.1513, 2.1469, 2.1263, 2.1124, 1.8206, 2.1527, 2.1983, 2.1449, + 2.0146, 2.1201, 2.1955, 2.1086, 2.1151, 2.0930, 2.2746, 2.0733, 2.1319, + 1.9999, 2.0404, 2.1487, 1.8035, 2.0896, 2.1148, 2.1295, 2.2288, 2.0789, + 2.0948, 2.1741, 2.2246, 2.1047, 2.0214, 2.1493, 2.1955, 1.9344, 2.0848, + 2.0497, 2.1492, 2.1086, 2.1639, 2.0673, 2.0541, 2.1296, 1.9733, 2.1538, + 2.1301, 2.2918, 2.0695, 1.9991, 2.2444, 2.0984, 2.1520, 2.1604, 2.1877, + 2.1085, 2.1740, 2.4152, 2.1196, 2.1161, 2.0564, 2.2205, 2.0721, 2.0152, + 2.2869, 2.1730, 2.2223, 2.1000, 2.1991, 2.0098, 1.9885, 2.1247, 2.1999, + 2.0865, 2.1030, 2.2179, 2.0859, 2.2840, 2.0955, 2.2556, 2.0865, 2.5612, + 2.1310, 2.1637, 2.2042, 2.3089, 2.2860, 2.0313, 1.5092, 1.9951, 1.9718, + 2.0639, 2.2435, 2.0679, 2.1874, 2.0538, 2.1991, 2.2152, 2.0431, 1.9732, + 2.1273, 
2.3086, 2.0617, 2.1179, 1.9938, 2.2863, 2.1030, 2.0925, 2.2335, + 2.2860, 2.1918, 2.0950, 2.0555, 2.2699, 2.3361, 2.3560, 2.0020, 2.2247, + 2.1124, 2.1613, 2.1211, 2.0804, 2.1438, 2.0524, 2.1160, 2.0022, 2.0361, + 2.0578, 2.0534, 2.2993])Parameter containing: +tensor([ 5.5032e-02, 3.4170e-01, -5.9914e-01, 5.7442e-02, -5.9289e-01, + 6.2105e-01, 7.2261e-01, -3.8350e-01, 5.3076e-01, -3.4490e-01, + -3.7516e-01, 7.6397e-01, -4.8466e-01, -1.9540e-01, 2.3205e-01, + 6.6639e-01, 3.8165e-02, 4.7632e-01, -7.5291e-01, -1.4750e+00, + 5.0280e-01, 4.2079e-01, 5.0035e-01, -4.4191e-01, 4.7763e-01, + -5.6680e-01, -3.7598e-01, 3.5411e-03, -6.2287e-01, 5.5517e-01, + -1.8523e-01, 2.2972e-01, -3.0193e-01, -4.0291e-01, -2.8643e-01, + -1.4825e-01, -9.7817e-01, -2.5017e-01, 3.0976e-01, 9.1308e-01, + -3.7209e-01, 5.0364e-02, 8.3947e-01, 1.0546e-01, -5.1225e-01, + -7.5814e-01, -6.4859e-01, -1.9139e-01, -3.6599e-01, -3.1565e-01, + -8.4970e-01, -9.4634e-02, -2.2861e-01, 6.4416e-02, -7.4689e-01, + 8.2814e-01, 6.7515e-02, -2.1095e-01, 1.0784e-01, -2.0049e-01, + -1.5426e-02, 2.9876e-01, 2.9834e-01, -9.7773e-01, -2.1292e-01, + 4.1629e-01, 5.9539e-01, -1.2394e-01, -5.7038e-01, -1.6600e+00, + 5.8589e-01, 3.1986e-01, 9.3529e-01, -8.1940e-01, -1.0018e+00, + -7.7484e-01, 5.0273e-01, 1.4810e-01, 5.6621e-01, 2.0840e-01, + -2.5548e-02, 3.3979e-01, -9.2684e-01, 1.6033e-01, -5.8794e-01, + -1.7960e-01, 5.1259e-01, -1.7504e-01, 5.2133e-01, -4.8449e-01, + -7.1617e-01, 7.2320e-01, -4.6025e-01, 2.7989e-01, -6.8861e-01, + 1.6533e-01, 6.9277e-01, 2.0693e-01, -4.5392e-01, 7.2708e-01, + 3.7570e-01, -7.4780e-01, 1.2439e-01, -6.6284e-01, -1.2287e-01, + 5.6412e-01, 2.9251e-01, 3.7662e-01, -1.5380e-01, -1.2422e-01, + 7.0778e-01, 8.5423e-01, -3.5530e-01, 4.0904e-01, 3.2800e-01, + -2.4588e-01, 9.1576e-01, 3.5030e-01, -3.2823e-01, -5.6558e-02, + -3.5270e-01, -7.7882e-02, -4.8140e-01, -4.8432e-01, 7.9960e-02, + -7.8382e-01, -1.7744e-01, -3.0857e-01, -4.1455e-01, 7.0633e-01, + 1.3336e-01, -3.3674e-01, -3.0745e-01, 
-2.2139e-01, 5.0010e-01, + 2.7472e-01, 5.4829e-01, -3.3975e-02, -3.4801e-01, 2.3116e-01, + -8.3419e-01, 2.5358e-01, 2.2674e-01, -1.5673e-01, -3.6639e-01, + -1.5545e-01, 6.0038e-01, -1.5873e-01, -5.5829e-01, 5.9090e-01, + 3.3606e-01, -4.3766e-02, 6.3776e-01, 1.2674e-01, -7.7525e-01, + 1.0622e+00, -1.5575e-01, 5.7224e-02, -1.2175e-01, -3.5464e-01, + -4.1974e-01, -5.5180e-01, 3.1076e-01, -1.1148e-01, 1.7761e-01, + 3.9893e-01, -8.1377e-01, 1.9128e-01, 2.2551e-01, 1.0480e-01, + 2.5536e-01, 3.7489e-01, -6.0449e-01, -1.7544e-01, -3.6786e-01, + -1.7595e-01, 5.1698e-01, -7.5201e-01, 8.6587e-02, 3.5645e-01, + -9.2023e-01, -6.3389e-01, 5.6677e-01, 7.7825e-01, 4.0414e-02, + 8.2350e-01, -7.1027e-01, 1.1759e+00, 4.5486e-01, -5.8566e-01, + -8.3278e-01, -2.3355e-02, 6.1520e-01, 9.4288e-01, 1.9277e-01, + 3.2174e-01, 7.2860e-01, 1.5114e-01, 5.5562e-01, 1.9258e-01, + -3.3681e-01, -6.2664e-01, 4.8709e-01, 3.9007e-01, -1.0146e-01, + 3.3661e-01, 2.6557e-01, 7.1143e-01, -4.7444e-01, -2.2915e-02, + 5.6967e-02, -2.6513e-01, -6.7503e-01, 8.1852e-01, 1.7758e-01, + -5.4827e-01, -6.1577e-01, 3.7921e-01, -3.2689e-01, -4.2346e-01, + 1.0098e+00, 8.1804e-02, -9.1237e-01, 4.6550e-01, 3.3835e-02, + -5.5621e-02, -6.1343e-01, -1.6698e-02, 2.1856e-01, -6.0267e-02, + -4.5651e-01, 5.7496e-01, 5.1042e-02, 3.3396e-01, -1.1056e-01, + -9.3004e-01, 5.3485e-01, 3.2634e-01, -3.2156e-01, 1.0520e+00, + 1.2905e-02, 7.6925e-01, -6.5054e-02, -5.2462e-01, -2.7286e-01, + -5.9786e-01, 8.3486e-02, -4.5704e-01, -3.2914e-01, 8.4377e-01, + -2.7605e-01, 1.9019e-01, -5.2997e-02, -4.1378e-01, 9.2904e-02, + -4.0969e-01, 5.7097e-01, -2.9682e-01, -1.1046e+00, -6.4378e-01, + -1.0371e+00, -1.7496e-01, 3.8344e-01, 7.4962e-01, -2.3920e-01, + -5.1637e-01, 6.3089e-01, 1.5819e-01, 5.0971e-01, 2.4068e-01, + 1.9098e-01, 1.2260e-01, -4.0640e-01, -1.0648e+00, -3.8075e-01, + 9.3032e-03, 3.6212e-01, 5.7357e-01, 6.9055e-01, -2.7114e-01, + 8.0547e-02, -8.7845e-01, -7.8596e-01, 2.5760e-01, -3.3892e-01, + 5.2147e-01, -1.9988e-01, 1.4552e-01, 
6.2273e-01, -1.7925e-01, + -2.8361e-01, 3.1194e-01, 7.0649e-01, -6.5505e-01, -1.1719e-01, + -4.4102e-01, -1.1529e-01, -1.0064e+00, 5.8067e-01, -4.9356e-02, + -5.4669e-01, -5.6302e-02, -8.0066e-01, -3.0239e-01, -7.7490e-02, + 4.1506e-01, 3.2261e-01, 3.0288e-01, -4.8018e-01, -5.2130e-01, + 2.3988e-01, -4.4202e-01, -1.5315e-01, 1.1838e-01, 9.8189e-02, + 4.9815e-01, 7.1809e-01, 4.9991e-01, 1.3004e-01, 4.2302e-01, + -5.0610e-01, -8.3078e-01, 4.9045e-01, -8.0706e-03, 2.8603e-01, + -1.2553e+00, 2.8014e-01, -4.5281e-01, 5.2321e-01, -6.6674e-01, + 1.7911e-01, -3.0358e-01, -2.8774e-01, -1.6966e-01, -2.1777e-01, + -8.2017e-02, -6.3974e-01, 3.4140e-01, 7.0512e-01, -2.6187e-01, + 4.2697e-01, -9.8788e-02, -1.9254e-01, 5.5265e-01, -7.7105e-01, + 7.7795e-01, -8.2557e-01, 3.7116e-01, -2.1748e-01, -3.0085e-01, + 1.1101e-01, 4.6779e-01, 2.7369e-01, 1.1426e+00, -6.6837e-01, + -2.9639e-01, -3.4687e-01, -3.5086e-01, -5.5450e-01, -3.4801e-02, + -9.7990e-01, -5.0421e-01, -1.3055e-01, -4.7743e-01, 5.8862e-02, + 2.0139e-01, 2.2049e-02, 5.6779e-01, 1.2275e+00, 4.8432e-01, + 4.5270e-01, -2.4627e-01, -2.1459e-01, -5.2194e-01, -2.4221e-01, + -3.0220e-01, 3.5763e-01, 7.6412e-01, -8.6895e-02, -2.2266e-01, + 3.1286e-01, -2.7849e-01, -5.3784e-01, 1.2801e-02, -6.3741e-01, + -4.8683e-01, -2.1989e-01, 1.0171e+00, 3.2114e-01, -7.0300e-01, + -2.5995e-01, -3.3686e-01, 6.3376e-01, 6.2771e-01, -6.6254e-01, + 3.1605e-01, -3.3023e-01, 8.2851e-01, -2.5261e-01, -8.0842e-01, + -8.4885e-02, 1.0560e+00, -6.9100e-01, -3.3871e-01, 1.6286e-01, + -3.3739e-01, 3.4416e-01, 1.0518e-01, -1.4568e-01, 7.3312e-01, + 2.6994e-01, -3.9172e-01, -1.2606e-01, 3.7430e-01, 1.2941e+00, + -1.8404e+00, 1.4481e-01, -2.2216e-01, 5.6620e-01, 4.0119e-02, + -2.7102e-01, -2.9898e-01, 6.6150e-01, -3.5446e-01, -6.3626e-01, + 3.0112e-01, 4.1641e-01, -7.8385e-03, 1.6033e-01, 5.9720e-01, + -1.0396e+00, -6.8988e-02, 4.9328e-01, 3.7363e-01, 8.8177e-01, + -6.3460e-02, 6.0906e-02, 7.2328e-01, 4.1937e-01, -5.4564e-01, + 7.1990e-02, -1.1627e-02, 
2.7944e-01, -2.0765e+00, -6.6142e-02, + 1.0473e-01, 2.9151e-01, 4.1840e-01, 5.2563e-01, -8.2629e-01, + -5.1596e-02, 6.0050e-02, -9.0046e-01, 2.2648e-01, -1.7885e-01, + 6.1356e-01, -5.4350e-01, -7.6380e-01, -1.0181e-01, 1.7266e-01, + -3.3133e-01, 8.8887e-02, -6.9486e-01, 1.1714e-01, -3.4276e-01, + 3.3015e-01, 1.0132e-01, -9.4859e-02, 3.0991e-01, -5.5856e-02, + -2.1844e-02, -1.9616e-01, -6.7070e-01, 3.0605e-01, -8.7142e-01, + -3.8150e-02, -4.0821e-01, -6.1820e-01, -2.8025e-01, -6.9023e-02, + -4.5809e-01, 6.2847e-01, 7.1941e-01, 2.3210e-01, -3.2175e-02, + -4.7614e-01, 2.2915e-01, 1.7289e-01, -2.0610e-01, -8.2476e-01, + 4.6670e-02, 4.5217e-01, 3.0375e-01, -8.2604e-02, -8.5447e-01, + -3.8456e-02, -8.1994e-01, -1.8847e-01, -2.1478e-01, 8.7968e-01, + -4.2970e-01, -5.4261e-01, -4.3325e-01, 1.0360e-01, 2.8478e-01, + 2.7084e-01, 3.8629e-01, 5.8479e-02, -4.9726e-02, -9.2089e-01, + -5.2837e-01, -2.0357e-01, -7.3693e-01, -4.4392e-01, 6.7021e-01, + -1.7075e-01, 4.6331e-01, -4.9681e-01, -2.5834e-01, -2.8214e-01, + 9.1656e-01, 2.3502e-01, -1.8248e-01, 2.8570e-01, -8.3115e-02, + -7.6035e-01, 3.9176e-01, 2.9743e-01, 2.1699e-01, 1.2622e-02, + 7.1951e-02, -2.9116e-01, -3.8441e-02, -1.4330e-01, 6.7795e-01, + -8.4475e-01, 5.5603e-02, -5.1013e-01, 3.5273e-01, 6.8286e-02, + 2.0730e-01, 7.8723e-01, -4.9373e-01, 5.6147e-01, 5.1230e-01, + 4.5580e-01, 1.5385e-01, -7.6108e-01, 6.2250e-02, 4.4146e-02, + -9.6293e-01, 1.5051e-01, 5.0428e-02, 1.6998e-02, -1.3017e-01, + 5.2094e-01, 9.0157e-01, -2.3312e-01, -9.2199e-01, -5.8327e-02, + 3.2615e-02, -2.0963e-01, -1.3304e-02, 5.9420e-01, 2.4892e-01, + 4.2144e-01, -1.3338e-01, 7.3720e-01, 6.7606e-01, -1.0390e-01, + 3.1712e-01, 4.1540e-02, 2.1947e-01, -2.6176e-02, 5.7473e-01, + 6.0464e-01, 1.1620e-02, -4.7551e-01, -5.2233e-01, -1.7980e-01, + 5.4628e-01, -3.4662e-01, 2.8681e-01, 4.0650e-01, 4.5632e-01, + 3.7570e-03, -3.2734e-01, -1.5770e-01, 1.2390e+00, 3.1241e-01, + -6.3380e-01, -7.3302e-01, 8.5430e-01, -6.2936e-02, -1.8730e-01, + 1.2957e-01, -4.5305e-01, 
-4.4873e-01, 8.0722e-01, 2.5926e-01, + 1.9403e-01, 8.0011e-02, -2.3443e-01, -6.4465e-01, 2.1859e-02, + 1.9577e-01, -3.9800e-01, -7.0151e-01, 2.0552e-01, 5.4888e-01, + -1.0330e-02, 3.4967e-01, -2.1443e-01, 6.5350e-01, 2.7752e-02, + 7.1228e-02, -9.6233e-02, 6.2011e-01, -1.6035e-01, 2.6784e-01, + 4.7279e-01, 2.4499e-01, -4.1408e-01, 6.9601e-01, 7.2197e-01, + -2.7174e-01, -1.3945e-01, 1.0073e+00, 8.1506e-01, -1.5321e-01, + -6.7199e-02, 5.5904e-01, -3.3913e-01, 2.5178e-01, 2.1693e-01, + -4.7386e-01, -1.3057e-01, 1.0067e+00, 4.0309e-01, -1.3929e-01, + 6.5192e-01, -7.4773e-01, -8.4036e-01, 2.4286e-01, -3.3858e-01, + 4.2005e-01, -5.4786e-01, 1.3617e-01, -2.3379e-01, 1.6159e-01, + 6.6706e-01, -8.7631e-02, 4.3947e-01, -1.2622e-01, 7.9351e-01, + 5.6074e-01, 2.8677e-01, -1.4647e-01, 2.5503e-02, 3.6957e-01, + -1.7680e-01, 4.9258e-01, -2.7404e-01, -6.6334e-01, -6.8055e-01, + -4.6036e-02, -1.5788e-01, -5.7529e-01, 8.0077e-01, -3.6329e-01, + -2.8435e-01, 3.6245e-01, 8.2002e-01, 4.7903e-01, 4.2087e-01, + -4.1194e-01, -3.2981e-01, 8.9196e-02, 2.5886e-01, 1.0111e-01, + 2.3851e-01, 1.9850e+00, -4.3724e-01, -3.1161e-01, -3.5585e-01, + 1.0762e+00, -1.9195e-01, -1.5232e-01, 7.7787e-01, -3.4547e-01, + -5.2549e-01, -2.9215e-01, -7.1992e-01, -3.3349e-01, -6.5178e-01, + 1.0218e+00, 3.1221e-02, -2.2267e-01, 3.3001e-02, 4.0659e-01, + -8.9277e-02, -4.2065e-01, 7.2138e-01, -3.0468e-01, 6.3441e-01, + 6.4458e-01, -2.5796e-01, -2.9764e-02, -3.8748e-01, -1.4858e-01, + 3.9582e-01, -5.3268e-01, -2.0677e-02, -7.5198e-01, -1.4154e-01, + -3.5085e-01, -1.7420e-01, 6.5263e-01, -3.9491e-01, -1.2203e+00, + -3.2941e-01, -7.7585e-02, -5.8524e-01, -8.6829e-01, -7.8241e-01, + -3.4036e-01, 1.0552e+00, 1.2820e-02, -4.2201e-02, -1.9478e-01, + -8.3023e-01, -2.5496e-01, 7.6814e-01, -1.2869e-01, 2.9615e-01, + 6.5342e-01, 1.4642e-02, 1.9967e-01, -2.6521e-01, -8.1220e-01, + -3.3556e-01, 5.7942e-02, 2.7999e-01, -2.4631e-01, 1.3797e-01, + 1.5128e-01, -6.2344e-01, 6.0882e-01, 9.6539e-01, 5.9792e-01, + 2.6680e-01, 
-6.7106e-01, -1.1571e+00, 8.1228e-01, 1.1491e-03, + 2.5220e-01, -2.8103e-01, -5.5863e-01, 5.5975e-01, 4.4755e-01, + -4.1402e-01, -4.9207e-01, -4.5646e-01, 7.1663e-02, 3.1954e-01, + 2.2474e-01, 3.8796e-01, 6.4554e-01])Parameter containing: +tensor([[ 0.0420, -0.0440, -0.0347, ..., -0.0244, -0.0091, 0.0145], + [-0.0261, 0.0071, 0.0178, ..., 0.0025, -0.0036, -0.0314], + [ 0.0267, -0.0007, 0.0216, ..., 0.0241, 0.0350, -0.0015], + ..., + [-0.0045, -0.0047, 0.0123, ..., -0.0035, 0.0097, 0.0152], + [-0.0158, -0.0261, -0.0006, ..., -0.0156, -0.0045, 0.0177], + [-0.0226, -0.0010, -0.0124, ..., 0.0051, 0.0012, 0.0042]])Parameter containing: +tensor([ 0.1199, -0.1290, 0.1978, ..., -0.0250, 0.0124, -0.0064])Parameter containing: +tensor([[-5.5428e-03, -2.7206e-02, 2.7370e-03, ..., 2.2766e-02, + -3.1643e-03, -1.5764e-03], + [ 2.0981e-02, -1.9547e-02, 1.0455e-04, ..., -1.9424e-02, + -8.0032e-03, 3.5324e-03], + [ 1.8829e-02, 1.2306e-02, -3.6640e-03, ..., -1.7288e-02, + 6.9389e-03, 1.1398e-02], + ..., + [-1.0307e-02, 1.9897e-02, 5.8711e-05, ..., -1.7059e-02, + 9.9411e-03, 2.1317e-02], + [-2.5986e-02, -2.5024e-02, 1.0292e-02, ..., 2.4624e-03, + -7.9422e-03, -1.5936e-03], + [ 1.5373e-02, 1.6113e-02, -3.0041e-03, ..., 5.8517e-03, + 7.4081e-03, 2.3529e-02]])Parameter containing: +tensor([ 2.0859e-02, -2.0630e-02, -5.0323e-02, -1.5854e-02, 1.8677e-02, + -8.5983e-03, 1.6937e-02, -4.2786e-02, -3.0975e-02, 2.8900e-02, + 3.1143e-02, -3.3112e-02, -6.0944e-02, 7.2403e-03, 6.2134e-02, + 1.5930e-02, -6.3477e-02, 1.5442e-02, -9.2545e-03, 4.6425e-03, + -2.2705e-02, 2.4551e-02, 3.8788e-02, -8.3008e-03, -5.0537e-02, + -3.0624e-02, 3.3783e-02, -5.7587e-02, -1.5533e-02, -1.8036e-02, + -1.4130e-02, 4.0894e-03, 4.6051e-02, -8.1299e-02, 3.1738e-02, + 1.8219e-02, -1.5175e-02, 3.0014e-02, 7.3547e-02, 2.8801e-03, + -6.2447e-03, -3.5858e-02, -1.1673e-02, -3.0060e-02, 2.4414e-02, + -1.2505e-02, -9.6924e-02, 3.1174e-02, 1.0956e-01, -1.3878e-02, + -5.0735e-03, 4.1199e-02, 4.9133e-02, -1.2459e-02, -3.4515e-02, 
+ 8.7097e-02, 1.1810e-02, -4.8706e-02, 2.0004e-02, 9.0179e-03, + -1.4343e-02, -6.8604e-02, 2.2568e-02, 3.3478e-02, 1.3725e-02, + 7.2136e-03, 2.1301e-02, -1.0628e-02, -2.1362e-02, -1.1755e-01, + 1.2100e-02, 1.5434e-02, -7.8491e-02, 9.7656e-03, -3.3630e-02, + -1.3838e-03, -2.8286e-03, -3.0460e-03, -1.3405e-02, 5.1422e-02, + 1.9658e-04, 2.6611e-02, -4.2328e-02, 3.4241e-02, 7.5569e-03, + 4.6661e-02, 1.2703e-02, -1.2032e-02, -3.1528e-03, 4.0527e-02, + 5.0720e-02, 9.2087e-03, -6.1989e-03, -1.1612e-02, 4.4937e-03, + 8.2321e-03, 8.4839e-03, -7.0435e-02, 7.0686e-03, -1.1665e-02, + 3.5065e-02, 5.2277e-02, -2.3499e-02, 6.0242e-02, 6.7566e-02, + 1.6663e-02, 5.6458e-02, 1.2810e-02, 1.6693e-02, -5.4646e-04, + -6.7078e-02, 2.1118e-02, -9.4727e-02, 2.5650e-02, 1.8707e-02, + 1.2650e-02, -4.4159e-02, 2.3376e-02, 7.8674e-02, 1.8646e-02, + -6.2256e-02, -1.5190e-02, -2.3056e-02, 1.1292e-02, 1.6510e-02, + -1.3107e-02, 2.2736e-02, 3.0975e-03, -4.0283e-02, -1.1543e-02, + -4.8866e-03, -6.9153e-02, 1.1856e-02, -1.8356e-02, -2.3087e-02, + -4.6692e-02, 2.5101e-02, 1.2138e-02, 4.1237e-03, -6.6589e-02, + 7.2510e-02, -4.6616e-03, -7.1167e-02, -2.3834e-02, 7.9498e-03, + 1.5915e-02, -2.0721e-02, -1.6296e-02, 1.0139e-02, 4.0131e-02, + 3.5797e-02, 2.6913e-03, 2.5009e-02, -2.1103e-02, -4.7577e-02, + 6.7444e-02, 1.1787e-02, -2.8687e-02, -2.8244e-02, 1.5015e-02, + 1.8646e-02, 7.9803e-03, 3.1647e-02, -1.0902e-02, 7.0740e-02, + -5.1086e-02, -3.5431e-02, 4.2816e-02, -4.6844e-02, -9.9792e-03, + -1.7197e-02, -9.9258e-03, 3.2501e-02, 4.8279e-02, 3.3508e-02, + -1.7410e-02, -8.0200e-02, 1.0544e-02, -6.9092e-02, -2.8915e-02, + -9.5886e-02, -5.0598e-02, 2.9327e-02, 1.7441e-02, 1.2245e-02, + 4.3610e-02, 2.4643e-02, -5.5542e-02, -2.7710e-02, 1.4946e-02, + 1.3275e-02, -1.6937e-02, -6.4697e-02, -2.3621e-02, -2.4048e-02, + 1.6418e-02, -8.3389e-03, 8.1863e-03, -1.1986e-02, -1.3840e-02, + -9.3201e-02, -2.2182e-03, 2.0386e-02, 5.4901e-02, -2.2339e-02, + -4.2969e-02, 2.8580e-02, 1.3611e-02, -3.8452e-03, 4.8706e-02, + 
-3.9948e-02, 2.0447e-02, -1.2100e-02, 6.1005e-02, 3.1830e-02, + 8.1665e-02, -5.4077e-02, -3.3966e-02, -9.5291e-03, -7.4244e-04, + 5.3467e-02, 2.3865e-02, 5.9433e-03, 3.8483e-02, -6.2675e-03, + -6.4148e-02, 3.1708e-02, 8.2064e-04, 3.6926e-03, 1.6327e-03, + -3.8929e-03, 2.4933e-02, 5.3253e-02, 6.4163e-03, -4.3365e-02, + 5.6274e-02, 2.8351e-02, 1.0307e-02, 2.8305e-03, -2.1515e-02, + 1.6968e-02, 6.2744e-02, 1.4587e-02, 5.6839e-03, 2.6646e-03, + 2.7756e-02, 8.0505e-02, 5.8472e-02, 3.3813e-02, 4.8637e-03, + -1.0284e-02, 4.4434e-02, 1.3077e-02, 4.7729e-02, 3.7384e-02, + 9.8267e-03, -2.0920e-02, -3.3600e-02, -1.3557e-02, 1.5671e-02, + 6.0120e-03, 1.8631e-02, 7.0740e-02, 4.0985e-02, -3.2288e-02, + -2.6886e-02, -3.2410e-02, 3.0579e-02, -3.1174e-02, -5.9692e-02, + 2.9373e-02, 1.3802e-02, -6.5918e-03, -7.5684e-02, 1.1101e-02, + -1.0614e-03, -2.8885e-02, -3.6392e-03, -1.3153e-02, 3.3112e-02, + -6.7383e-02, 1.3893e-02, 5.3741e-02, -1.1772e-02, -6.5384e-03, + 9.5139e-03, 3.7842e-02, 7.5928e-02, 3.4912e-02, -4.2908e-02, + -3.3539e-02, 3.1891e-02, -1.3733e-02, 5.4596e-02, -2.0233e-02, + -3.7201e-02, -2.6230e-02, -3.4695e-03, -3.3207e-03, -1.5518e-02, + 2.8442e-02, 5.5756e-02, 4.7729e-02, -1.6388e-02, 7.0251e-02, + -6.8298e-02, 3.2867e-02, 3.2898e-02, -3.7212e-03, 3.6560e-02, + 5.8746e-02, 3.7781e-02, 1.0370e-01, -6.2286e-02, -3.5126e-02, + -4.3030e-02, 1.2123e-02, 3.3966e-02, 4.0016e-03, -1.0986e-02, + -2.0584e-02, 1.5541e-02, 1.2108e-02, 6.8237e-02, 3.9673e-01, + -1.8234e-03, 5.7098e-02, 1.4084e-02, 5.7037e-02, 1.8982e-02, + 3.2227e-02, -5.1941e-02, 2.2110e-02, -1.8753e-02, 8.0643e-03, + -2.7237e-02, 1.2276e-02, 1.0309e-03, -1.6800e-02, 3.3051e-02, + 3.7746e-03, 5.0850e-03, -3.6285e-02, -5.2673e-02, 3.6041e-02, + -1.0266e-01, 8.7929e-04, 2.5345e-02, -1.6159e-02, -6.3992e-04, + 3.1189e-02, -1.1711e-02, 1.3405e-02, 9.0408e-03, -2.3865e-02, + 1.2596e-02, 7.6828e-03, -6.0394e-02, -2.0798e-02, 2.5085e-02, + -5.6953e-03, -1.1284e-02, -2.2385e-02, -5.0415e-02, 5.0018e-02, + 5.1208e-02, 
5.1918e-03, 3.9734e-02, 3.3112e-02, 9.2224e-02, + -4.4189e-02, -1.9226e-03, -1.9653e-02, -3.7170e-02, 4.0375e-02, + -8.1253e-03, -3.9558e-03, -7.6294e-02, -5.6061e-02, -6.2012e-02, + 2.2400e-02, 1.1206e-05, 4.1046e-02, 5.8603e-04, -3.8513e-02, + -4.7455e-03, 1.8951e-02, -6.6681e-03, -3.8815e-03, 4.3579e-02, + 7.7148e-02, -1.9348e-02, 1.0681e-02, 1.5076e-02, -4.2725e-02, + 5.6076e-03, -3.1250e-02, -2.6230e-02, -6.9519e-02, -5.3345e-02, + -5.9875e-02, -1.8280e-02, 2.1881e-02, -2.3605e-02, 2.3453e-02, + -3.0563e-02, -4.1122e-03, -4.6417e-02, -2.5146e-02, -2.1683e-02, + 4.3060e-02, 3.7506e-02, 4.7516e-02, 2.2934e-02, -1.1604e-02, + -1.3635e-01, 6.0699e-02, 3.2928e-02, 1.8631e-02, 6.5689e-03, + 1.3412e-02, -4.0649e-02, 4.2175e-02, 3.3140e-05, -1.4381e-02, + 4.6204e-02, 5.7404e-02, -1.4488e-02, -6.7291e-03, 1.2566e-02, + 1.5469e-03, -3.6774e-02, -3.0350e-02, 2.1133e-02, 1.8555e-02, + 6.1417e-03, -1.1955e-02, -6.7871e-02, 1.5640e-02, -6.0638e-02, + -5.2887e-02, 2.1347e-02, 1.8936e-02, -9.1248e-02, 6.2103e-02, + -1.8204e-02, 1.1002e-02, 2.5787e-02, 4.5624e-02, 3.7384e-02, + -4.1466e-03, 6.1035e-02, -2.8343e-03, 1.4069e-02, -1.1894e-02, + -7.2815e-02, -8.3847e-03, -1.7099e-03, 8.9493e-03, 2.1667e-02, + 9.3002e-03, 3.6957e-02, -2.6047e-02, 7.4730e-03, -3.2135e-02, + -7.1777e-02, 5.0116e-04, 2.8610e-06, -1.5457e-02, -5.6915e-02, + 5.3070e-02, 1.1032e-02, 1.0065e-01, 5.8472e-02, -2.7676e-03, + 1.3481e-02, -1.7410e-02, -1.6434e-02, -1.0696e-02, -3.8223e-03, + -3.1616e-02, -2.2751e-02, -5.1239e-02, 1.8570e-02, -7.3914e-02, + -9.0256e-03, 5.8990e-02, 4.5967e-03, 7.6408e-03, 6.4697e-03, + 5.3978e-03, -1.4824e-02, -9.7839e-02, -1.1887e-02, -7.8430e-03, + -1.9012e-02, -4.1901e-02, 1.6632e-02, 6.9397e-02, 2.6230e-02, + 8.9722e-03, -4.3068e-03, 6.9458e-02, 1.2672e-02, 5.3263e-04, + 9.3460e-03, -1.3313e-02, 5.7404e-02, 4.2664e-02, 1.2894e-02, + 7.8430e-02, 1.7822e-02, 1.2802e-02, 6.7505e-02, -1.9592e-02, + -3.7750e-02, 6.7322e-02, 2.6031e-02, -1.2741e-02, 8.3923e-03, + 5.6122e-02, 
-1.0010e-02, 2.6276e-02, -6.3904e-02, 5.0446e-02, + 2.3346e-02, 2.2293e-02, 9.0866e-03, 3.9825e-02, -2.2980e-02, + -1.8143e-02, -3.0060e-02, 2.7557e-02, 2.1820e-02, 4.9774e-02, + 1.0857e-02, 3.8757e-02, -2.5162e-02, -4.6921e-03, -1.2398e-02, + -1.0277e-02, -9.8328e-02, -1.1002e-02, 1.0178e-02, 2.4597e-02, + -2.8351e-02, -1.9394e-02, 3.0487e-02, -3.8757e-03, -7.9880e-03, + -1.2405e-02, -1.8463e-02, -1.5091e-02, 4.1901e-02, 2.8778e-02, + -1.0979e-02, 9.3384e-03, 3.1494e-02, -3.5339e-02, 3.8204e-03, + 2.4368e-02, -1.1688e-01, -4.3396e-02, 1.4755e-02, 1.2825e-02, + -1.5335e-03, 4.4495e-02, -2.8778e-02, -3.5553e-03, -8.0643e-03, + -1.2749e-02, -5.0240e-03, 2.8473e-02, 3.6163e-02, 4.6005e-03, + 5.8746e-02, -3.0441e-02, -2.9129e-02, 3.5706e-02, 2.1362e-02, + 3.8528e-03, 4.5837e-02, -4.4647e-02, -5.2826e-02, 2.2247e-02, + -2.3148e-02, 4.0466e-02, -3.2379e-02, -4.2969e-02, -8.1360e-02, + 2.4963e-02, 4.4861e-02, -1.9272e-02, -1.4610e-02, 1.7929e-03, + -7.0992e-03, 3.1799e-02, 1.0791e-01, 3.3417e-02, 4.9744e-03, + 4.3732e-02, 3.1586e-02, 1.9821e-02, -4.5013e-02, -4.3640e-02, + 7.3486e-02, -2.6794e-02, -1.0699e-01, 3.5973e-03, -3.3569e-02, + -4.5349e-02, 6.7139e-03, -5.1727e-02, -3.2318e-02, 1.1993e-02, + -1.4305e-03, 2.7252e-02, -5.1636e-02, 2.9587e-02, -1.5289e-02, + 8.7433e-03, 7.8430e-03, 9.8114e-03, 1.2848e-02, -5.4199e-02, + 1.4542e-02, 1.8951e-02, 9.6130e-03, 2.9816e-02, -9.6512e-03, + -1.6708e-02, -2.7252e-02, -6.4148e-02, -6.6895e-02, -2.6230e-02, + 2.5436e-02, -6.1836e-03, -5.2185e-02, -1.8524e-02, -1.3237e-02, + -5.5725e-02, -3.5126e-02, -5.2917e-02, -6.3354e-02, -3.4851e-02, + -1.6068e-02, -2.1957e-02, 5.4092e-03, -5.4207e-03, 1.1559e-02, + 5.0781e-02, -1.1292e-02, 2.5009e-02, 8.1558e-03, 4.3564e-03, + -1.8936e-02, 3.6201e-03, -2.3834e-02, -1.9026e-03, 5.2795e-03, + -4.9174e-05, -3.1952e-02, 1.6068e-02, 7.0923e-02, 3.7048e-02, + -2.2232e-02, 6.5308e-02, -1.3069e-02, -8.0872e-02, -1.5175e-02, + 6.2439e-02, 1.6174e-02, -2.7466e-02, -2.0432e-02, -2.8900e-02, + 
-3.3073e-03, 7.0862e-02, -2.1133e-02, -7.9880e-03, -5.4413e-02, + -3.5095e-02, 1.4954e-01, -5.7800e-02, 3.8818e-02, 2.6459e-02, + -1.6842e-03, 4.2542e-02, -2.4109e-02, -2.2034e-02, 5.8060e-03, + -2.1992e-03, 4.8561e-03, 6.8779e-03, -3.0319e-02, 3.6865e-02, + 2.5711e-02, -3.7323e-02, -5.0201e-02, -5.8411e-02, -2.3453e-02, + 2.4719e-02, 4.3884e-02, -8.3029e-05, -3.0212e-02, -2.6596e-02, + 3.6194e-02, 5.2368e-02, -1.2566e-02, -1.8072e-03, -4.1412e-02, + 1.4153e-02, 4.2206e-02, -4.5441e-02, 1.7147e-03, 1.3176e-02, + -2.5970e-02, -7.2336e-04, -3.8208e-02, 4.0283e-02, 7.5188e-03, + -9.6054e-03, 4.3526e-03, -9.2087e-03, 4.0550e-03, 3.4302e-02, + -1.6312e-02, 1.7105e-02, 2.2003e-02, -1.6708e-02, -6.7749e-02, + -8.1940e-03, -3.3478e-02, 1.3794e-02, -2.0035e-02, -3.3875e-02, + 1.3062e-02, -3.4912e-02, 1.0956e-02, -2.8534e-03, -3.4210e-02, + -2.6230e-02, -2.7374e-02, -3.8452e-02, -3.9124e-02, 3.7598e-02, + 2.9434e-02, -6.7993e-02, 2.3544e-02, 2.5116e-02, -6.5269e-03, + -2.5787e-02, -1.6769e-02, -4.9164e-02, -3.3234e-02, 1.2070e-02, + 9.2224e-02, -7.6599e-02, 1.3876e-03, 2.7161e-02, 5.3253e-03, + 5.2765e-02, 7.7171e-03, 4.9408e-02, 2.0340e-02, -4.4403e-02, + 6.4430e-03, 9.6924e-02, -3.0487e-02])Parameter containing: +tensor([1.5612, 1.6291, 1.5122, 1.5747, 1.6225, 1.5788, 1.5892, 1.5757, 1.6082, + 1.5988, 1.6139, 1.5868, 1.5801, 1.6259, 1.5611, 1.5231, 1.6739, 1.6669, + 1.5990, 0.4274, 1.5622, 1.6011, 1.5774, 1.5165, 1.6530, 1.5793, 1.5959, + 1.5585, 1.6473, 1.4690, 1.5691, 1.5691, 1.6401, 1.5813, 1.6636, 1.5143, + 1.6102, 1.6316, 1.6255, 1.5060, 1.5319, 1.6488, 1.6245, 1.6413, 1.6142, + 1.5674, 1.5211, 1.6072, 1.6012, 1.5171, 1.6199, 1.5440, 1.5075, 1.5305, + 1.5490, 1.6551, 1.5008, 1.6083, 1.6627, 1.5574, 1.5938, 1.5662, 1.5658, + 1.5936, 1.5152, 1.6172, 1.4900, 1.5247, 1.6271, 1.5863, 1.5609, 1.7847, + 2.1021, 1.6023, 1.6314, 1.5624, 1.6506, 1.4861, 1.5519, 1.6230, 1.5676, + 1.5654, 1.5644, 1.6012, 1.5914, 1.5827, 1.5950, 1.5313, 1.6519, 1.6020, + 1.5715, 1.5640, 1.5664, 
1.6471, 1.5941, 1.5665, 1.6015, 1.5576, 1.5990, + 1.6531, 1.5997, 1.5374, 1.5525, 1.6357, 1.5070, 1.6177, 1.5243, 1.5483, + 1.6100, 1.6093, 1.5234, 1.6186, 1.6167, 1.6678, 1.6350, 1.5438, 1.5267, + 1.5384, 1.6124, 1.5812, 1.6419, 1.5493, 1.5972, 1.5512, 1.6389, 1.6147, + 1.4907, 1.5437, 1.5886, 1.5572, 1.5550, 1.4767, 1.5170, 1.5503, 1.5716, + 1.5880, 1.6357, 1.5919, 1.6067, 1.5730, 1.5858, 1.5905, 1.6521, 1.5865, + 1.5048, 1.5617, 1.5851, 1.6758, 1.5167, 1.5563, 1.5114, 1.5789, 1.5416, + 1.6574, 1.5917, 1.3285, 1.6240, 1.6606, 1.6460, 1.6168, 1.5033, 1.5473, + 1.6333, 1.5656, 1.5466, 1.5838, 1.5561, 1.5466, 1.5480, 1.6122, 1.5619, + 1.5732, 1.5800, 1.5441, 1.6047, 1.6982, 1.6320, 1.6484, 1.6278, 1.6129, + 1.6167, 1.5956, 1.6070, 1.5811, 1.6763, 1.5567, 1.6143, 2.1741, 1.6671, + 1.5965, 1.5974, 1.5375, 1.6123, 1.5943, 1.5073, 1.5639, 1.5723, 1.6553, + 3.4704, 1.5658, 1.5836, 1.6005, 1.6608, 1.6609, 1.6136, 1.5075, 1.5065, + 1.5464, 1.4956, 1.5471, 1.5916, 1.5797, 1.5640, 1.4862, 1.5662, 1.5947, + 1.5522, 1.5988, 1.5318, 1.6886, 1.4830, 1.5993, 1.5229, 1.5385, 1.5668, + 1.5360, 1.5791, 1.6482, 1.6321, 1.7258, 1.6303, 1.6478, 1.7792, 1.5524, + 1.5670, 1.6981, 1.5449, 1.5758, 1.5182, 1.6659, 1.6315, 1.5490, 1.6675, + 1.6144, 1.6283, 1.5757, 1.5882, 1.5369, 1.5254, 1.7189, 1.6006, 1.6993, + 1.5655, 1.5378, 1.6526, 1.6030, 1.5295, 1.5335, 1.5831, 1.5121, 1.6575, + 1.5789, 1.5940, 1.5068, 1.4599, 1.5887, 1.5618, 1.5181, 1.7093, 1.4992, + 1.6316, 1.5302, 1.5932, 1.5249, 1.6118, 1.5605, 1.4670, 1.5210, 1.6202, + 1.6143, 1.5616, 1.7132, 1.6734, 1.6960, 1.6497, 1.5144, 1.5368, 1.5412, + 1.5550, 1.5144, 1.5971, 1.6508, 1.6773, 1.5563, 1.6012, 1.5162, 1.5163, + 1.6103, 1.6071, 1.6437, 1.5799, 1.5341, 1.6273, 1.6276, 1.6143, 1.6015, + 1.5359, 1.5915, 1.5081, 1.5776, 1.5932, 1.6629, 1.6389, 1.6354, 1.6139, + 1.5766, 1.5325, 1.5796, 1.5362, 1.5868, 1.5815, 1.5430, 1.6207, 1.6325, + 1.0084, 1.3852, 1.6033, 1.6354, 1.6064, 1.6104, 1.5252, 1.6160, 1.6036, + 1.5788, 1.5968, 1.5653, 
1.5560, 1.4895, 1.5731, 1.5991, 1.6102, 1.5368, + 1.6023, 1.5326, 1.6283, 1.6053, 1.5736, 1.6930, 1.5774, 1.5697, 1.6059, + 1.6048, 1.5693, 1.6887, 1.5748, 1.6362, 1.5797, 1.5532, 1.5407, 1.6113, + 1.5052, 1.6081, 1.6208, 1.5894, 1.5822, 1.5563, 1.6003, 1.5935, 1.6142, + 1.5649, 1.5751, 1.6461, 1.5523, 1.5837, 1.5288, 1.5320, 1.5913, 1.5801, + 1.4600, 1.5917, 1.6242, 1.6360, 1.4584, 1.6947, 1.5531, 1.5535, 1.6059, + 1.5470, 1.6079, 1.5324, 1.6425, 1.4971, 1.5478, 1.5758, 1.6043, 1.6234, + 1.4722, 1.6315, 1.6825, 1.6424, 1.5320, 1.6130, 1.6026, 1.5700, 1.5041, + 1.5321, 1.5527, 1.5418, 1.6419, 1.4374, 1.6255, 1.5188, 1.5278, 1.5432, + 1.4489, 2.3006, 1.5959, 1.7023, 1.5785, 1.6375, 1.5754, 1.5096, 1.6006, + 1.6337, 1.5149, 1.5128, 1.5400, 1.5957, 1.5554, 1.5141, 1.6146, 1.4660, + 1.6387, 1.6256, 1.5380, 1.6197, 1.5558, 1.4969, 1.5637, 1.5350, 1.5603, + 1.5597, 1.5843, 1.3633, 1.5204, 1.6087, 1.5517, 1.5795, 1.6358, 1.5697, + 1.5797, 1.5676, 1.6028, 1.5955, 1.5415, 1.6213, 1.4824, 1.5580, 1.5154, + 1.6320, 1.5831, 1.5853, 1.5625, 1.5538, 1.5902, 1.6137, 1.5797, 1.5854, + 1.6301, 1.5884, 1.6639, 1.6174, 1.6834, 1.5654, 1.6103, 1.5832, 1.6769, + 1.6275, 1.5682, 1.5536, 1.5728, 1.5328, 1.6245, 1.3324, 1.6080, 1.6106, + 1.5931, 1.5965, 1.6213, 1.5882, 1.6126, 1.6217, 1.5003, 1.5451, 1.5859, + 1.6413, 1.5879, 1.4787, 1.5228, 1.6128, 1.6167, 1.6126, 1.4987, 1.6039, + 1.4994, 1.5541, 1.4806, 1.6224, 1.5732, 1.5347, 1.5990, 1.6350, 1.6045, + 1.5526, 1.5640, 1.6160, 1.6265, 1.5525, 1.5405, 1.5849, 1.5789, 1.5462, + 1.6115, 1.5663, 1.5264, 1.5779, 1.5052, 1.5357, 1.6098, 1.5319, 1.6803, + 1.6384, 1.6489, 1.5756, 1.6014, 1.5709, 1.6672, 1.5551, 1.5675, 1.7146, + 1.6731, 1.6189, 1.5798, 1.5877, 1.6571, 1.5571, 1.6023, 1.5781, 1.6524, + 1.6087, 1.5548, 1.5786, 1.6094, 1.6381, 1.6036, 1.5332, 1.5510, 1.6183, + 1.6424, 1.5860, 1.5442, 1.5265, 1.6016, 1.5861, 1.6329, 1.6371, 1.6098, + 1.5439, 1.6541, 1.6189, 1.5390, 1.6603, 1.6226, 1.5107, 1.6136, 1.6319, + 1.6133, 1.6171, 1.5596, 
1.5044, 1.5676, 1.6682, 1.6274, 1.5624, 1.6302, + 1.6082, 1.6882, 1.5975, 1.5947, 1.6779, 1.5172, 1.8169, 1.5628, 1.5574, + 1.5296, 1.6388, 1.6386, 1.5509, 1.5011, 1.5941, 1.5359, 1.6311, 1.6930, + 1.5646, 1.5703, 1.7156, 1.7039, 1.7237, 1.6125, 1.6781, 1.6184, 1.6499, + 1.5913, 1.5678, 1.5397, 1.6042, 1.5798, 1.4778, 1.6637, 1.6021, 1.5434, + 1.5934, 1.6021, 1.6542, 1.5897, 1.5038, 1.6211, 1.5139, 1.4714, 1.5686, + 1.6549, 1.6761, 1.5084, 1.5039, 1.6728, 1.5570, 1.5322, 1.6111, 1.5362, + 1.5236, 1.5444, 1.6373, 1.6728, 1.5343, 1.4118, 1.6080, 1.6841, 1.5738, + 1.6541, 1.5087, 1.6481, 1.5934, 1.6207, 1.5490, 1.6071, 1.6627, 1.6416, + 1.6076, 1.6141, 1.5574, 1.6018, 1.5730, 1.6248, 1.5821, 1.5914, 1.5954, + 1.7080, 1.5800, 1.5834, 1.6229, 1.5816, 1.5754, 1.5840, 1.3503, 1.5136, + 1.5413, 1.6312, 1.6352, 1.5054, 1.5827, 1.5213, 0.3553, 1.6140, 1.5190, + 1.5867, 1.5369, 1.5415, 1.6162, 1.6345, 1.5307, 1.6651, 1.5010, 1.6128, + 1.5278, 1.5920, 1.5121, 1.5923, 1.5087, 1.5450, 1.5725, 1.5622, 1.5693, + 1.5568, 1.5951, 1.5524, 1.5817, 1.5403, 1.5696, 1.5777, 1.5988, 1.5369, + 1.6626, 1.5796, 1.6014, 1.5697, 1.5798, 1.4994, 1.5147, 1.5696, 1.5165, + 1.5875, 1.6455, 1.6432, 1.6248, 1.6029, 1.5540, 1.3902, 1.5868, 1.6621, + 1.6429, 1.5988, 1.6661, 1.5714, 1.5929, 1.5571, 1.6623, 1.5666, 1.5757, + 1.5621, 1.5998, 1.5808, 1.5551, 1.6553, 1.5133, 1.6109, 1.5503, 1.5683, + 1.5732, 1.6376, 1.5990, 1.5096, 1.5476, 1.5207, 1.7330, 1.6614, 1.5702, + 1.5172, 1.5875, 1.6315, 1.5304, 1.6697, 1.5546, 1.5182, 1.5630, 1.5935, + 1.6117, 1.4515, 1.5856])Parameter containing: +tensor([ 1.0403e-01, 9.7060e-02, -1.5602e-01, -8.2800e-03, 1.0229e-01, + 1.4111e-01, 1.0306e-01, 5.4273e-02, -1.6648e-01, -8.7717e-03, + 6.2813e-02, -2.6458e-02, -9.6767e-02, -2.1291e-02, -7.9400e-02, + -5.0567e-02, -3.2953e-02, -2.4269e-02, -5.1724e-03, 4.8843e+00, + -8.4254e-02, 5.4768e-02, -7.4972e-03, -2.4967e-02, -9.5140e-04, + 1.5371e-02, 7.1181e-03, -8.3969e-03, 2.2335e-02, -8.5531e-02, + 3.7403e-02, 2.8807e-02, 
1.7188e-02, 1.0637e-01, 9.4134e-02, + -8.2811e-02, 4.2986e-03, -5.9408e-02, -8.1549e-02, 4.5918e-02, + 2.3936e-02, 3.2823e-02, 8.4698e-03, 9.2395e-03, -4.5630e-02, + 1.6469e-02, 3.6515e-02, -3.3968e-02, 5.7709e-02, 5.4930e-02, + 6.1122e-02, -6.1926e-06, 4.1280e-02, 3.4293e-02, 9.1601e-02, + 1.3812e-01, -7.9320e-02, -7.3999e-02, 5.8668e-02, 4.7881e-03, + 9.5586e-02, -7.9776e-02, 2.8651e-02, 6.1912e-03, 1.1924e-01, + -4.3352e-03, 1.2326e-01, 1.0645e-01, -4.3967e-03, 1.8691e-01, + 2.4468e-02, 2.8348e-02, 5.9777e-01, 7.2056e-02, -1.2830e-01, + 8.9767e-02, 4.4243e-02, -1.1128e-01, -8.0267e-02, -1.6716e-02, + -1.8243e-02, -5.4433e-02, -1.3675e-02, 1.2009e-01, 2.2563e-02, + 3.1082e-02, 1.3952e-01, 1.2806e-01, -2.9679e-02, -2.6804e-02, + 8.4288e-03, 2.6239e-02, 4.8801e-02, -5.7234e-02, -3.9436e-02, + 4.0219e-02, -2.0147e-02, -3.7820e-03, -9.8342e-02, -4.0746e-02, + 1.4139e-02, -5.4825e-02, 3.9232e-02, -9.4168e-02, 2.0618e-02, + -1.3461e-02, 3.9536e-02, -6.7572e-02, 2.6576e-02, -7.1447e-02, + -7.3643e-02, 1.1431e-03, 4.0579e-02, 5.8312e-02, 8.5941e-02, + -5.1140e-02, -1.5227e-02, -9.3090e-02, 1.2289e-02, 9.8821e-02, + -4.3573e-02, -2.1652e-01, -8.5814e-02, -3.8511e-02, -1.6028e-01, + 5.6119e-03, -1.4571e-01, 3.2770e-02, 5.6601e-02, -3.9532e-02, + -8.4895e-02, 6.0803e-02, -5.5046e-02, -2.6444e-02, -5.8353e-02, + 4.9541e-02, -2.7995e-02, -1.8478e-02, -3.4308e-02, -3.5461e-02, + 2.1209e-02, -4.9318e-03, 5.6645e-02, 3.7480e-02, 6.0327e-02, + -6.5690e-04, -2.0427e-02, -2.3503e-02, 1.8327e-04, 1.1751e-01, + 1.9287e-01, -7.5346e-02, -1.1450e-01, 1.6207e-02, -3.5160e-02, + -3.7534e-01, -1.0929e-02, -3.1112e-02, 3.6410e-02, 1.0848e-02, + 1.6411e-01, -1.3910e-01, -4.8034e-02, 2.0327e-02, 1.7898e-02, + -1.4329e-02, -6.9380e-02, 4.4215e-02, -6.9706e-02, 2.2333e-02, + -4.2725e-02, 7.9795e-02, 4.6975e-02, 4.7850e-02, 4.6778e-02, + -1.3970e-01, -1.4567e-03, 5.0245e-02, -1.2355e-02, 4.1464e-02, + -1.9858e-02, -6.7841e-02, 2.9885e-02, -2.5422e-02, 3.9081e-02, + -1.5655e-01, 3.9403e-02, 
2.2546e-01, -2.9838e-02, -8.0923e-02, + 3.2238e-02, -3.3750e-02, 1.0578e-01, 8.4942e-02, -1.0134e-01, + 1.0470e-02, -1.6958e-02, -1.5581e-01, 8.7543e-02, 4.6490e-02, + -1.8868e-01, -2.1945e-02, -6.2705e-02, 5.4263e-02, 5.5378e-02, + 1.5345e-01, -8.8809e-02, 1.1056e-01, 1.3004e-01, 3.1240e-03, + 4.8471e-02, 2.2921e-01, -1.2579e-02, -3.3532e-02, -9.3535e-02, + -1.1855e-02, -5.6449e-02, 4.5213e-02, -1.4483e-01, -1.9879e-01, + -1.6979e-02, -7.0818e-02, 8.4117e-02, -1.6528e-02, 2.0236e-02, + -3.8914e-02, 6.5753e-02, -4.7799e-02, 1.2737e-01, -2.6316e-02, + -1.8444e-02, 3.9019e-02, -5.2672e-03, 5.9503e-02, -2.2726e-02, + 3.4543e-02, -1.0741e-01, 3.9277e-02, 6.2676e-02, 2.2335e-02, + 4.1903e-02, 1.1149e-01, 4.2704e-02, 5.2054e-02, 4.1758e-02, + 5.8227e-02, 8.3051e-02, -7.6821e-02, 6.0740e-03, -1.2686e-02, + 1.6613e-02, 2.4717e-02, 1.0240e-01, -1.2099e-01, 1.0826e-01, + -3.0802e-02, -8.7470e-02, -1.0857e-01, -1.1902e-04, 2.7459e-02, + -5.9645e-02, -7.6922e-02, -5.7398e-02, -5.9345e-02, -7.4553e-02, + 4.1273e-02, -7.1765e-02, -1.5432e-02, -9.4385e-02, 1.0805e-02, + -4.0354e-02, 7.1086e-02, 1.0681e-01, -7.6471e-02, 8.4077e-03, + 6.2677e-02, -9.7095e-02, 3.0733e-02, 8.5449e-02, -1.1908e-01, + 1.7565e-01, 1.5352e-01, 8.9398e-03, 4.5373e-02, 3.3377e-02, + 5.5408e-02, 4.7944e-02, -7.5976e-02, 8.8923e-02, -1.3306e-01, + -7.5426e-03, -1.8189e-02, 1.3748e-01, 4.3790e-02, 4.7611e-02, + 1.8740e-02, 1.8154e-01, 5.4250e-02, 8.3740e-02, 7.2689e-02, + 6.7214e-02, 5.0011e-03, -6.1465e-02, 1.2461e-01, 2.1885e-02, + 2.4945e-03, 1.0912e-01, -8.7420e-02, 5.2805e-02, -6.0513e-02, + 7.2861e-02, 3.0458e-02, -9.6075e-02, -1.2647e-01, 2.7966e-02, + -6.5805e-02, 1.0081e-02, 4.9374e-02, 1.5660e-01, -4.2903e-02, + -7.9792e-02, 2.2622e-01, -6.6623e-02, -5.4678e-02, 3.2016e-01, + -2.0631e-01, 3.6732e-02, -3.8266e-03, 3.5298e-02, -2.0218e-01, + 1.5783e-01, -3.6070e-02, 1.1865e-01, -3.4082e-02, 2.0930e-02, + 8.3586e-02, 1.5725e-02, -3.8440e-02, 6.5960e-03, -3.2614e-02, + -1.2969e-01, 2.5196e-02, 
-7.0515e-02, -2.4179e-02, 1.0546e-01, + -4.5103e-03, 1.0148e-01, 9.7182e-02, 4.2287e-02, -1.2103e-02, + -6.1044e-02, 5.8164e-02, -1.3052e-02, 1.2791e-01, -1.5350e-02, + -4.3890e-02, -9.8166e-02, 4.9865e-02, -6.0790e-03, -1.1680e-01, + 7.4108e-03, -9.9132e-03, 5.0176e-03, -5.9086e-02, -1.6564e-02, + -2.7472e-02, 2.0742e-02, 1.3547e-01, 1.4747e-02, 3.9906e-02, + 9.4255e-02, -3.2829e-02, -6.6845e-02, -8.2401e-03, -5.0691e-02, + 2.7525e-02, 8.2268e-02, 1.1415e-01, -2.1360e-02, 9.0743e-03, + -1.2780e-01, 1.0709e-01, 3.6027e-02, -3.8634e-02, -3.7508e-02, + -3.5288e-02, -2.5998e-02, -6.0742e-02, -7.8862e-02, -1.0022e-01, + 3.9805e-02, -3.6270e-02, -5.7538e-02, -2.7233e-02, 4.4938e-02, + 2.6449e-03, 9.3045e-03, 3.3165e-02, -7.1608e-03, 6.8827e-02, + 1.0516e-01, -3.3185e-02, 8.7285e-02, 3.3311e-02, -1.1761e-01, + -1.3953e-01, -7.4792e-02, 3.7520e-02, 1.5746e-02, 9.0114e-04, + -8.0773e-02, 4.4950e-02, 6.3050e-02, -8.5671e-02, -6.2881e-03, + -1.5994e+00, -4.3553e-02, 2.9454e-02, -5.6907e-02, -3.5270e-02, + -3.1434e-02, 1.7889e-02, -7.5432e-03, -1.1371e-01, -5.5130e-02, + 2.9257e-02, -1.7139e-01, -1.1265e-01, 6.7160e-02, -3.7416e-02, + -7.2524e-02, -3.5468e-02, -1.1186e-03, 6.8092e-02, 1.1672e-01, + -1.4002e-04, 3.5592e-02, 5.8436e-02, 1.3232e-01, -7.7206e-02, + -4.7924e-02, -8.6854e-02, -4.7352e-02, 8.5665e-02, 1.4172e-01, + -1.4817e-01, -1.8879e-02, -7.7543e-02, 1.4620e-01, 1.0426e-02, + 5.8315e-03, -6.9976e-03, -1.2946e-01, 7.7025e-02, 3.7671e-02, + 1.4007e-02, 4.8044e-02, 1.4411e-02, -5.2605e-02, -1.2956e-01, + 1.0883e-02, 3.6544e-02, -1.0974e-01, 4.1607e-02, -4.8047e-03, + -1.9821e-03, 3.7107e-02, -7.3962e-02, 7.7033e-02, 3.0835e-02, + -1.0230e-02, -1.2997e-01, -9.3121e-02, 2.8200e-02, -3.7071e-02, + 6.1153e-02, 1.6810e-01, 8.3451e-02, -7.2332e-03, 1.0225e-01, + 1.1711e-01, -1.2504e-01, 1.0692e-02, 1.7643e-01, -9.4140e-02, + -1.7660e-02, -3.5525e-02, 1.3885e-01, 9.2340e-02, -1.6297e-02, + 2.5335e-02, 2.0930e-02, -3.7296e-02, -2.7389e-02, -1.2104e-01, + 6.3596e-02, 
-5.1928e-02, 9.5645e-02, -7.1889e-02, 3.9743e-03, + 3.1035e-02, -7.1511e-02, 6.0946e-02, -7.1944e-02, -4.7700e-02, + 6.4958e-02, 1.0425e-01, 7.0645e-02, 2.9044e-02, 1.3859e-02, + -8.4643e-02, -2.9044e-02, -8.2613e-02, 5.3671e-02, -4.7501e-02, + -4.3595e-02, 1.8381e-02, 1.6899e-01, 1.1637e-01, -1.2471e-01, + -6.4090e-02, -2.6507e-02, 7.6388e-02, -7.0416e-02, -2.9882e-02, + -1.3445e-02, 4.8258e-03, 3.6539e-03, 3.5404e-02, -8.2439e-02, + 2.2725e-02, 2.4114e-03, -1.5794e-02, -8.3513e-02, -3.4496e-02, + 1.8298e-02, -2.2342e-02, 1.4748e-04, -5.4580e-02, -1.0311e-01, + -2.7600e-02, 4.6100e-02, 1.7093e-02, 1.9861e-02, 2.2585e-02, + 1.4885e-02, 1.0653e-01, 1.1443e-02, 8.4617e-02, 1.5395e-02, + 3.1660e-02, 3.9251e-03, -9.7112e-02, 3.9738e-02, -2.0323e-03, + -1.5839e-02, -1.2260e-02, -8.4579e-02, -2.8312e-03, 1.5448e-01, + -6.6625e-02, 1.7054e-01, 1.5554e-02, 4.5368e-02, 1.5215e-01, + 5.1925e-02, 8.8155e-03, 9.9351e-03, 6.4983e-02, 4.1809e-02, + 1.0706e-01, -1.1904e-02, -7.9306e-02, -1.8990e-02, 8.2367e-02, + 3.7968e-02, 3.2559e-02, 3.2740e-02, 7.0236e-02, -3.4334e-02, + -1.2870e-01, -1.7692e-02, 6.6359e-02, 1.2667e-01, 6.2629e-02, + 2.6319e-02, 6.9796e-02, -7.5105e-02, 9.2368e-02, 4.5839e-02, + -9.4532e-03, 7.9225e-02, 1.2312e-01, -4.0744e-02, 3.0323e-02, + -3.8575e-02, 6.1851e-02, -4.2298e-03, 1.4596e-03, 2.9492e-02, + -6.8381e-02, -1.5145e-01, 1.4044e-02, 3.8219e-03, 2.7525e-02, + -5.9736e-02, 8.6931e-02, -5.9665e-02, 6.6026e-02, -4.7780e-02, + 8.0811e-02, 9.5365e-02, -4.2623e-02, 3.3717e-03, -1.6398e-02, + -5.8409e-02, 5.1490e-02, 1.0267e-01, 7.1453e-03, -7.2832e-02, + 1.0664e-01, 1.3928e-02, 3.6142e-02, 4.5365e-02, 3.0590e-02, + -4.0905e-02, 3.1009e-03, 4.9965e-02, 6.2574e-02, -3.7965e-02, + 1.5737e-01, -2.2978e-03, 1.3913e-01, 1.9018e-01, 4.1395e-02, + 1.9691e-02, -5.2322e-02, 3.8990e-02, 9.0262e-02, -4.5511e-02, + 1.7857e-02, -6.1070e-03, 7.9881e-02, -6.1300e-02, 1.5891e-02, + -9.1146e-03, -1.0800e-02, 1.1645e-01, 4.1218e-02, 1.4686e-01, + 1.4334e-01, 3.2606e-02, 
-4.7522e-02, 9.4717e-02, -8.2010e-02, + 7.2789e-02, -1.5124e-01, -4.3470e-04, 5.9777e-02, 1.2152e-01, + -4.7845e-02, 2.5638e-02, -9.8613e-02, -2.8399e-03, 1.3092e-01, + 5.0667e-03, -1.4642e-01, -1.3147e-01, 6.6508e-02, 4.7440e-02, + -2.2257e-01, 3.7247e-02, 4.4363e-03, 3.6616e-02, -1.6164e-02, + 2.2315e-02, -7.9318e-03, 2.3029e-03, 7.1865e-03, 8.5034e-02, + -1.1355e-02, -4.4492e+00, -4.4573e-02, 2.7494e-02, -3.2405e-02, + -1.0286e-01, -5.5482e-02, -3.4495e-02, -5.3544e-02, 1.8831e-02, + -7.9519e-02, -3.4958e-02, -6.5070e-03, -2.7990e-02, -5.7086e-02, + 7.8146e-02, -9.5072e-03, -8.0300e-02, 4.5083e-02, -7.5206e-02, + 1.1142e-01, 5.7118e-02, -1.1984e-02, 9.7471e-03, -1.5284e-01, + -3.3771e-02, 1.0841e-01, 2.3453e-02, -1.4740e-02, 6.4261e-02, + -5.0851e-02, 1.1362e-01, 7.0055e-02, 4.6831e-02, 3.7477e-02, + -1.3111e-02, -9.9276e-03, 6.9019e-03, -7.9165e-02, -3.6014e-03, + 1.1528e-02, 1.4484e-02, 3.9553e-02, -1.0392e-01, 9.0540e-02, + -2.3436e-02, -2.2548e-01, 1.1270e-01, 5.3056e-02, 9.3794e-02, + 1.0745e-01, 3.8244e-02, 6.4040e-02, 1.0307e-01, -9.8231e-03, + 3.1430e-03, 2.4322e-02, -2.4742e-02, 2.0988e-02, -5.0275e-02, + 1.0009e-01, -1.2285e-01, -8.5569e-02, 7.0198e-02, -6.9018e-02, + -3.8501e-02, -1.6234e-01, -2.3968e-02, 4.3340e-02, -4.9714e-02, + -2.6754e-02, -1.0560e-03, -1.0699e-01, -2.9853e-02, 7.2143e-02, + -1.1260e-02, 4.7919e-02, 9.6402e-02, 4.0698e-02, -4.5639e-02, + -2.8596e-02, 4.3692e-02, 7.4926e-02, -6.2533e-02, -8.9408e-02, + -6.3402e-02, 1.1697e-01, 7.2318e-03])Parameter containing: +tensor([[-0.0048, 0.0208, 0.0301, ..., 0.0115, 0.0131, 0.0097], + [ 0.0114, -0.0127, 0.0137, ..., 0.0169, 0.0023, 0.0218], + [-0.0232, -0.0296, 0.0010, ..., 0.0052, -0.0218, 0.0328], + ..., + [-0.0138, -0.0232, -0.0012, ..., -0.0105, -0.0009, 0.0167], + [-0.0087, 0.0149, -0.0075, ..., 0.0247, -0.0048, -0.0043], + [-0.0146, 0.0123, 0.0197, ..., -0.0132, 0.0005, -0.0090]])Parameter containing: +tensor([-0.2957, -0.3293, -0.3479, ..., -0.1686, -0.4126, -0.3352])Parameter 
containing: +tensor([[-2.0264e-02, 2.8854e-02, 4.7760e-03, ..., -4.8904e-03, + -3.3169e-03, -2.8595e-02], + [ 7.9193e-03, -1.0986e-02, 8.4000e-03, ..., 2.7145e-02, + 3.1189e-02, -2.6474e-02], + [ 1.8585e-02, -1.3618e-02, -1.1322e-02, ..., 6.1989e-03, + -1.4870e-02, -5.5194e-05], + ..., + [ 1.0979e-02, 2.5269e-02, -1.1635e-03, ..., 2.2926e-03, + 2.9037e-02, -2.4094e-02], + [ 1.6174e-02, 2.0721e-02, 5.5618e-03, ..., -1.0529e-03, + 6.1226e-03, 1.5610e-02], + [-1.6403e-02, 1.9646e-03, -7.2136e-03, ..., -3.4119e-02, + -3.3054e-03, -1.8219e-02]])Parameter containing: +tensor([ 7.6416e-02, 3.9429e-02, -1.3733e-01, 5.8136e-02, 3.9581e-02, + 7.4158e-02, 5.7098e-02, -3.6793e-03, -6.1531e-03, 1.2093e-02, + 2.0386e-02, -3.7659e-02, -1.5125e-01, -8.3130e-02, 2.2827e-02, + -1.5557e-05, -9.2102e-02, -7.6752e-03, -2.1561e-02, -1.9568e-01, + 3.1555e-02, 3.9185e-02, 7.4387e-04, -3.9825e-02, 5.6152e-02, + 1.3351e-03, 3.2654e-02, -4.4489e-04, -8.2458e-02, 4.4403e-02, + -2.3438e-02, 8.2947e-02, 2.9175e-02, -5.3329e-03, 8.5876e-02, + -1.5121e-02, -6.7406e-03, 7.0435e-02, 2.0416e-02, 6.4636e-02, + 2.3041e-02, -1.1034e-03, -2.3956e-02, -1.0437e-02, -5.2147e-03, + -7.6866e-03, -8.5815e-02, 6.6956e-02, 6.4209e-02, -1.0811e-02, + 4.2648e-03, 6.3133e-03, 8.9111e-02, -1.9852e-02, -6.9580e-02, + 8.1543e-02, -6.0883e-02, 6.6795e-03, 7.3303e-02, -1.9196e-02, + -1.4107e-02, -8.1970e-02, 6.2927e-02, -6.9702e-02, 2.7161e-02, + -5.0385e-02, 5.2551e-02, 7.9498e-03, 6.5346e-03, -7.0496e-02, + 8.2886e-02, 6.8848e-02, 1.0809e-01, 1.0033e-02, -5.7526e-02, + 1.2383e-02, 5.5084e-02, -5.0774e-03, 1.1398e-02, 5.9845e-02, + 1.1055e-02, -3.6377e-02, -7.8369e-02, 2.3499e-02, 3.6896e-02, + -3.3051e-02, 3.8025e-02, -1.5640e-02, 5.0903e-02, 4.3091e-02, + -7.7972e-03, 7.2571e-02, -2.4017e-02, -9.0210e-02, -6.1584e-02, + 2.7695e-02, 4.2542e-02, -1.4331e-01, -3.0945e-02, 7.7515e-02, + 7.1106e-02, -5.2948e-02, -9.3994e-03, 5.4230e-02, 2.2110e-02, + 1.4473e-02, 4.0009e-02, -3.9558e-03, 2.9907e-02, -2.3315e-02, + 
-6.4148e-02, 7.2876e-02, -6.2805e-02, 7.8552e-02, 1.1102e-01, + 4.9667e-03, 2.8915e-02, 2.5131e-02, 1.1731e-01, 3.4542e-03, + -4.7058e-02, -6.0699e-02, -5.7709e-02, -2.8259e-02, -6.4392e-02, + -1.6251e-02, -5.6091e-02, -1.2344e-02, -6.3843e-02, -8.3679e-02, + -4.3152e-02, -3.4393e-02, -3.1616e-02, -6.9092e-02, 1.3557e-02, + -1.1124e-02, 3.5828e-02, 2.5650e-02, -4.1473e-02, -3.2440e-02, + 7.1655e-02, 2.5589e-02, -8.5693e-02, -4.2908e-02, 7.2517e-03, + 3.8471e-03, 7.3059e-02, 3.0029e-02, -5.1941e-02, 1.0046e-01, + 1.2018e-01, 6.3629e-03, 8.5220e-03, -1.1635e-02, -1.6992e-01, + 1.0201e-02, -8.0750e-02, 5.5023e-02, 6.4659e-03, 4.5990e-02, + 8.8989e-02, -3.0716e-02, -2.3865e-02, -5.3650e-02, 7.0801e-02, + 2.2049e-02, -7.4158e-02, 1.0071e-02, -8.0261e-03, 2.7328e-02, + -8.1543e-02, 2.7054e-02, -2.3270e-04, 9.7580e-03, 7.5623e-02, + -5.8167e-02, -4.1901e-02, -3.5919e-02, -5.2124e-02, -4.8645e-02, + -1.0242e-01, -1.2451e-01, 1.4313e-02, 2.7180e-03, 6.5002e-02, + -2.1469e-02, -5.6114e-03, 4.1089e-01, -5.4565e-02, -1.5175e-02, + -3.8509e-03, -7.3181e-02, 1.0345e-02, 4.2328e-02, -1.1879e-02, + 1.0785e-01, 6.1569e-03, -2.9236e-02, 1.4844e-01, 2.1896e-03, + -5.9082e-02, -2.0008e-03, 6.3293e-02, 6.3599e-02, 8.5144e-03, + 1.5945e-02, 2.9583e-03, 4.7607e-02, -1.9806e-02, 6.9580e-02, + -7.1594e-02, 1.3557e-02, 3.0716e-02, 2.7649e-02, 2.7283e-02, + 4.7516e-02, -6.5918e-02, 2.7046e-03, 4.8866e-03, -1.2024e-01, + 4.6234e-02, 3.3447e-02, 4.3213e-02, 4.9591e-02, 2.6016e-02, + -8.7952e-02, 1.7502e-02, -3.1067e-02, 2.2278e-02, -1.0490e-03, + 4.4312e-02, 3.8147e-02, 5.5786e-02, 3.4302e-02, -2.3178e-02, + -1.8066e-02, 2.7878e-02, 9.2926e-03, 3.6560e-02, 1.5404e-02, + 4.3365e-02, 8.6792e-02, 3.2898e-02, -5.2414e-03, -4.9744e-03, + 9.2407e-02, 1.4526e-01, -3.5461e-02, 7.4219e-02, 1.7166e-02, + -2.1286e-02, -4.3678e-03, 5.5771e-03, -2.0508e-02, 1.2396e-01, + 2.7802e-02, 9.6283e-03, -4.3518e-02, -4.7211e-02, -3.2940e-03, + -4.8584e-02, 3.1982e-02, 2.0294e-02, -2.6001e-02, -9.3201e-02, + 
-8.6121e-02, -6.6406e-02, 5.2612e-02, -1.2108e-02, -7.9224e-02, + 5.6824e-02, 1.8967e-02, -1.0948e-03, -9.5764e-02, -7.7200e-04, + 2.7527e-02, 1.9699e-02, 1.5518e-02, -3.7628e-02, -2.3529e-02, + -4.3671e-02, -9.8648e-03, -2.5589e-02, 6.4125e-03, 4.7646e-03, + -1.6754e-02, -1.0658e-02, 5.5298e-02, 5.9692e-02, -1.0590e-01, + -4.5837e-02, 9.1553e-02, 1.7761e-02, 5.0201e-02, 2.9037e-02, + -5.7343e-02, 2.0279e-02, 1.3016e-02, 2.9739e-02, -3.4302e-02, + -4.3518e-02, 9.9915e-02, -1.4793e-02, 5.0720e-02, 2.2079e-02, + -4.4647e-02, 6.1768e-02, -1.2291e-02, 8.8806e-03, -4.4952e-02, + 5.1300e-02, 7.6355e-02, 6.1310e-02, -8.8257e-02, -2.8595e-02, + -6.1890e-02, 2.6749e-02, 7.0496e-02, 3.5400e-02, 1.8219e-02, + -7.3853e-02, 9.9411e-03, 7.5264e-03, 1.2817e-03, 5.3809e-01, + -1.4111e-01, 1.3843e-01, -3.9001e-02, 3.5431e-02, -1.5918e-01, + 5.8899e-02, -1.0211e-01, -2.9861e-02, -7.7896e-03, 5.7098e-02, + -3.1403e-02, 1.8051e-02, 1.2283e-02, -2.8534e-02, -1.5078e-03, + -1.1528e-02, 1.7303e-02, -3.3875e-02, -6.5918e-02, 1.3115e-02, + -3.7567e-02, -3.0090e-02, 8.7524e-02, -8.1940e-03, -5.2063e-02, + 5.2429e-02, 6.9153e-02, -4.5868e-02, -1.0033e-02, -4.6234e-02, + -3.8422e-02, 1.1635e-02, -8.1604e-02, -9.4177e-02, 3.2593e-02, + -9.1019e-03, -8.7952e-02, -6.4636e-02, -5.2399e-02, 5.1392e-02, + 9.8145e-02, -8.0681e-04, 8.5449e-02, 1.3745e-01, 5.7281e-02, + -1.5488e-02, 2.2259e-03, -6.7383e-02, -4.9835e-02, 5.6213e-02, + -2.5291e-03, -4.9515e-03, 6.8359e-03, -1.9875e-03, -5.8228e-02, + 1.4725e-02, 3.4790e-02, 5.2399e-02, 4.0222e-02, -4.0955e-02, + -2.0660e-02, 4.7241e-02, -2.8656e-02, 1.9485e-02, -2.2095e-02, + 5.2094e-02, 4.3823e-02, 3.9856e-02, -1.1200e-02, -9.5337e-02, + 5.9509e-03, -3.6530e-02, 2.0416e-02, 1.5732e-02, -4.2877e-02, + -6.1340e-02, -3.6621e-02, -1.7960e-02, -8.1970e-02, 1.9547e-02, + -2.1179e-02, 3.5706e-02, 1.0452e-02, -2.1118e-02, -2.4033e-02, + 7.1289e-02, 1.2250e-01, 2.2446e-02, -3.7262e-02, -2.0370e-02, + -1.4075e-01, 6.0486e-02, -2.9888e-03, 6.6948e-03, 3.5156e-02, 
+ -3.1921e-02, -3.7415e-02, 5.8136e-02, -8.8684e-02, -7.2693e-02, + 5.1910e-02, 1.5869e-02, -4.8757e-04, 7.6904e-02, -9.7046e-03, + 4.1016e-02, -3.4027e-02, -5.0842e-02, 1.3321e-02, 5.3680e-02, + 3.1414e-03, -4.7455e-02, -2.7618e-02, 2.5101e-02, -1.5723e-01, + 7.8201e-03, -2.0660e-02, 2.5883e-03, -6.2447e-03, 5.5389e-02, + -1.3313e-02, 4.7821e-02, 6.9351e-03, 5.7983e-02, 5.6183e-02, + 1.1887e-02, 5.2910e-03, -3.2379e-02, 5.1392e-02, -7.3975e-02, + -8.5571e-02, -1.4877e-02, -1.5236e-02, -2.1667e-02, -1.7227e-02, + -1.8250e-02, 4.9805e-02, -9.5947e-02, -3.6545e-03, -6.0822e-02, + -9.9731e-02, 5.7709e-02, 9.8343e-03, 5.7373e-02, -3.7537e-02, + 4.8615e-02, -3.5797e-02, 2.6840e-02, 4.7211e-02, -3.5217e-02, + 6.2103e-02, -6.2065e-03, -1.5650e-03, -4.1718e-02, 2.3468e-02, + -1.9951e-03, 2.7786e-02, 3.2043e-02, 1.0689e-02, -9.7351e-02, + -9.5093e-02, 1.6891e-02, 8.6060e-02, 1.4938e-02, -5.2521e-02, + 2.5116e-02, 2.1744e-02, -1.1877e-01, 1.1864e-02, -8.8501e-02, + -1.6541e-02, -1.1487e-01, 1.8127e-02, 3.0273e-02, 9.3994e-02, + -3.4607e-02, -3.0746e-02, 9.1003e-02, 5.6641e-02, -5.7037e-02, + -6.4392e-02, 4.1718e-02, 1.3525e-01, 5.1697e-02, -8.3313e-02, + 4.7760e-03, -5.3406e-02, -3.6774e-02, 1.1792e-01, -2.0370e-02, + -4.1016e-02, 2.3682e-02, 3.7537e-02, -5.7495e-02, -5.1300e-02, + 2.9572e-02, 2.8168e-02, 1.2695e-02, 7.2241e-04, 3.0762e-02, + 7.5760e-03, 5.3284e-02, -7.0068e-02, 2.8854e-02, -5.5328e-02, + -4.3091e-02, -7.0679e-02, 1.8738e-02, -1.3046e-02, 3.7781e-02, + 1.0361e-02, -1.2070e-02, 3.9749e-03, -7.9651e-02, -1.5659e-03, + -4.2450e-02, 3.2806e-02, -4.0283e-02, 1.9882e-02, -3.8605e-02, + 5.2246e-02, 4.4189e-02, -6.0577e-02, -3.6682e-02, -8.7204e-03, + -6.4819e-02, -2.7985e-02, 4.7394e-02, 7.2266e-02, -3.6041e-02, + 1.1871e-02, 2.5116e-02, 5.1300e-02, -3.8666e-02, 6.6467e-02, + 4.7821e-02, -2.4509e-03, -5.8350e-02, 4.3304e-02, 3.1281e-02, + 5.8990e-02, 1.9951e-03, 3.8544e-02, -5.9891e-03, 5.7556e-02, + 1.6617e-02, 3.1319e-03, 7.5134e-02, 6.8420e-02, 4.2236e-02, + 
5.5573e-02, -5.0323e-02, -2.2797e-02, -1.2222e-02, -2.8610e-02, + -4.1321e-02, 4.5563e-02, 2.5726e-02, 2.1149e-02, 5.3741e-02, + -4.7455e-02, -2.0660e-02, -7.1106e-02, -4.9629e-03, -2.2308e-02, + -4.8706e-02, 4.1077e-02, -1.4275e-02, -4.3823e-02, 2.3315e-02, + 7.4234e-03, -2.4673e-02, 1.0425e-01, 7.0457e-03, 4.8218e-02, + 7.4120e-03, 8.8425e-03, 3.7384e-02, -2.9678e-02, 6.0349e-03, + 6.0638e-02, -4.3213e-02, -1.9580e-01, -7.4730e-03, -5.1819e-02, + -3.3112e-02, 8.0383e-02, -4.2938e-02, 7.4219e-02, 2.4536e-02, + -1.0063e-02, 9.0485e-03, 1.3123e-03, -1.2062e-02, -5.7907e-03, + 1.0223e-02, -3.5004e-02, 7.4036e-02, -8.8501e-03, -1.6312e-02, + -1.4610e-02, -3.0640e-02, -9.7809e-03, 6.0852e-02, -9.5459e-02, + -2.1076e-03, -8.5022e-02, -6.9153e-02, 1.4397e-02, 6.0516e-02, + -9.5081e-04, -1.5594e-02, -1.4824e-02, 3.0151e-02, -8.6365e-02, + 2.0813e-02, -6.9092e-02, -6.9031e-02, -3.3295e-02, -1.7899e-02, + -1.0413e-01, 8.7357e-04, 8.5327e-02, 1.5732e-02, 3.0212e-02, + 1.4087e-01, 4.0466e-02, 3.7170e-02, 6.4621e-03, -3.9520e-02, + 6.2988e-02, -2.2156e-02, -4.6265e-02, -2.0157e-02, 6.6711e-02, + -5.0385e-02, 1.3031e-02, -2.8976e-02, 7.9285e-02, 8.1604e-02, + -1.5625e-02, -5.6976e-02, -4.4464e-02, -1.1398e-02, -7.6843e-02, + 3.2501e-02, 9.4177e-02, 3.9398e-02, -3.1769e-02, 5.9418e-02, + -7.1167e-02, 7.4646e-02, -8.1787e-02, -4.7699e-02, -2.6825e-02, + -6.9666e-04, 4.6844e-02, -7.4768e-02, 5.8167e-02, -4.3701e-02, + 8.9340e-03, -2.5970e-02, -4.2694e-02, -3.4973e-02, 3.2196e-02, + -5.8258e-02, -2.2598e-02, -6.3782e-03, -7.2754e-02, 3.6743e-02, + 1.5320e-01, -2.5864e-02, -3.7689e-02, -6.9092e-02, -5.9387e-02, + 1.8661e-02, 2.9053e-02, 6.0455e-02, -8.7830e-02, -3.1311e-02, + 1.0277e-02, 6.6345e-02, -4.2969e-02, -3.1624e-03, -4.0009e-02, + 3.0502e-02, -1.7914e-02, -6.5796e-02, -8.0872e-02, -1.6022e-02, + -1.2671e-01, 3.2654e-02, -9.5139e-03, 1.7807e-02, -6.0150e-02, + -6.4430e-03, -8.7204e-03, -8.2855e-03, -5.0446e-02, 7.0915e-03, + -2.0523e-02, -1.1787e-02, 3.8574e-02, -1.5854e-02, 
1.0025e-02, + -7.5195e-02, -5.3596e-03, 7.8613e-02, -2.2324e-02, 1.7990e-02, + 1.1328e-01, -3.4546e-02, 1.2367e-02, -5.5634e-02, -6.4758e-02, + -3.3173e-02, -5.9174e-02, -7.6599e-02, 1.9653e-02, 5.2887e-02, + 6.2683e-02, -1.0712e-01, 6.7749e-02, 3.8605e-02, -4.1748e-02, + -3.0762e-02, -2.6627e-02, -7.7881e-02, 2.5978e-03, 4.1199e-03, + 1.3733e-01, -4.8889e-02, -9.5520e-03, 6.9031e-02, 2.8091e-02, + 4.2877e-02, 3.3203e-02, -3.7201e-02, 2.0309e-02, -5.8746e-02, + 1.0063e-02, 8.3252e-02, 1.8173e-02])Parameter containing: +tensor([2.1959, 2.2433, 2.1857, 2.1209, 1.6389, 2.1854, 2.1641, 2.2831, 2.2128, + 2.2176, 2.1341, 2.2153, 2.3185, 2.2126, 2.2357, 2.0978, 2.1635, 2.1208, + 2.2639, 1.7605, 2.3400, 2.1941, 2.3848, 2.3280, 2.1389, 2.2421, 2.1610, + 2.2365, 2.1423, 2.0889, 2.1251, 2.1304, 2.1786, 2.2438, 2.0898, 2.1105, + 2.3662, 2.1162, 2.1394, 2.2017, 2.0881, 2.1809, 2.1864, 2.1774, 2.1370, + 2.1468, 2.2602, 2.0467, 2.2073, 2.0081, 2.2256, 2.2054, 2.3503, 2.1690, + 2.3088, 2.0737, 2.3294, 2.0558, 2.1249, 2.2076, 2.1621, 2.1766, 2.1614, + 2.1080, 2.2065, 2.4156, 2.2566, 2.1855, 2.1032, 1.7747, 2.1803, 2.1558, + 2.0032, 2.1216, 2.2635, 2.3478, 2.2386, 2.1590, 2.0873, 2.1088, 2.1520, + 2.2779, 2.1882, 2.1679, 2.1767, 2.2001, 2.2802, 2.0911, 2.1828, 2.0577, + 2.2320, 2.1016, 2.2414, 2.1581, 2.1407, 2.3021, 2.0415, 2.2577, 2.2683, + 2.1252, 2.1751, 2.0928, 2.1778, 2.1687, 2.1598, 1.8509, 2.4226, 2.1822, + 2.1058, 2.1584, 2.1773, 2.2128, 2.1975, 2.3101, 2.3948, 2.1183, 2.0253, + 2.3268, 2.2789, 2.1262, 2.0824, 2.2447, 2.2727, 2.1143, 2.1328, 2.1054, + 2.2809, 2.2893, 2.2209, 2.1829, 2.2278, 2.3562, 2.2678, 2.1217, 2.3154, + 2.1505, 2.1856, 2.1743, 2.0592, 2.4429, 2.1887, 2.1263, 2.2271, 2.0800, + 2.1909, 2.0682, 2.1730, 2.1021, 2.2987, 2.1907, 2.0872, 2.1588, 2.2281, + 2.1060, 2.1932, 1.4218, 2.1816, 2.0950, 2.1298, 2.3864, 2.0477, 2.1148, + 2.2073, 2.1981, 2.1993, 2.1867, 1.9977, 2.2300, 2.2829, 2.0759, 2.2131, + 2.0797, 1.9357, 2.1667, 2.0612, 2.2055, 2.1462, 2.2525, 
2.2415, 2.1718, + 2.2981, 2.3206, 2.3284, 2.2534, 2.0522, 1.9514, 2.0436, 1.2547, 2.2027, + 2.0659, 2.1481, 2.2872, 2.2690, 2.1321, 2.2106, 2.1154, 2.2069, 2.1221, + 0.9120, 2.2188, 2.4457, 2.1044, 2.0904, 2.1309, 2.2110, 2.1845, 2.2859, + 2.1890, 2.2604, 2.1990, 2.2026, 2.0569, 2.3205, 1.8578, 2.1635, 2.1395, + 1.9611, 2.1856, 2.2196, 2.1708, 2.2654, 2.2020, 1.6530, 2.4133, 2.1938, + 2.3114, 2.2499, 2.0929, 2.1446, 2.0065, 2.1790, 2.1916, 2.2220, 2.0788, + 2.1493, 2.1848, 2.1861, 2.1895, 2.1188, 2.2206, 2.1182, 2.2558, 2.2083, + 2.1962, 2.0984, 2.1888, 2.1710, 2.3315, 2.1818, 2.1630, 2.1299, 2.1561, + 2.2780, 2.2752, 2.2380, 2.1594, 2.2068, 2.2501, 2.2587, 2.2793, 2.1248, + 2.2332, 2.2974, 2.1941, 2.3257, 2.1813, 2.1643, 2.2622, 2.0120, 2.1354, + 2.1130, 2.2935, 2.2795, 2.3622, 2.0759, 2.2596, 2.0937, 2.1968, 2.1598, + 2.1423, 2.1361, 1.6926, 2.1329, 2.2892, 2.2827, 2.1544, 2.2791, 2.2954, + 2.0467, 2.0488, 2.1520, 2.2570, 2.1547, 2.2220, 2.1348, 2.1048, 2.1245, + 2.1469, 2.0957, 2.2185, 2.0759, 2.3178, 2.1051, 2.2986, 2.2410, 2.0303, + 2.3367, 2.1090, 2.2387, 2.2247, 2.3043, 2.1765, 2.4028, 2.2597, 2.0883, + 2.0746, 2.1657, 2.2288, 2.1221, 2.3232, 2.3526, 2.0159, 2.1545, 2.1041, + 0.3871, 2.3329, 2.2983, 2.0760, 2.4192, 2.1847, 2.1178, 2.2100, 2.2635, + 2.2291, 2.1490, 2.1012, 2.2794, 2.2359, 2.1499, 2.1478, 2.3279, 2.1762, + 2.2106, 2.2876, 2.2022, 2.0238, 2.2397, 2.1261, 2.3905, 2.2223, 1.9225, + 2.2648, 2.2286, 2.0396, 2.1020, 2.1426, 2.1499, 2.3850, 2.1622, 2.2299, + 2.5684, 2.2173, 2.3663, 2.2255, 2.2637, 2.0994, 2.2758, 2.3033, 2.2118, + 2.3846, 2.2690, 2.1289, 2.3554, 2.4116, 2.1124, 2.1070, 2.1095, 2.1002, + 2.2363, 2.3103, 2.1195, 2.1077, 2.1961, 2.2064, 2.1364, 2.1967, 2.2627, + 2.2412, 2.2803, 2.2380, 2.0999, 2.1453, 2.3822, 2.1865, 2.0122, 2.2556, + 2.2398, 2.1578, 2.4047, 2.2957, 2.1867, 2.2028, 2.1285, 2.3118, 2.1787, + 2.0632, 2.1458, 2.1129, 2.2477, 2.2613, 2.1903, 2.1475, 2.2234, 2.2157, + 1.5280, 2.7052, 2.1411, 1.9908, 2.1080, 2.3099, 2.1839, 
2.2772, 2.3016, + 2.1400, 2.1563, 2.1544, 2.0065, 2.1619, 2.2343, 2.2508, 2.0020, 2.1819, + 2.2169, 2.2876, 2.2819, 2.0921, 2.2851, 2.1712, 2.1838, 2.4053, 2.2280, + 2.1213, 2.2339, 1.5170, 2.1571, 1.9372, 2.1083, 2.2179, 2.2474, 2.1511, + 2.2367, 2.1640, 2.3784, 2.1733, 2.1151, 2.1422, 2.1430, 2.0686, 2.1205, + 2.1924, 2.2715, 2.2642, 2.2484, 2.1470, 2.1295, 2.2515, 2.1685, 2.0151, + 2.1463, 2.4572, 2.1058, 2.1192, 2.0471, 2.2623, 2.3658, 2.2353, 2.1376, + 2.3325, 2.1941, 2.0276, 2.1756, 2.1455, 2.1653, 1.4040, 2.1441, 2.1498, + 2.0774, 2.1498, 2.0594, 2.1203, 2.2170, 2.1717, 2.1872, 2.2332, 2.0680, + 2.1824, 2.2045, 2.0639, 2.2616, 2.1788, 2.2463, 2.2707, 2.1051, 2.1681, + 2.2768, 2.0634, 2.2021, 2.2291, 2.2101, 2.1755, 2.0786, 2.2107, 2.0552, + 2.0850, 2.3577, 2.2024, 2.3391, 2.2122, 2.1305, 2.1941, 2.3249, 2.1990, + 2.1484, 2.0657, 2.2080, 2.2095, 2.2046, 2.1868, 2.1210, 2.1913, 2.1871, + 2.3558, 2.3104, 2.2753, 2.2643, 2.1893, 2.0487, 2.2330, 2.3197, 2.1989, + 2.0989, 2.2021, 2.3357, 2.2768, 2.2759, 2.0574, 2.2289, 2.1212, 2.1820, + 2.2164, 2.2613, 2.1490, 1.9993, 2.3027, 2.0638, 2.1157, 2.1022, 2.0929, + 2.3347, 2.2287, 2.2191, 1.6776, 2.1588, 2.2015, 2.3349, 2.1413, 2.2229, + 2.2184, 2.1025, 2.1480, 2.2768, 2.1595, 2.1132, 2.0917, 2.2105, 2.4072, + 2.0885, 2.2566, 2.2266, 2.0839, 1.9988, 2.0598, 2.2237, 2.1696, 2.2168, + 2.1860, 2.1769, 2.0475, 2.1407, 2.1101, 2.1082, 2.1283, 2.1669, 2.1678, + 2.2295, 1.9174, 2.0682, 2.2209, 2.1534, 2.1680, 2.2277, 2.1702, 2.1736, + 2.3767, 2.2455, 2.4314, 2.2258, 1.8448, 2.2785, 2.0552, 2.2357, 2.2187, + 2.2307, 2.1839, 2.1638, 2.2256, 2.2257, 2.1354, 2.1825, 2.0499, 2.2662, + 2.0253, 2.2905, 2.1492, 2.2266, 2.2326, 2.1619, 2.1309, 2.1369, 2.2871, + 2.2399, 2.1446, 2.2617, 2.1392, 2.0626, 2.0500, 2.1366, 2.1822, 2.2245, + 2.2088, 2.2096, 2.1357, 2.1353, 2.2215, 1.8183, 2.0847, 2.0899, 2.1745, + 2.1806, 2.3140, 2.2878, 2.2096, 2.2386, 2.2901, 2.2440, 2.0776, 2.1517, + 2.0757, 2.1171, 2.1626, 1.6976, 2.1217, 2.3893, 2.1851, 
2.0992, 2.2829, + 2.1739, 2.2640, 2.2493, 2.1478, 2.2046, 2.1931, 2.1903, 1.8220, 2.2475, + 2.0913, 2.1884, 2.0506, 2.2598, 2.2075, 2.3095, 1.9063, 2.1329, 2.1046, + 2.1928, 2.1712, 2.1733, 2.0890, 2.2223, 2.2372, 2.1847, 2.1720, 2.3124, + 2.2107, 2.1327, 2.2606, 2.4070, 2.1014, 2.2041, 2.2022, 2.1145, 2.0395, + 2.1650, 2.2246, 2.2635, 2.2594, 2.3897, 2.0606, 2.0628, 2.2408, 2.2936, + 2.0929, 2.1765, 2.0997, 2.1489, 2.2996, 2.2237, 2.2520, 2.0795, 2.3836, + 2.1249, 2.4757, 2.3715, 2.1985, 2.2140, 2.0363, 1.3627, 2.1650, 2.1868, + 2.1629, 2.1055, 2.1162, 2.1416, 2.2213, 2.2584, 2.2356, 2.1500, 2.0179, + 2.1893, 2.2718, 2.3281, 2.2356, 2.0494, 2.3746, 2.2077, 2.0318, 2.2620, + 2.2605, 2.1761, 2.2573, 2.1130, 2.2154, 2.3167, 2.2101, 2.1554, 2.2810, + 2.1703, 2.1957, 2.3504, 2.1863, 2.0534, 2.2269, 2.2191, 2.1727, 2.1098, + 2.0837, 2.2015, 2.3503])Parameter containing: +tensor([ 1.3281e-01, 1.0489e+00, -4.3994e-01, -3.5076e-01, -1.5734e-01, + 1.0467e-01, 3.4624e-01, -3.6057e-01, -5.3122e-01, -1.1508e-01, + -5.0123e-01, 2.9999e-01, -7.4618e-01, -5.3507e-01, 4.8069e-01, + -4.2248e-01, 1.7101e-01, 2.3967e-01, -5.6175e-01, -1.5818e+00, + 4.6960e-01, 2.5782e-01, 8.7915e-01, -1.2254e+00, 4.6011e-01, + -2.9357e-01, 3.7613e-01, 4.0989e-01, -5.0915e-01, 5.7232e-01, + 4.9235e-01, 4.1011e-01, -4.4866e-02, -1.1593e-01, -2.0305e-01, + 3.1680e-02, -5.8174e-01, -1.0083e-01, 1.1103e+00, 7.3384e-01, + -3.7280e-01, 5.5970e-02, 4.7164e-01, 4.6526e-01, -5.5908e-01, + -3.5303e-01, -3.3388e-01, -4.7566e-01, 4.4903e-01, 1.1533e-01, + -6.4007e-01, -3.7924e-01, 7.8134e-01, 6.7420e-02, -5.9359e-01, + 1.2319e+00, 8.8111e-01, -1.8171e-01, -1.3960e-01, -3.9758e-02, + 1.7319e-01, 4.6109e-01, -1.0275e-01, -4.8635e-01, 2.2356e-01, + 1.1343e+00, 6.4209e-01, -7.2440e-01, -3.6563e-01, -1.5383e+00, + 3.5883e-02, -1.2616e-01, 4.5378e-01, -6.5210e-01, -5.3505e-01, + -9.3343e-01, 6.5818e-01, 4.9906e-01, -2.6536e-01, 2.5868e-01, + 9.8699e-02, 6.8153e-01, -3.1711e-01, 6.5012e-01, -6.8029e-01, + -2.7511e-01, 
5.5501e-01, 4.4060e-02, 1.8087e-03, -3.0272e-01, + -5.5106e-01, 8.0156e-02, -5.0127e-01, -2.2272e-01, -5.4057e-01, + -3.7910e-01, 3.7393e-01, -4.5307e-02, -7.3265e-01, 4.7157e-01, + -2.0217e-01, -6.4716e-01, -8.6948e-02, 1.8414e-01, 8.2459e-02, + 3.1776e-01, 1.0655e+00, 6.4381e-01, 1.8725e-01, -1.5551e-01, + 7.7160e-01, 6.6357e-01, 6.1445e-03, 4.1845e-01, 7.9661e-01, + -3.5947e-02, 5.9436e-01, 5.0569e-01, 6.5157e-01, 2.5360e-01, + 1.1022e-01, -7.2232e-01, -8.4216e-01, -3.0733e-01, 4.9215e-01, + -2.9305e-01, 1.1464e-01, 5.4001e-01, -1.9519e-01, 5.3638e-01, + 5.7463e-01, -7.0124e-01, 2.2865e-01, -3.8127e-02, 8.8887e-01, + 9.2389e-01, 5.1717e-01, -2.9086e-01, -1.4492e-01, -7.3113e-01, + -3.8683e-01, 6.1832e-01, -2.4134e-01, 2.6464e-01, -4.8043e-01, + 2.3333e-01, 1.3102e-01, -1.4933e-01, -6.4925e-01, 3.4122e-01, + 1.7284e-01, -6.7279e-01, 3.6753e-01, 1.3642e-01, -4.6944e-01, + 7.0057e-01, -2.5841e-01, 7.7047e-02, 5.0625e-02, -8.2669e-01, + -3.6478e-01, 1.6450e-01, 4.8215e-01, -3.4735e-01, -1.2598e-01, + 2.5064e-01, -6.9497e-02, 4.7381e-02, -5.4013e-01, -5.1097e-03, + 7.9240e-01, 2.5241e-01, -5.7863e-01, 5.1426e-01, -6.1765e-01, + 2.3296e-01, 2.1380e-01, -4.3335e-01, -2.0675e-01, 7.5988e-01, + -4.6048e-01, -6.6959e-01, 3.0083e-01, 3.7472e-01, 3.8105e-01, + 8.4046e-01, -3.9265e-01, 1.3335e+00, 1.4567e-01, -1.3545e-01, + -3.1525e-01, -6.1112e-01, -8.4796e-02, 3.9994e-01, 3.8930e-01, + -1.1288e-02, 8.5296e-01, -3.9526e-01, 2.2442e-01, 6.9698e-01, + -1.0549e+00, 4.6266e-01, 2.7690e-01, 3.1111e-01, 6.6209e-02, + -4.6759e-02, 6.1619e-01, 5.8232e-01, -6.4537e-01, -8.5775e-01, + 7.0397e-01, -3.8256e-01, -5.9776e-01, 5.3431e-01, 3.2315e-01, + -1.5651e-01, 1.5397e-02, 2.3762e-01, -3.1457e-01, 2.9154e-01, + 4.2581e-01, 5.1001e-02, -5.5437e-01, 9.1472e-01, 2.8580e-01, + -6.3040e-01, -8.1291e-01, 9.6829e-02, 2.1046e-01, 3.6351e-01, + -2.6908e-01, 9.2500e-02, 3.6032e-01, 3.7904e-02, -6.3151e-01, + -7.9317e-01, 3.9727e-01, 4.2895e-01, -1.9378e-01, 7.6716e-01, + 2.8807e-01, 
-1.6993e-01, 4.8568e-01, -6.1613e-01, -4.5870e-02, + -2.6191e-01, 7.7553e-02, 4.0871e-01, -8.8964e-01, 4.6862e-01, + 1.5260e-02, 4.7435e-01, 8.4179e-01, -4.0157e-01, 3.9829e-01, + 1.6903e-01, 4.2167e-01, -8.4015e-01, -8.4332e-01, -2.0669e-01, + -3.7132e-01, -2.7185e-02, 2.0107e-01, -1.6155e-01, -7.5347e-01, + -2.9117e-01, 5.7739e-01, 3.6695e-01, 1.6001e-01, 2.1570e-01, + 3.7116e-03, 6.9096e-01, -3.0534e-01, -8.5314e-01, -3.9907e-01, + -9.2290e-02, -4.2464e-02, 4.0167e-01, 2.1478e-01, -6.6176e-02, + 3.3254e-02, -7.6433e-01, -5.3241e-01, 5.8260e-01, -7.2345e-01, + 4.0051e-01, -5.5805e-01, 1.6328e-01, 1.1494e-01, -1.8896e-02, + -9.4009e-02, 2.6599e-01, 6.5200e-01, -7.1813e-01, -2.9351e-01, + 1.4233e-01, -3.6490e-01, -5.3627e-01, 4.7552e-01, 7.6735e-02, + 6.7022e-02, 6.1545e-01, -5.3540e-01, -6.3876e-01, -2.4615e-01, + 6.7330e-02, -4.3329e-01, 1.5423e-01, -6.4616e-01, -1.5042e-01, + 5.2939e-01, 1.3776e-01, 3.9853e-01, 2.5346e-01, 2.3949e-01, + 8.2627e-02, 3.1305e-02, 4.0734e-01, 4.0606e-01, 8.5729e-01, + -7.4386e-01, -4.3430e-01, 3.6861e-01, -2.6908e-01, -1.0369e-01, + -1.0651e+00, -1.1467e+00, -5.9298e-01, 6.4622e-01, -4.5900e-01, + 5.4481e-01, -2.3575e-01, -4.3648e-01, -4.0216e-01, -1.2223e-01, + -2.9405e-01, -1.0319e+00, 6.1357e-01, 5.8217e-01, -3.6815e-02, + 4.7501e-01, -2.8313e-01, -5.7884e-01, 9.5559e-01, -7.9043e-01, + 4.8335e-01, -3.6762e-01, 2.8385e-02, -9.3544e-01, 9.2769e-03, + 6.9211e-01, 4.3277e-01, 3.1741e-01, 6.5197e-01, -1.4770e-01, + -9.3074e-02, -7.4372e-01, -4.4881e-01, -1.1087e-01, 5.2817e-01, + -1.2982e+00, -5.1468e-01, -3.3536e-01, -1.9444e-01, 6.8215e-01, + -4.7416e-01, -5.2786e-01, 7.7139e-01, -7.5394e-01, 6.6867e-01, + -2.6968e-01, -4.9102e-01, 5.0436e-01, -6.4053e-01, -2.4396e-01, + -1.5928e-01, 2.6028e-01, 6.8988e-01, 5.3118e-03, -6.4690e-01, + -4.4641e-01, -5.6631e-01, -5.8067e-01, -4.0297e-01, -1.0473e-01, + -5.4320e-01, -5.8003e-01, 8.1864e-01, 8.3972e-01, 9.5139e-02, + -1.8129e-01, -5.2752e-01, 9.9258e-01, 4.1471e-01, 1.3180e-02, + 
-4.7539e-01, -3.0162e-01, 6.0368e-01, -5.5971e-01, -6.4534e-01, + 3.9169e-01, 6.3918e-01, -4.1132e-01, -5.0957e-01, 5.2105e-01, + -2.7070e-01, 5.9905e-02, -3.3576e-01, -3.3296e-01, 7.0715e-01, + 3.0238e-01, -4.5997e-01, 4.5236e-01, 1.5374e-01, 1.1668e+00, + -1.6149e+00, -2.6051e-01, -5.3227e-01, 1.7578e-01, 7.4930e-01, + 2.8102e-01, -4.9195e-01, 6.3192e-01, -2.3230e-01, -2.0161e-01, + 3.4353e-01, 3.6410e-01, -2.6098e-01, -3.7803e-01, 5.0583e-01, + -4.5856e-01, -1.7220e-01, 2.1415e-01, 5.8864e-01, 9.6195e-01, + 2.0834e-01, -3.0874e-01, 9.0913e-01, 4.2598e-01, -4.4309e-01, + 4.9674e-01, 2.3522e-01, -3.7915e-01, -2.1614e+00, -3.6390e-01, + 6.6082e-02, -1.7560e-02, 6.3947e-01, 5.7122e-01, -6.5979e-01, + -1.2240e-01, -3.1060e-01, -7.9018e-01, 3.2094e-01, 4.1335e-01, + 9.0558e-02, -5.1849e-01, -1.8333e-01, -1.7102e-01, 5.3537e-02, + -6.2528e-01, 4.0292e-01, -6.8299e-01, 1.4752e-01, -5.9252e-01, + -3.2536e-01, -5.0408e-01, -8.1444e-02, -8.7752e-02, -6.6041e-01, + -1.5532e-01, 1.9076e-02, -4.8949e-01, -4.7710e-01, -8.8513e-01, + -5.6397e-01, 2.1987e-01, -7.2030e-01, 1.7313e-01, 1.3092e-01, + -8.1733e-02, 3.9655e-01, 4.4970e-01, 1.2227e-01, 4.0097e-01, + -1.8540e-01, -4.0063e-01, 4.4000e-01, -3.4232e-02, -3.9916e-01, + -3.5780e-01, 4.9083e-01, -2.7053e-01, 4.0731e-01, -4.0411e-01, + -2.9179e-01, -1.9077e-01, -1.7176e-01, 5.6506e-01, 3.3544e-01, + 2.3209e-01, -8.1954e-01, -4.9928e-01, -1.9707e-01, 7.4907e-01, + 4.8980e-01, 4.6658e-01, 1.5447e-01, -2.9315e-01, -5.4347e-01, + -6.2719e-01, 1.3141e-02, -2.7332e-01, -7.1763e-01, 1.0533e+00, + -1.3157e-02, 8.6267e-01, -3.9119e-01, 1.6885e-01, 1.1079e-01, + 9.4402e-01, 2.0349e-01, 1.2190e-01, -1.1176e-01, -6.5901e-01, + -6.6527e-01, 4.0093e-01, 6.4473e-01, -5.8975e-02, 6.1555e-02, + 2.6041e-01, -1.0250e+00, 5.6489e-01, 3.0620e-01, 6.1333e-01, + -9.1546e-01, 1.5750e-01, -3.4531e-01, 5.8772e-01, -2.7294e-01, + 2.8230e-01, 4.2517e-01, -5.1764e-01, 6.1962e-01, 7.8920e-01, + 3.2426e-01, 5.0412e-01, -7.7646e-01, -1.3567e-01, 1.7101e-01, 
+ -9.9796e-01, -4.4537e-01, 4.8358e-02, 6.3490e-01, 2.3371e-01, + -1.4228e-01, 1.3333e-01, -3.6550e-01, -8.1516e-01, -2.0957e-01, + 5.5559e-02, -7.6858e-01, 4.7748e-02, 5.5862e-01, -8.6177e-01, + -1.6945e-01, 7.1205e-02, -4.7811e-01, 2.5253e-01, -2.6369e-01, + 1.6023e-01, 6.2191e-01, 7.3936e-02, -2.9017e-01, 4.3377e-01, + 1.2851e+00, 3.7442e-02, -5.9583e-01, -4.5210e-01, 1.0180e-01, + 5.0120e-01, -2.9147e-01, 8.8957e-01, 2.0878e-02, -4.1971e-01, + -1.8841e-01, -2.7482e-01, 4.3985e-02, 1.0758e+00, 7.5964e-01, + -3.3972e-01, -1.1785e-01, 1.0062e+00, 3.7155e-02, -2.1678e-01, + 2.4287e-01, 6.3074e-02, -5.6007e-01, 7.0995e-01, 3.6084e-01, + -1.6873e-02, -8.3919e-01, 1.6823e-01, -9.4747e-01, -4.8958e-01, + 7.8847e-01, 7.7202e-04, -9.2255e-01, 6.4493e-01, 1.8281e-01, + -3.6465e-01, 5.7656e-01, -4.6102e-01, 2.9057e-01, 4.1614e-01, + 3.3420e-01, 5.0496e-01, 3.3019e-01, -1.1436e-01, 1.2199e-01, + 2.8585e-01, -8.7563e-04, -9.0184e-01, 6.5108e-01, 7.4054e-01, + 4.4034e-01, 3.3995e-01, 5.0614e-01, -1.3607e-01, 3.8840e-01, + 8.6456e-03, 3.4877e-01, -1.8693e-01, -6.4272e-02, 3.5093e-01, + -1.7511e-01, -7.3258e-02, 2.0546e-01, 2.3129e-01, -3.5526e-01, + 8.1774e-01, -6.0656e-01, -7.6188e-01, -3.6338e-01, -8.9585e-01, + -2.6506e-01, -4.5514e-01, 5.5527e-02, -2.8256e-01, 7.8668e-01, + 2.8775e-01, -1.0851e-01, 6.9988e-01, -6.2157e-01, 7.6669e-01, + 3.1773e-01, -4.2009e-02, 3.2616e-01, -2.7670e-01, 3.8984e-01, + 1.2011e-01, 4.8168e-01, 8.8709e-01, -3.2855e-01, -2.7174e-01, + -7.4272e-01, 1.2719e-01, -6.1042e-01, 2.2660e-01, 4.4052e-02, + 4.4497e-01, 2.8106e-01, -5.1593e-03, 5.0610e-01, 7.0556e-01, + -4.2123e-01, 1.4735e-01, 5.1446e-01, 4.8529e-01, -5.4870e-02, + -7.3664e-01, 1.6677e+00, -8.3535e-01, -5.2973e-01, 3.0460e-01, + 7.2249e-01, 1.6966e-01, 8.9588e-02, 8.3096e-01, -8.6126e-01, + -7.6821e-02, -5.3029e-02, -8.9052e-01, -3.4367e-01, -7.1569e-01, + 5.8929e-01, -8.2918e-01, -2.3534e-02, -2.0483e-01, 6.2491e-01, + 5.2459e-01, 8.5316e-02, 2.7787e-01, -3.9188e-01, 7.1636e-01, + 
7.4751e-01, -9.2674e-01, -1.6038e-01, -6.0258e-01, 1.1622e-01, + 6.2238e-01, -2.3765e-01, 1.0691e-01, 4.0306e-01, 1.1263e-01, + -3.5009e-01, -4.1460e-01, 9.3237e-01, -5.4956e-01, -1.1002e+00, + -2.6917e-02, -1.0258e+00, -7.7761e-01, -7.1489e-01, -3.9857e-01, + -4.6436e-02, 2.4821e-01, -3.0298e-01, 6.9399e-01, -3.3227e-01, + -3.8206e-01, -2.6360e-01, 4.3574e-01, 7.5279e-01, 7.5876e-01, + 2.8074e-01, 1.1018e-01, -1.9109e-01, -6.2550e-02, -5.6703e-01, + -6.2459e-01, 5.1496e-01, 1.8184e-01, -5.6312e-01, 9.6952e-02, + 1.0453e-01, -4.1765e-01, 4.6603e-01, 3.6378e-01, 9.6122e-01, + 3.8609e-01, -3.5637e-01, -8.0935e-01, 6.6562e-01, -2.8625e-01, + 3.6776e-01, -1.1129e-01, -5.7451e-01, 1.0716e+00, 6.7912e-01, + -6.6297e-01, -6.0802e-01, -2.3813e-01, 5.4780e-01, 4.8164e-01, + 9.7501e-02, 2.7520e-01, 1.0619e+00])Parameter containing: +tensor([[-0.0090, -0.0285, -0.0117, ..., -0.0334, -0.0124, -0.0016], + [ 0.0073, -0.0109, -0.0155, ..., -0.0185, 0.0384, -0.0127], + [ 0.0053, 0.0201, 0.0105, ..., -0.0385, -0.0188, -0.0073], + ..., + [-0.0101, 0.0020, -0.0220, ..., 0.0291, -0.0050, 0.0291], + [ 0.0129, 0.0045, 0.0251, ..., 0.0085, 0.0212, -0.0309], + [-0.0195, -0.0013, 0.0205, ..., 0.0047, -0.0370, 0.0062]])Parameter containing: +tensor([-0.2406, 0.2976, 0.3762, ..., -0.0151, 0.0414, 0.0243])Parameter containing: +tensor([[-0.0192, 0.0069, -0.0140, ..., 0.0031, -0.0047, -0.0004], + [-0.0301, -0.0264, -0.0079, ..., -0.0047, -0.0026, 0.0025], + [ 0.0142, -0.0199, 0.0016, ..., -0.0152, 0.0006, 0.0133], + ..., + [-0.0022, 0.0139, 0.0077, ..., 0.0194, 0.0097, -0.0001], + [ 0.0107, -0.0085, 0.0090, ..., -0.0009, 0.0038, -0.0255], + [ 0.0045, 0.0113, 0.0006, ..., -0.0065, -0.0201, -0.0050]])Parameter containing: +tensor([-1.6556e-03, -6.8176e-02, -6.2675e-03, -1.1215e-03, 1.7944e-02, + -2.9358e-02, 2.5925e-02, -1.5930e-02, 1.7960e-02, -1.9501e-02, + 2.4078e-02, -2.7313e-02, -3.3905e-02, 4.1962e-02, 8.5678e-03, + 4.2816e-02, 2.2644e-02, -1.5625e-02, 1.7345e-04, -7.3364e-02, + 
1.1093e-02, 1.1620e-02, 4.7363e-02, -3.3627e-03, -5.6671e-02, + -4.5715e-02, 1.2283e-02, 3.6072e-02, 8.4000e-03, 2.0309e-02, + 4.6600e-02, 3.5278e-02, 2.3300e-02, 1.3256e-04, -4.8767e-02, + -7.8369e-02, 2.0660e-02, 2.9999e-02, 7.9895e-02, -5.1208e-02, + 7.0000e-03, -3.2272e-03, 1.7883e-02, -4.1351e-03, -6.4583e-03, + -8.0032e-03, -2.5772e-02, 9.2087e-03, -1.5091e-02, 3.6120e-05, + -2.0645e-02, -2.1378e-02, -7.7393e-02, -4.5502e-02, -3.1891e-02, + 8.6548e-02, 7.3120e-02, -1.1492e-03, 2.1469e-02, 4.8184e-04, + 2.2842e-02, 2.4939e-04, 5.0659e-02, -4.2963e-04, 1.2802e-02, + -6.9946e-02, -2.3636e-02, -4.4250e-02, -5.2551e-02, -1.1066e-01, + 2.3804e-02, 2.5330e-02, -1.2524e-01, -1.9562e-02, 2.8503e-02, + 4.5959e-02, 4.2358e-02, 1.2421e-02, 1.7349e-02, -4.3335e-03, + -2.4200e-02, 5.4535e-02, -3.4332e-02, -1.5556e-02, -4.6906e-02, + -1.1436e-02, 1.5945e-02, -3.3569e-02, 4.2236e-02, 1.0567e-02, + -6.9504e-03, -1.8845e-02, -1.8234e-02, 2.3987e-02, 3.6469e-03, + 1.5884e-02, 3.0350e-02, -4.3396e-02, 1.4664e-02, 7.9346e-02, + -4.3243e-02, 4.8409e-03, -6.9763e-02, 2.6474e-02, 2.1866e-02, + 6.6467e-02, 4.3945e-02, 1.2970e-02, 3.5187e-02, 4.6661e-02, + 3.4363e-02, -2.2659e-03, -2.5253e-02, -6.2256e-02, 2.8854e-02, + 1.0483e-02, 7.2449e-02, 4.8584e-02, 3.0075e-02, 8.1299e-02, + -3.3020e-02, 2.3636e-02, 2.8095e-03, 2.8671e-02, 3.2593e-02, + -2.7802e-02, -3.8624e-04, -2.8244e-02, -1.2238e-02, 3.8483e-02, + -5.3711e-02, -1.7715e-02, 1.2756e-02, -1.3763e-02, 1.7595e-03, + -3.2227e-02, -6.0699e-02, 2.7863e-02, 3.0975e-02, -7.9956e-02, + 5.5878e-02, -4.8859e-02, -4.9469e-02, -2.5482e-02, -3.4454e-02, + -4.3182e-03, 4.5074e-02, -4.0436e-02, 7.0229e-03, -1.6357e-02, + 4.2755e-02, 2.7176e-02, 5.8594e-02, -1.7578e-02, -2.9541e-02, + 6.7810e-02, 2.7206e-02, -5.5695e-02, -4.2114e-02, 1.8646e-02, + 1.0376e-02, -4.6539e-02, 4.4617e-02, -5.7755e-03, 3.4576e-02, + -1.6525e-02, 3.2928e-02, 9.7275e-03, 5.2277e-02, 4.5891e-03, + -8.9493e-03, -5.1918e-03, 7.8087e-03, -1.7212e-02, -6.0349e-03, + 
-5.6061e-02, -1.0315e-02, 6.0883e-03, -4.5776e-02, -1.1185e-02, + -3.9520e-02, -6.0387e-03, 1.7715e-02, 6.2286e-02, 5.4077e-02, + 1.3489e-02, 1.3847e-02, -9.6985e-02, -3.0193e-03, -2.6337e-02, + -2.2629e-02, -8.5144e-02, -2.4094e-02, 3.0994e-03, 6.4026e-02, + 2.5665e-02, 1.0254e-02, -4.0253e-02, -6.0333e-02, -1.5167e-02, + -3.4576e-02, -1.1131e-02, 6.5979e-02, -8.1482e-02, 1.5497e-03, + -6.1531e-03, 2.6688e-02, -8.1396e-04, 2.8091e-02, 6.5308e-03, + 3.5858e-03, -1.2375e-02, -2.4185e-02, -1.8509e-02, -4.0924e-02, + 4.5197e-02, -1.4656e-02, -7.1350e-02, 8.2636e-04, -3.3325e-02, + -1.4343e-02, 3.5156e-02, -9.1410e-04, 9.0027e-02, -3.2227e-02, + -4.7272e-02, -9.8267e-03, 1.2329e-01, -2.0981e-02, 5.8327e-03, + -3.9886e-02, -2.2110e-02, 3.6835e-02, 9.0088e-02, -1.4488e-02, + 1.0071e-02, 6.1005e-02, 1.7975e-02, 1.9882e-02, 1.5930e-02, + -2.3010e-02, -4.4861e-02, 4.7241e-02, 1.0857e-02, 1.9333e-02, + 7.7362e-03, -1.8280e-02, 8.5510e-02, 1.7929e-02, -9.3365e-04, + -5.5733e-03, 6.0669e-02, 3.0823e-02, -1.4061e-02, 2.3254e-02, + -2.0508e-02, -1.8112e-02, -3.5828e-02, 2.7618e-02, 2.1103e-02, + 2.0264e-02, -2.4841e-02, 6.3721e-02, 2.3941e-02, 3.2005e-03, + -2.1988e-02, -1.0862e-03, -9.0942e-03, -1.1559e-02, -7.1289e-02, + -1.8280e-02, -1.5823e-02, -6.9962e-03, -6.0730e-02, 8.1329e-03, + -1.0065e-01, 4.7058e-02, -5.8861e-03, 4.7779e-04, 1.2875e-03, + -5.7068e-02, -8.9417e-02, 8.3557e-02, -5.9692e-02, -1.7776e-03, + -4.6021e-02, -2.8992e-02, -8.6136e-03, 3.6713e-02, -1.2138e-02, + -3.9032e-02, 4.3060e-02, -6.3049e-02, 2.2352e-04, -1.3138e-02, + -5.7800e-02, -3.6804e-02, -6.9275e-02, -6.9458e-02, -3.7781e-02, + -3.5572e-03, 7.4585e-02, 8.3389e-03, -3.4103e-03, 2.9724e-02, + 3.6438e-02, -2.7328e-02, 5.1300e-02, 5.1300e-02, 8.9493e-03, + 2.2980e-02, 2.5375e-02, -1.6678e-02, -1.1574e-02, -2.7115e-02, + 7.7515e-03, 1.0384e-02, 1.0429e-02, 1.9226e-02, 5.3162e-02, + 5.0926e-03, -1.4091e-02, -4.2510e-04, 5.4962e-02, 2.0605e-01, + -1.3680e-02, 2.9583e-03, 9.6863e-02, 1.4257e-03, 
3.6255e-02, + 1.4374e-02, 2.6749e-02, -2.4536e-02, -5.9090e-03, 7.2693e-02, + -2.5467e-02, -2.6337e-02, 1.6785e-02, 1.4023e-02, 2.7313e-03, + 6.2805e-02, 4.9408e-02, -1.0826e-02, 2.3880e-02, 2.1454e-02, + 8.5678e-03, -9.3918e-03, 2.5223e-02, -3.3844e-02, -7.6942e-03, + 1.2955e-02, 5.3955e-02, 9.7656e-03, -2.8976e-02, 3.9337e-02, + 6.9519e-02, 1.8631e-02, -2.3178e-02, -1.1665e-02, 1.8417e-02, + 2.6047e-02, -1.0033e-02, 3.3905e-02, -6.0486e-02, 1.0925e-02, + 3.0426e-02, 1.6312e-02, -4.5685e-02, 3.7689e-02, -3.3905e-02, + -3.9948e-02, -1.4366e-02, 1.8112e-02, 4.4403e-03, 1.6432e-03, + -1.8448e-02, 2.8671e-02, 2.3079e-04, -3.2379e-02, -2.0645e-02, + -3.8849e-02, 1.2970e-02, -3.1342e-02, 2.6199e-02, 8.7619e-06, + -2.7485e-03, 1.5802e-03, -1.9424e-02, -3.8269e-02, -3.0334e-02, + 1.6403e-02, 1.6403e-02, 1.6479e-02, -2.4185e-03, -3.4119e-02, + -1.0376e-02, -1.4740e-02, 5.7316e-04, -1.9852e-02, -5.7800e-02, + -2.1133e-02, 1.9211e-02, -9.1934e-03, -7.0000e-03, -1.4076e-02, + 2.6360e-03, 2.1774e-02, -9.6054e-03, -1.4496e-02, 5.0783e-04, + 3.3691e-02, 1.2993e-02, -1.5060e-02, 4.6692e-02, 1.4978e-01, + -1.4893e-01, 2.9205e-02, 3.1586e-02, -5.9326e-02, -6.9008e-03, + 3.1891e-02, -4.6875e-02, -3.3035e-03, 3.8910e-02, -2.3666e-02, + 2.3804e-02, -3.9551e-02, 3.0243e-02, 2.4567e-02, -4.4006e-02, + -3.9978e-02, -1.7563e-02, -5.1392e-02, -5.8212e-03, 6.6650e-02, + 2.7866e-03, 5.7411e-04, -1.2375e-02, 4.7333e-02, 2.3727e-02, + 3.4821e-02, 3.6346e-02, 2.7756e-02, -4.2023e-02, 1.8524e-02, + 9.8038e-03, -3.6011e-02, 3.4698e-02, -8.6182e-02, -5.0507e-02, + -2.7039e-02, 8.9951e-03, 4.0314e-02, 1.9272e-02, -3.9825e-03, + -2.0157e-02, -1.1072e-01, -1.3294e-03, -4.5738e-03, 6.2275e-04, + 9.2010e-03, 1.7532e-02, 3.4454e-02, -1.1017e-02, 5.4398e-03, + 1.2947e-02, 1.6876e-02, 1.9363e-02, 1.1658e-02, -1.9501e-02, + 2.9099e-02, -2.0340e-02, 4.2084e-02, 2.5925e-02, 4.1771e-03, + 2.6443e-02, -1.2688e-02, -5.1453e-02, 2.4319e-03, -3.8330e-02, + 1.7654e-02, 2.6962e-02, 3.5095e-02, -5.8441e-02, 
-3.0014e-02, + 1.3481e-02, 5.5420e-02, 7.1869e-03, -5.1392e-02, 7.4097e-02, + 1.9180e-02, 2.3911e-02, 9.3307e-03, 3.8300e-02, -7.8278e-03, + -5.0171e-02, -8.6670e-02, -6.3416e-02, -2.5757e-02, 7.9834e-02, + 1.3481e-02, -2.6108e-02, -2.6688e-02, 5.6213e-02, 1.0022e-01, + 3.5736e-02, -2.7740e-02, -1.4015e-02, -3.4302e-02, -5.2002e-02, + 2.1912e-02, -6.8909e-02, -1.0216e-02, 4.4495e-02, -2.4780e-02, + -6.0005e-03, -3.8483e-02, -8.3466e-03, 1.4488e-02, 1.9379e-02, + 4.7546e-02, -1.1909e-02, 9.3651e-04, 6.5002e-02, 1.9333e-02, + -1.5175e-02, -3.1464e-02, 3.3875e-02, 5.5008e-03, -2.5116e-02, + -2.5742e-02, 2.8503e-02, 1.1359e-01, -1.2955e-02, -3.0411e-02, + 4.2992e-03, 8.7585e-02, 1.1452e-02, 7.5378e-02, 5.1270e-02, + 1.0872e-02, -4.9469e-02, -5.6030e-02, 6.0120e-02, 1.4200e-03, + 1.4328e-02, -3.7170e-02, -2.3087e-02, 2.5162e-02, 3.9185e-02, + -1.9882e-02, 4.4952e-02, 9.4299e-02, 1.1896e-01, -3.1372e-02, + 5.1422e-03, 3.1372e-02, 4.0131e-02, -5.7251e-02, 1.3519e-02, + -3.1174e-02, -9.5215e-02, -2.9583e-03, 7.3719e-04, -2.1072e-02, + 5.2032e-03, 2.8839e-02, -2.0477e-02, -3.3600e-02, 6.6490e-03, + -9.7717e-02, 3.9032e-02, 2.8610e-02, -4.1885e-03, 9.6207e-03, + 3.7018e-02, 3.2597e-03, -5.5206e-02, -4.7363e-02, -9.2545e-03, + -2.3823e-03, 3.1616e-02, -2.2171e-02, 2.9877e-02, 4.8828e-02, + -3.5828e-02, -1.1780e-02, -1.1528e-02, -2.6207e-03, -4.1275e-03, + -1.0635e-02, 5.6244e-02, -7.1594e-02, 1.2772e-02, 3.2745e-02, + 4.6082e-02, -1.6464e-02, 9.0149e-02, -2.8244e-02, 6.3110e-02, + -4.0924e-02, -1.2466e-02, -2.4368e-02, 7.9966e-04, -5.1147e-02, + 3.7811e-02, 4.3030e-02, -1.3098e-01, 4.0771e-02, 2.1606e-02, + -1.2375e-02, -4.8187e-02, -3.0472e-02, -4.2114e-02, -2.7298e-02, + 2.9221e-02, -6.0883e-02, -2.0752e-02, 2.4857e-02, 1.0895e-02, + 6.7139e-03, -5.0323e-02, 8.5144e-03, -1.3382e-02, 4.3701e-02, + 2.2934e-02, 4.0710e-02, 7.7248e-04, -2.1942e-02, -1.2863e-02, + -2.3155e-03, -3.0960e-02, -3.7933e-02, -2.0752e-02, 2.0599e-03, + -8.8013e-02, 4.9408e-02, -1.8021e-02, -3.1548e-03, 
-6.4697e-02, + -1.3191e-02, -3.3752e-02, -5.8517e-03, -3.0594e-02, -1.9196e-02, + 4.2297e-02, 1.8555e-02, -5.2460e-02, 8.3084e-03, -9.3307e-03, + -7.0992e-03, -2.3682e-02, -6.5918e-02, -9.3689e-03, -6.0425e-03, + -1.8585e-02, 7.0679e-02, -2.8896e-03, 2.9907e-02, -1.0857e-02, + 2.8259e-02, -6.4453e-02, 2.2583e-02, -3.0273e-02, 2.6321e-02, + -4.2145e-02, -3.1185e-03, 4.9744e-02, -8.8745e-02, -1.4400e-03, + -1.2947e-02, 2.0996e-02, 1.1005e-01, 1.5099e-02, 5.7404e-02, + 5.1880e-02, 1.2077e-02, -6.0394e-02, 3.3661e-02, -2.8549e-02, + 3.1342e-02, 1.8457e-01, 5.0163e-03, 1.2878e-02, -2.2827e-02, + -1.3092e-02, 1.5366e-02, -4.1565e-02, 3.6377e-02, 2.7023e-02, + 2.3239e-02, 1.0078e-02, 3.0319e-02, -4.4830e-02, 2.3102e-02, + -4.9477e-03, 1.5823e-02, 1.8906e-02, -3.5553e-02, 1.5701e-02, + -4.0039e-02, -6.6071e-03, -4.1840e-02, -2.6443e-02, 1.2383e-02, + 2.7771e-02, -8.8577e-03, 1.2444e-02, 5.2032e-02, 2.4353e-02, + 1.8219e-02, 2.9221e-02, -4.1229e-02, -7.5951e-03, 4.0527e-02, + -1.6968e-02, 6.6833e-03, -9.1248e-03, -5.4688e-02, 8.1177e-03, + -2.5269e-02, 3.0014e-02, -2.7328e-02, 3.2253e-03, 3.5431e-02, + -2.1805e-02, 4.3976e-02, 1.8341e-02, 2.3636e-02, 2.2827e-02, + -2.0340e-02, -1.1665e-02, 3.3875e-02, -3.8071e-03, 4.5227e-02, + 6.2790e-03, -6.6895e-02, 4.2877e-02, -1.3599e-01, -5.0583e-03, + 1.9409e-02, -2.7351e-03, 2.2755e-03, -6.1035e-02, 5.2429e-02, + 2.7359e-02, 4.9805e-02, 3.2471e-02, -4.7989e-03, 2.2873e-02, + -1.2941e-03, -9.3811e-02, 2.6810e-02, -1.3443e-02, -1.4938e-02, + 2.0325e-02, -2.8629e-03, -3.7903e-02, 4.5654e-02, -5.9814e-02, + -2.3155e-03, -3.5767e-02, -8.6823e-03, 4.5662e-03, 5.3772e-02, + 6.0669e-02, 5.5504e-03, -4.4556e-02])Parameter containing: +tensor([1.5493, 1.6399, 1.5999, 1.5885, 1.6329, 1.6397, 1.5427, 1.5989, 1.5938, + 1.6237, 1.6590, 1.6457, 1.6181, 1.6205, 1.4713, 1.5963, 1.6449, 1.7084, + 1.6214, 0.4006, 1.6030, 1.6363, 1.5759, 1.5834, 1.6650, 1.5835, 1.6580, + 1.5898, 1.5943, 1.5827, 1.5928, 1.5276, 1.6157, 1.5918, 1.6369, 1.5499, + 1.6460, 
1.5885, 1.5715, 1.5816, 1.5972, 1.5878, 1.5510, 1.6371, 1.6093, + 1.6871, 1.5316, 1.6196, 1.6218, 1.5502, 1.6218, 1.6361, 1.5700, 1.5400, + 1.6542, 1.6868, 1.5516, 1.5528, 1.6228, 1.6274, 1.5977, 1.6739, 1.5992, + 1.5451, 1.6178, 1.7181, 1.5690, 1.4695, 1.6697, 1.5485, 1.5806, 1.6739, + 1.8901, 1.6811, 1.6642, 1.6300, 1.5537, 1.5634, 1.6123, 1.6138, 1.6103, + 1.5792, 1.5907, 1.6066, 1.5462, 1.5001, 1.6765, 1.5610, 1.6225, 1.5802, + 1.6846, 1.5668, 1.6137, 1.6866, 1.6791, 1.4948, 1.6263, 1.6051, 1.6387, + 1.5753, 1.5968, 1.5417, 1.5984, 1.5651, 1.5851, 1.6353, 1.5291, 1.5567, + 1.5705, 1.6465, 1.5494, 1.5523, 1.6082, 1.6735, 1.6387, 1.5507, 1.5189, + 1.5639, 1.6035, 1.5690, 1.5816, 1.5912, 1.5696, 1.5963, 1.6102, 1.5688, + 1.6092, 1.6175, 1.6432, 1.6284, 1.5562, 1.5349, 1.5583, 1.6106, 1.5579, + 1.6081, 1.6015, 1.6442, 1.6334, 1.6291, 1.6137, 1.5456, 1.7259, 1.5900, + 1.5554, 1.5780, 1.6134, 1.6272, 1.5741, 1.6224, 1.5436, 1.4855, 1.5818, + 1.6631, 1.5472, 1.2020, 1.5398, 1.6032, 1.5883, 1.6821, 1.5366, 1.5118, + 1.5927, 1.6376, 1.6173, 1.6728, 1.5829, 1.6114, 1.5536, 1.6710, 1.5637, + 1.5744, 1.4803, 1.5800, 1.6404, 1.6355, 1.6304, 1.6685, 1.5647, 1.5934, + 1.5731, 1.6635, 1.5844, 1.5600, 1.6321, 1.5281, 1.5389, 2.1468, 1.6702, + 1.6049, 1.5883, 1.5372, 1.6238, 1.6070, 1.5457, 1.5475, 1.6049, 1.5958, + 3.0083, 1.5153, 1.6055, 1.6462, 1.5855, 1.6214, 1.6535, 1.5140, 1.5091, + 1.5449, 1.5918, 1.6292, 1.5536, 1.4959, 1.6112, 1.5364, 1.4727, 1.6162, + 1.5672, 1.5872, 1.5662, 1.7077, 1.6566, 1.6526, 1.4817, 1.6145, 1.6988, + 1.6212, 1.6270, 1.6128, 1.6391, 1.7390, 1.6581, 1.6760, 1.7409, 1.6474, + 1.6504, 1.5595, 1.6109, 1.6272, 1.4755, 1.6741, 1.5887, 1.5637, 1.6667, + 1.6239, 1.6049, 1.6180, 1.5848, 1.4813, 1.5367, 1.7013, 1.5623, 1.6481, + 1.5652, 1.6429, 1.6078, 1.6493, 1.5002, 1.6345, 1.5190, 1.6447, 1.5506, + 1.5112, 1.5698, 1.5044, 1.5513, 1.5468, 1.5381, 1.5950, 1.6118, 1.5543, + 1.6706, 1.6023, 1.5748, 1.6172, 1.5726, 1.5440, 1.4987, 1.5566, 1.6374, + 1.5455, 
1.5638, 1.6089, 1.6195, 1.6111, 1.6488, 1.6001, 1.4797, 1.6119, + 1.6401, 1.5164, 1.5940, 1.6737, 1.6311, 1.5728, 1.5773, 1.6245, 1.4770, + 1.6347, 1.6324, 1.7058, 1.5734, 1.5590, 1.6184, 1.7056, 1.6453, 1.4783, + 1.5747, 1.6226, 1.5360, 1.6008, 1.6301, 1.5816, 1.5759, 1.5865, 1.5823, + 1.5757, 1.5304, 1.5889, 1.7170, 1.5545, 1.5908, 1.4911, 1.5523, 1.6808, + 1.0102, 1.4416, 1.6402, 1.6516, 1.5736, 1.5707, 1.5194, 1.5789, 1.5578, + 1.5791, 1.5766, 1.6633, 1.5945, 1.5471, 1.6900, 1.5929, 1.5097, 1.5815, + 1.6156, 1.6017, 1.5836, 1.5091, 1.5963, 1.6344, 1.5837, 1.6584, 1.5643, + 1.6530, 1.6016, 1.7498, 1.5965, 1.6086, 1.5473, 1.6562, 1.6158, 1.5985, + 1.5164, 1.5495, 1.6260, 1.6174, 1.5751, 1.5895, 1.5756, 1.5310, 1.5784, + 1.5734, 1.5745, 1.6339, 1.6713, 1.6830, 1.6132, 1.4848, 1.6080, 1.5582, + 1.5989, 1.5899, 1.6601, 1.5907, 1.6642, 1.6658, 1.5568, 1.5169, 1.5837, + 1.7022, 1.5386, 1.5777, 1.6683, 1.5221, 1.4951, 1.5858, 1.5786, 1.6066, + 1.5431, 1.6643, 1.6195, 1.5992, 1.5671, 1.6200, 1.5648, 1.5666, 1.6210, + 1.5249, 1.6037, 1.6288, 1.5963, 1.5306, 1.6307, 1.5835, 1.4687, 1.5704, + 1.3834, 2.4891, 1.6304, 1.6105, 1.5524, 1.6043, 1.5713, 1.5287, 1.6168, + 1.5186, 1.5306, 1.5284, 1.5702, 1.5954, 1.6189, 1.5576, 1.6143, 1.5329, + 1.6438, 1.5675, 1.5940, 1.5842, 1.5996, 1.5994, 1.5268, 1.5964, 1.5654, + 1.6301, 1.5943, 1.2062, 1.6348, 1.5624, 1.6046, 1.6042, 1.6371, 1.6150, + 1.6369, 1.6009, 1.7376, 1.6231, 1.5693, 1.6378, 1.5659, 1.5503, 1.6336, + 1.5653, 1.6103, 1.6127, 1.6306, 1.5466, 1.5629, 1.6925, 1.5879, 1.5860, + 1.5904, 1.5143, 1.6487, 1.5793, 1.6201, 1.5986, 1.5847, 1.6243, 1.5638, + 1.5498, 1.6304, 1.5939, 1.5593, 1.5534, 1.6571, 1.3636, 1.5721, 1.5782, + 1.5547, 1.5585, 1.6214, 1.6656, 1.5667, 1.6133, 1.5377, 1.6059, 1.5794, + 1.6044, 1.6507, 1.5776, 1.5033, 1.5131, 1.5670, 1.5717, 1.6147, 1.6226, + 1.6446, 1.5396, 1.5357, 1.6272, 1.5983, 1.5448, 1.5605, 1.7070, 1.6120, + 1.6054, 1.6446, 1.6035, 1.6333, 1.5536, 1.5951, 1.5870, 1.6011, 1.5795, + 1.5923, 
1.5524, 1.5733, 1.5762, 1.5603, 1.5949, 1.5760, 1.6026, 1.6437, + 1.5465, 1.7006, 1.6365, 1.5208, 1.6201, 1.6017, 1.5543, 1.6804, 1.7420, + 1.6809, 1.5633, 1.6597, 1.6247, 1.6704, 1.6162, 1.6767, 1.5603, 1.5749, + 1.6013, 1.6128, 1.6164, 1.6349, 1.6668, 1.5795, 1.5639, 1.5701, 1.6196, + 1.5638, 1.5866, 1.5865, 1.4001, 1.5744, 1.6010, 1.6010, 1.6537, 1.6356, + 1.6039, 1.6189, 1.5781, 1.5685, 1.5841, 1.5766, 1.5213, 1.6012, 1.5502, + 1.5972, 1.6567, 1.5655, 1.5510, 1.5973, 1.5988, 1.6302, 1.6506, 1.5686, + 1.6109, 1.6744, 1.5829, 1.6352, 1.6946, 1.6377, 1.7059, 1.5471, 1.5368, + 1.5926, 1.5611, 1.6253, 1.5294, 1.4044, 1.6179, 1.5868, 1.6041, 1.6719, + 1.5846, 1.5398, 1.6249, 1.7150, 1.7153, 1.6471, 1.6346, 1.5701, 1.6264, + 1.6090, 1.6172, 1.6575, 1.5996, 1.6354, 1.5527, 1.7479, 1.5759, 1.6304, + 1.6639, 1.5755, 1.6105, 1.6639, 1.6032, 1.5069, 1.6164, 1.6587, 1.5650, + 1.6497, 1.6342, 1.5691, 1.6271, 1.6007, 1.5943, 1.5942, 1.5980, 1.5675, + 1.5087, 1.5839, 1.6046, 1.6019, 1.6140, 1.3802, 1.6647, 1.6404, 1.5502, + 1.6699, 1.6187, 1.6753, 1.5719, 1.6739, 1.5828, 1.6139, 1.6800, 1.5641, + 1.5271, 1.5764, 1.6025, 1.5944, 1.5819, 1.5731, 1.5817, 1.5822, 1.6158, + 1.6417, 1.6392, 1.6379, 1.6283, 1.5247, 1.5779, 1.6359, 1.4932, 1.5547, + 1.5839, 1.6003, 1.6529, 1.5339, 1.6317, 1.6241, 0.3793, 1.5644, 1.5394, + 1.6564, 1.5079, 1.5475, 1.6538, 1.5442, 1.5427, 1.5980, 1.5646, 1.5751, + 1.6030, 1.5852, 1.5536, 1.5873, 1.5875, 1.5647, 1.6036, 1.5827, 1.5896, + 1.6706, 1.6393, 1.5696, 1.5956, 1.5279, 1.6411, 1.4874, 1.5671, 1.5945, + 1.6258, 1.6366, 1.5930, 1.5429, 1.6729, 1.5758, 1.6391, 1.5821, 1.5636, + 1.6237, 1.6044, 1.6874, 1.5696, 1.6090, 1.5168, 1.2428, 1.6478, 1.6367, + 1.6609, 1.5528, 1.6392, 1.6062, 1.5966, 1.5086, 1.6277, 1.5310, 1.6601, + 1.5359, 1.5844, 1.6001, 1.5696, 1.6074, 1.6169, 1.6799, 1.5825, 1.6523, + 1.6416, 1.6637, 1.6573, 1.4960, 1.6380, 1.5620, 1.6887, 1.6310, 1.6395, + 1.5979, 1.5711, 1.5458, 1.6128, 1.6570, 1.5286, 1.5453, 1.5261, 1.6336, + 1.5759, 
1.5715, 1.6449])Parameter containing: +tensor([ 9.9168e-02, 4.3849e-02, -1.7755e-01, 3.2002e-02, 7.9832e-02, + 2.2029e-01, 9.3474e-02, 3.6958e-02, -1.1444e-01, -1.2357e-02, + 4.9962e-02, -3.6297e-02, -1.3317e-01, -2.1069e-02, -1.0363e-01, + -7.6869e-02, -1.0596e-02, -1.9904e-02, 1.5362e-02, 4.3388e+00, + -5.8986e-02, 5.7045e-02, -2.8652e-02, 3.9917e-02, -3.0142e-02, + 1.6968e-02, 2.2256e-02, 4.2894e-02, 4.1783e-02, -5.1836e-02, + 2.7941e-02, 3.3575e-02, 1.9892e-02, 1.2753e-01, 1.1412e-01, + -3.5983e-02, 4.1505e-02, -3.6498e-02, -1.4680e-01, 7.4925e-02, + 5.5049e-02, 4.8291e-02, -2.4369e-03, 5.2145e-02, -9.3334e-03, + 3.9382e-02, 6.6774e-02, -1.6817e-02, 2.5555e-02, 1.1247e-01, + 5.4547e-02, 4.5930e-02, 2.4378e-02, 6.2863e-02, 6.7775e-02, + 8.7540e-02, -1.0405e-01, -3.7358e-02, 5.7717e-02, 7.5026e-03, + 8.6343e-02, -1.0512e-01, 6.9189e-02, 3.2092e-02, 1.0216e-01, + -2.0255e-02, 1.0212e-01, 1.0654e-01, 3.5857e-02, 2.3916e-01, + 5.0539e-03, 6.2813e-02, 6.3034e-01, 7.7081e-02, -1.2523e-01, + 1.1814e-01, 3.1212e-02, -1.2816e-01, -1.3047e-02, -6.0614e-02, + -1.9448e-02, -5.9216e-02, 5.8349e-02, 1.3912e-01, 5.4253e-02, + 3.7561e-02, 1.0247e-01, 1.0217e-01, -5.0383e-04, -1.2770e-02, + 6.9393e-05, 4.3805e-02, 7.1861e-02, -9.6834e-02, -7.5698e-02, + 2.8570e-02, -2.6059e-02, -1.6039e-02, -6.1035e-02, -3.4551e-02, + 4.6976e-02, -3.7879e-02, 5.5665e-02, -1.1818e-01, 2.9161e-02, + 5.0025e-02, -3.7138e-03, -6.8892e-02, 3.7005e-03, -1.0422e-02, + -3.8308e-02, -2.7117e-02, 7.8872e-02, 7.9073e-02, 7.7121e-02, + -6.7232e-02, -2.7161e-02, -8.8668e-02, -1.8786e-02, 1.0479e-01, + -2.0932e-02, -2.5799e-01, -6.9773e-02, -4.2048e-02, -1.8170e-01, + -9.6430e-03, -1.7160e-01, 5.4934e-03, 4.0641e-02, -5.2708e-02, + -1.1047e-01, 8.8251e-02, -7.7579e-02, -2.2611e-02, -7.4420e-02, + 7.1777e-02, -4.9953e-02, -7.0597e-04, -3.5563e-02, -4.9313e-03, + 2.4732e-02, -9.2466e-03, 3.3888e-02, 6.1213e-02, 6.7017e-02, + -1.1905e-02, -1.3046e-02, 8.6177e-04, 1.0020e-02, 1.4302e-01, + 2.1901e-01, 
-5.3628e-02, -1.2036e-01, 4.3155e-02, -4.0979e-02, + -4.2002e-01, -7.8410e-03, -1.6884e-02, 1.4052e-02, 6.5780e-02, + 1.6192e-01, -1.3758e-01, -5.3189e-02, 4.2110e-02, -5.1313e-02, + -2.9008e-02, -6.6411e-02, 6.2063e-02, -5.4825e-02, 7.1862e-02, + -8.1163e-02, 1.0925e-01, 3.2302e-02, 3.9469e-02, 5.8172e-02, + -1.2644e-01, 4.1148e-03, 2.5105e-02, -4.9433e-02, 4.0924e-02, + -2.9660e-03, -4.3415e-02, 3.4479e-02, -5.4720e-03, 1.6097e-02, + -1.7373e-01, 6.0208e-02, 2.4375e-01, 2.8294e-03, -9.3666e-02, + 2.8309e-02, -4.6205e-02, 1.0189e-01, 3.5309e-02, -8.6500e-02, + 2.4689e-02, -7.4888e-02, -1.3506e-01, 1.5368e-01, 2.4555e-03, + -1.5823e-01, 5.7932e-03, -5.2773e-02, 4.4575e-02, 8.0074e-02, + 1.6230e-01, -8.7500e-02, 6.6434e-02, 1.2840e-01, 3.9700e-02, + 3.3606e-02, 2.0853e-01, 1.8290e-02, -8.0931e-02, -9.6761e-02, + -4.6112e-02, -1.8914e-02, 4.8560e-02, -1.1570e-01, -2.3101e-01, + -6.7442e-02, -7.2427e-02, 4.9952e-02, -4.3988e-02, 1.8997e-02, + -4.0182e-02, 8.3352e-02, -4.5539e-02, 1.1739e-01, -4.0738e-02, + 3.9162e-02, 6.3029e-02, 1.4560e-02, 7.7979e-02, 1.0567e-02, + 1.8793e-03, -1.1100e-01, 4.5256e-02, 9.0369e-02, 3.4154e-03, + 6.3770e-02, 7.5397e-02, 1.1233e-02, 1.0372e-01, 3.4302e-02, + 7.3305e-02, 7.2093e-02, -9.6457e-02, 2.4593e-02, -5.1865e-02, + 3.5280e-02, -4.2728e-03, 8.0915e-02, -1.4808e-01, 1.5104e-01, + -3.9665e-02, -9.2068e-02, -4.3506e-02, 5.6074e-02, 3.5466e-02, + -5.8521e-02, -8.1093e-02, -5.2968e-02, -4.0219e-02, -2.9365e-02, + 3.5287e-02, -7.8777e-02, -9.6997e-03, -7.0869e-02, -8.0517e-03, + 2.7628e-02, 3.7099e-02, 1.6447e-01, -8.0948e-02, 1.5004e-02, + 7.5770e-02, -6.2618e-02, 1.1144e-02, 6.9380e-02, -1.2749e-01, + 2.0715e-01, 2.1565e-01, 1.3051e-03, 2.1492e-02, 4.6540e-02, + 4.0014e-02, 2.8328e-02, -1.1086e-01, 1.2292e-01, -1.4375e-01, + 3.2895e-03, -3.6497e-02, 9.6531e-02, 6.9485e-02, 7.0013e-02, + 1.9933e-02, 2.3198e-01, 5.5978e-02, 5.4095e-02, 7.0778e-02, + 1.8711e-02, -1.9152e-03, -4.6121e-02, 1.7459e-01, -2.3601e-02, + 1.5128e-02, 1.2909e-01, 
-1.2117e-01, 7.0730e-02, -5.3275e-02, + 5.2342e-02, 2.5520e-02, -1.4723e-01, -1.5816e-01, 3.6779e-02, + -2.4259e-02, -3.5487e-04, 6.2654e-03, 1.9984e-01, -7.6132e-02, + -5.9028e-02, 2.1878e-01, -9.5459e-02, -5.9844e-02, -7.4959e-03, + -2.0009e-01, 5.3376e-02, 2.6009e-02, 3.6678e-02, -2.1850e-01, + 1.4710e-01, -2.3338e-02, 1.2922e-01, -3.7010e-02, -2.7180e-02, + 9.6733e-02, 5.3181e-02, -6.1271e-02, -1.1622e-02, -5.7339e-02, + -1.5541e-01, 4.0807e-02, -2.3586e-02, -8.5208e-02, 1.2889e-01, + -1.5264e-02, 8.5310e-02, 1.0926e-01, 5.6991e-02, 2.3987e-03, + -1.1391e-01, 3.9262e-02, -2.8064e-02, 6.5222e-02, 2.2107e-02, + -7.2463e-02, -6.1091e-02, 4.6915e-02, -2.5152e-02, -1.2362e-01, + 4.6378e-02, -5.5811e-03, 1.7696e-02, -6.9871e-02, -4.3401e-02, + 1.3433e-03, 5.2477e-02, 1.0138e-01, 5.2289e-02, -2.1016e-02, + 6.6678e-02, -3.2154e-02, -4.6185e-02, -2.7438e-02, -2.5051e-02, + 3.9109e-02, 1.1025e-01, 1.0976e-01, 1.1039e-02, 1.5488e-02, + -1.4314e-01, 1.3005e-01, 4.1390e-02, -1.8025e-02, -2.3589e-02, + -1.4516e-02, -2.1773e-02, -7.3370e-02, -7.3497e-02, -9.7571e-02, + 4.6733e-03, 6.5029e-03, -1.3255e-01, -8.5680e-02, 2.9686e-02, + 4.4497e-02, 1.9360e-02, 6.3552e-02, 1.8620e-03, 7.7786e-02, + 1.3989e-01, -3.6987e-02, 1.0180e-01, 7.3867e-02, -1.4634e-01, + -1.1714e-01, -7.4277e-02, 5.9020e-02, 2.2145e-02, -2.0120e-02, + -1.1564e-01, 8.6384e-02, 5.9949e-02, -8.7968e-02, 1.6044e-02, + -1.3494e+00, -1.3101e-02, 4.1360e-02, -7.1064e-02, -8.5138e-02, + -4.7313e-02, 2.7228e-02, -2.2329e-02, -9.8785e-02, -6.7174e-02, + 3.2216e-02, -2.0466e-01, -9.3770e-02, 9.2580e-02, -5.3992e-03, + -1.7000e-02, -3.8034e-02, 9.0621e-03, 8.6878e-02, 1.0987e-01, + 6.6488e-04, 5.1833e-02, 5.3204e-02, 1.2711e-01, -8.6105e-02, + -4.8681e-02, -6.0973e-02, -6.4746e-02, 8.5388e-02, 1.7026e-01, + -1.2123e-01, -1.9613e-02, -9.3257e-02, 1.2637e-01, -2.8975e-03, + 3.1562e-03, 3.3015e-03, -6.1522e-02, 5.9085e-02, 4.2832e-02, + 3.9749e-02, 5.1629e-02, 2.1208e-02, -6.0335e-02, -1.2391e-01, + -1.1050e-02, 
6.3860e-02, -1.0895e-01, 4.7044e-02, 2.1700e-02, + -7.9585e-03, 9.7766e-02, -1.1182e-01, 1.3406e-01, 4.7058e-02, + -3.2009e-02, -1.0783e-01, -9.7116e-02, 2.9043e-02, -2.2611e-02, + 1.0493e-01, 1.7676e-01, 7.0756e-02, 5.9062e-03, 8.1334e-02, + 1.1995e-01, -1.3953e-01, 3.0317e-02, 1.0772e-01, -1.1324e-01, + 2.4339e-04, -3.5425e-02, 1.6272e-01, 7.3581e-02, -1.3233e-03, + 5.4502e-02, 2.7183e-02, -4.3697e-02, -2.1902e-02, -1.1941e-01, + 4.8753e-02, -5.3143e-02, 1.1286e-01, -1.1190e-01, 1.5261e-02, + -3.4672e-02, -8.7632e-02, 7.1268e-02, -5.5456e-02, -5.3602e-02, + 1.5196e-02, 1.1779e-01, 3.9464e-02, 1.0086e-02, 1.8113e-03, + -5.6602e-02, -5.1226e-02, -9.6473e-02, 5.4633e-02, -1.6578e-02, + -4.3707e-02, -1.5735e-02, 1.7919e-01, 1.3178e-01, -1.3620e-01, + -7.0200e-02, 6.2552e-03, 5.4710e-02, -2.3846e-02, -1.8538e-02, + 4.6390e-03, -1.0941e-02, -2.7629e-02, 3.5496e-02, -8.9692e-02, + 9.8759e-03, 3.5293e-02, -3.2110e-02, -1.0279e-01, -5.9262e-02, + 1.0760e-02, -6.1608e-02, 9.6876e-03, -4.7052e-02, -7.1123e-02, + 1.0229e-02, 9.2876e-02, 9.8025e-03, 3.4059e-02, -2.0260e-02, + -8.4060e-03, 1.1549e-01, 7.8580e-03, 1.4220e-01, -7.0386e-03, + 5.5618e-02, 5.0090e-02, -7.8569e-02, 4.5682e-02, -1.0535e-02, + -2.0261e-02, -3.6776e-02, -3.4775e-02, 2.5522e-02, 1.2608e-01, + -6.7188e-02, 2.0959e-01, 2.1202e-02, 2.0807e-02, 1.5260e-01, + 1.0424e-01, 6.0454e-03, 2.1931e-02, 2.5767e-03, 5.1140e-02, + 9.3779e-02, -1.6807e-02, -5.5458e-02, -1.3268e-02, 4.6824e-02, + -2.5836e-02, 1.7654e-02, 3.8869e-02, 7.0855e-02, -5.4796e-02, + -1.8964e-01, 3.8619e-02, 3.9355e-02, 1.0636e-01, 6.1914e-02, + 4.0157e-02, 5.5368e-02, -7.4316e-02, 4.9771e-02, 1.0426e-01, + -3.9820e-02, 5.2887e-02, 7.6436e-02, -2.4672e-02, 7.8243e-02, + -6.3843e-03, 6.3411e-02, 1.4871e-03, -4.8862e-02, 2.5202e-02, + -7.8051e-02, -1.4255e-01, -1.5805e-02, 1.3358e-02, 5.2986e-02, + -1.1864e-01, 4.6314e-02, 3.5143e-02, 7.3639e-02, -4.0801e-02, + 9.7016e-02, 6.1717e-02, -1.8799e-02, 2.7340e-03, -7.7440e-02, + -1.3023e-01, 
-4.1560e-03, 7.8262e-02, 3.5681e-02, -1.3603e-01, + 8.7046e-02, -1.3018e-02, 7.5417e-02, -1.1819e-02, 5.1524e-02, + -6.0646e-02, -9.2515e-03, 2.0641e-02, 6.3551e-02, -7.2697e-02, + 1.3991e-01, -2.5632e-02, 1.1834e-01, 1.6382e-01, 6.7759e-02, + 2.4555e-02, -8.7307e-02, 5.7441e-02, 7.5379e-02, -6.1329e-02, + 3.8550e-02, 8.5031e-04, 1.1118e-01, -6.0965e-02, 3.8923e-02, + 5.2394e-03, 4.7283e-03, 1.1786e-01, 3.2184e-02, 1.8079e-01, + 1.2621e-01, 2.8031e-02, -6.1219e-02, 1.0079e-01, -9.9842e-02, + 4.4614e-02, -1.3790e-01, -1.7813e-02, 7.1840e-02, 7.4822e-02, + -4.2562e-02, -2.6297e-02, -1.1915e-01, 3.5753e-02, 1.5433e-01, + 4.8434e-02, -1.5736e-01, -1.1653e-01, 5.1710e-02, 5.2133e-02, + -2.3209e-01, 5.4455e-02, -1.1171e-02, 7.2662e-03, 2.9330e-02, + 3.7252e-02, -3.8506e-02, -2.0239e-02, -1.8180e-03, 1.2752e-01, + 1.2268e-03, -4.5370e+00, -8.1560e-03, 7.7931e-02, -3.1185e-03, + -1.2173e-01, -4.9946e-02, -7.9451e-02, -8.4369e-02, 5.3416e-02, + -1.3204e-01, -4.5491e-02, 1.4509e-02, -1.4095e-03, -3.9864e-02, + 1.0990e-01, 2.8505e-02, -7.8903e-02, 5.6863e-02, -9.2724e-02, + 6.2713e-02, 3.9996e-02, -8.9662e-03, -1.1170e-02, -1.4313e-01, + -3.2183e-02, 1.0727e-01, 5.8286e-02, -1.9646e-03, 5.0987e-02, + -3.8793e-02, 9.1875e-02, 1.0112e-01, 1.9460e-02, -1.6071e-02, + -5.8468e-03, 2.4669e-03, -1.6030e-04, -5.2524e-02, -5.0830e-02, + 2.3971e-02, 2.3613e-02, 1.6205e-02, -1.4068e-01, 1.1002e-01, + -2.4975e-02, -2.1740e-01, 9.6996e-02, 3.8999e-02, 1.4596e-01, + 8.4671e-02, 6.8433e-02, 9.0532e-02, 7.4389e-02, -6.1792e-03, + 3.1973e-04, 2.1138e-02, 1.3640e-02, 4.0633e-02, -3.3085e-02, + 1.0155e-01, -1.3498e-01, -1.0529e-01, 9.6324e-02, -8.7950e-02, + -7.9196e-03, -1.4550e-01, -1.4610e-02, 4.1099e-02, -7.0051e-02, + -3.3869e-02, 5.5067e-03, -5.6253e-02, -5.2561e-02, 8.9154e-02, + -2.1283e-02, 6.1910e-02, 7.9824e-02, -7.3723e-03, -5.7242e-02, + -1.5250e-02, 1.1465e-01, 4.5637e-02, -1.4574e-01, -9.4880e-02, + -7.2486e-02, 1.0574e-01, -4.5374e-03])Parameter containing: +tensor([[ 0.0249, 
0.0004, -0.0134, ..., 0.0097, -0.0221, -0.0155], + [-0.0316, 0.0126, -0.0031, ..., -0.0116, 0.0157, 0.0038], + [-0.0350, 0.0108, 0.0050, ..., -0.0090, -0.0208, -0.0072], + ..., + [-0.0134, -0.0048, 0.0264, ..., -0.0219, -0.0065, -0.0021], + [ 0.0038, 0.0062, -0.0022, ..., -0.0090, -0.0054, -0.0187], + [ 0.0026, 0.0050, 0.0312, ..., -0.0390, 0.0172, -0.0119]])Parameter containing: +tensor([-0.3540, -0.2788, -0.3865, ..., -0.3696, -0.3518, -0.2264])Parameter containing: +tensor([[ 0.0172, -0.0035, -0.0072, ..., 0.0195, 0.0084, -0.0046], + [ 0.0164, 0.0169, -0.0219, ..., 0.0049, 0.0127, 0.0043], + [-0.0011, -0.0046, 0.0048, ..., 0.0056, -0.0323, -0.0245], + ..., + [ 0.0166, 0.0074, 0.0161, ..., -0.0444, -0.0176, -0.0061], + [-0.0054, -0.0033, -0.0143, ..., 0.0186, 0.0021, -0.0065], + [-0.0192, -0.0033, 0.0053, ..., 0.0050, 0.0029, -0.0080]])Parameter containing: +tensor([ 4.4006e-02, 4.4006e-02, -4.0344e-02, 7.8247e-02, 3.2082e-03, + -6.9160e-03, 6.3477e-02, 6.2180e-04, 2.3254e-02, -5.0262e-02, + 4.3701e-02, -3.5614e-02, -1.0718e-01, 7.1945e-03, -5.4596e-02, + 2.0920e-02, -9.7275e-03, -5.1758e-02, 2.5070e-02, -2.9663e-01, + 2.7863e-02, 1.9608e-02, -9.3079e-03, -3.5187e-02, 3.0624e-02, + -7.2266e-02, 3.1525e-02, -1.0262e-02, -3.3112e-02, 3.5210e-03, + 6.8481e-02, -4.9934e-03, 4.7531e-03, 8.1421e-02, -4.2969e-02, + -1.2006e-01, 1.0361e-02, -7.3891e-03, 1.4328e-02, 2.2385e-02, + 2.0996e-02, 3.7231e-02, 2.8091e-02, 1.2085e-02, 6.1607e-04, + -5.8960e-02, -1.8845e-02, -7.3242e-03, -1.6846e-02, -3.9185e-02, + -3.6194e-02, -6.7749e-02, -8.5754e-02, -9.9731e-02, -5.9128e-03, + 5.4199e-02, 4.1382e-02, -1.0706e-01, 2.8366e-02, -1.7044e-02, + 4.5319e-02, 4.0741e-03, 4.5197e-02, -3.0594e-02, 4.9500e-02, + -5.0568e-02, -8.7204e-03, -1.3562e-01, 5.0751e-02, -2.9129e-02, + 4.4250e-02, 1.4915e-02, 3.8788e-02, 6.2408e-02, 5.7312e-02, + 3.1403e-02, 5.3314e-02, -4.5502e-02, -2.9419e-02, 7.1335e-03, + -4.0497e-02, 2.8214e-02, -6.0120e-02, -1.1139e-02, -6.0028e-02, + -5.4352e-02, 
2.4017e-02, 2.9099e-02, 3.0167e-02, 7.3425e-02, + -2.4384e-02, 1.6584e-03, -4.3716e-03, 5.9296e-02, -5.5389e-02, + 3.3779e-03, 5.9540e-02, -7.3730e-02, 2.7985e-02, 6.5186e-02, + -7.1472e-02, -5.0812e-02, -6.9702e-02, 6.2164e-02, 4.4373e-02, + 5.2582e-02, 1.1920e-01, -2.2644e-02, 6.2988e-02, -1.3977e-02, + 5.6702e-02, -9.5444e-03, -6.0940e-04, -2.4063e-02, 1.2109e-01, + 5.3482e-03, 7.7148e-02, 6.3354e-02, 2.1942e-02, 6.2805e-02, + -1.3056e-03, -3.2776e-02, -2.1667e-02, -9.2239e-03, 4.1687e-02, + -8.0200e-02, -9.2041e-02, -4.2847e-02, -1.4244e-02, -3.4882e-02, + -5.4230e-02, -1.1044e-03, -4.0985e-02, -5.4901e-02, 3.9825e-03, + -1.0185e-02, -5.8014e-02, 2.3926e-02, 1.6510e-02, 1.2032e-02, + 1.3130e-02, -4.1771e-03, 1.8816e-03, -3.4943e-02, 1.8845e-02, + -1.9928e-02, 4.5990e-02, -2.6535e-02, -8.4412e-02, 4.5288e-02, + 1.2439e-01, -1.6861e-02, -1.6693e-02, 2.3788e-02, -3.0396e-02, + 8.3780e-04, 2.1057e-03, 5.1147e-02, -1.4244e-02, -3.1006e-02, + 1.0052e-01, -5.0323e-02, 7.9651e-02, -3.1525e-02, 3.9612e-02, + 8.4167e-02, 2.6062e-02, 4.0833e-02, 9.8755e-02, -1.6832e-03, + -3.1204e-02, 4.4037e-02, -4.0161e-02, -2.1927e-02, -5.6396e-02, + -1.1725e-01, -2.2964e-02, 3.0228e-02, 2.9922e-02, -3.9005e-03, + -2.4368e-02, -4.0802e-02, 4.2480e-02, 6.6956e-02, 8.8379e-02, + -3.1433e-02, -3.2471e-02, 2.2559e-01, -5.1910e-02, -1.2238e-02, + -5.9967e-02, -1.1407e-01, 4.2877e-02, 7.1899e-02, 4.1138e-02, + 8.1205e-04, 5.4871e-02, -9.1003e-02, 1.6650e-01, 3.3417e-02, + -1.1768e-01, -2.3132e-02, 1.1646e-01, -6.3477e-03, 6.1760e-03, + 5.7316e-04, 5.2246e-02, 2.7954e-02, 5.6213e-02, -7.7698e-02, + -2.0538e-02, 8.9294e-02, -5.5847e-03, 1.2344e-02, -4.4708e-02, + 9.7351e-02, -1.8188e-02, -2.7069e-02, 7.0801e-03, -1.6431e-01, + -1.7761e-02, -6.6910e-03, 5.9662e-02, 1.4954e-01, -2.1393e-02, + -8.1970e-02, 3.8116e-02, 1.2842e-01, 1.4221e-02, -8.9478e-02, + -2.9205e-02, -8.4152e-03, -5.2948e-03, 1.7960e-02, -2.3041e-03, + 2.1622e-02, 5.3894e-02, 4.1321e-02, 6.3049e-02, 7.4585e-02, + -7.5134e-02, 
-1.7120e-02, 3.9856e-02, 1.5587e-02, 5.4718e-02, + -3.0640e-02, 2.6260e-02, 3.3875e-02, 5.4993e-02, -8.8043e-03, + -4.1107e-02, -2.8122e-02, 2.2095e-02, -3.5095e-02, 4.8065e-02, + 9.5940e-04, -4.5105e-02, -9.4910e-02, 5.2681e-03, -2.9404e-02, + -2.9007e-02, 3.9520e-02, 6.2347e-02, -2.9388e-02, -7.4890e-02, + -6.6284e-02, -7.2670e-03, -5.9174e-02, -3.7262e-02, -1.0040e-01, + 2.4204e-03, -5.4993e-02, -5.0568e-02, -5.2277e-02, -1.6388e-02, + 4.3297e-03, 1.1176e-01, -5.7144e-03, 7.0068e-02, -2.0242e-04, + -7.4768e-02, 6.5231e-03, 2.8366e-02, -6.0394e-02, 2.2354e-02, + -3.7537e-02, -2.7283e-02, 1.1131e-02, 2.7893e-02, -8.1482e-03, + -9.1736e-02, 4.5959e-02, -7.0190e-02, -4.4373e-02, 4.5593e-02, + -8.2825e-02, 9.3628e-02, -7.8308e-02, -4.0222e-02, -1.3855e-02, + -2.8920e-04, 1.4343e-01, -9.5367e-04, 1.5392e-03, 1.8402e-02, + 4.4861e-03, -6.3843e-02, -7.4615e-03, 3.1189e-02, 8.0643e-03, + 2.0874e-02, 2.0294e-02, 3.3913e-03, -2.5803e-02, -4.5349e-02, + 6.5575e-03, 1.4587e-02, 9.6054e-03, 4.6234e-02, 9.3872e-02, + -5.8441e-02, 2.1152e-03, -7.7324e-03, 4.5929e-02, 6.1621e-01, + -1.3562e-01, -3.2684e-02, 5.1544e-02, 4.3068e-03, -4.7089e-02, + 2.3514e-02, -3.3569e-02, -8.0017e-02, -4.9103e-02, 1.8994e-01, + 8.9340e-03, -8.0032e-03, 3.8239e-02, -1.7883e-02, -3.2898e-02, + 9.0454e-02, 9.1934e-03, -2.8091e-02, 6.6223e-03, 1.7242e-02, + 6.3744e-03, 3.2711e-03, 1.0388e-01, -8.2397e-02, -4.4464e-02, + -6.4011e-03, 7.5806e-02, -1.0323e-02, -3.5919e-02, -5.2338e-03, + 2.6779e-02, 1.3000e-02, -3.7567e-02, -1.5144e-02, 1.0291e-01, + -5.7220e-02, -3.5889e-02, 1.0323e-02, -4.9072e-02, 1.8463e-02, + 3.1799e-02, -3.0956e-03, 2.1194e-02, 3.1143e-02, 5.4199e-02, + 5.5786e-02, -2.1545e-02, 2.5299e-02, -5.8228e-02, 1.4038e-02, + 5.2414e-03, 1.2466e-02, 4.9042e-02, -1.7014e-02, -8.2825e-02, + -6.7810e-02, 1.8982e-02, -3.2166e-02, 3.1647e-02, -6.4636e-02, + -9.5520e-03, -2.0157e-02, -6.2805e-02, -4.1870e-02, -3.7689e-02, + 2.1042e-02, 6.1981e-02, 9.3384e-02, -6.7505e-02, -2.8427e-02, + 
-9.1614e-02, -2.3346e-02, -4.8553e-02, -1.6510e-02, -7.6790e-03, + 3.8971e-02, 2.9129e-02, -4.8065e-02, 5.2595e-04, -3.1021e-02, + -4.9072e-02, 5.5328e-02, -3.8147e-02, -1.1398e-02, -1.4252e-02, + -3.4027e-02, 1.2152e-01, -4.3121e-02, 1.0384e-02, 1.3135e-01, + -7.9102e-02, 2.5757e-02, 4.2664e-02, -1.1505e-01, -3.5278e-02, + 8.9798e-03, -7.8659e-03, 7.4768e-02, 2.4002e-02, -4.2053e-02, + 3.3779e-03, -2.9251e-02, 2.5543e-02, 6.2378e-02, 1.3626e-02, + -1.4030e-02, 1.1673e-02, 1.3084e-02, 4.4937e-03, 6.6467e-02, + 3.3569e-02, 1.0590e-02, -1.7273e-02, 1.3354e-01, -1.0046e-01, + 8.8196e-02, 3.1921e-02, 6.5727e-03, 2.5616e-03, 3.4847e-03, + -4.6783e-02, -1.9608e-02, 2.7161e-03, -6.0944e-02, -5.9845e-02, + -4.8126e-02, -2.8610e-03, -6.2683e-02, 4.9347e-02, -1.0399e-02, + -2.4887e-02, -1.4526e-01, -2.4475e-02, 1.8478e-02, -5.1613e-03, + 1.6012e-03, 5.2856e-02, -3.1281e-02, -4.5166e-02, 5.4588e-03, + -4.0649e-02, 3.0960e-02, -2.2705e-02, 5.6946e-02, -4.3579e-02, + -4.4670e-03, -7.7515e-02, -2.2755e-03, -4.0955e-02, 3.1708e-02, + 1.9547e-02, 6.0852e-02, -8.9111e-02, 2.8534e-02, -3.1952e-02, + 2.3224e-02, 7.8857e-02, 1.0376e-02, -2.0126e-02, -3.8513e-02, + 4.7779e-04, 5.2124e-02, -3.6736e-03, -2.6077e-02, 5.2399e-02, + -3.8635e-02, 1.0284e-02, -1.0727e-02, 3.0426e-02, -3.5706e-02, + -6.3416e-02, -1.4941e-01, -1.2947e-02, -1.3756e-02, 8.0017e-02, + 5.7411e-03, 1.2093e-03, 3.6955e-04, 9.0210e-02, 9.1919e-02, + -7.9193e-03, 2.3174e-03, 4.5654e-02, -4.2664e-02, -1.0083e-01, + -2.9709e-02, -1.0016e-01, -7.1228e-02, 1.2744e-01, -7.2449e-02, + 3.3661e-02, 1.6739e-02, -4.2081e-04, 2.8763e-02, -5.9891e-03, + -1.9592e-02, 3.0579e-02, 5.4199e-02, 5.7251e-02, 1.4343e-02, + 8.3847e-03, -1.4000e-02, 7.7782e-03, -3.4760e-02, -7.5134e-02, + -2.5742e-02, -1.3428e-02, 1.4258e-01, -6.7322e-02, -1.1391e-02, + 2.9312e-02, -3.5217e-02, 5.9784e-02, 1.7410e-02, 8.3557e-02, + -4.3152e-02, -2.1225e-02, -5.1270e-02, 1.0663e-01, -4.0009e-02, + 6.7505e-02, -1.8768e-02, -1.8845e-02, -1.6342e-02, 
1.0675e-01, + -7.0129e-02, 6.4819e-02, 7.0801e-02, 1.7065e-01, -5.0415e-02, + 3.4332e-02, 3.6194e-02, 5.4810e-02, -7.4280e-02, 2.6672e-02, + -7.6599e-02, -9.3155e-03, 3.4088e-02, 3.1342e-02, 2.2537e-02, + 1.9272e-02, 2.6093e-02, -5.4260e-02, -5.2246e-02, -2.6123e-02, + -3.4119e-02, 6.9946e-02, 2.1210e-02, -4.1161e-03, -6.0463e-03, + 1.3000e-01, 6.1615e-02, -4.8431e-02, -1.1176e-01, -6.4575e-02, + 1.6499e-03, -4.3701e-02, 7.8430e-02, 5.2795e-02, 5.8197e-02, + -2.3117e-02, -7.7087e-02, -7.5806e-02, -2.0325e-02, 4.2534e-03, + -6.2622e-02, 9.2163e-02, -4.7394e-02, -1.4824e-02, 1.4999e-02, + 1.1772e-02, -3.8635e-02, 6.6101e-02, -5.7312e-02, 6.4392e-02, + -1.3115e-02, -6.5857e-02, -5.3864e-02, 7.6843e-02, -6.1340e-02, + 2.0355e-02, 4.0375e-02, -1.3599e-01, 8.5449e-02, -1.3321e-02, + 1.8066e-02, -1.2894e-02, -4.9683e-02, -5.8517e-03, -5.9471e-03, + 1.8253e-03, -2.0447e-02, 8.0688e-02, -7.8979e-02, 8.7524e-02, + -3.7048e-02, -5.1270e-02, 4.2572e-02, 8.2855e-03, 7.0190e-02, + -9.6283e-03, 3.9948e-02, -1.7487e-02, -3.3752e-02, -5.0964e-02, + 1.7883e-02, -6.3538e-02, -3.4790e-02, 1.1554e-01, 1.1285e-01, + -1.2830e-01, 1.6846e-02, -6.2927e-02, 4.2953e-03, -1.3696e-01, + -6.1859e-02, -9.1431e-02, -2.4094e-02, -7.9102e-02, 4.9225e-02, + 9.9106e-03, -2.1606e-02, 2.0981e-02, 4.4403e-02, 6.6772e-02, + 2.4979e-02, 3.5248e-02, -1.3428e-02, 7.2670e-03, -7.3486e-02, + -1.2131e-02, 3.2318e-02, -4.4586e-02, 6.5979e-02, 5.0934e-02, + -2.3590e-02, -3.3875e-02, 5.4245e-03, -6.7261e-02, 3.0937e-03, + -3.1464e-02, -5.6671e-02, 1.6266e-02, -1.2732e-01, -1.2524e-01, + -6.3660e-02, 5.2734e-02, 1.7639e-01, -3.7445e-02, 6.8848e-02, + -6.0730e-03, -3.7994e-02, -3.1982e-02, 2.5940e-02, -2.2720e-02, + -3.3752e-02, 1.8774e-01, -5.4382e-02, 7.8735e-03, -9.3018e-02, + -3.5034e-02, -3.8853e-03, -5.6458e-03, 6.9031e-02, 5.0262e-02, + 6.5491e-02, 7.4219e-02, -1.1917e-02, -1.5190e-02, 4.6577e-03, + 9.6130e-02, -3.8879e-02, 8.1711e-03, 4.7791e-02, 5.7800e-02, + -3.0182e-02, -1.0788e-02, -9.2850e-03, 
-4.7577e-02, -5.6671e-02, + 1.3741e-02, -3.2410e-02, 1.1253e-02, -1.6083e-02, 2.0233e-02, + 1.4366e-02, 3.6224e-02, 2.8656e-02, 3.3264e-02, 6.3416e-02, + -6.0852e-02, 1.0582e-02, 2.5726e-02, -1.0492e-01, 3.5591e-03, + -6.4819e-02, 3.4058e-02, -7.6111e-02, -3.0792e-02, 1.1810e-02, + -2.9953e-02, 9.2850e-03, 7.1411e-02, -2.3735e-04, 2.5009e-02, + -1.6235e-02, 5.5054e-02, 1.2103e-01, -2.7204e-04, 8.8882e-03, + 3.9551e-02, -8.4656e-02, 3.6163e-02, -1.1993e-01, -2.1896e-02, + 2.0767e-02, -2.4246e-02, -2.0798e-02, -4.0161e-02, 1.0602e-01, + 3.7201e-02, 9.6817e-03, 1.1200e-01, -2.5299e-02, 8.6517e-03, + 9.4910e-03, -1.3940e-01, -2.2217e-02, 5.7037e-02, 7.7576e-02, + 3.6865e-02, -1.5251e-02, -5.6992e-03, 6.6345e-02, -7.4829e-02, + 1.1581e-02, -9.0820e-02, -4.9286e-02, -8.9645e-03, -2.6321e-02, + -1.0490e-02, -2.8820e-03, -1.4297e-02])Parameter containing: +tensor([2.2640, 2.1569, 2.2719, 2.2420, 1.7296, 2.2268, 2.2222, 2.1929, 2.2933, + 2.3042, 2.2599, 2.2068, 2.2888, 2.1418, 2.3097, 2.1628, 2.3067, 2.1962, + 2.2541, 1.8798, 2.3330, 2.1527, 2.4260, 2.2432, 2.2344, 2.2169, 2.2899, + 2.2657, 2.2956, 2.2150, 2.2134, 2.3052, 2.2338, 2.2947, 2.1073, 2.1633, + 2.3743, 2.2180, 2.1662, 2.2571, 2.0809, 2.2990, 2.1551, 2.1888, 2.1720, + 2.2131, 2.4677, 2.1487, 2.2043, 2.1072, 2.1941, 2.3012, 2.2159, 2.2000, + 2.5026, 2.2741, 2.3018, 2.1355, 2.2140, 2.2018, 2.1730, 2.2111, 2.3010, + 2.1690, 2.2760, 2.1768, 2.2708, 2.1877, 2.2992, 1.9298, 2.3732, 2.2795, + 1.9456, 2.3256, 2.3227, 2.3304, 2.2873, 2.2034, 2.2036, 2.2123, 2.2451, + 2.2720, 2.3650, 2.1825, 2.3101, 2.1620, 2.3239, 2.3294, 2.3094, 2.1591, + 2.1981, 2.0829, 2.3779, 2.1536, 2.2076, 2.1523, 2.2948, 2.3787, 2.2889, + 2.3791, 2.1895, 2.1518, 2.3576, 2.3959, 2.2077, 1.9487, 2.2961, 2.2239, + 2.2803, 2.1499, 2.0425, 2.3617, 2.4161, 2.2494, 2.2602, 2.1698, 2.2544, + 2.4007, 2.3820, 2.2632, 2.3637, 2.1851, 2.2005, 2.2400, 2.1663, 2.1420, + 2.3441, 2.3564, 2.3680, 2.2313, 2.1356, 2.3518, 2.3749, 2.2281, 2.4065, + 2.1281, 2.1338, 
2.1551, 2.1071, 2.7126, 2.1604, 2.1262, 2.4673, 2.1946, + 2.1879, 2.1070, 2.4353, 2.2552, 2.3758, 2.2670, 2.1755, 2.1645, 2.3566, + 2.3182, 2.3796, 1.7392, 2.1921, 2.2126, 2.1753, 2.2293, 2.1131, 2.2069, + 2.2205, 2.3078, 2.3424, 2.1081, 2.0709, 2.2687, 2.2176, 2.2434, 2.2262, + 2.1535, 1.9846, 2.1755, 2.0177, 2.2333, 2.2221, 2.2830, 2.2767, 2.1881, + 2.6348, 2.3794, 2.3480, 2.3592, 2.1792, 1.9351, 2.1316, 1.2753, 2.1821, + 2.1568, 2.3376, 2.4373, 2.2861, 2.2278, 2.4345, 2.3894, 2.2850, 2.2173, + 0.8030, 2.2375, 2.4590, 2.1584, 2.2757, 2.1539, 2.2788, 2.3235, 2.2182, + 2.2825, 2.2370, 2.2002, 2.2239, 2.1483, 2.3083, 1.9194, 2.2911, 2.1758, + 2.1693, 2.3248, 2.2062, 2.1135, 2.5439, 2.3680, 1.7868, 2.2818, 2.2139, + 2.3934, 2.2358, 2.2729, 2.1581, 2.1231, 2.2160, 2.2524, 2.2938, 2.3082, + 2.1456, 2.2228, 2.4099, 2.2020, 2.1645, 2.1912, 2.2698, 2.2760, 2.2801, + 2.3099, 2.2425, 2.1879, 2.2222, 2.3711, 2.1873, 2.3739, 2.2790, 2.2676, + 2.3049, 2.2355, 2.3792, 2.3141, 2.3377, 2.4090, 2.3914, 2.3383, 2.3445, + 2.3042, 2.4982, 2.3165, 2.2953, 2.1704, 2.2329, 2.4004, 2.1656, 2.3381, + 2.2305, 2.2528, 2.2741, 2.3754, 2.1884, 2.4750, 2.2019, 2.2011, 2.3016, + 2.2869, 2.2985, 1.9621, 2.1770, 2.2807, 2.2525, 2.1672, 2.1855, 2.4377, + 2.2557, 2.1184, 2.3795, 2.3838, 2.1112, 2.2527, 2.1712, 2.3678, 2.2962, + 2.3979, 2.1778, 2.2367, 2.2000, 2.4401, 2.1548, 2.3850, 2.4349, 2.1581, + 2.3237, 2.2696, 2.2616, 2.2724, 2.2583, 2.1867, 2.5341, 2.2949, 2.3043, + 2.1654, 2.2662, 2.4074, 2.2167, 2.3727, 2.3872, 2.1168, 2.2891, 2.2260, + 0.4755, 2.3298, 2.1823, 2.2057, 2.4162, 2.2086, 2.1705, 2.2330, 2.3128, + 2.2970, 2.2148, 2.1848, 2.2950, 2.2475, 2.2988, 2.1964, 2.3900, 2.2402, + 2.2406, 2.2631, 2.3282, 2.1986, 2.2774, 2.1227, 2.3814, 2.2799, 1.9796, + 2.3173, 2.2065, 2.0763, 2.2129, 2.1847, 2.2115, 2.3988, 2.3331, 2.2648, + 2.2173, 2.2222, 2.4255, 2.3040, 2.2819, 2.2370, 2.2084, 2.4024, 2.2437, + 2.3018, 2.4716, 2.2703, 2.2421, 2.2014, 2.2222, 2.3140, 2.1600, 2.2164, + 2.2799, 2.3304, 
2.1385, 2.1579, 2.1541, 2.2016, 2.1684, 2.2469, 2.2755, + 2.2213, 2.3451, 2.1953, 2.2886, 2.3196, 2.3890, 2.2848, 2.1909, 2.1920, + 2.2832, 2.2331, 2.3384, 2.3486, 2.2844, 2.1510, 2.2882, 2.3951, 2.1480, + 2.1827, 2.2679, 2.1737, 2.2718, 2.3449, 2.2749, 2.2403, 2.2581, 2.4260, + 1.7728, 2.9975, 2.3103, 2.0867, 2.1864, 2.3866, 2.2906, 2.3580, 2.2100, + 2.2319, 2.4259, 2.2954, 1.9379, 2.1505, 2.2671, 2.1461, 2.1807, 2.2680, + 2.3275, 2.2361, 2.3598, 2.1926, 2.3816, 2.2544, 2.1655, 2.3670, 2.3348, + 2.1993, 2.2997, 1.5097, 2.1035, 2.1724, 2.2067, 2.3189, 2.2590, 2.1026, + 2.2328, 2.2674, 2.2554, 2.2146, 2.1348, 2.5011, 2.3310, 2.1544, 2.2187, + 2.1928, 2.2301, 2.3006, 2.2465, 2.1373, 2.2418, 2.2347, 2.2468, 2.2022, + 2.1193, 2.4142, 2.2624, 2.1409, 2.1893, 2.2579, 2.3964, 2.2402, 2.3457, + 2.3320, 2.2716, 2.0796, 2.2567, 2.1346, 2.2694, 1.6067, 2.1672, 2.3004, + 2.3265, 2.2902, 2.1561, 2.2396, 2.1815, 2.2325, 2.2429, 2.2065, 2.2034, + 2.1567, 2.4732, 2.3073, 2.2450, 2.4269, 2.2559, 2.3715, 2.2559, 2.5272, + 2.4351, 2.1391, 2.2326, 2.2180, 2.1983, 2.2195, 2.2410, 2.2155, 2.1019, + 2.0940, 2.2429, 2.2578, 2.2910, 2.3509, 2.1719, 2.4017, 2.3275, 2.2035, + 2.2029, 2.2942, 2.1925, 2.3053, 2.2525, 2.2779, 2.2201, 2.1996, 2.2416, + 2.3947, 2.3721, 2.2754, 2.3328, 2.2189, 2.3045, 2.2970, 2.4679, 2.2055, + 2.1974, 2.2517, 2.4998, 2.4094, 2.2671, 2.0876, 2.2103, 2.2355, 2.2657, + 2.2187, 2.3696, 2.2213, 2.2929, 2.4940, 2.1678, 2.2122, 2.3943, 2.2058, + 2.3842, 2.1598, 2.3303, 1.8785, 2.3260, 2.2574, 2.2955, 2.1308, 2.2304, + 2.2647, 2.1533, 2.1775, 2.3234, 2.2043, 2.3099, 2.2060, 2.2395, 2.3715, + 2.2542, 2.3158, 2.2958, 2.1105, 1.9845, 2.1140, 2.2661, 2.2051, 2.2324, + 2.3517, 2.1900, 2.2237, 2.2118, 2.1717, 2.1753, 2.2713, 2.2211, 2.2775, + 2.3432, 2.0401, 2.2418, 2.4036, 2.2838, 2.2299, 2.2778, 2.0997, 2.2535, + 2.3705, 2.3978, 2.4439, 2.3486, 2.1743, 2.3667, 2.1913, 2.3294, 2.1519, + 2.3608, 2.2327, 2.2330, 2.2944, 2.1614, 2.2846, 2.2820, 2.1228, 2.2492, + 2.0873, 2.2352, 
2.2103, 2.3395, 2.3103, 2.1853, 2.2346, 2.1970, 2.1938, + 2.2931, 2.2062, 2.3980, 2.2042, 2.1698, 2.1685, 2.2466, 2.2704, 2.2514, + 2.3169, 2.1725, 2.3112, 2.2201, 2.2424, 1.8915, 2.1430, 2.1394, 2.3150, + 2.3607, 2.2556, 2.4942, 2.2349, 2.1679, 2.3034, 2.2429, 2.1978, 2.3658, + 2.2612, 2.2297, 2.0844, 1.7705, 2.1903, 2.3076, 2.1763, 2.2701, 2.3301, + 2.0571, 2.2049, 2.3389, 2.2586, 2.2229, 2.2573, 2.2908, 1.9869, 2.2785, + 2.1082, 2.3457, 2.1417, 2.2914, 2.3343, 2.2597, 1.9121, 2.1643, 2.2582, + 2.1474, 2.2186, 2.1766, 2.1799, 2.2972, 2.2724, 2.2806, 2.2794, 2.1807, + 2.4325, 2.1112, 2.3376, 2.2759, 2.2201, 2.4643, 2.2095, 2.2176, 2.1463, + 2.2402, 2.2014, 2.3456, 2.3663, 2.3184, 2.2078, 2.0484, 2.3456, 2.2747, + 2.1629, 2.3276, 2.2766, 2.1601, 2.2534, 2.3233, 2.1668, 2.1531, 2.3054, + 2.1789, 2.3498, 2.3915, 2.2312, 2.2355, 2.1540, 1.6734, 2.1592, 2.2040, + 2.1684, 2.4621, 2.2115, 2.1323, 2.2404, 2.4256, 2.3941, 2.2906, 2.1213, + 2.5773, 2.4617, 2.3884, 2.2571, 2.0984, 2.3819, 2.3312, 2.1406, 2.3404, + 2.3413, 2.4340, 2.2037, 2.2027, 2.3634, 2.3197, 2.1520, 2.2111, 2.4423, + 2.2281, 2.2688, 2.3624, 2.2121, 2.1558, 2.1795, 2.2498, 2.2931, 2.1872, + 2.2805, 2.3515, 2.3612])Parameter containing: +tensor([ 0.1621, 0.1320, -0.8621, -0.0336, 0.1502, 0.0541, 0.1869, 0.0480, + 0.1135, -0.0172, -0.3924, 0.3050, -0.6887, -0.2888, 0.5417, -0.1436, + 0.1237, 0.4877, -0.4812, -1.1091, 0.4803, 0.1873, 0.8007, -0.5644, + 0.2164, -0.0993, 0.2257, 0.2541, -0.7131, 0.6955, -0.0308, 0.6967, + 0.2246, -0.5032, 0.4245, -0.0466, -0.5230, 0.4978, 0.7140, 0.5024, + -0.0975, -0.2074, 0.4438, 0.2407, -0.3374, -0.2500, -0.8558, -0.0481, + 0.5578, 0.1013, -0.4001, -0.1277, 0.5967, 0.3349, -1.0492, 1.1932, + 0.2491, 0.0806, 0.1028, 0.0558, -0.1210, -0.1634, 0.3394, -0.5425, + -0.0082, 0.3193, 0.4550, -0.3117, -0.5130, -1.1894, 0.5283, 0.5521, + 0.3825, -0.5808, -0.9071, -0.6364, 0.1480, -0.0503, 0.2118, 0.5899, + 0.4158, 0.3171, -0.5889, 0.6909, -0.2356, -0.1724, 0.3149, -0.5220, + 0.5159, 
-0.0380, -0.1137, 0.1872, -0.4011, -0.4489, -0.2862, -0.1378, + 0.5677, -0.4666, -0.7463, 0.9317, 0.5492, -0.4332, -0.0704, 0.5361, + 0.4139, 0.3445, 0.1253, 0.4641, -0.0434, -0.0163, 0.3818, 0.8250, + -0.5626, 0.4493, 0.2315, 0.0676, 0.5582, 0.5198, 0.7981, 0.3034, + -0.4785, -0.3753, -0.4814, 0.0080, 0.1538, -0.3050, 0.3001, 0.4994, + -0.5714, 0.1687, 0.0673, -0.5381, 0.2875, -0.2586, 0.6101, 0.2481, + 0.4871, 0.1383, -0.0470, -0.9018, 0.1253, 0.4531, -0.7880, -0.1147, + -0.0429, 0.1276, 0.6707, -0.1825, -0.5070, 0.4332, 0.1504, -0.1559, + 0.5703, -0.4738, -0.9653, 0.6716, -0.2044, -0.2327, -0.0894, -0.0192, + -0.3091, 0.1484, 0.3899, -0.4676, 0.4944, -0.0957, -0.5098, 0.1894, + -0.3161, -0.1162, 0.3520, 0.1080, -0.1028, 0.4153, 0.1881, 0.3348, + 0.0837, -0.7355, -0.3223, 0.0820, -1.1374, -0.6000, 0.5029, 0.3999, + 0.1938, 0.2657, -0.2888, 1.9289, 0.1540, -0.0699, -0.7190, -0.6000, + -0.3247, 0.3231, 0.6391, 0.7734, 0.2144, 0.3356, -0.3159, 0.1742, + -0.6171, 0.0906, 0.4558, 0.3694, -0.3378, -0.1699, 0.4212, 0.3525, + -0.6260, 0.3566, -0.0672, -0.4158, -0.4320, 0.3351, 0.5299, -0.0924, + -0.5760, 0.0608, -0.0278, 0.1324, 0.9353, 0.3500, -0.6054, 0.2021, + 0.2814, -0.3948, -0.2922, 0.0252, 0.0906, 0.2763, 0.1940, 0.2645, + 0.6804, 0.5362, -0.3680, -0.6316, 0.4309, 0.4631, 0.0191, 0.4439, + 0.9340, 0.3927, 0.4112, -0.6026, -0.1669, 0.2130, 0.6028, 0.1757, + -0.1022, 0.7878, 0.2552, 0.5194, 0.4319, 0.0206, 0.6868, -0.3183, + 0.5887, -0.6037, -0.6882, -0.1226, -0.4427, 0.1058, 0.4440, 0.3308, + -0.3919, -0.2030, 0.1480, 0.9253, 0.3558, -0.4835, 0.4862, 0.3480, + -0.1619, -0.7845, 0.1806, -0.3933, -0.3324, 0.3735, -0.5081, 0.2008, + -0.2949, -0.9179, -0.3203, 0.4054, -0.5126, 0.1843, -0.3469, 0.7029, + 0.5811, -0.1982, -0.4353, 0.5465, 0.5292, 0.0970, -0.3432, -0.2300, + -0.4013, -0.5306, 0.0403, -0.2399, -0.5037, 0.7009, -0.0160, -0.4212, + 0.2411, -0.1549, 0.1045, -0.0450, -0.3573, -0.1059, 0.6022, -0.0220, + 0.8710, -0.1001, -0.2127, -0.0137, 0.4078, 0.4049, 
-0.0803, 0.3979, + -0.6313, -0.2661, 0.2763, -0.2651, -0.1129, -0.6101, 0.2320, -0.3461, + 0.6760, -0.1488, 0.2616, -0.5456, -0.2613, -0.0147, -0.0846, -0.3216, + -0.6864, 0.2660, 0.5609, 0.3031, 0.3006, -0.0308, -0.4863, 0.0349, + -0.7812, 0.3434, -0.3803, 0.0266, -0.2601, -0.0796, 0.6440, 0.2608, + -0.1114, 0.3551, -0.3097, -0.2056, -0.1391, -0.4942, -0.4338, 0.3045, + -0.4930, -0.5881, -0.4112, -0.4181, 0.4903, 0.4216, -0.2322, 0.6487, + 0.4649, 0.5861, -0.5109, -0.1017, 0.1379, -0.4404, 0.2101, -0.3105, + 0.2269, 0.0366, -0.2296, -0.3566, 0.2081, -0.1343, 0.1240, -0.0179, + -0.2213, -0.0625, -0.1611, 0.5202, 0.6848, 0.0801, 0.3838, -0.2576, + 0.4027, 0.5769, -0.8503, -0.0359, -0.1019, 0.8661, -0.4741, -0.7002, + -0.0760, 0.1911, -0.1189, -0.5374, 0.5348, -0.1533, 0.1744, -0.1486, + -0.3563, 0.3826, 0.6247, 0.0360, 0.5564, 0.2615, 1.2232, -2.3384, + 0.5863, -0.2913, 0.5541, 0.3806, 0.0317, -0.6352, 0.5425, -0.2135, + -0.5930, 0.6499, 0.3602, 0.0085, -0.0622, 0.1431, -0.2406, -0.5485, + -0.3074, 0.2902, 0.5639, 0.0688, -0.5124, 0.3740, -0.1591, -0.7494, + 0.2714, 0.2281, 0.1605, -1.2912, 0.1387, 0.2413, 0.3343, 0.5193, + 0.6109, 0.1938, 0.1689, 0.2520, -0.2678, 0.0738, -0.1442, -0.8710, + -0.4280, -0.3669, -0.2250, -0.0465, -0.4375, 0.3030, -0.5160, 0.0954, + -0.5934, -0.0733, 0.0735, 0.4256, 0.1004, -0.5574, 0.4439, 0.3795, + 0.0716, -0.0068, -0.7792, -0.2683, -0.2675, -0.3899, -0.2300, 0.1020, + -0.2996, 0.0126, 0.4978, -0.3174, -0.0587, -0.4951, 0.0076, 0.6863, + 0.2059, -0.4359, 0.0722, 0.3730, -0.5903, 0.1088, -0.3538, -0.2944, + -0.6744, -0.3510, 0.5116, 1.0242, -0.2177, -0.9765, 0.1696, 0.5113, + -0.0504, -0.0675, 0.3385, 0.4160, 0.2705, -0.2345, 0.0065, 0.1965, + -0.3579, -0.1229, 0.6358, 0.0630, 0.5625, -0.3390, -0.2423, 0.4026, + 1.0521, 0.1297, -0.2261, 0.3796, -0.2018, -0.4482, 0.6710, 0.2464, + 0.1820, 0.0604, -0.0261, -0.7356, 0.1610, 0.3147, 0.7031, -0.7128, + 0.4793, -0.6179, 0.4299, -0.1426, 0.1959, 0.0474, -0.7578, 0.3871, + 0.4518, -0.0792, 
-0.0705, -0.7027, -0.2180, -0.1972, -0.4494, -0.4577, + 0.5420, 0.6782, -0.0076, -0.0709, 0.4196, 0.2184, -0.5684, -0.3109, + 0.4548, -0.4821, -0.5241, 0.6643, -0.4297, 0.2593, -0.0437, 0.0028, + 0.1823, -0.0203, -0.2064, 0.2194, 0.4955, 0.0041, 0.5605, 0.6234, + -0.1054, -0.4684, -0.2235, 0.1850, 0.2052, 0.2630, 0.3708, -0.0098, + 0.1920, -0.1915, -0.2142, -0.1864, 0.5316, 0.2705, -0.0170, -0.1591, + 0.2824, 0.0184, -0.2010, 0.2990, 0.0938, 0.3728, 0.6740, 0.4613, + 0.3229, -0.0187, 0.2902, -1.1038, -0.1624, 0.6004, -0.5546, -1.5263, + -0.2350, 0.0903, -0.4732, 0.4322, -0.5486, 0.5330, 0.2533, 0.2336, + 0.1706, -0.3836, 0.1548, -0.0594, 0.1809, -0.0227, -0.2615, 0.2121, + 0.1297, 0.3730, -0.2365, 0.3757, 0.4399, -0.1251, -0.5659, 0.2555, + -0.7871, -0.4932, 0.2153, 0.0382, -0.0994, -0.0076, 0.2226, -0.5119, + 0.6038, -0.6071, -0.9637, 0.1321, -0.7474, -0.0215, -0.0357, 0.1232, + -0.7136, 0.3996, 0.9405, -0.0939, 0.4872, -0.3826, 0.5215, 0.5895, + 0.4666, 0.1541, -0.5932, 0.0385, 0.0506, 0.1357, 0.2811, 0.4319, + -0.0849, -0.3161, 0.0826, -0.3186, 0.2883, -0.1638, 0.5536, 0.2820, + 0.0268, 0.2919, 0.7081, -0.2793, 0.7435, -0.0363, -0.1690, -0.1719, + -0.5410, 1.0448, -0.5631, 0.1419, 0.5340, 0.5717, 0.1740, -0.3740, + 0.2532, -0.4504, -0.6378, -0.2206, -0.4109, -0.6917, -0.2142, 0.9643, + -0.3597, -0.1640, -0.7284, 0.1631, 0.1779, 0.1018, 0.4215, -0.4838, + 0.4528, 0.7088, -0.3826, -0.3808, -0.1254, -0.5218, 0.5777, -0.1802, + -0.4346, -0.4577, 0.0334, -0.2096, 0.1364, 0.3060, 0.0198, -0.8002, + 0.0443, -0.5895, -0.6670, -0.4631, -0.0998, 0.1485, 0.4129, -0.1068, + -0.0434, -0.2676, -0.7246, -0.2339, 0.2438, 0.2161, 0.5981, 0.5052, + -0.1812, -0.0142, -0.5303, -0.8499, -0.7728, 0.1444, -0.3332, -0.2925, + 0.3377, 0.4867, -0.3891, 0.4536, 0.9387, 0.4918, -0.0686, -0.3014, + -0.5999, 0.2228, -0.4177, 0.7356, -0.5696, -0.5074, 0.6764, 0.6888, + -0.2936, 0.0707, -0.0800, 0.5583, 0.2862, 0.4679, 0.6217, 0.6404])Parameter containing: +tensor([[-0.0240, 0.0029, -0.0312, 
..., 0.0232, 0.0232, -0.0007], + [-0.0009, 0.0125, -0.0428, ..., 0.0167, -0.0114, 0.0172], + [-0.0243, 0.0004, -0.0028, ..., -0.0064, 0.0121, 0.0166], + ..., + [ 0.0119, 0.0008, -0.0014, ..., -0.0109, 0.0003, -0.0192], + [-0.0027, -0.0135, 0.0034, ..., 0.0144, 0.0325, -0.0189], + [ 0.0063, 0.0089, -0.0012, ..., 0.0233, -0.0183, -0.0119]])Parameter containing: +tensor([-0.0339, 0.5952, -0.3469, ..., 0.0100, -0.0171, 0.0073])Parameter containing: +tensor([[ 1.6983e-02, 1.5869e-02, 2.5711e-02, ..., 1.8282e-03, + -1.1787e-02, -2.0477e-02], + [-4.1723e-06, 4.6234e-03, -1.7273e-02, ..., -1.3374e-02, + -4.8447e-03, 3.5763e-03], + [-9.9716e-03, -4.3945e-03, 2.9068e-03, ..., 1.9684e-02, + -2.8351e-02, -2.8290e-02], + ..., + [-2.1042e-02, -2.2217e-02, 2.5452e-02, ..., -4.4417e-04, + 1.7960e-02, -1.9300e-04], + [ 6.3667e-03, -1.1711e-02, -3.2842e-05, ..., -1.2466e-02, + 2.6031e-02, 2.3913e-04], + [ 1.1337e-02, 1.0567e-02, 2.4395e-03, ..., -1.5053e-02, + -5.5428e-03, -7.4120e-03]])Parameter containing: +tensor([-3.2745e-02, -3.7262e-02, 7.0618e-02, -4.9408e-02, -8.5632e-02, + -1.1737e-01, 2.8248e-03, 3.5278e-02, 2.7481e-02, -7.3914e-02, + 6.2103e-03, -1.5160e-02, 6.6101e-02, 2.4586e-03, 3.9703e-02, + 1.2024e-02, 8.2474e-03, -1.0178e-02, 2.8503e-02, -8.3923e-02, + -3.1860e-02, 5.1636e-02, 2.6596e-02, 1.4214e-02, 5.3749e-03, + 3.8635e-02, 9.4910e-03, 5.4169e-03, 2.8625e-02, 5.6244e-02, + 9.2773e-02, -3.4973e-02, -5.4443e-02, -1.7410e-02, -8.0322e-02, + -2.9312e-02, -4.7180e-02, 3.3905e-02, 8.2397e-02, -5.3833e-02, + 5.6610e-02, -3.3188e-03, 9.7198e-03, -6.4163e-03, -3.9764e-02, + -7.4097e-02, -3.4454e-02, -2.2781e-02, -6.9641e-02, -5.4474e-02, + -4.5288e-02, -8.2581e-02, -1.0016e-01, -8.6182e-02, -6.2744e-02, + 6.8542e-02, 7.1411e-02, -7.6782e-02, 6.6101e-02, 3.5004e-02, + 2.3087e-02, 1.2428e-02, 3.9154e-02, -2.8885e-02, 2.3026e-02, + 3.0975e-02, -6.9946e-02, -2.4078e-02, -7.0435e-02, -1.0736e-01, + 7.1960e-02, 3.1464e-02, -1.7651e-01, -5.1758e-02, -1.1955e-02, + -2.8366e-02, 
1.2091e-01, -8.0032e-03, 2.8351e-02, 7.6599e-02, + -7.8278e-03, -2.9221e-02, -3.5126e-02, -6.3965e-02, -1.0217e-01, + -1.1749e-02, 2.8610e-02, -3.5492e-02, 1.8173e-02, -1.5427e-02, + -2.2491e-02, -7.9895e-02, -6.0577e-03, 1.0437e-01, 7.1030e-03, + -4.1779e-02, 2.0691e-02, -2.1744e-02, -4.5074e-02, 8.3557e-02, + -3.9795e-02, -6.3354e-02, -9.0454e-02, 1.1726e-02, -1.9026e-03, + 6.2561e-02, 5.7648e-02, -1.2598e-03, -3.3142e-02, 1.1192e-02, + -5.4993e-02, -6.8207e-03, -7.0251e-02, -3.7537e-03, -2.0264e-02, + -1.6495e-02, 5.8929e-02, 5.2734e-02, 2.5604e-02, 4.9591e-02, + 4.0833e-02, 7.2021e-02, 8.3618e-02, 5.5908e-02, 9.9365e-02, + -6.7810e-02, -4.8859e-02, 6.0913e-02, 4.6936e-02, 2.3026e-02, + 2.4643e-02, -2.7294e-03, -7.0000e-03, 2.8259e-02, -2.8000e-02, + 3.6743e-02, 3.1036e-02, -5.6335e-02, 1.1298e-01, -2.6901e-02, + 4.3297e-03, 5.1514e-02, 2.9175e-02, -3.7628e-02, -2.9716e-03, + -5.6122e-02, 4.5654e-02, -4.4189e-02, -7.1960e-02, -4.3121e-02, + -1.0902e-02, 1.4626e-02, 2.4857e-02, -5.3833e-02, -3.4943e-02, + 1.2573e-01, 4.2114e-02, -8.2397e-02, -2.7939e-02, -9.5062e-03, + 4.9072e-02, -3.5828e-02, -1.2123e-02, -4.6295e-02, 6.8604e-02, + -5.3902e-03, 6.6895e-02, 3.5706e-02, 5.4810e-02, -8.3984e-02, + 2.7725e-02, -9.0881e-02, -2.1469e-02, 1.2909e-02, 1.5402e-03, + -1.3535e-02, 1.6994e-03, 5.4443e-02, 5.9296e-02, -3.1952e-02, + -8.9783e-02, 1.1833e-02, 3.9948e-02, 8.2764e-02, 1.7273e-02, + 1.1322e-01, -1.2062e-02, -1.9678e-01, -8.5068e-03, 1.9958e-02, + -4.4250e-02, -9.0637e-03, 4.3640e-02, 1.1816e-01, 9.2712e-02, + -3.2440e-02, 1.2960e-03, -3.6407e-02, -1.6748e-01, 2.6276e-02, + 2.5833e-02, 1.8478e-02, 2.1027e-02, -5.5878e-02, -2.7237e-02, + -1.8600e-02, -7.0343e-03, 5.3101e-02, 7.2289e-03, -1.1633e-01, + 1.1024e-02, -1.3672e-02, -8.4778e-02, 4.2999e-02, 1.5900e-02, + -2.4460e-02, 2.7752e-03, -9.0820e-02, -5.1941e-02, 4.6051e-02, + 4.0619e-02, 2.6520e-02, 3.6804e-02, 5.4504e-02, 1.7334e-02, + 5.2551e-02, 3.5400e-02, 4.0680e-02, 5.0629e-02, 3.8849e-02, + -1.0114e-01, 
-1.7532e-02, -1.4519e-02, -4.5967e-03, -5.9601e-02, + 2.7451e-02, 6.6711e-02, -8.5388e-02, 5.1270e-02, 4.3106e-03, + -2.9434e-02, -2.7191e-02, -6.9580e-02, 2.4521e-02, -1.5854e-02, + -3.3295e-02, -5.2155e-02, 7.4463e-02, -8.1024e-03, 4.5990e-02, + -3.9856e-02, 7.2327e-02, -1.3824e-02, -3.5767e-02, -5.3375e-02, + -6.3354e-02, 5.9143e-02, -2.9785e-02, -5.1819e-02, -1.4191e-03, + -4.8309e-02, -2.7359e-02, 1.4809e-02, -4.0321e-03, 2.6428e-02, + 6.0196e-03, 1.8768e-02, -3.5309e-02, 5.8807e-02, -3.5065e-02, + -5.7404e-02, 1.9409e-02, -2.7237e-02, 2.7374e-02, 3.3627e-03, + -7.4646e-02, -2.2507e-02, -2.6672e-02, -2.2705e-02, 3.7537e-02, + -1.1487e-01, -8.6731e-02, 6.1951e-02, -1.8433e-02, -4.1595e-02, + -6.4888e-03, -2.1606e-02, -1.8829e-02, 1.8097e-02, -1.3634e-02, + -2.5406e-02, 4.0680e-02, -4.6448e-02, -6.1035e-02, 4.1901e-02, + -1.5297e-02, -2.0386e-02, -3.8208e-02, -3.4698e-02, -2.6306e-02, + 2.8915e-02, 2.8961e-02, -1.9180e-02, -2.0874e-02, 1.2062e-02, + 9.1248e-02, -7.3792e-02, 8.2581e-02, 2.2095e-02, 2.4109e-02, + 8.5297e-03, -3.9917e-02, 1.9516e-02, 5.9479e-02, 1.6575e-03, + -6.4392e-02, 5.5939e-02, 4.1351e-02, -4.5013e-02, 3.5065e-02, + -5.5939e-02, -3.6621e-02, 7.2388e-02, -1.1955e-02, 5.0537e-01, + -4.3221e-03, -5.9845e-02, 9.2468e-02, -2.0552e-04, 5.4535e-02, + 7.6447e-03, 1.6312e-02, -1.0315e-01, 1.3399e-03, 1.4111e-01, + -5.0690e-02, -7.7637e-02, 4.9408e-02, 1.8692e-02, 5.7106e-03, + 1.2610e-01, -1.7075e-02, -4.6783e-02, 8.6426e-02, -1.9379e-02, + 6.0516e-02, -6.6467e-02, 6.7383e-02, -2.3712e-02, 3.2959e-03, + 3.4363e-02, -2.7908e-02, -1.6464e-02, -4.3793e-02, -3.1555e-02, + 1.0529e-01, -2.0172e-02, -3.9612e-02, -1.1551e-02, 5.5664e-02, + -3.3905e-02, -1.6510e-02, 3.2928e-02, -4.0649e-02, 7.1716e-02, + -2.5253e-02, 3.1281e-02, -1.1154e-02, 1.2230e-02, -7.7515e-03, + -5.7953e-02, -9.7036e-04, 4.2023e-02, 1.6327e-02, 1.5778e-02, + 4.0436e-03, -2.2984e-03, 8.8959e-03, -7.6172e-02, -5.8380e-02, + -4.4250e-04, -6.6772e-02, -1.6479e-02, -1.1642e-02, 5.5298e-02, + 
-3.2532e-02, 6.6833e-03, -1.2039e-02, -2.9358e-02, 1.2466e-02, + 2.9068e-02, -4.7211e-02, 1.2427e-01, 2.3285e-02, 2.2766e-02, + -1.3580e-02, 4.3304e-02, -7.6965e-02, -2.4384e-02, 7.5874e-03, + -4.8157e-02, 6.9702e-02, -2.3193e-02, -7.1655e-02, 6.7078e-02, + -8.1253e-03, 2.9144e-02, 5.5771e-03, 1.5549e-02, 7.8735e-02, + -1.7288e-02, -1.9043e-02, 1.4229e-02, -2.3041e-02, 9.5337e-02, + -1.4673e-01, -2.5482e-02, 1.0269e-02, -3.0579e-02, 2.6459e-02, + 1.0797e-01, 1.5671e-02, 2.2675e-02, 5.3864e-02, -3.8116e-02, + 1.2083e-03, 4.2343e-04, 4.5586e-03, -4.0558e-02, 2.6489e-02, + -6.5002e-02, -1.5915e-02, -6.2683e-02, -4.7493e-03, 6.5430e-02, + -8.2779e-03, 3.7670e-04, 7.5928e-02, 5.5603e-02, 3.0899e-02, + 6.4636e-02, 4.4617e-02, -1.4450e-02, -3.3234e-02, 2.0126e-02, + 5.1842e-03, -2.2232e-02, 4.2847e-02, -5.9418e-02, 2.7069e-02, + 1.2884e-03, -3.8635e-02, -8.5678e-03, -3.1174e-02, -3.3691e-02, + 1.8463e-02, -3.8483e-02, -1.6953e-02, 1.5032e-04, 5.0415e-02, + -1.6312e-02, 2.1896e-03, 5.3482e-03, -2.6169e-03, 1.5961e-02, + 8.8989e-02, -8.9417e-02, 6.7322e-02, 4.4060e-03, -4.0680e-02, + 7.1655e-02, -7.9880e-03, 3.7415e-02, 1.4679e-02, 5.3253e-02, + -4.5166e-02, -6.3553e-03, -6.2622e-02, -1.0034e-01, -3.8055e-02, + 1.2070e-02, 5.5359e-02, 2.9800e-02, -4.4250e-02, 3.8483e-02, + 1.8158e-02, 2.4048e-02, -1.9745e-02, -4.2358e-02, 1.0504e-01, + -7.1289e-02, -4.8798e-02, 5.9113e-02, -1.9197e-03, 7.9346e-03, + -4.9042e-02, -1.8524e-02, -4.1199e-02, -4.6204e-02, -3.8452e-02, + 1.0345e-02, 9.5825e-02, -2.6993e-02, -1.4549e-02, 8.4534e-02, + -5.8889e-04, -9.0942e-02, -4.8757e-04, 1.6724e-02, -2.7725e-02, + -1.8555e-02, 5.5695e-02, -2.7573e-02, -5.7709e-02, -2.2324e-02, + 2.2247e-02, -6.0883e-02, -8.6365e-02, 1.7517e-02, 8.9539e-02, + 6.8359e-03, -4.1840e-02, -2.2354e-03, 5.0446e-02, -2.1027e-02, + -8.9966e-02, 3.4428e-03, 1.4275e-02, 6.3232e-02, 2.9099e-02, + 1.7456e-02, -3.3203e-02, 2.0004e-02, -3.6316e-03, -4.0131e-03, + -6.4331e-02, 1.0785e-01, 2.2873e-02, 3.7689e-02, 6.3843e-02, + 
3.4332e-02, -5.2948e-02, -1.1115e-01, 6.5063e-02, 6.7810e-02, + 7.7576e-02, -1.8646e-02, 2.1606e-02, -4.2816e-02, 5.0446e-02, + 1.8005e-02, 3.1799e-02, 9.4849e-02, 1.8478e-02, -4.5135e-02, + 2.9434e-02, 3.4027e-02, -5.4230e-02, -1.2000e-01, -4.0192e-02, + -3.8055e-02, -1.1035e-01, -1.5144e-02, 6.3538e-02, -5.6824e-02, + -2.4490e-02, -2.3254e-02, -8.2642e-02, 7.3181e-02, -1.1963e-02, + -9.0881e-02, 7.8726e-04, -1.5526e-03, 1.6342e-02, 7.5806e-02, + 5.8746e-02, -2.7905e-03, -5.0781e-02, -3.6652e-02, 3.6438e-02, + 9.2957e-02, -1.8158e-02, -9.3323e-02, 4.7333e-02, 3.7048e-02, + -7.4463e-02, -3.2379e-02, -4.8004e-02, 4.8798e-02, -9.2041e-02, + 1.1208e-02, -1.8906e-02, 6.5491e-02, -1.1194e-01, 1.6312e-02, + 9.8228e-04, 4.9377e-02, 4.5258e-02, -9.7198e-03, -4.9973e-03, + 1.4687e-02, -1.0727e-02, -3.3112e-02, 1.3420e-02, -1.1926e-01, + -3.4210e-02, -2.6505e-02, -1.4172e-01, -4.8645e-02, 6.9214e-02, + -1.5572e-02, 2.7514e-04, -2.4918e-02, -8.4457e-03, -3.5706e-02, + 4.5898e-02, 1.9073e-02, 1.9043e-02, 2.5650e-02, 7.7454e-02, + 4.1199e-03, -2.7809e-03, -2.6291e-02, 1.1681e-02, 1.0777e-03, + 2.2415e-02, 7.7095e-03, 4.5471e-02, -3.2837e-02, 3.6530e-02, + 1.0117e-02, 7.2449e-02, -8.8272e-03, -6.4240e-03, -2.0645e-02, + -1.9745e-02, 1.2451e-01, -5.4504e-02, -2.4109e-02, -3.6652e-02, + -4.5776e-02, -1.9501e-02, -1.4977e-02, 3.8757e-02, -2.0264e-02, + 9.2468e-02, -3.8757e-02, -1.0120e-01, 4.2511e-02, 9.3384e-03, + -7.1228e-02, -6.3843e-02, -8.2764e-02, -1.5274e-02, 4.5837e-02, + 1.7807e-02, 1.1047e-02, -4.1313e-03, 3.1143e-02, -4.5074e-02, + 2.0096e-02, 2.8496e-03, 3.0777e-02, -5.9509e-02, -5.3680e-02, + -5.6610e-02, 6.8054e-02, -4.6730e-03, -4.0741e-02, -3.3478e-02, + -1.1200e-02, 1.2512e-02, 4.8859e-02, 1.0048e-02, 1.2789e-03, + 3.3203e-02, 8.2550e-03, 1.0597e-02, 6.1737e-02, -4.7577e-02, + 1.2741e-03, 3.1787e-01, -7.5928e-02, -9.5703e-02, 1.4748e-02, + 1.4664e-02, 8.2764e-02, 2.2106e-03, 1.1646e-01, 2.5482e-02, + -1.8402e-02, 3.6602e-03, 2.4826e-02, 9.9106e-03, -4.2023e-02, + 
-8.2092e-02, 3.7781e-02, 2.1667e-02, -3.5553e-02, 1.0400e-01, + 1.7120e-02, -2.5818e-02, -4.9164e-02, -1.6220e-02, -6.5269e-03, + 5.7251e-02, -9.5459e-02, 1.8341e-02, 2.6260e-02, 4.4739e-02, + 6.4941e-02, 3.4058e-02, -3.7292e-02, -3.1525e-02, 7.1838e-02, + 1.8677e-02, 9.6863e-02, -1.2825e-02, 2.3460e-03, 4.5654e-02, + -5.2612e-02, -2.2858e-02, 1.5305e-02, 2.5543e-02, -1.0818e-02, + 3.5782e-03, 7.5500e-02, 1.0168e-01, 2.2522e-02, -4.3976e-02, + -1.9531e-02, -1.2894e-02, -1.2756e-02, 1.8740e-03, -3.4237e-03, + 5.5573e-02, -5.2338e-02, 4.3152e-02, -1.2140e-01, -1.8387e-02, + 3.2928e-02, 4.8950e-02, 1.2146e-02, -5.9845e-02, 6.4087e-02, + -4.7821e-02, 7.9224e-02, 4.6906e-02, 9.7046e-02, 5.7953e-02, + 8.5510e-02, -1.3525e-01, 2.4994e-02, -7.3471e-03, 3.4424e-02, + -1.6235e-02, -2.4094e-02, -2.6352e-02, 6.3049e-02, 3.8872e-03, + 5.0720e-02, -1.5222e-01, 5.7251e-02, 2.0691e-02, 6.3416e-02, + 4.5654e-02, -1.1436e-02, -3.4546e-02])Parameter containing: +tensor([1.7929, 1.7831, 1.7803, 1.6813, 1.6177, 1.8186, 1.8280, 1.8169, 1.8193, + 1.7491, 1.7959, 1.8259, 1.7240, 1.6950, 1.6551, 1.7475, 1.8325, 1.7874, + 1.7081, 0.4558, 1.7815, 1.8641, 1.7709, 1.7374, 1.8430, 1.7791, 1.7354, + 1.7626, 1.7691, 1.6661, 1.7468, 1.7535, 1.7837, 1.7498, 1.7474, 1.7921, + 1.8657, 1.7096, 1.7576, 1.6830, 1.7558, 1.8993, 1.7414, 1.7797, 1.7533, + 1.6189, 1.7345, 1.7020, 1.7244, 1.6815, 1.7499, 1.8150, 1.7153, 1.6953, + 1.7383, 1.6688, 1.8391, 1.6598, 1.7349, 1.7585, 1.6782, 1.7734, 1.7821, + 1.7409, 1.7198, 1.6855, 1.7529, 1.7155, 1.7341, 1.5297, 1.7421, 1.8686, + 1.8653, 1.7259, 1.7289, 1.6731, 1.7387, 1.6307, 1.7143, 1.6711, 1.7531, + 1.8033, 1.7714, 1.7676, 1.7775, 1.6657, 1.7763, 1.7440, 1.7064, 1.7384, + 1.7850, 1.7902, 1.7361, 1.8213, 1.7260, 1.6903, 1.8085, 1.7339, 1.7769, + 1.7721, 1.7588, 1.6839, 1.8513, 1.7149, 1.7672, 1.7700, 1.6876, 1.7124, + 1.7675, 1.8268, 1.7704, 1.7144, 1.7660, 1.8349, 1.7693, 1.6901, 1.7396, + 1.8073, 1.6787, 1.7993, 1.7948, 1.7033, 1.7233, 1.7223, 1.7224, 1.7772, 
+ 1.6722, 1.8057, 1.8742, 1.6941, 1.7141, 1.7528, 1.7275, 1.7983, 1.7217, + 1.8105, 1.6726, 1.7739, 1.7665, 1.7696, 1.7423, 1.7597, 1.7595, 1.6767, + 1.7325, 1.7476, 1.7849, 1.7194, 1.6787, 1.7302, 1.7817, 1.7152, 1.7512, + 1.6956, 1.7467, 1.3401, 1.8498, 1.7573, 1.7962, 1.7595, 1.6933, 1.6367, + 1.7679, 1.6964, 1.7361, 1.7991, 1.7227, 1.7609, 1.7854, 1.7482, 1.7924, + 1.8307, 1.6455, 1.6848, 1.8324, 1.8348, 1.7826, 1.8397, 1.7152, 1.6917, + 1.7201, 1.7656, 1.8076, 1.8197, 1.7645, 1.6691, 1.7135, 2.2904, 1.7352, + 1.7467, 1.7217, 1.7181, 1.8016, 1.8424, 1.6587, 1.7916, 1.7563, 1.7790, + 2.8969, 1.7273, 1.7394, 1.7848, 1.7283, 1.7294, 1.7608, 1.7266, 1.7323, + 1.7652, 1.7301, 1.7450, 1.8608, 1.7369, 1.7490, 1.5925, 1.7045, 1.7451, + 1.7193, 1.7442, 1.6924, 1.7473, 1.6818, 1.7552, 1.6258, 1.7437, 1.7192, + 1.7460, 1.7766, 1.8368, 1.7696, 1.8426, 1.8348, 1.7678, 1.8167, 1.7304, + 1.7137, 1.7233, 1.8062, 1.8429, 1.7146, 1.8293, 1.8135, 1.7622, 1.8080, + 1.8045, 1.7830, 1.7069, 1.7667, 1.7784, 1.6708, 1.7645, 1.7546, 1.8052, + 1.7619, 1.7424, 1.8167, 1.7871, 1.7916, 1.7700, 1.7150, 1.7424, 1.7701, + 1.7658, 1.6809, 1.7777, 1.7157, 1.7050, 1.8382, 1.7815, 1.7690, 1.8156, + 1.7530, 1.6965, 1.7266, 1.6922, 1.7875, 1.6888, 1.6801, 1.6591, 1.7296, + 1.8412, 1.7243, 1.6172, 1.7628, 1.7759, 1.7321, 1.7935, 1.7681, 1.7590, + 1.6764, 1.6220, 1.7573, 1.7823, 1.7837, 1.8083, 1.7544, 1.7745, 1.6749, + 1.7069, 1.7159, 1.8097, 1.6928, 1.7173, 1.7915, 1.7135, 1.8118, 1.7447, + 1.7273, 1.6825, 1.7379, 1.8264, 1.7297, 1.7173, 1.7472, 1.8040, 1.7840, + 1.7809, 1.6736, 1.7975, 1.7601, 1.7554, 1.7233, 1.6839, 1.7200, 1.7189, + 1.1019, 1.6431, 1.7165, 1.7557, 1.6809, 1.7875, 1.6864, 1.8092, 1.7360, + 1.7681, 1.7731, 1.7921, 1.6934, 1.7466, 1.6807, 1.7739, 1.7474, 1.7377, + 1.7673, 1.7556, 1.7118, 1.7117, 1.7537, 1.7710, 1.7321, 1.7860, 1.6302, + 1.7879, 1.8190, 1.6993, 1.7643, 1.8275, 1.7474, 1.7175, 1.7749, 1.7524, + 1.6974, 1.7262, 1.8118, 1.7030, 1.7440, 1.7438, 1.7523, 1.7821, 1.7594, + 
1.6938, 1.7808, 1.7905, 1.7734, 1.7000, 1.7035, 1.7412, 1.7406, 1.7781, + 1.7695, 1.7509, 1.7576, 1.7752, 1.7044, 1.7850, 1.7253, 1.6955, 1.7934, + 1.7814, 1.7614, 1.7061, 1.8297, 1.7617, 1.7969, 1.6922, 1.6822, 1.7527, + 1.7551, 1.8767, 1.7740, 1.8746, 1.7875, 1.8527, 1.7601, 1.7039, 1.7222, + 1.7680, 1.6989, 1.7715, 1.7485, 1.7254, 1.6715, 1.7374, 1.6857, 1.6731, + 1.5084, 2.5756, 1.8057, 1.7862, 1.6845, 1.7544, 1.7355, 1.7684, 1.7092, + 1.8210, 1.7644, 1.7506, 1.7032, 1.8150, 1.7363, 1.7353, 1.8017, 1.7049, + 1.7996, 1.7475, 1.7090, 1.7468, 1.7934, 1.7622, 1.7136, 1.7332, 1.6697, + 1.8075, 1.7316, 1.5067, 1.7573, 1.7962, 1.7551, 1.7285, 1.7033, 1.7589, + 1.7889, 1.7553, 1.8125, 1.8115, 1.6954, 1.8001, 1.7614, 1.7621, 1.7395, + 1.7802, 1.7583, 1.8117, 1.6605, 1.7690, 1.6949, 1.7969, 1.7511, 1.7341, + 1.7781, 1.7734, 1.7195, 1.7107, 1.7229, 1.8224, 1.7728, 1.8050, 1.7612, + 1.7779, 1.8357, 1.6278, 1.7457, 1.6289, 1.8321, 1.4774, 1.7665, 1.6529, + 1.6894, 1.6457, 1.7476, 1.7246, 1.7836, 1.7473, 1.7166, 1.8502, 1.7336, + 1.7778, 1.7636, 1.7954, 1.6843, 1.6976, 1.7811, 1.7412, 1.7866, 1.8105, + 1.7974, 1.7285, 1.7500, 1.7821, 1.7113, 1.6886, 1.7415, 1.8403, 1.7683, + 1.7202, 1.7977, 1.7161, 1.7669, 1.7054, 1.6233, 1.7726, 1.7435, 1.7252, + 1.7905, 1.7461, 1.6852, 1.7706, 1.7498, 1.7314, 1.7612, 1.7688, 1.8387, + 1.7507, 1.7780, 1.7521, 1.7521, 1.6886, 1.6748, 1.6639, 1.7260, 1.8651, + 1.8324, 1.8109, 1.7392, 1.7163, 1.7573, 1.8032, 1.7246, 1.7790, 1.8149, + 1.8411, 1.7696, 1.7417, 1.7305, 1.8165, 1.7831, 1.6465, 1.7257, 1.6999, + 1.7752, 1.7095, 1.8007, 1.5871, 1.7589, 1.7477, 1.7404, 1.7618, 1.7613, + 1.7768, 1.7874, 1.7526, 1.7289, 1.7907, 1.7646, 1.7130, 1.6932, 1.7876, + 1.7574, 1.7329, 1.7561, 1.6575, 1.6210, 1.7973, 1.8189, 1.8263, 1.7098, + 1.8421, 1.7957, 1.7129, 1.7902, 1.7544, 1.7874, 1.8462, 1.6938, 1.7722, + 1.7217, 1.6570, 1.6927, 1.8389, 1.7193, 1.6345, 1.8630, 1.7586, 1.7213, + 1.7517, 1.6532, 1.7951, 1.8013, 1.6195, 1.7767, 1.7708, 1.8667, 1.7844, + 
1.6620, 1.7018, 1.8073, 1.7122, 1.6877, 1.8080, 1.8439, 1.7730, 1.7713, + 1.7360, 1.7293, 1.7934, 1.8518, 1.7278, 1.7761, 1.7626, 1.7324, 1.7621, + 1.7627, 1.7576, 1.7272, 1.7169, 1.7375, 1.7508, 1.7859, 1.8297, 1.7494, + 1.7099, 1.7173, 1.7456, 1.6725, 1.7380, 1.5079, 1.7439, 1.7492, 1.7837, + 1.7155, 1.7384, 1.8116, 1.7994, 1.7162, 1.7863, 1.8267, 1.8537, 1.6841, + 1.7267, 1.7238, 1.6594, 1.6171, 1.7859, 1.7840, 1.7119, 1.7442, 1.8366, + 1.7012, 1.6456, 1.8376, 1.8048, 1.7586, 1.7052, 1.7953, 1.6071, 1.7094, + 1.6981, 1.8010, 1.7339, 1.7359, 1.7767, 1.7557, 0.3923, 1.7237, 1.7051, + 1.8097, 1.6661, 1.7293, 1.7843, 1.8136, 1.7692, 1.6809, 1.7752, 1.7953, + 1.7330, 1.7169, 1.7330, 1.7858, 1.7055, 1.7001, 1.7556, 1.7170, 1.7452, + 1.7962, 1.7701, 1.7458, 1.6952, 1.6571, 1.7019, 1.6985, 1.7858, 1.7325, + 1.8441, 1.7544, 1.7539, 1.7416, 1.7197, 1.7388, 1.7254, 1.7573, 1.6840, + 1.6938, 1.7749, 1.8030, 1.7458, 1.7379, 1.7583, 1.5081, 1.7613, 1.8099, + 1.8687, 1.7221, 1.8274, 1.7241, 1.8184, 1.6901, 1.8262, 1.7127, 1.7925, + 1.6913, 1.7442, 1.7687, 1.7329, 1.7385, 1.7035, 1.7806, 1.7704, 1.8107, + 1.7890, 1.7229, 1.7178, 1.7381, 1.8056, 1.6509, 1.8329, 1.7378, 1.7336, + 1.7829, 1.6570, 1.6778, 1.7194, 1.8111, 1.7018, 1.6994, 1.6840, 1.7387, + 1.7542, 1.6884, 1.7213])Parameter containing: +tensor([ 1.0949e-01, 1.2029e-01, -2.2643e-01, 1.3633e-02, 7.7175e-02, + 2.0759e-01, 1.2732e-01, 5.3440e-02, -1.8767e-01, -3.7663e-03, + 5.5563e-02, -4.0936e-02, -1.8914e-01, -4.9850e-02, -8.7422e-02, + -8.5198e-02, -3.2767e-02, -3.9917e-02, -1.4945e-02, 5.4149e+00, + -4.7396e-02, 8.8626e-02, -2.3334e-02, -1.2182e-02, -4.0219e-02, + -4.6954e-02, 5.5812e-02, 2.6243e-02, 2.9270e-02, -7.4899e-02, + 6.1470e-02, 3.6407e-02, 1.3643e-02, 1.1972e-01, 9.5888e-02, + -9.1053e-02, 1.0808e-02, -6.1807e-02, -1.2643e-01, 1.0759e-01, + 7.5769e-02, 3.0389e-02, 1.9903e-02, 2.0498e-02, -2.8517e-02, + -3.5988e-03, 3.3271e-02, -2.9117e-02, 6.7151e-02, 5.6767e-02, + 2.7080e-03, 1.0900e-02, 3.1927e-02, 
1.3691e-02, 5.4312e-02, + 1.1228e-01, -1.0770e-01, -8.9129e-02, 8.0802e-02, 2.2251e-02, + 9.6828e-02, -7.9053e-02, 4.6662e-02, 2.6232e-02, 8.6675e-02, + 7.8493e-03, 1.2616e-01, 1.0770e-01, -4.4169e-02, 1.6066e-01, + 5.3667e-02, 1.1023e-01, 7.7716e-01, 8.7449e-02, -1.5993e-01, + 6.4785e-02, 7.7663e-02, -1.3849e-01, -2.6414e-02, 1.5254e-02, + -3.1125e-03, -5.5026e-02, -9.6868e-03, 1.4584e-01, 1.1902e-02, + 1.0312e-02, 1.2705e-01, 1.0381e-01, -2.9179e-02, 2.8543e-02, + -1.6863e-02, 4.4634e-02, 3.5224e-02, -5.3106e-02, -8.9368e-02, + 2.2416e-02, 2.2251e-02, -8.7668e-02, -1.0003e-01, 1.3118e-02, + 3.7356e-02, -4.3921e-02, 1.0691e-02, -8.1180e-02, 8.0626e-02, + 1.2307e-02, 3.7298e-02, -2.7534e-02, -2.6614e-02, -6.7037e-02, + -7.2763e-02, -1.8265e-02, -1.0384e-02, 9.7256e-02, 1.2837e-01, + -3.4833e-02, -3.0022e-02, -5.6598e-02, 5.1160e-02, 1.4362e-01, + -3.5597e-02, -2.8443e-01, -1.3226e-01, -6.6577e-02, -1.5931e-01, + -3.4211e-02, -1.8955e-01, 7.5645e-03, 7.7564e-03, -5.1944e-02, + -1.0860e-01, 7.3288e-02, -6.2435e-02, -1.8061e-02, -7.4753e-02, + 7.2062e-02, -8.5082e-03, -4.6290e-02, -7.0267e-02, 5.0560e-04, + 2.1024e-03, 3.1026e-02, 5.5825e-02, 2.2368e-02, 6.0203e-02, + -3.7331e-02, -9.3823e-03, 1.2192e-02, -6.6988e-02, 1.6273e-01, + 2.5527e-01, -9.1718e-02, -1.5925e-01, 3.0293e-02, -8.7519e-02, + -4.5639e-01, -6.0476e-02, -2.3655e-02, 3.3486e-02, 2.2417e-02, + 1.9185e-01, -1.7733e-01, -3.4288e-02, 3.0369e-02, -2.2127e-03, + 3.9785e-02, -1.1005e-01, 8.2197e-02, -5.6468e-02, 3.7779e-02, + -4.6349e-02, 8.3061e-02, 1.7083e-03, 3.6090e-02, 6.6748e-02, + -1.3850e-01, -8.5291e-03, 3.9900e-02, -2.8865e-02, 5.9780e-02, + 1.6062e-03, -1.1419e-01, 7.5935e-02, 1.8732e-02, 2.3851e-02, + -1.9683e-01, 1.9399e-02, 5.7028e-01, -1.1578e-02, -5.2761e-02, + 2.9288e-02, -7.8124e-02, 7.4517e-02, 1.0981e-01, -6.1710e-02, + 3.0429e-02, -3.0580e-02, -1.2169e-01, 1.4486e-01, 2.7204e-02, + -2.0147e-01, -5.2343e-02, 1.5732e-02, 9.6297e-02, 4.9667e-02, + 1.5123e-01, -8.8324e-02, 1.2086e-01, 
1.1278e-01, 9.8894e-04, + 1.0845e-02, 2.4921e-01, -1.5126e-03, -2.4564e-02, -1.0051e-01, + -1.0166e-02, -6.3747e-02, 4.1885e-02, -1.2753e-01, -2.2882e-01, + -1.2265e-02, -5.5138e-02, 2.3936e-02, -1.1381e-02, 3.2840e-02, + -7.2174e-02, 9.4510e-02, -3.3043e-02, 1.1032e-01, -4.9055e-02, + 1.7022e-03, 4.9647e-02, 1.1223e-02, 9.7754e-02, 9.0599e-03, + 1.6349e-02, -1.0698e-01, 7.6059e-02, 7.9308e-02, 9.7373e-03, + 9.2770e-02, 1.4207e-01, -4.8836e-03, 4.7935e-02, 6.4542e-02, + 3.0055e-02, 1.2875e-01, -4.6859e-02, 4.9863e-02, 1.4377e-03, + 1.5789e-02, 1.4662e-02, 1.2451e-01, -1.0362e-01, 1.4963e-01, + -2.6775e-02, -4.0095e-02, -1.1525e-01, 9.7913e-03, 3.2486e-02, + -1.0553e-01, -3.9868e-02, -1.9666e-02, -6.5488e-02, -5.8675e-02, + 2.3672e-02, -6.1824e-02, -3.4578e-03, -7.2933e-02, -2.7511e-02, + 5.2952e-03, 6.9479e-02, 1.0031e-01, -1.5548e-01, -1.5024e-02, + 7.7628e-02, -8.3099e-02, 2.1916e-02, 9.7977e-02, -1.0470e-01, + 1.8839e-01, 1.2992e-01, -3.9989e-02, 3.8404e-02, 2.1818e-02, + 3.8327e-02, 5.1547e-02, -5.4093e-02, 1.4502e-01, -1.3062e-01, + -5.9733e-02, -1.4898e-02, 1.5852e-01, 6.7279e-02, 9.2137e-02, + -1.3338e-02, 1.8929e-01, 1.5181e-02, 6.7185e-02, 5.8950e-02, + 5.9982e-02, 4.4719e-02, -5.1546e-02, 1.4082e-01, -1.4495e-02, + -1.1608e-02, 9.2850e-02, -8.2966e-02, 9.0176e-02, -5.3713e-02, + 4.5182e-02, 1.0756e-02, -8.7488e-02, -1.2639e-01, -1.3084e-03, + -7.5066e-02, 2.1022e-02, 2.8497e-02, 1.8901e-01, -4.0666e-02, + -7.9830e-02, 2.6010e-01, -6.3069e-02, -5.6760e-02, 2.3198e-01, + -2.7823e-01, 3.6469e-02, -2.2526e-02, 8.1990e-02, -1.8918e-01, + 2.0363e-01, -5.4943e-02, 1.3450e-01, -5.3811e-02, 5.4539e-02, + 9.9996e-02, -2.9484e-02, -3.2027e-02, 3.0322e-02, -5.0587e-02, + -1.2371e-01, 2.7864e-02, -7.2747e-02, -4.5982e-02, 6.4722e-02, + 2.5427e-03, 1.0404e-01, 1.2117e-01, 5.1235e-02, -2.6081e-02, + -7.1022e-02, 7.9909e-02, 4.0153e-03, 1.3626e-01, -6.4602e-02, + -8.0432e-02, -5.1698e-02, -2.0403e-03, -2.4301e-02, -9.1350e-02, + -2.8840e-03, -1.5332e-02, -3.6093e-02, 
-9.8040e-02, -5.1894e-03, + -2.3653e-02, 7.5881e-02, 1.7702e-01, 9.3416e-02, 7.4547e-02, + 1.1536e-01, -3.9263e-03, -5.5926e-02, -4.2878e-02, -1.3414e-02, + 1.5467e-02, 1.1647e-01, 1.1761e-01, -2.0235e-03, -2.0868e-02, + -1.2706e-01, 1.3958e-01, 3.3958e-02, -2.4072e-02, -8.1018e-02, + -2.9652e-02, -3.1342e-02, -5.6468e-02, -1.2003e-01, -6.9973e-02, + -2.3683e-03, 1.0191e-02, -6.2477e-02, -3.2790e-02, 3.2910e-02, + -3.8474e-04, 7.6801e-03, 5.5080e-02, -3.7293e-02, 3.7632e-02, + 1.2180e-01, -6.6580e-02, 8.8961e-02, 2.3944e-02, -1.2642e-01, + -1.5582e-01, -3.5932e-02, 5.6498e-02, 1.3303e-02, -5.9329e-02, + -9.1585e-02, 7.7481e-02, 1.0074e-01, -1.0729e-01, 2.9733e-03, + -1.9608e+00, -1.7896e-02, 1.6667e-02, -6.7317e-02, -4.6438e-02, + -2.0567e-02, -5.6402e-03, 4.9861e-02, -6.5507e-02, -4.4573e-02, + 4.8462e-02, -1.7947e-01, -1.2917e-01, 1.0376e-01, 4.6402e-03, + -3.8884e-02, -5.6493e-02, 4.4177e-02, 1.3340e-01, 1.6811e-01, + 2.6634e-03, 1.9004e-02, 7.5964e-02, 1.8048e-01, -1.0555e-01, + -2.6694e-02, -1.1950e-01, -7.9573e-02, 1.3374e-02, 1.5681e-01, + -1.3325e-01, -1.1418e-02, -6.1896e-02, 1.3991e-01, -2.9282e-03, + -3.5584e-03, -1.6388e-02, -1.1436e-01, 6.5513e-02, 4.1263e-02, + 3.0082e-02, 3.3306e-02, -1.1155e-02, -5.5836e-02, -1.1409e-01, + -1.0892e-02, 9.0167e-02, -1.6112e-01, 6.5796e-02, -1.1796e-02, + -5.7307e-02, 7.6839e-02, -9.1151e-02, 1.2946e-01, 3.8997e-03, + -3.1941e-02, -1.0808e-01, -8.4099e-02, 5.5579e-03, -5.9969e-02, + 6.1003e-02, 1.6967e-01, 4.3307e-02, 1.5222e-02, 1.2151e-01, + 1.2265e-01, -1.1868e-01, 5.6359e-02, 1.5663e-01, -8.6873e-02, + -2.5422e-02, -4.0970e-02, 1.4281e-01, 9.8614e-02, -4.6476e-02, + 1.9954e-02, 5.2306e-03, -5.3084e-02, -4.3791e-02, -1.4934e-01, + 3.4870e-02, -7.6011e-02, 1.2525e-01, -5.5468e-02, 7.1483e-02, + -9.6414e-03, -1.3096e-01, 4.0228e-02, -2.6196e-02, -8.0135e-02, + 3.0319e-02, 1.4976e-01, 7.4810e-02, 3.7500e-02, -2.3439e-02, + -8.4951e-02, -3.1313e-02, -1.4224e-01, 4.9112e-02, -5.7134e-02, + -1.7928e-02, 7.7955e-02, 
1.6979e-01, 1.1908e-01, -1.5242e-01, + -7.3433e-02, 3.9254e-03, 9.9303e-02, -3.5989e-02, -4.6880e-02, + -7.3148e-02, 2.5752e-02, -2.4774e-02, 4.5360e-02, -8.7025e-02, + 2.4669e-03, -1.3410e-02, 1.1635e-02, -6.7339e-02, -2.3523e-02, + 1.1249e-02, -2.1098e-02, 5.2816e-03, -7.8247e-02, -8.7813e-02, + 2.1990e-03, 5.2155e-02, -2.4094e-02, 1.0330e-01, 1.7441e-02, + 5.9195e-02, 1.4813e-01, 1.1106e-02, 5.9486e-02, 2.5604e-02, + -2.0879e-02, -3.8578e-02, -1.1124e-01, 9.3536e-02, -4.4539e-03, + -1.8219e-02, -2.0443e-02, -4.9660e-02, -1.0606e-02, 1.4319e-01, + -3.8835e-02, 1.8643e-01, 3.4203e-02, 5.8226e-02, 1.4025e-01, + 9.2013e-02, 6.8444e-03, -2.5568e-02, 3.9318e-02, 5.1420e-02, + 9.7254e-02, 9.0392e-03, -5.4230e-02, -2.1404e-02, 8.8912e-02, + 6.4720e-02, 1.2284e-02, 4.4045e-02, 2.5807e-03, -2.5942e-02, + -1.3710e-01, -1.3367e-02, 7.5493e-02, 1.1875e-01, 8.1141e-02, + -1.2936e-02, 4.7390e-02, -1.1134e-01, 9.6089e-02, 6.4378e-02, + -1.8029e-02, 8.4028e-02, 1.4069e-01, -1.7390e-02, 2.6406e-02, + -7.9666e-03, 5.2227e-02, 3.9486e-02, -9.2852e-03, 7.3538e-02, + -7.3171e-02, -1.9590e-01, -1.7776e-02, -4.0482e-02, 1.5421e-02, + -4.6823e-02, 2.3603e-02, -7.3262e-03, 8.0283e-02, -3.5043e-02, + 1.1719e-01, 1.1037e-01, -5.6377e-02, -3.4377e-03, -4.8010e-02, + -1.2813e-01, 2.4231e-02, 1.1043e-01, 5.4121e-02, -7.6396e-02, + 8.8454e-02, 4.3982e-02, 7.0963e-02, 8.3958e-03, 5.9537e-02, + -6.3115e-02, -6.1581e-02, 4.2345e-02, 1.0158e-01, -6.7802e-02, + 1.0960e-01, -3.4674e-02, 1.2936e-01, 1.9522e-01, 1.0823e-01, + 5.2180e-02, -9.0325e-02, 5.8393e-03, 8.2930e-02, -8.4221e-02, + 1.7008e-03, -7.9800e-02, 7.3705e-02, -8.1106e-02, 5.6668e-02, + 3.2226e-02, 6.6968e-03, 1.3006e-01, 4.6790e-02, 2.0633e-01, + 1.8214e-01, 1.8984e-04, -5.6160e-02, 1.1562e-01, -6.7184e-02, + 5.9152e-02, -1.1576e-01, -9.8722e-04, 7.8000e-02, 1.0290e-01, + -4.5230e-02, 9.1396e-03, -7.5262e-02, 1.1895e-02, 1.1374e-01, + 1.6541e-02, -1.4464e-01, -1.6592e-01, 6.0472e-03, 4.7818e-04, + -2.0584e-01, 5.2593e-02, 3.0140e-02, 
1.3068e-02, 8.7231e-03, + 1.0776e-02, -3.1502e-02, -2.4943e-02, -5.4088e-03, 1.0589e-01, + -2.9905e-02, -5.0469e+00, -3.2722e-02, 4.9282e-02, -2.2213e-02, + -1.1515e-01, -4.1087e-02, -8.2253e-02, -5.4938e-02, 4.3435e-02, + -4.4115e-02, -2.0652e-02, -5.0619e-02, -3.0088e-02, -6.0237e-02, + 1.7771e-01, 3.4522e-03, -7.0164e-02, 1.7609e-02, -8.1784e-02, + 9.7742e-02, 8.6289e-02, 5.3136e-03, 1.3217e-02, -1.8450e-01, + -2.3459e-02, 9.8487e-02, 1.8942e-02, -2.4920e-02, 6.7617e-02, + -3.2965e-02, 7.1420e-02, 8.1317e-02, 1.9588e-02, 4.7272e-03, + -4.8509e-02, 3.8701e-03, 1.6512e-02, -7.8775e-02, -6.9517e-02, + 1.2679e-04, 3.3200e-03, 6.5191e-03, -1.9788e-01, 8.8512e-02, + -4.9999e-03, -2.4801e-01, 1.4648e-01, 3.9599e-02, 1.3425e-01, + 8.5676e-02, 8.7498e-02, 1.0170e-01, 9.6648e-02, 1.2441e-02, + 4.7502e-02, 7.0925e-03, 1.0906e-02, 9.9315e-03, -8.5651e-02, + 1.0266e-01, -1.5819e-01, -1.1354e-01, 8.0533e-02, -1.7047e-02, + -1.7048e-02, -1.7593e-01, 8.0783e-03, 3.7989e-02, -4.0361e-02, + -2.3227e-02, -2.0796e-02, -1.1312e-01, -4.6048e-02, 9.6961e-02, + 4.7441e-02, 7.9526e-02, 8.1258e-02, 2.5623e-02, -3.5423e-02, + -2.2783e-02, 6.6940e-02, 5.3477e-02, -1.4346e-01, -9.9305e-02, + -5.6176e-02, 1.2165e-01, 1.0727e-02])Parameter containing: +tensor([[-0.0076, 0.0140, -0.0179, ..., -0.0190, -0.0001, 0.0083], + [-0.0082, -0.0062, -0.0340, ..., 0.0008, 0.0212, -0.0008], + [-0.0030, -0.0002, 0.0044, ..., 0.0062, -0.0090, 0.0150], + ..., + [-0.0003, 0.0026, -0.0107, ..., 0.0015, 0.0198, 0.0079], + [-0.0139, 0.0263, 0.0082, ..., -0.0135, 0.0330, -0.0161], + [ 0.0057, 0.0020, -0.0367, ..., 0.0184, -0.0093, 0.0296]])Parameter containing: +tensor([-0.2454, -0.3401, -0.3860, ..., -0.3416, -0.3689, -0.1425])Parameter containing: +tensor([[-0.0122, 0.0114, 0.0196, ..., 0.0045, 0.0157, 0.0007], + [ 0.0119, 0.0089, -0.0186, ..., -0.0101, -0.0171, -0.0015], + [ 0.0058, -0.0237, 0.0086, ..., -0.0162, 0.0190, -0.0067], + ..., + [-0.0058, 0.0016, 0.0089, ..., -0.0329, -0.0035, 0.0069], + [-0.0245, 
0.0052, 0.0177, ..., -0.0051, 0.0200, 0.0037], + [ 0.0006, -0.0077, 0.0039, ..., 0.0064, 0.0152, -0.0266]])Parameter containing: +tensor([ 2.1591e-02, -3.8971e-02, 4.7058e-02, -3.9276e-02, -4.8645e-02, + -4.8645e-02, 6.9092e-02, 5.2643e-02, 5.4443e-02, -3.2440e-02, + 5.1117e-02, 1.7868e-02, -7.8888e-03, -3.6774e-02, -4.2305e-03, + 2.9739e-02, 6.4621e-03, -1.9791e-02, -2.1191e-03, -3.9185e-01, + -8.0811e-02, 5.4291e-02, 4.3640e-02, -1.6708e-02, 3.3295e-02, + 5.2307e-02, 1.1772e-02, -5.2429e-02, -1.8356e-02, 7.1472e-02, + 9.4421e-02, -4.7646e-03, -8.4412e-02, 5.1117e-02, -2.5604e-02, + -1.0460e-02, -8.0505e-02, 2.1240e-02, 1.8341e-02, 5.2277e-02, + 4.8065e-02, 5.0507e-02, 2.8397e-02, 3.0899e-02, 7.3051e-03, + -2.5421e-02, 6.0455e-02, -1.6251e-02, -5.4962e-02, 9.8228e-04, + -1.8143e-02, -4.0802e-02, -8.2825e-02, -9.6558e-02, 1.0406e-02, + 3.6896e-02, -2.7069e-02, -6.9092e-02, 2.9907e-02, 3.8574e-02, + 5.0781e-02, 3.7193e-03, 1.0065e-01, 7.6485e-03, -3.1614e-04, + 4.6722e-02, -3.7933e-02, -7.7271e-02, -1.4732e-02, -2.0538e-02, + 7.1045e-02, -4.4556e-03, 3.7109e-02, 3.2104e-02, 1.3588e-02, + -1.7609e-02, 4.4006e-02, 1.4793e-02, 1.4610e-02, 1.1719e-01, + -2.8477e-03, 5.4283e-03, -5.0842e-02, 1.1711e-02, -7.9773e-02, + -9.3384e-02, 4.7394e-02, -3.4668e-02, -2.8610e-02, -2.3880e-02, + -1.7273e-02, -6.7444e-02, 7.4720e-04, 1.6260e-01, -1.2970e-04, + -3.3356e-02, -4.6936e-02, -1.5274e-02, -8.2336e-02, 4.1718e-02, + 6.7101e-03, -9.7961e-02, -4.6692e-02, -1.5358e-02, 2.2247e-02, + 5.7373e-02, 7.7820e-02, -9.5459e-02, 3.0365e-02, -7.7393e-02, + 2.0950e-02, -1.5511e-02, -9.0942e-02, 1.4183e-02, -1.4450e-02, + -2.0618e-03, 6.0455e-02, 5.1392e-02, -3.3203e-02, 3.6133e-02, + 4.1931e-02, -3.7567e-02, 2.7359e-02, 6.7902e-03, 5.8258e-02, + 8.9188e-03, -7.5439e-02, 6.3904e-02, 2.8931e-02, 1.5747e-02, + -1.0536e-02, -3.3691e-02, -2.4719e-02, -2.8183e-02, -5.4565e-02, + 8.6792e-02, -2.8625e-02, 1.1589e-02, 7.1777e-02, -3.2867e-02, + -2.1286e-02, 4.6967e-02, 3.5553e-02, -4.9530e-02, 
2.1683e-02, + -7.2815e-02, 2.3666e-02, -3.8696e-02, -1.0870e-01, 4.0802e-02, + 9.8694e-02, -6.8665e-02, -2.6184e-02, -7.4402e-02, -3.1311e-02, + 2.8305e-02, 7.9651e-02, 1.1002e-02, -2.2602e-03, 3.6168e-04, + 6.1066e-02, 1.2016e-02, -5.0720e-02, -3.1776e-03, 5.1636e-02, + 3.5950e-02, 6.2805e-02, 7.7362e-03, 9.9976e-02, -4.2847e-02, + 5.1308e-03, -3.3020e-02, -1.3115e-02, 2.6184e-02, -5.9052e-02, + -1.1816e-01, -7.5073e-02, -1.9012e-02, 5.0598e-02, 8.6010e-05, + -1.3525e-01, 3.9940e-03, 1.1029e-01, 1.4124e-01, 3.8116e-02, + 1.7729e-03, -2.2736e-02, 1.1194e-01, -3.6488e-03, -2.0081e-02, + -1.1652e-01, -5.5634e-02, 7.7148e-02, 2.6855e-02, 3.7354e-02, + 1.4557e-02, 1.2703e-02, -8.2397e-02, 5.5847e-02, 1.1093e-02, + -1.5022e-02, 2.6810e-02, 1.7426e-02, -5.6915e-02, 1.6739e-02, + 9.7198e-03, -9.5062e-03, 5.3375e-02, 9.7122e-03, -3.4241e-02, + -3.4199e-03, -4.3671e-02, -4.1565e-02, -1.0147e-02, 4.3762e-02, + -2.2110e-02, 5.1193e-03, -3.9307e-02, -7.7087e-02, 7.2212e-03, + 6.0150e-02, 2.0355e-02, 2.2690e-02, 5.6854e-02, 1.1612e-02, + 5.4535e-02, -7.8354e-03, 7.3181e-02, 7.7148e-02, -2.3209e-02, + -4.7760e-02, -1.6113e-02, 8.5678e-03, -4.8462e-02, -6.6772e-02, + 1.0094e-02, -6.7017e-02, -2.6184e-02, 6.4880e-02, 4.4769e-02, + 7.5302e-03, 1.7288e-02, 9.0561e-03, -6.2065e-03, -4.1870e-02, + -4.5105e-02, -7.7942e-02, 6.5125e-02, -3.1433e-02, 5.7190e-02, + -5.0293e-02, -6.7329e-03, -7.7858e-03, -1.2352e-02, -1.3573e-02, + -6.3599e-02, 2.1515e-02, -2.0935e-02, -3.9490e-02, -1.0811e-02, + -1.2524e-01, -2.2247e-02, 9.7046e-03, 2.3155e-03, -5.7465e-02, + -3.8879e-02, -1.4923e-02, 3.6652e-02, 7.2083e-02, -3.6194e-02, + 3.0396e-02, -5.0415e-02, 1.0468e-02, 8.6670e-03, 6.9847e-03, + -6.4964e-03, -6.5552e-02, 1.2146e-02, 9.1858e-03, -2.9999e-02, + -3.7506e-02, 1.2749e-02, -2.3972e-02, 7.6782e-02, -2.5894e-02, + -1.9989e-02, -8.8623e-02, -1.8707e-02, 1.0590e-01, -5.0079e-02, + -3.5248e-02, 4.0253e-02, 1.7792e-02, -6.2683e-02, 6.1218e-02, + 8.9645e-03, 1.0077e-01, -1.9867e-02, -4.0985e-02, 
-4.0771e-02, + -3.0350e-02, 4.6906e-02, 1.2341e-03, -3.4363e-02, 5.5939e-02, + 8.1238e-02, -9.4177e-02, 3.7079e-02, 1.4252e-02, -5.8746e-03, + 3.8757e-02, -3.4882e-02, 2.1118e-02, 3.4027e-02, -2.6886e-02, + -2.7466e-02, 3.1372e-02, 7.7759e-02, 5.7251e-02, 1.0223e-01, + -6.7627e-02, 6.8398e-03, -1.3626e-02, -2.0180e-03, 1.2168e+00, + -1.6504e-01, -5.4413e-02, 4.6722e-02, 8.7128e-03, -3.4760e-02, + 2.0538e-02, -2.0340e-02, -3.0212e-02, -2.3682e-02, 1.9055e-01, + -3.7689e-02, -5.8289e-03, 9.5215e-02, -2.2141e-02, -2.5253e-02, + 6.0394e-02, -1.8066e-02, -5.6946e-02, -2.8458e-03, -2.0309e-02, + 9.6313e-02, 6.1531e-03, 2.2415e-02, -1.0199e-01, -4.9469e-02, + 3.2684e-02, -6.6895e-02, 1.3893e-02, -8.1909e-02, -3.3569e-02, + 5.0507e-02, -2.1790e-02, -3.6224e-02, -2.2003e-02, 8.4595e-02, + -3.1860e-02, -4.8798e-02, -7.1030e-03, -7.2327e-02, 7.0862e-02, + -3.1677e-02, -6.7017e-02, 3.7537e-02, 5.0507e-02, 1.7990e-02, + -2.3697e-02, 6.3110e-02, 1.3781e-03, -4.6692e-02, -6.9763e-02, + 7.8964e-03, 4.9042e-02, 1.7944e-02, -4.6997e-02, 1.9745e-02, + -9.0332e-03, -1.8921e-02, -5.5573e-02, 5.3062e-03, -1.4740e-02, + -6.3232e-02, 2.2827e-02, 3.0563e-02, 1.1780e-02, 7.3776e-03, + 1.7517e-02, 4.5319e-02, 8.3435e-02, -2.1545e-02, 1.4534e-02, + -5.2765e-02, 4.9255e-02, -1.1542e-01, -4.5410e-02, 2.5803e-02, + 5.5908e-02, 6.8420e-02, -4.9316e-02, -9.5459e-02, 4.9347e-02, + -5.4504e-02, -2.6794e-02, -2.6871e-02, -4.8637e-03, 2.7039e-02, + 4.5197e-02, 3.5614e-02, 1.5411e-02, -1.2154e-02, 1.3757e-01, + -1.6479e-01, -1.7639e-02, -3.5309e-02, -3.1311e-02, -2.1317e-02, + 7.0923e-02, 3.9978e-02, -2.0584e-02, -6.0364e-02, -8.1360e-02, + 1.7059e-02, 5.4321e-03, -2.0157e-02, -1.9180e-02, -1.2947e-02, + 8.9417e-03, 2.0828e-03, -6.4087e-02, 2.7237e-03, 4.8401e-02, + -2.8706e-03, -2.6810e-02, 1.9913e-02, 4.0802e-02, -7.6172e-02, + 9.4360e-02, 3.8055e-02, 2.1530e-02, -1.8784e-02, 4.5967e-03, + -5.3864e-02, 2.6199e-02, -1.7410e-02, -8.3069e-02, -5.4901e-02, + 5.3177e-03, -5.6915e-03, -6.2469e-02, 
1.5167e-02, -8.2626e-03, + -1.8600e-02, -5.1331e-02, -5.4199e-02, -4.1084e-03, 6.8726e-02, + -3.8574e-02, -3.0842e-03, -4.0131e-02, -1.4565e-02, -3.6804e-02, + 6.5552e-02, -5.0049e-02, 6.9641e-02, 6.4087e-02, -2.2736e-02, + 2.8366e-02, -5.7983e-02, -6.0944e-02, 1.8295e-02, 2.2461e-02, + -5.0079e-02, 3.4027e-02, -1.0211e-01, -9.0576e-02, -8.8867e-02, + -4.1779e-02, 2.3163e-02, -4.7607e-02, -1.4679e-02, 4.1313e-03, + -1.6413e-03, 3.9581e-02, -2.9724e-02, -1.7258e-02, 6.6895e-02, + -2.2232e-02, -8.6670e-03, 2.5314e-02, 2.3438e-02, -4.0558e-02, + -9.4543e-02, -6.9519e-02, 9.9945e-03, -8.9844e-02, -1.6083e-02, + -3.2257e-02, 1.7853e-02, -3.8818e-02, 5.4474e-02, 1.9516e-02, + -3.6224e-02, 1.8921e-02, 9.1675e-02, -8.5878e-04, -5.4382e-02, + -1.0175e-01, 7.5012e-02, -8.4778e-02, 1.5945e-02, -3.0762e-02, + 3.9764e-02, 3.0624e-02, -7.0862e-02, -2.7359e-02, 5.8899e-02, + 6.0669e-02, 1.4641e-02, 3.0029e-02, 1.3330e-01, -1.0565e-01, + -3.7651e-03, -8.3008e-03, 9.8228e-04, 3.3630e-02, -6.7940e-03, + 2.4887e-02, -2.1896e-02, 1.8768e-02, 5.7159e-02, -2.2507e-02, + -9.0881e-02, 3.8605e-02, 4.2206e-02, 4.7821e-02, 1.1975e-01, + 2.7954e-02, -5.4665e-03, -5.6458e-02, 3.1403e-02, 2.6840e-02, + 9.7656e-02, -2.3071e-02, 3.4241e-02, -1.9257e-02, 5.1605e-02, + 1.7181e-02, 7.5195e-02, 4.2206e-02, 8.7585e-02, -7.7332e-02, + -1.6968e-02, 7.8186e-02, 1.0994e-02, -4.6082e-02, 1.9073e-03, + -5.4596e-02, -5.4779e-02, 2.9236e-02, 7.0068e-02, -1.0330e-02, + 3.3234e-02, 1.0223e-02, -4.5807e-02, 4.8706e-02, -4.0680e-02, + -1.4160e-02, 5.7068e-02, -1.3489e-02, 7.1411e-02, 6.9618e-03, + 1.0773e-01, 5.8380e-02, -4.2786e-02, -6.8359e-02, 3.7079e-02, + 5.5695e-02, -1.1237e-01, -2.9800e-02, 6.0272e-02, 4.4647e-02, + -3.5309e-02, -6.6589e-02, -4.6417e-02, 2.3407e-02, -5.9784e-02, + -2.2461e-02, 7.5684e-02, 1.7471e-02, -9.3811e-02, 2.6302e-03, + -1.5915e-02, -1.8127e-02, 1.8677e-02, -4.4800e-02, 4.0314e-02, + -3.1281e-02, -2.8443e-04, -6.0028e-02, 3.4027e-02, -1.3159e-01, + -5.4443e-02, -2.5757e-02, 
-4.6570e-02, -1.5465e-02, 5.2979e-02, + -2.7069e-02, -1.7120e-02, 1.9241e-02, -3.6163e-03, -8.4473e-02, + -3.5522e-02, 6.4087e-02, 6.2561e-02, -5.8441e-02, 2.9510e-02, + -8.4961e-02, -3.9062e-02, 6.1302e-03, 7.5989e-03, 1.6144e-02, + 1.5808e-02, -2.5375e-02, 5.3635e-03, -9.4788e-02, -6.1554e-02, + -3.1952e-02, 5.6732e-02, 1.0132e-02, 1.7563e-02, 1.5266e-02, + -6.7993e-02, 7.7515e-02, -5.8594e-02, 1.3535e-02, -3.5431e-02, + -3.6850e-03, -8.2153e-02, -1.0689e-02, 2.4429e-02, -3.1189e-02, + 3.2166e-02, -5.3101e-02, -6.2065e-03, 6.4453e-02, 2.9449e-02, + 6.5796e-02, -5.1941e-02, -3.0136e-02, -4.2572e-02, 1.6327e-02, + 4.2999e-02, 3.8879e-02, -5.3070e-02, -5.2376e-03, 3.4943e-02, + -1.0147e-02, 1.0635e-02, -5.2299e-03, -4.2915e-03, -1.9958e-02, + 1.9470e-02, -8.3313e-03, 2.7039e-02, -4.2969e-02, -8.4412e-02, + -1.5411e-02, 7.9269e-03, 7.1106e-02, 1.1208e-02, 1.2383e-02, + 2.5940e-02, 2.7084e-02, -9.2163e-03, -1.9058e-02, -1.3649e-02, + -6.9618e-03, 3.2715e-01, 3.3295e-02, -1.7197e-02, 4.3732e-02, + -3.2684e-02, 6.1646e-02, 2.2232e-02, 9.7778e-02, 5.4817e-03, + -7.6050e-02, -1.6159e-02, 8.4610e-03, -2.1652e-02, -9.4299e-02, + -2.1683e-02, 3.0807e-02, 4.0924e-02, -4.1992e-02, 7.6355e-02, + 4.2358e-02, 1.7212e-02, -3.0106e-02, -6.0333e-02, -7.1716e-02, + 4.4861e-02, -4.4617e-02, 1.8730e-03, 2.9175e-02, 8.8501e-03, + 3.5686e-03, 7.3975e-02, 4.4281e-02, 1.5991e-02, 9.7839e-02, + -1.7776e-02, 2.5635e-02, 4.3152e-02, -1.4130e-02, 5.9235e-02, + -5.7404e-02, 7.4120e-03, 1.9426e-03, -4.9095e-03, 1.5617e-02, + -1.7517e-02, -2.3315e-02, 3.9368e-02, 2.7481e-02, 5.0430e-03, + -6.1859e-02, -3.4363e-02, 5.7281e-02, -1.0544e-02, 2.8152e-02, + 9.5459e-02, -7.8369e-02, 1.1642e-02, -9.4788e-02, -4.5929e-02, + -3.6133e-02, 8.3466e-03, -7.0374e-02, -3.3875e-02, 1.4877e-02, + 8.5907e-03, 6.1523e-02, 6.3171e-02, 2.8381e-02, 3.4576e-02, + 3.2288e-02, -1.2085e-01, 2.4643e-02, 4.6600e-02, 1.0339e-01, + -2.8563e-04, -3.3905e-02, -7.9250e-04, 4.6356e-02, -3.6804e-02, + 5.4138e-02, -3.4637e-02, 
7.5256e-02, 4.1748e-02, -3.9215e-02, + 4.8370e-02, 1.8768e-02, -7.0095e-04])Parameter containing: +tensor([2.3688, 2.2313, 2.1793, 2.2621, 1.8781, 2.4473, 2.2259, 2.2702, 2.3276, + 2.4534, 2.1846, 2.2655, 2.3461, 2.2421, 2.3190, 2.2593, 2.3454, 2.2707, + 2.2164, 1.7897, 2.3242, 2.3469, 2.3397, 2.2526, 2.2761, 2.3129, 2.2576, + 2.2638, 2.1498, 2.2063, 2.4860, 2.2652, 2.2365, 2.3424, 2.2737, 2.4070, + 2.4567, 2.2116, 2.2207, 2.3936, 2.1568, 2.3500, 2.2707, 2.2544, 2.3308, + 2.5199, 2.4381, 2.2376, 2.2106, 2.1092, 2.2784, 2.4935, 2.3248, 2.3377, + 2.5530, 2.3622, 2.4510, 2.1990, 2.2610, 2.3005, 2.3197, 2.3185, 2.2755, + 2.3920, 2.3173, 2.2691, 2.4864, 2.3578, 2.2687, 2.0558, 2.4016, 2.1990, + 2.1231, 2.4432, 2.3081, 2.2969, 2.5476, 2.2131, 2.1720, 2.4336, 2.3412, + 2.3070, 2.4076, 2.3136, 2.3851, 2.2409, 2.3300, 2.2133, 2.3456, 2.2283, + 2.2573, 2.2693, 2.3708, 2.3321, 2.1889, 2.2271, 2.2766, 2.4146, 2.3176, + 2.3727, 2.4370, 2.3428, 2.4437, 2.3811, 2.2463, 2.0142, 2.3516, 2.2030, + 2.2619, 2.1330, 2.2031, 2.3857, 2.4318, 2.2763, 2.2307, 2.3119, 2.3642, + 2.4886, 2.2440, 2.2410, 2.3068, 2.3970, 2.1728, 2.3072, 2.3099, 2.2765, + 2.3712, 2.4885, 2.3787, 2.2320, 2.2641, 2.4476, 2.4079, 2.2429, 2.4214, + 2.2500, 2.2248, 2.2907, 2.2025, 2.4755, 2.2717, 2.2460, 2.3245, 2.3066, + 2.3491, 2.1617, 2.4615, 2.3194, 2.4639, 2.4275, 2.2145, 2.2690, 2.3564, + 2.2622, 2.3437, 1.8991, 2.2557, 2.3662, 2.2678, 2.4041, 2.1706, 2.3727, + 2.3550, 2.4025, 2.4941, 2.2641, 2.0832, 2.2486, 2.3981, 2.4916, 2.5032, + 2.3176, 2.1182, 2.2759, 2.2190, 2.3240, 2.3059, 2.2629, 2.1827, 2.3566, + 2.4849, 2.2785, 2.2739, 2.3701, 2.2370, 2.0638, 2.1875, 1.4153, 2.2091, + 2.1576, 2.4698, 2.4457, 2.3009, 2.3664, 2.4033, 2.3602, 2.2590, 2.1947, + 0.8316, 2.3363, 2.5227, 2.2341, 2.2393, 2.3279, 2.3750, 2.3931, 2.2447, + 2.4049, 2.2068, 2.3888, 2.2901, 2.1921, 2.4135, 2.0252, 2.2543, 2.3396, + 2.0983, 2.3843, 2.3032, 2.1555, 2.3856, 2.3157, 1.9222, 2.4996, 2.2700, + 2.3211, 2.3556, 2.3793, 2.2133, 2.1887, 
2.4271, 2.2548, 2.1998, 2.2376, + 2.2489, 2.3295, 2.4860, 2.1721, 2.2410, 2.3237, 2.2318, 2.4154, 2.2485, + 2.3437, 2.2699, 2.3348, 2.2798, 2.3398, 2.2431, 2.3392, 2.3666, 2.3178, + 2.2839, 2.3838, 2.3424, 2.3153, 2.4632, 2.4543, 2.4417, 2.4471, 2.2925, + 2.2936, 2.4775, 2.2104, 2.2885, 2.2657, 2.3830, 2.4525, 2.2305, 2.4506, + 2.3083, 2.3477, 2.4036, 2.3077, 2.2872, 2.4994, 2.2552, 2.3892, 2.3094, + 2.2584, 2.5298, 2.1819, 2.2275, 2.2282, 2.2621, 2.2170, 2.2949, 2.2702, + 2.2701, 2.1525, 2.3722, 2.3994, 2.2509, 2.3115, 2.2393, 2.4235, 2.2856, + 2.4068, 2.1916, 2.3960, 2.1988, 2.4193, 2.1971, 2.4324, 2.4284, 2.2356, + 2.3937, 2.3617, 2.2964, 2.2486, 2.3001, 2.3829, 2.4658, 2.4456, 2.2983, + 2.2155, 2.4114, 2.3580, 2.1757, 2.4205, 2.3233, 2.2826, 2.3751, 2.2619, + 0.6763, 2.3765, 2.2672, 2.2851, 2.4040, 2.3082, 2.1688, 2.2536, 2.5452, + 2.2973, 2.4423, 2.1116, 2.5492, 2.2694, 2.3131, 2.3285, 2.6898, 2.3127, + 2.2720, 2.4728, 2.3080, 2.2795, 2.4653, 2.2102, 2.4244, 2.2528, 1.9396, + 2.3106, 2.3023, 2.2089, 2.3359, 2.3033, 2.3235, 2.2955, 2.2911, 2.3302, + 2.3617, 2.2888, 2.3233, 2.3285, 2.3036, 2.2174, 2.2295, 2.3946, 2.2406, + 2.3098, 2.4310, 2.3034, 2.4401, 2.3541, 2.2902, 2.3781, 2.2550, 2.4189, + 2.4577, 2.3707, 2.2346, 2.3813, 2.3329, 2.2650, 2.2464, 2.3527, 2.4315, + 2.1820, 2.1956, 2.3020, 2.2607, 2.5223, 2.4836, 2.3561, 2.2295, 2.3134, + 2.2896, 2.2292, 2.4570, 2.2569, 2.3331, 2.2332, 2.3618, 2.4185, 2.2385, + 2.2092, 2.4481, 2.2702, 2.3838, 2.5967, 2.1375, 2.3543, 2.3309, 2.4050, + 1.9929, 2.8342, 2.2762, 2.1165, 2.1972, 2.3158, 2.3037, 2.3601, 2.3452, + 2.4369, 2.4202, 2.3446, 2.0123, 2.3398, 2.4535, 2.3274, 2.3294, 2.3187, + 2.4326, 2.2906, 2.3188, 2.3241, 2.3415, 2.2712, 2.2277, 2.3186, 2.4004, + 2.2099, 2.4762, 1.6964, 2.2420, 2.1310, 2.3157, 2.3439, 2.2923, 2.2653, + 2.3732, 2.2344, 2.3661, 2.3163, 2.3467, 2.3602, 2.5663, 2.3309, 2.2888, + 2.2422, 2.3163, 2.2859, 2.2626, 2.2535, 2.2556, 2.3285, 2.3882, 2.1625, + 2.3154, 2.3565, 2.3619, 2.2242, 2.2228, 
2.2822, 2.3462, 2.3962, 2.3544, + 2.3980, 2.4145, 2.1884, 2.3029, 2.2805, 2.4121, 1.8022, 2.2502, 2.3409, + 2.3668, 2.2910, 2.2283, 2.3151, 2.3326, 2.2121, 2.2692, 2.2691, 2.1824, + 2.3013, 2.5337, 2.4772, 2.2240, 2.1979, 2.3759, 2.2393, 2.2431, 2.5248, + 2.6478, 2.1782, 2.4853, 2.2081, 2.3496, 2.2851, 2.1992, 2.3299, 2.2451, + 2.2018, 2.2678, 2.3296, 2.2097, 2.4417, 2.2165, 2.4353, 2.1743, 2.3772, + 2.3127, 2.3293, 2.3001, 2.3943, 2.2248, 2.1862, 2.2712, 2.2826, 2.2903, + 2.2824, 2.3611, 2.2890, 2.3107, 2.2675, 2.3155, 2.2492, 2.5576, 2.3347, + 2.2097, 2.5520, 2.6258, 2.6445, 2.4963, 2.2860, 2.3289, 2.2363, 2.4110, + 2.3982, 2.3522, 2.1712, 2.4856, 2.3696, 2.2355, 2.3357, 2.4464, 2.2544, + 2.5418, 2.3130, 2.2759, 2.0163, 2.2506, 2.3865, 2.2722, 2.2426, 2.2608, + 2.3552, 2.2536, 2.2496, 2.5000, 2.2151, 2.1788, 2.3562, 2.3983, 2.5739, + 2.2555, 2.3720, 2.4809, 2.2267, 2.1463, 2.2596, 2.3202, 2.3680, 2.2596, + 2.4000, 2.2462, 2.3756, 2.3770, 2.2603, 2.2017, 2.2966, 2.2811, 2.3990, + 2.4600, 2.1128, 2.2974, 2.3937, 2.2691, 2.3202, 2.3163, 2.2603, 2.2670, + 2.1615, 2.5914, 2.3516, 2.3436, 2.5322, 2.3456, 2.2495, 2.3500, 2.3577, + 2.3212, 2.3772, 2.2294, 2.4203, 2.2417, 2.2855, 2.2905, 2.3836, 2.2920, + 2.0747, 2.3910, 2.3422, 2.3533, 2.3373, 2.2799, 2.3063, 2.3708, 2.3412, + 2.2712, 2.3281, 2.3870, 2.0959, 2.1643, 2.2547, 2.3594, 2.4648, 2.4353, + 2.3893, 2.3490, 2.4140, 2.1236, 2.3925, 1.9306, 2.2080, 2.1856, 2.3817, + 2.2347, 2.2804, 2.3309, 2.2901, 2.2481, 2.3116, 2.2113, 2.1951, 2.3585, + 2.2502, 2.2685, 2.2172, 1.8982, 2.2615, 2.3360, 2.4474, 2.3664, 2.4077, + 2.0775, 2.2460, 2.5276, 2.3668, 2.1460, 2.2497, 2.3825, 2.1070, 2.3265, + 2.1847, 2.3438, 2.2506, 2.4812, 2.4725, 2.2261, 2.1374, 2.2534, 2.3833, + 2.2467, 2.3161, 2.3496, 2.1886, 2.5165, 2.3093, 2.3108, 2.3012, 2.2424, + 2.3140, 2.2488, 2.3573, 2.3360, 2.3477, 2.3584, 2.4732, 2.2807, 2.2586, + 2.3842, 2.2568, 2.3066, 2.4383, 2.6123, 2.1762, 2.2118, 2.3744, 2.2700, + 2.2935, 2.3482, 2.3100, 2.4050, 2.2994, 
2.2794, 2.2825, 2.3585, 2.2868, + 2.4056, 2.3555, 2.2402, 2.2468, 2.2976, 2.2346, 1.8783, 2.4171, 2.3038, + 2.2860, 2.3164, 2.2196, 2.2157, 2.2415, 2.3264, 2.3517, 2.3506, 2.1730, + 2.7051, 2.3974, 2.2808, 2.4565, 2.1615, 2.3847, 2.4392, 2.1036, 2.3728, + 2.3957, 2.4668, 2.3189, 2.2417, 2.6226, 2.2039, 2.2346, 2.2887, 2.2391, + 2.2973, 2.1040, 2.4543, 2.0656, 2.2314, 2.6447, 2.3755, 2.3391, 2.2552, + 2.2017, 2.3095, 2.3668])Parameter containing: +tensor([-0.2197, 0.3354, -0.4777, 0.2296, -0.2571, -0.2782, 0.2680, 0.1041, + -0.3820, -0.2839, -0.0534, -0.0172, -0.4468, -0.0195, 0.1345, -0.0460, + 0.1044, -0.0443, -0.2040, -0.7718, 0.3504, 0.2080, 0.2851, -0.5775, + 0.1081, -0.5376, -0.0747, 0.1722, -0.5021, -0.0547, 0.5642, 0.1543, + 0.1477, -0.4310, -0.2311, -0.6174, -0.4021, 0.1010, 0.8179, -0.0225, + -0.1327, -0.2184, 0.4328, -0.1261, -0.6001, -0.9222, -0.7819, -0.1870, + 0.2756, -0.2588, -0.6792, -0.6569, -0.1249, -0.3367, -1.0191, 0.9087, + 0.7620, -0.6038, 0.0963, -0.0083, -0.1625, 0.1888, -0.1935, -0.7019, + 0.0724, 0.0275, -0.1649, -0.5189, -0.6144, -0.9170, 0.6056, 0.1743, + 0.0684, -0.5347, -0.3405, -0.5641, 0.8142, -0.2712, -0.0467, 0.5850, + 0.3540, 0.2550, -0.6323, -0.1901, -0.5813, -0.3020, 0.2263, -0.0346, + 0.2808, 0.1108, -0.2443, -0.0337, -0.3646, 0.1265, -0.3793, -0.1714, + 0.7608, -0.5926, -0.3150, 1.0996, -0.4086, -0.6049, -0.5115, 0.3061, + 0.5860, 0.2025, 0.4765, 0.4331, -0.0295, -0.1208, 0.3130, 0.4131, + -0.5850, 0.0727, 0.5004, 0.0265, 0.7074, 0.5079, 0.6204, 0.5398, + -0.1818, 0.0148, -0.2653, 0.1086, 0.2554, -0.6922, 0.2110, 0.2183, + -0.4925, 0.0034, 0.0637, -0.3723, 0.3850, -0.4272, 0.8816, -0.0507, + 0.2320, -0.3092, -0.0387, -0.2841, 0.2158, 0.1545, -0.1862, -0.4746, + -0.3839, -0.0917, 0.7386, -0.4229, -0.6547, -0.2977, 0.0176, -0.0818, + 0.2436, -0.1958, -0.6082, 0.9952, -0.1447, -0.3482, -0.2684, -0.6609, + 0.0937, -0.4504, 0.7387, -0.3864, 0.5788, 0.3532, -0.3904, 0.1869, + 0.1422, -0.3027, 0.5089, -0.1565, -0.3660, 0.3908, 0.0425, 
0.0389, + 0.2921, 0.0230, 0.1537, 0.0203, -0.5847, -0.7670, 0.2777, 0.3962, + 0.0310, 0.3750, -0.4292, 1.8772, 0.0144, 0.2069, -0.6072, -0.7556, + 0.0070, 0.6241, 0.5252, -0.0226, 0.4671, 0.0726, -0.6137, 0.4737, + -0.8719, -0.3078, 0.7422, 0.0657, -0.3948, -0.3315, 0.4820, 0.4000, + -0.1885, -0.5593, -0.0326, 0.0303, -0.7097, 0.4710, -0.0080, 0.2596, + -0.5053, -0.2653, 0.0719, -0.2415, 0.7024, 0.0625, 0.0104, 0.8983, + -0.0749, -0.4544, -0.1307, 0.4815, -0.0946, 0.0246, -0.3972, -0.0282, + 0.3114, 0.5682, -0.4983, -0.3602, 0.7954, 0.1412, -0.0285, 0.4178, + -0.0331, 0.0314, 0.1608, -0.5503, 0.3445, -0.3405, 0.3910, 0.3342, + 0.0887, 0.7079, -0.2582, 0.5060, 0.2926, 0.2701, 0.3206, -0.1652, + 0.4704, -0.9332, -0.6158, -0.4013, -0.1978, 0.3000, 0.6680, 0.0541, + -0.2291, -0.1851, 0.2771, -0.1664, 0.2027, -0.5805, -0.2936, 0.0750, + -0.4917, -0.5222, 0.1597, -0.4634, 0.3040, 0.2883, -0.1047, 0.2459, + -0.5699, -1.0970, -0.1979, -0.0615, -0.3691, -0.1197, 0.1136, 0.3624, + 0.0045, 0.2218, -0.7172, 0.5502, 0.3994, -0.0446, -0.1522, -0.6184, + -0.3763, -0.8814, 0.1757, -0.1981, -0.1453, 0.7149, -0.0042, -0.4930, + 0.1774, -0.1074, 0.0255, 0.4448, -0.0075, -0.1348, 0.1764, -0.4782, + 0.4759, 0.3203, -0.3103, -0.3071, 0.5137, 0.1515, -0.2925, 0.4680, + -0.3368, -0.3806, 0.4335, -0.0727, 0.1921, -0.5856, -0.0663, 0.1296, + 0.3556, 0.2977, 0.4352, -0.3491, -0.4985, -0.2220, 0.7073, -0.0541, + -1.2134, 0.2054, 0.2669, 0.2849, 0.9239, -0.1697, -0.2411, 0.5358, + -0.8517, 0.4159, -0.4341, 0.3638, -0.2540, -0.0092, 0.3277, 0.6798, + -0.0468, 0.7107, -0.3865, 0.1758, 0.0242, -0.3292, -0.0701, 0.3912, + -0.8041, -0.4097, -0.0628, -0.3553, 0.4081, 0.0968, 0.1385, 0.5100, + 0.2941, 0.5431, -0.0992, -0.4025, 0.3482, -0.2689, 0.2528, -0.3012, + -0.1930, 0.5852, -0.2023, -0.6191, -0.3207, -0.2631, -0.0191, -0.0539, + -0.0853, -0.1048, -0.3848, 0.0635, 0.0174, 0.1541, 0.2229, -0.3462, + 0.7997, 0.4048, -0.5542, -0.4005, -0.2120, 0.1894, -0.5277, -0.4932, + -0.2205, 0.1349, -0.4992, 
-0.4048, 0.2936, -0.1750, 0.6371, -0.0344, + -0.1503, 0.4773, 0.0841, -0.1642, 0.2792, 0.3737, 1.0111, -1.7460, + 0.4022, 0.1431, -0.0319, 0.4733, 0.1269, -0.5802, 0.6673, 0.7738, + -0.2363, 0.4140, 0.1159, 0.1393, -0.2416, 0.2621, -0.5978, -0.5345, + -0.1574, 0.0450, 0.5317, 0.1276, -0.1231, 0.3749, 0.4681, -0.3119, + 0.4738, 0.0894, 0.0050, -0.7968, -0.0672, -0.0325, -0.2209, 0.5237, + 0.0687, -0.1099, -0.0316, 0.0336, -0.5300, -0.0200, -0.2776, -0.2014, + -0.9375, -0.5626, -0.0044, -0.0603, -0.2738, 0.3892, -0.3124, -0.0381, + -0.2990, 0.0306, -0.2100, 0.0666, -0.0415, -0.3901, 0.4862, 0.0913, + 0.2861, -0.3323, -0.4852, -0.3346, -0.2049, -0.8711, 0.1985, 0.0153, + 0.0071, 0.3348, 0.7854, -0.0110, 0.0555, -0.3576, 0.1926, 0.2818, + 0.2335, 0.0705, -0.2015, -0.1077, -0.0319, -0.1805, -0.0067, -0.1685, + -0.9881, -0.2477, 0.5524, 0.7451, 0.3463, -0.6136, -0.1202, 0.3726, + 0.3561, 0.2569, 0.0566, -0.0621, 0.0672, -0.3779, -0.2898, -0.2102, + -0.5451, 0.1628, -0.1314, 0.3358, 0.4334, -0.3215, 0.3202, 0.4060, + 0.3323, -0.1247, 0.2225, 0.1549, 0.0055, -0.9321, 0.2920, 0.2641, + -0.3068, 0.1259, -0.3560, -0.4065, 0.6489, -0.0242, 0.3904, -0.4408, + 0.5459, -0.2332, 0.3938, -0.2005, 0.0877, -0.6211, -0.8579, 1.0872, + 0.6445, 0.3743, -0.3435, -0.6151, -0.6444, 0.3134, -0.8510, -0.3456, + 0.5685, 0.7083, -0.1262, 0.2567, 0.4642, 0.1214, -0.8930, -0.1473, + 0.1031, -0.6386, -0.1372, 0.5740, -0.2368, -0.2888, 0.0044, -0.2496, + 0.1395, -0.0843, -0.5974, 0.3231, 0.0515, -0.4491, 0.7146, 0.9455, + 0.1059, -0.4779, -0.6536, 0.2079, 0.4047, -0.3084, 0.2931, 0.5544, + 0.1194, -0.2737, -0.1332, -0.3436, 0.6100, 0.0870, -0.0378, -0.0052, + 0.5198, -0.1086, -0.3978, 0.1642, 0.1202, 0.0767, 0.4188, 0.4866, + 0.3156, -0.3905, -0.2053, -0.5197, -0.5034, 0.5509, -0.1465, -1.6720, + 0.2816, 0.1294, -0.1098, 0.2624, -0.7129, 0.2074, 0.4927, 0.4706, + 0.0720, 0.2808, -0.1190, 0.6402, 0.1292, 0.0522, -0.1317, 0.5756, + 0.1856, 0.1908, 0.0235, 0.3120, 0.0203, 0.1948, -0.4117, 0.1373, + 
-0.5174, 0.1786, 0.4331, -0.3683, 0.1093, -0.5001, -0.0453, -0.9516, + -0.1138, -0.7882, -0.6547, -0.0519, -0.1056, 0.2221, 0.0606, -0.2394, + -0.3450, 0.2991, 0.4017, -0.2505, -0.0356, -0.1036, 0.0541, 0.1212, + 0.5994, 0.2916, -0.1002, 0.0949, -0.0342, 0.0201, 0.5530, -0.5057, + -0.3143, -0.7078, -0.0361, -0.3898, -0.1446, -0.7224, -0.1283, 0.0217, + 0.6097, -0.0934, 0.3045, -0.2684, 0.2040, 0.2234, 0.3884, -0.3300, + -0.4949, 0.9895, -0.7241, -0.3068, -0.3328, 0.5213, 0.0418, -0.0891, + 0.6599, -0.3454, 0.3083, 0.3161, -0.7353, -0.5203, 0.0900, 0.7819, + -0.4455, -0.2579, -0.1770, 0.5329, 0.0052, 0.1006, 0.1091, -0.1591, + 0.0947, 0.5937, -0.6986, -0.3535, -0.2971, -0.0830, 0.2296, -0.0583, + -0.5550, 0.0065, 0.4454, -0.2022, 0.0124, 0.5068, -0.7134, -0.0757, + -0.4578, -0.4611, -0.4076, -0.5882, -0.1275, 0.0725, 0.4444, 0.3409, + 0.2102, -0.1735, -0.5005, 0.2124, 0.2399, 0.0604, 0.3048, 0.2323, + -0.4227, -0.2303, -0.9566, -0.6571, -0.2358, 0.2445, 0.3673, -0.2596, + 0.7291, 0.0237, 0.0355, 0.5341, 0.6529, 0.7548, 0.0482, -0.8337, + -0.7353, 0.1694, -0.2487, 0.4523, -0.1397, -0.1567, 0.8041, 0.3133, + -0.5152, -0.9554, -0.3891, 0.1217, 0.3175, 0.0666, -0.1434, 0.2636])Parameter containing: +tensor([[ 0.0218, 0.0299, 0.0232, ..., 0.0067, -0.0137, 0.0305], + [-0.0040, -0.0037, 0.0077, ..., 0.0148, 0.0173, -0.0006], + [ 0.0255, 0.0141, 0.0116, ..., -0.0026, 0.0090, -0.0149], + ..., + [-0.0184, -0.0006, 0.0145, ..., -0.0108, 0.0039, -0.0072], + [ 0.0077, -0.0093, 0.0101, ..., 0.0183, 0.0176, -0.0251], + [-0.0074, 0.0018, -0.0270, ..., -0.0280, -0.0008, 0.0160]])Parameter containing: +tensor([-0.3987, 0.2491, 0.1892, ..., 0.0093, 0.0336, -0.0070])Parameter containing: +tensor([[ 0.0156, 0.0023, -0.0103, ..., -0.0124, 0.0080, -0.0077], + [-0.0054, 0.0075, -0.0130, ..., -0.0132, -0.0064, 0.0061], + [-0.0258, 0.0146, -0.0273, ..., 0.0131, 0.0164, 0.0133], + ..., + [-0.0032, 0.0077, 0.0094, ..., 0.0011, -0.0049, -0.0260], + [-0.0200, 0.0107, -0.0099, ..., -0.0227, 
-0.0027, 0.0055], + [-0.0254, 0.0017, 0.0047, ..., -0.0115, -0.0037, 0.0176]])Parameter containing: +tensor([-7.0679e-02, -4.5776e-02, 9.1003e-02, -9.2896e-02, -1.0553e-01, + -1.0181e-01, -2.5803e-02, -1.9501e-02, 9.9060e-02, -5.9784e-02, + -9.1782e-03, 9.3689e-02, 3.3569e-02, 3.6102e-02, 2.7451e-02, + 1.0675e-01, -2.7191e-02, -5.3497e-02, 1.2787e-02, -1.2189e-01, + -3.2837e-02, 2.3331e-02, 5.8624e-02, 3.2562e-02, 4.0222e-02, + 4.9957e-02, -4.8889e-02, -1.0880e-02, 6.6757e-03, 8.3069e-02, + 3.0701e-02, -5.0720e-02, -2.2461e-02, -2.9907e-02, 3.4668e-02, + -6.8115e-02, -6.2500e-02, 6.7017e-02, 1.5381e-01, -4.2480e-02, + -2.7969e-02, 3.5065e-02, 4.3182e-02, -4.9164e-02, -5.0659e-02, + -5.5298e-02, 4.4159e-02, -7.7087e-02, -3.0991e-02, 3.7048e-02, + -2.8351e-02, -1.1823e-01, 2.7409e-03, -3.3997e-02, 1.4296e-03, + -2.3537e-03, 8.4778e-02, -2.4139e-02, -9.0179e-03, 7.6790e-03, + -6.6490e-03, 3.6346e-02, -2.0126e-02, 2.2537e-02, -4.6997e-03, + 7.3792e-02, -8.8745e-02, -2.4490e-02, -1.2347e-01, -1.2634e-01, + 2.9633e-02, -5.7564e-03, -2.0911e-01, -8.7036e-02, -2.5528e-02, + -5.9082e-02, 3.6865e-02, 6.3965e-02, -7.2632e-03, 7.9834e-02, + 4.2664e-02, 6.3232e-02, 3.9459e-02, -5.0690e-02, -8.1726e-02, + -5.8594e-02, 1.1548e-01, -4.0131e-02, 3.0319e-02, -9.5337e-02, + -7.4524e-02, -7.6843e-02, -5.9021e-02, 9.4055e-02, 2.3514e-02, + -2.0996e-02, 1.3054e-02, -7.8247e-02, -9.8022e-02, 1.9202e-01, + 5.8594e-02, -5.3040e-02, -1.1823e-01, 1.5015e-02, 1.7075e-02, + -4.1718e-02, -1.9974e-02, -2.4048e-02, 3.1525e-02, -3.2898e-02, + 3.1113e-02, 9.5459e-02, -1.1664e-01, -5.0240e-03, -5.0232e-02, + 1.6508e-03, 6.4453e-02, 4.3365e-02, -2.9495e-02, 9.2407e-02, + -3.8086e-02, 1.9445e-03, 2.4414e-02, 9.6069e-02, 7.1106e-02, + -2.1118e-02, 1.0645e-01, 9.8877e-02, 4.7058e-02, 2.4948e-02, + 3.3783e-02, -5.2826e-02, 1.4795e-01, 1.4429e-01, 2.7863e-02, + 7.3608e-02, 3.7670e-03, -1.4319e-01, 8.0566e-02, -2.8549e-02, + 1.1816e-03, 9.8145e-02, 2.5558e-02, 6.2195e-02, -1.2047e-02, + 5.1819e-02, 
-2.8744e-03, -1.2085e-01, -6.2988e-02, -8.5388e-02, + -3.7994e-02, 1.3962e-02, 5.8136e-02, -7.4158e-02, 1.6479e-02, + 1.1902e-01, 6.0303e-02, -3.8879e-02, -7.4951e-02, -8.5693e-02, + -3.5400e-02, 3.5919e-02, -2.8793e-02, -7.5607e-03, 5.2643e-02, + 5.1361e-02, 1.1829e-01, -6.7810e-02, 5.5786e-02, -5.8502e-02, + 1.4478e-01, -8.7830e-02, -3.9062e-02, 1.3940e-01, -1.3527e-02, + 1.5236e-02, -2.0996e-02, -2.1057e-02, 3.6255e-02, 6.7902e-04, + -7.9163e-02, -4.9629e-03, 2.2064e-02, 7.5562e-02, -1.0864e-01, + 1.3062e-01, -4.4189e-02, -5.3076e-01, 7.3792e-02, 5.9814e-02, + -1.1389e-01, -3.5919e-02, -4.1199e-02, 5.3528e-02, 3.6255e-02, + -5.6801e-03, 3.1952e-02, 4.8248e-02, -1.8665e-01, 3.3813e-02, + 4.9408e-02, -8.1558e-03, -3.0151e-02, -1.5857e-01, -3.2135e-02, + -2.4261e-03, 1.2337e-02, -3.3936e-02, -4.2603e-02, -1.1481e-01, + -1.0529e-02, 3.8886e-04, -7.4707e-02, 6.2927e-02, 7.0190e-02, + -8.8501e-02, 6.1066e-02, -7.7248e-04, 2.6871e-02, 1.0718e-01, + 5.8838e-02, 9.2102e-02, -1.8555e-02, 4.2358e-02, 2.3666e-02, + 1.4282e-01, -7.4997e-03, 3.2043e-02, 6.7749e-02, 4.3060e-02, + -6.9641e-02, -1.2589e-02, 9.8022e-02, -4.2053e-02, -2.5085e-02, + -6.2439e-02, 1.0425e-01, -7.3608e-02, -7.2449e-02, 5.0262e-02, + -1.2352e-02, 1.0391e-02, -1.8723e-02, 2.0737e-02, -9.9243e-02, + -8.9294e-02, -1.3684e-01, 1.4172e-01, -2.3300e-02, 3.2745e-02, + -8.8654e-03, 3.0685e-02, 3.9764e-02, 3.5309e-02, -4.6234e-02, + -2.1149e-02, 4.3274e-02, -4.4067e-02, -5.4352e-02, -8.4290e-02, + 2.7222e-02, -8.1421e-02, 5.6183e-02, 6.7017e-02, 3.0563e-02, + -1.0422e-02, 6.9092e-02, -5.5115e-02, 1.2091e-01, -3.0243e-02, + -8.4915e-03, -2.3682e-02, -6.2256e-02, -2.4628e-02, 4.7821e-02, + -4.0558e-02, -1.2445e-01, 7.6050e-02, -6.8420e-02, 5.0446e-02, + -1.0040e-01, -1.0150e-01, 1.3771e-02, 7.6904e-02, -1.0229e-01, + 6.4331e-02, -3.3173e-02, -3.2837e-02, 1.0582e-02, 9.0332e-02, + -1.3123e-02, -1.7441e-02, 5.4993e-02, -8.1848e-02, -1.4786e-02, + -8.4900e-02, 7.5874e-03, -1.2466e-02, -3.9246e-02, -7.0557e-02, + 
-4.0375e-02, -9.9060e-02, -5.7709e-02, -1.0333e-01, 4.7516e-02, + 7.1899e-02, -9.7046e-02, 1.2421e-01, -6.1462e-02, -5.6061e-02, + 4.9774e-02, -3.4271e-02, 1.1642e-02, 1.8448e-02, 3.9825e-02, + -4.1595e-02, 5.0964e-02, 5.9143e-02, -8.1665e-02, 6.6040e-02, + -1.4641e-02, -3.8696e-02, 1.3565e-02, -3.2318e-02, 1.0596e+00, + -3.0182e-02, -9.3933e-02, 7.8491e-02, -2.4429e-02, 6.4331e-02, + -5.5817e-02, 2.8839e-02, -2.0096e-02, -2.3560e-02, 4.5441e-02, + -7.0007e-02, -1.2067e-01, 7.7698e-02, -1.1734e-02, 1.5900e-02, + 6.6101e-02, -3.7964e-02, -6.2439e-02, 5.0079e-02, -6.2347e-02, + 7.5867e-02, -1.2903e-01, -6.0425e-02, -1.4717e-02, 2.4750e-02, + 8.2703e-02, -4.2084e-02, -3.1319e-03, -9.4238e-02, 4.2175e-02, + 1.9730e-02, -8.8074e-02, 5.4359e-03, -3.2288e-02, 7.5806e-02, + -2.3331e-02, 4.3915e-02, 4.5868e-02, -1.6342e-02, 1.1243e-01, + -1.0236e-01, -1.3298e-02, -1.0358e-01, 9.4299e-03, -7.5439e-02, + -7.3242e-02, 6.2195e-02, 1.0040e-01, 2.8320e-02, -2.3926e-02, + -3.8696e-02, -5.5542e-03, 2.3041e-02, -5.6702e-02, -6.7322e-02, + 5.0720e-02, -7.1045e-02, -8.0994e-02, -1.0010e-01, 5.1483e-02, + -8.4839e-02, 5.9662e-02, 1.0419e-01, 6.5002e-02, -1.2711e-02, + 7.4646e-02, -1.2207e-01, 3.7170e-02, 4.0497e-02, -1.7624e-02, + -2.0355e-02, -6.6406e-02, -5.2887e-02, 4.9782e-03, 5.8350e-02, + -5.6641e-02, 5.7526e-02, -3.4393e-02, -6.4453e-02, 4.7150e-02, + 2.0538e-02, 5.0598e-02, -1.2779e-02, -7.1533e-02, 1.0449e-01, + -1.6251e-02, -8.4412e-02, 7.5195e-02, 5.9601e-02, 6.1310e-02, + 2.2903e-02, -2.9999e-02, -3.6041e-02, 6.2469e-02, 5.2399e-02, + 7.8247e-02, 3.4546e-02, -4.0344e-02, 2.4460e-02, 7.3910e-04, + 7.0984e-02, 9.1003e-02, 7.1640e-03, -2.1863e-01, 3.0319e-02, + -3.5057e-03, -1.6678e-02, -1.0938e-01, -1.0541e-01, -1.1330e-02, + -1.3440e-01, 7.4692e-03, 6.9336e-02, -1.3794e-01, -2.7069e-02, + 1.2329e-02, 7.8247e-02, -6.4011e-03, -2.4475e-02, -8.3740e-02, + 1.9165e-02, 8.7585e-02, 9.7229e-02, -2.7100e-02, -8.3618e-03, + -2.2491e-02, -2.1179e-02, 1.0597e-02, -1.0992e-01, 
-6.5979e-02, + -6.6650e-02, 5.6244e-02, 3.7109e-02, 1.0767e-01, 9.8938e-02, + -6.0822e-02, -3.7079e-02, -1.0614e-03, 4.0412e-04, -3.8300e-02, + 1.0895e-01, -1.0541e-01, 1.1792e-01, 2.0264e-02, 6.9284e-04, + 4.5929e-02, 5.2910e-03, -5.2490e-02, 4.3701e-02, 5.1880e-02, + 7.4615e-03, -9.7885e-03, -3.8330e-02, -1.2659e-01, -8.4656e-02, + -1.2488e-01, 5.0110e-02, 2.5848e-02, -4.6043e-03, 4.7913e-02, + -6.5857e-02, 5.9387e-02, 3.1830e-02, 1.8906e-02, 7.4291e-04, + -1.1169e-02, -1.8936e-02, 1.2366e-01, 7.6294e-02, 3.8849e-02, + -1.0938e-01, 3.0029e-02, -5.6152e-02, 1.4809e-02, -6.6345e-02, + 1.0468e-01, 3.8574e-02, 7.0992e-03, -1.4160e-02, 9.8572e-02, + 8.5266e-02, -5.5328e-02, 7.7148e-02, -1.0376e-02, 4.1046e-03, + -1.2915e-01, 6.7871e-02, 9.3231e-03, -1.0138e-01, 1.3443e-02, + 4.5013e-02, -3.8385e-05, -3.0212e-02, -1.8784e-02, 1.2354e-01, + 6.4819e-02, 3.2310e-03, -5.0873e-02, -4.9095e-03, -5.7465e-02, + 3.1525e-02, -2.1057e-03, 5.0720e-02, -4.4495e-02, -1.9178e-03, + 5.1392e-02, 6.4514e-02, -9.3155e-03, 1.0876e-01, -4.2305e-03, + -1.2292e-01, 9.3994e-02, -1.7920e-03, 4.2694e-02, 1.6922e-02, + 4.1565e-02, -3.9764e-02, -5.7770e-02, 1.2360e-02, 7.4097e-02, + 8.1909e-02, -4.5166e-02, -5.3833e-02, -7.1869e-03, -9.4528e-03, + 4.8981e-02, -2.7710e-02, 1.0260e-01, -1.3412e-02, 1.1505e-02, + 6.5247e-02, 1.0474e-01, -2.1515e-02, -3.8239e-02, -1.0846e-01, + 5.4108e-02, -2.1985e-01, -3.7720e-02, 4.5532e-02, -8.8135e-02, + -2.5528e-02, -1.0339e-01, -7.7393e-02, -1.1940e-03, -5.2185e-02, + -6.1951e-02, 2.1973e-02, 3.8452e-02, -4.7058e-02, 2.1606e-02, + 2.3384e-03, 1.9165e-02, -2.5101e-02, -3.4466e-03, 9.0942e-02, + 1.3977e-01, -1.2122e-01, -8.9294e-02, 9.1370e-02, 1.8951e-02, + -8.0185e-03, 1.6144e-02, -1.4145e-02, 6.8848e-02, -1.4801e-02, + -3.5522e-02, -4.9561e-02, 2.6398e-02, -1.1346e-01, -3.8662e-03, + 1.0541e-01, -1.7624e-02, -2.3285e-02, 7.0251e-02, 6.9542e-03, + 3.5156e-02, 1.2573e-01, -4.9438e-02, -7.5317e-02, -3.8971e-02, + -6.6223e-02, -2.0905e-02, -6.0944e-02, 
-6.0944e-02, 1.3611e-01, + 3.0319e-02, 7.3853e-02, -7.8812e-03, 7.2327e-02, 3.3951e-03, + 9.3140e-02, 1.0571e-01, 3.3150e-03, -3.7262e-02, 1.6083e-02, + 7.7705e-03, 3.0014e-02, -1.8890e-02, 7.6599e-02, -5.6793e-02, + 6.9336e-02, 5.2399e-02, 3.0533e-02, -3.6560e-02, -1.0094e-02, + -5.7037e-02, 1.3550e-01, 4.0497e-02, -5.1819e-02, -1.9897e-02, + -6.3965e-02, 1.9617e-01, -1.3374e-02, -7.1350e-02, 2.0462e-02, + 4.1779e-02, -9.9060e-02, -8.6670e-02, 4.7607e-02, -1.2292e-01, + 7.4402e-02, -9.5764e-02, -1.0065e-01, 4.4739e-02, -3.7598e-02, + -1.0968e-01, -1.5881e-01, 1.7334e-02, -9.2651e-02, 1.0797e-01, + 9.8267e-02, 4.5052e-03, -2.0615e-02, -7.7393e-02, -3.9635e-03, + 5.2277e-02, 6.7932e-02, 1.3293e-01, 1.8339e-03, -7.7454e-02, + -2.6428e-02, 1.0175e-01, 5.3864e-02, -5.4901e-02, -5.7068e-03, + -7.3486e-02, -8.3679e-02, -4.8523e-02, 4.5258e-02, 2.5436e-02, + -5.6793e-02, 4.0558e-02, 6.2744e-02, 5.5817e-02, 5.1941e-02, + -6.1279e-02, 2.9028e-01, -1.9169e-03, -8.4229e-02, -4.5868e-02, + 3.0945e-02, 6.5552e-02, 4.5898e-02, 6.6589e-02, -4.1199e-02, + -6.7444e-02, 3.7804e-03, 1.2665e-02, 1.0422e-02, -9.9976e-02, + -2.8168e-02, 3.8586e-03, -2.4567e-02, -9.8328e-02, 3.9581e-02, + 7.5562e-02, -7.6180e-03, 2.4734e-02, 3.1235e-02, -7.0801e-02, + 1.0535e-01, -1.2085e-02, 7.4844e-03, 1.0841e-02, -1.1749e-01, + 5.3467e-02, 1.4549e-02, -5.8289e-02, -1.5213e-02, 1.1957e-01, + 9.1125e-02, 1.3904e-01, 1.0822e-01, -4.1321e-02, 5.4565e-02, + 8.5083e-02, -7.4341e-02, 1.5030e-02, -3.3478e-02, -2.0203e-02, + 2.5604e-02, 3.6987e-02, -1.3008e-02, 6.3232e-02, -7.7454e-02, + -1.4111e-01, -8.8074e-02, -3.7506e-02, -4.3671e-02, 8.7402e-02, + 4.4617e-02, 4.7424e-02, -1.4565e-02, -1.0992e-01, 1.0181e-01, + -9.0637e-02, 1.2854e-01, 6.5369e-02, 2.7649e-02, -2.2308e-02, + -2.0157e-02, 6.4331e-02, -3.9062e-03, 2.3636e-02, 2.3666e-02, + 1.4148e-01, -2.2064e-02, 2.6810e-02, -1.4656e-02, -6.7688e-02, + -4.1504e-02, -2.7832e-02, -4.6509e-02, 4.8615e-02, 6.4087e-02, + 9.7504e-03, -1.4923e-02, -2.2110e-02, 
8.0444e-02, -2.2034e-02, + -2.3918e-03, -3.1235e-02, 2.3834e-02])Parameter containing: +tensor([1.6302, 1.7985, 1.6646, 1.6736, 1.6246, 1.7435, 1.6344, 1.5951, 1.7835, + 1.7151, 1.7555, 1.7396, 1.7075, 1.6981, 1.6234, 1.7331, 1.7033, 1.7175, + 1.6565, 0.4716, 1.7354, 1.6599, 1.7952, 1.7343, 1.7564, 1.7225, 1.7034, + 1.7112, 1.7357, 1.6902, 1.7430, 1.6893, 1.7471, 1.7559, 1.6642, 1.6995, + 1.8257, 1.6706, 1.7690, 1.5814, 1.7597, 1.6698, 1.7122, 1.8090, 1.8079, + 1.6904, 1.7110, 1.7207, 1.6608, 1.6236, 1.7413, 1.7274, 1.7435, 1.7000, + 1.8054, 1.8643, 1.7005, 1.6365, 1.7551, 1.7265, 1.7312, 1.6578, 1.7392, + 1.7119, 1.6265, 1.6944, 1.6835, 1.6661, 1.7137, 1.6735, 1.7496, 1.7183, + 1.8942, 1.6574, 1.7160, 1.6588, 1.6898, 1.6397, 1.6588, 1.7767, 1.7230, + 1.7033, 1.7254, 1.6983, 1.6907, 1.6416, 1.7969, 1.7425, 1.7828, 1.7353, + 1.7485, 1.6252, 1.7266, 1.7510, 1.7018, 1.6580, 1.7274, 1.6662, 1.6723, + 1.6758, 1.6462, 1.8060, 1.6850, 1.6715, 1.5962, 1.8119, 1.7262, 1.6277, + 1.8163, 1.7033, 1.6744, 1.7379, 1.6901, 1.7661, 1.6420, 1.7525, 1.6696, + 1.6702, 1.7037, 1.6573, 1.7341, 1.7748, 1.6981, 1.6995, 1.7189, 1.6223, + 1.6765, 1.7105, 1.7716, 1.7087, 1.7001, 1.7687, 1.8099, 1.7073, 1.6350, + 1.7793, 1.6807, 1.7039, 1.6787, 1.6660, 1.7035, 1.7318, 1.7495, 1.7426, + 1.7035, 1.7014, 1.7568, 1.7150, 1.7569, 1.7416, 1.6580, 1.7700, 1.7185, + 1.7293, 1.7545, 1.4643, 1.7184, 1.7745, 1.7545, 1.7200, 1.6514, 1.6298, + 1.6485, 1.7160, 1.6818, 1.6670, 1.7770, 1.7118, 1.7166, 1.7775, 1.7387, + 1.6928, 1.6731, 1.6660, 1.7970, 1.7641, 1.7415, 1.6976, 1.6877, 1.7292, + 1.7794, 1.6900, 1.7106, 1.7634, 1.7742, 1.5609, 1.6714, 2.4107, 1.7411, + 1.7388, 1.7277, 1.6532, 1.7926, 1.7386, 1.7353, 1.7197, 1.6594, 1.7166, + 2.9263, 1.6523, 1.6338, 1.6537, 1.6623, 1.8019, 1.7601, 1.7041, 1.5999, + 1.6747, 1.6999, 1.7111, 1.7555, 1.6907, 1.7676, 1.6870, 1.7656, 1.6681, + 1.6671, 1.7032, 1.6807, 1.6889, 1.7178, 1.7326, 1.6222, 1.6898, 1.7453, + 1.6968, 1.7163, 1.7438, 1.7356, 1.7753, 1.7192, 
1.6955, 1.7155, 1.6026, + 1.7237, 1.7517, 1.6404, 1.6831, 1.6877, 1.7579, 1.7780, 1.6317, 1.8032, + 1.6840, 1.6494, 1.7386, 1.6764, 1.7246, 1.7601, 1.7989, 1.7198, 1.7053, + 1.7228, 1.7097, 1.8476, 1.6812, 1.6919, 1.7276, 1.6427, 1.6771, 1.6796, + 1.6661, 1.7377, 1.7073, 1.7061, 1.6469, 1.7119, 1.6620, 1.7859, 1.6810, + 1.6796, 1.7163, 1.6304, 1.6605, 1.7343, 1.6836, 1.6586, 1.6460, 1.7383, + 1.6565, 1.6995, 1.6848, 1.6886, 1.7302, 1.6319, 1.7084, 1.6455, 1.6820, + 1.7227, 1.6099, 1.6801, 1.7885, 1.7595, 1.7140, 1.7157, 1.8043, 1.6373, + 1.7648, 1.7183, 1.7589, 1.7482, 1.7072, 1.6913, 1.7318, 1.7578, 1.7108, + 1.6067, 1.7158, 1.7523, 1.6791, 1.7145, 1.7314, 1.7042, 1.7376, 1.6796, + 1.8628, 1.7177, 1.7367, 1.6759, 1.7337, 1.7052, 1.6002, 1.7024, 1.7880, + 1.0642, 1.6845, 1.7041, 1.6755, 1.6165, 1.7929, 1.6193, 1.6957, 1.6715, + 1.5950, 1.6571, 1.6822, 1.6901, 1.7022, 1.7024, 1.7112, 1.6735, 1.6967, + 1.6778, 1.6575, 1.7305, 1.8147, 1.6804, 1.6963, 1.6868, 1.7943, 1.5572, + 1.6860, 1.7039, 1.7471, 1.6538, 1.6997, 1.7835, 1.8064, 1.7628, 1.7780, + 1.6053, 1.6869, 1.7610, 1.7246, 1.7329, 1.7138, 1.6687, 1.6806, 1.6728, + 1.6817, 1.7294, 1.7214, 1.7234, 1.7492, 1.6789, 1.7357, 1.7378, 1.7139, + 1.7068, 1.7499, 1.6821, 1.7469, 1.6772, 1.6623, 1.6863, 1.7077, 1.6838, + 1.6427, 1.6845, 1.6781, 1.7001, 1.6867, 1.6507, 1.7201, 1.7097, 1.7876, + 1.6753, 1.7640, 1.7407, 1.7754, 1.6692, 1.7506, 1.7908, 1.7180, 1.7826, + 1.7048, 1.7631, 1.7375, 1.7276, 1.6960, 1.6728, 1.6715, 1.6916, 1.6660, + 1.8672, 2.4371, 1.7775, 1.7589, 1.6740, 1.6990, 1.6844, 1.7150, 1.6511, + 1.7197, 1.7364, 1.6682, 1.6788, 1.7742, 1.7689, 1.6509, 1.7058, 1.5937, + 1.6183, 1.7320, 1.6366, 1.7162, 1.6393, 1.6547, 1.6122, 1.6934, 1.7644, + 1.7195, 1.7261, 1.5334, 1.6655, 1.7531, 1.6899, 1.7284, 1.7253, 1.7140, + 1.7599, 1.7455, 1.6877, 1.7144, 1.7063, 1.7585, 1.6483, 1.7034, 1.7872, + 1.6436, 1.6991, 1.7315, 1.6869, 1.7266, 1.6941, 1.6680, 1.7382, 1.7115, + 1.6838, 1.7406, 1.7487, 1.7284, 1.6998, 1.7120, 
1.7665, 1.7601, 1.7432, + 1.6692, 1.6749, 1.7034, 1.7852, 1.6845, 1.7553, 1.6064, 1.6902, 1.6817, + 1.7528, 1.7495, 1.6978, 1.7445, 1.7844, 1.7044, 1.6215, 1.8064, 1.6947, + 1.7256, 1.7462, 1.6445, 1.7153, 1.7068, 1.6898, 1.7033, 1.6971, 1.6012, + 1.7222, 1.6898, 1.6337, 1.6189, 1.7169, 1.6581, 1.6849, 1.7840, 1.7095, + 1.7210, 1.6652, 1.6866, 1.7299, 1.7457, 1.6161, 1.6876, 1.6810, 1.6921, + 1.7402, 1.6973, 1.6989, 1.7322, 1.6970, 1.6480, 1.6529, 1.7896, 1.7318, + 1.7364, 1.6680, 1.7424, 1.7295, 1.6991, 1.7154, 1.7562, 1.7087, 1.8444, + 1.7505, 1.7476, 1.6686, 1.7369, 1.7072, 1.7248, 1.6937, 1.7070, 1.6945, + 1.7787, 1.7125, 1.7263, 1.7831, 1.6013, 1.7178, 1.6786, 1.7872, 1.6886, + 1.8013, 1.6767, 1.8089, 1.5642, 1.7826, 1.7168, 1.7179, 1.7068, 1.7395, + 1.6109, 1.8325, 1.7497, 1.7236, 1.6414, 1.7245, 1.6947, 1.7587, 1.6420, + 1.6678, 1.7057, 1.6923, 1.7012, 1.6248, 1.7218, 1.7242, 1.6999, 1.6844, + 1.7597, 1.7158, 1.7671, 1.7628, 1.7141, 1.7269, 1.7585, 1.7083, 1.7444, + 1.7899, 1.7588, 1.7001, 1.7858, 1.6202, 1.7109, 1.7387, 1.7646, 1.8021, + 1.6163, 1.7132, 1.7312, 1.8329, 1.9605, 1.6433, 1.7724, 1.7457, 1.6874, + 1.7289, 1.7210, 1.7764, 1.7046, 1.6804, 1.7570, 1.7835, 1.6932, 1.7622, + 1.7469, 1.6904, 1.6917, 1.6915, 1.7520, 1.7008, 1.6287, 1.7597, 1.6486, + 1.6401, 1.7387, 1.6101, 1.5982, 1.7633, 1.7166, 1.7213, 1.7841, 1.6996, + 1.6279, 1.6920, 1.7200, 1.7078, 1.6533, 1.5925, 1.7321, 1.7587, 1.6523, + 1.7181, 1.6851, 1.7170, 1.7081, 1.7212, 1.7277, 1.7332, 1.8115, 1.6485, + 1.7706, 1.6613, 1.6623, 1.6581, 1.7324, 1.6186, 1.7720, 1.6911, 1.7587, + 1.7309, 1.6524, 1.7619, 1.6940, 1.6903, 1.7228, 1.7862, 1.6758, 1.6580, + 1.6437, 1.7973, 1.6737, 1.7409, 1.6853, 1.7406, 0.3935, 1.6961, 1.6231, + 1.7050, 1.5714, 1.6899, 1.7399, 1.6476, 1.6983, 1.7059, 1.6509, 1.7344, + 1.6900, 1.7586, 1.6770, 1.6974, 1.6979, 1.6864, 1.7182, 1.7086, 1.6912, + 1.7265, 1.7103, 1.7262, 1.7304, 1.6552, 1.6509, 1.6989, 1.7162, 1.7130, + 1.7572, 1.7408, 1.7483, 1.6416, 1.6755, 1.7099, 
1.6839, 1.7110, 1.6536, + 1.6486, 1.7170, 1.6966, 1.6698, 1.7294, 1.7431, 1.5646, 1.7819, 1.7805, + 1.7427, 1.7266, 1.7108, 1.7271, 1.6740, 1.6893, 1.7412, 1.6017, 1.7220, + 1.7766, 1.7968, 1.7975, 1.7024, 1.7743, 1.6766, 1.6991, 1.7311, 1.7651, + 1.7055, 1.6984, 1.7207, 1.6710, 1.6696, 1.7045, 1.8250, 1.7300, 1.6860, + 1.7420, 1.7058, 1.7018, 1.7314, 1.8237, 1.7121, 1.6656, 1.6345, 1.6813, + 1.6482, 1.6091, 1.6466])Parameter containing: +tensor([ 1.3478e-01, 4.9497e-02, -1.9917e-01, -2.5184e-02, 1.8369e-01, + 1.4607e-01, 1.4084e-01, 6.8528e-02, -1.5947e-01, -6.2207e-02, + 7.6121e-02, 3.2098e-03, -1.8940e-01, -4.3906e-02, -1.2922e-01, + -4.1989e-02, -2.0775e-02, -6.5565e-03, 7.1063e-05, 4.6815e+00, + -3.8654e-02, 1.2217e-01, 4.5751e-02, 1.4584e-02, -3.0041e-02, + -1.6267e-02, 6.6473e-02, 1.1673e-02, 6.2238e-02, -2.7864e-02, + 6.8211e-02, 4.2821e-02, -2.0303e-02, 1.4535e-01, 7.2629e-02, + -9.8829e-02, 1.5733e-02, 1.1677e-02, -1.0973e-01, 1.0373e-01, + 5.3048e-02, 4.4230e-02, -1.5395e-02, 2.9456e-02, -2.1916e-02, + -1.3948e-02, 7.3127e-02, -2.4933e-02, 8.8659e-02, 9.9079e-02, + -5.0301e-02, -5.6772e-02, 7.0892e-02, -7.0100e-02, 2.1891e-02, + 1.0014e-01, -4.8898e-02, -1.2617e-01, 4.7462e-02, 5.8843e-02, + 7.5138e-02, -7.0855e-02, 8.9971e-02, 3.2970e-02, 8.8290e-02, + 5.7166e-03, 1.2110e-01, 1.0937e-01, -2.0420e-02, 2.1111e-01, + 1.3070e-01, 9.3519e-02, 7.5941e-01, 1.3822e-01, -1.2845e-01, + 7.7136e-02, 5.3268e-02, -1.1566e-01, -9.7905e-03, 6.5708e-02, + -5.1334e-04, 1.6957e-02, -7.0916e-02, 1.5199e-01, -1.2595e-02, + -4.9605e-02, 1.3342e-01, 1.0708e-01, -3.1504e-02, 5.3242e-03, + -2.3024e-02, 2.6981e-02, 4.4238e-02, -6.3475e-02, -1.1366e-01, + -2.3226e-02, 3.2116e-02, -1.0859e-01, -1.1022e-01, 3.6969e-02, + 3.9728e-02, -7.8961e-02, -6.6003e-02, -3.1561e-02, 6.1650e-02, + 8.0293e-03, 6.8628e-02, -4.7751e-02, 6.1943e-04, -4.6869e-02, + -6.5288e-02, -4.3023e-02, -5.4249e-02, 5.9192e-02, 1.2167e-01, + -5.0382e-02, -5.4665e-02, -1.0046e-02, 1.7697e-02, 1.4857e-01, + 
-6.6598e-02, -2.7876e-01, -1.4727e-01, 2.0667e-02, -1.5421e-01, + -4.0336e-02, -1.7894e-01, 7.5444e-02, 2.9734e-02, -6.8246e-02, + -1.3805e-01, 2.5628e-02, -5.8404e-02, -1.9855e-02, -1.0818e-01, + 5.1029e-02, -2.3740e-02, -2.3311e-02, -4.2155e-02, -6.1693e-02, + -8.1013e-04, 2.0931e-02, 2.0602e-02, -3.5260e-02, 1.0996e-01, + -3.2859e-02, -1.4331e-02, -3.7377e-03, -1.2704e-01, 1.7471e-01, + 2.6021e-01, -8.9699e-02, -1.3109e-01, -1.8212e-02, -3.0574e-02, + -5.7635e-01, 1.9360e-02, -5.0405e-03, -3.5920e-02, 4.8079e-02, + 1.9668e-01, -1.7959e-01, -2.4585e-02, 2.3465e-02, 1.3495e-02, + 2.6305e-02, -4.9910e-02, 7.2364e-02, -3.3804e-02, 4.1981e-02, + -3.9400e-02, 9.3760e-02, 1.3973e-03, -1.1626e-03, 7.6308e-02, + -1.9463e-01, -4.3892e-02, -1.6021e-02, -1.9729e-02, 6.7893e-02, + -6.5787e-02, -5.7712e-02, 1.4709e-01, 6.0649e-02, 4.0690e-02, + -1.8589e-01, 4.2896e-02, 7.1578e-01, -7.9182e-02, -8.3033e-02, + -8.5046e-02, -1.4351e-01, 9.7572e-02, 1.8969e-01, -8.5853e-02, + 1.7470e-02, -3.4782e-02, -1.6035e-01, 3.4463e-01, 6.4275e-02, + -2.2809e-01, -5.6076e-02, -2.3670e-02, 8.6391e-02, 4.4294e-02, + 1.7742e-01, -1.4438e-01, 1.0222e-01, 1.5188e-01, -6.5791e-02, + 4.2606e-02, 2.7326e-01, -3.9982e-02, -6.7293e-02, -2.9540e-02, + 2.9845e-02, -4.9459e-02, 4.1518e-02, -1.2998e-01, -2.7670e-01, + 5.9885e-03, -2.6303e-02, 1.0029e-01, 6.5153e-02, 2.8001e-03, + -2.1397e-02, 8.1904e-02, 1.3961e-02, 1.6888e-01, -4.7473e-02, + -2.2614e-02, 7.6486e-02, 1.7701e-03, 7.7329e-02, -1.2749e-02, + 1.8841e-02, -1.0814e-02, 9.6482e-02, 7.9134e-02, 1.1832e-02, + 9.7106e-02, 1.4173e-01, -3.1437e-02, 7.6158e-02, 2.8705e-02, + 9.3654e-03, 5.6769e-02, -3.9432e-02, 8.9598e-03, 3.7227e-02, + -2.9699e-02, 2.5160e-02, 1.2401e-01, -1.3756e-01, 1.6073e-01, + -7.9136e-02, -1.2519e-02, -1.4151e-01, 5.5290e-02, 2.9537e-02, + -6.5784e-02, -1.6376e-02, -9.2159e-03, -4.8453e-03, -5.9228e-02, + 4.9723e-02, -3.6096e-02, -1.2492e-02, -6.2044e-02, -7.8530e-02, + -6.7506e-03, 5.7670e-02, 9.7309e-02, -1.9286e-01, 
-2.4384e-02, + 8.4430e-02, -8.3274e-02, 1.8163e-02, 9.2198e-02, -1.4828e-01, + 1.1023e-01, 2.1166e-01, -5.7077e-02, 7.9818e-02, 3.2513e-02, + 7.4148e-02, 6.8250e-02, -1.0043e-02, 2.2324e-01, -1.5617e-01, + -9.4417e-02, 1.2959e-02, 1.2156e-01, 5.6972e-02, 9.1363e-02, + -5.3003e-02, 2.4945e-01, -3.0141e-02, 1.0665e-01, 6.6269e-02, + 5.7760e-02, 8.7219e-02, -2.8721e-02, 1.3285e-01, 1.1745e-02, + 2.0377e-02, 4.3067e-02, -9.6992e-02, 3.5457e-02, -7.1593e-02, + 8.9201e-02, 3.3648e-02, -4.9995e-02, -8.9329e-02, 1.1568e-02, + -1.2160e-02, 2.4013e-02, 1.0378e-01, 2.1693e-01, -2.2300e-02, + -1.3594e-01, 2.4443e-01, -8.2670e-02, -3.2104e-02, -8.0759e-01, + -2.5522e-01, 3.1634e-02, 1.2173e-02, 5.3848e-02, -1.6714e-01, + 2.2496e-01, -2.5352e-02, 1.1204e-01, -6.9995e-02, 1.4671e-01, + 7.5391e-02, 1.2656e-02, -7.6575e-04, -1.0763e-02, -3.7951e-02, + -8.5652e-02, 1.6227e-03, -1.0500e-01, -6.3366e-02, 1.4564e-01, + 2.8785e-02, 7.8663e-02, 1.4579e-01, 5.0819e-02, -4.8727e-02, + -6.2212e-02, 8.5918e-02, 1.3938e-02, 1.6487e-01, -5.1246e-02, + -7.7145e-02, -6.3459e-02, -6.9927e-05, -8.5017e-02, -7.5265e-02, + -1.3347e-02, -4.1582e-02, -1.3234e-03, -1.2145e-01, -6.8723e-03, + -3.2144e-02, 5.8249e-02, 1.8110e-01, 1.0470e-01, 1.0201e-01, + 8.5427e-02, -1.7104e-02, -2.0914e-02, -4.8454e-02, 9.2019e-03, + 4.8977e-02, 9.0650e-02, 1.0401e-01, -4.5989e-02, -2.2640e-02, + -1.0117e-01, 1.3499e-01, 4.5189e-02, 7.2833e-03, -5.6661e-02, + -3.6711e-02, -2.9658e-02, -7.0319e-02, -9.1909e-02, -2.3429e-02, + 6.0404e-03, -4.5523e-02, -3.0660e-02, -3.0381e-02, 1.5182e-02, + -7.5460e-03, -1.4732e-02, -4.3778e-03, -4.7716e-02, 1.3978e-02, + 9.6924e-02, -6.2953e-03, 7.1078e-02, -1.9884e-02, -1.0224e-01, + -1.7645e-01, -6.0434e-02, 7.0719e-02, 1.1863e-02, -1.4188e-02, + -8.5998e-02, 8.1130e-02, 5.8946e-02, -8.1932e-02, -1.1164e-02, + -9.0257e-01, 2.2129e-02, 5.9473e-02, -6.6727e-02, -3.7941e-02, + 7.0465e-03, -1.1116e-02, 5.4991e-02, -1.0147e-01, -1.0370e-01, + 5.7779e-02, -1.7909e-01, -1.2021e-01, 
7.5867e-02, -2.8435e-02, + 2.3324e-02, -3.7433e-02, -2.6374e-02, 1.0537e-01, 1.8956e-01, + -1.8281e-02, 9.0064e-03, 9.5987e-02, 2.0872e-01, -1.0442e-01, + -1.9331e-02, -3.1765e-02, -7.1338e-02, 3.3094e-02, 1.6677e-01, + -1.3918e-01, 3.8584e-02, -1.0807e-01, 1.3928e-01, -2.9507e-02, + 1.3837e-02, -1.9234e-02, -1.1115e-01, 2.1526e-02, 5.8286e-02, + -3.8775e-02, -5.9076e-03, -2.4913e-02, -6.5648e-02, -5.4430e-02, + -3.0729e-02, 9.4677e-02, -1.9277e-01, 5.0177e-02, -2.1330e-02, + -4.6595e-02, 2.3514e-02, -7.6106e-02, 9.4801e-02, -1.6368e-02, + -2.9171e-02, -1.3751e-01, -7.4673e-02, 3.7233e-02, -2.6263e-02, + 6.3678e-02, 1.6571e-01, 1.0001e-02, -3.4022e-02, 8.4699e-02, + 7.3399e-02, -1.3803e-01, 6.7760e-02, 1.6934e-01, -7.5010e-02, + 1.6691e-02, 7.5057e-03, 1.0972e-01, 7.7287e-02, -2.8700e-02, + 1.3740e-02, 1.0455e-02, -2.6215e-02, 7.3498e-02, -1.2539e-01, + -1.8040e-02, -1.8961e-01, 1.2432e-01, -6.3336e-02, 9.7975e-02, + -4.1474e-02, -1.3091e-01, 2.9420e-02, 8.7890e-03, -3.5217e-02, + 4.1313e-02, 1.0067e-01, 9.6062e-02, 4.5217e-02, 1.1410e-05, + -9.3742e-02, -2.5187e-02, -1.3336e-01, 7.0206e-02, -3.5455e-02, + -3.5793e-02, 6.1289e-02, 1.0714e-01, 9.0462e-02, -6.6959e-02, + -5.5268e-02, -2.4237e-02, 9.0868e-02, -2.7395e-04, -6.6102e-02, + -1.4149e-02, 2.2778e-02, -2.2358e-02, 1.6611e-02, -1.3722e-01, + -1.0803e-03, 9.2254e-03, 9.3046e-04, -9.5271e-02, 7.2699e-03, + -1.5842e-02, -6.4155e-02, 2.2247e-02, -5.4963e-03, -1.6438e-02, + -1.1363e-02, 3.0167e-02, -5.5412e-02, 1.5861e-01, 3.8205e-02, + 1.0428e-01, 1.4365e-01, 5.1521e-03, 9.7592e-02, 7.0424e-02, + -5.2993e-03, 6.1979e-02, -1.1234e-01, 1.3335e-01, -3.8977e-03, + 1.5236e-03, 5.9121e-02, -4.6393e-02, -7.4197e-03, 1.5965e-01, + -7.6934e-02, 2.0333e-01, 4.7435e-02, 1.1154e-01, 1.3193e-01, + 1.2094e-01, -2.3863e-02, 1.9543e-02, 2.3810e-02, 2.4418e-02, + 7.7090e-02, 6.6509e-02, -1.1898e-02, -3.2068e-02, 9.7540e-02, + 8.0355e-02, 2.5759e-02, -1.3683e-02, -4.1099e-02, -6.9798e-02, + -1.1555e-01, -9.3035e-02, 9.1930e-02, 
1.1443e-01, 1.3131e-01, + -7.3821e-03, 2.6003e-02, -1.0138e-01, 1.0635e-01, 8.4075e-02, + -6.2905e-02, 7.4726e-02, 6.4077e-02, -7.1340e-02, 5.6060e-02, + -1.7636e-02, 1.4522e-02, 6.7943e-02, -4.5894e-02, 8.4953e-02, + -5.2461e-02, -1.8256e-01, -6.3344e-03, -1.4413e-02, -3.7308e-02, + -2.5780e-02, 5.6539e-02, 4.2219e-02, 1.4163e-01, -3.2828e-02, + 3.6561e-02, 6.6173e-02, -7.5909e-02, 1.7378e-02, -8.9888e-02, + -1.4279e-01, 7.4994e-02, 1.5726e-01, 3.1206e-02, -9.0772e-02, + 6.8410e-02, 2.9224e-02, 4.4098e-02, -1.3920e-02, 9.1034e-02, + -4.6151e-02, -5.6685e-02, 5.4332e-02, 7.8448e-02, -1.0195e-01, + 1.2775e-01, -1.4901e-02, 1.1812e-01, 1.7138e-01, 7.7575e-02, + -1.0953e-02, -4.2166e-02, -3.0236e-04, 1.1864e-01, -1.0792e-01, + 5.7050e-02, -6.2270e-02, 9.0958e-02, -6.9540e-02, 9.6204e-02, + 4.0174e-02, -1.6757e-02, 1.3001e-01, 1.4080e-02, 2.0438e-01, + 1.7934e-01, -1.0523e-02, -4.7175e-02, 1.0229e-01, -6.7819e-02, + 7.1270e-02, -9.5283e-02, -5.8948e-02, 1.1191e-01, 1.2083e-01, + -1.7280e-02, 1.4690e-02, -1.0733e-01, -7.0945e-03, 1.0890e-01, + -4.4923e-02, -1.9094e-01, -1.7491e-01, 5.1405e-03, -3.6598e-02, + -2.1371e-01, 6.9781e-02, 8.6824e-02, 7.3522e-02, 5.1444e-03, + 2.4714e-02, -3.9927e-02, -3.7512e-02, 5.3088e-03, 3.9691e-02, + -2.1449e-02, -4.3316e+00, -2.5358e-02, 5.2166e-02, -3.0075e-02, + -1.6318e-01, 2.6883e-02, -7.9553e-02, 3.8043e-02, 8.0910e-02, + -1.0636e-01, 2.9228e-02, -3.2607e-02, -2.1576e-03, -1.1461e-01, + 2.1159e-01, -8.1155e-03, -4.2389e-02, 4.4097e-02, -4.2229e-02, + 1.5469e-01, 8.3980e-02, 2.8703e-02, -1.6500e-02, -1.6388e-01, + -4.1705e-02, 8.7547e-02, -3.8063e-02, -5.4893e-02, 6.7877e-02, + -1.8283e-02, 1.2425e-01, 9.7803e-02, -5.3695e-03, 6.0873e-02, + -4.2723e-02, 1.6263e-02, 4.4332e-02, -8.6944e-02, -4.4148e-02, + -4.8852e-02, -1.3835e-02, 2.1686e-02, -2.0245e-01, 9.2224e-02, + -3.2831e-02, -2.5258e-01, 1.2468e-01, 5.1174e-02, 1.4680e-01, + 3.0071e-02, 6.5667e-02, 1.4609e-01, 1.1543e-01, 3.1377e-02, + 5.9735e-02, -5.4245e-02, 2.8447e-02, 
-9.4915e-02, -1.0815e-01, + 7.3491e-02, -1.7688e-01, -1.1766e-01, 8.3479e-02, 8.7339e-04, + -4.7636e-02, -1.5777e-01, 3.0539e-02, 1.1327e-01, -7.4810e-03, + -1.7076e-03, -1.1211e-01, -8.4320e-02, -1.8981e-02, 1.2201e-01, + 3.7016e-02, 2.1191e-02, 1.3702e-02, 2.2009e-02, -3.0147e-02, + -4.6553e-03, 5.7443e-02, 1.1077e-01, -1.2032e-01, -1.3606e-01, + -4.3071e-02, 1.1801e-01, -1.3836e-02])Parameter containing: +tensor([[ 0.0171, 0.0130, 0.0185, ..., 0.0060, -0.0017, -0.0105], + [-0.0364, 0.0154, -0.0142, ..., 0.0086, 0.0068, 0.0017], + [-0.0113, -0.0242, 0.0130, ..., 0.0225, 0.0031, -0.0107], + ..., + [ 0.0166, 0.0227, -0.0178, ..., -0.0059, 0.0132, -0.0079], + [ 0.0282, 0.0054, 0.0172, ..., 0.0009, 0.0006, 0.0101], + [ 0.0041, 0.0177, -0.0183, ..., 0.0003, 0.0102, 0.0056]])Parameter containing: +tensor([-0.2388, -0.3625, -0.0865, ..., -0.3342, -0.2629, -0.1206])Parameter containing: +tensor([[ 1.4420e-02, -3.2654e-02, -8.9569e-03, ..., 8.2855e-03, + 1.0498e-02, -1.5457e-02], + [-7.6370e-03, -2.0157e-02, 1.2436e-02, ..., 4.3762e-02, + 3.8452e-02, -2.3422e-02], + [ 1.2445e-04, 2.7905e-03, -6.9084e-03, ..., -7.2594e-03, + 1.1620e-02, 1.2497e-02], + ..., + [ 2.7823e-04, 7.8583e-03, -2.6993e-02, ..., 2.8183e-02, + -4.7226e-03, 4.9896e-03], + [-6.7711e-03, -6.6414e-03, -4.2305e-03, ..., 5.4321e-03, + 1.3855e-02, 1.0252e-05], + [ 2.7084e-03, 7.5684e-03, -7.6370e-03, ..., 2.0428e-03, + -1.5198e-02, -6.1722e-03]])Parameter containing: +tensor([ 1.6052e-02, 1.0201e-02, -8.6594e-03, -4.4342e-02, -7.0618e-02, + 1.1162e-02, -3.6133e-02, -5.5695e-02, 8.9783e-02, -7.8552e-02, + -4.6814e-02, 4.4342e-02, 2.7924e-02, -2.4399e-02, -2.2797e-02, + 3.2745e-02, -5.3680e-02, -2.0996e-02, -4.4037e-02, -4.6631e-01, + -5.6946e-02, 4.1687e-02, 4.3304e-02, 2.7786e-02, 6.3904e-02, + 7.9956e-02, -6.8481e-02, 2.7100e-02, -2.1866e-02, 5.5389e-02, + 4.5807e-02, 4.8409e-03, -1.7181e-02, 5.1270e-02, 3.7201e-02, + -2.4204e-03, -7.8491e-02, -2.8763e-02, 6.2927e-02, 4.0436e-02, + -2.8854e-02, 2.9388e-02, 
-2.1698e-02, -6.4819e-02, 9.0942e-03, + 7.4730e-03, 1.0193e-01, -5.1788e-02, -4.5746e-02, -4.4373e-02, + -6.1554e-02, 8.2626e-03, -7.9285e-02, 3.9642e-02, -2.2995e-02, + 5.6549e-02, -3.1433e-02, -2.1362e-03, -6.9031e-02, -1.1696e-02, + 1.2123e-02, 3.1708e-02, -1.4946e-02, -3.5370e-02, -2.2980e-02, + 4.5166e-02, -2.7908e-02, -1.0938e-01, -4.7424e-02, -9.8389e-02, + 6.4636e-02, -2.4658e-02, -1.8875e-02, -4.8370e-02, 4.2480e-02, + -3.7872e-02, 6.0883e-03, 4.2053e-02, 7.9575e-03, 3.3356e-02, + 4.2572e-02, -1.9627e-03, 5.0842e-02, -2.4002e-02, 1.1917e-02, + -5.9631e-02, -3.0804e-03, -7.9346e-02, -7.4585e-02, 1.3626e-02, + -1.1493e-01, -1.7883e-02, -2.7481e-02, 9.0210e-02, 5.0354e-02, + -2.0340e-02, -3.8391e-02, 2.9404e-02, 3.5767e-02, 7.1350e-02, + 6.0577e-02, -1.0492e-01, -7.0251e-02, 3.9551e-02, -5.0720e-02, + -2.0462e-02, 3.2684e-02, -6.1279e-02, 5.0415e-02, -5.6000e-02, + 9.8755e-02, -3.2623e-02, -8.6487e-02, -3.4210e-02, -4.0344e-02, + 2.0935e-02, 4.9530e-02, 4.4525e-02, -4.6234e-02, -7.0152e-03, + -1.1536e-02, 4.9133e-03, -4.8599e-03, 1.8417e-02, 1.7548e-02, + 1.2192e-02, -9.1248e-03, 1.2292e-01, 5.0293e-02, 9.3155e-03, + -6.1218e-02, 6.2073e-02, 2.1805e-02, 2.1942e-02, -1.9531e-02, + 2.4841e-02, 3.4428e-03, -4.9530e-02, 7.4890e-02, 1.9943e-02, + 1.0791e-01, -4.0833e-02, 1.1581e-02, -3.7708e-03, -1.5732e-02, + -1.6241e-03, -2.5009e-02, -5.6244e-02, 6.9389e-03, -1.8646e-02, + 2.1500e-02, -1.2802e-02, 2.7405e-02, -5.1270e-02, -5.3192e-02, + 3.5431e-02, 3.1242e-03, 6.0120e-02, -6.2988e-02, 1.6846e-02, + 1.5945e-02, -4.3335e-03, 2.6749e-02, -3.8116e-02, -5.8624e-02, + 2.7939e-02, 1.1310e-01, -1.2802e-02, 7.2266e-02, 1.4105e-03, + 9.4452e-03, -6.7078e-02, -1.2672e-02, -1.2039e-02, -1.0199e-01, + -3.8513e-02, -2.9129e-02, -9.3140e-02, 1.2561e-01, 1.0223e-01, + -5.7129e-02, 9.0790e-03, -3.0251e-03, -2.7313e-02, -2.7008e-02, + 4.2908e-02, -8.0383e-02, -1.9067e-01, 2.3300e-02, 3.2776e-02, + -1.1169e-01, -1.4175e-02, 7.6828e-03, 8.9722e-03, 1.9699e-02, + 7.4463e-02, 
1.0376e-01, -5.2094e-02, 1.1694e-01, 7.2388e-02, + 4.7424e-02, 2.6474e-02, -2.7180e-03, -7.5256e-02, -3.9703e-02, + 2.1324e-03, 2.1332e-02, -9.0027e-02, -4.5044e-02, 1.4114e-02, + -1.3870e-02, -3.0945e-02, 2.5883e-03, 1.4809e-02, 6.4758e-02, + 2.8717e-02, 4.7546e-02, -5.4199e-02, 4.0375e-02, 4.5380e-02, + 3.6682e-02, 1.3125e-04, 1.9722e-03, -5.7892e-02, -2.1072e-02, + 9.9304e-02, -8.0811e-02, 5.3162e-02, 4.6875e-02, 4.6921e-03, + -4.1580e-03, -4.0436e-02, 1.0590e-02, -3.3661e-02, -1.3885e-02, + -1.2598e-01, 4.5563e-02, -2.5955e-02, -1.8158e-02, 5.2521e-02, + 3.5950e-02, 6.2805e-02, 1.1520e-02, -6.6223e-03, -5.7831e-02, + -1.7944e-02, -4.5135e-02, 2.9724e-02, -3.3203e-02, -4.0932e-03, + -8.1177e-02, -2.3438e-02, 1.3756e-02, 1.4565e-02, -3.3661e-02, + -3.1235e-02, -3.8910e-02, -5.9113e-02, -1.2201e-01, -6.6605e-03, + -7.9895e-02, -8.3252e-02, -5.2246e-02, -1.0483e-02, 4.4739e-02, + -1.0933e-02, 2.1057e-02, 1.6663e-02, -2.7435e-02, -6.3591e-03, + 4.2694e-02, -8.2153e-02, 1.5106e-02, -2.7939e-02, 8.7708e-02, + 3.4271e-02, 5.5962e-03, 1.5182e-02, -1.1713e-01, -4.7211e-02, + -1.8417e-02, -5.8868e-02, -4.3823e-02, 4.2877e-02, 3.0701e-02, + -4.4586e-02, -9.9060e-02, 6.2744e-02, 3.6945e-03, -2.0569e-02, + 1.7212e-02, 6.6589e-02, 6.5422e-03, -7.2899e-03, 2.0370e-02, + -1.2108e-02, 1.3697e-04, 1.3647e-03, -6.7200e-02, -3.7567e-02, + -2.0752e-02, -2.4338e-02, -8.2886e-02, 6.1127e-02, -7.6172e-02, + 5.2216e-02, -8.7402e-02, 7.6538e-02, -7.1777e-02, 1.4297e-02, + 1.1879e-02, 1.1810e-02, -7.9956e-02, -1.0797e-01, -1.4229e-02, + 1.2405e-02, -3.7964e-02, 4.5685e-02, -3.7415e-02, 4.2343e-03, + -1.7639e-02, -4.4647e-02, 1.6220e-02, 2.0432e-02, 2.0430e+00, + -9.8450e-02, 5.6702e-02, -2.4724e-04, -3.8849e-02, 3.6133e-02, + -1.4502e-01, 8.0719e-03, 9.3689e-03, -4.4785e-03, 5.9845e-02, + -1.1639e-01, -3.2776e-02, 9.5276e-02, 4.3732e-02, 6.3110e-02, + 4.8370e-02, -2.0233e-02, -7.6355e-02, 4.1771e-03, -3.2715e-02, + 8.4778e-02, 3.7781e-02, 3.6287e-04, -4.7974e-02, -3.3478e-02, + 
1.0468e-01, -8.3069e-02, -3.0289e-02, -3.8116e-02, 1.7334e-02, + 5.9753e-02, -2.5528e-02, -1.6190e-02, 3.8509e-03, 8.3313e-02, + 1.1879e-02, 4.3854e-02, 4.4739e-02, 7.8186e-02, 3.1464e-02, + -1.2276e-02, -7.3059e-02, 2.1149e-02, 8.8959e-03, -3.4760e-02, + 3.8452e-02, 6.4636e-02, 5.7465e-02, 1.4854e-02, -4.1718e-02, + 2.0065e-03, 1.3435e-02, 1.7014e-03, 1.0608e-01, -4.0245e-03, + 1.0063e-02, -2.1152e-03, -1.8509e-02, -1.0760e-01, 6.1249e-02, + -5.3650e-02, 6.3896e-03, 5.0262e-02, 4.8462e-02, 7.9775e-04, + -2.6413e-02, 5.1086e-02, 8.9417e-03, -4.1931e-02, 3.2837e-02, + -4.6387e-02, -7.0251e-02, -1.3779e-02, 6.0699e-02, -3.0365e-02, + 2.9541e-02, -1.7227e-02, -6.7749e-02, -4.4891e-02, -2.4643e-03, + -2.8870e-02, -4.9438e-02, 1.7319e-02, 1.7258e-02, 1.5808e-02, + 8.2825e-02, -3.2990e-02, 2.6413e-02, -2.1942e-02, 1.0645e-01, + 1.2561e-01, -1.4076e-02, -9.6741e-03, -9.7580e-03, 6.7444e-02, + -3.7201e-02, 6.8420e-02, -3.1036e-02, -4.1748e-02, 3.4119e-02, + 1.0010e-02, 3.4485e-02, 2.0081e-02, -8.8623e-02, -2.4200e-02, + 1.2131e-02, 1.6129e-02, -9.3323e-02, -8.3862e-02, 2.7252e-02, + 4.3854e-02, -7.9422e-03, 3.7811e-02, -1.9348e-02, 8.6670e-03, + 1.9474e-03, 2.4765e-02, 4.2084e-02, -7.7095e-03, -5.0171e-02, + -3.3722e-02, 6.4049e-03, 2.3300e-02, 1.6434e-02, 2.4323e-02, + 1.8066e-02, 1.6968e-02, -6.0211e-02, 4.5700e-03, -3.1342e-02, + 1.0109e-02, 1.3588e-02, 4.6265e-02, -3.0106e-02, 9.6924e-02, + 3.7048e-02, -5.9753e-02, -6.7596e-03, -4.4434e-02, -7.4463e-02, + 4.8553e-02, -1.9760e-02, 1.2524e-01, 6.9160e-03, 1.3672e-02, + -1.3000e-01, 4.5943e-04, -9.0820e-02, 3.8757e-02, -1.0222e-04, + 1.3878e-02, 1.1604e-02, -3.9154e-02, -3.4668e-02, -3.1624e-03, + -2.1835e-02, 4.1443e-02, 1.1955e-02, -5.5176e-02, 4.9286e-02, + -9.8206e-02, 2.6184e-02, -2.0859e-02, -4.6356e-02, -7.5500e-02, + 4.2686e-03, -9.6497e-02, 5.5054e-02, 8.9111e-03, 1.0323e-02, + -4.2419e-02, -4.8828e-02, -1.3281e-01, -6.1874e-03, -4.8584e-02, + 8.8549e-04, 3.3447e-02, -3.6865e-02, -1.9928e-02, 1.1330e-02, + 
2.9358e-02, -1.9515e-04, 1.3184e-01, 1.6724e-02, -3.0303e-04, + -6.4575e-02, 1.5656e-02, -8.7585e-02, -5.6702e-02, -2.4384e-02, + 4.3091e-02, 8.2886e-02, -1.2524e-01, -4.3416e-04, 6.2866e-02, + 2.0966e-02, -2.7939e-02, 3.7292e-02, 7.5256e-02, -8.3191e-02, + -5.0659e-03, 3.5877e-03, 1.8738e-02, -4.9896e-02, -2.5497e-02, + 5.1056e-02, 1.6586e-02, 2.5009e-02, 8.5754e-02, -6.3904e-02, + -3.8025e-02, 5.7587e-02, -3.1403e-02, 6.9153e-02, 8.4900e-02, + 5.5817e-02, -5.0079e-02, 7.1350e-02, -1.9592e-02, 2.8137e-02, + 9.0759e-02, -2.9099e-02, 8.1558e-03, -4.1687e-02, 2.2964e-02, + 6.1798e-02, 2.2781e-02, 1.8173e-02, -3.5126e-02, 5.9013e-03, + 1.9638e-02, 4.8462e-02, -3.1799e-02, -4.3488e-02, -7.4005e-03, + -1.4023e-02, -7.3730e-02, -7.4219e-02, -1.8799e-02, -4.6234e-02, + 1.5388e-02, -7.0740e-02, -5.9479e-02, 5.1666e-02, -3.6072e-02, + -9.7561e-04, -2.4204e-03, -5.8632e-03, -5.8014e-02, 1.7624e-02, + -2.3365e-03, 6.2683e-02, 2.9175e-02, -1.5945e-03, 5.1575e-02, + 7.2754e-02, -1.0004e-01, 2.3239e-02, 3.6041e-02, -1.4709e-02, + -8.6823e-03, -4.5654e-02, 1.0431e-01, 1.8219e-02, 2.9541e-02, + -2.7130e-02, 8.1238e-02, -1.9852e-02, -4.0070e-02, -4.2297e-02, + -1.0567e-02, -4.9377e-02, -5.4550e-04, -3.5645e-02, 3.8269e-02, + 3.2806e-02, 5.8533e-02, -5.9998e-02, 1.0443e-03, 5.3501e-04, + 4.5868e-02, 4.1819e-04, -9.7778e-02, 3.9703e-02, 3.8971e-02, + 3.3508e-02, 3.2593e-02, -1.0040e-02, 2.6093e-02, -2.0981e-02, + -1.0513e-02, 4.8889e-02, 7.7087e-02, -4.7363e-02, -2.3239e-02, + -8.3435e-02, -3.1311e-02, 3.3936e-02, -6.5979e-02, 3.6041e-02, + 1.8372e-02, -3.1158e-02, 3.0701e-02, -1.1646e-01, -8.1238e-02, + -1.7509e-03, 4.7684e-03, 2.2217e-02, -4.8370e-02, 3.3630e-02, + -5.8563e-02, 3.6591e-02, -8.4763e-03, -1.1703e-02, 9.7122e-03, + 3.4271e-02, -2.8183e-02, 2.9190e-02, 2.5452e-02, -5.9998e-02, + 8.0719e-03, -5.5542e-02, -1.9007e-03, -3.5919e-02, -1.4893e-02, + -4.0436e-02, -8.2458e-02, 5.3223e-02, 4.0588e-02, 9.2545e-03, + 5.9143e-02, 2.1530e-02, 6.4209e-02, 1.4913e-04, -2.3804e-02, + 
2.4689e-02, -3.0727e-03, 1.7593e-02, -5.5603e-02, -1.2331e-03, + -5.0171e-02, -3.5004e-02, 3.3600e-02, 2.1820e-02, -2.1591e-02, + 2.4460e-02, -4.1412e-02, -2.2293e-02, -4.1618e-03, 2.1927e-02, + -3.3741e-03, 7.3929e-03, 6.1083e-04, -4.9622e-02, 5.9509e-03, + -2.7496e-02, 4.1455e-01, 4.4556e-02, 2.9053e-02, 3.4912e-02, + -3.5065e-02, 3.6102e-02, 2.4399e-02, 8.8135e-02, -2.7924e-02, + -3.8391e-02, -2.7908e-02, 3.9490e-02, -6.0638e-02, -7.5256e-02, + 8.7402e-02, 1.2520e-02, 4.1046e-02, -4.9408e-02, 2.0691e-02, + 4.8645e-02, 9.2102e-02, -3.4241e-02, -3.1494e-02, -5.8105e-02, + 1.1554e-01, 6.1371e-02, -2.2675e-02, -8.6746e-03, -6.1554e-02, + 3.6621e-02, 6.9122e-03, 3.2318e-02, -7.4829e-02, 8.0322e-02, + 2.4734e-02, 1.1273e-01, 4.0039e-02, -8.9788e-04, -8.0795e-03, + -8.4152e-03, 1.7761e-02, 4.0833e-02, -7.9117e-03, 4.7035e-03, + -2.4872e-03, 2.5681e-02, -2.8564e-02, -3.9398e-02, 7.8049e-03, + -6.6345e-02, -1.0602e-01, 4.6448e-02, -5.2765e-02, 3.7506e-02, + 9.6436e-02, -1.4091e-02, 2.6962e-02, -8.5754e-02, 1.5701e-02, + -3.0655e-02, -1.7639e-02, 2.4582e-02, -4.4098e-02, -1.8066e-02, + 2.7969e-02, 6.6147e-03, -8.4412e-02, 1.3481e-02, -1.1090e-01, + 6.2378e-02, 1.5701e-02, -3.7140e-02, 2.2751e-02, -6.6040e-02, + 3.5492e-02, 2.5757e-02, -7.2937e-02, -2.1805e-02, -7.4158e-03, + -2.7374e-02, 3.4119e-02, 4.5685e-02, -2.6093e-02, -3.7170e-02, + -1.5736e-03, -1.2703e-03, 4.0558e-02])Parameter containing: +tensor([2.5576, 2.3304, 2.3711, 2.3977, 1.9550, 2.6222, 2.4071, 2.4198, 2.5433, + 2.2927, 2.3707, 2.4486, 2.3675, 2.2586, 2.4003, 2.5166, 2.4092, 2.3295, + 2.3509, 1.9934, 2.4948, 2.4201, 2.5280, 2.3026, 2.3817, 2.3575, 2.4651, + 2.4091, 2.2956, 2.3569, 2.4069, 2.4324, 2.4017, 2.5549, 2.2891, 2.3673, + 2.5084, 2.3113, 2.2584, 2.4008, 2.2910, 2.5507, 2.4455, 2.3672, 2.4784, + 2.4578, 2.4305, 2.3201, 2.3507, 2.2985, 2.3682, 2.4973, 2.4444, 2.4272, + 2.4040, 2.2344, 2.4056, 2.3216, 2.4079, 2.3590, 2.3579, 2.3738, 2.3978, + 2.3264, 2.5303, 2.4412, 2.5666, 2.3776, 2.4179, 2.1270, 
2.4574, 2.4055, + 2.4645, 2.4699, 2.4424, 2.5034, 2.5580, 2.3774, 2.3579, 2.5861, 2.5141, + 2.3922, 2.3188, 2.3972, 2.5189, 2.5155, 2.4887, 2.3563, 2.5517, 2.4372, + 2.3899, 2.4647, 2.5373, 2.5416, 2.4712, 2.4022, 2.2961, 2.5308, 2.4706, + 2.4441, 2.3014, 2.4697, 2.4678, 2.4144, 2.4452, 2.0670, 2.4215, 2.4244, + 2.3358, 2.3543, 2.2992, 2.5740, 2.4565, 2.4551, 2.5621, 2.2902, 2.5656, + 2.5313, 2.4438, 2.4695, 2.3469, 2.4974, 2.4416, 2.3992, 2.4196, 2.2348, + 2.6539, 2.6797, 2.4311, 2.4361, 2.3586, 2.6112, 2.6770, 2.4377, 2.4944, + 2.4049, 2.2290, 2.4017, 2.3639, 2.5649, 2.3928, 2.3823, 2.4417, 2.2946, + 2.4853, 2.3311, 2.3497, 2.5481, 2.4626, 2.5429, 2.4029, 2.3552, 2.3902, + 2.5832, 2.4849, 2.2304, 2.4417, 2.3722, 2.5731, 2.5410, 2.2837, 2.4604, + 2.3329, 2.4226, 2.4918, 2.2607, 2.3743, 2.5165, 2.4731, 2.5513, 2.5786, + 2.3786, 2.2037, 2.4933, 2.2872, 2.5195, 2.3746, 2.4236, 2.2408, 2.4877, + 2.5523, 2.4471, 2.4894, 2.5245, 2.2506, 2.1873, 2.2735, 1.4312, 2.3013, + 2.3922, 2.5230, 2.4725, 2.4565, 2.4358, 2.5594, 2.4528, 2.3324, 2.4306, + 1.2379, 2.2861, 2.4164, 2.3120, 2.3680, 2.5697, 2.4218, 2.4618, 2.4612, + 2.4438, 2.3577, 2.4119, 2.4494, 2.2746, 2.5410, 2.0422, 2.4465, 2.5530, + 2.1844, 2.4634, 2.3859, 2.2563, 2.4984, 2.3779, 1.9990, 2.4829, 2.4556, + 2.5563, 2.4028, 2.3832, 2.3399, 2.2638, 2.4324, 2.4482, 2.3053, 2.2566, + 2.3205, 2.4360, 2.4522, 2.3403, 2.5395, 2.4561, 2.4037, 2.5031, 2.3726, + 2.4524, 2.4662, 2.4363, 2.6728, 2.4845, 2.4196, 2.4342, 2.5506, 2.3502, + 2.4282, 2.4350, 2.5386, 2.4616, 2.4830, 2.3861, 2.5172, 2.5778, 2.3977, + 2.4931, 2.4868, 2.4324, 2.4132, 2.3309, 2.4617, 2.4026, 2.6077, 2.3682, + 2.4094, 2.4648, 2.5450, 2.3728, 2.3849, 2.7174, 2.4184, 2.4798, 2.3318, + 2.4888, 2.4238, 2.2394, 2.2824, 2.5057, 2.5209, 2.3631, 2.4332, 2.3806, + 2.3419, 2.2953, 2.4176, 2.3549, 2.3651, 2.5080, 2.4547, 2.4592, 2.2354, + 2.3696, 2.3371, 2.4121, 2.4274, 2.5341, 2.3987, 2.6333, 2.5166, 2.4138, + 2.5587, 2.5251, 2.3318, 2.3929, 2.3471, 2.5221, 2.4845, 
2.4899, 2.5011, + 2.4038, 2.6121, 2.4854, 2.3235, 2.5669, 2.3907, 2.2766, 2.3783, 2.4000, + 0.6357, 2.4000, 2.5624, 2.3160, 2.6602, 2.3403, 2.3703, 2.4046, 2.4510, + 2.4836, 2.4286, 2.1999, 2.6096, 2.4631, 2.3707, 2.3089, 2.4851, 2.3330, + 2.3177, 2.5466, 2.4031, 2.4949, 2.5606, 2.4180, 2.4414, 2.4380, 2.0319, + 2.4428, 2.4251, 2.1127, 2.4867, 2.3711, 2.4772, 2.4604, 2.3391, 2.3586, + 2.4182, 2.3580, 2.4760, 2.4144, 2.4725, 2.5182, 2.3207, 2.6489, 2.4085, + 2.3826, 2.4882, 2.3557, 2.4317, 2.3772, 2.4133, 2.4613, 2.4010, 2.4738, + 2.5751, 2.4574, 2.3113, 2.6367, 2.5771, 2.3439, 2.4362, 2.3548, 2.4980, + 2.3924, 2.4147, 2.3892, 2.5263, 2.7315, 2.4591, 2.4544, 2.3935, 2.3675, + 2.4730, 2.3705, 2.4611, 2.2936, 2.4690, 2.4581, 2.4748, 2.4659, 2.3809, + 2.4476, 2.6219, 2.3810, 2.5058, 2.7186, 2.2634, 2.7011, 2.4224, 2.6094, + 2.2897, 3.4400, 2.3407, 2.3741, 2.3515, 2.5698, 2.5394, 2.4029, 2.4405, + 2.3774, 2.6522, 2.4810, 2.1642, 2.3742, 2.8108, 2.4177, 2.2984, 2.4548, + 2.6247, 2.4611, 2.5190, 2.4920, 2.4434, 2.4291, 2.4715, 2.3702, 2.4663, + 2.2900, 2.4210, 1.7521, 2.3985, 2.3208, 2.4050, 2.4549, 2.4296, 2.3289, + 2.4849, 2.4413, 2.5168, 2.5119, 2.5425, 2.4593, 2.5218, 2.4031, 2.4426, + 2.3513, 2.4843, 2.5100, 2.3698, 2.3297, 2.2840, 2.4772, 2.6122, 2.2475, + 2.4717, 2.2813, 2.5931, 2.3489, 2.3603, 2.3900, 2.4951, 2.4265, 2.4145, + 2.3638, 2.5526, 2.2686, 2.4221, 2.3169, 2.3508, 1.9304, 2.4101, 2.4658, + 2.4240, 2.3683, 2.4086, 2.4019, 2.3520, 2.4472, 2.6275, 2.3913, 2.3408, + 2.3086, 2.4151, 2.3883, 2.4458, 2.3428, 2.4694, 2.3540, 2.4129, 2.4636, + 2.7249, 2.3469, 2.4367, 2.3560, 2.4105, 2.3652, 2.4830, 2.5175, 2.2671, + 2.3230, 2.3949, 2.4571, 2.4217, 2.4320, 2.3076, 2.6991, 2.3035, 2.4677, + 2.4479, 2.4913, 2.3424, 2.4013, 2.4394, 2.3548, 2.3995, 2.3956, 2.4178, + 2.4282, 2.3959, 2.5658, 2.3178, 2.5060, 2.4149, 2.4419, 2.5459, 2.4721, + 2.3397, 2.6064, 2.6917, 2.4959, 2.4903, 2.2762, 2.4950, 2.2810, 2.3614, + 2.4633, 2.4814, 2.3575, 2.5248, 2.3733, 2.3363, 2.4094, 
2.5385, 2.3083, + 2.3769, 2.5763, 2.4112, 2.2750, 2.4145, 2.3639, 2.5136, 2.3405, 2.4383, + 2.5640, 2.5239, 2.4995, 2.5569, 2.3197, 2.3858, 2.3962, 2.4429, 2.4544, + 2.3997, 2.3287, 2.4404, 2.3087, 2.3067, 2.4738, 2.3649, 2.5190, 2.5092, + 2.4250, 2.2719, 2.4374, 2.4496, 2.4747, 2.3308, 2.4564, 2.4331, 2.6464, + 2.4636, 2.2174, 2.3275, 2.4728, 2.5065, 2.3889, 2.3736, 2.3614, 2.4190, + 2.3662, 2.5910, 2.2835, 2.4849, 2.4330, 2.5681, 2.5034, 2.3754, 2.2966, + 2.2987, 2.5070, 2.4195, 2.6241, 2.4842, 2.3508, 2.4747, 2.3618, 2.3558, + 2.1667, 2.5238, 2.4680, 2.5973, 2.5392, 2.3654, 2.4019, 2.4716, 2.4832, + 2.5000, 2.5019, 2.3741, 2.3643, 2.3876, 2.3685, 2.6611, 2.4422, 2.4762, + 2.3547, 2.4025, 2.4497, 2.2040, 2.4888, 2.2723, 2.3743, 2.2826, 2.6054, + 2.3190, 2.4891, 2.5162, 2.4549, 2.2425, 2.4536, 2.4128, 2.4304, 2.4707, + 2.5659, 2.3483, 2.2955, 1.8539, 2.3997, 2.3637, 2.3968, 2.4626, 2.3946, + 2.3222, 2.3524, 2.5597, 2.3742, 2.2840, 2.3314, 2.3857, 2.2875, 2.4121, + 2.2458, 2.4827, 2.3292, 2.6354, 2.4387, 2.4942, 2.2355, 2.3579, 2.5134, + 2.2878, 2.3131, 2.5136, 2.2672, 2.4973, 2.4720, 2.3654, 2.5278, 2.4510, + 2.3359, 2.2845, 2.6296, 2.3547, 2.3985, 2.4756, 2.5552, 2.3655, 2.3423, + 2.4334, 2.3119, 2.4998, 2.4868, 2.6313, 2.3357, 2.4385, 2.4794, 2.4030, + 2.3261, 2.4024, 2.4096, 2.4862, 2.5003, 2.4382, 2.5027, 2.4801, 2.4794, + 2.4303, 2.5030, 2.3749, 2.3007, 2.4419, 2.4550, 1.8549, 2.3850, 2.4200, + 2.3704, 2.6410, 2.2996, 2.3996, 2.4752, 2.5489, 2.4231, 2.5352, 2.2041, + 2.6158, 2.4349, 2.3655, 2.7523, 2.3759, 2.4892, 2.3502, 2.2487, 2.4969, + 2.4593, 2.5063, 2.4189, 2.4738, 2.4963, 2.3114, 2.3510, 2.3365, 2.4439, + 2.4067, 2.2807, 2.4631, 2.4164, 2.2501, 2.5119, 2.4704, 2.4727, 2.3767, + 2.4629, 2.4223, 2.4440])Parameter containing: +tensor([ 2.9932e-02, -3.0162e-01, 1.4128e-01, -3.4646e-01, -2.5106e-01, + -5.6301e-01, 2.2639e-01, 2.4600e-01, 1.6465e-01, -4.2078e-01, + -1.2340e-01, 3.7251e-01, 1.2406e-01, 2.7133e-01, 3.1003e-02, + 1.7342e-01, 1.8523e-01, 
1.8913e-01, -1.4909e-01, -3.1221e-01, + -2.9462e-01, 1.4270e-01, 6.6221e-01, -4.7315e-01, 1.8876e-01, + 7.3583e-02, -1.2354e-01, -7.3950e-02, -1.2269e-01, 4.9641e-01, + 3.4733e-01, -3.2025e-02, -2.0315e-02, -4.4567e-01, -2.4298e-01, + -3.3707e-01, -3.7226e-01, 4.7930e-01, 6.1613e-01, -3.1205e-01, + -1.6558e-01, 6.1039e-02, 4.8515e-01, 4.7716e-02, -5.9613e-01, + -6.4904e-01, -4.5658e-01, -2.0424e-01, -1.2793e-01, 3.3709e-01, + -1.8851e-01, -8.0029e-01, -1.7977e-02, -3.8074e-01, -6.6202e-01, + 2.6686e-01, 5.5996e-01, -4.8043e-01, -6.7872e-02, 1.2299e-01, + -9.4741e-02, 5.9958e-02, 4.9218e-02, -4.2103e-01, 2.5196e-01, + 2.4068e-01, -3.8744e-02, -1.3429e-01, -4.8376e-01, -6.1607e-01, + 3.2976e-01, 9.5367e-02, -1.5271e-01, -4.6356e-01, -2.8587e-01, + -4.2759e-01, 6.3393e-01, 2.3609e-02, 1.5916e-01, 8.8886e-01, + 2.6343e-01, 3.4043e-01, -4.8861e-01, -1.8627e-01, -6.9661e-01, + -5.4499e-01, 4.9617e-01, -9.7585e-02, 4.6151e-01, -2.2765e-01, + -3.9411e-03, -1.6364e-01, -5.1623e-01, 2.0387e-01, -1.4929e-01, + -2.7565e-01, 2.2874e-01, -1.8453e-01, -5.3695e-01, 6.6773e-01, + 4.4026e-02, -4.9675e-01, -4.4427e-01, 1.0410e-01, 4.6265e-01, + 2.2047e-01, 4.1624e-01, 1.1387e-01, -6.0574e-02, -1.2546e-02, + 8.7225e-02, 4.3368e-01, -3.9527e-01, -2.3091e-01, 1.4382e-01, + -4.0146e-01, 6.7164e-01, 4.8646e-01, 2.2774e-01, 3.8298e-01, + -1.9689e-01, 2.4062e-01, -5.9467e-02, 2.6072e-01, 3.2621e-01, + -2.5951e-01, 2.8232e-01, 6.0466e-01, -2.6100e-01, 1.8510e-01, + 1.3541e-01, -5.1284e-01, 4.8758e-01, -7.8206e-02, 4.7141e-01, + 1.7501e-01, 1.4147e-01, -1.8194e-01, 3.3316e-01, -3.6307e-01, + -3.6573e-01, 5.3486e-01, -1.8098e-02, -2.2143e-01, 4.6298e-02, + 5.6899e-03, 4.3567e-01, -4.8673e-01, -6.7744e-01, -1.4599e-01, + 9.3934e-02, -1.6618e-02, 2.4010e-01, -5.1935e-01, -2.4059e-01, + 7.7139e-01, 5.8555e-01, -4.2442e-01, -2.0791e-01, -4.0307e-01, + -1.1149e-01, -2.0498e-02, 8.9808e-02, -4.4491e-01, 7.5951e-01, + 4.6165e-02, 3.6412e-01, -1.5113e-01, 3.0061e-01, -4.1646e-01, + 6.1315e-01, 
3.3305e-02, -5.2668e-01, 6.5254e-01, 9.5107e-02, + 3.4002e-01, 7.6512e-02, -5.4823e-02, 9.7594e-02, -2.7147e-01, + -7.9573e-01, -9.5936e-02, 6.0138e-01, 6.2805e-01, -1.2078e-01, + 5.6416e-01, -2.2096e-03, 2.1145e+00, -1.0990e-01, -3.9488e-02, + -8.6518e-01, -4.6028e-01, 2.7963e-01, 5.6114e-01, 3.7760e-01, + -9.8124e-02, 1.6267e-01, 3.0374e-02, -5.7425e-01, 1.3610e-01, + -4.6454e-01, -7.3948e-02, 1.4996e-01, -4.4412e-01, -3.2803e-01, + -2.5651e-01, 1.9774e-01, 4.8669e-01, 9.0103e-02, -4.5681e-01, + 2.8155e-01, -1.8632e-01, -8.3922e-01, 2.0943e-01, 2.9468e-01, + -3.3018e-01, -2.1694e-01, -2.5128e-01, -3.5858e-01, 7.1802e-02, + 5.5272e-01, 1.1630e-01, -4.3954e-01, 6.8150e-01, 3.1679e-02, + 2.2199e-01, -1.1738e-01, 3.0026e-01, 1.3040e-01, 2.2225e-01, + -2.4677e-01, -1.0417e-01, 3.3233e-01, 1.9889e-01, -5.1069e-01, + -1.9670e-01, 2.4504e-01, 1.9361e-01, -2.3999e-01, 2.3646e-01, + -6.1190e-02, 4.4584e-03, 1.3534e-01, -1.4524e-01, -3.5619e-01, + -5.1144e-01, -4.9386e-01, 3.5775e-01, -3.1955e-01, 6.4570e-01, + -1.2014e-01, 7.9763e-01, 2.1697e-01, 7.6738e-02, -2.2917e-02, + -5.9375e-01, 5.2489e-01, -6.0439e-01, -2.7954e-01, -5.2342e-01, + 8.9634e-02, 1.5874e-01, 5.5865e-01, 2.5939e-01, -5.7272e-02, + -4.0273e-02, 2.4863e-01, -1.7334e-01, 6.4374e-01, -2.2190e-01, + -3.6028e-01, 1.9930e-01, -3.9872e-01, -3.2904e-01, 1.3150e-01, + -7.3514e-01, -3.3150e-01, 3.8126e-01, -1.5717e-01, -5.4585e-02, + -4.2850e-01, -7.4214e-01, -1.7948e-01, 4.3636e-01, -4.9748e-01, + 4.5393e-01, 1.7344e-01, -9.9264e-03, 3.6427e-01, -7.0329e-02, + -4.7180e-01, 1.2721e-01, 5.0421e-01, -3.5168e-01, -3.9122e-01, + -2.7454e-01, 2.4527e-01, -6.6616e-01, 4.3131e-01, -2.6262e-01, + -3.2642e-01, 8.5823e-02, -5.1454e-02, -1.1051e+00, 4.8101e-01, + 2.7046e-01, -1.9462e-01, 3.6501e-01, -2.0426e-01, -2.7896e-01, + 3.8353e-01, -4.3028e-01, 4.5165e-01, 5.5885e-01, -3.3605e-02, + -1.0840e-01, 5.6463e-01, 4.7314e-01, -5.7062e-02, 5.5220e-01, + -3.7576e-01, -2.8350e-01, 1.8441e-01, -3.4029e-01, 5.2433e-02, + 
-4.3591e-01, -5.0902e-01, 3.3369e-01, 2.0976e-01, -2.3188e-01, + 4.9477e-01, -6.9597e-02, -2.6938e-01, -5.6728e-02, 6.2881e-01, + 7.3492e-02, -9.3713e-01, 1.1243e-01, 3.7966e-02, -6.5733e-02, + 3.9749e-01, -1.6061e-01, -5.4494e-01, 4.2282e-01, -3.0773e-01, + 7.1377e-01, -6.8213e-01, -1.5114e-01, -1.4101e-01, 1.6538e-01, + 3.5520e-01, 4.3173e-01, -1.6039e-01, 4.6407e-02, -9.5964e-02, + 8.6294e-02, -3.3205e-01, 5.8999e-02, -3.7933e-01, 1.3829e-01, + -5.0132e-01, -4.4829e-01, -8.1570e-02, -5.1014e-01, 3.7524e-01, + -2.9223e-01, 3.9546e-02, 4.7483e-02, 1.9292e-01, 2.2456e-01, + -2.5084e-01, -7.0861e-02, 2.6201e-01, -4.7447e-02, 1.1546e-03, + -1.7990e-01, -7.7817e-02, 4.9409e-01, -4.3644e-01, -3.4852e-01, + 9.1261e-02, -7.7327e-01, -3.1767e-01, 3.4946e-01, 2.7020e-01, + -1.8949e-01, 8.8170e-02, 5.3733e-01, 2.0567e-01, 1.3307e-01, + 5.8523e-01, -7.4638e-01, 6.8873e-01, 4.0128e-01, -5.1895e-01, + -1.4250e-01, 1.2085e-02, -1.7605e-01, -1.8713e-01, 1.7004e-02, + 1.2924e-01, 6.0687e-01, -4.2209e-01, -6.2843e-01, 5.2935e-01, + -6.1243e-02, 2.2317e-01, -6.2447e-02, -2.8592e-01, 8.2672e-01, + 1.1707e-01, -5.4270e-01, 2.9941e-01, 2.8900e-01, 8.6537e-01, + -1.6511e+00, 1.1635e-01, 2.8656e-02, 2.1624e-01, 1.9607e-01, + 5.0981e-01, -4.5917e-01, -1.5322e-01, 4.5360e-02, -5.6081e-01, + 4.8290e-01, 3.4567e-01, -5.8747e-02, -6.8929e-01, 1.2810e-01, + -2.5956e-01, -3.9461e-01, -6.4888e-01, -8.8910e-03, -2.0434e-02, + -1.4205e-01, -2.4369e-02, 5.7545e-01, 1.1698e-01, -3.8967e-01, + 3.4959e-01, 3.5758e-01, 3.2217e-02, -6.6200e-01, -6.6839e-02, + 5.8133e-02, 2.7942e-01, 2.9290e-01, 1.7966e-02, -3.6733e-01, + -9.9340e-02, -1.3521e-01, -2.7681e-01, -4.2879e-01, -3.3071e-01, + -5.7073e-01, -5.8907e-01, -5.0452e-01, 2.0418e-01, 1.0199e-01, + -3.9590e-01, 1.8624e-01, -2.9696e-01, 1.8448e-01, -3.0404e-01, + 4.6199e-01, -3.9756e-01, 3.4744e-01, -2.6387e-01, -1.1653e-01, + 5.9746e-01, 1.8090e-01, -1.2004e-01, -1.1056e-01, -5.3151e-01, + -4.6283e-01, 3.1908e-02, -7.4224e-01, -3.6923e-01, 
-5.3822e-01, + -4.1702e-01, 2.8470e-01, 4.4310e-01, 1.6097e-01, 1.9126e-01, + -1.3619e-01, 2.4835e-01, 6.0324e-02, 8.2238e-02, 3.8127e-01, + -8.2515e-02, 4.5638e-01, 2.9320e-01, 1.9801e-01, 8.7940e-03, + -2.9512e-01, -5.5449e-01, 1.6342e-02, 2.2033e-01, 5.8513e-02, + 3.4597e-01, -3.2905e-01, -1.1703e-01, 1.9318e-01, 2.9449e-01, + 1.0837e-01, -1.0457e-03, -1.1668e-01, 1.6970e-01, -7.2471e-02, + -4.7684e-01, 9.0855e-02, 7.9242e-02, -8.0585e-02, 3.2629e-01, + 1.2611e-01, 3.2879e-01, -2.5022e-01, -1.6554e-01, 6.3878e-01, + 3.5972e-01, -1.9698e-01, -2.6073e-01, 5.1697e-01, -3.3824e-01, + -4.4351e-01, 1.3762e-01, 1.5554e-01, -3.4920e-02, -1.0653e-01, + 9.6069e-02, -2.2788e-01, 7.0399e-02, 1.6845e-01, 2.9433e-01, + -7.1957e-01, 4.2253e-01, -1.6981e-01, 4.2037e-01, -2.1842e-01, + 2.3953e-01, -3.1829e-01, -8.9034e-01, 7.7953e-01, 5.7918e-01, + 2.7067e-01, -4.5477e-01, -6.4798e-01, -8.2738e-02, 1.8428e-01, + 3.2654e-02, -2.2082e-01, 9.1149e-01, 4.6169e-01, -2.0381e-01, + 2.3860e-01, 4.9382e-01, -6.1471e-02, -4.3793e-01, -3.6538e-01, + -2.2946e-01, -1.1087e+00, 1.6603e-01, 6.7709e-01, -1.9357e-01, + -2.8555e-01, -3.4081e-01, -2.5572e-01, 2.9791e-01, -2.6723e-01, + -6.2435e-01, 4.9662e-01, 1.0662e-01, -4.2844e-01, 5.9660e-01, + 7.0396e-01, 8.5778e-02, -5.7455e-01, -2.1013e-01, 4.0237e-01, + 4.0175e-01, -4.7408e-01, 1.8092e-01, 6.5065e-01, 2.9740e-01, + -2.0762e-01, -1.7106e-01, -3.5587e-01, 5.4142e-01, -3.0241e-02, + 2.4929e-02, -3.9181e-01, 4.0413e-01, -5.2466e-01, -2.1312e-01, + 4.6921e-01, 2.1146e-02, -1.4481e-01, 6.1093e-01, 5.1777e-02, + -1.5702e-02, -1.3893e-01, 1.2269e-02, -4.5805e-01, -5.3720e-01, + -1.7545e-01, -6.6881e-02, -1.4616e+00, -1.8604e-01, 4.2172e-01, + -3.1246e-01, -4.6231e-02, -1.8316e-01, 3.5422e-01, 8.5290e-02, + 3.7975e-01, 5.6355e-01, 2.4853e-01, -3.5697e-01, 4.5364e-01, + -1.7780e-01, -3.3519e-03, -3.2779e-01, 5.3199e-01, -3.6808e-02, + 3.8655e-01, 2.2483e-02, 3.1360e-01, -1.3338e-01, 2.1981e-01, + -4.9110e-01, 6.6587e-01, -1.4109e-01, -1.5661e-01, 
-1.1334e-01, + -4.6857e-01, 4.8447e-01, -2.2792e-01, 1.3462e-01, -4.0982e-01, + 2.8143e-01, -4.1661e-01, -7.0864e-01, 2.5823e-01, -5.3160e-01, + 1.9616e-01, -1.9649e-01, -4.1130e-01, -1.6410e-01, 1.6438e-01, + 5.2778e-02, -6.6124e-01, -3.2948e-02, -5.4545e-01, 5.2703e-01, + 2.8672e-01, 5.7645e-01, -2.6746e-01, -3.9089e-01, 2.0937e-01, + -3.6736e-02, 2.4706e-01, 3.5762e-01, -1.6023e-01, -3.1083e-01, + -4.6344e-01, 4.1670e-01, -7.5496e-02, 1.4926e-01, -3.3287e-01, + -4.2243e-01, -6.3298e-02, 2.5848e-01, 3.7625e-01, 8.7703e-02, + -3.2085e-01, 3.9825e-01, 4.0217e-01, 3.1725e-01, -1.6784e-01, + -4.0644e-01, 5.2821e-01, -5.1684e-01, -5.4546e-01, 1.1714e-01, + 3.2991e-01, 2.9674e-01, 1.7642e-01, 5.5854e-01, -3.4519e-01, + -1.7785e-01, 2.0199e-01, -5.2339e-01, -6.3149e-02, -3.0370e-01, + -2.8747e-02, -3.4003e-01, -1.5512e-01, -4.3562e-01, 6.5145e-01, + 3.0143e-01, -3.2293e-01, 3.3838e-01, -1.3263e-01, 3.5033e-02, + 4.7880e-01, -8.1988e-01, -2.0750e-01, -6.8490e-02, -5.9730e-02, + 4.1138e-01, 2.3811e-01, -3.2911e-01, -6.8965e-02, 5.5200e-01, + 1.9072e-01, 6.2259e-02, 6.6072e-01, -6.0550e-01, 2.3396e-01, + 1.1968e-01, -5.3494e-01, 6.5234e-02, -1.3458e-01, -2.1542e-01, + -7.3702e-02, 1.8912e-01, 1.5528e-01, 4.7149e-01, -2.0252e-01, + -6.8936e-01, -1.6355e-01, 2.8603e-02, 7.8233e-02, 3.6492e-01, + 1.3363e-01, -4.6805e-01, -3.1390e-01, -7.6726e-01, -3.2683e-01, + -2.0703e-01, 5.6110e-01, 3.6255e-01, -2.0338e-01, 4.8012e-01, + -2.3513e-01, 3.9425e-01, 5.9808e-01, 8.5797e-01, 6.0902e-01, + 1.5637e-01, -7.8162e-01, -6.8807e-02, 1.1870e-01, -1.4694e-01, + -6.4079e-02, -5.5715e-01, -2.5838e-01, 8.1312e-01, 4.2588e-01, + -2.3839e-01, -5.9330e-01, -1.5378e-02, 5.7079e-01, 1.5399e-01, + 1.2549e-01, -1.4820e-01, 6.9585e-02])Parameter containing: +tensor([[ 0.0030, -0.0047, 0.0065, ..., 0.0104, -0.0140, 0.0053], + [-0.0176, 0.0191, -0.0227, ..., 0.0217, 0.0145, -0.0007], + [ 0.0033, -0.0146, 0.0133, ..., 0.0050, -0.0265, -0.0137], + ..., + [-0.0195, -0.0042, -0.0021, ..., -0.0063, 
0.0234, -0.0025], + [-0.0185, 0.0035, -0.0008, ..., 0.0019, 0.0356, 0.0087], + [-0.0246, -0.0227, -0.0082, ..., -0.0005, -0.0009, 0.0117]])Parameter containing: +tensor([-0.1141, 0.1932, 0.1205, ..., -0.0247, 0.0140, 0.0328])Parameter containing: +tensor([[-0.0028, -0.0143, 0.0161, ..., 0.0151, 0.0104, -0.0198], + [-0.0164, -0.0346, 0.0067, ..., 0.0199, -0.0166, -0.0236], + [-0.0236, -0.0092, -0.0233, ..., -0.0062, -0.0015, 0.0028], + ..., + [ 0.0289, 0.0172, -0.0065, ..., -0.0083, -0.0195, 0.0067], + [ 0.0268, -0.0132, 0.0347, ..., 0.0141, 0.0156, -0.0042], + [-0.0458, 0.0232, -0.0022, ..., -0.0111, 0.0161, 0.0254]])Parameter containing: +tensor([-1.0736e-01, 1.0443e-01, 7.3425e-02, -6.7932e-02, -9.2896e-02, + -8.6670e-02, -1.2103e-01, -5.3589e-02, 1.7627e-01, -6.3171e-03, + -4.8187e-02, 1.4111e-01, 1.7212e-01, -4.9286e-02, 1.7249e-01, + 4.2206e-02, -9.5154e-02, -8.4839e-02, -1.5198e-01, -2.9883e-01, + 4.8187e-02, 3.7262e-02, 2.3163e-02, -1.1139e-01, 1.5063e-01, + 1.6846e-01, -1.2781e-01, 1.2366e-01, -1.0736e-01, 5.1331e-02, + 4.5276e-04, -9.5032e-02, -7.3669e-02, -1.0352e-01, 3.6224e-02, + -1.9043e-02, -1.5588e-01, -9.9365e-02, 1.8542e-01, -1.8408e-01, + -9.4788e-02, -7.6111e-02, 1.0559e-01, -1.2573e-01, -5.5450e-02, + -7.9773e-02, -1.0445e-02, -8.3435e-02, 1.5945e-02, 2.6093e-02, + -3.2898e-02, -7.7942e-02, 1.5137e-02, 1.0034e-01, -1.7871e-01, + 3.0914e-02, 1.0242e-01, 7.2754e-02, -8.0872e-02, -2.6566e-02, + 1.4473e-02, 1.0205e-01, -6.9275e-02, -1.0187e-01, -4.8971e-04, + 1.1060e-01, -1.6309e-01, -2.8381e-02, -1.4929e-01, -2.3840e-01, + 1.5881e-01, 2.1576e-02, -2.5659e-01, -2.0959e-01, -6.7871e-02, + -1.3831e-01, -1.1322e-02, 1.9482e-01, 7.0679e-02, 8.3740e-02, + 8.5754e-02, 6.2469e-02, 4.4189e-02, -1.2451e-01, -1.7426e-02, + -8.2214e-02, 5.7739e-02, -1.0864e-01, 5.4840e-02, -1.2341e-01, + -1.9739e-01, -7.0007e-02, -8.5571e-02, 1.8713e-01, 1.6150e-01, + -6.2675e-03, 1.1002e-02, -1.0147e-02, 2.7344e-02, 1.8445e-01, + 2.1648e-03, -2.4158e-01, -2.2278e-01, 
3.5980e-02, -2.7328e-02, + -1.2108e-02, -9.7717e-02, 2.1240e-01, 4.9103e-02, -4.3526e-03, + 9.4177e-02, 1.1572e-01, -9.4666e-02, 9.7534e-02, -1.1841e-01, + 3.8849e-02, 1.2030e-01, 1.1273e-01, 2.3849e-02, -7.4219e-02, + -6.2622e-02, 5.7129e-02, 1.0669e-01, 1.3965e-01, 5.4871e-02, + -2.2141e-02, 2.1179e-01, 1.6406e-01, 3.1799e-02, 7.4036e-02, + 1.6895e-01, 5.3215e-03, 1.0449e-01, 1.9153e-01, 1.9507e-01, + 6.2500e-02, 7.5623e-02, -1.5210e-01, 9.2590e-02, 2.7084e-02, + 1.0925e-01, 1.1957e-01, -2.2449e-03, 2.6855e-02, -1.4319e-01, + 1.7468e-01, -5.4504e-02, -1.5747e-01, 2.6810e-02, -1.9165e-01, + -1.8250e-01, 5.1918e-03, 2.1277e-01, -1.1560e-01, -1.3281e-01, + 1.6016e-01, -1.0229e-01, -1.1914e-01, -1.4453e-01, -5.8136e-02, + -1.0779e-01, 4.4891e-02, 3.8757e-02, -1.1139e-01, 5.1636e-02, + 1.5125e-01, 1.2842e-01, -2.4377e-01, 1.0999e-01, 2.9358e-02, + 1.4502e-01, -1.5527e-01, -2.1805e-02, 2.1692e-01, -1.1218e-01, + 1.0236e-01, 7.4539e-03, -8.4717e-02, 1.4966e-01, 1.8872e-01, + -5.7465e-02, -5.9624e-03, -1.1603e-01, -1.1726e-02, -6.6223e-02, + 1.8970e-01, -8.3496e-02, -5.7422e-01, 1.2646e-01, 1.7444e-01, + -7.0557e-02, 3.9337e-02, -4.2816e-02, -4.2801e-03, 8.5571e-02, + 1.2659e-01, 9.8572e-02, 2.9053e-02, -2.4731e-01, 1.0773e-01, + 8.7952e-02, -6.2744e-02, -5.9128e-03, -6.5491e-02, -9.3384e-02, + -1.5308e-01, 8.5876e-02, -1.2384e-01, -1.9019e-01, -5.1514e-02, + 5.5939e-02, -1.6382e-01, -1.0547e-01, 9.9609e-02, 1.4856e-01, + -6.6223e-02, 8.5510e-02, -7.5562e-02, 1.1902e-01, 2.8223e-01, + 1.2524e-01, -1.1734e-02, -4.3060e-02, -4.0833e-02, 7.2670e-03, + 1.3892e-01, -1.6357e-01, 7.6447e-03, -4.2023e-02, 1.1340e-01, + -7.7637e-02, -1.1688e-01, 1.1066e-01, -2.5616e-03, -3.2196e-02, + -2.2461e-01, 2.0532e-01, -1.0785e-01, -1.8774e-01, 1.5796e-01, + 1.0852e-01, -1.0419e-01, 8.7219e-02, -7.9712e-02, -8.0139e-02, + -9.1736e-02, -2.4036e-01, 1.3232e-01, -5.8746e-02, 1.2024e-01, + 1.1035e-01, 1.5771e-01, 4.5227e-02, 8.1299e-02, -9.1187e-02, + 1.9730e-02, 1.4075e-01, -1.0773e-01, 
-1.8689e-01, -7.0801e-02, + 1.1511e-01, -6.6650e-02, -8.6548e-02, 2.0911e-01, 7.1335e-03, + -1.6431e-01, 1.6467e-01, 1.7029e-02, 1.8112e-02, 1.5778e-02, + 3.5797e-02, -2.7756e-02, -3.1891e-02, -1.3161e-02, 1.8433e-01, + -1.5723e-01, -1.9055e-01, 1.0565e-01, -2.0154e-01, 1.0498e-01, + -5.3894e-02, -2.2339e-01, -1.0962e-01, 9.6558e-02, -1.9165e-01, + 1.2128e-01, -1.7017e-01, 6.7871e-02, -1.3953e-01, 1.1292e-01, + 2.6489e-02, 1.4832e-01, 9.2163e-02, -2.7466e-02, -6.8665e-02, + -1.5723e-01, -2.7100e-01, 2.4872e-02, -5.6839e-03, -4.6051e-02, + -5.5359e-02, -1.4734e-01, -1.0590e-01, -4.3488e-02, -4.1809e-02, + 3.7689e-02, -2.5146e-02, 1.6040e-01, -1.3696e-01, -9.9487e-03, + -7.5989e-03, -9.9609e-02, 1.1340e-01, 1.1389e-01, 4.6509e-02, + -1.1243e-01, 3.4668e-02, 2.6794e-02, -2.1912e-01, 8.2703e-02, + -2.0721e-02, -1.6711e-01, 1.3098e-01, -5.6244e-02, 4.6606e-01, + -5.5878e-02, 1.9989e-02, -1.2585e-01, 5.1636e-02, 1.7383e-01, + -1.0144e-01, -3.9612e-02, -1.9394e-02, -2.8336e-02, -1.4197e-01, + -1.4929e-01, -1.4331e-01, 1.1859e-01, 6.8481e-02, 1.0187e-01, + 1.1658e-01, 5.1147e-02, -7.5012e-02, 1.5649e-01, -1.5808e-01, + 1.5295e-01, -1.3879e-01, -8.2397e-02, -6.9824e-02, 1.0431e-01, + 2.1667e-01, -2.3041e-02, 9.5139e-03, -2.7863e-02, 1.4185e-01, + 1.6272e-01, -1.8848e-01, -6.3599e-02, -9.6497e-02, 2.0227e-01, + -1.6394e-01, 6.7017e-02, 1.5173e-01, -3.3783e-02, 1.5503e-01, + -9.6741e-02, -1.7725e-01, -2.0422e-01, -5.4352e-02, -2.7634e-02, + -1.5588e-01, 1.4612e-01, 6.7932e-02, 6.2927e-02, -8.1177e-02, + -3.8361e-02, -1.5472e-02, -2.8000e-02, 1.4233e-01, 9.8343e-03, + 1.1359e-01, -1.0986e-01, -1.6711e-01, -2.0020e-01, 1.5820e-01, + -1.4697e-01, -2.6505e-02, 1.4746e-01, 1.6516e-01, -2.7420e-02, + 1.1584e-01, -9.7961e-02, 8.9355e-02, 5.3528e-02, -3.0640e-02, + 1.0779e-01, -1.3928e-01, -3.9337e-02, 9.4177e-02, -9.7656e-02, + -1.7456e-01, 4.3274e-02, -1.3574e-01, -6.3843e-02, 1.3519e-02, + 1.5564e-01, 1.0779e-01, -3.2013e-02, -8.8074e-02, 1.6495e-02, + 2.0105e-01, -1.4966e-01, 
8.1482e-02, -1.0805e-03, 1.8933e-01, + 9.0942e-02, -5.0934e-02, -1.3232e-01, 2.3669e-01, 1.5857e-01, + -7.8659e-03, -9.8145e-02, -2.0721e-02, 1.1163e-01, 1.2671e-01, + 1.2372e-01, 2.3083e-01, 1.8359e-01, -2.2925e-01, 4.1321e-02, + -1.1829e-01, -1.0571e-01, -1.0291e-01, -1.9531e-01, -8.3160e-03, + 6.5613e-02, -1.1700e-01, 7.0374e-02, -1.8311e-01, 1.5991e-01, + 2.8580e-02, -5.5809e-03, -1.0185e-03, -7.9529e-02, -2.3242e-01, + 4.9866e-02, 1.1224e-01, 2.1411e-01, 7.4524e-02, 2.9037e-02, + 1.2158e-01, -1.7532e-02, 4.8401e-02, -7.3242e-02, -1.2634e-01, + -1.4514e-01, 6.0944e-02, 7.9285e-02, 6.3660e-02, 6.2988e-02, + -3.4027e-02, -1.6675e-01, 1.4526e-01, -1.2054e-01, -3.8849e-02, + 1.4539e-01, -1.5491e-01, 2.1704e-01, -1.7700e-01, 2.5284e-02, + 8.9722e-03, 2.2937e-01, -1.5417e-01, -6.0669e-02, -1.3989e-01, + -7.7637e-02, -8.9111e-02, -8.1909e-02, -5.0201e-02, -7.2510e-02, + -8.9844e-02, 9.5398e-02, 1.3000e-01, -3.2684e-02, 1.9580e-01, + -1.4844e-01, -1.9104e-02, 4.3701e-02, 3.9734e-02, -9.6741e-02, + -1.0962e-01, 4.8431e-02, 1.9690e-01, -2.7756e-02, 1.5991e-01, + -7.5562e-02, 8.9294e-02, -1.7249e-01, 1.9373e-01, -1.5991e-01, + 9.1431e-02, -4.4647e-02, 1.9104e-02, -3.6591e-02, 1.2610e-01, + 1.1340e-01, -5.2338e-02, 1.3611e-01, 2.6779e-02, 7.7026e-02, + -1.3208e-01, 4.8126e-02, -7.7248e-03, -1.0803e-01, 2.1927e-02, + 1.9104e-02, 1.4935e-03, -2.2021e-01, -6.5674e-02, 1.7456e-01, + 3.1464e-02, 4.1779e-02, -7.9346e-02, 1.5149e-01, -8.9600e-02, + -1.4001e-01, 7.8491e-02, 9.1064e-02, -2.3022e-01, 1.0962e-01, + 1.2561e-01, -3.2806e-02, 7.1411e-02, 2.2107e-01, 6.4453e-02, + -5.3955e-02, 1.7712e-01, -1.9043e-01, 3.7170e-02, 7.6027e-03, + 2.0178e-01, -8.5693e-02, -1.7319e-02, 4.7379e-03, 1.3135e-01, + 1.2280e-01, -1.2152e-01, -5.5084e-02, -1.7859e-01, -7.8186e-02, + -3.0609e-02, -1.3196e-01, 1.1768e-01, -1.9995e-01, 7.0435e-02, + 1.8970e-01, 3.4943e-02, -1.0309e-01, -6.6406e-02, -1.6479e-01, + 9.5947e-02, -1.6479e-01, -6.7749e-02, -1.6327e-02, -2.1143e-01, + -1.1493e-01, 
-2.2009e-01, -1.3696e-01, 6.6040e-02, -2.4756e-01, + -7.2083e-02, 6.8665e-02, 2.4353e-02, -9.4177e-02, 1.6199e-01, + -4.8566e-04, -7.3853e-03, 6.0059e-02, 3.1708e-02, 1.1475e-01, + 1.5784e-01, -1.0327e-01, -5.5908e-02, 1.0773e-01, -6.5796e-02, + -8.8135e-02, 9.3750e-02, 5.8411e-02, 7.5256e-02, 5.3314e-02, + 5.5573e-02, -9.7717e-02, -3.7537e-02, -1.0382e-01, -2.1497e-01, + 1.0992e-01, 2.7237e-02, -3.6621e-02, 1.0443e-01, 4.6326e-02, + 1.8665e-01, 1.5149e-01, -1.6443e-01, -2.2293e-02, 4.4891e-02, + 1.3831e-01, -7.9834e-02, -1.6602e-01, -1.0413e-01, 1.4551e-01, + 2.4857e-02, 2.1619e-01, -3.0869e-02, 1.5637e-01, 2.2595e-01, + 2.6489e-01, 1.3574e-01, 6.9214e-02, -6.5063e-02, 6.5674e-02, + -1.8701e-01, 2.1591e-02, -1.8356e-02, 2.3755e-01, -1.3330e-01, + 2.3608e-01, 1.2390e-01, 6.3232e-02, 4.4495e-02, 9.2346e-02, + -2.1094e-01, 1.6357e-01, -3.6133e-02, -1.0950e-01, -7.8125e-02, + -1.8263e-03, 1.1737e-01, 9.1187e-02, -3.7498e-03, 2.4780e-02, + 1.7712e-01, -2.2949e-02, -4.4617e-02, 1.2292e-01, -2.5903e-01, + 1.0883e-01, -8.7463e-02, -1.7957e-01, -1.3660e-01, -1.5747e-01, + -1.5308e-01, -1.6882e-01, 1.2262e-01, -1.2756e-01, 1.8530e-01, + 1.5881e-01, 1.1377e-01, 8.3679e-02, -1.3367e-01, -1.0553e-01, + 9.5520e-02, 1.2903e-01, 2.4023e-01, -1.0144e-01, -1.6565e-01, + -8.4412e-02, 7.3120e-02, 2.3422e-02, 3.3966e-02, 1.1884e-01, + 1.5625e-01, -1.3562e-01, -1.8359e-01, 4.9286e-02, -6.8703e-03, + 4.0527e-02, 9.3018e-02, 2.2473e-01, 2.0813e-02, 5.7648e-02, + -1.1462e-01, 3.4595e-01, -1.0187e-01, -7.7637e-02, 4.7668e-02, + 2.3022e-01, 5.8960e-02, 3.5004e-02, 2.1509e-01, -9.2224e-02, + -2.1561e-02, -6.1493e-02, -8.3557e-02, -7.7026e-02, -5.3711e-02, + -5.2643e-02, 4.7035e-03, 4.9400e-03, -1.0541e-01, 1.6003e-01, + 1.1176e-01, -5.6061e-02, -1.3710e-02, 7.5500e-02, 6.3538e-02, + 1.3892e-01, -1.0809e-01, -3.7262e-02, 1.0521e-02, -1.6418e-01, + 8.0872e-02, -1.8250e-01, -1.7847e-01, -1.3806e-01, 6.9031e-02, + 7.8613e-02, 1.9177e-01, 2.2705e-01, 3.6869e-03, 6.1218e-02, + 1.7139e-01, 
-9.0271e-02, 6.0608e-02, 7.4036e-02, -1.3232e-01, + 9.5276e-02, 6.6284e-02, -5.0781e-02, 5.2490e-02, -7.6965e-02, + -2.0837e-01, -1.0034e-01, -1.8958e-01, -4.4922e-02, 1.7761e-01, + 6.3538e-02, 6.7871e-02, -3.6926e-02, -4.1008e-03, 7.0679e-02, + -5.2277e-02, 2.0203e-01, 3.3447e-02, -3.5339e-02, -7.2823e-03, + -3.6713e-02, 8.4167e-02, -8.8867e-02, 7.8918e-02, 1.6525e-02, + 1.9385e-01, 2.0496e-01, -9.1309e-02, 4.7684e-03, -1.2830e-01, + 3.9520e-03, -1.0559e-01, -1.2427e-01, 1.4722e-01, 2.3108e-01, + 2.5597e-03, -1.2164e-01, -1.4050e-01, 4.4403e-02, 1.9202e-01, + 7.7820e-02, -1.1420e-01, 8.2031e-02])Parameter containing: +tensor([1.8626, 1.8031, 1.7707, 1.7813, 1.5411, 1.7607, 1.7641, 1.6653, 1.7079, + 1.7684, 1.8017, 1.6956, 1.6986, 1.7498, 1.8004, 1.8909, 1.7765, 1.8687, + 1.7344, 0.4985, 1.8175, 1.8176, 1.7591, 1.7257, 1.8502, 1.6815, 1.7383, + 1.7850, 1.8199, 1.7943, 1.8372, 1.6945, 1.7009, 1.7682, 1.7271, 1.7660, + 1.8474, 1.7567, 1.7404, 1.6637, 1.8308, 1.8491, 1.7334, 1.8106, 1.7488, + 1.6771, 1.7654, 1.7597, 1.8563, 1.7420, 1.8416, 1.8208, 1.7759, 1.7856, + 1.7080, 1.8179, 1.7241, 1.7123, 1.7897, 1.7672, 1.7761, 1.7262, 1.7635, + 1.7302, 1.8483, 1.7563, 1.7805, 1.7103, 1.6915, 1.6331, 1.7328, 1.7430, + 1.9799, 1.7362, 1.7595, 1.7817, 1.7687, 1.7316, 1.6897, 1.7418, 1.7713, + 1.7142, 1.8107, 1.7375, 1.7881, 1.7923, 1.8721, 1.7792, 1.7724, 1.6501, + 1.7684, 1.8037, 1.7338, 1.7793, 1.7959, 1.7426, 1.7526, 1.8015, 1.7506, + 1.7252, 1.8087, 1.8846, 1.7413, 1.7966, 1.6733, 1.7162, 1.7589, 1.7807, + 1.7870, 1.7457, 1.6903, 1.7602, 1.7180, 1.7813, 1.7669, 1.7081, 1.7036, + 1.7858, 1.8298, 1.6986, 1.7633, 1.7596, 1.7947, 1.7986, 1.7408, 1.7156, + 1.8134, 1.8334, 1.8831, 1.8223, 1.7169, 1.7277, 1.8390, 1.7366, 1.6804, + 1.8091, 1.7192, 1.7432, 1.7482, 1.7376, 1.7989, 1.7752, 1.7880, 1.6884, + 1.8083, 1.7428, 1.7844, 1.7537, 1.7240, 1.7806, 1.6277, 1.7218, 1.7583, + 1.8438, 1.7764, 1.4386, 1.7788, 1.7629, 1.7766, 1.7988, 1.5892, 1.7308, + 1.7457, 1.7244, 1.7396, 1.8274, 
1.7685, 1.8633, 1.7943, 1.7235, 1.6937, + 1.7572, 1.8016, 1.8091, 1.8354, 1.6880, 1.8347, 1.7619, 1.7627, 1.7118, + 1.6843, 1.8207, 1.8057, 1.8163, 1.7315, 1.5833, 1.7522, 2.7548, 1.7724, + 1.6638, 1.8069, 1.7381, 1.7530, 1.8312, 1.7646, 1.8395, 1.7653, 1.6637, + 2.9893, 1.7513, 1.8372, 1.7216, 1.7038, 1.8960, 1.7074, 1.7235, 1.8619, + 1.7499, 1.7265, 1.7715, 1.7588, 1.6586, 1.7386, 1.6842, 1.7496, 1.7123, + 1.7183, 1.8547, 1.8007, 1.6818, 1.7232, 1.7186, 1.5766, 1.7863, 1.7755, + 1.7346, 1.7565, 1.8070, 1.6555, 1.7957, 1.8173, 1.7355, 1.8109, 1.7154, + 1.8207, 1.8189, 1.6928, 1.7726, 1.8410, 1.7976, 1.7274, 1.7988, 1.8094, + 1.7929, 1.7947, 1.6713, 1.7325, 1.6619, 1.8692, 1.8463, 1.7701, 1.8279, + 1.7421, 1.7823, 1.7778, 1.7596, 1.7339, 1.7867, 1.8402, 1.7847, 1.7104, + 1.7948, 1.7840, 1.6757, 1.7317, 1.7500, 1.8238, 1.8030, 1.7794, 1.7665, + 1.7595, 1.7266, 1.7846, 1.8269, 1.8003, 1.7487, 1.7192, 1.8082, 1.7012, + 1.7666, 1.7735, 1.7122, 1.7510, 1.8224, 1.7595, 1.7332, 1.8395, 1.7557, + 1.7576, 1.7263, 1.7551, 1.7655, 1.7280, 1.8227, 1.7688, 1.7970, 1.5910, + 1.8495, 1.8002, 1.8020, 1.7830, 1.7413, 1.8417, 1.7040, 1.8032, 1.7786, + 1.7579, 1.7294, 1.7574, 1.7677, 1.7913, 1.7839, 1.7360, 1.7482, 1.7686, + 1.7938, 1.7941, 1.7248, 1.7029, 1.7483, 1.7367, 1.6134, 1.7984, 1.8084, + 0.9421, 1.7933, 1.7893, 1.7963, 1.7915, 1.7544, 1.7983, 1.7982, 1.6628, + 1.7429, 1.7766, 1.7301, 1.7107, 1.7985, 1.7769, 1.7670, 1.8131, 1.6624, + 1.6756, 1.8100, 1.7682, 1.7307, 1.7146, 1.7440, 1.8184, 1.8075, 1.5879, + 1.7574, 1.8042, 1.8490, 1.8459, 1.7467, 1.8094, 1.7255, 1.7316, 1.7178, + 1.8016, 1.6997, 1.7831, 1.8231, 1.7265, 1.6869, 1.7430, 1.7517, 1.7799, + 1.7093, 1.7564, 1.7116, 1.7531, 1.7908, 1.8358, 1.8037, 1.7656, 1.8286, + 1.7787, 1.7869, 1.7289, 1.7703, 1.8054, 1.7791, 1.6605, 1.7075, 1.8083, + 1.7061, 1.7457, 1.7999, 1.6409, 1.8023, 1.7131, 1.7376, 1.7145, 1.6934, + 1.8714, 1.7531, 1.7853, 1.8259, 1.8281, 1.7963, 1.7558, 1.6699, 1.8304, + 1.7752, 1.8119, 1.7505, 1.7462, 
1.7651, 1.7035, 1.7693, 1.7784, 1.8790, + 1.7997, 2.7079, 1.8050, 1.7434, 1.7304, 1.7496, 1.7413, 1.7786, 1.7676, + 1.7633, 1.8048, 1.7498, 1.5717, 1.7788, 1.7032, 1.7021, 1.7289, 1.7154, + 1.7574, 1.7357, 1.7384, 1.8404, 1.7523, 1.7456, 1.6581, 1.7213, 1.7779, + 1.8164, 1.7584, 1.8338, 1.7372, 1.7945, 1.7703, 1.7775, 1.7512, 1.7422, + 1.7518, 1.6974, 1.7084, 1.8513, 1.8187, 1.7494, 1.7221, 1.8195, 1.7825, + 1.7718, 1.7819, 1.7341, 1.8106, 1.7494, 1.7639, 1.7447, 1.7622, 1.7019, + 1.7713, 1.8107, 1.7751, 1.6831, 1.8983, 1.8854, 1.7562, 1.7641, 1.8450, + 1.6962, 1.7789, 1.7653, 1.8096, 1.7490, 1.7509, 1.8375, 1.8035, 1.7372, + 1.8059, 1.8244, 1.6840, 1.7538, 1.7917, 1.8426, 1.7746, 1.8419, 1.7937, + 1.7681, 1.8910, 1.7616, 1.7887, 1.6965, 1.7099, 1.7803, 1.7888, 1.8322, + 1.7731, 1.7062, 1.8104, 1.7924, 1.6989, 1.7703, 1.7718, 1.8199, 1.7933, + 1.7476, 1.7699, 1.7553, 1.7314, 1.7351, 1.8270, 1.7637, 1.7724, 1.6985, + 1.7598, 1.7834, 1.7270, 1.7643, 1.8691, 1.6961, 1.7836, 1.7457, 1.7214, + 1.7549, 1.7503, 1.7444, 1.7038, 1.7202, 1.7286, 1.7643, 1.8116, 1.8197, + 1.7559, 1.8472, 1.7803, 1.6730, 1.8114, 1.7312, 1.8234, 1.8250, 1.7086, + 1.7285, 1.8161, 1.7310, 1.7546, 1.7177, 1.7930, 1.8005, 1.8174, 1.6891, + 1.7722, 1.7018, 1.8242, 1.6458, 1.7265, 1.7661, 1.8259, 1.7962, 1.6896, + 1.7895, 1.7994, 1.7630, 1.7438, 1.7374, 1.7818, 1.7458, 1.7538, 1.7060, + 1.7827, 1.7179, 1.6832, 1.6815, 1.6348, 1.7162, 1.8015, 1.7629, 1.8582, + 1.7548, 1.8259, 1.7654, 1.7764, 1.8435, 1.7540, 1.8566, 1.8117, 1.8390, + 1.7786, 1.6708, 1.7558, 1.7757, 1.6846, 1.8093, 1.7874, 1.7895, 1.8258, + 1.7518, 1.7854, 1.7683, 1.8340, 1.8175, 1.7892, 1.7295, 1.7386, 1.7859, + 1.7975, 1.7490, 1.7827, 1.7883, 1.7735, 1.7281, 1.8743, 1.7000, 1.7167, + 1.7611, 1.7891, 1.7726, 1.7243, 1.7394, 1.7958, 1.6989, 1.8408, 1.7790, + 1.7038, 1.7744, 1.7268, 1.6995, 1.7691, 1.7200, 1.7585, 1.7669, 1.8629, + 1.7698, 1.8005, 1.8482, 1.6710, 1.7339, 1.5718, 1.8037, 1.7726, 1.7195, + 1.7267, 1.7365, 1.7477, 1.7643, 
1.7557, 1.7137, 1.6893, 1.8008, 1.7865, + 1.7747, 1.7637, 1.7201, 1.6544, 1.7408, 1.6003, 1.7998, 1.7676, 1.7365, + 1.7003, 1.7455, 1.7690, 1.8151, 1.7240, 1.8377, 1.7835, 1.7003, 1.7420, + 1.8121, 1.7799, 1.7392, 1.7157, 1.7833, 1.8003, 0.4432, 1.7083, 1.8084, + 1.8351, 1.6847, 1.7720, 1.7214, 1.7016, 1.6804, 1.7575, 1.8147, 1.8383, + 1.8066, 1.7622, 1.7447, 1.8649, 1.6914, 1.7502, 1.7756, 1.8313, 1.7522, + 1.7012, 1.7607, 1.7849, 1.6833, 1.7358, 1.7736, 1.8525, 1.8686, 1.7100, + 1.8097, 1.8105, 1.7527, 1.7602, 1.7818, 1.7557, 1.7135, 1.7930, 1.7505, + 1.8375, 1.7433, 1.7744, 1.7698, 1.7585, 1.7979, 1.5292, 1.8328, 1.7411, + 1.7647, 1.8039, 1.7369, 1.7095, 1.7712, 1.7364, 1.7643, 1.7983, 1.8086, + 1.7453, 1.7678, 1.7649, 1.7738, 1.8089, 1.8234, 1.7720, 1.6807, 1.7681, + 1.6920, 1.7764, 1.8613, 1.8168, 1.6981, 1.7320, 1.8520, 1.7266, 1.7517, + 1.8211, 1.7164, 1.7532, 1.7514, 1.8598, 1.7562, 1.7970, 1.7824, 1.6946, + 1.7698, 1.7516, 1.7438])Parameter containing: +tensor([ 1.5714e-01, 1.0521e-01, -2.1855e-01, -3.6352e-02, 1.8207e-01, + 1.7251e-01, 1.3290e-01, 7.0782e-02, -1.9266e-01, -9.2168e-02, + 7.9184e-02, -5.8226e-02, -1.7935e-01, -2.4685e-02, -4.8018e-02, + -9.5424e-02, 2.4386e-02, -5.1330e-02, 3.3999e-02, 3.2014e+00, + -3.9015e-02, 1.1516e-01, 8.0055e-02, 9.1744e-02, 2.1027e-02, + 1.0418e-02, 7.2239e-02, -1.1568e-01, 3.2005e-03, -4.8645e-02, + 1.4117e-01, 7.1536e-02, -6.0574e-02, 1.8058e-01, 7.5935e-02, + -8.4233e-02, 2.0770e-02, -1.0301e-01, -1.0013e-01, 1.5660e-01, + 8.0592e-02, 1.2721e-01, -4.6441e-02, 5.7574e-02, 3.4196e-02, + -2.7855e-03, 1.1291e-01, -4.5029e-03, 4.9145e-02, 5.6455e-02, + 1.2398e-03, -2.5091e-02, 3.1097e-02, -9.4255e-03, 2.5114e-02, + 1.5999e-01, -4.6883e-02, -1.1271e-01, 4.5208e-02, 1.4600e-02, + 1.2989e-01, -1.0197e-01, 1.6056e-01, 8.7824e-02, 1.6033e-01, + 3.4396e-02, 1.3704e-01, 4.1646e-02, -5.3530e-02, 2.4990e-01, + 4.8798e-02, 4.4839e-02, 7.1403e-01, 1.1320e-01, -1.0711e-01, + 1.2396e-01, 1.3501e-01, -6.7984e-02, -8.9051e-03, 
9.0761e-02, + 9.5781e-03, -5.4714e-02, -9.0754e-02, 1.3412e-01, -1.9785e-02, + 8.3251e-02, 9.4815e-02, 1.7186e-01, -3.4824e-02, 6.8364e-02, + -2.1312e-02, -4.3523e-02, 3.8732e-02, -6.0781e-03, -7.7346e-02, + 3.1005e-02, 6.6436e-02, -8.4234e-02, -1.1001e-01, 2.1268e-03, + -1.4562e-02, -4.7741e-02, -7.1506e-02, -1.0927e-01, 7.7507e-02, + -1.2546e-02, 1.1501e-01, -5.7693e-02, 5.3100e-02, -1.1730e-01, + -8.1036e-02, -1.0260e-01, -8.1279e-02, 9.5656e-02, 5.0190e-02, + 4.7565e-02, 5.3448e-03, 4.5320e-02, 1.9243e-02, 1.4295e-01, + -3.6603e-02, -3.0097e-01, -1.3997e-01, -6.3057e-02, -2.0284e-01, + -5.2843e-02, -2.4076e-01, 7.0576e-02, 1.0352e-01, -3.0233e-02, + -1.0235e-01, -5.7698e-03, -3.2386e-02, -5.5490e-02, -1.1519e-01, + 8.9923e-02, -7.0423e-02, -5.0296e-02, 1.8792e-02, -9.9913e-02, + -1.2686e-02, 7.4479e-03, 2.6728e-02, 1.4484e-02, 5.6417e-02, + -9.0091e-02, -7.0845e-03, 2.2955e-02, -1.0560e-01, 1.6517e-01, + 3.1767e-01, -1.0263e-01, -1.2595e-01, -6.4043e-02, -9.5674e-02, + -4.9398e-01, -3.6656e-02, -2.0442e-02, -6.8514e-03, 4.0622e-02, + 2.4586e-01, -1.6740e-01, -6.2760e-02, 8.0933e-03, -6.1926e-02, + 1.3707e-02, 3.3186e-03, 8.2177e-02, 5.3861e-02, 1.9900e-02, + -9.7574e-02, 8.0126e-02, 9.0824e-02, -5.0445e-02, -2.1681e-02, + -1.8436e-01, -9.9015e-02, 8.8390e-03, -4.9693e-02, 9.5410e-02, + -7.1627e-02, -4.8957e-02, 1.9217e-01, 1.0617e-01, 6.4168e-02, + -1.4703e-01, 9.9676e-02, 9.1445e-01, -1.4521e-01, -9.8065e-02, + -1.1537e-01, -1.0495e-01, 1.0574e-01, 1.5191e-01, -6.7199e-02, + -3.9562e-02, -2.5574e-02, -2.0092e-01, 5.2279e-01, 3.2230e-02, + -2.6505e-01, 6.5943e-02, 1.2653e-02, 3.2023e-02, 8.7500e-02, + 1.5057e-01, -2.0009e-01, 7.4692e-02, 1.0776e-01, -5.0648e-02, + 6.2093e-02, 3.3582e-01, -3.1111e-02, -9.1817e-02, 3.2584e-02, + -2.9921e-02, -1.3872e-02, 8.5125e-03, -9.8826e-02, -3.3480e-01, + 4.8171e-02, -1.2216e-02, 1.8550e-01, -2.2219e-02, -2.7731e-03, + -2.9300e-02, 6.2133e-02, 2.5511e-02, 1.6478e-01, -5.1522e-02, + -3.3598e-02, 2.7848e-02, 1.7212e-02, 
4.1664e-02, -3.3681e-02, + -1.5164e-02, -1.1802e-02, 1.1873e-01, 1.4403e-01, -1.8655e-02, + 3.1523e-02, 1.0407e-01, -3.3324e-02, 1.0530e-01, -8.4562e-03, + 2.3715e-02, -8.4441e-03, -8.1965e-02, 4.1591e-02, -4.1058e-02, + -8.8389e-02, -7.8143e-02, 1.2788e-01, -1.1700e-01, 1.8904e-01, + -3.8862e-02, 3.0045e-02, -1.1734e-01, 7.6597e-02, 3.1204e-02, + -1.0797e-01, -8.3629e-02, 6.1842e-02, 1.9294e-02, -1.1804e-01, + 7.2849e-02, -2.0952e-02, 2.2642e-02, -6.2562e-02, -6.2156e-02, + 6.7846e-02, 9.7213e-02, 1.1358e-01, -1.4457e-01, -3.7113e-03, + 4.6414e-02, -2.3938e-02, 3.0535e-02, 1.3291e-01, -1.3570e-01, + 1.5219e-01, 2.9188e-01, -3.4406e-02, 4.5174e-02, 5.9539e-02, + 3.4760e-02, -7.6875e-04, -3.4697e-02, 1.7763e-01, -9.1222e-02, + -3.2008e-02, 6.8429e-02, 7.8547e-02, 3.7662e-02, 1.3554e-01, + -6.3083e-02, 3.0479e-01, -4.1982e-03, 4.5922e-02, 9.9895e-02, + -7.3645e-03, 1.2549e-02, -2.1003e-02, 2.0596e-01, 5.0304e-02, + -1.2328e-02, 1.4580e-02, -9.2944e-02, 6.6810e-02, -7.2437e-02, + 8.9594e-02, 5.3213e-02, -8.6073e-02, -1.5086e-01, 1.6785e-02, + -8.3192e-03, -1.9318e-02, -5.3071e-03, 2.6348e-01, -2.6442e-02, + -2.1526e-01, 2.4490e-01, -1.1710e-01, 2.5417e-02, -1.3422e+00, + -1.9219e-01, -2.5468e-02, -9.7910e-03, 1.0712e-03, -1.9965e-01, + 2.1429e-01, 2.8356e-03, 1.3562e-01, -7.5370e-02, 1.5262e-01, + 7.8208e-02, 1.3468e-02, 7.6906e-03, -3.6844e-02, -4.3252e-02, + -4.5571e-02, 6.0231e-02, 2.0208e-02, -9.7585e-02, 1.3374e-01, + -3.9385e-03, 8.0897e-02, 1.5606e-01, -1.0525e-02, -3.8102e-02, + -1.0328e-01, 5.2117e-02, -2.4488e-02, 1.3929e-01, -5.5941e-02, + -3.7997e-02, -1.4443e-02, -8.2470e-02, -5.2119e-02, -7.9846e-02, + 2.0052e-02, 8.7459e-05, -8.3340e-02, -1.1757e-01, -4.5237e-02, + 1.9323e-02, 2.7198e-02, 1.7889e-01, 8.7077e-02, 1.5148e-02, + 1.2188e-01, -1.0065e-02, -2.4437e-02, -7.9199e-03, -1.5123e-02, + 1.5032e-01, 1.0752e-01, 2.0341e-01, 3.4366e-02, -9.1777e-02, + -1.4547e-01, 1.8320e-01, 4.5751e-02, 3.7170e-02, -8.5500e-02, + -7.2542e-02, -3.7958e-02, -1.6933e-01, 
-9.1045e-02, -4.4125e-02, + -7.2687e-04, -3.1537e-02, 1.5086e-02, -4.8495e-02, 6.5625e-02, + -1.6750e-02, 1.9904e-03, -1.1916e-02, -7.7741e-02, 3.0378e-02, + 1.3298e-01, 1.6665e-02, 7.6148e-02, -1.4184e-02, -1.0865e-01, + -2.5864e-01, -9.4924e-02, 5.2374e-02, 4.8062e-02, 4.8401e-02, + -6.1411e-02, 5.2295e-02, 1.0746e-01, -2.6086e-02, -1.8869e-01, + -4.6206e-01, -2.0475e-03, 3.6519e-02, -1.5038e-01, -4.3278e-03, + 4.2967e-02, 1.3432e-02, 4.5682e-02, -8.6488e-02, -1.7540e-01, + 5.3118e-02, -2.6750e-01, -1.5931e-01, 1.2383e-01, 8.0944e-03, + -6.6257e-02, 4.0884e-02, 2.4966e-02, 9.8009e-02, 2.3656e-01, + -6.1301e-03, 3.3331e-02, 9.9844e-02, 1.9059e-01, -1.0462e-01, + 1.1752e-02, -5.6101e-02, -3.5903e-02, 1.7924e-01, 1.7801e-01, + -1.6374e-01, -2.3782e-02, -5.2861e-02, 9.9118e-02, -1.7693e-02, + -3.4212e-02, -3.9081e-02, -2.0267e-01, 1.2076e-01, 9.5575e-02, + -1.0296e-02, -2.5224e-04, -4.2425e-03, -6.1769e-02, -5.4251e-02, + -3.3280e-02, 1.0036e-01, -1.9815e-01, 5.9549e-02, -4.9488e-02, + -9.9051e-02, 7.7852e-02, -1.2828e-01, 1.6435e-01, 1.2599e-02, + -6.9440e-02, -1.6187e-01, -3.3820e-03, 9.6772e-02, 5.1878e-02, + 6.2215e-02, 2.1757e-01, -6.2182e-02, -1.7840e-03, 7.6085e-02, + 1.4915e-01, -5.7066e-02, 4.5222e-02, 2.0680e-01, -1.4390e-01, + 1.7874e-03, -3.0383e-02, 1.8139e-01, 6.8907e-02, -2.5350e-03, + 4.4395e-02, 4.1111e-02, -1.0086e-02, 2.7673e-02, -2.1411e-01, + -7.8112e-02, -1.3029e-01, 9.8681e-02, -9.1540e-02, 6.2187e-02, + 1.4301e-02, -5.2033e-02, 4.9276e-02, 3.3699e-03, -7.4605e-02, + 7.3420e-02, 4.7442e-02, 1.0370e-01, 6.9103e-02, 2.2118e-03, + -7.5054e-02, 2.4609e-03, -2.0037e-01, 9.2002e-02, -1.2598e-01, + -4.0373e-02, 4.0524e-02, 7.1912e-02, 2.4932e-02, 1.0932e-02, + 1.6149e-03, -2.0728e-02, 1.1106e-01, 1.1121e-02, -1.1926e-01, + 1.0912e-02, -3.5671e-02, -9.7672e-03, 2.8253e-02, -1.8453e-01, + 3.7205e-02, 5.7847e-02, 1.4349e-02, -1.2725e-01, -1.2037e-02, + -6.4877e-02, -1.7167e-02, 4.8023e-02, -3.2393e-02, -2.7004e-02, + -6.4756e-03, 7.6627e-02, -3.2907e-02, 
9.5477e-02, 4.4782e-02, + 1.4996e-01, 1.3177e-01, 7.9663e-02, 1.2557e-01, 5.1729e-02, + -6.4316e-02, 1.2403e-01, -2.0048e-01, 1.1972e-01, -3.4580e-02, + 5.9819e-02, 1.1675e-01, -2.3962e-02, 7.6439e-03, 1.1763e-01, + -5.1537e-02, 1.5785e-01, 3.0192e-02, 1.2142e-01, 2.1545e-01, + 1.5338e-01, -1.1547e-02, 4.5611e-02, 8.2466e-02, 5.1706e-02, + 1.4715e-01, -6.1603e-03, 3.3504e-02, 2.1085e-02, 3.6481e-02, + 1.1234e-01, 8.1627e-03, 7.1839e-03, -2.7252e-02, -8.4993e-02, + -1.4174e-01, -9.3428e-02, 7.1645e-02, 8.5555e-02, 1.3293e-01, + -1.9959e-02, -1.6840e-02, -9.3083e-02, 9.5655e-02, 3.6715e-02, + -5.1703e-02, 9.1998e-02, 7.1881e-02, -1.0141e-01, 1.2980e-01, + -1.0584e-01, 8.7971e-02, 1.0475e-01, -8.8393e-02, 6.7205e-02, + -5.2151e-02, -1.4961e-01, -5.4824e-02, -5.6715e-02, -6.3596e-02, + -1.0653e-01, 1.2907e-02, 1.4339e-01, 1.5099e-01, 6.3067e-03, + 1.6588e-02, 8.9463e-02, -1.1750e-01, 1.6891e-02, -1.7042e-01, + -1.6635e-01, 5.1580e-02, 1.3655e-01, -2.4035e-02, -1.0723e-01, + 7.4677e-02, 2.4940e-02, 6.3164e-02, -2.1078e-02, 8.4296e-02, + -5.9920e-02, -4.0778e-02, 6.1650e-02, 7.4657e-02, -1.2583e-01, + 1.5846e-01, 5.1025e-03, 1.1199e-01, 1.7501e-01, 9.1035e-02, + 1.1224e-02, -5.2327e-03, 1.1513e-02, 5.1251e-02, -1.2470e-01, + -1.6167e-02, -1.0953e-01, 5.0011e-02, -8.7080e-02, 1.6923e-01, + 2.2695e-02, 3.1640e-02, 1.3928e-01, 1.0300e-01, 2.1505e-01, + 2.4769e-01, 1.1580e-02, -3.5025e-02, 1.4856e-01, -3.9221e-03, + 5.0325e-02, -1.7961e-01, -1.2315e-02, 1.3724e-01, 1.0216e-01, + -9.3912e-02, -2.9979e-02, -1.6285e-01, 7.8192e-03, 8.5767e-02, + -3.3118e-02, -2.1102e-01, -1.0094e-01, -3.0506e-02, -2.1837e-02, + -1.9102e-01, 8.7712e-02, 1.1036e-01, 6.0651e-03, 7.0093e-02, + 1.0984e-01, 9.9566e-03, -7.1757e-03, 4.3886e-02, 5.9905e-02, + 3.3323e-02, -3.7092e+00, 5.5709e-02, 7.9547e-02, 5.5805e-03, + -1.4747e-01, 1.5289e-02, -5.2988e-02, 3.3662e-02, 1.3538e-01, + -2.1147e-01, 7.8032e-02, 6.7690e-02, -3.3204e-02, -1.3344e-01, + 1.4210e-01, 4.8270e-02, -2.6500e-02, 1.0287e-01, 
-5.5116e-02, + 1.6589e-01, 1.1236e-01, -4.1125e-02, -5.9105e-02, -2.1212e-01, + 3.8394e-02, 1.5368e-01, 4.8188e-02, -9.4478e-02, 2.1040e-02, + -9.0515e-02, 1.4378e-01, 1.5708e-01, -6.8028e-04, 4.4458e-02, + -7.1579e-03, 6.5651e-02, -1.2955e-02, -8.5481e-02, -4.0749e-02, + -8.3394e-02, 3.5046e-02, 1.2930e-02, -1.7214e-01, 9.5684e-02, + -4.2899e-02, -2.3204e-01, 1.1294e-01, -4.2825e-03, 1.6472e-01, + 1.0376e-02, 9.8266e-02, 1.3218e-01, 1.2411e-01, -3.2590e-02, + 3.2784e-02, -8.1404e-02, 6.8053e-02, -7.2287e-02, -9.7887e-02, + 3.1597e-02, -1.8775e-01, -1.8899e-01, 6.3650e-02, -6.3658e-02, + -5.6036e-02, -1.1804e-01, 3.8965e-02, 1.1923e-01, -3.0770e-02, + -6.4427e-03, -4.9890e-02, -9.9548e-02, 9.1358e-03, 1.5483e-01, + 3.1754e-02, 7.0219e-02, 8.9523e-02, -3.5334e-02, -1.6340e-01, + -2.0951e-02, 7.2451e-02, 1.5202e-01, -1.3042e-01, -1.3775e-01, + -7.0584e-02, 1.7459e-01, -1.6711e-03])Parameter containing: +tensor([[-0.0193, 0.0156, -0.0011, ..., 0.0403, -0.0181, -0.0006], + [-0.0301, 0.0043, -0.0099, ..., -0.0052, 0.0136, -0.0069], + [-0.0016, 0.0183, 0.0016, ..., -0.0064, -0.0117, 0.0118], + ..., + [ 0.0070, 0.0163, -0.0119, ..., 0.0026, -0.0226, 0.0221], + [ 0.0004, 0.0120, 0.0207, ..., -0.0106, 0.0029, 0.0323], + [ 0.0079, -0.0005, 0.0047, ..., -0.0068, -0.0219, 0.0219]])Parameter containing: +tensor([-0.3269, -0.2380, -0.3926, ..., -0.2299, 0.2595, -0.2932])Parameter containing: +tensor([[-0.0171, 0.0118, 0.0016, ..., -0.0016, 0.0133, -0.0409], + [-0.0224, -0.0010, -0.0217, ..., 0.0149, 0.0045, -0.0212], + [ 0.0286, 0.0206, -0.0153, ..., -0.0103, -0.0133, -0.0120], + ..., + [-0.0076, -0.0130, 0.0111, ..., 0.0085, -0.0125, 0.0113], + [-0.0247, -0.0079, 0.0172, ..., 0.0136, -0.0062, -0.0172], + [ 0.0397, -0.0172, -0.0138, ..., 0.0265, 0.0010, 0.0029]])Parameter containing: +tensor([-3.6469e-02, 1.6272e-01, -6.0242e-02, -9.7885e-03, -4.4312e-02, + 1.1780e-01, 9.4238e-02, -1.3342e-01, 3.6133e-02, 2.9022e-02, + 2.3518e-03, 5.4352e-02, -2.9572e-02, -6.2134e-02, 
7.6782e-02, + 9.3460e-03, -1.0315e-01, 4.0710e-02, 3.5400e-02, -3.4863e-01, + 3.7628e-02, 2.0355e-02, 5.0323e-02, 5.9967e-02, 5.8136e-02, + 6.6589e-02, -3.6346e-02, -2.6657e-02, -1.1108e-01, 5.0079e-02, + 8.3801e-02, -1.4366e-02, -5.9387e-02, 1.3708e-01, 4.3335e-02, + -1.9272e-02, -1.2573e-01, -6.5674e-02, 4.9591e-02, 2.5781e-01, + 2.0385e-04, 2.4597e-02, -3.5156e-02, 2.2919e-02, -5.5237e-02, + 1.8005e-02, 1.1578e-01, -4.3335e-02, -7.9468e-02, -8.9050e-02, + -4.0131e-02, 5.1575e-02, -4.7569e-03, 1.1786e-01, -7.7698e-02, + 9.4681e-03, -1.0657e-01, -5.6641e-02, 5.4321e-02, -5.0873e-02, + -1.8738e-02, -1.2428e-02, -9.0759e-02, -8.9050e-02, -5.0110e-02, + 4.4586e-02, -5.6183e-02, -5.2124e-02, -5.9547e-03, -4.5197e-02, + 7.0312e-02, 1.6470e-03, -1.6479e-01, -2.2324e-02, 2.2812e-02, + -1.2695e-02, -2.7435e-02, 1.3855e-01, 3.5675e-02, -5.5351e-03, + 1.8173e-02, 2.0924e-03, -5.3986e-02, -8.4412e-02, -2.4002e-02, + -6.5918e-02, 1.0815e-01, -1.7395e-01, -1.6382e-01, 8.5640e-04, + -1.9653e-01, -1.1774e-01, 4.2114e-02, 8.8379e-02, 7.2815e-02, + 3.2101e-03, 1.5030e-02, 1.0992e-01, -5.3986e-02, 7.1533e-02, + 5.2582e-02, -1.2061e-01, -4.5990e-02, 1.1414e-01, -7.8918e-02, + -3.4332e-02, 9.1248e-02, -3.1128e-02, 4.1901e-02, -3.0273e-02, + 7.7393e-02, -3.4180e-02, -7.3914e-02, 1.8982e-02, 6.4583e-03, + 9.8450e-02, -2.0462e-02, 8.6426e-02, -8.2397e-02, -8.6975e-02, + 5.4932e-02, -1.2383e-02, -1.1955e-02, 2.8122e-02, -1.9211e-02, + -7.6660e-02, -3.1738e-02, -2.9907e-02, 2.9449e-02, 1.2463e-01, + -2.1194e-02, 3.9032e-02, -5.9479e-02, -8.0994e-02, -7.1533e-02, + 5.0446e-02, -7.1831e-03, -7.8613e-02, 6.6284e-02, 4.1626e-02, + -9.6619e-02, 3.5797e-02, -7.1838e-02, -2.2858e-02, -6.3354e-02, + 4.9255e-02, -5.7800e-02, -5.3650e-02, 1.2215e-02, 9.1003e-02, + 6.5002e-02, -8.4473e-02, 5.0323e-02, 3.8177e-02, -9.4055e-02, + 2.5436e-02, -7.3792e-02, 1.0272e-01, -1.3989e-01, -4.0100e-02, + -2.4506e-02, -3.8269e-02, -1.7441e-02, -8.9722e-02, -1.0199e-01, + 5.2490e-02, 1.0254e-01, -4.8920e-02, 
4.8828e-02, 7.8354e-03, + 2.0706e-02, -4.4525e-02, 2.4658e-02, -1.4233e-01, -1.1957e-01, + -3.5522e-02, -1.8994e-01, -1.1090e-01, 8.0688e-02, 6.3293e-02, + -6.2744e-02, -1.0913e-01, -1.6809e-01, -5.3024e-03, 1.7517e-02, + -1.7670e-02, -9.7046e-02, -2.3340e-01, 1.5625e-02, -9.2840e-04, + -1.7593e-02, 3.4454e-02, 1.8631e-02, -5.7312e-02, 6.7017e-02, + 4.2633e-02, 7.8186e-02, -5.2277e-02, 3.5248e-02, -2.8793e-02, + 2.9421e-04, -3.2806e-02, 4.8004e-02, -8.1909e-02, 1.6434e-02, + -1.1469e-01, 1.3763e-02, -8.5510e-02, -1.5454e-01, 7.0435e-02, + 4.8462e-02, -5.6183e-02, -2.0340e-02, 8.4290e-02, 4.1321e-02, + 9.0393e-02, 5.2094e-02, -3.8727e-02, 2.0203e-01, 1.1884e-01, + -2.3590e-02, 1.0632e-01, 2.6199e-02, -1.1391e-02, -1.5572e-02, + 1.0620e-01, -6.2408e-02, 6.1615e-02, 3.4149e-02, 4.5410e-02, + -7.7393e-02, -9.2896e-02, 3.4943e-02, -1.6205e-02, 4.5502e-02, + -8.2947e-02, 2.9266e-02, -7.4120e-03, -6.8398e-03, 1.2091e-01, + 1.1719e-01, -3.0045e-02, -3.6316e-02, 3.4210e-02, -3.4821e-02, + -9.2834e-02, 5.9753e-02, -7.4524e-02, -4.0802e-02, 1.2146e-02, + -2.7390e-02, -4.1107e-02, -3.3539e-02, -6.5063e-02, 1.2199e-02, + 9.4971e-02, -8.0933e-02, 1.7262e-03, -2.5940e-02, 1.6602e-02, + -1.1609e-01, -5.7434e-02, -1.1743e-01, 5.5809e-03, -8.1421e-02, + -1.6357e-01, 2.0802e-04, 6.3354e-02, 2.3861e-03, 3.3966e-02, + 1.7627e-01, -1.3269e-01, 2.1301e-02, 8.4763e-03, 4.2053e-02, + -5.5695e-02, 1.4355e-01, -2.0874e-02, -8.5999e-02, -1.7105e-02, + 7.0618e-02, 2.7496e-02, -1.4026e-01, -2.2705e-02, -8.4534e-02, + 5.2612e-02, -1.0693e-01, 1.2201e-01, -4.8218e-03, 5.1270e-02, + -2.8778e-02, 4.6417e-02, -2.6276e-02, -1.0938e-01, -4.6814e-02, + 4.5929e-02, -1.0254e-02, 8.2214e-02, -5.8228e-02, 9.7656e-02, + 1.7578e-02, 1.2866e-01, 1.9958e-02, 1.3049e-01, -9.2224e-02, + -9.8572e-03, -1.0309e-01, -1.1932e-02, -3.8891e-03, 5.7220e-02, + 2.9099e-02, -5.8716e-02, 3.4912e-02, -4.6539e-02, 3.0609e-02, + -4.7028e-02, -6.7932e-02, 5.2795e-02, -6.7825e-03, 4.4159e-02, + -5.0171e-02, -7.4951e-02, 
1.3069e-02, 4.0924e-02, 9.9463e-01, + -1.9760e-02, 7.8918e-02, -1.3257e-01, -4.0253e-02, -5.2856e-02, + -3.4088e-02, 9.1324e-03, -8.1406e-03, 1.1497e-02, 3.5767e-02, + -1.1243e-01, -7.6050e-02, 4.4586e-02, 6.6345e-02, -6.4880e-02, + 2.9888e-03, 9.5337e-02, 1.3599e-01, -1.0956e-01, 5.1697e-02, + 7.1045e-02, 3.2845e-03, 2.7924e-02, -5.4321e-02, -7.1411e-02, + 1.7236e-01, -6.8176e-02, -3.7659e-02, -2.7985e-02, 7.5607e-03, + -1.8616e-02, -8.5678e-03, -4.1016e-02, -2.7161e-02, 1.7371e-01, + -9.4604e-02, -2.1423e-02, -9.6497e-02, 1.0547e-01, -1.2134e-01, + 6.7139e-03, -1.1578e-01, -7.1167e-02, -9.4543e-02, -2.4414e-02, + 3.5763e-03, 2.0584e-02, 1.1566e-01, -2.4231e-02, 4.9744e-02, + -1.6876e-02, 7.9041e-02, -2.6382e-02, 1.2622e-01, -1.6693e-02, + 7.6355e-02, 1.0046e-01, 4.1771e-03, -2.0605e-01, 2.7054e-02, + 4.7913e-02, 1.8906e-02, -6.1096e-02, 6.4026e-02, -3.7659e-02, + -4.2145e-02, 1.6931e-01, 8.7646e-02, -7.4890e-02, 3.7079e-02, + -1.6769e-02, -1.0052e-01, -1.1389e-01, 1.5393e-01, -1.6556e-02, + -8.6975e-02, -6.1737e-02, -1.2402e-01, -1.3084e-02, -2.8839e-03, + -1.0582e-02, -4.9530e-02, 5.4741e-03, 4.7333e-02, -3.1342e-02, + 1.1877e-01, 5.7068e-02, 6.1859e-02, 6.5041e-03, 1.8188e-01, + 6.9092e-01, -2.3331e-02, -5.5878e-02, 3.7659e-02, -1.0307e-02, + -3.2410e-02, 8.8745e-02, -8.8867e-02, -1.4648e-01, -2.7161e-02, + 8.2947e-02, 1.4282e-01, 9.1370e-02, -1.0574e-02, 4.1107e-02, + 1.4062e-01, 1.1792e-01, 1.1810e-02, -9.7290e-02, 5.8228e-02, + 1.3892e-01, -9.9976e-02, 2.5116e-02, 5.3009e-02, -3.7659e-02, + 9.7656e-02, -2.5070e-02, 3.4424e-02, -1.7197e-02, 3.8849e-02, + -1.0315e-01, -8.4961e-02, 5.0842e-02, 4.2175e-02, 5.7770e-02, + -1.8555e-02, -4.3579e-02, -5.0446e-02, 6.9946e-02, -1.0941e-02, + 4.6143e-02, -3.8208e-02, 9.8877e-02, 4.0833e-02, 9.1492e-02, + 2.1210e-02, -7.8125e-02, -7.5378e-02, -1.0114e-01, -4.9042e-02, + 6.9153e-02, 9.6512e-03, 1.8164e-01, 1.7731e-02, 4.1565e-02, + -2.4463e-01, 1.7380e-02, -1.4294e-01, -3.6163e-02, 3.0518e-04, + -1.7105e-02, 1.0443e-01, 
6.5308e-03, 8.9905e-02, -6.4331e-02, + -2.5452e-02, -1.5427e-02, -1.6614e-01, -7.2937e-02, 8.9844e-02, + -1.0754e-01, 1.5839e-02, 7.0923e-02, 5.2460e-02, -1.1395e-01, + 5.6427e-02, -3.1311e-02, 9.8450e-02, 1.9730e-02, 3.8934e-04, + 1.5717e-02, -1.4062e-01, -5.0781e-02, 3.6804e-02, -5.0934e-02, + 4.3274e-02, 6.4514e-02, -6.1340e-02, -4.4342e-02, 7.7454e-02, + 3.7567e-02, 2.5925e-02, 1.0706e-01, -5.6976e-02, 1.3954e-02, + -3.6407e-02, -6.9824e-02, -9.9304e-02, 1.1206e-03, -3.4389e-03, + -4.3121e-02, -9.1614e-02, -9.4543e-02, 1.6876e-02, -6.6284e-02, + 9.6497e-02, -7.1594e-02, -4.3732e-02, 9.7839e-02, -1.1487e-01, + -8.4167e-02, -4.0497e-02, 1.3863e-02, 5.8670e-03, 1.7347e-03, + 7.2937e-02, -8.8684e-02, -3.0785e-03, 2.5955e-02, 3.1860e-02, + -9.8572e-03, 7.8369e-02, 3.5614e-02, -4.3091e-02, 4.0100e-02, + 3.4943e-02, 3.1525e-02, 3.8330e-02, -2.8000e-02, -2.6703e-02, + 1.0162e-01, 7.0801e-02, 1.5979e-01, -1.8738e-02, -5.5176e-02, + -6.5369e-02, -5.6244e-02, 1.6575e-03, -1.1670e-01, 6.4148e-02, + 5.0293e-02, 6.1066e-02, -1.0155e-02, -1.2354e-01, 2.2614e-02, + 4.6417e-02, -4.5593e-02, -9.8328e-02, 2.4460e-02, -2.4521e-02, + 3.0594e-02, 4.7913e-02, -5.8441e-02, -1.0048e-02, -7.2823e-03, + 9.5459e-02, -6.4735e-03, -5.1453e-02, 4.0863e-02, 1.2039e-02, + 2.3270e-02, 2.8336e-02, 1.0883e-01, -1.7624e-02, -1.7532e-02, + 3.1891e-02, -7.6904e-02, 4.4441e-03, -9.7046e-02, -3.0914e-02, + 1.3321e-02, -4.2572e-02, 6.9458e-02, 2.3178e-02, 4.8828e-02, + 4.7943e-02, 8.3466e-03, -5.9906e-02, -5.3894e-02, -6.7444e-02, + -7.5867e-02, -5.9174e-02, -1.2718e-02, 2.3300e-02, 4.2236e-02, + 1.3599e-01, 9.7885e-03, -1.0962e-01, 3.4668e-02, -4.9713e-02, + 3.2783e-05, 4.0741e-02, 5.3436e-02, 9.0210e-02, 8.6914e-02, + 7.8857e-02, 1.4172e-01, 2.4078e-02, 1.0687e-01, -6.6589e-02, + -1.5771e-01, -8.2703e-02, 9.8694e-02, -4.8904e-03, -4.0649e-02, + -1.0876e-01, -2.2232e-02, 5.2094e-02, -4.8370e-02, 7.5439e-02, + 1.2535e-02, -6.5674e-02, 2.8503e-02, -2.5620e-02, 1.3664e-02, + 6.7993e-02, -1.0150e-01, 
-7.6599e-02, 1.1528e-02, 3.7231e-02, + -8.5205e-02, 2.2018e-02, -5.7281e-02, -1.9135e-02, 4.3518e-02, + 8.1604e-02, -1.0181e-01, -3.8147e-02, 1.2436e-02, -8.5632e-02, + 6.4636e-02, -4.8584e-02, 6.6772e-02, -5.1300e-02, 2.3651e-02, + 1.0858e-01, -1.1981e-01, 3.7632e-03, 5.8533e-02, -3.1342e-02, + 1.3403e-01, -9.2773e-02, 9.3323e-02, -2.6443e-02, 8.2092e-02, + -5.4688e-02, 1.0303e-01, -7.1960e-02, -1.4185e-01, -1.2537e-01, + -1.2802e-02, -1.9669e-02, 6.4880e-02, 4.2358e-02, -4.2839e-03, + 1.2189e-01, -6.2828e-03, 3.8605e-02, -1.5038e-02, -9.3140e-02, + 4.7028e-02, 6.0883e-02, -4.2938e-02, 5.2071e-04, 2.1610e-03, + 8.2397e-02, 2.5977e-01, -3.5496e-03, 9.6497e-02, 8.2703e-02, + 5.9357e-02, -1.5783e-03, -8.3984e-02, 3.4790e-02, -5.7556e-02, + -4.6875e-02, 3.5143e-04, 1.0303e-01, 1.6235e-02, -3.5797e-02, + 7.6355e-02, 2.5269e-02, 4.0283e-02, 3.3722e-02, 5.1483e-02, + 8.1909e-02, 4.9896e-02, -6.7932e-02, -1.2335e-01, -6.5613e-02, + 1.2131e-02, -2.9205e-02, -9.3140e-02, -2.8885e-02, 2.0538e-02, + 9.0698e-02, -5.4443e-02, 8.4900e-02, 2.9633e-02, 5.1758e-02, + 1.3763e-02, 6.9519e-02, 6.0608e-02, 1.0583e-01, -2.4429e-02, + -7.5073e-02, -3.4241e-02, 9.9915e-02, -6.2347e-02, 4.7493e-03, + 3.2978e-03, 2.6535e-02, 1.1444e-04, -2.0798e-02, 7.9834e-02, + -4.9347e-02, -1.0492e-01, 1.5045e-02, -1.1493e-01, -2.9953e-02, + -1.4519e-02, -2.4918e-02, 3.1372e-02, 6.4209e-02, 1.0780e-02, + -5.0507e-02, -1.4931e-02, -1.0931e-01, 7.7019e-03, -1.9385e-01, + 4.1168e-02, -5.6366e-02, -7.5562e-02, -6.1890e-02, -1.1023e-01, + 2.9526e-02, 9.0515e-02, -3.5744e-03, 4.0466e-02, 4.5441e-02, + 3.4576e-02, 5.6793e-02, -1.0602e-01, -1.1299e-02, 5.1208e-02, + -4.1046e-02, 9.7198e-03, -6.1462e-02, -1.0052e-01, 8.4076e-03, + -9.4604e-02, 9.8953e-03, 6.1554e-02])Parameter containing: +tensor([2.2836, 2.2623, 2.2728, 2.2751, 2.2677, 2.4896, 2.4848, 2.3706, 2.5277, + 2.2358, 2.3212, 2.3091, 2.3751, 2.3409, 2.3887, 2.2233, 2.3059, 2.3760, + 2.3349, 2.7988, 2.3484, 2.3604, 2.3025, 2.4139, 2.2065, 2.5094, 2.4309, 
+ 2.4903, 2.1988, 2.2597, 2.3186, 2.3134, 2.3185, 2.6018, 2.2067, 2.2691, + 2.3309, 2.2402, 2.2445, 2.7682, 2.2321, 2.3523, 2.3791, 2.2468, 2.3558, + 2.3014, 2.3595, 2.3211, 2.3757, 2.1141, 2.2303, 2.4424, 2.4205, 2.3217, + 2.3337, 2.3350, 2.3622, 2.3076, 2.3821, 2.2884, 2.4378, 2.3676, 2.3492, + 2.2260, 2.3221, 2.2602, 2.3660, 2.3384, 2.4014, 2.7936, 2.4196, 2.3274, + 2.3671, 2.5887, 2.2795, 2.3208, 2.2872, 2.2780, 2.1992, 2.4337, 2.2422, + 2.2682, 2.2171, 2.3948, 2.2670, 2.2343, 2.2981, 2.2693, 2.2724, 2.2160, + 2.3139, 2.3121, 2.3384, 2.4798, 2.3535, 2.2585, 2.2772, 2.3278, 2.3495, + 2.3174, 2.4153, 2.5859, 2.3195, 2.4833, 2.2640, 2.2159, 2.3548, 2.2305, + 2.3506, 2.3109, 2.2594, 2.3875, 2.4088, 2.4293, 2.4857, 2.2573, 2.3575, + 2.3645, 2.4322, 2.2848, 2.3235, 2.3897, 2.2430, 2.3598, 2.3608, 2.1504, + 2.5151, 2.5464, 2.3191, 2.3495, 2.1983, 2.2918, 2.4158, 2.3904, 2.4342, + 2.4049, 2.3032, 2.5024, 2.2924, 2.5428, 2.4114, 2.3645, 2.3113, 2.3470, + 2.2809, 2.2530, 2.2246, 2.3749, 2.3138, 2.5595, 2.3965, 2.3072, 2.4053, + 2.4625, 2.3788, 2.6980, 2.3840, 2.3440, 2.2617, 2.3744, 2.1746, 2.3023, + 2.2086, 2.4053, 2.4884, 2.2782, 2.2567, 2.5410, 2.3193, 2.3158, 2.3928, + 2.4509, 2.0716, 2.5628, 2.2551, 2.4581, 2.3956, 2.2217, 2.2878, 2.3877, + 2.3486, 2.2614, 2.3617, 2.2922, 2.2638, 2.4065, 2.2689, 1.0988, 2.3420, + 2.3585, 2.2917, 2.3827, 2.3319, 2.3020, 2.3903, 2.2736, 2.2319, 2.3806, + 1.3342, 2.3759, 2.3139, 2.2133, 2.2983, 2.4403, 2.3521, 2.4802, 2.5205, + 2.4101, 2.4336, 2.3705, 2.3335, 2.3805, 2.5011, 2.1977, 2.2888, 2.4008, + 2.2075, 2.4126, 2.3203, 2.4452, 2.4104, 2.2892, 2.4073, 2.3455, 2.3511, + 2.3958, 2.4004, 2.3555, 2.2310, 2.3726, 2.2519, 2.2059, 2.2110, 2.2125, + 2.1648, 2.4104, 2.3876, 2.2703, 2.3979, 2.2819, 2.3374, 2.3302, 2.2354, + 2.3461, 2.2734, 2.3462, 2.5224, 2.4833, 2.3291, 2.3517, 2.2777, 2.3085, + 2.3876, 2.3782, 2.3474, 2.3451, 2.4248, 2.3716, 2.6404, 2.3153, 2.3406, + 2.3860, 2.3607, 2.3962, 2.3189, 2.2672, 2.3711, 2.3791, 2.2194, 2.2803, + 
2.4083, 2.2761, 2.4173, 2.2845, 2.3444, 2.5596, 2.4811, 2.4737, 2.4499, + 2.4351, 2.3279, 2.7294, 2.2321, 2.3936, 2.2265, 2.3656, 2.2984, 2.2823, + 2.5139, 2.2303, 2.2668, 2.2091, 2.2940, 2.3453, 2.3100, 2.5271, 2.5363, + 2.3898, 2.2919, 2.4632, 2.3279, 2.5609, 2.3796, 2.3439, 2.2990, 2.1915, + 2.4004, 2.2724, 2.3014, 2.3202, 2.2661, 2.4051, 2.3114, 2.4008, 2.2553, + 2.4169, 2.3254, 2.3748, 2.4071, 2.4148, 2.3257, 2.2542, 2.2565, 2.3255, + 1.0368, 2.4136, 2.4344, 2.2713, 2.3418, 2.3693, 2.3610, 2.3290, 2.2906, + 2.3320, 2.4559, 2.2263, 2.3971, 2.2848, 2.2835, 2.3296, 2.3135, 2.3973, + 2.4620, 2.3723, 2.3816, 2.4425, 2.4960, 2.3998, 2.3360, 2.3177, 2.1258, + 2.3319, 2.4550, 2.2678, 2.4137, 2.3452, 2.5231, 2.4271, 2.2389, 2.2411, + 2.3888, 2.3768, 2.4222, 2.3935, 2.3259, 2.3471, 2.3140, 2.4708, 2.3969, + 2.3159, 2.4021, 2.5023, 2.3574, 2.2141, 2.4547, 2.3712, 2.2649, 2.3120, + 2.3954, 2.2454, 2.3630, 2.3928, 2.4318, 2.3398, 2.3917, 2.4447, 2.2607, + 2.3408, 2.3916, 2.3773, 2.3499, 2.4071, 2.3030, 2.4019, 2.2296, 2.4911, + 2.3300, 2.3643, 2.4153, 2.2525, 2.4206, 2.2427, 2.4401, 2.3681, 2.3906, + 2.4328, 2.2356, 2.2297, 2.3566, 2.2836, 2.2859, 2.5026, 2.3523, 2.4348, + 2.6588, 3.7533, 2.3396, 2.2670, 2.2848, 2.4338, 2.3247, 2.2380, 2.2849, + 2.3591, 2.4750, 2.3341, 2.2376, 2.3901, 2.6482, 2.2936, 2.4247, 2.4099, + 2.4332, 2.4702, 2.4999, 2.5136, 2.2909, 2.2548, 2.3399, 2.4379, 2.3890, + 2.1797, 2.2449, 2.4876, 2.4356, 2.2469, 2.3131, 2.5125, 2.3290, 2.3605, + 2.3724, 2.3760, 2.4266, 2.4542, 2.3139, 2.3399, 2.3044, 2.3643, 2.3527, + 2.2571, 2.3809, 2.3492, 2.3256, 2.3224, 2.2221, 2.3733, 2.3437, 2.1776, + 2.3822, 2.3127, 2.3458, 2.4210, 2.4154, 2.3127, 2.5117, 2.3292, 2.3855, + 2.3198, 2.4858, 2.2322, 2.3392, 2.1891, 2.4567, 2.4760, 2.2336, 2.4192, + 2.2877, 2.2794, 2.3042, 2.2707, 2.3126, 2.2663, 2.4194, 2.3827, 2.2848, + 2.2669, 2.3965, 2.3590, 2.2713, 2.3002, 2.3128, 2.3614, 2.3517, 2.3291, + 2.5153, 2.2987, 2.3548, 2.3768, 2.3937, 2.2768, 2.4841, 2.3234, 2.1443, + 
2.2820, 2.2877, 2.2695, 2.4717, 2.3183, 2.2493, 2.6564, 2.2307, 2.3746, + 2.3745, 2.3673, 2.2790, 2.3275, 2.3607, 2.3602, 2.3226, 2.5189, 2.3797, + 2.3876, 2.2800, 2.4039, 2.2463, 2.3176, 2.3052, 2.4909, 2.5803, 2.4538, + 2.3132, 2.3479, 2.4377, 2.3054, 2.5416, 2.4145, 2.2907, 2.3398, 2.4638, + 2.3699, 2.3819, 2.2982, 2.4746, 2.5282, 2.3545, 2.3307, 2.3648, 2.3536, + 2.3116, 2.3079, 2.3169, 2.6240, 2.2975, 2.3685, 2.4311, 2.3693, 2.4842, + 2.3711, 2.3410, 2.5237, 2.3753, 2.3804, 2.1967, 2.4785, 2.3339, 2.3392, + 2.4601, 2.2095, 2.4320, 2.2722, 2.2876, 2.3902, 2.3450, 2.3190, 2.3629, + 2.3394, 2.3433, 2.3516, 2.2867, 2.3276, 2.3313, 2.3305, 2.3285, 2.5351, + 2.4824, 2.2602, 2.3945, 2.3668, 2.3317, 2.1739, 2.2305, 2.4108, 2.3467, + 2.2079, 2.4747, 2.2943, 2.3790, 2.6729, 2.5252, 2.3475, 2.3181, 2.2528, + 2.2801, 2.3034, 2.2967, 2.6657, 2.4441, 2.3536, 2.4201, 2.3460, 2.4714, + 2.2284, 2.4259, 2.3764, 2.4271, 2.5601, 2.4350, 2.3774, 2.3004, 2.3895, + 2.4669, 2.4508, 2.3371, 2.3227, 2.5032, 2.3196, 2.3379, 2.3778, 2.3761, + 2.1820, 2.3314, 2.1958, 2.2008, 2.4098, 2.5979, 2.2799, 2.3461, 2.4614, + 2.3359, 2.3336, 2.6203, 2.4267, 2.2657, 2.3032, 2.3602, 2.2581, 2.4675, + 2.2453, 2.3804, 2.2823, 2.1461, 2.2759, 2.5891, 2.2736, 2.2670, 2.2639, + 2.2100, 2.3349, 2.5720, 2.3157, 2.2162, 2.3284, 2.3788, 2.2535, 2.2463, + 2.3431, 2.4069, 2.3624, 2.4723, 2.2752, 2.4635, 2.9034, 2.2371, 2.4090, + 2.2496, 2.3304, 2.3411, 2.2727, 2.4774, 2.3166, 2.3610, 2.3738, 2.3590, + 2.3766, 2.2283, 2.3738, 2.3774, 2.3004, 2.4230, 2.3889, 2.3237, 2.2233, + 2.4146, 2.3798, 2.4000, 2.4473, 2.4295, 2.2930, 2.1776, 2.4433, 2.2065, + 2.3386, 2.4510, 2.4059, 2.2630, 2.3705, 2.3392, 2.3462, 2.4111, 2.3758, + 2.4813, 2.3263, 2.3184, 2.4310, 2.4148, 2.3368, 2.3599, 2.3383, 2.2286, + 2.3814, 2.3993, 2.3483, 2.3905, 2.3331, 2.3333, 2.2417, 2.4162, 2.2706, + 2.3414, 2.3442, 2.2891, 2.6559, 2.2701, 2.2298, 2.2341, 2.2531, 2.4576, + 2.3354, 2.2710, 2.1691, 2.3822, 2.3545, 2.3533, 2.2163, 2.4231, 2.4164, + 
2.2920, 2.3275, 2.4484, 2.2593, 2.3305, 2.3374, 2.3359, 2.3922, 2.3363, + 2.3097, 2.3412, 2.3888])Parameter containing: +tensor([ 8.6604e-02, -2.9286e-01, 1.9763e-01, -1.2163e-01, -3.8894e-01, + -2.1842e-01, -1.2438e-01, 1.4834e-01, 6.4301e-02, -7.7811e-02, + -1.3064e-01, -8.3709e-02, 1.7816e-01, -2.4458e-01, 5.9149e-02, + -2.5113e-02, 8.0249e-02, -7.7466e-02, -3.5921e-01, -3.3318e-01, + -2.8440e-01, 5.6322e-02, 2.3694e-01, -3.0856e-01, 1.0845e-01, + 1.2182e-01, -2.4052e-01, 1.5255e-01, 3.2016e-02, -7.5441e-02, + 4.5930e-02, 7.0053e-02, 1.4111e-01, -4.6860e-01, 8.0392e-02, + 1.4536e-01, -9.4315e-02, 3.2234e-02, 2.7123e-01, -5.3386e-01, + -1.3102e-01, 8.6262e-02, 1.8063e-01, -4.6477e-02, -3.9122e-02, + -2.0202e-01, -2.6970e-01, -7.5590e-02, 1.5426e-01, 1.8237e-01, + -1.1917e-01, -3.6866e-01, -1.2210e-01, -1.5103e-02, -1.8397e-01, + 5.8603e-01, 1.4954e-01, -4.9178e-03, -4.2942e-02, 1.1036e-01, + 3.8296e-04, 3.2313e-01, 1.6136e-01, -1.1096e-02, 2.0610e-01, + 3.6640e-02, -2.3877e-02, -5.1362e-02, -4.5582e-01, -8.1244e-01, + -8.7015e-03, -2.3145e-01, 1.4538e-01, -4.8757e-01, -9.1498e-02, + -2.2466e-01, 2.7441e-01, -2.5719e-01, -1.5121e-01, 4.0123e-01, + 1.6970e-01, -1.8025e-01, 2.7369e-03, -5.2702e-02, -2.5943e-01, + 1.9232e-01, 2.6753e-04, 1.2177e-01, 2.5853e-01, -1.5520e-01, + 1.1621e-01, -2.7038e-02, -1.6476e-01, 1.5174e-01, -5.6310e-02, + -6.6330e-02, -2.3552e-01, -1.8034e-01, -7.3568e-02, 1.8432e-01, + -3.8841e-01, -2.1282e-01, -9.6584e-02, -2.2054e-01, 2.3119e-01, + 7.7053e-02, -5.6909e-02, -1.3414e-01, 8.0427e-02, 2.1012e-01, + 2.4924e-01, 1.1686e-01, -1.3254e-01, 5.9697e-02, -1.7765e-01, + -1.6437e-01, 3.3616e-01, 8.5083e-03, -2.5418e-02, 2.6027e-01, + -1.6043e-01, 8.2424e-02, -1.3150e-01, -5.3904e-02, 1.0872e-01, + 1.5643e-01, 1.3161e-01, 5.1541e-01, -1.5884e-01, -1.8408e-02, + 1.2766e-01, -8.4682e-02, 3.3091e-01, 1.6210e-01, 2.0954e-01, + 1.3627e-01, -1.8098e-01, -2.8791e-01, 1.5007e-01, -4.2789e-01, + 1.8015e-01, 1.0072e-01, 2.2316e-01, 1.5218e-01, 
1.0389e-01, + -1.3575e-01, 1.2173e-01, -3.1166e-02, -6.5930e-02, -2.7193e-01, + -7.4922e-02, 1.2258e-01, 2.0601e-01, -3.3073e-01, -2.0484e-01, + 6.6838e-01, 1.8386e-01, -4.5000e-01, 7.0386e-02, -1.9580e-01, + 1.2700e-01, -8.3763e-02, 4.0264e-03, -5.8593e-02, 2.2412e-01, + 1.7506e-04, 1.8127e-01, 7.3059e-02, 1.8272e-01, -7.2565e-02, + 6.6788e-02, -2.5547e-01, -2.3021e-01, 6.6056e-01, 7.9189e-03, + 6.3060e-02, 2.2812e-01, 5.6355e-02, -1.2058e-01, 5.2562e-02, + -4.2476e-01, 2.2737e-02, -2.9150e-02, 8.7327e-02, -1.3205e-01, + 5.9143e-01, 3.7196e-02, 1.3226e-01, 1.4421e-01, -2.7715e-02, + -4.6107e-01, 1.6758e-02, 1.8647e-01, 6.3556e-02, -3.9387e-02, + 4.3825e-02, 6.2616e-02, -1.9770e-01, -4.2564e-01, 3.2139e-01, + 1.8278e-02, 2.9093e-02, 1.3184e-01, -1.8062e-01, 3.7262e-02, + 5.3266e-02, 2.6615e-01, -8.8052e-02, -4.1996e-02, -3.9655e-01, + 1.2197e-01, -7.8418e-02, -5.1390e-01, -7.3780e-02, 1.8691e-01, + -7.2425e-02, -5.5631e-02, -1.4905e-01, -2.6188e-01, 3.4893e-02, + 3.2901e-01, -1.4541e-01, -4.0933e-01, -1.2446e-01, 4.5600e-02, + 6.2080e-02, -2.2978e-01, 1.6788e-02, -1.0107e-01, 1.8279e-01, + -7.4715e-02, 1.4252e-01, 1.1494e-01, 1.2269e-01, -2.4501e-01, + -2.5672e-01, 2.9405e-01, 9.4859e-02, -3.2469e-03, -1.3721e-01, + -1.6615e-01, 2.3003e-01, 2.1743e-01, -3.5526e-01, -1.8183e-01, + 5.2396e-03, -3.8641e-01, 2.7500e-02, -2.1637e-01, 2.3685e-01, + -1.1876e-01, 7.4968e-02, 3.1369e-01, 6.9360e-02, -1.0279e-01, + -2.0016e-01, 3.0319e-01, -4.0203e-01, -4.8866e-01, -3.3533e-01, + -4.9880e-02, -1.4934e-01, 1.5540e-01, 3.2154e-02, 7.3181e-02, + 6.0271e-02, 1.6326e-01, 6.9507e-03, 1.4214e-01, -6.3119e-02, + -2.9987e-01, 7.6350e-02, 3.7965e-02, 1.5102e-03, 1.6799e-01, + -2.3439e-01, -2.9438e-01, 2.4812e-01, -1.9865e-01, -8.4152e-02, + 5.5912e-03, -8.7288e-01, 1.2965e-01, 3.3379e-01, 2.3957e-01, + -1.5091e-02, -1.2882e-01, -8.1023e-02, -1.2267e-01, 1.1010e-01, + -2.1774e-01, 3.1341e-02, 1.2947e-01, 1.6552e-03, 5.0242e-03, + -4.7059e-01, -1.4026e-01, -6.4873e-02, 2.3260e-01, 
-2.9756e-01, + -3.5772e-01, -3.4365e-01, -3.4945e-01, -1.9062e-01, 4.4129e-01, + 8.3628e-02, -7.4316e-02, 1.8464e-01, -1.5011e-01, -1.9917e-01, + 7.2023e-02, -1.5736e-01, 1.1195e-01, -1.0936e-03, 2.1782e-02, + 1.7796e-01, 2.2404e-01, -7.0521e-02, -1.3729e-01, 9.3621e-02, + -4.7864e-01, -2.9099e-02, 9.3240e-02, -1.6473e-01, -1.7154e-01, + -1.6437e-01, -3.6914e-01, 2.9099e-01, 1.8210e-01, 9.8001e-02, + -1.9131e-02, -6.1023e-02, -2.6954e-02, 2.3269e-02, -1.0748e-01, + 2.3885e-02, -3.0717e-01, 7.0289e-03, -2.9415e-01, 7.8834e-02, + 2.2555e-01, -1.4750e-01, -4.4716e-01, 3.1579e-01, -2.2415e-01, + 2.3326e-01, -3.4718e-01, -8.0136e-02, -4.9056e-02, 1.8268e-01, + 1.2392e-01, -9.0308e-02, -1.1581e-01, 3.6533e-02, 1.3277e-01, + 1.6580e-01, -2.9610e-01, 1.5124e-02, 3.1983e-02, -9.6834e-02, + 4.0151e-02, -2.6227e-02, 6.1991e-02, -3.5487e-01, 1.0990e-01, + -3.1649e-02, -6.0119e-02, 1.9958e-01, 4.1921e-02, -4.4497e-02, + -1.2652e-01, 1.2113e-01, -3.8615e-02, -5.6424e-02, -1.0027e-01, + 2.4490e-01, 5.1361e-02, 3.0297e-01, -9.0036e-02, -1.8055e-01, + -1.3455e-01, -2.0498e-01, -2.2928e-01, 1.1047e-01, 6.0962e-02, + -4.5831e-01, -2.5677e-01, 3.0335e-01, 1.6979e-01, 5.4195e-03, + 1.8430e-01, -4.2833e-01, 1.5375e-01, 7.8641e-02, -2.6052e-01, + 4.1466e-03, -1.3320e-01, 5.0047e-02, -1.3446e-01, -5.9763e-02, + -2.1034e-01, 1.8951e-01, -1.7249e-01, -2.8103e-01, -5.9848e-02, + 8.7397e-02, 3.8154e-02, 1.6206e-02, -9.0562e-02, 3.1392e-01, + -2.5056e-02, -4.6812e-01, -1.3958e-01, 7.6124e-02, 1.3683e-01, + -1.4261e+00, 2.3469e-02, -4.5264e-02, 8.6404e-02, 1.7327e-01, + 9.3496e-02, -6.0595e-02, 8.4582e-02, 2.0683e-01, -2.6103e-02, + 1.3176e-01, -2.3246e-01, 6.2243e-02, -2.7171e-01, 3.5642e-02, + -4.2095e-01, -2.5752e-01, -1.3984e-01, -1.1365e-01, -7.5260e-02, + -8.9288e-02, -5.2441e-03, 1.1592e-01, -7.2560e-02, -8.0440e-02, + -1.6767e-01, 1.1599e-01, 5.8748e-02, -3.3971e-01, -2.0054e-01, + 5.3322e-02, 2.9757e-01, 3.9025e-01, -1.4760e-01, -1.7380e-01, + -5.3329e-02, -2.1588e-02, -1.7357e-01, 
-1.0301e-01, -1.2302e-01, + -1.8281e-01, -1.6062e-01, -2.5224e-01, 4.3526e-02, -2.0653e-01, + -1.2789e-01, -4.3447e-02, -1.1119e-01, 1.6140e-01, -1.3325e-01, + 1.7198e-01, -1.1098e-01, 9.2506e-02, -9.1265e-02, -4.8381e-02, + 1.0725e-01, 2.3545e-02, -1.3973e-01, 1.8025e-01, -2.7206e-01, + -2.6945e-01, -4.6276e-02, -5.3086e-01, -2.1206e-03, -5.7141e-03, + 2.9229e-02, 4.3049e-04, 3.9786e-01, 1.9325e-02, -1.4418e-01, + 3.4156e-02, -3.8111e-02, -6.2930e-03, -7.2592e-02, 1.1503e-01, + -1.9450e-01, -7.8893e-02, -4.4225e-02, -1.5106e-01, -1.5963e-01, + -1.8364e-01, 1.1715e-01, -9.9646e-02, -3.7290e-02, -3.4496e-02, + 1.2110e-01, -4.7361e-03, 7.1833e-02, -2.6530e-02, -1.9589e-01, + 2.0624e-01, -1.1726e-01, -6.6253e-02, 1.3797e-01, 9.6431e-02, + -3.9810e-01, 1.4199e-01, -1.7143e-01, -1.1868e-01, -2.3157e-01, + 2.4837e-01, 3.2089e-01, -2.0151e-01, -1.6186e-01, 5.8250e-01, + 2.3087e-02, -2.6994e-02, -7.5188e-02, 1.6035e-01, -9.8618e-02, + -6.2415e-02, -2.8756e-02, 1.6325e-01, -2.2189e-01, 1.2451e-01, + -3.8018e-02, 1.2878e-01, -6.3290e-02, -5.8006e-02, -1.2467e-01, + -3.6006e-01, 1.5278e-01, -1.8236e-01, 3.0076e-01, -2.6767e-01, + 2.2054e-02, 1.0192e-02, 4.0691e-02, 3.1209e-01, 3.8954e-01, + -3.6524e-02, -3.8531e-01, -4.2814e-01, -1.7692e-01, 1.1740e-01, + 7.4477e-02, 2.0804e-02, 2.5254e-01, -1.4224e-01, -1.9235e-01, + 3.5380e-01, 1.1057e-01, -1.0711e-01, -1.6409e-01, -1.2642e-01, + -1.1363e-01, -7.9586e-01, 8.1844e-02, 1.3153e-01, -4.1466e-02, + 1.7913e-01, -3.3507e-01, 3.3697e-03, 6.7601e-02, -2.2027e-01, + -6.6688e-02, -1.2807e-02, 1.7038e-01, -3.2307e-01, 2.0768e-01, + 2.2008e-01, -9.4049e-02, -1.8320e-01, -1.4369e-01, 2.4456e-01, + 2.0687e-01, -2.6573e-01, -1.9569e-01, 2.2473e-01, 1.6415e-01, + -3.9568e-02, 8.5480e-02, -3.2554e-02, -4.2158e-02, -9.3549e-02, + -3.7789e-02, -1.6005e-01, 1.0470e-01, -3.8674e-01, -9.9921e-02, + 2.2348e-01, 2.8183e-01, -1.1313e-01, 1.7603e-01, -6.7394e-02, + 1.0005e-01, 1.6291e-01, 7.7057e-03, -1.8000e-01, 1.7484e-01, + -7.1115e-02, 
-3.3143e-02, -6.5741e-01, -2.4860e-01, 2.0984e-01, + -2.1012e-01, -1.2822e-01, -2.5784e-01, -1.0285e-02, 1.4045e-01, + 3.4105e-01, 5.2854e-01, -5.3262e-02, -2.7384e-01, 1.2843e-01, + -2.7380e-02, 1.8670e-02, -1.9795e-01, 2.8344e-01, -5.1208e-02, + 4.4121e-01, 3.3801e-03, 3.1375e-01, 9.6573e-02, -1.9746e-01, + -2.7762e-01, 3.9339e-01, 7.2044e-02, -8.3836e-03, -3.9421e-01, + -8.8247e-02, 9.3472e-02, -1.6321e-01, -3.0705e-02, -1.7254e-01, + -3.9066e-02, 5.7862e-02, -3.4114e-02, 2.1111e-01, -4.5305e-01, + 1.8462e-01, -8.1985e-02, -1.9718e-01, -2.1379e-02, 7.6429e-02, + -4.3138e-02, -2.3068e-01, -1.6621e-02, -3.2513e-01, 5.3912e-02, + -1.1320e-01, 3.4280e-01, 2.6733e-02, -2.1735e-01, -2.3797e-01, + 1.5833e-01, -1.4166e-02, 3.5680e-01, 1.5322e-01, -8.9745e-02, + -1.9738e-01, 1.4938e-01, 9.4129e-02, 2.2862e-01, 8.3815e-02, + -2.7415e-01, 2.5166e-02, -3.1954e-01, 3.3088e-01, 3.3316e-01, + 7.9626e-02, 2.1199e-01, 3.0189e-01, 7.0640e-02, 1.4281e-01, + -2.7548e-01, 1.6440e-01, -2.1965e-01, -3.7695e-01, 1.1076e-01, + 3.7800e-02, 7.4885e-02, -3.4464e-02, 1.4313e-01, -6.0915e-02, + -2.3558e-01, -9.9578e-02, -2.5076e-01, -2.4921e-01, -2.6538e-01, + 4.9933e-02, -1.5339e-01, -1.0247e-01, -1.9087e-01, 1.2254e-01, + 3.0902e-01, -1.7322e-01, -9.5716e-02, -1.0661e-01, -1.5046e-02, + 3.5670e-01, -2.1956e-01, 1.7981e-01, 2.0513e-02, -2.8031e-01, + -1.6479e-01, 1.0474e-01, -3.1772e-01, -1.5772e-01, 3.5148e-01, + 3.1563e-01, -1.5615e-02, 2.0267e-01, -4.0184e-01, 1.1223e-01, + 1.9680e-01, -7.8862e-02, -1.6144e-02, 1.5175e-01, -3.1454e-02, + -5.4835e-02, -7.4258e-02, -9.8505e-03, -9.8693e-02, -2.3574e-02, + -1.9845e-01, 1.3589e-02, -5.6328e-02, 1.8877e-01, -1.1442e-01, + 1.5472e-01, -9.2352e-02, -2.1861e-03, -5.0178e-01, -1.4711e-01, + 2.4483e-02, 1.7381e-01, 1.1555e-01, -1.1969e-01, 4.7179e-01, + -4.1360e-01, 5.9100e-02, 1.4711e-01, 3.4214e-01, 4.1288e-02, + 2.5513e-01, -1.7110e-01, -1.9608e-01, -4.2552e-02, -2.5904e-01, + -1.2025e-01, -2.1390e-01, -1.7674e-02, 3.4328e-01, 3.1743e-02, + 
2.8413e-02, -1.5858e-01, 2.3370e-02, 2.3553e-01, 8.7197e-02, + 8.3802e-02, -1.1817e-02, 1.2176e-01])Parameter containing: +tensor([[ 2.9816e-02, 4.1199e-03, 8.1406e-03, ..., 1.6235e-02, + -2.6321e-02, 4.1542e-03], + [-9.5673e-03, -3.6621e-02, -5.4779e-03, ..., -1.4587e-02, + 9.2392e-03, -7.4482e-04], + [ 7.9727e-03, -1.2749e-02, 1.3336e-02, ..., -3.6591e-02, + -5.0735e-04, -1.6289e-03], + ..., + [ 2.0859e-02, -7.8630e-04, -1.1818e-02, ..., 7.7069e-05, + -3.9337e-02, -8.6823e-03], + [-9.7809e-03, -6.9389e-03, -4.0497e-02, ..., 1.0925e-02, + -5.8136e-03, 1.8625e-03], + [-2.3834e-02, -9.3536e-03, -4.1656e-03, ..., 1.7807e-02, + -1.5495e-02, -1.8188e-02]])Parameter containing: +tensor([ 0.1714, 0.2435, 0.2001, ..., 0.0595, 0.0106, -0.0736])Parameter containing: +tensor([[-0.0021, 0.0144, -0.0298, ..., -0.0013, -0.0110, -0.0228], + [-0.0030, -0.0152, 0.0158, ..., 0.0002, 0.0506, -0.0149], + [ 0.0067, -0.0117, -0.0151, ..., -0.0057, -0.0125, -0.0014], + ..., + [ 0.0228, 0.0216, 0.0058, ..., 0.0212, -0.0056, 0.0391], + [ 0.0132, 0.0172, -0.0291, ..., -0.0060, -0.0128, -0.0266], + [-0.0192, -0.0129, 0.0062, ..., -0.0020, 0.0054, -0.0218]])Parameter containing: +tensor([-3.2935e-01, 3.8574e-01, -1.5894e-01, -7.9712e-02, -1.2671e-01, + -1.6199e-01, 1.0590e-01, -2.9614e-01, 2.3022e-01, 3.6469e-02, + -5.2643e-02, 3.4033e-01, 1.4880e-01, -8.3801e-02, 4.8340e-01, + 1.7334e-01, -1.7029e-01, 8.2764e-02, -1.4929e-01, -2.7979e-01, + 1.2170e-01, 1.6174e-01, 2.1887e-01, -5.0903e-02, 2.0105e-01, + 1.3806e-01, -1.4636e-01, -3.9154e-02, -3.9307e-01, 1.2183e-01, + 1.6663e-01, -1.3965e-01, -9.2239e-03, -1.0297e-01, -5.9967e-02, + -4.0710e-02, -1.7126e-01, -1.7651e-01, 2.1472e-01, 1.6138e-01, + -1.6394e-01, 4.7821e-02, 2.2400e-01, -2.2632e-01, -3.0811e-01, + -1.8933e-01, -5.2704e-02, -1.0468e-01, -1.3489e-01, -1.2469e-01, + -1.3416e-01, 1.6504e-01, 3.1934e-01, 3.4814e-01, -1.9519e-01, + 6.3171e-02, 1.2305e-01, 7.3242e-02, -1.4343e-01, -9.5154e-02, + 2.2620e-01, 1.8762e-01, -2.3962e-01, 
-2.3669e-01, -2.3727e-02, + 2.4072e-01, -2.9541e-01, -1.2622e-01, -1.2805e-01, -8.8806e-02, + 3.1689e-01, 7.8674e-02, -5.3711e-02, -2.3657e-01, -9.0271e-02, + -2.0654e-01, -7.9346e-02, 3.0127e-01, 3.1299e-01, -8.1024e-03, + 7.9468e-02, 2.3962e-01, -5.1514e-02, -2.0771e-03, -4.1168e-02, + -4.1235e-01, 3.2129e-01, -4.1333e-01, -1.4758e-01, -5.6732e-02, + -2.8223e-01, -1.7761e-01, -1.2903e-01, 4.8755e-01, 2.5684e-01, + -1.8066e-02, -6.8970e-02, 8.1299e-02, 1.1688e-01, 1.9360e-01, + -3.4962e-03, -2.7197e-01, -1.5454e-01, 2.5955e-02, -1.4087e-01, + -6.9824e-02, -1.1560e-01, 2.2192e-01, 2.2375e-01, -2.6562e-01, + 1.5686e-01, 1.7334e-01, -4.9927e-02, 1.7310e-01, -5.7373e-03, + 2.4084e-01, 1.3684e-01, 2.2937e-01, -5.5618e-03, -2.5342e-01, + 1.3458e-02, 1.4160e-01, 2.2644e-01, 1.7688e-01, 2.6855e-02, + -7.4097e-02, 1.1395e-01, 6.5613e-03, -1.8433e-02, 4.7803e-01, + 2.7612e-01, -1.6211e-01, 1.3245e-01, 2.4368e-02, 1.6711e-01, + -6.8909e-02, 2.5781e-01, -3.1079e-01, 4.4128e-02, 2.2339e-01, + -1.6638e-01, 8.4900e-02, 1.5526e-02, 8.4290e-02, -3.7256e-01, + 3.8208e-01, -6.8848e-02, -2.6880e-01, -1.7639e-01, 2.2156e-01, + -2.9980e-01, -6.5430e-02, 2.6904e-01, -1.6040e-01, -4.2749e-01, + 1.1218e-01, -3.0786e-01, 8.3313e-02, -3.7134e-01, -3.5669e-01, + -2.5684e-01, -2.1765e-01, 2.4658e-01, -2.2327e-01, 3.3081e-02, + 1.3037e-01, 8.6731e-02, -3.2007e-01, -7.5500e-02, 1.8738e-02, + 2.0117e-01, -2.1680e-01, -8.2779e-03, 6.8665e-02, -1.8115e-01, + 2.6733e-01, -2.4634e-01, -2.5977e-01, 4.7852e-01, 1.2781e-01, + -6.1523e-02, -2.2485e-01, -3.7256e-01, 1.5030e-02, 5.9937e-02, + 1.1444e-01, -1.5002e-01, 3.1226e-01, 2.8613e-01, 2.8467e-01, + -9.7427e-03, 7.5317e-02, -2.1643e-01, -3.8391e-02, 3.1201e-01, + 1.2268e-01, 1.2854e-01, 1.3464e-01, -1.2109e-01, 2.0679e-01, + -2.4979e-02, -2.1936e-01, -2.6535e-02, 4.0802e-02, -2.4365e-01, + -4.5605e-01, -2.5177e-02, -1.5369e-01, -4.3481e-01, 9.3689e-03, + 7.9773e-02, -2.8027e-01, -7.6965e-02, 2.0410e-01, 2.3254e-01, + 2.7420e-02, 2.3975e-01, 
-6.6101e-02, 3.1128e-01, 5.5371e-01, + 1.3611e-01, 1.3748e-02, -1.9287e-02, 2.3083e-01, 1.9031e-01, + 1.8201e-01, -2.6672e-02, -4.7394e-02, 1.4185e-01, 2.6318e-01, + -3.7036e-01, -1.1749e-01, 3.3057e-01, 4.7821e-02, 1.2482e-01, + -7.0374e-02, 9.2957e-02, -7.6599e-02, -2.2131e-01, 4.1309e-01, + 3.6768e-01, -2.0154e-01, 2.7802e-02, 2.9404e-02, -1.5945e-02, + -3.2861e-01, 1.3159e-01, 5.8411e-02, -1.3281e-01, 2.9712e-01, + 1.4697e-01, 1.9373e-01, 5.9692e-02, 1.2610e-01, -1.6589e-01, + 3.5229e-01, 7.0496e-02, -3.3539e-02, -1.2396e-01, -1.1200e-01, + -2.5620e-02, -6.9824e-02, -9.5642e-02, 3.2251e-01, -2.1790e-01, + -4.8682e-01, 9.0942e-02, 3.2013e-02, 1.1603e-01, 1.4124e-01, + 1.4026e-01, -1.9885e-01, -2.9688e-01, -8.8440e-02, 1.8298e-01, + -1.5906e-01, -6.6345e-02, 6.5002e-02, -4.0576e-01, 1.2195e-01, + 9.2163e-02, -1.6528e-01, -1.2659e-01, 1.1407e-01, -3.2495e-01, + 1.0181e-01, -1.0797e-01, 2.7124e-01, -2.8027e-01, 2.2083e-01, + -1.7456e-01, 2.1265e-01, 1.8823e-01, -1.3452e-01, -2.2668e-01, + 1.1368e-02, -3.7207e-01, 3.0151e-01, -5.6763e-02, -1.4917e-01, + -3.9459e-02, 2.6147e-01, 8.5144e-03, -4.8523e-02, -1.7188e-01, + 2.4384e-02, -5.7800e-02, 2.4365e-01, -2.8223e-01, 2.0477e-02, + 6.5613e-02, -2.3010e-01, 3.4448e-01, 2.5659e-01, 2.1820e-02, + -3.4253e-01, -2.7881e-01, 3.2690e-01, -2.0728e-01, -5.1956e-03, + 6.8726e-02, -1.9824e-01, 1.7322e-01, -7.4036e-02, -9.8877e-01, + 8.1177e-02, -1.3452e-01, -2.7539e-01, 4.3793e-02, 1.9852e-02, + -2.7661e-01, -2.0767e-02, -8.2031e-02, 2.9564e-05, -2.8589e-01, + -2.9395e-01, -3.1201e-01, -4.1626e-02, 3.5938e-01, 1.5515e-01, + 5.5878e-02, -7.8583e-03, 3.3386e-02, 1.8826e-03, -8.0444e-02, + -4.8065e-02, -1.8005e-01, -7.5500e-02, -2.7313e-02, 8.1787e-02, + 3.3887e-01, 9.7046e-02, 5.3497e-02, -5.3772e-02, -1.6541e-01, + 1.7053e-01, -1.5942e-01, -6.8054e-02, -1.9263e-01, 4.4238e-01, + -3.9648e-01, -1.4990e-01, -6.9771e-03, 2.2156e-01, 1.9409e-02, + 3.0350e-02, -3.6377e-01, -6.2164e-02, -4.2261e-01, -1.4359e-02, + -2.1912e-01, 
1.3940e-01, 1.7603e-01, 2.3608e-01, -6.6895e-02, + -1.6980e-01, 3.4088e-02, -3.2867e-02, 3.6206e-01, -2.0798e-02, + 3.1836e-01, -2.0276e-01, -8.2275e-02, -4.9146e-01, 1.3928e-01, + -1.0724e-01, -2.1484e-01, 1.6455e-01, 3.1030e-01, -1.0187e-01, + 1.3757e-01, 1.0126e-01, 1.2524e-01, 1.7786e-01, -5.7220e-02, + -7.0610e-03, -4.4946e-01, -2.2522e-01, 2.6562e-01, -2.2852e-01, + -3.4839e-01, -7.1228e-02, -3.4277e-01, -1.9897e-01, 1.2482e-01, + 2.4219e-01, -5.1605e-02, 3.6987e-02, -8.5999e-02, -1.0681e-03, + 3.8745e-01, 1.7059e-02, 3.6304e-01, 1.7737e-01, 2.0703e-01, + -2.0764e-01, -1.8945e-01, -2.6489e-01, 2.0251e-01, 1.1841e-01, + 8.4717e-02, -2.7771e-02, -1.2170e-01, 1.9226e-02, -8.2703e-02, + 2.8003e-01, 4.7168e-01, 3.5522e-01, -3.5010e-01, 3.2776e-02, + 1.3367e-01, -8.9783e-02, -3.3752e-02, -4.7437e-01, 2.8198e-02, + 1.3965e-01, -3.7231e-01, 2.1448e-01, -6.5613e-02, 9.9731e-02, + 2.6840e-02, -1.1261e-02, 2.1423e-02, -2.4033e-02, -2.6318e-01, + 2.3254e-02, 2.4673e-02, 2.7979e-01, 3.0713e-01, -9.6588e-03, + 1.9104e-01, -1.9507e-01, 8.6365e-02, 3.8605e-02, -2.9199e-01, + -1.1090e-01, -2.3035e-01, 2.9370e-01, 1.1894e-02, 6.7932e-02, + 3.3722e-02, -3.5742e-01, 9.9976e-02, -2.8149e-01, -1.1664e-01, + 3.0029e-01, -2.9199e-01, 2.9346e-01, -3.6621e-01, -1.0321e-01, + -2.2949e-01, 3.4595e-01, -3.0249e-01, -6.8848e-02, -1.6101e-01, + -1.5503e-01, 4.5685e-02, -1.0468e-01, 1.5942e-01, -9.0759e-02, + -2.9346e-01, -1.0223e-02, 3.6835e-02, -4.5624e-02, 2.7100e-01, + -2.0190e-01, 2.2937e-01, 1.7078e-01, 3.1714e-01, -3.6401e-01, + 6.8703e-03, -1.0941e-02, 2.5586e-01, -1.5234e-01, 2.1582e-01, + 1.7944e-02, -1.3342e-01, -6.4941e-02, 2.3779e-01, -3.0322e-01, + 1.0791e-01, 2.9297e-03, -1.9910e-01, -2.0947e-01, 1.7529e-01, + 1.1121e-01, -2.4078e-02, 2.5586e-01, -9.3323e-02, 2.1960e-01, + -7.2449e-02, 6.2988e-02, -1.5356e-01, -2.4695e-01, 2.5659e-01, + 2.0288e-01, -1.0254e-01, -1.9873e-01, -4.1656e-02, 3.4698e-02, + 3.2495e-01, -1.0834e-01, -1.6418e-01, 8.2092e-02, -1.3513e-01, + 
-4.2236e-01, 1.7896e-01, 5.2521e-02, -5.5359e-02, 1.9800e-01, + 1.9336e-01, -3.6475e-01, 2.2620e-01, 2.6562e-01, 1.2292e-01, + -2.7390e-02, 3.2300e-01, -1.2280e-01, -1.2079e-01, 1.0083e-01, + 3.7549e-01, 1.1206e-01, 1.8875e-02, 1.7773e-01, 2.5244e-01, + 1.4648e-01, -4.9530e-02, -8.4991e-03, -1.9788e-01, -1.0077e-01, + 3.9917e-02, -2.8540e-01, -8.9340e-03, -3.8794e-01, 1.6736e-01, + 1.8445e-01, 2.2412e-01, -9.8267e-02, -3.8013e-01, -2.0129e-01, + 3.4180e-01, -1.2115e-01, -1.5613e-01, 7.4463e-02, -2.6270e-01, + -2.4048e-01, -5.8472e-02, -3.3960e-01, 1.8555e-01, -1.7114e-01, + -1.3318e-01, 2.9468e-01, -1.4648e-01, -1.2016e-02, 2.8540e-01, + 8.3313e-02, 9.5398e-02, 2.0251e-01, -9.3384e-02, 1.6907e-01, + 2.4487e-01, -1.0583e-01, -7.8186e-02, 5.3680e-02, -5.6183e-02, + -1.7627e-01, 1.6052e-01, -3.3478e-02, 1.2128e-01, 1.4038e-01, + 1.0828e-01, -3.8428e-01, 1.2634e-01, -1.5540e-01, -3.3447e-01, + 7.6965e-02, -1.8896e-01, -1.0107e-01, 2.1973e-01, 3.4277e-01, + 3.8574e-01, 1.4819e-01, -1.4624e-01, 9.7885e-03, 2.5024e-01, + 1.3757e-01, -2.7417e-01, -2.1741e-01, -9.9976e-02, 3.0835e-01, + -1.8018e-01, 4.4824e-01, 1.0400e-01, 4.7729e-01, 1.2622e-01, + 1.8738e-01, 2.1692e-01, 2.7417e-01, 1.1517e-01, 2.5952e-01, + -4.3945e-01, 8.9661e-02, 4.3335e-02, 2.8052e-01, 1.6235e-02, + 2.0288e-01, 5.7068e-02, 2.2083e-01, -2.4673e-02, 1.2561e-01, + -2.5684e-01, 2.1033e-01, -6.6528e-02, -7.8674e-02, -1.1284e-02, + -1.2122e-01, 5.8685e-02, -1.9739e-01, 1.5186e-01, 3.4241e-02, + 4.6802e-01, -1.2769e-01, 9.4238e-02, 2.8638e-01, -2.6831e-01, + 9.5642e-02, -1.6760e-01, -2.6074e-01, -2.9150e-01, -1.5710e-01, + 9.3201e-02, -4.3506e-01, 1.7371e-01, -1.8066e-01, 2.7026e-01, + 2.9785e-01, 1.4880e-01, 1.1169e-01, -2.6172e-01, 8.8257e-02, + 1.3013e-01, 2.1619e-01, 9.3750e-02, -2.0715e-01, -2.7710e-01, + -5.2551e-02, 1.4539e-01, 8.9600e-02, 3.7866e-01, 4.1275e-03, + 4.8193e-01, -1.8604e-01, -2.1716e-01, 6.0959e-03, -1.0056e-02, + -5.4199e-02, 1.5967e-01, 2.4597e-01, 2.0044e-01, -7.6294e-02, + 
7.9803e-03, 2.3914e-01, -1.8079e-01, -1.5625e-01, 8.1604e-02, + 4.2456e-01, -5.4474e-02, -1.6064e-01, 2.0496e-01, -2.1008e-01, + 1.9641e-01, -1.2103e-01, 8.6823e-03, -1.7273e-01, -5.8319e-02, + 4.7485e-02, 5.0812e-02, 5.5313e-03, -1.8201e-01, 1.2939e-01, + 1.9104e-02, -1.1429e-02, -1.0291e-01, 4.6021e-02, 1.1914e-01, + 1.4368e-01, -4.7900e-01, -1.0107e-01, 9.9426e-02, -1.8326e-02, + 1.8005e-01, -2.1045e-01, -1.8237e-01, -3.7915e-01, 8.7585e-02, + -3.8483e-02, 1.7944e-01, 3.6938e-01, 1.6003e-01, -6.0577e-03, + -1.0689e-02, -2.7319e-01, 4.5972e-01, 1.4473e-02, -1.2939e-01, + 1.8030e-01, 4.9500e-02, 3.5553e-02, -1.5106e-03, 6.2805e-02, + -3.1299e-01, -1.9531e-01, -3.5132e-01, -2.3438e-01, 1.8042e-01, + -2.4628e-02, -4.4189e-02, -1.2659e-01, -2.0279e-02, 1.0699e-01, + -3.8574e-02, 1.4636e-01, -1.8506e-01, -5.8746e-04, -3.5498e-01, + 1.2494e-01, 1.4441e-01, -7.9041e-02, 1.2256e-01, -4.6631e-02, + 1.5662e-01, 3.2642e-01, -3.0811e-01, 1.6040e-01, -1.2634e-01, + 8.5449e-02, -9.7534e-02, -2.6001e-01, 5.2539e-01, 1.1804e-01, + -1.3721e-01, -8.4290e-02, -4.2285e-01, -2.7618e-03, 2.3938e-01, + -1.0718e-01, -1.5405e-01, 2.5854e-01])Parameter containing: +tensor([1.8845, 1.8352, 1.8765, 1.8810, 1.5563, 1.8842, 1.8917, 1.8105, 1.9868, + 1.8980, 1.8124, 1.8699, 1.9132, 1.9105, 1.8045, 1.8734, 1.9492, 1.9584, + 1.8736, 0.5699, 1.9376, 1.8571, 1.8018, 1.7807, 1.9045, 1.8593, 1.8650, + 1.9069, 1.7912, 1.9275, 1.9020, 1.8200, 1.8223, 1.9474, 1.8166, 1.9292, + 1.8478, 1.8643, 1.7707, 1.7729, 1.8149, 1.9277, 1.8711, 1.8703, 1.8980, + 1.8339, 1.8191, 1.9654, 1.9062, 1.8221, 1.9507, 1.9744, 1.7754, 1.7398, + 1.8771, 1.9682, 1.9305, 1.9569, 2.0626, 1.9420, 2.0490, 1.8973, 1.9315, + 1.8269, 1.8913, 1.8603, 1.8191, 1.8255, 1.8268, 1.6817, 1.9641, 2.0235, + 1.8896, 1.8203, 1.9921, 1.9167, 1.9540, 1.9359, 1.7891, 1.9293, 1.9515, + 1.8066, 1.9256, 1.8339, 1.8488, 1.8092, 1.9274, 1.9570, 1.8335, 2.0225, + 1.9396, 1.8363, 1.9361, 1.8342, 1.8680, 1.8073, 1.8952, 1.9872, 1.9524, + 1.8325, 1.9858, 
1.9102, 1.8231, 1.9491, 1.9409, 1.9810, 1.8556, 1.7757, + 1.9584, 1.9158, 1.9068, 1.8165, 2.0067, 1.8175, 1.8685, 1.9612, 1.8651, + 1.9308, 1.9786, 1.8803, 2.0131, 1.9723, 1.8474, 1.9064, 1.8661, 1.7817, + 2.0292, 1.9366, 2.0245, 1.9169, 1.8463, 1.9134, 1.9453, 1.9029, 1.7884, + 1.8547, 1.7968, 1.9072, 1.8114, 1.9171, 1.9438, 1.9655, 1.9935, 1.9752, + 1.8150, 1.8036, 1.8647, 1.8392, 1.9946, 1.9759, 1.7417, 1.8785, 1.8786, + 1.9880, 1.8447, 1.5194, 1.9745, 1.8525, 2.0701, 1.8832, 1.7878, 1.9596, + 1.9374, 1.9127, 1.9849, 1.8117, 1.8179, 1.8551, 1.8570, 2.0063, 1.8393, + 1.7900, 1.7607, 1.7850, 2.0079, 1.8642, 1.8940, 1.8689, 1.7796, 2.0517, + 1.8809, 1.8384, 1.8476, 1.8974, 1.9596, 1.6308, 1.9006, 2.1654, 1.8026, + 1.9379, 1.9820, 1.9537, 1.9238, 1.8867, 1.8491, 1.8553, 1.9149, 1.9309, + 2.1425, 1.8556, 1.9288, 1.8089, 1.9468, 2.0053, 1.7469, 1.9317, 1.9154, + 1.9378, 1.9160, 1.9163, 1.8746, 1.7993, 1.8608, 1.6098, 1.7738, 1.9288, + 1.8738, 1.8375, 1.8819, 1.7251, 1.8135, 1.8060, 1.6460, 1.9648, 1.8793, + 1.8735, 1.9054, 2.0499, 1.8881, 1.8455, 1.8787, 1.8306, 1.8660, 1.9408, + 1.7706, 1.9874, 1.8663, 1.8535, 1.8764, 1.8998, 1.8804, 1.7887, 1.9140, + 1.9037, 1.8234, 1.9198, 1.8980, 1.9463, 1.9508, 1.9099, 1.9282, 1.9256, + 1.9555, 1.8825, 1.8637, 1.8823, 1.8507, 1.9972, 1.8996, 1.8754, 1.9579, + 1.9130, 1.8970, 1.9163, 1.9452, 1.6176, 1.8198, 1.8957, 1.9368, 1.9477, + 1.9516, 2.0003, 1.8590, 1.9027, 1.9444, 1.9328, 1.7937, 2.0126, 1.9234, + 1.9333, 1.8502, 1.8628, 1.8921, 1.7600, 1.8306, 1.8923, 1.8978, 1.8728, + 1.8345, 1.8942, 1.8993, 1.9678, 1.8302, 1.8068, 1.9830, 1.9665, 1.7677, + 2.0295, 1.9052, 1.9362, 1.8471, 1.8804, 1.8914, 1.8475, 1.9386, 1.9127, + 1.9178, 1.8527, 1.8397, 1.8191, 1.9175, 2.0222, 1.8673, 1.7971, 1.8434, + 1.8621, 1.8646, 1.9333, 1.7439, 1.8221, 1.8288, 1.7576, 1.8384, 1.8605, + 0.9349, 1.8890, 1.9015, 1.8971, 2.0346, 1.8416, 1.7772, 1.8861, 1.9598, + 1.9484, 1.8769, 1.7706, 1.9063, 1.8180, 1.9036, 1.9621, 1.9720, 1.9243, + 1.8806, 1.8688, 
1.7491, 1.8797, 1.7815, 1.8845, 1.8533, 1.8813, 1.6127, + 1.8691, 1.8961, 1.9153, 1.9476, 1.8424, 1.9297, 1.9749, 1.9124, 1.7928, + 1.8377, 1.9906, 1.9413, 1.8094, 1.8876, 1.9582, 1.9084, 1.9157, 1.9248, + 1.9435, 1.8492, 1.8388, 1.8780, 1.9118, 1.9468, 1.9740, 1.8628, 1.9167, + 1.8829, 1.9969, 1.8789, 1.8648, 1.9281, 1.8597, 1.9086, 1.9463, 1.9790, + 1.9217, 1.9149, 2.0050, 1.9205, 1.8149, 1.8173, 1.9832, 1.8378, 1.8729, + 1.8689, 1.8932, 1.9051, 1.9418, 1.8442, 1.8880, 1.9247, 1.8423, 1.8932, + 1.8876, 1.8785, 1.8502, 1.8830, 1.9121, 1.7974, 1.8728, 1.8306, 1.8850, + 1.8189, 3.1739, 1.9905, 1.8789, 1.8099, 1.9587, 1.8831, 1.9429, 1.8643, + 1.8100, 1.9681, 1.8934, 1.6765, 1.8947, 1.9454, 1.9292, 1.9464, 1.8583, + 1.9097, 1.8778, 1.8680, 1.9591, 1.8427, 1.7749, 1.8435, 1.9601, 1.8104, + 1.8350, 1.9762, 1.5642, 1.8662, 1.9177, 1.8729, 1.9588, 1.9295, 1.8630, + 1.9340, 1.8574, 1.8658, 1.9321, 1.8614, 1.7848, 1.8497, 1.8741, 1.9315, + 1.8774, 1.8153, 1.9115, 1.8713, 1.8385, 1.8768, 1.8541, 1.9018, 1.6993, + 1.7122, 1.7800, 1.8768, 1.7910, 1.8274, 1.9631, 1.8354, 1.8277, 1.9133, + 1.7502, 1.9774, 1.8644, 1.8474, 1.8558, 1.9439, 1.6937, 1.8008, 1.8918, + 1.9959, 1.8452, 1.8962, 1.8652, 1.9799, 1.8773, 1.9406, 1.8726, 1.9284, + 1.7958, 1.9859, 1.7727, 1.9554, 1.8868, 1.8192, 1.7793, 1.8819, 1.9566, + 1.9689, 1.9528, 1.8410, 1.8819, 1.9371, 1.9973, 1.9218, 1.9970, 1.8444, + 1.8517, 1.8372, 1.8086, 1.9310, 1.7364, 1.8146, 1.9686, 1.7777, 1.9504, + 1.7437, 1.9044, 1.9021, 1.9502, 1.9839, 1.9025, 1.8885, 1.8824, 1.9079, + 1.9152, 1.8963, 1.9761, 1.8352, 1.7745, 1.8770, 1.9029, 1.9332, 1.9402, + 2.0578, 1.9987, 1.9575, 1.9153, 1.9579, 1.9238, 1.9215, 1.8162, 1.8468, + 1.9803, 1.8959, 1.8979, 1.7640, 1.7755, 1.8934, 1.9122, 1.9088, 1.8951, + 1.9510, 1.8332, 1.9277, 1.6539, 1.9063, 1.8719, 1.8038, 1.9036, 1.9306, + 1.8461, 1.9749, 1.7861, 1.8499, 1.9288, 1.8200, 1.9073, 1.8482, 1.8608, + 1.8441, 1.9250, 1.9120, 1.7419, 1.6542, 1.8009, 1.8765, 1.8275, 1.9703, + 1.9623, 1.8815, 
1.8559, 1.9297, 1.9958, 1.8692, 1.7484, 1.8487, 1.9696, + 1.8354, 1.8752, 1.9288, 1.8886, 1.8154, 1.8855, 1.9286, 1.8765, 1.8816, + 1.9660, 1.8762, 1.9568, 1.8975, 2.0225, 1.8980, 1.9379, 1.8901, 1.8865, + 1.8581, 1.9267, 1.9512, 1.9268, 1.8755, 1.8539, 1.8854, 1.7541, 1.8981, + 1.8321, 1.9068, 1.7888, 1.9020, 1.9170, 1.8782, 1.9882, 1.9291, 1.8385, + 1.8635, 1.8034, 1.8408, 1.8393, 1.9157, 1.9480, 1.8666, 1.9089, 1.8793, + 1.8050, 1.8915, 1.9401, 1.8111, 1.9036, 1.6810, 1.8467, 1.8421, 1.9678, + 1.7958, 1.8310, 1.9185, 1.7928, 1.8878, 1.9683, 1.8328, 1.8989, 1.8129, + 2.0088, 1.8850, 1.8757, 1.6818, 1.8862, 1.8237, 1.8779, 1.8614, 1.8838, + 1.7376, 1.9299, 1.8992, 1.8632, 1.8358, 1.9367, 1.8568, 1.7632, 1.9217, + 1.9264, 1.8954, 1.8412, 1.9310, 1.8819, 1.8354, 0.6095, 1.8538, 1.8894, + 1.8347, 1.8615, 1.8613, 1.8770, 1.8895, 1.8258, 1.8161, 1.9044, 1.8602, + 1.9129, 1.8627, 1.9785, 1.9359, 2.0186, 1.9919, 1.9591, 1.8777, 1.8415, + 2.0802, 1.9656, 1.9193, 1.9592, 1.7867, 1.9489, 1.8725, 1.8389, 1.8473, + 1.8577, 1.7747, 1.9154, 1.7346, 1.8922, 1.9370, 1.8342, 1.9007, 1.7837, + 1.9888, 1.8941, 1.8775, 1.9283, 1.8973, 2.0281, 1.4591, 2.0369, 1.9378, + 1.8734, 1.9433, 1.8871, 1.7934, 1.9058, 1.8899, 1.8908, 1.9713, 1.8724, + 1.9331, 1.9974, 1.8825, 1.9038, 1.8145, 1.9421, 1.9098, 1.9045, 1.9809, + 1.9200, 1.8320, 1.7140, 1.9192, 1.8688, 1.8621, 1.9538, 1.8416, 1.8669, + 1.8944, 1.7779, 1.8887, 1.8708, 1.8072, 1.9348, 1.8595, 1.9091, 1.9632, + 1.9195, 1.8811, 1.8306])Parameter containing: +tensor([ 1.4977e-01, 9.0828e-02, -1.1229e-01, -2.4072e-02, 2.1253e-01, + 2.2673e-01, 1.5718e-01, 2.1828e-01, -2.4588e-01, -8.8377e-02, + 1.2490e-01, -1.3510e-01, -2.0088e-01, 5.7927e-02, -1.0586e-01, + -1.1825e-01, -9.5867e-03, -1.3082e-01, 4.6557e-02, 2.5393e+00, + -5.5107e-02, -2.8849e-03, -3.2968e-02, 8.6449e-02, 3.8190e-02, + -4.2845e-02, 8.5313e-03, -4.0419e-02, -4.0110e-03, -5.3827e-02, + 6.2886e-02, 9.8723e-02, 3.9529e-03, 1.7838e-01, 2.0938e-01, + -1.2265e-01, -1.3081e-01, 
-1.0067e-01, -1.1288e-01, 1.2991e-01, + 1.8509e-02, 2.8068e-03, 3.8258e-03, 1.9762e-01, 8.1392e-02, + -3.3367e-02, 9.7839e-02, -1.3463e-01, 9.3890e-02, 1.0690e-01, + 9.7328e-02, -9.1940e-02, -1.7569e-01, 5.7027e-02, -5.5082e-02, + 9.2227e-02, -6.8959e-02, -8.2383e-02, -1.3635e-02, 4.3161e-02, + 1.1511e-01, -1.9712e-01, 1.1997e-01, 1.2039e-01, 2.0542e-01, + 6.4997e-02, 2.1161e-01, -4.0855e-02, -1.3579e-01, 2.9124e-01, + -4.1219e-02, 1.6387e-02, 4.7529e-01, 1.2968e-01, -9.7462e-02, + 1.3828e-01, 1.6654e-01, -1.1903e-01, -9.1884e-02, 1.2110e-01, + 2.8219e-02, -8.1905e-02, -2.8228e-02, 1.2402e-01, -6.1241e-02, + 1.2894e-01, 7.7995e-02, 2.6379e-01, -2.3807e-02, -6.2343e-02, + -2.0724e-02, 5.8649e-02, 1.0976e-01, -1.4784e-01, -1.6364e-01, + 5.2668e-02, 2.2110e-01, -1.7867e-01, -2.4759e-01, 1.6276e-02, + 9.4501e-02, -1.3611e-01, 6.1677e-02, -1.0329e-01, 7.9952e-02, + 1.0241e-01, 1.3805e-01, -8.0445e-02, -1.0865e-01, -2.0342e-02, + 5.4483e-03, -4.4460e-02, -9.8944e-02, 2.5351e-02, -9.3630e-03, + -4.6340e-02, 1.8653e-02, -5.4521e-02, -9.9965e-02, 2.9935e-01, + -7.1766e-03, -3.5605e-01, -2.3958e-01, -2.7390e-02, -1.5948e-01, + -1.0897e-01, -1.7559e-01, 1.9700e-01, 1.0314e-02, -1.7945e-01, + -9.4148e-02, 7.2500e-03, 3.1125e-02, 8.9480e-02, -7.2448e-02, + 1.7824e-01, -2.1028e-01, -8.8175e-02, -1.0598e-02, -7.6072e-02, + 8.6819e-02, 3.8492e-02, -3.9397e-02, -7.3740e-02, 1.3232e-01, + -1.7187e-01, 2.5512e-02, 9.1546e-02, 9.4585e-03, 7.7797e-02, + 3.9275e-01, -1.0438e-01, -1.2284e-01, 1.5048e-02, 8.2665e-03, + -5.1999e-01, 2.1568e-02, -1.7402e-01, 8.3193e-02, 1.3827e-01, + 1.8734e-01, -1.4931e-02, -9.5600e-02, 8.9184e-02, -3.2616e-02, + 4.2414e-02, -6.3760e-02, 1.8367e-01, 2.8722e-04, -1.0264e-02, + -1.8133e-02, 1.1746e-01, 4.0782e-02, -2.9797e-02, 8.2706e-02, + -2.9593e-01, 3.7761e-02, 1.2163e-01, -2.4276e-01, 1.0814e-01, + -1.4889e-01, 1.9977e-03, 2.4847e-01, 9.8385e-02, -3.8665e-02, + -8.7119e-02, 5.5839e-04, -6.7407e-01, -1.9018e-01, -1.3712e-01, + -1.6403e-01, -1.2081e-01, 
1.1545e-01, 9.3563e-02, -9.6760e-02, + 5.7779e-02, 3.0807e-02, -2.1459e-01, 2.2013e-01, -1.3995e-02, + -3.3612e-01, 1.5033e-01, -5.4025e-02, 3.2845e-02, 9.2645e-02, + 3.2002e-01, -1.4954e-02, 1.5258e-01, 2.1483e-01, -2.7657e-02, + 8.3226e-02, 3.7656e-01, -5.6053e-03, -1.6680e-01, -3.9404e-02, + -1.0372e-02, -5.0771e-02, 4.1767e-02, -1.9044e-01, -4.0341e-01, + 4.0919e-02, 7.6289e-02, 1.1134e-01, -1.3315e-01, -4.8227e-02, + -7.6237e-02, -3.9292e-02, 9.4033e-03, 2.0934e-01, -1.2315e-01, + 2.1096e-01, 1.0530e-01, -1.2812e-01, -3.9880e-02, -2.1281e-01, + -3.3356e-02, -1.0072e-03, 8.7769e-02, 1.3860e-01, -6.7106e-02, + -1.4575e-02, 1.5290e-01, -6.3386e-02, 7.9846e-02, -1.3863e-02, + 7.7562e-02, -8.3009e-02, -1.0501e-01, -2.6750e-02, -1.7253e-01, + -2.5846e-02, -1.1804e-01, 1.7110e-01, -1.2576e-01, 1.7992e-01, + -1.9679e-01, -8.5437e-03, -6.9406e-02, 4.9921e-02, 5.0245e-02, + -1.1880e-01, -6.9830e-02, 5.9304e-02, -2.2032e-02, -5.5579e-02, + 2.8273e-01, 4.2607e-02, -6.2353e-03, -6.9794e-02, -4.0076e-02, + 7.6442e-02, 1.2681e-01, 2.6285e-01, -8.0095e-02, -5.2822e-02, + 4.3852e-02, -4.5039e-02, 1.2212e-01, 2.0381e-01, -1.1561e-01, + 1.6132e-01, 3.3906e-01, -1.2707e-02, 5.1528e-02, 1.2264e-01, + 9.6960e-02, -5.2069e-02, -1.3161e-01, 3.4161e-01, -1.0293e-01, + 3.3521e-03, -2.1975e-03, 1.8850e-02, -1.2784e-03, 1.6387e-01, + -5.1121e-02, 3.9901e-01, -1.2812e-01, 6.6056e-02, 1.3807e-01, + 4.9560e-02, -1.0768e-01, -1.2635e-01, 1.1741e-01, 3.6649e-02, + 9.2106e-02, 2.2007e-02, -1.2226e-01, 1.7446e-01, -3.4823e-02, + 5.3663e-02, -2.9064e-02, -2.4956e-01, -2.3326e-01, 2.9518e-03, + 1.6585e-01, 1.2153e-01, -2.6665e-02, 3.1691e-01, 1.1982e-01, + -1.7774e-01, 3.4280e-01, -1.0939e-01, 4.9095e-02, -1.5014e+00, + -2.8279e-01, 8.2694e-02, 5.9233e-02, 1.2187e-02, -1.3442e-01, + 2.5667e-01, 4.1742e-02, 1.5975e-01, 4.0541e-03, 2.5555e-01, + 2.7525e-01, 1.9277e-02, 1.5007e-01, -3.5787e-02, -4.1940e-02, + 3.8335e-03, 1.0118e-01, -4.8436e-02, 3.0893e-02, 7.3047e-02, + 9.5244e-02, 1.8675e-01, 
1.3491e-01, -3.9549e-02, 8.8511e-02, + -2.0086e-01, 8.2027e-02, -2.5592e-02, 8.8682e-02, 9.3104e-02, + -1.5674e-01, -1.0768e-01, 2.4316e-02, 3.6231e-02, -1.5898e-01, + 3.4396e-02, 8.7499e-02, 4.3070e-02, -2.6488e-01, 6.0697e-02, + -1.0659e-01, 1.1697e-01, 1.1641e-01, 2.2722e-01, 5.9255e-02, + 1.9844e-01, -1.0398e-01, -1.0219e-01, -1.0963e-01, -1.4832e-01, + 7.7414e-02, 1.1030e-01, 2.3565e-01, -6.7662e-02, 9.7777e-03, + -2.2667e-01, 1.0643e-01, 1.2504e-02, 7.0543e-02, 2.1687e-02, + -4.3200e-02, 6.9545e-02, -1.5286e-01, -5.3265e-02, -4.3717e-02, + 2.8115e-02, -1.6122e-01, -2.7460e-02, -6.3853e-02, 2.8858e-02, + 4.2642e-02, 1.0826e-01, 1.2918e-01, -5.5144e-02, 2.8905e-02, + 2.5789e-01, -6.0265e-03, 1.1585e-01, 4.1049e-02, -1.6564e-01, + -3.6466e-01, -4.4526e-02, -3.9705e-02, 5.0000e-02, 4.2413e-02, + -9.8391e-02, 2.2150e-02, 4.3222e-02, -7.1868e-02, -9.0238e-02, + -3.9393e-01, 3.9913e-02, 6.0577e-02, -5.4894e-02, -2.7723e-02, + -5.5954e-02, 7.2414e-02, 1.0857e-01, -1.4078e-01, -1.9501e-02, + 1.1566e-01, -2.9919e-01, -2.8507e-01, 1.3112e-01, -1.1352e-02, + -1.2703e-01, -1.3442e-01, 7.1983e-03, 2.7848e-01, 1.1755e-01, + 7.1231e-03, 1.5916e-01, -3.3708e-03, 1.5079e-01, -1.1724e-01, + 2.7498e-02, 1.7569e-02, -1.5906e-02, -1.2364e-01, 2.7426e-01, + -1.4464e-01, -2.9425e-02, -1.8606e-01, 1.4634e-01, -1.0930e-01, + -6.5916e-02, 8.1522e-02, -1.8188e-01, 1.8782e-02, 6.5026e-02, + -3.8653e-02, 1.2372e-01, -1.9791e-02, -2.8119e-02, -1.2243e-01, + -9.4997e-02, 1.8606e-01, -2.6361e-01, 2.0877e-01, -4.2930e-02, + -1.2266e-01, 1.4367e-01, -9.8226e-02, 3.1051e-01, 1.0347e-01, + 1.1779e-01, -2.0663e-01, 1.4741e-02, 1.1494e-01, 2.7045e-02, + 5.6440e-02, 2.2375e-01, 1.3750e-02, -8.0551e-02, 4.4833e-02, + 1.8983e-01, 1.6651e-02, 4.2797e-02, 1.4209e-01, -1.9838e-01, + -4.0615e-03, -1.5028e-01, 1.5990e-01, 6.5197e-02, 7.3199e-02, + 2.7811e-02, 1.1103e-01, -5.0831e-02, 1.4231e-02, -2.2152e-01, + -1.8666e-01, -9.2706e-02, -2.2502e-02, -1.2607e-01, 2.3435e-01, + -3.5548e-02, -1.7466e-01, 
3.4294e-02, -6.1595e-02, -6.2275e-02, + 4.5427e-02, 8.0757e-02, 1.0295e-01, 7.7756e-02, -2.9827e-02, + -1.5265e-01, -4.7962e-03, -1.6855e-01, 1.9033e-01, -1.1006e-01, + -8.5642e-02, 1.7786e-01, 1.0449e-01, 8.5187e-02, -5.4987e-02, + -3.4442e-02, -4.7119e-02, 1.5076e-01, 1.0142e-01, -9.9440e-02, + -4.9660e-02, -2.6204e-02, -4.3997e-02, 1.3288e-02, -9.6366e-02, + 7.7371e-02, 2.0195e-01, -2.5764e-02, -9.3003e-03, -5.8061e-02, + -1.9938e-01, -5.7160e-02, 1.5829e-01, 1.8127e-01, -5.1235e-02, + 5.3454e-02, -5.7813e-02, -9.8958e-02, 3.4245e-02, -3.8890e-02, + 5.5782e-02, 7.2028e-02, 8.4247e-02, 1.6157e-01, 8.5942e-02, + -6.3876e-02, 1.6179e-01, -1.1903e-01, 1.9752e-01, -1.2409e-01, + -1.3576e-01, 9.2939e-02, -8.0832e-02, -3.7897e-02, 1.3954e-01, + -1.2824e-01, 1.3462e-01, 3.5907e-02, 7.7795e-02, 3.5418e-01, + 1.8305e-01, 4.7019e-02, 1.2169e-01, 1.4338e-01, 2.2147e-02, + 1.6112e-01, -2.3264e-02, 8.2084e-04, -4.4160e-02, -2.8564e-03, + 8.1639e-02, 3.2530e-02, -8.9730e-02, 1.4196e-01, -1.2849e-01, + -1.8822e-01, -9.6932e-02, 1.5225e-01, 2.9004e-02, 5.1665e-02, + 1.4070e-01, -3.1082e-02, -1.7318e-01, 1.1008e-01, -1.8146e-02, + -9.7296e-02, 2.5158e-01, 1.0361e-01, 1.6765e-02, 9.7833e-02, + -9.4305e-02, 1.9451e-01, 7.9379e-02, -1.3511e-01, -2.7555e-02, + -8.5424e-02, -1.8621e-01, -9.9312e-02, -7.1172e-02, -1.2654e-01, + -1.1217e-01, 1.0024e-01, 2.0272e-01, 7.3024e-02, -6.2072e-02, + 2.3424e-01, 1.1232e-01, -1.6426e-01, -5.0104e-02, -9.9868e-02, + -2.0319e-01, 2.6993e-02, 5.0555e-02, -7.6812e-02, -2.2759e-01, + 2.5402e-01, -2.1871e-03, 4.5064e-02, -7.3341e-03, 2.1875e-02, + -1.0079e-01, -3.1013e-02, -9.9062e-02, 6.1146e-02, -1.5484e-01, + 2.4925e-01, -1.1802e-02, 7.6865e-02, 1.1096e-01, 2.3033e-01, + 3.1681e-02, 3.8874e-03, 4.8754e-02, 2.8777e-02, -6.3957e-02, + -1.4132e-01, -2.0266e-01, -3.7878e-02, -6.4012e-02, 1.7023e-01, + 7.9161e-02, 1.3030e-02, 1.9864e-01, 8.3066e-02, 2.5653e-01, + 9.1549e-02, 1.6667e-01, 5.2830e-03, 1.3459e-01, -6.7270e-02, + 8.6687e-02, -2.3856e-01, 
-6.9920e-02, 7.9934e-02, 1.3820e-01, + -1.3395e-01, -1.5970e-02, -8.8392e-02, -4.4374e-02, 1.4311e-01, + -5.3860e-02, -1.6929e-01, -1.7831e-01, -2.5584e-01, 6.5801e-02, + -3.4672e-01, 1.2548e-01, 8.3701e-02, 8.4880e-02, -1.2696e-02, + 7.1410e-02, -2.6850e-02, -8.6469e-02, -7.0036e-02, 1.2408e-01, + -1.3168e-01, -2.3096e+00, 9.2856e-02, 1.1851e-01, 7.4759e-02, + -1.8195e-01, 8.1052e-02, -2.3694e-02, 6.8942e-02, 5.0159e-02, + -3.2627e-01, 4.6953e-02, 1.3953e-02, -3.2010e-02, -1.5938e-01, + 1.1737e-01, 5.7404e-02, -5.9496e-02, 1.1439e-02, -5.4151e-02, + 1.9076e-01, 1.3202e-01, 2.3647e-02, -3.2032e-02, -2.0221e-01, + 3.9195e-02, 1.9440e-01, 3.9982e-02, -1.6485e-01, -7.3657e-02, + -6.7927e-02, 1.7999e-01, 1.9496e-01, 1.4840e-01, 2.8846e-02, + 1.1035e-01, 5.6286e-02, -5.4704e-02, -1.4696e-01, -4.4278e-02, + 5.0493e-02, -2.2273e-02, -3.9672e-02, -3.2121e-01, 2.0969e-01, + -3.9823e-02, -3.1859e-01, 1.5124e-01, 1.4009e-01, 5.0370e-02, + 8.3453e-02, 9.9049e-02, 2.4573e-01, 1.9968e-01, 5.8656e-02, + -5.6640e-02, 4.0581e-02, 2.6943e-02, -1.3497e-04, -1.2463e-01, + 5.2881e-02, -1.2241e-01, 8.8162e-03, 1.3377e-01, 5.5711e-03, + -3.2902e-02, -1.4677e-01, -4.2712e-02, 1.2307e-01, 8.0042e-02, + -3.1782e-02, -1.3426e-01, 2.1633e-02, 1.0981e-02, 2.2117e-01, + -1.3080e-02, 8.9470e-02, 1.8250e-01, -2.3044e-01, -5.8739e-02, + -3.1466e-02, 6.1521e-02, 2.5686e-01, -6.6933e-02, -2.3293e-01, + -2.6690e-02, 2.1225e-01, 7.8747e-02])Parameter containing: +tensor([[ 4.3831e-03, -1.7395e-02, 3.9558e-03, ..., -1.6966e-03, + 1.0384e-02, 3.0640e-02], + [-2.2182e-03, -2.1912e-02, 5.6572e-03, ..., 2.1820e-02, + 7.8583e-03, 1.6800e-02], + [ 5.5084e-03, -1.9119e-02, -4.4098e-03, ..., 2.0035e-02, + -1.3260e-02, 1.9684e-03], + ..., + [ 4.6631e-02, -5.4359e-05, -3.4523e-03, ..., 4.8828e-03, + 2.2156e-02, -1.1864e-02], + [ 8.8787e-04, -8.1482e-03, 1.7776e-02, ..., -1.6052e-02, + 3.0502e-02, 9.1400e-03], + [ 1.3641e-02, 7.2708e-03, 3.8815e-03, ..., -3.4828e-03, + 2.0161e-03, -3.3054e-03]])Parameter 
containing: +tensor([-0.1271, -0.1493, -0.2289, ..., -0.5405, -0.3594, -0.0424])Parameter containing: +tensor([[-0.0115, -0.0157, -0.0200, ..., -0.0466, -0.0021, -0.0165], + [ 0.0092, -0.0032, -0.0108, ..., 0.0121, -0.0015, 0.0188], + [ 0.0030, -0.0024, 0.0109, ..., -0.0146, -0.0017, -0.0030], + ..., + [-0.0107, -0.0003, 0.0144, ..., -0.0054, 0.0235, 0.0117], + [ 0.0090, 0.0042, 0.0030, ..., 0.0037, 0.0125, -0.0172], + [-0.0052, -0.0043, 0.0161, ..., 0.0261, 0.0152, -0.0026]])Parameter containing: +tensor([ 2.5586e-01, -2.6196e-01, 2.3486e-01, 1.3916e-01, -1.3599e-03, + 4.4922e-02, 1.1615e-01, 1.1955e-02, -2.2461e-01, 1.1938e-01, + 1.4453e-01, -7.4097e-02, -1.5967e-01, 3.5095e-02, -1.9214e-01, + 6.9092e-02, -6.2347e-02, 7.0038e-03, -5.2734e-02, 2.5732e-01, + 3.0411e-02, -3.2158e-03, -4.5239e-01, 1.0803e-01, -2.2742e-01, + -1.1322e-01, 3.1433e-02, -1.7426e-02, 2.8857e-01, -9.8145e-02, + -2.0264e-01, 8.4595e-02, 7.5928e-02, -5.0781e-02, 1.3879e-01, + 1.1859e-01, 2.1851e-01, -7.2327e-02, -1.4209e-01, 6.4819e-02, + 1.1395e-01, -6.5674e-02, -6.4087e-02, 4.7607e-02, 1.8921e-02, + 1.1249e-01, 1.6370e-01, 1.0114e-01, 1.5955e-01, -1.7944e-01, + 1.3147e-01, 7.1228e-02, -1.2158e-01, -1.3306e-01, 1.9141e-01, + -1.3168e-02, -1.5515e-01, -1.9641e-01, 1.9983e-01, -1.1304e-01, + -1.2634e-01, 8.5205e-02, 2.0050e-02, 1.3379e-01, -1.9434e-01, + -5.9113e-02, 8.1543e-02, 1.2436e-02, 2.6291e-02, 2.5049e-01, + -1.7688e-01, 1.2524e-01, -1.7102e-01, 1.9751e-01, 1.8896e-01, + 2.8589e-01, -1.9067e-01, -3.7628e-02, 8.3466e-03, -1.2036e-01, + -3.3447e-02, 2.2400e-02, -1.3107e-02, -1.7136e-02, 5.1880e-02, + 9.6497e-02, -1.9214e-01, -3.2379e-02, -8.5571e-02, 1.6809e-01, + 1.9482e-01, 1.0614e-01, 1.6821e-01, -1.9812e-01, 2.9282e-02, + 5.1956e-03, -1.4514e-01, 8.9050e-02, 3.2623e-02, -3.6938e-01, + -1.4392e-01, 6.9031e-02, 2.9517e-01, 2.1252e-01, 2.3956e-02, + -2.4052e-03, -7.3364e-02, -1.6922e-02, -6.2332e-03, 9.4360e-02, + -2.0386e-01, -5.0323e-02, 1.1627e-01, -1.7688e-01, 3.9276e-02, + 
-2.6520e-02, -8.8379e-02, -3.0960e-02, -5.7495e-02, 6.6910e-03, + 8.5602e-03, 1.1673e-02, -3.2867e-02, -9.2346e-02, -3.0127e-01, + -6.9542e-03, -1.4868e-01, -1.5161e-01, 8.5754e-02, -1.5942e-01, + 3.1948e-03, 7.7942e-02, -2.4695e-01, -1.4941e-01, -2.9175e-01, + -1.8604e-01, -1.6553e-01, 2.4765e-02, 2.4857e-02, 8.7830e-02, + -2.0044e-01, -9.2590e-02, -7.0251e-02, -1.0168e-01, 1.5295e-01, + -9.3262e-02, -2.5317e-01, 2.3743e-01, 1.9897e-01, 8.7219e-02, + -5.1239e-02, 1.0211e-01, -5.9723e-02, 2.6611e-01, 3.5938e-01, + -5.9540e-02, 3.0713e-01, 3.8090e-03, 1.7749e-01, 3.1396e-01, + 1.4099e-01, 1.6199e-01, -3.5191e-03, 6.2561e-02, 2.8534e-02, + -2.0007e-01, -1.8066e-01, 3.4180e-02, 7.5562e-02, 1.5833e-01, + -1.7786e-01, -1.8213e-01, -2.8198e-02, -2.9492e-01, -1.4542e-02, + -6.7810e-02, 9.3079e-02, 1.5076e-01, -9.5398e-02, -2.7588e-01, + 9.1675e-02, -4.7302e-02, -8.7952e-02, -4.3091e-02, -1.5918e-01, + -2.1704e-01, 1.3412e-02, 2.1313e-01, -1.9043e-02, -1.1938e-01, + 1.6870e-01, 6.8542e-02, -1.5430e-01, -1.8274e-01, -2.4744e-01, + -2.0203e-01, -1.5991e-01, 2.0447e-02, -7.7820e-02, -1.5002e-01, + -1.7471e-02, -1.0651e-01, 1.1975e-01, 4.4403e-02, 2.1057e-01, + 1.5039e-01, -3.3081e-02, 8.3694e-03, 2.1594e-01, -1.2128e-01, + -2.2058e-01, 1.1041e-01, 2.2449e-01, -2.3022e-01, -1.3965e-01, + 1.4478e-01, 1.0638e-01, 2.3303e-01, 1.4087e-01, -9.6863e-02, + -2.8540e-01, 1.1133e-01, 1.7224e-01, 1.2749e-02, 6.4941e-02, + -2.2446e-02, 2.0190e-01, 3.1830e-02, 7.7896e-03, -6.5369e-02, + 1.4832e-01, 4.1351e-02, -2.2571e-01, -3.7018e-02, 8.5083e-02, + 2.1240e-01, -3.1958e-01, -2.5726e-02, 2.3096e-01, -2.4133e-01, + -2.5464e-01, 1.1420e-01, -1.7786e-01, 1.5222e-01, 1.6281e-02, + 2.0520e-01, 2.0850e-01, -9.8328e-02, 8.3084e-03, -3.8452e-02, + 9.8450e-02, -3.6896e-02, 6.7932e-02, -8.7128e-03, 1.2646e-01, + -4.3060e-02, -2.1423e-01, 2.6587e-01, 1.9202e-01, 1.6455e-01, + 3.0811e-01, 2.5073e-01, -4.7119e-02, -3.8281e-01, -4.4952e-02, + 1.3403e-01, -2.4744e-01, 3.6438e-02, -3.9062e-02, 1.0809e-01, 
+ -2.9770e-02, -4.2297e-02, 1.7548e-02, -8.6689e-04, -8.0811e-02, + 1.4893e-01, -9.7473e-02, -2.5806e-01, 2.1326e-01, 8.2703e-02, + 9.4666e-02, 3.0151e-01, 2.7054e-02, -1.4612e-01, 2.3657e-01, + -1.0828e-01, 1.5649e-01, -1.6284e-01, 2.7710e-01, -9.6497e-02, + -2.0428e-03, -1.8091e-01, -1.6309e-01, -8.1665e-02, -1.5027e-01, + -9.0271e-02, 1.4758e-01, 1.6687e-01, -6.4758e-02, -1.1267e-01, + 1.3989e-01, -1.8921e-01, 7.0374e-02, 1.7310e-01, -2.2803e-01, + 8.6441e-03, 8.0017e-02, 4.1687e-02, 2.9639e-01, -1.2396e-01, + -1.0071e-01, 8.0505e-02, -1.1536e-01, -2.2742e-01, -7.3303e-02, + 9.5901e-03, -1.9006e-01, -6.5369e-02, 5.6793e-02, -1.8372e-02, + 1.7432e-01, 1.3599e-01, -2.5528e-02, 1.9495e-01, -1.2969e+00, + 4.7394e-02, 1.1646e-01, 6.7383e-02, -1.6223e-01, 7.1533e-02, + 1.3025e-01, 1.8112e-02, 3.0737e-01, 1.0773e-01, -9.6375e-02, + 2.2632e-01, 2.2571e-01, -5.6610e-03, -1.4490e-01, -8.7463e-02, + -1.4673e-01, 1.4355e-01, 1.1865e-01, -4.6349e-03, 3.1104e-01, + -4.8340e-02, 9.0515e-02, -6.7444e-02, -3.1616e-02, -1.7200e-01, + 9.5596e-03, -3.3539e-02, 1.6943e-01, 2.1576e-02, -7.2205e-02, + -4.6051e-02, 3.4619e-01, 1.9669e-02, 1.6907e-01, -2.2095e-02, + 9.5764e-02, 5.7526e-02, -1.6284e-01, 1.3220e-01, -4.5380e-02, + 3.0322e-01, 1.2451e-01, -7.6172e-02, 5.8624e-02, 2.1399e-01, + 2.5659e-01, 1.7542e-01, -1.8726e-01, -1.0052e-01, 1.5601e-01, + -8.0261e-03, -1.9250e-01, -2.0728e-01, -1.0773e-01, 8.2092e-02, + -2.3718e-01, 1.8848e-01, 1.1389e-01, -5.9967e-02, -1.1237e-01, + 2.9388e-02, 6.6284e-02, -1.4258e-01, 5.5008e-03, 1.8115e-01, + 4.0039e-02, 2.7930e-01, -2.1948e-01, 6.1493e-02, 9.9976e-02, + -1.1169e-01, 2.2644e-01, -1.0059e-01, 8.5999e-02, 7.2250e-03, + -1.9547e-02, -3.0298e-01, 1.4709e-01, 9.9854e-02, -7.2510e-02, + 4.1138e-02, 9.9854e-02, 1.3802e-02, 1.3916e-01, -3.0786e-01, + -9.0332e-03, 2.3364e-01, -1.1591e-01, -2.6050e-01, -2.8491e-01, + -9.1797e-02, 1.7749e-01, 1.1115e-01, -1.6943e-01, -2.5223e-02, + -3.4515e-02, 3.3789e-01, -1.4458e-02, -1.8970e-01, 2.4280e-01, + 
4.2908e-02, -1.0559e-01, 6.2408e-03, 2.6221e-01, -5.0323e-02, + 5.5420e-02, -9.2697e-04, 4.0497e-02, 1.1017e-01, 1.3525e-01, + -2.0557e-01, 2.5000e-01, -2.8125e-01, 3.1641e-01, 4.8553e-02, + 2.9443e-01, -3.9154e-02, -5.8594e-02, 1.0431e-01, 3.3203e-01, + -4.3274e-02, -2.1545e-01, -1.3843e-01, -1.0760e-01, -3.8147e-02, + -2.5635e-01, 8.1665e-02, 1.6235e-01, -5.6885e-02, 9.8022e-02, + 5.6488e-02, 2.2266e-01, -1.8787e-01, 9.2041e-02, -1.0907e-01, + 3.0273e-01, 1.3049e-01, -1.2018e-01, -2.9663e-02, 6.1452e-05, + -1.5228e-02, 5.6366e-02, -4.9072e-02, 9.7168e-02, 1.5833e-01, + -9.8938e-02, -1.2256e-01, 4.6997e-02, -5.3986e-02, 7.8552e-02, + 1.4661e-01, 1.5198e-01, 2.3999e-01, 2.8152e-02, -1.6375e-03, + 1.6089e-01, -2.6978e-01, -2.3022e-01, -8.1055e-02, -1.2299e-01, + -3.2166e-02, 3.8239e-02, -1.3013e-01, -1.7151e-01, 5.3711e-02, + -1.6012e-03, 5.1651e-03, -8.3008e-02, -9.8572e-02, -1.0376e-01, + 2.5146e-01, -6.1066e-02, 2.0776e-01, -1.9629e-01, -8.8440e-02, + 8.7952e-02, 5.1239e-02, -8.2626e-03, 6.3667e-03, -4.3854e-02, + -1.8945e-01, 1.0223e-01, -1.6223e-01, -4.5624e-03, -9.3079e-02, + -3.2288e-02, -1.8127e-01, 7.4524e-02, 8.6060e-02, -2.6953e-01, + -8.4412e-02, -3.5431e-02, 9.4727e-02, 2.6779e-02, -2.8882e-01, + -3.2178e-01, -5.1575e-03, 3.6285e-02, 6.9824e-02, 2.1204e-01, + 1.8349e-03, -1.6162e-01, -3.0200e-01, -2.6154e-02, -5.6076e-03, + -6.1768e-02, 2.8491e-01, -8.7524e-02, -2.2229e-01, 3.2074e-02, + 2.2205e-01, -1.4099e-01, 1.3696e-01, 3.6743e-02, 7.0923e-02, + -2.1533e-01, -1.0870e-01, 3.9330e-03, -4.7150e-02, -1.8530e-01, + -9.0454e-02, 2.4573e-01, 6.5247e-02, 3.3142e-02, 3.3630e-02, + -2.3819e-02, 1.1084e-01, 2.6794e-02, 1.5049e-03, -5.7892e-02, + -6.4514e-02, -2.3010e-01, 2.1863e-01, 4.8889e-02, 5.6641e-02, + -2.0850e-01, 8.2825e-02, -8.4839e-02, -1.0162e-01, 1.2891e-01, + 1.4734e-01, 1.2225e-01, 6.5125e-02, -3.5492e-02, 3.3057e-01, + 5.4749e-02, 4.1138e-02, -4.5868e-02, 2.4817e-01, -1.2585e-01, + -1.6858e-01, -2.5488e-01, -2.6709e-01, -2.1265e-01, -8.3313e-02, 
+ -9.2163e-02, 1.5320e-01, -8.2214e-02, -2.7490e-01, 1.1139e-01, + 2.3560e-01, 4.5776e-02, 5.1361e-02, -1.8262e-01, -5.0879e-04, + 5.0720e-02, -2.9160e-02, -2.6831e-01, 1.2042e-01, 1.4197e-01, + -2.1655e-01, -6.8848e-02, -1.2213e-01, -4.3091e-02, -1.0223e-01, + -2.2241e-01, -2.1500e-02, -2.2858e-02, 1.1035e-01, -1.4368e-01, + -2.0081e-01, 1.4136e-01, 1.5112e-01, -2.9126e-01, -1.6284e-01, + 1.1853e-01, -3.2568e-01, 4.4678e-02, -2.7124e-01, -1.5076e-01, + -7.4890e-02, -1.1877e-01, -2.1851e-01, 9.8267e-02, 7.3975e-02, + 2.4365e-01, 1.7899e-02, -5.1956e-03, -1.8774e-01, 1.7651e-01, + -2.9297e-01, 1.8250e-01, -7.8247e-02, 2.2571e-01, 8.7158e-02, + 1.9214e-01, 3.2074e-02, -1.3171e-01, 1.0712e-01, -7.7209e-02, + 2.4097e-01, -2.2339e-01, 1.9531e-01, -1.7224e-01, 1.8042e-01, + -1.1169e-01, -7.6332e-03, 1.0455e-01, -1.8628e-01, 3.8647e-01, + -8.5754e-03, 4.4434e-02, 1.0852e-01, 2.3816e-01, -4.9957e-02, + -1.7053e-01, 1.6479e-01, -2.3425e-01, 1.9812e-01, -8.8989e-02, + -3.0548e-02, 1.5236e-02, -1.1304e-01, 1.5796e-01, -8.7891e-02, + -2.1103e-02, -3.0289e-03, -2.3779e-01, 1.9067e-01, 1.6187e-01, + 1.6870e-01, 5.0720e-02, 2.0947e-01, -1.7908e-01, -4.0955e-02, + -2.0605e-01, 3.5339e-02, 1.6992e-01, -1.0504e-01, -1.5942e-01, + -2.0142e-01, -1.5472e-02, -2.3804e-01, -1.8341e-02, -1.5869e-01, + 1.3782e-01, -1.9580e-01, 1.5857e-01, 4.2627e-01, -7.3303e-02, + -2.3132e-01, 2.1240e-02, 2.0294e-02, -1.1322e-01, 4.2267e-02, + -1.5991e-02, -7.6538e-02, -4.0100e-02, 8.8745e-02, 2.7173e-01, + -7.8186e-02, 7.9285e-02, 2.8214e-02, 1.0638e-01, -4.3396e-02, + 5.2460e-02, 1.3416e-01, 4.0497e-02, 2.5272e-04, -4.1443e-02, + -2.7319e-01, 2.6221e-01, -2.4612e-02, 5.6580e-02, 2.6953e-01, + 1.0156e-01, 7.1655e-02, 6.6452e-03, 4.6356e-02, 8.2947e-02, + 1.0986e-02, -1.8567e-01, -2.7026e-01, 9.7412e-02, 6.3416e-02, + -1.0809e-01, 2.5684e-01, 5.9418e-02, 5.4047e-02, 2.0657e-03, + -8.6914e-02, 6.0425e-02, -1.0840e-01, -1.6711e-01, -1.5234e-01, + 2.5879e-01, -5.5786e-02, -7.8430e-02, 5.5878e-02, 
-3.2812e-01, + -1.9934e-01, 9.6313e-02, -1.3062e-01, 1.6736e-01, 1.7151e-01, + 8.5815e-02, -1.1182e-01, -1.5137e-02, -4.1931e-02, -6.9336e-02, + -4.3823e-02, -4.1809e-02, 7.6904e-02, -3.6157e-01, -6.3660e-02, + -1.6382e-01, 2.4658e-02, 4.3774e-01, -4.3774e-01, -1.1328e-01, + 8.6121e-02, 4.8401e-02, -6.8542e-02, -1.1786e-01, -1.8677e-01, + 2.4097e-01, 1.3098e-01, 2.5928e-01, -1.8005e-01, -6.6650e-02, + 1.5649e-01, 9.4604e-02, -6.0059e-02])Parameter containing: +tensor([1.7453, 1.7916, 1.7866, 1.7060, 2.2256, 1.7691, 1.6459, 1.7028, 1.7940, + 1.7137, 1.7359, 1.6988, 1.7650, 1.6460, 1.9754, 1.7098, 1.6823, 1.6809, + 1.7675, 2.8573, 1.7179, 1.7905, 1.7851, 1.7294, 1.7422, 1.7077, 1.7375, + 1.6869, 1.8025, 1.6808, 1.6921, 1.7139, 1.7179, 1.7785, 1.7144, 1.6573, + 1.7561, 1.7219, 1.8840, 1.8551, 1.7527, 1.7285, 1.8142, 1.7218, 1.7085, + 1.7747, 1.7440, 1.6605, 1.7240, 1.6341, 1.6871, 1.7110, 1.8223, 1.7324, + 1.6996, 1.9939, 1.6452, 1.5798, 1.6484, 1.6495, 1.7104, 1.6899, 1.7505, + 1.7364, 1.7274, 1.8487, 1.7835, 1.7188, 1.6811, 2.6443, 1.7195, 1.6335, + 1.6849, 1.8288, 1.7249, 1.7434, 1.7538, 1.7232, 1.7073, 1.7086, 1.7375, + 1.6350, 1.6951, 1.7128, 1.6553, 1.6948, 1.7890, 1.7998, 1.6942, 1.6631, + 1.8334, 1.7767, 1.7468, 1.7147, 1.6983, 1.7031, 1.7222, 1.6426, 1.6320, + 1.8031, 1.6616, 1.8613, 1.7142, 1.7822, 1.6660, 1.8272, 1.7065, 1.6898, + 1.6310, 1.7521, 1.7844, 1.7785, 1.7546, 1.7858, 1.7514, 1.7175, 1.8956, + 1.6952, 1.7559, 1.7030, 1.6885, 1.8080, 1.6344, 1.7356, 1.6977, 1.8344, + 1.7739, 1.6807, 1.6260, 1.8368, 1.6922, 1.8865, 1.7865, 1.6245, 1.6890, + 1.7053, 1.8425, 1.7724, 1.7564, 1.7803, 1.6540, 1.7523, 1.6523, 1.8011, + 1.7817, 1.8210, 1.7611, 1.7148, 1.6591, 1.7066, 1.7953, 1.6767, 1.7715, + 1.7065, 2.0386, 2.6908, 1.7533, 1.7069, 1.7056, 1.8112, 1.8387, 1.7340, + 1.6729, 1.7448, 1.7651, 1.7562, 1.7603, 1.7402, 1.6834, 1.6905, 1.7669, + 1.7285, 1.7695, 1.6959, 1.7073, 1.7729, 1.6439, 1.7737, 1.9007, 1.8193, + 1.6352, 1.7767, 1.7973, 1.6721, 1.6326, 
2.0689, 1.7521, 1.5624, 1.7597, + 1.7233, 1.7889, 1.6468, 1.7949, 1.6567, 1.7086, 1.6855, 1.7539, 1.7119, + 1.7105, 1.7665, 1.6940, 1.7199, 1.6587, 1.6535, 1.7443, 1.8192, 1.6830, + 1.6448, 1.8299, 1.7532, 1.7436, 1.7580, 1.7853, 2.2166, 1.7786, 1.6753, + 1.6548, 1.7259, 1.7014, 2.1519, 1.7872, 1.8132, 2.3511, 1.7451, 1.6446, + 1.7903, 1.7986, 1.7225, 1.6865, 1.8110, 1.7765, 1.7229, 1.8636, 1.6293, + 1.6722, 1.6752, 1.7345, 1.6557, 1.7530, 1.8020, 1.7724, 1.7530, 1.7401, + 1.7140, 1.6637, 1.7321, 1.7963, 1.7235, 1.5552, 1.7318, 1.6982, 1.6654, + 1.7070, 1.7836, 1.6677, 1.7144, 1.7207, 1.6214, 1.7139, 1.6765, 1.7318, + 1.6520, 1.7032, 1.6822, 1.7167, 1.9936, 1.6842, 1.7808, 1.6448, 1.6655, + 1.7495, 1.7671, 1.7984, 1.7307, 1.7251, 1.8597, 1.6690, 1.7206, 1.8050, + 1.6937, 1.7735, 2.4023, 1.7041, 1.6982, 1.8072, 1.7366, 1.7480, 1.6612, + 1.8285, 1.7446, 1.6593, 1.5729, 1.8789, 1.7670, 1.5866, 1.6760, 1.9861, + 1.7029, 1.7262, 1.6737, 1.7327, 1.7383, 1.6930, 1.7918, 1.7627, 1.6763, + 1.7033, 1.7053, 1.8581, 1.7151, 1.5995, 1.7384, 1.7110, 1.7173, 1.7167, + 1.7400, 1.7778, 1.7273, 1.8511, 1.7437, 1.7464, 1.8946, 1.7516, 1.7868, + 1.9961, 1.7375, 1.8223, 1.8808, 1.6267, 1.6657, 1.7340, 1.7113, 1.6643, + 1.6593, 1.7756, 2.0622, 2.1898, 1.7083, 1.6580, 1.7134, 1.6195, 1.7422, + 1.6664, 1.6919, 1.7474, 1.8087, 1.7293, 1.7537, 1.7542, 1.7362, 2.0181, + 1.6838, 1.7311, 1.7292, 1.6971, 1.7682, 1.7884, 1.7328, 1.5910, 1.7977, + 1.8513, 1.7065, 1.7044, 1.7043, 1.6129, 1.6470, 1.7784, 1.7901, 1.9546, + 1.7444, 1.7572, 1.7716, 1.7748, 1.7461, 1.6755, 1.7075, 1.7108, 1.7496, + 1.8443, 1.6807, 1.8030, 1.7385, 1.7137, 1.7996, 1.8267, 1.6882, 1.7066, + 1.7421, 1.7258, 1.7883, 1.6757, 1.7318, 1.7987, 1.7435, 1.7862, 1.7681, + 1.7145, 1.6727, 1.7336, 1.7229, 1.7688, 1.7182, 1.8338, 1.7751, 1.8746, + 1.7005, 1.6615, 1.6654, 1.7231, 1.6855, 1.8164, 1.7140, 1.7523, 1.7857, + 2.3900, 1.5233, 1.6147, 1.6700, 1.7539, 1.7769, 1.6780, 1.6786, 1.7570, + 1.7269, 1.6752, 1.6584, 2.0102, 1.7854, 
1.8528, 1.7610, 1.7021, 1.7769, + 1.7718, 1.7939, 1.7760, 1.8099, 1.7298, 2.0391, 1.7124, 1.6612, 1.7290, + 1.7201, 1.7583, 3.1886, 1.8840, 1.7371, 1.6928, 1.7375, 1.6900, 1.7818, + 1.7990, 1.8781, 1.8105, 1.7186, 1.7681, 1.7200, 1.7865, 1.7380, 1.7729, + 1.6335, 1.6749, 1.8281, 1.7893, 1.8383, 1.7957, 1.7725, 1.7262, 1.8634, + 1.7606, 1.7397, 1.7381, 1.8562, 1.7843, 1.6655, 1.7895, 1.7482, 1.7366, + 1.7391, 1.8199, 1.7300, 1.8336, 1.6733, 1.6370, 2.5717, 1.7043, 1.7063, + 1.6659, 1.6898, 1.7545, 1.7829, 1.6406, 1.6760, 1.7399, 1.7606, 1.6639, + 1.6529, 1.7716, 1.7976, 1.7405, 1.7779, 1.7114, 1.8633, 1.7808, 1.6656, + 1.7106, 1.6379, 1.7043, 1.7034, 1.7934, 1.7144, 1.6960, 1.6369, 1.6841, + 1.6676, 1.7403, 1.8162, 1.7031, 1.7580, 1.7332, 1.6771, 1.8749, 1.6809, + 1.6692, 1.7695, 1.8276, 1.7821, 1.5962, 1.7035, 1.6830, 1.7837, 1.7629, + 1.8197, 1.7063, 1.8174, 1.7458, 1.7029, 1.7777, 1.7655, 1.8087, 1.7410, + 1.6792, 1.8107, 1.7881, 1.7322, 1.7766, 1.7144, 1.6638, 1.7587, 1.7629, + 1.6253, 1.7377, 1.8036, 1.7703, 1.7773, 1.7076, 1.6700, 1.7811, 1.7947, + 1.7252, 1.7153, 1.6269, 2.4692, 1.6680, 1.7619, 1.7354, 1.7241, 1.7732, + 1.6729, 1.6692, 1.7719, 1.7385, 1.8128, 1.7062, 1.6910, 1.7789, 1.8631, + 1.6821, 1.7483, 1.7831, 1.7509, 1.7962, 1.7171, 1.7621, 1.7450, 1.7958, + 1.7197, 1.6905, 1.7545, 1.7549, 1.6749, 1.7081, 1.8256, 1.8227, 1.7506, + 1.8081, 1.7703, 1.7216, 1.6962, 1.8352, 1.6964, 1.7632, 1.7305, 1.7199, + 1.6506, 1.7202, 1.7306, 1.6137, 1.9637, 1.6504, 1.7721, 1.7887, 1.7454, + 1.6628, 1.6622, 1.6674, 1.7064, 1.7213, 1.7320, 1.8118, 1.8140, 1.9451, + 1.6857, 1.7272, 1.8096, 1.7151, 1.8023, 1.7991, 1.7979, 1.7435, 1.6916, + 1.7446, 1.8259, 1.8163, 1.6671, 1.7059, 1.7805, 1.7954, 1.8122, 1.7129, + 1.7006, 1.8278, 1.6928, 1.7699, 1.8127, 2.6332, 1.6948, 1.8121, 1.7807, + 1.7341, 1.7502, 1.7695, 1.9141, 1.7048, 1.6762, 1.7265, 1.6776, 1.8072, + 1.6987, 1.7570, 1.7126, 1.8879, 1.7532, 1.8169, 1.6947, 1.7870, 1.5678, + 1.6320, 1.6937, 1.7704, 1.6546, 1.7238, 
1.6330, 1.7746, 1.9694, 1.6477, + 1.7148, 1.7291, 1.9253, 1.7652, 1.6549, 1.8793, 2.7716, 1.7033, 1.8173, + 1.7531, 1.8650, 1.6701, 1.7444, 1.8200, 1.8616, 1.7401, 1.7574, 1.6387, + 1.7232, 1.7761, 1.7073, 1.7611, 1.6337, 1.6966, 1.6602, 1.7216, 1.7338, + 1.6352, 1.6476, 1.6426, 1.7410, 1.8819, 1.6756, 1.5814, 1.7793, 1.7433, + 1.7529, 1.7703, 1.6975, 1.7116, 1.7453, 1.7028, 1.8396, 1.7989, 1.7690, + 1.5984, 1.9373, 1.8247, 1.6775, 1.7250, 1.5793, 2.5916, 1.6575, 1.7041, + 1.6216, 1.7912, 1.7722, 1.8124, 1.7116, 1.6666, 1.6750, 1.6511, 1.7467, + 1.7391, 1.6443, 1.7227, 1.8167, 1.7149, 1.7041, 1.7369, 1.7651, 1.6900, + 1.7217, 1.7272, 1.6623, 1.7115, 1.6773, 1.7730, 1.7566, 1.6515, 1.7410, + 1.6800, 1.8447, 1.8276, 1.7401, 1.7146, 1.7237, 1.8311, 1.7415, 1.7554, + 1.7146, 1.6885, 1.7398])Parameter containing: +tensor([-1.5534e-02, -9.1763e-02, -1.4045e-01, 2.9126e-02, -2.9463e-01, + 2.3244e-01, 1.4650e-01, 1.3824e-01, -1.4006e-01, -1.1251e-01, + 1.2915e-01, -1.6772e-01, -1.1740e-01, -1.8915e-02, 2.1420e-01, + -1.1838e-01, 2.4480e-02, 1.0073e-01, 1.0139e-01, -2.4958e-01, + -4.7139e-02, -3.2491e-02, 7.5936e-02, 5.0327e-03, -5.9994e-02, + -1.1320e-01, 1.1245e-01, 1.3947e-01, -1.5842e-01, 1.0699e-01, + -1.0513e-01, 1.4646e-01, 1.6563e-01, 8.8609e-02, 1.4347e-01, + -7.4134e-02, -1.2482e-01, 3.6243e-02, 2.5012e-01, 2.1067e-02, + -1.0077e-01, -9.0722e-02, -3.1916e-02, 2.9762e-01, 9.3168e-02, + -5.2134e-02, 1.2359e-01, -1.1238e-01, 6.2995e-02, 1.8050e-01, + 5.4862e-02, -1.4248e-01, -2.7663e-01, 7.0286e-02, -4.1281e-02, + 1.8095e-01, 2.5451e-02, -9.3611e-02, 3.1525e-02, 3.0742e-02, + -9.0886e-02, 3.5809e-02, -5.0295e-02, -1.6220e-02, 1.4903e-01, + 1.6067e-01, 7.7424e-02, -1.5068e-01, -9.7631e-02, -3.0946e-01, + -1.0245e-01, -7.8358e-02, 3.2978e-01, -1.1044e-01, 6.9739e-02, + -1.4487e-01, 1.8632e-01, -4.9441e-02, -1.4343e-01, 3.6038e-01, + 4.3383e-02, -1.5065e-01, 2.7976e-02, -2.0230e-02, -2.2828e-01, + 1.5317e-01, -8.6870e-02, 5.8729e-02, 8.2278e-02, -1.1187e-01, + 
-2.0997e-01, 1.1161e-01, 5.3529e-03, -3.4734e-01, -3.0991e-01, + -7.8397e-02, 1.9746e-01, -3.0953e-02, -2.6813e-01, 2.1088e-01, + 1.2270e-01, -2.1737e-01, 2.1370e-01, -1.8573e-02, 2.0623e-01, + 3.2870e-02, 1.3364e-01, -1.4971e-01, -3.0270e-01, -6.7287e-02, + 1.4275e-01, 2.9592e-01, 1.2103e-01, -1.6004e-01, -2.3951e-02, + -3.6579e-02, 3.0966e-01, -6.5552e-02, -2.5138e-01, 2.6074e-01, + -6.9158e-02, -3.3349e-01, -1.6839e-01, -2.2436e-01, -4.7964e-02, + -1.3955e-01, -2.2653e-02, 3.1051e-01, -4.5824e-02, 1.2099e-01, + 1.0464e-02, -2.2245e-01, 2.8780e-01, 2.4416e-02, 9.3748e-02, + 2.1737e-01, -9.0517e-02, -6.9622e-02, -8.9925e-02, -1.7875e-01, + 1.5849e-01, 3.7965e-02, -1.1369e-01, 1.3518e-01, 1.7028e-02, + -2.7495e-01, 1.6793e-01, 2.1625e-01, 2.3221e-01, -1.6563e-02, + 3.1062e-01, -1.4128e-01, -1.3812e-01, -2.9676e-02, -1.1254e-01, + 7.3345e-02, 1.9008e-01, -3.1589e-01, 1.7483e-01, 2.9653e-02, + -4.0455e-02, -3.6911e-02, 8.0104e-02, 2.0828e-01, -4.0867e-02, + -2.4094e-02, -2.2152e-01, 2.1542e-01, -1.7157e-01, -1.2389e-02, + 6.6927e-02, 3.7046e-01, -3.9744e-02, 1.6790e-01, 2.5737e-01, + -6.8471e-02, 5.7411e-02, 6.1858e-02, -3.9160e-01, -4.6156e-02, + -2.1085e-01, 1.1249e-01, 1.3627e-01, 2.7793e-01, -2.0108e-01, + 2.2857e-01, -1.2739e-01, 1.2818e+00, -8.5203e-02, -2.1764e-01, + -2.8680e-01, -2.2593e-01, 8.1885e-02, -6.8978e-02, -1.5091e-01, + 4.0286e-02, 1.1618e-01, -2.5044e-01, 1.5447e-01, 9.2900e-02, + -9.9608e-02, 2.9712e-02, -2.2294e-02, 3.1927e-02, 9.5738e-02, + 1.9990e-01, 1.6504e-01, 2.4631e-01, 1.0133e-01, 6.9246e-03, + 1.8389e-01, 1.8244e-01, -2.1620e-01, 3.0810e-01, 3.2302e-03, + -1.5407e-01, -2.9648e-01, 4.3923e-03, -1.9695e-01, 3.5502e-02, + 9.7438e-02, 3.6828e-03, -4.1745e-01, -9.6438e-02, -2.8194e-01, + 1.0465e-01, -2.3009e-01, -1.8973e-01, 1.1445e-01, 1.1904e-01, + 2.4688e-01, 8.2958e-02, 4.5977e-03, -1.2731e-01, -3.0066e-01, + -2.6491e-01, 3.9542e-02, 5.0806e-02, 1.1334e-01, -6.6315e-03, + -1.2101e-01, 2.5357e-01, 1.4793e-02, -7.2905e-02, -5.3436e-02, + 
1.2530e-01, -1.5131e-01, 2.3338e-02, -2.0551e-01, -2.9184e-02, + -1.2791e-02, -2.3347e-01, 1.7415e-01, -7.8002e-02, 1.1495e-01, + -2.7784e-01, 1.3663e-01, -1.6558e-01, -1.6523e-01, -1.5290e-01, + -1.4430e-01, 1.4719e-01, 6.6719e-02, 2.7414e-02, 4.6831e-02, + 3.8021e-01, -5.6495e-02, -1.5276e-01, -2.3739e-02, 3.4107e-02, + -7.4406e-02, 1.1249e-01, 2.2351e-01, -1.1416e-01, 4.6791e-02, + -3.3080e-02, 1.2830e-01, 2.8935e-01, 9.2980e-02, -1.5083e-01, + -4.5502e-02, -2.3274e-01, -9.3975e-02, 1.5168e-01, -8.0357e-02, + 2.5591e-01, -6.7106e-02, -1.8530e-01, 2.9677e-01, -1.3151e-01, + 1.0348e-01, -7.1660e-02, 1.1649e-01, -2.7519e-02, 1.0288e-01, + 2.3199e-02, 1.6356e-01, -2.6839e-01, 4.4333e-01, 9.9366e-02, + -3.5228e-01, -1.4036e-01, -1.9225e-01, -1.8235e-01, 4.8254e-02, + 1.1766e-01, 1.5968e-01, -2.8152e-01, 2.7740e-02, -1.4705e-01, + -8.5655e-02, -1.2457e-01, -1.5918e-01, -2.2650e-01, -2.0617e-03, + 2.2071e-01, 4.7423e-01, -1.2317e-01, 8.1605e-02, 2.6438e-01, + -1.7328e-01, -5.5589e-02, -7.9478e-02, 6.6586e-02, -3.9526e-01, + -1.6453e-01, -6.7773e-02, -6.8706e-02, 4.0666e-02, -1.0352e-01, + 2.0825e-01, -3.9659e-02, 7.2424e-03, 4.7092e-02, 2.2881e-01, + -1.1379e-01, -3.1302e-01, 8.0753e-02, -9.4348e-02, 7.6734e-02, + 1.4272e-01, -9.4793e-02, -1.0785e-01, -1.6521e-02, 2.9463e-02, + 2.0519e-01, 4.1036e-02, 5.8431e-02, -7.8597e-02, 2.9006e-01, + 2.5388e-01, 1.6062e-01, 2.9854e-03, 1.3798e-01, 1.5111e-01, + -3.3206e-01, -4.6566e-02, 1.1332e-01, 2.7857e-01, -3.2234e-01, + -1.5060e-01, -1.9339e-01, 6.2923e-02, -3.6645e-01, 5.0914e-02, + -1.0140e-03, 1.0521e-01, 2.5313e-01, 9.8427e-02, 5.3377e-02, + 3.0633e-02, -1.2009e-02, 4.8498e-02, -1.7911e-02, -1.2377e-01, + 2.1468e-02, 8.0258e-02, 2.3942e-01, -1.3388e-01, 6.4349e-02, + -2.2947e-01, 1.1419e-01, 1.6653e-01, 3.6875e-02, 2.3298e-02, + -9.5069e-04, 2.0242e-01, 8.2273e-02, 1.2790e-01, 3.8417e-02, + -1.3779e-01, -3.5614e-01, -2.9389e-03, -3.7396e-02, -2.2514e-01, + 1.1973e-01, 1.5550e-01, 4.0188e-02, -1.1199e-01, -8.5370e-02, + 
2.5541e-01, 8.7027e-02, 1.0922e-01, -5.5832e-02, -2.6477e-02, + -2.6585e-01, -1.9053e-01, -1.2098e-01, 7.0815e-02, 2.6938e-01, + 2.0130e-02, 3.1811e-02, -3.5534e-02, 1.0926e-01, 3.0880e-01, + -1.5899e-01, 6.1207e-02, -5.4653e-03, -2.8716e-02, 3.2807e-02, + -2.0764e-01, 4.9832e-02, 1.6245e-01, 5.7098e-02, 1.8167e-01, + -1.6261e-02, 2.1805e-02, -3.1371e-01, -1.3691e-01, 1.1864e-01, + -1.1328e-01, -7.7644e-02, -3.5575e-02, 2.1682e-01, 1.3694e-01, + 8.8308e-03, 2.2246e-01, 4.5436e-02, 1.8990e-04, -1.4476e-01, + -7.4615e-02, 8.5595e-02, 5.4799e-02, -7.1474e-02, 7.8181e-03, + -3.7506e-02, -9.0086e-02, -2.5738e-02, 4.2255e-03, -1.9520e-01, + -3.2488e-02, 6.0368e-03, -2.6547e-01, -2.4829e-01, 2.1645e-02, + -7.3512e-02, 7.6393e-02, -7.2706e-02, 1.5358e-01, 9.3677e-03, + -1.4428e-01, 1.6863e-01, -2.6322e-01, 2.1465e-01, -2.1246e-01, + -3.2902e-01, 3.1898e-02, 2.4764e-01, 3.4598e-01, 2.4307e-01, + 2.8445e-01, 1.6679e-02, 8.4466e-03, 6.5163e-02, 3.9525e-02, + -9.2690e-02, 4.4431e-02, -4.6913e-02, -9.2345e-03, -8.9794e-03, + 1.9486e-02, -1.1213e-01, 1.1999e-01, 1.0694e-01, -1.5547e-01, + 5.2061e-02, -1.4350e-01, 1.6855e-01, 8.6247e-02, 3.3788e-02, + 7.4436e-02, 2.2431e-01, -8.0994e-02, 7.9031e-02, -2.3455e-01, + -1.6954e-01, -2.5681e-01, 9.9518e-02, -2.0205e-02, 1.9510e-01, + -1.7703e-01, -2.3888e-01, 1.6401e-02, -1.6304e-01, -1.0341e-01, + -8.9298e-02, 2.9634e-02, -1.5724e-01, 6.5853e-02, -1.4971e-01, + -1.3078e-01, -1.6806e-01, -1.3370e-01, 3.9207e-01, -7.3544e-02, + -8.2711e-02, 9.3258e-02, -1.6172e-01, 1.8738e-01, 1.4350e-01, + 2.4039e-01, -3.3456e-02, -1.7536e-02, 2.7335e-01, -2.1055e-01, + -6.3564e-02, 2.1079e-02, 2.3638e-01, -3.1217e-02, 3.7273e-02, + 1.1079e-02, 1.5963e-01, -1.8719e-01, -1.0053e-03, -1.7813e-02, + -2.9240e-01, -8.0952e-02, 5.3291e-03, 2.6588e-01, -2.0618e-01, + -1.6225e-01, 1.0025e-01, -1.1047e-01, -9.2405e-02, 1.0489e-01, + -4.5642e-02, -1.8395e-01, -1.0642e-01, 1.8981e-01, 1.9907e-02, + -1.7135e-01, 6.5531e-02, -1.3988e-03, 2.7817e-01, -2.5802e-01, + 
-1.4527e-01, 1.6533e-01, 2.5748e-02, -6.5332e-02, 4.1882e-02, + -1.7025e-01, -4.3420e-01, -5.4950e-02, 2.4054e-01, 4.4939e-01, + 2.2223e-01, -5.3037e-02, 1.9851e-01, 8.5092e-02, -5.1684e-02, + 5.8122e-02, -2.0831e-01, 1.3727e-02, -1.4461e-01, 1.1605e-01, + 3.3023e-01, -2.5016e-03, 8.3355e-02, 2.8889e-03, -9.1767e-02, + -2.2500e-01, -2.6005e-01, -2.2718e-02, 7.8965e-02, -2.0193e-01, + 3.1948e-01, -1.4691e-01, -4.2470e-03, 1.6581e-01, -5.5066e-02, + -2.8730e-02, 1.2353e-01, 9.0078e-02, 5.3239e-02, 7.6229e-02, + -1.8688e-02, 1.8038e-01, -1.3688e-01, 1.0056e-02, -1.1915e-01, + 1.1376e-01, -1.0106e-01, 3.1833e-02, -2.3530e-01, -2.1356e-01, + 1.2081e-02, 1.9723e-01, -6.6254e-02, -5.8621e-03, 7.9542e-02, + 1.8380e-01, 6.9102e-02, -2.0355e-01, 3.8634e-04, 1.5694e-01, + 1.5234e-02, 7.0116e-02, -8.7134e-02, -1.0493e-01, -1.2832e-01, + 8.8785e-02, 5.0802e-02, -1.6515e-01, 1.5294e-01, -9.5807e-02, + 1.1724e-01, -2.1482e-01, 5.9678e-02, -1.0187e-01, -1.1231e-01, + 7.3172e-02, 8.1636e-02, 1.3985e-01, -6.1189e-02, 3.2938e-01, + -2.0465e-01, 5.8024e-02, -2.0098e-01, -7.7353e-02, -1.4911e-01, + -1.2737e-01, -5.6727e-02, -1.5146e-01, 5.5125e-03, -3.8270e-01, + 8.5914e-02, 4.1835e-02, 2.1180e-01, 6.6254e-02, 1.4918e-01, + 4.0368e-04, 8.3069e-02, 5.3523e-02, -7.5874e-02, -3.9154e-02, + 9.0881e-02, -4.8088e-02, -8.1549e-02, -9.4344e-02, 1.8658e-01, + -1.1282e-03, -3.2404e-02, 2.2197e-01, 1.2310e-02, -1.4756e-01, + -1.5818e-01, -1.0427e-01, -1.3376e-01, -4.5153e-01, 1.4150e-01, + -4.6084e-01, 7.0984e-02, -5.3660e-03, 1.1445e-01, 2.0639e-01, + -1.0595e-01, 9.8461e-02, 1.1745e-01, -8.5346e-02, 9.7677e-02, + -2.8740e-01, -1.0844e-01, 2.0947e-02, -1.3861e-01, 1.5285e-01, + 7.0771e-02, 4.2567e-02, 1.3805e-01, 3.5083e-02, -4.3154e-02, + -3.7462e-01, 5.8789e-02, -1.3240e-01, -1.0771e-01, -2.4909e-01, + 3.2137e-02, -1.4593e-01, -5.0975e-02, -1.2179e-01, -1.0344e-01, + 1.2949e-01, -1.4998e-01, 1.3079e-01, -1.7869e-01, -1.0132e-01, + 1.8350e-01, 4.9456e-02, -5.7911e-02, -1.6670e-01, -2.7019e-01, 
+ -1.3105e-01, 2.1986e-01, -6.1723e-02, 3.5003e-01, -1.5446e-01, + 1.9561e-01, 1.1300e-01, 1.3925e-01, -1.5276e-01, 1.0147e-01, + -1.9581e-02, -4.3691e-01, -9.7110e-02, -3.3967e-01, -2.7802e-02, + -7.2438e-02, -1.4261e-01, 1.0363e-01, 3.0578e-01, 4.9787e-02, + 1.1251e-01, 7.2107e-02, 2.5426e-01, 1.4806e-01, 9.5198e-02, + 1.0909e-01, -9.6786e-02, -1.5798e-01, -4.7056e-03, -2.3768e-01, + -2.6643e-01, 1.7792e-01, 1.9149e-01, 4.8918e-02, 1.6031e-01, + -1.1331e-01, -1.2023e-02, 1.2726e-01, 3.3811e-01, 2.9085e-01, + -3.1095e-02, -2.7095e-01, 1.0544e-01, -7.7017e-02, 1.5635e-01, + 1.0879e-01, 8.0752e-03, 1.5868e-01, -1.9379e-01, 4.0000e-02, + -3.2902e-01, -1.3328e-01, 1.0858e-01, 4.2967e-02, -1.2718e-01, + -3.5778e-02, 1.1070e-01, 1.7500e-01])Parameter containing: +tensor([0.9789, 0.9294, 1.0503, 0.9833, 1.3635, 0.9817, 0.8959, 0.9901, 0.9724, + 1.0195, 0.9912, 1.0194, 0.9510, 0.9497, 1.0480, 0.9598, 0.9356, 0.9458, + 1.0339, 1.4869, 0.9539, 1.0286, 1.0209, 0.9972, 0.9667, 0.9840, 0.9655, + 1.0248, 0.9781, 0.9649, 0.9608, 1.0080, 0.9781, 0.9799, 1.0005, 0.9709, + 0.9607, 1.0319, 1.0683, 1.0291, 0.9931, 0.9402, 1.0207, 0.9329, 0.9443, + 1.0045, 0.9924, 0.9469, 0.9767, 1.0029, 0.9833, 0.9146, 0.9649, 0.9591, + 0.9597, 1.1766, 0.9689, 0.9140, 0.9275, 0.9578, 0.9100, 0.9850, 0.9424, + 1.0120, 0.9857, 1.0222, 1.0457, 1.0024, 1.0410, 1.4892, 0.9385, 0.9460, + 0.9541, 1.0221, 0.9335, 0.9982, 1.0116, 0.9893, 0.9835, 0.9902, 0.9754, + 0.9635, 0.9653, 0.9582, 0.9862, 0.9706, 0.9513, 0.9523, 0.9896, 0.9582, + 0.9980, 0.9927, 0.9415, 0.9474, 0.9352, 1.0319, 1.0163, 0.9256, 0.9569, + 1.0629, 0.9677, 0.9598, 0.9947, 0.9495, 1.0303, 1.1323, 0.9882, 0.9709, + 0.9408, 0.9724, 1.0592, 0.9617, 0.9984, 0.9713, 1.0029, 0.9759, 1.0748, + 0.9440, 0.9530, 0.9644, 0.9541, 1.0438, 0.9496, 0.9783, 0.9973, 1.0621, + 0.9814, 0.9424, 0.9092, 0.9427, 1.0071, 0.9924, 0.9948, 0.9372, 1.0529, + 1.0278, 1.0116, 0.9581, 1.0060, 1.0342, 0.9699, 1.0151, 0.9476, 0.9842, + 0.9829, 1.0783, 1.0188, 0.9930, 
0.9428, 0.9109, 1.1249, 0.9518, 0.9691, + 0.9435, 1.0108, 1.8398, 0.9405, 1.0203, 0.9426, 0.9539, 1.0851, 0.9390, + 0.9137, 0.9963, 0.9783, 0.9774, 0.9921, 0.9872, 0.9343, 0.9382, 1.0005, + 1.0383, 1.0908, 1.0157, 0.9840, 0.9636, 0.9586, 0.9408, 1.0387, 0.9679, + 0.9141, 0.9924, 0.9980, 0.9678, 0.9615, 1.2429, 1.0328, 1.1239, 0.9823, + 0.9177, 0.9334, 0.9671, 0.9564, 0.9523, 0.9280, 0.9608, 1.0131, 0.9538, + 1.1718, 1.0348, 0.9512, 1.0957, 0.9225, 0.9518, 1.0423, 0.9711, 0.9743, + 1.0087, 1.0097, 0.9382, 0.9720, 1.0695, 1.0373, 1.3284, 0.9904, 0.9383, + 0.9837, 0.9402, 0.9627, 1.1128, 0.9779, 0.9696, 1.5119, 0.9708, 0.9545, + 1.0144, 0.9609, 0.9002, 0.9714, 0.9668, 1.0259, 0.9891, 0.9930, 0.9484, + 1.0384, 0.9235, 1.0008, 0.9805, 0.9669, 1.0055, 0.9815, 0.9618, 0.9798, + 0.9605, 0.9924, 1.0096, 0.9602, 0.9588, 0.9440, 0.9645, 0.9641, 0.9293, + 0.9837, 0.9505, 0.9990, 0.9378, 0.9800, 0.9223, 0.9465, 1.0049, 0.9628, + 0.9659, 0.9606, 0.9697, 0.9839, 1.0872, 0.9421, 0.9863, 0.9491, 0.9595, + 0.9383, 0.9845, 0.9450, 0.9953, 0.9629, 0.9854, 1.0170, 0.9908, 0.9800, + 0.9220, 0.9957, 1.3879, 0.9971, 0.9980, 0.9933, 1.0206, 0.9309, 0.9692, + 0.9807, 1.0204, 0.9707, 0.9000, 1.0781, 0.9787, 0.9227, 0.9209, 1.1598, + 0.9672, 0.9598, 0.9724, 1.0041, 0.9868, 0.9703, 1.0104, 0.9311, 1.0195, + 0.9569, 0.9985, 1.0102, 1.0374, 0.9528, 0.9749, 1.0033, 0.9420, 0.9753, + 0.9903, 0.9605, 0.9820, 1.0534, 0.9808, 1.0243, 1.0862, 0.9781, 1.0080, + 0.6989, 1.0179, 0.9068, 0.9796, 0.9608, 0.9569, 1.0111, 0.9837, 0.9577, + 0.9589, 0.9779, 1.0451, 1.1752, 0.9593, 0.9444, 0.9135, 0.9258, 0.9964, + 0.9537, 1.0046, 1.0097, 0.9951, 0.9439, 0.9784, 0.9663, 0.9261, 1.2351, + 0.9782, 0.9517, 1.1271, 0.9770, 0.9646, 0.9768, 0.9724, 0.9354, 1.0252, + 0.9786, 0.9721, 0.9825, 0.9624, 0.9528, 0.9635, 1.0061, 0.9892, 0.9911, + 0.9513, 0.9656, 1.0066, 0.9590, 0.9904, 0.9637, 0.9677, 0.9733, 0.9671, + 0.9685, 0.9739, 1.0084, 0.9667, 0.9818, 1.0092, 1.0129, 0.9741, 0.9463, + 1.0102, 1.0144, 1.0013, 0.9456, 
1.0054, 1.0135, 0.9948, 1.0081, 0.9378, + 1.0029, 0.9869, 0.9540, 0.9829, 1.0220, 1.0169, 0.9994, 0.9782, 1.0695, + 0.9567, 0.9314, 0.9749, 0.9682, 1.0318, 0.9593, 0.9616, 0.9607, 0.9752, + 1.4810, 0.9323, 0.9402, 0.9837, 1.0073, 0.9842, 0.9367, 0.9604, 1.0116, + 1.0078, 0.9702, 0.9684, 1.1285, 0.9511, 0.9765, 1.0176, 0.9596, 1.0339, + 0.9624, 1.0395, 0.9849, 0.9565, 0.9806, 1.0773, 0.9714, 0.9472, 1.0115, + 0.9649, 0.9228, 2.0901, 0.9879, 0.9882, 0.9761, 0.9814, 0.9468, 0.9994, + 0.9286, 0.9911, 0.9497, 0.9147, 0.9676, 0.9874, 0.9877, 0.9406, 0.9652, + 0.9351, 0.9923, 1.0111, 0.9771, 1.0114, 1.0217, 0.9870, 0.9537, 1.0784, + 1.0336, 1.0093, 0.9765, 0.9986, 1.0106, 0.9380, 0.9709, 1.0175, 0.9601, + 0.9987, 0.9836, 0.9945, 0.9813, 0.9800, 0.8724, 1.7095, 0.9634, 0.9996, + 0.9365, 0.9805, 0.9622, 1.0119, 0.9878, 0.9472, 0.9417, 0.9169, 0.9876, + 0.9900, 0.9350, 1.1267, 0.9505, 1.0640, 0.9625, 1.0296, 1.0113, 0.9412, + 0.9205, 0.9651, 0.9713, 0.9395, 1.0119, 0.9269, 1.0506, 0.9600, 0.9694, + 0.9757, 0.9544, 1.0086, 0.9788, 1.0508, 1.0209, 0.9488, 0.9966, 0.9317, + 0.9572, 0.9729, 1.0164, 0.9792, 0.9196, 0.9784, 0.9793, 0.9809, 0.9796, + 0.9861, 0.9729, 0.9508, 0.9676, 0.9786, 0.9937, 1.0135, 0.9527, 0.9108, + 0.9070, 1.0103, 0.9822, 0.9784, 1.0208, 0.9664, 0.9470, 0.9950, 1.0025, + 0.9190, 0.9683, 1.0020, 0.9953, 1.0268, 0.9235, 0.9606, 0.9984, 0.9683, + 1.0037, 1.0107, 0.9407, 1.4926, 0.9923, 1.0471, 0.9927, 0.9654, 1.0307, + 0.9588, 0.9602, 0.9697, 0.9666, 0.9573, 0.9678, 0.9603, 1.0030, 1.0766, + 0.9451, 1.0309, 0.9786, 1.0248, 1.0939, 0.9755, 1.0129, 0.9870, 0.9454, + 0.9679, 0.9613, 0.9953, 1.0223, 0.9391, 0.9962, 1.0024, 0.9905, 0.9667, + 0.9975, 1.0962, 0.9719, 0.9752, 1.0808, 0.9856, 0.9431, 0.9547, 0.9668, + 0.9539, 0.9781, 0.9672, 0.9340, 1.1096, 1.0085, 0.9580, 0.9485, 0.9604, + 0.9431, 0.9216, 0.9337, 0.9443, 1.0195, 0.9627, 0.9664, 1.0743, 0.9936, + 0.9841, 0.9570, 1.0709, 0.9946, 0.9981, 0.9501, 0.9921, 0.9470, 0.9568, + 0.9283, 0.9953, 1.0495, 0.9913, 
0.9984, 1.0113, 1.0228, 0.9384, 0.9789, + 1.0531, 0.9579, 0.9499, 1.0594, 0.9886, 1.4655, 0.9475, 1.0596, 0.9834, + 1.0035, 1.0115, 0.9774, 1.0214, 0.9791, 0.9685, 0.9650, 0.9535, 0.9771, + 0.9126, 0.9927, 1.0409, 1.2192, 0.9652, 1.0573, 0.9687, 0.9728, 0.9429, + 1.0402, 0.9895, 0.8916, 1.0075, 0.9711, 0.9827, 0.9865, 1.2124, 0.9498, + 0.9622, 0.9726, 1.0289, 0.9481, 1.0022, 0.9531, 1.6650, 1.0106, 1.0316, + 0.9528, 1.0154, 0.9521, 0.9370, 0.9971, 1.0099, 0.9931, 0.9787, 0.9301, + 0.9546, 1.0009, 0.9576, 1.0004, 0.9747, 0.9970, 0.9373, 0.9452, 1.0046, + 0.8863, 0.9072, 0.9384, 0.9922, 0.9774, 0.9665, 0.9855, 0.9787, 0.9994, + 0.9961, 0.9882, 0.9664, 0.9641, 0.9573, 0.9490, 1.0217, 1.0008, 1.0120, + 0.9250, 1.0264, 0.9800, 0.9720, 0.9221, 0.8856, 1.7454, 0.9631, 0.9402, + 0.9390, 1.0204, 1.0326, 1.0082, 1.0139, 0.9646, 0.9637, 0.9325, 1.0175, + 0.9553, 0.9596, 0.9731, 0.9429, 1.0211, 0.9584, 0.9190, 1.0024, 0.9525, + 1.0043, 1.0158, 0.9731, 0.9330, 0.9368, 0.9898, 0.9570, 1.0412, 0.9947, + 0.9534, 1.1320, 1.0050, 0.9660, 0.9688, 0.9525, 0.9742, 1.0358, 0.9980, + 0.9786, 0.9825, 0.9780])Parameter containing: +tensor([-3.0972e-01, 8.6520e-02, -1.7815e-01, -2.1274e-01, 1.0310e-02, + -2.1799e-01, 2.6805e-02, -1.2406e-01, 1.1344e-01, -1.0872e-01, + -2.1680e-01, -1.5038e-01, -1.6801e-01, -3.8868e-01, 4.3549e-02, + -1.6931e-01, -1.0309e-01, -1.1146e-01, -2.8433e-02, -7.3690e-02, + -2.0815e-01, -1.5633e-01, -5.8505e-02, -1.1866e-01, -9.1465e-02, + -1.2483e-01, -1.6782e-01, -1.8859e-01, -2.8829e-01, -6.2344e-02, + 5.8944e-02, -2.3529e-02, -1.3301e-01, -3.5179e-02, -1.3117e-01, + -3.7027e-01, -3.8042e-01, -2.0661e-01, -5.9429e-02, 4.7935e-02, + -1.1948e-01, -1.6189e-01, -1.1954e-01, -1.6275e-01, -1.9291e-01, + -3.2738e-01, 6.2183e-02, -1.8937e-01, -2.8478e-01, -1.1591e-01, + -3.1983e-02, -1.3321e-01, -3.0793e-01, -9.1934e-02, -3.1607e-01, + 1.7626e-03, -3.7448e-02, -2.1910e-01, -3.0856e-01, 1.1530e-01, + 7.8005e-02, -1.3057e-01, -1.4585e-01, -2.2813e-01, -1.6288e-01, + 
-2.2923e-01, -2.6776e-01, -1.9933e-01, -2.2633e-01, 1.1466e-01, + -1.0994e-01, -1.2034e-01, -7.9534e-02, -3.2612e-01, -2.9924e-01, + -1.5115e-01, -1.4329e-02, -1.6127e-01, 1.3527e-01, -3.5650e-02, + -1.3662e-01, -9.8355e-02, -1.8159e-01, -8.9092e-02, -7.4571e-02, + -2.8322e-01, 2.9549e-01, -7.7638e-02, -2.7117e-02, -1.8011e-01, + -3.8760e-01, 1.8966e-02, -6.7477e-02, -1.5817e-01, -3.1372e-01, + -8.0799e-02, 6.7441e-02, -1.7432e-01, -2.6955e-01, 6.9790e-02, + -1.0390e-01, -4.4026e-01, -1.0927e-01, -4.8078e-01, -4.3120e-03, + -1.4704e-01, 1.6252e-01, -6.3587e-03, -1.6030e-01, -1.2555e-01, + 2.4264e-03, -2.0487e-01, -1.9123e-01, -7.2894e-02, 2.6715e-02, + -2.9070e-01, 2.1943e-02, -1.0394e-01, -2.0428e-01, 1.9086e-02, + -5.8713e-02, -1.1117e-01, -2.1590e-01, -7.7958e-03, 8.1530e-02, + -4.0557e-02, -6.9983e-02, 8.2076e-02, -2.8870e-01, 5.1586e-04, + -7.0652e-02, -1.6712e-01, -5.4576e-02, 3.9755e-02, 9.6160e-02, + 3.4701e-02, -2.3451e-01, -2.4870e-01, 2.3932e-02, -1.9629e-01, + -5.2537e-02, 2.0197e-02, -7.9063e-02, -1.7944e-01, -1.2057e-01, + -1.4907e-01, 1.8654e-01, -1.6093e-01, -2.8629e-01, -2.0329e-01, + 3.8109e-02, -2.5073e-01, -2.7577e-01, -1.1063e-01, -1.0919e-01, + 1.3914e-02, -2.8746e-01, -3.2939e-01, -2.2465e-01, -1.6625e-01, + -1.8405e-01, -4.9315e-02, -2.2022e-01, -2.8075e-01, -5.6807e-02, + 1.0513e-01, -7.9162e-02, 3.6867e-02, -1.8553e-01, -2.9716e-01, + 7.4794e-02, 6.0102e-02, -1.4944e-01, 1.4078e-03, -1.2949e-01, + -1.4017e-01, -2.0010e-01, -1.3645e-01, -5.3604e-02, 9.3507e-02, + -4.0007e-01, -8.2399e-02, 1.4770e-03, 2.5266e-02, -1.3282e-01, + -5.0428e-02, -2.4350e-01, 2.7861e-01, -3.4818e-02, 9.8363e-02, + -3.0732e-01, -3.4218e-01, -2.9501e-02, -3.9651e-02, -6.8811e-02, + -4.2808e-02, 1.3737e-01, -1.1500e-01, -2.3656e-01, 2.0047e-01, + -2.0047e-01, -1.8455e-01, -1.9737e-01, -3.0253e-02, -8.7459e-02, + -2.0932e-01, 1.5391e-02, 1.2252e-02, -2.1960e-01, -3.3056e-01, + 1.3683e-01, -2.0823e-01, -3.2396e-01, -1.1140e-01, -3.3370e-01, + -2.8346e-01, -2.3274e-01, 
-1.6795e-01, -3.2785e-01, 3.7867e-02, + 1.3719e-01, 1.4507e-01, -6.2225e-02, 5.7701e-02, -3.8609e-02, + 3.8244e-02, -2.9839e-01, -1.8011e-01, -1.9061e-01, -5.9240e-02, + -9.7641e-02, -2.6895e-01, -1.6988e-01, -1.9597e-01, -1.6472e-01, + -4.4346e-01, 1.3133e-01, -1.2524e-01, -3.5263e-01, -1.4135e-01, + 4.1890e-02, -5.1405e-02, 4.4846e-02, -1.4310e-01, -1.5345e-01, + -1.3753e-01, -1.1721e-01, -1.3412e-01, -1.3196e-01, 3.5904e-02, + 1.4487e-02, -1.3828e-01, -1.1927e-01, -2.2135e-01, -1.7975e-01, + -2.3271e-01, -2.4277e-02, -2.7056e-01, -3.3887e-01, -2.0596e-01, + -3.2562e-01, -4.5267e-01, 1.7332e-01, 3.4287e-02, -2.7470e-01, + -4.0351e-02, -1.5061e-01, -2.0349e-01, 8.5356e-02, 6.9045e-02, + 3.8983e-02, 6.1722e-02, 2.4213e-02, -8.5502e-02, 4.4269e-03, + -1.0968e-01, -1.4912e-01, -4.7435e-02, 1.4155e-01, -1.8818e-01, + -3.2807e-02, -2.2663e-01, -3.0295e-01, 9.7512e-03, -1.7489e-01, + -9.1184e-02, -2.0599e-01, 1.4191e-01, -5.3643e-02, 7.9877e-02, + -2.9450e-01, -1.2680e-01, 8.3230e-02, -2.6408e-01, -4.8992e-02, + -5.0730e-02, 7.4562e-03, -1.9729e-01, -1.8002e-01, -6.4581e-03, + -2.3463e-01, 1.5716e-01, -5.0802e-02, -3.3855e-01, 5.2296e-02, + -9.6272e-02, -1.5351e-01, -2.3901e-01, -1.5963e-01, -1.6693e-01, + -1.1239e-01, -3.7593e-01, -7.4591e-02, 4.3444e-02, -1.5205e-01, + -1.6956e-03, 1.6140e-01, -2.1399e-01, -1.8157e-01, -3.0919e-02, + -2.7055e-01, -1.2207e-01, -1.5803e-01, -3.8846e-02, -4.9430e-01, + -1.1189e-01, -2.8882e-01, -1.2073e-01, -6.5715e-02, -1.4964e-01, + -2.4234e-01, -2.6441e-01, -2.7818e-01, -6.0429e-02, 7.3955e-02, + 6.7665e-02, -2.6970e-01, -4.5610e-02, 2.3626e-02, 1.0868e-02, + -2.1497e-02, -2.7581e-01, -2.1453e-01, 1.3595e-01, -2.0828e-01, + 1.1379e-01, 8.0894e-03, 8.1668e-02, 1.1096e-01, 2.5441e-01, + 7.3634e-02, -2.1243e-01, -2.3957e-01, -1.1420e-02, -1.9181e-01, + -1.9316e-01, -4.4721e-01, -8.3250e-02, -6.5639e-02, -5.8319e-02, + -1.2700e-01, -1.0081e-01, -7.0792e-02, -3.4129e-01, -1.4185e-01, + -4.1964e-01, -1.5962e-01, 2.2652e-02, -2.0097e-01, 
-1.7302e-01, + -1.6948e-01, -1.4114e-01, -9.6507e-02, -1.6094e-01, -3.6258e-01, + -2.5993e-01, 6.2780e-02, -1.1315e-01, -2.1144e-02, -7.3426e-03, + -1.3075e-01, -2.6975e-01, -2.2317e-01, -7.0074e-02, 8.6945e-02, + -2.5793e-01, -1.4867e-01, 8.9297e-02, 3.3459e-03, -2.6978e-01, + -1.6799e-01, -1.9151e-01, -7.5621e-02, -5.7356e-02, -2.5335e-01, + 3.0635e-02, -2.6356e-01, -7.0652e-02, -2.5116e-01, -3.5934e-01, + -2.9494e-02, -9.2532e-02, -5.1282e-02, -1.1410e-01, -1.0907e-01, + -2.8711e-01, -1.1422e-01, -3.5451e-02, -1.9770e-01, 1.1843e-01, + -2.8797e-01, -4.3960e-01, -3.4017e-02, 9.0205e-02, -1.3106e-01, + -8.4129e-02, -1.8859e-01, -2.6158e-01, 1.6586e-01, -1.2378e-01, + -1.5896e-01, -1.6694e-01, 1.6234e-02, -1.5692e-01, -1.6066e-01, + -3.7369e-04, 2.3442e-02, -5.5136e-02, -8.2050e-02, -2.5465e-01, + -1.2962e-01, -1.5731e-01, 7.7169e-02, -1.6205e-01, -2.2577e-01, + 2.7081e-01, -1.1905e-01, -2.3228e-02, -2.7891e-01, -2.6373e-01, + -1.0485e-01, -1.3941e-01, -6.7630e-02, -1.4722e-02, 1.3796e-02, + -3.4524e-02, 8.9978e-02, -2.1230e-01, 8.5656e-02, -1.6448e-01, + -3.2909e-02, -2.3079e-01, -3.2352e-01, 2.6198e-02, -1.7452e-01, + -2.2494e-01, -3.5444e-01, -1.2681e-01, -1.1768e-01, -2.7710e-01, + -2.5133e-01, -1.0190e-01, -3.9334e-01, -4.8219e-02, -2.9652e-01, + -7.3225e-02, -8.6772e-03, 3.2621e-02, -1.6923e-01, -2.9421e-01, + 1.8239e-01, -1.4777e-01, -2.9244e-01, -3.1106e-02, -1.5851e-01, + -2.4942e-01, -2.5309e-01, -2.9053e-01, 9.9120e-02, -2.4745e-01, + 4.4178e-02, 2.9294e-02, 1.8558e-01, -9.1317e-02, 2.0792e-02, + -1.6872e-01, -2.2066e-01, -6.8652e-02, 6.6996e-02, -2.0110e-01, + -1.2881e-02, -4.1882e-02, -1.6867e-01, -4.1276e-02, -1.1632e-01, + -2.4713e-01, -2.1714e-01, -2.5670e-01, -7.6108e-02, 7.5738e-02, + -5.2782e-02, -2.1995e-01, -2.1114e-01, -8.2967e-02, -2.9250e-01, + -1.1585e-01, -3.7202e-02, 2.2169e-01, -1.1157e-01, -1.3117e-01, + -4.5070e-01, 9.1724e-02, -2.5536e-01, -1.1099e-01, -1.4028e-01, + 2.8116e-02, -5.5589e-02, -1.1969e-01, -8.9219e-02, 4.4426e-02, + 
1.8124e-01, 6.9828e-02, -2.9449e-01, -3.8633e-01, -3.2289e-01, + -1.5161e-01, 7.4736e-02, -3.2911e-01, 9.0841e-02, -9.4956e-02, + -1.6886e-01, -4.3418e-01, -1.8379e-01, -1.7249e-01, -4.3381e-02, + -3.0515e-01, -1.3860e-02, -1.3408e-01, 2.3386e-02, -1.9684e-01, + 1.1179e-01, -1.0725e-01, -5.1684e-02, 9.9985e-03, -8.1149e-02, + -5.7091e-02, -2.6378e-01, -1.8482e-01, 2.4157e-02, -3.0910e-01, + -9.7387e-02, -1.3443e-01, -1.3120e-01, -4.1030e-02, -2.8421e-01, + -1.1970e-01, 2.6584e-01, -2.9540e-01, -4.1858e-01, -4.4225e-02, + -3.3776e-02, 5.1072e-02, -1.0784e-01, 2.4809e-03, -1.0006e-02, + -1.6900e-01, -1.6756e-01, 7.1867e-02, -1.4536e-01, -3.2948e-01, + -4.8964e-02, 2.5080e-02, -2.9177e-01, -2.3546e-01, -9.6734e-02, + 5.2769e-02, 5.9821e-02, -7.1729e-02, -5.2216e-02, 4.6748e-02, + -1.0374e-01, -3.3411e-01, 1.9977e-02, -1.9944e-01, -1.4249e-01, + -4.2926e-01, -2.0550e-01, -1.2332e-01, 2.4972e-02, -3.1156e-01, + -3.5618e-02, 1.4841e-01, -6.8435e-02, -8.9134e-02, -4.3101e-01, + 3.2034e-02, -5.7796e-02, -1.0815e-01, 7.9357e-02, 6.0425e-02, + 9.0992e-02, -8.4117e-02, -1.6460e-01, -1.8860e-01, -1.1188e-01, + -1.6019e-02, -2.7539e-01, -3.0461e-02, 3.6393e-02, -5.5778e-02, + -2.3261e-01, 3.3813e-03, -7.2618e-02, 1.3152e-01, 2.5424e-02, + -2.3474e-01, 1.2788e-01, 1.0549e-01, -3.9438e-01, -1.4477e-01, + -8.7272e-02, -5.9155e-02, -1.8755e-02, 8.3795e-03, -1.3303e-01, + 3.5993e-02, -4.8489e-01, -9.8144e-02, -2.0187e-01, -1.7780e-01, + -1.5702e-01, -1.6632e-01, -7.6623e-02, -1.2067e-01, 9.7476e-02, + -1.4865e-01, 3.9298e-02, -8.1964e-02, -5.7571e-02, -1.6357e-02, + -2.4697e-02, -2.3285e-01, -1.8711e-01, 5.9722e-02, -5.0565e-03, + 1.6085e-01, -5.6193e-02, -1.2748e-01, -1.7989e-01, 1.6912e-02, + -7.4359e-02, 5.9318e-02, 4.5689e-02, -3.4782e-01, 1.2438e-02, + 4.9462e-03, -7.6542e-02, -1.7623e-03, -3.1946e-01, -8.4081e-02, + 4.2274e-02, -2.0708e-01, -1.0376e-01, -3.2503e-01, -4.9442e-02, + -1.5170e-01, -1.7022e-01, -1.8604e-01, 7.0737e-02, -2.3663e-01, + -3.6244e-02, -2.3101e-01, 
-2.2293e-01, 8.8521e-03, -7.0427e-02, + -2.2006e-01, 6.1665e-02, -1.2758e-01, -1.3123e-01, 1.3888e-02, + -2.7413e-01, -3.8097e-02, -3.4511e-01, -2.7228e-01, 8.0634e-02, + -1.5312e-02, -3.7472e-03, 2.0875e-02, 1.5117e-01, -2.4863e-01, + -3.2693e-01, -1.0401e-01, -1.4831e-01, -1.8991e-01, -1.6961e-01, + 1.1688e-01, 6.0180e-02, -5.3109e-02, -3.7527e-01, 1.3247e-01, + -1.2943e-01, -1.7973e-01, 1.0068e-01, -1.7821e-01, -1.9093e-01, + 2.9122e-02, -5.7265e-01, -1.4149e-01, -7.8197e-02, -2.9365e-01, + -2.5148e-01, -1.0391e-01, -1.6442e-01, -1.2958e-01, -6.3028e-02, + -7.3926e-02, -9.9090e-02, 1.4622e-02, -3.2253e-01, -2.1039e-01, + -3.5321e-02, -6.1373e-02, -4.3052e-03, -2.5899e-01, 1.4603e-01, + -6.2891e-02, 3.2609e-02, -8.3760e-02, -7.8426e-02, -5.5548e-02, + -2.1703e-01, -7.2742e-02, -1.0241e-01, -1.5250e-01, -5.3758e-03, + 1.6436e-01, -1.6233e-01, -1.1661e-01, -2.6216e-01, -3.3025e-01, + -1.5915e-01, -3.5974e-01, -1.6534e-01, 8.1741e-03, 1.2124e-01, + -7.8771e-02, -2.6709e-01, -1.5131e-01, 1.1832e-01, -9.7288e-02, + 1.5229e-01, -1.3003e-01, -3.0911e-01, -8.6667e-02, -6.7893e-02, + -1.5559e-01, -1.3761e-01, -4.8186e-02, -1.4222e-01, -5.8575e-02, + -4.5176e-01, -2.7698e-01, -1.8527e-01, 1.3501e-01, 1.4931e-02, + -5.2130e-01, -2.6890e-01, 9.5427e-02]) \ No newline at end of file diff --git a/python/ClipDetection/CoOp/trainers/custom_generator_cuda.txt b/python/ClipDetection/CoOp/trainers/custom_generator_cuda.txt new file mode 100644 index 00000000..1428fa4b --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/custom_generator_cuda.txt @@ -0,0 +1,11696 @@ +Parameter containing: +tensor(4.6052, device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-3.6102e-02, 8.2932e-03, 1.1726e-02, ..., 1.1253e-03, + -4.8218e-03, 1.7639e-02], + [ 3.5629e-03, -1.0719e-02, 2.6947e-02, ..., 6.7596e-03, + 9.3536e-03, 4.5252e-04], + [-1.8234e-02, -1.9272e-02, -4.8523e-03, ..., -1.6937e-02, + 3.1796e-03, -6.8932e-03], + ..., + [-2.7466e-02, -4.8752e-03, 2.0004e-02, ..., 
1.2712e-03, + -2.6382e-02, 2.4521e-02], + [ 1.2375e-02, -1.9409e-02, 4.3678e-03, ..., 1.6769e-02, + -3.3844e-02, -1.2253e-02], + [ 2.2934e-02, 8.4534e-03, 3.3875e-02, ..., -3.8853e-03, + 2.7120e-05, -7.8354e-03]], dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0138, 0.2357, -0.1285, ..., 0.0171, -0.3332, -0.2366], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0019, 0.0479, -0.0149, ..., 0.0005, -0.0558, -0.0460], + [ 0.0114, -0.0413, 0.0357, ..., 0.0271, -0.0313, -0.0383], + [-0.0026, -0.0340, -0.0006, ..., 0.0216, -0.0294, -0.0423], + ..., + [-0.0038, -0.0350, -0.0048, ..., -0.0228, -0.0328, -0.0412], + [-0.0046, -0.0360, -0.0026, ..., -0.0350, -0.0355, -0.0353], + [-0.0073, -0.0287, -0.0144, ..., -0.0202, -0.0272, -0.0360]], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0224, -0.0139, -0.0072, ..., -0.0058, -0.0078, 0.0139], + [ 0.0186, 0.0084, 0.0400, ..., -0.0149, -0.0241, -0.0003], + [ 0.0075, -0.0007, 0.0195, ..., -0.0062, -0.0083, 0.0156], + ..., + [ 0.0121, -0.0165, -0.0144, ..., -0.0066, 0.0088, 0.0027], + [-0.0164, -0.0100, -0.0053, ..., -0.0005, -0.0001, -0.0075], + [ 0.0092, 0.0048, 0.0069, ..., 0.0054, -0.0162, 0.0262]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[[[ 2.5284e-02, 1.0597e-02, 7.1678e-03, ..., 2.3422e-02, + 2.1683e-02, 4.8637e-03], + [ 1.3748e-02, -6.2103e-03, -4.8103e-03, ..., 1.6418e-02, + 7.0114e-03, -1.3161e-02], + [ 1.0048e-02, 2.1286e-03, 2.2945e-03, ..., 5.5695e-03, + 5.0468e-03, -1.2604e-02], + ..., + [-1.0101e-02, -2.3854e-04, -5.4588e-03, ..., -1.9226e-02, + -2.4017e-02, -2.4765e-02], + [-3.4752e-03, -1.0979e-02, -1.3603e-02, ..., -7.6408e-03, + 1.5583e-03, -4.4365e-03], + [-2.1469e-02, -4.3182e-02, -3.0121e-02, ..., -5.2147e-03, + 3.7346e-03, -6.8016e-03]], + + [[ 1.5930e-02, -4.9095e-03, -1.2283e-02, ..., 2.5879e-02, + 2.4048e-02, 5.6458e-03], + [ 2.1019e-03, -2.4185e-02, -2.6337e-02, ..., 1.5297e-02, 
+ 5.2605e-03, -1.5121e-02], + [ 5.1956e-03, -7.2556e-03, -9.4376e-03, ..., 7.9193e-03, + 5.4703e-03, -1.2398e-02], + ..., + [-4.2267e-03, 5.9624e-03, -6.2656e-04, ..., 3.8528e-03, + 4.2963e-04, -5.4207e-03], + [-2.8496e-03, -1.1482e-02, -1.3680e-02, ..., 1.5129e-02, + 2.3285e-02, 1.2856e-02], + [-2.7740e-02, -4.9561e-02, -3.1158e-02, ..., 1.2787e-02, + 1.7975e-02, 6.4516e-04]], + + [[ 1.6403e-02, -2.0084e-03, -4.8714e-03, ..., 1.6159e-02, + 1.1337e-02, 5.2719e-03], + [ 1.8549e-03, -2.1622e-02, -2.4734e-02, ..., 6.0081e-03, + -4.9477e-03, -8.3389e-03], + [ 4.8523e-03, -1.0818e-02, -1.5015e-02, ..., 6.0272e-04, + -2.3615e-04, -7.6065e-03], + ..., + [ 2.4033e-03, 2.6741e-03, -8.2016e-03, ..., -1.0231e-02, + -1.0254e-02, -7.4234e-03], + [ 8.2626e-03, -3.1586e-03, -9.0256e-03, ..., -3.5248e-03, + 6.7329e-03, 5.1842e-03], + [-1.0529e-02, -2.6947e-02, -1.5656e-02, ..., 1.6518e-03, + 6.4774e-03, 2.7132e-04]]], + + + [[[ 1.5366e-02, 2.6184e-02, 5.8479e-03, ..., 8.4534e-03, + -9.0027e-03, 2.0325e-02], + [-1.8753e-02, -7.4615e-03, -1.6830e-02, ..., 2.9640e-03, + -1.9193e-05, 1.5640e-02], + [-2.4765e-02, -1.2184e-02, 1.7405e-03, ..., -2.6291e-02, + -2.8641e-02, -3.6869e-03], + ..., + [ 7.4539e-03, -6.8169e-03, 1.4931e-02, ..., 1.4824e-02, + -5.6839e-03, -6.2599e-03], + [ 6.2408e-03, -8.2016e-03, 4.1229e-02, ..., -5.0664e-06, + -2.8336e-02, -1.9409e-02], + [ 1.7120e-02, -1.1139e-02, 6.1279e-02, ..., -4.5490e-04, + 7.2899e-03, 4.6967e-02]], + + [[ 2.1149e-02, 3.3386e-02, 1.0483e-02, ..., 6.6109e-03, + -1.1864e-02, 1.7838e-02], + [-1.5022e-02, -8.8882e-04, -9.4604e-03, ..., 4.7722e-03, + 3.3522e-04, 1.4709e-02], + [-2.3026e-02, -6.3400e-03, 1.1215e-02, ..., -2.9251e-02, + -3.2776e-02, -7.0419e-03], + ..., + [ 5.5275e-03, -1.1826e-02, 7.7248e-03, ..., 1.1215e-02, + -1.1208e-02, -9.9030e-03], + [ 2.2125e-03, -1.5572e-02, 3.5980e-02, ..., -4.5929e-03, + -3.7567e-02, -2.6779e-02], + [ 1.0384e-02, -2.4033e-02, 5.2917e-02, ..., -1.1375e-02, + -4.0016e-03, 4.0253e-02]], + + [[ 
1.0483e-02, 2.2339e-02, 8.9121e-04, ..., 5.2719e-03, + -1.2917e-02, 1.7471e-02], + [-2.5070e-02, -1.1597e-02, -1.9104e-02, ..., 4.4594e-03, + 4.0364e-04, 1.5610e-02], + [-3.3417e-02, -1.8112e-02, -1.3227e-03, ..., -2.8519e-02, + -3.0121e-02, -6.7444e-03], + ..., + [ 4.9820e-03, -1.0445e-02, 1.0681e-02, ..., 1.3405e-02, + -8.7509e-03, -8.8196e-03], + [ 2.5711e-03, -1.3268e-02, 4.1168e-02, ..., 9.7275e-04, + -3.0792e-02, -2.5375e-02], + [ 8.9951e-03, -2.1439e-02, 5.3528e-02, ..., -6.4163e-03, + -4.1795e-04, 3.9398e-02]]], + + + [[[ 7.2441e-03, 3.7231e-03, -2.4662e-03, ..., 1.0353e-02, + 1.4267e-02, 1.9363e-02], + [-3.0270e-03, -3.2539e-03, -1.2878e-02, ..., 9.7847e-04, + 5.2299e-03, 6.8626e-03], + [-4.3182e-03, 5.6915e-03, -3.1910e-03, ..., 8.4114e-04, + 2.2297e-03, 7.1373e-03], + ..., + [ 4.4632e-03, 3.8757e-03, -2.0063e-04, ..., 1.5976e-02, + 1.4221e-02, 1.2756e-02], + [ 2.5146e-02, 1.4793e-02, 5.1003e-03, ..., 2.2858e-02, + 2.2186e-02, 2.3026e-02], + [ 3.0807e-02, 2.6031e-02, 1.4259e-02, ..., 2.5116e-02, + 2.1759e-02, 2.4887e-02]], + + [[ 6.9695e-03, 5.0888e-03, -2.8915e-03, ..., 1.7868e-02, + 1.9669e-02, 2.9037e-02], + [-2.8973e-03, -1.2035e-03, -1.1116e-02, ..., 5.5542e-03, + 5.9547e-03, 1.3420e-02], + [-9.8190e-03, 4.3716e-03, 2.3806e-04, ..., 1.1253e-03, + -8.7976e-04, 9.4681e-03], + ..., + [ 6.1417e-03, 5.1804e-03, 2.1095e-03, ..., 2.4979e-02, + 2.5146e-02, 2.7710e-02], + [ 3.1128e-02, 2.0096e-02, 8.0948e-03, ..., 3.3722e-02, + 3.3295e-02, 4.0405e-02], + [ 3.7659e-02, 3.2166e-02, 1.8311e-02, ..., 4.2542e-02, + 3.9429e-02, 4.6356e-02]], + + [[ 1.7014e-02, 1.5358e-02, 1.1269e-02, ..., 2.1378e-02, + 2.1317e-02, 3.0075e-02], + [ 7.4120e-03, 7.8087e-03, 1.1091e-03, ..., 7.4654e-03, + 7.7209e-03, 1.2947e-02], + [-5.4646e-04, 1.1208e-02, 6.4545e-03, ..., 4.1313e-03, + 3.2539e-03, 9.7275e-03], + ..., + [ 3.3531e-03, 2.0325e-04, 1.3704e-03, ..., 7.8087e-03, + 7.9422e-03, 1.4809e-02], + [ 1.6571e-02, 2.9163e-03, 4.2105e-04, ..., 1.1787e-02, + 1.1337e-02, 1.8753e-02], + 
[ 1.9714e-02, 1.0704e-02, 2.9335e-03, ..., 2.1042e-02, + 1.5457e-02, 2.2263e-02]]], + + + ..., + + + [[[-3.1614e-04, -6.5041e-04, -6.0844e-04, ..., 6.5207e-05, + 2.8062e-04, -5.1928e-04], + [-5.2452e-06, -9.8610e-04, -9.5367e-04, ..., 1.9908e-05, + -1.0675e-04, -8.3148e-05], + [-9.5606e-04, -6.4993e-04, -1.2035e-03, ..., -6.1035e-04, + -4.2439e-04, 6.3181e-04], + ..., + [-7.1907e-04, -6.2132e-04, 1.0270e-04, ..., -3.2485e-05, + -7.7963e-04, -7.9155e-04], + [-9.8991e-04, 6.4433e-05, -1.2598e-03, ..., -8.0490e-04, + -1.2980e-03, -1.2064e-03], + [-2.8110e-04, -5.8031e-04, -2.4199e-04, ..., -5.1558e-05, + 4.4203e-04, 1.4377e-04]], + + [[ 5.6839e-04, 1.9491e-05, 2.8157e-04, ..., 1.6952e-04, + 9.6035e-04, -5.6601e-04], + [ 9.8038e-04, 2.3961e-05, 4.3941e-04, ..., 3.5739e-04, + 7.8630e-04, -6.2466e-04], + [-2.5654e-04, 3.8624e-04, 1.7090e-03, ..., 6.6614e-04, + 6.1607e-04, 7.3719e-04], + ..., + [ 5.9319e-04, 4.7755e-04, 4.7016e-04, ..., 1.0605e-03, + 6.6137e-04, 3.1066e-04], + [ 8.3494e-04, 4.7708e-04, -1.0042e-03, ..., 6.4945e-04, + -2.4092e-04, 3.6502e-04], + [ 4.7803e-04, -3.4690e-04, 6.3467e-04, ..., 2.3830e-04, + 1.9407e-04, 4.0698e-04]], + + [[ 2.0623e-04, -7.5936e-05, -6.9094e-04, ..., -2.5582e-04, + -5.5313e-04, -5.7125e-04], + [-9.0122e-05, 3.5214e-04, 2.0063e-04, ..., -2.6512e-04, + 1.1653e-04, 5.8317e-04], + [-9.5224e-04, -3.9577e-04, -3.9458e-04, ..., 2.1636e-04, + 6.0797e-05, 1.7786e-04], + ..., + [ 4.9019e-04, -1.6594e-04, 5.3120e-04, ..., 3.1352e-04, + 9.8825e-05, 5.7650e-04], + [ 7.5400e-05, 4.0960e-04, -6.8998e-04, ..., 1.8597e-04, + 1.9622e-04, -3.3689e-04], + [-1.4269e-04, -2.5558e-04, 2.9540e-04, ..., 2.1315e-04, + -2.9826e-04, 4.0221e-04]]], + + + [[[ 1.2306e-02, 1.8921e-02, 5.3024e-03, ..., 1.1612e-02, + 6.5956e-03, 2.7069e-02], + [ 1.1261e-02, 2.9709e-02, 1.3695e-02, ..., -8.9722e-03, + -1.7639e-02, -3.2501e-03], + [ 2.1103e-02, 3.1342e-02, 1.7731e-02, ..., -1.1185e-02, + -2.7451e-02, -5.5275e-03], + ..., + [ 3.7292e-02, 2.5757e-02, 6.7863e-03, ..., 
1.8631e-02, + 2.8793e-02, 3.6560e-02], + [ 1.9577e-02, -5.3711e-03, -2.1255e-02, ..., -1.6953e-02, + -2.3621e-02, 4.6463e-03], + [ 1.3992e-02, -2.7130e-02, -5.1117e-02, ..., -1.2520e-02, + -4.0009e-02, 1.3618e-02]], + + [[ 1.7109e-03, 9.4223e-03, -2.4147e-03, ..., 8.3694e-03, + 3.3112e-03, 2.3117e-02], + [ 1.1692e-03, 2.3514e-02, 1.1520e-02, ..., -8.2321e-03, + -1.8555e-02, -6.4278e-03], + [ 1.0735e-02, 2.6749e-02, 1.8997e-02, ..., -1.1795e-02, + -3.0396e-02, -9.2773e-03], + ..., + [ 3.4821e-02, 2.1423e-02, 8.1253e-04, ..., 1.6235e-02, + 2.6367e-02, 3.4302e-02], + [ 1.4656e-02, -1.1101e-02, -2.7344e-02, ..., -2.0676e-02, + -3.1250e-02, -1.2932e-03], + [ 5.8136e-03, -3.8971e-02, -6.3354e-02, ..., -2.1881e-02, + -5.2307e-02, 4.1885e-03]], + + [[-1.0658e-02, -1.8530e-03, -8.5220e-03, ..., 4.6959e-03, + -1.9407e-03, 1.7426e-02], + [-1.3008e-02, 1.1108e-02, 5.3177e-03, ..., -8.9722e-03, + -2.1408e-02, -9.2850e-03], + [-3.2902e-03, 1.4580e-02, 1.3863e-02, ..., -1.2299e-02, + -2.9846e-02, -1.2985e-02], + ..., + [ 3.2806e-02, 2.2476e-02, 6.9771e-03, ..., 1.0704e-02, + 1.9516e-02, 2.4567e-02], + [ 1.3817e-02, -6.0501e-03, -1.4580e-02, ..., -2.2476e-02, + -3.2013e-02, -9.6893e-03], + [ 5.8556e-03, -3.2196e-02, -5.1910e-02, ..., -2.4429e-02, + -5.2979e-02, -3.0937e-03]]], + + + [[[ 2.2598e-02, -7.3586e-03, -2.9099e-02, ..., -2.2873e-02, + 8.5068e-03, -4.8706e-02], + [ 1.7410e-02, -3.1433e-02, -4.2816e-02, ..., -6.2675e-03, + 9.4528e-03, -3.8910e-02], + [ 2.2125e-02, -1.5839e-02, -4.1351e-02, ..., 4.6021e-02, + 2.4017e-02, -1.1345e-02], + ..., + [ 2.8290e-02, 3.7964e-02, 4.1656e-02, ..., 2.4734e-02, + -2.2011e-03, -1.9989e-02], + [-1.5671e-02, -2.0996e-02, -2.9182e-03, ..., 2.0828e-02, + 7.9803e-03, 1.4175e-02], + [-3.1624e-03, -9.1400e-03, 7.2937e-03, ..., 1.6663e-02, + 1.3590e-03, 1.6647e-02]], + + [[ 2.2675e-02, -8.0872e-03, -3.0746e-02, ..., -1.9989e-02, + 1.6220e-02, -4.3518e-02], + [ 1.6678e-02, -3.2532e-02, -4.2694e-02, ..., -6.4468e-04, + 1.8555e-02, -3.2135e-02], + [ 
2.0767e-02, -1.6098e-02, -3.9978e-02, ..., 5.0598e-02, + 2.9999e-02, -5.6038e-03], + ..., + [ 4.2328e-02, 5.0476e-02, 4.9988e-02, ..., 2.2064e-02, + -1.8721e-03, -1.5190e-02], + [-4.8981e-03, -1.0933e-02, 6.5994e-03, ..., 1.9073e-02, + 7.9498e-03, 2.0065e-02], + [ 4.9896e-03, -1.7853e-03, 1.5068e-02, ..., 1.0445e-02, + -2.7905e-03, 1.9196e-02]], + + [[ 7.0305e-03, -1.8372e-02, -3.5797e-02, ..., -1.5244e-02, + 2.1683e-02, -3.0380e-02], + [-1.7321e-04, -4.1534e-02, -4.5563e-02, ..., 4.7989e-03, + 2.4796e-02, -1.7990e-02], + [ 2.1000e-03, -2.8732e-02, -4.5746e-02, ..., 5.0171e-02, + 3.4485e-02, 4.2267e-03], + ..., + [ 3.7415e-02, 4.6143e-02, 4.9500e-02, ..., 2.0111e-02, + 4.0741e-03, -6.3667e-03], + [-5.8479e-03, -9.4757e-03, 1.2398e-02, ..., 2.1317e-02, + 1.5762e-02, 2.5894e-02], + [ 2.3136e-03, -7.0858e-04, 1.7914e-02, ..., 1.1047e-02, + 2.1496e-03, 2.2278e-02]]]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([0.3311, 0.0032, 0.1610, ..., 2.1922, 0.0050, 0.0039], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0045, -0.0452, -0.0475, ..., 0.0402, -0.1402, -0.0132], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-7.0632e-05, -1.6510e-04, -7.0930e-05, ..., 4.5090e-03, + -2.9160e-02, -7.8201e-05], + [-1.3733e-04, 1.2165e-04, 4.2319e-05, ..., -1.6594e-03, + 3.1433e-02, 7.4446e-05], + [ 4.8018e-04, 7.7963e-04, -1.0991e-04, ..., -1.6846e-02, + 4.2999e-02, 1.5199e-04], + ..., + [ 2.1267e-04, 4.1032e-04, -7.2420e-05, ..., 4.8027e-03, + -1.7338e-03, -6.6102e-05], + [ 3.0518e-04, -4.4405e-05, -2.2709e-04, ..., 1.1551e-02, + 3.3436e-03, 7.4685e-05], + [-2.8849e-05, 4.5919e-04, 9.3341e-05, ..., -1.1314e-02, + 3.7670e-03, -7.7844e-05]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 1.5674, -1.6143, -0.8208, ..., 0.0115, 0.0107, -0.0043], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-6.7596e-03, 
8.8043e-03, -7.9422e-03, ..., -8.6441e-03, + -8.7433e-03, 3.5553e-03], + [ 1.2077e-02, 5.8784e-03, 1.1253e-02, ..., -3.7060e-03, + 2.0008e-03, 3.8319e-03], + [-5.2032e-03, 2.6913e-03, 1.2894e-02, ..., 6.4812e-03, + -3.0398e-05, -4.2796e-04], + ..., + [-4.5037e-04, -2.5063e-03, -3.2768e-03, ..., -3.2768e-03, + -1.9409e-02, 9.2545e-03], + [-7.3624e-03, 2.8419e-03, -7.9193e-03, ..., 4.0627e-04, + -1.3866e-03, -6.7186e-04], + [ 9.0408e-03, 1.5287e-03, 1.6737e-03, ..., 2.4242e-03, + -3.7575e-03, 4.9667e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0262, -0.0654, 0.0032, ..., 0.1761, -0.0446, 0.0023], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([6.1186e-04, 2.0990e-03, 3.0166e-05, ..., 6.9025e-01, 3.5588e-01, + 1.4703e-04], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.3605e-04, 8.3127e-04, -2.0098e-05, ..., -3.6831e-01, + 1.7861e-01, 7.4003e-05], device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 3.6597e-04, 2.6047e-05, 1.1921e-07, ..., -6.6109e-03, + -1.1740e-03, -6.4468e-04], + [ 5.3291e-03, 1.3710e-02, -3.5620e-04, ..., -3.8052e-03, + -2.5225e-04, 6.0730e-03], + [ 1.3428e-03, 1.2884e-03, -1.9073e-06, ..., -2.8549e-02, + -1.1930e-03, 1.4906e-03], + ..., + [-2.4994e-02, -1.0262e-02, 2.3067e-04, ..., -2.0103e-03, + -1.2665e-02, 6.2332e-03], + [ 3.2401e-04, 9.3758e-05, -5.9605e-08, ..., -6.0234e-03, + -7.3862e-04, -6.4611e-04], + [ 1.1129e-03, -2.3117e-02, -2.7061e-04, ..., -4.4365e-03, + 3.5744e-03, -7.4997e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.6826, -0.3132, -0.8076, ..., -0.2167, -0.6543, -0.3040], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0043, -0.0023, 0.0041, ..., 0.0116, -0.0049, -0.0073], + [ 0.0018, 0.0191, -0.0102, ..., -0.0261, 0.0026, 0.0206], + [ 0.0039, -0.0002, -0.0028, ..., 0.0029, 0.0038, -0.0151], + 
..., + [ 0.0021, -0.0003, -0.0034, ..., 0.0033, 0.0015, 0.0089], + [-0.0059, 0.0078, 0.0069, ..., -0.0005, -0.0060, 0.0020], + [ 0.0003, -0.0039, -0.0022, ..., -0.0094, 0.0005, 0.0039]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0184, -0.1008, 0.0398, ..., -0.0965, -0.1080, -0.0237], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.8353e-01, 5.9135e-01, 3.3711e-06, ..., 2.0198e+00, 7.7565e-01, + 2.9745e-01], device='cuda:1', requires_grad=True)Parameter containing: +tensor([2.1628e-02, 2.1650e-01, 2.3350e-04, ..., 2.6387e-01, 4.4878e-01, + 5.1503e-02], device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0033, 0.0007, -0.0002, ..., 0.0078, -0.0240, 0.0078], + [ 0.0070, 0.0033, 0.0170, ..., -0.0062, 0.0080, 0.0055], + [ 0.0102, -0.0102, -0.0003, ..., 0.0024, 0.0164, 0.0043], + ..., + [ 0.0113, 0.0003, -0.0048, ..., 0.0002, 0.0042, -0.0065], + [-0.0144, -0.0119, 0.0076, ..., -0.0037, 0.0036, 0.0072], + [-0.0012, -0.0020, 0.0010, ..., -0.0066, -0.0222, -0.0007]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.0248e-01, 8.7988e-01, 1.4414e+00, ..., -1.0862e-03, + -4.0474e-03, 2.2471e-04], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([[-4.9829e-04, -2.1194e-02, -1.9908e-05, ..., -1.1253e-02, + 1.1993e-02, 1.0979e-04], + [-5.0068e-04, 7.5417e-03, -4.4131e-04, ..., -2.8553e-03, + 1.1459e-02, -3.0899e-03], + [ 2.7752e-03, -5.4703e-03, -1.1978e-02, ..., -3.8319e-03, + -1.0222e-04, -5.6686e-03], + ..., + [ 5.3825e-03, -1.8539e-02, 8.3313e-02, ..., -2.1317e-02, + -9.7198e-03, 1.5419e-02], + [-9.5062e-03, -2.0390e-03, 5.9166e-03, ..., 8.5144e-03, + -4.4022e-03, 6.3820e-03], + [-2.8553e-03, 6.8321e-03, -9.3508e-04, ..., 5.5199e-03, + 4.7264e-03, -4.1389e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0203, -0.0213, 0.0256, ..., 
-0.0386, -0.0219, -0.0045], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.3628, 0.7462, 0.0949, ..., 0.8510, 0.4239, 0.2627], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0484, -0.1306, 0.0199, ..., -0.1032, -0.0533, 0.0084], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0025, 0.0060, 0.0152, ..., -0.0063, 0.0478, -0.0812], + [ 0.0002, -0.0075, 0.0009, ..., -0.0011, -0.0030, 0.0037], + [ 0.0053, -0.0222, 0.0008, ..., -0.0101, 0.0178, -0.0035], + ..., + [ 0.0026, -0.0111, 0.0018, ..., -0.0058, -0.0008, 0.0039], + [-0.0072, 0.0112, 0.0018, ..., 0.0027, -0.0154, -0.0180], + [ 0.0151, 0.0001, 0.0326, ..., -0.0002, -0.0062, -0.0225]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1219, -0.6836, -0.5273, ..., -0.7568, -0.0984, -0.3079], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 1.8396e-03, -9.6321e-04, -1.6800e-02, ..., -3.0613e-03, + -9.5901e-03, -3.4103e-03], + [-1.5350e-02, 6.2675e-03, 1.4854e-02, ..., 6.7291e-03, + -9.3937e-05, -6.2218e-03], + [ 5.9891e-03, -4.2915e-04, 1.0605e-02, ..., -5.6076e-03, + -2.0447e-03, 5.9662e-03], + ..., + [ 2.9125e-03, -2.3937e-03, 4.5738e-03, ..., 1.6699e-03, + 6.7043e-04, 5.3139e-03], + [ 9.0456e-04, -1.3828e-03, 1.1587e-03, ..., -1.1549e-03, + 4.4975e-03, -5.7945e-03], + [ 3.0212e-02, 3.7136e-03, 1.1283e-04, ..., 4.8065e-03, + 1.2444e-02, 5.4054e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0242, -0.0644, 0.0790, ..., -0.0809, -0.1028, -0.0834], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.4126, 0.9839, 0.1912, ..., 0.7707, 0.5578, 0.5130], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0194, -0.1082, -0.0194, ..., 0.0886, 0.1335, 0.0285], + device='cuda:1', requires_grad=True)Parameter containing: 
+tensor([[-6.4240e-03, -2.0248e-02, -2.0676e-02, ..., 1.1930e-03, + 2.8778e-02, -5.5267e-02], + [-2.8038e-03, 1.3485e-03, -1.9196e-02, ..., 9.4748e-04, + -1.9562e-02, -2.9373e-03], + [-5.8861e-03, -4.8141e-03, 5.3825e-03, ..., -1.8219e-02, + -2.0416e-02, -9.6283e-03], + ..., + [-2.5009e-02, 1.1108e-02, 1.0498e-02, ..., 4.8447e-03, + 1.2636e-05, 2.5177e-03], + [ 1.0887e-02, 1.1696e-02, 1.1856e-02, ..., 2.7962e-03, + -4.8447e-03, -6.4964e-03], + [-8.6746e-03, 2.5177e-03, -4.9591e-03, ..., 2.8553e-03, + -8.6136e-03, 4.2229e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.1929, -0.0773, -0.0911, ..., 0.1083, 0.0064, 0.0453], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-2.2720e-02, 2.8229e-03, -3.1710e-04, ..., 2.3804e-02, + -2.3819e-02, 1.4458e-02], + [-1.9178e-03, 6.7055e-05, -1.0406e-02, ..., -7.9041e-03, + -4.2076e-03, -6.3286e-03], + [-1.2703e-02, -6.1874e-03, -1.0422e-02, ..., -1.6769e-02, + -6.2981e-03, -1.8578e-03], + ..., + [-1.6136e-03, 9.8228e-04, -7.8888e-03, ..., -6.7940e-03, + -2.7447e-03, -2.1706e-03], + [ 5.8823e-03, -3.4351e-03, 1.2810e-02, ..., -1.3399e-03, + 1.7090e-03, 7.6027e-03], + [ 1.0025e-02, 6.5842e-03, 1.1444e-02, ..., -5.9242e-03, + -1.4353e-03, -3.4161e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0180, -0.0964, 0.0243, ..., -0.0159, -0.0454, -0.0301], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.5055, 0.3092, 0.3977, ..., 1.4209, 0.4980, 0.3574], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0747, -0.0122, 0.0623, ..., -0.0418, 0.0183, -0.0493], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0067, -0.0268, 0.0142, ..., -0.0009, 0.0202, -0.0156], + [ 0.0101, 0.0105, -0.0008, ..., 0.0012, -0.0004, 0.0251], + [-0.0059, 0.0096, 0.0011, ..., -0.0045, 0.0005, 0.0131], + ..., + [ 0.0016, -0.0027, 
-0.0004, ..., 0.0003, -0.0022, -0.0065], + [ 0.0005, 0.0114, 0.0169, ..., 0.0032, 0.0011, 0.0202], + [-0.0124, 0.0076, -0.0112, ..., 0.0046, -0.0065, -0.0068]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2451, -0.3083, -0.4565, ..., -0.1675, -0.2117, -0.5532], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 6.9380e-04, -4.9591e-03, 3.3998e-04, ..., -5.6534e-03, + 4.0131e-03, 1.9054e-03], + [ 2.4014e-03, -1.7365e-02, -4.1771e-03, ..., -9.8419e-04, + 1.3916e-02, -2.5787e-03], + [-2.0340e-02, 7.0419e-03, 4.9667e-03, ..., 9.6846e-04, + -1.9730e-02, 7.8964e-04], + ..., + [-7.2746e-03, 9.3412e-04, 2.4259e-04, ..., -6.7294e-05, + 1.0061e-03, 3.1109e-03], + [-1.4820e-03, -6.7673e-03, -1.0185e-03, ..., 3.6182e-03, + -1.1826e-02, 2.4719e-02], + [ 6.9389e-03, 3.9864e-03, -3.3212e-04, ..., 1.5701e-02, + 7.3318e-03, 7.0572e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0455, -0.0800, -0.0028, ..., -0.0156, -0.1378, -0.0312], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.6342, 0.5487, 0.3780, ..., 1.3511, 0.4005, 0.4882], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0342, -0.0825, -0.0966, ..., -0.0490, 0.0846, -0.2136], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 2.7573e-02, 1.0155e-02, 5.2223e-03, ..., 6.3057e-03, + -8.5449e-03, -1.4496e-02], + [ 4.0741e-03, 1.8341e-02, -4.6654e-03, ..., -6.0539e-03, + -2.0538e-02, 5.9052e-03], + [ 6.1989e-05, -9.3613e-03, 4.7445e-04, ..., 1.0582e-02, + 9.0256e-03, -1.5945e-02], + ..., + [ 2.3632e-03, 1.7147e-03, 1.2856e-02, ..., 1.9665e-03, + 1.4906e-03, -5.8441e-03], + [-1.9121e-03, 1.6052e-02, 7.6561e-03, ..., 2.6722e-03, + -5.3329e-03, -3.0499e-03], + [-1.9257e-02, -6.6910e-03, 1.0643e-02, ..., -2.6035e-03, + 6.3744e-03, 3.3646e-03]], device='cuda:1', dtype=torch.float16, + 
requires_grad=True)Parameter containing: +tensor([-0.1843, -0.5454, -0.1458, ..., -0.0142, 0.0038, 0.0057], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-2.3544e-02, 3.3112e-03, -1.4915e-03, ..., -7.5264e-03, + 1.7456e-02, 1.1635e-02], + [ 6.1531e-03, -3.9154e-02, -3.7251e-03, ..., -2.8820e-03, + -2.1454e-02, 1.2619e-02], + [ 5.9624e-03, -9.4299e-03, 1.4954e-02, ..., -1.4839e-02, + 7.3280e-03, -1.1848e-02], + ..., + [ 4.7982e-05, 4.2915e-03, -1.1238e-02, ..., -1.1238e-02, + 1.3962e-03, -1.3695e-03], + [-7.3586e-03, -1.0338e-02, -1.3638e-04, ..., 2.1240e-02, + 1.3512e-02, -2.4395e-03], + [-1.8524e-02, -1.1511e-03, -6.6681e-03, ..., -3.1424e-04, + -3.4256e-03, 3.2120e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0142, -0.0583, 0.0198, ..., 0.0195, -0.1207, 0.0172], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.7431, 0.3526, 0.6107, ..., 2.2615, 0.5052, 0.3920], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0007, 0.0244, 0.0183, ..., -0.1535, -0.0343, 0.0142], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 2.4109e-03, 3.2253e-03, -4.0948e-05, ..., -2.7924e-03, + -6.2485e-03, 6.4964e-03], + [ 1.8692e-03, -1.9464e-03, -2.5692e-03, ..., -8.8959e-03, + -5.5275e-03, -7.2517e-03], + [-4.8370e-03, -2.5986e-02, -6.8359e-03, ..., -7.1068e-03, + 1.1925e-02, 3.2806e-03], + ..., + [ 6.9885e-03, 3.9635e-03, -1.4124e-03, ..., -4.8065e-03, + -1.8377e-03, 8.5258e-04], + [-2.0752e-02, -1.8066e-02, -5.8937e-03, ..., -8.4991e-03, + -1.3115e-02, -6.9733e-03], + [-2.3849e-02, 7.0190e-03, -5.0430e-03, ..., -1.0780e-02, + -5.9013e-03, -8.5068e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0958, -0.1884, -0.1593, ..., -0.2017, -0.3232, -0.3743], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0077, 
-0.0076, 0.0042, ..., 0.0185, 0.0244, 0.0145], + [ 0.0102, 0.0012, -0.0082, ..., -0.0322, -0.0016, 0.0077], + [-0.0055, -0.0099, -0.0081, ..., 0.0024, 0.0082, 0.0235], + ..., + [ 0.0053, 0.0035, 0.0003, ..., -0.0044, -0.0019, -0.0058], + [-0.0026, 0.0178, 0.0062, ..., 0.0020, -0.0052, -0.0042], + [-0.0041, 0.0082, 0.0150, ..., -0.0024, 0.0150, -0.0076]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0141, -0.0796, 0.0049, ..., 0.0710, -0.1786, 0.0413], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.9759, 0.5307, 0.6513, ..., 0.0107, 0.5041, 0.5372], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0589, 0.0290, 0.0214, ..., 0.3877, -0.0775, -0.1199], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0074, 0.0006, -0.0156, ..., -0.0098, -0.0449, -0.0081], + [-0.0049, -0.0510, 0.0051, ..., -0.0058, -0.0286, 0.0074], + [ 0.0070, 0.0021, 0.0135, ..., 0.0238, 0.0070, -0.0351], + ..., + [ 0.0131, -0.0296, -0.0192, ..., -0.0009, 0.0007, 0.0007], + [ 0.0140, 0.0147, -0.0112, ..., 0.0018, 0.0341, -0.0212], + [ 0.0168, -0.0181, -0.0056, ..., 0.0013, -0.0197, -0.0118]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.4622, -0.0086, 0.2756, ..., 0.0269, 0.0068, -0.0022], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0027, -0.0052, 0.0031, ..., -0.0015, -0.0176, -0.0188], + [-0.0074, 0.0147, 0.0008, ..., 0.0245, -0.0242, 0.0237], + [ 0.0041, 0.0037, 0.0004, ..., 0.0177, 0.0124, 0.0109], + ..., + [-0.0073, 0.0058, -0.0050, ..., -0.0073, -0.0063, 0.0020], + [ 0.0157, -0.0413, 0.0109, ..., 0.0118, -0.0392, 0.0283], + [ 0.0064, 0.0013, -0.0097, ..., 0.0003, 0.0149, 0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0089, -0.0779, 0.0223, ..., -0.0115, -0.1759, 0.0235], + device='cuda:1', 
dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.7499, 0.4987, 0.7858, ..., 1.1598, 0.6024, 0.5770], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0150, -0.0119, 0.0050, ..., -0.1037, 0.0333, -0.0361], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0077, -0.0109, -0.0022, ..., -0.0063, 0.0133, 0.0150], + [-0.0096, 0.0191, 0.0149, ..., -0.0078, 0.0161, 0.0103], + [-0.0020, 0.0116, 0.0042, ..., -0.0045, 0.0149, 0.0007], + ..., + [ 0.0186, 0.0082, 0.0246, ..., -0.0084, 0.0029, -0.0158], + [ 0.0175, -0.0043, 0.0002, ..., -0.0078, 0.0047, -0.0143], + [-0.0011, -0.0010, 0.0262, ..., -0.0082, -0.0047, -0.0202]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2437, -0.3796, -0.5195, ..., -0.2163, -0.4231, -0.2202], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0169, 0.0065, -0.0019, ..., -0.0216, 0.0189, 0.0012], + [ 0.0048, -0.0130, 0.0052, ..., -0.0211, -0.0036, -0.0101], + [ 0.0040, -0.0051, -0.0002, ..., -0.0073, -0.0107, -0.0037], + ..., + [-0.0058, -0.0022, 0.0002, ..., -0.0073, -0.0007, 0.0026], + [-0.0098, -0.0155, 0.0002, ..., 0.0191, 0.0043, 0.0222], + [-0.0067, 0.0011, 0.0009, ..., 0.0020, -0.0060, 0.0049]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0154, -0.0523, -0.0401, ..., 0.1025, -0.1436, -0.0176], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1220, 0.5171, 1.0746, ..., 0.0111, 0.6744, 0.7526], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0830, -0.0924, -0.0048, ..., 0.1117, -0.0385, -0.0674], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-1.6815e-02, 3.0869e-02, 9.5444e-03, ..., -7.8125e-03, + 1.6342e-02, -1.0109e-02], + [-1.2794e-02, 7.7095e-03, -3.9101e-03, ..., -1.1053e-03, + -5.3482e-03, -1.1383e-02], + [ 1.6956e-03, -1.2161e-02, 
-4.4136e-03, ..., -1.5106e-03, + -1.3565e-02, 6.5117e-03], + ..., + [-1.2596e-02, 1.0803e-02, 5.0116e-04, ..., -3.5954e-04, + -3.2578e-03, -5.4300e-05], + [-1.4236e-02, -4.2572e-03, 1.3161e-02, ..., 1.7285e-05, + -3.1860e-02, -1.3054e-02], + [ 1.2398e-02, 5.1737e-05, 2.3148e-02, ..., -2.7866e-03, + -4.3144e-03, -2.5146e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0057, -1.4600, 0.3438, ..., -0.0042, -0.0107, -0.0046], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 6.3477e-03, 1.2695e-02, -6.6872e-03, ..., 1.6868e-05, + 1.6006e-02, -1.3306e-02], + [-5.9090e-03, 8.4877e-04, -1.6708e-02, ..., -2.0477e-02, + -1.0666e-02, -1.1078e-02], + [ 2.4246e-02, 1.2558e-02, -1.6769e-02, ..., 6.4697e-03, + 1.2642e-02, -1.8021e-02], + ..., + [ 1.6754e-02, 5.8670e-03, -1.1282e-03, ..., -1.5726e-03, + 1.8406e-03, 1.1803e-02], + [ 5.7335e-03, 7.0724e-03, 1.3092e-02, ..., -1.1902e-02, + 1.6022e-02, -1.1311e-03], + [ 2.0809e-03, -2.6493e-03, -3.7041e-03, ..., -4.9400e-03, + 6.8893e-03, 1.5732e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0091, -0.0477, -0.0098, ..., -0.0483, -0.1364, 0.0059], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.8486, 0.6437, 0.8933, ..., 1.2490, 0.7166, 0.8544], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0071, 0.0669, -0.0529, ..., -0.1688, -0.0491, 0.0438], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 5.3062e-03, 1.2688e-02, 1.0406e-02, ..., -2.4281e-03, + -4.9362e-03, 1.8988e-03], + [-2.9793e-03, -1.5764e-03, 1.6724e-02, ..., 1.1415e-03, + -2.2034e-02, -9.2392e-03], + [-3.9673e-02, -1.2040e-05, 2.4188e-04, ..., 2.8877e-03, + -3.9101e-03, -2.3239e-02], + ..., + [ 8.1329e-03, 1.3290e-02, 2.1637e-02, ..., -6.3057e-03, + -4.2686e-03, -1.4544e-03], + [ 2.7435e-02, 6.1798e-03, 1.0468e-02, ..., 3.2425e-05, + 
4.9400e-03, -9.4604e-03], + [ 1.3458e-02, 7.5836e-03, -1.2062e-02, ..., 4.9925e-04, + -9.8419e-03, -1.8356e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.4192, -0.2394, -0.3069, ..., -0.3665, -0.2556, -0.1316], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0160, -0.0017, -0.0088, ..., 0.0165, -0.0056, 0.0135], + [ 0.0050, 0.0226, 0.0044, ..., 0.0111, 0.0021, 0.0038], + [ 0.0148, -0.0083, 0.0003, ..., 0.0085, 0.0015, -0.0004], + ..., + [-0.0031, -0.0009, -0.0014, ..., 0.0004, -0.0025, -0.0012], + [-0.0146, -0.0036, 0.0007, ..., 0.0108, -0.0012, -0.0406], + [ 0.0060, 0.0041, -0.0141, ..., -0.0118, 0.0065, -0.0112]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0417, 0.0127, -0.0229, ..., 0.0725, -0.0144, -0.0360], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1527, 0.7259, 1.1281, ..., 1.0935, 0.8785, 1.1066], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0413, -0.1707, -0.0308, ..., 0.0418, -0.2141, -0.0075], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0110, -0.0251, -0.0094, ..., 0.0972, 0.0119, -0.0125], + [-0.0140, -0.0233, 0.0010, ..., -0.1094, -0.0082, -0.0113], + [-0.0004, 0.0120, 0.0042, ..., 0.0518, 0.0182, 0.0130], + ..., + [-0.0162, 0.0175, -0.0176, ..., 0.0016, -0.0075, 0.0305], + [ 0.0311, -0.0070, -0.0240, ..., -0.0003, -0.0044, -0.0165], + [-0.0102, -0.0211, 0.0222, ..., -0.0004, -0.0292, -0.0076]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.2430, 0.3398, -0.1389, ..., 0.0082, -0.0049, 0.0183], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0019, 0.0045, 0.0063, ..., 0.0163, -0.0224, 0.0068], + [-0.0041, 0.0050, -0.0084, ..., -0.0048, -0.0007, 0.0138], + [-0.0173, 0.0011, 0.0050, ..., 0.0098, 0.0256, -0.0074], + 
..., + [ 0.0102, -0.0012, -0.0010, ..., -0.0045, 0.0045, 0.0039], + [ 0.0035, -0.0075, 0.0118, ..., -0.0043, -0.0048, 0.0198], + [ 0.0175, -0.0080, -0.0061, ..., -0.0388, -0.0020, 0.0159]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0218, 0.0022, 0.0177, ..., 0.0566, -0.0418, -0.0156], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.8877, 0.8024, 1.0279, ..., 2.0427, 0.9536, 0.9729], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1278, -0.0779, -0.0511, ..., -0.2266, -0.0554, 0.0418], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0282, 0.0064, 0.0033, ..., -0.0064, 0.0038, 0.0184], + [ 0.0007, 0.0181, 0.0098, ..., -0.0025, -0.0096, 0.0182], + [-0.0056, 0.0077, 0.0009, ..., -0.0078, -0.0058, -0.0323], + ..., + [-0.0288, 0.0303, 0.0033, ..., 0.0016, -0.0074, 0.0192], + [-0.0051, -0.0323, -0.0066, ..., -0.0045, 0.0333, 0.0005], + [ 0.0188, 0.0207, 0.0077, ..., -0.0080, -0.0315, -0.0182]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1138, -0.1492, -0.4014, ..., -0.2352, -0.3323, -0.2046], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-1.9287e-02, 2.2621e-03, 6.0921e-03, ..., -3.4676e-03, + -8.6060e-03, -2.0447e-03], + [-6.7711e-05, -3.6869e-03, -5.7602e-03, ..., -1.7380e-02, + 3.8025e-02, -1.9588e-03], + [-1.2627e-02, -4.6158e-03, -6.8207e-03, ..., 1.4572e-03, + -6.2037e-04, -1.3741e-02], + ..., + [ 3.5152e-03, 2.8687e-03, -8.9417e-03, ..., -8.0633e-04, + 7.1335e-03, 3.8662e-03], + [ 1.1139e-02, 7.1411e-03, 7.1297e-03, ..., -1.9293e-03, + -5.5265e-04, 3.8330e-02], + [-8.9264e-03, -5.6114e-03, 2.1210e-03, ..., -1.2589e-02, + -8.9493e-03, 8.3389e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0068, 0.0181, -0.0552, ..., 0.1211, -0.0751, -0.1089], + device='cuda:1', 
dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1916, 0.9694, 1.2653, ..., 0.1731, 0.9097, 1.1966], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0955, -0.1844, -0.0546, ..., 0.2570, -0.0544, 0.0379], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0093, -0.0152, -0.0200, ..., 0.0346, -0.0043, -0.0287], + [ 0.0025, -0.0165, -0.0050, ..., -0.0740, -0.0039, -0.0172], + [-0.0126, 0.0090, 0.0117, ..., -0.0017, 0.0034, 0.0126], + ..., + [-0.0276, -0.0107, -0.0004, ..., -0.0036, 0.0028, 0.0067], + [ 0.0289, -0.0022, -0.0177, ..., -0.0029, 0.0003, -0.0052], + [-0.0118, 0.0090, 0.0049, ..., -0.0104, 0.0250, 0.0115]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.8496, 0.1801, 2.3359, ..., 0.0398, -0.0217, -0.1345], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0018, -0.0264, 0.0078, ..., 0.0173, -0.0076, -0.0041], + [ 0.0135, -0.0083, 0.0026, ..., 0.0076, 0.0072, -0.0242], + [ 0.0054, 0.0058, -0.0234, ..., -0.0210, -0.0069, 0.0223], + ..., + [-0.0025, 0.0097, -0.0013, ..., 0.0089, 0.0019, 0.0197], + [ 0.0045, -0.0037, 0.0037, ..., 0.0008, 0.0096, -0.0237], + [-0.0100, 0.0123, 0.0061, ..., -0.0153, -0.0145, 0.0152]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0037, 0.0258, -0.0091, ..., -0.0498, -0.0065, -0.0458], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1915, 1.0973, 1.2475, ..., 1.4018, 1.1544, 1.1824], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0225, -0.0913, 0.0974, ..., -0.2996, -0.0410, 0.0070], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0168, 0.0096, 0.0042, ..., -0.0020, -0.0020, 0.0089], + [-0.0215, 0.0329, 0.0130, ..., 0.0041, 0.0160, 0.0035], + [-0.0176, -0.0188, 0.0220, ..., 0.0037, -0.0368, 0.0167], + ..., + [ 0.0086, -0.0059, -0.0079, 
..., 0.0015, -0.0030, -0.0178], + [-0.0288, -0.0067, 0.0123, ..., -0.0054, -0.0138, -0.0072], + [-0.0190, 0.0143, -0.0290, ..., -0.0286, -0.0196, -0.0011]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1221, -0.2141, -0.4116, ..., -0.1118, -0.1777, -0.3623], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0010, 0.0136, -0.0347, ..., -0.0224, 0.0056, -0.0153], + [-0.0027, -0.0350, 0.0204, ..., 0.0106, -0.0202, -0.0021], + [ 0.0073, -0.0276, -0.0020, ..., 0.0134, 0.0046, -0.0155], + ..., + [-0.0004, -0.0072, 0.0019, ..., -0.0059, 0.0039, 0.0084], + [ 0.0117, -0.0049, -0.0148, ..., -0.0053, 0.0066, -0.0098], + [ 0.0130, 0.0172, 0.0037, ..., 0.0183, -0.0211, -0.0070]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0200, 0.0231, -0.0658, ..., 0.1027, -0.0781, -0.1132], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2565, 1.1066, 1.2045, ..., 0.5890, 1.0264, 1.2907], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0063, -0.0652, 0.0874, ..., 0.1717, 0.1017, -0.0355], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0141, 0.0224, 0.0066, ..., -0.0503, -0.0308, -0.0002], + [-0.0106, -0.0263, 0.0116, ..., -0.0196, -0.0080, 0.0482], + [ 0.0088, -0.0083, 0.0067, ..., 0.0172, -0.0225, 0.0448], + ..., + [ 0.0117, 0.0198, 0.0119, ..., 0.0007, 0.0045, -0.0206], + [ 0.0123, -0.0125, 0.0020, ..., 0.0034, 0.0106, -0.0007], + [ 0.0226, -0.0011, -0.0222, ..., 0.0048, -0.0005, -0.0066]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0543, -0.0735, 0.2413, ..., -0.0484, -0.1190, 0.0173], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0229, 0.0144, -0.0111, ..., -0.0034, 0.0119, -0.0192], + [-0.0048, -0.0063, 0.0103, ..., -0.0061, 0.0145, 0.0030], + [ 0.0077, 
0.0203, 0.0148, ..., -0.0084, -0.0068, 0.0304], + ..., + [-0.0003, 0.0012, 0.0053, ..., 0.0082, -0.0035, 0.0224], + [-0.0080, 0.0013, 0.0045, ..., 0.0091, -0.0064, -0.0116], + [-0.0171, 0.0154, -0.0227, ..., -0.0176, 0.0146, -0.0069]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0177, 0.0686, -0.0156, ..., -0.0817, 0.0255, 0.0177], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1690, 1.1532, 1.1559, ..., 1.5800, 1.1703, 1.2291], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0074, 0.0918, -0.0353, ..., -0.3273, -0.1143, -0.0546], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0204, -0.0119, -0.0051, ..., -0.0030, -0.0053, 0.0117], + [-0.0035, -0.0211, 0.0029, ..., -0.0038, 0.0121, 0.0023], + [ 0.0126, -0.0055, 0.0038, ..., 0.0006, 0.0247, 0.0077], + ..., + [ 0.0121, 0.0132, -0.0259, ..., 0.0031, 0.0226, 0.0040], + [-0.0022, 0.0106, -0.0208, ..., -0.0026, 0.0163, -0.0018], + [-0.0326, 0.0187, 0.0123, ..., -0.0007, -0.0089, 0.0122]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3628, -0.2209, -0.1646, ..., -0.2522, -0.2683, -0.2517], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0055, 0.0073, 0.0131, ..., -0.0030, -0.0204, -0.0067], + [-0.0153, 0.0021, 0.0061, ..., 0.0199, -0.0058, -0.0234], + [-0.0043, 0.0070, 0.0054, ..., 0.0016, 0.0075, -0.0185], + ..., + [-0.0034, -0.0019, 0.0044, ..., -0.0031, -0.0046, 0.0004], + [-0.0026, -0.0267, -0.0127, ..., 0.0038, -0.0151, 0.0075], + [-0.0029, -0.0212, -0.0195, ..., 0.0119, 0.0086, -0.0139]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0283, 0.0284, -0.0328, ..., 0.0670, -0.0050, -0.0489], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2720, 1.2516, 1.2042, ..., 0.7531, 1.0650, 1.2413], 
device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0019, -0.0140, 0.0240, ..., 0.2147, -0.1253, -0.2114], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0038, -0.0111, 0.0217, ..., -0.0341, 0.0049, 0.0043], + [ 0.0020, 0.0047, 0.0051, ..., -0.0009, -0.0141, 0.0165], + [-0.0086, 0.0055, 0.0177, ..., 0.0030, -0.0044, -0.0111], + ..., + [ 0.0037, 0.0199, -0.0006, ..., -0.0081, 0.0196, -0.0002], + [-0.0116, 0.0020, -0.0122, ..., 0.0042, -0.0016, -0.0110], + [-0.0201, 0.0025, -0.0230, ..., -0.0041, 0.0287, 0.0105]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.8447, 0.0093, -1.0840, ..., -0.0142, 0.0109, 0.0013], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0003, 0.0259, 0.0041, ..., 0.0052, -0.0108, 0.0274], + [ 0.0064, 0.0003, -0.0086, ..., -0.0271, 0.0063, 0.0018], + [-0.0234, 0.0012, 0.0170, ..., 0.0238, 0.0096, 0.0125], + ..., + [ 0.0077, 0.0320, 0.0242, ..., 0.0052, 0.0005, 0.0047], + [ 0.0064, 0.0084, 0.0002, ..., -0.0008, 0.0042, -0.0140], + [ 0.0204, -0.0061, -0.0246, ..., 0.0237, -0.0045, 0.0173]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0204, 0.0182, -0.0022, ..., -0.0782, 0.0405, -0.0199], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1486, 1.2062, 1.1745, ..., 1.6290, 1.1674, 1.2157], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0552, 0.0993, -0.0013, ..., -0.1784, -0.0515, -0.0148], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0086, -0.0217, 0.0128, ..., -0.0079, -0.0053, 0.0027], + [-0.0070, 0.0067, 0.0020, ..., -0.0125, -0.0191, -0.0117], + [ 0.0062, 0.0227, 0.0108, ..., 0.0066, 0.0004, 0.0018], + ..., + [ 0.0228, -0.0078, 0.0063, ..., 0.0002, 0.0019, -0.0005], + [-0.0191, 0.0253, 0.0069, ..., -0.0109, -0.0114, -0.0081], + [ 0.0292, -0.0316, -0.0293, ..., 
-0.0048, 0.0165, -0.0164]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.4792, -0.1467, -0.1043, ..., -0.2996, -0.2251, -0.3262], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0207, 0.0158, -0.0021, ..., 0.0083, 0.0042, 0.0273], + [ 0.0063, 0.0024, -0.0061, ..., 0.0069, -0.0269, 0.0042], + [ 0.0299, -0.0060, -0.0002, ..., -0.0130, 0.0070, -0.0297], + ..., + [-0.0122, 0.0011, -0.0082, ..., -0.0026, 0.0038, -0.0006], + [-0.0204, 0.0085, 0.0057, ..., 0.0096, -0.0105, 0.0216], + [-0.0023, 0.0328, 0.0013, ..., -0.0099, -0.0044, 0.0145]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0411, -0.0040, -0.0516, ..., 0.1114, 0.0086, -0.0609], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.3836, 1.2857, 1.2323, ..., 0.6118, 1.1779, 1.2560], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.2367, 0.0575, 0.1226, ..., 0.2404, 0.0237, -0.0258], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0172, -0.0115, -0.0176, ..., 0.0113, -0.0038, 0.0092], + [-0.0095, 0.0104, 0.0087, ..., 0.0204, -0.0187, -0.0189], + [-0.0259, -0.0040, 0.0021, ..., 0.0106, 0.0068, -0.0149], + ..., + [ 0.0015, 0.0207, -0.0071, ..., -0.0045, -0.0049, 0.0017], + [ 0.0381, 0.0040, -0.0079, ..., 0.0003, -0.0011, 0.0140], + [ 0.0094, -0.0019, -0.0035, ..., -0.0018, 0.0271, -0.0058]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0180, 0.2048, -0.1954, ..., 0.0674, -0.0071, 0.0122], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0127, -0.0093, -0.0233, ..., -0.0062, -0.0268, -0.0001], + [ 0.0243, -0.0188, 0.0209, ..., -0.0062, -0.0029, -0.0023], + [-0.0073, 0.0052, -0.0273, ..., 0.0022, 0.0090, 0.0104], + ..., + [-0.0117, 0.0100, 0.0137, ..., 0.0083, -0.0002, 0.0034], + [ 0.0061, 0.0166, 
0.0235, ..., 0.0010, 0.0024, -0.0336], + [-0.0002, -0.0062, -0.0147, ..., 0.0020, -0.0193, -0.0020]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0233, 0.0391, -0.0073, ..., -0.0649, 0.0291, 0.0002], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.1998, 1.2310, 1.1488, ..., 1.5903, 1.2228, 1.3022], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0585, 0.0091, -0.0592, ..., -0.2559, -0.1667, -0.0673], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-1.1040e-02, -1.6754e-02, -2.7451e-02, ..., -4.3964e-04, + 6.0501e-03, 5.5552e-04], + [ 2.4002e-02, -2.4567e-02, 7.3128e-03, ..., -3.2883e-03, + 1.0437e-02, -2.3246e-05], + [ 8.8272e-03, 8.2474e-03, 3.9597e-03, ..., 4.3845e-04, + -7.0724e-03, 2.1725e-03], + ..., + [-1.2598e-03, -9.5901e-03, 1.6785e-02, ..., -1.8721e-03, + -4.9057e-03, 7.3891e-03], + [ 2.6455e-03, 1.1078e-02, -1.6968e-02, ..., -1.1044e-03, + -6.0959e-03, -3.0914e-02], + [ 1.2146e-02, 2.3819e-02, 5.0545e-04, ..., 2.2030e-03, + -4.2877e-03, 1.9012e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.1323, -0.2241, -0.0570, ..., -0.2708, -0.3240, -0.0825], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 9.5673e-03, -1.0620e-02, -2.4261e-02, ..., 3.1433e-02, + -1.2718e-02, -1.2207e-02], + [ 8.1787e-03, 6.4707e-04, 3.0732e-04, ..., -1.3092e-02, + 5.6446e-05, -1.8692e-02], + [ 6.8626e-03, 1.8829e-02, 1.0094e-02, ..., -3.1891e-03, + 9.3765e-03, -3.5896e-03], + ..., + [ 5.6763e-03, 3.4389e-03, -8.8310e-04, ..., 5.5847e-03, + -3.6240e-03, -4.8103e-03], + [ 2.6627e-03, -1.5274e-02, -6.7186e-04, ..., -2.0081e-02, + 1.1981e-04, 1.0040e-02], + [-7.2365e-03, -5.4207e-03, -3.8395e-03, ..., 5.3978e-03, + -2.5177e-02, -2.7252e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0388, 0.0069, -0.0129, 
..., 0.0417, 0.0218, 0.0082], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.4280, 1.3614, 1.2954, ..., 1.0131, 1.1817, 1.3209], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1139, 0.0275, 0.0677, ..., 0.1797, 0.0199, -0.2525], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0128, 0.0002, 0.0248, ..., -0.0053, 0.0151, -0.0208], + [-0.0050, 0.0376, -0.0262, ..., 0.0018, 0.0168, 0.0039], + [-0.0110, -0.0084, 0.0369, ..., -0.0102, 0.0071, 0.0117], + ..., + [ 0.0205, -0.0055, -0.0127, ..., -0.0057, -0.0044, 0.0095], + [-0.0154, -0.0017, -0.0012, ..., 0.0026, -0.0132, 0.0012], + [ 0.0082, 0.0055, 0.0048, ..., -0.0060, -0.0069, 0.0101]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0482, -0.0384, 0.5059, ..., -0.0175, -0.0147, 0.0126], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0060, 0.0181, 0.0104, ..., -0.0065, 0.0091, 0.0008], + [-0.0115, 0.0102, 0.0212, ..., 0.0144, 0.0276, -0.0077], + [ 0.0390, 0.0029, 0.0083, ..., -0.0165, 0.0036, -0.0177], + ..., + [ 0.0054, -0.0068, -0.0049, ..., 0.0166, -0.0177, 0.0042], + [ 0.0016, -0.0031, 0.0076, ..., 0.0091, 0.0008, 0.0024], + [ 0.0034, -0.0059, 0.0107, ..., -0.0199, 0.0139, -0.0083]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0060, 0.0679, 0.0352, ..., -0.0554, 0.0134, 0.0558], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2294, 1.2056, 1.1645, ..., 1.8344, 1.1523, 1.2639], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0175, -0.0714, -0.1254, ..., -0.2901, -0.1457, 0.1501], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-4.8256e-03, -3.2368e-03, 1.3252e-02, ..., -1.7681e-03, + 2.2354e-02, -1.1436e-02], + [ 4.0253e-02, 3.1097e-02, 1.9760e-02, ..., 5.3787e-03, + -2.2949e-02, 1.4923e-02], + 
[-1.9789e-05, -2.5848e-02, -1.0681e-02, ..., 1.1975e-04, + 1.0056e-02, 9.3384e-03], + ..., + [ 1.9211e-02, -1.5373e-02, 5.6839e-03, ..., -1.1314e-02, + -4.1748e-02, 1.5808e-02], + [ 2.8934e-03, -1.8179e-04, 8.8425e-03, ..., -2.5787e-03, + -1.7517e-02, -6.8169e-03], + [ 1.7838e-02, -6.3019e-03, -3.8700e-03, ..., 3.0651e-03, + -3.5019e-03, 1.3748e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.2314, -0.3215, -0.0737, ..., -0.3018, -0.1614, -0.3069], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0107, 0.0326, 0.0092, ..., -0.0069, -0.0051, -0.0002], + [-0.0118, 0.0024, 0.0258, ..., 0.0087, -0.0250, -0.0087], + [ 0.0156, 0.0077, 0.0071, ..., -0.0158, -0.0195, 0.0202], + ..., + [-0.0062, 0.0010, 0.0041, ..., -0.0098, 0.0120, 0.0015], + [-0.0022, 0.0381, -0.0009, ..., 0.0051, 0.0093, 0.0135], + [ 0.0090, -0.0229, -0.0135, ..., 0.0130, 0.0066, 0.0037]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([0.0371, 0.0197, 0.0018, ..., 0.0558, 0.0674, 0.0106], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.4988, 1.4287, 1.3546, ..., 0.9505, 1.1804, 1.4063], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1733, -0.0029, 0.0176, ..., 0.2492, 0.0635, -0.1153], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0109, -0.0005, 0.0278, ..., 0.0017, -0.0215, 0.0092], + [ 0.0046, -0.0021, 0.0013, ..., 0.0017, -0.0017, 0.0037], + [-0.0275, 0.0318, 0.0133, ..., 0.0012, 0.0040, -0.0225], + ..., + [ 0.0179, 0.0136, -0.0099, ..., 0.0038, 0.0117, 0.0026], + [ 0.0067, 0.0052, -0.0031, ..., -0.0122, 0.0013, -0.0083], + [ 0.0012, -0.0281, -0.0114, ..., -0.0038, -0.0005, -0.0021]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.2450, 1.7920, 0.0699, ..., 0.0484, -0.0464, 0.0208], + device='cuda:1', dtype=torch.float16, 
requires_grad=True)Parameter containing: +tensor([[ 0.0059, 0.0109, -0.0109, ..., -0.0163, 0.0007, -0.0298], + [-0.0065, -0.0061, -0.0041, ..., -0.0236, -0.0090, 0.0271], + [ 0.0104, 0.0079, 0.0172, ..., -0.0066, 0.0009, -0.0156], + ..., + [ 0.0096, -0.0068, 0.0006, ..., -0.0150, 0.0118, 0.0032], + [ 0.0230, 0.0083, 0.0289, ..., -0.0219, 0.0005, 0.0038], + [ 0.0172, 0.0228, 0.0059, ..., 0.0067, 0.0123, -0.0109]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0267, -0.0056, -0.0026, ..., -0.0532, -0.0267, 0.0485], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2355, 1.2508, 1.2161, ..., 1.8124, 1.1440, 1.3011], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1252, -0.0353, 0.1171, ..., -0.1227, -0.0330, 0.1001], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-6.1083e-04, -1.1574e-02, 9.1705e-03, ..., -1.0834e-03, + 8.1482e-03, 3.4065e-03], + [ 1.2016e-02, -1.7960e-02, 3.3379e-03, ..., -8.6365e-03, + -1.7424e-03, -1.5541e-02], + [ 8.0948e-03, -1.1383e-02, -2.7039e-02, ..., 4.2725e-03, + 4.9667e-03, -2.5375e-02], + ..., + [-2.1606e-02, -2.0233e-02, -3.5381e-03, ..., 2.8253e-05, + -1.3222e-02, 7.2975e-03], + [ 2.8515e-03, 1.3855e-02, 1.0794e-04, ..., -3.3092e-03, + -1.4519e-02, 1.1742e-02], + [-1.1467e-02, 1.2001e-02, 1.0672e-03, ..., -3.9520e-03, + -5.1178e-02, 5.3864e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.3506, -0.3098, -0.0694, ..., -0.3074, -0.2494, -0.4229], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0110, 0.0193, -0.0116, ..., 0.0081, -0.0065, -0.0187], + [ 0.0120, 0.0100, 0.0045, ..., 0.0063, -0.0106, -0.0092], + [-0.0012, 0.0172, 0.0223, ..., 0.0052, 0.0394, 0.0099], + ..., + [-0.0025, 0.0068, -0.0081, ..., 0.0005, -0.0055, 0.0065], + [ 0.0112, -0.0010, -0.0028, ..., -0.0172, -0.0041, -0.0017], + [ 0.0086, -0.0009, 
0.0137, ..., -0.0030, 0.0077, -0.0112]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0374, -0.0088, -0.0429, ..., 0.0653, -0.0126, -0.0252], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5180, 1.3799, 1.3971, ..., 0.8399, 1.2614, 1.5007], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1863, 0.1162, 0.4045, ..., 0.2292, 0.4198, -0.0957], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0160, 0.0014, -0.0089, ..., 0.0042, 0.0289, 0.0184], + [-0.0010, 0.0089, -0.0017, ..., -0.0152, -0.0108, -0.0008], + [-0.0141, -0.0294, 0.0109, ..., -0.0025, 0.0298, 0.0266], + ..., + [ 0.0245, 0.0203, 0.0064, ..., -0.0128, 0.0092, -0.0031], + [ 0.0211, 0.0237, -0.0171, ..., -0.0165, -0.0070, 0.0133], + [-0.0094, 0.0021, 0.0247, ..., -0.0004, 0.0047, -0.0201]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2849, -0.3125, -0.2026, ..., -0.0510, -0.0885, 0.0077], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0026, -0.0256, 0.0400, ..., -0.0214, -0.0089, 0.0125], + [-0.0129, -0.0088, 0.0240, ..., -0.0082, -0.0172, -0.0212], + [-0.0200, -0.0017, 0.0146, ..., -0.0077, 0.0185, -0.0176], + ..., + [-0.0275, 0.0012, -0.0196, ..., 0.0564, 0.0609, -0.0008], + [ 0.0161, 0.0054, -0.0016, ..., -0.0032, 0.0063, 0.0044], + [-0.0055, -0.0287, -0.0144, ..., 0.0080, -0.0101, 0.0110]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0077, 0.0031, 0.0012, ..., -0.0704, 0.0297, 0.0082], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2799, 1.2512, 1.2956, ..., 2.2034, 1.1719, 1.3681], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1496, -0.0801, -0.0724, ..., -0.1659, -0.0900, 0.0350], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 
0.0067, 0.0413, 0.0378, ..., 0.0076, 0.0142, 0.0230], + [ 0.0130, 0.0156, 0.0179, ..., 0.0018, 0.0109, 0.0043], + [-0.0078, -0.0166, 0.0107, ..., -0.0015, -0.0044, 0.0111], + ..., + [ 0.0138, 0.0290, 0.0173, ..., -0.0042, 0.0035, 0.0030], + [ 0.0160, 0.0144, 0.0156, ..., -0.0038, -0.0043, 0.0115], + [-0.0084, 0.0176, 0.0231, ..., 0.0015, -0.0203, -0.0239]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2788, -0.1956, -0.3853, ..., -0.3225, -0.2610, -0.0354], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-7.0419e-03, -1.1139e-02, -1.7349e-02, ..., 1.0239e-02, + -1.8906e-02, 2.0798e-02], + [ 1.1635e-02, -8.4457e-03, -7.3700e-03, ..., 1.1932e-02, + -1.0246e-02, -1.2451e-02], + [ 1.3153e-02, -2.9282e-02, -1.9894e-03, ..., 4.7760e-03, + -2.7866e-03, -1.2886e-02], + ..., + [-3.2005e-03, 1.4801e-02, -3.5763e-03, ..., 6.4313e-05, + 1.2386e-04, 2.5702e-04], + [-1.8677e-02, -8.7967e-03, 9.5978e-03, ..., 4.4403e-03, + -1.1940e-02, 2.3422e-02], + [-5.8060e-03, -2.0889e-02, -1.2917e-02, ..., -4.3907e-03, + -7.6561e-03, 2.6611e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0329, 0.0113, -0.0181, ..., 0.0332, 0.0061, -0.0410], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.6208, 1.4989, 1.4207, ..., 0.7640, 1.2692, 1.4951], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1374, -0.0238, -0.0117, ..., 0.3347, 0.1457, -0.0975], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0034, -0.0225, -0.0031, ..., 0.0137, -0.0365, -0.0123], + [-0.0254, -0.0124, 0.0016, ..., -0.0371, 0.0126, -0.0146], + [ 0.0078, -0.0007, -0.0157, ..., 0.0026, -0.0030, 0.0202], + ..., + [ 0.0041, -0.0135, 0.0168, ..., 0.0034, -0.0226, -0.0060], + [-0.0034, -0.0090, 0.0047, ..., -0.0018, 0.0212, -0.0074], + [ 0.0030, 0.0077, -0.0017, ..., -0.0023, -0.0013, 0.0052]], + 
device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.2683, -0.1324, 0.1324, ..., 0.0209, 0.0130, 0.0207], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0029, 0.0157, 0.0060, ..., 0.0021, 0.0078, -0.0089], + [ 0.0114, -0.0065, 0.0057, ..., 0.0285, 0.0281, 0.0063], + [-0.0123, -0.0026, 0.0062, ..., 0.0232, -0.0135, -0.0089], + ..., + [-0.0011, 0.0021, -0.0047, ..., -0.0017, -0.0325, -0.0199], + [-0.0076, -0.0072, -0.0037, ..., -0.0192, -0.0359, -0.0052], + [-0.0138, -0.0226, 0.0044, ..., 0.0032, 0.0111, -0.0124]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0062, -0.0194, -0.0133, ..., 0.0403, 0.0331, 0.0198], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.3324, 1.2243, 1.2615, ..., 1.9267, 1.1337, 1.3792], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1416, 0.0005, 0.0165, ..., 0.0163, -0.0729, 0.0122], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0125, -0.0005, -0.0153, ..., -0.0146, -0.0185, -0.0108], + [-0.0015, 0.0045, 0.0178, ..., -0.0033, 0.0042, -0.0107], + [-0.0038, 0.0046, 0.0096, ..., -0.0217, 0.0142, 0.0295], + ..., + [ 0.0055, -0.0117, -0.0156, ..., -0.0233, -0.0058, 0.0149], + [ 0.0230, -0.0006, 0.0009, ..., 0.0129, 0.0170, 0.0101], + [ 0.0109, 0.0066, -0.0111, ..., 0.0038, 0.0099, -0.0238]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2153, -0.2781, -0.3320, ..., -0.1223, -0.1307, -0.2898], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0037, -0.0316, 0.0164, ..., -0.0018, -0.0181, 0.0028], + [ 0.0187, 0.0093, 0.0047, ..., 0.0109, 0.0137, -0.0057], + [-0.0071, -0.0045, 0.0287, ..., 0.0160, -0.0103, 0.0014], + ..., + [-0.0109, 0.0038, -0.0134, ..., 0.0115, -0.0042, -0.0035], + [-0.0127, 0.0039, 0.0083, ..., 0.0040, -0.0111, 
0.0036], + [-0.0054, -0.0070, -0.0073, ..., -0.0131, -0.0262, 0.0085]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0253, -0.0024, -0.0242, ..., 0.0956, 0.0208, -0.0150], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.6542, 1.5471, 1.5496, ..., 0.4089, 1.3961, 1.6685], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.2147, 0.1279, 0.3980, ..., 0.3844, 0.3855, -0.2151], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0031, 0.0022, -0.0024, ..., 0.0046, -0.0105, -0.0062], + [-0.0263, 0.0304, 0.0018, ..., -0.0321, 0.0226, 0.0128], + [ 0.0247, 0.0204, 0.0079, ..., -0.0015, 0.0224, -0.0038], + ..., + [-0.0034, 0.0151, -0.0085, ..., 0.0086, -0.0034, 0.0134], + [ 0.0203, -0.0166, 0.0061, ..., 0.0006, -0.0226, -0.0220], + [ 0.0190, -0.0124, -0.0086, ..., -0.0007, -0.0131, -0.0075]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-2.5195, 0.2338, -0.3826, ..., -0.0098, -0.0044, 0.0407], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0303, -0.0037, 0.0023, ..., 0.0084, 0.0081, -0.0136], + [-0.0157, -0.0159, 0.0211, ..., 0.0249, 0.0152, 0.0164], + [-0.0068, -0.0159, 0.0054, ..., 0.0163, 0.0173, 0.0204], + ..., + [-0.0006, 0.0029, -0.0114, ..., -0.0071, 0.0085, 0.0100], + [-0.0033, -0.0080, -0.0203, ..., -0.0054, 0.0173, 0.0025], + [ 0.0112, -0.0027, 0.0068, ..., 0.0061, -0.0080, 0.0069]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0108, -0.0551, 0.0180, ..., 0.0265, 0.0322, -0.0401], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2997, 1.2493, 1.3251, ..., 1.4770, 1.1521, 1.3748], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1529, 0.0496, -0.0569, ..., -0.0100, -0.0241, -0.0367], + device='cuda:1', requires_grad=True)Parameter 
containing: +tensor([[ 0.0036, 0.0120, 0.0108, ..., -0.0241, -0.0065, -0.0097], + [-0.0012, -0.0218, -0.0014, ..., -0.0027, -0.0102, 0.0113], + [ 0.0043, 0.0098, -0.0035, ..., -0.0025, -0.0057, 0.0186], + ..., + [-0.0025, -0.0322, -0.0135, ..., -0.0106, -0.0206, -0.0133], + [-0.0045, 0.0262, -0.0152, ..., -0.0032, -0.0122, -0.0036], + [-0.0104, 0.0072, -0.0117, ..., -0.0020, 0.0015, -0.0004]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2783, -0.2571, -0.3367, ..., -0.3469, -0.2042, -0.0554], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0010, -0.0118, 0.0168, ..., 0.0077, -0.0058, 0.0077], + [ 0.0135, -0.0157, -0.0040, ..., -0.0081, 0.0145, -0.0026], + [ 0.0177, -0.0216, 0.0124, ..., 0.0067, -0.0014, -0.0008], + ..., + [-0.0105, 0.0073, -0.0061, ..., 0.0008, 0.0031, -0.0009], + [-0.0176, 0.0078, -0.0194, ..., -0.0242, -0.0090, -0.0050], + [ 0.0074, 0.0026, 0.0208, ..., 0.0214, -0.0106, 0.0054]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0367, -0.0981, -0.0668, ..., 0.0355, 0.0193, -0.0256], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.7683, 1.6419, 1.7225, ..., 0.6681, 1.5146, 1.7884], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1888, 0.3456, 0.1489, ..., 0.4134, 0.4308, -0.1223], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0099, -0.0102, 0.0307, ..., 0.0022, -0.0095, 0.0271], + [-0.0269, -0.0032, -0.0007, ..., -0.0077, -0.0161, -0.0114], + [-0.0016, 0.0004, 0.0387, ..., -0.0108, 0.0132, -0.0004], + ..., + [-0.0021, -0.0089, 0.0141, ..., 0.0007, -0.0084, 0.0113], + [ 0.0048, -0.0126, -0.0221, ..., -0.0024, -0.0106, 0.0105], + [-0.0116, -0.0002, -0.0007, ..., -0.0022, -0.0071, -0.0038]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2299, 0.2384, -0.0945, ..., 0.0501, 
-0.0047, -0.0003], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0283, -0.0055, 0.0025, ..., -0.0084, 0.0048, -0.0027], + [ 0.0133, -0.0057, -0.0082, ..., -0.0007, -0.0018, -0.0043], + [-0.0129, 0.0132, 0.0077, ..., -0.0180, 0.0022, -0.0036], + ..., + [ 0.0090, 0.0050, -0.0102, ..., -0.0129, -0.0157, -0.0071], + [ 0.0154, -0.0024, -0.0170, ..., 0.0126, -0.0098, 0.0101], + [ 0.0222, 0.0012, -0.0260, ..., -0.0199, -0.0145, 0.0066]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0646, -0.0642, 0.0045, ..., -0.0348, -0.0156, -0.0321], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.3934, 1.3553, 1.4854, ..., 1.8728, 1.3167, 1.4949], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0756, -0.1134, -0.0586, ..., -0.0262, -0.0903, -0.1063], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-7.9727e-03, 4.2458e-03, 1.8478e-02, ..., -1.1673e-02, + 1.3763e-02, -6.2256e-03], + [-1.7181e-02, 1.0742e-02, -4.7760e-03, ..., -3.7718e-04, + -2.2888e-02, -8.6594e-03], + [-4.2701e-04, 2.2446e-02, 1.0483e-02, ..., -4.0817e-03, + -1.7151e-02, -2.6047e-02], + ..., + [-2.9617e-02, -7.5722e-03, 8.4043e-06, ..., -1.1253e-02, + 1.6479e-02, 2.7222e-02], + [-1.2772e-02, 6.8283e-03, 2.5269e-02, ..., -7.0038e-03, + -7.5645e-03, 9.1019e-03], + [ 4.2176e-04, -4.2152e-03, 4.3335e-02, ..., -3.2711e-03, + -1.3786e-02, 1.5656e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.3418, -0.2771, -0.3467, ..., -0.3989, -0.2386, -0.2927], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0106, 0.0111, 0.0187, ..., -0.0266, 0.0003, -0.0147], + [ 0.0346, 0.0015, -0.0025, ..., -0.0093, 0.0119, -0.0310], + [-0.0043, -0.0276, 0.0013, ..., -0.0066, 0.0263, 0.0338], + ..., + [-0.0045, 0.0165, 0.0076, ..., 0.0143, -0.0025, -0.0003], + [ 0.0067, 
-0.0164, 0.0050, ..., 0.0121, -0.0008, -0.0172], + [-0.0008, -0.0125, -0.0156, ..., 0.0319, 0.0113, -0.0105]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0361, -0.0391, 0.0151, ..., -0.0164, 0.0040, -0.0078], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.1112, 2.0118, 2.0347, ..., 0.7085, 1.8153, 2.2010], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1631, -0.1508, 0.1484, ..., 0.4431, 0.6810, -0.3282], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0117, -0.0245, -0.0218, ..., 0.0068, -0.0019, -0.0032], + [ 0.0243, 0.0013, -0.0101, ..., 0.0473, -0.0216, 0.0135], + [ 0.0109, -0.0224, -0.0055, ..., -0.0055, -0.0267, -0.0187], + ..., + [-0.0167, 0.0103, -0.0058, ..., -0.0046, -0.0215, 0.0237], + [ 0.0119, -0.0105, 0.0158, ..., 0.0023, -0.0127, -0.0004], + [-0.0119, -0.0276, 0.0225, ..., -0.0024, -0.0047, -0.0064]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.2433, -0.1136, 0.0888, ..., -0.0050, -0.0137, 0.0093], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0302, 0.0129, -0.0099, ..., 0.0201, 0.0093, -0.0045], + [-0.0376, -0.0102, -0.0002, ..., -0.0104, 0.0078, -0.0009], + [ 0.0196, -0.0219, 0.0057, ..., 0.0070, -0.0059, -0.0075], + ..., + [-0.0068, -0.0123, 0.0011, ..., 0.0024, -0.0069, -0.0181], + [ 0.0018, -0.0121, -0.0095, ..., -0.0199, 0.0067, -0.0080], + [-0.0084, 0.0186, 0.0111, ..., -0.0047, 0.0052, 0.0088]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-7.3486e-02, 2.0966e-02, 2.3758e-02, ..., 9.0637e-03, + 1.1623e-05, -1.4076e-02], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([1.3803, 1.3287, 1.4781, ..., 1.5120, 1.3130, 1.4137], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0475, -0.0935, -0.0597, ..., 
0.0320, 0.0142, -0.0661], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 1.6296e-02, -1.0147e-02, 2.2263e-02, ..., 1.8875e-02, + -4.6844e-03, -1.2360e-02], + [ 1.4534e-02, -1.0414e-02, -2.5024e-02, ..., -1.7578e-02, + -3.4729e-02, -2.3346e-02], + [-2.1347e-02, 2.1301e-02, 3.8509e-03, ..., 8.6441e-03, + 1.4847e-02, -6.3400e-03], + ..., + [-4.7302e-03, -2.5574e-02, 7.4959e-03, ..., 3.6087e-03, + 1.5732e-02, -2.2202e-02], + [-6.3324e-04, 8.2550e-03, -1.3161e-02, ..., 5.1918e-03, + 2.1324e-03, 1.3359e-02], + [-5.0240e-03, 4.4479e-03, -1.5625e-02, ..., 1.8707e-02, + -4.8995e-05, 1.2718e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.2683, -0.3921, -0.3276, ..., -0.3716, -0.2025, -0.3127], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0052, -0.0143, 0.0135, ..., 0.0038, 0.0296, -0.0021], + [ 0.0280, -0.0070, -0.0129, ..., -0.0207, 0.0208, 0.0257], + [ 0.0018, -0.0234, 0.0009, ..., 0.0089, -0.0099, -0.0107], + ..., + [ 0.0084, -0.0017, 0.0058, ..., -0.0016, -0.0057, -0.0010], + [ 0.0047, -0.0170, -0.0032, ..., 0.0134, -0.0184, 0.0449], + [-0.0015, -0.0398, -0.0143, ..., -0.0135, 0.0247, 0.0222]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0358, 0.0342, 0.0543, ..., 0.0743, -0.0069, 0.0033], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.5167, 2.3499, 2.4777, ..., 0.5123, 2.0356, 2.4509], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.2338, -0.0299, 0.1534, ..., 0.4063, 0.7359, -0.2059], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0072, -0.0024, -0.0093, ..., 0.0206, -0.0025, -0.0194], + [ 0.0068, 0.0083, 0.0218, ..., -0.0244, -0.0298, 0.0023], + [ 0.0273, 0.0015, -0.0178, ..., -0.0870, 0.0066, -0.0008], + ..., + [ 0.0076, -0.0371, -0.0106, ..., 0.0053, -0.0015, -0.0093], + [-0.0015, 0.0093, -0.0339, ..., 
-0.0024, 0.0004, -0.0021], + [ 0.0035, -0.0088, 0.0025, ..., -0.0005, 0.0056, -0.0153]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0615, 0.0997, -0.5298, ..., 0.0029, -0.0045, -0.0547], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0042, -0.0015, 0.0274, ..., 0.0097, -0.0203, 0.0125], + [-0.0057, 0.0277, 0.0067, ..., 0.0058, -0.0193, 0.0007], + [-0.0005, 0.0042, 0.0217, ..., 0.0109, 0.0060, 0.0009], + ..., + [ 0.0093, -0.0028, -0.0129, ..., 0.0005, 0.0210, -0.0072], + [ 0.0155, 0.0005, 0.0134, ..., -0.0217, -0.0046, 0.0098], + [ 0.0043, -0.0210, -0.0279, ..., -0.0082, -0.0022, 0.0044]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0168, 0.0077, -0.0467, ..., 0.0064, -0.0126, -0.0271], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.4813, 1.5299, 1.5828, ..., 1.5154, 1.4352, 1.5897], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1628, -0.0896, -0.0374, ..., -0.0098, -0.0610, -0.1625], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0061, -0.0101, 0.0220, ..., 0.0076, -0.0179, -0.0062], + [ 0.0287, 0.0189, 0.0143, ..., -0.0079, 0.0128, -0.0096], + [-0.0176, 0.0025, -0.0220, ..., -0.0191, -0.0070, -0.0005], + ..., + [ 0.0012, -0.0170, -0.0051, ..., -0.0094, -0.0273, 0.0126], + [ 0.0056, -0.0026, 0.0170, ..., 0.0264, -0.0188, -0.0084], + [ 0.0042, 0.0020, 0.0170, ..., -0.0107, -0.0194, -0.0005]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2793, -0.3450, -0.2959, ..., -0.1840, -0.1981, -0.2493], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-1.0500e-03, -1.4481e-02, 9.7084e-04, ..., 3.5362e-03, + -7.4148e-04, 2.0218e-02], + [ 1.8265e-02, -1.7059e-02, -6.9523e-04, ..., -7.4577e-04, + -8.8272e-03, -1.0271e-03], + [ 6.8474e-03, 8.5602e-03, 
-2.2079e-02, ..., 1.6556e-02, + -1.0653e-03, -2.1194e-02], + ..., + [ 2.0035e-02, -9.2239e-03, 1.4229e-02, ..., -6.3858e-03, + -7.1640e-03, -2.1927e-02], + [ 1.5144e-02, -9.1791e-06, 7.7324e-03, ..., -7.3395e-03, + 3.1433e-03, 9.2697e-03], + [ 7.2021e-03, 2.0950e-02, 8.4610e-03, ..., 9.9106e-03, + -2.2316e-03, -6.6261e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([0.0066, 0.0104, 0.0044, ..., 0.0064, 0.0797, 0.0699], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.6710, 2.5634, 2.7691, ..., 0.6788, 2.2533, 2.7433], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0355, 0.2774, 0.4173, ..., 0.5667, 0.5320, -0.4676], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0145, -0.0110, 0.0256, ..., -0.0594, -0.0049, 0.0130], + [-0.0188, -0.0083, -0.0112, ..., 0.0231, -0.0353, 0.0120], + [ 0.0106, 0.0018, -0.0003, ..., 0.0022, 0.0061, 0.0072], + ..., + [ 0.0017, 0.0005, 0.0002, ..., 0.0049, -0.0219, -0.0394], + [ 0.0120, 0.0053, -0.0002, ..., -0.0002, 0.0002, -0.0100], + [-0.0057, 0.0138, 0.0124, ..., -0.0036, -0.0128, 0.0019]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2327, 0.0399, -0.0326, ..., -0.0056, 0.0197, 0.0396], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0253, 0.0045, 0.0054, ..., 0.0114, -0.0120, 0.0098], + [ 0.0248, -0.0201, -0.0091, ..., -0.0043, -0.0027, 0.0147], + [ 0.0056, 0.0186, -0.0143, ..., -0.0139, -0.0035, -0.0077], + ..., + [ 0.0012, -0.0047, -0.0184, ..., 0.0032, -0.0123, 0.0104], + [ 0.0084, -0.0137, 0.0252, ..., 0.0189, -0.0143, 0.0102], + [-0.0266, -0.0148, -0.0076, ..., 0.0242, -0.0059, 0.0166]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0083, 0.0090, -0.0957, ..., -0.0067, 0.0007, -0.0046], + device='cuda:1', dtype=torch.float16, 
requires_grad=True)Parameter containing: +tensor([1.5726, 1.4541, 1.5816, ..., 1.7312, 1.4169, 1.5937], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1270, -0.2203, -0.0099, ..., -0.0846, -0.0867, -0.1574], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0049, 0.0010, -0.0008, ..., 0.0092, -0.0068, -0.0089], + [-0.0189, 0.0112, -0.0008, ..., -0.0095, -0.0150, 0.0131], + [-0.0017, -0.0340, 0.0049, ..., -0.0096, 0.0049, -0.0091], + ..., + [ 0.0015, -0.0103, -0.0238, ..., -0.0044, -0.0164, -0.0042], + [ 0.0059, -0.0020, -0.0025, ..., 0.0057, 0.0186, 0.0068], + [ 0.0137, 0.0040, -0.0026, ..., -0.0155, 0.0179, -0.0174]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1598, -0.3298, -0.3064, ..., -0.3005, -0.3159, -0.1328], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-5.0659e-03, -1.4755e-02, 2.9678e-02, ..., -1.4786e-02, + 3.0472e-02, 1.2962e-02], + [ 8.6517e-03, -2.0859e-02, -1.2672e-02, ..., 1.1673e-02, + -2.9373e-02, 4.4823e-03], + [-1.6266e-02, 4.0253e-02, -6.0081e-03, ..., 2.8193e-05, + -1.5068e-02, -1.5480e-02], + ..., + [-1.2833e-02, 1.1993e-02, -5.6553e-04, ..., 5.5046e-03, + 1.6586e-02, -7.4272e-03], + [ 2.9144e-02, 8.1482e-03, -1.4267e-02, ..., 2.8549e-02, + 3.4962e-03, -7.6218e-03], + [ 2.3270e-02, -1.7654e-02, -1.4374e-02, ..., -3.7155e-03, + -1.8509e-02, -3.0289e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0115, 0.0317, -0.0131, ..., -0.0563, -0.0150, 0.0325], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.7785, 2.7386, 2.7390, ..., 0.8678, 2.4946, 2.8710], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.1475, -0.0199, 0.2092, ..., 0.4587, 0.5408, -0.2745], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0120, -0.0190, -0.0403, ..., -0.0117, 0.0023, 0.0093], + [ 0.0185, 
0.0156, 0.0064, ..., -0.0211, 0.0304, 0.0128], + [-0.0041, -0.0232, -0.0050, ..., -0.0144, -0.0013, 0.0115], + ..., + [-0.0279, -0.0333, 0.0062, ..., -0.0130, -0.0025, 0.0134], + [ 0.0011, 0.0101, 0.0281, ..., -0.0020, 0.0121, 0.0017], + [-0.0162, 0.0049, -0.0176, ..., 0.0049, 0.0010, -0.0232]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0367, -0.0407, -0.0178, ..., 0.0190, 0.0422, 0.0333], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0003, -0.0005, -0.0012, ..., 0.0035, 0.0030, -0.0334], + [-0.0137, -0.0057, 0.0201, ..., -0.0077, -0.0303, -0.0032], + [-0.0225, -0.0056, 0.0133, ..., 0.0120, -0.0059, -0.0108], + ..., + [ 0.0131, 0.0180, -0.0046, ..., 0.0325, -0.0201, -0.0211], + [-0.0181, 0.0204, -0.0102, ..., -0.0033, 0.0038, -0.0071], + [-0.0171, -0.0187, 0.0197, ..., 0.0140, -0.0235, -0.0155]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0430, 0.0264, -0.0948, ..., -0.0741, -0.0225, -0.0398], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.6421, 1.5236, 1.6723, ..., 1.8496, 1.4900, 1.6385], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0308, -0.3167, -0.0299, ..., -0.0403, -0.0753, -0.2397], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0262, -0.0157, 0.0032, ..., -0.0235, -0.0293, -0.0457], + [ 0.0032, -0.0053, 0.0187, ..., 0.0109, 0.0186, 0.0035], + [ 0.0222, -0.0173, 0.0017, ..., -0.0083, 0.0020, 0.0136], + ..., + [ 0.0419, 0.0184, -0.0148, ..., 0.0049, 0.0197, -0.0116], + [ 0.0028, -0.0195, 0.0098, ..., 0.0172, -0.0162, -0.0341], + [ 0.0033, -0.0130, -0.0162, ..., 0.0044, -0.0219, -0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3088, -0.1615, -0.2668, ..., -0.2515, -0.2261, -0.2349], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: 
+tensor([[-0.0020, -0.0129, -0.0182, ..., 0.0192, -0.0327, -0.0179], + [ 0.0028, -0.0038, -0.0319, ..., -0.0060, 0.0162, 0.0273], + [ 0.0273, -0.0120, -0.0101, ..., -0.0013, -0.0036, -0.0072], + ..., + [ 0.0023, -0.0175, -0.0372, ..., 0.0134, 0.0053, 0.0087], + [ 0.0148, -0.0124, -0.0037, ..., 0.0165, -0.0067, 0.0085], + [-0.0011, 0.0023, 0.0334, ..., -0.0055, -0.0134, 0.0073]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.1197, -0.0195, 0.1592, ..., -0.0289, -0.0276, -0.0573], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([3.1758, 3.2323, 3.2080, ..., 1.1438, 2.6186, 3.2341], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.3706, 0.0532, -0.3175, ..., 0.1710, 0.3347, -0.2024], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 2.1271e-02, 1.1780e-02, -6.7997e-04, ..., 6.0921e-03, + -3.5896e-03, -7.0524e-04], + [ 5.5809e-03, -1.0429e-02, -1.2751e-03, ..., 2.9259e-03, + 3.8683e-05, 2.1801e-03], + [-2.0950e-02, 3.2496e-04, -2.4063e-02, ..., -6.4819e-02, + -3.2349e-03, -4.4746e-03], + ..., + [ 1.0742e-02, -3.3913e-03, -1.1414e-02, ..., -2.3003e-03, + 2.1942e-02, 2.1652e-02], + [-1.3786e-02, 1.0185e-02, -4.3068e-03, ..., 9.9850e-04, + 7.5111e-03, 2.2797e-02], + [ 4.1842e-05, 1.5434e-02, -5.5361e-04, ..., 8.8730e-03, + 1.2108e-02, 1.5915e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.0933, -0.0383, -0.0063, ..., 0.0316, 0.0111, -0.0547], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0098, -0.0170, 0.0142, ..., -0.0043, -0.0189, -0.0120], + [-0.0104, 0.0156, -0.0006, ..., -0.0124, 0.0124, -0.0015], + [ 0.0062, -0.0193, 0.0082, ..., 0.0038, 0.0258, -0.0131], + ..., + [ 0.0200, 0.0005, -0.0168, ..., -0.0123, -0.0038, 0.0042], + [ 0.0073, 0.0172, -0.0105, ..., 0.0243, 0.0107, 0.0142], + [ 0.0281, 0.0236, 0.0068, ..., -0.0044, -0.0162, 
-0.0067]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.1116, -0.0343, -0.0087, ..., -0.0480, -0.0558, -0.0237], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5671, 1.5268, 1.5659, ..., 0.7991, 1.4337, 1.7072], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.0451, -0.2257, -0.1079, ..., 0.0651, -0.1020, -0.2032], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0205, -0.0153, 0.0058, ..., 0.0221, 0.0021, -0.0075], + [-0.0245, 0.0084, 0.0146, ..., -0.0019, 0.0069, -0.0050], + [ 0.0138, 0.0011, 0.0013, ..., 0.0371, 0.0046, 0.0199], + ..., + [ 0.0202, 0.0034, 0.0115, ..., -0.0043, -0.0131, -0.0177], + [ 0.0054, 0.0023, 0.0239, ..., -0.0273, 0.0084, -0.0098], + [ 0.0170, 0.0049, -0.0137, ..., -0.0036, -0.0199, 0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2671, -0.2556, -0.2236, ..., -0.2886, -0.2778, -0.0958], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0092, -0.0169, -0.0232, ..., 0.0134, -0.0171, -0.0194], + [-0.0089, 0.0153, -0.0176, ..., -0.0238, 0.0060, -0.0118], + [ 0.0023, -0.0021, 0.0147, ..., -0.0046, 0.0072, 0.0077], + ..., + [-0.0114, -0.0102, 0.0014, ..., 0.0213, 0.0042, -0.0110], + [ 0.0055, -0.0080, -0.0157, ..., -0.0074, -0.0366, 0.0046], + [ 0.0316, 0.0016, 0.0092, ..., -0.0057, -0.0119, -0.0157]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0195, -0.0715, 0.1113, ..., -0.1528, 0.0856, -0.0403], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([3.1596, 3.0366, 2.9180, ..., 1.5081, 2.5528, 3.1860], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.2221, 0.6211, -0.4953, ..., 0.2567, 0.0674, -0.4556], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0018, 0.0040, -0.0024, ..., 
0.0355, -0.0060, -0.0008], + [ 0.0084, 0.0003, -0.0255, ..., 0.0059, 0.0073, 0.0152], + [-0.0014, -0.0324, -0.0615, ..., 0.0012, -0.0250, 0.0104], + ..., + [ 0.0085, -0.0075, 0.0259, ..., -0.0010, 0.0261, -0.0071], + [-0.0081, 0.0082, 0.0233, ..., 0.0083, -0.0072, 0.0106], + [-0.0114, 0.0072, 0.0143, ..., -0.0098, -0.0095, 0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.4348, -0.0032, -0.1772, ..., -0.0727, 0.0069, -0.1202], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 1.5135e-03, 2.1210e-02, -7.4272e-03, ..., -6.0768e-03, + -2.6584e-04, 1.1932e-02], + [ 1.6260e-04, 3.7441e-03, -1.4343e-02, ..., 1.8158e-02, + -1.2825e-02, -8.9407e-07], + [ 7.9193e-03, -1.8036e-02, 1.1223e-02, ..., -3.8147e-02, + -2.9087e-03, -5.4131e-03], + ..., + [-1.4465e-02, 1.2436e-02, -1.3103e-03, ..., -8.3694e-03, + 1.7273e-02, -9.1934e-04], + [-3.6774e-03, 3.2272e-03, 2.3682e-02, ..., -4.9706e-03, + 7.7705e-03, -1.4359e-02], + [ 1.0548e-03, -1.9181e-04, -1.6556e-02, ..., -1.1215e-02, + -6.9504e-03, -1.4145e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.0270, -0.0575, 0.0640, ..., -0.1500, 0.0551, -0.1482], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5692, 1.8371, 1.9328, ..., 0.8499, 1.4697, 1.7667], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([0.3892, 0.2379, 0.1540, ..., 0.6268, 0.2169, 0.1550], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0026, 0.0020, -0.0059, ..., 0.0052, -0.0039, -0.0026], + [ 0.0157, -0.0013, -0.0331, ..., 0.0240, -0.0025, -0.0044], + [ 0.0204, 0.0067, -0.0038, ..., -0.0049, 0.0252, -0.0015], + ..., + [ 0.0077, -0.0031, 0.0343, ..., -0.0061, 0.0099, -0.0152], + [-0.0005, 0.0071, -0.0140, ..., -0.0133, -0.0071, 0.0009], + [-0.0012, 0.0004, 0.0090, ..., -0.0095, -0.0076, -0.0047]], + device='cuda:1', 
dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3181, -0.2350, -0.2834, ..., -0.2498, -0.1847, -0.2732], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0366, 0.0088, -0.0167, ..., -0.0147, -0.0033, 0.0076], + [ 0.0050, 0.0103, 0.0122, ..., 0.0073, -0.0067, -0.0138], + [-0.0014, 0.0265, 0.0125, ..., -0.0273, 0.0040, -0.0028], + ..., + [ 0.0138, 0.0236, -0.0157, ..., 0.0255, -0.0269, -0.0320], + [ 0.0021, -0.0071, 0.0076, ..., -0.0042, 0.0137, -0.0034], + [-0.0111, 0.0175, -0.0121, ..., -0.0294, -0.0013, -0.0084]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1592, -0.0064, 0.1971, ..., 0.0551, -0.0191, 0.0068], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.5678, 2.3816, 2.5756, ..., 1.8250, 2.4113, 2.7505], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([ 0.1836, 0.3324, -0.2291, ..., -0.1089, 0.5930, -0.2813], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0123, -0.0058, 0.0023, ..., 0.0007, 0.0222, -0.0039], + [-0.0170, 0.0098, 0.0225, ..., -0.0176, -0.0123, -0.0115], + [-0.0039, -0.0008, 0.0077, ..., 0.0050, 0.0103, -0.0090], + ..., + [ 0.0026, 0.0136, -0.0086, ..., 0.0001, -0.0151, -0.0018], + [ 0.0049, 0.0023, -0.0202, ..., 0.0277, 0.0162, -0.0295], + [-0.0040, 0.0099, -0.0184, ..., -0.0405, -0.0316, -0.0159]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0484, -0.0519, -1.9238, ..., 0.1216, -0.0272, 0.0114], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-1.9836e-02, 4.5563e-02, -5.3253e-03, ..., 7.5989e-03, + 7.5607e-03, 2.2369e-02], + [ 2.1725e-03, 1.5160e-02, -1.8494e-02, ..., 6.2218e-03, + -9.6977e-05, 1.4214e-02], + [-7.4997e-03, 1.7151e-02, -1.4481e-02, ..., -2.2156e-02, + 1.0445e-02, 9.1171e-03], + ..., + [-2.1515e-02, -1.4336e-02, -3.9558e-03, ..., 
2.7351e-03, + -3.2997e-03, 2.3087e-02], + [ 1.9348e-02, 1.7441e-02, 4.3488e-03, ..., 6.1913e-03, + -1.8509e-02, 2.2385e-02], + [ 1.6891e-02, 7.5951e-03, -1.6037e-02, ..., -4.5509e-03, + 6.0081e-03, 1.7471e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.2128, 0.1381, 0.1891, ..., 0.0071, 0.0607, -0.0499], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5486, 1.4414, 1.4516, ..., 0.8932, 1.4491, 1.5433], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0080, -0.0616, -0.0676, ..., -0.0960, -0.1752, -0.1096], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0178, -0.0028, -0.0424, ..., -0.0160, -0.0282, 0.0149], + [ 0.0117, 0.0109, -0.0199, ..., 0.0012, 0.0099, -0.0078], + [-0.0045, -0.0047, -0.0147, ..., -0.0005, -0.0079, -0.0103], + ..., + [-0.0171, -0.0022, -0.0138, ..., 0.0246, -0.0203, -0.0171], + [ 0.0086, 0.0179, -0.0107, ..., -0.0160, -0.0177, -0.0097], + [ 0.0428, -0.0029, -0.0069, ..., -0.0147, 0.0129, 0.0242]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2034, -0.6177, -0.2632, ..., -0.2834, -0.4905, -0.3958], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0127, -0.0107, 0.0178, ..., -0.0069, -0.0052, 0.0050], + [ 0.0019, -0.0100, -0.0021, ..., 0.0078, 0.0061, -0.0132], + [-0.0086, -0.0003, 0.0180, ..., 0.0017, 0.0049, 0.0218], + ..., + [-0.0206, -0.0111, -0.0025, ..., -0.0035, 0.0097, 0.0248], + [ 0.0121, -0.0078, 0.0101, ..., -0.0093, 0.0092, -0.0375], + [ 0.0031, -0.0039, 0.0001, ..., -0.0069, 0.0013, 0.0023]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.0218, -0.1331, -0.1234, ..., -0.1169, 0.0630, 0.0916], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.6224, 1.6137, 1.6369, ..., 1.4513, 1.7169, 1.8505], device='cuda:1', + 
requires_grad=True)Parameter containing: +tensor([-0.0204, -0.0891, 0.0739, ..., 0.0297, 0.1517, -0.2596], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([0.9374, 1.0217, 0.9349, ..., 0.8221, 1.0595, 1.0507], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([-0.0061, 0.1510, -0.0549, ..., 0.2748, 0.0765, 0.0091], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0016, 0.0020, 0.0002, ..., -0.0013, 0.0008, 0.0015], + [ 0.0042, 0.0029, 0.0002, ..., 0.0010, 0.0015, -0.0012], + [ 0.0018, 0.0007, -0.0012, ..., -0.0029, -0.0009, 0.0026], + ..., + [ 0.0216, 0.0055, -0.0101, ..., -0.0065, -0.0029, 0.0037], + [ 0.0188, 0.0073, -0.0077, ..., -0.0025, -0.0009, 0.0057], + [ 0.0330, 0.0281, 0.0289, ..., 0.0160, 0.0102, -0.0310]], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0109, 0.0096, -0.0035, ..., -0.0010, 0.0115, -0.0039], + [-0.0054, -0.0049, 0.0055, ..., 0.0239, 0.0171, -0.0071], + [ 0.0032, 0.0101, -0.0155, ..., 0.0070, -0.0119, -0.0098], + ..., + [-0.0112, 0.0009, 0.0023, ..., -0.0169, -0.0096, -0.0147], + [ 0.0080, 0.0086, 0.0201, ..., -0.0108, -0.0191, 0.0043], + [-0.0168, -0.0018, -0.0156, ..., 0.0095, 0.0383, 0.0007]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0139, 0.0147, -0.0089, ..., -0.0349, -0.0042, -0.0188], + [-0.0586, -0.0059, -0.0179, ..., 0.0012, -0.0068, 0.0254], + [-0.0211, -0.0321, 0.0308, ..., -0.0189, 0.0091, 0.0066], + ..., + [-0.0217, -0.0089, -0.0143, ..., -0.0153, 0.0053, 0.0016], + [-0.0086, -0.0083, -0.0049, ..., 0.0208, -0.0048, -0.0041], + [-0.0087, -0.0024, 0.0105, ..., -0.0037, -0.0148, 0.0030]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2406, 0.1490, 0.4639, ..., -0.0241, 0.0349, -0.0144], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 3.2120e-03, 5.4474e-03, -1.3733e-02, ..., -8.3351e-04, + -6.3782e-03, 
1.3786e-02], + [-9.3231e-03, 9.2888e-04, -1.3893e-02, ..., -1.1345e-02, + 1.7748e-03, -8.9569e-03], + [ 5.6648e-04, 1.0345e-02, 8.1718e-05, ..., 1.3893e-02, + 7.3791e-05, 6.1369e-04], + ..., + [-3.0212e-02, 3.7193e-03, 1.2009e-02, ..., 7.0229e-03, + 8.0566e-03, 1.4572e-02], + [ 6.4421e-04, -1.0941e-02, -6.3133e-03, ..., 5.6953e-03, + -7.6637e-03, -2.9297e-03], + [-4.3526e-03, 4.7607e-03, -6.6528e-03, ..., 7.3853e-03, + 4.3716e-03, 7.4348e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-6.8054e-02, -3.2990e-02, 4.1809e-02, -1.0217e-01, -5.4932e-03, + 4.2877e-03, -3.8681e-03, -4.8370e-02, -2.2751e-02, -2.1248e-03, + 1.4526e-02, -3.3607e-03, 4.7058e-02, 1.4565e-02, -6.0730e-02, + -1.1035e-01, -2.3251e-03, 2.5635e-02, -6.1523e-02, -1.0469e+00, + -3.0869e-02, 6.7078e-02, -1.4503e-02, 2.6855e-02, -7.4646e-02, + 9.9121e-02, 1.1932e-02, -3.7598e-02, -2.9980e-01, 7.1533e-02, + -2.9648e-02, -1.3227e-03, 2.5940e-02, 6.6650e-02, 5.8899e-02, + -2.0615e-02, -3.5889e-02, 1.6830e-02, 1.0229e-01, 5.3040e-02, + 3.8280e-03, -2.3972e-02, 1.2512e-01, -4.3579e-02, 4.3335e-02, + 2.9175e-02, -2.9160e-02, -3.9093e-02, -2.2247e-02, 9.2850e-03, + -9.8511e-02, 3.3478e-02, -4.2023e-02, -3.2043e-02, -4.7394e-02, + 1.1938e-01, 5.7709e-02, 1.2903e-01, -2.9327e-02, -1.1314e-02, + 7.3090e-03, 3.3844e-02, 2.8290e-02, -1.6266e-02, 7.0740e-02, + -7.3486e-02, 5.4413e-02, -6.0120e-03, 5.1651e-03, -1.4600e-01, + -3.6896e-02, 2.1088e-02, 3.6914e-01, 9.0393e-02, -5.8517e-03, + 8.8318e-02, -8.3847e-03, 9.6512e-03, -3.3783e-02, -4.0710e-02, + 3.9703e-02, -2.7776e-04, -3.7262e-02, 8.9539e-02, -7.3853e-02, + -3.6743e-02, 1.1426e-01, 5.7335e-03, 6.6589e-02, 3.0502e-02, + 2.4170e-02, 6.7017e-02, -4.7363e-02, 1.1696e-02, -5.0568e-02, + 3.9001e-02, 1.2695e-02, -3.1647e-02, -4.1016e-02, -6.2683e-02, + -2.7084e-02, -2.3511e-01, 2.4002e-02, 1.0413e-01, 1.2520e-02, + 1.4908e-02, -8.5693e-02, -6.4575e-02, -2.4414e-02, -4.9408e-02, + 3.0045e-02, 2.5436e-02, -4.7333e-02, 
-3.4576e-02, -2.5772e-02, + 2.5345e-02, 7.3669e-02, 2.6398e-02, -1.2976e-01, 5.1544e-02, + 6.9199e-03, -6.0028e-02, -8.6792e-02, 1.3252e-02, 1.9196e-02, + -1.3283e-02, 1.0910e-02, 3.8025e-02, 7.4120e-03, -2.3865e-02, + -3.4882e-02, 4.7731e-04, -7.3059e-02, -1.1017e-02, -5.8685e-02, + -2.5238e-02, -2.3773e-02, 5.0201e-02, -2.6428e-02, -5.1361e-02, + -7.4219e-02, 4.5624e-02, 5.3192e-02, 1.3208e-01, 4.1931e-02, + 1.5083e-02, -1.1676e-01, 7.9895e-02, 6.4209e-02, 1.0178e-02, + 6.6681e-03, 8.0490e-03, -2.8870e-02, -6.2790e-03, -4.5357e-03, + -7.2266e-02, -6.2744e-02, -4.0955e-02, 1.5533e-02, -2.6749e-02, + 2.5845e-03, -8.1787e-02, -1.1185e-02, 1.2634e-01, -2.3071e-02, + -2.1301e-02, 5.0415e-02, 1.6006e-02, 1.5850e-03, -7.7362e-03, + -2.8809e-02, -1.2871e-02, -1.6708e-02, -1.0777e-03, 2.6367e-02, + -7.3395e-03, -1.2238e-02, 2.3804e-02, -1.8433e-02, 5.7640e-03, + -3.2379e-02, -2.2598e-02, 1.7105e-02, 2.0096e-02, -6.7871e-02, + 3.6926e-02, -3.5248e-02, 1.6699e-01, 4.4495e-02, 1.0643e-02, + 6.1829e-02, -5.8960e-02, -2.0401e-02, 1.4259e-02, 1.8372e-02, + 1.3344e-02, 1.5945e-02, 1.5259e-02, 2.3511e-01, 5.3436e-02, + -2.8702e-02, -3.5767e-02, -7.1533e-02, 2.8320e-02, -3.3447e-02, + -4.7516e-02, -2.3035e-01, -9.4757e-03, 1.0022e-01, -4.8004e-02, + 4.8248e-02, 5.3329e-03, -1.3863e-02, 2.1835e-02, -1.0999e-01, + 4.5776e-02, 6.6772e-02, 1.4633e-02, -7.9956e-02, -2.9129e-02, + 7.8003e-02, 8.9050e-02, 1.2866e-01, 2.3392e-02, 3.8666e-02, + -1.2779e-02, 1.0010e-01, 5.1361e-02, 2.8229e-02, -2.4048e-02, + -8.2031e-02, 2.1572e-03, 1.1932e-02, 4.0558e-02, 1.6785e-02, + -5.2948e-02, 1.3023e-02, -3.5431e-02, 6.7215e-03, 5.4291e-02, + 1.8219e-02, 6.2439e-02, 9.8724e-03, 1.6693e-02, 3.9520e-02, + 1.7761e-02, -6.9952e-04, -7.2388e-02, -2.2934e-02, -3.5400e-02, + -5.9448e-02, -6.7566e-02, 1.5945e-02, -4.6814e-02, 1.3969e-02, + -1.1818e-02, -3.3112e-02, 1.5488e-03, 8.3618e-02, 2.3468e-02, + 2.1317e-02, -1.1615e-01, 7.6752e-03, 1.2589e-02, 2.4185e-02, + 4.6021e-02, 7.3662e-03, 4.4403e-02, 
-4.4785e-03, -5.6610e-03, + 4.7180e-02, 8.5144e-03, 2.9205e-02, -3.5370e-02, 6.6956e-02, + -5.9204e-03, 1.6235e-02, 5.1819e-02, -3.0167e-02, -4.4739e-02, + -3.5229e-03, -1.7444e-01, 1.3969e-02, 9.8343e-03, 1.5022e-02, + 2.7435e-02, 1.0309e-01, 6.2141e-03, 6.8848e-02, -3.3936e-02, + -8.7036e-02, -4.8737e-02, -6.1218e-02, -3.6224e-02, -5.8651e-04, + -2.9316e-03, 2.5574e-02, 4.9934e-03, -6.0394e-02, -1.1604e-02, + 6.9641e-02, -2.2621e-03, -3.5405e-04, 9.9121e-02, -5.4199e-02, + 5.6976e-02, 7.6782e-02, 3.0914e-02, 5.4718e-02, 8.3374e-02, + -5.9776e-03, 1.4679e-02, -8.5449e-02, -2.2125e-02, 1.2827e-03, + 2.2003e-02, -1.9577e-02, 1.0840e-01, 1.0445e-02, -3.1006e-02, + -1.0608e-01, -2.5063e-03, -3.9398e-02, 4.1473e-02, 9.4531e-01, + -1.7480e-01, 2.6947e-02, 9.5459e-02, -9.0866e-03, -1.5762e-02, + 9.8022e-02, 4.6143e-02, 2.5925e-02, -1.1609e-01, -2.6764e-02, + -3.0731e-02, -6.2469e-02, -2.6154e-02, 1.9211e-02, -1.2093e-02, + 2.5696e-02, 2.4597e-02, 3.1036e-02, -3.3356e-02, -3.4210e-02, + -6.2656e-04, 1.9779e-03, 3.5645e-02, 1.4175e-02, 6.9763e-02, + 3.1395e-03, 1.4519e-02, -7.8506e-03, 1.6876e-02, 1.1185e-02, + -2.8137e-02, -8.9233e-02, 7.1899e-02, 9.4528e-03, 1.0254e-02, + -2.3453e-02, -4.9194e-02, 5.1880e-02, -3.7750e-02, -2.7008e-03, + -2.6794e-02, 3.9001e-02, 2.9114e-02, 9.4360e-02, -3.6469e-02, + 8.0322e-02, -2.8580e-02, -7.2327e-02, 7.2632e-02, 6.1340e-02, + -9.0576e-02, -5.8823e-03, 3.3722e-02, -1.2524e-01, -2.5284e-02, + -5.0812e-03, -5.8228e-02, -1.0323e-02, -5.1971e-02, 1.4854e-02, + 6.3660e-02, -2.4357e-03, 6.1676e-02, 1.5701e-02, -6.9763e-02, + -5.1918e-03, 1.1115e-01, 9.1370e-02, -3.6392e-03, -5.7648e-02, + 3.8075e-04, 1.0559e-02, 6.3477e-02, -3.2623e-02, -3.0762e-02, + -5.0697e-03, -5.0171e-02, 7.5806e-02, 3.7018e-02, 1.3802e-02, + 6.2317e-02, 7.5317e-02, 6.1707e-02, 9.2346e-02, -1.6541e-02, + -4.5349e-02, -1.5888e-03, 8.4412e-02, -1.1151e-01, 1.6040e-01, + -4.4531e-01, -2.8351e-02, 7.9498e-03, 6.2927e-02, 3.2043e-02, + 6.7627e-02, 6.7810e-02, -2.2354e-02, 
-6.8848e-02, -2.7359e-02, + -1.2466e-02, -1.7288e-02, 3.1219e-02, -1.2909e-02, -7.4768e-02, + -8.1726e-02, 6.1676e-02, -2.0020e-02, -3.9597e-03, 1.9165e-01, + -5.9296e-02, 1.4763e-02, 2.3895e-02, 9.0332e-03, 1.3268e-02, + -2.5528e-02, 3.5217e-02, -2.2583e-02, 1.4984e-02, 1.0956e-02, + -3.8223e-03, -3.0579e-02, 2.8114e-03, -5.1056e-02, 8.6426e-02, + -3.9795e-02, -1.8005e-02, -9.5886e-02, 1.1017e-02, -6.2225e-02, + -3.1982e-02, 5.2765e-02, 3.7811e-02, 3.0155e-03, -3.3447e-02, + 2.3098e-03, 3.8300e-02, -7.0724e-03, 2.4094e-02, 3.9856e-02, + 5.1003e-03, -2.6169e-02, 1.3672e-02, 1.8967e-02, -7.4829e-02, + -2.9785e-02, -1.1353e-01, 8.1787e-03, -1.0760e-01, 1.2680e-02, + -9.7733e-03, -7.5684e-02, 4.1504e-03, 1.5175e-02, -5.3925e-02, + -6.9885e-03, -8.6731e-02, -1.7380e-02, -4.2175e-02, -3.3630e-02, + -7.9041e-02, -9.5886e-02, 9.3384e-02, -1.5327e-02, 1.0315e-02, + 7.7896e-03, -2.7298e-02, -3.5278e-02, -2.7573e-02, -6.9214e-02, + 1.7685e-02, 7.1106e-03, 1.8295e-02, 3.5522e-02, 3.6438e-02, + 3.7842e-02, -8.8959e-03, 3.0457e-02, -2.2018e-02, 6.5918e-02, + 1.4091e-02, -8.5144e-02, 3.9093e-02, -3.1250e-02, 3.2898e-02, + 4.5349e-02, -4.8981e-03, -2.3346e-02, -1.4252e-02, -3.4973e-02, + -7.4959e-03, 1.8967e-02, 8.8043e-03, 1.5701e-02, -3.9612e-02, + -8.4610e-03, -7.1411e-02, -1.5762e-02, 1.0944e-01, -4.9042e-02, + 1.1520e-02, -5.0964e-02, -5.5511e-02, 7.5245e-04, -2.2736e-02, + 1.3863e-02, -9.8938e-02, -5.9631e-02, -1.8616e-02, 1.4084e-02, + 2.2812e-02, 3.1342e-02, 2.8580e-02, 2.3155e-03, -5.0201e-02, + 2.8488e-02, 3.7354e-02, 2.1378e-02, -3.1708e-02, 9.5703e-02, + -7.6050e-02, 5.0781e-02, 7.0915e-03, 5.2368e-02, -2.5894e-02, + -2.5925e-02, -3.4943e-02, 2.7786e-02, 1.8723e-02, 5.9296e-02, + 1.9211e-02, 4.8889e-02, 6.6772e-02, -4.4952e-02, -2.7298e-02, + 3.8567e-03, -1.3252e-02, -6.6467e-02, 1.8066e-02, -3.2288e-02, + 5.1239e-02, -5.8075e-02, -5.9509e-02, 1.2122e-01, 2.5482e-02, + 9.1003e-02, 6.6467e-02, -3.9154e-02, -2.9022e-02, -4.0100e-02, + 3.3295e-02, -3.6469e-02, 
-5.4413e-02, 4.5258e-02, -4.5929e-02, + -1.8219e-02, -6.0638e-02, 1.9638e-02, 5.7602e-03, -3.3234e-02, + -2.8839e-03, 3.9215e-02, -4.5990e-02, 4.1229e-02, 6.1951e-03, + 5.2734e-02, 4.0497e-02, -7.1594e-02, -6.1554e-02, -4.0253e-02, + -6.9199e-03, 2.4307e-02, 4.0863e-02, -9.3307e-03, -4.0527e-02, + 3.4088e-02, 1.7509e-03, 2.4307e-02, -4.4983e-02, 4.6875e-02, + -7.0496e-02, 8.0872e-02, 3.1891e-02, -3.9825e-02, 3.0853e-02, + 6.2195e-02, 6.9885e-02, 7.8430e-02, 5.4398e-03, -7.7209e-02, + -2.5879e-02, 5.3528e-02, 1.0399e-02, 2.3384e-03, 1.1877e-01, + 7.5500e-02, -1.1719e-02, -3.9795e-02, -3.1143e-02, -1.6998e-02, + -3.6163e-02, -2.7847e-02, 5.0812e-03, -2.8656e-02, 3.3203e-02, + 6.3049e-02, -2.5665e-02, -5.2490e-02, 4.9500e-02, 5.5054e-02, + -2.0462e-02, 3.9825e-02, 5.5908e-02, 3.5583e-02, 6.1066e-02, + 1.4175e-02, -5.0751e-02, -6.8848e-02, -1.0551e-02, -5.2551e-02, + 6.3538e-02, 4.2419e-02, 1.3580e-03, -6.4880e-02, 2.7115e-02, + 2.2491e-02, -1.0284e-01, -5.1208e-02, -9.2468e-03, 3.0960e-02, + 4.0161e-02, 4.1107e-02, 4.2908e-02, -4.8920e-02, -6.6757e-03, + -1.2726e-02, 5.1849e-02, 3.6041e-02, -2.0264e-02, -3.6285e-02, + -2.1423e-02, 4.6692e-03, -1.8871e-04, -3.7018e-02, 4.8615e-02, + -3.8452e-02, -2.3209e-02, 8.7585e-02, -3.8757e-03, 4.6265e-02, + -3.4790e-03, -1.3857e-03, -3.9612e-02, 7.3608e-02, 3.4370e-03, + 3.3798e-03, 3.3374e-01, -1.3329e-02, -1.2596e-02, 1.2451e-02, + 4.9706e-03, -7.4585e-02, -5.8105e-02, 3.9215e-03, 5.8823e-03, + 4.3610e-02, 6.9275e-02, -5.4535e-02, -2.2919e-02, 3.4271e-02, + 8.8013e-02, 1.5236e-02, 1.9028e-02, 2.9572e-02, -4.9362e-03, + -1.6998e-02, 2.4063e-02, -6.8359e-02, -4.0710e-02, -8.0750e-02, + -2.1484e-02, -4.3976e-02, -5.2521e-02, 1.6144e-02, -1.3771e-02, + -7.4615e-03, -8.8318e-02, 3.7750e-02, 2.3937e-03, 4.7668e-02, + 4.7363e-02, 3.9520e-02, -2.2736e-02, 1.9348e-02, 2.7359e-02, + 1.1086e-02, -7.9163e-02, -3.7262e-02, -3.1525e-02, -4.9591e-02, + -5.1056e-02, -3.1830e-02, -5.1575e-02, 6.5491e-02, 2.6031e-02, + 1.2321e-03, 5.7800e-02, 
1.0864e-02, 4.7241e-02, 4.1290e-02, + -6.8665e-02, -3.8471e-03, 5.8838e-02, -9.4986e-03, 2.6894e-03, + 5.6854e-02, 7.0862e-02, -3.1311e-02, 1.4397e-02, -7.6065e-03, + -5.2429e-02, -3.7018e-02, -1.4549e-02, 2.1553e-03, 1.0292e-02, + -2.3651e-02, -5.5809e-03, 5.0774e-03, -4.6051e-02, 1.0658e-02, + 8.3847e-03, 2.6440e-01, -9.6741e-02, 8.6365e-02, -1.8860e-02, + 1.4420e-02, 1.4282e-02, -1.6235e-02, -3.0167e-02, 4.7363e-02, + -5.3741e-02, 3.7170e-02, 1.0132e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.8401, 1.6507, 1.7948, 1.7800, 1.7653, 1.7676, 1.8600, 1.8236, 1.7518, + 1.7366, 1.7352, 1.7177, 1.7873, 1.7211, 1.6886, 1.7202, 1.7312, 1.7573, + 1.6900, 1.7321, 1.8321, 1.7103, 1.7793, 1.7726, 1.8100, 1.7619, 1.7042, + 1.7339, 0.9977, 1.7922, 1.6763, 1.8729, 1.7377, 1.7911, 1.7570, 3.3400, + 1.7528, 1.8010, 1.7648, 1.7912, 1.8502, 1.7852, 1.8444, 1.7257, 1.8150, + 1.8192, 1.7501, 1.7457, 1.6584, 1.8334, 1.7475, 1.7645, 1.7709, 1.7155, + 1.7535, 1.7625, 1.7556, 1.5602, 1.7653, 1.7121, 1.8275, 1.8177, 1.7712, + 1.8210, 1.7243, 1.6933, 1.7744, 1.6863, 1.8974, 1.7603, 1.5926, 1.7837, + 1.7453, 1.6028, 1.7849, 1.7217, 1.8000, 1.7972, 1.6751, 1.7951, 1.7870, + 1.7679, 1.7635, 1.7367, 1.7646, 1.7491, 1.8091, 1.8008, 1.7609, 1.6659, + 1.7907, 1.7056, 1.8515, 1.8243, 1.7269, 1.7128, 1.7645, 1.8122, 1.8631, + 1.7797, 1.7646, 0.7897, 1.6948, 1.7812, 1.6742, 1.7120, 1.6592, 1.7276, + 1.7695, 1.7760, 1.7505, 1.7188, 1.7696, 1.7601, 1.8497, 1.7613, 1.7714, + 1.7569, 1.6808, 1.7755, 1.7906, 1.7683, 1.7978, 1.7816, 1.7833, 1.7623, + 1.7604, 1.7623, 1.6551, 1.7792, 1.7867, 1.7288, 1.7727, 1.6616, 1.8281, + 1.8197, 1.7334, 1.7790, 1.7913, 1.7861, 1.7655, 1.8017, 1.7967, 1.8031, + 1.7339, 1.7511, 1.7536, 1.7109, 1.8131, 1.7464, 1.7341, 1.8073, 1.8755, + 1.7642, 1.7574, 1.7459, 1.7882, 1.6927, 1.7318, 1.7242, 1.7431, 1.7230, + 1.7328, 1.7952, 1.7462, 1.7525, 1.7464, 1.7703, 1.7193, 1.8042, 1.8187, + 1.7647, 1.7742, 1.6977, 1.7334, 1.7333, 
1.7343, 1.7863, 1.6834, 1.7995, + 1.6882, 1.7592, 1.7986, 1.7490, 1.7527, 1.7209, 1.7338, 1.7539, 1.8268, + 1.8150, 1.7164, 1.7983, 1.6627, 1.7415, 1.6635, 1.6967, 1.7464, 1.8372, + 1.7354, 1.7487, 1.7584, 1.7058, 1.6908, 1.7519, 1.7761, 1.5736, 1.2228, + 1.7728, 1.7046, 1.7144, 1.7717, 1.8086, 1.7980, 1.7459, 1.8135, 1.8093, + 1.7349, 1.7571, 1.7937, 1.7435, 1.7194, 1.7525, 1.7763, 1.7040, 1.7347, + 1.8036, 1.7626, 1.7879, 1.7484, 1.7946, 1.7265, 1.7901, 1.7673, 1.7644, + 1.7181, 1.7174, 1.8284, 1.7356, 1.8238, 1.6911, 1.7599, 1.7935, 1.7056, + 1.7744, 1.8074, 1.6990, 1.7394, 0.8243, 1.6952, 1.8240, 1.8201, 1.7702, + 1.8529, 1.4891, 1.8102, 1.7815, 1.7122, 1.7379, 1.7450, 1.7833, 1.7664, + 1.6962, 1.7362, 1.6765, 1.7450, 1.6934, 1.7679, 1.7699, 1.7879, 1.7363, + 1.7685, 1.6439, 1.7475, 1.7023, 1.7775, 1.8394, 1.7594, 1.7362, 1.7523, + 1.7827, 1.7071, 1.8144, 1.6561, 1.7740, 1.7412, 1.6781, 1.7144, 1.7361, + 1.7804, 1.8236, 1.9153, 1.7353, 1.7440, 1.7159, 1.7715, 1.7451, 1.8004, + 1.8078, 1.7116, 1.7427, 1.8237, 1.8078, 1.7338, 1.6746, 1.7068, 1.8114, + 1.7397, 1.7601, 1.7596, 1.6326, 1.5706, 1.8009, 1.6958, 1.7471, 1.7363, + 1.8414, 1.7399, 1.7021, 1.6456, 1.7571, 1.6864, 1.7856, 1.8161, 1.7776, + 1.7265, 1.6645, 1.5918, 1.7744, 1.7140, 1.6622, 1.7617, 1.8093, 1.8087, + 1.7346, 1.7258, 1.6232, 1.7583, 1.8145, 1.7119, 1.7761, 1.7462, 1.6483, + 1.7339, 1.7611, 1.6985, 1.8347, 1.6987, 1.7323, 1.7389, 1.7299, 1.7391, + 1.7672, 1.7488, 1.7839, 1.7665, 1.7631, 1.7777, 1.6339, 1.6938, 1.8157, + 1.7464, 1.6534, 1.7763, 1.7792, 1.7784, 1.7034, 1.6523, 1.7126, 1.7317, + 1.7333, 1.7193, 1.7667, 1.7845, 1.7588, 1.7258, 1.7824, 1.7037, 1.6128, + 1.7279, 1.7288, 1.7944, 1.6676, 1.7848, 1.7242, 1.7464, 1.7064, 1.7405, + 1.7550, 1.7883, 1.6747, 1.8077, 0.6920, 1.7930, 1.6939, 1.5853, 1.7549, + 1.7464, 1.7373, 1.7662, 1.7668, 1.7981, 1.7842, 1.7398, 1.8111, 1.6967, + 1.6228, 1.7450, 1.6924, 1.7361, 1.8465, 1.6907, 1.7601, 1.7693, 1.7728, + 1.7786, 1.9575, 1.7447, 1.7650, 1.7740, 
1.7779, 1.7729, 1.6935, 1.8035, + 1.8343, 1.7179, 1.8117, 1.7885, 1.7588, 1.6829, 1.7721, 1.7336, 1.6843, + 1.7179, 1.6835, 1.7717, 1.6974, 1.7616, 1.7698, 1.7382, 1.7673, 1.7327, + 1.7272, 1.7410, 1.8681, 1.7535, 1.6609, 1.7775, 1.7975, 1.6045, 1.6706, + 1.6781, 1.7280, 1.7294, 1.7144, 1.7807, 1.7379, 1.6965, 1.6218, 1.7630, + 1.7662, 1.7186, 1.6750, 1.7716, 1.8142, 1.7681, 1.7514, 1.7375, 1.7470, + 1.6814, 1.7563, 1.7806, 1.6861, 1.7027, 1.7264, 1.7811, 1.7573, 1.6235, + 1.7568, 1.7646, 1.7491, 1.7098, 1.7414, 1.7587, 1.7533, 1.7227, 1.6833, + 1.7702, 1.7296, 1.7516, 1.7460, 1.6894, 1.7191, 1.7573, 1.7020, 1.7415, + 1.7898, 1.7779, 1.7721, 1.6386, 1.7686, 1.7720, 0.9429, 1.7820, 1.7034, + 1.7651, 1.7667, 1.7587, 1.8170, 1.6245, 1.7726, 1.7918, 1.7986, 1.6991, + 1.7649, 1.6397, 1.7385, 1.6653, 1.7968, 1.6853, 1.6544, 1.7029, 1.7351, + 0.9336, 1.7428, 1.7244, 1.7644, 1.7481, 1.7767, 1.8191, 1.7117, 1.7716, + 1.5829, 2.0531, 1.7985, 1.7173, 1.7007, 1.7612, 1.7118, 1.7456, 1.7821, + 1.7097, 1.7723, 1.5083, 1.5076, 1.6904, 1.7302, 1.8118, 1.7102, 1.7321, + 1.7028, 1.7465, 1.8067, 1.8033, 1.6827, 1.3774, 1.7701, 1.7287, 1.6279, + 1.7535, 1.6996, 1.7529, 1.7969, 1.7698, 1.7735, 1.7865, 1.7989, 1.8025, + 1.7413, 1.7794, 1.7462, 1.8258, 1.5884, 1.7895, 1.7249, 1.7411, 1.7231, + 1.7466, 1.7880, 1.8221, 1.7345, 1.5172, 1.5861, 1.7265, 1.1932, 1.8105, + 1.7271, 1.7999, 1.7737, 1.7159, 1.7391, 1.5650, 1.0842, 1.6609, 1.7516, + 1.6644, 1.6834, 1.7752, 1.7820, 1.7207, 1.7830, 1.7473, 1.6890, 1.7324, + 1.7598, 1.8085, 1.7870, 1.6808, 1.7300, 1.6913, 1.8049, 1.6898, 1.7118, + 1.5790, 1.7330, 1.7928, 1.7111, 1.7586, 1.7335, 1.7591, 0.8876, 1.7496, + 1.6044, 1.8343, 1.7631, 1.8049, 1.7443, 1.6705, 1.8017, 1.7247, 1.7761, + 1.7494, 1.6933, 1.7185, 1.7567, 1.7555, 1.3987, 1.8214, 1.7133, 1.7056, + 1.7376, 1.6929, 1.6529, 1.7626, 1.7944, 1.7649, 1.7880, 1.8361, 1.7461, + 1.6423, 1.7435, 1.7214, 1.7357, 1.7379, 1.7925, 0.8717, 1.7399, 1.7446, + 1.7784, 1.6280, 1.7529, 1.8482, 1.8067, 
1.7008, 1.7553, 1.7262, 1.6551, + 1.8407, 1.7429, 1.7241, 1.7173, 1.6979, 1.6624, 1.7336, 1.7440, 1.7721, + 1.7816, 1.8321, 1.8003, 1.6417, 1.7096, 1.7309, 1.7556, 1.7408, 1.7249, + 1.7821, 1.8816, 1.7762, 1.7795, 1.7538, 1.7687, 1.8382, 1.7207, 1.7429, + 1.7809, 1.7654, 1.7727, 1.7137, 1.7231, 1.7254, 1.7581, 1.9079, 1.7838, + 1.8562, 1.7747, 1.7307, 1.7510, 1.6652, 1.7962, 1.8084, 1.6713, 1.7405, + 1.7645, 1.7221, 1.7829, 1.5737, 1.6718, 1.8124, 1.7877, 1.7250, 1.8048, + 1.7633, 1.6581, 1.7901, 1.8515, 1.7576, 1.7432, 1.7884, 1.6867, 1.6636, + 1.7865, 1.7576, 1.7160, 1.7054, 1.7541, 1.7233, 1.7555, 1.7447, 1.7340, + 1.9150, 1.5423, 1.6653, 1.7190, 1.7784, 1.7623, 1.7703, 1.7078, 1.7105, + 1.7365, 1.8146, 1.7623, 1.7655, 1.6682, 1.8041, 1.7280, 1.7973, 1.7215, + 0.8671, 1.7118, 1.7316, 1.8002, 1.7620, 1.7976, 1.7504, 1.7343, 1.7921, + 1.7943, 1.8092, 1.7626], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-6.3961e-02, -1.8907e-01, -6.6267e-02, 1.1582e-01, -1.9398e-02, + -7.6774e-02, -9.2488e-02, 1.5014e-01, -1.5402e-01, 5.3630e-02, + -2.6058e-01, 1.6292e-01, -6.2387e-02, -2.1546e-01, -2.1034e-02, + 1.8510e-01, 2.2416e-01, -8.3732e-02, 6.8939e-02, -1.7281e-01, + 6.4749e-02, -1.6842e-02, -1.8385e-02, -2.3905e-01, -6.9280e-02, + -4.7777e-02, 2.9941e-01, 1.0106e-01, -7.4397e-01, -5.8690e-02, + -1.4871e-01, 1.6635e-01, 1.8614e-01, 5.2980e-02, 1.5625e-03, + 1.1102e+00, 1.6345e-01, 2.9891e-01, 5.7778e-02, 8.6953e-02, + 1.0396e-02, 3.3394e-02, -1.1215e-03, -2.0671e-01, 3.9431e-02, + 3.0618e-01, -9.5164e-02, -1.0811e-01, 3.2754e-01, 2.4226e-01, + -5.1117e-02, 7.2014e-02, 5.2677e-02, 5.2326e-02, 4.5423e-01, + 9.4081e-03, 1.0807e-01, -3.1810e-01, 1.6348e-01, -1.6129e-01, + 6.9611e-02, 9.1917e-02, -1.9035e-02, 1.1163e-01, -2.0498e-01, + 5.3379e-02, 2.6206e-01, -5.0303e-01, 1.6840e-02, 2.6704e-01, + 2.5346e-01, 9.7269e-02, 1.9748e-02, 7.3897e-02, 1.1860e-01, + 4.0665e-02, 1.0623e-03, -7.4162e-02, 7.3179e-02, 3.3942e-01, + -9.5987e-03, 8.2016e-03, 
-1.7550e-01, 3.1015e-01, -2.2220e-02, + 1.7016e-02, -2.7397e-02, 1.7581e-01, 5.6219e-03, 2.9484e-01, + -1.8801e-01, 4.0724e-01, -9.6560e-02, 5.7297e-02, -9.1629e-03, + -4.6879e-01, 3.3328e-02, -9.2649e-02, -9.6031e-03, 1.9044e-01, + 1.5786e-01, 9.0443e-01, -1.8836e-01, 1.7412e-01, 5.5520e-02, + -1.3535e-01, -4.3605e-02, 7.9157e-02, 1.8799e-01, 6.7221e-02, + -7.6727e-03, -1.9825e-02, -1.3374e-01, 2.9653e-02, -1.9720e-01, + -6.4861e-02, 5.6295e-02, 1.8078e-01, 3.5850e-01, -2.8073e-02, + 1.3744e-01, -3.7472e-02, -1.4722e-01, 3.6151e-02, 2.5569e-02, + -2.2046e-02, -1.2437e-01, 1.0499e-01, 1.7835e-01, 7.1363e-02, + 4.1597e-03, -1.4217e-01, -2.4751e-01, 1.5900e-01, -7.1130e-02, + -1.9061e-01, 2.4493e-01, 3.5116e-02, 1.5471e-02, 2.2332e-02, + 8.9153e-02, 3.0060e-01, 3.8098e-02, -6.2623e-02, 2.9371e-01, + 1.8181e-01, 5.7994e-02, 5.8664e-02, 2.6419e-02, -1.2824e-01, + -1.0993e-01, -4.0017e-02, -3.9351e-02, -1.4422e-01, 8.7471e-02, + 8.4085e-02, -1.9828e-01, -1.3173e-01, 1.4072e-01, 1.5565e-01, + 2.0243e-02, -4.6226e-02, 1.5923e-01, 4.5654e-02, -2.3367e-01, + 1.2978e-01, 2.1358e-01, -8.2719e-02, 1.0171e-01, -2.2423e-01, + 5.3051e-03, -4.1902e-02, 3.8611e-01, 1.2323e-01, -1.4924e-01, + 2.8467e-01, 1.9509e-01, -1.3619e-01, 2.4334e-01, -8.0153e-02, + -6.7516e-02, -6.7610e-02, -1.0731e-01, 2.4883e-02, 2.8200e-01, + 1.2155e-01, 3.6426e-01, 2.3474e-02, -2.8293e-01, 2.2408e-01, + 3.6100e-01, -1.0994e-01, 3.4722e-02, 7.4121e-02, 2.7931e-01, + -1.4990e-01, 2.5177e-01, -1.8813e-02, 8.3328e-02, 8.4020e-02, + -9.8448e-02, 2.7714e-01, 7.4243e-02, 8.0239e-02, 2.0202e-01, + 8.1008e-02, 4.3401e-01, 1.8625e-02, 8.4404e-02, 2.4402e-01, + -1.6691e-01, -8.8845e-02, -1.5939e-01, -2.0858e-01, 2.5397e-01, + 6.0879e-02, 2.0121e-02, -7.9232e-02, -1.6508e-02, -1.3059e-01, + -6.2701e-02, -1.7213e-01, 3.4699e-01, 1.5253e-01, -1.4908e-01, + 1.9412e-01, 1.3451e-01, 2.2582e-01, -9.5174e-02, 2.2037e-01, + 1.0803e-01, 5.3375e-02, 2.2560e-03, 2.7359e-02, -1.8865e-01, + 8.4389e-02, 5.1940e-02, 2.0583e-01, 
-1.5264e-01, -8.4608e-02, + 9.0049e-02, -8.0713e-02, 1.4706e-01, -1.6154e-02, -4.8605e-02, + 2.0150e-01, -1.8314e-01, 2.6463e-01, -3.1260e-01, 9.1237e-02, + 2.0335e-01, 1.1086e-01, -1.5599e-02, 3.6003e-01, 2.5739e-02, + -2.1332e-01, -7.0601e-03, -1.2192e-01, 1.9431e-01, 1.4852e-01, + 1.2345e-01, 1.8613e-01, 2.5473e-02, 3.4473e-02, 1.1012e-01, + 1.4641e-01, 2.2227e-02, -5.9297e-02, -1.3266e-01, 3.4497e-01, + -3.0586e-01, 7.9368e-01, 3.9487e-02, -1.4621e-01, 6.5543e-02, + 1.7141e-02, -2.0179e-01, 2.3884e-02, -4.0460e-02, -4.6599e-02, + 2.1172e-01, -8.7700e-02, -1.6130e-01, 6.9310e-02, -2.5059e-01, + 5.2468e-02, -2.3732e-01, 1.8512e-01, 7.2659e-02, 4.7407e-02, + -4.3530e-02, -1.4774e-03, 2.0449e-01, 2.7766e-02, -2.9549e-02, + -2.0292e-01, -1.0574e-02, -1.1197e-01, -3.3860e-01, 5.2191e-02, + -5.6128e-02, -2.4438e-01, 1.7825e-01, -3.4640e-01, -2.2049e-01, + 3.1959e-01, -1.0011e-01, -1.3569e-01, -1.9682e-01, -1.7635e-02, + 3.4345e-01, 1.9722e-02, 3.3036e-02, -3.2205e-01, 1.0287e-01, + -1.5752e-01, 1.2438e-01, 3.6095e-01, 1.3326e-01, 2.4184e-01, + 1.2926e-01, -2.5102e-02, -7.7222e-02, 2.3458e-01, -5.3531e-02, + 2.6737e-01, -1.2668e-02, -1.9242e-01, -2.9800e-01, 3.0770e-01, + 9.5187e-02, 1.2686e-01, -6.7057e-02, 9.6127e-02, -1.0121e-01, + -3.1650e-01, 2.8864e-01, 5.3428e-02, -1.3012e-01, -1.6185e-01, + 2.7272e-01, -1.0872e-01, -6.3737e-02, 1.7065e-01, -4.9168e-02, + -7.3678e-03, -3.3221e-01, -3.0027e-01, 1.6127e-01, -2.2458e-01, + 2.3008e-02, 2.1806e-02, 2.3908e-02, -1.7871e-02, 2.8359e-01, + 5.8141e-02, -3.5832e-02, 3.2671e-01, -1.0304e-01, 1.5364e-02, + -4.7144e-02, 2.1080e-01, -6.6814e-02, -1.3583e-02, 2.7550e-01, + -1.3174e-01, 3.0048e-01, -3.1258e-01, 9.8796e-02, 9.0674e-02, + 1.2171e-01, -2.1045e-01, 4.1959e-01, 1.4062e-01, 1.7522e-01, + -1.5909e-01, -3.8485e-01, 3.5587e-01, 6.1791e-02, 2.9728e-01, + -3.2739e-03, 2.4748e-01, -2.4907e-02, -5.3884e-01, 7.2989e-02, + 3.4384e-02, -1.1574e-01, -2.2263e-01, -1.0675e-01, 9.7501e-02, + 3.4671e-02, -1.4328e+00, -1.1062e-02, 
1.9438e-01, 1.0939e-01, + -3.7655e-01, -1.6929e-01, 3.2262e-01, 1.3196e-01, 2.6441e-01, + 3.8306e-01, 6.4132e-02, 1.5814e-01, 1.8378e-02, -2.0566e-03, + 1.6024e-02, -4.8503e-02, -1.0331e-01, 1.5740e-01, -1.9693e-02, + -9.4622e-02, 3.1541e-02, -4.3777e-01, -3.6881e-02, -3.7918e-02, + 7.2006e-02, 7.6161e-02, -1.3037e-01, -1.9686e-01, -1.7425e-01, + 1.3822e-01, 1.5573e-01, 3.1976e-03, -3.8504e-02, -1.8306e-01, + 4.7375e-02, -4.3621e-02, -6.6590e-02, 4.0533e-02, 1.7594e-01, + -4.2811e-02, -8.4193e-02, 2.3910e-01, 1.7940e-01, 2.1909e-01, + -9.5213e-02, 8.6281e-03, 1.5524e-01, -6.5299e-02, -6.1806e-02, + 5.6195e-02, -2.5332e-01, 2.3617e-01, 9.1083e-02, 1.9428e-01, + 3.3102e-01, -8.5299e-02, -1.2100e-01, -4.5219e-01, -2.7028e-01, + 2.8839e-01, 2.4645e-01, -1.1720e-01, 3.4503e-01, 1.4733e-01, + 4.0482e-02, -4.2903e-02, -7.8322e-01, 3.0277e-02, -1.6751e-01, + 1.4465e-02, 3.2363e-01, 2.3789e-01, -1.0457e-01, 6.3565e-02, + 2.0327e-01, 4.7821e-02, -4.9042e-01, 7.9319e-02, 9.3845e-02, + -7.2455e-02, 1.6316e-02, -3.3306e-01, -3.0791e-02, -1.1575e-01, + -4.9676e-03, 4.2699e-01, -1.5469e-01, -1.4036e-01, 1.1827e-01, + -2.2014e-01, 2.5985e-01, 4.4695e-02, 2.5353e-01, -1.0868e-02, + -1.2368e-01, -8.1046e-02, -8.4880e-02, 2.9189e-01, 1.9647e-01, + -1.4869e-01, 2.1447e-02, -7.9529e-02, -5.2323e-02, -5.4333e-02, + 5.1668e-02, -8.1463e-02, -1.1637e-02, -4.8212e-01, -2.3931e-02, + 5.3116e-03, -1.9904e+00, 1.3696e-02, 1.9362e-01, -1.3809e-01, + -1.9681e-01, 4.8385e-02, 3.8303e-02, 5.6370e-01, 2.7663e-01, + 1.9251e-02, 2.1590e-01, 4.1623e-02, 2.3325e-01, -8.7494e-02, + -2.3375e-01, -6.6074e-02, -1.8052e-02, -2.0547e-01, -4.7173e-01, + 1.2608e-01, 2.6116e-02, -3.2520e-01, 3.5194e-01, -2.5267e-01, + -2.0281e-02, -2.0063e-02, -3.7879e-02, 1.0518e-01, 7.7329e-02, + 8.1078e-02, -7.6093e-02, -3.4030e-01, 7.6515e-02, 1.1958e-01, + 9.5325e-02, -1.0395e-01, -6.4384e-02, 2.3667e-01, -4.1264e-02, + 3.1444e-01, 8.5630e-02, -2.8794e-01, 6.7791e-01, 1.5426e-01, + -4.1703e-02, -1.1656e-02, -2.8440e-01, 
-1.0204e-01, 3.4792e-02, + 1.3765e-01, -6.2092e-02, 7.4738e-02, 2.0551e-01, 1.2253e+00, + -1.0391e-01, 1.1737e-01, -3.6290e-01, 1.1110e-01, 6.8894e-02, + 1.8224e-01, -9.1156e-02, -1.1810e-01, -3.2335e-02, 1.4059e-01, + -3.5351e-01, -1.7719e-01, -2.7841e-02, 1.7805e-01, -2.4260e-02, + 3.5481e-01, -1.5195e-01, -2.2427e-02, 2.1070e-01, 9.3560e-02, + 9.1329e-02, -5.9248e-02, 7.8027e-02, 7.1357e-02, 4.7972e-02, + 1.7722e-01, 7.8401e-01, 1.8207e-01, 1.1456e+00, 8.1968e-02, + 9.6185e-02, -8.7804e-02, 1.2846e-01, 1.7551e-01, 3.3738e-02, + 6.4693e-01, -1.2015e+00, 6.1865e-02, 9.1654e-02, 3.5655e-02, + 1.7562e-01, -6.8616e-02, 1.1576e-02, 1.3995e-01, 4.5674e-01, + 1.2882e-01, 1.1601e-01, -8.0007e-02, -8.9488e-02, 1.2392e-01, + 1.6591e-01, 3.0822e-03, 2.0161e-01, -2.3949e-01, -5.5038e-02, + 2.8561e-02, -1.4854e-02, -1.7609e-01, -1.6273e-01, -4.8114e-02, + 2.9822e-02, -1.2119e-01, 5.6389e-01, 1.4921e-02, -5.0786e-01, + 2.0637e-01, -3.2344e-01, 1.2871e-01, 4.8221e-02, 1.8008e-01, + -7.6118e-02, -1.9054e-01, -7.6769e-02, 1.0610e-01, 1.8803e-01, + 1.5988e-01, 2.3421e-01, -2.0432e-01, -2.6052e-03, -8.2586e-02, + 5.4137e-01, 5.5920e-02, -1.1482e-01, 1.9908e-01, 1.6705e-01, + -1.6597e-01, -3.2076e-01, 4.1043e-02, -2.0394e-01, 5.2273e-02, + 8.0126e-02, 4.4827e-02, 3.0543e-01, 3.2901e-01, -2.1709e-01, + 1.5603e-01, 8.3254e-02, 9.2027e-02, -3.0384e-02, -9.9128e-01, + -1.1573e-01, -1.3390e-01, -1.3821e-02, 4.8324e-01, 9.9974e-02, + 1.6134e-01, -1.5294e-02, -1.8115e-01, 3.7491e-02, -3.6812e-01, + 7.3616e-03, 1.5063e-01, -1.2603e-01, 1.6043e-01, -2.3366e-01, + -9.9087e-03, -1.3720e-01, -7.6003e-02, 3.6387e-01, 2.8363e-01, + 6.2078e-02, 4.2387e-02, 1.4645e-01, -7.1626e-01, 8.1430e-03, + 2.1640e-01, -1.5572e-01, -2.6465e-01, -1.9105e-02, 1.0089e-01, + 5.4242e-02, -3.5611e-02, -2.2510e-01, -1.3874e-01, -1.0381e-01, + 2.2531e-01, -5.0153e-03, -1.4030e-01, -2.1619e-02, 5.9003e-02, + -1.5007e-01, 1.0948e-01, -1.1252e-01, 1.0077e-03, 6.4135e-02, + -3.1705e-01, 1.3072e-01, -1.6811e-01, 
5.1108e-02, 3.9745e-02, + 4.1817e-01, 3.0233e-01, -4.0726e-02, -8.3054e-02, -1.3071e-01, + 9.7965e-02, -5.9167e-02, -8.3690e-02, 7.1241e-02, 9.2697e-01, + -2.0566e-01, -8.5988e-02, 8.9921e-02, -2.9561e-03, -5.8907e-03, + -4.3587e-02, 3.7168e-01, -1.4915e-03, -8.9582e-02, -3.2550e-01, + -8.3132e-03, -1.0678e-02, -5.6811e-03, 6.3972e-01, 1.1402e-01, + 2.9503e-03, 7.1307e-02, 1.7079e-01, -1.1014e-01, 1.3895e-01, + 1.7041e-02, -3.6806e-02, 2.5082e-01, -2.0396e-02, 2.8433e-01, + 1.4611e-01, -6.2930e-02, -1.2271e-01, -1.3978e-01, 1.3857e-01, + 2.8862e-01, 1.7088e-01, 1.3886e-01, 4.9825e-02, -4.3962e-02, + 3.7045e-01, 2.8376e-01, 2.2356e-01, -2.3949e-01, 1.1278e-01, + 1.0264e-01, -6.9168e-01, -1.2107e-01, -2.2057e-01, 1.6424e-04, + -3.3768e-02, -4.2022e-03, -3.1471e-01, -2.8823e-01, -1.1201e-01, + 9.9413e-02, -6.2378e-02, -8.8212e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0402, 0.0049, 0.0031, ..., 0.0076, -0.0040, -0.0004], + [ 0.0320, -0.0247, 0.0270, ..., 0.0014, -0.0266, -0.0196], + [-0.0072, 0.0229, 0.0050, ..., -0.0068, -0.0446, -0.0313], + ..., + [ 0.0280, -0.0149, 0.0136, ..., 0.0182, -0.0120, -0.0161], + [ 0.0343, -0.0128, -0.0234, ..., 0.0229, -0.0218, 0.0272], + [ 0.0184, 0.0124, 0.0135, ..., -0.0094, 0.0302, -0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3799, -0.4065, -0.2979, ..., -0.4219, -0.3420, -0.1925], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0009, 0.0018, 0.0037, ..., -0.0094, 0.0236, 0.0011], + [ 0.0007, 0.0022, -0.0113, ..., -0.0333, 0.0027, 0.0064], + [ 0.0013, -0.0087, 0.0208, ..., 0.0051, 0.0020, 0.0045], + ..., + [ 0.0153, -0.0221, 0.0076, ..., -0.0112, 0.0199, -0.0161], + [-0.0092, -0.0176, 0.0055, ..., -0.0182, 0.0059, 0.0039], + [-0.0012, -0.0012, -0.0088, ..., -0.0243, 0.0233, -0.0009]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 7.7896e-03, 
-3.2593e-02, 2.3365e-03, -2.7428e-03, -1.7853e-02, + -3.6957e-02, 4.3549e-02, -8.7357e-03, 5.9662e-03, -6.9542e-03, + -3.0121e-02, -2.0676e-02, -3.7842e-02, -2.5616e-03, -6.9946e-02, + 2.5620e-02, -7.1594e-02, -6.8237e-02, -3.5339e-02, -9.3457e-01, + -2.2919e-02, 5.1079e-03, -3.7384e-02, -1.2177e-02, -2.2659e-02, + -1.8784e-02, -2.9144e-02, -5.5885e-03, 2.5854e-01, 1.8723e-02, + -3.0411e-02, -4.7424e-02, -1.3741e-02, -1.0742e-01, 7.4577e-03, + 4.5700e-03, -1.2428e-02, -2.6245e-02, 2.5635e-02, 5.9547e-03, + -1.3794e-02, -1.9180e-02, -3.3325e-02, 1.5533e-02, 7.5111e-03, + -5.0751e-02, 1.2350e-03, 2.8946e-02, -4.1290e-02, 6.7322e-02, + -1.2611e-02, -2.2552e-02, -7.4959e-04, -8.0261e-03, 1.1475e-02, + 1.5778e-02, 5.5237e-02, -9.5642e-02, -4.5258e-02, -2.1877e-03, + 4.0497e-02, 4.9530e-02, -3.8269e-02, 1.1391e-02, 2.6230e-02, + 3.4790e-02, 3.3569e-02, -6.8054e-03, 3.3379e-03, -7.9102e-02, + 6.3599e-02, 2.4536e-02, 1.4441e-01, 7.4692e-03, 3.0319e-02, + -4.6082e-02, 1.3298e-02, 1.6617e-02, -2.3682e-02, 5.1819e-02, + -9.1064e-02, -1.2390e-02, 7.6416e-02, 9.4070e-03, 1.4420e-02, + -3.0380e-02, -6.9275e-02, -2.1866e-02, -2.3148e-02, 1.5701e-02, + 7.9041e-02, -3.3283e-03, 5.7297e-03, 9.4528e-03, -1.0757e-02, + -5.7259e-03, 1.5404e-02, -4.6921e-03, 4.6196e-03, 7.0251e-02, + -5.9280e-03, -4.4861e-02, -7.4097e-02, -8.2245e-03, 1.6342e-02, + -3.1137e-04, -5.6488e-02, -3.1647e-02, 2.1072e-02, 2.0782e-02, + 2.6672e-02, -3.5950e-02, 1.0429e-02, 2.1378e-02, 3.7811e-02, + -6.6833e-02, 4.2816e-02, 3.4454e-02, 7.8857e-02, -4.6906e-02, + 2.8946e-02, -3.7567e-02, 1.2917e-02, -8.6060e-03, -3.8818e-02, + -1.3573e-02, 7.1678e-03, -8.2626e-03, 1.2360e-02, 8.5388e-02, + -2.6917e-02, -6.2042e-02, -6.1646e-02, -2.9877e-02, 2.4567e-02, + -2.3895e-02, 1.2718e-02, -1.0208e-02, -6.9885e-02, -7.5836e-03, + 6.6376e-03, -2.5208e-02, 3.9795e-02, -1.8814e-02, -5.6244e-02, + 2.3483e-02, 3.8574e-02, -6.3049e-02, 8.9569e-03, -3.6530e-02, + -7.8506e-03, 1.0474e-01, -4.3106e-03, 1.0887e-02, 3.1494e-02, + 
2.9190e-02, 4.7821e-02, 9.3842e-03, -2.3926e-02, 7.8796e-02, + -4.9805e-02, 1.7578e-02, -2.2507e-02, 7.4158e-02, -2.3041e-02, + -1.1719e-02, -3.8025e-02, -5.7526e-03, -6.7017e-02, 3.8086e-02, + -1.1129e-03, 3.1799e-02, -8.0261e-03, 7.7454e-02, 2.8320e-02, + -5.6519e-02, 2.7100e-02, -5.0354e-02, -1.8875e-02, 1.0706e-01, + -2.4261e-03, -2.6855e-03, 1.7792e-02, -8.1329e-03, 4.4098e-02, + 5.7098e-02, 3.6163e-02, 9.7107e-02, -9.0881e-02, -3.4599e-03, + 3.5919e-02, 4.2419e-03, -3.9215e-03, 9.3384e-02, 9.3520e-05, + -2.1439e-02, -1.8682e-03, 2.5467e-02, -1.5388e-02, 1.1948e-02, + 5.7190e-02, -4.8492e-02, 4.6692e-02, -5.6610e-02, 2.5425e-03, + -6.0387e-03, 1.8164e-01, 7.9468e-02, 4.1008e-03, 6.8848e-02, + -1.2781e-01, -2.3010e-02, -4.4128e-02, -3.4180e-02, -1.7395e-02, + -3.9032e-02, -9.7595e-02, 1.1002e-02, 1.1032e-02, 3.1113e-02, + 1.8723e-02, -1.2213e-01, -5.0751e-02, 7.5439e-02, -7.0534e-03, + 2.4872e-02, -1.1040e-02, 2.1629e-03, 4.5349e-02, 5.8022e-03, + 4.3869e-03, -1.2917e-02, 5.5298e-02, 5.0316e-03, 5.2612e-02, + 1.2054e-02, -1.5526e-02, 4.3671e-02, -7.1640e-03, -2.7786e-02, + 7.3967e-03, -1.1696e-02, -2.1484e-02, 4.0070e-02, -1.5297e-02, + -2.8000e-02, -2.4567e-02, 1.0895e-01, 4.0283e-03, 1.1200e-02, + 1.0144e-01, 1.0040e-01, 2.3743e-02, 2.2659e-02, -2.0370e-02, + -3.0762e-02, -1.6815e-02, 6.6589e-02, -1.3390e-02, 6.6528e-02, + -4.3526e-03, 3.2227e-02, -9.1858e-02, 6.4575e-02, 3.1433e-02, + -6.7322e-02, 6.6772e-02, 8.4076e-03, -5.8044e-02, 1.6556e-02, + -1.1096e-01, 9.0698e-02, -8.8928e-02, -3.3752e-02, -1.2222e-02, + -4.0100e-02, -4.0192e-02, -6.0806e-03, 1.7136e-02, 1.6037e-02, + 4.7211e-02, -6.3477e-02, -1.1551e-02, 7.1167e-02, -1.0651e-02, + 7.0496e-02, 1.0674e-02, 1.5163e-03, -2.8168e-02, 4.5357e-03, + 2.4887e-02, -1.7761e-02, 5.0659e-02, 6.1684e-03, 4.4952e-02, + 5.1819e-02, -3.4058e-02, 7.0740e-02, 7.2510e-02, 2.2812e-02, + 9.7580e-03, 1.0696e-02, 2.7405e-02, -8.8989e-02, -8.4656e-02, + 5.2765e-02, -4.8157e-02, 4.5593e-02, -3.9398e-02, 3.8422e-02, + 
4.9591e-02, -3.3630e-02, -6.8237e-02, -4.3701e-02, 1.0796e-02, + 1.4038e-02, -2.5513e-02, -1.3586e-01, -3.3905e-02, 5.3558e-03, + 7.0801e-03, -9.6207e-03, 4.8859e-02, -4.0649e-02, 1.7197e+00, + -7.1899e-02, -4.9255e-02, -8.5144e-02, -1.7181e-02, -5.9166e-03, + -6.5918e-02, -9.2041e-02, -1.9272e-02, 6.4270e-02, 3.2104e-02, + 6.1035e-03, -1.0696e-02, 2.7679e-02, 8.5999e-02, 2.9144e-02, + 7.7820e-02, -7.6866e-03, 2.7557e-02, 8.9569e-03, 2.2308e-02, + 3.9363e-04, 9.1858e-03, -4.3121e-02, -3.4180e-02, -5.8441e-02, + -3.6621e-02, -2.0874e-02, 1.6830e-02, 4.0558e-02, 7.3486e-02, + 1.2581e-02, -2.3155e-03, 3.6377e-02, 3.4729e-02, -5.3284e-02, + 3.3386e-02, 2.8122e-02, 3.8452e-02, 3.3295e-02, 9.4910e-02, + -2.0279e-02, -2.8763e-02, -3.8879e-02, 1.0704e-02, 9.4604e-02, + 2.8931e-02, 2.0630e-02, -2.4460e-02, -4.9988e-02, -1.1375e-02, + 1.4992e-02, 1.4893e-02, 1.0114e-01, -2.1744e-02, 2.1774e-02, + 2.3193e-02, -1.8646e-02, -3.1097e-02, -3.3112e-02, 7.0129e-02, + -4.4899e-03, -6.8542e-02, 3.4851e-02, -5.6000e-02, 5.3223e-02, + 2.8259e-02, -1.0693e-01, -9.2834e-02, 1.0010e-01, 1.0895e-01, + -3.4119e-02, -1.5152e-02, 4.3427e-02, 2.1225e-02, -1.8845e-02, + -1.8906e-02, 1.1780e-02, 9.1003e-02, -1.2894e-02, 5.9113e-02, + -2.0584e-02, -1.4336e-02, -1.5182e-02, -5.4871e-02, -3.9795e-02, + 1.6586e-02, -1.6113e-02, 6.4049e-03, 1.1810e-01, 2.5757e-02, + -4.8853e-01, 2.2644e-02, -3.5156e-02, 2.0920e-02, 7.0877e-03, + -2.1194e-02, 3.3264e-02, -1.4381e-02, 6.1310e-02, 5.0240e-03, + 3.7750e-02, -1.0406e-02, -4.6906e-02, -4.7546e-02, -1.2230e-02, + 1.3031e-02, -3.0441e-02, 1.9775e-02, -5.4688e-02, 3.0609e-02, + 8.6441e-03, -3.6377e-02, 2.6531e-03, -2.9816e-02, -3.4882e-02, + -4.2908e-02, -4.8096e-02, -6.5613e-02, -8.2703e-02, 2.9678e-02, + -1.5717e-03, 7.2144e-02, -6.6284e-02, 9.9304e-02, -3.2654e-02, + 2.4963e-02, 3.5583e-02, 1.3904e-01, -7.3425e-02, 4.6326e-02, + 3.0396e-02, 2.2018e-02, -3.3569e-02, 1.6571e-02, 5.6976e-02, + -4.1962e-02, 1.7609e-02, -4.1084e-03, -2.6169e-02, 3.2959e-02, + 
-7.1106e-02, 1.0414e-02, 1.9646e-03, -1.7776e-02, 8.6670e-03, + 6.2988e-02, -1.1330e-03, -4.1321e-02, 2.8610e-03, -4.0955e-02, + -2.3392e-02, 6.1005e-02, -6.8115e-02, -3.0579e-02, 8.0994e-02, + 6.4880e-02, 6.8115e-02, -2.4460e-02, 5.5481e-02, 2.8992e-02, + 5.9418e-02, 3.9673e-02, -1.6281e-02, 4.6509e-02, -9.4604e-03, + 1.6022e-02, -8.6899e-03, 3.6835e-02, 1.5167e-02, -1.0231e-02, + -5.1727e-02, -6.4240e-03, 7.4768e-02, 1.0760e-01, -2.0065e-02, + 1.0735e-02, -1.2622e-01, -2.9022e-02, 1.1845e-03, -1.2917e-02, + -6.3904e-02, 1.0358e-01, -3.5431e-02, 2.2186e-02, 5.6877e-03, + -1.1574e-02, -1.6083e-02, -1.5587e-02, 5.2910e-03, 8.2016e-03, + 7.8659e-03, -1.8631e-02, -1.3077e-02, -1.5572e-02, 1.1108e-02, + -3.5461e-02, 6.5063e-02, -8.1299e-02, -4.1747e-04, -3.7506e-02, + -3.2845e-03, 7.9346e-02, 1.0022e-01, 5.5313e-03, -3.0167e-02, + 1.0490e-02, 1.9470e-02, 6.1340e-02, -1.8845e-02, 1.0582e-02, + -2.5696e-02, -3.5763e-03, 6.7322e-02, -4.8599e-03, -1.3565e-02, + -1.5327e-02, 9.2529e-02, -3.4241e-02, 7.4341e-02, -3.1982e-02, + 2.3422e-02, -2.3132e-02, -2.0050e-02, -5.5237e-02, -1.7120e-02, + -3.2867e-02, 1.7761e-02, -2.9556e-02, -4.5837e-02, -1.6769e-02, + -4.5074e-02, -2.3026e-02, -7.0724e-03, 9.4986e-03, 3.2318e-02, + -1.4465e-02, 3.3173e-02, -4.5959e-02, 4.3831e-03, -7.1030e-03, + -8.2642e-02, -9.9106e-03, -3.1700e-03, -6.5857e-02, -4.4861e-03, + 6.1111e-03, -5.8055e-05, -4.1199e-02, -6.6406e-02, 7.2021e-02, + 6.1737e-02, 6.9618e-04, 4.6234e-02, -8.0490e-04, 6.5552e-02, + 7.3364e-02, 1.7883e-02, 7.0572e-03, 7.6294e-02, 3.6583e-03, + -5.5847e-02, 5.3467e-02, 8.1482e-03, 5.0629e-02, 2.6779e-02, + -6.9214e-02, -3.7201e-02, 3.1067e-02, 4.1779e-02, 8.6288e-03, + -4.6120e-03, -1.1877e-01, 9.7107e-02, -5.1666e-02, 3.1400e-04, + 2.7237e-02, -3.2593e-02, -1.4381e-02, -2.4063e-02, 3.7842e-03, + -1.3321e-02, 4.0344e-02, 5.2277e-02, 2.5543e-02, -5.7983e-03, + 7.0238e-04, -4.8981e-02, -6.4331e-02, 1.4488e-02, 3.2928e-02, + -1.6449e-02, -1.3496e-02, 3.2883e-03, -2.8275e-02, 
-1.0394e-01, + 2.3224e-02, -2.0599e-02, 2.9724e-02, 5.1514e-02, -4.6661e-02, + 5.1697e-02, -6.7871e-02, 1.5778e-02, 3.7354e-02, 7.0229e-03, + -7.0648e-03, 5.5206e-02, -7.0923e-02, 9.9487e-03, -3.8086e-02, + -1.1192e-02, 4.6997e-02, 1.8250e-02, 4.7211e-02, -3.3386e-02, + 4.4739e-02, 9.4971e-02, 1.0327e-01, 4.6265e-02, -2.7390e-02, + -6.7078e-02, -4.1290e-02, 5.1003e-03, 1.1597e-02, -6.9885e-02, + -7.3891e-03, 4.4800e-02, -6.8115e-02, 1.7685e-02, -1.0590e-01, + 3.1647e-02, -2.8763e-02, 3.7491e-05, 1.1330e-02, 5.8197e-02, + 3.0060e-02, 2.5848e-02, -8.6426e-02, 4.6021e-02, -9.3231e-03, + -5.4382e-02, 4.2633e-02, -2.2324e-02, 1.1444e-02, 4.3793e-02, + 2.4780e-02, -3.5339e-02, -6.7200e-02, 5.4291e-02, 2.4307e-02, + -7.8186e-02, -3.1677e-02, -1.6647e-02, 2.8702e-02, -7.6056e-05, + -4.3549e-02, 1.2598e-01, -8.8623e-02, 1.2978e-02, -9.8999e-02, + 6.3721e-02, -4.3182e-02, 8.4351e-02, 7.4463e-03, 2.1729e-02, + -2.9724e-02, -8.7585e-02, 2.6855e-02, 5.1270e-02, 4.6654e-03, + 3.4424e-02, 2.9800e-02, 3.1799e-02, -1.7502e-02, -7.0679e-02, + 6.9702e-02, -1.2619e-02, 1.7288e-02, 4.2305e-03, 8.4351e-02, + -2.0050e-02, 7.8125e-02, -1.5099e-02, 2.4475e-02, -9.9487e-03, + 1.4198e-02, -5.9998e-02, -3.9185e-02, -9.6970e-03, -2.3727e-02, + -4.7943e-02, -3.1342e-02, 2.7161e-02, -7.4081e-03, 1.9302e-02, + -8.1909e-02, 1.9035e-03, 3.7018e-02, 9.1019e-03, 2.8320e-02, + -2.6093e-02, 1.7487e-02, 3.4119e-02, -2.0996e-02, -3.7659e-02, + -3.4302e-02, -1.2791e-04, -2.4811e-02, -4.5319e-02, 4.1077e-02, + -6.8604e-02, 4.7882e-02, 1.4091e-02, 4.0771e-02, -5.1069e-04, + -4.9500e-02, -9.9976e-02, 2.6245e-02, -4.6753e-02, -9.5596e-03, + 2.7557e-02, 2.5330e-02, -2.4536e-02, 4.0222e-02, -1.1444e-02, + 8.3862e-02, 4.6661e-02, -8.7433e-03, -5.4749e-02, -2.3438e-02, + 1.2192e-02, -2.8833e-01, 8.6212e-03, -3.7537e-02, -2.2629e-02, + -2.6428e-02, 6.6566e-03, -1.2238e-02, 8.9645e-03, 2.0905e-02, + -5.8098e-03, -7.1899e-02, -1.3962e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter 
containing: +tensor([1.5466, 1.6287, 1.4620, 1.5152, 1.6963, 1.5352, 1.7028, 1.6495, 1.6754, + 1.5446, 1.7198, 1.6647, 1.7217, 1.7013, 1.5661, 1.5963, 1.7138, 1.6526, + 1.5437, 2.5590, 1.5243, 1.6118, 1.5728, 1.5510, 1.5395, 1.6284, 1.5794, + 1.6840, 2.6548, 1.5297, 1.5815, 1.5742, 1.6377, 1.6970, 1.5335, 1.6825, + 1.6767, 1.7161, 1.6149, 1.5676, 1.5750, 1.4493, 1.5736, 1.5713, 1.6910, + 1.4318, 1.4671, 1.7247, 1.6311, 1.6464, 1.5485, 1.6081, 1.4676, 1.8106, + 1.5993, 1.4245, 1.6667, 1.6925, 1.5395, 1.7395, 1.5219, 1.5653, 1.7521, + 1.5926, 1.7473, 1.5702, 1.4933, 1.6499, 1.6647, 1.6324, 1.7135, 1.5787, + 1.7618, 1.5647, 1.7105, 1.8888, 1.5787, 1.4775, 1.5883, 1.4983, 1.6657, + 1.5485, 1.7070, 1.4726, 1.5059, 1.7675, 1.7143, 1.6613, 1.5308, 1.6046, + 1.5477, 1.6365, 1.6244, 1.7674, 1.6128, 1.6021, 1.6882, 1.5759, 1.6254, + 1.7811, 1.5736, 2.2523, 1.7263, 1.7387, 1.5957, 1.5393, 1.4164, 1.7550, + 1.5276, 1.5929, 1.5954, 1.5568, 1.4611, 1.6622, 1.6308, 1.6464, 1.6043, + 1.6075, 1.8606, 1.6691, 1.7086, 1.2814, 1.5904, 1.5482, 1.7599, 1.7228, + 1.5460, 1.6140, 1.5800, 1.7483, 1.5361, 1.6983, 1.6441, 1.7168, 1.6573, + 1.6766, 1.8008, 1.6312, 1.4994, 1.5217, 1.5370, 1.5518, 1.5621, 1.6829, + 1.7218, 1.6218, 1.8181, 1.6869, 1.6930, 1.6609, 1.6902, 1.7114, 1.7895, + 1.7047, 1.9185, 1.4508, 1.6689, 1.5640, 1.7115, 1.6192, 1.6506, 1.5862, + 1.5534, 1.2080, 1.4693, 1.5915, 1.5049, 1.6141, 1.6000, 1.6080, 1.5011, + 1.7052, 1.6523, 1.6762, 1.7173, 1.5287, 1.7796, 1.9163, 1.6103, 1.6111, + 1.6848, 1.5284, 1.5299, 1.6179, 1.7410, 1.6436, 1.6795, 0.5891, 1.8123, + 1.6114, 1.4928, 1.5903, 1.7130, 1.5604, 1.6891, 1.6415, 1.5553, 1.5920, + 0.8378, 1.5091, 1.4806, 1.6040, 1.8015, 1.6475, 1.6319, 1.5780, 2.0262, + 1.6141, 1.6232, 1.7835, 1.8310, 1.7591, 1.6236, 1.7798, 1.8485, 1.5591, + 1.8435, 1.6892, 1.7282, 1.7013, 1.6079, 1.8256, 1.5861, 1.5172, 1.6814, + 1.6223, 1.5769, 1.6768, 1.6941, 1.5766, 1.7174, 1.6628, 1.6466, 1.7017, + 1.5979, 1.5386, 1.6890, 1.6619, 1.6857, 1.5417, 
1.4578, 1.7095, 1.6859, + 1.6036, 1.6458, 1.6928, 1.5617, 2.0336, 1.5748, 1.6816, 1.7615, 1.4831, + 1.5995, 1.6715, 1.6472, 1.8099, 1.7742, 1.7084, 1.6539, 1.4784, 1.6935, + 1.5482, 1.4752, 1.5757, 1.4937, 1.7429, 1.6930, 1.5716, 1.6499, 1.6268, + 1.8494, 1.6066, 1.6520, 1.2203, 1.6006, 1.5320, 1.6687, 1.6458, 1.5665, + 1.5904, 1.5120, 1.7532, 1.8606, 1.7307, 1.6165, 1.5873, 1.8569, 1.5411, + 1.8052, 1.4410, 1.7376, 1.6775, 1.5768, 1.4959, 1.7195, 1.5293, 1.5665, + 1.5442, 1.7254, 1.6832, 1.7262, 1.6449, 1.6497, 1.8472, 1.6109, 1.5323, + 1.4856, 1.6461, 1.6573, 1.4211, 1.7107, 1.6706, 1.4615, 1.6185, 1.6502, + 1.5425, 1.7448, 1.8702, 1.6209, 1.6842, 1.4225, 1.6420, 1.6030, 1.7335, + 1.4414, 1.3507, 1.6482, 1.7484, 1.4666, 1.4885, 1.4834, 1.6085, 1.7071, + 1.6288, 1.6031, 1.7478, 1.5366, 1.7877, 1.5961, 1.7320, 1.7167, 1.7011, + 1.5680, 1.6012, 1.6675, 1.7850, 1.6508, 1.5850, 1.7151, 1.7315, 1.6190, + 1.6217, 1.6529, 1.4600, 1.6130, 1.5748, 1.4341, 1.4692, 1.6451, 1.8027, + 1.6968, 1.6237, 1.5114, 1.6602, 1.6563, 1.6236, 1.7618, 1.5643, 1.6583, + 1.7570, 1.5363, 1.6690, 1.6809, 1.6877, 1.6174, 1.7092, 1.6972, 1.5257, + 1.7533, 1.5202, 1.5563, 1.2858, 1.7206, 1.6467, 1.7424, 1.5450, 1.6139, + 1.6049, 1.6431, 1.7720, 1.7395, 2.3183, 1.8012, 1.5125, 1.7654, 1.7184, + 1.5203, 1.7012, 1.6321, 1.7103, 1.5438, 1.6525, 1.4328, 1.5309, 1.6249, + 1.6052, 1.7142, 1.5251, 1.9031, 1.5546, 1.8293, 1.6872, 1.5897, 1.6912, + 1.2554, 1.8850, 1.5435, 1.6176, 1.5993, 1.6400, 1.6315, 1.5535, 1.7377, + 1.7238, 1.5547, 1.6823, 1.6606, 1.7936, 1.5944, 1.5299, 1.6171, 1.6747, + 1.7157, 1.5132, 1.4544, 1.6506, 1.6495, 1.6812, 1.6820, 1.6619, 1.6851, + 1.7541, 1.5222, 1.2278, 1.6402, 1.5469, 1.6362, 1.6224, 1.7885, 1.6684, + 1.6326, 1.7323, 1.8737, 1.5761, 1.5595, 1.5265, 1.6326, 1.7348, 1.6750, + 1.7892, 1.7402, 1.6306, 1.6256, 1.6073, 1.5526, 1.6445, 1.6588, 1.7198, + 1.7170, 1.4939, 1.5734, 1.4749, 1.4609, 1.4621, 1.6425, 1.6142, 1.6705, + 1.8619, 1.6656, 1.7224, 1.5495, 1.7225, 1.6603, 
1.5858, 1.7914, 1.6040, + 1.6189, 1.7769, 1.5258, 1.5773, 1.5919, 1.5266, 1.6045, 1.7294, 1.5485, + 1.8233, 1.6096, 1.5402, 1.6347, 1.6637, 1.6981, 1.6240, 1.7442, 1.4918, + 1.7090, 1.6694, 1.6764, 1.5901, 1.5481, 1.5938, 1.6709, 1.6144, 1.5924, + 1.6912, 1.6638, 1.6623, 1.5187, 1.6067, 1.5926, 1.6297, 1.5314, 1.4781, + 1.9022, 1.6165, 1.5046, 1.5899, 1.6815, 1.5027, 1.6304, 1.6635, 1.6648, + 1.5609, 1.7298, 1.6707, 1.6422, 1.7757, 1.5534, 1.6288, 1.7241, 1.5718, + 1.7018, 1.6467, 1.6198, 1.8366, 1.6966, 1.6463, 1.5737, 1.8146, 1.8558, + 1.6408, 1.7075, 1.6411, 1.7522, 1.7252, 1.8811, 1.5192, 1.5296, 1.6011, + 1.6077, 1.7297, 1.5673, 1.4351, 1.3742, 1.6096, 1.6247, 1.8658, 1.6591, + 1.6070, 1.7442, 1.7427, 1.5629, 1.7752, 1.7397, 1.5951, 1.5998, 1.6218, + 1.6390, 1.6102, 1.6870, 1.6590, 1.6500, 1.6725, 1.5687, 1.8237, 1.7162, + 1.6969, 1.7778, 1.3082, 1.5581, 1.5859, 1.6857, 8.3553, 1.6262, 1.6234, + 1.6098, 1.6806, 1.6735, 1.5222, 1.5401, 1.6517, 1.5861, 1.6949, 1.6505, + 1.5054, 1.5938, 1.6957, 1.4191, 1.5484, 1.5930, 1.5796, 1.8634, 1.6881, + 1.6730, 1.8082, 1.7221, 1.7040, 1.8423, 1.5473, 1.6795, 2.0259, 1.5601, + 1.7674, 1.6799, 1.5424, 1.7275, 1.4993, 1.6710, 1.6352, 1.6293, 1.6699, + 1.5402, 1.5156, 1.5385, 1.5834, 1.7811, 1.6085, 1.4844, 1.5949, 1.6252, + 1.6161, 1.5671, 1.7266, 1.8965, 1.5759, 1.3900, 1.7972, 1.6964, 1.6084, + 1.7021, 1.7420, 1.6757, 1.8554, 1.6910, 1.7198, 1.8348, 1.6739, 1.6898, + 1.7649, 1.6479, 1.6634, 1.5334, 1.5332, 1.6019, 1.5840, 1.6878, 1.6768, + 1.7021, 1.6632, 1.5904, 1.5411, 1.7415, 1.5871, 1.6146, 1.4035, 1.7149, + 1.7371, 1.6836, 1.6548, 1.7445, 1.5904, 1.6646, 2.1296, 1.5593, 1.5757, + 1.7328, 1.5722, 1.5387, 1.8659, 1.5409, 1.7209, 1.5629, 1.8777, 1.8350, + 1.7118, 1.5701, 1.4138, 1.6292, 1.6653, 1.5712, 1.6760, 1.6174, 1.5633, + 1.5570, 1.6512, 1.6422, 1.4815, 1.8044, 1.6046, 1.6314, 1.5283, 1.4404, + 1.4932, 1.5224, 1.7141, 1.6128, 1.7097, 1.6152, 1.7305, 1.6738, 1.6606, + 1.6788, 1.6342, 1.7643, 1.5878, 1.5240, 1.6807, 
1.4941, 1.7150, 1.6930, + 1.8136, 1.6147, 1.6509, 1.6260, 1.8039, 1.4894, 1.4515, 1.5909, 1.6387, + 1.6184, 1.6230, 1.6293, 1.7180, 1.6287, 1.6429, 1.6987, 1.8735, 1.8367, + 1.6882, 1.5426, 1.4757, 1.7773, 1.6352, 1.5406, 1.6494, 1.6965, 1.5874, + 2.3065, 1.5113, 1.3640, 1.5659, 1.5924, 1.5288, 1.6494, 1.6579, 1.7771, + 1.5794, 1.6293, 1.6200], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-2.8834e-01, 6.7875e-01, -2.3783e-01, 7.2542e-01, 5.0365e-01, + -1.0250e-01, -3.9273e-01, 7.6085e-01, 2.8215e-01, 2.7522e-01, + 6.3696e-01, 3.5317e-02, 5.8776e-01, -8.9201e-02, -2.4734e-01, + 2.3635e-01, 6.3772e-01, -6.2483e-02, 8.0917e-01, -1.8566e+00, + 1.5441e-01, 2.7425e-01, 3.3344e-01, 1.9197e-01, 6.2422e-01, + -8.6932e-02, 8.8230e-02, 6.7806e-01, -6.2292e-01, 6.2146e-02, + 8.7037e-02, 4.9910e-01, 2.2075e-01, -1.1974e-01, -3.7742e-01, + -2.0259e-01, 1.1265e-01, -2.0129e-01, 3.3812e-01, -9.1389e-03, + -5.3591e-02, 3.6583e-01, -1.5610e-01, 4.3336e-01, -2.5268e-01, + 3.1972e-01, 1.0743e-01, -5.0063e-02, 3.2510e-01, -5.9961e-01, + 4.2868e-01, -2.2210e-01, 2.1076e-01, -1.3806e-01, -9.8569e-02, + -1.9991e-01, 2.0285e-01, -2.1988e-01, 1.2357e-01, 1.6476e-01, + 1.3403e-01, -2.3772e-01, 7.5840e-01, 4.1628e-01, 4.2165e-02, + 3.0577e-01, -4.3873e-01, -3.7216e-02, -1.1519e-01, -7.0516e-01, + -2.2037e-02, -5.4402e-01, 3.3992e-01, -1.9507e-01, -9.1473e-01, + 8.3870e-01, 4.1670e-01, -1.4147e-01, -1.6440e-01, 2.3323e-01, + 7.2164e-01, -6.6401e-02, -6.7743e-02, -5.1050e-01, 4.1789e-01, + 2.6773e-02, -8.0283e-02, 9.3218e-02, -4.2062e-01, -9.3459e-01, + 1.4858e-01, 1.4147e-01, 1.4865e-01, -3.5721e-01, 4.3898e-01, + -2.5401e-01, -2.9791e-01, 2.2632e-01, -4.6250e-01, -5.8804e-01, + 1.4498e-01, -8.5878e-01, 9.4009e-01, 3.6177e-01, 1.4910e-01, + 3.1275e-01, 5.7658e-01, 7.6616e-01, 8.8667e-02, 8.2457e-02, + -3.0712e-01, -1.1602e-01, 2.3959e-01, -2.5542e-01, 1.0640e-01, + -1.2604e-01, -1.1610e-01, -1.9213e-01, -3.2760e-01, -3.4821e-01, + -6.4125e-01, 3.7060e-01, 4.4295e-01, 
-1.2739e-01, 7.0215e-01, + 2.1715e-01, -1.5359e-01, -5.8012e-01, -3.6162e-02, -9.7989e-01, + 1.4529e-01, 7.3714e-01, 2.3299e-01, 8.8725e-01, -2.6198e-01, + 2.6619e-01, -1.0543e-01, -4.9063e-01, 5.4750e-02, 2.5521e-01, + 1.1565e-01, -1.9963e-01, 4.9922e-03, 3.8230e-01, 6.4455e-01, + 1.7291e-01, -4.4966e-01, 3.1876e-02, -2.6282e-01, -5.4284e-01, + -7.9918e-01, -1.7874e-01, 1.0447e+00, 2.4432e-01, -8.6598e-02, + -5.1222e-01, 1.6548e-02, 6.6075e-02, -1.4376e-01, 1.7247e-01, + 1.8433e-01, 2.0584e-01, 2.7407e-01, -7.5933e-02, 3.4370e-02, + 3.1634e-01, -4.4396e-01, -4.0123e-01, 2.0246e-01, 2.2675e-01, + 7.3490e-01, -6.0053e-02, 7.2985e-01, -2.9349e-01, -4.5335e-02, + 2.3239e-01, -3.9335e-01, -1.4336e-01, -9.0372e-02, -2.1095e-01, + 5.9891e-01, -1.2639e-01, -2.8169e-01, 3.9472e-01, -3.3182e-01, + 3.5402e-01, -4.3384e-01, -6.2287e-01, 1.0957e+00, 3.1057e-01, + 5.0804e-02, -4.2299e-02, 4.6876e-01, 2.7346e-01, 4.2902e-02, + 1.6550e-01, 7.2328e-02, -4.3305e-02, 6.2557e-01, -7.6766e-01, + -5.2920e-02, 6.5177e-01, -1.5345e-01, 3.7510e-01, 3.6323e-01, + 6.0274e-01, -2.2344e-01, 9.2344e-02, -2.7282e-01, 2.9311e-01, + 9.2230e-01, 9.3975e-02, 8.5421e-01, -6.6581e-01, -4.6468e-02, + -3.2008e-01, 6.3147e-01, -9.3217e-01, 4.6425e-03, 5.7329e-02, + -8.6024e-02, 3.8421e-01, 4.3569e-01, -8.3151e-02, -5.1909e-01, + -2.5093e-01, -1.6634e-01, -3.0899e-01, -4.7583e-01, 6.6807e-01, + 4.9516e-02, 2.1708e-01, -5.3353e-02, 3.1828e-01, 1.2399e-01, + 5.4984e-01, 8.3559e-02, -3.9472e-01, -3.6320e-02, -5.7928e-01, + -1.9020e-01, -2.7261e-01, -4.0178e-01, -7.3205e-02, -1.8186e-02, + -3.6264e-01, -3.8185e-02, -6.0324e-01, 3.9541e-01, 2.4023e-01, + -5.1377e-01, -2.2388e-01, 9.4389e-02, -4.5919e-01, -1.6421e-01, + 8.8032e-01, -8.8406e-02, -8.6900e-01, -2.0488e-01, -3.9097e-01, + 7.3720e-01, 4.7167e-01, 4.7704e-01, -4.1401e-01, -6.6253e-01, + -5.8830e-01, -2.9232e-01, -1.3623e-01, 3.8029e-01, -1.6702e-01, + 9.0093e-01, -6.3571e-01, 2.2402e-01, 2.1330e-01, -3.2586e-01, + -5.9196e-02, 5.7648e-02, 
-1.0256e+00, 1.1868e-01, 2.8840e-01, + -1.7680e-01, -1.5109e-01, 9.8570e-01, -3.1493e-01, 1.3815e-01, + -6.8671e-01, 4.4593e-01, -2.5520e-01, 5.3355e-01, -1.8861e-01, + -1.9878e-01, 7.3772e-01, -1.4706e-01, 2.5136e-01, -6.4688e-01, + -5.8466e-01, 1.0780e-01, -2.1876e-01, -2.6948e-01, -3.7949e-02, + 1.6250e-01, -6.8278e-02, -1.8151e-01, 5.4320e-01, 6.2123e-01, + -5.9731e-01, -1.4939e-01, 8.1843e-02, -5.4720e-02, -6.0758e-01, + 5.5542e-01, 5.7590e-01, 3.9174e-01, 1.5900e-01, -3.9861e-01, + -1.9958e-02, 1.1678e-01, 5.5287e-01, -1.1611e-01, 1.8584e-01, + 5.0934e-01, -1.3474e-01, 4.3069e-01, 1.6100e-01, 1.6451e+00, + 3.0039e-01, -2.2575e-01, 3.3777e-01, -2.8258e-01, 2.4884e-02, + -3.2205e-01, 5.5202e-02, 4.3987e-02, -1.9287e-01, -5.9822e-01, + -9.0719e-01, -4.0716e-01, -6.1424e-01, -5.6892e-01, 4.0417e-02, + -8.3440e-01, 3.1679e-01, -2.0134e-01, 6.8621e-01, 4.4656e-01, + -1.7529e-01, 1.1721e-01, -1.9876e-01, -3.1152e-01, 5.6802e-01, + 3.2369e-01, 6.3882e-02, 1.6577e-02, 4.6248e-02, 2.0744e-01, + 2.0860e-01, 1.0150e+00, -3.7986e-01, 9.3586e-02, 3.7668e-01, + -6.6433e-01, 1.0799e+00, -2.7410e-01, -1.8421e-01, -1.7920e-01, + -2.0334e-02, 3.3188e-01, 5.9764e-03, -2.9039e-01, -7.2283e-02, + -8.0281e-01, -1.1115e-01, 1.4681e-01, -1.6251e-01, 2.9035e-01, + -3.0332e-01, -7.9299e-01, 4.0145e-01, -3.1173e-01, -3.3306e-01, + -1.5556e-01, 8.4179e-01, 6.8650e-01, 4.9622e-01, -6.3749e-01, + -3.2113e-01, 5.1397e-01, -3.7570e-01, 4.3522e-01, -3.2065e-01, + 1.0057e-01, 1.7216e-01, 7.9660e-01, 3.2077e-01, 4.9979e-01, + 4.9811e-01, 5.9768e-02, -8.0808e-02, -1.3962e-01, 6.0569e-02, + 1.5873e-01, 1.0248e+00, -7.9364e-01, -5.7126e-01, -4.9706e-01, + -2.4641e-01, 1.4800e-01, -3.5420e-01, 2.5300e-01, 3.1038e-01, + -3.8231e-01, -2.1472e-01, -4.8862e-01, 1.2613e-01, -4.8684e-01, + -1.1218e+00, 4.5354e-01, 7.3050e-02, -7.5414e-01, 6.2755e-03, + 1.9597e-01, -1.7957e-01, -9.1913e-01, -9.3182e-02, 5.8686e-01, + -4.8479e-01, 2.5590e-01, 1.0595e+00, 2.7002e-01, 7.2647e-01, + 5.1018e-01, 1.9252e-01, 
8.9410e-03, 3.9983e-01, -9.9736e-02, + -4.6141e-01, 1.7405e-01, 2.8290e-01, 6.3410e-01, 6.8365e-01, + 8.1244e-01, 5.7984e-01, 4.0560e-01, 3.8197e-02, -4.1310e-01, + 4.8830e-01, -5.7483e-01, 4.4491e-01, -5.7979e-01, -5.8200e-01, + -7.5095e-02, 4.5962e-01, -7.1681e-01, 9.5295e-02, 6.2722e-01, + 2.8345e-01, 2.8936e-01, 2.2348e-01, 2.3233e-01, -8.2202e-02, + 1.9226e-01, 1.9503e-02, 4.2562e-01, -5.9108e-01, -2.3255e-01, + 4.6791e-01, -4.6673e-01, 6.5900e-01, -6.2156e-01, 1.9633e-01, + 1.7809e-01, 5.3349e-01, -6.9414e-02, 9.1718e-01, 6.4407e-01, + 5.0983e-01, 2.0029e-02, 7.9499e-01, 2.8185e-02, -1.5870e-01, + -6.7845e-02, 4.3046e-01, 1.2244e-01, -7.2143e-01, 4.3565e-01, + -2.2752e-01, 8.3008e-02, 7.6862e-02, 3.0861e-01, 3.5815e-01, + 1.2071e-02, -2.8832e-01, -5.2904e-01, 6.2650e-02, 1.1320e+00, + -2.3723e-01, -2.9065e-01, -2.4751e-01, -9.2235e-01, -6.6487e-02, + 1.7130e-01, 3.2095e-02, 3.2017e-01, -2.9495e-01, 1.4542e-01, + -4.9789e-01, -4.8924e-01, -1.7171e-01, 2.4293e-01, -3.5534e-01, + 1.9945e-01, 4.8569e-02, 6.2335e-01, 4.7964e-01, 2.9458e-01, + 1.5449e-01, -6.4109e-01, 1.3690e-01, 3.9368e-01, 1.4828e-01, + 1.4641e-01, 4.3600e-01, 1.0510e-01, -5.8869e-02, 6.5924e-01, + 3.5055e-02, -5.6399e-01, 4.5772e-01, 2.2873e-01, -2.3484e-01, + -3.7558e-01, 8.9041e-01, -2.2871e-01, -3.6462e-01, -1.2101e-01, + -2.2578e-01, -1.3908e-01, 7.4223e-02, 3.0451e-01, 1.8741e-02, + 3.2846e-01, -4.0961e-01, -3.4544e-01, -6.2249e-01, 2.4417e-01, + -2.0547e-01, 1.7231e-01, 3.1831e-01, 6.4965e-01, 3.9325e-01, + -8.2503e-01, 6.0124e-01, 2.8477e-01, -1.9307e-01, 5.5779e-01, + -9.2765e-02, 1.7029e-02, -4.8561e-01, 2.5786e-01, -2.5821e-01, + 3.1338e-01, -1.2206e-01, 6.9799e-01, -3.6036e-01, 4.3696e-01, + 9.1485e-01, 1.1678e+00, 1.5267e-01, 6.1679e-01, 4.5000e-04, + -8.2172e-01, -8.6353e-01, -8.9458e-02, 7.0973e-01, -1.7341e-01, + -6.2408e-01, 1.0639e-01, -6.4188e-02, -1.0023e-01, 4.2501e-01, + 2.7844e-01, -1.3440e-01, -4.0398e-01, -7.8034e-01, -2.8684e-01, + 2.7194e-01, 1.2444e-01, 4.0351e-01, 
-7.6288e-02, -1.9768e-01, + -4.2178e-01, 4.4424e-01, 1.3352e-01, -9.9392e-02, 6.9779e-01, + 1.6888e-01, 1.0123e+00, -6.2880e-01, 4.5987e-01, 4.4244e-01, + -5.3807e-01, 5.4973e-01, -1.0677e-01, -4.1490e-02, 2.9562e-01, + 5.8947e-02, 2.2215e-02, -6.4694e-01, 7.8909e-02, -2.7255e-01, + 3.1557e-01, 6.9443e-01, -5.6689e-01, -5.3598e-01, 1.7104e-01, + -2.7369e-01, 3.5588e-01, 3.2884e-01, -1.1530e-03, 4.7344e-01, + -2.1759e-01, -5.2551e-01, -3.0974e-02, 1.6733e-01, 3.1815e-01, + -1.3519e-01, 1.9444e-01, 5.8109e-01, -5.5995e-02, 1.9733e-01, + -4.8083e-01, 1.2766e-01, 3.8752e-01, -6.7796e-01, 2.5224e-01, + 2.2199e-01, -2.1854e-01, -5.9393e-03, 3.2313e-01, 2.8912e-01, + 9.3591e-02, 9.0599e-02, -5.3521e-01, 1.5640e-01, -1.7252e-01, + 3.9477e-01, 3.5321e-01, 8.6974e-02, -2.4951e-01, 3.8474e-01, + 1.4969e-01, -4.9545e-01, 8.4216e-01, 7.0573e-02, -3.3233e-01, + 2.1082e-01, 3.2282e-01, 1.4043e-01, -2.4110e-01, -2.8888e-01, + 2.5481e-01, -6.9019e-01, 2.9674e-01, 1.1068e-01, -2.5481e-01, + 1.7578e-01, -1.7300e-01, -2.6870e-01, -1.2870e-01, -3.5365e-01, + 4.1577e-02, 9.9443e-02, -1.6780e-01, -1.1617e-01, 1.6097e-01, + 4.3591e-01, 6.7614e-01, -3.5089e-02, 2.8122e-02, 2.7661e-01, + 3.7887e-01, 1.0609e+00, 7.0093e-01, -7.1668e-03, 7.7913e-01, + -4.2864e-02, 4.7460e-01, -1.0076e+00, 1.0556e-01, -1.8160e-01, + -7.2417e-02, 1.0140e+00, -3.5696e-01, -7.3666e-01, -2.3020e-01, + -3.2148e-01, 5.2983e-01, -3.2121e-01, 1.4286e-02, 6.7987e-01, + -6.5973e-01, -2.0075e-01, 3.7226e-01, 1.6172e-01, -3.3971e-01, + -1.9281e-01, -5.0010e-02, 9.4485e-01, -1.0869e-01, 6.2613e-01, + -1.2510e-01, 1.0464e+00, -5.2931e-01, 6.5617e-02, 1.1157e-01, + 2.1925e-01, 7.1122e-02, -5.7915e-01, -2.9074e-01, -1.2102e-01, + -1.3839e-01, -1.4838e-01, 4.2684e-02, -8.2936e-02, -1.4279e-01, + -1.2063e-02, 3.2719e-01, -4.2521e-01, -2.4905e-01, -4.9499e-02, + -5.2031e-02, -1.7989e-01, -4.2499e-02, 6.8209e-01, -1.5275e-01, + 4.2563e-01, 6.6697e-02, 1.5147e-01, 1.8749e-01, 8.2221e-01, + -2.5794e-01, 8.3046e-01, 2.4105e-01, 
6.1676e-01, 3.9542e-01, + -2.3539e-01, -3.6405e-01, -6.6711e-02, -2.9740e-01, 9.6638e-02, + -4.3095e-01, 1.6043e-01, 8.7313e-02, 5.2172e-01, -1.8327e-01, + -8.9197e-02, -1.9214e-01, 2.0663e-01, -3.8714e-01, 3.1245e-01, + -5.1035e-02, -1.3826e-01, 2.8931e-01, 8.6621e-01, 3.4779e-01, + 6.2924e-01, 5.0303e-01, -1.4128e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0033, -0.0180, 0.0063, ..., 0.0171, 0.0053, 0.0176], + [ 0.0050, 0.0288, 0.0542, ..., 0.0377, 0.0121, -0.0257], + [ 0.0002, -0.0528, 0.0353, ..., 0.0037, 0.0121, 0.0060], + ..., + [ 0.0066, 0.0045, 0.0136, ..., 0.0031, 0.0118, -0.0052], + [-0.0037, 0.0018, -0.0075, ..., 0.0004, -0.0041, 0.0008], + [-0.0034, 0.0127, -0.0073, ..., 0.0064, -0.0214, -0.0094]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.3430, -0.0836, 0.0424, ..., -0.0128, -0.0226, 0.0145], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0056, -0.0199, 0.0132, ..., -0.0029, 0.0242, -0.0021], + [ 0.0218, 0.0037, 0.0028, ..., -0.0120, 0.0019, 0.0024], + [ 0.0007, -0.0039, -0.0249, ..., -0.0206, 0.0182, 0.0032], + ..., + [-0.0161, -0.0262, -0.0194, ..., 0.0123, 0.0224, 0.0002], + [-0.0050, -0.0197, 0.0011, ..., -0.0005, 0.0069, 0.0091], + [ 0.0174, -0.0362, -0.0287, ..., -0.0099, -0.0143, 0.0133]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 3.0563e-02, 1.5022e-02, -2.1133e-02, 1.6357e-02, -6.0081e-03, + -1.1963e-02, 3.7994e-02, 4.7791e-02, -1.2894e-02, 1.1734e-02, + -2.5070e-02, 1.4830e-04, 5.0049e-03, 1.2337e-02, -3.2715e-02, + 4.9316e-02, 1.1742e-02, 3.3302e-03, 5.0724e-05, 4.1748e-01, + -2.4628e-02, -1.8173e-02, -3.1830e-02, -2.8961e-02, -3.6240e-03, + -9.0256e-03, 1.6846e-02, 2.3712e-02, 5.6335e-02, -2.2003e-02, + 2.4048e-02, -2.3163e-02, -1.5518e-02, -1.2665e-02, -7.5340e-03, + 2.7512e-02, 7.3128e-03, 1.2070e-02, -2.4948e-02, -2.6566e-02, + -3.5828e-02, 2.5146e-02, 
-5.1208e-02, 2.1088e-02, -4.1779e-02, + 6.1913e-03, 5.8746e-02, -4.7882e-02, -1.5106e-03, 4.1168e-02, + 8.9417e-02, -3.4241e-02, 6.4354e-03, 2.9251e-02, 6.4049e-03, + 4.6082e-02, 1.7426e-02, -8.8654e-03, 2.8778e-02, -1.1444e-02, + -2.6718e-02, 6.9618e-03, -1.6754e-02, 4.9713e-02, -6.6467e-02, + -6.0081e-03, -9.1858e-03, 1.0658e-02, -1.7029e-02, -1.0211e-01, + -1.3550e-02, 7.3013e-03, 3.1830e-02, -5.3711e-02, -2.3651e-03, + -4.2297e-02, 2.1667e-02, 1.8265e-02, -7.1167e-02, -1.6129e-02, + -3.0380e-02, 4.4189e-02, 1.9302e-02, -2.7542e-02, 2.7588e-02, + 1.2352e-02, -6.1981e-02, 1.2680e-02, -2.4841e-02, -8.7976e-04, + 2.7847e-02, -8.7280e-03, -2.4399e-02, 4.6173e-02, 3.9154e-02, + -2.4811e-02, 2.4765e-02, -3.8788e-02, 2.2308e-02, 4.7363e-02, + 5.9845e-02, -1.8115e-01, -3.4698e-02, -2.8183e-02, -1.3344e-02, + 8.3303e-04, 2.7466e-02, 8.1558e-03, -9.5081e-04, 3.9001e-02, + -2.9678e-03, -1.5572e-02, 1.2978e-02, 2.1011e-02, 1.3420e-02, + -3.3478e-02, 2.9922e-02, -1.8646e-02, 6.6467e-02, -3.5191e-04, + 4.0405e-02, -2.4429e-02, -7.8430e-03, 3.7262e-02, -4.1382e-02, + 9.2163e-03, 3.3844e-02, 2.5925e-02, -3.6041e-02, 3.5614e-02, + -1.3481e-02, 1.8936e-02, 1.7715e-02, -1.8723e-02, 4.6417e-02, + 2.0935e-02, -7.8659e-03, 3.4393e-02, 4.5288e-02, -3.3875e-02, + -1.4618e-02, -2.1305e-03, -8.7891e-03, -7.1411e-02, -3.6316e-02, + -2.5311e-03, 3.0899e-02, -2.4307e-02, -1.3008e-02, 5.2917e-02, + 3.5004e-02, 4.5197e-02, 1.9623e-02, -1.0841e-02, -2.4529e-03, + 3.7201e-02, 3.5339e-02, 1.3718e-02, 1.1375e-02, -2.6093e-02, + -3.7518e-03, -1.6266e-02, 1.4168e-02, -6.6345e-02, -2.2766e-02, + -1.7738e-04, -9.1370e-02, -1.2688e-02, -1.9135e-02, -2.5482e-02, + -2.1301e-02, -3.9154e-02, -1.4015e-02, 5.5115e-02, 1.1574e-02, + -4.7646e-03, 1.2192e-02, 2.0466e-03, -7.8430e-03, -2.8397e-02, + 6.3667e-03, 5.3024e-03, -6.4392e-03, -3.0655e-02, 3.6640e-03, + 3.9307e-02, 3.6133e-02, -7.0984e-02, 3.3264e-02, -2.8553e-03, + 3.6430e-03, 2.4078e-02, -3.6438e-02, 4.3945e-03, -5.9891e-03, + -6.0608e-02, 
6.8283e-03, -2.0050e-02, -1.4502e-01, -5.0232e-02, + -2.8580e-02, 5.2216e-02, 3.1952e-02, -1.9638e-02, -1.4832e-02, + 1.3763e-02, 9.1980e-02, 3.2330e-03, 2.6703e-02, 1.0635e-02, + -2.0584e-02, -9.4528e-03, 1.0681e-02, 2.6474e-02, 5.6305e-03, + 8.0414e-03, 4.0321e-03, 4.0771e-02, 2.0218e-02, 2.9612e-04, + 2.7405e-02, -1.2093e-02, 1.5450e-02, 6.4888e-03, 1.6556e-02, + -2.0569e-02, -2.1271e-02, -9.4910e-03, 7.6942e-03, 1.9348e-02, + 7.6714e-03, 1.1742e-02, 9.8877e-03, -1.1467e-02, -4.6204e-02, + 2.9160e-02, 1.8372e-02, 2.3438e-02, 2.8290e-02, -3.2745e-02, + 1.2901e-02, -2.0935e-02, 3.3894e-03, -1.1246e-02, -2.3041e-02, + 1.7990e-02, 3.5400e-02, -1.7273e-02, 1.7090e-02, -4.0674e-04, + -1.7609e-02, 5.5328e-02, 3.3512e-03, -8.0795e-03, -2.8442e-02, + -4.3907e-03, -1.1787e-02, 6.7329e-03, 1.2016e-02, 7.5836e-03, + -2.9266e-02, 5.1147e-02, 4.1656e-02, -1.0422e-02, -1.1787e-03, + 6.7322e-02, -4.6326e-02, 3.3722e-02, 2.8801e-03, -4.9500e-02, + 3.8391e-02, -5.7297e-03, -4.5227e-02, -1.4793e-02, -3.0579e-02, + -9.2926e-03, 1.5287e-03, -1.3947e-02, -2.8183e-02, 1.8951e-02, + -9.3555e-04, -8.1177e-03, -3.7781e-02, 9.2087e-03, -2.1896e-02, + 9.0256e-03, 2.8885e-02, 1.8280e-02, -1.7990e-02, -3.0457e-02, + 1.5869e-02, 6.1417e-03, 2.1851e-02, -1.4830e-03, 5.3619e-02, + 1.2711e-02, 3.5950e-02, 1.3435e-02, 1.3573e-02, -1.9135e-02, + -1.1078e-02, 1.0445e-02, 2.7786e-02, -3.2501e-02, 2.6264e-03, + -1.0414e-02, -2.8107e-02, -5.7465e-02, -1.2390e-02, -1.8738e-02, + -9.8190e-03, -4.6570e-02, 4.6875e-02, -1.9806e-02, -4.6082e-03, + -6.2683e-02, -2.6245e-02, -2.0050e-02, -1.5404e-02, 4.7302e-03, + 5.3375e-02, 4.2847e-02, -4.5746e-02, 8.3160e-03, 1.4453e-01, + 2.1973e-02, 5.1758e-02, -6.7200e-02, -3.7098e-03, 3.9825e-02, + 7.1449e-03, -7.8354e-03, -2.8580e-02, 4.2755e-02, 5.2719e-03, + 9.1705e-03, -5.1117e-03, 1.6525e-02, 1.8982e-02, 9.0332e-03, + -2.6703e-02, -1.9089e-02, -2.3727e-02, 1.7212e-02, -9.4757e-03, + -3.8849e-02, -2.0828e-02, -8.3313e-03, -1.8707e-02, -3.2562e-02, + 9.6970e-03, 
6.1150e-03, 2.6825e-02, 8.3740e-02, -2.2720e-02, + -3.9642e-02, -4.4365e-03, -2.4857e-02, 3.0396e-02, 8.9111e-03, + 5.9875e-02, 2.7298e-02, -3.1921e-02, -6.9160e-03, 1.5610e-02, + 3.0518e-02, 3.3295e-02, -3.5187e-02, -4.2023e-02, -8.5297e-03, + -3.5583e-02, -1.6861e-02, 4.6265e-02, -7.0557e-02, -3.6499e-02, + 2.5085e-02, 2.0981e-02, 3.0685e-02, 2.8503e-02, -7.3586e-03, + 3.8544e-02, 5.4199e-02, 2.5284e-02, -1.6983e-02, 4.5074e-02, + 5.7831e-03, -2.1332e-02, 6.0654e-03, 6.9771e-03, -6.9389e-03, + -2.3346e-02, -3.6377e-02, -2.4643e-02, -3.4943e-02, 5.5267e-02, + -2.8488e-02, 4.8889e-02, 5.4207e-03, 3.5492e-02, -1.7471e-02, + 1.9699e-02, -3.4058e-02, -1.1841e-02, 3.0594e-02, -1.9592e-02, + 9.1324e-03, 1.2169e-02, -4.5105e-02, -6.2622e-02, -2.1561e-02, + -2.1648e-03, -1.4114e-02, -2.3193e-02, 2.5360e-02, 1.1749e-01, + 1.9470e-01, 1.6983e-02, 3.8239e-02, -5.1300e-02, -1.4488e-02, + -8.2169e-03, -2.2324e-02, 2.3209e-02, 5.4199e-02, -1.3283e-02, + 2.9816e-02, 1.4275e-02, -1.6251e-02, 5.5504e-03, -2.1973e-03, + 8.6136e-03, -1.1513e-02, 2.7313e-02, 3.3966e-02, -3.6926e-02, + 1.5045e-02, 4.6631e-02, 4.9591e-03, -5.6152e-02, -1.8768e-02, + 8.4076e-03, -6.5308e-03, 2.0355e-02, -1.8872e-01, -2.5024e-02, + -3.5156e-02, 8.7128e-03, 3.6736e-03, 2.4811e-02, -3.6407e-02, + 2.1194e-02, -7.6599e-03, 4.4098e-02, -2.0309e-02, 2.6226e-03, + 1.0490e-03, 3.2990e-02, -7.0435e-02, -2.0275e-03, -2.9583e-03, + -3.7823e-03, 7.5531e-03, 2.1912e-02, -3.1834e-03, -3.5736e-02, + 1.2581e-02, 3.1799e-02, -2.8992e-02, 1.6189e-04, 4.4220e-02, + -7.6485e-03, 6.2294e-03, -2.1684e-04, 1.3824e-02, 3.4607e-02, + 1.7349e-02, 2.7435e-02, 2.9144e-02, 1.6586e-02, 4.3182e-02, + 1.6373e-02, 3.4088e-02, -2.6337e-02, -2.5360e-02, -2.1591e-02, + 1.2138e-02, 3.3966e-02, -1.6785e-02, 3.3016e-03, 4.3030e-02, + 5.4352e-02, -1.6661e-03, -2.3773e-02, 6.8779e-03, -2.1927e-02, + -1.1436e-02, 2.0020e-02, -8.5449e-04, -3.5370e-02, -8.2445e-04, + 1.9207e-03, -5.0293e-02, -2.7145e-02, 2.3254e-02, -6.4125e-03, + -6.0425e-02, 
4.0527e-02, -1.0811e-02, -4.1313e-03, 9.3842e-03, + 1.8646e-02, 3.0502e-02, -1.2569e-03, 9.6436e-03, -6.1464e-04, + 1.0376e-02, -8.2855e-03, 5.0323e-02, 3.0655e-02, -3.9795e-02, + -2.5692e-03, 1.4099e-02, -1.9882e-02, 2.7069e-02, -3.4363e-02, + -1.5350e-02, 2.1240e-02, -3.4088e-02, 1.7563e-02, 2.0416e-02, + -4.5433e-03, -1.6800e-02, 2.2903e-02, 1.9836e-02, 3.5645e-02, + -2.2095e-02, -2.8473e-02, 3.1586e-02, 3.9978e-03, 1.6434e-02, + -2.5055e-02, -2.1393e-02, -3.7048e-02, 3.2928e-02, -7.7576e-02, + 1.9775e-02, -3.0182e-02, -3.0579e-02, -2.3712e-02, 4.1718e-02, + -5.3955e-02, -1.2665e-02, 2.3529e-02, 1.5541e-02, 3.0777e-02, + -3.0563e-02, 2.6566e-02, 3.1586e-02, 9.0561e-03, -1.9897e-02, + -4.0474e-03, 4.6295e-02, 2.7832e-02, -2.1439e-02, 6.4575e-02, + -1.9226e-02, 1.3092e-02, 1.3664e-02, -4.5532e-02, 2.2354e-02, + -2.8320e-02, 1.1826e-02, 4.0649e-02, 1.7624e-02, 3.6804e-02, + 1.0056e-02, 1.4320e-02, 8.7585e-03, -3.9673e-03, 9.0714e-03, + 1.8906e-02, 5.5115e-02, 8.3923e-03, 1.2184e-02, -6.7062e-03, + 1.6266e-02, 5.0659e-02, 1.7426e-02, 4.1351e-02, 1.8711e-03, + 6.3362e-03, -2.0966e-02, 2.3327e-03, 4.7058e-02, 3.5553e-02, + -1.1131e-02, -4.3274e-02, -2.3331e-02, -6.9618e-03, -1.7822e-02, + -3.7415e-02, -2.8198e-02, -2.1149e-02, -6.0141e-05, -5.2704e-02, + 5.2567e-03, -5.4382e-02, 1.0933e-02, -5.8807e-02, 1.4450e-02, + 6.4926e-03, -5.9418e-02, 2.5940e-02, 2.9053e-02, 2.6855e-03, + 2.2385e-02, 5.7793e-03, 1.7624e-02, -9.3613e-03, -1.0327e-01, + -9.1858e-03, 2.5253e-02, 2.8381e-02, 2.8564e-02, -1.3519e-02, + 2.2125e-02, -8.2245e-03, 3.4882e-02, -2.5787e-02, -1.0826e-02, + 1.3245e-02, -1.4206e-02, -3.7445e-02, -4.8218e-02, -4.0039e-02, + 4.6753e-02, -5.1069e-04, -2.1774e-02, 3.0930e-02, -6.6406e-02, + -2.5925e-02, -3.2410e-02, 7.2266e-02, -4.3671e-02, 9.2010e-03, + 3.1555e-02, 2.4765e-02, -6.3229e-04, -5.9891e-03, 2.8934e-03, + -2.4963e-02, -4.7607e-03, 7.7515e-03, -2.6108e-02, 7.8430e-03, + -2.4933e-02, -4.3518e-02, 3.5839e-03, 2.2598e-02, 1.8845e-02, + -1.0582e-02, 
-1.5945e-02, 3.8834e-03, 2.5909e-02, 6.1249e-02, + 2.2537e-02, 1.1930e-03, 4.0375e-02, -1.0376e-02, -1.4725e-02, + 2.5299e-02, 3.1250e-02, -9.2773e-03, -7.6294e-03, -3.4058e-02, + 3.1647e-02, -3.0411e-02, 1.0216e-02, -4.1870e-02, -2.3346e-03, + -1.7715e-02, 1.6321e-01, -1.7548e-02, 1.6037e-02, -5.9052e-02, + 8.2474e-03, 4.6967e-02, 1.0948e-02, -2.3087e-02, -3.6835e-02, + -2.7222e-02, -6.8115e-02, 1.6403e-02, 1.6785e-02, -1.6510e-02, + -5.9547e-03, 2.3270e-02, -5.1849e-02, -2.9083e-02, -5.3223e-02, + 1.1513e-02, 1.6281e-02, 6.8741e-03, 3.1982e-02, -2.7908e-02, + 2.6627e-02, 4.6051e-02, -1.7532e-02, -5.6534e-03, -9.1782e-03, + 2.6505e-02, 2.7390e-02, -1.2978e-02, -1.3390e-02, -2.7512e-02, + -4.7852e-02, -3.6346e-02, 1.0300e-02, -1.5511e-02, -3.6133e-02, + -4.7821e-02, -1.3428e-02, -6.1493e-03, -8.7051e-03, 2.0996e-02, + 1.2634e-02, 2.6264e-03, 2.4139e-02, 9.1267e-04, -3.4637e-03, + 8.2321e-03, -4.6997e-02, -3.5278e-02, 6.2523e-03, 2.5005e-03, + 1.4305e-02, -1.4420e-02, -7.4816e-04, 1.2001e-02, 4.5166e-03, + -4.9194e-02, 1.4847e-02, 2.1591e-03, -1.6916e-04, 1.1780e-02, + 4.7058e-02, 4.8462e-02, -1.8280e-02, -3.1776e-03, -3.6560e-02, + -5.1239e-02, 1.7960e-02, 9.1019e-03, -3.3142e-02, -1.5480e-02, + -1.3832e-02, -1.0956e-01, -3.7975e-03, -5.7343e-02, -1.4809e-02, + -9.6893e-03, 4.9866e-02, -5.1880e-02, 4.2175e-02, -2.3911e-02, + 1.9703e-03, -2.2034e-02, -3.8261e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.0895, 1.1491, 1.1280, 1.0222, 1.1739, 1.0663, 1.1026, 1.1196, 1.0610, + 1.0362, 1.0344, 1.1302, 1.1394, 1.1145, 0.9793, 1.2097, 1.0334, 1.2245, + 1.1241, 0.2753, 1.0395, 1.0472, 1.1671, 1.0659, 1.0802, 1.1507, 1.0881, + 1.0900, 1.1905, 1.2015, 1.1597, 1.0814, 1.0713, 1.1395, 1.1696, 1.3321, + 1.0179, 1.0746, 1.1628, 1.0975, 1.0572, 0.9831, 1.1066, 1.0873, 1.2887, + 1.0379, 1.0448, 1.2154, 1.0990, 1.0652, 1.1241, 1.1402, 1.1011, 1.1263, + 1.1139, 0.9162, 1.0919, 1.2903, 1.1040, 1.1012, 1.1183, 1.0331, 1.1440, + 
1.1119, 1.1228, 1.1293, 0.9695, 1.2707, 1.1637, 1.2276, 1.0280, 1.0591, + 2.1889, 1.0572, 1.1401, 1.0796, 1.1198, 1.0126, 1.1814, 1.0254, 1.1224, + 1.1836, 1.1246, 1.0903, 1.2133, 1.1445, 1.1274, 1.1002, 1.1227, 1.1008, + 1.1070, 1.1492, 1.1207, 1.1143, 1.1237, 1.0610, 1.0389, 1.1207, 1.0998, + 1.1228, 1.1145, 1.0209, 1.0081, 1.0739, 1.1013, 1.1098, 1.0723, 1.2898, + 1.1376, 1.1546, 1.0668, 1.0175, 0.9574, 1.0351, 1.1479, 1.0740, 1.1223, + 1.1233, 1.1137, 1.1355, 1.1567, 1.0250, 1.1973, 1.0578, 1.1464, 1.1832, + 1.0966, 1.0225, 1.1626, 1.0823, 1.0742, 1.0992, 1.0003, 1.0753, 1.1261, + 1.1817, 1.2336, 1.2113, 1.0655, 1.0400, 1.0588, 1.0387, 1.1333, 1.0340, + 1.0439, 1.0065, 1.0902, 1.1669, 1.0910, 1.1070, 1.1481, 1.1906, 1.0798, + 1.1427, 1.1982, 0.9619, 1.2056, 1.1212, 1.1140, 1.0979, 1.1636, 1.1301, + 1.0640, 1.0920, 1.1086, 1.0510, 1.1392, 1.1131, 1.1167, 1.1602, 1.0656, + 1.1571, 1.0212, 1.1582, 1.1673, 1.1256, 1.0929, 1.2417, 1.1797, 1.0986, + 1.1346, 1.2516, 1.1074, 1.0885, 1.2958, 1.1755, 1.0924, 1.5735, 1.1752, + 1.1061, 1.2396, 0.9582, 1.1471, 1.0999, 1.1809, 1.0361, 1.0556, 1.1514, + 4.4042, 1.1847, 0.9946, 1.0696, 1.2486, 1.2341, 1.0585, 1.1061, 1.1108, + 1.0926, 1.1026, 1.2013, 1.1338, 1.1785, 1.0487, 1.1234, 1.1309, 1.1504, + 1.0983, 1.0937, 1.1015, 1.1060, 1.1004, 1.1597, 1.0480, 1.0293, 1.1476, + 1.1468, 1.0856, 1.1552, 1.1258, 1.1367, 1.0792, 1.1552, 1.2199, 1.0825, + 1.0273, 1.0731, 1.1292, 1.0727, 1.1044, 1.0763, 1.1227, 1.1270, 1.2005, + 1.1391, 1.2601, 1.0759, 0.9800, 1.0601, 1.0598, 1.1195, 1.1037, 1.1359, + 0.9948, 1.0792, 1.1656, 1.0665, 1.0727, 1.0877, 1.0512, 0.9722, 1.0822, + 0.9968, 1.0633, 1.0913, 1.1195, 1.1480, 1.0806, 1.1148, 1.1943, 1.1331, + 1.2171, 1.1391, 1.0036, 1.0460, 1.0607, 1.0629, 1.1052, 1.1523, 1.1389, + 1.0923, 1.0779, 1.2070, 1.1827, 1.1731, 1.1759, 1.0740, 1.1694, 1.0501, + 1.1126, 1.0367, 1.0730, 1.1285, 1.0258, 1.1576, 1.1315, 1.0176, 1.0823, + 1.1763, 1.2783, 1.0968, 1.2560, 1.1037, 1.1538, 1.1152, 1.0765, 1.0244, + 
1.0208, 1.0763, 1.1061, 0.9937, 1.1177, 1.1626, 1.0887, 1.0125, 1.1349, + 1.1448, 1.1865, 1.0696, 1.1315, 1.2748, 1.0957, 1.2077, 1.0925, 1.0961, + 0.4362, 0.8269, 1.1157, 1.1701, 1.0544, 1.1217, 1.1581, 0.9604, 1.1525, + 1.0654, 1.0816, 1.0749, 0.9441, 1.0297, 1.1223, 1.1353, 1.1195, 1.0887, + 1.1435, 1.1358, 1.1909, 1.1669, 1.1307, 1.0546, 1.1769, 1.1563, 1.1995, + 1.0511, 1.0491, 1.0806, 0.9967, 1.1176, 1.0901, 1.0295, 1.1788, 1.1354, + 1.0126, 1.0746, 1.0824, 1.0353, 1.2085, 1.1083, 1.0764, 1.0563, 1.0498, + 1.1317, 1.1567, 1.1507, 1.0341, 1.1843, 1.0600, 1.0345, 1.1261, 1.1050, + 1.0773, 1.0134, 1.1439, 1.3424, 1.1876, 1.0634, 1.1512, 1.0344, 1.0040, + 1.1639, 1.0634, 1.1018, 1.2177, 1.0125, 1.1937, 0.8632, 1.2447, 1.1296, + 1.0644, 1.1010, 1.1356, 1.1632, 1.0518, 1.1655, 1.0770, 1.1398, 1.1057, + 1.2451, 1.1343, 1.1630, 1.1435, 1.0383, 1.1496, 1.1279, 1.2263, 1.0600, + 0.9016, 1.7528, 1.1242, 1.1495, 1.1949, 1.1088, 1.0201, 1.1023, 1.0593, + 1.1124, 1.0015, 0.9765, 1.1217, 1.0880, 1.0674, 1.0305, 1.1003, 1.0986, + 1.1196, 1.0115, 0.9515, 1.0412, 1.0447, 1.1111, 1.2276, 1.0558, 1.0263, + 1.1577, 1.1810, 1.1464, 1.1950, 1.1154, 1.1469, 1.0784, 1.1425, 1.1325, + 0.9748, 1.1008, 1.2164, 1.1759, 1.1185, 1.0715, 1.0538, 1.1341, 1.1335, + 1.1417, 1.0734, 1.1603, 1.1504, 1.0499, 1.0453, 1.1896, 1.0659, 1.1221, + 1.0744, 1.0764, 1.1328, 1.0917, 0.9529, 1.1041, 1.1418, 1.0918, 1.0488, + 1.0744, 1.1292, 1.1925, 1.0147, 1.1584, 1.1822, 1.0020, 1.1705, 1.0952, + 1.1119, 1.1396, 1.0830, 1.0932, 1.0332, 1.1715, 1.1709, 1.2496, 1.0912, + 1.1014, 1.0597, 0.9969, 1.1519, 1.0397, 1.1136, 1.1051, 1.2365, 1.0718, + 0.9613, 1.0971, 0.9209, 1.2023, 1.0763, 1.1217, 1.0613, 1.1238, 1.1573, + 1.1401, 1.2079, 1.0841, 1.1320, 1.0662, 1.0452, 1.0370, 1.1594, 1.1060, + 1.2394, 1.0779, 1.1139, 1.0556, 1.0827, 1.0480, 1.1069, 0.9860, 1.2285, + 1.1220, 1.2044, 1.0099, 1.1480, 1.2758, 1.1016, 1.0456, 1.0680, 1.0806, + 1.1920, 0.9602, 1.1260, 1.0967, 1.0894, 1.0856, 1.0547, 1.1553, 1.1625, + 
1.1529, 1.0368, 1.1202, 1.2378, 1.0886, 1.2828, 1.1100, 1.0849, 1.1190, + 1.0355, 1.1792, 1.1415, 1.1741, 1.1189, 1.1158, 1.0776, 1.2510, 1.2408, + 1.0956, 0.9678, 1.2549, 1.1548, 1.1318, 1.1330, 1.0840, 1.0474, 1.0956, + 1.1239, 1.0163, 1.1235, 1.1863, 1.1862, 1.1465, 1.0752, 1.0207, 1.1893, + 1.2338, 1.1984, 1.0556, 1.0000, 1.1678, 1.0845, 1.6301, 1.1264, 1.0290, + 1.0311, 1.0892, 1.0607, 1.0999, 1.0946, 1.2030, 1.0882, 1.1124, 1.1719, + 1.1915, 1.0641, 1.0739, 1.0830, 1.1107, 1.1630, 1.1085, 1.1904, 1.0162, + 1.1530, 1.0941, 1.2059, 1.0574, 1.1017, 1.0775, 1.2459, 1.0807, 1.0607, + 1.2741, 1.1496, 1.2021, 1.0923, 1.0298, 1.1785, 1.0046, 1.0668, 1.1532, + 1.1207, 1.1642, 1.0606, 1.1673, 1.1646, 1.0249, 0.9916, 1.0798, 0.9993, + 0.9868, 1.0063, 1.1514, 1.3287, 1.0684, 1.0735, 1.1959, 1.1412, 1.2029, + 1.2507, 1.0586, 1.0936, 1.1616, 1.2078, 1.1429, 1.1928, 1.0335, 1.1191, + 1.1077, 1.0536, 1.1241, 1.2090, 1.1281, 1.1716, 1.1917, 1.0936, 1.2262, + 1.2455, 1.1533, 1.1363, 1.1134, 1.2211, 1.0558, 1.0892, 0.9781, 1.0907, + 1.0923, 1.1208, 1.1447, 1.0259, 1.0367, 1.2340, 0.3527, 1.1055, 1.0547, + 1.1139, 1.0485, 1.1752, 1.1343, 1.1493, 1.1050, 1.0833, 1.0571, 1.1405, + 1.1860, 1.1253, 1.0112, 1.0885, 1.1551, 1.0193, 1.0367, 1.0937, 1.2202, + 1.1137, 1.0781, 1.0423, 1.0075, 1.1260, 1.1049, 1.1311, 1.0788, 1.1301, + 1.1468, 1.0984, 1.0819, 1.0745, 1.1147, 1.0569, 1.1257, 1.1980, 1.2418, + 1.1304, 1.0243, 1.1301, 1.1706, 1.1504, 1.0834, 1.0783, 1.1954, 1.1526, + 1.1944, 1.0511, 1.1842, 1.1555, 1.0770, 1.0528, 1.0551, 1.1226, 1.1294, + 1.0864, 1.1444, 1.0887, 1.0853, 1.2392, 1.0416, 1.0600, 1.1678, 1.1766, + 1.1449, 1.1450, 1.2752, 1.1177, 1.1140, 1.1532, 1.1915, 1.1941, 1.0838, + 1.1238, 0.9961, 1.0554, 1.1288, 1.1269, 1.1024, 1.1493, 1.1472, 1.1067, + 1.1309, 1.0237, 1.1736], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.2977e-01, 3.4533e-02, -1.8398e-01, 1.2173e-02, 5.4290e-02, + 7.0416e-02, 6.9125e-02, 6.3581e-02, -6.3971e-02, 2.2436e-02, + 
3.0890e-02, -2.6753e-02, -7.2576e-02, -1.1489e-02, -3.9083e-03, + -2.5749e-02, -4.7372e-03, 3.9891e-02, -4.1990e-02, 2.9299e+00, + -6.0782e-02, 1.1301e-03, 2.0187e-02, 3.1748e-03, -4.5030e-02, + -2.1860e-02, -1.3268e-02, 1.2331e-02, 1.7767e-01, -1.1032e-01, + 6.1784e-02, 1.7215e-02, 2.1386e-03, 3.7935e-02, 1.9422e-02, + -5.7784e-02, 1.7874e-02, -5.3821e-02, -1.6726e-01, -1.2773e-04, + 2.2345e-02, 3.6149e-02, -9.7282e-03, -2.8852e-03, -1.4355e-02, + 7.7343e-03, 5.9046e-02, -6.3718e-02, 3.3771e-02, 5.9758e-02, + 5.2067e-02, -2.4261e-02, 6.2351e-02, 2.6544e-02, 6.7204e-02, + 5.9771e-03, -8.1034e-02, -3.6046e-02, 7.0604e-02, -1.8416e-02, + 1.0626e-02, -3.5582e-02, 2.1963e-02, 3.6199e-02, 3.9028e-02, + 1.8032e-02, 4.5988e-02, 1.1960e-01, -4.5342e-02, 8.4323e-02, + -2.4700e-03, 2.0580e-02, 1.5001e-01, -4.6368e-02, -8.9924e-02, + 3.1414e-02, 4.3287e-04, -6.4596e-02, -3.9071e-02, 3.8661e-02, + -9.9305e-03, 3.0970e-03, -1.4900e-02, 5.3587e-02, -1.1936e-02, + 9.2194e-02, 3.4322e-02, 6.0066e-02, -1.2689e-02, -1.6213e-03, + -3.5178e-02, 2.7978e-02, 2.1645e-02, -3.3493e-02, -9.2206e-03, + -8.8815e-03, -1.5690e-03, -5.0180e-02, -8.9474e-03, 4.4740e-02, + 5.0755e-02, 1.8484e-01, 3.5222e-02, -6.9127e-02, 2.0810e-02, + 2.0733e-02, 1.6405e-02, 3.1962e-04, 6.9797e-03, -2.8417e-02, + -5.4037e-02, -3.7544e-02, 6.0598e-02, 3.6609e-02, 6.1370e-02, + -2.5203e-02, -3.8131e-02, -5.4255e-02, 3.5124e-02, 5.2478e-02, + -7.8859e-02, -1.1023e-01, -5.7933e-02, -2.1626e-02, -1.1320e-01, + 1.1809e-02, -7.8021e-02, 6.3488e-03, -1.6868e-02, -5.0843e-02, + -7.9675e-02, 7.3526e-02, 5.4632e-02, 3.0480e-03, -4.9357e-02, + -7.2188e-03, 4.4292e-03, -5.3419e-03, -2.6700e-02, -3.8512e-02, + 2.4960e-02, 1.5461e-02, 2.2325e-03, -7.1685e-02, 2.8958e-02, + -1.4240e-02, 3.5438e-02, -4.7604e-02, -2.5415e-02, 6.7599e-02, + 1.6583e-01, -4.5225e-02, -7.2389e-03, -1.5869e-02, -4.5997e-02, + -1.9443e-02, 1.2420e-02, -8.4762e-02, 3.5484e-03, -1.5639e-02, + 9.2677e-02, -1.1708e-01, -1.6392e-02, -2.0767e-01, 3.2551e-02, + 
9.6369e-03, -7.5081e-02, 4.5103e-02, -5.6623e-02, 5.0668e-03, + -1.5549e-02, 2.9004e-02, 8.0657e-02, 1.4856e-02, 2.5327e-02, + -3.2253e-02, 2.4493e-02, 2.5677e-02, -2.6763e-02, -3.0572e-02, + 2.1989e-02, -6.9528e-02, 8.1583e-02, -2.7302e-02, 2.2262e-02, + -1.3821e-01, 2.7482e-02, 1.3539e-01, -2.5331e-02, -9.1998e-02, + 3.9943e-02, 2.4111e-02, 3.1964e-02, 4.0118e-02, -6.6720e-02, + -2.8206e-02, -1.7699e-02, -1.0273e-01, -7.2117e-01, 2.6851e-02, + -1.3055e-01, 2.8005e-02, 2.8573e-04, 4.3966e-02, 4.6126e-02, + 1.1302e-01, 1.7980e-02, 6.0565e-02, 7.2146e-02, 2.3062e-02, + 3.7915e-03, 1.7058e-01, -1.2183e-02, 1.1138e-02, -3.0831e-02, + -1.5247e-02, -2.0833e-02, 5.8323e-02, -5.0730e-02, -9.2447e-02, + 8.0505e-03, 3.1202e-02, -6.0900e-02, -6.7741e-02, 4.0749e-02, + -6.8300e-02, 3.5966e-03, -4.5552e-02, 6.6899e-02, -1.6725e-02, + 5.5230e-03, 1.6007e-02, 4.6567e-02, 1.9058e-02, -4.2146e-02, + 4.0774e-02, -3.5646e-02, 6.9086e-02, 6.9511e-02, 1.3817e-02, + 3.6827e-02, 5.5455e-02, 3.5128e-02, 2.2640e-02, 2.5301e-02, + 3.7564e-02, 6.2913e-02, 1.1184e-01, 4.4412e-02, -4.5695e-02, + 3.3184e-02, 9.0154e-02, 9.1126e-02, -6.9666e-02, 1.1205e-01, + -3.3497e-02, 2.1879e-02, -1.0857e-01, -5.0966e-03, -2.1120e-02, + -1.6438e-02, 1.3116e-02, 1.7569e-02, -4.8693e-02, -3.2089e-02, + 2.1517e-02, -8.8201e-02, 1.0595e-02, -1.0550e-02, -6.0092e-02, + 8.4827e-03, 4.6030e-02, 5.5492e-02, -3.3597e-02, 5.8026e-04, + 6.9805e-02, -7.8620e-02, -5.8343e-03, 4.6980e-02, -5.9122e-02, + 5.9284e-02, 1.1318e-01, -5.5452e-02, 4.8808e-02, 2.4126e-02, + 2.5154e-02, 5.0583e-03, 2.2864e-04, 6.1358e-02, -8.5796e-02, + -9.8663e-03, -1.4949e-02, 4.5410e-02, 2.3115e-02, 2.4214e-02, + -2.1457e-02, 6.9584e-02, 1.6610e-02, 7.6730e-03, 1.4294e-02, + 1.4369e-02, 1.2196e-02, -1.2681e-02, 6.1613e-03, 1.9278e-02, + -1.4352e-02, 4.2145e-02, -1.1340e-01, 6.2743e-02, -6.3384e-02, + 1.6000e-02, -2.0184e-02, 2.0241e-02, -6.9196e-02, 3.1063e-02, + -1.2378e-01, -3.9613e-03, 1.5876e-02, 7.3869e-02, -3.9551e-02, + -1.2366e-02, 
1.5304e-01, -5.1567e-02, 2.8741e-03, -4.6380e-01, + -1.3505e-05, 5.2405e-02, -1.0747e-02, 8.6793e-03, -5.3858e-02, + 1.2035e-01, -7.1143e-02, 4.2165e-02, 4.5190e-04, 4.4525e-02, + 7.1364e-02, -1.9798e-02, 1.1052e-02, -5.2190e-02, 7.9460e-03, + -6.8076e-02, -1.7711e-02, -6.1290e-02, -1.2022e-02, 4.6646e-02, + -1.8673e-02, 6.7254e-02, 4.9145e-02, -1.0084e-03, -4.2727e-02, + 1.5223e-02, 1.3736e-02, 2.6416e-02, 4.2571e-02, -9.0382e-03, + -5.3650e-02, -3.2191e-02, -2.2234e-02, 2.5080e-02, -4.0795e-02, + 4.0941e-02, -1.2991e-02, -2.1576e-02, -5.1419e-02, 1.0806e-02, + -2.5446e-02, 5.2107e-02, 4.3128e-02, -3.1123e-02, 6.3914e-03, + 6.4913e-03, -5.8185e-03, 1.7926e-03, -2.6158e-03, -2.5587e-02, + 5.5986e-02, 8.1868e-02, 2.9394e-02, 1.6433e-02, -2.4356e-02, + -4.0253e-02, 7.8868e-02, 1.7375e-02, 1.4129e-02, -4.3368e-02, + -4.3876e-02, -1.3003e-02, -4.8500e-02, -3.6869e-02, -4.2772e-02, + -7.1692e-03, -1.4630e-01, -3.6647e-02, -7.6028e-02, 9.2405e-02, + 5.4635e-03, 3.8962e-02, 3.1438e-02, 2.9575e-03, 9.4233e-03, + 7.2004e-02, -3.4432e-02, 4.5634e-02, -8.9947e-03, -7.8658e-02, + -9.7364e-02, -7.2371e-02, -3.9383e-02, -3.5334e-02, -1.1011e-02, + -1.1150e-02, 8.6494e-03, 5.0095e-02, -2.7079e-02, -7.5610e-02, + -2.1079e-01, 2.8796e-02, 1.3110e-02, -3.3392e-02, -4.5108e-02, + 4.0402e-04, -2.8353e-02, 3.7843e-02, 1.1853e-02, 2.7821e-02, + 4.5922e-02, -1.2891e-01, -7.2979e-02, 2.3094e-02, 1.4984e-02, + 1.5402e-02, -7.1183e-02, -4.1936e-03, 3.6530e-02, -4.5188e-02, + 2.7603e-02, 2.5460e-02, 5.8202e-02, 8.3031e-02, -4.8382e-02, + 3.7847e-02, -6.2521e-02, -2.2624e-04, -7.8755e-02, 8.5045e-02, + -9.0725e-02, 3.5782e-02, -3.7057e-02, 1.0535e-01, 2.0793e-03, + 1.2219e-02, 8.5230e-03, -7.6473e-02, 1.6504e-02, 3.2562e-02, + -4.8595e-02, 5.7389e-02, 1.1120e-02, -3.3858e-02, -1.0108e-01, + 2.3178e-02, 4.0618e-02, -2.3842e-02, 2.9851e-02, -4.7033e-02, + -5.0536e-02, 4.2567e-02, -7.8787e-02, 3.5106e-02, 1.1993e-03, + -1.4537e-02, -5.1278e-02, -2.4576e-02, 4.4611e-02, -5.3965e-02, + 3.4502e-02, 
7.4910e-02, 3.1964e-02, -3.4592e-02, 8.4762e-02, + 6.6939e-02, -7.5200e-02, 4.3292e-02, 1.6985e-01, -6.6695e-02, + 2.4553e-02, -7.5039e-03, 5.2509e-02, 8.2030e-02, 2.4376e-02, + 4.2058e-03, 2.3543e-02, 6.1570e-03, -1.6743e-02, -9.7259e-02, + 2.4244e-02, -2.2547e-02, 2.2843e-02, -4.1545e-02, -1.6013e-02, + 8.3000e-03, -8.5513e-02, 4.4356e-02, 5.1328e-03, -1.9995e-02, + 7.8072e-02, 2.3570e-02, 2.9447e-02, 7.6935e-02, 9.6576e-03, + -6.4136e-02, 3.2867e-02, -4.5160e-02, 3.3648e-02, -1.7392e-02, + 5.8089e-03, 2.4739e-02, 1.0378e-01, 9.3410e-03, -5.3627e-02, + -3.4343e-02, -2.6351e-02, -2.9343e-02, -2.2353e-02, -4.7763e-02, + -1.1366e-02, 5.2010e-04, -4.6743e-03, 1.6199e-02, -7.8063e-02, + 1.9897e-04, 2.7052e-02, 4.3959e-02, -2.7493e-02, 1.4008e-02, + -4.9083e-02, -6.6418e-02, -1.7566e-02, -6.1558e-02, -3.6589e-03, + -3.0492e-02, 2.8641e-02, -5.1314e-02, 3.1308e-02, -4.2798e-02, + 4.9607e-02, 1.6081e-02, 8.1581e-03, 6.2736e-02, 4.4093e-02, + 3.3409e-02, -6.3776e-03, -1.1995e-02, 5.1635e-02, 2.1120e-02, + -1.7519e-02, 7.2731e-02, -1.4197e-02, 3.4120e-02, 8.4901e-02, + -7.1668e-02, 4.6460e-02, 2.7229e-02, 1.6466e-02, 1.3243e-01, + 6.2105e-02, 2.1720e-02, 3.3344e-03, -1.4666e-02, 3.0143e-02, + 4.9758e-02, 3.8548e-02, 1.2586e-02, -1.0586e-02, 4.7062e-02, + 1.7897e-02, 1.9226e-02, 2.5062e-02, 4.5023e-02, -3.5047e-02, + -1.0945e-01, 1.2781e-02, 3.8052e-02, 8.2197e-02, 2.9414e-02, + 1.9449e-02, 3.5261e-02, -6.2046e-02, -2.6565e-02, -2.5523e-02, + -6.0374e-03, 7.5064e-02, 5.9772e-02, 7.9446e-03, 5.8369e-02, + -2.9503e-03, 1.9437e-02, -1.6286e-02, -1.0209e-02, 4.0471e-02, + -5.8021e-02, -1.2354e-01, 2.2840e-02, -2.4538e-02, -3.2923e-02, + 1.1068e-03, -4.9417e-02, -2.2733e-01, 2.4994e-02, -3.4969e-02, + 6.0766e-02, -5.1679e-03, -2.4937e-02, -6.0451e-03, -3.3841e-02, + -5.4637e-02, 4.4074e-02, 8.5792e-02, 4.7601e-02, -1.5414e-01, + 2.0098e-02, 1.3737e-04, 3.5567e-02, 2.6228e-02, 1.7048e-02, + 2.6366e-03, 1.0959e-02, 1.1537e-03, 3.4626e-02, -2.3026e-02, + 6.3907e-02, 4.3722e-02, 
7.6725e-02, 9.1198e-02, 2.5575e-02, + 4.8980e-02, -6.5434e-02, -1.2302e-02, 1.1802e-02, -7.0467e-02, + -4.3942e-02, -6.8340e-02, 6.6216e-02, -3.8507e-02, 2.3285e-02, + 4.4583e-02, 4.8039e-02, 6.5735e-02, -1.2380e-02, 2.7694e-02, + 6.3951e-02, -2.0262e-02, 6.0663e-03, 4.8698e-02, 2.6361e-03, + 1.9225e-02, -8.2180e-02, -4.2692e-02, 1.9436e-02, 6.2999e-02, + -9.6810e-02, -2.8198e-02, -3.4410e-02, -3.2170e-03, 5.5013e-02, + 8.0327e-03, -6.7895e-02, -6.1232e-02, 4.2554e-02, -2.2628e-02, + -1.9596e-01, 2.4340e-02, -3.6419e-03, -1.3354e-02, -4.5575e-02, + 6.4358e-02, -6.6430e-02, 1.7512e-02, 1.6014e-03, 4.6003e-02, + -3.3494e-02, -3.2714e+00, -1.9003e-02, 1.7152e-02, -2.3923e-02, + -8.8490e-02, -1.3093e-02, -1.1635e-02, -4.9877e-03, -2.2200e-02, + -5.4845e-02, -2.5154e-02, -2.6730e-02, 4.0258e-03, -6.4070e-02, + 5.7675e-03, -5.2886e-02, -1.0505e-01, 2.4540e-02, -7.0383e-02, + 6.6775e-02, 7.7122e-02, 7.9000e-03, -1.3582e-02, -1.0646e-01, + -1.6877e-03, 7.0112e-02, -8.1721e-03, 3.1691e-02, 1.2633e-02, + -1.3226e-02, 1.1874e-01, 6.4461e-02, 2.3030e-02, -1.8555e-03, + -7.8534e-03, -9.0699e-03, 4.2436e-02, -4.7123e-02, 4.4143e-03, + 4.1099e-02, -1.3332e-02, 2.2844e-02, -8.3630e-02, 3.2066e-02, + 5.8189e-02, -1.0272e-01, 9.2019e-02, 1.7425e-03, 6.0141e-02, + 4.8672e-02, 3.7271e-02, 1.6245e-02, 5.3098e-02, 2.1186e-03, + 2.7736e-02, -4.9052e-02, -4.4594e-02, -2.6920e-02, -3.5221e-02, + 8.8737e-02, -8.3154e-02, -7.8560e-02, 4.0014e-02, -3.7013e-03, + 5.5303e-02, -3.8455e-02, 1.6727e-02, 4.0027e-02, -7.6386e-02, + -2.0709e-02, -1.9373e-02, -6.6888e-02, -2.6983e-02, 5.3923e-02, + 2.7105e-02, -4.4220e-02, 8.9578e-02, -4.0661e-02, -6.4093e-02, + -1.7248e-02, 5.8892e-02, 5.3766e-02, -9.9480e-02, -6.1721e-02, + -4.2307e-02, 1.8172e-02, 2.4058e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0344, 0.0046, 0.0019, ..., -0.0018, 0.0054, -0.0178], + [-0.0100, 0.0007, 0.0120, ..., -0.0170, 0.0012, -0.0188], + [-0.0169, 0.0147, -0.0102, ..., 0.0031, -0.0298, 
0.0021], + ..., + [ 0.0024, 0.0114, 0.0381, ..., 0.0197, -0.0068, 0.0028], + [-0.0170, -0.0138, 0.0048, ..., 0.0125, -0.0223, 0.0095], + [-0.0003, -0.0298, -0.0086, ..., -0.0083, 0.0122, -0.0196]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2842, -0.3364, 0.0483, ..., -0.4465, -0.3184, -0.2751], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0024, 0.0274, 0.0246, ..., 0.0208, 0.0061, 0.0094], + [-0.0033, 0.0003, -0.0214, ..., 0.0064, 0.0232, 0.0025], + [-0.0203, 0.0050, -0.0124, ..., 0.0002, -0.0194, -0.0300], + ..., + [-0.0107, 0.0104, 0.0014, ..., 0.0129, -0.0087, 0.0057], + [-0.0105, -0.0092, 0.0100, ..., 0.0361, -0.0151, -0.0012], + [ 0.0143, 0.0048, 0.0093, ..., 0.0324, -0.0147, -0.0111]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 2.6382e-02, 3.8788e-02, -3.9490e-02, 1.4725e-02, 9.3918e-03, + 1.0445e-02, -6.1737e-02, 3.3844e-02, 5.9265e-02, -1.2260e-02, + 4.4891e-02, -4.0771e-02, -6.4583e-03, -2.7496e-02, 3.0258e-02, + -5.0850e-03, 7.0000e-03, -2.2263e-02, -2.8976e-02, -1.4990e-01, + 1.4435e-02, -6.6566e-03, 8.3923e-02, 1.2138e-02, 3.6194e-02, + 9.5062e-03, 1.7868e-02, -9.9373e-04, 1.6174e-01, -1.4526e-02, + 3.1921e-02, 3.1769e-02, 1.8433e-02, 2.9099e-02, 2.5482e-02, + -5.1941e-02, 6.6162e-02, -1.1358e-03, -9.9945e-03, -2.1439e-02, + -3.4546e-02, -2.1957e-02, -5.6396e-02, 7.5836e-03, -2.0172e-02, + 4.1412e-02, 3.6560e-02, 2.5604e-02, 2.8915e-02, -2.9007e-02, + 5.4413e-02, -5.9128e-03, 2.4986e-03, 3.2684e-02, 4.9347e-02, + -2.2034e-02, -3.1586e-02, -8.4534e-03, 2.2095e-02, 1.0857e-02, + -2.1271e-02, -3.2949e-04, 4.3144e-03, -2.1332e-02, 1.5564e-02, + -2.1744e-02, 1.0406e-02, 6.1798e-02, 1.3367e-02, -2.4567e-02, + -2.5330e-03, 1.4069e-02, 4.8737e-02, -9.3994e-03, -6.3843e-02, + 1.5808e-02, 1.1917e-02, 2.9114e-02, -3.3813e-02, -6.1531e-03, + 3.9368e-03, -9.1614e-02, -2.0386e-02, -4.3488e-02, 8.3389e-03, + 3.0151e-02, 
1.6449e-02, -4.2114e-03, -3.4271e-02, -2.9683e-04, + -1.4572e-02, -1.3908e-02, -6.4812e-03, -1.8646e-02, 7.5035e-03, + 3.2166e-02, 1.9150e-02, -1.2856e-02, 3.4241e-02, -7.5806e-02, + -2.2964e-02, 1.0046e-01, -3.9246e-02, -3.3386e-02, -2.4776e-03, + -5.4626e-03, 3.8849e-02, 6.4453e-02, 1.2329e-02, -9.5415e-04, + -4.9858e-03, -3.2730e-03, 5.3215e-03, 1.9150e-02, 6.1554e-02, + -3.2684e-02, -5.8594e-02, -3.1555e-02, 2.5665e-02, -4.1260e-02, + 4.7668e-02, 2.7023e-02, 1.1060e-01, 6.0425e-02, 1.0864e-02, + 1.5717e-02, 9.3155e-03, -3.3112e-02, 2.0508e-02, -7.3471e-03, + 3.4924e-03, 3.2562e-02, 5.3375e-02, 2.5955e-02, 3.5339e-02, + 4.8676e-02, 3.1464e-02, 2.3834e-02, -3.1647e-02, 7.5607e-03, + -2.0996e-02, -1.4824e-02, -1.5793e-02, -4.7180e-02, -1.9836e-03, + 9.1410e-04, 2.6779e-02, -1.4099e-02, -2.5208e-02, -1.8509e-02, + -4.6844e-03, -5.9700e-03, 1.4229e-02, -8.0872e-03, -2.7222e-02, + 2.1942e-02, 1.4473e-02, 2.4582e-02, 1.5747e-02, 1.8356e-02, + -3.4668e-02, 9.4376e-03, -1.3405e-02, -7.5912e-03, -2.1240e-02, + -1.2489e-02, -3.8452e-02, -4.0710e-02, -4.2297e-02, -2.2614e-02, + -7.8613e-02, 1.4809e-02, 3.0228e-02, -5.7037e-02, 5.4565e-02, + 3.1738e-02, 2.1687e-03, 5.0659e-02, 3.1261e-03, 5.6114e-03, + 4.1138e-02, 4.2633e-02, -3.4027e-02, -4.6692e-02, 1.8311e-02, + -1.1124e-02, 2.0264e-02, -1.9775e-02, 2.8580e-02, 5.5170e-04, + 6.7568e-04, 4.3907e-03, 3.4088e-02, -4.0771e-02, 3.2318e-02, + -3.0090e-02, -4.9683e-02, -9.3918e-03, -9.6130e-02, -3.5370e-02, + -2.3651e-02, 8.7585e-03, -1.2131e-02, 8.1711e-03, -5.9113e-02, + 2.9007e-02, -1.7853e-03, -1.6495e-02, -1.4244e-02, -4.5654e-02, + 5.2551e-02, -5.3528e-02, 2.9312e-02, -2.1629e-03, -8.3557e-02, + -4.4922e-02, 4.2023e-02, -1.6342e-02, 1.0269e-02, -9.5139e-03, + 1.3206e-02, 3.4515e-02, -3.6621e-02, -1.7471e-02, 3.8605e-02, + 1.9806e-02, -3.6652e-02, -4.8248e-02, -3.5614e-02, -1.7563e-02, + 4.3823e-02, 2.7390e-02, -1.6220e-02, -3.8483e-02, -7.6355e-02, + 2.3300e-02, 1.8158e-02, -1.2329e-02, 1.6434e-02, -1.2016e-02, + 
-6.6566e-03, -6.5735e-02, 6.1569e-03, 1.7715e-02, 1.4160e-02, + -2.5757e-02, 7.2365e-03, 1.5454e-01, -5.0735e-03, -5.3375e-02, + 4.7577e-02, 8.2855e-03, -4.5197e-02, 1.9684e-02, 1.8723e-02, + 3.1006e-02, 2.0599e-02, 4.3243e-02, -4.3823e-02, -4.9011e-02, + -1.9409e-02, 1.7181e-02, 3.3356e-02, -6.6711e-02, -5.2277e-02, + 4.2908e-02, -4.3373e-03, -1.8585e-02, -1.3092e-02, 9.3384e-03, + -3.2135e-02, -3.2898e-02, 6.1615e-02, -2.2934e-02, -3.6865e-02, + -2.4246e-02, 6.7200e-02, -3.8330e-02, -2.7725e-02, -2.3865e-02, + 2.1072e-02, 4.9530e-02, 6.6986e-03, -1.8402e-02, 1.9516e-02, + -1.5631e-03, 4.5868e-02, -2.7618e-02, -3.1555e-02, -4.6005e-03, + -1.1055e-02, 2.8976e-02, -1.4557e-02, -1.5350e-02, -1.6739e-02, + 2.7725e-02, -4.0466e-02, 2.0466e-03, -5.3833e-02, 2.7725e-02, + -4.4983e-02, -6.2675e-03, -8.6594e-03, 1.0460e-02, 6.8542e-02, + -5.1361e-02, 3.8727e-02, -1.6266e-02, -6.6757e-03, -6.0089e-02, + -3.4119e-02, 4.9377e-02, -4.2633e-02, 2.1225e-02, -4.9255e-02, + -1.8417e-02, -2.4338e-02, -2.1500e-02, 1.9897e-02, -2.2430e-02, + 1.3367e-01, 1.7975e-02, -3.0258e-02, 1.3741e-02, 1.2725e+00, + 3.6407e-02, -6.1523e-02, -2.0874e-02, 1.5045e-02, 9.4652e-04, + 4.9347e-02, 2.6367e-02, 3.5522e-02, -6.5613e-03, 4.9011e-02, + 3.9398e-02, 3.3051e-02, -1.3000e-02, 1.8005e-02, 7.5134e-02, + -5.1331e-02, 3.3447e-02, -3.4363e-02, -8.3084e-03, -5.2399e-02, + -1.8814e-02, 1.7899e-02, -7.4585e-02, 3.5217e-02, -4.7340e-03, + 3.0380e-02, -6.2042e-02, -2.7985e-02, -1.7410e-02, -2.3575e-02, + -3.9558e-03, 4.7760e-03, 2.8534e-02, -2.9129e-02, -3.7231e-02, + 1.6571e-02, 4.0680e-02, 3.4760e-02, -8.8501e-03, -8.6129e-05, + 3.5896e-03, 5.3406e-03, -4.6844e-03, -1.2199e-02, -6.5857e-02, + 4.1695e-03, -3.2959e-02, 1.4992e-02, -2.4643e-02, -6.1279e-02, + 6.1859e-02, 2.4109e-02, -4.8798e-02, 5.0812e-02, 3.2471e-02, + -2.4582e-02, 5.6427e-02, -2.0203e-02, 3.5309e-02, -3.2562e-02, + -1.9012e-02, -1.5717e-02, -3.3875e-02, 1.2756e-02, 6.0081e-03, + 1.4915e-02, -1.3599e-01, -3.0975e-02, -2.1469e-02, 
9.8953e-03, + -2.6417e-03, -4.9162e-04, -3.3325e-02, 1.4587e-02, 1.0902e-02, + 6.6833e-02, -3.6591e-02, -3.1342e-02, 2.7847e-02, -2.5330e-02, + 9.3460e-03, 2.7695e-02, -2.6413e-02, -3.3264e-02, 1.6281e-02, + 7.5722e-03, 2.1164e-02, -4.0619e-02, 1.8555e-02, 2.2858e-02, + -9.9548e-02, -1.0910e-02, -1.8167e-03, 1.4282e-02, -3.1647e-02, + -3.6804e-02, -1.3138e-02, 2.3308e-03, 2.8671e-02, 3.0426e-02, + -5.7922e-02, 2.6340e-03, 1.5732e-02, 2.9373e-02, -2.4246e-02, + -1.2260e-02, 3.4607e-02, 1.7868e-02, 3.2825e-03, -3.2928e-02, + 2.4643e-02, 5.6610e-02, 6.8779e-03, -6.8909e-02, 8.5571e-02, + 8.1718e-05, 2.9480e-02, 4.8370e-02, -1.5967e-01, -1.3496e-02, + 5.0781e-02, 1.3247e-03, -1.0094e-02, 1.5640e-02, -6.2332e-03, + 1.5610e-02, 9.1858e-03, -1.7349e-02, 1.2405e-02, 3.3447e-02, + 1.5305e-02, 2.1362e-02, -3.0045e-02, 7.9575e-03, -5.5878e-02, + -2.7985e-02, -1.7838e-02, -2.4002e-02, -8.2779e-03, -5.5504e-03, + 4.4189e-02, 1.4275e-02, 4.2419e-02, -1.3809e-02, 2.4109e-02, + 4.2381e-03, -1.3628e-03, 2.9755e-02, -1.9045e-03, 5.3711e-02, + 8.7509e-03, -1.8759e-03, 6.0394e-02, -6.9702e-02, 1.0878e-04, + -3.5405e-04, -2.3518e-03, -2.9907e-03, 2.0813e-02, 6.1768e-02, + -5.4871e-02, 7.3120e-02, 6.2317e-02, -1.2886e-02, -7.6675e-03, + 6.4636e-02, 4.2725e-02, 2.7344e-02, 2.7428e-03, 2.3163e-02, + -5.4932e-02, 2.2068e-03, 8.9798e-03, -1.8127e-02, -1.5007e-02, + -2.4323e-02, -3.9337e-02, -2.9999e-02, -2.2545e-03, -3.8853e-03, + 5.4962e-02, -2.0828e-02, -2.2125e-02, 1.5736e-03, 3.0167e-02, + -2.5673e-03, -7.9163e-02, 4.0924e-02, 3.6883e-04, 3.6804e-02, + -1.3237e-02, 2.9697e-03, 1.9150e-02, -4.1016e-02, -4.3701e-02, + 2.6581e-02, -2.0020e-02, -1.6772e-01, -4.7394e-02, 2.4277e-02, + 6.9008e-03, -1.0155e-02, 1.4351e-02, 3.6438e-02, -4.9377e-02, + -4.8409e-03, 3.4149e-02, 2.7313e-02, -2.9572e-02, -3.3112e-02, + -5.9845e-02, 2.7313e-02, -3.5461e-02, 2.0905e-02, 5.1270e-02, + -3.5248e-02, -5.9723e-02, 3.2158e-03, 3.7109e-02, -1.0297e-01, + 6.2439e-02, -1.8906e-02, 1.1749e-02, 5.1544e-02, 
-2.0386e-02, + 5.8563e-02, 4.2877e-02, -3.7689e-02, 4.1199e-03, -5.0964e-02, + 2.1423e-02, -5.1697e-02, -9.2468e-02, 5.4016e-02, 3.0823e-02, + -2.9541e-02, 2.8229e-02, 4.6661e-02, -2.6871e-02, 1.9348e-02, + -1.6891e-02, 2.2781e-02, -5.3444e-03, -2.2461e-02, 2.5009e-02, + -3.4332e-02, -9.6863e-02, -3.2379e-02, 1.2527e-02, -4.8737e-02, + -4.1260e-02, -1.0605e-02, -2.6459e-02, 1.7792e-02, 1.6296e-02, + -3.8452e-03, 4.3274e-02, -5.2582e-02, -2.5513e-02, 2.2491e-02, + -1.9272e-02, -4.9652e-02, -3.3142e-02, -2.8183e-02, -7.0992e-03, + -4.8752e-03, -1.1902e-02, -3.9062e-03, 2.5681e-02, -1.3130e-02, + -2.2751e-02, 7.4402e-02, 7.3662e-03, -4.3488e-03, 2.6505e-02, + -5.1178e-02, -2.8885e-02, -2.3651e-02, 3.8483e-02, 6.0501e-03, + -2.3956e-02, -6.5727e-03, -1.4000e-03, 2.5726e-02, -2.3315e-02, + -1.4435e-02, 8.8348e-03, -2.9083e-02, -5.4871e-02, 1.7960e-02, + 5.2765e-02, 1.8509e-02, 1.7014e-02, 6.7566e-02, -1.2280e-01, + 1.5450e-02, 1.2482e-02, 4.9622e-02, -6.4392e-02, -4.1229e-02, + -2.8473e-02, 1.1497e-02, -1.0080e-03, 4.4098e-02, -7.1716e-03, + -9.2468e-03, -1.5945e-02, 5.2429e-02, -2.1103e-02, -2.1332e-02, + 1.1314e-02, -2.3132e-02, 1.9638e-02, 9.2468e-03, 2.2602e-03, + -2.0538e-02, -6.8169e-03, -5.4047e-02, -5.6366e-02, 6.0959e-03, + 3.7567e-02, 6.7444e-03, 7.8735e-03, 7.1899e-02, 1.2878e-02, + -5.0140e-02, -1.4999e-02, 8.0566e-03, -2.7252e-02, -9.1400e-03, + 2.3514e-02, -2.5391e-02, 2.5883e-03, -9.8114e-03, 1.1627e-02, + 1.1917e-02, -2.7130e-02, -5.7892e-02, 1.5907e-03, 8.6487e-02, + 9.9564e-03, -2.2736e-02, -1.4168e-02, 1.7029e-02, 4.1260e-02, + -6.8054e-03, 1.3733e-03, 1.6312e-02, 1.1803e-02, -2.8137e-02, + 6.4453e-02, 3.0670e-02, -1.2177e-02, -3.4973e-02, -4.0779e-03, + -2.6901e-02, -7.7477e-03, 5.8411e-02, 1.8631e-02, 6.4758e-02, + 2.3270e-02, 8.6060e-02, -3.5370e-02, -2.8580e-02, -2.3117e-02, + -1.4046e-02, -1.1757e-02, 7.2876e-02, -2.0523e-02, -4.0344e-02, + -6.5186e-02, 4.2206e-02, 8.0795e-03, -1.8448e-02, -2.6596e-02, + -2.4967e-03, 2.4490e-03, 2.6062e-02, 
-1.8570e-02, -2.8748e-02, + 1.5305e-02, 1.9318e-02, 2.8839e-02, 3.0956e-03, -1.1917e-02, + -6.3972e-03, 5.5084e-02, 8.3542e-03, -4.0070e-02, 2.5085e-02, + -3.1372e-02, 1.6403e-02, -2.3087e-02, -3.0289e-03, -4.4281e-02, + 5.6946e-02, 3.4912e-02, -2.2324e-02, -3.3627e-03, 9.8705e-04, + 2.9251e-02, 1.0345e-02, -1.3954e-02, -2.2217e-02, -1.3290e-02, + 3.2288e-02, -2.3560e-02, -2.2995e-02, 5.7709e-02, -1.8967e-02, + -1.1040e-02, -3.9490e-02, 1.2451e-02, -1.3283e-02, 2.0813e-02, + -1.8377e-03, -1.5808e-02, 2.8564e-02, -3.5839e-03, -2.7176e-02, + -2.0081e-02, -2.1815e-04, -2.1378e-02, -9.3317e-04, 3.3234e-02, + -3.9917e-02, -3.8116e-02, -3.2074e-02, -8.1406e-03, 1.5671e-02, + -2.3865e-02, -1.2421e-01, 3.7231e-02, -1.7975e-02, 4.6875e-02, + 2.1973e-02, -2.2369e-02, 3.1281e-02, 3.8300e-03, 1.0979e-02, + -2.8793e-02, -1.0155e-02, -1.5823e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.8660, 1.7784, 1.8131, 1.9032, 1.8201, 1.8358, 1.9317, 1.9030, 2.0080, + 1.7371, 1.9171, 1.7857, 1.9481, 1.8647, 1.9881, 1.9470, 1.9653, 1.8904, + 1.9133, 2.0480, 1.8218, 1.8158, 1.8040, 1.7625, 1.8326, 1.8251, 1.8112, + 1.8591, 2.3300, 1.8212, 1.8430, 1.8733, 1.8304, 1.9012, 2.0362, 1.8383, + 1.9599, 1.7815, 1.6626, 1.9035, 1.8171, 1.7817, 1.9178, 1.8311, 2.0381, + 1.8704, 1.8629, 1.8840, 1.8549, 1.8668, 1.7745, 1.8497, 1.9727, 1.9021, + 1.9105, 1.8380, 1.9000, 1.8313, 1.9636, 1.8302, 1.8879, 1.9270, 1.8041, + 1.8294, 1.8350, 1.6809, 1.7347, 1.9509, 1.8550, 2.2307, 1.8262, 1.8292, + 2.1996, 1.8380, 1.7901, 1.9650, 1.9438, 1.7403, 2.0721, 1.7517, 1.9311, + 1.8478, 1.9061, 1.8099, 1.8356, 2.1220, 1.8942, 1.8027, 1.8589, 1.8872, + 1.9714, 1.8759, 1.8909, 1.8603, 1.8456, 1.7588, 1.8874, 2.0046, 1.7945, + 1.8890, 1.9014, 3.0165, 1.8195, 1.8984, 1.8578, 1.9651, 1.9261, 1.9477, + 1.7389, 1.9035, 1.9816, 1.8913, 1.8726, 1.7762, 1.8504, 2.0517, 2.0671, + 1.8934, 1.7774, 1.9831, 1.8847, 1.8045, 1.9016, 1.8355, 1.9538, 1.8974, + 1.8131, 1.8118, 1.8685, 
1.9706, 1.9202, 1.7229, 1.9807, 1.8030, 1.8698, + 1.8555, 2.0546, 1.9435, 1.8757, 1.8427, 1.7255, 1.7774, 1.9838, 1.8948, + 1.8492, 1.8438, 1.7917, 1.9375, 1.9346, 1.9279, 1.8923, 2.0421, 1.8791, + 1.8829, 1.8148, 1.5478, 1.8369, 1.9054, 1.8246, 1.8708, 1.9425, 1.9012, + 1.8279, 1.5597, 1.7954, 1.8119, 1.8879, 1.8401, 1.9324, 1.8507, 1.7592, + 1.9193, 2.0096, 2.0844, 1.8840, 1.9689, 1.9379, 1.9933, 1.8427, 1.7667, + 2.0733, 1.8381, 1.7551, 1.7741, 1.8745, 2.0625, 1.8530, 0.6309, 1.8452, + 1.8917, 1.9017, 1.8086, 1.9182, 1.9565, 1.9748, 1.8523, 1.7366, 1.9107, + 0.8677, 1.8278, 1.8390, 1.9599, 1.8796, 1.8701, 1.7979, 1.9388, 1.7285, + 1.9202, 1.9327, 1.8762, 1.8762, 1.9806, 2.0151, 1.9425, 1.9414, 1.8690, + 1.9037, 1.9074, 1.8259, 1.8553, 2.0224, 1.9358, 1.9235, 1.9911, 1.9111, + 1.8020, 1.8777, 1.9436, 1.9082, 1.9094, 1.9674, 1.8680, 1.7909, 1.9865, + 1.9500, 1.7919, 1.9436, 1.7509, 1.8738, 1.8205, 1.9345, 2.0570, 1.9744, + 1.8709, 1.9073, 1.9121, 1.9625, 2.4609, 1.8394, 2.0095, 1.8269, 1.9627, + 1.7241, 1.9025, 1.7354, 1.8871, 1.9433, 1.7587, 1.9468, 1.9635, 1.8410, + 1.8963, 1.7291, 1.8991, 1.7483, 1.8443, 1.8082, 1.9849, 2.0076, 1.7678, + 1.9225, 1.8132, 2.2230, 1.6371, 1.8443, 1.9185, 1.8186, 1.8701, 1.9011, + 1.8227, 1.9548, 2.0720, 1.8787, 1.8453, 1.9034, 1.8937, 2.1512, 1.7902, + 1.9223, 1.7259, 1.9361, 1.8650, 1.8079, 1.8986, 1.9573, 1.7364, 1.9630, + 1.8175, 1.8065, 1.8854, 1.9771, 1.7975, 1.8899, 1.8159, 2.0447, 1.7576, + 1.8268, 1.9224, 1.9002, 1.8067, 2.0081, 1.9586, 1.6762, 1.9101, 1.8413, + 1.7823, 1.9550, 1.9823, 1.7837, 1.8970, 2.0231, 1.8636, 1.7673, 1.9067, + 2.3982, 2.0804, 1.8810, 1.8703, 1.8265, 1.7580, 1.8966, 1.8586, 1.8388, + 1.9037, 1.8352, 1.8316, 1.8336, 1.8934, 2.0168, 1.9769, 2.0078, 1.8481, + 1.9293, 1.8833, 1.8839, 1.8644, 1.8012, 1.8637, 1.9363, 1.8971, 1.8371, + 1.9506, 1.8396, 2.2401, 1.8631, 1.9060, 1.7227, 1.9123, 1.8348, 1.8611, + 1.8895, 1.8920, 1.6990, 1.8529, 2.0385, 1.8732, 1.8881, 1.7646, 1.8885, + 2.1197, 1.8467, 1.7632, 
1.8907, 2.0283, 1.9200, 1.7656, 1.8061, 1.9139, + 1.9146, 1.7305, 1.8498, 1.6375, 1.7809, 1.9367, 2.0803, 1.9106, 1.9903, + 1.9454, 1.8341, 1.9366, 2.0857, 2.1631, 1.8223, 1.8999, 1.8695, 1.8775, + 1.7864, 1.9755, 1.9165, 1.8118, 1.8268, 1.9509, 1.8592, 1.8168, 1.8380, + 1.8159, 1.8603, 1.9215, 1.8509, 1.7259, 1.8962, 1.8724, 1.9343, 1.8903, + 1.8474, 1.6385, 1.8949, 1.8523, 1.9570, 1.8141, 1.8389, 1.8728, 1.7764, + 1.9630, 1.8911, 1.8530, 1.8837, 1.8320, 2.0132, 1.8928, 1.7913, 1.8610, + 1.8055, 1.8332, 1.8728, 1.8835, 1.9436, 1.8447, 1.8912, 1.9659, 1.8275, + 2.0092, 1.9458, 1.8524, 1.8043, 1.8004, 1.7571, 1.7796, 1.8534, 1.9481, + 1.8991, 1.8947, 1.8967, 1.9415, 1.8822, 1.9627, 1.8128, 1.9307, 1.9071, + 1.8921, 1.8939, 1.7313, 1.8759, 1.8561, 1.7990, 1.7605, 1.7864, 1.8529, + 1.8590, 1.8761, 1.9702, 1.8165, 1.9052, 1.8691, 1.8814, 1.8573, 1.9790, + 1.8433, 1.8886, 1.9109, 1.8462, 2.0010, 1.8940, 1.6905, 1.8911, 1.9256, + 1.9097, 1.9327, 1.8287, 1.8924, 1.9177, 1.7493, 1.7674, 1.8636, 1.7492, + 1.9836, 1.8146, 1.8620, 1.7825, 2.0120, 1.8672, 1.8315, 1.8449, 1.7789, + 1.8832, 2.1938, 1.9455, 1.8791, 1.8934, 1.7710, 1.8357, 1.7524, 1.8833, + 1.9098, 1.9889, 2.0089, 1.7989, 1.9659, 1.9234, 1.8230, 1.8224, 1.8910, + 2.3350, 2.0102, 1.9047, 1.9591, 2.0173, 1.8123, 1.8544, 1.8656, 1.8224, + 1.9651, 1.9880, 1.8107, 1.9073, 1.9978, 1.8600, 2.0117, 1.8631, 2.0721, + 1.9352, 1.9215, 1.9140, 1.8218, 1.7605, 1.9906, 1.9540, 1.9666, 1.9012, + 1.9246, 2.2353, 1.8372, 1.7955, 1.8482, 2.2460, 1.8172, 1.7272, 1.9246, + 1.8234, 1.9038, 1.7496, 1.7468, 1.8788, 1.7610, 1.8758, 1.7716, 1.8399, + 1.8478, 1.8214, 1.9384, 1.7953, 1.8796, 1.9437, 1.8393, 1.8481, 1.9724, + 1.9443, 1.8271, 1.9000, 1.9068, 1.9761, 2.0626, 1.8821, 1.8797, 1.8821, + 1.9393, 2.0845, 1.7724, 2.2779, 1.7483, 1.8708, 2.3283, 1.8016, 1.7931, + 1.7968, 1.9056, 1.9454, 1.8709, 1.8524, 1.8509, 1.8794, 1.8774, 1.9060, + 1.9310, 1.8138, 2.0550, 1.7575, 1.9538, 2.1155, 1.8975, 1.9170, 1.8156, + 1.8810, 1.7680, 1.8986, 
1.9357, 1.9116, 1.7712, 1.8008, 1.9611, 1.7758, + 1.8260, 1.8118, 1.8727, 1.8630, 1.8199, 1.9040, 2.1573, 1.9015, 1.8741, + 1.9744, 1.8355, 1.9861, 1.9444, 1.9224, 1.9344, 1.9126, 1.8261, 1.9051, + 1.7932, 1.9132, 1.8560, 1.9965, 1.8377, 1.8254, 1.8571, 1.8778, 1.8591, + 1.8667, 1.8168, 2.0227, 1.8521, 2.0372, 1.9477, 2.0108, 1.8808, 1.8310, + 1.9081, 1.7974, 1.9341, 1.7954, 1.8657, 2.0381, 1.8839, 1.8975, 1.9008, + 1.8652, 1.7003, 1.8053, 2.0298, 1.9783, 1.8093, 1.8550, 1.8205, 1.9117, + 2.0263, 1.9597, 1.8719, 1.9367, 1.8217, 1.9005, 3.9133, 1.9469, 1.7762, + 2.0920, 1.8573, 1.9510, 1.9302, 1.8653, 1.9560, 1.9251, 1.8615, 1.9694, + 1.7484, 1.7944, 1.8781, 1.8119, 1.8498, 1.7536, 1.8787, 1.9861, 1.8066, + 1.9102, 1.9123, 1.8142, 1.7973, 1.9175, 1.9055, 1.8494, 1.8617, 1.8569, + 1.9394, 1.9115, 1.9110, 1.7258, 1.8709, 1.8557, 1.7597, 1.9841, 1.9129, + 2.0609, 1.8807, 1.7975, 1.8461, 2.0206, 1.8714, 1.7895, 1.9192, 1.7968, + 1.8826, 1.7698, 1.9447, 1.8479, 1.8068, 1.9118, 1.8883, 1.9746, 1.8796, + 1.8511, 1.9405, 2.0582, 1.7386, 1.9657, 1.8745, 1.9578, 1.9301, 1.9415, + 1.9653, 1.9379, 1.8646, 1.9669, 1.9559, 1.8806, 2.0812, 1.8663, 1.9106, + 2.0192, 1.9615, 1.8668, 1.8325, 1.8629, 1.8094, 1.9444, 1.9607, 1.8693, + 1.7771, 1.7942, 1.8408], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.0001e-01, 3.2639e-01, -2.1039e-01, -5.0999e-01, -4.2389e-02, + -2.6753e-01, 9.4194e-01, -2.7457e-01, -5.2690e-01, -5.5466e-02, + -2.7905e-01, 1.6774e-01, -3.8277e-01, -2.5514e-01, -6.1334e-01, + -5.7646e-01, -2.2728e-01, -6.7665e-02, -5.3467e-01, -2.3960e+00, + -5.1092e-01, 3.8211e-01, -2.0304e-01, -4.5887e-01, -6.1576e-01, + 7.0121e-02, -4.9523e-01, -3.8478e-01, -1.0415e+00, 5.6320e-01, + -2.0277e-01, -7.7615e-02, 3.5587e-01, -5.4729e-01, 2.5944e-01, + -2.1211e-01, -9.3042e-01, -3.8996e-01, 1.8186e-03, 2.1777e-01, + -2.0130e-01, -2.2191e-01, 1.0911e+00, -4.2226e-01, 1.1266e-01, + -4.3535e-01, -7.5518e-01, -1.5334e-01, -3.4593e-01, -1.9087e-02, + -4.6673e-01, 
-3.2443e-01, -4.1880e-01, -6.7491e-01, -2.2719e-01, + 1.1494e+00, 6.2432e-01, 4.0111e-02, -5.6136e-01, 2.8486e-01, + 4.2190e-01, 2.0334e-01, 1.1443e-01, 9.1238e-02, 3.3551e-01, + -4.9760e-02, 8.9777e-02, -6.3372e-01, 1.2032e-01, -1.1987e+00, + 2.0360e-01, 3.1116e-01, 6.7021e-01, 3.3498e-01, -3.2979e-01, + -5.0852e-01, 2.5790e-01, 4.6133e-03, -1.0485e+00, 7.3088e-02, + -5.5104e-01, -1.3722e-01, -1.4933e-01, 5.5035e-01, 2.8101e-01, + -6.5381e-01, 6.2764e-01, 7.8082e-02, -1.6087e-01, -1.8105e-01, + 7.7904e-01, 3.9496e-01, -1.0841e-01, 5.5589e-02, -5.2023e-01, + -2.7022e-02, -9.2938e-02, 8.5328e-01, -3.3161e-01, 7.2774e-01, + -1.6572e-02, -2.6193e+00, 3.1187e-01, 5.7197e-01, 4.1484e-01, + 8.9408e-01, -8.1853e-01, -3.2315e-01, -7.6765e-02, 2.0524e-01, + 7.7114e-01, 4.7549e-01, -6.9258e-01, -2.9009e-03, 1.7830e-01, + -7.2715e-01, 9.8283e-01, 4.1635e-01, 1.7894e-01, 3.5228e-02, + 3.7125e-01, -3.4730e-01, -6.6480e-01, -2.2598e-01, -2.7736e-01, + -4.1118e-01, -2.1612e-01, 3.9944e-01, -4.8386e-01, 5.5842e-01, + -1.0969e+00, -4.2645e-01, -9.7789e-01, -1.5478e-01, 3.4732e-01, + 1.5894e-01, -5.3756e-01, -2.1893e-01, -7.5276e-02, -5.2727e-01, + 1.4501e-03, -1.1514e-01, 5.3553e-01, 4.6137e-01, -1.4240e-01, + -4.5386e-01, -4.4217e-01, 2.2191e-01, 1.7962e-01, 7.0115e-01, + 6.0106e-01, 7.0242e-01, 4.8614e-03, 1.2692e-01, -4.2740e-01, + -7.7894e-02, -2.7168e-01, -4.2341e-01, -5.9347e-01, 6.1331e-02, + -1.6603e-01, -8.8191e-01, 1.5898e-01, -3.9610e-01, -3.6995e-01, + 1.3130e-01, -7.6066e-01, 4.9251e-01, -5.2796e-01, 2.7328e-01, + 4.2783e-01, -2.3592e-01, -8.5147e-01, 5.8858e-01, -2.6978e-01, + -6.8101e-01, -3.1506e-02, -6.8128e-01, -4.4942e-01, 3.5423e-01, + -8.0526e-01, -5.8796e-01, -4.6747e-02, -8.0376e-02, 1.9648e-01, + 9.7790e-01, 9.9415e-02, -1.5476e+00, 7.2616e-02, -8.8570e-01, + -1.3258e-01, -2.2147e-01, 2.3140e-01, 7.3908e-01, 2.1519e-01, + 5.9967e-01, 3.8346e-01, -2.5702e-02, 6.3070e-01, 2.4397e-01, + -3.7191e-01, -2.8234e-01, -3.5167e-03, -1.6815e-01, 3.0992e-01, + 
1.2480e-01, -2.3985e-01, 7.4806e-01, 6.7826e-01, 2.2846e-01, + 2.1610e-01, -2.8469e-01, -6.4048e-01, -3.1248e-01, -6.3940e-01, + 8.4760e-02, -1.8882e-01, -2.1436e-01, -3.5960e-01, 2.6390e-02, + 1.3632e+00, -3.4175e-01, 8.5690e-01, 8.1155e-01, -5.4218e-01, + -2.9913e-01, 1.1988e-01, 3.5309e-01, 4.1926e-01, 5.5749e-01, + -6.3115e-01, -5.2563e-01, 1.7902e-01, 4.0641e-01, 6.2101e-01, + -2.2727e-02, -2.9388e-01, -1.3951e-01, -1.7952e-01, 4.4467e-01, + 6.6099e-01, 6.1575e-01, -6.6078e-01, 5.1975e-01, -3.6814e-01, + -5.6789e-01, 5.1790e-01, -1.5656e+00, 3.2540e-01, 8.2552e-01, + 3.7043e-02, 6.1390e-01, 1.6771e-01, -1.2280e+00, -2.3813e-01, + -1.2629e-01, -3.6776e-01, -1.0564e-01, 6.0078e-01, 7.3554e-01, + -2.2202e-01, -7.2251e-01, -3.9098e-01, 6.6057e-01, 1.7973e-01, + -6.7612e-01, -1.0394e-01, 9.9293e-01, -5.2571e-01, -2.6658e-01, + -4.0639e-01, 2.9404e-01, -8.6945e-01, -6.0640e-01, 2.2846e-01, + -3.4287e-01, -5.3146e-02, 5.7531e-01, -8.1353e-03, 3.5567e-01, + 5.9598e-01, -5.8310e-01, -8.2484e-03, 2.3647e-01, -2.1022e-01, + 5.7921e-01, 9.6365e-01, 1.0753e-02, 5.1279e-01, -2.9078e-01, + -4.8622e-01, -3.3165e-01, 4.2497e-01, -3.1292e-01, 3.9897e-01, + 1.2795e-01, 3.1480e-01, 1.4672e-01, 1.8182e-01, -1.5524e-01, + 6.5747e-01, 3.4742e-01, -3.8288e-01, -5.8985e-01, -5.5828e-01, + 2.3804e-01, -4.3328e-01, 2.1676e-01, -2.7809e-01, 5.9206e-01, + 5.3341e-01, -1.0016e+00, 4.7881e-01, -7.3726e-01, 2.7895e-02, + -4.1925e-01, -3.7070e-01, -4.0917e-01, 2.7832e-01, 5.5444e-02, + -1.5275e+00, -3.5360e-01, 1.6569e-01, -3.6858e-01, 2.1436e+00, + -1.5858e+00, 2.6521e-01, -4.9049e-01, -1.3124e-01, -2.1085e-01, + -2.3881e-01, -7.6808e-01, -1.3304e-01, -3.7610e-01, -3.0138e-01, + -4.9982e-01, -8.1163e-01, 1.7125e-02, 5.6042e-01, -3.6971e-01, + 3.8646e-01, 1.5706e-01, 2.9716e-01, 8.9005e-01, 3.0204e-02, + 2.1164e-01, 1.1384e-01, 1.8581e-01, -4.4801e-02, 3.3047e-01, + -8.8904e-02, 2.5982e-01, 3.3461e-01, 9.7410e-01, -1.0836e-01, + -2.2746e-02, -2.8105e-01, 5.6986e-01, 4.0678e-01, -3.3759e-02, + 
-6.6359e-01, 1.6146e-01, 8.9127e-02, -4.1533e-01, 4.9490e-01, + -8.3370e-02, -2.3326e-01, 3.0722e-01, 2.6059e-01, 7.1568e-01, + 1.9166e-01, 8.7676e-02, -5.6655e-01, -5.7934e-01, 5.5463e-01, + -7.6014e-01, -1.5674e-01, 5.9477e-01, -4.9040e-01, -3.2710e-02, + 8.4659e-02, 3.3333e-02, 2.0411e-01, -5.1136e-01, 5.6339e-01, + 3.8002e-01, -6.0355e-01, 5.3952e-01, -7.2400e-01, -8.2425e-01, + 1.3842e-01, 1.2815e+00, 4.2212e-01, 7.7251e-01, 5.8582e-01, + -2.2402e-01, 1.3369e-01, 1.1079e+00, 2.1883e-01, -2.8498e-01, + 3.8395e-01, -1.3351e-02, 2.1286e-01, -2.2877e-01, 4.7099e-01, + -1.4595e-01, 3.4667e-01, 8.2163e-01, -6.9097e-02, -3.3457e-01, + -6.7919e-02, -4.7688e-01, 6.6409e-01, 3.8889e-01, 8.7833e-01, + -5.7702e-01, 5.9352e-03, -3.6324e-01, 6.8234e-02, -4.7906e-02, + 2.0999e-01, 9.8040e-02, -6.9085e-01, -3.6914e-01, -6.4030e-01, + 2.5372e-01, 4.0410e-01, -1.3079e-01, -5.6092e-01, -1.8909e-01, + -3.2377e-01, -2.1252e-01, -2.2387e-01, 8.7467e-02, 5.0728e-01, + -1.1403e-01, -5.4595e-01, 3.9349e-01, 5.9183e-01, -9.6032e-01, + 8.2167e-02, -5.1443e-01, -9.4190e-01, -9.3388e-01, 9.4551e-02, + -2.4728e-01, 2.9515e-01, -4.8768e-01, -7.5996e-02, -5.9243e-01, + -4.0558e-01, 2.2324e-01, -9.0361e-01, -5.1321e-01, -1.3411e-01, + 5.6479e-01, -9.0425e-03, -5.5796e-01, 1.7802e-01, 2.9690e-01, + -3.8099e-01, 2.4017e-02, -3.0891e-01, -3.2830e-01, 4.2963e-02, + -6.2601e-01, -2.5049e-01, -7.4859e-03, 1.1617e-01, -1.3194e-01, + 4.7443e-01, -1.3437e-02, -7.4380e-01, -1.2107e-01, -4.5423e-01, + -4.6178e-01, 3.5232e-01, -5.6029e-01, 7.6554e-01, 7.5839e-01, + 4.9742e-01, -1.8662e-01, 1.2774e-01, -3.0275e-01, 4.3990e-01, + -6.9867e-01, -3.8308e-01, 1.2532e-01, 1.5111e-01, -4.7295e-01, + 2.0581e-01, 3.9599e-02, -4.5042e-01, -4.7772e-01, -1.2960e-01, + -2.4035e-02, -8.1313e-01, 7.2255e-01, 2.5972e-01, 8.9410e-01, + 7.9167e-02, -1.3100e-01, -1.8255e-01, -1.3246e-02, 1.7391e-01, + -1.0780e+00, 5.4875e-01, 2.4150e-02, -3.4517e-01, -5.1029e-01, + -7.0280e-01, 1.6151e-02, -6.9358e-01, -1.9686e-01, 
1.8666e-01, + -3.3663e-02, 2.8270e-01, 4.3943e-01, 7.5781e-01, -8.4487e-02, + 1.6837e-01, 5.1663e-01, 1.0567e+00, 1.0433e+00, -4.2754e-01, + -4.3495e-01, 6.6799e-01, 2.8105e-01, 2.9122e-01, 1.0039e-01, + -8.5692e-02, -6.0030e-01, 2.7274e-01, -1.2726e-01, 3.2528e-01, + -4.7566e-01, -8.0034e-02, 9.9471e-01, 2.6183e-01, -5.3645e-01, + 2.7173e-01, 9.5450e-01, -5.9020e-01, 1.5703e-01, 4.1426e-01, + -8.9005e-01, 6.8087e-01, -6.4571e-01, -4.4648e-01, -2.6380e-01, + -1.5983e+00, -3.5993e-01, 2.2042e-01, -6.5626e-01, 1.1699e+00, + -2.0258e-01, 3.4444e-01, 5.6699e-01, -6.5344e-01, 5.0124e-01, + 3.2623e-01, 2.0384e-01, -3.2459e-01, 8.5185e-03, 1.3471e-01, + 5.1772e-01, 1.4214e-01, 4.1939e-02, 6.7896e-01, 8.3246e-02, + -6.1466e-02, 4.5889e-02, -7.6077e-01, 2.5588e-01, 5.4054e-01, + 9.3660e-01, -4.6980e-01, -1.7507e-01, -1.4851e-01, 1.8071e-01, + 4.1031e-02, 1.4749e-02, 6.5961e-01, -7.0903e-02, 5.9657e-02, + -1.9495e-01, 8.3158e-01, -1.5257e-01, 1.2255e+00, -1.2356e-01, + -2.0381e-01, 4.8242e-01, -1.6604e-01, -8.8528e-02, 1.0697e-01, + 1.9617e-01, -7.0234e-01, 4.4242e-01, -5.7977e-02, 3.9103e-01, + 1.8679e-01, -4.9163e-01, 4.2347e-03, -6.8967e-01, 1.6035e-01, + -8.5094e-01, -7.4418e-02, 6.7336e-02, -8.3224e-01, 2.6062e-02, + 4.9192e-01, 5.2151e-01, -1.6844e-01, 2.4388e-01, -3.7647e-02, + -4.8822e-01, 4.6222e-03, 1.2678e-01, -3.0084e-01, 6.3812e-01, + 7.1659e-01, 1.7261e-01, -8.0721e-01, 6.4520e-01, -1.2987e-01, + -1.7628e-01, -3.8206e-01, 9.2294e-01, 3.3497e-01, 3.1547e-01, + 3.1440e-01, 4.2543e-01, -8.7846e-01, 5.0277e-01, -1.7191e-01, + -4.8866e-01, 6.1943e-01, 4.8681e-01, 3.1232e-01, -2.7119e-01, + 5.9825e-01, -3.5353e-01, 3.5124e-01, 2.8750e-02, -6.5004e-01, + 2.6290e-01, -2.8577e-01, 2.7292e-01, -3.7203e-02, 2.6134e-01, + 8.5432e-01, -5.0034e-01, -5.1107e-01, -3.8058e-01, -2.5238e-01, + 4.3307e-01, 3.9068e-01, 7.7398e-01, -5.3277e-01, 4.4721e-01, + 1.1372e-01, 5.0360e-01, -1.7603e-01, 2.4101e-01, 2.2365e-01, + -1.8078e-01, 2.7429e-01, -2.1549e-01, 1.4268e-01, 3.3917e-01, + 
-6.9570e-01, -4.9179e-01, -1.5029e-01, 6.5821e-01, 8.5738e-01, + -8.3008e-01, -3.8017e-01, -3.3007e-01, 5.9142e-01, 5.3897e-01, + 1.3393e-01, 2.6619e+00, -8.4140e-01, 7.2939e-02, -1.0110e+00, + 4.0343e-01, -7.9010e-01, -5.8433e-01, 5.6104e-01, 1.6495e-01, + -6.4035e-01, -2.1644e-01, -1.1082e+00, -2.0013e-01, -2.8716e-01, + 9.0775e-01, 4.8080e-01, 1.1360e-01, -5.3904e-02, -3.1460e-01, + 4.1571e-01, 8.1695e-02, -1.6502e-01, -1.2634e-01, 6.4734e-01, + 4.4058e-01, 6.9211e-03, -3.5066e-01, -2.4901e-01, 9.0255e-01, + -1.1789e-01, -6.6065e-01, -1.2066e-01, -1.7871e-02, 6.4703e-01, + -2.7888e-01, 3.9891e-01, -1.8701e-01, -5.6675e-01, 5.2930e-01, + -1.1687e+00, -3.6498e-01, 1.4806e-01, -4.1063e-01, 1.0374e+00, + -4.3072e-01, -2.1022e-02, 4.1391e-01, -3.7349e-02, 2.4009e-01, + -7.7916e-02, -6.8427e-01, -8.1858e-03, 1.2579e-01, 2.1868e-01, + -6.8004e-01, 7.4477e-01, 3.0428e-01, 4.4441e-02, -5.2122e-01, + -8.5566e-01, -7.7845e-02, -4.6011e-01, -4.8875e-01, -6.0817e-01, + -4.2802e-01, -1.0616e-01, -6.4764e-01, 5.8157e-01, 9.6697e-02, + 4.5654e-01, 3.5184e-02, -6.4207e-01, -6.6000e-01, 1.6382e-02, + 3.3541e-01, 4.0628e-01, -6.7456e-01, 8.9355e-01, -5.9333e-02, + 7.9808e-02, 2.2350e-01, -3.6857e-01, 9.5804e-01, 1.1220e-01, + -4.0588e-02, 6.0938e-01, 3.0536e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0334, 0.0285, 0.0295, ..., 0.0152, -0.0162, -0.0086], + [-0.0067, 0.0443, -0.0088, ..., 0.0052, 0.0228, -0.0394], + [-0.0080, -0.0090, -0.0276, ..., -0.0169, -0.0212, 0.0219], + ..., + [-0.0186, 0.0049, -0.0079, ..., -0.0045, 0.0192, -0.0271], + [ 0.0003, 0.0048, -0.0178, ..., -0.0006, -0.0150, 0.0126], + [ 0.0191, -0.0278, 0.0059, ..., 0.0222, 0.0142, 0.0118]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.4333, 0.1654, -0.0519, ..., -0.0249, 0.0006, 0.0306], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0058, 0.0067, 0.0045, ..., -0.0050, 0.0077, 
-0.0065], + [ 0.0011, -0.0229, -0.0150, ..., 0.0013, 0.0057, 0.0196], + [-0.0234, 0.0136, -0.0235, ..., 0.0329, -0.0069, 0.0318], + ..., + [ 0.0086, 0.0077, -0.0036, ..., -0.0093, -0.0244, 0.0068], + [ 0.0283, 0.0173, 0.0116, ..., -0.0010, 0.0039, -0.0024], + [ 0.0225, 0.0120, 0.0018, ..., -0.0170, 0.0129, -0.0031]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 4.6478e-02, 4.1260e-02, -5.7587e-02, 4.2152e-03, -1.5381e-02, + -7.1945e-03, 7.1960e-02, 3.3722e-03, 1.2032e-02, -1.7223e-03, + -4.3213e-02, -2.2369e-02, 2.6031e-02, 2.9510e-02, -1.9821e-02, + 1.1589e-02, 3.1555e-02, 5.9738e-03, 2.5436e-02, 1.7834e-01, + -1.2909e-02, -1.7929e-02, 1.7914e-02, -7.0419e-03, 7.2670e-03, + -1.1589e-02, -2.1347e-02, 3.9612e-02, 6.9214e-02, -3.1097e-02, + 3.6774e-02, 3.3020e-02, 3.6297e-03, -3.4698e-02, -1.0223e-02, + 3.4210e-02, 4.7668e-02, -3.0304e-02, -3.7567e-02, -4.2389e-02, + -3.6896e-02, 5.4779e-02, -5.8228e-02, 4.9225e-02, -6.8420e-02, + -1.0086e-02, -1.3298e-02, 1.2856e-02, 7.1899e-02, 4.1229e-02, + 7.8369e-02, -2.3773e-02, 7.1411e-02, 3.9337e-02, -5.9624e-03, + 5.9235e-02, 5.2368e-02, -7.6790e-03, 1.0061e-03, 1.7975e-02, + 1.2077e-02, 1.7365e-02, 2.3132e-02, -3.7811e-02, -1.7868e-02, + 2.1454e-02, -1.1116e-02, -9.3155e-03, 5.4260e-02, -1.4148e-01, + 5.0934e-02, 2.6646e-03, -4.3518e-02, 1.6846e-02, -2.5818e-02, + -3.9856e-02, 1.7212e-02, -3.2196e-03, -5.2166e-04, -5.6641e-02, + -1.0824e-03, -2.0920e-02, 4.7874e-03, -1.6281e-02, 4.0894e-02, + 4.4708e-02, -2.2568e-02, 3.1982e-02, -2.4689e-02, -2.5970e-02, + -4.4434e-02, 3.2562e-02, 5.6458e-02, -1.5976e-02, 1.6129e-02, + -1.6968e-02, 3.2330e-03, 4.4403e-02, -3.6438e-02, 3.1891e-02, + -1.8921e-03, -1.9669e-02, 3.1342e-02, -5.9235e-02, 1.8967e-02, + 2.7206e-02, 7.2021e-02, 2.2812e-02, 2.5635e-02, 2.3743e-02, + 2.0905e-02, 8.2458e-02, -1.6117e-03, 5.7190e-02, -3.2349e-02, + -3.7994e-02, 1.8845e-02, 1.8845e-03, 5.3284e-02, 2.1801e-03, + 2.8900e-02, -5.6824e-02, -4.2610e-03, 
-2.7145e-02, -3.4515e-02, + -2.0996e-02, 2.9221e-02, -8.8501e-02, 5.2567e-03, 5.8441e-02, + -2.4704e-02, -2.6581e-02, -1.5053e-02, -1.8555e-02, 3.0090e-02, + 1.1121e-01, -2.7542e-03, 1.1032e-02, -2.6871e-02, 6.1913e-03, + -1.2978e-02, -2.2446e-02, 2.0050e-02, 3.3478e-02, -5.1308e-03, + -4.4769e-02, 4.5868e-02, 1.2375e-02, 1.7563e-02, 5.1239e-02, + 2.5131e-02, -2.2842e-02, -1.8188e-02, -1.9974e-02, -4.7913e-02, + -2.3041e-02, 6.3705e-03, -3.4695e-03, -2.0279e-02, 3.1616e-02, + 2.6306e-02, 1.2650e-02, 3.2104e-02, -4.3823e-02, -1.5366e-02, + 1.1955e-02, -1.1835e-01, -1.2444e-02, 2.3632e-03, -8.0872e-03, + -3.7575e-03, -4.6234e-02, -4.8340e-02, 2.8748e-02, 3.4576e-02, + 3.8208e-02, 6.4636e-02, -5.4962e-02, 4.1351e-03, -4.1866e-04, + 1.9073e-02, -6.0310e-03, -2.4857e-02, -3.9795e-02, 5.2948e-02, + 3.6041e-02, -3.3447e-02, 7.5500e-02, -4.7493e-03, -2.7298e-02, + -2.9510e-02, -6.9695e-03, 8.8806e-03, 1.5434e-02, -4.9011e-02, + 8.0948e-03, -9.0485e-03, -2.1530e-02, -9.2407e-02, 8.7585e-03, + -9.7717e-02, 2.3956e-02, 6.7871e-02, 4.1351e-02, 1.3680e-02, + 1.4982e-03, 1.0400e-01, 2.1118e-02, -1.5480e-02, 3.6591e-02, + 1.4511e-02, -5.4169e-03, 4.0321e-03, 6.5552e-02, 6.5842e-03, + 1.4305e-02, 4.5586e-03, -1.3863e-02, 1.2848e-02, -5.1727e-02, + 8.5144e-02, 3.9581e-02, -9.8114e-03, 2.3163e-02, -1.4214e-02, + 8.0032e-03, -2.9968e-02, -1.5175e-02, -3.9558e-03, -1.3176e-02, + 5.9692e-02, 1.1299e-02, -3.5736e-02, 1.2413e-02, -6.2469e-02, + 2.5238e-02, 1.4099e-02, 2.2583e-02, 4.8035e-02, 2.4109e-02, + 1.8890e-02, 2.1240e-02, 1.5610e-02, 1.5671e-02, -2.6047e-02, + 1.4259e-02, -3.4103e-03, -1.4206e-02, -3.4271e-02, 2.3422e-02, + -5.7800e-02, 3.7781e-02, -2.2018e-02, -3.3508e-02, -1.1887e-02, + -1.6037e-02, 2.2068e-03, 1.6724e-02, -1.4359e-02, 6.9962e-03, + -5.2460e-02, 4.4952e-02, 6.4087e-02, -4.5502e-02, 1.1162e-02, + 5.1422e-02, -2.1790e-02, 3.6346e-02, 1.4618e-02, -2.3529e-02, + 2.8046e-02, -3.3173e-02, 8.0795e-03, -3.8574e-02, -9.4299e-02, + 1.6769e-02, 2.6413e-02, -4.2328e-02, 
9.2010e-03, 3.3386e-02, + -2.6817e-03, -5.0995e-02, -2.8839e-02, 5.7106e-03, -1.2684e-03, + 8.8806e-03, -4.4434e-02, 8.3494e-04, 6.4545e-03, -7.7576e-02, + -4.2206e-02, 3.7933e-02, 2.3224e-02, -1.9028e-02, 2.0508e-02, + 1.7151e-02, -4.0627e-03, 2.8107e-02, 1.7044e-02, 7.8979e-02, + -2.2781e-02, 1.3550e-02, -1.7624e-02, -3.5736e-02, 2.3972e-02, + -1.8845e-02, 2.1347e-02, 4.6234e-03, -5.3711e-02, 2.4399e-02, + 5.7259e-03, 6.3095e-03, 3.8414e-03, 1.4709e-02, 7.2899e-03, + 8.6517e-03, 2.8656e-02, -5.5122e-04, 3.4008e-03, 2.0630e-02, + 2.3346e-02, 1.2459e-02, -6.3293e-02, -5.6732e-02, 1.0559e-01, + 9.0759e-02, 1.4839e-02, -9.9411e-03, 2.0111e-02, 5.5511e-02, + -2.4433e-03, 2.4063e-02, -6.2561e-02, -3.9558e-03, -1.1353e-02, + 3.7445e-02, -3.1490e-03, -9.2468e-03, -6.5269e-03, 2.7252e-02, + -3.0548e-02, -4.4556e-02, -3.9642e-02, -1.2064e-03, -3.4332e-02, + 4.6959e-03, -4.1321e-02, -4.5990e-02, 1.6342e-02, -4.4708e-02, + 3.6255e-02, 2.2034e-02, 6.4583e-03, 7.3364e-02, -3.8513e-02, + 1.5930e-02, 6.1096e-02, -2.4246e-02, -2.3575e-02, -2.4662e-03, + 1.0941e-02, -1.9211e-02, -2.1301e-02, -1.3809e-02, -2.5665e-02, + 3.1921e-02, -1.8806e-03, 2.7420e-02, -4.3671e-02, 2.3239e-02, + -9.8572e-03, 1.0803e-02, -3.4607e-02, -4.7791e-02, -6.9275e-03, + -1.7075e-02, 6.0455e-02, 3.6407e-02, 5.3436e-02, 5.7190e-02, + -3.1952e-02, 7.1564e-03, 1.6922e-02, 7.2937e-03, -2.6302e-03, + -5.5359e-02, -1.1360e-02, -2.0859e-02, 1.6296e-02, 1.3933e-03, + 3.3905e-02, -7.8857e-02, 1.4816e-02, 1.6174e-02, -9.2602e-04, + 1.1911e-03, 4.7226e-03, -1.4030e-02, -1.4473e-02, -5.9753e-02, + 6.2988e-02, 1.1841e-02, -4.2389e-02, 6.6284e-02, -1.9791e-02, + -2.5463e-04, -1.6571e-02, -2.0081e-02, -5.6793e-02, 2.2049e-02, + 3.3112e-02, 5.5351e-03, -1.9684e-02, 1.1755e-01, 9.5825e-02, + 9.2834e-02, 3.2043e-02, 2.7237e-02, -2.0081e-02, 1.8646e-02, + -7.5134e-02, -6.1676e-02, 3.1097e-02, -2.4750e-02, -5.6580e-02, + -1.4442e-02, -2.0447e-02, -2.1805e-02, 5.7526e-02, -1.5678e-03, + -4.7516e-02, -7.0923e-02, 1.8921e-03, 
2.2583e-02, 4.3732e-02, + 7.9250e-04, 1.4275e-02, -1.4984e-02, 2.1400e-03, -4.6387e-02, + 6.9275e-03, -8.8120e-03, 8.7646e-02, -2.3669e-01, 1.8951e-02, + -7.2174e-03, 1.7166e-02, 3.0014e-02, -3.4973e-02, -4.0680e-02, + -5.1605e-02, -2.1088e-04, -1.8845e-02, 2.7466e-02, -1.8356e-02, + 5.8075e-02, -4.3304e-02, -7.6355e-02, -8.0200e-02, 2.8427e-02, + -3.8055e-02, 5.0201e-02, -1.3794e-02, 2.6474e-02, 1.4137e-02, + 5.3833e-02, 6.0516e-02, -1.1345e-02, 1.4664e-02, 1.6464e-02, + -6.2943e-04, -4.0474e-03, 7.6790e-03, 1.5160e-02, -1.4793e-02, + -5.9776e-03, 3.4607e-02, -6.1646e-03, 2.8290e-02, 9.4971e-02, + 2.9160e-02, 3.3264e-02, -1.1536e-02, -6.0310e-03, -3.4882e-02, + -3.3630e-02, 6.9763e-02, 1.7105e-02, -1.3481e-02, 2.9449e-02, + 1.6327e-02, 6.6162e-02, 2.2415e-02, -8.2703e-03, -6.4148e-02, + -5.0354e-02, 1.4214e-02, 4.9286e-02, -5.5450e-02, 1.5182e-02, + -2.8336e-02, -5.8960e-02, 1.5823e-02, 5.8838e-02, -2.4063e-02, + -6.9214e-02, 6.2347e-02, 2.8259e-02, -2.0462e-02, -6.0883e-02, + -3.7575e-03, -1.3939e-02, -3.4607e-02, 5.4893e-03, -2.7962e-03, + 9.5596e-03, 1.1261e-02, 1.1215e-02, 9.5215e-02, -4.4250e-02, + 2.5970e-02, -2.8351e-02, -4.4067e-02, 2.9282e-02, -2.4261e-02, + -2.1484e-02, 3.8361e-02, 3.0853e-02, 2.7756e-02, 3.4485e-02, + -4.2114e-02, 2.5909e-02, 3.6377e-02, -1.0086e-02, 2.8992e-02, + -2.3880e-02, 2.3697e-02, -2.1713e-02, 2.9678e-02, -3.4027e-02, + -3.5889e-02, -2.0065e-02, -9.5154e-02, 2.8702e-02, -6.5796e-02, + 8.2626e-03, -1.5060e-02, -9.7885e-03, -5.0781e-02, 4.5471e-02, + -7.2937e-02, 2.3361e-02, -3.3508e-02, 9.9121e-02, 5.3635e-03, + -2.2293e-02, -2.3758e-02, 3.4515e-02, 1.3947e-02, 2.8976e-02, + -3.5797e-02, -6.5155e-03, 4.8462e-02, -2.6798e-03, 1.6922e-02, + 1.1887e-02, 8.4734e-04, 4.1199e-02, 1.4353e-03, -2.1286e-03, + -8.1055e-02, -1.9135e-02, 3.3295e-02, -2.5391e-02, 2.9572e-02, + 1.7868e-02, -1.6785e-02, 1.2001e-02, -2.9846e-02, 1.4832e-02, + -1.7410e-02, 2.0630e-02, -3.1250e-02, -4.4678e-02, 2.4891e-03, + 3.7567e-02, -5.1300e-02, -1.3695e-02, 
2.1839e-03, -2.0966e-02, + -1.4938e-02, -2.3300e-02, 4.5013e-02, 3.4424e-02, -2.3087e-02, + -2.7634e-02, 2.2018e-02, -7.2289e-03, 6.2805e-02, 3.6621e-02, + -4.5441e-02, -1.8112e-02, -4.7821e-02, -4.7760e-02, -1.0996e-03, + -6.2195e-02, -8.7585e-03, -7.2998e-02, -1.1948e-02, 1.0933e-02, + -2.5726e-02, -3.3302e-03, 6.8893e-03, -1.3519e-02, 1.8280e-02, + -2.1790e-02, 1.0498e-02, -1.2772e-02, -3.2562e-02, -4.5715e-02, + 5.7220e-03, -1.0307e-02, -2.2064e-02, 1.6388e-02, -1.0399e-02, + 3.2444e-03, 2.5192e-02, -5.9776e-03, 1.2680e-02, -4.5410e-02, + 9.0313e-04, -5.9143e-02, -2.4826e-02, 2.3422e-02, -3.2166e-02, + -5.6496e-03, -2.2919e-02, 1.9104e-02, -3.8338e-03, -8.0139e-02, + 6.6147e-03, -2.8091e-02, -3.6469e-02, -3.9337e-02, -3.2867e-02, + 5.4443e-02, 2.1149e-02, 4.6082e-02, 5.1208e-02, 2.8030e-02, + -3.9062e-03, 1.5053e-02, 4.1351e-02, -6.8331e-04, -2.8183e-02, + -8.4763e-03, 1.0704e-02, 6.5491e-02, 1.5533e-02, 1.3351e-02, + 5.8289e-02, -3.4088e-02, 3.9291e-03, -2.9129e-02, 5.6244e-02, + -1.0910e-02, 4.7150e-02, 4.7241e-02, 2.1912e-02, -4.8920e-02, + -4.7058e-02, 3.5858e-02, 2.0325e-02, 1.2009e-02, -6.0028e-02, + 6.2988e-02, 3.8177e-02, 3.8666e-02, -3.3478e-02, -1.9241e-02, + -2.5696e-02, 3.0444e-01, -2.0157e-02, -1.5320e-02, -4.1443e-02, + -6.3324e-03, 7.6538e-02, -4.5624e-02, -4.3793e-02, -6.3293e-02, + -2.5650e-02, -3.1708e-02, -6.9733e-03, -4.4128e-02, -1.2222e-02, + -2.8656e-02, 1.4755e-02, 4.5410e-02, -1.4847e-02, -6.1127e-02, + -1.8982e-02, 7.3357e-03, 6.2828e-03, 3.9154e-02, 3.3508e-02, + 3.6377e-02, 6.2675e-03, 1.7609e-02, 2.0752e-03, 8.6060e-03, + -1.2619e-02, 3.3264e-02, -3.6682e-02, -1.8784e-02, -4.0344e-02, + -5.5054e-02, -3.8025e-02, 4.7729e-02, 4.1840e-02, -7.7881e-02, + -2.6321e-02, 6.2073e-02, -6.0150e-02, -4.1046e-02, 6.2904e-03, + -2.1759e-02, 3.9856e-02, -3.1281e-02, 2.5539e-03, 3.5739e-04, + -4.9286e-02, -1.1353e-02, 2.0035e-02, 3.0548e-02, -6.3515e-03, + 3.6804e-02, 6.1401e-02, -6.7322e-02, 2.9926e-03, -6.9458e-02, + -6.5735e-02, -2.2568e-02, 
1.6052e-02, -3.5065e-02, -4.1840e-02, + -3.0746e-03, 1.9791e-02, 3.3997e-02, -2.9160e-02, -4.9469e-02, + -5.8655e-02, -1.8463e-02, -8.1482e-03, -3.5339e-02, 2.3788e-02, + -2.3300e-02, -1.3672e-01, -6.4636e-02, -1.6983e-02, 1.0239e-02, + 2.0599e-02, -1.8295e-02, -2.2568e-02, 8.8684e-02, 2.7573e-02, + 2.5513e-02, 4.2145e-02, 7.9422e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2778, 1.4128, 1.3243, 1.2121, 1.4286, 1.2285, 1.2714, 1.2086, 1.2280, + 1.2027, 1.3056, 1.3433, 1.3571, 1.2871, 1.2546, 1.3233, 1.2960, 1.4360, + 1.3255, 0.3545, 1.3113, 1.2376, 1.3394, 1.3552, 1.3275, 1.3286, 1.2904, + 1.1890, 1.4459, 1.3469, 1.3475, 1.2710, 1.3147, 1.2767, 1.3758, 1.4789, + 1.2862, 1.3537, 1.4560, 1.3338, 1.3595, 1.2610, 1.3219, 1.3039, 1.3483, + 1.3257, 1.2723, 1.3911, 1.2703, 1.2903, 1.3294, 1.3034, 1.3718, 1.3183, + 1.4007, 1.4945, 1.1526, 1.5551, 1.4231, 1.4085, 1.3184, 1.3429, 1.2638, + 1.3687, 1.2802, 1.3492, 1.2815, 1.3761, 1.4529, 1.5865, 1.2352, 1.3187, + 2.4101, 1.2996, 1.5029, 1.2912, 1.3043, 1.2830, 1.3575, 1.4108, 1.2408, + 1.4397, 1.2768, 1.3065, 1.3802, 1.3398, 1.3298, 1.2797, 1.2675, 1.3241, + 1.2821, 1.3869, 1.3612, 1.3836, 1.4122, 1.2181, 1.3843, 1.2839, 1.3655, + 1.4379, 1.2846, 1.1037, 1.3041, 1.2195, 1.2868, 1.2898, 1.2872, 1.3970, + 1.2838, 1.2924, 1.2720, 1.3200, 1.2509, 1.2536, 1.3131, 1.3219, 1.3353, + 1.3341, 1.2996, 1.3819, 1.2691, 1.3806, 1.3662, 1.4026, 1.3948, 1.3118, + 1.3203, 1.2443, 1.4601, 1.3088, 1.3087, 1.2799, 1.1752, 1.3434, 1.2705, + 1.2702, 1.3879, 1.3780, 1.2694, 1.1935, 1.3127, 1.2161, 1.3522, 1.2309, + 1.2594, 1.2788, 1.3216, 1.3680, 1.3151, 1.3007, 1.2686, 1.3322, 1.2459, + 1.3076, 1.4006, 1.1240, 1.4359, 1.3258, 1.3004, 1.2717, 1.3467, 1.2417, + 1.2638, 1.4253, 1.3500, 1.2859, 1.3707, 1.3677, 1.3646, 1.3057, 1.2892, + 1.2587, 1.1566, 1.2520, 1.3977, 1.3951, 1.2995, 1.5024, 1.3988, 1.2421, + 1.2630, 1.5120, 1.4118, 1.3496, 1.4099, 1.3526, 1.2130, 1.8183, 1.4380, + 1.5001, 1.3415, 
1.2570, 1.3791, 1.4268, 1.4050, 1.2840, 1.2843, 1.2593, + 4.8400, 1.2843, 1.1926, 1.3561, 1.3769, 1.3333, 1.2853, 1.2566, 1.4406, + 1.2301, 1.3254, 1.4186, 1.2946, 1.2669, 1.2523, 1.2704, 1.2703, 1.3087, + 1.4050, 1.3373, 1.3108, 1.3078, 1.2544, 1.3056, 1.2784, 1.2104, 1.4433, + 1.2860, 1.4168, 1.3535, 1.2526, 1.3556, 1.2621, 1.3858, 1.4261, 1.3332, + 1.2681, 1.2991, 1.3661, 1.2210, 1.2892, 1.2889, 1.3536, 1.2801, 1.3803, + 1.3746, 1.4893, 1.2750, 1.3595, 1.2911, 1.2272, 1.3971, 1.3142, 1.2904, + 1.2525, 1.2875, 1.3555, 1.3670, 1.3386, 1.2144, 1.2527, 1.1820, 1.3643, + 1.2478, 1.3274, 1.2426, 1.2595, 1.3444, 1.2624, 1.3189, 1.3534, 1.3567, + 1.3505, 1.2788, 1.2749, 1.2946, 1.2299, 1.2255, 1.3519, 1.2815, 1.3201, + 1.2997, 1.4289, 1.4691, 1.3291, 1.3569, 1.3914, 1.3111, 1.4085, 1.2841, + 1.2588, 1.2578, 1.2939, 1.3514, 1.2254, 1.3247, 1.3362, 1.2644, 1.2673, + 1.3170, 1.4680, 1.3108, 1.4813, 1.2732, 1.3297, 1.3588, 1.3602, 1.3319, + 1.2410, 1.2551, 1.2761, 1.2361, 1.2865, 1.3475, 1.2561, 1.2721, 1.2840, + 1.3149, 1.2915, 1.2586, 1.4006, 1.4374, 1.4045, 1.3576, 1.3122, 1.2363, + 0.7507, 1.1181, 1.3389, 1.3652, 1.2037, 1.4117, 1.2621, 1.2860, 1.3316, + 1.2805, 1.2710, 1.2435, 1.1944, 1.3598, 1.3444, 1.4724, 1.3790, 1.3487, + 1.3941, 1.3124, 1.4063, 1.4142, 1.3284, 1.3665, 1.3139, 1.3566, 1.4895, + 1.2782, 1.2546, 1.4901, 1.1898, 1.3537, 1.2926, 1.2563, 1.3956, 1.3219, + 1.2015, 1.2747, 1.2112, 1.3383, 1.4631, 1.2327, 1.3151, 1.2368, 1.3727, + 1.2585, 1.2890, 1.4346, 1.2018, 1.2840, 1.3254, 1.2897, 1.3935, 1.3739, + 1.2381, 1.2642, 1.3081, 1.6554, 1.3431, 1.2500, 1.2942, 1.2410, 1.2230, + 1.3736, 1.3201, 1.3532, 1.4267, 1.2428, 1.3280, 1.2035, 1.3420, 1.3727, + 1.2243, 1.3602, 1.3100, 1.3725, 1.2488, 1.3750, 1.2510, 1.2722, 1.3553, + 1.3276, 1.2686, 1.2770, 1.3718, 1.2138, 1.2707, 1.3313, 1.3915, 1.2798, + 1.2845, 1.8782, 1.3366, 1.3609, 1.3501, 1.3094, 1.2649, 1.4126, 1.2749, + 1.3580, 1.2331, 1.2871, 1.3615, 1.3453, 1.3761, 1.3402, 1.3949, 1.3075, + 1.3821, 1.2335, 
1.1219, 1.2089, 1.3412, 1.3147, 1.4133, 1.2842, 1.2501, + 1.3594, 1.4005, 1.2015, 1.2951, 1.3273, 1.3984, 1.3136, 1.3617, 1.2494, + 1.2750, 1.3031, 1.4086, 1.2963, 1.3859, 1.3196, 1.2856, 1.3284, 1.3102, + 1.3510, 1.2589, 1.2849, 1.2815, 1.2839, 1.3223, 1.3948, 1.2715, 1.2000, + 1.3363, 1.3016, 1.4022, 1.2559, 1.2914, 1.3016, 1.2621, 1.3012, 1.2922, + 1.3268, 1.3100, 1.4105, 1.2209, 1.2655, 1.4514, 1.1279, 1.3305, 1.3242, + 1.3918, 1.3349, 1.3387, 1.3374, 1.2754, 1.3770, 1.3840, 1.4277, 1.2821, + 1.3066, 1.2942, 1.2172, 1.3147, 1.3135, 1.2541, 1.3755, 1.4096, 1.3069, + 1.2051, 1.2896, 1.1959, 1.3449, 1.3062, 1.4289, 1.2718, 1.3988, 1.3705, + 1.3853, 1.4339, 1.3522, 1.3735, 1.3411, 1.2900, 1.2655, 1.4412, 1.2839, + 1.4373, 1.2894, 1.3883, 1.2792, 1.3214, 1.2820, 1.3570, 1.2802, 1.3953, + 1.3076, 1.3642, 1.2259, 1.3210, 1.5030, 1.3443, 1.2598, 1.2860, 1.2877, + 1.3718, 1.3292, 1.4056, 1.3185, 1.3182, 1.3692, 1.3035, 1.3890, 1.2688, + 1.2993, 1.2494, 1.3429, 1.3417, 1.3072, 1.5261, 1.1908, 1.2241, 1.3305, + 1.3312, 1.3154, 1.3555, 1.3268, 1.3306, 1.3453, 1.2817, 1.3936, 1.3599, + 1.3008, 1.2335, 1.4052, 1.3677, 1.3380, 1.3518, 1.2657, 1.4209, 1.3038, + 1.2496, 1.2492, 1.3114, 1.3554, 1.4546, 1.2551, 1.3230, 1.3162, 1.3046, + 1.3480, 1.3656, 1.4054, 1.2291, 1.4179, 1.4216, 1.6285, 1.2478, 1.2996, + 1.2502, 1.3330, 1.2784, 1.2079, 1.2718, 1.3187, 1.3079, 1.3155, 1.4294, + 1.3614, 1.2806, 1.2023, 1.4037, 1.4959, 1.3305, 1.3032, 1.3336, 1.2883, + 1.3871, 1.2994, 1.3879, 1.2965, 1.2655, 1.3263, 1.3945, 1.4027, 1.2427, + 1.4667, 1.3111, 1.3269, 1.2957, 1.4106, 1.2785, 1.2503, 1.4278, 1.3792, + 1.4130, 1.4359, 1.2090, 1.2858, 1.3821, 1.3316, 1.2425, 1.2517, 1.1997, + 1.2026, 1.2181, 1.2937, 1.4291, 1.2833, 1.2416, 1.3274, 1.4065, 1.4087, + 1.4433, 1.2292, 1.3580, 1.3133, 1.3483, 1.3444, 1.4458, 1.3265, 1.3614, + 1.3026, 1.2615, 1.2933, 1.5180, 1.2559, 1.4251, 1.3611, 1.3697, 1.3070, + 1.3921, 1.2738, 1.3158, 1.2808, 1.3083, 1.2928, 1.3000, 1.2510, 1.3136, + 1.3668, 1.2892, 
1.4194, 1.1994, 1.2050, 1.3569, 0.4170, 1.3689, 1.2871, + 1.2851, 1.2127, 1.2463, 1.3796, 1.3585, 1.3439, 1.3009, 1.3167, 1.4009, + 1.3422, 1.4012, 1.2848, 1.3174, 1.3833, 1.3165, 1.2696, 1.2573, 1.3488, + 1.3744, 1.2530, 1.2258, 1.2341, 1.4300, 1.4465, 1.4464, 1.2995, 1.2796, + 1.2561, 1.3208, 1.2107, 1.2561, 1.3227, 1.3021, 1.2849, 1.2951, 1.3540, + 1.3733, 1.2993, 1.2819, 1.3679, 1.3642, 1.3118, 1.3858, 1.3867, 1.5010, + 1.3779, 1.2740, 1.3771, 1.3687, 1.3323, 1.2218, 1.2367, 1.2822, 1.3661, + 1.3749, 1.2645, 1.3220, 1.2099, 1.3023, 1.2323, 1.3248, 1.3039, 1.3085, + 1.3062, 1.3787, 1.6302, 1.3617, 1.2350, 1.2862, 1.3350, 1.3497, 1.3331, + 1.3821, 1.2707, 1.3288, 1.3331, 1.3864, 1.3344, 1.3385, 1.2354, 1.3073, + 1.4564, 1.2089, 1.3711], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.1249e-01, 6.6530e-02, -1.5252e-01, -5.8736e-04, 4.5505e-02, + 9.3366e-02, 8.9381e-02, 5.9666e-02, -9.5938e-02, 3.6553e-02, + 6.7269e-02, -1.9519e-02, -1.0083e-01, -2.3823e-02, -3.5535e-02, + -6.4250e-02, -1.6845e-02, 1.2477e-02, -5.8642e-02, 4.4019e+00, + -4.9843e-02, 2.5174e-02, 3.9088e-02, -8.4199e-04, -2.0867e-02, + -5.4807e-04, -2.0647e-03, -1.7433e-02, 1.0351e-01, -9.0810e-02, + 3.9953e-02, 2.3238e-02, 1.4501e-02, 4.8113e-02, 5.7156e-02, + -6.4520e-02, 2.7001e-02, -4.0983e-02, -1.1597e-01, 3.8330e-02, + 2.3931e-02, 3.4684e-03, 8.6621e-03, 1.2125e-02, -4.2395e-02, + 6.7561e-03, 4.1510e-02, -4.2593e-02, 2.6011e-02, 5.0339e-02, + 4.2213e-02, -1.4566e-02, 2.7324e-02, 4.6538e-02, 5.4109e-02, + 5.0325e-02, -6.1837e-02, -4.9468e-02, 2.5524e-02, 1.0648e-02, + 4.9119e-02, -4.5645e-02, 4.1377e-02, 1.9581e-02, 9.3057e-02, + 1.7621e-02, 8.7060e-02, 9.3680e-02, -4.0777e-02, 2.2552e-01, + 3.7641e-02, 2.7666e-02, 3.4943e-01, 5.6216e-02, -1.4487e-01, + 6.4672e-02, 3.3319e-02, -9.7215e-02, -3.7396e-02, 3.0751e-02, + 1.8282e-02, -1.1652e-02, -1.9703e-02, 1.1703e-01, -2.4701e-02, + 4.9298e-02, 1.2135e-01, 8.3444e-02, -2.4657e-02, 1.2175e-02, + -1.6799e-02, 2.4807e-02, 
2.8444e-02, -5.0449e-02, -5.5266e-02, + 1.3274e-02, -2.0715e-02, -5.3437e-02, -6.7125e-02, -5.9439e-03, + 2.9591e-02, 1.3338e-01, 3.7340e-02, -7.2558e-02, 5.8032e-02, + 2.6419e-02, -4.8896e-03, -3.1441e-02, 3.4037e-02, -5.9380e-02, + -3.7031e-02, -1.5587e-03, 2.8165e-02, 3.1967e-02, 6.8637e-02, + -2.4927e-02, 1.0872e-02, -5.9152e-02, 1.8055e-02, 7.8582e-02, + -6.0901e-02, -1.3762e-01, -6.6338e-02, -2.4902e-02, -1.3091e-01, + -1.7644e-02, -1.1633e-01, 1.2645e-02, 9.5415e-03, -5.4999e-02, + -9.1769e-02, 3.0410e-02, -4.1700e-03, -3.8699e-03, -4.4601e-02, + -3.4956e-03, -8.8458e-03, 1.3220e-02, -4.3815e-02, -3.4442e-02, + -8.5615e-03, 3.0837e-02, 3.9464e-02, 1.0106e-02, 5.6561e-02, + -1.6560e-02, -7.9464e-04, -2.5703e-02, -1.4476e-02, 8.4910e-02, + 1.9942e-01, -5.2275e-02, -6.9300e-02, 5.5513e-03, -7.2165e-02, + -1.4982e-01, -1.9055e-02, -7.8602e-02, 6.0158e-03, 1.1233e-03, + 1.1562e-01, -1.1397e-01, -2.8403e-02, -4.9939e-02, 3.1589e-02, + 1.2695e-03, -7.5428e-02, 4.5873e-02, -5.3041e-02, 3.1856e-02, + -4.4322e-02, 5.4903e-02, 5.8381e-02, 1.6829e-02, 3.3597e-02, + -8.3475e-02, -1.2639e-02, 2.3891e-02, -5.2602e-02, 2.8142e-02, + 4.5836e-04, -8.0072e-02, 5.3213e-02, -2.0927e-03, 5.6944e-03, + -1.6532e-01, 2.7191e-02, 1.2540e-01, -4.0220e-02, -5.6549e-02, + 1.9608e-02, -4.7420e-02, 4.8768e-02, 5.2063e-02, -7.7537e-02, + 8.8836e-03, -2.1279e-02, -1.0774e-01, -5.9231e-01, 4.3655e-02, + -1.2093e-01, -1.1290e-02, -1.6956e-02, 4.7933e-02, 4.8477e-02, + 1.0526e-01, -8.1703e-02, 8.6334e-02, 8.1503e-02, -8.2884e-03, + 1.9980e-03, 1.6998e-01, -2.9055e-03, -2.3385e-02, -5.2070e-02, + -8.9273e-03, -2.7549e-02, 4.1074e-02, -8.2153e-02, -1.1626e-01, + 1.8730e-02, 5.3543e-03, -6.0731e-03, -2.9544e-02, 2.4765e-02, + -4.7577e-02, 3.2469e-02, -4.7903e-03, 7.3674e-02, -3.6682e-02, + 6.6302e-03, 2.0081e-02, -1.1133e-02, 5.2496e-02, -2.5489e-02, + 3.2781e-02, -7.1077e-02, 5.0036e-02, 5.3721e-02, 3.5437e-02, + 6.2628e-02, 8.2153e-02, 4.1833e-02, 2.3117e-02, 2.5839e-02, + 2.7974e-02, 
8.7044e-02, 3.6748e-02, 7.4573e-03, -1.1560e-02, + 4.0989e-02, 1.7920e-02, 8.9429e-02, -7.5363e-02, 1.1239e-01, + -9.7433e-02, -5.9373e-03, -9.0286e-02, 2.9564e-02, 1.9275e-02, + -4.6716e-02, -3.4492e-02, 1.2544e-02, -4.6450e-02, -4.1636e-02, + 2.8558e-02, -4.4963e-02, 1.0954e-02, -6.3352e-02, -3.6041e-02, + 1.6229e-02, 3.7611e-02, 8.6765e-02, -7.4588e-02, 1.9078e-02, + 5.8601e-02, -9.5907e-02, 3.2584e-02, 4.2996e-02, -6.7500e-02, + 1.5511e-01, 1.4252e-01, -4.6072e-02, 1.9960e-02, 4.7929e-03, + 6.0026e-02, 6.9157e-02, -4.8183e-02, 1.1191e-01, -1.1622e-01, + -3.2258e-02, -4.7785e-02, 6.6812e-02, 2.3133e-02, 3.6284e-02, + -1.1379e-02, 6.7204e-02, 2.7096e-02, 1.2316e-02, 4.3819e-02, + 1.6182e-02, 1.0396e-02, -3.4605e-03, 5.9977e-02, 3.0063e-02, + 6.0782e-03, 6.6088e-02, -5.5381e-02, 8.2836e-02, -3.4805e-02, + 3.3898e-02, 1.2261e-02, -4.6789e-02, -6.2023e-02, 1.2293e-02, + -7.1054e-02, 3.4515e-02, 4.1708e-02, 1.0877e-01, -2.4782e-02, + -7.4042e-02, 1.9112e-01, -3.6649e-02, -2.9619e-02, -8.1513e-02, + -1.1837e-01, 9.0077e-03, 1.3000e-02, 3.9411e-02, -9.5251e-02, + 1.3697e-01, -3.9842e-02, 1.0547e-01, -3.3850e-02, 1.7734e-02, + 8.9571e-02, -7.6323e-03, -2.8654e-02, -1.9298e-02, 6.9649e-03, + -9.7311e-02, 1.5954e-02, -5.8039e-02, -2.2428e-02, 7.0552e-02, + 1.0890e-02, 7.9111e-02, 7.5176e-02, 1.7303e-02, -2.0276e-02, + -3.1886e-02, 4.3340e-02, -2.2585e-02, -1.7488e-02, -9.9096e-03, + -5.1280e-02, -4.2994e-02, 1.3468e-02, 7.8899e-03, -6.5327e-02, + 2.5238e-02, -1.2104e-02, 1.9794e-02, -8.7755e-02, 1.2272e-02, + -3.8113e-02, 4.8278e-02, 8.7076e-02, 2.7675e-02, 3.3508e-02, + 4.7161e-02, -4.2115e-02, -6.3795e-02, 9.3564e-03, -2.6794e-02, + -3.8587e-04, 7.3805e-02, 5.9713e-02, 3.5639e-04, -2.9299e-02, + -8.8966e-02, 1.2821e-01, 3.4049e-03, 4.1277e-03, -4.2586e-02, + -2.8642e-02, -7.6255e-03, -6.1095e-02, -5.1197e-02, -6.0758e-02, + 2.7188e-02, -9.9969e-02, 3.1856e-03, -3.5005e-02, 6.6075e-02, + 8.8329e-03, 2.1645e-02, 4.5789e-03, -2.4657e-02, 4.3128e-02, + 1.1682e-01, 
-3.2732e-02, 7.7938e-02, 2.1887e-02, -6.3995e-02, + -1.1791e-01, -6.2589e-02, 7.7237e-03, -1.2817e-02, -1.6943e-02, + -4.3070e-02, 2.5946e-02, 9.0340e-02, -3.8835e-02, -1.0102e-01, + -1.3493e+00, 3.1324e-02, 2.9935e-02, -3.3154e-02, -1.0369e-02, + -4.0507e-03, -1.2769e-02, 4.4127e-03, -2.8645e-02, -1.8434e-02, + 2.3528e-02, -1.3385e-01, -9.2718e-02, 3.5029e-02, 1.4013e-02, + -4.1471e-02, -7.4307e-02, -1.2050e-02, 7.5646e-02, 3.5991e-02, + -7.2910e-03, 1.7712e-02, 5.8498e-02, 1.1845e-01, -7.3238e-02, + -5.0888e-03, -8.3001e-02, -2.9860e-02, 1.0127e-02, 1.1152e-01, + -9.6156e-02, 2.8515e-02, -6.0384e-02, 1.1800e-01, 2.3654e-02, + -1.3667e-02, 5.0921e-02, -9.9970e-02, 3.6478e-02, 3.3720e-02, + -7.5835e-03, 2.4045e-02, 2.6867e-02, -1.9072e-02, -1.0716e-01, + 1.3416e-02, 3.4972e-02, -8.7127e-02, 4.2388e-02, -1.9562e-02, + -3.1595e-02, 2.2393e-02, -6.9860e-02, 4.5873e-02, -1.3502e-02, + -2.1857e-02, -8.8041e-02, -1.2840e-02, 2.1885e-02, -6.6175e-02, + 2.2215e-02, 1.1226e-01, 6.1672e-02, -2.8236e-02, 9.6886e-02, + 9.5079e-02, -8.6334e-02, 3.8164e-02, 1.8307e-01, -6.8546e-02, + -4.5854e-02, -2.1095e-02, 8.4351e-02, 7.5503e-02, 1.1180e-02, + 1.8422e-02, 1.6019e-02, 2.6321e-02, -3.6733e-02, -1.2587e-01, + 1.7723e-02, -8.1332e-02, 3.5607e-02, -2.6955e-02, 2.2358e-04, + 2.8571e-02, -5.5601e-02, 5.2749e-02, -1.8398e-02, -2.9063e-02, + 6.0163e-02, 4.2427e-02, 4.9609e-02, 3.5119e-02, 3.5887e-02, + -8.4040e-02, -3.5546e-03, -8.2419e-02, 4.4913e-02, -2.8065e-02, + -8.6208e-03, 6.7511e-02, 9.9593e-02, 6.3995e-02, -6.3132e-02, + -4.9890e-02, -2.7507e-02, 5.0936e-02, -2.7012e-02, -3.6912e-02, + -2.4369e-02, 1.5135e-02, -2.4008e-02, 5.1538e-03, -9.2522e-02, + 2.4081e-02, 3.6717e-02, -1.6280e-04, -4.9823e-02, 5.6712e-04, + -2.6802e-02, -3.0708e-02, -2.1814e-02, -5.9829e-02, -6.6685e-02, + -1.4319e-03, 1.2471e-02, -2.3993e-02, 6.8387e-02, 2.0728e-02, + 3.0708e-02, 4.8047e-02, 1.0205e-02, 6.6293e-02, 2.8431e-02, + 6.7335e-02, 9.5118e-03, -4.3140e-02, 5.8522e-02, 4.3582e-03, + -2.7967e-02, 
5.7708e-02, -3.5698e-02, 2.9260e-02, 1.0412e-01, + -6.7268e-02, 7.3698e-02, 3.2205e-02, -5.8821e-03, 1.3816e-01, + 6.3438e-02, 3.3773e-02, -9.4991e-03, 1.9717e-02, 3.7347e-02, + 5.1135e-02, 1.2657e-02, -5.9936e-02, -3.0121e-02, 7.1307e-02, + 3.1290e-02, 1.0219e-02, 2.0115e-02, 2.6330e-02, -2.9001e-02, + -1.0041e-01, -4.3682e-03, 5.1741e-02, 8.8398e-02, 4.9760e-02, + 2.4417e-02, 5.8507e-02, -8.0023e-02, -1.5147e-02, -4.0268e-03, + -3.7588e-02, 1.0212e-01, 6.3257e-02, -4.1025e-02, 8.4955e-03, + -2.6449e-02, 4.9546e-02, 1.7144e-02, -6.9164e-03, 6.3603e-02, + -6.4019e-02, -1.0373e-01, 2.1895e-02, -1.5778e-02, 3.1353e-03, + -3.9587e-02, 3.7480e-02, -2.3462e-01, 4.4842e-02, -4.6786e-02, + 5.5548e-02, 5.3524e-02, -1.7518e-02, -3.4734e-02, -2.3288e-03, + -9.6355e-02, 8.0766e-02, 8.5742e-02, 2.9556e-02, -6.0110e-02, + 3.5366e-02, 3.0754e-04, 2.1479e-02, 6.5357e-03, 2.3472e-02, + -2.9868e-02, -1.4617e-02, -1.1686e-02, 4.1814e-02, -2.7550e-02, + 1.0170e-01, 1.5005e-02, 8.5918e-02, 1.5293e-01, 3.6819e-02, + 2.8971e-02, -4.9363e-02, 2.3617e-03, 2.9390e-02, -5.3829e-02, + -2.8364e-03, -5.2993e-02, 3.4160e-02, -3.5602e-02, 7.5076e-03, + 1.7979e-02, 2.6363e-02, 6.1626e-02, 1.4246e-02, 9.0429e-02, + 9.7307e-02, -1.8644e-02, -1.0435e-02, 5.2864e-02, -3.1784e-02, + 3.9848e-02, -7.4252e-02, -1.6382e-02, 2.7518e-02, 6.7591e-02, + -1.0744e-01, -9.2538e-03, -7.7432e-02, 3.2136e-03, 8.4539e-02, + -9.5115e-04, -6.7152e-02, -1.0466e-01, 2.8688e-02, 1.1688e-02, + -2.1216e-01, 2.4527e-02, 7.5086e-03, 5.9728e-03, -5.0438e-02, + 4.5686e-02, -2.3327e-02, -2.7498e-02, 1.3780e-02, 6.8096e-02, + -4.4034e-02, -5.3989e+00, -1.8472e-02, 2.5993e-02, 1.1153e-02, + -7.6305e-02, -4.6629e-02, -4.5762e-02, -1.2894e-02, 2.3282e-02, + -5.1168e-02, -1.6560e-02, -1.8486e-02, -4.1046e-02, -4.4457e-02, + 4.0430e-02, -3.8813e-02, -7.7700e-02, 2.9239e-02, -4.2895e-02, + 7.3962e-02, 7.1423e-02, 1.0706e-02, -2.8681e-02, -1.1633e-01, + -2.8290e-02, 3.7359e-02, 2.3484e-02, 3.1901e-02, 4.8001e-02, + -3.1100e-02, 
8.7397e-02, 9.8772e-02, 1.7629e-02, 3.6501e-03, + 9.3748e-03, -3.4984e-03, 3.5952e-02, -7.3531e-02, -7.0477e-03, + 6.7023e-03, -2.4618e-02, 2.3288e-02, -9.6989e-02, 3.1015e-02, + 1.3645e-02, -2.0102e-01, 8.9683e-02, 3.0963e-03, 7.7379e-02, + 5.2203e-02, 4.2260e-02, 5.4637e-02, 7.2847e-02, 2.5497e-02, + -2.4353e-03, -2.0962e-02, -1.2389e-02, -3.2438e-02, -7.5809e-02, + 9.4790e-02, -1.0083e-01, -1.0935e-01, 3.2097e-02, -2.6656e-02, + -1.8654e-02, -8.5054e-02, 1.4421e-02, 6.8078e-02, -5.8056e-02, + -4.4167e-02, -1.4837e-02, -7.9102e-02, -4.1090e-02, 7.0457e-02, + 1.0067e-02, 2.5603e-02, 9.8748e-02, 4.7162e-02, -5.7733e-02, + 2.2399e-02, 3.9574e-02, 9.2868e-02, -7.5548e-02, -5.1467e-02, + -4.1965e-02, 8.5480e-02, -2.1664e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0202, 0.0051, -0.0099, ..., 0.0150, -0.0118, -0.0252], + [ 0.0279, -0.0216, -0.0171, ..., -0.0218, 0.0069, 0.0059], + [-0.0100, 0.0249, 0.0076, ..., 0.0068, -0.0119, 0.0081], + ..., + [ 0.0039, -0.0400, -0.0170, ..., 0.0191, -0.0038, 0.0145], + [ 0.0186, -0.0087, 0.0062, ..., 0.0125, -0.0135, -0.0063], + [ 0.0241, -0.0023, -0.0027, ..., 0.0083, 0.0031, 0.0143]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1687, -0.1522, -0.1874, ..., -0.3894, -0.2622, -0.3452], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0104, 0.0026, -0.0121, ..., 0.0043, -0.0197, -0.0084], + [-0.0017, -0.0006, 0.0162, ..., -0.0014, 0.0029, 0.0020], + [-0.0115, 0.0131, -0.0065, ..., -0.0152, 0.0059, 0.0125], + ..., + [-0.0005, 0.0133, -0.0086, ..., 0.0159, -0.0166, 0.0221], + [-0.0144, -0.0031, 0.0204, ..., 0.0199, 0.0079, -0.0012], + [-0.0021, -0.0152, -0.0143, ..., 0.0090, -0.0025, -0.0068]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.4450e-02, -1.9440e-02, -5.2460e-02, -3.1021e-02, -5.1300e-02, + 3.6041e-02, -4.9820e-03, 3.0884e-02, -6.0791e-02, -3.9024e-03, 
+ 6.5979e-02, -5.0690e-02, -1.5404e-02, 1.2665e-02, 1.0269e-02, + -2.3239e-02, 1.9638e-02, 4.1351e-02, -9.3155e-03, 4.5898e-02, + -1.4809e-02, 3.9795e-02, 5.4199e-02, 2.2781e-02, -1.1696e-02, + 4.7455e-02, -3.1342e-02, -5.1666e-02, 2.0187e-02, -4.6753e-02, + 1.4305e-02, 2.4292e-02, 4.6883e-03, 4.9622e-02, 1.7075e-02, + -7.3669e-02, 4.7668e-02, -3.4607e-02, 5.6549e-02, -2.3193e-02, + -1.6907e-02, -4.4983e-02, 1.2787e-02, 3.8055e-02, -4.3793e-02, + 7.4951e-02, -2.7313e-03, 2.3071e-02, -2.5681e-02, -2.4292e-02, + 3.4485e-02, 3.3752e-02, 1.5030e-02, 1.4137e-02, 5.1941e-02, + -3.1830e-02, 4.2801e-03, 8.0322e-02, 2.6215e-02, 4.4525e-02, + 2.6276e-02, -3.3325e-02, 2.9175e-02, -5.9875e-02, 6.1890e-02, + 4.4678e-02, 3.4008e-03, 6.0394e-02, 3.0575e-03, 2.4216e-02, + -1.5656e-02, 2.8706e-03, 1.1194e-01, 1.2585e-01, -6.3538e-02, + 5.7129e-02, -8.5678e-03, -2.8549e-02, 3.8269e-02, -6.1859e-02, + -2.5925e-02, -7.7637e-02, 1.9104e-02, 1.2222e-02, 3.5248e-02, + 1.2161e-02, 6.0577e-02, 3.3173e-02, -2.0538e-02, 4.6310e-03, + -5.7648e-02, 1.1932e-02, 5.6274e-02, -1.1154e-02, -6.1523e-02, + 9.4299e-02, 4.9988e-02, -2.1072e-02, -5.7259e-03, -4.9530e-02, + -8.6914e-02, 1.1816e-01, 3.1860e-02, -1.7868e-02, -5.5618e-03, + 1.4656e-02, -1.0147e-03, 6.7505e-02, 2.8412e-02, -5.7129e-02, + -2.4433e-03, 2.9312e-02, -5.8250e-03, -2.6993e-02, -1.4832e-02, + 1.0757e-02, -4.6600e-02, -1.3153e-02, -5.3589e-02, -4.4342e-02, + -1.4353e-03, -4.3793e-03, 1.9211e-02, 1.8112e-02, -8.9294e-02, + -2.7573e-02, -5.5733e-03, -8.1238e-02, 5.9601e-02, -1.1169e-01, + -2.1286e-02, -4.9652e-02, -1.4503e-02, -3.3081e-02, -2.4017e-02, + -8.7662e-03, -2.5314e-02, -9.2926e-03, -4.5837e-02, 3.4424e-02, + -3.4454e-02, 4.5349e-02, 5.2155e-02, 4.0588e-02, 6.9824e-02, + -2.4170e-02, -2.2324e-02, 7.3547e-02, 3.6987e-02, 1.9012e-02, + 3.7231e-02, -7.6904e-02, 5.1880e-02, 1.5228e-02, -3.5583e-02, + -5.7495e-02, -1.0101e-01, -2.4506e-02, 3.2330e-03, 6.7200e-02, + 2.8725e-03, -1.1665e-02, -7.1144e-03, -1.0231e-02, -9.5825e-03, + 
5.4260e-02, -5.0262e-02, -5.2551e-02, -1.3290e-02, 8.1116e-02, + -3.0533e-02, 7.3776e-03, 2.4551e-02, -8.6975e-02, -4.4342e-02, + 4.7516e-02, -1.4748e-02, 4.2114e-02, -4.8584e-02, 4.8492e-02, + -1.2657e-02, -4.6936e-02, -3.9429e-02, -2.0401e-02, 1.7181e-02, + 2.9335e-03, 4.5242e-03, 1.4160e-01, 6.3515e-03, -2.2491e-02, + -3.1643e-03, 1.0612e-02, 8.1970e-02, -1.3794e-02, -8.8043e-03, + 4.9561e-02, -5.0964e-03, -3.1311e-02, -8.0383e-02, 2.4078e-02, + 3.2063e-03, -5.1544e-02, 6.0616e-03, -8.0185e-03, 4.0283e-02, + -1.9867e-02, -7.0129e-02, -1.7181e-02, 1.0101e-02, -3.5614e-02, + -9.1705e-03, 1.0254e-02, 1.8930e-03, 5.0964e-02, -4.9713e-02, + -2.0294e-03, 3.4821e-02, -4.1473e-02, -1.2077e-02, -8.0444e-02, + 4.1412e-02, 7.0496e-02, -2.3071e-02, -1.7212e-02, 6.1874e-03, + 2.6428e-02, -2.0203e-02, -2.5589e-02, 6.1890e-02, 3.3478e-02, + 4.9957e-02, -2.1423e-02, -7.5928e-02, -4.8828e-02, 2.5978e-03, + 7.7820e-02, -9.0881e-02, -2.9709e-02, 1.0559e-02, 4.3030e-02, + 6.5918e-02, 1.6846e-02, -8.6365e-03, 4.7394e-02, 3.0457e-02, + -2.7008e-02, -3.8513e-02, 5.5298e-02, -3.8269e-02, -7.2861e-03, + 1.2192e-02, -3.8666e-02, -1.1482e-02, -7.8552e-02, 9.7733e-03, + 6.8176e-02, 4.8340e-02, 2.0966e-02, 2.2064e-02, -3.8528e-03, + 2.3636e-02, 1.2711e-02, -3.0380e-02, -8.6182e-02, -4.6173e-02, + -4.6783e-02, -3.2623e-02, -8.6060e-03, 3.2520e-03, -1.2886e-02, + -3.9673e-02, 3.4027e-03, 1.0046e-01, 3.3478e-02, -1.5030e-02, + 1.1162e-02, 3.0350e-02, 4.8279e-02, 2.0081e-02, -9.4528e-03, + 2.7328e-02, -1.1261e-02, 5.4970e-03, 1.3132e-03, 3.6163e-02, + 7.7362e-03, -3.6469e-02, -5.6915e-02, 4.0619e-02, 6.8474e-03, + 6.9580e-02, 2.7710e-02, -3.1494e-02, 2.1805e-02, 3.1281e-02, + -8.8348e-03, -3.2898e-02, 2.7069e-02, -3.9940e-03, 9.3445e-02, + -3.4088e-02, -5.5313e-03, -3.7689e-02, 1.6724e-02, 4.7028e-02, + -2.2858e-02, 4.0100e-02, 5.3650e-02, -3.3752e-02, 4.2076e-03, + 3.9124e-02, 5.7983e-02, -3.1143e-02, 3.8513e-02, 1.0323e-02, + -5.5481e-02, -4.6356e-02, -4.0039e-02, 7.4219e-02, -7.2021e-02, + 
-1.0323e-02, 3.8483e-02, -5.9570e-02, -2.7084e-02, 9.9170e-01, + -2.5467e-02, 6.9702e-02, 2.7435e-02, 4.4373e-02, -2.9785e-02, + 5.3497e-02, -6.6071e-03, -9.4528e-03, -2.0615e-02, 1.1345e-02, + 4.7150e-02, 5.0354e-02, -1.2062e-02, -2.1973e-02, 2.1774e-02, + -4.2786e-02, -8.9340e-03, 4.1382e-02, 3.9864e-03, 2.9648e-02, + -2.7267e-02, -3.4580e-03, -5.5664e-02, 8.5907e-03, -2.2476e-02, + 7.6172e-02, 1.4473e-02, -1.6937e-02, -6.1584e-02, 3.5591e-03, + 1.6144e-02, 8.6670e-03, 2.4853e-03, -5.0018e-02, 1.6235e-02, + 1.9775e-02, -3.4027e-02, 8.4473e-02, -1.0803e-02, -5.3528e-02, + 3.7861e-03, -2.1576e-02, 3.7415e-02, -2.8839e-02, -8.6731e-02, + -4.9744e-03, 7.8552e-02, -6.4270e-02, 1.1009e-02, -2.1423e-02, + -2.4216e-02, 8.6243e-02, -5.7251e-02, 5.2826e-02, 7.1472e-02, + -2.5269e-02, 2.3468e-02, 1.5396e-02, 6.8130e-03, 1.3374e-02, + -3.2043e-02, 8.0933e-02, -4.6783e-02, -4.3091e-02, -4.8950e-02, + 2.8801e-03, -6.0234e-03, -6.4964e-03, 2.0203e-02, -2.5589e-02, + 5.3864e-03, -5.1697e-02, -8.9722e-02, 8.4656e-02, 1.2188e-03, + 6.9336e-02, -1.9135e-02, -1.4641e-02, -2.3636e-02, -2.4597e-02, + -9.3689e-02, -2.0050e-02, 3.0487e-02, 3.6621e-02, 3.4027e-02, + -8.9874e-03, 3.9703e-02, 2.5654e-03, 2.7573e-02, -9.3445e-02, + -6.0944e-02, -9.6283e-03, -1.6403e-03, 1.9331e-03, 3.9856e-02, + -6.8726e-02, -2.6672e-02, -1.5839e-02, -3.2684e-02, 1.5198e-02, + 4.4403e-03, -5.0293e-02, -3.5645e-02, 2.1896e-02, -4.4067e-02, + -3.1982e-02, -2.1866e-02, -3.6621e-02, -1.9531e-02, -9.9106e-03, + -2.4673e-02, -4.5288e-02, 3.9764e-02, 1.0429e-02, 3.6957e-02, + 1.9943e-02, -2.9251e-02, -2.3087e-02, -1.0883e-01, 2.7267e-02, + -1.7349e-02, -2.8488e-02, 4.4922e-02, 2.2797e-02, 2.3148e-02, + -3.6836e-04, -5.5725e-02, -6.5918e-02, 3.3142e-02, 1.2970e-02, + -9.5520e-03, -8.7433e-03, 2.8717e-02, -4.6478e-02, -3.7994e-02, + 7.0618e-02, -2.2369e-02, -3.4119e-02, -2.9465e-02, 1.0574e-04, + -2.7817e-02, 3.8849e-02, -2.4629e-04, 5.5115e-02, -1.7776e-02, + 6.0692e-03, -2.2720e-02, 5.4230e-02, -3.9154e-02, 
-3.1891e-02, + 9.6436e-03, 8.9722e-02, 5.8990e-02, -1.1063e-03, -7.3242e-03, + 3.8300e-02, -2.6031e-02, -3.2593e-02, 6.5979e-02, -3.9490e-02, + -6.1890e-02, -1.2589e-03, 7.5256e-02, 5.3223e-02, -2.5833e-02, + -2.1667e-02, 6.6467e-02, 4.8584e-02, -1.9684e-02, -8.9417e-03, + -4.0710e-02, -1.8967e-02, 4.2511e-02, 4.4708e-02, 2.3956e-02, + -2.4551e-02, -2.4567e-02, -2.1851e-02, -6.6650e-02, -2.4673e-02, + 2.7588e-02, -3.9673e-02, 6.9153e-02, 1.9150e-02, -3.3131e-03, + -5.8594e-02, -1.2550e-02, -5.0690e-02, 3.5004e-02, -9.9957e-05, + -2.9907e-02, 4.1138e-02, 4.6959e-03, -5.2605e-03, -4.1016e-02, + -3.0014e-02, -1.3374e-02, -2.0096e-02, -5.9624e-03, -1.9089e-02, + 2.3899e-03, 1.4854e-02, 1.8494e-02, -1.0391e-02, -1.6373e-02, + -2.6657e-02, 3.0426e-02, -2.9388e-02, -1.7258e-02, 8.3801e-02, + 4.2511e-02, 1.4603e-02, 6.9962e-03, -2.7267e-02, -4.2053e-02, + 2.4109e-03, 7.2250e-03, -1.8051e-02, 1.2123e-02, -6.6772e-02, + -4.9011e-02, 3.3325e-02, -9.6083e-04, -1.6739e-02, 3.7323e-02, + 1.9653e-02, -3.6469e-02, -6.5063e-02, -4.2664e-02, 2.1423e-02, + 1.4153e-02, -6.0944e-02, -1.6418e-02, -2.0462e-02, 5.4108e-02, + -6.2439e-02, -1.2047e-02, 2.0615e-02, -8.3313e-03, 1.9150e-02, + -3.3020e-02, -5.9052e-02, -3.5362e-03, 4.8279e-02, 7.7581e-04, + -2.9388e-02, 7.5626e-04, -7.7026e-02, 3.2291e-03, -2.3895e-02, + -2.0447e-02, -5.3040e-02, 4.3091e-02, 1.8417e-02, 3.7323e-02, + -7.1777e-02, 2.2614e-02, -2.3453e-02, 9.3918e-03, 2.1072e-02, + -6.3599e-02, -1.2827e-03, -3.2990e-02, -4.0710e-02, 1.2779e-03, + 1.5045e-02, 3.6285e-02, 2.3239e-02, -3.0701e-02, -3.3905e-02, + 2.8732e-02, 2.5955e-02, -6.2561e-03, -6.4209e-02, 3.1113e-02, + -5.2368e-02, -3.9337e-02, 2.4933e-02, 4.9820e-03, -5.0995e-02, + 1.7792e-02, -5.0774e-03, -5.2155e-02, 6.5002e-03, -6.1684e-03, + -7.5317e-02, 3.0960e-02, -3.4119e-02, -2.2980e-02, -1.0941e-02, + -6.3171e-02, 2.3773e-02, 7.6050e-02, 3.6392e-03, 5.1117e-03, + 1.1864e-02, -5.1880e-02, -1.0663e-01, -3.6285e-02, 1.0872e-02, + 9.3689e-03, 1.1284e-02, -1.1948e-02, 
6.7078e-02, -1.3687e-02, + 3.9734e-02, -4.2725e-03, 5.1804e-03, 6.2439e-02, 2.9617e-02, + 2.8687e-03, -2.0889e-02, 4.2908e-02, -1.4122e-02, -4.1161e-03, + 3.6255e-02, 1.6693e-02, 2.4048e-02, -2.6428e-02, -4.6814e-02, + 1.6907e-02, 1.1269e-02, -6.5186e-02, 1.1847e-01, 4.9774e-02, + 3.2444e-03, -2.2018e-02, -2.2964e-02, -2.3331e-02, 8.9264e-03, + 2.5665e-02, -4.0833e-02, 3.0457e-02, 1.6918e-03, -4.4708e-02, + -5.4321e-02, 5.0995e-02, -6.7139e-02, 1.6037e-02, 4.4373e-02, + -3.3600e-02, -5.5328e-02, -3.9978e-02, 6.6223e-02, 4.3121e-02, + -9.3262e-02, -1.2390e-02, 1.1337e-02, 1.2619e-02, 4.5204e-03, + 3.6682e-02, 3.8422e-02, -1.2413e-02, 4.9194e-02, 9.0210e-02, + 5.9937e-02, -1.3379e-01, 4.9400e-03, 5.2452e-03, -1.5884e-02, + -4.0619e-02, 3.6201e-03, -4.7150e-02, -1.3702e-02, 5.2399e-02, + -2.6215e-02, 3.8147e-02, -7.9155e-04, 2.6855e-02, -2.1927e-02, + -1.8539e-02, 1.3641e-02, -2.2598e-02, -4.0955e-02, -5.6610e-02, + 7.6782e-02, -3.4695e-03, 4.1008e-03, 2.9449e-02, -1.4961e-02, + -2.8824e-02, 7.7698e-02, 4.5837e-02, -2.8061e-02, -8.8120e-03, + -3.3844e-02, 2.0676e-02, 7.3624e-04, -3.8818e-02, 1.3878e-02, + 8.1940e-03, 9.8495e-03, -1.2484e-03, 2.6016e-02, 8.2397e-03, + 2.2873e-02, 3.6804e-02, 1.8997e-03, 1.3168e-02, 5.0316e-03, + -2.5654e-03, 3.4851e-02, -5.0201e-03, 1.0445e-02, 3.4485e-02, + 3.4668e-02, 3.4515e-02, 6.4026e-02, 9.1248e-02, -7.8247e-02, + -7.4844e-03, 6.0272e-02, -2.4399e-02, 3.3691e-02, 1.4244e-02, + 1.6068e-02, -5.9448e-02, -4.2877e-02, -8.6746e-03, -1.0736e-01, + -3.5461e-02, -2.6962e-02, -8.1787e-02, -2.7451e-02, 1.2024e-02, + -4.1992e-02, 8.1635e-03, 6.4430e-03, -3.7689e-02, 5.9601e-02, + -2.4536e-02, 6.6589e-02, 7.8552e-02, 8.3130e-02, 1.6815e-02, + -1.2283e-02, 3.2074e-02, 1.6693e-02, -8.3008e-02, -1.6525e-02, + -4.8920e-02, 1.9150e-02, -4.1748e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.0257, 2.0344, 1.9511, 1.9693, 1.9585, 1.9219, 2.0683, 1.9555, 1.9977, + 2.0715, 2.0045, 2.0027, 2.0430, 
2.0124, 2.0187, 1.8801, 1.9952, 2.1130, + 1.9557, 1.2675, 1.8754, 1.8764, 1.9653, 1.9503, 2.0653, 1.9437, 1.9732, + 2.0050, 2.0532, 1.8522, 2.0196, 1.9858, 1.9741, 1.9016, 2.0161, 2.0116, + 1.9765, 2.0106, 1.8425, 1.9446, 1.9371, 2.0979, 1.9926, 2.0592, 2.1984, + 2.0775, 1.9381, 1.9745, 1.9952, 1.9890, 1.8479, 2.0970, 2.0300, 2.0011, + 2.0767, 2.1105, 1.9994, 1.9672, 2.0051, 2.1348, 2.0109, 2.0383, 1.8306, + 2.0998, 1.9471, 1.9370, 1.9396, 1.9878, 2.0677, 2.3015, 2.1528, 1.9999, + 1.8284, 2.0095, 1.9911, 2.0213, 2.0314, 1.9826, 1.9751, 1.9446, 2.0328, + 1.9656, 1.9499, 1.9528, 2.0021, 1.9421, 2.0539, 2.0090, 2.0005, 2.0296, + 1.8981, 2.0537, 1.9623, 1.9398, 1.9628, 1.9870, 2.0913, 2.0724, 1.9533, + 2.1124, 1.9853, 2.6650, 1.9589, 2.0429, 1.8930, 1.9923, 2.0287, 2.0133, + 2.0124, 2.0338, 2.0284, 2.0974, 2.0387, 2.1974, 1.9633, 2.1018, 1.8175, + 1.9394, 2.0572, 2.0080, 2.0954, 1.9050, 2.1441, 1.9543, 1.9805, 2.0541, + 1.9790, 1.9280, 1.9894, 2.1866, 1.9792, 1.8995, 1.9158, 1.9590, 2.1155, + 2.5199, 2.0179, 2.0004, 1.9195, 2.0193, 1.9890, 1.9006, 1.9797, 2.0440, + 2.0788, 2.0407, 2.0304, 2.1029, 2.0203, 1.9760, 1.9394, 2.0054, 1.9847, + 2.0356, 2.1139, 1.5719, 1.9705, 1.9917, 2.0091, 2.0040, 1.9577, 1.9719, + 2.0472, 1.7621, 2.0073, 1.9903, 2.1363, 1.9998, 2.0051, 1.9888, 1.9687, + 1.9345, 2.1511, 2.1393, 2.0262, 1.9638, 2.0399, 2.0069, 1.9389, 1.9667, + 2.0143, 2.0119, 1.9335, 1.9657, 2.1081, 2.0954, 1.9555, 0.4295, 1.9307, + 2.0535, 2.0774, 2.0535, 2.1234, 1.9335, 2.0603, 2.0513, 1.9627, 1.9375, + 0.6353, 2.0395, 2.2153, 2.1222, 2.0615, 2.0297, 2.0519, 2.0836, 2.0057, + 2.0021, 2.0199, 1.9734, 1.9511, 2.1163, 2.0859, 2.0537, 2.0991, 2.0374, + 1.9356, 2.0284, 2.0152, 2.0300, 2.1570, 1.9596, 1.8716, 2.0989, 1.9362, + 2.0595, 1.9272, 1.9588, 2.0182, 1.9944, 2.0247, 2.0196, 1.9151, 1.9654, + 2.0398, 1.9895, 2.1065, 2.0104, 1.9463, 1.9615, 1.9753, 2.0335, 1.9698, + 1.9619, 1.9918, 1.9939, 1.9441, 2.3460, 1.9763, 1.9905, 2.1296, 1.9593, + 2.0462, 1.9803, 1.8739, 1.9475, 
1.9598, 2.0517, 1.9284, 1.9119, 2.0556, + 1.9551, 1.8316, 1.9645, 1.9500, 2.0806, 2.0543, 1.9594, 1.9842, 1.9619, + 1.9587, 1.8544, 2.0395, 1.9717, 2.2640, 1.9691, 2.0282, 1.9032, 1.9770, + 1.9332, 1.9511, 1.8922, 1.9482, 1.9790, 2.0446, 2.0257, 2.0371, 1.9969, + 2.0323, 1.9707, 2.1331, 2.0598, 1.9312, 2.0447, 2.0934, 1.8701, 2.0536, + 1.9339, 1.9815, 2.0502, 1.8652, 1.9644, 1.9713, 2.0664, 2.0611, 1.8879, + 1.9392, 1.9859, 2.0829, 2.0115, 1.9858, 1.9979, 1.8491, 1.9454, 1.9416, + 1.9871, 2.0103, 1.9462, 2.1577, 2.0422, 1.8648, 1.9684, 1.9651, 1.9931, + 2.4453, 1.7836, 1.9797, 2.0477, 2.0374, 1.8115, 1.9577, 1.9438, 1.9010, + 1.9335, 2.0134, 2.0297, 2.0084, 1.9193, 1.9962, 2.1257, 1.9753, 1.9826, + 2.0457, 1.9393, 2.1329, 1.9958, 2.0344, 2.0395, 2.0875, 2.0308, 1.9300, + 2.0481, 1.9178, 2.3537, 2.0540, 1.9842, 1.8567, 1.9846, 2.1611, 2.0759, + 2.1378, 1.9157, 1.8941, 2.0725, 2.0011, 2.0108, 2.0496, 2.0581, 2.0146, + 2.0212, 1.9948, 1.9491, 1.9728, 2.1604, 2.1109, 1.9235, 2.0226, 2.0845, + 1.9894, 1.9106, 1.9809, 1.7731, 2.0163, 1.9498, 2.0033, 1.9945, 2.1344, + 2.0049, 2.0276, 1.9931, 2.1364, 1.9570, 1.9764, 1.9729, 2.0930, 1.9910, + 1.9975, 2.0646, 2.0498, 1.9523, 2.1627, 2.0006, 1.8691, 2.0954, 1.9561, + 1.9464, 2.0225, 2.0438, 2.0626, 1.8734, 2.0497, 1.9712, 2.0253, 2.2255, + 1.9269, 1.5977, 1.9628, 1.9559, 2.0419, 2.0557, 2.1421, 1.9419, 1.9496, + 2.1094, 2.1491, 1.9859, 2.0079, 1.8498, 2.0724, 1.9499, 2.0346, 2.0262, + 1.9633, 2.0354, 1.9472, 1.9602, 2.0195, 2.0032, 2.0036, 1.9063, 2.0086, + 2.0016, 2.0250, 2.2570, 1.9673, 1.9707, 1.8847, 1.9078, 2.0152, 2.0952, + 2.0862, 1.9537, 2.0682, 1.9151, 2.0110, 2.2095, 1.9940, 2.1294, 2.0299, + 1.9963, 2.1305, 1.9429, 2.0693, 1.9541, 1.9645, 1.9386, 2.0793, 2.0342, + 1.9821, 2.0205, 1.8914, 1.9734, 2.0128, 1.9509, 2.0054, 2.0226, 2.0904, + 2.0363, 1.9614, 2.1894, 1.8249, 2.1139, 2.0987, 1.7479, 2.0491, 2.2605, + 2.0351, 2.0034, 1.8794, 2.0154, 2.0932, 2.0017, 1.8794, 2.0726, 1.8610, + 1.9683, 1.9647, 1.9156, 1.8042, 
1.9973, 2.0662, 2.0574, 2.0067, 2.0059, + 1.9640, 2.1439, 2.0814, 2.0323, 2.0262, 1.9617, 1.9233, 2.0654, 1.9877, + 2.0460, 2.0286, 2.0341, 2.0739, 1.9845, 2.0807, 2.0348, 1.9746, 1.9962, + 2.0325, 1.9365, 1.9254, 1.9935, 2.0833, 1.9138, 2.0450, 2.1419, 2.0006, + 1.9513, 2.1702, 1.9652, 1.9930, 2.1020, 2.1332, 1.9315, 2.1528, 1.9875, + 2.0101, 1.9131, 2.1410, 1.9704, 1.9231, 1.9808, 1.9090, 2.0045, 2.0392, + 1.9050, 2.1369, 1.9591, 2.0033, 2.1322, 2.0857, 1.9056, 2.0861, 2.0624, + 1.9077, 2.1703, 1.9176, 1.8142, 1.9912, 1.8790, 1.9754, 1.9995, 1.9791, + 2.0240, 1.8256, 2.0508, 1.9844, 2.1159, 1.9374, 1.9971, 1.9893, 2.0586, + 2.0790, 1.9968, 2.0608, 2.0390, 2.0590, 2.0581, 1.9986, 2.0023, 1.9103, + 2.0122, 2.0208, 2.0332, 2.0699, 1.9136, 2.0321, 2.3401, 1.9774, 1.9934, + 2.0168, 2.0446, 1.8847, 1.9775, 2.1485, 2.0622, 1.9779, 2.0434, 2.1604, + 1.9566, 1.9872, 2.2087, 2.0353, 1.8128, 1.9966, 1.9910, 2.0200, 1.9513, + 2.0511, 1.9924, 2.0438, 2.0259, 2.0082, 2.0246, 2.0088, 1.9980, 1.9457, + 1.9530, 1.9194, 2.0609, 1.9816, 1.9832, 2.0252, 2.0323, 1.9594, 2.0305, + 2.0356, 2.2375, 1.9345, 2.0042, 2.1218, 2.1397, 1.9727, 2.1261, 1.9762, + 2.0960, 1.9011, 1.9572, 2.0197, 2.0102, 1.8579, 2.0108, 1.9747, 2.0413, + 2.1950, 2.0268, 1.9291, 2.0363, 1.9824, 1.9842, 2.0567, 1.9963, 1.9368, + 2.0931, 1.9622, 1.9801, 2.1398, 1.8515, 2.0383, 2.1540, 2.0991, 2.0238, + 2.0954, 1.8529, 2.0306, 1.9698, 2.0138, 2.0344, 2.1097, 2.0003, 2.0347, + 1.9435, 2.0138, 2.0070, 2.0207, 1.9699, 1.9808, 3.9111, 1.9489, 1.9240, + 1.9906, 1.9973, 2.1468, 2.0961, 2.0055, 2.0753, 2.1031, 1.9375, 2.0882, + 2.0503, 2.0181, 1.8466, 2.0694, 2.1705, 1.8652, 2.0717, 1.9540, 1.9346, + 1.9093, 1.9651, 1.9621, 2.0628, 2.0070, 1.9060, 1.9553, 1.8944, 2.0086, + 1.9623, 2.0737, 1.9730, 1.9533, 2.1365, 2.0299, 1.8659, 1.9766, 2.1460, + 1.9863, 2.1535, 1.9489, 2.1114, 1.9806, 1.9694, 1.8617, 1.8932, 1.9100, + 1.9833, 2.0381, 1.9284, 1.9857, 2.0655, 1.9793, 1.9920, 2.0654, 1.9590, + 1.9620, 2.0547, 2.1570, 1.9327, 
1.9948, 1.9445, 1.9828, 2.0383, 2.0391, + 1.9883, 1.9772, 2.0274, 1.9669, 2.0183, 2.0465, 2.0411, 1.9694, 2.0264, + 2.2126, 2.3055, 1.8294, 1.9801, 2.0337, 2.0238, 2.0515, 2.3617, 2.0011, + 1.9341, 1.9198, 2.0020], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 2.1501e-01, 7.4704e-01, -4.3778e-01, -1.9189e-01, -1.9360e-01, + 4.1040e-01, 4.4803e-01, -1.0623e-01, -1.4964e-01, -3.4063e-01, + -2.0855e-01, 3.2643e-01, -8.2681e-01, -6.0881e-01, -5.0489e-01, + -3.7201e-01, -2.5095e-01, -4.9668e-01, -5.6372e-01, -1.3344e+00, + -1.8790e-01, 1.3055e-01, 5.6959e-01, -5.4303e-01, 7.8711e-01, + 1.1111e-01, 1.4635e-01, -1.4257e-01, -6.8712e-01, 3.8435e-01, + 2.1803e-01, 4.9633e-01, 1.4904e-01, 1.6454e-02, 3.2819e-01, + -1.7535e-01, -2.5352e-01, -1.2546e-01, 1.0994e-01, 9.3778e-02, + -6.8792e-01, -1.2614e-01, 2.4639e-01, 1.6165e-01, -3.7067e-01, + -2.1706e-01, -1.0287e-01, -1.8020e-01, 2.9180e-01, 2.3912e-02, + 3.4678e-02, -7.0900e-01, 2.1722e-01, 5.3221e-01, -2.7258e-01, + 1.5429e+00, 2.2216e-01, 1.3756e-01, -4.0713e-01, 4.5913e-01, + -8.6891e-02, 5.5913e-01, 1.7959e-01, -4.4340e-01, 2.4813e-01, + 3.0107e-01, 6.3924e-01, -6.1232e-02, 1.8421e-02, -1.9119e+00, + 7.7772e-01, 2.1262e-01, 3.9979e-01, 3.5545e-01, -6.8527e-01, + -4.8011e-01, 6.0286e-01, 3.7308e-01, -1.8340e-01, -2.6823e-01, + -1.1094e-01, -5.5771e-01, -4.4588e-01, 5.1400e-01, 1.3997e-01, + -4.0948e-01, 6.4643e-01, 6.4235e-02, -4.0403e-01, -5.4217e-01, + 2.7304e-01, 5.2554e-01, -9.3850e-02, -3.3140e-02, -2.6708e-01, + 5.6201e-01, 5.5971e-01, 4.5284e-01, -3.5819e-01, 1.7672e-01, + 1.0297e-01, -2.2664e+00, 7.2437e-02, -2.4392e-01, 2.2132e-01, + 6.1708e-01, 7.2010e-02, -1.7920e-01, 4.3395e-01, -3.4397e-01, + 6.1327e-01, 5.4794e-01, -5.3735e-01, 5.9204e-01, 5.3248e-01, + -9.2280e-01, 5.4920e-01, 2.2522e-01, 5.7106e-01, 3.3792e-01, + 4.6288e-01, -8.5729e-02, 5.4681e-01, 1.2722e-01, -4.7660e-02, + -5.9782e-01, -4.5898e-01, 2.3266e-01, -1.9421e-02, 8.9277e-01, + -6.2295e-01, -5.0095e-01, -2.8544e-01, 
-7.9341e-02, 5.1180e-01, + 1.2812e+00, 2.5656e-01, 3.7108e-01, 1.0246e-01, 1.0855e-01, + -4.4166e-01, -3.6916e-02, 1.6275e-01, 3.0883e-01, -7.6207e-01, + -3.2294e-01, 4.3284e-01, -3.2373e-01, -3.9448e-01, 3.8939e-01, + 4.7772e-01, 1.6648e-01, 5.8841e-02, 2.4477e-02, -8.8626e-01, + 4.9069e-01, 2.7631e-01, 1.3535e-01, -4.4725e-02, 1.2324e-01, + -2.0032e-01, -8.9303e-02, 4.2760e-01, 2.0125e-01, -4.4736e-01, + 2.2468e-01, -1.0505e+00, 2.4208e-01, -7.1624e-01, 1.1178e-01, + -1.1839e-01, -9.6857e-02, -7.3601e-01, 2.8437e-01, 5.4252e-01, + 6.5352e-02, 7.0957e-02, 1.3129e-01, 7.4641e-02, 4.4224e-01, + -4.6053e-01, -2.3242e-01, -3.5033e-01, 6.2616e-02, 5.2954e-01, + 1.0180e+00, 1.4960e-01, -1.3682e+00, -1.4817e-01, -6.3487e-01, + -4.4761e-01, -5.2614e-01, 6.5118e-01, 5.0969e-01, 3.5756e-01, + 4.0856e-01, 1.4308e-01, -1.2530e-01, 9.7227e-01, 3.4538e-01, + -7.7295e-01, 2.7611e-01, 2.6164e-01, 3.8689e-01, -2.0480e-01, + 5.2936e-01, 4.2684e-01, 4.6405e-01, -1.6827e-01, -9.7458e-02, + 5.0772e-01, -5.6984e-01, -3.9931e-01, 2.7341e-01, -7.1261e-01, + -5.7750e-01, 5.6267e-02, -6.3205e-02, -3.5059e-01, -2.6100e-02, + 1.0959e+00, -1.2912e-01, 2.6108e-01, 6.5407e-01, 1.9582e-01, + 3.5861e-01, -3.8938e-01, -5.9331e-03, -1.6254e-02, -1.5344e-01, + -2.4446e-01, 4.5050e-01, 2.4633e-01, 1.3749e-01, -2.3003e-01, + -2.6949e-01, 5.4330e-01, 1.7525e-01, 2.4688e-02, 3.6464e-01, + 1.1125e-01, -1.7916e-01, -3.5670e-01, 1.4557e-01, -3.3076e-01, + -3.5905e-01, 1.9406e-01, -1.2120e+00, -2.5908e-01, 2.6289e-01, + -4.2772e-01, -7.2619e-02, -2.7620e-01, -5.6160e-01, -2.3287e-02, + -2.4912e-02, -5.6602e-02, 3.3388e-01, 1.9100e-01, 5.3561e-02, + -4.4234e-01, -2.6088e-01, 7.4349e-02, -2.6779e-01, -4.8274e-01, + 5.6909e-01, 5.0762e-01, 1.4416e-01, -3.3702e-01, 8.1444e-02, + -9.1124e-04, -5.0530e-02, -5.5433e-02, -1.2218e+00, -7.1735e-01, + -2.9989e-01, 2.1273e-01, 3.5223e-01, -1.0045e-01, -1.4589e-01, + 6.6810e-01, 6.6347e-03, -2.8485e-01, -3.9527e-02, -6.1962e-01, + 5.7725e-01, 6.0066e-01, -1.6726e-01, 
3.5993e-01, -4.4027e-01, + -6.4637e-01, 3.6673e-01, 3.9305e-01, -6.9421e-01, 3.8688e-01, + -3.5038e-01, -3.3405e-01, -2.0529e-01, 2.3280e-01, -1.2050e-02, + 5.0844e-02, 3.1180e-01, -1.4231e-01, -4.1143e-01, -6.3243e-02, + 2.3677e-01, 1.7883e-01, 2.9399e-01, -4.0658e-01, -1.1772e-02, + 6.7914e-02, -8.2298e-02, -3.7087e-01, -1.3104e-01, -2.7620e-01, + 1.0077e-01, 2.6525e-01, 2.4830e-01, 5.9466e-01, 2.8895e-01, + -4.5527e-01, 7.1790e-02, 2.2942e-01, -2.6116e-01, 3.9986e+00, + -9.8789e-01, -1.2329e-01, -6.7903e-01, 3.8148e-02, -2.7855e-01, + 3.8224e-01, -5.2978e-01, 1.4232e-01, -2.4229e-01, 1.7414e-01, + 3.4799e-01, -4.5234e-01, -1.0896e-02, 1.0077e+00, 3.6756e-01, + -3.3123e-01, 2.9891e-01, -5.0165e-01, 5.9525e-01, -5.5131e-01, + 2.8175e-01, -3.0794e-02, -3.3171e-01, 4.8648e-01, 2.5715e-01, + -9.2639e-02, 1.4169e-01, -2.7374e-01, 1.1780e+00, -5.9521e-01, + -1.4061e-01, -3.1326e-01, 5.3595e-01, -3.8327e-01, -6.1277e-01, + -4.3154e-01, 1.4341e-01, -4.2311e-03, -5.5929e-01, 3.7290e-01, + 1.3575e-01, -5.8564e-01, 5.0063e-01, 3.2996e-01, 4.3748e-01, + 1.2429e-01, -1.8594e-01, -2.0776e-01, -8.4629e-01, -7.2942e-01, + -4.6600e-01, 2.0105e-01, 8.7055e-01, -1.0882e-02, -6.5407e-02, + -2.6198e-01, 1.3003e-01, -3.6015e-01, -2.0504e-01, -2.0976e-01, + -3.5524e-01, -7.5813e-01, 4.4625e-01, 3.2435e-01, -4.2963e-01, + 1.4965e-01, 3.3192e-01, 6.0794e-03, 5.7775e-01, -5.5045e-01, + -7.7025e-02, -1.2794e-01, 4.5363e-01, -2.2398e-01, 1.5045e-01, + 9.8628e-01, -4.1569e-04, 4.0888e-02, 2.2238e-01, 4.5657e-01, + -1.7871e-01, 1.6835e-02, 5.9045e-01, -3.5778e-01, 2.6057e-01, + 1.5641e-01, -2.0078e-02, 2.4277e-01, 5.2386e-01, 1.1039e+00, + -1.1327e+00, -1.1502e-01, -7.1413e-02, 3.7271e-01, 1.6588e-01, + -5.0401e-01, -2.7928e-01, 2.3201e-01, -4.3780e-01, -7.2960e-01, + -9.1982e-02, 3.7177e-01, 2.3800e-01, 1.7524e-01, -1.7606e-02, + -6.8586e-01, -1.7550e-01, 1.6649e-01, 9.6858e-01, 6.1731e-01, + -6.7386e-02, 1.5299e-01, 3.5824e-01, -2.3525e-02, -1.2447e-01, + -2.2176e-01, 3.9829e-01, 
2.9128e-01, -2.1872e+00, -3.9490e-02, + 2.0953e-01, 2.1769e-02, 3.2698e-01, 4.3849e-01, -5.8048e-01, + -6.7511e-01, 3.8794e-01, -7.0737e-01, 7.1450e-02, -4.0589e-01, + 9.3709e-01, -6.9863e-03, -8.9709e-01, 2.8178e-01, -1.5175e-01, + -7.1227e-01, 1.7312e-02, -5.7876e-01, 7.2203e-02, -3.7204e-01, + 5.4562e-01, 4.1058e-01, 1.8340e-01, 1.3698e-01, 2.2252e-01, + 1.3383e-03, -2.1874e-01, -3.6462e-01, 4.9029e-02, -3.7906e-02, + -6.1232e-01, 2.8946e-01, -4.1292e-01, -2.4855e-02, 5.5976e-01, + 1.6030e-01, 1.2595e-01, 6.2577e-01, -4.8489e-01, 9.7715e-01, + -1.3700e+00, 3.9636e-01, 8.1092e-01, -1.7226e-01, -4.9420e-01, + 1.0185e+00, 4.1705e-01, -9.1852e-02, -3.6709e-01, -4.4797e-01, + -2.9262e-01, -7.5866e-01, 2.7837e-01, -1.8833e-01, 4.7308e-01, + -1.2365e-01, 8.6737e-02, -3.4702e-01, 4.5654e-01, -1.7387e-01, + -4.3082e-01, 3.6552e-01, 1.1090e-01, -4.9035e-01, -5.4686e-02, + -4.0094e-01, -7.3754e-01, -5.6454e-01, -2.7902e-01, 6.3375e-01, + 7.6849e-02, 7.6877e-01, 3.7509e-01, 5.1973e-01, -2.5887e-01, + 7.4737e-01, 2.5756e-01, 5.4741e-01, 3.5443e-01, 3.5458e-01, + -3.8656e-01, 9.1460e-01, -8.0961e-02, 7.5145e-01, 3.3231e-01, + -6.3823e-02, -3.9819e-01, -2.4886e-01, -1.5665e-01, -3.0294e-01, + -1.0763e+00, 5.0080e-01, 1.1450e-01, 3.9202e-01, -1.3330e-01, + -1.5818e-01, 1.7867e-01, -3.6898e-01, 6.2401e-01, 1.3509e-01, + 2.8328e-01, 1.8425e-01, -6.9642e-01, -2.0965e-01, -5.9807e-02, + -6.0111e-01, 5.6218e-02, -5.8171e-02, 2.7017e-01, 2.5399e-01, + -1.6934e-01, -2.4471e-01, -5.6285e-01, -2.3438e-02, 8.7225e-01, + 5.8195e-01, 1.6643e-01, 3.0911e-01, -1.2398e-01, 1.0637e-01, + 3.0400e-01, 3.8059e-01, 3.0698e-01, 5.2002e-01, 3.1318e-01, + -1.1297e-01, -6.4791e-01, -1.2858e-01, -3.3301e-02, 2.8349e-01, + 4.7307e-01, -6.4352e-01, -3.7952e-01, -4.6233e-01, 5.4687e-01, + 3.8553e-01, 1.8468e-01, 5.6490e-01, -5.7524e-01, 8.1843e-02, + 5.2448e-02, 2.1670e-01, -7.9810e-01, 7.6422e-01, 3.6075e-01, + -4.6884e-01, -3.0259e-01, -1.0189e-01, -1.9926e-01, -5.9249e-01, + -4.1673e-01, -1.7293e-01, 
4.8049e-01, 5.6171e-01, 5.4314e-01, + -4.5473e-01, 3.3866e-02, -3.8822e-01, -2.2157e-01, 3.9036e-01, + -1.0263e+00, 5.0080e-01, 2.9338e-03, 1.6282e-01, -6.7238e-02, + -1.5358e-01, 7.7335e-01, -6.0466e-01, -7.1113e-03, 4.5572e-01, + 2.6197e-01, -7.7756e-02, -1.7559e-01, 1.2412e-01, 4.5016e-01, + 4.4452e-01, 4.5932e-01, -1.5143e-01, 1.7155e-01, -1.5590e-01, + -6.7263e-01, -7.4638e-02, 6.8537e-01, 3.1193e-01, -1.8483e-01, + 2.6559e-01, -5.7226e-01, -4.2480e-02, 4.6850e-01, -4.0256e-01, + -7.3821e-01, 3.3337e-01, 7.9803e-01, 5.2502e-01, -2.7037e-01, + 4.2035e-01, -4.6351e-01, -5.0869e-01, -3.4929e-01, -3.2073e-01, + 2.7673e-01, -3.4714e-01, 5.5583e-01, 5.3885e-01, 5.8890e-01, + 2.3125e-01, -6.4483e-02, 2.6723e-01, -3.3570e-01, -7.4513e-02, + 7.5238e-01, 2.7637e-01, 8.6769e-01, -1.4806e-01, 2.2444e-01, + 8.2832e-01, 2.1911e-01, 4.5670e-02, -5.8405e-01, 7.1401e-01, + -1.7179e-02, 2.1653e-01, -3.9713e-01, 5.5353e-01, 5.1918e-01, + -3.4896e-01, 3.0754e-01, 4.5784e-01, 9.4302e-01, 6.3974e-01, + -2.9996e-01, 3.2503e-01, 1.7531e-01, 1.9067e-01, 2.9472e-01, + 1.0802e-01, 2.8037e+00, -2.4171e-01, 1.0029e-01, -6.1159e-02, + 8.6921e-01, 4.2050e-01, -8.0290e-01, -2.8834e-01, -5.9433e-01, + -4.8745e-01, -2.3467e-01, -2.1206e-01, -6.2243e-01, -5.9644e-01, + 9.0945e-02, 6.1859e-01, 5.3241e-01, -2.0134e-01, -3.0877e-01, + 1.2689e-01, 4.3569e-01, 1.7475e-01, -2.3127e-01, 2.0306e-01, + 6.0639e-01, -3.0360e-01, 1.2595e-01, -5.7503e-01, 4.5264e-01, + -3.8956e-01, -4.0904e-01, -1.0199e-01, -1.8171e-01, 2.0400e-02, + -5.6246e-01, 4.5533e-01, 8.9785e-02, -8.7525e-01, -5.9002e-01, + -7.0129e-01, -2.6266e-01, -2.6017e-01, -7.0782e-01, 4.3546e-01, + -5.1003e-01, 1.1640e-01, -1.5618e-01, 2.6692e-01, 1.1821e-01, + -4.6812e-01, -5.0343e-01, 2.3759e-01, 8.6180e-01, 1.1069e-01, + -1.8956e-01, 2.4460e-01, 5.6212e-01, -1.0961e-01, -6.8296e-01, + -7.2474e-01, -2.1875e-01, 1.6066e-01, -4.0349e-01, -3.5831e-01, + -6.3780e-01, -5.2782e-01, -2.7818e-01, 3.5333e-01, 8.6573e-01, + 1.4529e-01, -5.4409e-01, 
-8.4162e-01, -1.4071e-01, -3.4625e-02, + 6.1403e-02, -4.3922e-01, -1.0181e+00, 5.3744e-01, 6.9962e-01, + -2.0664e-01, -3.6145e-01, -3.0133e-01, 1.7422e+00, 2.1885e-01, + 1.6592e-01, 3.7474e-01, 4.0835e-01], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-1.8555e-02, 1.2207e-02, -1.6556e-02, ..., 9.8953e-03, + 1.6815e-02, -1.8707e-02], + [ 3.0487e-02, 3.2715e-02, -3.0022e-03, ..., 3.5187e-02, + 3.5980e-02, -5.8136e-03], + [ 2.0390e-03, -2.0386e-02, 1.7670e-02, ..., 2.3132e-02, + 4.0550e-03, 1.1375e-02], + ..., + [-5.3101e-03, 2.4445e-02, -1.9531e-02, ..., -1.0094e-02, + -1.0544e-02, 2.3727e-03], + [-1.3418e-03, 4.7874e-03, 1.2207e-02, ..., 7.7553e-03, + -6.1214e-05, -1.3153e-02], + [ 9.2850e-03, 7.7629e-03, -1.5533e-02, ..., 1.3306e-02, + 5.0316e-03, 2.2507e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 0.1748, -0.0695, -0.2499, ..., -0.0291, 0.0082, 0.0654], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0072, 0.0056, 0.0144, ..., 0.0010, 0.0068, -0.0195], + [-0.0204, 0.0330, -0.0089, ..., -0.0183, 0.0075, 0.0104], + [-0.0120, -0.0031, 0.0017, ..., -0.0134, 0.0086, -0.0073], + ..., + [-0.0075, -0.0018, -0.0179, ..., -0.0070, -0.0049, -0.0116], + [-0.0321, -0.0037, 0.0038, ..., 0.0033, -0.0005, 0.0031], + [ 0.0155, 0.0208, -0.0011, ..., 0.0005, -0.0043, -0.0342]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-4.6196e-03, -1.0712e-02, -4.2458e-03, -1.1658e-02, -7.2266e-02, + 1.1406e-02, 6.5796e-02, -3.1586e-02, 2.2736e-02, 5.1697e-02, + -8.3847e-03, -7.9575e-03, 5.0659e-02, 4.4937e-03, -1.5732e-02, + 1.4328e-02, 1.7075e-02, 1.4191e-03, 2.4689e-02, -5.8716e-02, + -2.6588e-03, -3.6438e-02, 3.8147e-02, -5.0079e-02, 1.9745e-02, + -2.9221e-02, -3.1464e-02, 5.1300e-02, 1.1711e-02, -1.5007e-02, + 1.4740e-02, 5.8861e-03, 1.6403e-02, -1.9547e-02, -1.9958e-02, + 4.1595e-02, 4.2992e-03, -4.0375e-02, 1.0658e-02, 
5.4626e-03, + -3.0426e-02, 4.9438e-02, 2.3499e-02, -7.7400e-03, -5.2277e-02, + -4.4212e-03, -6.8092e-04, -2.4185e-02, -1.4219e-03, 1.4244e-02, + 5.7617e-02, 4.7150e-02, -1.1452e-02, 9.1476e-03, 1.4732e-02, + 5.9967e-02, -5.5115e-02, 1.6342e-02, 3.3722e-02, -3.9520e-03, + -3.9001e-02, 1.2306e-02, 4.2480e-02, -3.2883e-03, 2.6760e-03, + -1.4351e-02, -2.8259e-02, 6.0692e-03, -1.1177e-02, -6.8604e-02, + 1.3428e-02, -6.4819e-02, -1.2286e-01, 7.9274e-06, -3.5583e-02, + 1.1482e-02, -4.8126e-02, 1.4811e-03, -1.1566e-02, -6.2012e-02, + 3.0136e-04, 4.3732e-02, -1.6174e-02, -2.2690e-02, 1.5366e-02, + -1.0666e-02, -1.3145e-02, 4.5563e-02, 6.5918e-03, 3.3140e-05, + 2.3926e-02, 7.9834e-02, 7.0129e-02, -2.4094e-02, 2.3895e-02, + -1.3113e-03, 1.4229e-02, -3.1082e-02, -2.2018e-02, 7.6790e-03, + 4.1992e-02, 1.0077e-01, 1.5175e-02, -8.6288e-03, 4.8447e-03, + 3.2257e-02, 8.7524e-02, 1.0815e-03, -3.4912e-02, 2.1362e-02, + -3.5095e-02, 3.9185e-02, 5.9166e-03, -1.3496e-02, -1.1681e-02, + -1.9058e-02, 2.8458e-03, -1.9516e-02, 4.3884e-02, -4.3030e-02, + -2.0966e-02, -1.8967e-02, -4.4952e-02, -3.3417e-02, 6.1951e-03, + -1.6769e-02, -4.3716e-03, -7.8552e-02, 2.9556e-02, 6.4201e-03, + -1.1986e-02, -2.5497e-02, 6.0844e-03, -1.0658e-02, -9.0179e-03, + -7.1335e-03, -4.7073e-03, 1.8127e-02, 1.9989e-02, -5.7144e-03, + -2.5070e-02, -4.1565e-02, -3.6255e-02, -5.9631e-02, 6.5193e-03, + -6.2866e-02, 4.8828e-03, -3.1647e-02, 3.4790e-02, 2.1637e-02, + 6.2866e-03, -6.7749e-02, 1.6113e-02, 4.7119e-02, 9.9487e-03, + -7.0312e-02, 5.3711e-02, -1.9897e-02, -7.0763e-03, -2.9709e-02, + 2.7542e-02, 1.3306e-02, 4.8889e-02, 2.1591e-02, 1.9302e-02, + 3.9978e-02, -8.5693e-02, 1.8982e-02, -2.0050e-02, -3.4576e-02, + 3.7933e-02, 3.1464e-02, -3.8574e-02, -3.8055e-02, 8.1635e-03, + -2.9007e-02, 5.3482e-03, -2.7390e-02, -5.9998e-02, -5.9906e-02, + -1.9699e-02, -1.5625e-02, -1.7685e-02, -1.1206e-03, 2.2335e-03, + 3.8391e-02, 9.6130e-03, 2.2751e-02, -4.9316e-02, -3.1586e-02, + 2.0065e-02, -1.5106e-02, -1.6907e-02, 
-1.1854e-03, -3.2482e-03, + -9.5010e-05, 2.9404e-02, 2.9221e-02, 4.6229e-04, -2.8015e-02, + 4.4373e-02, 2.6062e-02, 1.8219e-02, -2.9648e-02, 1.0002e-02, + 2.0706e-02, 7.3669e-02, 1.1726e-02, 3.6804e-02, 4.1138e-02, + -2.9392e-03, 3.0106e-02, 3.0594e-03, 2.5574e-02, 5.3772e-02, + 1.3222e-02, -3.6072e-02, -1.5572e-02, -2.4384e-02, 1.3962e-02, + 1.6357e-02, -1.3443e-02, -3.6407e-02, -6.2218e-03, 1.8494e-02, + -4.0588e-02, -4.0131e-02, -6.3721e-02, 3.6278e-03, 2.1362e-02, + 9.0179e-03, -3.0411e-02, 1.4854e-02, 4.0283e-02, -5.5359e-02, + 2.6428e-02, 4.8409e-03, -3.4363e-02, 5.3772e-02, -3.8116e-02, + 2.8229e-04, 5.4199e-02, 7.9956e-03, -1.5869e-03, 2.0340e-02, + 1.7532e-02, 8.2016e-03, -2.4738e-03, -1.4038e-02, 1.6041e-03, + 2.3544e-02, 2.3026e-02, -2.4399e-02, -1.1978e-02, -3.0777e-02, + -3.2196e-02, -1.2001e-02, -5.4230e-02, -3.0792e-02, 7.1602e-03, + -5.3444e-03, -1.8097e-02, 9.2840e-04, 1.1177e-02, -1.2733e-02, + 2.0081e-02, 4.1008e-03, 2.3697e-02, 1.7761e-02, -2.0035e-02, + 7.1297e-03, -2.9938e-02, -1.2978e-02, -1.1086e-02, -3.1143e-02, + -8.3847e-03, -6.1264e-03, -1.1139e-02, 2.1729e-02, 4.6173e-02, + -5.5878e-02, -2.1790e-02, -1.8921e-02, -4.9957e-02, 3.6530e-02, + 5.3215e-03, -6.5735e-02, 1.1475e-02, 4.1107e-02, 9.8190e-03, + -7.7248e-03, 2.1011e-02, 1.1238e-02, 5.5275e-03, 3.5217e-02, + 3.1921e-02, 3.1471e-04, 1.8646e-02, 4.5090e-03, 4.9591e-02, + -3.6896e-02, -1.0056e-02, 1.2040e-04, -2.0615e-02, 8.1921e-04, + -5.4779e-03, 1.6571e-02, -2.8305e-02, -1.2932e-02, -1.3977e-02, + 4.3762e-02, -2.3575e-02, 2.0401e-02, 3.3188e-03, -2.0508e-02, + 6.5247e-02, -6.1111e-03, -4.0588e-03, 1.2978e-02, -1.4982e-03, + -1.1116e-02, 1.9665e-03, 1.6739e-02, -2.3193e-02, -4.3304e-02, + 7.2144e-02, 6.3049e-02, -2.1835e-02, 2.0233e-02, 5.1971e-02, + 1.8845e-02, 1.3985e-02, -1.6785e-02, 8.1253e-03, -5.7892e-02, + 4.6783e-02, 2.7817e-02, -1.4709e-02, -3.4466e-03, -3.8940e-02, + -2.7637e-03, -1.5007e-02, -3.5725e-03, 3.4668e-02, -4.6692e-02, + -2.7222e-02, -1.6083e-02, -2.4200e-02, 
-4.6631e-02, -4.3945e-02, + 9.1019e-03, 3.4607e-02, 1.2703e-02, 7.4219e-02, -5.6076e-03, + -1.3451e-02, -2.6188e-03, -3.5919e-02, 4.5776e-03, -1.3481e-02, + 4.0344e-02, 3.7117e-03, -4.0894e-02, 1.1891e-04, -1.5007e-02, + 5.7800e-02, -4.8248e-02, 5.7587e-02, -3.7460e-03, 1.0727e-02, + 1.9699e-02, -5.8502e-02, 2.1652e-02, -4.3488e-03, 1.2045e-03, + -5.0201e-02, -7.4280e-02, -2.1118e-02, -3.7789e-04, 1.8112e-02, + 1.3870e-02, 1.9989e-02, 6.8703e-03, -2.6817e-03, 2.9099e-02, + -3.0422e-03, 4.6844e-02, -5.0415e-02, -8.2169e-03, -2.6760e-03, + -9.0103e-03, -3.5534e-03, 3.1677e-02, 1.9333e-02, 1.1230e-02, + 1.1559e-02, 7.1106e-03, -6.8176e-02, 3.5614e-02, -2.6535e-02, + 1.2856e-02, 6.5041e-03, -3.2410e-02, 6.0150e-02, -2.4094e-02, + -1.0971e-02, -1.1093e-02, 4.0924e-02, -3.3447e-02, -7.6447e-03, + 1.2741e-02, 2.2171e-02, -2.0844e-02, 5.2490e-02, -8.1558e-03, + -3.0365e-03, 4.1992e-02, 5.0323e-02, -4.7943e-02, -1.9531e-02, + 8.1024e-03, -4.1046e-02, -1.6327e-02, 8.3237e-03, -1.3824e-02, + -2.3102e-02, 3.6297e-03, -6.4201e-03, 3.0975e-03, 8.0490e-03, + 6.9771e-03, -1.5625e-02, 3.1605e-03, 1.0498e-01, 1.4465e-02, + -2.4002e-02, -1.4000e-02, -3.5858e-02, -4.4022e-03, -3.5400e-02, + 2.5058e-04, 1.4572e-02, -1.9503e-04, -1.6553e-01, -5.6076e-03, + -3.9185e-02, -2.0935e-02, -2.3209e-02, -3.5065e-02, -1.6373e-02, + -3.5019e-03, 1.6205e-02, -1.2405e-02, 7.7148e-02, -1.0551e-02, + 1.8539e-02, 2.3849e-02, -8.8272e-03, 5.6419e-03, 1.2901e-02, + 1.2619e-02, 1.4397e-02, -1.6449e-02, -4.7493e-03, 8.8654e-03, + -2.2186e-02, 2.1393e-02, 1.0635e-02, 7.3486e-02, 8.9111e-03, + -3.8116e-02, -2.6825e-02, -1.0338e-02, 1.1879e-02, 1.2970e-02, + -2.2797e-02, -1.1337e-02, -2.0004e-02, 5.1788e-02, 4.2419e-02, + -4.0512e-03, -7.1640e-03, -3.3142e-02, -4.9408e-02, -2.0264e-02, + 3.2227e-02, -3.5919e-02, -1.1429e-02, -4.4250e-03, 7.1754e-03, + 1.4107e-02, 3.7415e-02, 1.3893e-02, 4.4632e-03, -2.7084e-02, + -1.7120e-02, -1.3641e-02, 1.9226e-02, -2.9694e-02, 1.8661e-02, + -6.7635e-03, -1.4336e-02, 
-3.3569e-02, -3.7872e-02, 4.1687e-02, + 3.1555e-02, -1.9958e-02, 1.0811e-02, -8.8501e-03, -4.3915e-02, + -4.4670e-03, 1.8295e-02, 3.5095e-03, 8.9050e-02, -1.4282e-02, + -8.9722e-03, -4.6417e-02, -3.7018e-02, 3.1738e-02, -1.7609e-02, + -8.5602e-03, 1.7380e-02, -1.8646e-02, -1.8311e-02, 9.8343e-03, + 4.1107e-02, -1.1806e-03, -9.9335e-03, -4.8218e-02, 1.5572e-02, + 2.7008e-02, 2.3209e-02, 1.8616e-02, 1.9852e-02, 3.3264e-02, + 9.2773e-03, -6.4331e-02, 1.1345e-02, 1.7595e-03, 7.8812e-03, + -2.2522e-02, 3.4760e-02, -9.6664e-03, 8.0414e-03, -1.6663e-02, + 4.7791e-02, 2.0355e-02, 1.7181e-02, -3.5583e-02, 1.1414e-02, + -5.7709e-02, 8.1909e-02, -2.3651e-02, 2.2297e-03, -2.9358e-02, + -1.8143e-02, 3.0792e-02, 6.0028e-02, 3.3051e-02, -2.4551e-02, + 2.2156e-02, 2.2324e-02, 4.0039e-02, 9.3918e-03, 7.6721e-02, + 3.5187e-02, 7.9803e-03, 3.7270e-03, 1.7441e-02, -4.0779e-03, + -3.5248e-02, -5.6122e-02, 1.3580e-02, 8.0688e-02, 1.2466e-02, + 3.6255e-02, 3.0151e-02, 2.1271e-02, -1.0519e-03, -4.3182e-02, + -2.2385e-02, 3.5339e-02, -3.5309e-02, -4.9042e-02, 3.5583e-02, + 5.3314e-02, -6.4754e-04, 4.6600e-02, 4.5807e-02, -2.1439e-02, + 6.3896e-03, 2.0065e-03, 1.4320e-02, -4.1847e-03, 2.8961e-02, + 8.5907e-03, -9.6283e-03, -5.2277e-02, -3.6377e-02, -2.6459e-02, + 2.4681e-03, 2.7679e-02, 4.6356e-02, -2.1744e-02, -3.1036e-02, + 5.9242e-03, -2.1622e-02, -8.7204e-03, 3.6736e-03, 2.0370e-03, + -3.5614e-02, 3.6652e-02, 5.8556e-03, 1.6708e-03, 3.4088e-02, + -8.9455e-04, 5.4245e-03, -3.2959e-02, 1.3819e-03, -9.0637e-02, + -2.3117e-02, -3.3188e-03, 3.7811e-02, 2.0157e-02, -1.2825e-02, + -3.1586e-02, -1.6785e-02, -1.5823e-02, -3.2349e-02, 2.0962e-03, + 6.4507e-03, -8.2321e-03, 8.1177e-03, 5.0781e-02, 5.9814e-02, + 4.4586e-02, 1.5503e-02, 1.2657e-02, -7.0534e-03, -2.7283e-02, + -1.1047e-02, -2.2400e-02, -2.6230e-02, -4.2114e-02, -1.2314e-02, + 1.1765e-02, -5.0476e-02, 2.1629e-03, -9.9182e-03, 2.2842e-02, + 2.0645e-02, 5.9143e-02, 4.4556e-02, 3.4302e-02, -1.0101e-02, + 4.0558e-02, 1.6373e-02, 
6.6223e-03, -6.2790e-03, 1.3374e-02, + 2.3575e-02, 3.0731e-02, 1.7929e-02, -7.4730e-03, 1.1337e-02, + 2.5925e-02, 2.8839e-02, 1.0468e-02, -1.0818e-02, -8.6975e-03, + 1.9302e-02, -1.2520e-02, 3.6812e-03, -9.1476e-03, -4.1595e-02, + -1.5030e-03, -1.1528e-02, 1.8463e-02, 2.7023e-02, -2.2446e-02, + 3.1799e-02, 2.0935e-01, 8.7585e-03, -3.9459e-02, -1.2238e-02, + -2.6062e-02, -2.3911e-02, -7.2432e-04, 2.0691e-02, -5.9540e-02, + -9.0714e-03, -4.3762e-02, 6.6589e-02, -4.0817e-03, 2.6199e-02, + -8.0872e-03, 1.1955e-02, 3.5248e-02, -7.0435e-02, -1.8265e-02, + -5.5786e-02, 3.5797e-02, -1.3924e-03, -3.1494e-02, 1.8585e-02, + 2.5314e-02, 1.3420e-02, 8.4915e-03, -3.8300e-02, 1.6418e-02, + -4.0579e-04, 2.9266e-02, 5.0354e-02, -3.2898e-02, -4.4739e-02, + 4.3068e-03, -5.1208e-02, 2.0233e-02, 4.6356e-02, -8.3069e-02, + -3.3203e-02, 3.1647e-02, -6.6872e-03, 1.0475e-02, -3.0251e-03, + 1.3290e-02, 2.4323e-02, -7.9422e-03, 8.2397e-03, 3.5645e-02, + -2.4994e-02, -3.4698e-02, -3.8971e-02, 1.4175e-02, 7.0610e-03, + 3.1555e-02, -5.7144e-03, 2.8351e-02, 4.3396e-02, -2.6337e-02, + 5.5145e-02, 2.4689e-02, 1.3634e-02, -3.8452e-02, -6.9857e-04, + 1.5114e-02, -2.6245e-02, 1.5961e-02, 4.2694e-02, -2.4857e-02, + -3.3752e-02, 2.1805e-02, 1.4648e-02, -2.0737e-02, 5.3406e-02, + 1.7761e-02, -3.2959e-02, 1.1215e-02, 6.9046e-03, -1.5572e-02, + 6.2180e-03, -1.9760e-02, -7.9803e-03, 3.3684e-03, 5.7983e-02, + 6.1951e-03, 1.7899e-02, 1.8234e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.2872, 1.3902, 1.3280, 1.3746, 1.3808, 1.3310, 1.2905, 1.3744, 1.3278, + 1.2770, 1.3421, 1.3515, 1.2648, 1.4362, 1.3084, 1.3631, 1.3041, 1.3989, + 1.2531, 0.3754, 1.3145, 1.3037, 1.3856, 1.3438, 1.3401, 1.2750, 1.2746, + 1.3092, 1.5337, 1.3190, 1.2867, 1.3100, 1.3887, 1.3135, 1.4317, 1.4281, + 1.2585, 1.3812, 1.4312, 1.3558, 1.3686, 1.2785, 1.3244, 1.3191, 1.3727, + 1.3274, 1.3551, 1.3741, 1.2872, 1.3436, 1.4240, 1.3185, 1.3188, 1.2834, + 1.3673, 1.4366, 1.2857, 1.4752, 
1.3431, 1.2975, 1.3674, 1.3349, 1.2434, + 1.3561, 1.3934, 1.3163, 1.2964, 1.3656, 1.3091, 1.7060, 1.2112, 1.3438, + 2.4524, 1.3774, 1.3413, 1.2583, 1.2963, 1.3005, 1.3575, 1.4152, 1.2581, + 1.4116, 1.3328, 1.3162, 1.3791, 1.3734, 1.3793, 1.2855, 1.3213, 1.3572, + 1.3486, 1.3470, 1.3550, 1.3349, 1.4237, 1.3012, 1.3963, 1.2965, 1.2334, + 1.3769, 1.3145, 1.1864, 1.2911, 1.2162, 1.2641, 1.3571, 1.3028, 1.3323, + 1.2926, 1.3058, 1.3455, 1.3841, 1.2867, 1.2912, 1.2599, 1.3620, 1.3224, + 1.2191, 1.2997, 1.3315, 1.3464, 1.4175, 1.3383, 1.4195, 1.3479, 1.3867, + 1.3359, 1.3288, 1.4146, 1.3789, 1.3210, 1.3271, 1.2555, 1.2443, 1.3080, + 1.3088, 1.4416, 1.3641, 1.3288, 1.3230, 1.3284, 1.3592, 1.3514, 1.2883, + 1.2990, 1.3497, 1.3354, 1.3534, 1.2966, 1.3139, 1.3233, 1.3309, 1.3223, + 1.3252, 1.3250, 1.1075, 1.3933, 1.3489, 1.3420, 1.2268, 1.4483, 1.2636, + 1.3052, 1.4442, 1.3708, 1.3202, 1.2663, 1.3475, 1.2657, 1.3688, 1.3033, + 1.3369, 1.3094, 1.4049, 1.4070, 1.3336, 1.3666, 1.4053, 1.3935, 1.2258, + 1.3650, 1.4078, 1.3205, 1.2375, 1.3886, 1.3380, 1.3228, 1.8635, 1.3402, + 1.4189, 1.3341, 1.2720, 1.3730, 1.4405, 1.3683, 1.3268, 1.3239, 1.4251, + 3.8598, 1.3067, 1.2033, 1.3676, 1.4208, 1.4274, 1.3379, 1.2837, 1.3651, + 1.3117, 1.3239, 1.4408, 1.3410, 1.2964, 1.2914, 1.2973, 1.3181, 1.3553, + 1.4354, 1.2757, 1.2851, 1.3733, 1.2667, 1.2805, 1.3078, 1.2954, 1.3843, + 1.2732, 1.4498, 1.4749, 1.3324, 1.4228, 1.2985, 1.3570, 1.4567, 1.4490, + 1.3023, 1.3295, 1.4018, 1.2917, 1.2907, 1.2989, 1.3706, 1.3021, 1.4164, + 1.2990, 1.3597, 1.3136, 1.2466, 1.2832, 1.3223, 1.3688, 1.3477, 1.3461, + 1.2518, 1.2632, 1.3714, 1.3059, 1.3299, 1.3516, 1.2935, 1.2013, 1.3700, + 1.3071, 1.2896, 1.3473, 1.2983, 1.3207, 1.3504, 1.3353, 1.2847, 1.3485, + 1.3610, 1.3010, 1.3774, 1.3308, 1.2954, 1.2410, 1.3406, 1.3067, 1.3201, + 1.3046, 1.3123, 1.5712, 1.3965, 1.3870, 1.3659, 1.3075, 1.3338, 1.3484, + 1.3848, 1.3058, 1.3570, 1.3049, 1.3742, 1.2756, 1.3889, 1.2953, 1.3125, + 1.3678, 1.4067, 1.4245, 1.5097, 
1.2939, 1.4008, 1.3100, 1.2898, 1.3111, + 1.2741, 1.2701, 1.2899, 1.2631, 1.3375, 1.3803, 1.3075, 1.2911, 1.4013, + 1.3300, 1.3336, 1.2717, 1.3298, 1.4376, 1.2984, 1.3539, 1.3718, 1.3205, + 0.9002, 1.1273, 1.3590, 1.3239, 1.3377, 1.4179, 1.3039, 1.2933, 1.2394, + 1.3289, 1.3309, 1.2902, 1.2587, 1.3533, 1.2726, 1.3677, 1.3686, 1.2982, + 1.3758, 1.2909, 1.3539, 1.3848, 1.3587, 1.2797, 1.3354, 1.3630, 1.4622, + 1.2987, 1.2724, 1.4872, 1.2995, 1.4829, 1.3287, 1.3487, 1.4115, 1.3740, + 1.2543, 1.2981, 1.3168, 1.3157, 1.4478, 1.3249, 1.3100, 1.2767, 1.3550, + 1.3242, 1.3166, 1.3973, 1.2298, 1.3345, 1.2938, 1.2998, 1.4123, 1.3963, + 1.2704, 1.3450, 1.3796, 1.5035, 1.3848, 1.3198, 1.2972, 1.3515, 1.2375, + 1.4217, 1.2988, 1.3277, 1.4822, 1.2519, 1.3244, 1.2602, 1.3856, 1.3670, + 1.2955, 1.3744, 1.3669, 1.3714, 1.3356, 1.3683, 1.2582, 1.3579, 1.3028, + 1.3691, 1.3240, 1.3866, 1.3637, 1.2497, 1.3819, 1.3386, 1.3314, 1.2769, + 1.3428, 1.9735, 1.3798, 1.3468, 1.3407, 1.3285, 1.3599, 1.4341, 1.2609, + 1.3951, 1.2966, 1.2580, 1.3596, 1.3417, 1.4204, 1.3244, 1.4308, 1.3175, + 1.3006, 1.2515, 1.2395, 1.2493, 1.3272, 1.3005, 1.3845, 1.2743, 1.3190, + 1.3370, 1.3416, 1.1472, 1.2675, 1.3340, 1.3503, 1.3742, 1.3713, 1.3525, + 1.3118, 1.3306, 1.4060, 1.2554, 1.3448, 1.3657, 1.3274, 1.3503, 1.3757, + 1.3110, 1.3053, 1.3492, 1.3277, 1.3186, 1.3125, 1.4098, 1.3111, 1.3407, + 1.4183, 1.3285, 1.3850, 1.3056, 1.3465, 1.3493, 1.3270, 1.2958, 1.3944, + 1.3085, 1.3012, 1.3702, 1.2813, 1.2915, 1.3575, 1.1952, 1.3088, 1.3576, + 1.3346, 1.3577, 1.2858, 1.3787, 1.2965, 1.3709, 1.3449, 1.4087, 1.3293, + 1.3631, 1.3237, 1.2341, 1.3424, 1.3324, 1.2798, 1.3931, 1.3897, 1.2935, + 1.2785, 1.3657, 1.2583, 1.4487, 1.2702, 1.3525, 1.2965, 1.3752, 1.4298, + 1.3788, 1.3413, 1.3038, 1.4992, 1.3144, 1.3067, 1.2532, 1.3159, 1.2683, + 1.3746, 1.2443, 1.3016, 1.2842, 1.2961, 1.2805, 1.3825, 1.2896, 1.3415, + 1.2923, 1.3563, 1.2550, 1.3741, 1.4044, 1.4208, 1.3354, 1.2533, 1.3894, + 1.3589, 1.2489, 1.3956, 1.2572, 
1.3543, 1.3533, 1.3309, 1.3372, 1.3268, + 1.3941, 1.2826, 1.3982, 1.4003, 1.2890, 1.4476, 1.2573, 1.3132, 1.3348, + 1.3615, 1.3087, 1.2843, 1.3003, 1.3694, 1.3762, 1.2734, 1.3903, 1.4113, + 1.3063, 1.2960, 1.3405, 1.3407, 1.3494, 1.3724, 1.3081, 1.2372, 1.3109, + 1.3138, 1.3427, 1.3206, 1.3542, 1.3643, 1.3706, 1.3337, 1.3657, 1.3878, + 1.3569, 1.3834, 1.3598, 1.2711, 1.4078, 1.3747, 1.5675, 1.2749, 1.2390, + 1.2538, 1.2943, 1.2955, 1.2661, 1.2821, 1.3547, 1.3100, 1.3714, 1.3571, + 1.3821, 1.2715, 1.2951, 1.3517, 1.5251, 1.3422, 1.3360, 1.3600, 1.2926, + 1.3325, 1.3143, 1.3297, 1.3354, 1.3274, 1.3914, 1.4117, 1.4397, 1.2755, + 1.4176, 1.3882, 1.3252, 1.2717, 1.3697, 1.3725, 1.2981, 1.3732, 1.3615, + 1.3712, 1.3910, 1.2379, 1.3230, 1.3764, 1.3150, 1.2980, 1.2872, 1.2196, + 1.2480, 1.3109, 1.4561, 1.4493, 1.2962, 1.2464, 1.3213, 1.3355, 1.3883, + 1.5240, 1.2383, 1.3756, 1.3613, 1.3726, 1.3346, 1.4441, 1.3882, 1.3590, + 1.3018, 1.2842, 1.2935, 1.3592, 1.2867, 1.2710, 1.3668, 1.3812, 1.3530, + 1.4434, 1.2979, 1.3567, 1.3358, 1.3525, 1.3261, 1.3334, 1.2530, 1.3479, + 1.3575, 1.3310, 1.4809, 1.2589, 1.2816, 1.3649, 0.4186, 1.4145, 1.2535, + 1.3430, 1.3093, 1.3702, 1.3583, 1.3723, 1.3647, 1.3383, 1.2710, 1.4596, + 1.3527, 1.3731, 1.2643, 1.3753, 1.3268, 1.2178, 1.3426, 1.3283, 1.3710, + 1.3677, 1.2947, 1.2627, 1.2397, 1.3351, 1.4022, 1.4032, 1.3174, 1.3353, + 1.2170, 1.2888, 1.3188, 1.3385, 1.3833, 1.2715, 1.3615, 1.3580, 1.3459, + 1.3332, 1.3058, 1.3122, 1.3450, 1.3347, 1.2283, 1.2703, 1.3616, 1.4082, + 1.3126, 1.2626, 1.2945, 1.4637, 1.3127, 1.2583, 1.3960, 1.3580, 1.2940, + 1.3000, 1.3408, 1.3034, 1.2603, 1.3945, 1.2683, 1.2457, 1.4037, 1.3055, + 1.3822, 1.3478, 1.5217, 1.3705, 1.2843, 1.2831, 1.3864, 1.3480, 1.3994, + 1.3649, 1.2519, 1.3327, 1.3546, 1.4664, 1.2941, 1.3896, 1.3242, 1.3849, + 1.4253, 1.2813, 1.3836], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 8.7339e-02, 6.1186e-02, -1.1393e-01, -2.3187e-02, 6.6224e-02, + 1.0484e-01, 6.1928e-02, 
6.6969e-02, -8.2951e-02, 2.9519e-02, + 5.2148e-02, -2.8967e-02, -1.1092e-01, -1.5207e-02, -2.4184e-02, + -5.6119e-02, -1.0727e-02, 3.7562e-04, -2.8220e-02, 4.4238e+00, + -6.0840e-02, 2.7884e-02, 3.6516e-02, -1.5820e-02, -5.4270e-02, + -6.5341e-03, -2.6812e-02, -6.5928e-03, 2.9693e-02, -9.4959e-02, + 1.1967e-02, -8.0892e-03, 5.1503e-03, 3.4587e-02, 8.9452e-02, + -6.5282e-02, 2.3060e-02, -5.3270e-02, -1.2999e-01, 3.5249e-02, + 2.4790e-02, -5.1246e-04, 1.9823e-02, -1.1152e-02, -3.1151e-02, + 2.7093e-03, 3.5210e-02, -3.5405e-02, 1.7274e-02, 2.7853e-02, + 4.4344e-02, 1.2647e-02, -2.1005e-02, 3.0567e-02, 1.0525e-01, + 2.3741e-02, -8.9789e-02, -3.4534e-02, 4.5046e-02, 1.6473e-02, + 6.1683e-02, -6.3831e-02, 4.1221e-02, 1.8472e-02, 7.8758e-02, + -2.1876e-02, 6.5841e-02, 1.0714e-01, -3.7961e-02, 2.8308e-01, + 2.0154e-02, 3.1277e-02, 4.4453e-01, 7.3221e-02, -1.2682e-01, + 8.9078e-02, 1.6569e-02, -1.2624e-01, -8.9929e-02, 6.2439e-02, + 5.7802e-03, -5.0506e-03, -3.1454e-02, 1.1621e-01, 9.1203e-03, + 4.3324e-02, 1.0606e-01, 1.0382e-01, -8.6284e-03, 2.2634e-02, + 3.4288e-02, 2.8976e-02, 4.6591e-02, -4.4971e-02, -4.7039e-02, + 2.8868e-02, -4.0962e-02, -4.7151e-02, -5.6912e-02, -5.0799e-02, + 2.0091e-02, 6.3718e-02, 5.2493e-03, -4.9528e-02, 5.4862e-02, + 5.7750e-03, -3.8043e-02, -4.7645e-02, 2.5038e-02, -6.7444e-02, + -4.9929e-02, -8.9348e-03, -2.2372e-04, 1.7883e-02, 6.9704e-02, + -3.3632e-02, 7.6718e-03, -6.9778e-02, 1.3104e-02, 8.7781e-02, + -6.2585e-02, -1.3901e-01, -5.5338e-02, -7.6090e-03, -1.1955e-01, + 1.5045e-02, -8.3363e-02, -3.8883e-03, 9.6386e-03, -3.3875e-02, + -8.4132e-02, 3.9896e-02, -3.4697e-02, -3.9018e-03, -6.4938e-02, + -1.0833e-02, -2.7428e-02, 4.2242e-03, -2.9702e-02, -4.0883e-02, + -1.6351e-03, -3.6239e-03, 4.0830e-02, 3.1907e-02, 4.9882e-02, + -2.4862e-02, -3.1561e-02, -4.6421e-02, 1.7076e-02, 8.8081e-02, + 1.8782e-01, -5.4576e-02, -9.4501e-02, 5.3017e-03, -3.8913e-02, + -1.3871e-01, -7.6044e-04, -9.9700e-02, 4.5043e-02, -4.9313e-03, + 1.4418e-01, 
-1.1649e-01, 4.5746e-03, 2.1470e-02, 7.0424e-03, + -1.6105e-02, -4.2264e-02, 3.2586e-02, -5.0255e-02, 3.5865e-02, + -4.6770e-02, 7.1799e-02, 2.7991e-02, 6.2034e-02, 6.3537e-02, + -7.6811e-02, -3.2816e-02, 5.1421e-02, -3.5653e-02, -2.8211e-03, + -1.0634e-02, -5.3496e-02, 4.2309e-02, -6.4404e-03, -1.2252e-02, + -1.5379e-01, 4.8189e-02, 7.4657e-02, -3.1758e-02, -3.2122e-02, + -1.1376e-02, -3.7663e-02, 3.5552e-02, 4.2712e-02, -7.2979e-02, + -6.1913e-03, -4.6449e-02, -1.0435e-01, -1.1276e-01, 2.3868e-02, + -1.0795e-01, -1.5822e-02, -4.6000e-02, 2.2899e-02, 4.1231e-02, + 9.4707e-02, -7.7341e-02, 5.8019e-02, 9.9886e-02, 8.9785e-03, + 9.3099e-03, 1.4751e-01, -6.3596e-03, -4.8528e-02, -6.2385e-02, + -8.9581e-04, -2.4941e-03, 3.8993e-02, -7.8228e-02, -1.4844e-01, + -9.6491e-03, -3.7275e-02, 6.1649e-02, -4.6889e-02, 1.6001e-02, + -5.7545e-02, 5.0802e-02, -3.0250e-02, 7.6051e-02, -5.7192e-02, + 1.7285e-03, -2.3828e-03, -1.6417e-02, 9.5145e-02, -2.6237e-02, + 9.2725e-03, -7.9587e-02, 4.6451e-02, 5.6558e-02, 9.2277e-03, + 6.6372e-02, 8.8247e-02, 4.5625e-02, 2.6280e-02, 2.6710e-02, + 3.2452e-02, 9.7308e-02, 6.8142e-03, 1.4230e-02, -3.9746e-02, + 5.8876e-02, 7.5144e-03, 5.6110e-02, -6.9678e-02, 8.3911e-02, + -1.1423e-01, -3.4860e-02, -8.7782e-02, 2.4758e-02, 5.3315e-03, + -4.8554e-02, -6.1523e-02, 7.4342e-03, -3.1822e-02, -4.5801e-02, + 4.5603e-02, -2.8560e-02, 1.5172e-02, -6.5890e-02, -2.5550e-02, + -1.1881e-02, 1.0670e-02, 6.7133e-02, -3.5807e-02, 3.2999e-02, + 3.9318e-02, -1.1167e-01, 1.9786e-03, 3.6381e-02, -6.4092e-02, + 1.5747e-01, 1.4422e-01, 8.4585e-04, 1.2694e-02, 1.9729e-02, + 3.4082e-02, 1.0148e-01, -3.5446e-02, 1.4398e-01, -1.1190e-01, + -2.0517e-02, -4.3020e-02, 9.5896e-02, 4.6603e-02, 5.5142e-02, + 9.3104e-03, 9.9086e-02, 2.6252e-02, -1.0458e-02, 4.9047e-02, + 4.6309e-02, -1.9033e-02, 7.5156e-03, 7.0244e-02, 2.1686e-02, + -9.7619e-03, 6.5970e-02, -8.2329e-02, 9.5006e-02, -4.9631e-02, + 3.9037e-02, 2.9221e-03, -5.2377e-02, -3.3832e-02, -8.8777e-03, + -6.0936e-02, 
-8.4812e-03, 4.0256e-02, 1.2562e-01, -5.0265e-02, + -5.2797e-02, 1.9654e-01, -4.9036e-02, -2.3278e-02, 9.1080e-02, + -1.1720e-01, 1.3250e-02, 5.1713e-02, 4.0155e-02, -9.8394e-02, + 1.4361e-01, -9.7763e-03, 1.2028e-01, -3.9207e-02, 2.3117e-02, + 8.6042e-02, -8.9835e-03, -5.6574e-02, -1.4509e-02, 3.6862e-03, + -1.0871e-01, 1.5602e-02, -3.5644e-02, -3.3046e-02, 7.6870e-02, + 2.7187e-02, 8.6666e-02, 6.3289e-02, 2.9537e-02, 2.4505e-02, + -3.2169e-02, 3.7398e-02, 9.9083e-03, 1.2819e-02, -4.2381e-02, + -3.4945e-02, -4.1356e-02, 9.5625e-03, -1.0108e-02, -5.6753e-02, + 3.8378e-02, -1.0612e-03, 2.3003e-02, -8.1679e-02, -1.6081e-02, + -2.5599e-02, 3.0458e-02, 9.6022e-02, 2.4029e-02, 2.1840e-02, + 4.9550e-02, -4.1764e-02, -5.5127e-02, -6.5640e-03, -2.8241e-02, + 3.8417e-03, 7.3980e-02, 3.6628e-02, -1.5850e-02, -1.6176e-02, + -9.0316e-02, 1.1332e-01, 2.6261e-02, 6.8534e-03, -1.0242e-02, + -1.7797e-02, -1.3862e-02, -7.3380e-02, -4.9812e-02, -6.4270e-02, + 2.7561e-02, -4.8994e-02, -2.0447e-02, -1.3643e-02, 6.1894e-02, + 6.2277e-03, 4.1400e-02, 2.3730e-02, 1.8632e-02, 7.0347e-02, + 1.1018e-01, -6.8689e-02, 5.4930e-02, 2.8267e-02, -8.7133e-02, + -1.2398e-01, -6.2607e-02, 3.4140e-03, 5.9502e-03, -2.9806e-02, + -5.7428e-02, 2.2081e-02, 8.3703e-02, -6.1587e-02, -6.7964e-02, + -9.1609e-01, 1.5850e-02, 2.8917e-02, -3.4535e-02, -1.1373e-02, + 3.3038e-02, -1.5638e-02, -1.9283e-02, -4.7749e-02, -4.2693e-02, + 4.2583e-03, -1.3398e-01, -8.5306e-02, 1.2055e-02, -3.7024e-02, + -5.7687e-02, -4.9867e-02, -2.1062e-02, 6.0778e-02, 6.8634e-02, + -2.2010e-02, 3.8530e-03, 3.4686e-02, 8.6580e-02, -6.1052e-02, + -1.2155e-02, -8.3256e-02, -4.8036e-02, 8.6155e-02, 7.9283e-02, + -1.1840e-01, 5.3046e-05, -8.5889e-02, 1.0105e-01, 1.0565e-02, + -3.0992e-03, 4.3394e-02, -9.1592e-02, 3.0969e-02, 3.9235e-02, + -3.1675e-02, 2.2725e-04, 4.3952e-02, -1.6739e-02, -9.4257e-02, + 2.3025e-02, 3.2887e-02, -6.3583e-02, 1.7878e-02, 1.0523e-02, + -4.1162e-02, 1.2117e-02, -5.3632e-02, 4.9440e-02, 2.1264e-02, + -3.3455e-04, 
-9.0725e-02, -3.2049e-02, -8.2630e-03, -4.3023e-02, + 4.5941e-03, 1.0333e-01, 6.9964e-02, 2.6214e-03, 7.1494e-02, + 7.0434e-02, -1.2217e-01, 2.8510e-02, 1.3535e-01, -6.5110e-02, + -5.2724e-03, -2.8958e-02, 8.3868e-02, 6.3884e-02, 3.0647e-02, + 3.0163e-02, -6.8748e-03, 3.6247e-02, -2.0434e-02, -1.1683e-01, + 2.4959e-02, -6.3642e-02, 4.0930e-02, -4.0984e-02, 2.4238e-02, + 9.8886e-03, -3.7878e-02, 4.9394e-02, -2.7229e-02, -3.0365e-02, + 3.3135e-02, 5.1902e-02, 6.7655e-02, 3.0071e-02, 1.9174e-02, + -7.3900e-02, 3.5665e-02, -5.8163e-02, 7.0575e-02, -6.0976e-02, + 5.5363e-03, 2.2391e-02, 1.0395e-01, 7.7417e-02, -8.4300e-02, + -6.5664e-02, -2.2830e-02, 6.6328e-02, -2.2972e-02, -4.1334e-02, + -1.4479e-02, -7.1433e-03, -2.0187e-02, -1.0832e-02, -8.9342e-02, + 2.1364e-02, 3.7668e-02, -2.2352e-02, -5.8539e-02, -2.8948e-03, + -3.9876e-03, -4.2716e-02, 2.3490e-02, -6.3561e-02, -4.4949e-02, + -7.5358e-03, 1.4734e-02, -1.6480e-02, 4.3481e-02, 1.9900e-02, + -2.1993e-02, 3.7201e-02, 1.4117e-02, 8.6074e-02, 1.4078e-02, + 6.8319e-02, 1.7246e-02, -5.1239e-02, 1.0810e-02, -1.1514e-02, + -8.7157e-03, 2.3565e-02, -9.8027e-03, 2.3457e-02, 7.3346e-02, + -5.4916e-02, 8.1404e-02, 2.9883e-02, 2.6833e-02, 1.1656e-01, + 6.4959e-02, 3.9964e-03, 7.9622e-03, 2.5413e-02, 3.7862e-02, + 5.7556e-02, -1.7377e-03, -7.8044e-02, 3.9611e-03, 5.0649e-02, + 2.1132e-02, 3.8345e-02, 4.0324e-02, 3.7830e-02, -2.6392e-02, + -7.8393e-02, -1.0003e-02, 5.9539e-02, 1.0001e-01, 6.2470e-02, + 4.5374e-03, 4.8765e-02, -5.7036e-02, 2.1272e-02, 1.7698e-02, + 8.3991e-03, 1.1972e-01, 5.4382e-02, -2.7853e-02, -2.0012e-03, + -1.9343e-02, 6.0604e-02, 8.5150e-03, -3.5723e-02, 4.2000e-02, + -4.4597e-02, -1.0483e-01, 2.3521e-02, -1.0434e-02, 2.4817e-03, + -2.6558e-02, 6.5472e-02, -1.9289e-01, 3.2543e-02, -5.5323e-02, + 4.4021e-02, 3.7345e-02, -6.6249e-03, -3.5713e-02, -2.2755e-02, + -1.0394e-01, 6.1514e-02, 8.5493e-02, 3.4580e-02, -4.7714e-02, + 1.2638e-02, 3.7360e-03, 1.3561e-02, 2.2447e-02, 1.5395e-02, + -3.0915e-02, 8.6202e-03, 
-1.8053e-02, 4.4125e-02, -1.6347e-02, + 8.7838e-02, 2.2420e-02, 8.3271e-02, 1.3798e-01, 2.5618e-02, + 3.2791e-02, -4.0671e-02, -4.8187e-03, 5.1011e-02, -4.7374e-02, + 1.3102e-02, -4.2113e-02, 8.1951e-02, -4.8852e-02, 3.5464e-02, + -3.0721e-03, 1.8094e-02, 7.1526e-02, 3.9374e-02, 9.8026e-02, + 8.5014e-02, -5.5595e-03, -4.8160e-02, 6.0500e-02, -3.8437e-02, + 2.5993e-02, -7.7234e-02, -4.8771e-02, 2.8361e-02, 6.6123e-02, + -7.5233e-02, 6.7676e-03, -8.9858e-02, 5.8787e-03, 8.0563e-02, + 2.5858e-02, -8.7248e-02, -8.6673e-02, 2.9948e-02, 2.5570e-02, + -1.9238e-01, 5.0784e-03, 1.4281e-02, 7.2192e-04, -3.1842e-02, + 3.3397e-02, -5.5607e-02, -6.8789e-02, -1.0273e-03, 6.6684e-02, + -4.6887e-02, -5.2200e+00, -1.7528e-02, 4.8796e-02, -1.3987e-02, + -9.0133e-02, -2.4560e-02, -3.0890e-02, -1.3240e-02, 3.3220e-02, + -2.1894e-02, -5.0840e-02, 3.4118e-02, -1.8583e-02, -4.1701e-02, + 2.2649e-02, -2.8920e-02, -9.6992e-02, 2.9297e-02, -4.1082e-02, + 8.9242e-02, 6.1757e-02, -3.6107e-02, -1.3546e-02, -1.2672e-01, + -2.2187e-02, 4.5807e-02, 4.1666e-02, 4.1213e-03, 3.8655e-02, + -1.4283e-02, 6.7196e-02, 7.3769e-02, 1.5050e-02, 2.3629e-02, + 2.7161e-02, 7.9100e-03, 1.6354e-02, -8.4922e-02, 2.0299e-02, + 8.6790e-03, 3.9312e-03, 2.8671e-02, -7.6458e-02, 3.5176e-02, + 9.5686e-03, -1.8447e-01, 9.5421e-02, 1.8604e-02, 9.2838e-02, + 1.0337e-01, 4.1986e-02, 4.9914e-02, 4.2715e-02, 3.8483e-03, + -1.6496e-02, -2.5456e-02, -1.3371e-02, -1.5350e-02, -3.9309e-02, + 8.4016e-02, -9.3841e-02, -9.3237e-02, 6.4717e-02, -1.0674e-02, + -7.5973e-03, -1.0846e-01, -3.1029e-03, 5.0573e-02, -5.4467e-02, + -4.5049e-02, -1.6193e-02, -4.6469e-02, -5.5992e-02, 4.7240e-02, + -3.7008e-03, 1.1970e-01, 1.0794e-01, 4.3454e-02, -7.2692e-02, + 1.9468e-02, 4.4535e-02, 5.7215e-02, -6.2108e-02, -7.0441e-02, + -7.9148e-02, 8.5440e-02, 1.7515e-04], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0181, -0.0053, 0.0376, ..., 0.0159, 0.0007, -0.0079], + [-0.0007, 0.0249, 0.0235, ..., -0.0029, -0.0186, 
0.0282], + [-0.0062, 0.0307, 0.0169, ..., -0.0004, -0.0213, -0.0294], + ..., + [-0.0041, -0.0177, 0.0085, ..., 0.0139, -0.0345, 0.0094], + [-0.0312, -0.0024, 0.0006, ..., -0.0078, 0.0215, -0.0030], + [ 0.0234, -0.0085, -0.0076, ..., 0.0165, -0.0413, 0.0310]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3335, -0.4612, -0.1525, ..., -0.2974, -0.4580, -0.3103], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0056, 0.0015, -0.0063, ..., -0.0112, -0.0193, -0.0046], + [ 0.0023, 0.0101, 0.0025, ..., 0.0093, 0.0138, 0.0203], + [-0.0024, 0.0070, -0.0107, ..., 0.0100, -0.0153, 0.0128], + ..., + [-0.0022, -0.0275, 0.0059, ..., 0.0156, 0.0155, 0.0103], + [ 0.0172, 0.0090, 0.0030, ..., -0.0214, -0.0010, -0.0127], + [-0.0244, 0.0601, 0.0131, ..., 0.0105, -0.0049, 0.0170]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.4061e-02, 5.6396e-02, -6.3660e-02, -1.5656e-02, -7.0129e-02, + 2.0599e-02, 4.9377e-02, -3.9101e-03, -9.5215e-02, -2.7008e-02, + -6.2294e-03, -5.4504e-02, -3.6987e-02, 1.1467e-02, -6.0364e-02, + 1.1948e-02, -2.9373e-02, -8.7402e-02, 8.5144e-03, 5.3802e-02, + -3.0930e-02, 1.3420e-02, 2.3880e-02, -3.1372e-02, 5.2948e-02, + 5.3467e-02, -4.9561e-02, 1.4877e-02, -1.1041e-01, -2.2690e-02, + 1.9684e-02, 5.5275e-03, 1.0254e-02, 7.5806e-02, -9.7046e-03, + -1.0480e-01, 8.2474e-03, -3.4485e-02, 3.0014e-02, 3.0609e-02, + -3.0533e-02, 8.3801e-02, 2.3941e-02, 5.3024e-03, -6.8481e-02, + 1.1032e-02, 2.2781e-02, 1.8511e-03, -8.6548e-02, -1.5198e-02, + 6.7505e-02, 1.4053e-02, 3.4424e-02, 6.8176e-02, 7.7271e-02, + -2.4109e-02, -8.1421e-02, 2.7557e-02, -3.1052e-02, 6.6662e-04, + -4.0070e-02, 3.4027e-02, 1.1841e-02, -6.9618e-03, 3.4424e-02, + -3.0487e-02, 7.7844e-05, -7.1220e-03, -1.5717e-02, 4.3060e-02, + -4.1138e-02, -8.2642e-02, 1.1102e-01, 1.3599e-01, -4.3335e-02, + 3.8269e-02, -1.6346e-03, 4.3030e-02, -2.4582e-02, -5.9174e-02, + -2.5162e-02, 
-3.9001e-02, -4.8790e-03, 1.1086e-02, 1.3870e-02, + -6.6833e-02, 3.4424e-02, 4.6539e-02, 6.1760e-03, -3.5339e-02, + 1.6159e-02, 3.7079e-02, 4.8370e-02, -2.2202e-02, -1.4000e-02, + 1.0925e-01, 1.7685e-02, -5.4413e-02, -7.2205e-02, -1.6632e-02, + 2.9182e-03, -1.9666e-01, 5.5756e-02, -5.1514e-02, 4.4647e-02, + 6.9946e-02, 2.4979e-02, -1.4046e-02, -3.4027e-02, 2.9282e-02, + -1.1826e-02, 2.2263e-02, 9.3933e-02, 3.9429e-02, -3.7781e-02, + -5.6458e-02, -3.6621e-02, -5.4779e-02, -3.7169e-04, -1.4061e-02, + -3.3966e-02, -2.3285e-02, -4.4281e-02, -1.0368e-02, -3.7384e-02, + -3.3112e-02, -7.8613e-02, -3.0914e-02, 5.9021e-02, -4.4769e-02, + -7.5195e-02, -4.1779e-02, 4.3030e-03, -6.2866e-02, 2.7435e-02, + 1.2720e-01, 2.9449e-03, 6.2408e-02, 5.4245e-03, 3.5034e-02, + 3.0960e-02, -3.8177e-02, 4.6463e-03, 8.0109e-03, -1.8127e-02, + -8.8684e-02, -1.0429e-02, -3.1433e-02, 6.5674e-02, 6.5735e-02, + 8.7036e-02, -8.7524e-02, 2.6718e-02, 8.1909e-02, -9.2102e-02, + -1.7593e-02, -5.4932e-02, 1.5839e-02, 1.3710e-02, -3.4409e-03, + 1.6577e-01, -2.3468e-02, 2.4734e-02, 1.5747e-02, -2.8046e-02, + 4.0802e-02, -5.1392e-02, -9.7122e-03, 1.8723e-02, 5.7281e-02, + -2.8458e-02, 2.8839e-02, -2.7649e-02, -4.4220e-02, 1.2222e-02, + -2.3956e-02, 1.4236e-02, -1.0651e-02, -7.6904e-02, -6.7322e-02, + -5.0262e-02, -5.9509e-02, -7.0618e-02, -4.3396e-02, 8.0078e-02, + -3.3325e-02, 3.0624e-02, 2.4512e-01, -1.0144e-01, -6.1249e-02, + 2.6138e-02, -8.4610e-03, 7.6843e-02, 1.1192e-02, -8.0505e-02, + 5.1453e-02, 6.0364e-02, -4.6722e-02, -3.1586e-02, -3.1311e-02, + 5.7953e-02, -6.7997e-04, -3.5973e-03, -7.8125e-02, 4.6936e-02, + 3.1021e-02, -6.1127e-02, 2.0584e-02, 1.0339e-01, 5.8472e-02, + -1.3710e-02, 4.4250e-02, 3.9154e-02, 1.7075e-02, 3.4424e-02, + -7.4158e-02, -1.0986e-02, -2.1469e-02, -8.9050e-02, 9.3689e-03, + 2.7557e-02, -4.6021e-02, 3.1769e-02, 1.6556e-02, 5.2643e-02, + -1.8143e-02, -6.4270e-02, -2.6123e-02, 6.5247e-02, 3.2104e-02, + -5.7434e-02, 5.7869e-03, -1.4633e-02, 8.5510e-02, -4.4556e-02, + 
-2.1534e-03, -4.9896e-02, -3.9917e-02, 5.9479e-02, -1.2680e-02, + 1.0901e-01, -2.4338e-02, -2.4429e-02, 4.2297e-02, 5.8594e-02, + -2.1072e-02, -1.8875e-02, -1.3257e-01, -6.5979e-02, 3.3356e-02, + 1.7443e-03, -1.4175e-02, -5.2338e-02, -7.3364e-02, 1.6251e-02, + -1.5802e-03, -3.9459e-02, -2.7374e-02, 2.8000e-02, 2.1164e-02, + 4.5654e-02, -7.5378e-02, -7.2815e-02, -4.2236e-02, -3.5889e-02, + -3.0624e-02, 3.6987e-02, -2.8732e-02, -7.6828e-03, 4.5319e-03, + -1.9806e-02, -9.3937e-04, 2.4857e-02, -2.2259e-03, -3.6591e-02, + 3.7323e-02, 2.0370e-03, 7.9834e-02, 3.7231e-02, 1.0002e-02, + 1.7105e-02, 4.5837e-02, -4.6692e-02, -4.8523e-02, 3.3722e-02, + 8.3435e-02, -2.1683e-02, -6.1798e-02, 2.3483e-02, 3.5919e-02, + 2.0370e-02, -9.9869e-03, 1.8478e-02, -2.6886e-02, 6.9946e-02, + 3.6621e-02, -2.6321e-03, 9.5947e-02, 1.6586e-02, 5.2338e-02, + -4.2084e-02, 1.9943e-02, -5.6305e-02, 6.4026e-02, -6.0211e-02, + 3.3630e-02, 5.9448e-02, 3.2410e-02, 2.9678e-02, -2.2797e-02, + 3.8696e-02, -1.2733e-02, -1.3748e-02, -5.0018e-02, 1.8646e-02, + -1.9653e-02, -2.2293e-02, -3.8242e-03, 7.0740e-02, -4.0710e-02, + -5.0720e-02, -4.9248e-03, -7.8369e-02, -4.6570e-02, 6.0693e-01, + -1.6943e-01, 1.2054e-01, -2.4719e-02, 1.8860e-02, -1.4612e-01, + 6.7749e-02, -3.7811e-02, 4.3427e-02, -2.1225e-02, -2.4750e-02, + 7.8979e-02, -1.7532e-02, -4.8798e-02, 1.6388e-02, -4.3549e-02, + -5.3711e-02, 3.9062e-02, 2.7985e-02, 5.9631e-02, 2.8320e-02, + 4.2267e-02, 3.2401e-04, 2.4475e-02, 1.0384e-02, -4.2480e-02, + -3.9856e-02, 1.0321e-01, -5.8563e-02, 2.4902e-02, -1.4107e-02, + -5.2368e-02, -2.2842e-02, 2.3376e-02, -3.6469e-02, -3.9429e-02, + 6.1401e-02, -6.1646e-03, 5.8746e-02, -9.4528e-03, -1.2421e-02, + 6.7322e-02, -1.9302e-02, 5.4230e-02, 9.0485e-03, -4.9255e-02, + 1.1572e-01, 1.6220e-02, -7.0435e-02, 2.1118e-02, -4.2145e-02, + -4.5593e-02, -4.9774e-02, 6.0913e-02, 3.0304e-02, 2.1534e-03, + -1.0452e-03, 1.9501e-02, -1.0841e-02, -5.1544e-02, 4.0253e-02, + -7.6721e-02, 4.4434e-02, -3.3722e-02, -3.0746e-02, 
-1.0907e-01, + -6.8787e-02, 8.5571e-02, -3.6743e-02, -2.6855e-02, -4.3762e-02, + -3.9215e-02, -1.0818e-02, -8.3313e-02, 5.0842e-02, 6.1035e-02, + 3.1311e-02, 8.9741e-04, -1.7532e-02, 7.4341e-02, -1.5465e-02, + -6.9519e-02, -5.2002e-02, 1.0608e-01, -7.9163e-02, -9.5062e-03, + -3.2921e-03, 7.5989e-02, -4.3907e-03, -2.6840e-02, -6.5536e-03, + 3.3234e-02, 1.0811e-02, 5.7983e-03, -4.9042e-02, -3.9093e-02, + -3.9581e-02, 1.7822e-02, -6.1096e-02, -5.0720e-02, -4.8157e-02, + 4.1428e-03, -4.1016e-02, -3.2959e-02, 3.9154e-02, -2.9526e-02, + -1.2360e-01, 9.9030e-03, -2.6550e-02, 2.6505e-02, 3.7750e-02, + -3.5858e-02, 8.9951e-03, 4.8637e-03, -3.0960e-02, -8.2764e-02, + -3.8242e-03, 3.0319e-02, -4.2175e-02, -8.8318e-02, 2.1057e-02, + -9.5276e-02, -8.1665e-02, -3.4393e-02, 4.5532e-02, -1.3542e-02, + -1.3756e-02, -6.5308e-02, -8.9355e-02, 1.4160e-01, -4.8645e-02, + 4.9713e-02, -1.3733e-02, 3.3447e-02, 2.5406e-02, -6.4964e-03, + 5.6702e-02, 1.6342e-02, 1.6785e-02, -1.6998e-02, 5.7487e-03, + -4.2175e-02, 7.4036e-02, 1.0849e-02, 1.0529e-01, 3.6804e-02, + -2.8107e-02, -7.0068e-02, -4.9713e-02, 1.8188e-02, -4.3671e-02, + 3.5004e-02, 9.6313e-02, 4.2450e-02, 6.0059e-02, 5.2704e-02, + 3.7018e-02, -8.0109e-03, -8.6594e-03, 4.1779e-02, 2.6550e-02, + -6.3110e-02, -7.6843e-02, 8.2886e-02, 4.2206e-02, -7.9285e-02, + 2.0248e-02, 4.6539e-02, 1.3840e-02, -1.7807e-02, -6.7871e-02, + 9.4070e-03, -5.5573e-02, 5.7770e-02, 1.0849e-02, 9.2163e-02, + 6.7139e-02, 1.6388e-02, -2.5635e-03, -1.1945e-01, 8.4763e-03, + 5.6488e-02, -4.8637e-03, 5.6702e-02, -9.3140e-02, -2.0325e-02, + -9.3994e-02, -8.2275e-02, -1.3657e-02, 4.4708e-02, -5.2551e-02, + 2.5864e-02, -3.2562e-02, 1.1322e-01, 7.3181e-02, -6.3171e-02, + 6.0608e-02, -6.4545e-03, 1.1572e-01, -2.9510e-02, 8.9233e-02, + 6.3721e-02, 1.4725e-02, 1.8356e-02, -2.4994e-02, -2.6642e-02, + 2.1530e-02, -1.0132e-02, -6.9214e-02, 2.2064e-02, -3.3997e-02, + -1.0612e-02, -2.5970e-02, 2.8091e-02, -2.3727e-02, -5.9692e-02, + -7.8796e-02, 8.3862e-02, 3.1769e-02, 
-6.2500e-02, 5.5450e-02, + -1.8280e-02, 7.4524e-02, -3.0426e-02, -5.5359e-02, 7.1373e-03, + -3.7903e-02, 1.7685e-02, -7.1106e-02, -3.8385e-05, -5.7373e-02, + -2.8854e-02, -9.9258e-03, -5.6152e-02, -2.2678e-03, 8.7341e-02, + 2.7756e-02, 3.2654e-02, -9.6817e-03, -2.3178e-02, 1.3306e-01, + 4.1626e-02, -5.2765e-02, 2.1393e-02, 7.0129e-02, 4.1016e-02, + 4.2558e-04, -7.3730e-02, -6.8359e-02, 2.1774e-02, 8.2520e-02, + 2.7802e-02, -8.4019e-04, 8.5083e-02, 4.2603e-02, -3.7598e-02, + -1.0718e-01, -1.2264e-03, 9.1476e-03, -8.7509e-03, 7.0923e-02, + 6.0547e-02, 6.1073e-03, 1.0052e-03, 9.3689e-02, -2.9633e-02, + -2.0767e-02, 5.7449e-03, -1.1909e-02, -9.0942e-02, 1.8463e-02, + 7.7629e-03, -4.3640e-02, -4.4861e-02, -6.5918e-02, -2.2125e-02, + -8.6853e-02, 3.0991e-02, 1.1780e-02, 3.6835e-02, -3.6163e-02, + -1.8982e-02, 4.1443e-02, -6.8237e-02, -4.1016e-02, -1.4877e-02, + 4.0817e-03, 7.3624e-03, -1.0017e-02, 1.6495e-02, 4.3091e-02, + 3.0319e-02, -2.2293e-02, 6.9351e-03, -4.2633e-02, -6.1218e-02, + 4.9255e-02, -2.5101e-02, 3.8818e-02, -3.5439e-03, 1.9394e-02, + 1.0309e-03, 1.2541e-03, 1.5793e-02, 2.5757e-02, -4.0253e-02, + 1.6455e-01, -1.1420e-01, -4.3030e-03, 1.1755e-01, 3.1464e-02, + -4.3945e-02, 1.9547e-02, 7.6050e-02, -8.4229e-03, -5.1544e-02, + 4.4250e-02, -1.4275e-02, 6.0883e-03, -3.5278e-02, -3.5004e-02, + -2.5192e-02, 2.6306e-02, 4.5135e-02, 6.1646e-02, 5.8777e-02, + 5.5580e-03, 6.5857e-02, -1.1185e-02, 1.6357e-02, -8.2092e-03, + 9.3262e-02, -7.9346e-02, -2.1301e-02, 1.0548e-03, 2.6718e-02, + -3.2349e-03, 9.4727e-02, -2.4918e-02, -5.5542e-03, 5.9204e-02, + 2.4399e-02, -9.6985e-02, -4.7668e-02, 6.7558e-03, -1.0597e-02, + -4.4739e-02, -1.3306e-02, -2.6443e-02, 5.0629e-02, 5.0392e-03, + 1.5549e-02, -1.6632e-02, -2.2964e-03, 5.2368e-02, 6.4941e-02, + 8.4412e-02, -6.3416e-02, -5.0537e-02, 4.8157e-02, -6.9336e-02, + -8.6212e-04, -1.1627e-01, -4.7943e-02, -1.5190e-02, 3.7415e-02, + -2.2430e-02, -3.0457e-02, 8.8196e-02, -2.6581e-02, 8.4534e-03, + -9.7961e-03, 4.3427e-02, 5.0446e-02, 
-8.2214e-02, -4.3976e-02, + -2.3193e-02, 6.0791e-02, 1.6357e-02, -6.8298e-02, -2.3895e-02, + -4.2816e-02, -1.3687e-02, 5.8929e-02, -1.0574e-02, 3.6560e-02, + -8.7891e-03, -3.1872e-03, 3.5370e-02, -2.9800e-02, -2.0485e-03, + 8.1406e-03, 1.9684e-02, 5.2216e-02, 1.5137e-02, -4.5471e-02, + -6.7444e-02, 1.6251e-02, 4.4495e-02, 4.0558e-02, -2.4445e-02, + -2.1790e-02, -6.0501e-03, 3.4210e-02, 4.8065e-02, 9.5886e-02, + -1.2589e-02, -5.8167e-02, -6.6467e-02, 3.1158e-02, -2.4628e-02, + 1.0902e-02, 5.6488e-02, 6.9763e-02, 2.7252e-02, -4.3304e-02, + 1.3428e-01, 7.2327e-03, -3.0807e-02, 4.0741e-02, -5.2032e-02, + -7.6294e-02, -8.7402e-02, -1.6678e-02, 4.8584e-02, -1.5306e-03, + 8.0719e-03, 1.9730e-02, -1.1078e-01, -3.7415e-02, 1.0144e-01, + -8.3237e-03, 9.0271e-02, 3.2623e-02, 1.0938e-01, 3.7140e-02, + 2.9648e-02, -7.9269e-03, -4.7699e-02, -2.3422e-02, 4.2999e-02, + -2.6901e-02, 2.6611e-02, 2.6657e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.0307, 1.9726, 2.0683, 2.0922, 2.0904, 2.1595, 2.1940, 2.0945, 2.1829, + 2.0632, 2.0069, 2.0619, 2.0249, 2.1153, 2.0952, 2.0311, 2.0945, 1.9873, + 2.0693, 1.3209, 1.9580, 2.0676, 2.0866, 2.1143, 2.0650, 2.1496, 2.0865, + 1.9528, 2.1317, 2.0063, 2.1316, 2.1745, 2.0471, 2.0919, 2.0855, 1.9931, + 2.0886, 2.1438, 1.9753, 2.0601, 2.0038, 2.0082, 2.1160, 2.1505, 2.2815, + 2.0785, 2.2053, 2.0123, 2.0239, 2.0069, 2.0493, 2.0066, 2.1710, 2.0697, + 2.2646, 2.1356, 2.0977, 2.0522, 2.0962, 2.0822, 2.1202, 2.0967, 2.1053, + 2.1302, 2.0645, 2.0926, 1.9923, 2.0041, 2.0004, 2.0571, 2.2438, 2.0124, + 2.0061, 1.9758, 2.1365, 2.1637, 2.0070, 2.0505, 2.1504, 2.0160, 2.0619, + 1.9575, 2.0646, 2.0421, 2.1013, 2.0887, 2.2091, 2.0112, 1.9810, 2.0051, + 2.0335, 2.1576, 2.1458, 2.0246, 1.9795, 2.0867, 2.2490, 2.0376, 2.1731, + 2.0989, 1.9751, 1.8038, 2.0467, 2.1660, 2.0282, 1.9610, 2.3143, 2.1453, + 2.0223, 2.0440, 1.9253, 2.2580, 2.0914, 2.1201, 2.0345, 1.9917, 1.9536, + 2.0809, 2.0473, 2.0467, 2.0435, 1.9806, 
2.1231, 2.1840, 2.0553, 2.1431, + 1.9885, 2.3494, 2.0862, 2.0096, 2.0571, 2.0646, 2.0548, 2.0386, 2.2171, + 2.1465, 2.1068, 1.9592, 2.0894, 2.1193, 2.1046, 2.0338, 2.0214, 2.0104, + 1.9678, 2.0540, 2.1080, 2.1480, 2.1495, 2.0671, 2.0314, 2.2913, 2.2099, + 2.1134, 2.0601, 1.4066, 2.0700, 2.0636, 2.1496, 2.0385, 2.0866, 2.0064, + 2.1332, 1.8232, 2.0438, 2.2630, 2.2799, 2.0261, 2.1469, 2.0730, 2.0680, + 2.0978, 1.9948, 2.0685, 2.0162, 2.1138, 2.2317, 2.0896, 2.1131, 2.1128, + 2.2830, 2.1778, 1.9381, 1.9642, 2.1773, 2.2628, 1.9100, 0.7467, 2.0003, + 2.2190, 2.1667, 2.1351, 2.2362, 2.1190, 2.1051, 2.3376, 2.1806, 2.0938, + 0.9265, 1.9867, 2.1207, 2.0960, 2.2167, 2.1010, 2.0326, 2.1115, 2.1153, + 2.2205, 2.0719, 2.0205, 2.0097, 2.0012, 2.2113, 2.1965, 2.1255, 1.9719, + 2.1534, 2.0961, 2.1630, 2.1540, 2.2609, 2.1181, 2.0263, 2.1285, 1.9925, + 2.0507, 2.0578, 2.0307, 2.1078, 2.0729, 2.0448, 2.1018, 2.0279, 2.0320, + 1.9764, 2.0579, 2.1533, 2.0485, 2.0464, 2.0832, 2.0751, 2.2815, 2.0456, + 1.9951, 2.0517, 2.1280, 1.9985, 2.1785, 2.0958, 2.1668, 2.0606, 2.1180, + 2.0829, 2.0670, 1.9896, 2.0625, 2.0223, 1.9520, 2.1020, 1.9954, 2.0594, + 1.9704, 1.9320, 2.0810, 2.0772, 1.9758, 2.0288, 2.0715, 2.0910, 1.9785, + 2.0258, 2.0914, 2.0275, 2.0663, 2.1614, 2.0998, 1.9857, 2.1790, 2.1992, + 2.0393, 2.0261, 1.9987, 2.0452, 2.0599, 2.0318, 2.0960, 2.2425, 2.1013, + 2.1393, 2.0122, 2.1288, 2.1729, 2.0566, 2.1189, 2.0796, 2.0585, 2.1067, + 2.0435, 2.1635, 2.2792, 2.1824, 2.0506, 2.0928, 2.2066, 2.0765, 1.9179, + 2.0876, 2.1804, 2.1673, 2.1951, 2.2373, 1.9701, 1.9493, 2.0041, 1.9992, + 2.2302, 2.0406, 2.0838, 2.1606, 2.0498, 2.1176, 2.0158, 2.1376, 2.1164, + 1.2271, 1.7720, 2.1755, 2.0930, 2.1836, 2.0538, 2.0824, 2.0939, 2.1511, + 2.0509, 2.0580, 2.0198, 1.9866, 1.9962, 2.0523, 2.0944, 2.1456, 2.1053, + 2.0300, 2.0940, 2.3120, 1.9547, 2.0700, 2.1009, 2.0682, 2.0459, 2.0006, + 2.2175, 1.9863, 2.2981, 2.0458, 2.0225, 1.9890, 2.0384, 2.2434, 2.0457, + 2.1980, 2.0483, 2.0478, 2.0192, 2.1184, 
2.0825, 2.2390, 2.1722, 1.9870, + 2.1458, 2.0283, 1.9633, 2.0250, 2.1574, 1.9837, 2.1228, 2.0989, 2.0857, + 2.0165, 1.9334, 2.0708, 1.9785, 2.0866, 2.0367, 2.0997, 2.0162, 2.0879, + 2.0782, 2.0978, 2.0613, 2.1383, 2.0852, 2.0643, 2.2999, 2.2209, 2.0422, + 2.2045, 2.0467, 2.1153, 2.3100, 2.0153, 2.2065, 2.0402, 2.0260, 2.0171, + 2.1041, 2.1454, 2.0819, 2.0804, 2.1649, 2.1010, 2.0213, 2.0817, 2.2861, + 1.5624, 2.4553, 2.0714, 2.0337, 2.0162, 2.1285, 2.2080, 2.0755, 2.0775, + 2.0191, 2.0924, 2.0059, 2.0281, 2.0560, 2.2036, 2.1356, 2.0119, 2.0982, + 1.9969, 2.2523, 2.0878, 2.0347, 2.0952, 2.0964, 2.0247, 2.0063, 1.9721, + 2.1730, 2.1007, 1.8965, 2.0519, 2.0051, 2.1024, 2.0860, 2.0207, 2.0678, + 2.0972, 2.0524, 2.2369, 2.1333, 2.0543, 2.2793, 1.9811, 2.1158, 2.1302, + 2.0558, 2.1161, 2.0195, 2.2593, 2.0156, 2.0946, 1.8880, 2.0206, 2.1074, + 2.2498, 2.0933, 2.0475, 2.0456, 2.0136, 1.9998, 2.2418, 2.1298, 2.1151, + 2.2087, 2.0441, 2.0162, 2.0065, 2.2207, 2.0723, 1.5891, 2.0869, 2.0881, + 2.0180, 2.0379, 1.9723, 2.1504, 2.0642, 2.2587, 2.0276, 2.0433, 2.0237, + 2.1352, 2.0410, 1.9913, 1.8999, 2.1760, 2.1910, 2.2422, 2.0412, 1.9200, + 2.0412, 2.0715, 2.0873, 2.1982, 2.0002, 2.1190, 1.9513, 2.0818, 2.1398, + 2.0467, 2.1935, 2.0150, 2.1526, 2.2373, 2.0407, 2.0075, 1.9397, 2.0824, + 1.9908, 2.0283, 2.0259, 2.0223, 2.1736, 1.9523, 1.9705, 2.2646, 2.0516, + 2.0430, 2.0424, 2.0742, 2.1556, 2.0510, 2.0165, 1.9642, 2.1213, 2.0721, + 2.0460, 2.0685, 2.2526, 2.0811, 2.0153, 2.0851, 2.0620, 2.0626, 2.1310, + 2.0389, 2.2499, 2.0059, 2.0056, 2.1145, 2.1155, 1.9914, 1.9843, 2.0977, + 2.0383, 1.9961, 2.0099, 1.8014, 2.0623, 2.1228, 2.1464, 2.0297, 2.0503, + 2.1106, 2.0956, 2.1274, 1.9793, 2.1071, 2.1342, 2.0167, 1.9905, 2.1575, + 2.0514, 2.0519, 2.1893, 2.0108, 2.0159, 2.0668, 2.0728, 2.1197, 2.1696, + 1.9989, 2.2020, 2.0833, 2.3113, 2.0717, 2.0181, 2.2653, 2.1990, 2.0044, + 2.0979, 1.9749, 2.0083, 2.0437, 2.1062, 2.0969, 1.9639, 1.9803, 2.0559, + 2.1292, 1.9763, 2.0597, 2.0988, 1.8016, 
2.0659, 2.1028, 2.1028, 2.2098, + 1.9922, 2.1306, 2.1300, 2.1369, 2.0740, 2.0127, 2.0872, 2.0136, 2.1810, + 1.9943, 2.0467, 2.1332, 1.9401, 1.9783, 2.0138, 2.1217, 2.0784, 2.0202, + 2.0787, 2.0651, 2.0199, 2.2011, 2.1647, 2.0053, 1.9973, 2.1077, 2.1093, + 2.1253, 2.0120, 2.0643, 2.1226, 1.9848, 1.9670, 2.1012, 1.9518, 2.0922, + 2.0492, 2.2058, 2.2500, 2.2433, 2.2611, 1.9797, 2.0808, 2.1515, 2.1605, + 2.0636, 2.0649, 2.1260, 1.9303, 2.0484, 2.2103, 2.1352, 2.0834, 2.0316, + 2.1334, 2.0103, 2.1443, 2.0279, 2.1024, 2.1013, 2.2352, 2.0015, 2.1543, + 2.0345, 2.1090, 2.2991, 2.1148, 2.0197, 2.1404, 2.9376, 2.0328, 2.0857, + 2.0746, 2.0915, 2.0639, 2.0736, 2.0799, 2.2220, 2.2648, 1.9870, 2.1855, + 2.0254, 2.0939, 1.9822, 1.9375, 2.1553, 2.1249, 2.1436, 2.0041, 2.0035, + 2.0451, 1.9603, 2.1117, 2.2223, 2.1545, 2.0233, 2.1566, 1.9525, 1.9624, + 2.0093, 2.0608, 2.1441, 2.0819, 2.0554, 2.0715, 2.0770, 2.0988, 2.3386, + 2.0327, 2.0376, 2.1202, 2.1253, 2.0856, 2.0563, 1.7493, 2.0553, 1.9948, + 2.0059, 2.2029, 2.0470, 2.1513, 2.2328, 2.0887, 2.1264, 2.0788, 1.9652, + 2.0717, 2.2406, 2.0654, 1.9399, 2.0947, 2.1596, 2.1316, 2.0074, 2.1920, + 2.0630, 2.0547, 2.1203, 2.0279, 2.0755, 2.1415, 1.9674, 2.0430, 2.1068, + 2.3021, 2.1641, 2.0975, 2.0427, 2.1529, 2.0259, 2.0686, 1.9664, 2.1563, + 2.0207, 1.9886, 2.1628], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-0.1634, 0.3904, -0.6572, 0.0778, -0.3645, 0.8040, 0.6369, -0.0867, + -0.3817, -0.1613, -0.3118, 0.5072, -0.3457, -0.5480, -0.3993, -0.0154, + 0.1044, 0.0733, -0.2292, -1.6394, 0.1039, 0.4882, 0.6659, -0.5731, + 0.4084, 0.1744, -0.1819, -0.0776, -0.3350, 0.4749, 0.0453, 0.3498, + 0.0599, -0.5911, 0.0388, -0.2646, -0.3983, -0.4301, 0.5977, -0.0765, + -0.3520, -0.0992, 0.5881, 0.6972, -0.6566, -0.0760, -0.6735, -0.2202, + 0.4286, -0.1231, -0.5646, -0.3618, 0.5786, 0.4331, -0.7112, 1.5154, + 0.4482, 0.2446, -0.1697, 0.3791, 0.4552, 0.2399, 0.4756, -0.8407, + 0.2955, 0.6822, 0.5503, -0.2047, -0.2023, -1.5053, 
0.9010, 0.5263, + 0.8808, 0.3741, -0.7068, -0.3806, 0.2776, 0.0435, 0.4804, -0.4777, + -0.0481, -0.3290, -0.1130, 0.5107, 0.1467, -0.3193, 0.7629, 0.2585, + -0.1659, -0.3165, -0.4949, 0.4801, 0.5671, -0.1125, -0.7146, 0.5034, + 0.7717, 0.4830, -0.5553, 0.4703, -0.1743, -0.2755, 0.3085, -0.4850, + -0.2644, 0.5301, 0.5706, 0.4006, 0.5181, -0.6555, 0.4549, 0.8119, + -0.8537, 0.5428, 0.4085, -0.2830, 0.6755, 0.5459, 0.1751, 0.0993, + 0.0719, -0.1542, -0.3314, -0.6055, -0.3381, -0.4999, -0.0787, -0.9931, + -0.0120, 0.3317, -0.3046, -0.6244, -0.6682, -0.2793, 0.4557, 0.4783, + 0.4025, -0.2780, -0.4007, 0.2884, -0.5973, 0.3956, 0.0334, 0.6921, + -0.2348, -0.1816, 0.3306, 0.4644, -0.4059, 0.2303, 0.3054, -0.5736, + 0.3553, 0.0930, -0.6138, 0.3553, -0.4840, -0.3348, -0.5161, 0.3380, + -0.5832, 0.0614, 0.5337, 0.1841, -0.1116, 0.5732, -0.9823, -0.0027, + -0.3231, 0.5131, 0.4541, -0.0448, -0.4895, 0.0861, -0.0614, 0.4743, + 0.6420, -0.0692, -0.0172, 0.6431, -0.6864, -0.6511, 0.2636, 0.1945, + 0.7223, 1.1545, -0.2667, 0.9053, -0.0978, -0.7616, -0.9102, -0.3463, + 0.6792, 0.6667, 0.1881, 0.8544, 0.5431, 0.2004, 1.1457, 0.4141, + -0.9264, -0.1053, 0.6978, 0.6861, 0.3049, 0.3733, 0.8130, 0.7921, + -0.4449, -0.0137, 0.2515, -0.5731, -0.6839, 0.7414, 0.0611, 0.0709, + -0.2592, -0.0999, -0.5076, -0.6549, 0.9194, 0.4722, -0.4555, 0.5878, + -0.0519, 0.3215, -0.2202, 0.0980, 0.5063, -0.3292, 0.2484, 0.5109, + -0.2138, 0.2214, -0.2958, -0.2670, 0.5005, 0.6784, 0.2350, 0.7536, + 0.3887, 0.7270, 0.0442, 0.0527, 0.0210, -0.5180, 0.0711, -0.5699, + -0.3708, 0.6118, -0.6395, 0.2085, 0.0284, -0.6505, 0.2182, 0.3107, + 0.3407, -0.2664, -0.3857, -0.2913, -0.6433, 0.1765, 0.1000, -0.4661, + -0.3760, -0.0993, 0.2911, 0.3472, 0.3607, 0.1198, -0.0327, 0.1065, + 0.4254, -0.4536, -0.4919, -0.3662, -0.0898, 0.5787, 0.5154, 0.0449, + 0.1505, -0.7936, -0.4062, 0.4328, -0.2997, 0.4559, -0.2560, -0.0027, + 0.4216, -0.8116, -0.2374, 0.4854, 0.5336, -0.3588, 0.4325, -0.8042, + -0.4591, -0.4535, 0.6669, 
0.6368, -0.5369, 0.5936, -0.4235, -0.6996, + 0.0087, 0.2624, 0.5607, 0.6638, -0.6091, 0.2427, 0.4354, 0.0214, + -0.2835, 0.0946, -0.1392, 0.5883, 0.1297, 0.4315, 0.7669, 0.1802, + -0.5833, -0.1598, 0.4056, -0.2835, 2.4506, -0.0757, 0.6105, -0.2125, + 0.6271, -0.2360, 0.4260, -0.4225, -0.4409, -0.3974, -0.0852, 0.5301, + -0.5151, 0.3451, 0.6331, -0.0027, 0.3818, -0.2817, -0.4590, 0.6107, + -0.6834, 0.0148, -0.3686, -0.6853, 0.1435, -0.0316, 0.7759, 0.4233, + -0.0109, 0.9338, -0.3701, 0.0630, -0.1912, -0.1769, -0.5478, -0.2753, + -0.8406, -0.5571, 0.0083, -0.0551, 0.2625, 0.2223, -0.6563, 0.7482, + 0.0779, 0.5153, -0.1369, 0.1232, -0.0049, -0.9306, -0.2408, -0.5681, + 0.6130, 0.5066, 0.3246, 0.2921, -0.5615, -0.5330, -0.3602, 0.1486, + 0.1352, -0.4357, -0.2049, 0.5760, 0.2577, -0.4218, 0.6486, -0.2831, + 0.6872, 1.1954, -0.8105, 0.3491, -0.5672, -0.1094, -0.3430, -0.7002, + 0.6166, 0.5148, -0.4035, -0.2608, 0.4024, -0.5864, -0.2557, 0.2989, + 0.0739, 0.7871, 0.4411, -0.1270, 0.5860, 0.8288, 0.7889, -1.7029, + -0.2622, -0.2401, 0.4067, 0.6801, -0.5707, -0.6888, 0.3839, -0.2854, + -0.6706, 0.2172, 0.2885, -0.1456, 0.4580, 0.3836, -0.4238, -0.6184, + -0.0139, 0.5807, 0.6330, -0.3544, -0.4924, 0.5683, 0.4040, -0.2675, + 0.1704, 0.3875, 0.3749, -1.7340, 0.0225, 0.1601, -0.1486, 0.8069, + 0.4392, -0.5474, -0.5232, -0.0288, -0.5448, 0.3483, -0.2150, 0.6120, + -0.0751, -0.5416, -0.7117, 0.3526, -0.3994, -0.0146, -1.1012, -0.0727, + -0.4971, 0.3637, 0.2655, 0.4278, 0.6634, -0.0713, -0.0099, -0.1449, + -0.4338, -0.1937, -0.6498, -0.7250, 0.2271, -0.6612, 0.1220, 0.3243, + 0.1165, 0.2658, 0.6581, -0.2702, 0.1820, -0.8709, 0.3657, 0.6121, + 0.2829, -0.6148, 0.3372, 0.9084, 0.3225, -0.3238, -0.5079, -0.4441, + -0.5670, 0.3065, -0.0379, 0.6865, -0.7325, -0.6104, -0.6129, 0.3126, + 0.0805, -0.4352, 0.5677, 0.5351, -0.2273, -0.5317, -0.4505, -0.1210, + -0.8835, -0.3817, 0.8097, -0.0851, 1.0992, -0.4803, 0.3294, -0.1898, + 0.3222, 0.3128, 0.1108, 0.1788, -0.4207, -0.2907, 0.8966, 
-0.0309, + 0.4949, 0.6861, -0.4014, -0.2843, -0.1982, -0.0093, 0.6738, -0.6007, + 0.3616, -0.1017, 0.3762, -0.3327, 0.1810, 0.3345, -1.0147, 0.9662, + -0.1385, 0.2843, 0.4133, -0.6416, -0.2800, 0.3751, -0.6579, -0.4096, + 0.0644, 0.3496, 0.6392, -0.0599, -0.2617, -0.1823, -0.6173, 0.5599, + 0.2138, -0.0549, 0.2094, 0.6796, -0.5302, -0.1417, -0.1526, 0.5912, + 0.6554, -0.2012, -0.3520, 0.2011, 0.4550, -0.4031, 0.2536, 0.7203, + -0.5432, -0.1476, -0.7711, 0.6847, 0.2413, 0.2893, 0.7736, -0.4954, + 0.5358, -0.4769, -0.3864, -0.5988, 0.7091, 0.6538, -0.0866, -0.1456, + 0.8250, -0.0113, -0.4833, -0.0904, -0.2416, 0.2671, 0.7442, 0.7193, + -0.2374, -0.0454, -0.0685, -0.5255, 0.1050, -0.1814, 0.0709, -0.4543, + 0.4718, 0.5187, -0.6497, 0.8609, -0.3123, 0.3958, 0.0808, -0.2275, + 0.0566, 0.4994, -0.0844, 0.3117, 0.5139, -0.1189, -0.8420, 0.7102, + 0.1490, -0.1751, -0.0938, 0.5447, 0.7600, 0.1141, 0.0118, -0.0166, + -0.4014, 0.5753, 0.3237, -0.0531, -0.2725, 0.4342, 0.4947, -0.5230, + 0.7061, -0.5629, -0.5246, 0.1271, -0.6878, 0.3458, -0.0280, 0.2749, + 0.3727, 0.7445, 0.7685, 0.2294, 0.6002, -0.2364, -0.0361, 0.8746, + 0.5983, 0.7713, -0.0492, 0.0072, 0.3269, 0.4310, 0.1407, -0.4384, + -0.1729, -0.3326, 0.4758, -0.4218, 0.8020, -0.0350, -0.7215, 0.4019, + 0.9356, 0.5021, 0.5510, -0.3667, 0.4088, 0.5787, 0.5401, 0.5139, + 0.2937, 2.5777, -0.4828, -0.2321, -0.1922, 0.7759, 0.4836, -0.4644, + 0.1484, -0.9385, -0.7380, -0.1577, -0.8349, -0.3073, -0.5824, 0.5791, + -0.0707, 0.2943, -0.5056, -0.0182, 0.1451, 0.0616, 0.4046, 0.0973, + 0.4898, 0.7257, -0.2495, -0.0735, -0.8476, 0.2016, -0.0924, -0.2625, + -0.2927, -0.5473, -0.0670, -0.2398, -0.1042, 0.2342, -0.6123, -0.7766, + -0.2891, -0.1510, -0.3887, -0.8447, 0.3705, -0.3663, 0.3751, -0.3333, + 0.2739, -0.1393, -0.5013, -0.0648, 0.8232, 1.0130, -0.2885, 0.4336, + 0.4970, -0.2387, 0.1380, -0.7007, -0.7673, -0.3192, 0.2573, -0.4747, + -0.4189, -0.1628, -0.6715, 0.0745, 0.3072, 0.8509, -0.2102, -0.5690, + -0.4548, -0.0489, 
-0.0070, -0.0870, -0.2811, -0.6708, 0.6954, 0.4769, + -0.4482, -0.5612, -0.2993, 0.9320, 0.3225, -0.2130, 0.4533, 0.4963], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0083, -0.0071, -0.0021, ..., -0.0010, 0.0090, 0.0052], + [ 0.0555, -0.0439, -0.0148, ..., 0.0150, -0.0063, 0.0137], + [ 0.0143, 0.0434, 0.0090, ..., -0.0173, 0.0094, -0.0070], + ..., + [-0.0136, -0.0252, 0.0119, ..., -0.0044, 0.0303, 0.0039], + [ 0.0051, -0.0150, -0.0075, ..., -0.0273, -0.0061, -0.0200], + [ 0.0049, -0.0019, 0.0238, ..., 0.0028, -0.0135, -0.0199]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 1.1367, -0.1210, 0.1425, ..., -0.0217, -0.0073, -0.0175], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0033, 0.0186, -0.0174, ..., -0.0052, 0.0126, 0.0130], + [-0.0088, 0.0030, 0.0068, ..., 0.0046, -0.0482, 0.0263], + [-0.0032, 0.0138, -0.0109, ..., 0.0132, 0.0237, 0.0135], + ..., + [ 0.0033, -0.0081, -0.0091, ..., 0.0204, -0.0066, 0.0058], + [-0.0299, 0.0074, -0.0033, ..., -0.0114, 0.0144, 0.0044], + [ 0.0031, 0.0146, -0.0029, ..., 0.0022, -0.0339, -0.0151]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-4.5128e-03, 1.0891e-03, -3.3478e-02, -1.2718e-02, -6.7078e-02, + 6.7139e-02, 6.8726e-02, -1.5411e-02, 4.0192e-02, 3.3264e-02, + 2.8030e-02, -1.7822e-02, -5.0354e-02, -3.2593e-02, 8.6975e-03, + 1.7487e-02, -1.7090e-03, 4.8798e-02, -1.4435e-02, -4.2175e-02, + 2.5803e-02, 2.9007e-02, 2.6627e-03, -1.9150e-02, 3.9734e-02, + -5.0354e-02, -2.0618e-03, 2.3621e-02, 1.5732e-02, 2.6825e-02, + 1.3304e-04, 1.1345e-02, -7.6218e-03, -5.8632e-03, 2.7752e-03, + 3.4485e-02, -7.0251e-02, -5.3925e-02, -1.8692e-02, 4.6722e-02, + -7.8659e-03, -8.9264e-03, 2.0721e-02, -5.7182e-03, 1.3725e-02, + -4.4556e-03, 7.2784e-03, -1.8707e-02, -3.1403e-02, -2.4948e-03, + 1.8265e-02, 2.4246e-02, -1.9287e-02, 4.3732e-02, 1.9684e-02, + 4.5807e-02, -4.0497e-02, 
-1.6037e-02, 5.4291e-02, -1.3718e-02, + 8.4763e-03, -8.7585e-03, 1.9363e-02, 2.0309e-02, -3.0838e-02, + -1.6861e-02, 3.9398e-02, 2.0432e-02, -3.1796e-03, -1.1469e-01, + -2.4979e-02, -2.7313e-02, -2.8641e-02, -1.3489e-02, -1.9913e-02, + 1.3153e-02, 3.0518e-03, -2.1744e-03, 2.5864e-02, 6.5804e-03, + 2.2602e-03, 1.9440e-02, -2.1469e-02, -3.9215e-03, -1.3847e-02, + 1.6678e-02, -1.3870e-02, 3.2616e-03, -8.3466e-03, -2.1759e-02, + 2.8610e-02, 3.9215e-02, 3.1799e-02, 6.3629e-03, 3.6030e-03, + 2.7679e-02, 4.0527e-02, 5.1346e-03, -8.5144e-03, 1.0765e-02, + 6.1127e-02, 4.1870e-02, 6.3965e-02, -2.3132e-02, -4.7226e-03, + 6.6650e-02, 1.6022e-02, 2.7573e-02, 8.3084e-03, 1.3275e-02, + 2.3880e-02, 4.5654e-02, 2.8748e-02, -2.9831e-03, 4.4518e-03, + -2.8244e-02, 4.4220e-02, -5.7716e-03, -4.3762e-02, 4.9973e-03, + -1.5388e-02, -1.6312e-02, -6.7688e-02, -3.3905e-02, 5.9662e-03, + -4.7882e-02, -1.3046e-02, -3.3630e-02, -9.1324e-03, 3.1097e-02, + -1.4183e-02, 6.0081e-03, -2.5864e-02, -7.4768e-03, 2.9739e-02, + 3.4393e-02, 2.4811e-02, 2.9816e-02, -1.0612e-02, 1.4702e-02, + -4.1229e-02, 1.1559e-03, 1.6479e-02, -2.8671e-02, 1.7166e-02, + -6.2103e-02, -1.5732e-02, -6.9618e-03, 1.0216e-02, 6.4880e-02, + 3.8422e-02, 1.3800e-03, 1.3947e-02, 4.2786e-02, -8.7738e-03, + 2.3666e-02, -3.6224e-02, 6.2866e-03, 2.3880e-02, -4.7668e-02, + 1.2398e-02, -2.8549e-02, 5.6854e-02, -1.1093e-02, 4.1809e-03, + 1.5900e-02, -7.0923e-02, -1.3222e-02, -2.2984e-03, 1.9363e-02, + 1.1263e-03, 5.5176e-02, 1.3657e-03, -4.2725e-02, 2.6398e-02, + -6.1798e-02, 5.4810e-02, -6.8909e-02, -1.5808e-02, -2.3163e-02, + -2.2049e-02, -4.7668e-02, -9.6436e-03, 5.6000e-02, -1.8188e-02, + 1.4977e-02, -2.4994e-02, -3.9642e-02, 3.3295e-02, -2.5375e-02, + 1.7929e-02, 1.1589e-02, 1.4969e-02, 4.7546e-02, -3.4332e-02, + 1.5305e-02, 3.7201e-02, -3.9032e-02, -6.9641e-02, 4.6417e-02, + -1.0063e-02, -5.7144e-03, 7.8125e-03, 2.7008e-02, 2.4460e-02, + 6.2317e-02, 2.3178e-02, 2.5406e-02, 2.8976e-02, 2.5299e-02, + 1.0063e-02, 3.1708e-02, 
-1.2169e-02, 4.7729e-02, 1.2070e-02, + -2.1988e-02, -4.1870e-02, 4.9103e-02, -6.7749e-03, -3.1281e-02, + 1.7853e-02, 1.0002e-02, -6.2988e-02, 4.6509e-02, -3.8671e-04, + -6.1859e-02, -4.1595e-02, -9.8038e-03, 2.6657e-02, 1.9226e-02, + -1.1948e-02, 2.3865e-02, -2.6520e-02, 1.3962e-03, -2.2705e-02, + -3.2440e-02, -1.9623e-02, 1.5236e-02, -8.2779e-03, 2.0691e-02, + 1.8906e-02, 3.3112e-02, -9.6970e-03, -2.9480e-02, 1.0132e-02, + -3.9597e-03, 2.8870e-02, 1.1482e-02, -6.5346e-03, -1.7975e-02, + 6.3858e-03, 1.6129e-02, -1.6724e-02, -3.8696e-02, -1.1530e-03, + -4.0863e-02, -9.4147e-03, -1.8814e-02, -2.6245e-02, -4.2694e-02, + -7.2327e-02, 2.4548e-03, 2.0828e-02, 2.7802e-02, -1.8738e-02, + -1.2159e-03, 1.9043e-02, 3.7689e-02, -1.7334e-02, 3.0060e-02, + 1.4259e-02, -1.1581e-02, -6.1531e-03, -2.6794e-02, -4.4403e-02, + 4.0894e-02, 4.2572e-02, -2.1118e-02, 4.7028e-02, -2.6306e-02, + 1.8692e-02, -7.3303e-02, -3.6011e-02, -2.3788e-02, 6.3362e-03, + 7.7209e-03, 1.2604e-02, 4.7340e-03, 5.2307e-02, -3.3630e-02, + 3.0457e-02, -1.6266e-02, 2.8824e-02, 1.3283e-02, 1.4275e-02, + 9.7961e-03, 2.0966e-02, -1.2543e-02, -3.7262e-02, 1.6418e-02, + -2.0790e-03, -7.4997e-03, -2.3788e-02, 3.4912e-02, -1.4687e-02, + 1.8326e-02, 3.1006e-02, 6.1569e-03, 3.8177e-02, 5.0259e-04, + -2.3861e-03, -5.2299e-03, -4.9133e-03, 5.3101e-03, 2.4094e-02, + -4.6806e-03, 3.0090e-02, 1.5926e-03, 1.7303e-02, -1.1276e-02, + -4.8027e-03, -6.8245e-03, -2.7206e-02, 4.4342e-02, -1.5527e-01, + -6.2744e-02, 5.3139e-03, -4.3526e-03, 3.7323e-02, 1.3878e-02, + -7.0610e-03, -3.3478e-02, 1.8244e-03, -3.2501e-02, -5.3528e-02, + -4.8645e-02, -4.3755e-03, -1.1063e-02, 3.5114e-03, -4.6387e-03, + 1.9562e-02, 2.8488e-02, -3.0243e-02, 1.0025e-02, -1.1663e-03, + 4.0771e-02, -8.8501e-03, 4.4647e-02, -3.5858e-02, 8.3160e-04, + -2.9831e-02, 3.0079e-03, 7.6660e-02, 9.9548e-02, -2.1179e-02, + -9.8572e-03, 6.5231e-04, -4.4128e-02, 2.3819e-02, 3.6888e-03, + -1.6006e-02, 1.4183e-02, -3.2013e-02, -1.5976e-02, 1.1642e-02, + -2.1935e-03, 
1.7029e-02, 2.9251e-02, 1.3626e-02, 1.7410e-02, + 4.1077e-02, -6.7749e-02, -1.6632e-02, -2.3499e-02, 1.6891e-02, + 4.2000e-03, 1.7807e-02, 5.8197e-02, -4.5959e-02, -3.3722e-02, + -4.2496e-03, 3.6102e-02, -6.9580e-02, -2.7054e-02, -3.7903e-02, + -1.0242e-03, 1.3550e-02, 3.8177e-02, 1.6830e-02, -3.2187e-05, + -2.9358e-02, -1.7502e-02, 2.5772e-02, 4.2343e-03, 3.1281e-02, + 2.5223e-02, 2.3315e-02, 4.6356e-02, -1.2512e-02, -4.5837e-02, + -2.6962e-02, 5.2765e-02, -4.8103e-03, 2.2003e-02, 3.4356e-04, + -6.4392e-03, -6.1893e-04, -1.8494e-02, -2.4506e-02, 7.8344e-04, + -1.0498e-02, -2.3361e-02, -2.0096e-02, -2.3056e-02, 2.1317e-02, + -1.8481e-01, 3.2349e-02, 1.8555e-02, 4.7546e-02, -2.4323e-02, + -1.4732e-02, -1.5671e-02, 8.3237e-03, -3.6041e-02, -1.2108e-02, + 2.9030e-03, 3.0716e-02, -9.8648e-03, 3.7292e-02, 2.7710e-02, + -6.7566e-02, 3.8910e-02, 2.6230e-02, 3.8422e-02, 4.9011e-02, + -3.3150e-03, 4.1809e-03, 5.4588e-03, 5.0079e-02, -3.3508e-02, + 1.4694e-02, -7.9956e-03, 2.1408e-02, -1.5918e-01, 3.6438e-02, + -3.8239e-02, -4.7455e-03, 2.0096e-02, 2.3666e-02, -1.4549e-02, + -5.8556e-03, 2.7847e-02, -6.0303e-02, 1.0347e-03, 1.9241e-02, + 1.4847e-02, 1.0292e-02, 2.9163e-03, 3.0258e-02, -6.1615e-02, + 1.5434e-02, 1.9226e-02, -1.7914e-02, 1.5945e-02, -3.0228e-02, + -1.3977e-02, -7.9203e-04, -6.0852e-02, 2.8381e-02, 2.0416e-02, + -3.9154e-02, -4.3396e-02, -2.5238e-02, 1.8539e-02, -2.5787e-02, + 5.3528e-02, 3.0365e-03, 1.9028e-02, 1.6724e-02, 6.2347e-02, + -8.5373e-03, 3.2593e-02, -2.9129e-02, 2.1988e-02, -3.9215e-02, + 2.8473e-02, -9.6130e-03, -1.7288e-02, -1.4145e-02, -1.7014e-02, + 2.1423e-02, 1.2009e-02, -4.2877e-02, 5.3902e-03, -7.6477e-02, + 2.2354e-03, 5.6343e-03, 1.1192e-02, -5.4413e-02, 2.8519e-02, + 5.8022e-03, -1.9897e-02, -2.0561e-03, 6.8474e-04, 2.5528e-02, + 1.5205e-02, 2.7374e-02, 4.5074e-02, -3.3783e-02, -8.5815e-02, + -8.3237e-03, 7.4768e-03, -1.8661e-02, -1.6632e-02, 3.5461e-02, + -3.4943e-02, 3.0994e-06, 1.9165e-02, 7.0610e-03, -4.0588e-02, + 9.1457e-04, 
-3.0479e-03, -7.0801e-03, 8.1558e-03, -2.4689e-02, + 8.8272e-03, -1.7319e-02, -2.9465e-02, 3.0746e-02, -2.2110e-02, + 6.3110e-02, -4.1626e-02, 1.1772e-02, -4.8615e-02, 4.4708e-02, + 1.3359e-02, -3.3569e-02, -2.0752e-02, 7.0839e-03, 2.4223e-03, + -4.3549e-02, 3.9551e-02, -2.1988e-02, -1.8936e-02, 1.8784e-02, + 1.0193e-02, 1.0780e-02, 4.6616e-03, 3.2104e-02, 3.0472e-02, + -9.6130e-02, -7.0381e-03, -2.5883e-03, -5.7335e-03, -4.1382e-02, + -1.6769e-02, 6.8542e-02, -1.2489e-02, -3.9902e-03, 1.3103e-03, + -1.9669e-02, 5.8403e-03, -2.5345e-02, -3.3539e-02, 4.8340e-02, + 4.8187e-02, 4.1718e-02, 2.8198e-02, 3.8879e-02, -1.8631e-02, + 4.6814e-02, 1.6891e-02, -9.8953e-03, 4.2084e-02, 6.9122e-03, + 4.2725e-02, -6.0501e-03, 4.5624e-03, 1.1833e-02, -5.6824e-02, + 2.8095e-03, 2.9083e-02, 1.3138e-02, 1.4244e-02, 6.1798e-03, + 3.9795e-02, -2.0386e-02, 2.1988e-02, 5.4169e-02, 4.3373e-03, + -2.5040e-02, -3.0088e-04, 1.6937e-02, 7.5340e-04, 3.2135e-02, + -4.7913e-02, -2.1027e-02, -6.6040e-02, 4.8103e-03, -1.8738e-02, + -3.2158e-03, 2.6688e-02, -2.9816e-02, -1.7090e-02, 9.7198e-03, + 1.9272e-02, -5.0011e-03, -4.9103e-02, 1.7654e-02, 2.5436e-02, + 1.2337e-02, 4.6387e-03, 2.5879e-02, 2.0157e-02, 1.0941e-02, + -2.4048e-02, -6.4850e-03, 1.8524e-02, -2.9507e-03, -2.2602e-03, + 4.8157e-02, 1.4801e-02, -1.5381e-02, 1.4893e-02, 5.7697e-04, + -4.5807e-02, 9.9487e-03, -1.2199e-02, 5.0140e-02, -1.4999e-02, + 5.7709e-02, -2.1133e-02, 1.0757e-02, 2.0966e-02, 3.1281e-02, + -3.9024e-03, 4.7989e-03, 5.2704e-02, 2.0828e-02, -1.0963e-02, + -1.3313e-02, -2.4689e-02, -6.2927e-02, 5.5275e-03, 5.9128e-04, + 2.9037e-02, -4.2847e-02, 1.0155e-02, 5.3329e-03, 1.4275e-02, + 2.9663e-02, -2.4902e-02, 1.9085e-04, 3.0334e-02, 4.3396e-02, + 1.7212e-02, -4.9927e-02, -3.1921e-02, 2.3834e-02, 8.0505e-02, + -1.4977e-02, -2.0355e-02, -2.7008e-02, -2.5299e-02, -4.6272e-03, + 6.6772e-02, 1.6083e-02, -2.0142e-03, 2.4948e-02, -1.7349e-02, + -4.0398e-03, -1.9791e-02, 3.9917e-02, -1.6754e-02, 1.6830e-02, + 2.8000e-03, 
-1.3115e-02, -2.3819e-02, -9.0866e-03, 1.6571e-02, + -1.1642e-02, 1.4746e-01, -4.8126e-02, -2.1484e-02, -7.6637e-03, + 5.6030e-02, -3.9917e-02, 1.5289e-02, 1.1955e-02, 1.0445e-02, + 1.8097e-02, -5.3894e-02, -6.5651e-03, 1.0391e-02, 2.6566e-02, + 5.8350e-02, 2.1637e-02, 3.3936e-02, 1.3794e-02, 2.1118e-02, + -1.4984e-02, 1.7990e-02, 5.3619e-02, -1.8127e-02, 6.2378e-02, + -2.3651e-02, -8.1177e-03, 1.4168e-02, -3.0956e-03, 5.5237e-03, + 3.3783e-02, 2.7252e-02, 2.4902e-02, -2.5757e-02, -1.6632e-02, + -4.6021e-02, -2.4963e-02, 6.0028e-02, 2.2552e-02, -1.0608e-01, + -1.7654e-02, 1.4448e-03, -3.8483e-02, -3.5461e-02, -4.3976e-02, + -9.4376e-03, 5.6549e-02, -4.8645e-02, -1.4091e-02, 6.5002e-03, + -1.1063e-02, -2.4597e-02, 3.9886e-02, 1.5762e-02, 3.9307e-02, + 1.3527e-02, 2.9892e-02, -1.1322e-02, 1.5945e-02, -2.3911e-02, + 1.8356e-02, -1.0468e-02, -1.6129e-02, -8.1863e-03, 1.3138e-02, + 3.9581e-02, -5.4169e-02, 4.7333e-02, 3.4821e-02, -1.4366e-02, + 1.0414e-03, -1.4893e-02, -5.1544e-02, 3.5156e-02, 3.6072e-02, + -1.8501e-03, 8.4000e-03, -1.7441e-02, -9.5215e-03, -1.2230e-02, + -1.0429e-02, 1.2161e-02, -1.2169e-02, -2.6215e-02, -9.1374e-05, + -8.3847e-03, 1.2932e-02, 3.9246e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.4201, 1.5553, 1.4466, 1.4176, 1.4067, 1.4397, 1.3864, 1.3708, 1.4152, + 1.3614, 1.4378, 1.3843, 1.3444, 1.4626, 1.3758, 1.4693, 1.4095, 1.5620, + 1.4866, 0.4077, 1.4047, 1.3871, 1.4473, 1.4040, 1.5196, 1.4607, 1.4180, + 1.3462, 1.5646, 1.3797, 1.4029, 1.3888, 1.4918, 1.4661, 1.4651, 1.4938, + 1.3918, 1.4911, 1.4575, 1.4223, 1.4761, 1.4732, 1.4389, 1.4287, 1.4739, + 1.3806, 1.3968, 1.4439, 1.3657, 1.4691, 1.4194, 1.4101, 1.4352, 1.4645, + 1.4784, 1.4505, 1.3830, 1.6047, 1.4694, 1.4865, 1.4252, 1.3965, 1.4197, + 1.3777, 1.3788, 1.4218, 1.3675, 1.4547, 1.4370, 1.6006, 1.4216, 1.4492, + 2.2403, 1.3783, 1.4818, 1.4228, 1.4079, 1.4081, 1.5440, 1.5037, 1.3940, + 1.4326, 1.3924, 1.4796, 1.4971, 1.3687, 1.5182, 1.3814, 
1.3834, 1.4392, + 1.4455, 1.4210, 1.3747, 1.4317, 1.4766, 1.3185, 1.4140, 1.3892, 1.3706, + 1.5666, 1.4581, 1.3999, 1.3989, 1.3766, 1.4173, 1.4575, 1.4027, 1.4818, + 1.3969, 1.4545, 1.4167, 1.4052, 1.2946, 1.3471, 1.4722, 1.3740, 1.4010, + 1.3432, 1.4888, 1.4517, 1.4854, 1.4591, 1.4632, 1.4699, 1.3971, 1.4651, + 1.4098, 1.4119, 1.4666, 1.4620, 1.4262, 1.3439, 1.3321, 1.4214, 1.4017, + 1.4997, 1.5333, 1.4508, 1.4351, 1.3742, 1.4386, 1.4162, 1.4854, 1.4447, + 1.3915, 1.3794, 1.4169, 1.4852, 1.4739, 1.4276, 1.3996, 1.3800, 1.4374, + 1.4500, 1.3956, 1.1616, 1.4410, 1.5182, 1.4307, 1.4148, 1.4508, 1.3891, + 1.4194, 1.5416, 1.4519, 1.4224, 1.4381, 1.4547, 1.3615, 1.4564, 1.4398, + 1.3899, 1.3217, 1.4564, 1.5274, 1.4199, 1.4670, 1.5066, 1.5204, 1.3846, + 1.3979, 1.4862, 1.4596, 1.4462, 1.5558, 1.4122, 1.3897, 1.8632, 1.5444, + 1.5091, 1.4304, 1.3688, 1.4861, 1.4807, 1.4606, 1.4240, 1.4124, 1.4525, + 3.8975, 1.3556, 1.2548, 1.4661, 1.4985, 1.4950, 1.4275, 1.4643, 1.3548, + 1.3323, 1.4001, 1.4308, 1.4286, 1.3500, 1.3509, 1.3783, 1.4309, 1.5165, + 1.4920, 1.3666, 1.3648, 1.4776, 1.4400, 1.4439, 1.4176, 1.3889, 1.4822, + 1.3767, 1.4823, 1.5177, 1.3962, 1.4796, 1.4227, 1.4678, 1.4844, 1.4738, + 1.4363, 1.4863, 1.3920, 1.3446, 1.4588, 1.4765, 1.4450, 1.3699, 1.4560, + 1.4734, 1.5286, 1.4649, 1.4107, 1.4684, 1.4413, 1.4883, 1.3877, 1.4846, + 1.3845, 1.3414, 1.5221, 1.4007, 1.4440, 1.4092, 1.3943, 1.3575, 1.4469, + 1.3563, 1.3385, 1.4539, 1.4622, 1.4092, 1.4660, 1.4254, 1.4688, 1.3586, + 1.4422, 1.4098, 1.5499, 1.3933, 1.4258, 1.2855, 1.4232, 1.3333, 1.4687, + 1.3916, 1.4269, 1.5470, 1.5170, 1.4956, 1.5049, 1.4062, 1.4651, 1.5575, + 1.4794, 1.3587, 1.4639, 1.5001, 1.5203, 1.3703, 1.4702, 1.3974, 1.4340, + 1.4630, 1.5828, 1.4600, 1.4888, 1.4105, 1.5150, 1.4381, 1.3933, 1.3613, + 1.3687, 1.3838, 1.3728, 1.4708, 1.4796, 1.4643, 1.3512, 1.3890, 1.5210, + 1.5169, 1.3263, 1.3799, 1.4009, 1.5255, 1.4198, 1.4169, 1.3880, 1.3731, + 0.8802, 1.2407, 1.5166, 1.4331, 1.4277, 1.4972, 1.3723, 
1.4263, 1.3627, + 1.3381, 1.4444, 1.3821, 1.3215, 1.3868, 1.3506, 1.4783, 1.4441, 1.3980, + 1.4687, 1.4535, 1.5312, 1.4153, 1.4329, 1.4587, 1.4845, 1.4896, 1.4546, + 1.4669, 1.3970, 1.6142, 1.5081, 1.4518, 1.4433, 1.4726, 1.5298, 1.4506, + 1.4577, 1.3823, 1.4399, 1.3515, 1.5149, 1.4061, 1.3733, 1.4142, 1.4033, + 1.4039, 1.4585, 1.5243, 1.2887, 1.4220, 1.3885, 1.3685, 1.5446, 1.4967, + 1.3723, 1.3086, 1.4316, 1.5888, 1.4014, 1.4572, 1.4306, 1.3706, 1.3952, + 1.4537, 1.3702, 1.3470, 1.4911, 1.3435, 1.4427, 1.3334, 1.4723, 1.4442, + 1.4298, 1.4902, 1.5190, 1.5378, 1.3713, 1.4796, 1.3502, 1.3679, 1.4412, + 1.4243, 1.4387, 1.4598, 1.4295, 1.3602, 1.4251, 1.3908, 1.4348, 1.3431, + 1.2924, 2.2007, 1.4737, 1.5134, 1.4332, 1.4222, 1.4245, 1.4097, 1.4176, + 1.4686, 1.4317, 1.3341, 1.4361, 1.4114, 1.4940, 1.4381, 1.4572, 1.4983, + 1.4450, 1.3799, 1.2985, 1.3759, 1.4010, 1.3972, 1.3854, 1.4735, 1.4090, + 1.4359, 1.4176, 1.1470, 1.3816, 1.4783, 1.4746, 1.4164, 1.4836, 1.3951, + 1.4659, 1.3982, 1.4574, 1.4124, 1.4343, 1.4261, 1.3920, 1.4123, 1.4116, + 1.4015, 1.4180, 1.4316, 1.3637, 1.3466, 1.3692, 1.5015, 1.4733, 1.4815, + 1.4506, 1.4205, 1.5465, 1.3860, 1.4599, 1.4610, 1.4319, 1.3867, 1.4277, + 1.4058, 1.5038, 1.3254, 1.4081, 1.4177, 1.5158, 1.1846, 1.4519, 1.4454, + 1.4294, 1.4399, 1.4583, 1.4504, 1.3279, 1.4267, 1.4588, 1.5297, 1.4256, + 1.4696, 1.3555, 1.3443, 1.4276, 1.4016, 1.4142, 1.5254, 1.4633, 1.4104, + 1.3277, 1.4271, 1.3069, 1.5239, 1.3506, 1.4531, 1.3500, 1.5408, 1.4959, + 1.4397, 1.4760, 1.4411, 1.4869, 1.3990, 1.3856, 1.3908, 1.4489, 1.3610, + 1.4872, 1.4184, 1.4228, 1.4071, 1.3704, 1.3442, 1.4610, 1.3600, 1.4608, + 1.3470, 1.4855, 1.3853, 1.4027, 1.4836, 1.5157, 1.4284, 1.4205, 1.5115, + 1.5605, 1.3542, 1.3765, 1.3660, 1.4906, 1.3845, 1.4881, 1.5181, 1.4794, + 1.3891, 1.3431, 1.3882, 1.5148, 1.4450, 1.4678, 1.3192, 1.3673, 1.4189, + 1.4483, 1.4242, 1.3967, 1.3293, 1.4184, 1.4218, 1.4289, 1.5523, 1.4417, + 1.3730, 1.4459, 1.4928, 1.4107, 1.4603, 1.4857, 1.4236, 
1.4498, 1.4567, + 1.4311, 1.4568, 1.3501, 1.4661, 1.5013, 1.4657, 1.4661, 1.4130, 1.4475, + 1.4720, 1.4931, 1.4820, 1.4778, 1.5414, 1.5453, 1.6388, 1.3688, 1.4117, + 1.4119, 1.4349, 1.4636, 1.4066, 1.4177, 1.4517, 1.3720, 1.5074, 1.5475, + 1.4340, 1.3701, 1.4308, 1.4670, 1.6579, 1.4371, 1.4845, 1.4797, 1.3362, + 1.3730, 1.4448, 1.4329, 1.5366, 1.3915, 1.3983, 1.5381, 1.5082, 1.4285, + 1.4790, 1.4179, 1.4467, 1.3747, 1.4305, 1.4426, 1.3684, 1.4587, 1.5358, + 1.5565, 1.4905, 1.3312, 1.3485, 1.5212, 1.4619, 1.3691, 1.4415, 1.3194, + 1.3221, 1.3496, 1.4874, 1.5061, 1.3922, 1.2938, 1.4972, 1.4190, 1.4368, + 1.5890, 1.3690, 1.4215, 1.4396, 1.5313, 1.4531, 1.5123, 1.4330, 1.4697, + 1.3048, 1.3517, 1.4730, 1.5835, 1.3724, 1.4596, 1.4266, 1.4551, 1.4752, + 1.5286, 1.3820, 1.4288, 1.5007, 1.4162, 1.3557, 1.4200, 1.3324, 1.3406, + 1.4550, 1.4577, 1.5262, 1.3809, 1.3025, 1.4433, 0.3834, 1.5315, 1.4227, + 1.4417, 1.3479, 1.3955, 1.4660, 1.4970, 1.4552, 1.3603, 1.4358, 1.5342, + 1.4399, 1.4024, 1.3257, 1.4345, 1.4759, 1.3737, 1.3923, 1.3939, 1.4668, + 1.5472, 1.3777, 1.2935, 1.3241, 1.5129, 1.4817, 1.4110, 1.3857, 1.4226, + 1.4402, 1.4090, 1.3844, 1.4178, 1.4924, 1.3791, 1.4072, 1.4571, 1.4994, + 1.4460, 1.4087, 1.4387, 1.4671, 1.4868, 1.3603, 1.2742, 1.4785, 1.4911, + 1.5177, 1.3701, 1.4761, 1.4919, 1.4244, 1.3797, 1.4817, 1.3420, 1.5247, + 1.4447, 1.4205, 1.4528, 1.2608, 1.4320, 1.3855, 1.4680, 1.4616, 1.4392, + 1.4488, 1.5083, 1.6081, 1.4801, 1.4033, 1.4737, 1.5151, 1.4830, 1.4994, + 1.4744, 1.3404, 1.4201, 1.3889, 1.5779, 1.3445, 1.4092, 1.3881, 1.4861, + 1.4624, 1.2567, 1.4012], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 7.1926e-02, 9.8689e-02, -1.2303e-01, -2.5298e-02, 3.4331e-02, + 1.2279e-01, 7.0546e-02, 7.1920e-02, -1.2536e-01, 1.0271e-02, + 5.1308e-02, -2.0179e-02, -9.3063e-02, -2.3841e-02, -6.1843e-02, + -5.7950e-02, -2.1823e-02, -4.0402e-02, -4.8366e-02, 4.5095e+00, + -8.1055e-02, 3.5031e-02, 5.1826e-03, -8.3012e-03, -2.8866e-02, + 
2.0101e-02, -1.2298e-02, 2.3551e-03, 2.6563e-02, -9.2846e-02, + 3.9427e-02, 7.3944e-03, 3.6174e-02, 7.0540e-02, 1.0127e-01, + -7.3228e-02, 1.6834e-02, -4.9488e-02, -1.2337e-01, 3.9531e-02, + 2.1712e-02, -6.2757e-03, -1.2862e-02, 2.7663e-02, -4.1635e-02, + 1.9606e-03, 4.4968e-02, -2.3781e-02, 7.2327e-03, 4.9955e-02, + 4.4026e-02, -1.3647e-02, 2.4914e-02, 4.7459e-02, 6.1771e-02, + 4.8846e-02, -9.5908e-02, -5.7176e-02, 3.2771e-02, 2.4921e-02, + 4.3089e-02, -5.9890e-02, 4.0627e-02, 3.5825e-02, 8.3920e-02, + -7.3116e-03, 6.8076e-02, 9.7614e-02, -3.1050e-03, 1.9702e-01, + 2.8901e-02, 3.4745e-02, 2.8137e-01, 1.2861e-01, -8.9629e-02, + 7.5863e-02, 5.6941e-02, -1.0249e-01, -5.3639e-02, 4.9301e-02, + 4.0120e-03, -3.8715e-02, -1.4343e-02, 1.1497e-01, 2.3780e-02, + 4.0012e-02, 1.2210e-01, 7.2994e-02, -1.2727e-02, 1.7420e-02, + 6.5306e-02, 7.4932e-03, 4.0247e-02, -5.5433e-02, -2.5412e-02, + 4.9268e-02, -1.1561e-02, -6.6962e-02, -5.6268e-02, -2.7171e-02, + 3.4363e-02, -1.0745e-01, 3.3880e-02, -6.8456e-02, 7.3153e-02, + 1.5285e-02, -1.0217e-02, -6.7162e-02, 2.9075e-02, -7.0045e-02, + -4.0472e-02, -2.5368e-02, 4.4549e-02, 5.6120e-02, 3.2623e-02, + -5.4795e-02, 2.6527e-02, -5.5896e-02, -1.6000e-02, 1.0200e-01, + -4.4206e-02, -1.1714e-01, -6.5153e-02, -1.2306e-02, -1.4154e-01, + 2.1956e-02, -1.1985e-01, -1.2669e-02, 2.0597e-02, -4.3890e-02, + -1.2176e-01, 4.0412e-02, -5.9347e-02, -2.5230e-02, -5.3073e-02, + 4.3119e-02, -3.1720e-02, -9.3149e-04, -4.3995e-02, -9.7696e-03, + 3.2038e-02, 1.4291e-03, 4.9957e-02, 4.6637e-02, 5.3679e-02, + -1.2209e-02, -1.9658e-02, -2.5437e-02, 5.6141e-03, 1.0341e-01, + 2.0419e-01, -5.7293e-02, -1.0318e-01, -1.5183e-03, -5.2509e-02, + -1.9216e-01, -4.7395e-02, -5.7462e-02, 4.7400e-02, 7.5755e-03, + 1.4958e-01, -1.2968e-01, 3.6259e-03, 2.0252e-02, 1.0480e-02, + 1.7640e-02, -4.4658e-02, 2.1554e-02, -4.3620e-02, 4.1143e-02, + -4.8213e-02, 6.6094e-02, 4.4460e-02, 4.8178e-02, 2.9246e-02, + -7.9401e-02, 1.5524e-02, 5.2811e-02, -5.4107e-02, 7.2643e-03, + 
-2.3653e-02, -6.1912e-02, 4.2795e-02, -3.0012e-02, 1.7330e-02, + -1.7558e-01, 3.3922e-02, -2.2670e-01, -3.1409e-02, -7.9016e-02, + 1.7179e-03, -1.7622e-02, 9.6026e-02, 5.8238e-02, -1.0110e-01, + 2.8476e-02, -1.9550e-02, -1.1975e-01, -3.5531e-02, 5.4143e-02, + -1.2903e-01, -2.2033e-02, -3.3625e-02, 2.7479e-02, 3.7191e-02, + 1.0223e-01, -1.0936e-01, 8.2673e-02, 1.2283e-01, 2.0124e-02, + 3.4005e-03, 1.8433e-01, -1.0718e-02, -4.5526e-02, -5.3880e-02, + -3.0684e-02, -3.0079e-02, 6.1851e-02, -1.0747e-01, -1.5114e-01, + -2.1672e-02, -3.4361e-02, 7.0627e-02, -1.8793e-02, 2.9893e-02, + -3.4514e-02, 8.0450e-02, -1.5180e-03, 1.1799e-01, -3.8760e-02, + -3.0789e-03, 1.6182e-02, -3.7646e-02, 7.8771e-02, -2.8594e-02, + 4.2433e-02, -6.7604e-02, 5.3816e-02, 6.7646e-02, 1.8357e-02, + 8.8243e-02, 6.2018e-02, 3.7490e-02, 5.1684e-02, 3.0710e-02, + 4.0863e-02, 7.8782e-02, -5.9085e-02, -4.1544e-03, -1.7084e-02, + 4.6805e-02, 4.1792e-04, 6.9342e-02, -7.3616e-02, 9.0404e-02, + -6.6740e-02, -6.1122e-02, -8.2978e-02, 3.7532e-02, 6.6200e-03, + -7.2245e-02, -5.4751e-02, -7.9822e-03, -1.0696e-01, -1.4899e-02, + 4.4273e-02, -2.2897e-02, -1.2173e-02, -7.6203e-02, -1.0068e-02, + -2.9959e-02, 4.3258e-02, 5.1004e-02, -4.1525e-02, 4.9007e-03, + 3.9915e-02, -1.2598e-01, 2.5546e-02, 3.7653e-02, -8.7241e-02, + 1.8375e-01, 1.8118e-01, -4.7049e-04, 2.6604e-02, 2.1197e-02, + 4.7143e-02, 8.6612e-02, -5.3318e-02, 1.3800e-01, -9.7973e-02, + -9.6148e-03, -2.5540e-02, 1.1848e-01, 3.4482e-02, 4.3729e-02, + 2.8187e-03, 1.2743e-01, 5.8442e-02, 7.1207e-03, 3.7814e-02, + 3.9278e-02, 2.5866e-02, -2.0183e-02, 1.0628e-01, 6.9019e-03, + 6.3032e-03, 8.7620e-02, -6.0641e-02, 8.8681e-02, -3.2896e-02, + 4.7297e-02, 1.7168e-02, -5.5006e-02, -6.5152e-02, 3.4500e-04, + -9.7153e-02, 1.8775e-03, 4.4805e-02, 1.4104e-01, -4.3080e-02, + -8.6769e-02, 1.9695e-01, -8.2901e-02, -2.9552e-02, 1.0799e-01, + -2.1573e-01, 3.3186e-02, 4.6826e-02, 6.3347e-02, -1.4726e-01, + 1.4905e-01, -3.3903e-02, 1.3529e-01, -4.4907e-02, 3.0754e-02, + 
8.4923e-02, -1.1759e-02, -5.3177e-02, 1.8132e-03, 1.7820e-03, + -7.6560e-02, 3.6070e-02, -1.5777e-02, -3.2811e-02, 9.8611e-02, + 4.6915e-02, 6.5758e-02, 6.3504e-02, 3.6216e-02, 1.3813e-02, + -6.7687e-02, 4.9432e-02, 1.5750e-02, 2.6505e-04, -4.4287e-02, + -4.7489e-02, -4.3150e-02, 2.3563e-02, 7.1404e-03, -7.6061e-02, + 4.5241e-02, 8.6304e-03, 3.8552e-02, -7.6021e-02, -1.8180e-02, + -4.3500e-02, 4.0450e-02, 1.1016e-01, -7.8001e-03, 3.5547e-02, + 8.0275e-02, -4.0487e-02, -7.4191e-02, -9.4242e-03, -2.3875e-02, + 1.7037e-02, 8.3679e-02, 7.0398e-02, -1.3054e-02, -1.6851e-02, + -1.0129e-01, 7.7011e-02, 1.8186e-02, -1.5612e-02, -2.3688e-02, + -1.5667e-02, -3.0407e-02, -6.1633e-02, -7.1531e-02, -5.1729e-02, + 2.9326e-02, -3.0786e-02, -5.9341e-02, -1.3668e-03, 3.4147e-02, + 8.1444e-04, 1.3175e-02, 2.3305e-02, -8.5761e-03, 6.6633e-02, + 1.0857e-01, -4.4033e-02, 6.4443e-02, 4.3963e-02, -1.0024e-01, + -1.2691e-01, -6.5507e-02, 3.2509e-02, -1.7542e-02, -2.8070e-02, + -6.9461e-02, 5.4981e-02, 1.0495e-01, -7.2139e-02, -4.5150e-02, + -1.3194e+00, -2.1716e-02, 1.6033e-02, -2.0474e-02, -1.4458e-02, + -1.7178e-02, 5.0754e-03, 4.1188e-03, -5.8843e-02, -6.4931e-02, + 3.5215e-02, -1.2734e-01, -1.0457e-01, 3.7082e-02, -3.0661e-02, + -8.1785e-02, -6.2548e-02, -2.9710e-02, 6.4331e-02, 7.1621e-02, + -2.1610e-02, 2.6836e-02, 5.8310e-02, 1.0695e-01, -7.6603e-02, + -2.8282e-02, -5.2149e-02, -5.0183e-02, 1.3031e-01, 1.0361e-01, + -1.1657e-01, -1.3109e-03, -7.0763e-02, 1.3312e-01, 1.7073e-02, + -6.3133e-03, 7.3985e-03, -1.0757e-01, 4.0777e-02, 5.6490e-02, + 1.1158e-02, 1.7426e-02, 3.9671e-02, -1.4881e-02, -8.9648e-02, + 3.4174e-02, 4.1025e-02, -8.3607e-02, 1.4398e-02, -3.9054e-03, + -1.1896e-02, 3.6092e-02, -6.1125e-02, 7.3290e-02, 2.2875e-02, + 2.2129e-02, -1.0867e-01, -5.2400e-02, 5.9108e-03, -4.7872e-02, + 4.4676e-02, 1.6246e-01, 8.7665e-02, 7.1226e-03, 9.3367e-02, + 1.0172e-01, -1.0781e-01, 2.2360e-02, 1.4627e-01, -7.5511e-02, + -3.8149e-02, -1.0550e-02, 1.2566e-01, 8.7934e-02, 2.7694e-02, + 
4.6231e-02, 9.8059e-03, 1.1057e-02, -1.2549e-02, -1.2867e-01, + 3.2761e-02, -6.6936e-02, 5.8947e-02, -3.9928e-02, 2.3036e-02, + 2.4065e-02, -3.8741e-02, 5.7200e-02, -4.8267e-02, -5.7932e-02, + 4.2671e-02, 1.0091e-01, 6.2769e-02, 1.2278e-02, 4.0642e-02, + -9.1265e-02, -1.0523e-02, -4.5142e-02, 5.2563e-02, -7.1983e-02, + 4.8271e-03, 5.9733e-02, 1.5306e-01, 1.2165e-01, -7.3451e-02, + -4.4362e-02, -5.2985e-03, 1.2389e-01, -1.8968e-02, -4.3705e-02, + -7.5750e-03, 1.7417e-03, -1.3828e-02, 6.1293e-03, -6.8898e-02, + 2.5053e-02, 3.2660e-02, -2.2871e-02, -6.5013e-02, -3.3281e-02, + 1.6938e-02, 5.2703e-03, 1.7473e-02, -4.0848e-02, -6.3855e-02, + -4.4330e-02, 1.2664e-02, -1.8142e-02, -3.0737e-03, 3.5341e-04, + -3.6272e-02, 6.4434e-02, 8.5610e-03, 5.5707e-02, 3.1020e-02, + 5.4425e-02, -1.0614e-02, -6.8428e-02, 5.0324e-02, -2.5363e-02, + -1.7314e-02, 2.0876e-02, -3.0069e-02, -9.7373e-03, 8.5768e-02, + -6.7738e-02, 1.0936e-01, 2.3543e-02, 2.8994e-02, 1.1516e-01, + 3.2016e-02, 2.9510e-02, 1.9065e-02, 5.2079e-02, 4.7913e-02, + 6.7791e-02, 1.1473e-02, -8.5581e-02, -3.5063e-02, 7.8980e-02, + 2.3897e-02, 8.8964e-03, 8.0085e-02, 4.4945e-02, -2.8048e-02, + -1.1211e-01, 1.1271e-03, 6.7566e-02, 1.0283e-01, 8.2337e-02, + 1.4015e-02, 3.6052e-02, -8.8143e-02, 3.8836e-02, -1.3229e-03, + 2.0368e-02, 1.0822e-01, 1.0346e-01, -4.8797e-02, 7.9168e-03, + -3.0540e-02, 4.3786e-02, 3.0310e-02, -2.8381e-02, 4.6088e-02, + -8.1171e-02, -9.7811e-02, -4.9416e-03, 2.1832e-02, 1.2438e-02, + -4.1425e-02, 7.7462e-02, -7.7581e-02, 4.9547e-02, -4.6468e-02, + 7.3378e-02, 1.7769e-02, 3.7301e-04, -2.8760e-02, -3.4329e-02, + -9.3701e-02, 2.8186e-02, 7.1374e-02, 2.5266e-02, -1.4843e-02, + 6.4047e-02, 1.9724e-04, 2.2431e-02, 2.5484e-02, 2.5640e-02, + -3.9437e-02, 5.5480e-03, -2.8331e-02, 5.9693e-02, -2.6961e-02, + 1.3684e-01, -7.8911e-03, 8.4968e-02, 1.5320e-01, 3.1721e-02, + 2.1825e-02, -2.4057e-02, -9.8858e-03, 6.1091e-02, -6.6224e-02, + 1.3962e-02, -3.9338e-02, 5.9961e-02, -3.8697e-02, 3.3387e-02, + 7.4055e-03, 
3.7146e-02, 7.8271e-02, 5.5880e-02, 1.4089e-01, + 9.3110e-02, -1.4280e-02, -4.0142e-02, 6.0164e-02, -7.6656e-02, + 4.5689e-02, -1.0814e-01, -3.5491e-02, 4.5264e-02, 9.2623e-02, + -7.1723e-02, 1.7291e-02, -8.7315e-02, -4.0728e-03, 1.0796e-01, + 3.3997e-02, -9.8876e-02, -8.9155e-02, 3.8369e-02, 1.9530e-02, + -1.9117e-01, -2.0658e-03, 6.6779e-03, 8.1699e-03, -9.0752e-03, + 4.4134e-02, -3.9304e-02, -5.1453e-02, -1.0062e-02, 6.5434e-02, + -4.0342e-02, -4.5351e+00, -2.3207e-02, 5.0064e-02, -2.5201e-02, + -7.1243e-02, -4.5385e-02, -2.5975e-02, -3.4780e-02, 4.8646e-02, + -2.3921e-02, -2.9032e-02, 1.5368e-02, -8.8112e-03, -2.5050e-02, + 5.5610e-02, -1.1592e-02, -6.9792e-02, 3.3603e-02, -4.6731e-02, + 1.1831e-01, 8.9346e-02, -1.9387e-02, -1.4286e-02, -1.2412e-01, + -3.5152e-02, 4.5515e-02, 5.9119e-02, 1.7479e-02, 6.7957e-02, + -1.4066e-02, 7.6105e-02, 6.1042e-02, 4.2792e-02, 2.3038e-02, + 1.0391e-02, 1.9895e-02, 8.4799e-03, -8.5925e-02, 1.9332e-02, + 4.1033e-03, -1.8517e-02, 5.1354e-02, -8.4071e-02, 5.3309e-02, + 3.8044e-03, -2.0082e-01, 9.7535e-02, 4.2757e-02, 8.7049e-02, + 9.1052e-02, 3.9801e-02, 6.2336e-02, 7.5325e-02, -3.0500e-03, + -2.3021e-02, 1.4764e-02, -6.1970e-03, 2.1045e-03, -5.7605e-02, + 8.1457e-02, -1.2928e-01, -1.1268e-01, 7.0443e-02, -3.6264e-02, + -3.0411e-02, -1.2364e-01, -4.2091e-02, 7.2788e-03, -5.1890e-02, + -4.4106e-02, -4.7509e-03, -6.6056e-02, -4.5752e-02, 7.1066e-02, + -1.1775e-02, 1.1221e-01, 1.0023e-01, 5.5698e-02, -4.2404e-02, + 3.4032e-03, 3.7870e-02, 7.1071e-02, -6.0725e-02, -7.0095e-02, + -7.1618e-02, 8.7326e-02, -1.1954e-04], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0018, -0.0035, 0.0047, ..., -0.0104, 0.0071, -0.0345], + [-0.0061, -0.0015, -0.0073, ..., -0.0022, -0.0246, -0.0080], + [-0.0041, -0.0014, -0.0101, ..., -0.0074, 0.0381, -0.0051], + ..., + [ 0.0136, 0.0115, -0.0352, ..., -0.0105, 0.0295, 0.0026], + [ 0.0275, 0.0076, -0.0009, ..., 0.0037, -0.0102, -0.0048], + [-0.0325, -0.0094, -0.0329, ..., 0.0203, 
0.0127, 0.0209]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3206, 0.0299, -0.2832, ..., -0.3774, -0.0878, -0.3206], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0119, 0.0111, -0.0122, ..., 0.0184, 0.0008, 0.0128], + [ 0.0061, 0.0051, 0.0010, ..., -0.0092, -0.0083, -0.0078], + [ 0.0028, 0.0019, -0.0141, ..., -0.0223, 0.0200, -0.0158], + ..., + [-0.0324, 0.0039, 0.0036, ..., -0.0004, 0.0073, -0.0153], + [-0.0048, 0.0068, 0.0207, ..., 0.0054, 0.0054, 0.0070], + [-0.0170, 0.0005, 0.0025, ..., -0.0155, 0.0081, 0.0126]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 4.6204e-02, 5.8441e-02, -5.1392e-02, 1.8127e-02, -1.4503e-02, + 1.1530e-01, 7.2876e-02, -1.4977e-02, 5.1849e-02, -2.3407e-02, + 6.6528e-02, 4.4891e-02, -9.1919e-02, -5.7007e-02, 5.9967e-02, + 2.0325e-02, -2.2049e-02, 2.5101e-02, -2.0569e-02, -7.3242e-02, + 3.3478e-02, 5.7281e-02, -9.8419e-03, -8.0017e-02, 6.3232e-02, + -4.4342e-02, -1.3260e-02, 3.1464e-02, -6.7932e-02, 1.5965e-03, + -2.5314e-02, 5.3833e-02, -5.8044e-02, 5.3284e-02, 3.6621e-02, + 1.6907e-02, -5.3741e-02, -6.6589e-02, 2.3026e-02, 9.1675e-02, + -2.7908e-02, -1.7334e-02, 5.2734e-02, 2.9926e-03, -2.6947e-02, + 1.2520e-02, 3.7933e-02, 4.8340e-02, -1.3135e-01, -1.7252e-03, + -3.8872e-03, 3.7537e-02, -1.6800e-02, 4.6600e-02, 5.3894e-02, + 1.0391e-02, -7.0740e-02, -1.1070e-02, 1.3390e-02, -1.5808e-02, + 4.9286e-02, -1.4046e-02, -6.4545e-03, -4.1962e-02, -2.0508e-02, + 2.2232e-02, 6.1859e-02, 2.9922e-02, 9.7885e-03, -4.4525e-02, + -7.2002e-04, 1.0872e-02, 3.9642e-02, -1.8005e-03, -9.3567e-02, + -3.4668e-02, 1.4938e-02, -5.6992e-03, 8.4915e-03, -1.9348e-02, + -2.7542e-02, 5.0850e-03, -5.3589e-02, -5.9319e-03, -4.7569e-03, + -8.8272e-03, 3.0289e-02, -5.6458e-03, 2.9011e-03, -6.5063e-02, + -7.7454e-02, 7.1289e-02, 8.5754e-03, -3.6087e-03, -1.3557e-02, + 6.9153e-02, 7.7087e-02, 2.4429e-02, -2.4673e-02, 
-1.0025e-02, + 2.5177e-02, -1.5161e-01, 4.2419e-02, -8.7402e-02, -5.4535e-02, + 2.6260e-02, 2.1763e-03, 3.0106e-02, 4.6501e-03, -1.1963e-02, + 7.1983e-03, 1.1719e-01, 5.6671e-02, 5.1758e-02, 2.3102e-02, + -4.7180e-02, 2.0309e-02, -7.3090e-03, -1.1462e-01, -7.7858e-03, + -1.0674e-02, -4.4342e-02, -4.5593e-02, -1.9928e-02, -5.2673e-02, + -6.5918e-02, -5.4749e-02, -5.9418e-02, 1.8753e-02, 5.8167e-02, + 1.4366e-02, 7.4890e-02, -9.4482e-02, -5.0049e-02, 5.6946e-02, + 7.3914e-02, 2.0340e-02, -3.3627e-03, -9.0515e-02, 1.0352e-01, + -2.6215e-02, 3.2440e-02, 5.1544e-02, 2.4673e-02, 6.7282e-04, + -2.1332e-02, -2.5415e-04, 2.0615e-02, 2.0390e-03, 8.2825e-02, + 3.0243e-02, -4.2558e-04, 4.9683e-02, 4.6326e-02, -8.1970e-02, + 9.2316e-03, -8.6487e-02, 6.5247e-02, 1.1981e-04, -2.8229e-03, + 4.2542e-02, -1.0938e-01, -1.5732e-02, 8.0872e-03, -2.2781e-02, + 2.0920e-02, -1.6833e-01, -2.4734e-02, -1.9852e-02, 8.9783e-02, + -2.3376e-02, 7.5623e-02, -4.9133e-02, -3.3386e-02, -3.0334e-02, + -8.6182e-02, 2.4261e-02, -4.0039e-02, 2.0599e-02, 1.3298e-02, + 2.4902e-02, -6.8665e-02, -1.3580e-02, 2.7344e-02, 3.2837e-02, + -4.9210e-03, -5.4077e-02, 2.8174e-01, 5.2887e-02, -6.7444e-02, + 6.1150e-03, 2.0996e-02, 1.3037e-01, 1.1176e-01, -9.5520e-02, + 5.8838e-02, -1.4839e-03, -6.2347e-02, 1.8814e-02, 1.4618e-02, + -4.5815e-03, -5.8136e-02, 1.5396e-02, 5.5634e-02, 5.2002e-02, + 9.8694e-02, -6.1981e-02, 4.0710e-02, -1.0803e-02, 2.3403e-03, + 1.3390e-02, 2.5005e-03, 3.7354e-02, 5.5450e-02, -5.9357e-02, + -7.2571e-02, -2.1042e-02, 8.5083e-02, -5.7129e-02, -1.6895e-01, + 9.1003e-02, 3.0670e-03, 4.8370e-03, 4.2877e-02, 5.7983e-03, + -1.3132e-03, -6.2561e-02, -1.9104e-02, 6.4270e-02, 2.6672e-02, + 1.8631e-02, 5.1361e-02, -5.6244e-02, -1.5594e-02, -3.5858e-02, + -4.8737e-02, -2.8534e-02, 1.6953e-02, -4.9957e-02, 1.0632e-01, + -5.7411e-03, 2.0050e-02, -3.8849e-02, 1.4210e-03, -6.3972e-03, + -2.9724e-02, 3.4271e-02, -8.4290e-02, -1.4397e-02, 1.6800e-02, + 2.3911e-02, -3.8422e-02, -4.4861e-03, -1.2952e-01, 
1.7334e-02, + 1.6861e-03, -4.2175e-02, 9.4177e-02, -5.9204e-02, -3.0533e-02, + -1.4380e-01, -3.2959e-02, -1.0811e-02, -1.7792e-02, -6.7139e-02, + -3.5126e-02, 1.6022e-02, -2.6794e-02, 1.9394e-02, 3.3203e-02, + -1.4549e-02, -1.9501e-02, 1.5244e-02, -1.7181e-02, -7.9041e-02, + 9.0271e-02, 4.2633e-02, 7.7637e-02, 6.4087e-02, -8.1421e-02, + 8.1482e-02, 1.4046e-02, -4.6661e-02, -4.1412e-02, -1.3275e-02, + 4.4342e-02, 2.3346e-02, -5.0262e-02, 5.8746e-02, -7.6050e-02, + 5.1758e-02, -3.5553e-02, 8.8739e-04, -9.7351e-02, 2.1912e-02, + 1.2993e-02, 7.8613e-02, -1.4999e-02, 5.1422e-02, 5.5115e-02, + 4.3526e-03, -4.4830e-02, -9.6863e-02, 5.5969e-02, -4.8706e-02, + 5.9776e-03, 5.9906e-02, -1.4717e-02, 3.5278e-02, -2.7069e-02, + 2.9343e-02, -2.4090e-03, -4.6539e-02, -3.1799e-02, 6.6162e-02, + -1.7212e-02, 3.7781e-02, 1.9287e-02, 5.4016e-02, -3.1948e-03, + -9.8572e-03, -2.2186e-02, -3.3600e-02, 1.7838e-02, 2.9468e-01, + -2.1094e-01, 7.1411e-02, -2.4445e-02, 3.2501e-02, -1.4282e-01, + 6.5369e-02, -4.2450e-02, 3.3142e-02, -1.7761e-02, -5.8380e-02, + -2.8343e-03, -4.4708e-03, -8.4839e-03, 3.1647e-02, -2.3941e-02, + -4.8035e-02, 5.1910e-02, 2.4338e-02, 8.9264e-03, 3.7872e-02, + 6.0303e-02, -6.6162e-02, 5.1239e-02, 3.4943e-03, -7.2632e-02, + 1.5488e-02, 4.9622e-02, 4.0375e-02, -8.3313e-03, -5.2582e-02, + -2.9663e-02, -9.0027e-03, 1.3245e-02, -2.8702e-02, -8.6823e-03, + -3.5461e-02, -5.5450e-02, -1.6113e-02, -2.6245e-02, -5.1788e-02, + 2.9434e-02, -3.0579e-02, 7.8979e-02, 8.5571e-02, -1.0269e-02, + 4.1046e-02, -3.1204e-02, -6.1523e-02, -2.6520e-02, -8.5831e-03, + -8.5602e-03, 7.6660e-02, 5.7404e-02, -1.4267e-02, -4.5052e-03, + 1.1078e-02, 4.4556e-02, -5.9906e-02, -2.7466e-02, -8.3801e-02, + 9.2125e-04, -2.0523e-02, 8.5526e-03, 9.3889e-04, -1.0284e-01, + -8.7891e-02, 3.8971e-02, 3.6591e-02, 1.4114e-02, -3.1128e-02, + 6.0028e-02, -2.4994e-02, 6.2561e-02, 3.0350e-02, -5.2307e-02, + 1.6602e-02, 7.7576e-02, -2.0691e-02, 2.5986e-02, -6.1218e-02, + -3.5736e-02, 2.9236e-02, 5.2155e-02, 
-2.7573e-02, 3.2166e-02, + -3.6865e-02, 2.3148e-02, -5.7159e-02, -3.4546e-02, 3.0212e-03, + -9.9976e-02, -3.1021e-02, -1.0002e-02, 5.6213e-02, 1.6800e-02, + -7.2327e-02, -2.2507e-03, 2.1133e-02, -8.6853e-02, -5.5176e-02, + 5.8289e-02, 2.2949e-02, -4.5532e-02, 8.5938e-02, 6.1768e-02, + -1.0492e-01, 7.0435e-02, 8.5205e-02, 3.3020e-02, 8.1909e-02, + -9.2163e-03, 3.7292e-02, 1.9791e-02, 6.0577e-02, -6.4697e-02, + -9.0256e-03, -2.2507e-02, 1.0323e-02, -6.9092e-02, 3.5706e-02, + -3.5492e-02, 2.2415e-02, -2.1896e-02, 5.7678e-02, -2.4536e-02, + 2.3712e-02, -2.2385e-02, -8.9661e-02, 7.8064e-02, 5.8807e-02, + 3.7903e-02, -3.4760e-02, -1.6693e-02, -1.4915e-02, -5.1544e-02, + -3.3741e-03, -1.2665e-02, -8.1055e-02, -2.2537e-02, -1.7977e-03, + 1.5495e-02, 5.0507e-02, -5.6885e-02, 2.3560e-02, 3.1830e-02, + -2.3315e-02, -7.5134e-02, -7.4585e-02, 3.1281e-02, -7.7576e-02, + 1.1047e-01, 3.3142e-02, 6.2469e-02, 2.0046e-03, 3.7506e-02, + 2.1027e-02, 1.6602e-02, 5.1849e-02, 4.5807e-02, -6.7871e-02, + -1.8753e-02, 9.8495e-03, 4.0741e-02, -1.4687e-02, -1.0059e-01, + 4.0466e-02, 4.2847e-02, -3.2776e-02, 4.2419e-03, -7.5134e-02, + 5.6854e-02, -2.2797e-02, 5.1208e-02, -1.0303e-01, 2.5650e-02, + -3.6530e-02, -2.0542e-03, 2.3987e-02, -2.2217e-02, -2.7466e-02, + -4.6082e-02, 6.6467e-02, 7.7393e-02, -3.6652e-02, -1.0638e-01, + -6.0242e-02, -8.0200e-02, -5.3223e-02, 1.3268e-02, 5.2376e-03, + -4.8187e-02, -2.8870e-02, 4.0070e-02, -2.7695e-02, -6.3599e-02, + 3.1166e-03, 5.7678e-02, 3.4088e-02, -9.8324e-04, 1.9257e-02, + -1.5821e-03, -1.0948e-02, -5.5023e-02, -1.3626e-02, -5.9387e-02, + 3.8574e-02, -4.7852e-02, 3.1872e-03, -7.1655e-02, 5.0323e-02, + 5.2277e-02, -3.2776e-02, -3.9124e-02, -4.0955e-02, -4.5532e-02, + -4.1504e-02, 1.0522e-01, 2.8778e-02, 1.4038e-02, -1.4252e-02, + 6.8970e-03, 2.5620e-02, -2.8400e-03, 5.7617e-02, 5.7129e-02, + -8.1909e-02, -1.7700e-02, -3.0426e-02, -3.9337e-02, -5.2673e-02, + 2.1652e-02, 1.8539e-02, -7.2632e-02, -4.7180e-02, 6.1279e-02, + -5.0629e-02, 3.3264e-02, 
-5.4626e-02, -5.1727e-02, 6.0120e-02, + 6.1401e-02, -1.6281e-02, 5.2338e-02, 6.8115e-02, -3.2837e-02, + 8.8806e-02, -7.4120e-03, -4.5929e-02, 2.6871e-02, 2.0828e-02, + 2.6703e-02, 6.1493e-03, 2.7985e-02, -2.2278e-03, -2.0447e-02, + -4.0802e-02, 3.4790e-02, 7.5623e-02, 8.7524e-02, 8.7219e-02, + 3.7327e-03, 1.9341e-03, 6.0692e-03, 6.4331e-02, 7.9346e-02, + -2.7557e-02, -1.8860e-02, 1.7929e-02, -4.6539e-03, 2.0950e-02, + -1.1307e-02, -8.0383e-02, -9.9548e-02, -1.2985e-02, 1.4694e-02, + -1.4982e-03, -1.2039e-02, -4.5990e-02, 3.1403e-02, 3.7720e-02, + 8.4000e-03, 1.8600e-02, -2.5253e-02, 2.2232e-02, 2.4734e-02, + 5.6877e-03, 1.8463e-02, 3.3722e-02, 7.6294e-02, -1.8784e-02, + -4.1504e-02, -3.3386e-02, 7.5989e-02, -3.5034e-02, 6.9641e-02, + 4.2786e-02, 2.3697e-02, 2.8458e-02, 1.9760e-02, 4.0222e-02, + -7.4646e-02, -1.6129e-02, 2.0325e-02, 1.0529e-01, -1.8158e-02, + 1.5137e-01, 9.3384e-03, 1.5114e-02, 6.5552e-02, 9.2010e-03, + 6.3782e-03, 6.5880e-03, 1.6492e-01, 4.3976e-02, 4.2206e-02, + 3.6652e-02, -3.6621e-02, -1.8311e-02, 1.0323e-02, 5.4817e-03, + 1.4816e-02, -9.3445e-02, 3.4729e-02, 4.1595e-02, 2.5360e-02, + 3.9337e-02, 7.1167e-02, 3.4821e-02, 5.8441e-02, 1.0474e-01, + 6.0272e-02, -9.9792e-02, -7.5195e-02, 7.1655e-02, 1.0089e-01, + -5.3192e-02, 5.8441e-02, -1.1896e-01, -4.9805e-02, 9.4833e-03, + 9.4116e-02, -1.4209e-01, -5.2246e-02, 1.5942e-01, 4.7852e-02, + -4.1351e-03, 5.0598e-02, 7.8796e-02, 3.9520e-02, -5.8517e-03, + -2.4399e-02, -3.5370e-02, -8.7219e-02, -2.7390e-02, 9.0210e-02, + 5.2094e-02, -6.2622e-02, -2.3300e-02, 3.0960e-02, -6.3416e-02, + 8.2886e-02, -1.1761e-01, -4.1412e-02, 1.4107e-02, 3.2227e-02, + -1.3374e-02, -9.9121e-02, -2.8076e-03, 7.0557e-02, -2.4384e-02, + 1.1920e-01, 6.6650e-02, -6.0059e-02, 3.6041e-02, -3.1235e-02, + 5.3528e-02, -3.6621e-02, 7.0068e-02, -3.5736e-02, -1.0880e-02, + -6.6467e-02, 2.5543e-02, -8.5402e-04, -3.6346e-02, -9.3384e-03, + 1.8234e-02, -5.7404e-02, -3.2501e-03, -5.9448e-02, -3.1738e-02, + -9.0637e-02, -5.0476e-02, 
7.0862e-02, 2.3178e-02, -1.5820e-01, + -1.4290e-02, -3.4904e-04, 1.1505e-02, -2.6077e-02, -9.8343e-03, + -3.0182e-02, -2.0691e-02, -1.1168e-03, -4.4800e-02, 5.0751e-02, + -2.8564e-02, 1.4658e-03, 7.2083e-02, 4.0649e-02, 2.2446e-02, + 3.7415e-02, 3.5400e-02, -3.0701e-02, 2.8976e-02, -5.6671e-02, + -1.3855e-02, -5.4016e-02, -6.5552e-02, 1.2970e-02, -6.0730e-02, + -1.8860e-02, -1.0938e-01, 9.8114e-03, 2.0390e-03, 3.4576e-02, + 3.3207e-03, -5.9319e-03, -1.3916e-01, 6.0791e-02, 6.5186e-02, + 1.5007e-02, 5.7281e-02, -1.7487e-02, 6.9824e-02, 2.9663e-02, + -5.3375e-02, 1.5266e-02, -7.2510e-02, -2.8275e-02, -1.1635e-02, + -2.3514e-02, 9.7885e-03, 4.0588e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.1482, 2.0761, 2.1921, 2.0451, 2.0384, 2.1436, 2.2704, 2.1227, 2.1465, + 2.1898, 2.1549, 2.0869, 2.1795, 2.0904, 2.1714, 2.0272, 2.1504, 2.0925, + 2.1178, 1.2856, 2.1856, 2.1374, 2.2333, 2.0119, 2.2004, 2.1451, 2.0892, + 2.0631, 2.2006, 2.0803, 2.1321, 2.0761, 2.1056, 2.0743, 2.1243, 2.1250, + 2.3369, 2.1272, 2.0290, 2.1546, 2.0374, 2.3274, 2.2738, 2.1767, 2.0974, + 2.1418, 2.1897, 2.0832, 2.0704, 2.1172, 2.1430, 2.1572, 2.1444, 2.1279, + 2.2470, 2.0093, 2.0840, 1.9981, 2.0446, 2.0698, 2.1134, 2.1441, 2.1823, + 2.1272, 2.0367, 2.0682, 2.1908, 2.1000, 2.0956, 1.7824, 2.1168, 2.1184, + 2.3859, 2.0491, 2.1741, 2.1228, 2.0877, 2.1016, 2.2688, 2.0134, 2.1942, + 2.1419, 2.0865, 2.0894, 2.1286, 2.2371, 2.1443, 2.0431, 2.0828, 2.1831, + 2.0309, 2.0924, 2.0745, 2.1408, 2.1419, 2.1016, 2.2139, 2.1779, 2.1671, + 2.1297, 2.2277, 2.0503, 2.1542, 2.1528, 2.0703, 2.1439, 2.2227, 2.1981, + 2.0713, 2.0392, 2.0976, 2.1198, 2.0589, 2.1613, 2.1716, 2.0782, 2.0527, + 2.0557, 2.0537, 2.1340, 2.1217, 2.0609, 2.3008, 2.1750, 2.1099, 2.2135, + 2.1483, 2.1244, 2.0733, 2.0288, 2.1141, 2.1998, 2.0138, 2.1505, 2.3035, + 2.1225, 2.2076, 2.0718, 2.0643, 2.1111, 2.1190, 2.0362, 2.1451, 2.1716, + 2.0588, 2.1940, 2.1055, 2.1166, 2.1894, 2.0712, 2.1387, 2.1944, 
2.2454, + 2.2819, 2.1488, 1.3095, 2.1511, 2.0310, 2.0929, 2.1536, 1.9961, 2.1454, + 2.2574, 2.0555, 2.1161, 2.2521, 2.1017, 2.1226, 2.0426, 2.0752, 2.1164, + 2.1192, 1.8822, 2.1522, 2.0504, 2.1629, 2.1790, 2.3046, 2.1391, 2.1411, + 2.4045, 2.2437, 2.0769, 2.1762, 2.0932, 2.0806, 2.0046, 0.9834, 2.0615, + 2.1874, 2.0295, 2.1896, 2.1249, 2.1741, 2.1469, 2.2090, 2.3059, 2.0889, + 0.8082, 2.0433, 2.0004, 2.0083, 2.1260, 2.0278, 2.1430, 2.1549, 2.2318, + 2.2448, 1.9585, 2.0712, 2.0533, 2.0480, 2.1845, 2.0333, 2.2601, 2.1605, + 2.1659, 2.1481, 2.2122, 2.1820, 2.1369, 2.1664, 1.9730, 2.2248, 2.0468, + 2.2257, 2.2819, 2.1141, 2.1859, 2.0820, 2.1872, 2.1288, 2.1822, 2.1780, + 2.0478, 2.2415, 2.1621, 2.1453, 2.0071, 2.1323, 2.1880, 2.3021, 2.1457, + 2.2072, 2.1449, 2.1183, 2.0404, 2.2815, 2.2627, 2.1645, 2.2278, 2.1534, + 2.0938, 2.1091, 2.1209, 2.0492, 2.1297, 2.1816, 2.1060, 2.2206, 2.0955, + 2.1179, 2.1007, 2.2277, 2.1673, 2.0951, 2.2219, 2.1363, 2.0325, 2.1666, + 2.0490, 2.0837, 2.0539, 2.1483, 2.1213, 2.0532, 2.1494, 2.2184, 2.3024, + 2.1308, 2.1234, 1.9768, 2.2508, 2.1202, 2.0864, 2.1375, 2.2378, 2.1481, + 2.1342, 1.9919, 2.1629, 2.1637, 2.2153, 2.1035, 2.1337, 2.0895, 2.1282, + 2.1316, 1.9982, 2.0586, 2.1836, 2.1714, 2.1560, 2.1255, 2.2053, 2.1329, + 2.1358, 2.1552, 2.1542, 2.1948, 2.1878, 2.1202, 2.0789, 2.1483, 2.0329, + 2.2126, 2.1098, 2.2082, 2.0599, 2.2092, 2.3170, 2.2484, 2.1793, 2.0508, + 0.5618, 2.0714, 2.1515, 2.1989, 2.1049, 2.0814, 2.0987, 2.2290, 2.1709, + 2.2567, 2.1159, 2.0255, 2.2227, 2.0735, 2.0707, 2.0808, 2.3608, 2.1141, + 2.1577, 2.2603, 2.2280, 2.0188, 2.2263, 2.0251, 2.3628, 2.1262, 1.9121, + 2.3010, 2.1140, 2.3798, 2.0306, 2.1233, 2.0576, 2.1311, 2.1088, 2.0206, + 2.1556, 2.0028, 2.1734, 2.0158, 2.1590, 2.0562, 2.1899, 2.1982, 2.1236, + 2.0886, 2.2811, 2.1128, 2.1131, 2.1598, 2.0060, 2.0154, 2.1563, 2.2502, + 2.1250, 2.0527, 2.0428, 1.9846, 2.1653, 2.1090, 2.1451, 2.2003, 2.1499, + 2.2526, 2.2141, 2.0655, 2.0515, 2.1719, 2.1375, 2.1248, 2.0989, 
2.0976, + 2.0913, 2.1493, 2.2613, 2.2130, 2.0378, 2.2520, 2.1286, 2.1573, 2.1093, + 2.0292, 2.1074, 2.0239, 2.1475, 2.1301, 2.1112, 2.1901, 2.0577, 2.1609, + 1.4774, 2.2200, 2.2076, 2.0265, 2.1652, 2.0973, 2.2043, 2.0817, 2.2479, + 2.1600, 2.0770, 2.0933, 2.0445, 2.0840, 2.1304, 2.1265, 2.1881, 2.0445, + 2.1241, 2.1748, 2.2234, 2.0541, 2.2300, 2.1778, 2.0552, 2.2086, 2.0418, + 2.0995, 2.0420, 1.7627, 2.0249, 2.0225, 2.1123, 2.2335, 2.1247, 2.0972, + 2.0920, 2.1411, 2.2878, 2.1152, 2.0844, 2.1943, 2.1201, 2.1608, 2.0665, + 2.0956, 2.1365, 2.1061, 2.1110, 2.1114, 2.1407, 2.0190, 2.1246, 1.9944, + 2.2389, 2.1340, 2.1952, 2.0701, 2.1126, 2.1158, 2.3171, 2.0932, 2.1448, + 2.2950, 2.0981, 2.1204, 2.1288, 2.2271, 2.0780, 1.4625, 2.2277, 2.1814, + 2.1414, 2.0223, 2.0474, 2.2824, 2.1625, 2.0740, 2.0709, 2.1278, 2.2366, + 2.0778, 2.1140, 1.9411, 1.9732, 2.2932, 2.1656, 2.2058, 2.1473, 2.0715, + 2.1679, 2.1443, 2.0592, 2.1594, 2.1587, 2.2206, 2.0880, 2.0784, 2.1948, + 2.1170, 2.3211, 2.1703, 2.1656, 2.1462, 2.0380, 2.1215, 2.2290, 2.1520, + 2.2105, 2.0404, 2.0373, 2.1508, 2.1466, 2.0975, 2.1173, 2.2048, 2.0858, + 2.1657, 2.1193, 2.1691, 2.1388, 2.2129, 2.0910, 2.0813, 2.2194, 2.0774, + 2.1017, 2.2044, 2.1490, 2.1446, 2.3251, 2.2174, 2.1969, 2.1872, 2.2394, + 2.0567, 2.2579, 1.9658, 2.0598, 2.1581, 2.0006, 2.0074, 2.2322, 2.1349, + 2.1261, 2.0935, 2.2362, 1.5636, 2.0990, 2.0930, 2.2388, 2.1674, 2.0623, + 2.1734, 2.1535, 2.0805, 2.0350, 2.1338, 2.1778, 2.1319, 2.2243, 2.2794, + 2.1139, 2.2005, 2.1549, 2.0232, 2.0834, 2.0726, 2.2218, 2.0496, 2.1193, + 2.0845, 2.1427, 2.0182, 2.3726, 2.0258, 2.1590, 2.2770, 2.1841, 2.2523, + 2.1467, 1.9906, 2.0301, 2.1457, 2.1497, 2.0760, 2.0481, 2.1327, 2.1243, + 2.2556, 2.1431, 2.1105, 2.1889, 1.7540, 2.1039, 2.1062, 2.0953, 2.1475, + 2.1238, 2.1812, 2.2103, 2.1678, 2.1608, 2.0309, 2.1152, 2.1129, 2.2841, + 2.0309, 2.0734, 2.1454, 2.2304, 2.0570, 2.1209, 2.2711, 2.1286, 2.1101, + 2.0775, 2.0758, 2.0920, 2.2181, 2.2613, 2.1191, 2.1776, 2.1671, 
2.1099, + 2.2056, 2.1635, 2.0298, 2.2926, 2.1715, 1.9039, 2.0213, 2.0237, 2.1132, + 2.0098, 2.1750, 2.2415, 2.1563, 2.1910, 2.0880, 2.1238, 2.2616, 2.1590, + 2.0323, 2.0179, 2.1277, 1.8232, 2.1558, 2.2524, 2.2178, 2.1622, 2.0681, + 2.1036, 2.1641, 2.1510, 2.2235, 2.0144, 2.0947, 2.0730, 1.9855, 2.2212, + 2.1229, 2.1985, 2.2897, 2.1890, 2.0744, 2.0369, 2.3364, 2.1039, 2.1677, + 2.1267, 2.2754, 2.2022, 2.0778, 2.1968, 2.0938, 2.2225, 1.9913, 2.1763, + 2.2320, 2.0717, 2.1027, 2.1216, 2.2761, 2.2161, 2.1315, 2.0953, 2.0402, + 2.3112, 2.1639, 2.3642, 2.1207, 2.2583, 2.0666, 2.0418, 2.1478, 2.1192, + 2.0907, 2.1516, 2.1368, 2.1209, 2.2247, 2.0873, 2.2266, 2.1265, 2.4356, + 2.1106, 2.2100, 2.3024, 2.1876, 2.2123, 2.0296, 1.6260, 2.0849, 2.0714, + 1.9995, 2.2396, 2.2255, 2.0709, 2.1770, 2.1812, 2.1527, 2.0534, 2.0879, + 2.1128, 2.3070, 2.1067, 2.1437, 2.0932, 2.1928, 2.0556, 2.0341, 2.2084, + 2.2778, 2.3196, 2.1044, 2.0765, 2.2309, 2.3135, 2.2170, 2.0869, 2.1127, + 2.2343, 2.2393, 2.2178, 2.0545, 2.0631, 2.1401, 2.0484, 1.9397, 2.1904, + 2.0942, 2.1266, 2.2787], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-4.0474e-01, 5.0409e-01, -6.2095e-01, 1.9046e-01, -9.3106e-01, + 4.3668e-01, 6.2783e-01, -7.8965e-03, -8.5588e-01, -3.0263e-01, + -6.0073e-01, 7.6827e-02, -4.7316e-01, -8.7310e-02, -6.6791e-01, + -3.7563e-03, 1.8902e-01, -2.1686e-01, -6.6348e-01, -1.3281e+00, + 1.0986e-01, 3.3849e-01, 7.3085e-01, -3.8962e-01, 5.8070e-01, + 1.0180e-02, -3.1196e-01, 6.9664e-03, -8.0248e-01, 6.6610e-01, + 3.4659e-01, 1.4230e-01, 5.1505e-01, -2.6895e-01, -3.3652e-01, + -5.5124e-01, -5.4981e-01, -2.2390e-01, 7.7914e-01, 4.4125e-01, + -2.5739e-01, 6.8878e-01, 8.3604e-01, 3.8809e-01, -5.0993e-01, + -5.5098e-01, -8.5939e-01, -4.3050e-01, 3.6157e-02, -2.1150e-01, + -4.7758e-01, -6.4244e-01, 7.0543e-01, 4.7260e-01, -7.1362e-01, + 1.1644e+00, 2.3792e-01, -1.3141e-01, -1.7051e-01, -2.5365e-02, + -5.8489e-01, 6.5227e-01, 5.4440e-01, -3.2898e-01, 1.4602e-01, + 3.5652e-01, 
5.1635e-01, -5.1022e-01, -4.3654e-01, -1.4454e+00, + 2.9734e-01, 9.4037e-02, 1.0461e+00, 2.1383e-01, -3.6251e-01, + -2.5680e-01, 5.1107e-01, 7.0932e-01, 4.6205e-01, -2.5158e-01, + 1.7436e-01, 1.6938e-01, -2.8672e-01, 7.2591e-01, -2.8540e-01, + -7.8770e-01, 4.9259e-01, 2.1408e-01, 1.3567e-01, -5.2772e-01, + -6.7128e-02, 4.5592e-01, 7.4472e-02, -7.0644e-02, -5.8231e-01, + 2.4495e-01, 4.7405e-01, -1.7291e-01, -6.3612e-01, 6.2670e-01, + 4.6047e-01, -6.5939e-01, 3.9048e-01, -3.0125e-01, 1.1632e-01, + 1.0360e+00, 7.5709e-01, 4.1743e-01, 1.6902e-01, -3.8525e-01, + 8.5621e-01, 3.0732e-01, -2.1138e-01, 2.7196e-01, 3.5186e-01, + -3.2635e-01, 7.3482e-01, 4.2488e-01, 7.4492e-01, 1.4549e-01, + -2.1434e-01, -3.5687e-01, -7.2550e-01, -6.6136e-01, 1.2367e-01, + -4.1577e-01, -5.3205e-01, -6.2603e-02, -1.9586e-01, -3.7930e-01, + -4.2071e-01, -9.0787e-01, 4.8806e-02, -4.6018e-01, 7.3300e-01, + 5.1992e-01, 7.1370e-01, 1.6186e-01, 2.5509e-01, -8.3998e-02, + -1.7691e-01, 4.6683e-01, -4.6551e-01, -3.3841e-01, -4.4711e-01, + -1.8539e-01, 2.9682e-01, -2.4576e-01, -2.1410e-01, -3.5070e-03, + 6.3886e-01, -7.4631e-01, 6.3523e-01, 5.2012e-01, -7.1093e-01, + 1.0273e+00, -5.6964e-02, -2.7703e-01, -2.2630e-01, -3.6590e-01, + -4.7491e-02, 5.2619e-01, 7.8483e-01, -3.9002e-01, 5.5755e-02, + 6.2525e-01, -3.9933e-01, 3.6984e-01, -4.2966e-02, -4.1367e-02, + 4.2210e-01, 1.4972e-01, -2.6309e-01, -2.4747e-01, -1.9582e-01, + 4.4209e-01, 3.6193e-01, -4.4070e-01, -5.1436e-01, 1.3340e-01, + -1.2178e+00, -7.4455e-01, 1.2202e-01, 4.9822e-01, 5.3379e-01, + 7.0351e-01, -1.7865e-01, 1.1071e+00, -5.1895e-01, -6.5553e-01, + -3.6015e-01, -4.4857e-01, 2.3724e-01, 5.4664e-01, 4.1513e-01, + 4.0819e-01, 1.0514e+00, 2.3382e-01, 9.6579e-01, 1.0408e-01, + -1.6920e-01, 1.0829e-01, 5.4694e-01, 1.5864e-02, 8.3642e-02, + 2.5477e-01, 8.8037e-01, 8.7462e-01, -8.6983e-02, 1.2242e-01, + -1.5298e-01, 6.3101e-02, -6.2515e-01, 5.4305e-01, 6.4796e-01, + -3.3765e-01, -7.2310e-01, -1.3832e-01, -6.2350e-01, 7.3444e-01, + 4.6106e-01, 
-2.6191e-01, -7.5841e-01, 4.4694e-01, 3.0497e-01, + -2.4138e-01, -5.6582e-01, -2.6223e-01, 2.7859e-01, -6.5636e-02, + -4.4158e-01, 3.0139e-01, 6.6060e-01, 8.8885e-01, -5.2302e-01, + -6.2983e-01, 3.8733e-01, 3.4346e-01, 2.3819e-01, -2.2995e-03, + 9.1729e-01, 7.6446e-01, 1.9985e-01, -4.2040e-01, 4.0301e-01, + -3.7577e-01, -3.5067e-01, -8.2791e-01, -8.2184e-01, 8.2123e-01, + -5.7524e-01, 3.0567e-01, 4.3731e-01, -1.4166e-01, 6.3945e-01, + -2.2943e-02, 5.7065e-01, -1.1292e+00, -2.9721e-01, -5.3010e-02, + 4.5299e-02, -2.3854e-01, -4.6227e-01, 3.1692e-01, -3.5330e-01, + -2.6094e-01, 8.2341e-01, 2.6003e-01, 5.0662e-03, 2.1076e-01, + 2.9852e-01, 4.6640e-01, 9.2121e-02, -7.7412e-01, -3.7900e-01, + -1.8495e-01, -1.1545e-01, 7.3789e-01, 3.2838e-01, 4.5115e-01, + -3.8806e-01, -1.0646e+00, -7.5262e-01, 2.5062e-01, -2.5364e-01, + 6.6377e-01, -5.8219e-01, 3.3624e-01, 2.8649e-01, 3.9229e-01, + -6.2032e-01, 4.4817e-01, 1.0572e+00, -2.2192e-01, 1.5616e-01, + -4.2898e-01, -5.9908e-01, -3.8231e-01, 4.8874e-01, 1.5247e-01, + -8.2282e-01, 8.2276e-01, -4.6885e-01, -2.2112e-01, -3.1448e-01, + 6.1385e-01, 6.0871e-02, 6.5478e-01, -6.8870e-01, -1.8069e-01, + 6.0595e-01, -2.1569e-01, -6.8886e-02, -2.1633e-01, 2.6627e-02, + 9.1286e-01, 3.7838e-01, 4.4088e-01, -1.9911e-02, 5.2531e-01, + -1.0447e+00, -7.8353e-01, 6.1983e-01, -3.1507e-01, 6.4922e-01, + -6.7502e-01, 4.6014e-01, -4.8093e-01, 4.9097e-01, -5.0731e-01, + 3.2845e-01, -4.6035e-01, 5.1567e-02, -7.1382e-01, -1.1038e-02, + 5.6548e-01, -8.6373e-01, 3.8686e-01, 4.7987e-01, -2.6022e-01, + 1.0529e+00, -3.4670e-01, -5.7703e-01, 9.2351e-01, -1.0424e+00, + 3.6434e-01, -5.1920e-01, -2.0621e-01, -5.7194e-01, -1.3815e-01, + -1.5458e-01, 7.8982e-01, -2.1564e-01, 1.4026e+00, -9.9102e-02, + -4.7951e-01, -1.3557e-01, -3.1940e-01, -4.9115e-01, -1.7228e-01, + -4.8218e-01, -2.9346e-01, -9.5089e-02, -1.2865e-01, 7.9052e-01, + -2.9325e-02, -3.4700e-01, 7.1176e-01, -1.4633e-01, 4.0358e-01, + 6.3262e-01, -1.9452e-01, 1.9849e-01, -5.0373e-01, -4.7863e-01, + 
-4.8630e-01, -5.2785e-01, 7.3041e-01, 3.3524e-01, -3.8826e-01, + -3.6228e-01, -6.5662e-01, -2.4258e-01, -6.7619e-02, 2.2568e-01, + -8.4740e-01, -5.3083e-02, 1.0252e+00, 3.3464e-01, -1.1910e-01, + 3.3992e-01, -2.7331e-01, 5.2138e-01, 7.7297e-01, -6.8481e-01, + 9.7702e-02, -5.2441e-01, -6.2938e-02, -7.9591e-01, -4.0645e-01, + 2.8333e-01, 3.2968e-01, -4.2421e-01, -4.1324e-01, 8.1571e-01, + -4.2995e-01, -3.8089e-01, 4.6637e-01, -4.0341e-01, 6.3807e-01, + 6.2462e-01, 5.8128e-02, 3.2726e-01, 4.4414e-01, 1.1882e+00, + -2.5828e+00, 2.7348e-01, -3.0446e-01, 1.2585e-01, 1.3078e-01, + -3.7640e-01, -4.2410e-01, 5.0738e-01, -2.4688e-01, -2.9159e-01, + 2.6960e-01, 4.7148e-01, -4.0152e-01, 2.0195e-01, 4.5138e-01, + -7.5467e-01, -8.1379e-01, -8.9726e-02, 4.4020e-01, 6.0406e-01, + -1.6593e-01, -6.8744e-01, 8.2714e-01, 2.3347e-03, -4.2319e-01, + 8.7532e-02, 3.5544e-01, -2.7449e-01, -2.0140e+00, 4.4710e-02, + -8.1419e-02, -4.1267e-01, 7.1687e-01, 4.5125e-01, -4.8160e-01, + -3.1118e-01, -1.5726e-01, -6.9149e-01, 7.0766e-01, -2.8900e-01, + 6.8034e-01, -3.8809e-01, -5.0950e-01, 1.0280e-01, 3.8500e-01, + -3.3726e-01, 4.1365e-01, -4.9350e-01, -3.2800e-02, -5.9575e-01, + 1.5721e-01, 1.9718e-01, 1.7395e-01, 8.4402e-01, -4.4557e-01, + -2.3550e-01, 1.6015e-01, -4.7043e-01, 1.8386e-01, -5.2999e-01, + -3.7753e-01, 9.7203e-04, -9.8730e-01, 2.4090e-01, 3.2854e-01, + -3.6399e-03, 5.6957e-01, 2.5796e-01, 3.6204e-01, 7.4543e-01, + -8.7224e-01, -2.4659e-01, 2.6866e-01, 4.1437e-01, -6.1573e-01, + -2.1760e-02, 2.4079e-01, 2.4883e-01, -1.2891e-01, -5.7589e-01, + -1.9098e-01, -5.2836e-01, 8.7724e-02, 8.1355e-01, 1.0105e+00, + 2.3312e-01, -5.4144e-01, -6.5276e-01, -2.7310e-01, 4.0889e-01, + 6.8733e-01, 2.3444e-01, 2.6451e-01, -4.0372e-01, -4.0228e-01, + -6.9071e-01, -9.1914e-02, -7.1768e-01, -3.9160e-01, 9.2464e-01, + 1.1736e-01, 9.9555e-01, -3.4335e-01, 6.8837e-02, 1.0661e-01, + 9.6046e-01, 1.9288e-01, 7.9258e-01, 5.5334e-03, 2.6709e-01, + -3.5906e-01, 1.0510e+00, 4.9558e-01, 1.5815e-01, 7.9056e-01, + 
-2.9611e-02, -4.7788e-01, -7.4477e-02, 7.0278e-01, 2.0944e-01, + -9.9172e-01, 2.9514e-02, -3.4982e-01, 5.8798e-01, 1.0294e-01, + -7.4728e-02, 4.0436e-01, -7.3965e-01, 1.1608e-01, 9.7668e-01, + 5.6120e-01, 6.9853e-01, -1.1240e+00, -8.3312e-01, 2.2103e-02, + -6.2014e-01, -1.0602e-01, -1.7596e-01, 5.0554e-01, -5.1151e-02, + -1.7238e-01, 6.7374e-01, -3.0443e-01, -4.7714e-01, 6.0438e-02, + 5.7751e-01, 2.2188e-01, 5.4365e-02, 6.0409e-01, 2.7528e-01, + 1.4016e-01, -3.6410e-01, 2.8936e-01, 7.7458e-01, 1.7155e-01, + -3.6958e-01, 4.1315e-01, 5.7424e-01, -3.4738e-01, 9.6110e-01, + 1.0386e+00, -2.5240e-01, 6.7732e-02, -3.1303e-01, 2.3925e-01, + 2.4488e-01, -2.8734e-01, 6.3480e-01, -2.5157e-01, 2.4225e-01, + -4.3591e-02, -2.9242e-01, -7.2588e-01, 1.2189e+00, 1.8182e-01, + -3.0529e-01, -2.6477e-01, 8.6763e-01, -8.6587e-01, -4.1271e-01, + 6.5369e-02, -2.0068e-01, 2.9647e-02, 5.5062e-01, 1.5341e-01, + -3.3696e-01, 1.0672e-01, 9.7665e-02, -8.4515e-01, -9.6087e-02, + 1.6451e-01, 6.5034e-03, -8.0948e-01, 1.7525e-01, 1.7383e-01, + -3.1217e-01, 5.1740e-01, -4.0527e-01, 2.9478e-01, 6.3920e-01, + 5.8840e-01, 4.5157e-02, 2.6356e-01, -5.4925e-02, -4.1065e-01, + 7.8009e-01, -1.6714e-01, -2.3860e-01, 7.1367e-01, 4.4418e-01, + 3.1620e-01, -2.0515e-01, 8.4948e-01, 3.2021e-01, 7.0564e-02, + 1.7485e-02, -2.4201e-01, -6.2400e-01, 7.2354e-01, 6.4737e-01, + -3.9444e-01, -4.5600e-01, 1.8392e-01, 1.3942e-01, -6.9550e-01, + 5.6519e-01, -3.4485e-01, -1.0811e+00, 2.2609e-03, -7.7264e-01, + -1.7520e-01, 2.9658e-01, 3.4381e-01, -2.5195e-01, 8.1881e-01, + 6.2122e-01, 1.8847e-01, 6.6781e-01, -2.9861e-01, -1.6733e-01, + 8.1848e-01, 5.7276e-01, 2.9802e-01, -5.0510e-01, 1.6306e-01, + 2.1710e-01, 8.4910e-01, 8.8421e-01, -4.5161e-01, -4.3119e-01, + -6.4349e-01, 4.1669e-01, -6.1655e-01, 1.6923e-01, -6.1267e-01, + -7.0699e-02, 4.5768e-01, 4.9780e-02, 5.7231e-01, 9.1744e-01, + -4.6267e-01, -1.1711e-01, 1.1242e+00, 8.5179e-01, -9.9988e-02, + 9.1609e-02, 2.5796e+00, -6.0888e-01, -2.4259e-01, -1.4821e-01, + 
6.9903e-01, -4.4285e-01, -1.1718e-01, 4.0595e-01, -4.6315e-01, + -6.0281e-01, 2.5561e-01, -6.3263e-01, -8.4082e-01, -2.2050e-01, + 3.4140e-01, -5.1675e-01, 9.8693e-01, -9.4655e-01, 2.5957e-01, + -3.4720e-01, 5.9071e-01, 6.8600e-01, -3.4691e-01, 8.6725e-01, + 8.1034e-01, -6.1649e-01, -1.9221e-01, -6.0154e-01, 4.3298e-01, + 2.1760e-01, -5.0489e-02, 2.7929e-01, -3.3724e-01, 3.8897e-03, + -2.4199e-01, 7.9453e-02, 6.1785e-01, -4.8319e-01, -6.2845e-01, + -2.7386e-01, -4.2604e-01, -4.6069e-01, -6.7830e-01, -2.9960e-01, + -1.0792e-01, 9.2764e-01, -1.7870e-01, 4.6413e-01, -2.7450e-01, + -5.8849e-01, -4.7641e-01, -1.3057e-01, 5.1836e-01, 2.7529e-02, + 4.1431e-01, 1.7077e-01, 5.5491e-01, -9.9315e-02, -9.0158e-01, + 4.0775e-01, 2.8138e-01, 5.2781e-01, -2.2875e-01, 3.6168e-01, + 6.1371e-02, -3.9472e-01, 6.2913e-01, 9.8502e-01, 6.8541e-01, + 3.2804e-01, -4.7059e-01, -7.8782e-01, 4.4955e-01, -5.3380e-02, + 1.4579e-01, -5.4171e-01, -5.6868e-01, 9.9710e-01, 5.0651e-01, + 1.2785e-02, -6.9887e-01, 1.9614e-01, 4.3504e-01, 8.8311e-01, + 3.1585e-01, 2.0645e-01, 6.1529e-01], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0039, -0.0192, -0.0103, ..., 0.0052, 0.0099, 0.0056], + [-0.0152, -0.0052, -0.0067, ..., -0.0012, -0.0065, -0.0135], + [ 0.0042, 0.0065, 0.0006, ..., 0.0062, 0.0171, 0.0149], + ..., + [-0.0145, -0.0176, 0.0174, ..., -0.0138, -0.0058, -0.0098], + [ 0.0048, 0.0245, 0.0091, ..., -0.0059, 0.0057, -0.0145], + [-0.0083, 0.0048, -0.0003, ..., -0.0104, 0.0195, 0.0123]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3210, 0.2307, -0.0475, ..., 0.0169, -0.0356, -0.0140], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 5.7831e-03, -8.9035e-03, 5.7757e-05, ..., 6.5651e-03, + -8.6365e-03, -6.7825e-03], + [-1.5414e-04, -4.9782e-03, 2.2430e-02, ..., -2.0050e-02, + -1.9369e-03, 7.0000e-03], + [-1.2711e-02, -7.1526e-03, -3.1647e-02, ..., 6.2637e-03, + -2.0340e-02, 
1.4626e-02], + ..., + [-6.7062e-03, 5.0068e-04, -8.3008e-03, ..., 3.5477e-03, + 2.7447e-03, -2.1606e-02], + [ 2.0172e-02, -1.5497e-03, -1.4412e-02, ..., 5.5504e-04, + -1.2497e-02, 7.7095e-03], + [-5.1003e-03, 1.3168e-02, -4.6082e-03, ..., -8.7051e-03, + -2.3022e-03, 1.5236e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 2.8961e-02, 1.2787e-02, 9.3918e-03, -4.4594e-03, -2.9327e-02, + -1.2665e-02, -1.4824e-02, -5.9021e-02, -2.2614e-02, 4.1718e-02, + 1.3733e-02, -3.5095e-02, -3.7689e-02, -3.9642e-02, -2.7161e-03, + -2.5986e-02, 9.3460e-03, 2.6489e-02, -1.2917e-02, 4.2389e-02, + -2.0542e-03, 3.3264e-02, -7.0419e-03, -2.8320e-02, -4.1840e-02, + 5.9853e-03, 2.1957e-02, 2.8915e-02, 5.2856e-02, 2.3895e-02, + 6.1655e-04, 3.9032e-02, 1.3557e-02, 1.1131e-02, -1.9007e-03, + 4.7516e-02, 1.2726e-02, 3.2593e-02, 2.2568e-02, 3.3508e-02, + 2.0615e-02, -2.6016e-02, 1.1467e-02, -4.1313e-03, 2.7771e-02, + 6.9702e-02, 3.0533e-02, 8.4305e-03, 2.9709e-02, 1.2260e-02, + -1.7212e-02, -1.8600e-02, 2.7863e-02, 1.4679e-02, 6.6833e-02, + 6.7566e-02, -6.8817e-03, 8.4152e-03, 1.9699e-02, 9.3155e-03, + 2.4200e-02, -1.6876e-02, 1.3756e-02, 3.3142e-02, 4.0710e-02, + 1.9028e-02, 1.0834e-02, -1.3260e-02, -2.3842e-03, -6.6589e-02, + -2.7679e-02, 1.1187e-03, -5.0049e-02, -3.3020e-02, -1.6129e-02, + 2.2812e-02, 8.4839e-03, 2.3689e-03, -6.3354e-02, -3.5706e-03, + -1.9455e-02, -3.4142e-03, 3.6530e-02, -1.0811e-02, 1.4145e-02, + 2.9541e-02, 2.4918e-02, 2.1378e-02, -2.0828e-02, 3.6011e-02, + -5.6458e-04, 1.9064e-03, 1.2054e-02, -1.9730e-02, 2.1027e-02, + -1.3870e-02, -2.9053e-02, 6.4049e-03, -4.4525e-02, -2.8152e-02, + 1.3695e-03, 3.7842e-02, -3.5675e-02, 1.4519e-02, 3.6182e-03, + -7.3280e-03, 3.5858e-02, 2.4017e-02, 4.1351e-02, 3.2776e-02, + -6.4011e-03, 1.7242e-02, -1.5612e-03, 3.1311e-02, 2.5665e-02, + -6.5651e-03, 1.8250e-02, 1.1436e-02, 1.9958e-02, 2.0828e-02, + 3.8849e-02, -3.1082e-02, -4.8798e-02, -5.4993e-02, 3.0411e-02, + 3.0441e-02, 2.0538e-02, 
-3.9581e-02, -8.4534e-03, 4.3335e-03, + 6.3629e-03, -1.4992e-02, 5.9776e-03, 1.7700e-02, -2.4979e-02, + 5.0690e-02, 3.2063e-03, -3.2990e-02, -2.0859e-02, -6.4880e-02, + 1.0536e-02, 1.4133e-03, -2.2858e-02, 6.4468e-03, 2.6779e-03, + -1.7410e-02, -2.5543e-02, 5.3329e-03, 2.0813e-02, 3.3905e-02, + 6.7177e-03, 2.1088e-02, 4.3091e-02, 4.9164e-02, 2.2106e-03, + 8.6914e-02, -4.0970e-03, -1.0353e-02, 2.5604e-02, -2.4490e-02, + 8.5754e-03, 4.3945e-03, 3.2013e-02, 2.2629e-02, -1.7807e-02, + -1.9806e-02, -1.8753e-02, -1.6403e-02, -4.0649e-02, 6.3934e-03, + -2.0584e-02, 2.7649e-02, -3.3600e-02, 1.2939e-02, 2.8732e-02, + 5.0888e-03, 8.4915e-03, -3.0380e-02, -7.9193e-03, 2.8717e-02, + 4.5135e-02, -2.2354e-02, -2.2537e-02, 1.6815e-02, 1.0452e-02, + 5.0087e-03, -3.0308e-03, 4.1924e-03, -1.5198e-02, -2.9793e-03, + 7.6660e-02, -1.6541e-02, -2.8168e-02, -7.1220e-03, -3.7689e-02, + -2.5192e-02, -4.6310e-03, -4.5624e-02, -4.1260e-02, 5.1842e-03, + -1.3863e-02, 1.6113e-02, 2.9465e-02, -3.0327e-03, 1.0544e-02, + -1.9348e-02, 4.7394e-02, -1.9409e-02, -6.8054e-03, -3.7689e-02, + -1.0872e-02, 2.6123e-02, 2.0203e-02, 2.8961e-02, 4.1656e-03, + 2.5513e-02, 2.0142e-02, 7.9498e-03, 2.9739e-02, -7.3624e-03, + -1.0452e-02, -5.0507e-03, -2.3865e-02, 2.8076e-02, 1.7365e-02, + -5.5206e-02, 2.0554e-02, -4.1870e-02, 1.3680e-02, 1.2794e-02, + 1.3763e-02, 3.4698e-02, 1.2520e-02, -1.4160e-02, -2.3804e-02, + -5.1636e-02, 2.7771e-02, -2.7023e-02, 3.0563e-02, -1.5541e-02, + 2.4582e-02, -2.0844e-02, 2.2354e-02, -2.4078e-02, 4.7951e-03, + 5.1514e-02, 2.8427e-02, 6.7383e-02, -1.3474e-02, -3.5065e-02, + 3.7212e-03, 5.2277e-02, 1.4748e-02, -3.3264e-02, 2.5116e-02, + 5.3215e-03, -1.2360e-02, -9.1782e-03, -2.6123e-02, -1.5993e-03, + -3.6469e-02, 2.7084e-02, 9.0027e-03, -7.9498e-03, -4.9805e-02, + 2.5620e-02, 5.5027e-04, 4.5715e-02, -2.8946e-02, 3.6697e-03, + -1.2688e-02, 1.5526e-02, -5.7869e-03, -5.4413e-02, -1.0658e-02, + -7.0076e-03, 4.3427e-02, -5.3406e-02, 3.8666e-02, 1.4244e-02, + 3.2837e-02, 1.4427e-02, 
1.3985e-02, -1.3618e-02, 6.5460e-03, + -2.6169e-02, -9.1858e-03, 2.1744e-02, 3.4790e-02, -4.5868e-02, + 3.6983e-03, -3.9520e-02, 5.7869e-03, 1.7136e-02, -7.0524e-04, + 3.6072e-02, -2.9099e-02, 1.3741e-02, -5.5786e-02, 1.8707e-02, + 1.7776e-02, 1.4793e-02, 3.0563e-02, -1.1759e-03, -7.0343e-03, + -1.8616e-02, -7.0477e-04, -3.0151e-02, 2.7039e-02, -1.0996e-03, + 4.2839e-03, 1.9363e-02, 1.9821e-02, -7.0686e-03, 2.9221e-02, + -2.9312e-02, -4.6692e-02, -4.4708e-02, 1.7822e-02, 3.0777e-02, + 2.0050e-02, 3.2135e-02, -1.4893e-02, 2.1439e-02, 3.8623e-01, + -7.0129e-02, -9.4788e-02, -5.1849e-02, 1.3657e-02, -5.4817e-03, + 2.8809e-02, 1.1253e-02, -9.3842e-03, 2.5925e-02, -2.6611e-02, + 9.3231e-03, -2.2339e-02, -1.2421e-02, 2.6398e-02, 1.1768e-03, + -4.1199e-02, 9.4757e-03, -2.1606e-02, -3.9093e-02, 3.1113e-02, + -4.8279e-02, 5.3986e-02, 4.7264e-03, -4.7668e-02, 2.0275e-03, + -4.2648e-03, -5.8990e-02, 5.2063e-02, 1.6937e-02, 7.5951e-03, + -9.3536e-03, 1.3672e-02, -2.8553e-03, -1.3069e-02, 3.7628e-02, + -2.0187e-02, 1.5152e-02, -5.2734e-02, 1.3647e-03, 4.1290e-02, + 1.5991e-02, 1.1473e-03, 6.7253e-03, -1.7715e-02, 4.7516e-02, + -1.0345e-02, -3.4058e-02, -1.2016e-02, -4.4861e-02, -1.4091e-02, + 3.7933e-02, -3.1647e-02, -4.6814e-02, 7.5798e-03, 7.0000e-03, + -6.3660e-02, 4.2480e-02, -4.9011e-02, -3.1586e-02, -1.9226e-02, + 1.3418e-03, -4.6692e-03, -3.4027e-02, 1.7273e-02, 6.9275e-03, + -1.3649e-02, 7.4654e-03, 2.4170e-02, 1.0431e-04, 6.6223e-02, + -2.2598e-02, 1.8219e-02, 5.1147e-02, 1.7303e-02, -4.0405e-02, + -6.0730e-03, -3.3691e-02, -1.9012e-02, 7.7019e-03, 2.1496e-03, + -6.3591e-03, -3.7003e-03, -2.0294e-02, 9.7179e-04, -1.9089e-02, + 4.1931e-02, -2.8214e-02, 1.6708e-02, -2.8549e-02, 3.8177e-02, + -1.2573e-01, 1.0773e-02, 1.9058e-02, 2.6188e-03, 2.8412e-02, + 2.8366e-02, 2.4776e-03, 1.2312e-03, -1.3908e-02, 1.8539e-02, + -3.2440e-02, 1.6312e-02, 1.5778e-02, 1.7212e-02, -6.2637e-03, + 1.9257e-02, 1.8768e-02, -1.8646e-02, 4.2877e-02, 7.7332e-02, + 6.6872e-03, -2.4597e-02, 
-1.7700e-02, 2.2888e-02, -2.9144e-02, + 3.9032e-02, -1.5167e-02, 1.4137e-02, -1.0522e-01, -3.0396e-02, + -2.9755e-02, 4.9553e-03, -4.7302e-03, 6.8359e-03, 2.0172e-02, + -1.7578e-02, -1.6190e-02, -6.3820e-03, 1.4679e-02, 2.4231e-02, + -2.3590e-02, 1.4351e-02, -4.9667e-03, 3.7964e-02, -2.4857e-02, + -4.9713e-02, 5.2002e-02, -1.8738e-02, -1.4105e-03, -1.1841e-02, + -4.4464e-02, -2.4124e-02, -6.8909e-02, -3.9978e-02, 3.4285e-04, + -1.1314e-02, 3.5262e-04, -7.2365e-03, -3.4210e-02, -1.7502e-02, + -9.1629e-03, 6.1005e-02, 4.6967e-02, 8.7662e-03, 4.1840e-02, + 4.6783e-02, -1.5045e-02, -5.5725e-02, -1.1307e-02, 9.8953e-03, + 2.8076e-02, -3.3783e-02, -1.9240e-04, -6.7101e-03, 6.9094e-04, + 2.4429e-02, 1.6830e-02, -3.5492e-02, 5.9624e-03, -1.6205e-02, + -2.9663e-02, -8.1100e-03, 1.1604e-02, 1.9363e-02, -1.1749e-02, + 7.3910e-05, -4.3793e-02, -3.8910e-02, 7.4005e-03, 1.4145e-02, + -8.0109e-03, 1.2535e-02, 2.8973e-03, 1.5526e-02, -3.7476e-02, + 8.3313e-03, 3.2196e-02, 4.2084e-02, 1.5343e-02, 3.4149e-02, + -6.2141e-03, -1.6312e-02, 2.6016e-02, 3.3051e-02, -4.0771e-02, + 2.5650e-02, 6.4964e-03, -2.9785e-02, -3.1403e-02, -3.9703e-02, + -1.0674e-02, -5.3101e-02, -3.0121e-02, 6.3972e-03, -2.8152e-02, + 6.0394e-02, -5.6427e-02, 4.2358e-02, -1.8692e-02, 2.8595e-02, + 2.9587e-02, -5.4512e-03, 3.2257e-02, -2.7817e-02, 7.8125e-03, + -1.3382e-02, 3.1891e-03, 7.9117e-03, 3.7537e-03, -3.0460e-03, + -5.0964e-02, 1.0399e-02, 2.2995e-02, -5.2109e-03, -6.6986e-03, + -4.9561e-02, 6.8932e-03, 6.7139e-03, 2.4902e-02, 1.6083e-02, + -8.6975e-04, -2.6413e-02, 2.0828e-02, 2.1286e-02, -1.0796e-02, + -1.5564e-03, -5.8655e-02, 1.8890e-02, -7.1907e-03, -2.8702e-02, + -6.8626e-03, 1.5297e-02, -3.2135e-02, -4.1931e-02, -1.3748e-02, + 4.0131e-03, -5.4779e-03, -7.3509e-03, 5.8258e-02, -3.4515e-02, + -2.0142e-02, -8.4763e-03, -4.4922e-02, 2.3926e-02, -2.6932e-02, + 2.0294e-02, 4.2084e-02, 1.6983e-02, -5.3467e-02, -2.2415e-02, + -4.5052e-03, -1.5137e-02, 1.5381e-02, 3.7231e-02, -1.3514e-03, + 3.6163e-03, 
2.1103e-02, 2.0142e-02, 5.3040e-02, 3.6377e-02, + -7.5989e-03, 5.5008e-03, -1.7075e-02, -2.9892e-02, 1.0551e-02, + -3.6163e-02, -1.8738e-02, -5.9433e-03, -2.8381e-02, -2.8671e-02, + 1.4938e-02, 2.2995e-02, -5.3589e-02, 2.4155e-02, -2.2415e-02, + 6.8359e-03, -1.9028e-02, -3.9902e-03, -3.0121e-02, -2.0981e-03, + -6.4453e-02, 2.8778e-02, -3.8483e-02, 7.0419e-03, -4.4006e-02, + -4.5593e-02, 1.8921e-03, 2.4109e-02, -3.8391e-02, -5.6992e-03, + -2.4826e-02, 1.6586e-02, -3.5309e-02, -3.6430e-03, 1.2115e-02, + 5.4810e-02, -3.6560e-02, 2.7771e-02, -5.2643e-02, -3.8505e-04, + 2.2278e-02, 1.0681e-02, 4.1351e-02, 1.7939e-03, -1.9012e-02, + -1.8173e-02, -3.4882e-02, 5.5199e-03, -4.0497e-02, 6.7177e-03, + 2.4681e-03, 1.6891e-02, 2.5146e-02, 4.0070e-02, 7.3891e-03, + -2.0142e-02, -6.4125e-03, -7.8659e-03, -1.4868e-03, 7.1220e-03, + -2.9434e-02, -1.7654e-02, 1.4297e-02, 1.4519e-02, 1.7502e-02, + 1.0345e-02, -2.4170e-02, 2.0309e-02, 3.1097e-02, -1.1818e-02, + 3.6072e-02, 2.1362e-02, -3.7050e-04, -2.3483e-02, -3.8681e-03, + 2.2079e-02, -3.0991e-02, -3.6835e-02, -1.7578e-02, 5.9700e-03, + -2.9816e-02, -6.9122e-03, -8.8257e-02, -4.4594e-03, -1.8082e-02, + -3.7323e-02, 1.7603e-01, -8.0948e-03, -1.6403e-02, 1.6113e-02, + -7.1449e-03, 9.5444e-03, 4.4212e-03, 4.0619e-02, -2.9068e-02, + 2.0874e-02, -3.3356e-02, 5.0323e-02, 1.2749e-02, -4.8637e-03, + -5.3101e-03, 8.6594e-03, 8.5754e-03, 3.8567e-03, 4.4769e-02, + 1.1604e-02, 5.2551e-02, -2.6474e-03, -4.3091e-02, 8.9951e-03, + -3.3569e-02, 4.7951e-03, 1.8875e-02, -2.7618e-02, 5.1666e-02, + 3.7720e-02, 1.8768e-02, 3.1189e-02, -7.6103e-03, 1.9409e-02, + -2.0828e-02, -2.7084e-02, 3.2440e-02, 7.1182e-03, -6.8665e-02, + -4.0913e-04, 1.2344e-02, 1.5488e-02, 4.6229e-04, -4.8828e-02, + -2.3132e-02, 5.8136e-02, -1.5335e-02, 7.5226e-03, 2.4719e-02, + 2.4231e-02, 8.5754e-03, 2.9892e-02, 7.0534e-03, 3.0396e-02, + -4.3030e-03, 1.9150e-02, 1.6823e-03, 1.2360e-02, 2.5757e-02, + 1.2772e-02, -1.3840e-02, -4.5959e-02, -6.2256e-02, 4.0016e-03, + 1.6203e-03, 
-2.7069e-02, -6.0883e-03, -6.6284e-02, -2.9633e-02, + -3.8719e-03, 1.0887e-02, 2.1530e-02, -3.0304e-02, 2.3060e-03, + 3.8727e-02, -1.4484e-04, -1.0635e-02, -4.5395e-03, 6.2561e-03, + 4.6356e-02, 5.3864e-02, 6.8550e-03, 2.3193e-03, -2.4506e-02, + -1.9852e-02, 5.5122e-03, 8.6746e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.4389, 1.5492, 1.4444, 1.4369, 1.4696, 1.5982, 1.4798, 1.4962, 1.5059, + 1.4201, 1.5556, 1.4056, 1.4634, 1.5500, 1.3880, 1.4895, 1.4535, 1.6242, + 1.4925, 0.4319, 1.4099, 1.5599, 1.5428, 1.4731, 1.5599, 1.4931, 1.5788, + 1.4391, 1.6308, 1.4246, 1.5024, 1.5215, 1.4943, 1.5267, 1.5500, 1.5011, + 1.4709, 1.5334, 1.5236, 1.4504, 1.5937, 1.4595, 1.5022, 1.5378, 1.5704, + 1.5227, 1.5181, 1.4495, 1.5018, 1.4275, 1.4919, 1.5308, 1.4562, 1.5681, + 1.5308, 1.4581, 1.4151, 1.5746, 1.5263, 1.5096, 1.5268, 1.5130, 1.3768, + 1.5480, 1.4876, 1.5399, 1.4630, 1.4655, 1.5274, 1.4453, 1.4500, 1.5840, + 2.3376, 1.5532, 1.5260, 1.4554, 1.4595, 1.5422, 1.4586, 1.5038, 1.5498, + 1.4491, 1.4934, 1.5165, 1.5430, 1.4469, 1.4588, 1.4770, 1.4158, 1.4627, + 1.5009, 1.4705, 1.5292, 1.5613, 1.5451, 1.4401, 1.5326, 1.4154, 1.4870, + 1.5448, 1.5034, 1.5059, 1.4505, 1.5611, 1.4406, 1.4348, 1.4452, 1.4387, + 1.4832, 1.5420, 1.5072, 1.5112, 1.4903, 1.4850, 1.4967, 1.5254, 1.4557, + 1.4718, 1.5248, 1.4829, 1.5204, 1.4817, 1.4837, 1.5054, 1.5230, 1.5211, + 1.4579, 1.5346, 1.5020, 1.4881, 1.4620, 1.4641, 1.4179, 1.4865, 1.3921, + 1.5071, 1.5182, 1.5576, 1.4620, 1.4443, 1.5125, 1.4419, 1.5722, 1.4472, + 1.4750, 1.5109, 1.4204, 1.5067, 1.5041, 1.4859, 1.5257, 1.4809, 1.4586, + 1.5638, 1.4914, 1.2267, 1.5272, 1.5182, 1.5851, 1.5238, 1.4617, 1.4648, + 1.4793, 1.5202, 1.4991, 1.5037, 1.4156, 1.5364, 1.4462, 1.5589, 1.5135, + 1.5042, 1.4492, 1.4746, 1.5703, 1.5694, 1.5920, 1.4744, 1.5622, 1.4728, + 1.4683, 1.5139, 1.4662, 1.4459, 1.5067, 1.4089, 1.4473, 1.9656, 1.4960, + 1.4923, 1.4494, 1.4368, 1.5290, 1.6168, 1.4908, 1.4451, 1.5191, 1.5695, 
+ 2.9963, 1.4412, 1.4245, 1.5307, 1.5404, 1.6068, 1.4556, 1.5329, 1.3620, + 1.5110, 1.4351, 1.4680, 1.4922, 1.4024, 1.4582, 1.3978, 1.5052, 1.5299, + 1.4814, 1.4928, 1.4387, 1.6266, 1.4509, 1.5028, 1.3828, 1.4767, 1.5010, + 1.4637, 1.5415, 1.5347, 1.4912, 1.5763, 1.4288, 1.5410, 1.5556, 1.5185, + 1.4852, 1.5436, 1.5161, 1.5171, 1.4735, 1.5772, 1.5198, 1.4617, 1.5643, + 1.5262, 1.5619, 1.4639, 1.4431, 1.4988, 1.4283, 1.5170, 1.5242, 1.5298, + 1.3956, 1.4130, 1.5891, 1.5797, 1.4557, 1.4643, 1.4404, 1.4528, 1.5400, + 1.4213, 1.4825, 1.4541, 1.5162, 1.4745, 1.5409, 1.4437, 1.5563, 1.5220, + 1.4978, 1.4322, 1.5253, 1.4922, 1.5099, 1.4052, 1.4925, 1.4202, 1.4800, + 1.5443, 1.4790, 1.5203, 1.5604, 1.5245, 1.5537, 1.4405, 1.4779, 1.5587, + 1.5019, 1.4787, 1.4604, 1.5525, 1.6145, 1.4254, 1.4455, 1.4977, 1.4991, + 1.4780, 1.5286, 1.5118, 1.4985, 1.4406, 1.4756, 1.4888, 1.5407, 1.4589, + 1.4604, 1.5004, 1.4316, 1.5032, 1.5052, 1.5273, 1.4469, 1.4871, 1.4798, + 1.5064, 1.4596, 1.4211, 1.5157, 1.4834, 1.4306, 1.4624, 1.5115, 1.5144, + 1.0832, 1.4433, 1.5137, 1.4210, 1.4692, 1.4682, 1.5019, 1.5391, 1.4631, + 1.5099, 1.5117, 1.4275, 1.4380, 1.4603, 1.5109, 1.4955, 1.4468, 1.4753, + 1.4881, 1.5287, 1.4950, 1.5418, 1.4729, 1.5359, 1.5092, 1.5260, 1.5131, + 1.5075, 1.4478, 1.5727, 1.5024, 1.4902, 1.4793, 1.5357, 1.4492, 1.5143, + 1.4284, 1.4360, 1.5019, 1.5772, 1.5235, 1.4726, 1.5132, 1.4744, 1.5066, + 1.4778, 1.5114, 1.5889, 1.5091, 1.5202, 1.4418, 1.4208, 1.4492, 1.5511, + 1.5025, 1.4133, 1.5739, 1.5198, 1.4711, 1.5452, 1.5289, 1.4598, 1.5236, + 1.4898, 1.4469, 1.4270, 1.5182, 1.4165, 1.5311, 1.4409, 1.4210, 1.5046, + 1.4362, 1.5257, 1.6063, 1.5472, 1.4609, 1.6293, 1.4389, 1.4431, 1.4479, + 1.4984, 1.4706, 1.4638, 1.4930, 1.4133, 1.4526, 1.4260, 1.4582, 1.4404, + 1.2730, 2.3238, 1.5373, 1.5353, 1.4663, 1.5103, 1.4978, 1.4814, 1.4722, + 1.4676, 1.5279, 1.5444, 1.4845, 1.4893, 1.5243, 1.4853, 1.6193, 1.5047, + 1.5249, 1.4747, 1.4290, 1.4990, 1.5122, 1.4788, 1.4516, 1.4248, 1.4096, + 
1.5029, 1.4956, 1.1624, 1.4618, 1.5129, 1.5083, 1.4871, 1.5393, 1.5344, + 1.4575, 1.5062, 1.5657, 1.4710, 1.4620, 1.4261, 1.4575, 1.4434, 1.5555, + 1.4455, 1.4711, 1.5682, 1.4642, 1.4363, 1.4250, 1.5141, 1.5018, 1.4898, + 1.4853, 1.5390, 1.5320, 1.4198, 1.4610, 1.4487, 1.5688, 1.5402, 1.5826, + 1.5324, 1.5616, 1.5575, 1.4719, 1.4533, 1.5122, 1.2552, 1.4833, 1.4171, + 1.4476, 1.4818, 1.4313, 1.5298, 1.5112, 1.5557, 1.4849, 1.5036, 1.4919, + 1.5493, 1.5096, 1.4353, 1.4918, 1.5246, 1.4687, 1.5117, 1.4312, 1.4720, + 1.4250, 1.5285, 1.4419, 1.5280, 1.4315, 1.4631, 1.4872, 1.5459, 1.6067, + 1.4754, 1.4919, 1.5278, 1.5539, 1.4711, 1.4978, 1.4965, 1.4713, 1.4707, + 1.5351, 1.4653, 1.4548, 1.4723, 1.4779, 1.4827, 1.5278, 1.5125, 1.4794, + 1.5375, 1.5805, 1.5112, 1.5454, 1.4380, 1.5259, 1.4707, 1.4437, 1.5549, + 1.6015, 1.4510, 1.4803, 1.3817, 1.5958, 1.4488, 1.5055, 1.5146, 1.5294, + 1.5361, 1.4402, 1.4803, 1.6003, 1.5086, 1.5265, 1.4244, 1.4718, 1.5720, + 1.5039, 1.4664, 1.4566, 1.3609, 1.4783, 1.4991, 1.4960, 1.6116, 1.5236, + 1.4866, 1.5166, 1.5421, 1.4833, 1.4550, 1.5109, 1.4517, 1.4751, 1.4535, + 1.4929, 1.5054, 1.4567, 1.4902, 1.4949, 1.4979, 1.5163, 1.5112, 1.4693, + 1.5772, 1.5449, 1.4362, 1.4490, 1.6147, 1.4914, 1.6548, 1.4735, 1.4384, + 1.4582, 1.4245, 1.5034, 1.4928, 1.4265, 1.5251, 1.5082, 1.5644, 1.5962, + 1.4326, 1.5006, 1.5278, 1.6330, 1.7276, 1.5964, 1.5586, 1.4862, 1.5098, + 1.5259, 1.5411, 1.5315, 1.5077, 1.5071, 1.5442, 1.6036, 1.5586, 1.4586, + 1.5649, 1.4736, 1.4905, 1.4643, 1.5023, 1.5050, 1.4885, 1.4790, 1.5414, + 1.5510, 1.4828, 1.3514, 1.4503, 1.5604, 1.5021, 1.5154, 1.5488, 1.4103, + 1.4800, 1.4122, 1.6422, 1.5825, 1.4450, 1.3146, 1.5293, 1.5225, 1.5205, + 1.5006, 1.4796, 1.5416, 1.5103, 1.5915, 1.4845, 1.5031, 1.5782, 1.5030, + 1.4565, 1.4765, 1.4934, 1.5769, 1.4670, 1.5313, 1.5007, 1.4586, 1.5791, + 1.5857, 1.4441, 1.5946, 1.4943, 1.4492, 1.4897, 1.5223, 1.4263, 1.4230, + 1.5314, 1.5365, 1.6068, 1.3709, 1.4172, 1.4926, 0.3475, 1.5345, 1.4760, + 
1.4426, 1.4324, 1.5224, 1.5465, 1.5081, 1.4960, 1.5665, 1.5345, 1.5173, + 1.5048, 1.4816, 1.5093, 1.5004, 1.5425, 1.5010, 1.5042, 1.4334, 1.5301, + 1.4441, 1.4909, 1.3882, 1.5067, 1.5217, 1.5300, 1.4790, 1.4483, 1.5049, + 1.4574, 1.4995, 1.4635, 1.4108, 1.4958, 1.4742, 1.4559, 1.4804, 1.5102, + 1.4960, 1.4830, 1.5441, 1.4797, 1.4928, 1.4054, 1.2281, 1.5226, 1.5581, + 1.5337, 1.4610, 1.5058, 1.5363, 1.5828, 1.4924, 1.5631, 1.4330, 1.4567, + 1.5843, 1.4818, 1.4862, 1.4260, 1.5339, 1.4564, 1.5012, 1.4777, 1.4922, + 1.4950, 1.5238, 1.5327, 1.5206, 1.4421, 1.5222, 1.5461, 1.5083, 1.5667, + 1.5199, 1.4212, 1.5084, 1.5419, 1.5771, 1.4018, 1.5208, 1.3893, 1.5437, + 1.5531, 1.4182, 1.4350], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 9.5040e-02, 6.2264e-02, -1.1090e-01, -1.8604e-02, 8.0698e-02, + 1.3616e-01, 6.9032e-02, 3.6952e-02, -1.3561e-01, -2.6607e-03, + 5.8487e-02, -3.3613e-02, -8.9103e-02, -1.5545e-02, -9.2796e-02, + -5.0928e-02, -1.2088e-02, -2.7953e-02, -1.5458e-02, 4.8689e+00, + -9.4001e-02, 4.3498e-02, -1.5978e-02, 2.9513e-03, -5.3258e-02, + 1.4907e-02, -1.7444e-02, 5.4565e-03, 5.2729e-02, -8.5186e-02, + 4.1780e-02, 2.2448e-02, 2.6540e-02, 7.7438e-02, 5.7399e-02, + -3.7843e-02, 3.1810e-02, -3.5782e-02, -1.3307e-01, 4.2217e-02, + 4.0042e-02, 3.2673e-02, -1.7885e-03, 1.6705e-02, -4.0591e-02, + 3.4436e-03, 6.0339e-02, -2.1147e-02, 2.8313e-02, 5.5186e-02, + 1.9493e-02, -1.3385e-02, -7.6236e-03, 2.4183e-02, 9.9285e-02, + 8.9684e-02, -1.0770e-01, -3.9934e-02, 3.3857e-02, 1.4410e-02, + 6.5640e-02, -8.2179e-02, 4.2060e-02, 3.6734e-02, 9.0417e-02, + -3.1195e-02, 7.3319e-02, 9.6951e-02, 1.6255e-02, 2.2244e-01, + 4.1289e-02, 5.4411e-02, 4.8112e-01, 8.8067e-02, -1.0920e-01, + 9.3275e-02, 2.4059e-02, -1.5016e-01, -8.2098e-02, 1.9829e-02, + -2.0126e-02, -5.1567e-02, -2.6316e-02, 1.0021e-01, 4.6702e-02, + 5.0399e-02, 1.4458e-01, 1.1531e-01, -2.2371e-02, 2.3077e-02, + 2.5645e-03, 1.2902e-02, 2.5156e-02, -5.4277e-02, -2.7611e-02, + 6.6039e-02, -7.8203e-02, 
-2.1121e-02, -6.1290e-02, -5.3134e-02, + -3.1687e-03, -7.9709e-02, 8.7348e-03, -6.4941e-02, 4.9401e-02, + -8.7840e-03, -5.3413e-03, -8.4726e-02, 2.7666e-02, -3.4226e-02, + -6.2891e-02, -3.7197e-02, 3.3137e-02, 8.0032e-02, 1.5725e-02, + -4.9238e-02, -3.9217e-02, -7.3844e-02, -2.2711e-02, 1.0376e-01, + -2.0982e-02, -1.8312e-01, -7.5614e-02, -4.5955e-02, -1.4845e-01, + 3.5872e-04, -1.3807e-01, -6.7375e-03, 5.1624e-02, -4.4845e-02, + -1.1063e-01, 7.7368e-02, -4.5386e-02, -3.4162e-02, -6.7513e-02, + 5.0009e-02, -3.4471e-02, 1.9803e-02, -2.7813e-02, -2.6261e-02, + 2.8243e-02, -5.6916e-02, 7.3801e-02, 3.9814e-02, 6.2373e-02, + -1.2426e-02, -2.7946e-02, -1.9696e-02, 1.6410e-02, 1.0248e-01, + 1.8027e-01, -6.1605e-02, -9.9918e-02, -5.3417e-03, -3.2127e-02, + -2.8592e-01, -2.2483e-02, -7.7138e-02, 4.1591e-02, -7.1350e-04, + 1.4369e-01, -1.2360e-01, -2.4565e-02, 6.2265e-02, 1.0753e-02, + 4.7586e-03, -2.7731e-02, 4.4703e-02, -3.8944e-02, 2.0401e-02, + -5.4677e-02, 6.2356e-02, 3.8168e-02, 5.0544e-02, 6.1430e-02, + -8.3724e-02, 1.9396e-02, 6.3526e-02, -2.8940e-02, 2.4832e-02, + 7.3654e-03, -7.3348e-02, 5.7481e-02, -3.2183e-02, 1.5765e-02, + -1.6311e-01, 3.8052e-02, 3.4197e-04, -3.0377e-02, -7.3608e-02, + 3.8378e-02, -2.0785e-02, 9.1732e-02, 6.0364e-02, -1.0010e-01, + 3.6494e-02, -5.0895e-02, -1.4789e-01, 8.7741e-02, 2.5649e-02, + -1.3797e-01, -3.8446e-02, -3.6134e-02, 1.7410e-02, 4.8862e-02, + 1.1463e-01, -9.7149e-02, 6.3472e-02, 1.2722e-01, 2.0616e-02, + 1.0777e-02, 1.9428e-01, -2.0818e-02, -5.5920e-02, -9.7729e-02, + -1.9469e-02, -2.6490e-02, 3.9081e-02, -1.1816e-01, -2.0222e-01, + -1.7587e-02, -3.8713e-02, 8.3494e-02, -2.2572e-02, 3.5223e-02, + -3.0260e-02, 9.7330e-02, -1.5827e-02, 1.2929e-01, -2.1608e-02, + -1.2937e-02, 2.7590e-02, -1.8385e-02, 6.9238e-02, 1.2346e-02, + 2.1508e-02, -8.5435e-02, 3.3443e-02, 7.3312e-02, 1.8266e-02, + 8.2059e-02, 8.2763e-02, 4.0149e-02, 3.6304e-02, 3.5831e-02, + 6.1176e-02, 7.7396e-02, -4.7230e-02, 3.2182e-02, -3.1280e-03, + 4.4441e-02, 
4.4095e-02, 5.9928e-02, -7.2963e-02, 7.7081e-02, + -7.7831e-02, -6.3993e-02, -7.7120e-02, 1.9159e-02, 3.0940e-02, + -4.3952e-02, -6.4650e-02, -3.4125e-02, -5.8668e-02, -2.7959e-02, + 4.8970e-02, -5.8047e-02, -8.7014e-03, -7.8959e-02, -2.2804e-02, + -3.4480e-02, 9.5613e-03, 7.5918e-02, -6.5928e-02, 1.3879e-02, + 4.7323e-02, -1.1317e-01, -2.2184e-03, 6.7904e-02, -1.0016e-01, + 1.8474e-01, 1.4786e-01, 7.0485e-03, 2.2563e-02, 5.1969e-02, + 4.1149e-02, 8.6695e-02, -6.1285e-02, 1.0310e-01, -1.0924e-01, + -2.6213e-02, -1.9226e-02, 1.0561e-01, 4.9150e-02, 6.3478e-02, + 1.8294e-02, 1.6091e-01, 4.7983e-02, 4.8363e-02, 7.2620e-02, + 5.2564e-02, 3.9703e-03, -4.5733e-02, 1.0324e-01, -9.4249e-03, + 1.3086e-02, 1.1960e-01, -8.4199e-02, 6.5710e-02, -5.5719e-02, + 3.2044e-02, -4.8804e-03, -7.7953e-02, -7.9623e-02, -8.9439e-03, + -7.2873e-02, 7.2873e-04, 1.8083e-02, 1.4896e-01, -4.6932e-02, + -5.7677e-02, 2.1030e-01, -8.1739e-02, -4.1775e-02, 1.2972e-01, + -1.9701e-01, 4.4433e-02, 1.9681e-02, 3.2872e-02, -1.5593e-01, + 1.5408e-01, -2.0397e-02, 1.2069e-01, -3.3059e-02, 2.9468e-02, + 1.2204e-01, -3.2076e-03, -5.6352e-02, 9.4064e-03, -3.4305e-02, + -1.5335e-01, 2.1700e-02, -2.2277e-02, -6.3310e-02, 9.7901e-02, + 8.1202e-03, 8.5831e-02, 9.2371e-02, 5.5351e-02, -2.6530e-02, + -6.1205e-02, 3.0950e-02, -2.3011e-02, 4.4549e-02, -2.0652e-02, + -5.5406e-02, -3.3195e-02, 5.0550e-02, -1.2650e-02, -9.6969e-02, + 1.5714e-02, 1.8129e-02, 9.1402e-03, -9.1455e-02, -4.2209e-02, + -5.1691e-03, 3.0798e-02, 8.4623e-02, 4.9172e-02, 2.8383e-02, + 9.3418e-02, -2.3945e-02, -7.1348e-02, 2.9395e-03, -3.3130e-02, + 1.6025e-02, 7.9770e-02, 7.0224e-02, -2.8055e-02, 2.4734e-02, + -9.9713e-02, 9.2963e-02, 1.6934e-02, -4.6360e-03, -2.3557e-02, + -4.9357e-03, -2.3687e-02, -7.7652e-02, -8.3567e-02, -1.0004e-01, + 3.3868e-02, -5.1590e-04, -8.0284e-02, -1.6152e-02, 4.4573e-02, + -3.8656e-03, 1.1956e-02, 5.6796e-02, 2.0969e-02, 7.8286e-02, + 1.1165e-01, -6.3958e-02, 7.6898e-02, 4.6967e-02, -1.0099e-01, + -1.3392e-01, 
-5.2005e-02, 3.4906e-02, 1.6815e-02, -3.4399e-02, + -7.8110e-02, 5.6494e-02, 6.3118e-02, -5.4425e-02, -1.3512e-02, + -1.3473e+00, -3.9723e-02, 2.4948e-02, -5.1947e-02, -7.7290e-03, + -1.5215e-02, 2.3884e-02, -1.6945e-02, -8.0580e-02, -5.4007e-02, + 2.1998e-02, -1.8827e-01, -8.7725e-02, 5.0141e-02, -6.8379e-02, + -2.7706e-02, -1.7930e-02, -3.4555e-02, 6.4826e-02, 9.8114e-02, + -1.2598e-02, 3.6801e-02, 3.4150e-02, 1.1018e-01, -7.9386e-02, + -4.1934e-02, -8.4118e-02, -1.8066e-02, 1.6404e-01, 1.1735e-01, + -1.3154e-01, -1.0468e-02, -7.2428e-02, 1.0006e-01, 2.2076e-02, + 3.2061e-02, 2.0222e-02, -1.0035e-01, 5.2576e-02, 2.7860e-02, + -2.0873e-02, 3.7349e-02, 2.0231e-02, -4.2276e-02, -7.7922e-02, + 3.1693e-02, 3.0677e-02, -9.3555e-02, 3.2186e-02, 4.0509e-02, + 1.8245e-02, 4.8887e-02, -7.9230e-02, 7.1811e-02, 2.9829e-02, + 1.9192e-02, -1.1067e-01, -8.1617e-02, 2.2437e-02, -2.6069e-02, + 4.3656e-02, 1.6310e-01, 9.6680e-02, 1.0566e-02, 9.1838e-02, + 1.1688e-01, -1.2390e-01, 3.2972e-02, 1.2624e-01, -9.4054e-02, + -4.5634e-03, -1.5043e-02, 1.2885e-01, 9.9710e-02, 3.7776e-03, + 4.5239e-02, -9.0217e-03, 4.9700e-03, -2.4734e-02, -1.2538e-01, + 4.6608e-02, -8.2679e-02, 9.4270e-02, -7.2957e-02, 5.8114e-03, + 5.3164e-04, -4.3835e-02, 3.8315e-02, -4.7578e-02, -8.0964e-02, + 3.8714e-02, 1.0235e-01, 7.0860e-02, 3.7793e-02, 2.9038e-02, + -7.7706e-02, -1.6005e-02, -4.0638e-02, 7.4940e-02, -9.7327e-02, + 1.2264e-03, 1.6966e-02, 1.9095e-01, 1.3447e-01, -1.0382e-01, + -6.7178e-02, -1.4883e-02, 6.4356e-02, -3.1611e-02, -3.0012e-02, + -2.2480e-02, -1.9876e-02, -2.2346e-02, 7.6023e-03, -7.9597e-02, + 2.8261e-02, 5.0167e-02, -1.8636e-02, -1.0160e-01, -3.2626e-02, + 3.7352e-02, -1.4968e-02, 2.9950e-02, -5.7603e-02, -7.1363e-02, + -3.4022e-02, 3.4849e-02, 3.3342e-02, 3.8673e-03, -1.7286e-02, + -4.4154e-02, 6.3981e-02, 2.6404e-02, 7.4898e-02, 3.6818e-03, + 3.2078e-02, 1.3596e-02, -1.0983e-01, 4.5060e-02, -3.5186e-03, + -5.2234e-03, -1.4830e-02, -4.7432e-02, -1.6053e-03, 1.2008e-01, + -6.6264e-02, 
1.1926e-01, 2.6320e-02, 2.2969e-02, 1.5679e-01, + 3.5541e-02, -2.1966e-02, 3.5414e-02, 1.3858e-02, 2.8476e-02, + 7.5914e-02, -1.7632e-02, -1.0996e-01, -2.1218e-02, 4.4041e-02, + -1.6149e-02, -7.5358e-03, 6.0889e-02, 4.7087e-02, -1.8710e-02, + -1.2684e-01, -2.2078e-02, 2.9795e-02, 1.0746e-01, 8.8426e-02, + 2.8203e-02, 4.2237e-02, -5.8773e-02, 4.3712e-02, -5.0027e-03, + 1.5752e-02, 6.0129e-02, 1.2071e-01, -2.2050e-02, 1.1836e-02, + -5.5813e-02, 4.6402e-02, 9.4760e-03, -7.1275e-03, 5.8639e-02, + -9.9549e-02, -1.0721e-01, 5.5813e-04, -1.2840e-04, 2.6538e-02, + -8.8939e-02, 7.2796e-02, -8.9444e-02, 1.8669e-02, -2.8006e-02, + 7.6746e-02, 3.4517e-02, -2.7705e-02, -2.0928e-02, -6.2216e-02, + -1.2193e-01, 4.9761e-02, 6.8638e-02, -7.1114e-03, -9.1348e-02, + 5.0051e-02, -4.5998e-04, 4.2629e-02, 3.8347e-02, 5.8768e-03, + -3.8492e-02, -8.8272e-04, 3.2111e-02, 9.0239e-02, -3.9676e-02, + 1.2868e-01, -2.6537e-02, 1.0337e-01, 1.7039e-01, 3.4185e-02, + -5.4864e-03, -6.1429e-02, 4.7363e-02, 5.8705e-02, -4.7458e-02, + 3.6429e-02, -2.7022e-02, 1.0612e-01, -5.0205e-02, 3.3695e-02, + 2.0430e-02, 3.3421e-02, 9.1960e-02, 5.0020e-02, 1.0917e-01, + 1.3181e-01, 2.9231e-02, -6.8652e-02, 7.7897e-02, -9.2801e-02, + 4.2380e-02, -1.3279e-01, -2.2904e-02, 5.7570e-02, 8.7368e-02, + -4.0879e-02, 6.1447e-03, -1.1034e-01, -1.8915e-02, 1.1407e-01, + 3.7383e-02, -1.3176e-01, -8.9242e-02, 5.5264e-02, 9.7983e-03, + -2.2093e-01, 1.1690e-02, 2.6277e-02, -2.1914e-04, -2.7651e-02, + 9.1245e-03, -1.7339e-02, -4.2814e-02, -5.7284e-03, 7.8398e-02, + 5.2745e-03, -4.2589e+00, -6.7703e-04, 3.3652e-02, -2.4619e-02, + -9.9009e-02, -3.9888e-02, -4.6650e-02, -4.1620e-02, 3.5950e-02, + -8.1256e-02, -2.8324e-02, 4.3199e-02, -9.3154e-03, -3.0566e-02, + 5.8488e-02, 3.1941e-02, -9.1054e-02, 4.4687e-02, -7.7019e-02, + 7.2983e-02, 4.5990e-02, -4.4156e-02, 1.3127e-02, -1.6789e-01, + -2.4004e-02, 7.0575e-02, 5.2642e-02, 3.7899e-03, 4.3697e-02, + -2.6423e-02, 8.3382e-02, 5.5484e-02, 3.6222e-02, 3.1565e-02, + 6.2682e-03, 
1.0727e-03, -1.8432e-02, -7.0222e-02, -4.8251e-04, + 2.5970e-02, 2.0257e-02, 5.5975e-02, -5.8172e-02, 6.7692e-02, + -9.9726e-03, -1.9012e-01, 1.2354e-01, 2.4521e-02, 1.1634e-01, + 1.0389e-01, 3.1179e-02, 7.4662e-02, 6.8046e-02, -2.9729e-02, + 2.2585e-03, 1.1500e-02, 1.7597e-03, -1.8727e-02, -4.7353e-02, + 1.0485e-01, -1.6798e-01, -6.5276e-02, 7.2302e-02, -4.9853e-02, + -3.5061e-02, -1.2070e-01, -5.1736e-02, 7.9296e-03, -5.0152e-02, + -3.6957e-02, -5.1987e-03, -5.0143e-02, -5.8339e-02, 7.5306e-02, + -2.9533e-02, 7.2287e-02, 9.0682e-02, 2.2872e-02, -5.3548e-02, + 2.0132e-02, 6.3997e-02, 6.5266e-02, -6.9182e-02, -1.0422e-01, + -7.9938e-02, 1.0629e-01, -3.4558e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-2.1896e-02, 2.0416e-02, 2.3441e-03, ..., 1.0908e-04, + 6.9656e-03, 1.4427e-02], + [ 3.4351e-03, -6.1264e-03, 2.0737e-02, ..., -1.4359e-02, + -2.4033e-02, -1.4053e-02], + [ 1.8631e-02, -2.3880e-02, -5.8861e-03, ..., 5.5122e-03, + -3.2663e-04, -2.0580e-03], + ..., + [-7.4425e-03, -9.9869e-03, 1.6281e-02, ..., 2.2583e-02, + 1.5378e-05, 1.3184e-02], + [ 7.1678e-03, -2.3453e-02, 1.8890e-02, ..., 1.1101e-02, + 3.5305e-03, 3.5629e-03], + [-1.4053e-02, -1.7029e-02, -9.1400e-03, ..., -1.0704e-02, + 1.3428e-02, 3.3951e-04]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.3743, -0.4089, -0.3171, ..., -0.2976, -0.0021, -0.3103], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0040, 0.0537, 0.0006, ..., -0.0110, 0.0033, 0.0026], + [ 0.0102, -0.0116, -0.0276, ..., -0.0249, -0.0013, 0.0136], + [-0.0247, 0.0174, -0.0146, ..., -0.0083, -0.0184, -0.0121], + ..., + [-0.0018, -0.0283, 0.0097, ..., 0.0254, -0.0131, 0.0048], + [ 0.0004, 0.0063, -0.0341, ..., -0.0153, 0.0024, 0.0111], + [-0.0007, 0.0055, -0.0035, ..., -0.0027, -0.0048, -0.0002]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 5.4077e-02, 1.3013e-01, 
-5.7983e-02, 1.5717e-02, 3.2715e-02, + 9.5520e-02, 1.2794e-02, -6.7444e-02, -1.2537e-01, 6.1417e-03, + -2.1545e-02, -5.6244e-02, -1.3098e-01, -8.5876e-02, -1.1154e-02, + -6.3354e-02, -1.7914e-02, -1.3092e-02, -3.7479e-03, -9.3323e-02, + -6.9122e-03, 1.2047e-02, 4.1595e-02, -1.2341e-01, 5.6030e-02, + 1.6724e-02, 6.0059e-02, 8.7769e-02, -5.6396e-02, 4.5135e-02, + 4.3274e-02, 9.0881e-02, -3.3398e-03, 6.0730e-02, 5.9242e-03, + -5.7678e-03, -8.1558e-03, -3.1082e-02, 5.3253e-02, 7.5500e-02, + 3.5614e-02, 2.6764e-02, 2.6077e-02, 2.8214e-02, -2.6535e-02, + 7.6233e-02, 4.6722e-02, 8.8577e-03, 2.0466e-03, 3.2928e-02, + -1.6464e-02, -3.5736e-02, 1.1829e-01, 2.7435e-02, 8.1360e-02, + 1.0065e-01, -2.0920e-02, -6.6109e-03, 3.3813e-02, -1.5533e-02, + 4.5837e-02, -7.1869e-03, 7.8430e-03, -3.3783e-02, 6.3721e-02, + 7.6782e-02, 4.5715e-02, -4.1016e-02, 4.4861e-02, 1.8143e-02, + -4.2053e-02, -3.8757e-02, 3.9459e-02, 4.6356e-02, -7.9651e-02, + -2.8778e-02, 1.2016e-03, 2.1317e-02, -1.1902e-01, -4.4128e-02, + -1.7929e-02, 3.0701e-02, 2.0523e-03, 3.7689e-02, 5.8594e-03, + -4.1199e-02, 4.6661e-02, 6.6071e-03, -5.0049e-02, 8.2016e-03, + -3.7476e-02, 6.2943e-03, -7.1754e-03, -8.7402e-02, -9.7504e-03, + 5.4092e-03, 1.8677e-02, -2.2797e-02, -1.3171e-01, -5.3009e-02, + -7.5150e-03, -5.6610e-02, -5.5656e-03, 4.2267e-03, -1.4664e-02, + -2.2949e-02, 1.0779e-01, 5.8380e-02, 3.7518e-03, 1.9089e-02, + -1.2642e-02, 4.3121e-02, 6.2134e-02, 9.3506e-02, 1.0779e-01, + -2.4292e-02, -4.2381e-03, -6.3324e-04, 8.0627e-02, 7.5951e-03, + 2.9419e-02, -1.2256e-01, -1.1737e-01, -3.6621e-02, 4.0970e-03, + 3.1769e-02, -3.0045e-02, 2.9526e-02, 5.0812e-02, -1.9028e-02, + 1.6342e-02, -1.6373e-02, -3.4607e-02, -5.0140e-02, 5.9967e-02, + 1.6113e-01, 2.9709e-02, 3.1261e-03, -4.0771e-02, -4.9377e-02, + -9.1324e-03, 1.8524e-02, -4.6753e-02, 7.7759e-02, -2.4277e-02, + 1.5205e-02, -5.0873e-02, 3.6407e-02, 1.1772e-02, 6.4819e-02, + 6.5430e-02, -7.9773e-02, 3.2196e-02, 7.5806e-02, -2.5482e-02, + -7.5073e-03, -7.5684e-02, 
4.1138e-02, 2.2705e-02, -3.2776e-02, + 3.8452e-02, 6.3801e-04, 6.2866e-03, -1.5839e-02, -3.5675e-02, + 1.3695e-02, -4.1656e-02, 8.6441e-03, -6.6895e-02, 2.8839e-02, + 1.4000e-03, 2.5787e-02, -1.7883e-02, 1.1032e-02, -1.0094e-02, + -2.7054e-02, 1.2245e-02, -2.2964e-02, -3.4332e-02, 5.9326e-02, + 9.2239e-03, -1.3293e-01, -7.0312e-02, -3.7109e-02, 1.0480e-01, + -2.2598e-02, -1.0231e-02, 3.6597e-01, -3.4851e-02, 4.1275e-03, + 5.7281e-02, -8.4778e-02, 2.5238e-02, 1.2749e-02, -2.2781e-02, + 4.7028e-02, 7.9575e-03, -1.0938e-01, 1.2695e-01, 2.5497e-02, + -7.2571e-02, 6.2744e-02, 4.2053e-02, 9.9335e-03, 3.8574e-02, + 4.9225e-02, 5.7495e-02, 3.7506e-02, -3.7750e-02, -9.9609e-02, + 5.7495e-02, 1.9272e-02, 2.8351e-02, 3.5667e-03, 3.9697e-04, + 2.4910e-03, 9.3384e-03, 5.7556e-02, -8.8989e-02, -5.3741e-02, + -1.5190e-02, -3.2867e-02, 3.6682e-02, 9.9060e-02, 2.8717e-02, + -1.0742e-01, -4.7791e-02, -2.6199e-02, 5.9601e-02, 6.9618e-03, + 2.2018e-02, 1.8509e-02, -1.4549e-02, -1.3170e-03, -3.5217e-02, + -5.4840e-02, -4.6570e-02, 2.1164e-02, 6.7932e-02, 3.7109e-02, + 3.8696e-02, -2.5635e-02, 9.4543e-02, -2.4261e-02, 1.5930e-02, + 4.0863e-02, 4.0527e-02, -1.0506e-02, -5.0842e-02, 7.0114e-03, + 2.9755e-02, 2.1561e-02, 8.7524e-02, -1.0522e-01, 1.0016e-01, + 5.6305e-02, -4.6234e-02, -1.5442e-02, -2.8503e-02, -1.6332e-04, + -5.5054e-02, -2.4738e-03, -4.9896e-02, -1.0181e-01, -1.2988e-01, + -3.0304e-02, 1.6689e-03, 1.8372e-02, -3.8300e-02, -1.8911e-03, + -8.8654e-03, 8.3740e-02, 5.7800e-02, -1.5884e-02, -7.7576e-02, + 5.2673e-02, 3.2654e-02, 9.7046e-03, 4.5502e-02, -5.1422e-02, + 1.2772e-02, 4.8584e-02, -4.2969e-02, 1.9821e-02, -2.5436e-02, + -2.6970e-03, -9.7885e-03, -2.3636e-02, 3.4607e-02, -6.4636e-02, + 6.8481e-02, -3.0792e-02, 3.5858e-02, -6.0699e-02, 3.0121e-02, + 4.3793e-02, 5.9090e-03, 6.3232e-02, 9.8190e-03, 5.9937e-02, + -4.1847e-03, 6.5125e-02, -3.9856e-02, 2.0767e-02, -6.0730e-02, + -3.4851e-02, 7.5874e-03, -7.2937e-02, -4.6158e-03, -1.9928e-02, + 9.6497e-02, 7.0251e-02, 
3.7750e-02, -3.4973e-02, 5.8075e-02, + -3.2928e-02, -6.7566e-02, -2.8946e-02, 8.9600e-02, 7.0740e-02, + -1.3000e-01, 3.0487e-02, -4.3457e-02, -2.4277e-02, 3.4985e-01, + -1.3965e-01, -6.5369e-02, -9.9548e-02, 3.9764e-02, -1.6895e-01, + 8.6853e-02, -6.1554e-02, 6.6452e-03, -5.3589e-02, -3.1342e-02, + 4.0321e-03, -5.3223e-02, 5.7159e-02, 2.5040e-02, -4.1748e-02, + -4.8035e-02, -1.7853e-02, -1.1917e-02, 1.6266e-02, 1.2947e-02, + -2.1835e-02, 8.0872e-03, 4.2450e-02, -1.1212e-01, -7.9285e-02, + 7.2083e-02, 3.1769e-02, 3.6835e-02, -1.0254e-02, -5.7716e-03, + -5.8929e-02, -4.0039e-02, -2.1820e-02, -2.5192e-02, 7.8430e-03, + -1.0059e-01, -6.3782e-02, -8.7280e-02, 1.7120e-02, 4.0039e-02, + -1.1322e-02, -1.0315e-01, 1.0278e-01, -7.9468e-02, 4.2999e-02, + -1.4030e-02, -6.9885e-02, -2.0172e-02, -6.4087e-02, -2.1561e-02, + 2.7435e-02, 2.4155e-02, 3.0685e-02, 4.2053e-02, -4.9347e-02, + -6.3416e-02, 3.6438e-02, -6.3232e-02, -8.7097e-02, -2.8366e-02, + -2.8931e-02, -7.0435e-02, -3.1204e-02, 3.8055e-02, -3.4760e-02, + -3.0624e-02, 7.2327e-02, 7.9041e-02, 5.4359e-03, -1.2573e-02, + -8.9905e-02, -2.4567e-02, 2.7084e-02, -2.0187e-02, 4.4556e-03, + 7.3730e-02, 6.7825e-03, 2.8248e-03, 6.6986e-03, 7.2746e-03, + -2.2919e-02, -9.0182e-05, -8.2169e-03, -5.8716e-02, 1.3069e-02, + -1.4679e-02, 4.9530e-02, 2.9053e-02, -4.5685e-02, 3.2463e-03, + -1.0907e-01, -3.2288e-02, -1.1620e-02, -3.1311e-02, 8.2031e-02, + 1.0834e-02, -3.6526e-03, 2.4414e-02, -1.3501e-01, 9.6207e-03, + 1.9760e-02, 1.4168e-02, -4.5715e-02, 8.3679e-02, 1.8021e-02, + 2.8198e-02, 2.4445e-02, 5.9471e-03, 9.4849e-02, 1.0663e-01, + 1.8204e-02, -5.3467e-02, 4.1718e-02, 7.1960e-02, -1.2573e-01, + 6.1646e-02, -2.7527e-02, -5.3162e-02, 2.3518e-03, -3.5828e-02, + -6.7505e-02, -8.8787e-04, 9.1410e-04, 3.1921e-02, 3.0121e-02, + -3.2654e-02, -6.5002e-02, -6.7810e-02, 1.0950e-01, 3.9642e-02, + 6.2180e-03, -2.6733e-02, 2.5520e-03, -1.6983e-02, -8.4595e-02, + -1.6708e-02, 5.6396e-02, -7.7637e-02, 1.9348e-02, -3.6469e-02, + -7.5317e-02, 
1.0544e-02, -4.8218e-02, 4.8027e-03, -6.1371e-02, + -5.4138e-02, -4.2664e-02, -7.4280e-02, -5.8807e-02, -5.2368e-02, + 2.7435e-02, 1.2366e-01, 3.3997e-02, 5.6122e-02, 2.0584e-02, + 8.3618e-02, -1.7443e-03, -3.7270e-03, -1.6403e-02, -2.6951e-03, + -1.7853e-02, -5.8105e-02, 9.6252e-02, 2.5726e-02, -6.2744e-02, + 5.7373e-03, 5.4840e-02, -1.0388e-01, 5.1575e-02, -4.5685e-02, + 2.1408e-02, -3.2715e-02, 6.8665e-02, 1.9913e-02, 3.2898e-02, + 3.3325e-02, -2.5101e-02, 1.5900e-02, -2.3422e-02, 4.2328e-02, + -3.1830e-02, 8.2031e-02, 6.8665e-02, -2.9129e-02, -8.8684e-02, + -4.8981e-02, -5.0171e-02, 1.8295e-02, 4.5891e-03, 4.2511e-02, + -2.2690e-02, 3.3752e-02, 8.6914e-02, 8.7463e-02, -9.0942e-02, + 3.5004e-02, 1.8845e-02, 3.2898e-02, -5.1819e-02, -4.3121e-02, + 1.3115e-02, 2.9495e-02, 1.5961e-02, -9.1095e-03, -4.0741e-02, + 6.4758e-02, -1.4880e-01, 4.6204e-02, -2.0798e-02, 1.3634e-02, + 4.1473e-02, -2.6871e-02, 1.2810e-02, -5.9021e-02, -4.3793e-02, + -1.5961e-02, 1.0028e-01, -2.3091e-04, 8.6670e-03, 1.3039e-02, + 1.0872e-02, 1.1694e-01, -1.8494e-02, 1.4246e-04, -3.5439e-03, + -9.9792e-02, -5.7251e-02, -3.1311e-02, 7.8857e-02, -1.2369e-03, + -4.5319e-02, -8.3740e-02, -1.8402e-02, -4.1321e-02, 6.1646e-02, + 1.0004e-03, -8.3237e-03, -8.2321e-03, 1.5274e-02, -6.9031e-02, + 1.0010e-02, 7.0992e-03, -7.8735e-02, -4.0985e-02, -1.7624e-02, + 5.8502e-02, 5.5878e-02, -3.2593e-02, 1.1520e-02, 2.4986e-03, + 8.0017e-02, -3.4580e-03, -3.2471e-02, -1.1490e-02, -1.7075e-02, + -7.3914e-02, 4.4220e-02, 1.0443e-01, -1.5915e-02, -1.0651e-02, + -1.4099e-02, -4.8737e-02, 4.7363e-02, 6.4575e-02, 1.1938e-01, + 1.6205e-02, 4.7150e-02, 2.2629e-02, 2.3514e-02, -5.4436e-03, + -3.2735e-04, -1.6495e-02, -7.3853e-02, 3.0045e-02, 5.2551e-02, + -5.2246e-02, -1.0040e-01, 2.6993e-02, -5.7159e-02, 3.2616e-03, + 3.1372e-02, 6.9275e-02, -7.4280e-02, 4.3976e-02, -3.7323e-02, + -5.0018e-02, 6.4575e-02, -1.2062e-02, 3.6926e-02, 3.2349e-02, + -3.4088e-02, 6.3293e-02, 1.2466e-02, -5.5664e-02, -2.7481e-02, + -4.1656e-03, 
-9.3412e-04, 4.5753e-04, -2.2842e-02, 2.7985e-02, + -9.8724e-03, 5.6267e-04, 2.1423e-02, 4.3091e-02, -1.7273e-02, + 1.1279e-01, -9.0759e-02, 5.3864e-03, 5.6061e-02, 5.5939e-02, + -2.3941e-02, -3.7170e-02, 5.0903e-02, 1.8997e-02, -4.5532e-02, + 1.2476e-01, 3.7060e-03, -1.0094e-02, -7.6111e-02, -3.5706e-02, + -4.8462e-02, -3.5919e-02, 5.0659e-02, 2.3468e-02, 9.0698e-02, + 3.3264e-02, 1.0193e-01, 6.3599e-02, -1.6205e-02, 7.1602e-03, + 4.4983e-02, -1.2030e-01, 3.1219e-02, -4.3671e-02, 5.3314e-02, + -5.5122e-03, 5.2277e-02, 1.9272e-02, 1.6037e-02, 7.8430e-02, + 2.0462e-02, -1.3867e-01, -7.6477e-02, 8.5632e-02, 1.6220e-02, + 7.0679e-02, 2.4048e-02, -4.8584e-02, 1.1574e-02, 6.0913e-02, + -5.9174e-02, 4.8065e-03, -3.1204e-02, 9.5139e-03, 1.3557e-02, + -7.6050e-02, 2.8015e-02, -5.7678e-02, 1.2917e-02, -2.8717e-02, + 2.3331e-02, -8.3801e-02, -4.6539e-02, -6.9847e-03, -1.4511e-02, + 6.0844e-03, -4.9347e-02, 2.1454e-02, 8.4381e-03, -6.3965e-02, + 9.4482e-02, -5.5725e-02, 2.3880e-02, -1.3222e-02, 2.0313e-03, + 3.6682e-02, 3.2166e-02, 9.2697e-03, -8.5693e-02, 8.2626e-03, + -3.2410e-02, -6.1340e-02, 4.4312e-02, -7.8308e-02, 6.9031e-02, + 2.3438e-02, -1.8707e-02, 3.7842e-02, 1.0576e-03, 1.6190e-02, + -1.0370e-01, -6.0181e-02, 6.3232e-02, 3.1796e-03, -1.2854e-01, + 1.2077e-02, -8.4839e-02, -5.0385e-02, -3.2532e-02, -4.2191e-03, + -1.1971e-02, -5.3650e-02, -1.0857e-02, 4.0588e-02, 4.0741e-02, + 6.7940e-03, -1.4281e-04, 6.3416e-02, 1.0284e-01, 5.7800e-02, + 4.9408e-02, 3.4637e-02, -3.8300e-02, 6.1218e-02, -3.0823e-02, + 8.1940e-03, -8.8730e-03, -4.8096e-02, -8.6975e-02, 8.5449e-03, + -2.7332e-03, -1.3794e-01, 5.2872e-03, -1.0187e-01, 4.6387e-02, + -1.6403e-02, 4.8035e-02, -6.8787e-02, 2.8000e-02, 3.2806e-02, + 6.9336e-02, 1.6815e-02, -2.1332e-02, 1.0016e-01, 5.1849e-02, + -3.4760e-02, 1.0941e-02, -3.6896e-02, 5.2338e-02, 1.2726e-02, + -2.6688e-02, 3.6530e-02, 9.5215e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.1306, 2.0981, 2.1663, 
2.0829, 1.7951, 2.1608, 2.2010, 2.0909, 2.2347, + 2.1553, 2.0846, 2.2494, 2.2396, 2.1285, 2.1372, 2.0650, 2.1098, 2.1325, + 2.2690, 1.6700, 2.2613, 2.1879, 2.1590, 2.0855, 2.1393, 2.1793, 2.0697, + 2.0706, 2.0371, 2.0495, 2.1932, 2.1432, 2.1056, 2.1178, 2.0806, 2.0542, + 2.3682, 2.0192, 2.0318, 2.2403, 2.0286, 2.0738, 2.1933, 2.1454, 2.0341, + 2.0834, 2.1870, 1.9817, 2.0910, 2.0121, 2.2396, 2.1091, 2.0853, 2.0440, + 2.2116, 2.0897, 2.0882, 1.9730, 2.0929, 2.1411, 2.1363, 2.1475, 2.0930, + 2.2281, 2.0509, 2.1295, 2.2263, 1.9539, 2.2303, 1.9053, 2.2407, 2.2046, + 2.2446, 2.3049, 2.2940, 2.2953, 2.1716, 2.0946, 2.2047, 1.9443, 2.0475, + 2.0535, 2.1913, 2.1422, 2.2416, 2.0748, 2.1369, 2.0475, 2.1431, 2.0515, + 2.2646, 2.1117, 2.0603, 2.0523, 2.0881, 2.1070, 2.1603, 2.0667, 2.2130, + 2.0835, 2.1439, 2.0512, 2.1125, 2.1444, 2.0461, 1.9122, 2.1266, 2.1854, + 2.0578, 2.0436, 2.0820, 2.3937, 2.1196, 2.2232, 2.2387, 2.0481, 2.1932, + 2.1360, 2.0806, 2.0098, 2.1164, 1.9950, 2.2060, 2.2159, 2.0401, 2.1459, + 2.0444, 2.1784, 2.2300, 2.1567, 2.1416, 2.0632, 2.1148, 2.0197, 2.1419, + 2.1275, 2.1502, 2.1169, 2.0210, 2.1767, 2.2734, 2.0312, 2.1906, 2.0491, + 2.1156, 2.0720, 2.1628, 2.0191, 2.1939, 2.1975, 2.1269, 1.9829, 2.2568, + 2.1802, 2.2635, 1.4847, 2.0943, 2.0363, 2.1120, 2.2034, 2.0194, 2.2002, + 2.1097, 2.0688, 2.0510, 2.1756, 2.1981, 2.0952, 2.0924, 2.0707, 2.0671, + 2.1223, 2.0485, 2.1029, 2.0321, 2.1441, 2.1635, 2.3610, 2.0506, 2.1866, + 2.2366, 2.1572, 2.1832, 2.2729, 2.0558, 2.0246, 2.1644, 1.3161, 2.0913, + 2.1085, 2.2300, 2.1219, 2.1403, 2.2661, 2.0771, 2.1381, 2.2069, 2.1151, + 0.9310, 2.0980, 2.2353, 2.0280, 2.1296, 2.0789, 2.1014, 2.0711, 2.1582, + 2.2039, 2.1553, 2.0854, 2.1312, 1.9870, 2.0954, 2.0356, 2.0757, 2.1471, + 2.0934, 2.1607, 2.0877, 2.1701, 2.2665, 2.1066, 1.8172, 2.1928, 2.1054, + 2.1729, 2.2115, 2.0668, 2.1303, 1.9412, 2.2088, 2.2902, 2.1197, 2.0195, + 1.9842, 2.3541, 2.2992, 2.1719, 2.1185, 2.3326, 2.0597, 2.2121, 2.0990, + 2.1920, 2.0137, 2.1492, 
2.0695, 2.0967, 2.0664, 2.2442, 2.1980, 2.0537, + 2.1239, 2.2239, 2.1115, 2.0760, 2.1666, 2.0412, 2.3497, 2.2904, 2.3730, + 2.0719, 2.2055, 2.1495, 2.2552, 2.1622, 2.1594, 2.1742, 2.1108, 2.1134, + 2.0495, 2.1505, 2.1692, 2.3587, 2.0808, 2.1928, 2.1677, 2.1319, 2.2773, + 2.1028, 2.1036, 1.9044, 2.1900, 2.0670, 2.1364, 2.0927, 2.0784, 2.0589, + 2.1780, 2.0068, 2.0723, 2.0721, 2.1785, 2.1709, 2.0420, 2.0222, 2.0584, + 2.3348, 2.0092, 2.1329, 2.0837, 2.1588, 2.1440, 2.2351, 2.1050, 2.0425, + 2.1074, 2.0984, 2.1500, 2.1535, 2.1695, 2.1366, 2.0727, 2.1510, 2.1025, + 2.0234, 2.1057, 2.1375, 2.0724, 2.1730, 2.1779, 2.1295, 2.1127, 2.0601, + 0.4807, 2.3870, 2.1398, 2.2474, 2.2304, 2.2226, 2.0155, 2.0902, 2.2085, + 2.1103, 2.0833, 2.0171, 2.1137, 2.1066, 2.1834, 2.1329, 2.2222, 2.0514, + 2.1268, 2.2083, 2.0987, 2.1011, 2.3077, 2.1029, 2.2446, 2.0978, 1.8194, + 2.1265, 2.1962, 2.2371, 2.1642, 2.0771, 2.0407, 2.2704, 2.2185, 2.0338, + 2.3705, 2.1359, 2.1662, 2.2103, 2.0700, 2.1263, 2.1311, 2.1644, 2.2601, + 2.1973, 2.1671, 2.1047, 2.1370, 2.3222, 2.0790, 2.0729, 2.1185, 2.1862, + 2.0944, 2.1344, 2.0862, 1.9719, 2.2108, 2.1109, 2.2341, 2.1641, 2.1254, + 2.3113, 2.1620, 2.1923, 2.1841, 2.1487, 2.2404, 2.1601, 2.0500, 2.0576, + 2.1119, 2.2564, 2.1640, 2.3131, 2.1320, 2.2965, 2.2485, 2.2046, 2.0289, + 1.9622, 2.0788, 2.0702, 2.1659, 2.1610, 2.1714, 2.1783, 2.1003, 2.1383, + 1.5933, 2.3478, 2.1246, 1.9951, 2.1622, 2.1895, 2.1283, 2.0383, 2.3347, + 2.2002, 2.1307, 2.0826, 2.0998, 2.0361, 2.0912, 2.2533, 2.1869, 2.0681, + 2.2428, 2.1953, 2.2473, 2.0651, 2.1201, 2.1390, 2.2267, 2.1952, 2.1608, + 2.0552, 2.1135, 1.6425, 2.0805, 2.0025, 2.1075, 2.1361, 2.2242, 2.1469, + 2.1484, 2.1327, 2.3234, 2.1158, 2.0765, 2.1809, 2.1726, 2.2022, 2.0506, + 2.1052, 2.1520, 2.0688, 2.2633, 2.1049, 2.1441, 2.0107, 2.1586, 2.0242, + 2.0553, 2.1383, 2.1448, 2.1316, 2.1199, 2.1036, 2.4179, 2.0712, 2.0769, + 2.2432, 2.1385, 2.0033, 2.1947, 2.1739, 2.1416, 1.4240, 2.0872, 2.1168, + 2.0558, 2.0731, 2.0900, 
2.3486, 2.1286, 2.1595, 2.1331, 2.0985, 2.2349, + 2.0733, 2.2393, 2.0185, 1.9720, 2.1799, 2.2404, 2.2769, 2.1214, 2.1365, + 2.1539, 1.9940, 2.0823, 2.0511, 2.1764, 2.2886, 2.0483, 2.1611, 2.1046, + 2.0842, 2.3037, 2.1485, 2.2134, 2.2764, 2.1331, 2.1390, 2.2793, 2.1325, + 2.0913, 2.0291, 2.0483, 2.2171, 2.0498, 2.0716, 2.0527, 2.0389, 2.0519, + 2.2236, 2.1044, 2.1652, 2.2721, 2.1719, 2.0850, 2.1445, 2.2287, 2.0876, + 2.0748, 2.2805, 2.2230, 2.1957, 2.1366, 2.1051, 2.0608, 2.1175, 2.0909, + 2.0974, 2.3656, 2.0853, 1.9904, 2.1722, 2.0264, 2.1505, 2.2116, 2.1064, + 2.3324, 2.0612, 2.0759, 1.5095, 2.1135, 2.0918, 2.1234, 2.1231, 2.0909, + 2.1507, 2.1431, 2.0905, 2.1110, 2.1229, 2.0950, 2.0726, 2.1123, 2.2312, + 2.1390, 2.2622, 2.2702, 2.1241, 2.0290, 2.1137, 2.1211, 2.0897, 2.1186, + 2.1072, 2.1211, 2.0325, 2.3841, 2.1235, 2.1657, 2.2012, 2.1920, 2.0612, + 2.1856, 1.9654, 2.0802, 2.2121, 2.1295, 2.1085, 2.0468, 2.1225, 2.0937, + 2.2090, 2.1711, 2.2392, 2.2404, 1.9501, 2.2067, 2.1174, 2.1059, 2.0687, + 2.0720, 2.2781, 2.0699, 2.0782, 2.1167, 2.1556, 2.1573, 2.0769, 2.1428, + 2.0000, 2.0540, 2.1057, 2.3078, 2.1170, 2.0252, 2.2114, 2.0516, 2.0569, + 2.1436, 2.1945, 2.1005, 2.0620, 2.1000, 1.9946, 2.0742, 2.2715, 2.1476, + 2.0796, 2.1513, 2.1469, 2.1263, 2.1124, 1.8206, 2.1527, 2.1983, 2.1449, + 2.0146, 2.1201, 2.1955, 2.1086, 2.1151, 2.0930, 2.2746, 2.0733, 2.1319, + 1.9999, 2.0404, 2.1487, 1.8035, 2.0896, 2.1148, 2.1295, 2.2288, 2.0789, + 2.0948, 2.1741, 2.2246, 2.1047, 2.0214, 2.1493, 2.1955, 1.9344, 2.0848, + 2.0497, 2.1492, 2.1086, 2.1639, 2.0673, 2.0541, 2.1296, 1.9733, 2.1538, + 2.1301, 2.2918, 2.0695, 1.9991, 2.2444, 2.0984, 2.1520, 2.1604, 2.1877, + 2.1085, 2.1740, 2.4152, 2.1196, 2.1161, 2.0564, 2.2205, 2.0721, 2.0152, + 2.2869, 2.1730, 2.2223, 2.1000, 2.1991, 2.0098, 1.9885, 2.1247, 2.1999, + 2.0865, 2.1030, 2.2179, 2.0859, 2.2840, 2.0955, 2.2556, 2.0865, 2.5612, + 2.1310, 2.1637, 2.2042, 2.3089, 2.2860, 2.0313, 1.5092, 1.9951, 1.9718, + 2.0639, 2.2435, 2.0679, 
2.1874, 2.0538, 2.1991, 2.2152, 2.0431, 1.9732, + 2.1273, 2.3086, 2.0617, 2.1179, 1.9938, 2.2863, 2.1030, 2.0925, 2.2335, + 2.2860, 2.1918, 2.0950, 2.0555, 2.2699, 2.3361, 2.3560, 2.0020, 2.2247, + 2.1124, 2.1613, 2.1211, 2.0804, 2.1438, 2.0524, 2.1160, 2.0022, 2.0361, + 2.0578, 2.0534, 2.2993], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 5.5032e-02, 3.4170e-01, -5.9914e-01, 5.7442e-02, -5.9289e-01, + 6.2105e-01, 7.2261e-01, -3.8350e-01, 5.3076e-01, -3.4490e-01, + -3.7516e-01, 7.6397e-01, -4.8466e-01, -1.9540e-01, 2.3205e-01, + 6.6639e-01, 3.8165e-02, 4.7632e-01, -7.5291e-01, -1.4750e+00, + 5.0280e-01, 4.2079e-01, 5.0035e-01, -4.4191e-01, 4.7763e-01, + -5.6680e-01, -3.7598e-01, 3.5411e-03, -6.2287e-01, 5.5517e-01, + -1.8523e-01, 2.2972e-01, -3.0193e-01, -4.0291e-01, -2.8643e-01, + -1.4825e-01, -9.7817e-01, -2.5017e-01, 3.0976e-01, 9.1308e-01, + -3.7209e-01, 5.0364e-02, 8.3947e-01, 1.0546e-01, -5.1225e-01, + -7.5814e-01, -6.4859e-01, -1.9139e-01, -3.6599e-01, -3.1565e-01, + -8.4970e-01, -9.4634e-02, -2.2861e-01, 6.4416e-02, -7.4689e-01, + 8.2814e-01, 6.7515e-02, -2.1095e-01, 1.0784e-01, -2.0049e-01, + -1.5426e-02, 2.9876e-01, 2.9834e-01, -9.7773e-01, -2.1292e-01, + 4.1629e-01, 5.9539e-01, -1.2394e-01, -5.7038e-01, -1.6600e+00, + 5.8589e-01, 3.1986e-01, 9.3529e-01, -8.1940e-01, -1.0018e+00, + -7.7484e-01, 5.0273e-01, 1.4810e-01, 5.6621e-01, 2.0840e-01, + -2.5548e-02, 3.3979e-01, -9.2684e-01, 1.6033e-01, -5.8794e-01, + -1.7960e-01, 5.1259e-01, -1.7504e-01, 5.2133e-01, -4.8449e-01, + -7.1617e-01, 7.2320e-01, -4.6025e-01, 2.7989e-01, -6.8861e-01, + 1.6533e-01, 6.9277e-01, 2.0693e-01, -4.5392e-01, 7.2708e-01, + 3.7570e-01, -7.4780e-01, 1.2439e-01, -6.6284e-01, -1.2287e-01, + 5.6412e-01, 2.9251e-01, 3.7662e-01, -1.5380e-01, -1.2422e-01, + 7.0778e-01, 8.5423e-01, -3.5530e-01, 4.0904e-01, 3.2800e-01, + -2.4588e-01, 9.1576e-01, 3.5030e-01, -3.2823e-01, -5.6558e-02, + -3.5270e-01, -7.7882e-02, -4.8140e-01, -4.8432e-01, 7.9960e-02, + -7.8382e-01, 
-1.7744e-01, -3.0857e-01, -4.1455e-01, 7.0633e-01, + 1.3336e-01, -3.3674e-01, -3.0745e-01, -2.2139e-01, 5.0010e-01, + 2.7472e-01, 5.4829e-01, -3.3975e-02, -3.4801e-01, 2.3116e-01, + -8.3419e-01, 2.5358e-01, 2.2674e-01, -1.5673e-01, -3.6639e-01, + -1.5545e-01, 6.0038e-01, -1.5873e-01, -5.5829e-01, 5.9090e-01, + 3.3606e-01, -4.3766e-02, 6.3776e-01, 1.2674e-01, -7.7525e-01, + 1.0622e+00, -1.5575e-01, 5.7224e-02, -1.2175e-01, -3.5464e-01, + -4.1974e-01, -5.5180e-01, 3.1076e-01, -1.1148e-01, 1.7761e-01, + 3.9893e-01, -8.1377e-01, 1.9128e-01, 2.2551e-01, 1.0480e-01, + 2.5536e-01, 3.7489e-01, -6.0449e-01, -1.7544e-01, -3.6786e-01, + -1.7595e-01, 5.1698e-01, -7.5201e-01, 8.6587e-02, 3.5645e-01, + -9.2023e-01, -6.3389e-01, 5.6677e-01, 7.7825e-01, 4.0414e-02, + 8.2350e-01, -7.1027e-01, 1.1759e+00, 4.5486e-01, -5.8566e-01, + -8.3278e-01, -2.3355e-02, 6.1520e-01, 9.4288e-01, 1.9277e-01, + 3.2174e-01, 7.2860e-01, 1.5114e-01, 5.5562e-01, 1.9258e-01, + -3.3681e-01, -6.2664e-01, 4.8709e-01, 3.9007e-01, -1.0146e-01, + 3.3661e-01, 2.6557e-01, 7.1143e-01, -4.7444e-01, -2.2915e-02, + 5.6967e-02, -2.6513e-01, -6.7503e-01, 8.1852e-01, 1.7758e-01, + -5.4827e-01, -6.1577e-01, 3.7921e-01, -3.2689e-01, -4.2346e-01, + 1.0098e+00, 8.1804e-02, -9.1237e-01, 4.6550e-01, 3.3835e-02, + -5.5621e-02, -6.1343e-01, -1.6698e-02, 2.1856e-01, -6.0267e-02, + -4.5651e-01, 5.7496e-01, 5.1042e-02, 3.3396e-01, -1.1056e-01, + -9.3004e-01, 5.3485e-01, 3.2634e-01, -3.2156e-01, 1.0520e+00, + 1.2905e-02, 7.6925e-01, -6.5054e-02, -5.2462e-01, -2.7286e-01, + -5.9786e-01, 8.3486e-02, -4.5704e-01, -3.2914e-01, 8.4377e-01, + -2.7605e-01, 1.9019e-01, -5.2997e-02, -4.1378e-01, 9.2904e-02, + -4.0969e-01, 5.7097e-01, -2.9682e-01, -1.1046e+00, -6.4378e-01, + -1.0371e+00, -1.7496e-01, 3.8344e-01, 7.4962e-01, -2.3920e-01, + -5.1637e-01, 6.3089e-01, 1.5819e-01, 5.0971e-01, 2.4068e-01, + 1.9098e-01, 1.2260e-01, -4.0640e-01, -1.0648e+00, -3.8075e-01, + 9.3032e-03, 3.6212e-01, 5.7357e-01, 6.9055e-01, -2.7114e-01, + 8.0547e-02, 
-8.7845e-01, -7.8596e-01, 2.5760e-01, -3.3892e-01, + 5.2147e-01, -1.9988e-01, 1.4552e-01, 6.2273e-01, -1.7925e-01, + -2.8361e-01, 3.1194e-01, 7.0649e-01, -6.5505e-01, -1.1719e-01, + -4.4102e-01, -1.1529e-01, -1.0064e+00, 5.8067e-01, -4.9356e-02, + -5.4669e-01, -5.6302e-02, -8.0066e-01, -3.0239e-01, -7.7490e-02, + 4.1506e-01, 3.2261e-01, 3.0288e-01, -4.8018e-01, -5.2130e-01, + 2.3988e-01, -4.4202e-01, -1.5315e-01, 1.1838e-01, 9.8189e-02, + 4.9815e-01, 7.1809e-01, 4.9991e-01, 1.3004e-01, 4.2302e-01, + -5.0610e-01, -8.3078e-01, 4.9045e-01, -8.0706e-03, 2.8603e-01, + -1.2553e+00, 2.8014e-01, -4.5281e-01, 5.2321e-01, -6.6674e-01, + 1.7911e-01, -3.0358e-01, -2.8774e-01, -1.6966e-01, -2.1777e-01, + -8.2017e-02, -6.3974e-01, 3.4140e-01, 7.0512e-01, -2.6187e-01, + 4.2697e-01, -9.8788e-02, -1.9254e-01, 5.5265e-01, -7.7105e-01, + 7.7795e-01, -8.2557e-01, 3.7116e-01, -2.1748e-01, -3.0085e-01, + 1.1101e-01, 4.6779e-01, 2.7369e-01, 1.1426e+00, -6.6837e-01, + -2.9639e-01, -3.4687e-01, -3.5086e-01, -5.5450e-01, -3.4801e-02, + -9.7990e-01, -5.0421e-01, -1.3055e-01, -4.7743e-01, 5.8862e-02, + 2.0139e-01, 2.2049e-02, 5.6779e-01, 1.2275e+00, 4.8432e-01, + 4.5270e-01, -2.4627e-01, -2.1459e-01, -5.2194e-01, -2.4221e-01, + -3.0220e-01, 3.5763e-01, 7.6412e-01, -8.6895e-02, -2.2266e-01, + 3.1286e-01, -2.7849e-01, -5.3784e-01, 1.2801e-02, -6.3741e-01, + -4.8683e-01, -2.1989e-01, 1.0171e+00, 3.2114e-01, -7.0300e-01, + -2.5995e-01, -3.3686e-01, 6.3376e-01, 6.2771e-01, -6.6254e-01, + 3.1605e-01, -3.3023e-01, 8.2851e-01, -2.5261e-01, -8.0842e-01, + -8.4885e-02, 1.0560e+00, -6.9100e-01, -3.3871e-01, 1.6286e-01, + -3.3739e-01, 3.4416e-01, 1.0518e-01, -1.4568e-01, 7.3312e-01, + 2.6994e-01, -3.9172e-01, -1.2606e-01, 3.7430e-01, 1.2941e+00, + -1.8404e+00, 1.4481e-01, -2.2216e-01, 5.6620e-01, 4.0119e-02, + -2.7102e-01, -2.9898e-01, 6.6150e-01, -3.5446e-01, -6.3626e-01, + 3.0112e-01, 4.1641e-01, -7.8385e-03, 1.6033e-01, 5.9720e-01, + -1.0396e+00, -6.8988e-02, 4.9328e-01, 3.7363e-01, 8.8177e-01, + 
-6.3460e-02, 6.0906e-02, 7.2328e-01, 4.1937e-01, -5.4564e-01, + 7.1990e-02, -1.1627e-02, 2.7944e-01, -2.0765e+00, -6.6142e-02, + 1.0473e-01, 2.9151e-01, 4.1840e-01, 5.2563e-01, -8.2629e-01, + -5.1596e-02, 6.0050e-02, -9.0046e-01, 2.2648e-01, -1.7885e-01, + 6.1356e-01, -5.4350e-01, -7.6380e-01, -1.0181e-01, 1.7266e-01, + -3.3133e-01, 8.8887e-02, -6.9486e-01, 1.1714e-01, -3.4276e-01, + 3.3015e-01, 1.0132e-01, -9.4859e-02, 3.0991e-01, -5.5856e-02, + -2.1844e-02, -1.9616e-01, -6.7070e-01, 3.0605e-01, -8.7142e-01, + -3.8150e-02, -4.0821e-01, -6.1820e-01, -2.8025e-01, -6.9023e-02, + -4.5809e-01, 6.2847e-01, 7.1941e-01, 2.3210e-01, -3.2175e-02, + -4.7614e-01, 2.2915e-01, 1.7289e-01, -2.0610e-01, -8.2476e-01, + 4.6670e-02, 4.5217e-01, 3.0375e-01, -8.2604e-02, -8.5447e-01, + -3.8456e-02, -8.1994e-01, -1.8847e-01, -2.1478e-01, 8.7968e-01, + -4.2970e-01, -5.4261e-01, -4.3325e-01, 1.0360e-01, 2.8478e-01, + 2.7084e-01, 3.8629e-01, 5.8479e-02, -4.9726e-02, -9.2089e-01, + -5.2837e-01, -2.0357e-01, -7.3693e-01, -4.4392e-01, 6.7021e-01, + -1.7075e-01, 4.6331e-01, -4.9681e-01, -2.5834e-01, -2.8214e-01, + 9.1656e-01, 2.3502e-01, -1.8248e-01, 2.8570e-01, -8.3115e-02, + -7.6035e-01, 3.9176e-01, 2.9743e-01, 2.1699e-01, 1.2622e-02, + 7.1951e-02, -2.9116e-01, -3.8441e-02, -1.4330e-01, 6.7795e-01, + -8.4475e-01, 5.5603e-02, -5.1013e-01, 3.5273e-01, 6.8286e-02, + 2.0730e-01, 7.8723e-01, -4.9373e-01, 5.6147e-01, 5.1230e-01, + 4.5580e-01, 1.5385e-01, -7.6108e-01, 6.2250e-02, 4.4146e-02, + -9.6293e-01, 1.5051e-01, 5.0428e-02, 1.6998e-02, -1.3017e-01, + 5.2094e-01, 9.0157e-01, -2.3312e-01, -9.2199e-01, -5.8327e-02, + 3.2615e-02, -2.0963e-01, -1.3304e-02, 5.9420e-01, 2.4892e-01, + 4.2144e-01, -1.3338e-01, 7.3720e-01, 6.7606e-01, -1.0390e-01, + 3.1712e-01, 4.1540e-02, 2.1947e-01, -2.6176e-02, 5.7473e-01, + 6.0464e-01, 1.1620e-02, -4.7551e-01, -5.2233e-01, -1.7980e-01, + 5.4628e-01, -3.4662e-01, 2.8681e-01, 4.0650e-01, 4.5632e-01, + 3.7570e-03, -3.2734e-01, -1.5770e-01, 1.2390e+00, 3.1241e-01, + 
-6.3380e-01, -7.3302e-01, 8.5430e-01, -6.2936e-02, -1.8730e-01, + 1.2957e-01, -4.5305e-01, -4.4873e-01, 8.0722e-01, 2.5926e-01, + 1.9403e-01, 8.0011e-02, -2.3443e-01, -6.4465e-01, 2.1859e-02, + 1.9577e-01, -3.9800e-01, -7.0151e-01, 2.0552e-01, 5.4888e-01, + -1.0330e-02, 3.4967e-01, -2.1443e-01, 6.5350e-01, 2.7752e-02, + 7.1228e-02, -9.6233e-02, 6.2011e-01, -1.6035e-01, 2.6784e-01, + 4.7279e-01, 2.4499e-01, -4.1408e-01, 6.9601e-01, 7.2197e-01, + -2.7174e-01, -1.3945e-01, 1.0073e+00, 8.1506e-01, -1.5321e-01, + -6.7199e-02, 5.5904e-01, -3.3913e-01, 2.5178e-01, 2.1693e-01, + -4.7386e-01, -1.3057e-01, 1.0067e+00, 4.0309e-01, -1.3929e-01, + 6.5192e-01, -7.4773e-01, -8.4036e-01, 2.4286e-01, -3.3858e-01, + 4.2005e-01, -5.4786e-01, 1.3617e-01, -2.3379e-01, 1.6159e-01, + 6.6706e-01, -8.7631e-02, 4.3947e-01, -1.2622e-01, 7.9351e-01, + 5.6074e-01, 2.8677e-01, -1.4647e-01, 2.5503e-02, 3.6957e-01, + -1.7680e-01, 4.9258e-01, -2.7404e-01, -6.6334e-01, -6.8055e-01, + -4.6036e-02, -1.5788e-01, -5.7529e-01, 8.0077e-01, -3.6329e-01, + -2.8435e-01, 3.6245e-01, 8.2002e-01, 4.7903e-01, 4.2087e-01, + -4.1194e-01, -3.2981e-01, 8.9196e-02, 2.5886e-01, 1.0111e-01, + 2.3851e-01, 1.9850e+00, -4.3724e-01, -3.1161e-01, -3.5585e-01, + 1.0762e+00, -1.9195e-01, -1.5232e-01, 7.7787e-01, -3.4547e-01, + -5.2549e-01, -2.9215e-01, -7.1992e-01, -3.3349e-01, -6.5178e-01, + 1.0218e+00, 3.1221e-02, -2.2267e-01, 3.3001e-02, 4.0659e-01, + -8.9277e-02, -4.2065e-01, 7.2138e-01, -3.0468e-01, 6.3441e-01, + 6.4458e-01, -2.5796e-01, -2.9764e-02, -3.8748e-01, -1.4858e-01, + 3.9582e-01, -5.3268e-01, -2.0677e-02, -7.5198e-01, -1.4154e-01, + -3.5085e-01, -1.7420e-01, 6.5263e-01, -3.9491e-01, -1.2203e+00, + -3.2941e-01, -7.7585e-02, -5.8524e-01, -8.6829e-01, -7.8241e-01, + -3.4036e-01, 1.0552e+00, 1.2820e-02, -4.2201e-02, -1.9478e-01, + -8.3023e-01, -2.5496e-01, 7.6814e-01, -1.2869e-01, 2.9615e-01, + 6.5342e-01, 1.4642e-02, 1.9967e-01, -2.6521e-01, -8.1220e-01, + -3.3556e-01, 5.7942e-02, 2.7999e-01, -2.4631e-01, 
1.3797e-01, + 1.5128e-01, -6.2344e-01, 6.0882e-01, 9.6539e-01, 5.9792e-01, + 2.6680e-01, -6.7106e-01, -1.1571e+00, 8.1228e-01, 1.1491e-03, + 2.5220e-01, -2.8103e-01, -5.5863e-01, 5.5975e-01, 4.4755e-01, + -4.1402e-01, -4.9207e-01, -4.5646e-01, 7.1663e-02, 3.1954e-01, + 2.2474e-01, 3.8796e-01, 6.4554e-01], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0420, -0.0440, -0.0347, ..., -0.0244, -0.0091, 0.0145], + [-0.0261, 0.0071, 0.0178, ..., 0.0025, -0.0036, -0.0314], + [ 0.0267, -0.0007, 0.0216, ..., 0.0241, 0.0350, -0.0015], + ..., + [-0.0045, -0.0047, 0.0123, ..., -0.0035, 0.0097, 0.0152], + [-0.0158, -0.0261, -0.0006, ..., -0.0156, -0.0045, 0.0177], + [-0.0226, -0.0010, -0.0124, ..., 0.0051, 0.0012, 0.0042]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 0.1199, -0.1290, 0.1978, ..., -0.0250, 0.0124, -0.0064], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-5.5428e-03, -2.7206e-02, 2.7370e-03, ..., 2.2766e-02, + -3.1643e-03, -1.5764e-03], + [ 2.0981e-02, -1.9547e-02, 1.0455e-04, ..., -1.9424e-02, + -8.0032e-03, 3.5324e-03], + [ 1.8829e-02, 1.2306e-02, -3.6640e-03, ..., -1.7288e-02, + 6.9389e-03, 1.1398e-02], + ..., + [-1.0307e-02, 1.9897e-02, 5.8711e-05, ..., -1.7059e-02, + 9.9411e-03, 2.1317e-02], + [-2.5986e-02, -2.5024e-02, 1.0292e-02, ..., 2.4624e-03, + -7.9422e-03, -1.5936e-03], + [ 1.5373e-02, 1.6113e-02, -3.0041e-03, ..., 5.8517e-03, + 7.4081e-03, 2.3529e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 2.0859e-02, -2.0630e-02, -5.0323e-02, -1.5854e-02, 1.8677e-02, + -8.5983e-03, 1.6937e-02, -4.2786e-02, -3.0975e-02, 2.8900e-02, + 3.1143e-02, -3.3112e-02, -6.0944e-02, 7.2403e-03, 6.2134e-02, + 1.5930e-02, -6.3477e-02, 1.5442e-02, -9.2545e-03, 4.6425e-03, + -2.2705e-02, 2.4551e-02, 3.8788e-02, -8.3008e-03, -5.0537e-02, + -3.0624e-02, 3.3783e-02, -5.7587e-02, -1.5533e-02, -1.8036e-02, + -1.4130e-02, 
4.0894e-03, 4.6051e-02, -8.1299e-02, 3.1738e-02, + 1.8219e-02, -1.5175e-02, 3.0014e-02, 7.3547e-02, 2.8801e-03, + -6.2447e-03, -3.5858e-02, -1.1673e-02, -3.0060e-02, 2.4414e-02, + -1.2505e-02, -9.6924e-02, 3.1174e-02, 1.0956e-01, -1.3878e-02, + -5.0735e-03, 4.1199e-02, 4.9133e-02, -1.2459e-02, -3.4515e-02, + 8.7097e-02, 1.1810e-02, -4.8706e-02, 2.0004e-02, 9.0179e-03, + -1.4343e-02, -6.8604e-02, 2.2568e-02, 3.3478e-02, 1.3725e-02, + 7.2136e-03, 2.1301e-02, -1.0628e-02, -2.1362e-02, -1.1755e-01, + 1.2100e-02, 1.5434e-02, -7.8491e-02, 9.7656e-03, -3.3630e-02, + -1.3838e-03, -2.8286e-03, -3.0460e-03, -1.3405e-02, 5.1422e-02, + 1.9658e-04, 2.6611e-02, -4.2328e-02, 3.4241e-02, 7.5569e-03, + 4.6661e-02, 1.2703e-02, -1.2032e-02, -3.1528e-03, 4.0527e-02, + 5.0720e-02, 9.2087e-03, -6.1989e-03, -1.1612e-02, 4.4937e-03, + 8.2321e-03, 8.4839e-03, -7.0435e-02, 7.0686e-03, -1.1665e-02, + 3.5065e-02, 5.2277e-02, -2.3499e-02, 6.0242e-02, 6.7566e-02, + 1.6663e-02, 5.6458e-02, 1.2810e-02, 1.6693e-02, -5.4646e-04, + -6.7078e-02, 2.1118e-02, -9.4727e-02, 2.5650e-02, 1.8707e-02, + 1.2650e-02, -4.4159e-02, 2.3376e-02, 7.8674e-02, 1.8646e-02, + -6.2256e-02, -1.5190e-02, -2.3056e-02, 1.1292e-02, 1.6510e-02, + -1.3107e-02, 2.2736e-02, 3.0975e-03, -4.0283e-02, -1.1543e-02, + -4.8866e-03, -6.9153e-02, 1.1856e-02, -1.8356e-02, -2.3087e-02, + -4.6692e-02, 2.5101e-02, 1.2138e-02, 4.1237e-03, -6.6589e-02, + 7.2510e-02, -4.6616e-03, -7.1167e-02, -2.3834e-02, 7.9498e-03, + 1.5915e-02, -2.0721e-02, -1.6296e-02, 1.0139e-02, 4.0131e-02, + 3.5797e-02, 2.6913e-03, 2.5009e-02, -2.1103e-02, -4.7577e-02, + 6.7444e-02, 1.1787e-02, -2.8687e-02, -2.8244e-02, 1.5015e-02, + 1.8646e-02, 7.9803e-03, 3.1647e-02, -1.0902e-02, 7.0740e-02, + -5.1086e-02, -3.5431e-02, 4.2816e-02, -4.6844e-02, -9.9792e-03, + -1.7197e-02, -9.9258e-03, 3.2501e-02, 4.8279e-02, 3.3508e-02, + -1.7410e-02, -8.0200e-02, 1.0544e-02, -6.9092e-02, -2.8915e-02, + -9.5886e-02, -5.0598e-02, 2.9327e-02, 1.7441e-02, 1.2245e-02, + 4.3610e-02, 
2.4643e-02, -5.5542e-02, -2.7710e-02, 1.4946e-02, + 1.3275e-02, -1.6937e-02, -6.4697e-02, -2.3621e-02, -2.4048e-02, + 1.6418e-02, -8.3389e-03, 8.1863e-03, -1.1986e-02, -1.3840e-02, + -9.3201e-02, -2.2182e-03, 2.0386e-02, 5.4901e-02, -2.2339e-02, + -4.2969e-02, 2.8580e-02, 1.3611e-02, -3.8452e-03, 4.8706e-02, + -3.9948e-02, 2.0447e-02, -1.2100e-02, 6.1005e-02, 3.1830e-02, + 8.1665e-02, -5.4077e-02, -3.3966e-02, -9.5291e-03, -7.4244e-04, + 5.3467e-02, 2.3865e-02, 5.9433e-03, 3.8483e-02, -6.2675e-03, + -6.4148e-02, 3.1708e-02, 8.2064e-04, 3.6926e-03, 1.6327e-03, + -3.8929e-03, 2.4933e-02, 5.3253e-02, 6.4163e-03, -4.3365e-02, + 5.6274e-02, 2.8351e-02, 1.0307e-02, 2.8305e-03, -2.1515e-02, + 1.6968e-02, 6.2744e-02, 1.4587e-02, 5.6839e-03, 2.6646e-03, + 2.7756e-02, 8.0505e-02, 5.8472e-02, 3.3813e-02, 4.8637e-03, + -1.0284e-02, 4.4434e-02, 1.3077e-02, 4.7729e-02, 3.7384e-02, + 9.8267e-03, -2.0920e-02, -3.3600e-02, -1.3557e-02, 1.5671e-02, + 6.0120e-03, 1.8631e-02, 7.0740e-02, 4.0985e-02, -3.2288e-02, + -2.6886e-02, -3.2410e-02, 3.0579e-02, -3.1174e-02, -5.9692e-02, + 2.9373e-02, 1.3802e-02, -6.5918e-03, -7.5684e-02, 1.1101e-02, + -1.0614e-03, -2.8885e-02, -3.6392e-03, -1.3153e-02, 3.3112e-02, + -6.7383e-02, 1.3893e-02, 5.3741e-02, -1.1772e-02, -6.5384e-03, + 9.5139e-03, 3.7842e-02, 7.5928e-02, 3.4912e-02, -4.2908e-02, + -3.3539e-02, 3.1891e-02, -1.3733e-02, 5.4596e-02, -2.0233e-02, + -3.7201e-02, -2.6230e-02, -3.4695e-03, -3.3207e-03, -1.5518e-02, + 2.8442e-02, 5.5756e-02, 4.7729e-02, -1.6388e-02, 7.0251e-02, + -6.8298e-02, 3.2867e-02, 3.2898e-02, -3.7212e-03, 3.6560e-02, + 5.8746e-02, 3.7781e-02, 1.0370e-01, -6.2286e-02, -3.5126e-02, + -4.3030e-02, 1.2123e-02, 3.3966e-02, 4.0016e-03, -1.0986e-02, + -2.0584e-02, 1.5541e-02, 1.2108e-02, 6.8237e-02, 3.9673e-01, + -1.8234e-03, 5.7098e-02, 1.4084e-02, 5.7037e-02, 1.8982e-02, + 3.2227e-02, -5.1941e-02, 2.2110e-02, -1.8753e-02, 8.0643e-03, + -2.7237e-02, 1.2276e-02, 1.0309e-03, -1.6800e-02, 3.3051e-02, + 3.7746e-03, 5.0850e-03, 
-3.6285e-02, -5.2673e-02, 3.6041e-02, + -1.0266e-01, 8.7929e-04, 2.5345e-02, -1.6159e-02, -6.3992e-04, + 3.1189e-02, -1.1711e-02, 1.3405e-02, 9.0408e-03, -2.3865e-02, + 1.2596e-02, 7.6828e-03, -6.0394e-02, -2.0798e-02, 2.5085e-02, + -5.6953e-03, -1.1284e-02, -2.2385e-02, -5.0415e-02, 5.0018e-02, + 5.1208e-02, 5.1918e-03, 3.9734e-02, 3.3112e-02, 9.2224e-02, + -4.4189e-02, -1.9226e-03, -1.9653e-02, -3.7170e-02, 4.0375e-02, + -8.1253e-03, -3.9558e-03, -7.6294e-02, -5.6061e-02, -6.2012e-02, + 2.2400e-02, 1.1206e-05, 4.1046e-02, 5.8603e-04, -3.8513e-02, + -4.7455e-03, 1.8951e-02, -6.6681e-03, -3.8815e-03, 4.3579e-02, + 7.7148e-02, -1.9348e-02, 1.0681e-02, 1.5076e-02, -4.2725e-02, + 5.6076e-03, -3.1250e-02, -2.6230e-02, -6.9519e-02, -5.3345e-02, + -5.9875e-02, -1.8280e-02, 2.1881e-02, -2.3605e-02, 2.3453e-02, + -3.0563e-02, -4.1122e-03, -4.6417e-02, -2.5146e-02, -2.1683e-02, + 4.3060e-02, 3.7506e-02, 4.7516e-02, 2.2934e-02, -1.1604e-02, + -1.3635e-01, 6.0699e-02, 3.2928e-02, 1.8631e-02, 6.5689e-03, + 1.3412e-02, -4.0649e-02, 4.2175e-02, 3.3140e-05, -1.4381e-02, + 4.6204e-02, 5.7404e-02, -1.4488e-02, -6.7291e-03, 1.2566e-02, + 1.5469e-03, -3.6774e-02, -3.0350e-02, 2.1133e-02, 1.8555e-02, + 6.1417e-03, -1.1955e-02, -6.7871e-02, 1.5640e-02, -6.0638e-02, + -5.2887e-02, 2.1347e-02, 1.8936e-02, -9.1248e-02, 6.2103e-02, + -1.8204e-02, 1.1002e-02, 2.5787e-02, 4.5624e-02, 3.7384e-02, + -4.1466e-03, 6.1035e-02, -2.8343e-03, 1.4069e-02, -1.1894e-02, + -7.2815e-02, -8.3847e-03, -1.7099e-03, 8.9493e-03, 2.1667e-02, + 9.3002e-03, 3.6957e-02, -2.6047e-02, 7.4730e-03, -3.2135e-02, + -7.1777e-02, 5.0116e-04, 2.8610e-06, -1.5457e-02, -5.6915e-02, + 5.3070e-02, 1.1032e-02, 1.0065e-01, 5.8472e-02, -2.7676e-03, + 1.3481e-02, -1.7410e-02, -1.6434e-02, -1.0696e-02, -3.8223e-03, + -3.1616e-02, -2.2751e-02, -5.1239e-02, 1.8570e-02, -7.3914e-02, + -9.0256e-03, 5.8990e-02, 4.5967e-03, 7.6408e-03, 6.4697e-03, + 5.3978e-03, -1.4824e-02, -9.7839e-02, -1.1887e-02, -7.8430e-03, + -1.9012e-02, 
-4.1901e-02, 1.6632e-02, 6.9397e-02, 2.6230e-02, + 8.9722e-03, -4.3068e-03, 6.9458e-02, 1.2672e-02, 5.3263e-04, + 9.3460e-03, -1.3313e-02, 5.7404e-02, 4.2664e-02, 1.2894e-02, + 7.8430e-02, 1.7822e-02, 1.2802e-02, 6.7505e-02, -1.9592e-02, + -3.7750e-02, 6.7322e-02, 2.6031e-02, -1.2741e-02, 8.3923e-03, + 5.6122e-02, -1.0010e-02, 2.6276e-02, -6.3904e-02, 5.0446e-02, + 2.3346e-02, 2.2293e-02, 9.0866e-03, 3.9825e-02, -2.2980e-02, + -1.8143e-02, -3.0060e-02, 2.7557e-02, 2.1820e-02, 4.9774e-02, + 1.0857e-02, 3.8757e-02, -2.5162e-02, -4.6921e-03, -1.2398e-02, + -1.0277e-02, -9.8328e-02, -1.1002e-02, 1.0178e-02, 2.4597e-02, + -2.8351e-02, -1.9394e-02, 3.0487e-02, -3.8757e-03, -7.9880e-03, + -1.2405e-02, -1.8463e-02, -1.5091e-02, 4.1901e-02, 2.8778e-02, + -1.0979e-02, 9.3384e-03, 3.1494e-02, -3.5339e-02, 3.8204e-03, + 2.4368e-02, -1.1688e-01, -4.3396e-02, 1.4755e-02, 1.2825e-02, + -1.5335e-03, 4.4495e-02, -2.8778e-02, -3.5553e-03, -8.0643e-03, + -1.2749e-02, -5.0240e-03, 2.8473e-02, 3.6163e-02, 4.6005e-03, + 5.8746e-02, -3.0441e-02, -2.9129e-02, 3.5706e-02, 2.1362e-02, + 3.8528e-03, 4.5837e-02, -4.4647e-02, -5.2826e-02, 2.2247e-02, + -2.3148e-02, 4.0466e-02, -3.2379e-02, -4.2969e-02, -8.1360e-02, + 2.4963e-02, 4.4861e-02, -1.9272e-02, -1.4610e-02, 1.7929e-03, + -7.0992e-03, 3.1799e-02, 1.0791e-01, 3.3417e-02, 4.9744e-03, + 4.3732e-02, 3.1586e-02, 1.9821e-02, -4.5013e-02, -4.3640e-02, + 7.3486e-02, -2.6794e-02, -1.0699e-01, 3.5973e-03, -3.3569e-02, + -4.5349e-02, 6.7139e-03, -5.1727e-02, -3.2318e-02, 1.1993e-02, + -1.4305e-03, 2.7252e-02, -5.1636e-02, 2.9587e-02, -1.5289e-02, + 8.7433e-03, 7.8430e-03, 9.8114e-03, 1.2848e-02, -5.4199e-02, + 1.4542e-02, 1.8951e-02, 9.6130e-03, 2.9816e-02, -9.6512e-03, + -1.6708e-02, -2.7252e-02, -6.4148e-02, -6.6895e-02, -2.6230e-02, + 2.5436e-02, -6.1836e-03, -5.2185e-02, -1.8524e-02, -1.3237e-02, + -5.5725e-02, -3.5126e-02, -5.2917e-02, -6.3354e-02, -3.4851e-02, + -1.6068e-02, -2.1957e-02, 5.4092e-03, -5.4207e-03, 1.1559e-02, + 5.0781e-02, 
-1.1292e-02, 2.5009e-02, 8.1558e-03, 4.3564e-03, + -1.8936e-02, 3.6201e-03, -2.3834e-02, -1.9026e-03, 5.2795e-03, + -4.9174e-05, -3.1952e-02, 1.6068e-02, 7.0923e-02, 3.7048e-02, + -2.2232e-02, 6.5308e-02, -1.3069e-02, -8.0872e-02, -1.5175e-02, + 6.2439e-02, 1.6174e-02, -2.7466e-02, -2.0432e-02, -2.8900e-02, + -3.3073e-03, 7.0862e-02, -2.1133e-02, -7.9880e-03, -5.4413e-02, + -3.5095e-02, 1.4954e-01, -5.7800e-02, 3.8818e-02, 2.6459e-02, + -1.6842e-03, 4.2542e-02, -2.4109e-02, -2.2034e-02, 5.8060e-03, + -2.1992e-03, 4.8561e-03, 6.8779e-03, -3.0319e-02, 3.6865e-02, + 2.5711e-02, -3.7323e-02, -5.0201e-02, -5.8411e-02, -2.3453e-02, + 2.4719e-02, 4.3884e-02, -8.3029e-05, -3.0212e-02, -2.6596e-02, + 3.6194e-02, 5.2368e-02, -1.2566e-02, -1.8072e-03, -4.1412e-02, + 1.4153e-02, 4.2206e-02, -4.5441e-02, 1.7147e-03, 1.3176e-02, + -2.5970e-02, -7.2336e-04, -3.8208e-02, 4.0283e-02, 7.5188e-03, + -9.6054e-03, 4.3526e-03, -9.2087e-03, 4.0550e-03, 3.4302e-02, + -1.6312e-02, 1.7105e-02, 2.2003e-02, -1.6708e-02, -6.7749e-02, + -8.1940e-03, -3.3478e-02, 1.3794e-02, -2.0035e-02, -3.3875e-02, + 1.3062e-02, -3.4912e-02, 1.0956e-02, -2.8534e-03, -3.4210e-02, + -2.6230e-02, -2.7374e-02, -3.8452e-02, -3.9124e-02, 3.7598e-02, + 2.9434e-02, -6.7993e-02, 2.3544e-02, 2.5116e-02, -6.5269e-03, + -2.5787e-02, -1.6769e-02, -4.9164e-02, -3.3234e-02, 1.2070e-02, + 9.2224e-02, -7.6599e-02, 1.3876e-03, 2.7161e-02, 5.3253e-03, + 5.2765e-02, 7.7171e-03, 4.9408e-02, 2.0340e-02, -4.4403e-02, + 6.4430e-03, 9.6924e-02, -3.0487e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5612, 1.6291, 1.5122, 1.5747, 1.6225, 1.5788, 1.5892, 1.5757, 1.6082, + 1.5988, 1.6139, 1.5868, 1.5801, 1.6259, 1.5611, 1.5231, 1.6739, 1.6669, + 1.5990, 0.4274, 1.5622, 1.6011, 1.5774, 1.5165, 1.6530, 1.5793, 1.5959, + 1.5585, 1.6473, 1.4690, 1.5691, 1.5691, 1.6401, 1.5813, 1.6636, 1.5143, + 1.6102, 1.6316, 1.6255, 1.5060, 1.5319, 1.6488, 1.6245, 1.6413, 1.6142, + 1.5674, 1.5211, 1.6072, 
1.6012, 1.5171, 1.6199, 1.5440, 1.5075, 1.5305, + 1.5490, 1.6551, 1.5008, 1.6083, 1.6627, 1.5574, 1.5938, 1.5662, 1.5658, + 1.5936, 1.5152, 1.6172, 1.4900, 1.5247, 1.6271, 1.5863, 1.5609, 1.7847, + 2.1021, 1.6023, 1.6314, 1.5624, 1.6506, 1.4861, 1.5519, 1.6230, 1.5676, + 1.5654, 1.5644, 1.6012, 1.5914, 1.5827, 1.5950, 1.5313, 1.6519, 1.6020, + 1.5715, 1.5640, 1.5664, 1.6471, 1.5941, 1.5665, 1.6015, 1.5576, 1.5990, + 1.6531, 1.5997, 1.5374, 1.5525, 1.6357, 1.5070, 1.6177, 1.5243, 1.5483, + 1.6100, 1.6093, 1.5234, 1.6186, 1.6167, 1.6678, 1.6350, 1.5438, 1.5267, + 1.5384, 1.6124, 1.5812, 1.6419, 1.5493, 1.5972, 1.5512, 1.6389, 1.6147, + 1.4907, 1.5437, 1.5886, 1.5572, 1.5550, 1.4767, 1.5170, 1.5503, 1.5716, + 1.5880, 1.6357, 1.5919, 1.6067, 1.5730, 1.5858, 1.5905, 1.6521, 1.5865, + 1.5048, 1.5617, 1.5851, 1.6758, 1.5167, 1.5563, 1.5114, 1.5789, 1.5416, + 1.6574, 1.5917, 1.3285, 1.6240, 1.6606, 1.6460, 1.6168, 1.5033, 1.5473, + 1.6333, 1.5656, 1.5466, 1.5838, 1.5561, 1.5466, 1.5480, 1.6122, 1.5619, + 1.5732, 1.5800, 1.5441, 1.6047, 1.6982, 1.6320, 1.6484, 1.6278, 1.6129, + 1.6167, 1.5956, 1.6070, 1.5811, 1.6763, 1.5567, 1.6143, 2.1741, 1.6671, + 1.5965, 1.5974, 1.5375, 1.6123, 1.5943, 1.5073, 1.5639, 1.5723, 1.6553, + 3.4704, 1.5658, 1.5836, 1.6005, 1.6608, 1.6609, 1.6136, 1.5075, 1.5065, + 1.5464, 1.4956, 1.5471, 1.5916, 1.5797, 1.5640, 1.4862, 1.5662, 1.5947, + 1.5522, 1.5988, 1.5318, 1.6886, 1.4830, 1.5993, 1.5229, 1.5385, 1.5668, + 1.5360, 1.5791, 1.6482, 1.6321, 1.7258, 1.6303, 1.6478, 1.7792, 1.5524, + 1.5670, 1.6981, 1.5449, 1.5758, 1.5182, 1.6659, 1.6315, 1.5490, 1.6675, + 1.6144, 1.6283, 1.5757, 1.5882, 1.5369, 1.5254, 1.7189, 1.6006, 1.6993, + 1.5655, 1.5378, 1.6526, 1.6030, 1.5295, 1.5335, 1.5831, 1.5121, 1.6575, + 1.5789, 1.5940, 1.5068, 1.4599, 1.5887, 1.5618, 1.5181, 1.7093, 1.4992, + 1.6316, 1.5302, 1.5932, 1.5249, 1.6118, 1.5605, 1.4670, 1.5210, 1.6202, + 1.6143, 1.5616, 1.7132, 1.6734, 1.6960, 1.6497, 1.5144, 1.5368, 1.5412, + 1.5550, 1.5144, 1.5971, 
1.6508, 1.6773, 1.5563, 1.6012, 1.5162, 1.5163, + 1.6103, 1.6071, 1.6437, 1.5799, 1.5341, 1.6273, 1.6276, 1.6143, 1.6015, + 1.5359, 1.5915, 1.5081, 1.5776, 1.5932, 1.6629, 1.6389, 1.6354, 1.6139, + 1.5766, 1.5325, 1.5796, 1.5362, 1.5868, 1.5815, 1.5430, 1.6207, 1.6325, + 1.0084, 1.3852, 1.6033, 1.6354, 1.6064, 1.6104, 1.5252, 1.6160, 1.6036, + 1.5788, 1.5968, 1.5653, 1.5560, 1.4895, 1.5731, 1.5991, 1.6102, 1.5368, + 1.6023, 1.5326, 1.6283, 1.6053, 1.5736, 1.6930, 1.5774, 1.5697, 1.6059, + 1.6048, 1.5693, 1.6887, 1.5748, 1.6362, 1.5797, 1.5532, 1.5407, 1.6113, + 1.5052, 1.6081, 1.6208, 1.5894, 1.5822, 1.5563, 1.6003, 1.5935, 1.6142, + 1.5649, 1.5751, 1.6461, 1.5523, 1.5837, 1.5288, 1.5320, 1.5913, 1.5801, + 1.4600, 1.5917, 1.6242, 1.6360, 1.4584, 1.6947, 1.5531, 1.5535, 1.6059, + 1.5470, 1.6079, 1.5324, 1.6425, 1.4971, 1.5478, 1.5758, 1.6043, 1.6234, + 1.4722, 1.6315, 1.6825, 1.6424, 1.5320, 1.6130, 1.6026, 1.5700, 1.5041, + 1.5321, 1.5527, 1.5418, 1.6419, 1.4374, 1.6255, 1.5188, 1.5278, 1.5432, + 1.4489, 2.3006, 1.5959, 1.7023, 1.5785, 1.6375, 1.5754, 1.5096, 1.6006, + 1.6337, 1.5149, 1.5128, 1.5400, 1.5957, 1.5554, 1.5141, 1.6146, 1.4660, + 1.6387, 1.6256, 1.5380, 1.6197, 1.5558, 1.4969, 1.5637, 1.5350, 1.5603, + 1.5597, 1.5843, 1.3633, 1.5204, 1.6087, 1.5517, 1.5795, 1.6358, 1.5697, + 1.5797, 1.5676, 1.6028, 1.5955, 1.5415, 1.6213, 1.4824, 1.5580, 1.5154, + 1.6320, 1.5831, 1.5853, 1.5625, 1.5538, 1.5902, 1.6137, 1.5797, 1.5854, + 1.6301, 1.5884, 1.6639, 1.6174, 1.6834, 1.5654, 1.6103, 1.5832, 1.6769, + 1.6275, 1.5682, 1.5536, 1.5728, 1.5328, 1.6245, 1.3324, 1.6080, 1.6106, + 1.5931, 1.5965, 1.6213, 1.5882, 1.6126, 1.6217, 1.5003, 1.5451, 1.5859, + 1.6413, 1.5879, 1.4787, 1.5228, 1.6128, 1.6167, 1.6126, 1.4987, 1.6039, + 1.4994, 1.5541, 1.4806, 1.6224, 1.5732, 1.5347, 1.5990, 1.6350, 1.6045, + 1.5526, 1.5640, 1.6160, 1.6265, 1.5525, 1.5405, 1.5849, 1.5789, 1.5462, + 1.6115, 1.5663, 1.5264, 1.5779, 1.5052, 1.5357, 1.6098, 1.5319, 1.6803, + 1.6384, 1.6489, 1.5756, 
1.6014, 1.5709, 1.6672, 1.5551, 1.5675, 1.7146, + 1.6731, 1.6189, 1.5798, 1.5877, 1.6571, 1.5571, 1.6023, 1.5781, 1.6524, + 1.6087, 1.5548, 1.5786, 1.6094, 1.6381, 1.6036, 1.5332, 1.5510, 1.6183, + 1.6424, 1.5860, 1.5442, 1.5265, 1.6016, 1.5861, 1.6329, 1.6371, 1.6098, + 1.5439, 1.6541, 1.6189, 1.5390, 1.6603, 1.6226, 1.5107, 1.6136, 1.6319, + 1.6133, 1.6171, 1.5596, 1.5044, 1.5676, 1.6682, 1.6274, 1.5624, 1.6302, + 1.6082, 1.6882, 1.5975, 1.5947, 1.6779, 1.5172, 1.8169, 1.5628, 1.5574, + 1.5296, 1.6388, 1.6386, 1.5509, 1.5011, 1.5941, 1.5359, 1.6311, 1.6930, + 1.5646, 1.5703, 1.7156, 1.7039, 1.7237, 1.6125, 1.6781, 1.6184, 1.6499, + 1.5913, 1.5678, 1.5397, 1.6042, 1.5798, 1.4778, 1.6637, 1.6021, 1.5434, + 1.5934, 1.6021, 1.6542, 1.5897, 1.5038, 1.6211, 1.5139, 1.4714, 1.5686, + 1.6549, 1.6761, 1.5084, 1.5039, 1.6728, 1.5570, 1.5322, 1.6111, 1.5362, + 1.5236, 1.5444, 1.6373, 1.6728, 1.5343, 1.4118, 1.6080, 1.6841, 1.5738, + 1.6541, 1.5087, 1.6481, 1.5934, 1.6207, 1.5490, 1.6071, 1.6627, 1.6416, + 1.6076, 1.6141, 1.5574, 1.6018, 1.5730, 1.6248, 1.5821, 1.5914, 1.5954, + 1.7080, 1.5800, 1.5834, 1.6229, 1.5816, 1.5754, 1.5840, 1.3503, 1.5136, + 1.5413, 1.6312, 1.6352, 1.5054, 1.5827, 1.5213, 0.3553, 1.6140, 1.5190, + 1.5867, 1.5369, 1.5415, 1.6162, 1.6345, 1.5307, 1.6651, 1.5010, 1.6128, + 1.5278, 1.5920, 1.5121, 1.5923, 1.5087, 1.5450, 1.5725, 1.5622, 1.5693, + 1.5568, 1.5951, 1.5524, 1.5817, 1.5403, 1.5696, 1.5777, 1.5988, 1.5369, + 1.6626, 1.5796, 1.6014, 1.5697, 1.5798, 1.4994, 1.5147, 1.5696, 1.5165, + 1.5875, 1.6455, 1.6432, 1.6248, 1.6029, 1.5540, 1.3902, 1.5868, 1.6621, + 1.6429, 1.5988, 1.6661, 1.5714, 1.5929, 1.5571, 1.6623, 1.5666, 1.5757, + 1.5621, 1.5998, 1.5808, 1.5551, 1.6553, 1.5133, 1.6109, 1.5503, 1.5683, + 1.5732, 1.6376, 1.5990, 1.5096, 1.5476, 1.5207, 1.7330, 1.6614, 1.5702, + 1.5172, 1.5875, 1.6315, 1.5304, 1.6697, 1.5546, 1.5182, 1.5630, 1.5935, + 1.6117, 1.4515, 1.5856], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 
1.0403e-01, 9.7060e-02, -1.5602e-01, -8.2800e-03, 1.0229e-01, + 1.4111e-01, 1.0306e-01, 5.4273e-02, -1.6648e-01, -8.7717e-03, + 6.2813e-02, -2.6458e-02, -9.6767e-02, -2.1291e-02, -7.9400e-02, + -5.0567e-02, -3.2953e-02, -2.4269e-02, -5.1724e-03, 4.8843e+00, + -8.4254e-02, 5.4768e-02, -7.4972e-03, -2.4967e-02, -9.5140e-04, + 1.5371e-02, 7.1181e-03, -8.3969e-03, 2.2335e-02, -8.5531e-02, + 3.7403e-02, 2.8807e-02, 1.7188e-02, 1.0637e-01, 9.4134e-02, + -8.2811e-02, 4.2986e-03, -5.9408e-02, -8.1549e-02, 4.5918e-02, + 2.3936e-02, 3.2823e-02, 8.4698e-03, 9.2395e-03, -4.5630e-02, + 1.6469e-02, 3.6515e-02, -3.3968e-02, 5.7709e-02, 5.4930e-02, + 6.1122e-02, -6.1926e-06, 4.1280e-02, 3.4293e-02, 9.1601e-02, + 1.3812e-01, -7.9320e-02, -7.3999e-02, 5.8668e-02, 4.7881e-03, + 9.5586e-02, -7.9776e-02, 2.8651e-02, 6.1912e-03, 1.1924e-01, + -4.3352e-03, 1.2326e-01, 1.0645e-01, -4.3967e-03, 1.8691e-01, + 2.4468e-02, 2.8348e-02, 5.9777e-01, 7.2056e-02, -1.2830e-01, + 8.9767e-02, 4.4243e-02, -1.1128e-01, -8.0267e-02, -1.6716e-02, + -1.8243e-02, -5.4433e-02, -1.3675e-02, 1.2009e-01, 2.2563e-02, + 3.1082e-02, 1.3952e-01, 1.2806e-01, -2.9679e-02, -2.6804e-02, + 8.4288e-03, 2.6239e-02, 4.8801e-02, -5.7234e-02, -3.9436e-02, + 4.0219e-02, -2.0147e-02, -3.7820e-03, -9.8342e-02, -4.0746e-02, + 1.4139e-02, -5.4825e-02, 3.9232e-02, -9.4168e-02, 2.0618e-02, + -1.3461e-02, 3.9536e-02, -6.7572e-02, 2.6576e-02, -7.1447e-02, + -7.3643e-02, 1.1431e-03, 4.0579e-02, 5.8312e-02, 8.5941e-02, + -5.1140e-02, -1.5227e-02, -9.3090e-02, 1.2289e-02, 9.8821e-02, + -4.3573e-02, -2.1652e-01, -8.5814e-02, -3.8511e-02, -1.6028e-01, + 5.6119e-03, -1.4571e-01, 3.2770e-02, 5.6601e-02, -3.9532e-02, + -8.4895e-02, 6.0803e-02, -5.5046e-02, -2.6444e-02, -5.8353e-02, + 4.9541e-02, -2.7995e-02, -1.8478e-02, -3.4308e-02, -3.5461e-02, + 2.1209e-02, -4.9318e-03, 5.6645e-02, 3.7480e-02, 6.0327e-02, + -6.5690e-04, -2.0427e-02, -2.3503e-02, 1.8327e-04, 1.1751e-01, + 1.9287e-01, -7.5346e-02, -1.1450e-01, 1.6207e-02, -3.5160e-02, + 
-3.7534e-01, -1.0929e-02, -3.1112e-02, 3.6410e-02, 1.0848e-02, + 1.6411e-01, -1.3910e-01, -4.8034e-02, 2.0327e-02, 1.7898e-02, + -1.4329e-02, -6.9380e-02, 4.4215e-02, -6.9706e-02, 2.2333e-02, + -4.2725e-02, 7.9795e-02, 4.6975e-02, 4.7850e-02, 4.6778e-02, + -1.3970e-01, -1.4567e-03, 5.0245e-02, -1.2355e-02, 4.1464e-02, + -1.9858e-02, -6.7841e-02, 2.9885e-02, -2.5422e-02, 3.9081e-02, + -1.5655e-01, 3.9403e-02, 2.2546e-01, -2.9838e-02, -8.0923e-02, + 3.2238e-02, -3.3750e-02, 1.0578e-01, 8.4942e-02, -1.0134e-01, + 1.0470e-02, -1.6958e-02, -1.5581e-01, 8.7543e-02, 4.6490e-02, + -1.8868e-01, -2.1945e-02, -6.2705e-02, 5.4263e-02, 5.5378e-02, + 1.5345e-01, -8.8809e-02, 1.1056e-01, 1.3004e-01, 3.1240e-03, + 4.8471e-02, 2.2921e-01, -1.2579e-02, -3.3532e-02, -9.3535e-02, + -1.1855e-02, -5.6449e-02, 4.5213e-02, -1.4483e-01, -1.9879e-01, + -1.6979e-02, -7.0818e-02, 8.4117e-02, -1.6528e-02, 2.0236e-02, + -3.8914e-02, 6.5753e-02, -4.7799e-02, 1.2737e-01, -2.6316e-02, + -1.8444e-02, 3.9019e-02, -5.2672e-03, 5.9503e-02, -2.2726e-02, + 3.4543e-02, -1.0741e-01, 3.9277e-02, 6.2676e-02, 2.2335e-02, + 4.1903e-02, 1.1149e-01, 4.2704e-02, 5.2054e-02, 4.1758e-02, + 5.8227e-02, 8.3051e-02, -7.6821e-02, 6.0740e-03, -1.2686e-02, + 1.6613e-02, 2.4717e-02, 1.0240e-01, -1.2099e-01, 1.0826e-01, + -3.0802e-02, -8.7470e-02, -1.0857e-01, -1.1902e-04, 2.7459e-02, + -5.9645e-02, -7.6922e-02, -5.7398e-02, -5.9345e-02, -7.4553e-02, + 4.1273e-02, -7.1765e-02, -1.5432e-02, -9.4385e-02, 1.0805e-02, + -4.0354e-02, 7.1086e-02, 1.0681e-01, -7.6471e-02, 8.4077e-03, + 6.2677e-02, -9.7095e-02, 3.0733e-02, 8.5449e-02, -1.1908e-01, + 1.7565e-01, 1.5352e-01, 8.9398e-03, 4.5373e-02, 3.3377e-02, + 5.5408e-02, 4.7944e-02, -7.5976e-02, 8.8923e-02, -1.3306e-01, + -7.5426e-03, -1.8189e-02, 1.3748e-01, 4.3790e-02, 4.7611e-02, + 1.8740e-02, 1.8154e-01, 5.4250e-02, 8.3740e-02, 7.2689e-02, + 6.7214e-02, 5.0011e-03, -6.1465e-02, 1.2461e-01, 2.1885e-02, + 2.4945e-03, 1.0912e-01, -8.7420e-02, 5.2805e-02, -6.0513e-02, + 
7.2861e-02, 3.0458e-02, -9.6075e-02, -1.2647e-01, 2.7966e-02, + -6.5805e-02, 1.0081e-02, 4.9374e-02, 1.5660e-01, -4.2903e-02, + -7.9792e-02, 2.2622e-01, -6.6623e-02, -5.4678e-02, 3.2016e-01, + -2.0631e-01, 3.6732e-02, -3.8266e-03, 3.5298e-02, -2.0218e-01, + 1.5783e-01, -3.6070e-02, 1.1865e-01, -3.4082e-02, 2.0930e-02, + 8.3586e-02, 1.5725e-02, -3.8440e-02, 6.5960e-03, -3.2614e-02, + -1.2969e-01, 2.5196e-02, -7.0515e-02, -2.4179e-02, 1.0546e-01, + -4.5103e-03, 1.0148e-01, 9.7182e-02, 4.2287e-02, -1.2103e-02, + -6.1044e-02, 5.8164e-02, -1.3052e-02, 1.2791e-01, -1.5350e-02, + -4.3890e-02, -9.8166e-02, 4.9865e-02, -6.0790e-03, -1.1680e-01, + 7.4108e-03, -9.9132e-03, 5.0176e-03, -5.9086e-02, -1.6564e-02, + -2.7472e-02, 2.0742e-02, 1.3547e-01, 1.4747e-02, 3.9906e-02, + 9.4255e-02, -3.2829e-02, -6.6845e-02, -8.2401e-03, -5.0691e-02, + 2.7525e-02, 8.2268e-02, 1.1415e-01, -2.1360e-02, 9.0743e-03, + -1.2780e-01, 1.0709e-01, 3.6027e-02, -3.8634e-02, -3.7508e-02, + -3.5288e-02, -2.5998e-02, -6.0742e-02, -7.8862e-02, -1.0022e-01, + 3.9805e-02, -3.6270e-02, -5.7538e-02, -2.7233e-02, 4.4938e-02, + 2.6449e-03, 9.3045e-03, 3.3165e-02, -7.1608e-03, 6.8827e-02, + 1.0516e-01, -3.3185e-02, 8.7285e-02, 3.3311e-02, -1.1761e-01, + -1.3953e-01, -7.4792e-02, 3.7520e-02, 1.5746e-02, 9.0114e-04, + -8.0773e-02, 4.4950e-02, 6.3050e-02, -8.5671e-02, -6.2881e-03, + -1.5994e+00, -4.3553e-02, 2.9454e-02, -5.6907e-02, -3.5270e-02, + -3.1434e-02, 1.7889e-02, -7.5432e-03, -1.1371e-01, -5.5130e-02, + 2.9257e-02, -1.7139e-01, -1.1265e-01, 6.7160e-02, -3.7416e-02, + -7.2524e-02, -3.5468e-02, -1.1186e-03, 6.8092e-02, 1.1672e-01, + -1.4002e-04, 3.5592e-02, 5.8436e-02, 1.3232e-01, -7.7206e-02, + -4.7924e-02, -8.6854e-02, -4.7352e-02, 8.5665e-02, 1.4172e-01, + -1.4817e-01, -1.8879e-02, -7.7543e-02, 1.4620e-01, 1.0426e-02, + 5.8315e-03, -6.9976e-03, -1.2946e-01, 7.7025e-02, 3.7671e-02, + 1.4007e-02, 4.8044e-02, 1.4411e-02, -5.2605e-02, -1.2956e-01, + 1.0883e-02, 3.6544e-02, -1.0974e-01, 4.1607e-02, 
-4.8047e-03, + -1.9821e-03, 3.7107e-02, -7.3962e-02, 7.7033e-02, 3.0835e-02, + -1.0230e-02, -1.2997e-01, -9.3121e-02, 2.8200e-02, -3.7071e-02, + 6.1153e-02, 1.6810e-01, 8.3451e-02, -7.2332e-03, 1.0225e-01, + 1.1711e-01, -1.2504e-01, 1.0692e-02, 1.7643e-01, -9.4140e-02, + -1.7660e-02, -3.5525e-02, 1.3885e-01, 9.2340e-02, -1.6297e-02, + 2.5335e-02, 2.0930e-02, -3.7296e-02, -2.7389e-02, -1.2104e-01, + 6.3596e-02, -5.1928e-02, 9.5645e-02, -7.1889e-02, 3.9743e-03, + 3.1035e-02, -7.1511e-02, 6.0946e-02, -7.1944e-02, -4.7700e-02, + 6.4958e-02, 1.0425e-01, 7.0645e-02, 2.9044e-02, 1.3859e-02, + -8.4643e-02, -2.9044e-02, -8.2613e-02, 5.3671e-02, -4.7501e-02, + -4.3595e-02, 1.8381e-02, 1.6899e-01, 1.1637e-01, -1.2471e-01, + -6.4090e-02, -2.6507e-02, 7.6388e-02, -7.0416e-02, -2.9882e-02, + -1.3445e-02, 4.8258e-03, 3.6539e-03, 3.5404e-02, -8.2439e-02, + 2.2725e-02, 2.4114e-03, -1.5794e-02, -8.3513e-02, -3.4496e-02, + 1.8298e-02, -2.2342e-02, 1.4748e-04, -5.4580e-02, -1.0311e-01, + -2.7600e-02, 4.6100e-02, 1.7093e-02, 1.9861e-02, 2.2585e-02, + 1.4885e-02, 1.0653e-01, 1.1443e-02, 8.4617e-02, 1.5395e-02, + 3.1660e-02, 3.9251e-03, -9.7112e-02, 3.9738e-02, -2.0323e-03, + -1.5839e-02, -1.2260e-02, -8.4579e-02, -2.8312e-03, 1.5448e-01, + -6.6625e-02, 1.7054e-01, 1.5554e-02, 4.5368e-02, 1.5215e-01, + 5.1925e-02, 8.8155e-03, 9.9351e-03, 6.4983e-02, 4.1809e-02, + 1.0706e-01, -1.1904e-02, -7.9306e-02, -1.8990e-02, 8.2367e-02, + 3.7968e-02, 3.2559e-02, 3.2740e-02, 7.0236e-02, -3.4334e-02, + -1.2870e-01, -1.7692e-02, 6.6359e-02, 1.2667e-01, 6.2629e-02, + 2.6319e-02, 6.9796e-02, -7.5105e-02, 9.2368e-02, 4.5839e-02, + -9.4532e-03, 7.9225e-02, 1.2312e-01, -4.0744e-02, 3.0323e-02, + -3.8575e-02, 6.1851e-02, -4.2298e-03, 1.4596e-03, 2.9492e-02, + -6.8381e-02, -1.5145e-01, 1.4044e-02, 3.8219e-03, 2.7525e-02, + -5.9736e-02, 8.6931e-02, -5.9665e-02, 6.6026e-02, -4.7780e-02, + 8.0811e-02, 9.5365e-02, -4.2623e-02, 3.3717e-03, -1.6398e-02, + -5.8409e-02, 5.1490e-02, 1.0267e-01, 7.1453e-03, 
-7.2832e-02, + 1.0664e-01, 1.3928e-02, 3.6142e-02, 4.5365e-02, 3.0590e-02, + -4.0905e-02, 3.1009e-03, 4.9965e-02, 6.2574e-02, -3.7965e-02, + 1.5737e-01, -2.2978e-03, 1.3913e-01, 1.9018e-01, 4.1395e-02, + 1.9691e-02, -5.2322e-02, 3.8990e-02, 9.0262e-02, -4.5511e-02, + 1.7857e-02, -6.1070e-03, 7.9881e-02, -6.1300e-02, 1.5891e-02, + -9.1146e-03, -1.0800e-02, 1.1645e-01, 4.1218e-02, 1.4686e-01, + 1.4334e-01, 3.2606e-02, -4.7522e-02, 9.4717e-02, -8.2010e-02, + 7.2789e-02, -1.5124e-01, -4.3470e-04, 5.9777e-02, 1.2152e-01, + -4.7845e-02, 2.5638e-02, -9.8613e-02, -2.8399e-03, 1.3092e-01, + 5.0667e-03, -1.4642e-01, -1.3147e-01, 6.6508e-02, 4.7440e-02, + -2.2257e-01, 3.7247e-02, 4.4363e-03, 3.6616e-02, -1.6164e-02, + 2.2315e-02, -7.9318e-03, 2.3029e-03, 7.1865e-03, 8.5034e-02, + -1.1355e-02, -4.4492e+00, -4.4573e-02, 2.7494e-02, -3.2405e-02, + -1.0286e-01, -5.5482e-02, -3.4495e-02, -5.3544e-02, 1.8831e-02, + -7.9519e-02, -3.4958e-02, -6.5070e-03, -2.7990e-02, -5.7086e-02, + 7.8146e-02, -9.5072e-03, -8.0300e-02, 4.5083e-02, -7.5206e-02, + 1.1142e-01, 5.7118e-02, -1.1984e-02, 9.7471e-03, -1.5284e-01, + -3.3771e-02, 1.0841e-01, 2.3453e-02, -1.4740e-02, 6.4261e-02, + -5.0851e-02, 1.1362e-01, 7.0055e-02, 4.6831e-02, 3.7477e-02, + -1.3111e-02, -9.9276e-03, 6.9019e-03, -7.9165e-02, -3.6014e-03, + 1.1528e-02, 1.4484e-02, 3.9553e-02, -1.0392e-01, 9.0540e-02, + -2.3436e-02, -2.2548e-01, 1.1270e-01, 5.3056e-02, 9.3794e-02, + 1.0745e-01, 3.8244e-02, 6.4040e-02, 1.0307e-01, -9.8231e-03, + 3.1430e-03, 2.4322e-02, -2.4742e-02, 2.0988e-02, -5.0275e-02, + 1.0009e-01, -1.2285e-01, -8.5569e-02, 7.0198e-02, -6.9018e-02, + -3.8501e-02, -1.6234e-01, -2.3968e-02, 4.3340e-02, -4.9714e-02, + -2.6754e-02, -1.0560e-03, -1.0699e-01, -2.9853e-02, 7.2143e-02, + -1.1260e-02, 4.7919e-02, 9.6402e-02, 4.0698e-02, -4.5639e-02, + -2.8596e-02, 4.3692e-02, 7.4926e-02, -6.2533e-02, -8.9408e-02, + -6.3402e-02, 1.1697e-01, 7.2318e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0048, 
0.0208, 0.0301, ..., 0.0115, 0.0131, 0.0097], + [ 0.0114, -0.0127, 0.0137, ..., 0.0169, 0.0023, 0.0218], + [-0.0232, -0.0296, 0.0010, ..., 0.0052, -0.0218, 0.0328], + ..., + [-0.0138, -0.0232, -0.0012, ..., -0.0105, -0.0009, 0.0167], + [-0.0087, 0.0149, -0.0075, ..., 0.0247, -0.0048, -0.0043], + [-0.0146, 0.0123, 0.0197, ..., -0.0132, 0.0005, -0.0090]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2957, -0.3293, -0.3479, ..., -0.1686, -0.4126, -0.3352], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-2.0264e-02, 2.8854e-02, 4.7760e-03, ..., -4.8904e-03, + -3.3169e-03, -2.8595e-02], + [ 7.9193e-03, -1.0986e-02, 8.4000e-03, ..., 2.7145e-02, + 3.1189e-02, -2.6474e-02], + [ 1.8585e-02, -1.3618e-02, -1.1322e-02, ..., 6.1989e-03, + -1.4870e-02, -5.5194e-05], + ..., + [ 1.0979e-02, 2.5269e-02, -1.1635e-03, ..., 2.2926e-03, + 2.9037e-02, -2.4094e-02], + [ 1.6174e-02, 2.0721e-02, 5.5618e-03, ..., -1.0529e-03, + 6.1226e-03, 1.5610e-02], + [-1.6403e-02, 1.9646e-03, -7.2136e-03, ..., -3.4119e-02, + -3.3054e-03, -1.8219e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 7.6416e-02, 3.9429e-02, -1.3733e-01, 5.8136e-02, 3.9581e-02, + 7.4158e-02, 5.7098e-02, -3.6793e-03, -6.1531e-03, 1.2093e-02, + 2.0386e-02, -3.7659e-02, -1.5125e-01, -8.3130e-02, 2.2827e-02, + -1.5557e-05, -9.2102e-02, -7.6752e-03, -2.1561e-02, -1.9568e-01, + 3.1555e-02, 3.9185e-02, 7.4387e-04, -3.9825e-02, 5.6152e-02, + 1.3351e-03, 3.2654e-02, -4.4489e-04, -8.2458e-02, 4.4403e-02, + -2.3438e-02, 8.2947e-02, 2.9175e-02, -5.3329e-03, 8.5876e-02, + -1.5121e-02, -6.7406e-03, 7.0435e-02, 2.0416e-02, 6.4636e-02, + 2.3041e-02, -1.1034e-03, -2.3956e-02, -1.0437e-02, -5.2147e-03, + -7.6866e-03, -8.5815e-02, 6.6956e-02, 6.4209e-02, -1.0811e-02, + 4.2648e-03, 6.3133e-03, 8.9111e-02, -1.9852e-02, -6.9580e-02, + 8.1543e-02, -6.0883e-02, 6.6795e-03, 7.3303e-02, -1.9196e-02, + -1.4107e-02, 
-8.1970e-02, 6.2927e-02, -6.9702e-02, 2.7161e-02, + -5.0385e-02, 5.2551e-02, 7.9498e-03, 6.5346e-03, -7.0496e-02, + 8.2886e-02, 6.8848e-02, 1.0809e-01, 1.0033e-02, -5.7526e-02, + 1.2383e-02, 5.5084e-02, -5.0774e-03, 1.1398e-02, 5.9845e-02, + 1.1055e-02, -3.6377e-02, -7.8369e-02, 2.3499e-02, 3.6896e-02, + -3.3051e-02, 3.8025e-02, -1.5640e-02, 5.0903e-02, 4.3091e-02, + -7.7972e-03, 7.2571e-02, -2.4017e-02, -9.0210e-02, -6.1584e-02, + 2.7695e-02, 4.2542e-02, -1.4331e-01, -3.0945e-02, 7.7515e-02, + 7.1106e-02, -5.2948e-02, -9.3994e-03, 5.4230e-02, 2.2110e-02, + 1.4473e-02, 4.0009e-02, -3.9558e-03, 2.9907e-02, -2.3315e-02, + -6.4148e-02, 7.2876e-02, -6.2805e-02, 7.8552e-02, 1.1102e-01, + 4.9667e-03, 2.8915e-02, 2.5131e-02, 1.1731e-01, 3.4542e-03, + -4.7058e-02, -6.0699e-02, -5.7709e-02, -2.8259e-02, -6.4392e-02, + -1.6251e-02, -5.6091e-02, -1.2344e-02, -6.3843e-02, -8.3679e-02, + -4.3152e-02, -3.4393e-02, -3.1616e-02, -6.9092e-02, 1.3557e-02, + -1.1124e-02, 3.5828e-02, 2.5650e-02, -4.1473e-02, -3.2440e-02, + 7.1655e-02, 2.5589e-02, -8.5693e-02, -4.2908e-02, 7.2517e-03, + 3.8471e-03, 7.3059e-02, 3.0029e-02, -5.1941e-02, 1.0046e-01, + 1.2018e-01, 6.3629e-03, 8.5220e-03, -1.1635e-02, -1.6992e-01, + 1.0201e-02, -8.0750e-02, 5.5023e-02, 6.4659e-03, 4.5990e-02, + 8.8989e-02, -3.0716e-02, -2.3865e-02, -5.3650e-02, 7.0801e-02, + 2.2049e-02, -7.4158e-02, 1.0071e-02, -8.0261e-03, 2.7328e-02, + -8.1543e-02, 2.7054e-02, -2.3270e-04, 9.7580e-03, 7.5623e-02, + -5.8167e-02, -4.1901e-02, -3.5919e-02, -5.2124e-02, -4.8645e-02, + -1.0242e-01, -1.2451e-01, 1.4313e-02, 2.7180e-03, 6.5002e-02, + -2.1469e-02, -5.6114e-03, 4.1089e-01, -5.4565e-02, -1.5175e-02, + -3.8509e-03, -7.3181e-02, 1.0345e-02, 4.2328e-02, -1.1879e-02, + 1.0785e-01, 6.1569e-03, -2.9236e-02, 1.4844e-01, 2.1896e-03, + -5.9082e-02, -2.0008e-03, 6.3293e-02, 6.3599e-02, 8.5144e-03, + 1.5945e-02, 2.9583e-03, 4.7607e-02, -1.9806e-02, 6.9580e-02, + -7.1594e-02, 1.3557e-02, 3.0716e-02, 2.7649e-02, 2.7283e-02, + 4.7516e-02, 
-6.5918e-02, 2.7046e-03, 4.8866e-03, -1.2024e-01, + 4.6234e-02, 3.3447e-02, 4.3213e-02, 4.9591e-02, 2.6016e-02, + -8.7952e-02, 1.7502e-02, -3.1067e-02, 2.2278e-02, -1.0490e-03, + 4.4312e-02, 3.8147e-02, 5.5786e-02, 3.4302e-02, -2.3178e-02, + -1.8066e-02, 2.7878e-02, 9.2926e-03, 3.6560e-02, 1.5404e-02, + 4.3365e-02, 8.6792e-02, 3.2898e-02, -5.2414e-03, -4.9744e-03, + 9.2407e-02, 1.4526e-01, -3.5461e-02, 7.4219e-02, 1.7166e-02, + -2.1286e-02, -4.3678e-03, 5.5771e-03, -2.0508e-02, 1.2396e-01, + 2.7802e-02, 9.6283e-03, -4.3518e-02, -4.7211e-02, -3.2940e-03, + -4.8584e-02, 3.1982e-02, 2.0294e-02, -2.6001e-02, -9.3201e-02, + -8.6121e-02, -6.6406e-02, 5.2612e-02, -1.2108e-02, -7.9224e-02, + 5.6824e-02, 1.8967e-02, -1.0948e-03, -9.5764e-02, -7.7200e-04, + 2.7527e-02, 1.9699e-02, 1.5518e-02, -3.7628e-02, -2.3529e-02, + -4.3671e-02, -9.8648e-03, -2.5589e-02, 6.4125e-03, 4.7646e-03, + -1.6754e-02, -1.0658e-02, 5.5298e-02, 5.9692e-02, -1.0590e-01, + -4.5837e-02, 9.1553e-02, 1.7761e-02, 5.0201e-02, 2.9037e-02, + -5.7343e-02, 2.0279e-02, 1.3016e-02, 2.9739e-02, -3.4302e-02, + -4.3518e-02, 9.9915e-02, -1.4793e-02, 5.0720e-02, 2.2079e-02, + -4.4647e-02, 6.1768e-02, -1.2291e-02, 8.8806e-03, -4.4952e-02, + 5.1300e-02, 7.6355e-02, 6.1310e-02, -8.8257e-02, -2.8595e-02, + -6.1890e-02, 2.6749e-02, 7.0496e-02, 3.5400e-02, 1.8219e-02, + -7.3853e-02, 9.9411e-03, 7.5264e-03, 1.2817e-03, 5.3809e-01, + -1.4111e-01, 1.3843e-01, -3.9001e-02, 3.5431e-02, -1.5918e-01, + 5.8899e-02, -1.0211e-01, -2.9861e-02, -7.7896e-03, 5.7098e-02, + -3.1403e-02, 1.8051e-02, 1.2283e-02, -2.8534e-02, -1.5078e-03, + -1.1528e-02, 1.7303e-02, -3.3875e-02, -6.5918e-02, 1.3115e-02, + -3.7567e-02, -3.0090e-02, 8.7524e-02, -8.1940e-03, -5.2063e-02, + 5.2429e-02, 6.9153e-02, -4.5868e-02, -1.0033e-02, -4.6234e-02, + -3.8422e-02, 1.1635e-02, -8.1604e-02, -9.4177e-02, 3.2593e-02, + -9.1019e-03, -8.7952e-02, -6.4636e-02, -5.2399e-02, 5.1392e-02, + 9.8145e-02, -8.0681e-04, 8.5449e-02, 1.3745e-01, 5.7281e-02, + -1.5488e-02, 
2.2259e-03, -6.7383e-02, -4.9835e-02, 5.6213e-02, + -2.5291e-03, -4.9515e-03, 6.8359e-03, -1.9875e-03, -5.8228e-02, + 1.4725e-02, 3.4790e-02, 5.2399e-02, 4.0222e-02, -4.0955e-02, + -2.0660e-02, 4.7241e-02, -2.8656e-02, 1.9485e-02, -2.2095e-02, + 5.2094e-02, 4.3823e-02, 3.9856e-02, -1.1200e-02, -9.5337e-02, + 5.9509e-03, -3.6530e-02, 2.0416e-02, 1.5732e-02, -4.2877e-02, + -6.1340e-02, -3.6621e-02, -1.7960e-02, -8.1970e-02, 1.9547e-02, + -2.1179e-02, 3.5706e-02, 1.0452e-02, -2.1118e-02, -2.4033e-02, + 7.1289e-02, 1.2250e-01, 2.2446e-02, -3.7262e-02, -2.0370e-02, + -1.4075e-01, 6.0486e-02, -2.9888e-03, 6.6948e-03, 3.5156e-02, + -3.1921e-02, -3.7415e-02, 5.8136e-02, -8.8684e-02, -7.2693e-02, + 5.1910e-02, 1.5869e-02, -4.8757e-04, 7.6904e-02, -9.7046e-03, + 4.1016e-02, -3.4027e-02, -5.0842e-02, 1.3321e-02, 5.3680e-02, + 3.1414e-03, -4.7455e-02, -2.7618e-02, 2.5101e-02, -1.5723e-01, + 7.8201e-03, -2.0660e-02, 2.5883e-03, -6.2447e-03, 5.5389e-02, + -1.3313e-02, 4.7821e-02, 6.9351e-03, 5.7983e-02, 5.6183e-02, + 1.1887e-02, 5.2910e-03, -3.2379e-02, 5.1392e-02, -7.3975e-02, + -8.5571e-02, -1.4877e-02, -1.5236e-02, -2.1667e-02, -1.7227e-02, + -1.8250e-02, 4.9805e-02, -9.5947e-02, -3.6545e-03, -6.0822e-02, + -9.9731e-02, 5.7709e-02, 9.8343e-03, 5.7373e-02, -3.7537e-02, + 4.8615e-02, -3.5797e-02, 2.6840e-02, 4.7211e-02, -3.5217e-02, + 6.2103e-02, -6.2065e-03, -1.5650e-03, -4.1718e-02, 2.3468e-02, + -1.9951e-03, 2.7786e-02, 3.2043e-02, 1.0689e-02, -9.7351e-02, + -9.5093e-02, 1.6891e-02, 8.6060e-02, 1.4938e-02, -5.2521e-02, + 2.5116e-02, 2.1744e-02, -1.1877e-01, 1.1864e-02, -8.8501e-02, + -1.6541e-02, -1.1487e-01, 1.8127e-02, 3.0273e-02, 9.3994e-02, + -3.4607e-02, -3.0746e-02, 9.1003e-02, 5.6641e-02, -5.7037e-02, + -6.4392e-02, 4.1718e-02, 1.3525e-01, 5.1697e-02, -8.3313e-02, + 4.7760e-03, -5.3406e-02, -3.6774e-02, 1.1792e-01, -2.0370e-02, + -4.1016e-02, 2.3682e-02, 3.7537e-02, -5.7495e-02, -5.1300e-02, + 2.9572e-02, 2.8168e-02, 1.2695e-02, 7.2241e-04, 3.0762e-02, + 7.5760e-03, 
5.3284e-02, -7.0068e-02, 2.8854e-02, -5.5328e-02, + -4.3091e-02, -7.0679e-02, 1.8738e-02, -1.3046e-02, 3.7781e-02, + 1.0361e-02, -1.2070e-02, 3.9749e-03, -7.9651e-02, -1.5659e-03, + -4.2450e-02, 3.2806e-02, -4.0283e-02, 1.9882e-02, -3.8605e-02, + 5.2246e-02, 4.4189e-02, -6.0577e-02, -3.6682e-02, -8.7204e-03, + -6.4819e-02, -2.7985e-02, 4.7394e-02, 7.2266e-02, -3.6041e-02, + 1.1871e-02, 2.5116e-02, 5.1300e-02, -3.8666e-02, 6.6467e-02, + 4.7821e-02, -2.4509e-03, -5.8350e-02, 4.3304e-02, 3.1281e-02, + 5.8990e-02, 1.9951e-03, 3.8544e-02, -5.9891e-03, 5.7556e-02, + 1.6617e-02, 3.1319e-03, 7.5134e-02, 6.8420e-02, 4.2236e-02, + 5.5573e-02, -5.0323e-02, -2.2797e-02, -1.2222e-02, -2.8610e-02, + -4.1321e-02, 4.5563e-02, 2.5726e-02, 2.1149e-02, 5.3741e-02, + -4.7455e-02, -2.0660e-02, -7.1106e-02, -4.9629e-03, -2.2308e-02, + -4.8706e-02, 4.1077e-02, -1.4275e-02, -4.3823e-02, 2.3315e-02, + 7.4234e-03, -2.4673e-02, 1.0425e-01, 7.0457e-03, 4.8218e-02, + 7.4120e-03, 8.8425e-03, 3.7384e-02, -2.9678e-02, 6.0349e-03, + 6.0638e-02, -4.3213e-02, -1.9580e-01, -7.4730e-03, -5.1819e-02, + -3.3112e-02, 8.0383e-02, -4.2938e-02, 7.4219e-02, 2.4536e-02, + -1.0063e-02, 9.0485e-03, 1.3123e-03, -1.2062e-02, -5.7907e-03, + 1.0223e-02, -3.5004e-02, 7.4036e-02, -8.8501e-03, -1.6312e-02, + -1.4610e-02, -3.0640e-02, -9.7809e-03, 6.0852e-02, -9.5459e-02, + -2.1076e-03, -8.5022e-02, -6.9153e-02, 1.4397e-02, 6.0516e-02, + -9.5081e-04, -1.5594e-02, -1.4824e-02, 3.0151e-02, -8.6365e-02, + 2.0813e-02, -6.9092e-02, -6.9031e-02, -3.3295e-02, -1.7899e-02, + -1.0413e-01, 8.7357e-04, 8.5327e-02, 1.5732e-02, 3.0212e-02, + 1.4087e-01, 4.0466e-02, 3.7170e-02, 6.4621e-03, -3.9520e-02, + 6.2988e-02, -2.2156e-02, -4.6265e-02, -2.0157e-02, 6.6711e-02, + -5.0385e-02, 1.3031e-02, -2.8976e-02, 7.9285e-02, 8.1604e-02, + -1.5625e-02, -5.6976e-02, -4.4464e-02, -1.1398e-02, -7.6843e-02, + 3.2501e-02, 9.4177e-02, 3.9398e-02, -3.1769e-02, 5.9418e-02, + -7.1167e-02, 7.4646e-02, -8.1787e-02, -4.7699e-02, -2.6825e-02, + 
-6.9666e-04, 4.6844e-02, -7.4768e-02, 5.8167e-02, -4.3701e-02, + 8.9340e-03, -2.5970e-02, -4.2694e-02, -3.4973e-02, 3.2196e-02, + -5.8258e-02, -2.2598e-02, -6.3782e-03, -7.2754e-02, 3.6743e-02, + 1.5320e-01, -2.5864e-02, -3.7689e-02, -6.9092e-02, -5.9387e-02, + 1.8661e-02, 2.9053e-02, 6.0455e-02, -8.7830e-02, -3.1311e-02, + 1.0277e-02, 6.6345e-02, -4.2969e-02, -3.1624e-03, -4.0009e-02, + 3.0502e-02, -1.7914e-02, -6.5796e-02, -8.0872e-02, -1.6022e-02, + -1.2671e-01, 3.2654e-02, -9.5139e-03, 1.7807e-02, -6.0150e-02, + -6.4430e-03, -8.7204e-03, -8.2855e-03, -5.0446e-02, 7.0915e-03, + -2.0523e-02, -1.1787e-02, 3.8574e-02, -1.5854e-02, 1.0025e-02, + -7.5195e-02, -5.3596e-03, 7.8613e-02, -2.2324e-02, 1.7990e-02, + 1.1328e-01, -3.4546e-02, 1.2367e-02, -5.5634e-02, -6.4758e-02, + -3.3173e-02, -5.9174e-02, -7.6599e-02, 1.9653e-02, 5.2887e-02, + 6.2683e-02, -1.0712e-01, 6.7749e-02, 3.8605e-02, -4.1748e-02, + -3.0762e-02, -2.6627e-02, -7.7881e-02, 2.5978e-03, 4.1199e-03, + 1.3733e-01, -4.8889e-02, -9.5520e-03, 6.9031e-02, 2.8091e-02, + 4.2877e-02, 3.3203e-02, -3.7201e-02, 2.0309e-02, -5.8746e-02, + 1.0063e-02, 8.3252e-02, 1.8173e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.1959, 2.2433, 2.1857, 2.1209, 1.6389, 2.1854, 2.1641, 2.2831, 2.2128, + 2.2176, 2.1341, 2.2153, 2.3185, 2.2126, 2.2357, 2.0978, 2.1635, 2.1208, + 2.2639, 1.7605, 2.3400, 2.1941, 2.3848, 2.3280, 2.1389, 2.2421, 2.1610, + 2.2365, 2.1423, 2.0889, 2.1251, 2.1304, 2.1786, 2.2438, 2.0898, 2.1105, + 2.3662, 2.1162, 2.1394, 2.2017, 2.0881, 2.1809, 2.1864, 2.1774, 2.1370, + 2.1468, 2.2602, 2.0467, 2.2073, 2.0081, 2.2256, 2.2054, 2.3503, 2.1690, + 2.3088, 2.0737, 2.3294, 2.0558, 2.1249, 2.2076, 2.1621, 2.1766, 2.1614, + 2.1080, 2.2065, 2.4156, 2.2566, 2.1855, 2.1032, 1.7747, 2.1803, 2.1558, + 2.0032, 2.1216, 2.2635, 2.3478, 2.2386, 2.1590, 2.0873, 2.1088, 2.1520, + 2.2779, 2.1882, 2.1679, 2.1767, 2.2001, 2.2802, 2.0911, 2.1828, 2.0577, + 2.2320, 2.1016, 2.2414, 
2.1581, 2.1407, 2.3021, 2.0415, 2.2577, 2.2683, + 2.1252, 2.1751, 2.0928, 2.1778, 2.1687, 2.1598, 1.8509, 2.4226, 2.1822, + 2.1058, 2.1584, 2.1773, 2.2128, 2.1975, 2.3101, 2.3948, 2.1183, 2.0253, + 2.3268, 2.2789, 2.1262, 2.0824, 2.2447, 2.2727, 2.1143, 2.1328, 2.1054, + 2.2809, 2.2893, 2.2209, 2.1829, 2.2278, 2.3562, 2.2678, 2.1217, 2.3154, + 2.1505, 2.1856, 2.1743, 2.0592, 2.4429, 2.1887, 2.1263, 2.2271, 2.0800, + 2.1909, 2.0682, 2.1730, 2.1021, 2.2987, 2.1907, 2.0872, 2.1588, 2.2281, + 2.1060, 2.1932, 1.4218, 2.1816, 2.0950, 2.1298, 2.3864, 2.0477, 2.1148, + 2.2073, 2.1981, 2.1993, 2.1867, 1.9977, 2.2300, 2.2829, 2.0759, 2.2131, + 2.0797, 1.9357, 2.1667, 2.0612, 2.2055, 2.1462, 2.2525, 2.2415, 2.1718, + 2.2981, 2.3206, 2.3284, 2.2534, 2.0522, 1.9514, 2.0436, 1.2547, 2.2027, + 2.0659, 2.1481, 2.2872, 2.2690, 2.1321, 2.2106, 2.1154, 2.2069, 2.1221, + 0.9120, 2.2188, 2.4457, 2.1044, 2.0904, 2.1309, 2.2110, 2.1845, 2.2859, + 2.1890, 2.2604, 2.1990, 2.2026, 2.0569, 2.3205, 1.8578, 2.1635, 2.1395, + 1.9611, 2.1856, 2.2196, 2.1708, 2.2654, 2.2020, 1.6530, 2.4133, 2.1938, + 2.3114, 2.2499, 2.0929, 2.1446, 2.0065, 2.1790, 2.1916, 2.2220, 2.0788, + 2.1493, 2.1848, 2.1861, 2.1895, 2.1188, 2.2206, 2.1182, 2.2558, 2.2083, + 2.1962, 2.0984, 2.1888, 2.1710, 2.3315, 2.1818, 2.1630, 2.1299, 2.1561, + 2.2780, 2.2752, 2.2380, 2.1594, 2.2068, 2.2501, 2.2587, 2.2793, 2.1248, + 2.2332, 2.2974, 2.1941, 2.3257, 2.1813, 2.1643, 2.2622, 2.0120, 2.1354, + 2.1130, 2.2935, 2.2795, 2.3622, 2.0759, 2.2596, 2.0937, 2.1968, 2.1598, + 2.1423, 2.1361, 1.6926, 2.1329, 2.2892, 2.2827, 2.1544, 2.2791, 2.2954, + 2.0467, 2.0488, 2.1520, 2.2570, 2.1547, 2.2220, 2.1348, 2.1048, 2.1245, + 2.1469, 2.0957, 2.2185, 2.0759, 2.3178, 2.1051, 2.2986, 2.2410, 2.0303, + 2.3367, 2.1090, 2.2387, 2.2247, 2.3043, 2.1765, 2.4028, 2.2597, 2.0883, + 2.0746, 2.1657, 2.2288, 2.1221, 2.3232, 2.3526, 2.0159, 2.1545, 2.1041, + 0.3871, 2.3329, 2.2983, 2.0760, 2.4192, 2.1847, 2.1178, 2.2100, 2.2635, + 2.2291, 2.1490, 2.1012, 
2.2794, 2.2359, 2.1499, 2.1478, 2.3279, 2.1762, + 2.2106, 2.2876, 2.2022, 2.0238, 2.2397, 2.1261, 2.3905, 2.2223, 1.9225, + 2.2648, 2.2286, 2.0396, 2.1020, 2.1426, 2.1499, 2.3850, 2.1622, 2.2299, + 2.5684, 2.2173, 2.3663, 2.2255, 2.2637, 2.0994, 2.2758, 2.3033, 2.2118, + 2.3846, 2.2690, 2.1289, 2.3554, 2.4116, 2.1124, 2.1070, 2.1095, 2.1002, + 2.2363, 2.3103, 2.1195, 2.1077, 2.1961, 2.2064, 2.1364, 2.1967, 2.2627, + 2.2412, 2.2803, 2.2380, 2.0999, 2.1453, 2.3822, 2.1865, 2.0122, 2.2556, + 2.2398, 2.1578, 2.4047, 2.2957, 2.1867, 2.2028, 2.1285, 2.3118, 2.1787, + 2.0632, 2.1458, 2.1129, 2.2477, 2.2613, 2.1903, 2.1475, 2.2234, 2.2157, + 1.5280, 2.7052, 2.1411, 1.9908, 2.1080, 2.3099, 2.1839, 2.2772, 2.3016, + 2.1400, 2.1563, 2.1544, 2.0065, 2.1619, 2.2343, 2.2508, 2.0020, 2.1819, + 2.2169, 2.2876, 2.2819, 2.0921, 2.2851, 2.1712, 2.1838, 2.4053, 2.2280, + 2.1213, 2.2339, 1.5170, 2.1571, 1.9372, 2.1083, 2.2179, 2.2474, 2.1511, + 2.2367, 2.1640, 2.3784, 2.1733, 2.1151, 2.1422, 2.1430, 2.0686, 2.1205, + 2.1924, 2.2715, 2.2642, 2.2484, 2.1470, 2.1295, 2.2515, 2.1685, 2.0151, + 2.1463, 2.4572, 2.1058, 2.1192, 2.0471, 2.2623, 2.3658, 2.2353, 2.1376, + 2.3325, 2.1941, 2.0276, 2.1756, 2.1455, 2.1653, 1.4040, 2.1441, 2.1498, + 2.0774, 2.1498, 2.0594, 2.1203, 2.2170, 2.1717, 2.1872, 2.2332, 2.0680, + 2.1824, 2.2045, 2.0639, 2.2616, 2.1788, 2.2463, 2.2707, 2.1051, 2.1681, + 2.2768, 2.0634, 2.2021, 2.2291, 2.2101, 2.1755, 2.0786, 2.2107, 2.0552, + 2.0850, 2.3577, 2.2024, 2.3391, 2.2122, 2.1305, 2.1941, 2.3249, 2.1990, + 2.1484, 2.0657, 2.2080, 2.2095, 2.2046, 2.1868, 2.1210, 2.1913, 2.1871, + 2.3558, 2.3104, 2.2753, 2.2643, 2.1893, 2.0487, 2.2330, 2.3197, 2.1989, + 2.0989, 2.2021, 2.3357, 2.2768, 2.2759, 2.0574, 2.2289, 2.1212, 2.1820, + 2.2164, 2.2613, 2.1490, 1.9993, 2.3027, 2.0638, 2.1157, 2.1022, 2.0929, + 2.3347, 2.2287, 2.2191, 1.6776, 2.1588, 2.2015, 2.3349, 2.1413, 2.2229, + 2.2184, 2.1025, 2.1480, 2.2768, 2.1595, 2.1132, 2.0917, 2.2105, 2.4072, + 2.0885, 2.2566, 2.2266, 
2.0839, 1.9988, 2.0598, 2.2237, 2.1696, 2.2168, + 2.1860, 2.1769, 2.0475, 2.1407, 2.1101, 2.1082, 2.1283, 2.1669, 2.1678, + 2.2295, 1.9174, 2.0682, 2.2209, 2.1534, 2.1680, 2.2277, 2.1702, 2.1736, + 2.3767, 2.2455, 2.4314, 2.2258, 1.8448, 2.2785, 2.0552, 2.2357, 2.2187, + 2.2307, 2.1839, 2.1638, 2.2256, 2.2257, 2.1354, 2.1825, 2.0499, 2.2662, + 2.0253, 2.2905, 2.1492, 2.2266, 2.2326, 2.1619, 2.1309, 2.1369, 2.2871, + 2.2399, 2.1446, 2.2617, 2.1392, 2.0626, 2.0500, 2.1366, 2.1822, 2.2245, + 2.2088, 2.2096, 2.1357, 2.1353, 2.2215, 1.8183, 2.0847, 2.0899, 2.1745, + 2.1806, 2.3140, 2.2878, 2.2096, 2.2386, 2.2901, 2.2440, 2.0776, 2.1517, + 2.0757, 2.1171, 2.1626, 1.6976, 2.1217, 2.3893, 2.1851, 2.0992, 2.2829, + 2.1739, 2.2640, 2.2493, 2.1478, 2.2046, 2.1931, 2.1903, 1.8220, 2.2475, + 2.0913, 2.1884, 2.0506, 2.2598, 2.2075, 2.3095, 1.9063, 2.1329, 2.1046, + 2.1928, 2.1712, 2.1733, 2.0890, 2.2223, 2.2372, 2.1847, 2.1720, 2.3124, + 2.2107, 2.1327, 2.2606, 2.4070, 2.1014, 2.2041, 2.2022, 2.1145, 2.0395, + 2.1650, 2.2246, 2.2635, 2.2594, 2.3897, 2.0606, 2.0628, 2.2408, 2.2936, + 2.0929, 2.1765, 2.0997, 2.1489, 2.2996, 2.2237, 2.2520, 2.0795, 2.3836, + 2.1249, 2.4757, 2.3715, 2.1985, 2.2140, 2.0363, 1.3627, 2.1650, 2.1868, + 2.1629, 2.1055, 2.1162, 2.1416, 2.2213, 2.2584, 2.2356, 2.1500, 2.0179, + 2.1893, 2.2718, 2.3281, 2.2356, 2.0494, 2.3746, 2.2077, 2.0318, 2.2620, + 2.2605, 2.1761, 2.2573, 2.1130, 2.2154, 2.3167, 2.2101, 2.1554, 2.2810, + 2.1703, 2.1957, 2.3504, 2.1863, 2.0534, 2.2269, 2.2191, 2.1727, 2.1098, + 2.0837, 2.2015, 2.3503], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.3281e-01, 1.0489e+00, -4.3994e-01, -3.5076e-01, -1.5734e-01, + 1.0467e-01, 3.4624e-01, -3.6057e-01, -5.3122e-01, -1.1508e-01, + -5.0123e-01, 2.9999e-01, -7.4618e-01, -5.3507e-01, 4.8069e-01, + -4.2248e-01, 1.7101e-01, 2.3967e-01, -5.6175e-01, -1.5818e+00, + 4.6960e-01, 2.5782e-01, 8.7915e-01, -1.2254e+00, 4.6011e-01, + -2.9357e-01, 3.7613e-01, 4.0989e-01, -5.0915e-01, 
5.7232e-01, + 4.9235e-01, 4.1011e-01, -4.4866e-02, -1.1593e-01, -2.0305e-01, + 3.1680e-02, -5.8174e-01, -1.0083e-01, 1.1103e+00, 7.3384e-01, + -3.7280e-01, 5.5970e-02, 4.7164e-01, 4.6526e-01, -5.5908e-01, + -3.5303e-01, -3.3388e-01, -4.7566e-01, 4.4903e-01, 1.1533e-01, + -6.4007e-01, -3.7924e-01, 7.8134e-01, 6.7420e-02, -5.9359e-01, + 1.2319e+00, 8.8111e-01, -1.8171e-01, -1.3960e-01, -3.9758e-02, + 1.7319e-01, 4.6109e-01, -1.0275e-01, -4.8635e-01, 2.2356e-01, + 1.1343e+00, 6.4209e-01, -7.2440e-01, -3.6563e-01, -1.5383e+00, + 3.5883e-02, -1.2616e-01, 4.5378e-01, -6.5210e-01, -5.3505e-01, + -9.3343e-01, 6.5818e-01, 4.9906e-01, -2.6536e-01, 2.5868e-01, + 9.8699e-02, 6.8153e-01, -3.1711e-01, 6.5012e-01, -6.8029e-01, + -2.7511e-01, 5.5501e-01, 4.4060e-02, 1.8087e-03, -3.0272e-01, + -5.5106e-01, 8.0156e-02, -5.0127e-01, -2.2272e-01, -5.4057e-01, + -3.7910e-01, 3.7393e-01, -4.5307e-02, -7.3265e-01, 4.7157e-01, + -2.0217e-01, -6.4716e-01, -8.6948e-02, 1.8414e-01, 8.2459e-02, + 3.1776e-01, 1.0655e+00, 6.4381e-01, 1.8725e-01, -1.5551e-01, + 7.7160e-01, 6.6357e-01, 6.1445e-03, 4.1845e-01, 7.9661e-01, + -3.5947e-02, 5.9436e-01, 5.0569e-01, 6.5157e-01, 2.5360e-01, + 1.1022e-01, -7.2232e-01, -8.4216e-01, -3.0733e-01, 4.9215e-01, + -2.9305e-01, 1.1464e-01, 5.4001e-01, -1.9519e-01, 5.3638e-01, + 5.7463e-01, -7.0124e-01, 2.2865e-01, -3.8127e-02, 8.8887e-01, + 9.2389e-01, 5.1717e-01, -2.9086e-01, -1.4492e-01, -7.3113e-01, + -3.8683e-01, 6.1832e-01, -2.4134e-01, 2.6464e-01, -4.8043e-01, + 2.3333e-01, 1.3102e-01, -1.4933e-01, -6.4925e-01, 3.4122e-01, + 1.7284e-01, -6.7279e-01, 3.6753e-01, 1.3642e-01, -4.6944e-01, + 7.0057e-01, -2.5841e-01, 7.7047e-02, 5.0625e-02, -8.2669e-01, + -3.6478e-01, 1.6450e-01, 4.8215e-01, -3.4735e-01, -1.2598e-01, + 2.5064e-01, -6.9497e-02, 4.7381e-02, -5.4013e-01, -5.1097e-03, + 7.9240e-01, 2.5241e-01, -5.7863e-01, 5.1426e-01, -6.1765e-01, + 2.3296e-01, 2.1380e-01, -4.3335e-01, -2.0675e-01, 7.5988e-01, + -4.6048e-01, -6.6959e-01, 3.0083e-01, 3.7472e-01, 
3.8105e-01, + 8.4046e-01, -3.9265e-01, 1.3335e+00, 1.4567e-01, -1.3545e-01, + -3.1525e-01, -6.1112e-01, -8.4796e-02, 3.9994e-01, 3.8930e-01, + -1.1288e-02, 8.5296e-01, -3.9526e-01, 2.2442e-01, 6.9698e-01, + -1.0549e+00, 4.6266e-01, 2.7690e-01, 3.1111e-01, 6.6209e-02, + -4.6759e-02, 6.1619e-01, 5.8232e-01, -6.4537e-01, -8.5775e-01, + 7.0397e-01, -3.8256e-01, -5.9776e-01, 5.3431e-01, 3.2315e-01, + -1.5651e-01, 1.5397e-02, 2.3762e-01, -3.1457e-01, 2.9154e-01, + 4.2581e-01, 5.1001e-02, -5.5437e-01, 9.1472e-01, 2.8580e-01, + -6.3040e-01, -8.1291e-01, 9.6829e-02, 2.1046e-01, 3.6351e-01, + -2.6908e-01, 9.2500e-02, 3.6032e-01, 3.7904e-02, -6.3151e-01, + -7.9317e-01, 3.9727e-01, 4.2895e-01, -1.9378e-01, 7.6716e-01, + 2.8807e-01, -1.6993e-01, 4.8568e-01, -6.1613e-01, -4.5870e-02, + -2.6191e-01, 7.7553e-02, 4.0871e-01, -8.8964e-01, 4.6862e-01, + 1.5260e-02, 4.7435e-01, 8.4179e-01, -4.0157e-01, 3.9829e-01, + 1.6903e-01, 4.2167e-01, -8.4015e-01, -8.4332e-01, -2.0669e-01, + -3.7132e-01, -2.7185e-02, 2.0107e-01, -1.6155e-01, -7.5347e-01, + -2.9117e-01, 5.7739e-01, 3.6695e-01, 1.6001e-01, 2.1570e-01, + 3.7116e-03, 6.9096e-01, -3.0534e-01, -8.5314e-01, -3.9907e-01, + -9.2290e-02, -4.2464e-02, 4.0167e-01, 2.1478e-01, -6.6176e-02, + 3.3254e-02, -7.6433e-01, -5.3241e-01, 5.8260e-01, -7.2345e-01, + 4.0051e-01, -5.5805e-01, 1.6328e-01, 1.1494e-01, -1.8896e-02, + -9.4009e-02, 2.6599e-01, 6.5200e-01, -7.1813e-01, -2.9351e-01, + 1.4233e-01, -3.6490e-01, -5.3627e-01, 4.7552e-01, 7.6735e-02, + 6.7022e-02, 6.1545e-01, -5.3540e-01, -6.3876e-01, -2.4615e-01, + 6.7330e-02, -4.3329e-01, 1.5423e-01, -6.4616e-01, -1.5042e-01, + 5.2939e-01, 1.3776e-01, 3.9853e-01, 2.5346e-01, 2.3949e-01, + 8.2627e-02, 3.1305e-02, 4.0734e-01, 4.0606e-01, 8.5729e-01, + -7.4386e-01, -4.3430e-01, 3.6861e-01, -2.6908e-01, -1.0369e-01, + -1.0651e+00, -1.1467e+00, -5.9298e-01, 6.4622e-01, -4.5900e-01, + 5.4481e-01, -2.3575e-01, -4.3648e-01, -4.0216e-01, -1.2223e-01, + -2.9405e-01, -1.0319e+00, 6.1357e-01, 5.8217e-01, 
-3.6815e-02, + 4.7501e-01, -2.8313e-01, -5.7884e-01, 9.5559e-01, -7.9043e-01, + 4.8335e-01, -3.6762e-01, 2.8385e-02, -9.3544e-01, 9.2769e-03, + 6.9211e-01, 4.3277e-01, 3.1741e-01, 6.5197e-01, -1.4770e-01, + -9.3074e-02, -7.4372e-01, -4.4881e-01, -1.1087e-01, 5.2817e-01, + -1.2982e+00, -5.1468e-01, -3.3536e-01, -1.9444e-01, 6.8215e-01, + -4.7416e-01, -5.2786e-01, 7.7139e-01, -7.5394e-01, 6.6867e-01, + -2.6968e-01, -4.9102e-01, 5.0436e-01, -6.4053e-01, -2.4396e-01, + -1.5928e-01, 2.6028e-01, 6.8988e-01, 5.3118e-03, -6.4690e-01, + -4.4641e-01, -5.6631e-01, -5.8067e-01, -4.0297e-01, -1.0473e-01, + -5.4320e-01, -5.8003e-01, 8.1864e-01, 8.3972e-01, 9.5139e-02, + -1.8129e-01, -5.2752e-01, 9.9258e-01, 4.1471e-01, 1.3180e-02, + -4.7539e-01, -3.0162e-01, 6.0368e-01, -5.5971e-01, -6.4534e-01, + 3.9169e-01, 6.3918e-01, -4.1132e-01, -5.0957e-01, 5.2105e-01, + -2.7070e-01, 5.9905e-02, -3.3576e-01, -3.3296e-01, 7.0715e-01, + 3.0238e-01, -4.5997e-01, 4.5236e-01, 1.5374e-01, 1.1668e+00, + -1.6149e+00, -2.6051e-01, -5.3227e-01, 1.7578e-01, 7.4930e-01, + 2.8102e-01, -4.9195e-01, 6.3192e-01, -2.3230e-01, -2.0161e-01, + 3.4353e-01, 3.6410e-01, -2.6098e-01, -3.7803e-01, 5.0583e-01, + -4.5856e-01, -1.7220e-01, 2.1415e-01, 5.8864e-01, 9.6195e-01, + 2.0834e-01, -3.0874e-01, 9.0913e-01, 4.2598e-01, -4.4309e-01, + 4.9674e-01, 2.3522e-01, -3.7915e-01, -2.1614e+00, -3.6390e-01, + 6.6082e-02, -1.7560e-02, 6.3947e-01, 5.7122e-01, -6.5979e-01, + -1.2240e-01, -3.1060e-01, -7.9018e-01, 3.2094e-01, 4.1335e-01, + 9.0558e-02, -5.1849e-01, -1.8333e-01, -1.7102e-01, 5.3537e-02, + -6.2528e-01, 4.0292e-01, -6.8299e-01, 1.4752e-01, -5.9252e-01, + -3.2536e-01, -5.0408e-01, -8.1444e-02, -8.7752e-02, -6.6041e-01, + -1.5532e-01, 1.9076e-02, -4.8949e-01, -4.7710e-01, -8.8513e-01, + -5.6397e-01, 2.1987e-01, -7.2030e-01, 1.7313e-01, 1.3092e-01, + -8.1733e-02, 3.9655e-01, 4.4970e-01, 1.2227e-01, 4.0097e-01, + -1.8540e-01, -4.0063e-01, 4.4000e-01, -3.4232e-02, -3.9916e-01, + -3.5780e-01, 4.9083e-01, -2.7053e-01, 
4.0731e-01, -4.0411e-01, + -2.9179e-01, -1.9077e-01, -1.7176e-01, 5.6506e-01, 3.3544e-01, + 2.3209e-01, -8.1954e-01, -4.9928e-01, -1.9707e-01, 7.4907e-01, + 4.8980e-01, 4.6658e-01, 1.5447e-01, -2.9315e-01, -5.4347e-01, + -6.2719e-01, 1.3141e-02, -2.7332e-01, -7.1763e-01, 1.0533e+00, + -1.3157e-02, 8.6267e-01, -3.9119e-01, 1.6885e-01, 1.1079e-01, + 9.4402e-01, 2.0349e-01, 1.2190e-01, -1.1176e-01, -6.5901e-01, + -6.6527e-01, 4.0093e-01, 6.4473e-01, -5.8975e-02, 6.1555e-02, + 2.6041e-01, -1.0250e+00, 5.6489e-01, 3.0620e-01, 6.1333e-01, + -9.1546e-01, 1.5750e-01, -3.4531e-01, 5.8772e-01, -2.7294e-01, + 2.8230e-01, 4.2517e-01, -5.1764e-01, 6.1962e-01, 7.8920e-01, + 3.2426e-01, 5.0412e-01, -7.7646e-01, -1.3567e-01, 1.7101e-01, + -9.9796e-01, -4.4537e-01, 4.8358e-02, 6.3490e-01, 2.3371e-01, + -1.4228e-01, 1.3333e-01, -3.6550e-01, -8.1516e-01, -2.0957e-01, + 5.5559e-02, -7.6858e-01, 4.7748e-02, 5.5862e-01, -8.6177e-01, + -1.6945e-01, 7.1205e-02, -4.7811e-01, 2.5253e-01, -2.6369e-01, + 1.6023e-01, 6.2191e-01, 7.3936e-02, -2.9017e-01, 4.3377e-01, + 1.2851e+00, 3.7442e-02, -5.9583e-01, -4.5210e-01, 1.0180e-01, + 5.0120e-01, -2.9147e-01, 8.8957e-01, 2.0878e-02, -4.1971e-01, + -1.8841e-01, -2.7482e-01, 4.3985e-02, 1.0758e+00, 7.5964e-01, + -3.3972e-01, -1.1785e-01, 1.0062e+00, 3.7155e-02, -2.1678e-01, + 2.4287e-01, 6.3074e-02, -5.6007e-01, 7.0995e-01, 3.6084e-01, + -1.6873e-02, -8.3919e-01, 1.6823e-01, -9.4747e-01, -4.8958e-01, + 7.8847e-01, 7.7202e-04, -9.2255e-01, 6.4493e-01, 1.8281e-01, + -3.6465e-01, 5.7656e-01, -4.6102e-01, 2.9057e-01, 4.1614e-01, + 3.3420e-01, 5.0496e-01, 3.3019e-01, -1.1436e-01, 1.2199e-01, + 2.8585e-01, -8.7563e-04, -9.0184e-01, 6.5108e-01, 7.4054e-01, + 4.4034e-01, 3.3995e-01, 5.0614e-01, -1.3607e-01, 3.8840e-01, + 8.6456e-03, 3.4877e-01, -1.8693e-01, -6.4272e-02, 3.5093e-01, + -1.7511e-01, -7.3258e-02, 2.0546e-01, 2.3129e-01, -3.5526e-01, + 8.1774e-01, -6.0656e-01, -7.6188e-01, -3.6338e-01, -8.9585e-01, + -2.6506e-01, -4.5514e-01, 5.5527e-02, 
-2.8256e-01, 7.8668e-01, + 2.8775e-01, -1.0851e-01, 6.9988e-01, -6.2157e-01, 7.6669e-01, + 3.1773e-01, -4.2009e-02, 3.2616e-01, -2.7670e-01, 3.8984e-01, + 1.2011e-01, 4.8168e-01, 8.8709e-01, -3.2855e-01, -2.7174e-01, + -7.4272e-01, 1.2719e-01, -6.1042e-01, 2.2660e-01, 4.4052e-02, + 4.4497e-01, 2.8106e-01, -5.1593e-03, 5.0610e-01, 7.0556e-01, + -4.2123e-01, 1.4735e-01, 5.1446e-01, 4.8529e-01, -5.4870e-02, + -7.3664e-01, 1.6677e+00, -8.3535e-01, -5.2973e-01, 3.0460e-01, + 7.2249e-01, 1.6966e-01, 8.9588e-02, 8.3096e-01, -8.6126e-01, + -7.6821e-02, -5.3029e-02, -8.9052e-01, -3.4367e-01, -7.1569e-01, + 5.8929e-01, -8.2918e-01, -2.3534e-02, -2.0483e-01, 6.2491e-01, + 5.2459e-01, 8.5316e-02, 2.7787e-01, -3.9188e-01, 7.1636e-01, + 7.4751e-01, -9.2674e-01, -1.6038e-01, -6.0258e-01, 1.1622e-01, + 6.2238e-01, -2.3765e-01, 1.0691e-01, 4.0306e-01, 1.1263e-01, + -3.5009e-01, -4.1460e-01, 9.3237e-01, -5.4956e-01, -1.1002e+00, + -2.6917e-02, -1.0258e+00, -7.7761e-01, -7.1489e-01, -3.9857e-01, + -4.6436e-02, 2.4821e-01, -3.0298e-01, 6.9399e-01, -3.3227e-01, + -3.8206e-01, -2.6360e-01, 4.3574e-01, 7.5279e-01, 7.5876e-01, + 2.8074e-01, 1.1018e-01, -1.9109e-01, -6.2550e-02, -5.6703e-01, + -6.2459e-01, 5.1496e-01, 1.8184e-01, -5.6312e-01, 9.6952e-02, + 1.0453e-01, -4.1765e-01, 4.6603e-01, 3.6378e-01, 9.6122e-01, + 3.8609e-01, -3.5637e-01, -8.0935e-01, 6.6562e-01, -2.8625e-01, + 3.6776e-01, -1.1129e-01, -5.7451e-01, 1.0716e+00, 6.7912e-01, + -6.6297e-01, -6.0802e-01, -2.3813e-01, 5.4780e-01, 4.8164e-01, + 9.7501e-02, 2.7520e-01, 1.0619e+00], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0090, -0.0285, -0.0117, ..., -0.0334, -0.0124, -0.0016], + [ 0.0073, -0.0109, -0.0155, ..., -0.0185, 0.0384, -0.0127], + [ 0.0053, 0.0201, 0.0105, ..., -0.0385, -0.0188, -0.0073], + ..., + [-0.0101, 0.0020, -0.0220, ..., 0.0291, -0.0050, 0.0291], + [ 0.0129, 0.0045, 0.0251, ..., 0.0085, 0.0212, -0.0309], + [-0.0195, -0.0013, 0.0205, ..., 0.0047, -0.0370, 0.0062]], + 
device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2406, 0.2976, 0.3762, ..., -0.0151, 0.0414, 0.0243], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0192, 0.0069, -0.0140, ..., 0.0031, -0.0047, -0.0004], + [-0.0301, -0.0264, -0.0079, ..., -0.0047, -0.0026, 0.0025], + [ 0.0142, -0.0199, 0.0016, ..., -0.0152, 0.0006, 0.0133], + ..., + [-0.0022, 0.0139, 0.0077, ..., 0.0194, 0.0097, -0.0001], + [ 0.0107, -0.0085, 0.0090, ..., -0.0009, 0.0038, -0.0255], + [ 0.0045, 0.0113, 0.0006, ..., -0.0065, -0.0201, -0.0050]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.6556e-03, -6.8176e-02, -6.2675e-03, -1.1215e-03, 1.7944e-02, + -2.9358e-02, 2.5925e-02, -1.5930e-02, 1.7960e-02, -1.9501e-02, + 2.4078e-02, -2.7313e-02, -3.3905e-02, 4.1962e-02, 8.5678e-03, + 4.2816e-02, 2.2644e-02, -1.5625e-02, 1.7345e-04, -7.3364e-02, + 1.1093e-02, 1.1620e-02, 4.7363e-02, -3.3627e-03, -5.6671e-02, + -4.5715e-02, 1.2283e-02, 3.6072e-02, 8.4000e-03, 2.0309e-02, + 4.6600e-02, 3.5278e-02, 2.3300e-02, 1.3256e-04, -4.8767e-02, + -7.8369e-02, 2.0660e-02, 2.9999e-02, 7.9895e-02, -5.1208e-02, + 7.0000e-03, -3.2272e-03, 1.7883e-02, -4.1351e-03, -6.4583e-03, + -8.0032e-03, -2.5772e-02, 9.2087e-03, -1.5091e-02, 3.6120e-05, + -2.0645e-02, -2.1378e-02, -7.7393e-02, -4.5502e-02, -3.1891e-02, + 8.6548e-02, 7.3120e-02, -1.1492e-03, 2.1469e-02, 4.8184e-04, + 2.2842e-02, 2.4939e-04, 5.0659e-02, -4.2963e-04, 1.2802e-02, + -6.9946e-02, -2.3636e-02, -4.4250e-02, -5.2551e-02, -1.1066e-01, + 2.3804e-02, 2.5330e-02, -1.2524e-01, -1.9562e-02, 2.8503e-02, + 4.5959e-02, 4.2358e-02, 1.2421e-02, 1.7349e-02, -4.3335e-03, + -2.4200e-02, 5.4535e-02, -3.4332e-02, -1.5556e-02, -4.6906e-02, + -1.1436e-02, 1.5945e-02, -3.3569e-02, 4.2236e-02, 1.0567e-02, + -6.9504e-03, -1.8845e-02, -1.8234e-02, 2.3987e-02, 3.6469e-03, + 1.5884e-02, 3.0350e-02, -4.3396e-02, 1.4664e-02, 7.9346e-02, + -4.3243e-02, 
4.8409e-03, -6.9763e-02, 2.6474e-02, 2.1866e-02, + 6.6467e-02, 4.3945e-02, 1.2970e-02, 3.5187e-02, 4.6661e-02, + 3.4363e-02, -2.2659e-03, -2.5253e-02, -6.2256e-02, 2.8854e-02, + 1.0483e-02, 7.2449e-02, 4.8584e-02, 3.0075e-02, 8.1299e-02, + -3.3020e-02, 2.3636e-02, 2.8095e-03, 2.8671e-02, 3.2593e-02, + -2.7802e-02, -3.8624e-04, -2.8244e-02, -1.2238e-02, 3.8483e-02, + -5.3711e-02, -1.7715e-02, 1.2756e-02, -1.3763e-02, 1.7595e-03, + -3.2227e-02, -6.0699e-02, 2.7863e-02, 3.0975e-02, -7.9956e-02, + 5.5878e-02, -4.8859e-02, -4.9469e-02, -2.5482e-02, -3.4454e-02, + -4.3182e-03, 4.5074e-02, -4.0436e-02, 7.0229e-03, -1.6357e-02, + 4.2755e-02, 2.7176e-02, 5.8594e-02, -1.7578e-02, -2.9541e-02, + 6.7810e-02, 2.7206e-02, -5.5695e-02, -4.2114e-02, 1.8646e-02, + 1.0376e-02, -4.6539e-02, 4.4617e-02, -5.7755e-03, 3.4576e-02, + -1.6525e-02, 3.2928e-02, 9.7275e-03, 5.2277e-02, 4.5891e-03, + -8.9493e-03, -5.1918e-03, 7.8087e-03, -1.7212e-02, -6.0349e-03, + -5.6061e-02, -1.0315e-02, 6.0883e-03, -4.5776e-02, -1.1185e-02, + -3.9520e-02, -6.0387e-03, 1.7715e-02, 6.2286e-02, 5.4077e-02, + 1.3489e-02, 1.3847e-02, -9.6985e-02, -3.0193e-03, -2.6337e-02, + -2.2629e-02, -8.5144e-02, -2.4094e-02, 3.0994e-03, 6.4026e-02, + 2.5665e-02, 1.0254e-02, -4.0253e-02, -6.0333e-02, -1.5167e-02, + -3.4576e-02, -1.1131e-02, 6.5979e-02, -8.1482e-02, 1.5497e-03, + -6.1531e-03, 2.6688e-02, -8.1396e-04, 2.8091e-02, 6.5308e-03, + 3.5858e-03, -1.2375e-02, -2.4185e-02, -1.8509e-02, -4.0924e-02, + 4.5197e-02, -1.4656e-02, -7.1350e-02, 8.2636e-04, -3.3325e-02, + -1.4343e-02, 3.5156e-02, -9.1410e-04, 9.0027e-02, -3.2227e-02, + -4.7272e-02, -9.8267e-03, 1.2329e-01, -2.0981e-02, 5.8327e-03, + -3.9886e-02, -2.2110e-02, 3.6835e-02, 9.0088e-02, -1.4488e-02, + 1.0071e-02, 6.1005e-02, 1.7975e-02, 1.9882e-02, 1.5930e-02, + -2.3010e-02, -4.4861e-02, 4.7241e-02, 1.0857e-02, 1.9333e-02, + 7.7362e-03, -1.8280e-02, 8.5510e-02, 1.7929e-02, -9.3365e-04, + -5.5733e-03, 6.0669e-02, 3.0823e-02, -1.4061e-02, 2.3254e-02, + -2.0508e-02, 
-1.8112e-02, -3.5828e-02, 2.7618e-02, 2.1103e-02, + 2.0264e-02, -2.4841e-02, 6.3721e-02, 2.3941e-02, 3.2005e-03, + -2.1988e-02, -1.0862e-03, -9.0942e-03, -1.1559e-02, -7.1289e-02, + -1.8280e-02, -1.5823e-02, -6.9962e-03, -6.0730e-02, 8.1329e-03, + -1.0065e-01, 4.7058e-02, -5.8861e-03, 4.7779e-04, 1.2875e-03, + -5.7068e-02, -8.9417e-02, 8.3557e-02, -5.9692e-02, -1.7776e-03, + -4.6021e-02, -2.8992e-02, -8.6136e-03, 3.6713e-02, -1.2138e-02, + -3.9032e-02, 4.3060e-02, -6.3049e-02, 2.2352e-04, -1.3138e-02, + -5.7800e-02, -3.6804e-02, -6.9275e-02, -6.9458e-02, -3.7781e-02, + -3.5572e-03, 7.4585e-02, 8.3389e-03, -3.4103e-03, 2.9724e-02, + 3.6438e-02, -2.7328e-02, 5.1300e-02, 5.1300e-02, 8.9493e-03, + 2.2980e-02, 2.5375e-02, -1.6678e-02, -1.1574e-02, -2.7115e-02, + 7.7515e-03, 1.0384e-02, 1.0429e-02, 1.9226e-02, 5.3162e-02, + 5.0926e-03, -1.4091e-02, -4.2510e-04, 5.4962e-02, 2.0605e-01, + -1.3680e-02, 2.9583e-03, 9.6863e-02, 1.4257e-03, 3.6255e-02, + 1.4374e-02, 2.6749e-02, -2.4536e-02, -5.9090e-03, 7.2693e-02, + -2.5467e-02, -2.6337e-02, 1.6785e-02, 1.4023e-02, 2.7313e-03, + 6.2805e-02, 4.9408e-02, -1.0826e-02, 2.3880e-02, 2.1454e-02, + 8.5678e-03, -9.3918e-03, 2.5223e-02, -3.3844e-02, -7.6942e-03, + 1.2955e-02, 5.3955e-02, 9.7656e-03, -2.8976e-02, 3.9337e-02, + 6.9519e-02, 1.8631e-02, -2.3178e-02, -1.1665e-02, 1.8417e-02, + 2.6047e-02, -1.0033e-02, 3.3905e-02, -6.0486e-02, 1.0925e-02, + 3.0426e-02, 1.6312e-02, -4.5685e-02, 3.7689e-02, -3.3905e-02, + -3.9948e-02, -1.4366e-02, 1.8112e-02, 4.4403e-03, 1.6432e-03, + -1.8448e-02, 2.8671e-02, 2.3079e-04, -3.2379e-02, -2.0645e-02, + -3.8849e-02, 1.2970e-02, -3.1342e-02, 2.6199e-02, 8.7619e-06, + -2.7485e-03, 1.5802e-03, -1.9424e-02, -3.8269e-02, -3.0334e-02, + 1.6403e-02, 1.6403e-02, 1.6479e-02, -2.4185e-03, -3.4119e-02, + -1.0376e-02, -1.4740e-02, 5.7316e-04, -1.9852e-02, -5.7800e-02, + -2.1133e-02, 1.9211e-02, -9.1934e-03, -7.0000e-03, -1.4076e-02, + 2.6360e-03, 2.1774e-02, -9.6054e-03, -1.4496e-02, 5.0783e-04, + 3.3691e-02, 
1.2993e-02, -1.5060e-02, 4.6692e-02, 1.4978e-01, + -1.4893e-01, 2.9205e-02, 3.1586e-02, -5.9326e-02, -6.9008e-03, + 3.1891e-02, -4.6875e-02, -3.3035e-03, 3.8910e-02, -2.3666e-02, + 2.3804e-02, -3.9551e-02, 3.0243e-02, 2.4567e-02, -4.4006e-02, + -3.9978e-02, -1.7563e-02, -5.1392e-02, -5.8212e-03, 6.6650e-02, + 2.7866e-03, 5.7411e-04, -1.2375e-02, 4.7333e-02, 2.3727e-02, + 3.4821e-02, 3.6346e-02, 2.7756e-02, -4.2023e-02, 1.8524e-02, + 9.8038e-03, -3.6011e-02, 3.4698e-02, -8.6182e-02, -5.0507e-02, + -2.7039e-02, 8.9951e-03, 4.0314e-02, 1.9272e-02, -3.9825e-03, + -2.0157e-02, -1.1072e-01, -1.3294e-03, -4.5738e-03, 6.2275e-04, + 9.2010e-03, 1.7532e-02, 3.4454e-02, -1.1017e-02, 5.4398e-03, + 1.2947e-02, 1.6876e-02, 1.9363e-02, 1.1658e-02, -1.9501e-02, + 2.9099e-02, -2.0340e-02, 4.2084e-02, 2.5925e-02, 4.1771e-03, + 2.6443e-02, -1.2688e-02, -5.1453e-02, 2.4319e-03, -3.8330e-02, + 1.7654e-02, 2.6962e-02, 3.5095e-02, -5.8441e-02, -3.0014e-02, + 1.3481e-02, 5.5420e-02, 7.1869e-03, -5.1392e-02, 7.4097e-02, + 1.9180e-02, 2.3911e-02, 9.3307e-03, 3.8300e-02, -7.8278e-03, + -5.0171e-02, -8.6670e-02, -6.3416e-02, -2.5757e-02, 7.9834e-02, + 1.3481e-02, -2.6108e-02, -2.6688e-02, 5.6213e-02, 1.0022e-01, + 3.5736e-02, -2.7740e-02, -1.4015e-02, -3.4302e-02, -5.2002e-02, + 2.1912e-02, -6.8909e-02, -1.0216e-02, 4.4495e-02, -2.4780e-02, + -6.0005e-03, -3.8483e-02, -8.3466e-03, 1.4488e-02, 1.9379e-02, + 4.7546e-02, -1.1909e-02, 9.3651e-04, 6.5002e-02, 1.9333e-02, + -1.5175e-02, -3.1464e-02, 3.3875e-02, 5.5008e-03, -2.5116e-02, + -2.5742e-02, 2.8503e-02, 1.1359e-01, -1.2955e-02, -3.0411e-02, + 4.2992e-03, 8.7585e-02, 1.1452e-02, 7.5378e-02, 5.1270e-02, + 1.0872e-02, -4.9469e-02, -5.6030e-02, 6.0120e-02, 1.4200e-03, + 1.4328e-02, -3.7170e-02, -2.3087e-02, 2.5162e-02, 3.9185e-02, + -1.9882e-02, 4.4952e-02, 9.4299e-02, 1.1896e-01, -3.1372e-02, + 5.1422e-03, 3.1372e-02, 4.0131e-02, -5.7251e-02, 1.3519e-02, + -3.1174e-02, -9.5215e-02, -2.9583e-03, 7.3719e-04, -2.1072e-02, + 5.2032e-03, 
2.8839e-02, -2.0477e-02, -3.3600e-02, 6.6490e-03, + -9.7717e-02, 3.9032e-02, 2.8610e-02, -4.1885e-03, 9.6207e-03, + 3.7018e-02, 3.2597e-03, -5.5206e-02, -4.7363e-02, -9.2545e-03, + -2.3823e-03, 3.1616e-02, -2.2171e-02, 2.9877e-02, 4.8828e-02, + -3.5828e-02, -1.1780e-02, -1.1528e-02, -2.6207e-03, -4.1275e-03, + -1.0635e-02, 5.6244e-02, -7.1594e-02, 1.2772e-02, 3.2745e-02, + 4.6082e-02, -1.6464e-02, 9.0149e-02, -2.8244e-02, 6.3110e-02, + -4.0924e-02, -1.2466e-02, -2.4368e-02, 7.9966e-04, -5.1147e-02, + 3.7811e-02, 4.3030e-02, -1.3098e-01, 4.0771e-02, 2.1606e-02, + -1.2375e-02, -4.8187e-02, -3.0472e-02, -4.2114e-02, -2.7298e-02, + 2.9221e-02, -6.0883e-02, -2.0752e-02, 2.4857e-02, 1.0895e-02, + 6.7139e-03, -5.0323e-02, 8.5144e-03, -1.3382e-02, 4.3701e-02, + 2.2934e-02, 4.0710e-02, 7.7248e-04, -2.1942e-02, -1.2863e-02, + -2.3155e-03, -3.0960e-02, -3.7933e-02, -2.0752e-02, 2.0599e-03, + -8.8013e-02, 4.9408e-02, -1.8021e-02, -3.1548e-03, -6.4697e-02, + -1.3191e-02, -3.3752e-02, -5.8517e-03, -3.0594e-02, -1.9196e-02, + 4.2297e-02, 1.8555e-02, -5.2460e-02, 8.3084e-03, -9.3307e-03, + -7.0992e-03, -2.3682e-02, -6.5918e-02, -9.3689e-03, -6.0425e-03, + -1.8585e-02, 7.0679e-02, -2.8896e-03, 2.9907e-02, -1.0857e-02, + 2.8259e-02, -6.4453e-02, 2.2583e-02, -3.0273e-02, 2.6321e-02, + -4.2145e-02, -3.1185e-03, 4.9744e-02, -8.8745e-02, -1.4400e-03, + -1.2947e-02, 2.0996e-02, 1.1005e-01, 1.5099e-02, 5.7404e-02, + 5.1880e-02, 1.2077e-02, -6.0394e-02, 3.3661e-02, -2.8549e-02, + 3.1342e-02, 1.8457e-01, 5.0163e-03, 1.2878e-02, -2.2827e-02, + -1.3092e-02, 1.5366e-02, -4.1565e-02, 3.6377e-02, 2.7023e-02, + 2.3239e-02, 1.0078e-02, 3.0319e-02, -4.4830e-02, 2.3102e-02, + -4.9477e-03, 1.5823e-02, 1.8906e-02, -3.5553e-02, 1.5701e-02, + -4.0039e-02, -6.6071e-03, -4.1840e-02, -2.6443e-02, 1.2383e-02, + 2.7771e-02, -8.8577e-03, 1.2444e-02, 5.2032e-02, 2.4353e-02, + 1.8219e-02, 2.9221e-02, -4.1229e-02, -7.5951e-03, 4.0527e-02, + -1.6968e-02, 6.6833e-03, -9.1248e-03, -5.4688e-02, 8.1177e-03, + 
-2.5269e-02, 3.0014e-02, -2.7328e-02, 3.2253e-03, 3.5431e-02, + -2.1805e-02, 4.3976e-02, 1.8341e-02, 2.3636e-02, 2.2827e-02, + -2.0340e-02, -1.1665e-02, 3.3875e-02, -3.8071e-03, 4.5227e-02, + 6.2790e-03, -6.6895e-02, 4.2877e-02, -1.3599e-01, -5.0583e-03, + 1.9409e-02, -2.7351e-03, 2.2755e-03, -6.1035e-02, 5.2429e-02, + 2.7359e-02, 4.9805e-02, 3.2471e-02, -4.7989e-03, 2.2873e-02, + -1.2941e-03, -9.3811e-02, 2.6810e-02, -1.3443e-02, -1.4938e-02, + 2.0325e-02, -2.8629e-03, -3.7903e-02, 4.5654e-02, -5.9814e-02, + -2.3155e-03, -3.5767e-02, -8.6823e-03, 4.5662e-03, 5.3772e-02, + 6.0669e-02, 5.5504e-03, -4.4556e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.5493, 1.6399, 1.5999, 1.5885, 1.6329, 1.6397, 1.5427, 1.5989, 1.5938, + 1.6237, 1.6590, 1.6457, 1.6181, 1.6205, 1.4713, 1.5963, 1.6449, 1.7084, + 1.6214, 0.4006, 1.6030, 1.6363, 1.5759, 1.5834, 1.6650, 1.5835, 1.6580, + 1.5898, 1.5943, 1.5827, 1.5928, 1.5276, 1.6157, 1.5918, 1.6369, 1.5499, + 1.6460, 1.5885, 1.5715, 1.5816, 1.5972, 1.5878, 1.5510, 1.6371, 1.6093, + 1.6871, 1.5316, 1.6196, 1.6218, 1.5502, 1.6218, 1.6361, 1.5700, 1.5400, + 1.6542, 1.6868, 1.5516, 1.5528, 1.6228, 1.6274, 1.5977, 1.6739, 1.5992, + 1.5451, 1.6178, 1.7181, 1.5690, 1.4695, 1.6697, 1.5485, 1.5806, 1.6739, + 1.8901, 1.6811, 1.6642, 1.6300, 1.5537, 1.5634, 1.6123, 1.6138, 1.6103, + 1.5792, 1.5907, 1.6066, 1.5462, 1.5001, 1.6765, 1.5610, 1.6225, 1.5802, + 1.6846, 1.5668, 1.6137, 1.6866, 1.6791, 1.4948, 1.6263, 1.6051, 1.6387, + 1.5753, 1.5968, 1.5417, 1.5984, 1.5651, 1.5851, 1.6353, 1.5291, 1.5567, + 1.5705, 1.6465, 1.5494, 1.5523, 1.6082, 1.6735, 1.6387, 1.5507, 1.5189, + 1.5639, 1.6035, 1.5690, 1.5816, 1.5912, 1.5696, 1.5963, 1.6102, 1.5688, + 1.6092, 1.6175, 1.6432, 1.6284, 1.5562, 1.5349, 1.5583, 1.6106, 1.5579, + 1.6081, 1.6015, 1.6442, 1.6334, 1.6291, 1.6137, 1.5456, 1.7259, 1.5900, + 1.5554, 1.5780, 1.6134, 1.6272, 1.5741, 1.6224, 1.5436, 1.4855, 1.5818, + 1.6631, 1.5472, 1.2020, 1.5398, 
1.6032, 1.5883, 1.6821, 1.5366, 1.5118, + 1.5927, 1.6376, 1.6173, 1.6728, 1.5829, 1.6114, 1.5536, 1.6710, 1.5637, + 1.5744, 1.4803, 1.5800, 1.6404, 1.6355, 1.6304, 1.6685, 1.5647, 1.5934, + 1.5731, 1.6635, 1.5844, 1.5600, 1.6321, 1.5281, 1.5389, 2.1468, 1.6702, + 1.6049, 1.5883, 1.5372, 1.6238, 1.6070, 1.5457, 1.5475, 1.6049, 1.5958, + 3.0083, 1.5153, 1.6055, 1.6462, 1.5855, 1.6214, 1.6535, 1.5140, 1.5091, + 1.5449, 1.5918, 1.6292, 1.5536, 1.4959, 1.6112, 1.5364, 1.4727, 1.6162, + 1.5672, 1.5872, 1.5662, 1.7077, 1.6566, 1.6526, 1.4817, 1.6145, 1.6988, + 1.6212, 1.6270, 1.6128, 1.6391, 1.7390, 1.6581, 1.6760, 1.7409, 1.6474, + 1.6504, 1.5595, 1.6109, 1.6272, 1.4755, 1.6741, 1.5887, 1.5637, 1.6667, + 1.6239, 1.6049, 1.6180, 1.5848, 1.4813, 1.5367, 1.7013, 1.5623, 1.6481, + 1.5652, 1.6429, 1.6078, 1.6493, 1.5002, 1.6345, 1.5190, 1.6447, 1.5506, + 1.5112, 1.5698, 1.5044, 1.5513, 1.5468, 1.5381, 1.5950, 1.6118, 1.5543, + 1.6706, 1.6023, 1.5748, 1.6172, 1.5726, 1.5440, 1.4987, 1.5566, 1.6374, + 1.5455, 1.5638, 1.6089, 1.6195, 1.6111, 1.6488, 1.6001, 1.4797, 1.6119, + 1.6401, 1.5164, 1.5940, 1.6737, 1.6311, 1.5728, 1.5773, 1.6245, 1.4770, + 1.6347, 1.6324, 1.7058, 1.5734, 1.5590, 1.6184, 1.7056, 1.6453, 1.4783, + 1.5747, 1.6226, 1.5360, 1.6008, 1.6301, 1.5816, 1.5759, 1.5865, 1.5823, + 1.5757, 1.5304, 1.5889, 1.7170, 1.5545, 1.5908, 1.4911, 1.5523, 1.6808, + 1.0102, 1.4416, 1.6402, 1.6516, 1.5736, 1.5707, 1.5194, 1.5789, 1.5578, + 1.5791, 1.5766, 1.6633, 1.5945, 1.5471, 1.6900, 1.5929, 1.5097, 1.5815, + 1.6156, 1.6017, 1.5836, 1.5091, 1.5963, 1.6344, 1.5837, 1.6584, 1.5643, + 1.6530, 1.6016, 1.7498, 1.5965, 1.6086, 1.5473, 1.6562, 1.6158, 1.5985, + 1.5164, 1.5495, 1.6260, 1.6174, 1.5751, 1.5895, 1.5756, 1.5310, 1.5784, + 1.5734, 1.5745, 1.6339, 1.6713, 1.6830, 1.6132, 1.4848, 1.6080, 1.5582, + 1.5989, 1.5899, 1.6601, 1.5907, 1.6642, 1.6658, 1.5568, 1.5169, 1.5837, + 1.7022, 1.5386, 1.5777, 1.6683, 1.5221, 1.4951, 1.5858, 1.5786, 1.6066, + 1.5431, 1.6643, 1.6195, 1.5992, 
1.5671, 1.6200, 1.5648, 1.5666, 1.6210, + 1.5249, 1.6037, 1.6288, 1.5963, 1.5306, 1.6307, 1.5835, 1.4687, 1.5704, + 1.3834, 2.4891, 1.6304, 1.6105, 1.5524, 1.6043, 1.5713, 1.5287, 1.6168, + 1.5186, 1.5306, 1.5284, 1.5702, 1.5954, 1.6189, 1.5576, 1.6143, 1.5329, + 1.6438, 1.5675, 1.5940, 1.5842, 1.5996, 1.5994, 1.5268, 1.5964, 1.5654, + 1.6301, 1.5943, 1.2062, 1.6348, 1.5624, 1.6046, 1.6042, 1.6371, 1.6150, + 1.6369, 1.6009, 1.7376, 1.6231, 1.5693, 1.6378, 1.5659, 1.5503, 1.6336, + 1.5653, 1.6103, 1.6127, 1.6306, 1.5466, 1.5629, 1.6925, 1.5879, 1.5860, + 1.5904, 1.5143, 1.6487, 1.5793, 1.6201, 1.5986, 1.5847, 1.6243, 1.5638, + 1.5498, 1.6304, 1.5939, 1.5593, 1.5534, 1.6571, 1.3636, 1.5721, 1.5782, + 1.5547, 1.5585, 1.6214, 1.6656, 1.5667, 1.6133, 1.5377, 1.6059, 1.5794, + 1.6044, 1.6507, 1.5776, 1.5033, 1.5131, 1.5670, 1.5717, 1.6147, 1.6226, + 1.6446, 1.5396, 1.5357, 1.6272, 1.5983, 1.5448, 1.5605, 1.7070, 1.6120, + 1.6054, 1.6446, 1.6035, 1.6333, 1.5536, 1.5951, 1.5870, 1.6011, 1.5795, + 1.5923, 1.5524, 1.5733, 1.5762, 1.5603, 1.5949, 1.5760, 1.6026, 1.6437, + 1.5465, 1.7006, 1.6365, 1.5208, 1.6201, 1.6017, 1.5543, 1.6804, 1.7420, + 1.6809, 1.5633, 1.6597, 1.6247, 1.6704, 1.6162, 1.6767, 1.5603, 1.5749, + 1.6013, 1.6128, 1.6164, 1.6349, 1.6668, 1.5795, 1.5639, 1.5701, 1.6196, + 1.5638, 1.5866, 1.5865, 1.4001, 1.5744, 1.6010, 1.6010, 1.6537, 1.6356, + 1.6039, 1.6189, 1.5781, 1.5685, 1.5841, 1.5766, 1.5213, 1.6012, 1.5502, + 1.5972, 1.6567, 1.5655, 1.5510, 1.5973, 1.5988, 1.6302, 1.6506, 1.5686, + 1.6109, 1.6744, 1.5829, 1.6352, 1.6946, 1.6377, 1.7059, 1.5471, 1.5368, + 1.5926, 1.5611, 1.6253, 1.5294, 1.4044, 1.6179, 1.5868, 1.6041, 1.6719, + 1.5846, 1.5398, 1.6249, 1.7150, 1.7153, 1.6471, 1.6346, 1.5701, 1.6264, + 1.6090, 1.6172, 1.6575, 1.5996, 1.6354, 1.5527, 1.7479, 1.5759, 1.6304, + 1.6639, 1.5755, 1.6105, 1.6639, 1.6032, 1.5069, 1.6164, 1.6587, 1.5650, + 1.6497, 1.6342, 1.5691, 1.6271, 1.6007, 1.5943, 1.5942, 1.5980, 1.5675, + 1.5087, 1.5839, 1.6046, 1.6019, 
1.6140, 1.3802, 1.6647, 1.6404, 1.5502, + 1.6699, 1.6187, 1.6753, 1.5719, 1.6739, 1.5828, 1.6139, 1.6800, 1.5641, + 1.5271, 1.5764, 1.6025, 1.5944, 1.5819, 1.5731, 1.5817, 1.5822, 1.6158, + 1.6417, 1.6392, 1.6379, 1.6283, 1.5247, 1.5779, 1.6359, 1.4932, 1.5547, + 1.5839, 1.6003, 1.6529, 1.5339, 1.6317, 1.6241, 0.3793, 1.5644, 1.5394, + 1.6564, 1.5079, 1.5475, 1.6538, 1.5442, 1.5427, 1.5980, 1.5646, 1.5751, + 1.6030, 1.5852, 1.5536, 1.5873, 1.5875, 1.5647, 1.6036, 1.5827, 1.5896, + 1.6706, 1.6393, 1.5696, 1.5956, 1.5279, 1.6411, 1.4874, 1.5671, 1.5945, + 1.6258, 1.6366, 1.5930, 1.5429, 1.6729, 1.5758, 1.6391, 1.5821, 1.5636, + 1.6237, 1.6044, 1.6874, 1.5696, 1.6090, 1.5168, 1.2428, 1.6478, 1.6367, + 1.6609, 1.5528, 1.6392, 1.6062, 1.5966, 1.5086, 1.6277, 1.5310, 1.6601, + 1.5359, 1.5844, 1.6001, 1.5696, 1.6074, 1.6169, 1.6799, 1.5825, 1.6523, + 1.6416, 1.6637, 1.6573, 1.4960, 1.6380, 1.5620, 1.6887, 1.6310, 1.6395, + 1.5979, 1.5711, 1.5458, 1.6128, 1.6570, 1.5286, 1.5453, 1.5261, 1.6336, + 1.5759, 1.5715, 1.6449], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 9.9168e-02, 4.3849e-02, -1.7755e-01, 3.2002e-02, 7.9832e-02, + 2.2029e-01, 9.3474e-02, 3.6958e-02, -1.1444e-01, -1.2357e-02, + 4.9962e-02, -3.6297e-02, -1.3317e-01, -2.1069e-02, -1.0363e-01, + -7.6869e-02, -1.0596e-02, -1.9904e-02, 1.5362e-02, 4.3388e+00, + -5.8986e-02, 5.7045e-02, -2.8652e-02, 3.9917e-02, -3.0142e-02, + 1.6968e-02, 2.2256e-02, 4.2894e-02, 4.1783e-02, -5.1836e-02, + 2.7941e-02, 3.3575e-02, 1.9892e-02, 1.2753e-01, 1.1412e-01, + -3.5983e-02, 4.1505e-02, -3.6498e-02, -1.4680e-01, 7.4925e-02, + 5.5049e-02, 4.8291e-02, -2.4369e-03, 5.2145e-02, -9.3334e-03, + 3.9382e-02, 6.6774e-02, -1.6817e-02, 2.5555e-02, 1.1247e-01, + 5.4547e-02, 4.5930e-02, 2.4378e-02, 6.2863e-02, 6.7775e-02, + 8.7540e-02, -1.0405e-01, -3.7358e-02, 5.7717e-02, 7.5026e-03, + 8.6343e-02, -1.0512e-01, 6.9189e-02, 3.2092e-02, 1.0216e-01, + -2.0255e-02, 1.0212e-01, 1.0654e-01, 3.5857e-02, 2.3916e-01, + 
5.0539e-03, 6.2813e-02, 6.3034e-01, 7.7081e-02, -1.2523e-01, + 1.1814e-01, 3.1212e-02, -1.2816e-01, -1.3047e-02, -6.0614e-02, + -1.9448e-02, -5.9216e-02, 5.8349e-02, 1.3912e-01, 5.4253e-02, + 3.7561e-02, 1.0247e-01, 1.0217e-01, -5.0383e-04, -1.2770e-02, + 6.9393e-05, 4.3805e-02, 7.1861e-02, -9.6834e-02, -7.5698e-02, + 2.8570e-02, -2.6059e-02, -1.6039e-02, -6.1035e-02, -3.4551e-02, + 4.6976e-02, -3.7879e-02, 5.5665e-02, -1.1818e-01, 2.9161e-02, + 5.0025e-02, -3.7138e-03, -6.8892e-02, 3.7005e-03, -1.0422e-02, + -3.8308e-02, -2.7117e-02, 7.8872e-02, 7.9073e-02, 7.7121e-02, + -6.7232e-02, -2.7161e-02, -8.8668e-02, -1.8786e-02, 1.0479e-01, + -2.0932e-02, -2.5799e-01, -6.9773e-02, -4.2048e-02, -1.8170e-01, + -9.6430e-03, -1.7160e-01, 5.4934e-03, 4.0641e-02, -5.2708e-02, + -1.1047e-01, 8.8251e-02, -7.7579e-02, -2.2611e-02, -7.4420e-02, + 7.1777e-02, -4.9953e-02, -7.0597e-04, -3.5563e-02, -4.9313e-03, + 2.4732e-02, -9.2466e-03, 3.3888e-02, 6.1213e-02, 6.7017e-02, + -1.1905e-02, -1.3046e-02, 8.6177e-04, 1.0020e-02, 1.4302e-01, + 2.1901e-01, -5.3628e-02, -1.2036e-01, 4.3155e-02, -4.0979e-02, + -4.2002e-01, -7.8410e-03, -1.6884e-02, 1.4052e-02, 6.5780e-02, + 1.6192e-01, -1.3758e-01, -5.3189e-02, 4.2110e-02, -5.1313e-02, + -2.9008e-02, -6.6411e-02, 6.2063e-02, -5.4825e-02, 7.1862e-02, + -8.1163e-02, 1.0925e-01, 3.2302e-02, 3.9469e-02, 5.8172e-02, + -1.2644e-01, 4.1148e-03, 2.5105e-02, -4.9433e-02, 4.0924e-02, + -2.9660e-03, -4.3415e-02, 3.4479e-02, -5.4720e-03, 1.6097e-02, + -1.7373e-01, 6.0208e-02, 2.4375e-01, 2.8294e-03, -9.3666e-02, + 2.8309e-02, -4.6205e-02, 1.0189e-01, 3.5309e-02, -8.6500e-02, + 2.4689e-02, -7.4888e-02, -1.3506e-01, 1.5368e-01, 2.4555e-03, + -1.5823e-01, 5.7932e-03, -5.2773e-02, 4.4575e-02, 8.0074e-02, + 1.6230e-01, -8.7500e-02, 6.6434e-02, 1.2840e-01, 3.9700e-02, + 3.3606e-02, 2.0853e-01, 1.8290e-02, -8.0931e-02, -9.6761e-02, + -4.6112e-02, -1.8914e-02, 4.8560e-02, -1.1570e-01, -2.3101e-01, + -6.7442e-02, -7.2427e-02, 4.9952e-02, -4.3988e-02, 1.8997e-02, 
+ -4.0182e-02, 8.3352e-02, -4.5539e-02, 1.1739e-01, -4.0738e-02, + 3.9162e-02, 6.3029e-02, 1.4560e-02, 7.7979e-02, 1.0567e-02, + 1.8793e-03, -1.1100e-01, 4.5256e-02, 9.0369e-02, 3.4154e-03, + 6.3770e-02, 7.5397e-02, 1.1233e-02, 1.0372e-01, 3.4302e-02, + 7.3305e-02, 7.2093e-02, -9.6457e-02, 2.4593e-02, -5.1865e-02, + 3.5280e-02, -4.2728e-03, 8.0915e-02, -1.4808e-01, 1.5104e-01, + -3.9665e-02, -9.2068e-02, -4.3506e-02, 5.6074e-02, 3.5466e-02, + -5.8521e-02, -8.1093e-02, -5.2968e-02, -4.0219e-02, -2.9365e-02, + 3.5287e-02, -7.8777e-02, -9.6997e-03, -7.0869e-02, -8.0517e-03, + 2.7628e-02, 3.7099e-02, 1.6447e-01, -8.0948e-02, 1.5004e-02, + 7.5770e-02, -6.2618e-02, 1.1144e-02, 6.9380e-02, -1.2749e-01, + 2.0715e-01, 2.1565e-01, 1.3051e-03, 2.1492e-02, 4.6540e-02, + 4.0014e-02, 2.8328e-02, -1.1086e-01, 1.2292e-01, -1.4375e-01, + 3.2895e-03, -3.6497e-02, 9.6531e-02, 6.9485e-02, 7.0013e-02, + 1.9933e-02, 2.3198e-01, 5.5978e-02, 5.4095e-02, 7.0778e-02, + 1.8711e-02, -1.9152e-03, -4.6121e-02, 1.7459e-01, -2.3601e-02, + 1.5128e-02, 1.2909e-01, -1.2117e-01, 7.0730e-02, -5.3275e-02, + 5.2342e-02, 2.5520e-02, -1.4723e-01, -1.5816e-01, 3.6779e-02, + -2.4259e-02, -3.5487e-04, 6.2654e-03, 1.9984e-01, -7.6132e-02, + -5.9028e-02, 2.1878e-01, -9.5459e-02, -5.9844e-02, -7.4959e-03, + -2.0009e-01, 5.3376e-02, 2.6009e-02, 3.6678e-02, -2.1850e-01, + 1.4710e-01, -2.3338e-02, 1.2922e-01, -3.7010e-02, -2.7180e-02, + 9.6733e-02, 5.3181e-02, -6.1271e-02, -1.1622e-02, -5.7339e-02, + -1.5541e-01, 4.0807e-02, -2.3586e-02, -8.5208e-02, 1.2889e-01, + -1.5264e-02, 8.5310e-02, 1.0926e-01, 5.6991e-02, 2.3987e-03, + -1.1391e-01, 3.9262e-02, -2.8064e-02, 6.5222e-02, 2.2107e-02, + -7.2463e-02, -6.1091e-02, 4.6915e-02, -2.5152e-02, -1.2362e-01, + 4.6378e-02, -5.5811e-03, 1.7696e-02, -6.9871e-02, -4.3401e-02, + 1.3433e-03, 5.2477e-02, 1.0138e-01, 5.2289e-02, -2.1016e-02, + 6.6678e-02, -3.2154e-02, -4.6185e-02, -2.7438e-02, -2.5051e-02, + 3.9109e-02, 1.1025e-01, 1.0976e-01, 1.1039e-02, 1.5488e-02, + 
-1.4314e-01, 1.3005e-01, 4.1390e-02, -1.8025e-02, -2.3589e-02, + -1.4516e-02, -2.1773e-02, -7.3370e-02, -7.3497e-02, -9.7571e-02, + 4.6733e-03, 6.5029e-03, -1.3255e-01, -8.5680e-02, 2.9686e-02, + 4.4497e-02, 1.9360e-02, 6.3552e-02, 1.8620e-03, 7.7786e-02, + 1.3989e-01, -3.6987e-02, 1.0180e-01, 7.3867e-02, -1.4634e-01, + -1.1714e-01, -7.4277e-02, 5.9020e-02, 2.2145e-02, -2.0120e-02, + -1.1564e-01, 8.6384e-02, 5.9949e-02, -8.7968e-02, 1.6044e-02, + -1.3494e+00, -1.3101e-02, 4.1360e-02, -7.1064e-02, -8.5138e-02, + -4.7313e-02, 2.7228e-02, -2.2329e-02, -9.8785e-02, -6.7174e-02, + 3.2216e-02, -2.0466e-01, -9.3770e-02, 9.2580e-02, -5.3992e-03, + -1.7000e-02, -3.8034e-02, 9.0621e-03, 8.6878e-02, 1.0987e-01, + 6.6488e-04, 5.1833e-02, 5.3204e-02, 1.2711e-01, -8.6105e-02, + -4.8681e-02, -6.0973e-02, -6.4746e-02, 8.5388e-02, 1.7026e-01, + -1.2123e-01, -1.9613e-02, -9.3257e-02, 1.2637e-01, -2.8975e-03, + 3.1562e-03, 3.3015e-03, -6.1522e-02, 5.9085e-02, 4.2832e-02, + 3.9749e-02, 5.1629e-02, 2.1208e-02, -6.0335e-02, -1.2391e-01, + -1.1050e-02, 6.3860e-02, -1.0895e-01, 4.7044e-02, 2.1700e-02, + -7.9585e-03, 9.7766e-02, -1.1182e-01, 1.3406e-01, 4.7058e-02, + -3.2009e-02, -1.0783e-01, -9.7116e-02, 2.9043e-02, -2.2611e-02, + 1.0493e-01, 1.7676e-01, 7.0756e-02, 5.9062e-03, 8.1334e-02, + 1.1995e-01, -1.3953e-01, 3.0317e-02, 1.0772e-01, -1.1324e-01, + 2.4339e-04, -3.5425e-02, 1.6272e-01, 7.3581e-02, -1.3233e-03, + 5.4502e-02, 2.7183e-02, -4.3697e-02, -2.1902e-02, -1.1941e-01, + 4.8753e-02, -5.3143e-02, 1.1286e-01, -1.1190e-01, 1.5261e-02, + -3.4672e-02, -8.7632e-02, 7.1268e-02, -5.5456e-02, -5.3602e-02, + 1.5196e-02, 1.1779e-01, 3.9464e-02, 1.0086e-02, 1.8113e-03, + -5.6602e-02, -5.1226e-02, -9.6473e-02, 5.4633e-02, -1.6578e-02, + -4.3707e-02, -1.5735e-02, 1.7919e-01, 1.3178e-01, -1.3620e-01, + -7.0200e-02, 6.2552e-03, 5.4710e-02, -2.3846e-02, -1.8538e-02, + 4.6390e-03, -1.0941e-02, -2.7629e-02, 3.5496e-02, -8.9692e-02, + 9.8759e-03, 3.5293e-02, -3.2110e-02, -1.0279e-01, -5.9262e-02, + 
1.0760e-02, -6.1608e-02, 9.6876e-03, -4.7052e-02, -7.1123e-02, + 1.0229e-02, 9.2876e-02, 9.8025e-03, 3.4059e-02, -2.0260e-02, + -8.4060e-03, 1.1549e-01, 7.8580e-03, 1.4220e-01, -7.0386e-03, + 5.5618e-02, 5.0090e-02, -7.8569e-02, 4.5682e-02, -1.0535e-02, + -2.0261e-02, -3.6776e-02, -3.4775e-02, 2.5522e-02, 1.2608e-01, + -6.7188e-02, 2.0959e-01, 2.1202e-02, 2.0807e-02, 1.5260e-01, + 1.0424e-01, 6.0454e-03, 2.1931e-02, 2.5767e-03, 5.1140e-02, + 9.3779e-02, -1.6807e-02, -5.5458e-02, -1.3268e-02, 4.6824e-02, + -2.5836e-02, 1.7654e-02, 3.8869e-02, 7.0855e-02, -5.4796e-02, + -1.8964e-01, 3.8619e-02, 3.9355e-02, 1.0636e-01, 6.1914e-02, + 4.0157e-02, 5.5368e-02, -7.4316e-02, 4.9771e-02, 1.0426e-01, + -3.9820e-02, 5.2887e-02, 7.6436e-02, -2.4672e-02, 7.8243e-02, + -6.3843e-03, 6.3411e-02, 1.4871e-03, -4.8862e-02, 2.5202e-02, + -7.8051e-02, -1.4255e-01, -1.5805e-02, 1.3358e-02, 5.2986e-02, + -1.1864e-01, 4.6314e-02, 3.5143e-02, 7.3639e-02, -4.0801e-02, + 9.7016e-02, 6.1717e-02, -1.8799e-02, 2.7340e-03, -7.7440e-02, + -1.3023e-01, -4.1560e-03, 7.8262e-02, 3.5681e-02, -1.3603e-01, + 8.7046e-02, -1.3018e-02, 7.5417e-02, -1.1819e-02, 5.1524e-02, + -6.0646e-02, -9.2515e-03, 2.0641e-02, 6.3551e-02, -7.2697e-02, + 1.3991e-01, -2.5632e-02, 1.1834e-01, 1.6382e-01, 6.7759e-02, + 2.4555e-02, -8.7307e-02, 5.7441e-02, 7.5379e-02, -6.1329e-02, + 3.8550e-02, 8.5031e-04, 1.1118e-01, -6.0965e-02, 3.8923e-02, + 5.2394e-03, 4.7283e-03, 1.1786e-01, 3.2184e-02, 1.8079e-01, + 1.2621e-01, 2.8031e-02, -6.1219e-02, 1.0079e-01, -9.9842e-02, + 4.4614e-02, -1.3790e-01, -1.7813e-02, 7.1840e-02, 7.4822e-02, + -4.2562e-02, -2.6297e-02, -1.1915e-01, 3.5753e-02, 1.5433e-01, + 4.8434e-02, -1.5736e-01, -1.1653e-01, 5.1710e-02, 5.2133e-02, + -2.3209e-01, 5.4455e-02, -1.1171e-02, 7.2662e-03, 2.9330e-02, + 3.7252e-02, -3.8506e-02, -2.0239e-02, -1.8180e-03, 1.2752e-01, + 1.2268e-03, -4.5370e+00, -8.1560e-03, 7.7931e-02, -3.1185e-03, + -1.2173e-01, -4.9946e-02, -7.9451e-02, -8.4369e-02, 5.3416e-02, + -1.3204e-01, 
-4.5491e-02, 1.4509e-02, -1.4095e-03, -3.9864e-02, + 1.0990e-01, 2.8505e-02, -7.8903e-02, 5.6863e-02, -9.2724e-02, + 6.2713e-02, 3.9996e-02, -8.9662e-03, -1.1170e-02, -1.4313e-01, + -3.2183e-02, 1.0727e-01, 5.8286e-02, -1.9646e-03, 5.0987e-02, + -3.8793e-02, 9.1875e-02, 1.0112e-01, 1.9460e-02, -1.6071e-02, + -5.8468e-03, 2.4669e-03, -1.6030e-04, -5.2524e-02, -5.0830e-02, + 2.3971e-02, 2.3613e-02, 1.6205e-02, -1.4068e-01, 1.1002e-01, + -2.4975e-02, -2.1740e-01, 9.6996e-02, 3.8999e-02, 1.4596e-01, + 8.4671e-02, 6.8433e-02, 9.0532e-02, 7.4389e-02, -6.1792e-03, + 3.1973e-04, 2.1138e-02, 1.3640e-02, 4.0633e-02, -3.3085e-02, + 1.0155e-01, -1.3498e-01, -1.0529e-01, 9.6324e-02, -8.7950e-02, + -7.9196e-03, -1.4550e-01, -1.4610e-02, 4.1099e-02, -7.0051e-02, + -3.3869e-02, 5.5067e-03, -5.6253e-02, -5.2561e-02, 8.9154e-02, + -2.1283e-02, 6.1910e-02, 7.9824e-02, -7.3723e-03, -5.7242e-02, + -1.5250e-02, 1.1465e-01, 4.5637e-02, -1.4574e-01, -9.4880e-02, + -7.2486e-02, 1.0574e-01, -4.5374e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0249, 0.0004, -0.0134, ..., 0.0097, -0.0221, -0.0155], + [-0.0316, 0.0126, -0.0031, ..., -0.0116, 0.0157, 0.0038], + [-0.0350, 0.0108, 0.0050, ..., -0.0090, -0.0208, -0.0072], + ..., + [-0.0134, -0.0048, 0.0264, ..., -0.0219, -0.0065, -0.0021], + [ 0.0038, 0.0062, -0.0022, ..., -0.0090, -0.0054, -0.0187], + [ 0.0026, 0.0050, 0.0312, ..., -0.0390, 0.0172, -0.0119]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3540, -0.2788, -0.3865, ..., -0.3696, -0.3518, -0.2264], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0172, -0.0035, -0.0072, ..., 0.0195, 0.0084, -0.0046], + [ 0.0164, 0.0169, -0.0219, ..., 0.0049, 0.0127, 0.0043], + [-0.0011, -0.0046, 0.0048, ..., 0.0056, -0.0323, -0.0245], + ..., + [ 0.0166, 0.0074, 0.0161, ..., -0.0444, -0.0176, -0.0061], + [-0.0054, -0.0033, -0.0143, ..., 0.0186, 0.0021, -0.0065], + [-0.0192, 
-0.0033, 0.0053, ..., 0.0050, 0.0029, -0.0080]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 4.4006e-02, 4.4006e-02, -4.0344e-02, 7.8247e-02, 3.2082e-03, + -6.9160e-03, 6.3477e-02, 6.2180e-04, 2.3254e-02, -5.0262e-02, + 4.3701e-02, -3.5614e-02, -1.0718e-01, 7.1945e-03, -5.4596e-02, + 2.0920e-02, -9.7275e-03, -5.1758e-02, 2.5070e-02, -2.9663e-01, + 2.7863e-02, 1.9608e-02, -9.3079e-03, -3.5187e-02, 3.0624e-02, + -7.2266e-02, 3.1525e-02, -1.0262e-02, -3.3112e-02, 3.5210e-03, + 6.8481e-02, -4.9934e-03, 4.7531e-03, 8.1421e-02, -4.2969e-02, + -1.2006e-01, 1.0361e-02, -7.3891e-03, 1.4328e-02, 2.2385e-02, + 2.0996e-02, 3.7231e-02, 2.8091e-02, 1.2085e-02, 6.1607e-04, + -5.8960e-02, -1.8845e-02, -7.3242e-03, -1.6846e-02, -3.9185e-02, + -3.6194e-02, -6.7749e-02, -8.5754e-02, -9.9731e-02, -5.9128e-03, + 5.4199e-02, 4.1382e-02, -1.0706e-01, 2.8366e-02, -1.7044e-02, + 4.5319e-02, 4.0741e-03, 4.5197e-02, -3.0594e-02, 4.9500e-02, + -5.0568e-02, -8.7204e-03, -1.3562e-01, 5.0751e-02, -2.9129e-02, + 4.4250e-02, 1.4915e-02, 3.8788e-02, 6.2408e-02, 5.7312e-02, + 3.1403e-02, 5.3314e-02, -4.5502e-02, -2.9419e-02, 7.1335e-03, + -4.0497e-02, 2.8214e-02, -6.0120e-02, -1.1139e-02, -6.0028e-02, + -5.4352e-02, 2.4017e-02, 2.9099e-02, 3.0167e-02, 7.3425e-02, + -2.4384e-02, 1.6584e-03, -4.3716e-03, 5.9296e-02, -5.5389e-02, + 3.3779e-03, 5.9540e-02, -7.3730e-02, 2.7985e-02, 6.5186e-02, + -7.1472e-02, -5.0812e-02, -6.9702e-02, 6.2164e-02, 4.4373e-02, + 5.2582e-02, 1.1920e-01, -2.2644e-02, 6.2988e-02, -1.3977e-02, + 5.6702e-02, -9.5444e-03, -6.0940e-04, -2.4063e-02, 1.2109e-01, + 5.3482e-03, 7.7148e-02, 6.3354e-02, 2.1942e-02, 6.2805e-02, + -1.3056e-03, -3.2776e-02, -2.1667e-02, -9.2239e-03, 4.1687e-02, + -8.0200e-02, -9.2041e-02, -4.2847e-02, -1.4244e-02, -3.4882e-02, + -5.4230e-02, -1.1044e-03, -4.0985e-02, -5.4901e-02, 3.9825e-03, + -1.0185e-02, -5.8014e-02, 2.3926e-02, 1.6510e-02, 1.2032e-02, + 1.3130e-02, -4.1771e-03, 1.8816e-03, -3.4943e-02, 
1.8845e-02, + -1.9928e-02, 4.5990e-02, -2.6535e-02, -8.4412e-02, 4.5288e-02, + 1.2439e-01, -1.6861e-02, -1.6693e-02, 2.3788e-02, -3.0396e-02, + 8.3780e-04, 2.1057e-03, 5.1147e-02, -1.4244e-02, -3.1006e-02, + 1.0052e-01, -5.0323e-02, 7.9651e-02, -3.1525e-02, 3.9612e-02, + 8.4167e-02, 2.6062e-02, 4.0833e-02, 9.8755e-02, -1.6832e-03, + -3.1204e-02, 4.4037e-02, -4.0161e-02, -2.1927e-02, -5.6396e-02, + -1.1725e-01, -2.2964e-02, 3.0228e-02, 2.9922e-02, -3.9005e-03, + -2.4368e-02, -4.0802e-02, 4.2480e-02, 6.6956e-02, 8.8379e-02, + -3.1433e-02, -3.2471e-02, 2.2559e-01, -5.1910e-02, -1.2238e-02, + -5.9967e-02, -1.1407e-01, 4.2877e-02, 7.1899e-02, 4.1138e-02, + 8.1205e-04, 5.4871e-02, -9.1003e-02, 1.6650e-01, 3.3417e-02, + -1.1768e-01, -2.3132e-02, 1.1646e-01, -6.3477e-03, 6.1760e-03, + 5.7316e-04, 5.2246e-02, 2.7954e-02, 5.6213e-02, -7.7698e-02, + -2.0538e-02, 8.9294e-02, -5.5847e-03, 1.2344e-02, -4.4708e-02, + 9.7351e-02, -1.8188e-02, -2.7069e-02, 7.0801e-03, -1.6431e-01, + -1.7761e-02, -6.6910e-03, 5.9662e-02, 1.4954e-01, -2.1393e-02, + -8.1970e-02, 3.8116e-02, 1.2842e-01, 1.4221e-02, -8.9478e-02, + -2.9205e-02, -8.4152e-03, -5.2948e-03, 1.7960e-02, -2.3041e-03, + 2.1622e-02, 5.3894e-02, 4.1321e-02, 6.3049e-02, 7.4585e-02, + -7.5134e-02, -1.7120e-02, 3.9856e-02, 1.5587e-02, 5.4718e-02, + -3.0640e-02, 2.6260e-02, 3.3875e-02, 5.4993e-02, -8.8043e-03, + -4.1107e-02, -2.8122e-02, 2.2095e-02, -3.5095e-02, 4.8065e-02, + 9.5940e-04, -4.5105e-02, -9.4910e-02, 5.2681e-03, -2.9404e-02, + -2.9007e-02, 3.9520e-02, 6.2347e-02, -2.9388e-02, -7.4890e-02, + -6.6284e-02, -7.2670e-03, -5.9174e-02, -3.7262e-02, -1.0040e-01, + 2.4204e-03, -5.4993e-02, -5.0568e-02, -5.2277e-02, -1.6388e-02, + 4.3297e-03, 1.1176e-01, -5.7144e-03, 7.0068e-02, -2.0242e-04, + -7.4768e-02, 6.5231e-03, 2.8366e-02, -6.0394e-02, 2.2354e-02, + -3.7537e-02, -2.7283e-02, 1.1131e-02, 2.7893e-02, -8.1482e-03, + -9.1736e-02, 4.5959e-02, -7.0190e-02, -4.4373e-02, 4.5593e-02, + -8.2825e-02, 9.3628e-02, -7.8308e-02, 
-4.0222e-02, -1.3855e-02, + -2.8920e-04, 1.4343e-01, -9.5367e-04, 1.5392e-03, 1.8402e-02, + 4.4861e-03, -6.3843e-02, -7.4615e-03, 3.1189e-02, 8.0643e-03, + 2.0874e-02, 2.0294e-02, 3.3913e-03, -2.5803e-02, -4.5349e-02, + 6.5575e-03, 1.4587e-02, 9.6054e-03, 4.6234e-02, 9.3872e-02, + -5.8441e-02, 2.1152e-03, -7.7324e-03, 4.5929e-02, 6.1621e-01, + -1.3562e-01, -3.2684e-02, 5.1544e-02, 4.3068e-03, -4.7089e-02, + 2.3514e-02, -3.3569e-02, -8.0017e-02, -4.9103e-02, 1.8994e-01, + 8.9340e-03, -8.0032e-03, 3.8239e-02, -1.7883e-02, -3.2898e-02, + 9.0454e-02, 9.1934e-03, -2.8091e-02, 6.6223e-03, 1.7242e-02, + 6.3744e-03, 3.2711e-03, 1.0388e-01, -8.2397e-02, -4.4464e-02, + -6.4011e-03, 7.5806e-02, -1.0323e-02, -3.5919e-02, -5.2338e-03, + 2.6779e-02, 1.3000e-02, -3.7567e-02, -1.5144e-02, 1.0291e-01, + -5.7220e-02, -3.5889e-02, 1.0323e-02, -4.9072e-02, 1.8463e-02, + 3.1799e-02, -3.0956e-03, 2.1194e-02, 3.1143e-02, 5.4199e-02, + 5.5786e-02, -2.1545e-02, 2.5299e-02, -5.8228e-02, 1.4038e-02, + 5.2414e-03, 1.2466e-02, 4.9042e-02, -1.7014e-02, -8.2825e-02, + -6.7810e-02, 1.8982e-02, -3.2166e-02, 3.1647e-02, -6.4636e-02, + -9.5520e-03, -2.0157e-02, -6.2805e-02, -4.1870e-02, -3.7689e-02, + 2.1042e-02, 6.1981e-02, 9.3384e-02, -6.7505e-02, -2.8427e-02, + -9.1614e-02, -2.3346e-02, -4.8553e-02, -1.6510e-02, -7.6790e-03, + 3.8971e-02, 2.9129e-02, -4.8065e-02, 5.2595e-04, -3.1021e-02, + -4.9072e-02, 5.5328e-02, -3.8147e-02, -1.1398e-02, -1.4252e-02, + -3.4027e-02, 1.2152e-01, -4.3121e-02, 1.0384e-02, 1.3135e-01, + -7.9102e-02, 2.5757e-02, 4.2664e-02, -1.1505e-01, -3.5278e-02, + 8.9798e-03, -7.8659e-03, 7.4768e-02, 2.4002e-02, -4.2053e-02, + 3.3779e-03, -2.9251e-02, 2.5543e-02, 6.2378e-02, 1.3626e-02, + -1.4030e-02, 1.1673e-02, 1.3084e-02, 4.4937e-03, 6.6467e-02, + 3.3569e-02, 1.0590e-02, -1.7273e-02, 1.3354e-01, -1.0046e-01, + 8.8196e-02, 3.1921e-02, 6.5727e-03, 2.5616e-03, 3.4847e-03, + -4.6783e-02, -1.9608e-02, 2.7161e-03, -6.0944e-02, -5.9845e-02, + -4.8126e-02, -2.8610e-03, -6.2683e-02, 
4.9347e-02, -1.0399e-02, + -2.4887e-02, -1.4526e-01, -2.4475e-02, 1.8478e-02, -5.1613e-03, + 1.6012e-03, 5.2856e-02, -3.1281e-02, -4.5166e-02, 5.4588e-03, + -4.0649e-02, 3.0960e-02, -2.2705e-02, 5.6946e-02, -4.3579e-02, + -4.4670e-03, -7.7515e-02, -2.2755e-03, -4.0955e-02, 3.1708e-02, + 1.9547e-02, 6.0852e-02, -8.9111e-02, 2.8534e-02, -3.1952e-02, + 2.3224e-02, 7.8857e-02, 1.0376e-02, -2.0126e-02, -3.8513e-02, + 4.7779e-04, 5.2124e-02, -3.6736e-03, -2.6077e-02, 5.2399e-02, + -3.8635e-02, 1.0284e-02, -1.0727e-02, 3.0426e-02, -3.5706e-02, + -6.3416e-02, -1.4941e-01, -1.2947e-02, -1.3756e-02, 8.0017e-02, + 5.7411e-03, 1.2093e-03, 3.6955e-04, 9.0210e-02, 9.1919e-02, + -7.9193e-03, 2.3174e-03, 4.5654e-02, -4.2664e-02, -1.0083e-01, + -2.9709e-02, -1.0016e-01, -7.1228e-02, 1.2744e-01, -7.2449e-02, + 3.3661e-02, 1.6739e-02, -4.2081e-04, 2.8763e-02, -5.9891e-03, + -1.9592e-02, 3.0579e-02, 5.4199e-02, 5.7251e-02, 1.4343e-02, + 8.3847e-03, -1.4000e-02, 7.7782e-03, -3.4760e-02, -7.5134e-02, + -2.5742e-02, -1.3428e-02, 1.4258e-01, -6.7322e-02, -1.1391e-02, + 2.9312e-02, -3.5217e-02, 5.9784e-02, 1.7410e-02, 8.3557e-02, + -4.3152e-02, -2.1225e-02, -5.1270e-02, 1.0663e-01, -4.0009e-02, + 6.7505e-02, -1.8768e-02, -1.8845e-02, -1.6342e-02, 1.0675e-01, + -7.0129e-02, 6.4819e-02, 7.0801e-02, 1.7065e-01, -5.0415e-02, + 3.4332e-02, 3.6194e-02, 5.4810e-02, -7.4280e-02, 2.6672e-02, + -7.6599e-02, -9.3155e-03, 3.4088e-02, 3.1342e-02, 2.2537e-02, + 1.9272e-02, 2.6093e-02, -5.4260e-02, -5.2246e-02, -2.6123e-02, + -3.4119e-02, 6.9946e-02, 2.1210e-02, -4.1161e-03, -6.0463e-03, + 1.3000e-01, 6.1615e-02, -4.8431e-02, -1.1176e-01, -6.4575e-02, + 1.6499e-03, -4.3701e-02, 7.8430e-02, 5.2795e-02, 5.8197e-02, + -2.3117e-02, -7.7087e-02, -7.5806e-02, -2.0325e-02, 4.2534e-03, + -6.2622e-02, 9.2163e-02, -4.7394e-02, -1.4824e-02, 1.4999e-02, + 1.1772e-02, -3.8635e-02, 6.6101e-02, -5.7312e-02, 6.4392e-02, + -1.3115e-02, -6.5857e-02, -5.3864e-02, 7.6843e-02, -6.1340e-02, + 2.0355e-02, 4.0375e-02, 
-1.3599e-01, 8.5449e-02, -1.3321e-02, + 1.8066e-02, -1.2894e-02, -4.9683e-02, -5.8517e-03, -5.9471e-03, + 1.8253e-03, -2.0447e-02, 8.0688e-02, -7.8979e-02, 8.7524e-02, + -3.7048e-02, -5.1270e-02, 4.2572e-02, 8.2855e-03, 7.0190e-02, + -9.6283e-03, 3.9948e-02, -1.7487e-02, -3.3752e-02, -5.0964e-02, + 1.7883e-02, -6.3538e-02, -3.4790e-02, 1.1554e-01, 1.1285e-01, + -1.2830e-01, 1.6846e-02, -6.2927e-02, 4.2953e-03, -1.3696e-01, + -6.1859e-02, -9.1431e-02, -2.4094e-02, -7.9102e-02, 4.9225e-02, + 9.9106e-03, -2.1606e-02, 2.0981e-02, 4.4403e-02, 6.6772e-02, + 2.4979e-02, 3.5248e-02, -1.3428e-02, 7.2670e-03, -7.3486e-02, + -1.2131e-02, 3.2318e-02, -4.4586e-02, 6.5979e-02, 5.0934e-02, + -2.3590e-02, -3.3875e-02, 5.4245e-03, -6.7261e-02, 3.0937e-03, + -3.1464e-02, -5.6671e-02, 1.6266e-02, -1.2732e-01, -1.2524e-01, + -6.3660e-02, 5.2734e-02, 1.7639e-01, -3.7445e-02, 6.8848e-02, + -6.0730e-03, -3.7994e-02, -3.1982e-02, 2.5940e-02, -2.2720e-02, + -3.3752e-02, 1.8774e-01, -5.4382e-02, 7.8735e-03, -9.3018e-02, + -3.5034e-02, -3.8853e-03, -5.6458e-03, 6.9031e-02, 5.0262e-02, + 6.5491e-02, 7.4219e-02, -1.1917e-02, -1.5190e-02, 4.6577e-03, + 9.6130e-02, -3.8879e-02, 8.1711e-03, 4.7791e-02, 5.7800e-02, + -3.0182e-02, -1.0788e-02, -9.2850e-03, -4.7577e-02, -5.6671e-02, + 1.3741e-02, -3.2410e-02, 1.1253e-02, -1.6083e-02, 2.0233e-02, + 1.4366e-02, 3.6224e-02, 2.8656e-02, 3.3264e-02, 6.3416e-02, + -6.0852e-02, 1.0582e-02, 2.5726e-02, -1.0492e-01, 3.5591e-03, + -6.4819e-02, 3.4058e-02, -7.6111e-02, -3.0792e-02, 1.1810e-02, + -2.9953e-02, 9.2850e-03, 7.1411e-02, -2.3735e-04, 2.5009e-02, + -1.6235e-02, 5.5054e-02, 1.2103e-01, -2.7204e-04, 8.8882e-03, + 3.9551e-02, -8.4656e-02, 3.6163e-02, -1.1993e-01, -2.1896e-02, + 2.0767e-02, -2.4246e-02, -2.0798e-02, -4.0161e-02, 1.0602e-01, + 3.7201e-02, 9.6817e-03, 1.1200e-01, -2.5299e-02, 8.6517e-03, + 9.4910e-03, -1.3940e-01, -2.2217e-02, 5.7037e-02, 7.7576e-02, + 3.6865e-02, -1.5251e-02, -5.6992e-03, 6.6345e-02, -7.4829e-02, + 1.1581e-02, 
-9.0820e-02, -4.9286e-02, -8.9645e-03, -2.6321e-02, + -1.0490e-02, -2.8820e-03, -1.4297e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.2640, 2.1569, 2.2719, 2.2420, 1.7296, 2.2268, 2.2222, 2.1929, 2.2933, + 2.3042, 2.2599, 2.2068, 2.2888, 2.1418, 2.3097, 2.1628, 2.3067, 2.1962, + 2.2541, 1.8798, 2.3330, 2.1527, 2.4260, 2.2432, 2.2344, 2.2169, 2.2899, + 2.2657, 2.2956, 2.2150, 2.2134, 2.3052, 2.2338, 2.2947, 2.1073, 2.1633, + 2.3743, 2.2180, 2.1662, 2.2571, 2.0809, 2.2990, 2.1551, 2.1888, 2.1720, + 2.2131, 2.4677, 2.1487, 2.2043, 2.1072, 2.1941, 2.3012, 2.2159, 2.2000, + 2.5026, 2.2741, 2.3018, 2.1355, 2.2140, 2.2018, 2.1730, 2.2111, 2.3010, + 2.1690, 2.2760, 2.1768, 2.2708, 2.1877, 2.2992, 1.9298, 2.3732, 2.2795, + 1.9456, 2.3256, 2.3227, 2.3304, 2.2873, 2.2034, 2.2036, 2.2123, 2.2451, + 2.2720, 2.3650, 2.1825, 2.3101, 2.1620, 2.3239, 2.3294, 2.3094, 2.1591, + 2.1981, 2.0829, 2.3779, 2.1536, 2.2076, 2.1523, 2.2948, 2.3787, 2.2889, + 2.3791, 2.1895, 2.1518, 2.3576, 2.3959, 2.2077, 1.9487, 2.2961, 2.2239, + 2.2803, 2.1499, 2.0425, 2.3617, 2.4161, 2.2494, 2.2602, 2.1698, 2.2544, + 2.4007, 2.3820, 2.2632, 2.3637, 2.1851, 2.2005, 2.2400, 2.1663, 2.1420, + 2.3441, 2.3564, 2.3680, 2.2313, 2.1356, 2.3518, 2.3749, 2.2281, 2.4065, + 2.1281, 2.1338, 2.1551, 2.1071, 2.7126, 2.1604, 2.1262, 2.4673, 2.1946, + 2.1879, 2.1070, 2.4353, 2.2552, 2.3758, 2.2670, 2.1755, 2.1645, 2.3566, + 2.3182, 2.3796, 1.7392, 2.1921, 2.2126, 2.1753, 2.2293, 2.1131, 2.2069, + 2.2205, 2.3078, 2.3424, 2.1081, 2.0709, 2.2687, 2.2176, 2.2434, 2.2262, + 2.1535, 1.9846, 2.1755, 2.0177, 2.2333, 2.2221, 2.2830, 2.2767, 2.1881, + 2.6348, 2.3794, 2.3480, 2.3592, 2.1792, 1.9351, 2.1316, 1.2753, 2.1821, + 2.1568, 2.3376, 2.4373, 2.2861, 2.2278, 2.4345, 2.3894, 2.2850, 2.2173, + 0.8030, 2.2375, 2.4590, 2.1584, 2.2757, 2.1539, 2.2788, 2.3235, 2.2182, + 2.2825, 2.2370, 2.2002, 2.2239, 2.1483, 2.3083, 1.9194, 2.2911, 2.1758, + 2.1693, 2.3248, 2.2062, 2.1135, 2.5439, 
2.3680, 1.7868, 2.2818, 2.2139, + 2.3934, 2.2358, 2.2729, 2.1581, 2.1231, 2.2160, 2.2524, 2.2938, 2.3082, + 2.1456, 2.2228, 2.4099, 2.2020, 2.1645, 2.1912, 2.2698, 2.2760, 2.2801, + 2.3099, 2.2425, 2.1879, 2.2222, 2.3711, 2.1873, 2.3739, 2.2790, 2.2676, + 2.3049, 2.2355, 2.3792, 2.3141, 2.3377, 2.4090, 2.3914, 2.3383, 2.3445, + 2.3042, 2.4982, 2.3165, 2.2953, 2.1704, 2.2329, 2.4004, 2.1656, 2.3381, + 2.2305, 2.2528, 2.2741, 2.3754, 2.1884, 2.4750, 2.2019, 2.2011, 2.3016, + 2.2869, 2.2985, 1.9621, 2.1770, 2.2807, 2.2525, 2.1672, 2.1855, 2.4377, + 2.2557, 2.1184, 2.3795, 2.3838, 2.1112, 2.2527, 2.1712, 2.3678, 2.2962, + 2.3979, 2.1778, 2.2367, 2.2000, 2.4401, 2.1548, 2.3850, 2.4349, 2.1581, + 2.3237, 2.2696, 2.2616, 2.2724, 2.2583, 2.1867, 2.5341, 2.2949, 2.3043, + 2.1654, 2.2662, 2.4074, 2.2167, 2.3727, 2.3872, 2.1168, 2.2891, 2.2260, + 0.4755, 2.3298, 2.1823, 2.2057, 2.4162, 2.2086, 2.1705, 2.2330, 2.3128, + 2.2970, 2.2148, 2.1848, 2.2950, 2.2475, 2.2988, 2.1964, 2.3900, 2.2402, + 2.2406, 2.2631, 2.3282, 2.1986, 2.2774, 2.1227, 2.3814, 2.2799, 1.9796, + 2.3173, 2.2065, 2.0763, 2.2129, 2.1847, 2.2115, 2.3988, 2.3331, 2.2648, + 2.2173, 2.2222, 2.4255, 2.3040, 2.2819, 2.2370, 2.2084, 2.4024, 2.2437, + 2.3018, 2.4716, 2.2703, 2.2421, 2.2014, 2.2222, 2.3140, 2.1600, 2.2164, + 2.2799, 2.3304, 2.1385, 2.1579, 2.1541, 2.2016, 2.1684, 2.2469, 2.2755, + 2.2213, 2.3451, 2.1953, 2.2886, 2.3196, 2.3890, 2.2848, 2.1909, 2.1920, + 2.2832, 2.2331, 2.3384, 2.3486, 2.2844, 2.1510, 2.2882, 2.3951, 2.1480, + 2.1827, 2.2679, 2.1737, 2.2718, 2.3449, 2.2749, 2.2403, 2.2581, 2.4260, + 1.7728, 2.9975, 2.3103, 2.0867, 2.1864, 2.3866, 2.2906, 2.3580, 2.2100, + 2.2319, 2.4259, 2.2954, 1.9379, 2.1505, 2.2671, 2.1461, 2.1807, 2.2680, + 2.3275, 2.2361, 2.3598, 2.1926, 2.3816, 2.2544, 2.1655, 2.3670, 2.3348, + 2.1993, 2.2997, 1.5097, 2.1035, 2.1724, 2.2067, 2.3189, 2.2590, 2.1026, + 2.2328, 2.2674, 2.2554, 2.2146, 2.1348, 2.5011, 2.3310, 2.1544, 2.2187, + 2.1928, 2.2301, 2.3006, 2.2465, 2.1373, 
2.2418, 2.2347, 2.2468, 2.2022, + 2.1193, 2.4142, 2.2624, 2.1409, 2.1893, 2.2579, 2.3964, 2.2402, 2.3457, + 2.3320, 2.2716, 2.0796, 2.2567, 2.1346, 2.2694, 1.6067, 2.1672, 2.3004, + 2.3265, 2.2902, 2.1561, 2.2396, 2.1815, 2.2325, 2.2429, 2.2065, 2.2034, + 2.1567, 2.4732, 2.3073, 2.2450, 2.4269, 2.2559, 2.3715, 2.2559, 2.5272, + 2.4351, 2.1391, 2.2326, 2.2180, 2.1983, 2.2195, 2.2410, 2.2155, 2.1019, + 2.0940, 2.2429, 2.2578, 2.2910, 2.3509, 2.1719, 2.4017, 2.3275, 2.2035, + 2.2029, 2.2942, 2.1925, 2.3053, 2.2525, 2.2779, 2.2201, 2.1996, 2.2416, + 2.3947, 2.3721, 2.2754, 2.3328, 2.2189, 2.3045, 2.2970, 2.4679, 2.2055, + 2.1974, 2.2517, 2.4998, 2.4094, 2.2671, 2.0876, 2.2103, 2.2355, 2.2657, + 2.2187, 2.3696, 2.2213, 2.2929, 2.4940, 2.1678, 2.2122, 2.3943, 2.2058, + 2.3842, 2.1598, 2.3303, 1.8785, 2.3260, 2.2574, 2.2955, 2.1308, 2.2304, + 2.2647, 2.1533, 2.1775, 2.3234, 2.2043, 2.3099, 2.2060, 2.2395, 2.3715, + 2.2542, 2.3158, 2.2958, 2.1105, 1.9845, 2.1140, 2.2661, 2.2051, 2.2324, + 2.3517, 2.1900, 2.2237, 2.2118, 2.1717, 2.1753, 2.2713, 2.2211, 2.2775, + 2.3432, 2.0401, 2.2418, 2.4036, 2.2838, 2.2299, 2.2778, 2.0997, 2.2535, + 2.3705, 2.3978, 2.4439, 2.3486, 2.1743, 2.3667, 2.1913, 2.3294, 2.1519, + 2.3608, 2.2327, 2.2330, 2.2944, 2.1614, 2.2846, 2.2820, 2.1228, 2.2492, + 2.0873, 2.2352, 2.2103, 2.3395, 2.3103, 2.1853, 2.2346, 2.1970, 2.1938, + 2.2931, 2.2062, 2.3980, 2.2042, 2.1698, 2.1685, 2.2466, 2.2704, 2.2514, + 2.3169, 2.1725, 2.3112, 2.2201, 2.2424, 1.8915, 2.1430, 2.1394, 2.3150, + 2.3607, 2.2556, 2.4942, 2.2349, 2.1679, 2.3034, 2.2429, 2.1978, 2.3658, + 2.2612, 2.2297, 2.0844, 1.7705, 2.1903, 2.3076, 2.1763, 2.2701, 2.3301, + 2.0571, 2.2049, 2.3389, 2.2586, 2.2229, 2.2573, 2.2908, 1.9869, 2.2785, + 2.1082, 2.3457, 2.1417, 2.2914, 2.3343, 2.2597, 1.9121, 2.1643, 2.2582, + 2.1474, 2.2186, 2.1766, 2.1799, 2.2972, 2.2724, 2.2806, 2.2794, 2.1807, + 2.4325, 2.1112, 2.3376, 2.2759, 2.2201, 2.4643, 2.2095, 2.2176, 2.1463, + 2.2402, 2.2014, 2.3456, 2.3663, 2.3184, 
2.2078, 2.0484, 2.3456, 2.2747, + 2.1629, 2.3276, 2.2766, 2.1601, 2.2534, 2.3233, 2.1668, 2.1531, 2.3054, + 2.1789, 2.3498, 2.3915, 2.2312, 2.2355, 2.1540, 1.6734, 2.1592, 2.2040, + 2.1684, 2.4621, 2.2115, 2.1323, 2.2404, 2.4256, 2.3941, 2.2906, 2.1213, + 2.5773, 2.4617, 2.3884, 2.2571, 2.0984, 2.3819, 2.3312, 2.1406, 2.3404, + 2.3413, 2.4340, 2.2037, 2.2027, 2.3634, 2.3197, 2.1520, 2.2111, 2.4423, + 2.2281, 2.2688, 2.3624, 2.2121, 2.1558, 2.1795, 2.2498, 2.2931, 2.1872, + 2.2805, 2.3515, 2.3612], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 0.1621, 0.1320, -0.8621, -0.0336, 0.1502, 0.0541, 0.1869, 0.0480, + 0.1135, -0.0172, -0.3924, 0.3050, -0.6887, -0.2888, 0.5417, -0.1436, + 0.1237, 0.4877, -0.4812, -1.1091, 0.4803, 0.1873, 0.8007, -0.5644, + 0.2164, -0.0993, 0.2257, 0.2541, -0.7131, 0.6955, -0.0308, 0.6967, + 0.2246, -0.5032, 0.4245, -0.0466, -0.5230, 0.4978, 0.7140, 0.5024, + -0.0975, -0.2074, 0.4438, 0.2407, -0.3374, -0.2500, -0.8558, -0.0481, + 0.5578, 0.1013, -0.4001, -0.1277, 0.5967, 0.3349, -1.0492, 1.1932, + 0.2491, 0.0806, 0.1028, 0.0558, -0.1210, -0.1634, 0.3394, -0.5425, + -0.0082, 0.3193, 0.4550, -0.3117, -0.5130, -1.1894, 0.5283, 0.5521, + 0.3825, -0.5808, -0.9071, -0.6364, 0.1480, -0.0503, 0.2118, 0.5899, + 0.4158, 0.3171, -0.5889, 0.6909, -0.2356, -0.1724, 0.3149, -0.5220, + 0.5159, -0.0380, -0.1137, 0.1872, -0.4011, -0.4489, -0.2862, -0.1378, + 0.5677, -0.4666, -0.7463, 0.9317, 0.5492, -0.4332, -0.0704, 0.5361, + 0.4139, 0.3445, 0.1253, 0.4641, -0.0434, -0.0163, 0.3818, 0.8250, + -0.5626, 0.4493, 0.2315, 0.0676, 0.5582, 0.5198, 0.7981, 0.3034, + -0.4785, -0.3753, -0.4814, 0.0080, 0.1538, -0.3050, 0.3001, 0.4994, + -0.5714, 0.1687, 0.0673, -0.5381, 0.2875, -0.2586, 0.6101, 0.2481, + 0.4871, 0.1383, -0.0470, -0.9018, 0.1253, 0.4531, -0.7880, -0.1147, + -0.0429, 0.1276, 0.6707, -0.1825, -0.5070, 0.4332, 0.1504, -0.1559, + 0.5703, -0.4738, -0.9653, 0.6716, -0.2044, -0.2327, -0.0894, -0.0192, + -0.3091, 0.1484, 0.3899, -0.4676, 
0.4944, -0.0957, -0.5098, 0.1894, + -0.3161, -0.1162, 0.3520, 0.1080, -0.1028, 0.4153, 0.1881, 0.3348, + 0.0837, -0.7355, -0.3223, 0.0820, -1.1374, -0.6000, 0.5029, 0.3999, + 0.1938, 0.2657, -0.2888, 1.9289, 0.1540, -0.0699, -0.7190, -0.6000, + -0.3247, 0.3231, 0.6391, 0.7734, 0.2144, 0.3356, -0.3159, 0.1742, + -0.6171, 0.0906, 0.4558, 0.3694, -0.3378, -0.1699, 0.4212, 0.3525, + -0.6260, 0.3566, -0.0672, -0.4158, -0.4320, 0.3351, 0.5299, -0.0924, + -0.5760, 0.0608, -0.0278, 0.1324, 0.9353, 0.3500, -0.6054, 0.2021, + 0.2814, -0.3948, -0.2922, 0.0252, 0.0906, 0.2763, 0.1940, 0.2645, + 0.6804, 0.5362, -0.3680, -0.6316, 0.4309, 0.4631, 0.0191, 0.4439, + 0.9340, 0.3927, 0.4112, -0.6026, -0.1669, 0.2130, 0.6028, 0.1757, + -0.1022, 0.7878, 0.2552, 0.5194, 0.4319, 0.0206, 0.6868, -0.3183, + 0.5887, -0.6037, -0.6882, -0.1226, -0.4427, 0.1058, 0.4440, 0.3308, + -0.3919, -0.2030, 0.1480, 0.9253, 0.3558, -0.4835, 0.4862, 0.3480, + -0.1619, -0.7845, 0.1806, -0.3933, -0.3324, 0.3735, -0.5081, 0.2008, + -0.2949, -0.9179, -0.3203, 0.4054, -0.5126, 0.1843, -0.3469, 0.7029, + 0.5811, -0.1982, -0.4353, 0.5465, 0.5292, 0.0970, -0.3432, -0.2300, + -0.4013, -0.5306, 0.0403, -0.2399, -0.5037, 0.7009, -0.0160, -0.4212, + 0.2411, -0.1549, 0.1045, -0.0450, -0.3573, -0.1059, 0.6022, -0.0220, + 0.8710, -0.1001, -0.2127, -0.0137, 0.4078, 0.4049, -0.0803, 0.3979, + -0.6313, -0.2661, 0.2763, -0.2651, -0.1129, -0.6101, 0.2320, -0.3461, + 0.6760, -0.1488, 0.2616, -0.5456, -0.2613, -0.0147, -0.0846, -0.3216, + -0.6864, 0.2660, 0.5609, 0.3031, 0.3006, -0.0308, -0.4863, 0.0349, + -0.7812, 0.3434, -0.3803, 0.0266, -0.2601, -0.0796, 0.6440, 0.2608, + -0.1114, 0.3551, -0.3097, -0.2056, -0.1391, -0.4942, -0.4338, 0.3045, + -0.4930, -0.5881, -0.4112, -0.4181, 0.4903, 0.4216, -0.2322, 0.6487, + 0.4649, 0.5861, -0.5109, -0.1017, 0.1379, -0.4404, 0.2101, -0.3105, + 0.2269, 0.0366, -0.2296, -0.3566, 0.2081, -0.1343, 0.1240, -0.0179, + -0.2213, -0.0625, -0.1611, 0.5202, 0.6848, 0.0801, 0.3838, -0.2576, + 
0.4027, 0.5769, -0.8503, -0.0359, -0.1019, 0.8661, -0.4741, -0.7002, + -0.0760, 0.1911, -0.1189, -0.5374, 0.5348, -0.1533, 0.1744, -0.1486, + -0.3563, 0.3826, 0.6247, 0.0360, 0.5564, 0.2615, 1.2232, -2.3384, + 0.5863, -0.2913, 0.5541, 0.3806, 0.0317, -0.6352, 0.5425, -0.2135, + -0.5930, 0.6499, 0.3602, 0.0085, -0.0622, 0.1431, -0.2406, -0.5485, + -0.3074, 0.2902, 0.5639, 0.0688, -0.5124, 0.3740, -0.1591, -0.7494, + 0.2714, 0.2281, 0.1605, -1.2912, 0.1387, 0.2413, 0.3343, 0.5193, + 0.6109, 0.1938, 0.1689, 0.2520, -0.2678, 0.0738, -0.1442, -0.8710, + -0.4280, -0.3669, -0.2250, -0.0465, -0.4375, 0.3030, -0.5160, 0.0954, + -0.5934, -0.0733, 0.0735, 0.4256, 0.1004, -0.5574, 0.4439, 0.3795, + 0.0716, -0.0068, -0.7792, -0.2683, -0.2675, -0.3899, -0.2300, 0.1020, + -0.2996, 0.0126, 0.4978, -0.3174, -0.0587, -0.4951, 0.0076, 0.6863, + 0.2059, -0.4359, 0.0722, 0.3730, -0.5903, 0.1088, -0.3538, -0.2944, + -0.6744, -0.3510, 0.5116, 1.0242, -0.2177, -0.9765, 0.1696, 0.5113, + -0.0504, -0.0675, 0.3385, 0.4160, 0.2705, -0.2345, 0.0065, 0.1965, + -0.3579, -0.1229, 0.6358, 0.0630, 0.5625, -0.3390, -0.2423, 0.4026, + 1.0521, 0.1297, -0.2261, 0.3796, -0.2018, -0.4482, 0.6710, 0.2464, + 0.1820, 0.0604, -0.0261, -0.7356, 0.1610, 0.3147, 0.7031, -0.7128, + 0.4793, -0.6179, 0.4299, -0.1426, 0.1959, 0.0474, -0.7578, 0.3871, + 0.4518, -0.0792, -0.0705, -0.7027, -0.2180, -0.1972, -0.4494, -0.4577, + 0.5420, 0.6782, -0.0076, -0.0709, 0.4196, 0.2184, -0.5684, -0.3109, + 0.4548, -0.4821, -0.5241, 0.6643, -0.4297, 0.2593, -0.0437, 0.0028, + 0.1823, -0.0203, -0.2064, 0.2194, 0.4955, 0.0041, 0.5605, 0.6234, + -0.1054, -0.4684, -0.2235, 0.1850, 0.2052, 0.2630, 0.3708, -0.0098, + 0.1920, -0.1915, -0.2142, -0.1864, 0.5316, 0.2705, -0.0170, -0.1591, + 0.2824, 0.0184, -0.2010, 0.2990, 0.0938, 0.3728, 0.6740, 0.4613, + 0.3229, -0.0187, 0.2902, -1.1038, -0.1624, 0.6004, -0.5546, -1.5263, + -0.2350, 0.0903, -0.4732, 0.4322, -0.5486, 0.5330, 0.2533, 0.2336, + 0.1706, -0.3836, 0.1548, -0.0594, 0.1809, 
-0.0227, -0.2615, 0.2121, + 0.1297, 0.3730, -0.2365, 0.3757, 0.4399, -0.1251, -0.5659, 0.2555, + -0.7871, -0.4932, 0.2153, 0.0382, -0.0994, -0.0076, 0.2226, -0.5119, + 0.6038, -0.6071, -0.9637, 0.1321, -0.7474, -0.0215, -0.0357, 0.1232, + -0.7136, 0.3996, 0.9405, -0.0939, 0.4872, -0.3826, 0.5215, 0.5895, + 0.4666, 0.1541, -0.5932, 0.0385, 0.0506, 0.1357, 0.2811, 0.4319, + -0.0849, -0.3161, 0.0826, -0.3186, 0.2883, -0.1638, 0.5536, 0.2820, + 0.0268, 0.2919, 0.7081, -0.2793, 0.7435, -0.0363, -0.1690, -0.1719, + -0.5410, 1.0448, -0.5631, 0.1419, 0.5340, 0.5717, 0.1740, -0.3740, + 0.2532, -0.4504, -0.6378, -0.2206, -0.4109, -0.6917, -0.2142, 0.9643, + -0.3597, -0.1640, -0.7284, 0.1631, 0.1779, 0.1018, 0.4215, -0.4838, + 0.4528, 0.7088, -0.3826, -0.3808, -0.1254, -0.5218, 0.5777, -0.1802, + -0.4346, -0.4577, 0.0334, -0.2096, 0.1364, 0.3060, 0.0198, -0.8002, + 0.0443, -0.5895, -0.6670, -0.4631, -0.0998, 0.1485, 0.4129, -0.1068, + -0.0434, -0.2676, -0.7246, -0.2339, 0.2438, 0.2161, 0.5981, 0.5052, + -0.1812, -0.0142, -0.5303, -0.8499, -0.7728, 0.1444, -0.3332, -0.2925, + 0.3377, 0.4867, -0.3891, 0.4536, 0.9387, 0.4918, -0.0686, -0.3014, + -0.5999, 0.2228, -0.4177, 0.7356, -0.5696, -0.5074, 0.6764, 0.6888, + -0.2936, 0.0707, -0.0800, 0.5583, 0.2862, 0.4679, 0.6217, 0.6404], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[-0.0240, 0.0029, -0.0312, ..., 0.0232, 0.0232, -0.0007], + [-0.0009, 0.0125, -0.0428, ..., 0.0167, -0.0114, 0.0172], + [-0.0243, 0.0004, -0.0028, ..., -0.0064, 0.0121, 0.0166], + ..., + [ 0.0119, 0.0008, -0.0014, ..., -0.0109, 0.0003, -0.0192], + [-0.0027, -0.0135, 0.0034, ..., 0.0144, 0.0325, -0.0189], + [ 0.0063, 0.0089, -0.0012, ..., 0.0233, -0.0183, -0.0119]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.0339, 0.5952, -0.3469, ..., 0.0100, -0.0171, 0.0073], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 1.6983e-02, 1.5869e-02, 2.5711e-02, 
..., 1.8282e-03, + -1.1787e-02, -2.0477e-02], + [-4.1723e-06, 4.6234e-03, -1.7273e-02, ..., -1.3374e-02, + -4.8447e-03, 3.5763e-03], + [-9.9716e-03, -4.3945e-03, 2.9068e-03, ..., 1.9684e-02, + -2.8351e-02, -2.8290e-02], + ..., + [-2.1042e-02, -2.2217e-02, 2.5452e-02, ..., -4.4417e-04, + 1.7960e-02, -1.9300e-04], + [ 6.3667e-03, -1.1711e-02, -3.2842e-05, ..., -1.2466e-02, + 2.6031e-02, 2.3913e-04], + [ 1.1337e-02, 1.0567e-02, 2.4395e-03, ..., -1.5053e-02, + -5.5428e-03, -7.4120e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-3.2745e-02, -3.7262e-02, 7.0618e-02, -4.9408e-02, -8.5632e-02, + -1.1737e-01, 2.8248e-03, 3.5278e-02, 2.7481e-02, -7.3914e-02, + 6.2103e-03, -1.5160e-02, 6.6101e-02, 2.4586e-03, 3.9703e-02, + 1.2024e-02, 8.2474e-03, -1.0178e-02, 2.8503e-02, -8.3923e-02, + -3.1860e-02, 5.1636e-02, 2.6596e-02, 1.4214e-02, 5.3749e-03, + 3.8635e-02, 9.4910e-03, 5.4169e-03, 2.8625e-02, 5.6244e-02, + 9.2773e-02, -3.4973e-02, -5.4443e-02, -1.7410e-02, -8.0322e-02, + -2.9312e-02, -4.7180e-02, 3.3905e-02, 8.2397e-02, -5.3833e-02, + 5.6610e-02, -3.3188e-03, 9.7198e-03, -6.4163e-03, -3.9764e-02, + -7.4097e-02, -3.4454e-02, -2.2781e-02, -6.9641e-02, -5.4474e-02, + -4.5288e-02, -8.2581e-02, -1.0016e-01, -8.6182e-02, -6.2744e-02, + 6.8542e-02, 7.1411e-02, -7.6782e-02, 6.6101e-02, 3.5004e-02, + 2.3087e-02, 1.2428e-02, 3.9154e-02, -2.8885e-02, 2.3026e-02, + 3.0975e-02, -6.9946e-02, -2.4078e-02, -7.0435e-02, -1.0736e-01, + 7.1960e-02, 3.1464e-02, -1.7651e-01, -5.1758e-02, -1.1955e-02, + -2.8366e-02, 1.2091e-01, -8.0032e-03, 2.8351e-02, 7.6599e-02, + -7.8278e-03, -2.9221e-02, -3.5126e-02, -6.3965e-02, -1.0217e-01, + -1.1749e-02, 2.8610e-02, -3.5492e-02, 1.8173e-02, -1.5427e-02, + -2.2491e-02, -7.9895e-02, -6.0577e-03, 1.0437e-01, 7.1030e-03, + -4.1779e-02, 2.0691e-02, -2.1744e-02, -4.5074e-02, 8.3557e-02, + -3.9795e-02, -6.3354e-02, -9.0454e-02, 1.1726e-02, -1.9026e-03, + 6.2561e-02, 5.7648e-02, -1.2598e-03, -3.3142e-02, 
1.1192e-02, + -5.4993e-02, -6.8207e-03, -7.0251e-02, -3.7537e-03, -2.0264e-02, + -1.6495e-02, 5.8929e-02, 5.2734e-02, 2.5604e-02, 4.9591e-02, + 4.0833e-02, 7.2021e-02, 8.3618e-02, 5.5908e-02, 9.9365e-02, + -6.7810e-02, -4.8859e-02, 6.0913e-02, 4.6936e-02, 2.3026e-02, + 2.4643e-02, -2.7294e-03, -7.0000e-03, 2.8259e-02, -2.8000e-02, + 3.6743e-02, 3.1036e-02, -5.6335e-02, 1.1298e-01, -2.6901e-02, + 4.3297e-03, 5.1514e-02, 2.9175e-02, -3.7628e-02, -2.9716e-03, + -5.6122e-02, 4.5654e-02, -4.4189e-02, -7.1960e-02, -4.3121e-02, + -1.0902e-02, 1.4626e-02, 2.4857e-02, -5.3833e-02, -3.4943e-02, + 1.2573e-01, 4.2114e-02, -8.2397e-02, -2.7939e-02, -9.5062e-03, + 4.9072e-02, -3.5828e-02, -1.2123e-02, -4.6295e-02, 6.8604e-02, + -5.3902e-03, 6.6895e-02, 3.5706e-02, 5.4810e-02, -8.3984e-02, + 2.7725e-02, -9.0881e-02, -2.1469e-02, 1.2909e-02, 1.5402e-03, + -1.3535e-02, 1.6994e-03, 5.4443e-02, 5.9296e-02, -3.1952e-02, + -8.9783e-02, 1.1833e-02, 3.9948e-02, 8.2764e-02, 1.7273e-02, + 1.1322e-01, -1.2062e-02, -1.9678e-01, -8.5068e-03, 1.9958e-02, + -4.4250e-02, -9.0637e-03, 4.3640e-02, 1.1816e-01, 9.2712e-02, + -3.2440e-02, 1.2960e-03, -3.6407e-02, -1.6748e-01, 2.6276e-02, + 2.5833e-02, 1.8478e-02, 2.1027e-02, -5.5878e-02, -2.7237e-02, + -1.8600e-02, -7.0343e-03, 5.3101e-02, 7.2289e-03, -1.1633e-01, + 1.1024e-02, -1.3672e-02, -8.4778e-02, 4.2999e-02, 1.5900e-02, + -2.4460e-02, 2.7752e-03, -9.0820e-02, -5.1941e-02, 4.6051e-02, + 4.0619e-02, 2.6520e-02, 3.6804e-02, 5.4504e-02, 1.7334e-02, + 5.2551e-02, 3.5400e-02, 4.0680e-02, 5.0629e-02, 3.8849e-02, + -1.0114e-01, -1.7532e-02, -1.4519e-02, -4.5967e-03, -5.9601e-02, + 2.7451e-02, 6.6711e-02, -8.5388e-02, 5.1270e-02, 4.3106e-03, + -2.9434e-02, -2.7191e-02, -6.9580e-02, 2.4521e-02, -1.5854e-02, + -3.3295e-02, -5.2155e-02, 7.4463e-02, -8.1024e-03, 4.5990e-02, + -3.9856e-02, 7.2327e-02, -1.3824e-02, -3.5767e-02, -5.3375e-02, + -6.3354e-02, 5.9143e-02, -2.9785e-02, -5.1819e-02, -1.4191e-03, + -4.8309e-02, -2.7359e-02, 1.4809e-02, -4.0321e-03, 
2.6428e-02, + 6.0196e-03, 1.8768e-02, -3.5309e-02, 5.8807e-02, -3.5065e-02, + -5.7404e-02, 1.9409e-02, -2.7237e-02, 2.7374e-02, 3.3627e-03, + -7.4646e-02, -2.2507e-02, -2.6672e-02, -2.2705e-02, 3.7537e-02, + -1.1487e-01, -8.6731e-02, 6.1951e-02, -1.8433e-02, -4.1595e-02, + -6.4888e-03, -2.1606e-02, -1.8829e-02, 1.8097e-02, -1.3634e-02, + -2.5406e-02, 4.0680e-02, -4.6448e-02, -6.1035e-02, 4.1901e-02, + -1.5297e-02, -2.0386e-02, -3.8208e-02, -3.4698e-02, -2.6306e-02, + 2.8915e-02, 2.8961e-02, -1.9180e-02, -2.0874e-02, 1.2062e-02, + 9.1248e-02, -7.3792e-02, 8.2581e-02, 2.2095e-02, 2.4109e-02, + 8.5297e-03, -3.9917e-02, 1.9516e-02, 5.9479e-02, 1.6575e-03, + -6.4392e-02, 5.5939e-02, 4.1351e-02, -4.5013e-02, 3.5065e-02, + -5.5939e-02, -3.6621e-02, 7.2388e-02, -1.1955e-02, 5.0537e-01, + -4.3221e-03, -5.9845e-02, 9.2468e-02, -2.0552e-04, 5.4535e-02, + 7.6447e-03, 1.6312e-02, -1.0315e-01, 1.3399e-03, 1.4111e-01, + -5.0690e-02, -7.7637e-02, 4.9408e-02, 1.8692e-02, 5.7106e-03, + 1.2610e-01, -1.7075e-02, -4.6783e-02, 8.6426e-02, -1.9379e-02, + 6.0516e-02, -6.6467e-02, 6.7383e-02, -2.3712e-02, 3.2959e-03, + 3.4363e-02, -2.7908e-02, -1.6464e-02, -4.3793e-02, -3.1555e-02, + 1.0529e-01, -2.0172e-02, -3.9612e-02, -1.1551e-02, 5.5664e-02, + -3.3905e-02, -1.6510e-02, 3.2928e-02, -4.0649e-02, 7.1716e-02, + -2.5253e-02, 3.1281e-02, -1.1154e-02, 1.2230e-02, -7.7515e-03, + -5.7953e-02, -9.7036e-04, 4.2023e-02, 1.6327e-02, 1.5778e-02, + 4.0436e-03, -2.2984e-03, 8.8959e-03, -7.6172e-02, -5.8380e-02, + -4.4250e-04, -6.6772e-02, -1.6479e-02, -1.1642e-02, 5.5298e-02, + -3.2532e-02, 6.6833e-03, -1.2039e-02, -2.9358e-02, 1.2466e-02, + 2.9068e-02, -4.7211e-02, 1.2427e-01, 2.3285e-02, 2.2766e-02, + -1.3580e-02, 4.3304e-02, -7.6965e-02, -2.4384e-02, 7.5874e-03, + -4.8157e-02, 6.9702e-02, -2.3193e-02, -7.1655e-02, 6.7078e-02, + -8.1253e-03, 2.9144e-02, 5.5771e-03, 1.5549e-02, 7.8735e-02, + -1.7288e-02, -1.9043e-02, 1.4229e-02, -2.3041e-02, 9.5337e-02, + -1.4673e-01, -2.5482e-02, 1.0269e-02, 
-3.0579e-02, 2.6459e-02, + 1.0797e-01, 1.5671e-02, 2.2675e-02, 5.3864e-02, -3.8116e-02, + 1.2083e-03, 4.2343e-04, 4.5586e-03, -4.0558e-02, 2.6489e-02, + -6.5002e-02, -1.5915e-02, -6.2683e-02, -4.7493e-03, 6.5430e-02, + -8.2779e-03, 3.7670e-04, 7.5928e-02, 5.5603e-02, 3.0899e-02, + 6.4636e-02, 4.4617e-02, -1.4450e-02, -3.3234e-02, 2.0126e-02, + 5.1842e-03, -2.2232e-02, 4.2847e-02, -5.9418e-02, 2.7069e-02, + 1.2884e-03, -3.8635e-02, -8.5678e-03, -3.1174e-02, -3.3691e-02, + 1.8463e-02, -3.8483e-02, -1.6953e-02, 1.5032e-04, 5.0415e-02, + -1.6312e-02, 2.1896e-03, 5.3482e-03, -2.6169e-03, 1.5961e-02, + 8.8989e-02, -8.9417e-02, 6.7322e-02, 4.4060e-03, -4.0680e-02, + 7.1655e-02, -7.9880e-03, 3.7415e-02, 1.4679e-02, 5.3253e-02, + -4.5166e-02, -6.3553e-03, -6.2622e-02, -1.0034e-01, -3.8055e-02, + 1.2070e-02, 5.5359e-02, 2.9800e-02, -4.4250e-02, 3.8483e-02, + 1.8158e-02, 2.4048e-02, -1.9745e-02, -4.2358e-02, 1.0504e-01, + -7.1289e-02, -4.8798e-02, 5.9113e-02, -1.9197e-03, 7.9346e-03, + -4.9042e-02, -1.8524e-02, -4.1199e-02, -4.6204e-02, -3.8452e-02, + 1.0345e-02, 9.5825e-02, -2.6993e-02, -1.4549e-02, 8.4534e-02, + -5.8889e-04, -9.0942e-02, -4.8757e-04, 1.6724e-02, -2.7725e-02, + -1.8555e-02, 5.5695e-02, -2.7573e-02, -5.7709e-02, -2.2324e-02, + 2.2247e-02, -6.0883e-02, -8.6365e-02, 1.7517e-02, 8.9539e-02, + 6.8359e-03, -4.1840e-02, -2.2354e-03, 5.0446e-02, -2.1027e-02, + -8.9966e-02, 3.4428e-03, 1.4275e-02, 6.3232e-02, 2.9099e-02, + 1.7456e-02, -3.3203e-02, 2.0004e-02, -3.6316e-03, -4.0131e-03, + -6.4331e-02, 1.0785e-01, 2.2873e-02, 3.7689e-02, 6.3843e-02, + 3.4332e-02, -5.2948e-02, -1.1115e-01, 6.5063e-02, 6.7810e-02, + 7.7576e-02, -1.8646e-02, 2.1606e-02, -4.2816e-02, 5.0446e-02, + 1.8005e-02, 3.1799e-02, 9.4849e-02, 1.8478e-02, -4.5135e-02, + 2.9434e-02, 3.4027e-02, -5.4230e-02, -1.2000e-01, -4.0192e-02, + -3.8055e-02, -1.1035e-01, -1.5144e-02, 6.3538e-02, -5.6824e-02, + -2.4490e-02, -2.3254e-02, -8.2642e-02, 7.3181e-02, -1.1963e-02, + -9.0881e-02, 7.8726e-04, -1.5526e-03, 
1.6342e-02, 7.5806e-02, + 5.8746e-02, -2.7905e-03, -5.0781e-02, -3.6652e-02, 3.6438e-02, + 9.2957e-02, -1.8158e-02, -9.3323e-02, 4.7333e-02, 3.7048e-02, + -7.4463e-02, -3.2379e-02, -4.8004e-02, 4.8798e-02, -9.2041e-02, + 1.1208e-02, -1.8906e-02, 6.5491e-02, -1.1194e-01, 1.6312e-02, + 9.8228e-04, 4.9377e-02, 4.5258e-02, -9.7198e-03, -4.9973e-03, + 1.4687e-02, -1.0727e-02, -3.3112e-02, 1.3420e-02, -1.1926e-01, + -3.4210e-02, -2.6505e-02, -1.4172e-01, -4.8645e-02, 6.9214e-02, + -1.5572e-02, 2.7514e-04, -2.4918e-02, -8.4457e-03, -3.5706e-02, + 4.5898e-02, 1.9073e-02, 1.9043e-02, 2.5650e-02, 7.7454e-02, + 4.1199e-03, -2.7809e-03, -2.6291e-02, 1.1681e-02, 1.0777e-03, + 2.2415e-02, 7.7095e-03, 4.5471e-02, -3.2837e-02, 3.6530e-02, + 1.0117e-02, 7.2449e-02, -8.8272e-03, -6.4240e-03, -2.0645e-02, + -1.9745e-02, 1.2451e-01, -5.4504e-02, -2.4109e-02, -3.6652e-02, + -4.5776e-02, -1.9501e-02, -1.4977e-02, 3.8757e-02, -2.0264e-02, + 9.2468e-02, -3.8757e-02, -1.0120e-01, 4.2511e-02, 9.3384e-03, + -7.1228e-02, -6.3843e-02, -8.2764e-02, -1.5274e-02, 4.5837e-02, + 1.7807e-02, 1.1047e-02, -4.1313e-03, 3.1143e-02, -4.5074e-02, + 2.0096e-02, 2.8496e-03, 3.0777e-02, -5.9509e-02, -5.3680e-02, + -5.6610e-02, 6.8054e-02, -4.6730e-03, -4.0741e-02, -3.3478e-02, + -1.1200e-02, 1.2512e-02, 4.8859e-02, 1.0048e-02, 1.2789e-03, + 3.3203e-02, 8.2550e-03, 1.0597e-02, 6.1737e-02, -4.7577e-02, + 1.2741e-03, 3.1787e-01, -7.5928e-02, -9.5703e-02, 1.4748e-02, + 1.4664e-02, 8.2764e-02, 2.2106e-03, 1.1646e-01, 2.5482e-02, + -1.8402e-02, 3.6602e-03, 2.4826e-02, 9.9106e-03, -4.2023e-02, + -8.2092e-02, 3.7781e-02, 2.1667e-02, -3.5553e-02, 1.0400e-01, + 1.7120e-02, -2.5818e-02, -4.9164e-02, -1.6220e-02, -6.5269e-03, + 5.7251e-02, -9.5459e-02, 1.8341e-02, 2.6260e-02, 4.4739e-02, + 6.4941e-02, 3.4058e-02, -3.7292e-02, -3.1525e-02, 7.1838e-02, + 1.8677e-02, 9.6863e-02, -1.2825e-02, 2.3460e-03, 4.5654e-02, + -5.2612e-02, -2.2858e-02, 1.5305e-02, 2.5543e-02, -1.0818e-02, + 3.5782e-03, 7.5500e-02, 1.0168e-01, 
2.2522e-02, -4.3976e-02, + -1.9531e-02, -1.2894e-02, -1.2756e-02, 1.8740e-03, -3.4237e-03, + 5.5573e-02, -5.2338e-02, 4.3152e-02, -1.2140e-01, -1.8387e-02, + 3.2928e-02, 4.8950e-02, 1.2146e-02, -5.9845e-02, 6.4087e-02, + -4.7821e-02, 7.9224e-02, 4.6906e-02, 9.7046e-02, 5.7953e-02, + 8.5510e-02, -1.3525e-01, 2.4994e-02, -7.3471e-03, 3.4424e-02, + -1.6235e-02, -2.4094e-02, -2.6352e-02, 6.3049e-02, 3.8872e-03, + 5.0720e-02, -1.5222e-01, 5.7251e-02, 2.0691e-02, 6.3416e-02, + 4.5654e-02, -1.1436e-02, -3.4546e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.7929, 1.7831, 1.7803, 1.6813, 1.6177, 1.8186, 1.8280, 1.8169, 1.8193, + 1.7491, 1.7959, 1.8259, 1.7240, 1.6950, 1.6551, 1.7475, 1.8325, 1.7874, + 1.7081, 0.4558, 1.7815, 1.8641, 1.7709, 1.7374, 1.8430, 1.7791, 1.7354, + 1.7626, 1.7691, 1.6661, 1.7468, 1.7535, 1.7837, 1.7498, 1.7474, 1.7921, + 1.8657, 1.7096, 1.7576, 1.6830, 1.7558, 1.8993, 1.7414, 1.7797, 1.7533, + 1.6189, 1.7345, 1.7020, 1.7244, 1.6815, 1.7499, 1.8150, 1.7153, 1.6953, + 1.7383, 1.6688, 1.8391, 1.6598, 1.7349, 1.7585, 1.6782, 1.7734, 1.7821, + 1.7409, 1.7198, 1.6855, 1.7529, 1.7155, 1.7341, 1.5297, 1.7421, 1.8686, + 1.8653, 1.7259, 1.7289, 1.6731, 1.7387, 1.6307, 1.7143, 1.6711, 1.7531, + 1.8033, 1.7714, 1.7676, 1.7775, 1.6657, 1.7763, 1.7440, 1.7064, 1.7384, + 1.7850, 1.7902, 1.7361, 1.8213, 1.7260, 1.6903, 1.8085, 1.7339, 1.7769, + 1.7721, 1.7588, 1.6839, 1.8513, 1.7149, 1.7672, 1.7700, 1.6876, 1.7124, + 1.7675, 1.8268, 1.7704, 1.7144, 1.7660, 1.8349, 1.7693, 1.6901, 1.7396, + 1.8073, 1.6787, 1.7993, 1.7948, 1.7033, 1.7233, 1.7223, 1.7224, 1.7772, + 1.6722, 1.8057, 1.8742, 1.6941, 1.7141, 1.7528, 1.7275, 1.7983, 1.7217, + 1.8105, 1.6726, 1.7739, 1.7665, 1.7696, 1.7423, 1.7597, 1.7595, 1.6767, + 1.7325, 1.7476, 1.7849, 1.7194, 1.6787, 1.7302, 1.7817, 1.7152, 1.7512, + 1.6956, 1.7467, 1.3401, 1.8498, 1.7573, 1.7962, 1.7595, 1.6933, 1.6367, + 1.7679, 1.6964, 1.7361, 1.7991, 1.7227, 1.7609, 1.7854, 
1.7482, 1.7924, + 1.8307, 1.6455, 1.6848, 1.8324, 1.8348, 1.7826, 1.8397, 1.7152, 1.6917, + 1.7201, 1.7656, 1.8076, 1.8197, 1.7645, 1.6691, 1.7135, 2.2904, 1.7352, + 1.7467, 1.7217, 1.7181, 1.8016, 1.8424, 1.6587, 1.7916, 1.7563, 1.7790, + 2.8969, 1.7273, 1.7394, 1.7848, 1.7283, 1.7294, 1.7608, 1.7266, 1.7323, + 1.7652, 1.7301, 1.7450, 1.8608, 1.7369, 1.7490, 1.5925, 1.7045, 1.7451, + 1.7193, 1.7442, 1.6924, 1.7473, 1.6818, 1.7552, 1.6258, 1.7437, 1.7192, + 1.7460, 1.7766, 1.8368, 1.7696, 1.8426, 1.8348, 1.7678, 1.8167, 1.7304, + 1.7137, 1.7233, 1.8062, 1.8429, 1.7146, 1.8293, 1.8135, 1.7622, 1.8080, + 1.8045, 1.7830, 1.7069, 1.7667, 1.7784, 1.6708, 1.7645, 1.7546, 1.8052, + 1.7619, 1.7424, 1.8167, 1.7871, 1.7916, 1.7700, 1.7150, 1.7424, 1.7701, + 1.7658, 1.6809, 1.7777, 1.7157, 1.7050, 1.8382, 1.7815, 1.7690, 1.8156, + 1.7530, 1.6965, 1.7266, 1.6922, 1.7875, 1.6888, 1.6801, 1.6591, 1.7296, + 1.8412, 1.7243, 1.6172, 1.7628, 1.7759, 1.7321, 1.7935, 1.7681, 1.7590, + 1.6764, 1.6220, 1.7573, 1.7823, 1.7837, 1.8083, 1.7544, 1.7745, 1.6749, + 1.7069, 1.7159, 1.8097, 1.6928, 1.7173, 1.7915, 1.7135, 1.8118, 1.7447, + 1.7273, 1.6825, 1.7379, 1.8264, 1.7297, 1.7173, 1.7472, 1.8040, 1.7840, + 1.7809, 1.6736, 1.7975, 1.7601, 1.7554, 1.7233, 1.6839, 1.7200, 1.7189, + 1.1019, 1.6431, 1.7165, 1.7557, 1.6809, 1.7875, 1.6864, 1.8092, 1.7360, + 1.7681, 1.7731, 1.7921, 1.6934, 1.7466, 1.6807, 1.7739, 1.7474, 1.7377, + 1.7673, 1.7556, 1.7118, 1.7117, 1.7537, 1.7710, 1.7321, 1.7860, 1.6302, + 1.7879, 1.8190, 1.6993, 1.7643, 1.8275, 1.7474, 1.7175, 1.7749, 1.7524, + 1.6974, 1.7262, 1.8118, 1.7030, 1.7440, 1.7438, 1.7523, 1.7821, 1.7594, + 1.6938, 1.7808, 1.7905, 1.7734, 1.7000, 1.7035, 1.7412, 1.7406, 1.7781, + 1.7695, 1.7509, 1.7576, 1.7752, 1.7044, 1.7850, 1.7253, 1.6955, 1.7934, + 1.7814, 1.7614, 1.7061, 1.8297, 1.7617, 1.7969, 1.6922, 1.6822, 1.7527, + 1.7551, 1.8767, 1.7740, 1.8746, 1.7875, 1.8527, 1.7601, 1.7039, 1.7222, + 1.7680, 1.6989, 1.7715, 1.7485, 1.7254, 1.6715, 1.7374, 
1.6857, 1.6731, + 1.5084, 2.5756, 1.8057, 1.7862, 1.6845, 1.7544, 1.7355, 1.7684, 1.7092, + 1.8210, 1.7644, 1.7506, 1.7032, 1.8150, 1.7363, 1.7353, 1.8017, 1.7049, + 1.7996, 1.7475, 1.7090, 1.7468, 1.7934, 1.7622, 1.7136, 1.7332, 1.6697, + 1.8075, 1.7316, 1.5067, 1.7573, 1.7962, 1.7551, 1.7285, 1.7033, 1.7589, + 1.7889, 1.7553, 1.8125, 1.8115, 1.6954, 1.8001, 1.7614, 1.7621, 1.7395, + 1.7802, 1.7583, 1.8117, 1.6605, 1.7690, 1.6949, 1.7969, 1.7511, 1.7341, + 1.7781, 1.7734, 1.7195, 1.7107, 1.7229, 1.8224, 1.7728, 1.8050, 1.7612, + 1.7779, 1.8357, 1.6278, 1.7457, 1.6289, 1.8321, 1.4774, 1.7665, 1.6529, + 1.6894, 1.6457, 1.7476, 1.7246, 1.7836, 1.7473, 1.7166, 1.8502, 1.7336, + 1.7778, 1.7636, 1.7954, 1.6843, 1.6976, 1.7811, 1.7412, 1.7866, 1.8105, + 1.7974, 1.7285, 1.7500, 1.7821, 1.7113, 1.6886, 1.7415, 1.8403, 1.7683, + 1.7202, 1.7977, 1.7161, 1.7669, 1.7054, 1.6233, 1.7726, 1.7435, 1.7252, + 1.7905, 1.7461, 1.6852, 1.7706, 1.7498, 1.7314, 1.7612, 1.7688, 1.8387, + 1.7507, 1.7780, 1.7521, 1.7521, 1.6886, 1.6748, 1.6639, 1.7260, 1.8651, + 1.8324, 1.8109, 1.7392, 1.7163, 1.7573, 1.8032, 1.7246, 1.7790, 1.8149, + 1.8411, 1.7696, 1.7417, 1.7305, 1.8165, 1.7831, 1.6465, 1.7257, 1.6999, + 1.7752, 1.7095, 1.8007, 1.5871, 1.7589, 1.7477, 1.7404, 1.7618, 1.7613, + 1.7768, 1.7874, 1.7526, 1.7289, 1.7907, 1.7646, 1.7130, 1.6932, 1.7876, + 1.7574, 1.7329, 1.7561, 1.6575, 1.6210, 1.7973, 1.8189, 1.8263, 1.7098, + 1.8421, 1.7957, 1.7129, 1.7902, 1.7544, 1.7874, 1.8462, 1.6938, 1.7722, + 1.7217, 1.6570, 1.6927, 1.8389, 1.7193, 1.6345, 1.8630, 1.7586, 1.7213, + 1.7517, 1.6532, 1.7951, 1.8013, 1.6195, 1.7767, 1.7708, 1.8667, 1.7844, + 1.6620, 1.7018, 1.8073, 1.7122, 1.6877, 1.8080, 1.8439, 1.7730, 1.7713, + 1.7360, 1.7293, 1.7934, 1.8518, 1.7278, 1.7761, 1.7626, 1.7324, 1.7621, + 1.7627, 1.7576, 1.7272, 1.7169, 1.7375, 1.7508, 1.7859, 1.8297, 1.7494, + 1.7099, 1.7173, 1.7456, 1.6725, 1.7380, 1.5079, 1.7439, 1.7492, 1.7837, + 1.7155, 1.7384, 1.8116, 1.7994, 1.7162, 1.7863, 1.8267, 
1.8537, 1.6841, + 1.7267, 1.7238, 1.6594, 1.6171, 1.7859, 1.7840, 1.7119, 1.7442, 1.8366, + 1.7012, 1.6456, 1.8376, 1.8048, 1.7586, 1.7052, 1.7953, 1.6071, 1.7094, + 1.6981, 1.8010, 1.7339, 1.7359, 1.7767, 1.7557, 0.3923, 1.7237, 1.7051, + 1.8097, 1.6661, 1.7293, 1.7843, 1.8136, 1.7692, 1.6809, 1.7752, 1.7953, + 1.7330, 1.7169, 1.7330, 1.7858, 1.7055, 1.7001, 1.7556, 1.7170, 1.7452, + 1.7962, 1.7701, 1.7458, 1.6952, 1.6571, 1.7019, 1.6985, 1.7858, 1.7325, + 1.8441, 1.7544, 1.7539, 1.7416, 1.7197, 1.7388, 1.7254, 1.7573, 1.6840, + 1.6938, 1.7749, 1.8030, 1.7458, 1.7379, 1.7583, 1.5081, 1.7613, 1.8099, + 1.8687, 1.7221, 1.8274, 1.7241, 1.8184, 1.6901, 1.8262, 1.7127, 1.7925, + 1.6913, 1.7442, 1.7687, 1.7329, 1.7385, 1.7035, 1.7806, 1.7704, 1.8107, + 1.7890, 1.7229, 1.7178, 1.7381, 1.8056, 1.6509, 1.8329, 1.7378, 1.7336, + 1.7829, 1.6570, 1.6778, 1.7194, 1.8111, 1.7018, 1.6994, 1.6840, 1.7387, + 1.7542, 1.6884, 1.7213], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.0949e-01, 1.2029e-01, -2.2643e-01, 1.3633e-02, 7.7175e-02, + 2.0759e-01, 1.2732e-01, 5.3440e-02, -1.8767e-01, -3.7663e-03, + 5.5563e-02, -4.0936e-02, -1.8914e-01, -4.9850e-02, -8.7422e-02, + -8.5198e-02, -3.2767e-02, -3.9917e-02, -1.4945e-02, 5.4149e+00, + -4.7396e-02, 8.8626e-02, -2.3334e-02, -1.2182e-02, -4.0219e-02, + -4.6954e-02, 5.5812e-02, 2.6243e-02, 2.9270e-02, -7.4899e-02, + 6.1470e-02, 3.6407e-02, 1.3643e-02, 1.1972e-01, 9.5888e-02, + -9.1053e-02, 1.0808e-02, -6.1807e-02, -1.2643e-01, 1.0759e-01, + 7.5769e-02, 3.0389e-02, 1.9903e-02, 2.0498e-02, -2.8517e-02, + -3.5988e-03, 3.3271e-02, -2.9117e-02, 6.7151e-02, 5.6767e-02, + 2.7080e-03, 1.0900e-02, 3.1927e-02, 1.3691e-02, 5.4312e-02, + 1.1228e-01, -1.0770e-01, -8.9129e-02, 8.0802e-02, 2.2251e-02, + 9.6828e-02, -7.9053e-02, 4.6662e-02, 2.6232e-02, 8.6675e-02, + 7.8493e-03, 1.2616e-01, 1.0770e-01, -4.4169e-02, 1.6066e-01, + 5.3667e-02, 1.1023e-01, 7.7716e-01, 8.7449e-02, -1.5993e-01, + 6.4785e-02, 7.7663e-02, -1.3849e-01, 
-2.6414e-02, 1.5254e-02, + -3.1125e-03, -5.5026e-02, -9.6868e-03, 1.4584e-01, 1.1902e-02, + 1.0312e-02, 1.2705e-01, 1.0381e-01, -2.9179e-02, 2.8543e-02, + -1.6863e-02, 4.4634e-02, 3.5224e-02, -5.3106e-02, -8.9368e-02, + 2.2416e-02, 2.2251e-02, -8.7668e-02, -1.0003e-01, 1.3118e-02, + 3.7356e-02, -4.3921e-02, 1.0691e-02, -8.1180e-02, 8.0626e-02, + 1.2307e-02, 3.7298e-02, -2.7534e-02, -2.6614e-02, -6.7037e-02, + -7.2763e-02, -1.8265e-02, -1.0384e-02, 9.7256e-02, 1.2837e-01, + -3.4833e-02, -3.0022e-02, -5.6598e-02, 5.1160e-02, 1.4362e-01, + -3.5597e-02, -2.8443e-01, -1.3226e-01, -6.6577e-02, -1.5931e-01, + -3.4211e-02, -1.8955e-01, 7.5645e-03, 7.7564e-03, -5.1944e-02, + -1.0860e-01, 7.3288e-02, -6.2435e-02, -1.8061e-02, -7.4753e-02, + 7.2062e-02, -8.5082e-03, -4.6290e-02, -7.0267e-02, 5.0560e-04, + 2.1024e-03, 3.1026e-02, 5.5825e-02, 2.2368e-02, 6.0203e-02, + -3.7331e-02, -9.3823e-03, 1.2192e-02, -6.6988e-02, 1.6273e-01, + 2.5527e-01, -9.1718e-02, -1.5925e-01, 3.0293e-02, -8.7519e-02, + -4.5639e-01, -6.0476e-02, -2.3655e-02, 3.3486e-02, 2.2417e-02, + 1.9185e-01, -1.7733e-01, -3.4288e-02, 3.0369e-02, -2.2127e-03, + 3.9785e-02, -1.1005e-01, 8.2197e-02, -5.6468e-02, 3.7779e-02, + -4.6349e-02, 8.3061e-02, 1.7083e-03, 3.6090e-02, 6.6748e-02, + -1.3850e-01, -8.5291e-03, 3.9900e-02, -2.8865e-02, 5.9780e-02, + 1.6062e-03, -1.1419e-01, 7.5935e-02, 1.8732e-02, 2.3851e-02, + -1.9683e-01, 1.9399e-02, 5.7028e-01, -1.1578e-02, -5.2761e-02, + 2.9288e-02, -7.8124e-02, 7.4517e-02, 1.0981e-01, -6.1710e-02, + 3.0429e-02, -3.0580e-02, -1.2169e-01, 1.4486e-01, 2.7204e-02, + -2.0147e-01, -5.2343e-02, 1.5732e-02, 9.6297e-02, 4.9667e-02, + 1.5123e-01, -8.8324e-02, 1.2086e-01, 1.1278e-01, 9.8894e-04, + 1.0845e-02, 2.4921e-01, -1.5126e-03, -2.4564e-02, -1.0051e-01, + -1.0166e-02, -6.3747e-02, 4.1885e-02, -1.2753e-01, -2.2882e-01, + -1.2265e-02, -5.5138e-02, 2.3936e-02, -1.1381e-02, 3.2840e-02, + -7.2174e-02, 9.4510e-02, -3.3043e-02, 1.1032e-01, -4.9055e-02, + 1.7022e-03, 4.9647e-02, 1.1223e-02, 
9.7754e-02, 9.0599e-03, + 1.6349e-02, -1.0698e-01, 7.6059e-02, 7.9308e-02, 9.7373e-03, + 9.2770e-02, 1.4207e-01, -4.8836e-03, 4.7935e-02, 6.4542e-02, + 3.0055e-02, 1.2875e-01, -4.6859e-02, 4.9863e-02, 1.4377e-03, + 1.5789e-02, 1.4662e-02, 1.2451e-01, -1.0362e-01, 1.4963e-01, + -2.6775e-02, -4.0095e-02, -1.1525e-01, 9.7913e-03, 3.2486e-02, + -1.0553e-01, -3.9868e-02, -1.9666e-02, -6.5488e-02, -5.8675e-02, + 2.3672e-02, -6.1824e-02, -3.4578e-03, -7.2933e-02, -2.7511e-02, + 5.2952e-03, 6.9479e-02, 1.0031e-01, -1.5548e-01, -1.5024e-02, + 7.7628e-02, -8.3099e-02, 2.1916e-02, 9.7977e-02, -1.0470e-01, + 1.8839e-01, 1.2992e-01, -3.9989e-02, 3.8404e-02, 2.1818e-02, + 3.8327e-02, 5.1547e-02, -5.4093e-02, 1.4502e-01, -1.3062e-01, + -5.9733e-02, -1.4898e-02, 1.5852e-01, 6.7279e-02, 9.2137e-02, + -1.3338e-02, 1.8929e-01, 1.5181e-02, 6.7185e-02, 5.8950e-02, + 5.9982e-02, 4.4719e-02, -5.1546e-02, 1.4082e-01, -1.4495e-02, + -1.1608e-02, 9.2850e-02, -8.2966e-02, 9.0176e-02, -5.3713e-02, + 4.5182e-02, 1.0756e-02, -8.7488e-02, -1.2639e-01, -1.3084e-03, + -7.5066e-02, 2.1022e-02, 2.8497e-02, 1.8901e-01, -4.0666e-02, + -7.9830e-02, 2.6010e-01, -6.3069e-02, -5.6760e-02, 2.3198e-01, + -2.7823e-01, 3.6469e-02, -2.2526e-02, 8.1990e-02, -1.8918e-01, + 2.0363e-01, -5.4943e-02, 1.3450e-01, -5.3811e-02, 5.4539e-02, + 9.9996e-02, -2.9484e-02, -3.2027e-02, 3.0322e-02, -5.0587e-02, + -1.2371e-01, 2.7864e-02, -7.2747e-02, -4.5982e-02, 6.4722e-02, + 2.5427e-03, 1.0404e-01, 1.2117e-01, 5.1235e-02, -2.6081e-02, + -7.1022e-02, 7.9909e-02, 4.0153e-03, 1.3626e-01, -6.4602e-02, + -8.0432e-02, -5.1698e-02, -2.0403e-03, -2.4301e-02, -9.1350e-02, + -2.8840e-03, -1.5332e-02, -3.6093e-02, -9.8040e-02, -5.1894e-03, + -2.3653e-02, 7.5881e-02, 1.7702e-01, 9.3416e-02, 7.4547e-02, + 1.1536e-01, -3.9263e-03, -5.5926e-02, -4.2878e-02, -1.3414e-02, + 1.5467e-02, 1.1647e-01, 1.1761e-01, -2.0235e-03, -2.0868e-02, + -1.2706e-01, 1.3958e-01, 3.3958e-02, -2.4072e-02, -8.1018e-02, + -2.9652e-02, -3.1342e-02, -5.6468e-02, 
-1.2003e-01, -6.9973e-02, + -2.3683e-03, 1.0191e-02, -6.2477e-02, -3.2790e-02, 3.2910e-02, + -3.8474e-04, 7.6801e-03, 5.5080e-02, -3.7293e-02, 3.7632e-02, + 1.2180e-01, -6.6580e-02, 8.8961e-02, 2.3944e-02, -1.2642e-01, + -1.5582e-01, -3.5932e-02, 5.6498e-02, 1.3303e-02, -5.9329e-02, + -9.1585e-02, 7.7481e-02, 1.0074e-01, -1.0729e-01, 2.9733e-03, + -1.9608e+00, -1.7896e-02, 1.6667e-02, -6.7317e-02, -4.6438e-02, + -2.0567e-02, -5.6402e-03, 4.9861e-02, -6.5507e-02, -4.4573e-02, + 4.8462e-02, -1.7947e-01, -1.2917e-01, 1.0376e-01, 4.6402e-03, + -3.8884e-02, -5.6493e-02, 4.4177e-02, 1.3340e-01, 1.6811e-01, + 2.6634e-03, 1.9004e-02, 7.5964e-02, 1.8048e-01, -1.0555e-01, + -2.6694e-02, -1.1950e-01, -7.9573e-02, 1.3374e-02, 1.5681e-01, + -1.3325e-01, -1.1418e-02, -6.1896e-02, 1.3991e-01, -2.9282e-03, + -3.5584e-03, -1.6388e-02, -1.1436e-01, 6.5513e-02, 4.1263e-02, + 3.0082e-02, 3.3306e-02, -1.1155e-02, -5.5836e-02, -1.1409e-01, + -1.0892e-02, 9.0167e-02, -1.6112e-01, 6.5796e-02, -1.1796e-02, + -5.7307e-02, 7.6839e-02, -9.1151e-02, 1.2946e-01, 3.8997e-03, + -3.1941e-02, -1.0808e-01, -8.4099e-02, 5.5579e-03, -5.9969e-02, + 6.1003e-02, 1.6967e-01, 4.3307e-02, 1.5222e-02, 1.2151e-01, + 1.2265e-01, -1.1868e-01, 5.6359e-02, 1.5663e-01, -8.6873e-02, + -2.5422e-02, -4.0970e-02, 1.4281e-01, 9.8614e-02, -4.6476e-02, + 1.9954e-02, 5.2306e-03, -5.3084e-02, -4.3791e-02, -1.4934e-01, + 3.4870e-02, -7.6011e-02, 1.2525e-01, -5.5468e-02, 7.1483e-02, + -9.6414e-03, -1.3096e-01, 4.0228e-02, -2.6196e-02, -8.0135e-02, + 3.0319e-02, 1.4976e-01, 7.4810e-02, 3.7500e-02, -2.3439e-02, + -8.4951e-02, -3.1313e-02, -1.4224e-01, 4.9112e-02, -5.7134e-02, + -1.7928e-02, 7.7955e-02, 1.6979e-01, 1.1908e-01, -1.5242e-01, + -7.3433e-02, 3.9254e-03, 9.9303e-02, -3.5989e-02, -4.6880e-02, + -7.3148e-02, 2.5752e-02, -2.4774e-02, 4.5360e-02, -8.7025e-02, + 2.4669e-03, -1.3410e-02, 1.1635e-02, -6.7339e-02, -2.3523e-02, + 1.1249e-02, -2.1098e-02, 5.2816e-03, -7.8247e-02, -8.7813e-02, + 2.1990e-03, 5.2155e-02, 
-2.4094e-02, 1.0330e-01, 1.7441e-02, + 5.9195e-02, 1.4813e-01, 1.1106e-02, 5.9486e-02, 2.5604e-02, + -2.0879e-02, -3.8578e-02, -1.1124e-01, 9.3536e-02, -4.4539e-03, + -1.8219e-02, -2.0443e-02, -4.9660e-02, -1.0606e-02, 1.4319e-01, + -3.8835e-02, 1.8643e-01, 3.4203e-02, 5.8226e-02, 1.4025e-01, + 9.2013e-02, 6.8444e-03, -2.5568e-02, 3.9318e-02, 5.1420e-02, + 9.7254e-02, 9.0392e-03, -5.4230e-02, -2.1404e-02, 8.8912e-02, + 6.4720e-02, 1.2284e-02, 4.4045e-02, 2.5807e-03, -2.5942e-02, + -1.3710e-01, -1.3367e-02, 7.5493e-02, 1.1875e-01, 8.1141e-02, + -1.2936e-02, 4.7390e-02, -1.1134e-01, 9.6089e-02, 6.4378e-02, + -1.8029e-02, 8.4028e-02, 1.4069e-01, -1.7390e-02, 2.6406e-02, + -7.9666e-03, 5.2227e-02, 3.9486e-02, -9.2852e-03, 7.3538e-02, + -7.3171e-02, -1.9590e-01, -1.7776e-02, -4.0482e-02, 1.5421e-02, + -4.6823e-02, 2.3603e-02, -7.3262e-03, 8.0283e-02, -3.5043e-02, + 1.1719e-01, 1.1037e-01, -5.6377e-02, -3.4377e-03, -4.8010e-02, + -1.2813e-01, 2.4231e-02, 1.1043e-01, 5.4121e-02, -7.6396e-02, + 8.8454e-02, 4.3982e-02, 7.0963e-02, 8.3958e-03, 5.9537e-02, + -6.3115e-02, -6.1581e-02, 4.2345e-02, 1.0158e-01, -6.7802e-02, + 1.0960e-01, -3.4674e-02, 1.2936e-01, 1.9522e-01, 1.0823e-01, + 5.2180e-02, -9.0325e-02, 5.8393e-03, 8.2930e-02, -8.4221e-02, + 1.7008e-03, -7.9800e-02, 7.3705e-02, -8.1106e-02, 5.6668e-02, + 3.2226e-02, 6.6968e-03, 1.3006e-01, 4.6790e-02, 2.0633e-01, + 1.8214e-01, 1.8984e-04, -5.6160e-02, 1.1562e-01, -6.7184e-02, + 5.9152e-02, -1.1576e-01, -9.8722e-04, 7.8000e-02, 1.0290e-01, + -4.5230e-02, 9.1396e-03, -7.5262e-02, 1.1895e-02, 1.1374e-01, + 1.6541e-02, -1.4464e-01, -1.6592e-01, 6.0472e-03, 4.7818e-04, + -2.0584e-01, 5.2593e-02, 3.0140e-02, 1.3068e-02, 8.7231e-03, + 1.0776e-02, -3.1502e-02, -2.4943e-02, -5.4088e-03, 1.0589e-01, + -2.9905e-02, -5.0469e+00, -3.2722e-02, 4.9282e-02, -2.2213e-02, + -1.1515e-01, -4.1087e-02, -8.2253e-02, -5.4938e-02, 4.3435e-02, + -4.4115e-02, -2.0652e-02, -5.0619e-02, -3.0088e-02, -6.0237e-02, + 1.7771e-01, 3.4522e-03, 
-7.0164e-02, 1.7609e-02, -8.1784e-02, + 9.7742e-02, 8.6289e-02, 5.3136e-03, 1.3217e-02, -1.8450e-01, + -2.3459e-02, 9.8487e-02, 1.8942e-02, -2.4920e-02, 6.7617e-02, + -3.2965e-02, 7.1420e-02, 8.1317e-02, 1.9588e-02, 4.7272e-03, + -4.8509e-02, 3.8701e-03, 1.6512e-02, -7.8775e-02, -6.9517e-02, + 1.2679e-04, 3.3200e-03, 6.5191e-03, -1.9788e-01, 8.8512e-02, + -4.9999e-03, -2.4801e-01, 1.4648e-01, 3.9599e-02, 1.3425e-01, + 8.5676e-02, 8.7498e-02, 1.0170e-01, 9.6648e-02, 1.2441e-02, + 4.7502e-02, 7.0925e-03, 1.0906e-02, 9.9315e-03, -8.5651e-02, + 1.0266e-01, -1.5819e-01, -1.1354e-01, 8.0533e-02, -1.7047e-02, + -1.7048e-02, -1.7593e-01, 8.0783e-03, 3.7989e-02, -4.0361e-02, + -2.3227e-02, -2.0796e-02, -1.1312e-01, -4.6048e-02, 9.6961e-02, + 4.7441e-02, 7.9526e-02, 8.1258e-02, 2.5623e-02, -3.5423e-02, + -2.2783e-02, 6.6940e-02, 5.3477e-02, -1.4346e-01, -9.9305e-02, + -5.6176e-02, 1.2165e-01, 1.0727e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0076, 0.0140, -0.0179, ..., -0.0190, -0.0001, 0.0083], + [-0.0082, -0.0062, -0.0340, ..., 0.0008, 0.0212, -0.0008], + [-0.0030, -0.0002, 0.0044, ..., 0.0062, -0.0090, 0.0150], + ..., + [-0.0003, 0.0026, -0.0107, ..., 0.0015, 0.0198, 0.0079], + [-0.0139, 0.0263, 0.0082, ..., -0.0135, 0.0330, -0.0161], + [ 0.0057, 0.0020, -0.0367, ..., 0.0184, -0.0093, 0.0296]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2454, -0.3401, -0.3860, ..., -0.3416, -0.3689, -0.1425], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0122, 0.0114, 0.0196, ..., 0.0045, 0.0157, 0.0007], + [ 0.0119, 0.0089, -0.0186, ..., -0.0101, -0.0171, -0.0015], + [ 0.0058, -0.0237, 0.0086, ..., -0.0162, 0.0190, -0.0067], + ..., + [-0.0058, 0.0016, 0.0089, ..., -0.0329, -0.0035, 0.0069], + [-0.0245, 0.0052, 0.0177, ..., -0.0051, 0.0200, 0.0037], + [ 0.0006, -0.0077, 0.0039, ..., 0.0064, 0.0152, -0.0266]], + device='cuda:1', dtype=torch.float16, 
requires_grad=True)Parameter containing: +tensor([ 2.1591e-02, -3.8971e-02, 4.7058e-02, -3.9276e-02, -4.8645e-02, + -4.8645e-02, 6.9092e-02, 5.2643e-02, 5.4443e-02, -3.2440e-02, + 5.1117e-02, 1.7868e-02, -7.8888e-03, -3.6774e-02, -4.2305e-03, + 2.9739e-02, 6.4621e-03, -1.9791e-02, -2.1191e-03, -3.9185e-01, + -8.0811e-02, 5.4291e-02, 4.3640e-02, -1.6708e-02, 3.3295e-02, + 5.2307e-02, 1.1772e-02, -5.2429e-02, -1.8356e-02, 7.1472e-02, + 9.4421e-02, -4.7646e-03, -8.4412e-02, 5.1117e-02, -2.5604e-02, + -1.0460e-02, -8.0505e-02, 2.1240e-02, 1.8341e-02, 5.2277e-02, + 4.8065e-02, 5.0507e-02, 2.8397e-02, 3.0899e-02, 7.3051e-03, + -2.5421e-02, 6.0455e-02, -1.6251e-02, -5.4962e-02, 9.8228e-04, + -1.8143e-02, -4.0802e-02, -8.2825e-02, -9.6558e-02, 1.0406e-02, + 3.6896e-02, -2.7069e-02, -6.9092e-02, 2.9907e-02, 3.8574e-02, + 5.0781e-02, 3.7193e-03, 1.0065e-01, 7.6485e-03, -3.1614e-04, + 4.6722e-02, -3.7933e-02, -7.7271e-02, -1.4732e-02, -2.0538e-02, + 7.1045e-02, -4.4556e-03, 3.7109e-02, 3.2104e-02, 1.3588e-02, + -1.7609e-02, 4.4006e-02, 1.4793e-02, 1.4610e-02, 1.1719e-01, + -2.8477e-03, 5.4283e-03, -5.0842e-02, 1.1711e-02, -7.9773e-02, + -9.3384e-02, 4.7394e-02, -3.4668e-02, -2.8610e-02, -2.3880e-02, + -1.7273e-02, -6.7444e-02, 7.4720e-04, 1.6260e-01, -1.2970e-04, + -3.3356e-02, -4.6936e-02, -1.5274e-02, -8.2336e-02, 4.1718e-02, + 6.7101e-03, -9.7961e-02, -4.6692e-02, -1.5358e-02, 2.2247e-02, + 5.7373e-02, 7.7820e-02, -9.5459e-02, 3.0365e-02, -7.7393e-02, + 2.0950e-02, -1.5511e-02, -9.0942e-02, 1.4183e-02, -1.4450e-02, + -2.0618e-03, 6.0455e-02, 5.1392e-02, -3.3203e-02, 3.6133e-02, + 4.1931e-02, -3.7567e-02, 2.7359e-02, 6.7902e-03, 5.8258e-02, + 8.9188e-03, -7.5439e-02, 6.3904e-02, 2.8931e-02, 1.5747e-02, + -1.0536e-02, -3.3691e-02, -2.4719e-02, -2.8183e-02, -5.4565e-02, + 8.6792e-02, -2.8625e-02, 1.1589e-02, 7.1777e-02, -3.2867e-02, + -2.1286e-02, 4.6967e-02, 3.5553e-02, -4.9530e-02, 2.1683e-02, + -7.2815e-02, 2.3666e-02, -3.8696e-02, -1.0870e-01, 4.0802e-02, + 9.8694e-02, 
-6.8665e-02, -2.6184e-02, -7.4402e-02, -3.1311e-02, + 2.8305e-02, 7.9651e-02, 1.1002e-02, -2.2602e-03, 3.6168e-04, + 6.1066e-02, 1.2016e-02, -5.0720e-02, -3.1776e-03, 5.1636e-02, + 3.5950e-02, 6.2805e-02, 7.7362e-03, 9.9976e-02, -4.2847e-02, + 5.1308e-03, -3.3020e-02, -1.3115e-02, 2.6184e-02, -5.9052e-02, + -1.1816e-01, -7.5073e-02, -1.9012e-02, 5.0598e-02, 8.6010e-05, + -1.3525e-01, 3.9940e-03, 1.1029e-01, 1.4124e-01, 3.8116e-02, + 1.7729e-03, -2.2736e-02, 1.1194e-01, -3.6488e-03, -2.0081e-02, + -1.1652e-01, -5.5634e-02, 7.7148e-02, 2.6855e-02, 3.7354e-02, + 1.4557e-02, 1.2703e-02, -8.2397e-02, 5.5847e-02, 1.1093e-02, + -1.5022e-02, 2.6810e-02, 1.7426e-02, -5.6915e-02, 1.6739e-02, + 9.7198e-03, -9.5062e-03, 5.3375e-02, 9.7122e-03, -3.4241e-02, + -3.4199e-03, -4.3671e-02, -4.1565e-02, -1.0147e-02, 4.3762e-02, + -2.2110e-02, 5.1193e-03, -3.9307e-02, -7.7087e-02, 7.2212e-03, + 6.0150e-02, 2.0355e-02, 2.2690e-02, 5.6854e-02, 1.1612e-02, + 5.4535e-02, -7.8354e-03, 7.3181e-02, 7.7148e-02, -2.3209e-02, + -4.7760e-02, -1.6113e-02, 8.5678e-03, -4.8462e-02, -6.6772e-02, + 1.0094e-02, -6.7017e-02, -2.6184e-02, 6.4880e-02, 4.4769e-02, + 7.5302e-03, 1.7288e-02, 9.0561e-03, -6.2065e-03, -4.1870e-02, + -4.5105e-02, -7.7942e-02, 6.5125e-02, -3.1433e-02, 5.7190e-02, + -5.0293e-02, -6.7329e-03, -7.7858e-03, -1.2352e-02, -1.3573e-02, + -6.3599e-02, 2.1515e-02, -2.0935e-02, -3.9490e-02, -1.0811e-02, + -1.2524e-01, -2.2247e-02, 9.7046e-03, 2.3155e-03, -5.7465e-02, + -3.8879e-02, -1.4923e-02, 3.6652e-02, 7.2083e-02, -3.6194e-02, + 3.0396e-02, -5.0415e-02, 1.0468e-02, 8.6670e-03, 6.9847e-03, + -6.4964e-03, -6.5552e-02, 1.2146e-02, 9.1858e-03, -2.9999e-02, + -3.7506e-02, 1.2749e-02, -2.3972e-02, 7.6782e-02, -2.5894e-02, + -1.9989e-02, -8.8623e-02, -1.8707e-02, 1.0590e-01, -5.0079e-02, + -3.5248e-02, 4.0253e-02, 1.7792e-02, -6.2683e-02, 6.1218e-02, + 8.9645e-03, 1.0077e-01, -1.9867e-02, -4.0985e-02, -4.0771e-02, + -3.0350e-02, 4.6906e-02, 1.2341e-03, -3.4363e-02, 5.5939e-02, + 8.1238e-02, 
-9.4177e-02, 3.7079e-02, 1.4252e-02, -5.8746e-03, + 3.8757e-02, -3.4882e-02, 2.1118e-02, 3.4027e-02, -2.6886e-02, + -2.7466e-02, 3.1372e-02, 7.7759e-02, 5.7251e-02, 1.0223e-01, + -6.7627e-02, 6.8398e-03, -1.3626e-02, -2.0180e-03, 1.2168e+00, + -1.6504e-01, -5.4413e-02, 4.6722e-02, 8.7128e-03, -3.4760e-02, + 2.0538e-02, -2.0340e-02, -3.0212e-02, -2.3682e-02, 1.9055e-01, + -3.7689e-02, -5.8289e-03, 9.5215e-02, -2.2141e-02, -2.5253e-02, + 6.0394e-02, -1.8066e-02, -5.6946e-02, -2.8458e-03, -2.0309e-02, + 9.6313e-02, 6.1531e-03, 2.2415e-02, -1.0199e-01, -4.9469e-02, + 3.2684e-02, -6.6895e-02, 1.3893e-02, -8.1909e-02, -3.3569e-02, + 5.0507e-02, -2.1790e-02, -3.6224e-02, -2.2003e-02, 8.4595e-02, + -3.1860e-02, -4.8798e-02, -7.1030e-03, -7.2327e-02, 7.0862e-02, + -3.1677e-02, -6.7017e-02, 3.7537e-02, 5.0507e-02, 1.7990e-02, + -2.3697e-02, 6.3110e-02, 1.3781e-03, -4.6692e-02, -6.9763e-02, + 7.8964e-03, 4.9042e-02, 1.7944e-02, -4.6997e-02, 1.9745e-02, + -9.0332e-03, -1.8921e-02, -5.5573e-02, 5.3062e-03, -1.4740e-02, + -6.3232e-02, 2.2827e-02, 3.0563e-02, 1.1780e-02, 7.3776e-03, + 1.7517e-02, 4.5319e-02, 8.3435e-02, -2.1545e-02, 1.4534e-02, + -5.2765e-02, 4.9255e-02, -1.1542e-01, -4.5410e-02, 2.5803e-02, + 5.5908e-02, 6.8420e-02, -4.9316e-02, -9.5459e-02, 4.9347e-02, + -5.4504e-02, -2.6794e-02, -2.6871e-02, -4.8637e-03, 2.7039e-02, + 4.5197e-02, 3.5614e-02, 1.5411e-02, -1.2154e-02, 1.3757e-01, + -1.6479e-01, -1.7639e-02, -3.5309e-02, -3.1311e-02, -2.1317e-02, + 7.0923e-02, 3.9978e-02, -2.0584e-02, -6.0364e-02, -8.1360e-02, + 1.7059e-02, 5.4321e-03, -2.0157e-02, -1.9180e-02, -1.2947e-02, + 8.9417e-03, 2.0828e-03, -6.4087e-02, 2.7237e-03, 4.8401e-02, + -2.8706e-03, -2.6810e-02, 1.9913e-02, 4.0802e-02, -7.6172e-02, + 9.4360e-02, 3.8055e-02, 2.1530e-02, -1.8784e-02, 4.5967e-03, + -5.3864e-02, 2.6199e-02, -1.7410e-02, -8.3069e-02, -5.4901e-02, + 5.3177e-03, -5.6915e-03, -6.2469e-02, 1.5167e-02, -8.2626e-03, + -1.8600e-02, -5.1331e-02, -5.4199e-02, -4.1084e-03, 6.8726e-02, + 
-3.8574e-02, -3.0842e-03, -4.0131e-02, -1.4565e-02, -3.6804e-02, + 6.5552e-02, -5.0049e-02, 6.9641e-02, 6.4087e-02, -2.2736e-02, + 2.8366e-02, -5.7983e-02, -6.0944e-02, 1.8295e-02, 2.2461e-02, + -5.0079e-02, 3.4027e-02, -1.0211e-01, -9.0576e-02, -8.8867e-02, + -4.1779e-02, 2.3163e-02, -4.7607e-02, -1.4679e-02, 4.1313e-03, + -1.6413e-03, 3.9581e-02, -2.9724e-02, -1.7258e-02, 6.6895e-02, + -2.2232e-02, -8.6670e-03, 2.5314e-02, 2.3438e-02, -4.0558e-02, + -9.4543e-02, -6.9519e-02, 9.9945e-03, -8.9844e-02, -1.6083e-02, + -3.2257e-02, 1.7853e-02, -3.8818e-02, 5.4474e-02, 1.9516e-02, + -3.6224e-02, 1.8921e-02, 9.1675e-02, -8.5878e-04, -5.4382e-02, + -1.0175e-01, 7.5012e-02, -8.4778e-02, 1.5945e-02, -3.0762e-02, + 3.9764e-02, 3.0624e-02, -7.0862e-02, -2.7359e-02, 5.8899e-02, + 6.0669e-02, 1.4641e-02, 3.0029e-02, 1.3330e-01, -1.0565e-01, + -3.7651e-03, -8.3008e-03, 9.8228e-04, 3.3630e-02, -6.7940e-03, + 2.4887e-02, -2.1896e-02, 1.8768e-02, 5.7159e-02, -2.2507e-02, + -9.0881e-02, 3.8605e-02, 4.2206e-02, 4.7821e-02, 1.1975e-01, + 2.7954e-02, -5.4665e-03, -5.6458e-02, 3.1403e-02, 2.6840e-02, + 9.7656e-02, -2.3071e-02, 3.4241e-02, -1.9257e-02, 5.1605e-02, + 1.7181e-02, 7.5195e-02, 4.2206e-02, 8.7585e-02, -7.7332e-02, + -1.6968e-02, 7.8186e-02, 1.0994e-02, -4.6082e-02, 1.9073e-03, + -5.4596e-02, -5.4779e-02, 2.9236e-02, 7.0068e-02, -1.0330e-02, + 3.3234e-02, 1.0223e-02, -4.5807e-02, 4.8706e-02, -4.0680e-02, + -1.4160e-02, 5.7068e-02, -1.3489e-02, 7.1411e-02, 6.9618e-03, + 1.0773e-01, 5.8380e-02, -4.2786e-02, -6.8359e-02, 3.7079e-02, + 5.5695e-02, -1.1237e-01, -2.9800e-02, 6.0272e-02, 4.4647e-02, + -3.5309e-02, -6.6589e-02, -4.6417e-02, 2.3407e-02, -5.9784e-02, + -2.2461e-02, 7.5684e-02, 1.7471e-02, -9.3811e-02, 2.6302e-03, + -1.5915e-02, -1.8127e-02, 1.8677e-02, -4.4800e-02, 4.0314e-02, + -3.1281e-02, -2.8443e-04, -6.0028e-02, 3.4027e-02, -1.3159e-01, + -5.4443e-02, -2.5757e-02, -4.6570e-02, -1.5465e-02, 5.2979e-02, + -2.7069e-02, -1.7120e-02, 1.9241e-02, -3.6163e-03, 
-8.4473e-02, + -3.5522e-02, 6.4087e-02, 6.2561e-02, -5.8441e-02, 2.9510e-02, + -8.4961e-02, -3.9062e-02, 6.1302e-03, 7.5989e-03, 1.6144e-02, + 1.5808e-02, -2.5375e-02, 5.3635e-03, -9.4788e-02, -6.1554e-02, + -3.1952e-02, 5.6732e-02, 1.0132e-02, 1.7563e-02, 1.5266e-02, + -6.7993e-02, 7.7515e-02, -5.8594e-02, 1.3535e-02, -3.5431e-02, + -3.6850e-03, -8.2153e-02, -1.0689e-02, 2.4429e-02, -3.1189e-02, + 3.2166e-02, -5.3101e-02, -6.2065e-03, 6.4453e-02, 2.9449e-02, + 6.5796e-02, -5.1941e-02, -3.0136e-02, -4.2572e-02, 1.6327e-02, + 4.2999e-02, 3.8879e-02, -5.3070e-02, -5.2376e-03, 3.4943e-02, + -1.0147e-02, 1.0635e-02, -5.2299e-03, -4.2915e-03, -1.9958e-02, + 1.9470e-02, -8.3313e-03, 2.7039e-02, -4.2969e-02, -8.4412e-02, + -1.5411e-02, 7.9269e-03, 7.1106e-02, 1.1208e-02, 1.2383e-02, + 2.5940e-02, 2.7084e-02, -9.2163e-03, -1.9058e-02, -1.3649e-02, + -6.9618e-03, 3.2715e-01, 3.3295e-02, -1.7197e-02, 4.3732e-02, + -3.2684e-02, 6.1646e-02, 2.2232e-02, 9.7778e-02, 5.4817e-03, + -7.6050e-02, -1.6159e-02, 8.4610e-03, -2.1652e-02, -9.4299e-02, + -2.1683e-02, 3.0807e-02, 4.0924e-02, -4.1992e-02, 7.6355e-02, + 4.2358e-02, 1.7212e-02, -3.0106e-02, -6.0333e-02, -7.1716e-02, + 4.4861e-02, -4.4617e-02, 1.8730e-03, 2.9175e-02, 8.8501e-03, + 3.5686e-03, 7.3975e-02, 4.4281e-02, 1.5991e-02, 9.7839e-02, + -1.7776e-02, 2.5635e-02, 4.3152e-02, -1.4130e-02, 5.9235e-02, + -5.7404e-02, 7.4120e-03, 1.9426e-03, -4.9095e-03, 1.5617e-02, + -1.7517e-02, -2.3315e-02, 3.9368e-02, 2.7481e-02, 5.0430e-03, + -6.1859e-02, -3.4363e-02, 5.7281e-02, -1.0544e-02, 2.8152e-02, + 9.5459e-02, -7.8369e-02, 1.1642e-02, -9.4788e-02, -4.5929e-02, + -3.6133e-02, 8.3466e-03, -7.0374e-02, -3.3875e-02, 1.4877e-02, + 8.5907e-03, 6.1523e-02, 6.3171e-02, 2.8381e-02, 3.4576e-02, + 3.2288e-02, -1.2085e-01, 2.4643e-02, 4.6600e-02, 1.0339e-01, + -2.8563e-04, -3.3905e-02, -7.9250e-04, 4.6356e-02, -3.6804e-02, + 5.4138e-02, -3.4637e-02, 7.5256e-02, 4.1748e-02, -3.9215e-02, + 4.8370e-02, 1.8768e-02, -7.0095e-04], device='cuda:1', + 
dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.3688, 2.2313, 2.1793, 2.2621, 1.8781, 2.4473, 2.2259, 2.2702, 2.3276, + 2.4534, 2.1846, 2.2655, 2.3461, 2.2421, 2.3190, 2.2593, 2.3454, 2.2707, + 2.2164, 1.7897, 2.3242, 2.3469, 2.3397, 2.2526, 2.2761, 2.3129, 2.2576, + 2.2638, 2.1498, 2.2063, 2.4860, 2.2652, 2.2365, 2.3424, 2.2737, 2.4070, + 2.4567, 2.2116, 2.2207, 2.3936, 2.1568, 2.3500, 2.2707, 2.2544, 2.3308, + 2.5199, 2.4381, 2.2376, 2.2106, 2.1092, 2.2784, 2.4935, 2.3248, 2.3377, + 2.5530, 2.3622, 2.4510, 2.1990, 2.2610, 2.3005, 2.3197, 2.3185, 2.2755, + 2.3920, 2.3173, 2.2691, 2.4864, 2.3578, 2.2687, 2.0558, 2.4016, 2.1990, + 2.1231, 2.4432, 2.3081, 2.2969, 2.5476, 2.2131, 2.1720, 2.4336, 2.3412, + 2.3070, 2.4076, 2.3136, 2.3851, 2.2409, 2.3300, 2.2133, 2.3456, 2.2283, + 2.2573, 2.2693, 2.3708, 2.3321, 2.1889, 2.2271, 2.2766, 2.4146, 2.3176, + 2.3727, 2.4370, 2.3428, 2.4437, 2.3811, 2.2463, 2.0142, 2.3516, 2.2030, + 2.2619, 2.1330, 2.2031, 2.3857, 2.4318, 2.2763, 2.2307, 2.3119, 2.3642, + 2.4886, 2.2440, 2.2410, 2.3068, 2.3970, 2.1728, 2.3072, 2.3099, 2.2765, + 2.3712, 2.4885, 2.3787, 2.2320, 2.2641, 2.4476, 2.4079, 2.2429, 2.4214, + 2.2500, 2.2248, 2.2907, 2.2025, 2.4755, 2.2717, 2.2460, 2.3245, 2.3066, + 2.3491, 2.1617, 2.4615, 2.3194, 2.4639, 2.4275, 2.2145, 2.2690, 2.3564, + 2.2622, 2.3437, 1.8991, 2.2557, 2.3662, 2.2678, 2.4041, 2.1706, 2.3727, + 2.3550, 2.4025, 2.4941, 2.2641, 2.0832, 2.2486, 2.3981, 2.4916, 2.5032, + 2.3176, 2.1182, 2.2759, 2.2190, 2.3240, 2.3059, 2.2629, 2.1827, 2.3566, + 2.4849, 2.2785, 2.2739, 2.3701, 2.2370, 2.0638, 2.1875, 1.4153, 2.2091, + 2.1576, 2.4698, 2.4457, 2.3009, 2.3664, 2.4033, 2.3602, 2.2590, 2.1947, + 0.8316, 2.3363, 2.5227, 2.2341, 2.2393, 2.3279, 2.3750, 2.3931, 2.2447, + 2.4049, 2.2068, 2.3888, 2.2901, 2.1921, 2.4135, 2.0252, 2.2543, 2.3396, + 2.0983, 2.3843, 2.3032, 2.1555, 2.3856, 2.3157, 1.9222, 2.4996, 2.2700, + 2.3211, 2.3556, 2.3793, 2.2133, 2.1887, 2.4271, 2.2548, 2.1998, 2.2376, + 
2.2489, 2.3295, 2.4860, 2.1721, 2.2410, 2.3237, 2.2318, 2.4154, 2.2485, + 2.3437, 2.2699, 2.3348, 2.2798, 2.3398, 2.2431, 2.3392, 2.3666, 2.3178, + 2.2839, 2.3838, 2.3424, 2.3153, 2.4632, 2.4543, 2.4417, 2.4471, 2.2925, + 2.2936, 2.4775, 2.2104, 2.2885, 2.2657, 2.3830, 2.4525, 2.2305, 2.4506, + 2.3083, 2.3477, 2.4036, 2.3077, 2.2872, 2.4994, 2.2552, 2.3892, 2.3094, + 2.2584, 2.5298, 2.1819, 2.2275, 2.2282, 2.2621, 2.2170, 2.2949, 2.2702, + 2.2701, 2.1525, 2.3722, 2.3994, 2.2509, 2.3115, 2.2393, 2.4235, 2.2856, + 2.4068, 2.1916, 2.3960, 2.1988, 2.4193, 2.1971, 2.4324, 2.4284, 2.2356, + 2.3937, 2.3617, 2.2964, 2.2486, 2.3001, 2.3829, 2.4658, 2.4456, 2.2983, + 2.2155, 2.4114, 2.3580, 2.1757, 2.4205, 2.3233, 2.2826, 2.3751, 2.2619, + 0.6763, 2.3765, 2.2672, 2.2851, 2.4040, 2.3082, 2.1688, 2.2536, 2.5452, + 2.2973, 2.4423, 2.1116, 2.5492, 2.2694, 2.3131, 2.3285, 2.6898, 2.3127, + 2.2720, 2.4728, 2.3080, 2.2795, 2.4653, 2.2102, 2.4244, 2.2528, 1.9396, + 2.3106, 2.3023, 2.2089, 2.3359, 2.3033, 2.3235, 2.2955, 2.2911, 2.3302, + 2.3617, 2.2888, 2.3233, 2.3285, 2.3036, 2.2174, 2.2295, 2.3946, 2.2406, + 2.3098, 2.4310, 2.3034, 2.4401, 2.3541, 2.2902, 2.3781, 2.2550, 2.4189, + 2.4577, 2.3707, 2.2346, 2.3813, 2.3329, 2.2650, 2.2464, 2.3527, 2.4315, + 2.1820, 2.1956, 2.3020, 2.2607, 2.5223, 2.4836, 2.3561, 2.2295, 2.3134, + 2.2896, 2.2292, 2.4570, 2.2569, 2.3331, 2.2332, 2.3618, 2.4185, 2.2385, + 2.2092, 2.4481, 2.2702, 2.3838, 2.5967, 2.1375, 2.3543, 2.3309, 2.4050, + 1.9929, 2.8342, 2.2762, 2.1165, 2.1972, 2.3158, 2.3037, 2.3601, 2.3452, + 2.4369, 2.4202, 2.3446, 2.0123, 2.3398, 2.4535, 2.3274, 2.3294, 2.3187, + 2.4326, 2.2906, 2.3188, 2.3241, 2.3415, 2.2712, 2.2277, 2.3186, 2.4004, + 2.2099, 2.4762, 1.6964, 2.2420, 2.1310, 2.3157, 2.3439, 2.2923, 2.2653, + 2.3732, 2.2344, 2.3661, 2.3163, 2.3467, 2.3602, 2.5663, 2.3309, 2.2888, + 2.2422, 2.3163, 2.2859, 2.2626, 2.2535, 2.2556, 2.3285, 2.3882, 2.1625, + 2.3154, 2.3565, 2.3619, 2.2242, 2.2228, 2.2822, 2.3462, 2.3962, 2.3544, + 
2.3980, 2.4145, 2.1884, 2.3029, 2.2805, 2.4121, 1.8022, 2.2502, 2.3409, + 2.3668, 2.2910, 2.2283, 2.3151, 2.3326, 2.2121, 2.2692, 2.2691, 2.1824, + 2.3013, 2.5337, 2.4772, 2.2240, 2.1979, 2.3759, 2.2393, 2.2431, 2.5248, + 2.6478, 2.1782, 2.4853, 2.2081, 2.3496, 2.2851, 2.1992, 2.3299, 2.2451, + 2.2018, 2.2678, 2.3296, 2.2097, 2.4417, 2.2165, 2.4353, 2.1743, 2.3772, + 2.3127, 2.3293, 2.3001, 2.3943, 2.2248, 2.1862, 2.2712, 2.2826, 2.2903, + 2.2824, 2.3611, 2.2890, 2.3107, 2.2675, 2.3155, 2.2492, 2.5576, 2.3347, + 2.2097, 2.5520, 2.6258, 2.6445, 2.4963, 2.2860, 2.3289, 2.2363, 2.4110, + 2.3982, 2.3522, 2.1712, 2.4856, 2.3696, 2.2355, 2.3357, 2.4464, 2.2544, + 2.5418, 2.3130, 2.2759, 2.0163, 2.2506, 2.3865, 2.2722, 2.2426, 2.2608, + 2.3552, 2.2536, 2.2496, 2.5000, 2.2151, 2.1788, 2.3562, 2.3983, 2.5739, + 2.2555, 2.3720, 2.4809, 2.2267, 2.1463, 2.2596, 2.3202, 2.3680, 2.2596, + 2.4000, 2.2462, 2.3756, 2.3770, 2.2603, 2.2017, 2.2966, 2.2811, 2.3990, + 2.4600, 2.1128, 2.2974, 2.3937, 2.2691, 2.3202, 2.3163, 2.2603, 2.2670, + 2.1615, 2.5914, 2.3516, 2.3436, 2.5322, 2.3456, 2.2495, 2.3500, 2.3577, + 2.3212, 2.3772, 2.2294, 2.4203, 2.2417, 2.2855, 2.2905, 2.3836, 2.2920, + 2.0747, 2.3910, 2.3422, 2.3533, 2.3373, 2.2799, 2.3063, 2.3708, 2.3412, + 2.2712, 2.3281, 2.3870, 2.0959, 2.1643, 2.2547, 2.3594, 2.4648, 2.4353, + 2.3893, 2.3490, 2.4140, 2.1236, 2.3925, 1.9306, 2.2080, 2.1856, 2.3817, + 2.2347, 2.2804, 2.3309, 2.2901, 2.2481, 2.3116, 2.2113, 2.1951, 2.3585, + 2.2502, 2.2685, 2.2172, 1.8982, 2.2615, 2.3360, 2.4474, 2.3664, 2.4077, + 2.0775, 2.2460, 2.5276, 2.3668, 2.1460, 2.2497, 2.3825, 2.1070, 2.3265, + 2.1847, 2.3438, 2.2506, 2.4812, 2.4725, 2.2261, 2.1374, 2.2534, 2.3833, + 2.2467, 2.3161, 2.3496, 2.1886, 2.5165, 2.3093, 2.3108, 2.3012, 2.2424, + 2.3140, 2.2488, 2.3573, 2.3360, 2.3477, 2.3584, 2.4732, 2.2807, 2.2586, + 2.3842, 2.2568, 2.3066, 2.4383, 2.6123, 2.1762, 2.2118, 2.3744, 2.2700, + 2.2935, 2.3482, 2.3100, 2.4050, 2.2994, 2.2794, 2.2825, 2.3585, 2.2868, + 
2.4056, 2.3555, 2.2402, 2.2468, 2.2976, 2.2346, 1.8783, 2.4171, 2.3038, + 2.2860, 2.3164, 2.2196, 2.2157, 2.2415, 2.3264, 2.3517, 2.3506, 2.1730, + 2.7051, 2.3974, 2.2808, 2.4565, 2.1615, 2.3847, 2.4392, 2.1036, 2.3728, + 2.3957, 2.4668, 2.3189, 2.2417, 2.6226, 2.2039, 2.2346, 2.2887, 2.2391, + 2.2973, 2.1040, 2.4543, 2.0656, 2.2314, 2.6447, 2.3755, 2.3391, 2.2552, + 2.2017, 2.3095, 2.3668], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-0.2197, 0.3354, -0.4777, 0.2296, -0.2571, -0.2782, 0.2680, 0.1041, + -0.3820, -0.2839, -0.0534, -0.0172, -0.4468, -0.0195, 0.1345, -0.0460, + 0.1044, -0.0443, -0.2040, -0.7718, 0.3504, 0.2080, 0.2851, -0.5775, + 0.1081, -0.5376, -0.0747, 0.1722, -0.5021, -0.0547, 0.5642, 0.1543, + 0.1477, -0.4310, -0.2311, -0.6174, -0.4021, 0.1010, 0.8179, -0.0225, + -0.1327, -0.2184, 0.4328, -0.1261, -0.6001, -0.9222, -0.7819, -0.1870, + 0.2756, -0.2588, -0.6792, -0.6569, -0.1249, -0.3367, -1.0191, 0.9087, + 0.7620, -0.6038, 0.0963, -0.0083, -0.1625, 0.1888, -0.1935, -0.7019, + 0.0724, 0.0275, -0.1649, -0.5189, -0.6144, -0.9170, 0.6056, 0.1743, + 0.0684, -0.5347, -0.3405, -0.5641, 0.8142, -0.2712, -0.0467, 0.5850, + 0.3540, 0.2550, -0.6323, -0.1901, -0.5813, -0.3020, 0.2263, -0.0346, + 0.2808, 0.1108, -0.2443, -0.0337, -0.3646, 0.1265, -0.3793, -0.1714, + 0.7608, -0.5926, -0.3150, 1.0996, -0.4086, -0.6049, -0.5115, 0.3061, + 0.5860, 0.2025, 0.4765, 0.4331, -0.0295, -0.1208, 0.3130, 0.4131, + -0.5850, 0.0727, 0.5004, 0.0265, 0.7074, 0.5079, 0.6204, 0.5398, + -0.1818, 0.0148, -0.2653, 0.1086, 0.2554, -0.6922, 0.2110, 0.2183, + -0.4925, 0.0034, 0.0637, -0.3723, 0.3850, -0.4272, 0.8816, -0.0507, + 0.2320, -0.3092, -0.0387, -0.2841, 0.2158, 0.1545, -0.1862, -0.4746, + -0.3839, -0.0917, 0.7386, -0.4229, -0.6547, -0.2977, 0.0176, -0.0818, + 0.2436, -0.1958, -0.6082, 0.9952, -0.1447, -0.3482, -0.2684, -0.6609, + 0.0937, -0.4504, 0.7387, -0.3864, 0.5788, 0.3532, -0.3904, 0.1869, + 0.1422, -0.3027, 0.5089, -0.1565, -0.3660, 0.3908, 
0.0425, 0.0389, + 0.2921, 0.0230, 0.1537, 0.0203, -0.5847, -0.7670, 0.2777, 0.3962, + 0.0310, 0.3750, -0.4292, 1.8772, 0.0144, 0.2069, -0.6072, -0.7556, + 0.0070, 0.6241, 0.5252, -0.0226, 0.4671, 0.0726, -0.6137, 0.4737, + -0.8719, -0.3078, 0.7422, 0.0657, -0.3948, -0.3315, 0.4820, 0.4000, + -0.1885, -0.5593, -0.0326, 0.0303, -0.7097, 0.4710, -0.0080, 0.2596, + -0.5053, -0.2653, 0.0719, -0.2415, 0.7024, 0.0625, 0.0104, 0.8983, + -0.0749, -0.4544, -0.1307, 0.4815, -0.0946, 0.0246, -0.3972, -0.0282, + 0.3114, 0.5682, -0.4983, -0.3602, 0.7954, 0.1412, -0.0285, 0.4178, + -0.0331, 0.0314, 0.1608, -0.5503, 0.3445, -0.3405, 0.3910, 0.3342, + 0.0887, 0.7079, -0.2582, 0.5060, 0.2926, 0.2701, 0.3206, -0.1652, + 0.4704, -0.9332, -0.6158, -0.4013, -0.1978, 0.3000, 0.6680, 0.0541, + -0.2291, -0.1851, 0.2771, -0.1664, 0.2027, -0.5805, -0.2936, 0.0750, + -0.4917, -0.5222, 0.1597, -0.4634, 0.3040, 0.2883, -0.1047, 0.2459, + -0.5699, -1.0970, -0.1979, -0.0615, -0.3691, -0.1197, 0.1136, 0.3624, + 0.0045, 0.2218, -0.7172, 0.5502, 0.3994, -0.0446, -0.1522, -0.6184, + -0.3763, -0.8814, 0.1757, -0.1981, -0.1453, 0.7149, -0.0042, -0.4930, + 0.1774, -0.1074, 0.0255, 0.4448, -0.0075, -0.1348, 0.1764, -0.4782, + 0.4759, 0.3203, -0.3103, -0.3071, 0.5137, 0.1515, -0.2925, 0.4680, + -0.3368, -0.3806, 0.4335, -0.0727, 0.1921, -0.5856, -0.0663, 0.1296, + 0.3556, 0.2977, 0.4352, -0.3491, -0.4985, -0.2220, 0.7073, -0.0541, + -1.2134, 0.2054, 0.2669, 0.2849, 0.9239, -0.1697, -0.2411, 0.5358, + -0.8517, 0.4159, -0.4341, 0.3638, -0.2540, -0.0092, 0.3277, 0.6798, + -0.0468, 0.7107, -0.3865, 0.1758, 0.0242, -0.3292, -0.0701, 0.3912, + -0.8041, -0.4097, -0.0628, -0.3553, 0.4081, 0.0968, 0.1385, 0.5100, + 0.2941, 0.5431, -0.0992, -0.4025, 0.3482, -0.2689, 0.2528, -0.3012, + -0.1930, 0.5852, -0.2023, -0.6191, -0.3207, -0.2631, -0.0191, -0.0539, + -0.0853, -0.1048, -0.3848, 0.0635, 0.0174, 0.1541, 0.2229, -0.3462, + 0.7997, 0.4048, -0.5542, -0.4005, -0.2120, 0.1894, -0.5277, -0.4932, + -0.2205, 0.1349, 
-0.4992, -0.4048, 0.2936, -0.1750, 0.6371, -0.0344, + -0.1503, 0.4773, 0.0841, -0.1642, 0.2792, 0.3737, 1.0111, -1.7460, + 0.4022, 0.1431, -0.0319, 0.4733, 0.1269, -0.5802, 0.6673, 0.7738, + -0.2363, 0.4140, 0.1159, 0.1393, -0.2416, 0.2621, -0.5978, -0.5345, + -0.1574, 0.0450, 0.5317, 0.1276, -0.1231, 0.3749, 0.4681, -0.3119, + 0.4738, 0.0894, 0.0050, -0.7968, -0.0672, -0.0325, -0.2209, 0.5237, + 0.0687, -0.1099, -0.0316, 0.0336, -0.5300, -0.0200, -0.2776, -0.2014, + -0.9375, -0.5626, -0.0044, -0.0603, -0.2738, 0.3892, -0.3124, -0.0381, + -0.2990, 0.0306, -0.2100, 0.0666, -0.0415, -0.3901, 0.4862, 0.0913, + 0.2861, -0.3323, -0.4852, -0.3346, -0.2049, -0.8711, 0.1985, 0.0153, + 0.0071, 0.3348, 0.7854, -0.0110, 0.0555, -0.3576, 0.1926, 0.2818, + 0.2335, 0.0705, -0.2015, -0.1077, -0.0319, -0.1805, -0.0067, -0.1685, + -0.9881, -0.2477, 0.5524, 0.7451, 0.3463, -0.6136, -0.1202, 0.3726, + 0.3561, 0.2569, 0.0566, -0.0621, 0.0672, -0.3779, -0.2898, -0.2102, + -0.5451, 0.1628, -0.1314, 0.3358, 0.4334, -0.3215, 0.3202, 0.4060, + 0.3323, -0.1247, 0.2225, 0.1549, 0.0055, -0.9321, 0.2920, 0.2641, + -0.3068, 0.1259, -0.3560, -0.4065, 0.6489, -0.0242, 0.3904, -0.4408, + 0.5459, -0.2332, 0.3938, -0.2005, 0.0877, -0.6211, -0.8579, 1.0872, + 0.6445, 0.3743, -0.3435, -0.6151, -0.6444, 0.3134, -0.8510, -0.3456, + 0.5685, 0.7083, -0.1262, 0.2567, 0.4642, 0.1214, -0.8930, -0.1473, + 0.1031, -0.6386, -0.1372, 0.5740, -0.2368, -0.2888, 0.0044, -0.2496, + 0.1395, -0.0843, -0.5974, 0.3231, 0.0515, -0.4491, 0.7146, 0.9455, + 0.1059, -0.4779, -0.6536, 0.2079, 0.4047, -0.3084, 0.2931, 0.5544, + 0.1194, -0.2737, -0.1332, -0.3436, 0.6100, 0.0870, -0.0378, -0.0052, + 0.5198, -0.1086, -0.3978, 0.1642, 0.1202, 0.0767, 0.4188, 0.4866, + 0.3156, -0.3905, -0.2053, -0.5197, -0.5034, 0.5509, -0.1465, -1.6720, + 0.2816, 0.1294, -0.1098, 0.2624, -0.7129, 0.2074, 0.4927, 0.4706, + 0.0720, 0.2808, -0.1190, 0.6402, 0.1292, 0.0522, -0.1317, 0.5756, + 0.1856, 0.1908, 0.0235, 0.3120, 0.0203, 0.1948, -0.4117, 
0.1373, + -0.5174, 0.1786, 0.4331, -0.3683, 0.1093, -0.5001, -0.0453, -0.9516, + -0.1138, -0.7882, -0.6547, -0.0519, -0.1056, 0.2221, 0.0606, -0.2394, + -0.3450, 0.2991, 0.4017, -0.2505, -0.0356, -0.1036, 0.0541, 0.1212, + 0.5994, 0.2916, -0.1002, 0.0949, -0.0342, 0.0201, 0.5530, -0.5057, + -0.3143, -0.7078, -0.0361, -0.3898, -0.1446, -0.7224, -0.1283, 0.0217, + 0.6097, -0.0934, 0.3045, -0.2684, 0.2040, 0.2234, 0.3884, -0.3300, + -0.4949, 0.9895, -0.7241, -0.3068, -0.3328, 0.5213, 0.0418, -0.0891, + 0.6599, -0.3454, 0.3083, 0.3161, -0.7353, -0.5203, 0.0900, 0.7819, + -0.4455, -0.2579, -0.1770, 0.5329, 0.0052, 0.1006, 0.1091, -0.1591, + 0.0947, 0.5937, -0.6986, -0.3535, -0.2971, -0.0830, 0.2296, -0.0583, + -0.5550, 0.0065, 0.4454, -0.2022, 0.0124, 0.5068, -0.7134, -0.0757, + -0.4578, -0.4611, -0.4076, -0.5882, -0.1275, 0.0725, 0.4444, 0.3409, + 0.2102, -0.1735, -0.5005, 0.2124, 0.2399, 0.0604, 0.3048, 0.2323, + -0.4227, -0.2303, -0.9566, -0.6571, -0.2358, 0.2445, 0.3673, -0.2596, + 0.7291, 0.0237, 0.0355, 0.5341, 0.6529, 0.7548, 0.0482, -0.8337, + -0.7353, 0.1694, -0.2487, 0.4523, -0.1397, -0.1567, 0.8041, 0.3133, + -0.5152, -0.9554, -0.3891, 0.1217, 0.3175, 0.0666, -0.1434, 0.2636], + device='cuda:1', requires_grad=True)Parameter containing: +tensor([[ 0.0218, 0.0299, 0.0232, ..., 0.0067, -0.0137, 0.0305], + [-0.0040, -0.0037, 0.0077, ..., 0.0148, 0.0173, -0.0006], + [ 0.0255, 0.0141, 0.0116, ..., -0.0026, 0.0090, -0.0149], + ..., + [-0.0184, -0.0006, 0.0145, ..., -0.0108, 0.0039, -0.0072], + [ 0.0077, -0.0093, 0.0101, ..., 0.0183, 0.0176, -0.0251], + [-0.0074, 0.0018, -0.0270, ..., -0.0280, -0.0008, 0.0160]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3987, 0.2491, 0.1892, ..., 0.0093, 0.0336, -0.0070], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 0.0156, 0.0023, -0.0103, ..., -0.0124, 0.0080, -0.0077], + [-0.0054, 0.0075, -0.0130, ..., -0.0132, -0.0064, 0.0061], + 
[-0.0258, 0.0146, -0.0273, ..., 0.0131, 0.0164, 0.0133], + ..., + [-0.0032, 0.0077, 0.0094, ..., 0.0011, -0.0049, -0.0260], + [-0.0200, 0.0107, -0.0099, ..., -0.0227, -0.0027, 0.0055], + [-0.0254, 0.0017, 0.0047, ..., -0.0115, -0.0037, 0.0176]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-7.0679e-02, -4.5776e-02, 9.1003e-02, -9.2896e-02, -1.0553e-01, + -1.0181e-01, -2.5803e-02, -1.9501e-02, 9.9060e-02, -5.9784e-02, + -9.1782e-03, 9.3689e-02, 3.3569e-02, 3.6102e-02, 2.7451e-02, + 1.0675e-01, -2.7191e-02, -5.3497e-02, 1.2787e-02, -1.2189e-01, + -3.2837e-02, 2.3331e-02, 5.8624e-02, 3.2562e-02, 4.0222e-02, + 4.9957e-02, -4.8889e-02, -1.0880e-02, 6.6757e-03, 8.3069e-02, + 3.0701e-02, -5.0720e-02, -2.2461e-02, -2.9907e-02, 3.4668e-02, + -6.8115e-02, -6.2500e-02, 6.7017e-02, 1.5381e-01, -4.2480e-02, + -2.7969e-02, 3.5065e-02, 4.3182e-02, -4.9164e-02, -5.0659e-02, + -5.5298e-02, 4.4159e-02, -7.7087e-02, -3.0991e-02, 3.7048e-02, + -2.8351e-02, -1.1823e-01, 2.7409e-03, -3.3997e-02, 1.4296e-03, + -2.3537e-03, 8.4778e-02, -2.4139e-02, -9.0179e-03, 7.6790e-03, + -6.6490e-03, 3.6346e-02, -2.0126e-02, 2.2537e-02, -4.6997e-03, + 7.3792e-02, -8.8745e-02, -2.4490e-02, -1.2347e-01, -1.2634e-01, + 2.9633e-02, -5.7564e-03, -2.0911e-01, -8.7036e-02, -2.5528e-02, + -5.9082e-02, 3.6865e-02, 6.3965e-02, -7.2632e-03, 7.9834e-02, + 4.2664e-02, 6.3232e-02, 3.9459e-02, -5.0690e-02, -8.1726e-02, + -5.8594e-02, 1.1548e-01, -4.0131e-02, 3.0319e-02, -9.5337e-02, + -7.4524e-02, -7.6843e-02, -5.9021e-02, 9.4055e-02, 2.3514e-02, + -2.0996e-02, 1.3054e-02, -7.8247e-02, -9.8022e-02, 1.9202e-01, + 5.8594e-02, -5.3040e-02, -1.1823e-01, 1.5015e-02, 1.7075e-02, + -4.1718e-02, -1.9974e-02, -2.4048e-02, 3.1525e-02, -3.2898e-02, + 3.1113e-02, 9.5459e-02, -1.1664e-01, -5.0240e-03, -5.0232e-02, + 1.6508e-03, 6.4453e-02, 4.3365e-02, -2.9495e-02, 9.2407e-02, + -3.8086e-02, 1.9445e-03, 2.4414e-02, 9.6069e-02, 7.1106e-02, + -2.1118e-02, 1.0645e-01, 9.8877e-02, 
4.7058e-02, 2.4948e-02, + 3.3783e-02, -5.2826e-02, 1.4795e-01, 1.4429e-01, 2.7863e-02, + 7.3608e-02, 3.7670e-03, -1.4319e-01, 8.0566e-02, -2.8549e-02, + 1.1816e-03, 9.8145e-02, 2.5558e-02, 6.2195e-02, -1.2047e-02, + 5.1819e-02, -2.8744e-03, -1.2085e-01, -6.2988e-02, -8.5388e-02, + -3.7994e-02, 1.3962e-02, 5.8136e-02, -7.4158e-02, 1.6479e-02, + 1.1902e-01, 6.0303e-02, -3.8879e-02, -7.4951e-02, -8.5693e-02, + -3.5400e-02, 3.5919e-02, -2.8793e-02, -7.5607e-03, 5.2643e-02, + 5.1361e-02, 1.1829e-01, -6.7810e-02, 5.5786e-02, -5.8502e-02, + 1.4478e-01, -8.7830e-02, -3.9062e-02, 1.3940e-01, -1.3527e-02, + 1.5236e-02, -2.0996e-02, -2.1057e-02, 3.6255e-02, 6.7902e-04, + -7.9163e-02, -4.9629e-03, 2.2064e-02, 7.5562e-02, -1.0864e-01, + 1.3062e-01, -4.4189e-02, -5.3076e-01, 7.3792e-02, 5.9814e-02, + -1.1389e-01, -3.5919e-02, -4.1199e-02, 5.3528e-02, 3.6255e-02, + -5.6801e-03, 3.1952e-02, 4.8248e-02, -1.8665e-01, 3.3813e-02, + 4.9408e-02, -8.1558e-03, -3.0151e-02, -1.5857e-01, -3.2135e-02, + -2.4261e-03, 1.2337e-02, -3.3936e-02, -4.2603e-02, -1.1481e-01, + -1.0529e-02, 3.8886e-04, -7.4707e-02, 6.2927e-02, 7.0190e-02, + -8.8501e-02, 6.1066e-02, -7.7248e-04, 2.6871e-02, 1.0718e-01, + 5.8838e-02, 9.2102e-02, -1.8555e-02, 4.2358e-02, 2.3666e-02, + 1.4282e-01, -7.4997e-03, 3.2043e-02, 6.7749e-02, 4.3060e-02, + -6.9641e-02, -1.2589e-02, 9.8022e-02, -4.2053e-02, -2.5085e-02, + -6.2439e-02, 1.0425e-01, -7.3608e-02, -7.2449e-02, 5.0262e-02, + -1.2352e-02, 1.0391e-02, -1.8723e-02, 2.0737e-02, -9.9243e-02, + -8.9294e-02, -1.3684e-01, 1.4172e-01, -2.3300e-02, 3.2745e-02, + -8.8654e-03, 3.0685e-02, 3.9764e-02, 3.5309e-02, -4.6234e-02, + -2.1149e-02, 4.3274e-02, -4.4067e-02, -5.4352e-02, -8.4290e-02, + 2.7222e-02, -8.1421e-02, 5.6183e-02, 6.7017e-02, 3.0563e-02, + -1.0422e-02, 6.9092e-02, -5.5115e-02, 1.2091e-01, -3.0243e-02, + -8.4915e-03, -2.3682e-02, -6.2256e-02, -2.4628e-02, 4.7821e-02, + -4.0558e-02, -1.2445e-01, 7.6050e-02, -6.8420e-02, 5.0446e-02, + -1.0040e-01, -1.0150e-01, 
1.3771e-02, 7.6904e-02, -1.0229e-01, + 6.4331e-02, -3.3173e-02, -3.2837e-02, 1.0582e-02, 9.0332e-02, + -1.3123e-02, -1.7441e-02, 5.4993e-02, -8.1848e-02, -1.4786e-02, + -8.4900e-02, 7.5874e-03, -1.2466e-02, -3.9246e-02, -7.0557e-02, + -4.0375e-02, -9.9060e-02, -5.7709e-02, -1.0333e-01, 4.7516e-02, + 7.1899e-02, -9.7046e-02, 1.2421e-01, -6.1462e-02, -5.6061e-02, + 4.9774e-02, -3.4271e-02, 1.1642e-02, 1.8448e-02, 3.9825e-02, + -4.1595e-02, 5.0964e-02, 5.9143e-02, -8.1665e-02, 6.6040e-02, + -1.4641e-02, -3.8696e-02, 1.3565e-02, -3.2318e-02, 1.0596e+00, + -3.0182e-02, -9.3933e-02, 7.8491e-02, -2.4429e-02, 6.4331e-02, + -5.5817e-02, 2.8839e-02, -2.0096e-02, -2.3560e-02, 4.5441e-02, + -7.0007e-02, -1.2067e-01, 7.7698e-02, -1.1734e-02, 1.5900e-02, + 6.6101e-02, -3.7964e-02, -6.2439e-02, 5.0079e-02, -6.2347e-02, + 7.5867e-02, -1.2903e-01, -6.0425e-02, -1.4717e-02, 2.4750e-02, + 8.2703e-02, -4.2084e-02, -3.1319e-03, -9.4238e-02, 4.2175e-02, + 1.9730e-02, -8.8074e-02, 5.4359e-03, -3.2288e-02, 7.5806e-02, + -2.3331e-02, 4.3915e-02, 4.5868e-02, -1.6342e-02, 1.1243e-01, + -1.0236e-01, -1.3298e-02, -1.0358e-01, 9.4299e-03, -7.5439e-02, + -7.3242e-02, 6.2195e-02, 1.0040e-01, 2.8320e-02, -2.3926e-02, + -3.8696e-02, -5.5542e-03, 2.3041e-02, -5.6702e-02, -6.7322e-02, + 5.0720e-02, -7.1045e-02, -8.0994e-02, -1.0010e-01, 5.1483e-02, + -8.4839e-02, 5.9662e-02, 1.0419e-01, 6.5002e-02, -1.2711e-02, + 7.4646e-02, -1.2207e-01, 3.7170e-02, 4.0497e-02, -1.7624e-02, + -2.0355e-02, -6.6406e-02, -5.2887e-02, 4.9782e-03, 5.8350e-02, + -5.6641e-02, 5.7526e-02, -3.4393e-02, -6.4453e-02, 4.7150e-02, + 2.0538e-02, 5.0598e-02, -1.2779e-02, -7.1533e-02, 1.0449e-01, + -1.6251e-02, -8.4412e-02, 7.5195e-02, 5.9601e-02, 6.1310e-02, + 2.2903e-02, -2.9999e-02, -3.6041e-02, 6.2469e-02, 5.2399e-02, + 7.8247e-02, 3.4546e-02, -4.0344e-02, 2.4460e-02, 7.3910e-04, + 7.0984e-02, 9.1003e-02, 7.1640e-03, -2.1863e-01, 3.0319e-02, + -3.5057e-03, -1.6678e-02, -1.0938e-01, -1.0541e-01, -1.1330e-02, + -1.3440e-01, 
7.4692e-03, 6.9336e-02, -1.3794e-01, -2.7069e-02, + 1.2329e-02, 7.8247e-02, -6.4011e-03, -2.4475e-02, -8.3740e-02, + 1.9165e-02, 8.7585e-02, 9.7229e-02, -2.7100e-02, -8.3618e-03, + -2.2491e-02, -2.1179e-02, 1.0597e-02, -1.0992e-01, -6.5979e-02, + -6.6650e-02, 5.6244e-02, 3.7109e-02, 1.0767e-01, 9.8938e-02, + -6.0822e-02, -3.7079e-02, -1.0614e-03, 4.0412e-04, -3.8300e-02, + 1.0895e-01, -1.0541e-01, 1.1792e-01, 2.0264e-02, 6.9284e-04, + 4.5929e-02, 5.2910e-03, -5.2490e-02, 4.3701e-02, 5.1880e-02, + 7.4615e-03, -9.7885e-03, -3.8330e-02, -1.2659e-01, -8.4656e-02, + -1.2488e-01, 5.0110e-02, 2.5848e-02, -4.6043e-03, 4.7913e-02, + -6.5857e-02, 5.9387e-02, 3.1830e-02, 1.8906e-02, 7.4291e-04, + -1.1169e-02, -1.8936e-02, 1.2366e-01, 7.6294e-02, 3.8849e-02, + -1.0938e-01, 3.0029e-02, -5.6152e-02, 1.4809e-02, -6.6345e-02, + 1.0468e-01, 3.8574e-02, 7.0992e-03, -1.4160e-02, 9.8572e-02, + 8.5266e-02, -5.5328e-02, 7.7148e-02, -1.0376e-02, 4.1046e-03, + -1.2915e-01, 6.7871e-02, 9.3231e-03, -1.0138e-01, 1.3443e-02, + 4.5013e-02, -3.8385e-05, -3.0212e-02, -1.8784e-02, 1.2354e-01, + 6.4819e-02, 3.2310e-03, -5.0873e-02, -4.9095e-03, -5.7465e-02, + 3.1525e-02, -2.1057e-03, 5.0720e-02, -4.4495e-02, -1.9178e-03, + 5.1392e-02, 6.4514e-02, -9.3155e-03, 1.0876e-01, -4.2305e-03, + -1.2292e-01, 9.3994e-02, -1.7920e-03, 4.2694e-02, 1.6922e-02, + 4.1565e-02, -3.9764e-02, -5.7770e-02, 1.2360e-02, 7.4097e-02, + 8.1909e-02, -4.5166e-02, -5.3833e-02, -7.1869e-03, -9.4528e-03, + 4.8981e-02, -2.7710e-02, 1.0260e-01, -1.3412e-02, 1.1505e-02, + 6.5247e-02, 1.0474e-01, -2.1515e-02, -3.8239e-02, -1.0846e-01, + 5.4108e-02, -2.1985e-01, -3.7720e-02, 4.5532e-02, -8.8135e-02, + -2.5528e-02, -1.0339e-01, -7.7393e-02, -1.1940e-03, -5.2185e-02, + -6.1951e-02, 2.1973e-02, 3.8452e-02, -4.7058e-02, 2.1606e-02, + 2.3384e-03, 1.9165e-02, -2.5101e-02, -3.4466e-03, 9.0942e-02, + 1.3977e-01, -1.2122e-01, -8.9294e-02, 9.1370e-02, 1.8951e-02, + -8.0185e-03, 1.6144e-02, -1.4145e-02, 6.8848e-02, -1.4801e-02, + -3.5522e-02, 
-4.9561e-02, 2.6398e-02, -1.1346e-01, -3.8662e-03, + 1.0541e-01, -1.7624e-02, -2.3285e-02, 7.0251e-02, 6.9542e-03, + 3.5156e-02, 1.2573e-01, -4.9438e-02, -7.5317e-02, -3.8971e-02, + -6.6223e-02, -2.0905e-02, -6.0944e-02, -6.0944e-02, 1.3611e-01, + 3.0319e-02, 7.3853e-02, -7.8812e-03, 7.2327e-02, 3.3951e-03, + 9.3140e-02, 1.0571e-01, 3.3150e-03, -3.7262e-02, 1.6083e-02, + 7.7705e-03, 3.0014e-02, -1.8890e-02, 7.6599e-02, -5.6793e-02, + 6.9336e-02, 5.2399e-02, 3.0533e-02, -3.6560e-02, -1.0094e-02, + -5.7037e-02, 1.3550e-01, 4.0497e-02, -5.1819e-02, -1.9897e-02, + -6.3965e-02, 1.9617e-01, -1.3374e-02, -7.1350e-02, 2.0462e-02, + 4.1779e-02, -9.9060e-02, -8.6670e-02, 4.7607e-02, -1.2292e-01, + 7.4402e-02, -9.5764e-02, -1.0065e-01, 4.4739e-02, -3.7598e-02, + -1.0968e-01, -1.5881e-01, 1.7334e-02, -9.2651e-02, 1.0797e-01, + 9.8267e-02, 4.5052e-03, -2.0615e-02, -7.7393e-02, -3.9635e-03, + 5.2277e-02, 6.7932e-02, 1.3293e-01, 1.8339e-03, -7.7454e-02, + -2.6428e-02, 1.0175e-01, 5.3864e-02, -5.4901e-02, -5.7068e-03, + -7.3486e-02, -8.3679e-02, -4.8523e-02, 4.5258e-02, 2.5436e-02, + -5.6793e-02, 4.0558e-02, 6.2744e-02, 5.5817e-02, 5.1941e-02, + -6.1279e-02, 2.9028e-01, -1.9169e-03, -8.4229e-02, -4.5868e-02, + 3.0945e-02, 6.5552e-02, 4.5898e-02, 6.6589e-02, -4.1199e-02, + -6.7444e-02, 3.7804e-03, 1.2665e-02, 1.0422e-02, -9.9976e-02, + -2.8168e-02, 3.8586e-03, -2.4567e-02, -9.8328e-02, 3.9581e-02, + 7.5562e-02, -7.6180e-03, 2.4734e-02, 3.1235e-02, -7.0801e-02, + 1.0535e-01, -1.2085e-02, 7.4844e-03, 1.0841e-02, -1.1749e-01, + 5.3467e-02, 1.4549e-02, -5.8289e-02, -1.5213e-02, 1.1957e-01, + 9.1125e-02, 1.3904e-01, 1.0822e-01, -4.1321e-02, 5.4565e-02, + 8.5083e-02, -7.4341e-02, 1.5030e-02, -3.3478e-02, -2.0203e-02, + 2.5604e-02, 3.6987e-02, -1.3008e-02, 6.3232e-02, -7.7454e-02, + -1.4111e-01, -8.8074e-02, -3.7506e-02, -4.3671e-02, 8.7402e-02, + 4.4617e-02, 4.7424e-02, -1.4565e-02, -1.0992e-01, 1.0181e-01, + -9.0637e-02, 1.2854e-01, 6.5369e-02, 2.7649e-02, -2.2308e-02, + -2.0157e-02, 
6.4331e-02, -3.9062e-03, 2.3636e-02, 2.3666e-02, + 1.4148e-01, -2.2064e-02, 2.6810e-02, -1.4656e-02, -6.7688e-02, + -4.1504e-02, -2.7832e-02, -4.6509e-02, 4.8615e-02, 6.4087e-02, + 9.7504e-03, -1.4923e-02, -2.2110e-02, 8.0444e-02, -2.2034e-02, + -2.3918e-03, -3.1235e-02, 2.3834e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.6302, 1.7985, 1.6646, 1.6736, 1.6246, 1.7435, 1.6344, 1.5951, 1.7835, + 1.7151, 1.7555, 1.7396, 1.7075, 1.6981, 1.6234, 1.7331, 1.7033, 1.7175, + 1.6565, 0.4716, 1.7354, 1.6599, 1.7952, 1.7343, 1.7564, 1.7225, 1.7034, + 1.7112, 1.7357, 1.6902, 1.7430, 1.6893, 1.7471, 1.7559, 1.6642, 1.6995, + 1.8257, 1.6706, 1.7690, 1.5814, 1.7597, 1.6698, 1.7122, 1.8090, 1.8079, + 1.6904, 1.7110, 1.7207, 1.6608, 1.6236, 1.7413, 1.7274, 1.7435, 1.7000, + 1.8054, 1.8643, 1.7005, 1.6365, 1.7551, 1.7265, 1.7312, 1.6578, 1.7392, + 1.7119, 1.6265, 1.6944, 1.6835, 1.6661, 1.7137, 1.6735, 1.7496, 1.7183, + 1.8942, 1.6574, 1.7160, 1.6588, 1.6898, 1.6397, 1.6588, 1.7767, 1.7230, + 1.7033, 1.7254, 1.6983, 1.6907, 1.6416, 1.7969, 1.7425, 1.7828, 1.7353, + 1.7485, 1.6252, 1.7266, 1.7510, 1.7018, 1.6580, 1.7274, 1.6662, 1.6723, + 1.6758, 1.6462, 1.8060, 1.6850, 1.6715, 1.5962, 1.8119, 1.7262, 1.6277, + 1.8163, 1.7033, 1.6744, 1.7379, 1.6901, 1.7661, 1.6420, 1.7525, 1.6696, + 1.6702, 1.7037, 1.6573, 1.7341, 1.7748, 1.6981, 1.6995, 1.7189, 1.6223, + 1.6765, 1.7105, 1.7716, 1.7087, 1.7001, 1.7687, 1.8099, 1.7073, 1.6350, + 1.7793, 1.6807, 1.7039, 1.6787, 1.6660, 1.7035, 1.7318, 1.7495, 1.7426, + 1.7035, 1.7014, 1.7568, 1.7150, 1.7569, 1.7416, 1.6580, 1.7700, 1.7185, + 1.7293, 1.7545, 1.4643, 1.7184, 1.7745, 1.7545, 1.7200, 1.6514, 1.6298, + 1.6485, 1.7160, 1.6818, 1.6670, 1.7770, 1.7118, 1.7166, 1.7775, 1.7387, + 1.6928, 1.6731, 1.6660, 1.7970, 1.7641, 1.7415, 1.6976, 1.6877, 1.7292, + 1.7794, 1.6900, 1.7106, 1.7634, 1.7742, 1.5609, 1.6714, 2.4107, 1.7411, + 1.7388, 1.7277, 1.6532, 1.7926, 1.7386, 1.7353, 1.7197, 1.6594, 
1.7166, + 2.9263, 1.6523, 1.6338, 1.6537, 1.6623, 1.8019, 1.7601, 1.7041, 1.5999, + 1.6747, 1.6999, 1.7111, 1.7555, 1.6907, 1.7676, 1.6870, 1.7656, 1.6681, + 1.6671, 1.7032, 1.6807, 1.6889, 1.7178, 1.7326, 1.6222, 1.6898, 1.7453, + 1.6968, 1.7163, 1.7438, 1.7356, 1.7753, 1.7192, 1.6955, 1.7155, 1.6026, + 1.7237, 1.7517, 1.6404, 1.6831, 1.6877, 1.7579, 1.7780, 1.6317, 1.8032, + 1.6840, 1.6494, 1.7386, 1.6764, 1.7246, 1.7601, 1.7989, 1.7198, 1.7053, + 1.7228, 1.7097, 1.8476, 1.6812, 1.6919, 1.7276, 1.6427, 1.6771, 1.6796, + 1.6661, 1.7377, 1.7073, 1.7061, 1.6469, 1.7119, 1.6620, 1.7859, 1.6810, + 1.6796, 1.7163, 1.6304, 1.6605, 1.7343, 1.6836, 1.6586, 1.6460, 1.7383, + 1.6565, 1.6995, 1.6848, 1.6886, 1.7302, 1.6319, 1.7084, 1.6455, 1.6820, + 1.7227, 1.6099, 1.6801, 1.7885, 1.7595, 1.7140, 1.7157, 1.8043, 1.6373, + 1.7648, 1.7183, 1.7589, 1.7482, 1.7072, 1.6913, 1.7318, 1.7578, 1.7108, + 1.6067, 1.7158, 1.7523, 1.6791, 1.7145, 1.7314, 1.7042, 1.7376, 1.6796, + 1.8628, 1.7177, 1.7367, 1.6759, 1.7337, 1.7052, 1.6002, 1.7024, 1.7880, + 1.0642, 1.6845, 1.7041, 1.6755, 1.6165, 1.7929, 1.6193, 1.6957, 1.6715, + 1.5950, 1.6571, 1.6822, 1.6901, 1.7022, 1.7024, 1.7112, 1.6735, 1.6967, + 1.6778, 1.6575, 1.7305, 1.8147, 1.6804, 1.6963, 1.6868, 1.7943, 1.5572, + 1.6860, 1.7039, 1.7471, 1.6538, 1.6997, 1.7835, 1.8064, 1.7628, 1.7780, + 1.6053, 1.6869, 1.7610, 1.7246, 1.7329, 1.7138, 1.6687, 1.6806, 1.6728, + 1.6817, 1.7294, 1.7214, 1.7234, 1.7492, 1.6789, 1.7357, 1.7378, 1.7139, + 1.7068, 1.7499, 1.6821, 1.7469, 1.6772, 1.6623, 1.6863, 1.7077, 1.6838, + 1.6427, 1.6845, 1.6781, 1.7001, 1.6867, 1.6507, 1.7201, 1.7097, 1.7876, + 1.6753, 1.7640, 1.7407, 1.7754, 1.6692, 1.7506, 1.7908, 1.7180, 1.7826, + 1.7048, 1.7631, 1.7375, 1.7276, 1.6960, 1.6728, 1.6715, 1.6916, 1.6660, + 1.8672, 2.4371, 1.7775, 1.7589, 1.6740, 1.6990, 1.6844, 1.7150, 1.6511, + 1.7197, 1.7364, 1.6682, 1.6788, 1.7742, 1.7689, 1.6509, 1.7058, 1.5937, + 1.6183, 1.7320, 1.6366, 1.7162, 1.6393, 1.6547, 1.6122, 1.6934, 
1.7644, + 1.7195, 1.7261, 1.5334, 1.6655, 1.7531, 1.6899, 1.7284, 1.7253, 1.7140, + 1.7599, 1.7455, 1.6877, 1.7144, 1.7063, 1.7585, 1.6483, 1.7034, 1.7872, + 1.6436, 1.6991, 1.7315, 1.6869, 1.7266, 1.6941, 1.6680, 1.7382, 1.7115, + 1.6838, 1.7406, 1.7487, 1.7284, 1.6998, 1.7120, 1.7665, 1.7601, 1.7432, + 1.6692, 1.6749, 1.7034, 1.7852, 1.6845, 1.7553, 1.6064, 1.6902, 1.6817, + 1.7528, 1.7495, 1.6978, 1.7445, 1.7844, 1.7044, 1.6215, 1.8064, 1.6947, + 1.7256, 1.7462, 1.6445, 1.7153, 1.7068, 1.6898, 1.7033, 1.6971, 1.6012, + 1.7222, 1.6898, 1.6337, 1.6189, 1.7169, 1.6581, 1.6849, 1.7840, 1.7095, + 1.7210, 1.6652, 1.6866, 1.7299, 1.7457, 1.6161, 1.6876, 1.6810, 1.6921, + 1.7402, 1.6973, 1.6989, 1.7322, 1.6970, 1.6480, 1.6529, 1.7896, 1.7318, + 1.7364, 1.6680, 1.7424, 1.7295, 1.6991, 1.7154, 1.7562, 1.7087, 1.8444, + 1.7505, 1.7476, 1.6686, 1.7369, 1.7072, 1.7248, 1.6937, 1.7070, 1.6945, + 1.7787, 1.7125, 1.7263, 1.7831, 1.6013, 1.7178, 1.6786, 1.7872, 1.6886, + 1.8013, 1.6767, 1.8089, 1.5642, 1.7826, 1.7168, 1.7179, 1.7068, 1.7395, + 1.6109, 1.8325, 1.7497, 1.7236, 1.6414, 1.7245, 1.6947, 1.7587, 1.6420, + 1.6678, 1.7057, 1.6923, 1.7012, 1.6248, 1.7218, 1.7242, 1.6999, 1.6844, + 1.7597, 1.7158, 1.7671, 1.7628, 1.7141, 1.7269, 1.7585, 1.7083, 1.7444, + 1.7899, 1.7588, 1.7001, 1.7858, 1.6202, 1.7109, 1.7387, 1.7646, 1.8021, + 1.6163, 1.7132, 1.7312, 1.8329, 1.9605, 1.6433, 1.7724, 1.7457, 1.6874, + 1.7289, 1.7210, 1.7764, 1.7046, 1.6804, 1.7570, 1.7835, 1.6932, 1.7622, + 1.7469, 1.6904, 1.6917, 1.6915, 1.7520, 1.7008, 1.6287, 1.7597, 1.6486, + 1.6401, 1.7387, 1.6101, 1.5982, 1.7633, 1.7166, 1.7213, 1.7841, 1.6996, + 1.6279, 1.6920, 1.7200, 1.7078, 1.6533, 1.5925, 1.7321, 1.7587, 1.6523, + 1.7181, 1.6851, 1.7170, 1.7081, 1.7212, 1.7277, 1.7332, 1.8115, 1.6485, + 1.7706, 1.6613, 1.6623, 1.6581, 1.7324, 1.6186, 1.7720, 1.6911, 1.7587, + 1.7309, 1.6524, 1.7619, 1.6940, 1.6903, 1.7228, 1.7862, 1.6758, 1.6580, + 1.6437, 1.7973, 1.6737, 1.7409, 1.6853, 1.7406, 0.3935, 1.6961, 
1.6231, + 1.7050, 1.5714, 1.6899, 1.7399, 1.6476, 1.6983, 1.7059, 1.6509, 1.7344, + 1.6900, 1.7586, 1.6770, 1.6974, 1.6979, 1.6864, 1.7182, 1.7086, 1.6912, + 1.7265, 1.7103, 1.7262, 1.7304, 1.6552, 1.6509, 1.6989, 1.7162, 1.7130, + 1.7572, 1.7408, 1.7483, 1.6416, 1.6755, 1.7099, 1.6839, 1.7110, 1.6536, + 1.6486, 1.7170, 1.6966, 1.6698, 1.7294, 1.7431, 1.5646, 1.7819, 1.7805, + 1.7427, 1.7266, 1.7108, 1.7271, 1.6740, 1.6893, 1.7412, 1.6017, 1.7220, + 1.7766, 1.7968, 1.7975, 1.7024, 1.7743, 1.6766, 1.6991, 1.7311, 1.7651, + 1.7055, 1.6984, 1.7207, 1.6710, 1.6696, 1.7045, 1.8250, 1.7300, 1.6860, + 1.7420, 1.7058, 1.7018, 1.7314, 1.8237, 1.7121, 1.6656, 1.6345, 1.6813, + 1.6482, 1.6091, 1.6466], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.3478e-01, 4.9497e-02, -1.9917e-01, -2.5184e-02, 1.8369e-01, + 1.4607e-01, 1.4084e-01, 6.8528e-02, -1.5947e-01, -6.2207e-02, + 7.6121e-02, 3.2098e-03, -1.8940e-01, -4.3906e-02, -1.2922e-01, + -4.1989e-02, -2.0775e-02, -6.5565e-03, 7.1063e-05, 4.6815e+00, + -3.8654e-02, 1.2217e-01, 4.5751e-02, 1.4584e-02, -3.0041e-02, + -1.6267e-02, 6.6473e-02, 1.1673e-02, 6.2238e-02, -2.7864e-02, + 6.8211e-02, 4.2821e-02, -2.0303e-02, 1.4535e-01, 7.2629e-02, + -9.8829e-02, 1.5733e-02, 1.1677e-02, -1.0973e-01, 1.0373e-01, + 5.3048e-02, 4.4230e-02, -1.5395e-02, 2.9456e-02, -2.1916e-02, + -1.3948e-02, 7.3127e-02, -2.4933e-02, 8.8659e-02, 9.9079e-02, + -5.0301e-02, -5.6772e-02, 7.0892e-02, -7.0100e-02, 2.1891e-02, + 1.0014e-01, -4.8898e-02, -1.2617e-01, 4.7462e-02, 5.8843e-02, + 7.5138e-02, -7.0855e-02, 8.9971e-02, 3.2970e-02, 8.8290e-02, + 5.7166e-03, 1.2110e-01, 1.0937e-01, -2.0420e-02, 2.1111e-01, + 1.3070e-01, 9.3519e-02, 7.5941e-01, 1.3822e-01, -1.2845e-01, + 7.7136e-02, 5.3268e-02, -1.1566e-01, -9.7905e-03, 6.5708e-02, + -5.1334e-04, 1.6957e-02, -7.0916e-02, 1.5199e-01, -1.2595e-02, + -4.9605e-02, 1.3342e-01, 1.0708e-01, -3.1504e-02, 5.3242e-03, + -2.3024e-02, 2.6981e-02, 4.4238e-02, -6.3475e-02, -1.1366e-01, + -2.3226e-02, 
3.2116e-02, -1.0859e-01, -1.1022e-01, 3.6969e-02, + 3.9728e-02, -7.8961e-02, -6.6003e-02, -3.1561e-02, 6.1650e-02, + 8.0293e-03, 6.8628e-02, -4.7751e-02, 6.1943e-04, -4.6869e-02, + -6.5288e-02, -4.3023e-02, -5.4249e-02, 5.9192e-02, 1.2167e-01, + -5.0382e-02, -5.4665e-02, -1.0046e-02, 1.7697e-02, 1.4857e-01, + -6.6598e-02, -2.7876e-01, -1.4727e-01, 2.0667e-02, -1.5421e-01, + -4.0336e-02, -1.7894e-01, 7.5444e-02, 2.9734e-02, -6.8246e-02, + -1.3805e-01, 2.5628e-02, -5.8404e-02, -1.9855e-02, -1.0818e-01, + 5.1029e-02, -2.3740e-02, -2.3311e-02, -4.2155e-02, -6.1693e-02, + -8.1013e-04, 2.0931e-02, 2.0602e-02, -3.5260e-02, 1.0996e-01, + -3.2859e-02, -1.4331e-02, -3.7377e-03, -1.2704e-01, 1.7471e-01, + 2.6021e-01, -8.9699e-02, -1.3109e-01, -1.8212e-02, -3.0574e-02, + -5.7635e-01, 1.9360e-02, -5.0405e-03, -3.5920e-02, 4.8079e-02, + 1.9668e-01, -1.7959e-01, -2.4585e-02, 2.3465e-02, 1.3495e-02, + 2.6305e-02, -4.9910e-02, 7.2364e-02, -3.3804e-02, 4.1981e-02, + -3.9400e-02, 9.3760e-02, 1.3973e-03, -1.1626e-03, 7.6308e-02, + -1.9463e-01, -4.3892e-02, -1.6021e-02, -1.9729e-02, 6.7893e-02, + -6.5787e-02, -5.7712e-02, 1.4709e-01, 6.0649e-02, 4.0690e-02, + -1.8589e-01, 4.2896e-02, 7.1578e-01, -7.9182e-02, -8.3033e-02, + -8.5046e-02, -1.4351e-01, 9.7572e-02, 1.8969e-01, -8.5853e-02, + 1.7470e-02, -3.4782e-02, -1.6035e-01, 3.4463e-01, 6.4275e-02, + -2.2809e-01, -5.6076e-02, -2.3670e-02, 8.6391e-02, 4.4294e-02, + 1.7742e-01, -1.4438e-01, 1.0222e-01, 1.5188e-01, -6.5791e-02, + 4.2606e-02, 2.7326e-01, -3.9982e-02, -6.7293e-02, -2.9540e-02, + 2.9845e-02, -4.9459e-02, 4.1518e-02, -1.2998e-01, -2.7670e-01, + 5.9885e-03, -2.6303e-02, 1.0029e-01, 6.5153e-02, 2.8001e-03, + -2.1397e-02, 8.1904e-02, 1.3961e-02, 1.6888e-01, -4.7473e-02, + -2.2614e-02, 7.6486e-02, 1.7701e-03, 7.7329e-02, -1.2749e-02, + 1.8841e-02, -1.0814e-02, 9.6482e-02, 7.9134e-02, 1.1832e-02, + 9.7106e-02, 1.4173e-01, -3.1437e-02, 7.6158e-02, 2.8705e-02, + 9.3654e-03, 5.6769e-02, -3.9432e-02, 8.9598e-03, 3.7227e-02, + 
-2.9699e-02, 2.5160e-02, 1.2401e-01, -1.3756e-01, 1.6073e-01, + -7.9136e-02, -1.2519e-02, -1.4151e-01, 5.5290e-02, 2.9537e-02, + -6.5784e-02, -1.6376e-02, -9.2159e-03, -4.8453e-03, -5.9228e-02, + 4.9723e-02, -3.6096e-02, -1.2492e-02, -6.2044e-02, -7.8530e-02, + -6.7506e-03, 5.7670e-02, 9.7309e-02, -1.9286e-01, -2.4384e-02, + 8.4430e-02, -8.3274e-02, 1.8163e-02, 9.2198e-02, -1.4828e-01, + 1.1023e-01, 2.1166e-01, -5.7077e-02, 7.9818e-02, 3.2513e-02, + 7.4148e-02, 6.8250e-02, -1.0043e-02, 2.2324e-01, -1.5617e-01, + -9.4417e-02, 1.2959e-02, 1.2156e-01, 5.6972e-02, 9.1363e-02, + -5.3003e-02, 2.4945e-01, -3.0141e-02, 1.0665e-01, 6.6269e-02, + 5.7760e-02, 8.7219e-02, -2.8721e-02, 1.3285e-01, 1.1745e-02, + 2.0377e-02, 4.3067e-02, -9.6992e-02, 3.5457e-02, -7.1593e-02, + 8.9201e-02, 3.3648e-02, -4.9995e-02, -8.9329e-02, 1.1568e-02, + -1.2160e-02, 2.4013e-02, 1.0378e-01, 2.1693e-01, -2.2300e-02, + -1.3594e-01, 2.4443e-01, -8.2670e-02, -3.2104e-02, -8.0759e-01, + -2.5522e-01, 3.1634e-02, 1.2173e-02, 5.3848e-02, -1.6714e-01, + 2.2496e-01, -2.5352e-02, 1.1204e-01, -6.9995e-02, 1.4671e-01, + 7.5391e-02, 1.2656e-02, -7.6575e-04, -1.0763e-02, -3.7951e-02, + -8.5652e-02, 1.6227e-03, -1.0500e-01, -6.3366e-02, 1.4564e-01, + 2.8785e-02, 7.8663e-02, 1.4579e-01, 5.0819e-02, -4.8727e-02, + -6.2212e-02, 8.5918e-02, 1.3938e-02, 1.6487e-01, -5.1246e-02, + -7.7145e-02, -6.3459e-02, -6.9927e-05, -8.5017e-02, -7.5265e-02, + -1.3347e-02, -4.1582e-02, -1.3234e-03, -1.2145e-01, -6.8723e-03, + -3.2144e-02, 5.8249e-02, 1.8110e-01, 1.0470e-01, 1.0201e-01, + 8.5427e-02, -1.7104e-02, -2.0914e-02, -4.8454e-02, 9.2019e-03, + 4.8977e-02, 9.0650e-02, 1.0401e-01, -4.5989e-02, -2.2640e-02, + -1.0117e-01, 1.3499e-01, 4.5189e-02, 7.2833e-03, -5.6661e-02, + -3.6711e-02, -2.9658e-02, -7.0319e-02, -9.1909e-02, -2.3429e-02, + 6.0404e-03, -4.5523e-02, -3.0660e-02, -3.0381e-02, 1.5182e-02, + -7.5460e-03, -1.4732e-02, -4.3778e-03, -4.7716e-02, 1.3978e-02, + 9.6924e-02, -6.2953e-03, 7.1078e-02, -1.9884e-02, 
-1.0224e-01, + -1.7645e-01, -6.0434e-02, 7.0719e-02, 1.1863e-02, -1.4188e-02, + -8.5998e-02, 8.1130e-02, 5.8946e-02, -8.1932e-02, -1.1164e-02, + -9.0257e-01, 2.2129e-02, 5.9473e-02, -6.6727e-02, -3.7941e-02, + 7.0465e-03, -1.1116e-02, 5.4991e-02, -1.0147e-01, -1.0370e-01, + 5.7779e-02, -1.7909e-01, -1.2021e-01, 7.5867e-02, -2.8435e-02, + 2.3324e-02, -3.7433e-02, -2.6374e-02, 1.0537e-01, 1.8956e-01, + -1.8281e-02, 9.0064e-03, 9.5987e-02, 2.0872e-01, -1.0442e-01, + -1.9331e-02, -3.1765e-02, -7.1338e-02, 3.3094e-02, 1.6677e-01, + -1.3918e-01, 3.8584e-02, -1.0807e-01, 1.3928e-01, -2.9507e-02, + 1.3837e-02, -1.9234e-02, -1.1115e-01, 2.1526e-02, 5.8286e-02, + -3.8775e-02, -5.9076e-03, -2.4913e-02, -6.5648e-02, -5.4430e-02, + -3.0729e-02, 9.4677e-02, -1.9277e-01, 5.0177e-02, -2.1330e-02, + -4.6595e-02, 2.3514e-02, -7.6106e-02, 9.4801e-02, -1.6368e-02, + -2.9171e-02, -1.3751e-01, -7.4673e-02, 3.7233e-02, -2.6263e-02, + 6.3678e-02, 1.6571e-01, 1.0001e-02, -3.4022e-02, 8.4699e-02, + 7.3399e-02, -1.3803e-01, 6.7760e-02, 1.6934e-01, -7.5010e-02, + 1.6691e-02, 7.5057e-03, 1.0972e-01, 7.7287e-02, -2.8700e-02, + 1.3740e-02, 1.0455e-02, -2.6215e-02, 7.3498e-02, -1.2539e-01, + -1.8040e-02, -1.8961e-01, 1.2432e-01, -6.3336e-02, 9.7975e-02, + -4.1474e-02, -1.3091e-01, 2.9420e-02, 8.7890e-03, -3.5217e-02, + 4.1313e-02, 1.0067e-01, 9.6062e-02, 4.5217e-02, 1.1410e-05, + -9.3742e-02, -2.5187e-02, -1.3336e-01, 7.0206e-02, -3.5455e-02, + -3.5793e-02, 6.1289e-02, 1.0714e-01, 9.0462e-02, -6.6959e-02, + -5.5268e-02, -2.4237e-02, 9.0868e-02, -2.7395e-04, -6.6102e-02, + -1.4149e-02, 2.2778e-02, -2.2358e-02, 1.6611e-02, -1.3722e-01, + -1.0803e-03, 9.2254e-03, 9.3046e-04, -9.5271e-02, 7.2699e-03, + -1.5842e-02, -6.4155e-02, 2.2247e-02, -5.4963e-03, -1.6438e-02, + -1.1363e-02, 3.0167e-02, -5.5412e-02, 1.5861e-01, 3.8205e-02, + 1.0428e-01, 1.4365e-01, 5.1521e-03, 9.7592e-02, 7.0424e-02, + -5.2993e-03, 6.1979e-02, -1.1234e-01, 1.3335e-01, -3.8977e-03, + 1.5236e-03, 5.9121e-02, -4.6393e-02, 
-7.4197e-03, 1.5965e-01, + -7.6934e-02, 2.0333e-01, 4.7435e-02, 1.1154e-01, 1.3193e-01, + 1.2094e-01, -2.3863e-02, 1.9543e-02, 2.3810e-02, 2.4418e-02, + 7.7090e-02, 6.6509e-02, -1.1898e-02, -3.2068e-02, 9.7540e-02, + 8.0355e-02, 2.5759e-02, -1.3683e-02, -4.1099e-02, -6.9798e-02, + -1.1555e-01, -9.3035e-02, 9.1930e-02, 1.1443e-01, 1.3131e-01, + -7.3821e-03, 2.6003e-02, -1.0138e-01, 1.0635e-01, 8.4075e-02, + -6.2905e-02, 7.4726e-02, 6.4077e-02, -7.1340e-02, 5.6060e-02, + -1.7636e-02, 1.4522e-02, 6.7943e-02, -4.5894e-02, 8.4953e-02, + -5.2461e-02, -1.8256e-01, -6.3344e-03, -1.4413e-02, -3.7308e-02, + -2.5780e-02, 5.6539e-02, 4.2219e-02, 1.4163e-01, -3.2828e-02, + 3.6561e-02, 6.6173e-02, -7.5909e-02, 1.7378e-02, -8.9888e-02, + -1.4279e-01, 7.4994e-02, 1.5726e-01, 3.1206e-02, -9.0772e-02, + 6.8410e-02, 2.9224e-02, 4.4098e-02, -1.3920e-02, 9.1034e-02, + -4.6151e-02, -5.6685e-02, 5.4332e-02, 7.8448e-02, -1.0195e-01, + 1.2775e-01, -1.4901e-02, 1.1812e-01, 1.7138e-01, 7.7575e-02, + -1.0953e-02, -4.2166e-02, -3.0236e-04, 1.1864e-01, -1.0792e-01, + 5.7050e-02, -6.2270e-02, 9.0958e-02, -6.9540e-02, 9.6204e-02, + 4.0174e-02, -1.6757e-02, 1.3001e-01, 1.4080e-02, 2.0438e-01, + 1.7934e-01, -1.0523e-02, -4.7175e-02, 1.0229e-01, -6.7819e-02, + 7.1270e-02, -9.5283e-02, -5.8948e-02, 1.1191e-01, 1.2083e-01, + -1.7280e-02, 1.4690e-02, -1.0733e-01, -7.0945e-03, 1.0890e-01, + -4.4923e-02, -1.9094e-01, -1.7491e-01, 5.1405e-03, -3.6598e-02, + -2.1371e-01, 6.9781e-02, 8.6824e-02, 7.3522e-02, 5.1444e-03, + 2.4714e-02, -3.9927e-02, -3.7512e-02, 5.3088e-03, 3.9691e-02, + -2.1449e-02, -4.3316e+00, -2.5358e-02, 5.2166e-02, -3.0075e-02, + -1.6318e-01, 2.6883e-02, -7.9553e-02, 3.8043e-02, 8.0910e-02, + -1.0636e-01, 2.9228e-02, -3.2607e-02, -2.1576e-03, -1.1461e-01, + 2.1159e-01, -8.1155e-03, -4.2389e-02, 4.4097e-02, -4.2229e-02, + 1.5469e-01, 8.3980e-02, 2.8703e-02, -1.6500e-02, -1.6388e-01, + -4.1705e-02, 8.7547e-02, -3.8063e-02, -5.4893e-02, 6.7877e-02, + -1.8283e-02, 1.2425e-01, 9.7803e-02, 
-5.3695e-03, 6.0873e-02, + -4.2723e-02, 1.6263e-02, 4.4332e-02, -8.6944e-02, -4.4148e-02, + -4.8852e-02, -1.3835e-02, 2.1686e-02, -2.0245e-01, 9.2224e-02, + -3.2831e-02, -2.5258e-01, 1.2468e-01, 5.1174e-02, 1.4680e-01, + 3.0071e-02, 6.5667e-02, 1.4609e-01, 1.1543e-01, 3.1377e-02, + 5.9735e-02, -5.4245e-02, 2.8447e-02, -9.4915e-02, -1.0815e-01, + 7.3491e-02, -1.7688e-01, -1.1766e-01, 8.3479e-02, 8.7339e-04, + -4.7636e-02, -1.5777e-01, 3.0539e-02, 1.1327e-01, -7.4810e-03, + -1.7076e-03, -1.1211e-01, -8.4320e-02, -1.8981e-02, 1.2201e-01, + 3.7016e-02, 2.1191e-02, 1.3702e-02, 2.2009e-02, -3.0147e-02, + -4.6553e-03, 5.7443e-02, 1.1077e-01, -1.2032e-01, -1.3606e-01, + -4.3071e-02, 1.1801e-01, -1.3836e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0171, 0.0130, 0.0185, ..., 0.0060, -0.0017, -0.0105], + [-0.0364, 0.0154, -0.0142, ..., 0.0086, 0.0068, 0.0017], + [-0.0113, -0.0242, 0.0130, ..., 0.0225, 0.0031, -0.0107], + ..., + [ 0.0166, 0.0227, -0.0178, ..., -0.0059, 0.0132, -0.0079], + [ 0.0282, 0.0054, 0.0172, ..., 0.0009, 0.0006, 0.0101], + [ 0.0041, 0.0177, -0.0183, ..., 0.0003, 0.0102, 0.0056]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.2388, -0.3625, -0.0865, ..., -0.3342, -0.2629, -0.1206], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[ 1.4420e-02, -3.2654e-02, -8.9569e-03, ..., 8.2855e-03, + 1.0498e-02, -1.5457e-02], + [-7.6370e-03, -2.0157e-02, 1.2436e-02, ..., 4.3762e-02, + 3.8452e-02, -2.3422e-02], + [ 1.2445e-04, 2.7905e-03, -6.9084e-03, ..., -7.2594e-03, + 1.1620e-02, 1.2497e-02], + ..., + [ 2.7823e-04, 7.8583e-03, -2.6993e-02, ..., 2.8183e-02, + -4.7226e-03, 4.9896e-03], + [-6.7711e-03, -6.6414e-03, -4.2305e-03, ..., 5.4321e-03, + 1.3855e-02, 1.0252e-05], + [ 2.7084e-03, 7.5684e-03, -7.6370e-03, ..., 2.0428e-03, + -1.5198e-02, -6.1722e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([ 
1.6052e-02, 1.0201e-02, -8.6594e-03, -4.4342e-02, -7.0618e-02, + 1.1162e-02, -3.6133e-02, -5.5695e-02, 8.9783e-02, -7.8552e-02, + -4.6814e-02, 4.4342e-02, 2.7924e-02, -2.4399e-02, -2.2797e-02, + 3.2745e-02, -5.3680e-02, -2.0996e-02, -4.4037e-02, -4.6631e-01, + -5.6946e-02, 4.1687e-02, 4.3304e-02, 2.7786e-02, 6.3904e-02, + 7.9956e-02, -6.8481e-02, 2.7100e-02, -2.1866e-02, 5.5389e-02, + 4.5807e-02, 4.8409e-03, -1.7181e-02, 5.1270e-02, 3.7201e-02, + -2.4204e-03, -7.8491e-02, -2.8763e-02, 6.2927e-02, 4.0436e-02, + -2.8854e-02, 2.9388e-02, -2.1698e-02, -6.4819e-02, 9.0942e-03, + 7.4730e-03, 1.0193e-01, -5.1788e-02, -4.5746e-02, -4.4373e-02, + -6.1554e-02, 8.2626e-03, -7.9285e-02, 3.9642e-02, -2.2995e-02, + 5.6549e-02, -3.1433e-02, -2.1362e-03, -6.9031e-02, -1.1696e-02, + 1.2123e-02, 3.1708e-02, -1.4946e-02, -3.5370e-02, -2.2980e-02, + 4.5166e-02, -2.7908e-02, -1.0938e-01, -4.7424e-02, -9.8389e-02, + 6.4636e-02, -2.4658e-02, -1.8875e-02, -4.8370e-02, 4.2480e-02, + -3.7872e-02, 6.0883e-03, 4.2053e-02, 7.9575e-03, 3.3356e-02, + 4.2572e-02, -1.9627e-03, 5.0842e-02, -2.4002e-02, 1.1917e-02, + -5.9631e-02, -3.0804e-03, -7.9346e-02, -7.4585e-02, 1.3626e-02, + -1.1493e-01, -1.7883e-02, -2.7481e-02, 9.0210e-02, 5.0354e-02, + -2.0340e-02, -3.8391e-02, 2.9404e-02, 3.5767e-02, 7.1350e-02, + 6.0577e-02, -1.0492e-01, -7.0251e-02, 3.9551e-02, -5.0720e-02, + -2.0462e-02, 3.2684e-02, -6.1279e-02, 5.0415e-02, -5.6000e-02, + 9.8755e-02, -3.2623e-02, -8.6487e-02, -3.4210e-02, -4.0344e-02, + 2.0935e-02, 4.9530e-02, 4.4525e-02, -4.6234e-02, -7.0152e-03, + -1.1536e-02, 4.9133e-03, -4.8599e-03, 1.8417e-02, 1.7548e-02, + 1.2192e-02, -9.1248e-03, 1.2292e-01, 5.0293e-02, 9.3155e-03, + -6.1218e-02, 6.2073e-02, 2.1805e-02, 2.1942e-02, -1.9531e-02, + 2.4841e-02, 3.4428e-03, -4.9530e-02, 7.4890e-02, 1.9943e-02, + 1.0791e-01, -4.0833e-02, 1.1581e-02, -3.7708e-03, -1.5732e-02, + -1.6241e-03, -2.5009e-02, -5.6244e-02, 6.9389e-03, -1.8646e-02, + 2.1500e-02, -1.2802e-02, 2.7405e-02, -5.1270e-02, 
-5.3192e-02, + 3.5431e-02, 3.1242e-03, 6.0120e-02, -6.2988e-02, 1.6846e-02, + 1.5945e-02, -4.3335e-03, 2.6749e-02, -3.8116e-02, -5.8624e-02, + 2.7939e-02, 1.1310e-01, -1.2802e-02, 7.2266e-02, 1.4105e-03, + 9.4452e-03, -6.7078e-02, -1.2672e-02, -1.2039e-02, -1.0199e-01, + -3.8513e-02, -2.9129e-02, -9.3140e-02, 1.2561e-01, 1.0223e-01, + -5.7129e-02, 9.0790e-03, -3.0251e-03, -2.7313e-02, -2.7008e-02, + 4.2908e-02, -8.0383e-02, -1.9067e-01, 2.3300e-02, 3.2776e-02, + -1.1169e-01, -1.4175e-02, 7.6828e-03, 8.9722e-03, 1.9699e-02, + 7.4463e-02, 1.0376e-01, -5.2094e-02, 1.1694e-01, 7.2388e-02, + 4.7424e-02, 2.6474e-02, -2.7180e-03, -7.5256e-02, -3.9703e-02, + 2.1324e-03, 2.1332e-02, -9.0027e-02, -4.5044e-02, 1.4114e-02, + -1.3870e-02, -3.0945e-02, 2.5883e-03, 1.4809e-02, 6.4758e-02, + 2.8717e-02, 4.7546e-02, -5.4199e-02, 4.0375e-02, 4.5380e-02, + 3.6682e-02, 1.3125e-04, 1.9722e-03, -5.7892e-02, -2.1072e-02, + 9.9304e-02, -8.0811e-02, 5.3162e-02, 4.6875e-02, 4.6921e-03, + -4.1580e-03, -4.0436e-02, 1.0590e-02, -3.3661e-02, -1.3885e-02, + -1.2598e-01, 4.5563e-02, -2.5955e-02, -1.8158e-02, 5.2521e-02, + 3.5950e-02, 6.2805e-02, 1.1520e-02, -6.6223e-03, -5.7831e-02, + -1.7944e-02, -4.5135e-02, 2.9724e-02, -3.3203e-02, -4.0932e-03, + -8.1177e-02, -2.3438e-02, 1.3756e-02, 1.4565e-02, -3.3661e-02, + -3.1235e-02, -3.8910e-02, -5.9113e-02, -1.2201e-01, -6.6605e-03, + -7.9895e-02, -8.3252e-02, -5.2246e-02, -1.0483e-02, 4.4739e-02, + -1.0933e-02, 2.1057e-02, 1.6663e-02, -2.7435e-02, -6.3591e-03, + 4.2694e-02, -8.2153e-02, 1.5106e-02, -2.7939e-02, 8.7708e-02, + 3.4271e-02, 5.5962e-03, 1.5182e-02, -1.1713e-01, -4.7211e-02, + -1.8417e-02, -5.8868e-02, -4.3823e-02, 4.2877e-02, 3.0701e-02, + -4.4586e-02, -9.9060e-02, 6.2744e-02, 3.6945e-03, -2.0569e-02, + 1.7212e-02, 6.6589e-02, 6.5422e-03, -7.2899e-03, 2.0370e-02, + -1.2108e-02, 1.3697e-04, 1.3647e-03, -6.7200e-02, -3.7567e-02, + -2.0752e-02, -2.4338e-02, -8.2886e-02, 6.1127e-02, -7.6172e-02, + 5.2216e-02, -8.7402e-02, 7.6538e-02, 
-7.1777e-02, 1.4297e-02, + 1.1879e-02, 1.1810e-02, -7.9956e-02, -1.0797e-01, -1.4229e-02, + 1.2405e-02, -3.7964e-02, 4.5685e-02, -3.7415e-02, 4.2343e-03, + -1.7639e-02, -4.4647e-02, 1.6220e-02, 2.0432e-02, 2.0430e+00, + -9.8450e-02, 5.6702e-02, -2.4724e-04, -3.8849e-02, 3.6133e-02, + -1.4502e-01, 8.0719e-03, 9.3689e-03, -4.4785e-03, 5.9845e-02, + -1.1639e-01, -3.2776e-02, 9.5276e-02, 4.3732e-02, 6.3110e-02, + 4.8370e-02, -2.0233e-02, -7.6355e-02, 4.1771e-03, -3.2715e-02, + 8.4778e-02, 3.7781e-02, 3.6287e-04, -4.7974e-02, -3.3478e-02, + 1.0468e-01, -8.3069e-02, -3.0289e-02, -3.8116e-02, 1.7334e-02, + 5.9753e-02, -2.5528e-02, -1.6190e-02, 3.8509e-03, 8.3313e-02, + 1.1879e-02, 4.3854e-02, 4.4739e-02, 7.8186e-02, 3.1464e-02, + -1.2276e-02, -7.3059e-02, 2.1149e-02, 8.8959e-03, -3.4760e-02, + 3.8452e-02, 6.4636e-02, 5.7465e-02, 1.4854e-02, -4.1718e-02, + 2.0065e-03, 1.3435e-02, 1.7014e-03, 1.0608e-01, -4.0245e-03, + 1.0063e-02, -2.1152e-03, -1.8509e-02, -1.0760e-01, 6.1249e-02, + -5.3650e-02, 6.3896e-03, 5.0262e-02, 4.8462e-02, 7.9775e-04, + -2.6413e-02, 5.1086e-02, 8.9417e-03, -4.1931e-02, 3.2837e-02, + -4.6387e-02, -7.0251e-02, -1.3779e-02, 6.0699e-02, -3.0365e-02, + 2.9541e-02, -1.7227e-02, -6.7749e-02, -4.4891e-02, -2.4643e-03, + -2.8870e-02, -4.9438e-02, 1.7319e-02, 1.7258e-02, 1.5808e-02, + 8.2825e-02, -3.2990e-02, 2.6413e-02, -2.1942e-02, 1.0645e-01, + 1.2561e-01, -1.4076e-02, -9.6741e-03, -9.7580e-03, 6.7444e-02, + -3.7201e-02, 6.8420e-02, -3.1036e-02, -4.1748e-02, 3.4119e-02, + 1.0010e-02, 3.4485e-02, 2.0081e-02, -8.8623e-02, -2.4200e-02, + 1.2131e-02, 1.6129e-02, -9.3323e-02, -8.3862e-02, 2.7252e-02, + 4.3854e-02, -7.9422e-03, 3.7811e-02, -1.9348e-02, 8.6670e-03, + 1.9474e-03, 2.4765e-02, 4.2084e-02, -7.7095e-03, -5.0171e-02, + -3.3722e-02, 6.4049e-03, 2.3300e-02, 1.6434e-02, 2.4323e-02, + 1.8066e-02, 1.6968e-02, -6.0211e-02, 4.5700e-03, -3.1342e-02, + 1.0109e-02, 1.3588e-02, 4.6265e-02, -3.0106e-02, 9.6924e-02, + 3.7048e-02, -5.9753e-02, -6.7596e-03, 
-4.4434e-02, -7.4463e-02, + 4.8553e-02, -1.9760e-02, 1.2524e-01, 6.9160e-03, 1.3672e-02, + -1.3000e-01, 4.5943e-04, -9.0820e-02, 3.8757e-02, -1.0222e-04, + 1.3878e-02, 1.1604e-02, -3.9154e-02, -3.4668e-02, -3.1624e-03, + -2.1835e-02, 4.1443e-02, 1.1955e-02, -5.5176e-02, 4.9286e-02, + -9.8206e-02, 2.6184e-02, -2.0859e-02, -4.6356e-02, -7.5500e-02, + 4.2686e-03, -9.6497e-02, 5.5054e-02, 8.9111e-03, 1.0323e-02, + -4.2419e-02, -4.8828e-02, -1.3281e-01, -6.1874e-03, -4.8584e-02, + 8.8549e-04, 3.3447e-02, -3.6865e-02, -1.9928e-02, 1.1330e-02, + 2.9358e-02, -1.9515e-04, 1.3184e-01, 1.6724e-02, -3.0303e-04, + -6.4575e-02, 1.5656e-02, -8.7585e-02, -5.6702e-02, -2.4384e-02, + 4.3091e-02, 8.2886e-02, -1.2524e-01, -4.3416e-04, 6.2866e-02, + 2.0966e-02, -2.7939e-02, 3.7292e-02, 7.5256e-02, -8.3191e-02, + -5.0659e-03, 3.5877e-03, 1.8738e-02, -4.9896e-02, -2.5497e-02, + 5.1056e-02, 1.6586e-02, 2.5009e-02, 8.5754e-02, -6.3904e-02, + -3.8025e-02, 5.7587e-02, -3.1403e-02, 6.9153e-02, 8.4900e-02, + 5.5817e-02, -5.0079e-02, 7.1350e-02, -1.9592e-02, 2.8137e-02, + 9.0759e-02, -2.9099e-02, 8.1558e-03, -4.1687e-02, 2.2964e-02, + 6.1798e-02, 2.2781e-02, 1.8173e-02, -3.5126e-02, 5.9013e-03, + 1.9638e-02, 4.8462e-02, -3.1799e-02, -4.3488e-02, -7.4005e-03, + -1.4023e-02, -7.3730e-02, -7.4219e-02, -1.8799e-02, -4.6234e-02, + 1.5388e-02, -7.0740e-02, -5.9479e-02, 5.1666e-02, -3.6072e-02, + -9.7561e-04, -2.4204e-03, -5.8632e-03, -5.8014e-02, 1.7624e-02, + -2.3365e-03, 6.2683e-02, 2.9175e-02, -1.5945e-03, 5.1575e-02, + 7.2754e-02, -1.0004e-01, 2.3239e-02, 3.6041e-02, -1.4709e-02, + -8.6823e-03, -4.5654e-02, 1.0431e-01, 1.8219e-02, 2.9541e-02, + -2.7130e-02, 8.1238e-02, -1.9852e-02, -4.0070e-02, -4.2297e-02, + -1.0567e-02, -4.9377e-02, -5.4550e-04, -3.5645e-02, 3.8269e-02, + 3.2806e-02, 5.8533e-02, -5.9998e-02, 1.0443e-03, 5.3501e-04, + 4.5868e-02, 4.1819e-04, -9.7778e-02, 3.9703e-02, 3.8971e-02, + 3.3508e-02, 3.2593e-02, -1.0040e-02, 2.6093e-02, -2.0981e-02, + -1.0513e-02, 4.8889e-02, 7.7087e-02, 
-4.7363e-02, -2.3239e-02, + -8.3435e-02, -3.1311e-02, 3.3936e-02, -6.5979e-02, 3.6041e-02, + 1.8372e-02, -3.1158e-02, 3.0701e-02, -1.1646e-01, -8.1238e-02, + -1.7509e-03, 4.7684e-03, 2.2217e-02, -4.8370e-02, 3.3630e-02, + -5.8563e-02, 3.6591e-02, -8.4763e-03, -1.1703e-02, 9.7122e-03, + 3.4271e-02, -2.8183e-02, 2.9190e-02, 2.5452e-02, -5.9998e-02, + 8.0719e-03, -5.5542e-02, -1.9007e-03, -3.5919e-02, -1.4893e-02, + -4.0436e-02, -8.2458e-02, 5.3223e-02, 4.0588e-02, 9.2545e-03, + 5.9143e-02, 2.1530e-02, 6.4209e-02, 1.4913e-04, -2.3804e-02, + 2.4689e-02, -3.0727e-03, 1.7593e-02, -5.5603e-02, -1.2331e-03, + -5.0171e-02, -3.5004e-02, 3.3600e-02, 2.1820e-02, -2.1591e-02, + 2.4460e-02, -4.1412e-02, -2.2293e-02, -4.1618e-03, 2.1927e-02, + -3.3741e-03, 7.3929e-03, 6.1083e-04, -4.9622e-02, 5.9509e-03, + -2.7496e-02, 4.1455e-01, 4.4556e-02, 2.9053e-02, 3.4912e-02, + -3.5065e-02, 3.6102e-02, 2.4399e-02, 8.8135e-02, -2.7924e-02, + -3.8391e-02, -2.7908e-02, 3.9490e-02, -6.0638e-02, -7.5256e-02, + 8.7402e-02, 1.2520e-02, 4.1046e-02, -4.9408e-02, 2.0691e-02, + 4.8645e-02, 9.2102e-02, -3.4241e-02, -3.1494e-02, -5.8105e-02, + 1.1554e-01, 6.1371e-02, -2.2675e-02, -8.6746e-03, -6.1554e-02, + 3.6621e-02, 6.9122e-03, 3.2318e-02, -7.4829e-02, 8.0322e-02, + 2.4734e-02, 1.1273e-01, 4.0039e-02, -8.9788e-04, -8.0795e-03, + -8.4152e-03, 1.7761e-02, 4.0833e-02, -7.9117e-03, 4.7035e-03, + -2.4872e-03, 2.5681e-02, -2.8564e-02, -3.9398e-02, 7.8049e-03, + -6.6345e-02, -1.0602e-01, 4.6448e-02, -5.2765e-02, 3.7506e-02, + 9.6436e-02, -1.4091e-02, 2.6962e-02, -8.5754e-02, 1.5701e-02, + -3.0655e-02, -1.7639e-02, 2.4582e-02, -4.4098e-02, -1.8066e-02, + 2.7969e-02, 6.6147e-03, -8.4412e-02, 1.3481e-02, -1.1090e-01, + 6.2378e-02, 1.5701e-02, -3.7140e-02, 2.2751e-02, -6.6040e-02, + 3.5492e-02, 2.5757e-02, -7.2937e-02, -2.1805e-02, -7.4158e-03, + -2.7374e-02, 3.4119e-02, 4.5685e-02, -2.6093e-02, -3.7170e-02, + -1.5736e-03, -1.2703e-03, 4.0558e-02], device='cuda:1', + dtype=torch.float16, 
requires_grad=True)Parameter containing: +tensor([2.5576, 2.3304, 2.3711, 2.3977, 1.9550, 2.6222, 2.4071, 2.4198, 2.5433, + 2.2927, 2.3707, 2.4486, 2.3675, 2.2586, 2.4003, 2.5166, 2.4092, 2.3295, + 2.3509, 1.9934, 2.4948, 2.4201, 2.5280, 2.3026, 2.3817, 2.3575, 2.4651, + 2.4091, 2.2956, 2.3569, 2.4069, 2.4324, 2.4017, 2.5549, 2.2891, 2.3673, + 2.5084, 2.3113, 2.2584, 2.4008, 2.2910, 2.5507, 2.4455, 2.3672, 2.4784, + 2.4578, 2.4305, 2.3201, 2.3507, 2.2985, 2.3682, 2.4973, 2.4444, 2.4272, + 2.4040, 2.2344, 2.4056, 2.3216, 2.4079, 2.3590, 2.3579, 2.3738, 2.3978, + 2.3264, 2.5303, 2.4412, 2.5666, 2.3776, 2.4179, 2.1270, 2.4574, 2.4055, + 2.4645, 2.4699, 2.4424, 2.5034, 2.5580, 2.3774, 2.3579, 2.5861, 2.5141, + 2.3922, 2.3188, 2.3972, 2.5189, 2.5155, 2.4887, 2.3563, 2.5517, 2.4372, + 2.3899, 2.4647, 2.5373, 2.5416, 2.4712, 2.4022, 2.2961, 2.5308, 2.4706, + 2.4441, 2.3014, 2.4697, 2.4678, 2.4144, 2.4452, 2.0670, 2.4215, 2.4244, + 2.3358, 2.3543, 2.2992, 2.5740, 2.4565, 2.4551, 2.5621, 2.2902, 2.5656, + 2.5313, 2.4438, 2.4695, 2.3469, 2.4974, 2.4416, 2.3992, 2.4196, 2.2348, + 2.6539, 2.6797, 2.4311, 2.4361, 2.3586, 2.6112, 2.6770, 2.4377, 2.4944, + 2.4049, 2.2290, 2.4017, 2.3639, 2.5649, 2.3928, 2.3823, 2.4417, 2.2946, + 2.4853, 2.3311, 2.3497, 2.5481, 2.4626, 2.5429, 2.4029, 2.3552, 2.3902, + 2.5832, 2.4849, 2.2304, 2.4417, 2.3722, 2.5731, 2.5410, 2.2837, 2.4604, + 2.3329, 2.4226, 2.4918, 2.2607, 2.3743, 2.5165, 2.4731, 2.5513, 2.5786, + 2.3786, 2.2037, 2.4933, 2.2872, 2.5195, 2.3746, 2.4236, 2.2408, 2.4877, + 2.5523, 2.4471, 2.4894, 2.5245, 2.2506, 2.1873, 2.2735, 1.4312, 2.3013, + 2.3922, 2.5230, 2.4725, 2.4565, 2.4358, 2.5594, 2.4528, 2.3324, 2.4306, + 1.2379, 2.2861, 2.4164, 2.3120, 2.3680, 2.5697, 2.4218, 2.4618, 2.4612, + 2.4438, 2.3577, 2.4119, 2.4494, 2.2746, 2.5410, 2.0422, 2.4465, 2.5530, + 2.1844, 2.4634, 2.3859, 2.2563, 2.4984, 2.3779, 1.9990, 2.4829, 2.4556, + 2.5563, 2.4028, 2.3832, 2.3399, 2.2638, 2.4324, 2.4482, 2.3053, 2.2566, + 2.3205, 2.4360, 2.4522, 
2.3403, 2.5395, 2.4561, 2.4037, 2.5031, 2.3726, + 2.4524, 2.4662, 2.4363, 2.6728, 2.4845, 2.4196, 2.4342, 2.5506, 2.3502, + 2.4282, 2.4350, 2.5386, 2.4616, 2.4830, 2.3861, 2.5172, 2.5778, 2.3977, + 2.4931, 2.4868, 2.4324, 2.4132, 2.3309, 2.4617, 2.4026, 2.6077, 2.3682, + 2.4094, 2.4648, 2.5450, 2.3728, 2.3849, 2.7174, 2.4184, 2.4798, 2.3318, + 2.4888, 2.4238, 2.2394, 2.2824, 2.5057, 2.5209, 2.3631, 2.4332, 2.3806, + 2.3419, 2.2953, 2.4176, 2.3549, 2.3651, 2.5080, 2.4547, 2.4592, 2.2354, + 2.3696, 2.3371, 2.4121, 2.4274, 2.5341, 2.3987, 2.6333, 2.5166, 2.4138, + 2.5587, 2.5251, 2.3318, 2.3929, 2.3471, 2.5221, 2.4845, 2.4899, 2.5011, + 2.4038, 2.6121, 2.4854, 2.3235, 2.5669, 2.3907, 2.2766, 2.3783, 2.4000, + 0.6357, 2.4000, 2.5624, 2.3160, 2.6602, 2.3403, 2.3703, 2.4046, 2.4510, + 2.4836, 2.4286, 2.1999, 2.6096, 2.4631, 2.3707, 2.3089, 2.4851, 2.3330, + 2.3177, 2.5466, 2.4031, 2.4949, 2.5606, 2.4180, 2.4414, 2.4380, 2.0319, + 2.4428, 2.4251, 2.1127, 2.4867, 2.3711, 2.4772, 2.4604, 2.3391, 2.3586, + 2.4182, 2.3580, 2.4760, 2.4144, 2.4725, 2.5182, 2.3207, 2.6489, 2.4085, + 2.3826, 2.4882, 2.3557, 2.4317, 2.3772, 2.4133, 2.4613, 2.4010, 2.4738, + 2.5751, 2.4574, 2.3113, 2.6367, 2.5771, 2.3439, 2.4362, 2.3548, 2.4980, + 2.3924, 2.4147, 2.3892, 2.5263, 2.7315, 2.4591, 2.4544, 2.3935, 2.3675, + 2.4730, 2.3705, 2.4611, 2.2936, 2.4690, 2.4581, 2.4748, 2.4659, 2.3809, + 2.4476, 2.6219, 2.3810, 2.5058, 2.7186, 2.2634, 2.7011, 2.4224, 2.6094, + 2.2897, 3.4400, 2.3407, 2.3741, 2.3515, 2.5698, 2.5394, 2.4029, 2.4405, + 2.3774, 2.6522, 2.4810, 2.1642, 2.3742, 2.8108, 2.4177, 2.2984, 2.4548, + 2.6247, 2.4611, 2.5190, 2.4920, 2.4434, 2.4291, 2.4715, 2.3702, 2.4663, + 2.2900, 2.4210, 1.7521, 2.3985, 2.3208, 2.4050, 2.4549, 2.4296, 2.3289, + 2.4849, 2.4413, 2.5168, 2.5119, 2.5425, 2.4593, 2.5218, 2.4031, 2.4426, + 2.3513, 2.4843, 2.5100, 2.3698, 2.3297, 2.2840, 2.4772, 2.6122, 2.2475, + 2.4717, 2.2813, 2.5931, 2.3489, 2.3603, 2.3900, 2.4951, 2.4265, 2.4145, + 2.3638, 2.5526, 2.2686, 
2.4221, 2.3169, 2.3508, 1.9304, 2.4101, 2.4658, + 2.4240, 2.3683, 2.4086, 2.4019, 2.3520, 2.4472, 2.6275, 2.3913, 2.3408, + 2.3086, 2.4151, 2.3883, 2.4458, 2.3428, 2.4694, 2.3540, 2.4129, 2.4636, + 2.7249, 2.3469, 2.4367, 2.3560, 2.4105, 2.3652, 2.4830, 2.5175, 2.2671, + 2.3230, 2.3949, 2.4571, 2.4217, 2.4320, 2.3076, 2.6991, 2.3035, 2.4677, + 2.4479, 2.4913, 2.3424, 2.4013, 2.4394, 2.3548, 2.3995, 2.3956, 2.4178, + 2.4282, 2.3959, 2.5658, 2.3178, 2.5060, 2.4149, 2.4419, 2.5459, 2.4721, + 2.3397, 2.6064, 2.6917, 2.4959, 2.4903, 2.2762, 2.4950, 2.2810, 2.3614, + 2.4633, 2.4814, 2.3575, 2.5248, 2.3733, 2.3363, 2.4094, 2.5385, 2.3083, + 2.3769, 2.5763, 2.4112, 2.2750, 2.4145, 2.3639, 2.5136, 2.3405, 2.4383, + 2.5640, 2.5239, 2.4995, 2.5569, 2.3197, 2.3858, 2.3962, 2.4429, 2.4544, + 2.3997, 2.3287, 2.4404, 2.3087, 2.3067, 2.4738, 2.3649, 2.5190, 2.5092, + 2.4250, 2.2719, 2.4374, 2.4496, 2.4747, 2.3308, 2.4564, 2.4331, 2.6464, + 2.4636, 2.2174, 2.3275, 2.4728, 2.5065, 2.3889, 2.3736, 2.3614, 2.4190, + 2.3662, 2.5910, 2.2835, 2.4849, 2.4330, 2.5681, 2.5034, 2.3754, 2.2966, + 2.2987, 2.5070, 2.4195, 2.6241, 2.4842, 2.3508, 2.4747, 2.3618, 2.3558, + 2.1667, 2.5238, 2.4680, 2.5973, 2.5392, 2.3654, 2.4019, 2.4716, 2.4832, + 2.5000, 2.5019, 2.3741, 2.3643, 2.3876, 2.3685, 2.6611, 2.4422, 2.4762, + 2.3547, 2.4025, 2.4497, 2.2040, 2.4888, 2.2723, 2.3743, 2.2826, 2.6054, + 2.3190, 2.4891, 2.5162, 2.4549, 2.2425, 2.4536, 2.4128, 2.4304, 2.4707, + 2.5659, 2.3483, 2.2955, 1.8539, 2.3997, 2.3637, 2.3968, 2.4626, 2.3946, + 2.3222, 2.3524, 2.5597, 2.3742, 2.2840, 2.3314, 2.3857, 2.2875, 2.4121, + 2.2458, 2.4827, 2.3292, 2.6354, 2.4387, 2.4942, 2.2355, 2.3579, 2.5134, + 2.2878, 2.3131, 2.5136, 2.2672, 2.4973, 2.4720, 2.3654, 2.5278, 2.4510, + 2.3359, 2.2845, 2.6296, 2.3547, 2.3985, 2.4756, 2.5552, 2.3655, 2.3423, + 2.4334, 2.3119, 2.4998, 2.4868, 2.6313, 2.3357, 2.4385, 2.4794, 2.4030, + 2.3261, 2.4024, 2.4096, 2.4862, 2.5003, 2.4382, 2.5027, 2.4801, 2.4794, + 2.4303, 2.5030, 2.3749, 
2.3007, 2.4419, 2.4550, 1.8549, 2.3850, 2.4200, + 2.3704, 2.6410, 2.2996, 2.3996, 2.4752, 2.5489, 2.4231, 2.5352, 2.2041, + 2.6158, 2.4349, 2.3655, 2.7523, 2.3759, 2.4892, 2.3502, 2.2487, 2.4969, + 2.4593, 2.5063, 2.4189, 2.4738, 2.4963, 2.3114, 2.3510, 2.3365, 2.4439, + 2.4067, 2.2807, 2.4631, 2.4164, 2.2501, 2.5119, 2.4704, 2.4727, 2.3767, + 2.4629, 2.4223, 2.4440], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 2.9932e-02, -3.0162e-01, 1.4128e-01, -3.4646e-01, -2.5106e-01, + -5.6301e-01, 2.2639e-01, 2.4600e-01, 1.6465e-01, -4.2078e-01, + -1.2340e-01, 3.7251e-01, 1.2406e-01, 2.7133e-01, 3.1003e-02, + 1.7342e-01, 1.8523e-01, 1.8913e-01, -1.4909e-01, -3.1221e-01, + -2.9462e-01, 1.4270e-01, 6.6221e-01, -4.7315e-01, 1.8876e-01, + 7.3583e-02, -1.2354e-01, -7.3950e-02, -1.2269e-01, 4.9641e-01, + 3.4733e-01, -3.2025e-02, -2.0315e-02, -4.4567e-01, -2.4298e-01, + -3.3707e-01, -3.7226e-01, 4.7930e-01, 6.1613e-01, -3.1205e-01, + -1.6558e-01, 6.1039e-02, 4.8515e-01, 4.7716e-02, -5.9613e-01, + -6.4904e-01, -4.5658e-01, -2.0424e-01, -1.2793e-01, 3.3709e-01, + -1.8851e-01, -8.0029e-01, -1.7977e-02, -3.8074e-01, -6.6202e-01, + 2.6686e-01, 5.5996e-01, -4.8043e-01, -6.7872e-02, 1.2299e-01, + -9.4741e-02, 5.9958e-02, 4.9218e-02, -4.2103e-01, 2.5196e-01, + 2.4068e-01, -3.8744e-02, -1.3429e-01, -4.8376e-01, -6.1607e-01, + 3.2976e-01, 9.5367e-02, -1.5271e-01, -4.6356e-01, -2.8587e-01, + -4.2759e-01, 6.3393e-01, 2.3609e-02, 1.5916e-01, 8.8886e-01, + 2.6343e-01, 3.4043e-01, -4.8861e-01, -1.8627e-01, -6.9661e-01, + -5.4499e-01, 4.9617e-01, -9.7585e-02, 4.6151e-01, -2.2765e-01, + -3.9411e-03, -1.6364e-01, -5.1623e-01, 2.0387e-01, -1.4929e-01, + -2.7565e-01, 2.2874e-01, -1.8453e-01, -5.3695e-01, 6.6773e-01, + 4.4026e-02, -4.9675e-01, -4.4427e-01, 1.0410e-01, 4.6265e-01, + 2.2047e-01, 4.1624e-01, 1.1387e-01, -6.0574e-02, -1.2546e-02, + 8.7225e-02, 4.3368e-01, -3.9527e-01, -2.3091e-01, 1.4382e-01, + -4.0146e-01, 6.7164e-01, 4.8646e-01, 2.2774e-01, 3.8298e-01, + 
-1.9689e-01, 2.4062e-01, -5.9467e-02, 2.6072e-01, 3.2621e-01, + -2.5951e-01, 2.8232e-01, 6.0466e-01, -2.6100e-01, 1.8510e-01, + 1.3541e-01, -5.1284e-01, 4.8758e-01, -7.8206e-02, 4.7141e-01, + 1.7501e-01, 1.4147e-01, -1.8194e-01, 3.3316e-01, -3.6307e-01, + -3.6573e-01, 5.3486e-01, -1.8098e-02, -2.2143e-01, 4.6298e-02, + 5.6899e-03, 4.3567e-01, -4.8673e-01, -6.7744e-01, -1.4599e-01, + 9.3934e-02, -1.6618e-02, 2.4010e-01, -5.1935e-01, -2.4059e-01, + 7.7139e-01, 5.8555e-01, -4.2442e-01, -2.0791e-01, -4.0307e-01, + -1.1149e-01, -2.0498e-02, 8.9808e-02, -4.4491e-01, 7.5951e-01, + 4.6165e-02, 3.6412e-01, -1.5113e-01, 3.0061e-01, -4.1646e-01, + 6.1315e-01, 3.3305e-02, -5.2668e-01, 6.5254e-01, 9.5107e-02, + 3.4002e-01, 7.6512e-02, -5.4823e-02, 9.7594e-02, -2.7147e-01, + -7.9573e-01, -9.5936e-02, 6.0138e-01, 6.2805e-01, -1.2078e-01, + 5.6416e-01, -2.2096e-03, 2.1145e+00, -1.0990e-01, -3.9488e-02, + -8.6518e-01, -4.6028e-01, 2.7963e-01, 5.6114e-01, 3.7760e-01, + -9.8124e-02, 1.6267e-01, 3.0374e-02, -5.7425e-01, 1.3610e-01, + -4.6454e-01, -7.3948e-02, 1.4996e-01, -4.4412e-01, -3.2803e-01, + -2.5651e-01, 1.9774e-01, 4.8669e-01, 9.0103e-02, -4.5681e-01, + 2.8155e-01, -1.8632e-01, -8.3922e-01, 2.0943e-01, 2.9468e-01, + -3.3018e-01, -2.1694e-01, -2.5128e-01, -3.5858e-01, 7.1802e-02, + 5.5272e-01, 1.1630e-01, -4.3954e-01, 6.8150e-01, 3.1679e-02, + 2.2199e-01, -1.1738e-01, 3.0026e-01, 1.3040e-01, 2.2225e-01, + -2.4677e-01, -1.0417e-01, 3.3233e-01, 1.9889e-01, -5.1069e-01, + -1.9670e-01, 2.4504e-01, 1.9361e-01, -2.3999e-01, 2.3646e-01, + -6.1190e-02, 4.4584e-03, 1.3534e-01, -1.4524e-01, -3.5619e-01, + -5.1144e-01, -4.9386e-01, 3.5775e-01, -3.1955e-01, 6.4570e-01, + -1.2014e-01, 7.9763e-01, 2.1697e-01, 7.6738e-02, -2.2917e-02, + -5.9375e-01, 5.2489e-01, -6.0439e-01, -2.7954e-01, -5.2342e-01, + 8.9634e-02, 1.5874e-01, 5.5865e-01, 2.5939e-01, -5.7272e-02, + -4.0273e-02, 2.4863e-01, -1.7334e-01, 6.4374e-01, -2.2190e-01, + -3.6028e-01, 1.9930e-01, -3.9872e-01, -3.2904e-01, 1.3150e-01, + 
-7.3514e-01, -3.3150e-01, 3.8126e-01, -1.5717e-01, -5.4585e-02, + -4.2850e-01, -7.4214e-01, -1.7948e-01, 4.3636e-01, -4.9748e-01, + 4.5393e-01, 1.7344e-01, -9.9264e-03, 3.6427e-01, -7.0329e-02, + -4.7180e-01, 1.2721e-01, 5.0421e-01, -3.5168e-01, -3.9122e-01, + -2.7454e-01, 2.4527e-01, -6.6616e-01, 4.3131e-01, -2.6262e-01, + -3.2642e-01, 8.5823e-02, -5.1454e-02, -1.1051e+00, 4.8101e-01, + 2.7046e-01, -1.9462e-01, 3.6501e-01, -2.0426e-01, -2.7896e-01, + 3.8353e-01, -4.3028e-01, 4.5165e-01, 5.5885e-01, -3.3605e-02, + -1.0840e-01, 5.6463e-01, 4.7314e-01, -5.7062e-02, 5.5220e-01, + -3.7576e-01, -2.8350e-01, 1.8441e-01, -3.4029e-01, 5.2433e-02, + -4.3591e-01, -5.0902e-01, 3.3369e-01, 2.0976e-01, -2.3188e-01, + 4.9477e-01, -6.9597e-02, -2.6938e-01, -5.6728e-02, 6.2881e-01, + 7.3492e-02, -9.3713e-01, 1.1243e-01, 3.7966e-02, -6.5733e-02, + 3.9749e-01, -1.6061e-01, -5.4494e-01, 4.2282e-01, -3.0773e-01, + 7.1377e-01, -6.8213e-01, -1.5114e-01, -1.4101e-01, 1.6538e-01, + 3.5520e-01, 4.3173e-01, -1.6039e-01, 4.6407e-02, -9.5964e-02, + 8.6294e-02, -3.3205e-01, 5.8999e-02, -3.7933e-01, 1.3829e-01, + -5.0132e-01, -4.4829e-01, -8.1570e-02, -5.1014e-01, 3.7524e-01, + -2.9223e-01, 3.9546e-02, 4.7483e-02, 1.9292e-01, 2.2456e-01, + -2.5084e-01, -7.0861e-02, 2.6201e-01, -4.7447e-02, 1.1546e-03, + -1.7990e-01, -7.7817e-02, 4.9409e-01, -4.3644e-01, -3.4852e-01, + 9.1261e-02, -7.7327e-01, -3.1767e-01, 3.4946e-01, 2.7020e-01, + -1.8949e-01, 8.8170e-02, 5.3733e-01, 2.0567e-01, 1.3307e-01, + 5.8523e-01, -7.4638e-01, 6.8873e-01, 4.0128e-01, -5.1895e-01, + -1.4250e-01, 1.2085e-02, -1.7605e-01, -1.8713e-01, 1.7004e-02, + 1.2924e-01, 6.0687e-01, -4.2209e-01, -6.2843e-01, 5.2935e-01, + -6.1243e-02, 2.2317e-01, -6.2447e-02, -2.8592e-01, 8.2672e-01, + 1.1707e-01, -5.4270e-01, 2.9941e-01, 2.8900e-01, 8.6537e-01, + -1.6511e+00, 1.1635e-01, 2.8656e-02, 2.1624e-01, 1.9607e-01, + 5.0981e-01, -4.5917e-01, -1.5322e-01, 4.5360e-02, -5.6081e-01, + 4.8290e-01, 3.4567e-01, -5.8747e-02, -6.8929e-01, 1.2810e-01, 
+ -2.5956e-01, -3.9461e-01, -6.4888e-01, -8.8910e-03, -2.0434e-02, + -1.4205e-01, -2.4369e-02, 5.7545e-01, 1.1698e-01, -3.8967e-01, + 3.4959e-01, 3.5758e-01, 3.2217e-02, -6.6200e-01, -6.6839e-02, + 5.8133e-02, 2.7942e-01, 2.9290e-01, 1.7966e-02, -3.6733e-01, + -9.9340e-02, -1.3521e-01, -2.7681e-01, -4.2879e-01, -3.3071e-01, + -5.7073e-01, -5.8907e-01, -5.0452e-01, 2.0418e-01, 1.0199e-01, + -3.9590e-01, 1.8624e-01, -2.9696e-01, 1.8448e-01, -3.0404e-01, + 4.6199e-01, -3.9756e-01, 3.4744e-01, -2.6387e-01, -1.1653e-01, + 5.9746e-01, 1.8090e-01, -1.2004e-01, -1.1056e-01, -5.3151e-01, + -4.6283e-01, 3.1908e-02, -7.4224e-01, -3.6923e-01, -5.3822e-01, + -4.1702e-01, 2.8470e-01, 4.4310e-01, 1.6097e-01, 1.9126e-01, + -1.3619e-01, 2.4835e-01, 6.0324e-02, 8.2238e-02, 3.8127e-01, + -8.2515e-02, 4.5638e-01, 2.9320e-01, 1.9801e-01, 8.7940e-03, + -2.9512e-01, -5.5449e-01, 1.6342e-02, 2.2033e-01, 5.8513e-02, + 3.4597e-01, -3.2905e-01, -1.1703e-01, 1.9318e-01, 2.9449e-01, + 1.0837e-01, -1.0457e-03, -1.1668e-01, 1.6970e-01, -7.2471e-02, + -4.7684e-01, 9.0855e-02, 7.9242e-02, -8.0585e-02, 3.2629e-01, + 1.2611e-01, 3.2879e-01, -2.5022e-01, -1.6554e-01, 6.3878e-01, + 3.5972e-01, -1.9698e-01, -2.6073e-01, 5.1697e-01, -3.3824e-01, + -4.4351e-01, 1.3762e-01, 1.5554e-01, -3.4920e-02, -1.0653e-01, + 9.6069e-02, -2.2788e-01, 7.0399e-02, 1.6845e-01, 2.9433e-01, + -7.1957e-01, 4.2253e-01, -1.6981e-01, 4.2037e-01, -2.1842e-01, + 2.3953e-01, -3.1829e-01, -8.9034e-01, 7.7953e-01, 5.7918e-01, + 2.7067e-01, -4.5477e-01, -6.4798e-01, -8.2738e-02, 1.8428e-01, + 3.2654e-02, -2.2082e-01, 9.1149e-01, 4.6169e-01, -2.0381e-01, + 2.3860e-01, 4.9382e-01, -6.1471e-02, -4.3793e-01, -3.6538e-01, + -2.2946e-01, -1.1087e+00, 1.6603e-01, 6.7709e-01, -1.9357e-01, + -2.8555e-01, -3.4081e-01, -2.5572e-01, 2.9791e-01, -2.6723e-01, + -6.2435e-01, 4.9662e-01, 1.0662e-01, -4.2844e-01, 5.9660e-01, + 7.0396e-01, 8.5778e-02, -5.7455e-01, -2.1013e-01, 4.0237e-01, + 4.0175e-01, -4.7408e-01, 1.8092e-01, 6.5065e-01, 2.9740e-01, 
+ -2.0762e-01, -1.7106e-01, -3.5587e-01, 5.4142e-01, -3.0241e-02, + 2.4929e-02, -3.9181e-01, 4.0413e-01, -5.2466e-01, -2.1312e-01, + 4.6921e-01, 2.1146e-02, -1.4481e-01, 6.1093e-01, 5.1777e-02, + -1.5702e-02, -1.3893e-01, 1.2269e-02, -4.5805e-01, -5.3720e-01, + -1.7545e-01, -6.6881e-02, -1.4616e+00, -1.8604e-01, 4.2172e-01, + -3.1246e-01, -4.6231e-02, -1.8316e-01, 3.5422e-01, 8.5290e-02, + 3.7975e-01, 5.6355e-01, 2.4853e-01, -3.5697e-01, 4.5364e-01, + -1.7780e-01, -3.3519e-03, -3.2779e-01, 5.3199e-01, -3.6808e-02, + 3.8655e-01, 2.2483e-02, 3.1360e-01, -1.3338e-01, 2.1981e-01, + -4.9110e-01, 6.6587e-01, -1.4109e-01, -1.5661e-01, -1.1334e-01, + -4.6857e-01, 4.8447e-01, -2.2792e-01, 1.3462e-01, -4.0982e-01, + 2.8143e-01, -4.1661e-01, -7.0864e-01, 2.5823e-01, -5.3160e-01, + 1.9616e-01, -1.9649e-01, -4.1130e-01, -1.6410e-01, 1.6438e-01, + 5.2778e-02, -6.6124e-01, -3.2948e-02, -5.4545e-01, 5.2703e-01, + 2.8672e-01, 5.7645e-01, -2.6746e-01, -3.9089e-01, 2.0937e-01, + -3.6736e-02, 2.4706e-01, 3.5762e-01, -1.6023e-01, -3.1083e-01, + -4.6344e-01, 4.1670e-01, -7.5496e-02, 1.4926e-01, -3.3287e-01, + -4.2243e-01, -6.3298e-02, 2.5848e-01, 3.7625e-01, 8.7703e-02, + -3.2085e-01, 3.9825e-01, 4.0217e-01, 3.1725e-01, -1.6784e-01, + -4.0644e-01, 5.2821e-01, -5.1684e-01, -5.4546e-01, 1.1714e-01, + 3.2991e-01, 2.9674e-01, 1.7642e-01, 5.5854e-01, -3.4519e-01, + -1.7785e-01, 2.0199e-01, -5.2339e-01, -6.3149e-02, -3.0370e-01, + -2.8747e-02, -3.4003e-01, -1.5512e-01, -4.3562e-01, 6.5145e-01, + 3.0143e-01, -3.2293e-01, 3.3838e-01, -1.3263e-01, 3.5033e-02, + 4.7880e-01, -8.1988e-01, -2.0750e-01, -6.8490e-02, -5.9730e-02, + 4.1138e-01, 2.3811e-01, -3.2911e-01, -6.8965e-02, 5.5200e-01, + 1.9072e-01, 6.2259e-02, 6.6072e-01, -6.0550e-01, 2.3396e-01, + 1.1968e-01, -5.3494e-01, 6.5234e-02, -1.3458e-01, -2.1542e-01, + -7.3702e-02, 1.8912e-01, 1.5528e-01, 4.7149e-01, -2.0252e-01, + -6.8936e-01, -1.6355e-01, 2.8603e-02, 7.8233e-02, 3.6492e-01, + 1.3363e-01, -4.6805e-01, -3.1390e-01, -7.6726e-01, 
-3.2683e-01, + -2.0703e-01, 5.6110e-01, 3.6255e-01, -2.0338e-01, 4.8012e-01, + -2.3513e-01, 3.9425e-01, 5.9808e-01, 8.5797e-01, 6.0902e-01, + 1.5637e-01, -7.8162e-01, -6.8807e-02, 1.1870e-01, -1.4694e-01, + -6.4079e-02, -5.5715e-01, -2.5838e-01, 8.1312e-01, 4.2588e-01, + -2.3839e-01, -5.9330e-01, -1.5378e-02, 5.7079e-01, 1.5399e-01, + 1.2549e-01, -1.4820e-01, 6.9585e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 0.0030, -0.0047, 0.0065, ..., 0.0104, -0.0140, 0.0053], + [-0.0176, 0.0191, -0.0227, ..., 0.0217, 0.0145, -0.0007], + [ 0.0033, -0.0146, 0.0133, ..., 0.0050, -0.0265, -0.0137], + ..., + [-0.0195, -0.0042, -0.0021, ..., -0.0063, 0.0234, -0.0025], + [-0.0185, 0.0035, -0.0008, ..., 0.0019, 0.0356, 0.0087], + [-0.0246, -0.0227, -0.0082, ..., -0.0005, -0.0009, 0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.1141, 0.1932, 0.1205, ..., -0.0247, 0.0140, 0.0328], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0028, -0.0143, 0.0161, ..., 0.0151, 0.0104, -0.0198], + [-0.0164, -0.0346, 0.0067, ..., 0.0199, -0.0166, -0.0236], + [-0.0236, -0.0092, -0.0233, ..., -0.0062, -0.0015, 0.0028], + ..., + [ 0.0289, 0.0172, -0.0065, ..., -0.0083, -0.0195, 0.0067], + [ 0.0268, -0.0132, 0.0347, ..., 0.0141, 0.0156, -0.0042], + [-0.0458, 0.0232, -0.0022, ..., -0.0111, 0.0161, 0.0254]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-1.0736e-01, 1.0443e-01, 7.3425e-02, -6.7932e-02, -9.2896e-02, + -8.6670e-02, -1.2103e-01, -5.3589e-02, 1.7627e-01, -6.3171e-03, + -4.8187e-02, 1.4111e-01, 1.7212e-01, -4.9286e-02, 1.7249e-01, + 4.2206e-02, -9.5154e-02, -8.4839e-02, -1.5198e-01, -2.9883e-01, + 4.8187e-02, 3.7262e-02, 2.3163e-02, -1.1139e-01, 1.5063e-01, + 1.6846e-01, -1.2781e-01, 1.2366e-01, -1.0736e-01, 5.1331e-02, + 4.5276e-04, -9.5032e-02, -7.3669e-02, -1.0352e-01, 3.6224e-02, + -1.9043e-02, -1.5588e-01, 
-9.9365e-02, 1.8542e-01, -1.8408e-01, + -9.4788e-02, -7.6111e-02, 1.0559e-01, -1.2573e-01, -5.5450e-02, + -7.9773e-02, -1.0445e-02, -8.3435e-02, 1.5945e-02, 2.6093e-02, + -3.2898e-02, -7.7942e-02, 1.5137e-02, 1.0034e-01, -1.7871e-01, + 3.0914e-02, 1.0242e-01, 7.2754e-02, -8.0872e-02, -2.6566e-02, + 1.4473e-02, 1.0205e-01, -6.9275e-02, -1.0187e-01, -4.8971e-04, + 1.1060e-01, -1.6309e-01, -2.8381e-02, -1.4929e-01, -2.3840e-01, + 1.5881e-01, 2.1576e-02, -2.5659e-01, -2.0959e-01, -6.7871e-02, + -1.3831e-01, -1.1322e-02, 1.9482e-01, 7.0679e-02, 8.3740e-02, + 8.5754e-02, 6.2469e-02, 4.4189e-02, -1.2451e-01, -1.7426e-02, + -8.2214e-02, 5.7739e-02, -1.0864e-01, 5.4840e-02, -1.2341e-01, + -1.9739e-01, -7.0007e-02, -8.5571e-02, 1.8713e-01, 1.6150e-01, + -6.2675e-03, 1.1002e-02, -1.0147e-02, 2.7344e-02, 1.8445e-01, + 2.1648e-03, -2.4158e-01, -2.2278e-01, 3.5980e-02, -2.7328e-02, + -1.2108e-02, -9.7717e-02, 2.1240e-01, 4.9103e-02, -4.3526e-03, + 9.4177e-02, 1.1572e-01, -9.4666e-02, 9.7534e-02, -1.1841e-01, + 3.8849e-02, 1.2030e-01, 1.1273e-01, 2.3849e-02, -7.4219e-02, + -6.2622e-02, 5.7129e-02, 1.0669e-01, 1.3965e-01, 5.4871e-02, + -2.2141e-02, 2.1179e-01, 1.6406e-01, 3.1799e-02, 7.4036e-02, + 1.6895e-01, 5.3215e-03, 1.0449e-01, 1.9153e-01, 1.9507e-01, + 6.2500e-02, 7.5623e-02, -1.5210e-01, 9.2590e-02, 2.7084e-02, + 1.0925e-01, 1.1957e-01, -2.2449e-03, 2.6855e-02, -1.4319e-01, + 1.7468e-01, -5.4504e-02, -1.5747e-01, 2.6810e-02, -1.9165e-01, + -1.8250e-01, 5.1918e-03, 2.1277e-01, -1.1560e-01, -1.3281e-01, + 1.6016e-01, -1.0229e-01, -1.1914e-01, -1.4453e-01, -5.8136e-02, + -1.0779e-01, 4.4891e-02, 3.8757e-02, -1.1139e-01, 5.1636e-02, + 1.5125e-01, 1.2842e-01, -2.4377e-01, 1.0999e-01, 2.9358e-02, + 1.4502e-01, -1.5527e-01, -2.1805e-02, 2.1692e-01, -1.1218e-01, + 1.0236e-01, 7.4539e-03, -8.4717e-02, 1.4966e-01, 1.8872e-01, + -5.7465e-02, -5.9624e-03, -1.1603e-01, -1.1726e-02, -6.6223e-02, + 1.8970e-01, -8.3496e-02, -5.7422e-01, 1.2646e-01, 1.7444e-01, + -7.0557e-02, 3.9337e-02, 
-4.2816e-02, -4.2801e-03, 8.5571e-02, + 1.2659e-01, 9.8572e-02, 2.9053e-02, -2.4731e-01, 1.0773e-01, + 8.7952e-02, -6.2744e-02, -5.9128e-03, -6.5491e-02, -9.3384e-02, + -1.5308e-01, 8.5876e-02, -1.2384e-01, -1.9019e-01, -5.1514e-02, + 5.5939e-02, -1.6382e-01, -1.0547e-01, 9.9609e-02, 1.4856e-01, + -6.6223e-02, 8.5510e-02, -7.5562e-02, 1.1902e-01, 2.8223e-01, + 1.2524e-01, -1.1734e-02, -4.3060e-02, -4.0833e-02, 7.2670e-03, + 1.3892e-01, -1.6357e-01, 7.6447e-03, -4.2023e-02, 1.1340e-01, + -7.7637e-02, -1.1688e-01, 1.1066e-01, -2.5616e-03, -3.2196e-02, + -2.2461e-01, 2.0532e-01, -1.0785e-01, -1.8774e-01, 1.5796e-01, + 1.0852e-01, -1.0419e-01, 8.7219e-02, -7.9712e-02, -8.0139e-02, + -9.1736e-02, -2.4036e-01, 1.3232e-01, -5.8746e-02, 1.2024e-01, + 1.1035e-01, 1.5771e-01, 4.5227e-02, 8.1299e-02, -9.1187e-02, + 1.9730e-02, 1.4075e-01, -1.0773e-01, -1.8689e-01, -7.0801e-02, + 1.1511e-01, -6.6650e-02, -8.6548e-02, 2.0911e-01, 7.1335e-03, + -1.6431e-01, 1.6467e-01, 1.7029e-02, 1.8112e-02, 1.5778e-02, + 3.5797e-02, -2.7756e-02, -3.1891e-02, -1.3161e-02, 1.8433e-01, + -1.5723e-01, -1.9055e-01, 1.0565e-01, -2.0154e-01, 1.0498e-01, + -5.3894e-02, -2.2339e-01, -1.0962e-01, 9.6558e-02, -1.9165e-01, + 1.2128e-01, -1.7017e-01, 6.7871e-02, -1.3953e-01, 1.1292e-01, + 2.6489e-02, 1.4832e-01, 9.2163e-02, -2.7466e-02, -6.8665e-02, + -1.5723e-01, -2.7100e-01, 2.4872e-02, -5.6839e-03, -4.6051e-02, + -5.5359e-02, -1.4734e-01, -1.0590e-01, -4.3488e-02, -4.1809e-02, + 3.7689e-02, -2.5146e-02, 1.6040e-01, -1.3696e-01, -9.9487e-03, + -7.5989e-03, -9.9609e-02, 1.1340e-01, 1.1389e-01, 4.6509e-02, + -1.1243e-01, 3.4668e-02, 2.6794e-02, -2.1912e-01, 8.2703e-02, + -2.0721e-02, -1.6711e-01, 1.3098e-01, -5.6244e-02, 4.6606e-01, + -5.5878e-02, 1.9989e-02, -1.2585e-01, 5.1636e-02, 1.7383e-01, + -1.0144e-01, -3.9612e-02, -1.9394e-02, -2.8336e-02, -1.4197e-01, + -1.4929e-01, -1.4331e-01, 1.1859e-01, 6.8481e-02, 1.0187e-01, + 1.1658e-01, 5.1147e-02, -7.5012e-02, 1.5649e-01, -1.5808e-01, + 1.5295e-01, 
-1.3879e-01, -8.2397e-02, -6.9824e-02, 1.0431e-01, + 2.1667e-01, -2.3041e-02, 9.5139e-03, -2.7863e-02, 1.4185e-01, + 1.6272e-01, -1.8848e-01, -6.3599e-02, -9.6497e-02, 2.0227e-01, + -1.6394e-01, 6.7017e-02, 1.5173e-01, -3.3783e-02, 1.5503e-01, + -9.6741e-02, -1.7725e-01, -2.0422e-01, -5.4352e-02, -2.7634e-02, + -1.5588e-01, 1.4612e-01, 6.7932e-02, 6.2927e-02, -8.1177e-02, + -3.8361e-02, -1.5472e-02, -2.8000e-02, 1.4233e-01, 9.8343e-03, + 1.1359e-01, -1.0986e-01, -1.6711e-01, -2.0020e-01, 1.5820e-01, + -1.4697e-01, -2.6505e-02, 1.4746e-01, 1.6516e-01, -2.7420e-02, + 1.1584e-01, -9.7961e-02, 8.9355e-02, 5.3528e-02, -3.0640e-02, + 1.0779e-01, -1.3928e-01, -3.9337e-02, 9.4177e-02, -9.7656e-02, + -1.7456e-01, 4.3274e-02, -1.3574e-01, -6.3843e-02, 1.3519e-02, + 1.5564e-01, 1.0779e-01, -3.2013e-02, -8.8074e-02, 1.6495e-02, + 2.0105e-01, -1.4966e-01, 8.1482e-02, -1.0805e-03, 1.8933e-01, + 9.0942e-02, -5.0934e-02, -1.3232e-01, 2.3669e-01, 1.5857e-01, + -7.8659e-03, -9.8145e-02, -2.0721e-02, 1.1163e-01, 1.2671e-01, + 1.2372e-01, 2.3083e-01, 1.8359e-01, -2.2925e-01, 4.1321e-02, + -1.1829e-01, -1.0571e-01, -1.0291e-01, -1.9531e-01, -8.3160e-03, + 6.5613e-02, -1.1700e-01, 7.0374e-02, -1.8311e-01, 1.5991e-01, + 2.8580e-02, -5.5809e-03, -1.0185e-03, -7.9529e-02, -2.3242e-01, + 4.9866e-02, 1.1224e-01, 2.1411e-01, 7.4524e-02, 2.9037e-02, + 1.2158e-01, -1.7532e-02, 4.8401e-02, -7.3242e-02, -1.2634e-01, + -1.4514e-01, 6.0944e-02, 7.9285e-02, 6.3660e-02, 6.2988e-02, + -3.4027e-02, -1.6675e-01, 1.4526e-01, -1.2054e-01, -3.8849e-02, + 1.4539e-01, -1.5491e-01, 2.1704e-01, -1.7700e-01, 2.5284e-02, + 8.9722e-03, 2.2937e-01, -1.5417e-01, -6.0669e-02, -1.3989e-01, + -7.7637e-02, -8.9111e-02, -8.1909e-02, -5.0201e-02, -7.2510e-02, + -8.9844e-02, 9.5398e-02, 1.3000e-01, -3.2684e-02, 1.9580e-01, + -1.4844e-01, -1.9104e-02, 4.3701e-02, 3.9734e-02, -9.6741e-02, + -1.0962e-01, 4.8431e-02, 1.9690e-01, -2.7756e-02, 1.5991e-01, + -7.5562e-02, 8.9294e-02, -1.7249e-01, 1.9373e-01, -1.5991e-01, + 
9.1431e-02, -4.4647e-02, 1.9104e-02, -3.6591e-02, 1.2610e-01, + 1.1340e-01, -5.2338e-02, 1.3611e-01, 2.6779e-02, 7.7026e-02, + -1.3208e-01, 4.8126e-02, -7.7248e-03, -1.0803e-01, 2.1927e-02, + 1.9104e-02, 1.4935e-03, -2.2021e-01, -6.5674e-02, 1.7456e-01, + 3.1464e-02, 4.1779e-02, -7.9346e-02, 1.5149e-01, -8.9600e-02, + -1.4001e-01, 7.8491e-02, 9.1064e-02, -2.3022e-01, 1.0962e-01, + 1.2561e-01, -3.2806e-02, 7.1411e-02, 2.2107e-01, 6.4453e-02, + -5.3955e-02, 1.7712e-01, -1.9043e-01, 3.7170e-02, 7.6027e-03, + 2.0178e-01, -8.5693e-02, -1.7319e-02, 4.7379e-03, 1.3135e-01, + 1.2280e-01, -1.2152e-01, -5.5084e-02, -1.7859e-01, -7.8186e-02, + -3.0609e-02, -1.3196e-01, 1.1768e-01, -1.9995e-01, 7.0435e-02, + 1.8970e-01, 3.4943e-02, -1.0309e-01, -6.6406e-02, -1.6479e-01, + 9.5947e-02, -1.6479e-01, -6.7749e-02, -1.6327e-02, -2.1143e-01, + -1.1493e-01, -2.2009e-01, -1.3696e-01, 6.6040e-02, -2.4756e-01, + -7.2083e-02, 6.8665e-02, 2.4353e-02, -9.4177e-02, 1.6199e-01, + -4.8566e-04, -7.3853e-03, 6.0059e-02, 3.1708e-02, 1.1475e-01, + 1.5784e-01, -1.0327e-01, -5.5908e-02, 1.0773e-01, -6.5796e-02, + -8.8135e-02, 9.3750e-02, 5.8411e-02, 7.5256e-02, 5.3314e-02, + 5.5573e-02, -9.7717e-02, -3.7537e-02, -1.0382e-01, -2.1497e-01, + 1.0992e-01, 2.7237e-02, -3.6621e-02, 1.0443e-01, 4.6326e-02, + 1.8665e-01, 1.5149e-01, -1.6443e-01, -2.2293e-02, 4.4891e-02, + 1.3831e-01, -7.9834e-02, -1.6602e-01, -1.0413e-01, 1.4551e-01, + 2.4857e-02, 2.1619e-01, -3.0869e-02, 1.5637e-01, 2.2595e-01, + 2.6489e-01, 1.3574e-01, 6.9214e-02, -6.5063e-02, 6.5674e-02, + -1.8701e-01, 2.1591e-02, -1.8356e-02, 2.3755e-01, -1.3330e-01, + 2.3608e-01, 1.2390e-01, 6.3232e-02, 4.4495e-02, 9.2346e-02, + -2.1094e-01, 1.6357e-01, -3.6133e-02, -1.0950e-01, -7.8125e-02, + -1.8263e-03, 1.1737e-01, 9.1187e-02, -3.7498e-03, 2.4780e-02, + 1.7712e-01, -2.2949e-02, -4.4617e-02, 1.2292e-01, -2.5903e-01, + 1.0883e-01, -8.7463e-02, -1.7957e-01, -1.3660e-01, -1.5747e-01, + -1.5308e-01, -1.6882e-01, 1.2262e-01, -1.2756e-01, 1.8530e-01, + 
1.5881e-01, 1.1377e-01, 8.3679e-02, -1.3367e-01, -1.0553e-01, + 9.5520e-02, 1.2903e-01, 2.4023e-01, -1.0144e-01, -1.6565e-01, + -8.4412e-02, 7.3120e-02, 2.3422e-02, 3.3966e-02, 1.1884e-01, + 1.5625e-01, -1.3562e-01, -1.8359e-01, 4.9286e-02, -6.8703e-03, + 4.0527e-02, 9.3018e-02, 2.2473e-01, 2.0813e-02, 5.7648e-02, + -1.1462e-01, 3.4595e-01, -1.0187e-01, -7.7637e-02, 4.7668e-02, + 2.3022e-01, 5.8960e-02, 3.5004e-02, 2.1509e-01, -9.2224e-02, + -2.1561e-02, -6.1493e-02, -8.3557e-02, -7.7026e-02, -5.3711e-02, + -5.2643e-02, 4.7035e-03, 4.9400e-03, -1.0541e-01, 1.6003e-01, + 1.1176e-01, -5.6061e-02, -1.3710e-02, 7.5500e-02, 6.3538e-02, + 1.3892e-01, -1.0809e-01, -3.7262e-02, 1.0521e-02, -1.6418e-01, + 8.0872e-02, -1.8250e-01, -1.7847e-01, -1.3806e-01, 6.9031e-02, + 7.8613e-02, 1.9177e-01, 2.2705e-01, 3.6869e-03, 6.1218e-02, + 1.7139e-01, -9.0271e-02, 6.0608e-02, 7.4036e-02, -1.3232e-01, + 9.5276e-02, 6.6284e-02, -5.0781e-02, 5.2490e-02, -7.6965e-02, + -2.0837e-01, -1.0034e-01, -1.8958e-01, -4.4922e-02, 1.7761e-01, + 6.3538e-02, 6.7871e-02, -3.6926e-02, -4.1008e-03, 7.0679e-02, + -5.2277e-02, 2.0203e-01, 3.3447e-02, -3.5339e-02, -7.2823e-03, + -3.6713e-02, 8.4167e-02, -8.8867e-02, 7.8918e-02, 1.6525e-02, + 1.9385e-01, 2.0496e-01, -9.1309e-02, 4.7684e-03, -1.2830e-01, + 3.9520e-03, -1.0559e-01, -1.2427e-01, 1.4722e-01, 2.3108e-01, + 2.5597e-03, -1.2164e-01, -1.4050e-01, 4.4403e-02, 1.9202e-01, + 7.7820e-02, -1.1420e-01, 8.2031e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.8626, 1.8031, 1.7707, 1.7813, 1.5411, 1.7607, 1.7641, 1.6653, 1.7079, + 1.7684, 1.8017, 1.6956, 1.6986, 1.7498, 1.8004, 1.8909, 1.7765, 1.8687, + 1.7344, 0.4985, 1.8175, 1.8176, 1.7591, 1.7257, 1.8502, 1.6815, 1.7383, + 1.7850, 1.8199, 1.7943, 1.8372, 1.6945, 1.7009, 1.7682, 1.7271, 1.7660, + 1.8474, 1.7567, 1.7404, 1.6637, 1.8308, 1.8491, 1.7334, 1.8106, 1.7488, + 1.6771, 1.7654, 1.7597, 1.8563, 1.7420, 1.8416, 1.8208, 1.7759, 1.7856, + 1.7080, 1.8179, 
1.7241, 1.7123, 1.7897, 1.7672, 1.7761, 1.7262, 1.7635, + 1.7302, 1.8483, 1.7563, 1.7805, 1.7103, 1.6915, 1.6331, 1.7328, 1.7430, + 1.9799, 1.7362, 1.7595, 1.7817, 1.7687, 1.7316, 1.6897, 1.7418, 1.7713, + 1.7142, 1.8107, 1.7375, 1.7881, 1.7923, 1.8721, 1.7792, 1.7724, 1.6501, + 1.7684, 1.8037, 1.7338, 1.7793, 1.7959, 1.7426, 1.7526, 1.8015, 1.7506, + 1.7252, 1.8087, 1.8846, 1.7413, 1.7966, 1.6733, 1.7162, 1.7589, 1.7807, + 1.7870, 1.7457, 1.6903, 1.7602, 1.7180, 1.7813, 1.7669, 1.7081, 1.7036, + 1.7858, 1.8298, 1.6986, 1.7633, 1.7596, 1.7947, 1.7986, 1.7408, 1.7156, + 1.8134, 1.8334, 1.8831, 1.8223, 1.7169, 1.7277, 1.8390, 1.7366, 1.6804, + 1.8091, 1.7192, 1.7432, 1.7482, 1.7376, 1.7989, 1.7752, 1.7880, 1.6884, + 1.8083, 1.7428, 1.7844, 1.7537, 1.7240, 1.7806, 1.6277, 1.7218, 1.7583, + 1.8438, 1.7764, 1.4386, 1.7788, 1.7629, 1.7766, 1.7988, 1.5892, 1.7308, + 1.7457, 1.7244, 1.7396, 1.8274, 1.7685, 1.8633, 1.7943, 1.7235, 1.6937, + 1.7572, 1.8016, 1.8091, 1.8354, 1.6880, 1.8347, 1.7619, 1.7627, 1.7118, + 1.6843, 1.8207, 1.8057, 1.8163, 1.7315, 1.5833, 1.7522, 2.7548, 1.7724, + 1.6638, 1.8069, 1.7381, 1.7530, 1.8312, 1.7646, 1.8395, 1.7653, 1.6637, + 2.9893, 1.7513, 1.8372, 1.7216, 1.7038, 1.8960, 1.7074, 1.7235, 1.8619, + 1.7499, 1.7265, 1.7715, 1.7588, 1.6586, 1.7386, 1.6842, 1.7496, 1.7123, + 1.7183, 1.8547, 1.8007, 1.6818, 1.7232, 1.7186, 1.5766, 1.7863, 1.7755, + 1.7346, 1.7565, 1.8070, 1.6555, 1.7957, 1.8173, 1.7355, 1.8109, 1.7154, + 1.8207, 1.8189, 1.6928, 1.7726, 1.8410, 1.7976, 1.7274, 1.7988, 1.8094, + 1.7929, 1.7947, 1.6713, 1.7325, 1.6619, 1.8692, 1.8463, 1.7701, 1.8279, + 1.7421, 1.7823, 1.7778, 1.7596, 1.7339, 1.7867, 1.8402, 1.7847, 1.7104, + 1.7948, 1.7840, 1.6757, 1.7317, 1.7500, 1.8238, 1.8030, 1.7794, 1.7665, + 1.7595, 1.7266, 1.7846, 1.8269, 1.8003, 1.7487, 1.7192, 1.8082, 1.7012, + 1.7666, 1.7735, 1.7122, 1.7510, 1.8224, 1.7595, 1.7332, 1.8395, 1.7557, + 1.7576, 1.7263, 1.7551, 1.7655, 1.7280, 1.8227, 1.7688, 1.7970, 1.5910, + 1.8495, 1.8002, 
1.8020, 1.7830, 1.7413, 1.8417, 1.7040, 1.8032, 1.7786, + 1.7579, 1.7294, 1.7574, 1.7677, 1.7913, 1.7839, 1.7360, 1.7482, 1.7686, + 1.7938, 1.7941, 1.7248, 1.7029, 1.7483, 1.7367, 1.6134, 1.7984, 1.8084, + 0.9421, 1.7933, 1.7893, 1.7963, 1.7915, 1.7544, 1.7983, 1.7982, 1.6628, + 1.7429, 1.7766, 1.7301, 1.7107, 1.7985, 1.7769, 1.7670, 1.8131, 1.6624, + 1.6756, 1.8100, 1.7682, 1.7307, 1.7146, 1.7440, 1.8184, 1.8075, 1.5879, + 1.7574, 1.8042, 1.8490, 1.8459, 1.7467, 1.8094, 1.7255, 1.7316, 1.7178, + 1.8016, 1.6997, 1.7831, 1.8231, 1.7265, 1.6869, 1.7430, 1.7517, 1.7799, + 1.7093, 1.7564, 1.7116, 1.7531, 1.7908, 1.8358, 1.8037, 1.7656, 1.8286, + 1.7787, 1.7869, 1.7289, 1.7703, 1.8054, 1.7791, 1.6605, 1.7075, 1.8083, + 1.7061, 1.7457, 1.7999, 1.6409, 1.8023, 1.7131, 1.7376, 1.7145, 1.6934, + 1.8714, 1.7531, 1.7853, 1.8259, 1.8281, 1.7963, 1.7558, 1.6699, 1.8304, + 1.7752, 1.8119, 1.7505, 1.7462, 1.7651, 1.7035, 1.7693, 1.7784, 1.8790, + 1.7997, 2.7079, 1.8050, 1.7434, 1.7304, 1.7496, 1.7413, 1.7786, 1.7676, + 1.7633, 1.8048, 1.7498, 1.5717, 1.7788, 1.7032, 1.7021, 1.7289, 1.7154, + 1.7574, 1.7357, 1.7384, 1.8404, 1.7523, 1.7456, 1.6581, 1.7213, 1.7779, + 1.8164, 1.7584, 1.8338, 1.7372, 1.7945, 1.7703, 1.7775, 1.7512, 1.7422, + 1.7518, 1.6974, 1.7084, 1.8513, 1.8187, 1.7494, 1.7221, 1.8195, 1.7825, + 1.7718, 1.7819, 1.7341, 1.8106, 1.7494, 1.7639, 1.7447, 1.7622, 1.7019, + 1.7713, 1.8107, 1.7751, 1.6831, 1.8983, 1.8854, 1.7562, 1.7641, 1.8450, + 1.6962, 1.7789, 1.7653, 1.8096, 1.7490, 1.7509, 1.8375, 1.8035, 1.7372, + 1.8059, 1.8244, 1.6840, 1.7538, 1.7917, 1.8426, 1.7746, 1.8419, 1.7937, + 1.7681, 1.8910, 1.7616, 1.7887, 1.6965, 1.7099, 1.7803, 1.7888, 1.8322, + 1.7731, 1.7062, 1.8104, 1.7924, 1.6989, 1.7703, 1.7718, 1.8199, 1.7933, + 1.7476, 1.7699, 1.7553, 1.7314, 1.7351, 1.8270, 1.7637, 1.7724, 1.6985, + 1.7598, 1.7834, 1.7270, 1.7643, 1.8691, 1.6961, 1.7836, 1.7457, 1.7214, + 1.7549, 1.7503, 1.7444, 1.7038, 1.7202, 1.7286, 1.7643, 1.8116, 1.8197, + 1.7559, 1.8472, 
1.7803, 1.6730, 1.8114, 1.7312, 1.8234, 1.8250, 1.7086, + 1.7285, 1.8161, 1.7310, 1.7546, 1.7177, 1.7930, 1.8005, 1.8174, 1.6891, + 1.7722, 1.7018, 1.8242, 1.6458, 1.7265, 1.7661, 1.8259, 1.7962, 1.6896, + 1.7895, 1.7994, 1.7630, 1.7438, 1.7374, 1.7818, 1.7458, 1.7538, 1.7060, + 1.7827, 1.7179, 1.6832, 1.6815, 1.6348, 1.7162, 1.8015, 1.7629, 1.8582, + 1.7548, 1.8259, 1.7654, 1.7764, 1.8435, 1.7540, 1.8566, 1.8117, 1.8390, + 1.7786, 1.6708, 1.7558, 1.7757, 1.6846, 1.8093, 1.7874, 1.7895, 1.8258, + 1.7518, 1.7854, 1.7683, 1.8340, 1.8175, 1.7892, 1.7295, 1.7386, 1.7859, + 1.7975, 1.7490, 1.7827, 1.7883, 1.7735, 1.7281, 1.8743, 1.7000, 1.7167, + 1.7611, 1.7891, 1.7726, 1.7243, 1.7394, 1.7958, 1.6989, 1.8408, 1.7790, + 1.7038, 1.7744, 1.7268, 1.6995, 1.7691, 1.7200, 1.7585, 1.7669, 1.8629, + 1.7698, 1.8005, 1.8482, 1.6710, 1.7339, 1.5718, 1.8037, 1.7726, 1.7195, + 1.7267, 1.7365, 1.7477, 1.7643, 1.7557, 1.7137, 1.6893, 1.8008, 1.7865, + 1.7747, 1.7637, 1.7201, 1.6544, 1.7408, 1.6003, 1.7998, 1.7676, 1.7365, + 1.7003, 1.7455, 1.7690, 1.8151, 1.7240, 1.8377, 1.7835, 1.7003, 1.7420, + 1.8121, 1.7799, 1.7392, 1.7157, 1.7833, 1.8003, 0.4432, 1.7083, 1.8084, + 1.8351, 1.6847, 1.7720, 1.7214, 1.7016, 1.6804, 1.7575, 1.8147, 1.8383, + 1.8066, 1.7622, 1.7447, 1.8649, 1.6914, 1.7502, 1.7756, 1.8313, 1.7522, + 1.7012, 1.7607, 1.7849, 1.6833, 1.7358, 1.7736, 1.8525, 1.8686, 1.7100, + 1.8097, 1.8105, 1.7527, 1.7602, 1.7818, 1.7557, 1.7135, 1.7930, 1.7505, + 1.8375, 1.7433, 1.7744, 1.7698, 1.7585, 1.7979, 1.5292, 1.8328, 1.7411, + 1.7647, 1.8039, 1.7369, 1.7095, 1.7712, 1.7364, 1.7643, 1.7983, 1.8086, + 1.7453, 1.7678, 1.7649, 1.7738, 1.8089, 1.8234, 1.7720, 1.6807, 1.7681, + 1.6920, 1.7764, 1.8613, 1.8168, 1.6981, 1.7320, 1.8520, 1.7266, 1.7517, + 1.8211, 1.7164, 1.7532, 1.7514, 1.8598, 1.7562, 1.7970, 1.7824, 1.6946, + 1.7698, 1.7516, 1.7438], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.5714e-01, 1.0521e-01, -2.1855e-01, -3.6352e-02, 1.8207e-01, + 
1.7251e-01, 1.3290e-01, 7.0782e-02, -1.9266e-01, -9.2168e-02, + 7.9184e-02, -5.8226e-02, -1.7935e-01, -2.4685e-02, -4.8018e-02, + -9.5424e-02, 2.4386e-02, -5.1330e-02, 3.3999e-02, 3.2014e+00, + -3.9015e-02, 1.1516e-01, 8.0055e-02, 9.1744e-02, 2.1027e-02, + 1.0418e-02, 7.2239e-02, -1.1568e-01, 3.2005e-03, -4.8645e-02, + 1.4117e-01, 7.1536e-02, -6.0574e-02, 1.8058e-01, 7.5935e-02, + -8.4233e-02, 2.0770e-02, -1.0301e-01, -1.0013e-01, 1.5660e-01, + 8.0592e-02, 1.2721e-01, -4.6441e-02, 5.7574e-02, 3.4196e-02, + -2.7855e-03, 1.1291e-01, -4.5029e-03, 4.9145e-02, 5.6455e-02, + 1.2398e-03, -2.5091e-02, 3.1097e-02, -9.4255e-03, 2.5114e-02, + 1.5999e-01, -4.6883e-02, -1.1271e-01, 4.5208e-02, 1.4600e-02, + 1.2989e-01, -1.0197e-01, 1.6056e-01, 8.7824e-02, 1.6033e-01, + 3.4396e-02, 1.3704e-01, 4.1646e-02, -5.3530e-02, 2.4990e-01, + 4.8798e-02, 4.4839e-02, 7.1403e-01, 1.1320e-01, -1.0711e-01, + 1.2396e-01, 1.3501e-01, -6.7984e-02, -8.9051e-03, 9.0761e-02, + 9.5781e-03, -5.4714e-02, -9.0754e-02, 1.3412e-01, -1.9785e-02, + 8.3251e-02, 9.4815e-02, 1.7186e-01, -3.4824e-02, 6.8364e-02, + -2.1312e-02, -4.3523e-02, 3.8732e-02, -6.0781e-03, -7.7346e-02, + 3.1005e-02, 6.6436e-02, -8.4234e-02, -1.1001e-01, 2.1268e-03, + -1.4562e-02, -4.7741e-02, -7.1506e-02, -1.0927e-01, 7.7507e-02, + -1.2546e-02, 1.1501e-01, -5.7693e-02, 5.3100e-02, -1.1730e-01, + -8.1036e-02, -1.0260e-01, -8.1279e-02, 9.5656e-02, 5.0190e-02, + 4.7565e-02, 5.3448e-03, 4.5320e-02, 1.9243e-02, 1.4295e-01, + -3.6603e-02, -3.0097e-01, -1.3997e-01, -6.3057e-02, -2.0284e-01, + -5.2843e-02, -2.4076e-01, 7.0576e-02, 1.0352e-01, -3.0233e-02, + -1.0235e-01, -5.7698e-03, -3.2386e-02, -5.5490e-02, -1.1519e-01, + 8.9923e-02, -7.0423e-02, -5.0296e-02, 1.8792e-02, -9.9913e-02, + -1.2686e-02, 7.4479e-03, 2.6728e-02, 1.4484e-02, 5.6417e-02, + -9.0091e-02, -7.0845e-03, 2.2955e-02, -1.0560e-01, 1.6517e-01, + 3.1767e-01, -1.0263e-01, -1.2595e-01, -6.4043e-02, -9.5674e-02, + -4.9398e-01, -3.6656e-02, -2.0442e-02, -6.8514e-03, 4.0622e-02, + 
2.4586e-01, -1.6740e-01, -6.2760e-02, 8.0933e-03, -6.1926e-02, + 1.3707e-02, 3.3186e-03, 8.2177e-02, 5.3861e-02, 1.9900e-02, + -9.7574e-02, 8.0126e-02, 9.0824e-02, -5.0445e-02, -2.1681e-02, + -1.8436e-01, -9.9015e-02, 8.8390e-03, -4.9693e-02, 9.5410e-02, + -7.1627e-02, -4.8957e-02, 1.9217e-01, 1.0617e-01, 6.4168e-02, + -1.4703e-01, 9.9676e-02, 9.1445e-01, -1.4521e-01, -9.8065e-02, + -1.1537e-01, -1.0495e-01, 1.0574e-01, 1.5191e-01, -6.7199e-02, + -3.9562e-02, -2.5574e-02, -2.0092e-01, 5.2279e-01, 3.2230e-02, + -2.6505e-01, 6.5943e-02, 1.2653e-02, 3.2023e-02, 8.7500e-02, + 1.5057e-01, -2.0009e-01, 7.4692e-02, 1.0776e-01, -5.0648e-02, + 6.2093e-02, 3.3582e-01, -3.1111e-02, -9.1817e-02, 3.2584e-02, + -2.9921e-02, -1.3872e-02, 8.5125e-03, -9.8826e-02, -3.3480e-01, + 4.8171e-02, -1.2216e-02, 1.8550e-01, -2.2219e-02, -2.7731e-03, + -2.9300e-02, 6.2133e-02, 2.5511e-02, 1.6478e-01, -5.1522e-02, + -3.3598e-02, 2.7848e-02, 1.7212e-02, 4.1664e-02, -3.3681e-02, + -1.5164e-02, -1.1802e-02, 1.1873e-01, 1.4403e-01, -1.8655e-02, + 3.1523e-02, 1.0407e-01, -3.3324e-02, 1.0530e-01, -8.4562e-03, + 2.3715e-02, -8.4441e-03, -8.1965e-02, 4.1591e-02, -4.1058e-02, + -8.8389e-02, -7.8143e-02, 1.2788e-01, -1.1700e-01, 1.8904e-01, + -3.8862e-02, 3.0045e-02, -1.1734e-01, 7.6597e-02, 3.1204e-02, + -1.0797e-01, -8.3629e-02, 6.1842e-02, 1.9294e-02, -1.1804e-01, + 7.2849e-02, -2.0952e-02, 2.2642e-02, -6.2562e-02, -6.2156e-02, + 6.7846e-02, 9.7213e-02, 1.1358e-01, -1.4457e-01, -3.7113e-03, + 4.6414e-02, -2.3938e-02, 3.0535e-02, 1.3291e-01, -1.3570e-01, + 1.5219e-01, 2.9188e-01, -3.4406e-02, 4.5174e-02, 5.9539e-02, + 3.4760e-02, -7.6875e-04, -3.4697e-02, 1.7763e-01, -9.1222e-02, + -3.2008e-02, 6.8429e-02, 7.8547e-02, 3.7662e-02, 1.3554e-01, + -6.3083e-02, 3.0479e-01, -4.1982e-03, 4.5922e-02, 9.9895e-02, + -7.3645e-03, 1.2549e-02, -2.1003e-02, 2.0596e-01, 5.0304e-02, + -1.2328e-02, 1.4580e-02, -9.2944e-02, 6.6810e-02, -7.2437e-02, + 8.9594e-02, 5.3213e-02, -8.6073e-02, -1.5086e-01, 1.6785e-02, + 
-8.3192e-03, -1.9318e-02, -5.3071e-03, 2.6348e-01, -2.6442e-02, + -2.1526e-01, 2.4490e-01, -1.1710e-01, 2.5417e-02, -1.3422e+00, + -1.9219e-01, -2.5468e-02, -9.7910e-03, 1.0712e-03, -1.9965e-01, + 2.1429e-01, 2.8356e-03, 1.3562e-01, -7.5370e-02, 1.5262e-01, + 7.8208e-02, 1.3468e-02, 7.6906e-03, -3.6844e-02, -4.3252e-02, + -4.5571e-02, 6.0231e-02, 2.0208e-02, -9.7585e-02, 1.3374e-01, + -3.9385e-03, 8.0897e-02, 1.5606e-01, -1.0525e-02, -3.8102e-02, + -1.0328e-01, 5.2117e-02, -2.4488e-02, 1.3929e-01, -5.5941e-02, + -3.7997e-02, -1.4443e-02, -8.2470e-02, -5.2119e-02, -7.9846e-02, + 2.0052e-02, 8.7459e-05, -8.3340e-02, -1.1757e-01, -4.5237e-02, + 1.9323e-02, 2.7198e-02, 1.7889e-01, 8.7077e-02, 1.5148e-02, + 1.2188e-01, -1.0065e-02, -2.4437e-02, -7.9199e-03, -1.5123e-02, + 1.5032e-01, 1.0752e-01, 2.0341e-01, 3.4366e-02, -9.1777e-02, + -1.4547e-01, 1.8320e-01, 4.5751e-02, 3.7170e-02, -8.5500e-02, + -7.2542e-02, -3.7958e-02, -1.6933e-01, -9.1045e-02, -4.4125e-02, + -7.2687e-04, -3.1537e-02, 1.5086e-02, -4.8495e-02, 6.5625e-02, + -1.6750e-02, 1.9904e-03, -1.1916e-02, -7.7741e-02, 3.0378e-02, + 1.3298e-01, 1.6665e-02, 7.6148e-02, -1.4184e-02, -1.0865e-01, + -2.5864e-01, -9.4924e-02, 5.2374e-02, 4.8062e-02, 4.8401e-02, + -6.1411e-02, 5.2295e-02, 1.0746e-01, -2.6086e-02, -1.8869e-01, + -4.6206e-01, -2.0475e-03, 3.6519e-02, -1.5038e-01, -4.3278e-03, + 4.2967e-02, 1.3432e-02, 4.5682e-02, -8.6488e-02, -1.7540e-01, + 5.3118e-02, -2.6750e-01, -1.5931e-01, 1.2383e-01, 8.0944e-03, + -6.6257e-02, 4.0884e-02, 2.4966e-02, 9.8009e-02, 2.3656e-01, + -6.1301e-03, 3.3331e-02, 9.9844e-02, 1.9059e-01, -1.0462e-01, + 1.1752e-02, -5.6101e-02, -3.5903e-02, 1.7924e-01, 1.7801e-01, + -1.6374e-01, -2.3782e-02, -5.2861e-02, 9.9118e-02, -1.7693e-02, + -3.4212e-02, -3.9081e-02, -2.0267e-01, 1.2076e-01, 9.5575e-02, + -1.0296e-02, -2.5224e-04, -4.2425e-03, -6.1769e-02, -5.4251e-02, + -3.3280e-02, 1.0036e-01, -1.9815e-01, 5.9549e-02, -4.9488e-02, + -9.9051e-02, 7.7852e-02, -1.2828e-01, 1.6435e-01, 
1.2599e-02, + -6.9440e-02, -1.6187e-01, -3.3820e-03, 9.6772e-02, 5.1878e-02, + 6.2215e-02, 2.1757e-01, -6.2182e-02, -1.7840e-03, 7.6085e-02, + 1.4915e-01, -5.7066e-02, 4.5222e-02, 2.0680e-01, -1.4390e-01, + 1.7874e-03, -3.0383e-02, 1.8139e-01, 6.8907e-02, -2.5350e-03, + 4.4395e-02, 4.1111e-02, -1.0086e-02, 2.7673e-02, -2.1411e-01, + -7.8112e-02, -1.3029e-01, 9.8681e-02, -9.1540e-02, 6.2187e-02, + 1.4301e-02, -5.2033e-02, 4.9276e-02, 3.3699e-03, -7.4605e-02, + 7.3420e-02, 4.7442e-02, 1.0370e-01, 6.9103e-02, 2.2118e-03, + -7.5054e-02, 2.4609e-03, -2.0037e-01, 9.2002e-02, -1.2598e-01, + -4.0373e-02, 4.0524e-02, 7.1912e-02, 2.4932e-02, 1.0932e-02, + 1.6149e-03, -2.0728e-02, 1.1106e-01, 1.1121e-02, -1.1926e-01, + 1.0912e-02, -3.5671e-02, -9.7672e-03, 2.8253e-02, -1.8453e-01, + 3.7205e-02, 5.7847e-02, 1.4349e-02, -1.2725e-01, -1.2037e-02, + -6.4877e-02, -1.7167e-02, 4.8023e-02, -3.2393e-02, -2.7004e-02, + -6.4756e-03, 7.6627e-02, -3.2907e-02, 9.5477e-02, 4.4782e-02, + 1.4996e-01, 1.3177e-01, 7.9663e-02, 1.2557e-01, 5.1729e-02, + -6.4316e-02, 1.2403e-01, -2.0048e-01, 1.1972e-01, -3.4580e-02, + 5.9819e-02, 1.1675e-01, -2.3962e-02, 7.6439e-03, 1.1763e-01, + -5.1537e-02, 1.5785e-01, 3.0192e-02, 1.2142e-01, 2.1545e-01, + 1.5338e-01, -1.1547e-02, 4.5611e-02, 8.2466e-02, 5.1706e-02, + 1.4715e-01, -6.1603e-03, 3.3504e-02, 2.1085e-02, 3.6481e-02, + 1.1234e-01, 8.1627e-03, 7.1839e-03, -2.7252e-02, -8.4993e-02, + -1.4174e-01, -9.3428e-02, 7.1645e-02, 8.5555e-02, 1.3293e-01, + -1.9959e-02, -1.6840e-02, -9.3083e-02, 9.5655e-02, 3.6715e-02, + -5.1703e-02, 9.1998e-02, 7.1881e-02, -1.0141e-01, 1.2980e-01, + -1.0584e-01, 8.7971e-02, 1.0475e-01, -8.8393e-02, 6.7205e-02, + -5.2151e-02, -1.4961e-01, -5.4824e-02, -5.6715e-02, -6.3596e-02, + -1.0653e-01, 1.2907e-02, 1.4339e-01, 1.5099e-01, 6.3067e-03, + 1.6588e-02, 8.9463e-02, -1.1750e-01, 1.6891e-02, -1.7042e-01, + -1.6635e-01, 5.1580e-02, 1.3655e-01, -2.4035e-02, -1.0723e-01, + 7.4677e-02, 2.4940e-02, 6.3164e-02, -2.1078e-02, 8.4296e-02, + 
-5.9920e-02, -4.0778e-02, 6.1650e-02, 7.4657e-02, -1.2583e-01, + 1.5846e-01, 5.1025e-03, 1.1199e-01, 1.7501e-01, 9.1035e-02, + 1.1224e-02, -5.2327e-03, 1.1513e-02, 5.1251e-02, -1.2470e-01, + -1.6167e-02, -1.0953e-01, 5.0011e-02, -8.7080e-02, 1.6923e-01, + 2.2695e-02, 3.1640e-02, 1.3928e-01, 1.0300e-01, 2.1505e-01, + 2.4769e-01, 1.1580e-02, -3.5025e-02, 1.4856e-01, -3.9221e-03, + 5.0325e-02, -1.7961e-01, -1.2315e-02, 1.3724e-01, 1.0216e-01, + -9.3912e-02, -2.9979e-02, -1.6285e-01, 7.8192e-03, 8.5767e-02, + -3.3118e-02, -2.1102e-01, -1.0094e-01, -3.0506e-02, -2.1837e-02, + -1.9102e-01, 8.7712e-02, 1.1036e-01, 6.0651e-03, 7.0093e-02, + 1.0984e-01, 9.9566e-03, -7.1757e-03, 4.3886e-02, 5.9905e-02, + 3.3323e-02, -3.7092e+00, 5.5709e-02, 7.9547e-02, 5.5805e-03, + -1.4747e-01, 1.5289e-02, -5.2988e-02, 3.3662e-02, 1.3538e-01, + -2.1147e-01, 7.8032e-02, 6.7690e-02, -3.3204e-02, -1.3344e-01, + 1.4210e-01, 4.8270e-02, -2.6500e-02, 1.0287e-01, -5.5116e-02, + 1.6589e-01, 1.1236e-01, -4.1125e-02, -5.9105e-02, -2.1212e-01, + 3.8394e-02, 1.5368e-01, 4.8188e-02, -9.4478e-02, 2.1040e-02, + -9.0515e-02, 1.4378e-01, 1.5708e-01, -6.8028e-04, 4.4458e-02, + -7.1579e-03, 6.5651e-02, -1.2955e-02, -8.5481e-02, -4.0749e-02, + -8.3394e-02, 3.5046e-02, 1.2930e-02, -1.7214e-01, 9.5684e-02, + -4.2899e-02, -2.3204e-01, 1.1294e-01, -4.2825e-03, 1.6472e-01, + 1.0376e-02, 9.8266e-02, 1.3218e-01, 1.2411e-01, -3.2590e-02, + 3.2784e-02, -8.1404e-02, 6.8053e-02, -7.2287e-02, -9.7887e-02, + 3.1597e-02, -1.8775e-01, -1.8899e-01, 6.3650e-02, -6.3658e-02, + -5.6036e-02, -1.1804e-01, 3.8965e-02, 1.1923e-01, -3.0770e-02, + -6.4427e-03, -4.9890e-02, -9.9548e-02, 9.1358e-03, 1.5483e-01, + 3.1754e-02, 7.0219e-02, 8.9523e-02, -3.5334e-02, -1.6340e-01, + -2.0951e-02, 7.2451e-02, 1.5202e-01, -1.3042e-01, -1.3775e-01, + -7.0584e-02, 1.7459e-01, -1.6711e-03], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[-0.0193, 0.0156, -0.0011, ..., 0.0403, -0.0181, -0.0006], + [-0.0301, 0.0043, -0.0099, ..., 
-0.0052, 0.0136, -0.0069], + [-0.0016, 0.0183, 0.0016, ..., -0.0064, -0.0117, 0.0118], + ..., + [ 0.0070, 0.0163, -0.0119, ..., 0.0026, -0.0226, 0.0221], + [ 0.0004, 0.0120, 0.0207, ..., -0.0106, 0.0029, 0.0323], + [ 0.0079, -0.0005, 0.0047, ..., -0.0068, -0.0219, 0.0219]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-0.3269, -0.2380, -0.3926, ..., -0.2299, 0.2595, -0.2932], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0171, 0.0118, 0.0016, ..., -0.0016, 0.0133, -0.0409], + [-0.0224, -0.0010, -0.0217, ..., 0.0149, 0.0045, -0.0212], + [ 0.0286, 0.0206, -0.0153, ..., -0.0103, -0.0133, -0.0120], + ..., + [-0.0076, -0.0130, 0.0111, ..., 0.0085, -0.0125, 0.0113], + [-0.0247, -0.0079, 0.0172, ..., 0.0136, -0.0062, -0.0172], + [ 0.0397, -0.0172, -0.0138, ..., 0.0265, 0.0010, 0.0029]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-3.6469e-02, 1.6272e-01, -6.0242e-02, -9.7885e-03, -4.4312e-02, + 1.1780e-01, 9.4238e-02, -1.3342e-01, 3.6133e-02, 2.9022e-02, + 2.3518e-03, 5.4352e-02, -2.9572e-02, -6.2134e-02, 7.6782e-02, + 9.3460e-03, -1.0315e-01, 4.0710e-02, 3.5400e-02, -3.4863e-01, + 3.7628e-02, 2.0355e-02, 5.0323e-02, 5.9967e-02, 5.8136e-02, + 6.6589e-02, -3.6346e-02, -2.6657e-02, -1.1108e-01, 5.0079e-02, + 8.3801e-02, -1.4366e-02, -5.9387e-02, 1.3708e-01, 4.3335e-02, + -1.9272e-02, -1.2573e-01, -6.5674e-02, 4.9591e-02, 2.5781e-01, + 2.0385e-04, 2.4597e-02, -3.5156e-02, 2.2919e-02, -5.5237e-02, + 1.8005e-02, 1.1578e-01, -4.3335e-02, -7.9468e-02, -8.9050e-02, + -4.0131e-02, 5.1575e-02, -4.7569e-03, 1.1786e-01, -7.7698e-02, + 9.4681e-03, -1.0657e-01, -5.6641e-02, 5.4321e-02, -5.0873e-02, + -1.8738e-02, -1.2428e-02, -9.0759e-02, -8.9050e-02, -5.0110e-02, + 4.4586e-02, -5.6183e-02, -5.2124e-02, -5.9547e-03, -4.5197e-02, + 7.0312e-02, 1.6470e-03, -1.6479e-01, -2.2324e-02, 2.2812e-02, + -1.2695e-02, -2.7435e-02, 1.3855e-01, 3.5675e-02, 
-5.5351e-03, + 1.8173e-02, 2.0924e-03, -5.3986e-02, -8.4412e-02, -2.4002e-02, + -6.5918e-02, 1.0815e-01, -1.7395e-01, -1.6382e-01, 8.5640e-04, + -1.9653e-01, -1.1774e-01, 4.2114e-02, 8.8379e-02, 7.2815e-02, + 3.2101e-03, 1.5030e-02, 1.0992e-01, -5.3986e-02, 7.1533e-02, + 5.2582e-02, -1.2061e-01, -4.5990e-02, 1.1414e-01, -7.8918e-02, + -3.4332e-02, 9.1248e-02, -3.1128e-02, 4.1901e-02, -3.0273e-02, + 7.7393e-02, -3.4180e-02, -7.3914e-02, 1.8982e-02, 6.4583e-03, + 9.8450e-02, -2.0462e-02, 8.6426e-02, -8.2397e-02, -8.6975e-02, + 5.4932e-02, -1.2383e-02, -1.1955e-02, 2.8122e-02, -1.9211e-02, + -7.6660e-02, -3.1738e-02, -2.9907e-02, 2.9449e-02, 1.2463e-01, + -2.1194e-02, 3.9032e-02, -5.9479e-02, -8.0994e-02, -7.1533e-02, + 5.0446e-02, -7.1831e-03, -7.8613e-02, 6.6284e-02, 4.1626e-02, + -9.6619e-02, 3.5797e-02, -7.1838e-02, -2.2858e-02, -6.3354e-02, + 4.9255e-02, -5.7800e-02, -5.3650e-02, 1.2215e-02, 9.1003e-02, + 6.5002e-02, -8.4473e-02, 5.0323e-02, 3.8177e-02, -9.4055e-02, + 2.5436e-02, -7.3792e-02, 1.0272e-01, -1.3989e-01, -4.0100e-02, + -2.4506e-02, -3.8269e-02, -1.7441e-02, -8.9722e-02, -1.0199e-01, + 5.2490e-02, 1.0254e-01, -4.8920e-02, 4.8828e-02, 7.8354e-03, + 2.0706e-02, -4.4525e-02, 2.4658e-02, -1.4233e-01, -1.1957e-01, + -3.5522e-02, -1.8994e-01, -1.1090e-01, 8.0688e-02, 6.3293e-02, + -6.2744e-02, -1.0913e-01, -1.6809e-01, -5.3024e-03, 1.7517e-02, + -1.7670e-02, -9.7046e-02, -2.3340e-01, 1.5625e-02, -9.2840e-04, + -1.7593e-02, 3.4454e-02, 1.8631e-02, -5.7312e-02, 6.7017e-02, + 4.2633e-02, 7.8186e-02, -5.2277e-02, 3.5248e-02, -2.8793e-02, + 2.9421e-04, -3.2806e-02, 4.8004e-02, -8.1909e-02, 1.6434e-02, + -1.1469e-01, 1.3763e-02, -8.5510e-02, -1.5454e-01, 7.0435e-02, + 4.8462e-02, -5.6183e-02, -2.0340e-02, 8.4290e-02, 4.1321e-02, + 9.0393e-02, 5.2094e-02, -3.8727e-02, 2.0203e-01, 1.1884e-01, + -2.3590e-02, 1.0632e-01, 2.6199e-02, -1.1391e-02, -1.5572e-02, + 1.0620e-01, -6.2408e-02, 6.1615e-02, 3.4149e-02, 4.5410e-02, + -7.7393e-02, -9.2896e-02, 3.4943e-02, 
-1.6205e-02, 4.5502e-02, + -8.2947e-02, 2.9266e-02, -7.4120e-03, -6.8398e-03, 1.2091e-01, + 1.1719e-01, -3.0045e-02, -3.6316e-02, 3.4210e-02, -3.4821e-02, + -9.2834e-02, 5.9753e-02, -7.4524e-02, -4.0802e-02, 1.2146e-02, + -2.7390e-02, -4.1107e-02, -3.3539e-02, -6.5063e-02, 1.2199e-02, + 9.4971e-02, -8.0933e-02, 1.7262e-03, -2.5940e-02, 1.6602e-02, + -1.1609e-01, -5.7434e-02, -1.1743e-01, 5.5809e-03, -8.1421e-02, + -1.6357e-01, 2.0802e-04, 6.3354e-02, 2.3861e-03, 3.3966e-02, + 1.7627e-01, -1.3269e-01, 2.1301e-02, 8.4763e-03, 4.2053e-02, + -5.5695e-02, 1.4355e-01, -2.0874e-02, -8.5999e-02, -1.7105e-02, + 7.0618e-02, 2.7496e-02, -1.4026e-01, -2.2705e-02, -8.4534e-02, + 5.2612e-02, -1.0693e-01, 1.2201e-01, -4.8218e-03, 5.1270e-02, + -2.8778e-02, 4.6417e-02, -2.6276e-02, -1.0938e-01, -4.6814e-02, + 4.5929e-02, -1.0254e-02, 8.2214e-02, -5.8228e-02, 9.7656e-02, + 1.7578e-02, 1.2866e-01, 1.9958e-02, 1.3049e-01, -9.2224e-02, + -9.8572e-03, -1.0309e-01, -1.1932e-02, -3.8891e-03, 5.7220e-02, + 2.9099e-02, -5.8716e-02, 3.4912e-02, -4.6539e-02, 3.0609e-02, + -4.7028e-02, -6.7932e-02, 5.2795e-02, -6.7825e-03, 4.4159e-02, + -5.0171e-02, -7.4951e-02, 1.3069e-02, 4.0924e-02, 9.9463e-01, + -1.9760e-02, 7.8918e-02, -1.3257e-01, -4.0253e-02, -5.2856e-02, + -3.4088e-02, 9.1324e-03, -8.1406e-03, 1.1497e-02, 3.5767e-02, + -1.1243e-01, -7.6050e-02, 4.4586e-02, 6.6345e-02, -6.4880e-02, + 2.9888e-03, 9.5337e-02, 1.3599e-01, -1.0956e-01, 5.1697e-02, + 7.1045e-02, 3.2845e-03, 2.7924e-02, -5.4321e-02, -7.1411e-02, + 1.7236e-01, -6.8176e-02, -3.7659e-02, -2.7985e-02, 7.5607e-03, + -1.8616e-02, -8.5678e-03, -4.1016e-02, -2.7161e-02, 1.7371e-01, + -9.4604e-02, -2.1423e-02, -9.6497e-02, 1.0547e-01, -1.2134e-01, + 6.7139e-03, -1.1578e-01, -7.1167e-02, -9.4543e-02, -2.4414e-02, + 3.5763e-03, 2.0584e-02, 1.1566e-01, -2.4231e-02, 4.9744e-02, + -1.6876e-02, 7.9041e-02, -2.6382e-02, 1.2622e-01, -1.6693e-02, + 7.6355e-02, 1.0046e-01, 4.1771e-03, -2.0605e-01, 2.7054e-02, + 4.7913e-02, 1.8906e-02, 
-6.1096e-02, 6.4026e-02, -3.7659e-02, + -4.2145e-02, 1.6931e-01, 8.7646e-02, -7.4890e-02, 3.7079e-02, + -1.6769e-02, -1.0052e-01, -1.1389e-01, 1.5393e-01, -1.6556e-02, + -8.6975e-02, -6.1737e-02, -1.2402e-01, -1.3084e-02, -2.8839e-03, + -1.0582e-02, -4.9530e-02, 5.4741e-03, 4.7333e-02, -3.1342e-02, + 1.1877e-01, 5.7068e-02, 6.1859e-02, 6.5041e-03, 1.8188e-01, + 6.9092e-01, -2.3331e-02, -5.5878e-02, 3.7659e-02, -1.0307e-02, + -3.2410e-02, 8.8745e-02, -8.8867e-02, -1.4648e-01, -2.7161e-02, + 8.2947e-02, 1.4282e-01, 9.1370e-02, -1.0574e-02, 4.1107e-02, + 1.4062e-01, 1.1792e-01, 1.1810e-02, -9.7290e-02, 5.8228e-02, + 1.3892e-01, -9.9976e-02, 2.5116e-02, 5.3009e-02, -3.7659e-02, + 9.7656e-02, -2.5070e-02, 3.4424e-02, -1.7197e-02, 3.8849e-02, + -1.0315e-01, -8.4961e-02, 5.0842e-02, 4.2175e-02, 5.7770e-02, + -1.8555e-02, -4.3579e-02, -5.0446e-02, 6.9946e-02, -1.0941e-02, + 4.6143e-02, -3.8208e-02, 9.8877e-02, 4.0833e-02, 9.1492e-02, + 2.1210e-02, -7.8125e-02, -7.5378e-02, -1.0114e-01, -4.9042e-02, + 6.9153e-02, 9.6512e-03, 1.8164e-01, 1.7731e-02, 4.1565e-02, + -2.4463e-01, 1.7380e-02, -1.4294e-01, -3.6163e-02, 3.0518e-04, + -1.7105e-02, 1.0443e-01, 6.5308e-03, 8.9905e-02, -6.4331e-02, + -2.5452e-02, -1.5427e-02, -1.6614e-01, -7.2937e-02, 8.9844e-02, + -1.0754e-01, 1.5839e-02, 7.0923e-02, 5.2460e-02, -1.1395e-01, + 5.6427e-02, -3.1311e-02, 9.8450e-02, 1.9730e-02, 3.8934e-04, + 1.5717e-02, -1.4062e-01, -5.0781e-02, 3.6804e-02, -5.0934e-02, + 4.3274e-02, 6.4514e-02, -6.1340e-02, -4.4342e-02, 7.7454e-02, + 3.7567e-02, 2.5925e-02, 1.0706e-01, -5.6976e-02, 1.3954e-02, + -3.6407e-02, -6.9824e-02, -9.9304e-02, 1.1206e-03, -3.4389e-03, + -4.3121e-02, -9.1614e-02, -9.4543e-02, 1.6876e-02, -6.6284e-02, + 9.6497e-02, -7.1594e-02, -4.3732e-02, 9.7839e-02, -1.1487e-01, + -8.4167e-02, -4.0497e-02, 1.3863e-02, 5.8670e-03, 1.7347e-03, + 7.2937e-02, -8.8684e-02, -3.0785e-03, 2.5955e-02, 3.1860e-02, + -9.8572e-03, 7.8369e-02, 3.5614e-02, -4.3091e-02, 4.0100e-02, + 3.4943e-02, 3.1525e-02, 
3.8330e-02, -2.8000e-02, -2.6703e-02, + 1.0162e-01, 7.0801e-02, 1.5979e-01, -1.8738e-02, -5.5176e-02, + -6.5369e-02, -5.6244e-02, 1.6575e-03, -1.1670e-01, 6.4148e-02, + 5.0293e-02, 6.1066e-02, -1.0155e-02, -1.2354e-01, 2.2614e-02, + 4.6417e-02, -4.5593e-02, -9.8328e-02, 2.4460e-02, -2.4521e-02, + 3.0594e-02, 4.7913e-02, -5.8441e-02, -1.0048e-02, -7.2823e-03, + 9.5459e-02, -6.4735e-03, -5.1453e-02, 4.0863e-02, 1.2039e-02, + 2.3270e-02, 2.8336e-02, 1.0883e-01, -1.7624e-02, -1.7532e-02, + 3.1891e-02, -7.6904e-02, 4.4441e-03, -9.7046e-02, -3.0914e-02, + 1.3321e-02, -4.2572e-02, 6.9458e-02, 2.3178e-02, 4.8828e-02, + 4.7943e-02, 8.3466e-03, -5.9906e-02, -5.3894e-02, -6.7444e-02, + -7.5867e-02, -5.9174e-02, -1.2718e-02, 2.3300e-02, 4.2236e-02, + 1.3599e-01, 9.7885e-03, -1.0962e-01, 3.4668e-02, -4.9713e-02, + 3.2783e-05, 4.0741e-02, 5.3436e-02, 9.0210e-02, 8.6914e-02, + 7.8857e-02, 1.4172e-01, 2.4078e-02, 1.0687e-01, -6.6589e-02, + -1.5771e-01, -8.2703e-02, 9.8694e-02, -4.8904e-03, -4.0649e-02, + -1.0876e-01, -2.2232e-02, 5.2094e-02, -4.8370e-02, 7.5439e-02, + 1.2535e-02, -6.5674e-02, 2.8503e-02, -2.5620e-02, 1.3664e-02, + 6.7993e-02, -1.0150e-01, -7.6599e-02, 1.1528e-02, 3.7231e-02, + -8.5205e-02, 2.2018e-02, -5.7281e-02, -1.9135e-02, 4.3518e-02, + 8.1604e-02, -1.0181e-01, -3.8147e-02, 1.2436e-02, -8.5632e-02, + 6.4636e-02, -4.8584e-02, 6.6772e-02, -5.1300e-02, 2.3651e-02, + 1.0858e-01, -1.1981e-01, 3.7632e-03, 5.8533e-02, -3.1342e-02, + 1.3403e-01, -9.2773e-02, 9.3323e-02, -2.6443e-02, 8.2092e-02, + -5.4688e-02, 1.0303e-01, -7.1960e-02, -1.4185e-01, -1.2537e-01, + -1.2802e-02, -1.9669e-02, 6.4880e-02, 4.2358e-02, -4.2839e-03, + 1.2189e-01, -6.2828e-03, 3.8605e-02, -1.5038e-02, -9.3140e-02, + 4.7028e-02, 6.0883e-02, -4.2938e-02, 5.2071e-04, 2.1610e-03, + 8.2397e-02, 2.5977e-01, -3.5496e-03, 9.6497e-02, 8.2703e-02, + 5.9357e-02, -1.5783e-03, -8.3984e-02, 3.4790e-02, -5.7556e-02, + -4.6875e-02, 3.5143e-04, 1.0303e-01, 1.6235e-02, -3.5797e-02, + 7.6355e-02, 2.5269e-02, 
4.0283e-02, 3.3722e-02, 5.1483e-02, + 8.1909e-02, 4.9896e-02, -6.7932e-02, -1.2335e-01, -6.5613e-02, + 1.2131e-02, -2.9205e-02, -9.3140e-02, -2.8885e-02, 2.0538e-02, + 9.0698e-02, -5.4443e-02, 8.4900e-02, 2.9633e-02, 5.1758e-02, + 1.3763e-02, 6.9519e-02, 6.0608e-02, 1.0583e-01, -2.4429e-02, + -7.5073e-02, -3.4241e-02, 9.9915e-02, -6.2347e-02, 4.7493e-03, + 3.2978e-03, 2.6535e-02, 1.1444e-04, -2.0798e-02, 7.9834e-02, + -4.9347e-02, -1.0492e-01, 1.5045e-02, -1.1493e-01, -2.9953e-02, + -1.4519e-02, -2.4918e-02, 3.1372e-02, 6.4209e-02, 1.0780e-02, + -5.0507e-02, -1.4931e-02, -1.0931e-01, 7.7019e-03, -1.9385e-01, + 4.1168e-02, -5.6366e-02, -7.5562e-02, -6.1890e-02, -1.1023e-01, + 2.9526e-02, 9.0515e-02, -3.5744e-03, 4.0466e-02, 4.5441e-02, + 3.4576e-02, 5.6793e-02, -1.0602e-01, -1.1299e-02, 5.1208e-02, + -4.1046e-02, 9.7198e-03, -6.1462e-02, -1.0052e-01, 8.4076e-03, + -9.4604e-02, 9.8953e-03, 6.1554e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([2.2836, 2.2623, 2.2728, 2.2751, 2.2677, 2.4896, 2.4848, 2.3706, 2.5277, + 2.2358, 2.3212, 2.3091, 2.3751, 2.3409, 2.3887, 2.2233, 2.3059, 2.3760, + 2.3349, 2.7988, 2.3484, 2.3604, 2.3025, 2.4139, 2.2065, 2.5094, 2.4309, + 2.4903, 2.1988, 2.2597, 2.3186, 2.3134, 2.3185, 2.6018, 2.2067, 2.2691, + 2.3309, 2.2402, 2.2445, 2.7682, 2.2321, 2.3523, 2.3791, 2.2468, 2.3558, + 2.3014, 2.3595, 2.3211, 2.3757, 2.1141, 2.2303, 2.4424, 2.4205, 2.3217, + 2.3337, 2.3350, 2.3622, 2.3076, 2.3821, 2.2884, 2.4378, 2.3676, 2.3492, + 2.2260, 2.3221, 2.2602, 2.3660, 2.3384, 2.4014, 2.7936, 2.4196, 2.3274, + 2.3671, 2.5887, 2.2795, 2.3208, 2.2872, 2.2780, 2.1992, 2.4337, 2.2422, + 2.2682, 2.2171, 2.3948, 2.2670, 2.2343, 2.2981, 2.2693, 2.2724, 2.2160, + 2.3139, 2.3121, 2.3384, 2.4798, 2.3535, 2.2585, 2.2772, 2.3278, 2.3495, + 2.3174, 2.4153, 2.5859, 2.3195, 2.4833, 2.2640, 2.2159, 2.3548, 2.2305, + 2.3506, 2.3109, 2.2594, 2.3875, 2.4088, 2.4293, 2.4857, 2.2573, 2.3575, + 2.3645, 2.4322, 2.2848, 2.3235, 
2.3897, 2.2430, 2.3598, 2.3608, 2.1504, + 2.5151, 2.5464, 2.3191, 2.3495, 2.1983, 2.2918, 2.4158, 2.3904, 2.4342, + 2.4049, 2.3032, 2.5024, 2.2924, 2.5428, 2.4114, 2.3645, 2.3113, 2.3470, + 2.2809, 2.2530, 2.2246, 2.3749, 2.3138, 2.5595, 2.3965, 2.3072, 2.4053, + 2.4625, 2.3788, 2.6980, 2.3840, 2.3440, 2.2617, 2.3744, 2.1746, 2.3023, + 2.2086, 2.4053, 2.4884, 2.2782, 2.2567, 2.5410, 2.3193, 2.3158, 2.3928, + 2.4509, 2.0716, 2.5628, 2.2551, 2.4581, 2.3956, 2.2217, 2.2878, 2.3877, + 2.3486, 2.2614, 2.3617, 2.2922, 2.2638, 2.4065, 2.2689, 1.0988, 2.3420, + 2.3585, 2.2917, 2.3827, 2.3319, 2.3020, 2.3903, 2.2736, 2.2319, 2.3806, + 1.3342, 2.3759, 2.3139, 2.2133, 2.2983, 2.4403, 2.3521, 2.4802, 2.5205, + 2.4101, 2.4336, 2.3705, 2.3335, 2.3805, 2.5011, 2.1977, 2.2888, 2.4008, + 2.2075, 2.4126, 2.3203, 2.4452, 2.4104, 2.2892, 2.4073, 2.3455, 2.3511, + 2.3958, 2.4004, 2.3555, 2.2310, 2.3726, 2.2519, 2.2059, 2.2110, 2.2125, + 2.1648, 2.4104, 2.3876, 2.2703, 2.3979, 2.2819, 2.3374, 2.3302, 2.2354, + 2.3461, 2.2734, 2.3462, 2.5224, 2.4833, 2.3291, 2.3517, 2.2777, 2.3085, + 2.3876, 2.3782, 2.3474, 2.3451, 2.4248, 2.3716, 2.6404, 2.3153, 2.3406, + 2.3860, 2.3607, 2.3962, 2.3189, 2.2672, 2.3711, 2.3791, 2.2194, 2.2803, + 2.4083, 2.2761, 2.4173, 2.2845, 2.3444, 2.5596, 2.4811, 2.4737, 2.4499, + 2.4351, 2.3279, 2.7294, 2.2321, 2.3936, 2.2265, 2.3656, 2.2984, 2.2823, + 2.5139, 2.2303, 2.2668, 2.2091, 2.2940, 2.3453, 2.3100, 2.5271, 2.5363, + 2.3898, 2.2919, 2.4632, 2.3279, 2.5609, 2.3796, 2.3439, 2.2990, 2.1915, + 2.4004, 2.2724, 2.3014, 2.3202, 2.2661, 2.4051, 2.3114, 2.4008, 2.2553, + 2.4169, 2.3254, 2.3748, 2.4071, 2.4148, 2.3257, 2.2542, 2.2565, 2.3255, + 1.0368, 2.4136, 2.4344, 2.2713, 2.3418, 2.3693, 2.3610, 2.3290, 2.2906, + 2.3320, 2.4559, 2.2263, 2.3971, 2.2848, 2.2835, 2.3296, 2.3135, 2.3973, + 2.4620, 2.3723, 2.3816, 2.4425, 2.4960, 2.3998, 2.3360, 2.3177, 2.1258, + 2.3319, 2.4550, 2.2678, 2.4137, 2.3452, 2.5231, 2.4271, 2.2389, 2.2411, + 2.3888, 2.3768, 2.4222, 2.3935, 
2.3259, 2.3471, 2.3140, 2.4708, 2.3969, + 2.3159, 2.4021, 2.5023, 2.3574, 2.2141, 2.4547, 2.3712, 2.2649, 2.3120, + 2.3954, 2.2454, 2.3630, 2.3928, 2.4318, 2.3398, 2.3917, 2.4447, 2.2607, + 2.3408, 2.3916, 2.3773, 2.3499, 2.4071, 2.3030, 2.4019, 2.2296, 2.4911, + 2.3300, 2.3643, 2.4153, 2.2525, 2.4206, 2.2427, 2.4401, 2.3681, 2.3906, + 2.4328, 2.2356, 2.2297, 2.3566, 2.2836, 2.2859, 2.5026, 2.3523, 2.4348, + 2.6588, 3.7533, 2.3396, 2.2670, 2.2848, 2.4338, 2.3247, 2.2380, 2.2849, + 2.3591, 2.4750, 2.3341, 2.2376, 2.3901, 2.6482, 2.2936, 2.4247, 2.4099, + 2.4332, 2.4702, 2.4999, 2.5136, 2.2909, 2.2548, 2.3399, 2.4379, 2.3890, + 2.1797, 2.2449, 2.4876, 2.4356, 2.2469, 2.3131, 2.5125, 2.3290, 2.3605, + 2.3724, 2.3760, 2.4266, 2.4542, 2.3139, 2.3399, 2.3044, 2.3643, 2.3527, + 2.2571, 2.3809, 2.3492, 2.3256, 2.3224, 2.2221, 2.3733, 2.3437, 2.1776, + 2.3822, 2.3127, 2.3458, 2.4210, 2.4154, 2.3127, 2.5117, 2.3292, 2.3855, + 2.3198, 2.4858, 2.2322, 2.3392, 2.1891, 2.4567, 2.4760, 2.2336, 2.4192, + 2.2877, 2.2794, 2.3042, 2.2707, 2.3126, 2.2663, 2.4194, 2.3827, 2.2848, + 2.2669, 2.3965, 2.3590, 2.2713, 2.3002, 2.3128, 2.3614, 2.3517, 2.3291, + 2.5153, 2.2987, 2.3548, 2.3768, 2.3937, 2.2768, 2.4841, 2.3234, 2.1443, + 2.2820, 2.2877, 2.2695, 2.4717, 2.3183, 2.2493, 2.6564, 2.2307, 2.3746, + 2.3745, 2.3673, 2.2790, 2.3275, 2.3607, 2.3602, 2.3226, 2.5189, 2.3797, + 2.3876, 2.2800, 2.4039, 2.2463, 2.3176, 2.3052, 2.4909, 2.5803, 2.4538, + 2.3132, 2.3479, 2.4377, 2.3054, 2.5416, 2.4145, 2.2907, 2.3398, 2.4638, + 2.3699, 2.3819, 2.2982, 2.4746, 2.5282, 2.3545, 2.3307, 2.3648, 2.3536, + 2.3116, 2.3079, 2.3169, 2.6240, 2.2975, 2.3685, 2.4311, 2.3693, 2.4842, + 2.3711, 2.3410, 2.5237, 2.3753, 2.3804, 2.1967, 2.4785, 2.3339, 2.3392, + 2.4601, 2.2095, 2.4320, 2.2722, 2.2876, 2.3902, 2.3450, 2.3190, 2.3629, + 2.3394, 2.3433, 2.3516, 2.2867, 2.3276, 2.3313, 2.3305, 2.3285, 2.5351, + 2.4824, 2.2602, 2.3945, 2.3668, 2.3317, 2.1739, 2.2305, 2.4108, 2.3467, + 2.2079, 2.4747, 2.2943, 2.3790, 
2.6729, 2.5252, 2.3475, 2.3181, 2.2528, + 2.2801, 2.3034, 2.2967, 2.6657, 2.4441, 2.3536, 2.4201, 2.3460, 2.4714, + 2.2284, 2.4259, 2.3764, 2.4271, 2.5601, 2.4350, 2.3774, 2.3004, 2.3895, + 2.4669, 2.4508, 2.3371, 2.3227, 2.5032, 2.3196, 2.3379, 2.3778, 2.3761, + 2.1820, 2.3314, 2.1958, 2.2008, 2.4098, 2.5979, 2.2799, 2.3461, 2.4614, + 2.3359, 2.3336, 2.6203, 2.4267, 2.2657, 2.3032, 2.3602, 2.2581, 2.4675, + 2.2453, 2.3804, 2.2823, 2.1461, 2.2759, 2.5891, 2.2736, 2.2670, 2.2639, + 2.2100, 2.3349, 2.5720, 2.3157, 2.2162, 2.3284, 2.3788, 2.2535, 2.2463, + 2.3431, 2.4069, 2.3624, 2.4723, 2.2752, 2.4635, 2.9034, 2.2371, 2.4090, + 2.2496, 2.3304, 2.3411, 2.2727, 2.4774, 2.3166, 2.3610, 2.3738, 2.3590, + 2.3766, 2.2283, 2.3738, 2.3774, 2.3004, 2.4230, 2.3889, 2.3237, 2.2233, + 2.4146, 2.3798, 2.4000, 2.4473, 2.4295, 2.2930, 2.1776, 2.4433, 2.2065, + 2.3386, 2.4510, 2.4059, 2.2630, 2.3705, 2.3392, 2.3462, 2.4111, 2.3758, + 2.4813, 2.3263, 2.3184, 2.4310, 2.4148, 2.3368, 2.3599, 2.3383, 2.2286, + 2.3814, 2.3993, 2.3483, 2.3905, 2.3331, 2.3333, 2.2417, 2.4162, 2.2706, + 2.3414, 2.3442, 2.2891, 2.6559, 2.2701, 2.2298, 2.2341, 2.2531, 2.4576, + 2.3354, 2.2710, 2.1691, 2.3822, 2.3545, 2.3533, 2.2163, 2.4231, 2.4164, + 2.2920, 2.3275, 2.4484, 2.2593, 2.3305, 2.3374, 2.3359, 2.3922, 2.3363, + 2.3097, 2.3412, 2.3888], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 8.6604e-02, -2.9286e-01, 1.9763e-01, -1.2163e-01, -3.8894e-01, + -2.1842e-01, -1.2438e-01, 1.4834e-01, 6.4301e-02, -7.7811e-02, + -1.3064e-01, -8.3709e-02, 1.7816e-01, -2.4458e-01, 5.9149e-02, + -2.5113e-02, 8.0249e-02, -7.7466e-02, -3.5921e-01, -3.3318e-01, + -2.8440e-01, 5.6322e-02, 2.3694e-01, -3.0856e-01, 1.0845e-01, + 1.2182e-01, -2.4052e-01, 1.5255e-01, 3.2016e-02, -7.5441e-02, + 4.5930e-02, 7.0053e-02, 1.4111e-01, -4.6860e-01, 8.0392e-02, + 1.4536e-01, -9.4315e-02, 3.2234e-02, 2.7123e-01, -5.3386e-01, + -1.3102e-01, 8.6262e-02, 1.8063e-01, -4.6477e-02, -3.9122e-02, + -2.0202e-01, -2.6970e-01, 
-7.5590e-02, 1.5426e-01, 1.8237e-01, + -1.1917e-01, -3.6866e-01, -1.2210e-01, -1.5103e-02, -1.8397e-01, + 5.8603e-01, 1.4954e-01, -4.9178e-03, -4.2942e-02, 1.1036e-01, + 3.8296e-04, 3.2313e-01, 1.6136e-01, -1.1096e-02, 2.0610e-01, + 3.6640e-02, -2.3877e-02, -5.1362e-02, -4.5582e-01, -8.1244e-01, + -8.7015e-03, -2.3145e-01, 1.4538e-01, -4.8757e-01, -9.1498e-02, + -2.2466e-01, 2.7441e-01, -2.5719e-01, -1.5121e-01, 4.0123e-01, + 1.6970e-01, -1.8025e-01, 2.7369e-03, -5.2702e-02, -2.5943e-01, + 1.9232e-01, 2.6753e-04, 1.2177e-01, 2.5853e-01, -1.5520e-01, + 1.1621e-01, -2.7038e-02, -1.6476e-01, 1.5174e-01, -5.6310e-02, + -6.6330e-02, -2.3552e-01, -1.8034e-01, -7.3568e-02, 1.8432e-01, + -3.8841e-01, -2.1282e-01, -9.6584e-02, -2.2054e-01, 2.3119e-01, + 7.7053e-02, -5.6909e-02, -1.3414e-01, 8.0427e-02, 2.1012e-01, + 2.4924e-01, 1.1686e-01, -1.3254e-01, 5.9697e-02, -1.7765e-01, + -1.6437e-01, 3.3616e-01, 8.5083e-03, -2.5418e-02, 2.6027e-01, + -1.6043e-01, 8.2424e-02, -1.3150e-01, -5.3904e-02, 1.0872e-01, + 1.5643e-01, 1.3161e-01, 5.1541e-01, -1.5884e-01, -1.8408e-02, + 1.2766e-01, -8.4682e-02, 3.3091e-01, 1.6210e-01, 2.0954e-01, + 1.3627e-01, -1.8098e-01, -2.8791e-01, 1.5007e-01, -4.2789e-01, + 1.8015e-01, 1.0072e-01, 2.2316e-01, 1.5218e-01, 1.0389e-01, + -1.3575e-01, 1.2173e-01, -3.1166e-02, -6.5930e-02, -2.7193e-01, + -7.4922e-02, 1.2258e-01, 2.0601e-01, -3.3073e-01, -2.0484e-01, + 6.6838e-01, 1.8386e-01, -4.5000e-01, 7.0386e-02, -1.9580e-01, + 1.2700e-01, -8.3763e-02, 4.0264e-03, -5.8593e-02, 2.2412e-01, + 1.7506e-04, 1.8127e-01, 7.3059e-02, 1.8272e-01, -7.2565e-02, + 6.6788e-02, -2.5547e-01, -2.3021e-01, 6.6056e-01, 7.9189e-03, + 6.3060e-02, 2.2812e-01, 5.6355e-02, -1.2058e-01, 5.2562e-02, + -4.2476e-01, 2.2737e-02, -2.9150e-02, 8.7327e-02, -1.3205e-01, + 5.9143e-01, 3.7196e-02, 1.3226e-01, 1.4421e-01, -2.7715e-02, + -4.6107e-01, 1.6758e-02, 1.8647e-01, 6.3556e-02, -3.9387e-02, + 4.3825e-02, 6.2616e-02, -1.9770e-01, -4.2564e-01, 3.2139e-01, + 1.8278e-02, 2.9093e-02, 
1.3184e-01, -1.8062e-01, 3.7262e-02, + 5.3266e-02, 2.6615e-01, -8.8052e-02, -4.1996e-02, -3.9655e-01, + 1.2197e-01, -7.8418e-02, -5.1390e-01, -7.3780e-02, 1.8691e-01, + -7.2425e-02, -5.5631e-02, -1.4905e-01, -2.6188e-01, 3.4893e-02, + 3.2901e-01, -1.4541e-01, -4.0933e-01, -1.2446e-01, 4.5600e-02, + 6.2080e-02, -2.2978e-01, 1.6788e-02, -1.0107e-01, 1.8279e-01, + -7.4715e-02, 1.4252e-01, 1.1494e-01, 1.2269e-01, -2.4501e-01, + -2.5672e-01, 2.9405e-01, 9.4859e-02, -3.2469e-03, -1.3721e-01, + -1.6615e-01, 2.3003e-01, 2.1743e-01, -3.5526e-01, -1.8183e-01, + 5.2396e-03, -3.8641e-01, 2.7500e-02, -2.1637e-01, 2.3685e-01, + -1.1876e-01, 7.4968e-02, 3.1369e-01, 6.9360e-02, -1.0279e-01, + -2.0016e-01, 3.0319e-01, -4.0203e-01, -4.8866e-01, -3.3533e-01, + -4.9880e-02, -1.4934e-01, 1.5540e-01, 3.2154e-02, 7.3181e-02, + 6.0271e-02, 1.6326e-01, 6.9507e-03, 1.4214e-01, -6.3119e-02, + -2.9987e-01, 7.6350e-02, 3.7965e-02, 1.5102e-03, 1.6799e-01, + -2.3439e-01, -2.9438e-01, 2.4812e-01, -1.9865e-01, -8.4152e-02, + 5.5912e-03, -8.7288e-01, 1.2965e-01, 3.3379e-01, 2.3957e-01, + -1.5091e-02, -1.2882e-01, -8.1023e-02, -1.2267e-01, 1.1010e-01, + -2.1774e-01, 3.1341e-02, 1.2947e-01, 1.6552e-03, 5.0242e-03, + -4.7059e-01, -1.4026e-01, -6.4873e-02, 2.3260e-01, -2.9756e-01, + -3.5772e-01, -3.4365e-01, -3.4945e-01, -1.9062e-01, 4.4129e-01, + 8.3628e-02, -7.4316e-02, 1.8464e-01, -1.5011e-01, -1.9917e-01, + 7.2023e-02, -1.5736e-01, 1.1195e-01, -1.0936e-03, 2.1782e-02, + 1.7796e-01, 2.2404e-01, -7.0521e-02, -1.3729e-01, 9.3621e-02, + -4.7864e-01, -2.9099e-02, 9.3240e-02, -1.6473e-01, -1.7154e-01, + -1.6437e-01, -3.6914e-01, 2.9099e-01, 1.8210e-01, 9.8001e-02, + -1.9131e-02, -6.1023e-02, -2.6954e-02, 2.3269e-02, -1.0748e-01, + 2.3885e-02, -3.0717e-01, 7.0289e-03, -2.9415e-01, 7.8834e-02, + 2.2555e-01, -1.4750e-01, -4.4716e-01, 3.1579e-01, -2.2415e-01, + 2.3326e-01, -3.4718e-01, -8.0136e-02, -4.9056e-02, 1.8268e-01, + 1.2392e-01, -9.0308e-02, -1.1581e-01, 3.6533e-02, 1.3277e-01, + 1.6580e-01, 
-2.9610e-01, 1.5124e-02, 3.1983e-02, -9.6834e-02, + 4.0151e-02, -2.6227e-02, 6.1991e-02, -3.5487e-01, 1.0990e-01, + -3.1649e-02, -6.0119e-02, 1.9958e-01, 4.1921e-02, -4.4497e-02, + -1.2652e-01, 1.2113e-01, -3.8615e-02, -5.6424e-02, -1.0027e-01, + 2.4490e-01, 5.1361e-02, 3.0297e-01, -9.0036e-02, -1.8055e-01, + -1.3455e-01, -2.0498e-01, -2.2928e-01, 1.1047e-01, 6.0962e-02, + -4.5831e-01, -2.5677e-01, 3.0335e-01, 1.6979e-01, 5.4195e-03, + 1.8430e-01, -4.2833e-01, 1.5375e-01, 7.8641e-02, -2.6052e-01, + 4.1466e-03, -1.3320e-01, 5.0047e-02, -1.3446e-01, -5.9763e-02, + -2.1034e-01, 1.8951e-01, -1.7249e-01, -2.8103e-01, -5.9848e-02, + 8.7397e-02, 3.8154e-02, 1.6206e-02, -9.0562e-02, 3.1392e-01, + -2.5056e-02, -4.6812e-01, -1.3958e-01, 7.6124e-02, 1.3683e-01, + -1.4261e+00, 2.3469e-02, -4.5264e-02, 8.6404e-02, 1.7327e-01, + 9.3496e-02, -6.0595e-02, 8.4582e-02, 2.0683e-01, -2.6103e-02, + 1.3176e-01, -2.3246e-01, 6.2243e-02, -2.7171e-01, 3.5642e-02, + -4.2095e-01, -2.5752e-01, -1.3984e-01, -1.1365e-01, -7.5260e-02, + -8.9288e-02, -5.2441e-03, 1.1592e-01, -7.2560e-02, -8.0440e-02, + -1.6767e-01, 1.1599e-01, 5.8748e-02, -3.3971e-01, -2.0054e-01, + 5.3322e-02, 2.9757e-01, 3.9025e-01, -1.4760e-01, -1.7380e-01, + -5.3329e-02, -2.1588e-02, -1.7357e-01, -1.0301e-01, -1.2302e-01, + -1.8281e-01, -1.6062e-01, -2.5224e-01, 4.3526e-02, -2.0653e-01, + -1.2789e-01, -4.3447e-02, -1.1119e-01, 1.6140e-01, -1.3325e-01, + 1.7198e-01, -1.1098e-01, 9.2506e-02, -9.1265e-02, -4.8381e-02, + 1.0725e-01, 2.3545e-02, -1.3973e-01, 1.8025e-01, -2.7206e-01, + -2.6945e-01, -4.6276e-02, -5.3086e-01, -2.1206e-03, -5.7141e-03, + 2.9229e-02, 4.3049e-04, 3.9786e-01, 1.9325e-02, -1.4418e-01, + 3.4156e-02, -3.8111e-02, -6.2930e-03, -7.2592e-02, 1.1503e-01, + -1.9450e-01, -7.8893e-02, -4.4225e-02, -1.5106e-01, -1.5963e-01, + -1.8364e-01, 1.1715e-01, -9.9646e-02, -3.7290e-02, -3.4496e-02, + 1.2110e-01, -4.7361e-03, 7.1833e-02, -2.6530e-02, -1.9589e-01, + 2.0624e-01, -1.1726e-01, -6.6253e-02, 1.3797e-01, 9.6431e-02, 
+ -3.9810e-01, 1.4199e-01, -1.7143e-01, -1.1868e-01, -2.3157e-01, + 2.4837e-01, 3.2089e-01, -2.0151e-01, -1.6186e-01, 5.8250e-01, + 2.3087e-02, -2.6994e-02, -7.5188e-02, 1.6035e-01, -9.8618e-02, + -6.2415e-02, -2.8756e-02, 1.6325e-01, -2.2189e-01, 1.2451e-01, + -3.8018e-02, 1.2878e-01, -6.3290e-02, -5.8006e-02, -1.2467e-01, + -3.6006e-01, 1.5278e-01, -1.8236e-01, 3.0076e-01, -2.6767e-01, + 2.2054e-02, 1.0192e-02, 4.0691e-02, 3.1209e-01, 3.8954e-01, + -3.6524e-02, -3.8531e-01, -4.2814e-01, -1.7692e-01, 1.1740e-01, + 7.4477e-02, 2.0804e-02, 2.5254e-01, -1.4224e-01, -1.9235e-01, + 3.5380e-01, 1.1057e-01, -1.0711e-01, -1.6409e-01, -1.2642e-01, + -1.1363e-01, -7.9586e-01, 8.1844e-02, 1.3153e-01, -4.1466e-02, + 1.7913e-01, -3.3507e-01, 3.3697e-03, 6.7601e-02, -2.2027e-01, + -6.6688e-02, -1.2807e-02, 1.7038e-01, -3.2307e-01, 2.0768e-01, + 2.2008e-01, -9.4049e-02, -1.8320e-01, -1.4369e-01, 2.4456e-01, + 2.0687e-01, -2.6573e-01, -1.9569e-01, 2.2473e-01, 1.6415e-01, + -3.9568e-02, 8.5480e-02, -3.2554e-02, -4.2158e-02, -9.3549e-02, + -3.7789e-02, -1.6005e-01, 1.0470e-01, -3.8674e-01, -9.9921e-02, + 2.2348e-01, 2.8183e-01, -1.1313e-01, 1.7603e-01, -6.7394e-02, + 1.0005e-01, 1.6291e-01, 7.7057e-03, -1.8000e-01, 1.7484e-01, + -7.1115e-02, -3.3143e-02, -6.5741e-01, -2.4860e-01, 2.0984e-01, + -2.1012e-01, -1.2822e-01, -2.5784e-01, -1.0285e-02, 1.4045e-01, + 3.4105e-01, 5.2854e-01, -5.3262e-02, -2.7384e-01, 1.2843e-01, + -2.7380e-02, 1.8670e-02, -1.9795e-01, 2.8344e-01, -5.1208e-02, + 4.4121e-01, 3.3801e-03, 3.1375e-01, 9.6573e-02, -1.9746e-01, + -2.7762e-01, 3.9339e-01, 7.2044e-02, -8.3836e-03, -3.9421e-01, + -8.8247e-02, 9.3472e-02, -1.6321e-01, -3.0705e-02, -1.7254e-01, + -3.9066e-02, 5.7862e-02, -3.4114e-02, 2.1111e-01, -4.5305e-01, + 1.8462e-01, -8.1985e-02, -1.9718e-01, -2.1379e-02, 7.6429e-02, + -4.3138e-02, -2.3068e-01, -1.6621e-02, -3.2513e-01, 5.3912e-02, + -1.1320e-01, 3.4280e-01, 2.6733e-02, -2.1735e-01, -2.3797e-01, + 1.5833e-01, -1.4166e-02, 3.5680e-01, 1.5322e-01, 
-8.9745e-02, + -1.9738e-01, 1.4938e-01, 9.4129e-02, 2.2862e-01, 8.3815e-02, + -2.7415e-01, 2.5166e-02, -3.1954e-01, 3.3088e-01, 3.3316e-01, + 7.9626e-02, 2.1199e-01, 3.0189e-01, 7.0640e-02, 1.4281e-01, + -2.7548e-01, 1.6440e-01, -2.1965e-01, -3.7695e-01, 1.1076e-01, + 3.7800e-02, 7.4885e-02, -3.4464e-02, 1.4313e-01, -6.0915e-02, + -2.3558e-01, -9.9578e-02, -2.5076e-01, -2.4921e-01, -2.6538e-01, + 4.9933e-02, -1.5339e-01, -1.0247e-01, -1.9087e-01, 1.2254e-01, + 3.0902e-01, -1.7322e-01, -9.5716e-02, -1.0661e-01, -1.5046e-02, + 3.5670e-01, -2.1956e-01, 1.7981e-01, 2.0513e-02, -2.8031e-01, + -1.6479e-01, 1.0474e-01, -3.1772e-01, -1.5772e-01, 3.5148e-01, + 3.1563e-01, -1.5615e-02, 2.0267e-01, -4.0184e-01, 1.1223e-01, + 1.9680e-01, -7.8862e-02, -1.6144e-02, 1.5175e-01, -3.1454e-02, + -5.4835e-02, -7.4258e-02, -9.8505e-03, -9.8693e-02, -2.3574e-02, + -1.9845e-01, 1.3589e-02, -5.6328e-02, 1.8877e-01, -1.1442e-01, + 1.5472e-01, -9.2352e-02, -2.1861e-03, -5.0178e-01, -1.4711e-01, + 2.4483e-02, 1.7381e-01, 1.1555e-01, -1.1969e-01, 4.7179e-01, + -4.1360e-01, 5.9100e-02, 1.4711e-01, 3.4214e-01, 4.1288e-02, + 2.5513e-01, -1.7110e-01, -1.9608e-01, -4.2552e-02, -2.5904e-01, + -1.2025e-01, -2.1390e-01, -1.7674e-02, 3.4328e-01, 3.1743e-02, + 2.8413e-02, -1.5858e-01, 2.3370e-02, 2.3553e-01, 8.7197e-02, + 8.3802e-02, -1.1817e-02, 1.2176e-01], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 2.9816e-02, 4.1199e-03, 8.1406e-03, ..., 1.6235e-02, + -2.6321e-02, 4.1542e-03], + [-9.5673e-03, -3.6621e-02, -5.4779e-03, ..., -1.4587e-02, + 9.2392e-03, -7.4482e-04], + [ 7.9727e-03, -1.2749e-02, 1.3336e-02, ..., -3.6591e-02, + -5.0735e-04, -1.6289e-03], + ..., + [ 2.0859e-02, -7.8630e-04, -1.1818e-02, ..., 7.7069e-05, + -3.9337e-02, -8.6823e-03], + [-9.7809e-03, -6.9389e-03, -4.0497e-02, ..., 1.0925e-02, + -5.8136e-03, 1.8625e-03], + [-2.3834e-02, -9.3536e-03, -4.1656e-03, ..., 1.7807e-02, + -1.5495e-02, -1.8188e-02]], device='cuda:1', dtype=torch.float16, + 
requires_grad=True)Parameter containing: +tensor([ 0.1714, 0.2435, 0.2001, ..., 0.0595, 0.0106, -0.0736], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0021, 0.0144, -0.0298, ..., -0.0013, -0.0110, -0.0228], + [-0.0030, -0.0152, 0.0158, ..., 0.0002, 0.0506, -0.0149], + [ 0.0067, -0.0117, -0.0151, ..., -0.0057, -0.0125, -0.0014], + ..., + [ 0.0228, 0.0216, 0.0058, ..., 0.0212, -0.0056, 0.0391], + [ 0.0132, 0.0172, -0.0291, ..., -0.0060, -0.0128, -0.0266], + [-0.0192, -0.0129, 0.0062, ..., -0.0020, 0.0054, -0.0218]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([-3.2935e-01, 3.8574e-01, -1.5894e-01, -7.9712e-02, -1.2671e-01, + -1.6199e-01, 1.0590e-01, -2.9614e-01, 2.3022e-01, 3.6469e-02, + -5.2643e-02, 3.4033e-01, 1.4880e-01, -8.3801e-02, 4.8340e-01, + 1.7334e-01, -1.7029e-01, 8.2764e-02, -1.4929e-01, -2.7979e-01, + 1.2170e-01, 1.6174e-01, 2.1887e-01, -5.0903e-02, 2.0105e-01, + 1.3806e-01, -1.4636e-01, -3.9154e-02, -3.9307e-01, 1.2183e-01, + 1.6663e-01, -1.3965e-01, -9.2239e-03, -1.0297e-01, -5.9967e-02, + -4.0710e-02, -1.7126e-01, -1.7651e-01, 2.1472e-01, 1.6138e-01, + -1.6394e-01, 4.7821e-02, 2.2400e-01, -2.2632e-01, -3.0811e-01, + -1.8933e-01, -5.2704e-02, -1.0468e-01, -1.3489e-01, -1.2469e-01, + -1.3416e-01, 1.6504e-01, 3.1934e-01, 3.4814e-01, -1.9519e-01, + 6.3171e-02, 1.2305e-01, 7.3242e-02, -1.4343e-01, -9.5154e-02, + 2.2620e-01, 1.8762e-01, -2.3962e-01, -2.3669e-01, -2.3727e-02, + 2.4072e-01, -2.9541e-01, -1.2622e-01, -1.2805e-01, -8.8806e-02, + 3.1689e-01, 7.8674e-02, -5.3711e-02, -2.3657e-01, -9.0271e-02, + -2.0654e-01, -7.9346e-02, 3.0127e-01, 3.1299e-01, -8.1024e-03, + 7.9468e-02, 2.3962e-01, -5.1514e-02, -2.0771e-03, -4.1168e-02, + -4.1235e-01, 3.2129e-01, -4.1333e-01, -1.4758e-01, -5.6732e-02, + -2.8223e-01, -1.7761e-01, -1.2903e-01, 4.8755e-01, 2.5684e-01, + -1.8066e-02, -6.8970e-02, 8.1299e-02, 1.1688e-01, 1.9360e-01, + -3.4962e-03, -2.7197e-01, -1.5454e-01, 
2.5955e-02, -1.4087e-01, + -6.9824e-02, -1.1560e-01, 2.2192e-01, 2.2375e-01, -2.6562e-01, + 1.5686e-01, 1.7334e-01, -4.9927e-02, 1.7310e-01, -5.7373e-03, + 2.4084e-01, 1.3684e-01, 2.2937e-01, -5.5618e-03, -2.5342e-01, + 1.3458e-02, 1.4160e-01, 2.2644e-01, 1.7688e-01, 2.6855e-02, + -7.4097e-02, 1.1395e-01, 6.5613e-03, -1.8433e-02, 4.7803e-01, + 2.7612e-01, -1.6211e-01, 1.3245e-01, 2.4368e-02, 1.6711e-01, + -6.8909e-02, 2.5781e-01, -3.1079e-01, 4.4128e-02, 2.2339e-01, + -1.6638e-01, 8.4900e-02, 1.5526e-02, 8.4290e-02, -3.7256e-01, + 3.8208e-01, -6.8848e-02, -2.6880e-01, -1.7639e-01, 2.2156e-01, + -2.9980e-01, -6.5430e-02, 2.6904e-01, -1.6040e-01, -4.2749e-01, + 1.1218e-01, -3.0786e-01, 8.3313e-02, -3.7134e-01, -3.5669e-01, + -2.5684e-01, -2.1765e-01, 2.4658e-01, -2.2327e-01, 3.3081e-02, + 1.3037e-01, 8.6731e-02, -3.2007e-01, -7.5500e-02, 1.8738e-02, + 2.0117e-01, -2.1680e-01, -8.2779e-03, 6.8665e-02, -1.8115e-01, + 2.6733e-01, -2.4634e-01, -2.5977e-01, 4.7852e-01, 1.2781e-01, + -6.1523e-02, -2.2485e-01, -3.7256e-01, 1.5030e-02, 5.9937e-02, + 1.1444e-01, -1.5002e-01, 3.1226e-01, 2.8613e-01, 2.8467e-01, + -9.7427e-03, 7.5317e-02, -2.1643e-01, -3.8391e-02, 3.1201e-01, + 1.2268e-01, 1.2854e-01, 1.3464e-01, -1.2109e-01, 2.0679e-01, + -2.4979e-02, -2.1936e-01, -2.6535e-02, 4.0802e-02, -2.4365e-01, + -4.5605e-01, -2.5177e-02, -1.5369e-01, -4.3481e-01, 9.3689e-03, + 7.9773e-02, -2.8027e-01, -7.6965e-02, 2.0410e-01, 2.3254e-01, + 2.7420e-02, 2.3975e-01, -6.6101e-02, 3.1128e-01, 5.5371e-01, + 1.3611e-01, 1.3748e-02, -1.9287e-02, 2.3083e-01, 1.9031e-01, + 1.8201e-01, -2.6672e-02, -4.7394e-02, 1.4185e-01, 2.6318e-01, + -3.7036e-01, -1.1749e-01, 3.3057e-01, 4.7821e-02, 1.2482e-01, + -7.0374e-02, 9.2957e-02, -7.6599e-02, -2.2131e-01, 4.1309e-01, + 3.6768e-01, -2.0154e-01, 2.7802e-02, 2.9404e-02, -1.5945e-02, + -3.2861e-01, 1.3159e-01, 5.8411e-02, -1.3281e-01, 2.9712e-01, + 1.4697e-01, 1.9373e-01, 5.9692e-02, 1.2610e-01, -1.6589e-01, + 3.5229e-01, 7.0496e-02, -3.3539e-02, 
-1.2396e-01, -1.1200e-01, + -2.5620e-02, -6.9824e-02, -9.5642e-02, 3.2251e-01, -2.1790e-01, + -4.8682e-01, 9.0942e-02, 3.2013e-02, 1.1603e-01, 1.4124e-01, + 1.4026e-01, -1.9885e-01, -2.9688e-01, -8.8440e-02, 1.8298e-01, + -1.5906e-01, -6.6345e-02, 6.5002e-02, -4.0576e-01, 1.2195e-01, + 9.2163e-02, -1.6528e-01, -1.2659e-01, 1.1407e-01, -3.2495e-01, + 1.0181e-01, -1.0797e-01, 2.7124e-01, -2.8027e-01, 2.2083e-01, + -1.7456e-01, 2.1265e-01, 1.8823e-01, -1.3452e-01, -2.2668e-01, + 1.1368e-02, -3.7207e-01, 3.0151e-01, -5.6763e-02, -1.4917e-01, + -3.9459e-02, 2.6147e-01, 8.5144e-03, -4.8523e-02, -1.7188e-01, + 2.4384e-02, -5.7800e-02, 2.4365e-01, -2.8223e-01, 2.0477e-02, + 6.5613e-02, -2.3010e-01, 3.4448e-01, 2.5659e-01, 2.1820e-02, + -3.4253e-01, -2.7881e-01, 3.2690e-01, -2.0728e-01, -5.1956e-03, + 6.8726e-02, -1.9824e-01, 1.7322e-01, -7.4036e-02, -9.8877e-01, + 8.1177e-02, -1.3452e-01, -2.7539e-01, 4.3793e-02, 1.9852e-02, + -2.7661e-01, -2.0767e-02, -8.2031e-02, 2.9564e-05, -2.8589e-01, + -2.9395e-01, -3.1201e-01, -4.1626e-02, 3.5938e-01, 1.5515e-01, + 5.5878e-02, -7.8583e-03, 3.3386e-02, 1.8826e-03, -8.0444e-02, + -4.8065e-02, -1.8005e-01, -7.5500e-02, -2.7313e-02, 8.1787e-02, + 3.3887e-01, 9.7046e-02, 5.3497e-02, -5.3772e-02, -1.6541e-01, + 1.7053e-01, -1.5942e-01, -6.8054e-02, -1.9263e-01, 4.4238e-01, + -3.9648e-01, -1.4990e-01, -6.9771e-03, 2.2156e-01, 1.9409e-02, + 3.0350e-02, -3.6377e-01, -6.2164e-02, -4.2261e-01, -1.4359e-02, + -2.1912e-01, 1.3940e-01, 1.7603e-01, 2.3608e-01, -6.6895e-02, + -1.6980e-01, 3.4088e-02, -3.2867e-02, 3.6206e-01, -2.0798e-02, + 3.1836e-01, -2.0276e-01, -8.2275e-02, -4.9146e-01, 1.3928e-01, + -1.0724e-01, -2.1484e-01, 1.6455e-01, 3.1030e-01, -1.0187e-01, + 1.3757e-01, 1.0126e-01, 1.2524e-01, 1.7786e-01, -5.7220e-02, + -7.0610e-03, -4.4946e-01, -2.2522e-01, 2.6562e-01, -2.2852e-01, + -3.4839e-01, -7.1228e-02, -3.4277e-01, -1.9897e-01, 1.2482e-01, + 2.4219e-01, -5.1605e-02, 3.6987e-02, -8.5999e-02, -1.0681e-03, + 3.8745e-01, 1.7059e-02, 
3.6304e-01, 1.7737e-01, 2.0703e-01, + -2.0764e-01, -1.8945e-01, -2.6489e-01, 2.0251e-01, 1.1841e-01, + 8.4717e-02, -2.7771e-02, -1.2170e-01, 1.9226e-02, -8.2703e-02, + 2.8003e-01, 4.7168e-01, 3.5522e-01, -3.5010e-01, 3.2776e-02, + 1.3367e-01, -8.9783e-02, -3.3752e-02, -4.7437e-01, 2.8198e-02, + 1.3965e-01, -3.7231e-01, 2.1448e-01, -6.5613e-02, 9.9731e-02, + 2.6840e-02, -1.1261e-02, 2.1423e-02, -2.4033e-02, -2.6318e-01, + 2.3254e-02, 2.4673e-02, 2.7979e-01, 3.0713e-01, -9.6588e-03, + 1.9104e-01, -1.9507e-01, 8.6365e-02, 3.8605e-02, -2.9199e-01, + -1.1090e-01, -2.3035e-01, 2.9370e-01, 1.1894e-02, 6.7932e-02, + 3.3722e-02, -3.5742e-01, 9.9976e-02, -2.8149e-01, -1.1664e-01, + 3.0029e-01, -2.9199e-01, 2.9346e-01, -3.6621e-01, -1.0321e-01, + -2.2949e-01, 3.4595e-01, -3.0249e-01, -6.8848e-02, -1.6101e-01, + -1.5503e-01, 4.5685e-02, -1.0468e-01, 1.5942e-01, -9.0759e-02, + -2.9346e-01, -1.0223e-02, 3.6835e-02, -4.5624e-02, 2.7100e-01, + -2.0190e-01, 2.2937e-01, 1.7078e-01, 3.1714e-01, -3.6401e-01, + 6.8703e-03, -1.0941e-02, 2.5586e-01, -1.5234e-01, 2.1582e-01, + 1.7944e-02, -1.3342e-01, -6.4941e-02, 2.3779e-01, -3.0322e-01, + 1.0791e-01, 2.9297e-03, -1.9910e-01, -2.0947e-01, 1.7529e-01, + 1.1121e-01, -2.4078e-02, 2.5586e-01, -9.3323e-02, 2.1960e-01, + -7.2449e-02, 6.2988e-02, -1.5356e-01, -2.4695e-01, 2.5659e-01, + 2.0288e-01, -1.0254e-01, -1.9873e-01, -4.1656e-02, 3.4698e-02, + 3.2495e-01, -1.0834e-01, -1.6418e-01, 8.2092e-02, -1.3513e-01, + -4.2236e-01, 1.7896e-01, 5.2521e-02, -5.5359e-02, 1.9800e-01, + 1.9336e-01, -3.6475e-01, 2.2620e-01, 2.6562e-01, 1.2292e-01, + -2.7390e-02, 3.2300e-01, -1.2280e-01, -1.2079e-01, 1.0083e-01, + 3.7549e-01, 1.1206e-01, 1.8875e-02, 1.7773e-01, 2.5244e-01, + 1.4648e-01, -4.9530e-02, -8.4991e-03, -1.9788e-01, -1.0077e-01, + 3.9917e-02, -2.8540e-01, -8.9340e-03, -3.8794e-01, 1.6736e-01, + 1.8445e-01, 2.2412e-01, -9.8267e-02, -3.8013e-01, -2.0129e-01, + 3.4180e-01, -1.2115e-01, -1.5613e-01, 7.4463e-02, -2.6270e-01, + -2.4048e-01, -5.8472e-02, 
-3.3960e-01, 1.8555e-01, -1.7114e-01, + -1.3318e-01, 2.9468e-01, -1.4648e-01, -1.2016e-02, 2.8540e-01, + 8.3313e-02, 9.5398e-02, 2.0251e-01, -9.3384e-02, 1.6907e-01, + 2.4487e-01, -1.0583e-01, -7.8186e-02, 5.3680e-02, -5.6183e-02, + -1.7627e-01, 1.6052e-01, -3.3478e-02, 1.2128e-01, 1.4038e-01, + 1.0828e-01, -3.8428e-01, 1.2634e-01, -1.5540e-01, -3.3447e-01, + 7.6965e-02, -1.8896e-01, -1.0107e-01, 2.1973e-01, 3.4277e-01, + 3.8574e-01, 1.4819e-01, -1.4624e-01, 9.7885e-03, 2.5024e-01, + 1.3757e-01, -2.7417e-01, -2.1741e-01, -9.9976e-02, 3.0835e-01, + -1.8018e-01, 4.4824e-01, 1.0400e-01, 4.7729e-01, 1.2622e-01, + 1.8738e-01, 2.1692e-01, 2.7417e-01, 1.1517e-01, 2.5952e-01, + -4.3945e-01, 8.9661e-02, 4.3335e-02, 2.8052e-01, 1.6235e-02, + 2.0288e-01, 5.7068e-02, 2.2083e-01, -2.4673e-02, 1.2561e-01, + -2.5684e-01, 2.1033e-01, -6.6528e-02, -7.8674e-02, -1.1284e-02, + -1.2122e-01, 5.8685e-02, -1.9739e-01, 1.5186e-01, 3.4241e-02, + 4.6802e-01, -1.2769e-01, 9.4238e-02, 2.8638e-01, -2.6831e-01, + 9.5642e-02, -1.6760e-01, -2.6074e-01, -2.9150e-01, -1.5710e-01, + 9.3201e-02, -4.3506e-01, 1.7371e-01, -1.8066e-01, 2.7026e-01, + 2.9785e-01, 1.4880e-01, 1.1169e-01, -2.6172e-01, 8.8257e-02, + 1.3013e-01, 2.1619e-01, 9.3750e-02, -2.0715e-01, -2.7710e-01, + -5.2551e-02, 1.4539e-01, 8.9600e-02, 3.7866e-01, 4.1275e-03, + 4.8193e-01, -1.8604e-01, -2.1716e-01, 6.0959e-03, -1.0056e-02, + -5.4199e-02, 1.5967e-01, 2.4597e-01, 2.0044e-01, -7.6294e-02, + 7.9803e-03, 2.3914e-01, -1.8079e-01, -1.5625e-01, 8.1604e-02, + 4.2456e-01, -5.4474e-02, -1.6064e-01, 2.0496e-01, -2.1008e-01, + 1.9641e-01, -1.2103e-01, 8.6823e-03, -1.7273e-01, -5.8319e-02, + 4.7485e-02, 5.0812e-02, 5.5313e-03, -1.8201e-01, 1.2939e-01, + 1.9104e-02, -1.1429e-02, -1.0291e-01, 4.6021e-02, 1.1914e-01, + 1.4368e-01, -4.7900e-01, -1.0107e-01, 9.9426e-02, -1.8326e-02, + 1.8005e-01, -2.1045e-01, -1.8237e-01, -3.7915e-01, 8.7585e-02, + -3.8483e-02, 1.7944e-01, 3.6938e-01, 1.6003e-01, -6.0577e-03, + -1.0689e-02, -2.7319e-01, 
4.5972e-01, 1.4473e-02, -1.2939e-01, + 1.8030e-01, 4.9500e-02, 3.5553e-02, -1.5106e-03, 6.2805e-02, + -3.1299e-01, -1.9531e-01, -3.5132e-01, -2.3438e-01, 1.8042e-01, + -2.4628e-02, -4.4189e-02, -1.2659e-01, -2.0279e-02, 1.0699e-01, + -3.8574e-02, 1.4636e-01, -1.8506e-01, -5.8746e-04, -3.5498e-01, + 1.2494e-01, 1.4441e-01, -7.9041e-02, 1.2256e-01, -4.6631e-02, + 1.5662e-01, 3.2642e-01, -3.0811e-01, 1.6040e-01, -1.2634e-01, + 8.5449e-02, -9.7534e-02, -2.6001e-01, 5.2539e-01, 1.1804e-01, + -1.3721e-01, -8.4290e-02, -4.2285e-01, -2.7618e-03, 2.3938e-01, + -1.0718e-01, -1.5405e-01, 2.5854e-01], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.8845, 1.8352, 1.8765, 1.8810, 1.5563, 1.8842, 1.8917, 1.8105, 1.9868, + 1.8980, 1.8124, 1.8699, 1.9132, 1.9105, 1.8045, 1.8734, 1.9492, 1.9584, + 1.8736, 0.5699, 1.9376, 1.8571, 1.8018, 1.7807, 1.9045, 1.8593, 1.8650, + 1.9069, 1.7912, 1.9275, 1.9020, 1.8200, 1.8223, 1.9474, 1.8166, 1.9292, + 1.8478, 1.8643, 1.7707, 1.7729, 1.8149, 1.9277, 1.8711, 1.8703, 1.8980, + 1.8339, 1.8191, 1.9654, 1.9062, 1.8221, 1.9507, 1.9744, 1.7754, 1.7398, + 1.8771, 1.9682, 1.9305, 1.9569, 2.0626, 1.9420, 2.0490, 1.8973, 1.9315, + 1.8269, 1.8913, 1.8603, 1.8191, 1.8255, 1.8268, 1.6817, 1.9641, 2.0235, + 1.8896, 1.8203, 1.9921, 1.9167, 1.9540, 1.9359, 1.7891, 1.9293, 1.9515, + 1.8066, 1.9256, 1.8339, 1.8488, 1.8092, 1.9274, 1.9570, 1.8335, 2.0225, + 1.9396, 1.8363, 1.9361, 1.8342, 1.8680, 1.8073, 1.8952, 1.9872, 1.9524, + 1.8325, 1.9858, 1.9102, 1.8231, 1.9491, 1.9409, 1.9810, 1.8556, 1.7757, + 1.9584, 1.9158, 1.9068, 1.8165, 2.0067, 1.8175, 1.8685, 1.9612, 1.8651, + 1.9308, 1.9786, 1.8803, 2.0131, 1.9723, 1.8474, 1.9064, 1.8661, 1.7817, + 2.0292, 1.9366, 2.0245, 1.9169, 1.8463, 1.9134, 1.9453, 1.9029, 1.7884, + 1.8547, 1.7968, 1.9072, 1.8114, 1.9171, 1.9438, 1.9655, 1.9935, 1.9752, + 1.8150, 1.8036, 1.8647, 1.8392, 1.9946, 1.9759, 1.7417, 1.8785, 1.8786, + 1.9880, 1.8447, 1.5194, 1.9745, 1.8525, 2.0701, 
1.8832, 1.7878, 1.9596, + 1.9374, 1.9127, 1.9849, 1.8117, 1.8179, 1.8551, 1.8570, 2.0063, 1.8393, + 1.7900, 1.7607, 1.7850, 2.0079, 1.8642, 1.8940, 1.8689, 1.7796, 2.0517, + 1.8809, 1.8384, 1.8476, 1.8974, 1.9596, 1.6308, 1.9006, 2.1654, 1.8026, + 1.9379, 1.9820, 1.9537, 1.9238, 1.8867, 1.8491, 1.8553, 1.9149, 1.9309, + 2.1425, 1.8556, 1.9288, 1.8089, 1.9468, 2.0053, 1.7469, 1.9317, 1.9154, + 1.9378, 1.9160, 1.9163, 1.8746, 1.7993, 1.8608, 1.6098, 1.7738, 1.9288, + 1.8738, 1.8375, 1.8819, 1.7251, 1.8135, 1.8060, 1.6460, 1.9648, 1.8793, + 1.8735, 1.9054, 2.0499, 1.8881, 1.8455, 1.8787, 1.8306, 1.8660, 1.9408, + 1.7706, 1.9874, 1.8663, 1.8535, 1.8764, 1.8998, 1.8804, 1.7887, 1.9140, + 1.9037, 1.8234, 1.9198, 1.8980, 1.9463, 1.9508, 1.9099, 1.9282, 1.9256, + 1.9555, 1.8825, 1.8637, 1.8823, 1.8507, 1.9972, 1.8996, 1.8754, 1.9579, + 1.9130, 1.8970, 1.9163, 1.9452, 1.6176, 1.8198, 1.8957, 1.9368, 1.9477, + 1.9516, 2.0003, 1.8590, 1.9027, 1.9444, 1.9328, 1.7937, 2.0126, 1.9234, + 1.9333, 1.8502, 1.8628, 1.8921, 1.7600, 1.8306, 1.8923, 1.8978, 1.8728, + 1.8345, 1.8942, 1.8993, 1.9678, 1.8302, 1.8068, 1.9830, 1.9665, 1.7677, + 2.0295, 1.9052, 1.9362, 1.8471, 1.8804, 1.8914, 1.8475, 1.9386, 1.9127, + 1.9178, 1.8527, 1.8397, 1.8191, 1.9175, 2.0222, 1.8673, 1.7971, 1.8434, + 1.8621, 1.8646, 1.9333, 1.7439, 1.8221, 1.8288, 1.7576, 1.8384, 1.8605, + 0.9349, 1.8890, 1.9015, 1.8971, 2.0346, 1.8416, 1.7772, 1.8861, 1.9598, + 1.9484, 1.8769, 1.7706, 1.9063, 1.8180, 1.9036, 1.9621, 1.9720, 1.9243, + 1.8806, 1.8688, 1.7491, 1.8797, 1.7815, 1.8845, 1.8533, 1.8813, 1.6127, + 1.8691, 1.8961, 1.9153, 1.9476, 1.8424, 1.9297, 1.9749, 1.9124, 1.7928, + 1.8377, 1.9906, 1.9413, 1.8094, 1.8876, 1.9582, 1.9084, 1.9157, 1.9248, + 1.9435, 1.8492, 1.8388, 1.8780, 1.9118, 1.9468, 1.9740, 1.8628, 1.9167, + 1.8829, 1.9969, 1.8789, 1.8648, 1.9281, 1.8597, 1.9086, 1.9463, 1.9790, + 1.9217, 1.9149, 2.0050, 1.9205, 1.8149, 1.8173, 1.9832, 1.8378, 1.8729, + 1.8689, 1.8932, 1.9051, 1.9418, 1.8442, 1.8880, 
1.9247, 1.8423, 1.8932, + 1.8876, 1.8785, 1.8502, 1.8830, 1.9121, 1.7974, 1.8728, 1.8306, 1.8850, + 1.8189, 3.1739, 1.9905, 1.8789, 1.8099, 1.9587, 1.8831, 1.9429, 1.8643, + 1.8100, 1.9681, 1.8934, 1.6765, 1.8947, 1.9454, 1.9292, 1.9464, 1.8583, + 1.9097, 1.8778, 1.8680, 1.9591, 1.8427, 1.7749, 1.8435, 1.9601, 1.8104, + 1.8350, 1.9762, 1.5642, 1.8662, 1.9177, 1.8729, 1.9588, 1.9295, 1.8630, + 1.9340, 1.8574, 1.8658, 1.9321, 1.8614, 1.7848, 1.8497, 1.8741, 1.9315, + 1.8774, 1.8153, 1.9115, 1.8713, 1.8385, 1.8768, 1.8541, 1.9018, 1.6993, + 1.7122, 1.7800, 1.8768, 1.7910, 1.8274, 1.9631, 1.8354, 1.8277, 1.9133, + 1.7502, 1.9774, 1.8644, 1.8474, 1.8558, 1.9439, 1.6937, 1.8008, 1.8918, + 1.9959, 1.8452, 1.8962, 1.8652, 1.9799, 1.8773, 1.9406, 1.8726, 1.9284, + 1.7958, 1.9859, 1.7727, 1.9554, 1.8868, 1.8192, 1.7793, 1.8819, 1.9566, + 1.9689, 1.9528, 1.8410, 1.8819, 1.9371, 1.9973, 1.9218, 1.9970, 1.8444, + 1.8517, 1.8372, 1.8086, 1.9310, 1.7364, 1.8146, 1.9686, 1.7777, 1.9504, + 1.7437, 1.9044, 1.9021, 1.9502, 1.9839, 1.9025, 1.8885, 1.8824, 1.9079, + 1.9152, 1.8963, 1.9761, 1.8352, 1.7745, 1.8770, 1.9029, 1.9332, 1.9402, + 2.0578, 1.9987, 1.9575, 1.9153, 1.9579, 1.9238, 1.9215, 1.8162, 1.8468, + 1.9803, 1.8959, 1.8979, 1.7640, 1.7755, 1.8934, 1.9122, 1.9088, 1.8951, + 1.9510, 1.8332, 1.9277, 1.6539, 1.9063, 1.8719, 1.8038, 1.9036, 1.9306, + 1.8461, 1.9749, 1.7861, 1.8499, 1.9288, 1.8200, 1.9073, 1.8482, 1.8608, + 1.8441, 1.9250, 1.9120, 1.7419, 1.6542, 1.8009, 1.8765, 1.8275, 1.9703, + 1.9623, 1.8815, 1.8559, 1.9297, 1.9958, 1.8692, 1.7484, 1.8487, 1.9696, + 1.8354, 1.8752, 1.9288, 1.8886, 1.8154, 1.8855, 1.9286, 1.8765, 1.8816, + 1.9660, 1.8762, 1.9568, 1.8975, 2.0225, 1.8980, 1.9379, 1.8901, 1.8865, + 1.8581, 1.9267, 1.9512, 1.9268, 1.8755, 1.8539, 1.8854, 1.7541, 1.8981, + 1.8321, 1.9068, 1.7888, 1.9020, 1.9170, 1.8782, 1.9882, 1.9291, 1.8385, + 1.8635, 1.8034, 1.8408, 1.8393, 1.9157, 1.9480, 1.8666, 1.9089, 1.8793, + 1.8050, 1.8915, 1.9401, 1.8111, 1.9036, 1.6810, 
1.8467, 1.8421, 1.9678, + 1.7958, 1.8310, 1.9185, 1.7928, 1.8878, 1.9683, 1.8328, 1.8989, 1.8129, + 2.0088, 1.8850, 1.8757, 1.6818, 1.8862, 1.8237, 1.8779, 1.8614, 1.8838, + 1.7376, 1.9299, 1.8992, 1.8632, 1.8358, 1.9367, 1.8568, 1.7632, 1.9217, + 1.9264, 1.8954, 1.8412, 1.9310, 1.8819, 1.8354, 0.6095, 1.8538, 1.8894, + 1.8347, 1.8615, 1.8613, 1.8770, 1.8895, 1.8258, 1.8161, 1.9044, 1.8602, + 1.9129, 1.8627, 1.9785, 1.9359, 2.0186, 1.9919, 1.9591, 1.8777, 1.8415, + 2.0802, 1.9656, 1.9193, 1.9592, 1.7867, 1.9489, 1.8725, 1.8389, 1.8473, + 1.8577, 1.7747, 1.9154, 1.7346, 1.8922, 1.9370, 1.8342, 1.9007, 1.7837, + 1.9888, 1.8941, 1.8775, 1.9283, 1.8973, 2.0281, 1.4591, 2.0369, 1.9378, + 1.8734, 1.9433, 1.8871, 1.7934, 1.9058, 1.8899, 1.8908, 1.9713, 1.8724, + 1.9331, 1.9974, 1.8825, 1.9038, 1.8145, 1.9421, 1.9098, 1.9045, 1.9809, + 1.9200, 1.8320, 1.7140, 1.9192, 1.8688, 1.8621, 1.9538, 1.8416, 1.8669, + 1.8944, 1.7779, 1.8887, 1.8708, 1.8072, 1.9348, 1.8595, 1.9091, 1.9632, + 1.9195, 1.8811, 1.8306], device='cuda:1', requires_grad=True)Parameter containing: +tensor([ 1.4977e-01, 9.0828e-02, -1.1229e-01, -2.4072e-02, 2.1253e-01, + 2.2673e-01, 1.5718e-01, 2.1828e-01, -2.4588e-01, -8.8377e-02, + 1.2490e-01, -1.3510e-01, -2.0088e-01, 5.7927e-02, -1.0586e-01, + -1.1825e-01, -9.5867e-03, -1.3082e-01, 4.6557e-02, 2.5393e+00, + -5.5107e-02, -2.8849e-03, -3.2968e-02, 8.6449e-02, 3.8190e-02, + -4.2845e-02, 8.5313e-03, -4.0419e-02, -4.0110e-03, -5.3827e-02, + 6.2886e-02, 9.8723e-02, 3.9529e-03, 1.7838e-01, 2.0938e-01, + -1.2265e-01, -1.3081e-01, -1.0067e-01, -1.1288e-01, 1.2991e-01, + 1.8509e-02, 2.8068e-03, 3.8258e-03, 1.9762e-01, 8.1392e-02, + -3.3367e-02, 9.7839e-02, -1.3463e-01, 9.3890e-02, 1.0690e-01, + 9.7328e-02, -9.1940e-02, -1.7569e-01, 5.7027e-02, -5.5082e-02, + 9.2227e-02, -6.8959e-02, -8.2383e-02, -1.3635e-02, 4.3161e-02, + 1.1511e-01, -1.9712e-01, 1.1997e-01, 1.2039e-01, 2.0542e-01, + 6.4997e-02, 2.1161e-01, -4.0855e-02, -1.3579e-01, 2.9124e-01, + -4.1219e-02, 
1.6387e-02, 4.7529e-01, 1.2968e-01, -9.7462e-02, + 1.3828e-01, 1.6654e-01, -1.1903e-01, -9.1884e-02, 1.2110e-01, + 2.8219e-02, -8.1905e-02, -2.8228e-02, 1.2402e-01, -6.1241e-02, + 1.2894e-01, 7.7995e-02, 2.6379e-01, -2.3807e-02, -6.2343e-02, + -2.0724e-02, 5.8649e-02, 1.0976e-01, -1.4784e-01, -1.6364e-01, + 5.2668e-02, 2.2110e-01, -1.7867e-01, -2.4759e-01, 1.6276e-02, + 9.4501e-02, -1.3611e-01, 6.1677e-02, -1.0329e-01, 7.9952e-02, + 1.0241e-01, 1.3805e-01, -8.0445e-02, -1.0865e-01, -2.0342e-02, + 5.4483e-03, -4.4460e-02, -9.8944e-02, 2.5351e-02, -9.3630e-03, + -4.6340e-02, 1.8653e-02, -5.4521e-02, -9.9965e-02, 2.9935e-01, + -7.1766e-03, -3.5605e-01, -2.3958e-01, -2.7390e-02, -1.5948e-01, + -1.0897e-01, -1.7559e-01, 1.9700e-01, 1.0314e-02, -1.7945e-01, + -9.4148e-02, 7.2500e-03, 3.1125e-02, 8.9480e-02, -7.2448e-02, + 1.7824e-01, -2.1028e-01, -8.8175e-02, -1.0598e-02, -7.6072e-02, + 8.6819e-02, 3.8492e-02, -3.9397e-02, -7.3740e-02, 1.3232e-01, + -1.7187e-01, 2.5512e-02, 9.1546e-02, 9.4585e-03, 7.7797e-02, + 3.9275e-01, -1.0438e-01, -1.2284e-01, 1.5048e-02, 8.2665e-03, + -5.1999e-01, 2.1568e-02, -1.7402e-01, 8.3193e-02, 1.3827e-01, + 1.8734e-01, -1.4931e-02, -9.5600e-02, 8.9184e-02, -3.2616e-02, + 4.2414e-02, -6.3760e-02, 1.8367e-01, 2.8722e-04, -1.0264e-02, + -1.8133e-02, 1.1746e-01, 4.0782e-02, -2.9797e-02, 8.2706e-02, + -2.9593e-01, 3.7761e-02, 1.2163e-01, -2.4276e-01, 1.0814e-01, + -1.4889e-01, 1.9977e-03, 2.4847e-01, 9.8385e-02, -3.8665e-02, + -8.7119e-02, 5.5839e-04, -6.7407e-01, -1.9018e-01, -1.3712e-01, + -1.6403e-01, -1.2081e-01, 1.1545e-01, 9.3563e-02, -9.6760e-02, + 5.7779e-02, 3.0807e-02, -2.1459e-01, 2.2013e-01, -1.3995e-02, + -3.3612e-01, 1.5033e-01, -5.4025e-02, 3.2845e-02, 9.2645e-02, + 3.2002e-01, -1.4954e-02, 1.5258e-01, 2.1483e-01, -2.7657e-02, + 8.3226e-02, 3.7656e-01, -5.6053e-03, -1.6680e-01, -3.9404e-02, + -1.0372e-02, -5.0771e-02, 4.1767e-02, -1.9044e-01, -4.0341e-01, + 4.0919e-02, 7.6289e-02, 1.1134e-01, -1.3315e-01, -4.8227e-02, + 
-7.6237e-02, -3.9292e-02, 9.4033e-03, 2.0934e-01, -1.2315e-01, + 2.1096e-01, 1.0530e-01, -1.2812e-01, -3.9880e-02, -2.1281e-01, + -3.3356e-02, -1.0072e-03, 8.7769e-02, 1.3860e-01, -6.7106e-02, + -1.4575e-02, 1.5290e-01, -6.3386e-02, 7.9846e-02, -1.3863e-02, + 7.7562e-02, -8.3009e-02, -1.0501e-01, -2.6750e-02, -1.7253e-01, + -2.5846e-02, -1.1804e-01, 1.7110e-01, -1.2576e-01, 1.7992e-01, + -1.9679e-01, -8.5437e-03, -6.9406e-02, 4.9921e-02, 5.0245e-02, + -1.1880e-01, -6.9830e-02, 5.9304e-02, -2.2032e-02, -5.5579e-02, + 2.8273e-01, 4.2607e-02, -6.2353e-03, -6.9794e-02, -4.0076e-02, + 7.6442e-02, 1.2681e-01, 2.6285e-01, -8.0095e-02, -5.2822e-02, + 4.3852e-02, -4.5039e-02, 1.2212e-01, 2.0381e-01, -1.1561e-01, + 1.6132e-01, 3.3906e-01, -1.2707e-02, 5.1528e-02, 1.2264e-01, + 9.6960e-02, -5.2069e-02, -1.3161e-01, 3.4161e-01, -1.0293e-01, + 3.3521e-03, -2.1975e-03, 1.8850e-02, -1.2784e-03, 1.6387e-01, + -5.1121e-02, 3.9901e-01, -1.2812e-01, 6.6056e-02, 1.3807e-01, + 4.9560e-02, -1.0768e-01, -1.2635e-01, 1.1741e-01, 3.6649e-02, + 9.2106e-02, 2.2007e-02, -1.2226e-01, 1.7446e-01, -3.4823e-02, + 5.3663e-02, -2.9064e-02, -2.4956e-01, -2.3326e-01, 2.9518e-03, + 1.6585e-01, 1.2153e-01, -2.6665e-02, 3.1691e-01, 1.1982e-01, + -1.7774e-01, 3.4280e-01, -1.0939e-01, 4.9095e-02, -1.5014e+00, + -2.8279e-01, 8.2694e-02, 5.9233e-02, 1.2187e-02, -1.3442e-01, + 2.5667e-01, 4.1742e-02, 1.5975e-01, 4.0541e-03, 2.5555e-01, + 2.7525e-01, 1.9277e-02, 1.5007e-01, -3.5787e-02, -4.1940e-02, + 3.8335e-03, 1.0118e-01, -4.8436e-02, 3.0893e-02, 7.3047e-02, + 9.5244e-02, 1.8675e-01, 1.3491e-01, -3.9549e-02, 8.8511e-02, + -2.0086e-01, 8.2027e-02, -2.5592e-02, 8.8682e-02, 9.3104e-02, + -1.5674e-01, -1.0768e-01, 2.4316e-02, 3.6231e-02, -1.5898e-01, + 3.4396e-02, 8.7499e-02, 4.3070e-02, -2.6488e-01, 6.0697e-02, + -1.0659e-01, 1.1697e-01, 1.1641e-01, 2.2722e-01, 5.9255e-02, + 1.9844e-01, -1.0398e-01, -1.0219e-01, -1.0963e-01, -1.4832e-01, + 7.7414e-02, 1.1030e-01, 2.3565e-01, -6.7662e-02, 9.7777e-03, + 
-2.2667e-01, 1.0643e-01, 1.2504e-02, 7.0543e-02, 2.1687e-02, + -4.3200e-02, 6.9545e-02, -1.5286e-01, -5.3265e-02, -4.3717e-02, + 2.8115e-02, -1.6122e-01, -2.7460e-02, -6.3853e-02, 2.8858e-02, + 4.2642e-02, 1.0826e-01, 1.2918e-01, -5.5144e-02, 2.8905e-02, + 2.5789e-01, -6.0265e-03, 1.1585e-01, 4.1049e-02, -1.6564e-01, + -3.6466e-01, -4.4526e-02, -3.9705e-02, 5.0000e-02, 4.2413e-02, + -9.8391e-02, 2.2150e-02, 4.3222e-02, -7.1868e-02, -9.0238e-02, + -3.9393e-01, 3.9913e-02, 6.0577e-02, -5.4894e-02, -2.7723e-02, + -5.5954e-02, 7.2414e-02, 1.0857e-01, -1.4078e-01, -1.9501e-02, + 1.1566e-01, -2.9919e-01, -2.8507e-01, 1.3112e-01, -1.1352e-02, + -1.2703e-01, -1.3442e-01, 7.1983e-03, 2.7848e-01, 1.1755e-01, + 7.1231e-03, 1.5916e-01, -3.3708e-03, 1.5079e-01, -1.1724e-01, + 2.7498e-02, 1.7569e-02, -1.5906e-02, -1.2364e-01, 2.7426e-01, + -1.4464e-01, -2.9425e-02, -1.8606e-01, 1.4634e-01, -1.0930e-01, + -6.5916e-02, 8.1522e-02, -1.8188e-01, 1.8782e-02, 6.5026e-02, + -3.8653e-02, 1.2372e-01, -1.9791e-02, -2.8119e-02, -1.2243e-01, + -9.4997e-02, 1.8606e-01, -2.6361e-01, 2.0877e-01, -4.2930e-02, + -1.2266e-01, 1.4367e-01, -9.8226e-02, 3.1051e-01, 1.0347e-01, + 1.1779e-01, -2.0663e-01, 1.4741e-02, 1.1494e-01, 2.7045e-02, + 5.6440e-02, 2.2375e-01, 1.3750e-02, -8.0551e-02, 4.4833e-02, + 1.8983e-01, 1.6651e-02, 4.2797e-02, 1.4209e-01, -1.9838e-01, + -4.0615e-03, -1.5028e-01, 1.5990e-01, 6.5197e-02, 7.3199e-02, + 2.7811e-02, 1.1103e-01, -5.0831e-02, 1.4231e-02, -2.2152e-01, + -1.8666e-01, -9.2706e-02, -2.2502e-02, -1.2607e-01, 2.3435e-01, + -3.5548e-02, -1.7466e-01, 3.4294e-02, -6.1595e-02, -6.2275e-02, + 4.5427e-02, 8.0757e-02, 1.0295e-01, 7.7756e-02, -2.9827e-02, + -1.5265e-01, -4.7962e-03, -1.6855e-01, 1.9033e-01, -1.1006e-01, + -8.5642e-02, 1.7786e-01, 1.0449e-01, 8.5187e-02, -5.4987e-02, + -3.4442e-02, -4.7119e-02, 1.5076e-01, 1.0142e-01, -9.9440e-02, + -4.9660e-02, -2.6204e-02, -4.3997e-02, 1.3288e-02, -9.6366e-02, + 7.7371e-02, 2.0195e-01, -2.5764e-02, -9.3003e-03, -5.8061e-02, 
+ -1.9938e-01, -5.7160e-02, 1.5829e-01, 1.8127e-01, -5.1235e-02, + 5.3454e-02, -5.7813e-02, -9.8958e-02, 3.4245e-02, -3.8890e-02, + 5.5782e-02, 7.2028e-02, 8.4247e-02, 1.6157e-01, 8.5942e-02, + -6.3876e-02, 1.6179e-01, -1.1903e-01, 1.9752e-01, -1.2409e-01, + -1.3576e-01, 9.2939e-02, -8.0832e-02, -3.7897e-02, 1.3954e-01, + -1.2824e-01, 1.3462e-01, 3.5907e-02, 7.7795e-02, 3.5418e-01, + 1.8305e-01, 4.7019e-02, 1.2169e-01, 1.4338e-01, 2.2147e-02, + 1.6112e-01, -2.3264e-02, 8.2084e-04, -4.4160e-02, -2.8564e-03, + 8.1639e-02, 3.2530e-02, -8.9730e-02, 1.4196e-01, -1.2849e-01, + -1.8822e-01, -9.6932e-02, 1.5225e-01, 2.9004e-02, 5.1665e-02, + 1.4070e-01, -3.1082e-02, -1.7318e-01, 1.1008e-01, -1.8146e-02, + -9.7296e-02, 2.5158e-01, 1.0361e-01, 1.6765e-02, 9.7833e-02, + -9.4305e-02, 1.9451e-01, 7.9379e-02, -1.3511e-01, -2.7555e-02, + -8.5424e-02, -1.8621e-01, -9.9312e-02, -7.1172e-02, -1.2654e-01, + -1.1217e-01, 1.0024e-01, 2.0272e-01, 7.3024e-02, -6.2072e-02, + 2.3424e-01, 1.1232e-01, -1.6426e-01, -5.0104e-02, -9.9868e-02, + -2.0319e-01, 2.6993e-02, 5.0555e-02, -7.6812e-02, -2.2759e-01, + 2.5402e-01, -2.1871e-03, 4.5064e-02, -7.3341e-03, 2.1875e-02, + -1.0079e-01, -3.1013e-02, -9.9062e-02, 6.1146e-02, -1.5484e-01, + 2.4925e-01, -1.1802e-02, 7.6865e-02, 1.1096e-01, 2.3033e-01, + 3.1681e-02, 3.8874e-03, 4.8754e-02, 2.8777e-02, -6.3957e-02, + -1.4132e-01, -2.0266e-01, -3.7878e-02, -6.4012e-02, 1.7023e-01, + 7.9161e-02, 1.3030e-02, 1.9864e-01, 8.3066e-02, 2.5653e-01, + 9.1549e-02, 1.6667e-01, 5.2830e-03, 1.3459e-01, -6.7270e-02, + 8.6687e-02, -2.3856e-01, -6.9920e-02, 7.9934e-02, 1.3820e-01, + -1.3395e-01, -1.5970e-02, -8.8392e-02, -4.4374e-02, 1.4311e-01, + -5.3860e-02, -1.6929e-01, -1.7831e-01, -2.5584e-01, 6.5801e-02, + -3.4672e-01, 1.2548e-01, 8.3701e-02, 8.4880e-02, -1.2696e-02, + 7.1410e-02, -2.6850e-02, -8.6469e-02, -7.0036e-02, 1.2408e-01, + -1.3168e-01, -2.3096e+00, 9.2856e-02, 1.1851e-01, 7.4759e-02, + -1.8195e-01, 8.1052e-02, -2.3694e-02, 6.8942e-02, 5.0159e-02, + 
-3.2627e-01, 4.6953e-02, 1.3953e-02, -3.2010e-02, -1.5938e-01, + 1.1737e-01, 5.7404e-02, -5.9496e-02, 1.1439e-02, -5.4151e-02, + 1.9076e-01, 1.3202e-01, 2.3647e-02, -3.2032e-02, -2.0221e-01, + 3.9195e-02, 1.9440e-01, 3.9982e-02, -1.6485e-01, -7.3657e-02, + -6.7927e-02, 1.7999e-01, 1.9496e-01, 1.4840e-01, 2.8846e-02, + 1.1035e-01, 5.6286e-02, -5.4704e-02, -1.4696e-01, -4.4278e-02, + 5.0493e-02, -2.2273e-02, -3.9672e-02, -3.2121e-01, 2.0969e-01, + -3.9823e-02, -3.1859e-01, 1.5124e-01, 1.4009e-01, 5.0370e-02, + 8.3453e-02, 9.9049e-02, 2.4573e-01, 1.9968e-01, 5.8656e-02, + -5.6640e-02, 4.0581e-02, 2.6943e-02, -1.3497e-04, -1.2463e-01, + 5.2881e-02, -1.2241e-01, 8.8162e-03, 1.3377e-01, 5.5711e-03, + -3.2902e-02, -1.4677e-01, -4.2712e-02, 1.2307e-01, 8.0042e-02, + -3.1782e-02, -1.3426e-01, 2.1633e-02, 1.0981e-02, 2.2117e-01, + -1.3080e-02, 8.9470e-02, 1.8250e-01, -2.3044e-01, -5.8739e-02, + -3.1466e-02, 6.1521e-02, 2.5686e-01, -6.6933e-02, -2.3293e-01, + -2.6690e-02, 2.1225e-01, 7.8747e-02], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([[ 4.3831e-03, -1.7395e-02, 3.9558e-03, ..., -1.6966e-03, + 1.0384e-02, 3.0640e-02], + [-2.2182e-03, -2.1912e-02, 5.6572e-03, ..., 2.1820e-02, + 7.8583e-03, 1.6800e-02], + [ 5.5084e-03, -1.9119e-02, -4.4098e-03, ..., 2.0035e-02, + -1.3260e-02, 1.9684e-03], + ..., + [ 4.6631e-02, -5.4359e-05, -3.4523e-03, ..., 4.8828e-03, + 2.2156e-02, -1.1864e-02], + [ 8.8787e-04, -8.1482e-03, 1.7776e-02, ..., -1.6052e-02, + 3.0502e-02, 9.1400e-03], + [ 1.3641e-02, 7.2708e-03, 3.8815e-03, ..., -3.4828e-03, + 2.0161e-03, -3.3054e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True)Parameter containing: +tensor([-0.1271, -0.1493, -0.2289, ..., -0.5405, -0.3594, -0.0424], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([[-0.0115, -0.0157, -0.0200, ..., -0.0466, -0.0021, -0.0165], + [ 0.0092, -0.0032, -0.0108, ..., 0.0121, -0.0015, 0.0188], + [ 0.0030, -0.0024, 0.0109, ..., -0.0146, 
-0.0017, -0.0030], + ..., + [-0.0107, -0.0003, 0.0144, ..., -0.0054, 0.0235, 0.0117], + [ 0.0090, 0.0042, 0.0030, ..., 0.0037, 0.0125, -0.0172], + [-0.0052, -0.0043, 0.0161, ..., 0.0261, 0.0152, -0.0026]], + device='cuda:1', dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([ 2.5586e-01, -2.6196e-01, 2.3486e-01, 1.3916e-01, -1.3599e-03, + 4.4922e-02, 1.1615e-01, 1.1955e-02, -2.2461e-01, 1.1938e-01, + 1.4453e-01, -7.4097e-02, -1.5967e-01, 3.5095e-02, -1.9214e-01, + 6.9092e-02, -6.2347e-02, 7.0038e-03, -5.2734e-02, 2.5732e-01, + 3.0411e-02, -3.2158e-03, -4.5239e-01, 1.0803e-01, -2.2742e-01, + -1.1322e-01, 3.1433e-02, -1.7426e-02, 2.8857e-01, -9.8145e-02, + -2.0264e-01, 8.4595e-02, 7.5928e-02, -5.0781e-02, 1.3879e-01, + 1.1859e-01, 2.1851e-01, -7.2327e-02, -1.4209e-01, 6.4819e-02, + 1.1395e-01, -6.5674e-02, -6.4087e-02, 4.7607e-02, 1.8921e-02, + 1.1249e-01, 1.6370e-01, 1.0114e-01, 1.5955e-01, -1.7944e-01, + 1.3147e-01, 7.1228e-02, -1.2158e-01, -1.3306e-01, 1.9141e-01, + -1.3168e-02, -1.5515e-01, -1.9641e-01, 1.9983e-01, -1.1304e-01, + -1.2634e-01, 8.5205e-02, 2.0050e-02, 1.3379e-01, -1.9434e-01, + -5.9113e-02, 8.1543e-02, 1.2436e-02, 2.6291e-02, 2.5049e-01, + -1.7688e-01, 1.2524e-01, -1.7102e-01, 1.9751e-01, 1.8896e-01, + 2.8589e-01, -1.9067e-01, -3.7628e-02, 8.3466e-03, -1.2036e-01, + -3.3447e-02, 2.2400e-02, -1.3107e-02, -1.7136e-02, 5.1880e-02, + 9.6497e-02, -1.9214e-01, -3.2379e-02, -8.5571e-02, 1.6809e-01, + 1.9482e-01, 1.0614e-01, 1.6821e-01, -1.9812e-01, 2.9282e-02, + 5.1956e-03, -1.4514e-01, 8.9050e-02, 3.2623e-02, -3.6938e-01, + -1.4392e-01, 6.9031e-02, 2.9517e-01, 2.1252e-01, 2.3956e-02, + -2.4052e-03, -7.3364e-02, -1.6922e-02, -6.2332e-03, 9.4360e-02, + -2.0386e-01, -5.0323e-02, 1.1627e-01, -1.7688e-01, 3.9276e-02, + -2.6520e-02, -8.8379e-02, -3.0960e-02, -5.7495e-02, 6.6910e-03, + 8.5602e-03, 1.1673e-02, -3.2867e-02, -9.2346e-02, -3.0127e-01, + -6.9542e-03, -1.4868e-01, -1.5161e-01, 8.5754e-02, -1.5942e-01, + 3.1948e-03, 7.7942e-02, 
-2.4695e-01, -1.4941e-01, -2.9175e-01, + -1.8604e-01, -1.6553e-01, 2.4765e-02, 2.4857e-02, 8.7830e-02, + -2.0044e-01, -9.2590e-02, -7.0251e-02, -1.0168e-01, 1.5295e-01, + -9.3262e-02, -2.5317e-01, 2.3743e-01, 1.9897e-01, 8.7219e-02, + -5.1239e-02, 1.0211e-01, -5.9723e-02, 2.6611e-01, 3.5938e-01, + -5.9540e-02, 3.0713e-01, 3.8090e-03, 1.7749e-01, 3.1396e-01, + 1.4099e-01, 1.6199e-01, -3.5191e-03, 6.2561e-02, 2.8534e-02, + -2.0007e-01, -1.8066e-01, 3.4180e-02, 7.5562e-02, 1.5833e-01, + -1.7786e-01, -1.8213e-01, -2.8198e-02, -2.9492e-01, -1.4542e-02, + -6.7810e-02, 9.3079e-02, 1.5076e-01, -9.5398e-02, -2.7588e-01, + 9.1675e-02, -4.7302e-02, -8.7952e-02, -4.3091e-02, -1.5918e-01, + -2.1704e-01, 1.3412e-02, 2.1313e-01, -1.9043e-02, -1.1938e-01, + 1.6870e-01, 6.8542e-02, -1.5430e-01, -1.8274e-01, -2.4744e-01, + -2.0203e-01, -1.5991e-01, 2.0447e-02, -7.7820e-02, -1.5002e-01, + -1.7471e-02, -1.0651e-01, 1.1975e-01, 4.4403e-02, 2.1057e-01, + 1.5039e-01, -3.3081e-02, 8.3694e-03, 2.1594e-01, -1.2128e-01, + -2.2058e-01, 1.1041e-01, 2.2449e-01, -2.3022e-01, -1.3965e-01, + 1.4478e-01, 1.0638e-01, 2.3303e-01, 1.4087e-01, -9.6863e-02, + -2.8540e-01, 1.1133e-01, 1.7224e-01, 1.2749e-02, 6.4941e-02, + -2.2446e-02, 2.0190e-01, 3.1830e-02, 7.7896e-03, -6.5369e-02, + 1.4832e-01, 4.1351e-02, -2.2571e-01, -3.7018e-02, 8.5083e-02, + 2.1240e-01, -3.1958e-01, -2.5726e-02, 2.3096e-01, -2.4133e-01, + -2.5464e-01, 1.1420e-01, -1.7786e-01, 1.5222e-01, 1.6281e-02, + 2.0520e-01, 2.0850e-01, -9.8328e-02, 8.3084e-03, -3.8452e-02, + 9.8450e-02, -3.6896e-02, 6.7932e-02, -8.7128e-03, 1.2646e-01, + -4.3060e-02, -2.1423e-01, 2.6587e-01, 1.9202e-01, 1.6455e-01, + 3.0811e-01, 2.5073e-01, -4.7119e-02, -3.8281e-01, -4.4952e-02, + 1.3403e-01, -2.4744e-01, 3.6438e-02, -3.9062e-02, 1.0809e-01, + -2.9770e-02, -4.2297e-02, 1.7548e-02, -8.6689e-04, -8.0811e-02, + 1.4893e-01, -9.7473e-02, -2.5806e-01, 2.1326e-01, 8.2703e-02, + 9.4666e-02, 3.0151e-01, 2.7054e-02, -1.4612e-01, 2.3657e-01, + -1.0828e-01, 1.5649e-01, 
-1.6284e-01, 2.7710e-01, -9.6497e-02, + -2.0428e-03, -1.8091e-01, -1.6309e-01, -8.1665e-02, -1.5027e-01, + -9.0271e-02, 1.4758e-01, 1.6687e-01, -6.4758e-02, -1.1267e-01, + 1.3989e-01, -1.8921e-01, 7.0374e-02, 1.7310e-01, -2.2803e-01, + 8.6441e-03, 8.0017e-02, 4.1687e-02, 2.9639e-01, -1.2396e-01, + -1.0071e-01, 8.0505e-02, -1.1536e-01, -2.2742e-01, -7.3303e-02, + 9.5901e-03, -1.9006e-01, -6.5369e-02, 5.6793e-02, -1.8372e-02, + 1.7432e-01, 1.3599e-01, -2.5528e-02, 1.9495e-01, -1.2969e+00, + 4.7394e-02, 1.1646e-01, 6.7383e-02, -1.6223e-01, 7.1533e-02, + 1.3025e-01, 1.8112e-02, 3.0737e-01, 1.0773e-01, -9.6375e-02, + 2.2632e-01, 2.2571e-01, -5.6610e-03, -1.4490e-01, -8.7463e-02, + -1.4673e-01, 1.4355e-01, 1.1865e-01, -4.6349e-03, 3.1104e-01, + -4.8340e-02, 9.0515e-02, -6.7444e-02, -3.1616e-02, -1.7200e-01, + 9.5596e-03, -3.3539e-02, 1.6943e-01, 2.1576e-02, -7.2205e-02, + -4.6051e-02, 3.4619e-01, 1.9669e-02, 1.6907e-01, -2.2095e-02, + 9.5764e-02, 5.7526e-02, -1.6284e-01, 1.3220e-01, -4.5380e-02, + 3.0322e-01, 1.2451e-01, -7.6172e-02, 5.8624e-02, 2.1399e-01, + 2.5659e-01, 1.7542e-01, -1.8726e-01, -1.0052e-01, 1.5601e-01, + -8.0261e-03, -1.9250e-01, -2.0728e-01, -1.0773e-01, 8.2092e-02, + -2.3718e-01, 1.8848e-01, 1.1389e-01, -5.9967e-02, -1.1237e-01, + 2.9388e-02, 6.6284e-02, -1.4258e-01, 5.5008e-03, 1.8115e-01, + 4.0039e-02, 2.7930e-01, -2.1948e-01, 6.1493e-02, 9.9976e-02, + -1.1169e-01, 2.2644e-01, -1.0059e-01, 8.5999e-02, 7.2250e-03, + -1.9547e-02, -3.0298e-01, 1.4709e-01, 9.9854e-02, -7.2510e-02, + 4.1138e-02, 9.9854e-02, 1.3802e-02, 1.3916e-01, -3.0786e-01, + -9.0332e-03, 2.3364e-01, -1.1591e-01, -2.6050e-01, -2.8491e-01, + -9.1797e-02, 1.7749e-01, 1.1115e-01, -1.6943e-01, -2.5223e-02, + -3.4515e-02, 3.3789e-01, -1.4458e-02, -1.8970e-01, 2.4280e-01, + 4.2908e-02, -1.0559e-01, 6.2408e-03, 2.6221e-01, -5.0323e-02, + 5.5420e-02, -9.2697e-04, 4.0497e-02, 1.1017e-01, 1.3525e-01, + -2.0557e-01, 2.5000e-01, -2.8125e-01, 3.1641e-01, 4.8553e-02, + 2.9443e-01, -3.9154e-02, 
-5.8594e-02, 1.0431e-01, 3.3203e-01, + -4.3274e-02, -2.1545e-01, -1.3843e-01, -1.0760e-01, -3.8147e-02, + -2.5635e-01, 8.1665e-02, 1.6235e-01, -5.6885e-02, 9.8022e-02, + 5.6488e-02, 2.2266e-01, -1.8787e-01, 9.2041e-02, -1.0907e-01, + 3.0273e-01, 1.3049e-01, -1.2018e-01, -2.9663e-02, 6.1452e-05, + -1.5228e-02, 5.6366e-02, -4.9072e-02, 9.7168e-02, 1.5833e-01, + -9.8938e-02, -1.2256e-01, 4.6997e-02, -5.3986e-02, 7.8552e-02, + 1.4661e-01, 1.5198e-01, 2.3999e-01, 2.8152e-02, -1.6375e-03, + 1.6089e-01, -2.6978e-01, -2.3022e-01, -8.1055e-02, -1.2299e-01, + -3.2166e-02, 3.8239e-02, -1.3013e-01, -1.7151e-01, 5.3711e-02, + -1.6012e-03, 5.1651e-03, -8.3008e-02, -9.8572e-02, -1.0376e-01, + 2.5146e-01, -6.1066e-02, 2.0776e-01, -1.9629e-01, -8.8440e-02, + 8.7952e-02, 5.1239e-02, -8.2626e-03, 6.3667e-03, -4.3854e-02, + -1.8945e-01, 1.0223e-01, -1.6223e-01, -4.5624e-03, -9.3079e-02, + -3.2288e-02, -1.8127e-01, 7.4524e-02, 8.6060e-02, -2.6953e-01, + -8.4412e-02, -3.5431e-02, 9.4727e-02, 2.6779e-02, -2.8882e-01, + -3.2178e-01, -5.1575e-03, 3.6285e-02, 6.9824e-02, 2.1204e-01, + 1.8349e-03, -1.6162e-01, -3.0200e-01, -2.6154e-02, -5.6076e-03, + -6.1768e-02, 2.8491e-01, -8.7524e-02, -2.2229e-01, 3.2074e-02, + 2.2205e-01, -1.4099e-01, 1.3696e-01, 3.6743e-02, 7.0923e-02, + -2.1533e-01, -1.0870e-01, 3.9330e-03, -4.7150e-02, -1.8530e-01, + -9.0454e-02, 2.4573e-01, 6.5247e-02, 3.3142e-02, 3.3630e-02, + -2.3819e-02, 1.1084e-01, 2.6794e-02, 1.5049e-03, -5.7892e-02, + -6.4514e-02, -2.3010e-01, 2.1863e-01, 4.8889e-02, 5.6641e-02, + -2.0850e-01, 8.2825e-02, -8.4839e-02, -1.0162e-01, 1.2891e-01, + 1.4734e-01, 1.2225e-01, 6.5125e-02, -3.5492e-02, 3.3057e-01, + 5.4749e-02, 4.1138e-02, -4.5868e-02, 2.4817e-01, -1.2585e-01, + -1.6858e-01, -2.5488e-01, -2.6709e-01, -2.1265e-01, -8.3313e-02, + -9.2163e-02, 1.5320e-01, -8.2214e-02, -2.7490e-01, 1.1139e-01, + 2.3560e-01, 4.5776e-02, 5.1361e-02, -1.8262e-01, -5.0879e-04, + 5.0720e-02, -2.9160e-02, -2.6831e-01, 1.2042e-01, 1.4197e-01, + -2.1655e-01, 
-6.8848e-02, -1.2213e-01, -4.3091e-02, -1.0223e-01, + -2.2241e-01, -2.1500e-02, -2.2858e-02, 1.1035e-01, -1.4368e-01, + -2.0081e-01, 1.4136e-01, 1.5112e-01, -2.9126e-01, -1.6284e-01, + 1.1853e-01, -3.2568e-01, 4.4678e-02, -2.7124e-01, -1.5076e-01, + -7.4890e-02, -1.1877e-01, -2.1851e-01, 9.8267e-02, 7.3975e-02, + 2.4365e-01, 1.7899e-02, -5.1956e-03, -1.8774e-01, 1.7651e-01, + -2.9297e-01, 1.8250e-01, -7.8247e-02, 2.2571e-01, 8.7158e-02, + 1.9214e-01, 3.2074e-02, -1.3171e-01, 1.0712e-01, -7.7209e-02, + 2.4097e-01, -2.2339e-01, 1.9531e-01, -1.7224e-01, 1.8042e-01, + -1.1169e-01, -7.6332e-03, 1.0455e-01, -1.8628e-01, 3.8647e-01, + -8.5754e-03, 4.4434e-02, 1.0852e-01, 2.3816e-01, -4.9957e-02, + -1.7053e-01, 1.6479e-01, -2.3425e-01, 1.9812e-01, -8.8989e-02, + -3.0548e-02, 1.5236e-02, -1.1304e-01, 1.5796e-01, -8.7891e-02, + -2.1103e-02, -3.0289e-03, -2.3779e-01, 1.9067e-01, 1.6187e-01, + 1.6870e-01, 5.0720e-02, 2.0947e-01, -1.7908e-01, -4.0955e-02, + -2.0605e-01, 3.5339e-02, 1.6992e-01, -1.0504e-01, -1.5942e-01, + -2.0142e-01, -1.5472e-02, -2.3804e-01, -1.8341e-02, -1.5869e-01, + 1.3782e-01, -1.9580e-01, 1.5857e-01, 4.2627e-01, -7.3303e-02, + -2.3132e-01, 2.1240e-02, 2.0294e-02, -1.1322e-01, 4.2267e-02, + -1.5991e-02, -7.6538e-02, -4.0100e-02, 8.8745e-02, 2.7173e-01, + -7.8186e-02, 7.9285e-02, 2.8214e-02, 1.0638e-01, -4.3396e-02, + 5.2460e-02, 1.3416e-01, 4.0497e-02, 2.5272e-04, -4.1443e-02, + -2.7319e-01, 2.6221e-01, -2.4612e-02, 5.6580e-02, 2.6953e-01, + 1.0156e-01, 7.1655e-02, 6.6452e-03, 4.6356e-02, 8.2947e-02, + 1.0986e-02, -1.8567e-01, -2.7026e-01, 9.7412e-02, 6.3416e-02, + -1.0809e-01, 2.5684e-01, 5.9418e-02, 5.4047e-02, 2.0657e-03, + -8.6914e-02, 6.0425e-02, -1.0840e-01, -1.6711e-01, -1.5234e-01, + 2.5879e-01, -5.5786e-02, -7.8430e-02, 5.5878e-02, -3.2812e-01, + -1.9934e-01, 9.6313e-02, -1.3062e-01, 1.6736e-01, 1.7151e-01, + 8.5815e-02, -1.1182e-01, -1.5137e-02, -4.1931e-02, -6.9336e-02, + -4.3823e-02, -4.1809e-02, 7.6904e-02, -3.6157e-01, -6.3660e-02, + 
-1.6382e-01, 2.4658e-02, 4.3774e-01, -4.3774e-01, -1.1328e-01, + 8.6121e-02, 4.8401e-02, -6.8542e-02, -1.1786e-01, -1.8677e-01, + 2.4097e-01, 1.3098e-01, 2.5928e-01, -1.8005e-01, -6.6650e-02, + 1.5649e-01, 9.4604e-02, -6.0059e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True)Parameter containing: +tensor([1.7453, 1.7916, 1.7866, 1.7060, 2.2256, 1.7691, 1.6459, 1.7028, 1.7940, + 1.7137, 1.7359, 1.6988, 1.7650, 1.6460, 1.9754, 1.7098, 1.6823, 1.6809, + 1.7675, 2.8573, 1.7179, 1.7905, 1.7851, 1.7294, 1.7422, 1.7077, 1.7375, + 1.6869, 1.8025, 1.6808, 1.6921, 1.7139, 1.7179, 1.7785, 1.7144, 1.6573, + 1.7561, 1.7219, 1.8840, 1.8551, 1.7527, 1.7285, 1.8142, 1.7218, 1.7085, + 1.7747, 1.7440, 1.6605, 1.7240, 1.6341, 1.6871, 1.7110, 1.8223, 1.7324, + 1.6996, 1.9939, 1.6452, 1.5798, 1.6484, 1.6495, 1.7104, 1.6899, 1.7505, + 1.7364, 1.7274, 1.8487, 1.7835, 1.7188, 1.6811, 2.6443, 1.7195, 1.6335, + 1.6849, 1.8288, 1.7249, 1.7434, 1.7538, 1.7232, 1.7073, 1.7086, 1.7375, + 1.6350, 1.6951, 1.7128, 1.6553, 1.6948, 1.7890, 1.7998, 1.6942, 1.6631, + 1.8334, 1.7767, 1.7468, 1.7147, 1.6983, 1.7031, 1.7222, 1.6426, 1.6320, + 1.8031, 1.6616, 1.8613, 1.7142, 1.7822, 1.6660, 1.8272, 1.7065, 1.6898, + 1.6310, 1.7521, 1.7844, 1.7785, 1.7546, 1.7858, 1.7514, 1.7175, 1.8956, + 1.6952, 1.7559, 1.7030, 1.6885, 1.8080, 1.6344, 1.7356, 1.6977, 1.8344, + 1.7739, 1.6807, 1.6260, 1.8368, 1.6922, 1.8865, 1.7865, 1.6245, 1.6890, + 1.7053, 1.8425, 1.7724, 1.7564, 1.7803, 1.6540, 1.7523, 1.6523, 1.8011, + 1.7817, 1.8210, 1.7611, 1.7148, 1.6591, 1.7066, 1.7953, 1.6767, 1.7715, + 1.7065, 2.0386, 2.6908, 1.7533, 1.7069, 1.7056, 1.8112, 1.8387, 1.7340, + 1.6729, 1.7448, 1.7651, 1.7562, 1.7603, 1.7402, 1.6834, 1.6905, 1.7669, + 1.7285, 1.7695, 1.6959, 1.7073, 1.7729, 1.6439, 1.7737, 1.9007, 1.8193, + 1.6352, 1.7767, 1.7973, 1.6721, 1.6326, 2.0689, 1.7521, 1.5624, 1.7597, + 1.7233, 1.7889, 1.6468, 1.7949, 1.6567, 1.7086, 1.6855, 1.7539, 1.7119, + 1.7105, 1.7665, 1.6940, 1.7199, 1.6587, 1.6535, 
1.7443, 1.8192, 1.6830, + 1.6448, 1.8299, 1.7532, 1.7436, 1.7580, 1.7853, 2.2166, 1.7786, 1.6753, + 1.6548, 1.7259, 1.7014, 2.1519, 1.7872, 1.8132, 2.3511, 1.7451, 1.6446, + 1.7903, 1.7986, 1.7225, 1.6865, 1.8110, 1.7765, 1.7229, 1.8636, 1.6293, + 1.6722, 1.6752, 1.7345, 1.6557, 1.7530, 1.8020, 1.7724, 1.7530, 1.7401, + 1.7140, 1.6637, 1.7321, 1.7963, 1.7235, 1.5552, 1.7318, 1.6982, 1.6654, + 1.7070, 1.7836, 1.6677, 1.7144, 1.7207, 1.6214, 1.7139, 1.6765, 1.7318, + 1.6520, 1.7032, 1.6822, 1.7167, 1.9936, 1.6842, 1.7808, 1.6448, 1.6655, + 1.7495, 1.7671, 1.7984, 1.7307, 1.7251, 1.8597, 1.6690, 1.7206, 1.8050, + 1.6937, 1.7735, 2.4023, 1.7041, 1.6982, 1.8072, 1.7366, 1.7480, 1.6612, + 1.8285, 1.7446, 1.6593, 1.5729, 1.8789, 1.7670, 1.5866, 1.6760, 1.9861, + 1.7029, 1.7262, 1.6737, 1.7327, 1.7383, 1.6930, 1.7918, 1.7627, 1.6763, + 1.7033, 1.7053, 1.8581, 1.7151, 1.5995, 1.7384, 1.7110, 1.7173, 1.7167, + 1.7400, 1.7778, 1.7273, 1.8511, 1.7437, 1.7464, 1.8946, 1.7516, 1.7868, + 1.9961, 1.7375, 1.8223, 1.8808, 1.6267, 1.6657, 1.7340, 1.7113, 1.6643, + 1.6593, 1.7756, 2.0622, 2.1898, 1.7083, 1.6580, 1.7134, 1.6195, 1.7422, + 1.6664, 1.6919, 1.7474, 1.8087, 1.7293, 1.7537, 1.7542, 1.7362, 2.0181, + 1.6838, 1.7311, 1.7292, 1.6971, 1.7682, 1.7884, 1.7328, 1.5910, 1.7977, + 1.8513, 1.7065, 1.7044, 1.7043, 1.6129, 1.6470, 1.7784, 1.7901, 1.9546, + 1.7444, 1.7572, 1.7716, 1.7748, 1.7461, 1.6755, 1.7075, 1.7108, 1.7496, + 1.8443, 1.6807, 1.8030, 1.7385, 1.7137, 1.7996, 1.8267, 1.6882, 1.7066, + 1.7421, 1.7258, 1.7883, 1.6757, 1.7318, 1.7987, 1.7435, 1.7862, 1.7681, + 1.7145, 1.6727, 1.7336, 1.7229, 1.7688, 1.7182, 1.8338, 1.7751, 1.8746, + 1.7005, 1.6615, 1.6654, 1.7231, 1.6855, 1.8164, 1.7140, 1.7523, 1.7857, + 2.3900, 1.5233, 1.6147, 1.6700, 1.7539, 1.7769, 1.6780, 1.6786, 1.7570, + 1.7269, 1.6752, 1.6584, 2.0102, 1.7854, 1.8528, 1.7610, 1.7021, 1.7769, + 1.7718, 1.7939, 1.7760, 1.8099, 1.7298, 2.0391, 1.7124, 1.6612, 1.7290, + 1.7201, 1.7583, 3.1886, 1.8840, 1.7371, 1.6928, 
1.7375, 1.6900, 1.7818, + 1.7990, 1.8781, 1.8105, 1.7186, 1.7681, 1.7200, 1.7865, 1.7380, 1.7729, + 1.6335, 1.6749, 1.8281, 1.7893, 1.8383, 1.7957, 1.7725, 1.7262, 1.8634, + 1.7606, 1.7397, 1.7381, 1.8562, 1.7843, 1.6655, 1.7895, 1.7482, 1.7366, + 1.7391, 1.8199, 1.7300, 1.8336, 1.6733, 1.6370, 2.5717, 1.7043, 1.7063, + 1.6659, 1.6898, 1.7545, 1.7829, 1.6406, 1.6760, 1.7399, 1.7606, 1.6639, + 1.6529, 1.7716, 1.7976, 1.7405, 1.7779, 1.7114, 1.8633, 1.7808, 1.6656, + 1.7106, 1.6379, 1.7043, 1.7034, 1.7934, 1.7144, 1.6960, 1.6369, 1.6841, + 1.6676, 1.7403, 1.8162, 1.7031, 1.7580, 1.7332, 1.6771, 1.8749, 1.6809, + 1.6692, 1.7695, 1.8276, 1.7821, 1.5962, 1.7035, 1.6830, 1.7837, 1.7629, + 1.8197, 1.7063, 1.8174, 1.7458, 1.7029, 1.7777, 1.7655, 1.8087, 1.7410, + 1.6792, 1.8107, 1.7881, 1.7322, 1.7766, 1.7144, 1.6638, 1.7587, 1.7629, + 1.6253, 1.7377, 1.8036, 1.7703, 1.7773, 1.7076, 1.6700, 1.7811, 1.7947, + 1.7252, 1.7153, 1.6269, 2.4692, 1.6680, 1.7619, 1.7354, 1.7241, 1.7732, + 1.6729, 1.6692, 1.7719, 1.7385, 1.8128, 1.7062, 1.6910, 1.7789, 1.8631, + 1.6821, 1.7483, 1.7831, 1.7509, 1.7962, 1.7171, 1.7621, 1.7450, 1.7958, + 1.7197, 1.6905, 1.7545, 1.7549, 1.6749, 1.7081, 1.8256, 1.8227, 1.7506, + 1.8081, 1.7703, 1.7216, 1.6962, 1.8352, 1.6964, 1.7632, 1.7305, 1.7199, + 1.6506, 1.7202, 1.7306, 1.6137, 1.9637, 1.6504, 1.7721, 1.7887, 1.7454, + 1.6628, 1.6622, 1.6674, 1.7064, 1.7213, 1.7320, 1.8118, 1.8140, 1.9451, + 1.6857, 1.7272, 1.8096, 1.7151, 1.8023, 1.7991, 1.7979, 1.7435, 1.6916, + 1.7446, 1.8259, 1.8163, 1.6671, 1.7059, 1.7805, 1.7954, 1.8122, 1.7129, + 1.7006, 1.8278, 1.6928, 1.7699, 1.8127, 2.6332, 1.6948, 1.8121, 1.7807, + 1.7341, 1.7502, 1.7695, 1.9141, 1.7048, 1.6762, 1.7265, 1.6776, 1.8072, + 1.6987, 1.7570, 1.7126, 1.8879, 1.7532, 1.8169, 1.6947, 1.7870, 1.5678, + 1.6320, 1.6937, 1.7704, 1.6546, 1.7238, 1.6330, 1.7746, 1.9694, 1.6477, + 1.7148, 1.7291, 1.9253, 1.7652, 1.6549, 1.8793, 2.7716, 1.7033, 1.8173, + 1.7531, 1.8650, 1.6701, 1.7444, 1.8200, 1.8616, 
1.7401, 1.7574, 1.6387, + 1.7232, 1.7761, 1.7073, 1.7611, 1.6337, 1.6966, 1.6602, 1.7216, 1.7338, + 1.6352, 1.6476, 1.6426, 1.7410, 1.8819, 1.6756, 1.5814, 1.7793, 1.7433, + 1.7529, 1.7703, 1.6975, 1.7116, 1.7453, 1.7028, 1.8396, 1.7989, 1.7690, + 1.5984, 1.9373, 1.8247, 1.6775, 1.7250, 1.5793, 2.5916, 1.6575, 1.7041, + 1.6216, 1.7912, 1.7722, 1.8124, 1.7116, 1.6666, 1.6750, 1.6511, 1.7467, + 1.7391, 1.6443, 1.7227, 1.8167, 1.7149, 1.7041, 1.7369, 1.7651, 1.6900, + 1.7217, 1.7272, 1.6623, 1.7115, 1.6773, 1.7730, 1.7566, 1.6515, 1.7410, + 1.6800, 1.8447, 1.8276, 1.7401, 1.7146, 1.7237, 1.8311, 1.7415, 1.7554, + 1.7146, 1.6885, 1.7398], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-1.5534e-02, -9.1763e-02, -1.4045e-01, 2.9126e-02, -2.9463e-01, + 2.3244e-01, 1.4650e-01, 1.3824e-01, -1.4006e-01, -1.1251e-01, + 1.2915e-01, -1.6772e-01, -1.1740e-01, -1.8915e-02, 2.1420e-01, + -1.1838e-01, 2.4480e-02, 1.0073e-01, 1.0139e-01, -2.4958e-01, + -4.7139e-02, -3.2491e-02, 7.5936e-02, 5.0327e-03, -5.9994e-02, + -1.1320e-01, 1.1245e-01, 1.3947e-01, -1.5842e-01, 1.0699e-01, + -1.0513e-01, 1.4646e-01, 1.6563e-01, 8.8609e-02, 1.4347e-01, + -7.4134e-02, -1.2482e-01, 3.6243e-02, 2.5012e-01, 2.1067e-02, + -1.0077e-01, -9.0722e-02, -3.1916e-02, 2.9762e-01, 9.3168e-02, + -5.2134e-02, 1.2359e-01, -1.1238e-01, 6.2995e-02, 1.8050e-01, + 5.4862e-02, -1.4248e-01, -2.7663e-01, 7.0286e-02, -4.1281e-02, + 1.8095e-01, 2.5451e-02, -9.3611e-02, 3.1525e-02, 3.0742e-02, + -9.0886e-02, 3.5809e-02, -5.0295e-02, -1.6220e-02, 1.4903e-01, + 1.6067e-01, 7.7424e-02, -1.5068e-01, -9.7631e-02, -3.0946e-01, + -1.0245e-01, -7.8358e-02, 3.2978e-01, -1.1044e-01, 6.9739e-02, + -1.4487e-01, 1.8632e-01, -4.9441e-02, -1.4343e-01, 3.6038e-01, + 4.3383e-02, -1.5065e-01, 2.7976e-02, -2.0230e-02, -2.2828e-01, + 1.5317e-01, -8.6870e-02, 5.8729e-02, 8.2278e-02, -1.1187e-01, + -2.0997e-01, 1.1161e-01, 5.3529e-03, -3.4734e-01, -3.0991e-01, + -7.8397e-02, 1.9746e-01, -3.0953e-02, -2.6813e-01, 2.1088e-01, 
+ 1.2270e-01, -2.1737e-01, 2.1370e-01, -1.8573e-02, 2.0623e-01, + 3.2870e-02, 1.3364e-01, -1.4971e-01, -3.0270e-01, -6.7287e-02, + 1.4275e-01, 2.9592e-01, 1.2103e-01, -1.6004e-01, -2.3951e-02, + -3.6579e-02, 3.0966e-01, -6.5552e-02, -2.5138e-01, 2.6074e-01, + -6.9158e-02, -3.3349e-01, -1.6839e-01, -2.2436e-01, -4.7964e-02, + -1.3955e-01, -2.2653e-02, 3.1051e-01, -4.5824e-02, 1.2099e-01, + 1.0464e-02, -2.2245e-01, 2.8780e-01, 2.4416e-02, 9.3748e-02, + 2.1737e-01, -9.0517e-02, -6.9622e-02, -8.9925e-02, -1.7875e-01, + 1.5849e-01, 3.7965e-02, -1.1369e-01, 1.3518e-01, 1.7028e-02, + -2.7495e-01, 1.6793e-01, 2.1625e-01, 2.3221e-01, -1.6563e-02, + 3.1062e-01, -1.4128e-01, -1.3812e-01, -2.9676e-02, -1.1254e-01, + 7.3345e-02, 1.9008e-01, -3.1589e-01, 1.7483e-01, 2.9653e-02, + -4.0455e-02, -3.6911e-02, 8.0104e-02, 2.0828e-01, -4.0867e-02, + -2.4094e-02, -2.2152e-01, 2.1542e-01, -1.7157e-01, -1.2389e-02, + 6.6927e-02, 3.7046e-01, -3.9744e-02, 1.6790e-01, 2.5737e-01, + -6.8471e-02, 5.7411e-02, 6.1858e-02, -3.9160e-01, -4.6156e-02, + -2.1085e-01, 1.1249e-01, 1.3627e-01, 2.7793e-01, -2.0108e-01, + 2.2857e-01, -1.2739e-01, 1.2818e+00, -8.5203e-02, -2.1764e-01, + -2.8680e-01, -2.2593e-01, 8.1885e-02, -6.8978e-02, -1.5091e-01, + 4.0286e-02, 1.1618e-01, -2.5044e-01, 1.5447e-01, 9.2900e-02, + -9.9608e-02, 2.9712e-02, -2.2294e-02, 3.1927e-02, 9.5738e-02, + 1.9990e-01, 1.6504e-01, 2.4631e-01, 1.0133e-01, 6.9246e-03, + 1.8389e-01, 1.8244e-01, -2.1620e-01, 3.0810e-01, 3.2302e-03, + -1.5407e-01, -2.9648e-01, 4.3923e-03, -1.9695e-01, 3.5502e-02, + 9.7438e-02, 3.6828e-03, -4.1745e-01, -9.6438e-02, -2.8194e-01, + 1.0465e-01, -2.3009e-01, -1.8973e-01, 1.1445e-01, 1.1904e-01, + 2.4688e-01, 8.2958e-02, 4.5977e-03, -1.2731e-01, -3.0066e-01, + -2.6491e-01, 3.9542e-02, 5.0806e-02, 1.1334e-01, -6.6315e-03, + -1.2101e-01, 2.5357e-01, 1.4793e-02, -7.2905e-02, -5.3436e-02, + 1.2530e-01, -1.5131e-01, 2.3338e-02, -2.0551e-01, -2.9184e-02, + -1.2791e-02, -2.3347e-01, 1.7415e-01, -7.8002e-02, 1.1495e-01, + 
-2.7784e-01, 1.3663e-01, -1.6558e-01, -1.6523e-01, -1.5290e-01, + -1.4430e-01, 1.4719e-01, 6.6719e-02, 2.7414e-02, 4.6831e-02, + 3.8021e-01, -5.6495e-02, -1.5276e-01, -2.3739e-02, 3.4107e-02, + -7.4406e-02, 1.1249e-01, 2.2351e-01, -1.1416e-01, 4.6791e-02, + -3.3080e-02, 1.2830e-01, 2.8935e-01, 9.2980e-02, -1.5083e-01, + -4.5502e-02, -2.3274e-01, -9.3975e-02, 1.5168e-01, -8.0357e-02, + 2.5591e-01, -6.7106e-02, -1.8530e-01, 2.9677e-01, -1.3151e-01, + 1.0348e-01, -7.1660e-02, 1.1649e-01, -2.7519e-02, 1.0288e-01, + 2.3199e-02, 1.6356e-01, -2.6839e-01, 4.4333e-01, 9.9366e-02, + -3.5228e-01, -1.4036e-01, -1.9225e-01, -1.8235e-01, 4.8254e-02, + 1.1766e-01, 1.5968e-01, -2.8152e-01, 2.7740e-02, -1.4705e-01, + -8.5655e-02, -1.2457e-01, -1.5918e-01, -2.2650e-01, -2.0617e-03, + 2.2071e-01, 4.7423e-01, -1.2317e-01, 8.1605e-02, 2.6438e-01, + -1.7328e-01, -5.5589e-02, -7.9478e-02, 6.6586e-02, -3.9526e-01, + -1.6453e-01, -6.7773e-02, -6.8706e-02, 4.0666e-02, -1.0352e-01, + 2.0825e-01, -3.9659e-02, 7.2424e-03, 4.7092e-02, 2.2881e-01, + -1.1379e-01, -3.1302e-01, 8.0753e-02, -9.4348e-02, 7.6734e-02, + 1.4272e-01, -9.4793e-02, -1.0785e-01, -1.6521e-02, 2.9463e-02, + 2.0519e-01, 4.1036e-02, 5.8431e-02, -7.8597e-02, 2.9006e-01, + 2.5388e-01, 1.6062e-01, 2.9854e-03, 1.3798e-01, 1.5111e-01, + -3.3206e-01, -4.6566e-02, 1.1332e-01, 2.7857e-01, -3.2234e-01, + -1.5060e-01, -1.9339e-01, 6.2923e-02, -3.6645e-01, 5.0914e-02, + -1.0140e-03, 1.0521e-01, 2.5313e-01, 9.8427e-02, 5.3377e-02, + 3.0633e-02, -1.2009e-02, 4.8498e-02, -1.7911e-02, -1.2377e-01, + 2.1468e-02, 8.0258e-02, 2.3942e-01, -1.3388e-01, 6.4349e-02, + -2.2947e-01, 1.1419e-01, 1.6653e-01, 3.6875e-02, 2.3298e-02, + -9.5069e-04, 2.0242e-01, 8.2273e-02, 1.2790e-01, 3.8417e-02, + -1.3779e-01, -3.5614e-01, -2.9389e-03, -3.7396e-02, -2.2514e-01, + 1.1973e-01, 1.5550e-01, 4.0188e-02, -1.1199e-01, -8.5370e-02, + 2.5541e-01, 8.7027e-02, 1.0922e-01, -5.5832e-02, -2.6477e-02, + -2.6585e-01, -1.9053e-01, -1.2098e-01, 7.0815e-02, 2.6938e-01, + 
2.0130e-02, 3.1811e-02, -3.5534e-02, 1.0926e-01, 3.0880e-01, + -1.5899e-01, 6.1207e-02, -5.4653e-03, -2.8716e-02, 3.2807e-02, + -2.0764e-01, 4.9832e-02, 1.6245e-01, 5.7098e-02, 1.8167e-01, + -1.6261e-02, 2.1805e-02, -3.1371e-01, -1.3691e-01, 1.1864e-01, + -1.1328e-01, -7.7644e-02, -3.5575e-02, 2.1682e-01, 1.3694e-01, + 8.8308e-03, 2.2246e-01, 4.5436e-02, 1.8990e-04, -1.4476e-01, + -7.4615e-02, 8.5595e-02, 5.4799e-02, -7.1474e-02, 7.8181e-03, + -3.7506e-02, -9.0086e-02, -2.5738e-02, 4.2255e-03, -1.9520e-01, + -3.2488e-02, 6.0368e-03, -2.6547e-01, -2.4829e-01, 2.1645e-02, + -7.3512e-02, 7.6393e-02, -7.2706e-02, 1.5358e-01, 9.3677e-03, + -1.4428e-01, 1.6863e-01, -2.6322e-01, 2.1465e-01, -2.1246e-01, + -3.2902e-01, 3.1898e-02, 2.4764e-01, 3.4598e-01, 2.4307e-01, + 2.8445e-01, 1.6679e-02, 8.4466e-03, 6.5163e-02, 3.9525e-02, + -9.2690e-02, 4.4431e-02, -4.6913e-02, -9.2345e-03, -8.9794e-03, + 1.9486e-02, -1.1213e-01, 1.1999e-01, 1.0694e-01, -1.5547e-01, + 5.2061e-02, -1.4350e-01, 1.6855e-01, 8.6247e-02, 3.3788e-02, + 7.4436e-02, 2.2431e-01, -8.0994e-02, 7.9031e-02, -2.3455e-01, + -1.6954e-01, -2.5681e-01, 9.9518e-02, -2.0205e-02, 1.9510e-01, + -1.7703e-01, -2.3888e-01, 1.6401e-02, -1.6304e-01, -1.0341e-01, + -8.9298e-02, 2.9634e-02, -1.5724e-01, 6.5853e-02, -1.4971e-01, + -1.3078e-01, -1.6806e-01, -1.3370e-01, 3.9207e-01, -7.3544e-02, + -8.2711e-02, 9.3258e-02, -1.6172e-01, 1.8738e-01, 1.4350e-01, + 2.4039e-01, -3.3456e-02, -1.7536e-02, 2.7335e-01, -2.1055e-01, + -6.3564e-02, 2.1079e-02, 2.3638e-01, -3.1217e-02, 3.7273e-02, + 1.1079e-02, 1.5963e-01, -1.8719e-01, -1.0053e-03, -1.7813e-02, + -2.9240e-01, -8.0952e-02, 5.3291e-03, 2.6588e-01, -2.0618e-01, + -1.6225e-01, 1.0025e-01, -1.1047e-01, -9.2405e-02, 1.0489e-01, + -4.5642e-02, -1.8395e-01, -1.0642e-01, 1.8981e-01, 1.9907e-02, + -1.7135e-01, 6.5531e-02, -1.3988e-03, 2.7817e-01, -2.5802e-01, + -1.4527e-01, 1.6533e-01, 2.5748e-02, -6.5332e-02, 4.1882e-02, + -1.7025e-01, -4.3420e-01, -5.4950e-02, 2.4054e-01, 4.4939e-01, + 
2.2223e-01, -5.3037e-02, 1.9851e-01, 8.5092e-02, -5.1684e-02, + 5.8122e-02, -2.0831e-01, 1.3727e-02, -1.4461e-01, 1.1605e-01, + 3.3023e-01, -2.5016e-03, 8.3355e-02, 2.8889e-03, -9.1767e-02, + -2.2500e-01, -2.6005e-01, -2.2718e-02, 7.8965e-02, -2.0193e-01, + 3.1948e-01, -1.4691e-01, -4.2470e-03, 1.6581e-01, -5.5066e-02, + -2.8730e-02, 1.2353e-01, 9.0078e-02, 5.3239e-02, 7.6229e-02, + -1.8688e-02, 1.8038e-01, -1.3688e-01, 1.0056e-02, -1.1915e-01, + 1.1376e-01, -1.0106e-01, 3.1833e-02, -2.3530e-01, -2.1356e-01, + 1.2081e-02, 1.9723e-01, -6.6254e-02, -5.8621e-03, 7.9542e-02, + 1.8380e-01, 6.9102e-02, -2.0355e-01, 3.8634e-04, 1.5694e-01, + 1.5234e-02, 7.0116e-02, -8.7134e-02, -1.0493e-01, -1.2832e-01, + 8.8785e-02, 5.0802e-02, -1.6515e-01, 1.5294e-01, -9.5807e-02, + 1.1724e-01, -2.1482e-01, 5.9678e-02, -1.0187e-01, -1.1231e-01, + 7.3172e-02, 8.1636e-02, 1.3985e-01, -6.1189e-02, 3.2938e-01, + -2.0465e-01, 5.8024e-02, -2.0098e-01, -7.7353e-02, -1.4911e-01, + -1.2737e-01, -5.6727e-02, -1.5146e-01, 5.5125e-03, -3.8270e-01, + 8.5914e-02, 4.1835e-02, 2.1180e-01, 6.6254e-02, 1.4918e-01, + 4.0368e-04, 8.3069e-02, 5.3523e-02, -7.5874e-02, -3.9154e-02, + 9.0881e-02, -4.8088e-02, -8.1549e-02, -9.4344e-02, 1.8658e-01, + -1.1282e-03, -3.2404e-02, 2.2197e-01, 1.2310e-02, -1.4756e-01, + -1.5818e-01, -1.0427e-01, -1.3376e-01, -4.5153e-01, 1.4150e-01, + -4.6084e-01, 7.0984e-02, -5.3660e-03, 1.1445e-01, 2.0639e-01, + -1.0595e-01, 9.8461e-02, 1.1745e-01, -8.5346e-02, 9.7677e-02, + -2.8740e-01, -1.0844e-01, 2.0947e-02, -1.3861e-01, 1.5285e-01, + 7.0771e-02, 4.2567e-02, 1.3805e-01, 3.5083e-02, -4.3154e-02, + -3.7462e-01, 5.8789e-02, -1.3240e-01, -1.0771e-01, -2.4909e-01, + 3.2137e-02, -1.4593e-01, -5.0975e-02, -1.2179e-01, -1.0344e-01, + 1.2949e-01, -1.4998e-01, 1.3079e-01, -1.7869e-01, -1.0132e-01, + 1.8350e-01, 4.9456e-02, -5.7911e-02, -1.6670e-01, -2.7019e-01, + -1.3105e-01, 2.1986e-01, -6.1723e-02, 3.5003e-01, -1.5446e-01, + 1.9561e-01, 1.1300e-01, 1.3925e-01, -1.5276e-01, 1.0147e-01, + 
-1.9581e-02, -4.3691e-01, -9.7110e-02, -3.3967e-01, -2.7802e-02, + -7.2438e-02, -1.4261e-01, 1.0363e-01, 3.0578e-01, 4.9787e-02, + 1.1251e-01, 7.2107e-02, 2.5426e-01, 1.4806e-01, 9.5198e-02, + 1.0909e-01, -9.6786e-02, -1.5798e-01, -4.7056e-03, -2.3768e-01, + -2.6643e-01, 1.7792e-01, 1.9149e-01, 4.8918e-02, 1.6031e-01, + -1.1331e-01, -1.2023e-02, 1.2726e-01, 3.3811e-01, 2.9085e-01, + -3.1095e-02, -2.7095e-01, 1.0544e-01, -7.7017e-02, 1.5635e-01, + 1.0879e-01, 8.0752e-03, 1.5868e-01, -1.9379e-01, 4.0000e-02, + -3.2902e-01, -1.3328e-01, 1.0858e-01, 4.2967e-02, -1.2718e-01, + -3.5778e-02, 1.1070e-01, 1.7500e-01], device='cuda:1', + requires_grad=True)Parameter containing: +tensor([0.9789, 0.9294, 1.0503, 0.9833, 1.3635, 0.9817, 0.8959, 0.9901, 0.9724, + 1.0195, 0.9912, 1.0194, 0.9510, 0.9497, 1.0480, 0.9598, 0.9356, 0.9458, + 1.0339, 1.4869, 0.9539, 1.0286, 1.0209, 0.9972, 0.9667, 0.9840, 0.9655, + 1.0248, 0.9781, 0.9649, 0.9608, 1.0080, 0.9781, 0.9799, 1.0005, 0.9709, + 0.9607, 1.0319, 1.0683, 1.0291, 0.9931, 0.9402, 1.0207, 0.9329, 0.9443, + 1.0045, 0.9924, 0.9469, 0.9767, 1.0029, 0.9833, 0.9146, 0.9649, 0.9591, + 0.9597, 1.1766, 0.9689, 0.9140, 0.9275, 0.9578, 0.9100, 0.9850, 0.9424, + 1.0120, 0.9857, 1.0222, 1.0457, 1.0024, 1.0410, 1.4892, 0.9385, 0.9460, + 0.9541, 1.0221, 0.9335, 0.9982, 1.0116, 0.9893, 0.9835, 0.9902, 0.9754, + 0.9635, 0.9653, 0.9582, 0.9862, 0.9706, 0.9513, 0.9523, 0.9896, 0.9582, + 0.9980, 0.9927, 0.9415, 0.9474, 0.9352, 1.0319, 1.0163, 0.9256, 0.9569, + 1.0629, 0.9677, 0.9598, 0.9947, 0.9495, 1.0303, 1.1323, 0.9882, 0.9709, + 0.9408, 0.9724, 1.0592, 0.9617, 0.9984, 0.9713, 1.0029, 0.9759, 1.0748, + 0.9440, 0.9530, 0.9644, 0.9541, 1.0438, 0.9496, 0.9783, 0.9973, 1.0621, + 0.9814, 0.9424, 0.9092, 0.9427, 1.0071, 0.9924, 0.9948, 0.9372, 1.0529, + 1.0278, 1.0116, 0.9581, 1.0060, 1.0342, 0.9699, 1.0151, 0.9476, 0.9842, + 0.9829, 1.0783, 1.0188, 0.9930, 0.9428, 0.9109, 1.1249, 0.9518, 0.9691, + 0.9435, 1.0108, 1.8398, 0.9405, 1.0203, 0.9426, 0.9539, 
1.0851, 0.9390, + 0.9137, 0.9963, 0.9783, 0.9774, 0.9921, 0.9872, 0.9343, 0.9382, 1.0005, + 1.0383, 1.0908, 1.0157, 0.9840, 0.9636, 0.9586, 0.9408, 1.0387, 0.9679, + 0.9141, 0.9924, 0.9980, 0.9678, 0.9615, 1.2429, 1.0328, 1.1239, 0.9823, + 0.9177, 0.9334, 0.9671, 0.9564, 0.9523, 0.9280, 0.9608, 1.0131, 0.9538, + 1.1718, 1.0348, 0.9512, 1.0957, 0.9225, 0.9518, 1.0423, 0.9711, 0.9743, + 1.0087, 1.0097, 0.9382, 0.9720, 1.0695, 1.0373, 1.3284, 0.9904, 0.9383, + 0.9837, 0.9402, 0.9627, 1.1128, 0.9779, 0.9696, 1.5119, 0.9708, 0.9545, + 1.0144, 0.9609, 0.9002, 0.9714, 0.9668, 1.0259, 0.9891, 0.9930, 0.9484, + 1.0384, 0.9235, 1.0008, 0.9805, 0.9669, 1.0055, 0.9815, 0.9618, 0.9798, + 0.9605, 0.9924, 1.0096, 0.9602, 0.9588, 0.9440, 0.9645, 0.9641, 0.9293, + 0.9837, 0.9505, 0.9990, 0.9378, 0.9800, 0.9223, 0.9465, 1.0049, 0.9628, + 0.9659, 0.9606, 0.9697, 0.9839, 1.0872, 0.9421, 0.9863, 0.9491, 0.9595, + 0.9383, 0.9845, 0.9450, 0.9953, 0.9629, 0.9854, 1.0170, 0.9908, 0.9800, + 0.9220, 0.9957, 1.3879, 0.9971, 0.9980, 0.9933, 1.0206, 0.9309, 0.9692, + 0.9807, 1.0204, 0.9707, 0.9000, 1.0781, 0.9787, 0.9227, 0.9209, 1.1598, + 0.9672, 0.9598, 0.9724, 1.0041, 0.9868, 0.9703, 1.0104, 0.9311, 1.0195, + 0.9569, 0.9985, 1.0102, 1.0374, 0.9528, 0.9749, 1.0033, 0.9420, 0.9753, + 0.9903, 0.9605, 0.9820, 1.0534, 0.9808, 1.0243, 1.0862, 0.9781, 1.0080, + 0.6989, 1.0179, 0.9068, 0.9796, 0.9608, 0.9569, 1.0111, 0.9837, 0.9577, + 0.9589, 0.9779, 1.0451, 1.1752, 0.9593, 0.9444, 0.9135, 0.9258, 0.9964, + 0.9537, 1.0046, 1.0097, 0.9951, 0.9439, 0.9784, 0.9663, 0.9261, 1.2351, + 0.9782, 0.9517, 1.1271, 0.9770, 0.9646, 0.9768, 0.9724, 0.9354, 1.0252, + 0.9786, 0.9721, 0.9825, 0.9624, 0.9528, 0.9635, 1.0061, 0.9892, 0.9911, + 0.9513, 0.9656, 1.0066, 0.9590, 0.9904, 0.9637, 0.9677, 0.9733, 0.9671, + 0.9685, 0.9739, 1.0084, 0.9667, 0.9818, 1.0092, 1.0129, 0.9741, 0.9463, + 1.0102, 1.0144, 1.0013, 0.9456, 1.0054, 1.0135, 0.9948, 1.0081, 0.9378, + 1.0029, 0.9869, 0.9540, 0.9829, 1.0220, 1.0169, 0.9994, 
0.9782, 1.0695, + 0.9567, 0.9314, 0.9749, 0.9682, 1.0318, 0.9593, 0.9616, 0.9607, 0.9752, + 1.4810, 0.9323, 0.9402, 0.9837, 1.0073, 0.9842, 0.9367, 0.9604, 1.0116, + 1.0078, 0.9702, 0.9684, 1.1285, 0.9511, 0.9765, 1.0176, 0.9596, 1.0339, + 0.9624, 1.0395, 0.9849, 0.9565, 0.9806, 1.0773, 0.9714, 0.9472, 1.0115, + 0.9649, 0.9228, 2.0901, 0.9879, 0.9882, 0.9761, 0.9814, 0.9468, 0.9994, + 0.9286, 0.9911, 0.9497, 0.9147, 0.9676, 0.9874, 0.9877, 0.9406, 0.9652, + 0.9351, 0.9923, 1.0111, 0.9771, 1.0114, 1.0217, 0.9870, 0.9537, 1.0784, + 1.0336, 1.0093, 0.9765, 0.9986, 1.0106, 0.9380, 0.9709, 1.0175, 0.9601, + 0.9987, 0.9836, 0.9945, 0.9813, 0.9800, 0.8724, 1.7095, 0.9634, 0.9996, + 0.9365, 0.9805, 0.9622, 1.0119, 0.9878, 0.9472, 0.9417, 0.9169, 0.9876, + 0.9900, 0.9350, 1.1267, 0.9505, 1.0640, 0.9625, 1.0296, 1.0113, 0.9412, + 0.9205, 0.9651, 0.9713, 0.9395, 1.0119, 0.9269, 1.0506, 0.9600, 0.9694, + 0.9757, 0.9544, 1.0086, 0.9788, 1.0508, 1.0209, 0.9488, 0.9966, 0.9317, + 0.9572, 0.9729, 1.0164, 0.9792, 0.9196, 0.9784, 0.9793, 0.9809, 0.9796, + 0.9861, 0.9729, 0.9508, 0.9676, 0.9786, 0.9937, 1.0135, 0.9527, 0.9108, + 0.9070, 1.0103, 0.9822, 0.9784, 1.0208, 0.9664, 0.9470, 0.9950, 1.0025, + 0.9190, 0.9683, 1.0020, 0.9953, 1.0268, 0.9235, 0.9606, 0.9984, 0.9683, + 1.0037, 1.0107, 0.9407, 1.4926, 0.9923, 1.0471, 0.9927, 0.9654, 1.0307, + 0.9588, 0.9602, 0.9697, 0.9666, 0.9573, 0.9678, 0.9603, 1.0030, 1.0766, + 0.9451, 1.0309, 0.9786, 1.0248, 1.0939, 0.9755, 1.0129, 0.9870, 0.9454, + 0.9679, 0.9613, 0.9953, 1.0223, 0.9391, 0.9962, 1.0024, 0.9905, 0.9667, + 0.9975, 1.0962, 0.9719, 0.9752, 1.0808, 0.9856, 0.9431, 0.9547, 0.9668, + 0.9539, 0.9781, 0.9672, 0.9340, 1.1096, 1.0085, 0.9580, 0.9485, 0.9604, + 0.9431, 0.9216, 0.9337, 0.9443, 1.0195, 0.9627, 0.9664, 1.0743, 0.9936, + 0.9841, 0.9570, 1.0709, 0.9946, 0.9981, 0.9501, 0.9921, 0.9470, 0.9568, + 0.9283, 0.9953, 1.0495, 0.9913, 0.9984, 1.0113, 1.0228, 0.9384, 0.9789, + 1.0531, 0.9579, 0.9499, 1.0594, 0.9886, 1.4655, 0.9475, 
1.0596, 0.9834, + 1.0035, 1.0115, 0.9774, 1.0214, 0.9791, 0.9685, 0.9650, 0.9535, 0.9771, + 0.9126, 0.9927, 1.0409, 1.2192, 0.9652, 1.0573, 0.9687, 0.9728, 0.9429, + 1.0402, 0.9895, 0.8916, 1.0075, 0.9711, 0.9827, 0.9865, 1.2124, 0.9498, + 0.9622, 0.9726, 1.0289, 0.9481, 1.0022, 0.9531, 1.6650, 1.0106, 1.0316, + 0.9528, 1.0154, 0.9521, 0.9370, 0.9971, 1.0099, 0.9931, 0.9787, 0.9301, + 0.9546, 1.0009, 0.9576, 1.0004, 0.9747, 0.9970, 0.9373, 0.9452, 1.0046, + 0.8863, 0.9072, 0.9384, 0.9922, 0.9774, 0.9665, 0.9855, 0.9787, 0.9994, + 0.9961, 0.9882, 0.9664, 0.9641, 0.9573, 0.9490, 1.0217, 1.0008, 1.0120, + 0.9250, 1.0264, 0.9800, 0.9720, 0.9221, 0.8856, 1.7454, 0.9631, 0.9402, + 0.9390, 1.0204, 1.0326, 1.0082, 1.0139, 0.9646, 0.9637, 0.9325, 1.0175, + 0.9553, 0.9596, 0.9731, 0.9429, 1.0211, 0.9584, 0.9190, 1.0024, 0.9525, + 1.0043, 1.0158, 0.9731, 0.9330, 0.9368, 0.9898, 0.9570, 1.0412, 0.9947, + 0.9534, 1.1320, 1.0050, 0.9660, 0.9688, 0.9525, 0.9742, 1.0358, 0.9980, + 0.9786, 0.9825, 0.9780], device='cuda:1', requires_grad=True)Parameter containing: +tensor([-3.0972e-01, 8.6520e-02, -1.7815e-01, -2.1274e-01, 1.0310e-02, + -2.1799e-01, 2.6805e-02, -1.2406e-01, 1.1344e-01, -1.0872e-01, + -2.1680e-01, -1.5038e-01, -1.6801e-01, -3.8868e-01, 4.3549e-02, + -1.6931e-01, -1.0309e-01, -1.1146e-01, -2.8433e-02, -7.3690e-02, + -2.0815e-01, -1.5633e-01, -5.8505e-02, -1.1866e-01, -9.1465e-02, + -1.2483e-01, -1.6782e-01, -1.8859e-01, -2.8829e-01, -6.2344e-02, + 5.8944e-02, -2.3529e-02, -1.3301e-01, -3.5179e-02, -1.3117e-01, + -3.7027e-01, -3.8042e-01, -2.0661e-01, -5.9429e-02, 4.7935e-02, + -1.1948e-01, -1.6189e-01, -1.1954e-01, -1.6275e-01, -1.9291e-01, + -3.2738e-01, 6.2183e-02, -1.8937e-01, -2.8478e-01, -1.1591e-01, + -3.1983e-02, -1.3321e-01, -3.0793e-01, -9.1934e-02, -3.1607e-01, + 1.7626e-03, -3.7448e-02, -2.1910e-01, -3.0856e-01, 1.1530e-01, + 7.8005e-02, -1.3057e-01, -1.4585e-01, -2.2813e-01, -1.6288e-01, + -2.2923e-01, -2.6776e-01, -1.9933e-01, -2.2633e-01, 1.1466e-01, + 
-1.0994e-01, -1.2034e-01, -7.9534e-02, -3.2612e-01, -2.9924e-01, + -1.5115e-01, -1.4329e-02, -1.6127e-01, 1.3527e-01, -3.5650e-02, + -1.3662e-01, -9.8355e-02, -1.8159e-01, -8.9092e-02, -7.4571e-02, + -2.8322e-01, 2.9549e-01, -7.7638e-02, -2.7117e-02, -1.8011e-01, + -3.8760e-01, 1.8966e-02, -6.7477e-02, -1.5817e-01, -3.1372e-01, + -8.0799e-02, 6.7441e-02, -1.7432e-01, -2.6955e-01, 6.9790e-02, + -1.0390e-01, -4.4026e-01, -1.0927e-01, -4.8078e-01, -4.3120e-03, + -1.4704e-01, 1.6252e-01, -6.3587e-03, -1.6030e-01, -1.2555e-01, + 2.4264e-03, -2.0487e-01, -1.9123e-01, -7.2894e-02, 2.6715e-02, + -2.9070e-01, 2.1943e-02, -1.0394e-01, -2.0428e-01, 1.9086e-02, + -5.8713e-02, -1.1117e-01, -2.1590e-01, -7.7958e-03, 8.1530e-02, + -4.0557e-02, -6.9983e-02, 8.2076e-02, -2.8870e-01, 5.1586e-04, + -7.0652e-02, -1.6712e-01, -5.4576e-02, 3.9755e-02, 9.6160e-02, + 3.4701e-02, -2.3451e-01, -2.4870e-01, 2.3932e-02, -1.9629e-01, + -5.2537e-02, 2.0197e-02, -7.9063e-02, -1.7944e-01, -1.2057e-01, + -1.4907e-01, 1.8654e-01, -1.6093e-01, -2.8629e-01, -2.0329e-01, + 3.8109e-02, -2.5073e-01, -2.7577e-01, -1.1063e-01, -1.0919e-01, + 1.3914e-02, -2.8746e-01, -3.2939e-01, -2.2465e-01, -1.6625e-01, + -1.8405e-01, -4.9315e-02, -2.2022e-01, -2.8075e-01, -5.6807e-02, + 1.0513e-01, -7.9162e-02, 3.6867e-02, -1.8553e-01, -2.9716e-01, + 7.4794e-02, 6.0102e-02, -1.4944e-01, 1.4078e-03, -1.2949e-01, + -1.4017e-01, -2.0010e-01, -1.3645e-01, -5.3604e-02, 9.3507e-02, + -4.0007e-01, -8.2399e-02, 1.4770e-03, 2.5266e-02, -1.3282e-01, + -5.0428e-02, -2.4350e-01, 2.7861e-01, -3.4818e-02, 9.8363e-02, + -3.0732e-01, -3.4218e-01, -2.9501e-02, -3.9651e-02, -6.8811e-02, + -4.2808e-02, 1.3737e-01, -1.1500e-01, -2.3656e-01, 2.0047e-01, + -2.0047e-01, -1.8455e-01, -1.9737e-01, -3.0253e-02, -8.7459e-02, + -2.0932e-01, 1.5391e-02, 1.2252e-02, -2.1960e-01, -3.3056e-01, + 1.3683e-01, -2.0823e-01, -3.2396e-01, -1.1140e-01, -3.3370e-01, + -2.8346e-01, -2.3274e-01, -1.6795e-01, -3.2785e-01, 3.7867e-02, + 1.3719e-01, 1.4507e-01, 
-6.2225e-02, 5.7701e-02, -3.8609e-02, + 3.8244e-02, -2.9839e-01, -1.8011e-01, -1.9061e-01, -5.9240e-02, + -9.7641e-02, -2.6895e-01, -1.6988e-01, -1.9597e-01, -1.6472e-01, + -4.4346e-01, 1.3133e-01, -1.2524e-01, -3.5263e-01, -1.4135e-01, + 4.1890e-02, -5.1405e-02, 4.4846e-02, -1.4310e-01, -1.5345e-01, + -1.3753e-01, -1.1721e-01, -1.3412e-01, -1.3196e-01, 3.5904e-02, + 1.4487e-02, -1.3828e-01, -1.1927e-01, -2.2135e-01, -1.7975e-01, + -2.3271e-01, -2.4277e-02, -2.7056e-01, -3.3887e-01, -2.0596e-01, + -3.2562e-01, -4.5267e-01, 1.7332e-01, 3.4287e-02, -2.7470e-01, + -4.0351e-02, -1.5061e-01, -2.0349e-01, 8.5356e-02, 6.9045e-02, + 3.8983e-02, 6.1722e-02, 2.4213e-02, -8.5502e-02, 4.4269e-03, + -1.0968e-01, -1.4912e-01, -4.7435e-02, 1.4155e-01, -1.8818e-01, + -3.2807e-02, -2.2663e-01, -3.0295e-01, 9.7512e-03, -1.7489e-01, + -9.1184e-02, -2.0599e-01, 1.4191e-01, -5.3643e-02, 7.9877e-02, + -2.9450e-01, -1.2680e-01, 8.3230e-02, -2.6408e-01, -4.8992e-02, + -5.0730e-02, 7.4562e-03, -1.9729e-01, -1.8002e-01, -6.4581e-03, + -2.3463e-01, 1.5716e-01, -5.0802e-02, -3.3855e-01, 5.2296e-02, + -9.6272e-02, -1.5351e-01, -2.3901e-01, -1.5963e-01, -1.6693e-01, + -1.1239e-01, -3.7593e-01, -7.4591e-02, 4.3444e-02, -1.5205e-01, + -1.6956e-03, 1.6140e-01, -2.1399e-01, -1.8157e-01, -3.0919e-02, + -2.7055e-01, -1.2207e-01, -1.5803e-01, -3.8846e-02, -4.9430e-01, + -1.1189e-01, -2.8882e-01, -1.2073e-01, -6.5715e-02, -1.4964e-01, + -2.4234e-01, -2.6441e-01, -2.7818e-01, -6.0429e-02, 7.3955e-02, + 6.7665e-02, -2.6970e-01, -4.5610e-02, 2.3626e-02, 1.0868e-02, + -2.1497e-02, -2.7581e-01, -2.1453e-01, 1.3595e-01, -2.0828e-01, + 1.1379e-01, 8.0894e-03, 8.1668e-02, 1.1096e-01, 2.5441e-01, + 7.3634e-02, -2.1243e-01, -2.3957e-01, -1.1420e-02, -1.9181e-01, + -1.9316e-01, -4.4721e-01, -8.3250e-02, -6.5639e-02, -5.8319e-02, + -1.2700e-01, -1.0081e-01, -7.0792e-02, -3.4129e-01, -1.4185e-01, + -4.1964e-01, -1.5962e-01, 2.2652e-02, -2.0097e-01, -1.7302e-01, + -1.6948e-01, -1.4114e-01, -9.6507e-02, -1.6094e-01, 
-3.6258e-01, + -2.5993e-01, 6.2780e-02, -1.1315e-01, -2.1144e-02, -7.3426e-03, + -1.3075e-01, -2.6975e-01, -2.2317e-01, -7.0074e-02, 8.6945e-02, + -2.5793e-01, -1.4867e-01, 8.9297e-02, 3.3459e-03, -2.6978e-01, + -1.6799e-01, -1.9151e-01, -7.5621e-02, -5.7356e-02, -2.5335e-01, + 3.0635e-02, -2.6356e-01, -7.0652e-02, -2.5116e-01, -3.5934e-01, + -2.9494e-02, -9.2532e-02, -5.1282e-02, -1.1410e-01, -1.0907e-01, + -2.8711e-01, -1.1422e-01, -3.5451e-02, -1.9770e-01, 1.1843e-01, + -2.8797e-01, -4.3960e-01, -3.4017e-02, 9.0205e-02, -1.3106e-01, + -8.4129e-02, -1.8859e-01, -2.6158e-01, 1.6586e-01, -1.2378e-01, + -1.5896e-01, -1.6694e-01, 1.6234e-02, -1.5692e-01, -1.6066e-01, + -3.7369e-04, 2.3442e-02, -5.5136e-02, -8.2050e-02, -2.5465e-01, + -1.2962e-01, -1.5731e-01, 7.7169e-02, -1.6205e-01, -2.2577e-01, + 2.7081e-01, -1.1905e-01, -2.3228e-02, -2.7891e-01, -2.6373e-01, + -1.0485e-01, -1.3941e-01, -6.7630e-02, -1.4722e-02, 1.3796e-02, + -3.4524e-02, 8.9978e-02, -2.1230e-01, 8.5656e-02, -1.6448e-01, + -3.2909e-02, -2.3079e-01, -3.2352e-01, 2.6198e-02, -1.7452e-01, + -2.2494e-01, -3.5444e-01, -1.2681e-01, -1.1768e-01, -2.7710e-01, + -2.5133e-01, -1.0190e-01, -3.9334e-01, -4.8219e-02, -2.9652e-01, + -7.3225e-02, -8.6772e-03, 3.2621e-02, -1.6923e-01, -2.9421e-01, + 1.8239e-01, -1.4777e-01, -2.9244e-01, -3.1106e-02, -1.5851e-01, + -2.4942e-01, -2.5309e-01, -2.9053e-01, 9.9120e-02, -2.4745e-01, + 4.4178e-02, 2.9294e-02, 1.8558e-01, -9.1317e-02, 2.0792e-02, + -1.6872e-01, -2.2066e-01, -6.8652e-02, 6.6996e-02, -2.0110e-01, + -1.2881e-02, -4.1882e-02, -1.6867e-01, -4.1276e-02, -1.1632e-01, + -2.4713e-01, -2.1714e-01, -2.5670e-01, -7.6108e-02, 7.5738e-02, + -5.2782e-02, -2.1995e-01, -2.1114e-01, -8.2967e-02, -2.9250e-01, + -1.1585e-01, -3.7202e-02, 2.2169e-01, -1.1157e-01, -1.3117e-01, + -4.5070e-01, 9.1724e-02, -2.5536e-01, -1.1099e-01, -1.4028e-01, + 2.8116e-02, -5.5589e-02, -1.1969e-01, -8.9219e-02, 4.4426e-02, + 1.8124e-01, 6.9828e-02, -2.9449e-01, -3.8633e-01, -3.2289e-01, + 
-1.5161e-01, 7.4736e-02, -3.2911e-01, 9.0841e-02, -9.4956e-02, + -1.6886e-01, -4.3418e-01, -1.8379e-01, -1.7249e-01, -4.3381e-02, + -3.0515e-01, -1.3860e-02, -1.3408e-01, 2.3386e-02, -1.9684e-01, + 1.1179e-01, -1.0725e-01, -5.1684e-02, 9.9985e-03, -8.1149e-02, + -5.7091e-02, -2.6378e-01, -1.8482e-01, 2.4157e-02, -3.0910e-01, + -9.7387e-02, -1.3443e-01, -1.3120e-01, -4.1030e-02, -2.8421e-01, + -1.1970e-01, 2.6584e-01, -2.9540e-01, -4.1858e-01, -4.4225e-02, + -3.3776e-02, 5.1072e-02, -1.0784e-01, 2.4809e-03, -1.0006e-02, + -1.6900e-01, -1.6756e-01, 7.1867e-02, -1.4536e-01, -3.2948e-01, + -4.8964e-02, 2.5080e-02, -2.9177e-01, -2.3546e-01, -9.6734e-02, + 5.2769e-02, 5.9821e-02, -7.1729e-02, -5.2216e-02, 4.6748e-02, + -1.0374e-01, -3.3411e-01, 1.9977e-02, -1.9944e-01, -1.4249e-01, + -4.2926e-01, -2.0550e-01, -1.2332e-01, 2.4972e-02, -3.1156e-01, + -3.5618e-02, 1.4841e-01, -6.8435e-02, -8.9134e-02, -4.3101e-01, + 3.2034e-02, -5.7796e-02, -1.0815e-01, 7.9357e-02, 6.0425e-02, + 9.0992e-02, -8.4117e-02, -1.6460e-01, -1.8860e-01, -1.1188e-01, + -1.6019e-02, -2.7539e-01, -3.0461e-02, 3.6393e-02, -5.5778e-02, + -2.3261e-01, 3.3813e-03, -7.2618e-02, 1.3152e-01, 2.5424e-02, + -2.3474e-01, 1.2788e-01, 1.0549e-01, -3.9438e-01, -1.4477e-01, + -8.7272e-02, -5.9155e-02, -1.8755e-02, 8.3795e-03, -1.3303e-01, + 3.5993e-02, -4.8489e-01, -9.8144e-02, -2.0187e-01, -1.7780e-01, + -1.5702e-01, -1.6632e-01, -7.6623e-02, -1.2067e-01, 9.7476e-02, + -1.4865e-01, 3.9298e-02, -8.1964e-02, -5.7571e-02, -1.6357e-02, + -2.4697e-02, -2.3285e-01, -1.8711e-01, 5.9722e-02, -5.0565e-03, + 1.6085e-01, -5.6193e-02, -1.2748e-01, -1.7989e-01, 1.6912e-02, + -7.4359e-02, 5.9318e-02, 4.5689e-02, -3.4782e-01, 1.2438e-02, + 4.9462e-03, -7.6542e-02, -1.7623e-03, -3.1946e-01, -8.4081e-02, + 4.2274e-02, -2.0708e-01, -1.0376e-01, -3.2503e-01, -4.9442e-02, + -1.5170e-01, -1.7022e-01, -1.8604e-01, 7.0737e-02, -2.3663e-01, + -3.6244e-02, -2.3101e-01, -2.2293e-01, 8.8521e-03, -7.0427e-02, + -2.2006e-01, 6.1665e-02, 
-1.2758e-01, -1.3123e-01, 1.3888e-02, + -2.7413e-01, -3.8097e-02, -3.4511e-01, -2.7228e-01, 8.0634e-02, + -1.5312e-02, -3.7472e-03, 2.0875e-02, 1.5117e-01, -2.4863e-01, + -3.2693e-01, -1.0401e-01, -1.4831e-01, -1.8991e-01, -1.6961e-01, + 1.1688e-01, 6.0180e-02, -5.3109e-02, -3.7527e-01, 1.3247e-01, + -1.2943e-01, -1.7973e-01, 1.0068e-01, -1.7821e-01, -1.9093e-01, + 2.9122e-02, -5.7265e-01, -1.4149e-01, -7.8197e-02, -2.9365e-01, + -2.5148e-01, -1.0391e-01, -1.6442e-01, -1.2958e-01, -6.3028e-02, + -7.3926e-02, -9.9090e-02, 1.4622e-02, -3.2253e-01, -2.1039e-01, + -3.5321e-02, -6.1373e-02, -4.3052e-03, -2.5899e-01, 1.4603e-01, + -6.2891e-02, 3.2609e-02, -8.3760e-02, -7.8426e-02, -5.5548e-02, + -2.1703e-01, -7.2742e-02, -1.0241e-01, -1.5250e-01, -5.3758e-03, + 1.6436e-01, -1.6233e-01, -1.1661e-01, -2.6216e-01, -3.3025e-01, + -1.5915e-01, -3.5974e-01, -1.6534e-01, 8.1741e-03, 1.2124e-01, + -7.8771e-02, -2.6709e-01, -1.5131e-01, 1.1832e-01, -9.7288e-02, + 1.5229e-01, -1.3003e-01, -3.0911e-01, -8.6667e-02, -6.7893e-02, + -1.5559e-01, -1.3761e-01, -4.8186e-02, -1.4222e-01, -5.8575e-02, + -4.5176e-01, -2.7698e-01, -1.8527e-01, 1.3501e-01, 1.4931e-02, + -5.2130e-01, -2.6890e-01, 9.5427e-02], device='cuda:1', + requires_grad=True) \ No newline at end of file diff --git a/python/ClipDetection/CoOp/trainers/custom_named_generator_cuda.txt b/python/ClipDetection/CoOp/trainers/custom_named_generator_cuda.txt new file mode 100644 index 00000000..1d5546c8 --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/custom_named_generator_cuda.txt @@ -0,0 +1,12587 @@ +torch.Size([]) +Parameter containing: +tensor(4.6052, device='cuda:1', requires_grad=True) +torch.Size([16, 768]) +Parameter containing: +tensor([[-3.6102e-02, 8.2932e-03, 1.1726e-02, ..., 1.1253e-03, + -4.8218e-03, 1.7639e-02], + [ 3.5629e-03, -1.0719e-02, 2.6947e-02, ..., 6.7596e-03, + 9.3536e-03, 4.5252e-04], + [-1.8234e-02, -1.9272e-02, -4.8523e-03, ..., -1.6937e-02, + 3.1796e-03, -6.8932e-03], + ..., + [-2.7466e-02, 
-4.8752e-03, 2.0004e-02, ..., 1.2712e-03, + -2.6382e-02, 2.4521e-02], + [ 1.2375e-02, -1.9409e-02, 4.3678e-03, ..., 1.6769e-02, + -3.3844e-02, -1.2253e-02], + [ 2.2934e-02, 8.4534e-03, 3.3875e-02, ..., -3.8853e-03, + 2.7120e-05, -7.8354e-03]], dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0138, 0.2357, -0.1285, ..., 0.0171, -0.3332, -0.2366], + device='cuda:1', requires_grad=True) +torch.Size([257, 1024]) +Parameter containing: +tensor([[ 0.0019, 0.0479, -0.0149, ..., 0.0005, -0.0558, -0.0460], + [ 0.0114, -0.0413, 0.0357, ..., 0.0271, -0.0313, -0.0383], + [-0.0026, -0.0340, -0.0006, ..., 0.0216, -0.0294, -0.0423], + ..., + [-0.0038, -0.0350, -0.0048, ..., -0.0228, -0.0328, -0.0412], + [-0.0046, -0.0360, -0.0026, ..., -0.0350, -0.0355, -0.0353], + [-0.0073, -0.0287, -0.0144, ..., -0.0202, -0.0272, -0.0360]], + device='cuda:1', requires_grad=True) +torch.Size([1024, 768]) +Parameter containing: +tensor([[ 0.0224, -0.0139, -0.0072, ..., -0.0058, -0.0078, 0.0139], + [ 0.0186, 0.0084, 0.0400, ..., -0.0149, -0.0241, -0.0003], + [ 0.0075, -0.0007, 0.0195, ..., -0.0062, -0.0083, 0.0156], + ..., + [ 0.0121, -0.0165, -0.0144, ..., -0.0066, 0.0088, 0.0027], + [-0.0164, -0.0100, -0.0053, ..., -0.0005, -0.0001, -0.0075], + [ 0.0092, 0.0048, 0.0069, ..., 0.0054, -0.0162, 0.0262]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 3, 14, 14]) +Parameter containing: +tensor([[[[ 2.5284e-02, 1.0597e-02, 7.1678e-03, ..., 2.3422e-02, + 2.1683e-02, 4.8637e-03], + [ 1.3748e-02, -6.2103e-03, -4.8103e-03, ..., 1.6418e-02, + 7.0114e-03, -1.3161e-02], + [ 1.0048e-02, 2.1286e-03, 2.2945e-03, ..., 5.5695e-03, + 5.0468e-03, -1.2604e-02], + ..., + [-1.0101e-02, -2.3854e-04, -5.4588e-03, ..., -1.9226e-02, + -2.4017e-02, -2.4765e-02], + [-3.4752e-03, -1.0979e-02, -1.3603e-02, ..., -7.6408e-03, + 1.5583e-03, -4.4365e-03], + [-2.1469e-02, -4.3182e-02, -3.0121e-02, ..., -5.2147e-03, + 3.7346e-03, -6.8016e-03]], + + [[ 
1.5930e-02, -4.9095e-03, -1.2283e-02, ..., 2.5879e-02, + 2.4048e-02, 5.6458e-03], + [ 2.1019e-03, -2.4185e-02, -2.6337e-02, ..., 1.5297e-02, + 5.2605e-03, -1.5121e-02], + [ 5.1956e-03, -7.2556e-03, -9.4376e-03, ..., 7.9193e-03, + 5.4703e-03, -1.2398e-02], + ..., + [-4.2267e-03, 5.9624e-03, -6.2656e-04, ..., 3.8528e-03, + 4.2963e-04, -5.4207e-03], + [-2.8496e-03, -1.1482e-02, -1.3680e-02, ..., 1.5129e-02, + 2.3285e-02, 1.2856e-02], + [-2.7740e-02, -4.9561e-02, -3.1158e-02, ..., 1.2787e-02, + 1.7975e-02, 6.4516e-04]], + + [[ 1.6403e-02, -2.0084e-03, -4.8714e-03, ..., 1.6159e-02, + 1.1337e-02, 5.2719e-03], + [ 1.8549e-03, -2.1622e-02, -2.4734e-02, ..., 6.0081e-03, + -4.9477e-03, -8.3389e-03], + [ 4.8523e-03, -1.0818e-02, -1.5015e-02, ..., 6.0272e-04, + -2.3615e-04, -7.6065e-03], + ..., + [ 2.4033e-03, 2.6741e-03, -8.2016e-03, ..., -1.0231e-02, + -1.0254e-02, -7.4234e-03], + [ 8.2626e-03, -3.1586e-03, -9.0256e-03, ..., -3.5248e-03, + 6.7329e-03, 5.1842e-03], + [-1.0529e-02, -2.6947e-02, -1.5656e-02, ..., 1.6518e-03, + 6.4774e-03, 2.7132e-04]]], + + + [[[ 1.5366e-02, 2.6184e-02, 5.8479e-03, ..., 8.4534e-03, + -9.0027e-03, 2.0325e-02], + [-1.8753e-02, -7.4615e-03, -1.6830e-02, ..., 2.9640e-03, + -1.9193e-05, 1.5640e-02], + [-2.4765e-02, -1.2184e-02, 1.7405e-03, ..., -2.6291e-02, + -2.8641e-02, -3.6869e-03], + ..., + [ 7.4539e-03, -6.8169e-03, 1.4931e-02, ..., 1.4824e-02, + -5.6839e-03, -6.2599e-03], + [ 6.2408e-03, -8.2016e-03, 4.1229e-02, ..., -5.0664e-06, + -2.8336e-02, -1.9409e-02], + [ 1.7120e-02, -1.1139e-02, 6.1279e-02, ..., -4.5490e-04, + 7.2899e-03, 4.6967e-02]], + + [[ 2.1149e-02, 3.3386e-02, 1.0483e-02, ..., 6.6109e-03, + -1.1864e-02, 1.7838e-02], + [-1.5022e-02, -8.8882e-04, -9.4604e-03, ..., 4.7722e-03, + 3.3522e-04, 1.4709e-02], + [-2.3026e-02, -6.3400e-03, 1.1215e-02, ..., -2.9251e-02, + -3.2776e-02, -7.0419e-03], + ..., + [ 5.5275e-03, -1.1826e-02, 7.7248e-03, ..., 1.1215e-02, + -1.1208e-02, -9.9030e-03], + [ 2.2125e-03, -1.5572e-02, 3.5980e-02, ..., 
-4.5929e-03, + -3.7567e-02, -2.6779e-02], + [ 1.0384e-02, -2.4033e-02, 5.2917e-02, ..., -1.1375e-02, + -4.0016e-03, 4.0253e-02]], + + [[ 1.0483e-02, 2.2339e-02, 8.9121e-04, ..., 5.2719e-03, + -1.2917e-02, 1.7471e-02], + [-2.5070e-02, -1.1597e-02, -1.9104e-02, ..., 4.4594e-03, + 4.0364e-04, 1.5610e-02], + [-3.3417e-02, -1.8112e-02, -1.3227e-03, ..., -2.8519e-02, + -3.0121e-02, -6.7444e-03], + ..., + [ 4.9820e-03, -1.0445e-02, 1.0681e-02, ..., 1.3405e-02, + -8.7509e-03, -8.8196e-03], + [ 2.5711e-03, -1.3268e-02, 4.1168e-02, ..., 9.7275e-04, + -3.0792e-02, -2.5375e-02], + [ 8.9951e-03, -2.1439e-02, 5.3528e-02, ..., -6.4163e-03, + -4.1795e-04, 3.9398e-02]]], + + + [[[ 7.2441e-03, 3.7231e-03, -2.4662e-03, ..., 1.0353e-02, + 1.4267e-02, 1.9363e-02], + [-3.0270e-03, -3.2539e-03, -1.2878e-02, ..., 9.7847e-04, + 5.2299e-03, 6.8626e-03], + [-4.3182e-03, 5.6915e-03, -3.1910e-03, ..., 8.4114e-04, + 2.2297e-03, 7.1373e-03], + ..., + [ 4.4632e-03, 3.8757e-03, -2.0063e-04, ..., 1.5976e-02, + 1.4221e-02, 1.2756e-02], + [ 2.5146e-02, 1.4793e-02, 5.1003e-03, ..., 2.2858e-02, + 2.2186e-02, 2.3026e-02], + [ 3.0807e-02, 2.6031e-02, 1.4259e-02, ..., 2.5116e-02, + 2.1759e-02, 2.4887e-02]], + + [[ 6.9695e-03, 5.0888e-03, -2.8915e-03, ..., 1.7868e-02, + 1.9669e-02, 2.9037e-02], + [-2.8973e-03, -1.2035e-03, -1.1116e-02, ..., 5.5542e-03, + 5.9547e-03, 1.3420e-02], + [-9.8190e-03, 4.3716e-03, 2.3806e-04, ..., 1.1253e-03, + -8.7976e-04, 9.4681e-03], + ..., + [ 6.1417e-03, 5.1804e-03, 2.1095e-03, ..., 2.4979e-02, + 2.5146e-02, 2.7710e-02], + [ 3.1128e-02, 2.0096e-02, 8.0948e-03, ..., 3.3722e-02, + 3.3295e-02, 4.0405e-02], + [ 3.7659e-02, 3.2166e-02, 1.8311e-02, ..., 4.2542e-02, + 3.9429e-02, 4.6356e-02]], + + [[ 1.7014e-02, 1.5358e-02, 1.1269e-02, ..., 2.1378e-02, + 2.1317e-02, 3.0075e-02], + [ 7.4120e-03, 7.8087e-03, 1.1091e-03, ..., 7.4654e-03, + 7.7209e-03, 1.2947e-02], + [-5.4646e-04, 1.1208e-02, 6.4545e-03, ..., 4.1313e-03, + 3.2539e-03, 9.7275e-03], + ..., + [ 3.3531e-03, 2.0325e-04, 
1.3704e-03, ..., 7.8087e-03, + 7.9422e-03, 1.4809e-02], + [ 1.6571e-02, 2.9163e-03, 4.2105e-04, ..., 1.1787e-02, + 1.1337e-02, 1.8753e-02], + [ 1.9714e-02, 1.0704e-02, 2.9335e-03, ..., 2.1042e-02, + 1.5457e-02, 2.2263e-02]]], + + + ..., + + + [[[-3.1614e-04, -6.5041e-04, -6.0844e-04, ..., 6.5207e-05, + 2.8062e-04, -5.1928e-04], + [-5.2452e-06, -9.8610e-04, -9.5367e-04, ..., 1.9908e-05, + -1.0675e-04, -8.3148e-05], + [-9.5606e-04, -6.4993e-04, -1.2035e-03, ..., -6.1035e-04, + -4.2439e-04, 6.3181e-04], + ..., + [-7.1907e-04, -6.2132e-04, 1.0270e-04, ..., -3.2485e-05, + -7.7963e-04, -7.9155e-04], + [-9.8991e-04, 6.4433e-05, -1.2598e-03, ..., -8.0490e-04, + -1.2980e-03, -1.2064e-03], + [-2.8110e-04, -5.8031e-04, -2.4199e-04, ..., -5.1558e-05, + 4.4203e-04, 1.4377e-04]], + + [[ 5.6839e-04, 1.9491e-05, 2.8157e-04, ..., 1.6952e-04, + 9.6035e-04, -5.6601e-04], + [ 9.8038e-04, 2.3961e-05, 4.3941e-04, ..., 3.5739e-04, + 7.8630e-04, -6.2466e-04], + [-2.5654e-04, 3.8624e-04, 1.7090e-03, ..., 6.6614e-04, + 6.1607e-04, 7.3719e-04], + ..., + [ 5.9319e-04, 4.7755e-04, 4.7016e-04, ..., 1.0605e-03, + 6.6137e-04, 3.1066e-04], + [ 8.3494e-04, 4.7708e-04, -1.0042e-03, ..., 6.4945e-04, + -2.4092e-04, 3.6502e-04], + [ 4.7803e-04, -3.4690e-04, 6.3467e-04, ..., 2.3830e-04, + 1.9407e-04, 4.0698e-04]], + + [[ 2.0623e-04, -7.5936e-05, -6.9094e-04, ..., -2.5582e-04, + -5.5313e-04, -5.7125e-04], + [-9.0122e-05, 3.5214e-04, 2.0063e-04, ..., -2.6512e-04, + 1.1653e-04, 5.8317e-04], + [-9.5224e-04, -3.9577e-04, -3.9458e-04, ..., 2.1636e-04, + 6.0797e-05, 1.7786e-04], + ..., + [ 4.9019e-04, -1.6594e-04, 5.3120e-04, ..., 3.1352e-04, + 9.8825e-05, 5.7650e-04], + [ 7.5400e-05, 4.0960e-04, -6.8998e-04, ..., 1.8597e-04, + 1.9622e-04, -3.3689e-04], + [-1.4269e-04, -2.5558e-04, 2.9540e-04, ..., 2.1315e-04, + -2.9826e-04, 4.0221e-04]]], + + + [[[ 1.2306e-02, 1.8921e-02, 5.3024e-03, ..., 1.1612e-02, + 6.5956e-03, 2.7069e-02], + [ 1.1261e-02, 2.9709e-02, 1.3695e-02, ..., -8.9722e-03, + -1.7639e-02, 
-3.2501e-03], + [ 2.1103e-02, 3.1342e-02, 1.7731e-02, ..., -1.1185e-02, + -2.7451e-02, -5.5275e-03], + ..., + [ 3.7292e-02, 2.5757e-02, 6.7863e-03, ..., 1.8631e-02, + 2.8793e-02, 3.6560e-02], + [ 1.9577e-02, -5.3711e-03, -2.1255e-02, ..., -1.6953e-02, + -2.3621e-02, 4.6463e-03], + [ 1.3992e-02, -2.7130e-02, -5.1117e-02, ..., -1.2520e-02, + -4.0009e-02, 1.3618e-02]], + + [[ 1.7109e-03, 9.4223e-03, -2.4147e-03, ..., 8.3694e-03, + 3.3112e-03, 2.3117e-02], + [ 1.1692e-03, 2.3514e-02, 1.1520e-02, ..., -8.2321e-03, + -1.8555e-02, -6.4278e-03], + [ 1.0735e-02, 2.6749e-02, 1.8997e-02, ..., -1.1795e-02, + -3.0396e-02, -9.2773e-03], + ..., + [ 3.4821e-02, 2.1423e-02, 8.1253e-04, ..., 1.6235e-02, + 2.6367e-02, 3.4302e-02], + [ 1.4656e-02, -1.1101e-02, -2.7344e-02, ..., -2.0676e-02, + -3.1250e-02, -1.2932e-03], + [ 5.8136e-03, -3.8971e-02, -6.3354e-02, ..., -2.1881e-02, + -5.2307e-02, 4.1885e-03]], + + [[-1.0658e-02, -1.8530e-03, -8.5220e-03, ..., 4.6959e-03, + -1.9407e-03, 1.7426e-02], + [-1.3008e-02, 1.1108e-02, 5.3177e-03, ..., -8.9722e-03, + -2.1408e-02, -9.2850e-03], + [-3.2902e-03, 1.4580e-02, 1.3863e-02, ..., -1.2299e-02, + -2.9846e-02, -1.2985e-02], + ..., + [ 3.2806e-02, 2.2476e-02, 6.9771e-03, ..., 1.0704e-02, + 1.9516e-02, 2.4567e-02], + [ 1.3817e-02, -6.0501e-03, -1.4580e-02, ..., -2.2476e-02, + -3.2013e-02, -9.6893e-03], + [ 5.8556e-03, -3.2196e-02, -5.1910e-02, ..., -2.4429e-02, + -5.2979e-02, -3.0937e-03]]], + + + [[[ 2.2598e-02, -7.3586e-03, -2.9099e-02, ..., -2.2873e-02, + 8.5068e-03, -4.8706e-02], + [ 1.7410e-02, -3.1433e-02, -4.2816e-02, ..., -6.2675e-03, + 9.4528e-03, -3.8910e-02], + [ 2.2125e-02, -1.5839e-02, -4.1351e-02, ..., 4.6021e-02, + 2.4017e-02, -1.1345e-02], + ..., + [ 2.8290e-02, 3.7964e-02, 4.1656e-02, ..., 2.4734e-02, + -2.2011e-03, -1.9989e-02], + [-1.5671e-02, -2.0996e-02, -2.9182e-03, ..., 2.0828e-02, + 7.9803e-03, 1.4175e-02], + [-3.1624e-03, -9.1400e-03, 7.2937e-03, ..., 1.6663e-02, + 1.3590e-03, 1.6647e-02]], + + [[ 2.2675e-02, 
-8.0872e-03, -3.0746e-02, ..., -1.9989e-02, + 1.6220e-02, -4.3518e-02], + [ 1.6678e-02, -3.2532e-02, -4.2694e-02, ..., -6.4468e-04, + 1.8555e-02, -3.2135e-02], + [ 2.0767e-02, -1.6098e-02, -3.9978e-02, ..., 5.0598e-02, + 2.9999e-02, -5.6038e-03], + ..., + [ 4.2328e-02, 5.0476e-02, 4.9988e-02, ..., 2.2064e-02, + -1.8721e-03, -1.5190e-02], + [-4.8981e-03, -1.0933e-02, 6.5994e-03, ..., 1.9073e-02, + 7.9498e-03, 2.0065e-02], + [ 4.9896e-03, -1.7853e-03, 1.5068e-02, ..., 1.0445e-02, + -2.7905e-03, 1.9196e-02]], + + [[ 7.0305e-03, -1.8372e-02, -3.5797e-02, ..., -1.5244e-02, + 2.1683e-02, -3.0380e-02], + [-1.7321e-04, -4.1534e-02, -4.5563e-02, ..., 4.7989e-03, + 2.4796e-02, -1.7990e-02], + [ 2.1000e-03, -2.8732e-02, -4.5746e-02, ..., 5.0171e-02, + 3.4485e-02, 4.2267e-03], + ..., + [ 3.7415e-02, 4.6143e-02, 4.9500e-02, ..., 2.0111e-02, + 4.0741e-03, -6.3667e-03], + [-5.8479e-03, -9.4757e-03, 1.2398e-02, ..., 2.1317e-02, + 1.5762e-02, 2.5894e-02], + [ 2.3136e-03, -7.0858e-04, 1.7914e-02, ..., 1.1047e-02, + 2.1496e-03, 2.2278e-02]]]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.3311, 0.0032, 0.1610, ..., 2.1922, 0.0050, 0.0039], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0045, -0.0452, -0.0475, ..., 0.0402, -0.1402, -0.0132], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-7.0632e-05, -1.6510e-04, -7.0930e-05, ..., 4.5090e-03, + -2.9160e-02, -7.8201e-05], + [-1.3733e-04, 1.2165e-04, 4.2319e-05, ..., -1.6594e-03, + 3.1433e-02, 7.4446e-05], + [ 4.8018e-04, 7.7963e-04, -1.0991e-04, ..., -1.6846e-02, + 4.2999e-02, 1.5199e-04], + ..., + [ 2.1267e-04, 4.1032e-04, -7.2420e-05, ..., 4.8027e-03, + -1.7338e-03, -6.6102e-05], + [ 3.0518e-04, -4.4405e-05, -2.2709e-04, ..., 1.1551e-02, + 3.3436e-03, 7.4685e-05], + [-2.8849e-05, 4.5919e-04, 9.3341e-05, ..., -1.1314e-02, + 3.7670e-03, -7.7844e-05]], device='cuda:1', 
dtype=torch.float16, + requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 1.5674, -1.6143, -0.8208, ..., 0.0115, 0.0107, -0.0043], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-6.7596e-03, 8.8043e-03, -7.9422e-03, ..., -8.6441e-03, + -8.7433e-03, 3.5553e-03], + [ 1.2077e-02, 5.8784e-03, 1.1253e-02, ..., -3.7060e-03, + 2.0008e-03, 3.8319e-03], + [-5.2032e-03, 2.6913e-03, 1.2894e-02, ..., 6.4812e-03, + -3.0398e-05, -4.2796e-04], + ..., + [-4.5037e-04, -2.5063e-03, -3.2768e-03, ..., -3.2768e-03, + -1.9409e-02, 9.2545e-03], + [-7.3624e-03, 2.8419e-03, -7.9193e-03, ..., 4.0627e-04, + -1.3866e-03, -6.7186e-04], + [ 9.0408e-03, 1.5287e-03, 1.6737e-03, ..., 2.4242e-03, + -3.7575e-03, 4.9667e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0262, -0.0654, 0.0032, ..., 0.1761, -0.0446, 0.0023], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([6.1186e-04, 2.0990e-03, 3.0166e-05, ..., 6.9025e-01, 3.5588e-01, + 1.4703e-04], device='cuda:1', requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 1.3605e-04, 8.3127e-04, -2.0098e-05, ..., -3.6831e-01, + 1.7861e-01, 7.4003e-05], device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 3.6597e-04, 2.6047e-05, 1.1921e-07, ..., -6.6109e-03, + -1.1740e-03, -6.4468e-04], + [ 5.3291e-03, 1.3710e-02, -3.5620e-04, ..., -3.8052e-03, + -2.5225e-04, 6.0730e-03], + [ 1.3428e-03, 1.2884e-03, -1.9073e-06, ..., -2.8549e-02, + -1.1930e-03, 1.4906e-03], + ..., + [-2.4994e-02, -1.0262e-02, 2.3067e-04, ..., -2.0103e-03, + -1.2665e-02, 6.2332e-03], + [ 3.2401e-04, 9.3758e-05, -5.9605e-08, ..., -6.0234e-03, + -7.3862e-04, -6.4611e-04], + [ 1.1129e-03, -2.3117e-02, -2.7061e-04, ..., -4.4365e-03, + 3.5744e-03, -7.4997e-03]], device='cuda:1', dtype=torch.float16, + 
requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.6826, -0.3132, -0.8076, ..., -0.2167, -0.6543, -0.3040], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0043, -0.0023, 0.0041, ..., 0.0116, -0.0049, -0.0073], + [ 0.0018, 0.0191, -0.0102, ..., -0.0261, 0.0026, 0.0206], + [ 0.0039, -0.0002, -0.0028, ..., 0.0029, 0.0038, -0.0151], + ..., + [ 0.0021, -0.0003, -0.0034, ..., 0.0033, 0.0015, 0.0089], + [-0.0059, 0.0078, 0.0069, ..., -0.0005, -0.0060, 0.0020], + [ 0.0003, -0.0039, -0.0022, ..., -0.0094, 0.0005, 0.0039]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0184, -0.1008, 0.0398, ..., -0.0965, -0.1080, -0.0237], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.8353e-01, 5.9135e-01, 3.3711e-06, ..., 2.0198e+00, 7.7565e-01, + 2.9745e-01], device='cuda:1', requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.1628e-02, 2.1650e-01, 2.3350e-04, ..., 2.6387e-01, 4.4878e-01, + 5.1503e-02], device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0033, 0.0007, -0.0002, ..., 0.0078, -0.0240, 0.0078], + [ 0.0070, 0.0033, 0.0170, ..., -0.0062, 0.0080, 0.0055], + [ 0.0102, -0.0102, -0.0003, ..., 0.0024, 0.0164, 0.0043], + ..., + [ 0.0113, 0.0003, -0.0048, ..., 0.0002, 0.0042, -0.0065], + [-0.0144, -0.0119, 0.0076, ..., -0.0037, 0.0036, 0.0072], + [-0.0012, -0.0020, 0.0010, ..., -0.0066, -0.0222, -0.0007]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-1.0248e-01, 8.7988e-01, 1.4414e+00, ..., -1.0862e-03, + -4.0474e-03, 2.2471e-04], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-4.9829e-04, -2.1194e-02, -1.9908e-05, ..., -1.1253e-02, + 1.1993e-02, 
1.0979e-04], + [-5.0068e-04, 7.5417e-03, -4.4131e-04, ..., -2.8553e-03, + 1.1459e-02, -3.0899e-03], + [ 2.7752e-03, -5.4703e-03, -1.1978e-02, ..., -3.8319e-03, + -1.0222e-04, -5.6686e-03], + ..., + [ 5.3825e-03, -1.8539e-02, 8.3313e-02, ..., -2.1317e-02, + -9.7198e-03, 1.5419e-02], + [-9.5062e-03, -2.0390e-03, 5.9166e-03, ..., 8.5144e-03, + -4.4022e-03, 6.3820e-03], + [-2.8553e-03, 6.8321e-03, -9.3508e-04, ..., 5.5199e-03, + 4.7264e-03, -4.1389e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0203, -0.0213, 0.0256, ..., -0.0386, -0.0219, -0.0045], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.3628, 0.7462, 0.0949, ..., 0.8510, 0.4239, 0.2627], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0484, -0.1306, 0.0199, ..., -0.1032, -0.0533, 0.0084], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0025, 0.0060, 0.0152, ..., -0.0063, 0.0478, -0.0812], + [ 0.0002, -0.0075, 0.0009, ..., -0.0011, -0.0030, 0.0037], + [ 0.0053, -0.0222, 0.0008, ..., -0.0101, 0.0178, -0.0035], + ..., + [ 0.0026, -0.0111, 0.0018, ..., -0.0058, -0.0008, 0.0039], + [-0.0072, 0.0112, 0.0018, ..., 0.0027, -0.0154, -0.0180], + [ 0.0151, 0.0001, 0.0326, ..., -0.0002, -0.0062, -0.0225]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.1219, -0.6836, -0.5273, ..., -0.7568, -0.0984, -0.3079], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 1.8396e-03, -9.6321e-04, -1.6800e-02, ..., -3.0613e-03, + -9.5901e-03, -3.4103e-03], + [-1.5350e-02, 6.2675e-03, 1.4854e-02, ..., 6.7291e-03, + -9.3937e-05, -6.2218e-03], + [ 5.9891e-03, -4.2915e-04, 1.0605e-02, ..., -5.6076e-03, + -2.0447e-03, 5.9662e-03], + ..., + [ 2.9125e-03, -2.3937e-03, 
4.5738e-03, ..., 1.6699e-03, + 6.7043e-04, 5.3139e-03], + [ 9.0456e-04, -1.3828e-03, 1.1587e-03, ..., -1.1549e-03, + 4.4975e-03, -5.7945e-03], + [ 3.0212e-02, 3.7136e-03, 1.1283e-04, ..., 4.8065e-03, + 1.2444e-02, 5.4054e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0242, -0.0644, 0.0790, ..., -0.0809, -0.1028, -0.0834], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.4126, 0.9839, 0.1912, ..., 0.7707, 0.5578, 0.5130], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0194, -0.1082, -0.0194, ..., 0.0886, 0.1335, 0.0285], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-6.4240e-03, -2.0248e-02, -2.0676e-02, ..., 1.1930e-03, + 2.8778e-02, -5.5267e-02], + [-2.8038e-03, 1.3485e-03, -1.9196e-02, ..., 9.4748e-04, + -1.9562e-02, -2.9373e-03], + [-5.8861e-03, -4.8141e-03, 5.3825e-03, ..., -1.8219e-02, + -2.0416e-02, -9.6283e-03], + ..., + [-2.5009e-02, 1.1108e-02, 1.0498e-02, ..., 4.8447e-03, + 1.2636e-05, 2.5177e-03], + [ 1.0887e-02, 1.1696e-02, 1.1856e-02, ..., 2.7962e-03, + -4.8447e-03, -6.4964e-03], + [-8.6746e-03, 2.5177e-03, -4.9591e-03, ..., 2.8553e-03, + -8.6136e-03, 4.2229e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.1929, -0.0773, -0.0911, ..., 0.1083, 0.0064, 0.0453], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-2.2720e-02, 2.8229e-03, -3.1710e-04, ..., 2.3804e-02, + -2.3819e-02, 1.4458e-02], + [-1.9178e-03, 6.7055e-05, -1.0406e-02, ..., -7.9041e-03, + -4.2076e-03, -6.3286e-03], + [-1.2703e-02, -6.1874e-03, -1.0422e-02, ..., -1.6769e-02, + -6.2981e-03, -1.8578e-03], + ..., + [-1.6136e-03, 9.8228e-04, -7.8888e-03, ..., -6.7940e-03, + -2.7447e-03, -2.1706e-03], + [ 5.8823e-03, 
-3.4351e-03, 1.2810e-02, ..., -1.3399e-03, + 1.7090e-03, 7.6027e-03], + [ 1.0025e-02, 6.5842e-03, 1.1444e-02, ..., -5.9242e-03, + -1.4353e-03, -3.4161e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0180, -0.0964, 0.0243, ..., -0.0159, -0.0454, -0.0301], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.5055, 0.3092, 0.3977, ..., 1.4209, 0.4980, 0.3574], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0747, -0.0122, 0.0623, ..., -0.0418, 0.0183, -0.0493], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0067, -0.0268, 0.0142, ..., -0.0009, 0.0202, -0.0156], + [ 0.0101, 0.0105, -0.0008, ..., 0.0012, -0.0004, 0.0251], + [-0.0059, 0.0096, 0.0011, ..., -0.0045, 0.0005, 0.0131], + ..., + [ 0.0016, -0.0027, -0.0004, ..., 0.0003, -0.0022, -0.0065], + [ 0.0005, 0.0114, 0.0169, ..., 0.0032, 0.0011, 0.0202], + [-0.0124, 0.0076, -0.0112, ..., 0.0046, -0.0065, -0.0068]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2451, -0.3083, -0.4565, ..., -0.1675, -0.2117, -0.5532], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 6.9380e-04, -4.9591e-03, 3.3998e-04, ..., -5.6534e-03, + 4.0131e-03, 1.9054e-03], + [ 2.4014e-03, -1.7365e-02, -4.1771e-03, ..., -9.8419e-04, + 1.3916e-02, -2.5787e-03], + [-2.0340e-02, 7.0419e-03, 4.9667e-03, ..., 9.6846e-04, + -1.9730e-02, 7.8964e-04], + ..., + [-7.2746e-03, 9.3412e-04, 2.4259e-04, ..., -6.7294e-05, + 1.0061e-03, 3.1109e-03], + [-1.4820e-03, -6.7673e-03, -1.0185e-03, ..., 3.6182e-03, + -1.1826e-02, 2.4719e-02], + [ 6.9389e-03, 3.9864e-03, -3.3212e-04, ..., 1.5701e-02, + 7.3318e-03, 7.0572e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) 
+Parameter containing: +tensor([ 0.0455, -0.0800, -0.0028, ..., -0.0156, -0.1378, -0.0312], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.6342, 0.5487, 0.3780, ..., 1.3511, 0.4005, 0.4882], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0342, -0.0825, -0.0966, ..., -0.0490, 0.0846, -0.2136], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 2.7573e-02, 1.0155e-02, 5.2223e-03, ..., 6.3057e-03, + -8.5449e-03, -1.4496e-02], + [ 4.0741e-03, 1.8341e-02, -4.6654e-03, ..., -6.0539e-03, + -2.0538e-02, 5.9052e-03], + [ 6.1989e-05, -9.3613e-03, 4.7445e-04, ..., 1.0582e-02, + 9.0256e-03, -1.5945e-02], + ..., + [ 2.3632e-03, 1.7147e-03, 1.2856e-02, ..., 1.9665e-03, + 1.4906e-03, -5.8441e-03], + [-1.9121e-03, 1.6052e-02, 7.6561e-03, ..., 2.6722e-03, + -5.3329e-03, -3.0499e-03], + [-1.9257e-02, -6.6910e-03, 1.0643e-02, ..., -2.6035e-03, + 6.3744e-03, 3.3646e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.1843, -0.5454, -0.1458, ..., -0.0142, 0.0038, 0.0057], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-2.3544e-02, 3.3112e-03, -1.4915e-03, ..., -7.5264e-03, + 1.7456e-02, 1.1635e-02], + [ 6.1531e-03, -3.9154e-02, -3.7251e-03, ..., -2.8820e-03, + -2.1454e-02, 1.2619e-02], + [ 5.9624e-03, -9.4299e-03, 1.4954e-02, ..., -1.4839e-02, + 7.3280e-03, -1.1848e-02], + ..., + [ 4.7982e-05, 4.2915e-03, -1.1238e-02, ..., -1.1238e-02, + 1.3962e-03, -1.3695e-03], + [-7.3586e-03, -1.0338e-02, -1.3638e-04, ..., 2.1240e-02, + 1.3512e-02, -2.4395e-03], + [-1.8524e-02, -1.1511e-03, -6.6681e-03, ..., -3.1424e-04, + -3.4256e-03, 3.2120e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0142, -0.0583, 0.0198, ..., 0.0195, 
-0.1207, 0.0172], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.7431, 0.3526, 0.6107, ..., 2.2615, 0.5052, 0.3920], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0007, 0.0244, 0.0183, ..., -0.1535, -0.0343, 0.0142], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 2.4109e-03, 3.2253e-03, -4.0948e-05, ..., -2.7924e-03, + -6.2485e-03, 6.4964e-03], + [ 1.8692e-03, -1.9464e-03, -2.5692e-03, ..., -8.8959e-03, + -5.5275e-03, -7.2517e-03], + [-4.8370e-03, -2.5986e-02, -6.8359e-03, ..., -7.1068e-03, + 1.1925e-02, 3.2806e-03], + ..., + [ 6.9885e-03, 3.9635e-03, -1.4124e-03, ..., -4.8065e-03, + -1.8377e-03, 8.5258e-04], + [-2.0752e-02, -1.8066e-02, -5.8937e-03, ..., -8.4991e-03, + -1.3115e-02, -6.9733e-03], + [-2.3849e-02, 7.0190e-03, -5.0430e-03, ..., -1.0780e-02, + -5.9013e-03, -8.5068e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.0958, -0.1884, -0.1593, ..., -0.2017, -0.3232, -0.3743], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 0.0077, -0.0076, 0.0042, ..., 0.0185, 0.0244, 0.0145], + [ 0.0102, 0.0012, -0.0082, ..., -0.0322, -0.0016, 0.0077], + [-0.0055, -0.0099, -0.0081, ..., 0.0024, 0.0082, 0.0235], + ..., + [ 0.0053, 0.0035, 0.0003, ..., -0.0044, -0.0019, -0.0058], + [-0.0026, 0.0178, 0.0062, ..., 0.0020, -0.0052, -0.0042], + [-0.0041, 0.0082, 0.0150, ..., -0.0024, 0.0150, -0.0076]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0141, -0.0796, 0.0049, ..., 0.0710, -0.1786, 0.0413], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.9759, 0.5307, 0.6513, ..., 0.0107, 0.5041, 0.5372], device='cuda:1', + requires_grad=True) 
+torch.Size([1024]) +Parameter containing: +tensor([ 0.0589, 0.0290, 0.0214, ..., 0.3877, -0.0775, -0.1199], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0074, 0.0006, -0.0156, ..., -0.0098, -0.0449, -0.0081], + [-0.0049, -0.0510, 0.0051, ..., -0.0058, -0.0286, 0.0074], + [ 0.0070, 0.0021, 0.0135, ..., 0.0238, 0.0070, -0.0351], + ..., + [ 0.0131, -0.0296, -0.0192, ..., -0.0009, 0.0007, 0.0007], + [ 0.0140, 0.0147, -0.0112, ..., 0.0018, 0.0341, -0.0212], + [ 0.0168, -0.0181, -0.0056, ..., 0.0013, -0.0197, -0.0118]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.4622, -0.0086, 0.2756, ..., 0.0269, 0.0068, -0.0022], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0027, -0.0052, 0.0031, ..., -0.0015, -0.0176, -0.0188], + [-0.0074, 0.0147, 0.0008, ..., 0.0245, -0.0242, 0.0237], + [ 0.0041, 0.0037, 0.0004, ..., 0.0177, 0.0124, 0.0109], + ..., + [-0.0073, 0.0058, -0.0050, ..., -0.0073, -0.0063, 0.0020], + [ 0.0157, -0.0413, 0.0109, ..., 0.0118, -0.0392, 0.0283], + [ 0.0064, 0.0013, -0.0097, ..., 0.0003, 0.0149, 0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0089, -0.0779, 0.0223, ..., -0.0115, -0.1759, 0.0235], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.7499, 0.4987, 0.7858, ..., 1.1598, 0.6024, 0.5770], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0150, -0.0119, 0.0050, ..., -0.1037, 0.0333, -0.0361], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0077, -0.0109, -0.0022, ..., -0.0063, 0.0133, 0.0150], + [-0.0096, 0.0191, 0.0149, ..., -0.0078, 0.0161, 0.0103], + [-0.0020, 0.0116, 0.0042, ..., -0.0045, 0.0149, 0.0007], + ..., + [ 
0.0186, 0.0082, 0.0246, ..., -0.0084, 0.0029, -0.0158], + [ 0.0175, -0.0043, 0.0002, ..., -0.0078, 0.0047, -0.0143], + [-0.0011, -0.0010, 0.0262, ..., -0.0082, -0.0047, -0.0202]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2437, -0.3796, -0.5195, ..., -0.2163, -0.4231, -0.2202], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0169, 0.0065, -0.0019, ..., -0.0216, 0.0189, 0.0012], + [ 0.0048, -0.0130, 0.0052, ..., -0.0211, -0.0036, -0.0101], + [ 0.0040, -0.0051, -0.0002, ..., -0.0073, -0.0107, -0.0037], + ..., + [-0.0058, -0.0022, 0.0002, ..., -0.0073, -0.0007, 0.0026], + [-0.0098, -0.0155, 0.0002, ..., 0.0191, 0.0043, 0.0222], + [-0.0067, 0.0011, 0.0009, ..., 0.0020, -0.0060, 0.0049]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0154, -0.0523, -0.0401, ..., 0.1025, -0.1436, -0.0176], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1220, 0.5171, 1.0746, ..., 0.0111, 0.6744, 0.7526], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0830, -0.0924, -0.0048, ..., 0.1117, -0.0385, -0.0674], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-1.6815e-02, 3.0869e-02, 9.5444e-03, ..., -7.8125e-03, + 1.6342e-02, -1.0109e-02], + [-1.2794e-02, 7.7095e-03, -3.9101e-03, ..., -1.1053e-03, + -5.3482e-03, -1.1383e-02], + [ 1.6956e-03, -1.2161e-02, -4.4136e-03, ..., -1.5106e-03, + -1.3565e-02, 6.5117e-03], + ..., + [-1.2596e-02, 1.0803e-02, 5.0116e-04, ..., -3.5954e-04, + -3.2578e-03, -5.4300e-05], + [-1.4236e-02, -4.2572e-03, 1.3161e-02, ..., 1.7285e-05, + -3.1860e-02, -1.3054e-02], + [ 1.2398e-02, 5.1737e-05, 2.3148e-02, ..., -2.7866e-03, + -4.3144e-03, -2.5146e-02]], device='cuda:1', dtype=torch.float16, + 
requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.0057, -1.4600, 0.3438, ..., -0.0042, -0.0107, -0.0046], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 6.3477e-03, 1.2695e-02, -6.6872e-03, ..., 1.6868e-05, + 1.6006e-02, -1.3306e-02], + [-5.9090e-03, 8.4877e-04, -1.6708e-02, ..., -2.0477e-02, + -1.0666e-02, -1.1078e-02], + [ 2.4246e-02, 1.2558e-02, -1.6769e-02, ..., 6.4697e-03, + 1.2642e-02, -1.8021e-02], + ..., + [ 1.6754e-02, 5.8670e-03, -1.1282e-03, ..., -1.5726e-03, + 1.8406e-03, 1.1803e-02], + [ 5.7335e-03, 7.0724e-03, 1.3092e-02, ..., -1.1902e-02, + 1.6022e-02, -1.1311e-03], + [ 2.0809e-03, -2.6493e-03, -3.7041e-03, ..., -4.9400e-03, + 6.8893e-03, 1.5732e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0091, -0.0477, -0.0098, ..., -0.0483, -0.1364, 0.0059], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.8486, 0.6437, 0.8933, ..., 1.2490, 0.7166, 0.8544], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0071, 0.0669, -0.0529, ..., -0.1688, -0.0491, 0.0438], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 5.3062e-03, 1.2688e-02, 1.0406e-02, ..., -2.4281e-03, + -4.9362e-03, 1.8988e-03], + [-2.9793e-03, -1.5764e-03, 1.6724e-02, ..., 1.1415e-03, + -2.2034e-02, -9.2392e-03], + [-3.9673e-02, -1.2040e-05, 2.4188e-04, ..., 2.8877e-03, + -3.9101e-03, -2.3239e-02], + ..., + [ 8.1329e-03, 1.3290e-02, 2.1637e-02, ..., -6.3057e-03, + -4.2686e-03, -1.4544e-03], + [ 2.7435e-02, 6.1798e-03, 1.0468e-02, ..., 3.2425e-05, + 4.9400e-03, -9.4604e-03], + [ 1.3458e-02, 7.5836e-03, -1.2062e-02, ..., 4.9925e-04, + -9.8419e-03, -1.8356e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: 
+tensor([-0.4192, -0.2394, -0.3069, ..., -0.3665, -0.2556, -0.1316], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 0.0160, -0.0017, -0.0088, ..., 0.0165, -0.0056, 0.0135], + [ 0.0050, 0.0226, 0.0044, ..., 0.0111, 0.0021, 0.0038], + [ 0.0148, -0.0083, 0.0003, ..., 0.0085, 0.0015, -0.0004], + ..., + [-0.0031, -0.0009, -0.0014, ..., 0.0004, -0.0025, -0.0012], + [-0.0146, -0.0036, 0.0007, ..., 0.0108, -0.0012, -0.0406], + [ 0.0060, 0.0041, -0.0141, ..., -0.0118, 0.0065, -0.0112]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0417, 0.0127, -0.0229, ..., 0.0725, -0.0144, -0.0360], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1527, 0.7259, 1.1281, ..., 1.0935, 0.8785, 1.1066], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0413, -0.1707, -0.0308, ..., 0.0418, -0.2141, -0.0075], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0110, -0.0251, -0.0094, ..., 0.0972, 0.0119, -0.0125], + [-0.0140, -0.0233, 0.0010, ..., -0.1094, -0.0082, -0.0113], + [-0.0004, 0.0120, 0.0042, ..., 0.0518, 0.0182, 0.0130], + ..., + [-0.0162, 0.0175, -0.0176, ..., 0.0016, -0.0075, 0.0305], + [ 0.0311, -0.0070, -0.0240, ..., -0.0003, -0.0044, -0.0165], + [-0.0102, -0.0211, 0.0222, ..., -0.0004, -0.0292, -0.0076]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.2430, 0.3398, -0.1389, ..., 0.0082, -0.0049, 0.0183], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0019, 0.0045, 0.0063, ..., 0.0163, -0.0224, 0.0068], + [-0.0041, 0.0050, -0.0084, ..., -0.0048, -0.0007, 0.0138], + [-0.0173, 0.0011, 0.0050, ..., 0.0098, 0.0256, -0.0074], + ..., + [ 0.0102, 
-0.0012, -0.0010, ..., -0.0045, 0.0045, 0.0039], + [ 0.0035, -0.0075, 0.0118, ..., -0.0043, -0.0048, 0.0198], + [ 0.0175, -0.0080, -0.0061, ..., -0.0388, -0.0020, 0.0159]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0218, 0.0022, 0.0177, ..., 0.0566, -0.0418, -0.0156], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.8877, 0.8024, 1.0279, ..., 2.0427, 0.9536, 0.9729], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1278, -0.0779, -0.0511, ..., -0.2266, -0.0554, 0.0418], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0282, 0.0064, 0.0033, ..., -0.0064, 0.0038, 0.0184], + [ 0.0007, 0.0181, 0.0098, ..., -0.0025, -0.0096, 0.0182], + [-0.0056, 0.0077, 0.0009, ..., -0.0078, -0.0058, -0.0323], + ..., + [-0.0288, 0.0303, 0.0033, ..., 0.0016, -0.0074, 0.0192], + [-0.0051, -0.0323, -0.0066, ..., -0.0045, 0.0333, 0.0005], + [ 0.0188, 0.0207, 0.0077, ..., -0.0080, -0.0315, -0.0182]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.1138, -0.1492, -0.4014, ..., -0.2352, -0.3323, -0.2046], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-1.9287e-02, 2.2621e-03, 6.0921e-03, ..., -3.4676e-03, + -8.6060e-03, -2.0447e-03], + [-6.7711e-05, -3.6869e-03, -5.7602e-03, ..., -1.7380e-02, + 3.8025e-02, -1.9588e-03], + [-1.2627e-02, -4.6158e-03, -6.8207e-03, ..., 1.4572e-03, + -6.2037e-04, -1.3741e-02], + ..., + [ 3.5152e-03, 2.8687e-03, -8.9417e-03, ..., -8.0633e-04, + 7.1335e-03, 3.8662e-03], + [ 1.1139e-02, 7.1411e-03, 7.1297e-03, ..., -1.9293e-03, + -5.5265e-04, 3.8330e-02], + [-8.9264e-03, -5.6114e-03, 2.1210e-03, ..., -1.2589e-02, + -8.9493e-03, 8.3389e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) 
+torch.Size([1024]) +Parameter containing: +tensor([-0.0068, 0.0181, -0.0552, ..., 0.1211, -0.0751, -0.1089], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1916, 0.9694, 1.2653, ..., 0.1731, 0.9097, 1.1966], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0955, -0.1844, -0.0546, ..., 0.2570, -0.0544, 0.0379], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0093, -0.0152, -0.0200, ..., 0.0346, -0.0043, -0.0287], + [ 0.0025, -0.0165, -0.0050, ..., -0.0740, -0.0039, -0.0172], + [-0.0126, 0.0090, 0.0117, ..., -0.0017, 0.0034, 0.0126], + ..., + [-0.0276, -0.0107, -0.0004, ..., -0.0036, 0.0028, 0.0067], + [ 0.0289, -0.0022, -0.0177, ..., -0.0029, 0.0003, -0.0052], + [-0.0118, 0.0090, 0.0049, ..., -0.0104, 0.0250, 0.0115]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-1.8496, 0.1801, 2.3359, ..., 0.0398, -0.0217, -0.1345], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0018, -0.0264, 0.0078, ..., 0.0173, -0.0076, -0.0041], + [ 0.0135, -0.0083, 0.0026, ..., 0.0076, 0.0072, -0.0242], + [ 0.0054, 0.0058, -0.0234, ..., -0.0210, -0.0069, 0.0223], + ..., + [-0.0025, 0.0097, -0.0013, ..., 0.0089, 0.0019, 0.0197], + [ 0.0045, -0.0037, 0.0037, ..., 0.0008, 0.0096, -0.0237], + [-0.0100, 0.0123, 0.0061, ..., -0.0153, -0.0145, 0.0152]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0037, 0.0258, -0.0091, ..., -0.0498, -0.0065, -0.0458], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1915, 1.0973, 1.2475, ..., 1.4018, 1.1544, 1.1824], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0225, -0.0913, 
0.0974, ..., -0.2996, -0.0410, 0.0070], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0168, 0.0096, 0.0042, ..., -0.0020, -0.0020, 0.0089], + [-0.0215, 0.0329, 0.0130, ..., 0.0041, 0.0160, 0.0035], + [-0.0176, -0.0188, 0.0220, ..., 0.0037, -0.0368, 0.0167], + ..., + [ 0.0086, -0.0059, -0.0079, ..., 0.0015, -0.0030, -0.0178], + [-0.0288, -0.0067, 0.0123, ..., -0.0054, -0.0138, -0.0072], + [-0.0190, 0.0143, -0.0290, ..., -0.0286, -0.0196, -0.0011]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.1221, -0.2141, -0.4116, ..., -0.1118, -0.1777, -0.3623], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0010, 0.0136, -0.0347, ..., -0.0224, 0.0056, -0.0153], + [-0.0027, -0.0350, 0.0204, ..., 0.0106, -0.0202, -0.0021], + [ 0.0073, -0.0276, -0.0020, ..., 0.0134, 0.0046, -0.0155], + ..., + [-0.0004, -0.0072, 0.0019, ..., -0.0059, 0.0039, 0.0084], + [ 0.0117, -0.0049, -0.0148, ..., -0.0053, 0.0066, -0.0098], + [ 0.0130, 0.0172, 0.0037, ..., 0.0183, -0.0211, -0.0070]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0200, 0.0231, -0.0658, ..., 0.1027, -0.0781, -0.1132], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2565, 1.1066, 1.2045, ..., 0.5890, 1.0264, 1.2907], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0063, -0.0652, 0.0874, ..., 0.1717, 0.1017, -0.0355], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0141, 0.0224, 0.0066, ..., -0.0503, -0.0308, -0.0002], + [-0.0106, -0.0263, 0.0116, ..., -0.0196, -0.0080, 0.0482], + [ 0.0088, -0.0083, 0.0067, ..., 0.0172, -0.0225, 0.0448], + ..., + [ 0.0117, 0.0198, 0.0119, ..., 0.0007, 0.0045, -0.0206], + [ 
0.0123, -0.0125, 0.0020, ..., 0.0034, 0.0106, -0.0007], + [ 0.0226, -0.0011, -0.0222, ..., 0.0048, -0.0005, -0.0066]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.0543, -0.0735, 0.2413, ..., -0.0484, -0.1190, 0.0173], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0229, 0.0144, -0.0111, ..., -0.0034, 0.0119, -0.0192], + [-0.0048, -0.0063, 0.0103, ..., -0.0061, 0.0145, 0.0030], + [ 0.0077, 0.0203, 0.0148, ..., -0.0084, -0.0068, 0.0304], + ..., + [-0.0003, 0.0012, 0.0053, ..., 0.0082, -0.0035, 0.0224], + [-0.0080, 0.0013, 0.0045, ..., 0.0091, -0.0064, -0.0116], + [-0.0171, 0.0154, -0.0227, ..., -0.0176, 0.0146, -0.0069]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0177, 0.0686, -0.0156, ..., -0.0817, 0.0255, 0.0177], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1690, 1.1532, 1.1559, ..., 1.5800, 1.1703, 1.2291], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0074, 0.0918, -0.0353, ..., -0.3273, -0.1143, -0.0546], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0204, -0.0119, -0.0051, ..., -0.0030, -0.0053, 0.0117], + [-0.0035, -0.0211, 0.0029, ..., -0.0038, 0.0121, 0.0023], + [ 0.0126, -0.0055, 0.0038, ..., 0.0006, 0.0247, 0.0077], + ..., + [ 0.0121, 0.0132, -0.0259, ..., 0.0031, 0.0226, 0.0040], + [-0.0022, 0.0106, -0.0208, ..., -0.0026, 0.0163, -0.0018], + [-0.0326, 0.0187, 0.0123, ..., -0.0007, -0.0089, 0.0122]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.3628, -0.2209, -0.1646, ..., -0.2522, -0.2683, -0.2517], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter 
containing: +tensor([[-0.0055, 0.0073, 0.0131, ..., -0.0030, -0.0204, -0.0067], + [-0.0153, 0.0021, 0.0061, ..., 0.0199, -0.0058, -0.0234], + [-0.0043, 0.0070, 0.0054, ..., 0.0016, 0.0075, -0.0185], + ..., + [-0.0034, -0.0019, 0.0044, ..., -0.0031, -0.0046, 0.0004], + [-0.0026, -0.0267, -0.0127, ..., 0.0038, -0.0151, 0.0075], + [-0.0029, -0.0212, -0.0195, ..., 0.0119, 0.0086, -0.0139]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0283, 0.0284, -0.0328, ..., 0.0670, -0.0050, -0.0489], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2720, 1.2516, 1.2042, ..., 0.7531, 1.0650, 1.2413], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0019, -0.0140, 0.0240, ..., 0.2147, -0.1253, -0.2114], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0038, -0.0111, 0.0217, ..., -0.0341, 0.0049, 0.0043], + [ 0.0020, 0.0047, 0.0051, ..., -0.0009, -0.0141, 0.0165], + [-0.0086, 0.0055, 0.0177, ..., 0.0030, -0.0044, -0.0111], + ..., + [ 0.0037, 0.0199, -0.0006, ..., -0.0081, 0.0196, -0.0002], + [-0.0116, 0.0020, -0.0122, ..., 0.0042, -0.0016, -0.0110], + [-0.0201, 0.0025, -0.0230, ..., -0.0041, 0.0287, 0.0105]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.8447, 0.0093, -1.0840, ..., -0.0142, 0.0109, 0.0013], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0003, 0.0259, 0.0041, ..., 0.0052, -0.0108, 0.0274], + [ 0.0064, 0.0003, -0.0086, ..., -0.0271, 0.0063, 0.0018], + [-0.0234, 0.0012, 0.0170, ..., 0.0238, 0.0096, 0.0125], + ..., + [ 0.0077, 0.0320, 0.0242, ..., 0.0052, 0.0005, 0.0047], + [ 0.0064, 0.0084, 0.0002, ..., -0.0008, 0.0042, -0.0140], + [ 0.0204, -0.0061, -0.0246, ..., 0.0237, -0.0045, 0.0173]], + 
device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0204, 0.0182, -0.0022, ..., -0.0782, 0.0405, -0.0199], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1486, 1.2062, 1.1745, ..., 1.6290, 1.1674, 1.2157], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0552, 0.0993, -0.0013, ..., -0.1784, -0.0515, -0.0148], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0086, -0.0217, 0.0128, ..., -0.0079, -0.0053, 0.0027], + [-0.0070, 0.0067, 0.0020, ..., -0.0125, -0.0191, -0.0117], + [ 0.0062, 0.0227, 0.0108, ..., 0.0066, 0.0004, 0.0018], + ..., + [ 0.0228, -0.0078, 0.0063, ..., 0.0002, 0.0019, -0.0005], + [-0.0191, 0.0253, 0.0069, ..., -0.0109, -0.0114, -0.0081], + [ 0.0292, -0.0316, -0.0293, ..., -0.0048, 0.0165, -0.0164]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.4792, -0.1467, -0.1043, ..., -0.2996, -0.2251, -0.3262], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0207, 0.0158, -0.0021, ..., 0.0083, 0.0042, 0.0273], + [ 0.0063, 0.0024, -0.0061, ..., 0.0069, -0.0269, 0.0042], + [ 0.0299, -0.0060, -0.0002, ..., -0.0130, 0.0070, -0.0297], + ..., + [-0.0122, 0.0011, -0.0082, ..., -0.0026, 0.0038, -0.0006], + [-0.0204, 0.0085, 0.0057, ..., 0.0096, -0.0105, 0.0216], + [-0.0023, 0.0328, 0.0013, ..., -0.0099, -0.0044, 0.0145]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0411, -0.0040, -0.0516, ..., 0.1114, 0.0086, -0.0609], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.3836, 1.2857, 1.2323, ..., 0.6118, 1.1779, 1.2560], device='cuda:1', + requires_grad=True) 
+torch.Size([1024]) +Parameter containing: +tensor([-0.2367, 0.0575, 0.1226, ..., 0.2404, 0.0237, -0.0258], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0172, -0.0115, -0.0176, ..., 0.0113, -0.0038, 0.0092], + [-0.0095, 0.0104, 0.0087, ..., 0.0204, -0.0187, -0.0189], + [-0.0259, -0.0040, 0.0021, ..., 0.0106, 0.0068, -0.0149], + ..., + [ 0.0015, 0.0207, -0.0071, ..., -0.0045, -0.0049, 0.0017], + [ 0.0381, 0.0040, -0.0079, ..., 0.0003, -0.0011, 0.0140], + [ 0.0094, -0.0019, -0.0035, ..., -0.0018, 0.0271, -0.0058]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.0180, 0.2048, -0.1954, ..., 0.0674, -0.0071, 0.0122], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0127, -0.0093, -0.0233, ..., -0.0062, -0.0268, -0.0001], + [ 0.0243, -0.0188, 0.0209, ..., -0.0062, -0.0029, -0.0023], + [-0.0073, 0.0052, -0.0273, ..., 0.0022, 0.0090, 0.0104], + ..., + [-0.0117, 0.0100, 0.0137, ..., 0.0083, -0.0002, 0.0034], + [ 0.0061, 0.0166, 0.0235, ..., 0.0010, 0.0024, -0.0336], + [-0.0002, -0.0062, -0.0147, ..., 0.0020, -0.0193, -0.0020]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0233, 0.0391, -0.0073, ..., -0.0649, 0.0291, 0.0002], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.1998, 1.2310, 1.1488, ..., 1.5903, 1.2228, 1.3022], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0585, 0.0091, -0.0592, ..., -0.2559, -0.1667, -0.0673], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-1.1040e-02, -1.6754e-02, -2.7451e-02, ..., -4.3964e-04, + 6.0501e-03, 5.5552e-04], + [ 2.4002e-02, -2.4567e-02, 7.3128e-03, ..., -3.2883e-03, + 1.0437e-02, -2.3246e-05], + [ 
8.8272e-03, 8.2474e-03, 3.9597e-03, ..., 4.3845e-04, + -7.0724e-03, 2.1725e-03], + ..., + [-1.2598e-03, -9.5901e-03, 1.6785e-02, ..., -1.8721e-03, + -4.9057e-03, 7.3891e-03], + [ 2.6455e-03, 1.1078e-02, -1.6968e-02, ..., -1.1044e-03, + -6.0959e-03, -3.0914e-02], + [ 1.2146e-02, 2.3819e-02, 5.0545e-04, ..., 2.2030e-03, + -4.2877e-03, 1.9012e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.1323, -0.2241, -0.0570, ..., -0.2708, -0.3240, -0.0825], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 9.5673e-03, -1.0620e-02, -2.4261e-02, ..., 3.1433e-02, + -1.2718e-02, -1.2207e-02], + [ 8.1787e-03, 6.4707e-04, 3.0732e-04, ..., -1.3092e-02, + 5.6446e-05, -1.8692e-02], + [ 6.8626e-03, 1.8829e-02, 1.0094e-02, ..., -3.1891e-03, + 9.3765e-03, -3.5896e-03], + ..., + [ 5.6763e-03, 3.4389e-03, -8.8310e-04, ..., 5.5847e-03, + -3.6240e-03, -4.8103e-03], + [ 2.6627e-03, -1.5274e-02, -6.7186e-04, ..., -2.0081e-02, + 1.1981e-04, 1.0040e-02], + [-7.2365e-03, -5.4207e-03, -3.8395e-03, ..., 5.3978e-03, + -2.5177e-02, -2.7252e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0388, 0.0069, -0.0129, ..., 0.0417, 0.0218, 0.0082], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.4280, 1.3614, 1.2954, ..., 1.0131, 1.1817, 1.3209], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1139, 0.0275, 0.0677, ..., 0.1797, 0.0199, -0.2525], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0128, 0.0002, 0.0248, ..., -0.0053, 0.0151, -0.0208], + [-0.0050, 0.0376, -0.0262, ..., 0.0018, 0.0168, 0.0039], + [-0.0110, -0.0084, 0.0369, ..., -0.0102, 0.0071, 0.0117], + ..., + [ 0.0205, -0.0055, -0.0127, ..., -0.0057, -0.0044, 0.0095], + 
[-0.0154, -0.0017, -0.0012, ..., 0.0026, -0.0132, 0.0012], + [ 0.0082, 0.0055, 0.0048, ..., -0.0060, -0.0069, 0.0101]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.0482, -0.0384, 0.5059, ..., -0.0175, -0.0147, 0.0126], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0060, 0.0181, 0.0104, ..., -0.0065, 0.0091, 0.0008], + [-0.0115, 0.0102, 0.0212, ..., 0.0144, 0.0276, -0.0077], + [ 0.0390, 0.0029, 0.0083, ..., -0.0165, 0.0036, -0.0177], + ..., + [ 0.0054, -0.0068, -0.0049, ..., 0.0166, -0.0177, 0.0042], + [ 0.0016, -0.0031, 0.0076, ..., 0.0091, 0.0008, 0.0024], + [ 0.0034, -0.0059, 0.0107, ..., -0.0199, 0.0139, -0.0083]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0060, 0.0679, 0.0352, ..., -0.0554, 0.0134, 0.0558], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2294, 1.2056, 1.1645, ..., 1.8344, 1.1523, 1.2639], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0175, -0.0714, -0.1254, ..., -0.2901, -0.1457, 0.1501], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-4.8256e-03, -3.2368e-03, 1.3252e-02, ..., -1.7681e-03, + 2.2354e-02, -1.1436e-02], + [ 4.0253e-02, 3.1097e-02, 1.9760e-02, ..., 5.3787e-03, + -2.2949e-02, 1.4923e-02], + [-1.9789e-05, -2.5848e-02, -1.0681e-02, ..., 1.1975e-04, + 1.0056e-02, 9.3384e-03], + ..., + [ 1.9211e-02, -1.5373e-02, 5.6839e-03, ..., -1.1314e-02, + -4.1748e-02, 1.5808e-02], + [ 2.8934e-03, -1.8179e-04, 8.8425e-03, ..., -2.5787e-03, + -1.7517e-02, -6.8169e-03], + [ 1.7838e-02, -6.3019e-03, -3.8700e-03, ..., 3.0651e-03, + -3.5019e-03, 1.3748e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: 
+tensor([-0.2314, -0.3215, -0.0737, ..., -0.3018, -0.1614, -0.3069], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 0.0107, 0.0326, 0.0092, ..., -0.0069, -0.0051, -0.0002], + [-0.0118, 0.0024, 0.0258, ..., 0.0087, -0.0250, -0.0087], + [ 0.0156, 0.0077, 0.0071, ..., -0.0158, -0.0195, 0.0202], + ..., + [-0.0062, 0.0010, 0.0041, ..., -0.0098, 0.0120, 0.0015], + [-0.0022, 0.0381, -0.0009, ..., 0.0051, 0.0093, 0.0135], + [ 0.0090, -0.0229, -0.0135, ..., 0.0130, 0.0066, 0.0037]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.0371, 0.0197, 0.0018, ..., 0.0558, 0.0674, 0.0106], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.4988, 1.4287, 1.3546, ..., 0.9505, 1.1804, 1.4063], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1733, -0.0029, 0.0176, ..., 0.2492, 0.0635, -0.1153], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0109, -0.0005, 0.0278, ..., 0.0017, -0.0215, 0.0092], + [ 0.0046, -0.0021, 0.0013, ..., 0.0017, -0.0017, 0.0037], + [-0.0275, 0.0318, 0.0133, ..., 0.0012, 0.0040, -0.0225], + ..., + [ 0.0179, 0.0136, -0.0099, ..., 0.0038, 0.0117, 0.0026], + [ 0.0067, 0.0052, -0.0031, ..., -0.0122, 0.0013, -0.0083], + [ 0.0012, -0.0281, -0.0114, ..., -0.0038, -0.0005, -0.0021]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.2450, 1.7920, 0.0699, ..., 0.0484, -0.0464, 0.0208], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0059, 0.0109, -0.0109, ..., -0.0163, 0.0007, -0.0298], + [-0.0065, -0.0061, -0.0041, ..., -0.0236, -0.0090, 0.0271], + [ 0.0104, 0.0079, 0.0172, ..., -0.0066, 0.0009, -0.0156], + ..., + [ 0.0096, -0.0068, 0.0006, 
..., -0.0150, 0.0118, 0.0032], + [ 0.0230, 0.0083, 0.0289, ..., -0.0219, 0.0005, 0.0038], + [ 0.0172, 0.0228, 0.0059, ..., 0.0067, 0.0123, -0.0109]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0267, -0.0056, -0.0026, ..., -0.0532, -0.0267, 0.0485], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2355, 1.2508, 1.2161, ..., 1.8124, 1.1440, 1.3011], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1252, -0.0353, 0.1171, ..., -0.1227, -0.0330, 0.1001], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-6.1083e-04, -1.1574e-02, 9.1705e-03, ..., -1.0834e-03, + 8.1482e-03, 3.4065e-03], + [ 1.2016e-02, -1.7960e-02, 3.3379e-03, ..., -8.6365e-03, + -1.7424e-03, -1.5541e-02], + [ 8.0948e-03, -1.1383e-02, -2.7039e-02, ..., 4.2725e-03, + 4.9667e-03, -2.5375e-02], + ..., + [-2.1606e-02, -2.0233e-02, -3.5381e-03, ..., 2.8253e-05, + -1.3222e-02, 7.2975e-03], + [ 2.8515e-03, 1.3855e-02, 1.0794e-04, ..., -3.3092e-03, + -1.4519e-02, 1.1742e-02], + [-1.1467e-02, 1.2001e-02, 1.0672e-03, ..., -3.9520e-03, + -5.1178e-02, 5.3864e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.3506, -0.3098, -0.0694, ..., -0.3074, -0.2494, -0.4229], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0110, 0.0193, -0.0116, ..., 0.0081, -0.0065, -0.0187], + [ 0.0120, 0.0100, 0.0045, ..., 0.0063, -0.0106, -0.0092], + [-0.0012, 0.0172, 0.0223, ..., 0.0052, 0.0394, 0.0099], + ..., + [-0.0025, 0.0068, -0.0081, ..., 0.0005, -0.0055, 0.0065], + [ 0.0112, -0.0010, -0.0028, ..., -0.0172, -0.0041, -0.0017], + [ 0.0086, -0.0009, 0.0137, ..., -0.0030, 0.0077, -0.0112]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) 
+Parameter containing: +tensor([ 0.0374, -0.0088, -0.0429, ..., 0.0653, -0.0126, -0.0252], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.5180, 1.3799, 1.3971, ..., 0.8399, 1.2614, 1.5007], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1863, 0.1162, 0.4045, ..., 0.2292, 0.4198, -0.0957], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0160, 0.0014, -0.0089, ..., 0.0042, 0.0289, 0.0184], + [-0.0010, 0.0089, -0.0017, ..., -0.0152, -0.0108, -0.0008], + [-0.0141, -0.0294, 0.0109, ..., -0.0025, 0.0298, 0.0266], + ..., + [ 0.0245, 0.0203, 0.0064, ..., -0.0128, 0.0092, -0.0031], + [ 0.0211, 0.0237, -0.0171, ..., -0.0165, -0.0070, 0.0133], + [-0.0094, 0.0021, 0.0247, ..., -0.0004, 0.0047, -0.0201]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2849, -0.3125, -0.2026, ..., -0.0510, -0.0885, 0.0077], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0026, -0.0256, 0.0400, ..., -0.0214, -0.0089, 0.0125], + [-0.0129, -0.0088, 0.0240, ..., -0.0082, -0.0172, -0.0212], + [-0.0200, -0.0017, 0.0146, ..., -0.0077, 0.0185, -0.0176], + ..., + [-0.0275, 0.0012, -0.0196, ..., 0.0564, 0.0609, -0.0008], + [ 0.0161, 0.0054, -0.0016, ..., -0.0032, 0.0063, 0.0044], + [-0.0055, -0.0287, -0.0144, ..., 0.0080, -0.0101, 0.0110]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0077, 0.0031, 0.0012, ..., -0.0704, 0.0297, 0.0082], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2799, 1.2512, 1.2956, ..., 2.2034, 1.1719, 1.3681], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1496, -0.0801, -0.0724, ..., -0.1659, 
-0.0900, 0.0350], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0067, 0.0413, 0.0378, ..., 0.0076, 0.0142, 0.0230], + [ 0.0130, 0.0156, 0.0179, ..., 0.0018, 0.0109, 0.0043], + [-0.0078, -0.0166, 0.0107, ..., -0.0015, -0.0044, 0.0111], + ..., + [ 0.0138, 0.0290, 0.0173, ..., -0.0042, 0.0035, 0.0030], + [ 0.0160, 0.0144, 0.0156, ..., -0.0038, -0.0043, 0.0115], + [-0.0084, 0.0176, 0.0231, ..., 0.0015, -0.0203, -0.0239]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2788, -0.1956, -0.3853, ..., -0.3225, -0.2610, -0.0354], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-7.0419e-03, -1.1139e-02, -1.7349e-02, ..., 1.0239e-02, + -1.8906e-02, 2.0798e-02], + [ 1.1635e-02, -8.4457e-03, -7.3700e-03, ..., 1.1932e-02, + -1.0246e-02, -1.2451e-02], + [ 1.3153e-02, -2.9282e-02, -1.9894e-03, ..., 4.7760e-03, + -2.7866e-03, -1.2886e-02], + ..., + [-3.2005e-03, 1.4801e-02, -3.5763e-03, ..., 6.4313e-05, + 1.2386e-04, 2.5702e-04], + [-1.8677e-02, -8.7967e-03, 9.5978e-03, ..., 4.4403e-03, + -1.1940e-02, 2.3422e-02], + [-5.8060e-03, -2.0889e-02, -1.2917e-02, ..., -4.3907e-03, + -7.6561e-03, 2.6611e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0329, 0.0113, -0.0181, ..., 0.0332, 0.0061, -0.0410], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.6208, 1.4989, 1.4207, ..., 0.7640, 1.2692, 1.4951], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1374, -0.0238, -0.0117, ..., 0.3347, 0.1457, -0.0975], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0034, -0.0225, -0.0031, ..., 0.0137, -0.0365, -0.0123], + [-0.0254, -0.0124, 0.0016, ..., -0.0371, 0.0126, -0.0146], + 
[ 0.0078, -0.0007, -0.0157, ..., 0.0026, -0.0030, 0.0202], + ..., + [ 0.0041, -0.0135, 0.0168, ..., 0.0034, -0.0226, -0.0060], + [-0.0034, -0.0090, 0.0047, ..., -0.0018, 0.0212, -0.0074], + [ 0.0030, 0.0077, -0.0017, ..., -0.0023, -0.0013, 0.0052]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.2683, -0.1324, 0.1324, ..., 0.0209, 0.0130, 0.0207], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0029, 0.0157, 0.0060, ..., 0.0021, 0.0078, -0.0089], + [ 0.0114, -0.0065, 0.0057, ..., 0.0285, 0.0281, 0.0063], + [-0.0123, -0.0026, 0.0062, ..., 0.0232, -0.0135, -0.0089], + ..., + [-0.0011, 0.0021, -0.0047, ..., -0.0017, -0.0325, -0.0199], + [-0.0076, -0.0072, -0.0037, ..., -0.0192, -0.0359, -0.0052], + [-0.0138, -0.0226, 0.0044, ..., 0.0032, 0.0111, -0.0124]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0062, -0.0194, -0.0133, ..., 0.0403, 0.0331, 0.0198], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.3324, 1.2243, 1.2615, ..., 1.9267, 1.1337, 1.3792], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1416, 0.0005, 0.0165, ..., 0.0163, -0.0729, 0.0122], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0125, -0.0005, -0.0153, ..., -0.0146, -0.0185, -0.0108], + [-0.0015, 0.0045, 0.0178, ..., -0.0033, 0.0042, -0.0107], + [-0.0038, 0.0046, 0.0096, ..., -0.0217, 0.0142, 0.0295], + ..., + [ 0.0055, -0.0117, -0.0156, ..., -0.0233, -0.0058, 0.0149], + [ 0.0230, -0.0006, 0.0009, ..., 0.0129, 0.0170, 0.0101], + [ 0.0109, 0.0066, -0.0111, ..., 0.0038, 0.0099, -0.0238]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2153, -0.2781, -0.3320, ..., 
-0.1223, -0.1307, -0.2898], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0037, -0.0316, 0.0164, ..., -0.0018, -0.0181, 0.0028], + [ 0.0187, 0.0093, 0.0047, ..., 0.0109, 0.0137, -0.0057], + [-0.0071, -0.0045, 0.0287, ..., 0.0160, -0.0103, 0.0014], + ..., + [-0.0109, 0.0038, -0.0134, ..., 0.0115, -0.0042, -0.0035], + [-0.0127, 0.0039, 0.0083, ..., 0.0040, -0.0111, 0.0036], + [-0.0054, -0.0070, -0.0073, ..., -0.0131, -0.0262, 0.0085]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0253, -0.0024, -0.0242, ..., 0.0956, 0.0208, -0.0150], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.6542, 1.5471, 1.5496, ..., 0.4089, 1.3961, 1.6685], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.2147, 0.1279, 0.3980, ..., 0.3844, 0.3855, -0.2151], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0031, 0.0022, -0.0024, ..., 0.0046, -0.0105, -0.0062], + [-0.0263, 0.0304, 0.0018, ..., -0.0321, 0.0226, 0.0128], + [ 0.0247, 0.0204, 0.0079, ..., -0.0015, 0.0224, -0.0038], + ..., + [-0.0034, 0.0151, -0.0085, ..., 0.0086, -0.0034, 0.0134], + [ 0.0203, -0.0166, 0.0061, ..., 0.0006, -0.0226, -0.0220], + [ 0.0190, -0.0124, -0.0086, ..., -0.0007, -0.0131, -0.0075]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-2.5195, 0.2338, -0.3826, ..., -0.0098, -0.0044, 0.0407], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0303, -0.0037, 0.0023, ..., 0.0084, 0.0081, -0.0136], + [-0.0157, -0.0159, 0.0211, ..., 0.0249, 0.0152, 0.0164], + [-0.0068, -0.0159, 0.0054, ..., 0.0163, 0.0173, 0.0204], + ..., + [-0.0006, 0.0029, -0.0114, ..., -0.0071, 0.0085, 0.0100], + 
[-0.0033, -0.0080, -0.0203, ..., -0.0054, 0.0173, 0.0025], + [ 0.0112, -0.0027, 0.0068, ..., 0.0061, -0.0080, 0.0069]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0108, -0.0551, 0.0180, ..., 0.0265, 0.0322, -0.0401], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.2997, 1.2493, 1.3251, ..., 1.4770, 1.1521, 1.3748], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1529, 0.0496, -0.0569, ..., -0.0100, -0.0241, -0.0367], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0036, 0.0120, 0.0108, ..., -0.0241, -0.0065, -0.0097], + [-0.0012, -0.0218, -0.0014, ..., -0.0027, -0.0102, 0.0113], + [ 0.0043, 0.0098, -0.0035, ..., -0.0025, -0.0057, 0.0186], + ..., + [-0.0025, -0.0322, -0.0135, ..., -0.0106, -0.0206, -0.0133], + [-0.0045, 0.0262, -0.0152, ..., -0.0032, -0.0122, -0.0036], + [-0.0104, 0.0072, -0.0117, ..., -0.0020, 0.0015, -0.0004]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2783, -0.2571, -0.3367, ..., -0.3469, -0.2042, -0.0554], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 0.0010, -0.0118, 0.0168, ..., 0.0077, -0.0058, 0.0077], + [ 0.0135, -0.0157, -0.0040, ..., -0.0081, 0.0145, -0.0026], + [ 0.0177, -0.0216, 0.0124, ..., 0.0067, -0.0014, -0.0008], + ..., + [-0.0105, 0.0073, -0.0061, ..., 0.0008, 0.0031, -0.0009], + [-0.0176, 0.0078, -0.0194, ..., -0.0242, -0.0090, -0.0050], + [ 0.0074, 0.0026, 0.0208, ..., 0.0214, -0.0106, 0.0054]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0367, -0.0981, -0.0668, ..., 0.0355, 0.0193, -0.0256], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter 
containing: +tensor([1.7683, 1.6419, 1.7225, ..., 0.6681, 1.5146, 1.7884], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1888, 0.3456, 0.1489, ..., 0.4134, 0.4308, -0.1223], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0099, -0.0102, 0.0307, ..., 0.0022, -0.0095, 0.0271], + [-0.0269, -0.0032, -0.0007, ..., -0.0077, -0.0161, -0.0114], + [-0.0016, 0.0004, 0.0387, ..., -0.0108, 0.0132, -0.0004], + ..., + [-0.0021, -0.0089, 0.0141, ..., 0.0007, -0.0084, 0.0113], + [ 0.0048, -0.0126, -0.0221, ..., -0.0024, -0.0106, 0.0105], + [-0.0116, -0.0002, -0.0007, ..., -0.0022, -0.0071, -0.0038]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2299, 0.2384, -0.0945, ..., 0.0501, -0.0047, -0.0003], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0283, -0.0055, 0.0025, ..., -0.0084, 0.0048, -0.0027], + [ 0.0133, -0.0057, -0.0082, ..., -0.0007, -0.0018, -0.0043], + [-0.0129, 0.0132, 0.0077, ..., -0.0180, 0.0022, -0.0036], + ..., + [ 0.0090, 0.0050, -0.0102, ..., -0.0129, -0.0157, -0.0071], + [ 0.0154, -0.0024, -0.0170, ..., 0.0126, -0.0098, 0.0101], + [ 0.0222, 0.0012, -0.0260, ..., -0.0199, -0.0145, 0.0066]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0646, -0.0642, 0.0045, ..., -0.0348, -0.0156, -0.0321], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.3934, 1.3553, 1.4854, ..., 1.8728, 1.3167, 1.4949], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0756, -0.1134, -0.0586, ..., -0.0262, -0.0903, -0.1063], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-7.9727e-03, 4.2458e-03, 1.8478e-02, ..., -1.1673e-02, + 
1.3763e-02, -6.2256e-03], + [-1.7181e-02, 1.0742e-02, -4.7760e-03, ..., -3.7718e-04, + -2.2888e-02, -8.6594e-03], + [-4.2701e-04, 2.2446e-02, 1.0483e-02, ..., -4.0817e-03, + -1.7151e-02, -2.6047e-02], + ..., + [-2.9617e-02, -7.5722e-03, 8.4043e-06, ..., -1.1253e-02, + 1.6479e-02, 2.7222e-02], + [-1.2772e-02, 6.8283e-03, 2.5269e-02, ..., -7.0038e-03, + -7.5645e-03, 9.1019e-03], + [ 4.2176e-04, -4.2152e-03, 4.3335e-02, ..., -3.2711e-03, + -1.3786e-02, 1.5656e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.3418, -0.2771, -0.3467, ..., -0.3989, -0.2386, -0.2927], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0106, 0.0111, 0.0187, ..., -0.0266, 0.0003, -0.0147], + [ 0.0346, 0.0015, -0.0025, ..., -0.0093, 0.0119, -0.0310], + [-0.0043, -0.0276, 0.0013, ..., -0.0066, 0.0263, 0.0338], + ..., + [-0.0045, 0.0165, 0.0076, ..., 0.0143, -0.0025, -0.0003], + [ 0.0067, -0.0164, 0.0050, ..., 0.0121, -0.0008, -0.0172], + [-0.0008, -0.0125, -0.0156, ..., 0.0319, 0.0113, -0.0105]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0361, -0.0391, 0.0151, ..., -0.0164, 0.0040, -0.0078], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.1112, 2.0118, 2.0347, ..., 0.7085, 1.8153, 2.2010], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1631, -0.1508, 0.1484, ..., 0.4431, 0.6810, -0.3282], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0117, -0.0245, -0.0218, ..., 0.0068, -0.0019, -0.0032], + [ 0.0243, 0.0013, -0.0101, ..., 0.0473, -0.0216, 0.0135], + [ 0.0109, -0.0224, -0.0055, ..., -0.0055, -0.0267, -0.0187], + ..., + [-0.0167, 0.0103, -0.0058, ..., -0.0046, -0.0215, 0.0237], + [ 0.0119, -0.0105, 0.0158, ..., 
0.0023, -0.0127, -0.0004], + [-0.0119, -0.0276, 0.0225, ..., -0.0024, -0.0047, -0.0064]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.2433, -0.1136, 0.0888, ..., -0.0050, -0.0137, 0.0093], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0302, 0.0129, -0.0099, ..., 0.0201, 0.0093, -0.0045], + [-0.0376, -0.0102, -0.0002, ..., -0.0104, 0.0078, -0.0009], + [ 0.0196, -0.0219, 0.0057, ..., 0.0070, -0.0059, -0.0075], + ..., + [-0.0068, -0.0123, 0.0011, ..., 0.0024, -0.0069, -0.0181], + [ 0.0018, -0.0121, -0.0095, ..., -0.0199, 0.0067, -0.0080], + [-0.0084, 0.0186, 0.0111, ..., -0.0047, 0.0052, 0.0088]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-7.3486e-02, 2.0966e-02, 2.3758e-02, ..., 9.0637e-03, + 1.1623e-05, -1.4076e-02], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.3803, 1.3287, 1.4781, ..., 1.5120, 1.3130, 1.4137], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0475, -0.0935, -0.0597, ..., 0.0320, 0.0142, -0.0661], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 1.6296e-02, -1.0147e-02, 2.2263e-02, ..., 1.8875e-02, + -4.6844e-03, -1.2360e-02], + [ 1.4534e-02, -1.0414e-02, -2.5024e-02, ..., -1.7578e-02, + -3.4729e-02, -2.3346e-02], + [-2.1347e-02, 2.1301e-02, 3.8509e-03, ..., 8.6441e-03, + 1.4847e-02, -6.3400e-03], + ..., + [-4.7302e-03, -2.5574e-02, 7.4959e-03, ..., 3.6087e-03, + 1.5732e-02, -2.2202e-02], + [-6.3324e-04, 8.2550e-03, -1.3161e-02, ..., 5.1918e-03, + 2.1324e-03, 1.3359e-02], + [-5.0240e-03, 4.4479e-03, -1.5625e-02, ..., 1.8707e-02, + -4.8995e-05, 1.2718e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2683, 
-0.3921, -0.3276, ..., -0.3716, -0.2025, -0.3127], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0052, -0.0143, 0.0135, ..., 0.0038, 0.0296, -0.0021], + [ 0.0280, -0.0070, -0.0129, ..., -0.0207, 0.0208, 0.0257], + [ 0.0018, -0.0234, 0.0009, ..., 0.0089, -0.0099, -0.0107], + ..., + [ 0.0084, -0.0017, 0.0058, ..., -0.0016, -0.0057, -0.0010], + [ 0.0047, -0.0170, -0.0032, ..., 0.0134, -0.0184, 0.0449], + [-0.0015, -0.0398, -0.0143, ..., -0.0135, 0.0247, 0.0222]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0358, 0.0342, 0.0543, ..., 0.0743, -0.0069, 0.0033], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.5167, 2.3499, 2.4777, ..., 0.5123, 2.0356, 2.4509], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.2338, -0.0299, 0.1534, ..., 0.4063, 0.7359, -0.2059], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0072, -0.0024, -0.0093, ..., 0.0206, -0.0025, -0.0194], + [ 0.0068, 0.0083, 0.0218, ..., -0.0244, -0.0298, 0.0023], + [ 0.0273, 0.0015, -0.0178, ..., -0.0870, 0.0066, -0.0008], + ..., + [ 0.0076, -0.0371, -0.0106, ..., 0.0053, -0.0015, -0.0093], + [-0.0015, 0.0093, -0.0339, ..., -0.0024, 0.0004, -0.0021], + [ 0.0035, -0.0088, 0.0025, ..., -0.0005, 0.0056, -0.0153]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.0615, 0.0997, -0.5298, ..., 0.0029, -0.0045, -0.0547], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0042, -0.0015, 0.0274, ..., 0.0097, -0.0203, 0.0125], + [-0.0057, 0.0277, 0.0067, ..., 0.0058, -0.0193, 0.0007], + [-0.0005, 0.0042, 0.0217, ..., 0.0109, 0.0060, 0.0009], + ..., + [ 0.0093, -0.0028, -0.0129, ..., 
0.0005, 0.0210, -0.0072], + [ 0.0155, 0.0005, 0.0134, ..., -0.0217, -0.0046, 0.0098], + [ 0.0043, -0.0210, -0.0279, ..., -0.0082, -0.0022, 0.0044]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0168, 0.0077, -0.0467, ..., 0.0064, -0.0126, -0.0271], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.4813, 1.5299, 1.5828, ..., 1.5154, 1.4352, 1.5897], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1628, -0.0896, -0.0374, ..., -0.0098, -0.0610, -0.1625], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0061, -0.0101, 0.0220, ..., 0.0076, -0.0179, -0.0062], + [ 0.0287, 0.0189, 0.0143, ..., -0.0079, 0.0128, -0.0096], + [-0.0176, 0.0025, -0.0220, ..., -0.0191, -0.0070, -0.0005], + ..., + [ 0.0012, -0.0170, -0.0051, ..., -0.0094, -0.0273, 0.0126], + [ 0.0056, -0.0026, 0.0170, ..., 0.0264, -0.0188, -0.0084], + [ 0.0042, 0.0020, 0.0170, ..., -0.0107, -0.0194, -0.0005]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2793, -0.3450, -0.2959, ..., -0.1840, -0.1981, -0.2493], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-1.0500e-03, -1.4481e-02, 9.7084e-04, ..., 3.5362e-03, + -7.4148e-04, 2.0218e-02], + [ 1.8265e-02, -1.7059e-02, -6.9523e-04, ..., -7.4577e-04, + -8.8272e-03, -1.0271e-03], + [ 6.8474e-03, 8.5602e-03, -2.2079e-02, ..., 1.6556e-02, + -1.0653e-03, -2.1194e-02], + ..., + [ 2.0035e-02, -9.2239e-03, 1.4229e-02, ..., -6.3858e-03, + -7.1640e-03, -2.1927e-02], + [ 1.5144e-02, -9.1791e-06, 7.7324e-03, ..., -7.3395e-03, + 3.1433e-03, 9.2697e-03], + [ 7.2021e-03, 2.0950e-02, 8.4610e-03, ..., 9.9106e-03, + -2.2316e-03, -6.6261e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) 
+torch.Size([1024]) +Parameter containing: +tensor([0.0066, 0.0104, 0.0044, ..., 0.0064, 0.0797, 0.0699], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.6710, 2.5634, 2.7691, ..., 0.6788, 2.2533, 2.7433], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0355, 0.2774, 0.4173, ..., 0.5667, 0.5320, -0.4676], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0145, -0.0110, 0.0256, ..., -0.0594, -0.0049, 0.0130], + [-0.0188, -0.0083, -0.0112, ..., 0.0231, -0.0353, 0.0120], + [ 0.0106, 0.0018, -0.0003, ..., 0.0022, 0.0061, 0.0072], + ..., + [ 0.0017, 0.0005, 0.0002, ..., 0.0049, -0.0219, -0.0394], + [ 0.0120, 0.0053, -0.0002, ..., -0.0002, 0.0002, -0.0100], + [-0.0057, 0.0138, 0.0124, ..., -0.0036, -0.0128, 0.0019]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2327, 0.0399, -0.0326, ..., -0.0056, 0.0197, 0.0396], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0253, 0.0045, 0.0054, ..., 0.0114, -0.0120, 0.0098], + [ 0.0248, -0.0201, -0.0091, ..., -0.0043, -0.0027, 0.0147], + [ 0.0056, 0.0186, -0.0143, ..., -0.0139, -0.0035, -0.0077], + ..., + [ 0.0012, -0.0047, -0.0184, ..., 0.0032, -0.0123, 0.0104], + [ 0.0084, -0.0137, 0.0252, ..., 0.0189, -0.0143, 0.0102], + [-0.0266, -0.0148, -0.0076, ..., 0.0242, -0.0059, 0.0166]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0083, 0.0090, -0.0957, ..., -0.0067, 0.0007, -0.0046], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.5726, 1.4541, 1.5816, ..., 1.7312, 1.4169, 1.5937], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1270, -0.2203, -0.0099, 
..., -0.0846, -0.0867, -0.1574], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0049, 0.0010, -0.0008, ..., 0.0092, -0.0068, -0.0089], + [-0.0189, 0.0112, -0.0008, ..., -0.0095, -0.0150, 0.0131], + [-0.0017, -0.0340, 0.0049, ..., -0.0096, 0.0049, -0.0091], + ..., + [ 0.0015, -0.0103, -0.0238, ..., -0.0044, -0.0164, -0.0042], + [ 0.0059, -0.0020, -0.0025, ..., 0.0057, 0.0186, 0.0068], + [ 0.0137, 0.0040, -0.0026, ..., -0.0155, 0.0179, -0.0174]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.1598, -0.3298, -0.3064, ..., -0.3005, -0.3159, -0.1328], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-5.0659e-03, -1.4755e-02, 2.9678e-02, ..., -1.4786e-02, + 3.0472e-02, 1.2962e-02], + [ 8.6517e-03, -2.0859e-02, -1.2672e-02, ..., 1.1673e-02, + -2.9373e-02, 4.4823e-03], + [-1.6266e-02, 4.0253e-02, -6.0081e-03, ..., 2.8193e-05, + -1.5068e-02, -1.5480e-02], + ..., + [-1.2833e-02, 1.1993e-02, -5.6553e-04, ..., 5.5046e-03, + 1.6586e-02, -7.4272e-03], + [ 2.9144e-02, 8.1482e-03, -1.4267e-02, ..., 2.8549e-02, + 3.4962e-03, -7.6218e-03], + [ 2.3270e-02, -1.7654e-02, -1.4374e-02, ..., -3.7155e-03, + -1.8509e-02, -3.0289e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0115, 0.0317, -0.0131, ..., -0.0563, -0.0150, 0.0325], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.7785, 2.7386, 2.7390, ..., 0.8678, 2.4946, 2.8710], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1475, -0.0199, 0.2092, ..., 0.4587, 0.5408, -0.2745], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[-0.0120, -0.0190, -0.0403, ..., -0.0117, 0.0023, 0.0093], + [ 0.0185, 0.0156, 0.0064, ..., 
-0.0211, 0.0304, 0.0128], + [-0.0041, -0.0232, -0.0050, ..., -0.0144, -0.0013, 0.0115], + ..., + [-0.0279, -0.0333, 0.0062, ..., -0.0130, -0.0025, 0.0134], + [ 0.0011, 0.0101, 0.0281, ..., -0.0020, 0.0121, 0.0017], + [-0.0162, 0.0049, -0.0176, ..., 0.0049, 0.0010, -0.0232]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.0367, -0.0407, -0.0178, ..., 0.0190, 0.0422, 0.0333], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-0.0003, -0.0005, -0.0012, ..., 0.0035, 0.0030, -0.0334], + [-0.0137, -0.0057, 0.0201, ..., -0.0077, -0.0303, -0.0032], + [-0.0225, -0.0056, 0.0133, ..., 0.0120, -0.0059, -0.0108], + ..., + [ 0.0131, 0.0180, -0.0046, ..., 0.0325, -0.0201, -0.0211], + [-0.0181, 0.0204, -0.0102, ..., -0.0033, 0.0038, -0.0071], + [-0.0171, -0.0187, 0.0197, ..., 0.0140, -0.0235, -0.0155]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0430, 0.0264, -0.0948, ..., -0.0741, -0.0225, -0.0398], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.6421, 1.5236, 1.6723, ..., 1.8496, 1.4900, 1.6385], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0308, -0.3167, -0.0299, ..., -0.0403, -0.0753, -0.2397], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0262, -0.0157, 0.0032, ..., -0.0235, -0.0293, -0.0457], + [ 0.0032, -0.0053, 0.0187, ..., 0.0109, 0.0186, 0.0035], + [ 0.0222, -0.0173, 0.0017, ..., -0.0083, 0.0020, 0.0136], + ..., + [ 0.0419, 0.0184, -0.0148, ..., 0.0049, 0.0197, -0.0116], + [ 0.0028, -0.0195, 0.0098, ..., 0.0172, -0.0162, -0.0341], + [ 0.0033, -0.0130, -0.0162, ..., 0.0044, -0.0219, -0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: 
+tensor([-0.3088, -0.1615, -0.2668, ..., -0.2515, -0.2261, -0.2349], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0020, -0.0129, -0.0182, ..., 0.0192, -0.0327, -0.0179], + [ 0.0028, -0.0038, -0.0319, ..., -0.0060, 0.0162, 0.0273], + [ 0.0273, -0.0120, -0.0101, ..., -0.0013, -0.0036, -0.0072], + ..., + [ 0.0023, -0.0175, -0.0372, ..., 0.0134, 0.0053, 0.0087], + [ 0.0148, -0.0124, -0.0037, ..., 0.0165, -0.0067, 0.0085], + [-0.0011, 0.0023, 0.0334, ..., -0.0055, -0.0134, 0.0073]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1197, -0.0195, 0.1592, ..., -0.0289, -0.0276, -0.0573], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([3.1758, 3.2323, 3.2080, ..., 1.1438, 2.6186, 3.2341], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.3706, 0.0532, -0.3175, ..., 0.1710, 0.3347, -0.2024], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 2.1271e-02, 1.1780e-02, -6.7997e-04, ..., 6.0921e-03, + -3.5896e-03, -7.0524e-04], + [ 5.5809e-03, -1.0429e-02, -1.2751e-03, ..., 2.9259e-03, + 3.8683e-05, 2.1801e-03], + [-2.0950e-02, 3.2496e-04, -2.4063e-02, ..., -6.4819e-02, + -3.2349e-03, -4.4746e-03], + ..., + [ 1.0742e-02, -3.3913e-03, -1.1414e-02, ..., -2.3003e-03, + 2.1942e-02, 2.1652e-02], + [-1.3786e-02, 1.0185e-02, -4.3068e-03, ..., 9.9850e-04, + 7.5111e-03, 2.2797e-02], + [ 4.1842e-05, 1.5434e-02, -5.5361e-04, ..., 8.8730e-03, + 1.2108e-02, 1.5915e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.0933, -0.0383, -0.0063, ..., 0.0316, 0.0111, -0.0547], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 0.0098, -0.0170, 0.0142, ..., -0.0043, 
-0.0189, -0.0120], + [-0.0104, 0.0156, -0.0006, ..., -0.0124, 0.0124, -0.0015], + [ 0.0062, -0.0193, 0.0082, ..., 0.0038, 0.0258, -0.0131], + ..., + [ 0.0200, 0.0005, -0.0168, ..., -0.0123, -0.0038, 0.0042], + [ 0.0073, 0.0172, -0.0105, ..., 0.0243, 0.0107, 0.0142], + [ 0.0281, 0.0236, 0.0068, ..., -0.0044, -0.0162, -0.0067]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1116, -0.0343, -0.0087, ..., -0.0480, -0.0558, -0.0237], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.5671, 1.5268, 1.5659, ..., 0.7991, 1.4337, 1.7072], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0451, -0.2257, -0.1079, ..., 0.0651, -0.1020, -0.2032], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0205, -0.0153, 0.0058, ..., 0.0221, 0.0021, -0.0075], + [-0.0245, 0.0084, 0.0146, ..., -0.0019, 0.0069, -0.0050], + [ 0.0138, 0.0011, 0.0013, ..., 0.0371, 0.0046, 0.0199], + ..., + [ 0.0202, 0.0034, 0.0115, ..., -0.0043, -0.0131, -0.0177], + [ 0.0054, 0.0023, 0.0239, ..., -0.0273, 0.0084, -0.0098], + [ 0.0170, 0.0049, -0.0137, ..., -0.0036, -0.0199, 0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2671, -0.2556, -0.2236, ..., -0.2886, -0.2778, -0.0958], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0092, -0.0169, -0.0232, ..., 0.0134, -0.0171, -0.0194], + [-0.0089, 0.0153, -0.0176, ..., -0.0238, 0.0060, -0.0118], + [ 0.0023, -0.0021, 0.0147, ..., -0.0046, 0.0072, 0.0077], + ..., + [-0.0114, -0.0102, 0.0014, ..., 0.0213, 0.0042, -0.0110], + [ 0.0055, -0.0080, -0.0157, ..., -0.0074, -0.0366, 0.0046], + [ 0.0316, 0.0016, 0.0092, ..., -0.0057, -0.0119, -0.0157]], + device='cuda:1', dtype=torch.float16, 
requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0195, -0.0715, 0.1113, ..., -0.1528, 0.0856, -0.0403], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([3.1596, 3.0366, 2.9180, ..., 1.5081, 2.5528, 3.1860], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.2221, 0.6211, -0.4953, ..., 0.2567, 0.0674, -0.4556], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0018, 0.0040, -0.0024, ..., 0.0355, -0.0060, -0.0008], + [ 0.0084, 0.0003, -0.0255, ..., 0.0059, 0.0073, 0.0152], + [-0.0014, -0.0324, -0.0615, ..., 0.0012, -0.0250, 0.0104], + ..., + [ 0.0085, -0.0075, 0.0259, ..., -0.0010, 0.0261, -0.0071], + [-0.0081, 0.0082, 0.0233, ..., 0.0083, -0.0072, 0.0106], + [-0.0114, 0.0072, 0.0143, ..., -0.0098, -0.0095, 0.0079]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.4348, -0.0032, -0.1772, ..., -0.0727, 0.0069, -0.1202], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[ 1.5135e-03, 2.1210e-02, -7.4272e-03, ..., -6.0768e-03, + -2.6584e-04, 1.1932e-02], + [ 1.6260e-04, 3.7441e-03, -1.4343e-02, ..., 1.8158e-02, + -1.2825e-02, -8.9407e-07], + [ 7.9193e-03, -1.8036e-02, 1.1223e-02, ..., -3.8147e-02, + -2.9087e-03, -5.4131e-03], + ..., + [-1.4465e-02, 1.2436e-02, -1.3103e-03, ..., -8.3694e-03, + 1.7273e-02, -9.1934e-04], + [-3.6774e-03, 3.2272e-03, 2.3682e-02, ..., -4.9706e-03, + 7.7705e-03, -1.4359e-02], + [ 1.0548e-03, -1.9181e-04, -1.6556e-02, ..., -1.1215e-02, + -6.9504e-03, -1.4145e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0270, -0.0575, 0.0640, ..., -0.1500, 0.0551, -0.1482], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: 
+tensor([1.5692, 1.8371, 1.9328, ..., 0.8499, 1.4697, 1.7667], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.3892, 0.2379, 0.1540, ..., 0.6268, 0.2169, 0.1550], device='cuda:1', + requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[-0.0026, 0.0020, -0.0059, ..., 0.0052, -0.0039, -0.0026], + [ 0.0157, -0.0013, -0.0331, ..., 0.0240, -0.0025, -0.0044], + [ 0.0204, 0.0067, -0.0038, ..., -0.0049, 0.0252, -0.0015], + ..., + [ 0.0077, -0.0031, 0.0343, ..., -0.0061, 0.0099, -0.0152], + [-0.0005, 0.0071, -0.0140, ..., -0.0133, -0.0071, 0.0009], + [-0.0012, 0.0004, 0.0090, ..., -0.0095, -0.0076, -0.0047]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.3181, -0.2350, -0.2834, ..., -0.2498, -0.1847, -0.2732], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[ 0.0366, 0.0088, -0.0167, ..., -0.0147, -0.0033, 0.0076], + [ 0.0050, 0.0103, 0.0122, ..., 0.0073, -0.0067, -0.0138], + [-0.0014, 0.0265, 0.0125, ..., -0.0273, 0.0040, -0.0028], + ..., + [ 0.0138, 0.0236, -0.0157, ..., 0.0255, -0.0269, -0.0320], + [ 0.0021, -0.0071, 0.0076, ..., -0.0042, 0.0137, -0.0034], + [-0.0111, 0.0175, -0.0121, ..., -0.0294, -0.0013, -0.0084]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.1592, -0.0064, 0.1971, ..., 0.0551, -0.0191, 0.0068], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([2.5678, 2.3816, 2.5756, ..., 1.8250, 2.4113, 2.7505], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.1836, 0.3324, -0.2291, ..., -0.1089, 0.5930, -0.2813], + device='cuda:1', requires_grad=True) +torch.Size([3072, 1024]) +Parameter containing: +tensor([[ 0.0123, -0.0058, 0.0023, ..., 0.0007, 0.0222, -0.0039], + [-0.0170, 0.0098, 
0.0225, ..., -0.0176, -0.0123, -0.0115], + [-0.0039, -0.0008, 0.0077, ..., 0.0050, 0.0103, -0.0090], + ..., + [ 0.0026, 0.0136, -0.0086, ..., 0.0001, -0.0151, -0.0018], + [ 0.0049, 0.0023, -0.0202, ..., 0.0277, 0.0162, -0.0295], + [-0.0040, 0.0099, -0.0184, ..., -0.0405, -0.0316, -0.0159]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([ 0.0484, -0.0519, -1.9238, ..., 0.1216, -0.0272, 0.0114], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 1024]) +Parameter containing: +tensor([[-1.9836e-02, 4.5563e-02, -5.3253e-03, ..., 7.5989e-03, + 7.5607e-03, 2.2369e-02], + [ 2.1725e-03, 1.5160e-02, -1.8494e-02, ..., 6.2218e-03, + -9.6977e-05, 1.4214e-02], + [-7.4997e-03, 1.7151e-02, -1.4481e-02, ..., -2.2156e-02, + 1.0445e-02, 9.1171e-03], + ..., + [-2.1515e-02, -1.4336e-02, -3.9558e-03, ..., 2.7351e-03, + -3.2997e-03, 2.3087e-02], + [ 1.9348e-02, 1.7441e-02, 4.3488e-03, ..., 6.1913e-03, + -1.8509e-02, 2.2385e-02], + [ 1.6891e-02, 7.5951e-03, -1.6037e-02, ..., -4.5509e-03, + 6.0081e-03, 1.7471e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.2128, 0.1381, 0.1891, ..., 0.0071, 0.0607, -0.0499], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.5486, 1.4414, 1.4516, ..., 0.8932, 1.4491, 1.5433], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0080, -0.0616, -0.0676, ..., -0.0960, -0.1752, -0.1096], + device='cuda:1', requires_grad=True) +torch.Size([4096, 1024]) +Parameter containing: +tensor([[ 0.0178, -0.0028, -0.0424, ..., -0.0160, -0.0282, 0.0149], + [ 0.0117, 0.0109, -0.0199, ..., 0.0012, 0.0099, -0.0078], + [-0.0045, -0.0047, -0.0147, ..., -0.0005, -0.0079, -0.0103], + ..., + [-0.0171, -0.0022, -0.0138, ..., 0.0246, -0.0203, -0.0171], + [ 0.0086, 0.0179, -0.0107, ..., -0.0160, -0.0177, -0.0097], + 
[ 0.0428, -0.0029, -0.0069, ..., -0.0147, 0.0129, 0.0242]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([4096]) +Parameter containing: +tensor([-0.2034, -0.6177, -0.2632, ..., -0.2834, -0.4905, -0.3958], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024, 4096]) +Parameter containing: +tensor([[-0.0127, -0.0107, 0.0178, ..., -0.0069, -0.0052, 0.0050], + [ 0.0019, -0.0100, -0.0021, ..., 0.0078, 0.0061, -0.0132], + [-0.0086, -0.0003, 0.0180, ..., 0.0017, 0.0049, 0.0218], + ..., + [-0.0206, -0.0111, -0.0025, ..., -0.0035, 0.0097, 0.0248], + [ 0.0121, -0.0078, 0.0101, ..., -0.0093, 0.0092, -0.0375], + [ 0.0031, -0.0039, 0.0001, ..., -0.0069, 0.0013, 0.0023]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([ 0.0218, -0.1331, -0.1234, ..., -0.1169, 0.0630, 0.0916], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([1.6224, 1.6137, 1.6369, ..., 1.4513, 1.7169, 1.8505], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0204, -0.0891, 0.0739, ..., 0.0297, 0.1517, -0.2596], + device='cuda:1', requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([0.9374, 1.0217, 0.9349, ..., 0.8221, 1.0595, 1.0507], device='cuda:1', + requires_grad=True) +torch.Size([1024]) +Parameter containing: +tensor([-0.0061, 0.1510, -0.0549, ..., 0.2748, 0.0765, 0.0091], + device='cuda:1', requires_grad=True) +torch.Size([77, 768]) +Parameter containing: +tensor([[ 0.0016, 0.0020, 0.0002, ..., -0.0013, 0.0008, 0.0015], + [ 0.0042, 0.0029, 0.0002, ..., 0.0010, 0.0015, -0.0012], + [ 0.0018, 0.0007, -0.0012, ..., -0.0029, -0.0009, 0.0026], + ..., + [ 0.0216, 0.0055, -0.0101, ..., -0.0065, -0.0029, 0.0037], + [ 0.0188, 0.0073, -0.0077, ..., -0.0025, -0.0009, 0.0057], + [ 0.0330, 0.0281, 0.0289, ..., 0.0160, 0.0102, -0.0310]], + device='cuda:1', requires_grad=True) 
+torch.Size([768, 768]) +Parameter containing: +tensor([[-0.0109, 0.0096, -0.0035, ..., -0.0010, 0.0115, -0.0039], + [-0.0054, -0.0049, 0.0055, ..., 0.0239, 0.0171, -0.0071], + [ 0.0032, 0.0101, -0.0155, ..., 0.0070, -0.0119, -0.0098], + ..., + [-0.0112, 0.0009, 0.0023, ..., -0.0169, -0.0096, -0.0147], + [ 0.0080, 0.0086, 0.0201, ..., -0.0108, -0.0191, 0.0043], + [-0.0168, -0.0018, -0.0156, ..., 0.0095, 0.0383, 0.0007]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 0.0139, 0.0147, -0.0089, ..., -0.0349, -0.0042, -0.0188], + [-0.0586, -0.0059, -0.0179, ..., 0.0012, -0.0068, 0.0254], + [-0.0211, -0.0321, 0.0308, ..., -0.0189, 0.0091, 0.0066], + ..., + [-0.0217, -0.0089, -0.0143, ..., -0.0153, 0.0053, 0.0016], + [-0.0086, -0.0083, -0.0049, ..., 0.0208, -0.0048, -0.0041], + [-0.0087, -0.0024, 0.0105, ..., -0.0037, -0.0148, 0.0030]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.2406, 0.1490, 0.4639, ..., -0.0241, 0.0349, -0.0144], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 3.2120e-03, 5.4474e-03, -1.3733e-02, ..., -8.3351e-04, + -6.3782e-03, 1.3786e-02], + [-9.3231e-03, 9.2888e-04, -1.3893e-02, ..., -1.1345e-02, + 1.7748e-03, -8.9569e-03], + [ 5.6648e-04, 1.0345e-02, 8.1718e-05, ..., 1.3893e-02, + 7.3791e-05, 6.1369e-04], + ..., + [-3.0212e-02, 3.7193e-03, 1.2009e-02, ..., 7.0229e-03, + 8.0566e-03, 1.4572e-02], + [ 6.4421e-04, -1.0941e-02, -6.3133e-03, ..., 5.6953e-03, + -7.6637e-03, -2.9297e-03], + [-4.3526e-03, 4.7607e-03, -6.6528e-03, ..., 7.3853e-03, + 4.3716e-03, 7.4348e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-6.8054e-02, -3.2990e-02, 4.1809e-02, -1.0217e-01, -5.4932e-03, + 4.2877e-03, -3.8681e-03, -4.8370e-02, -2.2751e-02, -2.1248e-03, + 1.4526e-02, -3.3607e-03, 
4.7058e-02, 1.4565e-02, -6.0730e-02, + -1.1035e-01, -2.3251e-03, 2.5635e-02, -6.1523e-02, -1.0469e+00, + -3.0869e-02, 6.7078e-02, -1.4503e-02, 2.6855e-02, -7.4646e-02, + 9.9121e-02, 1.1932e-02, -3.7598e-02, -2.9980e-01, 7.1533e-02, + -2.9648e-02, -1.3227e-03, 2.5940e-02, 6.6650e-02, 5.8899e-02, + -2.0615e-02, -3.5889e-02, 1.6830e-02, 1.0229e-01, 5.3040e-02, + 3.8280e-03, -2.3972e-02, 1.2512e-01, -4.3579e-02, 4.3335e-02, + 2.9175e-02, -2.9160e-02, -3.9093e-02, -2.2247e-02, 9.2850e-03, + -9.8511e-02, 3.3478e-02, -4.2023e-02, -3.2043e-02, -4.7394e-02, + 1.1938e-01, 5.7709e-02, 1.2903e-01, -2.9327e-02, -1.1314e-02, + 7.3090e-03, 3.3844e-02, 2.8290e-02, -1.6266e-02, 7.0740e-02, + -7.3486e-02, 5.4413e-02, -6.0120e-03, 5.1651e-03, -1.4600e-01, + -3.6896e-02, 2.1088e-02, 3.6914e-01, 9.0393e-02, -5.8517e-03, + 8.8318e-02, -8.3847e-03, 9.6512e-03, -3.3783e-02, -4.0710e-02, + 3.9703e-02, -2.7776e-04, -3.7262e-02, 8.9539e-02, -7.3853e-02, + -3.6743e-02, 1.1426e-01, 5.7335e-03, 6.6589e-02, 3.0502e-02, + 2.4170e-02, 6.7017e-02, -4.7363e-02, 1.1696e-02, -5.0568e-02, + 3.9001e-02, 1.2695e-02, -3.1647e-02, -4.1016e-02, -6.2683e-02, + -2.7084e-02, -2.3511e-01, 2.4002e-02, 1.0413e-01, 1.2520e-02, + 1.4908e-02, -8.5693e-02, -6.4575e-02, -2.4414e-02, -4.9408e-02, + 3.0045e-02, 2.5436e-02, -4.7333e-02, -3.4576e-02, -2.5772e-02, + 2.5345e-02, 7.3669e-02, 2.6398e-02, -1.2976e-01, 5.1544e-02, + 6.9199e-03, -6.0028e-02, -8.6792e-02, 1.3252e-02, 1.9196e-02, + -1.3283e-02, 1.0910e-02, 3.8025e-02, 7.4120e-03, -2.3865e-02, + -3.4882e-02, 4.7731e-04, -7.3059e-02, -1.1017e-02, -5.8685e-02, + -2.5238e-02, -2.3773e-02, 5.0201e-02, -2.6428e-02, -5.1361e-02, + -7.4219e-02, 4.5624e-02, 5.3192e-02, 1.3208e-01, 4.1931e-02, + 1.5083e-02, -1.1676e-01, 7.9895e-02, 6.4209e-02, 1.0178e-02, + 6.6681e-03, 8.0490e-03, -2.8870e-02, -6.2790e-03, -4.5357e-03, + -7.2266e-02, -6.2744e-02, -4.0955e-02, 1.5533e-02, -2.6749e-02, + 2.5845e-03, -8.1787e-02, -1.1185e-02, 1.2634e-01, -2.3071e-02, + -2.1301e-02, 5.0415e-02, 
1.6006e-02, 1.5850e-03, -7.7362e-03, + -2.8809e-02, -1.2871e-02, -1.6708e-02, -1.0777e-03, 2.6367e-02, + -7.3395e-03, -1.2238e-02, 2.3804e-02, -1.8433e-02, 5.7640e-03, + -3.2379e-02, -2.2598e-02, 1.7105e-02, 2.0096e-02, -6.7871e-02, + 3.6926e-02, -3.5248e-02, 1.6699e-01, 4.4495e-02, 1.0643e-02, + 6.1829e-02, -5.8960e-02, -2.0401e-02, 1.4259e-02, 1.8372e-02, + 1.3344e-02, 1.5945e-02, 1.5259e-02, 2.3511e-01, 5.3436e-02, + -2.8702e-02, -3.5767e-02, -7.1533e-02, 2.8320e-02, -3.3447e-02, + -4.7516e-02, -2.3035e-01, -9.4757e-03, 1.0022e-01, -4.8004e-02, + 4.8248e-02, 5.3329e-03, -1.3863e-02, 2.1835e-02, -1.0999e-01, + 4.5776e-02, 6.6772e-02, 1.4633e-02, -7.9956e-02, -2.9129e-02, + 7.8003e-02, 8.9050e-02, 1.2866e-01, 2.3392e-02, 3.8666e-02, + -1.2779e-02, 1.0010e-01, 5.1361e-02, 2.8229e-02, -2.4048e-02, + -8.2031e-02, 2.1572e-03, 1.1932e-02, 4.0558e-02, 1.6785e-02, + -5.2948e-02, 1.3023e-02, -3.5431e-02, 6.7215e-03, 5.4291e-02, + 1.8219e-02, 6.2439e-02, 9.8724e-03, 1.6693e-02, 3.9520e-02, + 1.7761e-02, -6.9952e-04, -7.2388e-02, -2.2934e-02, -3.5400e-02, + -5.9448e-02, -6.7566e-02, 1.5945e-02, -4.6814e-02, 1.3969e-02, + -1.1818e-02, -3.3112e-02, 1.5488e-03, 8.3618e-02, 2.3468e-02, + 2.1317e-02, -1.1615e-01, 7.6752e-03, 1.2589e-02, 2.4185e-02, + 4.6021e-02, 7.3662e-03, 4.4403e-02, -4.4785e-03, -5.6610e-03, + 4.7180e-02, 8.5144e-03, 2.9205e-02, -3.5370e-02, 6.6956e-02, + -5.9204e-03, 1.6235e-02, 5.1819e-02, -3.0167e-02, -4.4739e-02, + -3.5229e-03, -1.7444e-01, 1.3969e-02, 9.8343e-03, 1.5022e-02, + 2.7435e-02, 1.0309e-01, 6.2141e-03, 6.8848e-02, -3.3936e-02, + -8.7036e-02, -4.8737e-02, -6.1218e-02, -3.6224e-02, -5.8651e-04, + -2.9316e-03, 2.5574e-02, 4.9934e-03, -6.0394e-02, -1.1604e-02, + 6.9641e-02, -2.2621e-03, -3.5405e-04, 9.9121e-02, -5.4199e-02, + 5.6976e-02, 7.6782e-02, 3.0914e-02, 5.4718e-02, 8.3374e-02, + -5.9776e-03, 1.4679e-02, -8.5449e-02, -2.2125e-02, 1.2827e-03, + 2.2003e-02, -1.9577e-02, 1.0840e-01, 1.0445e-02, -3.1006e-02, + -1.0608e-01, -2.5063e-03, 
-3.9398e-02, 4.1473e-02, 9.4531e-01, + -1.7480e-01, 2.6947e-02, 9.5459e-02, -9.0866e-03, -1.5762e-02, + 9.8022e-02, 4.6143e-02, 2.5925e-02, -1.1609e-01, -2.6764e-02, + -3.0731e-02, -6.2469e-02, -2.6154e-02, 1.9211e-02, -1.2093e-02, + 2.5696e-02, 2.4597e-02, 3.1036e-02, -3.3356e-02, -3.4210e-02, + -6.2656e-04, 1.9779e-03, 3.5645e-02, 1.4175e-02, 6.9763e-02, + 3.1395e-03, 1.4519e-02, -7.8506e-03, 1.6876e-02, 1.1185e-02, + -2.8137e-02, -8.9233e-02, 7.1899e-02, 9.4528e-03, 1.0254e-02, + -2.3453e-02, -4.9194e-02, 5.1880e-02, -3.7750e-02, -2.7008e-03, + -2.6794e-02, 3.9001e-02, 2.9114e-02, 9.4360e-02, -3.6469e-02, + 8.0322e-02, -2.8580e-02, -7.2327e-02, 7.2632e-02, 6.1340e-02, + -9.0576e-02, -5.8823e-03, 3.3722e-02, -1.2524e-01, -2.5284e-02, + -5.0812e-03, -5.8228e-02, -1.0323e-02, -5.1971e-02, 1.4854e-02, + 6.3660e-02, -2.4357e-03, 6.1676e-02, 1.5701e-02, -6.9763e-02, + -5.1918e-03, 1.1115e-01, 9.1370e-02, -3.6392e-03, -5.7648e-02, + 3.8075e-04, 1.0559e-02, 6.3477e-02, -3.2623e-02, -3.0762e-02, + -5.0697e-03, -5.0171e-02, 7.5806e-02, 3.7018e-02, 1.3802e-02, + 6.2317e-02, 7.5317e-02, 6.1707e-02, 9.2346e-02, -1.6541e-02, + -4.5349e-02, -1.5888e-03, 8.4412e-02, -1.1151e-01, 1.6040e-01, + -4.4531e-01, -2.8351e-02, 7.9498e-03, 6.2927e-02, 3.2043e-02, + 6.7627e-02, 6.7810e-02, -2.2354e-02, -6.8848e-02, -2.7359e-02, + -1.2466e-02, -1.7288e-02, 3.1219e-02, -1.2909e-02, -7.4768e-02, + -8.1726e-02, 6.1676e-02, -2.0020e-02, -3.9597e-03, 1.9165e-01, + -5.9296e-02, 1.4763e-02, 2.3895e-02, 9.0332e-03, 1.3268e-02, + -2.5528e-02, 3.5217e-02, -2.2583e-02, 1.4984e-02, 1.0956e-02, + -3.8223e-03, -3.0579e-02, 2.8114e-03, -5.1056e-02, 8.6426e-02, + -3.9795e-02, -1.8005e-02, -9.5886e-02, 1.1017e-02, -6.2225e-02, + -3.1982e-02, 5.2765e-02, 3.7811e-02, 3.0155e-03, -3.3447e-02, + 2.3098e-03, 3.8300e-02, -7.0724e-03, 2.4094e-02, 3.9856e-02, + 5.1003e-03, -2.6169e-02, 1.3672e-02, 1.8967e-02, -7.4829e-02, + -2.9785e-02, -1.1353e-01, 8.1787e-03, -1.0760e-01, 1.2680e-02, + -9.7733e-03, -7.5684e-02, 
4.1504e-03, 1.5175e-02, -5.3925e-02, + -6.9885e-03, -8.6731e-02, -1.7380e-02, -4.2175e-02, -3.3630e-02, + -7.9041e-02, -9.5886e-02, 9.3384e-02, -1.5327e-02, 1.0315e-02, + 7.7896e-03, -2.7298e-02, -3.5278e-02, -2.7573e-02, -6.9214e-02, + 1.7685e-02, 7.1106e-03, 1.8295e-02, 3.5522e-02, 3.6438e-02, + 3.7842e-02, -8.8959e-03, 3.0457e-02, -2.2018e-02, 6.5918e-02, + 1.4091e-02, -8.5144e-02, 3.9093e-02, -3.1250e-02, 3.2898e-02, + 4.5349e-02, -4.8981e-03, -2.3346e-02, -1.4252e-02, -3.4973e-02, + -7.4959e-03, 1.8967e-02, 8.8043e-03, 1.5701e-02, -3.9612e-02, + -8.4610e-03, -7.1411e-02, -1.5762e-02, 1.0944e-01, -4.9042e-02, + 1.1520e-02, -5.0964e-02, -5.5511e-02, 7.5245e-04, -2.2736e-02, + 1.3863e-02, -9.8938e-02, -5.9631e-02, -1.8616e-02, 1.4084e-02, + 2.2812e-02, 3.1342e-02, 2.8580e-02, 2.3155e-03, -5.0201e-02, + 2.8488e-02, 3.7354e-02, 2.1378e-02, -3.1708e-02, 9.5703e-02, + -7.6050e-02, 5.0781e-02, 7.0915e-03, 5.2368e-02, -2.5894e-02, + -2.5925e-02, -3.4943e-02, 2.7786e-02, 1.8723e-02, 5.9296e-02, + 1.9211e-02, 4.8889e-02, 6.6772e-02, -4.4952e-02, -2.7298e-02, + 3.8567e-03, -1.3252e-02, -6.6467e-02, 1.8066e-02, -3.2288e-02, + 5.1239e-02, -5.8075e-02, -5.9509e-02, 1.2122e-01, 2.5482e-02, + 9.1003e-02, 6.6467e-02, -3.9154e-02, -2.9022e-02, -4.0100e-02, + 3.3295e-02, -3.6469e-02, -5.4413e-02, 4.5258e-02, -4.5929e-02, + -1.8219e-02, -6.0638e-02, 1.9638e-02, 5.7602e-03, -3.3234e-02, + -2.8839e-03, 3.9215e-02, -4.5990e-02, 4.1229e-02, 6.1951e-03, + 5.2734e-02, 4.0497e-02, -7.1594e-02, -6.1554e-02, -4.0253e-02, + -6.9199e-03, 2.4307e-02, 4.0863e-02, -9.3307e-03, -4.0527e-02, + 3.4088e-02, 1.7509e-03, 2.4307e-02, -4.4983e-02, 4.6875e-02, + -7.0496e-02, 8.0872e-02, 3.1891e-02, -3.9825e-02, 3.0853e-02, + 6.2195e-02, 6.9885e-02, 7.8430e-02, 5.4398e-03, -7.7209e-02, + -2.5879e-02, 5.3528e-02, 1.0399e-02, 2.3384e-03, 1.1877e-01, + 7.5500e-02, -1.1719e-02, -3.9795e-02, -3.1143e-02, -1.6998e-02, + -3.6163e-02, -2.7847e-02, 5.0812e-03, -2.8656e-02, 3.3203e-02, + 6.3049e-02, -2.5665e-02, 
-5.2490e-02, 4.9500e-02, 5.5054e-02, + -2.0462e-02, 3.9825e-02, 5.5908e-02, 3.5583e-02, 6.1066e-02, + 1.4175e-02, -5.0751e-02, -6.8848e-02, -1.0551e-02, -5.2551e-02, + 6.3538e-02, 4.2419e-02, 1.3580e-03, -6.4880e-02, 2.7115e-02, + 2.2491e-02, -1.0284e-01, -5.1208e-02, -9.2468e-03, 3.0960e-02, + 4.0161e-02, 4.1107e-02, 4.2908e-02, -4.8920e-02, -6.6757e-03, + -1.2726e-02, 5.1849e-02, 3.6041e-02, -2.0264e-02, -3.6285e-02, + -2.1423e-02, 4.6692e-03, -1.8871e-04, -3.7018e-02, 4.8615e-02, + -3.8452e-02, -2.3209e-02, 8.7585e-02, -3.8757e-03, 4.6265e-02, + -3.4790e-03, -1.3857e-03, -3.9612e-02, 7.3608e-02, 3.4370e-03, + 3.3798e-03, 3.3374e-01, -1.3329e-02, -1.2596e-02, 1.2451e-02, + 4.9706e-03, -7.4585e-02, -5.8105e-02, 3.9215e-03, 5.8823e-03, + 4.3610e-02, 6.9275e-02, -5.4535e-02, -2.2919e-02, 3.4271e-02, + 8.8013e-02, 1.5236e-02, 1.9028e-02, 2.9572e-02, -4.9362e-03, + -1.6998e-02, 2.4063e-02, -6.8359e-02, -4.0710e-02, -8.0750e-02, + -2.1484e-02, -4.3976e-02, -5.2521e-02, 1.6144e-02, -1.3771e-02, + -7.4615e-03, -8.8318e-02, 3.7750e-02, 2.3937e-03, 4.7668e-02, + 4.7363e-02, 3.9520e-02, -2.2736e-02, 1.9348e-02, 2.7359e-02, + 1.1086e-02, -7.9163e-02, -3.7262e-02, -3.1525e-02, -4.9591e-02, + -5.1056e-02, -3.1830e-02, -5.1575e-02, 6.5491e-02, 2.6031e-02, + 1.2321e-03, 5.7800e-02, 1.0864e-02, 4.7241e-02, 4.1290e-02, + -6.8665e-02, -3.8471e-03, 5.8838e-02, -9.4986e-03, 2.6894e-03, + 5.6854e-02, 7.0862e-02, -3.1311e-02, 1.4397e-02, -7.6065e-03, + -5.2429e-02, -3.7018e-02, -1.4549e-02, 2.1553e-03, 1.0292e-02, + -2.3651e-02, -5.5809e-03, 5.0774e-03, -4.6051e-02, 1.0658e-02, + 8.3847e-03, 2.6440e-01, -9.6741e-02, 8.6365e-02, -1.8860e-02, + 1.4420e-02, 1.4282e-02, -1.6235e-02, -3.0167e-02, 4.7363e-02, + -5.3741e-02, 3.7170e-02, 1.0132e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.8401, 1.6507, 1.7948, 1.7800, 1.7653, 1.7676, 1.8600, 1.8236, 1.7518, + 1.7366, 1.7352, 1.7177, 1.7873, 1.7211, 1.6886, 1.7202, 
1.7312, 1.7573, + 1.6900, 1.7321, 1.8321, 1.7103, 1.7793, 1.7726, 1.8100, 1.7619, 1.7042, + 1.7339, 0.9977, 1.7922, 1.6763, 1.8729, 1.7377, 1.7911, 1.7570, 3.3400, + 1.7528, 1.8010, 1.7648, 1.7912, 1.8502, 1.7852, 1.8444, 1.7257, 1.8150, + 1.8192, 1.7501, 1.7457, 1.6584, 1.8334, 1.7475, 1.7645, 1.7709, 1.7155, + 1.7535, 1.7625, 1.7556, 1.5602, 1.7653, 1.7121, 1.8275, 1.8177, 1.7712, + 1.8210, 1.7243, 1.6933, 1.7744, 1.6863, 1.8974, 1.7603, 1.5926, 1.7837, + 1.7453, 1.6028, 1.7849, 1.7217, 1.8000, 1.7972, 1.6751, 1.7951, 1.7870, + 1.7679, 1.7635, 1.7367, 1.7646, 1.7491, 1.8091, 1.8008, 1.7609, 1.6659, + 1.7907, 1.7056, 1.8515, 1.8243, 1.7269, 1.7128, 1.7645, 1.8122, 1.8631, + 1.7797, 1.7646, 0.7897, 1.6948, 1.7812, 1.6742, 1.7120, 1.6592, 1.7276, + 1.7695, 1.7760, 1.7505, 1.7188, 1.7696, 1.7601, 1.8497, 1.7613, 1.7714, + 1.7569, 1.6808, 1.7755, 1.7906, 1.7683, 1.7978, 1.7816, 1.7833, 1.7623, + 1.7604, 1.7623, 1.6551, 1.7792, 1.7867, 1.7288, 1.7727, 1.6616, 1.8281, + 1.8197, 1.7334, 1.7790, 1.7913, 1.7861, 1.7655, 1.8017, 1.7967, 1.8031, + 1.7339, 1.7511, 1.7536, 1.7109, 1.8131, 1.7464, 1.7341, 1.8073, 1.8755, + 1.7642, 1.7574, 1.7459, 1.7882, 1.6927, 1.7318, 1.7242, 1.7431, 1.7230, + 1.7328, 1.7952, 1.7462, 1.7525, 1.7464, 1.7703, 1.7193, 1.8042, 1.8187, + 1.7647, 1.7742, 1.6977, 1.7334, 1.7333, 1.7343, 1.7863, 1.6834, 1.7995, + 1.6882, 1.7592, 1.7986, 1.7490, 1.7527, 1.7209, 1.7338, 1.7539, 1.8268, + 1.8150, 1.7164, 1.7983, 1.6627, 1.7415, 1.6635, 1.6967, 1.7464, 1.8372, + 1.7354, 1.7487, 1.7584, 1.7058, 1.6908, 1.7519, 1.7761, 1.5736, 1.2228, + 1.7728, 1.7046, 1.7144, 1.7717, 1.8086, 1.7980, 1.7459, 1.8135, 1.8093, + 1.7349, 1.7571, 1.7937, 1.7435, 1.7194, 1.7525, 1.7763, 1.7040, 1.7347, + 1.8036, 1.7626, 1.7879, 1.7484, 1.7946, 1.7265, 1.7901, 1.7673, 1.7644, + 1.7181, 1.7174, 1.8284, 1.7356, 1.8238, 1.6911, 1.7599, 1.7935, 1.7056, + 1.7744, 1.8074, 1.6990, 1.7394, 0.8243, 1.6952, 1.8240, 1.8201, 1.7702, + 1.8529, 1.4891, 1.8102, 1.7815, 1.7122, 1.7379, 1.7450, 
1.7833, 1.7664, + 1.6962, 1.7362, 1.6765, 1.7450, 1.6934, 1.7679, 1.7699, 1.7879, 1.7363, + 1.7685, 1.6439, 1.7475, 1.7023, 1.7775, 1.8394, 1.7594, 1.7362, 1.7523, + 1.7827, 1.7071, 1.8144, 1.6561, 1.7740, 1.7412, 1.6781, 1.7144, 1.7361, + 1.7804, 1.8236, 1.9153, 1.7353, 1.7440, 1.7159, 1.7715, 1.7451, 1.8004, + 1.8078, 1.7116, 1.7427, 1.8237, 1.8078, 1.7338, 1.6746, 1.7068, 1.8114, + 1.7397, 1.7601, 1.7596, 1.6326, 1.5706, 1.8009, 1.6958, 1.7471, 1.7363, + 1.8414, 1.7399, 1.7021, 1.6456, 1.7571, 1.6864, 1.7856, 1.8161, 1.7776, + 1.7265, 1.6645, 1.5918, 1.7744, 1.7140, 1.6622, 1.7617, 1.8093, 1.8087, + 1.7346, 1.7258, 1.6232, 1.7583, 1.8145, 1.7119, 1.7761, 1.7462, 1.6483, + 1.7339, 1.7611, 1.6985, 1.8347, 1.6987, 1.7323, 1.7389, 1.7299, 1.7391, + 1.7672, 1.7488, 1.7839, 1.7665, 1.7631, 1.7777, 1.6339, 1.6938, 1.8157, + 1.7464, 1.6534, 1.7763, 1.7792, 1.7784, 1.7034, 1.6523, 1.7126, 1.7317, + 1.7333, 1.7193, 1.7667, 1.7845, 1.7588, 1.7258, 1.7824, 1.7037, 1.6128, + 1.7279, 1.7288, 1.7944, 1.6676, 1.7848, 1.7242, 1.7464, 1.7064, 1.7405, + 1.7550, 1.7883, 1.6747, 1.8077, 0.6920, 1.7930, 1.6939, 1.5853, 1.7549, + 1.7464, 1.7373, 1.7662, 1.7668, 1.7981, 1.7842, 1.7398, 1.8111, 1.6967, + 1.6228, 1.7450, 1.6924, 1.7361, 1.8465, 1.6907, 1.7601, 1.7693, 1.7728, + 1.7786, 1.9575, 1.7447, 1.7650, 1.7740, 1.7779, 1.7729, 1.6935, 1.8035, + 1.8343, 1.7179, 1.8117, 1.7885, 1.7588, 1.6829, 1.7721, 1.7336, 1.6843, + 1.7179, 1.6835, 1.7717, 1.6974, 1.7616, 1.7698, 1.7382, 1.7673, 1.7327, + 1.7272, 1.7410, 1.8681, 1.7535, 1.6609, 1.7775, 1.7975, 1.6045, 1.6706, + 1.6781, 1.7280, 1.7294, 1.7144, 1.7807, 1.7379, 1.6965, 1.6218, 1.7630, + 1.7662, 1.7186, 1.6750, 1.7716, 1.8142, 1.7681, 1.7514, 1.7375, 1.7470, + 1.6814, 1.7563, 1.7806, 1.6861, 1.7027, 1.7264, 1.7811, 1.7573, 1.6235, + 1.7568, 1.7646, 1.7491, 1.7098, 1.7414, 1.7587, 1.7533, 1.7227, 1.6833, + 1.7702, 1.7296, 1.7516, 1.7460, 1.6894, 1.7191, 1.7573, 1.7020, 1.7415, + 1.7898, 1.7779, 1.7721, 1.6386, 1.7686, 1.7720, 0.9429, 
1.7820, 1.7034, + 1.7651, 1.7667, 1.7587, 1.8170, 1.6245, 1.7726, 1.7918, 1.7986, 1.6991, + 1.7649, 1.6397, 1.7385, 1.6653, 1.7968, 1.6853, 1.6544, 1.7029, 1.7351, + 0.9336, 1.7428, 1.7244, 1.7644, 1.7481, 1.7767, 1.8191, 1.7117, 1.7716, + 1.5829, 2.0531, 1.7985, 1.7173, 1.7007, 1.7612, 1.7118, 1.7456, 1.7821, + 1.7097, 1.7723, 1.5083, 1.5076, 1.6904, 1.7302, 1.8118, 1.7102, 1.7321, + 1.7028, 1.7465, 1.8067, 1.8033, 1.6827, 1.3774, 1.7701, 1.7287, 1.6279, + 1.7535, 1.6996, 1.7529, 1.7969, 1.7698, 1.7735, 1.7865, 1.7989, 1.8025, + 1.7413, 1.7794, 1.7462, 1.8258, 1.5884, 1.7895, 1.7249, 1.7411, 1.7231, + 1.7466, 1.7880, 1.8221, 1.7345, 1.5172, 1.5861, 1.7265, 1.1932, 1.8105, + 1.7271, 1.7999, 1.7737, 1.7159, 1.7391, 1.5650, 1.0842, 1.6609, 1.7516, + 1.6644, 1.6834, 1.7752, 1.7820, 1.7207, 1.7830, 1.7473, 1.6890, 1.7324, + 1.7598, 1.8085, 1.7870, 1.6808, 1.7300, 1.6913, 1.8049, 1.6898, 1.7118, + 1.5790, 1.7330, 1.7928, 1.7111, 1.7586, 1.7335, 1.7591, 0.8876, 1.7496, + 1.6044, 1.8343, 1.7631, 1.8049, 1.7443, 1.6705, 1.8017, 1.7247, 1.7761, + 1.7494, 1.6933, 1.7185, 1.7567, 1.7555, 1.3987, 1.8214, 1.7133, 1.7056, + 1.7376, 1.6929, 1.6529, 1.7626, 1.7944, 1.7649, 1.7880, 1.8361, 1.7461, + 1.6423, 1.7435, 1.7214, 1.7357, 1.7379, 1.7925, 0.8717, 1.7399, 1.7446, + 1.7784, 1.6280, 1.7529, 1.8482, 1.8067, 1.7008, 1.7553, 1.7262, 1.6551, + 1.8407, 1.7429, 1.7241, 1.7173, 1.6979, 1.6624, 1.7336, 1.7440, 1.7721, + 1.7816, 1.8321, 1.8003, 1.6417, 1.7096, 1.7309, 1.7556, 1.7408, 1.7249, + 1.7821, 1.8816, 1.7762, 1.7795, 1.7538, 1.7687, 1.8382, 1.7207, 1.7429, + 1.7809, 1.7654, 1.7727, 1.7137, 1.7231, 1.7254, 1.7581, 1.9079, 1.7838, + 1.8562, 1.7747, 1.7307, 1.7510, 1.6652, 1.7962, 1.8084, 1.6713, 1.7405, + 1.7645, 1.7221, 1.7829, 1.5737, 1.6718, 1.8124, 1.7877, 1.7250, 1.8048, + 1.7633, 1.6581, 1.7901, 1.8515, 1.7576, 1.7432, 1.7884, 1.6867, 1.6636, + 1.7865, 1.7576, 1.7160, 1.7054, 1.7541, 1.7233, 1.7555, 1.7447, 1.7340, + 1.9150, 1.5423, 1.6653, 1.7190, 1.7784, 1.7623, 1.7703, 
1.7078, 1.7105, + 1.7365, 1.8146, 1.7623, 1.7655, 1.6682, 1.8041, 1.7280, 1.7973, 1.7215, + 0.8671, 1.7118, 1.7316, 1.8002, 1.7620, 1.7976, 1.7504, 1.7343, 1.7921, + 1.7943, 1.8092, 1.7626], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-6.3961e-02, -1.8907e-01, -6.6267e-02, 1.1582e-01, -1.9398e-02, + -7.6774e-02, -9.2488e-02, 1.5014e-01, -1.5402e-01, 5.3630e-02, + -2.6058e-01, 1.6292e-01, -6.2387e-02, -2.1546e-01, -2.1034e-02, + 1.8510e-01, 2.2416e-01, -8.3732e-02, 6.8939e-02, -1.7281e-01, + 6.4749e-02, -1.6842e-02, -1.8385e-02, -2.3905e-01, -6.9280e-02, + -4.7777e-02, 2.9941e-01, 1.0106e-01, -7.4397e-01, -5.8690e-02, + -1.4871e-01, 1.6635e-01, 1.8614e-01, 5.2980e-02, 1.5625e-03, + 1.1102e+00, 1.6345e-01, 2.9891e-01, 5.7778e-02, 8.6953e-02, + 1.0396e-02, 3.3394e-02, -1.1215e-03, -2.0671e-01, 3.9431e-02, + 3.0618e-01, -9.5164e-02, -1.0811e-01, 3.2754e-01, 2.4226e-01, + -5.1117e-02, 7.2014e-02, 5.2677e-02, 5.2326e-02, 4.5423e-01, + 9.4081e-03, 1.0807e-01, -3.1810e-01, 1.6348e-01, -1.6129e-01, + 6.9611e-02, 9.1917e-02, -1.9035e-02, 1.1163e-01, -2.0498e-01, + 5.3379e-02, 2.6206e-01, -5.0303e-01, 1.6840e-02, 2.6704e-01, + 2.5346e-01, 9.7269e-02, 1.9748e-02, 7.3897e-02, 1.1860e-01, + 4.0665e-02, 1.0623e-03, -7.4162e-02, 7.3179e-02, 3.3942e-01, + -9.5987e-03, 8.2016e-03, -1.7550e-01, 3.1015e-01, -2.2220e-02, + 1.7016e-02, -2.7397e-02, 1.7581e-01, 5.6219e-03, 2.9484e-01, + -1.8801e-01, 4.0724e-01, -9.6560e-02, 5.7297e-02, -9.1629e-03, + -4.6879e-01, 3.3328e-02, -9.2649e-02, -9.6031e-03, 1.9044e-01, + 1.5786e-01, 9.0443e-01, -1.8836e-01, 1.7412e-01, 5.5520e-02, + -1.3535e-01, -4.3605e-02, 7.9157e-02, 1.8799e-01, 6.7221e-02, + -7.6727e-03, -1.9825e-02, -1.3374e-01, 2.9653e-02, -1.9720e-01, + -6.4861e-02, 5.6295e-02, 1.8078e-01, 3.5850e-01, -2.8073e-02, + 1.3744e-01, -3.7472e-02, -1.4722e-01, 3.6151e-02, 2.5569e-02, + -2.2046e-02, -1.2437e-01, 1.0499e-01, 1.7835e-01, 7.1363e-02, + 4.1597e-03, -1.4217e-01, -2.4751e-01, 1.5900e-01, 
-7.1130e-02, + -1.9061e-01, 2.4493e-01, 3.5116e-02, 1.5471e-02, 2.2332e-02, + 8.9153e-02, 3.0060e-01, 3.8098e-02, -6.2623e-02, 2.9371e-01, + 1.8181e-01, 5.7994e-02, 5.8664e-02, 2.6419e-02, -1.2824e-01, + -1.0993e-01, -4.0017e-02, -3.9351e-02, -1.4422e-01, 8.7471e-02, + 8.4085e-02, -1.9828e-01, -1.3173e-01, 1.4072e-01, 1.5565e-01, + 2.0243e-02, -4.6226e-02, 1.5923e-01, 4.5654e-02, -2.3367e-01, + 1.2978e-01, 2.1358e-01, -8.2719e-02, 1.0171e-01, -2.2423e-01, + 5.3051e-03, -4.1902e-02, 3.8611e-01, 1.2323e-01, -1.4924e-01, + 2.8467e-01, 1.9509e-01, -1.3619e-01, 2.4334e-01, -8.0153e-02, + -6.7516e-02, -6.7610e-02, -1.0731e-01, 2.4883e-02, 2.8200e-01, + 1.2155e-01, 3.6426e-01, 2.3474e-02, -2.8293e-01, 2.2408e-01, + 3.6100e-01, -1.0994e-01, 3.4722e-02, 7.4121e-02, 2.7931e-01, + -1.4990e-01, 2.5177e-01, -1.8813e-02, 8.3328e-02, 8.4020e-02, + -9.8448e-02, 2.7714e-01, 7.4243e-02, 8.0239e-02, 2.0202e-01, + 8.1008e-02, 4.3401e-01, 1.8625e-02, 8.4404e-02, 2.4402e-01, + -1.6691e-01, -8.8845e-02, -1.5939e-01, -2.0858e-01, 2.5397e-01, + 6.0879e-02, 2.0121e-02, -7.9232e-02, -1.6508e-02, -1.3059e-01, + -6.2701e-02, -1.7213e-01, 3.4699e-01, 1.5253e-01, -1.4908e-01, + 1.9412e-01, 1.3451e-01, 2.2582e-01, -9.5174e-02, 2.2037e-01, + 1.0803e-01, 5.3375e-02, 2.2560e-03, 2.7359e-02, -1.8865e-01, + 8.4389e-02, 5.1940e-02, 2.0583e-01, -1.5264e-01, -8.4608e-02, + 9.0049e-02, -8.0713e-02, 1.4706e-01, -1.6154e-02, -4.8605e-02, + 2.0150e-01, -1.8314e-01, 2.6463e-01, -3.1260e-01, 9.1237e-02, + 2.0335e-01, 1.1086e-01, -1.5599e-02, 3.6003e-01, 2.5739e-02, + -2.1332e-01, -7.0601e-03, -1.2192e-01, 1.9431e-01, 1.4852e-01, + 1.2345e-01, 1.8613e-01, 2.5473e-02, 3.4473e-02, 1.1012e-01, + 1.4641e-01, 2.2227e-02, -5.9297e-02, -1.3266e-01, 3.4497e-01, + -3.0586e-01, 7.9368e-01, 3.9487e-02, -1.4621e-01, 6.5543e-02, + 1.7141e-02, -2.0179e-01, 2.3884e-02, -4.0460e-02, -4.6599e-02, + 2.1172e-01, -8.7700e-02, -1.6130e-01, 6.9310e-02, -2.5059e-01, + 5.2468e-02, -2.3732e-01, 1.8512e-01, 7.2659e-02, 4.7407e-02, + 
-4.3530e-02, -1.4774e-03, 2.0449e-01, 2.7766e-02, -2.9549e-02, + -2.0292e-01, -1.0574e-02, -1.1197e-01, -3.3860e-01, 5.2191e-02, + -5.6128e-02, -2.4438e-01, 1.7825e-01, -3.4640e-01, -2.2049e-01, + 3.1959e-01, -1.0011e-01, -1.3569e-01, -1.9682e-01, -1.7635e-02, + 3.4345e-01, 1.9722e-02, 3.3036e-02, -3.2205e-01, 1.0287e-01, + -1.5752e-01, 1.2438e-01, 3.6095e-01, 1.3326e-01, 2.4184e-01, + 1.2926e-01, -2.5102e-02, -7.7222e-02, 2.3458e-01, -5.3531e-02, + 2.6737e-01, -1.2668e-02, -1.9242e-01, -2.9800e-01, 3.0770e-01, + 9.5187e-02, 1.2686e-01, -6.7057e-02, 9.6127e-02, -1.0121e-01, + -3.1650e-01, 2.8864e-01, 5.3428e-02, -1.3012e-01, -1.6185e-01, + 2.7272e-01, -1.0872e-01, -6.3737e-02, 1.7065e-01, -4.9168e-02, + -7.3678e-03, -3.3221e-01, -3.0027e-01, 1.6127e-01, -2.2458e-01, + 2.3008e-02, 2.1806e-02, 2.3908e-02, -1.7871e-02, 2.8359e-01, + 5.8141e-02, -3.5832e-02, 3.2671e-01, -1.0304e-01, 1.5364e-02, + -4.7144e-02, 2.1080e-01, -6.6814e-02, -1.3583e-02, 2.7550e-01, + -1.3174e-01, 3.0048e-01, -3.1258e-01, 9.8796e-02, 9.0674e-02, + 1.2171e-01, -2.1045e-01, 4.1959e-01, 1.4062e-01, 1.7522e-01, + -1.5909e-01, -3.8485e-01, 3.5587e-01, 6.1791e-02, 2.9728e-01, + -3.2739e-03, 2.4748e-01, -2.4907e-02, -5.3884e-01, 7.2989e-02, + 3.4384e-02, -1.1574e-01, -2.2263e-01, -1.0675e-01, 9.7501e-02, + 3.4671e-02, -1.4328e+00, -1.1062e-02, 1.9438e-01, 1.0939e-01, + -3.7655e-01, -1.6929e-01, 3.2262e-01, 1.3196e-01, 2.6441e-01, + 3.8306e-01, 6.4132e-02, 1.5814e-01, 1.8378e-02, -2.0566e-03, + 1.6024e-02, -4.8503e-02, -1.0331e-01, 1.5740e-01, -1.9693e-02, + -9.4622e-02, 3.1541e-02, -4.3777e-01, -3.6881e-02, -3.7918e-02, + 7.2006e-02, 7.6161e-02, -1.3037e-01, -1.9686e-01, -1.7425e-01, + 1.3822e-01, 1.5573e-01, 3.1976e-03, -3.8504e-02, -1.8306e-01, + 4.7375e-02, -4.3621e-02, -6.6590e-02, 4.0533e-02, 1.7594e-01, + -4.2811e-02, -8.4193e-02, 2.3910e-01, 1.7940e-01, 2.1909e-01, + -9.5213e-02, 8.6281e-03, 1.5524e-01, -6.5299e-02, -6.1806e-02, + 5.6195e-02, -2.5332e-01, 2.3617e-01, 9.1083e-02, 1.9428e-01, + 
3.3102e-01, -8.5299e-02, -1.2100e-01, -4.5219e-01, -2.7028e-01, + 2.8839e-01, 2.4645e-01, -1.1720e-01, 3.4503e-01, 1.4733e-01, + 4.0482e-02, -4.2903e-02, -7.8322e-01, 3.0277e-02, -1.6751e-01, + 1.4465e-02, 3.2363e-01, 2.3789e-01, -1.0457e-01, 6.3565e-02, + 2.0327e-01, 4.7821e-02, -4.9042e-01, 7.9319e-02, 9.3845e-02, + -7.2455e-02, 1.6316e-02, -3.3306e-01, -3.0791e-02, -1.1575e-01, + -4.9676e-03, 4.2699e-01, -1.5469e-01, -1.4036e-01, 1.1827e-01, + -2.2014e-01, 2.5985e-01, 4.4695e-02, 2.5353e-01, -1.0868e-02, + -1.2368e-01, -8.1046e-02, -8.4880e-02, 2.9189e-01, 1.9647e-01, + -1.4869e-01, 2.1447e-02, -7.9529e-02, -5.2323e-02, -5.4333e-02, + 5.1668e-02, -8.1463e-02, -1.1637e-02, -4.8212e-01, -2.3931e-02, + 5.3116e-03, -1.9904e+00, 1.3696e-02, 1.9362e-01, -1.3809e-01, + -1.9681e-01, 4.8385e-02, 3.8303e-02, 5.6370e-01, 2.7663e-01, + 1.9251e-02, 2.1590e-01, 4.1623e-02, 2.3325e-01, -8.7494e-02, + -2.3375e-01, -6.6074e-02, -1.8052e-02, -2.0547e-01, -4.7173e-01, + 1.2608e-01, 2.6116e-02, -3.2520e-01, 3.5194e-01, -2.5267e-01, + -2.0281e-02, -2.0063e-02, -3.7879e-02, 1.0518e-01, 7.7329e-02, + 8.1078e-02, -7.6093e-02, -3.4030e-01, 7.6515e-02, 1.1958e-01, + 9.5325e-02, -1.0395e-01, -6.4384e-02, 2.3667e-01, -4.1264e-02, + 3.1444e-01, 8.5630e-02, -2.8794e-01, 6.7791e-01, 1.5426e-01, + -4.1703e-02, -1.1656e-02, -2.8440e-01, -1.0204e-01, 3.4792e-02, + 1.3765e-01, -6.2092e-02, 7.4738e-02, 2.0551e-01, 1.2253e+00, + -1.0391e-01, 1.1737e-01, -3.6290e-01, 1.1110e-01, 6.8894e-02, + 1.8224e-01, -9.1156e-02, -1.1810e-01, -3.2335e-02, 1.4059e-01, + -3.5351e-01, -1.7719e-01, -2.7841e-02, 1.7805e-01, -2.4260e-02, + 3.5481e-01, -1.5195e-01, -2.2427e-02, 2.1070e-01, 9.3560e-02, + 9.1329e-02, -5.9248e-02, 7.8027e-02, 7.1357e-02, 4.7972e-02, + 1.7722e-01, 7.8401e-01, 1.8207e-01, 1.1456e+00, 8.1968e-02, + 9.6185e-02, -8.7804e-02, 1.2846e-01, 1.7551e-01, 3.3738e-02, + 6.4693e-01, -1.2015e+00, 6.1865e-02, 9.1654e-02, 3.5655e-02, + 1.7562e-01, -6.8616e-02, 1.1576e-02, 1.3995e-01, 4.5674e-01, + 
1.2882e-01, 1.1601e-01, -8.0007e-02, -8.9488e-02, 1.2392e-01, + 1.6591e-01, 3.0822e-03, 2.0161e-01, -2.3949e-01, -5.5038e-02, + 2.8561e-02, -1.4854e-02, -1.7609e-01, -1.6273e-01, -4.8114e-02, + 2.9822e-02, -1.2119e-01, 5.6389e-01, 1.4921e-02, -5.0786e-01, + 2.0637e-01, -3.2344e-01, 1.2871e-01, 4.8221e-02, 1.8008e-01, + -7.6118e-02, -1.9054e-01, -7.6769e-02, 1.0610e-01, 1.8803e-01, + 1.5988e-01, 2.3421e-01, -2.0432e-01, -2.6052e-03, -8.2586e-02, + 5.4137e-01, 5.5920e-02, -1.1482e-01, 1.9908e-01, 1.6705e-01, + -1.6597e-01, -3.2076e-01, 4.1043e-02, -2.0394e-01, 5.2273e-02, + 8.0126e-02, 4.4827e-02, 3.0543e-01, 3.2901e-01, -2.1709e-01, + 1.5603e-01, 8.3254e-02, 9.2027e-02, -3.0384e-02, -9.9128e-01, + -1.1573e-01, -1.3390e-01, -1.3821e-02, 4.8324e-01, 9.9974e-02, + 1.6134e-01, -1.5294e-02, -1.8115e-01, 3.7491e-02, -3.6812e-01, + 7.3616e-03, 1.5063e-01, -1.2603e-01, 1.6043e-01, -2.3366e-01, + -9.9087e-03, -1.3720e-01, -7.6003e-02, 3.6387e-01, 2.8363e-01, + 6.2078e-02, 4.2387e-02, 1.4645e-01, -7.1626e-01, 8.1430e-03, + 2.1640e-01, -1.5572e-01, -2.6465e-01, -1.9105e-02, 1.0089e-01, + 5.4242e-02, -3.5611e-02, -2.2510e-01, -1.3874e-01, -1.0381e-01, + 2.2531e-01, -5.0153e-03, -1.4030e-01, -2.1619e-02, 5.9003e-02, + -1.5007e-01, 1.0948e-01, -1.1252e-01, 1.0077e-03, 6.4135e-02, + -3.1705e-01, 1.3072e-01, -1.6811e-01, 5.1108e-02, 3.9745e-02, + 4.1817e-01, 3.0233e-01, -4.0726e-02, -8.3054e-02, -1.3071e-01, + 9.7965e-02, -5.9167e-02, -8.3690e-02, 7.1241e-02, 9.2697e-01, + -2.0566e-01, -8.5988e-02, 8.9921e-02, -2.9561e-03, -5.8907e-03, + -4.3587e-02, 3.7168e-01, -1.4915e-03, -8.9582e-02, -3.2550e-01, + -8.3132e-03, -1.0678e-02, -5.6811e-03, 6.3972e-01, 1.1402e-01, + 2.9503e-03, 7.1307e-02, 1.7079e-01, -1.1014e-01, 1.3895e-01, + 1.7041e-02, -3.6806e-02, 2.5082e-01, -2.0396e-02, 2.8433e-01, + 1.4611e-01, -6.2930e-02, -1.2271e-01, -1.3978e-01, 1.3857e-01, + 2.8862e-01, 1.7088e-01, 1.3886e-01, 4.9825e-02, -4.3962e-02, + 3.7045e-01, 2.8376e-01, 2.2356e-01, -2.3949e-01, 1.1278e-01, + 
1.0264e-01, -6.9168e-01, -1.2107e-01, -2.2057e-01, 1.6424e-04, + -3.3768e-02, -4.2022e-03, -3.1471e-01, -2.8823e-01, -1.1201e-01, + 9.9413e-02, -6.2378e-02, -8.8212e-02], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0402, 0.0049, 0.0031, ..., 0.0076, -0.0040, -0.0004], + [ 0.0320, -0.0247, 0.0270, ..., 0.0014, -0.0266, -0.0196], + [-0.0072, 0.0229, 0.0050, ..., -0.0068, -0.0446, -0.0313], + ..., + [ 0.0280, -0.0149, 0.0136, ..., 0.0182, -0.0120, -0.0161], + [ 0.0343, -0.0128, -0.0234, ..., 0.0229, -0.0218, 0.0272], + [ 0.0184, 0.0124, 0.0135, ..., -0.0094, 0.0302, -0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3799, -0.4065, -0.2979, ..., -0.4219, -0.3420, -0.1925], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0009, 0.0018, 0.0037, ..., -0.0094, 0.0236, 0.0011], + [ 0.0007, 0.0022, -0.0113, ..., -0.0333, 0.0027, 0.0064], + [ 0.0013, -0.0087, 0.0208, ..., 0.0051, 0.0020, 0.0045], + ..., + [ 0.0153, -0.0221, 0.0076, ..., -0.0112, 0.0199, -0.0161], + [-0.0092, -0.0176, 0.0055, ..., -0.0182, 0.0059, 0.0039], + [-0.0012, -0.0012, -0.0088, ..., -0.0243, 0.0233, -0.0009]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 7.7896e-03, -3.2593e-02, 2.3365e-03, -2.7428e-03, -1.7853e-02, + -3.6957e-02, 4.3549e-02, -8.7357e-03, 5.9662e-03, -6.9542e-03, + -3.0121e-02, -2.0676e-02, -3.7842e-02, -2.5616e-03, -6.9946e-02, + 2.5620e-02, -7.1594e-02, -6.8237e-02, -3.5339e-02, -9.3457e-01, + -2.2919e-02, 5.1079e-03, -3.7384e-02, -1.2177e-02, -2.2659e-02, + -1.8784e-02, -2.9144e-02, -5.5885e-03, 2.5854e-01, 1.8723e-02, + -3.0411e-02, -4.7424e-02, -1.3741e-02, -1.0742e-01, 7.4577e-03, + 4.5700e-03, -1.2428e-02, -2.6245e-02, 2.5635e-02, 5.9547e-03, + -1.3794e-02, -1.9180e-02, -3.3325e-02, 1.5533e-02, 7.5111e-03, + 
-5.0751e-02, 1.2350e-03, 2.8946e-02, -4.1290e-02, 6.7322e-02, + -1.2611e-02, -2.2552e-02, -7.4959e-04, -8.0261e-03, 1.1475e-02, + 1.5778e-02, 5.5237e-02, -9.5642e-02, -4.5258e-02, -2.1877e-03, + 4.0497e-02, 4.9530e-02, -3.8269e-02, 1.1391e-02, 2.6230e-02, + 3.4790e-02, 3.3569e-02, -6.8054e-03, 3.3379e-03, -7.9102e-02, + 6.3599e-02, 2.4536e-02, 1.4441e-01, 7.4692e-03, 3.0319e-02, + -4.6082e-02, 1.3298e-02, 1.6617e-02, -2.3682e-02, 5.1819e-02, + -9.1064e-02, -1.2390e-02, 7.6416e-02, 9.4070e-03, 1.4420e-02, + -3.0380e-02, -6.9275e-02, -2.1866e-02, -2.3148e-02, 1.5701e-02, + 7.9041e-02, -3.3283e-03, 5.7297e-03, 9.4528e-03, -1.0757e-02, + -5.7259e-03, 1.5404e-02, -4.6921e-03, 4.6196e-03, 7.0251e-02, + -5.9280e-03, -4.4861e-02, -7.4097e-02, -8.2245e-03, 1.6342e-02, + -3.1137e-04, -5.6488e-02, -3.1647e-02, 2.1072e-02, 2.0782e-02, + 2.6672e-02, -3.5950e-02, 1.0429e-02, 2.1378e-02, 3.7811e-02, + -6.6833e-02, 4.2816e-02, 3.4454e-02, 7.8857e-02, -4.6906e-02, + 2.8946e-02, -3.7567e-02, 1.2917e-02, -8.6060e-03, -3.8818e-02, + -1.3573e-02, 7.1678e-03, -8.2626e-03, 1.2360e-02, 8.5388e-02, + -2.6917e-02, -6.2042e-02, -6.1646e-02, -2.9877e-02, 2.4567e-02, + -2.3895e-02, 1.2718e-02, -1.0208e-02, -6.9885e-02, -7.5836e-03, + 6.6376e-03, -2.5208e-02, 3.9795e-02, -1.8814e-02, -5.6244e-02, + 2.3483e-02, 3.8574e-02, -6.3049e-02, 8.9569e-03, -3.6530e-02, + -7.8506e-03, 1.0474e-01, -4.3106e-03, 1.0887e-02, 3.1494e-02, + 2.9190e-02, 4.7821e-02, 9.3842e-03, -2.3926e-02, 7.8796e-02, + -4.9805e-02, 1.7578e-02, -2.2507e-02, 7.4158e-02, -2.3041e-02, + -1.1719e-02, -3.8025e-02, -5.7526e-03, -6.7017e-02, 3.8086e-02, + -1.1129e-03, 3.1799e-02, -8.0261e-03, 7.7454e-02, 2.8320e-02, + -5.6519e-02, 2.7100e-02, -5.0354e-02, -1.8875e-02, 1.0706e-01, + -2.4261e-03, -2.6855e-03, 1.7792e-02, -8.1329e-03, 4.4098e-02, + 5.7098e-02, 3.6163e-02, 9.7107e-02, -9.0881e-02, -3.4599e-03, + 3.5919e-02, 4.2419e-03, -3.9215e-03, 9.3384e-02, 9.3520e-05, + -2.1439e-02, -1.8682e-03, 2.5467e-02, -1.5388e-02, 1.1948e-02, + 
5.7190e-02, -4.8492e-02, 4.6692e-02, -5.6610e-02, 2.5425e-03, + -6.0387e-03, 1.8164e-01, 7.9468e-02, 4.1008e-03, 6.8848e-02, + -1.2781e-01, -2.3010e-02, -4.4128e-02, -3.4180e-02, -1.7395e-02, + -3.9032e-02, -9.7595e-02, 1.1002e-02, 1.1032e-02, 3.1113e-02, + 1.8723e-02, -1.2213e-01, -5.0751e-02, 7.5439e-02, -7.0534e-03, + 2.4872e-02, -1.1040e-02, 2.1629e-03, 4.5349e-02, 5.8022e-03, + 4.3869e-03, -1.2917e-02, 5.5298e-02, 5.0316e-03, 5.2612e-02, + 1.2054e-02, -1.5526e-02, 4.3671e-02, -7.1640e-03, -2.7786e-02, + 7.3967e-03, -1.1696e-02, -2.1484e-02, 4.0070e-02, -1.5297e-02, + -2.8000e-02, -2.4567e-02, 1.0895e-01, 4.0283e-03, 1.1200e-02, + 1.0144e-01, 1.0040e-01, 2.3743e-02, 2.2659e-02, -2.0370e-02, + -3.0762e-02, -1.6815e-02, 6.6589e-02, -1.3390e-02, 6.6528e-02, + -4.3526e-03, 3.2227e-02, -9.1858e-02, 6.4575e-02, 3.1433e-02, + -6.7322e-02, 6.6772e-02, 8.4076e-03, -5.8044e-02, 1.6556e-02, + -1.1096e-01, 9.0698e-02, -8.8928e-02, -3.3752e-02, -1.2222e-02, + -4.0100e-02, -4.0192e-02, -6.0806e-03, 1.7136e-02, 1.6037e-02, + 4.7211e-02, -6.3477e-02, -1.1551e-02, 7.1167e-02, -1.0651e-02, + 7.0496e-02, 1.0674e-02, 1.5163e-03, -2.8168e-02, 4.5357e-03, + 2.4887e-02, -1.7761e-02, 5.0659e-02, 6.1684e-03, 4.4952e-02, + 5.1819e-02, -3.4058e-02, 7.0740e-02, 7.2510e-02, 2.2812e-02, + 9.7580e-03, 1.0696e-02, 2.7405e-02, -8.8989e-02, -8.4656e-02, + 5.2765e-02, -4.8157e-02, 4.5593e-02, -3.9398e-02, 3.8422e-02, + 4.9591e-02, -3.3630e-02, -6.8237e-02, -4.3701e-02, 1.0796e-02, + 1.4038e-02, -2.5513e-02, -1.3586e-01, -3.3905e-02, 5.3558e-03, + 7.0801e-03, -9.6207e-03, 4.8859e-02, -4.0649e-02, 1.7197e+00, + -7.1899e-02, -4.9255e-02, -8.5144e-02, -1.7181e-02, -5.9166e-03, + -6.5918e-02, -9.2041e-02, -1.9272e-02, 6.4270e-02, 3.2104e-02, + 6.1035e-03, -1.0696e-02, 2.7679e-02, 8.5999e-02, 2.9144e-02, + 7.7820e-02, -7.6866e-03, 2.7557e-02, 8.9569e-03, 2.2308e-02, + 3.9363e-04, 9.1858e-03, -4.3121e-02, -3.4180e-02, -5.8441e-02, + -3.6621e-02, -2.0874e-02, 1.6830e-02, 4.0558e-02, 7.3486e-02, + 
1.2581e-02, -2.3155e-03, 3.6377e-02, 3.4729e-02, -5.3284e-02, + 3.3386e-02, 2.8122e-02, 3.8452e-02, 3.3295e-02, 9.4910e-02, + -2.0279e-02, -2.8763e-02, -3.8879e-02, 1.0704e-02, 9.4604e-02, + 2.8931e-02, 2.0630e-02, -2.4460e-02, -4.9988e-02, -1.1375e-02, + 1.4992e-02, 1.4893e-02, 1.0114e-01, -2.1744e-02, 2.1774e-02, + 2.3193e-02, -1.8646e-02, -3.1097e-02, -3.3112e-02, 7.0129e-02, + -4.4899e-03, -6.8542e-02, 3.4851e-02, -5.6000e-02, 5.3223e-02, + 2.8259e-02, -1.0693e-01, -9.2834e-02, 1.0010e-01, 1.0895e-01, + -3.4119e-02, -1.5152e-02, 4.3427e-02, 2.1225e-02, -1.8845e-02, + -1.8906e-02, 1.1780e-02, 9.1003e-02, -1.2894e-02, 5.9113e-02, + -2.0584e-02, -1.4336e-02, -1.5182e-02, -5.4871e-02, -3.9795e-02, + 1.6586e-02, -1.6113e-02, 6.4049e-03, 1.1810e-01, 2.5757e-02, + -4.8853e-01, 2.2644e-02, -3.5156e-02, 2.0920e-02, 7.0877e-03, + -2.1194e-02, 3.3264e-02, -1.4381e-02, 6.1310e-02, 5.0240e-03, + 3.7750e-02, -1.0406e-02, -4.6906e-02, -4.7546e-02, -1.2230e-02, + 1.3031e-02, -3.0441e-02, 1.9775e-02, -5.4688e-02, 3.0609e-02, + 8.6441e-03, -3.6377e-02, 2.6531e-03, -2.9816e-02, -3.4882e-02, + -4.2908e-02, -4.8096e-02, -6.5613e-02, -8.2703e-02, 2.9678e-02, + -1.5717e-03, 7.2144e-02, -6.6284e-02, 9.9304e-02, -3.2654e-02, + 2.4963e-02, 3.5583e-02, 1.3904e-01, -7.3425e-02, 4.6326e-02, + 3.0396e-02, 2.2018e-02, -3.3569e-02, 1.6571e-02, 5.6976e-02, + -4.1962e-02, 1.7609e-02, -4.1084e-03, -2.6169e-02, 3.2959e-02, + -7.1106e-02, 1.0414e-02, 1.9646e-03, -1.7776e-02, 8.6670e-03, + 6.2988e-02, -1.1330e-03, -4.1321e-02, 2.8610e-03, -4.0955e-02, + -2.3392e-02, 6.1005e-02, -6.8115e-02, -3.0579e-02, 8.0994e-02, + 6.4880e-02, 6.8115e-02, -2.4460e-02, 5.5481e-02, 2.8992e-02, + 5.9418e-02, 3.9673e-02, -1.6281e-02, 4.6509e-02, -9.4604e-03, + 1.6022e-02, -8.6899e-03, 3.6835e-02, 1.5167e-02, -1.0231e-02, + -5.1727e-02, -6.4240e-03, 7.4768e-02, 1.0760e-01, -2.0065e-02, + 1.0735e-02, -1.2622e-01, -2.9022e-02, 1.1845e-03, -1.2917e-02, + -6.3904e-02, 1.0358e-01, -3.5431e-02, 2.2186e-02, 5.6877e-03, + 
-1.1574e-02, -1.6083e-02, -1.5587e-02, 5.2910e-03, 8.2016e-03, + 7.8659e-03, -1.8631e-02, -1.3077e-02, -1.5572e-02, 1.1108e-02, + -3.5461e-02, 6.5063e-02, -8.1299e-02, -4.1747e-04, -3.7506e-02, + -3.2845e-03, 7.9346e-02, 1.0022e-01, 5.5313e-03, -3.0167e-02, + 1.0490e-02, 1.9470e-02, 6.1340e-02, -1.8845e-02, 1.0582e-02, + -2.5696e-02, -3.5763e-03, 6.7322e-02, -4.8599e-03, -1.3565e-02, + -1.5327e-02, 9.2529e-02, -3.4241e-02, 7.4341e-02, -3.1982e-02, + 2.3422e-02, -2.3132e-02, -2.0050e-02, -5.5237e-02, -1.7120e-02, + -3.2867e-02, 1.7761e-02, -2.9556e-02, -4.5837e-02, -1.6769e-02, + -4.5074e-02, -2.3026e-02, -7.0724e-03, 9.4986e-03, 3.2318e-02, + -1.4465e-02, 3.3173e-02, -4.5959e-02, 4.3831e-03, -7.1030e-03, + -8.2642e-02, -9.9106e-03, -3.1700e-03, -6.5857e-02, -4.4861e-03, + 6.1111e-03, -5.8055e-05, -4.1199e-02, -6.6406e-02, 7.2021e-02, + 6.1737e-02, 6.9618e-04, 4.6234e-02, -8.0490e-04, 6.5552e-02, + 7.3364e-02, 1.7883e-02, 7.0572e-03, 7.6294e-02, 3.6583e-03, + -5.5847e-02, 5.3467e-02, 8.1482e-03, 5.0629e-02, 2.6779e-02, + -6.9214e-02, -3.7201e-02, 3.1067e-02, 4.1779e-02, 8.6288e-03, + -4.6120e-03, -1.1877e-01, 9.7107e-02, -5.1666e-02, 3.1400e-04, + 2.7237e-02, -3.2593e-02, -1.4381e-02, -2.4063e-02, 3.7842e-03, + -1.3321e-02, 4.0344e-02, 5.2277e-02, 2.5543e-02, -5.7983e-03, + 7.0238e-04, -4.8981e-02, -6.4331e-02, 1.4488e-02, 3.2928e-02, + -1.6449e-02, -1.3496e-02, 3.2883e-03, -2.8275e-02, -1.0394e-01, + 2.3224e-02, -2.0599e-02, 2.9724e-02, 5.1514e-02, -4.6661e-02, + 5.1697e-02, -6.7871e-02, 1.5778e-02, 3.7354e-02, 7.0229e-03, + -7.0648e-03, 5.5206e-02, -7.0923e-02, 9.9487e-03, -3.8086e-02, + -1.1192e-02, 4.6997e-02, 1.8250e-02, 4.7211e-02, -3.3386e-02, + 4.4739e-02, 9.4971e-02, 1.0327e-01, 4.6265e-02, -2.7390e-02, + -6.7078e-02, -4.1290e-02, 5.1003e-03, 1.1597e-02, -6.9885e-02, + -7.3891e-03, 4.4800e-02, -6.8115e-02, 1.7685e-02, -1.0590e-01, + 3.1647e-02, -2.8763e-02, 3.7491e-05, 1.1330e-02, 5.8197e-02, + 3.0060e-02, 2.5848e-02, -8.6426e-02, 4.6021e-02, -9.3231e-03, + 
-5.4382e-02, 4.2633e-02, -2.2324e-02, 1.1444e-02, 4.3793e-02, + 2.4780e-02, -3.5339e-02, -6.7200e-02, 5.4291e-02, 2.4307e-02, + -7.8186e-02, -3.1677e-02, -1.6647e-02, 2.8702e-02, -7.6056e-05, + -4.3549e-02, 1.2598e-01, -8.8623e-02, 1.2978e-02, -9.8999e-02, + 6.3721e-02, -4.3182e-02, 8.4351e-02, 7.4463e-03, 2.1729e-02, + -2.9724e-02, -8.7585e-02, 2.6855e-02, 5.1270e-02, 4.6654e-03, + 3.4424e-02, 2.9800e-02, 3.1799e-02, -1.7502e-02, -7.0679e-02, + 6.9702e-02, -1.2619e-02, 1.7288e-02, 4.2305e-03, 8.4351e-02, + -2.0050e-02, 7.8125e-02, -1.5099e-02, 2.4475e-02, -9.9487e-03, + 1.4198e-02, -5.9998e-02, -3.9185e-02, -9.6970e-03, -2.3727e-02, + -4.7943e-02, -3.1342e-02, 2.7161e-02, -7.4081e-03, 1.9302e-02, + -8.1909e-02, 1.9035e-03, 3.7018e-02, 9.1019e-03, 2.8320e-02, + -2.6093e-02, 1.7487e-02, 3.4119e-02, -2.0996e-02, -3.7659e-02, + -3.4302e-02, -1.2791e-04, -2.4811e-02, -4.5319e-02, 4.1077e-02, + -6.8604e-02, 4.7882e-02, 1.4091e-02, 4.0771e-02, -5.1069e-04, + -4.9500e-02, -9.9976e-02, 2.6245e-02, -4.6753e-02, -9.5596e-03, + 2.7557e-02, 2.5330e-02, -2.4536e-02, 4.0222e-02, -1.1444e-02, + 8.3862e-02, 4.6661e-02, -8.7433e-03, -5.4749e-02, -2.3438e-02, + 1.2192e-02, -2.8833e-01, 8.6212e-03, -3.7537e-02, -2.2629e-02, + -2.6428e-02, 6.6566e-03, -1.2238e-02, 8.9645e-03, 2.0905e-02, + -5.8098e-03, -7.1899e-02, -1.3962e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.5466, 1.6287, 1.4620, 1.5152, 1.6963, 1.5352, 1.7028, 1.6495, 1.6754, + 1.5446, 1.7198, 1.6647, 1.7217, 1.7013, 1.5661, 1.5963, 1.7138, 1.6526, + 1.5437, 2.5590, 1.5243, 1.6118, 1.5728, 1.5510, 1.5395, 1.6284, 1.5794, + 1.6840, 2.6548, 1.5297, 1.5815, 1.5742, 1.6377, 1.6970, 1.5335, 1.6825, + 1.6767, 1.7161, 1.6149, 1.5676, 1.5750, 1.4493, 1.5736, 1.5713, 1.6910, + 1.4318, 1.4671, 1.7247, 1.6311, 1.6464, 1.5485, 1.6081, 1.4676, 1.8106, + 1.5993, 1.4245, 1.6667, 1.6925, 1.5395, 1.7395, 1.5219, 1.5653, 1.7521, + 1.5926, 1.7473, 1.5702, 1.4933, 1.6499, 
1.6647, 1.6324, 1.7135, 1.5787, + 1.7618, 1.5647, 1.7105, 1.8888, 1.5787, 1.4775, 1.5883, 1.4983, 1.6657, + 1.5485, 1.7070, 1.4726, 1.5059, 1.7675, 1.7143, 1.6613, 1.5308, 1.6046, + 1.5477, 1.6365, 1.6244, 1.7674, 1.6128, 1.6021, 1.6882, 1.5759, 1.6254, + 1.7811, 1.5736, 2.2523, 1.7263, 1.7387, 1.5957, 1.5393, 1.4164, 1.7550, + 1.5276, 1.5929, 1.5954, 1.5568, 1.4611, 1.6622, 1.6308, 1.6464, 1.6043, + 1.6075, 1.8606, 1.6691, 1.7086, 1.2814, 1.5904, 1.5482, 1.7599, 1.7228, + 1.5460, 1.6140, 1.5800, 1.7483, 1.5361, 1.6983, 1.6441, 1.7168, 1.6573, + 1.6766, 1.8008, 1.6312, 1.4994, 1.5217, 1.5370, 1.5518, 1.5621, 1.6829, + 1.7218, 1.6218, 1.8181, 1.6869, 1.6930, 1.6609, 1.6902, 1.7114, 1.7895, + 1.7047, 1.9185, 1.4508, 1.6689, 1.5640, 1.7115, 1.6192, 1.6506, 1.5862, + 1.5534, 1.2080, 1.4693, 1.5915, 1.5049, 1.6141, 1.6000, 1.6080, 1.5011, + 1.7052, 1.6523, 1.6762, 1.7173, 1.5287, 1.7796, 1.9163, 1.6103, 1.6111, + 1.6848, 1.5284, 1.5299, 1.6179, 1.7410, 1.6436, 1.6795, 0.5891, 1.8123, + 1.6114, 1.4928, 1.5903, 1.7130, 1.5604, 1.6891, 1.6415, 1.5553, 1.5920, + 0.8378, 1.5091, 1.4806, 1.6040, 1.8015, 1.6475, 1.6319, 1.5780, 2.0262, + 1.6141, 1.6232, 1.7835, 1.8310, 1.7591, 1.6236, 1.7798, 1.8485, 1.5591, + 1.8435, 1.6892, 1.7282, 1.7013, 1.6079, 1.8256, 1.5861, 1.5172, 1.6814, + 1.6223, 1.5769, 1.6768, 1.6941, 1.5766, 1.7174, 1.6628, 1.6466, 1.7017, + 1.5979, 1.5386, 1.6890, 1.6619, 1.6857, 1.5417, 1.4578, 1.7095, 1.6859, + 1.6036, 1.6458, 1.6928, 1.5617, 2.0336, 1.5748, 1.6816, 1.7615, 1.4831, + 1.5995, 1.6715, 1.6472, 1.8099, 1.7742, 1.7084, 1.6539, 1.4784, 1.6935, + 1.5482, 1.4752, 1.5757, 1.4937, 1.7429, 1.6930, 1.5716, 1.6499, 1.6268, + 1.8494, 1.6066, 1.6520, 1.2203, 1.6006, 1.5320, 1.6687, 1.6458, 1.5665, + 1.5904, 1.5120, 1.7532, 1.8606, 1.7307, 1.6165, 1.5873, 1.8569, 1.5411, + 1.8052, 1.4410, 1.7376, 1.6775, 1.5768, 1.4959, 1.7195, 1.5293, 1.5665, + 1.5442, 1.7254, 1.6832, 1.7262, 1.6449, 1.6497, 1.8472, 1.6109, 1.5323, + 1.4856, 1.6461, 1.6573, 1.4211, 1.7107, 
1.6706, 1.4615, 1.6185, 1.6502, + 1.5425, 1.7448, 1.8702, 1.6209, 1.6842, 1.4225, 1.6420, 1.6030, 1.7335, + 1.4414, 1.3507, 1.6482, 1.7484, 1.4666, 1.4885, 1.4834, 1.6085, 1.7071, + 1.6288, 1.6031, 1.7478, 1.5366, 1.7877, 1.5961, 1.7320, 1.7167, 1.7011, + 1.5680, 1.6012, 1.6675, 1.7850, 1.6508, 1.5850, 1.7151, 1.7315, 1.6190, + 1.6217, 1.6529, 1.4600, 1.6130, 1.5748, 1.4341, 1.4692, 1.6451, 1.8027, + 1.6968, 1.6237, 1.5114, 1.6602, 1.6563, 1.6236, 1.7618, 1.5643, 1.6583, + 1.7570, 1.5363, 1.6690, 1.6809, 1.6877, 1.6174, 1.7092, 1.6972, 1.5257, + 1.7533, 1.5202, 1.5563, 1.2858, 1.7206, 1.6467, 1.7424, 1.5450, 1.6139, + 1.6049, 1.6431, 1.7720, 1.7395, 2.3183, 1.8012, 1.5125, 1.7654, 1.7184, + 1.5203, 1.7012, 1.6321, 1.7103, 1.5438, 1.6525, 1.4328, 1.5309, 1.6249, + 1.6052, 1.7142, 1.5251, 1.9031, 1.5546, 1.8293, 1.6872, 1.5897, 1.6912, + 1.2554, 1.8850, 1.5435, 1.6176, 1.5993, 1.6400, 1.6315, 1.5535, 1.7377, + 1.7238, 1.5547, 1.6823, 1.6606, 1.7936, 1.5944, 1.5299, 1.6171, 1.6747, + 1.7157, 1.5132, 1.4544, 1.6506, 1.6495, 1.6812, 1.6820, 1.6619, 1.6851, + 1.7541, 1.5222, 1.2278, 1.6402, 1.5469, 1.6362, 1.6224, 1.7885, 1.6684, + 1.6326, 1.7323, 1.8737, 1.5761, 1.5595, 1.5265, 1.6326, 1.7348, 1.6750, + 1.7892, 1.7402, 1.6306, 1.6256, 1.6073, 1.5526, 1.6445, 1.6588, 1.7198, + 1.7170, 1.4939, 1.5734, 1.4749, 1.4609, 1.4621, 1.6425, 1.6142, 1.6705, + 1.8619, 1.6656, 1.7224, 1.5495, 1.7225, 1.6603, 1.5858, 1.7914, 1.6040, + 1.6189, 1.7769, 1.5258, 1.5773, 1.5919, 1.5266, 1.6045, 1.7294, 1.5485, + 1.8233, 1.6096, 1.5402, 1.6347, 1.6637, 1.6981, 1.6240, 1.7442, 1.4918, + 1.7090, 1.6694, 1.6764, 1.5901, 1.5481, 1.5938, 1.6709, 1.6144, 1.5924, + 1.6912, 1.6638, 1.6623, 1.5187, 1.6067, 1.5926, 1.6297, 1.5314, 1.4781, + 1.9022, 1.6165, 1.5046, 1.5899, 1.6815, 1.5027, 1.6304, 1.6635, 1.6648, + 1.5609, 1.7298, 1.6707, 1.6422, 1.7757, 1.5534, 1.6288, 1.7241, 1.5718, + 1.7018, 1.6467, 1.6198, 1.8366, 1.6966, 1.6463, 1.5737, 1.8146, 1.8558, + 1.6408, 1.7075, 1.6411, 1.7522, 1.7252, 
1.8811, 1.5192, 1.5296, 1.6011, + 1.6077, 1.7297, 1.5673, 1.4351, 1.3742, 1.6096, 1.6247, 1.8658, 1.6591, + 1.6070, 1.7442, 1.7427, 1.5629, 1.7752, 1.7397, 1.5951, 1.5998, 1.6218, + 1.6390, 1.6102, 1.6870, 1.6590, 1.6500, 1.6725, 1.5687, 1.8237, 1.7162, + 1.6969, 1.7778, 1.3082, 1.5581, 1.5859, 1.6857, 8.3553, 1.6262, 1.6234, + 1.6098, 1.6806, 1.6735, 1.5222, 1.5401, 1.6517, 1.5861, 1.6949, 1.6505, + 1.5054, 1.5938, 1.6957, 1.4191, 1.5484, 1.5930, 1.5796, 1.8634, 1.6881, + 1.6730, 1.8082, 1.7221, 1.7040, 1.8423, 1.5473, 1.6795, 2.0259, 1.5601, + 1.7674, 1.6799, 1.5424, 1.7275, 1.4993, 1.6710, 1.6352, 1.6293, 1.6699, + 1.5402, 1.5156, 1.5385, 1.5834, 1.7811, 1.6085, 1.4844, 1.5949, 1.6252, + 1.6161, 1.5671, 1.7266, 1.8965, 1.5759, 1.3900, 1.7972, 1.6964, 1.6084, + 1.7021, 1.7420, 1.6757, 1.8554, 1.6910, 1.7198, 1.8348, 1.6739, 1.6898, + 1.7649, 1.6479, 1.6634, 1.5334, 1.5332, 1.6019, 1.5840, 1.6878, 1.6768, + 1.7021, 1.6632, 1.5904, 1.5411, 1.7415, 1.5871, 1.6146, 1.4035, 1.7149, + 1.7371, 1.6836, 1.6548, 1.7445, 1.5904, 1.6646, 2.1296, 1.5593, 1.5757, + 1.7328, 1.5722, 1.5387, 1.8659, 1.5409, 1.7209, 1.5629, 1.8777, 1.8350, + 1.7118, 1.5701, 1.4138, 1.6292, 1.6653, 1.5712, 1.6760, 1.6174, 1.5633, + 1.5570, 1.6512, 1.6422, 1.4815, 1.8044, 1.6046, 1.6314, 1.5283, 1.4404, + 1.4932, 1.5224, 1.7141, 1.6128, 1.7097, 1.6152, 1.7305, 1.6738, 1.6606, + 1.6788, 1.6342, 1.7643, 1.5878, 1.5240, 1.6807, 1.4941, 1.7150, 1.6930, + 1.8136, 1.6147, 1.6509, 1.6260, 1.8039, 1.4894, 1.4515, 1.5909, 1.6387, + 1.6184, 1.6230, 1.6293, 1.7180, 1.6287, 1.6429, 1.6987, 1.8735, 1.8367, + 1.6882, 1.5426, 1.4757, 1.7773, 1.6352, 1.5406, 1.6494, 1.6965, 1.5874, + 2.3065, 1.5113, 1.3640, 1.5659, 1.5924, 1.5288, 1.6494, 1.6579, 1.7771, + 1.5794, 1.6293, 1.6200], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-2.8834e-01, 6.7875e-01, -2.3783e-01, 7.2542e-01, 5.0365e-01, + -1.0250e-01, -3.9273e-01, 7.6085e-01, 2.8215e-01, 2.7522e-01, + 6.3696e-01, 
3.5317e-02, 5.8776e-01, -8.9201e-02, -2.4734e-01, + 2.3635e-01, 6.3772e-01, -6.2483e-02, 8.0917e-01, -1.8566e+00, + 1.5441e-01, 2.7425e-01, 3.3344e-01, 1.9197e-01, 6.2422e-01, + -8.6932e-02, 8.8230e-02, 6.7806e-01, -6.2292e-01, 6.2146e-02, + 8.7037e-02, 4.9910e-01, 2.2075e-01, -1.1974e-01, -3.7742e-01, + -2.0259e-01, 1.1265e-01, -2.0129e-01, 3.3812e-01, -9.1389e-03, + -5.3591e-02, 3.6583e-01, -1.5610e-01, 4.3336e-01, -2.5268e-01, + 3.1972e-01, 1.0743e-01, -5.0063e-02, 3.2510e-01, -5.9961e-01, + 4.2868e-01, -2.2210e-01, 2.1076e-01, -1.3806e-01, -9.8569e-02, + -1.9991e-01, 2.0285e-01, -2.1988e-01, 1.2357e-01, 1.6476e-01, + 1.3403e-01, -2.3772e-01, 7.5840e-01, 4.1628e-01, 4.2165e-02, + 3.0577e-01, -4.3873e-01, -3.7216e-02, -1.1519e-01, -7.0516e-01, + -2.2037e-02, -5.4402e-01, 3.3992e-01, -1.9507e-01, -9.1473e-01, + 8.3870e-01, 4.1670e-01, -1.4147e-01, -1.6440e-01, 2.3323e-01, + 7.2164e-01, -6.6401e-02, -6.7743e-02, -5.1050e-01, 4.1789e-01, + 2.6773e-02, -8.0283e-02, 9.3218e-02, -4.2062e-01, -9.3459e-01, + 1.4858e-01, 1.4147e-01, 1.4865e-01, -3.5721e-01, 4.3898e-01, + -2.5401e-01, -2.9791e-01, 2.2632e-01, -4.6250e-01, -5.8804e-01, + 1.4498e-01, -8.5878e-01, 9.4009e-01, 3.6177e-01, 1.4910e-01, + 3.1275e-01, 5.7658e-01, 7.6616e-01, 8.8667e-02, 8.2457e-02, + -3.0712e-01, -1.1602e-01, 2.3959e-01, -2.5542e-01, 1.0640e-01, + -1.2604e-01, -1.1610e-01, -1.9213e-01, -3.2760e-01, -3.4821e-01, + -6.4125e-01, 3.7060e-01, 4.4295e-01, -1.2739e-01, 7.0215e-01, + 2.1715e-01, -1.5359e-01, -5.8012e-01, -3.6162e-02, -9.7989e-01, + 1.4529e-01, 7.3714e-01, 2.3299e-01, 8.8725e-01, -2.6198e-01, + 2.6619e-01, -1.0543e-01, -4.9063e-01, 5.4750e-02, 2.5521e-01, + 1.1565e-01, -1.9963e-01, 4.9922e-03, 3.8230e-01, 6.4455e-01, + 1.7291e-01, -4.4966e-01, 3.1876e-02, -2.6282e-01, -5.4284e-01, + -7.9918e-01, -1.7874e-01, 1.0447e+00, 2.4432e-01, -8.6598e-02, + -5.1222e-01, 1.6548e-02, 6.6075e-02, -1.4376e-01, 1.7247e-01, + 1.8433e-01, 2.0584e-01, 2.7407e-01, -7.5933e-02, 3.4370e-02, + 3.1634e-01, 
-4.4396e-01, -4.0123e-01, 2.0246e-01, 2.2675e-01, + 7.3490e-01, -6.0053e-02, 7.2985e-01, -2.9349e-01, -4.5335e-02, + 2.3239e-01, -3.9335e-01, -1.4336e-01, -9.0372e-02, -2.1095e-01, + 5.9891e-01, -1.2639e-01, -2.8169e-01, 3.9472e-01, -3.3182e-01, + 3.5402e-01, -4.3384e-01, -6.2287e-01, 1.0957e+00, 3.1057e-01, + 5.0804e-02, -4.2299e-02, 4.6876e-01, 2.7346e-01, 4.2902e-02, + 1.6550e-01, 7.2328e-02, -4.3305e-02, 6.2557e-01, -7.6766e-01, + -5.2920e-02, 6.5177e-01, -1.5345e-01, 3.7510e-01, 3.6323e-01, + 6.0274e-01, -2.2344e-01, 9.2344e-02, -2.7282e-01, 2.9311e-01, + 9.2230e-01, 9.3975e-02, 8.5421e-01, -6.6581e-01, -4.6468e-02, + -3.2008e-01, 6.3147e-01, -9.3217e-01, 4.6425e-03, 5.7329e-02, + -8.6024e-02, 3.8421e-01, 4.3569e-01, -8.3151e-02, -5.1909e-01, + -2.5093e-01, -1.6634e-01, -3.0899e-01, -4.7583e-01, 6.6807e-01, + 4.9516e-02, 2.1708e-01, -5.3353e-02, 3.1828e-01, 1.2399e-01, + 5.4984e-01, 8.3559e-02, -3.9472e-01, -3.6320e-02, -5.7928e-01, + -1.9020e-01, -2.7261e-01, -4.0178e-01, -7.3205e-02, -1.8186e-02, + -3.6264e-01, -3.8185e-02, -6.0324e-01, 3.9541e-01, 2.4023e-01, + -5.1377e-01, -2.2388e-01, 9.4389e-02, -4.5919e-01, -1.6421e-01, + 8.8032e-01, -8.8406e-02, -8.6900e-01, -2.0488e-01, -3.9097e-01, + 7.3720e-01, 4.7167e-01, 4.7704e-01, -4.1401e-01, -6.6253e-01, + -5.8830e-01, -2.9232e-01, -1.3623e-01, 3.8029e-01, -1.6702e-01, + 9.0093e-01, -6.3571e-01, 2.2402e-01, 2.1330e-01, -3.2586e-01, + -5.9196e-02, 5.7648e-02, -1.0256e+00, 1.1868e-01, 2.8840e-01, + -1.7680e-01, -1.5109e-01, 9.8570e-01, -3.1493e-01, 1.3815e-01, + -6.8671e-01, 4.4593e-01, -2.5520e-01, 5.3355e-01, -1.8861e-01, + -1.9878e-01, 7.3772e-01, -1.4706e-01, 2.5136e-01, -6.4688e-01, + -5.8466e-01, 1.0780e-01, -2.1876e-01, -2.6948e-01, -3.7949e-02, + 1.6250e-01, -6.8278e-02, -1.8151e-01, 5.4320e-01, 6.2123e-01, + -5.9731e-01, -1.4939e-01, 8.1843e-02, -5.4720e-02, -6.0758e-01, + 5.5542e-01, 5.7590e-01, 3.9174e-01, 1.5900e-01, -3.9861e-01, + -1.9958e-02, 1.1678e-01, 5.5287e-01, -1.1611e-01, 1.8584e-01, + 
5.0934e-01, -1.3474e-01, 4.3069e-01, 1.6100e-01, 1.6451e+00, + 3.0039e-01, -2.2575e-01, 3.3777e-01, -2.8258e-01, 2.4884e-02, + -3.2205e-01, 5.5202e-02, 4.3987e-02, -1.9287e-01, -5.9822e-01, + -9.0719e-01, -4.0716e-01, -6.1424e-01, -5.6892e-01, 4.0417e-02, + -8.3440e-01, 3.1679e-01, -2.0134e-01, 6.8621e-01, 4.4656e-01, + -1.7529e-01, 1.1721e-01, -1.9876e-01, -3.1152e-01, 5.6802e-01, + 3.2369e-01, 6.3882e-02, 1.6577e-02, 4.6248e-02, 2.0744e-01, + 2.0860e-01, 1.0150e+00, -3.7986e-01, 9.3586e-02, 3.7668e-01, + -6.6433e-01, 1.0799e+00, -2.7410e-01, -1.8421e-01, -1.7920e-01, + -2.0334e-02, 3.3188e-01, 5.9764e-03, -2.9039e-01, -7.2283e-02, + -8.0281e-01, -1.1115e-01, 1.4681e-01, -1.6251e-01, 2.9035e-01, + -3.0332e-01, -7.9299e-01, 4.0145e-01, -3.1173e-01, -3.3306e-01, + -1.5556e-01, 8.4179e-01, 6.8650e-01, 4.9622e-01, -6.3749e-01, + -3.2113e-01, 5.1397e-01, -3.7570e-01, 4.3522e-01, -3.2065e-01, + 1.0057e-01, 1.7216e-01, 7.9660e-01, 3.2077e-01, 4.9979e-01, + 4.9811e-01, 5.9768e-02, -8.0808e-02, -1.3962e-01, 6.0569e-02, + 1.5873e-01, 1.0248e+00, -7.9364e-01, -5.7126e-01, -4.9706e-01, + -2.4641e-01, 1.4800e-01, -3.5420e-01, 2.5300e-01, 3.1038e-01, + -3.8231e-01, -2.1472e-01, -4.8862e-01, 1.2613e-01, -4.8684e-01, + -1.1218e+00, 4.5354e-01, 7.3050e-02, -7.5414e-01, 6.2755e-03, + 1.9597e-01, -1.7957e-01, -9.1913e-01, -9.3182e-02, 5.8686e-01, + -4.8479e-01, 2.5590e-01, 1.0595e+00, 2.7002e-01, 7.2647e-01, + 5.1018e-01, 1.9252e-01, 8.9410e-03, 3.9983e-01, -9.9736e-02, + -4.6141e-01, 1.7405e-01, 2.8290e-01, 6.3410e-01, 6.8365e-01, + 8.1244e-01, 5.7984e-01, 4.0560e-01, 3.8197e-02, -4.1310e-01, + 4.8830e-01, -5.7483e-01, 4.4491e-01, -5.7979e-01, -5.8200e-01, + -7.5095e-02, 4.5962e-01, -7.1681e-01, 9.5295e-02, 6.2722e-01, + 2.8345e-01, 2.8936e-01, 2.2348e-01, 2.3233e-01, -8.2202e-02, + 1.9226e-01, 1.9503e-02, 4.2562e-01, -5.9108e-01, -2.3255e-01, + 4.6791e-01, -4.6673e-01, 6.5900e-01, -6.2156e-01, 1.9633e-01, + 1.7809e-01, 5.3349e-01, -6.9414e-02, 9.1718e-01, 6.4407e-01, + 5.0983e-01, 
2.0029e-02, 7.9499e-01, 2.8185e-02, -1.5870e-01, + -6.7845e-02, 4.3046e-01, 1.2244e-01, -7.2143e-01, 4.3565e-01, + -2.2752e-01, 8.3008e-02, 7.6862e-02, 3.0861e-01, 3.5815e-01, + 1.2071e-02, -2.8832e-01, -5.2904e-01, 6.2650e-02, 1.1320e+00, + -2.3723e-01, -2.9065e-01, -2.4751e-01, -9.2235e-01, -6.6487e-02, + 1.7130e-01, 3.2095e-02, 3.2017e-01, -2.9495e-01, 1.4542e-01, + -4.9789e-01, -4.8924e-01, -1.7171e-01, 2.4293e-01, -3.5534e-01, + 1.9945e-01, 4.8569e-02, 6.2335e-01, 4.7964e-01, 2.9458e-01, + 1.5449e-01, -6.4109e-01, 1.3690e-01, 3.9368e-01, 1.4828e-01, + 1.4641e-01, 4.3600e-01, 1.0510e-01, -5.8869e-02, 6.5924e-01, + 3.5055e-02, -5.6399e-01, 4.5772e-01, 2.2873e-01, -2.3484e-01, + -3.7558e-01, 8.9041e-01, -2.2871e-01, -3.6462e-01, -1.2101e-01, + -2.2578e-01, -1.3908e-01, 7.4223e-02, 3.0451e-01, 1.8741e-02, + 3.2846e-01, -4.0961e-01, -3.4544e-01, -6.2249e-01, 2.4417e-01, + -2.0547e-01, 1.7231e-01, 3.1831e-01, 6.4965e-01, 3.9325e-01, + -8.2503e-01, 6.0124e-01, 2.8477e-01, -1.9307e-01, 5.5779e-01, + -9.2765e-02, 1.7029e-02, -4.8561e-01, 2.5786e-01, -2.5821e-01, + 3.1338e-01, -1.2206e-01, 6.9799e-01, -3.6036e-01, 4.3696e-01, + 9.1485e-01, 1.1678e+00, 1.5267e-01, 6.1679e-01, 4.5000e-04, + -8.2172e-01, -8.6353e-01, -8.9458e-02, 7.0973e-01, -1.7341e-01, + -6.2408e-01, 1.0639e-01, -6.4188e-02, -1.0023e-01, 4.2501e-01, + 2.7844e-01, -1.3440e-01, -4.0398e-01, -7.8034e-01, -2.8684e-01, + 2.7194e-01, 1.2444e-01, 4.0351e-01, -7.6288e-02, -1.9768e-01, + -4.2178e-01, 4.4424e-01, 1.3352e-01, -9.9392e-02, 6.9779e-01, + 1.6888e-01, 1.0123e+00, -6.2880e-01, 4.5987e-01, 4.4244e-01, + -5.3807e-01, 5.4973e-01, -1.0677e-01, -4.1490e-02, 2.9562e-01, + 5.8947e-02, 2.2215e-02, -6.4694e-01, 7.8909e-02, -2.7255e-01, + 3.1557e-01, 6.9443e-01, -5.6689e-01, -5.3598e-01, 1.7104e-01, + -2.7369e-01, 3.5588e-01, 3.2884e-01, -1.1530e-03, 4.7344e-01, + -2.1759e-01, -5.2551e-01, -3.0974e-02, 1.6733e-01, 3.1815e-01, + -1.3519e-01, 1.9444e-01, 5.8109e-01, -5.5995e-02, 1.9733e-01, + -4.8083e-01, 
1.2766e-01, 3.8752e-01, -6.7796e-01, 2.5224e-01, + 2.2199e-01, -2.1854e-01, -5.9393e-03, 3.2313e-01, 2.8912e-01, + 9.3591e-02, 9.0599e-02, -5.3521e-01, 1.5640e-01, -1.7252e-01, + 3.9477e-01, 3.5321e-01, 8.6974e-02, -2.4951e-01, 3.8474e-01, + 1.4969e-01, -4.9545e-01, 8.4216e-01, 7.0573e-02, -3.3233e-01, + 2.1082e-01, 3.2282e-01, 1.4043e-01, -2.4110e-01, -2.8888e-01, + 2.5481e-01, -6.9019e-01, 2.9674e-01, 1.1068e-01, -2.5481e-01, + 1.7578e-01, -1.7300e-01, -2.6870e-01, -1.2870e-01, -3.5365e-01, + 4.1577e-02, 9.9443e-02, -1.6780e-01, -1.1617e-01, 1.6097e-01, + 4.3591e-01, 6.7614e-01, -3.5089e-02, 2.8122e-02, 2.7661e-01, + 3.7887e-01, 1.0609e+00, 7.0093e-01, -7.1668e-03, 7.7913e-01, + -4.2864e-02, 4.7460e-01, -1.0076e+00, 1.0556e-01, -1.8160e-01, + -7.2417e-02, 1.0140e+00, -3.5696e-01, -7.3666e-01, -2.3020e-01, + -3.2148e-01, 5.2983e-01, -3.2121e-01, 1.4286e-02, 6.7987e-01, + -6.5973e-01, -2.0075e-01, 3.7226e-01, 1.6172e-01, -3.3971e-01, + -1.9281e-01, -5.0010e-02, 9.4485e-01, -1.0869e-01, 6.2613e-01, + -1.2510e-01, 1.0464e+00, -5.2931e-01, 6.5617e-02, 1.1157e-01, + 2.1925e-01, 7.1122e-02, -5.7915e-01, -2.9074e-01, -1.2102e-01, + -1.3839e-01, -1.4838e-01, 4.2684e-02, -8.2936e-02, -1.4279e-01, + -1.2063e-02, 3.2719e-01, -4.2521e-01, -2.4905e-01, -4.9499e-02, + -5.2031e-02, -1.7989e-01, -4.2499e-02, 6.8209e-01, -1.5275e-01, + 4.2563e-01, 6.6697e-02, 1.5147e-01, 1.8749e-01, 8.2221e-01, + -2.5794e-01, 8.3046e-01, 2.4105e-01, 6.1676e-01, 3.9542e-01, + -2.3539e-01, -3.6405e-01, -6.6711e-02, -2.9740e-01, 9.6638e-02, + -4.3095e-01, 1.6043e-01, 8.7313e-02, 5.2172e-01, -1.8327e-01, + -8.9197e-02, -1.9214e-01, 2.0663e-01, -3.8714e-01, 3.1245e-01, + -5.1035e-02, -1.3826e-01, 2.8931e-01, 8.6621e-01, 3.4779e-01, + 6.2924e-01, 5.0303e-01, -1.4128e-02], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 0.0033, -0.0180, 0.0063, ..., 0.0171, 0.0053, 0.0176], + [ 0.0050, 0.0288, 0.0542, ..., 0.0377, 0.0121, -0.0257], + [ 0.0002, -0.0528, 
0.0353, ..., 0.0037, 0.0121, 0.0060], + ..., + [ 0.0066, 0.0045, 0.0136, ..., 0.0031, 0.0118, -0.0052], + [-0.0037, 0.0018, -0.0075, ..., 0.0004, -0.0041, 0.0008], + [-0.0034, 0.0127, -0.0073, ..., 0.0064, -0.0214, -0.0094]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([ 0.3430, -0.0836, 0.0424, ..., -0.0128, -0.0226, 0.0145], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 0.0056, -0.0199, 0.0132, ..., -0.0029, 0.0242, -0.0021], + [ 0.0218, 0.0037, 0.0028, ..., -0.0120, 0.0019, 0.0024], + [ 0.0007, -0.0039, -0.0249, ..., -0.0206, 0.0182, 0.0032], + ..., + [-0.0161, -0.0262, -0.0194, ..., 0.0123, 0.0224, 0.0002], + [-0.0050, -0.0197, 0.0011, ..., -0.0005, 0.0069, 0.0091], + [ 0.0174, -0.0362, -0.0287, ..., -0.0099, -0.0143, 0.0133]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 3.0563e-02, 1.5022e-02, -2.1133e-02, 1.6357e-02, -6.0081e-03, + -1.1963e-02, 3.7994e-02, 4.7791e-02, -1.2894e-02, 1.1734e-02, + -2.5070e-02, 1.4830e-04, 5.0049e-03, 1.2337e-02, -3.2715e-02, + 4.9316e-02, 1.1742e-02, 3.3302e-03, 5.0724e-05, 4.1748e-01, + -2.4628e-02, -1.8173e-02, -3.1830e-02, -2.8961e-02, -3.6240e-03, + -9.0256e-03, 1.6846e-02, 2.3712e-02, 5.6335e-02, -2.2003e-02, + 2.4048e-02, -2.3163e-02, -1.5518e-02, -1.2665e-02, -7.5340e-03, + 2.7512e-02, 7.3128e-03, 1.2070e-02, -2.4948e-02, -2.6566e-02, + -3.5828e-02, 2.5146e-02, -5.1208e-02, 2.1088e-02, -4.1779e-02, + 6.1913e-03, 5.8746e-02, -4.7882e-02, -1.5106e-03, 4.1168e-02, + 8.9417e-02, -3.4241e-02, 6.4354e-03, 2.9251e-02, 6.4049e-03, + 4.6082e-02, 1.7426e-02, -8.8654e-03, 2.8778e-02, -1.1444e-02, + -2.6718e-02, 6.9618e-03, -1.6754e-02, 4.9713e-02, -6.6467e-02, + -6.0081e-03, -9.1858e-03, 1.0658e-02, -1.7029e-02, -1.0211e-01, + -1.3550e-02, 7.3013e-03, 3.1830e-02, -5.3711e-02, -2.3651e-03, + -4.2297e-02, 2.1667e-02, 1.8265e-02, 
-7.1167e-02, -1.6129e-02, + -3.0380e-02, 4.4189e-02, 1.9302e-02, -2.7542e-02, 2.7588e-02, + 1.2352e-02, -6.1981e-02, 1.2680e-02, -2.4841e-02, -8.7976e-04, + 2.7847e-02, -8.7280e-03, -2.4399e-02, 4.6173e-02, 3.9154e-02, + -2.4811e-02, 2.4765e-02, -3.8788e-02, 2.2308e-02, 4.7363e-02, + 5.9845e-02, -1.8115e-01, -3.4698e-02, -2.8183e-02, -1.3344e-02, + 8.3303e-04, 2.7466e-02, 8.1558e-03, -9.5081e-04, 3.9001e-02, + -2.9678e-03, -1.5572e-02, 1.2978e-02, 2.1011e-02, 1.3420e-02, + -3.3478e-02, 2.9922e-02, -1.8646e-02, 6.6467e-02, -3.5191e-04, + 4.0405e-02, -2.4429e-02, -7.8430e-03, 3.7262e-02, -4.1382e-02, + 9.2163e-03, 3.3844e-02, 2.5925e-02, -3.6041e-02, 3.5614e-02, + -1.3481e-02, 1.8936e-02, 1.7715e-02, -1.8723e-02, 4.6417e-02, + 2.0935e-02, -7.8659e-03, 3.4393e-02, 4.5288e-02, -3.3875e-02, + -1.4618e-02, -2.1305e-03, -8.7891e-03, -7.1411e-02, -3.6316e-02, + -2.5311e-03, 3.0899e-02, -2.4307e-02, -1.3008e-02, 5.2917e-02, + 3.5004e-02, 4.5197e-02, 1.9623e-02, -1.0841e-02, -2.4529e-03, + 3.7201e-02, 3.5339e-02, 1.3718e-02, 1.1375e-02, -2.6093e-02, + -3.7518e-03, -1.6266e-02, 1.4168e-02, -6.6345e-02, -2.2766e-02, + -1.7738e-04, -9.1370e-02, -1.2688e-02, -1.9135e-02, -2.5482e-02, + -2.1301e-02, -3.9154e-02, -1.4015e-02, 5.5115e-02, 1.1574e-02, + -4.7646e-03, 1.2192e-02, 2.0466e-03, -7.8430e-03, -2.8397e-02, + 6.3667e-03, 5.3024e-03, -6.4392e-03, -3.0655e-02, 3.6640e-03, + 3.9307e-02, 3.6133e-02, -7.0984e-02, 3.3264e-02, -2.8553e-03, + 3.6430e-03, 2.4078e-02, -3.6438e-02, 4.3945e-03, -5.9891e-03, + -6.0608e-02, 6.8283e-03, -2.0050e-02, -1.4502e-01, -5.0232e-02, + -2.8580e-02, 5.2216e-02, 3.1952e-02, -1.9638e-02, -1.4832e-02, + 1.3763e-02, 9.1980e-02, 3.2330e-03, 2.6703e-02, 1.0635e-02, + -2.0584e-02, -9.4528e-03, 1.0681e-02, 2.6474e-02, 5.6305e-03, + 8.0414e-03, 4.0321e-03, 4.0771e-02, 2.0218e-02, 2.9612e-04, + 2.7405e-02, -1.2093e-02, 1.5450e-02, 6.4888e-03, 1.6556e-02, + -2.0569e-02, -2.1271e-02, -9.4910e-03, 7.6942e-03, 1.9348e-02, + 7.6714e-03, 1.1742e-02, 9.8877e-03, 
-1.1467e-02, -4.6204e-02, + 2.9160e-02, 1.8372e-02, 2.3438e-02, 2.8290e-02, -3.2745e-02, + 1.2901e-02, -2.0935e-02, 3.3894e-03, -1.1246e-02, -2.3041e-02, + 1.7990e-02, 3.5400e-02, -1.7273e-02, 1.7090e-02, -4.0674e-04, + -1.7609e-02, 5.5328e-02, 3.3512e-03, -8.0795e-03, -2.8442e-02, + -4.3907e-03, -1.1787e-02, 6.7329e-03, 1.2016e-02, 7.5836e-03, + -2.9266e-02, 5.1147e-02, 4.1656e-02, -1.0422e-02, -1.1787e-03, + 6.7322e-02, -4.6326e-02, 3.3722e-02, 2.8801e-03, -4.9500e-02, + 3.8391e-02, -5.7297e-03, -4.5227e-02, -1.4793e-02, -3.0579e-02, + -9.2926e-03, 1.5287e-03, -1.3947e-02, -2.8183e-02, 1.8951e-02, + -9.3555e-04, -8.1177e-03, -3.7781e-02, 9.2087e-03, -2.1896e-02, + 9.0256e-03, 2.8885e-02, 1.8280e-02, -1.7990e-02, -3.0457e-02, + 1.5869e-02, 6.1417e-03, 2.1851e-02, -1.4830e-03, 5.3619e-02, + 1.2711e-02, 3.5950e-02, 1.3435e-02, 1.3573e-02, -1.9135e-02, + -1.1078e-02, 1.0445e-02, 2.7786e-02, -3.2501e-02, 2.6264e-03, + -1.0414e-02, -2.8107e-02, -5.7465e-02, -1.2390e-02, -1.8738e-02, + -9.8190e-03, -4.6570e-02, 4.6875e-02, -1.9806e-02, -4.6082e-03, + -6.2683e-02, -2.6245e-02, -2.0050e-02, -1.5404e-02, 4.7302e-03, + 5.3375e-02, 4.2847e-02, -4.5746e-02, 8.3160e-03, 1.4453e-01, + 2.1973e-02, 5.1758e-02, -6.7200e-02, -3.7098e-03, 3.9825e-02, + 7.1449e-03, -7.8354e-03, -2.8580e-02, 4.2755e-02, 5.2719e-03, + 9.1705e-03, -5.1117e-03, 1.6525e-02, 1.8982e-02, 9.0332e-03, + -2.6703e-02, -1.9089e-02, -2.3727e-02, 1.7212e-02, -9.4757e-03, + -3.8849e-02, -2.0828e-02, -8.3313e-03, -1.8707e-02, -3.2562e-02, + 9.6970e-03, 6.1150e-03, 2.6825e-02, 8.3740e-02, -2.2720e-02, + -3.9642e-02, -4.4365e-03, -2.4857e-02, 3.0396e-02, 8.9111e-03, + 5.9875e-02, 2.7298e-02, -3.1921e-02, -6.9160e-03, 1.5610e-02, + 3.0518e-02, 3.3295e-02, -3.5187e-02, -4.2023e-02, -8.5297e-03, + -3.5583e-02, -1.6861e-02, 4.6265e-02, -7.0557e-02, -3.6499e-02, + 2.5085e-02, 2.0981e-02, 3.0685e-02, 2.8503e-02, -7.3586e-03, + 3.8544e-02, 5.4199e-02, 2.5284e-02, -1.6983e-02, 4.5074e-02, + 5.7831e-03, -2.1332e-02, 
6.0654e-03, 6.9771e-03, -6.9389e-03, + -2.3346e-02, -3.6377e-02, -2.4643e-02, -3.4943e-02, 5.5267e-02, + -2.8488e-02, 4.8889e-02, 5.4207e-03, 3.5492e-02, -1.7471e-02, + 1.9699e-02, -3.4058e-02, -1.1841e-02, 3.0594e-02, -1.9592e-02, + 9.1324e-03, 1.2169e-02, -4.5105e-02, -6.2622e-02, -2.1561e-02, + -2.1648e-03, -1.4114e-02, -2.3193e-02, 2.5360e-02, 1.1749e-01, + 1.9470e-01, 1.6983e-02, 3.8239e-02, -5.1300e-02, -1.4488e-02, + -8.2169e-03, -2.2324e-02, 2.3209e-02, 5.4199e-02, -1.3283e-02, + 2.9816e-02, 1.4275e-02, -1.6251e-02, 5.5504e-03, -2.1973e-03, + 8.6136e-03, -1.1513e-02, 2.7313e-02, 3.3966e-02, -3.6926e-02, + 1.5045e-02, 4.6631e-02, 4.9591e-03, -5.6152e-02, -1.8768e-02, + 8.4076e-03, -6.5308e-03, 2.0355e-02, -1.8872e-01, -2.5024e-02, + -3.5156e-02, 8.7128e-03, 3.6736e-03, 2.4811e-02, -3.6407e-02, + 2.1194e-02, -7.6599e-03, 4.4098e-02, -2.0309e-02, 2.6226e-03, + 1.0490e-03, 3.2990e-02, -7.0435e-02, -2.0275e-03, -2.9583e-03, + -3.7823e-03, 7.5531e-03, 2.1912e-02, -3.1834e-03, -3.5736e-02, + 1.2581e-02, 3.1799e-02, -2.8992e-02, 1.6189e-04, 4.4220e-02, + -7.6485e-03, 6.2294e-03, -2.1684e-04, 1.3824e-02, 3.4607e-02, + 1.7349e-02, 2.7435e-02, 2.9144e-02, 1.6586e-02, 4.3182e-02, + 1.6373e-02, 3.4088e-02, -2.6337e-02, -2.5360e-02, -2.1591e-02, + 1.2138e-02, 3.3966e-02, -1.6785e-02, 3.3016e-03, 4.3030e-02, + 5.4352e-02, -1.6661e-03, -2.3773e-02, 6.8779e-03, -2.1927e-02, + -1.1436e-02, 2.0020e-02, -8.5449e-04, -3.5370e-02, -8.2445e-04, + 1.9207e-03, -5.0293e-02, -2.7145e-02, 2.3254e-02, -6.4125e-03, + -6.0425e-02, 4.0527e-02, -1.0811e-02, -4.1313e-03, 9.3842e-03, + 1.8646e-02, 3.0502e-02, -1.2569e-03, 9.6436e-03, -6.1464e-04, + 1.0376e-02, -8.2855e-03, 5.0323e-02, 3.0655e-02, -3.9795e-02, + -2.5692e-03, 1.4099e-02, -1.9882e-02, 2.7069e-02, -3.4363e-02, + -1.5350e-02, 2.1240e-02, -3.4088e-02, 1.7563e-02, 2.0416e-02, + -4.5433e-03, -1.6800e-02, 2.2903e-02, 1.9836e-02, 3.5645e-02, + -2.2095e-02, -2.8473e-02, 3.1586e-02, 3.9978e-03, 1.6434e-02, + -2.5055e-02, -2.1393e-02, 
-3.7048e-02, 3.2928e-02, -7.7576e-02, + 1.9775e-02, -3.0182e-02, -3.0579e-02, -2.3712e-02, 4.1718e-02, + -5.3955e-02, -1.2665e-02, 2.3529e-02, 1.5541e-02, 3.0777e-02, + -3.0563e-02, 2.6566e-02, 3.1586e-02, 9.0561e-03, -1.9897e-02, + -4.0474e-03, 4.6295e-02, 2.7832e-02, -2.1439e-02, 6.4575e-02, + -1.9226e-02, 1.3092e-02, 1.3664e-02, -4.5532e-02, 2.2354e-02, + -2.8320e-02, 1.1826e-02, 4.0649e-02, 1.7624e-02, 3.6804e-02, + 1.0056e-02, 1.4320e-02, 8.7585e-03, -3.9673e-03, 9.0714e-03, + 1.8906e-02, 5.5115e-02, 8.3923e-03, 1.2184e-02, -6.7062e-03, + 1.6266e-02, 5.0659e-02, 1.7426e-02, 4.1351e-02, 1.8711e-03, + 6.3362e-03, -2.0966e-02, 2.3327e-03, 4.7058e-02, 3.5553e-02, + -1.1131e-02, -4.3274e-02, -2.3331e-02, -6.9618e-03, -1.7822e-02, + -3.7415e-02, -2.8198e-02, -2.1149e-02, -6.0141e-05, -5.2704e-02, + 5.2567e-03, -5.4382e-02, 1.0933e-02, -5.8807e-02, 1.4450e-02, + 6.4926e-03, -5.9418e-02, 2.5940e-02, 2.9053e-02, 2.6855e-03, + 2.2385e-02, 5.7793e-03, 1.7624e-02, -9.3613e-03, -1.0327e-01, + -9.1858e-03, 2.5253e-02, 2.8381e-02, 2.8564e-02, -1.3519e-02, + 2.2125e-02, -8.2245e-03, 3.4882e-02, -2.5787e-02, -1.0826e-02, + 1.3245e-02, -1.4206e-02, -3.7445e-02, -4.8218e-02, -4.0039e-02, + 4.6753e-02, -5.1069e-04, -2.1774e-02, 3.0930e-02, -6.6406e-02, + -2.5925e-02, -3.2410e-02, 7.2266e-02, -4.3671e-02, 9.2010e-03, + 3.1555e-02, 2.4765e-02, -6.3229e-04, -5.9891e-03, 2.8934e-03, + -2.4963e-02, -4.7607e-03, 7.7515e-03, -2.6108e-02, 7.8430e-03, + -2.4933e-02, -4.3518e-02, 3.5839e-03, 2.2598e-02, 1.8845e-02, + -1.0582e-02, -1.5945e-02, 3.8834e-03, 2.5909e-02, 6.1249e-02, + 2.2537e-02, 1.1930e-03, 4.0375e-02, -1.0376e-02, -1.4725e-02, + 2.5299e-02, 3.1250e-02, -9.2773e-03, -7.6294e-03, -3.4058e-02, + 3.1647e-02, -3.0411e-02, 1.0216e-02, -4.1870e-02, -2.3346e-03, + -1.7715e-02, 1.6321e-01, -1.7548e-02, 1.6037e-02, -5.9052e-02, + 8.2474e-03, 4.6967e-02, 1.0948e-02, -2.3087e-02, -3.6835e-02, + -2.7222e-02, -6.8115e-02, 1.6403e-02, 1.6785e-02, -1.6510e-02, + -5.9547e-03, 2.3270e-02, 
-5.1849e-02, -2.9083e-02, -5.3223e-02, + 1.1513e-02, 1.6281e-02, 6.8741e-03, 3.1982e-02, -2.7908e-02, + 2.6627e-02, 4.6051e-02, -1.7532e-02, -5.6534e-03, -9.1782e-03, + 2.6505e-02, 2.7390e-02, -1.2978e-02, -1.3390e-02, -2.7512e-02, + -4.7852e-02, -3.6346e-02, 1.0300e-02, -1.5511e-02, -3.6133e-02, + -4.7821e-02, -1.3428e-02, -6.1493e-03, -8.7051e-03, 2.0996e-02, + 1.2634e-02, 2.6264e-03, 2.4139e-02, 9.1267e-04, -3.4637e-03, + 8.2321e-03, -4.6997e-02, -3.5278e-02, 6.2523e-03, 2.5005e-03, + 1.4305e-02, -1.4420e-02, -7.4816e-04, 1.2001e-02, 4.5166e-03, + -4.9194e-02, 1.4847e-02, 2.1591e-03, -1.6916e-04, 1.1780e-02, + 4.7058e-02, 4.8462e-02, -1.8280e-02, -3.1776e-03, -3.6560e-02, + -5.1239e-02, 1.7960e-02, 9.1019e-03, -3.3142e-02, -1.5480e-02, + -1.3832e-02, -1.0956e-01, -3.7975e-03, -5.7343e-02, -1.4809e-02, + -9.6893e-03, 4.9866e-02, -5.1880e-02, 4.2175e-02, -2.3911e-02, + 1.9703e-03, -2.2034e-02, -3.8261e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.0895, 1.1491, 1.1280, 1.0222, 1.1739, 1.0663, 1.1026, 1.1196, 1.0610, + 1.0362, 1.0344, 1.1302, 1.1394, 1.1145, 0.9793, 1.2097, 1.0334, 1.2245, + 1.1241, 0.2753, 1.0395, 1.0472, 1.1671, 1.0659, 1.0802, 1.1507, 1.0881, + 1.0900, 1.1905, 1.2015, 1.1597, 1.0814, 1.0713, 1.1395, 1.1696, 1.3321, + 1.0179, 1.0746, 1.1628, 1.0975, 1.0572, 0.9831, 1.1066, 1.0873, 1.2887, + 1.0379, 1.0448, 1.2154, 1.0990, 1.0652, 1.1241, 1.1402, 1.1011, 1.1263, + 1.1139, 0.9162, 1.0919, 1.2903, 1.1040, 1.1012, 1.1183, 1.0331, 1.1440, + 1.1119, 1.1228, 1.1293, 0.9695, 1.2707, 1.1637, 1.2276, 1.0280, 1.0591, + 2.1889, 1.0572, 1.1401, 1.0796, 1.1198, 1.0126, 1.1814, 1.0254, 1.1224, + 1.1836, 1.1246, 1.0903, 1.2133, 1.1445, 1.1274, 1.1002, 1.1227, 1.1008, + 1.1070, 1.1492, 1.1207, 1.1143, 1.1237, 1.0610, 1.0389, 1.1207, 1.0998, + 1.1228, 1.1145, 1.0209, 1.0081, 1.0739, 1.1013, 1.1098, 1.0723, 1.2898, + 1.1376, 1.1546, 1.0668, 1.0175, 0.9574, 1.0351, 1.1479, 1.0740, 1.1223, + 
1.1233, 1.1137, 1.1355, 1.1567, 1.0250, 1.1973, 1.0578, 1.1464, 1.1832, + 1.0966, 1.0225, 1.1626, 1.0823, 1.0742, 1.0992, 1.0003, 1.0753, 1.1261, + 1.1817, 1.2336, 1.2113, 1.0655, 1.0400, 1.0588, 1.0387, 1.1333, 1.0340, + 1.0439, 1.0065, 1.0902, 1.1669, 1.0910, 1.1070, 1.1481, 1.1906, 1.0798, + 1.1427, 1.1982, 0.9619, 1.2056, 1.1212, 1.1140, 1.0979, 1.1636, 1.1301, + 1.0640, 1.0920, 1.1086, 1.0510, 1.1392, 1.1131, 1.1167, 1.1602, 1.0656, + 1.1571, 1.0212, 1.1582, 1.1673, 1.1256, 1.0929, 1.2417, 1.1797, 1.0986, + 1.1346, 1.2516, 1.1074, 1.0885, 1.2958, 1.1755, 1.0924, 1.5735, 1.1752, + 1.1061, 1.2396, 0.9582, 1.1471, 1.0999, 1.1809, 1.0361, 1.0556, 1.1514, + 4.4042, 1.1847, 0.9946, 1.0696, 1.2486, 1.2341, 1.0585, 1.1061, 1.1108, + 1.0926, 1.1026, 1.2013, 1.1338, 1.1785, 1.0487, 1.1234, 1.1309, 1.1504, + 1.0983, 1.0937, 1.1015, 1.1060, 1.1004, 1.1597, 1.0480, 1.0293, 1.1476, + 1.1468, 1.0856, 1.1552, 1.1258, 1.1367, 1.0792, 1.1552, 1.2199, 1.0825, + 1.0273, 1.0731, 1.1292, 1.0727, 1.1044, 1.0763, 1.1227, 1.1270, 1.2005, + 1.1391, 1.2601, 1.0759, 0.9800, 1.0601, 1.0598, 1.1195, 1.1037, 1.1359, + 0.9948, 1.0792, 1.1656, 1.0665, 1.0727, 1.0877, 1.0512, 0.9722, 1.0822, + 0.9968, 1.0633, 1.0913, 1.1195, 1.1480, 1.0806, 1.1148, 1.1943, 1.1331, + 1.2171, 1.1391, 1.0036, 1.0460, 1.0607, 1.0629, 1.1052, 1.1523, 1.1389, + 1.0923, 1.0779, 1.2070, 1.1827, 1.1731, 1.1759, 1.0740, 1.1694, 1.0501, + 1.1126, 1.0367, 1.0730, 1.1285, 1.0258, 1.1576, 1.1315, 1.0176, 1.0823, + 1.1763, 1.2783, 1.0968, 1.2560, 1.1037, 1.1538, 1.1152, 1.0765, 1.0244, + 1.0208, 1.0763, 1.1061, 0.9937, 1.1177, 1.1626, 1.0887, 1.0125, 1.1349, + 1.1448, 1.1865, 1.0696, 1.1315, 1.2748, 1.0957, 1.2077, 1.0925, 1.0961, + 0.4362, 0.8269, 1.1157, 1.1701, 1.0544, 1.1217, 1.1581, 0.9604, 1.1525, + 1.0654, 1.0816, 1.0749, 0.9441, 1.0297, 1.1223, 1.1353, 1.1195, 1.0887, + 1.1435, 1.1358, 1.1909, 1.1669, 1.1307, 1.0546, 1.1769, 1.1563, 1.1995, + 1.0511, 1.0491, 1.0806, 0.9967, 1.1176, 1.0901, 1.0295, 1.1788, 1.1354, + 
1.0126, 1.0746, 1.0824, 1.0353, 1.2085, 1.1083, 1.0764, 1.0563, 1.0498, + 1.1317, 1.1567, 1.1507, 1.0341, 1.1843, 1.0600, 1.0345, 1.1261, 1.1050, + 1.0773, 1.0134, 1.1439, 1.3424, 1.1876, 1.0634, 1.1512, 1.0344, 1.0040, + 1.1639, 1.0634, 1.1018, 1.2177, 1.0125, 1.1937, 0.8632, 1.2447, 1.1296, + 1.0644, 1.1010, 1.1356, 1.1632, 1.0518, 1.1655, 1.0770, 1.1398, 1.1057, + 1.2451, 1.1343, 1.1630, 1.1435, 1.0383, 1.1496, 1.1279, 1.2263, 1.0600, + 0.9016, 1.7528, 1.1242, 1.1495, 1.1949, 1.1088, 1.0201, 1.1023, 1.0593, + 1.1124, 1.0015, 0.9765, 1.1217, 1.0880, 1.0674, 1.0305, 1.1003, 1.0986, + 1.1196, 1.0115, 0.9515, 1.0412, 1.0447, 1.1111, 1.2276, 1.0558, 1.0263, + 1.1577, 1.1810, 1.1464, 1.1950, 1.1154, 1.1469, 1.0784, 1.1425, 1.1325, + 0.9748, 1.1008, 1.2164, 1.1759, 1.1185, 1.0715, 1.0538, 1.1341, 1.1335, + 1.1417, 1.0734, 1.1603, 1.1504, 1.0499, 1.0453, 1.1896, 1.0659, 1.1221, + 1.0744, 1.0764, 1.1328, 1.0917, 0.9529, 1.1041, 1.1418, 1.0918, 1.0488, + 1.0744, 1.1292, 1.1925, 1.0147, 1.1584, 1.1822, 1.0020, 1.1705, 1.0952, + 1.1119, 1.1396, 1.0830, 1.0932, 1.0332, 1.1715, 1.1709, 1.2496, 1.0912, + 1.1014, 1.0597, 0.9969, 1.1519, 1.0397, 1.1136, 1.1051, 1.2365, 1.0718, + 0.9613, 1.0971, 0.9209, 1.2023, 1.0763, 1.1217, 1.0613, 1.1238, 1.1573, + 1.1401, 1.2079, 1.0841, 1.1320, 1.0662, 1.0452, 1.0370, 1.1594, 1.1060, + 1.2394, 1.0779, 1.1139, 1.0556, 1.0827, 1.0480, 1.1069, 0.9860, 1.2285, + 1.1220, 1.2044, 1.0099, 1.1480, 1.2758, 1.1016, 1.0456, 1.0680, 1.0806, + 1.1920, 0.9602, 1.1260, 1.0967, 1.0894, 1.0856, 1.0547, 1.1553, 1.1625, + 1.1529, 1.0368, 1.1202, 1.2378, 1.0886, 1.2828, 1.1100, 1.0849, 1.1190, + 1.0355, 1.1792, 1.1415, 1.1741, 1.1189, 1.1158, 1.0776, 1.2510, 1.2408, + 1.0956, 0.9678, 1.2549, 1.1548, 1.1318, 1.1330, 1.0840, 1.0474, 1.0956, + 1.1239, 1.0163, 1.1235, 1.1863, 1.1862, 1.1465, 1.0752, 1.0207, 1.1893, + 1.2338, 1.1984, 1.0556, 1.0000, 1.1678, 1.0845, 1.6301, 1.1264, 1.0290, + 1.0311, 1.0892, 1.0607, 1.0999, 1.0946, 1.2030, 1.0882, 1.1124, 1.1719, + 
1.1915, 1.0641, 1.0739, 1.0830, 1.1107, 1.1630, 1.1085, 1.1904, 1.0162, + 1.1530, 1.0941, 1.2059, 1.0574, 1.1017, 1.0775, 1.2459, 1.0807, 1.0607, + 1.2741, 1.1496, 1.2021, 1.0923, 1.0298, 1.1785, 1.0046, 1.0668, 1.1532, + 1.1207, 1.1642, 1.0606, 1.1673, 1.1646, 1.0249, 0.9916, 1.0798, 0.9993, + 0.9868, 1.0063, 1.1514, 1.3287, 1.0684, 1.0735, 1.1959, 1.1412, 1.2029, + 1.2507, 1.0586, 1.0936, 1.1616, 1.2078, 1.1429, 1.1928, 1.0335, 1.1191, + 1.1077, 1.0536, 1.1241, 1.2090, 1.1281, 1.1716, 1.1917, 1.0936, 1.2262, + 1.2455, 1.1533, 1.1363, 1.1134, 1.2211, 1.0558, 1.0892, 0.9781, 1.0907, + 1.0923, 1.1208, 1.1447, 1.0259, 1.0367, 1.2340, 0.3527, 1.1055, 1.0547, + 1.1139, 1.0485, 1.1752, 1.1343, 1.1493, 1.1050, 1.0833, 1.0571, 1.1405, + 1.1860, 1.1253, 1.0112, 1.0885, 1.1551, 1.0193, 1.0367, 1.0937, 1.2202, + 1.1137, 1.0781, 1.0423, 1.0075, 1.1260, 1.1049, 1.1311, 1.0788, 1.1301, + 1.1468, 1.0984, 1.0819, 1.0745, 1.1147, 1.0569, 1.1257, 1.1980, 1.2418, + 1.1304, 1.0243, 1.1301, 1.1706, 1.1504, 1.0834, 1.0783, 1.1954, 1.1526, + 1.1944, 1.0511, 1.1842, 1.1555, 1.0770, 1.0528, 1.0551, 1.1226, 1.1294, + 1.0864, 1.1444, 1.0887, 1.0853, 1.2392, 1.0416, 1.0600, 1.1678, 1.1766, + 1.1449, 1.1450, 1.2752, 1.1177, 1.1140, 1.1532, 1.1915, 1.1941, 1.0838, + 1.1238, 0.9961, 1.0554, 1.1288, 1.1269, 1.1024, 1.1493, 1.1472, 1.1067, + 1.1309, 1.0237, 1.1736], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.2977e-01, 3.4533e-02, -1.8398e-01, 1.2173e-02, 5.4290e-02, + 7.0416e-02, 6.9125e-02, 6.3581e-02, -6.3971e-02, 2.2436e-02, + 3.0890e-02, -2.6753e-02, -7.2576e-02, -1.1489e-02, -3.9083e-03, + -2.5749e-02, -4.7372e-03, 3.9891e-02, -4.1990e-02, 2.9299e+00, + -6.0782e-02, 1.1301e-03, 2.0187e-02, 3.1748e-03, -4.5030e-02, + -2.1860e-02, -1.3268e-02, 1.2331e-02, 1.7767e-01, -1.1032e-01, + 6.1784e-02, 1.7215e-02, 2.1386e-03, 3.7935e-02, 1.9422e-02, + -5.7784e-02, 1.7874e-02, -5.3821e-02, -1.6726e-01, -1.2773e-04, + 2.2345e-02, 3.6149e-02, -9.7282e-03, 
-2.8852e-03, -1.4355e-02, + 7.7343e-03, 5.9046e-02, -6.3718e-02, 3.3771e-02, 5.9758e-02, + 5.2067e-02, -2.4261e-02, 6.2351e-02, 2.6544e-02, 6.7204e-02, + 5.9771e-03, -8.1034e-02, -3.6046e-02, 7.0604e-02, -1.8416e-02, + 1.0626e-02, -3.5582e-02, 2.1963e-02, 3.6199e-02, 3.9028e-02, + 1.8032e-02, 4.5988e-02, 1.1960e-01, -4.5342e-02, 8.4323e-02, + -2.4700e-03, 2.0580e-02, 1.5001e-01, -4.6368e-02, -8.9924e-02, + 3.1414e-02, 4.3287e-04, -6.4596e-02, -3.9071e-02, 3.8661e-02, + -9.9305e-03, 3.0970e-03, -1.4900e-02, 5.3587e-02, -1.1936e-02, + 9.2194e-02, 3.4322e-02, 6.0066e-02, -1.2689e-02, -1.6213e-03, + -3.5178e-02, 2.7978e-02, 2.1645e-02, -3.3493e-02, -9.2206e-03, + -8.8815e-03, -1.5690e-03, -5.0180e-02, -8.9474e-03, 4.4740e-02, + 5.0755e-02, 1.8484e-01, 3.5222e-02, -6.9127e-02, 2.0810e-02, + 2.0733e-02, 1.6405e-02, 3.1962e-04, 6.9797e-03, -2.8417e-02, + -5.4037e-02, -3.7544e-02, 6.0598e-02, 3.6609e-02, 6.1370e-02, + -2.5203e-02, -3.8131e-02, -5.4255e-02, 3.5124e-02, 5.2478e-02, + -7.8859e-02, -1.1023e-01, -5.7933e-02, -2.1626e-02, -1.1320e-01, + 1.1809e-02, -7.8021e-02, 6.3488e-03, -1.6868e-02, -5.0843e-02, + -7.9675e-02, 7.3526e-02, 5.4632e-02, 3.0480e-03, -4.9357e-02, + -7.2188e-03, 4.4292e-03, -5.3419e-03, -2.6700e-02, -3.8512e-02, + 2.4960e-02, 1.5461e-02, 2.2325e-03, -7.1685e-02, 2.8958e-02, + -1.4240e-02, 3.5438e-02, -4.7604e-02, -2.5415e-02, 6.7599e-02, + 1.6583e-01, -4.5225e-02, -7.2389e-03, -1.5869e-02, -4.5997e-02, + -1.9443e-02, 1.2420e-02, -8.4762e-02, 3.5484e-03, -1.5639e-02, + 9.2677e-02, -1.1708e-01, -1.6392e-02, -2.0767e-01, 3.2551e-02, + 9.6369e-03, -7.5081e-02, 4.5103e-02, -5.6623e-02, 5.0668e-03, + -1.5549e-02, 2.9004e-02, 8.0657e-02, 1.4856e-02, 2.5327e-02, + -3.2253e-02, 2.4493e-02, 2.5677e-02, -2.6763e-02, -3.0572e-02, + 2.1989e-02, -6.9528e-02, 8.1583e-02, -2.7302e-02, 2.2262e-02, + -1.3821e-01, 2.7482e-02, 1.3539e-01, -2.5331e-02, -9.1998e-02, + 3.9943e-02, 2.4111e-02, 3.1964e-02, 4.0118e-02, -6.6720e-02, + -2.8206e-02, -1.7699e-02, -1.0273e-01, 
-7.2117e-01, 2.6851e-02, + -1.3055e-01, 2.8005e-02, 2.8573e-04, 4.3966e-02, 4.6126e-02, + 1.1302e-01, 1.7980e-02, 6.0565e-02, 7.2146e-02, 2.3062e-02, + 3.7915e-03, 1.7058e-01, -1.2183e-02, 1.1138e-02, -3.0831e-02, + -1.5247e-02, -2.0833e-02, 5.8323e-02, -5.0730e-02, -9.2447e-02, + 8.0505e-03, 3.1202e-02, -6.0900e-02, -6.7741e-02, 4.0749e-02, + -6.8300e-02, 3.5966e-03, -4.5552e-02, 6.6899e-02, -1.6725e-02, + 5.5230e-03, 1.6007e-02, 4.6567e-02, 1.9058e-02, -4.2146e-02, + 4.0774e-02, -3.5646e-02, 6.9086e-02, 6.9511e-02, 1.3817e-02, + 3.6827e-02, 5.5455e-02, 3.5128e-02, 2.2640e-02, 2.5301e-02, + 3.7564e-02, 6.2913e-02, 1.1184e-01, 4.4412e-02, -4.5695e-02, + 3.3184e-02, 9.0154e-02, 9.1126e-02, -6.9666e-02, 1.1205e-01, + -3.3497e-02, 2.1879e-02, -1.0857e-01, -5.0966e-03, -2.1120e-02, + -1.6438e-02, 1.3116e-02, 1.7569e-02, -4.8693e-02, -3.2089e-02, + 2.1517e-02, -8.8201e-02, 1.0595e-02, -1.0550e-02, -6.0092e-02, + 8.4827e-03, 4.6030e-02, 5.5492e-02, -3.3597e-02, 5.8026e-04, + 6.9805e-02, -7.8620e-02, -5.8343e-03, 4.6980e-02, -5.9122e-02, + 5.9284e-02, 1.1318e-01, -5.5452e-02, 4.8808e-02, 2.4126e-02, + 2.5154e-02, 5.0583e-03, 2.2864e-04, 6.1358e-02, -8.5796e-02, + -9.8663e-03, -1.4949e-02, 4.5410e-02, 2.3115e-02, 2.4214e-02, + -2.1457e-02, 6.9584e-02, 1.6610e-02, 7.6730e-03, 1.4294e-02, + 1.4369e-02, 1.2196e-02, -1.2681e-02, 6.1613e-03, 1.9278e-02, + -1.4352e-02, 4.2145e-02, -1.1340e-01, 6.2743e-02, -6.3384e-02, + 1.6000e-02, -2.0184e-02, 2.0241e-02, -6.9196e-02, 3.1063e-02, + -1.2378e-01, -3.9613e-03, 1.5876e-02, 7.3869e-02, -3.9551e-02, + -1.2366e-02, 1.5304e-01, -5.1567e-02, 2.8741e-03, -4.6380e-01, + -1.3505e-05, 5.2405e-02, -1.0747e-02, 8.6793e-03, -5.3858e-02, + 1.2035e-01, -7.1143e-02, 4.2165e-02, 4.5190e-04, 4.4525e-02, + 7.1364e-02, -1.9798e-02, 1.1052e-02, -5.2190e-02, 7.9460e-03, + -6.8076e-02, -1.7711e-02, -6.1290e-02, -1.2022e-02, 4.6646e-02, + -1.8673e-02, 6.7254e-02, 4.9145e-02, -1.0084e-03, -4.2727e-02, + 1.5223e-02, 1.3736e-02, 2.6416e-02, 4.2571e-02, 
-9.0382e-03, + -5.3650e-02, -3.2191e-02, -2.2234e-02, 2.5080e-02, -4.0795e-02, + 4.0941e-02, -1.2991e-02, -2.1576e-02, -5.1419e-02, 1.0806e-02, + -2.5446e-02, 5.2107e-02, 4.3128e-02, -3.1123e-02, 6.3914e-03, + 6.4913e-03, -5.8185e-03, 1.7926e-03, -2.6158e-03, -2.5587e-02, + 5.5986e-02, 8.1868e-02, 2.9394e-02, 1.6433e-02, -2.4356e-02, + -4.0253e-02, 7.8868e-02, 1.7375e-02, 1.4129e-02, -4.3368e-02, + -4.3876e-02, -1.3003e-02, -4.8500e-02, -3.6869e-02, -4.2772e-02, + -7.1692e-03, -1.4630e-01, -3.6647e-02, -7.6028e-02, 9.2405e-02, + 5.4635e-03, 3.8962e-02, 3.1438e-02, 2.9575e-03, 9.4233e-03, + 7.2004e-02, -3.4432e-02, 4.5634e-02, -8.9947e-03, -7.8658e-02, + -9.7364e-02, -7.2371e-02, -3.9383e-02, -3.5334e-02, -1.1011e-02, + -1.1150e-02, 8.6494e-03, 5.0095e-02, -2.7079e-02, -7.5610e-02, + -2.1079e-01, 2.8796e-02, 1.3110e-02, -3.3392e-02, -4.5108e-02, + 4.0402e-04, -2.8353e-02, 3.7843e-02, 1.1853e-02, 2.7821e-02, + 4.5922e-02, -1.2891e-01, -7.2979e-02, 2.3094e-02, 1.4984e-02, + 1.5402e-02, -7.1183e-02, -4.1936e-03, 3.6530e-02, -4.5188e-02, + 2.7603e-02, 2.5460e-02, 5.8202e-02, 8.3031e-02, -4.8382e-02, + 3.7847e-02, -6.2521e-02, -2.2624e-04, -7.8755e-02, 8.5045e-02, + -9.0725e-02, 3.5782e-02, -3.7057e-02, 1.0535e-01, 2.0793e-03, + 1.2219e-02, 8.5230e-03, -7.6473e-02, 1.6504e-02, 3.2562e-02, + -4.8595e-02, 5.7389e-02, 1.1120e-02, -3.3858e-02, -1.0108e-01, + 2.3178e-02, 4.0618e-02, -2.3842e-02, 2.9851e-02, -4.7033e-02, + -5.0536e-02, 4.2567e-02, -7.8787e-02, 3.5106e-02, 1.1993e-03, + -1.4537e-02, -5.1278e-02, -2.4576e-02, 4.4611e-02, -5.3965e-02, + 3.4502e-02, 7.4910e-02, 3.1964e-02, -3.4592e-02, 8.4762e-02, + 6.6939e-02, -7.5200e-02, 4.3292e-02, 1.6985e-01, -6.6695e-02, + 2.4553e-02, -7.5039e-03, 5.2509e-02, 8.2030e-02, 2.4376e-02, + 4.2058e-03, 2.3543e-02, 6.1570e-03, -1.6743e-02, -9.7259e-02, + 2.4244e-02, -2.2547e-02, 2.2843e-02, -4.1545e-02, -1.6013e-02, + 8.3000e-03, -8.5513e-02, 4.4356e-02, 5.1328e-03, -1.9995e-02, + 7.8072e-02, 2.3570e-02, 2.9447e-02, 7.6935e-02, 
9.6576e-03, + -6.4136e-02, 3.2867e-02, -4.5160e-02, 3.3648e-02, -1.7392e-02, + 5.8089e-03, 2.4739e-02, 1.0378e-01, 9.3410e-03, -5.3627e-02, + -3.4343e-02, -2.6351e-02, -2.9343e-02, -2.2353e-02, -4.7763e-02, + -1.1366e-02, 5.2010e-04, -4.6743e-03, 1.6199e-02, -7.8063e-02, + 1.9897e-04, 2.7052e-02, 4.3959e-02, -2.7493e-02, 1.4008e-02, + -4.9083e-02, -6.6418e-02, -1.7566e-02, -6.1558e-02, -3.6589e-03, + -3.0492e-02, 2.8641e-02, -5.1314e-02, 3.1308e-02, -4.2798e-02, + 4.9607e-02, 1.6081e-02, 8.1581e-03, 6.2736e-02, 4.4093e-02, + 3.3409e-02, -6.3776e-03, -1.1995e-02, 5.1635e-02, 2.1120e-02, + -1.7519e-02, 7.2731e-02, -1.4197e-02, 3.4120e-02, 8.4901e-02, + -7.1668e-02, 4.6460e-02, 2.7229e-02, 1.6466e-02, 1.3243e-01, + 6.2105e-02, 2.1720e-02, 3.3344e-03, -1.4666e-02, 3.0143e-02, + 4.9758e-02, 3.8548e-02, 1.2586e-02, -1.0586e-02, 4.7062e-02, + 1.7897e-02, 1.9226e-02, 2.5062e-02, 4.5023e-02, -3.5047e-02, + -1.0945e-01, 1.2781e-02, 3.8052e-02, 8.2197e-02, 2.9414e-02, + 1.9449e-02, 3.5261e-02, -6.2046e-02, -2.6565e-02, -2.5523e-02, + -6.0374e-03, 7.5064e-02, 5.9772e-02, 7.9446e-03, 5.8369e-02, + -2.9503e-03, 1.9437e-02, -1.6286e-02, -1.0209e-02, 4.0471e-02, + -5.8021e-02, -1.2354e-01, 2.2840e-02, -2.4538e-02, -3.2923e-02, + 1.1068e-03, -4.9417e-02, -2.2733e-01, 2.4994e-02, -3.4969e-02, + 6.0766e-02, -5.1679e-03, -2.4937e-02, -6.0451e-03, -3.3841e-02, + -5.4637e-02, 4.4074e-02, 8.5792e-02, 4.7601e-02, -1.5414e-01, + 2.0098e-02, 1.3737e-04, 3.5567e-02, 2.6228e-02, 1.7048e-02, + 2.6366e-03, 1.0959e-02, 1.1537e-03, 3.4626e-02, -2.3026e-02, + 6.3907e-02, 4.3722e-02, 7.6725e-02, 9.1198e-02, 2.5575e-02, + 4.8980e-02, -6.5434e-02, -1.2302e-02, 1.1802e-02, -7.0467e-02, + -4.3942e-02, -6.8340e-02, 6.6216e-02, -3.8507e-02, 2.3285e-02, + 4.4583e-02, 4.8039e-02, 6.5735e-02, -1.2380e-02, 2.7694e-02, + 6.3951e-02, -2.0262e-02, 6.0663e-03, 4.8698e-02, 2.6361e-03, + 1.9225e-02, -8.2180e-02, -4.2692e-02, 1.9436e-02, 6.2999e-02, + -9.6810e-02, -2.8198e-02, -3.4410e-02, -3.2170e-03, 5.5013e-02, 
+ 8.0327e-03, -6.7895e-02, -6.1232e-02, 4.2554e-02, -2.2628e-02, + -1.9596e-01, 2.4340e-02, -3.6419e-03, -1.3354e-02, -4.5575e-02, + 6.4358e-02, -6.6430e-02, 1.7512e-02, 1.6014e-03, 4.6003e-02, + -3.3494e-02, -3.2714e+00, -1.9003e-02, 1.7152e-02, -2.3923e-02, + -8.8490e-02, -1.3093e-02, -1.1635e-02, -4.9877e-03, -2.2200e-02, + -5.4845e-02, -2.5154e-02, -2.6730e-02, 4.0258e-03, -6.4070e-02, + 5.7675e-03, -5.2886e-02, -1.0505e-01, 2.4540e-02, -7.0383e-02, + 6.6775e-02, 7.7122e-02, 7.9000e-03, -1.3582e-02, -1.0646e-01, + -1.6877e-03, 7.0112e-02, -8.1721e-03, 3.1691e-02, 1.2633e-02, + -1.3226e-02, 1.1874e-01, 6.4461e-02, 2.3030e-02, -1.8555e-03, + -7.8534e-03, -9.0699e-03, 4.2436e-02, -4.7123e-02, 4.4143e-03, + 4.1099e-02, -1.3332e-02, 2.2844e-02, -8.3630e-02, 3.2066e-02, + 5.8189e-02, -1.0272e-01, 9.2019e-02, 1.7425e-03, 6.0141e-02, + 4.8672e-02, 3.7271e-02, 1.6245e-02, 5.3098e-02, 2.1186e-03, + 2.7736e-02, -4.9052e-02, -4.4594e-02, -2.6920e-02, -3.5221e-02, + 8.8737e-02, -8.3154e-02, -7.8560e-02, 4.0014e-02, -3.7013e-03, + 5.5303e-02, -3.8455e-02, 1.6727e-02, 4.0027e-02, -7.6386e-02, + -2.0709e-02, -1.9373e-02, -6.6888e-02, -2.6983e-02, 5.3923e-02, + 2.7105e-02, -4.4220e-02, 8.9578e-02, -4.0661e-02, -6.4093e-02, + -1.7248e-02, 5.8892e-02, 5.3766e-02, -9.9480e-02, -6.1721e-02, + -4.2307e-02, 1.8172e-02, 2.4058e-03], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0344, 0.0046, 0.0019, ..., -0.0018, 0.0054, -0.0178], + [-0.0100, 0.0007, 0.0120, ..., -0.0170, 0.0012, -0.0188], + [-0.0169, 0.0147, -0.0102, ..., 0.0031, -0.0298, 0.0021], + ..., + [ 0.0024, 0.0114, 0.0381, ..., 0.0197, -0.0068, 0.0028], + [-0.0170, -0.0138, 0.0048, ..., 0.0125, -0.0223, 0.0095], + [-0.0003, -0.0298, -0.0086, ..., -0.0083, 0.0122, -0.0196]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2842, -0.3364, 0.0483, ..., -0.4465, -0.3184, -0.2751], + device='cuda:1', 
dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[ 0.0024, 0.0274, 0.0246, ..., 0.0208, 0.0061, 0.0094], + [-0.0033, 0.0003, -0.0214, ..., 0.0064, 0.0232, 0.0025], + [-0.0203, 0.0050, -0.0124, ..., 0.0002, -0.0194, -0.0300], + ..., + [-0.0107, 0.0104, 0.0014, ..., 0.0129, -0.0087, 0.0057], + [-0.0105, -0.0092, 0.0100, ..., 0.0361, -0.0151, -0.0012], + [ 0.0143, 0.0048, 0.0093, ..., 0.0324, -0.0147, -0.0111]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.6382e-02, 3.8788e-02, -3.9490e-02, 1.4725e-02, 9.3918e-03, + 1.0445e-02, -6.1737e-02, 3.3844e-02, 5.9265e-02, -1.2260e-02, + 4.4891e-02, -4.0771e-02, -6.4583e-03, -2.7496e-02, 3.0258e-02, + -5.0850e-03, 7.0000e-03, -2.2263e-02, -2.8976e-02, -1.4990e-01, + 1.4435e-02, -6.6566e-03, 8.3923e-02, 1.2138e-02, 3.6194e-02, + 9.5062e-03, 1.7868e-02, -9.9373e-04, 1.6174e-01, -1.4526e-02, + 3.1921e-02, 3.1769e-02, 1.8433e-02, 2.9099e-02, 2.5482e-02, + -5.1941e-02, 6.6162e-02, -1.1358e-03, -9.9945e-03, -2.1439e-02, + -3.4546e-02, -2.1957e-02, -5.6396e-02, 7.5836e-03, -2.0172e-02, + 4.1412e-02, 3.6560e-02, 2.5604e-02, 2.8915e-02, -2.9007e-02, + 5.4413e-02, -5.9128e-03, 2.4986e-03, 3.2684e-02, 4.9347e-02, + -2.2034e-02, -3.1586e-02, -8.4534e-03, 2.2095e-02, 1.0857e-02, + -2.1271e-02, -3.2949e-04, 4.3144e-03, -2.1332e-02, 1.5564e-02, + -2.1744e-02, 1.0406e-02, 6.1798e-02, 1.3367e-02, -2.4567e-02, + -2.5330e-03, 1.4069e-02, 4.8737e-02, -9.3994e-03, -6.3843e-02, + 1.5808e-02, 1.1917e-02, 2.9114e-02, -3.3813e-02, -6.1531e-03, + 3.9368e-03, -9.1614e-02, -2.0386e-02, -4.3488e-02, 8.3389e-03, + 3.0151e-02, 1.6449e-02, -4.2114e-03, -3.4271e-02, -2.9683e-04, + -1.4572e-02, -1.3908e-02, -6.4812e-03, -1.8646e-02, 7.5035e-03, + 3.2166e-02, 1.9150e-02, -1.2856e-02, 3.4241e-02, -7.5806e-02, + -2.2964e-02, 1.0046e-01, -3.9246e-02, -3.3386e-02, -2.4776e-03, + -5.4626e-03, 3.8849e-02, 6.4453e-02, 1.2329e-02, -9.5415e-04, + 
-4.9858e-03, -3.2730e-03, 5.3215e-03, 1.9150e-02, 6.1554e-02, + -3.2684e-02, -5.8594e-02, -3.1555e-02, 2.5665e-02, -4.1260e-02, + 4.7668e-02, 2.7023e-02, 1.1060e-01, 6.0425e-02, 1.0864e-02, + 1.5717e-02, 9.3155e-03, -3.3112e-02, 2.0508e-02, -7.3471e-03, + 3.4924e-03, 3.2562e-02, 5.3375e-02, 2.5955e-02, 3.5339e-02, + 4.8676e-02, 3.1464e-02, 2.3834e-02, -3.1647e-02, 7.5607e-03, + -2.0996e-02, -1.4824e-02, -1.5793e-02, -4.7180e-02, -1.9836e-03, + 9.1410e-04, 2.6779e-02, -1.4099e-02, -2.5208e-02, -1.8509e-02, + -4.6844e-03, -5.9700e-03, 1.4229e-02, -8.0872e-03, -2.7222e-02, + 2.1942e-02, 1.4473e-02, 2.4582e-02, 1.5747e-02, 1.8356e-02, + -3.4668e-02, 9.4376e-03, -1.3405e-02, -7.5912e-03, -2.1240e-02, + -1.2489e-02, -3.8452e-02, -4.0710e-02, -4.2297e-02, -2.2614e-02, + -7.8613e-02, 1.4809e-02, 3.0228e-02, -5.7037e-02, 5.4565e-02, + 3.1738e-02, 2.1687e-03, 5.0659e-02, 3.1261e-03, 5.6114e-03, + 4.1138e-02, 4.2633e-02, -3.4027e-02, -4.6692e-02, 1.8311e-02, + -1.1124e-02, 2.0264e-02, -1.9775e-02, 2.8580e-02, 5.5170e-04, + 6.7568e-04, 4.3907e-03, 3.4088e-02, -4.0771e-02, 3.2318e-02, + -3.0090e-02, -4.9683e-02, -9.3918e-03, -9.6130e-02, -3.5370e-02, + -2.3651e-02, 8.7585e-03, -1.2131e-02, 8.1711e-03, -5.9113e-02, + 2.9007e-02, -1.7853e-03, -1.6495e-02, -1.4244e-02, -4.5654e-02, + 5.2551e-02, -5.3528e-02, 2.9312e-02, -2.1629e-03, -8.3557e-02, + -4.4922e-02, 4.2023e-02, -1.6342e-02, 1.0269e-02, -9.5139e-03, + 1.3206e-02, 3.4515e-02, -3.6621e-02, -1.7471e-02, 3.8605e-02, + 1.9806e-02, -3.6652e-02, -4.8248e-02, -3.5614e-02, -1.7563e-02, + 4.3823e-02, 2.7390e-02, -1.6220e-02, -3.8483e-02, -7.6355e-02, + 2.3300e-02, 1.8158e-02, -1.2329e-02, 1.6434e-02, -1.2016e-02, + -6.6566e-03, -6.5735e-02, 6.1569e-03, 1.7715e-02, 1.4160e-02, + -2.5757e-02, 7.2365e-03, 1.5454e-01, -5.0735e-03, -5.3375e-02, + 4.7577e-02, 8.2855e-03, -4.5197e-02, 1.9684e-02, 1.8723e-02, + 3.1006e-02, 2.0599e-02, 4.3243e-02, -4.3823e-02, -4.9011e-02, + -1.9409e-02, 1.7181e-02, 3.3356e-02, -6.6711e-02, -5.2277e-02, + 
4.2908e-02, -4.3373e-03, -1.8585e-02, -1.3092e-02, 9.3384e-03, + -3.2135e-02, -3.2898e-02, 6.1615e-02, -2.2934e-02, -3.6865e-02, + -2.4246e-02, 6.7200e-02, -3.8330e-02, -2.7725e-02, -2.3865e-02, + 2.1072e-02, 4.9530e-02, 6.6986e-03, -1.8402e-02, 1.9516e-02, + -1.5631e-03, 4.5868e-02, -2.7618e-02, -3.1555e-02, -4.6005e-03, + -1.1055e-02, 2.8976e-02, -1.4557e-02, -1.5350e-02, -1.6739e-02, + 2.7725e-02, -4.0466e-02, 2.0466e-03, -5.3833e-02, 2.7725e-02, + -4.4983e-02, -6.2675e-03, -8.6594e-03, 1.0460e-02, 6.8542e-02, + -5.1361e-02, 3.8727e-02, -1.6266e-02, -6.6757e-03, -6.0089e-02, + -3.4119e-02, 4.9377e-02, -4.2633e-02, 2.1225e-02, -4.9255e-02, + -1.8417e-02, -2.4338e-02, -2.1500e-02, 1.9897e-02, -2.2430e-02, + 1.3367e-01, 1.7975e-02, -3.0258e-02, 1.3741e-02, 1.2725e+00, + 3.6407e-02, -6.1523e-02, -2.0874e-02, 1.5045e-02, 9.4652e-04, + 4.9347e-02, 2.6367e-02, 3.5522e-02, -6.5613e-03, 4.9011e-02, + 3.9398e-02, 3.3051e-02, -1.3000e-02, 1.8005e-02, 7.5134e-02, + -5.1331e-02, 3.3447e-02, -3.4363e-02, -8.3084e-03, -5.2399e-02, + -1.8814e-02, 1.7899e-02, -7.4585e-02, 3.5217e-02, -4.7340e-03, + 3.0380e-02, -6.2042e-02, -2.7985e-02, -1.7410e-02, -2.3575e-02, + -3.9558e-03, 4.7760e-03, 2.8534e-02, -2.9129e-02, -3.7231e-02, + 1.6571e-02, 4.0680e-02, 3.4760e-02, -8.8501e-03, -8.6129e-05, + 3.5896e-03, 5.3406e-03, -4.6844e-03, -1.2199e-02, -6.5857e-02, + 4.1695e-03, -3.2959e-02, 1.4992e-02, -2.4643e-02, -6.1279e-02, + 6.1859e-02, 2.4109e-02, -4.8798e-02, 5.0812e-02, 3.2471e-02, + -2.4582e-02, 5.6427e-02, -2.0203e-02, 3.5309e-02, -3.2562e-02, + -1.9012e-02, -1.5717e-02, -3.3875e-02, 1.2756e-02, 6.0081e-03, + 1.4915e-02, -1.3599e-01, -3.0975e-02, -2.1469e-02, 9.8953e-03, + -2.6417e-03, -4.9162e-04, -3.3325e-02, 1.4587e-02, 1.0902e-02, + 6.6833e-02, -3.6591e-02, -3.1342e-02, 2.7847e-02, -2.5330e-02, + 9.3460e-03, 2.7695e-02, -2.6413e-02, -3.3264e-02, 1.6281e-02, + 7.5722e-03, 2.1164e-02, -4.0619e-02, 1.8555e-02, 2.2858e-02, + -9.9548e-02, -1.0910e-02, -1.8167e-03, 1.4282e-02, 
-3.1647e-02, + -3.6804e-02, -1.3138e-02, 2.3308e-03, 2.8671e-02, 3.0426e-02, + -5.7922e-02, 2.6340e-03, 1.5732e-02, 2.9373e-02, -2.4246e-02, + -1.2260e-02, 3.4607e-02, 1.7868e-02, 3.2825e-03, -3.2928e-02, + 2.4643e-02, 5.6610e-02, 6.8779e-03, -6.8909e-02, 8.5571e-02, + 8.1718e-05, 2.9480e-02, 4.8370e-02, -1.5967e-01, -1.3496e-02, + 5.0781e-02, 1.3247e-03, -1.0094e-02, 1.5640e-02, -6.2332e-03, + 1.5610e-02, 9.1858e-03, -1.7349e-02, 1.2405e-02, 3.3447e-02, + 1.5305e-02, 2.1362e-02, -3.0045e-02, 7.9575e-03, -5.5878e-02, + -2.7985e-02, -1.7838e-02, -2.4002e-02, -8.2779e-03, -5.5504e-03, + 4.4189e-02, 1.4275e-02, 4.2419e-02, -1.3809e-02, 2.4109e-02, + 4.2381e-03, -1.3628e-03, 2.9755e-02, -1.9045e-03, 5.3711e-02, + 8.7509e-03, -1.8759e-03, 6.0394e-02, -6.9702e-02, 1.0878e-04, + -3.5405e-04, -2.3518e-03, -2.9907e-03, 2.0813e-02, 6.1768e-02, + -5.4871e-02, 7.3120e-02, 6.2317e-02, -1.2886e-02, -7.6675e-03, + 6.4636e-02, 4.2725e-02, 2.7344e-02, 2.7428e-03, 2.3163e-02, + -5.4932e-02, 2.2068e-03, 8.9798e-03, -1.8127e-02, -1.5007e-02, + -2.4323e-02, -3.9337e-02, -2.9999e-02, -2.2545e-03, -3.8853e-03, + 5.4962e-02, -2.0828e-02, -2.2125e-02, 1.5736e-03, 3.0167e-02, + -2.5673e-03, -7.9163e-02, 4.0924e-02, 3.6883e-04, 3.6804e-02, + -1.3237e-02, 2.9697e-03, 1.9150e-02, -4.1016e-02, -4.3701e-02, + 2.6581e-02, -2.0020e-02, -1.6772e-01, -4.7394e-02, 2.4277e-02, + 6.9008e-03, -1.0155e-02, 1.4351e-02, 3.6438e-02, -4.9377e-02, + -4.8409e-03, 3.4149e-02, 2.7313e-02, -2.9572e-02, -3.3112e-02, + -5.9845e-02, 2.7313e-02, -3.5461e-02, 2.0905e-02, 5.1270e-02, + -3.5248e-02, -5.9723e-02, 3.2158e-03, 3.7109e-02, -1.0297e-01, + 6.2439e-02, -1.8906e-02, 1.1749e-02, 5.1544e-02, -2.0386e-02, + 5.8563e-02, 4.2877e-02, -3.7689e-02, 4.1199e-03, -5.0964e-02, + 2.1423e-02, -5.1697e-02, -9.2468e-02, 5.4016e-02, 3.0823e-02, + -2.9541e-02, 2.8229e-02, 4.6661e-02, -2.6871e-02, 1.9348e-02, + -1.6891e-02, 2.2781e-02, -5.3444e-03, -2.2461e-02, 2.5009e-02, + -3.4332e-02, -9.6863e-02, -3.2379e-02, 1.2527e-02, 
-4.8737e-02, + -4.1260e-02, -1.0605e-02, -2.6459e-02, 1.7792e-02, 1.6296e-02, + -3.8452e-03, 4.3274e-02, -5.2582e-02, -2.5513e-02, 2.2491e-02, + -1.9272e-02, -4.9652e-02, -3.3142e-02, -2.8183e-02, -7.0992e-03, + -4.8752e-03, -1.1902e-02, -3.9062e-03, 2.5681e-02, -1.3130e-02, + -2.2751e-02, 7.4402e-02, 7.3662e-03, -4.3488e-03, 2.6505e-02, + -5.1178e-02, -2.8885e-02, -2.3651e-02, 3.8483e-02, 6.0501e-03, + -2.3956e-02, -6.5727e-03, -1.4000e-03, 2.5726e-02, -2.3315e-02, + -1.4435e-02, 8.8348e-03, -2.9083e-02, -5.4871e-02, 1.7960e-02, + 5.2765e-02, 1.8509e-02, 1.7014e-02, 6.7566e-02, -1.2280e-01, + 1.5450e-02, 1.2482e-02, 4.9622e-02, -6.4392e-02, -4.1229e-02, + -2.8473e-02, 1.1497e-02, -1.0080e-03, 4.4098e-02, -7.1716e-03, + -9.2468e-03, -1.5945e-02, 5.2429e-02, -2.1103e-02, -2.1332e-02, + 1.1314e-02, -2.3132e-02, 1.9638e-02, 9.2468e-03, 2.2602e-03, + -2.0538e-02, -6.8169e-03, -5.4047e-02, -5.6366e-02, 6.0959e-03, + 3.7567e-02, 6.7444e-03, 7.8735e-03, 7.1899e-02, 1.2878e-02, + -5.0140e-02, -1.4999e-02, 8.0566e-03, -2.7252e-02, -9.1400e-03, + 2.3514e-02, -2.5391e-02, 2.5883e-03, -9.8114e-03, 1.1627e-02, + 1.1917e-02, -2.7130e-02, -5.7892e-02, 1.5907e-03, 8.6487e-02, + 9.9564e-03, -2.2736e-02, -1.4168e-02, 1.7029e-02, 4.1260e-02, + -6.8054e-03, 1.3733e-03, 1.6312e-02, 1.1803e-02, -2.8137e-02, + 6.4453e-02, 3.0670e-02, -1.2177e-02, -3.4973e-02, -4.0779e-03, + -2.6901e-02, -7.7477e-03, 5.8411e-02, 1.8631e-02, 6.4758e-02, + 2.3270e-02, 8.6060e-02, -3.5370e-02, -2.8580e-02, -2.3117e-02, + -1.4046e-02, -1.1757e-02, 7.2876e-02, -2.0523e-02, -4.0344e-02, + -6.5186e-02, 4.2206e-02, 8.0795e-03, -1.8448e-02, -2.6596e-02, + -2.4967e-03, 2.4490e-03, 2.6062e-02, -1.8570e-02, -2.8748e-02, + 1.5305e-02, 1.9318e-02, 2.8839e-02, 3.0956e-03, -1.1917e-02, + -6.3972e-03, 5.5084e-02, 8.3542e-03, -4.0070e-02, 2.5085e-02, + -3.1372e-02, 1.6403e-02, -2.3087e-02, -3.0289e-03, -4.4281e-02, + 5.6946e-02, 3.4912e-02, -2.2324e-02, -3.3627e-03, 9.8705e-04, + 2.9251e-02, 1.0345e-02, -1.3954e-02, 
-2.2217e-02, -1.3290e-02, + 3.2288e-02, -2.3560e-02, -2.2995e-02, 5.7709e-02, -1.8967e-02, + -1.1040e-02, -3.9490e-02, 1.2451e-02, -1.3283e-02, 2.0813e-02, + -1.8377e-03, -1.5808e-02, 2.8564e-02, -3.5839e-03, -2.7176e-02, + -2.0081e-02, -2.1815e-04, -2.1378e-02, -9.3317e-04, 3.3234e-02, + -3.9917e-02, -3.8116e-02, -3.2074e-02, -8.1406e-03, 1.5671e-02, + -2.3865e-02, -1.2421e-01, 3.7231e-02, -1.7975e-02, 4.6875e-02, + 2.1973e-02, -2.2369e-02, 3.1281e-02, 3.8300e-03, 1.0979e-02, + -2.8793e-02, -1.0155e-02, -1.5823e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.8660, 1.7784, 1.8131, 1.9032, 1.8201, 1.8358, 1.9317, 1.9030, 2.0080, + 1.7371, 1.9171, 1.7857, 1.9481, 1.8647, 1.9881, 1.9470, 1.9653, 1.8904, + 1.9133, 2.0480, 1.8218, 1.8158, 1.8040, 1.7625, 1.8326, 1.8251, 1.8112, + 1.8591, 2.3300, 1.8212, 1.8430, 1.8733, 1.8304, 1.9012, 2.0362, 1.8383, + 1.9599, 1.7815, 1.6626, 1.9035, 1.8171, 1.7817, 1.9178, 1.8311, 2.0381, + 1.8704, 1.8629, 1.8840, 1.8549, 1.8668, 1.7745, 1.8497, 1.9727, 1.9021, + 1.9105, 1.8380, 1.9000, 1.8313, 1.9636, 1.8302, 1.8879, 1.9270, 1.8041, + 1.8294, 1.8350, 1.6809, 1.7347, 1.9509, 1.8550, 2.2307, 1.8262, 1.8292, + 2.1996, 1.8380, 1.7901, 1.9650, 1.9438, 1.7403, 2.0721, 1.7517, 1.9311, + 1.8478, 1.9061, 1.8099, 1.8356, 2.1220, 1.8942, 1.8027, 1.8589, 1.8872, + 1.9714, 1.8759, 1.8909, 1.8603, 1.8456, 1.7588, 1.8874, 2.0046, 1.7945, + 1.8890, 1.9014, 3.0165, 1.8195, 1.8984, 1.8578, 1.9651, 1.9261, 1.9477, + 1.7389, 1.9035, 1.9816, 1.8913, 1.8726, 1.7762, 1.8504, 2.0517, 2.0671, + 1.8934, 1.7774, 1.9831, 1.8847, 1.8045, 1.9016, 1.8355, 1.9538, 1.8974, + 1.8131, 1.8118, 1.8685, 1.9706, 1.9202, 1.7229, 1.9807, 1.8030, 1.8698, + 1.8555, 2.0546, 1.9435, 1.8757, 1.8427, 1.7255, 1.7774, 1.9838, 1.8948, + 1.8492, 1.8438, 1.7917, 1.9375, 1.9346, 1.9279, 1.8923, 2.0421, 1.8791, + 1.8829, 1.8148, 1.5478, 1.8369, 1.9054, 1.8246, 1.8708, 1.9425, 1.9012, + 1.8279, 1.5597, 1.7954, 1.8119, 
1.8879, 1.8401, 1.9324, 1.8507, 1.7592, + 1.9193, 2.0096, 2.0844, 1.8840, 1.9689, 1.9379, 1.9933, 1.8427, 1.7667, + 2.0733, 1.8381, 1.7551, 1.7741, 1.8745, 2.0625, 1.8530, 0.6309, 1.8452, + 1.8917, 1.9017, 1.8086, 1.9182, 1.9565, 1.9748, 1.8523, 1.7366, 1.9107, + 0.8677, 1.8278, 1.8390, 1.9599, 1.8796, 1.8701, 1.7979, 1.9388, 1.7285, + 1.9202, 1.9327, 1.8762, 1.8762, 1.9806, 2.0151, 1.9425, 1.9414, 1.8690, + 1.9037, 1.9074, 1.8259, 1.8553, 2.0224, 1.9358, 1.9235, 1.9911, 1.9111, + 1.8020, 1.8777, 1.9436, 1.9082, 1.9094, 1.9674, 1.8680, 1.7909, 1.9865, + 1.9500, 1.7919, 1.9436, 1.7509, 1.8738, 1.8205, 1.9345, 2.0570, 1.9744, + 1.8709, 1.9073, 1.9121, 1.9625, 2.4609, 1.8394, 2.0095, 1.8269, 1.9627, + 1.7241, 1.9025, 1.7354, 1.8871, 1.9433, 1.7587, 1.9468, 1.9635, 1.8410, + 1.8963, 1.7291, 1.8991, 1.7483, 1.8443, 1.8082, 1.9849, 2.0076, 1.7678, + 1.9225, 1.8132, 2.2230, 1.6371, 1.8443, 1.9185, 1.8186, 1.8701, 1.9011, + 1.8227, 1.9548, 2.0720, 1.8787, 1.8453, 1.9034, 1.8937, 2.1512, 1.7902, + 1.9223, 1.7259, 1.9361, 1.8650, 1.8079, 1.8986, 1.9573, 1.7364, 1.9630, + 1.8175, 1.8065, 1.8854, 1.9771, 1.7975, 1.8899, 1.8159, 2.0447, 1.7576, + 1.8268, 1.9224, 1.9002, 1.8067, 2.0081, 1.9586, 1.6762, 1.9101, 1.8413, + 1.7823, 1.9550, 1.9823, 1.7837, 1.8970, 2.0231, 1.8636, 1.7673, 1.9067, + 2.3982, 2.0804, 1.8810, 1.8703, 1.8265, 1.7580, 1.8966, 1.8586, 1.8388, + 1.9037, 1.8352, 1.8316, 1.8336, 1.8934, 2.0168, 1.9769, 2.0078, 1.8481, + 1.9293, 1.8833, 1.8839, 1.8644, 1.8012, 1.8637, 1.9363, 1.8971, 1.8371, + 1.9506, 1.8396, 2.2401, 1.8631, 1.9060, 1.7227, 1.9123, 1.8348, 1.8611, + 1.8895, 1.8920, 1.6990, 1.8529, 2.0385, 1.8732, 1.8881, 1.7646, 1.8885, + 2.1197, 1.8467, 1.7632, 1.8907, 2.0283, 1.9200, 1.7656, 1.8061, 1.9139, + 1.9146, 1.7305, 1.8498, 1.6375, 1.7809, 1.9367, 2.0803, 1.9106, 1.9903, + 1.9454, 1.8341, 1.9366, 2.0857, 2.1631, 1.8223, 1.8999, 1.8695, 1.8775, + 1.7864, 1.9755, 1.9165, 1.8118, 1.8268, 1.9509, 1.8592, 1.8168, 1.8380, + 1.8159, 1.8603, 1.9215, 1.8509, 
1.7259, 1.8962, 1.8724, 1.9343, 1.8903, + 1.8474, 1.6385, 1.8949, 1.8523, 1.9570, 1.8141, 1.8389, 1.8728, 1.7764, + 1.9630, 1.8911, 1.8530, 1.8837, 1.8320, 2.0132, 1.8928, 1.7913, 1.8610, + 1.8055, 1.8332, 1.8728, 1.8835, 1.9436, 1.8447, 1.8912, 1.9659, 1.8275, + 2.0092, 1.9458, 1.8524, 1.8043, 1.8004, 1.7571, 1.7796, 1.8534, 1.9481, + 1.8991, 1.8947, 1.8967, 1.9415, 1.8822, 1.9627, 1.8128, 1.9307, 1.9071, + 1.8921, 1.8939, 1.7313, 1.8759, 1.8561, 1.7990, 1.7605, 1.7864, 1.8529, + 1.8590, 1.8761, 1.9702, 1.8165, 1.9052, 1.8691, 1.8814, 1.8573, 1.9790, + 1.8433, 1.8886, 1.9109, 1.8462, 2.0010, 1.8940, 1.6905, 1.8911, 1.9256, + 1.9097, 1.9327, 1.8287, 1.8924, 1.9177, 1.7493, 1.7674, 1.8636, 1.7492, + 1.9836, 1.8146, 1.8620, 1.7825, 2.0120, 1.8672, 1.8315, 1.8449, 1.7789, + 1.8832, 2.1938, 1.9455, 1.8791, 1.8934, 1.7710, 1.8357, 1.7524, 1.8833, + 1.9098, 1.9889, 2.0089, 1.7989, 1.9659, 1.9234, 1.8230, 1.8224, 1.8910, + 2.3350, 2.0102, 1.9047, 1.9591, 2.0173, 1.8123, 1.8544, 1.8656, 1.8224, + 1.9651, 1.9880, 1.8107, 1.9073, 1.9978, 1.8600, 2.0117, 1.8631, 2.0721, + 1.9352, 1.9215, 1.9140, 1.8218, 1.7605, 1.9906, 1.9540, 1.9666, 1.9012, + 1.9246, 2.2353, 1.8372, 1.7955, 1.8482, 2.2460, 1.8172, 1.7272, 1.9246, + 1.8234, 1.9038, 1.7496, 1.7468, 1.8788, 1.7610, 1.8758, 1.7716, 1.8399, + 1.8478, 1.8214, 1.9384, 1.7953, 1.8796, 1.9437, 1.8393, 1.8481, 1.9724, + 1.9443, 1.8271, 1.9000, 1.9068, 1.9761, 2.0626, 1.8821, 1.8797, 1.8821, + 1.9393, 2.0845, 1.7724, 2.2779, 1.7483, 1.8708, 2.3283, 1.8016, 1.7931, + 1.7968, 1.9056, 1.9454, 1.8709, 1.8524, 1.8509, 1.8794, 1.8774, 1.9060, + 1.9310, 1.8138, 2.0550, 1.7575, 1.9538, 2.1155, 1.8975, 1.9170, 1.8156, + 1.8810, 1.7680, 1.8986, 1.9357, 1.9116, 1.7712, 1.8008, 1.9611, 1.7758, + 1.8260, 1.8118, 1.8727, 1.8630, 1.8199, 1.9040, 2.1573, 1.9015, 1.8741, + 1.9744, 1.8355, 1.9861, 1.9444, 1.9224, 1.9344, 1.9126, 1.8261, 1.9051, + 1.7932, 1.9132, 1.8560, 1.9965, 1.8377, 1.8254, 1.8571, 1.8778, 1.8591, + 1.8667, 1.8168, 2.0227, 1.8521, 
2.0372, 1.9477, 2.0108, 1.8808, 1.8310, + 1.9081, 1.7974, 1.9341, 1.7954, 1.8657, 2.0381, 1.8839, 1.8975, 1.9008, + 1.8652, 1.7003, 1.8053, 2.0298, 1.9783, 1.8093, 1.8550, 1.8205, 1.9117, + 2.0263, 1.9597, 1.8719, 1.9367, 1.8217, 1.9005, 3.9133, 1.9469, 1.7762, + 2.0920, 1.8573, 1.9510, 1.9302, 1.8653, 1.9560, 1.9251, 1.8615, 1.9694, + 1.7484, 1.7944, 1.8781, 1.8119, 1.8498, 1.7536, 1.8787, 1.9861, 1.8066, + 1.9102, 1.9123, 1.8142, 1.7973, 1.9175, 1.9055, 1.8494, 1.8617, 1.8569, + 1.9394, 1.9115, 1.9110, 1.7258, 1.8709, 1.8557, 1.7597, 1.9841, 1.9129, + 2.0609, 1.8807, 1.7975, 1.8461, 2.0206, 1.8714, 1.7895, 1.9192, 1.7968, + 1.8826, 1.7698, 1.9447, 1.8479, 1.8068, 1.9118, 1.8883, 1.9746, 1.8796, + 1.8511, 1.9405, 2.0582, 1.7386, 1.9657, 1.8745, 1.9578, 1.9301, 1.9415, + 1.9653, 1.9379, 1.8646, 1.9669, 1.9559, 1.8806, 2.0812, 1.8663, 1.9106, + 2.0192, 1.9615, 1.8668, 1.8325, 1.8629, 1.8094, 1.9444, 1.9607, 1.8693, + 1.7771, 1.7942, 1.8408], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.0001e-01, 3.2639e-01, -2.1039e-01, -5.0999e-01, -4.2389e-02, + -2.6753e-01, 9.4194e-01, -2.7457e-01, -5.2690e-01, -5.5466e-02, + -2.7905e-01, 1.6774e-01, -3.8277e-01, -2.5514e-01, -6.1334e-01, + -5.7646e-01, -2.2728e-01, -6.7665e-02, -5.3467e-01, -2.3960e+00, + -5.1092e-01, 3.8211e-01, -2.0304e-01, -4.5887e-01, -6.1576e-01, + 7.0121e-02, -4.9523e-01, -3.8478e-01, -1.0415e+00, 5.6320e-01, + -2.0277e-01, -7.7615e-02, 3.5587e-01, -5.4729e-01, 2.5944e-01, + -2.1211e-01, -9.3042e-01, -3.8996e-01, 1.8186e-03, 2.1777e-01, + -2.0130e-01, -2.2191e-01, 1.0911e+00, -4.2226e-01, 1.1266e-01, + -4.3535e-01, -7.5518e-01, -1.5334e-01, -3.4593e-01, -1.9087e-02, + -4.6673e-01, -3.2443e-01, -4.1880e-01, -6.7491e-01, -2.2719e-01, + 1.1494e+00, 6.2432e-01, 4.0111e-02, -5.6136e-01, 2.8486e-01, + 4.2190e-01, 2.0334e-01, 1.1443e-01, 9.1238e-02, 3.3551e-01, + -4.9760e-02, 8.9777e-02, -6.3372e-01, 1.2032e-01, -1.1987e+00, + 2.0360e-01, 3.1116e-01, 6.7021e-01, 
3.3498e-01, -3.2979e-01, + -5.0852e-01, 2.5790e-01, 4.6133e-03, -1.0485e+00, 7.3088e-02, + -5.5104e-01, -1.3722e-01, -1.4933e-01, 5.5035e-01, 2.8101e-01, + -6.5381e-01, 6.2764e-01, 7.8082e-02, -1.6087e-01, -1.8105e-01, + 7.7904e-01, 3.9496e-01, -1.0841e-01, 5.5589e-02, -5.2023e-01, + -2.7022e-02, -9.2938e-02, 8.5328e-01, -3.3161e-01, 7.2774e-01, + -1.6572e-02, -2.6193e+00, 3.1187e-01, 5.7197e-01, 4.1484e-01, + 8.9408e-01, -8.1853e-01, -3.2315e-01, -7.6765e-02, 2.0524e-01, + 7.7114e-01, 4.7549e-01, -6.9258e-01, -2.9009e-03, 1.7830e-01, + -7.2715e-01, 9.8283e-01, 4.1635e-01, 1.7894e-01, 3.5228e-02, + 3.7125e-01, -3.4730e-01, -6.6480e-01, -2.2598e-01, -2.7736e-01, + -4.1118e-01, -2.1612e-01, 3.9944e-01, -4.8386e-01, 5.5842e-01, + -1.0969e+00, -4.2645e-01, -9.7789e-01, -1.5478e-01, 3.4732e-01, + 1.5894e-01, -5.3756e-01, -2.1893e-01, -7.5276e-02, -5.2727e-01, + 1.4501e-03, -1.1514e-01, 5.3553e-01, 4.6137e-01, -1.4240e-01, + -4.5386e-01, -4.4217e-01, 2.2191e-01, 1.7962e-01, 7.0115e-01, + 6.0106e-01, 7.0242e-01, 4.8614e-03, 1.2692e-01, -4.2740e-01, + -7.7894e-02, -2.7168e-01, -4.2341e-01, -5.9347e-01, 6.1331e-02, + -1.6603e-01, -8.8191e-01, 1.5898e-01, -3.9610e-01, -3.6995e-01, + 1.3130e-01, -7.6066e-01, 4.9251e-01, -5.2796e-01, 2.7328e-01, + 4.2783e-01, -2.3592e-01, -8.5147e-01, 5.8858e-01, -2.6978e-01, + -6.8101e-01, -3.1506e-02, -6.8128e-01, -4.4942e-01, 3.5423e-01, + -8.0526e-01, -5.8796e-01, -4.6747e-02, -8.0376e-02, 1.9648e-01, + 9.7790e-01, 9.9415e-02, -1.5476e+00, 7.2616e-02, -8.8570e-01, + -1.3258e-01, -2.2147e-01, 2.3140e-01, 7.3908e-01, 2.1519e-01, + 5.9967e-01, 3.8346e-01, -2.5702e-02, 6.3070e-01, 2.4397e-01, + -3.7191e-01, -2.8234e-01, -3.5167e-03, -1.6815e-01, 3.0992e-01, + 1.2480e-01, -2.3985e-01, 7.4806e-01, 6.7826e-01, 2.2846e-01, + 2.1610e-01, -2.8469e-01, -6.4048e-01, -3.1248e-01, -6.3940e-01, + 8.4760e-02, -1.8882e-01, -2.1436e-01, -3.5960e-01, 2.6390e-02, + 1.3632e+00, -3.4175e-01, 8.5690e-01, 8.1155e-01, -5.4218e-01, + -2.9913e-01, 1.1988e-01, 
3.5309e-01, 4.1926e-01, 5.5749e-01, + -6.3115e-01, -5.2563e-01, 1.7902e-01, 4.0641e-01, 6.2101e-01, + -2.2727e-02, -2.9388e-01, -1.3951e-01, -1.7952e-01, 4.4467e-01, + 6.6099e-01, 6.1575e-01, -6.6078e-01, 5.1975e-01, -3.6814e-01, + -5.6789e-01, 5.1790e-01, -1.5656e+00, 3.2540e-01, 8.2552e-01, + 3.7043e-02, 6.1390e-01, 1.6771e-01, -1.2280e+00, -2.3813e-01, + -1.2629e-01, -3.6776e-01, -1.0564e-01, 6.0078e-01, 7.3554e-01, + -2.2202e-01, -7.2251e-01, -3.9098e-01, 6.6057e-01, 1.7973e-01, + -6.7612e-01, -1.0394e-01, 9.9293e-01, -5.2571e-01, -2.6658e-01, + -4.0639e-01, 2.9404e-01, -8.6945e-01, -6.0640e-01, 2.2846e-01, + -3.4287e-01, -5.3146e-02, 5.7531e-01, -8.1353e-03, 3.5567e-01, + 5.9598e-01, -5.8310e-01, -8.2484e-03, 2.3647e-01, -2.1022e-01, + 5.7921e-01, 9.6365e-01, 1.0753e-02, 5.1279e-01, -2.9078e-01, + -4.8622e-01, -3.3165e-01, 4.2497e-01, -3.1292e-01, 3.9897e-01, + 1.2795e-01, 3.1480e-01, 1.4672e-01, 1.8182e-01, -1.5524e-01, + 6.5747e-01, 3.4742e-01, -3.8288e-01, -5.8985e-01, -5.5828e-01, + 2.3804e-01, -4.3328e-01, 2.1676e-01, -2.7809e-01, 5.9206e-01, + 5.3341e-01, -1.0016e+00, 4.7881e-01, -7.3726e-01, 2.7895e-02, + -4.1925e-01, -3.7070e-01, -4.0917e-01, 2.7832e-01, 5.5444e-02, + -1.5275e+00, -3.5360e-01, 1.6569e-01, -3.6858e-01, 2.1436e+00, + -1.5858e+00, 2.6521e-01, -4.9049e-01, -1.3124e-01, -2.1085e-01, + -2.3881e-01, -7.6808e-01, -1.3304e-01, -3.7610e-01, -3.0138e-01, + -4.9982e-01, -8.1163e-01, 1.7125e-02, 5.6042e-01, -3.6971e-01, + 3.8646e-01, 1.5706e-01, 2.9716e-01, 8.9005e-01, 3.0204e-02, + 2.1164e-01, 1.1384e-01, 1.8581e-01, -4.4801e-02, 3.3047e-01, + -8.8904e-02, 2.5982e-01, 3.3461e-01, 9.7410e-01, -1.0836e-01, + -2.2746e-02, -2.8105e-01, 5.6986e-01, 4.0678e-01, -3.3759e-02, + -6.6359e-01, 1.6146e-01, 8.9127e-02, -4.1533e-01, 4.9490e-01, + -8.3370e-02, -2.3326e-01, 3.0722e-01, 2.6059e-01, 7.1568e-01, + 1.9166e-01, 8.7676e-02, -5.6655e-01, -5.7934e-01, 5.5463e-01, + -7.6014e-01, -1.5674e-01, 5.9477e-01, -4.9040e-01, -3.2710e-02, + 8.4659e-02, 3.3333e-02, 
2.0411e-01, -5.1136e-01, 5.6339e-01, + 3.8002e-01, -6.0355e-01, 5.3952e-01, -7.2400e-01, -8.2425e-01, + 1.3842e-01, 1.2815e+00, 4.2212e-01, 7.7251e-01, 5.8582e-01, + -2.2402e-01, 1.3369e-01, 1.1079e+00, 2.1883e-01, -2.8498e-01, + 3.8395e-01, -1.3351e-02, 2.1286e-01, -2.2877e-01, 4.7099e-01, + -1.4595e-01, 3.4667e-01, 8.2163e-01, -6.9097e-02, -3.3457e-01, + -6.7919e-02, -4.7688e-01, 6.6409e-01, 3.8889e-01, 8.7833e-01, + -5.7702e-01, 5.9352e-03, -3.6324e-01, 6.8234e-02, -4.7906e-02, + 2.0999e-01, 9.8040e-02, -6.9085e-01, -3.6914e-01, -6.4030e-01, + 2.5372e-01, 4.0410e-01, -1.3079e-01, -5.6092e-01, -1.8909e-01, + -3.2377e-01, -2.1252e-01, -2.2387e-01, 8.7467e-02, 5.0728e-01, + -1.1403e-01, -5.4595e-01, 3.9349e-01, 5.9183e-01, -9.6032e-01, + 8.2167e-02, -5.1443e-01, -9.4190e-01, -9.3388e-01, 9.4551e-02, + -2.4728e-01, 2.9515e-01, -4.8768e-01, -7.5996e-02, -5.9243e-01, + -4.0558e-01, 2.2324e-01, -9.0361e-01, -5.1321e-01, -1.3411e-01, + 5.6479e-01, -9.0425e-03, -5.5796e-01, 1.7802e-01, 2.9690e-01, + -3.8099e-01, 2.4017e-02, -3.0891e-01, -3.2830e-01, 4.2963e-02, + -6.2601e-01, -2.5049e-01, -7.4859e-03, 1.1617e-01, -1.3194e-01, + 4.7443e-01, -1.3437e-02, -7.4380e-01, -1.2107e-01, -4.5423e-01, + -4.6178e-01, 3.5232e-01, -5.6029e-01, 7.6554e-01, 7.5839e-01, + 4.9742e-01, -1.8662e-01, 1.2774e-01, -3.0275e-01, 4.3990e-01, + -6.9867e-01, -3.8308e-01, 1.2532e-01, 1.5111e-01, -4.7295e-01, + 2.0581e-01, 3.9599e-02, -4.5042e-01, -4.7772e-01, -1.2960e-01, + -2.4035e-02, -8.1313e-01, 7.2255e-01, 2.5972e-01, 8.9410e-01, + 7.9167e-02, -1.3100e-01, -1.8255e-01, -1.3246e-02, 1.7391e-01, + -1.0780e+00, 5.4875e-01, 2.4150e-02, -3.4517e-01, -5.1029e-01, + -7.0280e-01, 1.6151e-02, -6.9358e-01, -1.9686e-01, 1.8666e-01, + -3.3663e-02, 2.8270e-01, 4.3943e-01, 7.5781e-01, -8.4487e-02, + 1.6837e-01, 5.1663e-01, 1.0567e+00, 1.0433e+00, -4.2754e-01, + -4.3495e-01, 6.6799e-01, 2.8105e-01, 2.9122e-01, 1.0039e-01, + -8.5692e-02, -6.0030e-01, 2.7274e-01, -1.2726e-01, 3.2528e-01, + -4.7566e-01, 
-8.0034e-02, 9.9471e-01, 2.6183e-01, -5.3645e-01, + 2.7173e-01, 9.5450e-01, -5.9020e-01, 1.5703e-01, 4.1426e-01, + -8.9005e-01, 6.8087e-01, -6.4571e-01, -4.4648e-01, -2.6380e-01, + -1.5983e+00, -3.5993e-01, 2.2042e-01, -6.5626e-01, 1.1699e+00, + -2.0258e-01, 3.4444e-01, 5.6699e-01, -6.5344e-01, 5.0124e-01, + 3.2623e-01, 2.0384e-01, -3.2459e-01, 8.5185e-03, 1.3471e-01, + 5.1772e-01, 1.4214e-01, 4.1939e-02, 6.7896e-01, 8.3246e-02, + -6.1466e-02, 4.5889e-02, -7.6077e-01, 2.5588e-01, 5.4054e-01, + 9.3660e-01, -4.6980e-01, -1.7507e-01, -1.4851e-01, 1.8071e-01, + 4.1031e-02, 1.4749e-02, 6.5961e-01, -7.0903e-02, 5.9657e-02, + -1.9495e-01, 8.3158e-01, -1.5257e-01, 1.2255e+00, -1.2356e-01, + -2.0381e-01, 4.8242e-01, -1.6604e-01, -8.8528e-02, 1.0697e-01, + 1.9617e-01, -7.0234e-01, 4.4242e-01, -5.7977e-02, 3.9103e-01, + 1.8679e-01, -4.9163e-01, 4.2347e-03, -6.8967e-01, 1.6035e-01, + -8.5094e-01, -7.4418e-02, 6.7336e-02, -8.3224e-01, 2.6062e-02, + 4.9192e-01, 5.2151e-01, -1.6844e-01, 2.4388e-01, -3.7647e-02, + -4.8822e-01, 4.6222e-03, 1.2678e-01, -3.0084e-01, 6.3812e-01, + 7.1659e-01, 1.7261e-01, -8.0721e-01, 6.4520e-01, -1.2987e-01, + -1.7628e-01, -3.8206e-01, 9.2294e-01, 3.3497e-01, 3.1547e-01, + 3.1440e-01, 4.2543e-01, -8.7846e-01, 5.0277e-01, -1.7191e-01, + -4.8866e-01, 6.1943e-01, 4.8681e-01, 3.1232e-01, -2.7119e-01, + 5.9825e-01, -3.5353e-01, 3.5124e-01, 2.8750e-02, -6.5004e-01, + 2.6290e-01, -2.8577e-01, 2.7292e-01, -3.7203e-02, 2.6134e-01, + 8.5432e-01, -5.0034e-01, -5.1107e-01, -3.8058e-01, -2.5238e-01, + 4.3307e-01, 3.9068e-01, 7.7398e-01, -5.3277e-01, 4.4721e-01, + 1.1372e-01, 5.0360e-01, -1.7603e-01, 2.4101e-01, 2.2365e-01, + -1.8078e-01, 2.7429e-01, -2.1549e-01, 1.4268e-01, 3.3917e-01, + -6.9570e-01, -4.9179e-01, -1.5029e-01, 6.5821e-01, 8.5738e-01, + -8.3008e-01, -3.8017e-01, -3.3007e-01, 5.9142e-01, 5.3897e-01, + 1.3393e-01, 2.6619e+00, -8.4140e-01, 7.2939e-02, -1.0110e+00, + 4.0343e-01, -7.9010e-01, -5.8433e-01, 5.6104e-01, 1.6495e-01, + -6.4035e-01, 
-2.1644e-01, -1.1082e+00, -2.0013e-01, -2.8716e-01, + 9.0775e-01, 4.8080e-01, 1.1360e-01, -5.3904e-02, -3.1460e-01, + 4.1571e-01, 8.1695e-02, -1.6502e-01, -1.2634e-01, 6.4734e-01, + 4.4058e-01, 6.9211e-03, -3.5066e-01, -2.4901e-01, 9.0255e-01, + -1.1789e-01, -6.6065e-01, -1.2066e-01, -1.7871e-02, 6.4703e-01, + -2.7888e-01, 3.9891e-01, -1.8701e-01, -5.6675e-01, 5.2930e-01, + -1.1687e+00, -3.6498e-01, 1.4806e-01, -4.1063e-01, 1.0374e+00, + -4.3072e-01, -2.1022e-02, 4.1391e-01, -3.7349e-02, 2.4009e-01, + -7.7916e-02, -6.8427e-01, -8.1858e-03, 1.2579e-01, 2.1868e-01, + -6.8004e-01, 7.4477e-01, 3.0428e-01, 4.4441e-02, -5.2122e-01, + -8.5566e-01, -7.7845e-02, -4.6011e-01, -4.8875e-01, -6.0817e-01, + -4.2802e-01, -1.0616e-01, -6.4764e-01, 5.8157e-01, 9.6697e-02, + 4.5654e-01, 3.5184e-02, -6.4207e-01, -6.6000e-01, 1.6382e-02, + 3.3541e-01, 4.0628e-01, -6.7456e-01, 8.9355e-01, -5.9333e-02, + 7.9808e-02, 2.2350e-01, -3.6857e-01, 9.5804e-01, 1.1220e-01, + -4.0588e-02, 6.0938e-01, 3.0536e-03], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-0.0334, 0.0285, 0.0295, ..., 0.0152, -0.0162, -0.0086], + [-0.0067, 0.0443, -0.0088, ..., 0.0052, 0.0228, -0.0394], + [-0.0080, -0.0090, -0.0276, ..., -0.0169, -0.0212, 0.0219], + ..., + [-0.0186, 0.0049, -0.0079, ..., -0.0045, 0.0192, -0.0271], + [ 0.0003, 0.0048, -0.0178, ..., -0.0006, -0.0150, 0.0126], + [ 0.0191, -0.0278, 0.0059, ..., 0.0222, 0.0142, 0.0118]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.4333, 0.1654, -0.0519, ..., -0.0249, 0.0006, 0.0306], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 0.0058, 0.0067, 0.0045, ..., -0.0050, 0.0077, -0.0065], + [ 0.0011, -0.0229, -0.0150, ..., 0.0013, 0.0057, 0.0196], + [-0.0234, 0.0136, -0.0235, ..., 0.0329, -0.0069, 0.0318], + ..., + [ 0.0086, 0.0077, -0.0036, ..., -0.0093, -0.0244, 0.0068], + [ 
0.0283, 0.0173, 0.0116, ..., -0.0010, 0.0039, -0.0024], + [ 0.0225, 0.0120, 0.0018, ..., -0.0170, 0.0129, -0.0031]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 4.6478e-02, 4.1260e-02, -5.7587e-02, 4.2152e-03, -1.5381e-02, + -7.1945e-03, 7.1960e-02, 3.3722e-03, 1.2032e-02, -1.7223e-03, + -4.3213e-02, -2.2369e-02, 2.6031e-02, 2.9510e-02, -1.9821e-02, + 1.1589e-02, 3.1555e-02, 5.9738e-03, 2.5436e-02, 1.7834e-01, + -1.2909e-02, -1.7929e-02, 1.7914e-02, -7.0419e-03, 7.2670e-03, + -1.1589e-02, -2.1347e-02, 3.9612e-02, 6.9214e-02, -3.1097e-02, + 3.6774e-02, 3.3020e-02, 3.6297e-03, -3.4698e-02, -1.0223e-02, + 3.4210e-02, 4.7668e-02, -3.0304e-02, -3.7567e-02, -4.2389e-02, + -3.6896e-02, 5.4779e-02, -5.8228e-02, 4.9225e-02, -6.8420e-02, + -1.0086e-02, -1.3298e-02, 1.2856e-02, 7.1899e-02, 4.1229e-02, + 7.8369e-02, -2.3773e-02, 7.1411e-02, 3.9337e-02, -5.9624e-03, + 5.9235e-02, 5.2368e-02, -7.6790e-03, 1.0061e-03, 1.7975e-02, + 1.2077e-02, 1.7365e-02, 2.3132e-02, -3.7811e-02, -1.7868e-02, + 2.1454e-02, -1.1116e-02, -9.3155e-03, 5.4260e-02, -1.4148e-01, + 5.0934e-02, 2.6646e-03, -4.3518e-02, 1.6846e-02, -2.5818e-02, + -3.9856e-02, 1.7212e-02, -3.2196e-03, -5.2166e-04, -5.6641e-02, + -1.0824e-03, -2.0920e-02, 4.7874e-03, -1.6281e-02, 4.0894e-02, + 4.4708e-02, -2.2568e-02, 3.1982e-02, -2.4689e-02, -2.5970e-02, + -4.4434e-02, 3.2562e-02, 5.6458e-02, -1.5976e-02, 1.6129e-02, + -1.6968e-02, 3.2330e-03, 4.4403e-02, -3.6438e-02, 3.1891e-02, + -1.8921e-03, -1.9669e-02, 3.1342e-02, -5.9235e-02, 1.8967e-02, + 2.7206e-02, 7.2021e-02, 2.2812e-02, 2.5635e-02, 2.3743e-02, + 2.0905e-02, 8.2458e-02, -1.6117e-03, 5.7190e-02, -3.2349e-02, + -3.7994e-02, 1.8845e-02, 1.8845e-03, 5.3284e-02, 2.1801e-03, + 2.8900e-02, -5.6824e-02, -4.2610e-03, -2.7145e-02, -3.4515e-02, + -2.0996e-02, 2.9221e-02, -8.8501e-02, 5.2567e-03, 5.8441e-02, + -2.4704e-02, -2.6581e-02, -1.5053e-02, -1.8555e-02, 3.0090e-02, + 1.1121e-01, -2.7542e-03, 
1.1032e-02, -2.6871e-02, 6.1913e-03, + -1.2978e-02, -2.2446e-02, 2.0050e-02, 3.3478e-02, -5.1308e-03, + -4.4769e-02, 4.5868e-02, 1.2375e-02, 1.7563e-02, 5.1239e-02, + 2.5131e-02, -2.2842e-02, -1.8188e-02, -1.9974e-02, -4.7913e-02, + -2.3041e-02, 6.3705e-03, -3.4695e-03, -2.0279e-02, 3.1616e-02, + 2.6306e-02, 1.2650e-02, 3.2104e-02, -4.3823e-02, -1.5366e-02, + 1.1955e-02, -1.1835e-01, -1.2444e-02, 2.3632e-03, -8.0872e-03, + -3.7575e-03, -4.6234e-02, -4.8340e-02, 2.8748e-02, 3.4576e-02, + 3.8208e-02, 6.4636e-02, -5.4962e-02, 4.1351e-03, -4.1866e-04, + 1.9073e-02, -6.0310e-03, -2.4857e-02, -3.9795e-02, 5.2948e-02, + 3.6041e-02, -3.3447e-02, 7.5500e-02, -4.7493e-03, -2.7298e-02, + -2.9510e-02, -6.9695e-03, 8.8806e-03, 1.5434e-02, -4.9011e-02, + 8.0948e-03, -9.0485e-03, -2.1530e-02, -9.2407e-02, 8.7585e-03, + -9.7717e-02, 2.3956e-02, 6.7871e-02, 4.1351e-02, 1.3680e-02, + 1.4982e-03, 1.0400e-01, 2.1118e-02, -1.5480e-02, 3.6591e-02, + 1.4511e-02, -5.4169e-03, 4.0321e-03, 6.5552e-02, 6.5842e-03, + 1.4305e-02, 4.5586e-03, -1.3863e-02, 1.2848e-02, -5.1727e-02, + 8.5144e-02, 3.9581e-02, -9.8114e-03, 2.3163e-02, -1.4214e-02, + 8.0032e-03, -2.9968e-02, -1.5175e-02, -3.9558e-03, -1.3176e-02, + 5.9692e-02, 1.1299e-02, -3.5736e-02, 1.2413e-02, -6.2469e-02, + 2.5238e-02, 1.4099e-02, 2.2583e-02, 4.8035e-02, 2.4109e-02, + 1.8890e-02, 2.1240e-02, 1.5610e-02, 1.5671e-02, -2.6047e-02, + 1.4259e-02, -3.4103e-03, -1.4206e-02, -3.4271e-02, 2.3422e-02, + -5.7800e-02, 3.7781e-02, -2.2018e-02, -3.3508e-02, -1.1887e-02, + -1.6037e-02, 2.2068e-03, 1.6724e-02, -1.4359e-02, 6.9962e-03, + -5.2460e-02, 4.4952e-02, 6.4087e-02, -4.5502e-02, 1.1162e-02, + 5.1422e-02, -2.1790e-02, 3.6346e-02, 1.4618e-02, -2.3529e-02, + 2.8046e-02, -3.3173e-02, 8.0795e-03, -3.8574e-02, -9.4299e-02, + 1.6769e-02, 2.6413e-02, -4.2328e-02, 9.2010e-03, 3.3386e-02, + -2.6817e-03, -5.0995e-02, -2.8839e-02, 5.7106e-03, -1.2684e-03, + 8.8806e-03, -4.4434e-02, 8.3494e-04, 6.4545e-03, -7.7576e-02, + -4.2206e-02, 3.7933e-02, 
2.3224e-02, -1.9028e-02, 2.0508e-02, + 1.7151e-02, -4.0627e-03, 2.8107e-02, 1.7044e-02, 7.8979e-02, + -2.2781e-02, 1.3550e-02, -1.7624e-02, -3.5736e-02, 2.3972e-02, + -1.8845e-02, 2.1347e-02, 4.6234e-03, -5.3711e-02, 2.4399e-02, + 5.7259e-03, 6.3095e-03, 3.8414e-03, 1.4709e-02, 7.2899e-03, + 8.6517e-03, 2.8656e-02, -5.5122e-04, 3.4008e-03, 2.0630e-02, + 2.3346e-02, 1.2459e-02, -6.3293e-02, -5.6732e-02, 1.0559e-01, + 9.0759e-02, 1.4839e-02, -9.9411e-03, 2.0111e-02, 5.5511e-02, + -2.4433e-03, 2.4063e-02, -6.2561e-02, -3.9558e-03, -1.1353e-02, + 3.7445e-02, -3.1490e-03, -9.2468e-03, -6.5269e-03, 2.7252e-02, + -3.0548e-02, -4.4556e-02, -3.9642e-02, -1.2064e-03, -3.4332e-02, + 4.6959e-03, -4.1321e-02, -4.5990e-02, 1.6342e-02, -4.4708e-02, + 3.6255e-02, 2.2034e-02, 6.4583e-03, 7.3364e-02, -3.8513e-02, + 1.5930e-02, 6.1096e-02, -2.4246e-02, -2.3575e-02, -2.4662e-03, + 1.0941e-02, -1.9211e-02, -2.1301e-02, -1.3809e-02, -2.5665e-02, + 3.1921e-02, -1.8806e-03, 2.7420e-02, -4.3671e-02, 2.3239e-02, + -9.8572e-03, 1.0803e-02, -3.4607e-02, -4.7791e-02, -6.9275e-03, + -1.7075e-02, 6.0455e-02, 3.6407e-02, 5.3436e-02, 5.7190e-02, + -3.1952e-02, 7.1564e-03, 1.6922e-02, 7.2937e-03, -2.6302e-03, + -5.5359e-02, -1.1360e-02, -2.0859e-02, 1.6296e-02, 1.3933e-03, + 3.3905e-02, -7.8857e-02, 1.4816e-02, 1.6174e-02, -9.2602e-04, + 1.1911e-03, 4.7226e-03, -1.4030e-02, -1.4473e-02, -5.9753e-02, + 6.2988e-02, 1.1841e-02, -4.2389e-02, 6.6284e-02, -1.9791e-02, + -2.5463e-04, -1.6571e-02, -2.0081e-02, -5.6793e-02, 2.2049e-02, + 3.3112e-02, 5.5351e-03, -1.9684e-02, 1.1755e-01, 9.5825e-02, + 9.2834e-02, 3.2043e-02, 2.7237e-02, -2.0081e-02, 1.8646e-02, + -7.5134e-02, -6.1676e-02, 3.1097e-02, -2.4750e-02, -5.6580e-02, + -1.4442e-02, -2.0447e-02, -2.1805e-02, 5.7526e-02, -1.5678e-03, + -4.7516e-02, -7.0923e-02, 1.8921e-03, 2.2583e-02, 4.3732e-02, + 7.9250e-04, 1.4275e-02, -1.4984e-02, 2.1400e-03, -4.6387e-02, + 6.9275e-03, -8.8120e-03, 8.7646e-02, -2.3669e-01, 1.8951e-02, + -7.2174e-03, 1.7166e-02, 
3.0014e-02, -3.4973e-02, -4.0680e-02, + -5.1605e-02, -2.1088e-04, -1.8845e-02, 2.7466e-02, -1.8356e-02, + 5.8075e-02, -4.3304e-02, -7.6355e-02, -8.0200e-02, 2.8427e-02, + -3.8055e-02, 5.0201e-02, -1.3794e-02, 2.6474e-02, 1.4137e-02, + 5.3833e-02, 6.0516e-02, -1.1345e-02, 1.4664e-02, 1.6464e-02, + -6.2943e-04, -4.0474e-03, 7.6790e-03, 1.5160e-02, -1.4793e-02, + -5.9776e-03, 3.4607e-02, -6.1646e-03, 2.8290e-02, 9.4971e-02, + 2.9160e-02, 3.3264e-02, -1.1536e-02, -6.0310e-03, -3.4882e-02, + -3.3630e-02, 6.9763e-02, 1.7105e-02, -1.3481e-02, 2.9449e-02, + 1.6327e-02, 6.6162e-02, 2.2415e-02, -8.2703e-03, -6.4148e-02, + -5.0354e-02, 1.4214e-02, 4.9286e-02, -5.5450e-02, 1.5182e-02, + -2.8336e-02, -5.8960e-02, 1.5823e-02, 5.8838e-02, -2.4063e-02, + -6.9214e-02, 6.2347e-02, 2.8259e-02, -2.0462e-02, -6.0883e-02, + -3.7575e-03, -1.3939e-02, -3.4607e-02, 5.4893e-03, -2.7962e-03, + 9.5596e-03, 1.1261e-02, 1.1215e-02, 9.5215e-02, -4.4250e-02, + 2.5970e-02, -2.8351e-02, -4.4067e-02, 2.9282e-02, -2.4261e-02, + -2.1484e-02, 3.8361e-02, 3.0853e-02, 2.7756e-02, 3.4485e-02, + -4.2114e-02, 2.5909e-02, 3.6377e-02, -1.0086e-02, 2.8992e-02, + -2.3880e-02, 2.3697e-02, -2.1713e-02, 2.9678e-02, -3.4027e-02, + -3.5889e-02, -2.0065e-02, -9.5154e-02, 2.8702e-02, -6.5796e-02, + 8.2626e-03, -1.5060e-02, -9.7885e-03, -5.0781e-02, 4.5471e-02, + -7.2937e-02, 2.3361e-02, -3.3508e-02, 9.9121e-02, 5.3635e-03, + -2.2293e-02, -2.3758e-02, 3.4515e-02, 1.3947e-02, 2.8976e-02, + -3.5797e-02, -6.5155e-03, 4.8462e-02, -2.6798e-03, 1.6922e-02, + 1.1887e-02, 8.4734e-04, 4.1199e-02, 1.4353e-03, -2.1286e-03, + -8.1055e-02, -1.9135e-02, 3.3295e-02, -2.5391e-02, 2.9572e-02, + 1.7868e-02, -1.6785e-02, 1.2001e-02, -2.9846e-02, 1.4832e-02, + -1.7410e-02, 2.0630e-02, -3.1250e-02, -4.4678e-02, 2.4891e-03, + 3.7567e-02, -5.1300e-02, -1.3695e-02, 2.1839e-03, -2.0966e-02, + -1.4938e-02, -2.3300e-02, 4.5013e-02, 3.4424e-02, -2.3087e-02, + -2.7634e-02, 2.2018e-02, -7.2289e-03, 6.2805e-02, 3.6621e-02, + -4.5441e-02, 
-1.8112e-02, -4.7821e-02, -4.7760e-02, -1.0996e-03, + -6.2195e-02, -8.7585e-03, -7.2998e-02, -1.1948e-02, 1.0933e-02, + -2.5726e-02, -3.3302e-03, 6.8893e-03, -1.3519e-02, 1.8280e-02, + -2.1790e-02, 1.0498e-02, -1.2772e-02, -3.2562e-02, -4.5715e-02, + 5.7220e-03, -1.0307e-02, -2.2064e-02, 1.6388e-02, -1.0399e-02, + 3.2444e-03, 2.5192e-02, -5.9776e-03, 1.2680e-02, -4.5410e-02, + 9.0313e-04, -5.9143e-02, -2.4826e-02, 2.3422e-02, -3.2166e-02, + -5.6496e-03, -2.2919e-02, 1.9104e-02, -3.8338e-03, -8.0139e-02, + 6.6147e-03, -2.8091e-02, -3.6469e-02, -3.9337e-02, -3.2867e-02, + 5.4443e-02, 2.1149e-02, 4.6082e-02, 5.1208e-02, 2.8030e-02, + -3.9062e-03, 1.5053e-02, 4.1351e-02, -6.8331e-04, -2.8183e-02, + -8.4763e-03, 1.0704e-02, 6.5491e-02, 1.5533e-02, 1.3351e-02, + 5.8289e-02, -3.4088e-02, 3.9291e-03, -2.9129e-02, 5.6244e-02, + -1.0910e-02, 4.7150e-02, 4.7241e-02, 2.1912e-02, -4.8920e-02, + -4.7058e-02, 3.5858e-02, 2.0325e-02, 1.2009e-02, -6.0028e-02, + 6.2988e-02, 3.8177e-02, 3.8666e-02, -3.3478e-02, -1.9241e-02, + -2.5696e-02, 3.0444e-01, -2.0157e-02, -1.5320e-02, -4.1443e-02, + -6.3324e-03, 7.6538e-02, -4.5624e-02, -4.3793e-02, -6.3293e-02, + -2.5650e-02, -3.1708e-02, -6.9733e-03, -4.4128e-02, -1.2222e-02, + -2.8656e-02, 1.4755e-02, 4.5410e-02, -1.4847e-02, -6.1127e-02, + -1.8982e-02, 7.3357e-03, 6.2828e-03, 3.9154e-02, 3.3508e-02, + 3.6377e-02, 6.2675e-03, 1.7609e-02, 2.0752e-03, 8.6060e-03, + -1.2619e-02, 3.3264e-02, -3.6682e-02, -1.8784e-02, -4.0344e-02, + -5.5054e-02, -3.8025e-02, 4.7729e-02, 4.1840e-02, -7.7881e-02, + -2.6321e-02, 6.2073e-02, -6.0150e-02, -4.1046e-02, 6.2904e-03, + -2.1759e-02, 3.9856e-02, -3.1281e-02, 2.5539e-03, 3.5739e-04, + -4.9286e-02, -1.1353e-02, 2.0035e-02, 3.0548e-02, -6.3515e-03, + 3.6804e-02, 6.1401e-02, -6.7322e-02, 2.9926e-03, -6.9458e-02, + -6.5735e-02, -2.2568e-02, 1.6052e-02, -3.5065e-02, -4.1840e-02, + -3.0746e-03, 1.9791e-02, 3.3997e-02, -2.9160e-02, -4.9469e-02, + -5.8655e-02, -1.8463e-02, -8.1482e-03, -3.5339e-02, 2.3788e-02, + 
-2.3300e-02, -1.3672e-01, -6.4636e-02, -1.6983e-02, 1.0239e-02, + 2.0599e-02, -1.8295e-02, -2.2568e-02, 8.8684e-02, 2.7573e-02, + 2.5513e-02, 4.2145e-02, 7.9422e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.2778, 1.4128, 1.3243, 1.2121, 1.4286, 1.2285, 1.2714, 1.2086, 1.2280, + 1.2027, 1.3056, 1.3433, 1.3571, 1.2871, 1.2546, 1.3233, 1.2960, 1.4360, + 1.3255, 0.3545, 1.3113, 1.2376, 1.3394, 1.3552, 1.3275, 1.3286, 1.2904, + 1.1890, 1.4459, 1.3469, 1.3475, 1.2710, 1.3147, 1.2767, 1.3758, 1.4789, + 1.2862, 1.3537, 1.4560, 1.3338, 1.3595, 1.2610, 1.3219, 1.3039, 1.3483, + 1.3257, 1.2723, 1.3911, 1.2703, 1.2903, 1.3294, 1.3034, 1.3718, 1.3183, + 1.4007, 1.4945, 1.1526, 1.5551, 1.4231, 1.4085, 1.3184, 1.3429, 1.2638, + 1.3687, 1.2802, 1.3492, 1.2815, 1.3761, 1.4529, 1.5865, 1.2352, 1.3187, + 2.4101, 1.2996, 1.5029, 1.2912, 1.3043, 1.2830, 1.3575, 1.4108, 1.2408, + 1.4397, 1.2768, 1.3065, 1.3802, 1.3398, 1.3298, 1.2797, 1.2675, 1.3241, + 1.2821, 1.3869, 1.3612, 1.3836, 1.4122, 1.2181, 1.3843, 1.2839, 1.3655, + 1.4379, 1.2846, 1.1037, 1.3041, 1.2195, 1.2868, 1.2898, 1.2872, 1.3970, + 1.2838, 1.2924, 1.2720, 1.3200, 1.2509, 1.2536, 1.3131, 1.3219, 1.3353, + 1.3341, 1.2996, 1.3819, 1.2691, 1.3806, 1.3662, 1.4026, 1.3948, 1.3118, + 1.3203, 1.2443, 1.4601, 1.3088, 1.3087, 1.2799, 1.1752, 1.3434, 1.2705, + 1.2702, 1.3879, 1.3780, 1.2694, 1.1935, 1.3127, 1.2161, 1.3522, 1.2309, + 1.2594, 1.2788, 1.3216, 1.3680, 1.3151, 1.3007, 1.2686, 1.3322, 1.2459, + 1.3076, 1.4006, 1.1240, 1.4359, 1.3258, 1.3004, 1.2717, 1.3467, 1.2417, + 1.2638, 1.4253, 1.3500, 1.2859, 1.3707, 1.3677, 1.3646, 1.3057, 1.2892, + 1.2587, 1.1566, 1.2520, 1.3977, 1.3951, 1.2995, 1.5024, 1.3988, 1.2421, + 1.2630, 1.5120, 1.4118, 1.3496, 1.4099, 1.3526, 1.2130, 1.8183, 1.4380, + 1.5001, 1.3415, 1.2570, 1.3791, 1.4268, 1.4050, 1.2840, 1.2843, 1.2593, + 4.8400, 1.2843, 1.1926, 1.3561, 1.3769, 1.3333, 1.2853, 1.2566, 1.4406, + 1.2301, 1.3254, 
1.4186, 1.2946, 1.2669, 1.2523, 1.2704, 1.2703, 1.3087, + 1.4050, 1.3373, 1.3108, 1.3078, 1.2544, 1.3056, 1.2784, 1.2104, 1.4433, + 1.2860, 1.4168, 1.3535, 1.2526, 1.3556, 1.2621, 1.3858, 1.4261, 1.3332, + 1.2681, 1.2991, 1.3661, 1.2210, 1.2892, 1.2889, 1.3536, 1.2801, 1.3803, + 1.3746, 1.4893, 1.2750, 1.3595, 1.2911, 1.2272, 1.3971, 1.3142, 1.2904, + 1.2525, 1.2875, 1.3555, 1.3670, 1.3386, 1.2144, 1.2527, 1.1820, 1.3643, + 1.2478, 1.3274, 1.2426, 1.2595, 1.3444, 1.2624, 1.3189, 1.3534, 1.3567, + 1.3505, 1.2788, 1.2749, 1.2946, 1.2299, 1.2255, 1.3519, 1.2815, 1.3201, + 1.2997, 1.4289, 1.4691, 1.3291, 1.3569, 1.3914, 1.3111, 1.4085, 1.2841, + 1.2588, 1.2578, 1.2939, 1.3514, 1.2254, 1.3247, 1.3362, 1.2644, 1.2673, + 1.3170, 1.4680, 1.3108, 1.4813, 1.2732, 1.3297, 1.3588, 1.3602, 1.3319, + 1.2410, 1.2551, 1.2761, 1.2361, 1.2865, 1.3475, 1.2561, 1.2721, 1.2840, + 1.3149, 1.2915, 1.2586, 1.4006, 1.4374, 1.4045, 1.3576, 1.3122, 1.2363, + 0.7507, 1.1181, 1.3389, 1.3652, 1.2037, 1.4117, 1.2621, 1.2860, 1.3316, + 1.2805, 1.2710, 1.2435, 1.1944, 1.3598, 1.3444, 1.4724, 1.3790, 1.3487, + 1.3941, 1.3124, 1.4063, 1.4142, 1.3284, 1.3665, 1.3139, 1.3566, 1.4895, + 1.2782, 1.2546, 1.4901, 1.1898, 1.3537, 1.2926, 1.2563, 1.3956, 1.3219, + 1.2015, 1.2747, 1.2112, 1.3383, 1.4631, 1.2327, 1.3151, 1.2368, 1.3727, + 1.2585, 1.2890, 1.4346, 1.2018, 1.2840, 1.3254, 1.2897, 1.3935, 1.3739, + 1.2381, 1.2642, 1.3081, 1.6554, 1.3431, 1.2500, 1.2942, 1.2410, 1.2230, + 1.3736, 1.3201, 1.3532, 1.4267, 1.2428, 1.3280, 1.2035, 1.3420, 1.3727, + 1.2243, 1.3602, 1.3100, 1.3725, 1.2488, 1.3750, 1.2510, 1.2722, 1.3553, + 1.3276, 1.2686, 1.2770, 1.3718, 1.2138, 1.2707, 1.3313, 1.3915, 1.2798, + 1.2845, 1.8782, 1.3366, 1.3609, 1.3501, 1.3094, 1.2649, 1.4126, 1.2749, + 1.3580, 1.2331, 1.2871, 1.3615, 1.3453, 1.3761, 1.3402, 1.3949, 1.3075, + 1.3821, 1.2335, 1.1219, 1.2089, 1.3412, 1.3147, 1.4133, 1.2842, 1.2501, + 1.3594, 1.4005, 1.2015, 1.2951, 1.3273, 1.3984, 1.3136, 1.3617, 1.2494, + 1.2750, 1.3031, 
1.4086, 1.2963, 1.3859, 1.3196, 1.2856, 1.3284, 1.3102, + 1.3510, 1.2589, 1.2849, 1.2815, 1.2839, 1.3223, 1.3948, 1.2715, 1.2000, + 1.3363, 1.3016, 1.4022, 1.2559, 1.2914, 1.3016, 1.2621, 1.3012, 1.2922, + 1.3268, 1.3100, 1.4105, 1.2209, 1.2655, 1.4514, 1.1279, 1.3305, 1.3242, + 1.3918, 1.3349, 1.3387, 1.3374, 1.2754, 1.3770, 1.3840, 1.4277, 1.2821, + 1.3066, 1.2942, 1.2172, 1.3147, 1.3135, 1.2541, 1.3755, 1.4096, 1.3069, + 1.2051, 1.2896, 1.1959, 1.3449, 1.3062, 1.4289, 1.2718, 1.3988, 1.3705, + 1.3853, 1.4339, 1.3522, 1.3735, 1.3411, 1.2900, 1.2655, 1.4412, 1.2839, + 1.4373, 1.2894, 1.3883, 1.2792, 1.3214, 1.2820, 1.3570, 1.2802, 1.3953, + 1.3076, 1.3642, 1.2259, 1.3210, 1.5030, 1.3443, 1.2598, 1.2860, 1.2877, + 1.3718, 1.3292, 1.4056, 1.3185, 1.3182, 1.3692, 1.3035, 1.3890, 1.2688, + 1.2993, 1.2494, 1.3429, 1.3417, 1.3072, 1.5261, 1.1908, 1.2241, 1.3305, + 1.3312, 1.3154, 1.3555, 1.3268, 1.3306, 1.3453, 1.2817, 1.3936, 1.3599, + 1.3008, 1.2335, 1.4052, 1.3677, 1.3380, 1.3518, 1.2657, 1.4209, 1.3038, + 1.2496, 1.2492, 1.3114, 1.3554, 1.4546, 1.2551, 1.3230, 1.3162, 1.3046, + 1.3480, 1.3656, 1.4054, 1.2291, 1.4179, 1.4216, 1.6285, 1.2478, 1.2996, + 1.2502, 1.3330, 1.2784, 1.2079, 1.2718, 1.3187, 1.3079, 1.3155, 1.4294, + 1.3614, 1.2806, 1.2023, 1.4037, 1.4959, 1.3305, 1.3032, 1.3336, 1.2883, + 1.3871, 1.2994, 1.3879, 1.2965, 1.2655, 1.3263, 1.3945, 1.4027, 1.2427, + 1.4667, 1.3111, 1.3269, 1.2957, 1.4106, 1.2785, 1.2503, 1.4278, 1.3792, + 1.4130, 1.4359, 1.2090, 1.2858, 1.3821, 1.3316, 1.2425, 1.2517, 1.1997, + 1.2026, 1.2181, 1.2937, 1.4291, 1.2833, 1.2416, 1.3274, 1.4065, 1.4087, + 1.4433, 1.2292, 1.3580, 1.3133, 1.3483, 1.3444, 1.4458, 1.3265, 1.3614, + 1.3026, 1.2615, 1.2933, 1.5180, 1.2559, 1.4251, 1.3611, 1.3697, 1.3070, + 1.3921, 1.2738, 1.3158, 1.2808, 1.3083, 1.2928, 1.3000, 1.2510, 1.3136, + 1.3668, 1.2892, 1.4194, 1.1994, 1.2050, 1.3569, 0.4170, 1.3689, 1.2871, + 1.2851, 1.2127, 1.2463, 1.3796, 1.3585, 1.3439, 1.3009, 1.3167, 1.4009, + 1.3422, 1.4012, 
1.2848, 1.3174, 1.3833, 1.3165, 1.2696, 1.2573, 1.3488, + 1.3744, 1.2530, 1.2258, 1.2341, 1.4300, 1.4465, 1.4464, 1.2995, 1.2796, + 1.2561, 1.3208, 1.2107, 1.2561, 1.3227, 1.3021, 1.2849, 1.2951, 1.3540, + 1.3733, 1.2993, 1.2819, 1.3679, 1.3642, 1.3118, 1.3858, 1.3867, 1.5010, + 1.3779, 1.2740, 1.3771, 1.3687, 1.3323, 1.2218, 1.2367, 1.2822, 1.3661, + 1.3749, 1.2645, 1.3220, 1.2099, 1.3023, 1.2323, 1.3248, 1.3039, 1.3085, + 1.3062, 1.3787, 1.6302, 1.3617, 1.2350, 1.2862, 1.3350, 1.3497, 1.3331, + 1.3821, 1.2707, 1.3288, 1.3331, 1.3864, 1.3344, 1.3385, 1.2354, 1.3073, + 1.4564, 1.2089, 1.3711], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.1249e-01, 6.6530e-02, -1.5252e-01, -5.8736e-04, 4.5505e-02, + 9.3366e-02, 8.9381e-02, 5.9666e-02, -9.5938e-02, 3.6553e-02, + 6.7269e-02, -1.9519e-02, -1.0083e-01, -2.3823e-02, -3.5535e-02, + -6.4250e-02, -1.6845e-02, 1.2477e-02, -5.8642e-02, 4.4019e+00, + -4.9843e-02, 2.5174e-02, 3.9088e-02, -8.4199e-04, -2.0867e-02, + -5.4807e-04, -2.0647e-03, -1.7433e-02, 1.0351e-01, -9.0810e-02, + 3.9953e-02, 2.3238e-02, 1.4501e-02, 4.8113e-02, 5.7156e-02, + -6.4520e-02, 2.7001e-02, -4.0983e-02, -1.1597e-01, 3.8330e-02, + 2.3931e-02, 3.4684e-03, 8.6621e-03, 1.2125e-02, -4.2395e-02, + 6.7561e-03, 4.1510e-02, -4.2593e-02, 2.6011e-02, 5.0339e-02, + 4.2213e-02, -1.4566e-02, 2.7324e-02, 4.6538e-02, 5.4109e-02, + 5.0325e-02, -6.1837e-02, -4.9468e-02, 2.5524e-02, 1.0648e-02, + 4.9119e-02, -4.5645e-02, 4.1377e-02, 1.9581e-02, 9.3057e-02, + 1.7621e-02, 8.7060e-02, 9.3680e-02, -4.0777e-02, 2.2552e-01, + 3.7641e-02, 2.7666e-02, 3.4943e-01, 5.6216e-02, -1.4487e-01, + 6.4672e-02, 3.3319e-02, -9.7215e-02, -3.7396e-02, 3.0751e-02, + 1.8282e-02, -1.1652e-02, -1.9703e-02, 1.1703e-01, -2.4701e-02, + 4.9298e-02, 1.2135e-01, 8.3444e-02, -2.4657e-02, 1.2175e-02, + -1.6799e-02, 2.4807e-02, 2.8444e-02, -5.0449e-02, -5.5266e-02, + 1.3274e-02, -2.0715e-02, -5.3437e-02, -6.7125e-02, -5.9439e-03, + 2.9591e-02, 1.3338e-01, 
3.7340e-02, -7.2558e-02, 5.8032e-02, + 2.6419e-02, -4.8896e-03, -3.1441e-02, 3.4037e-02, -5.9380e-02, + -3.7031e-02, -1.5587e-03, 2.8165e-02, 3.1967e-02, 6.8637e-02, + -2.4927e-02, 1.0872e-02, -5.9152e-02, 1.8055e-02, 7.8582e-02, + -6.0901e-02, -1.3762e-01, -6.6338e-02, -2.4902e-02, -1.3091e-01, + -1.7644e-02, -1.1633e-01, 1.2645e-02, 9.5415e-03, -5.4999e-02, + -9.1769e-02, 3.0410e-02, -4.1700e-03, -3.8699e-03, -4.4601e-02, + -3.4956e-03, -8.8458e-03, 1.3220e-02, -4.3815e-02, -3.4442e-02, + -8.5615e-03, 3.0837e-02, 3.9464e-02, 1.0106e-02, 5.6561e-02, + -1.6560e-02, -7.9464e-04, -2.5703e-02, -1.4476e-02, 8.4910e-02, + 1.9942e-01, -5.2275e-02, -6.9300e-02, 5.5513e-03, -7.2165e-02, + -1.4982e-01, -1.9055e-02, -7.8602e-02, 6.0158e-03, 1.1233e-03, + 1.1562e-01, -1.1397e-01, -2.8403e-02, -4.9939e-02, 3.1589e-02, + 1.2695e-03, -7.5428e-02, 4.5873e-02, -5.3041e-02, 3.1856e-02, + -4.4322e-02, 5.4903e-02, 5.8381e-02, 1.6829e-02, 3.3597e-02, + -8.3475e-02, -1.2639e-02, 2.3891e-02, -5.2602e-02, 2.8142e-02, + 4.5836e-04, -8.0072e-02, 5.3213e-02, -2.0927e-03, 5.6944e-03, + -1.6532e-01, 2.7191e-02, 1.2540e-01, -4.0220e-02, -5.6549e-02, + 1.9608e-02, -4.7420e-02, 4.8768e-02, 5.2063e-02, -7.7537e-02, + 8.8836e-03, -2.1279e-02, -1.0774e-01, -5.9231e-01, 4.3655e-02, + -1.2093e-01, -1.1290e-02, -1.6956e-02, 4.7933e-02, 4.8477e-02, + 1.0526e-01, -8.1703e-02, 8.6334e-02, 8.1503e-02, -8.2884e-03, + 1.9980e-03, 1.6998e-01, -2.9055e-03, -2.3385e-02, -5.2070e-02, + -8.9273e-03, -2.7549e-02, 4.1074e-02, -8.2153e-02, -1.1626e-01, + 1.8730e-02, 5.3543e-03, -6.0731e-03, -2.9544e-02, 2.4765e-02, + -4.7577e-02, 3.2469e-02, -4.7903e-03, 7.3674e-02, -3.6682e-02, + 6.6302e-03, 2.0081e-02, -1.1133e-02, 5.2496e-02, -2.5489e-02, + 3.2781e-02, -7.1077e-02, 5.0036e-02, 5.3721e-02, 3.5437e-02, + 6.2628e-02, 8.2153e-02, 4.1833e-02, 2.3117e-02, 2.5839e-02, + 2.7974e-02, 8.7044e-02, 3.6748e-02, 7.4573e-03, -1.1560e-02, + 4.0989e-02, 1.7920e-02, 8.9429e-02, -7.5363e-02, 1.1239e-01, + -9.7433e-02, -5.9373e-03, 
-9.0286e-02, 2.9564e-02, 1.9275e-02, + -4.6716e-02, -3.4492e-02, 1.2544e-02, -4.6450e-02, -4.1636e-02, + 2.8558e-02, -4.4963e-02, 1.0954e-02, -6.3352e-02, -3.6041e-02, + 1.6229e-02, 3.7611e-02, 8.6765e-02, -7.4588e-02, 1.9078e-02, + 5.8601e-02, -9.5907e-02, 3.2584e-02, 4.2996e-02, -6.7500e-02, + 1.5511e-01, 1.4252e-01, -4.6072e-02, 1.9960e-02, 4.7929e-03, + 6.0026e-02, 6.9157e-02, -4.8183e-02, 1.1191e-01, -1.1622e-01, + -3.2258e-02, -4.7785e-02, 6.6812e-02, 2.3133e-02, 3.6284e-02, + -1.1379e-02, 6.7204e-02, 2.7096e-02, 1.2316e-02, 4.3819e-02, + 1.6182e-02, 1.0396e-02, -3.4605e-03, 5.9977e-02, 3.0063e-02, + 6.0782e-03, 6.6088e-02, -5.5381e-02, 8.2836e-02, -3.4805e-02, + 3.3898e-02, 1.2261e-02, -4.6789e-02, -6.2023e-02, 1.2293e-02, + -7.1054e-02, 3.4515e-02, 4.1708e-02, 1.0877e-01, -2.4782e-02, + -7.4042e-02, 1.9112e-01, -3.6649e-02, -2.9619e-02, -8.1513e-02, + -1.1837e-01, 9.0077e-03, 1.3000e-02, 3.9411e-02, -9.5251e-02, + 1.3697e-01, -3.9842e-02, 1.0547e-01, -3.3850e-02, 1.7734e-02, + 8.9571e-02, -7.6323e-03, -2.8654e-02, -1.9298e-02, 6.9649e-03, + -9.7311e-02, 1.5954e-02, -5.8039e-02, -2.2428e-02, 7.0552e-02, + 1.0890e-02, 7.9111e-02, 7.5176e-02, 1.7303e-02, -2.0276e-02, + -3.1886e-02, 4.3340e-02, -2.2585e-02, -1.7488e-02, -9.9096e-03, + -5.1280e-02, -4.2994e-02, 1.3468e-02, 7.8899e-03, -6.5327e-02, + 2.5238e-02, -1.2104e-02, 1.9794e-02, -8.7755e-02, 1.2272e-02, + -3.8113e-02, 4.8278e-02, 8.7076e-02, 2.7675e-02, 3.3508e-02, + 4.7161e-02, -4.2115e-02, -6.3795e-02, 9.3564e-03, -2.6794e-02, + -3.8587e-04, 7.3805e-02, 5.9713e-02, 3.5639e-04, -2.9299e-02, + -8.8966e-02, 1.2821e-01, 3.4049e-03, 4.1277e-03, -4.2586e-02, + -2.8642e-02, -7.6255e-03, -6.1095e-02, -5.1197e-02, -6.0758e-02, + 2.7188e-02, -9.9969e-02, 3.1856e-03, -3.5005e-02, 6.6075e-02, + 8.8329e-03, 2.1645e-02, 4.5789e-03, -2.4657e-02, 4.3128e-02, + 1.1682e-01, -3.2732e-02, 7.7938e-02, 2.1887e-02, -6.3995e-02, + -1.1791e-01, -6.2589e-02, 7.7237e-03, -1.2817e-02, -1.6943e-02, + -4.3070e-02, 2.5946e-02, 
9.0340e-02, -3.8835e-02, -1.0102e-01, + -1.3493e+00, 3.1324e-02, 2.9935e-02, -3.3154e-02, -1.0369e-02, + -4.0507e-03, -1.2769e-02, 4.4127e-03, -2.8645e-02, -1.8434e-02, + 2.3528e-02, -1.3385e-01, -9.2718e-02, 3.5029e-02, 1.4013e-02, + -4.1471e-02, -7.4307e-02, -1.2050e-02, 7.5646e-02, 3.5991e-02, + -7.2910e-03, 1.7712e-02, 5.8498e-02, 1.1845e-01, -7.3238e-02, + -5.0888e-03, -8.3001e-02, -2.9860e-02, 1.0127e-02, 1.1152e-01, + -9.6156e-02, 2.8515e-02, -6.0384e-02, 1.1800e-01, 2.3654e-02, + -1.3667e-02, 5.0921e-02, -9.9970e-02, 3.6478e-02, 3.3720e-02, + -7.5835e-03, 2.4045e-02, 2.6867e-02, -1.9072e-02, -1.0716e-01, + 1.3416e-02, 3.4972e-02, -8.7127e-02, 4.2388e-02, -1.9562e-02, + -3.1595e-02, 2.2393e-02, -6.9860e-02, 4.5873e-02, -1.3502e-02, + -2.1857e-02, -8.8041e-02, -1.2840e-02, 2.1885e-02, -6.6175e-02, + 2.2215e-02, 1.1226e-01, 6.1672e-02, -2.8236e-02, 9.6886e-02, + 9.5079e-02, -8.6334e-02, 3.8164e-02, 1.8307e-01, -6.8546e-02, + -4.5854e-02, -2.1095e-02, 8.4351e-02, 7.5503e-02, 1.1180e-02, + 1.8422e-02, 1.6019e-02, 2.6321e-02, -3.6733e-02, -1.2587e-01, + 1.7723e-02, -8.1332e-02, 3.5607e-02, -2.6955e-02, 2.2358e-04, + 2.8571e-02, -5.5601e-02, 5.2749e-02, -1.8398e-02, -2.9063e-02, + 6.0163e-02, 4.2427e-02, 4.9609e-02, 3.5119e-02, 3.5887e-02, + -8.4040e-02, -3.5546e-03, -8.2419e-02, 4.4913e-02, -2.8065e-02, + -8.6208e-03, 6.7511e-02, 9.9593e-02, 6.3995e-02, -6.3132e-02, + -4.9890e-02, -2.7507e-02, 5.0936e-02, -2.7012e-02, -3.6912e-02, + -2.4369e-02, 1.5135e-02, -2.4008e-02, 5.1538e-03, -9.2522e-02, + 2.4081e-02, 3.6717e-02, -1.6280e-04, -4.9823e-02, 5.6712e-04, + -2.6802e-02, -3.0708e-02, -2.1814e-02, -5.9829e-02, -6.6685e-02, + -1.4319e-03, 1.2471e-02, -2.3993e-02, 6.8387e-02, 2.0728e-02, + 3.0708e-02, 4.8047e-02, 1.0205e-02, 6.6293e-02, 2.8431e-02, + 6.7335e-02, 9.5118e-03, -4.3140e-02, 5.8522e-02, 4.3582e-03, + -2.7967e-02, 5.7708e-02, -3.5698e-02, 2.9260e-02, 1.0412e-01, + -6.7268e-02, 7.3698e-02, 3.2205e-02, -5.8821e-03, 1.3816e-01, + 6.3438e-02, 3.3773e-02, 
-9.4991e-03, 1.9717e-02, 3.7347e-02, + 5.1135e-02, 1.2657e-02, -5.9936e-02, -3.0121e-02, 7.1307e-02, + 3.1290e-02, 1.0219e-02, 2.0115e-02, 2.6330e-02, -2.9001e-02, + -1.0041e-01, -4.3682e-03, 5.1741e-02, 8.8398e-02, 4.9760e-02, + 2.4417e-02, 5.8507e-02, -8.0023e-02, -1.5147e-02, -4.0268e-03, + -3.7588e-02, 1.0212e-01, 6.3257e-02, -4.1025e-02, 8.4955e-03, + -2.6449e-02, 4.9546e-02, 1.7144e-02, -6.9164e-03, 6.3603e-02, + -6.4019e-02, -1.0373e-01, 2.1895e-02, -1.5778e-02, 3.1353e-03, + -3.9587e-02, 3.7480e-02, -2.3462e-01, 4.4842e-02, -4.6786e-02, + 5.5548e-02, 5.3524e-02, -1.7518e-02, -3.4734e-02, -2.3288e-03, + -9.6355e-02, 8.0766e-02, 8.5742e-02, 2.9556e-02, -6.0110e-02, + 3.5366e-02, 3.0754e-04, 2.1479e-02, 6.5357e-03, 2.3472e-02, + -2.9868e-02, -1.4617e-02, -1.1686e-02, 4.1814e-02, -2.7550e-02, + 1.0170e-01, 1.5005e-02, 8.5918e-02, 1.5293e-01, 3.6819e-02, + 2.8971e-02, -4.9363e-02, 2.3617e-03, 2.9390e-02, -5.3829e-02, + -2.8364e-03, -5.2993e-02, 3.4160e-02, -3.5602e-02, 7.5076e-03, + 1.7979e-02, 2.6363e-02, 6.1626e-02, 1.4246e-02, 9.0429e-02, + 9.7307e-02, -1.8644e-02, -1.0435e-02, 5.2864e-02, -3.1784e-02, + 3.9848e-02, -7.4252e-02, -1.6382e-02, 2.7518e-02, 6.7591e-02, + -1.0744e-01, -9.2538e-03, -7.7432e-02, 3.2136e-03, 8.4539e-02, + -9.5115e-04, -6.7152e-02, -1.0466e-01, 2.8688e-02, 1.1688e-02, + -2.1216e-01, 2.4527e-02, 7.5086e-03, 5.9728e-03, -5.0438e-02, + 4.5686e-02, -2.3327e-02, -2.7498e-02, 1.3780e-02, 6.8096e-02, + -4.4034e-02, -5.3989e+00, -1.8472e-02, 2.5993e-02, 1.1153e-02, + -7.6305e-02, -4.6629e-02, -4.5762e-02, -1.2894e-02, 2.3282e-02, + -5.1168e-02, -1.6560e-02, -1.8486e-02, -4.1046e-02, -4.4457e-02, + 4.0430e-02, -3.8813e-02, -7.7700e-02, 2.9239e-02, -4.2895e-02, + 7.3962e-02, 7.1423e-02, 1.0706e-02, -2.8681e-02, -1.1633e-01, + -2.8290e-02, 3.7359e-02, 2.3484e-02, 3.1901e-02, 4.8001e-02, + -3.1100e-02, 8.7397e-02, 9.8772e-02, 1.7629e-02, 3.6501e-03, + 9.3748e-03, -3.4984e-03, 3.5952e-02, -7.3531e-02, -7.0477e-03, + 6.7023e-03, -2.4618e-02, 
2.3288e-02, -9.6989e-02, 3.1015e-02, + 1.3645e-02, -2.0102e-01, 8.9683e-02, 3.0963e-03, 7.7379e-02, + 5.2203e-02, 4.2260e-02, 5.4637e-02, 7.2847e-02, 2.5497e-02, + -2.4353e-03, -2.0962e-02, -1.2389e-02, -3.2438e-02, -7.5809e-02, + 9.4790e-02, -1.0083e-01, -1.0935e-01, 3.2097e-02, -2.6656e-02, + -1.8654e-02, -8.5054e-02, 1.4421e-02, 6.8078e-02, -5.8056e-02, + -4.4167e-02, -1.4837e-02, -7.9102e-02, -4.1090e-02, 7.0457e-02, + 1.0067e-02, 2.5603e-02, 9.8748e-02, 4.7162e-02, -5.7733e-02, + 2.2399e-02, 3.9574e-02, 9.2868e-02, -7.5548e-02, -5.1467e-02, + -4.1965e-02, 8.5480e-02, -2.1664e-03], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0202, 0.0051, -0.0099, ..., 0.0150, -0.0118, -0.0252], + [ 0.0279, -0.0216, -0.0171, ..., -0.0218, 0.0069, 0.0059], + [-0.0100, 0.0249, 0.0076, ..., 0.0068, -0.0119, 0.0081], + ..., + [ 0.0039, -0.0400, -0.0170, ..., 0.0191, -0.0038, 0.0145], + [ 0.0186, -0.0087, 0.0062, ..., 0.0125, -0.0135, -0.0063], + [ 0.0241, -0.0023, -0.0027, ..., 0.0083, 0.0031, 0.0143]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.1687, -0.1522, -0.1874, ..., -0.3894, -0.2622, -0.3452], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0104, 0.0026, -0.0121, ..., 0.0043, -0.0197, -0.0084], + [-0.0017, -0.0006, 0.0162, ..., -0.0014, 0.0029, 0.0020], + [-0.0115, 0.0131, -0.0065, ..., -0.0152, 0.0059, 0.0125], + ..., + [-0.0005, 0.0133, -0.0086, ..., 0.0159, -0.0166, 0.0221], + [-0.0144, -0.0031, 0.0204, ..., 0.0199, 0.0079, -0.0012], + [-0.0021, -0.0152, -0.0143, ..., 0.0090, -0.0025, -0.0068]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-1.4450e-02, -1.9440e-02, -5.2460e-02, -3.1021e-02, -5.1300e-02, + 3.6041e-02, -4.9820e-03, 3.0884e-02, -6.0791e-02, -3.9024e-03, + 6.5979e-02, -5.0690e-02, -1.5404e-02, 
1.2665e-02, 1.0269e-02, + -2.3239e-02, 1.9638e-02, 4.1351e-02, -9.3155e-03, 4.5898e-02, + -1.4809e-02, 3.9795e-02, 5.4199e-02, 2.2781e-02, -1.1696e-02, + 4.7455e-02, -3.1342e-02, -5.1666e-02, 2.0187e-02, -4.6753e-02, + 1.4305e-02, 2.4292e-02, 4.6883e-03, 4.9622e-02, 1.7075e-02, + -7.3669e-02, 4.7668e-02, -3.4607e-02, 5.6549e-02, -2.3193e-02, + -1.6907e-02, -4.4983e-02, 1.2787e-02, 3.8055e-02, -4.3793e-02, + 7.4951e-02, -2.7313e-03, 2.3071e-02, -2.5681e-02, -2.4292e-02, + 3.4485e-02, 3.3752e-02, 1.5030e-02, 1.4137e-02, 5.1941e-02, + -3.1830e-02, 4.2801e-03, 8.0322e-02, 2.6215e-02, 4.4525e-02, + 2.6276e-02, -3.3325e-02, 2.9175e-02, -5.9875e-02, 6.1890e-02, + 4.4678e-02, 3.4008e-03, 6.0394e-02, 3.0575e-03, 2.4216e-02, + -1.5656e-02, 2.8706e-03, 1.1194e-01, 1.2585e-01, -6.3538e-02, + 5.7129e-02, -8.5678e-03, -2.8549e-02, 3.8269e-02, -6.1859e-02, + -2.5925e-02, -7.7637e-02, 1.9104e-02, 1.2222e-02, 3.5248e-02, + 1.2161e-02, 6.0577e-02, 3.3173e-02, -2.0538e-02, 4.6310e-03, + -5.7648e-02, 1.1932e-02, 5.6274e-02, -1.1154e-02, -6.1523e-02, + 9.4299e-02, 4.9988e-02, -2.1072e-02, -5.7259e-03, -4.9530e-02, + -8.6914e-02, 1.1816e-01, 3.1860e-02, -1.7868e-02, -5.5618e-03, + 1.4656e-02, -1.0147e-03, 6.7505e-02, 2.8412e-02, -5.7129e-02, + -2.4433e-03, 2.9312e-02, -5.8250e-03, -2.6993e-02, -1.4832e-02, + 1.0757e-02, -4.6600e-02, -1.3153e-02, -5.3589e-02, -4.4342e-02, + -1.4353e-03, -4.3793e-03, 1.9211e-02, 1.8112e-02, -8.9294e-02, + -2.7573e-02, -5.5733e-03, -8.1238e-02, 5.9601e-02, -1.1169e-01, + -2.1286e-02, -4.9652e-02, -1.4503e-02, -3.3081e-02, -2.4017e-02, + -8.7662e-03, -2.5314e-02, -9.2926e-03, -4.5837e-02, 3.4424e-02, + -3.4454e-02, 4.5349e-02, 5.2155e-02, 4.0588e-02, 6.9824e-02, + -2.4170e-02, -2.2324e-02, 7.3547e-02, 3.6987e-02, 1.9012e-02, + 3.7231e-02, -7.6904e-02, 5.1880e-02, 1.5228e-02, -3.5583e-02, + -5.7495e-02, -1.0101e-01, -2.4506e-02, 3.2330e-03, 6.7200e-02, + 2.8725e-03, -1.1665e-02, -7.1144e-03, -1.0231e-02, -9.5825e-03, + 5.4260e-02, -5.0262e-02, -5.2551e-02, 
-1.3290e-02, 8.1116e-02, + -3.0533e-02, 7.3776e-03, 2.4551e-02, -8.6975e-02, -4.4342e-02, + 4.7516e-02, -1.4748e-02, 4.2114e-02, -4.8584e-02, 4.8492e-02, + -1.2657e-02, -4.6936e-02, -3.9429e-02, -2.0401e-02, 1.7181e-02, + 2.9335e-03, 4.5242e-03, 1.4160e-01, 6.3515e-03, -2.2491e-02, + -3.1643e-03, 1.0612e-02, 8.1970e-02, -1.3794e-02, -8.8043e-03, + 4.9561e-02, -5.0964e-03, -3.1311e-02, -8.0383e-02, 2.4078e-02, + 3.2063e-03, -5.1544e-02, 6.0616e-03, -8.0185e-03, 4.0283e-02, + -1.9867e-02, -7.0129e-02, -1.7181e-02, 1.0101e-02, -3.5614e-02, + -9.1705e-03, 1.0254e-02, 1.8930e-03, 5.0964e-02, -4.9713e-02, + -2.0294e-03, 3.4821e-02, -4.1473e-02, -1.2077e-02, -8.0444e-02, + 4.1412e-02, 7.0496e-02, -2.3071e-02, -1.7212e-02, 6.1874e-03, + 2.6428e-02, -2.0203e-02, -2.5589e-02, 6.1890e-02, 3.3478e-02, + 4.9957e-02, -2.1423e-02, -7.5928e-02, -4.8828e-02, 2.5978e-03, + 7.7820e-02, -9.0881e-02, -2.9709e-02, 1.0559e-02, 4.3030e-02, + 6.5918e-02, 1.6846e-02, -8.6365e-03, 4.7394e-02, 3.0457e-02, + -2.7008e-02, -3.8513e-02, 5.5298e-02, -3.8269e-02, -7.2861e-03, + 1.2192e-02, -3.8666e-02, -1.1482e-02, -7.8552e-02, 9.7733e-03, + 6.8176e-02, 4.8340e-02, 2.0966e-02, 2.2064e-02, -3.8528e-03, + 2.3636e-02, 1.2711e-02, -3.0380e-02, -8.6182e-02, -4.6173e-02, + -4.6783e-02, -3.2623e-02, -8.6060e-03, 3.2520e-03, -1.2886e-02, + -3.9673e-02, 3.4027e-03, 1.0046e-01, 3.3478e-02, -1.5030e-02, + 1.1162e-02, 3.0350e-02, 4.8279e-02, 2.0081e-02, -9.4528e-03, + 2.7328e-02, -1.1261e-02, 5.4970e-03, 1.3132e-03, 3.6163e-02, + 7.7362e-03, -3.6469e-02, -5.6915e-02, 4.0619e-02, 6.8474e-03, + 6.9580e-02, 2.7710e-02, -3.1494e-02, 2.1805e-02, 3.1281e-02, + -8.8348e-03, -3.2898e-02, 2.7069e-02, -3.9940e-03, 9.3445e-02, + -3.4088e-02, -5.5313e-03, -3.7689e-02, 1.6724e-02, 4.7028e-02, + -2.2858e-02, 4.0100e-02, 5.3650e-02, -3.3752e-02, 4.2076e-03, + 3.9124e-02, 5.7983e-02, -3.1143e-02, 3.8513e-02, 1.0323e-02, + -5.5481e-02, -4.6356e-02, -4.0039e-02, 7.4219e-02, -7.2021e-02, + -1.0323e-02, 3.8483e-02, -5.9570e-02, 
-2.7084e-02, 9.9170e-01, + -2.5467e-02, 6.9702e-02, 2.7435e-02, 4.4373e-02, -2.9785e-02, + 5.3497e-02, -6.6071e-03, -9.4528e-03, -2.0615e-02, 1.1345e-02, + 4.7150e-02, 5.0354e-02, -1.2062e-02, -2.1973e-02, 2.1774e-02, + -4.2786e-02, -8.9340e-03, 4.1382e-02, 3.9864e-03, 2.9648e-02, + -2.7267e-02, -3.4580e-03, -5.5664e-02, 8.5907e-03, -2.2476e-02, + 7.6172e-02, 1.4473e-02, -1.6937e-02, -6.1584e-02, 3.5591e-03, + 1.6144e-02, 8.6670e-03, 2.4853e-03, -5.0018e-02, 1.6235e-02, + 1.9775e-02, -3.4027e-02, 8.4473e-02, -1.0803e-02, -5.3528e-02, + 3.7861e-03, -2.1576e-02, 3.7415e-02, -2.8839e-02, -8.6731e-02, + -4.9744e-03, 7.8552e-02, -6.4270e-02, 1.1009e-02, -2.1423e-02, + -2.4216e-02, 8.6243e-02, -5.7251e-02, 5.2826e-02, 7.1472e-02, + -2.5269e-02, 2.3468e-02, 1.5396e-02, 6.8130e-03, 1.3374e-02, + -3.2043e-02, 8.0933e-02, -4.6783e-02, -4.3091e-02, -4.8950e-02, + 2.8801e-03, -6.0234e-03, -6.4964e-03, 2.0203e-02, -2.5589e-02, + 5.3864e-03, -5.1697e-02, -8.9722e-02, 8.4656e-02, 1.2188e-03, + 6.9336e-02, -1.9135e-02, -1.4641e-02, -2.3636e-02, -2.4597e-02, + -9.3689e-02, -2.0050e-02, 3.0487e-02, 3.6621e-02, 3.4027e-02, + -8.9874e-03, 3.9703e-02, 2.5654e-03, 2.7573e-02, -9.3445e-02, + -6.0944e-02, -9.6283e-03, -1.6403e-03, 1.9331e-03, 3.9856e-02, + -6.8726e-02, -2.6672e-02, -1.5839e-02, -3.2684e-02, 1.5198e-02, + 4.4403e-03, -5.0293e-02, -3.5645e-02, 2.1896e-02, -4.4067e-02, + -3.1982e-02, -2.1866e-02, -3.6621e-02, -1.9531e-02, -9.9106e-03, + -2.4673e-02, -4.5288e-02, 3.9764e-02, 1.0429e-02, 3.6957e-02, + 1.9943e-02, -2.9251e-02, -2.3087e-02, -1.0883e-01, 2.7267e-02, + -1.7349e-02, -2.8488e-02, 4.4922e-02, 2.2797e-02, 2.3148e-02, + -3.6836e-04, -5.5725e-02, -6.5918e-02, 3.3142e-02, 1.2970e-02, + -9.5520e-03, -8.7433e-03, 2.8717e-02, -4.6478e-02, -3.7994e-02, + 7.0618e-02, -2.2369e-02, -3.4119e-02, -2.9465e-02, 1.0574e-04, + -2.7817e-02, 3.8849e-02, -2.4629e-04, 5.5115e-02, -1.7776e-02, + 6.0692e-03, -2.2720e-02, 5.4230e-02, -3.9154e-02, -3.1891e-02, + 9.6436e-03, 8.9722e-02, 
5.8990e-02, -1.1063e-03, -7.3242e-03, + 3.8300e-02, -2.6031e-02, -3.2593e-02, 6.5979e-02, -3.9490e-02, + -6.1890e-02, -1.2589e-03, 7.5256e-02, 5.3223e-02, -2.5833e-02, + -2.1667e-02, 6.6467e-02, 4.8584e-02, -1.9684e-02, -8.9417e-03, + -4.0710e-02, -1.8967e-02, 4.2511e-02, 4.4708e-02, 2.3956e-02, + -2.4551e-02, -2.4567e-02, -2.1851e-02, -6.6650e-02, -2.4673e-02, + 2.7588e-02, -3.9673e-02, 6.9153e-02, 1.9150e-02, -3.3131e-03, + -5.8594e-02, -1.2550e-02, -5.0690e-02, 3.5004e-02, -9.9957e-05, + -2.9907e-02, 4.1138e-02, 4.6959e-03, -5.2605e-03, -4.1016e-02, + -3.0014e-02, -1.3374e-02, -2.0096e-02, -5.9624e-03, -1.9089e-02, + 2.3899e-03, 1.4854e-02, 1.8494e-02, -1.0391e-02, -1.6373e-02, + -2.6657e-02, 3.0426e-02, -2.9388e-02, -1.7258e-02, 8.3801e-02, + 4.2511e-02, 1.4603e-02, 6.9962e-03, -2.7267e-02, -4.2053e-02, + 2.4109e-03, 7.2250e-03, -1.8051e-02, 1.2123e-02, -6.6772e-02, + -4.9011e-02, 3.3325e-02, -9.6083e-04, -1.6739e-02, 3.7323e-02, + 1.9653e-02, -3.6469e-02, -6.5063e-02, -4.2664e-02, 2.1423e-02, + 1.4153e-02, -6.0944e-02, -1.6418e-02, -2.0462e-02, 5.4108e-02, + -6.2439e-02, -1.2047e-02, 2.0615e-02, -8.3313e-03, 1.9150e-02, + -3.3020e-02, -5.9052e-02, -3.5362e-03, 4.8279e-02, 7.7581e-04, + -2.9388e-02, 7.5626e-04, -7.7026e-02, 3.2291e-03, -2.3895e-02, + -2.0447e-02, -5.3040e-02, 4.3091e-02, 1.8417e-02, 3.7323e-02, + -7.1777e-02, 2.2614e-02, -2.3453e-02, 9.3918e-03, 2.1072e-02, + -6.3599e-02, -1.2827e-03, -3.2990e-02, -4.0710e-02, 1.2779e-03, + 1.5045e-02, 3.6285e-02, 2.3239e-02, -3.0701e-02, -3.3905e-02, + 2.8732e-02, 2.5955e-02, -6.2561e-03, -6.4209e-02, 3.1113e-02, + -5.2368e-02, -3.9337e-02, 2.4933e-02, 4.9820e-03, -5.0995e-02, + 1.7792e-02, -5.0774e-03, -5.2155e-02, 6.5002e-03, -6.1684e-03, + -7.5317e-02, 3.0960e-02, -3.4119e-02, -2.2980e-02, -1.0941e-02, + -6.3171e-02, 2.3773e-02, 7.6050e-02, 3.6392e-03, 5.1117e-03, + 1.1864e-02, -5.1880e-02, -1.0663e-01, -3.6285e-02, 1.0872e-02, + 9.3689e-03, 1.1284e-02, -1.1948e-02, 6.7078e-02, -1.3687e-02, + 3.9734e-02, 
-4.2725e-03, 5.1804e-03, 6.2439e-02, 2.9617e-02, + 2.8687e-03, -2.0889e-02, 4.2908e-02, -1.4122e-02, -4.1161e-03, + 3.6255e-02, 1.6693e-02, 2.4048e-02, -2.6428e-02, -4.6814e-02, + 1.6907e-02, 1.1269e-02, -6.5186e-02, 1.1847e-01, 4.9774e-02, + 3.2444e-03, -2.2018e-02, -2.2964e-02, -2.3331e-02, 8.9264e-03, + 2.5665e-02, -4.0833e-02, 3.0457e-02, 1.6918e-03, -4.4708e-02, + -5.4321e-02, 5.0995e-02, -6.7139e-02, 1.6037e-02, 4.4373e-02, + -3.3600e-02, -5.5328e-02, -3.9978e-02, 6.6223e-02, 4.3121e-02, + -9.3262e-02, -1.2390e-02, 1.1337e-02, 1.2619e-02, 4.5204e-03, + 3.6682e-02, 3.8422e-02, -1.2413e-02, 4.9194e-02, 9.0210e-02, + 5.9937e-02, -1.3379e-01, 4.9400e-03, 5.2452e-03, -1.5884e-02, + -4.0619e-02, 3.6201e-03, -4.7150e-02, -1.3702e-02, 5.2399e-02, + -2.6215e-02, 3.8147e-02, -7.9155e-04, 2.6855e-02, -2.1927e-02, + -1.8539e-02, 1.3641e-02, -2.2598e-02, -4.0955e-02, -5.6610e-02, + 7.6782e-02, -3.4695e-03, 4.1008e-03, 2.9449e-02, -1.4961e-02, + -2.8824e-02, 7.7698e-02, 4.5837e-02, -2.8061e-02, -8.8120e-03, + -3.3844e-02, 2.0676e-02, 7.3624e-04, -3.8818e-02, 1.3878e-02, + 8.1940e-03, 9.8495e-03, -1.2484e-03, 2.6016e-02, 8.2397e-03, + 2.2873e-02, 3.6804e-02, 1.8997e-03, 1.3168e-02, 5.0316e-03, + -2.5654e-03, 3.4851e-02, -5.0201e-03, 1.0445e-02, 3.4485e-02, + 3.4668e-02, 3.4515e-02, 6.4026e-02, 9.1248e-02, -7.8247e-02, + -7.4844e-03, 6.0272e-02, -2.4399e-02, 3.3691e-02, 1.4244e-02, + 1.6068e-02, -5.9448e-02, -4.2877e-02, -8.6746e-03, -1.0736e-01, + -3.5461e-02, -2.6962e-02, -8.1787e-02, -2.7451e-02, 1.2024e-02, + -4.1992e-02, 8.1635e-03, 6.4430e-03, -3.7689e-02, 5.9601e-02, + -2.4536e-02, 6.6589e-02, 7.8552e-02, 8.3130e-02, 1.6815e-02, + -1.2283e-02, 3.2074e-02, 1.6693e-02, -8.3008e-02, -1.6525e-02, + -4.8920e-02, 1.9150e-02, -4.1748e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.0257, 2.0344, 1.9511, 1.9693, 1.9585, 1.9219, 2.0683, 1.9555, 1.9977, + 2.0715, 2.0045, 2.0027, 2.0430, 2.0124, 2.0187, 
1.8801, 1.9952, 2.1130, + 1.9557, 1.2675, 1.8754, 1.8764, 1.9653, 1.9503, 2.0653, 1.9437, 1.9732, + 2.0050, 2.0532, 1.8522, 2.0196, 1.9858, 1.9741, 1.9016, 2.0161, 2.0116, + 1.9765, 2.0106, 1.8425, 1.9446, 1.9371, 2.0979, 1.9926, 2.0592, 2.1984, + 2.0775, 1.9381, 1.9745, 1.9952, 1.9890, 1.8479, 2.0970, 2.0300, 2.0011, + 2.0767, 2.1105, 1.9994, 1.9672, 2.0051, 2.1348, 2.0109, 2.0383, 1.8306, + 2.0998, 1.9471, 1.9370, 1.9396, 1.9878, 2.0677, 2.3015, 2.1528, 1.9999, + 1.8284, 2.0095, 1.9911, 2.0213, 2.0314, 1.9826, 1.9751, 1.9446, 2.0328, + 1.9656, 1.9499, 1.9528, 2.0021, 1.9421, 2.0539, 2.0090, 2.0005, 2.0296, + 1.8981, 2.0537, 1.9623, 1.9398, 1.9628, 1.9870, 2.0913, 2.0724, 1.9533, + 2.1124, 1.9853, 2.6650, 1.9589, 2.0429, 1.8930, 1.9923, 2.0287, 2.0133, + 2.0124, 2.0338, 2.0284, 2.0974, 2.0387, 2.1974, 1.9633, 2.1018, 1.8175, + 1.9394, 2.0572, 2.0080, 2.0954, 1.9050, 2.1441, 1.9543, 1.9805, 2.0541, + 1.9790, 1.9280, 1.9894, 2.1866, 1.9792, 1.8995, 1.9158, 1.9590, 2.1155, + 2.5199, 2.0179, 2.0004, 1.9195, 2.0193, 1.9890, 1.9006, 1.9797, 2.0440, + 2.0788, 2.0407, 2.0304, 2.1029, 2.0203, 1.9760, 1.9394, 2.0054, 1.9847, + 2.0356, 2.1139, 1.5719, 1.9705, 1.9917, 2.0091, 2.0040, 1.9577, 1.9719, + 2.0472, 1.7621, 2.0073, 1.9903, 2.1363, 1.9998, 2.0051, 1.9888, 1.9687, + 1.9345, 2.1511, 2.1393, 2.0262, 1.9638, 2.0399, 2.0069, 1.9389, 1.9667, + 2.0143, 2.0119, 1.9335, 1.9657, 2.1081, 2.0954, 1.9555, 0.4295, 1.9307, + 2.0535, 2.0774, 2.0535, 2.1234, 1.9335, 2.0603, 2.0513, 1.9627, 1.9375, + 0.6353, 2.0395, 2.2153, 2.1222, 2.0615, 2.0297, 2.0519, 2.0836, 2.0057, + 2.0021, 2.0199, 1.9734, 1.9511, 2.1163, 2.0859, 2.0537, 2.0991, 2.0374, + 1.9356, 2.0284, 2.0152, 2.0300, 2.1570, 1.9596, 1.8716, 2.0989, 1.9362, + 2.0595, 1.9272, 1.9588, 2.0182, 1.9944, 2.0247, 2.0196, 1.9151, 1.9654, + 2.0398, 1.9895, 2.1065, 2.0104, 1.9463, 1.9615, 1.9753, 2.0335, 1.9698, + 1.9619, 1.9918, 1.9939, 1.9441, 2.3460, 1.9763, 1.9905, 2.1296, 1.9593, + 2.0462, 1.9803, 1.8739, 1.9475, 1.9598, 2.0517, 
1.9284, 1.9119, 2.0556, + 1.9551, 1.8316, 1.9645, 1.9500, 2.0806, 2.0543, 1.9594, 1.9842, 1.9619, + 1.9587, 1.8544, 2.0395, 1.9717, 2.2640, 1.9691, 2.0282, 1.9032, 1.9770, + 1.9332, 1.9511, 1.8922, 1.9482, 1.9790, 2.0446, 2.0257, 2.0371, 1.9969, + 2.0323, 1.9707, 2.1331, 2.0598, 1.9312, 2.0447, 2.0934, 1.8701, 2.0536, + 1.9339, 1.9815, 2.0502, 1.8652, 1.9644, 1.9713, 2.0664, 2.0611, 1.8879, + 1.9392, 1.9859, 2.0829, 2.0115, 1.9858, 1.9979, 1.8491, 1.9454, 1.9416, + 1.9871, 2.0103, 1.9462, 2.1577, 2.0422, 1.8648, 1.9684, 1.9651, 1.9931, + 2.4453, 1.7836, 1.9797, 2.0477, 2.0374, 1.8115, 1.9577, 1.9438, 1.9010, + 1.9335, 2.0134, 2.0297, 2.0084, 1.9193, 1.9962, 2.1257, 1.9753, 1.9826, + 2.0457, 1.9393, 2.1329, 1.9958, 2.0344, 2.0395, 2.0875, 2.0308, 1.9300, + 2.0481, 1.9178, 2.3537, 2.0540, 1.9842, 1.8567, 1.9846, 2.1611, 2.0759, + 2.1378, 1.9157, 1.8941, 2.0725, 2.0011, 2.0108, 2.0496, 2.0581, 2.0146, + 2.0212, 1.9948, 1.9491, 1.9728, 2.1604, 2.1109, 1.9235, 2.0226, 2.0845, + 1.9894, 1.9106, 1.9809, 1.7731, 2.0163, 1.9498, 2.0033, 1.9945, 2.1344, + 2.0049, 2.0276, 1.9931, 2.1364, 1.9570, 1.9764, 1.9729, 2.0930, 1.9910, + 1.9975, 2.0646, 2.0498, 1.9523, 2.1627, 2.0006, 1.8691, 2.0954, 1.9561, + 1.9464, 2.0225, 2.0438, 2.0626, 1.8734, 2.0497, 1.9712, 2.0253, 2.2255, + 1.9269, 1.5977, 1.9628, 1.9559, 2.0419, 2.0557, 2.1421, 1.9419, 1.9496, + 2.1094, 2.1491, 1.9859, 2.0079, 1.8498, 2.0724, 1.9499, 2.0346, 2.0262, + 1.9633, 2.0354, 1.9472, 1.9602, 2.0195, 2.0032, 2.0036, 1.9063, 2.0086, + 2.0016, 2.0250, 2.2570, 1.9673, 1.9707, 1.8847, 1.9078, 2.0152, 2.0952, + 2.0862, 1.9537, 2.0682, 1.9151, 2.0110, 2.2095, 1.9940, 2.1294, 2.0299, + 1.9963, 2.1305, 1.9429, 2.0693, 1.9541, 1.9645, 1.9386, 2.0793, 2.0342, + 1.9821, 2.0205, 1.8914, 1.9734, 2.0128, 1.9509, 2.0054, 2.0226, 2.0904, + 2.0363, 1.9614, 2.1894, 1.8249, 2.1139, 2.0987, 1.7479, 2.0491, 2.2605, + 2.0351, 2.0034, 1.8794, 2.0154, 2.0932, 2.0017, 1.8794, 2.0726, 1.8610, + 1.9683, 1.9647, 1.9156, 1.8042, 1.9973, 2.0662, 
2.0574, 2.0067, 2.0059, + 1.9640, 2.1439, 2.0814, 2.0323, 2.0262, 1.9617, 1.9233, 2.0654, 1.9877, + 2.0460, 2.0286, 2.0341, 2.0739, 1.9845, 2.0807, 2.0348, 1.9746, 1.9962, + 2.0325, 1.9365, 1.9254, 1.9935, 2.0833, 1.9138, 2.0450, 2.1419, 2.0006, + 1.9513, 2.1702, 1.9652, 1.9930, 2.1020, 2.1332, 1.9315, 2.1528, 1.9875, + 2.0101, 1.9131, 2.1410, 1.9704, 1.9231, 1.9808, 1.9090, 2.0045, 2.0392, + 1.9050, 2.1369, 1.9591, 2.0033, 2.1322, 2.0857, 1.9056, 2.0861, 2.0624, + 1.9077, 2.1703, 1.9176, 1.8142, 1.9912, 1.8790, 1.9754, 1.9995, 1.9791, + 2.0240, 1.8256, 2.0508, 1.9844, 2.1159, 1.9374, 1.9971, 1.9893, 2.0586, + 2.0790, 1.9968, 2.0608, 2.0390, 2.0590, 2.0581, 1.9986, 2.0023, 1.9103, + 2.0122, 2.0208, 2.0332, 2.0699, 1.9136, 2.0321, 2.3401, 1.9774, 1.9934, + 2.0168, 2.0446, 1.8847, 1.9775, 2.1485, 2.0622, 1.9779, 2.0434, 2.1604, + 1.9566, 1.9872, 2.2087, 2.0353, 1.8128, 1.9966, 1.9910, 2.0200, 1.9513, + 2.0511, 1.9924, 2.0438, 2.0259, 2.0082, 2.0246, 2.0088, 1.9980, 1.9457, + 1.9530, 1.9194, 2.0609, 1.9816, 1.9832, 2.0252, 2.0323, 1.9594, 2.0305, + 2.0356, 2.2375, 1.9345, 2.0042, 2.1218, 2.1397, 1.9727, 2.1261, 1.9762, + 2.0960, 1.9011, 1.9572, 2.0197, 2.0102, 1.8579, 2.0108, 1.9747, 2.0413, + 2.1950, 2.0268, 1.9291, 2.0363, 1.9824, 1.9842, 2.0567, 1.9963, 1.9368, + 2.0931, 1.9622, 1.9801, 2.1398, 1.8515, 2.0383, 2.1540, 2.0991, 2.0238, + 2.0954, 1.8529, 2.0306, 1.9698, 2.0138, 2.0344, 2.1097, 2.0003, 2.0347, + 1.9435, 2.0138, 2.0070, 2.0207, 1.9699, 1.9808, 3.9111, 1.9489, 1.9240, + 1.9906, 1.9973, 2.1468, 2.0961, 2.0055, 2.0753, 2.1031, 1.9375, 2.0882, + 2.0503, 2.0181, 1.8466, 2.0694, 2.1705, 1.8652, 2.0717, 1.9540, 1.9346, + 1.9093, 1.9651, 1.9621, 2.0628, 2.0070, 1.9060, 1.9553, 1.8944, 2.0086, + 1.9623, 2.0737, 1.9730, 1.9533, 2.1365, 2.0299, 1.8659, 1.9766, 2.1460, + 1.9863, 2.1535, 1.9489, 2.1114, 1.9806, 1.9694, 1.8617, 1.8932, 1.9100, + 1.9833, 2.0381, 1.9284, 1.9857, 2.0655, 1.9793, 1.9920, 2.0654, 1.9590, + 1.9620, 2.0547, 2.1570, 1.9327, 1.9948, 1.9445, 
1.9828, 2.0383, 2.0391, + 1.9883, 1.9772, 2.0274, 1.9669, 2.0183, 2.0465, 2.0411, 1.9694, 2.0264, + 2.2126, 2.3055, 1.8294, 1.9801, 2.0337, 2.0238, 2.0515, 2.3617, 2.0011, + 1.9341, 1.9198, 2.0020], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.1501e-01, 7.4704e-01, -4.3778e-01, -1.9189e-01, -1.9360e-01, + 4.1040e-01, 4.4803e-01, -1.0623e-01, -1.4964e-01, -3.4063e-01, + -2.0855e-01, 3.2643e-01, -8.2681e-01, -6.0881e-01, -5.0489e-01, + -3.7201e-01, -2.5095e-01, -4.9668e-01, -5.6372e-01, -1.3344e+00, + -1.8790e-01, 1.3055e-01, 5.6959e-01, -5.4303e-01, 7.8711e-01, + 1.1111e-01, 1.4635e-01, -1.4257e-01, -6.8712e-01, 3.8435e-01, + 2.1803e-01, 4.9633e-01, 1.4904e-01, 1.6454e-02, 3.2819e-01, + -1.7535e-01, -2.5352e-01, -1.2546e-01, 1.0994e-01, 9.3778e-02, + -6.8792e-01, -1.2614e-01, 2.4639e-01, 1.6165e-01, -3.7067e-01, + -2.1706e-01, -1.0287e-01, -1.8020e-01, 2.9180e-01, 2.3912e-02, + 3.4678e-02, -7.0900e-01, 2.1722e-01, 5.3221e-01, -2.7258e-01, + 1.5429e+00, 2.2216e-01, 1.3756e-01, -4.0713e-01, 4.5913e-01, + -8.6891e-02, 5.5913e-01, 1.7959e-01, -4.4340e-01, 2.4813e-01, + 3.0107e-01, 6.3924e-01, -6.1232e-02, 1.8421e-02, -1.9119e+00, + 7.7772e-01, 2.1262e-01, 3.9979e-01, 3.5545e-01, -6.8527e-01, + -4.8011e-01, 6.0286e-01, 3.7308e-01, -1.8340e-01, -2.6823e-01, + -1.1094e-01, -5.5771e-01, -4.4588e-01, 5.1400e-01, 1.3997e-01, + -4.0948e-01, 6.4643e-01, 6.4235e-02, -4.0403e-01, -5.4217e-01, + 2.7304e-01, 5.2554e-01, -9.3850e-02, -3.3140e-02, -2.6708e-01, + 5.6201e-01, 5.5971e-01, 4.5284e-01, -3.5819e-01, 1.7672e-01, + 1.0297e-01, -2.2664e+00, 7.2437e-02, -2.4392e-01, 2.2132e-01, + 6.1708e-01, 7.2010e-02, -1.7920e-01, 4.3395e-01, -3.4397e-01, + 6.1327e-01, 5.4794e-01, -5.3735e-01, 5.9204e-01, 5.3248e-01, + -9.2280e-01, 5.4920e-01, 2.2522e-01, 5.7106e-01, 3.3792e-01, + 4.6288e-01, -8.5729e-02, 5.4681e-01, 1.2722e-01, -4.7660e-02, + -5.9782e-01, -4.5898e-01, 2.3266e-01, -1.9421e-02, 8.9277e-01, + -6.2295e-01, -5.0095e-01, -2.8544e-01, 
-7.9341e-02, 5.1180e-01, + 1.2812e+00, 2.5656e-01, 3.7108e-01, 1.0246e-01, 1.0855e-01, + -4.4166e-01, -3.6916e-02, 1.6275e-01, 3.0883e-01, -7.6207e-01, + -3.2294e-01, 4.3284e-01, -3.2373e-01, -3.9448e-01, 3.8939e-01, + 4.7772e-01, 1.6648e-01, 5.8841e-02, 2.4477e-02, -8.8626e-01, + 4.9069e-01, 2.7631e-01, 1.3535e-01, -4.4725e-02, 1.2324e-01, + -2.0032e-01, -8.9303e-02, 4.2760e-01, 2.0125e-01, -4.4736e-01, + 2.2468e-01, -1.0505e+00, 2.4208e-01, -7.1624e-01, 1.1178e-01, + -1.1839e-01, -9.6857e-02, -7.3601e-01, 2.8437e-01, 5.4252e-01, + 6.5352e-02, 7.0957e-02, 1.3129e-01, 7.4641e-02, 4.4224e-01, + -4.6053e-01, -2.3242e-01, -3.5033e-01, 6.2616e-02, 5.2954e-01, + 1.0180e+00, 1.4960e-01, -1.3682e+00, -1.4817e-01, -6.3487e-01, + -4.4761e-01, -5.2614e-01, 6.5118e-01, 5.0969e-01, 3.5756e-01, + 4.0856e-01, 1.4308e-01, -1.2530e-01, 9.7227e-01, 3.4538e-01, + -7.7295e-01, 2.7611e-01, 2.6164e-01, 3.8689e-01, -2.0480e-01, + 5.2936e-01, 4.2684e-01, 4.6405e-01, -1.6827e-01, -9.7458e-02, + 5.0772e-01, -5.6984e-01, -3.9931e-01, 2.7341e-01, -7.1261e-01, + -5.7750e-01, 5.6267e-02, -6.3205e-02, -3.5059e-01, -2.6100e-02, + 1.0959e+00, -1.2912e-01, 2.6108e-01, 6.5407e-01, 1.9582e-01, + 3.5861e-01, -3.8938e-01, -5.9331e-03, -1.6254e-02, -1.5344e-01, + -2.4446e-01, 4.5050e-01, 2.4633e-01, 1.3749e-01, -2.3003e-01, + -2.6949e-01, 5.4330e-01, 1.7525e-01, 2.4688e-02, 3.6464e-01, + 1.1125e-01, -1.7916e-01, -3.5670e-01, 1.4557e-01, -3.3076e-01, + -3.5905e-01, 1.9406e-01, -1.2120e+00, -2.5908e-01, 2.6289e-01, + -4.2772e-01, -7.2619e-02, -2.7620e-01, -5.6160e-01, -2.3287e-02, + -2.4912e-02, -5.6602e-02, 3.3388e-01, 1.9100e-01, 5.3561e-02, + -4.4234e-01, -2.6088e-01, 7.4349e-02, -2.6779e-01, -4.8274e-01, + 5.6909e-01, 5.0762e-01, 1.4416e-01, -3.3702e-01, 8.1444e-02, + -9.1124e-04, -5.0530e-02, -5.5433e-02, -1.2218e+00, -7.1735e-01, + -2.9989e-01, 2.1273e-01, 3.5223e-01, -1.0045e-01, -1.4589e-01, + 6.6810e-01, 6.6347e-03, -2.8485e-01, -3.9527e-02, -6.1962e-01, + 5.7725e-01, 6.0066e-01, -1.6726e-01, 
3.5993e-01, -4.4027e-01, + -6.4637e-01, 3.6673e-01, 3.9305e-01, -6.9421e-01, 3.8688e-01, + -3.5038e-01, -3.3405e-01, -2.0529e-01, 2.3280e-01, -1.2050e-02, + 5.0844e-02, 3.1180e-01, -1.4231e-01, -4.1143e-01, -6.3243e-02, + 2.3677e-01, 1.7883e-01, 2.9399e-01, -4.0658e-01, -1.1772e-02, + 6.7914e-02, -8.2298e-02, -3.7087e-01, -1.3104e-01, -2.7620e-01, + 1.0077e-01, 2.6525e-01, 2.4830e-01, 5.9466e-01, 2.8895e-01, + -4.5527e-01, 7.1790e-02, 2.2942e-01, -2.6116e-01, 3.9986e+00, + -9.8789e-01, -1.2329e-01, -6.7903e-01, 3.8148e-02, -2.7855e-01, + 3.8224e-01, -5.2978e-01, 1.4232e-01, -2.4229e-01, 1.7414e-01, + 3.4799e-01, -4.5234e-01, -1.0896e-02, 1.0077e+00, 3.6756e-01, + -3.3123e-01, 2.9891e-01, -5.0165e-01, 5.9525e-01, -5.5131e-01, + 2.8175e-01, -3.0794e-02, -3.3171e-01, 4.8648e-01, 2.5715e-01, + -9.2639e-02, 1.4169e-01, -2.7374e-01, 1.1780e+00, -5.9521e-01, + -1.4061e-01, -3.1326e-01, 5.3595e-01, -3.8327e-01, -6.1277e-01, + -4.3154e-01, 1.4341e-01, -4.2311e-03, -5.5929e-01, 3.7290e-01, + 1.3575e-01, -5.8564e-01, 5.0063e-01, 3.2996e-01, 4.3748e-01, + 1.2429e-01, -1.8594e-01, -2.0776e-01, -8.4629e-01, -7.2942e-01, + -4.6600e-01, 2.0105e-01, 8.7055e-01, -1.0882e-02, -6.5407e-02, + -2.6198e-01, 1.3003e-01, -3.6015e-01, -2.0504e-01, -2.0976e-01, + -3.5524e-01, -7.5813e-01, 4.4625e-01, 3.2435e-01, -4.2963e-01, + 1.4965e-01, 3.3192e-01, 6.0794e-03, 5.7775e-01, -5.5045e-01, + -7.7025e-02, -1.2794e-01, 4.5363e-01, -2.2398e-01, 1.5045e-01, + 9.8628e-01, -4.1569e-04, 4.0888e-02, 2.2238e-01, 4.5657e-01, + -1.7871e-01, 1.6835e-02, 5.9045e-01, -3.5778e-01, 2.6057e-01, + 1.5641e-01, -2.0078e-02, 2.4277e-01, 5.2386e-01, 1.1039e+00, + -1.1327e+00, -1.1502e-01, -7.1413e-02, 3.7271e-01, 1.6588e-01, + -5.0401e-01, -2.7928e-01, 2.3201e-01, -4.3780e-01, -7.2960e-01, + -9.1982e-02, 3.7177e-01, 2.3800e-01, 1.7524e-01, -1.7606e-02, + -6.8586e-01, -1.7550e-01, 1.6649e-01, 9.6858e-01, 6.1731e-01, + -6.7386e-02, 1.5299e-01, 3.5824e-01, -2.3525e-02, -1.2447e-01, + -2.2176e-01, 3.9829e-01, 
2.9128e-01, -2.1872e+00, -3.9490e-02, + 2.0953e-01, 2.1769e-02, 3.2698e-01, 4.3849e-01, -5.8048e-01, + -6.7511e-01, 3.8794e-01, -7.0737e-01, 7.1450e-02, -4.0589e-01, + 9.3709e-01, -6.9863e-03, -8.9709e-01, 2.8178e-01, -1.5175e-01, + -7.1227e-01, 1.7312e-02, -5.7876e-01, 7.2203e-02, -3.7204e-01, + 5.4562e-01, 4.1058e-01, 1.8340e-01, 1.3698e-01, 2.2252e-01, + 1.3383e-03, -2.1874e-01, -3.6462e-01, 4.9029e-02, -3.7906e-02, + -6.1232e-01, 2.8946e-01, -4.1292e-01, -2.4855e-02, 5.5976e-01, + 1.6030e-01, 1.2595e-01, 6.2577e-01, -4.8489e-01, 9.7715e-01, + -1.3700e+00, 3.9636e-01, 8.1092e-01, -1.7226e-01, -4.9420e-01, + 1.0185e+00, 4.1705e-01, -9.1852e-02, -3.6709e-01, -4.4797e-01, + -2.9262e-01, -7.5866e-01, 2.7837e-01, -1.8833e-01, 4.7308e-01, + -1.2365e-01, 8.6737e-02, -3.4702e-01, 4.5654e-01, -1.7387e-01, + -4.3082e-01, 3.6552e-01, 1.1090e-01, -4.9035e-01, -5.4686e-02, + -4.0094e-01, -7.3754e-01, -5.6454e-01, -2.7902e-01, 6.3375e-01, + 7.6849e-02, 7.6877e-01, 3.7509e-01, 5.1973e-01, -2.5887e-01, + 7.4737e-01, 2.5756e-01, 5.4741e-01, 3.5443e-01, 3.5458e-01, + -3.8656e-01, 9.1460e-01, -8.0961e-02, 7.5145e-01, 3.3231e-01, + -6.3823e-02, -3.9819e-01, -2.4886e-01, -1.5665e-01, -3.0294e-01, + -1.0763e+00, 5.0080e-01, 1.1450e-01, 3.9202e-01, -1.3330e-01, + -1.5818e-01, 1.7867e-01, -3.6898e-01, 6.2401e-01, 1.3509e-01, + 2.8328e-01, 1.8425e-01, -6.9642e-01, -2.0965e-01, -5.9807e-02, + -6.0111e-01, 5.6218e-02, -5.8171e-02, 2.7017e-01, 2.5399e-01, + -1.6934e-01, -2.4471e-01, -5.6285e-01, -2.3438e-02, 8.7225e-01, + 5.8195e-01, 1.6643e-01, 3.0911e-01, -1.2398e-01, 1.0637e-01, + 3.0400e-01, 3.8059e-01, 3.0698e-01, 5.2002e-01, 3.1318e-01, + -1.1297e-01, -6.4791e-01, -1.2858e-01, -3.3301e-02, 2.8349e-01, + 4.7307e-01, -6.4352e-01, -3.7952e-01, -4.6233e-01, 5.4687e-01, + 3.8553e-01, 1.8468e-01, 5.6490e-01, -5.7524e-01, 8.1843e-02, + 5.2448e-02, 2.1670e-01, -7.9810e-01, 7.6422e-01, 3.6075e-01, + -4.6884e-01, -3.0259e-01, -1.0189e-01, -1.9926e-01, -5.9249e-01, + -4.1673e-01, -1.7293e-01, 
4.8049e-01, 5.6171e-01, 5.4314e-01, + -4.5473e-01, 3.3866e-02, -3.8822e-01, -2.2157e-01, 3.9036e-01, + -1.0263e+00, 5.0080e-01, 2.9338e-03, 1.6282e-01, -6.7238e-02, + -1.5358e-01, 7.7335e-01, -6.0466e-01, -7.1113e-03, 4.5572e-01, + 2.6197e-01, -7.7756e-02, -1.7559e-01, 1.2412e-01, 4.5016e-01, + 4.4452e-01, 4.5932e-01, -1.5143e-01, 1.7155e-01, -1.5590e-01, + -6.7263e-01, -7.4638e-02, 6.8537e-01, 3.1193e-01, -1.8483e-01, + 2.6559e-01, -5.7226e-01, -4.2480e-02, 4.6850e-01, -4.0256e-01, + -7.3821e-01, 3.3337e-01, 7.9803e-01, 5.2502e-01, -2.7037e-01, + 4.2035e-01, -4.6351e-01, -5.0869e-01, -3.4929e-01, -3.2073e-01, + 2.7673e-01, -3.4714e-01, 5.5583e-01, 5.3885e-01, 5.8890e-01, + 2.3125e-01, -6.4483e-02, 2.6723e-01, -3.3570e-01, -7.4513e-02, + 7.5238e-01, 2.7637e-01, 8.6769e-01, -1.4806e-01, 2.2444e-01, + 8.2832e-01, 2.1911e-01, 4.5670e-02, -5.8405e-01, 7.1401e-01, + -1.7179e-02, 2.1653e-01, -3.9713e-01, 5.5353e-01, 5.1918e-01, + -3.4896e-01, 3.0754e-01, 4.5784e-01, 9.4302e-01, 6.3974e-01, + -2.9996e-01, 3.2503e-01, 1.7531e-01, 1.9067e-01, 2.9472e-01, + 1.0802e-01, 2.8037e+00, -2.4171e-01, 1.0029e-01, -6.1159e-02, + 8.6921e-01, 4.2050e-01, -8.0290e-01, -2.8834e-01, -5.9433e-01, + -4.8745e-01, -2.3467e-01, -2.1206e-01, -6.2243e-01, -5.9644e-01, + 9.0945e-02, 6.1859e-01, 5.3241e-01, -2.0134e-01, -3.0877e-01, + 1.2689e-01, 4.3569e-01, 1.7475e-01, -2.3127e-01, 2.0306e-01, + 6.0639e-01, -3.0360e-01, 1.2595e-01, -5.7503e-01, 4.5264e-01, + -3.8956e-01, -4.0904e-01, -1.0199e-01, -1.8171e-01, 2.0400e-02, + -5.6246e-01, 4.5533e-01, 8.9785e-02, -8.7525e-01, -5.9002e-01, + -7.0129e-01, -2.6266e-01, -2.6017e-01, -7.0782e-01, 4.3546e-01, + -5.1003e-01, 1.1640e-01, -1.5618e-01, 2.6692e-01, 1.1821e-01, + -4.6812e-01, -5.0343e-01, 2.3759e-01, 8.6180e-01, 1.1069e-01, + -1.8956e-01, 2.4460e-01, 5.6212e-01, -1.0961e-01, -6.8296e-01, + -7.2474e-01, -2.1875e-01, 1.6066e-01, -4.0349e-01, -3.5831e-01, + -6.3780e-01, -5.2782e-01, -2.7818e-01, 3.5333e-01, 8.6573e-01, + 1.4529e-01, -5.4409e-01, 
-8.4162e-01, -1.4071e-01, -3.4625e-02, + 6.1403e-02, -4.3922e-01, -1.0181e+00, 5.3744e-01, 6.9962e-01, + -2.0664e-01, -3.6145e-01, -3.0133e-01, 1.7422e+00, 2.1885e-01, + 1.6592e-01, 3.7474e-01, 4.0835e-01], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-1.8555e-02, 1.2207e-02, -1.6556e-02, ..., 9.8953e-03, + 1.6815e-02, -1.8707e-02], + [ 3.0487e-02, 3.2715e-02, -3.0022e-03, ..., 3.5187e-02, + 3.5980e-02, -5.8136e-03], + [ 2.0390e-03, -2.0386e-02, 1.7670e-02, ..., 2.3132e-02, + 4.0550e-03, 1.1375e-02], + ..., + [-5.3101e-03, 2.4445e-02, -1.9531e-02, ..., -1.0094e-02, + -1.0544e-02, 2.3727e-03], + [-1.3418e-03, 4.7874e-03, 1.2207e-02, ..., 7.7553e-03, + -6.1214e-05, -1.3153e-02], + [ 9.2850e-03, 7.7629e-03, -1.5533e-02, ..., 1.3306e-02, + 5.0316e-03, 2.2507e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([ 0.1748, -0.0695, -0.2499, ..., -0.0291, 0.0082, 0.0654], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[-0.0072, 0.0056, 0.0144, ..., 0.0010, 0.0068, -0.0195], + [-0.0204, 0.0330, -0.0089, ..., -0.0183, 0.0075, 0.0104], + [-0.0120, -0.0031, 0.0017, ..., -0.0134, 0.0086, -0.0073], + ..., + [-0.0075, -0.0018, -0.0179, ..., -0.0070, -0.0049, -0.0116], + [-0.0321, -0.0037, 0.0038, ..., 0.0033, -0.0005, 0.0031], + [ 0.0155, 0.0208, -0.0011, ..., 0.0005, -0.0043, -0.0342]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-4.6196e-03, -1.0712e-02, -4.2458e-03, -1.1658e-02, -7.2266e-02, + 1.1406e-02, 6.5796e-02, -3.1586e-02, 2.2736e-02, 5.1697e-02, + -8.3847e-03, -7.9575e-03, 5.0659e-02, 4.4937e-03, -1.5732e-02, + 1.4328e-02, 1.7075e-02, 1.4191e-03, 2.4689e-02, -5.8716e-02, + -2.6588e-03, -3.6438e-02, 3.8147e-02, -5.0079e-02, 1.9745e-02, + -2.9221e-02, -3.1464e-02, 5.1300e-02, 1.1711e-02, -1.5007e-02, + 1.4740e-02, 
5.8861e-03, 1.6403e-02, -1.9547e-02, -1.9958e-02, + 4.1595e-02, 4.2992e-03, -4.0375e-02, 1.0658e-02, 5.4626e-03, + -3.0426e-02, 4.9438e-02, 2.3499e-02, -7.7400e-03, -5.2277e-02, + -4.4212e-03, -6.8092e-04, -2.4185e-02, -1.4219e-03, 1.4244e-02, + 5.7617e-02, 4.7150e-02, -1.1452e-02, 9.1476e-03, 1.4732e-02, + 5.9967e-02, -5.5115e-02, 1.6342e-02, 3.3722e-02, -3.9520e-03, + -3.9001e-02, 1.2306e-02, 4.2480e-02, -3.2883e-03, 2.6760e-03, + -1.4351e-02, -2.8259e-02, 6.0692e-03, -1.1177e-02, -6.8604e-02, + 1.3428e-02, -6.4819e-02, -1.2286e-01, 7.9274e-06, -3.5583e-02, + 1.1482e-02, -4.8126e-02, 1.4811e-03, -1.1566e-02, -6.2012e-02, + 3.0136e-04, 4.3732e-02, -1.6174e-02, -2.2690e-02, 1.5366e-02, + -1.0666e-02, -1.3145e-02, 4.5563e-02, 6.5918e-03, 3.3140e-05, + 2.3926e-02, 7.9834e-02, 7.0129e-02, -2.4094e-02, 2.3895e-02, + -1.3113e-03, 1.4229e-02, -3.1082e-02, -2.2018e-02, 7.6790e-03, + 4.1992e-02, 1.0077e-01, 1.5175e-02, -8.6288e-03, 4.8447e-03, + 3.2257e-02, 8.7524e-02, 1.0815e-03, -3.4912e-02, 2.1362e-02, + -3.5095e-02, 3.9185e-02, 5.9166e-03, -1.3496e-02, -1.1681e-02, + -1.9058e-02, 2.8458e-03, -1.9516e-02, 4.3884e-02, -4.3030e-02, + -2.0966e-02, -1.8967e-02, -4.4952e-02, -3.3417e-02, 6.1951e-03, + -1.6769e-02, -4.3716e-03, -7.8552e-02, 2.9556e-02, 6.4201e-03, + -1.1986e-02, -2.5497e-02, 6.0844e-03, -1.0658e-02, -9.0179e-03, + -7.1335e-03, -4.7073e-03, 1.8127e-02, 1.9989e-02, -5.7144e-03, + -2.5070e-02, -4.1565e-02, -3.6255e-02, -5.9631e-02, 6.5193e-03, + -6.2866e-02, 4.8828e-03, -3.1647e-02, 3.4790e-02, 2.1637e-02, + 6.2866e-03, -6.7749e-02, 1.6113e-02, 4.7119e-02, 9.9487e-03, + -7.0312e-02, 5.3711e-02, -1.9897e-02, -7.0763e-03, -2.9709e-02, + 2.7542e-02, 1.3306e-02, 4.8889e-02, 2.1591e-02, 1.9302e-02, + 3.9978e-02, -8.5693e-02, 1.8982e-02, -2.0050e-02, -3.4576e-02, + 3.7933e-02, 3.1464e-02, -3.8574e-02, -3.8055e-02, 8.1635e-03, + -2.9007e-02, 5.3482e-03, -2.7390e-02, -5.9998e-02, -5.9906e-02, + -1.9699e-02, -1.5625e-02, -1.7685e-02, -1.1206e-03, 2.2335e-03, + 
3.8391e-02, 9.6130e-03, 2.2751e-02, -4.9316e-02, -3.1586e-02, + 2.0065e-02, -1.5106e-02, -1.6907e-02, -1.1854e-03, -3.2482e-03, + -9.5010e-05, 2.9404e-02, 2.9221e-02, 4.6229e-04, -2.8015e-02, + 4.4373e-02, 2.6062e-02, 1.8219e-02, -2.9648e-02, 1.0002e-02, + 2.0706e-02, 7.3669e-02, 1.1726e-02, 3.6804e-02, 4.1138e-02, + -2.9392e-03, 3.0106e-02, 3.0594e-03, 2.5574e-02, 5.3772e-02, + 1.3222e-02, -3.6072e-02, -1.5572e-02, -2.4384e-02, 1.3962e-02, + 1.6357e-02, -1.3443e-02, -3.6407e-02, -6.2218e-03, 1.8494e-02, + -4.0588e-02, -4.0131e-02, -6.3721e-02, 3.6278e-03, 2.1362e-02, + 9.0179e-03, -3.0411e-02, 1.4854e-02, 4.0283e-02, -5.5359e-02, + 2.6428e-02, 4.8409e-03, -3.4363e-02, 5.3772e-02, -3.8116e-02, + 2.8229e-04, 5.4199e-02, 7.9956e-03, -1.5869e-03, 2.0340e-02, + 1.7532e-02, 8.2016e-03, -2.4738e-03, -1.4038e-02, 1.6041e-03, + 2.3544e-02, 2.3026e-02, -2.4399e-02, -1.1978e-02, -3.0777e-02, + -3.2196e-02, -1.2001e-02, -5.4230e-02, -3.0792e-02, 7.1602e-03, + -5.3444e-03, -1.8097e-02, 9.2840e-04, 1.1177e-02, -1.2733e-02, + 2.0081e-02, 4.1008e-03, 2.3697e-02, 1.7761e-02, -2.0035e-02, + 7.1297e-03, -2.9938e-02, -1.2978e-02, -1.1086e-02, -3.1143e-02, + -8.3847e-03, -6.1264e-03, -1.1139e-02, 2.1729e-02, 4.6173e-02, + -5.5878e-02, -2.1790e-02, -1.8921e-02, -4.9957e-02, 3.6530e-02, + 5.3215e-03, -6.5735e-02, 1.1475e-02, 4.1107e-02, 9.8190e-03, + -7.7248e-03, 2.1011e-02, 1.1238e-02, 5.5275e-03, 3.5217e-02, + 3.1921e-02, 3.1471e-04, 1.8646e-02, 4.5090e-03, 4.9591e-02, + -3.6896e-02, -1.0056e-02, 1.2040e-04, -2.0615e-02, 8.1921e-04, + -5.4779e-03, 1.6571e-02, -2.8305e-02, -1.2932e-02, -1.3977e-02, + 4.3762e-02, -2.3575e-02, 2.0401e-02, 3.3188e-03, -2.0508e-02, + 6.5247e-02, -6.1111e-03, -4.0588e-03, 1.2978e-02, -1.4982e-03, + -1.1116e-02, 1.9665e-03, 1.6739e-02, -2.3193e-02, -4.3304e-02, + 7.2144e-02, 6.3049e-02, -2.1835e-02, 2.0233e-02, 5.1971e-02, + 1.8845e-02, 1.3985e-02, -1.6785e-02, 8.1253e-03, -5.7892e-02, + 4.6783e-02, 2.7817e-02, -1.4709e-02, -3.4466e-03, -3.8940e-02, + 
-2.7637e-03, -1.5007e-02, -3.5725e-03, 3.4668e-02, -4.6692e-02, + -2.7222e-02, -1.6083e-02, -2.4200e-02, -4.6631e-02, -4.3945e-02, + 9.1019e-03, 3.4607e-02, 1.2703e-02, 7.4219e-02, -5.6076e-03, + -1.3451e-02, -2.6188e-03, -3.5919e-02, 4.5776e-03, -1.3481e-02, + 4.0344e-02, 3.7117e-03, -4.0894e-02, 1.1891e-04, -1.5007e-02, + 5.7800e-02, -4.8248e-02, 5.7587e-02, -3.7460e-03, 1.0727e-02, + 1.9699e-02, -5.8502e-02, 2.1652e-02, -4.3488e-03, 1.2045e-03, + -5.0201e-02, -7.4280e-02, -2.1118e-02, -3.7789e-04, 1.8112e-02, + 1.3870e-02, 1.9989e-02, 6.8703e-03, -2.6817e-03, 2.9099e-02, + -3.0422e-03, 4.6844e-02, -5.0415e-02, -8.2169e-03, -2.6760e-03, + -9.0103e-03, -3.5534e-03, 3.1677e-02, 1.9333e-02, 1.1230e-02, + 1.1559e-02, 7.1106e-03, -6.8176e-02, 3.5614e-02, -2.6535e-02, + 1.2856e-02, 6.5041e-03, -3.2410e-02, 6.0150e-02, -2.4094e-02, + -1.0971e-02, -1.1093e-02, 4.0924e-02, -3.3447e-02, -7.6447e-03, + 1.2741e-02, 2.2171e-02, -2.0844e-02, 5.2490e-02, -8.1558e-03, + -3.0365e-03, 4.1992e-02, 5.0323e-02, -4.7943e-02, -1.9531e-02, + 8.1024e-03, -4.1046e-02, -1.6327e-02, 8.3237e-03, -1.3824e-02, + -2.3102e-02, 3.6297e-03, -6.4201e-03, 3.0975e-03, 8.0490e-03, + 6.9771e-03, -1.5625e-02, 3.1605e-03, 1.0498e-01, 1.4465e-02, + -2.4002e-02, -1.4000e-02, -3.5858e-02, -4.4022e-03, -3.5400e-02, + 2.5058e-04, 1.4572e-02, -1.9503e-04, -1.6553e-01, -5.6076e-03, + -3.9185e-02, -2.0935e-02, -2.3209e-02, -3.5065e-02, -1.6373e-02, + -3.5019e-03, 1.6205e-02, -1.2405e-02, 7.7148e-02, -1.0551e-02, + 1.8539e-02, 2.3849e-02, -8.8272e-03, 5.6419e-03, 1.2901e-02, + 1.2619e-02, 1.4397e-02, -1.6449e-02, -4.7493e-03, 8.8654e-03, + -2.2186e-02, 2.1393e-02, 1.0635e-02, 7.3486e-02, 8.9111e-03, + -3.8116e-02, -2.6825e-02, -1.0338e-02, 1.1879e-02, 1.2970e-02, + -2.2797e-02, -1.1337e-02, -2.0004e-02, 5.1788e-02, 4.2419e-02, + -4.0512e-03, -7.1640e-03, -3.3142e-02, -4.9408e-02, -2.0264e-02, + 3.2227e-02, -3.5919e-02, -1.1429e-02, -4.4250e-03, 7.1754e-03, + 1.4107e-02, 3.7415e-02, 1.3893e-02, 4.4632e-03, 
-2.7084e-02, + -1.7120e-02, -1.3641e-02, 1.9226e-02, -2.9694e-02, 1.8661e-02, + -6.7635e-03, -1.4336e-02, -3.3569e-02, -3.7872e-02, 4.1687e-02, + 3.1555e-02, -1.9958e-02, 1.0811e-02, -8.8501e-03, -4.3915e-02, + -4.4670e-03, 1.8295e-02, 3.5095e-03, 8.9050e-02, -1.4282e-02, + -8.9722e-03, -4.6417e-02, -3.7018e-02, 3.1738e-02, -1.7609e-02, + -8.5602e-03, 1.7380e-02, -1.8646e-02, -1.8311e-02, 9.8343e-03, + 4.1107e-02, -1.1806e-03, -9.9335e-03, -4.8218e-02, 1.5572e-02, + 2.7008e-02, 2.3209e-02, 1.8616e-02, 1.9852e-02, 3.3264e-02, + 9.2773e-03, -6.4331e-02, 1.1345e-02, 1.7595e-03, 7.8812e-03, + -2.2522e-02, 3.4760e-02, -9.6664e-03, 8.0414e-03, -1.6663e-02, + 4.7791e-02, 2.0355e-02, 1.7181e-02, -3.5583e-02, 1.1414e-02, + -5.7709e-02, 8.1909e-02, -2.3651e-02, 2.2297e-03, -2.9358e-02, + -1.8143e-02, 3.0792e-02, 6.0028e-02, 3.3051e-02, -2.4551e-02, + 2.2156e-02, 2.2324e-02, 4.0039e-02, 9.3918e-03, 7.6721e-02, + 3.5187e-02, 7.9803e-03, 3.7270e-03, 1.7441e-02, -4.0779e-03, + -3.5248e-02, -5.6122e-02, 1.3580e-02, 8.0688e-02, 1.2466e-02, + 3.6255e-02, 3.0151e-02, 2.1271e-02, -1.0519e-03, -4.3182e-02, + -2.2385e-02, 3.5339e-02, -3.5309e-02, -4.9042e-02, 3.5583e-02, + 5.3314e-02, -6.4754e-04, 4.6600e-02, 4.5807e-02, -2.1439e-02, + 6.3896e-03, 2.0065e-03, 1.4320e-02, -4.1847e-03, 2.8961e-02, + 8.5907e-03, -9.6283e-03, -5.2277e-02, -3.6377e-02, -2.6459e-02, + 2.4681e-03, 2.7679e-02, 4.6356e-02, -2.1744e-02, -3.1036e-02, + 5.9242e-03, -2.1622e-02, -8.7204e-03, 3.6736e-03, 2.0370e-03, + -3.5614e-02, 3.6652e-02, 5.8556e-03, 1.6708e-03, 3.4088e-02, + -8.9455e-04, 5.4245e-03, -3.2959e-02, 1.3819e-03, -9.0637e-02, + -2.3117e-02, -3.3188e-03, 3.7811e-02, 2.0157e-02, -1.2825e-02, + -3.1586e-02, -1.6785e-02, -1.5823e-02, -3.2349e-02, 2.0962e-03, + 6.4507e-03, -8.2321e-03, 8.1177e-03, 5.0781e-02, 5.9814e-02, + 4.4586e-02, 1.5503e-02, 1.2657e-02, -7.0534e-03, -2.7283e-02, + -1.1047e-02, -2.2400e-02, -2.6230e-02, -4.2114e-02, -1.2314e-02, + 1.1765e-02, -5.0476e-02, 2.1629e-03, -9.9182e-03, 
2.2842e-02, + 2.0645e-02, 5.9143e-02, 4.4556e-02, 3.4302e-02, -1.0101e-02, + 4.0558e-02, 1.6373e-02, 6.6223e-03, -6.2790e-03, 1.3374e-02, + 2.3575e-02, 3.0731e-02, 1.7929e-02, -7.4730e-03, 1.1337e-02, + 2.5925e-02, 2.8839e-02, 1.0468e-02, -1.0818e-02, -8.6975e-03, + 1.9302e-02, -1.2520e-02, 3.6812e-03, -9.1476e-03, -4.1595e-02, + -1.5030e-03, -1.1528e-02, 1.8463e-02, 2.7023e-02, -2.2446e-02, + 3.1799e-02, 2.0935e-01, 8.7585e-03, -3.9459e-02, -1.2238e-02, + -2.6062e-02, -2.3911e-02, -7.2432e-04, 2.0691e-02, -5.9540e-02, + -9.0714e-03, -4.3762e-02, 6.6589e-02, -4.0817e-03, 2.6199e-02, + -8.0872e-03, 1.1955e-02, 3.5248e-02, -7.0435e-02, -1.8265e-02, + -5.5786e-02, 3.5797e-02, -1.3924e-03, -3.1494e-02, 1.8585e-02, + 2.5314e-02, 1.3420e-02, 8.4915e-03, -3.8300e-02, 1.6418e-02, + -4.0579e-04, 2.9266e-02, 5.0354e-02, -3.2898e-02, -4.4739e-02, + 4.3068e-03, -5.1208e-02, 2.0233e-02, 4.6356e-02, -8.3069e-02, + -3.3203e-02, 3.1647e-02, -6.6872e-03, 1.0475e-02, -3.0251e-03, + 1.3290e-02, 2.4323e-02, -7.9422e-03, 8.2397e-03, 3.5645e-02, + -2.4994e-02, -3.4698e-02, -3.8971e-02, 1.4175e-02, 7.0610e-03, + 3.1555e-02, -5.7144e-03, 2.8351e-02, 4.3396e-02, -2.6337e-02, + 5.5145e-02, 2.4689e-02, 1.3634e-02, -3.8452e-02, -6.9857e-04, + 1.5114e-02, -2.6245e-02, 1.5961e-02, 4.2694e-02, -2.4857e-02, + -3.3752e-02, 2.1805e-02, 1.4648e-02, -2.0737e-02, 5.3406e-02, + 1.7761e-02, -3.2959e-02, 1.1215e-02, 6.9046e-03, -1.5572e-02, + 6.2180e-03, -1.9760e-02, -7.9803e-03, 3.3684e-03, 5.7983e-02, + 6.1951e-03, 1.7899e-02, 1.8234e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.2872, 1.3902, 1.3280, 1.3746, 1.3808, 1.3310, 1.2905, 1.3744, 1.3278, + 1.2770, 1.3421, 1.3515, 1.2648, 1.4362, 1.3084, 1.3631, 1.3041, 1.3989, + 1.2531, 0.3754, 1.3145, 1.3037, 1.3856, 1.3438, 1.3401, 1.2750, 1.2746, + 1.3092, 1.5337, 1.3190, 1.2867, 1.3100, 1.3887, 1.3135, 1.4317, 1.4281, + 1.2585, 1.3812, 1.4312, 1.3558, 1.3686, 1.2785, 1.3244, 1.3191, 
1.3727, + 1.3274, 1.3551, 1.3741, 1.2872, 1.3436, 1.4240, 1.3185, 1.3188, 1.2834, + 1.3673, 1.4366, 1.2857, 1.4752, 1.3431, 1.2975, 1.3674, 1.3349, 1.2434, + 1.3561, 1.3934, 1.3163, 1.2964, 1.3656, 1.3091, 1.7060, 1.2112, 1.3438, + 2.4524, 1.3774, 1.3413, 1.2583, 1.2963, 1.3005, 1.3575, 1.4152, 1.2581, + 1.4116, 1.3328, 1.3162, 1.3791, 1.3734, 1.3793, 1.2855, 1.3213, 1.3572, + 1.3486, 1.3470, 1.3550, 1.3349, 1.4237, 1.3012, 1.3963, 1.2965, 1.2334, + 1.3769, 1.3145, 1.1864, 1.2911, 1.2162, 1.2641, 1.3571, 1.3028, 1.3323, + 1.2926, 1.3058, 1.3455, 1.3841, 1.2867, 1.2912, 1.2599, 1.3620, 1.3224, + 1.2191, 1.2997, 1.3315, 1.3464, 1.4175, 1.3383, 1.4195, 1.3479, 1.3867, + 1.3359, 1.3288, 1.4146, 1.3789, 1.3210, 1.3271, 1.2555, 1.2443, 1.3080, + 1.3088, 1.4416, 1.3641, 1.3288, 1.3230, 1.3284, 1.3592, 1.3514, 1.2883, + 1.2990, 1.3497, 1.3354, 1.3534, 1.2966, 1.3139, 1.3233, 1.3309, 1.3223, + 1.3252, 1.3250, 1.1075, 1.3933, 1.3489, 1.3420, 1.2268, 1.4483, 1.2636, + 1.3052, 1.4442, 1.3708, 1.3202, 1.2663, 1.3475, 1.2657, 1.3688, 1.3033, + 1.3369, 1.3094, 1.4049, 1.4070, 1.3336, 1.3666, 1.4053, 1.3935, 1.2258, + 1.3650, 1.4078, 1.3205, 1.2375, 1.3886, 1.3380, 1.3228, 1.8635, 1.3402, + 1.4189, 1.3341, 1.2720, 1.3730, 1.4405, 1.3683, 1.3268, 1.3239, 1.4251, + 3.8598, 1.3067, 1.2033, 1.3676, 1.4208, 1.4274, 1.3379, 1.2837, 1.3651, + 1.3117, 1.3239, 1.4408, 1.3410, 1.2964, 1.2914, 1.2973, 1.3181, 1.3553, + 1.4354, 1.2757, 1.2851, 1.3733, 1.2667, 1.2805, 1.3078, 1.2954, 1.3843, + 1.2732, 1.4498, 1.4749, 1.3324, 1.4228, 1.2985, 1.3570, 1.4567, 1.4490, + 1.3023, 1.3295, 1.4018, 1.2917, 1.2907, 1.2989, 1.3706, 1.3021, 1.4164, + 1.2990, 1.3597, 1.3136, 1.2466, 1.2832, 1.3223, 1.3688, 1.3477, 1.3461, + 1.2518, 1.2632, 1.3714, 1.3059, 1.3299, 1.3516, 1.2935, 1.2013, 1.3700, + 1.3071, 1.2896, 1.3473, 1.2983, 1.3207, 1.3504, 1.3353, 1.2847, 1.3485, + 1.3610, 1.3010, 1.3774, 1.3308, 1.2954, 1.2410, 1.3406, 1.3067, 1.3201, + 1.3046, 1.3123, 1.5712, 1.3965, 1.3870, 1.3659, 1.3075, 1.3338, 
1.3484, + 1.3848, 1.3058, 1.3570, 1.3049, 1.3742, 1.2756, 1.3889, 1.2953, 1.3125, + 1.3678, 1.4067, 1.4245, 1.5097, 1.2939, 1.4008, 1.3100, 1.2898, 1.3111, + 1.2741, 1.2701, 1.2899, 1.2631, 1.3375, 1.3803, 1.3075, 1.2911, 1.4013, + 1.3300, 1.3336, 1.2717, 1.3298, 1.4376, 1.2984, 1.3539, 1.3718, 1.3205, + 0.9002, 1.1273, 1.3590, 1.3239, 1.3377, 1.4179, 1.3039, 1.2933, 1.2394, + 1.3289, 1.3309, 1.2902, 1.2587, 1.3533, 1.2726, 1.3677, 1.3686, 1.2982, + 1.3758, 1.2909, 1.3539, 1.3848, 1.3587, 1.2797, 1.3354, 1.3630, 1.4622, + 1.2987, 1.2724, 1.4872, 1.2995, 1.4829, 1.3287, 1.3487, 1.4115, 1.3740, + 1.2543, 1.2981, 1.3168, 1.3157, 1.4478, 1.3249, 1.3100, 1.2767, 1.3550, + 1.3242, 1.3166, 1.3973, 1.2298, 1.3345, 1.2938, 1.2998, 1.4123, 1.3963, + 1.2704, 1.3450, 1.3796, 1.5035, 1.3848, 1.3198, 1.2972, 1.3515, 1.2375, + 1.4217, 1.2988, 1.3277, 1.4822, 1.2519, 1.3244, 1.2602, 1.3856, 1.3670, + 1.2955, 1.3744, 1.3669, 1.3714, 1.3356, 1.3683, 1.2582, 1.3579, 1.3028, + 1.3691, 1.3240, 1.3866, 1.3637, 1.2497, 1.3819, 1.3386, 1.3314, 1.2769, + 1.3428, 1.9735, 1.3798, 1.3468, 1.3407, 1.3285, 1.3599, 1.4341, 1.2609, + 1.3951, 1.2966, 1.2580, 1.3596, 1.3417, 1.4204, 1.3244, 1.4308, 1.3175, + 1.3006, 1.2515, 1.2395, 1.2493, 1.3272, 1.3005, 1.3845, 1.2743, 1.3190, + 1.3370, 1.3416, 1.1472, 1.2675, 1.3340, 1.3503, 1.3742, 1.3713, 1.3525, + 1.3118, 1.3306, 1.4060, 1.2554, 1.3448, 1.3657, 1.3274, 1.3503, 1.3757, + 1.3110, 1.3053, 1.3492, 1.3277, 1.3186, 1.3125, 1.4098, 1.3111, 1.3407, + 1.4183, 1.3285, 1.3850, 1.3056, 1.3465, 1.3493, 1.3270, 1.2958, 1.3944, + 1.3085, 1.3012, 1.3702, 1.2813, 1.2915, 1.3575, 1.1952, 1.3088, 1.3576, + 1.3346, 1.3577, 1.2858, 1.3787, 1.2965, 1.3709, 1.3449, 1.4087, 1.3293, + 1.3631, 1.3237, 1.2341, 1.3424, 1.3324, 1.2798, 1.3931, 1.3897, 1.2935, + 1.2785, 1.3657, 1.2583, 1.4487, 1.2702, 1.3525, 1.2965, 1.3752, 1.4298, + 1.3788, 1.3413, 1.3038, 1.4992, 1.3144, 1.3067, 1.2532, 1.3159, 1.2683, + 1.3746, 1.2443, 1.3016, 1.2842, 1.2961, 1.2805, 1.3825, 1.2896, 
1.3415, + 1.2923, 1.3563, 1.2550, 1.3741, 1.4044, 1.4208, 1.3354, 1.2533, 1.3894, + 1.3589, 1.2489, 1.3956, 1.2572, 1.3543, 1.3533, 1.3309, 1.3372, 1.3268, + 1.3941, 1.2826, 1.3982, 1.4003, 1.2890, 1.4476, 1.2573, 1.3132, 1.3348, + 1.3615, 1.3087, 1.2843, 1.3003, 1.3694, 1.3762, 1.2734, 1.3903, 1.4113, + 1.3063, 1.2960, 1.3405, 1.3407, 1.3494, 1.3724, 1.3081, 1.2372, 1.3109, + 1.3138, 1.3427, 1.3206, 1.3542, 1.3643, 1.3706, 1.3337, 1.3657, 1.3878, + 1.3569, 1.3834, 1.3598, 1.2711, 1.4078, 1.3747, 1.5675, 1.2749, 1.2390, + 1.2538, 1.2943, 1.2955, 1.2661, 1.2821, 1.3547, 1.3100, 1.3714, 1.3571, + 1.3821, 1.2715, 1.2951, 1.3517, 1.5251, 1.3422, 1.3360, 1.3600, 1.2926, + 1.3325, 1.3143, 1.3297, 1.3354, 1.3274, 1.3914, 1.4117, 1.4397, 1.2755, + 1.4176, 1.3882, 1.3252, 1.2717, 1.3697, 1.3725, 1.2981, 1.3732, 1.3615, + 1.3712, 1.3910, 1.2379, 1.3230, 1.3764, 1.3150, 1.2980, 1.2872, 1.2196, + 1.2480, 1.3109, 1.4561, 1.4493, 1.2962, 1.2464, 1.3213, 1.3355, 1.3883, + 1.5240, 1.2383, 1.3756, 1.3613, 1.3726, 1.3346, 1.4441, 1.3882, 1.3590, + 1.3018, 1.2842, 1.2935, 1.3592, 1.2867, 1.2710, 1.3668, 1.3812, 1.3530, + 1.4434, 1.2979, 1.3567, 1.3358, 1.3525, 1.3261, 1.3334, 1.2530, 1.3479, + 1.3575, 1.3310, 1.4809, 1.2589, 1.2816, 1.3649, 0.4186, 1.4145, 1.2535, + 1.3430, 1.3093, 1.3702, 1.3583, 1.3723, 1.3647, 1.3383, 1.2710, 1.4596, + 1.3527, 1.3731, 1.2643, 1.3753, 1.3268, 1.2178, 1.3426, 1.3283, 1.3710, + 1.3677, 1.2947, 1.2627, 1.2397, 1.3351, 1.4022, 1.4032, 1.3174, 1.3353, + 1.2170, 1.2888, 1.3188, 1.3385, 1.3833, 1.2715, 1.3615, 1.3580, 1.3459, + 1.3332, 1.3058, 1.3122, 1.3450, 1.3347, 1.2283, 1.2703, 1.3616, 1.4082, + 1.3126, 1.2626, 1.2945, 1.4637, 1.3127, 1.2583, 1.3960, 1.3580, 1.2940, + 1.3000, 1.3408, 1.3034, 1.2603, 1.3945, 1.2683, 1.2457, 1.4037, 1.3055, + 1.3822, 1.3478, 1.5217, 1.3705, 1.2843, 1.2831, 1.3864, 1.3480, 1.3994, + 1.3649, 1.2519, 1.3327, 1.3546, 1.4664, 1.2941, 1.3896, 1.3242, 1.3849, + 1.4253, 1.2813, 1.3836], device='cuda:1', requires_grad=True) 
+torch.Size([768]) +Parameter containing: +tensor([ 8.7339e-02, 6.1186e-02, -1.1393e-01, -2.3187e-02, 6.6224e-02, + 1.0484e-01, 6.1928e-02, 6.6969e-02, -8.2951e-02, 2.9519e-02, + 5.2148e-02, -2.8967e-02, -1.1092e-01, -1.5207e-02, -2.4184e-02, + -5.6119e-02, -1.0727e-02, 3.7562e-04, -2.8220e-02, 4.4238e+00, + -6.0840e-02, 2.7884e-02, 3.6516e-02, -1.5820e-02, -5.4270e-02, + -6.5341e-03, -2.6812e-02, -6.5928e-03, 2.9693e-02, -9.4959e-02, + 1.1967e-02, -8.0892e-03, 5.1503e-03, 3.4587e-02, 8.9452e-02, + -6.5282e-02, 2.3060e-02, -5.3270e-02, -1.2999e-01, 3.5249e-02, + 2.4790e-02, -5.1246e-04, 1.9823e-02, -1.1152e-02, -3.1151e-02, + 2.7093e-03, 3.5210e-02, -3.5405e-02, 1.7274e-02, 2.7853e-02, + 4.4344e-02, 1.2647e-02, -2.1005e-02, 3.0567e-02, 1.0525e-01, + 2.3741e-02, -8.9789e-02, -3.4534e-02, 4.5046e-02, 1.6473e-02, + 6.1683e-02, -6.3831e-02, 4.1221e-02, 1.8472e-02, 7.8758e-02, + -2.1876e-02, 6.5841e-02, 1.0714e-01, -3.7961e-02, 2.8308e-01, + 2.0154e-02, 3.1277e-02, 4.4453e-01, 7.3221e-02, -1.2682e-01, + 8.9078e-02, 1.6569e-02, -1.2624e-01, -8.9929e-02, 6.2439e-02, + 5.7802e-03, -5.0506e-03, -3.1454e-02, 1.1621e-01, 9.1203e-03, + 4.3324e-02, 1.0606e-01, 1.0382e-01, -8.6284e-03, 2.2634e-02, + 3.4288e-02, 2.8976e-02, 4.6591e-02, -4.4971e-02, -4.7039e-02, + 2.8868e-02, -4.0962e-02, -4.7151e-02, -5.6912e-02, -5.0799e-02, + 2.0091e-02, 6.3718e-02, 5.2493e-03, -4.9528e-02, 5.4862e-02, + 5.7750e-03, -3.8043e-02, -4.7645e-02, 2.5038e-02, -6.7444e-02, + -4.9929e-02, -8.9348e-03, -2.2372e-04, 1.7883e-02, 6.9704e-02, + -3.3632e-02, 7.6718e-03, -6.9778e-02, 1.3104e-02, 8.7781e-02, + -6.2585e-02, -1.3901e-01, -5.5338e-02, -7.6090e-03, -1.1955e-01, + 1.5045e-02, -8.3363e-02, -3.8883e-03, 9.6386e-03, -3.3875e-02, + -8.4132e-02, 3.9896e-02, -3.4697e-02, -3.9018e-03, -6.4938e-02, + -1.0833e-02, -2.7428e-02, 4.2242e-03, -2.9702e-02, -4.0883e-02, + -1.6351e-03, -3.6239e-03, 4.0830e-02, 3.1907e-02, 4.9882e-02, + -2.4862e-02, -3.1561e-02, -4.6421e-02, 1.7076e-02, 8.8081e-02, + 1.8782e-01, 
-5.4576e-02, -9.4501e-02, 5.3017e-03, -3.8913e-02, + -1.3871e-01, -7.6044e-04, -9.9700e-02, 4.5043e-02, -4.9313e-03, + 1.4418e-01, -1.1649e-01, 4.5746e-03, 2.1470e-02, 7.0424e-03, + -1.6105e-02, -4.2264e-02, 3.2586e-02, -5.0255e-02, 3.5865e-02, + -4.6770e-02, 7.1799e-02, 2.7991e-02, 6.2034e-02, 6.3537e-02, + -7.6811e-02, -3.2816e-02, 5.1421e-02, -3.5653e-02, -2.8211e-03, + -1.0634e-02, -5.3496e-02, 4.2309e-02, -6.4404e-03, -1.2252e-02, + -1.5379e-01, 4.8189e-02, 7.4657e-02, -3.1758e-02, -3.2122e-02, + -1.1376e-02, -3.7663e-02, 3.5552e-02, 4.2712e-02, -7.2979e-02, + -6.1913e-03, -4.6449e-02, -1.0435e-01, -1.1276e-01, 2.3868e-02, + -1.0795e-01, -1.5822e-02, -4.6000e-02, 2.2899e-02, 4.1231e-02, + 9.4707e-02, -7.7341e-02, 5.8019e-02, 9.9886e-02, 8.9785e-03, + 9.3099e-03, 1.4751e-01, -6.3596e-03, -4.8528e-02, -6.2385e-02, + -8.9581e-04, -2.4941e-03, 3.8993e-02, -7.8228e-02, -1.4844e-01, + -9.6491e-03, -3.7275e-02, 6.1649e-02, -4.6889e-02, 1.6001e-02, + -5.7545e-02, 5.0802e-02, -3.0250e-02, 7.6051e-02, -5.7192e-02, + 1.7285e-03, -2.3828e-03, -1.6417e-02, 9.5145e-02, -2.6237e-02, + 9.2725e-03, -7.9587e-02, 4.6451e-02, 5.6558e-02, 9.2277e-03, + 6.6372e-02, 8.8247e-02, 4.5625e-02, 2.6280e-02, 2.6710e-02, + 3.2452e-02, 9.7308e-02, 6.8142e-03, 1.4230e-02, -3.9746e-02, + 5.8876e-02, 7.5144e-03, 5.6110e-02, -6.9678e-02, 8.3911e-02, + -1.1423e-01, -3.4860e-02, -8.7782e-02, 2.4758e-02, 5.3315e-03, + -4.8554e-02, -6.1523e-02, 7.4342e-03, -3.1822e-02, -4.5801e-02, + 4.5603e-02, -2.8560e-02, 1.5172e-02, -6.5890e-02, -2.5550e-02, + -1.1881e-02, 1.0670e-02, 6.7133e-02, -3.5807e-02, 3.2999e-02, + 3.9318e-02, -1.1167e-01, 1.9786e-03, 3.6381e-02, -6.4092e-02, + 1.5747e-01, 1.4422e-01, 8.4585e-04, 1.2694e-02, 1.9729e-02, + 3.4082e-02, 1.0148e-01, -3.5446e-02, 1.4398e-01, -1.1190e-01, + -2.0517e-02, -4.3020e-02, 9.5896e-02, 4.6603e-02, 5.5142e-02, + 9.3104e-03, 9.9086e-02, 2.6252e-02, -1.0458e-02, 4.9047e-02, + 4.6309e-02, -1.9033e-02, 7.5156e-03, 7.0244e-02, 2.1686e-02, + -9.7619e-03, 
6.5970e-02, -8.2329e-02, 9.5006e-02, -4.9631e-02, + 3.9037e-02, 2.9221e-03, -5.2377e-02, -3.3832e-02, -8.8777e-03, + -6.0936e-02, -8.4812e-03, 4.0256e-02, 1.2562e-01, -5.0265e-02, + -5.2797e-02, 1.9654e-01, -4.9036e-02, -2.3278e-02, 9.1080e-02, + -1.1720e-01, 1.3250e-02, 5.1713e-02, 4.0155e-02, -9.8394e-02, + 1.4361e-01, -9.7763e-03, 1.2028e-01, -3.9207e-02, 2.3117e-02, + 8.6042e-02, -8.9835e-03, -5.6574e-02, -1.4509e-02, 3.6862e-03, + -1.0871e-01, 1.5602e-02, -3.5644e-02, -3.3046e-02, 7.6870e-02, + 2.7187e-02, 8.6666e-02, 6.3289e-02, 2.9537e-02, 2.4505e-02, + -3.2169e-02, 3.7398e-02, 9.9083e-03, 1.2819e-02, -4.2381e-02, + -3.4945e-02, -4.1356e-02, 9.5625e-03, -1.0108e-02, -5.6753e-02, + 3.8378e-02, -1.0612e-03, 2.3003e-02, -8.1679e-02, -1.6081e-02, + -2.5599e-02, 3.0458e-02, 9.6022e-02, 2.4029e-02, 2.1840e-02, + 4.9550e-02, -4.1764e-02, -5.5127e-02, -6.5640e-03, -2.8241e-02, + 3.8417e-03, 7.3980e-02, 3.6628e-02, -1.5850e-02, -1.6176e-02, + -9.0316e-02, 1.1332e-01, 2.6261e-02, 6.8534e-03, -1.0242e-02, + -1.7797e-02, -1.3862e-02, -7.3380e-02, -4.9812e-02, -6.4270e-02, + 2.7561e-02, -4.8994e-02, -2.0447e-02, -1.3643e-02, 6.1894e-02, + 6.2277e-03, 4.1400e-02, 2.3730e-02, 1.8632e-02, 7.0347e-02, + 1.1018e-01, -6.8689e-02, 5.4930e-02, 2.8267e-02, -8.7133e-02, + -1.2398e-01, -6.2607e-02, 3.4140e-03, 5.9502e-03, -2.9806e-02, + -5.7428e-02, 2.2081e-02, 8.3703e-02, -6.1587e-02, -6.7964e-02, + -9.1609e-01, 1.5850e-02, 2.8917e-02, -3.4535e-02, -1.1373e-02, + 3.3038e-02, -1.5638e-02, -1.9283e-02, -4.7749e-02, -4.2693e-02, + 4.2583e-03, -1.3398e-01, -8.5306e-02, 1.2055e-02, -3.7024e-02, + -5.7687e-02, -4.9867e-02, -2.1062e-02, 6.0778e-02, 6.8634e-02, + -2.2010e-02, 3.8530e-03, 3.4686e-02, 8.6580e-02, -6.1052e-02, + -1.2155e-02, -8.3256e-02, -4.8036e-02, 8.6155e-02, 7.9283e-02, + -1.1840e-01, 5.3046e-05, -8.5889e-02, 1.0105e-01, 1.0565e-02, + -3.0992e-03, 4.3394e-02, -9.1592e-02, 3.0969e-02, 3.9235e-02, + -3.1675e-02, 2.2725e-04, 4.3952e-02, -1.6739e-02, -9.4257e-02, + 
2.3025e-02, 3.2887e-02, -6.3583e-02, 1.7878e-02, 1.0523e-02, + -4.1162e-02, 1.2117e-02, -5.3632e-02, 4.9440e-02, 2.1264e-02, + -3.3455e-04, -9.0725e-02, -3.2049e-02, -8.2630e-03, -4.3023e-02, + 4.5941e-03, 1.0333e-01, 6.9964e-02, 2.6214e-03, 7.1494e-02, + 7.0434e-02, -1.2217e-01, 2.8510e-02, 1.3535e-01, -6.5110e-02, + -5.2724e-03, -2.8958e-02, 8.3868e-02, 6.3884e-02, 3.0647e-02, + 3.0163e-02, -6.8748e-03, 3.6247e-02, -2.0434e-02, -1.1683e-01, + 2.4959e-02, -6.3642e-02, 4.0930e-02, -4.0984e-02, 2.4238e-02, + 9.8886e-03, -3.7878e-02, 4.9394e-02, -2.7229e-02, -3.0365e-02, + 3.3135e-02, 5.1902e-02, 6.7655e-02, 3.0071e-02, 1.9174e-02, + -7.3900e-02, 3.5665e-02, -5.8163e-02, 7.0575e-02, -6.0976e-02, + 5.5363e-03, 2.2391e-02, 1.0395e-01, 7.7417e-02, -8.4300e-02, + -6.5664e-02, -2.2830e-02, 6.6328e-02, -2.2972e-02, -4.1334e-02, + -1.4479e-02, -7.1433e-03, -2.0187e-02, -1.0832e-02, -8.9342e-02, + 2.1364e-02, 3.7668e-02, -2.2352e-02, -5.8539e-02, -2.8948e-03, + -3.9876e-03, -4.2716e-02, 2.3490e-02, -6.3561e-02, -4.4949e-02, + -7.5358e-03, 1.4734e-02, -1.6480e-02, 4.3481e-02, 1.9900e-02, + -2.1993e-02, 3.7201e-02, 1.4117e-02, 8.6074e-02, 1.4078e-02, + 6.8319e-02, 1.7246e-02, -5.1239e-02, 1.0810e-02, -1.1514e-02, + -8.7157e-03, 2.3565e-02, -9.8027e-03, 2.3457e-02, 7.3346e-02, + -5.4916e-02, 8.1404e-02, 2.9883e-02, 2.6833e-02, 1.1656e-01, + 6.4959e-02, 3.9964e-03, 7.9622e-03, 2.5413e-02, 3.7862e-02, + 5.7556e-02, -1.7377e-03, -7.8044e-02, 3.9611e-03, 5.0649e-02, + 2.1132e-02, 3.8345e-02, 4.0324e-02, 3.7830e-02, -2.6392e-02, + -7.8393e-02, -1.0003e-02, 5.9539e-02, 1.0001e-01, 6.2470e-02, + 4.5374e-03, 4.8765e-02, -5.7036e-02, 2.1272e-02, 1.7698e-02, + 8.3991e-03, 1.1972e-01, 5.4382e-02, -2.7853e-02, -2.0012e-03, + -1.9343e-02, 6.0604e-02, 8.5150e-03, -3.5723e-02, 4.2000e-02, + -4.4597e-02, -1.0483e-01, 2.3521e-02, -1.0434e-02, 2.4817e-03, + -2.6558e-02, 6.5472e-02, -1.9289e-01, 3.2543e-02, -5.5323e-02, + 4.4021e-02, 3.7345e-02, -6.6249e-03, -3.5713e-02, -2.2755e-02, + 
-1.0394e-01, 6.1514e-02, 8.5493e-02, 3.4580e-02, -4.7714e-02, + 1.2638e-02, 3.7360e-03, 1.3561e-02, 2.2447e-02, 1.5395e-02, + -3.0915e-02, 8.6202e-03, -1.8053e-02, 4.4125e-02, -1.6347e-02, + 8.7838e-02, 2.2420e-02, 8.3271e-02, 1.3798e-01, 2.5618e-02, + 3.2791e-02, -4.0671e-02, -4.8187e-03, 5.1011e-02, -4.7374e-02, + 1.3102e-02, -4.2113e-02, 8.1951e-02, -4.8852e-02, 3.5464e-02, + -3.0721e-03, 1.8094e-02, 7.1526e-02, 3.9374e-02, 9.8026e-02, + 8.5014e-02, -5.5595e-03, -4.8160e-02, 6.0500e-02, -3.8437e-02, + 2.5993e-02, -7.7234e-02, -4.8771e-02, 2.8361e-02, 6.6123e-02, + -7.5233e-02, 6.7676e-03, -8.9858e-02, 5.8787e-03, 8.0563e-02, + 2.5858e-02, -8.7248e-02, -8.6673e-02, 2.9948e-02, 2.5570e-02, + -1.9238e-01, 5.0784e-03, 1.4281e-02, 7.2192e-04, -3.1842e-02, + 3.3397e-02, -5.5607e-02, -6.8789e-02, -1.0273e-03, 6.6684e-02, + -4.6887e-02, -5.2200e+00, -1.7528e-02, 4.8796e-02, -1.3987e-02, + -9.0133e-02, -2.4560e-02, -3.0890e-02, -1.3240e-02, 3.3220e-02, + -2.1894e-02, -5.0840e-02, 3.4118e-02, -1.8583e-02, -4.1701e-02, + 2.2649e-02, -2.8920e-02, -9.6992e-02, 2.9297e-02, -4.1082e-02, + 8.9242e-02, 6.1757e-02, -3.6107e-02, -1.3546e-02, -1.2672e-01, + -2.2187e-02, 4.5807e-02, 4.1666e-02, 4.1213e-03, 3.8655e-02, + -1.4283e-02, 6.7196e-02, 7.3769e-02, 1.5050e-02, 2.3629e-02, + 2.7161e-02, 7.9100e-03, 1.6354e-02, -8.4922e-02, 2.0299e-02, + 8.6790e-03, 3.9312e-03, 2.8671e-02, -7.6458e-02, 3.5176e-02, + 9.5686e-03, -1.8447e-01, 9.5421e-02, 1.8604e-02, 9.2838e-02, + 1.0337e-01, 4.1986e-02, 4.9914e-02, 4.2715e-02, 3.8483e-03, + -1.6496e-02, -2.5456e-02, -1.3371e-02, -1.5350e-02, -3.9309e-02, + 8.4016e-02, -9.3841e-02, -9.3237e-02, 6.4717e-02, -1.0674e-02, + -7.5973e-03, -1.0846e-01, -3.1029e-03, 5.0573e-02, -5.4467e-02, + -4.5049e-02, -1.6193e-02, -4.6469e-02, -5.5992e-02, 4.7240e-02, + -3.7008e-03, 1.1970e-01, 1.0794e-01, 4.3454e-02, -7.2692e-02, + 1.9468e-02, 4.4535e-02, 5.7215e-02, -6.2108e-02, -7.0441e-02, + -7.9148e-02, 8.5440e-02, 1.7515e-04], device='cuda:1', + 
requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0181, -0.0053, 0.0376, ..., 0.0159, 0.0007, -0.0079], + [-0.0007, 0.0249, 0.0235, ..., -0.0029, -0.0186, 0.0282], + [-0.0062, 0.0307, 0.0169, ..., -0.0004, -0.0213, -0.0294], + ..., + [-0.0041, -0.0177, 0.0085, ..., 0.0139, -0.0345, 0.0094], + [-0.0312, -0.0024, 0.0006, ..., -0.0078, 0.0215, -0.0030], + [ 0.0234, -0.0085, -0.0076, ..., 0.0165, -0.0413, 0.0310]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3335, -0.4612, -0.1525, ..., -0.2974, -0.4580, -0.3103], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[ 0.0056, 0.0015, -0.0063, ..., -0.0112, -0.0193, -0.0046], + [ 0.0023, 0.0101, 0.0025, ..., 0.0093, 0.0138, 0.0203], + [-0.0024, 0.0070, -0.0107, ..., 0.0100, -0.0153, 0.0128], + ..., + [-0.0022, -0.0275, 0.0059, ..., 0.0156, 0.0155, 0.0103], + [ 0.0172, 0.0090, 0.0030, ..., -0.0214, -0.0010, -0.0127], + [-0.0244, 0.0601, 0.0131, ..., 0.0105, -0.0049, 0.0170]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-1.4061e-02, 5.6396e-02, -6.3660e-02, -1.5656e-02, -7.0129e-02, + 2.0599e-02, 4.9377e-02, -3.9101e-03, -9.5215e-02, -2.7008e-02, + -6.2294e-03, -5.4504e-02, -3.6987e-02, 1.1467e-02, -6.0364e-02, + 1.1948e-02, -2.9373e-02, -8.7402e-02, 8.5144e-03, 5.3802e-02, + -3.0930e-02, 1.3420e-02, 2.3880e-02, -3.1372e-02, 5.2948e-02, + 5.3467e-02, -4.9561e-02, 1.4877e-02, -1.1041e-01, -2.2690e-02, + 1.9684e-02, 5.5275e-03, 1.0254e-02, 7.5806e-02, -9.7046e-03, + -1.0480e-01, 8.2474e-03, -3.4485e-02, 3.0014e-02, 3.0609e-02, + -3.0533e-02, 8.3801e-02, 2.3941e-02, 5.3024e-03, -6.8481e-02, + 1.1032e-02, 2.2781e-02, 1.8511e-03, -8.6548e-02, -1.5198e-02, + 6.7505e-02, 1.4053e-02, 3.4424e-02, 6.8176e-02, 7.7271e-02, + -2.4109e-02, -8.1421e-02, 2.7557e-02, -3.1052e-02, 6.6662e-04, + -4.0070e-02, 
3.4027e-02, 1.1841e-02, -6.9618e-03, 3.4424e-02, + -3.0487e-02, 7.7844e-05, -7.1220e-03, -1.5717e-02, 4.3060e-02, + -4.1138e-02, -8.2642e-02, 1.1102e-01, 1.3599e-01, -4.3335e-02, + 3.8269e-02, -1.6346e-03, 4.3030e-02, -2.4582e-02, -5.9174e-02, + -2.5162e-02, -3.9001e-02, -4.8790e-03, 1.1086e-02, 1.3870e-02, + -6.6833e-02, 3.4424e-02, 4.6539e-02, 6.1760e-03, -3.5339e-02, + 1.6159e-02, 3.7079e-02, 4.8370e-02, -2.2202e-02, -1.4000e-02, + 1.0925e-01, 1.7685e-02, -5.4413e-02, -7.2205e-02, -1.6632e-02, + 2.9182e-03, -1.9666e-01, 5.5756e-02, -5.1514e-02, 4.4647e-02, + 6.9946e-02, 2.4979e-02, -1.4046e-02, -3.4027e-02, 2.9282e-02, + -1.1826e-02, 2.2263e-02, 9.3933e-02, 3.9429e-02, -3.7781e-02, + -5.6458e-02, -3.6621e-02, -5.4779e-02, -3.7169e-04, -1.4061e-02, + -3.3966e-02, -2.3285e-02, -4.4281e-02, -1.0368e-02, -3.7384e-02, + -3.3112e-02, -7.8613e-02, -3.0914e-02, 5.9021e-02, -4.4769e-02, + -7.5195e-02, -4.1779e-02, 4.3030e-03, -6.2866e-02, 2.7435e-02, + 1.2720e-01, 2.9449e-03, 6.2408e-02, 5.4245e-03, 3.5034e-02, + 3.0960e-02, -3.8177e-02, 4.6463e-03, 8.0109e-03, -1.8127e-02, + -8.8684e-02, -1.0429e-02, -3.1433e-02, 6.5674e-02, 6.5735e-02, + 8.7036e-02, -8.7524e-02, 2.6718e-02, 8.1909e-02, -9.2102e-02, + -1.7593e-02, -5.4932e-02, 1.5839e-02, 1.3710e-02, -3.4409e-03, + 1.6577e-01, -2.3468e-02, 2.4734e-02, 1.5747e-02, -2.8046e-02, + 4.0802e-02, -5.1392e-02, -9.7122e-03, 1.8723e-02, 5.7281e-02, + -2.8458e-02, 2.8839e-02, -2.7649e-02, -4.4220e-02, 1.2222e-02, + -2.3956e-02, 1.4236e-02, -1.0651e-02, -7.6904e-02, -6.7322e-02, + -5.0262e-02, -5.9509e-02, -7.0618e-02, -4.3396e-02, 8.0078e-02, + -3.3325e-02, 3.0624e-02, 2.4512e-01, -1.0144e-01, -6.1249e-02, + 2.6138e-02, -8.4610e-03, 7.6843e-02, 1.1192e-02, -8.0505e-02, + 5.1453e-02, 6.0364e-02, -4.6722e-02, -3.1586e-02, -3.1311e-02, + 5.7953e-02, -6.7997e-04, -3.5973e-03, -7.8125e-02, 4.6936e-02, + 3.1021e-02, -6.1127e-02, 2.0584e-02, 1.0339e-01, 5.8472e-02, + -1.3710e-02, 4.4250e-02, 3.9154e-02, 1.7075e-02, 3.4424e-02, + 
-7.4158e-02, -1.0986e-02, -2.1469e-02, -8.9050e-02, 9.3689e-03, + 2.7557e-02, -4.6021e-02, 3.1769e-02, 1.6556e-02, 5.2643e-02, + -1.8143e-02, -6.4270e-02, -2.6123e-02, 6.5247e-02, 3.2104e-02, + -5.7434e-02, 5.7869e-03, -1.4633e-02, 8.5510e-02, -4.4556e-02, + -2.1534e-03, -4.9896e-02, -3.9917e-02, 5.9479e-02, -1.2680e-02, + 1.0901e-01, -2.4338e-02, -2.4429e-02, 4.2297e-02, 5.8594e-02, + -2.1072e-02, -1.8875e-02, -1.3257e-01, -6.5979e-02, 3.3356e-02, + 1.7443e-03, -1.4175e-02, -5.2338e-02, -7.3364e-02, 1.6251e-02, + -1.5802e-03, -3.9459e-02, -2.7374e-02, 2.8000e-02, 2.1164e-02, + 4.5654e-02, -7.5378e-02, -7.2815e-02, -4.2236e-02, -3.5889e-02, + -3.0624e-02, 3.6987e-02, -2.8732e-02, -7.6828e-03, 4.5319e-03, + -1.9806e-02, -9.3937e-04, 2.4857e-02, -2.2259e-03, -3.6591e-02, + 3.7323e-02, 2.0370e-03, 7.9834e-02, 3.7231e-02, 1.0002e-02, + 1.7105e-02, 4.5837e-02, -4.6692e-02, -4.8523e-02, 3.3722e-02, + 8.3435e-02, -2.1683e-02, -6.1798e-02, 2.3483e-02, 3.5919e-02, + 2.0370e-02, -9.9869e-03, 1.8478e-02, -2.6886e-02, 6.9946e-02, + 3.6621e-02, -2.6321e-03, 9.5947e-02, 1.6586e-02, 5.2338e-02, + -4.2084e-02, 1.9943e-02, -5.6305e-02, 6.4026e-02, -6.0211e-02, + 3.3630e-02, 5.9448e-02, 3.2410e-02, 2.9678e-02, -2.2797e-02, + 3.8696e-02, -1.2733e-02, -1.3748e-02, -5.0018e-02, 1.8646e-02, + -1.9653e-02, -2.2293e-02, -3.8242e-03, 7.0740e-02, -4.0710e-02, + -5.0720e-02, -4.9248e-03, -7.8369e-02, -4.6570e-02, 6.0693e-01, + -1.6943e-01, 1.2054e-01, -2.4719e-02, 1.8860e-02, -1.4612e-01, + 6.7749e-02, -3.7811e-02, 4.3427e-02, -2.1225e-02, -2.4750e-02, + 7.8979e-02, -1.7532e-02, -4.8798e-02, 1.6388e-02, -4.3549e-02, + -5.3711e-02, 3.9062e-02, 2.7985e-02, 5.9631e-02, 2.8320e-02, + 4.2267e-02, 3.2401e-04, 2.4475e-02, 1.0384e-02, -4.2480e-02, + -3.9856e-02, 1.0321e-01, -5.8563e-02, 2.4902e-02, -1.4107e-02, + -5.2368e-02, -2.2842e-02, 2.3376e-02, -3.6469e-02, -3.9429e-02, + 6.1401e-02, -6.1646e-03, 5.8746e-02, -9.4528e-03, -1.2421e-02, + 6.7322e-02, -1.9302e-02, 5.4230e-02, 9.0485e-03, 
-4.9255e-02, + 1.1572e-01, 1.6220e-02, -7.0435e-02, 2.1118e-02, -4.2145e-02, + -4.5593e-02, -4.9774e-02, 6.0913e-02, 3.0304e-02, 2.1534e-03, + -1.0452e-03, 1.9501e-02, -1.0841e-02, -5.1544e-02, 4.0253e-02, + -7.6721e-02, 4.4434e-02, -3.3722e-02, -3.0746e-02, -1.0907e-01, + -6.8787e-02, 8.5571e-02, -3.6743e-02, -2.6855e-02, -4.3762e-02, + -3.9215e-02, -1.0818e-02, -8.3313e-02, 5.0842e-02, 6.1035e-02, + 3.1311e-02, 8.9741e-04, -1.7532e-02, 7.4341e-02, -1.5465e-02, + -6.9519e-02, -5.2002e-02, 1.0608e-01, -7.9163e-02, -9.5062e-03, + -3.2921e-03, 7.5989e-02, -4.3907e-03, -2.6840e-02, -6.5536e-03, + 3.3234e-02, 1.0811e-02, 5.7983e-03, -4.9042e-02, -3.9093e-02, + -3.9581e-02, 1.7822e-02, -6.1096e-02, -5.0720e-02, -4.8157e-02, + 4.1428e-03, -4.1016e-02, -3.2959e-02, 3.9154e-02, -2.9526e-02, + -1.2360e-01, 9.9030e-03, -2.6550e-02, 2.6505e-02, 3.7750e-02, + -3.5858e-02, 8.9951e-03, 4.8637e-03, -3.0960e-02, -8.2764e-02, + -3.8242e-03, 3.0319e-02, -4.2175e-02, -8.8318e-02, 2.1057e-02, + -9.5276e-02, -8.1665e-02, -3.4393e-02, 4.5532e-02, -1.3542e-02, + -1.3756e-02, -6.5308e-02, -8.9355e-02, 1.4160e-01, -4.8645e-02, + 4.9713e-02, -1.3733e-02, 3.3447e-02, 2.5406e-02, -6.4964e-03, + 5.6702e-02, 1.6342e-02, 1.6785e-02, -1.6998e-02, 5.7487e-03, + -4.2175e-02, 7.4036e-02, 1.0849e-02, 1.0529e-01, 3.6804e-02, + -2.8107e-02, -7.0068e-02, -4.9713e-02, 1.8188e-02, -4.3671e-02, + 3.5004e-02, 9.6313e-02, 4.2450e-02, 6.0059e-02, 5.2704e-02, + 3.7018e-02, -8.0109e-03, -8.6594e-03, 4.1779e-02, 2.6550e-02, + -6.3110e-02, -7.6843e-02, 8.2886e-02, 4.2206e-02, -7.9285e-02, + 2.0248e-02, 4.6539e-02, 1.3840e-02, -1.7807e-02, -6.7871e-02, + 9.4070e-03, -5.5573e-02, 5.7770e-02, 1.0849e-02, 9.2163e-02, + 6.7139e-02, 1.6388e-02, -2.5635e-03, -1.1945e-01, 8.4763e-03, + 5.6488e-02, -4.8637e-03, 5.6702e-02, -9.3140e-02, -2.0325e-02, + -9.3994e-02, -8.2275e-02, -1.3657e-02, 4.4708e-02, -5.2551e-02, + 2.5864e-02, -3.2562e-02, 1.1322e-01, 7.3181e-02, -6.3171e-02, + 6.0608e-02, -6.4545e-03, 1.1572e-01, 
-2.9510e-02, 8.9233e-02, + 6.3721e-02, 1.4725e-02, 1.8356e-02, -2.4994e-02, -2.6642e-02, + 2.1530e-02, -1.0132e-02, -6.9214e-02, 2.2064e-02, -3.3997e-02, + -1.0612e-02, -2.5970e-02, 2.8091e-02, -2.3727e-02, -5.9692e-02, + -7.8796e-02, 8.3862e-02, 3.1769e-02, -6.2500e-02, 5.5450e-02, + -1.8280e-02, 7.4524e-02, -3.0426e-02, -5.5359e-02, 7.1373e-03, + -3.7903e-02, 1.7685e-02, -7.1106e-02, -3.8385e-05, -5.7373e-02, + -2.8854e-02, -9.9258e-03, -5.6152e-02, -2.2678e-03, 8.7341e-02, + 2.7756e-02, 3.2654e-02, -9.6817e-03, -2.3178e-02, 1.3306e-01, + 4.1626e-02, -5.2765e-02, 2.1393e-02, 7.0129e-02, 4.1016e-02, + 4.2558e-04, -7.3730e-02, -6.8359e-02, 2.1774e-02, 8.2520e-02, + 2.7802e-02, -8.4019e-04, 8.5083e-02, 4.2603e-02, -3.7598e-02, + -1.0718e-01, -1.2264e-03, 9.1476e-03, -8.7509e-03, 7.0923e-02, + 6.0547e-02, 6.1073e-03, 1.0052e-03, 9.3689e-02, -2.9633e-02, + -2.0767e-02, 5.7449e-03, -1.1909e-02, -9.0942e-02, 1.8463e-02, + 7.7629e-03, -4.3640e-02, -4.4861e-02, -6.5918e-02, -2.2125e-02, + -8.6853e-02, 3.0991e-02, 1.1780e-02, 3.6835e-02, -3.6163e-02, + -1.8982e-02, 4.1443e-02, -6.8237e-02, -4.1016e-02, -1.4877e-02, + 4.0817e-03, 7.3624e-03, -1.0017e-02, 1.6495e-02, 4.3091e-02, + 3.0319e-02, -2.2293e-02, 6.9351e-03, -4.2633e-02, -6.1218e-02, + 4.9255e-02, -2.5101e-02, 3.8818e-02, -3.5439e-03, 1.9394e-02, + 1.0309e-03, 1.2541e-03, 1.5793e-02, 2.5757e-02, -4.0253e-02, + 1.6455e-01, -1.1420e-01, -4.3030e-03, 1.1755e-01, 3.1464e-02, + -4.3945e-02, 1.9547e-02, 7.6050e-02, -8.4229e-03, -5.1544e-02, + 4.4250e-02, -1.4275e-02, 6.0883e-03, -3.5278e-02, -3.5004e-02, + -2.5192e-02, 2.6306e-02, 4.5135e-02, 6.1646e-02, 5.8777e-02, + 5.5580e-03, 6.5857e-02, -1.1185e-02, 1.6357e-02, -8.2092e-03, + 9.3262e-02, -7.9346e-02, -2.1301e-02, 1.0548e-03, 2.6718e-02, + -3.2349e-03, 9.4727e-02, -2.4918e-02, -5.5542e-03, 5.9204e-02, + 2.4399e-02, -9.6985e-02, -4.7668e-02, 6.7558e-03, -1.0597e-02, + -4.4739e-02, -1.3306e-02, -2.6443e-02, 5.0629e-02, 5.0392e-03, + 1.5549e-02, -1.6632e-02, -2.2964e-03, 
5.2368e-02, 6.4941e-02, + 8.4412e-02, -6.3416e-02, -5.0537e-02, 4.8157e-02, -6.9336e-02, + -8.6212e-04, -1.1627e-01, -4.7943e-02, -1.5190e-02, 3.7415e-02, + -2.2430e-02, -3.0457e-02, 8.8196e-02, -2.6581e-02, 8.4534e-03, + -9.7961e-03, 4.3427e-02, 5.0446e-02, -8.2214e-02, -4.3976e-02, + -2.3193e-02, 6.0791e-02, 1.6357e-02, -6.8298e-02, -2.3895e-02, + -4.2816e-02, -1.3687e-02, 5.8929e-02, -1.0574e-02, 3.6560e-02, + -8.7891e-03, -3.1872e-03, 3.5370e-02, -2.9800e-02, -2.0485e-03, + 8.1406e-03, 1.9684e-02, 5.2216e-02, 1.5137e-02, -4.5471e-02, + -6.7444e-02, 1.6251e-02, 4.4495e-02, 4.0558e-02, -2.4445e-02, + -2.1790e-02, -6.0501e-03, 3.4210e-02, 4.8065e-02, 9.5886e-02, + -1.2589e-02, -5.8167e-02, -6.6467e-02, 3.1158e-02, -2.4628e-02, + 1.0902e-02, 5.6488e-02, 6.9763e-02, 2.7252e-02, -4.3304e-02, + 1.3428e-01, 7.2327e-03, -3.0807e-02, 4.0741e-02, -5.2032e-02, + -7.6294e-02, -8.7402e-02, -1.6678e-02, 4.8584e-02, -1.5306e-03, + 8.0719e-03, 1.9730e-02, -1.1078e-01, -3.7415e-02, 1.0144e-01, + -8.3237e-03, 9.0271e-02, 3.2623e-02, 1.0938e-01, 3.7140e-02, + 2.9648e-02, -7.9269e-03, -4.7699e-02, -2.3422e-02, 4.2999e-02, + -2.6901e-02, 2.6611e-02, 2.6657e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.0307, 1.9726, 2.0683, 2.0922, 2.0904, 2.1595, 2.1940, 2.0945, 2.1829, + 2.0632, 2.0069, 2.0619, 2.0249, 2.1153, 2.0952, 2.0311, 2.0945, 1.9873, + 2.0693, 1.3209, 1.9580, 2.0676, 2.0866, 2.1143, 2.0650, 2.1496, 2.0865, + 1.9528, 2.1317, 2.0063, 2.1316, 2.1745, 2.0471, 2.0919, 2.0855, 1.9931, + 2.0886, 2.1438, 1.9753, 2.0601, 2.0038, 2.0082, 2.1160, 2.1505, 2.2815, + 2.0785, 2.2053, 2.0123, 2.0239, 2.0069, 2.0493, 2.0066, 2.1710, 2.0697, + 2.2646, 2.1356, 2.0977, 2.0522, 2.0962, 2.0822, 2.1202, 2.0967, 2.1053, + 2.1302, 2.0645, 2.0926, 1.9923, 2.0041, 2.0004, 2.0571, 2.2438, 2.0124, + 2.0061, 1.9758, 2.1365, 2.1637, 2.0070, 2.0505, 2.1504, 2.0160, 2.0619, + 1.9575, 2.0646, 2.0421, 2.1013, 2.0887, 2.2091, 2.0112, 
1.9810, 2.0051, + 2.0335, 2.1576, 2.1458, 2.0246, 1.9795, 2.0867, 2.2490, 2.0376, 2.1731, + 2.0989, 1.9751, 1.8038, 2.0467, 2.1660, 2.0282, 1.9610, 2.3143, 2.1453, + 2.0223, 2.0440, 1.9253, 2.2580, 2.0914, 2.1201, 2.0345, 1.9917, 1.9536, + 2.0809, 2.0473, 2.0467, 2.0435, 1.9806, 2.1231, 2.1840, 2.0553, 2.1431, + 1.9885, 2.3494, 2.0862, 2.0096, 2.0571, 2.0646, 2.0548, 2.0386, 2.2171, + 2.1465, 2.1068, 1.9592, 2.0894, 2.1193, 2.1046, 2.0338, 2.0214, 2.0104, + 1.9678, 2.0540, 2.1080, 2.1480, 2.1495, 2.0671, 2.0314, 2.2913, 2.2099, + 2.1134, 2.0601, 1.4066, 2.0700, 2.0636, 2.1496, 2.0385, 2.0866, 2.0064, + 2.1332, 1.8232, 2.0438, 2.2630, 2.2799, 2.0261, 2.1469, 2.0730, 2.0680, + 2.0978, 1.9948, 2.0685, 2.0162, 2.1138, 2.2317, 2.0896, 2.1131, 2.1128, + 2.2830, 2.1778, 1.9381, 1.9642, 2.1773, 2.2628, 1.9100, 0.7467, 2.0003, + 2.2190, 2.1667, 2.1351, 2.2362, 2.1190, 2.1051, 2.3376, 2.1806, 2.0938, + 0.9265, 1.9867, 2.1207, 2.0960, 2.2167, 2.1010, 2.0326, 2.1115, 2.1153, + 2.2205, 2.0719, 2.0205, 2.0097, 2.0012, 2.2113, 2.1965, 2.1255, 1.9719, + 2.1534, 2.0961, 2.1630, 2.1540, 2.2609, 2.1181, 2.0263, 2.1285, 1.9925, + 2.0507, 2.0578, 2.0307, 2.1078, 2.0729, 2.0448, 2.1018, 2.0279, 2.0320, + 1.9764, 2.0579, 2.1533, 2.0485, 2.0464, 2.0832, 2.0751, 2.2815, 2.0456, + 1.9951, 2.0517, 2.1280, 1.9985, 2.1785, 2.0958, 2.1668, 2.0606, 2.1180, + 2.0829, 2.0670, 1.9896, 2.0625, 2.0223, 1.9520, 2.1020, 1.9954, 2.0594, + 1.9704, 1.9320, 2.0810, 2.0772, 1.9758, 2.0288, 2.0715, 2.0910, 1.9785, + 2.0258, 2.0914, 2.0275, 2.0663, 2.1614, 2.0998, 1.9857, 2.1790, 2.1992, + 2.0393, 2.0261, 1.9987, 2.0452, 2.0599, 2.0318, 2.0960, 2.2425, 2.1013, + 2.1393, 2.0122, 2.1288, 2.1729, 2.0566, 2.1189, 2.0796, 2.0585, 2.1067, + 2.0435, 2.1635, 2.2792, 2.1824, 2.0506, 2.0928, 2.2066, 2.0765, 1.9179, + 2.0876, 2.1804, 2.1673, 2.1951, 2.2373, 1.9701, 1.9493, 2.0041, 1.9992, + 2.2302, 2.0406, 2.0838, 2.1606, 2.0498, 2.1176, 2.0158, 2.1376, 2.1164, + 1.2271, 1.7720, 2.1755, 2.0930, 2.1836, 2.0538, 2.0824, 
2.0939, 2.1511, + 2.0509, 2.0580, 2.0198, 1.9866, 1.9962, 2.0523, 2.0944, 2.1456, 2.1053, + 2.0300, 2.0940, 2.3120, 1.9547, 2.0700, 2.1009, 2.0682, 2.0459, 2.0006, + 2.2175, 1.9863, 2.2981, 2.0458, 2.0225, 1.9890, 2.0384, 2.2434, 2.0457, + 2.1980, 2.0483, 2.0478, 2.0192, 2.1184, 2.0825, 2.2390, 2.1722, 1.9870, + 2.1458, 2.0283, 1.9633, 2.0250, 2.1574, 1.9837, 2.1228, 2.0989, 2.0857, + 2.0165, 1.9334, 2.0708, 1.9785, 2.0866, 2.0367, 2.0997, 2.0162, 2.0879, + 2.0782, 2.0978, 2.0613, 2.1383, 2.0852, 2.0643, 2.2999, 2.2209, 2.0422, + 2.2045, 2.0467, 2.1153, 2.3100, 2.0153, 2.2065, 2.0402, 2.0260, 2.0171, + 2.1041, 2.1454, 2.0819, 2.0804, 2.1649, 2.1010, 2.0213, 2.0817, 2.2861, + 1.5624, 2.4553, 2.0714, 2.0337, 2.0162, 2.1285, 2.2080, 2.0755, 2.0775, + 2.0191, 2.0924, 2.0059, 2.0281, 2.0560, 2.2036, 2.1356, 2.0119, 2.0982, + 1.9969, 2.2523, 2.0878, 2.0347, 2.0952, 2.0964, 2.0247, 2.0063, 1.9721, + 2.1730, 2.1007, 1.8965, 2.0519, 2.0051, 2.1024, 2.0860, 2.0207, 2.0678, + 2.0972, 2.0524, 2.2369, 2.1333, 2.0543, 2.2793, 1.9811, 2.1158, 2.1302, + 2.0558, 2.1161, 2.0195, 2.2593, 2.0156, 2.0946, 1.8880, 2.0206, 2.1074, + 2.2498, 2.0933, 2.0475, 2.0456, 2.0136, 1.9998, 2.2418, 2.1298, 2.1151, + 2.2087, 2.0441, 2.0162, 2.0065, 2.2207, 2.0723, 1.5891, 2.0869, 2.0881, + 2.0180, 2.0379, 1.9723, 2.1504, 2.0642, 2.2587, 2.0276, 2.0433, 2.0237, + 2.1352, 2.0410, 1.9913, 1.8999, 2.1760, 2.1910, 2.2422, 2.0412, 1.9200, + 2.0412, 2.0715, 2.0873, 2.1982, 2.0002, 2.1190, 1.9513, 2.0818, 2.1398, + 2.0467, 2.1935, 2.0150, 2.1526, 2.2373, 2.0407, 2.0075, 1.9397, 2.0824, + 1.9908, 2.0283, 2.0259, 2.0223, 2.1736, 1.9523, 1.9705, 2.2646, 2.0516, + 2.0430, 2.0424, 2.0742, 2.1556, 2.0510, 2.0165, 1.9642, 2.1213, 2.0721, + 2.0460, 2.0685, 2.2526, 2.0811, 2.0153, 2.0851, 2.0620, 2.0626, 2.1310, + 2.0389, 2.2499, 2.0059, 2.0056, 2.1145, 2.1155, 1.9914, 1.9843, 2.0977, + 2.0383, 1.9961, 2.0099, 1.8014, 2.0623, 2.1228, 2.1464, 2.0297, 2.0503, + 2.1106, 2.0956, 2.1274, 1.9793, 2.1071, 2.1342, 2.0167, 
1.9905, 2.1575, + 2.0514, 2.0519, 2.1893, 2.0108, 2.0159, 2.0668, 2.0728, 2.1197, 2.1696, + 1.9989, 2.2020, 2.0833, 2.3113, 2.0717, 2.0181, 2.2653, 2.1990, 2.0044, + 2.0979, 1.9749, 2.0083, 2.0437, 2.1062, 2.0969, 1.9639, 1.9803, 2.0559, + 2.1292, 1.9763, 2.0597, 2.0988, 1.8016, 2.0659, 2.1028, 2.1028, 2.2098, + 1.9922, 2.1306, 2.1300, 2.1369, 2.0740, 2.0127, 2.0872, 2.0136, 2.1810, + 1.9943, 2.0467, 2.1332, 1.9401, 1.9783, 2.0138, 2.1217, 2.0784, 2.0202, + 2.0787, 2.0651, 2.0199, 2.2011, 2.1647, 2.0053, 1.9973, 2.1077, 2.1093, + 2.1253, 2.0120, 2.0643, 2.1226, 1.9848, 1.9670, 2.1012, 1.9518, 2.0922, + 2.0492, 2.2058, 2.2500, 2.2433, 2.2611, 1.9797, 2.0808, 2.1515, 2.1605, + 2.0636, 2.0649, 2.1260, 1.9303, 2.0484, 2.2103, 2.1352, 2.0834, 2.0316, + 2.1334, 2.0103, 2.1443, 2.0279, 2.1024, 2.1013, 2.2352, 2.0015, 2.1543, + 2.0345, 2.1090, 2.2991, 2.1148, 2.0197, 2.1404, 2.9376, 2.0328, 2.0857, + 2.0746, 2.0915, 2.0639, 2.0736, 2.0799, 2.2220, 2.2648, 1.9870, 2.1855, + 2.0254, 2.0939, 1.9822, 1.9375, 2.1553, 2.1249, 2.1436, 2.0041, 2.0035, + 2.0451, 1.9603, 2.1117, 2.2223, 2.1545, 2.0233, 2.1566, 1.9525, 1.9624, + 2.0093, 2.0608, 2.1441, 2.0819, 2.0554, 2.0715, 2.0770, 2.0988, 2.3386, + 2.0327, 2.0376, 2.1202, 2.1253, 2.0856, 2.0563, 1.7493, 2.0553, 1.9948, + 2.0059, 2.2029, 2.0470, 2.1513, 2.2328, 2.0887, 2.1264, 2.0788, 1.9652, + 2.0717, 2.2406, 2.0654, 1.9399, 2.0947, 2.1596, 2.1316, 2.0074, 2.1920, + 2.0630, 2.0547, 2.1203, 2.0279, 2.0755, 2.1415, 1.9674, 2.0430, 2.1068, + 2.3021, 2.1641, 2.0975, 2.0427, 2.1529, 2.0259, 2.0686, 1.9664, 2.1563, + 2.0207, 1.9886, 2.1628], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-0.1634, 0.3904, -0.6572, 0.0778, -0.3645, 0.8040, 0.6369, -0.0867, + -0.3817, -0.1613, -0.3118, 0.5072, -0.3457, -0.5480, -0.3993, -0.0154, + 0.1044, 0.0733, -0.2292, -1.6394, 0.1039, 0.4882, 0.6659, -0.5731, + 0.4084, 0.1744, -0.1819, -0.0776, -0.3350, 0.4749, 0.0453, 0.3498, + 0.0599, -0.5911, 0.0388, -0.2646, 
-0.3983, -0.4301, 0.5977, -0.0765, + -0.3520, -0.0992, 0.5881, 0.6972, -0.6566, -0.0760, -0.6735, -0.2202, + 0.4286, -0.1231, -0.5646, -0.3618, 0.5786, 0.4331, -0.7112, 1.5154, + 0.4482, 0.2446, -0.1697, 0.3791, 0.4552, 0.2399, 0.4756, -0.8407, + 0.2955, 0.6822, 0.5503, -0.2047, -0.2023, -1.5053, 0.9010, 0.5263, + 0.8808, 0.3741, -0.7068, -0.3806, 0.2776, 0.0435, 0.4804, -0.4777, + -0.0481, -0.3290, -0.1130, 0.5107, 0.1467, -0.3193, 0.7629, 0.2585, + -0.1659, -0.3165, -0.4949, 0.4801, 0.5671, -0.1125, -0.7146, 0.5034, + 0.7717, 0.4830, -0.5553, 0.4703, -0.1743, -0.2755, 0.3085, -0.4850, + -0.2644, 0.5301, 0.5706, 0.4006, 0.5181, -0.6555, 0.4549, 0.8119, + -0.8537, 0.5428, 0.4085, -0.2830, 0.6755, 0.5459, 0.1751, 0.0993, + 0.0719, -0.1542, -0.3314, -0.6055, -0.3381, -0.4999, -0.0787, -0.9931, + -0.0120, 0.3317, -0.3046, -0.6244, -0.6682, -0.2793, 0.4557, 0.4783, + 0.4025, -0.2780, -0.4007, 0.2884, -0.5973, 0.3956, 0.0334, 0.6921, + -0.2348, -0.1816, 0.3306, 0.4644, -0.4059, 0.2303, 0.3054, -0.5736, + 0.3553, 0.0930, -0.6138, 0.3553, -0.4840, -0.3348, -0.5161, 0.3380, + -0.5832, 0.0614, 0.5337, 0.1841, -0.1116, 0.5732, -0.9823, -0.0027, + -0.3231, 0.5131, 0.4541, -0.0448, -0.4895, 0.0861, -0.0614, 0.4743, + 0.6420, -0.0692, -0.0172, 0.6431, -0.6864, -0.6511, 0.2636, 0.1945, + 0.7223, 1.1545, -0.2667, 0.9053, -0.0978, -0.7616, -0.9102, -0.3463, + 0.6792, 0.6667, 0.1881, 0.8544, 0.5431, 0.2004, 1.1457, 0.4141, + -0.9264, -0.1053, 0.6978, 0.6861, 0.3049, 0.3733, 0.8130, 0.7921, + -0.4449, -0.0137, 0.2515, -0.5731, -0.6839, 0.7414, 0.0611, 0.0709, + -0.2592, -0.0999, -0.5076, -0.6549, 0.9194, 0.4722, -0.4555, 0.5878, + -0.0519, 0.3215, -0.2202, 0.0980, 0.5063, -0.3292, 0.2484, 0.5109, + -0.2138, 0.2214, -0.2958, -0.2670, 0.5005, 0.6784, 0.2350, 0.7536, + 0.3887, 0.7270, 0.0442, 0.0527, 0.0210, -0.5180, 0.0711, -0.5699, + -0.3708, 0.6118, -0.6395, 0.2085, 0.0284, -0.6505, 0.2182, 0.3107, + 0.3407, -0.2664, -0.3857, -0.2913, -0.6433, 0.1765, 0.1000, -0.4661, + -0.3760, 
-0.0993, 0.2911, 0.3472, 0.3607, 0.1198, -0.0327, 0.1065, + 0.4254, -0.4536, -0.4919, -0.3662, -0.0898, 0.5787, 0.5154, 0.0449, + 0.1505, -0.7936, -0.4062, 0.4328, -0.2997, 0.4559, -0.2560, -0.0027, + 0.4216, -0.8116, -0.2374, 0.4854, 0.5336, -0.3588, 0.4325, -0.8042, + -0.4591, -0.4535, 0.6669, 0.6368, -0.5369, 0.5936, -0.4235, -0.6996, + 0.0087, 0.2624, 0.5607, 0.6638, -0.6091, 0.2427, 0.4354, 0.0214, + -0.2835, 0.0946, -0.1392, 0.5883, 0.1297, 0.4315, 0.7669, 0.1802, + -0.5833, -0.1598, 0.4056, -0.2835, 2.4506, -0.0757, 0.6105, -0.2125, + 0.6271, -0.2360, 0.4260, -0.4225, -0.4409, -0.3974, -0.0852, 0.5301, + -0.5151, 0.3451, 0.6331, -0.0027, 0.3818, -0.2817, -0.4590, 0.6107, + -0.6834, 0.0148, -0.3686, -0.6853, 0.1435, -0.0316, 0.7759, 0.4233, + -0.0109, 0.9338, -0.3701, 0.0630, -0.1912, -0.1769, -0.5478, -0.2753, + -0.8406, -0.5571, 0.0083, -0.0551, 0.2625, 0.2223, -0.6563, 0.7482, + 0.0779, 0.5153, -0.1369, 0.1232, -0.0049, -0.9306, -0.2408, -0.5681, + 0.6130, 0.5066, 0.3246, 0.2921, -0.5615, -0.5330, -0.3602, 0.1486, + 0.1352, -0.4357, -0.2049, 0.5760, 0.2577, -0.4218, 0.6486, -0.2831, + 0.6872, 1.1954, -0.8105, 0.3491, -0.5672, -0.1094, -0.3430, -0.7002, + 0.6166, 0.5148, -0.4035, -0.2608, 0.4024, -0.5864, -0.2557, 0.2989, + 0.0739, 0.7871, 0.4411, -0.1270, 0.5860, 0.8288, 0.7889, -1.7029, + -0.2622, -0.2401, 0.4067, 0.6801, -0.5707, -0.6888, 0.3839, -0.2854, + -0.6706, 0.2172, 0.2885, -0.1456, 0.4580, 0.3836, -0.4238, -0.6184, + -0.0139, 0.5807, 0.6330, -0.3544, -0.4924, 0.5683, 0.4040, -0.2675, + 0.1704, 0.3875, 0.3749, -1.7340, 0.0225, 0.1601, -0.1486, 0.8069, + 0.4392, -0.5474, -0.5232, -0.0288, -0.5448, 0.3483, -0.2150, 0.6120, + -0.0751, -0.5416, -0.7117, 0.3526, -0.3994, -0.0146, -1.1012, -0.0727, + -0.4971, 0.3637, 0.2655, 0.4278, 0.6634, -0.0713, -0.0099, -0.1449, + -0.4338, -0.1937, -0.6498, -0.7250, 0.2271, -0.6612, 0.1220, 0.3243, + 0.1165, 0.2658, 0.6581, -0.2702, 0.1820, -0.8709, 0.3657, 0.6121, + 0.2829, -0.6148, 0.3372, 0.9084, 0.3225, 
-0.3238, -0.5079, -0.4441, + -0.5670, 0.3065, -0.0379, 0.6865, -0.7325, -0.6104, -0.6129, 0.3126, + 0.0805, -0.4352, 0.5677, 0.5351, -0.2273, -0.5317, -0.4505, -0.1210, + -0.8835, -0.3817, 0.8097, -0.0851, 1.0992, -0.4803, 0.3294, -0.1898, + 0.3222, 0.3128, 0.1108, 0.1788, -0.4207, -0.2907, 0.8966, -0.0309, + 0.4949, 0.6861, -0.4014, -0.2843, -0.1982, -0.0093, 0.6738, -0.6007, + 0.3616, -0.1017, 0.3762, -0.3327, 0.1810, 0.3345, -1.0147, 0.9662, + -0.1385, 0.2843, 0.4133, -0.6416, -0.2800, 0.3751, -0.6579, -0.4096, + 0.0644, 0.3496, 0.6392, -0.0599, -0.2617, -0.1823, -0.6173, 0.5599, + 0.2138, -0.0549, 0.2094, 0.6796, -0.5302, -0.1417, -0.1526, 0.5912, + 0.6554, -0.2012, -0.3520, 0.2011, 0.4550, -0.4031, 0.2536, 0.7203, + -0.5432, -0.1476, -0.7711, 0.6847, 0.2413, 0.2893, 0.7736, -0.4954, + 0.5358, -0.4769, -0.3864, -0.5988, 0.7091, 0.6538, -0.0866, -0.1456, + 0.8250, -0.0113, -0.4833, -0.0904, -0.2416, 0.2671, 0.7442, 0.7193, + -0.2374, -0.0454, -0.0685, -0.5255, 0.1050, -0.1814, 0.0709, -0.4543, + 0.4718, 0.5187, -0.6497, 0.8609, -0.3123, 0.3958, 0.0808, -0.2275, + 0.0566, 0.4994, -0.0844, 0.3117, 0.5139, -0.1189, -0.8420, 0.7102, + 0.1490, -0.1751, -0.0938, 0.5447, 0.7600, 0.1141, 0.0118, -0.0166, + -0.4014, 0.5753, 0.3237, -0.0531, -0.2725, 0.4342, 0.4947, -0.5230, + 0.7061, -0.5629, -0.5246, 0.1271, -0.6878, 0.3458, -0.0280, 0.2749, + 0.3727, 0.7445, 0.7685, 0.2294, 0.6002, -0.2364, -0.0361, 0.8746, + 0.5983, 0.7713, -0.0492, 0.0072, 0.3269, 0.4310, 0.1407, -0.4384, + -0.1729, -0.3326, 0.4758, -0.4218, 0.8020, -0.0350, -0.7215, 0.4019, + 0.9356, 0.5021, 0.5510, -0.3667, 0.4088, 0.5787, 0.5401, 0.5139, + 0.2937, 2.5777, -0.4828, -0.2321, -0.1922, 0.7759, 0.4836, -0.4644, + 0.1484, -0.9385, -0.7380, -0.1577, -0.8349, -0.3073, -0.5824, 0.5791, + -0.0707, 0.2943, -0.5056, -0.0182, 0.1451, 0.0616, 0.4046, 0.0973, + 0.4898, 0.7257, -0.2495, -0.0735, -0.8476, 0.2016, -0.0924, -0.2625, + -0.2927, -0.5473, -0.0670, -0.2398, -0.1042, 0.2342, -0.6123, -0.7766, + -0.2891, 
-0.1510, -0.3887, -0.8447, 0.3705, -0.3663, 0.3751, -0.3333, + 0.2739, -0.1393, -0.5013, -0.0648, 0.8232, 1.0130, -0.2885, 0.4336, + 0.4970, -0.2387, 0.1380, -0.7007, -0.7673, -0.3192, 0.2573, -0.4747, + -0.4189, -0.1628, -0.6715, 0.0745, 0.3072, 0.8509, -0.2102, -0.5690, + -0.4548, -0.0489, -0.0070, -0.0870, -0.2811, -0.6708, 0.6954, 0.4769, + -0.4482, -0.5612, -0.2993, 0.9320, 0.3225, -0.2130, 0.4533, 0.4963], + device='cuda:1', requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-0.0083, -0.0071, -0.0021, ..., -0.0010, 0.0090, 0.0052], + [ 0.0555, -0.0439, -0.0148, ..., 0.0150, -0.0063, 0.0137], + [ 0.0143, 0.0434, 0.0090, ..., -0.0173, 0.0094, -0.0070], + ..., + [-0.0136, -0.0252, 0.0119, ..., -0.0044, 0.0303, 0.0039], + [ 0.0051, -0.0150, -0.0075, ..., -0.0273, -0.0061, -0.0200], + [ 0.0049, -0.0019, 0.0238, ..., 0.0028, -0.0135, -0.0199]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([ 1.1367, -0.1210, 0.1425, ..., -0.0217, -0.0073, -0.0175], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 0.0033, 0.0186, -0.0174, ..., -0.0052, 0.0126, 0.0130], + [-0.0088, 0.0030, 0.0068, ..., 0.0046, -0.0482, 0.0263], + [-0.0032, 0.0138, -0.0109, ..., 0.0132, 0.0237, 0.0135], + ..., + [ 0.0033, -0.0081, -0.0091, ..., 0.0204, -0.0066, 0.0058], + [-0.0299, 0.0074, -0.0033, ..., -0.0114, 0.0144, 0.0044], + [ 0.0031, 0.0146, -0.0029, ..., 0.0022, -0.0339, -0.0151]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-4.5128e-03, 1.0891e-03, -3.3478e-02, -1.2718e-02, -6.7078e-02, + 6.7139e-02, 6.8726e-02, -1.5411e-02, 4.0192e-02, 3.3264e-02, + 2.8030e-02, -1.7822e-02, -5.0354e-02, -3.2593e-02, 8.6975e-03, + 1.7487e-02, -1.7090e-03, 4.8798e-02, -1.4435e-02, -4.2175e-02, + 2.5803e-02, 2.9007e-02, 2.6627e-03, -1.9150e-02, 3.9734e-02, + -5.0354e-02, -2.0618e-03, 
2.3621e-02, 1.5732e-02, 2.6825e-02, + 1.3304e-04, 1.1345e-02, -7.6218e-03, -5.8632e-03, 2.7752e-03, + 3.4485e-02, -7.0251e-02, -5.3925e-02, -1.8692e-02, 4.6722e-02, + -7.8659e-03, -8.9264e-03, 2.0721e-02, -5.7182e-03, 1.3725e-02, + -4.4556e-03, 7.2784e-03, -1.8707e-02, -3.1403e-02, -2.4948e-03, + 1.8265e-02, 2.4246e-02, -1.9287e-02, 4.3732e-02, 1.9684e-02, + 4.5807e-02, -4.0497e-02, -1.6037e-02, 5.4291e-02, -1.3718e-02, + 8.4763e-03, -8.7585e-03, 1.9363e-02, 2.0309e-02, -3.0838e-02, + -1.6861e-02, 3.9398e-02, 2.0432e-02, -3.1796e-03, -1.1469e-01, + -2.4979e-02, -2.7313e-02, -2.8641e-02, -1.3489e-02, -1.9913e-02, + 1.3153e-02, 3.0518e-03, -2.1744e-03, 2.5864e-02, 6.5804e-03, + 2.2602e-03, 1.9440e-02, -2.1469e-02, -3.9215e-03, -1.3847e-02, + 1.6678e-02, -1.3870e-02, 3.2616e-03, -8.3466e-03, -2.1759e-02, + 2.8610e-02, 3.9215e-02, 3.1799e-02, 6.3629e-03, 3.6030e-03, + 2.7679e-02, 4.0527e-02, 5.1346e-03, -8.5144e-03, 1.0765e-02, + 6.1127e-02, 4.1870e-02, 6.3965e-02, -2.3132e-02, -4.7226e-03, + 6.6650e-02, 1.6022e-02, 2.7573e-02, 8.3084e-03, 1.3275e-02, + 2.3880e-02, 4.5654e-02, 2.8748e-02, -2.9831e-03, 4.4518e-03, + -2.8244e-02, 4.4220e-02, -5.7716e-03, -4.3762e-02, 4.9973e-03, + -1.5388e-02, -1.6312e-02, -6.7688e-02, -3.3905e-02, 5.9662e-03, + -4.7882e-02, -1.3046e-02, -3.3630e-02, -9.1324e-03, 3.1097e-02, + -1.4183e-02, 6.0081e-03, -2.5864e-02, -7.4768e-03, 2.9739e-02, + 3.4393e-02, 2.4811e-02, 2.9816e-02, -1.0612e-02, 1.4702e-02, + -4.1229e-02, 1.1559e-03, 1.6479e-02, -2.8671e-02, 1.7166e-02, + -6.2103e-02, -1.5732e-02, -6.9618e-03, 1.0216e-02, 6.4880e-02, + 3.8422e-02, 1.3800e-03, 1.3947e-02, 4.2786e-02, -8.7738e-03, + 2.3666e-02, -3.6224e-02, 6.2866e-03, 2.3880e-02, -4.7668e-02, + 1.2398e-02, -2.8549e-02, 5.6854e-02, -1.1093e-02, 4.1809e-03, + 1.5900e-02, -7.0923e-02, -1.3222e-02, -2.2984e-03, 1.9363e-02, + 1.1263e-03, 5.5176e-02, 1.3657e-03, -4.2725e-02, 2.6398e-02, + -6.1798e-02, 5.4810e-02, -6.8909e-02, -1.5808e-02, -2.3163e-02, + -2.2049e-02, -4.7668e-02, 
-9.6436e-03, 5.6000e-02, -1.8188e-02, + 1.4977e-02, -2.4994e-02, -3.9642e-02, 3.3295e-02, -2.5375e-02, + 1.7929e-02, 1.1589e-02, 1.4969e-02, 4.7546e-02, -3.4332e-02, + 1.5305e-02, 3.7201e-02, -3.9032e-02, -6.9641e-02, 4.6417e-02, + -1.0063e-02, -5.7144e-03, 7.8125e-03, 2.7008e-02, 2.4460e-02, + 6.2317e-02, 2.3178e-02, 2.5406e-02, 2.8976e-02, 2.5299e-02, + 1.0063e-02, 3.1708e-02, -1.2169e-02, 4.7729e-02, 1.2070e-02, + -2.1988e-02, -4.1870e-02, 4.9103e-02, -6.7749e-03, -3.1281e-02, + 1.7853e-02, 1.0002e-02, -6.2988e-02, 4.6509e-02, -3.8671e-04, + -6.1859e-02, -4.1595e-02, -9.8038e-03, 2.6657e-02, 1.9226e-02, + -1.1948e-02, 2.3865e-02, -2.6520e-02, 1.3962e-03, -2.2705e-02, + -3.2440e-02, -1.9623e-02, 1.5236e-02, -8.2779e-03, 2.0691e-02, + 1.8906e-02, 3.3112e-02, -9.6970e-03, -2.9480e-02, 1.0132e-02, + -3.9597e-03, 2.8870e-02, 1.1482e-02, -6.5346e-03, -1.7975e-02, + 6.3858e-03, 1.6129e-02, -1.6724e-02, -3.8696e-02, -1.1530e-03, + -4.0863e-02, -9.4147e-03, -1.8814e-02, -2.6245e-02, -4.2694e-02, + -7.2327e-02, 2.4548e-03, 2.0828e-02, 2.7802e-02, -1.8738e-02, + -1.2159e-03, 1.9043e-02, 3.7689e-02, -1.7334e-02, 3.0060e-02, + 1.4259e-02, -1.1581e-02, -6.1531e-03, -2.6794e-02, -4.4403e-02, + 4.0894e-02, 4.2572e-02, -2.1118e-02, 4.7028e-02, -2.6306e-02, + 1.8692e-02, -7.3303e-02, -3.6011e-02, -2.3788e-02, 6.3362e-03, + 7.7209e-03, 1.2604e-02, 4.7340e-03, 5.2307e-02, -3.3630e-02, + 3.0457e-02, -1.6266e-02, 2.8824e-02, 1.3283e-02, 1.4275e-02, + 9.7961e-03, 2.0966e-02, -1.2543e-02, -3.7262e-02, 1.6418e-02, + -2.0790e-03, -7.4997e-03, -2.3788e-02, 3.4912e-02, -1.4687e-02, + 1.8326e-02, 3.1006e-02, 6.1569e-03, 3.8177e-02, 5.0259e-04, + -2.3861e-03, -5.2299e-03, -4.9133e-03, 5.3101e-03, 2.4094e-02, + -4.6806e-03, 3.0090e-02, 1.5926e-03, 1.7303e-02, -1.1276e-02, + -4.8027e-03, -6.8245e-03, -2.7206e-02, 4.4342e-02, -1.5527e-01, + -6.2744e-02, 5.3139e-03, -4.3526e-03, 3.7323e-02, 1.3878e-02, + -7.0610e-03, -3.3478e-02, 1.8244e-03, -3.2501e-02, -5.3528e-02, + -4.8645e-02, -4.3755e-03, 
-1.1063e-02, 3.5114e-03, -4.6387e-03, + 1.9562e-02, 2.8488e-02, -3.0243e-02, 1.0025e-02, -1.1663e-03, + 4.0771e-02, -8.8501e-03, 4.4647e-02, -3.5858e-02, 8.3160e-04, + -2.9831e-02, 3.0079e-03, 7.6660e-02, 9.9548e-02, -2.1179e-02, + -9.8572e-03, 6.5231e-04, -4.4128e-02, 2.3819e-02, 3.6888e-03, + -1.6006e-02, 1.4183e-02, -3.2013e-02, -1.5976e-02, 1.1642e-02, + -2.1935e-03, 1.7029e-02, 2.9251e-02, 1.3626e-02, 1.7410e-02, + 4.1077e-02, -6.7749e-02, -1.6632e-02, -2.3499e-02, 1.6891e-02, + 4.2000e-03, 1.7807e-02, 5.8197e-02, -4.5959e-02, -3.3722e-02, + -4.2496e-03, 3.6102e-02, -6.9580e-02, -2.7054e-02, -3.7903e-02, + -1.0242e-03, 1.3550e-02, 3.8177e-02, 1.6830e-02, -3.2187e-05, + -2.9358e-02, -1.7502e-02, 2.5772e-02, 4.2343e-03, 3.1281e-02, + 2.5223e-02, 2.3315e-02, 4.6356e-02, -1.2512e-02, -4.5837e-02, + -2.6962e-02, 5.2765e-02, -4.8103e-03, 2.2003e-02, 3.4356e-04, + -6.4392e-03, -6.1893e-04, -1.8494e-02, -2.4506e-02, 7.8344e-04, + -1.0498e-02, -2.3361e-02, -2.0096e-02, -2.3056e-02, 2.1317e-02, + -1.8481e-01, 3.2349e-02, 1.8555e-02, 4.7546e-02, -2.4323e-02, + -1.4732e-02, -1.5671e-02, 8.3237e-03, -3.6041e-02, -1.2108e-02, + 2.9030e-03, 3.0716e-02, -9.8648e-03, 3.7292e-02, 2.7710e-02, + -6.7566e-02, 3.8910e-02, 2.6230e-02, 3.8422e-02, 4.9011e-02, + -3.3150e-03, 4.1809e-03, 5.4588e-03, 5.0079e-02, -3.3508e-02, + 1.4694e-02, -7.9956e-03, 2.1408e-02, -1.5918e-01, 3.6438e-02, + -3.8239e-02, -4.7455e-03, 2.0096e-02, 2.3666e-02, -1.4549e-02, + -5.8556e-03, 2.7847e-02, -6.0303e-02, 1.0347e-03, 1.9241e-02, + 1.4847e-02, 1.0292e-02, 2.9163e-03, 3.0258e-02, -6.1615e-02, + 1.5434e-02, 1.9226e-02, -1.7914e-02, 1.5945e-02, -3.0228e-02, + -1.3977e-02, -7.9203e-04, -6.0852e-02, 2.8381e-02, 2.0416e-02, + -3.9154e-02, -4.3396e-02, -2.5238e-02, 1.8539e-02, -2.5787e-02, + 5.3528e-02, 3.0365e-03, 1.9028e-02, 1.6724e-02, 6.2347e-02, + -8.5373e-03, 3.2593e-02, -2.9129e-02, 2.1988e-02, -3.9215e-02, + 2.8473e-02, -9.6130e-03, -1.7288e-02, -1.4145e-02, -1.7014e-02, + 2.1423e-02, 1.2009e-02, 
-4.2877e-02, 5.3902e-03, -7.6477e-02, + 2.2354e-03, 5.6343e-03, 1.1192e-02, -5.4413e-02, 2.8519e-02, + 5.8022e-03, -1.9897e-02, -2.0561e-03, 6.8474e-04, 2.5528e-02, + 1.5205e-02, 2.7374e-02, 4.5074e-02, -3.3783e-02, -8.5815e-02, + -8.3237e-03, 7.4768e-03, -1.8661e-02, -1.6632e-02, 3.5461e-02, + -3.4943e-02, 3.0994e-06, 1.9165e-02, 7.0610e-03, -4.0588e-02, + 9.1457e-04, -3.0479e-03, -7.0801e-03, 8.1558e-03, -2.4689e-02, + 8.8272e-03, -1.7319e-02, -2.9465e-02, 3.0746e-02, -2.2110e-02, + 6.3110e-02, -4.1626e-02, 1.1772e-02, -4.8615e-02, 4.4708e-02, + 1.3359e-02, -3.3569e-02, -2.0752e-02, 7.0839e-03, 2.4223e-03, + -4.3549e-02, 3.9551e-02, -2.1988e-02, -1.8936e-02, 1.8784e-02, + 1.0193e-02, 1.0780e-02, 4.6616e-03, 3.2104e-02, 3.0472e-02, + -9.6130e-02, -7.0381e-03, -2.5883e-03, -5.7335e-03, -4.1382e-02, + -1.6769e-02, 6.8542e-02, -1.2489e-02, -3.9902e-03, 1.3103e-03, + -1.9669e-02, 5.8403e-03, -2.5345e-02, -3.3539e-02, 4.8340e-02, + 4.8187e-02, 4.1718e-02, 2.8198e-02, 3.8879e-02, -1.8631e-02, + 4.6814e-02, 1.6891e-02, -9.8953e-03, 4.2084e-02, 6.9122e-03, + 4.2725e-02, -6.0501e-03, 4.5624e-03, 1.1833e-02, -5.6824e-02, + 2.8095e-03, 2.9083e-02, 1.3138e-02, 1.4244e-02, 6.1798e-03, + 3.9795e-02, -2.0386e-02, 2.1988e-02, 5.4169e-02, 4.3373e-03, + -2.5040e-02, -3.0088e-04, 1.6937e-02, 7.5340e-04, 3.2135e-02, + -4.7913e-02, -2.1027e-02, -6.6040e-02, 4.8103e-03, -1.8738e-02, + -3.2158e-03, 2.6688e-02, -2.9816e-02, -1.7090e-02, 9.7198e-03, + 1.9272e-02, -5.0011e-03, -4.9103e-02, 1.7654e-02, 2.5436e-02, + 1.2337e-02, 4.6387e-03, 2.5879e-02, 2.0157e-02, 1.0941e-02, + -2.4048e-02, -6.4850e-03, 1.8524e-02, -2.9507e-03, -2.2602e-03, + 4.8157e-02, 1.4801e-02, -1.5381e-02, 1.4893e-02, 5.7697e-04, + -4.5807e-02, 9.9487e-03, -1.2199e-02, 5.0140e-02, -1.4999e-02, + 5.7709e-02, -2.1133e-02, 1.0757e-02, 2.0966e-02, 3.1281e-02, + -3.9024e-03, 4.7989e-03, 5.2704e-02, 2.0828e-02, -1.0963e-02, + -1.3313e-02, -2.4689e-02, -6.2927e-02, 5.5275e-03, 5.9128e-04, + 2.9037e-02, -4.2847e-02, 
1.0155e-02, 5.3329e-03, 1.4275e-02, + 2.9663e-02, -2.4902e-02, 1.9085e-04, 3.0334e-02, 4.3396e-02, + 1.7212e-02, -4.9927e-02, -3.1921e-02, 2.3834e-02, 8.0505e-02, + -1.4977e-02, -2.0355e-02, -2.7008e-02, -2.5299e-02, -4.6272e-03, + 6.6772e-02, 1.6083e-02, -2.0142e-03, 2.4948e-02, -1.7349e-02, + -4.0398e-03, -1.9791e-02, 3.9917e-02, -1.6754e-02, 1.6830e-02, + 2.8000e-03, -1.3115e-02, -2.3819e-02, -9.0866e-03, 1.6571e-02, + -1.1642e-02, 1.4746e-01, -4.8126e-02, -2.1484e-02, -7.6637e-03, + 5.6030e-02, -3.9917e-02, 1.5289e-02, 1.1955e-02, 1.0445e-02, + 1.8097e-02, -5.3894e-02, -6.5651e-03, 1.0391e-02, 2.6566e-02, + 5.8350e-02, 2.1637e-02, 3.3936e-02, 1.3794e-02, 2.1118e-02, + -1.4984e-02, 1.7990e-02, 5.3619e-02, -1.8127e-02, 6.2378e-02, + -2.3651e-02, -8.1177e-03, 1.4168e-02, -3.0956e-03, 5.5237e-03, + 3.3783e-02, 2.7252e-02, 2.4902e-02, -2.5757e-02, -1.6632e-02, + -4.6021e-02, -2.4963e-02, 6.0028e-02, 2.2552e-02, -1.0608e-01, + -1.7654e-02, 1.4448e-03, -3.8483e-02, -3.5461e-02, -4.3976e-02, + -9.4376e-03, 5.6549e-02, -4.8645e-02, -1.4091e-02, 6.5002e-03, + -1.1063e-02, -2.4597e-02, 3.9886e-02, 1.5762e-02, 3.9307e-02, + 1.3527e-02, 2.9892e-02, -1.1322e-02, 1.5945e-02, -2.3911e-02, + 1.8356e-02, -1.0468e-02, -1.6129e-02, -8.1863e-03, 1.3138e-02, + 3.9581e-02, -5.4169e-02, 4.7333e-02, 3.4821e-02, -1.4366e-02, + 1.0414e-03, -1.4893e-02, -5.1544e-02, 3.5156e-02, 3.6072e-02, + -1.8501e-03, 8.4000e-03, -1.7441e-02, -9.5215e-03, -1.2230e-02, + -1.0429e-02, 1.2161e-02, -1.2169e-02, -2.6215e-02, -9.1374e-05, + -8.3847e-03, 1.2932e-02, 3.9246e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.4201, 1.5553, 1.4466, 1.4176, 1.4067, 1.4397, 1.3864, 1.3708, 1.4152, + 1.3614, 1.4378, 1.3843, 1.3444, 1.4626, 1.3758, 1.4693, 1.4095, 1.5620, + 1.4866, 0.4077, 1.4047, 1.3871, 1.4473, 1.4040, 1.5196, 1.4607, 1.4180, + 1.3462, 1.5646, 1.3797, 1.4029, 1.3888, 1.4918, 1.4661, 1.4651, 1.4938, + 1.3918, 1.4911, 1.4575, 1.4223, 
1.4761, 1.4732, 1.4389, 1.4287, 1.4739, + 1.3806, 1.3968, 1.4439, 1.3657, 1.4691, 1.4194, 1.4101, 1.4352, 1.4645, + 1.4784, 1.4505, 1.3830, 1.6047, 1.4694, 1.4865, 1.4252, 1.3965, 1.4197, + 1.3777, 1.3788, 1.4218, 1.3675, 1.4547, 1.4370, 1.6006, 1.4216, 1.4492, + 2.2403, 1.3783, 1.4818, 1.4228, 1.4079, 1.4081, 1.5440, 1.5037, 1.3940, + 1.4326, 1.3924, 1.4796, 1.4971, 1.3687, 1.5182, 1.3814, 1.3834, 1.4392, + 1.4455, 1.4210, 1.3747, 1.4317, 1.4766, 1.3185, 1.4140, 1.3892, 1.3706, + 1.5666, 1.4581, 1.3999, 1.3989, 1.3766, 1.4173, 1.4575, 1.4027, 1.4818, + 1.3969, 1.4545, 1.4167, 1.4052, 1.2946, 1.3471, 1.4722, 1.3740, 1.4010, + 1.3432, 1.4888, 1.4517, 1.4854, 1.4591, 1.4632, 1.4699, 1.3971, 1.4651, + 1.4098, 1.4119, 1.4666, 1.4620, 1.4262, 1.3439, 1.3321, 1.4214, 1.4017, + 1.4997, 1.5333, 1.4508, 1.4351, 1.3742, 1.4386, 1.4162, 1.4854, 1.4447, + 1.3915, 1.3794, 1.4169, 1.4852, 1.4739, 1.4276, 1.3996, 1.3800, 1.4374, + 1.4500, 1.3956, 1.1616, 1.4410, 1.5182, 1.4307, 1.4148, 1.4508, 1.3891, + 1.4194, 1.5416, 1.4519, 1.4224, 1.4381, 1.4547, 1.3615, 1.4564, 1.4398, + 1.3899, 1.3217, 1.4564, 1.5274, 1.4199, 1.4670, 1.5066, 1.5204, 1.3846, + 1.3979, 1.4862, 1.4596, 1.4462, 1.5558, 1.4122, 1.3897, 1.8632, 1.5444, + 1.5091, 1.4304, 1.3688, 1.4861, 1.4807, 1.4606, 1.4240, 1.4124, 1.4525, + 3.8975, 1.3556, 1.2548, 1.4661, 1.4985, 1.4950, 1.4275, 1.4643, 1.3548, + 1.3323, 1.4001, 1.4308, 1.4286, 1.3500, 1.3509, 1.3783, 1.4309, 1.5165, + 1.4920, 1.3666, 1.3648, 1.4776, 1.4400, 1.4439, 1.4176, 1.3889, 1.4822, + 1.3767, 1.4823, 1.5177, 1.3962, 1.4796, 1.4227, 1.4678, 1.4844, 1.4738, + 1.4363, 1.4863, 1.3920, 1.3446, 1.4588, 1.4765, 1.4450, 1.3699, 1.4560, + 1.4734, 1.5286, 1.4649, 1.4107, 1.4684, 1.4413, 1.4883, 1.3877, 1.4846, + 1.3845, 1.3414, 1.5221, 1.4007, 1.4440, 1.4092, 1.3943, 1.3575, 1.4469, + 1.3563, 1.3385, 1.4539, 1.4622, 1.4092, 1.4660, 1.4254, 1.4688, 1.3586, + 1.4422, 1.4098, 1.5499, 1.3933, 1.4258, 1.2855, 1.4232, 1.3333, 1.4687, + 1.3916, 1.4269, 1.5470, 1.5170, 
1.4956, 1.5049, 1.4062, 1.4651, 1.5575, + 1.4794, 1.3587, 1.4639, 1.5001, 1.5203, 1.3703, 1.4702, 1.3974, 1.4340, + 1.4630, 1.5828, 1.4600, 1.4888, 1.4105, 1.5150, 1.4381, 1.3933, 1.3613, + 1.3687, 1.3838, 1.3728, 1.4708, 1.4796, 1.4643, 1.3512, 1.3890, 1.5210, + 1.5169, 1.3263, 1.3799, 1.4009, 1.5255, 1.4198, 1.4169, 1.3880, 1.3731, + 0.8802, 1.2407, 1.5166, 1.4331, 1.4277, 1.4972, 1.3723, 1.4263, 1.3627, + 1.3381, 1.4444, 1.3821, 1.3215, 1.3868, 1.3506, 1.4783, 1.4441, 1.3980, + 1.4687, 1.4535, 1.5312, 1.4153, 1.4329, 1.4587, 1.4845, 1.4896, 1.4546, + 1.4669, 1.3970, 1.6142, 1.5081, 1.4518, 1.4433, 1.4726, 1.5298, 1.4506, + 1.4577, 1.3823, 1.4399, 1.3515, 1.5149, 1.4061, 1.3733, 1.4142, 1.4033, + 1.4039, 1.4585, 1.5243, 1.2887, 1.4220, 1.3885, 1.3685, 1.5446, 1.4967, + 1.3723, 1.3086, 1.4316, 1.5888, 1.4014, 1.4572, 1.4306, 1.3706, 1.3952, + 1.4537, 1.3702, 1.3470, 1.4911, 1.3435, 1.4427, 1.3334, 1.4723, 1.4442, + 1.4298, 1.4902, 1.5190, 1.5378, 1.3713, 1.4796, 1.3502, 1.3679, 1.4412, + 1.4243, 1.4387, 1.4598, 1.4295, 1.3602, 1.4251, 1.3908, 1.4348, 1.3431, + 1.2924, 2.2007, 1.4737, 1.5134, 1.4332, 1.4222, 1.4245, 1.4097, 1.4176, + 1.4686, 1.4317, 1.3341, 1.4361, 1.4114, 1.4940, 1.4381, 1.4572, 1.4983, + 1.4450, 1.3799, 1.2985, 1.3759, 1.4010, 1.3972, 1.3854, 1.4735, 1.4090, + 1.4359, 1.4176, 1.1470, 1.3816, 1.4783, 1.4746, 1.4164, 1.4836, 1.3951, + 1.4659, 1.3982, 1.4574, 1.4124, 1.4343, 1.4261, 1.3920, 1.4123, 1.4116, + 1.4015, 1.4180, 1.4316, 1.3637, 1.3466, 1.3692, 1.5015, 1.4733, 1.4815, + 1.4506, 1.4205, 1.5465, 1.3860, 1.4599, 1.4610, 1.4319, 1.3867, 1.4277, + 1.4058, 1.5038, 1.3254, 1.4081, 1.4177, 1.5158, 1.1846, 1.4519, 1.4454, + 1.4294, 1.4399, 1.4583, 1.4504, 1.3279, 1.4267, 1.4588, 1.5297, 1.4256, + 1.4696, 1.3555, 1.3443, 1.4276, 1.4016, 1.4142, 1.5254, 1.4633, 1.4104, + 1.3277, 1.4271, 1.3069, 1.5239, 1.3506, 1.4531, 1.3500, 1.5408, 1.4959, + 1.4397, 1.4760, 1.4411, 1.4869, 1.3990, 1.3856, 1.3908, 1.4489, 1.3610, + 1.4872, 1.4184, 1.4228, 1.4071, 
1.3704, 1.3442, 1.4610, 1.3600, 1.4608, + 1.3470, 1.4855, 1.3853, 1.4027, 1.4836, 1.5157, 1.4284, 1.4205, 1.5115, + 1.5605, 1.3542, 1.3765, 1.3660, 1.4906, 1.3845, 1.4881, 1.5181, 1.4794, + 1.3891, 1.3431, 1.3882, 1.5148, 1.4450, 1.4678, 1.3192, 1.3673, 1.4189, + 1.4483, 1.4242, 1.3967, 1.3293, 1.4184, 1.4218, 1.4289, 1.5523, 1.4417, + 1.3730, 1.4459, 1.4928, 1.4107, 1.4603, 1.4857, 1.4236, 1.4498, 1.4567, + 1.4311, 1.4568, 1.3501, 1.4661, 1.5013, 1.4657, 1.4661, 1.4130, 1.4475, + 1.4720, 1.4931, 1.4820, 1.4778, 1.5414, 1.5453, 1.6388, 1.3688, 1.4117, + 1.4119, 1.4349, 1.4636, 1.4066, 1.4177, 1.4517, 1.3720, 1.5074, 1.5475, + 1.4340, 1.3701, 1.4308, 1.4670, 1.6579, 1.4371, 1.4845, 1.4797, 1.3362, + 1.3730, 1.4448, 1.4329, 1.5366, 1.3915, 1.3983, 1.5381, 1.5082, 1.4285, + 1.4790, 1.4179, 1.4467, 1.3747, 1.4305, 1.4426, 1.3684, 1.4587, 1.5358, + 1.5565, 1.4905, 1.3312, 1.3485, 1.5212, 1.4619, 1.3691, 1.4415, 1.3194, + 1.3221, 1.3496, 1.4874, 1.5061, 1.3922, 1.2938, 1.4972, 1.4190, 1.4368, + 1.5890, 1.3690, 1.4215, 1.4396, 1.5313, 1.4531, 1.5123, 1.4330, 1.4697, + 1.3048, 1.3517, 1.4730, 1.5835, 1.3724, 1.4596, 1.4266, 1.4551, 1.4752, + 1.5286, 1.3820, 1.4288, 1.5007, 1.4162, 1.3557, 1.4200, 1.3324, 1.3406, + 1.4550, 1.4577, 1.5262, 1.3809, 1.3025, 1.4433, 0.3834, 1.5315, 1.4227, + 1.4417, 1.3479, 1.3955, 1.4660, 1.4970, 1.4552, 1.3603, 1.4358, 1.5342, + 1.4399, 1.4024, 1.3257, 1.4345, 1.4759, 1.3737, 1.3923, 1.3939, 1.4668, + 1.5472, 1.3777, 1.2935, 1.3241, 1.5129, 1.4817, 1.4110, 1.3857, 1.4226, + 1.4402, 1.4090, 1.3844, 1.4178, 1.4924, 1.3791, 1.4072, 1.4571, 1.4994, + 1.4460, 1.4087, 1.4387, 1.4671, 1.4868, 1.3603, 1.2742, 1.4785, 1.4911, + 1.5177, 1.3701, 1.4761, 1.4919, 1.4244, 1.3797, 1.4817, 1.3420, 1.5247, + 1.4447, 1.4205, 1.4528, 1.2608, 1.4320, 1.3855, 1.4680, 1.4616, 1.4392, + 1.4488, 1.5083, 1.6081, 1.4801, 1.4033, 1.4737, 1.5151, 1.4830, 1.4994, + 1.4744, 1.3404, 1.4201, 1.3889, 1.5779, 1.3445, 1.4092, 1.3881, 1.4861, + 1.4624, 1.2567, 1.4012], 
device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 7.1926e-02, 9.8689e-02, -1.2303e-01, -2.5298e-02, 3.4331e-02, + 1.2279e-01, 7.0546e-02, 7.1920e-02, -1.2536e-01, 1.0271e-02, + 5.1308e-02, -2.0179e-02, -9.3063e-02, -2.3841e-02, -6.1843e-02, + -5.7950e-02, -2.1823e-02, -4.0402e-02, -4.8366e-02, 4.5095e+00, + -8.1055e-02, 3.5031e-02, 5.1826e-03, -8.3012e-03, -2.8866e-02, + 2.0101e-02, -1.2298e-02, 2.3551e-03, 2.6563e-02, -9.2846e-02, + 3.9427e-02, 7.3944e-03, 3.6174e-02, 7.0540e-02, 1.0127e-01, + -7.3228e-02, 1.6834e-02, -4.9488e-02, -1.2337e-01, 3.9531e-02, + 2.1712e-02, -6.2757e-03, -1.2862e-02, 2.7663e-02, -4.1635e-02, + 1.9606e-03, 4.4968e-02, -2.3781e-02, 7.2327e-03, 4.9955e-02, + 4.4026e-02, -1.3647e-02, 2.4914e-02, 4.7459e-02, 6.1771e-02, + 4.8846e-02, -9.5908e-02, -5.7176e-02, 3.2771e-02, 2.4921e-02, + 4.3089e-02, -5.9890e-02, 4.0627e-02, 3.5825e-02, 8.3920e-02, + -7.3116e-03, 6.8076e-02, 9.7614e-02, -3.1050e-03, 1.9702e-01, + 2.8901e-02, 3.4745e-02, 2.8137e-01, 1.2861e-01, -8.9629e-02, + 7.5863e-02, 5.6941e-02, -1.0249e-01, -5.3639e-02, 4.9301e-02, + 4.0120e-03, -3.8715e-02, -1.4343e-02, 1.1497e-01, 2.3780e-02, + 4.0012e-02, 1.2210e-01, 7.2994e-02, -1.2727e-02, 1.7420e-02, + 6.5306e-02, 7.4932e-03, 4.0247e-02, -5.5433e-02, -2.5412e-02, + 4.9268e-02, -1.1561e-02, -6.6962e-02, -5.6268e-02, -2.7171e-02, + 3.4363e-02, -1.0745e-01, 3.3880e-02, -6.8456e-02, 7.3153e-02, + 1.5285e-02, -1.0217e-02, -6.7162e-02, 2.9075e-02, -7.0045e-02, + -4.0472e-02, -2.5368e-02, 4.4549e-02, 5.6120e-02, 3.2623e-02, + -5.4795e-02, 2.6527e-02, -5.5896e-02, -1.6000e-02, 1.0200e-01, + -4.4206e-02, -1.1714e-01, -6.5153e-02, -1.2306e-02, -1.4154e-01, + 2.1956e-02, -1.1985e-01, -1.2669e-02, 2.0597e-02, -4.3890e-02, + -1.2176e-01, 4.0412e-02, -5.9347e-02, -2.5230e-02, -5.3073e-02, + 4.3119e-02, -3.1720e-02, -9.3149e-04, -4.3995e-02, -9.7696e-03, + 3.2038e-02, 1.4291e-03, 4.9957e-02, 4.6637e-02, 5.3679e-02, + -1.2209e-02, -1.9658e-02, -2.5437e-02, 
5.6141e-03, 1.0341e-01, + 2.0419e-01, -5.7293e-02, -1.0318e-01, -1.5183e-03, -5.2509e-02, + -1.9216e-01, -4.7395e-02, -5.7462e-02, 4.7400e-02, 7.5755e-03, + 1.4958e-01, -1.2968e-01, 3.6259e-03, 2.0252e-02, 1.0480e-02, + 1.7640e-02, -4.4658e-02, 2.1554e-02, -4.3620e-02, 4.1143e-02, + -4.8213e-02, 6.6094e-02, 4.4460e-02, 4.8178e-02, 2.9246e-02, + -7.9401e-02, 1.5524e-02, 5.2811e-02, -5.4107e-02, 7.2643e-03, + -2.3653e-02, -6.1912e-02, 4.2795e-02, -3.0012e-02, 1.7330e-02, + -1.7558e-01, 3.3922e-02, -2.2670e-01, -3.1409e-02, -7.9016e-02, + 1.7179e-03, -1.7622e-02, 9.6026e-02, 5.8238e-02, -1.0110e-01, + 2.8476e-02, -1.9550e-02, -1.1975e-01, -3.5531e-02, 5.4143e-02, + -1.2903e-01, -2.2033e-02, -3.3625e-02, 2.7479e-02, 3.7191e-02, + 1.0223e-01, -1.0936e-01, 8.2673e-02, 1.2283e-01, 2.0124e-02, + 3.4005e-03, 1.8433e-01, -1.0718e-02, -4.5526e-02, -5.3880e-02, + -3.0684e-02, -3.0079e-02, 6.1851e-02, -1.0747e-01, -1.5114e-01, + -2.1672e-02, -3.4361e-02, 7.0627e-02, -1.8793e-02, 2.9893e-02, + -3.4514e-02, 8.0450e-02, -1.5180e-03, 1.1799e-01, -3.8760e-02, + -3.0789e-03, 1.6182e-02, -3.7646e-02, 7.8771e-02, -2.8594e-02, + 4.2433e-02, -6.7604e-02, 5.3816e-02, 6.7646e-02, 1.8357e-02, + 8.8243e-02, 6.2018e-02, 3.7490e-02, 5.1684e-02, 3.0710e-02, + 4.0863e-02, 7.8782e-02, -5.9085e-02, -4.1544e-03, -1.7084e-02, + 4.6805e-02, 4.1792e-04, 6.9342e-02, -7.3616e-02, 9.0404e-02, + -6.6740e-02, -6.1122e-02, -8.2978e-02, 3.7532e-02, 6.6200e-03, + -7.2245e-02, -5.4751e-02, -7.9822e-03, -1.0696e-01, -1.4899e-02, + 4.4273e-02, -2.2897e-02, -1.2173e-02, -7.6203e-02, -1.0068e-02, + -2.9959e-02, 4.3258e-02, 5.1004e-02, -4.1525e-02, 4.9007e-03, + 3.9915e-02, -1.2598e-01, 2.5546e-02, 3.7653e-02, -8.7241e-02, + 1.8375e-01, 1.8118e-01, -4.7049e-04, 2.6604e-02, 2.1197e-02, + 4.7143e-02, 8.6612e-02, -5.3318e-02, 1.3800e-01, -9.7973e-02, + -9.6148e-03, -2.5540e-02, 1.1848e-01, 3.4482e-02, 4.3729e-02, + 2.8187e-03, 1.2743e-01, 5.8442e-02, 7.1207e-03, 3.7814e-02, + 3.9278e-02, 2.5866e-02, -2.0183e-02, 
1.0628e-01, 6.9019e-03, + 6.3032e-03, 8.7620e-02, -6.0641e-02, 8.8681e-02, -3.2896e-02, + 4.7297e-02, 1.7168e-02, -5.5006e-02, -6.5152e-02, 3.4500e-04, + -9.7153e-02, 1.8775e-03, 4.4805e-02, 1.4104e-01, -4.3080e-02, + -8.6769e-02, 1.9695e-01, -8.2901e-02, -2.9552e-02, 1.0799e-01, + -2.1573e-01, 3.3186e-02, 4.6826e-02, 6.3347e-02, -1.4726e-01, + 1.4905e-01, -3.3903e-02, 1.3529e-01, -4.4907e-02, 3.0754e-02, + 8.4923e-02, -1.1759e-02, -5.3177e-02, 1.8132e-03, 1.7820e-03, + -7.6560e-02, 3.6070e-02, -1.5777e-02, -3.2811e-02, 9.8611e-02, + 4.6915e-02, 6.5758e-02, 6.3504e-02, 3.6216e-02, 1.3813e-02, + -6.7687e-02, 4.9432e-02, 1.5750e-02, 2.6505e-04, -4.4287e-02, + -4.7489e-02, -4.3150e-02, 2.3563e-02, 7.1404e-03, -7.6061e-02, + 4.5241e-02, 8.6304e-03, 3.8552e-02, -7.6021e-02, -1.8180e-02, + -4.3500e-02, 4.0450e-02, 1.1016e-01, -7.8001e-03, 3.5547e-02, + 8.0275e-02, -4.0487e-02, -7.4191e-02, -9.4242e-03, -2.3875e-02, + 1.7037e-02, 8.3679e-02, 7.0398e-02, -1.3054e-02, -1.6851e-02, + -1.0129e-01, 7.7011e-02, 1.8186e-02, -1.5612e-02, -2.3688e-02, + -1.5667e-02, -3.0407e-02, -6.1633e-02, -7.1531e-02, -5.1729e-02, + 2.9326e-02, -3.0786e-02, -5.9341e-02, -1.3668e-03, 3.4147e-02, + 8.1444e-04, 1.3175e-02, 2.3305e-02, -8.5761e-03, 6.6633e-02, + 1.0857e-01, -4.4033e-02, 6.4443e-02, 4.3963e-02, -1.0024e-01, + -1.2691e-01, -6.5507e-02, 3.2509e-02, -1.7542e-02, -2.8070e-02, + -6.9461e-02, 5.4981e-02, 1.0495e-01, -7.2139e-02, -4.5150e-02, + -1.3194e+00, -2.1716e-02, 1.6033e-02, -2.0474e-02, -1.4458e-02, + -1.7178e-02, 5.0754e-03, 4.1188e-03, -5.8843e-02, -6.4931e-02, + 3.5215e-02, -1.2734e-01, -1.0457e-01, 3.7082e-02, -3.0661e-02, + -8.1785e-02, -6.2548e-02, -2.9710e-02, 6.4331e-02, 7.1621e-02, + -2.1610e-02, 2.6836e-02, 5.8310e-02, 1.0695e-01, -7.6603e-02, + -2.8282e-02, -5.2149e-02, -5.0183e-02, 1.3031e-01, 1.0361e-01, + -1.1657e-01, -1.3109e-03, -7.0763e-02, 1.3312e-01, 1.7073e-02, + -6.3133e-03, 7.3985e-03, -1.0757e-01, 4.0777e-02, 5.6490e-02, + 1.1158e-02, 1.7426e-02, 3.9671e-02, 
-1.4881e-02, -8.9648e-02, + 3.4174e-02, 4.1025e-02, -8.3607e-02, 1.4398e-02, -3.9054e-03, + -1.1896e-02, 3.6092e-02, -6.1125e-02, 7.3290e-02, 2.2875e-02, + 2.2129e-02, -1.0867e-01, -5.2400e-02, 5.9108e-03, -4.7872e-02, + 4.4676e-02, 1.6246e-01, 8.7665e-02, 7.1226e-03, 9.3367e-02, + 1.0172e-01, -1.0781e-01, 2.2360e-02, 1.4627e-01, -7.5511e-02, + -3.8149e-02, -1.0550e-02, 1.2566e-01, 8.7934e-02, 2.7694e-02, + 4.6231e-02, 9.8059e-03, 1.1057e-02, -1.2549e-02, -1.2867e-01, + 3.2761e-02, -6.6936e-02, 5.8947e-02, -3.9928e-02, 2.3036e-02, + 2.4065e-02, -3.8741e-02, 5.7200e-02, -4.8267e-02, -5.7932e-02, + 4.2671e-02, 1.0091e-01, 6.2769e-02, 1.2278e-02, 4.0642e-02, + -9.1265e-02, -1.0523e-02, -4.5142e-02, 5.2563e-02, -7.1983e-02, + 4.8271e-03, 5.9733e-02, 1.5306e-01, 1.2165e-01, -7.3451e-02, + -4.4362e-02, -5.2985e-03, 1.2389e-01, -1.8968e-02, -4.3705e-02, + -7.5750e-03, 1.7417e-03, -1.3828e-02, 6.1293e-03, -6.8898e-02, + 2.5053e-02, 3.2660e-02, -2.2871e-02, -6.5013e-02, -3.3281e-02, + 1.6938e-02, 5.2703e-03, 1.7473e-02, -4.0848e-02, -6.3855e-02, + -4.4330e-02, 1.2664e-02, -1.8142e-02, -3.0737e-03, 3.5341e-04, + -3.6272e-02, 6.4434e-02, 8.5610e-03, 5.5707e-02, 3.1020e-02, + 5.4425e-02, -1.0614e-02, -6.8428e-02, 5.0324e-02, -2.5363e-02, + -1.7314e-02, 2.0876e-02, -3.0069e-02, -9.7373e-03, 8.5768e-02, + -6.7738e-02, 1.0936e-01, 2.3543e-02, 2.8994e-02, 1.1516e-01, + 3.2016e-02, 2.9510e-02, 1.9065e-02, 5.2079e-02, 4.7913e-02, + 6.7791e-02, 1.1473e-02, -8.5581e-02, -3.5063e-02, 7.8980e-02, + 2.3897e-02, 8.8964e-03, 8.0085e-02, 4.4945e-02, -2.8048e-02, + -1.1211e-01, 1.1271e-03, 6.7566e-02, 1.0283e-01, 8.2337e-02, + 1.4015e-02, 3.6052e-02, -8.8143e-02, 3.8836e-02, -1.3229e-03, + 2.0368e-02, 1.0822e-01, 1.0346e-01, -4.8797e-02, 7.9168e-03, + -3.0540e-02, 4.3786e-02, 3.0310e-02, -2.8381e-02, 4.6088e-02, + -8.1171e-02, -9.7811e-02, -4.9416e-03, 2.1832e-02, 1.2438e-02, + -4.1425e-02, 7.7462e-02, -7.7581e-02, 4.9547e-02, -4.6468e-02, + 7.3378e-02, 1.7769e-02, 3.7301e-04, -2.8760e-02, 
-3.4329e-02, + -9.3701e-02, 2.8186e-02, 7.1374e-02, 2.5266e-02, -1.4843e-02, + 6.4047e-02, 1.9724e-04, 2.2431e-02, 2.5484e-02, 2.5640e-02, + -3.9437e-02, 5.5480e-03, -2.8331e-02, 5.9693e-02, -2.6961e-02, + 1.3684e-01, -7.8911e-03, 8.4968e-02, 1.5320e-01, 3.1721e-02, + 2.1825e-02, -2.4057e-02, -9.8858e-03, 6.1091e-02, -6.6224e-02, + 1.3962e-02, -3.9338e-02, 5.9961e-02, -3.8697e-02, 3.3387e-02, + 7.4055e-03, 3.7146e-02, 7.8271e-02, 5.5880e-02, 1.4089e-01, + 9.3110e-02, -1.4280e-02, -4.0142e-02, 6.0164e-02, -7.6656e-02, + 4.5689e-02, -1.0814e-01, -3.5491e-02, 4.5264e-02, 9.2623e-02, + -7.1723e-02, 1.7291e-02, -8.7315e-02, -4.0728e-03, 1.0796e-01, + 3.3997e-02, -9.8876e-02, -8.9155e-02, 3.8369e-02, 1.9530e-02, + -1.9117e-01, -2.0658e-03, 6.6779e-03, 8.1699e-03, -9.0752e-03, + 4.4134e-02, -3.9304e-02, -5.1453e-02, -1.0062e-02, 6.5434e-02, + -4.0342e-02, -4.5351e+00, -2.3207e-02, 5.0064e-02, -2.5201e-02, + -7.1243e-02, -4.5385e-02, -2.5975e-02, -3.4780e-02, 4.8646e-02, + -2.3921e-02, -2.9032e-02, 1.5368e-02, -8.8112e-03, -2.5050e-02, + 5.5610e-02, -1.1592e-02, -6.9792e-02, 3.3603e-02, -4.6731e-02, + 1.1831e-01, 8.9346e-02, -1.9387e-02, -1.4286e-02, -1.2412e-01, + -3.5152e-02, 4.5515e-02, 5.9119e-02, 1.7479e-02, 6.7957e-02, + -1.4066e-02, 7.6105e-02, 6.1042e-02, 4.2792e-02, 2.3038e-02, + 1.0391e-02, 1.9895e-02, 8.4799e-03, -8.5925e-02, 1.9332e-02, + 4.1033e-03, -1.8517e-02, 5.1354e-02, -8.4071e-02, 5.3309e-02, + 3.8044e-03, -2.0082e-01, 9.7535e-02, 4.2757e-02, 8.7049e-02, + 9.1052e-02, 3.9801e-02, 6.2336e-02, 7.5325e-02, -3.0500e-03, + -2.3021e-02, 1.4764e-02, -6.1970e-03, 2.1045e-03, -5.7605e-02, + 8.1457e-02, -1.2928e-01, -1.1268e-01, 7.0443e-02, -3.6264e-02, + -3.0411e-02, -1.2364e-01, -4.2091e-02, 7.2788e-03, -5.1890e-02, + -4.4106e-02, -4.7509e-03, -6.6056e-02, -4.5752e-02, 7.1066e-02, + -1.1775e-02, 1.1221e-01, 1.0023e-01, 5.5698e-02, -4.2404e-02, + 3.4032e-03, 3.7870e-02, 7.1071e-02, -6.0725e-02, -7.0095e-02, + -7.1618e-02, 8.7326e-02, -1.1954e-04], 
device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[-0.0018, -0.0035, 0.0047, ..., -0.0104, 0.0071, -0.0345], + [-0.0061, -0.0015, -0.0073, ..., -0.0022, -0.0246, -0.0080], + [-0.0041, -0.0014, -0.0101, ..., -0.0074, 0.0381, -0.0051], + ..., + [ 0.0136, 0.0115, -0.0352, ..., -0.0105, 0.0295, 0.0026], + [ 0.0275, 0.0076, -0.0009, ..., 0.0037, -0.0102, -0.0048], + [-0.0325, -0.0094, -0.0329, ..., 0.0203, 0.0127, 0.0209]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3206, 0.0299, -0.2832, ..., -0.3774, -0.0878, -0.3206], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[ 0.0119, 0.0111, -0.0122, ..., 0.0184, 0.0008, 0.0128], + [ 0.0061, 0.0051, 0.0010, ..., -0.0092, -0.0083, -0.0078], + [ 0.0028, 0.0019, -0.0141, ..., -0.0223, 0.0200, -0.0158], + ..., + [-0.0324, 0.0039, 0.0036, ..., -0.0004, 0.0073, -0.0153], + [-0.0048, 0.0068, 0.0207, ..., 0.0054, 0.0054, 0.0070], + [-0.0170, 0.0005, 0.0025, ..., -0.0155, 0.0081, 0.0126]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 4.6204e-02, 5.8441e-02, -5.1392e-02, 1.8127e-02, -1.4503e-02, + 1.1530e-01, 7.2876e-02, -1.4977e-02, 5.1849e-02, -2.3407e-02, + 6.6528e-02, 4.4891e-02, -9.1919e-02, -5.7007e-02, 5.9967e-02, + 2.0325e-02, -2.2049e-02, 2.5101e-02, -2.0569e-02, -7.3242e-02, + 3.3478e-02, 5.7281e-02, -9.8419e-03, -8.0017e-02, 6.3232e-02, + -4.4342e-02, -1.3260e-02, 3.1464e-02, -6.7932e-02, 1.5965e-03, + -2.5314e-02, 5.3833e-02, -5.8044e-02, 5.3284e-02, 3.6621e-02, + 1.6907e-02, -5.3741e-02, -6.6589e-02, 2.3026e-02, 9.1675e-02, + -2.7908e-02, -1.7334e-02, 5.2734e-02, 2.9926e-03, -2.6947e-02, + 1.2520e-02, 3.7933e-02, 4.8340e-02, -1.3135e-01, -1.7252e-03, + -3.8872e-03, 3.7537e-02, -1.6800e-02, 4.6600e-02, 5.3894e-02, + 1.0391e-02, -7.0740e-02, -1.1070e-02, 1.3390e-02, 
-1.5808e-02, + 4.9286e-02, -1.4046e-02, -6.4545e-03, -4.1962e-02, -2.0508e-02, + 2.2232e-02, 6.1859e-02, 2.9922e-02, 9.7885e-03, -4.4525e-02, + -7.2002e-04, 1.0872e-02, 3.9642e-02, -1.8005e-03, -9.3567e-02, + -3.4668e-02, 1.4938e-02, -5.6992e-03, 8.4915e-03, -1.9348e-02, + -2.7542e-02, 5.0850e-03, -5.3589e-02, -5.9319e-03, -4.7569e-03, + -8.8272e-03, 3.0289e-02, -5.6458e-03, 2.9011e-03, -6.5063e-02, + -7.7454e-02, 7.1289e-02, 8.5754e-03, -3.6087e-03, -1.3557e-02, + 6.9153e-02, 7.7087e-02, 2.4429e-02, -2.4673e-02, -1.0025e-02, + 2.5177e-02, -1.5161e-01, 4.2419e-02, -8.7402e-02, -5.4535e-02, + 2.6260e-02, 2.1763e-03, 3.0106e-02, 4.6501e-03, -1.1963e-02, + 7.1983e-03, 1.1719e-01, 5.6671e-02, 5.1758e-02, 2.3102e-02, + -4.7180e-02, 2.0309e-02, -7.3090e-03, -1.1462e-01, -7.7858e-03, + -1.0674e-02, -4.4342e-02, -4.5593e-02, -1.9928e-02, -5.2673e-02, + -6.5918e-02, -5.4749e-02, -5.9418e-02, 1.8753e-02, 5.8167e-02, + 1.4366e-02, 7.4890e-02, -9.4482e-02, -5.0049e-02, 5.6946e-02, + 7.3914e-02, 2.0340e-02, -3.3627e-03, -9.0515e-02, 1.0352e-01, + -2.6215e-02, 3.2440e-02, 5.1544e-02, 2.4673e-02, 6.7282e-04, + -2.1332e-02, -2.5415e-04, 2.0615e-02, 2.0390e-03, 8.2825e-02, + 3.0243e-02, -4.2558e-04, 4.9683e-02, 4.6326e-02, -8.1970e-02, + 9.2316e-03, -8.6487e-02, 6.5247e-02, 1.1981e-04, -2.8229e-03, + 4.2542e-02, -1.0938e-01, -1.5732e-02, 8.0872e-03, -2.2781e-02, + 2.0920e-02, -1.6833e-01, -2.4734e-02, -1.9852e-02, 8.9783e-02, + -2.3376e-02, 7.5623e-02, -4.9133e-02, -3.3386e-02, -3.0334e-02, + -8.6182e-02, 2.4261e-02, -4.0039e-02, 2.0599e-02, 1.3298e-02, + 2.4902e-02, -6.8665e-02, -1.3580e-02, 2.7344e-02, 3.2837e-02, + -4.9210e-03, -5.4077e-02, 2.8174e-01, 5.2887e-02, -6.7444e-02, + 6.1150e-03, 2.0996e-02, 1.3037e-01, 1.1176e-01, -9.5520e-02, + 5.8838e-02, -1.4839e-03, -6.2347e-02, 1.8814e-02, 1.4618e-02, + -4.5815e-03, -5.8136e-02, 1.5396e-02, 5.5634e-02, 5.2002e-02, + 9.8694e-02, -6.1981e-02, 4.0710e-02, -1.0803e-02, 2.3403e-03, + 1.3390e-02, 2.5005e-03, 3.7354e-02, 5.5450e-02, 
-5.9357e-02, + -7.2571e-02, -2.1042e-02, 8.5083e-02, -5.7129e-02, -1.6895e-01, + 9.1003e-02, 3.0670e-03, 4.8370e-03, 4.2877e-02, 5.7983e-03, + -1.3132e-03, -6.2561e-02, -1.9104e-02, 6.4270e-02, 2.6672e-02, + 1.8631e-02, 5.1361e-02, -5.6244e-02, -1.5594e-02, -3.5858e-02, + -4.8737e-02, -2.8534e-02, 1.6953e-02, -4.9957e-02, 1.0632e-01, + -5.7411e-03, 2.0050e-02, -3.8849e-02, 1.4210e-03, -6.3972e-03, + -2.9724e-02, 3.4271e-02, -8.4290e-02, -1.4397e-02, 1.6800e-02, + 2.3911e-02, -3.8422e-02, -4.4861e-03, -1.2952e-01, 1.7334e-02, + 1.6861e-03, -4.2175e-02, 9.4177e-02, -5.9204e-02, -3.0533e-02, + -1.4380e-01, -3.2959e-02, -1.0811e-02, -1.7792e-02, -6.7139e-02, + -3.5126e-02, 1.6022e-02, -2.6794e-02, 1.9394e-02, 3.3203e-02, + -1.4549e-02, -1.9501e-02, 1.5244e-02, -1.7181e-02, -7.9041e-02, + 9.0271e-02, 4.2633e-02, 7.7637e-02, 6.4087e-02, -8.1421e-02, + 8.1482e-02, 1.4046e-02, -4.6661e-02, -4.1412e-02, -1.3275e-02, + 4.4342e-02, 2.3346e-02, -5.0262e-02, 5.8746e-02, -7.6050e-02, + 5.1758e-02, -3.5553e-02, 8.8739e-04, -9.7351e-02, 2.1912e-02, + 1.2993e-02, 7.8613e-02, -1.4999e-02, 5.1422e-02, 5.5115e-02, + 4.3526e-03, -4.4830e-02, -9.6863e-02, 5.5969e-02, -4.8706e-02, + 5.9776e-03, 5.9906e-02, -1.4717e-02, 3.5278e-02, -2.7069e-02, + 2.9343e-02, -2.4090e-03, -4.6539e-02, -3.1799e-02, 6.6162e-02, + -1.7212e-02, 3.7781e-02, 1.9287e-02, 5.4016e-02, -3.1948e-03, + -9.8572e-03, -2.2186e-02, -3.3600e-02, 1.7838e-02, 2.9468e-01, + -2.1094e-01, 7.1411e-02, -2.4445e-02, 3.2501e-02, -1.4282e-01, + 6.5369e-02, -4.2450e-02, 3.3142e-02, -1.7761e-02, -5.8380e-02, + -2.8343e-03, -4.4708e-03, -8.4839e-03, 3.1647e-02, -2.3941e-02, + -4.8035e-02, 5.1910e-02, 2.4338e-02, 8.9264e-03, 3.7872e-02, + 6.0303e-02, -6.6162e-02, 5.1239e-02, 3.4943e-03, -7.2632e-02, + 1.5488e-02, 4.9622e-02, 4.0375e-02, -8.3313e-03, -5.2582e-02, + -2.9663e-02, -9.0027e-03, 1.3245e-02, -2.8702e-02, -8.6823e-03, + -3.5461e-02, -5.5450e-02, -1.6113e-02, -2.6245e-02, -5.1788e-02, + 2.9434e-02, -3.0579e-02, 7.8979e-02, 
8.5571e-02, -1.0269e-02, + 4.1046e-02, -3.1204e-02, -6.1523e-02, -2.6520e-02, -8.5831e-03, + -8.5602e-03, 7.6660e-02, 5.7404e-02, -1.4267e-02, -4.5052e-03, + 1.1078e-02, 4.4556e-02, -5.9906e-02, -2.7466e-02, -8.3801e-02, + 9.2125e-04, -2.0523e-02, 8.5526e-03, 9.3889e-04, -1.0284e-01, + -8.7891e-02, 3.8971e-02, 3.6591e-02, 1.4114e-02, -3.1128e-02, + 6.0028e-02, -2.4994e-02, 6.2561e-02, 3.0350e-02, -5.2307e-02, + 1.6602e-02, 7.7576e-02, -2.0691e-02, 2.5986e-02, -6.1218e-02, + -3.5736e-02, 2.9236e-02, 5.2155e-02, -2.7573e-02, 3.2166e-02, + -3.6865e-02, 2.3148e-02, -5.7159e-02, -3.4546e-02, 3.0212e-03, + -9.9976e-02, -3.1021e-02, -1.0002e-02, 5.6213e-02, 1.6800e-02, + -7.2327e-02, -2.2507e-03, 2.1133e-02, -8.6853e-02, -5.5176e-02, + 5.8289e-02, 2.2949e-02, -4.5532e-02, 8.5938e-02, 6.1768e-02, + -1.0492e-01, 7.0435e-02, 8.5205e-02, 3.3020e-02, 8.1909e-02, + -9.2163e-03, 3.7292e-02, 1.9791e-02, 6.0577e-02, -6.4697e-02, + -9.0256e-03, -2.2507e-02, 1.0323e-02, -6.9092e-02, 3.5706e-02, + -3.5492e-02, 2.2415e-02, -2.1896e-02, 5.7678e-02, -2.4536e-02, + 2.3712e-02, -2.2385e-02, -8.9661e-02, 7.8064e-02, 5.8807e-02, + 3.7903e-02, -3.4760e-02, -1.6693e-02, -1.4915e-02, -5.1544e-02, + -3.3741e-03, -1.2665e-02, -8.1055e-02, -2.2537e-02, -1.7977e-03, + 1.5495e-02, 5.0507e-02, -5.6885e-02, 2.3560e-02, 3.1830e-02, + -2.3315e-02, -7.5134e-02, -7.4585e-02, 3.1281e-02, -7.7576e-02, + 1.1047e-01, 3.3142e-02, 6.2469e-02, 2.0046e-03, 3.7506e-02, + 2.1027e-02, 1.6602e-02, 5.1849e-02, 4.5807e-02, -6.7871e-02, + -1.8753e-02, 9.8495e-03, 4.0741e-02, -1.4687e-02, -1.0059e-01, + 4.0466e-02, 4.2847e-02, -3.2776e-02, 4.2419e-03, -7.5134e-02, + 5.6854e-02, -2.2797e-02, 5.1208e-02, -1.0303e-01, 2.5650e-02, + -3.6530e-02, -2.0542e-03, 2.3987e-02, -2.2217e-02, -2.7466e-02, + -4.6082e-02, 6.6467e-02, 7.7393e-02, -3.6652e-02, -1.0638e-01, + -6.0242e-02, -8.0200e-02, -5.3223e-02, 1.3268e-02, 5.2376e-03, + -4.8187e-02, -2.8870e-02, 4.0070e-02, -2.7695e-02, -6.3599e-02, + 3.1166e-03, 5.7678e-02, 
3.4088e-02, -9.8324e-04, 1.9257e-02, + -1.5821e-03, -1.0948e-02, -5.5023e-02, -1.3626e-02, -5.9387e-02, + 3.8574e-02, -4.7852e-02, 3.1872e-03, -7.1655e-02, 5.0323e-02, + 5.2277e-02, -3.2776e-02, -3.9124e-02, -4.0955e-02, -4.5532e-02, + -4.1504e-02, 1.0522e-01, 2.8778e-02, 1.4038e-02, -1.4252e-02, + 6.8970e-03, 2.5620e-02, -2.8400e-03, 5.7617e-02, 5.7129e-02, + -8.1909e-02, -1.7700e-02, -3.0426e-02, -3.9337e-02, -5.2673e-02, + 2.1652e-02, 1.8539e-02, -7.2632e-02, -4.7180e-02, 6.1279e-02, + -5.0629e-02, 3.3264e-02, -5.4626e-02, -5.1727e-02, 6.0120e-02, + 6.1401e-02, -1.6281e-02, 5.2338e-02, 6.8115e-02, -3.2837e-02, + 8.8806e-02, -7.4120e-03, -4.5929e-02, 2.6871e-02, 2.0828e-02, + 2.6703e-02, 6.1493e-03, 2.7985e-02, -2.2278e-03, -2.0447e-02, + -4.0802e-02, 3.4790e-02, 7.5623e-02, 8.7524e-02, 8.7219e-02, + 3.7327e-03, 1.9341e-03, 6.0692e-03, 6.4331e-02, 7.9346e-02, + -2.7557e-02, -1.8860e-02, 1.7929e-02, -4.6539e-03, 2.0950e-02, + -1.1307e-02, -8.0383e-02, -9.9548e-02, -1.2985e-02, 1.4694e-02, + -1.4982e-03, -1.2039e-02, -4.5990e-02, 3.1403e-02, 3.7720e-02, + 8.4000e-03, 1.8600e-02, -2.5253e-02, 2.2232e-02, 2.4734e-02, + 5.6877e-03, 1.8463e-02, 3.3722e-02, 7.6294e-02, -1.8784e-02, + -4.1504e-02, -3.3386e-02, 7.5989e-02, -3.5034e-02, 6.9641e-02, + 4.2786e-02, 2.3697e-02, 2.8458e-02, 1.9760e-02, 4.0222e-02, + -7.4646e-02, -1.6129e-02, 2.0325e-02, 1.0529e-01, -1.8158e-02, + 1.5137e-01, 9.3384e-03, 1.5114e-02, 6.5552e-02, 9.2010e-03, + 6.3782e-03, 6.5880e-03, 1.6492e-01, 4.3976e-02, 4.2206e-02, + 3.6652e-02, -3.6621e-02, -1.8311e-02, 1.0323e-02, 5.4817e-03, + 1.4816e-02, -9.3445e-02, 3.4729e-02, 4.1595e-02, 2.5360e-02, + 3.9337e-02, 7.1167e-02, 3.4821e-02, 5.8441e-02, 1.0474e-01, + 6.0272e-02, -9.9792e-02, -7.5195e-02, 7.1655e-02, 1.0089e-01, + -5.3192e-02, 5.8441e-02, -1.1896e-01, -4.9805e-02, 9.4833e-03, + 9.4116e-02, -1.4209e-01, -5.2246e-02, 1.5942e-01, 4.7852e-02, + -4.1351e-03, 5.0598e-02, 7.8796e-02, 3.9520e-02, -5.8517e-03, + -2.4399e-02, -3.5370e-02, -8.7219e-02, 
-2.7390e-02, 9.0210e-02, + 5.2094e-02, -6.2622e-02, -2.3300e-02, 3.0960e-02, -6.3416e-02, + 8.2886e-02, -1.1761e-01, -4.1412e-02, 1.4107e-02, 3.2227e-02, + -1.3374e-02, -9.9121e-02, -2.8076e-03, 7.0557e-02, -2.4384e-02, + 1.1920e-01, 6.6650e-02, -6.0059e-02, 3.6041e-02, -3.1235e-02, + 5.3528e-02, -3.6621e-02, 7.0068e-02, -3.5736e-02, -1.0880e-02, + -6.6467e-02, 2.5543e-02, -8.5402e-04, -3.6346e-02, -9.3384e-03, + 1.8234e-02, -5.7404e-02, -3.2501e-03, -5.9448e-02, -3.1738e-02, + -9.0637e-02, -5.0476e-02, 7.0862e-02, 2.3178e-02, -1.5820e-01, + -1.4290e-02, -3.4904e-04, 1.1505e-02, -2.6077e-02, -9.8343e-03, + -3.0182e-02, -2.0691e-02, -1.1168e-03, -4.4800e-02, 5.0751e-02, + -2.8564e-02, 1.4658e-03, 7.2083e-02, 4.0649e-02, 2.2446e-02, + 3.7415e-02, 3.5400e-02, -3.0701e-02, 2.8976e-02, -5.6671e-02, + -1.3855e-02, -5.4016e-02, -6.5552e-02, 1.2970e-02, -6.0730e-02, + -1.8860e-02, -1.0938e-01, 9.8114e-03, 2.0390e-03, 3.4576e-02, + 3.3207e-03, -5.9319e-03, -1.3916e-01, 6.0791e-02, 6.5186e-02, + 1.5007e-02, 5.7281e-02, -1.7487e-02, 6.9824e-02, 2.9663e-02, + -5.3375e-02, 1.5266e-02, -7.2510e-02, -2.8275e-02, -1.1635e-02, + -2.3514e-02, 9.7885e-03, 4.0588e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.1482, 2.0761, 2.1921, 2.0451, 2.0384, 2.1436, 2.2704, 2.1227, 2.1465, + 2.1898, 2.1549, 2.0869, 2.1795, 2.0904, 2.1714, 2.0272, 2.1504, 2.0925, + 2.1178, 1.2856, 2.1856, 2.1374, 2.2333, 2.0119, 2.2004, 2.1451, 2.0892, + 2.0631, 2.2006, 2.0803, 2.1321, 2.0761, 2.1056, 2.0743, 2.1243, 2.1250, + 2.3369, 2.1272, 2.0290, 2.1546, 2.0374, 2.3274, 2.2738, 2.1767, 2.0974, + 2.1418, 2.1897, 2.0832, 2.0704, 2.1172, 2.1430, 2.1572, 2.1444, 2.1279, + 2.2470, 2.0093, 2.0840, 1.9981, 2.0446, 2.0698, 2.1134, 2.1441, 2.1823, + 2.1272, 2.0367, 2.0682, 2.1908, 2.1000, 2.0956, 1.7824, 2.1168, 2.1184, + 2.3859, 2.0491, 2.1741, 2.1228, 2.0877, 2.1016, 2.2688, 2.0134, 2.1942, + 2.1419, 2.0865, 2.0894, 2.1286, 2.2371, 2.1443, 
2.0431, 2.0828, 2.1831, + 2.0309, 2.0924, 2.0745, 2.1408, 2.1419, 2.1016, 2.2139, 2.1779, 2.1671, + 2.1297, 2.2277, 2.0503, 2.1542, 2.1528, 2.0703, 2.1439, 2.2227, 2.1981, + 2.0713, 2.0392, 2.0976, 2.1198, 2.0589, 2.1613, 2.1716, 2.0782, 2.0527, + 2.0557, 2.0537, 2.1340, 2.1217, 2.0609, 2.3008, 2.1750, 2.1099, 2.2135, + 2.1483, 2.1244, 2.0733, 2.0288, 2.1141, 2.1998, 2.0138, 2.1505, 2.3035, + 2.1225, 2.2076, 2.0718, 2.0643, 2.1111, 2.1190, 2.0362, 2.1451, 2.1716, + 2.0588, 2.1940, 2.1055, 2.1166, 2.1894, 2.0712, 2.1387, 2.1944, 2.2454, + 2.2819, 2.1488, 1.3095, 2.1511, 2.0310, 2.0929, 2.1536, 1.9961, 2.1454, + 2.2574, 2.0555, 2.1161, 2.2521, 2.1017, 2.1226, 2.0426, 2.0752, 2.1164, + 2.1192, 1.8822, 2.1522, 2.0504, 2.1629, 2.1790, 2.3046, 2.1391, 2.1411, + 2.4045, 2.2437, 2.0769, 2.1762, 2.0932, 2.0806, 2.0046, 0.9834, 2.0615, + 2.1874, 2.0295, 2.1896, 2.1249, 2.1741, 2.1469, 2.2090, 2.3059, 2.0889, + 0.8082, 2.0433, 2.0004, 2.0083, 2.1260, 2.0278, 2.1430, 2.1549, 2.2318, + 2.2448, 1.9585, 2.0712, 2.0533, 2.0480, 2.1845, 2.0333, 2.2601, 2.1605, + 2.1659, 2.1481, 2.2122, 2.1820, 2.1369, 2.1664, 1.9730, 2.2248, 2.0468, + 2.2257, 2.2819, 2.1141, 2.1859, 2.0820, 2.1872, 2.1288, 2.1822, 2.1780, + 2.0478, 2.2415, 2.1621, 2.1453, 2.0071, 2.1323, 2.1880, 2.3021, 2.1457, + 2.2072, 2.1449, 2.1183, 2.0404, 2.2815, 2.2627, 2.1645, 2.2278, 2.1534, + 2.0938, 2.1091, 2.1209, 2.0492, 2.1297, 2.1816, 2.1060, 2.2206, 2.0955, + 2.1179, 2.1007, 2.2277, 2.1673, 2.0951, 2.2219, 2.1363, 2.0325, 2.1666, + 2.0490, 2.0837, 2.0539, 2.1483, 2.1213, 2.0532, 2.1494, 2.2184, 2.3024, + 2.1308, 2.1234, 1.9768, 2.2508, 2.1202, 2.0864, 2.1375, 2.2378, 2.1481, + 2.1342, 1.9919, 2.1629, 2.1637, 2.2153, 2.1035, 2.1337, 2.0895, 2.1282, + 2.1316, 1.9982, 2.0586, 2.1836, 2.1714, 2.1560, 2.1255, 2.2053, 2.1329, + 2.1358, 2.1552, 2.1542, 2.1948, 2.1878, 2.1202, 2.0789, 2.1483, 2.0329, + 2.2126, 2.1098, 2.2082, 2.0599, 2.2092, 2.3170, 2.2484, 2.1793, 2.0508, + 0.5618, 2.0714, 2.1515, 2.1989, 2.1049, 2.0814, 
2.0987, 2.2290, 2.1709, + 2.2567, 2.1159, 2.0255, 2.2227, 2.0735, 2.0707, 2.0808, 2.3608, 2.1141, + 2.1577, 2.2603, 2.2280, 2.0188, 2.2263, 2.0251, 2.3628, 2.1262, 1.9121, + 2.3010, 2.1140, 2.3798, 2.0306, 2.1233, 2.0576, 2.1311, 2.1088, 2.0206, + 2.1556, 2.0028, 2.1734, 2.0158, 2.1590, 2.0562, 2.1899, 2.1982, 2.1236, + 2.0886, 2.2811, 2.1128, 2.1131, 2.1598, 2.0060, 2.0154, 2.1563, 2.2502, + 2.1250, 2.0527, 2.0428, 1.9846, 2.1653, 2.1090, 2.1451, 2.2003, 2.1499, + 2.2526, 2.2141, 2.0655, 2.0515, 2.1719, 2.1375, 2.1248, 2.0989, 2.0976, + 2.0913, 2.1493, 2.2613, 2.2130, 2.0378, 2.2520, 2.1286, 2.1573, 2.1093, + 2.0292, 2.1074, 2.0239, 2.1475, 2.1301, 2.1112, 2.1901, 2.0577, 2.1609, + 1.4774, 2.2200, 2.2076, 2.0265, 2.1652, 2.0973, 2.2043, 2.0817, 2.2479, + 2.1600, 2.0770, 2.0933, 2.0445, 2.0840, 2.1304, 2.1265, 2.1881, 2.0445, + 2.1241, 2.1748, 2.2234, 2.0541, 2.2300, 2.1778, 2.0552, 2.2086, 2.0418, + 2.0995, 2.0420, 1.7627, 2.0249, 2.0225, 2.1123, 2.2335, 2.1247, 2.0972, + 2.0920, 2.1411, 2.2878, 2.1152, 2.0844, 2.1943, 2.1201, 2.1608, 2.0665, + 2.0956, 2.1365, 2.1061, 2.1110, 2.1114, 2.1407, 2.0190, 2.1246, 1.9944, + 2.2389, 2.1340, 2.1952, 2.0701, 2.1126, 2.1158, 2.3171, 2.0932, 2.1448, + 2.2950, 2.0981, 2.1204, 2.1288, 2.2271, 2.0780, 1.4625, 2.2277, 2.1814, + 2.1414, 2.0223, 2.0474, 2.2824, 2.1625, 2.0740, 2.0709, 2.1278, 2.2366, + 2.0778, 2.1140, 1.9411, 1.9732, 2.2932, 2.1656, 2.2058, 2.1473, 2.0715, + 2.1679, 2.1443, 2.0592, 2.1594, 2.1587, 2.2206, 2.0880, 2.0784, 2.1948, + 2.1170, 2.3211, 2.1703, 2.1656, 2.1462, 2.0380, 2.1215, 2.2290, 2.1520, + 2.2105, 2.0404, 2.0373, 2.1508, 2.1466, 2.0975, 2.1173, 2.2048, 2.0858, + 2.1657, 2.1193, 2.1691, 2.1388, 2.2129, 2.0910, 2.0813, 2.2194, 2.0774, + 2.1017, 2.2044, 2.1490, 2.1446, 2.3251, 2.2174, 2.1969, 2.1872, 2.2394, + 2.0567, 2.2579, 1.9658, 2.0598, 2.1581, 2.0006, 2.0074, 2.2322, 2.1349, + 2.1261, 2.0935, 2.2362, 1.5636, 2.0990, 2.0930, 2.2388, 2.1674, 2.0623, + 2.1734, 2.1535, 2.0805, 2.0350, 2.1338, 2.1778, 
2.1319, 2.2243, 2.2794, + 2.1139, 2.2005, 2.1549, 2.0232, 2.0834, 2.0726, 2.2218, 2.0496, 2.1193, + 2.0845, 2.1427, 2.0182, 2.3726, 2.0258, 2.1590, 2.2770, 2.1841, 2.2523, + 2.1467, 1.9906, 2.0301, 2.1457, 2.1497, 2.0760, 2.0481, 2.1327, 2.1243, + 2.2556, 2.1431, 2.1105, 2.1889, 1.7540, 2.1039, 2.1062, 2.0953, 2.1475, + 2.1238, 2.1812, 2.2103, 2.1678, 2.1608, 2.0309, 2.1152, 2.1129, 2.2841, + 2.0309, 2.0734, 2.1454, 2.2304, 2.0570, 2.1209, 2.2711, 2.1286, 2.1101, + 2.0775, 2.0758, 2.0920, 2.2181, 2.2613, 2.1191, 2.1776, 2.1671, 2.1099, + 2.2056, 2.1635, 2.0298, 2.2926, 2.1715, 1.9039, 2.0213, 2.0237, 2.1132, + 2.0098, 2.1750, 2.2415, 2.1563, 2.1910, 2.0880, 2.1238, 2.2616, 2.1590, + 2.0323, 2.0179, 2.1277, 1.8232, 2.1558, 2.2524, 2.2178, 2.1622, 2.0681, + 2.1036, 2.1641, 2.1510, 2.2235, 2.0144, 2.0947, 2.0730, 1.9855, 2.2212, + 2.1229, 2.1985, 2.2897, 2.1890, 2.0744, 2.0369, 2.3364, 2.1039, 2.1677, + 2.1267, 2.2754, 2.2022, 2.0778, 2.1968, 2.0938, 2.2225, 1.9913, 2.1763, + 2.2320, 2.0717, 2.1027, 2.1216, 2.2761, 2.2161, 2.1315, 2.0953, 2.0402, + 2.3112, 2.1639, 2.3642, 2.1207, 2.2583, 2.0666, 2.0418, 2.1478, 2.1192, + 2.0907, 2.1516, 2.1368, 2.1209, 2.2247, 2.0873, 2.2266, 2.1265, 2.4356, + 2.1106, 2.2100, 2.3024, 2.1876, 2.2123, 2.0296, 1.6260, 2.0849, 2.0714, + 1.9995, 2.2396, 2.2255, 2.0709, 2.1770, 2.1812, 2.1527, 2.0534, 2.0879, + 2.1128, 2.3070, 2.1067, 2.1437, 2.0932, 2.1928, 2.0556, 2.0341, 2.2084, + 2.2778, 2.3196, 2.1044, 2.0765, 2.2309, 2.3135, 2.2170, 2.0869, 2.1127, + 2.2343, 2.2393, 2.2178, 2.0545, 2.0631, 2.1401, 2.0484, 1.9397, 2.1904, + 2.0942, 2.1266, 2.2787], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-4.0474e-01, 5.0409e-01, -6.2095e-01, 1.9046e-01, -9.3106e-01, + 4.3668e-01, 6.2783e-01, -7.8965e-03, -8.5588e-01, -3.0263e-01, + -6.0073e-01, 7.6827e-02, -4.7316e-01, -8.7310e-02, -6.6791e-01, + -3.7563e-03, 1.8902e-01, -2.1686e-01, -6.6348e-01, -1.3281e+00, + 1.0986e-01, 3.3849e-01, 7.3085e-01, 
-3.8962e-01, 5.8070e-01, + 1.0180e-02, -3.1196e-01, 6.9664e-03, -8.0248e-01, 6.6610e-01, + 3.4659e-01, 1.4230e-01, 5.1505e-01, -2.6895e-01, -3.3652e-01, + -5.5124e-01, -5.4981e-01, -2.2390e-01, 7.7914e-01, 4.4125e-01, + -2.5739e-01, 6.8878e-01, 8.3604e-01, 3.8809e-01, -5.0993e-01, + -5.5098e-01, -8.5939e-01, -4.3050e-01, 3.6157e-02, -2.1150e-01, + -4.7758e-01, -6.4244e-01, 7.0543e-01, 4.7260e-01, -7.1362e-01, + 1.1644e+00, 2.3792e-01, -1.3141e-01, -1.7051e-01, -2.5365e-02, + -5.8489e-01, 6.5227e-01, 5.4440e-01, -3.2898e-01, 1.4602e-01, + 3.5652e-01, 5.1635e-01, -5.1022e-01, -4.3654e-01, -1.4454e+00, + 2.9734e-01, 9.4037e-02, 1.0461e+00, 2.1383e-01, -3.6251e-01, + -2.5680e-01, 5.1107e-01, 7.0932e-01, 4.6205e-01, -2.5158e-01, + 1.7436e-01, 1.6938e-01, -2.8672e-01, 7.2591e-01, -2.8540e-01, + -7.8770e-01, 4.9259e-01, 2.1408e-01, 1.3567e-01, -5.2772e-01, + -6.7128e-02, 4.5592e-01, 7.4472e-02, -7.0644e-02, -5.8231e-01, + 2.4495e-01, 4.7405e-01, -1.7291e-01, -6.3612e-01, 6.2670e-01, + 4.6047e-01, -6.5939e-01, 3.9048e-01, -3.0125e-01, 1.1632e-01, + 1.0360e+00, 7.5709e-01, 4.1743e-01, 1.6902e-01, -3.8525e-01, + 8.5621e-01, 3.0732e-01, -2.1138e-01, 2.7196e-01, 3.5186e-01, + -3.2635e-01, 7.3482e-01, 4.2488e-01, 7.4492e-01, 1.4549e-01, + -2.1434e-01, -3.5687e-01, -7.2550e-01, -6.6136e-01, 1.2367e-01, + -4.1577e-01, -5.3205e-01, -6.2603e-02, -1.9586e-01, -3.7930e-01, + -4.2071e-01, -9.0787e-01, 4.8806e-02, -4.6018e-01, 7.3300e-01, + 5.1992e-01, 7.1370e-01, 1.6186e-01, 2.5509e-01, -8.3998e-02, + -1.7691e-01, 4.6683e-01, -4.6551e-01, -3.3841e-01, -4.4711e-01, + -1.8539e-01, 2.9682e-01, -2.4576e-01, -2.1410e-01, -3.5070e-03, + 6.3886e-01, -7.4631e-01, 6.3523e-01, 5.2012e-01, -7.1093e-01, + 1.0273e+00, -5.6964e-02, -2.7703e-01, -2.2630e-01, -3.6590e-01, + -4.7491e-02, 5.2619e-01, 7.8483e-01, -3.9002e-01, 5.5755e-02, + 6.2525e-01, -3.9933e-01, 3.6984e-01, -4.2966e-02, -4.1367e-02, + 4.2210e-01, 1.4972e-01, -2.6309e-01, -2.4747e-01, -1.9582e-01, + 4.4209e-01, 3.6193e-01, -4.4070e-01, 
-5.1436e-01, 1.3340e-01, + -1.2178e+00, -7.4455e-01, 1.2202e-01, 4.9822e-01, 5.3379e-01, + 7.0351e-01, -1.7865e-01, 1.1071e+00, -5.1895e-01, -6.5553e-01, + -3.6015e-01, -4.4857e-01, 2.3724e-01, 5.4664e-01, 4.1513e-01, + 4.0819e-01, 1.0514e+00, 2.3382e-01, 9.6579e-01, 1.0408e-01, + -1.6920e-01, 1.0829e-01, 5.4694e-01, 1.5864e-02, 8.3642e-02, + 2.5477e-01, 8.8037e-01, 8.7462e-01, -8.6983e-02, 1.2242e-01, + -1.5298e-01, 6.3101e-02, -6.2515e-01, 5.4305e-01, 6.4796e-01, + -3.3765e-01, -7.2310e-01, -1.3832e-01, -6.2350e-01, 7.3444e-01, + 4.6106e-01, -2.6191e-01, -7.5841e-01, 4.4694e-01, 3.0497e-01, + -2.4138e-01, -5.6582e-01, -2.6223e-01, 2.7859e-01, -6.5636e-02, + -4.4158e-01, 3.0139e-01, 6.6060e-01, 8.8885e-01, -5.2302e-01, + -6.2983e-01, 3.8733e-01, 3.4346e-01, 2.3819e-01, -2.2995e-03, + 9.1729e-01, 7.6446e-01, 1.9985e-01, -4.2040e-01, 4.0301e-01, + -3.7577e-01, -3.5067e-01, -8.2791e-01, -8.2184e-01, 8.2123e-01, + -5.7524e-01, 3.0567e-01, 4.3731e-01, -1.4166e-01, 6.3945e-01, + -2.2943e-02, 5.7065e-01, -1.1292e+00, -2.9721e-01, -5.3010e-02, + 4.5299e-02, -2.3854e-01, -4.6227e-01, 3.1692e-01, -3.5330e-01, + -2.6094e-01, 8.2341e-01, 2.6003e-01, 5.0662e-03, 2.1076e-01, + 2.9852e-01, 4.6640e-01, 9.2121e-02, -7.7412e-01, -3.7900e-01, + -1.8495e-01, -1.1545e-01, 7.3789e-01, 3.2838e-01, 4.5115e-01, + -3.8806e-01, -1.0646e+00, -7.5262e-01, 2.5062e-01, -2.5364e-01, + 6.6377e-01, -5.8219e-01, 3.3624e-01, 2.8649e-01, 3.9229e-01, + -6.2032e-01, 4.4817e-01, 1.0572e+00, -2.2192e-01, 1.5616e-01, + -4.2898e-01, -5.9908e-01, -3.8231e-01, 4.8874e-01, 1.5247e-01, + -8.2282e-01, 8.2276e-01, -4.6885e-01, -2.2112e-01, -3.1448e-01, + 6.1385e-01, 6.0871e-02, 6.5478e-01, -6.8870e-01, -1.8069e-01, + 6.0595e-01, -2.1569e-01, -6.8886e-02, -2.1633e-01, 2.6627e-02, + 9.1286e-01, 3.7838e-01, 4.4088e-01, -1.9911e-02, 5.2531e-01, + -1.0447e+00, -7.8353e-01, 6.1983e-01, -3.1507e-01, 6.4922e-01, + -6.7502e-01, 4.6014e-01, -4.8093e-01, 4.9097e-01, -5.0731e-01, + 3.2845e-01, -4.6035e-01, 5.1567e-02, 
-7.1382e-01, -1.1038e-02, + 5.6548e-01, -8.6373e-01, 3.8686e-01, 4.7987e-01, -2.6022e-01, + 1.0529e+00, -3.4670e-01, -5.7703e-01, 9.2351e-01, -1.0424e+00, + 3.6434e-01, -5.1920e-01, -2.0621e-01, -5.7194e-01, -1.3815e-01, + -1.5458e-01, 7.8982e-01, -2.1564e-01, 1.4026e+00, -9.9102e-02, + -4.7951e-01, -1.3557e-01, -3.1940e-01, -4.9115e-01, -1.7228e-01, + -4.8218e-01, -2.9346e-01, -9.5089e-02, -1.2865e-01, 7.9052e-01, + -2.9325e-02, -3.4700e-01, 7.1176e-01, -1.4633e-01, 4.0358e-01, + 6.3262e-01, -1.9452e-01, 1.9849e-01, -5.0373e-01, -4.7863e-01, + -4.8630e-01, -5.2785e-01, 7.3041e-01, 3.3524e-01, -3.8826e-01, + -3.6228e-01, -6.5662e-01, -2.4258e-01, -6.7619e-02, 2.2568e-01, + -8.4740e-01, -5.3083e-02, 1.0252e+00, 3.3464e-01, -1.1910e-01, + 3.3992e-01, -2.7331e-01, 5.2138e-01, 7.7297e-01, -6.8481e-01, + 9.7702e-02, -5.2441e-01, -6.2938e-02, -7.9591e-01, -4.0645e-01, + 2.8333e-01, 3.2968e-01, -4.2421e-01, -4.1324e-01, 8.1571e-01, + -4.2995e-01, -3.8089e-01, 4.6637e-01, -4.0341e-01, 6.3807e-01, + 6.2462e-01, 5.8128e-02, 3.2726e-01, 4.4414e-01, 1.1882e+00, + -2.5828e+00, 2.7348e-01, -3.0446e-01, 1.2585e-01, 1.3078e-01, + -3.7640e-01, -4.2410e-01, 5.0738e-01, -2.4688e-01, -2.9159e-01, + 2.6960e-01, 4.7148e-01, -4.0152e-01, 2.0195e-01, 4.5138e-01, + -7.5467e-01, -8.1379e-01, -8.9726e-02, 4.4020e-01, 6.0406e-01, + -1.6593e-01, -6.8744e-01, 8.2714e-01, 2.3347e-03, -4.2319e-01, + 8.7532e-02, 3.5544e-01, -2.7449e-01, -2.0140e+00, 4.4710e-02, + -8.1419e-02, -4.1267e-01, 7.1687e-01, 4.5125e-01, -4.8160e-01, + -3.1118e-01, -1.5726e-01, -6.9149e-01, 7.0766e-01, -2.8900e-01, + 6.8034e-01, -3.8809e-01, -5.0950e-01, 1.0280e-01, 3.8500e-01, + -3.3726e-01, 4.1365e-01, -4.9350e-01, -3.2800e-02, -5.9575e-01, + 1.5721e-01, 1.9718e-01, 1.7395e-01, 8.4402e-01, -4.4557e-01, + -2.3550e-01, 1.6015e-01, -4.7043e-01, 1.8386e-01, -5.2999e-01, + -3.7753e-01, 9.7203e-04, -9.8730e-01, 2.4090e-01, 3.2854e-01, + -3.6399e-03, 5.6957e-01, 2.5796e-01, 3.6204e-01, 7.4543e-01, + -8.7224e-01, -2.4659e-01, 
2.6866e-01, 4.1437e-01, -6.1573e-01, + -2.1760e-02, 2.4079e-01, 2.4883e-01, -1.2891e-01, -5.7589e-01, + -1.9098e-01, -5.2836e-01, 8.7724e-02, 8.1355e-01, 1.0105e+00, + 2.3312e-01, -5.4144e-01, -6.5276e-01, -2.7310e-01, 4.0889e-01, + 6.8733e-01, 2.3444e-01, 2.6451e-01, -4.0372e-01, -4.0228e-01, + -6.9071e-01, -9.1914e-02, -7.1768e-01, -3.9160e-01, 9.2464e-01, + 1.1736e-01, 9.9555e-01, -3.4335e-01, 6.8837e-02, 1.0661e-01, + 9.6046e-01, 1.9288e-01, 7.9258e-01, 5.5334e-03, 2.6709e-01, + -3.5906e-01, 1.0510e+00, 4.9558e-01, 1.5815e-01, 7.9056e-01, + -2.9611e-02, -4.7788e-01, -7.4477e-02, 7.0278e-01, 2.0944e-01, + -9.9172e-01, 2.9514e-02, -3.4982e-01, 5.8798e-01, 1.0294e-01, + -7.4728e-02, 4.0436e-01, -7.3965e-01, 1.1608e-01, 9.7668e-01, + 5.6120e-01, 6.9853e-01, -1.1240e+00, -8.3312e-01, 2.2103e-02, + -6.2014e-01, -1.0602e-01, -1.7596e-01, 5.0554e-01, -5.1151e-02, + -1.7238e-01, 6.7374e-01, -3.0443e-01, -4.7714e-01, 6.0438e-02, + 5.7751e-01, 2.2188e-01, 5.4365e-02, 6.0409e-01, 2.7528e-01, + 1.4016e-01, -3.6410e-01, 2.8936e-01, 7.7458e-01, 1.7155e-01, + -3.6958e-01, 4.1315e-01, 5.7424e-01, -3.4738e-01, 9.6110e-01, + 1.0386e+00, -2.5240e-01, 6.7732e-02, -3.1303e-01, 2.3925e-01, + 2.4488e-01, -2.8734e-01, 6.3480e-01, -2.5157e-01, 2.4225e-01, + -4.3591e-02, -2.9242e-01, -7.2588e-01, 1.2189e+00, 1.8182e-01, + -3.0529e-01, -2.6477e-01, 8.6763e-01, -8.6587e-01, -4.1271e-01, + 6.5369e-02, -2.0068e-01, 2.9647e-02, 5.5062e-01, 1.5341e-01, + -3.3696e-01, 1.0672e-01, 9.7665e-02, -8.4515e-01, -9.6087e-02, + 1.6451e-01, 6.5034e-03, -8.0948e-01, 1.7525e-01, 1.7383e-01, + -3.1217e-01, 5.1740e-01, -4.0527e-01, 2.9478e-01, 6.3920e-01, + 5.8840e-01, 4.5157e-02, 2.6356e-01, -5.4925e-02, -4.1065e-01, + 7.8009e-01, -1.6714e-01, -2.3860e-01, 7.1367e-01, 4.4418e-01, + 3.1620e-01, -2.0515e-01, 8.4948e-01, 3.2021e-01, 7.0564e-02, + 1.7485e-02, -2.4201e-01, -6.2400e-01, 7.2354e-01, 6.4737e-01, + -3.9444e-01, -4.5600e-01, 1.8392e-01, 1.3942e-01, -6.9550e-01, + 5.6519e-01, -3.4485e-01, -1.0811e+00, 
2.2609e-03, -7.7264e-01, + -1.7520e-01, 2.9658e-01, 3.4381e-01, -2.5195e-01, 8.1881e-01, + 6.2122e-01, 1.8847e-01, 6.6781e-01, -2.9861e-01, -1.6733e-01, + 8.1848e-01, 5.7276e-01, 2.9802e-01, -5.0510e-01, 1.6306e-01, + 2.1710e-01, 8.4910e-01, 8.8421e-01, -4.5161e-01, -4.3119e-01, + -6.4349e-01, 4.1669e-01, -6.1655e-01, 1.6923e-01, -6.1267e-01, + -7.0699e-02, 4.5768e-01, 4.9780e-02, 5.7231e-01, 9.1744e-01, + -4.6267e-01, -1.1711e-01, 1.1242e+00, 8.5179e-01, -9.9988e-02, + 9.1609e-02, 2.5796e+00, -6.0888e-01, -2.4259e-01, -1.4821e-01, + 6.9903e-01, -4.4285e-01, -1.1718e-01, 4.0595e-01, -4.6315e-01, + -6.0281e-01, 2.5561e-01, -6.3263e-01, -8.4082e-01, -2.2050e-01, + 3.4140e-01, -5.1675e-01, 9.8693e-01, -9.4655e-01, 2.5957e-01, + -3.4720e-01, 5.9071e-01, 6.8600e-01, -3.4691e-01, 8.6725e-01, + 8.1034e-01, -6.1649e-01, -1.9221e-01, -6.0154e-01, 4.3298e-01, + 2.1760e-01, -5.0489e-02, 2.7929e-01, -3.3724e-01, 3.8897e-03, + -2.4199e-01, 7.9453e-02, 6.1785e-01, -4.8319e-01, -6.2845e-01, + -2.7386e-01, -4.2604e-01, -4.6069e-01, -6.7830e-01, -2.9960e-01, + -1.0792e-01, 9.2764e-01, -1.7870e-01, 4.6413e-01, -2.7450e-01, + -5.8849e-01, -4.7641e-01, -1.3057e-01, 5.1836e-01, 2.7529e-02, + 4.1431e-01, 1.7077e-01, 5.5491e-01, -9.9315e-02, -9.0158e-01, + 4.0775e-01, 2.8138e-01, 5.2781e-01, -2.2875e-01, 3.6168e-01, + 6.1371e-02, -3.9472e-01, 6.2913e-01, 9.8502e-01, 6.8541e-01, + 3.2804e-01, -4.7059e-01, -7.8782e-01, 4.4955e-01, -5.3380e-02, + 1.4579e-01, -5.4171e-01, -5.6868e-01, 9.9710e-01, 5.0651e-01, + 1.2785e-02, -6.9887e-01, 1.9614e-01, 4.3504e-01, 8.8311e-01, + 3.1585e-01, 2.0645e-01, 6.1529e-01], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-0.0039, -0.0192, -0.0103, ..., 0.0052, 0.0099, 0.0056], + [-0.0152, -0.0052, -0.0067, ..., -0.0012, -0.0065, -0.0135], + [ 0.0042, 0.0065, 0.0006, ..., 0.0062, 0.0171, 0.0149], + ..., + [-0.0145, -0.0176, 0.0174, ..., -0.0138, -0.0058, -0.0098], + [ 0.0048, 0.0245, 0.0091, ..., -0.0059, 
0.0057, -0.0145], + [-0.0083, 0.0048, -0.0003, ..., -0.0104, 0.0195, 0.0123]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.3210, 0.2307, -0.0475, ..., 0.0169, -0.0356, -0.0140], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 5.7831e-03, -8.9035e-03, 5.7757e-05, ..., 6.5651e-03, + -8.6365e-03, -6.7825e-03], + [-1.5414e-04, -4.9782e-03, 2.2430e-02, ..., -2.0050e-02, + -1.9369e-03, 7.0000e-03], + [-1.2711e-02, -7.1526e-03, -3.1647e-02, ..., 6.2637e-03, + -2.0340e-02, 1.4626e-02], + ..., + [-6.7062e-03, 5.0068e-04, -8.3008e-03, ..., 3.5477e-03, + 2.7447e-03, -2.1606e-02], + [ 2.0172e-02, -1.5497e-03, -1.4412e-02, ..., 5.5504e-04, + -1.2497e-02, 7.7095e-03], + [-5.1003e-03, 1.3168e-02, -4.6082e-03, ..., -8.7051e-03, + -2.3022e-03, 1.5236e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.8961e-02, 1.2787e-02, 9.3918e-03, -4.4594e-03, -2.9327e-02, + -1.2665e-02, -1.4824e-02, -5.9021e-02, -2.2614e-02, 4.1718e-02, + 1.3733e-02, -3.5095e-02, -3.7689e-02, -3.9642e-02, -2.7161e-03, + -2.5986e-02, 9.3460e-03, 2.6489e-02, -1.2917e-02, 4.2389e-02, + -2.0542e-03, 3.3264e-02, -7.0419e-03, -2.8320e-02, -4.1840e-02, + 5.9853e-03, 2.1957e-02, 2.8915e-02, 5.2856e-02, 2.3895e-02, + 6.1655e-04, 3.9032e-02, 1.3557e-02, 1.1131e-02, -1.9007e-03, + 4.7516e-02, 1.2726e-02, 3.2593e-02, 2.2568e-02, 3.3508e-02, + 2.0615e-02, -2.6016e-02, 1.1467e-02, -4.1313e-03, 2.7771e-02, + 6.9702e-02, 3.0533e-02, 8.4305e-03, 2.9709e-02, 1.2260e-02, + -1.7212e-02, -1.8600e-02, 2.7863e-02, 1.4679e-02, 6.6833e-02, + 6.7566e-02, -6.8817e-03, 8.4152e-03, 1.9699e-02, 9.3155e-03, + 2.4200e-02, -1.6876e-02, 1.3756e-02, 3.3142e-02, 4.0710e-02, + 1.9028e-02, 1.0834e-02, -1.3260e-02, -2.3842e-03, -6.6589e-02, + -2.7679e-02, 1.1187e-03, -5.0049e-02, -3.3020e-02, -1.6129e-02, + 2.2812e-02, 8.4839e-03, 2.3689e-03, 
-6.3354e-02, -3.5706e-03, + -1.9455e-02, -3.4142e-03, 3.6530e-02, -1.0811e-02, 1.4145e-02, + 2.9541e-02, 2.4918e-02, 2.1378e-02, -2.0828e-02, 3.6011e-02, + -5.6458e-04, 1.9064e-03, 1.2054e-02, -1.9730e-02, 2.1027e-02, + -1.3870e-02, -2.9053e-02, 6.4049e-03, -4.4525e-02, -2.8152e-02, + 1.3695e-03, 3.7842e-02, -3.5675e-02, 1.4519e-02, 3.6182e-03, + -7.3280e-03, 3.5858e-02, 2.4017e-02, 4.1351e-02, 3.2776e-02, + -6.4011e-03, 1.7242e-02, -1.5612e-03, 3.1311e-02, 2.5665e-02, + -6.5651e-03, 1.8250e-02, 1.1436e-02, 1.9958e-02, 2.0828e-02, + 3.8849e-02, -3.1082e-02, -4.8798e-02, -5.4993e-02, 3.0411e-02, + 3.0441e-02, 2.0538e-02, -3.9581e-02, -8.4534e-03, 4.3335e-03, + 6.3629e-03, -1.4992e-02, 5.9776e-03, 1.7700e-02, -2.4979e-02, + 5.0690e-02, 3.2063e-03, -3.2990e-02, -2.0859e-02, -6.4880e-02, + 1.0536e-02, 1.4133e-03, -2.2858e-02, 6.4468e-03, 2.6779e-03, + -1.7410e-02, -2.5543e-02, 5.3329e-03, 2.0813e-02, 3.3905e-02, + 6.7177e-03, 2.1088e-02, 4.3091e-02, 4.9164e-02, 2.2106e-03, + 8.6914e-02, -4.0970e-03, -1.0353e-02, 2.5604e-02, -2.4490e-02, + 8.5754e-03, 4.3945e-03, 3.2013e-02, 2.2629e-02, -1.7807e-02, + -1.9806e-02, -1.8753e-02, -1.6403e-02, -4.0649e-02, 6.3934e-03, + -2.0584e-02, 2.7649e-02, -3.3600e-02, 1.2939e-02, 2.8732e-02, + 5.0888e-03, 8.4915e-03, -3.0380e-02, -7.9193e-03, 2.8717e-02, + 4.5135e-02, -2.2354e-02, -2.2537e-02, 1.6815e-02, 1.0452e-02, + 5.0087e-03, -3.0308e-03, 4.1924e-03, -1.5198e-02, -2.9793e-03, + 7.6660e-02, -1.6541e-02, -2.8168e-02, -7.1220e-03, -3.7689e-02, + -2.5192e-02, -4.6310e-03, -4.5624e-02, -4.1260e-02, 5.1842e-03, + -1.3863e-02, 1.6113e-02, 2.9465e-02, -3.0327e-03, 1.0544e-02, + -1.9348e-02, 4.7394e-02, -1.9409e-02, -6.8054e-03, -3.7689e-02, + -1.0872e-02, 2.6123e-02, 2.0203e-02, 2.8961e-02, 4.1656e-03, + 2.5513e-02, 2.0142e-02, 7.9498e-03, 2.9739e-02, -7.3624e-03, + -1.0452e-02, -5.0507e-03, -2.3865e-02, 2.8076e-02, 1.7365e-02, + -5.5206e-02, 2.0554e-02, -4.1870e-02, 1.3680e-02, 1.2794e-02, + 1.3763e-02, 3.4698e-02, 1.2520e-02, 
-1.4160e-02, -2.3804e-02, + -5.1636e-02, 2.7771e-02, -2.7023e-02, 3.0563e-02, -1.5541e-02, + 2.4582e-02, -2.0844e-02, 2.2354e-02, -2.4078e-02, 4.7951e-03, + 5.1514e-02, 2.8427e-02, 6.7383e-02, -1.3474e-02, -3.5065e-02, + 3.7212e-03, 5.2277e-02, 1.4748e-02, -3.3264e-02, 2.5116e-02, + 5.3215e-03, -1.2360e-02, -9.1782e-03, -2.6123e-02, -1.5993e-03, + -3.6469e-02, 2.7084e-02, 9.0027e-03, -7.9498e-03, -4.9805e-02, + 2.5620e-02, 5.5027e-04, 4.5715e-02, -2.8946e-02, 3.6697e-03, + -1.2688e-02, 1.5526e-02, -5.7869e-03, -5.4413e-02, -1.0658e-02, + -7.0076e-03, 4.3427e-02, -5.3406e-02, 3.8666e-02, 1.4244e-02, + 3.2837e-02, 1.4427e-02, 1.3985e-02, -1.3618e-02, 6.5460e-03, + -2.6169e-02, -9.1858e-03, 2.1744e-02, 3.4790e-02, -4.5868e-02, + 3.6983e-03, -3.9520e-02, 5.7869e-03, 1.7136e-02, -7.0524e-04, + 3.6072e-02, -2.9099e-02, 1.3741e-02, -5.5786e-02, 1.8707e-02, + 1.7776e-02, 1.4793e-02, 3.0563e-02, -1.1759e-03, -7.0343e-03, + -1.8616e-02, -7.0477e-04, -3.0151e-02, 2.7039e-02, -1.0996e-03, + 4.2839e-03, 1.9363e-02, 1.9821e-02, -7.0686e-03, 2.9221e-02, + -2.9312e-02, -4.6692e-02, -4.4708e-02, 1.7822e-02, 3.0777e-02, + 2.0050e-02, 3.2135e-02, -1.4893e-02, 2.1439e-02, 3.8623e-01, + -7.0129e-02, -9.4788e-02, -5.1849e-02, 1.3657e-02, -5.4817e-03, + 2.8809e-02, 1.1253e-02, -9.3842e-03, 2.5925e-02, -2.6611e-02, + 9.3231e-03, -2.2339e-02, -1.2421e-02, 2.6398e-02, 1.1768e-03, + -4.1199e-02, 9.4757e-03, -2.1606e-02, -3.9093e-02, 3.1113e-02, + -4.8279e-02, 5.3986e-02, 4.7264e-03, -4.7668e-02, 2.0275e-03, + -4.2648e-03, -5.8990e-02, 5.2063e-02, 1.6937e-02, 7.5951e-03, + -9.3536e-03, 1.3672e-02, -2.8553e-03, -1.3069e-02, 3.7628e-02, + -2.0187e-02, 1.5152e-02, -5.2734e-02, 1.3647e-03, 4.1290e-02, + 1.5991e-02, 1.1473e-03, 6.7253e-03, -1.7715e-02, 4.7516e-02, + -1.0345e-02, -3.4058e-02, -1.2016e-02, -4.4861e-02, -1.4091e-02, + 3.7933e-02, -3.1647e-02, -4.6814e-02, 7.5798e-03, 7.0000e-03, + -6.3660e-02, 4.2480e-02, -4.9011e-02, -3.1586e-02, -1.9226e-02, + 1.3418e-03, -4.6692e-03, -3.4027e-02, 
1.7273e-02, 6.9275e-03, + -1.3649e-02, 7.4654e-03, 2.4170e-02, 1.0431e-04, 6.6223e-02, + -2.2598e-02, 1.8219e-02, 5.1147e-02, 1.7303e-02, -4.0405e-02, + -6.0730e-03, -3.3691e-02, -1.9012e-02, 7.7019e-03, 2.1496e-03, + -6.3591e-03, -3.7003e-03, -2.0294e-02, 9.7179e-04, -1.9089e-02, + 4.1931e-02, -2.8214e-02, 1.6708e-02, -2.8549e-02, 3.8177e-02, + -1.2573e-01, 1.0773e-02, 1.9058e-02, 2.6188e-03, 2.8412e-02, + 2.8366e-02, 2.4776e-03, 1.2312e-03, -1.3908e-02, 1.8539e-02, + -3.2440e-02, 1.6312e-02, 1.5778e-02, 1.7212e-02, -6.2637e-03, + 1.9257e-02, 1.8768e-02, -1.8646e-02, 4.2877e-02, 7.7332e-02, + 6.6872e-03, -2.4597e-02, -1.7700e-02, 2.2888e-02, -2.9144e-02, + 3.9032e-02, -1.5167e-02, 1.4137e-02, -1.0522e-01, -3.0396e-02, + -2.9755e-02, 4.9553e-03, -4.7302e-03, 6.8359e-03, 2.0172e-02, + -1.7578e-02, -1.6190e-02, -6.3820e-03, 1.4679e-02, 2.4231e-02, + -2.3590e-02, 1.4351e-02, -4.9667e-03, 3.7964e-02, -2.4857e-02, + -4.9713e-02, 5.2002e-02, -1.8738e-02, -1.4105e-03, -1.1841e-02, + -4.4464e-02, -2.4124e-02, -6.8909e-02, -3.9978e-02, 3.4285e-04, + -1.1314e-02, 3.5262e-04, -7.2365e-03, -3.4210e-02, -1.7502e-02, + -9.1629e-03, 6.1005e-02, 4.6967e-02, 8.7662e-03, 4.1840e-02, + 4.6783e-02, -1.5045e-02, -5.5725e-02, -1.1307e-02, 9.8953e-03, + 2.8076e-02, -3.3783e-02, -1.9240e-04, -6.7101e-03, 6.9094e-04, + 2.4429e-02, 1.6830e-02, -3.5492e-02, 5.9624e-03, -1.6205e-02, + -2.9663e-02, -8.1100e-03, 1.1604e-02, 1.9363e-02, -1.1749e-02, + 7.3910e-05, -4.3793e-02, -3.8910e-02, 7.4005e-03, 1.4145e-02, + -8.0109e-03, 1.2535e-02, 2.8973e-03, 1.5526e-02, -3.7476e-02, + 8.3313e-03, 3.2196e-02, 4.2084e-02, 1.5343e-02, 3.4149e-02, + -6.2141e-03, -1.6312e-02, 2.6016e-02, 3.3051e-02, -4.0771e-02, + 2.5650e-02, 6.4964e-03, -2.9785e-02, -3.1403e-02, -3.9703e-02, + -1.0674e-02, -5.3101e-02, -3.0121e-02, 6.3972e-03, -2.8152e-02, + 6.0394e-02, -5.6427e-02, 4.2358e-02, -1.8692e-02, 2.8595e-02, + 2.9587e-02, -5.4512e-03, 3.2257e-02, -2.7817e-02, 7.8125e-03, + -1.3382e-02, 3.1891e-03, 7.9117e-03, 
3.7537e-03, -3.0460e-03, + -5.0964e-02, 1.0399e-02, 2.2995e-02, -5.2109e-03, -6.6986e-03, + -4.9561e-02, 6.8932e-03, 6.7139e-03, 2.4902e-02, 1.6083e-02, + -8.6975e-04, -2.6413e-02, 2.0828e-02, 2.1286e-02, -1.0796e-02, + -1.5564e-03, -5.8655e-02, 1.8890e-02, -7.1907e-03, -2.8702e-02, + -6.8626e-03, 1.5297e-02, -3.2135e-02, -4.1931e-02, -1.3748e-02, + 4.0131e-03, -5.4779e-03, -7.3509e-03, 5.8258e-02, -3.4515e-02, + -2.0142e-02, -8.4763e-03, -4.4922e-02, 2.3926e-02, -2.6932e-02, + 2.0294e-02, 4.2084e-02, 1.6983e-02, -5.3467e-02, -2.2415e-02, + -4.5052e-03, -1.5137e-02, 1.5381e-02, 3.7231e-02, -1.3514e-03, + 3.6163e-03, 2.1103e-02, 2.0142e-02, 5.3040e-02, 3.6377e-02, + -7.5989e-03, 5.5008e-03, -1.7075e-02, -2.9892e-02, 1.0551e-02, + -3.6163e-02, -1.8738e-02, -5.9433e-03, -2.8381e-02, -2.8671e-02, + 1.4938e-02, 2.2995e-02, -5.3589e-02, 2.4155e-02, -2.2415e-02, + 6.8359e-03, -1.9028e-02, -3.9902e-03, -3.0121e-02, -2.0981e-03, + -6.4453e-02, 2.8778e-02, -3.8483e-02, 7.0419e-03, -4.4006e-02, + -4.5593e-02, 1.8921e-03, 2.4109e-02, -3.8391e-02, -5.6992e-03, + -2.4826e-02, 1.6586e-02, -3.5309e-02, -3.6430e-03, 1.2115e-02, + 5.4810e-02, -3.6560e-02, 2.7771e-02, -5.2643e-02, -3.8505e-04, + 2.2278e-02, 1.0681e-02, 4.1351e-02, 1.7939e-03, -1.9012e-02, + -1.8173e-02, -3.4882e-02, 5.5199e-03, -4.0497e-02, 6.7177e-03, + 2.4681e-03, 1.6891e-02, 2.5146e-02, 4.0070e-02, 7.3891e-03, + -2.0142e-02, -6.4125e-03, -7.8659e-03, -1.4868e-03, 7.1220e-03, + -2.9434e-02, -1.7654e-02, 1.4297e-02, 1.4519e-02, 1.7502e-02, + 1.0345e-02, -2.4170e-02, 2.0309e-02, 3.1097e-02, -1.1818e-02, + 3.6072e-02, 2.1362e-02, -3.7050e-04, -2.3483e-02, -3.8681e-03, + 2.2079e-02, -3.0991e-02, -3.6835e-02, -1.7578e-02, 5.9700e-03, + -2.9816e-02, -6.9122e-03, -8.8257e-02, -4.4594e-03, -1.8082e-02, + -3.7323e-02, 1.7603e-01, -8.0948e-03, -1.6403e-02, 1.6113e-02, + -7.1449e-03, 9.5444e-03, 4.4212e-03, 4.0619e-02, -2.9068e-02, + 2.0874e-02, -3.3356e-02, 5.0323e-02, 1.2749e-02, -4.8637e-03, + -5.3101e-03, 8.6594e-03, 
8.5754e-03, 3.8567e-03, 4.4769e-02, + 1.1604e-02, 5.2551e-02, -2.6474e-03, -4.3091e-02, 8.9951e-03, + -3.3569e-02, 4.7951e-03, 1.8875e-02, -2.7618e-02, 5.1666e-02, + 3.7720e-02, 1.8768e-02, 3.1189e-02, -7.6103e-03, 1.9409e-02, + -2.0828e-02, -2.7084e-02, 3.2440e-02, 7.1182e-03, -6.8665e-02, + -4.0913e-04, 1.2344e-02, 1.5488e-02, 4.6229e-04, -4.8828e-02, + -2.3132e-02, 5.8136e-02, -1.5335e-02, 7.5226e-03, 2.4719e-02, + 2.4231e-02, 8.5754e-03, 2.9892e-02, 7.0534e-03, 3.0396e-02, + -4.3030e-03, 1.9150e-02, 1.6823e-03, 1.2360e-02, 2.5757e-02, + 1.2772e-02, -1.3840e-02, -4.5959e-02, -6.2256e-02, 4.0016e-03, + 1.6203e-03, -2.7069e-02, -6.0883e-03, -6.6284e-02, -2.9633e-02, + -3.8719e-03, 1.0887e-02, 2.1530e-02, -3.0304e-02, 2.3060e-03, + 3.8727e-02, -1.4484e-04, -1.0635e-02, -4.5395e-03, 6.2561e-03, + 4.6356e-02, 5.3864e-02, 6.8550e-03, 2.3193e-03, -2.4506e-02, + -1.9852e-02, 5.5122e-03, 8.6746e-03], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.4389, 1.5492, 1.4444, 1.4369, 1.4696, 1.5982, 1.4798, 1.4962, 1.5059, + 1.4201, 1.5556, 1.4056, 1.4634, 1.5500, 1.3880, 1.4895, 1.4535, 1.6242, + 1.4925, 0.4319, 1.4099, 1.5599, 1.5428, 1.4731, 1.5599, 1.4931, 1.5788, + 1.4391, 1.6308, 1.4246, 1.5024, 1.5215, 1.4943, 1.5267, 1.5500, 1.5011, + 1.4709, 1.5334, 1.5236, 1.4504, 1.5937, 1.4595, 1.5022, 1.5378, 1.5704, + 1.5227, 1.5181, 1.4495, 1.5018, 1.4275, 1.4919, 1.5308, 1.4562, 1.5681, + 1.5308, 1.4581, 1.4151, 1.5746, 1.5263, 1.5096, 1.5268, 1.5130, 1.3768, + 1.5480, 1.4876, 1.5399, 1.4630, 1.4655, 1.5274, 1.4453, 1.4500, 1.5840, + 2.3376, 1.5532, 1.5260, 1.4554, 1.4595, 1.5422, 1.4586, 1.5038, 1.5498, + 1.4491, 1.4934, 1.5165, 1.5430, 1.4469, 1.4588, 1.4770, 1.4158, 1.4627, + 1.5009, 1.4705, 1.5292, 1.5613, 1.5451, 1.4401, 1.5326, 1.4154, 1.4870, + 1.5448, 1.5034, 1.5059, 1.4505, 1.5611, 1.4406, 1.4348, 1.4452, 1.4387, + 1.4832, 1.5420, 1.5072, 1.5112, 1.4903, 1.4850, 1.4967, 1.5254, 1.4557, + 1.4718, 1.5248, 
1.4829, 1.5204, 1.4817, 1.4837, 1.5054, 1.5230, 1.5211, + 1.4579, 1.5346, 1.5020, 1.4881, 1.4620, 1.4641, 1.4179, 1.4865, 1.3921, + 1.5071, 1.5182, 1.5576, 1.4620, 1.4443, 1.5125, 1.4419, 1.5722, 1.4472, + 1.4750, 1.5109, 1.4204, 1.5067, 1.5041, 1.4859, 1.5257, 1.4809, 1.4586, + 1.5638, 1.4914, 1.2267, 1.5272, 1.5182, 1.5851, 1.5238, 1.4617, 1.4648, + 1.4793, 1.5202, 1.4991, 1.5037, 1.4156, 1.5364, 1.4462, 1.5589, 1.5135, + 1.5042, 1.4492, 1.4746, 1.5703, 1.5694, 1.5920, 1.4744, 1.5622, 1.4728, + 1.4683, 1.5139, 1.4662, 1.4459, 1.5067, 1.4089, 1.4473, 1.9656, 1.4960, + 1.4923, 1.4494, 1.4368, 1.5290, 1.6168, 1.4908, 1.4451, 1.5191, 1.5695, + 2.9963, 1.4412, 1.4245, 1.5307, 1.5404, 1.6068, 1.4556, 1.5329, 1.3620, + 1.5110, 1.4351, 1.4680, 1.4922, 1.4024, 1.4582, 1.3978, 1.5052, 1.5299, + 1.4814, 1.4928, 1.4387, 1.6266, 1.4509, 1.5028, 1.3828, 1.4767, 1.5010, + 1.4637, 1.5415, 1.5347, 1.4912, 1.5763, 1.4288, 1.5410, 1.5556, 1.5185, + 1.4852, 1.5436, 1.5161, 1.5171, 1.4735, 1.5772, 1.5198, 1.4617, 1.5643, + 1.5262, 1.5619, 1.4639, 1.4431, 1.4988, 1.4283, 1.5170, 1.5242, 1.5298, + 1.3956, 1.4130, 1.5891, 1.5797, 1.4557, 1.4643, 1.4404, 1.4528, 1.5400, + 1.4213, 1.4825, 1.4541, 1.5162, 1.4745, 1.5409, 1.4437, 1.5563, 1.5220, + 1.4978, 1.4322, 1.5253, 1.4922, 1.5099, 1.4052, 1.4925, 1.4202, 1.4800, + 1.5443, 1.4790, 1.5203, 1.5604, 1.5245, 1.5537, 1.4405, 1.4779, 1.5587, + 1.5019, 1.4787, 1.4604, 1.5525, 1.6145, 1.4254, 1.4455, 1.4977, 1.4991, + 1.4780, 1.5286, 1.5118, 1.4985, 1.4406, 1.4756, 1.4888, 1.5407, 1.4589, + 1.4604, 1.5004, 1.4316, 1.5032, 1.5052, 1.5273, 1.4469, 1.4871, 1.4798, + 1.5064, 1.4596, 1.4211, 1.5157, 1.4834, 1.4306, 1.4624, 1.5115, 1.5144, + 1.0832, 1.4433, 1.5137, 1.4210, 1.4692, 1.4682, 1.5019, 1.5391, 1.4631, + 1.5099, 1.5117, 1.4275, 1.4380, 1.4603, 1.5109, 1.4955, 1.4468, 1.4753, + 1.4881, 1.5287, 1.4950, 1.5418, 1.4729, 1.5359, 1.5092, 1.5260, 1.5131, + 1.5075, 1.4478, 1.5727, 1.5024, 1.4902, 1.4793, 1.5357, 1.4492, 1.5143, + 1.4284, 1.4360, 
1.5019, 1.5772, 1.5235, 1.4726, 1.5132, 1.4744, 1.5066, + 1.4778, 1.5114, 1.5889, 1.5091, 1.5202, 1.4418, 1.4208, 1.4492, 1.5511, + 1.5025, 1.4133, 1.5739, 1.5198, 1.4711, 1.5452, 1.5289, 1.4598, 1.5236, + 1.4898, 1.4469, 1.4270, 1.5182, 1.4165, 1.5311, 1.4409, 1.4210, 1.5046, + 1.4362, 1.5257, 1.6063, 1.5472, 1.4609, 1.6293, 1.4389, 1.4431, 1.4479, + 1.4984, 1.4706, 1.4638, 1.4930, 1.4133, 1.4526, 1.4260, 1.4582, 1.4404, + 1.2730, 2.3238, 1.5373, 1.5353, 1.4663, 1.5103, 1.4978, 1.4814, 1.4722, + 1.4676, 1.5279, 1.5444, 1.4845, 1.4893, 1.5243, 1.4853, 1.6193, 1.5047, + 1.5249, 1.4747, 1.4290, 1.4990, 1.5122, 1.4788, 1.4516, 1.4248, 1.4096, + 1.5029, 1.4956, 1.1624, 1.4618, 1.5129, 1.5083, 1.4871, 1.5393, 1.5344, + 1.4575, 1.5062, 1.5657, 1.4710, 1.4620, 1.4261, 1.4575, 1.4434, 1.5555, + 1.4455, 1.4711, 1.5682, 1.4642, 1.4363, 1.4250, 1.5141, 1.5018, 1.4898, + 1.4853, 1.5390, 1.5320, 1.4198, 1.4610, 1.4487, 1.5688, 1.5402, 1.5826, + 1.5324, 1.5616, 1.5575, 1.4719, 1.4533, 1.5122, 1.2552, 1.4833, 1.4171, + 1.4476, 1.4818, 1.4313, 1.5298, 1.5112, 1.5557, 1.4849, 1.5036, 1.4919, + 1.5493, 1.5096, 1.4353, 1.4918, 1.5246, 1.4687, 1.5117, 1.4312, 1.4720, + 1.4250, 1.5285, 1.4419, 1.5280, 1.4315, 1.4631, 1.4872, 1.5459, 1.6067, + 1.4754, 1.4919, 1.5278, 1.5539, 1.4711, 1.4978, 1.4965, 1.4713, 1.4707, + 1.5351, 1.4653, 1.4548, 1.4723, 1.4779, 1.4827, 1.5278, 1.5125, 1.4794, + 1.5375, 1.5805, 1.5112, 1.5454, 1.4380, 1.5259, 1.4707, 1.4437, 1.5549, + 1.6015, 1.4510, 1.4803, 1.3817, 1.5958, 1.4488, 1.5055, 1.5146, 1.5294, + 1.5361, 1.4402, 1.4803, 1.6003, 1.5086, 1.5265, 1.4244, 1.4718, 1.5720, + 1.5039, 1.4664, 1.4566, 1.3609, 1.4783, 1.4991, 1.4960, 1.6116, 1.5236, + 1.4866, 1.5166, 1.5421, 1.4833, 1.4550, 1.5109, 1.4517, 1.4751, 1.4535, + 1.4929, 1.5054, 1.4567, 1.4902, 1.4949, 1.4979, 1.5163, 1.5112, 1.4693, + 1.5772, 1.5449, 1.4362, 1.4490, 1.6147, 1.4914, 1.6548, 1.4735, 1.4384, + 1.4582, 1.4245, 1.5034, 1.4928, 1.4265, 1.5251, 1.5082, 1.5644, 1.5962, + 1.4326, 1.5006, 
1.5278, 1.6330, 1.7276, 1.5964, 1.5586, 1.4862, 1.5098, + 1.5259, 1.5411, 1.5315, 1.5077, 1.5071, 1.5442, 1.6036, 1.5586, 1.4586, + 1.5649, 1.4736, 1.4905, 1.4643, 1.5023, 1.5050, 1.4885, 1.4790, 1.5414, + 1.5510, 1.4828, 1.3514, 1.4503, 1.5604, 1.5021, 1.5154, 1.5488, 1.4103, + 1.4800, 1.4122, 1.6422, 1.5825, 1.4450, 1.3146, 1.5293, 1.5225, 1.5205, + 1.5006, 1.4796, 1.5416, 1.5103, 1.5915, 1.4845, 1.5031, 1.5782, 1.5030, + 1.4565, 1.4765, 1.4934, 1.5769, 1.4670, 1.5313, 1.5007, 1.4586, 1.5791, + 1.5857, 1.4441, 1.5946, 1.4943, 1.4492, 1.4897, 1.5223, 1.4263, 1.4230, + 1.5314, 1.5365, 1.6068, 1.3709, 1.4172, 1.4926, 0.3475, 1.5345, 1.4760, + 1.4426, 1.4324, 1.5224, 1.5465, 1.5081, 1.4960, 1.5665, 1.5345, 1.5173, + 1.5048, 1.4816, 1.5093, 1.5004, 1.5425, 1.5010, 1.5042, 1.4334, 1.5301, + 1.4441, 1.4909, 1.3882, 1.5067, 1.5217, 1.5300, 1.4790, 1.4483, 1.5049, + 1.4574, 1.4995, 1.4635, 1.4108, 1.4958, 1.4742, 1.4559, 1.4804, 1.5102, + 1.4960, 1.4830, 1.5441, 1.4797, 1.4928, 1.4054, 1.2281, 1.5226, 1.5581, + 1.5337, 1.4610, 1.5058, 1.5363, 1.5828, 1.4924, 1.5631, 1.4330, 1.4567, + 1.5843, 1.4818, 1.4862, 1.4260, 1.5339, 1.4564, 1.5012, 1.4777, 1.4922, + 1.4950, 1.5238, 1.5327, 1.5206, 1.4421, 1.5222, 1.5461, 1.5083, 1.5667, + 1.5199, 1.4212, 1.5084, 1.5419, 1.5771, 1.4018, 1.5208, 1.3893, 1.5437, + 1.5531, 1.4182, 1.4350], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 9.5040e-02, 6.2264e-02, -1.1090e-01, -1.8604e-02, 8.0698e-02, + 1.3616e-01, 6.9032e-02, 3.6952e-02, -1.3561e-01, -2.6607e-03, + 5.8487e-02, -3.3613e-02, -8.9103e-02, -1.5545e-02, -9.2796e-02, + -5.0928e-02, -1.2088e-02, -2.7953e-02, -1.5458e-02, 4.8689e+00, + -9.4001e-02, 4.3498e-02, -1.5978e-02, 2.9513e-03, -5.3258e-02, + 1.4907e-02, -1.7444e-02, 5.4565e-03, 5.2729e-02, -8.5186e-02, + 4.1780e-02, 2.2448e-02, 2.6540e-02, 7.7438e-02, 5.7399e-02, + -3.7843e-02, 3.1810e-02, -3.5782e-02, -1.3307e-01, 4.2217e-02, + 4.0042e-02, 3.2673e-02, -1.7885e-03, 1.6705e-02, 
-4.0591e-02, + 3.4436e-03, 6.0339e-02, -2.1147e-02, 2.8313e-02, 5.5186e-02, + 1.9493e-02, -1.3385e-02, -7.6236e-03, 2.4183e-02, 9.9285e-02, + 8.9684e-02, -1.0770e-01, -3.9934e-02, 3.3857e-02, 1.4410e-02, + 6.5640e-02, -8.2179e-02, 4.2060e-02, 3.6734e-02, 9.0417e-02, + -3.1195e-02, 7.3319e-02, 9.6951e-02, 1.6255e-02, 2.2244e-01, + 4.1289e-02, 5.4411e-02, 4.8112e-01, 8.8067e-02, -1.0920e-01, + 9.3275e-02, 2.4059e-02, -1.5016e-01, -8.2098e-02, 1.9829e-02, + -2.0126e-02, -5.1567e-02, -2.6316e-02, 1.0021e-01, 4.6702e-02, + 5.0399e-02, 1.4458e-01, 1.1531e-01, -2.2371e-02, 2.3077e-02, + 2.5645e-03, 1.2902e-02, 2.5156e-02, -5.4277e-02, -2.7611e-02, + 6.6039e-02, -7.8203e-02, -2.1121e-02, -6.1290e-02, -5.3134e-02, + -3.1687e-03, -7.9709e-02, 8.7348e-03, -6.4941e-02, 4.9401e-02, + -8.7840e-03, -5.3413e-03, -8.4726e-02, 2.7666e-02, -3.4226e-02, + -6.2891e-02, -3.7197e-02, 3.3137e-02, 8.0032e-02, 1.5725e-02, + -4.9238e-02, -3.9217e-02, -7.3844e-02, -2.2711e-02, 1.0376e-01, + -2.0982e-02, -1.8312e-01, -7.5614e-02, -4.5955e-02, -1.4845e-01, + 3.5872e-04, -1.3807e-01, -6.7375e-03, 5.1624e-02, -4.4845e-02, + -1.1063e-01, 7.7368e-02, -4.5386e-02, -3.4162e-02, -6.7513e-02, + 5.0009e-02, -3.4471e-02, 1.9803e-02, -2.7813e-02, -2.6261e-02, + 2.8243e-02, -5.6916e-02, 7.3801e-02, 3.9814e-02, 6.2373e-02, + -1.2426e-02, -2.7946e-02, -1.9696e-02, 1.6410e-02, 1.0248e-01, + 1.8027e-01, -6.1605e-02, -9.9918e-02, -5.3417e-03, -3.2127e-02, + -2.8592e-01, -2.2483e-02, -7.7138e-02, 4.1591e-02, -7.1350e-04, + 1.4369e-01, -1.2360e-01, -2.4565e-02, 6.2265e-02, 1.0753e-02, + 4.7586e-03, -2.7731e-02, 4.4703e-02, -3.8944e-02, 2.0401e-02, + -5.4677e-02, 6.2356e-02, 3.8168e-02, 5.0544e-02, 6.1430e-02, + -8.3724e-02, 1.9396e-02, 6.3526e-02, -2.8940e-02, 2.4832e-02, + 7.3654e-03, -7.3348e-02, 5.7481e-02, -3.2183e-02, 1.5765e-02, + -1.6311e-01, 3.8052e-02, 3.4197e-04, -3.0377e-02, -7.3608e-02, + 3.8378e-02, -2.0785e-02, 9.1732e-02, 6.0364e-02, -1.0010e-01, + 3.6494e-02, -5.0895e-02, -1.4789e-01, 8.7741e-02, 
2.5649e-02, + -1.3797e-01, -3.8446e-02, -3.6134e-02, 1.7410e-02, 4.8862e-02, + 1.1463e-01, -9.7149e-02, 6.3472e-02, 1.2722e-01, 2.0616e-02, + 1.0777e-02, 1.9428e-01, -2.0818e-02, -5.5920e-02, -9.7729e-02, + -1.9469e-02, -2.6490e-02, 3.9081e-02, -1.1816e-01, -2.0222e-01, + -1.7587e-02, -3.8713e-02, 8.3494e-02, -2.2572e-02, 3.5223e-02, + -3.0260e-02, 9.7330e-02, -1.5827e-02, 1.2929e-01, -2.1608e-02, + -1.2937e-02, 2.7590e-02, -1.8385e-02, 6.9238e-02, 1.2346e-02, + 2.1508e-02, -8.5435e-02, 3.3443e-02, 7.3312e-02, 1.8266e-02, + 8.2059e-02, 8.2763e-02, 4.0149e-02, 3.6304e-02, 3.5831e-02, + 6.1176e-02, 7.7396e-02, -4.7230e-02, 3.2182e-02, -3.1280e-03, + 4.4441e-02, 4.4095e-02, 5.9928e-02, -7.2963e-02, 7.7081e-02, + -7.7831e-02, -6.3993e-02, -7.7120e-02, 1.9159e-02, 3.0940e-02, + -4.3952e-02, -6.4650e-02, -3.4125e-02, -5.8668e-02, -2.7959e-02, + 4.8970e-02, -5.8047e-02, -8.7014e-03, -7.8959e-02, -2.2804e-02, + -3.4480e-02, 9.5613e-03, 7.5918e-02, -6.5928e-02, 1.3879e-02, + 4.7323e-02, -1.1317e-01, -2.2184e-03, 6.7904e-02, -1.0016e-01, + 1.8474e-01, 1.4786e-01, 7.0485e-03, 2.2563e-02, 5.1969e-02, + 4.1149e-02, 8.6695e-02, -6.1285e-02, 1.0310e-01, -1.0924e-01, + -2.6213e-02, -1.9226e-02, 1.0561e-01, 4.9150e-02, 6.3478e-02, + 1.8294e-02, 1.6091e-01, 4.7983e-02, 4.8363e-02, 7.2620e-02, + 5.2564e-02, 3.9703e-03, -4.5733e-02, 1.0324e-01, -9.4249e-03, + 1.3086e-02, 1.1960e-01, -8.4199e-02, 6.5710e-02, -5.5719e-02, + 3.2044e-02, -4.8804e-03, -7.7953e-02, -7.9623e-02, -8.9439e-03, + -7.2873e-02, 7.2873e-04, 1.8083e-02, 1.4896e-01, -4.6932e-02, + -5.7677e-02, 2.1030e-01, -8.1739e-02, -4.1775e-02, 1.2972e-01, + -1.9701e-01, 4.4433e-02, 1.9681e-02, 3.2872e-02, -1.5593e-01, + 1.5408e-01, -2.0397e-02, 1.2069e-01, -3.3059e-02, 2.9468e-02, + 1.2204e-01, -3.2076e-03, -5.6352e-02, 9.4064e-03, -3.4305e-02, + -1.5335e-01, 2.1700e-02, -2.2277e-02, -6.3310e-02, 9.7901e-02, + 8.1202e-03, 8.5831e-02, 9.2371e-02, 5.5351e-02, -2.6530e-02, + -6.1205e-02, 3.0950e-02, -2.3011e-02, 4.4549e-02, 
-2.0652e-02, + -5.5406e-02, -3.3195e-02, 5.0550e-02, -1.2650e-02, -9.6969e-02, + 1.5714e-02, 1.8129e-02, 9.1402e-03, -9.1455e-02, -4.2209e-02, + -5.1691e-03, 3.0798e-02, 8.4623e-02, 4.9172e-02, 2.8383e-02, + 9.3418e-02, -2.3945e-02, -7.1348e-02, 2.9395e-03, -3.3130e-02, + 1.6025e-02, 7.9770e-02, 7.0224e-02, -2.8055e-02, 2.4734e-02, + -9.9713e-02, 9.2963e-02, 1.6934e-02, -4.6360e-03, -2.3557e-02, + -4.9357e-03, -2.3687e-02, -7.7652e-02, -8.3567e-02, -1.0004e-01, + 3.3868e-02, -5.1590e-04, -8.0284e-02, -1.6152e-02, 4.4573e-02, + -3.8656e-03, 1.1956e-02, 5.6796e-02, 2.0969e-02, 7.8286e-02, + 1.1165e-01, -6.3958e-02, 7.6898e-02, 4.6967e-02, -1.0099e-01, + -1.3392e-01, -5.2005e-02, 3.4906e-02, 1.6815e-02, -3.4399e-02, + -7.8110e-02, 5.6494e-02, 6.3118e-02, -5.4425e-02, -1.3512e-02, + -1.3473e+00, -3.9723e-02, 2.4948e-02, -5.1947e-02, -7.7290e-03, + -1.5215e-02, 2.3884e-02, -1.6945e-02, -8.0580e-02, -5.4007e-02, + 2.1998e-02, -1.8827e-01, -8.7725e-02, 5.0141e-02, -6.8379e-02, + -2.7706e-02, -1.7930e-02, -3.4555e-02, 6.4826e-02, 9.8114e-02, + -1.2598e-02, 3.6801e-02, 3.4150e-02, 1.1018e-01, -7.9386e-02, + -4.1934e-02, -8.4118e-02, -1.8066e-02, 1.6404e-01, 1.1735e-01, + -1.3154e-01, -1.0468e-02, -7.2428e-02, 1.0006e-01, 2.2076e-02, + 3.2061e-02, 2.0222e-02, -1.0035e-01, 5.2576e-02, 2.7860e-02, + -2.0873e-02, 3.7349e-02, 2.0231e-02, -4.2276e-02, -7.7922e-02, + 3.1693e-02, 3.0677e-02, -9.3555e-02, 3.2186e-02, 4.0509e-02, + 1.8245e-02, 4.8887e-02, -7.9230e-02, 7.1811e-02, 2.9829e-02, + 1.9192e-02, -1.1067e-01, -8.1617e-02, 2.2437e-02, -2.6069e-02, + 4.3656e-02, 1.6310e-01, 9.6680e-02, 1.0566e-02, 9.1838e-02, + 1.1688e-01, -1.2390e-01, 3.2972e-02, 1.2624e-01, -9.4054e-02, + -4.5634e-03, -1.5043e-02, 1.2885e-01, 9.9710e-02, 3.7776e-03, + 4.5239e-02, -9.0217e-03, 4.9700e-03, -2.4734e-02, -1.2538e-01, + 4.6608e-02, -8.2679e-02, 9.4270e-02, -7.2957e-02, 5.8114e-03, + 5.3164e-04, -4.3835e-02, 3.8315e-02, -4.7578e-02, -8.0964e-02, + 3.8714e-02, 1.0235e-01, 7.0860e-02, 3.7793e-02, 
2.9038e-02, + -7.7706e-02, -1.6005e-02, -4.0638e-02, 7.4940e-02, -9.7327e-02, + 1.2264e-03, 1.6966e-02, 1.9095e-01, 1.3447e-01, -1.0382e-01, + -6.7178e-02, -1.4883e-02, 6.4356e-02, -3.1611e-02, -3.0012e-02, + -2.2480e-02, -1.9876e-02, -2.2346e-02, 7.6023e-03, -7.9597e-02, + 2.8261e-02, 5.0167e-02, -1.8636e-02, -1.0160e-01, -3.2626e-02, + 3.7352e-02, -1.4968e-02, 2.9950e-02, -5.7603e-02, -7.1363e-02, + -3.4022e-02, 3.4849e-02, 3.3342e-02, 3.8673e-03, -1.7286e-02, + -4.4154e-02, 6.3981e-02, 2.6404e-02, 7.4898e-02, 3.6818e-03, + 3.2078e-02, 1.3596e-02, -1.0983e-01, 4.5060e-02, -3.5186e-03, + -5.2234e-03, -1.4830e-02, -4.7432e-02, -1.6053e-03, 1.2008e-01, + -6.6264e-02, 1.1926e-01, 2.6320e-02, 2.2969e-02, 1.5679e-01, + 3.5541e-02, -2.1966e-02, 3.5414e-02, 1.3858e-02, 2.8476e-02, + 7.5914e-02, -1.7632e-02, -1.0996e-01, -2.1218e-02, 4.4041e-02, + -1.6149e-02, -7.5358e-03, 6.0889e-02, 4.7087e-02, -1.8710e-02, + -1.2684e-01, -2.2078e-02, 2.9795e-02, 1.0746e-01, 8.8426e-02, + 2.8203e-02, 4.2237e-02, -5.8773e-02, 4.3712e-02, -5.0027e-03, + 1.5752e-02, 6.0129e-02, 1.2071e-01, -2.2050e-02, 1.1836e-02, + -5.5813e-02, 4.6402e-02, 9.4760e-03, -7.1275e-03, 5.8639e-02, + -9.9549e-02, -1.0721e-01, 5.5813e-04, -1.2840e-04, 2.6538e-02, + -8.8939e-02, 7.2796e-02, -8.9444e-02, 1.8669e-02, -2.8006e-02, + 7.6746e-02, 3.4517e-02, -2.7705e-02, -2.0928e-02, -6.2216e-02, + -1.2193e-01, 4.9761e-02, 6.8638e-02, -7.1114e-03, -9.1348e-02, + 5.0051e-02, -4.5998e-04, 4.2629e-02, 3.8347e-02, 5.8768e-03, + -3.8492e-02, -8.8272e-04, 3.2111e-02, 9.0239e-02, -3.9676e-02, + 1.2868e-01, -2.6537e-02, 1.0337e-01, 1.7039e-01, 3.4185e-02, + -5.4864e-03, -6.1429e-02, 4.7363e-02, 5.8705e-02, -4.7458e-02, + 3.6429e-02, -2.7022e-02, 1.0612e-01, -5.0205e-02, 3.3695e-02, + 2.0430e-02, 3.3421e-02, 9.1960e-02, 5.0020e-02, 1.0917e-01, + 1.3181e-01, 2.9231e-02, -6.8652e-02, 7.7897e-02, -9.2801e-02, + 4.2380e-02, -1.3279e-01, -2.2904e-02, 5.7570e-02, 8.7368e-02, + -4.0879e-02, 6.1447e-03, -1.1034e-01, -1.8915e-02, 
1.1407e-01, + 3.7383e-02, -1.3176e-01, -8.9242e-02, 5.5264e-02, 9.7983e-03, + -2.2093e-01, 1.1690e-02, 2.6277e-02, -2.1914e-04, -2.7651e-02, + 9.1245e-03, -1.7339e-02, -4.2814e-02, -5.7284e-03, 7.8398e-02, + 5.2745e-03, -4.2589e+00, -6.7703e-04, 3.3652e-02, -2.4619e-02, + -9.9009e-02, -3.9888e-02, -4.6650e-02, -4.1620e-02, 3.5950e-02, + -8.1256e-02, -2.8324e-02, 4.3199e-02, -9.3154e-03, -3.0566e-02, + 5.8488e-02, 3.1941e-02, -9.1054e-02, 4.4687e-02, -7.7019e-02, + 7.2983e-02, 4.5990e-02, -4.4156e-02, 1.3127e-02, -1.6789e-01, + -2.4004e-02, 7.0575e-02, 5.2642e-02, 3.7899e-03, 4.3697e-02, + -2.6423e-02, 8.3382e-02, 5.5484e-02, 3.6222e-02, 3.1565e-02, + 6.2682e-03, 1.0727e-03, -1.8432e-02, -7.0222e-02, -4.8251e-04, + 2.5970e-02, 2.0257e-02, 5.5975e-02, -5.8172e-02, 6.7692e-02, + -9.9726e-03, -1.9012e-01, 1.2354e-01, 2.4521e-02, 1.1634e-01, + 1.0389e-01, 3.1179e-02, 7.4662e-02, 6.8046e-02, -2.9729e-02, + 2.2585e-03, 1.1500e-02, 1.7597e-03, -1.8727e-02, -4.7353e-02, + 1.0485e-01, -1.6798e-01, -6.5276e-02, 7.2302e-02, -4.9853e-02, + -3.5061e-02, -1.2070e-01, -5.1736e-02, 7.9296e-03, -5.0152e-02, + -3.6957e-02, -5.1987e-03, -5.0143e-02, -5.8339e-02, 7.5306e-02, + -2.9533e-02, 7.2287e-02, 9.0682e-02, 2.2872e-02, -5.3548e-02, + 2.0132e-02, 6.3997e-02, 6.5266e-02, -6.9182e-02, -1.0422e-01, + -7.9938e-02, 1.0629e-01, -3.4558e-02], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[-2.1896e-02, 2.0416e-02, 2.3441e-03, ..., 1.0908e-04, + 6.9656e-03, 1.4427e-02], + [ 3.4351e-03, -6.1264e-03, 2.0737e-02, ..., -1.4359e-02, + -2.4033e-02, -1.4053e-02], + [ 1.8631e-02, -2.3880e-02, -5.8861e-03, ..., 5.5122e-03, + -3.2663e-04, -2.0580e-03], + ..., + [-7.4425e-03, -9.9869e-03, 1.6281e-02, ..., 2.2583e-02, + 1.5378e-05, 1.3184e-02], + [ 7.1678e-03, -2.3453e-02, 1.8890e-02, ..., 1.1101e-02, + 3.5305e-03, 3.5629e-03], + [-1.4053e-02, -1.7029e-02, -9.1400e-03, ..., -1.0704e-02, + 1.3428e-02, 3.3951e-04]], device='cuda:1', dtype=torch.float16, 
+ requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3743, -0.4089, -0.3171, ..., -0.2976, -0.0021, -0.3103], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0040, 0.0537, 0.0006, ..., -0.0110, 0.0033, 0.0026], + [ 0.0102, -0.0116, -0.0276, ..., -0.0249, -0.0013, 0.0136], + [-0.0247, 0.0174, -0.0146, ..., -0.0083, -0.0184, -0.0121], + ..., + [-0.0018, -0.0283, 0.0097, ..., 0.0254, -0.0131, 0.0048], + [ 0.0004, 0.0063, -0.0341, ..., -0.0153, 0.0024, 0.0111], + [-0.0007, 0.0055, -0.0035, ..., -0.0027, -0.0048, -0.0002]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 5.4077e-02, 1.3013e-01, -5.7983e-02, 1.5717e-02, 3.2715e-02, + 9.5520e-02, 1.2794e-02, -6.7444e-02, -1.2537e-01, 6.1417e-03, + -2.1545e-02, -5.6244e-02, -1.3098e-01, -8.5876e-02, -1.1154e-02, + -6.3354e-02, -1.7914e-02, -1.3092e-02, -3.7479e-03, -9.3323e-02, + -6.9122e-03, 1.2047e-02, 4.1595e-02, -1.2341e-01, 5.6030e-02, + 1.6724e-02, 6.0059e-02, 8.7769e-02, -5.6396e-02, 4.5135e-02, + 4.3274e-02, 9.0881e-02, -3.3398e-03, 6.0730e-02, 5.9242e-03, + -5.7678e-03, -8.1558e-03, -3.1082e-02, 5.3253e-02, 7.5500e-02, + 3.5614e-02, 2.6764e-02, 2.6077e-02, 2.8214e-02, -2.6535e-02, + 7.6233e-02, 4.6722e-02, 8.8577e-03, 2.0466e-03, 3.2928e-02, + -1.6464e-02, -3.5736e-02, 1.1829e-01, 2.7435e-02, 8.1360e-02, + 1.0065e-01, -2.0920e-02, -6.6109e-03, 3.3813e-02, -1.5533e-02, + 4.5837e-02, -7.1869e-03, 7.8430e-03, -3.3783e-02, 6.3721e-02, + 7.6782e-02, 4.5715e-02, -4.1016e-02, 4.4861e-02, 1.8143e-02, + -4.2053e-02, -3.8757e-02, 3.9459e-02, 4.6356e-02, -7.9651e-02, + -2.8778e-02, 1.2016e-03, 2.1317e-02, -1.1902e-01, -4.4128e-02, + -1.7929e-02, 3.0701e-02, 2.0523e-03, 3.7689e-02, 5.8594e-03, + -4.1199e-02, 4.6661e-02, 6.6071e-03, -5.0049e-02, 8.2016e-03, + -3.7476e-02, 6.2943e-03, -7.1754e-03, -8.7402e-02, -9.7504e-03, + 5.4092e-03, 1.8677e-02, -2.2797e-02, 
-1.3171e-01, -5.3009e-02, + -7.5150e-03, -5.6610e-02, -5.5656e-03, 4.2267e-03, -1.4664e-02, + -2.2949e-02, 1.0779e-01, 5.8380e-02, 3.7518e-03, 1.9089e-02, + -1.2642e-02, 4.3121e-02, 6.2134e-02, 9.3506e-02, 1.0779e-01, + -2.4292e-02, -4.2381e-03, -6.3324e-04, 8.0627e-02, 7.5951e-03, + 2.9419e-02, -1.2256e-01, -1.1737e-01, -3.6621e-02, 4.0970e-03, + 3.1769e-02, -3.0045e-02, 2.9526e-02, 5.0812e-02, -1.9028e-02, + 1.6342e-02, -1.6373e-02, -3.4607e-02, -5.0140e-02, 5.9967e-02, + 1.6113e-01, 2.9709e-02, 3.1261e-03, -4.0771e-02, -4.9377e-02, + -9.1324e-03, 1.8524e-02, -4.6753e-02, 7.7759e-02, -2.4277e-02, + 1.5205e-02, -5.0873e-02, 3.6407e-02, 1.1772e-02, 6.4819e-02, + 6.5430e-02, -7.9773e-02, 3.2196e-02, 7.5806e-02, -2.5482e-02, + -7.5073e-03, -7.5684e-02, 4.1138e-02, 2.2705e-02, -3.2776e-02, + 3.8452e-02, 6.3801e-04, 6.2866e-03, -1.5839e-02, -3.5675e-02, + 1.3695e-02, -4.1656e-02, 8.6441e-03, -6.6895e-02, 2.8839e-02, + 1.4000e-03, 2.5787e-02, -1.7883e-02, 1.1032e-02, -1.0094e-02, + -2.7054e-02, 1.2245e-02, -2.2964e-02, -3.4332e-02, 5.9326e-02, + 9.2239e-03, -1.3293e-01, -7.0312e-02, -3.7109e-02, 1.0480e-01, + -2.2598e-02, -1.0231e-02, 3.6597e-01, -3.4851e-02, 4.1275e-03, + 5.7281e-02, -8.4778e-02, 2.5238e-02, 1.2749e-02, -2.2781e-02, + 4.7028e-02, 7.9575e-03, -1.0938e-01, 1.2695e-01, 2.5497e-02, + -7.2571e-02, 6.2744e-02, 4.2053e-02, 9.9335e-03, 3.8574e-02, + 4.9225e-02, 5.7495e-02, 3.7506e-02, -3.7750e-02, -9.9609e-02, + 5.7495e-02, 1.9272e-02, 2.8351e-02, 3.5667e-03, 3.9697e-04, + 2.4910e-03, 9.3384e-03, 5.7556e-02, -8.8989e-02, -5.3741e-02, + -1.5190e-02, -3.2867e-02, 3.6682e-02, 9.9060e-02, 2.8717e-02, + -1.0742e-01, -4.7791e-02, -2.6199e-02, 5.9601e-02, 6.9618e-03, + 2.2018e-02, 1.8509e-02, -1.4549e-02, -1.3170e-03, -3.5217e-02, + -5.4840e-02, -4.6570e-02, 2.1164e-02, 6.7932e-02, 3.7109e-02, + 3.8696e-02, -2.5635e-02, 9.4543e-02, -2.4261e-02, 1.5930e-02, + 4.0863e-02, 4.0527e-02, -1.0506e-02, -5.0842e-02, 7.0114e-03, + 2.9755e-02, 2.1561e-02, 8.7524e-02, 
-1.0522e-01, 1.0016e-01, + 5.6305e-02, -4.6234e-02, -1.5442e-02, -2.8503e-02, -1.6332e-04, + -5.5054e-02, -2.4738e-03, -4.9896e-02, -1.0181e-01, -1.2988e-01, + -3.0304e-02, 1.6689e-03, 1.8372e-02, -3.8300e-02, -1.8911e-03, + -8.8654e-03, 8.3740e-02, 5.7800e-02, -1.5884e-02, -7.7576e-02, + 5.2673e-02, 3.2654e-02, 9.7046e-03, 4.5502e-02, -5.1422e-02, + 1.2772e-02, 4.8584e-02, -4.2969e-02, 1.9821e-02, -2.5436e-02, + -2.6970e-03, -9.7885e-03, -2.3636e-02, 3.4607e-02, -6.4636e-02, + 6.8481e-02, -3.0792e-02, 3.5858e-02, -6.0699e-02, 3.0121e-02, + 4.3793e-02, 5.9090e-03, 6.3232e-02, 9.8190e-03, 5.9937e-02, + -4.1847e-03, 6.5125e-02, -3.9856e-02, 2.0767e-02, -6.0730e-02, + -3.4851e-02, 7.5874e-03, -7.2937e-02, -4.6158e-03, -1.9928e-02, + 9.6497e-02, 7.0251e-02, 3.7750e-02, -3.4973e-02, 5.8075e-02, + -3.2928e-02, -6.7566e-02, -2.8946e-02, 8.9600e-02, 7.0740e-02, + -1.3000e-01, 3.0487e-02, -4.3457e-02, -2.4277e-02, 3.4985e-01, + -1.3965e-01, -6.5369e-02, -9.9548e-02, 3.9764e-02, -1.6895e-01, + 8.6853e-02, -6.1554e-02, 6.6452e-03, -5.3589e-02, -3.1342e-02, + 4.0321e-03, -5.3223e-02, 5.7159e-02, 2.5040e-02, -4.1748e-02, + -4.8035e-02, -1.7853e-02, -1.1917e-02, 1.6266e-02, 1.2947e-02, + -2.1835e-02, 8.0872e-03, 4.2450e-02, -1.1212e-01, -7.9285e-02, + 7.2083e-02, 3.1769e-02, 3.6835e-02, -1.0254e-02, -5.7716e-03, + -5.8929e-02, -4.0039e-02, -2.1820e-02, -2.5192e-02, 7.8430e-03, + -1.0059e-01, -6.3782e-02, -8.7280e-02, 1.7120e-02, 4.0039e-02, + -1.1322e-02, -1.0315e-01, 1.0278e-01, -7.9468e-02, 4.2999e-02, + -1.4030e-02, -6.9885e-02, -2.0172e-02, -6.4087e-02, -2.1561e-02, + 2.7435e-02, 2.4155e-02, 3.0685e-02, 4.2053e-02, -4.9347e-02, + -6.3416e-02, 3.6438e-02, -6.3232e-02, -8.7097e-02, -2.8366e-02, + -2.8931e-02, -7.0435e-02, -3.1204e-02, 3.8055e-02, -3.4760e-02, + -3.0624e-02, 7.2327e-02, 7.9041e-02, 5.4359e-03, -1.2573e-02, + -8.9905e-02, -2.4567e-02, 2.7084e-02, -2.0187e-02, 4.4556e-03, + 7.3730e-02, 6.7825e-03, 2.8248e-03, 6.6986e-03, 7.2746e-03, + -2.2919e-02, -9.0182e-05, 
-8.2169e-03, -5.8716e-02, 1.3069e-02, + -1.4679e-02, 4.9530e-02, 2.9053e-02, -4.5685e-02, 3.2463e-03, + -1.0907e-01, -3.2288e-02, -1.1620e-02, -3.1311e-02, 8.2031e-02, + 1.0834e-02, -3.6526e-03, 2.4414e-02, -1.3501e-01, 9.6207e-03, + 1.9760e-02, 1.4168e-02, -4.5715e-02, 8.3679e-02, 1.8021e-02, + 2.8198e-02, 2.4445e-02, 5.9471e-03, 9.4849e-02, 1.0663e-01, + 1.8204e-02, -5.3467e-02, 4.1718e-02, 7.1960e-02, -1.2573e-01, + 6.1646e-02, -2.7527e-02, -5.3162e-02, 2.3518e-03, -3.5828e-02, + -6.7505e-02, -8.8787e-04, 9.1410e-04, 3.1921e-02, 3.0121e-02, + -3.2654e-02, -6.5002e-02, -6.7810e-02, 1.0950e-01, 3.9642e-02, + 6.2180e-03, -2.6733e-02, 2.5520e-03, -1.6983e-02, -8.4595e-02, + -1.6708e-02, 5.6396e-02, -7.7637e-02, 1.9348e-02, -3.6469e-02, + -7.5317e-02, 1.0544e-02, -4.8218e-02, 4.8027e-03, -6.1371e-02, + -5.4138e-02, -4.2664e-02, -7.4280e-02, -5.8807e-02, -5.2368e-02, + 2.7435e-02, 1.2366e-01, 3.3997e-02, 5.6122e-02, 2.0584e-02, + 8.3618e-02, -1.7443e-03, -3.7270e-03, -1.6403e-02, -2.6951e-03, + -1.7853e-02, -5.8105e-02, 9.6252e-02, 2.5726e-02, -6.2744e-02, + 5.7373e-03, 5.4840e-02, -1.0388e-01, 5.1575e-02, -4.5685e-02, + 2.1408e-02, -3.2715e-02, 6.8665e-02, 1.9913e-02, 3.2898e-02, + 3.3325e-02, -2.5101e-02, 1.5900e-02, -2.3422e-02, 4.2328e-02, + -3.1830e-02, 8.2031e-02, 6.8665e-02, -2.9129e-02, -8.8684e-02, + -4.8981e-02, -5.0171e-02, 1.8295e-02, 4.5891e-03, 4.2511e-02, + -2.2690e-02, 3.3752e-02, 8.6914e-02, 8.7463e-02, -9.0942e-02, + 3.5004e-02, 1.8845e-02, 3.2898e-02, -5.1819e-02, -4.3121e-02, + 1.3115e-02, 2.9495e-02, 1.5961e-02, -9.1095e-03, -4.0741e-02, + 6.4758e-02, -1.4880e-01, 4.6204e-02, -2.0798e-02, 1.3634e-02, + 4.1473e-02, -2.6871e-02, 1.2810e-02, -5.9021e-02, -4.3793e-02, + -1.5961e-02, 1.0028e-01, -2.3091e-04, 8.6670e-03, 1.3039e-02, + 1.0872e-02, 1.1694e-01, -1.8494e-02, 1.4246e-04, -3.5439e-03, + -9.9792e-02, -5.7251e-02, -3.1311e-02, 7.8857e-02, -1.2369e-03, + -4.5319e-02, -8.3740e-02, -1.8402e-02, -4.1321e-02, 6.1646e-02, + 1.0004e-03, -8.3237e-03, 
-8.2321e-03, 1.5274e-02, -6.9031e-02, + 1.0010e-02, 7.0992e-03, -7.8735e-02, -4.0985e-02, -1.7624e-02, + 5.8502e-02, 5.5878e-02, -3.2593e-02, 1.1520e-02, 2.4986e-03, + 8.0017e-02, -3.4580e-03, -3.2471e-02, -1.1490e-02, -1.7075e-02, + -7.3914e-02, 4.4220e-02, 1.0443e-01, -1.5915e-02, -1.0651e-02, + -1.4099e-02, -4.8737e-02, 4.7363e-02, 6.4575e-02, 1.1938e-01, + 1.6205e-02, 4.7150e-02, 2.2629e-02, 2.3514e-02, -5.4436e-03, + -3.2735e-04, -1.6495e-02, -7.3853e-02, 3.0045e-02, 5.2551e-02, + -5.2246e-02, -1.0040e-01, 2.6993e-02, -5.7159e-02, 3.2616e-03, + 3.1372e-02, 6.9275e-02, -7.4280e-02, 4.3976e-02, -3.7323e-02, + -5.0018e-02, 6.4575e-02, -1.2062e-02, 3.6926e-02, 3.2349e-02, + -3.4088e-02, 6.3293e-02, 1.2466e-02, -5.5664e-02, -2.7481e-02, + -4.1656e-03, -9.3412e-04, 4.5753e-04, -2.2842e-02, 2.7985e-02, + -9.8724e-03, 5.6267e-04, 2.1423e-02, 4.3091e-02, -1.7273e-02, + 1.1279e-01, -9.0759e-02, 5.3864e-03, 5.6061e-02, 5.5939e-02, + -2.3941e-02, -3.7170e-02, 5.0903e-02, 1.8997e-02, -4.5532e-02, + 1.2476e-01, 3.7060e-03, -1.0094e-02, -7.6111e-02, -3.5706e-02, + -4.8462e-02, -3.5919e-02, 5.0659e-02, 2.3468e-02, 9.0698e-02, + 3.3264e-02, 1.0193e-01, 6.3599e-02, -1.6205e-02, 7.1602e-03, + 4.4983e-02, -1.2030e-01, 3.1219e-02, -4.3671e-02, 5.3314e-02, + -5.5122e-03, 5.2277e-02, 1.9272e-02, 1.6037e-02, 7.8430e-02, + 2.0462e-02, -1.3867e-01, -7.6477e-02, 8.5632e-02, 1.6220e-02, + 7.0679e-02, 2.4048e-02, -4.8584e-02, 1.1574e-02, 6.0913e-02, + -5.9174e-02, 4.8065e-03, -3.1204e-02, 9.5139e-03, 1.3557e-02, + -7.6050e-02, 2.8015e-02, -5.7678e-02, 1.2917e-02, -2.8717e-02, + 2.3331e-02, -8.3801e-02, -4.6539e-02, -6.9847e-03, -1.4511e-02, + 6.0844e-03, -4.9347e-02, 2.1454e-02, 8.4381e-03, -6.3965e-02, + 9.4482e-02, -5.5725e-02, 2.3880e-02, -1.3222e-02, 2.0313e-03, + 3.6682e-02, 3.2166e-02, 9.2697e-03, -8.5693e-02, 8.2626e-03, + -3.2410e-02, -6.1340e-02, 4.4312e-02, -7.8308e-02, 6.9031e-02, + 2.3438e-02, -1.8707e-02, 3.7842e-02, 1.0576e-03, 1.6190e-02, + -1.0370e-01, -6.0181e-02, 
6.3232e-02, 3.1796e-03, -1.2854e-01, + 1.2077e-02, -8.4839e-02, -5.0385e-02, -3.2532e-02, -4.2191e-03, + -1.1971e-02, -5.3650e-02, -1.0857e-02, 4.0588e-02, 4.0741e-02, + 6.7940e-03, -1.4281e-04, 6.3416e-02, 1.0284e-01, 5.7800e-02, + 4.9408e-02, 3.4637e-02, -3.8300e-02, 6.1218e-02, -3.0823e-02, + 8.1940e-03, -8.8730e-03, -4.8096e-02, -8.6975e-02, 8.5449e-03, + -2.7332e-03, -1.3794e-01, 5.2872e-03, -1.0187e-01, 4.6387e-02, + -1.6403e-02, 4.8035e-02, -6.8787e-02, 2.8000e-02, 3.2806e-02, + 6.9336e-02, 1.6815e-02, -2.1332e-02, 1.0016e-01, 5.1849e-02, + -3.4760e-02, 1.0941e-02, -3.6896e-02, 5.2338e-02, 1.2726e-02, + -2.6688e-02, 3.6530e-02, 9.5215e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.1306, 2.0981, 2.1663, 2.0829, 1.7951, 2.1608, 2.2010, 2.0909, 2.2347, + 2.1553, 2.0846, 2.2494, 2.2396, 2.1285, 2.1372, 2.0650, 2.1098, 2.1325, + 2.2690, 1.6700, 2.2613, 2.1879, 2.1590, 2.0855, 2.1393, 2.1793, 2.0697, + 2.0706, 2.0371, 2.0495, 2.1932, 2.1432, 2.1056, 2.1178, 2.0806, 2.0542, + 2.3682, 2.0192, 2.0318, 2.2403, 2.0286, 2.0738, 2.1933, 2.1454, 2.0341, + 2.0834, 2.1870, 1.9817, 2.0910, 2.0121, 2.2396, 2.1091, 2.0853, 2.0440, + 2.2116, 2.0897, 2.0882, 1.9730, 2.0929, 2.1411, 2.1363, 2.1475, 2.0930, + 2.2281, 2.0509, 2.1295, 2.2263, 1.9539, 2.2303, 1.9053, 2.2407, 2.2046, + 2.2446, 2.3049, 2.2940, 2.2953, 2.1716, 2.0946, 2.2047, 1.9443, 2.0475, + 2.0535, 2.1913, 2.1422, 2.2416, 2.0748, 2.1369, 2.0475, 2.1431, 2.0515, + 2.2646, 2.1117, 2.0603, 2.0523, 2.0881, 2.1070, 2.1603, 2.0667, 2.2130, + 2.0835, 2.1439, 2.0512, 2.1125, 2.1444, 2.0461, 1.9122, 2.1266, 2.1854, + 2.0578, 2.0436, 2.0820, 2.3937, 2.1196, 2.2232, 2.2387, 2.0481, 2.1932, + 2.1360, 2.0806, 2.0098, 2.1164, 1.9950, 2.2060, 2.2159, 2.0401, 2.1459, + 2.0444, 2.1784, 2.2300, 2.1567, 2.1416, 2.0632, 2.1148, 2.0197, 2.1419, + 2.1275, 2.1502, 2.1169, 2.0210, 2.1767, 2.2734, 2.0312, 2.1906, 2.0491, + 2.1156, 2.0720, 2.1628, 2.0191, 2.1939, 2.1975, 
2.1269, 1.9829, 2.2568, + 2.1802, 2.2635, 1.4847, 2.0943, 2.0363, 2.1120, 2.2034, 2.0194, 2.2002, + 2.1097, 2.0688, 2.0510, 2.1756, 2.1981, 2.0952, 2.0924, 2.0707, 2.0671, + 2.1223, 2.0485, 2.1029, 2.0321, 2.1441, 2.1635, 2.3610, 2.0506, 2.1866, + 2.2366, 2.1572, 2.1832, 2.2729, 2.0558, 2.0246, 2.1644, 1.3161, 2.0913, + 2.1085, 2.2300, 2.1219, 2.1403, 2.2661, 2.0771, 2.1381, 2.2069, 2.1151, + 0.9310, 2.0980, 2.2353, 2.0280, 2.1296, 2.0789, 2.1014, 2.0711, 2.1582, + 2.2039, 2.1553, 2.0854, 2.1312, 1.9870, 2.0954, 2.0356, 2.0757, 2.1471, + 2.0934, 2.1607, 2.0877, 2.1701, 2.2665, 2.1066, 1.8172, 2.1928, 2.1054, + 2.1729, 2.2115, 2.0668, 2.1303, 1.9412, 2.2088, 2.2902, 2.1197, 2.0195, + 1.9842, 2.3541, 2.2992, 2.1719, 2.1185, 2.3326, 2.0597, 2.2121, 2.0990, + 2.1920, 2.0137, 2.1492, 2.0695, 2.0967, 2.0664, 2.2442, 2.1980, 2.0537, + 2.1239, 2.2239, 2.1115, 2.0760, 2.1666, 2.0412, 2.3497, 2.2904, 2.3730, + 2.0719, 2.2055, 2.1495, 2.2552, 2.1622, 2.1594, 2.1742, 2.1108, 2.1134, + 2.0495, 2.1505, 2.1692, 2.3587, 2.0808, 2.1928, 2.1677, 2.1319, 2.2773, + 2.1028, 2.1036, 1.9044, 2.1900, 2.0670, 2.1364, 2.0927, 2.0784, 2.0589, + 2.1780, 2.0068, 2.0723, 2.0721, 2.1785, 2.1709, 2.0420, 2.0222, 2.0584, + 2.3348, 2.0092, 2.1329, 2.0837, 2.1588, 2.1440, 2.2351, 2.1050, 2.0425, + 2.1074, 2.0984, 2.1500, 2.1535, 2.1695, 2.1366, 2.0727, 2.1510, 2.1025, + 2.0234, 2.1057, 2.1375, 2.0724, 2.1730, 2.1779, 2.1295, 2.1127, 2.0601, + 0.4807, 2.3870, 2.1398, 2.2474, 2.2304, 2.2226, 2.0155, 2.0902, 2.2085, + 2.1103, 2.0833, 2.0171, 2.1137, 2.1066, 2.1834, 2.1329, 2.2222, 2.0514, + 2.1268, 2.2083, 2.0987, 2.1011, 2.3077, 2.1029, 2.2446, 2.0978, 1.8194, + 2.1265, 2.1962, 2.2371, 2.1642, 2.0771, 2.0407, 2.2704, 2.2185, 2.0338, + 2.3705, 2.1359, 2.1662, 2.2103, 2.0700, 2.1263, 2.1311, 2.1644, 2.2601, + 2.1973, 2.1671, 2.1047, 2.1370, 2.3222, 2.0790, 2.0729, 2.1185, 2.1862, + 2.0944, 2.1344, 2.0862, 1.9719, 2.2108, 2.1109, 2.2341, 2.1641, 2.1254, + 2.3113, 2.1620, 2.1923, 2.1841, 2.1487, 2.2404, 
2.1601, 2.0500, 2.0576, + 2.1119, 2.2564, 2.1640, 2.3131, 2.1320, 2.2965, 2.2485, 2.2046, 2.0289, + 1.9622, 2.0788, 2.0702, 2.1659, 2.1610, 2.1714, 2.1783, 2.1003, 2.1383, + 1.5933, 2.3478, 2.1246, 1.9951, 2.1622, 2.1895, 2.1283, 2.0383, 2.3347, + 2.2002, 2.1307, 2.0826, 2.0998, 2.0361, 2.0912, 2.2533, 2.1869, 2.0681, + 2.2428, 2.1953, 2.2473, 2.0651, 2.1201, 2.1390, 2.2267, 2.1952, 2.1608, + 2.0552, 2.1135, 1.6425, 2.0805, 2.0025, 2.1075, 2.1361, 2.2242, 2.1469, + 2.1484, 2.1327, 2.3234, 2.1158, 2.0765, 2.1809, 2.1726, 2.2022, 2.0506, + 2.1052, 2.1520, 2.0688, 2.2633, 2.1049, 2.1441, 2.0107, 2.1586, 2.0242, + 2.0553, 2.1383, 2.1448, 2.1316, 2.1199, 2.1036, 2.4179, 2.0712, 2.0769, + 2.2432, 2.1385, 2.0033, 2.1947, 2.1739, 2.1416, 1.4240, 2.0872, 2.1168, + 2.0558, 2.0731, 2.0900, 2.3486, 2.1286, 2.1595, 2.1331, 2.0985, 2.2349, + 2.0733, 2.2393, 2.0185, 1.9720, 2.1799, 2.2404, 2.2769, 2.1214, 2.1365, + 2.1539, 1.9940, 2.0823, 2.0511, 2.1764, 2.2886, 2.0483, 2.1611, 2.1046, + 2.0842, 2.3037, 2.1485, 2.2134, 2.2764, 2.1331, 2.1390, 2.2793, 2.1325, + 2.0913, 2.0291, 2.0483, 2.2171, 2.0498, 2.0716, 2.0527, 2.0389, 2.0519, + 2.2236, 2.1044, 2.1652, 2.2721, 2.1719, 2.0850, 2.1445, 2.2287, 2.0876, + 2.0748, 2.2805, 2.2230, 2.1957, 2.1366, 2.1051, 2.0608, 2.1175, 2.0909, + 2.0974, 2.3656, 2.0853, 1.9904, 2.1722, 2.0264, 2.1505, 2.2116, 2.1064, + 2.3324, 2.0612, 2.0759, 1.5095, 2.1135, 2.0918, 2.1234, 2.1231, 2.0909, + 2.1507, 2.1431, 2.0905, 2.1110, 2.1229, 2.0950, 2.0726, 2.1123, 2.2312, + 2.1390, 2.2622, 2.2702, 2.1241, 2.0290, 2.1137, 2.1211, 2.0897, 2.1186, + 2.1072, 2.1211, 2.0325, 2.3841, 2.1235, 2.1657, 2.2012, 2.1920, 2.0612, + 2.1856, 1.9654, 2.0802, 2.2121, 2.1295, 2.1085, 2.0468, 2.1225, 2.0937, + 2.2090, 2.1711, 2.2392, 2.2404, 1.9501, 2.2067, 2.1174, 2.1059, 2.0687, + 2.0720, 2.2781, 2.0699, 2.0782, 2.1167, 2.1556, 2.1573, 2.0769, 2.1428, + 2.0000, 2.0540, 2.1057, 2.3078, 2.1170, 2.0252, 2.2114, 2.0516, 2.0569, + 2.1436, 2.1945, 2.1005, 2.0620, 2.1000, 1.9946, 
2.0742, 2.2715, 2.1476, + 2.0796, 2.1513, 2.1469, 2.1263, 2.1124, 1.8206, 2.1527, 2.1983, 2.1449, + 2.0146, 2.1201, 2.1955, 2.1086, 2.1151, 2.0930, 2.2746, 2.0733, 2.1319, + 1.9999, 2.0404, 2.1487, 1.8035, 2.0896, 2.1148, 2.1295, 2.2288, 2.0789, + 2.0948, 2.1741, 2.2246, 2.1047, 2.0214, 2.1493, 2.1955, 1.9344, 2.0848, + 2.0497, 2.1492, 2.1086, 2.1639, 2.0673, 2.0541, 2.1296, 1.9733, 2.1538, + 2.1301, 2.2918, 2.0695, 1.9991, 2.2444, 2.0984, 2.1520, 2.1604, 2.1877, + 2.1085, 2.1740, 2.4152, 2.1196, 2.1161, 2.0564, 2.2205, 2.0721, 2.0152, + 2.2869, 2.1730, 2.2223, 2.1000, 2.1991, 2.0098, 1.9885, 2.1247, 2.1999, + 2.0865, 2.1030, 2.2179, 2.0859, 2.2840, 2.0955, 2.2556, 2.0865, 2.5612, + 2.1310, 2.1637, 2.2042, 2.3089, 2.2860, 2.0313, 1.5092, 1.9951, 1.9718, + 2.0639, 2.2435, 2.0679, 2.1874, 2.0538, 2.1991, 2.2152, 2.0431, 1.9732, + 2.1273, 2.3086, 2.0617, 2.1179, 1.9938, 2.2863, 2.1030, 2.0925, 2.2335, + 2.2860, 2.1918, 2.0950, 2.0555, 2.2699, 2.3361, 2.3560, 2.0020, 2.2247, + 2.1124, 2.1613, 2.1211, 2.0804, 2.1438, 2.0524, 2.1160, 2.0022, 2.0361, + 2.0578, 2.0534, 2.2993], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 5.5032e-02, 3.4170e-01, -5.9914e-01, 5.7442e-02, -5.9289e-01, + 6.2105e-01, 7.2261e-01, -3.8350e-01, 5.3076e-01, -3.4490e-01, + -3.7516e-01, 7.6397e-01, -4.8466e-01, -1.9540e-01, 2.3205e-01, + 6.6639e-01, 3.8165e-02, 4.7632e-01, -7.5291e-01, -1.4750e+00, + 5.0280e-01, 4.2079e-01, 5.0035e-01, -4.4191e-01, 4.7763e-01, + -5.6680e-01, -3.7598e-01, 3.5411e-03, -6.2287e-01, 5.5517e-01, + -1.8523e-01, 2.2972e-01, -3.0193e-01, -4.0291e-01, -2.8643e-01, + -1.4825e-01, -9.7817e-01, -2.5017e-01, 3.0976e-01, 9.1308e-01, + -3.7209e-01, 5.0364e-02, 8.3947e-01, 1.0546e-01, -5.1225e-01, + -7.5814e-01, -6.4859e-01, -1.9139e-01, -3.6599e-01, -3.1565e-01, + -8.4970e-01, -9.4634e-02, -2.2861e-01, 6.4416e-02, -7.4689e-01, + 8.2814e-01, 6.7515e-02, -2.1095e-01, 1.0784e-01, -2.0049e-01, + -1.5426e-02, 2.9876e-01, 2.9834e-01, 
-9.7773e-01, -2.1292e-01, + 4.1629e-01, 5.9539e-01, -1.2394e-01, -5.7038e-01, -1.6600e+00, + 5.8589e-01, 3.1986e-01, 9.3529e-01, -8.1940e-01, -1.0018e+00, + -7.7484e-01, 5.0273e-01, 1.4810e-01, 5.6621e-01, 2.0840e-01, + -2.5548e-02, 3.3979e-01, -9.2684e-01, 1.6033e-01, -5.8794e-01, + -1.7960e-01, 5.1259e-01, -1.7504e-01, 5.2133e-01, -4.8449e-01, + -7.1617e-01, 7.2320e-01, -4.6025e-01, 2.7989e-01, -6.8861e-01, + 1.6533e-01, 6.9277e-01, 2.0693e-01, -4.5392e-01, 7.2708e-01, + 3.7570e-01, -7.4780e-01, 1.2439e-01, -6.6284e-01, -1.2287e-01, + 5.6412e-01, 2.9251e-01, 3.7662e-01, -1.5380e-01, -1.2422e-01, + 7.0778e-01, 8.5423e-01, -3.5530e-01, 4.0904e-01, 3.2800e-01, + -2.4588e-01, 9.1576e-01, 3.5030e-01, -3.2823e-01, -5.6558e-02, + -3.5270e-01, -7.7882e-02, -4.8140e-01, -4.8432e-01, 7.9960e-02, + -7.8382e-01, -1.7744e-01, -3.0857e-01, -4.1455e-01, 7.0633e-01, + 1.3336e-01, -3.3674e-01, -3.0745e-01, -2.2139e-01, 5.0010e-01, + 2.7472e-01, 5.4829e-01, -3.3975e-02, -3.4801e-01, 2.3116e-01, + -8.3419e-01, 2.5358e-01, 2.2674e-01, -1.5673e-01, -3.6639e-01, + -1.5545e-01, 6.0038e-01, -1.5873e-01, -5.5829e-01, 5.9090e-01, + 3.3606e-01, -4.3766e-02, 6.3776e-01, 1.2674e-01, -7.7525e-01, + 1.0622e+00, -1.5575e-01, 5.7224e-02, -1.2175e-01, -3.5464e-01, + -4.1974e-01, -5.5180e-01, 3.1076e-01, -1.1148e-01, 1.7761e-01, + 3.9893e-01, -8.1377e-01, 1.9128e-01, 2.2551e-01, 1.0480e-01, + 2.5536e-01, 3.7489e-01, -6.0449e-01, -1.7544e-01, -3.6786e-01, + -1.7595e-01, 5.1698e-01, -7.5201e-01, 8.6587e-02, 3.5645e-01, + -9.2023e-01, -6.3389e-01, 5.6677e-01, 7.7825e-01, 4.0414e-02, + 8.2350e-01, -7.1027e-01, 1.1759e+00, 4.5486e-01, -5.8566e-01, + -8.3278e-01, -2.3355e-02, 6.1520e-01, 9.4288e-01, 1.9277e-01, + 3.2174e-01, 7.2860e-01, 1.5114e-01, 5.5562e-01, 1.9258e-01, + -3.3681e-01, -6.2664e-01, 4.8709e-01, 3.9007e-01, -1.0146e-01, + 3.3661e-01, 2.6557e-01, 7.1143e-01, -4.7444e-01, -2.2915e-02, + 5.6967e-02, -2.6513e-01, -6.7503e-01, 8.1852e-01, 1.7758e-01, + -5.4827e-01, -6.1577e-01, 3.7921e-01, 
-3.2689e-01, -4.2346e-01, + 1.0098e+00, 8.1804e-02, -9.1237e-01, 4.6550e-01, 3.3835e-02, + -5.5621e-02, -6.1343e-01, -1.6698e-02, 2.1856e-01, -6.0267e-02, + -4.5651e-01, 5.7496e-01, 5.1042e-02, 3.3396e-01, -1.1056e-01, + -9.3004e-01, 5.3485e-01, 3.2634e-01, -3.2156e-01, 1.0520e+00, + 1.2905e-02, 7.6925e-01, -6.5054e-02, -5.2462e-01, -2.7286e-01, + -5.9786e-01, 8.3486e-02, -4.5704e-01, -3.2914e-01, 8.4377e-01, + -2.7605e-01, 1.9019e-01, -5.2997e-02, -4.1378e-01, 9.2904e-02, + -4.0969e-01, 5.7097e-01, -2.9682e-01, -1.1046e+00, -6.4378e-01, + -1.0371e+00, -1.7496e-01, 3.8344e-01, 7.4962e-01, -2.3920e-01, + -5.1637e-01, 6.3089e-01, 1.5819e-01, 5.0971e-01, 2.4068e-01, + 1.9098e-01, 1.2260e-01, -4.0640e-01, -1.0648e+00, -3.8075e-01, + 9.3032e-03, 3.6212e-01, 5.7357e-01, 6.9055e-01, -2.7114e-01, + 8.0547e-02, -8.7845e-01, -7.8596e-01, 2.5760e-01, -3.3892e-01, + 5.2147e-01, -1.9988e-01, 1.4552e-01, 6.2273e-01, -1.7925e-01, + -2.8361e-01, 3.1194e-01, 7.0649e-01, -6.5505e-01, -1.1719e-01, + -4.4102e-01, -1.1529e-01, -1.0064e+00, 5.8067e-01, -4.9356e-02, + -5.4669e-01, -5.6302e-02, -8.0066e-01, -3.0239e-01, -7.7490e-02, + 4.1506e-01, 3.2261e-01, 3.0288e-01, -4.8018e-01, -5.2130e-01, + 2.3988e-01, -4.4202e-01, -1.5315e-01, 1.1838e-01, 9.8189e-02, + 4.9815e-01, 7.1809e-01, 4.9991e-01, 1.3004e-01, 4.2302e-01, + -5.0610e-01, -8.3078e-01, 4.9045e-01, -8.0706e-03, 2.8603e-01, + -1.2553e+00, 2.8014e-01, -4.5281e-01, 5.2321e-01, -6.6674e-01, + 1.7911e-01, -3.0358e-01, -2.8774e-01, -1.6966e-01, -2.1777e-01, + -8.2017e-02, -6.3974e-01, 3.4140e-01, 7.0512e-01, -2.6187e-01, + 4.2697e-01, -9.8788e-02, -1.9254e-01, 5.5265e-01, -7.7105e-01, + 7.7795e-01, -8.2557e-01, 3.7116e-01, -2.1748e-01, -3.0085e-01, + 1.1101e-01, 4.6779e-01, 2.7369e-01, 1.1426e+00, -6.6837e-01, + -2.9639e-01, -3.4687e-01, -3.5086e-01, -5.5450e-01, -3.4801e-02, + -9.7990e-01, -5.0421e-01, -1.3055e-01, -4.7743e-01, 5.8862e-02, + 2.0139e-01, 2.2049e-02, 5.6779e-01, 1.2275e+00, 4.8432e-01, + 4.5270e-01, -2.4627e-01, 
-2.1459e-01, -5.2194e-01, -2.4221e-01, + -3.0220e-01, 3.5763e-01, 7.6412e-01, -8.6895e-02, -2.2266e-01, + 3.1286e-01, -2.7849e-01, -5.3784e-01, 1.2801e-02, -6.3741e-01, + -4.8683e-01, -2.1989e-01, 1.0171e+00, 3.2114e-01, -7.0300e-01, + -2.5995e-01, -3.3686e-01, 6.3376e-01, 6.2771e-01, -6.6254e-01, + 3.1605e-01, -3.3023e-01, 8.2851e-01, -2.5261e-01, -8.0842e-01, + -8.4885e-02, 1.0560e+00, -6.9100e-01, -3.3871e-01, 1.6286e-01, + -3.3739e-01, 3.4416e-01, 1.0518e-01, -1.4568e-01, 7.3312e-01, + 2.6994e-01, -3.9172e-01, -1.2606e-01, 3.7430e-01, 1.2941e+00, + -1.8404e+00, 1.4481e-01, -2.2216e-01, 5.6620e-01, 4.0119e-02, + -2.7102e-01, -2.9898e-01, 6.6150e-01, -3.5446e-01, -6.3626e-01, + 3.0112e-01, 4.1641e-01, -7.8385e-03, 1.6033e-01, 5.9720e-01, + -1.0396e+00, -6.8988e-02, 4.9328e-01, 3.7363e-01, 8.8177e-01, + -6.3460e-02, 6.0906e-02, 7.2328e-01, 4.1937e-01, -5.4564e-01, + 7.1990e-02, -1.1627e-02, 2.7944e-01, -2.0765e+00, -6.6142e-02, + 1.0473e-01, 2.9151e-01, 4.1840e-01, 5.2563e-01, -8.2629e-01, + -5.1596e-02, 6.0050e-02, -9.0046e-01, 2.2648e-01, -1.7885e-01, + 6.1356e-01, -5.4350e-01, -7.6380e-01, -1.0181e-01, 1.7266e-01, + -3.3133e-01, 8.8887e-02, -6.9486e-01, 1.1714e-01, -3.4276e-01, + 3.3015e-01, 1.0132e-01, -9.4859e-02, 3.0991e-01, -5.5856e-02, + -2.1844e-02, -1.9616e-01, -6.7070e-01, 3.0605e-01, -8.7142e-01, + -3.8150e-02, -4.0821e-01, -6.1820e-01, -2.8025e-01, -6.9023e-02, + -4.5809e-01, 6.2847e-01, 7.1941e-01, 2.3210e-01, -3.2175e-02, + -4.7614e-01, 2.2915e-01, 1.7289e-01, -2.0610e-01, -8.2476e-01, + 4.6670e-02, 4.5217e-01, 3.0375e-01, -8.2604e-02, -8.5447e-01, + -3.8456e-02, -8.1994e-01, -1.8847e-01, -2.1478e-01, 8.7968e-01, + -4.2970e-01, -5.4261e-01, -4.3325e-01, 1.0360e-01, 2.8478e-01, + 2.7084e-01, 3.8629e-01, 5.8479e-02, -4.9726e-02, -9.2089e-01, + -5.2837e-01, -2.0357e-01, -7.3693e-01, -4.4392e-01, 6.7021e-01, + -1.7075e-01, 4.6331e-01, -4.9681e-01, -2.5834e-01, -2.8214e-01, + 9.1656e-01, 2.3502e-01, -1.8248e-01, 2.8570e-01, -8.3115e-02, + -7.6035e-01, 
3.9176e-01, 2.9743e-01, 2.1699e-01, 1.2622e-02, + 7.1951e-02, -2.9116e-01, -3.8441e-02, -1.4330e-01, 6.7795e-01, + -8.4475e-01, 5.5603e-02, -5.1013e-01, 3.5273e-01, 6.8286e-02, + 2.0730e-01, 7.8723e-01, -4.9373e-01, 5.6147e-01, 5.1230e-01, + 4.5580e-01, 1.5385e-01, -7.6108e-01, 6.2250e-02, 4.4146e-02, + -9.6293e-01, 1.5051e-01, 5.0428e-02, 1.6998e-02, -1.3017e-01, + 5.2094e-01, 9.0157e-01, -2.3312e-01, -9.2199e-01, -5.8327e-02, + 3.2615e-02, -2.0963e-01, -1.3304e-02, 5.9420e-01, 2.4892e-01, + 4.2144e-01, -1.3338e-01, 7.3720e-01, 6.7606e-01, -1.0390e-01, + 3.1712e-01, 4.1540e-02, 2.1947e-01, -2.6176e-02, 5.7473e-01, + 6.0464e-01, 1.1620e-02, -4.7551e-01, -5.2233e-01, -1.7980e-01, + 5.4628e-01, -3.4662e-01, 2.8681e-01, 4.0650e-01, 4.5632e-01, + 3.7570e-03, -3.2734e-01, -1.5770e-01, 1.2390e+00, 3.1241e-01, + -6.3380e-01, -7.3302e-01, 8.5430e-01, -6.2936e-02, -1.8730e-01, + 1.2957e-01, -4.5305e-01, -4.4873e-01, 8.0722e-01, 2.5926e-01, + 1.9403e-01, 8.0011e-02, -2.3443e-01, -6.4465e-01, 2.1859e-02, + 1.9577e-01, -3.9800e-01, -7.0151e-01, 2.0552e-01, 5.4888e-01, + -1.0330e-02, 3.4967e-01, -2.1443e-01, 6.5350e-01, 2.7752e-02, + 7.1228e-02, -9.6233e-02, 6.2011e-01, -1.6035e-01, 2.6784e-01, + 4.7279e-01, 2.4499e-01, -4.1408e-01, 6.9601e-01, 7.2197e-01, + -2.7174e-01, -1.3945e-01, 1.0073e+00, 8.1506e-01, -1.5321e-01, + -6.7199e-02, 5.5904e-01, -3.3913e-01, 2.5178e-01, 2.1693e-01, + -4.7386e-01, -1.3057e-01, 1.0067e+00, 4.0309e-01, -1.3929e-01, + 6.5192e-01, -7.4773e-01, -8.4036e-01, 2.4286e-01, -3.3858e-01, + 4.2005e-01, -5.4786e-01, 1.3617e-01, -2.3379e-01, 1.6159e-01, + 6.6706e-01, -8.7631e-02, 4.3947e-01, -1.2622e-01, 7.9351e-01, + 5.6074e-01, 2.8677e-01, -1.4647e-01, 2.5503e-02, 3.6957e-01, + -1.7680e-01, 4.9258e-01, -2.7404e-01, -6.6334e-01, -6.8055e-01, + -4.6036e-02, -1.5788e-01, -5.7529e-01, 8.0077e-01, -3.6329e-01, + -2.8435e-01, 3.6245e-01, 8.2002e-01, 4.7903e-01, 4.2087e-01, + -4.1194e-01, -3.2981e-01, 8.9196e-02, 2.5886e-01, 1.0111e-01, + 2.3851e-01, 1.9850e+00, 
-4.3724e-01, -3.1161e-01, -3.5585e-01, + 1.0762e+00, -1.9195e-01, -1.5232e-01, 7.7787e-01, -3.4547e-01, + -5.2549e-01, -2.9215e-01, -7.1992e-01, -3.3349e-01, -6.5178e-01, + 1.0218e+00, 3.1221e-02, -2.2267e-01, 3.3001e-02, 4.0659e-01, + -8.9277e-02, -4.2065e-01, 7.2138e-01, -3.0468e-01, 6.3441e-01, + 6.4458e-01, -2.5796e-01, -2.9764e-02, -3.8748e-01, -1.4858e-01, + 3.9582e-01, -5.3268e-01, -2.0677e-02, -7.5198e-01, -1.4154e-01, + -3.5085e-01, -1.7420e-01, 6.5263e-01, -3.9491e-01, -1.2203e+00, + -3.2941e-01, -7.7585e-02, -5.8524e-01, -8.6829e-01, -7.8241e-01, + -3.4036e-01, 1.0552e+00, 1.2820e-02, -4.2201e-02, -1.9478e-01, + -8.3023e-01, -2.5496e-01, 7.6814e-01, -1.2869e-01, 2.9615e-01, + 6.5342e-01, 1.4642e-02, 1.9967e-01, -2.6521e-01, -8.1220e-01, + -3.3556e-01, 5.7942e-02, 2.7999e-01, -2.4631e-01, 1.3797e-01, + 1.5128e-01, -6.2344e-01, 6.0882e-01, 9.6539e-01, 5.9792e-01, + 2.6680e-01, -6.7106e-01, -1.1571e+00, 8.1228e-01, 1.1491e-03, + 2.5220e-01, -2.8103e-01, -5.5863e-01, 5.5975e-01, 4.4755e-01, + -4.1402e-01, -4.9207e-01, -4.5646e-01, 7.1663e-02, 3.1954e-01, + 2.2474e-01, 3.8796e-01, 6.4554e-01], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 0.0420, -0.0440, -0.0347, ..., -0.0244, -0.0091, 0.0145], + [-0.0261, 0.0071, 0.0178, ..., 0.0025, -0.0036, -0.0314], + [ 0.0267, -0.0007, 0.0216, ..., 0.0241, 0.0350, -0.0015], + ..., + [-0.0045, -0.0047, 0.0123, ..., -0.0035, 0.0097, 0.0152], + [-0.0158, -0.0261, -0.0006, ..., -0.0156, -0.0045, 0.0177], + [-0.0226, -0.0010, -0.0124, ..., 0.0051, 0.0012, 0.0042]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([ 0.1199, -0.1290, 0.1978, ..., -0.0250, 0.0124, -0.0064], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[-5.5428e-03, -2.7206e-02, 2.7370e-03, ..., 2.2766e-02, + -3.1643e-03, -1.5764e-03], + [ 2.0981e-02, -1.9547e-02, 1.0455e-04, ..., 
-1.9424e-02, + -8.0032e-03, 3.5324e-03], + [ 1.8829e-02, 1.2306e-02, -3.6640e-03, ..., -1.7288e-02, + 6.9389e-03, 1.1398e-02], + ..., + [-1.0307e-02, 1.9897e-02, 5.8711e-05, ..., -1.7059e-02, + 9.9411e-03, 2.1317e-02], + [-2.5986e-02, -2.5024e-02, 1.0292e-02, ..., 2.4624e-03, + -7.9422e-03, -1.5936e-03], + [ 1.5373e-02, 1.6113e-02, -3.0041e-03, ..., 5.8517e-03, + 7.4081e-03, 2.3529e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.0859e-02, -2.0630e-02, -5.0323e-02, -1.5854e-02, 1.8677e-02, + -8.5983e-03, 1.6937e-02, -4.2786e-02, -3.0975e-02, 2.8900e-02, + 3.1143e-02, -3.3112e-02, -6.0944e-02, 7.2403e-03, 6.2134e-02, + 1.5930e-02, -6.3477e-02, 1.5442e-02, -9.2545e-03, 4.6425e-03, + -2.2705e-02, 2.4551e-02, 3.8788e-02, -8.3008e-03, -5.0537e-02, + -3.0624e-02, 3.3783e-02, -5.7587e-02, -1.5533e-02, -1.8036e-02, + -1.4130e-02, 4.0894e-03, 4.6051e-02, -8.1299e-02, 3.1738e-02, + 1.8219e-02, -1.5175e-02, 3.0014e-02, 7.3547e-02, 2.8801e-03, + -6.2447e-03, -3.5858e-02, -1.1673e-02, -3.0060e-02, 2.4414e-02, + -1.2505e-02, -9.6924e-02, 3.1174e-02, 1.0956e-01, -1.3878e-02, + -5.0735e-03, 4.1199e-02, 4.9133e-02, -1.2459e-02, -3.4515e-02, + 8.7097e-02, 1.1810e-02, -4.8706e-02, 2.0004e-02, 9.0179e-03, + -1.4343e-02, -6.8604e-02, 2.2568e-02, 3.3478e-02, 1.3725e-02, + 7.2136e-03, 2.1301e-02, -1.0628e-02, -2.1362e-02, -1.1755e-01, + 1.2100e-02, 1.5434e-02, -7.8491e-02, 9.7656e-03, -3.3630e-02, + -1.3838e-03, -2.8286e-03, -3.0460e-03, -1.3405e-02, 5.1422e-02, + 1.9658e-04, 2.6611e-02, -4.2328e-02, 3.4241e-02, 7.5569e-03, + 4.6661e-02, 1.2703e-02, -1.2032e-02, -3.1528e-03, 4.0527e-02, + 5.0720e-02, 9.2087e-03, -6.1989e-03, -1.1612e-02, 4.4937e-03, + 8.2321e-03, 8.4839e-03, -7.0435e-02, 7.0686e-03, -1.1665e-02, + 3.5065e-02, 5.2277e-02, -2.3499e-02, 6.0242e-02, 6.7566e-02, + 1.6663e-02, 5.6458e-02, 1.2810e-02, 1.6693e-02, -5.4646e-04, + -6.7078e-02, 2.1118e-02, -9.4727e-02, 2.5650e-02, 1.8707e-02, + 1.2650e-02, 
-4.4159e-02, 2.3376e-02, 7.8674e-02, 1.8646e-02, + -6.2256e-02, -1.5190e-02, -2.3056e-02, 1.1292e-02, 1.6510e-02, + -1.3107e-02, 2.2736e-02, 3.0975e-03, -4.0283e-02, -1.1543e-02, + -4.8866e-03, -6.9153e-02, 1.1856e-02, -1.8356e-02, -2.3087e-02, + -4.6692e-02, 2.5101e-02, 1.2138e-02, 4.1237e-03, -6.6589e-02, + 7.2510e-02, -4.6616e-03, -7.1167e-02, -2.3834e-02, 7.9498e-03, + 1.5915e-02, -2.0721e-02, -1.6296e-02, 1.0139e-02, 4.0131e-02, + 3.5797e-02, 2.6913e-03, 2.5009e-02, -2.1103e-02, -4.7577e-02, + 6.7444e-02, 1.1787e-02, -2.8687e-02, -2.8244e-02, 1.5015e-02, + 1.8646e-02, 7.9803e-03, 3.1647e-02, -1.0902e-02, 7.0740e-02, + -5.1086e-02, -3.5431e-02, 4.2816e-02, -4.6844e-02, -9.9792e-03, + -1.7197e-02, -9.9258e-03, 3.2501e-02, 4.8279e-02, 3.3508e-02, + -1.7410e-02, -8.0200e-02, 1.0544e-02, -6.9092e-02, -2.8915e-02, + -9.5886e-02, -5.0598e-02, 2.9327e-02, 1.7441e-02, 1.2245e-02, + 4.3610e-02, 2.4643e-02, -5.5542e-02, -2.7710e-02, 1.4946e-02, + 1.3275e-02, -1.6937e-02, -6.4697e-02, -2.3621e-02, -2.4048e-02, + 1.6418e-02, -8.3389e-03, 8.1863e-03, -1.1986e-02, -1.3840e-02, + -9.3201e-02, -2.2182e-03, 2.0386e-02, 5.4901e-02, -2.2339e-02, + -4.2969e-02, 2.8580e-02, 1.3611e-02, -3.8452e-03, 4.8706e-02, + -3.9948e-02, 2.0447e-02, -1.2100e-02, 6.1005e-02, 3.1830e-02, + 8.1665e-02, -5.4077e-02, -3.3966e-02, -9.5291e-03, -7.4244e-04, + 5.3467e-02, 2.3865e-02, 5.9433e-03, 3.8483e-02, -6.2675e-03, + -6.4148e-02, 3.1708e-02, 8.2064e-04, 3.6926e-03, 1.6327e-03, + -3.8929e-03, 2.4933e-02, 5.3253e-02, 6.4163e-03, -4.3365e-02, + 5.6274e-02, 2.8351e-02, 1.0307e-02, 2.8305e-03, -2.1515e-02, + 1.6968e-02, 6.2744e-02, 1.4587e-02, 5.6839e-03, 2.6646e-03, + 2.7756e-02, 8.0505e-02, 5.8472e-02, 3.3813e-02, 4.8637e-03, + -1.0284e-02, 4.4434e-02, 1.3077e-02, 4.7729e-02, 3.7384e-02, + 9.8267e-03, -2.0920e-02, -3.3600e-02, -1.3557e-02, 1.5671e-02, + 6.0120e-03, 1.8631e-02, 7.0740e-02, 4.0985e-02, -3.2288e-02, + -2.6886e-02, -3.2410e-02, 3.0579e-02, -3.1174e-02, -5.9692e-02, + 2.9373e-02, 
1.3802e-02, -6.5918e-03, -7.5684e-02, 1.1101e-02, + -1.0614e-03, -2.8885e-02, -3.6392e-03, -1.3153e-02, 3.3112e-02, + -6.7383e-02, 1.3893e-02, 5.3741e-02, -1.1772e-02, -6.5384e-03, + 9.5139e-03, 3.7842e-02, 7.5928e-02, 3.4912e-02, -4.2908e-02, + -3.3539e-02, 3.1891e-02, -1.3733e-02, 5.4596e-02, -2.0233e-02, + -3.7201e-02, -2.6230e-02, -3.4695e-03, -3.3207e-03, -1.5518e-02, + 2.8442e-02, 5.5756e-02, 4.7729e-02, -1.6388e-02, 7.0251e-02, + -6.8298e-02, 3.2867e-02, 3.2898e-02, -3.7212e-03, 3.6560e-02, + 5.8746e-02, 3.7781e-02, 1.0370e-01, -6.2286e-02, -3.5126e-02, + -4.3030e-02, 1.2123e-02, 3.3966e-02, 4.0016e-03, -1.0986e-02, + -2.0584e-02, 1.5541e-02, 1.2108e-02, 6.8237e-02, 3.9673e-01, + -1.8234e-03, 5.7098e-02, 1.4084e-02, 5.7037e-02, 1.8982e-02, + 3.2227e-02, -5.1941e-02, 2.2110e-02, -1.8753e-02, 8.0643e-03, + -2.7237e-02, 1.2276e-02, 1.0309e-03, -1.6800e-02, 3.3051e-02, + 3.7746e-03, 5.0850e-03, -3.6285e-02, -5.2673e-02, 3.6041e-02, + -1.0266e-01, 8.7929e-04, 2.5345e-02, -1.6159e-02, -6.3992e-04, + 3.1189e-02, -1.1711e-02, 1.3405e-02, 9.0408e-03, -2.3865e-02, + 1.2596e-02, 7.6828e-03, -6.0394e-02, -2.0798e-02, 2.5085e-02, + -5.6953e-03, -1.1284e-02, -2.2385e-02, -5.0415e-02, 5.0018e-02, + 5.1208e-02, 5.1918e-03, 3.9734e-02, 3.3112e-02, 9.2224e-02, + -4.4189e-02, -1.9226e-03, -1.9653e-02, -3.7170e-02, 4.0375e-02, + -8.1253e-03, -3.9558e-03, -7.6294e-02, -5.6061e-02, -6.2012e-02, + 2.2400e-02, 1.1206e-05, 4.1046e-02, 5.8603e-04, -3.8513e-02, + -4.7455e-03, 1.8951e-02, -6.6681e-03, -3.8815e-03, 4.3579e-02, + 7.7148e-02, -1.9348e-02, 1.0681e-02, 1.5076e-02, -4.2725e-02, + 5.6076e-03, -3.1250e-02, -2.6230e-02, -6.9519e-02, -5.3345e-02, + -5.9875e-02, -1.8280e-02, 2.1881e-02, -2.3605e-02, 2.3453e-02, + -3.0563e-02, -4.1122e-03, -4.6417e-02, -2.5146e-02, -2.1683e-02, + 4.3060e-02, 3.7506e-02, 4.7516e-02, 2.2934e-02, -1.1604e-02, + -1.3635e-01, 6.0699e-02, 3.2928e-02, 1.8631e-02, 6.5689e-03, + 1.3412e-02, -4.0649e-02, 4.2175e-02, 3.3140e-05, -1.4381e-02, + 4.6204e-02, 
5.7404e-02, -1.4488e-02, -6.7291e-03, 1.2566e-02, + 1.5469e-03, -3.6774e-02, -3.0350e-02, 2.1133e-02, 1.8555e-02, + 6.1417e-03, -1.1955e-02, -6.7871e-02, 1.5640e-02, -6.0638e-02, + -5.2887e-02, 2.1347e-02, 1.8936e-02, -9.1248e-02, 6.2103e-02, + -1.8204e-02, 1.1002e-02, 2.5787e-02, 4.5624e-02, 3.7384e-02, + -4.1466e-03, 6.1035e-02, -2.8343e-03, 1.4069e-02, -1.1894e-02, + -7.2815e-02, -8.3847e-03, -1.7099e-03, 8.9493e-03, 2.1667e-02, + 9.3002e-03, 3.6957e-02, -2.6047e-02, 7.4730e-03, -3.2135e-02, + -7.1777e-02, 5.0116e-04, 2.8610e-06, -1.5457e-02, -5.6915e-02, + 5.3070e-02, 1.1032e-02, 1.0065e-01, 5.8472e-02, -2.7676e-03, + 1.3481e-02, -1.7410e-02, -1.6434e-02, -1.0696e-02, -3.8223e-03, + -3.1616e-02, -2.2751e-02, -5.1239e-02, 1.8570e-02, -7.3914e-02, + -9.0256e-03, 5.8990e-02, 4.5967e-03, 7.6408e-03, 6.4697e-03, + 5.3978e-03, -1.4824e-02, -9.7839e-02, -1.1887e-02, -7.8430e-03, + -1.9012e-02, -4.1901e-02, 1.6632e-02, 6.9397e-02, 2.6230e-02, + 8.9722e-03, -4.3068e-03, 6.9458e-02, 1.2672e-02, 5.3263e-04, + 9.3460e-03, -1.3313e-02, 5.7404e-02, 4.2664e-02, 1.2894e-02, + 7.8430e-02, 1.7822e-02, 1.2802e-02, 6.7505e-02, -1.9592e-02, + -3.7750e-02, 6.7322e-02, 2.6031e-02, -1.2741e-02, 8.3923e-03, + 5.6122e-02, -1.0010e-02, 2.6276e-02, -6.3904e-02, 5.0446e-02, + 2.3346e-02, 2.2293e-02, 9.0866e-03, 3.9825e-02, -2.2980e-02, + -1.8143e-02, -3.0060e-02, 2.7557e-02, 2.1820e-02, 4.9774e-02, + 1.0857e-02, 3.8757e-02, -2.5162e-02, -4.6921e-03, -1.2398e-02, + -1.0277e-02, -9.8328e-02, -1.1002e-02, 1.0178e-02, 2.4597e-02, + -2.8351e-02, -1.9394e-02, 3.0487e-02, -3.8757e-03, -7.9880e-03, + -1.2405e-02, -1.8463e-02, -1.5091e-02, 4.1901e-02, 2.8778e-02, + -1.0979e-02, 9.3384e-03, 3.1494e-02, -3.5339e-02, 3.8204e-03, + 2.4368e-02, -1.1688e-01, -4.3396e-02, 1.4755e-02, 1.2825e-02, + -1.5335e-03, 4.4495e-02, -2.8778e-02, -3.5553e-03, -8.0643e-03, + -1.2749e-02, -5.0240e-03, 2.8473e-02, 3.6163e-02, 4.6005e-03, + 5.8746e-02, -3.0441e-02, -2.9129e-02, 3.5706e-02, 2.1362e-02, + 3.8528e-03, 
4.5837e-02, -4.4647e-02, -5.2826e-02, 2.2247e-02, + -2.3148e-02, 4.0466e-02, -3.2379e-02, -4.2969e-02, -8.1360e-02, + 2.4963e-02, 4.4861e-02, -1.9272e-02, -1.4610e-02, 1.7929e-03, + -7.0992e-03, 3.1799e-02, 1.0791e-01, 3.3417e-02, 4.9744e-03, + 4.3732e-02, 3.1586e-02, 1.9821e-02, -4.5013e-02, -4.3640e-02, + 7.3486e-02, -2.6794e-02, -1.0699e-01, 3.5973e-03, -3.3569e-02, + -4.5349e-02, 6.7139e-03, -5.1727e-02, -3.2318e-02, 1.1993e-02, + -1.4305e-03, 2.7252e-02, -5.1636e-02, 2.9587e-02, -1.5289e-02, + 8.7433e-03, 7.8430e-03, 9.8114e-03, 1.2848e-02, -5.4199e-02, + 1.4542e-02, 1.8951e-02, 9.6130e-03, 2.9816e-02, -9.6512e-03, + -1.6708e-02, -2.7252e-02, -6.4148e-02, -6.6895e-02, -2.6230e-02, + 2.5436e-02, -6.1836e-03, -5.2185e-02, -1.8524e-02, -1.3237e-02, + -5.5725e-02, -3.5126e-02, -5.2917e-02, -6.3354e-02, -3.4851e-02, + -1.6068e-02, -2.1957e-02, 5.4092e-03, -5.4207e-03, 1.1559e-02, + 5.0781e-02, -1.1292e-02, 2.5009e-02, 8.1558e-03, 4.3564e-03, + -1.8936e-02, 3.6201e-03, -2.3834e-02, -1.9026e-03, 5.2795e-03, + -4.9174e-05, -3.1952e-02, 1.6068e-02, 7.0923e-02, 3.7048e-02, + -2.2232e-02, 6.5308e-02, -1.3069e-02, -8.0872e-02, -1.5175e-02, + 6.2439e-02, 1.6174e-02, -2.7466e-02, -2.0432e-02, -2.8900e-02, + -3.3073e-03, 7.0862e-02, -2.1133e-02, -7.9880e-03, -5.4413e-02, + -3.5095e-02, 1.4954e-01, -5.7800e-02, 3.8818e-02, 2.6459e-02, + -1.6842e-03, 4.2542e-02, -2.4109e-02, -2.2034e-02, 5.8060e-03, + -2.1992e-03, 4.8561e-03, 6.8779e-03, -3.0319e-02, 3.6865e-02, + 2.5711e-02, -3.7323e-02, -5.0201e-02, -5.8411e-02, -2.3453e-02, + 2.4719e-02, 4.3884e-02, -8.3029e-05, -3.0212e-02, -2.6596e-02, + 3.6194e-02, 5.2368e-02, -1.2566e-02, -1.8072e-03, -4.1412e-02, + 1.4153e-02, 4.2206e-02, -4.5441e-02, 1.7147e-03, 1.3176e-02, + -2.5970e-02, -7.2336e-04, -3.8208e-02, 4.0283e-02, 7.5188e-03, + -9.6054e-03, 4.3526e-03, -9.2087e-03, 4.0550e-03, 3.4302e-02, + -1.6312e-02, 1.7105e-02, 2.2003e-02, -1.6708e-02, -6.7749e-02, + -8.1940e-03, -3.3478e-02, 1.3794e-02, -2.0035e-02, -3.3875e-02, + 
1.3062e-02, -3.4912e-02, 1.0956e-02, -2.8534e-03, -3.4210e-02, + -2.6230e-02, -2.7374e-02, -3.8452e-02, -3.9124e-02, 3.7598e-02, + 2.9434e-02, -6.7993e-02, 2.3544e-02, 2.5116e-02, -6.5269e-03, + -2.5787e-02, -1.6769e-02, -4.9164e-02, -3.3234e-02, 1.2070e-02, + 9.2224e-02, -7.6599e-02, 1.3876e-03, 2.7161e-02, 5.3253e-03, + 5.2765e-02, 7.7171e-03, 4.9408e-02, 2.0340e-02, -4.4403e-02, + 6.4430e-03, 9.6924e-02, -3.0487e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.5612, 1.6291, 1.5122, 1.5747, 1.6225, 1.5788, 1.5892, 1.5757, 1.6082, + 1.5988, 1.6139, 1.5868, 1.5801, 1.6259, 1.5611, 1.5231, 1.6739, 1.6669, + 1.5990, 0.4274, 1.5622, 1.6011, 1.5774, 1.5165, 1.6530, 1.5793, 1.5959, + 1.5585, 1.6473, 1.4690, 1.5691, 1.5691, 1.6401, 1.5813, 1.6636, 1.5143, + 1.6102, 1.6316, 1.6255, 1.5060, 1.5319, 1.6488, 1.6245, 1.6413, 1.6142, + 1.5674, 1.5211, 1.6072, 1.6012, 1.5171, 1.6199, 1.5440, 1.5075, 1.5305, + 1.5490, 1.6551, 1.5008, 1.6083, 1.6627, 1.5574, 1.5938, 1.5662, 1.5658, + 1.5936, 1.5152, 1.6172, 1.4900, 1.5247, 1.6271, 1.5863, 1.5609, 1.7847, + 2.1021, 1.6023, 1.6314, 1.5624, 1.6506, 1.4861, 1.5519, 1.6230, 1.5676, + 1.5654, 1.5644, 1.6012, 1.5914, 1.5827, 1.5950, 1.5313, 1.6519, 1.6020, + 1.5715, 1.5640, 1.5664, 1.6471, 1.5941, 1.5665, 1.6015, 1.5576, 1.5990, + 1.6531, 1.5997, 1.5374, 1.5525, 1.6357, 1.5070, 1.6177, 1.5243, 1.5483, + 1.6100, 1.6093, 1.5234, 1.6186, 1.6167, 1.6678, 1.6350, 1.5438, 1.5267, + 1.5384, 1.6124, 1.5812, 1.6419, 1.5493, 1.5972, 1.5512, 1.6389, 1.6147, + 1.4907, 1.5437, 1.5886, 1.5572, 1.5550, 1.4767, 1.5170, 1.5503, 1.5716, + 1.5880, 1.6357, 1.5919, 1.6067, 1.5730, 1.5858, 1.5905, 1.6521, 1.5865, + 1.5048, 1.5617, 1.5851, 1.6758, 1.5167, 1.5563, 1.5114, 1.5789, 1.5416, + 1.6574, 1.5917, 1.3285, 1.6240, 1.6606, 1.6460, 1.6168, 1.5033, 1.5473, + 1.6333, 1.5656, 1.5466, 1.5838, 1.5561, 1.5466, 1.5480, 1.6122, 1.5619, + 1.5732, 1.5800, 1.5441, 1.6047, 1.6982, 1.6320, 1.6484, 
1.6278, 1.6129, + 1.6167, 1.5956, 1.6070, 1.5811, 1.6763, 1.5567, 1.6143, 2.1741, 1.6671, + 1.5965, 1.5974, 1.5375, 1.6123, 1.5943, 1.5073, 1.5639, 1.5723, 1.6553, + 3.4704, 1.5658, 1.5836, 1.6005, 1.6608, 1.6609, 1.6136, 1.5075, 1.5065, + 1.5464, 1.4956, 1.5471, 1.5916, 1.5797, 1.5640, 1.4862, 1.5662, 1.5947, + 1.5522, 1.5988, 1.5318, 1.6886, 1.4830, 1.5993, 1.5229, 1.5385, 1.5668, + 1.5360, 1.5791, 1.6482, 1.6321, 1.7258, 1.6303, 1.6478, 1.7792, 1.5524, + 1.5670, 1.6981, 1.5449, 1.5758, 1.5182, 1.6659, 1.6315, 1.5490, 1.6675, + 1.6144, 1.6283, 1.5757, 1.5882, 1.5369, 1.5254, 1.7189, 1.6006, 1.6993, + 1.5655, 1.5378, 1.6526, 1.6030, 1.5295, 1.5335, 1.5831, 1.5121, 1.6575, + 1.5789, 1.5940, 1.5068, 1.4599, 1.5887, 1.5618, 1.5181, 1.7093, 1.4992, + 1.6316, 1.5302, 1.5932, 1.5249, 1.6118, 1.5605, 1.4670, 1.5210, 1.6202, + 1.6143, 1.5616, 1.7132, 1.6734, 1.6960, 1.6497, 1.5144, 1.5368, 1.5412, + 1.5550, 1.5144, 1.5971, 1.6508, 1.6773, 1.5563, 1.6012, 1.5162, 1.5163, + 1.6103, 1.6071, 1.6437, 1.5799, 1.5341, 1.6273, 1.6276, 1.6143, 1.6015, + 1.5359, 1.5915, 1.5081, 1.5776, 1.5932, 1.6629, 1.6389, 1.6354, 1.6139, + 1.5766, 1.5325, 1.5796, 1.5362, 1.5868, 1.5815, 1.5430, 1.6207, 1.6325, + 1.0084, 1.3852, 1.6033, 1.6354, 1.6064, 1.6104, 1.5252, 1.6160, 1.6036, + 1.5788, 1.5968, 1.5653, 1.5560, 1.4895, 1.5731, 1.5991, 1.6102, 1.5368, + 1.6023, 1.5326, 1.6283, 1.6053, 1.5736, 1.6930, 1.5774, 1.5697, 1.6059, + 1.6048, 1.5693, 1.6887, 1.5748, 1.6362, 1.5797, 1.5532, 1.5407, 1.6113, + 1.5052, 1.6081, 1.6208, 1.5894, 1.5822, 1.5563, 1.6003, 1.5935, 1.6142, + 1.5649, 1.5751, 1.6461, 1.5523, 1.5837, 1.5288, 1.5320, 1.5913, 1.5801, + 1.4600, 1.5917, 1.6242, 1.6360, 1.4584, 1.6947, 1.5531, 1.5535, 1.6059, + 1.5470, 1.6079, 1.5324, 1.6425, 1.4971, 1.5478, 1.5758, 1.6043, 1.6234, + 1.4722, 1.6315, 1.6825, 1.6424, 1.5320, 1.6130, 1.6026, 1.5700, 1.5041, + 1.5321, 1.5527, 1.5418, 1.6419, 1.4374, 1.6255, 1.5188, 1.5278, 1.5432, + 1.4489, 2.3006, 1.5959, 1.7023, 1.5785, 1.6375, 1.5754, 
1.5096, 1.6006, + 1.6337, 1.5149, 1.5128, 1.5400, 1.5957, 1.5554, 1.5141, 1.6146, 1.4660, + 1.6387, 1.6256, 1.5380, 1.6197, 1.5558, 1.4969, 1.5637, 1.5350, 1.5603, + 1.5597, 1.5843, 1.3633, 1.5204, 1.6087, 1.5517, 1.5795, 1.6358, 1.5697, + 1.5797, 1.5676, 1.6028, 1.5955, 1.5415, 1.6213, 1.4824, 1.5580, 1.5154, + 1.6320, 1.5831, 1.5853, 1.5625, 1.5538, 1.5902, 1.6137, 1.5797, 1.5854, + 1.6301, 1.5884, 1.6639, 1.6174, 1.6834, 1.5654, 1.6103, 1.5832, 1.6769, + 1.6275, 1.5682, 1.5536, 1.5728, 1.5328, 1.6245, 1.3324, 1.6080, 1.6106, + 1.5931, 1.5965, 1.6213, 1.5882, 1.6126, 1.6217, 1.5003, 1.5451, 1.5859, + 1.6413, 1.5879, 1.4787, 1.5228, 1.6128, 1.6167, 1.6126, 1.4987, 1.6039, + 1.4994, 1.5541, 1.4806, 1.6224, 1.5732, 1.5347, 1.5990, 1.6350, 1.6045, + 1.5526, 1.5640, 1.6160, 1.6265, 1.5525, 1.5405, 1.5849, 1.5789, 1.5462, + 1.6115, 1.5663, 1.5264, 1.5779, 1.5052, 1.5357, 1.6098, 1.5319, 1.6803, + 1.6384, 1.6489, 1.5756, 1.6014, 1.5709, 1.6672, 1.5551, 1.5675, 1.7146, + 1.6731, 1.6189, 1.5798, 1.5877, 1.6571, 1.5571, 1.6023, 1.5781, 1.6524, + 1.6087, 1.5548, 1.5786, 1.6094, 1.6381, 1.6036, 1.5332, 1.5510, 1.6183, + 1.6424, 1.5860, 1.5442, 1.5265, 1.6016, 1.5861, 1.6329, 1.6371, 1.6098, + 1.5439, 1.6541, 1.6189, 1.5390, 1.6603, 1.6226, 1.5107, 1.6136, 1.6319, + 1.6133, 1.6171, 1.5596, 1.5044, 1.5676, 1.6682, 1.6274, 1.5624, 1.6302, + 1.6082, 1.6882, 1.5975, 1.5947, 1.6779, 1.5172, 1.8169, 1.5628, 1.5574, + 1.5296, 1.6388, 1.6386, 1.5509, 1.5011, 1.5941, 1.5359, 1.6311, 1.6930, + 1.5646, 1.5703, 1.7156, 1.7039, 1.7237, 1.6125, 1.6781, 1.6184, 1.6499, + 1.5913, 1.5678, 1.5397, 1.6042, 1.5798, 1.4778, 1.6637, 1.6021, 1.5434, + 1.5934, 1.6021, 1.6542, 1.5897, 1.5038, 1.6211, 1.5139, 1.4714, 1.5686, + 1.6549, 1.6761, 1.5084, 1.5039, 1.6728, 1.5570, 1.5322, 1.6111, 1.5362, + 1.5236, 1.5444, 1.6373, 1.6728, 1.5343, 1.4118, 1.6080, 1.6841, 1.5738, + 1.6541, 1.5087, 1.6481, 1.5934, 1.6207, 1.5490, 1.6071, 1.6627, 1.6416, + 1.6076, 1.6141, 1.5574, 1.6018, 1.5730, 1.6248, 1.5821, 
1.5914, 1.5954, + 1.7080, 1.5800, 1.5834, 1.6229, 1.5816, 1.5754, 1.5840, 1.3503, 1.5136, + 1.5413, 1.6312, 1.6352, 1.5054, 1.5827, 1.5213, 0.3553, 1.6140, 1.5190, + 1.5867, 1.5369, 1.5415, 1.6162, 1.6345, 1.5307, 1.6651, 1.5010, 1.6128, + 1.5278, 1.5920, 1.5121, 1.5923, 1.5087, 1.5450, 1.5725, 1.5622, 1.5693, + 1.5568, 1.5951, 1.5524, 1.5817, 1.5403, 1.5696, 1.5777, 1.5988, 1.5369, + 1.6626, 1.5796, 1.6014, 1.5697, 1.5798, 1.4994, 1.5147, 1.5696, 1.5165, + 1.5875, 1.6455, 1.6432, 1.6248, 1.6029, 1.5540, 1.3902, 1.5868, 1.6621, + 1.6429, 1.5988, 1.6661, 1.5714, 1.5929, 1.5571, 1.6623, 1.5666, 1.5757, + 1.5621, 1.5998, 1.5808, 1.5551, 1.6553, 1.5133, 1.6109, 1.5503, 1.5683, + 1.5732, 1.6376, 1.5990, 1.5096, 1.5476, 1.5207, 1.7330, 1.6614, 1.5702, + 1.5172, 1.5875, 1.6315, 1.5304, 1.6697, 1.5546, 1.5182, 1.5630, 1.5935, + 1.6117, 1.4515, 1.5856], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.0403e-01, 9.7060e-02, -1.5602e-01, -8.2800e-03, 1.0229e-01, + 1.4111e-01, 1.0306e-01, 5.4273e-02, -1.6648e-01, -8.7717e-03, + 6.2813e-02, -2.6458e-02, -9.6767e-02, -2.1291e-02, -7.9400e-02, + -5.0567e-02, -3.2953e-02, -2.4269e-02, -5.1724e-03, 4.8843e+00, + -8.4254e-02, 5.4768e-02, -7.4972e-03, -2.4967e-02, -9.5140e-04, + 1.5371e-02, 7.1181e-03, -8.3969e-03, 2.2335e-02, -8.5531e-02, + 3.7403e-02, 2.8807e-02, 1.7188e-02, 1.0637e-01, 9.4134e-02, + -8.2811e-02, 4.2986e-03, -5.9408e-02, -8.1549e-02, 4.5918e-02, + 2.3936e-02, 3.2823e-02, 8.4698e-03, 9.2395e-03, -4.5630e-02, + 1.6469e-02, 3.6515e-02, -3.3968e-02, 5.7709e-02, 5.4930e-02, + 6.1122e-02, -6.1926e-06, 4.1280e-02, 3.4293e-02, 9.1601e-02, + 1.3812e-01, -7.9320e-02, -7.3999e-02, 5.8668e-02, 4.7881e-03, + 9.5586e-02, -7.9776e-02, 2.8651e-02, 6.1912e-03, 1.1924e-01, + -4.3352e-03, 1.2326e-01, 1.0645e-01, -4.3967e-03, 1.8691e-01, + 2.4468e-02, 2.8348e-02, 5.9777e-01, 7.2056e-02, -1.2830e-01, + 8.9767e-02, 4.4243e-02, -1.1128e-01, -8.0267e-02, -1.6716e-02, + -1.8243e-02, -5.4433e-02, 
-1.3675e-02, 1.2009e-01, 2.2563e-02, + 3.1082e-02, 1.3952e-01, 1.2806e-01, -2.9679e-02, -2.6804e-02, + 8.4288e-03, 2.6239e-02, 4.8801e-02, -5.7234e-02, -3.9436e-02, + 4.0219e-02, -2.0147e-02, -3.7820e-03, -9.8342e-02, -4.0746e-02, + 1.4139e-02, -5.4825e-02, 3.9232e-02, -9.4168e-02, 2.0618e-02, + -1.3461e-02, 3.9536e-02, -6.7572e-02, 2.6576e-02, -7.1447e-02, + -7.3643e-02, 1.1431e-03, 4.0579e-02, 5.8312e-02, 8.5941e-02, + -5.1140e-02, -1.5227e-02, -9.3090e-02, 1.2289e-02, 9.8821e-02, + -4.3573e-02, -2.1652e-01, -8.5814e-02, -3.8511e-02, -1.6028e-01, + 5.6119e-03, -1.4571e-01, 3.2770e-02, 5.6601e-02, -3.9532e-02, + -8.4895e-02, 6.0803e-02, -5.5046e-02, -2.6444e-02, -5.8353e-02, + 4.9541e-02, -2.7995e-02, -1.8478e-02, -3.4308e-02, -3.5461e-02, + 2.1209e-02, -4.9318e-03, 5.6645e-02, 3.7480e-02, 6.0327e-02, + -6.5690e-04, -2.0427e-02, -2.3503e-02, 1.8327e-04, 1.1751e-01, + 1.9287e-01, -7.5346e-02, -1.1450e-01, 1.6207e-02, -3.5160e-02, + -3.7534e-01, -1.0929e-02, -3.1112e-02, 3.6410e-02, 1.0848e-02, + 1.6411e-01, -1.3910e-01, -4.8034e-02, 2.0327e-02, 1.7898e-02, + -1.4329e-02, -6.9380e-02, 4.4215e-02, -6.9706e-02, 2.2333e-02, + -4.2725e-02, 7.9795e-02, 4.6975e-02, 4.7850e-02, 4.6778e-02, + -1.3970e-01, -1.4567e-03, 5.0245e-02, -1.2355e-02, 4.1464e-02, + -1.9858e-02, -6.7841e-02, 2.9885e-02, -2.5422e-02, 3.9081e-02, + -1.5655e-01, 3.9403e-02, 2.2546e-01, -2.9838e-02, -8.0923e-02, + 3.2238e-02, -3.3750e-02, 1.0578e-01, 8.4942e-02, -1.0134e-01, + 1.0470e-02, -1.6958e-02, -1.5581e-01, 8.7543e-02, 4.6490e-02, + -1.8868e-01, -2.1945e-02, -6.2705e-02, 5.4263e-02, 5.5378e-02, + 1.5345e-01, -8.8809e-02, 1.1056e-01, 1.3004e-01, 3.1240e-03, + 4.8471e-02, 2.2921e-01, -1.2579e-02, -3.3532e-02, -9.3535e-02, + -1.1855e-02, -5.6449e-02, 4.5213e-02, -1.4483e-01, -1.9879e-01, + -1.6979e-02, -7.0818e-02, 8.4117e-02, -1.6528e-02, 2.0236e-02, + -3.8914e-02, 6.5753e-02, -4.7799e-02, 1.2737e-01, -2.6316e-02, + -1.8444e-02, 3.9019e-02, -5.2672e-03, 5.9503e-02, -2.2726e-02, + 3.4543e-02, 
-1.0741e-01, 3.9277e-02, 6.2676e-02, 2.2335e-02, + 4.1903e-02, 1.1149e-01, 4.2704e-02, 5.2054e-02, 4.1758e-02, + 5.8227e-02, 8.3051e-02, -7.6821e-02, 6.0740e-03, -1.2686e-02, + 1.6613e-02, 2.4717e-02, 1.0240e-01, -1.2099e-01, 1.0826e-01, + -3.0802e-02, -8.7470e-02, -1.0857e-01, -1.1902e-04, 2.7459e-02, + -5.9645e-02, -7.6922e-02, -5.7398e-02, -5.9345e-02, -7.4553e-02, + 4.1273e-02, -7.1765e-02, -1.5432e-02, -9.4385e-02, 1.0805e-02, + -4.0354e-02, 7.1086e-02, 1.0681e-01, -7.6471e-02, 8.4077e-03, + 6.2677e-02, -9.7095e-02, 3.0733e-02, 8.5449e-02, -1.1908e-01, + 1.7565e-01, 1.5352e-01, 8.9398e-03, 4.5373e-02, 3.3377e-02, + 5.5408e-02, 4.7944e-02, -7.5976e-02, 8.8923e-02, -1.3306e-01, + -7.5426e-03, -1.8189e-02, 1.3748e-01, 4.3790e-02, 4.7611e-02, + 1.8740e-02, 1.8154e-01, 5.4250e-02, 8.3740e-02, 7.2689e-02, + 6.7214e-02, 5.0011e-03, -6.1465e-02, 1.2461e-01, 2.1885e-02, + 2.4945e-03, 1.0912e-01, -8.7420e-02, 5.2805e-02, -6.0513e-02, + 7.2861e-02, 3.0458e-02, -9.6075e-02, -1.2647e-01, 2.7966e-02, + -6.5805e-02, 1.0081e-02, 4.9374e-02, 1.5660e-01, -4.2903e-02, + -7.9792e-02, 2.2622e-01, -6.6623e-02, -5.4678e-02, 3.2016e-01, + -2.0631e-01, 3.6732e-02, -3.8266e-03, 3.5298e-02, -2.0218e-01, + 1.5783e-01, -3.6070e-02, 1.1865e-01, -3.4082e-02, 2.0930e-02, + 8.3586e-02, 1.5725e-02, -3.8440e-02, 6.5960e-03, -3.2614e-02, + -1.2969e-01, 2.5196e-02, -7.0515e-02, -2.4179e-02, 1.0546e-01, + -4.5103e-03, 1.0148e-01, 9.7182e-02, 4.2287e-02, -1.2103e-02, + -6.1044e-02, 5.8164e-02, -1.3052e-02, 1.2791e-01, -1.5350e-02, + -4.3890e-02, -9.8166e-02, 4.9865e-02, -6.0790e-03, -1.1680e-01, + 7.4108e-03, -9.9132e-03, 5.0176e-03, -5.9086e-02, -1.6564e-02, + -2.7472e-02, 2.0742e-02, 1.3547e-01, 1.4747e-02, 3.9906e-02, + 9.4255e-02, -3.2829e-02, -6.6845e-02, -8.2401e-03, -5.0691e-02, + 2.7525e-02, 8.2268e-02, 1.1415e-01, -2.1360e-02, 9.0743e-03, + -1.2780e-01, 1.0709e-01, 3.6027e-02, -3.8634e-02, -3.7508e-02, + -3.5288e-02, -2.5998e-02, -6.0742e-02, -7.8862e-02, -1.0022e-01, + 3.9805e-02, 
-3.6270e-02, -5.7538e-02, -2.7233e-02, 4.4938e-02, + 2.6449e-03, 9.3045e-03, 3.3165e-02, -7.1608e-03, 6.8827e-02, + 1.0516e-01, -3.3185e-02, 8.7285e-02, 3.3311e-02, -1.1761e-01, + -1.3953e-01, -7.4792e-02, 3.7520e-02, 1.5746e-02, 9.0114e-04, + -8.0773e-02, 4.4950e-02, 6.3050e-02, -8.5671e-02, -6.2881e-03, + -1.5994e+00, -4.3553e-02, 2.9454e-02, -5.6907e-02, -3.5270e-02, + -3.1434e-02, 1.7889e-02, -7.5432e-03, -1.1371e-01, -5.5130e-02, + 2.9257e-02, -1.7139e-01, -1.1265e-01, 6.7160e-02, -3.7416e-02, + -7.2524e-02, -3.5468e-02, -1.1186e-03, 6.8092e-02, 1.1672e-01, + -1.4002e-04, 3.5592e-02, 5.8436e-02, 1.3232e-01, -7.7206e-02, + -4.7924e-02, -8.6854e-02, -4.7352e-02, 8.5665e-02, 1.4172e-01, + -1.4817e-01, -1.8879e-02, -7.7543e-02, 1.4620e-01, 1.0426e-02, + 5.8315e-03, -6.9976e-03, -1.2946e-01, 7.7025e-02, 3.7671e-02, + 1.4007e-02, 4.8044e-02, 1.4411e-02, -5.2605e-02, -1.2956e-01, + 1.0883e-02, 3.6544e-02, -1.0974e-01, 4.1607e-02, -4.8047e-03, + -1.9821e-03, 3.7107e-02, -7.3962e-02, 7.7033e-02, 3.0835e-02, + -1.0230e-02, -1.2997e-01, -9.3121e-02, 2.8200e-02, -3.7071e-02, + 6.1153e-02, 1.6810e-01, 8.3451e-02, -7.2332e-03, 1.0225e-01, + 1.1711e-01, -1.2504e-01, 1.0692e-02, 1.7643e-01, -9.4140e-02, + -1.7660e-02, -3.5525e-02, 1.3885e-01, 9.2340e-02, -1.6297e-02, + 2.5335e-02, 2.0930e-02, -3.7296e-02, -2.7389e-02, -1.2104e-01, + 6.3596e-02, -5.1928e-02, 9.5645e-02, -7.1889e-02, 3.9743e-03, + 3.1035e-02, -7.1511e-02, 6.0946e-02, -7.1944e-02, -4.7700e-02, + 6.4958e-02, 1.0425e-01, 7.0645e-02, 2.9044e-02, 1.3859e-02, + -8.4643e-02, -2.9044e-02, -8.2613e-02, 5.3671e-02, -4.7501e-02, + -4.3595e-02, 1.8381e-02, 1.6899e-01, 1.1637e-01, -1.2471e-01, + -6.4090e-02, -2.6507e-02, 7.6388e-02, -7.0416e-02, -2.9882e-02, + -1.3445e-02, 4.8258e-03, 3.6539e-03, 3.5404e-02, -8.2439e-02, + 2.2725e-02, 2.4114e-03, -1.5794e-02, -8.3513e-02, -3.4496e-02, + 1.8298e-02, -2.2342e-02, 1.4748e-04, -5.4580e-02, -1.0311e-01, + -2.7600e-02, 4.6100e-02, 1.7093e-02, 1.9861e-02, 2.2585e-02, + 1.4885e-02, 
1.0653e-01, 1.1443e-02, 8.4617e-02, 1.5395e-02, + 3.1660e-02, 3.9251e-03, -9.7112e-02, 3.9738e-02, -2.0323e-03, + -1.5839e-02, -1.2260e-02, -8.4579e-02, -2.8312e-03, 1.5448e-01, + -6.6625e-02, 1.7054e-01, 1.5554e-02, 4.5368e-02, 1.5215e-01, + 5.1925e-02, 8.8155e-03, 9.9351e-03, 6.4983e-02, 4.1809e-02, + 1.0706e-01, -1.1904e-02, -7.9306e-02, -1.8990e-02, 8.2367e-02, + 3.7968e-02, 3.2559e-02, 3.2740e-02, 7.0236e-02, -3.4334e-02, + -1.2870e-01, -1.7692e-02, 6.6359e-02, 1.2667e-01, 6.2629e-02, + 2.6319e-02, 6.9796e-02, -7.5105e-02, 9.2368e-02, 4.5839e-02, + -9.4532e-03, 7.9225e-02, 1.2312e-01, -4.0744e-02, 3.0323e-02, + -3.8575e-02, 6.1851e-02, -4.2298e-03, 1.4596e-03, 2.9492e-02, + -6.8381e-02, -1.5145e-01, 1.4044e-02, 3.8219e-03, 2.7525e-02, + -5.9736e-02, 8.6931e-02, -5.9665e-02, 6.6026e-02, -4.7780e-02, + 8.0811e-02, 9.5365e-02, -4.2623e-02, 3.3717e-03, -1.6398e-02, + -5.8409e-02, 5.1490e-02, 1.0267e-01, 7.1453e-03, -7.2832e-02, + 1.0664e-01, 1.3928e-02, 3.6142e-02, 4.5365e-02, 3.0590e-02, + -4.0905e-02, 3.1009e-03, 4.9965e-02, 6.2574e-02, -3.7965e-02, + 1.5737e-01, -2.2978e-03, 1.3913e-01, 1.9018e-01, 4.1395e-02, + 1.9691e-02, -5.2322e-02, 3.8990e-02, 9.0262e-02, -4.5511e-02, + 1.7857e-02, -6.1070e-03, 7.9881e-02, -6.1300e-02, 1.5891e-02, + -9.1146e-03, -1.0800e-02, 1.1645e-01, 4.1218e-02, 1.4686e-01, + 1.4334e-01, 3.2606e-02, -4.7522e-02, 9.4717e-02, -8.2010e-02, + 7.2789e-02, -1.5124e-01, -4.3470e-04, 5.9777e-02, 1.2152e-01, + -4.7845e-02, 2.5638e-02, -9.8613e-02, -2.8399e-03, 1.3092e-01, + 5.0667e-03, -1.4642e-01, -1.3147e-01, 6.6508e-02, 4.7440e-02, + -2.2257e-01, 3.7247e-02, 4.4363e-03, 3.6616e-02, -1.6164e-02, + 2.2315e-02, -7.9318e-03, 2.3029e-03, 7.1865e-03, 8.5034e-02, + -1.1355e-02, -4.4492e+00, -4.4573e-02, 2.7494e-02, -3.2405e-02, + -1.0286e-01, -5.5482e-02, -3.4495e-02, -5.3544e-02, 1.8831e-02, + -7.9519e-02, -3.4958e-02, -6.5070e-03, -2.7990e-02, -5.7086e-02, + 7.8146e-02, -9.5072e-03, -8.0300e-02, 4.5083e-02, -7.5206e-02, + 1.1142e-01, 5.7118e-02, 
-1.1984e-02, 9.7471e-03, -1.5284e-01, + -3.3771e-02, 1.0841e-01, 2.3453e-02, -1.4740e-02, 6.4261e-02, + -5.0851e-02, 1.1362e-01, 7.0055e-02, 4.6831e-02, 3.7477e-02, + -1.3111e-02, -9.9276e-03, 6.9019e-03, -7.9165e-02, -3.6014e-03, + 1.1528e-02, 1.4484e-02, 3.9553e-02, -1.0392e-01, 9.0540e-02, + -2.3436e-02, -2.2548e-01, 1.1270e-01, 5.3056e-02, 9.3794e-02, + 1.0745e-01, 3.8244e-02, 6.4040e-02, 1.0307e-01, -9.8231e-03, + 3.1430e-03, 2.4322e-02, -2.4742e-02, 2.0988e-02, -5.0275e-02, + 1.0009e-01, -1.2285e-01, -8.5569e-02, 7.0198e-02, -6.9018e-02, + -3.8501e-02, -1.6234e-01, -2.3968e-02, 4.3340e-02, -4.9714e-02, + -2.6754e-02, -1.0560e-03, -1.0699e-01, -2.9853e-02, 7.2143e-02, + -1.1260e-02, 4.7919e-02, 9.6402e-02, 4.0698e-02, -4.5639e-02, + -2.8596e-02, 4.3692e-02, 7.4926e-02, -6.2533e-02, -8.9408e-02, + -6.3402e-02, 1.1697e-01, 7.2318e-03], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[-0.0048, 0.0208, 0.0301, ..., 0.0115, 0.0131, 0.0097], + [ 0.0114, -0.0127, 0.0137, ..., 0.0169, 0.0023, 0.0218], + [-0.0232, -0.0296, 0.0010, ..., 0.0052, -0.0218, 0.0328], + ..., + [-0.0138, -0.0232, -0.0012, ..., -0.0105, -0.0009, 0.0167], + [-0.0087, 0.0149, -0.0075, ..., 0.0247, -0.0048, -0.0043], + [-0.0146, 0.0123, 0.0197, ..., -0.0132, 0.0005, -0.0090]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2957, -0.3293, -0.3479, ..., -0.1686, -0.4126, -0.3352], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-2.0264e-02, 2.8854e-02, 4.7760e-03, ..., -4.8904e-03, + -3.3169e-03, -2.8595e-02], + [ 7.9193e-03, -1.0986e-02, 8.4000e-03, ..., 2.7145e-02, + 3.1189e-02, -2.6474e-02], + [ 1.8585e-02, -1.3618e-02, -1.1322e-02, ..., 6.1989e-03, + -1.4870e-02, -5.5194e-05], + ..., + [ 1.0979e-02, 2.5269e-02, -1.1635e-03, ..., 2.2926e-03, + 2.9037e-02, -2.4094e-02], + [ 1.6174e-02, 2.0721e-02, 5.5618e-03, ..., 
-1.0529e-03, + 6.1226e-03, 1.5610e-02], + [-1.6403e-02, 1.9646e-03, -7.2136e-03, ..., -3.4119e-02, + -3.3054e-03, -1.8219e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 7.6416e-02, 3.9429e-02, -1.3733e-01, 5.8136e-02, 3.9581e-02, + 7.4158e-02, 5.7098e-02, -3.6793e-03, -6.1531e-03, 1.2093e-02, + 2.0386e-02, -3.7659e-02, -1.5125e-01, -8.3130e-02, 2.2827e-02, + -1.5557e-05, -9.2102e-02, -7.6752e-03, -2.1561e-02, -1.9568e-01, + 3.1555e-02, 3.9185e-02, 7.4387e-04, -3.9825e-02, 5.6152e-02, + 1.3351e-03, 3.2654e-02, -4.4489e-04, -8.2458e-02, 4.4403e-02, + -2.3438e-02, 8.2947e-02, 2.9175e-02, -5.3329e-03, 8.5876e-02, + -1.5121e-02, -6.7406e-03, 7.0435e-02, 2.0416e-02, 6.4636e-02, + 2.3041e-02, -1.1034e-03, -2.3956e-02, -1.0437e-02, -5.2147e-03, + -7.6866e-03, -8.5815e-02, 6.6956e-02, 6.4209e-02, -1.0811e-02, + 4.2648e-03, 6.3133e-03, 8.9111e-02, -1.9852e-02, -6.9580e-02, + 8.1543e-02, -6.0883e-02, 6.6795e-03, 7.3303e-02, -1.9196e-02, + -1.4107e-02, -8.1970e-02, 6.2927e-02, -6.9702e-02, 2.7161e-02, + -5.0385e-02, 5.2551e-02, 7.9498e-03, 6.5346e-03, -7.0496e-02, + 8.2886e-02, 6.8848e-02, 1.0809e-01, 1.0033e-02, -5.7526e-02, + 1.2383e-02, 5.5084e-02, -5.0774e-03, 1.1398e-02, 5.9845e-02, + 1.1055e-02, -3.6377e-02, -7.8369e-02, 2.3499e-02, 3.6896e-02, + -3.3051e-02, 3.8025e-02, -1.5640e-02, 5.0903e-02, 4.3091e-02, + -7.7972e-03, 7.2571e-02, -2.4017e-02, -9.0210e-02, -6.1584e-02, + 2.7695e-02, 4.2542e-02, -1.4331e-01, -3.0945e-02, 7.7515e-02, + 7.1106e-02, -5.2948e-02, -9.3994e-03, 5.4230e-02, 2.2110e-02, + 1.4473e-02, 4.0009e-02, -3.9558e-03, 2.9907e-02, -2.3315e-02, + -6.4148e-02, 7.2876e-02, -6.2805e-02, 7.8552e-02, 1.1102e-01, + 4.9667e-03, 2.8915e-02, 2.5131e-02, 1.1731e-01, 3.4542e-03, + -4.7058e-02, -6.0699e-02, -5.7709e-02, -2.8259e-02, -6.4392e-02, + -1.6251e-02, -5.6091e-02, -1.2344e-02, -6.3843e-02, -8.3679e-02, + -4.3152e-02, -3.4393e-02, -3.1616e-02, -6.9092e-02, 1.3557e-02, + -1.1124e-02, 
3.5828e-02, 2.5650e-02, -4.1473e-02, -3.2440e-02, + 7.1655e-02, 2.5589e-02, -8.5693e-02, -4.2908e-02, 7.2517e-03, + 3.8471e-03, 7.3059e-02, 3.0029e-02, -5.1941e-02, 1.0046e-01, + 1.2018e-01, 6.3629e-03, 8.5220e-03, -1.1635e-02, -1.6992e-01, + 1.0201e-02, -8.0750e-02, 5.5023e-02, 6.4659e-03, 4.5990e-02, + 8.8989e-02, -3.0716e-02, -2.3865e-02, -5.3650e-02, 7.0801e-02, + 2.2049e-02, -7.4158e-02, 1.0071e-02, -8.0261e-03, 2.7328e-02, + -8.1543e-02, 2.7054e-02, -2.3270e-04, 9.7580e-03, 7.5623e-02, + -5.8167e-02, -4.1901e-02, -3.5919e-02, -5.2124e-02, -4.8645e-02, + -1.0242e-01, -1.2451e-01, 1.4313e-02, 2.7180e-03, 6.5002e-02, + -2.1469e-02, -5.6114e-03, 4.1089e-01, -5.4565e-02, -1.5175e-02, + -3.8509e-03, -7.3181e-02, 1.0345e-02, 4.2328e-02, -1.1879e-02, + 1.0785e-01, 6.1569e-03, -2.9236e-02, 1.4844e-01, 2.1896e-03, + -5.9082e-02, -2.0008e-03, 6.3293e-02, 6.3599e-02, 8.5144e-03, + 1.5945e-02, 2.9583e-03, 4.7607e-02, -1.9806e-02, 6.9580e-02, + -7.1594e-02, 1.3557e-02, 3.0716e-02, 2.7649e-02, 2.7283e-02, + 4.7516e-02, -6.5918e-02, 2.7046e-03, 4.8866e-03, -1.2024e-01, + 4.6234e-02, 3.3447e-02, 4.3213e-02, 4.9591e-02, 2.6016e-02, + -8.7952e-02, 1.7502e-02, -3.1067e-02, 2.2278e-02, -1.0490e-03, + 4.4312e-02, 3.8147e-02, 5.5786e-02, 3.4302e-02, -2.3178e-02, + -1.8066e-02, 2.7878e-02, 9.2926e-03, 3.6560e-02, 1.5404e-02, + 4.3365e-02, 8.6792e-02, 3.2898e-02, -5.2414e-03, -4.9744e-03, + 9.2407e-02, 1.4526e-01, -3.5461e-02, 7.4219e-02, 1.7166e-02, + -2.1286e-02, -4.3678e-03, 5.5771e-03, -2.0508e-02, 1.2396e-01, + 2.7802e-02, 9.6283e-03, -4.3518e-02, -4.7211e-02, -3.2940e-03, + -4.8584e-02, 3.1982e-02, 2.0294e-02, -2.6001e-02, -9.3201e-02, + -8.6121e-02, -6.6406e-02, 5.2612e-02, -1.2108e-02, -7.9224e-02, + 5.6824e-02, 1.8967e-02, -1.0948e-03, -9.5764e-02, -7.7200e-04, + 2.7527e-02, 1.9699e-02, 1.5518e-02, -3.7628e-02, -2.3529e-02, + -4.3671e-02, -9.8648e-03, -2.5589e-02, 6.4125e-03, 4.7646e-03, + -1.6754e-02, -1.0658e-02, 5.5298e-02, 5.9692e-02, -1.0590e-01, + -4.5837e-02, 
9.1553e-02, 1.7761e-02, 5.0201e-02, 2.9037e-02, + -5.7343e-02, 2.0279e-02, 1.3016e-02, 2.9739e-02, -3.4302e-02, + -4.3518e-02, 9.9915e-02, -1.4793e-02, 5.0720e-02, 2.2079e-02, + -4.4647e-02, 6.1768e-02, -1.2291e-02, 8.8806e-03, -4.4952e-02, + 5.1300e-02, 7.6355e-02, 6.1310e-02, -8.8257e-02, -2.8595e-02, + -6.1890e-02, 2.6749e-02, 7.0496e-02, 3.5400e-02, 1.8219e-02, + -7.3853e-02, 9.9411e-03, 7.5264e-03, 1.2817e-03, 5.3809e-01, + -1.4111e-01, 1.3843e-01, -3.9001e-02, 3.5431e-02, -1.5918e-01, + 5.8899e-02, -1.0211e-01, -2.9861e-02, -7.7896e-03, 5.7098e-02, + -3.1403e-02, 1.8051e-02, 1.2283e-02, -2.8534e-02, -1.5078e-03, + -1.1528e-02, 1.7303e-02, -3.3875e-02, -6.5918e-02, 1.3115e-02, + -3.7567e-02, -3.0090e-02, 8.7524e-02, -8.1940e-03, -5.2063e-02, + 5.2429e-02, 6.9153e-02, -4.5868e-02, -1.0033e-02, -4.6234e-02, + -3.8422e-02, 1.1635e-02, -8.1604e-02, -9.4177e-02, 3.2593e-02, + -9.1019e-03, -8.7952e-02, -6.4636e-02, -5.2399e-02, 5.1392e-02, + 9.8145e-02, -8.0681e-04, 8.5449e-02, 1.3745e-01, 5.7281e-02, + -1.5488e-02, 2.2259e-03, -6.7383e-02, -4.9835e-02, 5.6213e-02, + -2.5291e-03, -4.9515e-03, 6.8359e-03, -1.9875e-03, -5.8228e-02, + 1.4725e-02, 3.4790e-02, 5.2399e-02, 4.0222e-02, -4.0955e-02, + -2.0660e-02, 4.7241e-02, -2.8656e-02, 1.9485e-02, -2.2095e-02, + 5.2094e-02, 4.3823e-02, 3.9856e-02, -1.1200e-02, -9.5337e-02, + 5.9509e-03, -3.6530e-02, 2.0416e-02, 1.5732e-02, -4.2877e-02, + -6.1340e-02, -3.6621e-02, -1.7960e-02, -8.1970e-02, 1.9547e-02, + -2.1179e-02, 3.5706e-02, 1.0452e-02, -2.1118e-02, -2.4033e-02, + 7.1289e-02, 1.2250e-01, 2.2446e-02, -3.7262e-02, -2.0370e-02, + -1.4075e-01, 6.0486e-02, -2.9888e-03, 6.6948e-03, 3.5156e-02, + -3.1921e-02, -3.7415e-02, 5.8136e-02, -8.8684e-02, -7.2693e-02, + 5.1910e-02, 1.5869e-02, -4.8757e-04, 7.6904e-02, -9.7046e-03, + 4.1016e-02, -3.4027e-02, -5.0842e-02, 1.3321e-02, 5.3680e-02, + 3.1414e-03, -4.7455e-02, -2.7618e-02, 2.5101e-02, -1.5723e-01, + 7.8201e-03, -2.0660e-02, 2.5883e-03, -6.2447e-03, 5.5389e-02, + -1.3313e-02, 
4.7821e-02, 6.9351e-03, 5.7983e-02, 5.6183e-02, + 1.1887e-02, 5.2910e-03, -3.2379e-02, 5.1392e-02, -7.3975e-02, + -8.5571e-02, -1.4877e-02, -1.5236e-02, -2.1667e-02, -1.7227e-02, + -1.8250e-02, 4.9805e-02, -9.5947e-02, -3.6545e-03, -6.0822e-02, + -9.9731e-02, 5.7709e-02, 9.8343e-03, 5.7373e-02, -3.7537e-02, + 4.8615e-02, -3.5797e-02, 2.6840e-02, 4.7211e-02, -3.5217e-02, + 6.2103e-02, -6.2065e-03, -1.5650e-03, -4.1718e-02, 2.3468e-02, + -1.9951e-03, 2.7786e-02, 3.2043e-02, 1.0689e-02, -9.7351e-02, + -9.5093e-02, 1.6891e-02, 8.6060e-02, 1.4938e-02, -5.2521e-02, + 2.5116e-02, 2.1744e-02, -1.1877e-01, 1.1864e-02, -8.8501e-02, + -1.6541e-02, -1.1487e-01, 1.8127e-02, 3.0273e-02, 9.3994e-02, + -3.4607e-02, -3.0746e-02, 9.1003e-02, 5.6641e-02, -5.7037e-02, + -6.4392e-02, 4.1718e-02, 1.3525e-01, 5.1697e-02, -8.3313e-02, + 4.7760e-03, -5.3406e-02, -3.6774e-02, 1.1792e-01, -2.0370e-02, + -4.1016e-02, 2.3682e-02, 3.7537e-02, -5.7495e-02, -5.1300e-02, + 2.9572e-02, 2.8168e-02, 1.2695e-02, 7.2241e-04, 3.0762e-02, + 7.5760e-03, 5.3284e-02, -7.0068e-02, 2.8854e-02, -5.5328e-02, + -4.3091e-02, -7.0679e-02, 1.8738e-02, -1.3046e-02, 3.7781e-02, + 1.0361e-02, -1.2070e-02, 3.9749e-03, -7.9651e-02, -1.5659e-03, + -4.2450e-02, 3.2806e-02, -4.0283e-02, 1.9882e-02, -3.8605e-02, + 5.2246e-02, 4.4189e-02, -6.0577e-02, -3.6682e-02, -8.7204e-03, + -6.4819e-02, -2.7985e-02, 4.7394e-02, 7.2266e-02, -3.6041e-02, + 1.1871e-02, 2.5116e-02, 5.1300e-02, -3.8666e-02, 6.6467e-02, + 4.7821e-02, -2.4509e-03, -5.8350e-02, 4.3304e-02, 3.1281e-02, + 5.8990e-02, 1.9951e-03, 3.8544e-02, -5.9891e-03, 5.7556e-02, + 1.6617e-02, 3.1319e-03, 7.5134e-02, 6.8420e-02, 4.2236e-02, + 5.5573e-02, -5.0323e-02, -2.2797e-02, -1.2222e-02, -2.8610e-02, + -4.1321e-02, 4.5563e-02, 2.5726e-02, 2.1149e-02, 5.3741e-02, + -4.7455e-02, -2.0660e-02, -7.1106e-02, -4.9629e-03, -2.2308e-02, + -4.8706e-02, 4.1077e-02, -1.4275e-02, -4.3823e-02, 2.3315e-02, + 7.4234e-03, -2.4673e-02, 1.0425e-01, 7.0457e-03, 4.8218e-02, + 7.4120e-03, 
8.8425e-03, 3.7384e-02, -2.9678e-02, 6.0349e-03, + 6.0638e-02, -4.3213e-02, -1.9580e-01, -7.4730e-03, -5.1819e-02, + -3.3112e-02, 8.0383e-02, -4.2938e-02, 7.4219e-02, 2.4536e-02, + -1.0063e-02, 9.0485e-03, 1.3123e-03, -1.2062e-02, -5.7907e-03, + 1.0223e-02, -3.5004e-02, 7.4036e-02, -8.8501e-03, -1.6312e-02, + -1.4610e-02, -3.0640e-02, -9.7809e-03, 6.0852e-02, -9.5459e-02, + -2.1076e-03, -8.5022e-02, -6.9153e-02, 1.4397e-02, 6.0516e-02, + -9.5081e-04, -1.5594e-02, -1.4824e-02, 3.0151e-02, -8.6365e-02, + 2.0813e-02, -6.9092e-02, -6.9031e-02, -3.3295e-02, -1.7899e-02, + -1.0413e-01, 8.7357e-04, 8.5327e-02, 1.5732e-02, 3.0212e-02, + 1.4087e-01, 4.0466e-02, 3.7170e-02, 6.4621e-03, -3.9520e-02, + 6.2988e-02, -2.2156e-02, -4.6265e-02, -2.0157e-02, 6.6711e-02, + -5.0385e-02, 1.3031e-02, -2.8976e-02, 7.9285e-02, 8.1604e-02, + -1.5625e-02, -5.6976e-02, -4.4464e-02, -1.1398e-02, -7.6843e-02, + 3.2501e-02, 9.4177e-02, 3.9398e-02, -3.1769e-02, 5.9418e-02, + -7.1167e-02, 7.4646e-02, -8.1787e-02, -4.7699e-02, -2.6825e-02, + -6.9666e-04, 4.6844e-02, -7.4768e-02, 5.8167e-02, -4.3701e-02, + 8.9340e-03, -2.5970e-02, -4.2694e-02, -3.4973e-02, 3.2196e-02, + -5.8258e-02, -2.2598e-02, -6.3782e-03, -7.2754e-02, 3.6743e-02, + 1.5320e-01, -2.5864e-02, -3.7689e-02, -6.9092e-02, -5.9387e-02, + 1.8661e-02, 2.9053e-02, 6.0455e-02, -8.7830e-02, -3.1311e-02, + 1.0277e-02, 6.6345e-02, -4.2969e-02, -3.1624e-03, -4.0009e-02, + 3.0502e-02, -1.7914e-02, -6.5796e-02, -8.0872e-02, -1.6022e-02, + -1.2671e-01, 3.2654e-02, -9.5139e-03, 1.7807e-02, -6.0150e-02, + -6.4430e-03, -8.7204e-03, -8.2855e-03, -5.0446e-02, 7.0915e-03, + -2.0523e-02, -1.1787e-02, 3.8574e-02, -1.5854e-02, 1.0025e-02, + -7.5195e-02, -5.3596e-03, 7.8613e-02, -2.2324e-02, 1.7990e-02, + 1.1328e-01, -3.4546e-02, 1.2367e-02, -5.5634e-02, -6.4758e-02, + -3.3173e-02, -5.9174e-02, -7.6599e-02, 1.9653e-02, 5.2887e-02, + 6.2683e-02, -1.0712e-01, 6.7749e-02, 3.8605e-02, -4.1748e-02, + -3.0762e-02, -2.6627e-02, -7.7881e-02, 2.5978e-03, 4.1199e-03, 
+ 1.3733e-01, -4.8889e-02, -9.5520e-03, 6.9031e-02, 2.8091e-02, + 4.2877e-02, 3.3203e-02, -3.7201e-02, 2.0309e-02, -5.8746e-02, + 1.0063e-02, 8.3252e-02, 1.8173e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.1959, 2.2433, 2.1857, 2.1209, 1.6389, 2.1854, 2.1641, 2.2831, 2.2128, + 2.2176, 2.1341, 2.2153, 2.3185, 2.2126, 2.2357, 2.0978, 2.1635, 2.1208, + 2.2639, 1.7605, 2.3400, 2.1941, 2.3848, 2.3280, 2.1389, 2.2421, 2.1610, + 2.2365, 2.1423, 2.0889, 2.1251, 2.1304, 2.1786, 2.2438, 2.0898, 2.1105, + 2.3662, 2.1162, 2.1394, 2.2017, 2.0881, 2.1809, 2.1864, 2.1774, 2.1370, + 2.1468, 2.2602, 2.0467, 2.2073, 2.0081, 2.2256, 2.2054, 2.3503, 2.1690, + 2.3088, 2.0737, 2.3294, 2.0558, 2.1249, 2.2076, 2.1621, 2.1766, 2.1614, + 2.1080, 2.2065, 2.4156, 2.2566, 2.1855, 2.1032, 1.7747, 2.1803, 2.1558, + 2.0032, 2.1216, 2.2635, 2.3478, 2.2386, 2.1590, 2.0873, 2.1088, 2.1520, + 2.2779, 2.1882, 2.1679, 2.1767, 2.2001, 2.2802, 2.0911, 2.1828, 2.0577, + 2.2320, 2.1016, 2.2414, 2.1581, 2.1407, 2.3021, 2.0415, 2.2577, 2.2683, + 2.1252, 2.1751, 2.0928, 2.1778, 2.1687, 2.1598, 1.8509, 2.4226, 2.1822, + 2.1058, 2.1584, 2.1773, 2.2128, 2.1975, 2.3101, 2.3948, 2.1183, 2.0253, + 2.3268, 2.2789, 2.1262, 2.0824, 2.2447, 2.2727, 2.1143, 2.1328, 2.1054, + 2.2809, 2.2893, 2.2209, 2.1829, 2.2278, 2.3562, 2.2678, 2.1217, 2.3154, + 2.1505, 2.1856, 2.1743, 2.0592, 2.4429, 2.1887, 2.1263, 2.2271, 2.0800, + 2.1909, 2.0682, 2.1730, 2.1021, 2.2987, 2.1907, 2.0872, 2.1588, 2.2281, + 2.1060, 2.1932, 1.4218, 2.1816, 2.0950, 2.1298, 2.3864, 2.0477, 2.1148, + 2.2073, 2.1981, 2.1993, 2.1867, 1.9977, 2.2300, 2.2829, 2.0759, 2.2131, + 2.0797, 1.9357, 2.1667, 2.0612, 2.2055, 2.1462, 2.2525, 2.2415, 2.1718, + 2.2981, 2.3206, 2.3284, 2.2534, 2.0522, 1.9514, 2.0436, 1.2547, 2.2027, + 2.0659, 2.1481, 2.2872, 2.2690, 2.1321, 2.2106, 2.1154, 2.2069, 2.1221, + 0.9120, 2.2188, 2.4457, 2.1044, 2.0904, 2.1309, 2.2110, 2.1845, 2.2859, + 2.1890, 2.2604, 
2.1990, 2.2026, 2.0569, 2.3205, 1.8578, 2.1635, 2.1395, + 1.9611, 2.1856, 2.2196, 2.1708, 2.2654, 2.2020, 1.6530, 2.4133, 2.1938, + 2.3114, 2.2499, 2.0929, 2.1446, 2.0065, 2.1790, 2.1916, 2.2220, 2.0788, + 2.1493, 2.1848, 2.1861, 2.1895, 2.1188, 2.2206, 2.1182, 2.2558, 2.2083, + 2.1962, 2.0984, 2.1888, 2.1710, 2.3315, 2.1818, 2.1630, 2.1299, 2.1561, + 2.2780, 2.2752, 2.2380, 2.1594, 2.2068, 2.2501, 2.2587, 2.2793, 2.1248, + 2.2332, 2.2974, 2.1941, 2.3257, 2.1813, 2.1643, 2.2622, 2.0120, 2.1354, + 2.1130, 2.2935, 2.2795, 2.3622, 2.0759, 2.2596, 2.0937, 2.1968, 2.1598, + 2.1423, 2.1361, 1.6926, 2.1329, 2.2892, 2.2827, 2.1544, 2.2791, 2.2954, + 2.0467, 2.0488, 2.1520, 2.2570, 2.1547, 2.2220, 2.1348, 2.1048, 2.1245, + 2.1469, 2.0957, 2.2185, 2.0759, 2.3178, 2.1051, 2.2986, 2.2410, 2.0303, + 2.3367, 2.1090, 2.2387, 2.2247, 2.3043, 2.1765, 2.4028, 2.2597, 2.0883, + 2.0746, 2.1657, 2.2288, 2.1221, 2.3232, 2.3526, 2.0159, 2.1545, 2.1041, + 0.3871, 2.3329, 2.2983, 2.0760, 2.4192, 2.1847, 2.1178, 2.2100, 2.2635, + 2.2291, 2.1490, 2.1012, 2.2794, 2.2359, 2.1499, 2.1478, 2.3279, 2.1762, + 2.2106, 2.2876, 2.2022, 2.0238, 2.2397, 2.1261, 2.3905, 2.2223, 1.9225, + 2.2648, 2.2286, 2.0396, 2.1020, 2.1426, 2.1499, 2.3850, 2.1622, 2.2299, + 2.5684, 2.2173, 2.3663, 2.2255, 2.2637, 2.0994, 2.2758, 2.3033, 2.2118, + 2.3846, 2.2690, 2.1289, 2.3554, 2.4116, 2.1124, 2.1070, 2.1095, 2.1002, + 2.2363, 2.3103, 2.1195, 2.1077, 2.1961, 2.2064, 2.1364, 2.1967, 2.2627, + 2.2412, 2.2803, 2.2380, 2.0999, 2.1453, 2.3822, 2.1865, 2.0122, 2.2556, + 2.2398, 2.1578, 2.4047, 2.2957, 2.1867, 2.2028, 2.1285, 2.3118, 2.1787, + 2.0632, 2.1458, 2.1129, 2.2477, 2.2613, 2.1903, 2.1475, 2.2234, 2.2157, + 1.5280, 2.7052, 2.1411, 1.9908, 2.1080, 2.3099, 2.1839, 2.2772, 2.3016, + 2.1400, 2.1563, 2.1544, 2.0065, 2.1619, 2.2343, 2.2508, 2.0020, 2.1819, + 2.2169, 2.2876, 2.2819, 2.0921, 2.2851, 2.1712, 2.1838, 2.4053, 2.2280, + 2.1213, 2.2339, 1.5170, 2.1571, 1.9372, 2.1083, 2.2179, 2.2474, 2.1511, + 2.2367, 2.1640, 
2.3784, 2.1733, 2.1151, 2.1422, 2.1430, 2.0686, 2.1205, + 2.1924, 2.2715, 2.2642, 2.2484, 2.1470, 2.1295, 2.2515, 2.1685, 2.0151, + 2.1463, 2.4572, 2.1058, 2.1192, 2.0471, 2.2623, 2.3658, 2.2353, 2.1376, + 2.3325, 2.1941, 2.0276, 2.1756, 2.1455, 2.1653, 1.4040, 2.1441, 2.1498, + 2.0774, 2.1498, 2.0594, 2.1203, 2.2170, 2.1717, 2.1872, 2.2332, 2.0680, + 2.1824, 2.2045, 2.0639, 2.2616, 2.1788, 2.2463, 2.2707, 2.1051, 2.1681, + 2.2768, 2.0634, 2.2021, 2.2291, 2.2101, 2.1755, 2.0786, 2.2107, 2.0552, + 2.0850, 2.3577, 2.2024, 2.3391, 2.2122, 2.1305, 2.1941, 2.3249, 2.1990, + 2.1484, 2.0657, 2.2080, 2.2095, 2.2046, 2.1868, 2.1210, 2.1913, 2.1871, + 2.3558, 2.3104, 2.2753, 2.2643, 2.1893, 2.0487, 2.2330, 2.3197, 2.1989, + 2.0989, 2.2021, 2.3357, 2.2768, 2.2759, 2.0574, 2.2289, 2.1212, 2.1820, + 2.2164, 2.2613, 2.1490, 1.9993, 2.3027, 2.0638, 2.1157, 2.1022, 2.0929, + 2.3347, 2.2287, 2.2191, 1.6776, 2.1588, 2.2015, 2.3349, 2.1413, 2.2229, + 2.2184, 2.1025, 2.1480, 2.2768, 2.1595, 2.1132, 2.0917, 2.2105, 2.4072, + 2.0885, 2.2566, 2.2266, 2.0839, 1.9988, 2.0598, 2.2237, 2.1696, 2.2168, + 2.1860, 2.1769, 2.0475, 2.1407, 2.1101, 2.1082, 2.1283, 2.1669, 2.1678, + 2.2295, 1.9174, 2.0682, 2.2209, 2.1534, 2.1680, 2.2277, 2.1702, 2.1736, + 2.3767, 2.2455, 2.4314, 2.2258, 1.8448, 2.2785, 2.0552, 2.2357, 2.2187, + 2.2307, 2.1839, 2.1638, 2.2256, 2.2257, 2.1354, 2.1825, 2.0499, 2.2662, + 2.0253, 2.2905, 2.1492, 2.2266, 2.2326, 2.1619, 2.1309, 2.1369, 2.2871, + 2.2399, 2.1446, 2.2617, 2.1392, 2.0626, 2.0500, 2.1366, 2.1822, 2.2245, + 2.2088, 2.2096, 2.1357, 2.1353, 2.2215, 1.8183, 2.0847, 2.0899, 2.1745, + 2.1806, 2.3140, 2.2878, 2.2096, 2.2386, 2.2901, 2.2440, 2.0776, 2.1517, + 2.0757, 2.1171, 2.1626, 1.6976, 2.1217, 2.3893, 2.1851, 2.0992, 2.2829, + 2.1739, 2.2640, 2.2493, 2.1478, 2.2046, 2.1931, 2.1903, 1.8220, 2.2475, + 2.0913, 2.1884, 2.0506, 2.2598, 2.2075, 2.3095, 1.9063, 2.1329, 2.1046, + 2.1928, 2.1712, 2.1733, 2.0890, 2.2223, 2.2372, 2.1847, 2.1720, 2.3124, + 2.2107, 2.1327, 
2.2606, 2.4070, 2.1014, 2.2041, 2.2022, 2.1145, 2.0395, + 2.1650, 2.2246, 2.2635, 2.2594, 2.3897, 2.0606, 2.0628, 2.2408, 2.2936, + 2.0929, 2.1765, 2.0997, 2.1489, 2.2996, 2.2237, 2.2520, 2.0795, 2.3836, + 2.1249, 2.4757, 2.3715, 2.1985, 2.2140, 2.0363, 1.3627, 2.1650, 2.1868, + 2.1629, 2.1055, 2.1162, 2.1416, 2.2213, 2.2584, 2.2356, 2.1500, 2.0179, + 2.1893, 2.2718, 2.3281, 2.2356, 2.0494, 2.3746, 2.2077, 2.0318, 2.2620, + 2.2605, 2.1761, 2.2573, 2.1130, 2.2154, 2.3167, 2.2101, 2.1554, 2.2810, + 2.1703, 2.1957, 2.3504, 2.1863, 2.0534, 2.2269, 2.2191, 2.1727, 2.1098, + 2.0837, 2.2015, 2.3503], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.3281e-01, 1.0489e+00, -4.3994e-01, -3.5076e-01, -1.5734e-01, + 1.0467e-01, 3.4624e-01, -3.6057e-01, -5.3122e-01, -1.1508e-01, + -5.0123e-01, 2.9999e-01, -7.4618e-01, -5.3507e-01, 4.8069e-01, + -4.2248e-01, 1.7101e-01, 2.3967e-01, -5.6175e-01, -1.5818e+00, + 4.6960e-01, 2.5782e-01, 8.7915e-01, -1.2254e+00, 4.6011e-01, + -2.9357e-01, 3.7613e-01, 4.0989e-01, -5.0915e-01, 5.7232e-01, + 4.9235e-01, 4.1011e-01, -4.4866e-02, -1.1593e-01, -2.0305e-01, + 3.1680e-02, -5.8174e-01, -1.0083e-01, 1.1103e+00, 7.3384e-01, + -3.7280e-01, 5.5970e-02, 4.7164e-01, 4.6526e-01, -5.5908e-01, + -3.5303e-01, -3.3388e-01, -4.7566e-01, 4.4903e-01, 1.1533e-01, + -6.4007e-01, -3.7924e-01, 7.8134e-01, 6.7420e-02, -5.9359e-01, + 1.2319e+00, 8.8111e-01, -1.8171e-01, -1.3960e-01, -3.9758e-02, + 1.7319e-01, 4.6109e-01, -1.0275e-01, -4.8635e-01, 2.2356e-01, + 1.1343e+00, 6.4209e-01, -7.2440e-01, -3.6563e-01, -1.5383e+00, + 3.5883e-02, -1.2616e-01, 4.5378e-01, -6.5210e-01, -5.3505e-01, + -9.3343e-01, 6.5818e-01, 4.9906e-01, -2.6536e-01, 2.5868e-01, + 9.8699e-02, 6.8153e-01, -3.1711e-01, 6.5012e-01, -6.8029e-01, + -2.7511e-01, 5.5501e-01, 4.4060e-02, 1.8087e-03, -3.0272e-01, + -5.5106e-01, 8.0156e-02, -5.0127e-01, -2.2272e-01, -5.4057e-01, + -3.7910e-01, 3.7393e-01, -4.5307e-02, -7.3265e-01, 4.7157e-01, + -2.0217e-01, 
-6.4716e-01, -8.6948e-02, 1.8414e-01, 8.2459e-02, + 3.1776e-01, 1.0655e+00, 6.4381e-01, 1.8725e-01, -1.5551e-01, + 7.7160e-01, 6.6357e-01, 6.1445e-03, 4.1845e-01, 7.9661e-01, + -3.5947e-02, 5.9436e-01, 5.0569e-01, 6.5157e-01, 2.5360e-01, + 1.1022e-01, -7.2232e-01, -8.4216e-01, -3.0733e-01, 4.9215e-01, + -2.9305e-01, 1.1464e-01, 5.4001e-01, -1.9519e-01, 5.3638e-01, + 5.7463e-01, -7.0124e-01, 2.2865e-01, -3.8127e-02, 8.8887e-01, + 9.2389e-01, 5.1717e-01, -2.9086e-01, -1.4492e-01, -7.3113e-01, + -3.8683e-01, 6.1832e-01, -2.4134e-01, 2.6464e-01, -4.8043e-01, + 2.3333e-01, 1.3102e-01, -1.4933e-01, -6.4925e-01, 3.4122e-01, + 1.7284e-01, -6.7279e-01, 3.6753e-01, 1.3642e-01, -4.6944e-01, + 7.0057e-01, -2.5841e-01, 7.7047e-02, 5.0625e-02, -8.2669e-01, + -3.6478e-01, 1.6450e-01, 4.8215e-01, -3.4735e-01, -1.2598e-01, + 2.5064e-01, -6.9497e-02, 4.7381e-02, -5.4013e-01, -5.1097e-03, + 7.9240e-01, 2.5241e-01, -5.7863e-01, 5.1426e-01, -6.1765e-01, + 2.3296e-01, 2.1380e-01, -4.3335e-01, -2.0675e-01, 7.5988e-01, + -4.6048e-01, -6.6959e-01, 3.0083e-01, 3.7472e-01, 3.8105e-01, + 8.4046e-01, -3.9265e-01, 1.3335e+00, 1.4567e-01, -1.3545e-01, + -3.1525e-01, -6.1112e-01, -8.4796e-02, 3.9994e-01, 3.8930e-01, + -1.1288e-02, 8.5296e-01, -3.9526e-01, 2.2442e-01, 6.9698e-01, + -1.0549e+00, 4.6266e-01, 2.7690e-01, 3.1111e-01, 6.6209e-02, + -4.6759e-02, 6.1619e-01, 5.8232e-01, -6.4537e-01, -8.5775e-01, + 7.0397e-01, -3.8256e-01, -5.9776e-01, 5.3431e-01, 3.2315e-01, + -1.5651e-01, 1.5397e-02, 2.3762e-01, -3.1457e-01, 2.9154e-01, + 4.2581e-01, 5.1001e-02, -5.5437e-01, 9.1472e-01, 2.8580e-01, + -6.3040e-01, -8.1291e-01, 9.6829e-02, 2.1046e-01, 3.6351e-01, + -2.6908e-01, 9.2500e-02, 3.6032e-01, 3.7904e-02, -6.3151e-01, + -7.9317e-01, 3.9727e-01, 4.2895e-01, -1.9378e-01, 7.6716e-01, + 2.8807e-01, -1.6993e-01, 4.8568e-01, -6.1613e-01, -4.5870e-02, + -2.6191e-01, 7.7553e-02, 4.0871e-01, -8.8964e-01, 4.6862e-01, + 1.5260e-02, 4.7435e-01, 8.4179e-01, -4.0157e-01, 3.9829e-01, + 1.6903e-01, 4.2167e-01, 
-8.4015e-01, -8.4332e-01, -2.0669e-01, + -3.7132e-01, -2.7185e-02, 2.0107e-01, -1.6155e-01, -7.5347e-01, + -2.9117e-01, 5.7739e-01, 3.6695e-01, 1.6001e-01, 2.1570e-01, + 3.7116e-03, 6.9096e-01, -3.0534e-01, -8.5314e-01, -3.9907e-01, + -9.2290e-02, -4.2464e-02, 4.0167e-01, 2.1478e-01, -6.6176e-02, + 3.3254e-02, -7.6433e-01, -5.3241e-01, 5.8260e-01, -7.2345e-01, + 4.0051e-01, -5.5805e-01, 1.6328e-01, 1.1494e-01, -1.8896e-02, + -9.4009e-02, 2.6599e-01, 6.5200e-01, -7.1813e-01, -2.9351e-01, + 1.4233e-01, -3.6490e-01, -5.3627e-01, 4.7552e-01, 7.6735e-02, + 6.7022e-02, 6.1545e-01, -5.3540e-01, -6.3876e-01, -2.4615e-01, + 6.7330e-02, -4.3329e-01, 1.5423e-01, -6.4616e-01, -1.5042e-01, + 5.2939e-01, 1.3776e-01, 3.9853e-01, 2.5346e-01, 2.3949e-01, + 8.2627e-02, 3.1305e-02, 4.0734e-01, 4.0606e-01, 8.5729e-01, + -7.4386e-01, -4.3430e-01, 3.6861e-01, -2.6908e-01, -1.0369e-01, + -1.0651e+00, -1.1467e+00, -5.9298e-01, 6.4622e-01, -4.5900e-01, + 5.4481e-01, -2.3575e-01, -4.3648e-01, -4.0216e-01, -1.2223e-01, + -2.9405e-01, -1.0319e+00, 6.1357e-01, 5.8217e-01, -3.6815e-02, + 4.7501e-01, -2.8313e-01, -5.7884e-01, 9.5559e-01, -7.9043e-01, + 4.8335e-01, -3.6762e-01, 2.8385e-02, -9.3544e-01, 9.2769e-03, + 6.9211e-01, 4.3277e-01, 3.1741e-01, 6.5197e-01, -1.4770e-01, + -9.3074e-02, -7.4372e-01, -4.4881e-01, -1.1087e-01, 5.2817e-01, + -1.2982e+00, -5.1468e-01, -3.3536e-01, -1.9444e-01, 6.8215e-01, + -4.7416e-01, -5.2786e-01, 7.7139e-01, -7.5394e-01, 6.6867e-01, + -2.6968e-01, -4.9102e-01, 5.0436e-01, -6.4053e-01, -2.4396e-01, + -1.5928e-01, 2.6028e-01, 6.8988e-01, 5.3118e-03, -6.4690e-01, + -4.4641e-01, -5.6631e-01, -5.8067e-01, -4.0297e-01, -1.0473e-01, + -5.4320e-01, -5.8003e-01, 8.1864e-01, 8.3972e-01, 9.5139e-02, + -1.8129e-01, -5.2752e-01, 9.9258e-01, 4.1471e-01, 1.3180e-02, + -4.7539e-01, -3.0162e-01, 6.0368e-01, -5.5971e-01, -6.4534e-01, + 3.9169e-01, 6.3918e-01, -4.1132e-01, -5.0957e-01, 5.2105e-01, + -2.7070e-01, 5.9905e-02, -3.3576e-01, -3.3296e-01, 7.0715e-01, + 3.0238e-01, 
-4.5997e-01, 4.5236e-01, 1.5374e-01, 1.1668e+00, + -1.6149e+00, -2.6051e-01, -5.3227e-01, 1.7578e-01, 7.4930e-01, + 2.8102e-01, -4.9195e-01, 6.3192e-01, -2.3230e-01, -2.0161e-01, + 3.4353e-01, 3.6410e-01, -2.6098e-01, -3.7803e-01, 5.0583e-01, + -4.5856e-01, -1.7220e-01, 2.1415e-01, 5.8864e-01, 9.6195e-01, + 2.0834e-01, -3.0874e-01, 9.0913e-01, 4.2598e-01, -4.4309e-01, + 4.9674e-01, 2.3522e-01, -3.7915e-01, -2.1614e+00, -3.6390e-01, + 6.6082e-02, -1.7560e-02, 6.3947e-01, 5.7122e-01, -6.5979e-01, + -1.2240e-01, -3.1060e-01, -7.9018e-01, 3.2094e-01, 4.1335e-01, + 9.0558e-02, -5.1849e-01, -1.8333e-01, -1.7102e-01, 5.3537e-02, + -6.2528e-01, 4.0292e-01, -6.8299e-01, 1.4752e-01, -5.9252e-01, + -3.2536e-01, -5.0408e-01, -8.1444e-02, -8.7752e-02, -6.6041e-01, + -1.5532e-01, 1.9076e-02, -4.8949e-01, -4.7710e-01, -8.8513e-01, + -5.6397e-01, 2.1987e-01, -7.2030e-01, 1.7313e-01, 1.3092e-01, + -8.1733e-02, 3.9655e-01, 4.4970e-01, 1.2227e-01, 4.0097e-01, + -1.8540e-01, -4.0063e-01, 4.4000e-01, -3.4232e-02, -3.9916e-01, + -3.5780e-01, 4.9083e-01, -2.7053e-01, 4.0731e-01, -4.0411e-01, + -2.9179e-01, -1.9077e-01, -1.7176e-01, 5.6506e-01, 3.3544e-01, + 2.3209e-01, -8.1954e-01, -4.9928e-01, -1.9707e-01, 7.4907e-01, + 4.8980e-01, 4.6658e-01, 1.5447e-01, -2.9315e-01, -5.4347e-01, + -6.2719e-01, 1.3141e-02, -2.7332e-01, -7.1763e-01, 1.0533e+00, + -1.3157e-02, 8.6267e-01, -3.9119e-01, 1.6885e-01, 1.1079e-01, + 9.4402e-01, 2.0349e-01, 1.2190e-01, -1.1176e-01, -6.5901e-01, + -6.6527e-01, 4.0093e-01, 6.4473e-01, -5.8975e-02, 6.1555e-02, + 2.6041e-01, -1.0250e+00, 5.6489e-01, 3.0620e-01, 6.1333e-01, + -9.1546e-01, 1.5750e-01, -3.4531e-01, 5.8772e-01, -2.7294e-01, + 2.8230e-01, 4.2517e-01, -5.1764e-01, 6.1962e-01, 7.8920e-01, + 3.2426e-01, 5.0412e-01, -7.7646e-01, -1.3567e-01, 1.7101e-01, + -9.9796e-01, -4.4537e-01, 4.8358e-02, 6.3490e-01, 2.3371e-01, + -1.4228e-01, 1.3333e-01, -3.6550e-01, -8.1516e-01, -2.0957e-01, + 5.5559e-02, -7.6858e-01, 4.7748e-02, 5.5862e-01, -8.6177e-01, + 
-1.6945e-01, 7.1205e-02, -4.7811e-01, 2.5253e-01, -2.6369e-01, + 1.6023e-01, 6.2191e-01, 7.3936e-02, -2.9017e-01, 4.3377e-01, + 1.2851e+00, 3.7442e-02, -5.9583e-01, -4.5210e-01, 1.0180e-01, + 5.0120e-01, -2.9147e-01, 8.8957e-01, 2.0878e-02, -4.1971e-01, + -1.8841e-01, -2.7482e-01, 4.3985e-02, 1.0758e+00, 7.5964e-01, + -3.3972e-01, -1.1785e-01, 1.0062e+00, 3.7155e-02, -2.1678e-01, + 2.4287e-01, 6.3074e-02, -5.6007e-01, 7.0995e-01, 3.6084e-01, + -1.6873e-02, -8.3919e-01, 1.6823e-01, -9.4747e-01, -4.8958e-01, + 7.8847e-01, 7.7202e-04, -9.2255e-01, 6.4493e-01, 1.8281e-01, + -3.6465e-01, 5.7656e-01, -4.6102e-01, 2.9057e-01, 4.1614e-01, + 3.3420e-01, 5.0496e-01, 3.3019e-01, -1.1436e-01, 1.2199e-01, + 2.8585e-01, -8.7563e-04, -9.0184e-01, 6.5108e-01, 7.4054e-01, + 4.4034e-01, 3.3995e-01, 5.0614e-01, -1.3607e-01, 3.8840e-01, + 8.6456e-03, 3.4877e-01, -1.8693e-01, -6.4272e-02, 3.5093e-01, + -1.7511e-01, -7.3258e-02, 2.0546e-01, 2.3129e-01, -3.5526e-01, + 8.1774e-01, -6.0656e-01, -7.6188e-01, -3.6338e-01, -8.9585e-01, + -2.6506e-01, -4.5514e-01, 5.5527e-02, -2.8256e-01, 7.8668e-01, + 2.8775e-01, -1.0851e-01, 6.9988e-01, -6.2157e-01, 7.6669e-01, + 3.1773e-01, -4.2009e-02, 3.2616e-01, -2.7670e-01, 3.8984e-01, + 1.2011e-01, 4.8168e-01, 8.8709e-01, -3.2855e-01, -2.7174e-01, + -7.4272e-01, 1.2719e-01, -6.1042e-01, 2.2660e-01, 4.4052e-02, + 4.4497e-01, 2.8106e-01, -5.1593e-03, 5.0610e-01, 7.0556e-01, + -4.2123e-01, 1.4735e-01, 5.1446e-01, 4.8529e-01, -5.4870e-02, + -7.3664e-01, 1.6677e+00, -8.3535e-01, -5.2973e-01, 3.0460e-01, + 7.2249e-01, 1.6966e-01, 8.9588e-02, 8.3096e-01, -8.6126e-01, + -7.6821e-02, -5.3029e-02, -8.9052e-01, -3.4367e-01, -7.1569e-01, + 5.8929e-01, -8.2918e-01, -2.3534e-02, -2.0483e-01, 6.2491e-01, + 5.2459e-01, 8.5316e-02, 2.7787e-01, -3.9188e-01, 7.1636e-01, + 7.4751e-01, -9.2674e-01, -1.6038e-01, -6.0258e-01, 1.1622e-01, + 6.2238e-01, -2.3765e-01, 1.0691e-01, 4.0306e-01, 1.1263e-01, + -3.5009e-01, -4.1460e-01, 9.3237e-01, -5.4956e-01, -1.1002e+00, + 
-2.6917e-02, -1.0258e+00, -7.7761e-01, -7.1489e-01, -3.9857e-01, + -4.6436e-02, 2.4821e-01, -3.0298e-01, 6.9399e-01, -3.3227e-01, + -3.8206e-01, -2.6360e-01, 4.3574e-01, 7.5279e-01, 7.5876e-01, + 2.8074e-01, 1.1018e-01, -1.9109e-01, -6.2550e-02, -5.6703e-01, + -6.2459e-01, 5.1496e-01, 1.8184e-01, -5.6312e-01, 9.6952e-02, + 1.0453e-01, -4.1765e-01, 4.6603e-01, 3.6378e-01, 9.6122e-01, + 3.8609e-01, -3.5637e-01, -8.0935e-01, 6.6562e-01, -2.8625e-01, + 3.6776e-01, -1.1129e-01, -5.7451e-01, 1.0716e+00, 6.7912e-01, + -6.6297e-01, -6.0802e-01, -2.3813e-01, 5.4780e-01, 4.8164e-01, + 9.7501e-02, 2.7520e-01, 1.0619e+00], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-0.0090, -0.0285, -0.0117, ..., -0.0334, -0.0124, -0.0016], + [ 0.0073, -0.0109, -0.0155, ..., -0.0185, 0.0384, -0.0127], + [ 0.0053, 0.0201, 0.0105, ..., -0.0385, -0.0188, -0.0073], + ..., + [-0.0101, 0.0020, -0.0220, ..., 0.0291, -0.0050, 0.0291], + [ 0.0129, 0.0045, 0.0251, ..., 0.0085, 0.0212, -0.0309], + [-0.0195, -0.0013, 0.0205, ..., 0.0047, -0.0370, 0.0062]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.2406, 0.2976, 0.3762, ..., -0.0151, 0.0414, 0.0243], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[-0.0192, 0.0069, -0.0140, ..., 0.0031, -0.0047, -0.0004], + [-0.0301, -0.0264, -0.0079, ..., -0.0047, -0.0026, 0.0025], + [ 0.0142, -0.0199, 0.0016, ..., -0.0152, 0.0006, 0.0133], + ..., + [-0.0022, 0.0139, 0.0077, ..., 0.0194, 0.0097, -0.0001], + [ 0.0107, -0.0085, 0.0090, ..., -0.0009, 0.0038, -0.0255], + [ 0.0045, 0.0113, 0.0006, ..., -0.0065, -0.0201, -0.0050]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-1.6556e-03, -6.8176e-02, -6.2675e-03, -1.1215e-03, 1.7944e-02, + -2.9358e-02, 2.5925e-02, -1.5930e-02, 1.7960e-02, -1.9501e-02, + 2.4078e-02, 
-2.7313e-02, -3.3905e-02, 4.1962e-02, 8.5678e-03, + 4.2816e-02, 2.2644e-02, -1.5625e-02, 1.7345e-04, -7.3364e-02, + 1.1093e-02, 1.1620e-02, 4.7363e-02, -3.3627e-03, -5.6671e-02, + -4.5715e-02, 1.2283e-02, 3.6072e-02, 8.4000e-03, 2.0309e-02, + 4.6600e-02, 3.5278e-02, 2.3300e-02, 1.3256e-04, -4.8767e-02, + -7.8369e-02, 2.0660e-02, 2.9999e-02, 7.9895e-02, -5.1208e-02, + 7.0000e-03, -3.2272e-03, 1.7883e-02, -4.1351e-03, -6.4583e-03, + -8.0032e-03, -2.5772e-02, 9.2087e-03, -1.5091e-02, 3.6120e-05, + -2.0645e-02, -2.1378e-02, -7.7393e-02, -4.5502e-02, -3.1891e-02, + 8.6548e-02, 7.3120e-02, -1.1492e-03, 2.1469e-02, 4.8184e-04, + 2.2842e-02, 2.4939e-04, 5.0659e-02, -4.2963e-04, 1.2802e-02, + -6.9946e-02, -2.3636e-02, -4.4250e-02, -5.2551e-02, -1.1066e-01, + 2.3804e-02, 2.5330e-02, -1.2524e-01, -1.9562e-02, 2.8503e-02, + 4.5959e-02, 4.2358e-02, 1.2421e-02, 1.7349e-02, -4.3335e-03, + -2.4200e-02, 5.4535e-02, -3.4332e-02, -1.5556e-02, -4.6906e-02, + -1.1436e-02, 1.5945e-02, -3.3569e-02, 4.2236e-02, 1.0567e-02, + -6.9504e-03, -1.8845e-02, -1.8234e-02, 2.3987e-02, 3.6469e-03, + 1.5884e-02, 3.0350e-02, -4.3396e-02, 1.4664e-02, 7.9346e-02, + -4.3243e-02, 4.8409e-03, -6.9763e-02, 2.6474e-02, 2.1866e-02, + 6.6467e-02, 4.3945e-02, 1.2970e-02, 3.5187e-02, 4.6661e-02, + 3.4363e-02, -2.2659e-03, -2.5253e-02, -6.2256e-02, 2.8854e-02, + 1.0483e-02, 7.2449e-02, 4.8584e-02, 3.0075e-02, 8.1299e-02, + -3.3020e-02, 2.3636e-02, 2.8095e-03, 2.8671e-02, 3.2593e-02, + -2.7802e-02, -3.8624e-04, -2.8244e-02, -1.2238e-02, 3.8483e-02, + -5.3711e-02, -1.7715e-02, 1.2756e-02, -1.3763e-02, 1.7595e-03, + -3.2227e-02, -6.0699e-02, 2.7863e-02, 3.0975e-02, -7.9956e-02, + 5.5878e-02, -4.8859e-02, -4.9469e-02, -2.5482e-02, -3.4454e-02, + -4.3182e-03, 4.5074e-02, -4.0436e-02, 7.0229e-03, -1.6357e-02, + 4.2755e-02, 2.7176e-02, 5.8594e-02, -1.7578e-02, -2.9541e-02, + 6.7810e-02, 2.7206e-02, -5.5695e-02, -4.2114e-02, 1.8646e-02, + 1.0376e-02, -4.6539e-02, 4.4617e-02, -5.7755e-03, 3.4576e-02, + -1.6525e-02, 
3.2928e-02, 9.7275e-03, 5.2277e-02, 4.5891e-03, + -8.9493e-03, -5.1918e-03, 7.8087e-03, -1.7212e-02, -6.0349e-03, + -5.6061e-02, -1.0315e-02, 6.0883e-03, -4.5776e-02, -1.1185e-02, + -3.9520e-02, -6.0387e-03, 1.7715e-02, 6.2286e-02, 5.4077e-02, + 1.3489e-02, 1.3847e-02, -9.6985e-02, -3.0193e-03, -2.6337e-02, + -2.2629e-02, -8.5144e-02, -2.4094e-02, 3.0994e-03, 6.4026e-02, + 2.5665e-02, 1.0254e-02, -4.0253e-02, -6.0333e-02, -1.5167e-02, + -3.4576e-02, -1.1131e-02, 6.5979e-02, -8.1482e-02, 1.5497e-03, + -6.1531e-03, 2.6688e-02, -8.1396e-04, 2.8091e-02, 6.5308e-03, + 3.5858e-03, -1.2375e-02, -2.4185e-02, -1.8509e-02, -4.0924e-02, + 4.5197e-02, -1.4656e-02, -7.1350e-02, 8.2636e-04, -3.3325e-02, + -1.4343e-02, 3.5156e-02, -9.1410e-04, 9.0027e-02, -3.2227e-02, + -4.7272e-02, -9.8267e-03, 1.2329e-01, -2.0981e-02, 5.8327e-03, + -3.9886e-02, -2.2110e-02, 3.6835e-02, 9.0088e-02, -1.4488e-02, + 1.0071e-02, 6.1005e-02, 1.7975e-02, 1.9882e-02, 1.5930e-02, + -2.3010e-02, -4.4861e-02, 4.7241e-02, 1.0857e-02, 1.9333e-02, + 7.7362e-03, -1.8280e-02, 8.5510e-02, 1.7929e-02, -9.3365e-04, + -5.5733e-03, 6.0669e-02, 3.0823e-02, -1.4061e-02, 2.3254e-02, + -2.0508e-02, -1.8112e-02, -3.5828e-02, 2.7618e-02, 2.1103e-02, + 2.0264e-02, -2.4841e-02, 6.3721e-02, 2.3941e-02, 3.2005e-03, + -2.1988e-02, -1.0862e-03, -9.0942e-03, -1.1559e-02, -7.1289e-02, + -1.8280e-02, -1.5823e-02, -6.9962e-03, -6.0730e-02, 8.1329e-03, + -1.0065e-01, 4.7058e-02, -5.8861e-03, 4.7779e-04, 1.2875e-03, + -5.7068e-02, -8.9417e-02, 8.3557e-02, -5.9692e-02, -1.7776e-03, + -4.6021e-02, -2.8992e-02, -8.6136e-03, 3.6713e-02, -1.2138e-02, + -3.9032e-02, 4.3060e-02, -6.3049e-02, 2.2352e-04, -1.3138e-02, + -5.7800e-02, -3.6804e-02, -6.9275e-02, -6.9458e-02, -3.7781e-02, + -3.5572e-03, 7.4585e-02, 8.3389e-03, -3.4103e-03, 2.9724e-02, + 3.6438e-02, -2.7328e-02, 5.1300e-02, 5.1300e-02, 8.9493e-03, + 2.2980e-02, 2.5375e-02, -1.6678e-02, -1.1574e-02, -2.7115e-02, + 7.7515e-03, 1.0384e-02, 1.0429e-02, 1.9226e-02, 5.3162e-02, + 
5.0926e-03, -1.4091e-02, -4.2510e-04, 5.4962e-02, 2.0605e-01, + -1.3680e-02, 2.9583e-03, 9.6863e-02, 1.4257e-03, 3.6255e-02, + 1.4374e-02, 2.6749e-02, -2.4536e-02, -5.9090e-03, 7.2693e-02, + -2.5467e-02, -2.6337e-02, 1.6785e-02, 1.4023e-02, 2.7313e-03, + 6.2805e-02, 4.9408e-02, -1.0826e-02, 2.3880e-02, 2.1454e-02, + 8.5678e-03, -9.3918e-03, 2.5223e-02, -3.3844e-02, -7.6942e-03, + 1.2955e-02, 5.3955e-02, 9.7656e-03, -2.8976e-02, 3.9337e-02, + 6.9519e-02, 1.8631e-02, -2.3178e-02, -1.1665e-02, 1.8417e-02, + 2.6047e-02, -1.0033e-02, 3.3905e-02, -6.0486e-02, 1.0925e-02, + 3.0426e-02, 1.6312e-02, -4.5685e-02, 3.7689e-02, -3.3905e-02, + -3.9948e-02, -1.4366e-02, 1.8112e-02, 4.4403e-03, 1.6432e-03, + -1.8448e-02, 2.8671e-02, 2.3079e-04, -3.2379e-02, -2.0645e-02, + -3.8849e-02, 1.2970e-02, -3.1342e-02, 2.6199e-02, 8.7619e-06, + -2.7485e-03, 1.5802e-03, -1.9424e-02, -3.8269e-02, -3.0334e-02, + 1.6403e-02, 1.6403e-02, 1.6479e-02, -2.4185e-03, -3.4119e-02, + -1.0376e-02, -1.4740e-02, 5.7316e-04, -1.9852e-02, -5.7800e-02, + -2.1133e-02, 1.9211e-02, -9.1934e-03, -7.0000e-03, -1.4076e-02, + 2.6360e-03, 2.1774e-02, -9.6054e-03, -1.4496e-02, 5.0783e-04, + 3.3691e-02, 1.2993e-02, -1.5060e-02, 4.6692e-02, 1.4978e-01, + -1.4893e-01, 2.9205e-02, 3.1586e-02, -5.9326e-02, -6.9008e-03, + 3.1891e-02, -4.6875e-02, -3.3035e-03, 3.8910e-02, -2.3666e-02, + 2.3804e-02, -3.9551e-02, 3.0243e-02, 2.4567e-02, -4.4006e-02, + -3.9978e-02, -1.7563e-02, -5.1392e-02, -5.8212e-03, 6.6650e-02, + 2.7866e-03, 5.7411e-04, -1.2375e-02, 4.7333e-02, 2.3727e-02, + 3.4821e-02, 3.6346e-02, 2.7756e-02, -4.2023e-02, 1.8524e-02, + 9.8038e-03, -3.6011e-02, 3.4698e-02, -8.6182e-02, -5.0507e-02, + -2.7039e-02, 8.9951e-03, 4.0314e-02, 1.9272e-02, -3.9825e-03, + -2.0157e-02, -1.1072e-01, -1.3294e-03, -4.5738e-03, 6.2275e-04, + 9.2010e-03, 1.7532e-02, 3.4454e-02, -1.1017e-02, 5.4398e-03, + 1.2947e-02, 1.6876e-02, 1.9363e-02, 1.1658e-02, -1.9501e-02, + 2.9099e-02, -2.0340e-02, 4.2084e-02, 2.5925e-02, 4.1771e-03, + 
2.6443e-02, -1.2688e-02, -5.1453e-02, 2.4319e-03, -3.8330e-02, + 1.7654e-02, 2.6962e-02, 3.5095e-02, -5.8441e-02, -3.0014e-02, + 1.3481e-02, 5.5420e-02, 7.1869e-03, -5.1392e-02, 7.4097e-02, + 1.9180e-02, 2.3911e-02, 9.3307e-03, 3.8300e-02, -7.8278e-03, + -5.0171e-02, -8.6670e-02, -6.3416e-02, -2.5757e-02, 7.9834e-02, + 1.3481e-02, -2.6108e-02, -2.6688e-02, 5.6213e-02, 1.0022e-01, + 3.5736e-02, -2.7740e-02, -1.4015e-02, -3.4302e-02, -5.2002e-02, + 2.1912e-02, -6.8909e-02, -1.0216e-02, 4.4495e-02, -2.4780e-02, + -6.0005e-03, -3.8483e-02, -8.3466e-03, 1.4488e-02, 1.9379e-02, + 4.7546e-02, -1.1909e-02, 9.3651e-04, 6.5002e-02, 1.9333e-02, + -1.5175e-02, -3.1464e-02, 3.3875e-02, 5.5008e-03, -2.5116e-02, + -2.5742e-02, 2.8503e-02, 1.1359e-01, -1.2955e-02, -3.0411e-02, + 4.2992e-03, 8.7585e-02, 1.1452e-02, 7.5378e-02, 5.1270e-02, + 1.0872e-02, -4.9469e-02, -5.6030e-02, 6.0120e-02, 1.4200e-03, + 1.4328e-02, -3.7170e-02, -2.3087e-02, 2.5162e-02, 3.9185e-02, + -1.9882e-02, 4.4952e-02, 9.4299e-02, 1.1896e-01, -3.1372e-02, + 5.1422e-03, 3.1372e-02, 4.0131e-02, -5.7251e-02, 1.3519e-02, + -3.1174e-02, -9.5215e-02, -2.9583e-03, 7.3719e-04, -2.1072e-02, + 5.2032e-03, 2.8839e-02, -2.0477e-02, -3.3600e-02, 6.6490e-03, + -9.7717e-02, 3.9032e-02, 2.8610e-02, -4.1885e-03, 9.6207e-03, + 3.7018e-02, 3.2597e-03, -5.5206e-02, -4.7363e-02, -9.2545e-03, + -2.3823e-03, 3.1616e-02, -2.2171e-02, 2.9877e-02, 4.8828e-02, + -3.5828e-02, -1.1780e-02, -1.1528e-02, -2.6207e-03, -4.1275e-03, + -1.0635e-02, 5.6244e-02, -7.1594e-02, 1.2772e-02, 3.2745e-02, + 4.6082e-02, -1.6464e-02, 9.0149e-02, -2.8244e-02, 6.3110e-02, + -4.0924e-02, -1.2466e-02, -2.4368e-02, 7.9966e-04, -5.1147e-02, + 3.7811e-02, 4.3030e-02, -1.3098e-01, 4.0771e-02, 2.1606e-02, + -1.2375e-02, -4.8187e-02, -3.0472e-02, -4.2114e-02, -2.7298e-02, + 2.9221e-02, -6.0883e-02, -2.0752e-02, 2.4857e-02, 1.0895e-02, + 6.7139e-03, -5.0323e-02, 8.5144e-03, -1.3382e-02, 4.3701e-02, + 2.2934e-02, 4.0710e-02, 7.7248e-04, -2.1942e-02, -1.2863e-02, + 
-2.3155e-03, -3.0960e-02, -3.7933e-02, -2.0752e-02, 2.0599e-03, + -8.8013e-02, 4.9408e-02, -1.8021e-02, -3.1548e-03, -6.4697e-02, + -1.3191e-02, -3.3752e-02, -5.8517e-03, -3.0594e-02, -1.9196e-02, + 4.2297e-02, 1.8555e-02, -5.2460e-02, 8.3084e-03, -9.3307e-03, + -7.0992e-03, -2.3682e-02, -6.5918e-02, -9.3689e-03, -6.0425e-03, + -1.8585e-02, 7.0679e-02, -2.8896e-03, 2.9907e-02, -1.0857e-02, + 2.8259e-02, -6.4453e-02, 2.2583e-02, -3.0273e-02, 2.6321e-02, + -4.2145e-02, -3.1185e-03, 4.9744e-02, -8.8745e-02, -1.4400e-03, + -1.2947e-02, 2.0996e-02, 1.1005e-01, 1.5099e-02, 5.7404e-02, + 5.1880e-02, 1.2077e-02, -6.0394e-02, 3.3661e-02, -2.8549e-02, + 3.1342e-02, 1.8457e-01, 5.0163e-03, 1.2878e-02, -2.2827e-02, + -1.3092e-02, 1.5366e-02, -4.1565e-02, 3.6377e-02, 2.7023e-02, + 2.3239e-02, 1.0078e-02, 3.0319e-02, -4.4830e-02, 2.3102e-02, + -4.9477e-03, 1.5823e-02, 1.8906e-02, -3.5553e-02, 1.5701e-02, + -4.0039e-02, -6.6071e-03, -4.1840e-02, -2.6443e-02, 1.2383e-02, + 2.7771e-02, -8.8577e-03, 1.2444e-02, 5.2032e-02, 2.4353e-02, + 1.8219e-02, 2.9221e-02, -4.1229e-02, -7.5951e-03, 4.0527e-02, + -1.6968e-02, 6.6833e-03, -9.1248e-03, -5.4688e-02, 8.1177e-03, + -2.5269e-02, 3.0014e-02, -2.7328e-02, 3.2253e-03, 3.5431e-02, + -2.1805e-02, 4.3976e-02, 1.8341e-02, 2.3636e-02, 2.2827e-02, + -2.0340e-02, -1.1665e-02, 3.3875e-02, -3.8071e-03, 4.5227e-02, + 6.2790e-03, -6.6895e-02, 4.2877e-02, -1.3599e-01, -5.0583e-03, + 1.9409e-02, -2.7351e-03, 2.2755e-03, -6.1035e-02, 5.2429e-02, + 2.7359e-02, 4.9805e-02, 3.2471e-02, -4.7989e-03, 2.2873e-02, + -1.2941e-03, -9.3811e-02, 2.6810e-02, -1.3443e-02, -1.4938e-02, + 2.0325e-02, -2.8629e-03, -3.7903e-02, 4.5654e-02, -5.9814e-02, + -2.3155e-03, -3.5767e-02, -8.6823e-03, 4.5662e-03, 5.3772e-02, + 6.0669e-02, 5.5504e-03, -4.4556e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.5493, 1.6399, 1.5999, 1.5885, 1.6329, 1.6397, 1.5427, 1.5989, 1.5938, + 1.6237, 1.6590, 1.6457, 1.6181, 
1.6205, 1.4713, 1.5963, 1.6449, 1.7084, + 1.6214, 0.4006, 1.6030, 1.6363, 1.5759, 1.5834, 1.6650, 1.5835, 1.6580, + 1.5898, 1.5943, 1.5827, 1.5928, 1.5276, 1.6157, 1.5918, 1.6369, 1.5499, + 1.6460, 1.5885, 1.5715, 1.5816, 1.5972, 1.5878, 1.5510, 1.6371, 1.6093, + 1.6871, 1.5316, 1.6196, 1.6218, 1.5502, 1.6218, 1.6361, 1.5700, 1.5400, + 1.6542, 1.6868, 1.5516, 1.5528, 1.6228, 1.6274, 1.5977, 1.6739, 1.5992, + 1.5451, 1.6178, 1.7181, 1.5690, 1.4695, 1.6697, 1.5485, 1.5806, 1.6739, + 1.8901, 1.6811, 1.6642, 1.6300, 1.5537, 1.5634, 1.6123, 1.6138, 1.6103, + 1.5792, 1.5907, 1.6066, 1.5462, 1.5001, 1.6765, 1.5610, 1.6225, 1.5802, + 1.6846, 1.5668, 1.6137, 1.6866, 1.6791, 1.4948, 1.6263, 1.6051, 1.6387, + 1.5753, 1.5968, 1.5417, 1.5984, 1.5651, 1.5851, 1.6353, 1.5291, 1.5567, + 1.5705, 1.6465, 1.5494, 1.5523, 1.6082, 1.6735, 1.6387, 1.5507, 1.5189, + 1.5639, 1.6035, 1.5690, 1.5816, 1.5912, 1.5696, 1.5963, 1.6102, 1.5688, + 1.6092, 1.6175, 1.6432, 1.6284, 1.5562, 1.5349, 1.5583, 1.6106, 1.5579, + 1.6081, 1.6015, 1.6442, 1.6334, 1.6291, 1.6137, 1.5456, 1.7259, 1.5900, + 1.5554, 1.5780, 1.6134, 1.6272, 1.5741, 1.6224, 1.5436, 1.4855, 1.5818, + 1.6631, 1.5472, 1.2020, 1.5398, 1.6032, 1.5883, 1.6821, 1.5366, 1.5118, + 1.5927, 1.6376, 1.6173, 1.6728, 1.5829, 1.6114, 1.5536, 1.6710, 1.5637, + 1.5744, 1.4803, 1.5800, 1.6404, 1.6355, 1.6304, 1.6685, 1.5647, 1.5934, + 1.5731, 1.6635, 1.5844, 1.5600, 1.6321, 1.5281, 1.5389, 2.1468, 1.6702, + 1.6049, 1.5883, 1.5372, 1.6238, 1.6070, 1.5457, 1.5475, 1.6049, 1.5958, + 3.0083, 1.5153, 1.6055, 1.6462, 1.5855, 1.6214, 1.6535, 1.5140, 1.5091, + 1.5449, 1.5918, 1.6292, 1.5536, 1.4959, 1.6112, 1.5364, 1.4727, 1.6162, + 1.5672, 1.5872, 1.5662, 1.7077, 1.6566, 1.6526, 1.4817, 1.6145, 1.6988, + 1.6212, 1.6270, 1.6128, 1.6391, 1.7390, 1.6581, 1.6760, 1.7409, 1.6474, + 1.6504, 1.5595, 1.6109, 1.6272, 1.4755, 1.6741, 1.5887, 1.5637, 1.6667, + 1.6239, 1.6049, 1.6180, 1.5848, 1.4813, 1.5367, 1.7013, 1.5623, 1.6481, + 1.5652, 1.6429, 1.6078, 1.6493, 
1.5002, 1.6345, 1.5190, 1.6447, 1.5506, + 1.5112, 1.5698, 1.5044, 1.5513, 1.5468, 1.5381, 1.5950, 1.6118, 1.5543, + 1.6706, 1.6023, 1.5748, 1.6172, 1.5726, 1.5440, 1.4987, 1.5566, 1.6374, + 1.5455, 1.5638, 1.6089, 1.6195, 1.6111, 1.6488, 1.6001, 1.4797, 1.6119, + 1.6401, 1.5164, 1.5940, 1.6737, 1.6311, 1.5728, 1.5773, 1.6245, 1.4770, + 1.6347, 1.6324, 1.7058, 1.5734, 1.5590, 1.6184, 1.7056, 1.6453, 1.4783, + 1.5747, 1.6226, 1.5360, 1.6008, 1.6301, 1.5816, 1.5759, 1.5865, 1.5823, + 1.5757, 1.5304, 1.5889, 1.7170, 1.5545, 1.5908, 1.4911, 1.5523, 1.6808, + 1.0102, 1.4416, 1.6402, 1.6516, 1.5736, 1.5707, 1.5194, 1.5789, 1.5578, + 1.5791, 1.5766, 1.6633, 1.5945, 1.5471, 1.6900, 1.5929, 1.5097, 1.5815, + 1.6156, 1.6017, 1.5836, 1.5091, 1.5963, 1.6344, 1.5837, 1.6584, 1.5643, + 1.6530, 1.6016, 1.7498, 1.5965, 1.6086, 1.5473, 1.6562, 1.6158, 1.5985, + 1.5164, 1.5495, 1.6260, 1.6174, 1.5751, 1.5895, 1.5756, 1.5310, 1.5784, + 1.5734, 1.5745, 1.6339, 1.6713, 1.6830, 1.6132, 1.4848, 1.6080, 1.5582, + 1.5989, 1.5899, 1.6601, 1.5907, 1.6642, 1.6658, 1.5568, 1.5169, 1.5837, + 1.7022, 1.5386, 1.5777, 1.6683, 1.5221, 1.4951, 1.5858, 1.5786, 1.6066, + 1.5431, 1.6643, 1.6195, 1.5992, 1.5671, 1.6200, 1.5648, 1.5666, 1.6210, + 1.5249, 1.6037, 1.6288, 1.5963, 1.5306, 1.6307, 1.5835, 1.4687, 1.5704, + 1.3834, 2.4891, 1.6304, 1.6105, 1.5524, 1.6043, 1.5713, 1.5287, 1.6168, + 1.5186, 1.5306, 1.5284, 1.5702, 1.5954, 1.6189, 1.5576, 1.6143, 1.5329, + 1.6438, 1.5675, 1.5940, 1.5842, 1.5996, 1.5994, 1.5268, 1.5964, 1.5654, + 1.6301, 1.5943, 1.2062, 1.6348, 1.5624, 1.6046, 1.6042, 1.6371, 1.6150, + 1.6369, 1.6009, 1.7376, 1.6231, 1.5693, 1.6378, 1.5659, 1.5503, 1.6336, + 1.5653, 1.6103, 1.6127, 1.6306, 1.5466, 1.5629, 1.6925, 1.5879, 1.5860, + 1.5904, 1.5143, 1.6487, 1.5793, 1.6201, 1.5986, 1.5847, 1.6243, 1.5638, + 1.5498, 1.6304, 1.5939, 1.5593, 1.5534, 1.6571, 1.3636, 1.5721, 1.5782, + 1.5547, 1.5585, 1.6214, 1.6656, 1.5667, 1.6133, 1.5377, 1.6059, 1.5794, + 1.6044, 1.6507, 1.5776, 1.5033, 
1.5131, 1.5670, 1.5717, 1.6147, 1.6226, + 1.6446, 1.5396, 1.5357, 1.6272, 1.5983, 1.5448, 1.5605, 1.7070, 1.6120, + 1.6054, 1.6446, 1.6035, 1.6333, 1.5536, 1.5951, 1.5870, 1.6011, 1.5795, + 1.5923, 1.5524, 1.5733, 1.5762, 1.5603, 1.5949, 1.5760, 1.6026, 1.6437, + 1.5465, 1.7006, 1.6365, 1.5208, 1.6201, 1.6017, 1.5543, 1.6804, 1.7420, + 1.6809, 1.5633, 1.6597, 1.6247, 1.6704, 1.6162, 1.6767, 1.5603, 1.5749, + 1.6013, 1.6128, 1.6164, 1.6349, 1.6668, 1.5795, 1.5639, 1.5701, 1.6196, + 1.5638, 1.5866, 1.5865, 1.4001, 1.5744, 1.6010, 1.6010, 1.6537, 1.6356, + 1.6039, 1.6189, 1.5781, 1.5685, 1.5841, 1.5766, 1.5213, 1.6012, 1.5502, + 1.5972, 1.6567, 1.5655, 1.5510, 1.5973, 1.5988, 1.6302, 1.6506, 1.5686, + 1.6109, 1.6744, 1.5829, 1.6352, 1.6946, 1.6377, 1.7059, 1.5471, 1.5368, + 1.5926, 1.5611, 1.6253, 1.5294, 1.4044, 1.6179, 1.5868, 1.6041, 1.6719, + 1.5846, 1.5398, 1.6249, 1.7150, 1.7153, 1.6471, 1.6346, 1.5701, 1.6264, + 1.6090, 1.6172, 1.6575, 1.5996, 1.6354, 1.5527, 1.7479, 1.5759, 1.6304, + 1.6639, 1.5755, 1.6105, 1.6639, 1.6032, 1.5069, 1.6164, 1.6587, 1.5650, + 1.6497, 1.6342, 1.5691, 1.6271, 1.6007, 1.5943, 1.5942, 1.5980, 1.5675, + 1.5087, 1.5839, 1.6046, 1.6019, 1.6140, 1.3802, 1.6647, 1.6404, 1.5502, + 1.6699, 1.6187, 1.6753, 1.5719, 1.6739, 1.5828, 1.6139, 1.6800, 1.5641, + 1.5271, 1.5764, 1.6025, 1.5944, 1.5819, 1.5731, 1.5817, 1.5822, 1.6158, + 1.6417, 1.6392, 1.6379, 1.6283, 1.5247, 1.5779, 1.6359, 1.4932, 1.5547, + 1.5839, 1.6003, 1.6529, 1.5339, 1.6317, 1.6241, 0.3793, 1.5644, 1.5394, + 1.6564, 1.5079, 1.5475, 1.6538, 1.5442, 1.5427, 1.5980, 1.5646, 1.5751, + 1.6030, 1.5852, 1.5536, 1.5873, 1.5875, 1.5647, 1.6036, 1.5827, 1.5896, + 1.6706, 1.6393, 1.5696, 1.5956, 1.5279, 1.6411, 1.4874, 1.5671, 1.5945, + 1.6258, 1.6366, 1.5930, 1.5429, 1.6729, 1.5758, 1.6391, 1.5821, 1.5636, + 1.6237, 1.6044, 1.6874, 1.5696, 1.6090, 1.5168, 1.2428, 1.6478, 1.6367, + 1.6609, 1.5528, 1.6392, 1.6062, 1.5966, 1.5086, 1.6277, 1.5310, 1.6601, + 1.5359, 1.5844, 1.6001, 1.5696, 
1.6074, 1.6169, 1.6799, 1.5825, 1.6523, + 1.6416, 1.6637, 1.6573, 1.4960, 1.6380, 1.5620, 1.6887, 1.6310, 1.6395, + 1.5979, 1.5711, 1.5458, 1.6128, 1.6570, 1.5286, 1.5453, 1.5261, 1.6336, + 1.5759, 1.5715, 1.6449], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 9.9168e-02, 4.3849e-02, -1.7755e-01, 3.2002e-02, 7.9832e-02, + 2.2029e-01, 9.3474e-02, 3.6958e-02, -1.1444e-01, -1.2357e-02, + 4.9962e-02, -3.6297e-02, -1.3317e-01, -2.1069e-02, -1.0363e-01, + -7.6869e-02, -1.0596e-02, -1.9904e-02, 1.5362e-02, 4.3388e+00, + -5.8986e-02, 5.7045e-02, -2.8652e-02, 3.9917e-02, -3.0142e-02, + 1.6968e-02, 2.2256e-02, 4.2894e-02, 4.1783e-02, -5.1836e-02, + 2.7941e-02, 3.3575e-02, 1.9892e-02, 1.2753e-01, 1.1412e-01, + -3.5983e-02, 4.1505e-02, -3.6498e-02, -1.4680e-01, 7.4925e-02, + 5.5049e-02, 4.8291e-02, -2.4369e-03, 5.2145e-02, -9.3334e-03, + 3.9382e-02, 6.6774e-02, -1.6817e-02, 2.5555e-02, 1.1247e-01, + 5.4547e-02, 4.5930e-02, 2.4378e-02, 6.2863e-02, 6.7775e-02, + 8.7540e-02, -1.0405e-01, -3.7358e-02, 5.7717e-02, 7.5026e-03, + 8.6343e-02, -1.0512e-01, 6.9189e-02, 3.2092e-02, 1.0216e-01, + -2.0255e-02, 1.0212e-01, 1.0654e-01, 3.5857e-02, 2.3916e-01, + 5.0539e-03, 6.2813e-02, 6.3034e-01, 7.7081e-02, -1.2523e-01, + 1.1814e-01, 3.1212e-02, -1.2816e-01, -1.3047e-02, -6.0614e-02, + -1.9448e-02, -5.9216e-02, 5.8349e-02, 1.3912e-01, 5.4253e-02, + 3.7561e-02, 1.0247e-01, 1.0217e-01, -5.0383e-04, -1.2770e-02, + 6.9393e-05, 4.3805e-02, 7.1861e-02, -9.6834e-02, -7.5698e-02, + 2.8570e-02, -2.6059e-02, -1.6039e-02, -6.1035e-02, -3.4551e-02, + 4.6976e-02, -3.7879e-02, 5.5665e-02, -1.1818e-01, 2.9161e-02, + 5.0025e-02, -3.7138e-03, -6.8892e-02, 3.7005e-03, -1.0422e-02, + -3.8308e-02, -2.7117e-02, 7.8872e-02, 7.9073e-02, 7.7121e-02, + -6.7232e-02, -2.7161e-02, -8.8668e-02, -1.8786e-02, 1.0479e-01, + -2.0932e-02, -2.5799e-01, -6.9773e-02, -4.2048e-02, -1.8170e-01, + -9.6430e-03, -1.7160e-01, 5.4934e-03, 4.0641e-02, -5.2708e-02, + -1.1047e-01, 8.8251e-02, 
-7.7579e-02, -2.2611e-02, -7.4420e-02, + 7.1777e-02, -4.9953e-02, -7.0597e-04, -3.5563e-02, -4.9313e-03, + 2.4732e-02, -9.2466e-03, 3.3888e-02, 6.1213e-02, 6.7017e-02, + -1.1905e-02, -1.3046e-02, 8.6177e-04, 1.0020e-02, 1.4302e-01, + 2.1901e-01, -5.3628e-02, -1.2036e-01, 4.3155e-02, -4.0979e-02, + -4.2002e-01, -7.8410e-03, -1.6884e-02, 1.4052e-02, 6.5780e-02, + 1.6192e-01, -1.3758e-01, -5.3189e-02, 4.2110e-02, -5.1313e-02, + -2.9008e-02, -6.6411e-02, 6.2063e-02, -5.4825e-02, 7.1862e-02, + -8.1163e-02, 1.0925e-01, 3.2302e-02, 3.9469e-02, 5.8172e-02, + -1.2644e-01, 4.1148e-03, 2.5105e-02, -4.9433e-02, 4.0924e-02, + -2.9660e-03, -4.3415e-02, 3.4479e-02, -5.4720e-03, 1.6097e-02, + -1.7373e-01, 6.0208e-02, 2.4375e-01, 2.8294e-03, -9.3666e-02, + 2.8309e-02, -4.6205e-02, 1.0189e-01, 3.5309e-02, -8.6500e-02, + 2.4689e-02, -7.4888e-02, -1.3506e-01, 1.5368e-01, 2.4555e-03, + -1.5823e-01, 5.7932e-03, -5.2773e-02, 4.4575e-02, 8.0074e-02, + 1.6230e-01, -8.7500e-02, 6.6434e-02, 1.2840e-01, 3.9700e-02, + 3.3606e-02, 2.0853e-01, 1.8290e-02, -8.0931e-02, -9.6761e-02, + -4.6112e-02, -1.8914e-02, 4.8560e-02, -1.1570e-01, -2.3101e-01, + -6.7442e-02, -7.2427e-02, 4.9952e-02, -4.3988e-02, 1.8997e-02, + -4.0182e-02, 8.3352e-02, -4.5539e-02, 1.1739e-01, -4.0738e-02, + 3.9162e-02, 6.3029e-02, 1.4560e-02, 7.7979e-02, 1.0567e-02, + 1.8793e-03, -1.1100e-01, 4.5256e-02, 9.0369e-02, 3.4154e-03, + 6.3770e-02, 7.5397e-02, 1.1233e-02, 1.0372e-01, 3.4302e-02, + 7.3305e-02, 7.2093e-02, -9.6457e-02, 2.4593e-02, -5.1865e-02, + 3.5280e-02, -4.2728e-03, 8.0915e-02, -1.4808e-01, 1.5104e-01, + -3.9665e-02, -9.2068e-02, -4.3506e-02, 5.6074e-02, 3.5466e-02, + -5.8521e-02, -8.1093e-02, -5.2968e-02, -4.0219e-02, -2.9365e-02, + 3.5287e-02, -7.8777e-02, -9.6997e-03, -7.0869e-02, -8.0517e-03, + 2.7628e-02, 3.7099e-02, 1.6447e-01, -8.0948e-02, 1.5004e-02, + 7.5770e-02, -6.2618e-02, 1.1144e-02, 6.9380e-02, -1.2749e-01, + 2.0715e-01, 2.1565e-01, 1.3051e-03, 2.1492e-02, 4.6540e-02, + 4.0014e-02, 2.8328e-02, 
-1.1086e-01, 1.2292e-01, -1.4375e-01, + 3.2895e-03, -3.6497e-02, 9.6531e-02, 6.9485e-02, 7.0013e-02, + 1.9933e-02, 2.3198e-01, 5.5978e-02, 5.4095e-02, 7.0778e-02, + 1.8711e-02, -1.9152e-03, -4.6121e-02, 1.7459e-01, -2.3601e-02, + 1.5128e-02, 1.2909e-01, -1.2117e-01, 7.0730e-02, -5.3275e-02, + 5.2342e-02, 2.5520e-02, -1.4723e-01, -1.5816e-01, 3.6779e-02, + -2.4259e-02, -3.5487e-04, 6.2654e-03, 1.9984e-01, -7.6132e-02, + -5.9028e-02, 2.1878e-01, -9.5459e-02, -5.9844e-02, -7.4959e-03, + -2.0009e-01, 5.3376e-02, 2.6009e-02, 3.6678e-02, -2.1850e-01, + 1.4710e-01, -2.3338e-02, 1.2922e-01, -3.7010e-02, -2.7180e-02, + 9.6733e-02, 5.3181e-02, -6.1271e-02, -1.1622e-02, -5.7339e-02, + -1.5541e-01, 4.0807e-02, -2.3586e-02, -8.5208e-02, 1.2889e-01, + -1.5264e-02, 8.5310e-02, 1.0926e-01, 5.6991e-02, 2.3987e-03, + -1.1391e-01, 3.9262e-02, -2.8064e-02, 6.5222e-02, 2.2107e-02, + -7.2463e-02, -6.1091e-02, 4.6915e-02, -2.5152e-02, -1.2362e-01, + 4.6378e-02, -5.5811e-03, 1.7696e-02, -6.9871e-02, -4.3401e-02, + 1.3433e-03, 5.2477e-02, 1.0138e-01, 5.2289e-02, -2.1016e-02, + 6.6678e-02, -3.2154e-02, -4.6185e-02, -2.7438e-02, -2.5051e-02, + 3.9109e-02, 1.1025e-01, 1.0976e-01, 1.1039e-02, 1.5488e-02, + -1.4314e-01, 1.3005e-01, 4.1390e-02, -1.8025e-02, -2.3589e-02, + -1.4516e-02, -2.1773e-02, -7.3370e-02, -7.3497e-02, -9.7571e-02, + 4.6733e-03, 6.5029e-03, -1.3255e-01, -8.5680e-02, 2.9686e-02, + 4.4497e-02, 1.9360e-02, 6.3552e-02, 1.8620e-03, 7.7786e-02, + 1.3989e-01, -3.6987e-02, 1.0180e-01, 7.3867e-02, -1.4634e-01, + -1.1714e-01, -7.4277e-02, 5.9020e-02, 2.2145e-02, -2.0120e-02, + -1.1564e-01, 8.6384e-02, 5.9949e-02, -8.7968e-02, 1.6044e-02, + -1.3494e+00, -1.3101e-02, 4.1360e-02, -7.1064e-02, -8.5138e-02, + -4.7313e-02, 2.7228e-02, -2.2329e-02, -9.8785e-02, -6.7174e-02, + 3.2216e-02, -2.0466e-01, -9.3770e-02, 9.2580e-02, -5.3992e-03, + -1.7000e-02, -3.8034e-02, 9.0621e-03, 8.6878e-02, 1.0987e-01, + 6.6488e-04, 5.1833e-02, 5.3204e-02, 1.2711e-01, -8.6105e-02, + -4.8681e-02, -6.0973e-02, 
-6.4746e-02, 8.5388e-02, 1.7026e-01, + -1.2123e-01, -1.9613e-02, -9.3257e-02, 1.2637e-01, -2.8975e-03, + 3.1562e-03, 3.3015e-03, -6.1522e-02, 5.9085e-02, 4.2832e-02, + 3.9749e-02, 5.1629e-02, 2.1208e-02, -6.0335e-02, -1.2391e-01, + -1.1050e-02, 6.3860e-02, -1.0895e-01, 4.7044e-02, 2.1700e-02, + -7.9585e-03, 9.7766e-02, -1.1182e-01, 1.3406e-01, 4.7058e-02, + -3.2009e-02, -1.0783e-01, -9.7116e-02, 2.9043e-02, -2.2611e-02, + 1.0493e-01, 1.7676e-01, 7.0756e-02, 5.9062e-03, 8.1334e-02, + 1.1995e-01, -1.3953e-01, 3.0317e-02, 1.0772e-01, -1.1324e-01, + 2.4339e-04, -3.5425e-02, 1.6272e-01, 7.3581e-02, -1.3233e-03, + 5.4502e-02, 2.7183e-02, -4.3697e-02, -2.1902e-02, -1.1941e-01, + 4.8753e-02, -5.3143e-02, 1.1286e-01, -1.1190e-01, 1.5261e-02, + -3.4672e-02, -8.7632e-02, 7.1268e-02, -5.5456e-02, -5.3602e-02, + 1.5196e-02, 1.1779e-01, 3.9464e-02, 1.0086e-02, 1.8113e-03, + -5.6602e-02, -5.1226e-02, -9.6473e-02, 5.4633e-02, -1.6578e-02, + -4.3707e-02, -1.5735e-02, 1.7919e-01, 1.3178e-01, -1.3620e-01, + -7.0200e-02, 6.2552e-03, 5.4710e-02, -2.3846e-02, -1.8538e-02, + 4.6390e-03, -1.0941e-02, -2.7629e-02, 3.5496e-02, -8.9692e-02, + 9.8759e-03, 3.5293e-02, -3.2110e-02, -1.0279e-01, -5.9262e-02, + 1.0760e-02, -6.1608e-02, 9.6876e-03, -4.7052e-02, -7.1123e-02, + 1.0229e-02, 9.2876e-02, 9.8025e-03, 3.4059e-02, -2.0260e-02, + -8.4060e-03, 1.1549e-01, 7.8580e-03, 1.4220e-01, -7.0386e-03, + 5.5618e-02, 5.0090e-02, -7.8569e-02, 4.5682e-02, -1.0535e-02, + -2.0261e-02, -3.6776e-02, -3.4775e-02, 2.5522e-02, 1.2608e-01, + -6.7188e-02, 2.0959e-01, 2.1202e-02, 2.0807e-02, 1.5260e-01, + 1.0424e-01, 6.0454e-03, 2.1931e-02, 2.5767e-03, 5.1140e-02, + 9.3779e-02, -1.6807e-02, -5.5458e-02, -1.3268e-02, 4.6824e-02, + -2.5836e-02, 1.7654e-02, 3.8869e-02, 7.0855e-02, -5.4796e-02, + -1.8964e-01, 3.8619e-02, 3.9355e-02, 1.0636e-01, 6.1914e-02, + 4.0157e-02, 5.5368e-02, -7.4316e-02, 4.9771e-02, 1.0426e-01, + -3.9820e-02, 5.2887e-02, 7.6436e-02, -2.4672e-02, 7.8243e-02, + -6.3843e-03, 6.3411e-02, 
1.4871e-03, -4.8862e-02, 2.5202e-02, + -7.8051e-02, -1.4255e-01, -1.5805e-02, 1.3358e-02, 5.2986e-02, + -1.1864e-01, 4.6314e-02, 3.5143e-02, 7.3639e-02, -4.0801e-02, + 9.7016e-02, 6.1717e-02, -1.8799e-02, 2.7340e-03, -7.7440e-02, + -1.3023e-01, -4.1560e-03, 7.8262e-02, 3.5681e-02, -1.3603e-01, + 8.7046e-02, -1.3018e-02, 7.5417e-02, -1.1819e-02, 5.1524e-02, + -6.0646e-02, -9.2515e-03, 2.0641e-02, 6.3551e-02, -7.2697e-02, + 1.3991e-01, -2.5632e-02, 1.1834e-01, 1.6382e-01, 6.7759e-02, + 2.4555e-02, -8.7307e-02, 5.7441e-02, 7.5379e-02, -6.1329e-02, + 3.8550e-02, 8.5031e-04, 1.1118e-01, -6.0965e-02, 3.8923e-02, + 5.2394e-03, 4.7283e-03, 1.1786e-01, 3.2184e-02, 1.8079e-01, + 1.2621e-01, 2.8031e-02, -6.1219e-02, 1.0079e-01, -9.9842e-02, + 4.4614e-02, -1.3790e-01, -1.7813e-02, 7.1840e-02, 7.4822e-02, + -4.2562e-02, -2.6297e-02, -1.1915e-01, 3.5753e-02, 1.5433e-01, + 4.8434e-02, -1.5736e-01, -1.1653e-01, 5.1710e-02, 5.2133e-02, + -2.3209e-01, 5.4455e-02, -1.1171e-02, 7.2662e-03, 2.9330e-02, + 3.7252e-02, -3.8506e-02, -2.0239e-02, -1.8180e-03, 1.2752e-01, + 1.2268e-03, -4.5370e+00, -8.1560e-03, 7.7931e-02, -3.1185e-03, + -1.2173e-01, -4.9946e-02, -7.9451e-02, -8.4369e-02, 5.3416e-02, + -1.3204e-01, -4.5491e-02, 1.4509e-02, -1.4095e-03, -3.9864e-02, + 1.0990e-01, 2.8505e-02, -7.8903e-02, 5.6863e-02, -9.2724e-02, + 6.2713e-02, 3.9996e-02, -8.9662e-03, -1.1170e-02, -1.4313e-01, + -3.2183e-02, 1.0727e-01, 5.8286e-02, -1.9646e-03, 5.0987e-02, + -3.8793e-02, 9.1875e-02, 1.0112e-01, 1.9460e-02, -1.6071e-02, + -5.8468e-03, 2.4669e-03, -1.6030e-04, -5.2524e-02, -5.0830e-02, + 2.3971e-02, 2.3613e-02, 1.6205e-02, -1.4068e-01, 1.1002e-01, + -2.4975e-02, -2.1740e-01, 9.6996e-02, 3.8999e-02, 1.4596e-01, + 8.4671e-02, 6.8433e-02, 9.0532e-02, 7.4389e-02, -6.1792e-03, + 3.1973e-04, 2.1138e-02, 1.3640e-02, 4.0633e-02, -3.3085e-02, + 1.0155e-01, -1.3498e-01, -1.0529e-01, 9.6324e-02, -8.7950e-02, + -7.9196e-03, -1.4550e-01, -1.4610e-02, 4.1099e-02, -7.0051e-02, + -3.3869e-02, 5.5067e-03, 
-5.6253e-02, -5.2561e-02, 8.9154e-02, + -2.1283e-02, 6.1910e-02, 7.9824e-02, -7.3723e-03, -5.7242e-02, + -1.5250e-02, 1.1465e-01, 4.5637e-02, -1.4574e-01, -9.4880e-02, + -7.2486e-02, 1.0574e-01, -4.5374e-03], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0249, 0.0004, -0.0134, ..., 0.0097, -0.0221, -0.0155], + [-0.0316, 0.0126, -0.0031, ..., -0.0116, 0.0157, 0.0038], + [-0.0350, 0.0108, 0.0050, ..., -0.0090, -0.0208, -0.0072], + ..., + [-0.0134, -0.0048, 0.0264, ..., -0.0219, -0.0065, -0.0021], + [ 0.0038, 0.0062, -0.0022, ..., -0.0090, -0.0054, -0.0187], + [ 0.0026, 0.0050, 0.0312, ..., -0.0390, 0.0172, -0.0119]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3540, -0.2788, -0.3865, ..., -0.3696, -0.3518, -0.2264], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[ 0.0172, -0.0035, -0.0072, ..., 0.0195, 0.0084, -0.0046], + [ 0.0164, 0.0169, -0.0219, ..., 0.0049, 0.0127, 0.0043], + [-0.0011, -0.0046, 0.0048, ..., 0.0056, -0.0323, -0.0245], + ..., + [ 0.0166, 0.0074, 0.0161, ..., -0.0444, -0.0176, -0.0061], + [-0.0054, -0.0033, -0.0143, ..., 0.0186, 0.0021, -0.0065], + [-0.0192, -0.0033, 0.0053, ..., 0.0050, 0.0029, -0.0080]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 4.4006e-02, 4.4006e-02, -4.0344e-02, 7.8247e-02, 3.2082e-03, + -6.9160e-03, 6.3477e-02, 6.2180e-04, 2.3254e-02, -5.0262e-02, + 4.3701e-02, -3.5614e-02, -1.0718e-01, 7.1945e-03, -5.4596e-02, + 2.0920e-02, -9.7275e-03, -5.1758e-02, 2.5070e-02, -2.9663e-01, + 2.7863e-02, 1.9608e-02, -9.3079e-03, -3.5187e-02, 3.0624e-02, + -7.2266e-02, 3.1525e-02, -1.0262e-02, -3.3112e-02, 3.5210e-03, + 6.8481e-02, -4.9934e-03, 4.7531e-03, 8.1421e-02, -4.2969e-02, + -1.2006e-01, 1.0361e-02, -7.3891e-03, 1.4328e-02, 2.2385e-02, + 2.0996e-02, 3.7231e-02, 2.8091e-02, 
1.2085e-02, 6.1607e-04, + -5.8960e-02, -1.8845e-02, -7.3242e-03, -1.6846e-02, -3.9185e-02, + -3.6194e-02, -6.7749e-02, -8.5754e-02, -9.9731e-02, -5.9128e-03, + 5.4199e-02, 4.1382e-02, -1.0706e-01, 2.8366e-02, -1.7044e-02, + 4.5319e-02, 4.0741e-03, 4.5197e-02, -3.0594e-02, 4.9500e-02, + -5.0568e-02, -8.7204e-03, -1.3562e-01, 5.0751e-02, -2.9129e-02, + 4.4250e-02, 1.4915e-02, 3.8788e-02, 6.2408e-02, 5.7312e-02, + 3.1403e-02, 5.3314e-02, -4.5502e-02, -2.9419e-02, 7.1335e-03, + -4.0497e-02, 2.8214e-02, -6.0120e-02, -1.1139e-02, -6.0028e-02, + -5.4352e-02, 2.4017e-02, 2.9099e-02, 3.0167e-02, 7.3425e-02, + -2.4384e-02, 1.6584e-03, -4.3716e-03, 5.9296e-02, -5.5389e-02, + 3.3779e-03, 5.9540e-02, -7.3730e-02, 2.7985e-02, 6.5186e-02, + -7.1472e-02, -5.0812e-02, -6.9702e-02, 6.2164e-02, 4.4373e-02, + 5.2582e-02, 1.1920e-01, -2.2644e-02, 6.2988e-02, -1.3977e-02, + 5.6702e-02, -9.5444e-03, -6.0940e-04, -2.4063e-02, 1.2109e-01, + 5.3482e-03, 7.7148e-02, 6.3354e-02, 2.1942e-02, 6.2805e-02, + -1.3056e-03, -3.2776e-02, -2.1667e-02, -9.2239e-03, 4.1687e-02, + -8.0200e-02, -9.2041e-02, -4.2847e-02, -1.4244e-02, -3.4882e-02, + -5.4230e-02, -1.1044e-03, -4.0985e-02, -5.4901e-02, 3.9825e-03, + -1.0185e-02, -5.8014e-02, 2.3926e-02, 1.6510e-02, 1.2032e-02, + 1.3130e-02, -4.1771e-03, 1.8816e-03, -3.4943e-02, 1.8845e-02, + -1.9928e-02, 4.5990e-02, -2.6535e-02, -8.4412e-02, 4.5288e-02, + 1.2439e-01, -1.6861e-02, -1.6693e-02, 2.3788e-02, -3.0396e-02, + 8.3780e-04, 2.1057e-03, 5.1147e-02, -1.4244e-02, -3.1006e-02, + 1.0052e-01, -5.0323e-02, 7.9651e-02, -3.1525e-02, 3.9612e-02, + 8.4167e-02, 2.6062e-02, 4.0833e-02, 9.8755e-02, -1.6832e-03, + -3.1204e-02, 4.4037e-02, -4.0161e-02, -2.1927e-02, -5.6396e-02, + -1.1725e-01, -2.2964e-02, 3.0228e-02, 2.9922e-02, -3.9005e-03, + -2.4368e-02, -4.0802e-02, 4.2480e-02, 6.6956e-02, 8.8379e-02, + -3.1433e-02, -3.2471e-02, 2.2559e-01, -5.1910e-02, -1.2238e-02, + -5.9967e-02, -1.1407e-01, 4.2877e-02, 7.1899e-02, 4.1138e-02, + 8.1205e-04, 5.4871e-02, 
-9.1003e-02, 1.6650e-01, 3.3417e-02, + -1.1768e-01, -2.3132e-02, 1.1646e-01, -6.3477e-03, 6.1760e-03, + 5.7316e-04, 5.2246e-02, 2.7954e-02, 5.6213e-02, -7.7698e-02, + -2.0538e-02, 8.9294e-02, -5.5847e-03, 1.2344e-02, -4.4708e-02, + 9.7351e-02, -1.8188e-02, -2.7069e-02, 7.0801e-03, -1.6431e-01, + -1.7761e-02, -6.6910e-03, 5.9662e-02, 1.4954e-01, -2.1393e-02, + -8.1970e-02, 3.8116e-02, 1.2842e-01, 1.4221e-02, -8.9478e-02, + -2.9205e-02, -8.4152e-03, -5.2948e-03, 1.7960e-02, -2.3041e-03, + 2.1622e-02, 5.3894e-02, 4.1321e-02, 6.3049e-02, 7.4585e-02, + -7.5134e-02, -1.7120e-02, 3.9856e-02, 1.5587e-02, 5.4718e-02, + -3.0640e-02, 2.6260e-02, 3.3875e-02, 5.4993e-02, -8.8043e-03, + -4.1107e-02, -2.8122e-02, 2.2095e-02, -3.5095e-02, 4.8065e-02, + 9.5940e-04, -4.5105e-02, -9.4910e-02, 5.2681e-03, -2.9404e-02, + -2.9007e-02, 3.9520e-02, 6.2347e-02, -2.9388e-02, -7.4890e-02, + -6.6284e-02, -7.2670e-03, -5.9174e-02, -3.7262e-02, -1.0040e-01, + 2.4204e-03, -5.4993e-02, -5.0568e-02, -5.2277e-02, -1.6388e-02, + 4.3297e-03, 1.1176e-01, -5.7144e-03, 7.0068e-02, -2.0242e-04, + -7.4768e-02, 6.5231e-03, 2.8366e-02, -6.0394e-02, 2.2354e-02, + -3.7537e-02, -2.7283e-02, 1.1131e-02, 2.7893e-02, -8.1482e-03, + -9.1736e-02, 4.5959e-02, -7.0190e-02, -4.4373e-02, 4.5593e-02, + -8.2825e-02, 9.3628e-02, -7.8308e-02, -4.0222e-02, -1.3855e-02, + -2.8920e-04, 1.4343e-01, -9.5367e-04, 1.5392e-03, 1.8402e-02, + 4.4861e-03, -6.3843e-02, -7.4615e-03, 3.1189e-02, 8.0643e-03, + 2.0874e-02, 2.0294e-02, 3.3913e-03, -2.5803e-02, -4.5349e-02, + 6.5575e-03, 1.4587e-02, 9.6054e-03, 4.6234e-02, 9.3872e-02, + -5.8441e-02, 2.1152e-03, -7.7324e-03, 4.5929e-02, 6.1621e-01, + -1.3562e-01, -3.2684e-02, 5.1544e-02, 4.3068e-03, -4.7089e-02, + 2.3514e-02, -3.3569e-02, -8.0017e-02, -4.9103e-02, 1.8994e-01, + 8.9340e-03, -8.0032e-03, 3.8239e-02, -1.7883e-02, -3.2898e-02, + 9.0454e-02, 9.1934e-03, -2.8091e-02, 6.6223e-03, 1.7242e-02, + 6.3744e-03, 3.2711e-03, 1.0388e-01, -8.2397e-02, -4.4464e-02, + -6.4011e-03, 7.5806e-02, 
-1.0323e-02, -3.5919e-02, -5.2338e-03, + 2.6779e-02, 1.3000e-02, -3.7567e-02, -1.5144e-02, 1.0291e-01, + -5.7220e-02, -3.5889e-02, 1.0323e-02, -4.9072e-02, 1.8463e-02, + 3.1799e-02, -3.0956e-03, 2.1194e-02, 3.1143e-02, 5.4199e-02, + 5.5786e-02, -2.1545e-02, 2.5299e-02, -5.8228e-02, 1.4038e-02, + 5.2414e-03, 1.2466e-02, 4.9042e-02, -1.7014e-02, -8.2825e-02, + -6.7810e-02, 1.8982e-02, -3.2166e-02, 3.1647e-02, -6.4636e-02, + -9.5520e-03, -2.0157e-02, -6.2805e-02, -4.1870e-02, -3.7689e-02, + 2.1042e-02, 6.1981e-02, 9.3384e-02, -6.7505e-02, -2.8427e-02, + -9.1614e-02, -2.3346e-02, -4.8553e-02, -1.6510e-02, -7.6790e-03, + 3.8971e-02, 2.9129e-02, -4.8065e-02, 5.2595e-04, -3.1021e-02, + -4.9072e-02, 5.5328e-02, -3.8147e-02, -1.1398e-02, -1.4252e-02, + -3.4027e-02, 1.2152e-01, -4.3121e-02, 1.0384e-02, 1.3135e-01, + -7.9102e-02, 2.5757e-02, 4.2664e-02, -1.1505e-01, -3.5278e-02, + 8.9798e-03, -7.8659e-03, 7.4768e-02, 2.4002e-02, -4.2053e-02, + 3.3779e-03, -2.9251e-02, 2.5543e-02, 6.2378e-02, 1.3626e-02, + -1.4030e-02, 1.1673e-02, 1.3084e-02, 4.4937e-03, 6.6467e-02, + 3.3569e-02, 1.0590e-02, -1.7273e-02, 1.3354e-01, -1.0046e-01, + 8.8196e-02, 3.1921e-02, 6.5727e-03, 2.5616e-03, 3.4847e-03, + -4.6783e-02, -1.9608e-02, 2.7161e-03, -6.0944e-02, -5.9845e-02, + -4.8126e-02, -2.8610e-03, -6.2683e-02, 4.9347e-02, -1.0399e-02, + -2.4887e-02, -1.4526e-01, -2.4475e-02, 1.8478e-02, -5.1613e-03, + 1.6012e-03, 5.2856e-02, -3.1281e-02, -4.5166e-02, 5.4588e-03, + -4.0649e-02, 3.0960e-02, -2.2705e-02, 5.6946e-02, -4.3579e-02, + -4.4670e-03, -7.7515e-02, -2.2755e-03, -4.0955e-02, 3.1708e-02, + 1.9547e-02, 6.0852e-02, -8.9111e-02, 2.8534e-02, -3.1952e-02, + 2.3224e-02, 7.8857e-02, 1.0376e-02, -2.0126e-02, -3.8513e-02, + 4.7779e-04, 5.2124e-02, -3.6736e-03, -2.6077e-02, 5.2399e-02, + -3.8635e-02, 1.0284e-02, -1.0727e-02, 3.0426e-02, -3.5706e-02, + -6.3416e-02, -1.4941e-01, -1.2947e-02, -1.3756e-02, 8.0017e-02, + 5.7411e-03, 1.2093e-03, 3.6955e-04, 9.0210e-02, 9.1919e-02, + -7.9193e-03, 
2.3174e-03, 4.5654e-02, -4.2664e-02, -1.0083e-01, + -2.9709e-02, -1.0016e-01, -7.1228e-02, 1.2744e-01, -7.2449e-02, + 3.3661e-02, 1.6739e-02, -4.2081e-04, 2.8763e-02, -5.9891e-03, + -1.9592e-02, 3.0579e-02, 5.4199e-02, 5.7251e-02, 1.4343e-02, + 8.3847e-03, -1.4000e-02, 7.7782e-03, -3.4760e-02, -7.5134e-02, + -2.5742e-02, -1.3428e-02, 1.4258e-01, -6.7322e-02, -1.1391e-02, + 2.9312e-02, -3.5217e-02, 5.9784e-02, 1.7410e-02, 8.3557e-02, + -4.3152e-02, -2.1225e-02, -5.1270e-02, 1.0663e-01, -4.0009e-02, + 6.7505e-02, -1.8768e-02, -1.8845e-02, -1.6342e-02, 1.0675e-01, + -7.0129e-02, 6.4819e-02, 7.0801e-02, 1.7065e-01, -5.0415e-02, + 3.4332e-02, 3.6194e-02, 5.4810e-02, -7.4280e-02, 2.6672e-02, + -7.6599e-02, -9.3155e-03, 3.4088e-02, 3.1342e-02, 2.2537e-02, + 1.9272e-02, 2.6093e-02, -5.4260e-02, -5.2246e-02, -2.6123e-02, + -3.4119e-02, 6.9946e-02, 2.1210e-02, -4.1161e-03, -6.0463e-03, + 1.3000e-01, 6.1615e-02, -4.8431e-02, -1.1176e-01, -6.4575e-02, + 1.6499e-03, -4.3701e-02, 7.8430e-02, 5.2795e-02, 5.8197e-02, + -2.3117e-02, -7.7087e-02, -7.5806e-02, -2.0325e-02, 4.2534e-03, + -6.2622e-02, 9.2163e-02, -4.7394e-02, -1.4824e-02, 1.4999e-02, + 1.1772e-02, -3.8635e-02, 6.6101e-02, -5.7312e-02, 6.4392e-02, + -1.3115e-02, -6.5857e-02, -5.3864e-02, 7.6843e-02, -6.1340e-02, + 2.0355e-02, 4.0375e-02, -1.3599e-01, 8.5449e-02, -1.3321e-02, + 1.8066e-02, -1.2894e-02, -4.9683e-02, -5.8517e-03, -5.9471e-03, + 1.8253e-03, -2.0447e-02, 8.0688e-02, -7.8979e-02, 8.7524e-02, + -3.7048e-02, -5.1270e-02, 4.2572e-02, 8.2855e-03, 7.0190e-02, + -9.6283e-03, 3.9948e-02, -1.7487e-02, -3.3752e-02, -5.0964e-02, + 1.7883e-02, -6.3538e-02, -3.4790e-02, 1.1554e-01, 1.1285e-01, + -1.2830e-01, 1.6846e-02, -6.2927e-02, 4.2953e-03, -1.3696e-01, + -6.1859e-02, -9.1431e-02, -2.4094e-02, -7.9102e-02, 4.9225e-02, + 9.9106e-03, -2.1606e-02, 2.0981e-02, 4.4403e-02, 6.6772e-02, + 2.4979e-02, 3.5248e-02, -1.3428e-02, 7.2670e-03, -7.3486e-02, + -1.2131e-02, 3.2318e-02, -4.4586e-02, 6.5979e-02, 5.0934e-02, + 
-2.3590e-02, -3.3875e-02, 5.4245e-03, -6.7261e-02, 3.0937e-03, + -3.1464e-02, -5.6671e-02, 1.6266e-02, -1.2732e-01, -1.2524e-01, + -6.3660e-02, 5.2734e-02, 1.7639e-01, -3.7445e-02, 6.8848e-02, + -6.0730e-03, -3.7994e-02, -3.1982e-02, 2.5940e-02, -2.2720e-02, + -3.3752e-02, 1.8774e-01, -5.4382e-02, 7.8735e-03, -9.3018e-02, + -3.5034e-02, -3.8853e-03, -5.6458e-03, 6.9031e-02, 5.0262e-02, + 6.5491e-02, 7.4219e-02, -1.1917e-02, -1.5190e-02, 4.6577e-03, + 9.6130e-02, -3.8879e-02, 8.1711e-03, 4.7791e-02, 5.7800e-02, + -3.0182e-02, -1.0788e-02, -9.2850e-03, -4.7577e-02, -5.6671e-02, + 1.3741e-02, -3.2410e-02, 1.1253e-02, -1.6083e-02, 2.0233e-02, + 1.4366e-02, 3.6224e-02, 2.8656e-02, 3.3264e-02, 6.3416e-02, + -6.0852e-02, 1.0582e-02, 2.5726e-02, -1.0492e-01, 3.5591e-03, + -6.4819e-02, 3.4058e-02, -7.6111e-02, -3.0792e-02, 1.1810e-02, + -2.9953e-02, 9.2850e-03, 7.1411e-02, -2.3735e-04, 2.5009e-02, + -1.6235e-02, 5.5054e-02, 1.2103e-01, -2.7204e-04, 8.8882e-03, + 3.9551e-02, -8.4656e-02, 3.6163e-02, -1.1993e-01, -2.1896e-02, + 2.0767e-02, -2.4246e-02, -2.0798e-02, -4.0161e-02, 1.0602e-01, + 3.7201e-02, 9.6817e-03, 1.1200e-01, -2.5299e-02, 8.6517e-03, + 9.4910e-03, -1.3940e-01, -2.2217e-02, 5.7037e-02, 7.7576e-02, + 3.6865e-02, -1.5251e-02, -5.6992e-03, 6.6345e-02, -7.4829e-02, + 1.1581e-02, -9.0820e-02, -4.9286e-02, -8.9645e-03, -2.6321e-02, + -1.0490e-02, -2.8820e-03, -1.4297e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.2640, 2.1569, 2.2719, 2.2420, 1.7296, 2.2268, 2.2222, 2.1929, 2.2933, + 2.3042, 2.2599, 2.2068, 2.2888, 2.1418, 2.3097, 2.1628, 2.3067, 2.1962, + 2.2541, 1.8798, 2.3330, 2.1527, 2.4260, 2.2432, 2.2344, 2.2169, 2.2899, + 2.2657, 2.2956, 2.2150, 2.2134, 2.3052, 2.2338, 2.2947, 2.1073, 2.1633, + 2.3743, 2.2180, 2.1662, 2.2571, 2.0809, 2.2990, 2.1551, 2.1888, 2.1720, + 2.2131, 2.4677, 2.1487, 2.2043, 2.1072, 2.1941, 2.3012, 2.2159, 2.2000, + 2.5026, 2.2741, 2.3018, 2.1355, 2.2140, 2.2018, 
2.1730, 2.2111, 2.3010, + 2.1690, 2.2760, 2.1768, 2.2708, 2.1877, 2.2992, 1.9298, 2.3732, 2.2795, + 1.9456, 2.3256, 2.3227, 2.3304, 2.2873, 2.2034, 2.2036, 2.2123, 2.2451, + 2.2720, 2.3650, 2.1825, 2.3101, 2.1620, 2.3239, 2.3294, 2.3094, 2.1591, + 2.1981, 2.0829, 2.3779, 2.1536, 2.2076, 2.1523, 2.2948, 2.3787, 2.2889, + 2.3791, 2.1895, 2.1518, 2.3576, 2.3959, 2.2077, 1.9487, 2.2961, 2.2239, + 2.2803, 2.1499, 2.0425, 2.3617, 2.4161, 2.2494, 2.2602, 2.1698, 2.2544, + 2.4007, 2.3820, 2.2632, 2.3637, 2.1851, 2.2005, 2.2400, 2.1663, 2.1420, + 2.3441, 2.3564, 2.3680, 2.2313, 2.1356, 2.3518, 2.3749, 2.2281, 2.4065, + 2.1281, 2.1338, 2.1551, 2.1071, 2.7126, 2.1604, 2.1262, 2.4673, 2.1946, + 2.1879, 2.1070, 2.4353, 2.2552, 2.3758, 2.2670, 2.1755, 2.1645, 2.3566, + 2.3182, 2.3796, 1.7392, 2.1921, 2.2126, 2.1753, 2.2293, 2.1131, 2.2069, + 2.2205, 2.3078, 2.3424, 2.1081, 2.0709, 2.2687, 2.2176, 2.2434, 2.2262, + 2.1535, 1.9846, 2.1755, 2.0177, 2.2333, 2.2221, 2.2830, 2.2767, 2.1881, + 2.6348, 2.3794, 2.3480, 2.3592, 2.1792, 1.9351, 2.1316, 1.2753, 2.1821, + 2.1568, 2.3376, 2.4373, 2.2861, 2.2278, 2.4345, 2.3894, 2.2850, 2.2173, + 0.8030, 2.2375, 2.4590, 2.1584, 2.2757, 2.1539, 2.2788, 2.3235, 2.2182, + 2.2825, 2.2370, 2.2002, 2.2239, 2.1483, 2.3083, 1.9194, 2.2911, 2.1758, + 2.1693, 2.3248, 2.2062, 2.1135, 2.5439, 2.3680, 1.7868, 2.2818, 2.2139, + 2.3934, 2.2358, 2.2729, 2.1581, 2.1231, 2.2160, 2.2524, 2.2938, 2.3082, + 2.1456, 2.2228, 2.4099, 2.2020, 2.1645, 2.1912, 2.2698, 2.2760, 2.2801, + 2.3099, 2.2425, 2.1879, 2.2222, 2.3711, 2.1873, 2.3739, 2.2790, 2.2676, + 2.3049, 2.2355, 2.3792, 2.3141, 2.3377, 2.4090, 2.3914, 2.3383, 2.3445, + 2.3042, 2.4982, 2.3165, 2.2953, 2.1704, 2.2329, 2.4004, 2.1656, 2.3381, + 2.2305, 2.2528, 2.2741, 2.3754, 2.1884, 2.4750, 2.2019, 2.2011, 2.3016, + 2.2869, 2.2985, 1.9621, 2.1770, 2.2807, 2.2525, 2.1672, 2.1855, 2.4377, + 2.2557, 2.1184, 2.3795, 2.3838, 2.1112, 2.2527, 2.1712, 2.3678, 2.2962, + 2.3979, 2.1778, 2.2367, 2.2000, 2.4401, 2.1548, 
2.3850, 2.4349, 2.1581, + 2.3237, 2.2696, 2.2616, 2.2724, 2.2583, 2.1867, 2.5341, 2.2949, 2.3043, + 2.1654, 2.2662, 2.4074, 2.2167, 2.3727, 2.3872, 2.1168, 2.2891, 2.2260, + 0.4755, 2.3298, 2.1823, 2.2057, 2.4162, 2.2086, 2.1705, 2.2330, 2.3128, + 2.2970, 2.2148, 2.1848, 2.2950, 2.2475, 2.2988, 2.1964, 2.3900, 2.2402, + 2.2406, 2.2631, 2.3282, 2.1986, 2.2774, 2.1227, 2.3814, 2.2799, 1.9796, + 2.3173, 2.2065, 2.0763, 2.2129, 2.1847, 2.2115, 2.3988, 2.3331, 2.2648, + 2.2173, 2.2222, 2.4255, 2.3040, 2.2819, 2.2370, 2.2084, 2.4024, 2.2437, + 2.3018, 2.4716, 2.2703, 2.2421, 2.2014, 2.2222, 2.3140, 2.1600, 2.2164, + 2.2799, 2.3304, 2.1385, 2.1579, 2.1541, 2.2016, 2.1684, 2.2469, 2.2755, + 2.2213, 2.3451, 2.1953, 2.2886, 2.3196, 2.3890, 2.2848, 2.1909, 2.1920, + 2.2832, 2.2331, 2.3384, 2.3486, 2.2844, 2.1510, 2.2882, 2.3951, 2.1480, + 2.1827, 2.2679, 2.1737, 2.2718, 2.3449, 2.2749, 2.2403, 2.2581, 2.4260, + 1.7728, 2.9975, 2.3103, 2.0867, 2.1864, 2.3866, 2.2906, 2.3580, 2.2100, + 2.2319, 2.4259, 2.2954, 1.9379, 2.1505, 2.2671, 2.1461, 2.1807, 2.2680, + 2.3275, 2.2361, 2.3598, 2.1926, 2.3816, 2.2544, 2.1655, 2.3670, 2.3348, + 2.1993, 2.2997, 1.5097, 2.1035, 2.1724, 2.2067, 2.3189, 2.2590, 2.1026, + 2.2328, 2.2674, 2.2554, 2.2146, 2.1348, 2.5011, 2.3310, 2.1544, 2.2187, + 2.1928, 2.2301, 2.3006, 2.2465, 2.1373, 2.2418, 2.2347, 2.2468, 2.2022, + 2.1193, 2.4142, 2.2624, 2.1409, 2.1893, 2.2579, 2.3964, 2.2402, 2.3457, + 2.3320, 2.2716, 2.0796, 2.2567, 2.1346, 2.2694, 1.6067, 2.1672, 2.3004, + 2.3265, 2.2902, 2.1561, 2.2396, 2.1815, 2.2325, 2.2429, 2.2065, 2.2034, + 2.1567, 2.4732, 2.3073, 2.2450, 2.4269, 2.2559, 2.3715, 2.2559, 2.5272, + 2.4351, 2.1391, 2.2326, 2.2180, 2.1983, 2.2195, 2.2410, 2.2155, 2.1019, + 2.0940, 2.2429, 2.2578, 2.2910, 2.3509, 2.1719, 2.4017, 2.3275, 2.2035, + 2.2029, 2.2942, 2.1925, 2.3053, 2.2525, 2.2779, 2.2201, 2.1996, 2.2416, + 2.3947, 2.3721, 2.2754, 2.3328, 2.2189, 2.3045, 2.2970, 2.4679, 2.2055, + 2.1974, 2.2517, 2.4998, 2.4094, 2.2671, 2.0876, 
2.2103, 2.2355, 2.2657, + 2.2187, 2.3696, 2.2213, 2.2929, 2.4940, 2.1678, 2.2122, 2.3943, 2.2058, + 2.3842, 2.1598, 2.3303, 1.8785, 2.3260, 2.2574, 2.2955, 2.1308, 2.2304, + 2.2647, 2.1533, 2.1775, 2.3234, 2.2043, 2.3099, 2.2060, 2.2395, 2.3715, + 2.2542, 2.3158, 2.2958, 2.1105, 1.9845, 2.1140, 2.2661, 2.2051, 2.2324, + 2.3517, 2.1900, 2.2237, 2.2118, 2.1717, 2.1753, 2.2713, 2.2211, 2.2775, + 2.3432, 2.0401, 2.2418, 2.4036, 2.2838, 2.2299, 2.2778, 2.0997, 2.2535, + 2.3705, 2.3978, 2.4439, 2.3486, 2.1743, 2.3667, 2.1913, 2.3294, 2.1519, + 2.3608, 2.2327, 2.2330, 2.2944, 2.1614, 2.2846, 2.2820, 2.1228, 2.2492, + 2.0873, 2.2352, 2.2103, 2.3395, 2.3103, 2.1853, 2.2346, 2.1970, 2.1938, + 2.2931, 2.2062, 2.3980, 2.2042, 2.1698, 2.1685, 2.2466, 2.2704, 2.2514, + 2.3169, 2.1725, 2.3112, 2.2201, 2.2424, 1.8915, 2.1430, 2.1394, 2.3150, + 2.3607, 2.2556, 2.4942, 2.2349, 2.1679, 2.3034, 2.2429, 2.1978, 2.3658, + 2.2612, 2.2297, 2.0844, 1.7705, 2.1903, 2.3076, 2.1763, 2.2701, 2.3301, + 2.0571, 2.2049, 2.3389, 2.2586, 2.2229, 2.2573, 2.2908, 1.9869, 2.2785, + 2.1082, 2.3457, 2.1417, 2.2914, 2.3343, 2.2597, 1.9121, 2.1643, 2.2582, + 2.1474, 2.2186, 2.1766, 2.1799, 2.2972, 2.2724, 2.2806, 2.2794, 2.1807, + 2.4325, 2.1112, 2.3376, 2.2759, 2.2201, 2.4643, 2.2095, 2.2176, 2.1463, + 2.2402, 2.2014, 2.3456, 2.3663, 2.3184, 2.2078, 2.0484, 2.3456, 2.2747, + 2.1629, 2.3276, 2.2766, 2.1601, 2.2534, 2.3233, 2.1668, 2.1531, 2.3054, + 2.1789, 2.3498, 2.3915, 2.2312, 2.2355, 2.1540, 1.6734, 2.1592, 2.2040, + 2.1684, 2.4621, 2.2115, 2.1323, 2.2404, 2.4256, 2.3941, 2.2906, 2.1213, + 2.5773, 2.4617, 2.3884, 2.2571, 2.0984, 2.3819, 2.3312, 2.1406, 2.3404, + 2.3413, 2.4340, 2.2037, 2.2027, 2.3634, 2.3197, 2.1520, 2.2111, 2.4423, + 2.2281, 2.2688, 2.3624, 2.2121, 2.1558, 2.1795, 2.2498, 2.2931, 2.1872, + 2.2805, 2.3515, 2.3612], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 0.1621, 0.1320, -0.8621, -0.0336, 0.1502, 0.0541, 0.1869, 0.0480, + 0.1135, 
-0.0172, -0.3924, 0.3050, -0.6887, -0.2888, 0.5417, -0.1436, + 0.1237, 0.4877, -0.4812, -1.1091, 0.4803, 0.1873, 0.8007, -0.5644, + 0.2164, -0.0993, 0.2257, 0.2541, -0.7131, 0.6955, -0.0308, 0.6967, + 0.2246, -0.5032, 0.4245, -0.0466, -0.5230, 0.4978, 0.7140, 0.5024, + -0.0975, -0.2074, 0.4438, 0.2407, -0.3374, -0.2500, -0.8558, -0.0481, + 0.5578, 0.1013, -0.4001, -0.1277, 0.5967, 0.3349, -1.0492, 1.1932, + 0.2491, 0.0806, 0.1028, 0.0558, -0.1210, -0.1634, 0.3394, -0.5425, + -0.0082, 0.3193, 0.4550, -0.3117, -0.5130, -1.1894, 0.5283, 0.5521, + 0.3825, -0.5808, -0.9071, -0.6364, 0.1480, -0.0503, 0.2118, 0.5899, + 0.4158, 0.3171, -0.5889, 0.6909, -0.2356, -0.1724, 0.3149, -0.5220, + 0.5159, -0.0380, -0.1137, 0.1872, -0.4011, -0.4489, -0.2862, -0.1378, + 0.5677, -0.4666, -0.7463, 0.9317, 0.5492, -0.4332, -0.0704, 0.5361, + 0.4139, 0.3445, 0.1253, 0.4641, -0.0434, -0.0163, 0.3818, 0.8250, + -0.5626, 0.4493, 0.2315, 0.0676, 0.5582, 0.5198, 0.7981, 0.3034, + -0.4785, -0.3753, -0.4814, 0.0080, 0.1538, -0.3050, 0.3001, 0.4994, + -0.5714, 0.1687, 0.0673, -0.5381, 0.2875, -0.2586, 0.6101, 0.2481, + 0.4871, 0.1383, -0.0470, -0.9018, 0.1253, 0.4531, -0.7880, -0.1147, + -0.0429, 0.1276, 0.6707, -0.1825, -0.5070, 0.4332, 0.1504, -0.1559, + 0.5703, -0.4738, -0.9653, 0.6716, -0.2044, -0.2327, -0.0894, -0.0192, + -0.3091, 0.1484, 0.3899, -0.4676, 0.4944, -0.0957, -0.5098, 0.1894, + -0.3161, -0.1162, 0.3520, 0.1080, -0.1028, 0.4153, 0.1881, 0.3348, + 0.0837, -0.7355, -0.3223, 0.0820, -1.1374, -0.6000, 0.5029, 0.3999, + 0.1938, 0.2657, -0.2888, 1.9289, 0.1540, -0.0699, -0.7190, -0.6000, + -0.3247, 0.3231, 0.6391, 0.7734, 0.2144, 0.3356, -0.3159, 0.1742, + -0.6171, 0.0906, 0.4558, 0.3694, -0.3378, -0.1699, 0.4212, 0.3525, + -0.6260, 0.3566, -0.0672, -0.4158, -0.4320, 0.3351, 0.5299, -0.0924, + -0.5760, 0.0608, -0.0278, 0.1324, 0.9353, 0.3500, -0.6054, 0.2021, + 0.2814, -0.3948, -0.2922, 0.0252, 0.0906, 0.2763, 0.1940, 0.2645, + 0.6804, 0.5362, -0.3680, -0.6316, 0.4309, 0.4631, 0.0191, 
0.4439, + 0.9340, 0.3927, 0.4112, -0.6026, -0.1669, 0.2130, 0.6028, 0.1757, + -0.1022, 0.7878, 0.2552, 0.5194, 0.4319, 0.0206, 0.6868, -0.3183, + 0.5887, -0.6037, -0.6882, -0.1226, -0.4427, 0.1058, 0.4440, 0.3308, + -0.3919, -0.2030, 0.1480, 0.9253, 0.3558, -0.4835, 0.4862, 0.3480, + -0.1619, -0.7845, 0.1806, -0.3933, -0.3324, 0.3735, -0.5081, 0.2008, + -0.2949, -0.9179, -0.3203, 0.4054, -0.5126, 0.1843, -0.3469, 0.7029, + 0.5811, -0.1982, -0.4353, 0.5465, 0.5292, 0.0970, -0.3432, -0.2300, + -0.4013, -0.5306, 0.0403, -0.2399, -0.5037, 0.7009, -0.0160, -0.4212, + 0.2411, -0.1549, 0.1045, -0.0450, -0.3573, -0.1059, 0.6022, -0.0220, + 0.8710, -0.1001, -0.2127, -0.0137, 0.4078, 0.4049, -0.0803, 0.3979, + -0.6313, -0.2661, 0.2763, -0.2651, -0.1129, -0.6101, 0.2320, -0.3461, + 0.6760, -0.1488, 0.2616, -0.5456, -0.2613, -0.0147, -0.0846, -0.3216, + -0.6864, 0.2660, 0.5609, 0.3031, 0.3006, -0.0308, -0.4863, 0.0349, + -0.7812, 0.3434, -0.3803, 0.0266, -0.2601, -0.0796, 0.6440, 0.2608, + -0.1114, 0.3551, -0.3097, -0.2056, -0.1391, -0.4942, -0.4338, 0.3045, + -0.4930, -0.5881, -0.4112, -0.4181, 0.4903, 0.4216, -0.2322, 0.6487, + 0.4649, 0.5861, -0.5109, -0.1017, 0.1379, -0.4404, 0.2101, -0.3105, + 0.2269, 0.0366, -0.2296, -0.3566, 0.2081, -0.1343, 0.1240, -0.0179, + -0.2213, -0.0625, -0.1611, 0.5202, 0.6848, 0.0801, 0.3838, -0.2576, + 0.4027, 0.5769, -0.8503, -0.0359, -0.1019, 0.8661, -0.4741, -0.7002, + -0.0760, 0.1911, -0.1189, -0.5374, 0.5348, -0.1533, 0.1744, -0.1486, + -0.3563, 0.3826, 0.6247, 0.0360, 0.5564, 0.2615, 1.2232, -2.3384, + 0.5863, -0.2913, 0.5541, 0.3806, 0.0317, -0.6352, 0.5425, -0.2135, + -0.5930, 0.6499, 0.3602, 0.0085, -0.0622, 0.1431, -0.2406, -0.5485, + -0.3074, 0.2902, 0.5639, 0.0688, -0.5124, 0.3740, -0.1591, -0.7494, + 0.2714, 0.2281, 0.1605, -1.2912, 0.1387, 0.2413, 0.3343, 0.5193, + 0.6109, 0.1938, 0.1689, 0.2520, -0.2678, 0.0738, -0.1442, -0.8710, + -0.4280, -0.3669, -0.2250, -0.0465, -0.4375, 0.3030, -0.5160, 0.0954, + -0.5934, -0.0733, 0.0735, 
0.4256, 0.1004, -0.5574, 0.4439, 0.3795, + 0.0716, -0.0068, -0.7792, -0.2683, -0.2675, -0.3899, -0.2300, 0.1020, + -0.2996, 0.0126, 0.4978, -0.3174, -0.0587, -0.4951, 0.0076, 0.6863, + 0.2059, -0.4359, 0.0722, 0.3730, -0.5903, 0.1088, -0.3538, -0.2944, + -0.6744, -0.3510, 0.5116, 1.0242, -0.2177, -0.9765, 0.1696, 0.5113, + -0.0504, -0.0675, 0.3385, 0.4160, 0.2705, -0.2345, 0.0065, 0.1965, + -0.3579, -0.1229, 0.6358, 0.0630, 0.5625, -0.3390, -0.2423, 0.4026, + 1.0521, 0.1297, -0.2261, 0.3796, -0.2018, -0.4482, 0.6710, 0.2464, + 0.1820, 0.0604, -0.0261, -0.7356, 0.1610, 0.3147, 0.7031, -0.7128, + 0.4793, -0.6179, 0.4299, -0.1426, 0.1959, 0.0474, -0.7578, 0.3871, + 0.4518, -0.0792, -0.0705, -0.7027, -0.2180, -0.1972, -0.4494, -0.4577, + 0.5420, 0.6782, -0.0076, -0.0709, 0.4196, 0.2184, -0.5684, -0.3109, + 0.4548, -0.4821, -0.5241, 0.6643, -0.4297, 0.2593, -0.0437, 0.0028, + 0.1823, -0.0203, -0.2064, 0.2194, 0.4955, 0.0041, 0.5605, 0.6234, + -0.1054, -0.4684, -0.2235, 0.1850, 0.2052, 0.2630, 0.3708, -0.0098, + 0.1920, -0.1915, -0.2142, -0.1864, 0.5316, 0.2705, -0.0170, -0.1591, + 0.2824, 0.0184, -0.2010, 0.2990, 0.0938, 0.3728, 0.6740, 0.4613, + 0.3229, -0.0187, 0.2902, -1.1038, -0.1624, 0.6004, -0.5546, -1.5263, + -0.2350, 0.0903, -0.4732, 0.4322, -0.5486, 0.5330, 0.2533, 0.2336, + 0.1706, -0.3836, 0.1548, -0.0594, 0.1809, -0.0227, -0.2615, 0.2121, + 0.1297, 0.3730, -0.2365, 0.3757, 0.4399, -0.1251, -0.5659, 0.2555, + -0.7871, -0.4932, 0.2153, 0.0382, -0.0994, -0.0076, 0.2226, -0.5119, + 0.6038, -0.6071, -0.9637, 0.1321, -0.7474, -0.0215, -0.0357, 0.1232, + -0.7136, 0.3996, 0.9405, -0.0939, 0.4872, -0.3826, 0.5215, 0.5895, + 0.4666, 0.1541, -0.5932, 0.0385, 0.0506, 0.1357, 0.2811, 0.4319, + -0.0849, -0.3161, 0.0826, -0.3186, 0.2883, -0.1638, 0.5536, 0.2820, + 0.0268, 0.2919, 0.7081, -0.2793, 0.7435, -0.0363, -0.1690, -0.1719, + -0.5410, 1.0448, -0.5631, 0.1419, 0.5340, 0.5717, 0.1740, -0.3740, + 0.2532, -0.4504, -0.6378, -0.2206, -0.4109, -0.6917, -0.2142, 0.9643, + 
-0.3597, -0.1640, -0.7284, 0.1631, 0.1779, 0.1018, 0.4215, -0.4838, + 0.4528, 0.7088, -0.3826, -0.3808, -0.1254, -0.5218, 0.5777, -0.1802, + -0.4346, -0.4577, 0.0334, -0.2096, 0.1364, 0.3060, 0.0198, -0.8002, + 0.0443, -0.5895, -0.6670, -0.4631, -0.0998, 0.1485, 0.4129, -0.1068, + -0.0434, -0.2676, -0.7246, -0.2339, 0.2438, 0.2161, 0.5981, 0.5052, + -0.1812, -0.0142, -0.5303, -0.8499, -0.7728, 0.1444, -0.3332, -0.2925, + 0.3377, 0.4867, -0.3891, 0.4536, 0.9387, 0.4918, -0.0686, -0.3014, + -0.5999, 0.2228, -0.4177, 0.7356, -0.5696, -0.5074, 0.6764, 0.6888, + -0.2936, 0.0707, -0.0800, 0.5583, 0.2862, 0.4679, 0.6217, 0.6404], + device='cuda:1', requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[-0.0240, 0.0029, -0.0312, ..., 0.0232, 0.0232, -0.0007], + [-0.0009, 0.0125, -0.0428, ..., 0.0167, -0.0114, 0.0172], + [-0.0243, 0.0004, -0.0028, ..., -0.0064, 0.0121, 0.0166], + ..., + [ 0.0119, 0.0008, -0.0014, ..., -0.0109, 0.0003, -0.0192], + [-0.0027, -0.0135, 0.0034, ..., 0.0144, 0.0325, -0.0189], + [ 0.0063, 0.0089, -0.0012, ..., 0.0233, -0.0183, -0.0119]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.0339, 0.5952, -0.3469, ..., 0.0100, -0.0171, 0.0073], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 1.6983e-02, 1.5869e-02, 2.5711e-02, ..., 1.8282e-03, + -1.1787e-02, -2.0477e-02], + [-4.1723e-06, 4.6234e-03, -1.7273e-02, ..., -1.3374e-02, + -4.8447e-03, 3.5763e-03], + [-9.9716e-03, -4.3945e-03, 2.9068e-03, ..., 1.9684e-02, + -2.8351e-02, -2.8290e-02], + ..., + [-2.1042e-02, -2.2217e-02, 2.5452e-02, ..., -4.4417e-04, + 1.7960e-02, -1.9300e-04], + [ 6.3667e-03, -1.1711e-02, -3.2842e-05, ..., -1.2466e-02, + 2.6031e-02, 2.3913e-04], + [ 1.1337e-02, 1.0567e-02, 2.4395e-03, ..., -1.5053e-02, + -5.5428e-03, -7.4120e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter 
containing: +tensor([-3.2745e-02, -3.7262e-02, 7.0618e-02, -4.9408e-02, -8.5632e-02, + -1.1737e-01, 2.8248e-03, 3.5278e-02, 2.7481e-02, -7.3914e-02, + 6.2103e-03, -1.5160e-02, 6.6101e-02, 2.4586e-03, 3.9703e-02, + 1.2024e-02, 8.2474e-03, -1.0178e-02, 2.8503e-02, -8.3923e-02, + -3.1860e-02, 5.1636e-02, 2.6596e-02, 1.4214e-02, 5.3749e-03, + 3.8635e-02, 9.4910e-03, 5.4169e-03, 2.8625e-02, 5.6244e-02, + 9.2773e-02, -3.4973e-02, -5.4443e-02, -1.7410e-02, -8.0322e-02, + -2.9312e-02, -4.7180e-02, 3.3905e-02, 8.2397e-02, -5.3833e-02, + 5.6610e-02, -3.3188e-03, 9.7198e-03, -6.4163e-03, -3.9764e-02, + -7.4097e-02, -3.4454e-02, -2.2781e-02, -6.9641e-02, -5.4474e-02, + -4.5288e-02, -8.2581e-02, -1.0016e-01, -8.6182e-02, -6.2744e-02, + 6.8542e-02, 7.1411e-02, -7.6782e-02, 6.6101e-02, 3.5004e-02, + 2.3087e-02, 1.2428e-02, 3.9154e-02, -2.8885e-02, 2.3026e-02, + 3.0975e-02, -6.9946e-02, -2.4078e-02, -7.0435e-02, -1.0736e-01, + 7.1960e-02, 3.1464e-02, -1.7651e-01, -5.1758e-02, -1.1955e-02, + -2.8366e-02, 1.2091e-01, -8.0032e-03, 2.8351e-02, 7.6599e-02, + -7.8278e-03, -2.9221e-02, -3.5126e-02, -6.3965e-02, -1.0217e-01, + -1.1749e-02, 2.8610e-02, -3.5492e-02, 1.8173e-02, -1.5427e-02, + -2.2491e-02, -7.9895e-02, -6.0577e-03, 1.0437e-01, 7.1030e-03, + -4.1779e-02, 2.0691e-02, -2.1744e-02, -4.5074e-02, 8.3557e-02, + -3.9795e-02, -6.3354e-02, -9.0454e-02, 1.1726e-02, -1.9026e-03, + 6.2561e-02, 5.7648e-02, -1.2598e-03, -3.3142e-02, 1.1192e-02, + -5.4993e-02, -6.8207e-03, -7.0251e-02, -3.7537e-03, -2.0264e-02, + -1.6495e-02, 5.8929e-02, 5.2734e-02, 2.5604e-02, 4.9591e-02, + 4.0833e-02, 7.2021e-02, 8.3618e-02, 5.5908e-02, 9.9365e-02, + -6.7810e-02, -4.8859e-02, 6.0913e-02, 4.6936e-02, 2.3026e-02, + 2.4643e-02, -2.7294e-03, -7.0000e-03, 2.8259e-02, -2.8000e-02, + 3.6743e-02, 3.1036e-02, -5.6335e-02, 1.1298e-01, -2.6901e-02, + 4.3297e-03, 5.1514e-02, 2.9175e-02, -3.7628e-02, -2.9716e-03, + -5.6122e-02, 4.5654e-02, -4.4189e-02, -7.1960e-02, -4.3121e-02, + -1.0902e-02, 1.4626e-02, 2.4857e-02, 
-5.3833e-02, -3.4943e-02, + 1.2573e-01, 4.2114e-02, -8.2397e-02, -2.7939e-02, -9.5062e-03, + 4.9072e-02, -3.5828e-02, -1.2123e-02, -4.6295e-02, 6.8604e-02, + -5.3902e-03, 6.6895e-02, 3.5706e-02, 5.4810e-02, -8.3984e-02, + 2.7725e-02, -9.0881e-02, -2.1469e-02, 1.2909e-02, 1.5402e-03, + -1.3535e-02, 1.6994e-03, 5.4443e-02, 5.9296e-02, -3.1952e-02, + -8.9783e-02, 1.1833e-02, 3.9948e-02, 8.2764e-02, 1.7273e-02, + 1.1322e-01, -1.2062e-02, -1.9678e-01, -8.5068e-03, 1.9958e-02, + -4.4250e-02, -9.0637e-03, 4.3640e-02, 1.1816e-01, 9.2712e-02, + -3.2440e-02, 1.2960e-03, -3.6407e-02, -1.6748e-01, 2.6276e-02, + 2.5833e-02, 1.8478e-02, 2.1027e-02, -5.5878e-02, -2.7237e-02, + -1.8600e-02, -7.0343e-03, 5.3101e-02, 7.2289e-03, -1.1633e-01, + 1.1024e-02, -1.3672e-02, -8.4778e-02, 4.2999e-02, 1.5900e-02, + -2.4460e-02, 2.7752e-03, -9.0820e-02, -5.1941e-02, 4.6051e-02, + 4.0619e-02, 2.6520e-02, 3.6804e-02, 5.4504e-02, 1.7334e-02, + 5.2551e-02, 3.5400e-02, 4.0680e-02, 5.0629e-02, 3.8849e-02, + -1.0114e-01, -1.7532e-02, -1.4519e-02, -4.5967e-03, -5.9601e-02, + 2.7451e-02, 6.6711e-02, -8.5388e-02, 5.1270e-02, 4.3106e-03, + -2.9434e-02, -2.7191e-02, -6.9580e-02, 2.4521e-02, -1.5854e-02, + -3.3295e-02, -5.2155e-02, 7.4463e-02, -8.1024e-03, 4.5990e-02, + -3.9856e-02, 7.2327e-02, -1.3824e-02, -3.5767e-02, -5.3375e-02, + -6.3354e-02, 5.9143e-02, -2.9785e-02, -5.1819e-02, -1.4191e-03, + -4.8309e-02, -2.7359e-02, 1.4809e-02, -4.0321e-03, 2.6428e-02, + 6.0196e-03, 1.8768e-02, -3.5309e-02, 5.8807e-02, -3.5065e-02, + -5.7404e-02, 1.9409e-02, -2.7237e-02, 2.7374e-02, 3.3627e-03, + -7.4646e-02, -2.2507e-02, -2.6672e-02, -2.2705e-02, 3.7537e-02, + -1.1487e-01, -8.6731e-02, 6.1951e-02, -1.8433e-02, -4.1595e-02, + -6.4888e-03, -2.1606e-02, -1.8829e-02, 1.8097e-02, -1.3634e-02, + -2.5406e-02, 4.0680e-02, -4.6448e-02, -6.1035e-02, 4.1901e-02, + -1.5297e-02, -2.0386e-02, -3.8208e-02, -3.4698e-02, -2.6306e-02, + 2.8915e-02, 2.8961e-02, -1.9180e-02, -2.0874e-02, 1.2062e-02, + 9.1248e-02, -7.3792e-02, 
8.2581e-02, 2.2095e-02, 2.4109e-02, + 8.5297e-03, -3.9917e-02, 1.9516e-02, 5.9479e-02, 1.6575e-03, + -6.4392e-02, 5.5939e-02, 4.1351e-02, -4.5013e-02, 3.5065e-02, + -5.5939e-02, -3.6621e-02, 7.2388e-02, -1.1955e-02, 5.0537e-01, + -4.3221e-03, -5.9845e-02, 9.2468e-02, -2.0552e-04, 5.4535e-02, + 7.6447e-03, 1.6312e-02, -1.0315e-01, 1.3399e-03, 1.4111e-01, + -5.0690e-02, -7.7637e-02, 4.9408e-02, 1.8692e-02, 5.7106e-03, + 1.2610e-01, -1.7075e-02, -4.6783e-02, 8.6426e-02, -1.9379e-02, + 6.0516e-02, -6.6467e-02, 6.7383e-02, -2.3712e-02, 3.2959e-03, + 3.4363e-02, -2.7908e-02, -1.6464e-02, -4.3793e-02, -3.1555e-02, + 1.0529e-01, -2.0172e-02, -3.9612e-02, -1.1551e-02, 5.5664e-02, + -3.3905e-02, -1.6510e-02, 3.2928e-02, -4.0649e-02, 7.1716e-02, + -2.5253e-02, 3.1281e-02, -1.1154e-02, 1.2230e-02, -7.7515e-03, + -5.7953e-02, -9.7036e-04, 4.2023e-02, 1.6327e-02, 1.5778e-02, + 4.0436e-03, -2.2984e-03, 8.8959e-03, -7.6172e-02, -5.8380e-02, + -4.4250e-04, -6.6772e-02, -1.6479e-02, -1.1642e-02, 5.5298e-02, + -3.2532e-02, 6.6833e-03, -1.2039e-02, -2.9358e-02, 1.2466e-02, + 2.9068e-02, -4.7211e-02, 1.2427e-01, 2.3285e-02, 2.2766e-02, + -1.3580e-02, 4.3304e-02, -7.6965e-02, -2.4384e-02, 7.5874e-03, + -4.8157e-02, 6.9702e-02, -2.3193e-02, -7.1655e-02, 6.7078e-02, + -8.1253e-03, 2.9144e-02, 5.5771e-03, 1.5549e-02, 7.8735e-02, + -1.7288e-02, -1.9043e-02, 1.4229e-02, -2.3041e-02, 9.5337e-02, + -1.4673e-01, -2.5482e-02, 1.0269e-02, -3.0579e-02, 2.6459e-02, + 1.0797e-01, 1.5671e-02, 2.2675e-02, 5.3864e-02, -3.8116e-02, + 1.2083e-03, 4.2343e-04, 4.5586e-03, -4.0558e-02, 2.6489e-02, + -6.5002e-02, -1.5915e-02, -6.2683e-02, -4.7493e-03, 6.5430e-02, + -8.2779e-03, 3.7670e-04, 7.5928e-02, 5.5603e-02, 3.0899e-02, + 6.4636e-02, 4.4617e-02, -1.4450e-02, -3.3234e-02, 2.0126e-02, + 5.1842e-03, -2.2232e-02, 4.2847e-02, -5.9418e-02, 2.7069e-02, + 1.2884e-03, -3.8635e-02, -8.5678e-03, -3.1174e-02, -3.3691e-02, + 1.8463e-02, -3.8483e-02, -1.6953e-02, 1.5032e-04, 5.0415e-02, + -1.6312e-02, 2.1896e-03, 
5.3482e-03, -2.6169e-03, 1.5961e-02, + 8.8989e-02, -8.9417e-02, 6.7322e-02, 4.4060e-03, -4.0680e-02, + 7.1655e-02, -7.9880e-03, 3.7415e-02, 1.4679e-02, 5.3253e-02, + -4.5166e-02, -6.3553e-03, -6.2622e-02, -1.0034e-01, -3.8055e-02, + 1.2070e-02, 5.5359e-02, 2.9800e-02, -4.4250e-02, 3.8483e-02, + 1.8158e-02, 2.4048e-02, -1.9745e-02, -4.2358e-02, 1.0504e-01, + -7.1289e-02, -4.8798e-02, 5.9113e-02, -1.9197e-03, 7.9346e-03, + -4.9042e-02, -1.8524e-02, -4.1199e-02, -4.6204e-02, -3.8452e-02, + 1.0345e-02, 9.5825e-02, -2.6993e-02, -1.4549e-02, 8.4534e-02, + -5.8889e-04, -9.0942e-02, -4.8757e-04, 1.6724e-02, -2.7725e-02, + -1.8555e-02, 5.5695e-02, -2.7573e-02, -5.7709e-02, -2.2324e-02, + 2.2247e-02, -6.0883e-02, -8.6365e-02, 1.7517e-02, 8.9539e-02, + 6.8359e-03, -4.1840e-02, -2.2354e-03, 5.0446e-02, -2.1027e-02, + -8.9966e-02, 3.4428e-03, 1.4275e-02, 6.3232e-02, 2.9099e-02, + 1.7456e-02, -3.3203e-02, 2.0004e-02, -3.6316e-03, -4.0131e-03, + -6.4331e-02, 1.0785e-01, 2.2873e-02, 3.7689e-02, 6.3843e-02, + 3.4332e-02, -5.2948e-02, -1.1115e-01, 6.5063e-02, 6.7810e-02, + 7.7576e-02, -1.8646e-02, 2.1606e-02, -4.2816e-02, 5.0446e-02, + 1.8005e-02, 3.1799e-02, 9.4849e-02, 1.8478e-02, -4.5135e-02, + 2.9434e-02, 3.4027e-02, -5.4230e-02, -1.2000e-01, -4.0192e-02, + -3.8055e-02, -1.1035e-01, -1.5144e-02, 6.3538e-02, -5.6824e-02, + -2.4490e-02, -2.3254e-02, -8.2642e-02, 7.3181e-02, -1.1963e-02, + -9.0881e-02, 7.8726e-04, -1.5526e-03, 1.6342e-02, 7.5806e-02, + 5.8746e-02, -2.7905e-03, -5.0781e-02, -3.6652e-02, 3.6438e-02, + 9.2957e-02, -1.8158e-02, -9.3323e-02, 4.7333e-02, 3.7048e-02, + -7.4463e-02, -3.2379e-02, -4.8004e-02, 4.8798e-02, -9.2041e-02, + 1.1208e-02, -1.8906e-02, 6.5491e-02, -1.1194e-01, 1.6312e-02, + 9.8228e-04, 4.9377e-02, 4.5258e-02, -9.7198e-03, -4.9973e-03, + 1.4687e-02, -1.0727e-02, -3.3112e-02, 1.3420e-02, -1.1926e-01, + -3.4210e-02, -2.6505e-02, -1.4172e-01, -4.8645e-02, 6.9214e-02, + -1.5572e-02, 2.7514e-04, -2.4918e-02, -8.4457e-03, -3.5706e-02, + 4.5898e-02, 
1.9073e-02, 1.9043e-02, 2.5650e-02, 7.7454e-02, + 4.1199e-03, -2.7809e-03, -2.6291e-02, 1.1681e-02, 1.0777e-03, + 2.2415e-02, 7.7095e-03, 4.5471e-02, -3.2837e-02, 3.6530e-02, + 1.0117e-02, 7.2449e-02, -8.8272e-03, -6.4240e-03, -2.0645e-02, + -1.9745e-02, 1.2451e-01, -5.4504e-02, -2.4109e-02, -3.6652e-02, + -4.5776e-02, -1.9501e-02, -1.4977e-02, 3.8757e-02, -2.0264e-02, + 9.2468e-02, -3.8757e-02, -1.0120e-01, 4.2511e-02, 9.3384e-03, + -7.1228e-02, -6.3843e-02, -8.2764e-02, -1.5274e-02, 4.5837e-02, + 1.7807e-02, 1.1047e-02, -4.1313e-03, 3.1143e-02, -4.5074e-02, + 2.0096e-02, 2.8496e-03, 3.0777e-02, -5.9509e-02, -5.3680e-02, + -5.6610e-02, 6.8054e-02, -4.6730e-03, -4.0741e-02, -3.3478e-02, + -1.1200e-02, 1.2512e-02, 4.8859e-02, 1.0048e-02, 1.2789e-03, + 3.3203e-02, 8.2550e-03, 1.0597e-02, 6.1737e-02, -4.7577e-02, + 1.2741e-03, 3.1787e-01, -7.5928e-02, -9.5703e-02, 1.4748e-02, + 1.4664e-02, 8.2764e-02, 2.2106e-03, 1.1646e-01, 2.5482e-02, + -1.8402e-02, 3.6602e-03, 2.4826e-02, 9.9106e-03, -4.2023e-02, + -8.2092e-02, 3.7781e-02, 2.1667e-02, -3.5553e-02, 1.0400e-01, + 1.7120e-02, -2.5818e-02, -4.9164e-02, -1.6220e-02, -6.5269e-03, + 5.7251e-02, -9.5459e-02, 1.8341e-02, 2.6260e-02, 4.4739e-02, + 6.4941e-02, 3.4058e-02, -3.7292e-02, -3.1525e-02, 7.1838e-02, + 1.8677e-02, 9.6863e-02, -1.2825e-02, 2.3460e-03, 4.5654e-02, + -5.2612e-02, -2.2858e-02, 1.5305e-02, 2.5543e-02, -1.0818e-02, + 3.5782e-03, 7.5500e-02, 1.0168e-01, 2.2522e-02, -4.3976e-02, + -1.9531e-02, -1.2894e-02, -1.2756e-02, 1.8740e-03, -3.4237e-03, + 5.5573e-02, -5.2338e-02, 4.3152e-02, -1.2140e-01, -1.8387e-02, + 3.2928e-02, 4.8950e-02, 1.2146e-02, -5.9845e-02, 6.4087e-02, + -4.7821e-02, 7.9224e-02, 4.6906e-02, 9.7046e-02, 5.7953e-02, + 8.5510e-02, -1.3525e-01, 2.4994e-02, -7.3471e-03, 3.4424e-02, + -1.6235e-02, -2.4094e-02, -2.6352e-02, 6.3049e-02, 3.8872e-03, + 5.0720e-02, -1.5222e-01, 5.7251e-02, 2.0691e-02, 6.3416e-02, + 4.5654e-02, -1.1436e-02, -3.4546e-02], device='cuda:1', + dtype=torch.float16, 
requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.7929, 1.7831, 1.7803, 1.6813, 1.6177, 1.8186, 1.8280, 1.8169, 1.8193, + 1.7491, 1.7959, 1.8259, 1.7240, 1.6950, 1.6551, 1.7475, 1.8325, 1.7874, + 1.7081, 0.4558, 1.7815, 1.8641, 1.7709, 1.7374, 1.8430, 1.7791, 1.7354, + 1.7626, 1.7691, 1.6661, 1.7468, 1.7535, 1.7837, 1.7498, 1.7474, 1.7921, + 1.8657, 1.7096, 1.7576, 1.6830, 1.7558, 1.8993, 1.7414, 1.7797, 1.7533, + 1.6189, 1.7345, 1.7020, 1.7244, 1.6815, 1.7499, 1.8150, 1.7153, 1.6953, + 1.7383, 1.6688, 1.8391, 1.6598, 1.7349, 1.7585, 1.6782, 1.7734, 1.7821, + 1.7409, 1.7198, 1.6855, 1.7529, 1.7155, 1.7341, 1.5297, 1.7421, 1.8686, + 1.8653, 1.7259, 1.7289, 1.6731, 1.7387, 1.6307, 1.7143, 1.6711, 1.7531, + 1.8033, 1.7714, 1.7676, 1.7775, 1.6657, 1.7763, 1.7440, 1.7064, 1.7384, + 1.7850, 1.7902, 1.7361, 1.8213, 1.7260, 1.6903, 1.8085, 1.7339, 1.7769, + 1.7721, 1.7588, 1.6839, 1.8513, 1.7149, 1.7672, 1.7700, 1.6876, 1.7124, + 1.7675, 1.8268, 1.7704, 1.7144, 1.7660, 1.8349, 1.7693, 1.6901, 1.7396, + 1.8073, 1.6787, 1.7993, 1.7948, 1.7033, 1.7233, 1.7223, 1.7224, 1.7772, + 1.6722, 1.8057, 1.8742, 1.6941, 1.7141, 1.7528, 1.7275, 1.7983, 1.7217, + 1.8105, 1.6726, 1.7739, 1.7665, 1.7696, 1.7423, 1.7597, 1.7595, 1.6767, + 1.7325, 1.7476, 1.7849, 1.7194, 1.6787, 1.7302, 1.7817, 1.7152, 1.7512, + 1.6956, 1.7467, 1.3401, 1.8498, 1.7573, 1.7962, 1.7595, 1.6933, 1.6367, + 1.7679, 1.6964, 1.7361, 1.7991, 1.7227, 1.7609, 1.7854, 1.7482, 1.7924, + 1.8307, 1.6455, 1.6848, 1.8324, 1.8348, 1.7826, 1.8397, 1.7152, 1.6917, + 1.7201, 1.7656, 1.8076, 1.8197, 1.7645, 1.6691, 1.7135, 2.2904, 1.7352, + 1.7467, 1.7217, 1.7181, 1.8016, 1.8424, 1.6587, 1.7916, 1.7563, 1.7790, + 2.8969, 1.7273, 1.7394, 1.7848, 1.7283, 1.7294, 1.7608, 1.7266, 1.7323, + 1.7652, 1.7301, 1.7450, 1.8608, 1.7369, 1.7490, 1.5925, 1.7045, 1.7451, + 1.7193, 1.7442, 1.6924, 1.7473, 1.6818, 1.7552, 1.6258, 1.7437, 1.7192, + 1.7460, 1.7766, 1.8368, 1.7696, 1.8426, 1.8348, 1.7678, 1.8167, 1.7304, + 
1.7137, 1.7233, 1.8062, 1.8429, 1.7146, 1.8293, 1.8135, 1.7622, 1.8080, + 1.8045, 1.7830, 1.7069, 1.7667, 1.7784, 1.6708, 1.7645, 1.7546, 1.8052, + 1.7619, 1.7424, 1.8167, 1.7871, 1.7916, 1.7700, 1.7150, 1.7424, 1.7701, + 1.7658, 1.6809, 1.7777, 1.7157, 1.7050, 1.8382, 1.7815, 1.7690, 1.8156, + 1.7530, 1.6965, 1.7266, 1.6922, 1.7875, 1.6888, 1.6801, 1.6591, 1.7296, + 1.8412, 1.7243, 1.6172, 1.7628, 1.7759, 1.7321, 1.7935, 1.7681, 1.7590, + 1.6764, 1.6220, 1.7573, 1.7823, 1.7837, 1.8083, 1.7544, 1.7745, 1.6749, + 1.7069, 1.7159, 1.8097, 1.6928, 1.7173, 1.7915, 1.7135, 1.8118, 1.7447, + 1.7273, 1.6825, 1.7379, 1.8264, 1.7297, 1.7173, 1.7472, 1.8040, 1.7840, + 1.7809, 1.6736, 1.7975, 1.7601, 1.7554, 1.7233, 1.6839, 1.7200, 1.7189, + 1.1019, 1.6431, 1.7165, 1.7557, 1.6809, 1.7875, 1.6864, 1.8092, 1.7360, + 1.7681, 1.7731, 1.7921, 1.6934, 1.7466, 1.6807, 1.7739, 1.7474, 1.7377, + 1.7673, 1.7556, 1.7118, 1.7117, 1.7537, 1.7710, 1.7321, 1.7860, 1.6302, + 1.7879, 1.8190, 1.6993, 1.7643, 1.8275, 1.7474, 1.7175, 1.7749, 1.7524, + 1.6974, 1.7262, 1.8118, 1.7030, 1.7440, 1.7438, 1.7523, 1.7821, 1.7594, + 1.6938, 1.7808, 1.7905, 1.7734, 1.7000, 1.7035, 1.7412, 1.7406, 1.7781, + 1.7695, 1.7509, 1.7576, 1.7752, 1.7044, 1.7850, 1.7253, 1.6955, 1.7934, + 1.7814, 1.7614, 1.7061, 1.8297, 1.7617, 1.7969, 1.6922, 1.6822, 1.7527, + 1.7551, 1.8767, 1.7740, 1.8746, 1.7875, 1.8527, 1.7601, 1.7039, 1.7222, + 1.7680, 1.6989, 1.7715, 1.7485, 1.7254, 1.6715, 1.7374, 1.6857, 1.6731, + 1.5084, 2.5756, 1.8057, 1.7862, 1.6845, 1.7544, 1.7355, 1.7684, 1.7092, + 1.8210, 1.7644, 1.7506, 1.7032, 1.8150, 1.7363, 1.7353, 1.8017, 1.7049, + 1.7996, 1.7475, 1.7090, 1.7468, 1.7934, 1.7622, 1.7136, 1.7332, 1.6697, + 1.8075, 1.7316, 1.5067, 1.7573, 1.7962, 1.7551, 1.7285, 1.7033, 1.7589, + 1.7889, 1.7553, 1.8125, 1.8115, 1.6954, 1.8001, 1.7614, 1.7621, 1.7395, + 1.7802, 1.7583, 1.8117, 1.6605, 1.7690, 1.6949, 1.7969, 1.7511, 1.7341, + 1.7781, 1.7734, 1.7195, 1.7107, 1.7229, 1.8224, 1.7728, 1.8050, 1.7612, + 
1.7779, 1.8357, 1.6278, 1.7457, 1.6289, 1.8321, 1.4774, 1.7665, 1.6529, + 1.6894, 1.6457, 1.7476, 1.7246, 1.7836, 1.7473, 1.7166, 1.8502, 1.7336, + 1.7778, 1.7636, 1.7954, 1.6843, 1.6976, 1.7811, 1.7412, 1.7866, 1.8105, + 1.7974, 1.7285, 1.7500, 1.7821, 1.7113, 1.6886, 1.7415, 1.8403, 1.7683, + 1.7202, 1.7977, 1.7161, 1.7669, 1.7054, 1.6233, 1.7726, 1.7435, 1.7252, + 1.7905, 1.7461, 1.6852, 1.7706, 1.7498, 1.7314, 1.7612, 1.7688, 1.8387, + 1.7507, 1.7780, 1.7521, 1.7521, 1.6886, 1.6748, 1.6639, 1.7260, 1.8651, + 1.8324, 1.8109, 1.7392, 1.7163, 1.7573, 1.8032, 1.7246, 1.7790, 1.8149, + 1.8411, 1.7696, 1.7417, 1.7305, 1.8165, 1.7831, 1.6465, 1.7257, 1.6999, + 1.7752, 1.7095, 1.8007, 1.5871, 1.7589, 1.7477, 1.7404, 1.7618, 1.7613, + 1.7768, 1.7874, 1.7526, 1.7289, 1.7907, 1.7646, 1.7130, 1.6932, 1.7876, + 1.7574, 1.7329, 1.7561, 1.6575, 1.6210, 1.7973, 1.8189, 1.8263, 1.7098, + 1.8421, 1.7957, 1.7129, 1.7902, 1.7544, 1.7874, 1.8462, 1.6938, 1.7722, + 1.7217, 1.6570, 1.6927, 1.8389, 1.7193, 1.6345, 1.8630, 1.7586, 1.7213, + 1.7517, 1.6532, 1.7951, 1.8013, 1.6195, 1.7767, 1.7708, 1.8667, 1.7844, + 1.6620, 1.7018, 1.8073, 1.7122, 1.6877, 1.8080, 1.8439, 1.7730, 1.7713, + 1.7360, 1.7293, 1.7934, 1.8518, 1.7278, 1.7761, 1.7626, 1.7324, 1.7621, + 1.7627, 1.7576, 1.7272, 1.7169, 1.7375, 1.7508, 1.7859, 1.8297, 1.7494, + 1.7099, 1.7173, 1.7456, 1.6725, 1.7380, 1.5079, 1.7439, 1.7492, 1.7837, + 1.7155, 1.7384, 1.8116, 1.7994, 1.7162, 1.7863, 1.8267, 1.8537, 1.6841, + 1.7267, 1.7238, 1.6594, 1.6171, 1.7859, 1.7840, 1.7119, 1.7442, 1.8366, + 1.7012, 1.6456, 1.8376, 1.8048, 1.7586, 1.7052, 1.7953, 1.6071, 1.7094, + 1.6981, 1.8010, 1.7339, 1.7359, 1.7767, 1.7557, 0.3923, 1.7237, 1.7051, + 1.8097, 1.6661, 1.7293, 1.7843, 1.8136, 1.7692, 1.6809, 1.7752, 1.7953, + 1.7330, 1.7169, 1.7330, 1.7858, 1.7055, 1.7001, 1.7556, 1.7170, 1.7452, + 1.7962, 1.7701, 1.7458, 1.6952, 1.6571, 1.7019, 1.6985, 1.7858, 1.7325, + 1.8441, 1.7544, 1.7539, 1.7416, 1.7197, 1.7388, 1.7254, 1.7573, 1.6840, + 
1.6938, 1.7749, 1.8030, 1.7458, 1.7379, 1.7583, 1.5081, 1.7613, 1.8099, + 1.8687, 1.7221, 1.8274, 1.7241, 1.8184, 1.6901, 1.8262, 1.7127, 1.7925, + 1.6913, 1.7442, 1.7687, 1.7329, 1.7385, 1.7035, 1.7806, 1.7704, 1.8107, + 1.7890, 1.7229, 1.7178, 1.7381, 1.8056, 1.6509, 1.8329, 1.7378, 1.7336, + 1.7829, 1.6570, 1.6778, 1.7194, 1.8111, 1.7018, 1.6994, 1.6840, 1.7387, + 1.7542, 1.6884, 1.7213], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.0949e-01, 1.2029e-01, -2.2643e-01, 1.3633e-02, 7.7175e-02, + 2.0759e-01, 1.2732e-01, 5.3440e-02, -1.8767e-01, -3.7663e-03, + 5.5563e-02, -4.0936e-02, -1.8914e-01, -4.9850e-02, -8.7422e-02, + -8.5198e-02, -3.2767e-02, -3.9917e-02, -1.4945e-02, 5.4149e+00, + -4.7396e-02, 8.8626e-02, -2.3334e-02, -1.2182e-02, -4.0219e-02, + -4.6954e-02, 5.5812e-02, 2.6243e-02, 2.9270e-02, -7.4899e-02, + 6.1470e-02, 3.6407e-02, 1.3643e-02, 1.1972e-01, 9.5888e-02, + -9.1053e-02, 1.0808e-02, -6.1807e-02, -1.2643e-01, 1.0759e-01, + 7.5769e-02, 3.0389e-02, 1.9903e-02, 2.0498e-02, -2.8517e-02, + -3.5988e-03, 3.3271e-02, -2.9117e-02, 6.7151e-02, 5.6767e-02, + 2.7080e-03, 1.0900e-02, 3.1927e-02, 1.3691e-02, 5.4312e-02, + 1.1228e-01, -1.0770e-01, -8.9129e-02, 8.0802e-02, 2.2251e-02, + 9.6828e-02, -7.9053e-02, 4.6662e-02, 2.6232e-02, 8.6675e-02, + 7.8493e-03, 1.2616e-01, 1.0770e-01, -4.4169e-02, 1.6066e-01, + 5.3667e-02, 1.1023e-01, 7.7716e-01, 8.7449e-02, -1.5993e-01, + 6.4785e-02, 7.7663e-02, -1.3849e-01, -2.6414e-02, 1.5254e-02, + -3.1125e-03, -5.5026e-02, -9.6868e-03, 1.4584e-01, 1.1902e-02, + 1.0312e-02, 1.2705e-01, 1.0381e-01, -2.9179e-02, 2.8543e-02, + -1.6863e-02, 4.4634e-02, 3.5224e-02, -5.3106e-02, -8.9368e-02, + 2.2416e-02, 2.2251e-02, -8.7668e-02, -1.0003e-01, 1.3118e-02, + 3.7356e-02, -4.3921e-02, 1.0691e-02, -8.1180e-02, 8.0626e-02, + 1.2307e-02, 3.7298e-02, -2.7534e-02, -2.6614e-02, -6.7037e-02, + -7.2763e-02, -1.8265e-02, -1.0384e-02, 9.7256e-02, 1.2837e-01, + -3.4833e-02, -3.0022e-02, -5.6598e-02, 
5.1160e-02, 1.4362e-01, + -3.5597e-02, -2.8443e-01, -1.3226e-01, -6.6577e-02, -1.5931e-01, + -3.4211e-02, -1.8955e-01, 7.5645e-03, 7.7564e-03, -5.1944e-02, + -1.0860e-01, 7.3288e-02, -6.2435e-02, -1.8061e-02, -7.4753e-02, + 7.2062e-02, -8.5082e-03, -4.6290e-02, -7.0267e-02, 5.0560e-04, + 2.1024e-03, 3.1026e-02, 5.5825e-02, 2.2368e-02, 6.0203e-02, + -3.7331e-02, -9.3823e-03, 1.2192e-02, -6.6988e-02, 1.6273e-01, + 2.5527e-01, -9.1718e-02, -1.5925e-01, 3.0293e-02, -8.7519e-02, + -4.5639e-01, -6.0476e-02, -2.3655e-02, 3.3486e-02, 2.2417e-02, + 1.9185e-01, -1.7733e-01, -3.4288e-02, 3.0369e-02, -2.2127e-03, + 3.9785e-02, -1.1005e-01, 8.2197e-02, -5.6468e-02, 3.7779e-02, + -4.6349e-02, 8.3061e-02, 1.7083e-03, 3.6090e-02, 6.6748e-02, + -1.3850e-01, -8.5291e-03, 3.9900e-02, -2.8865e-02, 5.9780e-02, + 1.6062e-03, -1.1419e-01, 7.5935e-02, 1.8732e-02, 2.3851e-02, + -1.9683e-01, 1.9399e-02, 5.7028e-01, -1.1578e-02, -5.2761e-02, + 2.9288e-02, -7.8124e-02, 7.4517e-02, 1.0981e-01, -6.1710e-02, + 3.0429e-02, -3.0580e-02, -1.2169e-01, 1.4486e-01, 2.7204e-02, + -2.0147e-01, -5.2343e-02, 1.5732e-02, 9.6297e-02, 4.9667e-02, + 1.5123e-01, -8.8324e-02, 1.2086e-01, 1.1278e-01, 9.8894e-04, + 1.0845e-02, 2.4921e-01, -1.5126e-03, -2.4564e-02, -1.0051e-01, + -1.0166e-02, -6.3747e-02, 4.1885e-02, -1.2753e-01, -2.2882e-01, + -1.2265e-02, -5.5138e-02, 2.3936e-02, -1.1381e-02, 3.2840e-02, + -7.2174e-02, 9.4510e-02, -3.3043e-02, 1.1032e-01, -4.9055e-02, + 1.7022e-03, 4.9647e-02, 1.1223e-02, 9.7754e-02, 9.0599e-03, + 1.6349e-02, -1.0698e-01, 7.6059e-02, 7.9308e-02, 9.7373e-03, + 9.2770e-02, 1.4207e-01, -4.8836e-03, 4.7935e-02, 6.4542e-02, + 3.0055e-02, 1.2875e-01, -4.6859e-02, 4.9863e-02, 1.4377e-03, + 1.5789e-02, 1.4662e-02, 1.2451e-01, -1.0362e-01, 1.4963e-01, + -2.6775e-02, -4.0095e-02, -1.1525e-01, 9.7913e-03, 3.2486e-02, + -1.0553e-01, -3.9868e-02, -1.9666e-02, -6.5488e-02, -5.8675e-02, + 2.3672e-02, -6.1824e-02, -3.4578e-03, -7.2933e-02, -2.7511e-02, + 5.2952e-03, 6.9479e-02, 1.0031e-01, 
-1.5548e-01, -1.5024e-02, + 7.7628e-02, -8.3099e-02, 2.1916e-02, 9.7977e-02, -1.0470e-01, + 1.8839e-01, 1.2992e-01, -3.9989e-02, 3.8404e-02, 2.1818e-02, + 3.8327e-02, 5.1547e-02, -5.4093e-02, 1.4502e-01, -1.3062e-01, + -5.9733e-02, -1.4898e-02, 1.5852e-01, 6.7279e-02, 9.2137e-02, + -1.3338e-02, 1.8929e-01, 1.5181e-02, 6.7185e-02, 5.8950e-02, + 5.9982e-02, 4.4719e-02, -5.1546e-02, 1.4082e-01, -1.4495e-02, + -1.1608e-02, 9.2850e-02, -8.2966e-02, 9.0176e-02, -5.3713e-02, + 4.5182e-02, 1.0756e-02, -8.7488e-02, -1.2639e-01, -1.3084e-03, + -7.5066e-02, 2.1022e-02, 2.8497e-02, 1.8901e-01, -4.0666e-02, + -7.9830e-02, 2.6010e-01, -6.3069e-02, -5.6760e-02, 2.3198e-01, + -2.7823e-01, 3.6469e-02, -2.2526e-02, 8.1990e-02, -1.8918e-01, + 2.0363e-01, -5.4943e-02, 1.3450e-01, -5.3811e-02, 5.4539e-02, + 9.9996e-02, -2.9484e-02, -3.2027e-02, 3.0322e-02, -5.0587e-02, + -1.2371e-01, 2.7864e-02, -7.2747e-02, -4.5982e-02, 6.4722e-02, + 2.5427e-03, 1.0404e-01, 1.2117e-01, 5.1235e-02, -2.6081e-02, + -7.1022e-02, 7.9909e-02, 4.0153e-03, 1.3626e-01, -6.4602e-02, + -8.0432e-02, -5.1698e-02, -2.0403e-03, -2.4301e-02, -9.1350e-02, + -2.8840e-03, -1.5332e-02, -3.6093e-02, -9.8040e-02, -5.1894e-03, + -2.3653e-02, 7.5881e-02, 1.7702e-01, 9.3416e-02, 7.4547e-02, + 1.1536e-01, -3.9263e-03, -5.5926e-02, -4.2878e-02, -1.3414e-02, + 1.5467e-02, 1.1647e-01, 1.1761e-01, -2.0235e-03, -2.0868e-02, + -1.2706e-01, 1.3958e-01, 3.3958e-02, -2.4072e-02, -8.1018e-02, + -2.9652e-02, -3.1342e-02, -5.6468e-02, -1.2003e-01, -6.9973e-02, + -2.3683e-03, 1.0191e-02, -6.2477e-02, -3.2790e-02, 3.2910e-02, + -3.8474e-04, 7.6801e-03, 5.5080e-02, -3.7293e-02, 3.7632e-02, + 1.2180e-01, -6.6580e-02, 8.8961e-02, 2.3944e-02, -1.2642e-01, + -1.5582e-01, -3.5932e-02, 5.6498e-02, 1.3303e-02, -5.9329e-02, + -9.1585e-02, 7.7481e-02, 1.0074e-01, -1.0729e-01, 2.9733e-03, + -1.9608e+00, -1.7896e-02, 1.6667e-02, -6.7317e-02, -4.6438e-02, + -2.0567e-02, -5.6402e-03, 4.9861e-02, -6.5507e-02, -4.4573e-02, + 4.8462e-02, -1.7947e-01, 
-1.2917e-01, 1.0376e-01, 4.6402e-03, + -3.8884e-02, -5.6493e-02, 4.4177e-02, 1.3340e-01, 1.6811e-01, + 2.6634e-03, 1.9004e-02, 7.5964e-02, 1.8048e-01, -1.0555e-01, + -2.6694e-02, -1.1950e-01, -7.9573e-02, 1.3374e-02, 1.5681e-01, + -1.3325e-01, -1.1418e-02, -6.1896e-02, 1.3991e-01, -2.9282e-03, + -3.5584e-03, -1.6388e-02, -1.1436e-01, 6.5513e-02, 4.1263e-02, + 3.0082e-02, 3.3306e-02, -1.1155e-02, -5.5836e-02, -1.1409e-01, + -1.0892e-02, 9.0167e-02, -1.6112e-01, 6.5796e-02, -1.1796e-02, + -5.7307e-02, 7.6839e-02, -9.1151e-02, 1.2946e-01, 3.8997e-03, + -3.1941e-02, -1.0808e-01, -8.4099e-02, 5.5579e-03, -5.9969e-02, + 6.1003e-02, 1.6967e-01, 4.3307e-02, 1.5222e-02, 1.2151e-01, + 1.2265e-01, -1.1868e-01, 5.6359e-02, 1.5663e-01, -8.6873e-02, + -2.5422e-02, -4.0970e-02, 1.4281e-01, 9.8614e-02, -4.6476e-02, + 1.9954e-02, 5.2306e-03, -5.3084e-02, -4.3791e-02, -1.4934e-01, + 3.4870e-02, -7.6011e-02, 1.2525e-01, -5.5468e-02, 7.1483e-02, + -9.6414e-03, -1.3096e-01, 4.0228e-02, -2.6196e-02, -8.0135e-02, + 3.0319e-02, 1.4976e-01, 7.4810e-02, 3.7500e-02, -2.3439e-02, + -8.4951e-02, -3.1313e-02, -1.4224e-01, 4.9112e-02, -5.7134e-02, + -1.7928e-02, 7.7955e-02, 1.6979e-01, 1.1908e-01, -1.5242e-01, + -7.3433e-02, 3.9254e-03, 9.9303e-02, -3.5989e-02, -4.6880e-02, + -7.3148e-02, 2.5752e-02, -2.4774e-02, 4.5360e-02, -8.7025e-02, + 2.4669e-03, -1.3410e-02, 1.1635e-02, -6.7339e-02, -2.3523e-02, + 1.1249e-02, -2.1098e-02, 5.2816e-03, -7.8247e-02, -8.7813e-02, + 2.1990e-03, 5.2155e-02, -2.4094e-02, 1.0330e-01, 1.7441e-02, + 5.9195e-02, 1.4813e-01, 1.1106e-02, 5.9486e-02, 2.5604e-02, + -2.0879e-02, -3.8578e-02, -1.1124e-01, 9.3536e-02, -4.4539e-03, + -1.8219e-02, -2.0443e-02, -4.9660e-02, -1.0606e-02, 1.4319e-01, + -3.8835e-02, 1.8643e-01, 3.4203e-02, 5.8226e-02, 1.4025e-01, + 9.2013e-02, 6.8444e-03, -2.5568e-02, 3.9318e-02, 5.1420e-02, + 9.7254e-02, 9.0392e-03, -5.4230e-02, -2.1404e-02, 8.8912e-02, + 6.4720e-02, 1.2284e-02, 4.4045e-02, 2.5807e-03, -2.5942e-02, + -1.3710e-01, -1.3367e-02, 
7.5493e-02, 1.1875e-01, 8.1141e-02, + -1.2936e-02, 4.7390e-02, -1.1134e-01, 9.6089e-02, 6.4378e-02, + -1.8029e-02, 8.4028e-02, 1.4069e-01, -1.7390e-02, 2.6406e-02, + -7.9666e-03, 5.2227e-02, 3.9486e-02, -9.2852e-03, 7.3538e-02, + -7.3171e-02, -1.9590e-01, -1.7776e-02, -4.0482e-02, 1.5421e-02, + -4.6823e-02, 2.3603e-02, -7.3262e-03, 8.0283e-02, -3.5043e-02, + 1.1719e-01, 1.1037e-01, -5.6377e-02, -3.4377e-03, -4.8010e-02, + -1.2813e-01, 2.4231e-02, 1.1043e-01, 5.4121e-02, -7.6396e-02, + 8.8454e-02, 4.3982e-02, 7.0963e-02, 8.3958e-03, 5.9537e-02, + -6.3115e-02, -6.1581e-02, 4.2345e-02, 1.0158e-01, -6.7802e-02, + 1.0960e-01, -3.4674e-02, 1.2936e-01, 1.9522e-01, 1.0823e-01, + 5.2180e-02, -9.0325e-02, 5.8393e-03, 8.2930e-02, -8.4221e-02, + 1.7008e-03, -7.9800e-02, 7.3705e-02, -8.1106e-02, 5.6668e-02, + 3.2226e-02, 6.6968e-03, 1.3006e-01, 4.6790e-02, 2.0633e-01, + 1.8214e-01, 1.8984e-04, -5.6160e-02, 1.1562e-01, -6.7184e-02, + 5.9152e-02, -1.1576e-01, -9.8722e-04, 7.8000e-02, 1.0290e-01, + -4.5230e-02, 9.1396e-03, -7.5262e-02, 1.1895e-02, 1.1374e-01, + 1.6541e-02, -1.4464e-01, -1.6592e-01, 6.0472e-03, 4.7818e-04, + -2.0584e-01, 5.2593e-02, 3.0140e-02, 1.3068e-02, 8.7231e-03, + 1.0776e-02, -3.1502e-02, -2.4943e-02, -5.4088e-03, 1.0589e-01, + -2.9905e-02, -5.0469e+00, -3.2722e-02, 4.9282e-02, -2.2213e-02, + -1.1515e-01, -4.1087e-02, -8.2253e-02, -5.4938e-02, 4.3435e-02, + -4.4115e-02, -2.0652e-02, -5.0619e-02, -3.0088e-02, -6.0237e-02, + 1.7771e-01, 3.4522e-03, -7.0164e-02, 1.7609e-02, -8.1784e-02, + 9.7742e-02, 8.6289e-02, 5.3136e-03, 1.3217e-02, -1.8450e-01, + -2.3459e-02, 9.8487e-02, 1.8942e-02, -2.4920e-02, 6.7617e-02, + -3.2965e-02, 7.1420e-02, 8.1317e-02, 1.9588e-02, 4.7272e-03, + -4.8509e-02, 3.8701e-03, 1.6512e-02, -7.8775e-02, -6.9517e-02, + 1.2679e-04, 3.3200e-03, 6.5191e-03, -1.9788e-01, 8.8512e-02, + -4.9999e-03, -2.4801e-01, 1.4648e-01, 3.9599e-02, 1.3425e-01, + 8.5676e-02, 8.7498e-02, 1.0170e-01, 9.6648e-02, 1.2441e-02, + 4.7502e-02, 7.0925e-03, 1.0906e-02, 
9.9315e-03, -8.5651e-02, + 1.0266e-01, -1.5819e-01, -1.1354e-01, 8.0533e-02, -1.7047e-02, + -1.7048e-02, -1.7593e-01, 8.0783e-03, 3.7989e-02, -4.0361e-02, + -2.3227e-02, -2.0796e-02, -1.1312e-01, -4.6048e-02, 9.6961e-02, + 4.7441e-02, 7.9526e-02, 8.1258e-02, 2.5623e-02, -3.5423e-02, + -2.2783e-02, 6.6940e-02, 5.3477e-02, -1.4346e-01, -9.9305e-02, + -5.6176e-02, 1.2165e-01, 1.0727e-02], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[-0.0076, 0.0140, -0.0179, ..., -0.0190, -0.0001, 0.0083], + [-0.0082, -0.0062, -0.0340, ..., 0.0008, 0.0212, -0.0008], + [-0.0030, -0.0002, 0.0044, ..., 0.0062, -0.0090, 0.0150], + ..., + [-0.0003, 0.0026, -0.0107, ..., 0.0015, 0.0198, 0.0079], + [-0.0139, 0.0263, 0.0082, ..., -0.0135, 0.0330, -0.0161], + [ 0.0057, 0.0020, -0.0367, ..., 0.0184, -0.0093, 0.0296]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2454, -0.3401, -0.3860, ..., -0.3416, -0.3689, -0.1425], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0122, 0.0114, 0.0196, ..., 0.0045, 0.0157, 0.0007], + [ 0.0119, 0.0089, -0.0186, ..., -0.0101, -0.0171, -0.0015], + [ 0.0058, -0.0237, 0.0086, ..., -0.0162, 0.0190, -0.0067], + ..., + [-0.0058, 0.0016, 0.0089, ..., -0.0329, -0.0035, 0.0069], + [-0.0245, 0.0052, 0.0177, ..., -0.0051, 0.0200, 0.0037], + [ 0.0006, -0.0077, 0.0039, ..., 0.0064, 0.0152, -0.0266]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.1591e-02, -3.8971e-02, 4.7058e-02, -3.9276e-02, -4.8645e-02, + -4.8645e-02, 6.9092e-02, 5.2643e-02, 5.4443e-02, -3.2440e-02, + 5.1117e-02, 1.7868e-02, -7.8888e-03, -3.6774e-02, -4.2305e-03, + 2.9739e-02, 6.4621e-03, -1.9791e-02, -2.1191e-03, -3.9185e-01, + -8.0811e-02, 5.4291e-02, 4.3640e-02, -1.6708e-02, 3.3295e-02, + 5.2307e-02, 1.1772e-02, -5.2429e-02, -1.8356e-02, 
7.1472e-02, + 9.4421e-02, -4.7646e-03, -8.4412e-02, 5.1117e-02, -2.5604e-02, + -1.0460e-02, -8.0505e-02, 2.1240e-02, 1.8341e-02, 5.2277e-02, + 4.8065e-02, 5.0507e-02, 2.8397e-02, 3.0899e-02, 7.3051e-03, + -2.5421e-02, 6.0455e-02, -1.6251e-02, -5.4962e-02, 9.8228e-04, + -1.8143e-02, -4.0802e-02, -8.2825e-02, -9.6558e-02, 1.0406e-02, + 3.6896e-02, -2.7069e-02, -6.9092e-02, 2.9907e-02, 3.8574e-02, + 5.0781e-02, 3.7193e-03, 1.0065e-01, 7.6485e-03, -3.1614e-04, + 4.6722e-02, -3.7933e-02, -7.7271e-02, -1.4732e-02, -2.0538e-02, + 7.1045e-02, -4.4556e-03, 3.7109e-02, 3.2104e-02, 1.3588e-02, + -1.7609e-02, 4.4006e-02, 1.4793e-02, 1.4610e-02, 1.1719e-01, + -2.8477e-03, 5.4283e-03, -5.0842e-02, 1.1711e-02, -7.9773e-02, + -9.3384e-02, 4.7394e-02, -3.4668e-02, -2.8610e-02, -2.3880e-02, + -1.7273e-02, -6.7444e-02, 7.4720e-04, 1.6260e-01, -1.2970e-04, + -3.3356e-02, -4.6936e-02, -1.5274e-02, -8.2336e-02, 4.1718e-02, + 6.7101e-03, -9.7961e-02, -4.6692e-02, -1.5358e-02, 2.2247e-02, + 5.7373e-02, 7.7820e-02, -9.5459e-02, 3.0365e-02, -7.7393e-02, + 2.0950e-02, -1.5511e-02, -9.0942e-02, 1.4183e-02, -1.4450e-02, + -2.0618e-03, 6.0455e-02, 5.1392e-02, -3.3203e-02, 3.6133e-02, + 4.1931e-02, -3.7567e-02, 2.7359e-02, 6.7902e-03, 5.8258e-02, + 8.9188e-03, -7.5439e-02, 6.3904e-02, 2.8931e-02, 1.5747e-02, + -1.0536e-02, -3.3691e-02, -2.4719e-02, -2.8183e-02, -5.4565e-02, + 8.6792e-02, -2.8625e-02, 1.1589e-02, 7.1777e-02, -3.2867e-02, + -2.1286e-02, 4.6967e-02, 3.5553e-02, -4.9530e-02, 2.1683e-02, + -7.2815e-02, 2.3666e-02, -3.8696e-02, -1.0870e-01, 4.0802e-02, + 9.8694e-02, -6.8665e-02, -2.6184e-02, -7.4402e-02, -3.1311e-02, + 2.8305e-02, 7.9651e-02, 1.1002e-02, -2.2602e-03, 3.6168e-04, + 6.1066e-02, 1.2016e-02, -5.0720e-02, -3.1776e-03, 5.1636e-02, + 3.5950e-02, 6.2805e-02, 7.7362e-03, 9.9976e-02, -4.2847e-02, + 5.1308e-03, -3.3020e-02, -1.3115e-02, 2.6184e-02, -5.9052e-02, + -1.1816e-01, -7.5073e-02, -1.9012e-02, 5.0598e-02, 8.6010e-05, + -1.3525e-01, 3.9940e-03, 1.1029e-01, 1.4124e-01, 
3.8116e-02, + 1.7729e-03, -2.2736e-02, 1.1194e-01, -3.6488e-03, -2.0081e-02, + -1.1652e-01, -5.5634e-02, 7.7148e-02, 2.6855e-02, 3.7354e-02, + 1.4557e-02, 1.2703e-02, -8.2397e-02, 5.5847e-02, 1.1093e-02, + -1.5022e-02, 2.6810e-02, 1.7426e-02, -5.6915e-02, 1.6739e-02, + 9.7198e-03, -9.5062e-03, 5.3375e-02, 9.7122e-03, -3.4241e-02, + -3.4199e-03, -4.3671e-02, -4.1565e-02, -1.0147e-02, 4.3762e-02, + -2.2110e-02, 5.1193e-03, -3.9307e-02, -7.7087e-02, 7.2212e-03, + 6.0150e-02, 2.0355e-02, 2.2690e-02, 5.6854e-02, 1.1612e-02, + 5.4535e-02, -7.8354e-03, 7.3181e-02, 7.7148e-02, -2.3209e-02, + -4.7760e-02, -1.6113e-02, 8.5678e-03, -4.8462e-02, -6.6772e-02, + 1.0094e-02, -6.7017e-02, -2.6184e-02, 6.4880e-02, 4.4769e-02, + 7.5302e-03, 1.7288e-02, 9.0561e-03, -6.2065e-03, -4.1870e-02, + -4.5105e-02, -7.7942e-02, 6.5125e-02, -3.1433e-02, 5.7190e-02, + -5.0293e-02, -6.7329e-03, -7.7858e-03, -1.2352e-02, -1.3573e-02, + -6.3599e-02, 2.1515e-02, -2.0935e-02, -3.9490e-02, -1.0811e-02, + -1.2524e-01, -2.2247e-02, 9.7046e-03, 2.3155e-03, -5.7465e-02, + -3.8879e-02, -1.4923e-02, 3.6652e-02, 7.2083e-02, -3.6194e-02, + 3.0396e-02, -5.0415e-02, 1.0468e-02, 8.6670e-03, 6.9847e-03, + -6.4964e-03, -6.5552e-02, 1.2146e-02, 9.1858e-03, -2.9999e-02, + -3.7506e-02, 1.2749e-02, -2.3972e-02, 7.6782e-02, -2.5894e-02, + -1.9989e-02, -8.8623e-02, -1.8707e-02, 1.0590e-01, -5.0079e-02, + -3.5248e-02, 4.0253e-02, 1.7792e-02, -6.2683e-02, 6.1218e-02, + 8.9645e-03, 1.0077e-01, -1.9867e-02, -4.0985e-02, -4.0771e-02, + -3.0350e-02, 4.6906e-02, 1.2341e-03, -3.4363e-02, 5.5939e-02, + 8.1238e-02, -9.4177e-02, 3.7079e-02, 1.4252e-02, -5.8746e-03, + 3.8757e-02, -3.4882e-02, 2.1118e-02, 3.4027e-02, -2.6886e-02, + -2.7466e-02, 3.1372e-02, 7.7759e-02, 5.7251e-02, 1.0223e-01, + -6.7627e-02, 6.8398e-03, -1.3626e-02, -2.0180e-03, 1.2168e+00, + -1.6504e-01, -5.4413e-02, 4.6722e-02, 8.7128e-03, -3.4760e-02, + 2.0538e-02, -2.0340e-02, -3.0212e-02, -2.3682e-02, 1.9055e-01, + -3.7689e-02, -5.8289e-03, 9.5215e-02, 
-2.2141e-02, -2.5253e-02, + 6.0394e-02, -1.8066e-02, -5.6946e-02, -2.8458e-03, -2.0309e-02, + 9.6313e-02, 6.1531e-03, 2.2415e-02, -1.0199e-01, -4.9469e-02, + 3.2684e-02, -6.6895e-02, 1.3893e-02, -8.1909e-02, -3.3569e-02, + 5.0507e-02, -2.1790e-02, -3.6224e-02, -2.2003e-02, 8.4595e-02, + -3.1860e-02, -4.8798e-02, -7.1030e-03, -7.2327e-02, 7.0862e-02, + -3.1677e-02, -6.7017e-02, 3.7537e-02, 5.0507e-02, 1.7990e-02, + -2.3697e-02, 6.3110e-02, 1.3781e-03, -4.6692e-02, -6.9763e-02, + 7.8964e-03, 4.9042e-02, 1.7944e-02, -4.6997e-02, 1.9745e-02, + -9.0332e-03, -1.8921e-02, -5.5573e-02, 5.3062e-03, -1.4740e-02, + -6.3232e-02, 2.2827e-02, 3.0563e-02, 1.1780e-02, 7.3776e-03, + 1.7517e-02, 4.5319e-02, 8.3435e-02, -2.1545e-02, 1.4534e-02, + -5.2765e-02, 4.9255e-02, -1.1542e-01, -4.5410e-02, 2.5803e-02, + 5.5908e-02, 6.8420e-02, -4.9316e-02, -9.5459e-02, 4.9347e-02, + -5.4504e-02, -2.6794e-02, -2.6871e-02, -4.8637e-03, 2.7039e-02, + 4.5197e-02, 3.5614e-02, 1.5411e-02, -1.2154e-02, 1.3757e-01, + -1.6479e-01, -1.7639e-02, -3.5309e-02, -3.1311e-02, -2.1317e-02, + 7.0923e-02, 3.9978e-02, -2.0584e-02, -6.0364e-02, -8.1360e-02, + 1.7059e-02, 5.4321e-03, -2.0157e-02, -1.9180e-02, -1.2947e-02, + 8.9417e-03, 2.0828e-03, -6.4087e-02, 2.7237e-03, 4.8401e-02, + -2.8706e-03, -2.6810e-02, 1.9913e-02, 4.0802e-02, -7.6172e-02, + 9.4360e-02, 3.8055e-02, 2.1530e-02, -1.8784e-02, 4.5967e-03, + -5.3864e-02, 2.6199e-02, -1.7410e-02, -8.3069e-02, -5.4901e-02, + 5.3177e-03, -5.6915e-03, -6.2469e-02, 1.5167e-02, -8.2626e-03, + -1.8600e-02, -5.1331e-02, -5.4199e-02, -4.1084e-03, 6.8726e-02, + -3.8574e-02, -3.0842e-03, -4.0131e-02, -1.4565e-02, -3.6804e-02, + 6.5552e-02, -5.0049e-02, 6.9641e-02, 6.4087e-02, -2.2736e-02, + 2.8366e-02, -5.7983e-02, -6.0944e-02, 1.8295e-02, 2.2461e-02, + -5.0079e-02, 3.4027e-02, -1.0211e-01, -9.0576e-02, -8.8867e-02, + -4.1779e-02, 2.3163e-02, -4.7607e-02, -1.4679e-02, 4.1313e-03, + -1.6413e-03, 3.9581e-02, -2.9724e-02, -1.7258e-02, 6.6895e-02, + -2.2232e-02, -8.6670e-03, 
2.5314e-02, 2.3438e-02, -4.0558e-02, + -9.4543e-02, -6.9519e-02, 9.9945e-03, -8.9844e-02, -1.6083e-02, + -3.2257e-02, 1.7853e-02, -3.8818e-02, 5.4474e-02, 1.9516e-02, + -3.6224e-02, 1.8921e-02, 9.1675e-02, -8.5878e-04, -5.4382e-02, + -1.0175e-01, 7.5012e-02, -8.4778e-02, 1.5945e-02, -3.0762e-02, + 3.9764e-02, 3.0624e-02, -7.0862e-02, -2.7359e-02, 5.8899e-02, + 6.0669e-02, 1.4641e-02, 3.0029e-02, 1.3330e-01, -1.0565e-01, + -3.7651e-03, -8.3008e-03, 9.8228e-04, 3.3630e-02, -6.7940e-03, + 2.4887e-02, -2.1896e-02, 1.8768e-02, 5.7159e-02, -2.2507e-02, + -9.0881e-02, 3.8605e-02, 4.2206e-02, 4.7821e-02, 1.1975e-01, + 2.7954e-02, -5.4665e-03, -5.6458e-02, 3.1403e-02, 2.6840e-02, + 9.7656e-02, -2.3071e-02, 3.4241e-02, -1.9257e-02, 5.1605e-02, + 1.7181e-02, 7.5195e-02, 4.2206e-02, 8.7585e-02, -7.7332e-02, + -1.6968e-02, 7.8186e-02, 1.0994e-02, -4.6082e-02, 1.9073e-03, + -5.4596e-02, -5.4779e-02, 2.9236e-02, 7.0068e-02, -1.0330e-02, + 3.3234e-02, 1.0223e-02, -4.5807e-02, 4.8706e-02, -4.0680e-02, + -1.4160e-02, 5.7068e-02, -1.3489e-02, 7.1411e-02, 6.9618e-03, + 1.0773e-01, 5.8380e-02, -4.2786e-02, -6.8359e-02, 3.7079e-02, + 5.5695e-02, -1.1237e-01, -2.9800e-02, 6.0272e-02, 4.4647e-02, + -3.5309e-02, -6.6589e-02, -4.6417e-02, 2.3407e-02, -5.9784e-02, + -2.2461e-02, 7.5684e-02, 1.7471e-02, -9.3811e-02, 2.6302e-03, + -1.5915e-02, -1.8127e-02, 1.8677e-02, -4.4800e-02, 4.0314e-02, + -3.1281e-02, -2.8443e-04, -6.0028e-02, 3.4027e-02, -1.3159e-01, + -5.4443e-02, -2.5757e-02, -4.6570e-02, -1.5465e-02, 5.2979e-02, + -2.7069e-02, -1.7120e-02, 1.9241e-02, -3.6163e-03, -8.4473e-02, + -3.5522e-02, 6.4087e-02, 6.2561e-02, -5.8441e-02, 2.9510e-02, + -8.4961e-02, -3.9062e-02, 6.1302e-03, 7.5989e-03, 1.6144e-02, + 1.5808e-02, -2.5375e-02, 5.3635e-03, -9.4788e-02, -6.1554e-02, + -3.1952e-02, 5.6732e-02, 1.0132e-02, 1.7563e-02, 1.5266e-02, + -6.7993e-02, 7.7515e-02, -5.8594e-02, 1.3535e-02, -3.5431e-02, + -3.6850e-03, -8.2153e-02, -1.0689e-02, 2.4429e-02, -3.1189e-02, + 3.2166e-02, -5.3101e-02, 
-6.2065e-03, 6.4453e-02, 2.9449e-02, + 6.5796e-02, -5.1941e-02, -3.0136e-02, -4.2572e-02, 1.6327e-02, + 4.2999e-02, 3.8879e-02, -5.3070e-02, -5.2376e-03, 3.4943e-02, + -1.0147e-02, 1.0635e-02, -5.2299e-03, -4.2915e-03, -1.9958e-02, + 1.9470e-02, -8.3313e-03, 2.7039e-02, -4.2969e-02, -8.4412e-02, + -1.5411e-02, 7.9269e-03, 7.1106e-02, 1.1208e-02, 1.2383e-02, + 2.5940e-02, 2.7084e-02, -9.2163e-03, -1.9058e-02, -1.3649e-02, + -6.9618e-03, 3.2715e-01, 3.3295e-02, -1.7197e-02, 4.3732e-02, + -3.2684e-02, 6.1646e-02, 2.2232e-02, 9.7778e-02, 5.4817e-03, + -7.6050e-02, -1.6159e-02, 8.4610e-03, -2.1652e-02, -9.4299e-02, + -2.1683e-02, 3.0807e-02, 4.0924e-02, -4.1992e-02, 7.6355e-02, + 4.2358e-02, 1.7212e-02, -3.0106e-02, -6.0333e-02, -7.1716e-02, + 4.4861e-02, -4.4617e-02, 1.8730e-03, 2.9175e-02, 8.8501e-03, + 3.5686e-03, 7.3975e-02, 4.4281e-02, 1.5991e-02, 9.7839e-02, + -1.7776e-02, 2.5635e-02, 4.3152e-02, -1.4130e-02, 5.9235e-02, + -5.7404e-02, 7.4120e-03, 1.9426e-03, -4.9095e-03, 1.5617e-02, + -1.7517e-02, -2.3315e-02, 3.9368e-02, 2.7481e-02, 5.0430e-03, + -6.1859e-02, -3.4363e-02, 5.7281e-02, -1.0544e-02, 2.8152e-02, + 9.5459e-02, -7.8369e-02, 1.1642e-02, -9.4788e-02, -4.5929e-02, + -3.6133e-02, 8.3466e-03, -7.0374e-02, -3.3875e-02, 1.4877e-02, + 8.5907e-03, 6.1523e-02, 6.3171e-02, 2.8381e-02, 3.4576e-02, + 3.2288e-02, -1.2085e-01, 2.4643e-02, 4.6600e-02, 1.0339e-01, + -2.8563e-04, -3.3905e-02, -7.9250e-04, 4.6356e-02, -3.6804e-02, + 5.4138e-02, -3.4637e-02, 7.5256e-02, 4.1748e-02, -3.9215e-02, + 4.8370e-02, 1.8768e-02, -7.0095e-04], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.3688, 2.2313, 2.1793, 2.2621, 1.8781, 2.4473, 2.2259, 2.2702, 2.3276, + 2.4534, 2.1846, 2.2655, 2.3461, 2.2421, 2.3190, 2.2593, 2.3454, 2.2707, + 2.2164, 1.7897, 2.3242, 2.3469, 2.3397, 2.2526, 2.2761, 2.3129, 2.2576, + 2.2638, 2.1498, 2.2063, 2.4860, 2.2652, 2.2365, 2.3424, 2.2737, 2.4070, + 2.4567, 2.2116, 2.2207, 2.3936, 
2.1568, 2.3500, 2.2707, 2.2544, 2.3308, + 2.5199, 2.4381, 2.2376, 2.2106, 2.1092, 2.2784, 2.4935, 2.3248, 2.3377, + 2.5530, 2.3622, 2.4510, 2.1990, 2.2610, 2.3005, 2.3197, 2.3185, 2.2755, + 2.3920, 2.3173, 2.2691, 2.4864, 2.3578, 2.2687, 2.0558, 2.4016, 2.1990, + 2.1231, 2.4432, 2.3081, 2.2969, 2.5476, 2.2131, 2.1720, 2.4336, 2.3412, + 2.3070, 2.4076, 2.3136, 2.3851, 2.2409, 2.3300, 2.2133, 2.3456, 2.2283, + 2.2573, 2.2693, 2.3708, 2.3321, 2.1889, 2.2271, 2.2766, 2.4146, 2.3176, + 2.3727, 2.4370, 2.3428, 2.4437, 2.3811, 2.2463, 2.0142, 2.3516, 2.2030, + 2.2619, 2.1330, 2.2031, 2.3857, 2.4318, 2.2763, 2.2307, 2.3119, 2.3642, + 2.4886, 2.2440, 2.2410, 2.3068, 2.3970, 2.1728, 2.3072, 2.3099, 2.2765, + 2.3712, 2.4885, 2.3787, 2.2320, 2.2641, 2.4476, 2.4079, 2.2429, 2.4214, + 2.2500, 2.2248, 2.2907, 2.2025, 2.4755, 2.2717, 2.2460, 2.3245, 2.3066, + 2.3491, 2.1617, 2.4615, 2.3194, 2.4639, 2.4275, 2.2145, 2.2690, 2.3564, + 2.2622, 2.3437, 1.8991, 2.2557, 2.3662, 2.2678, 2.4041, 2.1706, 2.3727, + 2.3550, 2.4025, 2.4941, 2.2641, 2.0832, 2.2486, 2.3981, 2.4916, 2.5032, + 2.3176, 2.1182, 2.2759, 2.2190, 2.3240, 2.3059, 2.2629, 2.1827, 2.3566, + 2.4849, 2.2785, 2.2739, 2.3701, 2.2370, 2.0638, 2.1875, 1.4153, 2.2091, + 2.1576, 2.4698, 2.4457, 2.3009, 2.3664, 2.4033, 2.3602, 2.2590, 2.1947, + 0.8316, 2.3363, 2.5227, 2.2341, 2.2393, 2.3279, 2.3750, 2.3931, 2.2447, + 2.4049, 2.2068, 2.3888, 2.2901, 2.1921, 2.4135, 2.0252, 2.2543, 2.3396, + 2.0983, 2.3843, 2.3032, 2.1555, 2.3856, 2.3157, 1.9222, 2.4996, 2.2700, + 2.3211, 2.3556, 2.3793, 2.2133, 2.1887, 2.4271, 2.2548, 2.1998, 2.2376, + 2.2489, 2.3295, 2.4860, 2.1721, 2.2410, 2.3237, 2.2318, 2.4154, 2.2485, + 2.3437, 2.2699, 2.3348, 2.2798, 2.3398, 2.2431, 2.3392, 2.3666, 2.3178, + 2.2839, 2.3838, 2.3424, 2.3153, 2.4632, 2.4543, 2.4417, 2.4471, 2.2925, + 2.2936, 2.4775, 2.2104, 2.2885, 2.2657, 2.3830, 2.4525, 2.2305, 2.4506, + 2.3083, 2.3477, 2.4036, 2.3077, 2.2872, 2.4994, 2.2552, 2.3892, 2.3094, + 2.2584, 2.5298, 2.1819, 2.2275, 
2.2282, 2.2621, 2.2170, 2.2949, 2.2702, + 2.2701, 2.1525, 2.3722, 2.3994, 2.2509, 2.3115, 2.2393, 2.4235, 2.2856, + 2.4068, 2.1916, 2.3960, 2.1988, 2.4193, 2.1971, 2.4324, 2.4284, 2.2356, + 2.3937, 2.3617, 2.2964, 2.2486, 2.3001, 2.3829, 2.4658, 2.4456, 2.2983, + 2.2155, 2.4114, 2.3580, 2.1757, 2.4205, 2.3233, 2.2826, 2.3751, 2.2619, + 0.6763, 2.3765, 2.2672, 2.2851, 2.4040, 2.3082, 2.1688, 2.2536, 2.5452, + 2.2973, 2.4423, 2.1116, 2.5492, 2.2694, 2.3131, 2.3285, 2.6898, 2.3127, + 2.2720, 2.4728, 2.3080, 2.2795, 2.4653, 2.2102, 2.4244, 2.2528, 1.9396, + 2.3106, 2.3023, 2.2089, 2.3359, 2.3033, 2.3235, 2.2955, 2.2911, 2.3302, + 2.3617, 2.2888, 2.3233, 2.3285, 2.3036, 2.2174, 2.2295, 2.3946, 2.2406, + 2.3098, 2.4310, 2.3034, 2.4401, 2.3541, 2.2902, 2.3781, 2.2550, 2.4189, + 2.4577, 2.3707, 2.2346, 2.3813, 2.3329, 2.2650, 2.2464, 2.3527, 2.4315, + 2.1820, 2.1956, 2.3020, 2.2607, 2.5223, 2.4836, 2.3561, 2.2295, 2.3134, + 2.2896, 2.2292, 2.4570, 2.2569, 2.3331, 2.2332, 2.3618, 2.4185, 2.2385, + 2.2092, 2.4481, 2.2702, 2.3838, 2.5967, 2.1375, 2.3543, 2.3309, 2.4050, + 1.9929, 2.8342, 2.2762, 2.1165, 2.1972, 2.3158, 2.3037, 2.3601, 2.3452, + 2.4369, 2.4202, 2.3446, 2.0123, 2.3398, 2.4535, 2.3274, 2.3294, 2.3187, + 2.4326, 2.2906, 2.3188, 2.3241, 2.3415, 2.2712, 2.2277, 2.3186, 2.4004, + 2.2099, 2.4762, 1.6964, 2.2420, 2.1310, 2.3157, 2.3439, 2.2923, 2.2653, + 2.3732, 2.2344, 2.3661, 2.3163, 2.3467, 2.3602, 2.5663, 2.3309, 2.2888, + 2.2422, 2.3163, 2.2859, 2.2626, 2.2535, 2.2556, 2.3285, 2.3882, 2.1625, + 2.3154, 2.3565, 2.3619, 2.2242, 2.2228, 2.2822, 2.3462, 2.3962, 2.3544, + 2.3980, 2.4145, 2.1884, 2.3029, 2.2805, 2.4121, 1.8022, 2.2502, 2.3409, + 2.3668, 2.2910, 2.2283, 2.3151, 2.3326, 2.2121, 2.2692, 2.2691, 2.1824, + 2.3013, 2.5337, 2.4772, 2.2240, 2.1979, 2.3759, 2.2393, 2.2431, 2.5248, + 2.6478, 2.1782, 2.4853, 2.2081, 2.3496, 2.2851, 2.1992, 2.3299, 2.2451, + 2.2018, 2.2678, 2.3296, 2.2097, 2.4417, 2.2165, 2.4353, 2.1743, 2.3772, + 2.3127, 2.3293, 2.3001, 2.3943, 
2.2248, 2.1862, 2.2712, 2.2826, 2.2903, + 2.2824, 2.3611, 2.2890, 2.3107, 2.2675, 2.3155, 2.2492, 2.5576, 2.3347, + 2.2097, 2.5520, 2.6258, 2.6445, 2.4963, 2.2860, 2.3289, 2.2363, 2.4110, + 2.3982, 2.3522, 2.1712, 2.4856, 2.3696, 2.2355, 2.3357, 2.4464, 2.2544, + 2.5418, 2.3130, 2.2759, 2.0163, 2.2506, 2.3865, 2.2722, 2.2426, 2.2608, + 2.3552, 2.2536, 2.2496, 2.5000, 2.2151, 2.1788, 2.3562, 2.3983, 2.5739, + 2.2555, 2.3720, 2.4809, 2.2267, 2.1463, 2.2596, 2.3202, 2.3680, 2.2596, + 2.4000, 2.2462, 2.3756, 2.3770, 2.2603, 2.2017, 2.2966, 2.2811, 2.3990, + 2.4600, 2.1128, 2.2974, 2.3937, 2.2691, 2.3202, 2.3163, 2.2603, 2.2670, + 2.1615, 2.5914, 2.3516, 2.3436, 2.5322, 2.3456, 2.2495, 2.3500, 2.3577, + 2.3212, 2.3772, 2.2294, 2.4203, 2.2417, 2.2855, 2.2905, 2.3836, 2.2920, + 2.0747, 2.3910, 2.3422, 2.3533, 2.3373, 2.2799, 2.3063, 2.3708, 2.3412, + 2.2712, 2.3281, 2.3870, 2.0959, 2.1643, 2.2547, 2.3594, 2.4648, 2.4353, + 2.3893, 2.3490, 2.4140, 2.1236, 2.3925, 1.9306, 2.2080, 2.1856, 2.3817, + 2.2347, 2.2804, 2.3309, 2.2901, 2.2481, 2.3116, 2.2113, 2.1951, 2.3585, + 2.2502, 2.2685, 2.2172, 1.8982, 2.2615, 2.3360, 2.4474, 2.3664, 2.4077, + 2.0775, 2.2460, 2.5276, 2.3668, 2.1460, 2.2497, 2.3825, 2.1070, 2.3265, + 2.1847, 2.3438, 2.2506, 2.4812, 2.4725, 2.2261, 2.1374, 2.2534, 2.3833, + 2.2467, 2.3161, 2.3496, 2.1886, 2.5165, 2.3093, 2.3108, 2.3012, 2.2424, + 2.3140, 2.2488, 2.3573, 2.3360, 2.3477, 2.3584, 2.4732, 2.2807, 2.2586, + 2.3842, 2.2568, 2.3066, 2.4383, 2.6123, 2.1762, 2.2118, 2.3744, 2.2700, + 2.2935, 2.3482, 2.3100, 2.4050, 2.2994, 2.2794, 2.2825, 2.3585, 2.2868, + 2.4056, 2.3555, 2.2402, 2.2468, 2.2976, 2.2346, 1.8783, 2.4171, 2.3038, + 2.2860, 2.3164, 2.2196, 2.2157, 2.2415, 2.3264, 2.3517, 2.3506, 2.1730, + 2.7051, 2.3974, 2.2808, 2.4565, 2.1615, 2.3847, 2.4392, 2.1036, 2.3728, + 2.3957, 2.4668, 2.3189, 2.2417, 2.6226, 2.2039, 2.2346, 2.2887, 2.2391, + 2.2973, 2.1040, 2.4543, 2.0656, 2.2314, 2.6447, 2.3755, 2.3391, 2.2552, + 2.2017, 2.3095, 2.3668], 
device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-0.2197, 0.3354, -0.4777, 0.2296, -0.2571, -0.2782, 0.2680, 0.1041, + -0.3820, -0.2839, -0.0534, -0.0172, -0.4468, -0.0195, 0.1345, -0.0460, + 0.1044, -0.0443, -0.2040, -0.7718, 0.3504, 0.2080, 0.2851, -0.5775, + 0.1081, -0.5376, -0.0747, 0.1722, -0.5021, -0.0547, 0.5642, 0.1543, + 0.1477, -0.4310, -0.2311, -0.6174, -0.4021, 0.1010, 0.8179, -0.0225, + -0.1327, -0.2184, 0.4328, -0.1261, -0.6001, -0.9222, -0.7819, -0.1870, + 0.2756, -0.2588, -0.6792, -0.6569, -0.1249, -0.3367, -1.0191, 0.9087, + 0.7620, -0.6038, 0.0963, -0.0083, -0.1625, 0.1888, -0.1935, -0.7019, + 0.0724, 0.0275, -0.1649, -0.5189, -0.6144, -0.9170, 0.6056, 0.1743, + 0.0684, -0.5347, -0.3405, -0.5641, 0.8142, -0.2712, -0.0467, 0.5850, + 0.3540, 0.2550, -0.6323, -0.1901, -0.5813, -0.3020, 0.2263, -0.0346, + 0.2808, 0.1108, -0.2443, -0.0337, -0.3646, 0.1265, -0.3793, -0.1714, + 0.7608, -0.5926, -0.3150, 1.0996, -0.4086, -0.6049, -0.5115, 0.3061, + 0.5860, 0.2025, 0.4765, 0.4331, -0.0295, -0.1208, 0.3130, 0.4131, + -0.5850, 0.0727, 0.5004, 0.0265, 0.7074, 0.5079, 0.6204, 0.5398, + -0.1818, 0.0148, -0.2653, 0.1086, 0.2554, -0.6922, 0.2110, 0.2183, + -0.4925, 0.0034, 0.0637, -0.3723, 0.3850, -0.4272, 0.8816, -0.0507, + 0.2320, -0.3092, -0.0387, -0.2841, 0.2158, 0.1545, -0.1862, -0.4746, + -0.3839, -0.0917, 0.7386, -0.4229, -0.6547, -0.2977, 0.0176, -0.0818, + 0.2436, -0.1958, -0.6082, 0.9952, -0.1447, -0.3482, -0.2684, -0.6609, + 0.0937, -0.4504, 0.7387, -0.3864, 0.5788, 0.3532, -0.3904, 0.1869, + 0.1422, -0.3027, 0.5089, -0.1565, -0.3660, 0.3908, 0.0425, 0.0389, + 0.2921, 0.0230, 0.1537, 0.0203, -0.5847, -0.7670, 0.2777, 0.3962, + 0.0310, 0.3750, -0.4292, 1.8772, 0.0144, 0.2069, -0.6072, -0.7556, + 0.0070, 0.6241, 0.5252, -0.0226, 0.4671, 0.0726, -0.6137, 0.4737, + -0.8719, -0.3078, 0.7422, 0.0657, -0.3948, -0.3315, 0.4820, 0.4000, + -0.1885, -0.5593, -0.0326, 0.0303, -0.7097, 0.4710, -0.0080, 0.2596, + -0.5053, 
-0.2653, 0.0719, -0.2415, 0.7024, 0.0625, 0.0104, 0.8983, + -0.0749, -0.4544, -0.1307, 0.4815, -0.0946, 0.0246, -0.3972, -0.0282, + 0.3114, 0.5682, -0.4983, -0.3602, 0.7954, 0.1412, -0.0285, 0.4178, + -0.0331, 0.0314, 0.1608, -0.5503, 0.3445, -0.3405, 0.3910, 0.3342, + 0.0887, 0.7079, -0.2582, 0.5060, 0.2926, 0.2701, 0.3206, -0.1652, + 0.4704, -0.9332, -0.6158, -0.4013, -0.1978, 0.3000, 0.6680, 0.0541, + -0.2291, -0.1851, 0.2771, -0.1664, 0.2027, -0.5805, -0.2936, 0.0750, + -0.4917, -0.5222, 0.1597, -0.4634, 0.3040, 0.2883, -0.1047, 0.2459, + -0.5699, -1.0970, -0.1979, -0.0615, -0.3691, -0.1197, 0.1136, 0.3624, + 0.0045, 0.2218, -0.7172, 0.5502, 0.3994, -0.0446, -0.1522, -0.6184, + -0.3763, -0.8814, 0.1757, -0.1981, -0.1453, 0.7149, -0.0042, -0.4930, + 0.1774, -0.1074, 0.0255, 0.4448, -0.0075, -0.1348, 0.1764, -0.4782, + 0.4759, 0.3203, -0.3103, -0.3071, 0.5137, 0.1515, -0.2925, 0.4680, + -0.3368, -0.3806, 0.4335, -0.0727, 0.1921, -0.5856, -0.0663, 0.1296, + 0.3556, 0.2977, 0.4352, -0.3491, -0.4985, -0.2220, 0.7073, -0.0541, + -1.2134, 0.2054, 0.2669, 0.2849, 0.9239, -0.1697, -0.2411, 0.5358, + -0.8517, 0.4159, -0.4341, 0.3638, -0.2540, -0.0092, 0.3277, 0.6798, + -0.0468, 0.7107, -0.3865, 0.1758, 0.0242, -0.3292, -0.0701, 0.3912, + -0.8041, -0.4097, -0.0628, -0.3553, 0.4081, 0.0968, 0.1385, 0.5100, + 0.2941, 0.5431, -0.0992, -0.4025, 0.3482, -0.2689, 0.2528, -0.3012, + -0.1930, 0.5852, -0.2023, -0.6191, -0.3207, -0.2631, -0.0191, -0.0539, + -0.0853, -0.1048, -0.3848, 0.0635, 0.0174, 0.1541, 0.2229, -0.3462, + 0.7997, 0.4048, -0.5542, -0.4005, -0.2120, 0.1894, -0.5277, -0.4932, + -0.2205, 0.1349, -0.4992, -0.4048, 0.2936, -0.1750, 0.6371, -0.0344, + -0.1503, 0.4773, 0.0841, -0.1642, 0.2792, 0.3737, 1.0111, -1.7460, + 0.4022, 0.1431, -0.0319, 0.4733, 0.1269, -0.5802, 0.6673, 0.7738, + -0.2363, 0.4140, 0.1159, 0.1393, -0.2416, 0.2621, -0.5978, -0.5345, + -0.1574, 0.0450, 0.5317, 0.1276, -0.1231, 0.3749, 0.4681, -0.3119, + 0.4738, 0.0894, 0.0050, -0.7968, -0.0672, 
-0.0325, -0.2209, 0.5237, + 0.0687, -0.1099, -0.0316, 0.0336, -0.5300, -0.0200, -0.2776, -0.2014, + -0.9375, -0.5626, -0.0044, -0.0603, -0.2738, 0.3892, -0.3124, -0.0381, + -0.2990, 0.0306, -0.2100, 0.0666, -0.0415, -0.3901, 0.4862, 0.0913, + 0.2861, -0.3323, -0.4852, -0.3346, -0.2049, -0.8711, 0.1985, 0.0153, + 0.0071, 0.3348, 0.7854, -0.0110, 0.0555, -0.3576, 0.1926, 0.2818, + 0.2335, 0.0705, -0.2015, -0.1077, -0.0319, -0.1805, -0.0067, -0.1685, + -0.9881, -0.2477, 0.5524, 0.7451, 0.3463, -0.6136, -0.1202, 0.3726, + 0.3561, 0.2569, 0.0566, -0.0621, 0.0672, -0.3779, -0.2898, -0.2102, + -0.5451, 0.1628, -0.1314, 0.3358, 0.4334, -0.3215, 0.3202, 0.4060, + 0.3323, -0.1247, 0.2225, 0.1549, 0.0055, -0.9321, 0.2920, 0.2641, + -0.3068, 0.1259, -0.3560, -0.4065, 0.6489, -0.0242, 0.3904, -0.4408, + 0.5459, -0.2332, 0.3938, -0.2005, 0.0877, -0.6211, -0.8579, 1.0872, + 0.6445, 0.3743, -0.3435, -0.6151, -0.6444, 0.3134, -0.8510, -0.3456, + 0.5685, 0.7083, -0.1262, 0.2567, 0.4642, 0.1214, -0.8930, -0.1473, + 0.1031, -0.6386, -0.1372, 0.5740, -0.2368, -0.2888, 0.0044, -0.2496, + 0.1395, -0.0843, -0.5974, 0.3231, 0.0515, -0.4491, 0.7146, 0.9455, + 0.1059, -0.4779, -0.6536, 0.2079, 0.4047, -0.3084, 0.2931, 0.5544, + 0.1194, -0.2737, -0.1332, -0.3436, 0.6100, 0.0870, -0.0378, -0.0052, + 0.5198, -0.1086, -0.3978, 0.1642, 0.1202, 0.0767, 0.4188, 0.4866, + 0.3156, -0.3905, -0.2053, -0.5197, -0.5034, 0.5509, -0.1465, -1.6720, + 0.2816, 0.1294, -0.1098, 0.2624, -0.7129, 0.2074, 0.4927, 0.4706, + 0.0720, 0.2808, -0.1190, 0.6402, 0.1292, 0.0522, -0.1317, 0.5756, + 0.1856, 0.1908, 0.0235, 0.3120, 0.0203, 0.1948, -0.4117, 0.1373, + -0.5174, 0.1786, 0.4331, -0.3683, 0.1093, -0.5001, -0.0453, -0.9516, + -0.1138, -0.7882, -0.6547, -0.0519, -0.1056, 0.2221, 0.0606, -0.2394, + -0.3450, 0.2991, 0.4017, -0.2505, -0.0356, -0.1036, 0.0541, 0.1212, + 0.5994, 0.2916, -0.1002, 0.0949, -0.0342, 0.0201, 0.5530, -0.5057, + -0.3143, -0.7078, -0.0361, -0.3898, -0.1446, -0.7224, -0.1283, 0.0217, + 0.6097, 
-0.0934, 0.3045, -0.2684, 0.2040, 0.2234, 0.3884, -0.3300, + -0.4949, 0.9895, -0.7241, -0.3068, -0.3328, 0.5213, 0.0418, -0.0891, + 0.6599, -0.3454, 0.3083, 0.3161, -0.7353, -0.5203, 0.0900, 0.7819, + -0.4455, -0.2579, -0.1770, 0.5329, 0.0052, 0.1006, 0.1091, -0.1591, + 0.0947, 0.5937, -0.6986, -0.3535, -0.2971, -0.0830, 0.2296, -0.0583, + -0.5550, 0.0065, 0.4454, -0.2022, 0.0124, 0.5068, -0.7134, -0.0757, + -0.4578, -0.4611, -0.4076, -0.5882, -0.1275, 0.0725, 0.4444, 0.3409, + 0.2102, -0.1735, -0.5005, 0.2124, 0.2399, 0.0604, 0.3048, 0.2323, + -0.4227, -0.2303, -0.9566, -0.6571, -0.2358, 0.2445, 0.3673, -0.2596, + 0.7291, 0.0237, 0.0355, 0.5341, 0.6529, 0.7548, 0.0482, -0.8337, + -0.7353, 0.1694, -0.2487, 0.4523, -0.1397, -0.1567, 0.8041, 0.3133, + -0.5152, -0.9554, -0.3891, 0.1217, 0.3175, 0.0666, -0.1434, 0.2636], + device='cuda:1', requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 0.0218, 0.0299, 0.0232, ..., 0.0067, -0.0137, 0.0305], + [-0.0040, -0.0037, 0.0077, ..., 0.0148, 0.0173, -0.0006], + [ 0.0255, 0.0141, 0.0116, ..., -0.0026, 0.0090, -0.0149], + ..., + [-0.0184, -0.0006, 0.0145, ..., -0.0108, 0.0039, -0.0072], + [ 0.0077, -0.0093, 0.0101, ..., 0.0183, 0.0176, -0.0251], + [-0.0074, 0.0018, -0.0270, ..., -0.0280, -0.0008, 0.0160]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.3987, 0.2491, 0.1892, ..., 0.0093, 0.0336, -0.0070], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[ 0.0156, 0.0023, -0.0103, ..., -0.0124, 0.0080, -0.0077], + [-0.0054, 0.0075, -0.0130, ..., -0.0132, -0.0064, 0.0061], + [-0.0258, 0.0146, -0.0273, ..., 0.0131, 0.0164, 0.0133], + ..., + [-0.0032, 0.0077, 0.0094, ..., 0.0011, -0.0049, -0.0260], + [-0.0200, 0.0107, -0.0099, ..., -0.0227, -0.0027, 0.0055], + [-0.0254, 0.0017, 0.0047, ..., -0.0115, -0.0037, 0.0176]], + device='cuda:1', dtype=torch.float16, 
requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-7.0679e-02, -4.5776e-02, 9.1003e-02, -9.2896e-02, -1.0553e-01, + -1.0181e-01, -2.5803e-02, -1.9501e-02, 9.9060e-02, -5.9784e-02, + -9.1782e-03, 9.3689e-02, 3.3569e-02, 3.6102e-02, 2.7451e-02, + 1.0675e-01, -2.7191e-02, -5.3497e-02, 1.2787e-02, -1.2189e-01, + -3.2837e-02, 2.3331e-02, 5.8624e-02, 3.2562e-02, 4.0222e-02, + 4.9957e-02, -4.8889e-02, -1.0880e-02, 6.6757e-03, 8.3069e-02, + 3.0701e-02, -5.0720e-02, -2.2461e-02, -2.9907e-02, 3.4668e-02, + -6.8115e-02, -6.2500e-02, 6.7017e-02, 1.5381e-01, -4.2480e-02, + -2.7969e-02, 3.5065e-02, 4.3182e-02, -4.9164e-02, -5.0659e-02, + -5.5298e-02, 4.4159e-02, -7.7087e-02, -3.0991e-02, 3.7048e-02, + -2.8351e-02, -1.1823e-01, 2.7409e-03, -3.3997e-02, 1.4296e-03, + -2.3537e-03, 8.4778e-02, -2.4139e-02, -9.0179e-03, 7.6790e-03, + -6.6490e-03, 3.6346e-02, -2.0126e-02, 2.2537e-02, -4.6997e-03, + 7.3792e-02, -8.8745e-02, -2.4490e-02, -1.2347e-01, -1.2634e-01, + 2.9633e-02, -5.7564e-03, -2.0911e-01, -8.7036e-02, -2.5528e-02, + -5.9082e-02, 3.6865e-02, 6.3965e-02, -7.2632e-03, 7.9834e-02, + 4.2664e-02, 6.3232e-02, 3.9459e-02, -5.0690e-02, -8.1726e-02, + -5.8594e-02, 1.1548e-01, -4.0131e-02, 3.0319e-02, -9.5337e-02, + -7.4524e-02, -7.6843e-02, -5.9021e-02, 9.4055e-02, 2.3514e-02, + -2.0996e-02, 1.3054e-02, -7.8247e-02, -9.8022e-02, 1.9202e-01, + 5.8594e-02, -5.3040e-02, -1.1823e-01, 1.5015e-02, 1.7075e-02, + -4.1718e-02, -1.9974e-02, -2.4048e-02, 3.1525e-02, -3.2898e-02, + 3.1113e-02, 9.5459e-02, -1.1664e-01, -5.0240e-03, -5.0232e-02, + 1.6508e-03, 6.4453e-02, 4.3365e-02, -2.9495e-02, 9.2407e-02, + -3.8086e-02, 1.9445e-03, 2.4414e-02, 9.6069e-02, 7.1106e-02, + -2.1118e-02, 1.0645e-01, 9.8877e-02, 4.7058e-02, 2.4948e-02, + 3.3783e-02, -5.2826e-02, 1.4795e-01, 1.4429e-01, 2.7863e-02, + 7.3608e-02, 3.7670e-03, -1.4319e-01, 8.0566e-02, -2.8549e-02, + 1.1816e-03, 9.8145e-02, 2.5558e-02, 6.2195e-02, -1.2047e-02, + 5.1819e-02, -2.8744e-03, -1.2085e-01, -6.2988e-02, 
-8.5388e-02, + -3.7994e-02, 1.3962e-02, 5.8136e-02, -7.4158e-02, 1.6479e-02, + 1.1902e-01, 6.0303e-02, -3.8879e-02, -7.4951e-02, -8.5693e-02, + -3.5400e-02, 3.5919e-02, -2.8793e-02, -7.5607e-03, 5.2643e-02, + 5.1361e-02, 1.1829e-01, -6.7810e-02, 5.5786e-02, -5.8502e-02, + 1.4478e-01, -8.7830e-02, -3.9062e-02, 1.3940e-01, -1.3527e-02, + 1.5236e-02, -2.0996e-02, -2.1057e-02, 3.6255e-02, 6.7902e-04, + -7.9163e-02, -4.9629e-03, 2.2064e-02, 7.5562e-02, -1.0864e-01, + 1.3062e-01, -4.4189e-02, -5.3076e-01, 7.3792e-02, 5.9814e-02, + -1.1389e-01, -3.5919e-02, -4.1199e-02, 5.3528e-02, 3.6255e-02, + -5.6801e-03, 3.1952e-02, 4.8248e-02, -1.8665e-01, 3.3813e-02, + 4.9408e-02, -8.1558e-03, -3.0151e-02, -1.5857e-01, -3.2135e-02, + -2.4261e-03, 1.2337e-02, -3.3936e-02, -4.2603e-02, -1.1481e-01, + -1.0529e-02, 3.8886e-04, -7.4707e-02, 6.2927e-02, 7.0190e-02, + -8.8501e-02, 6.1066e-02, -7.7248e-04, 2.6871e-02, 1.0718e-01, + 5.8838e-02, 9.2102e-02, -1.8555e-02, 4.2358e-02, 2.3666e-02, + 1.4282e-01, -7.4997e-03, 3.2043e-02, 6.7749e-02, 4.3060e-02, + -6.9641e-02, -1.2589e-02, 9.8022e-02, -4.2053e-02, -2.5085e-02, + -6.2439e-02, 1.0425e-01, -7.3608e-02, -7.2449e-02, 5.0262e-02, + -1.2352e-02, 1.0391e-02, -1.8723e-02, 2.0737e-02, -9.9243e-02, + -8.9294e-02, -1.3684e-01, 1.4172e-01, -2.3300e-02, 3.2745e-02, + -8.8654e-03, 3.0685e-02, 3.9764e-02, 3.5309e-02, -4.6234e-02, + -2.1149e-02, 4.3274e-02, -4.4067e-02, -5.4352e-02, -8.4290e-02, + 2.7222e-02, -8.1421e-02, 5.6183e-02, 6.7017e-02, 3.0563e-02, + -1.0422e-02, 6.9092e-02, -5.5115e-02, 1.2091e-01, -3.0243e-02, + -8.4915e-03, -2.3682e-02, -6.2256e-02, -2.4628e-02, 4.7821e-02, + -4.0558e-02, -1.2445e-01, 7.6050e-02, -6.8420e-02, 5.0446e-02, + -1.0040e-01, -1.0150e-01, 1.3771e-02, 7.6904e-02, -1.0229e-01, + 6.4331e-02, -3.3173e-02, -3.2837e-02, 1.0582e-02, 9.0332e-02, + -1.3123e-02, -1.7441e-02, 5.4993e-02, -8.1848e-02, -1.4786e-02, + -8.4900e-02, 7.5874e-03, -1.2466e-02, -3.9246e-02, -7.0557e-02, + -4.0375e-02, -9.9060e-02, -5.7709e-02, 
-1.0333e-01, 4.7516e-02, + 7.1899e-02, -9.7046e-02, 1.2421e-01, -6.1462e-02, -5.6061e-02, + 4.9774e-02, -3.4271e-02, 1.1642e-02, 1.8448e-02, 3.9825e-02, + -4.1595e-02, 5.0964e-02, 5.9143e-02, -8.1665e-02, 6.6040e-02, + -1.4641e-02, -3.8696e-02, 1.3565e-02, -3.2318e-02, 1.0596e+00, + -3.0182e-02, -9.3933e-02, 7.8491e-02, -2.4429e-02, 6.4331e-02, + -5.5817e-02, 2.8839e-02, -2.0096e-02, -2.3560e-02, 4.5441e-02, + -7.0007e-02, -1.2067e-01, 7.7698e-02, -1.1734e-02, 1.5900e-02, + 6.6101e-02, -3.7964e-02, -6.2439e-02, 5.0079e-02, -6.2347e-02, + 7.5867e-02, -1.2903e-01, -6.0425e-02, -1.4717e-02, 2.4750e-02, + 8.2703e-02, -4.2084e-02, -3.1319e-03, -9.4238e-02, 4.2175e-02, + 1.9730e-02, -8.8074e-02, 5.4359e-03, -3.2288e-02, 7.5806e-02, + -2.3331e-02, 4.3915e-02, 4.5868e-02, -1.6342e-02, 1.1243e-01, + -1.0236e-01, -1.3298e-02, -1.0358e-01, 9.4299e-03, -7.5439e-02, + -7.3242e-02, 6.2195e-02, 1.0040e-01, 2.8320e-02, -2.3926e-02, + -3.8696e-02, -5.5542e-03, 2.3041e-02, -5.6702e-02, -6.7322e-02, + 5.0720e-02, -7.1045e-02, -8.0994e-02, -1.0010e-01, 5.1483e-02, + -8.4839e-02, 5.9662e-02, 1.0419e-01, 6.5002e-02, -1.2711e-02, + 7.4646e-02, -1.2207e-01, 3.7170e-02, 4.0497e-02, -1.7624e-02, + -2.0355e-02, -6.6406e-02, -5.2887e-02, 4.9782e-03, 5.8350e-02, + -5.6641e-02, 5.7526e-02, -3.4393e-02, -6.4453e-02, 4.7150e-02, + 2.0538e-02, 5.0598e-02, -1.2779e-02, -7.1533e-02, 1.0449e-01, + -1.6251e-02, -8.4412e-02, 7.5195e-02, 5.9601e-02, 6.1310e-02, + 2.2903e-02, -2.9999e-02, -3.6041e-02, 6.2469e-02, 5.2399e-02, + 7.8247e-02, 3.4546e-02, -4.0344e-02, 2.4460e-02, 7.3910e-04, + 7.0984e-02, 9.1003e-02, 7.1640e-03, -2.1863e-01, 3.0319e-02, + -3.5057e-03, -1.6678e-02, -1.0938e-01, -1.0541e-01, -1.1330e-02, + -1.3440e-01, 7.4692e-03, 6.9336e-02, -1.3794e-01, -2.7069e-02, + 1.2329e-02, 7.8247e-02, -6.4011e-03, -2.4475e-02, -8.3740e-02, + 1.9165e-02, 8.7585e-02, 9.7229e-02, -2.7100e-02, -8.3618e-03, + -2.2491e-02, -2.1179e-02, 1.0597e-02, -1.0992e-01, -6.5979e-02, + -6.6650e-02, 5.6244e-02, 
3.7109e-02, 1.0767e-01, 9.8938e-02, + -6.0822e-02, -3.7079e-02, -1.0614e-03, 4.0412e-04, -3.8300e-02, + 1.0895e-01, -1.0541e-01, 1.1792e-01, 2.0264e-02, 6.9284e-04, + 4.5929e-02, 5.2910e-03, -5.2490e-02, 4.3701e-02, 5.1880e-02, + 7.4615e-03, -9.7885e-03, -3.8330e-02, -1.2659e-01, -8.4656e-02, + -1.2488e-01, 5.0110e-02, 2.5848e-02, -4.6043e-03, 4.7913e-02, + -6.5857e-02, 5.9387e-02, 3.1830e-02, 1.8906e-02, 7.4291e-04, + -1.1169e-02, -1.8936e-02, 1.2366e-01, 7.6294e-02, 3.8849e-02, + -1.0938e-01, 3.0029e-02, -5.6152e-02, 1.4809e-02, -6.6345e-02, + 1.0468e-01, 3.8574e-02, 7.0992e-03, -1.4160e-02, 9.8572e-02, + 8.5266e-02, -5.5328e-02, 7.7148e-02, -1.0376e-02, 4.1046e-03, + -1.2915e-01, 6.7871e-02, 9.3231e-03, -1.0138e-01, 1.3443e-02, + 4.5013e-02, -3.8385e-05, -3.0212e-02, -1.8784e-02, 1.2354e-01, + 6.4819e-02, 3.2310e-03, -5.0873e-02, -4.9095e-03, -5.7465e-02, + 3.1525e-02, -2.1057e-03, 5.0720e-02, -4.4495e-02, -1.9178e-03, + 5.1392e-02, 6.4514e-02, -9.3155e-03, 1.0876e-01, -4.2305e-03, + -1.2292e-01, 9.3994e-02, -1.7920e-03, 4.2694e-02, 1.6922e-02, + 4.1565e-02, -3.9764e-02, -5.7770e-02, 1.2360e-02, 7.4097e-02, + 8.1909e-02, -4.5166e-02, -5.3833e-02, -7.1869e-03, -9.4528e-03, + 4.8981e-02, -2.7710e-02, 1.0260e-01, -1.3412e-02, 1.1505e-02, + 6.5247e-02, 1.0474e-01, -2.1515e-02, -3.8239e-02, -1.0846e-01, + 5.4108e-02, -2.1985e-01, -3.7720e-02, 4.5532e-02, -8.8135e-02, + -2.5528e-02, -1.0339e-01, -7.7393e-02, -1.1940e-03, -5.2185e-02, + -6.1951e-02, 2.1973e-02, 3.8452e-02, -4.7058e-02, 2.1606e-02, + 2.3384e-03, 1.9165e-02, -2.5101e-02, -3.4466e-03, 9.0942e-02, + 1.3977e-01, -1.2122e-01, -8.9294e-02, 9.1370e-02, 1.8951e-02, + -8.0185e-03, 1.6144e-02, -1.4145e-02, 6.8848e-02, -1.4801e-02, + -3.5522e-02, -4.9561e-02, 2.6398e-02, -1.1346e-01, -3.8662e-03, + 1.0541e-01, -1.7624e-02, -2.3285e-02, 7.0251e-02, 6.9542e-03, + 3.5156e-02, 1.2573e-01, -4.9438e-02, -7.5317e-02, -3.8971e-02, + -6.6223e-02, -2.0905e-02, -6.0944e-02, -6.0944e-02, 1.3611e-01, + 3.0319e-02, 7.3853e-02, 
-7.8812e-03, 7.2327e-02, 3.3951e-03, + 9.3140e-02, 1.0571e-01, 3.3150e-03, -3.7262e-02, 1.6083e-02, + 7.7705e-03, 3.0014e-02, -1.8890e-02, 7.6599e-02, -5.6793e-02, + 6.9336e-02, 5.2399e-02, 3.0533e-02, -3.6560e-02, -1.0094e-02, + -5.7037e-02, 1.3550e-01, 4.0497e-02, -5.1819e-02, -1.9897e-02, + -6.3965e-02, 1.9617e-01, -1.3374e-02, -7.1350e-02, 2.0462e-02, + 4.1779e-02, -9.9060e-02, -8.6670e-02, 4.7607e-02, -1.2292e-01, + 7.4402e-02, -9.5764e-02, -1.0065e-01, 4.4739e-02, -3.7598e-02, + -1.0968e-01, -1.5881e-01, 1.7334e-02, -9.2651e-02, 1.0797e-01, + 9.8267e-02, 4.5052e-03, -2.0615e-02, -7.7393e-02, -3.9635e-03, + 5.2277e-02, 6.7932e-02, 1.3293e-01, 1.8339e-03, -7.7454e-02, + -2.6428e-02, 1.0175e-01, 5.3864e-02, -5.4901e-02, -5.7068e-03, + -7.3486e-02, -8.3679e-02, -4.8523e-02, 4.5258e-02, 2.5436e-02, + -5.6793e-02, 4.0558e-02, 6.2744e-02, 5.5817e-02, 5.1941e-02, + -6.1279e-02, 2.9028e-01, -1.9169e-03, -8.4229e-02, -4.5868e-02, + 3.0945e-02, 6.5552e-02, 4.5898e-02, 6.6589e-02, -4.1199e-02, + -6.7444e-02, 3.7804e-03, 1.2665e-02, 1.0422e-02, -9.9976e-02, + -2.8168e-02, 3.8586e-03, -2.4567e-02, -9.8328e-02, 3.9581e-02, + 7.5562e-02, -7.6180e-03, 2.4734e-02, 3.1235e-02, -7.0801e-02, + 1.0535e-01, -1.2085e-02, 7.4844e-03, 1.0841e-02, -1.1749e-01, + 5.3467e-02, 1.4549e-02, -5.8289e-02, -1.5213e-02, 1.1957e-01, + 9.1125e-02, 1.3904e-01, 1.0822e-01, -4.1321e-02, 5.4565e-02, + 8.5083e-02, -7.4341e-02, 1.5030e-02, -3.3478e-02, -2.0203e-02, + 2.5604e-02, 3.6987e-02, -1.3008e-02, 6.3232e-02, -7.7454e-02, + -1.4111e-01, -8.8074e-02, -3.7506e-02, -4.3671e-02, 8.7402e-02, + 4.4617e-02, 4.7424e-02, -1.4565e-02, -1.0992e-01, 1.0181e-01, + -9.0637e-02, 1.2854e-01, 6.5369e-02, 2.7649e-02, -2.2308e-02, + -2.0157e-02, 6.4331e-02, -3.9062e-03, 2.3636e-02, 2.3666e-02, + 1.4148e-01, -2.2064e-02, 2.6810e-02, -1.4656e-02, -6.7688e-02, + -4.1504e-02, -2.7832e-02, -4.6509e-02, 4.8615e-02, 6.4087e-02, + 9.7504e-03, -1.4923e-02, -2.2110e-02, 8.0444e-02, -2.2034e-02, + -2.3918e-03, -3.1235e-02, 
2.3834e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.6302, 1.7985, 1.6646, 1.6736, 1.6246, 1.7435, 1.6344, 1.5951, 1.7835, + 1.7151, 1.7555, 1.7396, 1.7075, 1.6981, 1.6234, 1.7331, 1.7033, 1.7175, + 1.6565, 0.4716, 1.7354, 1.6599, 1.7952, 1.7343, 1.7564, 1.7225, 1.7034, + 1.7112, 1.7357, 1.6902, 1.7430, 1.6893, 1.7471, 1.7559, 1.6642, 1.6995, + 1.8257, 1.6706, 1.7690, 1.5814, 1.7597, 1.6698, 1.7122, 1.8090, 1.8079, + 1.6904, 1.7110, 1.7207, 1.6608, 1.6236, 1.7413, 1.7274, 1.7435, 1.7000, + 1.8054, 1.8643, 1.7005, 1.6365, 1.7551, 1.7265, 1.7312, 1.6578, 1.7392, + 1.7119, 1.6265, 1.6944, 1.6835, 1.6661, 1.7137, 1.6735, 1.7496, 1.7183, + 1.8942, 1.6574, 1.7160, 1.6588, 1.6898, 1.6397, 1.6588, 1.7767, 1.7230, + 1.7033, 1.7254, 1.6983, 1.6907, 1.6416, 1.7969, 1.7425, 1.7828, 1.7353, + 1.7485, 1.6252, 1.7266, 1.7510, 1.7018, 1.6580, 1.7274, 1.6662, 1.6723, + 1.6758, 1.6462, 1.8060, 1.6850, 1.6715, 1.5962, 1.8119, 1.7262, 1.6277, + 1.8163, 1.7033, 1.6744, 1.7379, 1.6901, 1.7661, 1.6420, 1.7525, 1.6696, + 1.6702, 1.7037, 1.6573, 1.7341, 1.7748, 1.6981, 1.6995, 1.7189, 1.6223, + 1.6765, 1.7105, 1.7716, 1.7087, 1.7001, 1.7687, 1.8099, 1.7073, 1.6350, + 1.7793, 1.6807, 1.7039, 1.6787, 1.6660, 1.7035, 1.7318, 1.7495, 1.7426, + 1.7035, 1.7014, 1.7568, 1.7150, 1.7569, 1.7416, 1.6580, 1.7700, 1.7185, + 1.7293, 1.7545, 1.4643, 1.7184, 1.7745, 1.7545, 1.7200, 1.6514, 1.6298, + 1.6485, 1.7160, 1.6818, 1.6670, 1.7770, 1.7118, 1.7166, 1.7775, 1.7387, + 1.6928, 1.6731, 1.6660, 1.7970, 1.7641, 1.7415, 1.6976, 1.6877, 1.7292, + 1.7794, 1.6900, 1.7106, 1.7634, 1.7742, 1.5609, 1.6714, 2.4107, 1.7411, + 1.7388, 1.7277, 1.6532, 1.7926, 1.7386, 1.7353, 1.7197, 1.6594, 1.7166, + 2.9263, 1.6523, 1.6338, 1.6537, 1.6623, 1.8019, 1.7601, 1.7041, 1.5999, + 1.6747, 1.6999, 1.7111, 1.7555, 1.6907, 1.7676, 1.6870, 1.7656, 1.6681, + 1.6671, 1.7032, 1.6807, 1.6889, 1.7178, 1.7326, 1.6222, 1.6898, 1.7453, + 1.6968, 1.7163, 1.7438, 
1.7356, 1.7753, 1.7192, 1.6955, 1.7155, 1.6026, + 1.7237, 1.7517, 1.6404, 1.6831, 1.6877, 1.7579, 1.7780, 1.6317, 1.8032, + 1.6840, 1.6494, 1.7386, 1.6764, 1.7246, 1.7601, 1.7989, 1.7198, 1.7053, + 1.7228, 1.7097, 1.8476, 1.6812, 1.6919, 1.7276, 1.6427, 1.6771, 1.6796, + 1.6661, 1.7377, 1.7073, 1.7061, 1.6469, 1.7119, 1.6620, 1.7859, 1.6810, + 1.6796, 1.7163, 1.6304, 1.6605, 1.7343, 1.6836, 1.6586, 1.6460, 1.7383, + 1.6565, 1.6995, 1.6848, 1.6886, 1.7302, 1.6319, 1.7084, 1.6455, 1.6820, + 1.7227, 1.6099, 1.6801, 1.7885, 1.7595, 1.7140, 1.7157, 1.8043, 1.6373, + 1.7648, 1.7183, 1.7589, 1.7482, 1.7072, 1.6913, 1.7318, 1.7578, 1.7108, + 1.6067, 1.7158, 1.7523, 1.6791, 1.7145, 1.7314, 1.7042, 1.7376, 1.6796, + 1.8628, 1.7177, 1.7367, 1.6759, 1.7337, 1.7052, 1.6002, 1.7024, 1.7880, + 1.0642, 1.6845, 1.7041, 1.6755, 1.6165, 1.7929, 1.6193, 1.6957, 1.6715, + 1.5950, 1.6571, 1.6822, 1.6901, 1.7022, 1.7024, 1.7112, 1.6735, 1.6967, + 1.6778, 1.6575, 1.7305, 1.8147, 1.6804, 1.6963, 1.6868, 1.7943, 1.5572, + 1.6860, 1.7039, 1.7471, 1.6538, 1.6997, 1.7835, 1.8064, 1.7628, 1.7780, + 1.6053, 1.6869, 1.7610, 1.7246, 1.7329, 1.7138, 1.6687, 1.6806, 1.6728, + 1.6817, 1.7294, 1.7214, 1.7234, 1.7492, 1.6789, 1.7357, 1.7378, 1.7139, + 1.7068, 1.7499, 1.6821, 1.7469, 1.6772, 1.6623, 1.6863, 1.7077, 1.6838, + 1.6427, 1.6845, 1.6781, 1.7001, 1.6867, 1.6507, 1.7201, 1.7097, 1.7876, + 1.6753, 1.7640, 1.7407, 1.7754, 1.6692, 1.7506, 1.7908, 1.7180, 1.7826, + 1.7048, 1.7631, 1.7375, 1.7276, 1.6960, 1.6728, 1.6715, 1.6916, 1.6660, + 1.8672, 2.4371, 1.7775, 1.7589, 1.6740, 1.6990, 1.6844, 1.7150, 1.6511, + 1.7197, 1.7364, 1.6682, 1.6788, 1.7742, 1.7689, 1.6509, 1.7058, 1.5937, + 1.6183, 1.7320, 1.6366, 1.7162, 1.6393, 1.6547, 1.6122, 1.6934, 1.7644, + 1.7195, 1.7261, 1.5334, 1.6655, 1.7531, 1.6899, 1.7284, 1.7253, 1.7140, + 1.7599, 1.7455, 1.6877, 1.7144, 1.7063, 1.7585, 1.6483, 1.7034, 1.7872, + 1.6436, 1.6991, 1.7315, 1.6869, 1.7266, 1.6941, 1.6680, 1.7382, 1.7115, + 1.6838, 1.7406, 1.7487, 
1.7284, 1.6998, 1.7120, 1.7665, 1.7601, 1.7432, + 1.6692, 1.6749, 1.7034, 1.7852, 1.6845, 1.7553, 1.6064, 1.6902, 1.6817, + 1.7528, 1.7495, 1.6978, 1.7445, 1.7844, 1.7044, 1.6215, 1.8064, 1.6947, + 1.7256, 1.7462, 1.6445, 1.7153, 1.7068, 1.6898, 1.7033, 1.6971, 1.6012, + 1.7222, 1.6898, 1.6337, 1.6189, 1.7169, 1.6581, 1.6849, 1.7840, 1.7095, + 1.7210, 1.6652, 1.6866, 1.7299, 1.7457, 1.6161, 1.6876, 1.6810, 1.6921, + 1.7402, 1.6973, 1.6989, 1.7322, 1.6970, 1.6480, 1.6529, 1.7896, 1.7318, + 1.7364, 1.6680, 1.7424, 1.7295, 1.6991, 1.7154, 1.7562, 1.7087, 1.8444, + 1.7505, 1.7476, 1.6686, 1.7369, 1.7072, 1.7248, 1.6937, 1.7070, 1.6945, + 1.7787, 1.7125, 1.7263, 1.7831, 1.6013, 1.7178, 1.6786, 1.7872, 1.6886, + 1.8013, 1.6767, 1.8089, 1.5642, 1.7826, 1.7168, 1.7179, 1.7068, 1.7395, + 1.6109, 1.8325, 1.7497, 1.7236, 1.6414, 1.7245, 1.6947, 1.7587, 1.6420, + 1.6678, 1.7057, 1.6923, 1.7012, 1.6248, 1.7218, 1.7242, 1.6999, 1.6844, + 1.7597, 1.7158, 1.7671, 1.7628, 1.7141, 1.7269, 1.7585, 1.7083, 1.7444, + 1.7899, 1.7588, 1.7001, 1.7858, 1.6202, 1.7109, 1.7387, 1.7646, 1.8021, + 1.6163, 1.7132, 1.7312, 1.8329, 1.9605, 1.6433, 1.7724, 1.7457, 1.6874, + 1.7289, 1.7210, 1.7764, 1.7046, 1.6804, 1.7570, 1.7835, 1.6932, 1.7622, + 1.7469, 1.6904, 1.6917, 1.6915, 1.7520, 1.7008, 1.6287, 1.7597, 1.6486, + 1.6401, 1.7387, 1.6101, 1.5982, 1.7633, 1.7166, 1.7213, 1.7841, 1.6996, + 1.6279, 1.6920, 1.7200, 1.7078, 1.6533, 1.5925, 1.7321, 1.7587, 1.6523, + 1.7181, 1.6851, 1.7170, 1.7081, 1.7212, 1.7277, 1.7332, 1.8115, 1.6485, + 1.7706, 1.6613, 1.6623, 1.6581, 1.7324, 1.6186, 1.7720, 1.6911, 1.7587, + 1.7309, 1.6524, 1.7619, 1.6940, 1.6903, 1.7228, 1.7862, 1.6758, 1.6580, + 1.6437, 1.7973, 1.6737, 1.7409, 1.6853, 1.7406, 0.3935, 1.6961, 1.6231, + 1.7050, 1.5714, 1.6899, 1.7399, 1.6476, 1.6983, 1.7059, 1.6509, 1.7344, + 1.6900, 1.7586, 1.6770, 1.6974, 1.6979, 1.6864, 1.7182, 1.7086, 1.6912, + 1.7265, 1.7103, 1.7262, 1.7304, 1.6552, 1.6509, 1.6989, 1.7162, 1.7130, + 1.7572, 1.7408, 1.7483, 
1.6416, 1.6755, 1.7099, 1.6839, 1.7110, 1.6536, + 1.6486, 1.7170, 1.6966, 1.6698, 1.7294, 1.7431, 1.5646, 1.7819, 1.7805, + 1.7427, 1.7266, 1.7108, 1.7271, 1.6740, 1.6893, 1.7412, 1.6017, 1.7220, + 1.7766, 1.7968, 1.7975, 1.7024, 1.7743, 1.6766, 1.6991, 1.7311, 1.7651, + 1.7055, 1.6984, 1.7207, 1.6710, 1.6696, 1.7045, 1.8250, 1.7300, 1.6860, + 1.7420, 1.7058, 1.7018, 1.7314, 1.8237, 1.7121, 1.6656, 1.6345, 1.6813, + 1.6482, 1.6091, 1.6466], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.3478e-01, 4.9497e-02, -1.9917e-01, -2.5184e-02, 1.8369e-01, + 1.4607e-01, 1.4084e-01, 6.8528e-02, -1.5947e-01, -6.2207e-02, + 7.6121e-02, 3.2098e-03, -1.8940e-01, -4.3906e-02, -1.2922e-01, + -4.1989e-02, -2.0775e-02, -6.5565e-03, 7.1063e-05, 4.6815e+00, + -3.8654e-02, 1.2217e-01, 4.5751e-02, 1.4584e-02, -3.0041e-02, + -1.6267e-02, 6.6473e-02, 1.1673e-02, 6.2238e-02, -2.7864e-02, + 6.8211e-02, 4.2821e-02, -2.0303e-02, 1.4535e-01, 7.2629e-02, + -9.8829e-02, 1.5733e-02, 1.1677e-02, -1.0973e-01, 1.0373e-01, + 5.3048e-02, 4.4230e-02, -1.5395e-02, 2.9456e-02, -2.1916e-02, + -1.3948e-02, 7.3127e-02, -2.4933e-02, 8.8659e-02, 9.9079e-02, + -5.0301e-02, -5.6772e-02, 7.0892e-02, -7.0100e-02, 2.1891e-02, + 1.0014e-01, -4.8898e-02, -1.2617e-01, 4.7462e-02, 5.8843e-02, + 7.5138e-02, -7.0855e-02, 8.9971e-02, 3.2970e-02, 8.8290e-02, + 5.7166e-03, 1.2110e-01, 1.0937e-01, -2.0420e-02, 2.1111e-01, + 1.3070e-01, 9.3519e-02, 7.5941e-01, 1.3822e-01, -1.2845e-01, + 7.7136e-02, 5.3268e-02, -1.1566e-01, -9.7905e-03, 6.5708e-02, + -5.1334e-04, 1.6957e-02, -7.0916e-02, 1.5199e-01, -1.2595e-02, + -4.9605e-02, 1.3342e-01, 1.0708e-01, -3.1504e-02, 5.3242e-03, + -2.3024e-02, 2.6981e-02, 4.4238e-02, -6.3475e-02, -1.1366e-01, + -2.3226e-02, 3.2116e-02, -1.0859e-01, -1.1022e-01, 3.6969e-02, + 3.9728e-02, -7.8961e-02, -6.6003e-02, -3.1561e-02, 6.1650e-02, + 8.0293e-03, 6.8628e-02, -4.7751e-02, 6.1943e-04, -4.6869e-02, + -6.5288e-02, -4.3023e-02, -5.4249e-02, 5.9192e-02, 
1.2167e-01, + -5.0382e-02, -5.4665e-02, -1.0046e-02, 1.7697e-02, 1.4857e-01, + -6.6598e-02, -2.7876e-01, -1.4727e-01, 2.0667e-02, -1.5421e-01, + -4.0336e-02, -1.7894e-01, 7.5444e-02, 2.9734e-02, -6.8246e-02, + -1.3805e-01, 2.5628e-02, -5.8404e-02, -1.9855e-02, -1.0818e-01, + 5.1029e-02, -2.3740e-02, -2.3311e-02, -4.2155e-02, -6.1693e-02, + -8.1013e-04, 2.0931e-02, 2.0602e-02, -3.5260e-02, 1.0996e-01, + -3.2859e-02, -1.4331e-02, -3.7377e-03, -1.2704e-01, 1.7471e-01, + 2.6021e-01, -8.9699e-02, -1.3109e-01, -1.8212e-02, -3.0574e-02, + -5.7635e-01, 1.9360e-02, -5.0405e-03, -3.5920e-02, 4.8079e-02, + 1.9668e-01, -1.7959e-01, -2.4585e-02, 2.3465e-02, 1.3495e-02, + 2.6305e-02, -4.9910e-02, 7.2364e-02, -3.3804e-02, 4.1981e-02, + -3.9400e-02, 9.3760e-02, 1.3973e-03, -1.1626e-03, 7.6308e-02, + -1.9463e-01, -4.3892e-02, -1.6021e-02, -1.9729e-02, 6.7893e-02, + -6.5787e-02, -5.7712e-02, 1.4709e-01, 6.0649e-02, 4.0690e-02, + -1.8589e-01, 4.2896e-02, 7.1578e-01, -7.9182e-02, -8.3033e-02, + -8.5046e-02, -1.4351e-01, 9.7572e-02, 1.8969e-01, -8.5853e-02, + 1.7470e-02, -3.4782e-02, -1.6035e-01, 3.4463e-01, 6.4275e-02, + -2.2809e-01, -5.6076e-02, -2.3670e-02, 8.6391e-02, 4.4294e-02, + 1.7742e-01, -1.4438e-01, 1.0222e-01, 1.5188e-01, -6.5791e-02, + 4.2606e-02, 2.7326e-01, -3.9982e-02, -6.7293e-02, -2.9540e-02, + 2.9845e-02, -4.9459e-02, 4.1518e-02, -1.2998e-01, -2.7670e-01, + 5.9885e-03, -2.6303e-02, 1.0029e-01, 6.5153e-02, 2.8001e-03, + -2.1397e-02, 8.1904e-02, 1.3961e-02, 1.6888e-01, -4.7473e-02, + -2.2614e-02, 7.6486e-02, 1.7701e-03, 7.7329e-02, -1.2749e-02, + 1.8841e-02, -1.0814e-02, 9.6482e-02, 7.9134e-02, 1.1832e-02, + 9.7106e-02, 1.4173e-01, -3.1437e-02, 7.6158e-02, 2.8705e-02, + 9.3654e-03, 5.6769e-02, -3.9432e-02, 8.9598e-03, 3.7227e-02, + -2.9699e-02, 2.5160e-02, 1.2401e-01, -1.3756e-01, 1.6073e-01, + -7.9136e-02, -1.2519e-02, -1.4151e-01, 5.5290e-02, 2.9537e-02, + -6.5784e-02, -1.6376e-02, -9.2159e-03, -4.8453e-03, -5.9228e-02, + 4.9723e-02, -3.6096e-02, -1.2492e-02, 
-6.2044e-02, -7.8530e-02, + -6.7506e-03, 5.7670e-02, 9.7309e-02, -1.9286e-01, -2.4384e-02, + 8.4430e-02, -8.3274e-02, 1.8163e-02, 9.2198e-02, -1.4828e-01, + 1.1023e-01, 2.1166e-01, -5.7077e-02, 7.9818e-02, 3.2513e-02, + 7.4148e-02, 6.8250e-02, -1.0043e-02, 2.2324e-01, -1.5617e-01, + -9.4417e-02, 1.2959e-02, 1.2156e-01, 5.6972e-02, 9.1363e-02, + -5.3003e-02, 2.4945e-01, -3.0141e-02, 1.0665e-01, 6.6269e-02, + 5.7760e-02, 8.7219e-02, -2.8721e-02, 1.3285e-01, 1.1745e-02, + 2.0377e-02, 4.3067e-02, -9.6992e-02, 3.5457e-02, -7.1593e-02, + 8.9201e-02, 3.3648e-02, -4.9995e-02, -8.9329e-02, 1.1568e-02, + -1.2160e-02, 2.4013e-02, 1.0378e-01, 2.1693e-01, -2.2300e-02, + -1.3594e-01, 2.4443e-01, -8.2670e-02, -3.2104e-02, -8.0759e-01, + -2.5522e-01, 3.1634e-02, 1.2173e-02, 5.3848e-02, -1.6714e-01, + 2.2496e-01, -2.5352e-02, 1.1204e-01, -6.9995e-02, 1.4671e-01, + 7.5391e-02, 1.2656e-02, -7.6575e-04, -1.0763e-02, -3.7951e-02, + -8.5652e-02, 1.6227e-03, -1.0500e-01, -6.3366e-02, 1.4564e-01, + 2.8785e-02, 7.8663e-02, 1.4579e-01, 5.0819e-02, -4.8727e-02, + -6.2212e-02, 8.5918e-02, 1.3938e-02, 1.6487e-01, -5.1246e-02, + -7.7145e-02, -6.3459e-02, -6.9927e-05, -8.5017e-02, -7.5265e-02, + -1.3347e-02, -4.1582e-02, -1.3234e-03, -1.2145e-01, -6.8723e-03, + -3.2144e-02, 5.8249e-02, 1.8110e-01, 1.0470e-01, 1.0201e-01, + 8.5427e-02, -1.7104e-02, -2.0914e-02, -4.8454e-02, 9.2019e-03, + 4.8977e-02, 9.0650e-02, 1.0401e-01, -4.5989e-02, -2.2640e-02, + -1.0117e-01, 1.3499e-01, 4.5189e-02, 7.2833e-03, -5.6661e-02, + -3.6711e-02, -2.9658e-02, -7.0319e-02, -9.1909e-02, -2.3429e-02, + 6.0404e-03, -4.5523e-02, -3.0660e-02, -3.0381e-02, 1.5182e-02, + -7.5460e-03, -1.4732e-02, -4.3778e-03, -4.7716e-02, 1.3978e-02, + 9.6924e-02, -6.2953e-03, 7.1078e-02, -1.9884e-02, -1.0224e-01, + -1.7645e-01, -6.0434e-02, 7.0719e-02, 1.1863e-02, -1.4188e-02, + -8.5998e-02, 8.1130e-02, 5.8946e-02, -8.1932e-02, -1.1164e-02, + -9.0257e-01, 2.2129e-02, 5.9473e-02, -6.6727e-02, -3.7941e-02, + 7.0465e-03, -1.1116e-02, 
5.4991e-02, -1.0147e-01, -1.0370e-01, + 5.7779e-02, -1.7909e-01, -1.2021e-01, 7.5867e-02, -2.8435e-02, + 2.3324e-02, -3.7433e-02, -2.6374e-02, 1.0537e-01, 1.8956e-01, + -1.8281e-02, 9.0064e-03, 9.5987e-02, 2.0872e-01, -1.0442e-01, + -1.9331e-02, -3.1765e-02, -7.1338e-02, 3.3094e-02, 1.6677e-01, + -1.3918e-01, 3.8584e-02, -1.0807e-01, 1.3928e-01, -2.9507e-02, + 1.3837e-02, -1.9234e-02, -1.1115e-01, 2.1526e-02, 5.8286e-02, + -3.8775e-02, -5.9076e-03, -2.4913e-02, -6.5648e-02, -5.4430e-02, + -3.0729e-02, 9.4677e-02, -1.9277e-01, 5.0177e-02, -2.1330e-02, + -4.6595e-02, 2.3514e-02, -7.6106e-02, 9.4801e-02, -1.6368e-02, + -2.9171e-02, -1.3751e-01, -7.4673e-02, 3.7233e-02, -2.6263e-02, + 6.3678e-02, 1.6571e-01, 1.0001e-02, -3.4022e-02, 8.4699e-02, + 7.3399e-02, -1.3803e-01, 6.7760e-02, 1.6934e-01, -7.5010e-02, + 1.6691e-02, 7.5057e-03, 1.0972e-01, 7.7287e-02, -2.8700e-02, + 1.3740e-02, 1.0455e-02, -2.6215e-02, 7.3498e-02, -1.2539e-01, + -1.8040e-02, -1.8961e-01, 1.2432e-01, -6.3336e-02, 9.7975e-02, + -4.1474e-02, -1.3091e-01, 2.9420e-02, 8.7890e-03, -3.5217e-02, + 4.1313e-02, 1.0067e-01, 9.6062e-02, 4.5217e-02, 1.1410e-05, + -9.3742e-02, -2.5187e-02, -1.3336e-01, 7.0206e-02, -3.5455e-02, + -3.5793e-02, 6.1289e-02, 1.0714e-01, 9.0462e-02, -6.6959e-02, + -5.5268e-02, -2.4237e-02, 9.0868e-02, -2.7395e-04, -6.6102e-02, + -1.4149e-02, 2.2778e-02, -2.2358e-02, 1.6611e-02, -1.3722e-01, + -1.0803e-03, 9.2254e-03, 9.3046e-04, -9.5271e-02, 7.2699e-03, + -1.5842e-02, -6.4155e-02, 2.2247e-02, -5.4963e-03, -1.6438e-02, + -1.1363e-02, 3.0167e-02, -5.5412e-02, 1.5861e-01, 3.8205e-02, + 1.0428e-01, 1.4365e-01, 5.1521e-03, 9.7592e-02, 7.0424e-02, + -5.2993e-03, 6.1979e-02, -1.1234e-01, 1.3335e-01, -3.8977e-03, + 1.5236e-03, 5.9121e-02, -4.6393e-02, -7.4197e-03, 1.5965e-01, + -7.6934e-02, 2.0333e-01, 4.7435e-02, 1.1154e-01, 1.3193e-01, + 1.2094e-01, -2.3863e-02, 1.9543e-02, 2.3810e-02, 2.4418e-02, + 7.7090e-02, 6.6509e-02, -1.1898e-02, -3.2068e-02, 9.7540e-02, + 8.0355e-02, 2.5759e-02, 
-1.3683e-02, -4.1099e-02, -6.9798e-02, + -1.1555e-01, -9.3035e-02, 9.1930e-02, 1.1443e-01, 1.3131e-01, + -7.3821e-03, 2.6003e-02, -1.0138e-01, 1.0635e-01, 8.4075e-02, + -6.2905e-02, 7.4726e-02, 6.4077e-02, -7.1340e-02, 5.6060e-02, + -1.7636e-02, 1.4522e-02, 6.7943e-02, -4.5894e-02, 8.4953e-02, + -5.2461e-02, -1.8256e-01, -6.3344e-03, -1.4413e-02, -3.7308e-02, + -2.5780e-02, 5.6539e-02, 4.2219e-02, 1.4163e-01, -3.2828e-02, + 3.6561e-02, 6.6173e-02, -7.5909e-02, 1.7378e-02, -8.9888e-02, + -1.4279e-01, 7.4994e-02, 1.5726e-01, 3.1206e-02, -9.0772e-02, + 6.8410e-02, 2.9224e-02, 4.4098e-02, -1.3920e-02, 9.1034e-02, + -4.6151e-02, -5.6685e-02, 5.4332e-02, 7.8448e-02, -1.0195e-01, + 1.2775e-01, -1.4901e-02, 1.1812e-01, 1.7138e-01, 7.7575e-02, + -1.0953e-02, -4.2166e-02, -3.0236e-04, 1.1864e-01, -1.0792e-01, + 5.7050e-02, -6.2270e-02, 9.0958e-02, -6.9540e-02, 9.6204e-02, + 4.0174e-02, -1.6757e-02, 1.3001e-01, 1.4080e-02, 2.0438e-01, + 1.7934e-01, -1.0523e-02, -4.7175e-02, 1.0229e-01, -6.7819e-02, + 7.1270e-02, -9.5283e-02, -5.8948e-02, 1.1191e-01, 1.2083e-01, + -1.7280e-02, 1.4690e-02, -1.0733e-01, -7.0945e-03, 1.0890e-01, + -4.4923e-02, -1.9094e-01, -1.7491e-01, 5.1405e-03, -3.6598e-02, + -2.1371e-01, 6.9781e-02, 8.6824e-02, 7.3522e-02, 5.1444e-03, + 2.4714e-02, -3.9927e-02, -3.7512e-02, 5.3088e-03, 3.9691e-02, + -2.1449e-02, -4.3316e+00, -2.5358e-02, 5.2166e-02, -3.0075e-02, + -1.6318e-01, 2.6883e-02, -7.9553e-02, 3.8043e-02, 8.0910e-02, + -1.0636e-01, 2.9228e-02, -3.2607e-02, -2.1576e-03, -1.1461e-01, + 2.1159e-01, -8.1155e-03, -4.2389e-02, 4.4097e-02, -4.2229e-02, + 1.5469e-01, 8.3980e-02, 2.8703e-02, -1.6500e-02, -1.6388e-01, + -4.1705e-02, 8.7547e-02, -3.8063e-02, -5.4893e-02, 6.7877e-02, + -1.8283e-02, 1.2425e-01, 9.7803e-02, -5.3695e-03, 6.0873e-02, + -4.2723e-02, 1.6263e-02, 4.4332e-02, -8.6944e-02, -4.4148e-02, + -4.8852e-02, -1.3835e-02, 2.1686e-02, -2.0245e-01, 9.2224e-02, + -3.2831e-02, -2.5258e-01, 1.2468e-01, 5.1174e-02, 1.4680e-01, + 3.0071e-02, 6.5667e-02, 
1.4609e-01, 1.1543e-01, 3.1377e-02, + 5.9735e-02, -5.4245e-02, 2.8447e-02, -9.4915e-02, -1.0815e-01, + 7.3491e-02, -1.7688e-01, -1.1766e-01, 8.3479e-02, 8.7339e-04, + -4.7636e-02, -1.5777e-01, 3.0539e-02, 1.1327e-01, -7.4810e-03, + -1.7076e-03, -1.1211e-01, -8.4320e-02, -1.8981e-02, 1.2201e-01, + 3.7016e-02, 2.1191e-02, 1.3702e-02, 2.2009e-02, -3.0147e-02, + -4.6553e-03, 5.7443e-02, 1.1077e-01, -1.2032e-01, -1.3606e-01, + -4.3071e-02, 1.1801e-01, -1.3836e-02], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 0.0171, 0.0130, 0.0185, ..., 0.0060, -0.0017, -0.0105], + [-0.0364, 0.0154, -0.0142, ..., 0.0086, 0.0068, 0.0017], + [-0.0113, -0.0242, 0.0130, ..., 0.0225, 0.0031, -0.0107], + ..., + [ 0.0166, 0.0227, -0.0178, ..., -0.0059, 0.0132, -0.0079], + [ 0.0282, 0.0054, 0.0172, ..., 0.0009, 0.0006, 0.0101], + [ 0.0041, 0.0177, -0.0183, ..., 0.0003, 0.0102, 0.0056]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.2388, -0.3625, -0.0865, ..., -0.3342, -0.2629, -0.1206], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[ 1.4420e-02, -3.2654e-02, -8.9569e-03, ..., 8.2855e-03, + 1.0498e-02, -1.5457e-02], + [-7.6370e-03, -2.0157e-02, 1.2436e-02, ..., 4.3762e-02, + 3.8452e-02, -2.3422e-02], + [ 1.2445e-04, 2.7905e-03, -6.9084e-03, ..., -7.2594e-03, + 1.1620e-02, 1.2497e-02], + ..., + [ 2.7823e-04, 7.8583e-03, -2.6993e-02, ..., 2.8183e-02, + -4.7226e-03, 4.9896e-03], + [-6.7711e-03, -6.6414e-03, -4.2305e-03, ..., 5.4321e-03, + 1.3855e-02, 1.0252e-05], + [ 2.7084e-03, 7.5684e-03, -7.6370e-03, ..., 2.0428e-03, + -1.5198e-02, -6.1722e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.6052e-02, 1.0201e-02, -8.6594e-03, -4.4342e-02, -7.0618e-02, + 1.1162e-02, -3.6133e-02, -5.5695e-02, 8.9783e-02, -7.8552e-02, + -4.6814e-02, 
4.4342e-02, 2.7924e-02, -2.4399e-02, -2.2797e-02, + 3.2745e-02, -5.3680e-02, -2.0996e-02, -4.4037e-02, -4.6631e-01, + -5.6946e-02, 4.1687e-02, 4.3304e-02, 2.7786e-02, 6.3904e-02, + 7.9956e-02, -6.8481e-02, 2.7100e-02, -2.1866e-02, 5.5389e-02, + 4.5807e-02, 4.8409e-03, -1.7181e-02, 5.1270e-02, 3.7201e-02, + -2.4204e-03, -7.8491e-02, -2.8763e-02, 6.2927e-02, 4.0436e-02, + -2.8854e-02, 2.9388e-02, -2.1698e-02, -6.4819e-02, 9.0942e-03, + 7.4730e-03, 1.0193e-01, -5.1788e-02, -4.5746e-02, -4.4373e-02, + -6.1554e-02, 8.2626e-03, -7.9285e-02, 3.9642e-02, -2.2995e-02, + 5.6549e-02, -3.1433e-02, -2.1362e-03, -6.9031e-02, -1.1696e-02, + 1.2123e-02, 3.1708e-02, -1.4946e-02, -3.5370e-02, -2.2980e-02, + 4.5166e-02, -2.7908e-02, -1.0938e-01, -4.7424e-02, -9.8389e-02, + 6.4636e-02, -2.4658e-02, -1.8875e-02, -4.8370e-02, 4.2480e-02, + -3.7872e-02, 6.0883e-03, 4.2053e-02, 7.9575e-03, 3.3356e-02, + 4.2572e-02, -1.9627e-03, 5.0842e-02, -2.4002e-02, 1.1917e-02, + -5.9631e-02, -3.0804e-03, -7.9346e-02, -7.4585e-02, 1.3626e-02, + -1.1493e-01, -1.7883e-02, -2.7481e-02, 9.0210e-02, 5.0354e-02, + -2.0340e-02, -3.8391e-02, 2.9404e-02, 3.5767e-02, 7.1350e-02, + 6.0577e-02, -1.0492e-01, -7.0251e-02, 3.9551e-02, -5.0720e-02, + -2.0462e-02, 3.2684e-02, -6.1279e-02, 5.0415e-02, -5.6000e-02, + 9.8755e-02, -3.2623e-02, -8.6487e-02, -3.4210e-02, -4.0344e-02, + 2.0935e-02, 4.9530e-02, 4.4525e-02, -4.6234e-02, -7.0152e-03, + -1.1536e-02, 4.9133e-03, -4.8599e-03, 1.8417e-02, 1.7548e-02, + 1.2192e-02, -9.1248e-03, 1.2292e-01, 5.0293e-02, 9.3155e-03, + -6.1218e-02, 6.2073e-02, 2.1805e-02, 2.1942e-02, -1.9531e-02, + 2.4841e-02, 3.4428e-03, -4.9530e-02, 7.4890e-02, 1.9943e-02, + 1.0791e-01, -4.0833e-02, 1.1581e-02, -3.7708e-03, -1.5732e-02, + -1.6241e-03, -2.5009e-02, -5.6244e-02, 6.9389e-03, -1.8646e-02, + 2.1500e-02, -1.2802e-02, 2.7405e-02, -5.1270e-02, -5.3192e-02, + 3.5431e-02, 3.1242e-03, 6.0120e-02, -6.2988e-02, 1.6846e-02, + 1.5945e-02, -4.3335e-03, 2.6749e-02, -3.8116e-02, -5.8624e-02, + 
2.7939e-02, 1.1310e-01, -1.2802e-02, 7.2266e-02, 1.4105e-03, + 9.4452e-03, -6.7078e-02, -1.2672e-02, -1.2039e-02, -1.0199e-01, + -3.8513e-02, -2.9129e-02, -9.3140e-02, 1.2561e-01, 1.0223e-01, + -5.7129e-02, 9.0790e-03, -3.0251e-03, -2.7313e-02, -2.7008e-02, + 4.2908e-02, -8.0383e-02, -1.9067e-01, 2.3300e-02, 3.2776e-02, + -1.1169e-01, -1.4175e-02, 7.6828e-03, 8.9722e-03, 1.9699e-02, + 7.4463e-02, 1.0376e-01, -5.2094e-02, 1.1694e-01, 7.2388e-02, + 4.7424e-02, 2.6474e-02, -2.7180e-03, -7.5256e-02, -3.9703e-02, + 2.1324e-03, 2.1332e-02, -9.0027e-02, -4.5044e-02, 1.4114e-02, + -1.3870e-02, -3.0945e-02, 2.5883e-03, 1.4809e-02, 6.4758e-02, + 2.8717e-02, 4.7546e-02, -5.4199e-02, 4.0375e-02, 4.5380e-02, + 3.6682e-02, 1.3125e-04, 1.9722e-03, -5.7892e-02, -2.1072e-02, + 9.9304e-02, -8.0811e-02, 5.3162e-02, 4.6875e-02, 4.6921e-03, + -4.1580e-03, -4.0436e-02, 1.0590e-02, -3.3661e-02, -1.3885e-02, + -1.2598e-01, 4.5563e-02, -2.5955e-02, -1.8158e-02, 5.2521e-02, + 3.5950e-02, 6.2805e-02, 1.1520e-02, -6.6223e-03, -5.7831e-02, + -1.7944e-02, -4.5135e-02, 2.9724e-02, -3.3203e-02, -4.0932e-03, + -8.1177e-02, -2.3438e-02, 1.3756e-02, 1.4565e-02, -3.3661e-02, + -3.1235e-02, -3.8910e-02, -5.9113e-02, -1.2201e-01, -6.6605e-03, + -7.9895e-02, -8.3252e-02, -5.2246e-02, -1.0483e-02, 4.4739e-02, + -1.0933e-02, 2.1057e-02, 1.6663e-02, -2.7435e-02, -6.3591e-03, + 4.2694e-02, -8.2153e-02, 1.5106e-02, -2.7939e-02, 8.7708e-02, + 3.4271e-02, 5.5962e-03, 1.5182e-02, -1.1713e-01, -4.7211e-02, + -1.8417e-02, -5.8868e-02, -4.3823e-02, 4.2877e-02, 3.0701e-02, + -4.4586e-02, -9.9060e-02, 6.2744e-02, 3.6945e-03, -2.0569e-02, + 1.7212e-02, 6.6589e-02, 6.5422e-03, -7.2899e-03, 2.0370e-02, + -1.2108e-02, 1.3697e-04, 1.3647e-03, -6.7200e-02, -3.7567e-02, + -2.0752e-02, -2.4338e-02, -8.2886e-02, 6.1127e-02, -7.6172e-02, + 5.2216e-02, -8.7402e-02, 7.6538e-02, -7.1777e-02, 1.4297e-02, + 1.1879e-02, 1.1810e-02, -7.9956e-02, -1.0797e-01, -1.4229e-02, + 1.2405e-02, -3.7964e-02, 4.5685e-02, -3.7415e-02, 
4.2343e-03, + -1.7639e-02, -4.4647e-02, 1.6220e-02, 2.0432e-02, 2.0430e+00, + -9.8450e-02, 5.6702e-02, -2.4724e-04, -3.8849e-02, 3.6133e-02, + -1.4502e-01, 8.0719e-03, 9.3689e-03, -4.4785e-03, 5.9845e-02, + -1.1639e-01, -3.2776e-02, 9.5276e-02, 4.3732e-02, 6.3110e-02, + 4.8370e-02, -2.0233e-02, -7.6355e-02, 4.1771e-03, -3.2715e-02, + 8.4778e-02, 3.7781e-02, 3.6287e-04, -4.7974e-02, -3.3478e-02, + 1.0468e-01, -8.3069e-02, -3.0289e-02, -3.8116e-02, 1.7334e-02, + 5.9753e-02, -2.5528e-02, -1.6190e-02, 3.8509e-03, 8.3313e-02, + 1.1879e-02, 4.3854e-02, 4.4739e-02, 7.8186e-02, 3.1464e-02, + -1.2276e-02, -7.3059e-02, 2.1149e-02, 8.8959e-03, -3.4760e-02, + 3.8452e-02, 6.4636e-02, 5.7465e-02, 1.4854e-02, -4.1718e-02, + 2.0065e-03, 1.3435e-02, 1.7014e-03, 1.0608e-01, -4.0245e-03, + 1.0063e-02, -2.1152e-03, -1.8509e-02, -1.0760e-01, 6.1249e-02, + -5.3650e-02, 6.3896e-03, 5.0262e-02, 4.8462e-02, 7.9775e-04, + -2.6413e-02, 5.1086e-02, 8.9417e-03, -4.1931e-02, 3.2837e-02, + -4.6387e-02, -7.0251e-02, -1.3779e-02, 6.0699e-02, -3.0365e-02, + 2.9541e-02, -1.7227e-02, -6.7749e-02, -4.4891e-02, -2.4643e-03, + -2.8870e-02, -4.9438e-02, 1.7319e-02, 1.7258e-02, 1.5808e-02, + 8.2825e-02, -3.2990e-02, 2.6413e-02, -2.1942e-02, 1.0645e-01, + 1.2561e-01, -1.4076e-02, -9.6741e-03, -9.7580e-03, 6.7444e-02, + -3.7201e-02, 6.8420e-02, -3.1036e-02, -4.1748e-02, 3.4119e-02, + 1.0010e-02, 3.4485e-02, 2.0081e-02, -8.8623e-02, -2.4200e-02, + 1.2131e-02, 1.6129e-02, -9.3323e-02, -8.3862e-02, 2.7252e-02, + 4.3854e-02, -7.9422e-03, 3.7811e-02, -1.9348e-02, 8.6670e-03, + 1.9474e-03, 2.4765e-02, 4.2084e-02, -7.7095e-03, -5.0171e-02, + -3.3722e-02, 6.4049e-03, 2.3300e-02, 1.6434e-02, 2.4323e-02, + 1.8066e-02, 1.6968e-02, -6.0211e-02, 4.5700e-03, -3.1342e-02, + 1.0109e-02, 1.3588e-02, 4.6265e-02, -3.0106e-02, 9.6924e-02, + 3.7048e-02, -5.9753e-02, -6.7596e-03, -4.4434e-02, -7.4463e-02, + 4.8553e-02, -1.9760e-02, 1.2524e-01, 6.9160e-03, 1.3672e-02, + -1.3000e-01, 4.5943e-04, -9.0820e-02, 3.8757e-02, 
-1.0222e-04, + 1.3878e-02, 1.1604e-02, -3.9154e-02, -3.4668e-02, -3.1624e-03, + -2.1835e-02, 4.1443e-02, 1.1955e-02, -5.5176e-02, 4.9286e-02, + -9.8206e-02, 2.6184e-02, -2.0859e-02, -4.6356e-02, -7.5500e-02, + 4.2686e-03, -9.6497e-02, 5.5054e-02, 8.9111e-03, 1.0323e-02, + -4.2419e-02, -4.8828e-02, -1.3281e-01, -6.1874e-03, -4.8584e-02, + 8.8549e-04, 3.3447e-02, -3.6865e-02, -1.9928e-02, 1.1330e-02, + 2.9358e-02, -1.9515e-04, 1.3184e-01, 1.6724e-02, -3.0303e-04, + -6.4575e-02, 1.5656e-02, -8.7585e-02, -5.6702e-02, -2.4384e-02, + 4.3091e-02, 8.2886e-02, -1.2524e-01, -4.3416e-04, 6.2866e-02, + 2.0966e-02, -2.7939e-02, 3.7292e-02, 7.5256e-02, -8.3191e-02, + -5.0659e-03, 3.5877e-03, 1.8738e-02, -4.9896e-02, -2.5497e-02, + 5.1056e-02, 1.6586e-02, 2.5009e-02, 8.5754e-02, -6.3904e-02, + -3.8025e-02, 5.7587e-02, -3.1403e-02, 6.9153e-02, 8.4900e-02, + 5.5817e-02, -5.0079e-02, 7.1350e-02, -1.9592e-02, 2.8137e-02, + 9.0759e-02, -2.9099e-02, 8.1558e-03, -4.1687e-02, 2.2964e-02, + 6.1798e-02, 2.2781e-02, 1.8173e-02, -3.5126e-02, 5.9013e-03, + 1.9638e-02, 4.8462e-02, -3.1799e-02, -4.3488e-02, -7.4005e-03, + -1.4023e-02, -7.3730e-02, -7.4219e-02, -1.8799e-02, -4.6234e-02, + 1.5388e-02, -7.0740e-02, -5.9479e-02, 5.1666e-02, -3.6072e-02, + -9.7561e-04, -2.4204e-03, -5.8632e-03, -5.8014e-02, 1.7624e-02, + -2.3365e-03, 6.2683e-02, 2.9175e-02, -1.5945e-03, 5.1575e-02, + 7.2754e-02, -1.0004e-01, 2.3239e-02, 3.6041e-02, -1.4709e-02, + -8.6823e-03, -4.5654e-02, 1.0431e-01, 1.8219e-02, 2.9541e-02, + -2.7130e-02, 8.1238e-02, -1.9852e-02, -4.0070e-02, -4.2297e-02, + -1.0567e-02, -4.9377e-02, -5.4550e-04, -3.5645e-02, 3.8269e-02, + 3.2806e-02, 5.8533e-02, -5.9998e-02, 1.0443e-03, 5.3501e-04, + 4.5868e-02, 4.1819e-04, -9.7778e-02, 3.9703e-02, 3.8971e-02, + 3.3508e-02, 3.2593e-02, -1.0040e-02, 2.6093e-02, -2.0981e-02, + -1.0513e-02, 4.8889e-02, 7.7087e-02, -4.7363e-02, -2.3239e-02, + -8.3435e-02, -3.1311e-02, 3.3936e-02, -6.5979e-02, 3.6041e-02, + 1.8372e-02, -3.1158e-02, 3.0701e-02, 
-1.1646e-01, -8.1238e-02, + -1.7509e-03, 4.7684e-03, 2.2217e-02, -4.8370e-02, 3.3630e-02, + -5.8563e-02, 3.6591e-02, -8.4763e-03, -1.1703e-02, 9.7122e-03, + 3.4271e-02, -2.8183e-02, 2.9190e-02, 2.5452e-02, -5.9998e-02, + 8.0719e-03, -5.5542e-02, -1.9007e-03, -3.5919e-02, -1.4893e-02, + -4.0436e-02, -8.2458e-02, 5.3223e-02, 4.0588e-02, 9.2545e-03, + 5.9143e-02, 2.1530e-02, 6.4209e-02, 1.4913e-04, -2.3804e-02, + 2.4689e-02, -3.0727e-03, 1.7593e-02, -5.5603e-02, -1.2331e-03, + -5.0171e-02, -3.5004e-02, 3.3600e-02, 2.1820e-02, -2.1591e-02, + 2.4460e-02, -4.1412e-02, -2.2293e-02, -4.1618e-03, 2.1927e-02, + -3.3741e-03, 7.3929e-03, 6.1083e-04, -4.9622e-02, 5.9509e-03, + -2.7496e-02, 4.1455e-01, 4.4556e-02, 2.9053e-02, 3.4912e-02, + -3.5065e-02, 3.6102e-02, 2.4399e-02, 8.8135e-02, -2.7924e-02, + -3.8391e-02, -2.7908e-02, 3.9490e-02, -6.0638e-02, -7.5256e-02, + 8.7402e-02, 1.2520e-02, 4.1046e-02, -4.9408e-02, 2.0691e-02, + 4.8645e-02, 9.2102e-02, -3.4241e-02, -3.1494e-02, -5.8105e-02, + 1.1554e-01, 6.1371e-02, -2.2675e-02, -8.6746e-03, -6.1554e-02, + 3.6621e-02, 6.9122e-03, 3.2318e-02, -7.4829e-02, 8.0322e-02, + 2.4734e-02, 1.1273e-01, 4.0039e-02, -8.9788e-04, -8.0795e-03, + -8.4152e-03, 1.7761e-02, 4.0833e-02, -7.9117e-03, 4.7035e-03, + -2.4872e-03, 2.5681e-02, -2.8564e-02, -3.9398e-02, 7.8049e-03, + -6.6345e-02, -1.0602e-01, 4.6448e-02, -5.2765e-02, 3.7506e-02, + 9.6436e-02, -1.4091e-02, 2.6962e-02, -8.5754e-02, 1.5701e-02, + -3.0655e-02, -1.7639e-02, 2.4582e-02, -4.4098e-02, -1.8066e-02, + 2.7969e-02, 6.6147e-03, -8.4412e-02, 1.3481e-02, -1.1090e-01, + 6.2378e-02, 1.5701e-02, -3.7140e-02, 2.2751e-02, -6.6040e-02, + 3.5492e-02, 2.5757e-02, -7.2937e-02, -2.1805e-02, -7.4158e-03, + -2.7374e-02, 3.4119e-02, 4.5685e-02, -2.6093e-02, -3.7170e-02, + -1.5736e-03, -1.2703e-03, 4.0558e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.5576, 2.3304, 2.3711, 2.3977, 1.9550, 2.6222, 2.4071, 2.4198, 2.5433, + 2.2927, 
2.3707, 2.4486, 2.3675, 2.2586, 2.4003, 2.5166, 2.4092, 2.3295, + 2.3509, 1.9934, 2.4948, 2.4201, 2.5280, 2.3026, 2.3817, 2.3575, 2.4651, + 2.4091, 2.2956, 2.3569, 2.4069, 2.4324, 2.4017, 2.5549, 2.2891, 2.3673, + 2.5084, 2.3113, 2.2584, 2.4008, 2.2910, 2.5507, 2.4455, 2.3672, 2.4784, + 2.4578, 2.4305, 2.3201, 2.3507, 2.2985, 2.3682, 2.4973, 2.4444, 2.4272, + 2.4040, 2.2344, 2.4056, 2.3216, 2.4079, 2.3590, 2.3579, 2.3738, 2.3978, + 2.3264, 2.5303, 2.4412, 2.5666, 2.3776, 2.4179, 2.1270, 2.4574, 2.4055, + 2.4645, 2.4699, 2.4424, 2.5034, 2.5580, 2.3774, 2.3579, 2.5861, 2.5141, + 2.3922, 2.3188, 2.3972, 2.5189, 2.5155, 2.4887, 2.3563, 2.5517, 2.4372, + 2.3899, 2.4647, 2.5373, 2.5416, 2.4712, 2.4022, 2.2961, 2.5308, 2.4706, + 2.4441, 2.3014, 2.4697, 2.4678, 2.4144, 2.4452, 2.0670, 2.4215, 2.4244, + 2.3358, 2.3543, 2.2992, 2.5740, 2.4565, 2.4551, 2.5621, 2.2902, 2.5656, + 2.5313, 2.4438, 2.4695, 2.3469, 2.4974, 2.4416, 2.3992, 2.4196, 2.2348, + 2.6539, 2.6797, 2.4311, 2.4361, 2.3586, 2.6112, 2.6770, 2.4377, 2.4944, + 2.4049, 2.2290, 2.4017, 2.3639, 2.5649, 2.3928, 2.3823, 2.4417, 2.2946, + 2.4853, 2.3311, 2.3497, 2.5481, 2.4626, 2.5429, 2.4029, 2.3552, 2.3902, + 2.5832, 2.4849, 2.2304, 2.4417, 2.3722, 2.5731, 2.5410, 2.2837, 2.4604, + 2.3329, 2.4226, 2.4918, 2.2607, 2.3743, 2.5165, 2.4731, 2.5513, 2.5786, + 2.3786, 2.2037, 2.4933, 2.2872, 2.5195, 2.3746, 2.4236, 2.2408, 2.4877, + 2.5523, 2.4471, 2.4894, 2.5245, 2.2506, 2.1873, 2.2735, 1.4312, 2.3013, + 2.3922, 2.5230, 2.4725, 2.4565, 2.4358, 2.5594, 2.4528, 2.3324, 2.4306, + 1.2379, 2.2861, 2.4164, 2.3120, 2.3680, 2.5697, 2.4218, 2.4618, 2.4612, + 2.4438, 2.3577, 2.4119, 2.4494, 2.2746, 2.5410, 2.0422, 2.4465, 2.5530, + 2.1844, 2.4634, 2.3859, 2.2563, 2.4984, 2.3779, 1.9990, 2.4829, 2.4556, + 2.5563, 2.4028, 2.3832, 2.3399, 2.2638, 2.4324, 2.4482, 2.3053, 2.2566, + 2.3205, 2.4360, 2.4522, 2.3403, 2.5395, 2.4561, 2.4037, 2.5031, 2.3726, + 2.4524, 2.4662, 2.4363, 2.6728, 2.4845, 2.4196, 2.4342, 2.5506, 2.3502, + 2.4282, 
2.4350, 2.5386, 2.4616, 2.4830, 2.3861, 2.5172, 2.5778, 2.3977, + 2.4931, 2.4868, 2.4324, 2.4132, 2.3309, 2.4617, 2.4026, 2.6077, 2.3682, + 2.4094, 2.4648, 2.5450, 2.3728, 2.3849, 2.7174, 2.4184, 2.4798, 2.3318, + 2.4888, 2.4238, 2.2394, 2.2824, 2.5057, 2.5209, 2.3631, 2.4332, 2.3806, + 2.3419, 2.2953, 2.4176, 2.3549, 2.3651, 2.5080, 2.4547, 2.4592, 2.2354, + 2.3696, 2.3371, 2.4121, 2.4274, 2.5341, 2.3987, 2.6333, 2.5166, 2.4138, + 2.5587, 2.5251, 2.3318, 2.3929, 2.3471, 2.5221, 2.4845, 2.4899, 2.5011, + 2.4038, 2.6121, 2.4854, 2.3235, 2.5669, 2.3907, 2.2766, 2.3783, 2.4000, + 0.6357, 2.4000, 2.5624, 2.3160, 2.6602, 2.3403, 2.3703, 2.4046, 2.4510, + 2.4836, 2.4286, 2.1999, 2.6096, 2.4631, 2.3707, 2.3089, 2.4851, 2.3330, + 2.3177, 2.5466, 2.4031, 2.4949, 2.5606, 2.4180, 2.4414, 2.4380, 2.0319, + 2.4428, 2.4251, 2.1127, 2.4867, 2.3711, 2.4772, 2.4604, 2.3391, 2.3586, + 2.4182, 2.3580, 2.4760, 2.4144, 2.4725, 2.5182, 2.3207, 2.6489, 2.4085, + 2.3826, 2.4882, 2.3557, 2.4317, 2.3772, 2.4133, 2.4613, 2.4010, 2.4738, + 2.5751, 2.4574, 2.3113, 2.6367, 2.5771, 2.3439, 2.4362, 2.3548, 2.4980, + 2.3924, 2.4147, 2.3892, 2.5263, 2.7315, 2.4591, 2.4544, 2.3935, 2.3675, + 2.4730, 2.3705, 2.4611, 2.2936, 2.4690, 2.4581, 2.4748, 2.4659, 2.3809, + 2.4476, 2.6219, 2.3810, 2.5058, 2.7186, 2.2634, 2.7011, 2.4224, 2.6094, + 2.2897, 3.4400, 2.3407, 2.3741, 2.3515, 2.5698, 2.5394, 2.4029, 2.4405, + 2.3774, 2.6522, 2.4810, 2.1642, 2.3742, 2.8108, 2.4177, 2.2984, 2.4548, + 2.6247, 2.4611, 2.5190, 2.4920, 2.4434, 2.4291, 2.4715, 2.3702, 2.4663, + 2.2900, 2.4210, 1.7521, 2.3985, 2.3208, 2.4050, 2.4549, 2.4296, 2.3289, + 2.4849, 2.4413, 2.5168, 2.5119, 2.5425, 2.4593, 2.5218, 2.4031, 2.4426, + 2.3513, 2.4843, 2.5100, 2.3698, 2.3297, 2.2840, 2.4772, 2.6122, 2.2475, + 2.4717, 2.2813, 2.5931, 2.3489, 2.3603, 2.3900, 2.4951, 2.4265, 2.4145, + 2.3638, 2.5526, 2.2686, 2.4221, 2.3169, 2.3508, 1.9304, 2.4101, 2.4658, + 2.4240, 2.3683, 2.4086, 2.4019, 2.3520, 2.4472, 2.6275, 2.3913, 2.3408, + 2.3086, 
2.4151, 2.3883, 2.4458, 2.3428, 2.4694, 2.3540, 2.4129, 2.4636, + 2.7249, 2.3469, 2.4367, 2.3560, 2.4105, 2.3652, 2.4830, 2.5175, 2.2671, + 2.3230, 2.3949, 2.4571, 2.4217, 2.4320, 2.3076, 2.6991, 2.3035, 2.4677, + 2.4479, 2.4913, 2.3424, 2.4013, 2.4394, 2.3548, 2.3995, 2.3956, 2.4178, + 2.4282, 2.3959, 2.5658, 2.3178, 2.5060, 2.4149, 2.4419, 2.5459, 2.4721, + 2.3397, 2.6064, 2.6917, 2.4959, 2.4903, 2.2762, 2.4950, 2.2810, 2.3614, + 2.4633, 2.4814, 2.3575, 2.5248, 2.3733, 2.3363, 2.4094, 2.5385, 2.3083, + 2.3769, 2.5763, 2.4112, 2.2750, 2.4145, 2.3639, 2.5136, 2.3405, 2.4383, + 2.5640, 2.5239, 2.4995, 2.5569, 2.3197, 2.3858, 2.3962, 2.4429, 2.4544, + 2.3997, 2.3287, 2.4404, 2.3087, 2.3067, 2.4738, 2.3649, 2.5190, 2.5092, + 2.4250, 2.2719, 2.4374, 2.4496, 2.4747, 2.3308, 2.4564, 2.4331, 2.6464, + 2.4636, 2.2174, 2.3275, 2.4728, 2.5065, 2.3889, 2.3736, 2.3614, 2.4190, + 2.3662, 2.5910, 2.2835, 2.4849, 2.4330, 2.5681, 2.5034, 2.3754, 2.2966, + 2.2987, 2.5070, 2.4195, 2.6241, 2.4842, 2.3508, 2.4747, 2.3618, 2.3558, + 2.1667, 2.5238, 2.4680, 2.5973, 2.5392, 2.3654, 2.4019, 2.4716, 2.4832, + 2.5000, 2.5019, 2.3741, 2.3643, 2.3876, 2.3685, 2.6611, 2.4422, 2.4762, + 2.3547, 2.4025, 2.4497, 2.2040, 2.4888, 2.2723, 2.3743, 2.2826, 2.6054, + 2.3190, 2.4891, 2.5162, 2.4549, 2.2425, 2.4536, 2.4128, 2.4304, 2.4707, + 2.5659, 2.3483, 2.2955, 1.8539, 2.3997, 2.3637, 2.3968, 2.4626, 2.3946, + 2.3222, 2.3524, 2.5597, 2.3742, 2.2840, 2.3314, 2.3857, 2.2875, 2.4121, + 2.2458, 2.4827, 2.3292, 2.6354, 2.4387, 2.4942, 2.2355, 2.3579, 2.5134, + 2.2878, 2.3131, 2.5136, 2.2672, 2.4973, 2.4720, 2.3654, 2.5278, 2.4510, + 2.3359, 2.2845, 2.6296, 2.3547, 2.3985, 2.4756, 2.5552, 2.3655, 2.3423, + 2.4334, 2.3119, 2.4998, 2.4868, 2.6313, 2.3357, 2.4385, 2.4794, 2.4030, + 2.3261, 2.4024, 2.4096, 2.4862, 2.5003, 2.4382, 2.5027, 2.4801, 2.4794, + 2.4303, 2.5030, 2.3749, 2.3007, 2.4419, 2.4550, 1.8549, 2.3850, 2.4200, + 2.3704, 2.6410, 2.2996, 2.3996, 2.4752, 2.5489, 2.4231, 2.5352, 2.2041, + 2.6158, 
2.4349, 2.3655, 2.7523, 2.3759, 2.4892, 2.3502, 2.2487, 2.4969, + 2.4593, 2.5063, 2.4189, 2.4738, 2.4963, 2.3114, 2.3510, 2.3365, 2.4439, + 2.4067, 2.2807, 2.4631, 2.4164, 2.2501, 2.5119, 2.4704, 2.4727, 2.3767, + 2.4629, 2.4223, 2.4440], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.9932e-02, -3.0162e-01, 1.4128e-01, -3.4646e-01, -2.5106e-01, + -5.6301e-01, 2.2639e-01, 2.4600e-01, 1.6465e-01, -4.2078e-01, + -1.2340e-01, 3.7251e-01, 1.2406e-01, 2.7133e-01, 3.1003e-02, + 1.7342e-01, 1.8523e-01, 1.8913e-01, -1.4909e-01, -3.1221e-01, + -2.9462e-01, 1.4270e-01, 6.6221e-01, -4.7315e-01, 1.8876e-01, + 7.3583e-02, -1.2354e-01, -7.3950e-02, -1.2269e-01, 4.9641e-01, + 3.4733e-01, -3.2025e-02, -2.0315e-02, -4.4567e-01, -2.4298e-01, + -3.3707e-01, -3.7226e-01, 4.7930e-01, 6.1613e-01, -3.1205e-01, + -1.6558e-01, 6.1039e-02, 4.8515e-01, 4.7716e-02, -5.9613e-01, + -6.4904e-01, -4.5658e-01, -2.0424e-01, -1.2793e-01, 3.3709e-01, + -1.8851e-01, -8.0029e-01, -1.7977e-02, -3.8074e-01, -6.6202e-01, + 2.6686e-01, 5.5996e-01, -4.8043e-01, -6.7872e-02, 1.2299e-01, + -9.4741e-02, 5.9958e-02, 4.9218e-02, -4.2103e-01, 2.5196e-01, + 2.4068e-01, -3.8744e-02, -1.3429e-01, -4.8376e-01, -6.1607e-01, + 3.2976e-01, 9.5367e-02, -1.5271e-01, -4.6356e-01, -2.8587e-01, + -4.2759e-01, 6.3393e-01, 2.3609e-02, 1.5916e-01, 8.8886e-01, + 2.6343e-01, 3.4043e-01, -4.8861e-01, -1.8627e-01, -6.9661e-01, + -5.4499e-01, 4.9617e-01, -9.7585e-02, 4.6151e-01, -2.2765e-01, + -3.9411e-03, -1.6364e-01, -5.1623e-01, 2.0387e-01, -1.4929e-01, + -2.7565e-01, 2.2874e-01, -1.8453e-01, -5.3695e-01, 6.6773e-01, + 4.4026e-02, -4.9675e-01, -4.4427e-01, 1.0410e-01, 4.6265e-01, + 2.2047e-01, 4.1624e-01, 1.1387e-01, -6.0574e-02, -1.2546e-02, + 8.7225e-02, 4.3368e-01, -3.9527e-01, -2.3091e-01, 1.4382e-01, + -4.0146e-01, 6.7164e-01, 4.8646e-01, 2.2774e-01, 3.8298e-01, + -1.9689e-01, 2.4062e-01, -5.9467e-02, 2.6072e-01, 3.2621e-01, + -2.5951e-01, 2.8232e-01, 6.0466e-01, -2.6100e-01, 
1.8510e-01, + 1.3541e-01, -5.1284e-01, 4.8758e-01, -7.8206e-02, 4.7141e-01, + 1.7501e-01, 1.4147e-01, -1.8194e-01, 3.3316e-01, -3.6307e-01, + -3.6573e-01, 5.3486e-01, -1.8098e-02, -2.2143e-01, 4.6298e-02, + 5.6899e-03, 4.3567e-01, -4.8673e-01, -6.7744e-01, -1.4599e-01, + 9.3934e-02, -1.6618e-02, 2.4010e-01, -5.1935e-01, -2.4059e-01, + 7.7139e-01, 5.8555e-01, -4.2442e-01, -2.0791e-01, -4.0307e-01, + -1.1149e-01, -2.0498e-02, 8.9808e-02, -4.4491e-01, 7.5951e-01, + 4.6165e-02, 3.6412e-01, -1.5113e-01, 3.0061e-01, -4.1646e-01, + 6.1315e-01, 3.3305e-02, -5.2668e-01, 6.5254e-01, 9.5107e-02, + 3.4002e-01, 7.6512e-02, -5.4823e-02, 9.7594e-02, -2.7147e-01, + -7.9573e-01, -9.5936e-02, 6.0138e-01, 6.2805e-01, -1.2078e-01, + 5.6416e-01, -2.2096e-03, 2.1145e+00, -1.0990e-01, -3.9488e-02, + -8.6518e-01, -4.6028e-01, 2.7963e-01, 5.6114e-01, 3.7760e-01, + -9.8124e-02, 1.6267e-01, 3.0374e-02, -5.7425e-01, 1.3610e-01, + -4.6454e-01, -7.3948e-02, 1.4996e-01, -4.4412e-01, -3.2803e-01, + -2.5651e-01, 1.9774e-01, 4.8669e-01, 9.0103e-02, -4.5681e-01, + 2.8155e-01, -1.8632e-01, -8.3922e-01, 2.0943e-01, 2.9468e-01, + -3.3018e-01, -2.1694e-01, -2.5128e-01, -3.5858e-01, 7.1802e-02, + 5.5272e-01, 1.1630e-01, -4.3954e-01, 6.8150e-01, 3.1679e-02, + 2.2199e-01, -1.1738e-01, 3.0026e-01, 1.3040e-01, 2.2225e-01, + -2.4677e-01, -1.0417e-01, 3.3233e-01, 1.9889e-01, -5.1069e-01, + -1.9670e-01, 2.4504e-01, 1.9361e-01, -2.3999e-01, 2.3646e-01, + -6.1190e-02, 4.4584e-03, 1.3534e-01, -1.4524e-01, -3.5619e-01, + -5.1144e-01, -4.9386e-01, 3.5775e-01, -3.1955e-01, 6.4570e-01, + -1.2014e-01, 7.9763e-01, 2.1697e-01, 7.6738e-02, -2.2917e-02, + -5.9375e-01, 5.2489e-01, -6.0439e-01, -2.7954e-01, -5.2342e-01, + 8.9634e-02, 1.5874e-01, 5.5865e-01, 2.5939e-01, -5.7272e-02, + -4.0273e-02, 2.4863e-01, -1.7334e-01, 6.4374e-01, -2.2190e-01, + -3.6028e-01, 1.9930e-01, -3.9872e-01, -3.2904e-01, 1.3150e-01, + -7.3514e-01, -3.3150e-01, 3.8126e-01, -1.5717e-01, -5.4585e-02, + -4.2850e-01, -7.4214e-01, -1.7948e-01, 
4.3636e-01, -4.9748e-01, + 4.5393e-01, 1.7344e-01, -9.9264e-03, 3.6427e-01, -7.0329e-02, + -4.7180e-01, 1.2721e-01, 5.0421e-01, -3.5168e-01, -3.9122e-01, + -2.7454e-01, 2.4527e-01, -6.6616e-01, 4.3131e-01, -2.6262e-01, + -3.2642e-01, 8.5823e-02, -5.1454e-02, -1.1051e+00, 4.8101e-01, + 2.7046e-01, -1.9462e-01, 3.6501e-01, -2.0426e-01, -2.7896e-01, + 3.8353e-01, -4.3028e-01, 4.5165e-01, 5.5885e-01, -3.3605e-02, + -1.0840e-01, 5.6463e-01, 4.7314e-01, -5.7062e-02, 5.5220e-01, + -3.7576e-01, -2.8350e-01, 1.8441e-01, -3.4029e-01, 5.2433e-02, + -4.3591e-01, -5.0902e-01, 3.3369e-01, 2.0976e-01, -2.3188e-01, + 4.9477e-01, -6.9597e-02, -2.6938e-01, -5.6728e-02, 6.2881e-01, + 7.3492e-02, -9.3713e-01, 1.1243e-01, 3.7966e-02, -6.5733e-02, + 3.9749e-01, -1.6061e-01, -5.4494e-01, 4.2282e-01, -3.0773e-01, + 7.1377e-01, -6.8213e-01, -1.5114e-01, -1.4101e-01, 1.6538e-01, + 3.5520e-01, 4.3173e-01, -1.6039e-01, 4.6407e-02, -9.5964e-02, + 8.6294e-02, -3.3205e-01, 5.8999e-02, -3.7933e-01, 1.3829e-01, + -5.0132e-01, -4.4829e-01, -8.1570e-02, -5.1014e-01, 3.7524e-01, + -2.9223e-01, 3.9546e-02, 4.7483e-02, 1.9292e-01, 2.2456e-01, + -2.5084e-01, -7.0861e-02, 2.6201e-01, -4.7447e-02, 1.1546e-03, + -1.7990e-01, -7.7817e-02, 4.9409e-01, -4.3644e-01, -3.4852e-01, + 9.1261e-02, -7.7327e-01, -3.1767e-01, 3.4946e-01, 2.7020e-01, + -1.8949e-01, 8.8170e-02, 5.3733e-01, 2.0567e-01, 1.3307e-01, + 5.8523e-01, -7.4638e-01, 6.8873e-01, 4.0128e-01, -5.1895e-01, + -1.4250e-01, 1.2085e-02, -1.7605e-01, -1.8713e-01, 1.7004e-02, + 1.2924e-01, 6.0687e-01, -4.2209e-01, -6.2843e-01, 5.2935e-01, + -6.1243e-02, 2.2317e-01, -6.2447e-02, -2.8592e-01, 8.2672e-01, + 1.1707e-01, -5.4270e-01, 2.9941e-01, 2.8900e-01, 8.6537e-01, + -1.6511e+00, 1.1635e-01, 2.8656e-02, 2.1624e-01, 1.9607e-01, + 5.0981e-01, -4.5917e-01, -1.5322e-01, 4.5360e-02, -5.6081e-01, + 4.8290e-01, 3.4567e-01, -5.8747e-02, -6.8929e-01, 1.2810e-01, + -2.5956e-01, -3.9461e-01, -6.4888e-01, -8.8910e-03, -2.0434e-02, + -1.4205e-01, -2.4369e-02, 
5.7545e-01, 1.1698e-01, -3.8967e-01, + 3.4959e-01, 3.5758e-01, 3.2217e-02, -6.6200e-01, -6.6839e-02, + 5.8133e-02, 2.7942e-01, 2.9290e-01, 1.7966e-02, -3.6733e-01, + -9.9340e-02, -1.3521e-01, -2.7681e-01, -4.2879e-01, -3.3071e-01, + -5.7073e-01, -5.8907e-01, -5.0452e-01, 2.0418e-01, 1.0199e-01, + -3.9590e-01, 1.8624e-01, -2.9696e-01, 1.8448e-01, -3.0404e-01, + 4.6199e-01, -3.9756e-01, 3.4744e-01, -2.6387e-01, -1.1653e-01, + 5.9746e-01, 1.8090e-01, -1.2004e-01, -1.1056e-01, -5.3151e-01, + -4.6283e-01, 3.1908e-02, -7.4224e-01, -3.6923e-01, -5.3822e-01, + -4.1702e-01, 2.8470e-01, 4.4310e-01, 1.6097e-01, 1.9126e-01, + -1.3619e-01, 2.4835e-01, 6.0324e-02, 8.2238e-02, 3.8127e-01, + -8.2515e-02, 4.5638e-01, 2.9320e-01, 1.9801e-01, 8.7940e-03, + -2.9512e-01, -5.5449e-01, 1.6342e-02, 2.2033e-01, 5.8513e-02, + 3.4597e-01, -3.2905e-01, -1.1703e-01, 1.9318e-01, 2.9449e-01, + 1.0837e-01, -1.0457e-03, -1.1668e-01, 1.6970e-01, -7.2471e-02, + -4.7684e-01, 9.0855e-02, 7.9242e-02, -8.0585e-02, 3.2629e-01, + 1.2611e-01, 3.2879e-01, -2.5022e-01, -1.6554e-01, 6.3878e-01, + 3.5972e-01, -1.9698e-01, -2.6073e-01, 5.1697e-01, -3.3824e-01, + -4.4351e-01, 1.3762e-01, 1.5554e-01, -3.4920e-02, -1.0653e-01, + 9.6069e-02, -2.2788e-01, 7.0399e-02, 1.6845e-01, 2.9433e-01, + -7.1957e-01, 4.2253e-01, -1.6981e-01, 4.2037e-01, -2.1842e-01, + 2.3953e-01, -3.1829e-01, -8.9034e-01, 7.7953e-01, 5.7918e-01, + 2.7067e-01, -4.5477e-01, -6.4798e-01, -8.2738e-02, 1.8428e-01, + 3.2654e-02, -2.2082e-01, 9.1149e-01, 4.6169e-01, -2.0381e-01, + 2.3860e-01, 4.9382e-01, -6.1471e-02, -4.3793e-01, -3.6538e-01, + -2.2946e-01, -1.1087e+00, 1.6603e-01, 6.7709e-01, -1.9357e-01, + -2.8555e-01, -3.4081e-01, -2.5572e-01, 2.9791e-01, -2.6723e-01, + -6.2435e-01, 4.9662e-01, 1.0662e-01, -4.2844e-01, 5.9660e-01, + 7.0396e-01, 8.5778e-02, -5.7455e-01, -2.1013e-01, 4.0237e-01, + 4.0175e-01, -4.7408e-01, 1.8092e-01, 6.5065e-01, 2.9740e-01, + -2.0762e-01, -1.7106e-01, -3.5587e-01, 5.4142e-01, -3.0241e-02, + 2.4929e-02, -3.9181e-01, 
4.0413e-01, -5.2466e-01, -2.1312e-01, + 4.6921e-01, 2.1146e-02, -1.4481e-01, 6.1093e-01, 5.1777e-02, + -1.5702e-02, -1.3893e-01, 1.2269e-02, -4.5805e-01, -5.3720e-01, + -1.7545e-01, -6.6881e-02, -1.4616e+00, -1.8604e-01, 4.2172e-01, + -3.1246e-01, -4.6231e-02, -1.8316e-01, 3.5422e-01, 8.5290e-02, + 3.7975e-01, 5.6355e-01, 2.4853e-01, -3.5697e-01, 4.5364e-01, + -1.7780e-01, -3.3519e-03, -3.2779e-01, 5.3199e-01, -3.6808e-02, + 3.8655e-01, 2.2483e-02, 3.1360e-01, -1.3338e-01, 2.1981e-01, + -4.9110e-01, 6.6587e-01, -1.4109e-01, -1.5661e-01, -1.1334e-01, + -4.6857e-01, 4.8447e-01, -2.2792e-01, 1.3462e-01, -4.0982e-01, + 2.8143e-01, -4.1661e-01, -7.0864e-01, 2.5823e-01, -5.3160e-01, + 1.9616e-01, -1.9649e-01, -4.1130e-01, -1.6410e-01, 1.6438e-01, + 5.2778e-02, -6.6124e-01, -3.2948e-02, -5.4545e-01, 5.2703e-01, + 2.8672e-01, 5.7645e-01, -2.6746e-01, -3.9089e-01, 2.0937e-01, + -3.6736e-02, 2.4706e-01, 3.5762e-01, -1.6023e-01, -3.1083e-01, + -4.6344e-01, 4.1670e-01, -7.5496e-02, 1.4926e-01, -3.3287e-01, + -4.2243e-01, -6.3298e-02, 2.5848e-01, 3.7625e-01, 8.7703e-02, + -3.2085e-01, 3.9825e-01, 4.0217e-01, 3.1725e-01, -1.6784e-01, + -4.0644e-01, 5.2821e-01, -5.1684e-01, -5.4546e-01, 1.1714e-01, + 3.2991e-01, 2.9674e-01, 1.7642e-01, 5.5854e-01, -3.4519e-01, + -1.7785e-01, 2.0199e-01, -5.2339e-01, -6.3149e-02, -3.0370e-01, + -2.8747e-02, -3.4003e-01, -1.5512e-01, -4.3562e-01, 6.5145e-01, + 3.0143e-01, -3.2293e-01, 3.3838e-01, -1.3263e-01, 3.5033e-02, + 4.7880e-01, -8.1988e-01, -2.0750e-01, -6.8490e-02, -5.9730e-02, + 4.1138e-01, 2.3811e-01, -3.2911e-01, -6.8965e-02, 5.5200e-01, + 1.9072e-01, 6.2259e-02, 6.6072e-01, -6.0550e-01, 2.3396e-01, + 1.1968e-01, -5.3494e-01, 6.5234e-02, -1.3458e-01, -2.1542e-01, + -7.3702e-02, 1.8912e-01, 1.5528e-01, 4.7149e-01, -2.0252e-01, + -6.8936e-01, -1.6355e-01, 2.8603e-02, 7.8233e-02, 3.6492e-01, + 1.3363e-01, -4.6805e-01, -3.1390e-01, -7.6726e-01, -3.2683e-01, + -2.0703e-01, 5.6110e-01, 3.6255e-01, -2.0338e-01, 4.8012e-01, + -2.3513e-01, 
3.9425e-01, 5.9808e-01, 8.5797e-01, 6.0902e-01, + 1.5637e-01, -7.8162e-01, -6.8807e-02, 1.1870e-01, -1.4694e-01, + -6.4079e-02, -5.5715e-01, -2.5838e-01, 8.1312e-01, 4.2588e-01, + -2.3839e-01, -5.9330e-01, -1.5378e-02, 5.7079e-01, 1.5399e-01, + 1.2549e-01, -1.4820e-01, 6.9585e-02], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 0.0030, -0.0047, 0.0065, ..., 0.0104, -0.0140, 0.0053], + [-0.0176, 0.0191, -0.0227, ..., 0.0217, 0.0145, -0.0007], + [ 0.0033, -0.0146, 0.0133, ..., 0.0050, -0.0265, -0.0137], + ..., + [-0.0195, -0.0042, -0.0021, ..., -0.0063, 0.0234, -0.0025], + [-0.0185, 0.0035, -0.0008, ..., 0.0019, 0.0356, 0.0087], + [-0.0246, -0.0227, -0.0082, ..., -0.0005, -0.0009, 0.0117]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([-0.1141, 0.1932, 0.1205, ..., -0.0247, 0.0140, 0.0328], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[-0.0028, -0.0143, 0.0161, ..., 0.0151, 0.0104, -0.0198], + [-0.0164, -0.0346, 0.0067, ..., 0.0199, -0.0166, -0.0236], + [-0.0236, -0.0092, -0.0233, ..., -0.0062, -0.0015, 0.0028], + ..., + [ 0.0289, 0.0172, -0.0065, ..., -0.0083, -0.0195, 0.0067], + [ 0.0268, -0.0132, 0.0347, ..., 0.0141, 0.0156, -0.0042], + [-0.0458, 0.0232, -0.0022, ..., -0.0111, 0.0161, 0.0254]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-1.0736e-01, 1.0443e-01, 7.3425e-02, -6.7932e-02, -9.2896e-02, + -8.6670e-02, -1.2103e-01, -5.3589e-02, 1.7627e-01, -6.3171e-03, + -4.8187e-02, 1.4111e-01, 1.7212e-01, -4.9286e-02, 1.7249e-01, + 4.2206e-02, -9.5154e-02, -8.4839e-02, -1.5198e-01, -2.9883e-01, + 4.8187e-02, 3.7262e-02, 2.3163e-02, -1.1139e-01, 1.5063e-01, + 1.6846e-01, -1.2781e-01, 1.2366e-01, -1.0736e-01, 5.1331e-02, + 4.5276e-04, -9.5032e-02, -7.3669e-02, -1.0352e-01, 3.6224e-02, + -1.9043e-02, -1.5588e-01, 
-9.9365e-02, 1.8542e-01, -1.8408e-01, + -9.4788e-02, -7.6111e-02, 1.0559e-01, -1.2573e-01, -5.5450e-02, + -7.9773e-02, -1.0445e-02, -8.3435e-02, 1.5945e-02, 2.6093e-02, + -3.2898e-02, -7.7942e-02, 1.5137e-02, 1.0034e-01, -1.7871e-01, + 3.0914e-02, 1.0242e-01, 7.2754e-02, -8.0872e-02, -2.6566e-02, + 1.4473e-02, 1.0205e-01, -6.9275e-02, -1.0187e-01, -4.8971e-04, + 1.1060e-01, -1.6309e-01, -2.8381e-02, -1.4929e-01, -2.3840e-01, + 1.5881e-01, 2.1576e-02, -2.5659e-01, -2.0959e-01, -6.7871e-02, + -1.3831e-01, -1.1322e-02, 1.9482e-01, 7.0679e-02, 8.3740e-02, + 8.5754e-02, 6.2469e-02, 4.4189e-02, -1.2451e-01, -1.7426e-02, + -8.2214e-02, 5.7739e-02, -1.0864e-01, 5.4840e-02, -1.2341e-01, + -1.9739e-01, -7.0007e-02, -8.5571e-02, 1.8713e-01, 1.6150e-01, + -6.2675e-03, 1.1002e-02, -1.0147e-02, 2.7344e-02, 1.8445e-01, + 2.1648e-03, -2.4158e-01, -2.2278e-01, 3.5980e-02, -2.7328e-02, + -1.2108e-02, -9.7717e-02, 2.1240e-01, 4.9103e-02, -4.3526e-03, + 9.4177e-02, 1.1572e-01, -9.4666e-02, 9.7534e-02, -1.1841e-01, + 3.8849e-02, 1.2030e-01, 1.1273e-01, 2.3849e-02, -7.4219e-02, + -6.2622e-02, 5.7129e-02, 1.0669e-01, 1.3965e-01, 5.4871e-02, + -2.2141e-02, 2.1179e-01, 1.6406e-01, 3.1799e-02, 7.4036e-02, + 1.6895e-01, 5.3215e-03, 1.0449e-01, 1.9153e-01, 1.9507e-01, + 6.2500e-02, 7.5623e-02, -1.5210e-01, 9.2590e-02, 2.7084e-02, + 1.0925e-01, 1.1957e-01, -2.2449e-03, 2.6855e-02, -1.4319e-01, + 1.7468e-01, -5.4504e-02, -1.5747e-01, 2.6810e-02, -1.9165e-01, + -1.8250e-01, 5.1918e-03, 2.1277e-01, -1.1560e-01, -1.3281e-01, + 1.6016e-01, -1.0229e-01, -1.1914e-01, -1.4453e-01, -5.8136e-02, + -1.0779e-01, 4.4891e-02, 3.8757e-02, -1.1139e-01, 5.1636e-02, + 1.5125e-01, 1.2842e-01, -2.4377e-01, 1.0999e-01, 2.9358e-02, + 1.4502e-01, -1.5527e-01, -2.1805e-02, 2.1692e-01, -1.1218e-01, + 1.0236e-01, 7.4539e-03, -8.4717e-02, 1.4966e-01, 1.8872e-01, + -5.7465e-02, -5.9624e-03, -1.1603e-01, -1.1726e-02, -6.6223e-02, + 1.8970e-01, -8.3496e-02, -5.7422e-01, 1.2646e-01, 1.7444e-01, + -7.0557e-02, 3.9337e-02, 
-4.2816e-02, -4.2801e-03, 8.5571e-02, + 1.2659e-01, 9.8572e-02, 2.9053e-02, -2.4731e-01, 1.0773e-01, + 8.7952e-02, -6.2744e-02, -5.9128e-03, -6.5491e-02, -9.3384e-02, + -1.5308e-01, 8.5876e-02, -1.2384e-01, -1.9019e-01, -5.1514e-02, + 5.5939e-02, -1.6382e-01, -1.0547e-01, 9.9609e-02, 1.4856e-01, + -6.6223e-02, 8.5510e-02, -7.5562e-02, 1.1902e-01, 2.8223e-01, + 1.2524e-01, -1.1734e-02, -4.3060e-02, -4.0833e-02, 7.2670e-03, + 1.3892e-01, -1.6357e-01, 7.6447e-03, -4.2023e-02, 1.1340e-01, + -7.7637e-02, -1.1688e-01, 1.1066e-01, -2.5616e-03, -3.2196e-02, + -2.2461e-01, 2.0532e-01, -1.0785e-01, -1.8774e-01, 1.5796e-01, + 1.0852e-01, -1.0419e-01, 8.7219e-02, -7.9712e-02, -8.0139e-02, + -9.1736e-02, -2.4036e-01, 1.3232e-01, -5.8746e-02, 1.2024e-01, + 1.1035e-01, 1.5771e-01, 4.5227e-02, 8.1299e-02, -9.1187e-02, + 1.9730e-02, 1.4075e-01, -1.0773e-01, -1.8689e-01, -7.0801e-02, + 1.1511e-01, -6.6650e-02, -8.6548e-02, 2.0911e-01, 7.1335e-03, + -1.6431e-01, 1.6467e-01, 1.7029e-02, 1.8112e-02, 1.5778e-02, + 3.5797e-02, -2.7756e-02, -3.1891e-02, -1.3161e-02, 1.8433e-01, + -1.5723e-01, -1.9055e-01, 1.0565e-01, -2.0154e-01, 1.0498e-01, + -5.3894e-02, -2.2339e-01, -1.0962e-01, 9.6558e-02, -1.9165e-01, + 1.2128e-01, -1.7017e-01, 6.7871e-02, -1.3953e-01, 1.1292e-01, + 2.6489e-02, 1.4832e-01, 9.2163e-02, -2.7466e-02, -6.8665e-02, + -1.5723e-01, -2.7100e-01, 2.4872e-02, -5.6839e-03, -4.6051e-02, + -5.5359e-02, -1.4734e-01, -1.0590e-01, -4.3488e-02, -4.1809e-02, + 3.7689e-02, -2.5146e-02, 1.6040e-01, -1.3696e-01, -9.9487e-03, + -7.5989e-03, -9.9609e-02, 1.1340e-01, 1.1389e-01, 4.6509e-02, + -1.1243e-01, 3.4668e-02, 2.6794e-02, -2.1912e-01, 8.2703e-02, + -2.0721e-02, -1.6711e-01, 1.3098e-01, -5.6244e-02, 4.6606e-01, + -5.5878e-02, 1.9989e-02, -1.2585e-01, 5.1636e-02, 1.7383e-01, + -1.0144e-01, -3.9612e-02, -1.9394e-02, -2.8336e-02, -1.4197e-01, + -1.4929e-01, -1.4331e-01, 1.1859e-01, 6.8481e-02, 1.0187e-01, + 1.1658e-01, 5.1147e-02, -7.5012e-02, 1.5649e-01, -1.5808e-01, + 1.5295e-01, 
-1.3879e-01, -8.2397e-02, -6.9824e-02, 1.0431e-01, + 2.1667e-01, -2.3041e-02, 9.5139e-03, -2.7863e-02, 1.4185e-01, + 1.6272e-01, -1.8848e-01, -6.3599e-02, -9.6497e-02, 2.0227e-01, + -1.6394e-01, 6.7017e-02, 1.5173e-01, -3.3783e-02, 1.5503e-01, + -9.6741e-02, -1.7725e-01, -2.0422e-01, -5.4352e-02, -2.7634e-02, + -1.5588e-01, 1.4612e-01, 6.7932e-02, 6.2927e-02, -8.1177e-02, + -3.8361e-02, -1.5472e-02, -2.8000e-02, 1.4233e-01, 9.8343e-03, + 1.1359e-01, -1.0986e-01, -1.6711e-01, -2.0020e-01, 1.5820e-01, + -1.4697e-01, -2.6505e-02, 1.4746e-01, 1.6516e-01, -2.7420e-02, + 1.1584e-01, -9.7961e-02, 8.9355e-02, 5.3528e-02, -3.0640e-02, + 1.0779e-01, -1.3928e-01, -3.9337e-02, 9.4177e-02, -9.7656e-02, + -1.7456e-01, 4.3274e-02, -1.3574e-01, -6.3843e-02, 1.3519e-02, + 1.5564e-01, 1.0779e-01, -3.2013e-02, -8.8074e-02, 1.6495e-02, + 2.0105e-01, -1.4966e-01, 8.1482e-02, -1.0805e-03, 1.8933e-01, + 9.0942e-02, -5.0934e-02, -1.3232e-01, 2.3669e-01, 1.5857e-01, + -7.8659e-03, -9.8145e-02, -2.0721e-02, 1.1163e-01, 1.2671e-01, + 1.2372e-01, 2.3083e-01, 1.8359e-01, -2.2925e-01, 4.1321e-02, + -1.1829e-01, -1.0571e-01, -1.0291e-01, -1.9531e-01, -8.3160e-03, + 6.5613e-02, -1.1700e-01, 7.0374e-02, -1.8311e-01, 1.5991e-01, + 2.8580e-02, -5.5809e-03, -1.0185e-03, -7.9529e-02, -2.3242e-01, + 4.9866e-02, 1.1224e-01, 2.1411e-01, 7.4524e-02, 2.9037e-02, + 1.2158e-01, -1.7532e-02, 4.8401e-02, -7.3242e-02, -1.2634e-01, + -1.4514e-01, 6.0944e-02, 7.9285e-02, 6.3660e-02, 6.2988e-02, + -3.4027e-02, -1.6675e-01, 1.4526e-01, -1.2054e-01, -3.8849e-02, + 1.4539e-01, -1.5491e-01, 2.1704e-01, -1.7700e-01, 2.5284e-02, + 8.9722e-03, 2.2937e-01, -1.5417e-01, -6.0669e-02, -1.3989e-01, + -7.7637e-02, -8.9111e-02, -8.1909e-02, -5.0201e-02, -7.2510e-02, + -8.9844e-02, 9.5398e-02, 1.3000e-01, -3.2684e-02, 1.9580e-01, + -1.4844e-01, -1.9104e-02, 4.3701e-02, 3.9734e-02, -9.6741e-02, + -1.0962e-01, 4.8431e-02, 1.9690e-01, -2.7756e-02, 1.5991e-01, + -7.5562e-02, 8.9294e-02, -1.7249e-01, 1.9373e-01, -1.5991e-01, + 
9.1431e-02, -4.4647e-02, 1.9104e-02, -3.6591e-02, 1.2610e-01, + 1.1340e-01, -5.2338e-02, 1.3611e-01, 2.6779e-02, 7.7026e-02, + -1.3208e-01, 4.8126e-02, -7.7248e-03, -1.0803e-01, 2.1927e-02, + 1.9104e-02, 1.4935e-03, -2.2021e-01, -6.5674e-02, 1.7456e-01, + 3.1464e-02, 4.1779e-02, -7.9346e-02, 1.5149e-01, -8.9600e-02, + -1.4001e-01, 7.8491e-02, 9.1064e-02, -2.3022e-01, 1.0962e-01, + 1.2561e-01, -3.2806e-02, 7.1411e-02, 2.2107e-01, 6.4453e-02, + -5.3955e-02, 1.7712e-01, -1.9043e-01, 3.7170e-02, 7.6027e-03, + 2.0178e-01, -8.5693e-02, -1.7319e-02, 4.7379e-03, 1.3135e-01, + 1.2280e-01, -1.2152e-01, -5.5084e-02, -1.7859e-01, -7.8186e-02, + -3.0609e-02, -1.3196e-01, 1.1768e-01, -1.9995e-01, 7.0435e-02, + 1.8970e-01, 3.4943e-02, -1.0309e-01, -6.6406e-02, -1.6479e-01, + 9.5947e-02, -1.6479e-01, -6.7749e-02, -1.6327e-02, -2.1143e-01, + -1.1493e-01, -2.2009e-01, -1.3696e-01, 6.6040e-02, -2.4756e-01, + -7.2083e-02, 6.8665e-02, 2.4353e-02, -9.4177e-02, 1.6199e-01, + -4.8566e-04, -7.3853e-03, 6.0059e-02, 3.1708e-02, 1.1475e-01, + 1.5784e-01, -1.0327e-01, -5.5908e-02, 1.0773e-01, -6.5796e-02, + -8.8135e-02, 9.3750e-02, 5.8411e-02, 7.5256e-02, 5.3314e-02, + 5.5573e-02, -9.7717e-02, -3.7537e-02, -1.0382e-01, -2.1497e-01, + 1.0992e-01, 2.7237e-02, -3.6621e-02, 1.0443e-01, 4.6326e-02, + 1.8665e-01, 1.5149e-01, -1.6443e-01, -2.2293e-02, 4.4891e-02, + 1.3831e-01, -7.9834e-02, -1.6602e-01, -1.0413e-01, 1.4551e-01, + 2.4857e-02, 2.1619e-01, -3.0869e-02, 1.5637e-01, 2.2595e-01, + 2.6489e-01, 1.3574e-01, 6.9214e-02, -6.5063e-02, 6.5674e-02, + -1.8701e-01, 2.1591e-02, -1.8356e-02, 2.3755e-01, -1.3330e-01, + 2.3608e-01, 1.2390e-01, 6.3232e-02, 4.4495e-02, 9.2346e-02, + -2.1094e-01, 1.6357e-01, -3.6133e-02, -1.0950e-01, -7.8125e-02, + -1.8263e-03, 1.1737e-01, 9.1187e-02, -3.7498e-03, 2.4780e-02, + 1.7712e-01, -2.2949e-02, -4.4617e-02, 1.2292e-01, -2.5903e-01, + 1.0883e-01, -8.7463e-02, -1.7957e-01, -1.3660e-01, -1.5747e-01, + -1.5308e-01, -1.6882e-01, 1.2262e-01, -1.2756e-01, 1.8530e-01, + 
1.5881e-01, 1.1377e-01, 8.3679e-02, -1.3367e-01, -1.0553e-01, + 9.5520e-02, 1.2903e-01, 2.4023e-01, -1.0144e-01, -1.6565e-01, + -8.4412e-02, 7.3120e-02, 2.3422e-02, 3.3966e-02, 1.1884e-01, + 1.5625e-01, -1.3562e-01, -1.8359e-01, 4.9286e-02, -6.8703e-03, + 4.0527e-02, 9.3018e-02, 2.2473e-01, 2.0813e-02, 5.7648e-02, + -1.1462e-01, 3.4595e-01, -1.0187e-01, -7.7637e-02, 4.7668e-02, + 2.3022e-01, 5.8960e-02, 3.5004e-02, 2.1509e-01, -9.2224e-02, + -2.1561e-02, -6.1493e-02, -8.3557e-02, -7.7026e-02, -5.3711e-02, + -5.2643e-02, 4.7035e-03, 4.9400e-03, -1.0541e-01, 1.6003e-01, + 1.1176e-01, -5.6061e-02, -1.3710e-02, 7.5500e-02, 6.3538e-02, + 1.3892e-01, -1.0809e-01, -3.7262e-02, 1.0521e-02, -1.6418e-01, + 8.0872e-02, -1.8250e-01, -1.7847e-01, -1.3806e-01, 6.9031e-02, + 7.8613e-02, 1.9177e-01, 2.2705e-01, 3.6869e-03, 6.1218e-02, + 1.7139e-01, -9.0271e-02, 6.0608e-02, 7.4036e-02, -1.3232e-01, + 9.5276e-02, 6.6284e-02, -5.0781e-02, 5.2490e-02, -7.6965e-02, + -2.0837e-01, -1.0034e-01, -1.8958e-01, -4.4922e-02, 1.7761e-01, + 6.3538e-02, 6.7871e-02, -3.6926e-02, -4.1008e-03, 7.0679e-02, + -5.2277e-02, 2.0203e-01, 3.3447e-02, -3.5339e-02, -7.2823e-03, + -3.6713e-02, 8.4167e-02, -8.8867e-02, 7.8918e-02, 1.6525e-02, + 1.9385e-01, 2.0496e-01, -9.1309e-02, 4.7684e-03, -1.2830e-01, + 3.9520e-03, -1.0559e-01, -1.2427e-01, 1.4722e-01, 2.3108e-01, + 2.5597e-03, -1.2164e-01, -1.4050e-01, 4.4403e-02, 1.9202e-01, + 7.7820e-02, -1.1420e-01, 8.2031e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.8626, 1.8031, 1.7707, 1.7813, 1.5411, 1.7607, 1.7641, 1.6653, 1.7079, + 1.7684, 1.8017, 1.6956, 1.6986, 1.7498, 1.8004, 1.8909, 1.7765, 1.8687, + 1.7344, 0.4985, 1.8175, 1.8176, 1.7591, 1.7257, 1.8502, 1.6815, 1.7383, + 1.7850, 1.8199, 1.7943, 1.8372, 1.6945, 1.7009, 1.7682, 1.7271, 1.7660, + 1.8474, 1.7567, 1.7404, 1.6637, 1.8308, 1.8491, 1.7334, 1.8106, 1.7488, + 1.6771, 1.7654, 1.7597, 1.8563, 1.7420, 1.8416, 1.8208, 1.7759, 
1.7856, + 1.7080, 1.8179, 1.7241, 1.7123, 1.7897, 1.7672, 1.7761, 1.7262, 1.7635, + 1.7302, 1.8483, 1.7563, 1.7805, 1.7103, 1.6915, 1.6331, 1.7328, 1.7430, + 1.9799, 1.7362, 1.7595, 1.7817, 1.7687, 1.7316, 1.6897, 1.7418, 1.7713, + 1.7142, 1.8107, 1.7375, 1.7881, 1.7923, 1.8721, 1.7792, 1.7724, 1.6501, + 1.7684, 1.8037, 1.7338, 1.7793, 1.7959, 1.7426, 1.7526, 1.8015, 1.7506, + 1.7252, 1.8087, 1.8846, 1.7413, 1.7966, 1.6733, 1.7162, 1.7589, 1.7807, + 1.7870, 1.7457, 1.6903, 1.7602, 1.7180, 1.7813, 1.7669, 1.7081, 1.7036, + 1.7858, 1.8298, 1.6986, 1.7633, 1.7596, 1.7947, 1.7986, 1.7408, 1.7156, + 1.8134, 1.8334, 1.8831, 1.8223, 1.7169, 1.7277, 1.8390, 1.7366, 1.6804, + 1.8091, 1.7192, 1.7432, 1.7482, 1.7376, 1.7989, 1.7752, 1.7880, 1.6884, + 1.8083, 1.7428, 1.7844, 1.7537, 1.7240, 1.7806, 1.6277, 1.7218, 1.7583, + 1.8438, 1.7764, 1.4386, 1.7788, 1.7629, 1.7766, 1.7988, 1.5892, 1.7308, + 1.7457, 1.7244, 1.7396, 1.8274, 1.7685, 1.8633, 1.7943, 1.7235, 1.6937, + 1.7572, 1.8016, 1.8091, 1.8354, 1.6880, 1.8347, 1.7619, 1.7627, 1.7118, + 1.6843, 1.8207, 1.8057, 1.8163, 1.7315, 1.5833, 1.7522, 2.7548, 1.7724, + 1.6638, 1.8069, 1.7381, 1.7530, 1.8312, 1.7646, 1.8395, 1.7653, 1.6637, + 2.9893, 1.7513, 1.8372, 1.7216, 1.7038, 1.8960, 1.7074, 1.7235, 1.8619, + 1.7499, 1.7265, 1.7715, 1.7588, 1.6586, 1.7386, 1.6842, 1.7496, 1.7123, + 1.7183, 1.8547, 1.8007, 1.6818, 1.7232, 1.7186, 1.5766, 1.7863, 1.7755, + 1.7346, 1.7565, 1.8070, 1.6555, 1.7957, 1.8173, 1.7355, 1.8109, 1.7154, + 1.8207, 1.8189, 1.6928, 1.7726, 1.8410, 1.7976, 1.7274, 1.7988, 1.8094, + 1.7929, 1.7947, 1.6713, 1.7325, 1.6619, 1.8692, 1.8463, 1.7701, 1.8279, + 1.7421, 1.7823, 1.7778, 1.7596, 1.7339, 1.7867, 1.8402, 1.7847, 1.7104, + 1.7948, 1.7840, 1.6757, 1.7317, 1.7500, 1.8238, 1.8030, 1.7794, 1.7665, + 1.7595, 1.7266, 1.7846, 1.8269, 1.8003, 1.7487, 1.7192, 1.8082, 1.7012, + 1.7666, 1.7735, 1.7122, 1.7510, 1.8224, 1.7595, 1.7332, 1.8395, 1.7557, + 1.7576, 1.7263, 1.7551, 1.7655, 1.7280, 1.8227, 1.7688, 1.7970, 
1.5910, + 1.8495, 1.8002, 1.8020, 1.7830, 1.7413, 1.8417, 1.7040, 1.8032, 1.7786, + 1.7579, 1.7294, 1.7574, 1.7677, 1.7913, 1.7839, 1.7360, 1.7482, 1.7686, + 1.7938, 1.7941, 1.7248, 1.7029, 1.7483, 1.7367, 1.6134, 1.7984, 1.8084, + 0.9421, 1.7933, 1.7893, 1.7963, 1.7915, 1.7544, 1.7983, 1.7982, 1.6628, + 1.7429, 1.7766, 1.7301, 1.7107, 1.7985, 1.7769, 1.7670, 1.8131, 1.6624, + 1.6756, 1.8100, 1.7682, 1.7307, 1.7146, 1.7440, 1.8184, 1.8075, 1.5879, + 1.7574, 1.8042, 1.8490, 1.8459, 1.7467, 1.8094, 1.7255, 1.7316, 1.7178, + 1.8016, 1.6997, 1.7831, 1.8231, 1.7265, 1.6869, 1.7430, 1.7517, 1.7799, + 1.7093, 1.7564, 1.7116, 1.7531, 1.7908, 1.8358, 1.8037, 1.7656, 1.8286, + 1.7787, 1.7869, 1.7289, 1.7703, 1.8054, 1.7791, 1.6605, 1.7075, 1.8083, + 1.7061, 1.7457, 1.7999, 1.6409, 1.8023, 1.7131, 1.7376, 1.7145, 1.6934, + 1.8714, 1.7531, 1.7853, 1.8259, 1.8281, 1.7963, 1.7558, 1.6699, 1.8304, + 1.7752, 1.8119, 1.7505, 1.7462, 1.7651, 1.7035, 1.7693, 1.7784, 1.8790, + 1.7997, 2.7079, 1.8050, 1.7434, 1.7304, 1.7496, 1.7413, 1.7786, 1.7676, + 1.7633, 1.8048, 1.7498, 1.5717, 1.7788, 1.7032, 1.7021, 1.7289, 1.7154, + 1.7574, 1.7357, 1.7384, 1.8404, 1.7523, 1.7456, 1.6581, 1.7213, 1.7779, + 1.8164, 1.7584, 1.8338, 1.7372, 1.7945, 1.7703, 1.7775, 1.7512, 1.7422, + 1.7518, 1.6974, 1.7084, 1.8513, 1.8187, 1.7494, 1.7221, 1.8195, 1.7825, + 1.7718, 1.7819, 1.7341, 1.8106, 1.7494, 1.7639, 1.7447, 1.7622, 1.7019, + 1.7713, 1.8107, 1.7751, 1.6831, 1.8983, 1.8854, 1.7562, 1.7641, 1.8450, + 1.6962, 1.7789, 1.7653, 1.8096, 1.7490, 1.7509, 1.8375, 1.8035, 1.7372, + 1.8059, 1.8244, 1.6840, 1.7538, 1.7917, 1.8426, 1.7746, 1.8419, 1.7937, + 1.7681, 1.8910, 1.7616, 1.7887, 1.6965, 1.7099, 1.7803, 1.7888, 1.8322, + 1.7731, 1.7062, 1.8104, 1.7924, 1.6989, 1.7703, 1.7718, 1.8199, 1.7933, + 1.7476, 1.7699, 1.7553, 1.7314, 1.7351, 1.8270, 1.7637, 1.7724, 1.6985, + 1.7598, 1.7834, 1.7270, 1.7643, 1.8691, 1.6961, 1.7836, 1.7457, 1.7214, + 1.7549, 1.7503, 1.7444, 1.7038, 1.7202, 1.7286, 1.7643, 1.8116, 
1.8197, + 1.7559, 1.8472, 1.7803, 1.6730, 1.8114, 1.7312, 1.8234, 1.8250, 1.7086, + 1.7285, 1.8161, 1.7310, 1.7546, 1.7177, 1.7930, 1.8005, 1.8174, 1.6891, + 1.7722, 1.7018, 1.8242, 1.6458, 1.7265, 1.7661, 1.8259, 1.7962, 1.6896, + 1.7895, 1.7994, 1.7630, 1.7438, 1.7374, 1.7818, 1.7458, 1.7538, 1.7060, + 1.7827, 1.7179, 1.6832, 1.6815, 1.6348, 1.7162, 1.8015, 1.7629, 1.8582, + 1.7548, 1.8259, 1.7654, 1.7764, 1.8435, 1.7540, 1.8566, 1.8117, 1.8390, + 1.7786, 1.6708, 1.7558, 1.7757, 1.6846, 1.8093, 1.7874, 1.7895, 1.8258, + 1.7518, 1.7854, 1.7683, 1.8340, 1.8175, 1.7892, 1.7295, 1.7386, 1.7859, + 1.7975, 1.7490, 1.7827, 1.7883, 1.7735, 1.7281, 1.8743, 1.7000, 1.7167, + 1.7611, 1.7891, 1.7726, 1.7243, 1.7394, 1.7958, 1.6989, 1.8408, 1.7790, + 1.7038, 1.7744, 1.7268, 1.6995, 1.7691, 1.7200, 1.7585, 1.7669, 1.8629, + 1.7698, 1.8005, 1.8482, 1.6710, 1.7339, 1.5718, 1.8037, 1.7726, 1.7195, + 1.7267, 1.7365, 1.7477, 1.7643, 1.7557, 1.7137, 1.6893, 1.8008, 1.7865, + 1.7747, 1.7637, 1.7201, 1.6544, 1.7408, 1.6003, 1.7998, 1.7676, 1.7365, + 1.7003, 1.7455, 1.7690, 1.8151, 1.7240, 1.8377, 1.7835, 1.7003, 1.7420, + 1.8121, 1.7799, 1.7392, 1.7157, 1.7833, 1.8003, 0.4432, 1.7083, 1.8084, + 1.8351, 1.6847, 1.7720, 1.7214, 1.7016, 1.6804, 1.7575, 1.8147, 1.8383, + 1.8066, 1.7622, 1.7447, 1.8649, 1.6914, 1.7502, 1.7756, 1.8313, 1.7522, + 1.7012, 1.7607, 1.7849, 1.6833, 1.7358, 1.7736, 1.8525, 1.8686, 1.7100, + 1.8097, 1.8105, 1.7527, 1.7602, 1.7818, 1.7557, 1.7135, 1.7930, 1.7505, + 1.8375, 1.7433, 1.7744, 1.7698, 1.7585, 1.7979, 1.5292, 1.8328, 1.7411, + 1.7647, 1.8039, 1.7369, 1.7095, 1.7712, 1.7364, 1.7643, 1.7983, 1.8086, + 1.7453, 1.7678, 1.7649, 1.7738, 1.8089, 1.8234, 1.7720, 1.6807, 1.7681, + 1.6920, 1.7764, 1.8613, 1.8168, 1.6981, 1.7320, 1.8520, 1.7266, 1.7517, + 1.8211, 1.7164, 1.7532, 1.7514, 1.8598, 1.7562, 1.7970, 1.7824, 1.6946, + 1.7698, 1.7516, 1.7438], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.5714e-01, 1.0521e-01, 
-2.1855e-01, -3.6352e-02, 1.8207e-01, + 1.7251e-01, 1.3290e-01, 7.0782e-02, -1.9266e-01, -9.2168e-02, + 7.9184e-02, -5.8226e-02, -1.7935e-01, -2.4685e-02, -4.8018e-02, + -9.5424e-02, 2.4386e-02, -5.1330e-02, 3.3999e-02, 3.2014e+00, + -3.9015e-02, 1.1516e-01, 8.0055e-02, 9.1744e-02, 2.1027e-02, + 1.0418e-02, 7.2239e-02, -1.1568e-01, 3.2005e-03, -4.8645e-02, + 1.4117e-01, 7.1536e-02, -6.0574e-02, 1.8058e-01, 7.5935e-02, + -8.4233e-02, 2.0770e-02, -1.0301e-01, -1.0013e-01, 1.5660e-01, + 8.0592e-02, 1.2721e-01, -4.6441e-02, 5.7574e-02, 3.4196e-02, + -2.7855e-03, 1.1291e-01, -4.5029e-03, 4.9145e-02, 5.6455e-02, + 1.2398e-03, -2.5091e-02, 3.1097e-02, -9.4255e-03, 2.5114e-02, + 1.5999e-01, -4.6883e-02, -1.1271e-01, 4.5208e-02, 1.4600e-02, + 1.2989e-01, -1.0197e-01, 1.6056e-01, 8.7824e-02, 1.6033e-01, + 3.4396e-02, 1.3704e-01, 4.1646e-02, -5.3530e-02, 2.4990e-01, + 4.8798e-02, 4.4839e-02, 7.1403e-01, 1.1320e-01, -1.0711e-01, + 1.2396e-01, 1.3501e-01, -6.7984e-02, -8.9051e-03, 9.0761e-02, + 9.5781e-03, -5.4714e-02, -9.0754e-02, 1.3412e-01, -1.9785e-02, + 8.3251e-02, 9.4815e-02, 1.7186e-01, -3.4824e-02, 6.8364e-02, + -2.1312e-02, -4.3523e-02, 3.8732e-02, -6.0781e-03, -7.7346e-02, + 3.1005e-02, 6.6436e-02, -8.4234e-02, -1.1001e-01, 2.1268e-03, + -1.4562e-02, -4.7741e-02, -7.1506e-02, -1.0927e-01, 7.7507e-02, + -1.2546e-02, 1.1501e-01, -5.7693e-02, 5.3100e-02, -1.1730e-01, + -8.1036e-02, -1.0260e-01, -8.1279e-02, 9.5656e-02, 5.0190e-02, + 4.7565e-02, 5.3448e-03, 4.5320e-02, 1.9243e-02, 1.4295e-01, + -3.6603e-02, -3.0097e-01, -1.3997e-01, -6.3057e-02, -2.0284e-01, + -5.2843e-02, -2.4076e-01, 7.0576e-02, 1.0352e-01, -3.0233e-02, + -1.0235e-01, -5.7698e-03, -3.2386e-02, -5.5490e-02, -1.1519e-01, + 8.9923e-02, -7.0423e-02, -5.0296e-02, 1.8792e-02, -9.9913e-02, + -1.2686e-02, 7.4479e-03, 2.6728e-02, 1.4484e-02, 5.6417e-02, + -9.0091e-02, -7.0845e-03, 2.2955e-02, -1.0560e-01, 1.6517e-01, + 3.1767e-01, -1.0263e-01, -1.2595e-01, -6.4043e-02, -9.5674e-02, + -4.9398e-01, -3.6656e-02, 
-2.0442e-02, -6.8514e-03, 4.0622e-02, + 2.4586e-01, -1.6740e-01, -6.2760e-02, 8.0933e-03, -6.1926e-02, + 1.3707e-02, 3.3186e-03, 8.2177e-02, 5.3861e-02, 1.9900e-02, + -9.7574e-02, 8.0126e-02, 9.0824e-02, -5.0445e-02, -2.1681e-02, + -1.8436e-01, -9.9015e-02, 8.8390e-03, -4.9693e-02, 9.5410e-02, + -7.1627e-02, -4.8957e-02, 1.9217e-01, 1.0617e-01, 6.4168e-02, + -1.4703e-01, 9.9676e-02, 9.1445e-01, -1.4521e-01, -9.8065e-02, + -1.1537e-01, -1.0495e-01, 1.0574e-01, 1.5191e-01, -6.7199e-02, + -3.9562e-02, -2.5574e-02, -2.0092e-01, 5.2279e-01, 3.2230e-02, + -2.6505e-01, 6.5943e-02, 1.2653e-02, 3.2023e-02, 8.7500e-02, + 1.5057e-01, -2.0009e-01, 7.4692e-02, 1.0776e-01, -5.0648e-02, + 6.2093e-02, 3.3582e-01, -3.1111e-02, -9.1817e-02, 3.2584e-02, + -2.9921e-02, -1.3872e-02, 8.5125e-03, -9.8826e-02, -3.3480e-01, + 4.8171e-02, -1.2216e-02, 1.8550e-01, -2.2219e-02, -2.7731e-03, + -2.9300e-02, 6.2133e-02, 2.5511e-02, 1.6478e-01, -5.1522e-02, + -3.3598e-02, 2.7848e-02, 1.7212e-02, 4.1664e-02, -3.3681e-02, + -1.5164e-02, -1.1802e-02, 1.1873e-01, 1.4403e-01, -1.8655e-02, + 3.1523e-02, 1.0407e-01, -3.3324e-02, 1.0530e-01, -8.4562e-03, + 2.3715e-02, -8.4441e-03, -8.1965e-02, 4.1591e-02, -4.1058e-02, + -8.8389e-02, -7.8143e-02, 1.2788e-01, -1.1700e-01, 1.8904e-01, + -3.8862e-02, 3.0045e-02, -1.1734e-01, 7.6597e-02, 3.1204e-02, + -1.0797e-01, -8.3629e-02, 6.1842e-02, 1.9294e-02, -1.1804e-01, + 7.2849e-02, -2.0952e-02, 2.2642e-02, -6.2562e-02, -6.2156e-02, + 6.7846e-02, 9.7213e-02, 1.1358e-01, -1.4457e-01, -3.7113e-03, + 4.6414e-02, -2.3938e-02, 3.0535e-02, 1.3291e-01, -1.3570e-01, + 1.5219e-01, 2.9188e-01, -3.4406e-02, 4.5174e-02, 5.9539e-02, + 3.4760e-02, -7.6875e-04, -3.4697e-02, 1.7763e-01, -9.1222e-02, + -3.2008e-02, 6.8429e-02, 7.8547e-02, 3.7662e-02, 1.3554e-01, + -6.3083e-02, 3.0479e-01, -4.1982e-03, 4.5922e-02, 9.9895e-02, + -7.3645e-03, 1.2549e-02, -2.1003e-02, 2.0596e-01, 5.0304e-02, + -1.2328e-02, 1.4580e-02, -9.2944e-02, 6.6810e-02, -7.2437e-02, + 8.9594e-02, 5.3213e-02, 
-8.6073e-02, -1.5086e-01, 1.6785e-02, + -8.3192e-03, -1.9318e-02, -5.3071e-03, 2.6348e-01, -2.6442e-02, + -2.1526e-01, 2.4490e-01, -1.1710e-01, 2.5417e-02, -1.3422e+00, + -1.9219e-01, -2.5468e-02, -9.7910e-03, 1.0712e-03, -1.9965e-01, + 2.1429e-01, 2.8356e-03, 1.3562e-01, -7.5370e-02, 1.5262e-01, + 7.8208e-02, 1.3468e-02, 7.6906e-03, -3.6844e-02, -4.3252e-02, + -4.5571e-02, 6.0231e-02, 2.0208e-02, -9.7585e-02, 1.3374e-01, + -3.9385e-03, 8.0897e-02, 1.5606e-01, -1.0525e-02, -3.8102e-02, + -1.0328e-01, 5.2117e-02, -2.4488e-02, 1.3929e-01, -5.5941e-02, + -3.7997e-02, -1.4443e-02, -8.2470e-02, -5.2119e-02, -7.9846e-02, + 2.0052e-02, 8.7459e-05, -8.3340e-02, -1.1757e-01, -4.5237e-02, + 1.9323e-02, 2.7198e-02, 1.7889e-01, 8.7077e-02, 1.5148e-02, + 1.2188e-01, -1.0065e-02, -2.4437e-02, -7.9199e-03, -1.5123e-02, + 1.5032e-01, 1.0752e-01, 2.0341e-01, 3.4366e-02, -9.1777e-02, + -1.4547e-01, 1.8320e-01, 4.5751e-02, 3.7170e-02, -8.5500e-02, + -7.2542e-02, -3.7958e-02, -1.6933e-01, -9.1045e-02, -4.4125e-02, + -7.2687e-04, -3.1537e-02, 1.5086e-02, -4.8495e-02, 6.5625e-02, + -1.6750e-02, 1.9904e-03, -1.1916e-02, -7.7741e-02, 3.0378e-02, + 1.3298e-01, 1.6665e-02, 7.6148e-02, -1.4184e-02, -1.0865e-01, + -2.5864e-01, -9.4924e-02, 5.2374e-02, 4.8062e-02, 4.8401e-02, + -6.1411e-02, 5.2295e-02, 1.0746e-01, -2.6086e-02, -1.8869e-01, + -4.6206e-01, -2.0475e-03, 3.6519e-02, -1.5038e-01, -4.3278e-03, + 4.2967e-02, 1.3432e-02, 4.5682e-02, -8.6488e-02, -1.7540e-01, + 5.3118e-02, -2.6750e-01, -1.5931e-01, 1.2383e-01, 8.0944e-03, + -6.6257e-02, 4.0884e-02, 2.4966e-02, 9.8009e-02, 2.3656e-01, + -6.1301e-03, 3.3331e-02, 9.9844e-02, 1.9059e-01, -1.0462e-01, + 1.1752e-02, -5.6101e-02, -3.5903e-02, 1.7924e-01, 1.7801e-01, + -1.6374e-01, -2.3782e-02, -5.2861e-02, 9.9118e-02, -1.7693e-02, + -3.4212e-02, -3.9081e-02, -2.0267e-01, 1.2076e-01, 9.5575e-02, + -1.0296e-02, -2.5224e-04, -4.2425e-03, -6.1769e-02, -5.4251e-02, + -3.3280e-02, 1.0036e-01, -1.9815e-01, 5.9549e-02, -4.9488e-02, + -9.9051e-02, 
7.7852e-02, -1.2828e-01, 1.6435e-01, 1.2599e-02, + -6.9440e-02, -1.6187e-01, -3.3820e-03, 9.6772e-02, 5.1878e-02, + 6.2215e-02, 2.1757e-01, -6.2182e-02, -1.7840e-03, 7.6085e-02, + 1.4915e-01, -5.7066e-02, 4.5222e-02, 2.0680e-01, -1.4390e-01, + 1.7874e-03, -3.0383e-02, 1.8139e-01, 6.8907e-02, -2.5350e-03, + 4.4395e-02, 4.1111e-02, -1.0086e-02, 2.7673e-02, -2.1411e-01, + -7.8112e-02, -1.3029e-01, 9.8681e-02, -9.1540e-02, 6.2187e-02, + 1.4301e-02, -5.2033e-02, 4.9276e-02, 3.3699e-03, -7.4605e-02, + 7.3420e-02, 4.7442e-02, 1.0370e-01, 6.9103e-02, 2.2118e-03, + -7.5054e-02, 2.4609e-03, -2.0037e-01, 9.2002e-02, -1.2598e-01, + -4.0373e-02, 4.0524e-02, 7.1912e-02, 2.4932e-02, 1.0932e-02, + 1.6149e-03, -2.0728e-02, 1.1106e-01, 1.1121e-02, -1.1926e-01, + 1.0912e-02, -3.5671e-02, -9.7672e-03, 2.8253e-02, -1.8453e-01, + 3.7205e-02, 5.7847e-02, 1.4349e-02, -1.2725e-01, -1.2037e-02, + -6.4877e-02, -1.7167e-02, 4.8023e-02, -3.2393e-02, -2.7004e-02, + -6.4756e-03, 7.6627e-02, -3.2907e-02, 9.5477e-02, 4.4782e-02, + 1.4996e-01, 1.3177e-01, 7.9663e-02, 1.2557e-01, 5.1729e-02, + -6.4316e-02, 1.2403e-01, -2.0048e-01, 1.1972e-01, -3.4580e-02, + 5.9819e-02, 1.1675e-01, -2.3962e-02, 7.6439e-03, 1.1763e-01, + -5.1537e-02, 1.5785e-01, 3.0192e-02, 1.2142e-01, 2.1545e-01, + 1.5338e-01, -1.1547e-02, 4.5611e-02, 8.2466e-02, 5.1706e-02, + 1.4715e-01, -6.1603e-03, 3.3504e-02, 2.1085e-02, 3.6481e-02, + 1.1234e-01, 8.1627e-03, 7.1839e-03, -2.7252e-02, -8.4993e-02, + -1.4174e-01, -9.3428e-02, 7.1645e-02, 8.5555e-02, 1.3293e-01, + -1.9959e-02, -1.6840e-02, -9.3083e-02, 9.5655e-02, 3.6715e-02, + -5.1703e-02, 9.1998e-02, 7.1881e-02, -1.0141e-01, 1.2980e-01, + -1.0584e-01, 8.7971e-02, 1.0475e-01, -8.8393e-02, 6.7205e-02, + -5.2151e-02, -1.4961e-01, -5.4824e-02, -5.6715e-02, -6.3596e-02, + -1.0653e-01, 1.2907e-02, 1.4339e-01, 1.5099e-01, 6.3067e-03, + 1.6588e-02, 8.9463e-02, -1.1750e-01, 1.6891e-02, -1.7042e-01, + -1.6635e-01, 5.1580e-02, 1.3655e-01, -2.4035e-02, -1.0723e-01, + 7.4677e-02, 2.4940e-02, 
6.3164e-02, -2.1078e-02, 8.4296e-02, + -5.9920e-02, -4.0778e-02, 6.1650e-02, 7.4657e-02, -1.2583e-01, + 1.5846e-01, 5.1025e-03, 1.1199e-01, 1.7501e-01, 9.1035e-02, + 1.1224e-02, -5.2327e-03, 1.1513e-02, 5.1251e-02, -1.2470e-01, + -1.6167e-02, -1.0953e-01, 5.0011e-02, -8.7080e-02, 1.6923e-01, + 2.2695e-02, 3.1640e-02, 1.3928e-01, 1.0300e-01, 2.1505e-01, + 2.4769e-01, 1.1580e-02, -3.5025e-02, 1.4856e-01, -3.9221e-03, + 5.0325e-02, -1.7961e-01, -1.2315e-02, 1.3724e-01, 1.0216e-01, + -9.3912e-02, -2.9979e-02, -1.6285e-01, 7.8192e-03, 8.5767e-02, + -3.3118e-02, -2.1102e-01, -1.0094e-01, -3.0506e-02, -2.1837e-02, + -1.9102e-01, 8.7712e-02, 1.1036e-01, 6.0651e-03, 7.0093e-02, + 1.0984e-01, 9.9566e-03, -7.1757e-03, 4.3886e-02, 5.9905e-02, + 3.3323e-02, -3.7092e+00, 5.5709e-02, 7.9547e-02, 5.5805e-03, + -1.4747e-01, 1.5289e-02, -5.2988e-02, 3.3662e-02, 1.3538e-01, + -2.1147e-01, 7.8032e-02, 6.7690e-02, -3.3204e-02, -1.3344e-01, + 1.4210e-01, 4.8270e-02, -2.6500e-02, 1.0287e-01, -5.5116e-02, + 1.6589e-01, 1.1236e-01, -4.1125e-02, -5.9105e-02, -2.1212e-01, + 3.8394e-02, 1.5368e-01, 4.8188e-02, -9.4478e-02, 2.1040e-02, + -9.0515e-02, 1.4378e-01, 1.5708e-01, -6.8028e-04, 4.4458e-02, + -7.1579e-03, 6.5651e-02, -1.2955e-02, -8.5481e-02, -4.0749e-02, + -8.3394e-02, 3.5046e-02, 1.2930e-02, -1.7214e-01, 9.5684e-02, + -4.2899e-02, -2.3204e-01, 1.1294e-01, -4.2825e-03, 1.6472e-01, + 1.0376e-02, 9.8266e-02, 1.3218e-01, 1.2411e-01, -3.2590e-02, + 3.2784e-02, -8.1404e-02, 6.8053e-02, -7.2287e-02, -9.7887e-02, + 3.1597e-02, -1.8775e-01, -1.8899e-01, 6.3650e-02, -6.3658e-02, + -5.6036e-02, -1.1804e-01, 3.8965e-02, 1.1923e-01, -3.0770e-02, + -6.4427e-03, -4.9890e-02, -9.9548e-02, 9.1358e-03, 1.5483e-01, + 3.1754e-02, 7.0219e-02, 8.9523e-02, -3.5334e-02, -1.6340e-01, + -2.0951e-02, 7.2451e-02, 1.5202e-01, -1.3042e-01, -1.3775e-01, + -7.0584e-02, 1.7459e-01, -1.6711e-03], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[-0.0193, 0.0156, -0.0011, 
..., 0.0403, -0.0181, -0.0006], + [-0.0301, 0.0043, -0.0099, ..., -0.0052, 0.0136, -0.0069], + [-0.0016, 0.0183, 0.0016, ..., -0.0064, -0.0117, 0.0118], + ..., + [ 0.0070, 0.0163, -0.0119, ..., 0.0026, -0.0226, 0.0221], + [ 0.0004, 0.0120, 0.0207, ..., -0.0106, 0.0029, 0.0323], + [ 0.0079, -0.0005, 0.0047, ..., -0.0068, -0.0219, 0.0219]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([3072]) +Parameter containing: +tensor([-0.3269, -0.2380, -0.3926, ..., -0.2299, 0.2595, -0.2932], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0171, 0.0118, 0.0016, ..., -0.0016, 0.0133, -0.0409], + [-0.0224, -0.0010, -0.0217, ..., 0.0149, 0.0045, -0.0212], + [ 0.0286, 0.0206, -0.0153, ..., -0.0103, -0.0133, -0.0120], + ..., + [-0.0076, -0.0130, 0.0111, ..., 0.0085, -0.0125, 0.0113], + [-0.0247, -0.0079, 0.0172, ..., 0.0136, -0.0062, -0.0172], + [ 0.0397, -0.0172, -0.0138, ..., 0.0265, 0.0010, 0.0029]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-3.6469e-02, 1.6272e-01, -6.0242e-02, -9.7885e-03, -4.4312e-02, + 1.1780e-01, 9.4238e-02, -1.3342e-01, 3.6133e-02, 2.9022e-02, + 2.3518e-03, 5.4352e-02, -2.9572e-02, -6.2134e-02, 7.6782e-02, + 9.3460e-03, -1.0315e-01, 4.0710e-02, 3.5400e-02, -3.4863e-01, + 3.7628e-02, 2.0355e-02, 5.0323e-02, 5.9967e-02, 5.8136e-02, + 6.6589e-02, -3.6346e-02, -2.6657e-02, -1.1108e-01, 5.0079e-02, + 8.3801e-02, -1.4366e-02, -5.9387e-02, 1.3708e-01, 4.3335e-02, + -1.9272e-02, -1.2573e-01, -6.5674e-02, 4.9591e-02, 2.5781e-01, + 2.0385e-04, 2.4597e-02, -3.5156e-02, 2.2919e-02, -5.5237e-02, + 1.8005e-02, 1.1578e-01, -4.3335e-02, -7.9468e-02, -8.9050e-02, + -4.0131e-02, 5.1575e-02, -4.7569e-03, 1.1786e-01, -7.7698e-02, + 9.4681e-03, -1.0657e-01, -5.6641e-02, 5.4321e-02, -5.0873e-02, + -1.8738e-02, -1.2428e-02, -9.0759e-02, -8.9050e-02, -5.0110e-02, + 4.4586e-02, -5.6183e-02, -5.2124e-02, -5.9547e-03, 
-4.5197e-02, + 7.0312e-02, 1.6470e-03, -1.6479e-01, -2.2324e-02, 2.2812e-02, + -1.2695e-02, -2.7435e-02, 1.3855e-01, 3.5675e-02, -5.5351e-03, + 1.8173e-02, 2.0924e-03, -5.3986e-02, -8.4412e-02, -2.4002e-02, + -6.5918e-02, 1.0815e-01, -1.7395e-01, -1.6382e-01, 8.5640e-04, + -1.9653e-01, -1.1774e-01, 4.2114e-02, 8.8379e-02, 7.2815e-02, + 3.2101e-03, 1.5030e-02, 1.0992e-01, -5.3986e-02, 7.1533e-02, + 5.2582e-02, -1.2061e-01, -4.5990e-02, 1.1414e-01, -7.8918e-02, + -3.4332e-02, 9.1248e-02, -3.1128e-02, 4.1901e-02, -3.0273e-02, + 7.7393e-02, -3.4180e-02, -7.3914e-02, 1.8982e-02, 6.4583e-03, + 9.8450e-02, -2.0462e-02, 8.6426e-02, -8.2397e-02, -8.6975e-02, + 5.4932e-02, -1.2383e-02, -1.1955e-02, 2.8122e-02, -1.9211e-02, + -7.6660e-02, -3.1738e-02, -2.9907e-02, 2.9449e-02, 1.2463e-01, + -2.1194e-02, 3.9032e-02, -5.9479e-02, -8.0994e-02, -7.1533e-02, + 5.0446e-02, -7.1831e-03, -7.8613e-02, 6.6284e-02, 4.1626e-02, + -9.6619e-02, 3.5797e-02, -7.1838e-02, -2.2858e-02, -6.3354e-02, + 4.9255e-02, -5.7800e-02, -5.3650e-02, 1.2215e-02, 9.1003e-02, + 6.5002e-02, -8.4473e-02, 5.0323e-02, 3.8177e-02, -9.4055e-02, + 2.5436e-02, -7.3792e-02, 1.0272e-01, -1.3989e-01, -4.0100e-02, + -2.4506e-02, -3.8269e-02, -1.7441e-02, -8.9722e-02, -1.0199e-01, + 5.2490e-02, 1.0254e-01, -4.8920e-02, 4.8828e-02, 7.8354e-03, + 2.0706e-02, -4.4525e-02, 2.4658e-02, -1.4233e-01, -1.1957e-01, + -3.5522e-02, -1.8994e-01, -1.1090e-01, 8.0688e-02, 6.3293e-02, + -6.2744e-02, -1.0913e-01, -1.6809e-01, -5.3024e-03, 1.7517e-02, + -1.7670e-02, -9.7046e-02, -2.3340e-01, 1.5625e-02, -9.2840e-04, + -1.7593e-02, 3.4454e-02, 1.8631e-02, -5.7312e-02, 6.7017e-02, + 4.2633e-02, 7.8186e-02, -5.2277e-02, 3.5248e-02, -2.8793e-02, + 2.9421e-04, -3.2806e-02, 4.8004e-02, -8.1909e-02, 1.6434e-02, + -1.1469e-01, 1.3763e-02, -8.5510e-02, -1.5454e-01, 7.0435e-02, + 4.8462e-02, -5.6183e-02, -2.0340e-02, 8.4290e-02, 4.1321e-02, + 9.0393e-02, 5.2094e-02, -3.8727e-02, 2.0203e-01, 1.1884e-01, + -2.3590e-02, 1.0632e-01, 2.6199e-02, 
-1.1391e-02, -1.5572e-02, + 1.0620e-01, -6.2408e-02, 6.1615e-02, 3.4149e-02, 4.5410e-02, + -7.7393e-02, -9.2896e-02, 3.4943e-02, -1.6205e-02, 4.5502e-02, + -8.2947e-02, 2.9266e-02, -7.4120e-03, -6.8398e-03, 1.2091e-01, + 1.1719e-01, -3.0045e-02, -3.6316e-02, 3.4210e-02, -3.4821e-02, + -9.2834e-02, 5.9753e-02, -7.4524e-02, -4.0802e-02, 1.2146e-02, + -2.7390e-02, -4.1107e-02, -3.3539e-02, -6.5063e-02, 1.2199e-02, + 9.4971e-02, -8.0933e-02, 1.7262e-03, -2.5940e-02, 1.6602e-02, + -1.1609e-01, -5.7434e-02, -1.1743e-01, 5.5809e-03, -8.1421e-02, + -1.6357e-01, 2.0802e-04, 6.3354e-02, 2.3861e-03, 3.3966e-02, + 1.7627e-01, -1.3269e-01, 2.1301e-02, 8.4763e-03, 4.2053e-02, + -5.5695e-02, 1.4355e-01, -2.0874e-02, -8.5999e-02, -1.7105e-02, + 7.0618e-02, 2.7496e-02, -1.4026e-01, -2.2705e-02, -8.4534e-02, + 5.2612e-02, -1.0693e-01, 1.2201e-01, -4.8218e-03, 5.1270e-02, + -2.8778e-02, 4.6417e-02, -2.6276e-02, -1.0938e-01, -4.6814e-02, + 4.5929e-02, -1.0254e-02, 8.2214e-02, -5.8228e-02, 9.7656e-02, + 1.7578e-02, 1.2866e-01, 1.9958e-02, 1.3049e-01, -9.2224e-02, + -9.8572e-03, -1.0309e-01, -1.1932e-02, -3.8891e-03, 5.7220e-02, + 2.9099e-02, -5.8716e-02, 3.4912e-02, -4.6539e-02, 3.0609e-02, + -4.7028e-02, -6.7932e-02, 5.2795e-02, -6.7825e-03, 4.4159e-02, + -5.0171e-02, -7.4951e-02, 1.3069e-02, 4.0924e-02, 9.9463e-01, + -1.9760e-02, 7.8918e-02, -1.3257e-01, -4.0253e-02, -5.2856e-02, + -3.4088e-02, 9.1324e-03, -8.1406e-03, 1.1497e-02, 3.5767e-02, + -1.1243e-01, -7.6050e-02, 4.4586e-02, 6.6345e-02, -6.4880e-02, + 2.9888e-03, 9.5337e-02, 1.3599e-01, -1.0956e-01, 5.1697e-02, + 7.1045e-02, 3.2845e-03, 2.7924e-02, -5.4321e-02, -7.1411e-02, + 1.7236e-01, -6.8176e-02, -3.7659e-02, -2.7985e-02, 7.5607e-03, + -1.8616e-02, -8.5678e-03, -4.1016e-02, -2.7161e-02, 1.7371e-01, + -9.4604e-02, -2.1423e-02, -9.6497e-02, 1.0547e-01, -1.2134e-01, + 6.7139e-03, -1.1578e-01, -7.1167e-02, -9.4543e-02, -2.4414e-02, + 3.5763e-03, 2.0584e-02, 1.1566e-01, -2.4231e-02, 4.9744e-02, + -1.6876e-02, 7.9041e-02, 
-2.6382e-02, 1.2622e-01, -1.6693e-02, + 7.6355e-02, 1.0046e-01, 4.1771e-03, -2.0605e-01, 2.7054e-02, + 4.7913e-02, 1.8906e-02, -6.1096e-02, 6.4026e-02, -3.7659e-02, + -4.2145e-02, 1.6931e-01, 8.7646e-02, -7.4890e-02, 3.7079e-02, + -1.6769e-02, -1.0052e-01, -1.1389e-01, 1.5393e-01, -1.6556e-02, + -8.6975e-02, -6.1737e-02, -1.2402e-01, -1.3084e-02, -2.8839e-03, + -1.0582e-02, -4.9530e-02, 5.4741e-03, 4.7333e-02, -3.1342e-02, + 1.1877e-01, 5.7068e-02, 6.1859e-02, 6.5041e-03, 1.8188e-01, + 6.9092e-01, -2.3331e-02, -5.5878e-02, 3.7659e-02, -1.0307e-02, + -3.2410e-02, 8.8745e-02, -8.8867e-02, -1.4648e-01, -2.7161e-02, + 8.2947e-02, 1.4282e-01, 9.1370e-02, -1.0574e-02, 4.1107e-02, + 1.4062e-01, 1.1792e-01, 1.1810e-02, -9.7290e-02, 5.8228e-02, + 1.3892e-01, -9.9976e-02, 2.5116e-02, 5.3009e-02, -3.7659e-02, + 9.7656e-02, -2.5070e-02, 3.4424e-02, -1.7197e-02, 3.8849e-02, + -1.0315e-01, -8.4961e-02, 5.0842e-02, 4.2175e-02, 5.7770e-02, + -1.8555e-02, -4.3579e-02, -5.0446e-02, 6.9946e-02, -1.0941e-02, + 4.6143e-02, -3.8208e-02, 9.8877e-02, 4.0833e-02, 9.1492e-02, + 2.1210e-02, -7.8125e-02, -7.5378e-02, -1.0114e-01, -4.9042e-02, + 6.9153e-02, 9.6512e-03, 1.8164e-01, 1.7731e-02, 4.1565e-02, + -2.4463e-01, 1.7380e-02, -1.4294e-01, -3.6163e-02, 3.0518e-04, + -1.7105e-02, 1.0443e-01, 6.5308e-03, 8.9905e-02, -6.4331e-02, + -2.5452e-02, -1.5427e-02, -1.6614e-01, -7.2937e-02, 8.9844e-02, + -1.0754e-01, 1.5839e-02, 7.0923e-02, 5.2460e-02, -1.1395e-01, + 5.6427e-02, -3.1311e-02, 9.8450e-02, 1.9730e-02, 3.8934e-04, + 1.5717e-02, -1.4062e-01, -5.0781e-02, 3.6804e-02, -5.0934e-02, + 4.3274e-02, 6.4514e-02, -6.1340e-02, -4.4342e-02, 7.7454e-02, + 3.7567e-02, 2.5925e-02, 1.0706e-01, -5.6976e-02, 1.3954e-02, + -3.6407e-02, -6.9824e-02, -9.9304e-02, 1.1206e-03, -3.4389e-03, + -4.3121e-02, -9.1614e-02, -9.4543e-02, 1.6876e-02, -6.6284e-02, + 9.6497e-02, -7.1594e-02, -4.3732e-02, 9.7839e-02, -1.1487e-01, + -8.4167e-02, -4.0497e-02, 1.3863e-02, 5.8670e-03, 1.7347e-03, + 7.2937e-02, -8.8684e-02, 
-3.0785e-03, 2.5955e-02, 3.1860e-02, + -9.8572e-03, 7.8369e-02, 3.5614e-02, -4.3091e-02, 4.0100e-02, + 3.4943e-02, 3.1525e-02, 3.8330e-02, -2.8000e-02, -2.6703e-02, + 1.0162e-01, 7.0801e-02, 1.5979e-01, -1.8738e-02, -5.5176e-02, + -6.5369e-02, -5.6244e-02, 1.6575e-03, -1.1670e-01, 6.4148e-02, + 5.0293e-02, 6.1066e-02, -1.0155e-02, -1.2354e-01, 2.2614e-02, + 4.6417e-02, -4.5593e-02, -9.8328e-02, 2.4460e-02, -2.4521e-02, + 3.0594e-02, 4.7913e-02, -5.8441e-02, -1.0048e-02, -7.2823e-03, + 9.5459e-02, -6.4735e-03, -5.1453e-02, 4.0863e-02, 1.2039e-02, + 2.3270e-02, 2.8336e-02, 1.0883e-01, -1.7624e-02, -1.7532e-02, + 3.1891e-02, -7.6904e-02, 4.4441e-03, -9.7046e-02, -3.0914e-02, + 1.3321e-02, -4.2572e-02, 6.9458e-02, 2.3178e-02, 4.8828e-02, + 4.7943e-02, 8.3466e-03, -5.9906e-02, -5.3894e-02, -6.7444e-02, + -7.5867e-02, -5.9174e-02, -1.2718e-02, 2.3300e-02, 4.2236e-02, + 1.3599e-01, 9.7885e-03, -1.0962e-01, 3.4668e-02, -4.9713e-02, + 3.2783e-05, 4.0741e-02, 5.3436e-02, 9.0210e-02, 8.6914e-02, + 7.8857e-02, 1.4172e-01, 2.4078e-02, 1.0687e-01, -6.6589e-02, + -1.5771e-01, -8.2703e-02, 9.8694e-02, -4.8904e-03, -4.0649e-02, + -1.0876e-01, -2.2232e-02, 5.2094e-02, -4.8370e-02, 7.5439e-02, + 1.2535e-02, -6.5674e-02, 2.8503e-02, -2.5620e-02, 1.3664e-02, + 6.7993e-02, -1.0150e-01, -7.6599e-02, 1.1528e-02, 3.7231e-02, + -8.5205e-02, 2.2018e-02, -5.7281e-02, -1.9135e-02, 4.3518e-02, + 8.1604e-02, -1.0181e-01, -3.8147e-02, 1.2436e-02, -8.5632e-02, + 6.4636e-02, -4.8584e-02, 6.6772e-02, -5.1300e-02, 2.3651e-02, + 1.0858e-01, -1.1981e-01, 3.7632e-03, 5.8533e-02, -3.1342e-02, + 1.3403e-01, -9.2773e-02, 9.3323e-02, -2.6443e-02, 8.2092e-02, + -5.4688e-02, 1.0303e-01, -7.1960e-02, -1.4185e-01, -1.2537e-01, + -1.2802e-02, -1.9669e-02, 6.4880e-02, 4.2358e-02, -4.2839e-03, + 1.2189e-01, -6.2828e-03, 3.8605e-02, -1.5038e-02, -9.3140e-02, + 4.7028e-02, 6.0883e-02, -4.2938e-02, 5.2071e-04, 2.1610e-03, + 8.2397e-02, 2.5977e-01, -3.5496e-03, 9.6497e-02, 8.2703e-02, + 5.9357e-02, -1.5783e-03, 
-8.3984e-02, 3.4790e-02, -5.7556e-02, + -4.6875e-02, 3.5143e-04, 1.0303e-01, 1.6235e-02, -3.5797e-02, + 7.6355e-02, 2.5269e-02, 4.0283e-02, 3.3722e-02, 5.1483e-02, + 8.1909e-02, 4.9896e-02, -6.7932e-02, -1.2335e-01, -6.5613e-02, + 1.2131e-02, -2.9205e-02, -9.3140e-02, -2.8885e-02, 2.0538e-02, + 9.0698e-02, -5.4443e-02, 8.4900e-02, 2.9633e-02, 5.1758e-02, + 1.3763e-02, 6.9519e-02, 6.0608e-02, 1.0583e-01, -2.4429e-02, + -7.5073e-02, -3.4241e-02, 9.9915e-02, -6.2347e-02, 4.7493e-03, + 3.2978e-03, 2.6535e-02, 1.1444e-04, -2.0798e-02, 7.9834e-02, + -4.9347e-02, -1.0492e-01, 1.5045e-02, -1.1493e-01, -2.9953e-02, + -1.4519e-02, -2.4918e-02, 3.1372e-02, 6.4209e-02, 1.0780e-02, + -5.0507e-02, -1.4931e-02, -1.0931e-01, 7.7019e-03, -1.9385e-01, + 4.1168e-02, -5.6366e-02, -7.5562e-02, -6.1890e-02, -1.1023e-01, + 2.9526e-02, 9.0515e-02, -3.5744e-03, 4.0466e-02, 4.5441e-02, + 3.4576e-02, 5.6793e-02, -1.0602e-01, -1.1299e-02, 5.1208e-02, + -4.1046e-02, 9.7198e-03, -6.1462e-02, -1.0052e-01, 8.4076e-03, + -9.4604e-02, 9.8953e-03, 6.1554e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([2.2836, 2.2623, 2.2728, 2.2751, 2.2677, 2.4896, 2.4848, 2.3706, 2.5277, + 2.2358, 2.3212, 2.3091, 2.3751, 2.3409, 2.3887, 2.2233, 2.3059, 2.3760, + 2.3349, 2.7988, 2.3484, 2.3604, 2.3025, 2.4139, 2.2065, 2.5094, 2.4309, + 2.4903, 2.1988, 2.2597, 2.3186, 2.3134, 2.3185, 2.6018, 2.2067, 2.2691, + 2.3309, 2.2402, 2.2445, 2.7682, 2.2321, 2.3523, 2.3791, 2.2468, 2.3558, + 2.3014, 2.3595, 2.3211, 2.3757, 2.1141, 2.2303, 2.4424, 2.4205, 2.3217, + 2.3337, 2.3350, 2.3622, 2.3076, 2.3821, 2.2884, 2.4378, 2.3676, 2.3492, + 2.2260, 2.3221, 2.2602, 2.3660, 2.3384, 2.4014, 2.7936, 2.4196, 2.3274, + 2.3671, 2.5887, 2.2795, 2.3208, 2.2872, 2.2780, 2.1992, 2.4337, 2.2422, + 2.2682, 2.2171, 2.3948, 2.2670, 2.2343, 2.2981, 2.2693, 2.2724, 2.2160, + 2.3139, 2.3121, 2.3384, 2.4798, 2.3535, 2.2585, 2.2772, 2.3278, 2.3495, + 2.3174, 2.4153, 2.5859, 2.3195, 
2.4833, 2.2640, 2.2159, 2.3548, 2.2305, + 2.3506, 2.3109, 2.2594, 2.3875, 2.4088, 2.4293, 2.4857, 2.2573, 2.3575, + 2.3645, 2.4322, 2.2848, 2.3235, 2.3897, 2.2430, 2.3598, 2.3608, 2.1504, + 2.5151, 2.5464, 2.3191, 2.3495, 2.1983, 2.2918, 2.4158, 2.3904, 2.4342, + 2.4049, 2.3032, 2.5024, 2.2924, 2.5428, 2.4114, 2.3645, 2.3113, 2.3470, + 2.2809, 2.2530, 2.2246, 2.3749, 2.3138, 2.5595, 2.3965, 2.3072, 2.4053, + 2.4625, 2.3788, 2.6980, 2.3840, 2.3440, 2.2617, 2.3744, 2.1746, 2.3023, + 2.2086, 2.4053, 2.4884, 2.2782, 2.2567, 2.5410, 2.3193, 2.3158, 2.3928, + 2.4509, 2.0716, 2.5628, 2.2551, 2.4581, 2.3956, 2.2217, 2.2878, 2.3877, + 2.3486, 2.2614, 2.3617, 2.2922, 2.2638, 2.4065, 2.2689, 1.0988, 2.3420, + 2.3585, 2.2917, 2.3827, 2.3319, 2.3020, 2.3903, 2.2736, 2.2319, 2.3806, + 1.3342, 2.3759, 2.3139, 2.2133, 2.2983, 2.4403, 2.3521, 2.4802, 2.5205, + 2.4101, 2.4336, 2.3705, 2.3335, 2.3805, 2.5011, 2.1977, 2.2888, 2.4008, + 2.2075, 2.4126, 2.3203, 2.4452, 2.4104, 2.2892, 2.4073, 2.3455, 2.3511, + 2.3958, 2.4004, 2.3555, 2.2310, 2.3726, 2.2519, 2.2059, 2.2110, 2.2125, + 2.1648, 2.4104, 2.3876, 2.2703, 2.3979, 2.2819, 2.3374, 2.3302, 2.2354, + 2.3461, 2.2734, 2.3462, 2.5224, 2.4833, 2.3291, 2.3517, 2.2777, 2.3085, + 2.3876, 2.3782, 2.3474, 2.3451, 2.4248, 2.3716, 2.6404, 2.3153, 2.3406, + 2.3860, 2.3607, 2.3962, 2.3189, 2.2672, 2.3711, 2.3791, 2.2194, 2.2803, + 2.4083, 2.2761, 2.4173, 2.2845, 2.3444, 2.5596, 2.4811, 2.4737, 2.4499, + 2.4351, 2.3279, 2.7294, 2.2321, 2.3936, 2.2265, 2.3656, 2.2984, 2.2823, + 2.5139, 2.2303, 2.2668, 2.2091, 2.2940, 2.3453, 2.3100, 2.5271, 2.5363, + 2.3898, 2.2919, 2.4632, 2.3279, 2.5609, 2.3796, 2.3439, 2.2990, 2.1915, + 2.4004, 2.2724, 2.3014, 2.3202, 2.2661, 2.4051, 2.3114, 2.4008, 2.2553, + 2.4169, 2.3254, 2.3748, 2.4071, 2.4148, 2.3257, 2.2542, 2.2565, 2.3255, + 1.0368, 2.4136, 2.4344, 2.2713, 2.3418, 2.3693, 2.3610, 2.3290, 2.2906, + 2.3320, 2.4559, 2.2263, 2.3971, 2.2848, 2.2835, 2.3296, 2.3135, 2.3973, + 2.4620, 2.3723, 2.3816, 2.4425, 
2.4960, 2.3998, 2.3360, 2.3177, 2.1258, + 2.3319, 2.4550, 2.2678, 2.4137, 2.3452, 2.5231, 2.4271, 2.2389, 2.2411, + 2.3888, 2.3768, 2.4222, 2.3935, 2.3259, 2.3471, 2.3140, 2.4708, 2.3969, + 2.3159, 2.4021, 2.5023, 2.3574, 2.2141, 2.4547, 2.3712, 2.2649, 2.3120, + 2.3954, 2.2454, 2.3630, 2.3928, 2.4318, 2.3398, 2.3917, 2.4447, 2.2607, + 2.3408, 2.3916, 2.3773, 2.3499, 2.4071, 2.3030, 2.4019, 2.2296, 2.4911, + 2.3300, 2.3643, 2.4153, 2.2525, 2.4206, 2.2427, 2.4401, 2.3681, 2.3906, + 2.4328, 2.2356, 2.2297, 2.3566, 2.2836, 2.2859, 2.5026, 2.3523, 2.4348, + 2.6588, 3.7533, 2.3396, 2.2670, 2.2848, 2.4338, 2.3247, 2.2380, 2.2849, + 2.3591, 2.4750, 2.3341, 2.2376, 2.3901, 2.6482, 2.2936, 2.4247, 2.4099, + 2.4332, 2.4702, 2.4999, 2.5136, 2.2909, 2.2548, 2.3399, 2.4379, 2.3890, + 2.1797, 2.2449, 2.4876, 2.4356, 2.2469, 2.3131, 2.5125, 2.3290, 2.3605, + 2.3724, 2.3760, 2.4266, 2.4542, 2.3139, 2.3399, 2.3044, 2.3643, 2.3527, + 2.2571, 2.3809, 2.3492, 2.3256, 2.3224, 2.2221, 2.3733, 2.3437, 2.1776, + 2.3822, 2.3127, 2.3458, 2.4210, 2.4154, 2.3127, 2.5117, 2.3292, 2.3855, + 2.3198, 2.4858, 2.2322, 2.3392, 2.1891, 2.4567, 2.4760, 2.2336, 2.4192, + 2.2877, 2.2794, 2.3042, 2.2707, 2.3126, 2.2663, 2.4194, 2.3827, 2.2848, + 2.2669, 2.3965, 2.3590, 2.2713, 2.3002, 2.3128, 2.3614, 2.3517, 2.3291, + 2.5153, 2.2987, 2.3548, 2.3768, 2.3937, 2.2768, 2.4841, 2.3234, 2.1443, + 2.2820, 2.2877, 2.2695, 2.4717, 2.3183, 2.2493, 2.6564, 2.2307, 2.3746, + 2.3745, 2.3673, 2.2790, 2.3275, 2.3607, 2.3602, 2.3226, 2.5189, 2.3797, + 2.3876, 2.2800, 2.4039, 2.2463, 2.3176, 2.3052, 2.4909, 2.5803, 2.4538, + 2.3132, 2.3479, 2.4377, 2.3054, 2.5416, 2.4145, 2.2907, 2.3398, 2.4638, + 2.3699, 2.3819, 2.2982, 2.4746, 2.5282, 2.3545, 2.3307, 2.3648, 2.3536, + 2.3116, 2.3079, 2.3169, 2.6240, 2.2975, 2.3685, 2.4311, 2.3693, 2.4842, + 2.3711, 2.3410, 2.5237, 2.3753, 2.3804, 2.1967, 2.4785, 2.3339, 2.3392, + 2.4601, 2.2095, 2.4320, 2.2722, 2.2876, 2.3902, 2.3450, 2.3190, 2.3629, + 2.3394, 2.3433, 2.3516, 2.2867, 
2.3276, 2.3313, 2.3305, 2.3285, 2.5351, + 2.4824, 2.2602, 2.3945, 2.3668, 2.3317, 2.1739, 2.2305, 2.4108, 2.3467, + 2.2079, 2.4747, 2.2943, 2.3790, 2.6729, 2.5252, 2.3475, 2.3181, 2.2528, + 2.2801, 2.3034, 2.2967, 2.6657, 2.4441, 2.3536, 2.4201, 2.3460, 2.4714, + 2.2284, 2.4259, 2.3764, 2.4271, 2.5601, 2.4350, 2.3774, 2.3004, 2.3895, + 2.4669, 2.4508, 2.3371, 2.3227, 2.5032, 2.3196, 2.3379, 2.3778, 2.3761, + 2.1820, 2.3314, 2.1958, 2.2008, 2.4098, 2.5979, 2.2799, 2.3461, 2.4614, + 2.3359, 2.3336, 2.6203, 2.4267, 2.2657, 2.3032, 2.3602, 2.2581, 2.4675, + 2.2453, 2.3804, 2.2823, 2.1461, 2.2759, 2.5891, 2.2736, 2.2670, 2.2639, + 2.2100, 2.3349, 2.5720, 2.3157, 2.2162, 2.3284, 2.3788, 2.2535, 2.2463, + 2.3431, 2.4069, 2.3624, 2.4723, 2.2752, 2.4635, 2.9034, 2.2371, 2.4090, + 2.2496, 2.3304, 2.3411, 2.2727, 2.4774, 2.3166, 2.3610, 2.3738, 2.3590, + 2.3766, 2.2283, 2.3738, 2.3774, 2.3004, 2.4230, 2.3889, 2.3237, 2.2233, + 2.4146, 2.3798, 2.4000, 2.4473, 2.4295, 2.2930, 2.1776, 2.4433, 2.2065, + 2.3386, 2.4510, 2.4059, 2.2630, 2.3705, 2.3392, 2.3462, 2.4111, 2.3758, + 2.4813, 2.3263, 2.3184, 2.4310, 2.4148, 2.3368, 2.3599, 2.3383, 2.2286, + 2.3814, 2.3993, 2.3483, 2.3905, 2.3331, 2.3333, 2.2417, 2.4162, 2.2706, + 2.3414, 2.3442, 2.2891, 2.6559, 2.2701, 2.2298, 2.2341, 2.2531, 2.4576, + 2.3354, 2.2710, 2.1691, 2.3822, 2.3545, 2.3533, 2.2163, 2.4231, 2.4164, + 2.2920, 2.3275, 2.4484, 2.2593, 2.3305, 2.3374, 2.3359, 2.3922, 2.3363, + 2.3097, 2.3412, 2.3888], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 8.6604e-02, -2.9286e-01, 1.9763e-01, -1.2163e-01, -3.8894e-01, + -2.1842e-01, -1.2438e-01, 1.4834e-01, 6.4301e-02, -7.7811e-02, + -1.3064e-01, -8.3709e-02, 1.7816e-01, -2.4458e-01, 5.9149e-02, + -2.5113e-02, 8.0249e-02, -7.7466e-02, -3.5921e-01, -3.3318e-01, + -2.8440e-01, 5.6322e-02, 2.3694e-01, -3.0856e-01, 1.0845e-01, + 1.2182e-01, -2.4052e-01, 1.5255e-01, 3.2016e-02, -7.5441e-02, + 4.5930e-02, 7.0053e-02, 1.4111e-01, -4.6860e-01, 
8.0392e-02, + 1.4536e-01, -9.4315e-02, 3.2234e-02, 2.7123e-01, -5.3386e-01, + -1.3102e-01, 8.6262e-02, 1.8063e-01, -4.6477e-02, -3.9122e-02, + -2.0202e-01, -2.6970e-01, -7.5590e-02, 1.5426e-01, 1.8237e-01, + -1.1917e-01, -3.6866e-01, -1.2210e-01, -1.5103e-02, -1.8397e-01, + 5.8603e-01, 1.4954e-01, -4.9178e-03, -4.2942e-02, 1.1036e-01, + 3.8296e-04, 3.2313e-01, 1.6136e-01, -1.1096e-02, 2.0610e-01, + 3.6640e-02, -2.3877e-02, -5.1362e-02, -4.5582e-01, -8.1244e-01, + -8.7015e-03, -2.3145e-01, 1.4538e-01, -4.8757e-01, -9.1498e-02, + -2.2466e-01, 2.7441e-01, -2.5719e-01, -1.5121e-01, 4.0123e-01, + 1.6970e-01, -1.8025e-01, 2.7369e-03, -5.2702e-02, -2.5943e-01, + 1.9232e-01, 2.6753e-04, 1.2177e-01, 2.5853e-01, -1.5520e-01, + 1.1621e-01, -2.7038e-02, -1.6476e-01, 1.5174e-01, -5.6310e-02, + -6.6330e-02, -2.3552e-01, -1.8034e-01, -7.3568e-02, 1.8432e-01, + -3.8841e-01, -2.1282e-01, -9.6584e-02, -2.2054e-01, 2.3119e-01, + 7.7053e-02, -5.6909e-02, -1.3414e-01, 8.0427e-02, 2.1012e-01, + 2.4924e-01, 1.1686e-01, -1.3254e-01, 5.9697e-02, -1.7765e-01, + -1.6437e-01, 3.3616e-01, 8.5083e-03, -2.5418e-02, 2.6027e-01, + -1.6043e-01, 8.2424e-02, -1.3150e-01, -5.3904e-02, 1.0872e-01, + 1.5643e-01, 1.3161e-01, 5.1541e-01, -1.5884e-01, -1.8408e-02, + 1.2766e-01, -8.4682e-02, 3.3091e-01, 1.6210e-01, 2.0954e-01, + 1.3627e-01, -1.8098e-01, -2.8791e-01, 1.5007e-01, -4.2789e-01, + 1.8015e-01, 1.0072e-01, 2.2316e-01, 1.5218e-01, 1.0389e-01, + -1.3575e-01, 1.2173e-01, -3.1166e-02, -6.5930e-02, -2.7193e-01, + -7.4922e-02, 1.2258e-01, 2.0601e-01, -3.3073e-01, -2.0484e-01, + 6.6838e-01, 1.8386e-01, -4.5000e-01, 7.0386e-02, -1.9580e-01, + 1.2700e-01, -8.3763e-02, 4.0264e-03, -5.8593e-02, 2.2412e-01, + 1.7506e-04, 1.8127e-01, 7.3059e-02, 1.8272e-01, -7.2565e-02, + 6.6788e-02, -2.5547e-01, -2.3021e-01, 6.6056e-01, 7.9189e-03, + 6.3060e-02, 2.2812e-01, 5.6355e-02, -1.2058e-01, 5.2562e-02, + -4.2476e-01, 2.2737e-02, -2.9150e-02, 8.7327e-02, -1.3205e-01, + 5.9143e-01, 3.7196e-02, 1.3226e-01, 1.4421e-01, 
-2.7715e-02, + -4.6107e-01, 1.6758e-02, 1.8647e-01, 6.3556e-02, -3.9387e-02, + 4.3825e-02, 6.2616e-02, -1.9770e-01, -4.2564e-01, 3.2139e-01, + 1.8278e-02, 2.9093e-02, 1.3184e-01, -1.8062e-01, 3.7262e-02, + 5.3266e-02, 2.6615e-01, -8.8052e-02, -4.1996e-02, -3.9655e-01, + 1.2197e-01, -7.8418e-02, -5.1390e-01, -7.3780e-02, 1.8691e-01, + -7.2425e-02, -5.5631e-02, -1.4905e-01, -2.6188e-01, 3.4893e-02, + 3.2901e-01, -1.4541e-01, -4.0933e-01, -1.2446e-01, 4.5600e-02, + 6.2080e-02, -2.2978e-01, 1.6788e-02, -1.0107e-01, 1.8279e-01, + -7.4715e-02, 1.4252e-01, 1.1494e-01, 1.2269e-01, -2.4501e-01, + -2.5672e-01, 2.9405e-01, 9.4859e-02, -3.2469e-03, -1.3721e-01, + -1.6615e-01, 2.3003e-01, 2.1743e-01, -3.5526e-01, -1.8183e-01, + 5.2396e-03, -3.8641e-01, 2.7500e-02, -2.1637e-01, 2.3685e-01, + -1.1876e-01, 7.4968e-02, 3.1369e-01, 6.9360e-02, -1.0279e-01, + -2.0016e-01, 3.0319e-01, -4.0203e-01, -4.8866e-01, -3.3533e-01, + -4.9880e-02, -1.4934e-01, 1.5540e-01, 3.2154e-02, 7.3181e-02, + 6.0271e-02, 1.6326e-01, 6.9507e-03, 1.4214e-01, -6.3119e-02, + -2.9987e-01, 7.6350e-02, 3.7965e-02, 1.5102e-03, 1.6799e-01, + -2.3439e-01, -2.9438e-01, 2.4812e-01, -1.9865e-01, -8.4152e-02, + 5.5912e-03, -8.7288e-01, 1.2965e-01, 3.3379e-01, 2.3957e-01, + -1.5091e-02, -1.2882e-01, -8.1023e-02, -1.2267e-01, 1.1010e-01, + -2.1774e-01, 3.1341e-02, 1.2947e-01, 1.6552e-03, 5.0242e-03, + -4.7059e-01, -1.4026e-01, -6.4873e-02, 2.3260e-01, -2.9756e-01, + -3.5772e-01, -3.4365e-01, -3.4945e-01, -1.9062e-01, 4.4129e-01, + 8.3628e-02, -7.4316e-02, 1.8464e-01, -1.5011e-01, -1.9917e-01, + 7.2023e-02, -1.5736e-01, 1.1195e-01, -1.0936e-03, 2.1782e-02, + 1.7796e-01, 2.2404e-01, -7.0521e-02, -1.3729e-01, 9.3621e-02, + -4.7864e-01, -2.9099e-02, 9.3240e-02, -1.6473e-01, -1.7154e-01, + -1.6437e-01, -3.6914e-01, 2.9099e-01, 1.8210e-01, 9.8001e-02, + -1.9131e-02, -6.1023e-02, -2.6954e-02, 2.3269e-02, -1.0748e-01, + 2.3885e-02, -3.0717e-01, 7.0289e-03, -2.9415e-01, 7.8834e-02, + 2.2555e-01, -1.4750e-01, -4.4716e-01, 
3.1579e-01, -2.2415e-01, + 2.3326e-01, -3.4718e-01, -8.0136e-02, -4.9056e-02, 1.8268e-01, + 1.2392e-01, -9.0308e-02, -1.1581e-01, 3.6533e-02, 1.3277e-01, + 1.6580e-01, -2.9610e-01, 1.5124e-02, 3.1983e-02, -9.6834e-02, + 4.0151e-02, -2.6227e-02, 6.1991e-02, -3.5487e-01, 1.0990e-01, + -3.1649e-02, -6.0119e-02, 1.9958e-01, 4.1921e-02, -4.4497e-02, + -1.2652e-01, 1.2113e-01, -3.8615e-02, -5.6424e-02, -1.0027e-01, + 2.4490e-01, 5.1361e-02, 3.0297e-01, -9.0036e-02, -1.8055e-01, + -1.3455e-01, -2.0498e-01, -2.2928e-01, 1.1047e-01, 6.0962e-02, + -4.5831e-01, -2.5677e-01, 3.0335e-01, 1.6979e-01, 5.4195e-03, + 1.8430e-01, -4.2833e-01, 1.5375e-01, 7.8641e-02, -2.6052e-01, + 4.1466e-03, -1.3320e-01, 5.0047e-02, -1.3446e-01, -5.9763e-02, + -2.1034e-01, 1.8951e-01, -1.7249e-01, -2.8103e-01, -5.9848e-02, + 8.7397e-02, 3.8154e-02, 1.6206e-02, -9.0562e-02, 3.1392e-01, + -2.5056e-02, -4.6812e-01, -1.3958e-01, 7.6124e-02, 1.3683e-01, + -1.4261e+00, 2.3469e-02, -4.5264e-02, 8.6404e-02, 1.7327e-01, + 9.3496e-02, -6.0595e-02, 8.4582e-02, 2.0683e-01, -2.6103e-02, + 1.3176e-01, -2.3246e-01, 6.2243e-02, -2.7171e-01, 3.5642e-02, + -4.2095e-01, -2.5752e-01, -1.3984e-01, -1.1365e-01, -7.5260e-02, + -8.9288e-02, -5.2441e-03, 1.1592e-01, -7.2560e-02, -8.0440e-02, + -1.6767e-01, 1.1599e-01, 5.8748e-02, -3.3971e-01, -2.0054e-01, + 5.3322e-02, 2.9757e-01, 3.9025e-01, -1.4760e-01, -1.7380e-01, + -5.3329e-02, -2.1588e-02, -1.7357e-01, -1.0301e-01, -1.2302e-01, + -1.8281e-01, -1.6062e-01, -2.5224e-01, 4.3526e-02, -2.0653e-01, + -1.2789e-01, -4.3447e-02, -1.1119e-01, 1.6140e-01, -1.3325e-01, + 1.7198e-01, -1.1098e-01, 9.2506e-02, -9.1265e-02, -4.8381e-02, + 1.0725e-01, 2.3545e-02, -1.3973e-01, 1.8025e-01, -2.7206e-01, + -2.6945e-01, -4.6276e-02, -5.3086e-01, -2.1206e-03, -5.7141e-03, + 2.9229e-02, 4.3049e-04, 3.9786e-01, 1.9325e-02, -1.4418e-01, + 3.4156e-02, -3.8111e-02, -6.2930e-03, -7.2592e-02, 1.1503e-01, + -1.9450e-01, -7.8893e-02, -4.4225e-02, -1.5106e-01, -1.5963e-01, + -1.8364e-01, 1.1715e-01, 
-9.9646e-02, -3.7290e-02, -3.4496e-02, + 1.2110e-01, -4.7361e-03, 7.1833e-02, -2.6530e-02, -1.9589e-01, + 2.0624e-01, -1.1726e-01, -6.6253e-02, 1.3797e-01, 9.6431e-02, + -3.9810e-01, 1.4199e-01, -1.7143e-01, -1.1868e-01, -2.3157e-01, + 2.4837e-01, 3.2089e-01, -2.0151e-01, -1.6186e-01, 5.8250e-01, + 2.3087e-02, -2.6994e-02, -7.5188e-02, 1.6035e-01, -9.8618e-02, + -6.2415e-02, -2.8756e-02, 1.6325e-01, -2.2189e-01, 1.2451e-01, + -3.8018e-02, 1.2878e-01, -6.3290e-02, -5.8006e-02, -1.2467e-01, + -3.6006e-01, 1.5278e-01, -1.8236e-01, 3.0076e-01, -2.6767e-01, + 2.2054e-02, 1.0192e-02, 4.0691e-02, 3.1209e-01, 3.8954e-01, + -3.6524e-02, -3.8531e-01, -4.2814e-01, -1.7692e-01, 1.1740e-01, + 7.4477e-02, 2.0804e-02, 2.5254e-01, -1.4224e-01, -1.9235e-01, + 3.5380e-01, 1.1057e-01, -1.0711e-01, -1.6409e-01, -1.2642e-01, + -1.1363e-01, -7.9586e-01, 8.1844e-02, 1.3153e-01, -4.1466e-02, + 1.7913e-01, -3.3507e-01, 3.3697e-03, 6.7601e-02, -2.2027e-01, + -6.6688e-02, -1.2807e-02, 1.7038e-01, -3.2307e-01, 2.0768e-01, + 2.2008e-01, -9.4049e-02, -1.8320e-01, -1.4369e-01, 2.4456e-01, + 2.0687e-01, -2.6573e-01, -1.9569e-01, 2.2473e-01, 1.6415e-01, + -3.9568e-02, 8.5480e-02, -3.2554e-02, -4.2158e-02, -9.3549e-02, + -3.7789e-02, -1.6005e-01, 1.0470e-01, -3.8674e-01, -9.9921e-02, + 2.2348e-01, 2.8183e-01, -1.1313e-01, 1.7603e-01, -6.7394e-02, + 1.0005e-01, 1.6291e-01, 7.7057e-03, -1.8000e-01, 1.7484e-01, + -7.1115e-02, -3.3143e-02, -6.5741e-01, -2.4860e-01, 2.0984e-01, + -2.1012e-01, -1.2822e-01, -2.5784e-01, -1.0285e-02, 1.4045e-01, + 3.4105e-01, 5.2854e-01, -5.3262e-02, -2.7384e-01, 1.2843e-01, + -2.7380e-02, 1.8670e-02, -1.9795e-01, 2.8344e-01, -5.1208e-02, + 4.4121e-01, 3.3801e-03, 3.1375e-01, 9.6573e-02, -1.9746e-01, + -2.7762e-01, 3.9339e-01, 7.2044e-02, -8.3836e-03, -3.9421e-01, + -8.8247e-02, 9.3472e-02, -1.6321e-01, -3.0705e-02, -1.7254e-01, + -3.9066e-02, 5.7862e-02, -3.4114e-02, 2.1111e-01, -4.5305e-01, + 1.8462e-01, -8.1985e-02, -1.9718e-01, -2.1379e-02, 7.6429e-02, + -4.3138e-02, 
-2.3068e-01, -1.6621e-02, -3.2513e-01, 5.3912e-02, + -1.1320e-01, 3.4280e-01, 2.6733e-02, -2.1735e-01, -2.3797e-01, + 1.5833e-01, -1.4166e-02, 3.5680e-01, 1.5322e-01, -8.9745e-02, + -1.9738e-01, 1.4938e-01, 9.4129e-02, 2.2862e-01, 8.3815e-02, + -2.7415e-01, 2.5166e-02, -3.1954e-01, 3.3088e-01, 3.3316e-01, + 7.9626e-02, 2.1199e-01, 3.0189e-01, 7.0640e-02, 1.4281e-01, + -2.7548e-01, 1.6440e-01, -2.1965e-01, -3.7695e-01, 1.1076e-01, + 3.7800e-02, 7.4885e-02, -3.4464e-02, 1.4313e-01, -6.0915e-02, + -2.3558e-01, -9.9578e-02, -2.5076e-01, -2.4921e-01, -2.6538e-01, + 4.9933e-02, -1.5339e-01, -1.0247e-01, -1.9087e-01, 1.2254e-01, + 3.0902e-01, -1.7322e-01, -9.5716e-02, -1.0661e-01, -1.5046e-02, + 3.5670e-01, -2.1956e-01, 1.7981e-01, 2.0513e-02, -2.8031e-01, + -1.6479e-01, 1.0474e-01, -3.1772e-01, -1.5772e-01, 3.5148e-01, + 3.1563e-01, -1.5615e-02, 2.0267e-01, -4.0184e-01, 1.1223e-01, + 1.9680e-01, -7.8862e-02, -1.6144e-02, 1.5175e-01, -3.1454e-02, + -5.4835e-02, -7.4258e-02, -9.8505e-03, -9.8693e-02, -2.3574e-02, + -1.9845e-01, 1.3589e-02, -5.6328e-02, 1.8877e-01, -1.1442e-01, + 1.5472e-01, -9.2352e-02, -2.1861e-03, -5.0178e-01, -1.4711e-01, + 2.4483e-02, 1.7381e-01, 1.1555e-01, -1.1969e-01, 4.7179e-01, + -4.1360e-01, 5.9100e-02, 1.4711e-01, 3.4214e-01, 4.1288e-02, + 2.5513e-01, -1.7110e-01, -1.9608e-01, -4.2552e-02, -2.5904e-01, + -1.2025e-01, -2.1390e-01, -1.7674e-02, 3.4328e-01, 3.1743e-02, + 2.8413e-02, -1.5858e-01, 2.3370e-02, 2.3553e-01, 8.7197e-02, + 8.3802e-02, -1.1817e-02, 1.2176e-01], device='cuda:1', + requires_grad=True) +torch.Size([2304, 768]) +Parameter containing: +tensor([[ 2.9816e-02, 4.1199e-03, 8.1406e-03, ..., 1.6235e-02, + -2.6321e-02, 4.1542e-03], + [-9.5673e-03, -3.6621e-02, -5.4779e-03, ..., -1.4587e-02, + 9.2392e-03, -7.4482e-04], + [ 7.9727e-03, -1.2749e-02, 1.3336e-02, ..., -3.6591e-02, + -5.0735e-04, -1.6289e-03], + ..., + [ 2.0859e-02, -7.8630e-04, -1.1818e-02, ..., 7.7069e-05, + -3.9337e-02, -8.6823e-03], + [-9.7809e-03, -6.9389e-03, 
-4.0497e-02, ..., 1.0925e-02, + -5.8136e-03, 1.8625e-03], + [-2.3834e-02, -9.3536e-03, -4.1656e-03, ..., 1.7807e-02, + -1.5495e-02, -1.8188e-02]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([2304]) +Parameter containing: +tensor([ 0.1714, 0.2435, 0.2001, ..., 0.0595, 0.0106, -0.0736], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 768]) +Parameter containing: +tensor([[-0.0021, 0.0144, -0.0298, ..., -0.0013, -0.0110, -0.0228], + [-0.0030, -0.0152, 0.0158, ..., 0.0002, 0.0506, -0.0149], + [ 0.0067, -0.0117, -0.0151, ..., -0.0057, -0.0125, -0.0014], + ..., + [ 0.0228, 0.0216, 0.0058, ..., 0.0212, -0.0056, 0.0391], + [ 0.0132, 0.0172, -0.0291, ..., -0.0060, -0.0128, -0.0266], + [-0.0192, -0.0129, 0.0062, ..., -0.0020, 0.0054, -0.0218]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-3.2935e-01, 3.8574e-01, -1.5894e-01, -7.9712e-02, -1.2671e-01, + -1.6199e-01, 1.0590e-01, -2.9614e-01, 2.3022e-01, 3.6469e-02, + -5.2643e-02, 3.4033e-01, 1.4880e-01, -8.3801e-02, 4.8340e-01, + 1.7334e-01, -1.7029e-01, 8.2764e-02, -1.4929e-01, -2.7979e-01, + 1.2170e-01, 1.6174e-01, 2.1887e-01, -5.0903e-02, 2.0105e-01, + 1.3806e-01, -1.4636e-01, -3.9154e-02, -3.9307e-01, 1.2183e-01, + 1.6663e-01, -1.3965e-01, -9.2239e-03, -1.0297e-01, -5.9967e-02, + -4.0710e-02, -1.7126e-01, -1.7651e-01, 2.1472e-01, 1.6138e-01, + -1.6394e-01, 4.7821e-02, 2.2400e-01, -2.2632e-01, -3.0811e-01, + -1.8933e-01, -5.2704e-02, -1.0468e-01, -1.3489e-01, -1.2469e-01, + -1.3416e-01, 1.6504e-01, 3.1934e-01, 3.4814e-01, -1.9519e-01, + 6.3171e-02, 1.2305e-01, 7.3242e-02, -1.4343e-01, -9.5154e-02, + 2.2620e-01, 1.8762e-01, -2.3962e-01, -2.3669e-01, -2.3727e-02, + 2.4072e-01, -2.9541e-01, -1.2622e-01, -1.2805e-01, -8.8806e-02, + 3.1689e-01, 7.8674e-02, -5.3711e-02, -2.3657e-01, -9.0271e-02, + -2.0654e-01, -7.9346e-02, 3.0127e-01, 3.1299e-01, -8.1024e-03, + 7.9468e-02, 2.3962e-01, -5.1514e-02, 
-2.0771e-03, -4.1168e-02, + -4.1235e-01, 3.2129e-01, -4.1333e-01, -1.4758e-01, -5.6732e-02, + -2.8223e-01, -1.7761e-01, -1.2903e-01, 4.8755e-01, 2.5684e-01, + -1.8066e-02, -6.8970e-02, 8.1299e-02, 1.1688e-01, 1.9360e-01, + -3.4962e-03, -2.7197e-01, -1.5454e-01, 2.5955e-02, -1.4087e-01, + -6.9824e-02, -1.1560e-01, 2.2192e-01, 2.2375e-01, -2.6562e-01, + 1.5686e-01, 1.7334e-01, -4.9927e-02, 1.7310e-01, -5.7373e-03, + 2.4084e-01, 1.3684e-01, 2.2937e-01, -5.5618e-03, -2.5342e-01, + 1.3458e-02, 1.4160e-01, 2.2644e-01, 1.7688e-01, 2.6855e-02, + -7.4097e-02, 1.1395e-01, 6.5613e-03, -1.8433e-02, 4.7803e-01, + 2.7612e-01, -1.6211e-01, 1.3245e-01, 2.4368e-02, 1.6711e-01, + -6.8909e-02, 2.5781e-01, -3.1079e-01, 4.4128e-02, 2.2339e-01, + -1.6638e-01, 8.4900e-02, 1.5526e-02, 8.4290e-02, -3.7256e-01, + 3.8208e-01, -6.8848e-02, -2.6880e-01, -1.7639e-01, 2.2156e-01, + -2.9980e-01, -6.5430e-02, 2.6904e-01, -1.6040e-01, -4.2749e-01, + 1.1218e-01, -3.0786e-01, 8.3313e-02, -3.7134e-01, -3.5669e-01, + -2.5684e-01, -2.1765e-01, 2.4658e-01, -2.2327e-01, 3.3081e-02, + 1.3037e-01, 8.6731e-02, -3.2007e-01, -7.5500e-02, 1.8738e-02, + 2.0117e-01, -2.1680e-01, -8.2779e-03, 6.8665e-02, -1.8115e-01, + 2.6733e-01, -2.4634e-01, -2.5977e-01, 4.7852e-01, 1.2781e-01, + -6.1523e-02, -2.2485e-01, -3.7256e-01, 1.5030e-02, 5.9937e-02, + 1.1444e-01, -1.5002e-01, 3.1226e-01, 2.8613e-01, 2.8467e-01, + -9.7427e-03, 7.5317e-02, -2.1643e-01, -3.8391e-02, 3.1201e-01, + 1.2268e-01, 1.2854e-01, 1.3464e-01, -1.2109e-01, 2.0679e-01, + -2.4979e-02, -2.1936e-01, -2.6535e-02, 4.0802e-02, -2.4365e-01, + -4.5605e-01, -2.5177e-02, -1.5369e-01, -4.3481e-01, 9.3689e-03, + 7.9773e-02, -2.8027e-01, -7.6965e-02, 2.0410e-01, 2.3254e-01, + 2.7420e-02, 2.3975e-01, -6.6101e-02, 3.1128e-01, 5.5371e-01, + 1.3611e-01, 1.3748e-02, -1.9287e-02, 2.3083e-01, 1.9031e-01, + 1.8201e-01, -2.6672e-02, -4.7394e-02, 1.4185e-01, 2.6318e-01, + -3.7036e-01, -1.1749e-01, 3.3057e-01, 4.7821e-02, 1.2482e-01, + -7.0374e-02, 9.2957e-02, -7.6599e-02, 
-2.2131e-01, 4.1309e-01, + 3.6768e-01, -2.0154e-01, 2.7802e-02, 2.9404e-02, -1.5945e-02, + -3.2861e-01, 1.3159e-01, 5.8411e-02, -1.3281e-01, 2.9712e-01, + 1.4697e-01, 1.9373e-01, 5.9692e-02, 1.2610e-01, -1.6589e-01, + 3.5229e-01, 7.0496e-02, -3.3539e-02, -1.2396e-01, -1.1200e-01, + -2.5620e-02, -6.9824e-02, -9.5642e-02, 3.2251e-01, -2.1790e-01, + -4.8682e-01, 9.0942e-02, 3.2013e-02, 1.1603e-01, 1.4124e-01, + 1.4026e-01, -1.9885e-01, -2.9688e-01, -8.8440e-02, 1.8298e-01, + -1.5906e-01, -6.6345e-02, 6.5002e-02, -4.0576e-01, 1.2195e-01, + 9.2163e-02, -1.6528e-01, -1.2659e-01, 1.1407e-01, -3.2495e-01, + 1.0181e-01, -1.0797e-01, 2.7124e-01, -2.8027e-01, 2.2083e-01, + -1.7456e-01, 2.1265e-01, 1.8823e-01, -1.3452e-01, -2.2668e-01, + 1.1368e-02, -3.7207e-01, 3.0151e-01, -5.6763e-02, -1.4917e-01, + -3.9459e-02, 2.6147e-01, 8.5144e-03, -4.8523e-02, -1.7188e-01, + 2.4384e-02, -5.7800e-02, 2.4365e-01, -2.8223e-01, 2.0477e-02, + 6.5613e-02, -2.3010e-01, 3.4448e-01, 2.5659e-01, 2.1820e-02, + -3.4253e-01, -2.7881e-01, 3.2690e-01, -2.0728e-01, -5.1956e-03, + 6.8726e-02, -1.9824e-01, 1.7322e-01, -7.4036e-02, -9.8877e-01, + 8.1177e-02, -1.3452e-01, -2.7539e-01, 4.3793e-02, 1.9852e-02, + -2.7661e-01, -2.0767e-02, -8.2031e-02, 2.9564e-05, -2.8589e-01, + -2.9395e-01, -3.1201e-01, -4.1626e-02, 3.5938e-01, 1.5515e-01, + 5.5878e-02, -7.8583e-03, 3.3386e-02, 1.8826e-03, -8.0444e-02, + -4.8065e-02, -1.8005e-01, -7.5500e-02, -2.7313e-02, 8.1787e-02, + 3.3887e-01, 9.7046e-02, 5.3497e-02, -5.3772e-02, -1.6541e-01, + 1.7053e-01, -1.5942e-01, -6.8054e-02, -1.9263e-01, 4.4238e-01, + -3.9648e-01, -1.4990e-01, -6.9771e-03, 2.2156e-01, 1.9409e-02, + 3.0350e-02, -3.6377e-01, -6.2164e-02, -4.2261e-01, -1.4359e-02, + -2.1912e-01, 1.3940e-01, 1.7603e-01, 2.3608e-01, -6.6895e-02, + -1.6980e-01, 3.4088e-02, -3.2867e-02, 3.6206e-01, -2.0798e-02, + 3.1836e-01, -2.0276e-01, -8.2275e-02, -4.9146e-01, 1.3928e-01, + -1.0724e-01, -2.1484e-01, 1.6455e-01, 3.1030e-01, -1.0187e-01, + 1.3757e-01, 1.0126e-01, 
1.2524e-01, 1.7786e-01, -5.7220e-02, + -7.0610e-03, -4.4946e-01, -2.2522e-01, 2.6562e-01, -2.2852e-01, + -3.4839e-01, -7.1228e-02, -3.4277e-01, -1.9897e-01, 1.2482e-01, + 2.4219e-01, -5.1605e-02, 3.6987e-02, -8.5999e-02, -1.0681e-03, + 3.8745e-01, 1.7059e-02, 3.6304e-01, 1.7737e-01, 2.0703e-01, + -2.0764e-01, -1.8945e-01, -2.6489e-01, 2.0251e-01, 1.1841e-01, + 8.4717e-02, -2.7771e-02, -1.2170e-01, 1.9226e-02, -8.2703e-02, + 2.8003e-01, 4.7168e-01, 3.5522e-01, -3.5010e-01, 3.2776e-02, + 1.3367e-01, -8.9783e-02, -3.3752e-02, -4.7437e-01, 2.8198e-02, + 1.3965e-01, -3.7231e-01, 2.1448e-01, -6.5613e-02, 9.9731e-02, + 2.6840e-02, -1.1261e-02, 2.1423e-02, -2.4033e-02, -2.6318e-01, + 2.3254e-02, 2.4673e-02, 2.7979e-01, 3.0713e-01, -9.6588e-03, + 1.9104e-01, -1.9507e-01, 8.6365e-02, 3.8605e-02, -2.9199e-01, + -1.1090e-01, -2.3035e-01, 2.9370e-01, 1.1894e-02, 6.7932e-02, + 3.3722e-02, -3.5742e-01, 9.9976e-02, -2.8149e-01, -1.1664e-01, + 3.0029e-01, -2.9199e-01, 2.9346e-01, -3.6621e-01, -1.0321e-01, + -2.2949e-01, 3.4595e-01, -3.0249e-01, -6.8848e-02, -1.6101e-01, + -1.5503e-01, 4.5685e-02, -1.0468e-01, 1.5942e-01, -9.0759e-02, + -2.9346e-01, -1.0223e-02, 3.6835e-02, -4.5624e-02, 2.7100e-01, + -2.0190e-01, 2.2937e-01, 1.7078e-01, 3.1714e-01, -3.6401e-01, + 6.8703e-03, -1.0941e-02, 2.5586e-01, -1.5234e-01, 2.1582e-01, + 1.7944e-02, -1.3342e-01, -6.4941e-02, 2.3779e-01, -3.0322e-01, + 1.0791e-01, 2.9297e-03, -1.9910e-01, -2.0947e-01, 1.7529e-01, + 1.1121e-01, -2.4078e-02, 2.5586e-01, -9.3323e-02, 2.1960e-01, + -7.2449e-02, 6.2988e-02, -1.5356e-01, -2.4695e-01, 2.5659e-01, + 2.0288e-01, -1.0254e-01, -1.9873e-01, -4.1656e-02, 3.4698e-02, + 3.2495e-01, -1.0834e-01, -1.6418e-01, 8.2092e-02, -1.3513e-01, + -4.2236e-01, 1.7896e-01, 5.2521e-02, -5.5359e-02, 1.9800e-01, + 1.9336e-01, -3.6475e-01, 2.2620e-01, 2.6562e-01, 1.2292e-01, + -2.7390e-02, 3.2300e-01, -1.2280e-01, -1.2079e-01, 1.0083e-01, + 3.7549e-01, 1.1206e-01, 1.8875e-02, 1.7773e-01, 2.5244e-01, + 1.4648e-01, -4.9530e-02, 
-8.4991e-03, -1.9788e-01, -1.0077e-01, + 3.9917e-02, -2.8540e-01, -8.9340e-03, -3.8794e-01, 1.6736e-01, + 1.8445e-01, 2.2412e-01, -9.8267e-02, -3.8013e-01, -2.0129e-01, + 3.4180e-01, -1.2115e-01, -1.5613e-01, 7.4463e-02, -2.6270e-01, + -2.4048e-01, -5.8472e-02, -3.3960e-01, 1.8555e-01, -1.7114e-01, + -1.3318e-01, 2.9468e-01, -1.4648e-01, -1.2016e-02, 2.8540e-01, + 8.3313e-02, 9.5398e-02, 2.0251e-01, -9.3384e-02, 1.6907e-01, + 2.4487e-01, -1.0583e-01, -7.8186e-02, 5.3680e-02, -5.6183e-02, + -1.7627e-01, 1.6052e-01, -3.3478e-02, 1.2128e-01, 1.4038e-01, + 1.0828e-01, -3.8428e-01, 1.2634e-01, -1.5540e-01, -3.3447e-01, + 7.6965e-02, -1.8896e-01, -1.0107e-01, 2.1973e-01, 3.4277e-01, + 3.8574e-01, 1.4819e-01, -1.4624e-01, 9.7885e-03, 2.5024e-01, + 1.3757e-01, -2.7417e-01, -2.1741e-01, -9.9976e-02, 3.0835e-01, + -1.8018e-01, 4.4824e-01, 1.0400e-01, 4.7729e-01, 1.2622e-01, + 1.8738e-01, 2.1692e-01, 2.7417e-01, 1.1517e-01, 2.5952e-01, + -4.3945e-01, 8.9661e-02, 4.3335e-02, 2.8052e-01, 1.6235e-02, + 2.0288e-01, 5.7068e-02, 2.2083e-01, -2.4673e-02, 1.2561e-01, + -2.5684e-01, 2.1033e-01, -6.6528e-02, -7.8674e-02, -1.1284e-02, + -1.2122e-01, 5.8685e-02, -1.9739e-01, 1.5186e-01, 3.4241e-02, + 4.6802e-01, -1.2769e-01, 9.4238e-02, 2.8638e-01, -2.6831e-01, + 9.5642e-02, -1.6760e-01, -2.6074e-01, -2.9150e-01, -1.5710e-01, + 9.3201e-02, -4.3506e-01, 1.7371e-01, -1.8066e-01, 2.7026e-01, + 2.9785e-01, 1.4880e-01, 1.1169e-01, -2.6172e-01, 8.8257e-02, + 1.3013e-01, 2.1619e-01, 9.3750e-02, -2.0715e-01, -2.7710e-01, + -5.2551e-02, 1.4539e-01, 8.9600e-02, 3.7866e-01, 4.1275e-03, + 4.8193e-01, -1.8604e-01, -2.1716e-01, 6.0959e-03, -1.0056e-02, + -5.4199e-02, 1.5967e-01, 2.4597e-01, 2.0044e-01, -7.6294e-02, + 7.9803e-03, 2.3914e-01, -1.8079e-01, -1.5625e-01, 8.1604e-02, + 4.2456e-01, -5.4474e-02, -1.6064e-01, 2.0496e-01, -2.1008e-01, + 1.9641e-01, -1.2103e-01, 8.6823e-03, -1.7273e-01, -5.8319e-02, + 4.7485e-02, 5.0812e-02, 5.5313e-03, -1.8201e-01, 1.2939e-01, + 1.9104e-02, -1.1429e-02, 
-1.0291e-01, 4.6021e-02, 1.1914e-01, + 1.4368e-01, -4.7900e-01, -1.0107e-01, 9.9426e-02, -1.8326e-02, + 1.8005e-01, -2.1045e-01, -1.8237e-01, -3.7915e-01, 8.7585e-02, + -3.8483e-02, 1.7944e-01, 3.6938e-01, 1.6003e-01, -6.0577e-03, + -1.0689e-02, -2.7319e-01, 4.5972e-01, 1.4473e-02, -1.2939e-01, + 1.8030e-01, 4.9500e-02, 3.5553e-02, -1.5106e-03, 6.2805e-02, + -3.1299e-01, -1.9531e-01, -3.5132e-01, -2.3438e-01, 1.8042e-01, + -2.4628e-02, -4.4189e-02, -1.2659e-01, -2.0279e-02, 1.0699e-01, + -3.8574e-02, 1.4636e-01, -1.8506e-01, -5.8746e-04, -3.5498e-01, + 1.2494e-01, 1.4441e-01, -7.9041e-02, 1.2256e-01, -4.6631e-02, + 1.5662e-01, 3.2642e-01, -3.0811e-01, 1.6040e-01, -1.2634e-01, + 8.5449e-02, -9.7534e-02, -2.6001e-01, 5.2539e-01, 1.1804e-01, + -1.3721e-01, -8.4290e-02, -4.2285e-01, -2.7618e-03, 2.3938e-01, + -1.0718e-01, -1.5405e-01, 2.5854e-01], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.8845, 1.8352, 1.8765, 1.8810, 1.5563, 1.8842, 1.8917, 1.8105, 1.9868, + 1.8980, 1.8124, 1.8699, 1.9132, 1.9105, 1.8045, 1.8734, 1.9492, 1.9584, + 1.8736, 0.5699, 1.9376, 1.8571, 1.8018, 1.7807, 1.9045, 1.8593, 1.8650, + 1.9069, 1.7912, 1.9275, 1.9020, 1.8200, 1.8223, 1.9474, 1.8166, 1.9292, + 1.8478, 1.8643, 1.7707, 1.7729, 1.8149, 1.9277, 1.8711, 1.8703, 1.8980, + 1.8339, 1.8191, 1.9654, 1.9062, 1.8221, 1.9507, 1.9744, 1.7754, 1.7398, + 1.8771, 1.9682, 1.9305, 1.9569, 2.0626, 1.9420, 2.0490, 1.8973, 1.9315, + 1.8269, 1.8913, 1.8603, 1.8191, 1.8255, 1.8268, 1.6817, 1.9641, 2.0235, + 1.8896, 1.8203, 1.9921, 1.9167, 1.9540, 1.9359, 1.7891, 1.9293, 1.9515, + 1.8066, 1.9256, 1.8339, 1.8488, 1.8092, 1.9274, 1.9570, 1.8335, 2.0225, + 1.9396, 1.8363, 1.9361, 1.8342, 1.8680, 1.8073, 1.8952, 1.9872, 1.9524, + 1.8325, 1.9858, 1.9102, 1.8231, 1.9491, 1.9409, 1.9810, 1.8556, 1.7757, + 1.9584, 1.9158, 1.9068, 1.8165, 2.0067, 1.8175, 1.8685, 1.9612, 1.8651, + 1.9308, 1.9786, 1.8803, 2.0131, 1.9723, 1.8474, 1.9064, 1.8661, 
1.7817, + 2.0292, 1.9366, 2.0245, 1.9169, 1.8463, 1.9134, 1.9453, 1.9029, 1.7884, + 1.8547, 1.7968, 1.9072, 1.8114, 1.9171, 1.9438, 1.9655, 1.9935, 1.9752, + 1.8150, 1.8036, 1.8647, 1.8392, 1.9946, 1.9759, 1.7417, 1.8785, 1.8786, + 1.9880, 1.8447, 1.5194, 1.9745, 1.8525, 2.0701, 1.8832, 1.7878, 1.9596, + 1.9374, 1.9127, 1.9849, 1.8117, 1.8179, 1.8551, 1.8570, 2.0063, 1.8393, + 1.7900, 1.7607, 1.7850, 2.0079, 1.8642, 1.8940, 1.8689, 1.7796, 2.0517, + 1.8809, 1.8384, 1.8476, 1.8974, 1.9596, 1.6308, 1.9006, 2.1654, 1.8026, + 1.9379, 1.9820, 1.9537, 1.9238, 1.8867, 1.8491, 1.8553, 1.9149, 1.9309, + 2.1425, 1.8556, 1.9288, 1.8089, 1.9468, 2.0053, 1.7469, 1.9317, 1.9154, + 1.9378, 1.9160, 1.9163, 1.8746, 1.7993, 1.8608, 1.6098, 1.7738, 1.9288, + 1.8738, 1.8375, 1.8819, 1.7251, 1.8135, 1.8060, 1.6460, 1.9648, 1.8793, + 1.8735, 1.9054, 2.0499, 1.8881, 1.8455, 1.8787, 1.8306, 1.8660, 1.9408, + 1.7706, 1.9874, 1.8663, 1.8535, 1.8764, 1.8998, 1.8804, 1.7887, 1.9140, + 1.9037, 1.8234, 1.9198, 1.8980, 1.9463, 1.9508, 1.9099, 1.9282, 1.9256, + 1.9555, 1.8825, 1.8637, 1.8823, 1.8507, 1.9972, 1.8996, 1.8754, 1.9579, + 1.9130, 1.8970, 1.9163, 1.9452, 1.6176, 1.8198, 1.8957, 1.9368, 1.9477, + 1.9516, 2.0003, 1.8590, 1.9027, 1.9444, 1.9328, 1.7937, 2.0126, 1.9234, + 1.9333, 1.8502, 1.8628, 1.8921, 1.7600, 1.8306, 1.8923, 1.8978, 1.8728, + 1.8345, 1.8942, 1.8993, 1.9678, 1.8302, 1.8068, 1.9830, 1.9665, 1.7677, + 2.0295, 1.9052, 1.9362, 1.8471, 1.8804, 1.8914, 1.8475, 1.9386, 1.9127, + 1.9178, 1.8527, 1.8397, 1.8191, 1.9175, 2.0222, 1.8673, 1.7971, 1.8434, + 1.8621, 1.8646, 1.9333, 1.7439, 1.8221, 1.8288, 1.7576, 1.8384, 1.8605, + 0.9349, 1.8890, 1.9015, 1.8971, 2.0346, 1.8416, 1.7772, 1.8861, 1.9598, + 1.9484, 1.8769, 1.7706, 1.9063, 1.8180, 1.9036, 1.9621, 1.9720, 1.9243, + 1.8806, 1.8688, 1.7491, 1.8797, 1.7815, 1.8845, 1.8533, 1.8813, 1.6127, + 1.8691, 1.8961, 1.9153, 1.9476, 1.8424, 1.9297, 1.9749, 1.9124, 1.7928, + 1.8377, 1.9906, 1.9413, 1.8094, 1.8876, 1.9582, 1.9084, 1.9157, 
1.9248, + 1.9435, 1.8492, 1.8388, 1.8780, 1.9118, 1.9468, 1.9740, 1.8628, 1.9167, + 1.8829, 1.9969, 1.8789, 1.8648, 1.9281, 1.8597, 1.9086, 1.9463, 1.9790, + 1.9217, 1.9149, 2.0050, 1.9205, 1.8149, 1.8173, 1.9832, 1.8378, 1.8729, + 1.8689, 1.8932, 1.9051, 1.9418, 1.8442, 1.8880, 1.9247, 1.8423, 1.8932, + 1.8876, 1.8785, 1.8502, 1.8830, 1.9121, 1.7974, 1.8728, 1.8306, 1.8850, + 1.8189, 3.1739, 1.9905, 1.8789, 1.8099, 1.9587, 1.8831, 1.9429, 1.8643, + 1.8100, 1.9681, 1.8934, 1.6765, 1.8947, 1.9454, 1.9292, 1.9464, 1.8583, + 1.9097, 1.8778, 1.8680, 1.9591, 1.8427, 1.7749, 1.8435, 1.9601, 1.8104, + 1.8350, 1.9762, 1.5642, 1.8662, 1.9177, 1.8729, 1.9588, 1.9295, 1.8630, + 1.9340, 1.8574, 1.8658, 1.9321, 1.8614, 1.7848, 1.8497, 1.8741, 1.9315, + 1.8774, 1.8153, 1.9115, 1.8713, 1.8385, 1.8768, 1.8541, 1.9018, 1.6993, + 1.7122, 1.7800, 1.8768, 1.7910, 1.8274, 1.9631, 1.8354, 1.8277, 1.9133, + 1.7502, 1.9774, 1.8644, 1.8474, 1.8558, 1.9439, 1.6937, 1.8008, 1.8918, + 1.9959, 1.8452, 1.8962, 1.8652, 1.9799, 1.8773, 1.9406, 1.8726, 1.9284, + 1.7958, 1.9859, 1.7727, 1.9554, 1.8868, 1.8192, 1.7793, 1.8819, 1.9566, + 1.9689, 1.9528, 1.8410, 1.8819, 1.9371, 1.9973, 1.9218, 1.9970, 1.8444, + 1.8517, 1.8372, 1.8086, 1.9310, 1.7364, 1.8146, 1.9686, 1.7777, 1.9504, + 1.7437, 1.9044, 1.9021, 1.9502, 1.9839, 1.9025, 1.8885, 1.8824, 1.9079, + 1.9152, 1.8963, 1.9761, 1.8352, 1.7745, 1.8770, 1.9029, 1.9332, 1.9402, + 2.0578, 1.9987, 1.9575, 1.9153, 1.9579, 1.9238, 1.9215, 1.8162, 1.8468, + 1.9803, 1.8959, 1.8979, 1.7640, 1.7755, 1.8934, 1.9122, 1.9088, 1.8951, + 1.9510, 1.8332, 1.9277, 1.6539, 1.9063, 1.8719, 1.8038, 1.9036, 1.9306, + 1.8461, 1.9749, 1.7861, 1.8499, 1.9288, 1.8200, 1.9073, 1.8482, 1.8608, + 1.8441, 1.9250, 1.9120, 1.7419, 1.6542, 1.8009, 1.8765, 1.8275, 1.9703, + 1.9623, 1.8815, 1.8559, 1.9297, 1.9958, 1.8692, 1.7484, 1.8487, 1.9696, + 1.8354, 1.8752, 1.9288, 1.8886, 1.8154, 1.8855, 1.9286, 1.8765, 1.8816, + 1.9660, 1.8762, 1.9568, 1.8975, 2.0225, 1.8980, 1.9379, 1.8901, 
1.8865, + 1.8581, 1.9267, 1.9512, 1.9268, 1.8755, 1.8539, 1.8854, 1.7541, 1.8981, + 1.8321, 1.9068, 1.7888, 1.9020, 1.9170, 1.8782, 1.9882, 1.9291, 1.8385, + 1.8635, 1.8034, 1.8408, 1.8393, 1.9157, 1.9480, 1.8666, 1.9089, 1.8793, + 1.8050, 1.8915, 1.9401, 1.8111, 1.9036, 1.6810, 1.8467, 1.8421, 1.9678, + 1.7958, 1.8310, 1.9185, 1.7928, 1.8878, 1.9683, 1.8328, 1.8989, 1.8129, + 2.0088, 1.8850, 1.8757, 1.6818, 1.8862, 1.8237, 1.8779, 1.8614, 1.8838, + 1.7376, 1.9299, 1.8992, 1.8632, 1.8358, 1.9367, 1.8568, 1.7632, 1.9217, + 1.9264, 1.8954, 1.8412, 1.9310, 1.8819, 1.8354, 0.6095, 1.8538, 1.8894, + 1.8347, 1.8615, 1.8613, 1.8770, 1.8895, 1.8258, 1.8161, 1.9044, 1.8602, + 1.9129, 1.8627, 1.9785, 1.9359, 2.0186, 1.9919, 1.9591, 1.8777, 1.8415, + 2.0802, 1.9656, 1.9193, 1.9592, 1.7867, 1.9489, 1.8725, 1.8389, 1.8473, + 1.8577, 1.7747, 1.9154, 1.7346, 1.8922, 1.9370, 1.8342, 1.9007, 1.7837, + 1.9888, 1.8941, 1.8775, 1.9283, 1.8973, 2.0281, 1.4591, 2.0369, 1.9378, + 1.8734, 1.9433, 1.8871, 1.7934, 1.9058, 1.8899, 1.8908, 1.9713, 1.8724, + 1.9331, 1.9974, 1.8825, 1.9038, 1.8145, 1.9421, 1.9098, 1.9045, 1.9809, + 1.9200, 1.8320, 1.7140, 1.9192, 1.8688, 1.8621, 1.9538, 1.8416, 1.8669, + 1.8944, 1.7779, 1.8887, 1.8708, 1.8072, 1.9348, 1.8595, 1.9091, 1.9632, + 1.9195, 1.8811, 1.8306], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 1.4977e-01, 9.0828e-02, -1.1229e-01, -2.4072e-02, 2.1253e-01, + 2.2673e-01, 1.5718e-01, 2.1828e-01, -2.4588e-01, -8.8377e-02, + 1.2490e-01, -1.3510e-01, -2.0088e-01, 5.7927e-02, -1.0586e-01, + -1.1825e-01, -9.5867e-03, -1.3082e-01, 4.6557e-02, 2.5393e+00, + -5.5107e-02, -2.8849e-03, -3.2968e-02, 8.6449e-02, 3.8190e-02, + -4.2845e-02, 8.5313e-03, -4.0419e-02, -4.0110e-03, -5.3827e-02, + 6.2886e-02, 9.8723e-02, 3.9529e-03, 1.7838e-01, 2.0938e-01, + -1.2265e-01, -1.3081e-01, -1.0067e-01, -1.1288e-01, 1.2991e-01, + 1.8509e-02, 2.8068e-03, 3.8258e-03, 1.9762e-01, 8.1392e-02, + -3.3367e-02, 9.7839e-02, -1.3463e-01, 
9.3890e-02, 1.0690e-01, + 9.7328e-02, -9.1940e-02, -1.7569e-01, 5.7027e-02, -5.5082e-02, + 9.2227e-02, -6.8959e-02, -8.2383e-02, -1.3635e-02, 4.3161e-02, + 1.1511e-01, -1.9712e-01, 1.1997e-01, 1.2039e-01, 2.0542e-01, + 6.4997e-02, 2.1161e-01, -4.0855e-02, -1.3579e-01, 2.9124e-01, + -4.1219e-02, 1.6387e-02, 4.7529e-01, 1.2968e-01, -9.7462e-02, + 1.3828e-01, 1.6654e-01, -1.1903e-01, -9.1884e-02, 1.2110e-01, + 2.8219e-02, -8.1905e-02, -2.8228e-02, 1.2402e-01, -6.1241e-02, + 1.2894e-01, 7.7995e-02, 2.6379e-01, -2.3807e-02, -6.2343e-02, + -2.0724e-02, 5.8649e-02, 1.0976e-01, -1.4784e-01, -1.6364e-01, + 5.2668e-02, 2.2110e-01, -1.7867e-01, -2.4759e-01, 1.6276e-02, + 9.4501e-02, -1.3611e-01, 6.1677e-02, -1.0329e-01, 7.9952e-02, + 1.0241e-01, 1.3805e-01, -8.0445e-02, -1.0865e-01, -2.0342e-02, + 5.4483e-03, -4.4460e-02, -9.8944e-02, 2.5351e-02, -9.3630e-03, + -4.6340e-02, 1.8653e-02, -5.4521e-02, -9.9965e-02, 2.9935e-01, + -7.1766e-03, -3.5605e-01, -2.3958e-01, -2.7390e-02, -1.5948e-01, + -1.0897e-01, -1.7559e-01, 1.9700e-01, 1.0314e-02, -1.7945e-01, + -9.4148e-02, 7.2500e-03, 3.1125e-02, 8.9480e-02, -7.2448e-02, + 1.7824e-01, -2.1028e-01, -8.8175e-02, -1.0598e-02, -7.6072e-02, + 8.6819e-02, 3.8492e-02, -3.9397e-02, -7.3740e-02, 1.3232e-01, + -1.7187e-01, 2.5512e-02, 9.1546e-02, 9.4585e-03, 7.7797e-02, + 3.9275e-01, -1.0438e-01, -1.2284e-01, 1.5048e-02, 8.2665e-03, + -5.1999e-01, 2.1568e-02, -1.7402e-01, 8.3193e-02, 1.3827e-01, + 1.8734e-01, -1.4931e-02, -9.5600e-02, 8.9184e-02, -3.2616e-02, + 4.2414e-02, -6.3760e-02, 1.8367e-01, 2.8722e-04, -1.0264e-02, + -1.8133e-02, 1.1746e-01, 4.0782e-02, -2.9797e-02, 8.2706e-02, + -2.9593e-01, 3.7761e-02, 1.2163e-01, -2.4276e-01, 1.0814e-01, + -1.4889e-01, 1.9977e-03, 2.4847e-01, 9.8385e-02, -3.8665e-02, + -8.7119e-02, 5.5839e-04, -6.7407e-01, -1.9018e-01, -1.3712e-01, + -1.6403e-01, -1.2081e-01, 1.1545e-01, 9.3563e-02, -9.6760e-02, + 5.7779e-02, 3.0807e-02, -2.1459e-01, 2.2013e-01, -1.3995e-02, + -3.3612e-01, 1.5033e-01, -5.4025e-02, 
3.2845e-02, 9.2645e-02, + 3.2002e-01, -1.4954e-02, 1.5258e-01, 2.1483e-01, -2.7657e-02, + 8.3226e-02, 3.7656e-01, -5.6053e-03, -1.6680e-01, -3.9404e-02, + -1.0372e-02, -5.0771e-02, 4.1767e-02, -1.9044e-01, -4.0341e-01, + 4.0919e-02, 7.6289e-02, 1.1134e-01, -1.3315e-01, -4.8227e-02, + -7.6237e-02, -3.9292e-02, 9.4033e-03, 2.0934e-01, -1.2315e-01, + 2.1096e-01, 1.0530e-01, -1.2812e-01, -3.9880e-02, -2.1281e-01, + -3.3356e-02, -1.0072e-03, 8.7769e-02, 1.3860e-01, -6.7106e-02, + -1.4575e-02, 1.5290e-01, -6.3386e-02, 7.9846e-02, -1.3863e-02, + 7.7562e-02, -8.3009e-02, -1.0501e-01, -2.6750e-02, -1.7253e-01, + -2.5846e-02, -1.1804e-01, 1.7110e-01, -1.2576e-01, 1.7992e-01, + -1.9679e-01, -8.5437e-03, -6.9406e-02, 4.9921e-02, 5.0245e-02, + -1.1880e-01, -6.9830e-02, 5.9304e-02, -2.2032e-02, -5.5579e-02, + 2.8273e-01, 4.2607e-02, -6.2353e-03, -6.9794e-02, -4.0076e-02, + 7.6442e-02, 1.2681e-01, 2.6285e-01, -8.0095e-02, -5.2822e-02, + 4.3852e-02, -4.5039e-02, 1.2212e-01, 2.0381e-01, -1.1561e-01, + 1.6132e-01, 3.3906e-01, -1.2707e-02, 5.1528e-02, 1.2264e-01, + 9.6960e-02, -5.2069e-02, -1.3161e-01, 3.4161e-01, -1.0293e-01, + 3.3521e-03, -2.1975e-03, 1.8850e-02, -1.2784e-03, 1.6387e-01, + -5.1121e-02, 3.9901e-01, -1.2812e-01, 6.6056e-02, 1.3807e-01, + 4.9560e-02, -1.0768e-01, -1.2635e-01, 1.1741e-01, 3.6649e-02, + 9.2106e-02, 2.2007e-02, -1.2226e-01, 1.7446e-01, -3.4823e-02, + 5.3663e-02, -2.9064e-02, -2.4956e-01, -2.3326e-01, 2.9518e-03, + 1.6585e-01, 1.2153e-01, -2.6665e-02, 3.1691e-01, 1.1982e-01, + -1.7774e-01, 3.4280e-01, -1.0939e-01, 4.9095e-02, -1.5014e+00, + -2.8279e-01, 8.2694e-02, 5.9233e-02, 1.2187e-02, -1.3442e-01, + 2.5667e-01, 4.1742e-02, 1.5975e-01, 4.0541e-03, 2.5555e-01, + 2.7525e-01, 1.9277e-02, 1.5007e-01, -3.5787e-02, -4.1940e-02, + 3.8335e-03, 1.0118e-01, -4.8436e-02, 3.0893e-02, 7.3047e-02, + 9.5244e-02, 1.8675e-01, 1.3491e-01, -3.9549e-02, 8.8511e-02, + -2.0086e-01, 8.2027e-02, -2.5592e-02, 8.8682e-02, 9.3104e-02, + -1.5674e-01, -1.0768e-01, 2.4316e-02, 
3.6231e-02, -1.5898e-01, + 3.4396e-02, 8.7499e-02, 4.3070e-02, -2.6488e-01, 6.0697e-02, + -1.0659e-01, 1.1697e-01, 1.1641e-01, 2.2722e-01, 5.9255e-02, + 1.9844e-01, -1.0398e-01, -1.0219e-01, -1.0963e-01, -1.4832e-01, + 7.7414e-02, 1.1030e-01, 2.3565e-01, -6.7662e-02, 9.7777e-03, + -2.2667e-01, 1.0643e-01, 1.2504e-02, 7.0543e-02, 2.1687e-02, + -4.3200e-02, 6.9545e-02, -1.5286e-01, -5.3265e-02, -4.3717e-02, + 2.8115e-02, -1.6122e-01, -2.7460e-02, -6.3853e-02, 2.8858e-02, + 4.2642e-02, 1.0826e-01, 1.2918e-01, -5.5144e-02, 2.8905e-02, + 2.5789e-01, -6.0265e-03, 1.1585e-01, 4.1049e-02, -1.6564e-01, + -3.6466e-01, -4.4526e-02, -3.9705e-02, 5.0000e-02, 4.2413e-02, + -9.8391e-02, 2.2150e-02, 4.3222e-02, -7.1868e-02, -9.0238e-02, + -3.9393e-01, 3.9913e-02, 6.0577e-02, -5.4894e-02, -2.7723e-02, + -5.5954e-02, 7.2414e-02, 1.0857e-01, -1.4078e-01, -1.9501e-02, + 1.1566e-01, -2.9919e-01, -2.8507e-01, 1.3112e-01, -1.1352e-02, + -1.2703e-01, -1.3442e-01, 7.1983e-03, 2.7848e-01, 1.1755e-01, + 7.1231e-03, 1.5916e-01, -3.3708e-03, 1.5079e-01, -1.1724e-01, + 2.7498e-02, 1.7569e-02, -1.5906e-02, -1.2364e-01, 2.7426e-01, + -1.4464e-01, -2.9425e-02, -1.8606e-01, 1.4634e-01, -1.0930e-01, + -6.5916e-02, 8.1522e-02, -1.8188e-01, 1.8782e-02, 6.5026e-02, + -3.8653e-02, 1.2372e-01, -1.9791e-02, -2.8119e-02, -1.2243e-01, + -9.4997e-02, 1.8606e-01, -2.6361e-01, 2.0877e-01, -4.2930e-02, + -1.2266e-01, 1.4367e-01, -9.8226e-02, 3.1051e-01, 1.0347e-01, + 1.1779e-01, -2.0663e-01, 1.4741e-02, 1.1494e-01, 2.7045e-02, + 5.6440e-02, 2.2375e-01, 1.3750e-02, -8.0551e-02, 4.4833e-02, + 1.8983e-01, 1.6651e-02, 4.2797e-02, 1.4209e-01, -1.9838e-01, + -4.0615e-03, -1.5028e-01, 1.5990e-01, 6.5197e-02, 7.3199e-02, + 2.7811e-02, 1.1103e-01, -5.0831e-02, 1.4231e-02, -2.2152e-01, + -1.8666e-01, -9.2706e-02, -2.2502e-02, -1.2607e-01, 2.3435e-01, + -3.5548e-02, -1.7466e-01, 3.4294e-02, -6.1595e-02, -6.2275e-02, + 4.5427e-02, 8.0757e-02, 1.0295e-01, 7.7756e-02, -2.9827e-02, + -1.5265e-01, -4.7962e-03, -1.6855e-01, 
1.9033e-01, -1.1006e-01, + -8.5642e-02, 1.7786e-01, 1.0449e-01, 8.5187e-02, -5.4987e-02, + -3.4442e-02, -4.7119e-02, 1.5076e-01, 1.0142e-01, -9.9440e-02, + -4.9660e-02, -2.6204e-02, -4.3997e-02, 1.3288e-02, -9.6366e-02, + 7.7371e-02, 2.0195e-01, -2.5764e-02, -9.3003e-03, -5.8061e-02, + -1.9938e-01, -5.7160e-02, 1.5829e-01, 1.8127e-01, -5.1235e-02, + 5.3454e-02, -5.7813e-02, -9.8958e-02, 3.4245e-02, -3.8890e-02, + 5.5782e-02, 7.2028e-02, 8.4247e-02, 1.6157e-01, 8.5942e-02, + -6.3876e-02, 1.6179e-01, -1.1903e-01, 1.9752e-01, -1.2409e-01, + -1.3576e-01, 9.2939e-02, -8.0832e-02, -3.7897e-02, 1.3954e-01, + -1.2824e-01, 1.3462e-01, 3.5907e-02, 7.7795e-02, 3.5418e-01, + 1.8305e-01, 4.7019e-02, 1.2169e-01, 1.4338e-01, 2.2147e-02, + 1.6112e-01, -2.3264e-02, 8.2084e-04, -4.4160e-02, -2.8564e-03, + 8.1639e-02, 3.2530e-02, -8.9730e-02, 1.4196e-01, -1.2849e-01, + -1.8822e-01, -9.6932e-02, 1.5225e-01, 2.9004e-02, 5.1665e-02, + 1.4070e-01, -3.1082e-02, -1.7318e-01, 1.1008e-01, -1.8146e-02, + -9.7296e-02, 2.5158e-01, 1.0361e-01, 1.6765e-02, 9.7833e-02, + -9.4305e-02, 1.9451e-01, 7.9379e-02, -1.3511e-01, -2.7555e-02, + -8.5424e-02, -1.8621e-01, -9.9312e-02, -7.1172e-02, -1.2654e-01, + -1.1217e-01, 1.0024e-01, 2.0272e-01, 7.3024e-02, -6.2072e-02, + 2.3424e-01, 1.1232e-01, -1.6426e-01, -5.0104e-02, -9.9868e-02, + -2.0319e-01, 2.6993e-02, 5.0555e-02, -7.6812e-02, -2.2759e-01, + 2.5402e-01, -2.1871e-03, 4.5064e-02, -7.3341e-03, 2.1875e-02, + -1.0079e-01, -3.1013e-02, -9.9062e-02, 6.1146e-02, -1.5484e-01, + 2.4925e-01, -1.1802e-02, 7.6865e-02, 1.1096e-01, 2.3033e-01, + 3.1681e-02, 3.8874e-03, 4.8754e-02, 2.8777e-02, -6.3957e-02, + -1.4132e-01, -2.0266e-01, -3.7878e-02, -6.4012e-02, 1.7023e-01, + 7.9161e-02, 1.3030e-02, 1.9864e-01, 8.3066e-02, 2.5653e-01, + 9.1549e-02, 1.6667e-01, 5.2830e-03, 1.3459e-01, -6.7270e-02, + 8.6687e-02, -2.3856e-01, -6.9920e-02, 7.9934e-02, 1.3820e-01, + -1.3395e-01, -1.5970e-02, -8.8392e-02, -4.4374e-02, 1.4311e-01, + -5.3860e-02, -1.6929e-01, -1.7831e-01, 
-2.5584e-01, 6.5801e-02, + -3.4672e-01, 1.2548e-01, 8.3701e-02, 8.4880e-02, -1.2696e-02, + 7.1410e-02, -2.6850e-02, -8.6469e-02, -7.0036e-02, 1.2408e-01, + -1.3168e-01, -2.3096e+00, 9.2856e-02, 1.1851e-01, 7.4759e-02, + -1.8195e-01, 8.1052e-02, -2.3694e-02, 6.8942e-02, 5.0159e-02, + -3.2627e-01, 4.6953e-02, 1.3953e-02, -3.2010e-02, -1.5938e-01, + 1.1737e-01, 5.7404e-02, -5.9496e-02, 1.1439e-02, -5.4151e-02, + 1.9076e-01, 1.3202e-01, 2.3647e-02, -3.2032e-02, -2.0221e-01, + 3.9195e-02, 1.9440e-01, 3.9982e-02, -1.6485e-01, -7.3657e-02, + -6.7927e-02, 1.7999e-01, 1.9496e-01, 1.4840e-01, 2.8846e-02, + 1.1035e-01, 5.6286e-02, -5.4704e-02, -1.4696e-01, -4.4278e-02, + 5.0493e-02, -2.2273e-02, -3.9672e-02, -3.2121e-01, 2.0969e-01, + -3.9823e-02, -3.1859e-01, 1.5124e-01, 1.4009e-01, 5.0370e-02, + 8.3453e-02, 9.9049e-02, 2.4573e-01, 1.9968e-01, 5.8656e-02, + -5.6640e-02, 4.0581e-02, 2.6943e-02, -1.3497e-04, -1.2463e-01, + 5.2881e-02, -1.2241e-01, 8.8162e-03, 1.3377e-01, 5.5711e-03, + -3.2902e-02, -1.4677e-01, -4.2712e-02, 1.2307e-01, 8.0042e-02, + -3.1782e-02, -1.3426e-01, 2.1633e-02, 1.0981e-02, 2.2117e-01, + -1.3080e-02, 8.9470e-02, 1.8250e-01, -2.3044e-01, -5.8739e-02, + -3.1466e-02, 6.1521e-02, 2.5686e-01, -6.6933e-02, -2.3293e-01, + -2.6690e-02, 2.1225e-01, 7.8747e-02], device='cuda:1', + requires_grad=True) +torch.Size([3072, 768]) +Parameter containing: +tensor([[ 4.3831e-03, -1.7395e-02, 3.9558e-03, ..., -1.6966e-03, + 1.0384e-02, 3.0640e-02], + [-2.2182e-03, -2.1912e-02, 5.6572e-03, ..., 2.1820e-02, + 7.8583e-03, 1.6800e-02], + [ 5.5084e-03, -1.9119e-02, -4.4098e-03, ..., 2.0035e-02, + -1.3260e-02, 1.9684e-03], + ..., + [ 4.6631e-02, -5.4359e-05, -3.4523e-03, ..., 4.8828e-03, + 2.2156e-02, -1.1864e-02], + [ 8.8787e-04, -8.1482e-03, 1.7776e-02, ..., -1.6052e-02, + 3.0502e-02, 9.1400e-03], + [ 1.3641e-02, 7.2708e-03, 3.8815e-03, ..., -3.4828e-03, + 2.0161e-03, -3.3054e-03]], device='cuda:1', dtype=torch.float16, + requires_grad=True) +torch.Size([3072]) +Parameter 
containing: +tensor([-0.1271, -0.1493, -0.2289, ..., -0.5405, -0.3594, -0.0424], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768, 3072]) +Parameter containing: +tensor([[-0.0115, -0.0157, -0.0200, ..., -0.0466, -0.0021, -0.0165], + [ 0.0092, -0.0032, -0.0108, ..., 0.0121, -0.0015, 0.0188], + [ 0.0030, -0.0024, 0.0109, ..., -0.0146, -0.0017, -0.0030], + ..., + [-0.0107, -0.0003, 0.0144, ..., -0.0054, 0.0235, 0.0117], + [ 0.0090, 0.0042, 0.0030, ..., 0.0037, 0.0125, -0.0172], + [-0.0052, -0.0043, 0.0161, ..., 0.0261, 0.0152, -0.0026]], + device='cuda:1', dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([ 2.5586e-01, -2.6196e-01, 2.3486e-01, 1.3916e-01, -1.3599e-03, + 4.4922e-02, 1.1615e-01, 1.1955e-02, -2.2461e-01, 1.1938e-01, + 1.4453e-01, -7.4097e-02, -1.5967e-01, 3.5095e-02, -1.9214e-01, + 6.9092e-02, -6.2347e-02, 7.0038e-03, -5.2734e-02, 2.5732e-01, + 3.0411e-02, -3.2158e-03, -4.5239e-01, 1.0803e-01, -2.2742e-01, + -1.1322e-01, 3.1433e-02, -1.7426e-02, 2.8857e-01, -9.8145e-02, + -2.0264e-01, 8.4595e-02, 7.5928e-02, -5.0781e-02, 1.3879e-01, + 1.1859e-01, 2.1851e-01, -7.2327e-02, -1.4209e-01, 6.4819e-02, + 1.1395e-01, -6.5674e-02, -6.4087e-02, 4.7607e-02, 1.8921e-02, + 1.1249e-01, 1.6370e-01, 1.0114e-01, 1.5955e-01, -1.7944e-01, + 1.3147e-01, 7.1228e-02, -1.2158e-01, -1.3306e-01, 1.9141e-01, + -1.3168e-02, -1.5515e-01, -1.9641e-01, 1.9983e-01, -1.1304e-01, + -1.2634e-01, 8.5205e-02, 2.0050e-02, 1.3379e-01, -1.9434e-01, + -5.9113e-02, 8.1543e-02, 1.2436e-02, 2.6291e-02, 2.5049e-01, + -1.7688e-01, 1.2524e-01, -1.7102e-01, 1.9751e-01, 1.8896e-01, + 2.8589e-01, -1.9067e-01, -3.7628e-02, 8.3466e-03, -1.2036e-01, + -3.3447e-02, 2.2400e-02, -1.3107e-02, -1.7136e-02, 5.1880e-02, + 9.6497e-02, -1.9214e-01, -3.2379e-02, -8.5571e-02, 1.6809e-01, + 1.9482e-01, 1.0614e-01, 1.6821e-01, -1.9812e-01, 2.9282e-02, + 5.1956e-03, -1.4514e-01, 8.9050e-02, 3.2623e-02, -3.6938e-01, + -1.4392e-01, 6.9031e-02, 
2.9517e-01, 2.1252e-01, 2.3956e-02, + -2.4052e-03, -7.3364e-02, -1.6922e-02, -6.2332e-03, 9.4360e-02, + -2.0386e-01, -5.0323e-02, 1.1627e-01, -1.7688e-01, 3.9276e-02, + -2.6520e-02, -8.8379e-02, -3.0960e-02, -5.7495e-02, 6.6910e-03, + 8.5602e-03, 1.1673e-02, -3.2867e-02, -9.2346e-02, -3.0127e-01, + -6.9542e-03, -1.4868e-01, -1.5161e-01, 8.5754e-02, -1.5942e-01, + 3.1948e-03, 7.7942e-02, -2.4695e-01, -1.4941e-01, -2.9175e-01, + -1.8604e-01, -1.6553e-01, 2.4765e-02, 2.4857e-02, 8.7830e-02, + -2.0044e-01, -9.2590e-02, -7.0251e-02, -1.0168e-01, 1.5295e-01, + -9.3262e-02, -2.5317e-01, 2.3743e-01, 1.9897e-01, 8.7219e-02, + -5.1239e-02, 1.0211e-01, -5.9723e-02, 2.6611e-01, 3.5938e-01, + -5.9540e-02, 3.0713e-01, 3.8090e-03, 1.7749e-01, 3.1396e-01, + 1.4099e-01, 1.6199e-01, -3.5191e-03, 6.2561e-02, 2.8534e-02, + -2.0007e-01, -1.8066e-01, 3.4180e-02, 7.5562e-02, 1.5833e-01, + -1.7786e-01, -1.8213e-01, -2.8198e-02, -2.9492e-01, -1.4542e-02, + -6.7810e-02, 9.3079e-02, 1.5076e-01, -9.5398e-02, -2.7588e-01, + 9.1675e-02, -4.7302e-02, -8.7952e-02, -4.3091e-02, -1.5918e-01, + -2.1704e-01, 1.3412e-02, 2.1313e-01, -1.9043e-02, -1.1938e-01, + 1.6870e-01, 6.8542e-02, -1.5430e-01, -1.8274e-01, -2.4744e-01, + -2.0203e-01, -1.5991e-01, 2.0447e-02, -7.7820e-02, -1.5002e-01, + -1.7471e-02, -1.0651e-01, 1.1975e-01, 4.4403e-02, 2.1057e-01, + 1.5039e-01, -3.3081e-02, 8.3694e-03, 2.1594e-01, -1.2128e-01, + -2.2058e-01, 1.1041e-01, 2.2449e-01, -2.3022e-01, -1.3965e-01, + 1.4478e-01, 1.0638e-01, 2.3303e-01, 1.4087e-01, -9.6863e-02, + -2.8540e-01, 1.1133e-01, 1.7224e-01, 1.2749e-02, 6.4941e-02, + -2.2446e-02, 2.0190e-01, 3.1830e-02, 7.7896e-03, -6.5369e-02, + 1.4832e-01, 4.1351e-02, -2.2571e-01, -3.7018e-02, 8.5083e-02, + 2.1240e-01, -3.1958e-01, -2.5726e-02, 2.3096e-01, -2.4133e-01, + -2.5464e-01, 1.1420e-01, -1.7786e-01, 1.5222e-01, 1.6281e-02, + 2.0520e-01, 2.0850e-01, -9.8328e-02, 8.3084e-03, -3.8452e-02, + 9.8450e-02, -3.6896e-02, 6.7932e-02, -8.7128e-03, 1.2646e-01, + -4.3060e-02, 
-2.1423e-01, 2.6587e-01, 1.9202e-01, 1.6455e-01, + 3.0811e-01, 2.5073e-01, -4.7119e-02, -3.8281e-01, -4.4952e-02, + 1.3403e-01, -2.4744e-01, 3.6438e-02, -3.9062e-02, 1.0809e-01, + -2.9770e-02, -4.2297e-02, 1.7548e-02, -8.6689e-04, -8.0811e-02, + 1.4893e-01, -9.7473e-02, -2.5806e-01, 2.1326e-01, 8.2703e-02, + 9.4666e-02, 3.0151e-01, 2.7054e-02, -1.4612e-01, 2.3657e-01, + -1.0828e-01, 1.5649e-01, -1.6284e-01, 2.7710e-01, -9.6497e-02, + -2.0428e-03, -1.8091e-01, -1.6309e-01, -8.1665e-02, -1.5027e-01, + -9.0271e-02, 1.4758e-01, 1.6687e-01, -6.4758e-02, -1.1267e-01, + 1.3989e-01, -1.8921e-01, 7.0374e-02, 1.7310e-01, -2.2803e-01, + 8.6441e-03, 8.0017e-02, 4.1687e-02, 2.9639e-01, -1.2396e-01, + -1.0071e-01, 8.0505e-02, -1.1536e-01, -2.2742e-01, -7.3303e-02, + 9.5901e-03, -1.9006e-01, -6.5369e-02, 5.6793e-02, -1.8372e-02, + 1.7432e-01, 1.3599e-01, -2.5528e-02, 1.9495e-01, -1.2969e+00, + 4.7394e-02, 1.1646e-01, 6.7383e-02, -1.6223e-01, 7.1533e-02, + 1.3025e-01, 1.8112e-02, 3.0737e-01, 1.0773e-01, -9.6375e-02, + 2.2632e-01, 2.2571e-01, -5.6610e-03, -1.4490e-01, -8.7463e-02, + -1.4673e-01, 1.4355e-01, 1.1865e-01, -4.6349e-03, 3.1104e-01, + -4.8340e-02, 9.0515e-02, -6.7444e-02, -3.1616e-02, -1.7200e-01, + 9.5596e-03, -3.3539e-02, 1.6943e-01, 2.1576e-02, -7.2205e-02, + -4.6051e-02, 3.4619e-01, 1.9669e-02, 1.6907e-01, -2.2095e-02, + 9.5764e-02, 5.7526e-02, -1.6284e-01, 1.3220e-01, -4.5380e-02, + 3.0322e-01, 1.2451e-01, -7.6172e-02, 5.8624e-02, 2.1399e-01, + 2.5659e-01, 1.7542e-01, -1.8726e-01, -1.0052e-01, 1.5601e-01, + -8.0261e-03, -1.9250e-01, -2.0728e-01, -1.0773e-01, 8.2092e-02, + -2.3718e-01, 1.8848e-01, 1.1389e-01, -5.9967e-02, -1.1237e-01, + 2.9388e-02, 6.6284e-02, -1.4258e-01, 5.5008e-03, 1.8115e-01, + 4.0039e-02, 2.7930e-01, -2.1948e-01, 6.1493e-02, 9.9976e-02, + -1.1169e-01, 2.2644e-01, -1.0059e-01, 8.5999e-02, 7.2250e-03, + -1.9547e-02, -3.0298e-01, 1.4709e-01, 9.9854e-02, -7.2510e-02, + 4.1138e-02, 9.9854e-02, 1.3802e-02, 1.3916e-01, -3.0786e-01, + -9.0332e-03, 
2.3364e-01, -1.1591e-01, -2.6050e-01, -2.8491e-01, + -9.1797e-02, 1.7749e-01, 1.1115e-01, -1.6943e-01, -2.5223e-02, + -3.4515e-02, 3.3789e-01, -1.4458e-02, -1.8970e-01, 2.4280e-01, + 4.2908e-02, -1.0559e-01, 6.2408e-03, 2.6221e-01, -5.0323e-02, + 5.5420e-02, -9.2697e-04, 4.0497e-02, 1.1017e-01, 1.3525e-01, + -2.0557e-01, 2.5000e-01, -2.8125e-01, 3.1641e-01, 4.8553e-02, + 2.9443e-01, -3.9154e-02, -5.8594e-02, 1.0431e-01, 3.3203e-01, + -4.3274e-02, -2.1545e-01, -1.3843e-01, -1.0760e-01, -3.8147e-02, + -2.5635e-01, 8.1665e-02, 1.6235e-01, -5.6885e-02, 9.8022e-02, + 5.6488e-02, 2.2266e-01, -1.8787e-01, 9.2041e-02, -1.0907e-01, + 3.0273e-01, 1.3049e-01, -1.2018e-01, -2.9663e-02, 6.1452e-05, + -1.5228e-02, 5.6366e-02, -4.9072e-02, 9.7168e-02, 1.5833e-01, + -9.8938e-02, -1.2256e-01, 4.6997e-02, -5.3986e-02, 7.8552e-02, + 1.4661e-01, 1.5198e-01, 2.3999e-01, 2.8152e-02, -1.6375e-03, + 1.6089e-01, -2.6978e-01, -2.3022e-01, -8.1055e-02, -1.2299e-01, + -3.2166e-02, 3.8239e-02, -1.3013e-01, -1.7151e-01, 5.3711e-02, + -1.6012e-03, 5.1651e-03, -8.3008e-02, -9.8572e-02, -1.0376e-01, + 2.5146e-01, -6.1066e-02, 2.0776e-01, -1.9629e-01, -8.8440e-02, + 8.7952e-02, 5.1239e-02, -8.2626e-03, 6.3667e-03, -4.3854e-02, + -1.8945e-01, 1.0223e-01, -1.6223e-01, -4.5624e-03, -9.3079e-02, + -3.2288e-02, -1.8127e-01, 7.4524e-02, 8.6060e-02, -2.6953e-01, + -8.4412e-02, -3.5431e-02, 9.4727e-02, 2.6779e-02, -2.8882e-01, + -3.2178e-01, -5.1575e-03, 3.6285e-02, 6.9824e-02, 2.1204e-01, + 1.8349e-03, -1.6162e-01, -3.0200e-01, -2.6154e-02, -5.6076e-03, + -6.1768e-02, 2.8491e-01, -8.7524e-02, -2.2229e-01, 3.2074e-02, + 2.2205e-01, -1.4099e-01, 1.3696e-01, 3.6743e-02, 7.0923e-02, + -2.1533e-01, -1.0870e-01, 3.9330e-03, -4.7150e-02, -1.8530e-01, + -9.0454e-02, 2.4573e-01, 6.5247e-02, 3.3142e-02, 3.3630e-02, + -2.3819e-02, 1.1084e-01, 2.6794e-02, 1.5049e-03, -5.7892e-02, + -6.4514e-02, -2.3010e-01, 2.1863e-01, 4.8889e-02, 5.6641e-02, + -2.0850e-01, 8.2825e-02, -8.4839e-02, -1.0162e-01, 1.2891e-01, + 
1.4734e-01, 1.2225e-01, 6.5125e-02, -3.5492e-02, 3.3057e-01, + 5.4749e-02, 4.1138e-02, -4.5868e-02, 2.4817e-01, -1.2585e-01, + -1.6858e-01, -2.5488e-01, -2.6709e-01, -2.1265e-01, -8.3313e-02, + -9.2163e-02, 1.5320e-01, -8.2214e-02, -2.7490e-01, 1.1139e-01, + 2.3560e-01, 4.5776e-02, 5.1361e-02, -1.8262e-01, -5.0879e-04, + 5.0720e-02, -2.9160e-02, -2.6831e-01, 1.2042e-01, 1.4197e-01, + -2.1655e-01, -6.8848e-02, -1.2213e-01, -4.3091e-02, -1.0223e-01, + -2.2241e-01, -2.1500e-02, -2.2858e-02, 1.1035e-01, -1.4368e-01, + -2.0081e-01, 1.4136e-01, 1.5112e-01, -2.9126e-01, -1.6284e-01, + 1.1853e-01, -3.2568e-01, 4.4678e-02, -2.7124e-01, -1.5076e-01, + -7.4890e-02, -1.1877e-01, -2.1851e-01, 9.8267e-02, 7.3975e-02, + 2.4365e-01, 1.7899e-02, -5.1956e-03, -1.8774e-01, 1.7651e-01, + -2.9297e-01, 1.8250e-01, -7.8247e-02, 2.2571e-01, 8.7158e-02, + 1.9214e-01, 3.2074e-02, -1.3171e-01, 1.0712e-01, -7.7209e-02, + 2.4097e-01, -2.2339e-01, 1.9531e-01, -1.7224e-01, 1.8042e-01, + -1.1169e-01, -7.6332e-03, 1.0455e-01, -1.8628e-01, 3.8647e-01, + -8.5754e-03, 4.4434e-02, 1.0852e-01, 2.3816e-01, -4.9957e-02, + -1.7053e-01, 1.6479e-01, -2.3425e-01, 1.9812e-01, -8.8989e-02, + -3.0548e-02, 1.5236e-02, -1.1304e-01, 1.5796e-01, -8.7891e-02, + -2.1103e-02, -3.0289e-03, -2.3779e-01, 1.9067e-01, 1.6187e-01, + 1.6870e-01, 5.0720e-02, 2.0947e-01, -1.7908e-01, -4.0955e-02, + -2.0605e-01, 3.5339e-02, 1.6992e-01, -1.0504e-01, -1.5942e-01, + -2.0142e-01, -1.5472e-02, -2.3804e-01, -1.8341e-02, -1.5869e-01, + 1.3782e-01, -1.9580e-01, 1.5857e-01, 4.2627e-01, -7.3303e-02, + -2.3132e-01, 2.1240e-02, 2.0294e-02, -1.1322e-01, 4.2267e-02, + -1.5991e-02, -7.6538e-02, -4.0100e-02, 8.8745e-02, 2.7173e-01, + -7.8186e-02, 7.9285e-02, 2.8214e-02, 1.0638e-01, -4.3396e-02, + 5.2460e-02, 1.3416e-01, 4.0497e-02, 2.5272e-04, -4.1443e-02, + -2.7319e-01, 2.6221e-01, -2.4612e-02, 5.6580e-02, 2.6953e-01, + 1.0156e-01, 7.1655e-02, 6.6452e-03, 4.6356e-02, 8.2947e-02, + 1.0986e-02, -1.8567e-01, -2.7026e-01, 9.7412e-02, 6.3416e-02, 
+ -1.0809e-01, 2.5684e-01, 5.9418e-02, 5.4047e-02, 2.0657e-03, + -8.6914e-02, 6.0425e-02, -1.0840e-01, -1.6711e-01, -1.5234e-01, + 2.5879e-01, -5.5786e-02, -7.8430e-02, 5.5878e-02, -3.2812e-01, + -1.9934e-01, 9.6313e-02, -1.3062e-01, 1.6736e-01, 1.7151e-01, + 8.5815e-02, -1.1182e-01, -1.5137e-02, -4.1931e-02, -6.9336e-02, + -4.3823e-02, -4.1809e-02, 7.6904e-02, -3.6157e-01, -6.3660e-02, + -1.6382e-01, 2.4658e-02, 4.3774e-01, -4.3774e-01, -1.1328e-01, + 8.6121e-02, 4.8401e-02, -6.8542e-02, -1.1786e-01, -1.8677e-01, + 2.4097e-01, 1.3098e-01, 2.5928e-01, -1.8005e-01, -6.6650e-02, + 1.5649e-01, 9.4604e-02, -6.0059e-02], device='cuda:1', + dtype=torch.float16, requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([1.7453, 1.7916, 1.7866, 1.7060, 2.2256, 1.7691, 1.6459, 1.7028, 1.7940, + 1.7137, 1.7359, 1.6988, 1.7650, 1.6460, 1.9754, 1.7098, 1.6823, 1.6809, + 1.7675, 2.8573, 1.7179, 1.7905, 1.7851, 1.7294, 1.7422, 1.7077, 1.7375, + 1.6869, 1.8025, 1.6808, 1.6921, 1.7139, 1.7179, 1.7785, 1.7144, 1.6573, + 1.7561, 1.7219, 1.8840, 1.8551, 1.7527, 1.7285, 1.8142, 1.7218, 1.7085, + 1.7747, 1.7440, 1.6605, 1.7240, 1.6341, 1.6871, 1.7110, 1.8223, 1.7324, + 1.6996, 1.9939, 1.6452, 1.5798, 1.6484, 1.6495, 1.7104, 1.6899, 1.7505, + 1.7364, 1.7274, 1.8487, 1.7835, 1.7188, 1.6811, 2.6443, 1.7195, 1.6335, + 1.6849, 1.8288, 1.7249, 1.7434, 1.7538, 1.7232, 1.7073, 1.7086, 1.7375, + 1.6350, 1.6951, 1.7128, 1.6553, 1.6948, 1.7890, 1.7998, 1.6942, 1.6631, + 1.8334, 1.7767, 1.7468, 1.7147, 1.6983, 1.7031, 1.7222, 1.6426, 1.6320, + 1.8031, 1.6616, 1.8613, 1.7142, 1.7822, 1.6660, 1.8272, 1.7065, 1.6898, + 1.6310, 1.7521, 1.7844, 1.7785, 1.7546, 1.7858, 1.7514, 1.7175, 1.8956, + 1.6952, 1.7559, 1.7030, 1.6885, 1.8080, 1.6344, 1.7356, 1.6977, 1.8344, + 1.7739, 1.6807, 1.6260, 1.8368, 1.6922, 1.8865, 1.7865, 1.6245, 1.6890, + 1.7053, 1.8425, 1.7724, 1.7564, 1.7803, 1.6540, 1.7523, 1.6523, 1.8011, + 1.7817, 1.8210, 1.7611, 1.7148, 1.6591, 1.7066, 1.7953, 1.6767, 1.7715, + 
1.7065, 2.0386, 2.6908, 1.7533, 1.7069, 1.7056, 1.8112, 1.8387, 1.7340, + 1.6729, 1.7448, 1.7651, 1.7562, 1.7603, 1.7402, 1.6834, 1.6905, 1.7669, + 1.7285, 1.7695, 1.6959, 1.7073, 1.7729, 1.6439, 1.7737, 1.9007, 1.8193, + 1.6352, 1.7767, 1.7973, 1.6721, 1.6326, 2.0689, 1.7521, 1.5624, 1.7597, + 1.7233, 1.7889, 1.6468, 1.7949, 1.6567, 1.7086, 1.6855, 1.7539, 1.7119, + 1.7105, 1.7665, 1.6940, 1.7199, 1.6587, 1.6535, 1.7443, 1.8192, 1.6830, + 1.6448, 1.8299, 1.7532, 1.7436, 1.7580, 1.7853, 2.2166, 1.7786, 1.6753, + 1.6548, 1.7259, 1.7014, 2.1519, 1.7872, 1.8132, 2.3511, 1.7451, 1.6446, + 1.7903, 1.7986, 1.7225, 1.6865, 1.8110, 1.7765, 1.7229, 1.8636, 1.6293, + 1.6722, 1.6752, 1.7345, 1.6557, 1.7530, 1.8020, 1.7724, 1.7530, 1.7401, + 1.7140, 1.6637, 1.7321, 1.7963, 1.7235, 1.5552, 1.7318, 1.6982, 1.6654, + 1.7070, 1.7836, 1.6677, 1.7144, 1.7207, 1.6214, 1.7139, 1.6765, 1.7318, + 1.6520, 1.7032, 1.6822, 1.7167, 1.9936, 1.6842, 1.7808, 1.6448, 1.6655, + 1.7495, 1.7671, 1.7984, 1.7307, 1.7251, 1.8597, 1.6690, 1.7206, 1.8050, + 1.6937, 1.7735, 2.4023, 1.7041, 1.6982, 1.8072, 1.7366, 1.7480, 1.6612, + 1.8285, 1.7446, 1.6593, 1.5729, 1.8789, 1.7670, 1.5866, 1.6760, 1.9861, + 1.7029, 1.7262, 1.6737, 1.7327, 1.7383, 1.6930, 1.7918, 1.7627, 1.6763, + 1.7033, 1.7053, 1.8581, 1.7151, 1.5995, 1.7384, 1.7110, 1.7173, 1.7167, + 1.7400, 1.7778, 1.7273, 1.8511, 1.7437, 1.7464, 1.8946, 1.7516, 1.7868, + 1.9961, 1.7375, 1.8223, 1.8808, 1.6267, 1.6657, 1.7340, 1.7113, 1.6643, + 1.6593, 1.7756, 2.0622, 2.1898, 1.7083, 1.6580, 1.7134, 1.6195, 1.7422, + 1.6664, 1.6919, 1.7474, 1.8087, 1.7293, 1.7537, 1.7542, 1.7362, 2.0181, + 1.6838, 1.7311, 1.7292, 1.6971, 1.7682, 1.7884, 1.7328, 1.5910, 1.7977, + 1.8513, 1.7065, 1.7044, 1.7043, 1.6129, 1.6470, 1.7784, 1.7901, 1.9546, + 1.7444, 1.7572, 1.7716, 1.7748, 1.7461, 1.6755, 1.7075, 1.7108, 1.7496, + 1.8443, 1.6807, 1.8030, 1.7385, 1.7137, 1.7996, 1.8267, 1.6882, 1.7066, + 1.7421, 1.7258, 1.7883, 1.6757, 1.7318, 1.7987, 1.7435, 1.7862, 1.7681, + 
1.7145, 1.6727, 1.7336, 1.7229, 1.7688, 1.7182, 1.8338, 1.7751, 1.8746, + 1.7005, 1.6615, 1.6654, 1.7231, 1.6855, 1.8164, 1.7140, 1.7523, 1.7857, + 2.3900, 1.5233, 1.6147, 1.6700, 1.7539, 1.7769, 1.6780, 1.6786, 1.7570, + 1.7269, 1.6752, 1.6584, 2.0102, 1.7854, 1.8528, 1.7610, 1.7021, 1.7769, + 1.7718, 1.7939, 1.7760, 1.8099, 1.7298, 2.0391, 1.7124, 1.6612, 1.7290, + 1.7201, 1.7583, 3.1886, 1.8840, 1.7371, 1.6928, 1.7375, 1.6900, 1.7818, + 1.7990, 1.8781, 1.8105, 1.7186, 1.7681, 1.7200, 1.7865, 1.7380, 1.7729, + 1.6335, 1.6749, 1.8281, 1.7893, 1.8383, 1.7957, 1.7725, 1.7262, 1.8634, + 1.7606, 1.7397, 1.7381, 1.8562, 1.7843, 1.6655, 1.7895, 1.7482, 1.7366, + 1.7391, 1.8199, 1.7300, 1.8336, 1.6733, 1.6370, 2.5717, 1.7043, 1.7063, + 1.6659, 1.6898, 1.7545, 1.7829, 1.6406, 1.6760, 1.7399, 1.7606, 1.6639, + 1.6529, 1.7716, 1.7976, 1.7405, 1.7779, 1.7114, 1.8633, 1.7808, 1.6656, + 1.7106, 1.6379, 1.7043, 1.7034, 1.7934, 1.7144, 1.6960, 1.6369, 1.6841, + 1.6676, 1.7403, 1.8162, 1.7031, 1.7580, 1.7332, 1.6771, 1.8749, 1.6809, + 1.6692, 1.7695, 1.8276, 1.7821, 1.5962, 1.7035, 1.6830, 1.7837, 1.7629, + 1.8197, 1.7063, 1.8174, 1.7458, 1.7029, 1.7777, 1.7655, 1.8087, 1.7410, + 1.6792, 1.8107, 1.7881, 1.7322, 1.7766, 1.7144, 1.6638, 1.7587, 1.7629, + 1.6253, 1.7377, 1.8036, 1.7703, 1.7773, 1.7076, 1.6700, 1.7811, 1.7947, + 1.7252, 1.7153, 1.6269, 2.4692, 1.6680, 1.7619, 1.7354, 1.7241, 1.7732, + 1.6729, 1.6692, 1.7719, 1.7385, 1.8128, 1.7062, 1.6910, 1.7789, 1.8631, + 1.6821, 1.7483, 1.7831, 1.7509, 1.7962, 1.7171, 1.7621, 1.7450, 1.7958, + 1.7197, 1.6905, 1.7545, 1.7549, 1.6749, 1.7081, 1.8256, 1.8227, 1.7506, + 1.8081, 1.7703, 1.7216, 1.6962, 1.8352, 1.6964, 1.7632, 1.7305, 1.7199, + 1.6506, 1.7202, 1.7306, 1.6137, 1.9637, 1.6504, 1.7721, 1.7887, 1.7454, + 1.6628, 1.6622, 1.6674, 1.7064, 1.7213, 1.7320, 1.8118, 1.8140, 1.9451, + 1.6857, 1.7272, 1.8096, 1.7151, 1.8023, 1.7991, 1.7979, 1.7435, 1.6916, + 1.7446, 1.8259, 1.8163, 1.6671, 1.7059, 1.7805, 1.7954, 1.8122, 1.7129, + 
1.7006, 1.8278, 1.6928, 1.7699, 1.8127, 2.6332, 1.6948, 1.8121, 1.7807, + 1.7341, 1.7502, 1.7695, 1.9141, 1.7048, 1.6762, 1.7265, 1.6776, 1.8072, + 1.6987, 1.7570, 1.7126, 1.8879, 1.7532, 1.8169, 1.6947, 1.7870, 1.5678, + 1.6320, 1.6937, 1.7704, 1.6546, 1.7238, 1.6330, 1.7746, 1.9694, 1.6477, + 1.7148, 1.7291, 1.9253, 1.7652, 1.6549, 1.8793, 2.7716, 1.7033, 1.8173, + 1.7531, 1.8650, 1.6701, 1.7444, 1.8200, 1.8616, 1.7401, 1.7574, 1.6387, + 1.7232, 1.7761, 1.7073, 1.7611, 1.6337, 1.6966, 1.6602, 1.7216, 1.7338, + 1.6352, 1.6476, 1.6426, 1.7410, 1.8819, 1.6756, 1.5814, 1.7793, 1.7433, + 1.7529, 1.7703, 1.6975, 1.7116, 1.7453, 1.7028, 1.8396, 1.7989, 1.7690, + 1.5984, 1.9373, 1.8247, 1.6775, 1.7250, 1.5793, 2.5916, 1.6575, 1.7041, + 1.6216, 1.7912, 1.7722, 1.8124, 1.7116, 1.6666, 1.6750, 1.6511, 1.7467, + 1.7391, 1.6443, 1.7227, 1.8167, 1.7149, 1.7041, 1.7369, 1.7651, 1.6900, + 1.7217, 1.7272, 1.6623, 1.7115, 1.6773, 1.7730, 1.7566, 1.6515, 1.7410, + 1.6800, 1.8447, 1.8276, 1.7401, 1.7146, 1.7237, 1.8311, 1.7415, 1.7554, + 1.7146, 1.6885, 1.7398], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-1.5534e-02, -9.1763e-02, -1.4045e-01, 2.9126e-02, -2.9463e-01, + 2.3244e-01, 1.4650e-01, 1.3824e-01, -1.4006e-01, -1.1251e-01, + 1.2915e-01, -1.6772e-01, -1.1740e-01, -1.8915e-02, 2.1420e-01, + -1.1838e-01, 2.4480e-02, 1.0073e-01, 1.0139e-01, -2.4958e-01, + -4.7139e-02, -3.2491e-02, 7.5936e-02, 5.0327e-03, -5.9994e-02, + -1.1320e-01, 1.1245e-01, 1.3947e-01, -1.5842e-01, 1.0699e-01, + -1.0513e-01, 1.4646e-01, 1.6563e-01, 8.8609e-02, 1.4347e-01, + -7.4134e-02, -1.2482e-01, 3.6243e-02, 2.5012e-01, 2.1067e-02, + -1.0077e-01, -9.0722e-02, -3.1916e-02, 2.9762e-01, 9.3168e-02, + -5.2134e-02, 1.2359e-01, -1.1238e-01, 6.2995e-02, 1.8050e-01, + 5.4862e-02, -1.4248e-01, -2.7663e-01, 7.0286e-02, -4.1281e-02, + 1.8095e-01, 2.5451e-02, -9.3611e-02, 3.1525e-02, 3.0742e-02, + -9.0886e-02, 3.5809e-02, -5.0295e-02, -1.6220e-02, 1.4903e-01, + 1.6067e-01, 
7.7424e-02, -1.5068e-01, -9.7631e-02, -3.0946e-01, + -1.0245e-01, -7.8358e-02, 3.2978e-01, -1.1044e-01, 6.9739e-02, + -1.4487e-01, 1.8632e-01, -4.9441e-02, -1.4343e-01, 3.6038e-01, + 4.3383e-02, -1.5065e-01, 2.7976e-02, -2.0230e-02, -2.2828e-01, + 1.5317e-01, -8.6870e-02, 5.8729e-02, 8.2278e-02, -1.1187e-01, + -2.0997e-01, 1.1161e-01, 5.3529e-03, -3.4734e-01, -3.0991e-01, + -7.8397e-02, 1.9746e-01, -3.0953e-02, -2.6813e-01, 2.1088e-01, + 1.2270e-01, -2.1737e-01, 2.1370e-01, -1.8573e-02, 2.0623e-01, + 3.2870e-02, 1.3364e-01, -1.4971e-01, -3.0270e-01, -6.7287e-02, + 1.4275e-01, 2.9592e-01, 1.2103e-01, -1.6004e-01, -2.3951e-02, + -3.6579e-02, 3.0966e-01, -6.5552e-02, -2.5138e-01, 2.6074e-01, + -6.9158e-02, -3.3349e-01, -1.6839e-01, -2.2436e-01, -4.7964e-02, + -1.3955e-01, -2.2653e-02, 3.1051e-01, -4.5824e-02, 1.2099e-01, + 1.0464e-02, -2.2245e-01, 2.8780e-01, 2.4416e-02, 9.3748e-02, + 2.1737e-01, -9.0517e-02, -6.9622e-02, -8.9925e-02, -1.7875e-01, + 1.5849e-01, 3.7965e-02, -1.1369e-01, 1.3518e-01, 1.7028e-02, + -2.7495e-01, 1.6793e-01, 2.1625e-01, 2.3221e-01, -1.6563e-02, + 3.1062e-01, -1.4128e-01, -1.3812e-01, -2.9676e-02, -1.1254e-01, + 7.3345e-02, 1.9008e-01, -3.1589e-01, 1.7483e-01, 2.9653e-02, + -4.0455e-02, -3.6911e-02, 8.0104e-02, 2.0828e-01, -4.0867e-02, + -2.4094e-02, -2.2152e-01, 2.1542e-01, -1.7157e-01, -1.2389e-02, + 6.6927e-02, 3.7046e-01, -3.9744e-02, 1.6790e-01, 2.5737e-01, + -6.8471e-02, 5.7411e-02, 6.1858e-02, -3.9160e-01, -4.6156e-02, + -2.1085e-01, 1.1249e-01, 1.3627e-01, 2.7793e-01, -2.0108e-01, + 2.2857e-01, -1.2739e-01, 1.2818e+00, -8.5203e-02, -2.1764e-01, + -2.8680e-01, -2.2593e-01, 8.1885e-02, -6.8978e-02, -1.5091e-01, + 4.0286e-02, 1.1618e-01, -2.5044e-01, 1.5447e-01, 9.2900e-02, + -9.9608e-02, 2.9712e-02, -2.2294e-02, 3.1927e-02, 9.5738e-02, + 1.9990e-01, 1.6504e-01, 2.4631e-01, 1.0133e-01, 6.9246e-03, + 1.8389e-01, 1.8244e-01, -2.1620e-01, 3.0810e-01, 3.2302e-03, + -1.5407e-01, -2.9648e-01, 4.3923e-03, -1.9695e-01, 3.5502e-02, + 9.7438e-02, 
3.6828e-03, -4.1745e-01, -9.6438e-02, -2.8194e-01, + 1.0465e-01, -2.3009e-01, -1.8973e-01, 1.1445e-01, 1.1904e-01, + 2.4688e-01, 8.2958e-02, 4.5977e-03, -1.2731e-01, -3.0066e-01, + -2.6491e-01, 3.9542e-02, 5.0806e-02, 1.1334e-01, -6.6315e-03, + -1.2101e-01, 2.5357e-01, 1.4793e-02, -7.2905e-02, -5.3436e-02, + 1.2530e-01, -1.5131e-01, 2.3338e-02, -2.0551e-01, -2.9184e-02, + -1.2791e-02, -2.3347e-01, 1.7415e-01, -7.8002e-02, 1.1495e-01, + -2.7784e-01, 1.3663e-01, -1.6558e-01, -1.6523e-01, -1.5290e-01, + -1.4430e-01, 1.4719e-01, 6.6719e-02, 2.7414e-02, 4.6831e-02, + 3.8021e-01, -5.6495e-02, -1.5276e-01, -2.3739e-02, 3.4107e-02, + -7.4406e-02, 1.1249e-01, 2.2351e-01, -1.1416e-01, 4.6791e-02, + -3.3080e-02, 1.2830e-01, 2.8935e-01, 9.2980e-02, -1.5083e-01, + -4.5502e-02, -2.3274e-01, -9.3975e-02, 1.5168e-01, -8.0357e-02, + 2.5591e-01, -6.7106e-02, -1.8530e-01, 2.9677e-01, -1.3151e-01, + 1.0348e-01, -7.1660e-02, 1.1649e-01, -2.7519e-02, 1.0288e-01, + 2.3199e-02, 1.6356e-01, -2.6839e-01, 4.4333e-01, 9.9366e-02, + -3.5228e-01, -1.4036e-01, -1.9225e-01, -1.8235e-01, 4.8254e-02, + 1.1766e-01, 1.5968e-01, -2.8152e-01, 2.7740e-02, -1.4705e-01, + -8.5655e-02, -1.2457e-01, -1.5918e-01, -2.2650e-01, -2.0617e-03, + 2.2071e-01, 4.7423e-01, -1.2317e-01, 8.1605e-02, 2.6438e-01, + -1.7328e-01, -5.5589e-02, -7.9478e-02, 6.6586e-02, -3.9526e-01, + -1.6453e-01, -6.7773e-02, -6.8706e-02, 4.0666e-02, -1.0352e-01, + 2.0825e-01, -3.9659e-02, 7.2424e-03, 4.7092e-02, 2.2881e-01, + -1.1379e-01, -3.1302e-01, 8.0753e-02, -9.4348e-02, 7.6734e-02, + 1.4272e-01, -9.4793e-02, -1.0785e-01, -1.6521e-02, 2.9463e-02, + 2.0519e-01, 4.1036e-02, 5.8431e-02, -7.8597e-02, 2.9006e-01, + 2.5388e-01, 1.6062e-01, 2.9854e-03, 1.3798e-01, 1.5111e-01, + -3.3206e-01, -4.6566e-02, 1.1332e-01, 2.7857e-01, -3.2234e-01, + -1.5060e-01, -1.9339e-01, 6.2923e-02, -3.6645e-01, 5.0914e-02, + -1.0140e-03, 1.0521e-01, 2.5313e-01, 9.8427e-02, 5.3377e-02, + 3.0633e-02, -1.2009e-02, 4.8498e-02, -1.7911e-02, -1.2377e-01, + 2.1468e-02, 
8.0258e-02, 2.3942e-01, -1.3388e-01, 6.4349e-02, + -2.2947e-01, 1.1419e-01, 1.6653e-01, 3.6875e-02, 2.3298e-02, + -9.5069e-04, 2.0242e-01, 8.2273e-02, 1.2790e-01, 3.8417e-02, + -1.3779e-01, -3.5614e-01, -2.9389e-03, -3.7396e-02, -2.2514e-01, + 1.1973e-01, 1.5550e-01, 4.0188e-02, -1.1199e-01, -8.5370e-02, + 2.5541e-01, 8.7027e-02, 1.0922e-01, -5.5832e-02, -2.6477e-02, + -2.6585e-01, -1.9053e-01, -1.2098e-01, 7.0815e-02, 2.6938e-01, + 2.0130e-02, 3.1811e-02, -3.5534e-02, 1.0926e-01, 3.0880e-01, + -1.5899e-01, 6.1207e-02, -5.4653e-03, -2.8716e-02, 3.2807e-02, + -2.0764e-01, 4.9832e-02, 1.6245e-01, 5.7098e-02, 1.8167e-01, + -1.6261e-02, 2.1805e-02, -3.1371e-01, -1.3691e-01, 1.1864e-01, + -1.1328e-01, -7.7644e-02, -3.5575e-02, 2.1682e-01, 1.3694e-01, + 8.8308e-03, 2.2246e-01, 4.5436e-02, 1.8990e-04, -1.4476e-01, + -7.4615e-02, 8.5595e-02, 5.4799e-02, -7.1474e-02, 7.8181e-03, + -3.7506e-02, -9.0086e-02, -2.5738e-02, 4.2255e-03, -1.9520e-01, + -3.2488e-02, 6.0368e-03, -2.6547e-01, -2.4829e-01, 2.1645e-02, + -7.3512e-02, 7.6393e-02, -7.2706e-02, 1.5358e-01, 9.3677e-03, + -1.4428e-01, 1.6863e-01, -2.6322e-01, 2.1465e-01, -2.1246e-01, + -3.2902e-01, 3.1898e-02, 2.4764e-01, 3.4598e-01, 2.4307e-01, + 2.8445e-01, 1.6679e-02, 8.4466e-03, 6.5163e-02, 3.9525e-02, + -9.2690e-02, 4.4431e-02, -4.6913e-02, -9.2345e-03, -8.9794e-03, + 1.9486e-02, -1.1213e-01, 1.1999e-01, 1.0694e-01, -1.5547e-01, + 5.2061e-02, -1.4350e-01, 1.6855e-01, 8.6247e-02, 3.3788e-02, + 7.4436e-02, 2.2431e-01, -8.0994e-02, 7.9031e-02, -2.3455e-01, + -1.6954e-01, -2.5681e-01, 9.9518e-02, -2.0205e-02, 1.9510e-01, + -1.7703e-01, -2.3888e-01, 1.6401e-02, -1.6304e-01, -1.0341e-01, + -8.9298e-02, 2.9634e-02, -1.5724e-01, 6.5853e-02, -1.4971e-01, + -1.3078e-01, -1.6806e-01, -1.3370e-01, 3.9207e-01, -7.3544e-02, + -8.2711e-02, 9.3258e-02, -1.6172e-01, 1.8738e-01, 1.4350e-01, + 2.4039e-01, -3.3456e-02, -1.7536e-02, 2.7335e-01, -2.1055e-01, + -6.3564e-02, 2.1079e-02, 2.3638e-01, -3.1217e-02, 3.7273e-02, + 1.1079e-02, 
1.5963e-01, -1.8719e-01, -1.0053e-03, -1.7813e-02, + -2.9240e-01, -8.0952e-02, 5.3291e-03, 2.6588e-01, -2.0618e-01, + -1.6225e-01, 1.0025e-01, -1.1047e-01, -9.2405e-02, 1.0489e-01, + -4.5642e-02, -1.8395e-01, -1.0642e-01, 1.8981e-01, 1.9907e-02, + -1.7135e-01, 6.5531e-02, -1.3988e-03, 2.7817e-01, -2.5802e-01, + -1.4527e-01, 1.6533e-01, 2.5748e-02, -6.5332e-02, 4.1882e-02, + -1.7025e-01, -4.3420e-01, -5.4950e-02, 2.4054e-01, 4.4939e-01, + 2.2223e-01, -5.3037e-02, 1.9851e-01, 8.5092e-02, -5.1684e-02, + 5.8122e-02, -2.0831e-01, 1.3727e-02, -1.4461e-01, 1.1605e-01, + 3.3023e-01, -2.5016e-03, 8.3355e-02, 2.8889e-03, -9.1767e-02, + -2.2500e-01, -2.6005e-01, -2.2718e-02, 7.8965e-02, -2.0193e-01, + 3.1948e-01, -1.4691e-01, -4.2470e-03, 1.6581e-01, -5.5066e-02, + -2.8730e-02, 1.2353e-01, 9.0078e-02, 5.3239e-02, 7.6229e-02, + -1.8688e-02, 1.8038e-01, -1.3688e-01, 1.0056e-02, -1.1915e-01, + 1.1376e-01, -1.0106e-01, 3.1833e-02, -2.3530e-01, -2.1356e-01, + 1.2081e-02, 1.9723e-01, -6.6254e-02, -5.8621e-03, 7.9542e-02, + 1.8380e-01, 6.9102e-02, -2.0355e-01, 3.8634e-04, 1.5694e-01, + 1.5234e-02, 7.0116e-02, -8.7134e-02, -1.0493e-01, -1.2832e-01, + 8.8785e-02, 5.0802e-02, -1.6515e-01, 1.5294e-01, -9.5807e-02, + 1.1724e-01, -2.1482e-01, 5.9678e-02, -1.0187e-01, -1.1231e-01, + 7.3172e-02, 8.1636e-02, 1.3985e-01, -6.1189e-02, 3.2938e-01, + -2.0465e-01, 5.8024e-02, -2.0098e-01, -7.7353e-02, -1.4911e-01, + -1.2737e-01, -5.6727e-02, -1.5146e-01, 5.5125e-03, -3.8270e-01, + 8.5914e-02, 4.1835e-02, 2.1180e-01, 6.6254e-02, 1.4918e-01, + 4.0368e-04, 8.3069e-02, 5.3523e-02, -7.5874e-02, -3.9154e-02, + 9.0881e-02, -4.8088e-02, -8.1549e-02, -9.4344e-02, 1.8658e-01, + -1.1282e-03, -3.2404e-02, 2.2197e-01, 1.2310e-02, -1.4756e-01, + -1.5818e-01, -1.0427e-01, -1.3376e-01, -4.5153e-01, 1.4150e-01, + -4.6084e-01, 7.0984e-02, -5.3660e-03, 1.1445e-01, 2.0639e-01, + -1.0595e-01, 9.8461e-02, 1.1745e-01, -8.5346e-02, 9.7677e-02, + -2.8740e-01, -1.0844e-01, 2.0947e-02, -1.3861e-01, 1.5285e-01, + 
7.0771e-02, 4.2567e-02, 1.3805e-01, 3.5083e-02, -4.3154e-02, + -3.7462e-01, 5.8789e-02, -1.3240e-01, -1.0771e-01, -2.4909e-01, + 3.2137e-02, -1.4593e-01, -5.0975e-02, -1.2179e-01, -1.0344e-01, + 1.2949e-01, -1.4998e-01, 1.3079e-01, -1.7869e-01, -1.0132e-01, + 1.8350e-01, 4.9456e-02, -5.7911e-02, -1.6670e-01, -2.7019e-01, + -1.3105e-01, 2.1986e-01, -6.1723e-02, 3.5003e-01, -1.5446e-01, + 1.9561e-01, 1.1300e-01, 1.3925e-01, -1.5276e-01, 1.0147e-01, + -1.9581e-02, -4.3691e-01, -9.7110e-02, -3.3967e-01, -2.7802e-02, + -7.2438e-02, -1.4261e-01, 1.0363e-01, 3.0578e-01, 4.9787e-02, + 1.1251e-01, 7.2107e-02, 2.5426e-01, 1.4806e-01, 9.5198e-02, + 1.0909e-01, -9.6786e-02, -1.5798e-01, -4.7056e-03, -2.3768e-01, + -2.6643e-01, 1.7792e-01, 1.9149e-01, 4.8918e-02, 1.6031e-01, + -1.1331e-01, -1.2023e-02, 1.2726e-01, 3.3811e-01, 2.9085e-01, + -3.1095e-02, -2.7095e-01, 1.0544e-01, -7.7017e-02, 1.5635e-01, + 1.0879e-01, 8.0752e-03, 1.5868e-01, -1.9379e-01, 4.0000e-02, + -3.2902e-01, -1.3328e-01, 1.0858e-01, 4.2967e-02, -1.2718e-01, + -3.5778e-02, 1.1070e-01, 1.7500e-01], device='cuda:1', + requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([0.9789, 0.9294, 1.0503, 0.9833, 1.3635, 0.9817, 0.8959, 0.9901, 0.9724, + 1.0195, 0.9912, 1.0194, 0.9510, 0.9497, 1.0480, 0.9598, 0.9356, 0.9458, + 1.0339, 1.4869, 0.9539, 1.0286, 1.0209, 0.9972, 0.9667, 0.9840, 0.9655, + 1.0248, 0.9781, 0.9649, 0.9608, 1.0080, 0.9781, 0.9799, 1.0005, 0.9709, + 0.9607, 1.0319, 1.0683, 1.0291, 0.9931, 0.9402, 1.0207, 0.9329, 0.9443, + 1.0045, 0.9924, 0.9469, 0.9767, 1.0029, 0.9833, 0.9146, 0.9649, 0.9591, + 0.9597, 1.1766, 0.9689, 0.9140, 0.9275, 0.9578, 0.9100, 0.9850, 0.9424, + 1.0120, 0.9857, 1.0222, 1.0457, 1.0024, 1.0410, 1.4892, 0.9385, 0.9460, + 0.9541, 1.0221, 0.9335, 0.9982, 1.0116, 0.9893, 0.9835, 0.9902, 0.9754, + 0.9635, 0.9653, 0.9582, 0.9862, 0.9706, 0.9513, 0.9523, 0.9896, 0.9582, + 0.9980, 0.9927, 0.9415, 0.9474, 0.9352, 1.0319, 1.0163, 0.9256, 0.9569, + 1.0629, 0.9677, 0.9598, 
0.9947, 0.9495, 1.0303, 1.1323, 0.9882, 0.9709, + 0.9408, 0.9724, 1.0592, 0.9617, 0.9984, 0.9713, 1.0029, 0.9759, 1.0748, + 0.9440, 0.9530, 0.9644, 0.9541, 1.0438, 0.9496, 0.9783, 0.9973, 1.0621, + 0.9814, 0.9424, 0.9092, 0.9427, 1.0071, 0.9924, 0.9948, 0.9372, 1.0529, + 1.0278, 1.0116, 0.9581, 1.0060, 1.0342, 0.9699, 1.0151, 0.9476, 0.9842, + 0.9829, 1.0783, 1.0188, 0.9930, 0.9428, 0.9109, 1.1249, 0.9518, 0.9691, + 0.9435, 1.0108, 1.8398, 0.9405, 1.0203, 0.9426, 0.9539, 1.0851, 0.9390, + 0.9137, 0.9963, 0.9783, 0.9774, 0.9921, 0.9872, 0.9343, 0.9382, 1.0005, + 1.0383, 1.0908, 1.0157, 0.9840, 0.9636, 0.9586, 0.9408, 1.0387, 0.9679, + 0.9141, 0.9924, 0.9980, 0.9678, 0.9615, 1.2429, 1.0328, 1.1239, 0.9823, + 0.9177, 0.9334, 0.9671, 0.9564, 0.9523, 0.9280, 0.9608, 1.0131, 0.9538, + 1.1718, 1.0348, 0.9512, 1.0957, 0.9225, 0.9518, 1.0423, 0.9711, 0.9743, + 1.0087, 1.0097, 0.9382, 0.9720, 1.0695, 1.0373, 1.3284, 0.9904, 0.9383, + 0.9837, 0.9402, 0.9627, 1.1128, 0.9779, 0.9696, 1.5119, 0.9708, 0.9545, + 1.0144, 0.9609, 0.9002, 0.9714, 0.9668, 1.0259, 0.9891, 0.9930, 0.9484, + 1.0384, 0.9235, 1.0008, 0.9805, 0.9669, 1.0055, 0.9815, 0.9618, 0.9798, + 0.9605, 0.9924, 1.0096, 0.9602, 0.9588, 0.9440, 0.9645, 0.9641, 0.9293, + 0.9837, 0.9505, 0.9990, 0.9378, 0.9800, 0.9223, 0.9465, 1.0049, 0.9628, + 0.9659, 0.9606, 0.9697, 0.9839, 1.0872, 0.9421, 0.9863, 0.9491, 0.9595, + 0.9383, 0.9845, 0.9450, 0.9953, 0.9629, 0.9854, 1.0170, 0.9908, 0.9800, + 0.9220, 0.9957, 1.3879, 0.9971, 0.9980, 0.9933, 1.0206, 0.9309, 0.9692, + 0.9807, 1.0204, 0.9707, 0.9000, 1.0781, 0.9787, 0.9227, 0.9209, 1.1598, + 0.9672, 0.9598, 0.9724, 1.0041, 0.9868, 0.9703, 1.0104, 0.9311, 1.0195, + 0.9569, 0.9985, 1.0102, 1.0374, 0.9528, 0.9749, 1.0033, 0.9420, 0.9753, + 0.9903, 0.9605, 0.9820, 1.0534, 0.9808, 1.0243, 1.0862, 0.9781, 1.0080, + 0.6989, 1.0179, 0.9068, 0.9796, 0.9608, 0.9569, 1.0111, 0.9837, 0.9577, + 0.9589, 0.9779, 1.0451, 1.1752, 0.9593, 0.9444, 0.9135, 0.9258, 0.9964, + 0.9537, 1.0046, 1.0097, 
0.9951, 0.9439, 0.9784, 0.9663, 0.9261, 1.2351, + 0.9782, 0.9517, 1.1271, 0.9770, 0.9646, 0.9768, 0.9724, 0.9354, 1.0252, + 0.9786, 0.9721, 0.9825, 0.9624, 0.9528, 0.9635, 1.0061, 0.9892, 0.9911, + 0.9513, 0.9656, 1.0066, 0.9590, 0.9904, 0.9637, 0.9677, 0.9733, 0.9671, + 0.9685, 0.9739, 1.0084, 0.9667, 0.9818, 1.0092, 1.0129, 0.9741, 0.9463, + 1.0102, 1.0144, 1.0013, 0.9456, 1.0054, 1.0135, 0.9948, 1.0081, 0.9378, + 1.0029, 0.9869, 0.9540, 0.9829, 1.0220, 1.0169, 0.9994, 0.9782, 1.0695, + 0.9567, 0.9314, 0.9749, 0.9682, 1.0318, 0.9593, 0.9616, 0.9607, 0.9752, + 1.4810, 0.9323, 0.9402, 0.9837, 1.0073, 0.9842, 0.9367, 0.9604, 1.0116, + 1.0078, 0.9702, 0.9684, 1.1285, 0.9511, 0.9765, 1.0176, 0.9596, 1.0339, + 0.9624, 1.0395, 0.9849, 0.9565, 0.9806, 1.0773, 0.9714, 0.9472, 1.0115, + 0.9649, 0.9228, 2.0901, 0.9879, 0.9882, 0.9761, 0.9814, 0.9468, 0.9994, + 0.9286, 0.9911, 0.9497, 0.9147, 0.9676, 0.9874, 0.9877, 0.9406, 0.9652, + 0.9351, 0.9923, 1.0111, 0.9771, 1.0114, 1.0217, 0.9870, 0.9537, 1.0784, + 1.0336, 1.0093, 0.9765, 0.9986, 1.0106, 0.9380, 0.9709, 1.0175, 0.9601, + 0.9987, 0.9836, 0.9945, 0.9813, 0.9800, 0.8724, 1.7095, 0.9634, 0.9996, + 0.9365, 0.9805, 0.9622, 1.0119, 0.9878, 0.9472, 0.9417, 0.9169, 0.9876, + 0.9900, 0.9350, 1.1267, 0.9505, 1.0640, 0.9625, 1.0296, 1.0113, 0.9412, + 0.9205, 0.9651, 0.9713, 0.9395, 1.0119, 0.9269, 1.0506, 0.9600, 0.9694, + 0.9757, 0.9544, 1.0086, 0.9788, 1.0508, 1.0209, 0.9488, 0.9966, 0.9317, + 0.9572, 0.9729, 1.0164, 0.9792, 0.9196, 0.9784, 0.9793, 0.9809, 0.9796, + 0.9861, 0.9729, 0.9508, 0.9676, 0.9786, 0.9937, 1.0135, 0.9527, 0.9108, + 0.9070, 1.0103, 0.9822, 0.9784, 1.0208, 0.9664, 0.9470, 0.9950, 1.0025, + 0.9190, 0.9683, 1.0020, 0.9953, 1.0268, 0.9235, 0.9606, 0.9984, 0.9683, + 1.0037, 1.0107, 0.9407, 1.4926, 0.9923, 1.0471, 0.9927, 0.9654, 1.0307, + 0.9588, 0.9602, 0.9697, 0.9666, 0.9573, 0.9678, 0.9603, 1.0030, 1.0766, + 0.9451, 1.0309, 0.9786, 1.0248, 1.0939, 0.9755, 1.0129, 0.9870, 0.9454, + 0.9679, 0.9613, 0.9953, 
1.0223, 0.9391, 0.9962, 1.0024, 0.9905, 0.9667, + 0.9975, 1.0962, 0.9719, 0.9752, 1.0808, 0.9856, 0.9431, 0.9547, 0.9668, + 0.9539, 0.9781, 0.9672, 0.9340, 1.1096, 1.0085, 0.9580, 0.9485, 0.9604, + 0.9431, 0.9216, 0.9337, 0.9443, 1.0195, 0.9627, 0.9664, 1.0743, 0.9936, + 0.9841, 0.9570, 1.0709, 0.9946, 0.9981, 0.9501, 0.9921, 0.9470, 0.9568, + 0.9283, 0.9953, 1.0495, 0.9913, 0.9984, 1.0113, 1.0228, 0.9384, 0.9789, + 1.0531, 0.9579, 0.9499, 1.0594, 0.9886, 1.4655, 0.9475, 1.0596, 0.9834, + 1.0035, 1.0115, 0.9774, 1.0214, 0.9791, 0.9685, 0.9650, 0.9535, 0.9771, + 0.9126, 0.9927, 1.0409, 1.2192, 0.9652, 1.0573, 0.9687, 0.9728, 0.9429, + 1.0402, 0.9895, 0.8916, 1.0075, 0.9711, 0.9827, 0.9865, 1.2124, 0.9498, + 0.9622, 0.9726, 1.0289, 0.9481, 1.0022, 0.9531, 1.6650, 1.0106, 1.0316, + 0.9528, 1.0154, 0.9521, 0.9370, 0.9971, 1.0099, 0.9931, 0.9787, 0.9301, + 0.9546, 1.0009, 0.9576, 1.0004, 0.9747, 0.9970, 0.9373, 0.9452, 1.0046, + 0.8863, 0.9072, 0.9384, 0.9922, 0.9774, 0.9665, 0.9855, 0.9787, 0.9994, + 0.9961, 0.9882, 0.9664, 0.9641, 0.9573, 0.9490, 1.0217, 1.0008, 1.0120, + 0.9250, 1.0264, 0.9800, 0.9720, 0.9221, 0.8856, 1.7454, 0.9631, 0.9402, + 0.9390, 1.0204, 1.0326, 1.0082, 1.0139, 0.9646, 0.9637, 0.9325, 1.0175, + 0.9553, 0.9596, 0.9731, 0.9429, 1.0211, 0.9584, 0.9190, 1.0024, 0.9525, + 1.0043, 1.0158, 0.9731, 0.9330, 0.9368, 0.9898, 0.9570, 1.0412, 0.9947, + 0.9534, 1.1320, 1.0050, 0.9660, 0.9688, 0.9525, 0.9742, 1.0358, 0.9980, + 0.9786, 0.9825, 0.9780], device='cuda:1', requires_grad=True) +torch.Size([768]) +Parameter containing: +tensor([-3.0972e-01, 8.6520e-02, -1.7815e-01, -2.1274e-01, 1.0310e-02, + -2.1799e-01, 2.6805e-02, -1.2406e-01, 1.1344e-01, -1.0872e-01, + -2.1680e-01, -1.5038e-01, -1.6801e-01, -3.8868e-01, 4.3549e-02, + -1.6931e-01, -1.0309e-01, -1.1146e-01, -2.8433e-02, -7.3690e-02, + -2.0815e-01, -1.5633e-01, -5.8505e-02, -1.1866e-01, -9.1465e-02, + -1.2483e-01, -1.6782e-01, -1.8859e-01, -2.8829e-01, -6.2344e-02, + 5.8944e-02, -2.3529e-02, 
-1.3301e-01, -3.5179e-02, -1.3117e-01, + -3.7027e-01, -3.8042e-01, -2.0661e-01, -5.9429e-02, 4.7935e-02, + -1.1948e-01, -1.6189e-01, -1.1954e-01, -1.6275e-01, -1.9291e-01, + -3.2738e-01, 6.2183e-02, -1.8937e-01, -2.8478e-01, -1.1591e-01, + -3.1983e-02, -1.3321e-01, -3.0793e-01, -9.1934e-02, -3.1607e-01, + 1.7626e-03, -3.7448e-02, -2.1910e-01, -3.0856e-01, 1.1530e-01, + 7.8005e-02, -1.3057e-01, -1.4585e-01, -2.2813e-01, -1.6288e-01, + -2.2923e-01, -2.6776e-01, -1.9933e-01, -2.2633e-01, 1.1466e-01, + -1.0994e-01, -1.2034e-01, -7.9534e-02, -3.2612e-01, -2.9924e-01, + -1.5115e-01, -1.4329e-02, -1.6127e-01, 1.3527e-01, -3.5650e-02, + -1.3662e-01, -9.8355e-02, -1.8159e-01, -8.9092e-02, -7.4571e-02, + -2.8322e-01, 2.9549e-01, -7.7638e-02, -2.7117e-02, -1.8011e-01, + -3.8760e-01, 1.8966e-02, -6.7477e-02, -1.5817e-01, -3.1372e-01, + -8.0799e-02, 6.7441e-02, -1.7432e-01, -2.6955e-01, 6.9790e-02, + -1.0390e-01, -4.4026e-01, -1.0927e-01, -4.8078e-01, -4.3120e-03, + -1.4704e-01, 1.6252e-01, -6.3587e-03, -1.6030e-01, -1.2555e-01, + 2.4264e-03, -2.0487e-01, -1.9123e-01, -7.2894e-02, 2.6715e-02, + -2.9070e-01, 2.1943e-02, -1.0394e-01, -2.0428e-01, 1.9086e-02, + -5.8713e-02, -1.1117e-01, -2.1590e-01, -7.7958e-03, 8.1530e-02, + -4.0557e-02, -6.9983e-02, 8.2076e-02, -2.8870e-01, 5.1586e-04, + -7.0652e-02, -1.6712e-01, -5.4576e-02, 3.9755e-02, 9.6160e-02, + 3.4701e-02, -2.3451e-01, -2.4870e-01, 2.3932e-02, -1.9629e-01, + -5.2537e-02, 2.0197e-02, -7.9063e-02, -1.7944e-01, -1.2057e-01, + -1.4907e-01, 1.8654e-01, -1.6093e-01, -2.8629e-01, -2.0329e-01, + 3.8109e-02, -2.5073e-01, -2.7577e-01, -1.1063e-01, -1.0919e-01, + 1.3914e-02, -2.8746e-01, -3.2939e-01, -2.2465e-01, -1.6625e-01, + -1.8405e-01, -4.9315e-02, -2.2022e-01, -2.8075e-01, -5.6807e-02, + 1.0513e-01, -7.9162e-02, 3.6867e-02, -1.8553e-01, -2.9716e-01, + 7.4794e-02, 6.0102e-02, -1.4944e-01, 1.4078e-03, -1.2949e-01, + -1.4017e-01, -2.0010e-01, -1.3645e-01, -5.3604e-02, 9.3507e-02, + -4.0007e-01, -8.2399e-02, 1.4770e-03, 
2.5266e-02, -1.3282e-01, + -5.0428e-02, -2.4350e-01, 2.7861e-01, -3.4818e-02, 9.8363e-02, + -3.0732e-01, -3.4218e-01, -2.9501e-02, -3.9651e-02, -6.8811e-02, + -4.2808e-02, 1.3737e-01, -1.1500e-01, -2.3656e-01, 2.0047e-01, + -2.0047e-01, -1.8455e-01, -1.9737e-01, -3.0253e-02, -8.7459e-02, + -2.0932e-01, 1.5391e-02, 1.2252e-02, -2.1960e-01, -3.3056e-01, + 1.3683e-01, -2.0823e-01, -3.2396e-01, -1.1140e-01, -3.3370e-01, + -2.8346e-01, -2.3274e-01, -1.6795e-01, -3.2785e-01, 3.7867e-02, + 1.3719e-01, 1.4507e-01, -6.2225e-02, 5.7701e-02, -3.8609e-02, + 3.8244e-02, -2.9839e-01, -1.8011e-01, -1.9061e-01, -5.9240e-02, + -9.7641e-02, -2.6895e-01, -1.6988e-01, -1.9597e-01, -1.6472e-01, + -4.4346e-01, 1.3133e-01, -1.2524e-01, -3.5263e-01, -1.4135e-01, + 4.1890e-02, -5.1405e-02, 4.4846e-02, -1.4310e-01, -1.5345e-01, + -1.3753e-01, -1.1721e-01, -1.3412e-01, -1.3196e-01, 3.5904e-02, + 1.4487e-02, -1.3828e-01, -1.1927e-01, -2.2135e-01, -1.7975e-01, + -2.3271e-01, -2.4277e-02, -2.7056e-01, -3.3887e-01, -2.0596e-01, + -3.2562e-01, -4.5267e-01, 1.7332e-01, 3.4287e-02, -2.7470e-01, + -4.0351e-02, -1.5061e-01, -2.0349e-01, 8.5356e-02, 6.9045e-02, + 3.8983e-02, 6.1722e-02, 2.4213e-02, -8.5502e-02, 4.4269e-03, + -1.0968e-01, -1.4912e-01, -4.7435e-02, 1.4155e-01, -1.8818e-01, + -3.2807e-02, -2.2663e-01, -3.0295e-01, 9.7512e-03, -1.7489e-01, + -9.1184e-02, -2.0599e-01, 1.4191e-01, -5.3643e-02, 7.9877e-02, + -2.9450e-01, -1.2680e-01, 8.3230e-02, -2.6408e-01, -4.8992e-02, + -5.0730e-02, 7.4562e-03, -1.9729e-01, -1.8002e-01, -6.4581e-03, + -2.3463e-01, 1.5716e-01, -5.0802e-02, -3.3855e-01, 5.2296e-02, + -9.6272e-02, -1.5351e-01, -2.3901e-01, -1.5963e-01, -1.6693e-01, + -1.1239e-01, -3.7593e-01, -7.4591e-02, 4.3444e-02, -1.5205e-01, + -1.6956e-03, 1.6140e-01, -2.1399e-01, -1.8157e-01, -3.0919e-02, + -2.7055e-01, -1.2207e-01, -1.5803e-01, -3.8846e-02, -4.9430e-01, + -1.1189e-01, -2.8882e-01, -1.2073e-01, -6.5715e-02, -1.4964e-01, + -2.4234e-01, -2.6441e-01, -2.7818e-01, -6.0429e-02, 7.3955e-02, 
+ 6.7665e-02, -2.6970e-01, -4.5610e-02, 2.3626e-02, 1.0868e-02, + -2.1497e-02, -2.7581e-01, -2.1453e-01, 1.3595e-01, -2.0828e-01, + 1.1379e-01, 8.0894e-03, 8.1668e-02, 1.1096e-01, 2.5441e-01, + 7.3634e-02, -2.1243e-01, -2.3957e-01, -1.1420e-02, -1.9181e-01, + -1.9316e-01, -4.4721e-01, -8.3250e-02, -6.5639e-02, -5.8319e-02, + -1.2700e-01, -1.0081e-01, -7.0792e-02, -3.4129e-01, -1.4185e-01, + -4.1964e-01, -1.5962e-01, 2.2652e-02, -2.0097e-01, -1.7302e-01, + -1.6948e-01, -1.4114e-01, -9.6507e-02, -1.6094e-01, -3.6258e-01, + -2.5993e-01, 6.2780e-02, -1.1315e-01, -2.1144e-02, -7.3426e-03, + -1.3075e-01, -2.6975e-01, -2.2317e-01, -7.0074e-02, 8.6945e-02, + -2.5793e-01, -1.4867e-01, 8.9297e-02, 3.3459e-03, -2.6978e-01, + -1.6799e-01, -1.9151e-01, -7.5621e-02, -5.7356e-02, -2.5335e-01, + 3.0635e-02, -2.6356e-01, -7.0652e-02, -2.5116e-01, -3.5934e-01, + -2.9494e-02, -9.2532e-02, -5.1282e-02, -1.1410e-01, -1.0907e-01, + -2.8711e-01, -1.1422e-01, -3.5451e-02, -1.9770e-01, 1.1843e-01, + -2.8797e-01, -4.3960e-01, -3.4017e-02, 9.0205e-02, -1.3106e-01, + -8.4129e-02, -1.8859e-01, -2.6158e-01, 1.6586e-01, -1.2378e-01, + -1.5896e-01, -1.6694e-01, 1.6234e-02, -1.5692e-01, -1.6066e-01, + -3.7369e-04, 2.3442e-02, -5.5136e-02, -8.2050e-02, -2.5465e-01, + -1.2962e-01, -1.5731e-01, 7.7169e-02, -1.6205e-01, -2.2577e-01, + 2.7081e-01, -1.1905e-01, -2.3228e-02, -2.7891e-01, -2.6373e-01, + -1.0485e-01, -1.3941e-01, -6.7630e-02, -1.4722e-02, 1.3796e-02, + -3.4524e-02, 8.9978e-02, -2.1230e-01, 8.5656e-02, -1.6448e-01, + -3.2909e-02, -2.3079e-01, -3.2352e-01, 2.6198e-02, -1.7452e-01, + -2.2494e-01, -3.5444e-01, -1.2681e-01, -1.1768e-01, -2.7710e-01, + -2.5133e-01, -1.0190e-01, -3.9334e-01, -4.8219e-02, -2.9652e-01, + -7.3225e-02, -8.6772e-03, 3.2621e-02, -1.6923e-01, -2.9421e-01, + 1.8239e-01, -1.4777e-01, -2.9244e-01, -3.1106e-02, -1.5851e-01, + -2.4942e-01, -2.5309e-01, -2.9053e-01, 9.9120e-02, -2.4745e-01, + 4.4178e-02, 2.9294e-02, 1.8558e-01, -9.1317e-02, 2.0792e-02, + -1.6872e-01, 
-2.2066e-01, -6.8652e-02, 6.6996e-02, -2.0110e-01, + -1.2881e-02, -4.1882e-02, -1.6867e-01, -4.1276e-02, -1.1632e-01, + -2.4713e-01, -2.1714e-01, -2.5670e-01, -7.6108e-02, 7.5738e-02, + -5.2782e-02, -2.1995e-01, -2.1114e-01, -8.2967e-02, -2.9250e-01, + -1.1585e-01, -3.7202e-02, 2.2169e-01, -1.1157e-01, -1.3117e-01, + -4.5070e-01, 9.1724e-02, -2.5536e-01, -1.1099e-01, -1.4028e-01, + 2.8116e-02, -5.5589e-02, -1.1969e-01, -8.9219e-02, 4.4426e-02, + 1.8124e-01, 6.9828e-02, -2.9449e-01, -3.8633e-01, -3.2289e-01, + -1.5161e-01, 7.4736e-02, -3.2911e-01, 9.0841e-02, -9.4956e-02, + -1.6886e-01, -4.3418e-01, -1.8379e-01, -1.7249e-01, -4.3381e-02, + -3.0515e-01, -1.3860e-02, -1.3408e-01, 2.3386e-02, -1.9684e-01, + 1.1179e-01, -1.0725e-01, -5.1684e-02, 9.9985e-03, -8.1149e-02, + -5.7091e-02, -2.6378e-01, -1.8482e-01, 2.4157e-02, -3.0910e-01, + -9.7387e-02, -1.3443e-01, -1.3120e-01, -4.1030e-02, -2.8421e-01, + -1.1970e-01, 2.6584e-01, -2.9540e-01, -4.1858e-01, -4.4225e-02, + -3.3776e-02, 5.1072e-02, -1.0784e-01, 2.4809e-03, -1.0006e-02, + -1.6900e-01, -1.6756e-01, 7.1867e-02, -1.4536e-01, -3.2948e-01, + -4.8964e-02, 2.5080e-02, -2.9177e-01, -2.3546e-01, -9.6734e-02, + 5.2769e-02, 5.9821e-02, -7.1729e-02, -5.2216e-02, 4.6748e-02, + -1.0374e-01, -3.3411e-01, 1.9977e-02, -1.9944e-01, -1.4249e-01, + -4.2926e-01, -2.0550e-01, -1.2332e-01, 2.4972e-02, -3.1156e-01, + -3.5618e-02, 1.4841e-01, -6.8435e-02, -8.9134e-02, -4.3101e-01, + 3.2034e-02, -5.7796e-02, -1.0815e-01, 7.9357e-02, 6.0425e-02, + 9.0992e-02, -8.4117e-02, -1.6460e-01, -1.8860e-01, -1.1188e-01, + -1.6019e-02, -2.7539e-01, -3.0461e-02, 3.6393e-02, -5.5778e-02, + -2.3261e-01, 3.3813e-03, -7.2618e-02, 1.3152e-01, 2.5424e-02, + -2.3474e-01, 1.2788e-01, 1.0549e-01, -3.9438e-01, -1.4477e-01, + -8.7272e-02, -5.9155e-02, -1.8755e-02, 8.3795e-03, -1.3303e-01, + 3.5993e-02, -4.8489e-01, -9.8144e-02, -2.0187e-01, -1.7780e-01, + -1.5702e-01, -1.6632e-01, -7.6623e-02, -1.2067e-01, 9.7476e-02, + -1.4865e-01, 3.9298e-02, -8.1964e-02, 
-5.7571e-02, -1.6357e-02, + -2.4697e-02, -2.3285e-01, -1.8711e-01, 5.9722e-02, -5.0565e-03, + 1.6085e-01, -5.6193e-02, -1.2748e-01, -1.7989e-01, 1.6912e-02, + -7.4359e-02, 5.9318e-02, 4.5689e-02, -3.4782e-01, 1.2438e-02, + 4.9462e-03, -7.6542e-02, -1.7623e-03, -3.1946e-01, -8.4081e-02, + 4.2274e-02, -2.0708e-01, -1.0376e-01, -3.2503e-01, -4.9442e-02, + -1.5170e-01, -1.7022e-01, -1.8604e-01, 7.0737e-02, -2.3663e-01, + -3.6244e-02, -2.3101e-01, -2.2293e-01, 8.8521e-03, -7.0427e-02, + -2.2006e-01, 6.1665e-02, -1.2758e-01, -1.3123e-01, 1.3888e-02, + -2.7413e-01, -3.8097e-02, -3.4511e-01, -2.7228e-01, 8.0634e-02, + -1.5312e-02, -3.7472e-03, 2.0875e-02, 1.5117e-01, -2.4863e-01, + -3.2693e-01, -1.0401e-01, -1.4831e-01, -1.8991e-01, -1.6961e-01, + 1.1688e-01, 6.0180e-02, -5.3109e-02, -3.7527e-01, 1.3247e-01, + -1.2943e-01, -1.7973e-01, 1.0068e-01, -1.7821e-01, -1.9093e-01, + 2.9122e-02, -5.7265e-01, -1.4149e-01, -7.8197e-02, -2.9365e-01, + -2.5148e-01, -1.0391e-01, -1.6442e-01, -1.2958e-01, -6.3028e-02, + -7.3926e-02, -9.9090e-02, 1.4622e-02, -3.2253e-01, -2.1039e-01, + -3.5321e-02, -6.1373e-02, -4.3052e-03, -2.5899e-01, 1.4603e-01, + -6.2891e-02, 3.2609e-02, -8.3760e-02, -7.8426e-02, -5.5548e-02, + -2.1703e-01, -7.2742e-02, -1.0241e-01, -1.5250e-01, -5.3758e-03, + 1.6436e-01, -1.6233e-01, -1.1661e-01, -2.6216e-01, -3.3025e-01, + -1.5915e-01, -3.5974e-01, -1.6534e-01, 8.1741e-03, 1.2124e-01, + -7.8771e-02, -2.6709e-01, -1.5131e-01, 1.1832e-01, -9.7288e-02, + 1.5229e-01, -1.3003e-01, -3.0911e-01, -8.6667e-02, -6.7893e-02, + -1.5559e-01, -1.3761e-01, -4.8186e-02, -1.4222e-01, -5.8575e-02, + -4.5176e-01, -2.7698e-01, -1.8527e-01, 1.3501e-01, 1.4931e-02, + -5.2130e-01, -2.6890e-01, 9.5427e-02], device='cuda:1', + requires_grad=True) diff --git a/python/ClipDetection/CoOp/trainers/imagenet_templates.py b/python/ClipDetection/CoOp/trainers/imagenet_templates.py new file mode 100644 index 00000000..560c5a50 --- /dev/null +++ 
b/python/ClipDetection/CoOp/trainers/imagenet_templates.py @@ -0,0 +1,94 @@ +# source: https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb + +IMAGENET_TEMPLATES = [ + "a bad photo of a {}.", + "a photo of many {}.", + "a sculpture of a {}.", + "a photo of the hard to see {}.", + "a low resolution photo of the {}.", + "a rendering of a {}.", + "graffiti of a {}.", + "a bad photo of the {}.", + "a cropped photo of the {}.", + "a tattoo of a {}.", + "the embroidered {}.", + "a photo of a hard to see {}.", + "a bright photo of a {}.", + "a photo of a clean {}.", + "a photo of a dirty {}.", + "a dark photo of the {}.", + "a drawing of a {}.", + "a photo of my {}.", + "the plastic {}.", + "a photo of the cool {}.", + "a close-up photo of a {}.", + "a black and white photo of the {}.", + "a painting of the {}.", + "a painting of a {}.", + "a pixelated photo of the {}.", + "a sculpture of the {}.", + "a bright photo of the {}.", + "a cropped photo of a {}.", + "a plastic {}.", + "a photo of the dirty {}.", + "a jpeg corrupted photo of a {}.", + "a blurry photo of the {}.", + "a photo of the {}.", + "a good photo of the {}.", + "a rendering of the {}.", + "a {} in a video game.", + "a photo of one {}.", + "a doodle of a {}.", + "a close-up photo of the {}.", + "a photo of a {}.", + "the origami {}.", + "the {} in a video game.", + "a sketch of a {}.", + "a doodle of the {}.", + "a origami {}.", + "a low resolution photo of a {}.", + "the toy {}.", + "a rendition of the {}.", + "a photo of the clean {}.", + "a photo of a large {}.", + "a rendition of a {}.", + "a photo of a nice {}.", + "a photo of a weird {}.", + "a blurry photo of a {}.", + "a cartoon {}.", + "art of a {}.", + "a sketch of the {}.", + "a embroidered {}.", + "a pixelated photo of a {}.", + "itap of the {}.", + "a jpeg corrupted photo of the {}.", + "a good photo of a {}.", + "a plushie {}.", + "a photo of the nice {}.", + "a photo of the small {}.", + "a photo of the 
weird {}.", + "the cartoon {}.", + "art of the {}.", + "a drawing of the {}.", + "a photo of the large {}.", + "a black and white photo of a {}.", + "the plushie {}.", + "a dark photo of a {}.", + "itap of a {}.", + "graffiti of the {}.", + "a toy {}.", + "itap of my {}.", + "a photo of a cool {}.", + "a photo of a small {}.", + "a tattoo of the {}.", +] + +IMAGENET_TEMPLATES_SELECT = [ + "itap of a {}.", + "a bad photo of the {}.", + "a origami {}.", + "a photo of the large {}.", + "a {} in a video game.", + "art of the {}.", + "a photo of the small {}.", +] diff --git a/python/ClipDetection/CoOp/trainers/zsclip.py b/python/ClipDetection/CoOp/trainers/zsclip.py new file mode 100644 index 00000000..f9391188 --- /dev/null +++ b/python/ClipDetection/CoOp/trainers/zsclip.py @@ -0,0 +1,99 @@ +import torch +import torch.nn as nn + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.model import convert_weights + +from .coop import load_clip_to_cpu +from .imagenet_templates import IMAGENET_TEMPLATES, IMAGENET_TEMPLATES_SELECT + +CUSTOM_TEMPLATES = { + "OxfordPets": "a photo of a {}, a type of pet.", + "OxfordFlowers": "a photo of a {}, a type of flower.", + "FGVCAircraft": "a photo of a {}, a type of aircraft.", + "DescribableTextures": "{} texture.", + "EuroSAT": "a centered satellite photo of {}.", + "StanfordCars": "a photo of a {}.", + "Food101": "a photo of {}, a type of food.", + "SUN397": "a photo of a {}.", + "Caltech101": "a photo of a {}.", + "UCF101": "a photo of a person doing {}.", + "ImageNet": "a photo of a {}.", + "ImageNetSketch": "a photo of a {}.", + "ImageNetV2": "a photo of a {}.", + "ImageNetA": "a photo of a {}.", + "ImageNetR": "a photo of a {}.", +} + + +@TRAINER_REGISTRY.register() +class ZeroshotCLIP(TrainerX): + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: 
{cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + clip_model.to(self.device) + + temp = CUSTOM_TEMPLATES[cfg.DATASET.NAME] + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + print(f"Prompts: {prompts}") + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(self.device) + + with torch.no_grad(): + text_features = clip_model.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + self.text_features = text_features + self.clip_model = clip_model + + def model_inference(self, image): + image_features = self.clip_model.encode_image(image) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + logit_scale = self.clip_model.logit_scale.exp() + logits = logit_scale * image_features @ self.text_features.t() + return logits + + +@TRAINER_REGISTRY.register() +class ZeroshotCLIP2(ZeroshotCLIP): + """Prompt ensembling.""" + + # templates = IMAGENET_TEMPLATES + templates = IMAGENET_TEMPLATES_SELECT + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + clip_model.to(self.device) + + for params in clip_model.parameters(): + params.requires_grad_(False) + + # add custom-made prompt + if cfg.DATASET.NAME != "ImageNet": + self.templates += [CUSTOM_TEMPLATES[cfg.DATASET.NAME]] + + num_temp = len(self.templates) + print(f"Prompt ensembling (n={num_temp})") + + mean_text_features = 0 + for i, temp in enumerate(self.templates): + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]).to(self.device) + text_features = clip_model.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + mean_text_features = mean_text_features + text_features + mean_text_features = mean_text_features / num_temp + mean_text_features = 
mean_text_features / mean_text_features.norm(dim=-1, keepdim=True) + + self.text_features = mean_text_features + self.clip_model = clip_model diff --git a/python/ClipDetection/Dassl.pytorch/.flake8 b/python/ClipDetection/Dassl.pytorch/.flake8 new file mode 100644 index 00000000..ac13c77e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/.flake8 @@ -0,0 +1,24 @@ +[flake8] +ignore = + # At least two spaces before inline comment + E261, + # Line lengths are recommended to be no greater than 79 characters + E501, + # Missing whitespace around arithmetic operator + E226, + # Blank line contains whitespace + W293, + # Do not use bare 'except' + E722, + # Line break after binary operator + W504, + # Too many leading '#' for block comment + E266, + # Line break before binary operator + W503, + # Continuation line over-indented for hanging indent + E126, + # Module level import not at top of file + E402 +max-line-length = 79 +exclude = __init__.py, build \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/.gitignore b/python/ClipDetection/Dassl.pytorch/.gitignore new file mode 100644 index 00000000..e8bcb640 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# OS X +.DS_Store +.Spotlight-V100 +.Trashes +._* + +# This project +output/ +debug/ diff --git a/python/ClipDetection/Dassl.pytorch/.isort.cfg b/python/ClipDetection/Dassl.pytorch/.isort.cfg new file mode 100644 index 00000000..6b019a3d --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/.isort.cfg @@ -0,0 +1,10 @@ +[isort] +line_length=79 +multi_line_output=6 +length_sort=true +known_standard_library=numpy,setuptools +known_myself=dassl +known_third_party=matplotlib,cv2,torch,torchvision,PIL,yacs,scipy,gdown +no_lines_before=STDLIB,THIRDPARTY 
+sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER +default_section=FIRSTPARTY \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/.style.yapf b/python/ClipDetection/Dassl.pytorch/.style.yapf new file mode 100644 index 00000000..5d8b5f5c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/.style.yapf @@ -0,0 +1,7 @@ +[style] +BASED_ON_STYLE = pep8 +BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true +SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true +DEDENT_CLOSING_BRACKETS = true +SPACES_BEFORE_COMMENT = 2 +ARITHMETIC_PRECEDENCE_INDICATION = true \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/DATASETS.md b/python/ClipDetection/Dassl.pytorch/DATASETS.md new file mode 100644 index 00000000..27ad5099 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/DATASETS.md @@ -0,0 +1,318 @@ +# How to Install Datasets + +`$DATA` denotes the location where datasets are installed, e.g. + +``` +$DATA/ +|–– office31/ +|–– office_home/ +|–– visda17/ +``` + +[Domain Adaptation](#domain-adaptation) +- [Office-31](#office-31) +- [Office-Home](#office-home) +- [VisDA17](#visda17) +- [CIFAR10-STL10](#cifar10-stl10) +- [Digit-5](#digit-5) +- [DomainNet](#domainnet) +- [miniDomainNet](#miniDomainNet) + +[Domain Generalization](#domain-generalization) +- [PACS](#pacs) +- [VLCS](#vlcs) +- [Office-Home-DG](#office-home-dg) +- [Digits-DG](#digits-dg) +- [Digit-Single](#digit-single) +- [CIFAR-10-C](#cifar-10-c) +- [CIFAR-100-C](#cifar-100-c) +- [WILDS](#wilds) + +[Semi-Supervised Learning](#semi-supervised-learning) +- [CIFAR10/100 and SVHN](#cifar10100-and-svhn) +- [STL10](#stl10) + +## Domain Adaptation + +### Office-31 + +Download link: https://people.eecs.berkeley.edu/~jhoffman/domainadapt/#datasets_code. + +File structure: + +``` +office31/ +|–– amazon/ +| |–– back_pack/ +| |–– bike/ +| |–– ... +|–– dslr/ +| |–– back_pack/ +| |–– bike/ +| |–– ... +|–– webcam/ +| |–– back_pack/ +| |–– bike/ +| |–– ... 
+``` + +Note that within each domain folder you need to move all class folders out of the `images/` folder and then delete the `images/` folder. + +### Office-Home + +Download link: http://hemanthdv.org/OfficeHome-Dataset/. + +File structure: + +``` +office_home/ +|–– art/ +|–– clipart/ +|–– product/ +|–– real_world/ +``` + +### VisDA17 + +Download link: http://ai.bu.edu/visda-2017/. + +The dataset can also be downloaded using our script at `datasets/da/visda17.sh`. Run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +sh visda17.sh $DATA +``` + +Once the download is finished, the file structure will look like + +``` +visda17/ +|–– train/ +|–– test/ +|–– validation/ +``` + +### CIFAR10-STL10 + +Run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +python cifar_stl.py $DATA/cifar_stl +``` + +This will create a folder named `cifar_stl` under `$DATA`. The file structure will look like + +``` +cifar_stl/ +|–– cifar/ +| |–– train/ +| |–– test/ +|–– stl/ +| |–– train/ +| |–– test/ +``` + +Note that only 9 classes shared by both datasets are kept. + +### Digit-5 + +Create a folder `$DATA/digit5` and download to this folder the dataset from [here](https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA/code_MSDA_digit#digit-five-download). This should give you + +``` +digit5/ +|–– Digit-Five/ +``` + +Then, run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +python digit5.py $DATA/digit5 +``` + +This will extract the data and organize the file structure as + +``` +digit5/ +|–– Digit-Five/ +|–– mnist/ +|–– mnist_m/ +|–– usps/ +|–– svhn/ +|–– syn/ +``` + +### DomainNet + +Download link: http://ai.bu.edu/M3SDA/. 
(Please download the cleaned version of split files) + +File structure: + +``` +domainnet/ +|–– clipart/ +|–– infograph/ +|–– painting/ +|–– quickdraw/ +|–– real/ +|–– sketch/ +|–– splits/ +| |–– clipart_train.txt +| |–– clipart_test.txt +| |–– ... +``` + +### miniDomainNet + +You need to download the DomainNet dataset first. The miniDomainNet's split files can be downloaded at this [google drive](https://drive.google.com/open?id=15rrLDCrzyi6ZY-1vJar3u7plgLe4COL7). After the zip file is extracted, you should have the folder `$DATA/domainnet/splits_mini/`. + +## Domain Generalization + +### PACS + +Download link: [google drive](https://drive.google.com/open?id=1m4X4fROCCXMO0lRLrr6Zz9Vb3974NWhE). + +File structure: + +``` +pacs/ +|–– images/ +|–– splits/ +``` + +You do not necessarily have to manually download this dataset. Once you run ``tools/train.py``, the code will detect if the dataset exists or not and automatically download the dataset to ``$DATA`` if missing. This also applies to VLCS, Office-Home-DG, and Digits-DG. + +### VLCS + +Download link: [google drive](https://drive.google.com/file/d/1r0WL5DDqKfSPp9E3tRENwHaXNs1olLZd/view?usp=sharing) (credit to https://github.com/fmcarlucci/JigenDG#vlcs) + +File structure: + +``` +VLCS/ +|–– CALTECH/ +|–– LABELME/ +|–– PASCAL/ +|–– SUN/ +``` + +### Office-Home-DG + +Download link: [google drive](https://drive.google.com/open?id=1gkbf_KaxoBws-GWT3XIPZ7BnkqbAxIFa). + +File structure: + +``` +office_home_dg/ +|–– art/ +|–– clipart/ +|–– product/ +|–– real_world/ +``` + +### Digits-DG + +Download link: [google drive](https://drive.google.com/open?id=15V7EsHfCcfbKgsDmzQKj_DfXt_XYp_P7). + +File structure: + +``` +digits_dg/ +|–– mnist/ +|–– mnist_m/ +|–– svhn/ +|–– syn/ +``` + +### Digit-Single +Follow the steps for [Digit-5](#digit-5) to organize the dataset. 
+ +### CIFAR-10-C + +First download the CIFAR-10-C dataset from https://zenodo.org/record/2535967#.YFxHEWQzb0o to, e.g., $DATA, and extract the file under the same directory. Then, navigate to `Dassl.pytorch/datasets/dg` and run the following command in your terminal +```bash +python cifar_c.py $DATA/CIFAR-10-C +``` +where the first argument denotes the path to the (uncompressed) CIFAR-10-C dataset. + +The script will extract images from the `.npy` files and save them to `cifar10_c/` created under $DATA. The file structure will look like +``` +cifar10_c/ +|–– brightness/ +| |–– 1/ # 5 intensity levels in total +| |–– 2/ +| |–– 3/ +| |–– 4/ +| |–– 5/ +|–– ... # 19 corruption types in total +``` + +Note that `cifar10_c/` only contains the test images. The training images are the normal CIFAR-10 images. See [CIFAR10/100 and SVHN](#cifar10100-and-svhn) for how to prepare the CIFAR-10 dataset. + +### CIFAR-100-C + +First download the CIFAR-100-C dataset from https://zenodo.org/record/3555552#.YFxpQmQzb0o to, e.g., $DATA, and extract the file under the same directory. Then, navigate to `Dassl.pytorch/datasets/dg` and run the following command in your terminal +```bash +python cifar_c.py $DATA/CIFAR-100-C +``` +where the first argument denotes the path to the (uncompressed) CIFAR-100-C dataset. + +The script will extract images from the `.npy` files and save them to `cifar100_c/` created under $DATA. The file structure will look like +``` +cifar100_c/ +|–– brightness/ +| |–– 1/ # 5 intensity levels in total +| |–– 2/ +| |–– 3/ +| |–– 4/ +| |–– 5/ +|–– ... # 19 corruption types in total +``` + +Note that `cifar100_c/` only contains the test images. The training images are the normal CIFAR-100 images. See [CIFAR10/100 and SVHN](#cifar10100-and-svhn) for how to prepare the CIFAR-100 dataset. + +### WILDS + +No action is required to preprocess WILDS's datasets. The code will automatically download the data. 
+ +## Semi-Supervised Learning + +### CIFAR10/100 and SVHN + +Run the following command in your terminal under `Dassl.pytorch/datasets/ssl`, + +```bash +python cifar10_cifar100_svhn.py $DATA +``` + +This will create three folders under `$DATA`, i.e. + +``` +cifar10/ +|–– train/ +|–– test/ +cifar100/ +|–– train/ +|–– test/ +svhn/ +|–– train/ +|–– test/ +``` + +### STL10 + +Run the following command in your terminal under `Dassl.pytorch/datasets/ssl`, + +```bash +python stl10.py $DATA/stl10 +``` + +This will create a folder named `stl10` under `$DATA` and extract the data into three folders, i.e. `train`, `test` and `unlabeled`. Then, download from http://ai.stanford.edu/~acoates/stl10/ the "Binary files" and extract it under `stl10`. + +The file structure will look like + +``` +stl10/ +|–– train/ +|–– test/ +|–– unlabeled/ +|–– stl10_binary/ +``` \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/LICENSE b/python/ClipDetection/Dassl.pytorch/LICENSE new file mode 100644 index 00000000..69196145 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Kaiyang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/python/ClipDetection/Dassl.pytorch/MODIFICATIONS b/python/ClipDetection/Dassl.pytorch/MODIFICATIONS new file mode 100644 index 00000000..451e41d4 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/MODIFICATIONS @@ -0,0 +1,4 @@ +The following files have been modified: + +./dassl/config/defaults.py +./dassl/engine/trainer.py \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/README.md b/python/ClipDetection/Dassl.pytorch/README.md new file mode 100644 index 00000000..6f3ec6f3 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/README.md @@ -0,0 +1,279 @@ +# Dassl + +## Introduction + +Dassl is a [PyTorch](https://pytorch.org) toolbox initially developed for our project [Domain Adaptive Ensemble Learning (DAEL)](https://arxiv.org/abs/2003.07325) to support research in domain adaptation and generalization---since in DAEL we study how to unify these two problems in a single learning framework. Given that domain adaptation is closely related to semi-supervised learning---both study how to exploit unlabeled data---we also incorporate components that support research for the latter. + +Why the name "Dassl"? Dassl combines the initials of domain adaptation (DA) and semi-supervised learning (SSL), which sounds natural and informative. + +Dassl has a modular design and unified interfaces, allowing fast prototyping and experimentation of new DA/DG/SSL methods. With Dassl, a new method can be implemented with only a few lines of code. Don't believe? Take a look at the [engine](https://github.com/KaiyangZhou/Dassl.pytorch/tree/master/dassl/engine) folder, which contains the implementations of many existing methods (then you will come back and star this repo). 
:-) + +Basically, Dassl is perfect for doing research in the following areas: +- Domain adaptation +- Domain generalization +- Semi-supervised learning + +BUT, thanks to the neat design, Dassl can also be used as a codebase to develop any deep learning projects, like [this](https://github.com/KaiyangZhou/CoOp). :-) + +A drawback of Dassl is that it doesn't (yet? hmm) support distributed multi-GPU training (Dassl uses `DataParallel` to wrap a model, which is less efficient than `DistributedDataParallel`). + +We don't provide detailed documentation for Dassl, unlike another [project](https://kaiyangzhou.github.io/deep-person-reid/) of ours. This is because Dassl is developed for research purposes and, as researchers, we think it's important to be able to read source code and we highly encourage you to do so---definitely not because we are lazy. :-) + +## What's new +- **[Oct 2022]** New paper "[On-Device Domain Generalization](https://arxiv.org/abs/2209.07521)" is out! Code, models and datasets: https://github.com/KaiyangZhou/on-device-dg. + +
+ More + +- **[Jun 2022]** `v0.6.0`: Make `cfg.TRAINER.METHOD_NAME` consistent with the method class name. +- **[Jun 2022]** A new domain adaptation method [CDAC (CVPR'21)](https://openaccess.thecvf.com/content/CVPR2021/papers/Li_Cross-Domain_Adaptive_Clustering_for_Semi-Supervised_Domain_Adaptation_CVPR_2021_paper.pdf) is added by [Shreejal Trivedi](https://github.com/shreejalt). See [here](https://github.com/KaiyangZhou/Dassl.pytorch/pull/44) for more details. +- **[Jun 2022]** Adds three datasets from the [WILDS](https://wilds.stanford.edu/) benchmark: iWildCam, FMoW and Camelyon17. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/commit/7f7eab8e22f6e176b97a539100eca12d6a403909) for more details. +- **[May 2022]** A new domain generalization method [DDG](https://arxiv.org/abs/2205.13913) developed by [Zhishu Sun](https://github.com/siaimes) and to appear at IJCAI'22 is added to this repo. See [here](https://github.com/MetaVisionLab/DDG) for more details. +- **[Mar 2022]** A new domain generalization method [EFDM](https://arxiv.org/abs/2203.07740) developed by [Yabin Zhang (PolyU)](https://ybzh.github.io/) and to appear at CVPR'22 is added to this repo. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/pull/36) for more details. +- **[Feb 2022]** In case you don't know, a class in the painting domain of DomainNet (the official splits) only has test images (no training images), which could affect performance. See section 4.a in our [paper](https://arxiv.org/abs/2003.07325) for more details. +- **[Oct 2021]** `v0.5.0`: **Important changes** made to `transforms.py`. 1) `center_crop` becomes a default transform in testing (applied after resizing the smaller edge to a certain size to keep the image aspect ratio). 2) For training, `Resize(cfg.INPUT.SIZE)` is deactivated when `random_crop` or `random_resized_crop` is used. 
These changes won't make any difference to the training transforms used in existing config files, nor to the testing transforms unless the raw images are not squared (the only difference is that now the image aspect ratio is respected). +- **[Oct 2021]** `v0.4.3`: Copy the attributes in `self.dm` (data manager) to `SimpleTrainer` and make `self.dm` optional, which means from now on, you can build data loaders from any source you like rather than being forced to use `DataManager`. +- **[Sep 2021]** `v0.4.2`: An important update is to set `drop_last=is_train and len(data_source)>=batch_size` when constructing a data loader to avoid 0-length. + +
+ +## Overview + +Dassl has implemented the following methods: + +- Single-source domain adaptation + - [Cross Domain Adaptive Clustering for Semi Supervised Domain Adaptation (CVPR'21)](https://arxiv.org/pdf/2104.09415.pdf) [[dassl/engine/da/cdac.py](dassl/engine/da/cdac.py)] + - [Semi-supervised Domain Adaptation via Minimax Entropy (ICCV'19)](https://arxiv.org/abs/1904.06487) [[dassl/engine/da/mme.py](dassl/engine/da/mme.py)] + - [Maximum Classifier Discrepancy for Unsupervised Domain Adaptation (CVPR'18)](https://arxiv.org/abs/1712.02560) [[dassl/engine/da/mcd.py](dassl/engine/da/mcd.py)] + - [Self-ensembling for visual domain adaptation (ICLR'18)](https://arxiv.org/abs/1706.05208) [[dassl/engine/da/self_ensembling.py](dassl/engine/da/self_ensembling.py)] + - [Revisiting Batch Normalization For Practical Domain Adaptation (ICLR-W'17)](https://arxiv.org/abs/1603.04779) [[dassl/engine/da/adabn.py](dassl/engine/da/adabn.py)] + - [Adversarial Discriminative Domain Adaptation (CVPR'17)](https://arxiv.org/abs/1702.05464) [[dassl/engine/da/adda.py](dassl/engine/da/adda.py)] + - [Domain-Adversarial Training of Neural Networks (JMLR'16) ](https://arxiv.org/abs/1505.07818) [[dassl/engine/da/dann.py](dassl/engine/da/dann.py)] + +- Multi-source domain adaptation + - [Domain Adaptive Ensemble Learning](https://arxiv.org/abs/2003.07325) [[dassl/engine/da/dael.py](dassl/engine/da/dael.py)] + - [Moment Matching for Multi-Source Domain Adaptation (ICCV'19)](https://arxiv.org/abs/1812.01754) [[dassl/engine/da/m3sda.py](dassl/engine/da/m3sda.py)] + +- Domain generalization + - [Dynamic Domain Generalization (IJCAI'22)](https://arxiv.org/abs/2205.13913) [[dassl/modeling/backbone/resnet_dynamic.py](dassl/modeling/backbone/resnet_dynamic.py)] [[dassl/engine/dg/domain_mix.py](dassl/engine/dg/domain_mix.py)] + - [Exact Feature Distribution Matching for Arbitrary Style Transfer and Domain Generalization (CVPR'22)](https://arxiv.org/abs/2203.07740)
[[dassl/modeling/ops/efdmix.py](dassl/modeling/ops/efdmix.py)] + - [Domain Generalization with MixStyle (ICLR'21)](https://openreview.net/forum?id=6xHJ37MVxxp) [[dassl/modeling/ops/mixstyle.py](dassl/modeling/ops/mixstyle.py)] + - [Deep Domain-Adversarial Image Generation for Domain Generalisation (AAAI'20)](https://arxiv.org/abs/2003.06054) [[dassl/engine/dg/ddaig.py](dassl/engine/dg/ddaig.py)] + - [Generalizing Across Domains via Cross-Gradient Training (ICLR'18)](https://arxiv.org/abs/1804.10745) [[dassl/engine/dg/crossgrad.py](dassl/engine/dg/crossgrad.py)] + +- Semi-supervised learning + - [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://arxiv.org/abs/2001.07685) [[dassl/engine/ssl/fixmatch.py](dassl/engine/ssl/fixmatch.py)] + - [MixMatch: A Holistic Approach to Semi-Supervised Learning (NeurIPS'19)](https://arxiv.org/abs/1905.02249) [[dassl/engine/ssl/mixmatch.py](dassl/engine/ssl/mixmatch.py)] + - [Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results (NeurIPS'17)](https://arxiv.org/abs/1703.01780) [[dassl/engine/ssl/mean_teacher.py](dassl/engine/ssl/mean_teacher.py)] + - [Semi-supervised Learning by Entropy Minimization (NeurIPS'04)](http://papers.nips.cc/paper/2740-semi-supervised-learning-by-entropy-minimization.pdf) [[dassl/engine/ssl/entmin.py](dassl/engine/ssl/entmin.py)] + +*Feel free to make a [PR](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) to add your methods here to make it easier for others to benchmark!* + +Dassl supports the following datasets: + +- Domain adaptation + - [Office-31](https://scalable.mpi-inf.mpg.de/files/2013/04/saenko_eccv_2010.pdf) + - [Office-Home](http://hemanthdv.org/OfficeHome-Dataset/) + - [VisDA17](http://ai.bu.edu/visda-2017/) + - 
[CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html)-[STL10](https://cs.stanford.edu/~acoates/stl10/) + - [Digit-5](https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA/code_MSDA_digit#digit-five-download) + - [DomainNet](http://ai.bu.edu/M3SDA/) + - [miniDomainNet](https://arxiv.org/abs/2003.07325) + +- Domain generalization + - [PACS](https://arxiv.org/abs/1710.03077) + - [VLCS](https://people.csail.mit.edu/torralba/publications/datasets_cvpr11.pdf) + - [Office-Home](http://hemanthdv.org/OfficeHome-Dataset/) + - [Digits-DG](https://arxiv.org/abs/2003.06054) + - [Digit-Single](https://arxiv.org/abs/1805.12018) + - [CIFAR-10-C](https://arxiv.org/abs/1807.01697) + - [CIFAR-100-C](https://arxiv.org/abs/1807.01697) + - [iWildCam-WILDS](https://wilds.stanford.edu/datasets/#iwildcam) + - [Camelyon17-WILDS](https://wilds.stanford.edu/datasets/#camelyon17) + - [FMoW-WILDS](https://wilds.stanford.edu/datasets/#fmow) + +- Semi-supervised learning + - [CIFAR10/100](https://www.cs.toronto.edu/~kriz/cifar.html) + - [SVHN](http://ufldl.stanford.edu/housenumbers/) + - [STL10](https://cs.stanford.edu/~acoates/stl10/) + +## Get started + +### Installation + +Make sure [conda](https://www.anaconda.com/distribution/) is installed properly. + +```bash +# Clone this repo +git clone https://github.com/KaiyangZhou/Dassl.pytorch.git +cd Dassl.pytorch/ + +# Create a conda environment +conda create -y -n dassl python=3.8 + +# Activate the environment +conda activate dassl + +# Install torch (requires version >= 1.8.1) and torchvision +# Please refer to https://pytorch.org/ if you need a different cuda version +conda install pytorch torchvision cudatoolkit=10.2 -c pytorch + +# Install dependencies +pip install -r requirements.txt + +# Install this library (no need to re-build if the source code is modified) +python setup.py develop +``` + +Follow the instructions in [DATASETS.md](./DATASETS.md) to preprocess the datasets.
+ +### Training + +The main interface is implemented in `tools/train.py`, which basically does + +1. initialize the config with `cfg = setup_cfg(args)` where `args` contains the command-line input (see `tools/train.py` for the list of input arguments); +2. instantiate a `trainer` with `build_trainer(cfg)` which loads the dataset and builds a deep neural network model; +3. call `trainer.train()` for training and evaluating the model. + +Below we provide an example for training a source-only baseline on the popular domain adaptation dataset, Office-31, + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains amazon \ +--target-domains webcam \ +--dataset-config-file configs/datasets/da/office31.yaml \ +--config-file configs/trainers/da/source_only/office31.yaml \ +--output-dir output/source_only_office31 +``` + +`$DATA` denotes the location where datasets are installed. `--dataset-config-file` loads the common setting for the dataset (Office-31 in this case) such as image size and model architecture. `--config-file` loads the algorithm-specific setting such as hyper-parameters and optimization parameters. + +To use multiple sources, namely the multi-source domain adaptation task, one just needs to add more sources to `--source-domains`. For instance, to train a source-only baseline on miniDomainNet, one can do + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains clipart painting real \ +--target-domains sketch \ +--dataset-config-file configs/datasets/da/mini_domainnet.yaml \ +--config-file configs/trainers/da/source_only/mini_domainnet.yaml \ +--output-dir output/source_only_minidn +``` + +After the training finishes, the model weights will be saved under the specified output directory, along with a log file and a tensorboard file for visualization. 
+ +To print out the results saved in the log file (so you do not need to exhaustively go through all log files and calculate the mean/std by yourself), you can use `tools/parse_test_res.py`. The instructions can be found in the code. + +For other trainers such as `MCD`, you can set `--trainer MCD` while keeping the config file unchanged, i.e. using the same training parameters as `SourceOnly` (in the simplest case). To modify the hyper-parameters in MCD, like `N_STEP_F` (number of steps to update the feature extractor), you can append `TRAINER.MCD.N_STEP_F 4` to the existing input arguments (otherwise the default value will be used). Alternatively, you can create a new `.yaml` config file to store your custom setting. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/blob/master/dassl/config/defaults.py#L176) for a complete list of algorithm-specific hyper-parameters. + +### Test +Model testing can be done by using `--eval-only`, which asks the code to run `trainer.test()`. You also need to provide the trained model and specify which model file (i.e. saved at which epoch) to use. For example, to use `model.pth.tar-20` saved at `output/source_only_office31/model`, you can do + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains amazon \ +--target-domains webcam \ +--dataset-config-file configs/datasets/da/office31.yaml \ +--config-file configs/trainers/da/source_only/office31.yaml \ +--output-dir output/source_only_office31_test \ +--eval-only \ +--model-dir output/source_only_office31 \ +--load-epoch 20 +``` + +Note that `--model-dir` takes as input the directory path which was specified in `--output-dir` in the training stage. + +### Write a new trainer +A good practice is to go through `dassl/engine/trainer.py` to get familiar with the base trainer classes, which provide generic functions and training loops.
To write a trainer class for domain adaptation or semi-supervised learning, the new class can subclass `TrainerXU`. For domain generalization, the new class can subclass `TrainerX`. In particular, `TrainerXU` and `TrainerX` mainly differ in whether they use a data loader for unlabeled data. With the base classes, a new trainer may only need to implement the `forward_backward()` method, which performs loss computation and model update. See `dassl/engine/da/source_only.py` for example. + +### Add a new backbone/head/network +`backbone` corresponds to a convolutional neural network model which performs feature extraction. `head` (which is an optional module) is mounted on top of `backbone` for further processing, which can be, for example, an MLP. `backbone` and `head` are basic building blocks for constructing a `SimpleNet()` (see `dassl/engine/trainer.py`) which serves as the primary model for a task. `network` contains custom neural network models, such as an image generator. + +To add a new module, namely a backbone/head/network, you need to first register the module using the corresponding `registry`, i.e. `BACKBONE_REGISTRY` for `backbone`, `HEAD_REGISTRY` for `head` and `NETWORK_REGISTRY` for `network`. Note that for a new `backbone`, we require the model to subclass `Backbone` as defined in `dassl/modeling/backbone/backbone.py` and specify the `self._out_features` attribute. + +We provide an example below for how to add a new `backbone`. +```python +from dassl.modeling import Backbone, BACKBONE_REGISTRY + +class MyBackbone(Backbone): + + def __init__(self): + super().__init__() + # Create layers + self.conv = ... + + self._out_features = 2048 + + def forward(self, x): + # Extract and return features + +@BACKBONE_REGISTRY.register() +def my_backbone(**kwargs): + return MyBackbone() +``` +Then, you can set `MODEL.BACKBONE.NAME` to `my_backbone` to use your own architecture. For more details, please refer to the source code in `dassl/modeling`.
+ +### Add a dataset +An example code structure is shown below. Make sure you subclass `DatasetBase` and register the dataset with `@DATASET_REGISTRY.register()`. All you need is to load `train_x`, `train_u` (optional), `val` (optional) and `test`, among which `train_u` and `val` could be `None` or simply ignored. Each of these variables contains a list of `Datum` objects. A `Datum` object (implemented [here](https://github.com/KaiyangZhou/Dassl.pytorch/blob/master/dassl/data/datasets/base_dataset.py#L12)) contains information for a single image, like `impath` (string) and `label` (int). + +```python +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase + +@DATASET_REGISTRY.register() +class NewDataset(DatasetBase): + + dataset_dir = '' + + def __init__(self, cfg): + + train_x = ... + train_u = ... # optional, can be None + val = ... # optional, can be None + test = ... + + super().__init__(train_x=train_x, train_u=train_u, val=val, test=test) +``` + +We suggest you take a look at the datasets code in some projects like [this](https://github.com/KaiyangZhou/CoOp), which is built on top of Dassl. + +## Relevant Research + +We would like to share here our research relevant to Dassl. 
+ +- [On-Device Domain Generalization](https://arxiv.org/abs/2209.07521) +- [Domain Generalization: A Survey](https://arxiv.org/abs/2103.02503) (TPAMI 2022) +- [Domain Adaptive Ensemble Learning](https://arxiv.org/abs/2003.07325) (TIP 2021) +- [MixStyle Neural Networks for Domain Generalization and Adaptation](https://arxiv.org/abs/2107.02053) +- [Semi-Supervised Domain Generalization with Stochastic StyleMatch](https://arxiv.org/abs/2106.00592) +- [Domain Generalization with MixStyle](https://openreview.net/forum?id=6xHJ37MVxxp) (ICLR 2021) +- [Learning to Generate Novel Domains for Domain Generalization](https://arxiv.org/abs/2007.03304) (ECCV 2020) +- [Deep Domain-Adversarial Image Generation for Domain Generalisation](https://arxiv.org/abs/2003.06054) (AAAI 2020) + +## Citation + +If you find this code useful to your research, please give credit to the following paper + +``` +@article{zhou2022domain, + title={Domain generalization: A survey}, + author={Zhou, Kaiyang and Liu, Ziwei and Qiao, Yu and Xiang, Tao and Loy, Chen Change}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + year={2022}, + publisher={IEEE} +} + +@article{zhou2021domain, + title={Domain adaptive ensemble learning}, + author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, + journal={IEEE Transactions on Image Processing}, + volume={30}, + pages={8008--8018}, + year={2021}, + publisher={IEEE} +} +``` diff --git a/python/ClipDetection/Dassl.pytorch/configs/README.md b/python/ClipDetection/Dassl.pytorch/configs/README.md new file mode 100644 index 00000000..18b90a46 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/README.md @@ -0,0 +1 @@ +The `datasets/` folder contains dataset-specific config files which define the standard protocols (e.g., image size, data augmentation, network architecture) used by most papers. 
The `trainers/` folder contains method-specific config files which define optimization algorithms (e.g., optimizer, epoch) and hyperparameter settings. diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml new file mode 100644 index 00000000..52c086fa --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml @@ -0,0 +1,7 @@ +INPUT: + SIZE: (32, 32) + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFARSTL" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/digit5.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/digit5.yaml new file mode 100644 index 00000000..f754d643 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/digit5.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + TRANSFORMS: ["normalize"] + +DATASET: + NAME: "Digit5" + +MODEL: + BACKBONE: + NAME: "cnn_digit5_m3sda" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/domainnet.yaml new file mode 100644 index 00000000..075f9232 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/domainnet.yaml @@ -0,0 +1,10 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "DomainNet" + +MODEL: + BACKBONE: + NAME: "resnet101" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml new file mode 100644 index 00000000..cfb34d8a --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml @@ -0,0 +1,10 @@ +INPUT: + SIZE: (96, 96) + TRANSFORMS: ["random_flip", 
"random_translation", "normalize"] + +DATASET: + NAME: "miniDomainNet" + +MODEL: + BACKBONE: + NAME: "resnet18" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office31.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office31.yaml new file mode 100644 index 00000000..77cca035 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office31.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "Office31" + +MODEL: + BACKBONE: + NAME: "resnet50" + HEAD: + NAME: "mlp" + HIDDEN_LAYERS: [256] + DROPOUT: 0. \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office_home.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office_home.yaml new file mode 100644 index 00000000..7e181fda --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/office_home.yaml @@ -0,0 +1,5 @@ +INPUT: + SIZE: (224, 224) + +DATASET: + NAME: "OfficeHome" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/da/visda17.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/visda17.yaml new file mode 100644 index 00000000..d54f2f63 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/da/visda17.yaml @@ -0,0 +1,13 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "center_crop", "normalize"] + +DATASET: + NAME: "VisDA17" + +MODEL: + BACKBONE: + NAME: "resnet101" + +TEST: + PER_CLASS_RESULT: True \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml new file mode 100644 index 00000000..11a2c4d7 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", 
"random_flip", "normalize"] + +DATASET: + NAME: "Camelyon17" diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml new file mode 100644 index 00000000..c4b7f917 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR100C" + CIFAR_C_TYPE: "fog" + CIFAR_C_LEVEL: 5 + +MODEL: + BACKBONE: + NAME: "wide_resnet_16_4" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml new file mode 100644 index 00000000..ec5702ed --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR10C" + CIFAR_C_TYPE: "fog" + CIFAR_C_LEVEL: 5 + +MODEL: + BACKBONE: + NAME: "wide_resnet_16_4" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digit_single.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digit_single.yaml new file mode 100644 index 00000000..a6bacbb2 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digit_single.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "DigitSingle" + +MODEL: + BACKBONE: + NAME: "cnn_digitsingle" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml new file mode 100644 index 
00000000..ca25e213 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "DigitsDG" + +MODEL: + BACKBONE: + NAME: "cnn_digitsdg" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/fmow.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/fmow.yaml new file mode 100644 index 00000000..825ee809 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/fmow.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +DATASET: + NAME: "FMoW" diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml new file mode 100644 index 00000000..c8aa2eb5 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +DATASET: + NAME: "IWildCam" diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml new file mode 100644 index 00000000..0835973c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml @@ -0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "OfficeHomeDG" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/pacs.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/pacs.yaml new file mode 100644 index 00000000..682ab1c8 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/pacs.yaml @@ 
-0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "PACS" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/vlcs.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/vlcs.yaml new file mode 100644 index 00000000..0c8804cf --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/dg/vlcs.yaml @@ -0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "VLCS" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml new file mode 100644 index 00000000..63b6a1df --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR10" + NUM_LABELED: 4000 + VAL_PERCENT: 0. + +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml new file mode 100644 index 00000000..6230a881 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml @@ -0,0 +1,15 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "CIFAR100" + NUM_LABELED: 10000 + VAL_PERCENT: 0. 
+ +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/stl10.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/stl10.yaml new file mode 100644 index 00000000..7b11df12 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/stl10.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (96, 96) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "STL10" + STL10_FOLD: 0 + +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/svhn.yaml b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/svhn.yaml new file mode 100644 index 00000000..cd3b527b --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/datasets/ssl/svhn.yaml @@ -0,0 +1,15 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "SVHN" + NUM_LABELED: 1000 + VAL_PERCENT: 0. 
+ +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml new file mode 100644 index 00000000..04ba3467 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomSampler" + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 192 + TEST: + BATCH_SIZE: 256 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 90 + RAMPUP_ITRS: 10000 + +TRAINER: + CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml new file mode 100644 index 00000000..e5fd5593 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 90 + RAMPUP_ITRS: 10000 + +TRAINER: + CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml new file mode 100644 index 00000000..cb4f9c12 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml @@ -0,0 +1,21 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 192 + TEST: + BATCH_SIZE: 200 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 60 + RAMPUP_ITRS: 10000 + LR_SCHEDULER: "cosine" + +TRAINER: + 
CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml new file mode 100644 index 00000000..d83bfe42 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 256 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + STRONG_TRANSFORMS: ["randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml new file mode 100644 index 00000000..fc7cd211 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml @@ -0,0 +1,19 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml new file mode 100644 index 00000000..708ddcba --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml @@ -0,0 +1,19 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 192 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 200 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + 
STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml new file mode 100644 index 00000000..a70887b0 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 256 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml new file mode 100644 index 00000000..5abaa12a --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml new file mode 100644 index 00000000..6edf3e3c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 192 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 200 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml 
b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml new file mode 100644 index 00000000..64ce348e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml @@ -0,0 +1,12 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 256 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml new file mode 100644 index 00000000..bd8471eb --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml new file mode 100644 index 00000000..8fb73ee1 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 32 + +OPTIM: + NAME: "sgd" + LR: 0.002 + STEPSIZE: [20] + MAX_EPOCH: 20 \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml new file mode 100644 index 00000000..3c25fb09 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 32 + +OPTIM: + NAME: "sgd" + LR: 0.0001 + STEPSIZE: [2] + MAX_EPOCH: 2 + +TRAIN: + PRINT_FREQ: 50 + COUNT_ITER: "train_u" \ No 
newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml new file mode 100644 index 00000000..45304313 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 120 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml new file mode 100644 index 00000000..8b17f5a7 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml new file mode 100644 index 00000000..8b17f5a7 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml 
b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml new file mode 100644 index 00000000..8ee80302 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml @@ -0,0 +1,20 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] + +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x32_gctx" + LMDA: 0.3 \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml new file mode 100644 index 00000000..b55f8100 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml @@ -0,0 +1,21 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] + +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 16 + +OPTIM: + NAME: "sgd" + LR: 0.0005 + STEPSIZE: [20] + MAX_EPOCH: 25 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x64_gctx" + WARMUP: 3 + LMDA: 0.3 \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml new file mode 100644 index 00000000..b55f8100 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml @@ -0,0 +1,21 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] 
+ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 16 + +OPTIM: + NAME: "sgd" + LR: 0.0005 + STEPSIZE: [20] + MAX_EPOCH: 25 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x64_gctx" + WARMUP: 3 + LMDA: 0.3 \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml new file mode 100644 index 00000000..8b73fbea --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAIN: + PRINT_FREQ: 20 \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml new file mode 100644 index 00000000..bd8471eb --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml new file mode 100644 index 00000000..43f62142 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml new file mode 100644 
index 00000000..43f62142 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml b/python/ClipDetection/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml new file mode 100644 index 00000000..a03fc6c9 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml @@ -0,0 +1,23 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 448 + TEST: + BATCH_SIZE: 500 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [4000] + MAX_EPOCH: 4000 + LR_SCHEDULER: "cosine" + +TRAIN: + COUNT_ITER: "train_u" + PRINT_FREQ: 10 + +TRAINER: + FIXMATCH: + STRONG_TRANSFORMS: ["random_flip", "randaugment_fixmatch", "normalize", "cutout"] \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/dassl/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/__init__.py new file mode 100644 index 00000000..225e3ca0 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/__init__.py @@ -0,0 +1,18 @@ +""" +Dassl +------ +PyTorch toolbox for domain adaptation and semi-supervised learning. 
+ +URL: https://github.com/KaiyangZhou/Dassl.pytorch + +@article{zhou2020domain, + title={Domain Adaptive Ensemble Learning}, + author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, + journal={arXiv preprint arXiv:2003.07325}, + year={2020} +} +""" + +__version__ = "0.6.3" +__author__ = "Kaiyang Zhou" +__homepage__ = "https://kaiyangzhou.github.io/" diff --git a/python/ClipDetection/Dassl.pytorch/dassl/config/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/config/__init__.py new file mode 100644 index 00000000..d745fbab --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/config/__init__.py @@ -0,0 +1,21 @@ +from .defaults import _C as cfg_default + + +def get_cfg_default(): + return cfg_default.clone() + + +def clean_cfg(cfg, trainer): + """Remove unused trainers (configs). + + Aim: Only show relevant information when calling print(cfg). + + Args: + cfg (_C): cfg instance. + trainer (str): trainer name. + """ + keys = list(cfg.TRAINER.keys()) + for key in keys: + if key == "NAME" or key == trainer.upper(): + continue + cfg.TRAINER.pop(key, None) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/config/defaults.py b/python/ClipDetection/Dassl.pytorch/dassl/config/defaults.py new file mode 100644 index 00000000..f6dcf143 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/config/defaults.py @@ -0,0 +1,313 @@ +################################################################ +# CHANGES MADE TO FILE # +# ------------------------------------------------------------ # +# Changed _C.VERBOSE = False from True # +# # +################################################################ + +from yacs.config import CfgNode as CN + +########################### +# Config definition +########################### + +_C = CN() + +_C.VERSION = 1 + +# Directory to save the output files (like log.txt and model weights) +_C.OUTPUT_DIR = "./output" +# Path to a directory where the files were saved previously +_C.RESUME = "" +# Set seed to negative 
value to randomize everything +# Set seed to positive value to use a fixed seed +_C.SEED = -1 +_C.USE_CUDA = True +# Print detailed information +# E.g. trainer, dataset, and backbone +_C.VERBOSE = False + +########################### +# Input +########################### +_C.INPUT = CN() +_C.INPUT.SIZE = (224, 224) +# Mode of interpolation in resize functions +_C.INPUT.INTERPOLATION = "bilinear" +# For available choices please refer to transforms.py +_C.INPUT.TRANSFORMS = () +# If True, tfm_train and tfm_test will be None +_C.INPUT.NO_TRANSFORM = False +# Mean and std (default: ImageNet) +_C.INPUT.PIXEL_MEAN = [0.485, 0.456, 0.406] +_C.INPUT.PIXEL_STD = [0.229, 0.224, 0.225] +# Random crop +_C.INPUT.CROP_PADDING = 4 +# Random resized crop +_C.INPUT.RRCROP_SCALE = (0.08, 1.0) +# Cutout +_C.INPUT.CUTOUT_N = 1 +_C.INPUT.CUTOUT_LEN = 16 +# Gaussian noise +_C.INPUT.GN_MEAN = 0.0 +_C.INPUT.GN_STD = 0.15 +# RandomAugment +_C.INPUT.RANDAUGMENT_N = 2 +_C.INPUT.RANDAUGMENT_M = 10 +# ColorJitter (brightness, contrast, saturation, hue) +_C.INPUT.COLORJITTER_B = 0.4 +_C.INPUT.COLORJITTER_C = 0.4 +_C.INPUT.COLORJITTER_S = 0.4 +_C.INPUT.COLORJITTER_H = 0.1 +# Random gray scale's probability +_C.INPUT.RGS_P = 0.2 +# Gaussian blur +_C.INPUT.GB_P = 0.5 # probability of applying this operation +_C.INPUT.GB_K = 21 # kernel size (should be an odd number) + +########################### +# Dataset +########################### +_C.DATASET = CN() +# Directory where datasets are stored +_C.DATASET.ROOT = "" +_C.DATASET.NAME = "" +# List of source/target domains' names (strings) +# Does not apply to some datasets, which have pre-defined splits +_C.DATASET.SOURCE_DOMAINS = () +_C.DATASET.TARGET_DOMAINS = () +# Number of labeled instances in total +# Useful for the semi-supervised learning +_C.DATASET.NUM_LABELED = -1 +# Number of images per class +_C.DATASET.NUM_SHOTS = -1 +# Percentage of validation data (only used for SSL datasets) +# Set to 0 if you do not want to use val data +# Using val data
for hyperparameter tuning was done in Oliver et al. 2018 +_C.DATASET.VAL_PERCENT = 0.1 +# Fold index for STL-10 dataset (normal range is 0 - 9) +# Negative number means None +_C.DATASET.STL10_FOLD = -1 +# CIFAR-10/100-C's corruption type and intensity level +_C.DATASET.CIFAR_C_TYPE = "" +_C.DATASET.CIFAR_C_LEVEL = 1 +# Use all data in the unlabeled data set (e.g. FixMatch) +_C.DATASET.ALL_AS_UNLABELED = False + +########################### +# Dataloader +########################### +_C.DATALOADER = CN() +_C.DATALOADER.NUM_WORKERS = 4 +# Apply transformations to an image K times (during training) +_C.DATALOADER.K_TRANSFORMS = 1 +# img0 denotes image tensor without augmentation +# Useful for consistency learning +_C.DATALOADER.RETURN_IMG0 = False +# Setting for the train_x data-loader +_C.DATALOADER.TRAIN_X = CN() +_C.DATALOADER.TRAIN_X.SAMPLER = "RandomSampler" +_C.DATALOADER.TRAIN_X.BATCH_SIZE = 32 +# Parameter for RandomDomainSampler +# 0 or -1 means sampling from all domains +_C.DATALOADER.TRAIN_X.N_DOMAIN = 0 +# Parameter of RandomClassSampler +# Number of instances per class +_C.DATALOADER.TRAIN_X.N_INS = 16 + +# Setting for the train_u data-loader +_C.DATALOADER.TRAIN_U = CN() +# Set to false if you want to have unique +# data loader params for train_u +_C.DATALOADER.TRAIN_U.SAME_AS_X = True +_C.DATALOADER.TRAIN_U.SAMPLER = "RandomSampler" +_C.DATALOADER.TRAIN_U.BATCH_SIZE = 32 +_C.DATALOADER.TRAIN_U.N_DOMAIN = 0 +_C.DATALOADER.TRAIN_U.N_INS = 16 + +# Setting for the test data-loader +_C.DATALOADER.TEST = CN() +_C.DATALOADER.TEST.SAMPLER = "SequentialSampler" +_C.DATALOADER.TEST.BATCH_SIZE = 32 + +########################### +# Model +########################### +_C.MODEL = CN() +# Path to model weights (for initialization) +_C.MODEL.INIT_WEIGHTS = "" +_C.MODEL.BACKBONE = CN() +_C.MODEL.BACKBONE.NAME = "" +_C.MODEL.BACKBONE.PRETRAINED = True +# Definition of embedding layers +_C.MODEL.HEAD = CN() +# If none, do not construct embedding layers, the +# backbone's 
output will be passed to the classifier +_C.MODEL.HEAD.NAME = "" +# Structure of hidden layers (a list), e.g. [512, 512] +# If undefined, no embedding layer will be constructed +_C.MODEL.HEAD.HIDDEN_LAYERS = () +_C.MODEL.HEAD.ACTIVATION = "relu" +_C.MODEL.HEAD.BN = True +_C.MODEL.HEAD.DROPOUT = 0.0 + +########################### +# Optimization +########################### +_C.OPTIM = CN() +_C.OPTIM.NAME = "adam" +_C.OPTIM.LR = 0.0003 +_C.OPTIM.WEIGHT_DECAY = 5e-4 +_C.OPTIM.MOMENTUM = 0.9 +_C.OPTIM.SGD_DAMPNING = 0 +_C.OPTIM.SGD_NESTEROV = False +_C.OPTIM.RMSPROP_ALPHA = 0.99 +# The following also apply to other +# adaptive optimizers like adamw +_C.OPTIM.ADAM_BETA1 = 0.9 +_C.OPTIM.ADAM_BETA2 = 0.999 +# STAGED_LR allows different layers to have +# different lr, e.g. pre-trained base layers +# can be assigned a smaller lr than the new +# classification layer +_C.OPTIM.STAGED_LR = False +_C.OPTIM.NEW_LAYERS = () +_C.OPTIM.BASE_LR_MULT = 0.1 +# Learning rate scheduler +_C.OPTIM.LR_SCHEDULER = "single_step" +# -1 or 0 means the stepsize is equal to max_epoch +_C.OPTIM.STEPSIZE = (-1, ) +_C.OPTIM.GAMMA = 0.1 +_C.OPTIM.MAX_EPOCH = 10 +# Set WARMUP_EPOCH larger than 0 to activate warmup training +_C.OPTIM.WARMUP_EPOCH = -1 +# Either linear or constant +_C.OPTIM.WARMUP_TYPE = "linear" +# Constant learning rate when type=constant +_C.OPTIM.WARMUP_CONS_LR = 1e-5 +# Minimum learning rate when type=linear +_C.OPTIM.WARMUP_MIN_LR = 1e-5 +# Recount epoch for the next scheduler (last_epoch=-1) +# Otherwise last_epoch=warmup_epoch +_C.OPTIM.WARMUP_RECOUNT = True + +########################### +# Train +########################### +_C.TRAIN = CN() +# How often (epoch) to save model during training +# Set to 0 or negative value to only save the last one +_C.TRAIN.CHECKPOINT_FREQ = 0 +# How often (batch) to print training information +_C.TRAIN.PRINT_FREQ = 10 +# Use 'train_x', 'train_u' or 'smaller_one' to count +# the number of iterations in an epoch (for DA and SSL) 
+_C.TRAIN.COUNT_ITER = "train_x" + +########################### +# Test +########################### +_C.TEST = CN() +_C.TEST.EVALUATOR = "Classification" +_C.TEST.PER_CLASS_RESULT = False +# Compute confusion matrix, which will be saved +# to $OUTPUT_DIR/cmat.pt +_C.TEST.COMPUTE_CMAT = False +# If NO_TEST=True, no testing will be conducted +_C.TEST.NO_TEST = False +# Use test or val set for FINAL evaluation +_C.TEST.SPLIT = "test" +# Which model to test after training (last_step or best_val) +# If best_val, evaluation is done every epoch (if val data +# is unavailable, test data will be used) +_C.TEST.FINAL_MODEL = "last_step" + +########################### +# Trainer specifics +########################### +_C.TRAINER = CN() +_C.TRAINER.NAME = "" + +###### +# DA +###### +# MCD +_C.TRAINER.MCD = CN() +_C.TRAINER.MCD.N_STEP_F = 4 # number of steps to train F +# MME +_C.TRAINER.MME = CN() +_C.TRAINER.MME.LMDA = 0.1 # weight for the entropy loss +# CDAC +_C.TRAINER.CDAC = CN() +_C.TRAINER.CDAC.CLASS_LR_MULTI = 10 +_C.TRAINER.CDAC.RAMPUP_COEF = 30 +_C.TRAINER.CDAC.RAMPUP_ITRS = 1000 +_C.TRAINER.CDAC.TOPK_MATCH = 5 +_C.TRAINER.CDAC.P_THRESH = 0.95 +_C.TRAINER.CDAC.STRONG_TRANSFORMS = () +# SE (SelfEnsembling) +_C.TRAINER.SE = CN() +_C.TRAINER.SE.EMA_ALPHA = 0.999 +_C.TRAINER.SE.CONF_THRE = 0.95 +_C.TRAINER.SE.RAMPUP = 300 +# M3SDA +_C.TRAINER.M3SDA = CN() +_C.TRAINER.M3SDA.LMDA = 0.5 # weight for the moment distance loss +_C.TRAINER.M3SDA.N_STEP_F = 4 # follow MCD +# DAEL +_C.TRAINER.DAEL = CN() +_C.TRAINER.DAEL.WEIGHT_U = 0.5 # weight on the unlabeled loss +_C.TRAINER.DAEL.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.DAEL.STRONG_TRANSFORMS = () + +###### +# DG +###### +# CrossGrad +_C.TRAINER.CROSSGRAD = CN() +_C.TRAINER.CROSSGRAD.EPS_F = 1.0 # scaling parameter for D's gradients +_C.TRAINER.CROSSGRAD.EPS_D = 1.0 # scaling parameter for F's gradients +_C.TRAINER.CROSSGRAD.ALPHA_F = 0.5 # balancing weight for the label net's loss +_C.TRAINER.CROSSGRAD.ALPHA_D = 
0.5 # balancing weight for the domain net's loss +# DDAIG +_C.TRAINER.DDAIG = CN() +_C.TRAINER.DDAIG.G_ARCH = "" # generator's architecture +_C.TRAINER.DDAIG.LMDA = 0.3 # perturbation weight +_C.TRAINER.DDAIG.CLAMP = False # clamp perturbation values +_C.TRAINER.DDAIG.CLAMP_MIN = -1.0 +_C.TRAINER.DDAIG.CLAMP_MAX = 1.0 +_C.TRAINER.DDAIG.WARMUP = 0 +_C.TRAINER.DDAIG.ALPHA = 0.5 # balancing weight for the losses +# DAELDG (the DG version of DAEL) +_C.TRAINER.DAELDG = CN() +_C.TRAINER.DAELDG.WEIGHT_U = 0.5 # weight on the unlabeled loss +_C.TRAINER.DAELDG.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.DAELDG.STRONG_TRANSFORMS = () +# DOMAINMIX +_C.TRAINER.DOMAINMIX = CN() +_C.TRAINER.DOMAINMIX.TYPE = "crossdomain" +_C.TRAINER.DOMAINMIX.ALPHA = 1.0 +_C.TRAINER.DOMAINMIX.BETA = 1.0 + +###### +# SSL +###### +# EntMin +_C.TRAINER.ENTMIN = CN() +_C.TRAINER.ENTMIN.LMDA = 1e-3 # weight on the entropy loss +# Mean Teacher +_C.TRAINER.MEANTEACHER = CN() +_C.TRAINER.MEANTEACHER.WEIGHT_U = 1.0 # weight on the unlabeled loss +_C.TRAINER.MEANTEACHER.EMA_ALPHA = 0.999 +_C.TRAINER.MEANTEACHER.RAMPUP = 5 # epochs used to ramp up the loss_u weight +# MixMatch +_C.TRAINER.MIXMATCH = CN() +_C.TRAINER.MIXMATCH.WEIGHT_U = 100.0 # weight on the unlabeled loss +_C.TRAINER.MIXMATCH.TEMP = 2.0 # temperature for sharpening the probability +_C.TRAINER.MIXMATCH.MIXUP_BETA = 0.75 +_C.TRAINER.MIXMATCH.RAMPUP = 20000 # steps used to ramp up the loss_u weight +# FixMatch +_C.TRAINER.FIXMATCH = CN() +_C.TRAINER.FIXMATCH.WEIGHT_U = 1.0 # weight on the unlabeled loss +_C.TRAINER.FIXMATCH.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.FIXMATCH.STRONG_TRANSFORMS = () diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/data/__init__.py new file mode 100644 index 00000000..66ca734e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/__init__.py @@ -0,0 +1 @@ +from .data_manager import DataManager, DatasetWrapper diff --git 
a/python/ClipDetection/Dassl.pytorch/dassl/data/data_manager.py b/python/ClipDetection/Dassl.pytorch/dassl/data/data_manager.py new file mode 100644 index 00000000..0bb2b7b0 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/data_manager.py @@ -0,0 +1,270 @@ +import torch +import torchvision.transforms as T +from tabulate import tabulate +from torch.utils.data import Dataset as TorchDataset + +from dassl.utils import read_image + +from .datasets import build_dataset +from .samplers import build_sampler +from .transforms import INTERPOLATION_MODES, build_transform + + +def build_data_loader( + cfg, + sampler_type="SequentialSampler", + data_source=None, + batch_size=64, + n_domain=0, + n_ins=2, + tfm=None, + is_train=True, + dataset_wrapper=None +): + # Build sampler + sampler = build_sampler( + sampler_type, + cfg=cfg, + data_source=data_source, + batch_size=batch_size, + n_domain=n_domain, + n_ins=n_ins + ) + + if dataset_wrapper is None: + dataset_wrapper = DatasetWrapper + + # Build data loader + data_loader = torch.utils.data.DataLoader( + dataset_wrapper(cfg, data_source, transform=tfm, is_train=is_train), + batch_size=batch_size, + sampler=sampler, + num_workers=cfg.DATALOADER.NUM_WORKERS, + drop_last=is_train and len(data_source) >= batch_size, + pin_memory=(torch.cuda.is_available() and cfg.USE_CUDA) + ) + assert len(data_loader) > 0 + + return data_loader + + +class DataManager: + + def __init__( + self, + cfg, + custom_tfm_train=None, + custom_tfm_test=None, + dataset_wrapper=None + ): + # Load dataset + dataset = build_dataset(cfg) + + # Build transform + if custom_tfm_train is None: + tfm_train = build_transform(cfg, is_train=True) + else: + print("* Using custom transform for training") + tfm_train = custom_tfm_train + + if custom_tfm_test is None: + tfm_test = build_transform(cfg, is_train=False) + else: + print("* Using custom transform for testing") + tfm_test = custom_tfm_test + + # Build train_loader_x + train_loader_x = 
build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TRAIN_X.SAMPLER, + data_source=dataset.train_x, + batch_size=cfg.DATALOADER.TRAIN_X.BATCH_SIZE, + n_domain=cfg.DATALOADER.TRAIN_X.N_DOMAIN, + n_ins=cfg.DATALOADER.TRAIN_X.N_INS, + tfm=tfm_train, + is_train=True, + dataset_wrapper=dataset_wrapper + ) + + # Build train_loader_u + train_loader_u = None + if dataset.train_u: + sampler_type_ = cfg.DATALOADER.TRAIN_U.SAMPLER + batch_size_ = cfg.DATALOADER.TRAIN_U.BATCH_SIZE + n_domain_ = cfg.DATALOADER.TRAIN_U.N_DOMAIN + n_ins_ = cfg.DATALOADER.TRAIN_U.N_INS + + if cfg.DATALOADER.TRAIN_U.SAME_AS_X: + sampler_type_ = cfg.DATALOADER.TRAIN_X.SAMPLER + batch_size_ = cfg.DATALOADER.TRAIN_X.BATCH_SIZE + n_domain_ = cfg.DATALOADER.TRAIN_X.N_DOMAIN + n_ins_ = cfg.DATALOADER.TRAIN_X.N_INS + + train_loader_u = build_data_loader( + cfg, + sampler_type=sampler_type_, + data_source=dataset.train_u, + batch_size=batch_size_, + n_domain=n_domain_, + n_ins=n_ins_, + tfm=tfm_train, + is_train=True, + dataset_wrapper=dataset_wrapper + ) + + # Build val_loader + val_loader = None + if dataset.val: + val_loader = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TEST.SAMPLER, + data_source=dataset.val, + batch_size=cfg.DATALOADER.TEST.BATCH_SIZE, + tfm=tfm_test, + is_train=False, + dataset_wrapper=dataset_wrapper + ) + + # Build test_loader + test_loader = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TEST.SAMPLER, + data_source=dataset.test, + batch_size=cfg.DATALOADER.TEST.BATCH_SIZE, + tfm=tfm_test, + is_train=False, + dataset_wrapper=dataset_wrapper + ) + classification_loader = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TEST.SAMPLER, + data_source=dataset.test, + batch_size=cfg.DATALOADER.TEST.BATCH_SIZE, + tfm=tfm_test, + is_train=False, + dataset_wrapper=dataset_wrapper + ) + + # Attributes + self._num_classes = dataset.num_classes + self._num_source_domains = len(cfg.DATASET.SOURCE_DOMAINS) + self._lab2cname = dataset.lab2cname + + # Dataset and 
class DatasetWrapper(TorchDataset):
    """Map-style dataset that turns items of ``data_source`` into sample dicts.

    Each sample holds the item's ``label``, ``domain``, ``impath`` and its
    ``index``, plus one or more transformed images (``img``, ``img2``, ...)
    and optionally an un-augmented tensor under ``img0``.
    """

    def __init__(self, cfg, data_source, transform=None, is_train=False):
        """Store the configuration and build the augmentation-free pipeline.

        Args:
            cfg: config node; ``DATALOADER`` and ``INPUT`` entries are read.
            data_source: indexable collection of items exposing ``label``,
                ``domain`` and ``impath``.
            transform: a callable, a list/tuple of callables, or None.
            is_train (bool): whether the wrapper serves training data.

        Raises:
            ValueError: if more than one augmentation per image is requested
                but no transform was given.
        """
        self.cfg = cfg
        self.data_source = data_source
        self.transform = transform  # a single callable or a list/tuple
        self.is_train = is_train
        # Repeated augmentation (K > 1) is a training-only feature
        self.k_tfm = cfg.DATALOADER.K_TRANSFORMS if is_train else 1
        self.return_img0 = cfg.DATALOADER.RETURN_IMG0

        if self.k_tfm > 1 and transform is None:
            raise ValueError(
                "Cannot augment the image {} times "
                "because transform is None".format(self.k_tfm)
            )

        # Pipeline without data augmentation: resize, tensorise and,
        # when configured, normalise
        interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION]
        plain_steps = [
            T.Resize(cfg.INPUT.SIZE, interpolation=interp_mode),
            T.ToTensor(),
        ]
        if "normalize" in cfg.INPUT.TRANSFORMS:
            plain_steps.append(
                T.Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD)
            )
        self.to_tensor = T.Compose(plain_steps)

    def __len__(self):
        """Return the number of items in the wrapped data source."""
        return len(self.data_source)

    def __getitem__(self, idx):
        """Load the image of item ``idx`` and return the sample dict."""
        record = self.data_source[idx]
        sample = {
            "label": record.label,
            "domain": record.domain,
            "impath": record.impath,
            "index": idx
        }

        raw = read_image(record.impath)
        tfm = self.transform

        if tfm is None:
            # No transform configured: hand back the image as it was read
            sample["img"] = raw
        elif isinstance(tfm, (list, tuple)):
            # First transform -> "img", later ones -> "img2", "img3", ...
            for position, single_tfm in enumerate(tfm, start=1):
                key = "img" if position == 1 else "img" + str(position)
                sample[key] = self._transform_image(single_tfm, raw)
        else:
            sample["img"] = self._transform_image(tfm, raw)

        if self.return_img0:
            sample["img0"] = self.to_tensor(raw)  # without any augmentation

        return sample

    def _transform_image(self, tfm, img0):
        """Apply ``tfm`` to ``img0`` ``self.k_tfm`` times.

        Returns the single result when exactly one view was produced,
        otherwise the list of views.
        """
        views = [tfm(img0) for _ in range(self.k_tfm)]
        return views[0] if len(views) == 1 else views
class DatasetBase:
    """A unified dataset class for
    1) domain adaptation
    2) domain generalization
    3) semi-supervised learning

    Subclasses read their data into lists of Datum objects and hand them to
    ``__init__``; the number of classes and the label-to-classname mapping
    are derived from ``train_x``.
    """

    dataset_dir = ""  # the directory where the dataset is stored
    domains = []  # string names of all domains

    def __init__(self, train_x=None, train_u=None, val=None, test=None):
        self._train_x = train_x  # labeled training data
        self._train_u = train_u  # unlabeled training data (optional)
        self._val = val  # validation data (optional)
        self._test = test  # test data
        self._num_classes = self.get_num_classes(train_x)
        self._lab2cname, self._classnames = self.get_lab2cname(train_x)

    @property
    def train_x(self):
        return self._train_x

    @property
    def train_u(self):
        return self._train_u

    @property
    def val(self):
        return self._val

    @property
    def test(self):
        return self._test

    @property
    def lab2cname(self):
        return self._lab2cname

    @property
    def classnames(self):
        return self._classnames

    @property
    def num_classes(self):
        return self._num_classes

    @staticmethod
    def get_num_classes(data_source):
        """Count number of classes.

        The count is ``max(label) + 1``, i.e. labels are treated as
        0-based indices.

        Args:
            data_source (list): a list of Datum objects.

        Raises:
            ValueError: if ``data_source`` contains no items.
        """
        label_set = {item.label for item in data_source}
        if not label_set:
            # Fail with an actionable message instead of max()'s generic one
            raise ValueError(
                "data_source contains no items; "
                "cannot infer the number of classes"
            )
        return max(label_set) + 1

    @staticmethod
    def get_lab2cname(data_source):
        """Get a label-to-classname mapping (dict).

        The returned dict is ordered by label so that its iteration order
        is reproducible across runs. If a label occurs with several class
        names, the last one seen in ``data_source`` is kept.

        Args:
            data_source (list): a list of Datum objects.

        Returns:
            tuple: ``(mapping, classnames)`` where ``classnames`` lists the
            class names in ascending label order.
        """
        seen = {}
        for item in data_source:
            seen[item.label] = item.classname
        mapping = {label: seen[label] for label in sorted(seen)}
        classnames = list(mapping.values())
        return mapping, classnames

    def check_input_domains(self, source_domains, target_domains):
        """Assert both domain lists are non-empty and contain known domains."""
        assert len(source_domains) > 0, "source_domains (list) is empty"
        assert len(target_domains) > 0, "target_domains (list) is empty"
        self.is_input_domain_valid(source_domains)
        self.is_input_domain_valid(target_domains)

    def is_input_domain_valid(self, input_domains):
        """Raise ValueError if any name is not listed in ``self.domains``."""
        for domain in input_domains:
            if domain not in self.domains:
                raise ValueError(
                    "Input domain must belong to {}, "
                    "but got [{}]".format(self.domains, domain)
                )

    @staticmethod
    def _extract_tar(archive, mode, dst_dir):
        """Extract a tar archive into ``dst_dir`` and close it afterwards."""
        with tarfile.open(archive, mode) as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive was downloaded, so refuse members that would
                # escape dst_dir (only on Pythons that ship the filters)
                tar.extractall(dst_dir, filter="data")
            else:
                tar.extractall(dst_dir)

    def download_data(self, url, dst, from_gdrive=True):
        """Download an archive to ``dst`` and extract it next to itself.

        Args:
            url (str): download location.
            dst (str): path of the archive file to write; must end with
                ``.zip``, ``.tar`` or ``.tar.gz``.
            from_gdrive (bool): only Google Drive downloads are supported.

        Raises:
            NotImplementedError: if ``from_gdrive`` is False or the archive
                type is not supported.
        """
        dst_dir = osp.dirname(dst)
        if dst_dir:
            # exist_ok avoids a race between the existence check and creation
            os.makedirs(dst_dir, exist_ok=True)
        # A bare file name has an empty dirname: extract into the cwd
        extract_dir = dst_dir or "."

        if from_gdrive:
            gdown.download(url, dst, quiet=False)
        else:
            raise NotImplementedError

        print("Extracting file ...")

        if dst.endswith(".zip"):
            with zipfile.ZipFile(dst, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

        elif dst.endswith(".tar"):
            self._extract_tar(dst, "r:", extract_dir)

        elif dst.endswith(".tar.gz"):
            self._extract_tar(dst, "r:gz", extract_dir)

        else:
            raise NotImplementedError

        print("File extracted to {}".format(osp.dirname(dst)))

    def generate_fewshot_dataset(
        self, *data_sources, num_shots=-1, repeat=False
    ):
        """Generate a few-shot dataset (typically for the training set).

        This function is useful when one wants to evaluate a model
        in a few-shot learning setting where each class only contains
        a small number of images.

        Args:
            data_sources: each individual is a list containing Datum objects.
            num_shots (int): number of instances per class to sample.
                Values below 1 return the inputs untouched.
            repeat (bool): repeat images if needed (default: False).

        Returns:
            A single dataset when one data source is given, otherwise one
            dataset per data source.
        """
        if num_shots < 1:
            if len(data_sources) == 1:
                return data_sources[0]
            return data_sources

        print(f"Creating a {num_shots}-shot dataset")

        output = []

        for data_source in data_sources:
            tracker = self.split_dataset_by_label(data_source)
            dataset = []

            for items in tracker.values():
                if len(items) >= num_shots:
                    sampled_items = random.sample(items, num_shots)
                elif repeat:
                    # Too few images: draw with replacement to reach num_shots
                    sampled_items = random.choices(items, k=num_shots)
                else:
                    sampled_items = items
                dataset.extend(sampled_items)

            output.append(dataset)

        if len(output) == 1:
            return output[0]

        return output

    def split_dataset_by_label(self, data_source):
        """Split a dataset, i.e. a list of Datum objects,
        into class-specific groups stored in a dictionary.

        Args:
            data_source (list): a list of Datum objects.
        """
        output = defaultdict(list)

        for item in data_source:
            output[item.label].append(item)

        return output

    def split_dataset_by_domain(self, data_source):
        """Split a dataset, i.e. a list of Datum objects,
        into domain-specific groups stored in a dictionary.

        Args:
            data_source (list): a list of Datum objects.
        """
        output = defaultdict(list)

        for item in data_source:
            output[item.domain].append(item)

        return output
def _read_data(self, input_domains, split="train"):
    """Collect one Datum per image found under ``<domain>/<split>/<class>/``.

    The integer class label is parsed from the part of each class folder
    name that precedes the first underscore; the domain label is the
    position of the domain in ``input_domains``.

    Args:
        input_domains (list): names of the domains to read.
        split (str): name of the split sub-folder.

    Returns:
        list: the collected Datum objects.
    """
    collected = []

    for domain_idx, domain_name in enumerate(input_domains):
        split_root = osp.join(self.dataset_dir, domain_name, split)

        for folder in listdir_nohidden(split_root):
            folder_path = osp.join(split_root, folder)
            filenames = listdir_nohidden(folder_path)
            class_id = int(folder.split("_")[0])

            collected.extend(
                Datum(
                    impath=osp.join(folder_path, fname),
                    label=class_id,
                    domain=domain_idx
                )
                for fname in filenames
            )

    return collected
@DATASET_REGISTRY.register()
class Digit5(DatasetBase):
    """Five digit datasets.

    It contains:
        - MNIST: hand-written digits.
        - MNIST-M: variant of MNIST with blended background.
        - SVHN: street view house number.
        - SYN: synthetic digits.
        - USPS: hand-written digits, slightly different from MNIST.

    For MNIST, MNIST-M, SVHN and SYN, we randomly sample 25,000 images from
    the training set and 9,000 images from the test set. For USPS which has only
    9,298 images in total, we use the entire dataset but replicate its training
    set 3 times so as to match the training set size of other domains.

    Reference:
        - Lecun et al. Gradient-based learning applied to document
        recognition. IEEE 1998.
        - Ganin et al. Domain-adversarial training of neural networks.
        JMLR 2016.
        - Netzer et al. Reading digits in natural images with unsupervised
        feature learning. NIPS-W 2011.
    """

    dataset_dir = "digit5"
    domains = ["mnist", "mnist_m", "svhn", "syn", "usps"]

    def __init__(self, cfg):
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train")
        train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train")
        test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test")

        super().__init__(train_x=train_x, train_u=train_u, test=test)

    def _read_data(self, input_domains, split="train"):
        """Build Datum objects for the given domains and split.

        Args:
            input_domains (list): domain names, each one of ``self.domains``.
            split (str): "train" or "test".

        Returns:
            list: Datum objects whose domain label is the position of the
            domain in ``input_domains`` and whose class name is ``str(label)``.

        Raises:
            ValueError: if a domain name has no loader.
        """
        # Explicit dispatch table instead of eval("load_" + dname): no
        # string is ever evaluated as code and unknown names fail clearly.
        loaders = {
            "mnist": load_mnist,
            "mnist_m": load_mnist_m,
            "svhn": load_svhn,
            "syn": load_syn,
            "usps": load_usps,
        }
        items = []

        for domain, dname in enumerate(input_domains):
            if dname not in loaders:
                raise ValueError(
                    "Input domain must belong to {}, "
                    "but got [{}]".format(sorted(loaders), dname)
                )
            domain_dir = osp.join(self.dataset_dir, dname)
            items_d = loaders[dname](domain_dir, split=split)

            for impath, label in items_d:
                item = Datum(
                    impath=impath,
                    label=label,
                    domain=domain,
                    classname=str(label)
                )
                items.append(item)

        return items
@DATASET_REGISTRY.register()
class DomainNet(DatasetBase):
    """DomainNet.

    Statistics:
        - 6 distinct domains: Clipart, Infograph, Painting, Quickdraw,
        Real, Sketch.
        - Around 0.6M images.
        - 345 categories.
        - URL: http://ai.bu.edu/M3SDA/.

    Special note: the t-shirt class (327) is missing in painting_train.txt.

    Reference:
        - Peng et al. Moment Matching for Multi-Source Domain
        Adaptation. ICCV 2019.
    """

    dataset_dir = "domainnet"
    domains = [
        "clipart", "infograph", "painting", "quickdraw", "real", "sketch"
    ]

    def __init__(self, cfg):
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)
        self.split_dir = osp.join(self.dataset_dir, "splits")

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train")
        train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train")
        val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="test")
        test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test")

        super().__init__(train_x=train_x, train_u=train_u, val=val, test=test)

    def _read_data(self, input_domains, split="train"):
        """Read ``<domain>_<split>.txt`` from the split directory.

        Each non-blank line holds an image path relative to the dataset
        directory followed by an integer label. The class name is the
        second "/"-separated component of the relative path.

        Args:
            input_domains (list): domain names to read.
            split (str): split name used in the split-file name.

        Returns:
            list: Datum objects; the domain label is the position of the
            domain in ``input_domains``.
        """
        items = []

        for domain, dname in enumerate(input_domains):
            filename = dname + "_" + split + ".txt"
            split_file = osp.join(self.split_dir, filename)

            with open(split_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines, e.g. at the end of the file
                        continue
                    # Split on the last whitespace run so that paths
                    # containing spaces are parsed correctly
                    impath, label = line.rsplit(None, 1)
                    classname = impath.split("/")[1]
                    impath = osp.join(self.dataset_dir, impath)
                    label = int(label)
                    item = Datum(
                        impath=impath,
                        label=label,
                        domain=domain,
                        classname=classname
                    )
                    items.append(item)

        return items
@DATASET_REGISTRY.register()
class miniDomainNet(DatasetBase):
    """A subset of DomainNet.

    Reference:
        - Peng et al. Moment Matching for Multi-Source Domain
        Adaptation. ICCV 2019.
        - Zhou et al. Domain Adaptive Ensemble Learning.
    """

    dataset_dir = "domainnet"
    domains = ["clipart", "painting", "real", "sketch"]

    def __init__(self, cfg):
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)
        self.split_dir = osp.join(self.dataset_dir, "splits_mini")

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train")
        train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train")
        test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test")

        super().__init__(train_x=train_x, train_u=train_u, test=test)

    def _read_data(self, input_domains, split="train"):
        """Read ``<domain>_<split>.txt`` from the mini split directory.

        Each non-blank line holds an image path relative to the dataset
        directory followed by an integer label. The class name is the
        second "/"-separated component of the relative path.

        Args:
            input_domains (list): domain names to read.
            split (str): split name used in the split-file name.

        Returns:
            list: Datum objects; the domain label is the position of the
            domain in ``input_domains``.
        """
        items = []

        for domain, dname in enumerate(input_domains):
            filename = dname + "_" + split + ".txt"
            split_file = osp.join(self.split_dir, filename)

            with open(split_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines, e.g. at the end of the file
                        continue
                    # Split on the last whitespace run so that paths
                    # containing spaces are parsed correctly
                    impath, label = line.rsplit(None, 1)
                    classname = impath.split("/")[1]
                    impath = osp.join(self.dataset_dir, impath)
                    label = int(label)
                    item = Datum(
                        impath=impath,
                        label=label,
                        domain=domain,
                        classname=classname
                    )
                    items.append(item)

        return items
@DATASET_REGISTRY.register()
class Office31(DatasetBase):
    """Office-31.

    Statistics:
        - 4,110 images.
        - 31 classes related to office objects.
        - 3 domains: Amazon, Webcam, Dslr.
        - URL: https://people.eecs.berkeley.edu/~jhoffman/domainadapt/.

    Reference:
        - Saenko et al. Adapting visual category models to
        new domains. ECCV 2010.
    """

    dataset_dir = "office31"
    domains = ["amazon", "webcam", "dslr"]

    def __init__(self, cfg):
        data_root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(data_root, self.dataset_dir)

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        # Target domains are read twice: once as unlabeled training data
        # and once as the test set
        labeled = self._read_data(cfg.DATASET.SOURCE_DOMAINS)
        unlabeled = self._read_data(cfg.DATASET.TARGET_DOMAINS)
        held_out = self._read_data(cfg.DATASET.TARGET_DOMAINS)

        super().__init__(train_x=labeled, train_u=unlabeled, test=held_out)

    def _read_data(self, input_domains):
        """Collect one Datum per image under ``<domain>/<class>/``.

        Class folders are sorted by name and labelled by their position in
        that order; the folder name is used as the class name.

        Args:
            input_domains (list): names of the domains to read.

        Returns:
            list: the collected Datum objects.
        """
        samples = []

        for domain_idx, domain_name in enumerate(input_domains):
            domain_root = osp.join(self.dataset_dir, domain_name)
            categories = listdir_nohidden(domain_root)
            categories.sort()

            for class_idx, category in enumerate(categories):
                category_dir = osp.join(domain_root, category)
                samples.extend(
                    Datum(
                        impath=osp.join(category_dir, fname),
                        label=class_idx,
                        domain=domain_idx,
                        classname=category
                    )
                    for fname in listdir_nohidden(category_dir)
                )

        return samples
@DATASET_REGISTRY.register()
class OfficeHome(DatasetBase):
    """Office-Home.

    Statistics:
        - Around 15,500 images.
        - 65 classes related to office and home objects.
        - 4 domains: Art, Clipart, Product, Real World.
        - URL: http://hemanthdv.org/OfficeHome-Dataset/.

    Reference:
        - Venkateswara et al. Deep Hashing Network for Unsupervised
        Domain Adaptation. CVPR 2017.
    """

    dataset_dir = "office_home"
    domains = ["art", "clipart", "product", "real_world"]

    def __init__(self, cfg):
        base = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(base, self.dataset_dir)

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        # The target domains serve both as unlabeled training data and
        # as the test set, so they are read twice
        source_items = self._read_data(cfg.DATASET.SOURCE_DOMAINS)
        target_items = self._read_data(cfg.DATASET.TARGET_DOMAINS)
        test_items = self._read_data(cfg.DATASET.TARGET_DOMAINS)

        super().__init__(
            train_x=source_items, train_u=target_items, test=test_items
        )

    def _read_data(self, input_domains):
        """Collect one Datum per image under ``<domain>/<class>/``.

        Class folders are sorted by name and labelled by their position in
        that order; the lower-cased folder name is the class name.

        Args:
            input_domains (list): names of the domains to read.

        Returns:
            list: the collected Datum objects.
        """
        found = []

        for dom_id, dom_name in enumerate(input_domains):
            dom_dir = osp.join(self.dataset_dir, dom_name)
            folders = listdir_nohidden(dom_dir)
            folders.sort()

            for cls_id, folder in enumerate(folders):
                folder_dir = osp.join(dom_dir, folder)

                for image_name in listdir_nohidden(folder_dir):
                    found.append(
                        Datum(
                            impath=osp.join(folder_dir, image_name),
                            label=cls_id,
                            domain=dom_id,
                            classname=folder.lower(),
                        )
                    )

        return found
@DATASET_REGISTRY.register()
class VisDA17(DatasetBase):
    """VisDA17.

    Focusing on simulation-to-reality domain shift.

    URL: http://ai.bu.edu/visda-2017/.

    Reference:
        - Peng et al. VisDA: The Visual Domain Adaptation
        Challenge. ArXiv 2017.
    """

    dataset_dir = "visda17"
    domains = ["synthetic", "real"]

    def __init__(self, cfg):
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        # The configured domains are only validated: the source is always
        # "synthetic" and the target is always "real"
        train_x = self._read_data("synthetic")
        train_u = self._read_data("real")
        test = self._read_data("real")

        super().__init__(train_x=train_x, train_u=train_u, test=test)

    def _read_data(self, dname):
        """Read ``image_list.txt`` of one domain.

        "synthetic" is read from the ``train`` folder, any other name from
        the ``validation`` folder. Each non-blank line holds an image path
        relative to that folder followed by an integer label. The class
        name is the first "/"-separated component of the relative path.

        Args:
            dname (str): domain name.

        Returns:
            list: Datum objects, all with domain label 0.
        """
        filedir = "train" if dname == "synthetic" else "validation"
        image_list = osp.join(self.dataset_dir, filedir, "image_list.txt")
        items = []
        # There is only one source domain
        domain = 0

        with open(image_list, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. at the end of the file
                    continue
                # Split on the last whitespace run so that paths
                # containing spaces are parsed correctly
                impath, label = line.rsplit(None, 1)
                classname = impath.split("/")[0]
                impath = osp.join(self.dataset_dir, filedir, impath)
                label = int(label)
                item = Datum(
                    impath=impath,
                    label=label,
                    domain=domain,
                    classname=classname
                )
                items.append(item)

        return items
@DATASET_REGISTRY.register()
class CIFAR10C(DatasetBase):
    """CIFAR-10 -> CIFAR-10-C.

    Dataset link: https://zenodo.org/record/2535967#.YFwtV2Qzb0o

    Statistics:
        - 2 domains: the normal CIFAR-10 vs. a corrupted CIFAR-10
        - 10 categories

    Reference:
        - Hendrycks et al. Benchmarking neural network robustness
        to common corruptions and perturbations. ICLR 2019.
    """

    dataset_dir = ""
    domains = ["cifar10", "cifar10_c"]

    def __init__(self, cfg):
        """Read the clean training set and one corrupted test set.

        The training data comes from ``<root>/<source>/train`` and the
        test data from ``<root>/<target>/<corruption type>/<level>``.

        Raises:
            ValueError: if ``DATASET.CIFAR_C_TYPE`` is not set or the
                corrupted test directory does not exist.
        """
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = root

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )
        source_domain = cfg.DATASET.SOURCE_DOMAINS[0]
        target_domain = cfg.DATASET.TARGET_DOMAINS[0]
        # The first domain is always the source, the second the target
        assert source_domain == self.domains[0], (
            f'Source domain is expected to be "{self.domains[0]}", '
            f'but got "{source_domain}"'
        )
        assert target_domain == self.domains[1], (
            f'Target domain is expected to be "{self.domains[1]}", '
            f'but got "{target_domain}"'
        )

        c_type = cfg.DATASET.CIFAR_C_TYPE
        c_level = cfg.DATASET.CIFAR_C_LEVEL

        if not c_type:
            raise ValueError(
                "Please specify DATASET.CIFAR_C_TYPE in the config file"
            )

        assert (
            c_type in AVAI_C_TYPES
        ), f'C_TYPE is expected to belong to {AVAI_C_TYPES}, but got "{c_type}"'
        assert 1 <= c_level <= 5, (
            f"C_LEVEL is expected to be within [1, 5], but got {c_level}"
        )

        train_dir = osp.join(self.dataset_dir, source_domain, "train")
        test_dir = osp.join(
            self.dataset_dir, target_domain, c_type, str(c_level)
        )

        if not osp.exists(test_dir):
            raise ValueError(f'Test directory not found: "{test_dir}"')

        train = self._read_data(train_dir)
        test = self._read_data(test_dir)

        super().__init__(train_x=train, test=test)

    def _read_data(self, data_dir):
        """Collect one Datum per image under ``data_dir/<class>/``.

        Class folders are sorted by name and labelled by their position in
        that order; every item gets domain label 0.
        """
        class_names = listdir_nohidden(data_dir)
        class_names.sort()
        items = []

        for label, class_name in enumerate(class_names):
            class_dir = osp.join(data_dir, class_name)
            imnames = listdir_nohidden(class_dir)

            for imname in imnames:
                impath = osp.join(class_dir, imname)
                item = Datum(impath=impath, label=label, domain=0)
                items.append(item)

        return items


@DATASET_REGISTRY.register()
class CIFAR100C(CIFAR10C):
    """CIFAR-100 -> CIFAR-100-C.

    Dataset link: https://zenodo.org/record/3555552#.YFxpQmQzb0o

    Statistics:
        - 2 domains: the normal CIFAR-100 vs. a corrupted CIFAR-100
        - 100 categories

    Reference:
        - Hendrycks et al. Benchmarking neural network robustness
        to common corruptions and perturbations. ICLR 2019.
    """

    dataset_dir = ""
    domains = ["cifar100", "cifar100_c"]

    def __init__(self, cfg):
        super().__init__(cfg)
@DATASET_REGISTRY.register()
class DigitSingle(DatasetBase):
    """Digit recognition datasets for single-source domain generalization.

    There are five digit datasets:
        - MNIST: hand-written digits.
        - MNIST-M: variant of MNIST with blended background.
        - SVHN: street view house number.
        - SYN: synthetic digits.
        - USPS: hand-written digits, slightly different from MNIST.

    Protocol:
        Volpi et al. train a model using 10,000 images from MNIST and
        evaluate the model on the test split of the other four datasets. However,
        the code does not restrict you to only use MNIST as the source dataset.
        Instead, you can use any dataset as the source. But note that only 10,000
        images will be sampled from the source dataset for training.

    Reference:
        - Lecun et al. Gradient-based learning applied to document
        recognition. IEEE 1998.
        - Ganin et al. Domain-adversarial training of neural networks.
        JMLR 2016.
        - Netzer et al. Reading digits in natural images with unsupervised
        feature learning. NIPS-W 2011.
        - Volpi et al. Generalizing to Unseen Domains via Adversarial Data
        Augmentation. NIPS 2018.
    """

    # Reuse the digit-5 folder instead of creating a new folder
    dataset_dir = "digit5"
    domains = ["mnist", "mnist_m", "svhn", "syn", "usps"]

    def __init__(self, cfg):
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)

        self.check_input_domains(
            cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS
        )

        train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train")
        val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="test")
        test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test")

        super().__init__(train_x=train, val=val, test=test)

    def _read_data(self, input_domains, split="train"):
        """Build Datum objects for the given domains and split.

        Args:
            input_domains (list): domain names, each one of ``self.domains``.
            split (str): "train" or "test".

        Returns:
            list: Datum objects whose domain label is the position of the
            domain in ``input_domains``.

        Raises:
            ValueError: if a domain name has no loader.
        """
        # Explicit dispatch table instead of eval("load_" + dname): no
        # string is ever evaluated as code and unknown names fail clearly.
        loaders = {
            "mnist": load_mnist,
            "mnist_m": load_mnist_m,
            "svhn": load_svhn,
            "syn": load_syn,
            "usps": load_usps,
        }
        items = []

        for domain, dname in enumerate(input_domains):
            if dname not in loaders:
                raise ValueError(
                    "Input domain must belong to {}, "
                    "but got [{}]".format(sorted(loaders), dname)
                )
            domain_dir = osp.join(self.dataset_dir, dname)
            items_d = loaders[dname](domain_dir, split=split)

            for impath, label in items_d:
                item = Datum(impath=impath, label=label, domain=domain)
                items.append(item)

        return items
Gradient-based learning applied to document + recognition. IEEE 1998. + - Ganin et al. Domain-adversarial training of neural networks. + JMLR 2016. + - Netzer et al. Reading digits in natural images with unsupervised + feature learning. NIPS-W 2011. + - Zhou et al. Deep Domain-Adversarial Image Generation for Domain + Generalisation. AAAI 2020. + """ + + dataset_dir = "digits_dg" + domains = ["mnist", "mnist_m", "svhn", "syn"] + data_url = "https://drive.google.com/uc?id=15V7EsHfCcfbKgsDmzQKj_DfXt_XYp_P7" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "digits_dg.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "train" + ) + val = self.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "val" + ) + test = self.read_data( + self.dataset_dir, cfg.DATASET.TARGET_DOMAINS, "all" + ) + + super().__init__(train_x=train, val=val, test=test) + + @staticmethod + def read_data(dataset_dir, input_domains, split): + + def _load_data_from_directory(directory): + folders = listdir_nohidden(directory) + folders.sort() + items_ = [] + + for label, folder in enumerate(folders): + impaths = glob.glob(osp.join(directory, folder, "*.jpg")) + + for impath in impaths: + items_.append((impath, label)) + + return items_ + + items = [] + + for domain, dname in enumerate(input_domains): + if split == "all": + train_dir = osp.join(dataset_dir, dname, "train") + impath_label_list = _load_data_from_directory(train_dir) + val_dir = osp.join(dataset_dir, dname, "val") + impath_label_list += _load_data_from_directory(val_dir) + else: + split_dir = osp.join(dataset_dir, dname, split) + impath_label_list = _load_data_from_directory(split_dir) + + for impath, 
label in impath_label_list: + class_name = impath.split("/")[-2].lower() + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=class_name + ) + items.append(item) + + return items diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py new file mode 100644 index 00000000..ef08754b --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py @@ -0,0 +1,49 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from .digits_dg import DigitsDG +from ..base_dataset import DatasetBase + + +@DATASET_REGISTRY.register() +class OfficeHomeDG(DatasetBase): + """Office-Home. + + Statistics: + - Around 15,500 images. + - 65 classes related to office and home objects. + - 4 domains: Art, Clipart, Product, Real World. + - URL: http://hemanthdv.org/OfficeHome-Dataset/. + + Reference: + - Venkateswara et al. Deep Hashing Network for Unsupervised + Domain Adaptation. CVPR 2017. 
+ """ + + dataset_dir = "office_home_dg" + domains = ["art", "clipart", "product", "real_world"] + data_url = "https://drive.google.com/uc?id=1gkbf_KaxoBws-GWT3XIPZ7BnkqbAxIFa" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "office_home_dg.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "train" + ) + val = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "val" + ) + test = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.TARGET_DOMAINS, "all" + ) + + super().__init__(train_x=train, val=val, test=test) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/pacs.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/pacs.py new file mode 100644 index 00000000..e0159d49 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/pacs.py @@ -0,0 +1,94 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class PACS(DatasetBase): + """PACS. + + Statistics: + - 4 domains: Photo (1,670), Art (2,048), Cartoon + (2,344), Sketch (3,929). + - 7 categories: dog, elephant, giraffe, guitar, horse, + house and person. + + Reference: + - Li et al. Deeper, broader and artier domain generalization. + ICCV 2017. 
+ """ + + dataset_dir = "pacs" + domains = ["art_painting", "cartoon", "photo", "sketch"] + data_url = "https://drive.google.com/uc?id=1m4X4fROCCXMO0lRLrr6Zz9Vb3974NWhE" + # the following images contain errors and should be ignored + _error_paths = ["sketch/dog/n02103406_4068-1.png"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + self.image_dir = osp.join(self.dataset_dir, "images") + self.split_dir = osp.join(self.dataset_dir, "splits") + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "pacs.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "crossval") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, "all") + + super().__init__(train_x=train, val=val, test=test) + + def _read_data(self, input_domains, split): + items = [] + + for domain, dname in enumerate(input_domains): + if split == "all": + file_train = osp.join( + self.split_dir, dname + "_train_kfold.txt" + ) + impath_label_list = self._read_split_pacs(file_train) + file_val = osp.join( + self.split_dir, dname + "_crossval_kfold.txt" + ) + impath_label_list += self._read_split_pacs(file_val) + else: + file = osp.join( + self.split_dir, dname + "_" + split + "_kfold.txt" + ) + impath_label_list = self._read_split_pacs(file) + + for impath, label in impath_label_list: + classname = impath.split("/")[-2] + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + return items + + def _read_split_pacs(self, split_file): + items = [] + + with open(split_file, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + impath, label = line.split(" ") + if impath in self._error_paths: + continue + impath = 
osp.join(self.image_dir, impath) + label = int(label) - 1 + items.append((impath, label)) + + return items diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py new file mode 100644 index 00000000..77218e2f --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py @@ -0,0 +1,60 @@ +import glob +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class VLCS(DatasetBase): + """VLCS. + + Statistics: + - 4 domains: CALTECH, LABELME, PASCAL, SUN + - 5 categories: bird, car, chair, dog, and person. + + Reference: + - Torralba and Efros. Unbiased look at dataset bias. CVPR 2011. + """ + + dataset_dir = "VLCS" + domains = ["caltech", "labelme", "pascal", "sun"] + data_url = "https://drive.google.com/uc?id=1r0WL5DDqKfSPp9E3tRENwHaXNs1olLZd" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "vlcs.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "crossval") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, "test") + + super().__init__(train_x=train, val=val, test=test) + + def _read_data(self, input_domains, split): + items = [] + + for domain, dname in enumerate(input_domains): + dname = dname.upper() + path = osp.join(self.dataset_dir, dname, split) + folders = listdir_nohidden(path) + folders.sort() + + for label, folder in enumerate(folders): + impaths = glob.glob(osp.join(path, folder, "*.jpg")) + + for impath in impaths: + item = 
Datum(impath=impath, label=label, domain=domain) + items.append(item) + + return items diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py new file mode 100644 index 00000000..2898f7cc --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py @@ -0,0 +1,3 @@ +from .fmow import FMoW +from .iwildcam import IWildCam +from .camelyon17 import Camelyon17 diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py new file mode 100644 index 00000000..fade5ebc --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py @@ -0,0 +1,24 @@ +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + + +@DATASET_REGISTRY.register() +class Camelyon17(WILDSBase): + """Tumor tissue recognition. + + 2 classes (whether a given region of tissue contains tumor tissue). + + Reference: + - Bandi et al. "From detection of individual metastases to classification of lymph + node status at the patient level: the CAMELYON17 challenge." TMI 2021. + - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 
+ """ + + dataset_dir = "camelyon17_v1.0" + + def __init__(self, cfg): + super().__init__(cfg) + + def load_classnames(self): + return {0: "healthy tissue", 1: "tumor tissue"} diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py new file mode 100644 index 00000000..d7398e05 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py @@ -0,0 +1,57 @@ +import os.path as osp + +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + +CATEGORIES = [ + "airport", "airport_hangar", "airport_terminal", "amusement_park", + "aquaculture", "archaeological_site", "barn", "border_checkpoint", + "burial_site", "car_dealership", "construction_site", "crop_field", "dam", + "debris_or_rubble", "educational_institution", "electric_substation", + "factory_or_powerplant", "fire_station", "flooded_road", "fountain", + "gas_station", "golf_course", "ground_transportation_station", "helipad", + "hospital", "impoverished_settlement", "interchange", "lake_or_pond", + "lighthouse", "military_facility", "multi-unit_residential", + "nuclear_powerplant", "office_building", "oil_or_gas_facility", "park", + "parking_lot_or_garage", "place_of_worship", "police_station", "port", + "prison", "race_track", "railway_bridge", "recreational_facility", + "road_bridge", "runway", "shipyard", "shopping_mall", + "single-unit_residential", "smokestack", "solar_farm", "space_facility", + "stadium", "storage_tank", "surface_mine", "swimming_pool", "toll_booth", + "tower", "tunnel_opening", "waste_disposal", "water_treatment_facility", + "wind_farm", "zoo" +] + + +@DATASET_REGISTRY.register() +class FMoW(WILDSBase): + """Satellite imagery classification. + + 62 classes (building or land use categories). + + Reference: + - Christie et al. "Functional Map of the World." CVPR 2018. + - Koh et al. 
"Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. + """ + + dataset_dir = "fmow_v1.1" + + def __init__(self, cfg): + super().__init__(cfg) + + def get_image_path(self, dataset, idx): + idx = dataset.full_idxs[idx] + image_name = f"rgb_img_{idx}.png" + image_path = osp.join(self.dataset_dir, "images", image_name) + return image_path + + def get_domain(self, dataset, idx): + # number of regions: 5 or 6 + # number of years: 16 + region_id = int(dataset.metadata_array[idx][0]) + year_id = int(dataset.metadata_array[idx][1]) + return region_id*16 + year_id + + def load_classnames(self): + return {i: cat for i, cat in enumerate(CATEGORIES)} diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py new file mode 100644 index 00000000..3d1f016c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py @@ -0,0 +1,32 @@ +import os.path as osp +import pandas as pd + +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + + +@DATASET_REGISTRY.register() +class IWildCam(WILDSBase): + """Animal species recognition. + + 182 classes (species). + + Reference: + - Beery et al. "The iwildcam 2021 competition dataset." arXiv 2021. + - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 
+ """ + + dataset_dir = "iwildcam_v2.0" + + def __init__(self, cfg): + super().__init__(cfg) + + def get_image_path(self, dataset, idx): + image_name = dataset._input_array[idx] + image_path = osp.join(self.dataset_dir, "train", image_name) + return image_path + + def load_classnames(self): + df = pd.read_csv(osp.join(self.dataset_dir, "categories.csv")) + return dict(df["name"]) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py new file mode 100644 index 00000000..33232e1e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py @@ -0,0 +1,110 @@ +import logging # isort:skip +logging.disable(logging.WARNING) # isort:skip + +import pickle +import logging +import os.path as osp +from wilds import get_dataset as wilds_get_dataset + +from dassl.data.datasets import Datum, DatasetBase + + +class WILDSBase(DatasetBase): + + dataset_dir = "" + relabel_domain = True + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + name = self.dataset_dir.split("_")[0] + self.dataset_dir = osp.join(root, self.dataset_dir) + self.preloaded = osp.join(self.dataset_dir, "zhou_preloaded.pkl") + + self.label_to_name = self.load_classnames() + assert isinstance(self.label_to_name, dict) + + if osp.exists(self.preloaded): + with open(self.preloaded, "rb") as file: + dataset = pickle.load(file) + train = dataset["train"] + val = dataset["val"] + test = dataset["test"] + else: + dataset = wilds_get_dataset( + dataset=name, root_dir=root, download=True + ) + subset_train = dataset.get_subset("train") + subset_val = dataset.get_subset("val") + subset_test = dataset.get_subset("test") + + train = self.read_data(subset_train) + val = self.read_data(subset_val) + test = self.read_data(subset_test) + + # Save time for data loading next time + preloaded = {"train": train, "val": val, "test": test} + with 
open(self.preloaded, "wb") as file: + pickle.dump(preloaded, file, protocol=pickle.HIGHEST_PROTOCOL) + + # Few-shot learning + k = cfg.DATASET.NUM_SHOTS + if k > 0: + groups = self.split_dataset_by_domain(train) + groups = list(groups.values()) + groups = self.generate_fewshot_dataset(*groups, num_shots=k) + train = [] + for group in groups: + train.extend(group) + + super().__init__(train_x=train, val=val, test=test) + + def load_classnames(self): + raise NotImplementedError + + def get_image_path(self, dataset, idx): + image_name = dataset._input_array[idx] + image_path = osp.join(self.dataset_dir, image_name) + return image_path + + def get_label(self, dataset, idx): + return int(dataset.y_array[idx]) + + def get_domain(self, dataset, idx): + return int(dataset.metadata_array[idx][0]) + + def read_data(self, subset): + items = [] + indices = subset.indices + dataset = subset.dataset + + for idx in indices: + image_path = self.get_image_path(dataset, idx) + label = self.get_label(dataset, idx) + domain = self.get_domain(dataset, idx) + classname = self.label_to_name[label] + item = Datum( + impath=image_path, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + if self.relabel_domain: + domains = set([item.domain for item in items]) + mapping = {domain: i for i, domain in enumerate(domains)} + + items_new = [] + + for item in items: + item_new = Datum( + impath=item.impath, + label=item.label, + domain=mapping[item.domain], + classname=item.classname + ) + items_new.append(item_new) + + return items_new + + return items diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py new file mode 100644 index 00000000..a6607dcc --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py @@ -0,0 +1,3 @@ +from .svhn import SVHN +from .cifar import CIFAR10, CIFAR100 +from .stl10 import STL10 diff --git 
a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py
new file mode 100644
index 00000000..55845279
--- /dev/null
+++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py
@@ -0,0 +1,129 @@
+import math
+import random
+import os.path as osp
+
+from dassl.utils import listdir_nohidden
+
+from ..build import DATASET_REGISTRY
+from ..base_dataset import Datum, DatasetBase
+
+
+@DATASET_REGISTRY.register()
+class CIFAR10(DatasetBase):
+    """CIFAR10 for SSL.
+
+    Reference:
+        - Krizhevsky. Learning Multiple Layers of Features
+        from Tiny Images. Tech report.
+    """
+
+    dataset_dir = "cifar10"
+
+    def __init__(self, cfg):
+        """Build the labeled, unlabeled, validation and test splits.
+
+        Reads ``cfg.DATASET.ROOT``, ``NUM_LABELED``, ``VAL_PERCENT`` and
+        ``ALL_AS_UNLABELED``; images are expected under
+        ``<root>/<dataset_dir>/{train,test}/<class_name>/``.
+        """
+        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
+        self.dataset_dir = osp.join(root, self.dataset_dir)
+        train_dir = osp.join(self.dataset_dir, "train")
+        test_dir = osp.join(self.dataset_dir, "test")
+
+        assert cfg.DATASET.NUM_LABELED > 0
+
+        train_x, train_u, val = self._read_data_train(
+            train_dir, cfg.DATASET.NUM_LABELED, cfg.DATASET.VAL_PERCENT
+        )
+        test = self._read_data_test(test_dir)
+
+        # Optionally let the labeled images double as unlabeled ones
+        if cfg.DATASET.ALL_AS_UNLABELED:
+            train_u = train_u + train_x
+
+        # An empty validation split is passed on as "no validation set"
+        if len(val) == 0:
+            val = None
+
+        super().__init__(train_x=train_x, train_u=train_u, val=val, test=test)
+
+    def _read_data_train(self, data_dir, num_labeled, val_percent):
+        """Split every class folder into labeled/unlabeled/val items.
+
+        Args:
+            data_dir (str): directory with one sub-folder per class.
+            num_labeled (int): total labeled budget, shared evenly
+                across classes.
+            val_percent (float): fraction of each class held out for
+                validation.
+
+        Returns:
+            tuple: three lists of Datum (labeled, unlabeled, val).
+        """
+        class_names = listdir_nohidden(data_dir)
+        class_names.sort()
+        num_labeled_per_class = num_labeled / len(class_names)
+        items_x, items_u, items_v = [], [], []
+
+        for label, class_name in enumerate(class_names):
+            class_dir = osp.join(data_dir, class_name)
+            imnames = listdir_nohidden(class_dir)
+
+            # Split into train and val following Oliver et al. 2018
+            # Set cfg.DATASET.VAL_PERCENT to 0 to not use val data
+            num_val = math.floor(len(imnames) * val_percent)
+            imnames_train = imnames[num_val:]
+            imnames_val = imnames[:num_val]
+
+            # Note we do shuffle after split
+            random.shuffle(imnames_train)
+
+            for i, imname in enumerate(imnames_train):
+                impath = osp.join(class_dir, imname)
+                item = Datum(impath=impath, label=label)
+
+                if (i + 1) <= num_labeled_per_class:
+                    items_x.append(item)
+
+                else:
+                    items_u.append(item)
+
+            for imname in imnames_val:
+                impath = osp.join(class_dir, imname)
+                item = Datum(impath=impath, label=label)
+                items_v.append(item)
+
+        return items_x, items_u, items_v
+
+    def _read_data_test(self, data_dir):
+        """Return one Datum per image found under ``data_dir/<class>/``."""
+        class_names = listdir_nohidden(data_dir)
+        class_names.sort()
+        items = []
+
+        for label, class_name in enumerate(class_names):
+            class_dir = osp.join(data_dir, class_name)
+            imnames = listdir_nohidden(class_dir)
+
+            for imname in imnames:
+                impath = osp.join(class_dir, imname)
+                item = Datum(impath=impath, label=label)
+                items.append(item)
+
+        return items
+
+
+@DATASET_REGISTRY.register()
+class CIFAR100(CIFAR10):
+    """CIFAR100 for SSL.
+
+    Reference:
+        - Krizhevsky. Learning Multiple Layers of Features
+        from Tiny Images. Tech report.
+    """
+
+    dataset_dir = "cifar100"
+
+    def __init__(self, cfg):
+        super().__init__(cfg)
diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py
new file mode 100644
index 00000000..6a1f9f2d
--- /dev/null
+++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py
@@ -0,0 +1,107 @@
+import numpy as np
+import os.path as osp
+
+from dassl.utils import listdir_nohidden
+
+from ..build import DATASET_REGISTRY
+from ..base_dataset import Datum, DatasetBase
+
+
+@DATASET_REGISTRY.register()
+class STL10(DatasetBase):
+    """STL-10 dataset.
+
+    Description:
+    - 10 classes: airplane, bird, car, cat, deer, dog, horse,
+    monkey, ship, truck.
+    - Images are 96x96 pixels, color.
+    - 500 training images per class, 800 test images per class.
+    - 100,000 unlabeled images for unsupervised learning.
+
+    Reference:
+        - Coates et al. An Analysis of Single Layer Networks in
+        Unsupervised Feature Learning. AISTATS 2011.
+    """
+
+    dataset_dir = "stl10"
+
+    def __init__(self, cfg):
+        """Build the labeled (one fold), unlabeled and test splits.
+
+        Reads ``cfg.DATASET.ROOT``, ``STL10_FOLD`` and
+        ``ALL_AS_UNLABELED``.
+        """
+        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
+        self.dataset_dir = osp.join(root, self.dataset_dir)
+        train_dir = osp.join(self.dataset_dir, "train")
+        test_dir = osp.join(self.dataset_dir, "test")
+        unlabeled_dir = osp.join(self.dataset_dir, "unlabeled")
+        fold_file = osp.join(
+            self.dataset_dir, "stl10_binary", "fold_indices.txt"
+        )
+
+        # Only use the first five splits
+        assert 0 <= cfg.DATASET.STL10_FOLD <= 4
+
+        train_x = self._read_data_train(
+            train_dir, cfg.DATASET.STL10_FOLD, fold_file
+        )
+        train_u = self._read_data_all(unlabeled_dir)
+        test = self._read_data_all(test_dir)
+
+        if cfg.DATASET.ALL_AS_UNLABELED:
+            train_u = train_u + train_x
+
+        super().__init__(train_x=train_x, train_u=train_u, test=test)
+
+    def _read_data_train(self, data_dir, fold, fold_file):
+        """Read the labeled training images selected by one fold.
+
+        Args:
+            data_dir (str): directory holding the training images.
+            fold (int): zero-based line number to use in ``fold_file``.
+            fold_file (str): text file with one line of
+                space-separated image indices per fold.
+
+        Returns:
+            list: Datum objects for the selected images.
+        """
+        imnames = listdir_nohidden(data_dir)
+        imnames.sort()
+        items = []
+
+        list_idx = list(range(len(imnames)))
+        if fold >= 0:
+            with open(fold_file, "r") as f:
+                str_idx = f.read().splitlines()[fold]
+                # Indices address the whole training set, so they
+                # must not be parsed as uint8 (which stops at 255).
+                list_idx = np.fromstring(str_idx, dtype=np.int64, sep=" ")
+
+        for i in list_idx:
+            imname = imnames[i]
+            impath = osp.join(data_dir, imname)
+            # The label is the second "_"-separated field of the stem
+            label = osp.splitext(imname)[0].split("_")[1]
+            label = int(label)
+            item = Datum(impath=impath, label=label)
+            items.append(item)
+
+        return items
+
+    def _read_data_all(self, data_dir):
+        """Read every image in ``data_dir``; label "none" maps to -1."""
+        imnames = listdir_nohidden(data_dir)
+        items = []
+
+        for imname in imnames:
+            impath = osp.join(data_dir, imname)
+            label = osp.splitext(imname)[0].split("_")[1]
+            if label == "none":
+                label = -1
+            else:
+                label = int(label)
+            item = Datum(impath=impath, label=label)
+            items.append(item)
+
+        return items
diff --git
a/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py new file mode 100644 index 00000000..15e0de56 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py @@ -0,0 +1,17 @@ +from .cifar import CIFAR10 +from ..build import DATASET_REGISTRY + + +@DATASET_REGISTRY.register() +class SVHN(CIFAR10): + """SVHN for SSL. + + Reference: + - Netzer et al. Reading Digits in Natural Images with + Unsupervised Feature Learning. NIPS-W 2011. + """ + + dataset_dir = "svhn" + + def __init__(self, cfg): + super().__init__(cfg) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/samplers.py b/python/ClipDetection/Dassl.pytorch/dassl/data/samplers.py new file mode 100644 index 00000000..562bfbca --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/samplers.py @@ -0,0 +1,205 @@ +import copy +import numpy as np +import random +from collections import defaultdict +from torch.utils.data.sampler import Sampler, RandomSampler, SequentialSampler + + +class RandomDomainSampler(Sampler): + """Randomly samples N domains each with K images + to form a minibatch of size N*K. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. + n_domain (int): number of domains to sample in a minibatch. 
+ """ + + def __init__(self, data_source, batch_size, n_domain): + self.data_source = data_source + + # Keep track of image indices for each domain + self.domain_dict = defaultdict(list) + for i, item in enumerate(data_source): + self.domain_dict[item.domain].append(i) + self.domains = list(self.domain_dict.keys()) + + # Make sure each domain has equal number of images + if n_domain is None or n_domain <= 0: + n_domain = len(self.domains) + assert batch_size % n_domain == 0 + self.n_img_per_domain = batch_size // n_domain + + self.batch_size = batch_size + # n_domain denotes number of domains sampled in a minibatch + self.n_domain = n_domain + self.length = len(list(self.__iter__())) + + def __iter__(self): + domain_dict = copy.deepcopy(self.domain_dict) + final_idxs = [] + stop_sampling = False + + while not stop_sampling: + selected_domains = random.sample(self.domains, self.n_domain) + + for domain in selected_domains: + idxs = domain_dict[domain] + selected_idxs = random.sample(idxs, self.n_img_per_domain) + final_idxs.extend(selected_idxs) + + for idx in selected_idxs: + domain_dict[domain].remove(idx) + + remaining = len(domain_dict[domain]) + if remaining < self.n_img_per_domain: + stop_sampling = True + + return iter(final_idxs) + + def __len__(self): + return self.length + + +class SeqDomainSampler(Sampler): + """Sequential domain sampler, which randomly samples K + images from each domain to form a minibatch. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. 
+ """ + + def __init__(self, data_source, batch_size): + self.data_source = data_source + + # Keep track of image indices for each domain + self.domain_dict = defaultdict(list) + for i, item in enumerate(data_source): + self.domain_dict[item.domain].append(i) + self.domains = list(self.domain_dict.keys()) + self.domains.sort() + + # Make sure each domain has equal number of images + n_domain = len(self.domains) + assert batch_size % n_domain == 0 + self.n_img_per_domain = batch_size // n_domain + + self.batch_size = batch_size + # n_domain denotes number of domains sampled in a minibatch + self.n_domain = n_domain + self.length = len(list(self.__iter__())) + + def __iter__(self): + domain_dict = copy.deepcopy(self.domain_dict) + final_idxs = [] + stop_sampling = False + + while not stop_sampling: + for domain in self.domains: + idxs = domain_dict[domain] + selected_idxs = random.sample(idxs, self.n_img_per_domain) + final_idxs.extend(selected_idxs) + + for idx in selected_idxs: + domain_dict[domain].remove(idx) + + remaining = len(domain_dict[domain]) + if remaining < self.n_img_per_domain: + stop_sampling = True + + return iter(final_idxs) + + def __len__(self): + return self.length + + +class RandomClassSampler(Sampler): + """Randomly samples N classes each with K instances to + form a minibatch of size N*K. + + Modified from https://github.com/KaiyangZhou/deep-person-reid. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. + n_ins (int): number of instances per class to sample in a minibatch. 
+ """ + + def __init__(self, data_source, batch_size, n_ins): + if batch_size < n_ins: + raise ValueError( + "batch_size={} must be no less " + "than n_ins={}".format(batch_size, n_ins) + ) + + self.data_source = data_source + self.batch_size = batch_size + self.n_ins = n_ins + self.ncls_per_batch = self.batch_size // self.n_ins + self.index_dic = defaultdict(list) + for index, item in enumerate(data_source): + self.index_dic[item.label].append(index) + self.labels = list(self.index_dic.keys()) + assert len(self.labels) >= self.ncls_per_batch + + # estimate number of images in an epoch + self.length = len(list(self.__iter__())) + + def __iter__(self): + batch_idxs_dict = defaultdict(list) + + for label in self.labels: + idxs = copy.deepcopy(self.index_dic[label]) + if len(idxs) < self.n_ins: + idxs = np.random.choice(idxs, size=self.n_ins, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.n_ins: + batch_idxs_dict[label].append(batch_idxs) + batch_idxs = [] + + avai_labels = copy.deepcopy(self.labels) + final_idxs = [] + + while len(avai_labels) >= self.ncls_per_batch: + selected_labels = random.sample(avai_labels, self.ncls_per_batch) + for label in selected_labels: + batch_idxs = batch_idxs_dict[label].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[label]) == 0: + avai_labels.remove(label) + + return iter(final_idxs) + + def __len__(self): + return self.length + + +def build_sampler( + sampler_type, + cfg=None, + data_source=None, + batch_size=32, + n_domain=0, + n_ins=16 +): + if sampler_type == "RandomSampler": + return RandomSampler(data_source) + + elif sampler_type == "SequentialSampler": + return SequentialSampler(data_source) + + elif sampler_type == "RandomDomainSampler": + return RandomDomainSampler(data_source, batch_size, n_domain) + + elif sampler_type == "SeqDomainSampler": + return SeqDomainSampler(data_source, batch_size) + + elif sampler_type == 
"RandomClassSampler": + return RandomClassSampler(data_source, batch_size, n_ins) + + else: + raise ValueError("Unknown sampler type: {}".format(sampler_type)) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/__init__.py new file mode 100644 index 00000000..02c05d67 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/__init__.py @@ -0,0 +1 @@ +from .transforms import INTERPOLATION_MODES, build_transform diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/autoaugment.py b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/autoaugment.py new file mode 100644 index 00000000..2e14fcee --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/autoaugment.py @@ -0,0 +1,273 @@ +""" +Source: https://github.com/DeepVoltaire/AutoAugment +""" +import numpy as np +import random +from PIL import Image, ImageOps, ImageEnhance + + +class ImageNetPolicy: + """Randomly choose one of the best 24 Sub-policies on ImageNet. 
+ + Example: + >>> policy = ImageNetPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), + SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), + SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), + SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), + SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), + SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), + SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), + SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), + SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), + SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), + SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + ] + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return 
class CIFAR10Policy:
    """AutoAugment policy for CIFAR10: one of 25 sub-policies, picked at random per call.

    Example:
    >>> policy = CIFAR10Policy()
    >>> transformed = policy(image)

    Example as a PyTorch Transform:
    >>> transform=transforms.Compose([
    >>>     transforms.Resize(256),
    >>>     CIFAR10Policy(),
    >>>     transforms.ToTensor()])
    """

    def __init__(self, fillcolor=(128, 128, 128)):
        # Each row is (p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2),
        # i.e. the positional arguments of SubPolicy ahead of the fill color.
        specs = (
            (0.1, "invert", 7, 0.2, "contrast", 6),
            (0.7, "rotate", 2, 0.3, "translateX", 9),
            (0.8, "sharpness", 1, 0.9, "sharpness", 3),
            (0.5, "shearY", 8, 0.7, "translateY", 9),
            (0.5, "autocontrast", 8, 0.9, "equalize", 2),
            (0.2, "shearY", 7, 0.3, "posterize", 7),
            (0.4, "color", 3, 0.6, "brightness", 7),
            (0.3, "sharpness", 9, 0.7, "brightness", 9),
            (0.6, "equalize", 5, 0.5, "equalize", 1),
            (0.6, "contrast", 7, 0.6, "sharpness", 5),
            (0.7, "color", 7, 0.5, "translateX", 8),
            (0.3, "equalize", 7, 0.4, "autocontrast", 8),
            (0.4, "translateY", 3, 0.2, "sharpness", 6),
            (0.9, "brightness", 6, 0.2, "color", 8),
            (0.5, "solarize", 2, 0.0, "invert", 3),
            (0.2, "equalize", 0, 0.6, "autocontrast", 0),
            (0.2, "equalize", 8, 0.6, "equalize", 4),
            (0.9, "color", 9, 0.6, "equalize", 6),
            (0.8, "autocontrast", 4, 0.2, "solarize", 8),
            (0.1, "brightness", 3, 0.7, "color", 0),
            (0.4, "solarize", 5, 0.9, "autocontrast", 3),
            (0.9, "translateY", 9, 0.7, "translateY", 9),
            (0.9, "autocontrast", 2, 0.8, "solarize", 3),
            (0.8, "equalize", 8, 0.1, "invert", 3),
            (0.7, "translateY", 9, 0.9, "autocontrast", 1),
        )
        self.policies = [SubPolicy(*spec, fillcolor) for spec in specs]

    def __call__(self, img):
        # Draw a uniform index over the sub-policies and apply that one.
        last_idx = len(self.policies) - 1
        chosen = self.policies[random.randint(0, last_idx)]
        return chosen(img)

    def __repr__(self):
        return "AutoAugment CIFAR10 Policy"
"translateY", 7, fillcolor), + SubPolicy(0.8, "shearY", 4, 0.8, "invert", 8, fillcolor), + SubPolicy(0.7, "shearX", 9, 0.8, "translateY", 3, fillcolor), + SubPolicy(0.8, "shearY", 5, 0.7, "autocontrast", 3, fillcolor), + SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor), + ] + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment SVHN Policy" + + +class SubPolicy(object): + + def __init__( + self, + p1, + operation1, + magnitude_idx1, + p2, + operation2, + magnitude_idx2, + fillcolor=(128, 128, 128), + ): + ranges = { + "shearX": np.linspace(0, 0.3, 10), + "shearY": np.linspace(0, 0.3, 10), + "translateX": np.linspace(0, 150 / 331, 10), + "translateY": np.linspace(0, 150 / 331, 10), + "rotate": np.linspace(0, 30, 10), + "color": np.linspace(0.0, 0.9, 10), + "posterize": np.round(np.linspace(8, 4, 10), 0).astype(np.int), + "solarize": np.linspace(256, 0, 10), + "contrast": np.linspace(0.0, 0.9, 10), + "sharpness": np.linspace(0.0, 0.9, 10), + "brightness": np.linspace(0.0, 0.9, 10), + "autocontrast": [0] * 10, + "equalize": [0] * 10, + "invert": [0] * 10, + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite( + rot, Image.new("RGBA", rot.size, (128, ) * 4), rot + ).convert(img.mode) + + func = { + "shearX": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "shearY": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "translateX": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + ( + 1, 0, magnitude 
* img.size[0] * random.choice([-1, 1]), 0, + 1, 0 + ), + fillcolor=fillcolor, + ), + "translateY": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + ( + 1, 0, 0, 0, 1, magnitude * img.size[1] * random. + choice([-1, 1]) + ), + fillcolor=fillcolor, + ), + "rotate": + lambda img, magnitude: rotate_with_fill(img, magnitude), + "color": + lambda img, magnitude: ImageEnhance.Color(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": + lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": + lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": + lambda img, magnitude: ImageEnhance.Contrast(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "sharpness": + lambda img, magnitude: ImageEnhance.Sharpness(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "brightness": + lambda img, magnitude: ImageEnhance.Brightness(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "autocontrast": + lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": + lambda img, magnitude: ImageOps.equalize(img), + "invert": + lambda img, magnitude: ImageOps.invert(img), + } + + self.p1 = p1 + self.operation1 = func[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + self.p2 = p2 + self.operation2 = func[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + if random.random() < self.p1: + img = self.operation1(img, self.magnitude1) + if random.random() < self.p2: + img = self.operation2(img, self.magnitude2) + return img diff --git a/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/randaugment.py b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/randaugment.py new file mode 100644 index 00000000..5c39ff3e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/randaugment.py @@ -0,0 +1,363 @@ +""" +Credit to +1) https://github.com/ildoonet/pytorch-randaugment +2) 
def SolarizeAdd(img, addition=0, threshold=128):
    """Add a constant to every pixel, clip to [0, 255], then solarize.

    Args:
        img: image accepted by ``np.array`` (a PIL image in this module).
        addition: value added to every pixel before clipping.
        threshold: threshold forwarded to ``PIL.ImageOps.solarize``.

    Returns:
        The solarized PIL image.
    """
    # Widen before adding so the sum cannot wrap around in uint8. The
    # ``np.int`` alias was removed in NumPy 1.24, so use the builtin ``int``.
    img_np = np.array(img).astype(int)
    img_np = img_np + addition
    img_np = np.clip(img_np, 0, 255)
    img_np = img_np.astype(np.uint8)
    img = Image.fromarray(img_np)
    return PIL.ImageOps.solarize(img, threshold)
class CutoutDefault:
    """Zero out one randomly centred square patch of a tensor image, in place.

    Dimensions 1 and 2 of the input are used as height and width.

    Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
    """

    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        height = img.size(1)
        width = img.size(2)

        # Patch centre: the row is drawn first, then the column.
        centre_y = np.random.randint(height)
        centre_x = np.random.randint(width)

        # Patch bounds, clipped to the image borders.
        half = self.length // 2
        top = np.clip(centre_y - half, 0, height)
        bottom = np.clip(centre_y + half, 0, height)
        left = np.clip(centre_x - half, 0, width)
        right = np.clip(centre_x + half, 0, width)

        keep = np.ones((height, width), np.float32)
        keep[top:bottom, left:right] = 0.0

        keep = torch.from_numpy(keep).expand_as(img)
        # In-place multiply: the caller's tensor is modified and returned.
        img *= keep
        return img
class RandAugment:
    """Apply ``n`` randomly drawn operations at a fixed magnitude level.

    Args:
        n (int): number of operations drawn (with replacement) per call.
        m (int): magnitude level; must satisfy ``0 <= m <= 30``. It is mapped
            linearly onto each operation's ``(minval, maxval)`` range.
    """

    def __init__(self, n=2, m=10):
        assert 0 <= m <= 30
        self.n = n
        self.m = m
        self.augment_list = randaugment_list()

    def __call__(self, img):
        picked = random.choices(self.augment_list, k=self.n)

        for transform, low, high in picked:
            strength = (self.m / 30) * (high-low) + low
            img = transform(img, strength)

        return img
b/python/ClipDetection/Dassl.pytorch/dassl/data/transforms/transforms.py @@ -0,0 +1,319 @@ +from PIL import Image +import numpy as np +import random +import torch +import torchvision.transforms.functional as F +from torchvision.transforms import ( + Resize, Compose, ToTensor, Normalize, CenterCrop, RandomCrop, ColorJitter, + RandomApply, GaussianBlur, RandomGrayscale, RandomResizedCrop, + RandomHorizontalFlip +) + +from .autoaugment import SVHNPolicy, CIFAR10Policy, ImageNetPolicy +from .randaugment import RandAugment, RandAugment2, RandAugmentFixMatch + +AVAI_CHOICES = [ + "random_flip", + "random_resized_crop", + "normalize", + "instance_norm", + "random_crop", + "random_translation", + "center_crop", # This has become a default operation during testing + "cutout", + "imagenet_policy", + "cifar10_policy", + "svhn_policy", + "randaugment", + "randaugment_fixmatch", + "randaugment2", + "gaussian_noise", + "colorjitter", + "randomgrayscale", + "gaussian_blur", +] + +INTERPOLATION_MODES = { + "bilinear": Image.BILINEAR, + "bicubic": Image.BICUBIC, + "nearest": Image.NEAREST, +} + + +class Random2DTranslation: + """Given an image of (height, width), we resize it to + (height*1.125, width*1.125), and then perform random cropping. + + Args: + height (int): target image height. + width (int): target image width. + p (float, optional): probability that this operation takes place. + Default is 0.5. + interpolation (int, optional): desired interpolation. 
class InstanceNormalization:
    """Standardize every channel of an image by its own mean and standard deviation.

    Reference:
        - Ulyanov et al. Instance normalization: The missing ingredient
        for fast stylization. ArXiv 2016.
        - Shu et al. A DIRT-T Approach to Unsupervised Domain Adaptation.
        ICLR 2018.
    """

    def __init__(self, eps=1e-8):
        self.eps = eps

    def __call__(self, img):
        # The input must have exactly three dimensions: (C, H, W).
        channels, height, width = img.shape
        flat = img.reshape(channels, height * width)
        channel_mean = flat.mean(1).view(channels, 1, 1)
        channel_std = flat.std(1).view(channels, 1, 1)
        centred = img - channel_mean
        # eps keeps the division finite for constant channels.
        return centred / (channel_std + self.eps)
class GaussianNoise:
    """Add gaussian noise.

    With probability controlled by ``p``, noise drawn from a standard normal,
    scaled by ``std`` and shifted by ``mean``, is added to the input tensor.
    Otherwise the input is returned unchanged.
    """

    def __init__(self, mean=0, std=0.15, p=0.5):
        self.mean = mean
        self.std = std
        self.p = p

    def __call__(self, img):
        apply_noise = not (random.uniform(0, 1) > self.p)
        if apply_noise:
            perturbation = torch.randn(img.size()) * self.std + self.mean
            return img + perturbation
        return img
+ """ + if cfg.INPUT.NO_TRANSFORM: + print("Note: no transform is applied!") + return None + + if choices is None: + choices = cfg.INPUT.TRANSFORMS + + for choice in choices: + assert choice in AVAI_CHOICES + + target_size = f"{cfg.INPUT.SIZE[0]}x{cfg.INPUT.SIZE[1]}" + + normalize = Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD) + + if is_train: + return _build_transform_train(cfg, choices, target_size, normalize) + else: + return _build_transform_test(cfg, choices, target_size, normalize) + + +def _build_transform_train(cfg, choices, target_size, normalize): + tfm_train = [] + + interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION] + input_size = cfg.INPUT.SIZE + + # Make sure the image size matches the target size + conditions = [] + conditions += ["random_crop" not in choices] + conditions += ["random_resized_crop" not in choices] + if all(conditions): + tfm_train += [Resize(input_size, interpolation=interp_mode)] + + if "random_translation" in choices: + tfm_train += [Random2DTranslation(input_size[0], input_size[1])] + + if "random_crop" in choices: + crop_padding = cfg.INPUT.CROP_PADDING + tfm_train += [RandomCrop(input_size, padding=crop_padding)] + + if "random_resized_crop" in choices: + s_ = cfg.INPUT.RRCROP_SCALE + tfm_train += [ + RandomResizedCrop(input_size, scale=s_, interpolation=interp_mode) + ] + + if "random_flip" in choices: + tfm_train += [RandomHorizontalFlip()] + + if "imagenet_policy" in choices: + tfm_train += [ImageNetPolicy()] + + if "cifar10_policy" in choices: + tfm_train += [CIFAR10Policy()] + + if "svhn_policy" in choices: + tfm_train += [SVHNPolicy()] + + if "randaugment" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + m_ = cfg.INPUT.RANDAUGMENT_M + tfm_train += [RandAugment(n_, m_)] + + if "randaugment_fixmatch" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + tfm_train += [RandAugmentFixMatch(n_)] + + if "randaugment2" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + tfm_train += [RandAugment2(n_)] + + if "colorjitter" in 
def _build_transform_test(cfg, choices, target_size, normalize):
    """Compose the test-time pipeline: resize, center crop, to-tensor, optional norms.

    Args:
        cfg: config; ``cfg.INPUT.INTERPOLATION`` and ``cfg.INPUT.SIZE`` are read.
        choices: transform names; only "normalize" and "instance_norm" are
            consulted here.
        target_size: not used by this function.
        normalize: transform appended when "normalize" is in ``choices``.

    Returns:
        The composed transform.
    """
    interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION]
    input_size = cfg.INPUT.SIZE

    # Fixed prefix: resize to the larger target side, then crop to the exact size.
    pipeline = [
        Resize(max(input_size), interpolation=interp_mode),
        CenterCrop(input_size),
        ToTensor(),
    ]

    if "normalize" in choices:
        pipeline.append(normalize)

    if "instance_norm" in choices:
        pipeline.append(InstanceNormalization())

    return Compose(pipeline)
def build_trainer(cfg, classnames=None, device_id=-1):
    """Instantiate the trainer registered under ``cfg.TRAINER.NAME``.

    Args:
        cfg: config; ``cfg.TRAINER.NAME`` selects the trainer and
            ``cfg.VERBOSE`` enables the log line.
        classnames (list, optional): forwarded to the trainer constructor.
            Defaults to a fresh empty list on every call.
        device_id (int, optional): forwarded to the trainer constructor.
            Default is -1.

    Returns:
        The constructed trainer instance.
    """
    # A literal ``[]`` default is created once and shared by every call, so a
    # trainer that mutates it would leak state into later trainers.
    if classnames is None:
        classnames = []

    avai_trainers = TRAINER_REGISTRY.registered_names()
    check_availability(cfg.TRAINER.NAME, avai_trainers)
    if cfg.VERBOSE:
        print("Loading trainer: {}".format(cfg.TRAINER.NAME))
    return TRAINER_REGISTRY.get(cfg.TRAINER.NAME)(cfg, classnames, device_id)
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.done_reset_bn_stats = False + + def check_cfg(self, cfg): + assert check_isfile( + cfg.MODEL.INIT_WEIGHTS + ), "The weights of source model must be provided" + + def before_epoch(self): + if not self.done_reset_bn_stats: + for m in self.model.modules(): + classname = m.__class__.__name__ + if classname.find("BatchNorm") != -1: + m.reset_running_stats() + + self.done_reset_bn_stats = True + + def forward_backward(self, batch_x, batch_u): + input_u = batch_u["img"].to(self.device) + + with torch.no_grad(): + self.model(input_u) + + return None diff --git a/python/ClipDetection/Dassl.pytorch/dassl/engine/da/adda.py b/python/ClipDetection/Dassl.pytorch/dassl/engine/da/adda.py new file mode 100644 index 00000000..a9018e78 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/engine/da/adda.py @@ -0,0 +1,85 @@ +import copy +import torch +import torch.nn as nn + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import check_isfile, count_num_param, open_specified_layers +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.modeling import build_head + + +@TRAINER_REGISTRY.register() +class ADDA(TrainerXU): + """Adversarial Discriminative Domain Adaptation. + + https://arxiv.org/abs/1702.05464. 
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.open_layers = ["backbone"] + if isinstance(self.model.head, nn.Module): + self.open_layers.append("head") + + self.source_model = copy.deepcopy(self.model) + self.source_model.eval() + for param in self.source_model.parameters(): + param.requires_grad_(False) + + self.build_critic() + + self.bce = nn.BCEWithLogitsLoss() + + def check_cfg(self, cfg): + assert check_isfile( + cfg.MODEL.INIT_WEIGHTS + ), "The weights of source model must be provided" + + def build_critic(self): + cfg = self.cfg + + print("Building critic network") + fdim = self.model.fdim + critic_body = build_head( + "mlp", + verbose=cfg.VERBOSE, + in_features=fdim, + hidden_layers=[fdim, fdim // 2], + activation="leaky_relu", + ) + self.critic = nn.Sequential(critic_body, nn.Linear(fdim // 2, 1)) + print("# params: {:,}".format(count_num_param(self.critic))) + self.critic.to(self.device) + self.optim_c = build_optimizer(self.critic, cfg.OPTIM) + self.sched_c = build_lr_scheduler(self.optim_c, cfg.OPTIM) + self.register_model("critic", self.critic, self.optim_c, self.sched_c) + + def forward_backward(self, batch_x, batch_u): + open_specified_layers(self.model, self.open_layers) + input_x, _, input_u = self.parse_batch_train(batch_x, batch_u) + domain_x = torch.ones(input_x.shape[0], 1).to(self.device) + domain_u = torch.zeros(input_u.shape[0], 1).to(self.device) + + _, feat_x = self.source_model(input_x, return_feature=True) + _, feat_u = self.model(input_u, return_feature=True) + + logit_xd = self.critic(feat_x) + logit_ud = self.critic(feat_u.detach()) + + loss_critic = self.bce(logit_xd, domain_x) + loss_critic += self.bce(logit_ud, domain_u) + self.model_backward_and_update(loss_critic, "critic") + + logit_ud = self.critic(feat_u) + loss_model = self.bce(logit_ud, 1 - domain_u) + self.model_backward_and_update(loss_model, "model") + + loss_summary = { + "loss_critic": loss_critic.item(), + "loss_model": loss_model.item(), + } + + 
def custom_scheduler(iter, max_iter=None, alpha=10, beta=0.75, init_lr=0.001):
    """Custom LR Annealing

    https://arxiv.org/pdf/1409.7495.pdf

    Returns ``init_lr`` when ``max_iter`` is None. Otherwise returns
    ``(1 + alpha * iter / max_iter) ** (-beta)``; ``init_lr`` is not used
    in that case.
    """
    if max_iter is not None:
        progress = float(iter / max_iter)
        return (1 + progress * alpha)**(-1.0 * beta)
    return init_lr
def build_model(self):
    """Build the feature extractor ``F`` and prototype classifier ``C``.

    Sets ``self.num_batches`` and ``self.max_iter`` from
    ``cfg.TRAIN.COUNT_ITER``, then creates an optimizer and a ``LambdaLR``
    schedule driven by ``custom_scheduler`` for each network, and registers
    both under the names "F" and "C".
    """
    cfg = self.cfg

    # Custom LR Scheduler for CDAC
    count_iter = cfg.TRAIN.COUNT_ITER
    if count_iter == "train_x":
        self.num_batches = len(self.train_loader_x)
    elif count_iter == "train_u":
        # The unlabeled loader is stored as ``train_loader_u`` by
        # build_data_loader; ``len_train_loader_u`` is never assigned.
        self.num_batches = len(self.train_loader_u)
    elif count_iter == "smaller_one":
        self.num_batches = min(
            len(self.train_loader_x), len(self.train_loader_u)
        )
    # NOTE(review): any other COUNT_ITER value leaves num_batches untouched
    # here -- confirm whether the base trainer validates it.
    self.max_iter = self.max_epoch * self.num_batches
    print("Max Iterations: %d" % self.max_iter)

    print("Building F")
    self.F = SimpleNet(cfg, cfg.MODEL, 0)
    self.F.to(self.device)
    print("# params: {:,}".format(count_num_param(self.F)))
    self.optim_F = build_optimizer(self.F, cfg.OPTIM)
    custom_lr_F = partial(
        custom_scheduler, max_iter=self.max_iter, init_lr=cfg.OPTIM.LR
    )
    self.sched_F = LambdaLR(self.optim_F, custom_lr_F)
    self.register_model("F", self.F, self.optim_F, self.sched_F)

    print("Building C")
    self.C = Prototypes(self.F.fdim, self.num_classes)
    self.C.to(self.device)
    print("# params: {:,}".format(count_num_param(self.C)))
    self.optim_C = build_optimizer(self.C, cfg.OPTIM)

    # Multiply the learning rate of C by lr_multi
    for group_param in self.optim_C.param_groups:
        group_param['lr'] *= self.lr_multi
    custom_lr_C = partial(
        custom_scheduler,
        max_iter=self.max_iter,
        init_lr=cfg.OPTIM.LR * self.lr_multi
    )
    self.sched_C = LambdaLR(self.optim_C, custom_lr_C)
    self.register_model("C", self.C, self.optim_C, self.sched_C)
@staticmethod
def get_similarity_matrix(feat, topk, device):
    """Build the binary pairwise similarity matrix ``s_ij``.

    Two samples are marked similar (1) when the *set* of indices of their
    ``topk`` largest feature dimensions is identical, otherwise 0.

    Args:
        feat: 2-D tensor, one feature row per sample (the code indexes
            ``dim=1`` and ``shape[0]``). It is detached, so no gradient
            flows through the result.
        topk: number of leading feature dimensions to compare.
        device: device on which the returned matrix is placed.

    Returns:
        Tensor of shape ``(feat.shape[0], feat.shape[0])`` in the default
        floating dtype, holding 0/1 entries.
    """
    # Indices of the top-k dimensions per row, sorted so that only the set
    # of indices matters and not their rank order.
    topk_idx = torch.argsort(feat.detach(), dim=1, descending=True)[:, :topk]
    topk_idx = torch.sort(topk_idx, dim=1)[0]

    # Compare every row with every other row in one broadcast instead of a
    # Python loop with masked assignment per row.
    same = (topk_idx.unsqueeze(1) == topk_idx.unsqueeze(0)).all(dim=2)
    return same.to(device=device, dtype=torch.get_default_dtype())
def check_cfg(self, cfg):
    """Validate the configuration options DAEL depends on.

    Args:
        cfg: config node exposing ``DATALOADER.TRAIN_X.SAMPLER``,
            ``DATALOADER.TRAIN_U.SAME_AS_X`` and
            ``TRAINER.DAEL.STRONG_TRANSFORMS``.

    Raises:
        AssertionError: with a message naming the offending option when a
            requirement is not met.
    """
    # The training step splits each labeled batch into per-domain chunks,
    # presumably relying on this sampler to group samples by domain.
    assert cfg.DATALOADER.TRAIN_X.SAMPLER == "RandomDomainSampler", (
        "DAEL requires DATALOADER.TRAIN_X.SAMPLER to be "
        "'RandomDomainSampler'"
    )
    assert not cfg.DATALOADER.TRAIN_U.SAME_AS_X, (
        "DAEL requires DATALOADER.TRAIN_U.SAME_AS_X to be False"
    )
    assert len(cfg.TRAINER.DAEL.STRONG_TRANSFORMS) > 0, (
        "Strong augmentations are necessary to run DAEL"
    )
parsed_data = self.parse_batch_train(batch_x, batch_u) + input_x, input_x2, label_x, domain_x, input_u, input_u2 = parsed_data + + input_x = torch.split(input_x, self.split_batch, 0) + input_x2 = torch.split(input_x2, self.split_batch, 0) + label_x = torch.split(label_x, self.split_batch, 0) + domain_x = torch.split(domain_x, self.split_batch, 0) + domain_x = [d[0].item() for d in domain_x] + + # Generate pseudo label + with torch.no_grad(): + feat_u = self.F(input_u) + pred_u = [] + for k in range(self.num_source_domains): + pred_uk = self.E(k, feat_u) + pred_uk = pred_uk.unsqueeze(1) + pred_u.append(pred_uk) + pred_u = torch.cat(pred_u, 1) # (B, K, C) + # Get the highest probability and index (label) for each expert + experts_max_p, experts_max_idx = pred_u.max(2) # (B, K) + # Get the most confident expert + max_expert_p, max_expert_idx = experts_max_p.max(1) # (B) + pseudo_label_u = [] + for i, experts_label in zip(max_expert_idx, experts_max_idx): + pseudo_label_u.append(experts_label[i]) + pseudo_label_u = torch.stack(pseudo_label_u, 0) + pseudo_label_u = create_onehot(pseudo_label_u, self.num_classes) + pseudo_label_u = pseudo_label_u.to(self.device) + label_u_mask = (max_expert_p >= self.conf_thre).float() + + loss_x = 0 + loss_cr = 0 + acc_x = 0 + + feat_x = [self.F(x) for x in input_x] + feat_x2 = [self.F(x) for x in input_x2] + feat_u2 = self.F(input_u2) + + for feat_xi, feat_x2i, label_xi, i in zip( + feat_x, feat_x2, label_x, domain_x + ): + cr_s = [j for j in domain_x if j != i] + + # Learning expert + pred_xi = self.E(i, feat_xi) + loss_x += (-label_xi * torch.log(pred_xi + 1e-5)).sum(1).mean() + expert_label_xi = pred_xi.detach() + acc_x += compute_accuracy(pred_xi.detach(), + label_xi.max(1)[1])[0].item() + + # Consistency regularization + cr_pred = [] + for j in cr_s: + pred_j = self.E(j, feat_x2i) + pred_j = pred_j.unsqueeze(1) + cr_pred.append(pred_j) + cr_pred = torch.cat(cr_pred, 1) + cr_pred = cr_pred.mean(1) + loss_cr += ((cr_pred - 
def model_inference(self, input):
    """Return the mean prediction of all source-domain experts.

    The input is passed through ``self.F`` once; every expert
    ``self.E(k, .)`` for ``k`` in ``range(self.num_source_domains)`` is then
    evaluated on those features and the outputs are averaged over experts.
    """
    features = self.F(input)
    per_expert = [
        self.E(expert_idx, features).unsqueeze(1)
        for expert_idx in range(self.num_source_domains)
    ]
    # Experts are stacked along dim 1, which is then averaged away.
    return torch.cat(per_expert, 1).mean(1)
def build_critic(self):
    """Create the domain critic and register it with the trainer.

    The critic is an MLP head (two hidden layers of width ``fdim``, leaky
    ReLU) followed by a linear layer producing one output per sample. It
    gets its own optimizer and LR scheduler built from ``cfg.OPTIM`` and is
    registered under the name ``"critic"``. A ``ReverseGrad`` op is stored
    on ``self.revgrad``.
    """
    cfg = self.cfg

    print("Building critic network")
    feat_dim = self.model.fdim
    mlp_trunk = build_head(
        "mlp",
        verbose=cfg.VERBOSE,
        in_features=feat_dim,
        hidden_layers=[feat_dim] * 2,
        activation="leaky_relu",
    )
    domain_out = nn.Linear(feat_dim, 1)
    self.critic = nn.Sequential(mlp_trunk, domain_out)
    print("# params: {:,}".format(count_num_param(self.critic)))
    self.critic.to(self.device)

    optim_cfg = cfg.OPTIM
    self.optim_c = build_optimizer(self.critic, optim_cfg)
    self.sched_c = build_lr_scheduler(self.optim_c, optim_cfg)
    self.register_model("critic", self.critic, self.optim_c, self.sched_c)

    self.revgrad = ReverseGrad()
class PairClassifiers(nn.Module):
    """Two independent linear classifiers applied to the same features.

    In training mode ``forward`` returns the logits of both classifiers as
    a tuple; in evaluation mode only the first classifier is evaluated and
    its logits are returned alone.
    """

    def __init__(self, fdim, num_classes):
        super().__init__()
        self.c1 = nn.Linear(fdim, num_classes)
        self.c2 = nn.Linear(fdim, num_classes)

    def forward(self, x):
        first = self.c1(x)
        if self.training:
            return first, self.c2(x)
        return first
def check_cfg(self, cfg):
    """Validate the configuration options M3SDA depends on.

    Args:
        cfg: config node exposing ``DATALOADER.TRAIN_X.SAMPLER`` and
            ``DATALOADER.TRAIN_U.SAME_AS_X``.

    Raises:
        AssertionError: with a message naming the offending option when a
            requirement is not met.
    """
    # The training step splits each labeled batch into per-domain chunks,
    # presumably relying on this sampler to group samples by domain.
    assert cfg.DATALOADER.TRAIN_X.SAMPLER == "RandomDomainSampler", (
        "M3SDA requires DATALOADER.TRAIN_X.SAMPLER to be "
        "'RandomDomainSampler'"
    )
    assert not cfg.DATALOADER.TRAIN_U.SAME_AS_X, (
        "M3SDA requires DATALOADER.TRAIN_U.SAME_AS_X to be False"
    )
def moment_distance(self, x, u):
    """Average of the first- and second-moment pairwise distances.

    Args:
        x (list): a list of feature matrices.
        u (torch.Tensor): a feature matrix.

    Returns:
        ``(d_mean + d_var) / 2`` where ``d_mean`` / ``d_var`` are
        ``self.pairwise_distance`` evaluated on the means / variances
        taken over dim 0 of every matrix.
    """
    first_order = self.pairwise_distance(
        [src.mean(0) for src in x], u.mean(0)
    )
    second_order = self.pairwise_distance(
        [src.var(0) for src in x], u.var(0)
    )
    return (first_order + second_order) / 2
def model_inference(self, input):
    """Return the softmax output averaged over all classifiers in ``self.C``.

    Features are extracted once with ``self.F``; each element of ``self.C``
    is applied to them, its output is soft-maxed over dim 1, and the
    results are summed and divided by ``len(self.C)``.
    """
    features = self.F(input)
    prob_total = sum(F.softmax(head(features), 1) for head in self.C)
    return prob_total / len(self.C)
def forward_backward(self, batch_x, batch_u):
    """Run one MCD training iteration (steps A, B and C).

    Step A updates all registered models with the summed cross-entropy of
    both classifiers on the labeled batch. Step B updates only ``C1`` and
    ``C2``: features are computed under ``no_grad`` and the loss is the
    labeled cross-entropy minus the classifier discrepancy on the
    unlabeled batch. Step C updates only ``F`` for ``self.n_step_F`` rounds
    on the discrepancy itself.

    Args:
        batch_x: labeled batch, forwarded to ``parse_batch_train``.
        batch_u: unlabeled batch, forwarded to ``parse_batch_train``.

    Returns:
        dict with float entries ``loss_step_A``, ``loss_step_B`` and
        ``loss_step_C`` (value of the last step-C round, or ``0.0`` when
        ``n_step_F`` is 0 and step C never ran).
    """
    input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u)

    # Step A
    feat_x = self.F(input_x)
    logit_x1 = self.C1(feat_x)
    logit_x2 = self.C2(feat_x)
    loss_x1 = F.cross_entropy(logit_x1, label_x)
    loss_x2 = F.cross_entropy(logit_x2, label_x)
    loss_step_A = loss_x1 + loss_x2
    self.model_backward_and_update(loss_step_A)

    # Step B
    with torch.no_grad():
        feat_x = self.F(input_x)
    logit_x1 = self.C1(feat_x)
    logit_x2 = self.C2(feat_x)
    loss_x1 = F.cross_entropy(logit_x1, label_x)
    loss_x2 = F.cross_entropy(logit_x2, label_x)
    loss_x = loss_x1 + loss_x2

    with torch.no_grad():
        feat_u = self.F(input_u)
    pred_u1 = F.softmax(self.C1(feat_u), 1)
    pred_u2 = F.softmax(self.C2(feat_u), 1)
    loss_dis = self.discrepancy(pred_u1, pred_u2)

    # The classifiers are pushed apart on unlabeled data, hence the minus.
    loss_step_B = loss_x - loss_dis
    self.model_backward_and_update(loss_step_B, ["C1", "C2"])

    # Step C
    # Stays None when n_step_F == 0 so the summary below can still be
    # built instead of failing on an unbound local.
    loss_step_C = None
    for _ in range(self.n_step_F):
        feat_u = self.F(input_u)
        pred_u1 = F.softmax(self.C1(feat_u), 1)
        pred_u2 = F.softmax(self.C2(feat_u), 1)
        loss_step_C = self.discrepancy(pred_u1, pred_u2)
        self.model_backward_and_update(loss_step_C, "F")

    loss_summary = {
        "loss_step_A": loss_step_A.item(),
        "loss_step_B": loss_step_B.item(),
        "loss_step_C": 0.0 if loss_step_C is None else loss_step_C.item(),
    }

    # The LR schedule advances once per epoch, on the last batch.
    if (self.batch_idx + 1) == self.num_batches:
        self.update_lr()

    return loss_summary
def forward_backward(self, batch_x, batch_u):
    """Run one MME training iteration.

    First a supervised update with cross-entropy on the labeled batch,
    then a second update on the unlabeled batch whose loss is the negated
    mean prediction entropy scaled by ``self.lmda``. The unlabeled features
    pass through ``self.revgrad`` before reaching the classifier.

    Returns:
        dict with ``loss_x``, ``acc_x`` and ``loss_u`` as Python floats.
    """
    input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u)

    # Supervised step on labeled data.
    logit_x = self.C(self.F(input_x))
    loss_x = F.cross_entropy(logit_x, label_x)
    self.model_backward_and_update(loss_x)

    # Entropy step on unlabeled data.
    reversed_feat = self.revgrad(self.F(input_u))
    prob_u = F.softmax(self.C(reversed_feat), 1)
    entropy = (-prob_u * torch.log(prob_u + 1e-5)).sum(1).mean()
    loss_u = -entropy
    self.model_backward_and_update(loss_u * self.lmda)

    summary = {
        "loss_x": loss_x.item(),
        "acc_x": compute_accuracy(logit_x, label_x)[0].item(),
        "loss_u": loss_u.item(),
    }

    # The LR schedule advances once per epoch, on the last batch.
    is_last_batch = (self.batch_idx + 1) == self.num_batches
    if is_last_batch:
        self.update_lr()

    return summary
def forward_backward(self, batch_x, batch_u):
    """Run one self-ensembling training iteration.

    The loss is the labeled cross-entropy plus a consistency term: the
    squared difference between ``self.model``'s softmax on the first
    unlabeled view and ``self.teacher``'s softmax on the second view. When
    ``self.conf_thre`` is truthy, samples whose teacher confidence does not
    exceed it are masked out; otherwise the mean is scaled by
    ``sigmoid_rampup(global_step, self.rampup)``. After the update,
    ``ema_model_update`` is called on (model, teacher) with
    ``min(1 - 1 / (global_step + 1), self.ema_alpha)``.

    Returns:
        dict with ``loss_x``, ``acc_x`` and ``loss_u`` as Python floats.
    """
    step = self.batch_idx + self.epoch * self.num_batches
    input_x, label_x, input_u1, input_u2 = self.parse_batch_train(
        batch_x, batch_u
    )

    # Supervised term.
    logit_x = self.model(input_x)
    loss_x = F.cross_entropy(logit_x, label_x)

    # Consistency term between student and teacher predictions.
    student_prob = F.softmax(self.model(input_u1), 1)
    teacher_prob = F.softmax(self.teacher(input_u2), 1)
    sq_diff = ((student_prob - teacher_prob)**2).sum(1)

    if not self.conf_thre:
        ramp_weight = sigmoid_rampup(step, self.rampup)
        loss_u = sq_diff.mean() * ramp_weight
    else:
        teacher_conf = teacher_prob.max(1)[0]
        keep = (teacher_conf > self.conf_thre).float()
        loss_u = (sq_diff * keep).mean()

    self.model_backward_and_update(loss_x + loss_u)

    # Decay is capped by ema_alpha and is 0 at the very first step.
    decay = min(1 - 1 / (step+1), self.ema_alpha)
    ema_model_update(self.model, self.teacher, decay)

    summary = {
        "loss_x": loss_x.item(),
        "acc_x": compute_accuracy(logit_x, label_x)[0].item(),
        "loss_u": loss_u.item(),
    }

    # The LR schedule advances once per epoch, on the last batch.
    if self.num_batches == (self.batch_idx + 1):
        self.update_lr()

    return summary
@TRAINER_REGISTRY.register()
class SourceOnly(TrainerXU):
    """Domain-adaptation baseline trained on labeled source data only.

    The unlabeled batch is accepted for interface compatibility but is
    never read.
    """

    def forward_backward(self, batch_x, batch_u):
        """Run one cross-entropy update on the labeled batch.

        Returns:
            dict with ``loss`` and ``acc`` as Python floats.
        """
        images, targets = self.parse_batch_train(batch_x, batch_u)
        logits = self.model(images)
        ce_loss = F.cross_entropy(logits, targets)
        self.model_backward_and_update(ce_loss)

        summary = {
            "loss": ce_loss.item(),
            "acc": compute_accuracy(logits, targets)[0].item(),
        }

        # The LR schedule advances once per epoch, on the last batch.
        if self.num_batches == (self.batch_idx + 1):
            self.update_lr()

        return summary

    def parse_batch_train(self, batch_x, batch_u):
        """Return ``(img, label)`` of the labeled batch moved to ``self.device``."""
        images = batch_x["img"]
        targets = batch_x["label"]
        return images.to(self.device), targets.to(self.device)
def build_model(self):
    """Build and register the label network ``F`` and domain network ``D``.

    Both are ``SimpleNet`` instances built from ``cfg.MODEL``; ``F`` has
    ``self.num_classes`` outputs and ``D`` has ``self.num_source_domains``
    outputs. Each gets its own optimizer and LR scheduler from
    ``cfg.OPTIM`` and is registered under its letter.
    """
    cfg = self.cfg
    optim_cfg = cfg.OPTIM

    print("Building F")
    self.F = SimpleNet(cfg, cfg.MODEL, self.num_classes)
    self.F.to(self.device)
    num_params_f = count_num_param(self.F)
    print("# params: {:,}".format(num_params_f))
    self.optim_F = build_optimizer(self.F, optim_cfg)
    self.sched_F = build_lr_scheduler(self.optim_F, optim_cfg)
    self.register_model("F", self.F, self.optim_F, self.sched_F)

    print("Building D")
    self.D = SimpleNet(cfg, cfg.MODEL, self.num_source_domains)
    self.D.to(self.device)
    num_params_d = count_num_param(self.D)
    print("# params: {:,}".format(num_params_d))
    self.optim_D = build_optimizer(self.D, optim_cfg)
    self.sched_D = build_lr_scheduler(self.optim_D, optim_cfg)
    self.register_model("D", self.D, self.optim_D, self.sched_D)
class Experts(nn.Module):
    """A bank of ``n_source`` linear classifiers, one per source domain.

    ``forward(i, x)`` applies the ``i``-th linear layer to ``x`` and returns
    the softmax over dim 1, i.e. probabilities rather than logits.
    """

    def __init__(self, n_source, fdim, num_classes):
        super().__init__()
        heads = nn.ModuleList()
        for _ in range(n_source):
            heads.append(nn.Linear(fdim, num_classes))
        self.linears = heads
        self.softmax = nn.Softmax(dim=1)

    def forward(self, i, x):
        return self.softmax(self.linears[i](x))
def build_data_loader(self):
    """Create the data loaders with two training transforms.

    Two transforms are built with ``build_transform``: the default training
    transform and one restricted to ``cfg.TRAINER.DAELDG.STRONG_TRANSFORMS``
    (presumably the strong augmentation pipeline -- confirm against
    ``build_transform``). Both are handed to ``DataManager`` and the
    resulting loaders and dataset metadata are copied onto the trainer.
    """
    cfg = self.cfg

    weak_tfm = build_transform(cfg, is_train=True)
    strong_choices = cfg.TRAINER.DAELDG.STRONG_TRANSFORMS
    strong_tfm = build_transform(cfg, is_train=True, choices=strong_choices)

    dm = DataManager(self.cfg, custom_tfm_train=[weak_tfm, strong_tfm])

    # Mirror the data manager's attributes onto the trainer, one by one.
    exported = (
        "train_loader_x",
        "train_loader_u",
        "val_loader",
        "test_loader",
        "num_classes",
        "num_source_domains",
        "lab2cname",
    )
    for attr in exported:
        setattr(self, attr, getattr(dm, attr))
def model_inference(self, input):
    """Average the outputs of all experts for the given input.

    ``self.F`` is applied once; every expert ``self.E(k, .)`` for ``k`` in
    ``range(self.num_source_domains)`` is then applied to that result and
    the expert outputs are averaged.
    """
    feat = self.F(input)
    per_expert = [self.E(k, feat) for k in range(self.num_source_domains)]
    # Stack on a new axis 1 (one slot per expert), then average it away.
    return torch.stack(per_expert, dim=1).mean(1)
@TRAINER_REGISTRY.register()
class DDAIG(TrainerX):
    """Deep Domain-Adversarial Image Generation.

    https://arxiv.org/abs/2003.06054.
    """

    def __init__(self, cfg):
        super().__init__(cfg)
        ddaig_cfg = cfg.TRAINER.DDAIG
        self.lmda = ddaig_cfg.LMDA
        self.clamp = ddaig_cfg.CLAMP
        self.clamp_min = ddaig_cfg.CLAMP_MIN
        self.clamp_max = ddaig_cfg.CLAMP_MAX
        self.warmup = ddaig_cfg.WARMUP
        self.alpha = ddaig_cfg.ALPHA

    def build_model(self):
        """Build the label net F, the domain net D and the generator G.

        Each network is moved to ``self.device``, gets its own optimizer
        and scheduler from ``cfg.OPTIM``, and is registered by name.
        """
        cfg = self.cfg

        def equip(tag):
            # Shared post-construction steps for the network stored at
            # ``self.<tag>``: device placement, size report, optimizer,
            # scheduler and registration.
            net = getattr(self, tag)
            net.to(self.device)
            print("# params: {:,}".format(count_num_param(net)))
            optim = build_optimizer(net, cfg.OPTIM)
            setattr(self, "optim_" + tag, optim)
            sched = build_lr_scheduler(optim, cfg.OPTIM)
            setattr(self, "sched_" + tag, sched)
            self.register_model(tag, net, optim, sched)

        print("Building F")
        self.F = SimpleNet(cfg, cfg.MODEL, self.num_classes)
        equip("F")

        print("Building D")
        self.D = SimpleNet(cfg, cfg.MODEL, self.num_source_domains)
        equip("D")

        print("Building G")
        self.G = build_network(cfg.TRAINER.DDAIG.G_ARCH, verbose=cfg.VERBOSE)
        equip("G")

    def forward_backward(self, batch):
        """Run one training step: update G, then F, then D.

        Returns a dict with the scalar values of the three losses.
        """
        images, label, domain = self.parse_batch_train(batch)

        def perturb():
            # Generator output, optionally clamped to the configured range.
            generated = self.G(images, lmda=self.lmda)
            if self.clamp:
                generated = torch.clamp(
                    generated, min=self.clamp_min, max=self.clamp_max
                )
            return generated

        # ---- Update G: minimize the label loss, maximize the domain loss
        input_p = perturb()
        label_term = F.cross_entropy(self.F(input_p), label)
        domain_term = F.cross_entropy(self.D(input_p), domain)
        loss_g = label_term - domain_term
        self.model_backward_and_update(loss_g, "G")

        # Perturb the data again with the freshly updated G; no gradient
        # is tracked for this pass.
        with torch.no_grad():
            input_p = perturb()

        # ---- Update F: clean loss, mixed with the perturbed loss once
        # the warm-up epochs are over
        loss_f = F.cross_entropy(self.F(images), label)
        if (self.epoch + 1) > self.warmup:
            loss_fp = F.cross_entropy(self.F(input_p), label)
            loss_f = (1.0 - self.alpha) * loss_f + self.alpha * loss_fp
        self.model_backward_and_update(loss_f, "F")

        # ---- Update D on the clean images
        loss_d = F.cross_entropy(self.D(images), domain)
        self.model_backward_and_update(loss_d, "D")

        loss_summary = {
            "loss_g": loss_g.item(),
            "loss_f": loss_f.item(),
            "loss_d": loss_d.item(),
        }

        # Step the LR schedulers once per epoch, on the last batch.
        if (self.batch_idx + 1) == self.num_batches:
            self.update_lr()

        return loss_summary

    def model_inference(self, input):
        """Predict with the label network F."""
        return self.F(input)
def parse_batch_train(self, batch):
    """Move a batch to the trainer's device and apply domain mixing.

    Reads ``batch["img"]``, ``batch["label"]`` and ``batch["domain"]``,
    moves each to ``self.device`` and passes them to ``self.domain_mix``.

    Returns:
        The 4-tuple ``(images, target_a, target_b, lam)`` produced by
        ``self.domain_mix``.
    """
    fetched = [batch[key] for key in ("img", "label", "domain")]
    images, target, domain = [item.to(self.device) for item in fetched]

    mixed, target_a, target_b, lam = self.domain_mix(images, target, domain)
    return mixed, target_a, target_b, lam
@TRAINER_REGISTRY.register()
class Vanilla(TrainerX):
    """Vanilla model.

    A.k.a. Empirical Risk Minimization, or ERM.
    """

    def forward_backward(self, batch):
        """Run one cross-entropy training step and report loss/accuracy."""
        images, targets = self.parse_batch_train(batch)
        logits = self.model(images)
        loss = F.cross_entropy(logits, targets)
        self.model_backward_and_update(loss)

        loss_summary = {
            "loss": loss.item(),
            "acc": compute_accuracy(logits, targets)[0].item(),
        }

        # Step the LR scheduler once per epoch, on the last batch.
        is_last_batch = (self.batch_idx + 1) == self.num_batches
        if is_last_batch:
            self.update_lr()

        return loss_summary

    def parse_batch_train(self, batch):
        """Return ``(img, label)`` from the batch, moved to ``self.device``."""
        images, targets = batch["img"], batch["label"]
        images = images.to(self.device)
        targets = targets.to(self.device)
        return images, targets
@TRAINER_REGISTRY.register()
class EntMin(TrainerXU):
    """Entropy Minimization.

    http://papers.nips.cc/paper/2740-semi-supervised-learning-by-entropy-minimization.pdf.
    """

    def __init__(self, cfg):
        super().__init__(cfg)
        # Weight applied to the entropy term of the loss.
        self.lmda = cfg.TRAINER.ENTMIN.LMDA

    def forward_backward(self, batch_x, batch_u):
        """Run one step: cross-entropy on labeled data plus weighted
        prediction entropy on unlabeled data."""
        input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u)

        # Supervised term
        logits_x = self.model(input_x)
        sup_loss = F.cross_entropy(logits_x, label_x)

        # Entropy of the softmax predictions; the 1e-5 offset keeps the
        # logarithm finite when a probability is zero.
        probs_u = F.softmax(self.model(input_u), 1)
        entropy = (-probs_u * torch.log(probs_u + 1e-5)).sum(1).mean()

        total = sup_loss + entropy * self.lmda
        self.model_backward_and_update(total)

        loss_summary = {
            "loss_x": sup_loss.item(),
            "acc_x": compute_accuracy(logits_x, label_x)[0].item(),
            "loss_u": entropy.item(),
        }

        # Step the LR scheduler once per epoch, on the last batch.
        if (self.batch_idx + 1) == self.num_batches:
            self.update_lr()

        return loss_summary
def assess_y_pred_quality(self, y_pred, y_true, mask):
    """Summarize how good the pseudo labels are.

    Returns a dict with:
        acc_thre: number of correct predictions weighted by ``mask``,
            divided by ``mask.sum()`` (plus 1e-5 to avoid dividing by zero).
        acc_raw: fraction of correct predictions over all elements.
        keep_rate: ``mask.sum()`` divided by the number of mask elements.
    """
    hits = y_pred.eq(y_true)
    kept = mask.sum()
    return {
        "acc_thre": (hits.float() * mask).sum() / (kept + 1e-5),
        "acc_raw": hits.sum() / y_pred.numel(),  # raw accuracy
        "keep_rate": kept / mask.numel(),
    }
def parse_batch_train(self, batch_x, batch_u):
    """Extract both image views and the label from each batch.

    Every tensor is moved to ``self.device``. ``label_u`` is used only
    for evaluating the accuracy of the pseudo labels.

    Returns:
        ``(input_x, input_x2, label_x, input_u, input_u2, label_u)``.
    """
    keys = ("img", "img2", "label")
    labeled = [batch_x[key] for key in keys]
    unlabeled = [batch_u[key] for key in keys]

    input_x, input_x2, label_x = [item.to(self.device) for item in labeled]
    input_u, input_u2, label_u = [item.to(self.device) for item in unlabeled]

    return input_x, input_x2, label_x, input_u, input_u2, label_u
def forward_backward(self, batch_x, batch_u):
    """Run one mean-teacher step.

    The loss is cross-entropy on the labeled batch plus a ramped-up,
    weighted squared difference between the student's and the teacher's
    softmax outputs on the unlabeled batch. After the student update the
    teacher is refreshed through ``ema_model_update``.
    """
    input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u)

    # Supervised term on the labeled batch
    logit_x = self.model(input_x)
    sup_loss = F.cross_entropy(logit_x, label_x)

    # Consistency term: the teacher runs first, then the student
    teacher_prob = F.softmax(self.teacher(input_u), 1)
    student_prob = F.softmax(self.model(input_u), 1)
    cons_loss = ((student_prob - teacher_prob)**2).sum(1).mean()

    ramp = sigmoid_rampup(self.epoch, self.rampup)
    cons_weight = self.weight_u * ramp
    total = sup_loss + cons_loss*cons_weight
    self.model_backward_and_update(total)

    # The EMA coefficient starts at 0 on the first step and grows with
    # the step count until it is capped by the configured value.
    step = self.batch_idx + self.epoch * self.num_batches
    warmup_alpha = 1 - 1 / (step+1)
    ema_alpha = min(warmup_alpha, self.ema_alpha)
    ema_model_update(self.model, self.teacher, ema_alpha)

    loss_summary = {
        "loss_x": sup_loss.item(),
        "acc_x": compute_accuracy(logit_x, label_x)[0].item(),
        "loss_u": cons_loss.item(),
    }

    # Step the LR scheduler once per epoch, on the last batch.
    if (self.batch_idx + 1) == self.num_batches:
        self.update_lr()

    return loss_summary
def forward_backward(self, batch_x, batch_u):
    """Run one MixMatch training step.

    Pseudo labels for the unlabeled views are the sharpened average of the
    model's softmax outputs over all views. Labeled and unlabeled data are
    then combined, shuffled and mixed up before the two losses are
    computed.
    """
    input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u)
    n_labeled = input_x.shape[0]

    step = self.batch_idx + self.epoch * self.num_batches
    weight_u = self.weight_u * linear_rampup(step, self.rampup)

    # Generate pseudo-label for unlabeled data
    with torch.no_grad():
        n_views = len(input_u)
        prob_sum = 0
        for view in input_u:
            prob_sum += F.softmax(self.model(view), 1)
        prob_sum /= n_views
        sharpened = sharpen_prob(prob_sum, self.temp)
        # Every view shares the same pseudo label.
        label_u = torch.cat([sharpened] * n_views, 0)
        input_u = torch.cat(input_u, 0)

    # Combine and shuffle labeled and unlabeled data
    pooled_inputs = torch.cat([input_x, input_u], 0)
    pooled_labels = torch.cat([label_x, label_u], 0)
    pooled_inputs, pooled_labels = shuffle_index(pooled_inputs, pooled_labels)

    # Mixup: labeled samples pair with the first n_labeled shuffled
    # samples, unlabeled samples with the remainder.
    input_x, label_x = mixup(
        input_x,
        pooled_inputs[:n_labeled],
        label_x,
        pooled_labels[:n_labeled],
        self.beta,
        preserve_order=True,
    )
    input_u, label_u = mixup(
        input_u,
        pooled_inputs[n_labeled:],
        label_u,
        pooled_labels[n_labeled:],
        self.beta,
        preserve_order=True,
    )

    # Compute losses
    prob_x = F.softmax(self.model(input_x), 1)
    loss_x = (-label_x * torch.log(prob_x + 1e-5)).sum(1).mean()

    prob_u = F.softmax(self.model(input_u), 1)
    loss_u = ((label_u - prob_u)**2).mean()

    total = loss_x + loss_u*weight_u
    self.model_backward_and_update(total)

    loss_summary = {"loss_x": loss_x.item(), "loss_u": loss_u.item()}

    # Step the LR scheduler once per epoch, on the last batch.
    if (self.batch_idx + 1) == self.num_batches:
        self.update_lr()

    return loss_summary
@TRAINER_REGISTRY.register()
class SupBaseline(TrainerXU):
    """Supervised Baseline."""

    def forward_backward(self, batch_x, batch_u):
        """Run one cross-entropy step on the labeled batch only."""
        images, labels = self.parse_batch_train(batch_x, batch_u)
        logits = self.model(images)
        loss = F.cross_entropy(logits, labels)
        self.model_backward_and_update(loss)

        loss_summary = {
            "loss": loss.item(),
            "acc": compute_accuracy(logits, labels)[0].item(),
        }

        # Step the LR scheduler once per epoch, on the last batch.
        is_last_batch = (self.batch_idx + 1) == self.num_batches
        if is_last_batch:
            self.update_lr()

        return loss_summary

    def parse_batch_train(self, batch_x, batch_u):
        """Return ``(img, label)`` of the labeled batch on ``self.device``.

        ``batch_u`` is accepted but not read.
        """
        images, labels = batch_x["img"], batch_x["label"]
        images = images.to(self.device)
        labels = labels.to(self.device)
        return images, labels
class SimpleNet(nn.Module):
    """A simple neural network composed of a CNN backbone
    and optionally a head such as mlp for classification.

    Args:
        cfg: global config; only ``cfg.VERBOSE`` is read here.
        model_cfg: model config providing the ``BACKBONE`` and ``HEAD`` nodes.
        num_classes: number of classifier outputs; when it is not
            positive no classifier layer is created.
        **kwargs: forwarded to both ``build_backbone`` and ``build_head``.
    """

    def __init__(self, cfg, model_cfg, num_classes, **kwargs):
        super().__init__()
        backbone_cfg = model_cfg.BACKBONE
        self.backbone = build_backbone(
            backbone_cfg.NAME,
            verbose=cfg.VERBOSE,
            pretrained=backbone_cfg.PRETRAINED,
            **kwargs,
        )
        feat_dim = self.backbone.out_features

        # A head is built only when both its name and its hidden layers
        # are configured.
        head_cfg = model_cfg.HEAD
        self.head = None
        if head_cfg.NAME and head_cfg.HIDDEN_LAYERS:
            self.head = build_head(
                head_cfg.NAME,
                verbose=cfg.VERBOSE,
                in_features=feat_dim,
                hidden_layers=head_cfg.HIDDEN_LAYERS,
                activation=head_cfg.ACTIVATION,
                bn=head_cfg.BN,
                dropout=head_cfg.DROPOUT,
                **kwargs,
            )
            feat_dim = self.head.out_features

        self.classifier = nn.Linear(feat_dim, num_classes) if num_classes > 0 else None

        self._fdim = feat_dim

    @property
    def fdim(self):
        """Dimension of the feature fed to the classifier."""
        return self._fdim

    def forward(self, x, return_feature=False):
        """Return classifier outputs, or features when there is no classifier.

        With a classifier and ``return_feature=True`` the result is the
        pair ``(outputs, features)``.
        """
        feat = self.backbone(x)
        if self.head is not None:
            feat = self.head(feat)

        if self.classifier is None:
            return feat

        logits = self.classifier(feat)
        return (logits, feat) if return_feature else logits
def load_model(self, directory, epoch=None):
    """Load saved weights into every registered model.

    Args:
        directory: root folder holding one sub-folder per registered
            model name. When empty or None nothing is loaded.
        epoch: when given, ``model.pth.tar-<epoch>`` is loaded; otherwise
            ``model-best.pth.tar`` is loaded.

    Raises:
        FileNotFoundError: if the expected checkpoint file of any
            registered model does not exist.
    """
    if not directory:
        print(
            "Note that load_model() is skipped as no pretrained "
            "model is given (ignore this if it's done on purpose)"
        )
        return

    names = self.get_model_names()

    # By default, the best model is loaded
    model_file = "model-best.pth.tar"

    if epoch is not None:
        model_file = "model.pth.tar-" + str(epoch)

    for name in names:
        model_path = osp.join(directory, name, model_file)

        if not osp.exists(model_path):
            raise FileNotFoundError(f"No model at {model_path}")

        checkpoint = load_checkpoint(model_path)
        state_dict = checkpoint["state_dict"]
        # Kept in its own local so the ``epoch`` argument is not clobbered.
        ckpt_epoch = checkpoint["epoch"]

        # Checkpoints written without a validation result store None
        # (save_model's default), which cannot be formatted with ".1f".
        val_result = checkpoint.get("val_result")
        try:
            val_text = f"{val_result:.1f}"
        except (TypeError, ValueError):
            val_text = str(val_result)

        print(
            f"Load {model_path} to {name} (epoch={ckpt_epoch}, val_result={val_text})"
        )
        self._models[name].load_state_dict(state_dict)
def __init__(self, cfg, classnames=None, device_id=-1):
    """Set up the device, bookkeeping attributes and the model.

    Args:
        cfg: trainer configuration.
        classnames: class names handed to ``build_model``; defaults to a
            fresh empty list.
        device_id: CUDA device index. CUDA is used only when it is
            available, ``cfg.USE_CUDA`` is set and ``device_id`` is
            non-negative; otherwise the CPU is used.
    """
    super().__init__()
    self.check_cfg(cfg)

    # A list literal as the default would be created once and shared by
    # every trainer instance, so create a new one per call instead.
    if classnames is None:
        classnames = []

    if torch.cuda.is_available() and cfg.USE_CUDA and device_id >= 0:
        self.device = torch.device(f"cuda:{device_id}")
    else:
        self.device = torch.device("cpu")

    # Save as attributes some frequently used variables
    self.start_epoch = self.epoch = 0
    self.max_epoch = cfg.OPTIM.MAX_EPOCH
    self.output_dir = cfg.OUTPUT_DIR

    self.cfg = cfg
    # The data loaders and the evaluator are deliberately not created
    # here; the model is built from the class names alone.
    # self.build_data_loader()
    self.build_model(classnames)
    # self.evaluator = build_evaluator(cfg, lab2cname=self.lab2cname)
    self.best_result = -np.inf
+ """ + dm = DataManager(self.cfg) + + self.train_loader_x = dm.train_loader_x + self.train_loader_u = dm.train_loader_u # optional, can be None + self.val_loader = dm.val_loader # optional, can be None + self.test_loader = dm.test_loader + + self.num_classes = dm.num_classes + self.num_source_domains = dm.num_source_domains + self.lab2cname = dm.lab2cname # dict {label: classname} + + self.dm = dm + + def build_model(self): + """Build and register model. + + The default builds a classification model along with its + optimizer and scheduler. + + Custom trainers can re-implement this method if necessary. + """ + cfg = self.cfg + + print("Building model") + self.model = SimpleNet(cfg, cfg.MODEL, self.num_classes) + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + self.model.to(self.device) + print(f"# params: {count_num_param(self.model):,}") + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("model", self.model, self.optim, self.sched) + + # device_count = torch.cuda.device_count() + # if device_count > 1 and cfg.USE_CUDA: + # print(f"Detected {device_count} GPUs (use nn.DataParallel)") + # self.model = nn.DataParallel(self.model) + + def train(self): + super().train(self.start_epoch, self.max_epoch) + + def before_train(self): + directory = self.cfg.OUTPUT_DIR + if self.cfg.RESUME: + directory = self.cfg.RESUME + self.start_epoch = self.resume_model_if_exist(directory) + + # Initialize summary writer + writer_dir = osp.join(self.output_dir, "tensorboard") + mkdir_if_missing(writer_dir) + self.init_writer(writer_dir) + + # Remember the starting time (for computing the elapsed time) + self.time_start = time.time() + + def after_train(self): + print("Finish training") + + do_test = not self.cfg.TEST.NO_TEST + if do_test: + if self.cfg.TEST.FINAL_MODEL == "best_val": + print("Deploy the model with the best val performance") + 
self.load_model(self.output_dir) + else: + print("Deploy the last-epoch model") + self.test() + + # Show elapsed time + elapsed = round(time.time() - self.time_start) + elapsed = str(datetime.timedelta(seconds=elapsed)) + print(f"Elapsed: {elapsed}") + + # Close writer + self.close_writer() + + def after_epoch(self): + last_epoch = (self.epoch + 1) == self.max_epoch + do_test = not self.cfg.TEST.NO_TEST + meet_checkpoint_freq = ( + (self.epoch + 1) % self.cfg.TRAIN.CHECKPOINT_FREQ == 0 + if self.cfg.TRAIN.CHECKPOINT_FREQ > 0 else False + ) + + if do_test and self.cfg.TEST.FINAL_MODEL == "best_val": + curr_result = self.test(split="val") + is_best = curr_result > self.best_result + if is_best: + self.best_result = curr_result + self.save_model( + self.epoch, + self.output_dir, + val_result=curr_result, + model_name="model-best.pth.tar" + ) + + if meet_checkpoint_freq or last_epoch: + self.save_model(self.epoch, self.output_dir) + + @torch.no_grad() + def test(self, images=None, split=None): + """A generic testing pipeline.""" + self.set_model_mode("eval") + # self.evaluator.reset() + + # if split is None: + # split = self.cfg.TEST.SPLIT + + # if split == "val" and self.val_loader is not None: + # data_loader = self.val_loader + # else: + # split = "test" # in case val_loader is None + # data_loader = self.test_loader + + # print(f"Evaluate on the *{split}* set") + images = images.to(self.device) + return self.model_inference(images) + + for batch_idx, batch in enumerate(tqdm(data_loader)): + input, label = self.parse_batch_test(batch) + output = self.model_inference(input) + self.evaluator.process(output, label) + + results = self.evaluator.evaluate() + + for k, v in results.items(): + tag = f"{split}/{k}" + self.write_scalar(tag, v, self.epoch) + + return list(results.values())[0] + + def model_inference(self, input): + return self.model(input) + + def parse_batch_test(self, batch): + input = batch["img"] + label = batch["label"] + + input = input.to(self.device) + 
label = label.to(self.device) + + return input, label + + def get_current_lr(self, names=None): + names = self.get_model_names(names) + name = names[0] + return self._optims[name].param_groups[0]["lr"] + + +class TrainerXU(SimpleTrainer): + """A base trainer using both labeled and unlabeled data. + + In the context of domain adaptation, labeled and unlabeled data + come from source and target domains respectively. + + When it comes to semi-supervised learning, all data comes from the + same domain. + """ + + def run_epoch(self): + self.set_model_mode("train") + losses = MetricMeter() + batch_time = AverageMeter() + data_time = AverageMeter() + + # Decide to iterate over labeled or unlabeled dataset + len_train_loader_x = len(self.train_loader_x) + len_train_loader_u = len(self.train_loader_u) + if self.cfg.TRAIN.COUNT_ITER == "train_x": + self.num_batches = len_train_loader_x + elif self.cfg.TRAIN.COUNT_ITER == "train_u": + self.num_batches = len_train_loader_u + elif self.cfg.TRAIN.COUNT_ITER == "smaller_one": + self.num_batches = min(len_train_loader_x, len_train_loader_u) + else: + raise ValueError + + train_loader_x_iter = iter(self.train_loader_x) + train_loader_u_iter = iter(self.train_loader_u) + + end = time.time() + for self.batch_idx in range(self.num_batches): + try: + batch_x = next(train_loader_x_iter) + except StopIteration: + train_loader_x_iter = iter(self.train_loader_x) + batch_x = next(train_loader_x_iter) + + try: + batch_u = next(train_loader_u_iter) + except StopIteration: + train_loader_u_iter = iter(self.train_loader_u) + batch_u = next(train_loader_u_iter) + + data_time.update(time.time() - end) + loss_summary = self.forward_backward(batch_x, batch_u) + batch_time.update(time.time() - end) + losses.update(loss_summary) + + meet_freq = (self.batch_idx + 1) % self.cfg.TRAIN.PRINT_FREQ == 0 + only_few_batches = self.num_batches < self.cfg.TRAIN.PRINT_FREQ + if meet_freq or only_few_batches: + nb_remain = 0 + nb_remain += self.num_batches - 
self.batch_idx - 1 + nb_remain += ( + self.max_epoch - self.epoch - 1 + ) * self.num_batches + eta_seconds = batch_time.avg * nb_remain + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + + info = [] + info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"] + info += [f"batch [{self.batch_idx + 1}/{self.num_batches}]"] + info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"] + info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"] + info += [f"{losses}"] + info += [f"lr {self.get_current_lr():.4e}"] + info += [f"eta {eta}"] + print(" ".join(info)) + + n_iter = self.epoch * self.num_batches + self.batch_idx + for name, meter in losses.meters.items(): + self.write_scalar("train/" + name, meter.avg, n_iter) + self.write_scalar("train/lr", self.get_current_lr(), n_iter) + + end = time.time() + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"] + label_x = batch_x["label"] + input_u = batch_u["img"] + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + input_u = input_u.to(self.device) + + return input_x, label_x, input_u + + +class TrainerX(SimpleTrainer): + """A base trainer using labeled data only.""" + + def run_epoch(self): + self.set_model_mode("train") + losses = MetricMeter() + batch_time = AverageMeter() + data_time = AverageMeter() + self.num_batches = len(self.train_loader_x) + + end = time.time() + for self.batch_idx, batch in enumerate(self.train_loader_x): + data_time.update(time.time() - end) + loss_summary = self.forward_backward(batch) + batch_time.update(time.time() - end) + losses.update(loss_summary) + + meet_freq = (self.batch_idx + 1) % self.cfg.TRAIN.PRINT_FREQ == 0 + only_few_batches = self.num_batches < self.cfg.TRAIN.PRINT_FREQ + if meet_freq or only_few_batches: + nb_remain = 0 + nb_remain += self.num_batches - self.batch_idx - 1 + nb_remain += ( + self.max_epoch - self.epoch - 1 + ) * self.num_batches + eta_seconds = batch_time.avg * nb_remain + eta = 
str(datetime.timedelta(seconds=int(eta_seconds))) + + info = [] + info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"] + info += [f"batch [{self.batch_idx + 1}/{self.num_batches}]"] + info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"] + info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"] + info += [f"{losses}"] + info += [f"lr {self.get_current_lr():.4e}"] + info += [f"eta {eta}"] + print(" ".join(info)) + + n_iter = self.epoch * self.num_batches + self.batch_idx + for name, meter in losses.meters.items(): + self.write_scalar("train/" + name, meter.avg, n_iter) + self.write_scalar("train/lr", self.get_current_lr(), n_iter) + + end = time.time() + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + domain = batch["domain"] + + input = input.to(self.device) + label = label.to(self.device) + domain = domain.to(self.device) + + return input, label, domain diff --git a/python/ClipDetection/Dassl.pytorch/dassl/evaluation/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/__init__.py new file mode 100644 index 00000000..59a024f5 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/__init__.py @@ -0,0 +1,3 @@ +from .build import build_evaluator, EVALUATOR_REGISTRY # isort:skip + +from .evaluator import EvaluatorBase, Classification diff --git a/python/ClipDetection/Dassl.pytorch/dassl/evaluation/build.py b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/build.py new file mode 100644 index 00000000..3132a3f4 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +EVALUATOR_REGISTRY = Registry("EVALUATOR") + + +def build_evaluator(cfg, **kwargs): + avai_evaluators = EVALUATOR_REGISTRY.registered_names() + check_availability(cfg.TEST.EVALUATOR, avai_evaluators) + if cfg.VERBOSE: + print("Loading evaluator: {}".format(cfg.TEST.EVALUATOR)) + return 
EVALUATOR_REGISTRY.get(cfg.TEST.EVALUATOR)(cfg, **kwargs) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/evaluation/evaluator.py b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/evaluator.py new file mode 100644 index 00000000..eef37975 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/evaluation/evaluator.py @@ -0,0 +1,125 @@ +import numpy as np +import os.path as osp +from collections import OrderedDict, defaultdict +import torch +from sklearn.metrics import f1_score, confusion_matrix + +from .build import EVALUATOR_REGISTRY + + +class EvaluatorBase: + """Base evaluator.""" + + def __init__(self, cfg): + self.cfg = cfg + + def reset(self): + raise NotImplementedError + + def process(self, mo, gt): + raise NotImplementedError + + def evaluate(self): + raise NotImplementedError + + +@EVALUATOR_REGISTRY.register() +class Classification(EvaluatorBase): + """Evaluator for classification.""" + + def __init__(self, cfg, lab2cname=None, **kwargs): + super().__init__(cfg) + self._lab2cname = lab2cname + self._correct = 0 + self._total = 0 + self._per_class_res = None + self._y_true = [] + self._y_pred = [] + if cfg.TEST.PER_CLASS_RESULT: + assert lab2cname is not None + self._per_class_res = defaultdict(list) + + def reset(self): + self._correct = 0 + self._total = 0 + self._y_true = [] + self._y_pred = [] + if self._per_class_res is not None: + self._per_class_res = defaultdict(list) + + def process(self, mo, gt): + # mo (torch.Tensor): model output [batch, num_classes] + # gt (torch.LongTensor): ground truth [batch] + pred = mo.max(1)[1] + matches = pred.eq(gt).float() + self._correct += int(matches.sum().item()) + self._total += gt.shape[0] + + self._y_true.extend(gt.data.cpu().numpy().tolist()) + self._y_pred.extend(pred.data.cpu().numpy().tolist()) + + if self._per_class_res is not None: + for i, label in enumerate(gt): + label = label.item() + matches_i = int(matches[i].item()) + self._per_class_res[label].append(matches_i) + + def 
evaluate(self): + results = OrderedDict() + acc = 100.0 * self._correct / self._total + err = 100.0 - acc + macro_f1 = 100.0 * f1_score( + self._y_true, + self._y_pred, + average="macro", + labels=np.unique(self._y_true) + ) + + # The first value will be returned by trainer.test() + results["accuracy"] = acc + results["error_rate"] = err + results["macro_f1"] = macro_f1 + + print( + "=> result\n" + f"* total: {self._total:,}\n" + f"* correct: {self._correct:,}\n" + f"* accuracy: {acc:.1f}%\n" + f"* error: {err:.1f}%\n" + f"* macro_f1: {macro_f1:.1f}%" + ) + + if self._per_class_res is not None: + labels = list(self._per_class_res.keys()) + labels.sort() + + print("=> per-class result") + accs = [] + + for label in labels: + classname = self._lab2cname[label] + res = self._per_class_res[label] + correct = sum(res) + total = len(res) + acc = 100.0 * correct / total + accs.append(acc) + print( + f"* class: {label} ({classname})\t" + f"total: {total:,}\t" + f"correct: {correct:,}\t" + f"acc: {acc:.1f}%" + ) + mean_acc = np.mean(accs) + print(f"* average: {mean_acc:.1f}%") + + results["perclass_accuracy"] = mean_acc + + if self.cfg.TEST.COMPUTE_CMAT: + cmat = confusion_matrix( + self._y_true, self._y_pred, normalize="true" + ) + save_path = osp.join(self.cfg.OUTPUT_DIR, "cmat.pt") + torch.save(cmat, save_path) + print(f"Confusion matrix is saved to {save_path}") + + return results diff --git a/python/ClipDetection/Dassl.pytorch/dassl/metrics/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/metrics/__init__.py new file mode 100644 index 00000000..c2b37de8 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/metrics/__init__.py @@ -0,0 +1,4 @@ +from .accuracy import compute_accuracy +from .distance import ( + cosine_distance, compute_distance_matrix, euclidean_squared_distance +) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/metrics/accuracy.py b/python/ClipDetection/Dassl.pytorch/dassl/metrics/accuracy.py new file mode 100644 index 
00000000..a8ed0ae5 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/metrics/accuracy.py @@ -0,0 +1,30 @@ +def compute_accuracy(output, target, topk=(1, )): + """Computes the accuracy over the k top predictions for + the specified values of k. + + Args: + output (torch.Tensor): prediction matrix with shape (batch_size, num_classes). + target (torch.LongTensor): ground truth labels with shape (batch_size). + topk (tuple, optional): accuracy at top-k will be computed. For example, + topk=(1, 5) means accuracy at top-1 and top-5 will be computed. + + Returns: + list: accuracy at top-k. + """ + maxk = max(topk) + batch_size = target.size(0) + + if isinstance(output, (tuple, list)): + output = output[0] + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + acc = correct_k.mul_(100.0 / batch_size) + res.append(acc) + + return res diff --git a/python/ClipDetection/Dassl.pytorch/dassl/metrics/distance.py b/python/ClipDetection/Dassl.pytorch/dassl/metrics/distance.py new file mode 100644 index 00000000..80568151 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/metrics/distance.py @@ -0,0 +1,77 @@ +""" +Source: https://github.com/KaiyangZhou/deep-person-reid +""" +import torch +from torch.nn import functional as F + + +def compute_distance_matrix(input1, input2, metric="euclidean"): + """A wrapper function for computing distance matrix. + + Each input matrix has the shape (n_data, feature_dim). + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + metric (str, optional): "euclidean" or "cosine". + Default is "euclidean". + + Returns: + torch.Tensor: distance matrix. 
+ """ + # check input + assert isinstance(input1, torch.Tensor) + assert isinstance(input2, torch.Tensor) + assert input1.dim() == 2, "Expected 2-D tensor, but got {}-D".format( + input1.dim() + ) + assert input2.dim() == 2, "Expected 2-D tensor, but got {}-D".format( + input2.dim() + ) + assert input1.size(1) == input2.size(1) + + if metric == "euclidean": + distmat = euclidean_squared_distance(input1, input2) + elif metric == "cosine": + distmat = cosine_distance(input1, input2) + else: + raise ValueError( + "Unknown distance metric: {}. " + 'Please choose either "euclidean" or "cosine"'.format(metric) + ) + + return distmat + + +def euclidean_squared_distance(input1, input2): + """Computes euclidean squared distance. + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + + Returns: + torch.Tensor: distance matrix. + """ + m, n = input1.size(0), input2.size(0) + mat1 = torch.pow(input1, 2).sum(dim=1, keepdim=True).expand(m, n) + mat2 = torch.pow(input2, 2).sum(dim=1, keepdim=True).expand(n, m).t() + distmat = mat1 + mat2 + distmat.addmm_(1, -2, input1, input2.t()) + return distmat + + +def cosine_distance(input1, input2): + """Computes cosine distance. + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + + Returns: + torch.Tensor: distance matrix. 
+ """ + input1_normed = F.normalize(input1, p=2, dim=1) + input2_normed = F.normalize(input2, p=2, dim=1) + distmat = 1 - torch.mm(input1_normed, input2_normed.t()) + return distmat diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/__init__.py new file mode 100644 index 00000000..88466b9b --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/__init__.py @@ -0,0 +1,3 @@ +from .head import HEAD_REGISTRY, build_head +from .network import NETWORK_REGISTRY, build_network +from .backbone import BACKBONE_REGISTRY, Backbone, build_backbone diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/__init__.py new file mode 100644 index 00000000..8e6dc684 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/__init__.py @@ -0,0 +1,23 @@ +from .build import build_backbone, BACKBONE_REGISTRY # isort:skip +from .backbone import Backbone # isort:skip + +from .vgg import vgg16 +from .resnet import ( + resnet18, resnet34, resnet50, resnet101, resnet152, resnet18_ms_l1, + resnet50_ms_l1, resnet18_ms_l12, resnet50_ms_l12, resnet101_ms_l1, + resnet18_ms_l123, resnet50_ms_l123, resnet101_ms_l12, resnet101_ms_l123, + resnet18_efdmix_l1, resnet50_efdmix_l1, resnet18_efdmix_l12, + resnet50_efdmix_l12, resnet101_efdmix_l1, resnet18_efdmix_l123, + resnet50_efdmix_l123, resnet101_efdmix_l12, resnet101_efdmix_l123 +) +from .alexnet import alexnet +from .wide_resnet import wide_resnet_16_4, wide_resnet_28_2 +from .cnn_digitsdg import cnn_digitsdg +from .efficientnet import ( + efficientnet_b0, efficientnet_b1, efficientnet_b2, efficientnet_b3, + efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 +) +from .resnet_dynamic import * +from .cnn_digitsingle import cnn_digitsingle +from .preact_resnet18 import preact_resnet18 +from .cnn_digit5_m3sda import cnn_digit5_m3sda diff 
--git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/alexnet.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/alexnet.py new file mode 100644 index 00000000..2daff243 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/alexnet.py @@ -0,0 +1,64 @@ +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +model_urls = { + "alexnet": "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth", +} + + +class AlexNet(Backbone): + + def __init__(self): + super().__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + # Note that self.classifier outputs features rather than logits + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + ) + + self._out_features = 4096 + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + return self.classifier(x) + + +def init_pretrained_weights(model, model_url): + pretrain_dict = model_zoo.load_url(model_url) + model.load_state_dict(pretrain_dict, strict=False) + + +@BACKBONE_REGISTRY.register() +def alexnet(pretrained=True, **kwargs): + model = AlexNet() + + if pretrained: + init_pretrained_weights(model, model_urls["alexnet"]) + + return model diff --git 
a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/backbone.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/backbone.py new file mode 100644 index 00000000..b544d945 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/backbone.py @@ -0,0 +1,17 @@ +import torch.nn as nn + + +class Backbone(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self): + pass + + @property + def out_features(self): + """Output feature dimension.""" + if self.__dict__.get("_out_features") is None: + return None + return self._out_features diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/build.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/build.py new file mode 100644 index 00000000..61f4e4fe --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +BACKBONE_REGISTRY = Registry("BACKBONE") + + +def build_backbone(name, verbose=True, **kwargs): + avai_backbones = BACKBONE_REGISTRY.registered_names() + check_availability(name, avai_backbones) + if verbose: + print("Backbone: {}".format(name)) + return BACKBONE_REGISTRY.get(name)(**kwargs) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py new file mode 100644 index 00000000..deabded8 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py @@ -0,0 +1,58 @@ +""" +Reference + +https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA +""" +import torch.nn as nn +from torch.nn import functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class FeatureExtractor(Backbone): + + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2) + self.bn1 = 
nn.BatchNorm2d(64) + self.conv2 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2) + self.bn2 = nn.BatchNorm2d(64) + self.conv3 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2) + self.bn3 = nn.BatchNorm2d(128) + self.fc1 = nn.Linear(8192, 3072) + self.bn1_fc = nn.BatchNorm1d(3072) + self.fc2 = nn.Linear(3072, 2048) + self.bn2_fc = nn.BatchNorm1d(2048) + + self._out_features = 2048 + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = F.relu(self.bn1(self.conv1(x))) + x = F.max_pool2d(x, stride=2, kernel_size=3, padding=1) + x = F.relu(self.bn2(self.conv2(x))) + x = F.max_pool2d(x, stride=2, kernel_size=3, padding=1) + x = F.relu(self.bn3(self.conv3(x))) + x = x.view(x.size(0), 8192) + x = F.relu(self.bn1_fc(self.fc1(x))) + x = F.dropout(x, training=self.training) + x = F.relu(self.bn2_fc(self.fc2(x))) + return x + + +@BACKBONE_REGISTRY.register() +def cnn_digit5_m3sda(**kwargs): + """ + This architecture was used for the Digit-5 dataset in: + + - Peng et al. Moment Matching for Multi-Source + Domain Adaptation. ICCV 2019. 
+ """ + return FeatureExtractor() diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py new file mode 100644 index 00000000..c68044f3 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py @@ -0,0 +1,61 @@ +import torch.nn as nn +from torch.nn import functional as F + +from dassl.utils import init_network_weights + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class Convolution(nn.Module): + + def __init__(self, c_in, c_out): + super().__init__() + self.conv = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1) + self.relu = nn.ReLU(True) + + def forward(self, x): + return self.relu(self.conv(x)) + + +class ConvNet(Backbone): + + def __init__(self, c_hidden=64): + super().__init__() + self.conv1 = Convolution(3, c_hidden) + self.conv2 = Convolution(c_hidden, c_hidden) + self.conv3 = Convolution(c_hidden, c_hidden) + self.conv4 = Convolution(c_hidden, c_hidden) + + self._out_features = 2**2 * c_hidden + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = self.conv1(x) + x = F.max_pool2d(x, 2) + x = self.conv2(x) + x = F.max_pool2d(x, 2) + x = self.conv3(x) + x = F.max_pool2d(x, 2) + x = self.conv4(x) + x = F.max_pool2d(x, 2) + return x.view(x.size(0), -1) + + +@BACKBONE_REGISTRY.register() +def cnn_digitsdg(**kwargs): + """ + This architecture was used for DigitsDG dataset in: + + - Zhou et al. Deep Domain-Adversarial Image Generation + for Domain Generalisation. AAAI 2020. 
+ """ + model = ConvNet(c_hidden=64) + init_network_weights(model, init_type="kaiming") + return model diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py new file mode 100644 index 00000000..0c5101ce --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py @@ -0,0 +1,56 @@ +""" +This model is built based on +https://github.com/ricvolpi/generalize-unseen-domains/blob/master/model.py +""" +import torch.nn as nn +from torch.nn import functional as F + +from dassl.utils import init_network_weights + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class CNN(Backbone): + + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 64, 5) + self.conv2 = nn.Conv2d(64, 128, 5) + self.fc3 = nn.Linear(5 * 5 * 128, 1024) + self.fc4 = nn.Linear(1024, 1024) + + self._out_features = 1024 + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = self.conv1(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + + x = x.view(x.size(0), -1) + + x = self.fc3(x) + x = F.relu(x) + + x = self.fc4(x) + x = F.relu(x) + + return x + + +@BACKBONE_REGISTRY.register() +def cnn_digitsingle(**kwargs): + model = CNN() + init_network_weights(model, init_type="kaiming") + return model diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py new file mode 100644 index 00000000..20ee4333 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py @@ -0,0 +1,12 @@ +""" +Source: https://github.com/lukemelas/EfficientNet-PyTorch. 
+""" +__version__ = "0.6.4" +from .model import ( + EfficientNet, efficientnet_b0, efficientnet_b1, efficientnet_b2, + efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, + efficientnet_b7 +) +from .utils import ( + BlockArgs, BlockDecoder, GlobalParams, efficientnet, get_model_params +) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py new file mode 100755 index 00000000..ed01261d --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py @@ -0,0 +1,371 @@ +import torch +from torch import nn +from torch.nn import functional as F + +from .utils import ( + Swish, MemoryEfficientSwish, drop_connect, round_filters, round_repeats, + get_model_params, efficientnet_params, get_same_padding_conv2d, + load_pretrained_weights, calculate_output_image_size +) +from ..build import BACKBONE_REGISTRY +from ..backbone import Backbone + + +class MBConvBlock(nn.Module): + """ + Mobile Inverted Residual Bottleneck Block + + Args: + block_args (namedtuple): BlockArgs, see above + global_params (namedtuple): GlobalParam, see above + + Attributes: + has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
+ """ + + def __init__(self, block_args, global_params, image_size=None): + super().__init__() + self._block_args = block_args + self._bn_mom = 1 - global_params.batch_norm_momentum + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is + not None) and (0 < self._block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip # skip connection and drop connect + + # Expansion phase + inp = self._block_args.input_filters # number of input channels + oup = ( + self._block_args.input_filters * self._block_args.expand_ratio + ) # number of output channels + if self._block_args.expand_ratio != 1: + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._expand_conv = Conv2d( + in_channels=inp, out_channels=oup, kernel_size=1, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + # image_size = calculate_output_image_size(image_size, 1) <-- this would do nothing + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._depthwise_conv = Conv2d( + in_channels=oup, + out_channels=oup, + groups=oup, # groups makes it depthwise + kernel_size=k, + stride=s, + bias=False, + ) + self._bn1 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + image_size = calculate_output_image_size(image_size, s) + + # Squeeze and Excitation layer, if desired + if self.has_se: + Conv2d = get_same_padding_conv2d(image_size=(1, 1)) + num_squeezed_channels = max( + 1, + int( + self._block_args.input_filters * self._block_args.se_ratio + ) + ) + self._se_reduce = Conv2d( + in_channels=oup, + out_channels=num_squeezed_channels, + kernel_size=1 + ) + self._se_expand = Conv2d( + in_channels=num_squeezed_channels, + out_channels=oup, + kernel_size=1 + ) + + # Output phase + final_oup = self._block_args.output_filters + Conv2d = 
get_same_padding_conv2d(image_size=image_size) + self._project_conv = Conv2d( + in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False + ) + self._bn2 = nn.BatchNorm2d( + num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps + ) + self._swish = MemoryEfficientSwish() + + def forward(self, inputs, drop_connect_rate=None): + """ + :param inputs: input tensor + :param drop_connect_rate: drop connect rate (float, between 0 and 1) + :return: output of block + """ + + # Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = self._swish(self._bn0(self._expand_conv(inputs))) + x = self._swish(self._bn1(self._depthwise_conv(x))) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + x_squeezed = self._se_expand( + self._swish(self._se_reduce(x_squeezed)) + ) + x = torch.sigmoid(x_squeezed) * x + + x = self._bn2(self._project_conv(x)) + + # Skip connection and drop connect + input_filters, output_filters = ( + self._block_args.input_filters, + self._block_args.output_filters, + ) + if ( + self.id_skip and self._block_args.stride == 1 + and input_filters == output_filters + ): + if drop_connect_rate: + x = drop_connect( + x, p=drop_connect_rate, training=self.training + ) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export)""" + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + + +class EfficientNet(Backbone): + """ + An EfficientNet model. 
Most easily loaded with the .from_name or .from_pretrained methods + + Args: + blocks_args (list): A list of BlockArgs to construct blocks + global_params (namedtuple): A set of GlobalParams shared between blocks + + Example: + model = EfficientNet.from_pretrained('efficientnet-b0') + + """ + + def __init__(self, blocks_args=None, global_params=None): + super().__init__() + assert isinstance(blocks_args, list), "blocks_args should be a list" + assert len(blocks_args) > 0, "block args must be greater than 0" + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Get stem static or dynamic convolution depending on image size + image_size = global_params.image_size + Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) + + # Stem + in_channels = 3 # rgb + out_channels = round_filters( + 32, self._global_params + ) # number of output channels + self._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + image_size = calculate_output_image_size(image_size, 2) + + # Build blocks + self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters( + block_args.input_filters, self._global_params + ), + output_filters=round_filters( + block_args.output_filters, self._global_params + ), + num_repeat=round_repeats( + block_args.num_repeat, self._global_params + ), + ) + + # The first block needs to take care of stride and filter size increase. 
+ self._blocks.append( + MBConvBlock( + block_args, self._global_params, image_size=image_size + ) + ) + image_size = calculate_output_image_size( + image_size, block_args.stride + ) + if block_args.num_repeat > 1: + block_args = block_args._replace( + input_filters=block_args.output_filters, stride=1 + ) + for _ in range(block_args.num_repeat - 1): + self._blocks.append( + MBConvBlock( + block_args, self._global_params, image_size=image_size + ) + ) + # image_size = calculate_output_image_size(image_size, block_args.stride) # ? + + # Head + in_channels = block_args.output_filters # output of final block + out_channels = round_filters(1280, self._global_params) + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._conv_head = Conv2d( + in_channels, out_channels, kernel_size=1, bias=False + ) + self._bn1 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + + # Final linear layer + self._avg_pooling = nn.AdaptiveAvgPool2d(1) + self._dropout = nn.Dropout(self._global_params.dropout_rate) + # self._fc = nn.Linear(out_channels, self._global_params.num_classes) + self._swish = MemoryEfficientSwish() + + self._out_features = out_channels + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export)""" + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + for block in self._blocks: + block.set_swish(memory_efficient) + + def extract_features(self, inputs): + """Returns output of the final convolution layer""" + + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) + x = block(x, drop_connect_rate=drop_connect_rate) + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + return x + + def forward(self, inputs): + """ + Calls 
extract_features to extract features, applies + final linear layer, and returns logits. + """ + bs = inputs.size(0) + # Convolution layers + x = self.extract_features(inputs) + + # Pooling and final linear layer + x = self._avg_pooling(x) + x = x.view(bs, -1) + x = self._dropout(x) + # x = self._fc(x) + return x + + @classmethod + def from_name(cls, model_name, override_params=None): + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params( + model_name, override_params + ) + return cls(blocks_args, global_params) + + @classmethod + def from_pretrained( + cls, model_name, advprop=False, num_classes=1000, in_channels=3 + ): + model = cls.from_name( + model_name, override_params={"num_classes": num_classes} + ) + load_pretrained_weights( + model, model_name, load_fc=(num_classes == 1000), advprop=advprop + ) + model._change_in_channels(in_channels) + return model + + @classmethod + def get_image_size(cls, model_name): + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name): + """Validates model name.""" + valid_models = ["efficientnet-b" + str(i) for i in range(9)] + if model_name not in valid_models: + raise ValueError( + "model_name should be one of: " + ", ".join(valid_models) + ) + + def _change_in_channels(model, in_channels): + if in_channels != 3: + Conv2d = get_same_padding_conv2d( + image_size=model._global_params.image_size + ) + out_channels = round_filters(32, model._global_params) + model._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + + +def build_efficientnet(name, pretrained): + if pretrained: + return EfficientNet.from_pretrained("efficientnet-{}".format(name)) + else: + return EfficientNet.from_name("efficientnet-{}".format(name)) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b0(pretrained=True, **kwargs): + return build_efficientnet("b0", 
pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b1(pretrained=True, **kwargs): + return build_efficientnet("b1", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b2(pretrained=True, **kwargs): + return build_efficientnet("b2", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b3(pretrained=True, **kwargs): + return build_efficientnet("b3", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b4(pretrained=True, **kwargs): + return build_efficientnet("b4", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b5(pretrained=True, **kwargs): + return build_efficientnet("b5", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b6(pretrained=True, **kwargs): + return build_efficientnet("b6", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b7(pretrained=True, **kwargs): + return build_efficientnet("b7", pretrained) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py new file mode 100755 index 00000000..a4205061 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py @@ -0,0 +1,477 @@ +""" +This file contains helper functions for building the model and for loading model parameters. +These helper functions are built to mirror those in the official TensorFlow implementation. 
class SwishImplementation(torch.autograd.Function):
    """Swish activation, ``x * sigmoid(x)``, with a hand-written backward pass.

    Only the input tensor is saved for the backward pass; the sigmoid is
    recomputed there rather than stored.
    """

    @staticmethod
    def forward(ctx, i):
        """Return ``i * sigmoid(i)`` and save ``i`` for the backward pass."""
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient with respect to the tensor given to ``forward``."""
        # Read the saved input through ``saved_tensors``; ``saved_variables``
        # is a deprecated alias of the same data.
        (i,) = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        # d/di [i * s(i)] = s(i) * (1 + i * (1 - s(i)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
def drop_connect(inputs, p, training):
    """Randomly zero whole samples of a batch and rescale the survivors.

    Args:
        inputs: tensor whose first dimension is the batch. The random mask is
            created with shape ``[batch_size, 1, 1, 1]`` and broadcast
            against ``inputs``.
        p: drop probability; must satisfy ``0 <= p <= 1``.
        training: when false, ``inputs`` is returned unchanged.

    Returns:
        ``inputs`` itself when ``training`` is false. Otherwise a tensor in
        which each sample is either zeroed or divided by ``1 - p``.

    Raises:
        ValueError: if ``training`` is true and ``p`` lies outside ``[0, 1]``.
    """
    if not training:
        return inputs
    if not 0 <= p <= 1:
        raise ValueError("p must be in the range [0, 1], got {}".format(p))

    keep_prob = 1 - p
    if keep_prob == 0:
        # Everything is dropped. Dividing by keep_prob here would yield NaN
        # (x / 0 * 0), so return zeros explicitly.
        return torch.zeros_like(inputs)

    batch_size = inputs.shape[0]
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    random_tensor = keep_prob + torch.rand(
        [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device
    )
    binary_tensor = torch.floor(random_tensor)
    return inputs / keep_prob * binary_tensor
+ """ + if input_image_size is None: + return None + image_height, image_width = get_width_and_height_from_size( + input_image_size + ) + stride = stride if isinstance(stride, int) else stride[0] + image_height = int(math.ceil(image_height / stride)) + image_width = int(math.ceil(image_width / stride)) + return [image_height, image_width] + + +class Conv2dDynamicSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a dynamic image size""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias=True, + ): + super().__init__( + in_channels, out_channels, kernel_size, stride, 0, dilation, + groups, bias + ) + self.stride = self.stride if len(self.stride + ) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + (oh-1) * self.stride[0] + (kh-1) * self.dilation[0] + 1 - ih, 0 + ) + pad_w = max( + (ow-1) * self.stride[1] + (kw-1) * self.dilation[1] + 1 - iw, 0 + ) + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, + [pad_w // 2, pad_w - pad_w//2, pad_h // 2, pad_h - pad_h//2] + ) + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + + +class Conv2dStaticSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a fixed image size""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + image_size=None, + **kwargs + ): + super().__init__(in_channels, out_channels, kernel_size, **kwargs) + self.stride = self.stride if len(self.stride + ) == 2 else [self.stride[0]] * 2 + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, + image_size) if isinstance(image_size, int) else image_size + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + 
class BlockDecoder(object):
    """Converts between block strings and ``BlockArgs`` tuples.

    A block string is a ``_``-separated list of options such as
    ``"r1_k3_s11_e1_i32_o16_se0.25"``; each option is a letter key followed
    by its value, plus the optional bare flag ``noskip``.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Parse one block string into a ``BlockArgs``.

        The stride option must be one digit, or two identical digits; it is
        stored as a one-element list.
        """
        assert isinstance(block_string, str)

        ops = block_string.split("_")
        options = {}
        for op in ops:
            # Split at the first digit: "se0.25" -> key "se", value "0.25".
            # Options without a digit (e.g. "noskip") yield a single piece
            # and are skipped here.
            splits = re.split(r"(\d.*)", op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride. A missing "s" option fails the assertion instead of
        # raising KeyError from inside the check.
        stride = options.get("s", "")
        assert len(stride) == 1 or (
            len(stride) == 2 and stride[0] == stride[1]
        )

        return BlockArgs(
            kernel_size=int(options["k"]),
            num_repeat=int(options["r"]),
            input_filters=int(options["i"]),
            output_filters=int(options["o"]),
            expand_ratio=int(options["e"]),
            id_skip=("noskip" not in block_string),
            se_ratio=float(options["se"]) if "se" in options else None,
            stride=[int(stride[0])],
        )

    @staticmethod
    def _encode_block_string(block):
        """Encode one block as a block string.

        ``block.stride`` may be an int or a sequence of one or two ints; a
        single value is written twice (``s11``, ``s22``). The ``se`` option
        is emitted only when ``se_ratio`` is set and lies in ``(0, 1]``.
        """
        stride = block.stride
        if isinstance(stride, int):
            stride = [stride]
        stride_h = stride[0]
        stride_w = stride[1] if len(stride) > 1 else stride[0]

        args = [
            "r%d" % block.num_repeat,
            "k%d" % block.kernel_size,
            "s%d%d" % (stride_h, stride_w),
            "e%s" % block.expand_ratio,
            "i%d" % block.input_filters,
            "o%d" % block.output_filters,
        ]
        # se_ratio is None for blocks decoded without an "se" option.
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append("se%s" % block.se_ratio)
        if block.id_skip is False:
            args.append("noskip")
        return "_".join(args)

    @staticmethod
    def decode(string_list):
        """
        Decodes a list of string notations to specify blocks inside the network.

        :param string_list: a list of strings, each string is a notation of block
        :return: a list of BlockArgs namedtuples of block args
        """
        assert isinstance(string_list, list)
        return [
            BlockDecoder._decode_block_string(block_string)
            for block_string in string_list
        ]

    @staticmethod
    def encode(blocks_args):
        """
        Encodes a list of BlockArgs to a list of strings.

        :param blocks_args: a list of BlockArgs namedtuples of block args
        :return: a list of strings, each string is a notation of block
        """
        return [
            BlockDecoder._encode_block_string(block) for block in blocks_args
        ]
+ global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +url_map = { + "efficientnet-b0": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth", + "efficientnet-b1": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth", + "efficientnet-b2": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth", + "efficientnet-b3": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth", + "efficientnet-b4": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth", + "efficientnet-b5": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth", + "efficientnet-b6": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth", + "efficientnet-b7": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth", +} + +url_map_advprop = { + "efficientnet-b0": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth", + "efficientnet-b1": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth", + "efficientnet-b2": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth", + "efficientnet-b3": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth", + "efficientnet-b4": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth", + "efficientnet-b5": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth", + "efficientnet-b6": + 
"https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth", + "efficientnet-b7": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth", + "efficientnet-b8": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth", +} + + +def load_pretrained_weights(model, model_name, load_fc=True, advprop=False): + """Loads pretrained weights, and downloads if loading for the first time.""" + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[model_name]) + model.load_state_dict(state_dict, strict=False) + """ + if load_fc: + model.load_state_dict(state_dict) + else: + state_dict.pop('_fc.weight') + state_dict.pop('_fc.bias') + res = model.load_state_dict(state_dict, strict=False) + assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' + + print('Loaded pretrained weights for {}'.format(model_name)) + """ diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py new file mode 100644 index 00000000..8c070899 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py @@ -0,0 +1,135 @@ +import torch.nn as nn +import torch.nn.functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class PreActBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d( + in_planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + + if stride != 1 or in_planes 
!= self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, "shortcut") else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out += shortcut + return out + + +class PreActBottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn3 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) + + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, "shortcut") else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out = self.conv3(F.relu(self.bn3(out))) + out += shortcut + return out + + +class PreActResNet(Backbone): + + def __init__(self, block, num_blocks): + super().__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=3, stride=1, padding=1, bias=False + ) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + + self._out_features = 512 * block.expansion + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1] * 
(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = self.conv1(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + return out + + +""" +Preact-ResNet18 was used for the CIFAR10 and +SVHN datasets (both are SSL tasks) in + +- Wang et al. Semi-Supervised Learning by +Augmented Distribution Alignment. ICCV 2019. +""" + + +@BACKBONE_REGISTRY.register() +def preact_resnet18(**kwargs): + return PreActResNet(PreActBlock, [2, 2, 2, 2]) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet.py new file mode 100644 index 00000000..60b9a8c8 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet.py @@ -0,0 +1,591 @@ +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +model_urls = { + "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", + "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", + "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", + "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", + "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super().__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = 
nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super().__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(Backbone): + + def __init__( + self, + block, + layers, + ms_class=None, + ms_layers=[], + ms_p=0.5, + ms_a=0.1, + **kwargs + ): + self.inplanes = 64 + super().__init__() + + # backbone network + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False + ) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = 
self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + + self._out_features = 512 * block.expansion + + self.mixstyle = None + if ms_layers: + self.mixstyle = ms_class(p=ms_p, alpha=ms_a) + for layer_name in ms_layers: + assert layer_name in ["layer1", "layer2", "layer3"] + print( + f"Insert {self.mixstyle.__class__.__name__} after {ms_layers}" + ) + self.ms_layers = ms_layers + + self._init_params() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + if "layer1" in self.ms_layers: + x = self.mixstyle(x) + x = self.layer2(x) + if "layer2" in self.ms_layers: + x = self.mixstyle(x) + x = self.layer3(x) + if "layer3" in self.ms_layers: + x = self.mixstyle(x) + return self.layer4(x) + + 
def forward(self, x): + f = self.featuremaps(x) + v = self.global_avgpool(f) + return v.view(v.size(0), -1) + + +def init_pretrained_weights(model, model_url): + pretrain_dict = model_zoo.load_url(model_url) + model.load_state_dict(pretrain_dict, strict=False) + + +""" +Residual network configurations: +-- +resnet18: block=BasicBlock, layers=[2, 2, 2, 2] +resnet34: block=BasicBlock, layers=[3, 4, 6, 3] +resnet50: block=Bottleneck, layers=[3, 4, 6, 3] +resnet101: block=Bottleneck, layers=[3, 4, 23, 3] +resnet152: block=Bottleneck, layers=[3, 8, 36, 3] +""" + + +@BACKBONE_REGISTRY.register() +def resnet18(pretrained=True, **kwargs): + model = ResNet(block=BasicBlock, layers=[2, 2, 2, 2]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet34(pretrained=True, **kwargs): + model = ResNet(block=BasicBlock, layers=[3, 4, 6, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet34"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 4, 6, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 4, 23, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet152(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 8, 36, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet152"]) + + return model + + +""" +Residual networks with mixstyle +""" + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], 
+ ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + 
ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +""" +Residual networks with efdmix +""" + + +@BACKBONE_REGISTRY.register() +def resnet18_efdmix_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l123(pretrained=True, **kwargs): + from 
dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model diff --git 
a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py new file mode 100644 index 00000000..c4e08ded --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py @@ -0,0 +1,734 @@ +""" +Dynamic ResNet from `"Dynamic Domain Generalization" `_. +""" + +from typing import Any, List, Type, Union, Callable, Optional +from collections import OrderedDict +import torch +import torch.nn as nn +from torch import Tensor +from torch.hub import load_state_dict_from_url + +from dassl.modeling.ops import MixStyle, Conv2dDynamic + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +__all__ = [ + "resnet18_dynamic", "resnet50_dynamic", "resnet101_dynamic", + "resnet18_dynamic_ms_l123", "resnet18_dynamic_ms_l12", + "resnet18_dynamic_ms_l1", "resnet50_dynamic_ms_l123", + "resnet50_dynamic_ms_l12", "resnet50_dynamic_ms_l1", + "resnet101_dynamic_ms_l123", "resnet101_dynamic_ms_l12", + "resnet101_dynamic_ms_l1" +] + +model_urls = { + "resnet18_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet18_dynamic-074db766.pth", + "resnet50_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet50_dynamic-2c3b0201.pth", + "resnet101_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet101_dynamic-c5f15780.pth", +} + + +def conv3x3( + in_planes: int, + out_planes: int, + stride: int = 1, + groups: int = 1, + dilation: int = 1 +) -> nn.Conv2d: + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation + ) + + +def conv3x3_dynamic( + in_planes: int, + out_planes: int, + stride: int = 1, + attention_in_channels: int = None +) -> Conv2dDynamic: + """3x3 convolution with padding""" + return Conv2dDynamic( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + 
attention_in_channels=attention_in_channels + ) + + +def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, bias=False + ) + + +def load_state_dict( + model: nn.Module, + state_dict: "OrderedDict[str, Tensor]", + allowed_missing_keys: List = None +): + r"""Copies parameters and buffers from :attr:`state_dict` into + this module and its descendants. If :attr:`strict` is ``True``, then + the keys of :attr:`state_dict` must exactly match the keys returned + by this module's :meth:`~torch.nn.Module.state_dict` function. + + Args: + model (torch.nn.Module): a torch.nn.Module object where state_dict load for. + state_dict (dict): a dict containing parameters and + persistent buffers. + allowed_missing_keys (List, optional): not raise `RuntimeError` if missing_keys + equal to allowed_missing_keys. + + Returns: + ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields: + * **missing_keys** is a list of str containing the missing keys + * **unexpected_keys** is a list of str containing the unexpected keys + + Note: + If a parameter or buffer is registered as ``None`` and its corresponding key + exists in :attr:`state_dict`, :meth:`load_state_dict` will raise a + ``RuntimeError``. + """ + missing_keys, unexpected_keys = model.load_state_dict( + state_dict, strict=allowed_missing_keys is None + ) + + msgs: List[str] = [] + raise_error = False + if len(unexpected_keys) > 0: + raise_error = True + msgs.insert( + 0, "Unexpected key(s) in state_dict: {}. ".format( + ", ".join("'{}'".format(k) for k in unexpected_keys) + ) + ) + if len(missing_keys) > 0: + if allowed_missing_keys is None or sorted(missing_keys) != sorted( + allowed_missing_keys + ): + raise_error = True + msgs.insert( + 0, "Missing key(s) in state_dict: {}. 
".format( + ", ".join("'{}'".format(k) for k in missing_keys) + ) + ) + if raise_error: + raise RuntimeError( + "Error(s) in loading state_dict for {}:\n\t{}".format( + model.__class__.__name__, "\n\t".join(msgs) + ) + ) + if len(msgs) > 0: + print( + "\nInfo(s) in loading state_dict for {}:\n\t{}".format( + model.__class__.__name__, "\n\t".join(msgs) + ) + ) + + +class BasicBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError( + "BasicBlock only supports groups=1 and base_width=64" + ) + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BasicBlock" + ) + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 
+ # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion: int = 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width/64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BasicBlockDynamic(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlockDynamic, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError( + "BasicBlock only supports groups=1 and base_width=64" + ) + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not 
supported in BasicBlock" + ) + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3_dynamic( + inplanes, planes, stride, attention_in_channels=inplanes + ) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3_dynamic( + planes, planes, attention_in_channels=inplanes + ) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x, attention_x=x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out, attention_x=x) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BottleneckDynamic(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
+ + expansion: int = 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BottleneckDynamic, self).__init__() + if groups != 1: + raise ValueError("BottleneckDynamic only supports groups=1") + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BottleneckDynamic" + ) + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width/64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3_dynamic( + width, width, stride, attention_in_channels=inplanes + ) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out, attention_x=x) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(Backbone): + + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck, BasicBlockDynamic, + BottleneckDynamic]], + layers: List[int], + has_fc: bool = True, + num_classes: int = 1000, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: Optional[List[bool]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + ms_class=None, + ms_layers=None, + ms_p=0.5, + ms_a=0.1 + ) -> None: + super(ResNet, self).__init__() + if 
norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError( + "replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}". + format(replace_stride_with_dilation) + ) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d( + 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False + ) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer( + block, + 128, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0] + ) + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1] + ) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2] + ) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + self.has_fc = has_fc + self._out_features = 512 * block.expansion + if has_fc: + self.fc = nn.Linear(self.out_features, num_classes) + self._out_features = num_classes + + if ms_class is not None and ms_layers is not None: + self.ms_class = ms_class(p=ms_p, alpha=ms_a) + for layer in ms_layers: + assert layer in ["layer1", "layer2", "layer3"] + self.ms_layers = ms_layers + else: + self.ms_class = None + self.ms_layers = [] + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize 
the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer( + self, + block: Type[Union[BasicBlock, Bottleneck]], + planes: int, + blocks: int, + stride: int = 1, + dilate: bool = False + ) -> nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer + ) + ) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation, + norm_layer=norm_layer + ) + ) + + return nn.Sequential(*layers) + + def _forward_impl(self, x: Tensor) -> Tensor: + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + if "layer1" in self.ms_layers: + x = self.ms_class(x) + x = self.layer2(x) + if "layer2" in self.ms_layers: + x = self.ms_class(x) + x = self.layer3(x) + if "layer3" in self.ms_layers: + x = self.ms_class(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + if self.has_fc: + x = self.fc(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _resnet( + arch: str, block: Type[Union[BasicBlock, 
Bottleneck, BasicBlockDynamic, + BottleneckDynamic]], layers: List[int], + pretrained: bool, progress: bool, **kwargs: Any +) -> ResNet: + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url( + model_urls[arch], progress=progress + ) + # remove useless keys from sate_dict 1. no fc; 2. out_features != 1000. + removed_keys = model.has_fc is False or ( + model.has_fc is True and model.out_features != 1000 + ) + removed_keys = ["fc.weight", "fc.bias"] if removed_keys else [] + for key in removed_keys: + state_dict.pop(key) + # if has fc, then allow missing key, else strict load state_dict. + allowed_missing_keys = removed_keys if model.has_fc else None + load_state_dict(model, state_dict, allowed_missing_keys) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + 
BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", 
"layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/vgg.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/vgg.py new file mode 100644 index 00000000..3f91491a --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/vgg.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + +model_urls = { + "vgg11": "https://download.pytorch.org/models/vgg11-bbd30ac9.pth", + "vgg13": "https://download.pytorch.org/models/vgg13-c768596a.pth", + "vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth", + "vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", + "vgg11_bn": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth", + "vgg13_bn": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth", + "vgg16_bn": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth", + "vgg19_bn": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth", +} + + +class VGG(Backbone): + + def __init__(self, features, init_weights=True): + super().__init__() + self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + # Note that self.classifier outputs features rather than logits + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + ) + + self._out_features = 4096 + + if init_weights: + self._initialize_weights() + + def forward(self, x): + x = 
self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + return self.classifier(x) + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + +def make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == "M": + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + +cfgs = { + "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "B": + [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "D": [ + 64, + 64, + "M", + 128, + 128, + "M", + 256, + 256, + 256, + "M", + 512, + 512, + 512, + "M", + 512, + 512, + 512, + "M", + ], + "E": [ + 64, + 64, + "M", + 128, + 128, + "M", + 256, + 256, + 256, + 256, + "M", + 512, + 512, + 512, + 512, + "M", + 512, + 512, + 512, + 512, + "M", + ], +} + + +def _vgg(arch, cfg, batch_norm, pretrained): + init_weights = False if pretrained else True + model = VGG( + make_layers(cfgs[cfg], batch_norm=batch_norm), + init_weights=init_weights + ) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], progress=True) + model.load_state_dict(state_dict, strict=False) + return model + + +@BACKBONE_REGISTRY.register() +def vgg16(pretrained=True, **kwargs): + return _vgg("vgg16", "D", False, pretrained) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py 
b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py new file mode 100644 index 00000000..88ea949d --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py @@ -0,0 +1,150 @@ +""" +Modified from https://github.com/xternalz/WideResNet-pytorch +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class BasicBlock(nn.Module): + + def __init__(self, in_planes, out_planes, stride, dropRate=0.0): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.relu1 = nn.LeakyReLU(0.01, inplace=True) + self.conv1 = nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(out_planes) + self.relu2 = nn.LeakyReLU(0.01, inplace=True) + self.conv2 = nn.Conv2d( + out_planes, + out_planes, + kernel_size=3, + stride=1, + padding=1, + bias=False + ) + self.droprate = dropRate + self.equalInOut = in_planes == out_planes + self.convShortcut = ( + (not self.equalInOut) and nn.Conv2d( + in_planes, + out_planes, + kernel_size=1, + stride=stride, + padding=0, + bias=False, + ) or None + ) + + def forward(self, x): + if not self.equalInOut: + x = self.relu1(self.bn1(x)) + else: + out = self.relu1(self.bn1(x)) + out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) + if self.droprate > 0: + out = F.dropout(out, p=self.droprate, training=self.training) + out = self.conv2(out) + return torch.add(x if self.equalInOut else self.convShortcut(x), out) + + +class NetworkBlock(nn.Module): + + def __init__( + self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0 + ): + super().__init__() + self.layer = self._make_layer( + block, in_planes, out_planes, nb_layers, stride, dropRate + ) + + def _make_layer( + self, block, in_planes, out_planes, nb_layers, stride, dropRate + ): + layers = [] + for i in range(int(nb_layers)): + layers.append( + 
block( + i == 0 and in_planes or out_planes, + out_planes, + i == 0 and stride or 1, + dropRate, + ) + ) + return nn.Sequential(*layers) + + def forward(self, x): + return self.layer(x) + + +class WideResNet(Backbone): + + def __init__(self, depth, widen_factor, dropRate=0.0): + super().__init__() + nChannels = [ + 16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor + ] + assert (depth-4) % 6 == 0 + n = (depth-4) / 6 + block = BasicBlock + # 1st conv before any network block + self.conv1 = nn.Conv2d( + 3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False + ) + # 1st block + self.block1 = NetworkBlock( + n, nChannels[0], nChannels[1], block, 1, dropRate + ) + # 2nd block + self.block2 = NetworkBlock( + n, nChannels[1], nChannels[2], block, 2, dropRate + ) + # 3rd block + self.block3 = NetworkBlock( + n, nChannels[2], nChannels[3], block, 2, dropRate + ) + # global average pooling and classifier + self.bn1 = nn.BatchNorm2d(nChannels[3]) + self.relu = nn.LeakyReLU(0.01, inplace=True) + + self._out_features = nChannels[3] + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.bias.data.zero_() + + def forward(self, x): + out = self.conv1(x) + out = self.block1(out) + out = self.block2(out) + out = self.block3(out) + out = self.relu(self.bn1(out)) + out = F.adaptive_avg_pool2d(out, 1) + return out.view(out.size(0), -1) + + +@BACKBONE_REGISTRY.register() +def wide_resnet_28_2(**kwargs): + return WideResNet(28, 2) + + +@BACKBONE_REGISTRY.register() +def wide_resnet_16_4(**kwargs): + return WideResNet(16, 4) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/__init__.py new file mode 100644 index 00000000..e76fb8cc --- /dev/null +++ 
b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/__init__.py @@ -0,0 +1,3 @@ +from .build import build_head, HEAD_REGISTRY # isort:skip + +from .mlp import mlp diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/build.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/build.py new file mode 100644 index 00000000..730437b6 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +HEAD_REGISTRY = Registry("HEAD") + + +def build_head(name, verbose=True, **kwargs): + avai_heads = HEAD_REGISTRY.registered_names() + check_availability(name, avai_heads) + if verbose: + print("Head: {}".format(name)) + return HEAD_REGISTRY.get(name)(**kwargs) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/mlp.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/mlp.py new file mode 100644 index 00000000..89aae50e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/head/mlp.py @@ -0,0 +1,50 @@ +import functools +import torch.nn as nn + +from .build import HEAD_REGISTRY + + +class MLP(nn.Module): + + def __init__( + self, + in_features=2048, + hidden_layers=[], + activation="relu", + bn=True, + dropout=0.0, + ): + super().__init__() + if isinstance(hidden_layers, int): + hidden_layers = [hidden_layers] + + assert len(hidden_layers) > 0 + self.out_features = hidden_layers[-1] + + mlp = [] + + if activation == "relu": + act_fn = functools.partial(nn.ReLU, inplace=True) + elif activation == "leaky_relu": + act_fn = functools.partial(nn.LeakyReLU, inplace=True) + else: + raise NotImplementedError + + for hidden_dim in hidden_layers: + mlp += [nn.Linear(in_features, hidden_dim)] + if bn: + mlp += [nn.BatchNorm1d(hidden_dim)] + mlp += [act_fn()] + if dropout > 0: + mlp += [nn.Dropout(dropout)] + in_features = hidden_dim + + self.mlp = nn.Sequential(*mlp) + + def forward(self, x): + return self.mlp(x) + + 
+@HEAD_REGISTRY.register() +def mlp(**kwargs): + return MLP(**kwargs) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/__init__.py new file mode 100644 index 00000000..a6fcc638 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/__init__.py @@ -0,0 +1,5 @@ +from .build import build_network, NETWORK_REGISTRY # isort:skip + +from .ddaig_fcn import ( + fcn_3x32_gctx, fcn_3x64_gctx, fcn_3x32_gctx_stn, fcn_3x64_gctx_stn +) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/build.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/build.py new file mode 100644 index 00000000..e615314f --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +NETWORK_REGISTRY = Registry("NETWORK") + + +def build_network(name, verbose=True, **kwargs): + avai_models = NETWORK_REGISTRY.registered_names() + check_availability(name, avai_models) + if verbose: + print("Network: {}".format(name)) + return NETWORK_REGISTRY.get(name)(**kwargs) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py new file mode 100644 index 00000000..17e3bdd2 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py @@ -0,0 +1,329 @@ +""" +Credit to: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix +""" +import functools +import torch +import torch.nn as nn +from torch.nn import functional as F + +from .build import NETWORK_REGISTRY + + +def init_network_weights(model, init_type="normal", gain=0.02): + + def _init_func(m): + classname = m.__class__.__name__ + if hasattr(m, "weight") and ( + classname.find("Conv") != -1 or classname.find("Linear") != -1 + ): + if init_type == "normal": + nn.init.normal_(m.weight.data, 0.0, 
gain) + elif init_type == "xavier": + nn.init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == "kaiming": + nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in") + elif init_type == "orthogonal": + nn.init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError( + "initialization method {} is not implemented". + format(init_type) + ) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias.data, 0.0) + elif classname.find("BatchNorm2d") != -1: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + elif classname.find("InstanceNorm2d") != -1: + if m.weight is not None and m.bias is not None: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + model.apply(_init_func) + + +def get_norm_layer(norm_type="instance"): + if norm_type == "batch": + norm_layer = functools.partial(nn.BatchNorm2d, affine=True) + elif norm_type == "instance": + norm_layer = functools.partial( + nn.InstanceNorm2d, affine=False, track_running_stats=False + ) + elif norm_type == "none": + norm_layer = None + else: + raise NotImplementedError( + "normalization layer [%s] is not found" % norm_type + ) + return norm_layer + + +class ResnetBlock(nn.Module): + + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + super().__init__() + self.conv_block = self.build_conv_block( + dim, padding_type, norm_layer, use_dropout, use_bias + ) + + def build_conv_block( + self, dim, padding_type, norm_layer, use_dropout, use_bias + ): + conv_block = [] + p = 0 + if padding_type == "reflect": + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == "replicate": + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + p = 1 + else: + raise NotImplementedError( + "padding [%s] is not implemented" % padding_type + ) + + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim), + nn.ReLU(True), + ] + if use_dropout: + 
conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == "reflect": + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == "replicate": + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + p = 1 + else: + raise NotImplementedError( + "padding [%s] is not implemented" % padding_type + ) + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim), + ] + + return nn.Sequential(*conv_block) + + def forward(self, x): + return x + self.conv_block(x) + + +class LocNet(nn.Module): + """Localization network.""" + + def __init__( + self, + input_nc, + nc=32, + n_blocks=3, + use_dropout=False, + padding_type="zero", + image_size=32, + ): + super().__init__() + + backbone = [] + backbone += [ + nn.Conv2d( + input_nc, nc, kernel_size=3, stride=2, padding=1, bias=False + ) + ] + backbone += [nn.BatchNorm2d(nc)] + backbone += [nn.ReLU(True)] + for _ in range(n_blocks): + backbone += [ + ResnetBlock( + nc, + padding_type=padding_type, + norm_layer=nn.BatchNorm2d, + use_dropout=use_dropout, + use_bias=False, + ) + ] + backbone += [nn.MaxPool2d(2, stride=2)] + self.backbone = nn.Sequential(*backbone) + reduced_imsize = int(image_size * 0.5**(n_blocks + 1)) + self.fc_loc = nn.Linear(nc * reduced_imsize**2, 2 * 2) + + def forward(self, x): + x = self.backbone(x) + x = x.view(x.size(0), -1) + x = self.fc_loc(x) + x = torch.tanh(x) + x = x.view(-1, 2, 2) + theta = x.data.new_zeros(x.size(0), 2, 3) + theta[:, :, :2] = x + return theta + + +class FCN(nn.Module): + """Fully convolutional network.""" + + def __init__( + self, + input_nc, + output_nc, + nc=32, + n_blocks=3, + norm_layer=nn.BatchNorm2d, + use_dropout=False, + padding_type="reflect", + gctx=True, + stn=False, + image_size=32, + ): + super().__init__() + + backbone = [] + + p = 0 + if padding_type == "reflect": + backbone += [nn.ReflectionPad2d(1)] + elif padding_type == "replicate": + backbone += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + 
p = 1 + else: + raise NotImplementedError + backbone += [ + nn.Conv2d( + input_nc, nc, kernel_size=3, stride=1, padding=p, bias=False + ) + ] + backbone += [norm_layer(nc)] + backbone += [nn.ReLU(True)] + + for _ in range(n_blocks): + backbone += [ + ResnetBlock( + nc, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=False, + ) + ] + self.backbone = nn.Sequential(*backbone) + + # global context fusion layer + self.gctx_fusion = None + if gctx: + self.gctx_fusion = nn.Sequential( + nn.Conv2d( + 2 * nc, nc, kernel_size=1, stride=1, padding=0, bias=False + ), + norm_layer(nc), + nn.ReLU(True), + ) + + self.regress = nn.Sequential( + nn.Conv2d( + nc, output_nc, kernel_size=1, stride=1, padding=0, bias=True + ), + nn.Tanh(), + ) + + self.locnet = None + if stn: + self.locnet = LocNet( + input_nc, nc=nc, n_blocks=n_blocks, image_size=image_size + ) + + def init_loc_layer(self): + """Initialize the weights/bias with identity transformation.""" + if self.locnet is not None: + self.locnet.fc_loc.weight.data.zero_() + self.locnet.fc_loc.bias.data.copy_( + torch.tensor([1, 0, 0, 1], dtype=torch.float) + ) + + def stn(self, x): + """Spatial transformer network.""" + theta = self.locnet(x) + grid = F.affine_grid(theta, x.size()) + return F.grid_sample(x, grid), theta + + def forward(self, x, lmda=1.0, return_p=False, return_stn_output=False): + """ + Args: + x (torch.Tensor): input mini-batch. + lmda (float): multiplier for perturbation. + return_p (bool): return perturbation. + return_stn_output (bool): return the output of stn. 
+ """ + theta = None + if self.locnet is not None: + x, theta = self.stn(x) + input = x + + x = self.backbone(x) + if self.gctx_fusion is not None: + c = F.adaptive_avg_pool2d(x, (1, 1)) + c = c.expand_as(x) + x = torch.cat([x, c], 1) + x = self.gctx_fusion(x) + + p = self.regress(x) + x_p = input + lmda*p + + if return_stn_output: + return x_p, p, input + + if return_p: + return x_p, p + + return x_p + + +@NETWORK_REGISTRY.register() +def fcn_3x32_gctx(**kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN(3, 3, nc=32, n_blocks=3, norm_layer=norm_layer) + init_network_weights(net, init_type="normal", gain=0.02) + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x64_gctx(**kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN(3, 3, nc=64, n_blocks=3, norm_layer=norm_layer) + init_network_weights(net, init_type="normal", gain=0.02) + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x32_gctx_stn(image_size=32, **kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN( + 3, + 3, + nc=32, + n_blocks=3, + norm_layer=norm_layer, + stn=True, + image_size=image_size + ) + init_network_weights(net, init_type="normal", gain=0.02) + net.init_loc_layer() + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x64_gctx_stn(image_size=224, **kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN( + 3, + 3, + nc=64, + n_blocks=3, + norm_layer=norm_layer, + stn=True, + image_size=image_size + ) + init_network_weights(net, init_type="normal", gain=0.02) + net.init_loc_layer() + return net diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/__init__.py new file mode 100644 index 00000000..44d06400 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/__init__.py @@ -0,0 +1,18 @@ +from .mmd import MaximumMeanDiscrepancy +from .conv import * +from .dsbn import DSBN1d, DSBN2d +from .mixup import 
mixup +from .efdmix import ( + EFDMix, random_efdmix, activate_efdmix, run_with_efdmix, deactivate_efdmix, + crossdomain_efdmix, run_without_efdmix +) +from .mixstyle import ( + MixStyle, random_mixstyle, activate_mixstyle, run_with_mixstyle, + deactivate_mixstyle, crossdomain_mixstyle, run_without_mixstyle +) +from .attention import * +from .transnorm import TransNorm1d, TransNorm2d +from .sequential2 import Sequential2 +from .reverse_grad import ReverseGrad +from .cross_entropy import cross_entropy +from .optimal_transport import SinkhornDivergence, MinibatchEnergyDistance diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/attention.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/attention.py new file mode 100644 index 00000000..16ddcdab --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/attention.py @@ -0,0 +1,31 @@ +import torch.nn as nn +from torch.nn import functional as F + +__all__ = ["Attention"] + + +class Attention(nn.Module): + """Attention from `"Dynamic Domain Generalization" `_. 
+ """ + + def __init__( + self, + in_channels: int, + out_features: int, + squeeze=None, + bias: bool = True + ): + super(Attention, self).__init__() + self.squeeze = squeeze if squeeze else in_channels // 16 + assert self.squeeze > 0 + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Linear(in_channels, self.squeeze, bias=bias) + self.fc2 = nn.Linear(self.squeeze, out_features, bias=bias) + self.sf = nn.Softmax(dim=-1) + + def forward(self, x): + x = self.avg_pool(x).view(x.shape[:-2]) + x = self.fc1(x) + x = F.relu(x, inplace=True) + x = self.fc2(x) + return self.sf(x) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/conv.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/conv.py new file mode 100644 index 00000000..fcee716f --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/conv.py @@ -0,0 +1,95 @@ +import torch.nn as nn + +from .attention import Attention + +__all__ = ["Conv2dDynamic"] + + +class Conv2dDynamic(nn.Module): + """Conv2dDynamic from `"Dynamic Domain Generalization" `_. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int, + padding: int, + bias: bool = True, + squeeze: int = None, + attention_in_channels: int = None + ) -> None: + super(Conv2dDynamic, self).__init__() + + if kernel_size // 2 != padding: + # Only when this condition is met, we can ensure that different + # kernel_size can obtain feature maps of consistent size. + # Let I, K, S, P, O: O = (I + 2P - K) // S + 1, if P = K // 2, then O = (I - K % 2) // S + 1 + # This means that the output of two different Ks with the same parity can be made the same by adjusting P. + raise ValueError("`padding` must be equal to `kernel_size // 2`.") + if kernel_size % 2 == 0: + raise ValueError( + "Kernel_size must be odd now because the templates we used are odd (kernel_size=1)." 
+ ) + + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=bias + ) + self.kernel_templates = nn.ModuleDict() + self.kernel_templates["conv_nn"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=min(in_channels, out_channels), + bias=bias + ) + self.kernel_templates["conv_11"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + padding=0, + bias=bias + ) + self.kernel_templates["conv_n1"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=(kernel_size, 1), + stride=stride, + padding=(padding, 0), + bias=bias + ) + self.kernel_templates["conv_1n"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=(1, kernel_size), + stride=stride, + padding=(0, padding), + bias=bias + ) + self.attention = Attention( + attention_in_channels if attention_in_channels else in_channels, + 4, + squeeze, + bias=bias + ) + + def forward(self, x, attention_x=None): + attention_x = x if attention_x is None else attention_x + y = self.attention(attention_x) + + out = self.conv(x) + + for i, template in enumerate(self.kernel_templates): + out += self.kernel_templates[template](x) * y[:, + i].view(-1, 1, 1, 1) + + return out diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py new file mode 100644 index 00000000..21723b02 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py @@ -0,0 +1,30 @@ +import torch +from torch.nn import functional as F + + +def cross_entropy(input, target, label_smooth=0, reduction="mean"): + """Cross entropy loss. + + Args: + input (torch.Tensor): logit matrix with shape of (batch, num_classes). + target (torch.LongTensor): int label matrix. + label_smooth (float, optional): label smoothing hyper-parameter. + Default is 0. 
+ reduction (str, optional): how the losses for a mini-batch + will be aggregated. Default is 'mean'. + """ + num_classes = input.shape[1] + log_prob = F.log_softmax(input, dim=1) + zeros = torch.zeros(log_prob.size()) + target = zeros.scatter_(1, target.unsqueeze(1).data.cpu(), 1) + target = target.type_as(input) + target = (1-label_smooth) * target + label_smooth/num_classes + loss = (-target * log_prob).sum(1) + if reduction == "mean": + return loss.mean() + elif reduction == "sum": + return loss.sum() + elif reduction == "none": + return loss + else: + raise ValueError diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/dsbn.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/dsbn.py new file mode 100644 index 00000000..e3ee3550 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/dsbn.py @@ -0,0 +1,45 @@ +import torch.nn as nn + + +class _DSBN(nn.Module): + """Domain Specific Batch Normalization. + + Args: + num_features (int): number of features. + n_domain (int): number of domains. + bn_type (str): type of bn. Choices are ['1d', '2d']. 
+ """ + + def __init__(self, num_features, n_domain, bn_type): + super().__init__() + if bn_type == "1d": + BN = nn.BatchNorm1d + elif bn_type == "2d": + BN = nn.BatchNorm2d + else: + raise ValueError + + self.bn = nn.ModuleList(BN(num_features) for _ in range(n_domain)) + + self.valid_domain_idxs = list(range(n_domain)) + self.n_domain = n_domain + self.domain_idx = 0 + + def select_bn(self, domain_idx=0): + assert domain_idx in self.valid_domain_idxs + self.domain_idx = domain_idx + + def forward(self, x): + return self.bn[self.domain_idx](x) + + +class DSBN1d(_DSBN): + + def __init__(self, num_features, n_domain): + super().__init__(num_features, n_domain, "1d") + + +class DSBN2d(_DSBN): + + def __init__(self, num_features, n_domain): + super().__init__(num_features, n_domain, "2d") diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/efdmix.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/efdmix.py new file mode 100644 index 00000000..af58815a --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/efdmix.py @@ -0,0 +1,118 @@ +import random +from contextlib import contextmanager +import torch +import torch.nn as nn + + +def deactivate_efdmix(m): + if type(m) == EFDMix: + m.set_activation_status(False) + + +def activate_efdmix(m): + if type(m) == EFDMix: + m.set_activation_status(True) + + +def random_efdmix(m): + if type(m) == EFDMix: + m.update_mix_method("random") + + +def crossdomain_efdmix(m): + if type(m) == EFDMix: + m.update_mix_method("crossdomain") + + +@contextmanager +def run_without_efdmix(model): + # Assume MixStyle was initially activated + try: + model.apply(deactivate_efdmix) + yield + finally: + model.apply(activate_efdmix) + + +@contextmanager +def run_with_efdmix(model, mix=None): + # Assume MixStyle was initially deactivated + if mix == "random": + model.apply(random_efdmix) + + elif mix == "crossdomain": + model.apply(crossdomain_efdmix) + + try: + model.apply(activate_efdmix) + yield + finally: + 
model.apply(deactivate_efdmix) + + +class EFDMix(nn.Module): + """EFDMix. + + Reference: + Zhang et al. Exact Feature Distribution Matching for Arbitrary Style Transfer and Domain Generalization. CVPR 2022. + """ + + def __init__(self, p=0.5, alpha=0.1, eps=1e-6, mix="random"): + """ + Args: + p (float): probability of using MixStyle. + alpha (float): parameter of the Beta distribution. + eps (float): scaling parameter to avoid numerical issues. + mix (str): how to mix. + """ + super().__init__() + self.p = p + self.beta = torch.distributions.Beta(alpha, alpha) + self.eps = eps + self.alpha = alpha + self.mix = mix + self._activated = True + + def __repr__(self): + return ( + f"MixStyle(p={self.p}, alpha={self.alpha}, eps={self.eps}, mix={self.mix})" + ) + + def set_activation_status(self, status=True): + self._activated = status + + def update_mix_method(self, mix="random"): + self.mix = mix + + def forward(self, x): + if not self.training or not self._activated: + return x + + if random.random() > self.p: + return x + + B, C, W, H = x.size(0), x.size(1), x.size(2), x.size(3) + x_view = x.view(B, C, -1) + value_x, index_x = torch.sort(x_view) # sort inputs + lmda = self.beta.sample((B, 1, 1)) + lmda = lmda.to(x.device) + + if self.mix == "random": + # random shuffle + perm = torch.randperm(B) + + elif self.mix == "crossdomain": + # split into two halves and swap the order + perm = torch.arange(B - 1, -1, -1) # inverse index + perm_b, perm_a = perm.chunk(2) + perm_b = perm_b[torch.randperm(perm_b.shape[0])] + perm_a = perm_a[torch.randperm(perm_a.shape[0])] + perm = torch.cat([perm_b, perm_a], 0) + + else: + raise NotImplementedError + + inverse_index = index_x.argsort(-1) + x_view_copy = value_x[perm].gather(-1, inverse_index) + new_x = x_view + (x_view_copy - x_view.detach()) * (1-lmda) + return new_x.view(B, C, W, H) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixstyle.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixstyle.py 
new file mode 100644 index 00000000..34f47a89 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixstyle.py @@ -0,0 +1,124 @@ +import random +from contextlib import contextmanager +import torch +import torch.nn as nn + + +def deactivate_mixstyle(m): + if type(m) == MixStyle: + m.set_activation_status(False) + + +def activate_mixstyle(m): + if type(m) == MixStyle: + m.set_activation_status(True) + + +def random_mixstyle(m): + if type(m) == MixStyle: + m.update_mix_method("random") + + +def crossdomain_mixstyle(m): + if type(m) == MixStyle: + m.update_mix_method("crossdomain") + + +@contextmanager +def run_without_mixstyle(model): + # Assume MixStyle was initially activated + try: + model.apply(deactivate_mixstyle) + yield + finally: + model.apply(activate_mixstyle) + + +@contextmanager +def run_with_mixstyle(model, mix=None): + # Assume MixStyle was initially deactivated + if mix == "random": + model.apply(random_mixstyle) + + elif mix == "crossdomain": + model.apply(crossdomain_mixstyle) + + try: + model.apply(activate_mixstyle) + yield + finally: + model.apply(deactivate_mixstyle) + + +class MixStyle(nn.Module): + """MixStyle. + + Reference: + Zhou et al. Domain Generalization with MixStyle. ICLR 2021. + """ + + def __init__(self, p=0.5, alpha=0.1, eps=1e-6, mix="random"): + """ + Args: + p (float): probability of using MixStyle. + alpha (float): parameter of the Beta distribution. + eps (float): scaling parameter to avoid numerical issues. + mix (str): how to mix. 
+ """ + super().__init__() + self.p = p + self.beta = torch.distributions.Beta(alpha, alpha) + self.eps = eps + self.alpha = alpha + self.mix = mix + self._activated = True + + def __repr__(self): + return ( + f"MixStyle(p={self.p}, alpha={self.alpha}, eps={self.eps}, mix={self.mix})" + ) + + def set_activation_status(self, status=True): + self._activated = status + + def update_mix_method(self, mix="random"): + self.mix = mix + + def forward(self, x): + if not self.training or not self._activated: + return x + + if random.random() > self.p: + return x + + B = x.size(0) + + mu = x.mean(dim=[2, 3], keepdim=True) + var = x.var(dim=[2, 3], keepdim=True) + sig = (var + self.eps).sqrt() + mu, sig = mu.detach(), sig.detach() + x_normed = (x-mu) / sig + + lmda = self.beta.sample((B, 1, 1, 1)) + lmda = lmda.to(x.device) + + if self.mix == "random": + # random shuffle + perm = torch.randperm(B) + + elif self.mix == "crossdomain": + # split into two halves and swap the order + perm = torch.arange(B - 1, -1, -1) # inverse index + perm_b, perm_a = perm.chunk(2) + perm_b = perm_b[torch.randperm(perm_b.shape[0])] + perm_a = perm_a[torch.randperm(perm_a.shape[0])] + perm = torch.cat([perm_b, perm_a], 0) + + else: + raise NotImplementedError + + mu2, sig2 = mu[perm], sig[perm] + mu_mix = mu*lmda + mu2 * (1-lmda) + sig_mix = sig*lmda + sig2 * (1-lmda) + + return x_normed*sig_mix + mu_mix diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixup.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixup.py new file mode 100644 index 00000000..5844074a --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mixup.py @@ -0,0 +1,23 @@ +import torch + + +def mixup(x1, x2, y1, y2, beta, preserve_order=False): + """Mixup. + + Args: + x1 (torch.Tensor): data with shape of (b, c, h, w). + x2 (torch.Tensor): data with shape of (b, c, h, w). + y1 (torch.Tensor): label with shape of (b, n). + y2 (torch.Tensor): label with shape of (b, n). 
+ beta (float): hyper-parameter for Beta sampling. + preserve_order (bool): apply lmda=max(lmda, 1-lmda). + Default is False. + """ + lmda = torch.distributions.Beta(beta, beta).sample([x1.shape[0], 1, 1, 1]) + if preserve_order: + lmda = torch.max(lmda, 1 - lmda) + lmda = lmda.to(x1.device) + xmix = x1*lmda + x2 * (1-lmda) + lmda = lmda[:, :, 0, 0] + ymix = y1*lmda + y2 * (1-lmda) + return xmix, ymix diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mmd.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mmd.py new file mode 100644 index 00000000..a23fa575 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/mmd.py @@ -0,0 +1,91 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + + +class MaximumMeanDiscrepancy(nn.Module): + + def __init__(self, kernel_type="rbf", normalize=False): + super().__init__() + self.kernel_type = kernel_type + self.normalize = normalize + + def forward(self, x, y): + # x, y: two batches of data with shape (batch, dim) + # MMD^2(x, y) = k(x, x') - 2k(x, y) + k(y, y') + if self.normalize: + x = F.normalize(x, dim=1) + y = F.normalize(y, dim=1) + if self.kernel_type == "linear": + return self.linear_mmd(x, y) + elif self.kernel_type == "poly": + return self.poly_mmd(x, y) + elif self.kernel_type == "rbf": + return self.rbf_mmd(x, y) + else: + raise NotImplementedError + + def linear_mmd(self, x, y): + # k(x, y) = x^T y + k_xx = self.remove_self_distance(torch.mm(x, x.t())) + k_yy = self.remove_self_distance(torch.mm(y, y.t())) + k_xy = torch.mm(x, y.t()) + return k_xx.mean() + k_yy.mean() - 2 * k_xy.mean() + + def poly_mmd(self, x, y, alpha=1.0, c=2.0, d=2): + # k(x, y) = (alpha * x^T y + c)^d + k_xx = self.remove_self_distance(torch.mm(x, x.t())) + k_xx = (alpha*k_xx + c).pow(d) + k_yy = self.remove_self_distance(torch.mm(y, y.t())) + k_yy = (alpha*k_yy + c).pow(d) + k_xy = torch.mm(x, y.t()) + k_xy = (alpha*k_xy + c).pow(d) + return k_xx.mean() + k_yy.mean() - 2 * 
k_xy.mean() + + def rbf_mmd(self, x, y): + # k_xx + d_xx = self.euclidean_squared_distance(x, x) + d_xx = self.remove_self_distance(d_xx) + k_xx = self.rbf_kernel_mixture(d_xx) + # k_yy + d_yy = self.euclidean_squared_distance(y, y) + d_yy = self.remove_self_distance(d_yy) + k_yy = self.rbf_kernel_mixture(d_yy) + # k_xy + d_xy = self.euclidean_squared_distance(x, y) + k_xy = self.rbf_kernel_mixture(d_xy) + return k_xx.mean() + k_yy.mean() - 2 * k_xy.mean() + + @staticmethod + def rbf_kernel_mixture(exponent, sigmas=[1, 5, 10]): + K = 0 + for sigma in sigmas: + gamma = 1.0 / (2.0 * sigma**2) + K += torch.exp(-gamma * exponent) + return K + + @staticmethod + def remove_self_distance(distmat): + tmp_list = [] + for i, row in enumerate(distmat): + row1 = torch.cat([row[:i], row[i + 1:]]) + tmp_list.append(row1) + return torch.stack(tmp_list) + + @staticmethod + def euclidean_squared_distance(x, y): + m, n = x.size(0), y.size(0) + distmat = ( + torch.pow(x, 2).sum(dim=1, keepdim=True).expand(m, n) + + torch.pow(y, 2).sum(dim=1, keepdim=True).expand(n, m).t() + ) + # distmat.addmm_(1, -2, x, y.t()) + distmat.addmm_(x, y.t(), beta=1, alpha=-2) + return distmat + + +if __name__ == "__main__": + mmd = MaximumMeanDiscrepancy(kernel_type="rbf") + input1, input2 = torch.rand(3, 100), torch.rand(3, 100) + d = mmd(input1, input2) + print(d.item()) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py new file mode 100644 index 00000000..128ea96b --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + + +class OptimalTransport(nn.Module): + + @staticmethod + def distance(batch1, batch2, dist_metric="cosine"): + if dist_metric == "cosine": + batch1 = F.normalize(batch1, p=2, dim=1) + batch2 = F.normalize(batch2, p=2, dim=1) + dist_mat = 1 - 
torch.mm(batch1, batch2.t()) + elif dist_metric == "euclidean": + m, n = batch1.size(0), batch2.size(0) + dist_mat = ( + torch.pow(batch1, 2).sum(dim=1, keepdim=True).expand(m, n) + + torch.pow(batch2, 2).sum(dim=1, keepdim=True).expand(n, m).t() + ) + dist_mat.addmm_( + 1, -2, batch1, batch2.t() + ) # squared euclidean distance + elif dist_metric == "fast_euclidean": + batch1 = batch1.unsqueeze(-2) + batch2 = batch2.unsqueeze(-3) + dist_mat = torch.sum((torch.abs(batch1 - batch2))**2, -1) + else: + raise ValueError( + "Unknown cost function: {}. Expected to " + "be one of [cosine | euclidean]".format(dist_metric) + ) + return dist_mat + + +class SinkhornDivergence(OptimalTransport): + thre = 1e-3 + + def __init__( + self, + dist_metric="cosine", + eps=0.01, + max_iter=5, + bp_to_sinkhorn=False + ): + super().__init__() + self.dist_metric = dist_metric + self.eps = eps + self.max_iter = max_iter + self.bp_to_sinkhorn = bp_to_sinkhorn + + def forward(self, x, y): + # x, y: two batches of data with shape (batch, dim) + W_xy = self.transport_cost(x, y) + W_xx = self.transport_cost(x, x) + W_yy = self.transport_cost(y, y) + return 2*W_xy - W_xx - W_yy + + def transport_cost(self, x, y, return_pi=False): + C = self.distance(x, y, dist_metric=self.dist_metric) + pi = self.sinkhorn_iterate(C, self.eps, self.max_iter, self.thre) + if not self.bp_to_sinkhorn: + pi = pi.detach() + cost = torch.sum(pi * C) + if return_pi: + return cost, pi + return cost + + @staticmethod + def sinkhorn_iterate(C, eps, max_iter, thre): + nx, ny = C.shape + mu = torch.ones(nx, dtype=C.dtype, device=C.device) * (1.0/nx) + nu = torch.ones(ny, dtype=C.dtype, device=C.device) * (1.0/ny) + u = torch.zeros_like(mu) + v = torch.zeros_like(nu) + + def M(_C, _u, _v): + """Modified cost for logarithmic updates. 
+ Eq: M_{ij} = (-c_{ij} + u_i + v_j) / epsilon + """ + return (-_C + _u.unsqueeze(-1) + _v.unsqueeze(-2)) / eps + + real_iter = 0 # check if algorithm terminates before max_iter + # Sinkhorn iterations + for i in range(max_iter): + u0 = u + u = eps * ( + torch.log(mu + 1e-8) - torch.logsumexp(M(C, u, v), dim=1) + ) + u + v = ( + eps * ( + torch.log(nu + 1e-8) - + torch.logsumexp(M(C, u, v).permute(1, 0), dim=1) + ) + v + ) + err = (u - u0).abs().sum() + real_iter += 1 + if err.item() < thre: + break + # Transport plan pi = diag(a)*K*diag(b) + return torch.exp(M(C, u, v)) + + +class MinibatchEnergyDistance(SinkhornDivergence): + + def __init__( + self, + dist_metric="cosine", + eps=0.01, + max_iter=5, + bp_to_sinkhorn=False + ): + super().__init__( + dist_metric=dist_metric, + eps=eps, + max_iter=max_iter, + bp_to_sinkhorn=bp_to_sinkhorn, + ) + + def forward(self, x, y): + x1, x2 = torch.split(x, x.size(0) // 2, dim=0) + y1, y2 = torch.split(y, y.size(0) // 2, dim=0) + cost = 0 + cost += self.transport_cost(x1, y1) + cost += self.transport_cost(x1, y2) + cost += self.transport_cost(x2, y1) + cost += self.transport_cost(x2, y2) + cost -= 2 * self.transport_cost(x1, x2) + cost -= 2 * self.transport_cost(y1, y2) + return cost + + +if __name__ == "__main__": + # example: https://dfdazac.github.io/sinkhorn.html + import numpy as np + + n_points = 5 + a = np.array([[i, 0] for i in range(n_points)]) + b = np.array([[i, 1] for i in range(n_points)]) + x = torch.tensor(a, dtype=torch.float) + y = torch.tensor(b, dtype=torch.float) + sinkhorn = SinkhornDivergence( + dist_metric="euclidean", eps=0.01, max_iter=5 + ) + dist, pi = sinkhorn.transport_cost(x, y, True) + import pdb + + pdb.set_trace() diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py new file mode 100644 index 00000000..34bab9db --- /dev/null +++ 
b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py @@ -0,0 +1,34 @@ +import torch.nn as nn +from torch.autograd import Function + + +class _ReverseGrad(Function): + + @staticmethod + def forward(ctx, input, grad_scaling): + ctx.grad_scaling = grad_scaling + return input.view_as(input) + + @staticmethod + def backward(ctx, grad_output): + grad_scaling = ctx.grad_scaling + return -grad_scaling * grad_output, None + + +reverse_grad = _ReverseGrad.apply + + +class ReverseGrad(nn.Module): + """Gradient reversal layer. + + It acts as an identity layer in the forward, + but reverses the sign of the gradient in + the backward. + """ + + def forward(self, x, grad_scaling=1.0): + assert (grad_scaling >= + 0), "grad_scaling must be non-negative, " "but got {}".format( + grad_scaling + ) + return reverse_grad(x, grad_scaling) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/sequential2.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/sequential2.py new file mode 100644 index 00000000..47a83834 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/sequential2.py @@ -0,0 +1,15 @@ +import torch.nn as nn + + +class Sequential2(nn.Sequential): + """An alternative sequential container to nn.Sequential, + which accepts an arbitrary number of input arguments. + """ + + def forward(self, *inputs): + for module in self._modules.values(): + if isinstance(inputs, tuple): + inputs = module(*inputs) + else: + inputs = module(inputs) + return inputs diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/transnorm.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/transnorm.py new file mode 100644 index 00000000..453db773 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/transnorm.py @@ -0,0 +1,138 @@ +import torch +import torch.nn as nn + + +class _TransNorm(nn.Module): + """Transferable normalization. + + Reference: + - Wang et al. 
Transferable Normalization: Towards Improving + Transferability of Deep Neural Networks. NeurIPS 2019. + + Args: + num_features (int): number of features. + eps (float): epsilon. + momentum (float): value for updating running_mean and running_var. + adaptive_alpha (bool): apply domain adaptive alpha. + """ + + def __init__( + self, num_features, eps=1e-5, momentum=0.1, adaptive_alpha=True + ): + super().__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.adaptive_alpha = adaptive_alpha + + self.register_buffer("running_mean_s", torch.zeros(num_features)) + self.register_buffer("running_var_s", torch.ones(num_features)) + self.register_buffer("running_mean_t", torch.zeros(num_features)) + self.register_buffer("running_var_t", torch.ones(num_features)) + + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + + def resnet_running_stats(self): + self.running_mean_s.zero_() + self.running_var_s.fill_(1) + self.running_mean_t.zero_() + self.running_var_t.fill_(1) + + def reset_parameters(self): + nn.init.ones_(self.weight) + nn.init.zeros_(self.bias) + + def _check_input(self, x): + raise NotImplementedError + + def _compute_alpha(self, mean_s, var_s, mean_t, var_t): + C = self.num_features + ratio_s = mean_s / (var_s + self.eps).sqrt() + ratio_t = mean_t / (var_t + self.eps).sqrt() + dist = (ratio_s - ratio_t).abs() + dist_inv = 1 / (1+dist) + return C * dist_inv / dist_inv.sum() + + def forward(self, input): + self._check_input(input) + C = self.num_features + if input.dim() == 2: + new_shape = (1, C) + elif input.dim() == 4: + new_shape = (1, C, 1, 1) + else: + raise ValueError + + weight = self.weight.view(*new_shape) + bias = self.bias.view(*new_shape) + + if not self.training: + mean_t = self.running_mean_t.view(*new_shape) + var_t = self.running_var_t.view(*new_shape) + output = (input-mean_t) / (var_t + self.eps).sqrt() + output = output*weight + bias + + if 
self.adaptive_alpha: + mean_s = self.running_mean_s.view(*new_shape) + var_s = self.running_var_s.view(*new_shape) + alpha = self._compute_alpha(mean_s, var_s, mean_t, var_t) + alpha = alpha.reshape(*new_shape) + output = (1 + alpha.detach()) * output + + return output + + input_s, input_t = torch.split(input, input.shape[0] // 2, dim=0) + + x_s = input_s.transpose(0, 1).reshape(C, -1) + mean_s = x_s.mean(1) + var_s = x_s.var(1) + self.running_mean_s.mul_(self.momentum) + self.running_mean_s.add_((1 - self.momentum) * mean_s.data) + self.running_var_s.mul_(self.momentum) + self.running_var_s.add_((1 - self.momentum) * var_s.data) + mean_s = mean_s.reshape(*new_shape) + var_s = var_s.reshape(*new_shape) + output_s = (input_s-mean_s) / (var_s + self.eps).sqrt() + output_s = output_s*weight + bias + + x_t = input_t.transpose(0, 1).reshape(C, -1) + mean_t = x_t.mean(1) + var_t = x_t.var(1) + self.running_mean_t.mul_(self.momentum) + self.running_mean_t.add_((1 - self.momentum) * mean_t.data) + self.running_var_t.mul_(self.momentum) + self.running_var_t.add_((1 - self.momentum) * var_t.data) + mean_t = mean_t.reshape(*new_shape) + var_t = var_t.reshape(*new_shape) + output_t = (input_t-mean_t) / (var_t + self.eps).sqrt() + output_t = output_t*weight + bias + + output = torch.cat([output_s, output_t], 0) + + if self.adaptive_alpha: + alpha = self._compute_alpha(mean_s, var_s, mean_t, var_t) + alpha = alpha.reshape(*new_shape) + output = (1 + alpha.detach()) * output + + return output + + +class TransNorm1d(_TransNorm): + + def _check_input(self, x): + if x.dim() != 2: + raise ValueError( + "Expected the input to be 2-D, " + "but got {}-D".format(x.dim()) + ) + + +class TransNorm2d(_TransNorm): + + def _check_input(self, x): + if x.dim() != 4: + raise ValueError( + "Expected the input to be 4-D, " + "but got {}-D".format(x.dim()) + ) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/utils.py b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/utils.py 
new file mode 100644 index 00000000..6bfcc898 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/modeling/ops/utils.py @@ -0,0 +1,77 @@ +import numpy as np +import torch + + +def sharpen_prob(p, temperature=2): + """Sharpening probability with a temperature. + + Args: + p (torch.Tensor): probability matrix (batch_size, n_classes) + temperature (float): temperature. + """ + p = p.pow(temperature) + return p / p.sum(1, keepdim=True) + + +def reverse_index(data, label): + """Reverse order.""" + inv_idx = torch.arange(data.size(0) - 1, -1, -1).long() + return data[inv_idx], label[inv_idx] + + +def shuffle_index(data, label): + """Shuffle order.""" + rnd_idx = torch.randperm(data.shape[0]) + return data[rnd_idx], label[rnd_idx] + + +def create_onehot(label, num_classes): + """Create one-hot tensor. + + We suggest using nn.functional.one_hot. + + Args: + label (torch.Tensor): 1-D tensor. + num_classes (int): number of classes. + """ + onehot = torch.zeros(label.shape[0], num_classes) + onehot = onehot.scatter(1, label.unsqueeze(1).data.cpu(), 1) + onehot = onehot.to(label.device) + return onehot + + +def sigmoid_rampup(current, rampup_length): + """Exponential rampup. + + Args: + current (int): current step. + rampup_length (int): maximum step. + """ + assert rampup_length > 0 + current = np.clip(current, 0.0, rampup_length) + phase = 1.0 - current/rampup_length + return float(np.exp(-5.0 * phase * phase)) + + +def linear_rampup(current, rampup_length): + """Linear rampup. + + Args: + current (int): current step. + rampup_length (int): maximum step. + """ + assert rampup_length > 0 + ratio = np.clip(current / rampup_length, 0.0, 1.0) + return float(ratio) + + +def ema_model_update(model, ema_model, alpha): + """Exponential moving average of model parameters. + + Args: + model (nn.Module): model being trained. + ema_model (nn.Module): ema of the model. + alpha (float): ema decay rate. 
+ """ + for ema_param, param in zip(ema_model.parameters(), model.parameters()): + ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/optim/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/optim/__init__.py new file mode 100644 index 00000000..e7ef4c04 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/optim/__init__.py @@ -0,0 +1,2 @@ +from .optimizer import build_optimizer +from .lr_scheduler import build_lr_scheduler diff --git a/python/ClipDetection/Dassl.pytorch/dassl/optim/lr_scheduler.py b/python/ClipDetection/Dassl.pytorch/dassl/optim/lr_scheduler.py new file mode 100644 index 00000000..48d58853 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/optim/lr_scheduler.py @@ -0,0 +1,152 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import torch +from torch.optim.lr_scheduler import _LRScheduler + +AVAI_SCHEDS = ["single_step", "multi_step", "cosine"] + + +class _BaseWarmupScheduler(_LRScheduler): + + def __init__( + self, + optimizer, + successor, + warmup_epoch, + last_epoch=-1, + verbose=False + ): + self.successor = successor + self.warmup_epoch = warmup_epoch + super().__init__(optimizer, last_epoch, verbose) + + def get_lr(self): + raise NotImplementedError + + def step(self, epoch=None): + if self.last_epoch >= self.warmup_epoch: + self.successor.step(epoch) + self._last_lr = self.successor.get_last_lr() + else: + super().step(epoch) + + +class ConstantWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, + optimizer, + successor, + warmup_epoch, + cons_lr, + last_epoch=-1, + verbose=False + ): + self.cons_lr = cons_lr + super().__init__( + optimizer, successor, warmup_epoch, last_epoch, verbose + ) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + return [self.cons_lr for _ in self.base_lrs] + + +class LinearWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, 
+ optimizer, + successor, + warmup_epoch, + min_lr, + last_epoch=-1, + verbose=False + ): + self.min_lr = min_lr + super().__init__( + optimizer, successor, warmup_epoch, last_epoch, verbose + ) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + if self.last_epoch == 0: + return [self.min_lr for _ in self.base_lrs] + return [ + lr * self.last_epoch / self.warmup_epoch for lr in self.base_lrs + ] + + +def build_lr_scheduler(optimizer, optim_cfg): + """A function wrapper for building a learning rate scheduler. + + Args: + optimizer (Optimizer): an Optimizer. + optim_cfg (CfgNode): optimization config. + """ + lr_scheduler = optim_cfg.LR_SCHEDULER + stepsize = optim_cfg.STEPSIZE + gamma = optim_cfg.GAMMA + max_epoch = optim_cfg.MAX_EPOCH + + if lr_scheduler not in AVAI_SCHEDS: + raise ValueError( + f"scheduler must be one of {AVAI_SCHEDS}, but got {lr_scheduler}" + ) + + if lr_scheduler == "single_step": + if isinstance(stepsize, (list, tuple)): + stepsize = stepsize[-1] + + if not isinstance(stepsize, int): + raise TypeError( + "For single_step lr_scheduler, stepsize must " + f"be an integer, but got {type(stepsize)}" + ) + + if stepsize <= 0: + stepsize = max_epoch + + scheduler = torch.optim.lr_scheduler.StepLR( + optimizer, step_size=stepsize, gamma=gamma + ) + + elif lr_scheduler == "multi_step": + if not isinstance(stepsize, (list, tuple)): + raise TypeError( + "For multi_step lr_scheduler, stepsize must " + f"be a list, but got {type(stepsize)}" + ) + + scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=stepsize, gamma=gamma + ) + + elif lr_scheduler == "cosine": + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, float(max_epoch) + ) + + if optim_cfg.WARMUP_EPOCH > 0: + if not optim_cfg.WARMUP_RECOUNT: + scheduler.last_epoch = optim_cfg.WARMUP_EPOCH + + if optim_cfg.WARMUP_TYPE == "constant": + scheduler = ConstantWarmupScheduler( + optimizer, scheduler, 
optim_cfg.WARMUP_EPOCH, + optim_cfg.WARMUP_CONS_LR + ) + + elif optim_cfg.WARMUP_TYPE == "linear": + scheduler = LinearWarmupScheduler( + optimizer, scheduler, optim_cfg.WARMUP_EPOCH, + optim_cfg.WARMUP_MIN_LR + ) + + else: + raise ValueError + + return scheduler diff --git a/python/ClipDetection/Dassl.pytorch/dassl/optim/optimizer.py b/python/ClipDetection/Dassl.pytorch/dassl/optim/optimizer.py new file mode 100644 index 00000000..5ebcd622 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/optim/optimizer.py @@ -0,0 +1,142 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import warnings +import torch +import torch.nn as nn + +from .radam import RAdam + +AVAI_OPTIMS = ["adam", "amsgrad", "sgd", "rmsprop", "radam", "adamw"] + + +def build_optimizer(model, optim_cfg, param_groups=None): + """A function wrapper for building an optimizer. + + Args: + model (nn.Module or iterable): model. + optim_cfg (CfgNode): optimization config. + param_groups: If provided, directly optimize param_groups and abandon model + """ + optim = optim_cfg.NAME + lr = optim_cfg.LR + weight_decay = optim_cfg.WEIGHT_DECAY + momentum = optim_cfg.MOMENTUM + sgd_dampening = optim_cfg.SGD_DAMPNING + sgd_nesterov = optim_cfg.SGD_NESTEROV + rmsprop_alpha = optim_cfg.RMSPROP_ALPHA + adam_beta1 = optim_cfg.ADAM_BETA1 + adam_beta2 = optim_cfg.ADAM_BETA2 + staged_lr = optim_cfg.STAGED_LR + new_layers = optim_cfg.NEW_LAYERS + base_lr_mult = optim_cfg.BASE_LR_MULT + + if optim not in AVAI_OPTIMS: + raise ValueError( + f"optim must be one of {AVAI_OPTIMS}, but got {optim}" + ) + + if param_groups is not None and staged_lr: + warnings.warn( + "staged_lr will be ignored, if you need to use staged_lr, " + "please bind it with param_groups yourself." 
+ ) + + if param_groups is None: + if staged_lr: + if not isinstance(model, nn.Module): + raise TypeError( + "When staged_lr is True, model given to " + "build_optimizer() must be an instance of nn.Module" + ) + + if isinstance(model, nn.DataParallel): + model = model.module + + if isinstance(new_layers, str): + if new_layers is None: + warnings.warn("new_layers is empty (staged_lr is useless)") + new_layers = [new_layers] + + base_params = [] + base_layers = [] + new_params = [] + + for name, module in model.named_children(): + if name in new_layers: + new_params += [p for p in module.parameters()] + else: + base_params += [p for p in module.parameters()] + base_layers.append(name) + + param_groups = [ + { + "params": base_params, + "lr": lr * base_lr_mult + }, + { + "params": new_params + }, + ] + + else: + if isinstance(model, nn.Module): + param_groups = model.parameters() + else: + param_groups = model + + if optim == "adam": + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == "amsgrad": + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + amsgrad=True, + ) + + elif optim == "sgd": + optimizer = torch.optim.SGD( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + dampening=sgd_dampening, + nesterov=sgd_nesterov, + ) + + elif optim == "rmsprop": + optimizer = torch.optim.RMSprop( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + alpha=rmsprop_alpha, + ) + + elif optim == "radam": + optimizer = RAdam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == "adamw": + optimizer = torch.optim.AdamW( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + else: + raise NotImplementedError(f"Optimizer {optim} not implemented yet!") + + return optimizer diff --git 
a/python/ClipDetection/Dassl.pytorch/dassl/optim/radam.py b/python/ClipDetection/Dassl.pytorch/dassl/optim/radam.py new file mode 100644 index 00000000..f4c1989f --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/optim/radam.py @@ -0,0 +1,332 @@ +""" +Imported from: https://github.com/LiyuanLucasLiu/RAdam + +https://arxiv.org/abs/1908.03265 + +@article{liu2019radam, + title={On the Variance of the Adaptive Learning Rate and Beyond}, + author={Liu, Liyuan and Jiang, Haoming and He, Pengcheng and Chen, Weizhu and Liu, Xiaodong and Gao, Jianfeng and Han, Jiawei}, + journal={arXiv preprint arXiv:1908.03265}, + year={2019} +} +""" +import math +import torch +from torch.optim.optimizer import Optimizer + + +class RAdam(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + degenerated_to_sgd=True, + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + self.degenerated_to_sgd = degenerated_to_sgd + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + self.buffer = [[None, None, None] for ind in range(10)] + super(RAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(RAdam, self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "RAdam does not support sparse gradients" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + 
state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state["step"] += 1 + buffered = self.buffer[int(state["step"] % 10)] + if state["step"] == buffered[0]: + N_sma, step_size = buffered[1], buffered[2] + else: + buffered[0] = state["step"] + beta2_t = beta2**state["step"] + N_sma_max = 2 / (1-beta2) - 1 + N_sma = N_sma_max - 2 * state["step" + ] * beta2_t / (1-beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = math.sqrt( + (1-beta2_t) * (N_sma-4) / (N_sma_max-4) * + (N_sma-2) / N_sma * N_sma_max / (N_sma_max-2) + ) / (1 - beta1**state["step"]) + elif self.degenerated_to_sgd: + step_size = 1.0 / (1 - beta1**state["step"]) + else: + step_size = -1 + buffered[2] = step_size + + # more conservative since it's an approximated value + if N_sma >= 5: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + denom = exp_avg_sq.sqrt().add_(group["eps"]) + p_data_fp32.addcdiv_( + -step_size * group["lr"], exp_avg, denom + ) + p.data.copy_(p_data_fp32) + elif step_size > 0: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + p_data_fp32.add_(-step_size * group["lr"], exp_avg) + p.data.copy_(p_data_fp32) + + return loss + + +class PlainRAdam(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + degenerated_to_sgd=True, + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon 
value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + self.degenerated_to_sgd = degenerated_to_sgd + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + + super(PlainRAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(PlainRAdam, self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "RAdam does not support sparse gradients" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state["step"] += 1 + beta2_t = beta2**state["step"] + N_sma_max = 2 / (1-beta2) - 1 + N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1-beta2_t) + + # more conservative since it's an approximated value + if N_sma >= 5: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + step_size = ( + group["lr"] * math.sqrt( + (1-beta2_t) * (N_sma-4) / (N_sma_max-4) * + (N_sma-2) / N_sma * N_sma_max / (N_sma_max-2) + ) / (1 - beta1**state["step"]) + ) + denom = exp_avg_sq.sqrt().add_(group["eps"]) + p_data_fp32.addcdiv_(-step_size, exp_avg, denom) + 
p.data.copy_(p_data_fp32) + elif self.degenerated_to_sgd: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + step_size = group["lr"] / (1 - beta1**state["step"]) + p_data_fp32.add_(-step_size, exp_avg) + p.data.copy_(p_data_fp32) + + return loss + + +class AdamW(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + warmup=0 + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + defaults = dict( + lr=lr, + betas=betas, + eps=eps, + weight_decay=weight_decay, + warmup=warmup + ) + super(AdamW, self).__init__(params, defaults) + + def __setstate__(self, state): + super(AdamW, self).__setstate__(state) + + def step(self, closure=None): + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "Adam does not support sparse gradients, please consider SparseAdam instead" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + state["step"] += 1 + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + denom = 
exp_avg_sq.sqrt().add_(group["eps"]) + bias_correction1 = 1 - beta1**state["step"] + bias_correction2 = 1 - beta2**state["step"] + + if group["warmup"] > state["step"]: + scheduled_lr = 1e-8 + state["step"] * group["lr"] / group[ + "warmup"] + else: + scheduled_lr = group["lr"] + + step_size = ( + scheduled_lr * math.sqrt(bias_correction2) / + bias_correction1 + ) + + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * scheduled_lr, p_data_fp32 + ) + + p_data_fp32.addcdiv_(-step_size, exp_avg, denom) + + p.data.copy_(p_data_fp32) + + return loss diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/__init__.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/__init__.py new file mode 100644 index 00000000..c47679fd --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/__init__.py @@ -0,0 +1,5 @@ +from .tools import * +from .logger import * +from .meters import * +from .registry import * +from .torchtools import * diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/logger.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/logger.py new file mode 100644 index 00000000..d5e680a0 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/logger.py @@ -0,0 +1,73 @@ +import os +import sys +import time +import os.path as osp + +from .tools import mkdir_if_missing + +__all__ = ["Logger", "setup_logger"] + + +class Logger: + """Write console output to external text file. + + Imported from ``_ + + Args: + fpath (str): directory to save logging file. 
+ + Examples:: + >>> import sys + >>> import os.path as osp + >>> save_dir = 'output/experiment-1' + >>> log_name = 'train.log' + >>> sys.stdout = Logger(osp.join(save_dir, log_name)) + """ + + def __init__(self, fpath=None): + self.console = sys.stdout + self.file = None + if fpath is not None: + mkdir_if_missing(osp.dirname(fpath)) + self.file = open(fpath, "w") + + def __del__(self): + self.close() + + def __enter__(self): + pass + + def __exit__(self, *args): + self.close() + + def write(self, msg): + self.console.write(msg) + if self.file is not None: + self.file.write(msg) + + def flush(self): + self.console.flush() + if self.file is not None: + self.file.flush() + os.fsync(self.file.fileno()) + + def close(self): + self.console.close() + if self.file is not None: + self.file.close() + + +def setup_logger(output=None): + if output is None: + return + + if output.endswith(".txt") or output.endswith(".log"): + fpath = output + else: + fpath = osp.join(output, "log.txt") + + if osp.exists(fpath): + # make sure the existing log file is not over-written + fpath += time.strftime("-%Y-%m-%d-%H-%M-%S") + + sys.stdout = Logger(fpath) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/meters.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/meters.py new file mode 100644 index 00000000..a779b591 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/meters.py @@ -0,0 +1,80 @@ +from collections import defaultdict +import torch + +__all__ = ["AverageMeter", "MetricMeter"] + + +class AverageMeter: + """Compute and store the average and current value. + + Examples:: + >>> # 1. Initialize a meter to record loss + >>> losses = AverageMeter() + >>> # 2. Update meter after every mini-batch update + >>> losses.update(loss_value, batch_size) + """ + + def __init__(self, ema=False): + """ + Args: + ema (bool, optional): apply exponential moving average. 
+ """ + self.ema = ema + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if isinstance(val, torch.Tensor): + val = val.item() + + self.val = val + self.sum += val * n + self.count += n + + if self.ema: + self.avg = self.avg * 0.9 + self.val * 0.1 + else: + self.avg = self.sum / self.count + + +class MetricMeter: + """Store the average and current value for a set of metrics. + + Examples:: + >>> # 1. Create an instance of MetricMeter + >>> metric = MetricMeter() + >>> # 2. Update using a dictionary as input + >>> input_dict = {'loss_1': value_1, 'loss_2': value_2} + >>> metric.update(input_dict) + >>> # 3. Convert to string and print + >>> print(str(metric)) + """ + + def __init__(self, delimiter=" "): + self.meters = defaultdict(AverageMeter) + self.delimiter = delimiter + + def update(self, input_dict): + if input_dict is None: + return + + if not isinstance(input_dict, dict): + raise TypeError( + "Input to MetricMeter.update() must be a dictionary" + ) + + for k, v in input_dict.items(): + if isinstance(v, torch.Tensor): + v = v.item() + self.meters[k].update(v) + + def __str__(self): + output_str = [] + for name, meter in self.meters.items(): + output_str.append(f"{name} {meter.val:.4f} ({meter.avg:.4f})") + return self.delimiter.join(output_str) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/registry.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/registry.py new file mode 100644 index 00000000..5079784e --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/registry.py @@ -0,0 +1,69 @@ +""" +Modified from https://github.com/facebookresearch/fvcore +""" +__all__ = ["Registry"] + + +class Registry: + """A registry providing name -> object mapping, to support + custom modules. + + To create a registry (e.g. a backbone registry): + + .. code-block:: python + + BACKBONE_REGISTRY = Registry('BACKBONE') + + To register an object: + + .. 
code-block:: python + + @BACKBONE_REGISTRY.register() + class MyBackbone(nn.Module): + ... + + Or: + + .. code-block:: python + + BACKBONE_REGISTRY.register(MyBackbone) + """ + + def __init__(self, name): + self._name = name + self._obj_map = dict() + + def _do_register(self, name, obj, force=False): + if name in self._obj_map and not force: + raise KeyError( + 'An object named "{}" was already ' + 'registered in "{}" registry'.format(name, self._name) + ) + + self._obj_map[name] = obj + + def register(self, obj=None, force=False): + if obj is None: + # Used as a decorator + def wrapper(fn_or_class): + name = fn_or_class.__name__ + self._do_register(name, fn_or_class, force=force) + return fn_or_class + + return wrapper + + # Used as a function call + name = obj.__name__ + self._do_register(name, obj, force=force) + + def get(self, name): + if name not in self._obj_map: + raise KeyError( + 'Object name "{}" does not exist ' + 'in "{}" registry'.format(name, self._name) + ) + + return self._obj_map[name] + + def registered_names(self): + return list(self._obj_map.keys()) diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/tools.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/tools.py new file mode 100644 index 00000000..62d4f307 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/tools.py @@ -0,0 +1,185 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import os +import sys +import json +import time +import errno +import numpy as np +import random +import os.path as osp +import warnings +from difflib import SequenceMatcher +import PIL +import torch +from PIL import Image + +__all__ = [ + "mkdir_if_missing", + "check_isfile", + "read_json", + "write_json", + "set_random_seed", + "download_url", + "read_image", + "collect_env_info", + "listdir_nohidden", + "get_most_similar_str_to_a_from_b", + "check_availability", + "tolist_if_not", +] + + +def mkdir_if_missing(dirname): + """Create dirname if it is missing.""" + 
if not osp.exists(dirname): + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def check_isfile(fpath): + """Check if the given path is a file. + + Args: + fpath (str): file path. + + Returns: + bool + """ + isfile = osp.isfile(fpath) + if not isfile: + warnings.warn('No file found at "{}"'.format(fpath)) + return isfile + + +def read_json(fpath): + """Read json file from a path.""" + with open(fpath, "r") as f: + obj = json.load(f) + return obj + + +def write_json(obj, fpath): + """Writes to a json file.""" + mkdir_if_missing(osp.dirname(fpath)) + with open(fpath, "w") as f: + json.dump(obj, f, indent=4, separators=(",", ": ")) + + +def set_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def download_url(url, dst): + """Download file from a url to a destination. + + Args: + url (str): url to download file. + dst (str): destination path. + """ + from six.moves import urllib + + print('* url="{}"'.format(url)) + print('* destination="{}"'.format(dst)) + + def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + return + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024*duration)) + percent = int(count * block_size * 100 / total_size) + sys.stdout.write( + "\r...%d%%, %d MB, %d KB/s, %d seconds passed" % + (percent, progress_size / (1024*1024), speed, duration) + ) + sys.stdout.flush() + + urllib.request.urlretrieve(url, dst, _reporthook) + sys.stdout.write("\n") + + +def read_image(path): + """Read image from path using ``PIL.Image``. + + Args: + path (str): path to an image. + + Returns: + PIL image + """ + return Image.open(path).convert("RGB") + + +def collect_env_info(): + """Return env info as a string. 
+ + Code source: github.com/facebookresearch/maskrcnn-benchmark + """ + from torch.utils.collect_env import get_pretty_env_info + + env_str = get_pretty_env_info() + env_str += "\n Pillow ({})".format(PIL.__version__) + return env_str + + +def listdir_nohidden(path, sort=False): + """List non-hidden items in a directory. + + Args: + path (str): directory path. + sort (bool): sort the items. + """ + items = [f for f in os.listdir(path) if not f.startswith(".")] + if sort: + items.sort() + return items + + +def get_most_similar_str_to_a_from_b(a, b): + """Return the most similar string to a in b. + + Args: + a (str): probe string. + b (list): a list of candidate strings. + """ + highest_sim = 0 + chosen = None + for candidate in b: + sim = SequenceMatcher(None, a, candidate).ratio() + if sim >= highest_sim: + highest_sim = sim + chosen = candidate + return chosen + + +def check_availability(requested, available): + """Check if an element is available in a list. + + Args: + requested (str): probe string. + available (list): a list of available strings. 
+ """ + if requested not in available: + psb_ans = get_most_similar_str_to_a_from_b(requested, available) + raise ValueError( + "The requested one is expected " + "to belong to {}, but got [{}] " + "(do you mean [{}]?)".format(available, requested, psb_ans) + ) + + +def tolist_if_not(x): + """Convert to a list.""" + if not isinstance(x, list): + x = [x] + return x diff --git a/python/ClipDetection/Dassl.pytorch/dassl/utils/torchtools.py b/python/ClipDetection/Dassl.pytorch/dassl/utils/torchtools.py new file mode 100644 index 00000000..aa4dedfc --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/dassl/utils/torchtools.py @@ -0,0 +1,347 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import pickle +import shutil +import os.path as osp +import warnings +from functools import partial +from collections import OrderedDict +import torch +import torch.nn as nn + +from .tools import mkdir_if_missing + +__all__ = [ + "save_checkpoint", + "load_checkpoint", + "resume_from_checkpoint", + "open_all_layers", + "open_specified_layers", + "count_num_param", + "load_pretrained_weights", + "init_network_weights", +] + + +def save_checkpoint( + state, + save_dir, + is_best=False, + remove_module_from_keys=True, + model_name="" +): + r"""Save checkpoint. + + Args: + state (dict): dictionary. + save_dir (str): directory to save checkpoint. + is_best (bool, optional): if True, this checkpoint will be copied and named + ``model-best.pth.tar``. Default is False. + remove_module_from_keys (bool, optional): whether to remove "module." + from layer names. Default is True. + model_name (str, optional): model name to save. + """ + mkdir_if_missing(save_dir) + + if remove_module_from_keys: + # remove 'module.' 
in state_dict's keys + state_dict = state["state_dict"] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] + new_state_dict[k] = v + state["state_dict"] = new_state_dict + + # save model + epoch = state["epoch"] + if not model_name: + model_name = "model.pth.tar-" + str(epoch) + fpath = osp.join(save_dir, model_name) + torch.save(state, fpath) + print(f"Checkpoint saved to {fpath}") + + # save current model name + checkpoint_file = osp.join(save_dir, "checkpoint") + checkpoint = open(checkpoint_file, "w+") + checkpoint.write("{}\n".format(osp.basename(fpath))) + checkpoint.close() + + if is_best: + best_fpath = osp.join(osp.dirname(fpath), "model-best.pth.tar") + shutil.copy(fpath, best_fpath) + print('Best checkpoint saved to "{}"'.format(best_fpath)) + + +def load_checkpoint(fpath, device='cpu'): + r"""Load checkpoint. + + ``UnicodeDecodeError`` can be well handled, which means + python2-saved files can be read from python3. + + Args: + fpath (str): path to checkpoint. + + Returns: + dict + + Examples:: + >>> fpath = 'log/my_model/model.pth.tar-10' + >>> checkpoint = load_checkpoint(fpath) + """ + if fpath is None: + raise ValueError("File path is None") + + if not osp.exists(fpath): + raise FileNotFoundError('File is not found at "{}"'.format(fpath)) + + map_location = device + + try: + checkpoint = torch.load(fpath, map_location=map_location) + + except UnicodeDecodeError: + pickle.load = partial(pickle.load, encoding="latin1") + pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1") + checkpoint = torch.load( + fpath, pickle_module=pickle, map_location=map_location + ) + + except Exception: + print('Unable to load checkpoint from "{}"'.format(fpath)) + raise + + return checkpoint + + +def resume_from_checkpoint(fdir, model, optimizer=None, scheduler=None): + r"""Resume training from a checkpoint. 
+ + This will load (1) model weights and (2) ``state_dict`` + of optimizer if ``optimizer`` is not None. + + Args: + fdir (str): directory where the model was saved. + model (nn.Module): model. + optimizer (Optimizer, optional): an Optimizer. + scheduler (Scheduler, optional): an Scheduler. + + Returns: + int: start_epoch. + + Examples:: + >>> fdir = 'log/my_model' + >>> start_epoch = resume_from_checkpoint(fdir, model, optimizer, scheduler) + """ + with open(osp.join(fdir, "checkpoint"), "r") as checkpoint: + model_name = checkpoint.readlines()[0].strip("\n") + fpath = osp.join(fdir, model_name) + + print('Loading checkpoint from "{}"'.format(fpath)) + checkpoint = load_checkpoint(fpath) + model.load_state_dict(checkpoint["state_dict"]) + print("Loaded model weights") + + if optimizer is not None and "optimizer" in checkpoint.keys(): + optimizer.load_state_dict(checkpoint["optimizer"]) + print("Loaded optimizer") + + if scheduler is not None and "scheduler" in checkpoint.keys(): + scheduler.load_state_dict(checkpoint["scheduler"]) + print("Loaded scheduler") + + start_epoch = checkpoint["epoch"] + print("Previous epoch: {}".format(start_epoch)) + + return start_epoch + + +def adjust_learning_rate( + optimizer, + base_lr, + epoch, + stepsize=20, + gamma=0.1, + linear_decay=False, + final_lr=0, + max_epoch=100, +): + r"""Adjust learning rate. + + Deprecated. + """ + if linear_decay: + # linearly decay learning rate from base_lr to final_lr + frac_done = epoch / max_epoch + lr = frac_done*final_lr + (1.0-frac_done) * base_lr + else: + # decay learning rate by gamma for every stepsize + lr = base_lr * (gamma**(epoch // stepsize)) + + for param_group in optimizer.param_groups: + param_group["lr"] = lr + + +def set_bn_to_eval(m): + r"""Set BatchNorm layers to eval mode.""" + # 1. no update for running mean and var + # 2. 
scale and shift parameters are still trainable + classname = m.__class__.__name__ + if classname.find("BatchNorm") != -1: + m.eval() + + +def open_all_layers(model): + r"""Open all layers in model for training. + + Examples:: + >>> open_all_layers(model) + """ + model.train() + for p in model.parameters(): + p.requires_grad = True + + +def open_specified_layers(model, open_layers): + r"""Open specified layers in model for training while keeping + other layers frozen. + + Args: + model (nn.Module): neural net model. + open_layers (str or list): layers open for training. + + Examples:: + >>> # Only model.classifier will be updated. + >>> open_layers = 'classifier' + >>> open_specified_layers(model, open_layers) + >>> # Only model.fc and model.classifier will be updated. + >>> open_layers = ['fc', 'classifier'] + >>> open_specified_layers(model, open_layers) + """ + if isinstance(model, nn.DataParallel): + model = model.module + + if isinstance(open_layers, str): + open_layers = [open_layers] + + for layer in open_layers: + assert hasattr(model, layer), f"{layer} is not an attribute" + + for name, module in model.named_children(): + if name in open_layers: + module.train() + for p in module.parameters(): + p.requires_grad = True + else: + module.eval() + for p in module.parameters(): + p.requires_grad = False + + +def count_num_param(model=None, params=None): + r"""Count number of parameters in a model. + + Args: + model (nn.Module): network model. + params: network model`s params. + Examples:: + >>> model_size = count_num_param(model) + """ + + if model is not None: + return sum(p.numel() for p in model.parameters()) + + if params is not None: + s = 0 + for p in params: + if isinstance(p, dict): + s += p["params"].numel() + else: + s += p.numel() + return s + + raise ValueError("model and params must provide at least one.") + + +def load_pretrained_weights(model, weight_path): + r"""Load pretrianed weights to model. 
+ + Features:: + - Incompatible layers (unmatched in name or size) will be ignored. + - Can automatically deal with keys containing "module.". + + Args: + model (nn.Module): network model. + weight_path (str): path to pretrained weights. + + Examples:: + >>> weight_path = 'log/my_model/model-best.pth.tar' + >>> load_pretrained_weights(model, weight_path) + """ + checkpoint = load_checkpoint(weight_path) + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + warnings.warn( + f"Cannot load {weight_path} (check the key names manually)" + ) + else: + print(f"Successfully loaded pretrained weights from {weight_path}") + if len(discarded_layers) > 0: + print( + f"Layers discarded due to unmatched keys or size: {discarded_layers}" + ) + + +def init_network_weights(model, init_type="normal", gain=0.02): + + def _init_func(m): + classname = m.__class__.__name__ + + if hasattr(m, "weight") and ( + classname.find("Conv") != -1 or classname.find("Linear") != -1 + ): + if init_type == "normal": + nn.init.normal_(m.weight.data, 0.0, gain) + elif init_type == "xavier": + nn.init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == "kaiming": + nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in") + elif init_type == "orthogonal": + nn.init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias.data, 0.0) + + elif classname.find("BatchNorm") != -1: + 
nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + elif classname.find("InstanceNorm") != -1: + if m.weight is not None and m.bias is not None: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + model.apply(_init_func) diff --git a/python/ClipDetection/Dassl.pytorch/datasets/da/cifar_stl.py b/python/ClipDetection/Dassl.pytorch/datasets/da/cifar_stl.py new file mode 100644 index 00000000..52c16aad --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/da/cifar_stl.py @@ -0,0 +1,95 @@ +import sys +import pprint as pp +import os.path as osp +from torchvision.datasets import STL10, CIFAR10 + +from dassl.utils import mkdir_if_missing + +cifar_label2name = { + 0: "airplane", + 1: "car", # the original name was 'automobile' + 2: "bird", + 3: "cat", + 4: "deer", + 5: "dog", + 6: "frog", # conflict class + 7: "horse", + 8: "ship", + 9: "truck", +} + +stl_label2name = { + 0: "airplane", + 1: "bird", + 2: "car", + 3: "cat", + 4: "deer", + 5: "dog", + 6: "horse", + 7: "monkey", # conflict class + 8: "ship", + 9: "truck", +} + +new_name2label = { + "airplane": 0, + "bird": 1, + "car": 2, + "cat": 3, + "deer": 4, + "dog": 5, + "horse": 6, + "ship": 7, + "truck": 8, +} + + +def extract_and_save_image(dataset, save_dir, discard, label2name): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + if label == discard: + continue + class_name = label2name[label] + label_new = new_name2label[class_name] + class_dir = osp.join( + save_dir, + str(label_new).zfill(3) + "_" + class_name + ) + mkdir_if_missing(class_dir) + impath = osp.join(class_dir, str(i + 1).zfill(5) + ".jpg") + img.save(impath) + + +def download_and_prepare(name, root, discarded_label, label2name): + print("Dataset: {}".format(name)) + print("Root: {}".format(root)) 
+ print("Old labels:") + pp.pprint(label2name) + print("Discarded label: {}".format(discarded_label)) + print("New labels:") + pp.pprint(new_name2label) + + if name == "cifar": + train = CIFAR10(root, train=True, download=True) + test = CIFAR10(root, train=False) + else: + train = STL10(root, split="train", download=True) + test = STL10(root, split="test") + + train_dir = osp.join(root, name, "train") + test_dir = osp.join(root, name, "test") + + extract_and_save_image(train, train_dir, discarded_label, label2name) + extract_and_save_image(test, test_dir, discarded_label, label2name) + + +if __name__ == "__main__": + download_and_prepare("cifar", sys.argv[1], 6, cifar_label2name) + download_and_prepare("stl", sys.argv[1], 7, stl_label2name) diff --git a/python/ClipDetection/Dassl.pytorch/datasets/da/digit5.py b/python/ClipDetection/Dassl.pytorch/datasets/da/digit5.py new file mode 100644 index 00000000..500511dc --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/da/digit5.py @@ -0,0 +1,131 @@ +import os +import numpy as np +import os.path as osp +import argparse +from PIL import Image +from scipy.io import loadmat + + +def mkdir_if_missing(directory): + if not osp.exists(directory): + os.makedirs(directory) + + +def extract_and_save(data, label, save_dir): + for i, (x, y) in enumerate(zip(data, label)): + if x.shape[2] == 1: + x = np.repeat(x, 3, axis=2) + if y == 10: + y = 0 + x = Image.fromarray(x, mode="RGB") + save_path = osp.join( + save_dir, + str(i + 1).zfill(6) + "_" + str(y) + ".jpg" + ) + x.save(save_path) + + +def load_mnist(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "mnist_data.mat") + data = loadmat(filepath) + + train_data = np.reshape(data["train_32"], (55000, 32, 32, 1)) + test_data = np.reshape(data["test_32"], (10000, 32, 32, 1)) + + train_label = np.nonzero(data["label_train"])[1] + test_label = np.nonzero(data["label_test"])[1] + + return train_data, test_data, train_label, test_label + + +def 
load_mnist_m(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "mnistm_with_label.mat") + data = loadmat(filepath) + + train_data = data["train"] + test_data = data["test"] + + train_label = np.nonzero(data["label_train"])[1] + test_label = np.nonzero(data["label_test"])[1] + + return train_data, test_data, train_label, test_label + + +def load_svhn(data_dir, raw_data_dir): + train = loadmat(osp.join(raw_data_dir, "svhn_train_32x32.mat")) + train_data = train["X"].transpose(3, 0, 1, 2) + train_label = train["y"][:, 0] + + test = loadmat(osp.join(raw_data_dir, "svhn_test_32x32.mat")) + test_data = test["X"].transpose(3, 0, 1, 2) + test_label = test["y"][:, 0] + + return train_data, test_data, train_label, test_label + + +def load_syn(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "syn_number.mat") + data = loadmat(filepath) + + train_data = data["train_data"] + test_data = data["test_data"] + + train_label = data["train_label"][:, 0] + test_label = data["test_label"][:, 0] + + return train_data, test_data, train_label, test_label + + +def load_usps(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "usps_28x28.mat") + data = loadmat(filepath)["dataset"] + + train_data = data[0][0].transpose(0, 2, 3, 1) + test_data = data[1][0].transpose(0, 2, 3, 1) + + train_data *= 255 + test_data *= 255 + + train_data = train_data.astype(np.uint8) + test_data = test_data.astype(np.uint8) + + train_label = data[0][1][:, 0] + test_label = data[1][1][:, 0] + + return train_data, test_data, train_label, test_label + + +def main(data_dir): + data_dir = osp.abspath(osp.expanduser(data_dir)) + raw_data_dir = osp.join(data_dir, "Digit-Five") + + if not osp.exists(data_dir): + raise FileNotFoundError('"{}" does not exist'.format(data_dir)) + + datasets = ["mnist", "mnist_m", "svhn", "syn", "usps"] + + for name in datasets: + print("Creating {}".format(name)) + + output = eval("load_" + name)(data_dir, raw_data_dir) + train_data, test_data, train_label, 
test_label = output + + print("# train: {}".format(train_data.shape[0])) + print("# test: {}".format(test_data.shape[0])) + + train_dir = osp.join(data_dir, name, "train_images") + mkdir_if_missing(train_dir) + test_dir = osp.join(data_dir, name, "test_images") + mkdir_if_missing(test_dir) + + extract_and_save(train_data, train_label, train_dir) + extract_and_save(test_data, test_label, test_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "data_dir", type=str, help="directory containing Digit-Five/" + ) + args = parser.parse_args() + main(args.data_dir) diff --git a/python/ClipDetection/Dassl.pytorch/datasets/da/visda17.sh b/python/ClipDetection/Dassl.pytorch/datasets/da/visda17.sh new file mode 100644 index 00000000..ce98d313 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/da/visda17.sh @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------ +# ROOT is the root directory where you put your domain datasets. 
+# +# Suppose you wanna put the dataset under $DATA, which stores all the +# domain datasets, run the following command in your terminal to +# download VisDa17: +# +# $ sh visda17.sh $DATA +#------------------------------------------------------------------------ + +ROOT=$1 +mkdir $ROOT/visda17 +cd $ROOT/visda17 + +wget http://csr.bu.edu/ftp/visda17/clf/train.tar +tar xvf train.tar + +wget http://csr.bu.edu/ftp/visda17/clf/validation.tar +tar xvf validation.tar + +wget http://csr.bu.edu/ftp/visda17/clf/test.tar +tar xvf test.tar + +wget https://raw.githubusercontent.com/VisionLearningGroup/taskcv-2017-public/master/classification/data/image_list.txt -O test/image_list.txt \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/datasets/dg/cifar_c.py b/python/ClipDetection/Dassl.pytorch/datasets/dg/cifar_c.py new file mode 100644 index 00000000..f407f858 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/dg/cifar_c.py @@ -0,0 +1,73 @@ +""" +This script +- creates a folder named "cifar10_c" under the same directory as 'CIFAR-10-C' +- extracts images from .npy files and save them as .jpg. 
+""" +import os +import sys +import numpy as np +import os.path as osp +from PIL import Image + +from dassl.utils import mkdir_if_missing + + +def extract_and_save(images, labels, level, dst): + # level denotes the corruption intensity level (0-based) + assert 0 <= level <= 4 + + for i in range(10000): + real_i = i + level*10000 + im = Image.fromarray(images[real_i]) + label = int(labels[real_i]) + category_dir = osp.join(dst, str(label).zfill(3)) + mkdir_if_missing(category_dir) + save_path = osp.join(category_dir, str(i + 1).zfill(5) + ".jpg") + im.save(save_path) + + +def main(npy_folder): + npy_folder = osp.abspath(osp.expanduser(npy_folder)) + dataset_cap = osp.basename(npy_folder) + + assert dataset_cap in ["CIFAR-10-C", "CIFAR-100-C"] + + if dataset_cap == "CIFAR-10-C": + dataset = "cifar10_c" + else: + dataset = "cifar100_c" + + if not osp.exists(npy_folder): + print('The given folder "{}" does not exist'.format(npy_folder)) + + root = osp.dirname(npy_folder) + im_folder = osp.join(root, dataset) + + mkdir_if_missing(im_folder) + + dirnames = os.listdir(npy_folder) + dirnames.remove("labels.npy") + if "README.txt" in dirnames: + dirnames.remove("README.txt") + assert len(dirnames) == 19 + labels = np.load(osp.join(npy_folder, "labels.npy")) + + for dirname in dirnames: + corruption = dirname.split(".")[0] + corruption_folder = osp.join(im_folder, corruption) + mkdir_if_missing(corruption_folder) + + npy_filename = osp.join(npy_folder, dirname) + images = np.load(npy_filename) + assert images.shape[0] == 50000 + + for level in range(5): + dst = osp.join(corruption_folder, str(level + 1)) + mkdir_if_missing(dst) + print('Saving images to "{}"'.format(dst)) + extract_and_save(images, labels, level, dst) + + +if __name__ == "__main__": + # sys.argv[1] contains the path to CIFAR-10-C or CIFAR-100-C + main(sys.argv[1]) diff --git a/python/ClipDetection/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py 
b/python/ClipDetection/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py new file mode 100644 index 00000000..ad9aa11c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py @@ -0,0 +1,50 @@ +import sys +import os.path as osp +from torchvision.datasets import SVHN, CIFAR10, CIFAR100 + +from dassl.utils import mkdir_if_missing + + +def extract_and_save_image(dataset, save_dir): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + class_dir = osp.join(save_dir, str(label).zfill(3)) + mkdir_if_missing(class_dir) + impath = osp.join(class_dir, str(i + 1).zfill(5) + ".jpg") + img.save(impath) + + +def download_and_prepare(name, root): + print("Dataset: {}".format(name)) + print("Root: {}".format(root)) + + if name == "cifar10": + train = CIFAR10(root, train=True, download=True) + test = CIFAR10(root, train=False) + elif name == "cifar100": + train = CIFAR100(root, train=True, download=True) + test = CIFAR100(root, train=False) + elif name == "svhn": + train = SVHN(root, split="train", download=True) + test = SVHN(root, split="test", download=True) + else: + raise ValueError + + train_dir = osp.join(root, name, "train") + test_dir = osp.join(root, name, "test") + + extract_and_save_image(train, train_dir) + extract_and_save_image(test, test_dir) + + +if __name__ == "__main__": + download_and_prepare("cifar10", sys.argv[1]) + download_and_prepare("cifar100", sys.argv[1]) + download_and_prepare("svhn", sys.argv[1]) diff --git a/python/ClipDetection/Dassl.pytorch/datasets/ssl/stl10.py b/python/ClipDetection/Dassl.pytorch/datasets/ssl/stl10.py new file mode 100644 index 00000000..3f2ed2cb --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/datasets/ssl/stl10.py @@ -0,0 +1,42 @@ +import sys +import os.path as osp +from torchvision.datasets import 
STL10 + +from dassl.utils import mkdir_if_missing + + +def extract_and_save_image(dataset, save_dir): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + if label == -1: + label_name = "none" + else: + label_name = str(label) + imname = str(i).zfill(6) + "_" + label_name + ".jpg" + impath = osp.join(save_dir, imname) + img.save(impath) + + +def download_and_prepare(root): + train = STL10(root, split="train", download=True) + test = STL10(root, split="test") + unlabeled = STL10(root, split="unlabeled") + + train_dir = osp.join(root, "train") + test_dir = osp.join(root, "test") + unlabeled_dir = osp.join(root, "unlabeled") + + extract_and_save_image(train, train_dir) + extract_and_save_image(test, test_dir) + extract_and_save_image(unlabeled, unlabeled_dir) + + +if __name__ == "__main__": + download_and_prepare(sys.argv[1]) diff --git a/python/ClipDetection/Dassl.pytorch/linter.sh b/python/ClipDetection/Dassl.pytorch/linter.sh new file mode 100644 index 00000000..9db34f9f --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/linter.sh @@ -0,0 +1,11 @@ +echo "Running isort" +isort -y -sp . +echo "Done" + +echo "Running yapf" +yapf -i -r -vv -e build . +echo "Done" + +echo "Running flake8" +flake8 . 
+echo "Done" \ No newline at end of file diff --git a/python/ClipDetection/Dassl.pytorch/requirements.txt b/python/ClipDetection/Dassl.pytorch/requirements.txt new file mode 100644 index 00000000..d8dbbdfb --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/requirements.txt @@ -0,0 +1,14 @@ +flake8==3.7.9 +yapf==0.29.0 +isort==4.3.21 +yacs +gdown +tb-nightly +future +scipy +scikit-learn +tqdm +ftfy +regex +wilds==1.2.2 +tabulate diff --git a/python/ClipDetection/Dassl.pytorch/setup.py b/python/ClipDetection/Dassl.pytorch/setup.py new file mode 100644 index 00000000..b0cbe47b --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/setup.py @@ -0,0 +1,48 @@ +import numpy as np +import os.path as osp +from setuptools import setup, find_packages + + +def readme(): + with open('README.md') as f: + content = f.read() + return content + + +def find_version(): + version_file = 'dassl/__init__.py' + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +def numpy_include(): + try: + numpy_include = np.get_include() + except AttributeError: + numpy_include = np.get_numpy_include() + return numpy_include + + +def get_requirements(filename='requirements.txt'): + here = osp.dirname(osp.realpath(__file__)) + with open(osp.join(here, filename), 'r') as f: + requires = [line.replace('\n', '') for line in f.readlines()] + return requires + + +setup( + name='dassl', + version=find_version(), + description='Dassl: Domain adaptation and semi-supervised learning', + author='Kaiyang Zhou', + license='MIT', + long_description=readme(), + url='https://github.com/KaiyangZhou/Dassl.pytorch', + packages=find_packages(), + install_requires=get_requirements(), + keywords=[ + 'Domain Adaptation', 'Domain Generalization', + 'Semi-Supervised Learning', 'Pytorch' + ] +) diff --git a/python/ClipDetection/Dassl.pytorch/tools/parse_test_res.py b/python/ClipDetection/Dassl.pytorch/tools/parse_test_res.py new file mode 100644 index 
00000000..d5105add --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/tools/parse_test_res.py @@ -0,0 +1,178 @@ +""" +Goal +--- +1. Read test results from log.txt files +2. Compute mean and std across different folders (seeds) + +Usage +--- +Assume the output files are saved under output/my_experiment, +which contains results of different seeds, e.g., + +my_experiment/ + seed1/ + log.txt + seed2/ + log.txt + seed3/ + log.txt + +Run the following command from the root directory: + +$ python tools/parse_test_res.py output/my_experiment + +Add --ci95 to the argument if you wanna get 95% confidence +interval instead of standard deviation: + +$ python tools/parse_test_res.py output/my_experiment --ci95 + +If my_experiment/ has the following structure, + +my_experiment/ + exp-1/ + seed1/ + log.txt + ... + seed2/ + log.txt + ... + seed3/ + log.txt + ... + exp-2/ + ... + exp-3/ + ... + +Run + +$ python tools/parse_test_res.py output/my_experiment --multi-exp +""" +import re +import numpy as np +import os.path as osp +import argparse +from collections import OrderedDict, defaultdict + +from dassl.utils import check_isfile, listdir_nohidden + + +def compute_ci95(res): + return 1.96 * np.std(res) / np.sqrt(len(res)) + + +def parse_function(*metrics, directory="", args=None, end_signal=None): + print("===") + print(f"Parsing files in {directory}") + subdirs = listdir_nohidden(directory, sort=True) + + outputs = [] + + for subdir in subdirs: + fpath = osp.join(directory, subdir, "log.txt") + assert check_isfile(fpath) + good_to_go = False + output = OrderedDict() + + with open(fpath, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + + if line == end_signal: + good_to_go = True + + for metric in metrics: + match = metric["regex"].search(line) + if match and good_to_go: + if "file" not in output: + output["file"] = fpath + num = float(match.group(1)) + name = metric["name"] + output[name] = num + + if output: + outputs.append(output) + + assert 
len(outputs) > 0, f"Nothing found in {directory}" + + metrics_results = defaultdict(list) + for output in outputs: + msg = "" + for key, value in output.items(): + if isinstance(value, float): + msg += f"{key}: {value:.1f}%. " + else: + msg += f"{key}: {value}. " + if key != "file": + metrics_results[key].append(value) + print(msg) + + output_results = OrderedDict() + for key, values in metrics_results.items(): + avg = np.mean(values) + std = compute_ci95(values) if args.ci95 else np.std(values) + print(f"* average {key}: {avg:.1f}% +- {std:.1f}%") + output_results[key] = avg + print("===") + + return output_results + + +def main(args, end_signal): + metric = { + "name": args.keyword, + "regex": re.compile(fr"\* {args.keyword}: ([\.\deE+-]+)%"), + } + + if args.multi_exp: + final_results = defaultdict(list) + + for directory in listdir_nohidden(args.directory, sort=True): + directory = osp.join(args.directory, directory) + results = parse_function( + metric, directory=directory, args=args, end_signal=end_signal + ) + + for key, value in results.items(): + final_results[key].append(value) + + print("Average performance") + for key, values in final_results.items(): + avg = np.mean(values) + print(f"* {key}: {avg:.1f}%") + + else: + parse_function( + metric, directory=args.directory, args=args, end_signal=end_signal + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("directory", type=str, help="path to directory") + parser.add_argument( + "--ci95", + action="store_true", + help=r"compute 95\% confidence interval" + ) + parser.add_argument( + "--test-log", action="store_true", help="parse test-only logs" + ) + parser.add_argument( + "--multi-exp", action="store_true", help="parse multiple experiments" + ) + parser.add_argument( + "--keyword", + default="accuracy", + type=str, + help="which keyword to extract" + ) + args = parser.parse_args() + + end_signal = "Finish training" # needs to be adapted to the latest + if 
args.test_log: + end_signal = "=> result" + + main(args, end_signal) diff --git a/python/ClipDetection/Dassl.pytorch/tools/replace_text.py b/python/ClipDetection/Dassl.pytorch/tools/replace_text.py new file mode 100644 index 00000000..71761544 --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/tools/replace_text.py @@ -0,0 +1,69 @@ +""" +Replace text in python files. +""" +import glob +import os.path as osp +import argparse +import fileinput + +EXTENSION = ".py" + + +def is_python_file(filename): + ext = osp.splitext(filename)[1] + return ext == EXTENSION + + +def update_file(filename, text_to_search, replacement_text): + print("Processing {}".format(filename)) + with fileinput.FileInput(filename, inplace=True, backup="") as file: + for line in file: + print(line.replace(text_to_search, replacement_text), end="") + + +def recursive_update(directory, text_to_search, replacement_text): + filenames = glob.glob(osp.join(directory, "*")) + + for filename in filenames: + if osp.isfile(filename): + if not is_python_file(filename): + continue + update_file(filename, text_to_search, replacement_text) + elif osp.isdir(filename): + recursive_update(filename, text_to_search, replacement_text) + else: + raise NotImplementedError + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "file_or_dir", type=str, help="path to file or directory" + ) + parser.add_argument("text_to_search", type=str, help="name to be replaced") + parser.add_argument("replacement_text", type=str, help="new name") + parser.add_argument( + "--ext", type=str, default=".py", help="file extension" + ) + args = parser.parse_args() + + file_or_dir = args.file_or_dir + text_to_search = args.text_to_search + replacement_text = args.replacement_text + extension = args.ext + + global EXTENSION + EXTENSION = extension + + if osp.isfile(file_or_dir): + if not is_python_file(file_or_dir): + return + update_file(file_or_dir, text_to_search, replacement_text) + elif osp.isdir(file_or_dir): + 
recursive_update(file_or_dir, text_to_search, replacement_text) + else: + raise NotImplementedError + + +if __name__ == "__main__": + main() diff --git a/python/ClipDetection/Dassl.pytorch/tools/train.py b/python/ClipDetection/Dassl.pytorch/tools/train.py new file mode 100644 index 00000000..106cd19c --- /dev/null +++ b/python/ClipDetection/Dassl.pytorch/tools/train.py @@ -0,0 +1,191 @@ +import argparse +import torch + +from dassl.utils import setup_logger, set_random_seed, collect_env_info +from dassl.config import clean_cfg, get_cfg_default +from dassl.engine import build_trainer + + +def print_args(args, cfg): + print("***************") + print("** Arguments **") + print("***************") + optkeys = list(args.__dict__.keys()) + optkeys.sort() + for key in optkeys: + print("{}: {}".format(key, args.__dict__[key])) + print("************") + print("** Config **") + print("************") + print(cfg) + + +def reset_cfg(cfg, args): + if args.root: + cfg.DATASET.ROOT = args.root + + if args.output_dir: + cfg.OUTPUT_DIR = args.output_dir + + if args.resume: + cfg.RESUME = args.resume + + if args.seed: + cfg.SEED = args.seed + + if args.source_domains: + cfg.DATASET.SOURCE_DOMAINS = args.source_domains + + if args.target_domains: + cfg.DATASET.TARGET_DOMAINS = args.target_domains + + if args.transforms: + cfg.INPUT.TRANSFORMS = args.transforms + + if args.trainer: + cfg.TRAINER.NAME = args.trainer + + if args.backbone: + cfg.MODEL.BACKBONE.NAME = args.backbone + + if args.head: + cfg.MODEL.HEAD.NAME = args.head + + +def extend_cfg(cfg): + """ + Add new config variables. + + E.g. + from yacs.config import CfgNode as CN + cfg.TRAINER.MY_MODEL = CN() + cfg.TRAINER.MY_MODEL.PARAM_A = 1. + cfg.TRAINER.MY_MODEL.PARAM_B = 0.5 + cfg.TRAINER.MY_MODEL.PARAM_C = False + """ + pass + + +def setup_cfg(args): + cfg = get_cfg_default() + extend_cfg(cfg) + + # 1. From the dataset config file + if args.dataset_config_file: + cfg.merge_from_file(args.dataset_config_file) + + # 2. 
From the method config file + if args.config_file: + cfg.merge_from_file(args.config_file) + + # 3. From input arguments + reset_cfg(cfg, args) + + # 4. From optional input arguments + cfg.merge_from_list(args.opts) + + clean_cfg(cfg, args.trainer) + cfg.freeze() + + return cfg + + +def main(args): + cfg = setup_cfg(args) + if cfg.SEED >= 0: + print("Setting fixed seed: {}".format(cfg.SEED)) + set_random_seed(cfg.SEED) + setup_logger(cfg.OUTPUT_DIR) + + if torch.cuda.is_available() and cfg.USE_CUDA: + torch.backends.cudnn.benchmark = True + + print_args(args, cfg) + print("Collecting env info ...") + print("** System info **\n{}\n".format(collect_env_info())) + + trainer = build_trainer(cfg) + + if args.eval_only: + trainer.load_model(args.model_dir, epoch=args.load_epoch) + trainer.test() + return + + if not args.no_train: + trainer.train() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="", help="path to dataset") + parser.add_argument( + "--output-dir", type=str, default="", help="output directory" + ) + parser.add_argument( + "--resume", + type=str, + default="", + help="checkpoint directory (from which the training resumes)", + ) + parser.add_argument( + "--seed", + type=int, + default=-1, + help="only positive value enables a fixed seed" + ) + parser.add_argument( + "--source-domains", + type=str, + nargs="+", + help="source domains for DA/DG" + ) + parser.add_argument( + "--target-domains", + type=str, + nargs="+", + help="target domains for DA/DG" + ) + parser.add_argument( + "--transforms", type=str, nargs="+", help="data augmentation methods" + ) + parser.add_argument( + "--config-file", type=str, default="", help="path to config file" + ) + parser.add_argument( + "--dataset-config-file", + type=str, + default="", + help="path to config file for dataset setup", + ) + parser.add_argument( + "--trainer", type=str, default="", help="name of trainer" + ) + parser.add_argument( + 
"--backbone", type=str, default="", help="name of CNN backbone" + ) + parser.add_argument("--head", type=str, default="", help="name of head") + parser.add_argument( + "--eval-only", action="store_true", help="evaluation only" + ) + parser.add_argument( + "--model-dir", + type=str, + default="", + help="load model from this directory for eval-only mode", + ) + parser.add_argument( + "--load-epoch", + type=int, + help="load model weights at this epoch for evaluation" + ) + parser.add_argument( + "--no-train", action="store_true", help="do not call trainer.train()" + ) + parser.add_argument( + "opts", + default=None, + nargs=argparse.REMAINDER, + help="modify config options using the command-line", + ) + args = parser.parse_args() + main(args) diff --git a/python/ClipDetection/Dockerfile b/python/ClipDetection/Dockerfile index 681b05d3..f7b120bb 100644 --- a/python/ClipDetection/Dockerfile +++ b/python/ClipDetection/Dockerfile @@ -1,66 +1,79 @@ -# syntax=docker/dockerfile:experimental - -############################################################################# -# NOTICE # -# # -# This software (or technical data) was produced for the U.S. Government # -# under contract, and is subject to the Rights in Data-General Clause # -# 52.227-14, Alt. IV (DEC 2007). # -# # -# Copyright 2024 The MITRE Corporation. All Rights Reserved. # -############################################################################# - -############################################################################# -# Copyright 2024 The MITRE Corporation # -# # -# Licensed under the Apache License, Version 2.0 (the "License"); # -# you may not use this file except in compliance with the License. 
# -# You may obtain a copy of the License at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# Unless required by applicable law or agreed to in writing, software # -# distributed under the License is distributed on an "AS IS" BASIS, # -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -# See the License for the specific language governing permissions and # -# limitations under the License. # -############################################################################# - -ARG MODELS_REGISTRY=openmpf/ -ARG BUILD_REGISTRY -ARG BUILD_TAG=latest -FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:8.0.0 as models -FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} - -COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt -COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt - -RUN --mount=type=tmpfs,target=/var/cache/apt \ - --mount=type=tmpfs,target=/var/lib/apt/lists \ - --mount=type=tmpfs,target=/tmp \ - apt-get update; \ - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y wget - -RUN pip3 install --upgrade pip - -RUN pip3 install ftfy regex tqdm 'setuptools<70' - -RUN --mount=type=tmpfs,target=/tmp \ - mkdir /tmp/CLIP; \ - wget -O- 'https://github.com/openai/CLIP/tarball/master' \ - | tar --extract --gzip --directory /tmp/CLIP; \ - cd /tmp/CLIP/*; \ - pip3 install . 
'torchvision==0.14.1' - -ARG RUN_TESTS=false - -RUN --mount=target=.,readwrite \ - install-component.sh; \ - if [ "${RUN_TESTS,,}" == true ]; then python tests/test_clip.py; fi - -LABEL org.label-schema.license="Apache 2.0" \ - org.label-schema.name="OpenMPF CLIP Detection" \ - org.label-schema.schema-version="1.0" \ - org.label-schema.url="https://openmpf.github.io" \ - org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \ - org.label-schema.vendor="MITRE" +# syntax=docker/dockerfile:experimental + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2024 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2024 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
# +############################################################################# + +ARG MODELS_REGISTRY=openmpf/ +ARG BUILD_REGISTRY +ARG BUILD_TAG=latest +FROM ${MODELS_REGISTRY}openmpf_clip_detection_models:9.0.0-feature as models +FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} + +COPY --from=models /models/ViT-B-32.pt /models/ViT-B-32.pt +COPY --from=models /models/ViT-L-14.pt /models/ViT-L-14.pt +COPY --from=models /models/model.pth.tar-50 /models/prompt_learner/model.pth.tar-50 + +RUN --mount=type=tmpfs,target=/var/cache/apt \ + --mount=type=tmpfs,target=/var/lib/apt/lists \ + --mount=type=tmpfs,target=/tmp \ + apt-get update; \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y wget + +RUN pip3 install --upgrade pip + +RUN pip3 install ftfy regex tqdm + +RUN pip3 install torch==1.7.1 torchvision==0.8.2 --extra-index-url https://download.pytorch.org/whl/cu102 + +RUN --mount=type=tmpfs,target=/tmp \ + mkdir /tmp/CLIP; \ + wget -O- 'https://github.com/openai/CLIP/tarball/master' \ + | tar --extract --gzip --directory /tmp/CLIP; \ + cd /tmp/CLIP/*; \ + pip3 install . + +COPY ./Dassl.pytorch/requirements.txt /tmp/requirements.txt +RUN pip3 install --no-cache-dir -r /tmp/requirements.txt + +RUN --mount=target=.,readwrite \ + cd ./Dassl.pytorch && pip3 install . 
+ +COPY ./CoOp /opt/coop_src/CoOp + +ARG RUN_TESTS=false + +RUN --mount=target=.,readwrite \ + install-component.sh; \ + if [ "${RUN_TESTS,,}" == true ]; then python tests/test_clip.py; fi + +ENV PYTHONPATH="$PYTHONPATH:/opt/coop_src" + +LABEL org.label-schema.license="Apache 2.0" \ + org.label-schema.name="OpenMPF CLIP Detection" \ + org.label-schema.schema-version="1.0" \ + org.label-schema.url="https://openmpf.github.io" \ + org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \ + org.label-schema.vendor="MITRE" diff --git a/python/ClipDetection/LICENSE b/python/ClipDetection/LICENSE index e6289faf..b4f90184 100644 --- a/python/ClipDetection/LICENSE +++ b/python/ClipDetection/LICENSE @@ -23,10 +23,19 @@ this software is being used. This software makes use of a data model derived from third party software: --------------------------------------------------------------------------- +------------------------------------------------------------------------------ The TensorFlow implementation of the Contrastive Language-Image Pre-Training (CLIP) model used by this component was developed by OpenAI: http://www.github.com/openai/CLIP -The OpenAI CLIP model is licensed under the MIT License. \ No newline at end of file +The OpenAI CLIP model is licensed under the MIT License. + +------------------------------------------------------------------------------ + +The Python implementations of Context Optimization (CoOp) and Dassl.pytorch +used by this component were developed by Kaiyang Zhou and others: +https://github.com/KaiyangZhou/CoOp +https://github.com/KaiyangZhou/Dassl.pytorch + +CoOp and Dassl.pytorch are licensed under the MIT License. \ No newline at end of file diff --git a/python/ClipDetection/NOTICE b/python/ClipDetection/NOTICE index ae6303f5..d16224c4 100644 --- a/python/ClipDetection/NOTICE +++ b/python/ClipDetection/NOTICE @@ -1,7 +1,17 @@ +<<<<<<< HEAD +# NOTICE + +This software (or technical data) was produced for the U.S. 
Government +under contract, and is subject to the Rights in Data-General Clause +552.227-14, Alt. IV (DEC 2007). + +Copyright 2023 The MITRE Corporation. All Rights Reserved. +======= # NOTICE This software (or technical data) was produced for the U.S. Government under contract, and is subject to the Rights in Data-General Clause 552.227-14, Alt. IV (DEC 2007). -Copyright 2024 The MITRE Corporation. All Rights Reserved. \ No newline at end of file +Copyright 2024 The MITRE Corporation. All Rights Reserved. +>>>>>>> origin/develop diff --git a/python/ClipDetection/README.md b/python/ClipDetection/README.md index 675443b5..4899d28f 100644 --- a/python/ClipDetection/README.md +++ b/python/ClipDetection/README.md @@ -6,28 +6,30 @@ This repository contains source code for the OpenMPF CLIP detection component. C The following are the properties that can be specified for the component. Each property has a default value and so none of them necessarily need to be specified for processing jobs. -- `MODEL_NAME`: Specifies the CLIP model that is loaded and used by the component. The only supported models are 'ViT-L/14' (the default model) and 'ViT-B/32'. +- `MODEL_NAME`: Specifies the CLIP model that is loaded and used by the component, as well as allowing the component to utilize CoOp for ImageNet classification. The only supported models are 'ViT-L/14' (the default model), 'ViT-B/32', and 'CoOp'. - `NUMBER_OF_CLASSIFICATIONS`: Specifies how many of the top classifications you want to return. The default value is set to 1, and so you'll only see the classification with the greatest confidence. -- `CLASSIFICATION_PATH`: If specified, this allows the user to give the component a file path to their own list of classifications in a CSV file, if the COCO or ImageNet class lists aren't of interest. See below for the formatting that's required for that file. 
+- `TEMPLATE_TYPE`: There are three template files that are included in the component, with the number of templates in each being 1, 7, and 80. The one template is a basic template, while the 7 and 80 come from the OpenAI team when trying to [improve performance](https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb) on the ImageNet dataset. The default value is 'openai_80', while 'openai_1' and 'openai_7' are the only other valid inputs. Also this property is overridden if a `TEMPLATE_PATH` is specified. -- `CLASSIFICATION_LIST`: Specifies whether the user wants to use the COCO or ImageNet classification list, by specifying 'coco' or 'imagenet', respectively. By default, this is set to 'coco'. Also this property is overridden if a `CLASSIFICATION_PATH` is given. +- `TEMPLATE_PATH`: If specified, this allows the user to give the component a file path to their own list of templates. See below for the formatting that's required for that file. The OpenAI developers admitted that the process of developing templates was a lot of trial and error, so feel free to come up with your own! Also, a value of '' is required if `MODEL_NAME`='CoOp'. -- `TEMPLATE_PATH`: If specified, this allows the user to give the component a file path to their own list of templates. See below for the formatting that's required for that file. The OpenAI developers admitted that the process of developing templates was a lot of trial and error, so feel free to come up with your own! +- `CLASSIFICATION_LIST`: Specifies whether the user wants to use the COCO or ImageNet classification list, by specifying 'coco' or 'imagenet', respectively. By default, this is set to 'coco'. Also this property is overridden if a `CLASSIFICATION_PATH` is given, and a value of 'imagenet' is required if `MODEL_NAME`='CoOp'. -- `TEMPLATE_TYPE`: There are three template files that are included in the component, with the number of templates in each being 1, 7, and 80. 
The one template is a basic template, while the 7 and 80 come from the OpenAI team when trying to [improve performance](https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb) on the ImageNet dataset. The default value is 'openai_80', while 'openai_1' and 'openai_7' are the only other valid inputs. Also this property is overridden if a `TEMPLATE_PATH` is specified. +- `CLASSIFICATION_PATH`: If specified, this allows the user to give the component a file path to their own list of classifications in a CSV file, if the COCO or ImageNet class lists aren't of interest. See below for the formatting that's required for that file. Also, a value of '' is required if `MODEL_NAME`='CoOp'. - `ENABLE_CROPPING`: A boolean toggle to specify if the image is to be cropped into 144 images of size 224x224 which cover all areas of the original. By default, this is set to true. This technique is described in Section 7 of the paper "[Going deeper with convolutions](https://arxiv.org/abs/1409.4842)" from Szegedy, et al. -- `ENABLE_TRITON`: A boolean toggle to specify whether the component should use a Triton inference server to process the image job. By default this is set to false. - -- `INCLUDE_FEATURES`: A boolean toggle to specify whether the `FEATURE` detection property is included with each detection. By default, this is set to false. +- `ENABLE_TRITON`: A boolean toggle to specify whether the component should use a Triton inference server to process the image job. By default this is set to false. Also, a value of false is required if `MODEL_NAME`='CoOp'. - `TRITON_SERVER`: Specifies the Triton server `<host>:<port>` to use for inferencing. By default, this is set to 'clip-detection-server:8001'. +- `INCLUDE_FEATURES`: A boolean toggle to specify whether the `FEATURE` detection property is included with each detection. By default, this is set to false. + - `DETECTION_FRAME_BATCH_SIZE`: Specifies the batch size when processing video files.
By default, this is set to 64. +- `CUDA_DEVICE_ID`: Specifies the ID of the CUDA device that will be used to run the models. When the value is less than 0, CUDA is disabled. + ## Detection Properties Returned `ImageLocation` objects have the following members in their `detection_properties`: @@ -41,7 +43,7 @@ Returned `ImageLocation` objects have the following members in their `detection_ # Custom Templates -When tuning the CLIP model, it is important to have appropriate templates for what you're trying to classify. In order to write the file, put one template on each line. Use a pair of brackets, {}, where the potential classifications need to be placed. See below for example templates. +When tuning the CLIP model, it is important to have appropriate templates for what you're trying to classify. In order to write the file, put one template on each line. Use a pair of brackets, {}, where the class names need to be placed. See below for example templates. ``` A photograph of a {}. A {} in an open field. @@ -49,15 +51,19 @@ A {} in an open field. # Custom Classifications -The need for custom classifications arose when training on the ImageNet classifications, where any different class can have many equivalent names. For example, one of the classes is "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias". We found the model to be most performant when given a single representative class title. For this case, 'great white shark' makes the most sense. The `imagenet_classification_list.csv` file gives representative titles for each class, adapted from .ipynb files on the CLIP GitHub page. +The need for custom classifications arose when training on the ImageNet classifications, where any different class can have many equivalent names. For example, one of the classes is "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias". We found the model to be most performant when given a single representative class title.
For this case, 'great white shark' makes the most sense. The `imagenet_classification_list.csv` file gives representative titles for each class, adapted from .ipynb files on the [CLIP GitHub page](https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb). -As for the format of the CSV file, it has two columns. The first being the representative name, and the second being the full name of the class. The representative name is what goes inside the brackets, {}, of the templates, and the full name is what will be used when displaying results. Below are a couple of examples of rows from the ImageNet classifications. Note that in the first example, quotes are put around the full classification name so they're easier to read and so that those commas aren't confused for the separator. +As for the format of the CSV file, it has two columns. The first column contains the representative name, and the second contains the full name of the class. The representative name is what goes inside the brackets, {}, of the templates, and the full name is what will be used when displaying results. Below are a couple of examples of rows from the ImageNet classifications. Note that in the first example, quotes are put around the full classification name so they're easier to read and so that those commas aren't confused for the separator. ``` tench,"tench, Tinca tinca" kite (bird of prey),kite magpie,magpie ``` + +# Context Optimization (CoOp) +[Context Optimization (CoOp)](https://github.com/KaiyangZhou/CoOp) was developed by Kaiyang Zhou et al. to adapt the CLIP model to downstream datasets via prompt learning. For the ImageNet dataset (and many others), it is [shown to improve performance](https://arxiv.org/abs/2109.01134) for classification. For this component, trained text prompts have been implemented for use on the ImageNet classes.
To use CoOp, make sure that the following properties are set: `MODEL_NAME`='CoOp', `CLASSIFICATION_LIST`='imagenet', `TEMPLATE_PATH`='', `CLASSIFICATION_PATH`='', and `ENABLE_TRITON`=false. + # Non-Triton Performance The table below shows the performance of this component on a NVIDIA Tesla V100 32GB GPU, for varying batch sizes with both models: | Model Name | Batch Size | Total Time (seconds) | Average Time per Batch (seconds) | Average Images per Second | @@ -93,7 +99,7 @@ The table below shows the performance of this component with Triton on a NVIDIA # Future Research * Investigate using the CLIP interrogator for determining text prompts for classification. * Investigate methods to automate the generation of text prompts. - * [Context Optimization (CoOp)](http://arxiv.org/abs/2109.01134) and [Conditional Context Optimization (CoCoOp)](http://arxiv.org/abs/2203.05557) models a prompt's context as a set of learnable vectors that can be optimized for the classes you're looking for, with CoCoOp improving on CoOp's ability in classifying to classes unseen by CoOp in training. + * [Context Optimization (CoOp)](http://arxiv.org/abs/2109.01134) and [Conditional Context Optimization (CoCoOp)](http://arxiv.org/abs/2203.05557) model a prompt's context as a set of learnable vectors that can be optimized for the classes you're looking for, with CoCoOp improving on CoOp's ability in classifying to classes unseen by CoOp in training. 
# Known Issues diff --git a/python/ClipDetection/__init__.py b/python/ClipDetection/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/ClipDetection/clip_component/clip_component.py b/python/ClipDetection/clip_component/clip_component.py index 78d781ca..78fe4227 100644 --- a/python/ClipDetection/clip_component/clip_component.py +++ b/python/ClipDetection/clip_component/clip_component.py @@ -30,6 +30,7 @@ from pkg_resources import resource_filename from itertools import islice from typing import Iterable, Mapping +import argparse from PIL import Image import cv2 @@ -39,6 +40,7 @@ import torchvision.transforms as T import torchvision.transforms.functional as TF import clip +from CoOp.train import get_trainer import tritonclient.grpc as grpcclient from tritonclient.utils import InferenceServerException, triton_to_np_dtype @@ -47,7 +49,6 @@ import mpf_component_util as mpf_util logger = logging.getLogger('ClipComponent') -device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') class ClipComponent(mpf_util.ImageReaderMixin, mpf_util.VideoCaptureMixin): detection_type = 'CLASS' @@ -66,7 +67,7 @@ def _get_prop(job_properties, key, default_value, accept_values=[]): return prop def _parse_properties(self, job_properties): - model_name = self._get_prop(job_properties, "MODEL_NAME", "ViT-L/14", ["ViT-L/14", "ViT-B/32"]) + model_name = self._get_prop(job_properties, "MODEL_NAME", "ViT-L/14", ["ViT-L/14", "ViT-B/32", "CoOp"]) batch_size = self._get_prop(job_properties, "DETECTION_FRAME_BATCH_SIZE", 64) classification_list = self._get_prop(job_properties, "CLASSIFICATION_LIST", 'coco', ['coco', 'imagenet']) classification_path = os.path.expandvars(self._get_prop(job_properties, "CLASSIFICATION_PATH", '')) @@ -77,6 +78,7 @@ def _parse_properties(self, job_properties): template_type = self._get_prop(job_properties, "TEMPLATE_TYPE", 'openai_80', ['openai_1', 'openai_7', 'openai_80']) template_path = 
os.path.expandvars(self._get_prop(job_properties, "TEMPLATE_PATH", '')) triton_server = self._get_prop(job_properties, "TRITON_SERVER", 'clip-detection-server:8001') + cuda_device_id = self._get_prop(job_properties, "CUDA_DEVICE_ID", -1) return dict( model_name = model_name, @@ -89,19 +91,30 @@ def _parse_properties(self, job_properties): num_classifications = num_classifications, template_type = template_type, template_path = template_path, - triton_server = triton_server + triton_server = triton_server, + cuda_device_id = cuda_device_id ) def get_detections_from_image_reader(self, image_job, image_reader): logger.info("Received image job: %s", image_job) kwargs = self._parse_properties(image_job.job_properties) + if kwargs['cuda_device_id'] >= torch.cuda.device_count(): + raise mpf.DetectionException( + f"Invalid CUDA device ID.", + mpf.DetectionError.INVALID_PROPERTY + ) + elif kwargs['cuda_device_id'] >= 0: + device = torch.device(f"cuda:{kwargs['cuda_device_id']}") + else: + device = torch.device('cpu') + image = image_reader.get_image() num_detections = 0 try: - wrapper = self._get_model_wrapper(kwargs['model_name']) - detections = wrapper.get_detections((image,), **kwargs) + wrapper = self._get_model_wrapper(model_name=kwargs['model_name'], kwargs=kwargs, device=device) + detections = wrapper.get_detections((image,), device, **kwargs) for detection in detections: yield detection num_detections += 1 @@ -134,17 +147,26 @@ def get_detections_from_video_capture(self, video_capture: mpf_util.VideoCapture) -> Iterable[mpf.VideoTrack]: logger.info("Received video job: %s", video_job) kwargs = self._parse_properties(video_job.job_properties) + if kwargs['cuda_device_id'] >= torch.cuda.device_count(): + raise mpf.DetectionException( + f"Invalid CUDA device ID.", + mpf.DetectionError.INVALID_PROPERTY + ) + elif kwargs['cuda_device_id'] >= 0: + device = torch.device(f"cuda:{kwargs['cuda_device_id']}") + else: + device = torch.device('cpu') # If processing a video where 
each frame is cropped into 144 images, the batch size is set to one so that the crops aren't split between batches batch_size = 1 if kwargs['enable_cropping'] else kwargs['batch_size'] batch_gen = self._batches_from_video_capture(video_capture, batch_size) detections = [] - wrapper = self._get_model_wrapper(kwargs['model_name']) + wrapper = self._get_model_wrapper(model_name=kwargs['model_name'], kwargs=kwargs, device=device) for n, batch in batch_gen: try: - detections += list(islice(wrapper.get_detections(batch, **kwargs), n)) + detections += list(islice(wrapper.get_detections(batch, device, **kwargs), n)) except Exception as e: logger.exception(f"Job failed due to: {e}") raise @@ -153,14 +175,157 @@ def get_detections_from_video_capture(self, logger.info(f"Job complete. Found {len(tracks)} tracks.") return tracks - def _get_model_wrapper(self, model_name): + def _get_model_wrapper(self, model_name, kwargs, device): if model_name not in self._model_wrappers: - self._model_wrappers[model_name] = ClipWrapper(model_name) + if model_name == "CoOp": + self._model_wrappers['CoOp'] = CoOpWrapper(**kwargs) + else: + self._model_wrappers[model_name] = ClipWrapper(device, model_name) return self._model_wrappers[model_name] +class CoOpWrapper(object): + def __init__(self, **kwargs): + if (kwargs['classification_list'] == 'coco') or (kwargs['template_path'] != '') or (kwargs['classification_path'] != '') or (kwargs['enable_triton'] == True): + raise mpf.DetectionException( + f"Properties incompatible with CoOp. 
Make sure that CLASSIFICATION_LIST='imagenet', TEMPLATE_PATH='', CLASSIFICATION_PATH='', and ENABLE_TRITON=False.", + mpf.DetectionError.INVALID_PROPERTY + ) + self._manual_args = self._get_coop_args() + if kwargs['cuda_device_id'] >= 0: + self._manual_args.insert(0, '--cuda') + + self.args = self._create_arg_parser(self._manual_args) + self._class_mapping = self._get_mapping_from_classifications(os.path.realpath(resource_filename(__name__, f'data/imagenet_classification_list.csv'))) + self.classnames = self._class_mapping.keys() + # Create trainer object + print("Creating trainer...") + self.trainer = get_trainer(self.args, self.classnames, kwargs['cuda_device_id']) + print("Trainer created.") + self.trainer.load_model(self.args.model_dir, epoch = self.args.load_epoch) + + def get_detections(self, images, device, **kwargs): + # Preprocess image + self._preprocessor = ImagePreprocessor(enable_cropping=False, image_size=224) + images = [Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) for image in images] + image_sizes = [image.size for image in images] + torch_imgs = torch.stack([self._preprocessor.preprocess(image).squeeze(0) for image in images]).to(device) + + # Pass image through model + output, image_features = self.trainer.test(images=torch_imgs) + + softmax = torch.nn.Softmax(dim=1)(output) + values, indices = softmax.topk(kwargs['num_classifications']) + + for detection_values, detection_indices, image_size in zip(values, indices, image_sizes): + classification_list = [] + classification_confidence_list = [] + count = 0 + for value, index in zip(detection_values, detection_indices): + if count >= kwargs['num_classifications']: + break + class_name = self._class_mapping[list(self._class_mapping.keys())[int(index)]] + if class_name not in classification_list: + classification_list.append(class_name) + classification_confidence_list.append(str(value.item())) + count += 1 + + classification_list = '; '.join(classification_list) + 
classification_confidence_list = '; '.join(classification_confidence_list) + + detection_properties = { + "CLASSIFICATION": classification_list.split('; ')[0], + "CLASSIFICATION CONFIDENCE LIST": classification_confidence_list, + "CLASSIFICATION LIST": classification_list + } + + if kwargs['include_features']: + detection_properties['FEATURE'] = base64.b64encode(image_features.cpu().numpy()).decode() + + yield mpf.ImageLocation( + x_left_upper = 0, + y_left_upper = 0, + width = image_size[0], + height = image_size[1], + confidence = float(classification_confidence_list.split('; ')[0]), + detection_properties = detection_properties + ) + + def _create_arg_parser(self, manual_args): + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="", help="path to dataset") + parser.add_argument("--output-dir", type=str, default="", help="output directory") + parser.add_argument( + "--resume", + type=str, + default="", + help="checkpoint directory (from which the training resumes)", + ) + parser.add_argument( + "--seed", type=int, default=-1, help="only positive value enables a fixed seed" + ) + parser.add_argument( + "--source-domains", type=str, nargs="+", help="source domains for DA/DG" + ) + parser.add_argument( + "--target-domains", type=str, nargs="+", help="target domains for DA/DG" + ) + parser.add_argument( + "--transforms", type=str, nargs="+", help="data augmentation methods" + ) + parser.add_argument( + "--config-file", type=str, default="", help="path to config file" + ) + parser.add_argument( + "--dataset-config-file", + type=str, + default="", + help="path to config file for dataset setup", + ) + parser.add_argument("--trainer", type=str, default="", help="name of trainer") + parser.add_argument("--backbone", type=str, default="", help="name of CNN backbone") + parser.add_argument("--head", type=str, default="", help="name of head") + parser.add_argument("--eval-only", action="store_true", help="evaluation only") + 
parser.add_argument( + "--model-dir", + type=str, + default="", + help="load model from this directory for eval-only mode", + ) + parser.add_argument( + "--load-epoch", type=int, help="load model weights at this epoch for evaluation" + ) + parser.add_argument( + "--no-train", action="store_true", help="do not call trainer.train()" + ) + parser.add_argument( + "opts", + default=None, + nargs=argparse.REMAINDER, + help="modify config options using the command-line", + ) + parser.add_argument("--cuda", action="store_true", help="enable use of CUDA.") + args = parser.parse_args(manual_args) + return args + + @staticmethod + def _get_mapping_from_classifications(classification_path: str) -> Mapping[str, str]: + with open(classification_path) as csvfile: + mapping = {} + csvreader = csv.reader(csvfile) + for row in csvreader: + mapping[row[0].strip()] = row[1].strip() + + return mapping + + @staticmethod + def _get_coop_args(): + with open(os.path.realpath(resource_filename(__name__, 'data/coop_args.txt'))) as f: + args = f.read().strip().split() + return args + class ClipWrapper(object): - def __init__(self, model_name='ViT-L/14'): + def __init__(self, device, model_name='ViT-L/14'): logger.info("Loading model...") model, _ = clip.load(model_name, device=device, download_root='/models') logger.info("Model loaded.") @@ -179,15 +344,17 @@ def __init__(self, model_name='ViT-L/14'): self._text_features = None self._inferencing_server = None + self._device = device - def get_detections(self, images, **kwargs) -> Iterable[mpf.ImageLocation]: + def get_detections(self, images, device, **kwargs) -> Iterable[mpf.ImageLocation]: + self._device = device templates_changed = self._check_template_list(kwargs['template_path'], kwargs['template_type']) self._check_class_list(kwargs['classification_path'], kwargs['classification_list'], templates_changed) self._preprocessor = ImagePreprocessor(kwargs['enable_cropping'], self._input_resolution) images = 
[Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) for image in images] image_sizes = [image.size for image in images] - torch_imgs = torch.stack([self._preprocessor.preprocess(image).squeeze(0) for image in images]).to(device) + torch_imgs = torch.stack([self._preprocessor.preprocess(image).squeeze(0) for image in images]).to(self._device) if kwargs['enable_cropping']: torch_imgs = torch_imgs.squeeze(0) @@ -198,7 +365,7 @@ def get_detections(self, images, **kwargs) -> Iterable[mpf.ImageLocation]: self._inferencing_server = CLIPInferencingServer(kwargs['triton_server'], kwargs['model_name']) results = self._inferencing_server.get_responses(torch_imgs) - image_features = torch.Tensor(np.copy(results)).squeeze(0).to(device=device) + image_features = torch.Tensor(np.copy(results)).squeeze(0).to(self._device) else: with torch.no_grad(): image_features = self._model.encode_image(torch_imgs).float() @@ -206,7 +373,7 @@ def get_detections(self, images, **kwargs) -> Iterable[mpf.ImageLocation]: with torch.no_grad(): image_features /= image_features.norm(dim=-1, keepdim=True) - similarity = (100.0 * image_features @ self._text_features).softmax(dim=-1).to(device) + similarity = (100.0 * image_features @ self._text_features).softmax(dim=-1).to(self._device) if kwargs['enable_cropping']: similarity = torch.mean(similarity, 0).unsqueeze(0) @@ -300,7 +467,7 @@ def _check_class_list(self, classification_path: str, classification_list: str, try: logger.info("Updating classifications...") - self._class_mapping = self._get_mapping_from_classifications(classification_path) + self._class_mapping = self._get_mapping_from_classifications(self._classification_path) logger.info("Classifications updated.") except Exception: raise mpf.DetectionException( @@ -313,13 +480,13 @@ def _check_class_list(self, classification_path: str, classification_list: str, text_features = [] for label in self._class_mapping.keys(): text_phrases = [template.format(label) for template in self._templates] 
- text_tokens = clip.tokenize(text_phrases).to(device) + text_tokens = clip.tokenize(text_phrases).to(self._device) text_embeddings = self._model.encode_text(text_tokens) text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True) text_embedding = text_embeddings.mean(dim=0) text_embedding /= text_embedding.norm() text_features.append(text_embedding) - self._text_features = torch.stack(text_features, dim=1).float().to(device) + self._text_features = torch.stack(text_features, dim=1).float().to(self._device) logger.info("Text embeddings created.") @staticmethod diff --git a/python/ClipDetection/clip_component/data/coco_classification_list.csv b/python/ClipDetection/clip_component/data/coco_classification_list.csv index e67ec89f..29ebda3a 100644 --- a/python/ClipDetection/clip_component/data/coco_classification_list.csv +++ b/python/ClipDetection/clip_component/data/coco_classification_list.csv @@ -1,80 +1,80 @@ -person,person -bicycle,bicycle -car,car -motorcycle,motorcycle -airplane,airplane -bus,bus -train,train -truck,truck -boat,boat -traffic light,traffic light -fire hydrant,fire hydrant -stop sign,stop sign -parking meter,parking meter -bench,bench -bird,bird -cat,cat -dog,dog -horse,horse -sheep,sheep -cow,cow -elephant,elephant -bear,bear -zebra,zebra -giraffe,giraffe -backpack,backpack -umbrella,umbrella -handbag,handbag -tie,tie -suitcase,suitcase -frisbee,frisbee -skis,skis -snowboard,snowboard -sports ball,sports ball -kite,kite -baseball bat,baseball bat -baseball glove,baseball glove -skateboard,skateboard -surfboard,surfboard -tennis racket,tennis racket -bottle,bottle -wine glass,wine glass -cup,cup -fork,fork -knife,knife -spoon,spoon -bowl,bowl -banana,banana -apple,apple -sandwich,sandwich -orange,orange -broccoli,broccoli -carrot,carrot -hot dog,hot dog -pizza,pizza -donut,donut -cake,cake -chair,chair -couch,couch -potted plant,potted plant -bed,bed -dining table,dining table -toilet,toilet -tv,tv -laptop,laptop -mouse,mouse -remote,remote 
-keyboard,keyboard -cell phone,cell phone -microwave,microwave -oven,oven -toaster,toaster -sink,sink -refrigerator,refrigerator -book,book -clock,clock -vase,vase -scissors,scissors -teddy bear,teddy bear -hair drier,hair drier -toothbrush,toothbrush +person,person +bicycle,bicycle +car,car +motorcycle,motorcycle +airplane,airplane +bus,bus +train,train +truck,truck +boat,boat +traffic light,traffic light +fire hydrant,fire hydrant +stop sign,stop sign +parking meter,parking meter +bench,bench +bird,bird +cat,cat +dog,dog +horse,horse +sheep,sheep +cow,cow +elephant,elephant +bear,bear +zebra,zebra +giraffe,giraffe +backpack,backpack +umbrella,umbrella +handbag,handbag +tie,tie +suitcase,suitcase +frisbee,frisbee +skis,skis +snowboard,snowboard +sports ball,sports ball +kite,kite +baseball bat,baseball bat +baseball glove,baseball glove +skateboard,skateboard +surfboard,surfboard +tennis racket,tennis racket +bottle,bottle +wine glass,wine glass +cup,cup +fork,fork +knife,knife +spoon,spoon +bowl,bowl +banana,banana +apple,apple +sandwich,sandwich +orange,orange +broccoli,broccoli +carrot,carrot +hot dog,hot dog +pizza,pizza +donut,donut +cake,cake +chair,chair +couch,couch +potted plant,potted plant +bed,bed +dining table,dining table +toilet,toilet +tv,tv +laptop,laptop +mouse,mouse +remote,remote +keyboard,keyboard +cell phone,cell phone +microwave,microwave +oven,oven +toaster,toaster +sink,sink +refrigerator,refrigerator +book,book +clock,clock +vase,vase +scissors,scissors +teddy bear,teddy bear +hair drier,hair drier +toothbrush,toothbrush diff --git a/python/ClipDetection/clip_component/data/coop_args.txt b/python/ClipDetection/clip_component/data/coop_args.txt new file mode 100644 index 00000000..fda4be90 --- /dev/null +++ b/python/ClipDetection/clip_component/data/coop_args.txt @@ -0,0 +1 @@ +--seed 1 --trainer CoOp --config-file /opt/coop_src/CoOp/configs/trainers/CoOp/vit_l14_ep50.yaml --model-dir /models --load-epoch 50 --eval-only TRAINER.COOP.N_CTX 
16 TRAINER.COOP.CSC False TRAINER.COOP.CLASS_TOKEN_POSITION end \ No newline at end of file diff --git a/python/ClipDetection/clip_component/data/eighty_templates.txt b/python/ClipDetection/clip_component/data/eighty_templates.txt index 526e39c0..9026bf5e 100644 --- a/python/ClipDetection/clip_component/data/eighty_templates.txt +++ b/python/ClipDetection/clip_component/data/eighty_templates.txt @@ -1,80 +1,80 @@ -a bad photo of a {}. -a photo of many {}. -a sculpture of a {}. -a photo of the hard to see {}. -a low resolution photo of the {}. -a rendering of a {}. -graffiti of a {}. -a bad photo of the {}. -a cropped photo of the {}. -a tattoo of a {}. -the embroidered {}. -a photo of a hard to see {}. -a bright photo of a {}. -a photo of a clean {}. -a photo of a dirty {}. -a dark photo of the {}. -a drawing of a {}. -a photo of my {}. -the plastic {}. -a photo of the cool {}. -a close-up photo of a {}. -a black and white photo of the {}. -a painting of the {}. -a painting of a {}. -a pixelated photo of the {}. -a sculpture of the {}. -a bright photo of the {}. -a cropped photo of a {}. -a plastic {}. -a photo of the dirty {}. -a jpeg corrupted photo of a {}. -a blurry photo of the {}. -a photo of the {}. -a good photo of the {}. -a rendering of the {}. -a {} in a video game. -a photo of one {}. -a doodle of a {}. -a close-up photo of the {}. -a photo of a {}. -the origami {}. -the {} in a video game. -a sketch of a {}. -a doodle of the {}. -a origami {}. -a low resolution photo of a {}. -the toy {}. -a rendition of the {}. -a photo of the clean {}. -a photo of a large {}. -a rendition of a {}. -a photo of a nice {}. -a photo of a weird {}. -a blurry photo of a {}. -a cartoon {}. -art of a {}. -a sketch of the {}. -a embroidered {}. -a pixelated photo of a {}. -itap of the {}. -a jpeg corrupted photo of the {}. -a good photo of a {}. -a plushie {}. -a photo of the nice {}. -a photo of the small {}. -a photo of the weird {}. -the cartoon {}. -art of the {}. 
-a drawing of the {}. -a photo of the large {}. -a black and white photo of a {}. -the plushie {}. -a dark photo of a {}. -itap of a {}. -graffiti of the {}. -a toy {}. -itap of my {}. -a photo of a cool {}. -a photo of a small {}. +a bad photo of a {}. +a photo of many {}. +a sculpture of a {}. +a photo of the hard to see {}. +a low resolution photo of the {}. +a rendering of a {}. +graffiti of a {}. +a bad photo of the {}. +a cropped photo of the {}. +a tattoo of a {}. +the embroidered {}. +a photo of a hard to see {}. +a bright photo of a {}. +a photo of a clean {}. +a photo of a dirty {}. +a dark photo of the {}. +a drawing of a {}. +a photo of my {}. +the plastic {}. +a photo of the cool {}. +a close-up photo of a {}. +a black and white photo of the {}. +a painting of the {}. +a painting of a {}. +a pixelated photo of the {}. +a sculpture of the {}. +a bright photo of the {}. +a cropped photo of a {}. +a plastic {}. +a photo of the dirty {}. +a jpeg corrupted photo of a {}. +a blurry photo of the {}. +a photo of the {}. +a good photo of the {}. +a rendering of the {}. +a {} in a video game. +a photo of one {}. +a doodle of a {}. +a close-up photo of the {}. +a photo of a {}. +the origami {}. +the {} in a video game. +a sketch of a {}. +a doodle of the {}. +a origami {}. +a low resolution photo of a {}. +the toy {}. +a rendition of the {}. +a photo of the clean {}. +a photo of a large {}. +a rendition of a {}. +a photo of a nice {}. +a photo of a weird {}. +a blurry photo of a {}. +a cartoon {}. +art of a {}. +a sketch of the {}. +a embroidered {}. +a pixelated photo of a {}. +itap of the {}. +a jpeg corrupted photo of the {}. +a good photo of a {}. +a plushie {}. +a photo of the nice {}. +a photo of the small {}. +a photo of the weird {}. +the cartoon {}. +art of the {}. +a drawing of the {}. +a photo of the large {}. +a black and white photo of a {}. +the plushie {}. +a dark photo of a {}. +itap of a {}. +graffiti of the {}. +a toy {}. +itap of my {}. 
+a photo of a cool {}. +a photo of a small {}. a tattoo of the {}. \ No newline at end of file diff --git a/python/ClipDetection/clip_component/data/imagenet_classification_list.csv b/python/ClipDetection/clip_component/data/imagenet_classification_list.csv index 6de2c296..e19b0a7d 100644 --- a/python/ClipDetection/clip_component/data/imagenet_classification_list.csv +++ b/python/ClipDetection/clip_component/data/imagenet_classification_list.csv @@ -1,1000 +1,1000 @@ -tench,"tench, Tinca tinca" -goldfish,"goldfish, Carassius auratus" -great white shark,"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias" -tiger shark,"tiger shark, Galeocerdo cuvieri" -hammerhead shark,"hammerhead, hammerhead shark" -electric ray,"electric ray, crampfish, numbfish, torpedo" -stingray,stingray -rooster,cock -hen,hen -ostrich,"ostrich, Struthio camelus" -brambling,"brambling, Fringilla montifringilla" -goldfinch,"goldfinch, Carduelis carduelis" -house finch,"house finch, linnet, Carpodacus mexicanus" -junco,"junco, snowbird" -indigo bunting,"indigo bunting, indigo finch, indigo bird, Passerina cyanea" -American robin,"robin, American robin, Turdus migratorius" -bulbul,bulbul -jay,jay -magpie,magpie -chickadee,chickadee -American dipper,"water ouzel, dipper" -kite (bird of prey),kite -bald eagle,"bald eagle, American eagle, Haliaeetus leucocephalus" -vulture,vulture -great grey owl,"great grey owl, great gray owl, Strix nebulosa" -fire salamander,"European fire salamander, Salamandra salamandra" -smooth newt,"common newt, Triturus vulgaris" -newt,eft -spotted salamander,"spotted salamander, Ambystoma maculatum" -axolotl,"axolotl, mud puppy, Ambystoma mexicanum" -American bullfrog,"bullfrog, Rana catesbeiana" -tree frog,"tree frog, tree-frog" -tailed frog,"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui" -loggerhead sea turtle,"loggerhead, loggerhead turtle, Caretta caretta" -leatherback sea turtle,"leatherback turtle, leatherback, 
leathery turtle, Dermochelys coriacea" -mud turtle,mud turtle -terrapin,terrapin -box turtle,"box turtle, box tortoise" -banded gecko,banded gecko -green iguana,"common iguana, iguana, Iguana iguana" -Carolina anole,"American chameleon, anole, Anolis carolinensis" -desert grassland whiptail lizard,"whiptail, whiptail lizard" -agama,agama -frilled-necked lizard,"frilled lizard, Chlamydosaurus kingi" -alligator lizard,alligator lizard -Gila monster,"Gila monster, Heloderma suspectum" -European green lizard,"green lizard, Lacerta viridis" -chameleon,"African chameleon, Chamaeleo chamaeleon" -Komodo dragon,"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis" -Nile crocodile,"African crocodile, Nile crocodile, Crocodylus niloticus" -American alligator,"American alligator, Alligator mississipiensis" -triceratops,triceratops -worm snake,"thunder snake, worm snake, Carphophis amoenus" -ring-necked snake,"ringneck snake, ring-necked snake, ring snake" -eastern hog-nosed snake,"hognose snake, puff adder, sand viper" -smooth green snake,"green snake, grass snake" -kingsnake,"king snake, kingsnake" -garter snake,"garter snake, grass snake" -water snake,water snake -vine snake,vine snake -night snake,"night snake, Hypsiglena torquata" -boa constrictor,"boa constrictor, Constrictor constrictor" -African rock python,"rock python, rock snake, Python sebae" -Indian cobra,"Indian cobra, Naja naja" -green mamba,green mamba -sea snake,sea snake -Saharan horned viper,"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus" -eastern diamondback rattlesnake,"diamondback, diamondback rattlesnake, Crotalus adamanteus" -sidewinder rattlesnake,"sidewinder, horned rattlesnake, Crotalus cerastes" -trilobite,trilobite -harvestman,"harvestman, daddy longlegs, Phalangium opilio" -scorpion,scorpion -yellow garden spider,"black and gold garden spider, Argiope aurantia" -barn spider,"barn spider, Araneus cavaticus" -European garden spider,"garden spider, Aranea 
diademata" -southern black widow,"black widow, Latrodectus mactans" -tarantula,tarantula -wolf spider,"wolf spider, hunting spider" -tick,tick -centipede,centipede -black grouse,black grouse -ptarmigan,ptarmigan -ruffed grouse,"ruffed grouse, partridge, Bonasa umbellus" -prairie grouse,"prairie chicken, prairie grouse, prairie fowl" -peafowl,peacock -quail,quail -partridge,partridge -african grey parrot,"African grey, African gray, Psittacus erithacus" -macaw,macaw -sulphur-crested cockatoo,"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita" -lorikeet,lorikeet -coucal,coucal -bee eater,bee eater -hornbill,hornbill -hummingbird,hummingbird -jacamar,jacamar -toucan,toucan -duck,drake -red-breasted merganser,"red-breasted merganser, Mergus serrator" -goose,goose -black swan,"black swan, Cygnus atratus" -tusker,tusker -echidna,"echidna, spiny anteater, anteater" -platypus,"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus" -wallaby,"wallaby, brush kangaroo" -koala,"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus" -wombat,wombat -jellyfish,jellyfish -sea anemone,"sea anemone, anemone" -brain coral,brain coral -flatworm,"flatworm, platyhelminth" -nematode,"nematode, nematode worm, roundworm" -conch,conch -snail,snail -slug,slug -sea slug,"sea slug, nudibranch" -chiton,"chiton, coat-of-mail shell, sea cradle, polyplacophore" -chambered nautilus,"chambered nautilus, pearly nautilus, nautilus" -Dungeness crab,"Dungeness crab, Cancer magister" -rock crab,"rock crab, Cancer irroratus" -fiddler crab,fiddler crab -red king crab,"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica" -American lobster,"American lobster, Northern lobster, Maine lobster, Homarus americanus" -spiny lobster,"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish" -crayfish,"crayfish, crawfish, crawdad, crawdaddy" -hermit crab,hermit crab -isopod,isopod -white stork,"white stork, 
Ciconia ciconia" -black stork,"black stork, Ciconia nigra" -spoonbill,spoonbill -flamingo,flamingo -little blue heron,"little blue heron, Egretta caerulea" -great egret,"American egret, great white heron, Egretta albus" -bittern bird,bittern -crane bird,crane bird -limpkin,"limpkin, Aramus pictus" -common gallinule,"European gallinule, Porphyrio porphyrio" -American coot,"American coot, marsh hen, mud hen, water hen, Fulica americana" -bustard,bustard -ruddy turnstone,"ruddy turnstone, Arenaria interpres" -dunlin,"red-backed sandpiper, dunlin, Erolia alpina" -common redshank,"redshank, Tringa totanus" -dowitcher,dowitcher -oystercatcher,"oystercatcher, oyster catcher" -pelican,pelican -king penguin,"king penguin, Aptenodytes patagonica" -albatross,"albatross, mollymawk" -grey whale,"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus" -killer whale,"killer whale, killer, orca, grampus, sea wolf, Orcinus orca" -dugong,"dugong, Dugong dugon" -sea lion,sea lion -Chihuahua,Chihuahua -Japanese Chin,Japanese spaniel -Maltese,"Maltese dog, Maltese terrier, Maltese" -Pekingese,"Pekinese, Pekingese, Peke" -Shih Tzu,Shih-Tzu -King Charles Spaniel,Blenheim spaniel -Papillon,papillon -toy terrier,toy terrier -Rhodesian Ridgeback,Rhodesian ridgeback -Afghan Hound,"Afghan hound, Afghan" -Basset Hound,"basset, basset hound" -Beagle,beagle -Bloodhound,"bloodhound, sleuthhound" -Bluetick Coonhound,bluetick -Black and Tan Coonhound,black-and-tan coonhound -Treeing Walker Coonhound,"Walker hound, Walker foxhound" -English foxhound,English foxhound -Redbone Coonhound,redbone -borzoi,"borzoi, Russian wolfhound" -Irish Wolfhound,Irish wolfhound -Italian Greyhound,Italian greyhound -Whippet,whippet -Ibizan Hound,"Ibizan hound, Ibizan Podenco" -Norwegian Elkhound,"Norwegian elkhound, elkhound" -Otterhound,"otterhound, otter hound" -Saluki,"Saluki, gazelle hound" -Scottish Deerhound,"Scottish deerhound, deerhound" -Weimaraner,Weimaraner -Staffordshire Bull 
Terrier,"Staffordshire bullterrier, Staffordshire bull terrier" -American Staffordshire Terrier,"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier" -Bedlington Terrier,Bedlington terrier -Border Terrier,Border terrier -Kerry Blue Terrier,Kerry blue terrier -Irish Terrier,Irish terrier -Norfolk Terrier,Norfolk terrier -Norwich Terrier,Norwich terrier -Yorkshire Terrier,Yorkshire terrier -Wire Fox Terrier,wire-haired fox terrier -Lakeland Terrier,Lakeland terrier -Sealyham Terrier,"Sealyham terrier, Sealyham" -Airedale Terrier,"Airedale, Airedale terrier" -Cairn Terrier,"cairn, cairn terrier" -Australian Terrier,Australian terrier -Dandie Dinmont Terrier,"Dandie Dinmont, Dandie Dinmont terrier" -Boston Terrier,"Boston bull, Boston terrier" -Miniature Schnauzer,miniature schnauzer -Giant Schnauzer,giant schnauzer -Standard Schnauzer,standard schnauzer -Scottish Terrier,"Scotch terrier, Scottish terrier, Scottie" -Tibetan Terrier,"Tibetan terrier, chrysanthemum dog" -Australian Silky Terrier,"silky terrier, Sydney silky" -Soft-coated Wheaten Terrier,soft-coated wheaten terrier -West Highland White Terrier,West Highland white terrier -Lhasa Apso,"Lhasa, Lhasa apso" -Flat-Coated Retriever,flat-coated retriever -Curly-coated Retriever,curly-coated retriever -Golden Retriever,golden retriever -Labrador Retriever,Labrador retriever -Chesapeake Bay Retriever,Chesapeake Bay retriever -German Shorthaired Pointer,German short-haired pointer -Vizsla,"vizsla, Hungarian pointer" -English Setter,English setter -Irish Setter,"Irish setter, red setter" -Gordon Setter,Gordon setter -Brittany dog,Brittany spaniel -Clumber Spaniel,"clumber, clumber spaniel" -English Springer Spaniel,"English springer, English springer spaniel" -Welsh Springer Spaniel,Welsh springer spaniel -Cocker Spaniel,"cocker spaniel, English cocker spaniel, cocker" -Sussex Spaniel,Sussex spaniel -Irish Water Spaniel,Irish water spaniel -Kuvasz,kuvasz 
-Schipperke,schipperke -Groenendael dog,groenendael -Malinois,malinois -Briard,briard -Australian Kelpie,kelpie -Komondor,komondor -Old English Sheepdog,"Old English sheepdog, bobtail" -Shetland Sheepdog,"Shetland sheepdog, Shetland sheep dog, Shetland" -collie,collie -Border Collie,Border collie -Bouvier des Flandres dog,"Bouvier des Flandres, Bouviers des Flandres" -Rottweiler,Rottweiler -German Shepherd Dog,"German shepherd, German shepherd dog, German police dog, alsatian" -Dobermann,"Doberman, Doberman pinscher" -Miniature Pinscher,miniature pinscher -Greater Swiss Mountain Dog,Greater Swiss Mountain dog -Bernese Mountain Dog,Bernese mountain dog -Appenzeller Sennenhund,Appenzeller -Entlebucher Sennenhund,EntleBucher -Boxer,boxer -Bullmastiff,bull mastiff -Tibetan Mastiff,Tibetan mastiff -French Bulldog,French bulldog -Great Dane,Great Dane -St. Bernard,"Saint Bernard, St Bernard" -husky,"Eskimo dog, husky" -Alaskan Malamute,"malamute, malemute, Alaskan malamute" -Siberian Husky,Siberian husky -Dalmatian,"dalmatian, coach dog, carriage dog" -Affenpinscher,"affenpinscher, monkey pinscher, monkey dog" -Basenji,basenji -pug,"pug, pug-dog" -Leonberger,Leonberg -Newfoundland dog,"Newfoundland, Newfoundland dog" -Great Pyrenees dog,Great Pyrenees -Samoyed,"Samoyed, Samoyede" -Pomeranian,Pomeranian -Chow Chow,"chow, chow chow" -Keeshond,keeshond -brussels griffon,Brabancon griffon -Pembroke Welsh Corgi,"Pembroke, Pembroke Welsh corgi" -Cardigan Welsh Corgi,"Cardigan, Cardigan Welsh corgi" -Toy Poodle,toy poodle -Miniature Poodle,miniature poodle -Standard Poodle,standard poodle -Mexican hairless dog (xoloitzcuintli),Mexican hairless -grey wolf,"timber wolf, grey wolf, gray wolf, Canis lupus" -Alaskan tundra wolf,"white wolf, Arctic wolf, Canis lupus tundrarum" -red wolf or maned wolf,"red wolf, maned wolf, Canis rufus, Canis niger" -coyote,"coyote, prairie wolf, brush wolf, Canis latrans" -dingo,"dingo, warrigal, warragal, Canis dingo" -dhole,"dhole, Cuon alpinus" 
-African wild dog,"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus" -hyena,"hyena, hyaena" -red fox,"red fox, Vulpes vulpes" -kit fox,"kit fox, Vulpes macrotis" -Arctic fox,"Arctic fox, white fox, Alopex lagopus" -grey fox,"grey fox, gray fox, Urocyon cinereoargenteus" -tabby cat,"tabby, tabby cat" -tiger cat,tiger cat -Persian cat,Persian cat -Siamese cat,"Siamese cat, Siamese" -Egyptian Mau,Egyptian cat -cougar,"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor" -lynx,"lynx, catamount" -leopard,"leopard, Panthera pardus" -snow leopard,"snow leopard, ounce, Panthera uncia" -jaguar,"jaguar, panther, Panthera onca, Felis onca" -lion,"lion, king of beasts, Panthera leo" -tiger,"tiger, Panthera tigris" -cheetah,"cheetah, chetah, Acinonyx jubatus" -brown bear,"brown bear, bruin, Ursus arctos" -American black bear,"American black bear, black bear, Ursus americanus, Euarctos americanus" -polar bear,"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus" -sloth bear,"sloth bear, Melursus ursinus, Ursus ursinus" -mongoose,mongoose -meerkat,"meerkat, mierkat" -tiger beetle,tiger beetle -ladybug,"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle" -ground beetle,"ground beetle, carabid beetle" -longhorn beetle,"long-horned beetle, longicorn, longicorn beetle" -leaf beetle,"leaf beetle, chrysomelid" -dung beetle,dung beetle -rhinoceros beetle,rhinoceros beetle -weevil,weevil -fly,fly -bee,bee -ant,"ant, emmet, pismire" -grasshopper,"grasshopper, hopper" -cricket insect,cricket -stick insect,"walking stick, walkingstick, stick insect" -cockroach,"cockroach, roach" -praying mantis,"mantis, mantid" -cicada,"cicada, cicala" -leafhopper,leafhopper -lacewing,"lacewing, lacewing fly" -dragonfly,"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk" -damselfly,damselfly -red admiral butterfly,admiral -ringlet butterfly,"ringlet, ringlet butterfly" -monarch 
butterfly,"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus" -small white butterfly,cabbage butterfly -sulphur butterfly,"sulphur butterfly, sulfur butterfly" -gossamer-winged butterfly,"lycaenid, lycaenid butterfly" -starfish,"starfish, sea star" -sea urchin,sea urchin -sea cucumber,"sea cucumber, holothurian" -cottontail rabbit,"wood rabbit, cottontail, cottontail rabbit" -hare,hare -Angora rabbit,"Angora, Angora rabbit" -hamster,hamster -porcupine,"porcupine, hedgehog" -fox squirrel,"fox squirrel, eastern fox squirrel, Sciurus niger" -marmot,marmot -beaver,beaver -guinea pig,"guinea pig, Cavia cobaya" -common sorrel horse,sorrel -zebra,zebra -pig,"hog, pig, grunter, squealer, Sus scrofa" -wild boar,"wild boar, boar, Sus scrofa" -warthog,warthog -hippopotamus,"hippopotamus, hippo, river horse, Hippopotamus amphibius" -ox,ox -water buffalo,"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis" -bison,bison -ram (adult male sheep),"ram, tup" -bighorn sheep,"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis" -Alpine ibex,"ibex, Capra ibex" -hartebeest,hartebeest -impala (antelope),"impala, Aepyceros melampus" -gazelle,gazelle -arabian camel,"Arabian camel, dromedary, Camelus dromedarius" -llama,llama -weasel,weasel -mink,mink -European polecat,"polecat, fitch, foulmart, foumart, Mustela putorius" -black-footed ferret,"black-footed ferret, ferret, Mustela nigripes" -otter,otter -skunk,"skunk, polecat, wood pussy" -badger,badger -armadillo,armadillo -three-toed sloth,"three-toed sloth, ai, Bradypus tridactylus" -orangutan,"orangutan, orang, orangutang, Pongo pygmaeus" -gorilla,"gorilla, Gorilla gorilla" -chimpanzee,"chimpanzee, chimp, Pan troglodytes" -gibbon,"gibbon, Hylobates lar" -siamang,"siamang, Hylobates syndactylus, Symphalangus syndactylus" -guenon,"guenon, guenon monkey" -patas monkey,"patas, hussar monkey, Erythrocebus patas" -baboon,baboon -macaque,macaque -langur,langur -black-and-white 
colobus,"colobus, colobus monkey" -proboscis monkey,"proboscis monkey, Nasalis larvatus" -marmoset,marmoset -white-headed capuchin,"capuchin, ringtail, Cebus capucinus" -howler monkey,"howler monkey, howler" -titi monkey,"titi, titi monkey" -Geoffroy's spider monkey,"spider monkey, Ateles geoffroyi" -common squirrel monkey,"squirrel monkey, Saimiri sciureus" -ring-tailed lemur,"Madagascar cat, ring-tailed lemur, Lemur catta" -indri,"indri, indris, Indri indri, Indri brevicaudatus" -Asian elephant,"Indian elephant, Elephas maximus" -African bush elephant,"African elephant, Loxodonta africana" -red panda,"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens" -giant panda,"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca" -snoek fish,"barracouta, snoek" -eel,eel -silver salmon,"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch" -rock beauty fish,"rock beauty, Holocanthus tricolor" -clownfish,anemone fish -sturgeon,sturgeon -gar fish,"gar, garfish, garpike, billfish, Lepisosteus osseus" -lionfish,lionfish -pufferfish,"puffer, pufferfish, blowfish, globefish" -abacus,abacus -abaya,abaya -academic gown,"academic gown, academic robe, judge's robe" -accordion,"accordion, piano accordion, squeeze box" -acoustic guitar,acoustic guitar -aircraft carrier,"aircraft carrier, carrier, flattop, attack aircraft carrier" -airliner,airliner -airship,"airship, dirigible" -altar,altar -ambulance,ambulance -amphibious vehicle,"amphibian, amphibious vehicle" -analog clock,analog clock -apiary,"apiary, bee house" -apron,apron -trash can,"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin" -assault rifle,"assault rifle, assault gun" -backpack,"backpack, back pack, knapsack, packsack, rucksack, haversack" -bakery,"bakery, bakeshop, bakehouse" -balance beam,"balance beam, beam" -balloon,balloon -ballpoint pen,"ballpoint, ballpoint pen, ballpen, Biro" -Band-Aid,Band Aid -banjo,banjo 
-baluster / handrail,"bannister, banister, balustrade, balusters, handrail" -barbell,barbell -barber chair,barber chair -barbershop,barbershop -barn,barn -barometer,barometer -barrel,"barrel, cask" -wheelbarrow,"barrow, garden cart, lawn cart, wheelbarrow" -baseball,baseball -basketball,basketball -bassinet,bassinet -bassoon,bassoon -swimming cap,"bathing cap, swimming cap" -bath towel,bath towel -bathtub,"bathtub, bathing tub, bath, tub" -station wagon,"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon" -lighthouse,"beacon, lighthouse, beacon light, pharos" -beaker,beaker -military hat (bearskin or shako),"bearskin, busby, shako" -beer bottle,beer bottle -beer glass,beer glass -bell tower,"bell cote, bell cot" -baby bib,bib -tandem bicycle,"bicycle-built-for-two, tandem bicycle, tandem" -bikini,"bikini, two-piece" -ring binder,"binder, ring-binder" -binoculars,"binoculars, field glasses, opera glasses" -birdhouse,birdhouse -boathouse,boathouse -bobsleigh,"bobsled, bobsleigh, bob" -bolo tie,"bolo tie, bolo, bola tie, bola" -poke bonnet,"bonnet, poke bonnet" -bookcase,bookcase -bookstore,"bookshop, bookstore, bookstall" -bottle cap,bottlecap -hunting bow,bow -bow tie,"bow tie, bow-tie, bowtie" -brass memorial plaque,"brass, memorial tablet, plaque" -bra,"brassiere, bra, bandeau" -breakwater,"breakwater, groin, groyne, mole, bulwark, seawall, jetty" -breastplate,"breastplate, aegis, egis" -broom,broom -bucket,"bucket, pail" -buckle,buckle -bulletproof vest,bulletproof vest -high-speed train,"bullet train, bullet" -butcher shop,"butcher shop, meat market" -taxicab,"cab, hack, taxi, taxicab" -cauldron,"caldron, cauldron" -candle,"candle, taper, wax light" -cannon,cannon -canoe,canoe -can opener,"can opener, tin opener" -cardigan,cardigan -car mirror,car mirror -carousel,"carousel, carrousel, merry-go-round, roundabout, whirligig" -tool kit,"carpenter's kit, tool kit" -cardboard box / carton,carton -car wheel,car wheel -automated teller 
machine,"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM" -cassette,cassette -cassette player,cassette player -castle,castle -catamaran,catamaran -CD player,CD player -cello,"cello, violoncello" -mobile phone,"cellular telephone, cellular phone, cellphone, cell, mobile phone" -chain,chain -chain-link fence,chainlink fence -chain mail,"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour" -chainsaw,"chain saw, chainsaw" -storage chest,chest -chiffonier,"chiffonier, commode" -bell or wind chime,"chime, bell, gong" -china cabinet,"china cabinet, china closet" -Christmas stocking,Christmas stocking -church,"church, church building" -movie theater,"cinema, movie theater, movie theatre, movie house, picture palace" -cleaver,"cleaver, meat cleaver, chopper" -cliff dwelling,cliff dwelling -cloak,cloak -clogs,"clog, geta, patten, sabot" -cocktail shaker,cocktail shaker -coffee mug,coffee mug -coffeemaker,coffeepot -spiral or coil,"coil, spiral, volute, whorl, helix" -combination lock,combination lock -computer keyboard,"computer keyboard, keypad" -candy store,"confectionery, confectionary, candy store" -container ship,"container ship, containership, container vessel" -convertible,convertible -corkscrew,"corkscrew, bottle screw" -cornet,"cornet, horn, trumpet, trump" -cowboy boot,cowboy boot -cowboy hat,"cowboy hat, ten-gallon hat" -cradle,cradle -construction crane,construction crane -crash helmet,crash helmet -crate,crate -infant bed,"crib, cot" -Crock Pot,Crock Pot -croquet ball,croquet ball -crutch,crutch -cuirass,cuirass -dam,"dam, dike, dyke" -desk,desk -desktop computer,desktop computer -rotary dial telephone,"dial telephone, dial phone" -diaper,"diaper, nappy, napkin" -digital clock,digital clock -digital watch,digital watch -dining table,"dining table, board" -dishcloth,"dishrag, dishcloth" -dishwasher,"dishwasher, dish washer, dishwashing machine" -disc brake,"disk 
brake, disc brake" -dock,"dock, dockage, docking facility" -dog sled,"dogsled, dog sled, dog sleigh" -dome,dome -doormat,"doormat, welcome mat" -drilling rig,"drilling platform, offshore rig" -drum,"drum, membranophone, tympan" -drumstick,drumstick -dumbbell,dumbbell -Dutch oven,Dutch oven -electric fan,"electric fan, blower" -electric guitar,electric guitar -electric locomotive,electric locomotive -entertainment center,entertainment center -envelope,envelope -espresso machine,espresso maker -face powder,face powder -feather boa,"feather boa, boa" -filing cabinet,"file, file cabinet, filing cabinet" -fireboat,fireboat -fire truck,"fire engine, fire truck" -fire screen,"fire screen, fireguard" -flagpole,"flagpole, flagstaff" -flute,"flute, transverse flute" -folding chair,folding chair -football helmet,football helmet -forklift,forklift -fountain,fountain -fountain pen,fountain pen -four-poster bed,four-poster -freight car,freight car -French horn,"French horn, horn" -frying pan,"frying pan, frypan, skillet" -fur coat,fur coat -garbage truck,"garbage truck, dustcart" -gas mask or respirator,"gasmask, respirator, gas helmet" -gas pump,"gas pump, gasoline pump, petrol pump, island dispenser" -goblet,goblet -go-kart,go-kart -golf ball,golf ball -golf cart,"golfcart, golf cart" -gondola,gondola -gong,"gong, tam-tam" -gown,gown -grand piano,"grand piano, grand" -greenhouse,"greenhouse, nursery, glasshouse" -radiator grille,"grille, radiator grille" -grocery store,"grocery store, grocery, food market, market" -guillotine,guillotine -hair clip,hair slide -hair spray,hair spray -half-track,half track -hammer,hammer -hamper,hamper -hair dryer,"hand blower, blow dryer, blow drier, hair dryer, hair drier" -hand-held computer,"hand-held computer, hand-held microcomputer" -handkerchief,"handkerchief, hankie, hanky, hankey" -hard disk drive,"hard disc, hard disk, fixed disk" -harmonica,"harmonica, mouth organ, harp, mouth harp" -harp,harp -combine harvester,"harvester, reaper" 
-hatchet,hatchet -holster,holster -home theater,"home theater, home theatre" -honeycomb,honeycomb -hook,"hook, claw" -hoop skirt,"hoopskirt, crinoline" -gymnastic horizontal bar,"horizontal bar, high bar" -horse-drawn vehicle,"horse cart, horse-cart" -hourglass,hourglass -iPod,iPod -clothes iron,"iron, smoothing iron" -carved pumpkin,jack-o'-lantern -jeans,"jean, blue jean, denim" -jeep,"jeep, landrover" -T-shirt,"jersey, T-shirt, tee shirt" -jigsaw puzzle,jigsaw puzzle -rickshaw,"jinrikisha, ricksha, rickshaw" -joystick,joystick -kimono,kimono -knee pad,knee pad -knot,knot -lab coat,"lab coat, laboratory coat" -ladle,ladle -lampshade,"lampshade, lamp shade" -laptop computer,"laptop, laptop computer" -lawn mower,"lawn mower, mower" -lens cap,"lens cap, lens cover" -letter opener,"letter opener, paper knife, paperknife" -library,library -lifeboat,lifeboat -lighter,"lighter, light, igniter, ignitor" -limousine,"limousine, limo" -ocean liner,"liner, ocean liner" -lipstick,"lipstick, lip rouge" -slip-on shoe,Loafer -lotion,lotion -music speaker,"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system" -loupe magnifying glass,"loupe, jeweler's loupe" -sawmill,"lumbermill, sawmill" -magnetic compass,magnetic compass -messenger bag,"mailbag, postbag" -mailbox,"mailbox, letter box" -tights,maillot -one-piece bathing suit,"maillot, tank suit" -manhole cover,manhole cover -maraca,maraca -marimba,"marimba, xylophone" -mask,mask -matchstick,matchstick -maypole,maypole -maze,"maze, labyrinth" -measuring cup,measuring cup -medicine cabinet,"medicine chest, medicine cabinet" -megalith,"megalith, megalithic structure" -microphone,"microphone, mike" -microwave oven,"microwave, microwave oven" -military uniform,military uniform -milk can,milk can -minibus,minibus -miniskirt,"miniskirt, mini" -minivan,minivan -missile,missile -mitten,mitten -mixing bowl,mixing bowl -mobile home,"mobile home, manufactured home" -ford model t,Model T -modem,modem -monastery,monastery 
-monitor,monitor -moped,moped -mortar and pestle,mortar -graduation cap,mortarboard -mosque,mosque -mosquito net,mosquito net -vespa,"motor scooter, scooter" -mountain bike,"mountain bike, all-terrain bike, off-roader" -tent,mountain tent -computer mouse,"mouse, computer mouse" -mousetrap,mousetrap -moving van,moving van -muzzle,muzzle -metal nail,nail -neck brace,neck brace -necklace,necklace -baby pacifier,nipple -notebook computer,"notebook, notebook computer" -obelisk,obelisk -oboe,"oboe, hautboy, hautbois" -ocarina,"ocarina, sweet potato" -odometer,"odometer, hodometer, mileometer, milometer" -oil filter,oil filter -pipe organ,"organ, pipe organ" -oscilloscope,"oscilloscope, scope, cathode-ray oscilloscope, CRO" -overskirt,overskirt -bullock cart,oxcart -oxygen mask,oxygen mask -product packet / packaging,packet -paddle,"paddle, boat paddle" -paddle wheel,"paddlewheel, paddle wheel" -padlock,padlock -paintbrush,paintbrush -pajamas,"pajama, pyjama, pj's, jammies" -palace,palace -pan flute,"panpipe, pandean pipe, syrinx" -paper towel,paper towel -parachute,"parachute, chute" -parallel bars,"parallel bars, bars" -park bench,park bench -parking meter,parking meter -railroad car,"passenger car, coach, carriage" -patio,"patio, terrace" -payphone,"pay-phone, pay-station" -pedestal,"pedestal, plinth, footstall" -pencil case,"pencil box, pencil case" -pencil sharpener,pencil sharpener -perfume,"perfume, essence" -Petri dish,Petri dish -photocopier,photocopier -plectrum,"pick, plectrum, plectron" -Pickelhaube,pickelhaube -picket fence,"picket fence, paling" -pickup truck,"pickup, pickup truck" -pier,pier -piggy bank,"piggy bank, penny bank" -pill bottle,pill bottle -pillow,pillow -ping-pong ball,ping-pong ball -pinwheel,pinwheel -pirate ship,"pirate, pirate ship" -drink pitcher,"pitcher, ewer" -block plane,"plane, carpenter's plane, woodworking plane" -planetarium,planetarium -plastic bag,plastic bag -plate rack,plate rack -farm plow,"plow, plough" -plunger,"plunger, 
plumber's helper" -Polaroid camera,"Polaroid camera, Polaroid Land camera" -pole,pole -police van,"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria" -poncho,poncho -pool table,"pool table, billiard table, snooker table" -soda bottle,"pop bottle, soda bottle" -plant pot,"pot, flowerpot" -potter's wheel,potter's wheel -power drill,power drill -prayer rug,"prayer rug, prayer mat" -printer,printer -prison,"prison, prison house" -missile,"projectile, missile" -projector,projector -hockey puck,"puck, hockey puck" -punching bag,"punching bag, punch bag, punching ball, punchball" -purse,purse -quill,"quill, quill pen" -quilt,"quilt, comforter, comfort, puff" -race car,"racer, race car, racing car" -racket,"racket, racquet" -radiator,radiator -radio,"radio, wireless" -radio telescope,"radio telescope, radio reflector" -rain barrel,rain barrel -recreational vehicle,"recreational vehicle, RV, R.V." -fishing casting reel,reel -reflex camera,reflex camera -refrigerator,"refrigerator, icebox" -remote control,"remote control, remote" -restaurant,"restaurant, eating house, eating place, eatery" -revolver,"revolver, six-gun, six-shooter" -rifle,rifle -rocking chair,"rocking chair, rocker" -rotisserie,rotisserie -eraser,"rubber eraser, rubber, pencil eraser" -rugby ball,rugby ball -ruler measuring stick,"rule, ruler" -sneaker,running shoe -safe,safe -safety pin,safety pin -salt shaker,"saltshaker, salt shaker" -sandal,sandal -sarong,sarong -saxophone,"sax, saxophone" -scabbard,scabbard -weighing scale,"scale, weighing machine" -school bus,school bus -schooner,schooner -scoreboard,scoreboard -CRT monitor,"screen, CRT screen" -screw,screw -screwdriver,screwdriver -seat belt,"seat belt, seatbelt" -sewing machine,sewing machine -shield,"shield, buckler" -shoe store,"shoe shop, shoe-shop, shoe store" -shoji screen / room divider,shoji -shopping basket,shopping basket -shopping cart,shopping cart -shovel,shovel -shower cap,shower cap -shower curtain,shower curtain 
-ski,ski -balaclava ski mask,ski mask -sleeping bag,sleeping bag -slide rule,"slide rule, slipstick" -sliding door,sliding door -slot machine,"slot, one-armed bandit" -snorkel,snorkel -snowmobile,snowmobile -snowplow,"snowplow, snowplough" -soap dispenser,soap dispenser -soccer ball,soccer ball -sock,sock -solar thermal collector,"solar dish, solar collector, solar furnace" -sombrero,sombrero -soup bowl,soup bowl -keyboard space bar,space bar -space heater,space heater -space shuttle,space shuttle -spatula,spatula -motorboat,speedboat -spider web,"spider web, spider's web" -spindle,spindle -sports car,"sports car, sport car" -spotlight,"spotlight, spot" -stage,stage -steam locomotive,steam locomotive -through arch bridge,steel arch bridge -steel drum,steel drum -stethoscope,stethoscope -scarf,stole -stone wall,stone wall -stopwatch,"stopwatch, stop watch" -stove,stove -strainer,strainer -tram,"streetcar, tram, tramcar, trolley, trolley car" -stretcher,stretcher -couch,"studio couch, day bed" -stupa,"stupa, tope" -submarine,"submarine, pigboat, sub, U-boat" -suit,"suit, suit of clothes" -sundial,sundial -sunglasses,sunglass -sunglasses,"sunglasses, dark glasses, shades" -sunscreen,"sunscreen, sunblock, sun blocker" -suspension bridge,suspension bridge -mop,"swab, swob, mop" -sweatshirt,sweatshirt -swim trunks / shorts,"swimming trunks, bathing trunks" -swing,swing -electrical switch,"switch, electric switch, electrical switch" -syringe,syringe -table lamp,table lamp -tank,"tank, army tank, armored combat vehicle, armoured combat vehicle" -tape player,tape player -teapot,teapot -teddy bear,"teddy, teddy bear" -television,"television, television system" -tennis ball,tennis ball -thatched roof,"thatch, thatched roof" -front curtain,"theater curtain, theatre curtain" -thimble,thimble -threshing machine,"thresher, thrasher, threshing machine" -throne,throne -tile roof,tile roof -toaster,toaster -tobacco shop,"tobacco shop, tobacconist shop, tobacconist" -toilet 
seat,toilet seat -torch,torch -totem pole,totem pole -tow truck,"tow truck, tow car, wrecker" -toy store,toyshop -tractor,tractor -semi-trailer truck,"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi" -tray,tray -trench coat,trench coat -tricycle,"tricycle, trike, velocipede" -trimaran,trimaran -tripod,tripod -triumphal arch,triumphal arch -trolleybus,"trolleybus, trolley coach, trackless trolley" -trombone,trombone -hot tub,"tub, vat" -turnstile,turnstile -typewriter keyboard,typewriter keyboard -umbrella,umbrella -unicycle,"unicycle, monocycle" -upright piano,"upright, upright piano" -vacuum cleaner,"vacuum, vacuum cleaner" -vase,vase -vaulted or arched ceiling,vault -velvet fabric,velvet -vending machine,vending machine -vestment,vestment -viaduct,viaduct -violin,"violin, fiddle" -volleyball,volleyball -waffle iron,waffle iron -wall clock,wall clock -wallet,"wallet, billfold, notecase, pocketbook" -wardrobe,"wardrobe, closet, press" -military aircraft,"warplane, military plane" -sink,"washbasin, handbasin, washbowl, lavabo, wash-hand basin" -washing machine,"washer, automatic washer, washing machine" -water bottle,water bottle -water jug,water jug -water tower,water tower -whiskey jug,whiskey jug -whistle,whistle -hair wig,wig -window screen,window screen -window shade,window shade -Windsor tie,Windsor tie -wine bottle,wine bottle -airplane wing,wing -wok,wok -wooden spoon,wooden spoon -wool,"wool, woolen, woollen" -split-rail fence,"worm fence, snake fence, snake-rail fence, Virginia fence" -shipwreck,wreck -sailboat,yawl -yurt,yurt -website,"web site, website, internet site, site" -comic book,comic book -crossword,"crossword puzzle, crossword" -traffic or street sign,street sign -traffic light,"traffic light, traffic signal, stoplight" -dust jacket,"book jacket, dust cover, dust jacket, dust wrapper" -menu,menu -plate,plate -guacamole,guacamole -consomme,consomme -hot pot,"hot pot, hotpot" -trifle,trifle -ice cream,"ice cream, 
icecream" -popsicle,"ice lolly, lolly, lollipop, popsicle" -baguette,French loaf -bagel,"bagel, beigel" -pretzel,pretzel -cheeseburger,cheeseburger -hot dog,"hotdog, hot dog, red hot" -mashed potatoes,mashed potato -cabbage,head cabbage -broccoli,broccoli -cauliflower,cauliflower -zucchini,"zucchini, courgette" -spaghetti squash,spaghetti squash -acorn squash,acorn squash -butternut squash,butternut squash -cucumber,"cucumber, cuke" -artichoke,"artichoke, globe artichoke" -bell pepper,bell pepper -cardoon,cardoon -mushroom,mushroom -Granny Smith apple,Granny Smith -strawberry,strawberry -orange,orange -lemon,lemon -fig,fig -pineapple,"pineapple, ananas" -banana,banana -jackfruit,"jackfruit, jak, jack" -cherimoya (custard apple),custard apple -pomegranate,pomegranate -hay,hay -carbonara,carbonara -chocolate syrup,"chocolate sauce, chocolate syrup" -dough,dough -meatloaf,"meat loaf, meatloaf" -pizza,"pizza, pizza pie" -pot pie,potpie -burrito,burrito -red wine,red wine -espresso,espresso -tea cup,cup -eggnog,eggnog -mountain,alp -bubble,bubble -cliff,"cliff, drop, drop-off" -coral reef,coral reef -geyser,geyser -lakeshore,"lakeside, lakeshore" -promontory,"promontory, headland, head, foreland" -sandbar,"sandbar, sand bar" -beach,"seashore, coast, seacoast, sea-coast" -valley,"valley, vale" -volcano,volcano -baseball player,"ballplayer, baseball player" -bridegroom,"groom, bridegroom" -scuba diver,scuba diver -rapeseed,rapeseed -daisy,daisy -yellow lady's slipper,"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum" -corn,corn -acorn,acorn -rose hip,"hip, rose hip, rosehip" -horse chestnut seed,"buckeye, horse chestnut, conker" -coral fungus,coral fungus -agaric,agaric -gyromitra,gyromitra -stinkhorn mushroom,"stinkhorn, carrion fungus" -earth star fungus,earthstar -hen of the woods mushroom,"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa" -bolete,bolete -corn cob,"ear, spike, capitulum" -toilet 
paper,"toilet tissue, toilet paper, bathroom tissue" +tench,"tench, Tinca tinca" +goldfish,"goldfish, Carassius auratus" +great white shark,"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias" +tiger shark,"tiger shark, Galeocerdo cuvieri" +hammerhead shark,"hammerhead, hammerhead shark" +electric ray,"electric ray, crampfish, numbfish, torpedo" +stingray,stingray +rooster,cock +hen,hen +ostrich,"ostrich, Struthio camelus" +brambling,"brambling, Fringilla montifringilla" +goldfinch,"goldfinch, Carduelis carduelis" +house finch,"house finch, linnet, Carpodacus mexicanus" +junco,"junco, snowbird" +indigo bunting,"indigo bunting, indigo finch, indigo bird, Passerina cyanea" +American robin,"robin, American robin, Turdus migratorius" +bulbul,bulbul +jay,jay +magpie,magpie +chickadee,chickadee +American dipper,"water ouzel, dipper" +kite (bird of prey),kite +bald eagle,"bald eagle, American eagle, Haliaeetus leucocephalus" +vulture,vulture +great grey owl,"great grey owl, great gray owl, Strix nebulosa" +fire salamander,"European fire salamander, Salamandra salamandra" +smooth newt,"common newt, Triturus vulgaris" +newt,eft +spotted salamander,"spotted salamander, Ambystoma maculatum" +axolotl,"axolotl, mud puppy, Ambystoma mexicanum" +American bullfrog,"bullfrog, Rana catesbeiana" +tree frog,"tree frog, tree-frog" +tailed frog,"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui" +loggerhead sea turtle,"loggerhead, loggerhead turtle, Caretta caretta" +leatherback sea turtle,"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea" +mud turtle,mud turtle +terrapin,terrapin +box turtle,"box turtle, box tortoise" +banded gecko,banded gecko +green iguana,"common iguana, iguana, Iguana iguana" +Carolina anole,"American chameleon, anole, Anolis carolinensis" +desert grassland whiptail lizard,"whiptail, whiptail lizard" +agama,agama +frilled-necked lizard,"frilled lizard, Chlamydosaurus kingi" +alligator 
lizard,alligator lizard +Gila monster,"Gila monster, Heloderma suspectum" +European green lizard,"green lizard, Lacerta viridis" +chameleon,"African chameleon, Chamaeleo chamaeleon" +Komodo dragon,"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis" +Nile crocodile,"African crocodile, Nile crocodile, Crocodylus niloticus" +American alligator,"American alligator, Alligator mississipiensis" +triceratops,triceratops +worm snake,"thunder snake, worm snake, Carphophis amoenus" +ring-necked snake,"ringneck snake, ring-necked snake, ring snake" +eastern hog-nosed snake,"hognose snake, puff adder, sand viper" +smooth green snake,"green snake, grass snake" +kingsnake,"king snake, kingsnake" +garter snake,"garter snake, grass snake" +water snake,water snake +vine snake,vine snake +night snake,"night snake, Hypsiglena torquata" +boa constrictor,"boa constrictor, Constrictor constrictor" +African rock python,"rock python, rock snake, Python sebae" +Indian cobra,"Indian cobra, Naja naja" +green mamba,green mamba +sea snake,sea snake +Saharan horned viper,"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus" +eastern diamondback rattlesnake,"diamondback, diamondback rattlesnake, Crotalus adamanteus" +sidewinder rattlesnake,"sidewinder, horned rattlesnake, Crotalus cerastes" +trilobite,trilobite +harvestman,"harvestman, daddy longlegs, Phalangium opilio" +scorpion,scorpion +yellow garden spider,"black and gold garden spider, Argiope aurantia" +barn spider,"barn spider, Araneus cavaticus" +European garden spider,"garden spider, Aranea diademata" +southern black widow,"black widow, Latrodectus mactans" +tarantula,tarantula +wolf spider,"wolf spider, hunting spider" +tick,tick +centipede,centipede +black grouse,black grouse +ptarmigan,ptarmigan +ruffed grouse,"ruffed grouse, partridge, Bonasa umbellus" +prairie grouse,"prairie chicken, prairie grouse, prairie fowl" +peafowl,peacock +quail,quail +partridge,partridge +african grey 
parrot,"African grey, African gray, Psittacus erithacus" +macaw,macaw +sulphur-crested cockatoo,"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita" +lorikeet,lorikeet +coucal,coucal +bee eater,bee eater +hornbill,hornbill +hummingbird,hummingbird +jacamar,jacamar +toucan,toucan +duck,drake +red-breasted merganser,"red-breasted merganser, Mergus serrator" +goose,goose +black swan,"black swan, Cygnus atratus" +tusker,tusker +echidna,"echidna, spiny anteater, anteater" +platypus,"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus" +wallaby,"wallaby, brush kangaroo" +koala,"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus" +wombat,wombat +jellyfish,jellyfish +sea anemone,"sea anemone, anemone" +brain coral,brain coral +flatworm,"flatworm, platyhelminth" +nematode,"nematode, nematode worm, roundworm" +conch,conch +snail,snail +slug,slug +sea slug,"sea slug, nudibranch" +chiton,"chiton, coat-of-mail shell, sea cradle, polyplacophore" +chambered nautilus,"chambered nautilus, pearly nautilus, nautilus" +Dungeness crab,"Dungeness crab, Cancer magister" +rock crab,"rock crab, Cancer irroratus" +fiddler crab,fiddler crab +red king crab,"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica" +American lobster,"American lobster, Northern lobster, Maine lobster, Homarus americanus" +spiny lobster,"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish" +crayfish,"crayfish, crawfish, crawdad, crawdaddy" +hermit crab,hermit crab +isopod,isopod +white stork,"white stork, Ciconia ciconia" +black stork,"black stork, Ciconia nigra" +spoonbill,spoonbill +flamingo,flamingo +little blue heron,"little blue heron, Egretta caerulea" +great egret,"American egret, great white heron, Egretta albus" +bittern bird,bittern +crane bird,crane bird +limpkin,"limpkin, Aramus pictus" +common gallinule,"European gallinule, Porphyrio porphyrio" +American coot,"American coot, marsh hen, 
mud hen, water hen, Fulica americana" +bustard,bustard +ruddy turnstone,"ruddy turnstone, Arenaria interpres" +dunlin,"red-backed sandpiper, dunlin, Erolia alpina" +common redshank,"redshank, Tringa totanus" +dowitcher,dowitcher +oystercatcher,"oystercatcher, oyster catcher" +pelican,pelican +king penguin,"king penguin, Aptenodytes patagonica" +albatross,"albatross, mollymawk" +grey whale,"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus" +killer whale,"killer whale, killer, orca, grampus, sea wolf, Orcinus orca" +dugong,"dugong, Dugong dugon" +sea lion,sea lion +Chihuahua,Chihuahua +Japanese Chin,Japanese spaniel +Maltese,"Maltese dog, Maltese terrier, Maltese" +Pekingese,"Pekinese, Pekingese, Peke" +Shih Tzu,Shih-Tzu +King Charles Spaniel,Blenheim spaniel +Papillon,papillon +toy terrier,toy terrier +Rhodesian Ridgeback,Rhodesian ridgeback +Afghan Hound,"Afghan hound, Afghan" +Basset Hound,"basset, basset hound" +Beagle,beagle +Bloodhound,"bloodhound, sleuthhound" +Bluetick Coonhound,bluetick +Black and Tan Coonhound,black-and-tan coonhound +Treeing Walker Coonhound,"Walker hound, Walker foxhound" +English foxhound,English foxhound +Redbone Coonhound,redbone +borzoi,"borzoi, Russian wolfhound" +Irish Wolfhound,Irish wolfhound +Italian Greyhound,Italian greyhound +Whippet,whippet +Ibizan Hound,"Ibizan hound, Ibizan Podenco" +Norwegian Elkhound,"Norwegian elkhound, elkhound" +Otterhound,"otterhound, otter hound" +Saluki,"Saluki, gazelle hound" +Scottish Deerhound,"Scottish deerhound, deerhound" +Weimaraner,Weimaraner +Staffordshire Bull Terrier,"Staffordshire bullterrier, Staffordshire bull terrier" +American Staffordshire Terrier,"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier" +Bedlington Terrier,Bedlington terrier +Border Terrier,Border terrier +Kerry Blue Terrier,Kerry blue terrier +Irish Terrier,Irish terrier +Norfolk Terrier,Norfolk terrier +Norwich Terrier,Norwich terrier 
+Yorkshire Terrier,Yorkshire terrier +Wire Fox Terrier,wire-haired fox terrier +Lakeland Terrier,Lakeland terrier +Sealyham Terrier,"Sealyham terrier, Sealyham" +Airedale Terrier,"Airedale, Airedale terrier" +Cairn Terrier,"cairn, cairn terrier" +Australian Terrier,Australian terrier +Dandie Dinmont Terrier,"Dandie Dinmont, Dandie Dinmont terrier" +Boston Terrier,"Boston bull, Boston terrier" +Miniature Schnauzer,miniature schnauzer +Giant Schnauzer,giant schnauzer +Standard Schnauzer,standard schnauzer +Scottish Terrier,"Scotch terrier, Scottish terrier, Scottie" +Tibetan Terrier,"Tibetan terrier, chrysanthemum dog" +Australian Silky Terrier,"silky terrier, Sydney silky" +Soft-coated Wheaten Terrier,soft-coated wheaten terrier +West Highland White Terrier,West Highland white terrier +Lhasa Apso,"Lhasa, Lhasa apso" +Flat-Coated Retriever,flat-coated retriever +Curly-coated Retriever,curly-coated retriever +Golden Retriever,golden retriever +Labrador Retriever,Labrador retriever +Chesapeake Bay Retriever,Chesapeake Bay retriever +German Shorthaired Pointer,German short-haired pointer +Vizsla,"vizsla, Hungarian pointer" +English Setter,English setter +Irish Setter,"Irish setter, red setter" +Gordon Setter,Gordon setter +Brittany dog,Brittany spaniel +Clumber Spaniel,"clumber, clumber spaniel" +English Springer Spaniel,"English springer, English springer spaniel" +Welsh Springer Spaniel,Welsh springer spaniel +Cocker Spaniel,"cocker spaniel, English cocker spaniel, cocker" +Sussex Spaniel,Sussex spaniel +Irish Water Spaniel,Irish water spaniel +Kuvasz,kuvasz +Schipperke,schipperke +Groenendael dog,groenendael +Malinois,malinois +Briard,briard +Australian Kelpie,kelpie +Komondor,komondor +Old English Sheepdog,"Old English sheepdog, bobtail" +Shetland Sheepdog,"Shetland sheepdog, Shetland sheep dog, Shetland" +collie,collie +Border Collie,Border collie +Bouvier des Flandres dog,"Bouvier des Flandres, Bouviers des Flandres" +Rottweiler,Rottweiler +German Shepherd 
Dog,"German shepherd, German shepherd dog, German police dog, alsatian" +Dobermann,"Doberman, Doberman pinscher" +Miniature Pinscher,miniature pinscher +Greater Swiss Mountain Dog,Greater Swiss Mountain dog +Bernese Mountain Dog,Bernese mountain dog +Appenzeller Sennenhund,Appenzeller +Entlebucher Sennenhund,EntleBucher +Boxer,boxer +Bullmastiff,bull mastiff +Tibetan Mastiff,Tibetan mastiff +French Bulldog,French bulldog +Great Dane,Great Dane +St. Bernard,"Saint Bernard, St Bernard" +husky,"Eskimo dog, husky" +Alaskan Malamute,"malamute, malemute, Alaskan malamute" +Siberian Husky,Siberian husky +Dalmatian,"dalmatian, coach dog, carriage dog" +Affenpinscher,"affenpinscher, monkey pinscher, monkey dog" +Basenji,basenji +pug,"pug, pug-dog" +Leonberger,Leonberg +Newfoundland dog,"Newfoundland, Newfoundland dog" +Great Pyrenees dog,Great Pyrenees +Samoyed,"Samoyed, Samoyede" +Pomeranian,Pomeranian +Chow Chow,"chow, chow chow" +Keeshond,keeshond +brussels griffon,Brabancon griffon +Pembroke Welsh Corgi,"Pembroke, Pembroke Welsh corgi" +Cardigan Welsh Corgi,"Cardigan, Cardigan Welsh corgi" +Toy Poodle,toy poodle +Miniature Poodle,miniature poodle +Standard Poodle,standard poodle +Mexican hairless dog (xoloitzcuintli),Mexican hairless +grey wolf,"timber wolf, grey wolf, gray wolf, Canis lupus" +Alaskan tundra wolf,"white wolf, Arctic wolf, Canis lupus tundrarum" +red wolf or maned wolf,"red wolf, maned wolf, Canis rufus, Canis niger" +coyote,"coyote, prairie wolf, brush wolf, Canis latrans" +dingo,"dingo, warrigal, warragal, Canis dingo" +dhole,"dhole, Cuon alpinus" +African wild dog,"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus" +hyena,"hyena, hyaena" +red fox,"red fox, Vulpes vulpes" +kit fox,"kit fox, Vulpes macrotis" +Arctic fox,"Arctic fox, white fox, Alopex lagopus" +grey fox,"grey fox, gray fox, Urocyon cinereoargenteus" +tabby cat,"tabby, tabby cat" +tiger cat,tiger cat +Persian cat,Persian cat +Siamese cat,"Siamese cat, Siamese" +Egyptian 
Mau,Egyptian cat +cougar,"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor" +lynx,"lynx, catamount" +leopard,"leopard, Panthera pardus" +snow leopard,"snow leopard, ounce, Panthera uncia" +jaguar,"jaguar, panther, Panthera onca, Felis onca" +lion,"lion, king of beasts, Panthera leo" +tiger,"tiger, Panthera tigris" +cheetah,"cheetah, chetah, Acinonyx jubatus" +brown bear,"brown bear, bruin, Ursus arctos" +American black bear,"American black bear, black bear, Ursus americanus, Euarctos americanus" +polar bear,"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus" +sloth bear,"sloth bear, Melursus ursinus, Ursus ursinus" +mongoose,mongoose +meerkat,"meerkat, mierkat" +tiger beetle,tiger beetle +ladybug,"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle" +ground beetle,"ground beetle, carabid beetle" +longhorn beetle,"long-horned beetle, longicorn, longicorn beetle" +leaf beetle,"leaf beetle, chrysomelid" +dung beetle,dung beetle +rhinoceros beetle,rhinoceros beetle +weevil,weevil +fly,fly +bee,bee +ant,"ant, emmet, pismire" +grasshopper,"grasshopper, hopper" +cricket insect,cricket +stick insect,"walking stick, walkingstick, stick insect" +cockroach,"cockroach, roach" +praying mantis,"mantis, mantid" +cicada,"cicada, cicala" +leafhopper,leafhopper +lacewing,"lacewing, lacewing fly" +dragonfly,"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk" +damselfly,damselfly +red admiral butterfly,admiral +ringlet butterfly,"ringlet, ringlet butterfly" +monarch butterfly,"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus" +small white butterfly,cabbage butterfly +sulphur butterfly,"sulphur butterfly, sulfur butterfly" +gossamer-winged butterfly,"lycaenid, lycaenid butterfly" +starfish,"starfish, sea star" +sea urchin,sea urchin +sea cucumber,"sea cucumber, holothurian" +cottontail rabbit,"wood rabbit, cottontail, cottontail rabbit" +hare,hare +Angora 
rabbit,"Angora, Angora rabbit" +hamster,hamster +porcupine,"porcupine, hedgehog" +fox squirrel,"fox squirrel, eastern fox squirrel, Sciurus niger" +marmot,marmot +beaver,beaver +guinea pig,"guinea pig, Cavia cobaya" +common sorrel horse,sorrel +zebra,zebra +pig,"hog, pig, grunter, squealer, Sus scrofa" +wild boar,"wild boar, boar, Sus scrofa" +warthog,warthog +hippopotamus,"hippopotamus, hippo, river horse, Hippopotamus amphibius" +ox,ox +water buffalo,"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis" +bison,bison +ram (adult male sheep),"ram, tup" +bighorn sheep,"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis" +Alpine ibex,"ibex, Capra ibex" +hartebeest,hartebeest +impala (antelope),"impala, Aepyceros melampus" +gazelle,gazelle +arabian camel,"Arabian camel, dromedary, Camelus dromedarius" +llama,llama +weasel,weasel +mink,mink +European polecat,"polecat, fitch, foulmart, foumart, Mustela putorius" +black-footed ferret,"black-footed ferret, ferret, Mustela nigripes" +otter,otter +skunk,"skunk, polecat, wood pussy" +badger,badger +armadillo,armadillo +three-toed sloth,"three-toed sloth, ai, Bradypus tridactylus" +orangutan,"orangutan, orang, orangutang, Pongo pygmaeus" +gorilla,"gorilla, Gorilla gorilla" +chimpanzee,"chimpanzee, chimp, Pan troglodytes" +gibbon,"gibbon, Hylobates lar" +siamang,"siamang, Hylobates syndactylus, Symphalangus syndactylus" +guenon,"guenon, guenon monkey" +patas monkey,"patas, hussar monkey, Erythrocebus patas" +baboon,baboon +macaque,macaque +langur,langur +black-and-white colobus,"colobus, colobus monkey" +proboscis monkey,"proboscis monkey, Nasalis larvatus" +marmoset,marmoset +white-headed capuchin,"capuchin, ringtail, Cebus capucinus" +howler monkey,"howler monkey, howler" +titi monkey,"titi, titi monkey" +Geoffroy's spider monkey,"spider monkey, Ateles geoffroyi" +common squirrel monkey,"squirrel monkey, Saimiri sciureus" +ring-tailed lemur,"Madagascar cat, ring-tailed 
lemur, Lemur catta" +indri,"indri, indris, Indri indri, Indri brevicaudatus" +Asian elephant,"Indian elephant, Elephas maximus" +African bush elephant,"African elephant, Loxodonta africana" +red panda,"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens" +giant panda,"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca" +snoek fish,"barracouta, snoek" +eel,eel +silver salmon,"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch" +rock beauty fish,"rock beauty, Holocanthus tricolor" +clownfish,anemone fish +sturgeon,sturgeon +gar fish,"gar, garfish, garpike, billfish, Lepisosteus osseus" +lionfish,lionfish +pufferfish,"puffer, pufferfish, blowfish, globefish" +abacus,abacus +abaya,abaya +academic gown,"academic gown, academic robe, judge's robe" +accordion,"accordion, piano accordion, squeeze box" +acoustic guitar,acoustic guitar +aircraft carrier,"aircraft carrier, carrier, flattop, attack aircraft carrier" +airliner,airliner +airship,"airship, dirigible" +altar,altar +ambulance,ambulance +amphibious vehicle,"amphibian, amphibious vehicle" +analog clock,analog clock +apiary,"apiary, bee house" +apron,apron +trash can,"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin" +assault rifle,"assault rifle, assault gun" +backpack,"backpack, back pack, knapsack, packsack, rucksack, haversack" +bakery,"bakery, bakeshop, bakehouse" +balance beam,"balance beam, beam" +balloon,balloon +ballpoint pen,"ballpoint, ballpoint pen, ballpen, Biro" +Band-Aid,Band Aid +banjo,banjo +baluster / handrail,"bannister, banister, balustrade, balusters, handrail" +barbell,barbell +barber chair,barber chair +barbershop,barbershop +barn,barn +barometer,barometer +barrel,"barrel, cask" +wheelbarrow,"barrow, garden cart, lawn cart, wheelbarrow" +baseball,baseball +basketball,basketball +bassinet,bassinet +bassoon,bassoon +swimming cap,"bathing cap, swimming cap" +bath towel,bath towel 
+bathtub,"bathtub, bathing tub, bath, tub" +station wagon,"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon" +lighthouse,"beacon, lighthouse, beacon light, pharos" +beaker,beaker +military hat (bearskin or shako),"bearskin, busby, shako" +beer bottle,beer bottle +beer glass,beer glass +bell tower,"bell cote, bell cot" +baby bib,bib +tandem bicycle,"bicycle-built-for-two, tandem bicycle, tandem" +bikini,"bikini, two-piece" +ring binder,"binder, ring-binder" +binoculars,"binoculars, field glasses, opera glasses" +birdhouse,birdhouse +boathouse,boathouse +bobsleigh,"bobsled, bobsleigh, bob" +bolo tie,"bolo tie, bolo, bola tie, bola" +poke bonnet,"bonnet, poke bonnet" +bookcase,bookcase +bookstore,"bookshop, bookstore, bookstall" +bottle cap,bottlecap +hunting bow,bow +bow tie,"bow tie, bow-tie, bowtie" +brass memorial plaque,"brass, memorial tablet, plaque" +bra,"brassiere, bra, bandeau" +breakwater,"breakwater, groin, groyne, mole, bulwark, seawall, jetty" +breastplate,"breastplate, aegis, egis" +broom,broom +bucket,"bucket, pail" +buckle,buckle +bulletproof vest,bulletproof vest +high-speed train,"bullet train, bullet" +butcher shop,"butcher shop, meat market" +taxicab,"cab, hack, taxi, taxicab" +cauldron,"caldron, cauldron" +candle,"candle, taper, wax light" +cannon,cannon +canoe,canoe +can opener,"can opener, tin opener" +cardigan,cardigan +car mirror,car mirror +carousel,"carousel, carrousel, merry-go-round, roundabout, whirligig" +tool kit,"carpenter's kit, tool kit" +cardboard box / carton,carton +car wheel,car wheel +automated teller machine,"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM" +cassette,cassette +cassette player,cassette player +castle,castle +catamaran,catamaran +CD player,CD player +cello,"cello, violoncello" +mobile phone,"cellular telephone, cellular phone, cellphone, cell, mobile phone" +chain,chain +chain-link fence,chainlink fence 
+chain mail,"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour" +chainsaw,"chain saw, chainsaw" +storage chest,chest +chiffonier,"chiffonier, commode" +bell or wind chime,"chime, bell, gong" +china cabinet,"china cabinet, china closet" +Christmas stocking,Christmas stocking +church,"church, church building" +movie theater,"cinema, movie theater, movie theatre, movie house, picture palace" +cleaver,"cleaver, meat cleaver, chopper" +cliff dwelling,cliff dwelling +cloak,cloak +clogs,"clog, geta, patten, sabot" +cocktail shaker,cocktail shaker +coffee mug,coffee mug +coffeemaker,coffeepot +spiral or coil,"coil, spiral, volute, whorl, helix" +combination lock,combination lock +computer keyboard,"computer keyboard, keypad" +candy store,"confectionery, confectionary, candy store" +container ship,"container ship, containership, container vessel" +convertible,convertible +corkscrew,"corkscrew, bottle screw" +cornet,"cornet, horn, trumpet, trump" +cowboy boot,cowboy boot +cowboy hat,"cowboy hat, ten-gallon hat" +cradle,cradle +construction crane,construction crane +crash helmet,crash helmet +crate,crate +infant bed,"crib, cot" +Crock Pot,Crock Pot +croquet ball,croquet ball +crutch,crutch +cuirass,cuirass +dam,"dam, dike, dyke" +desk,desk +desktop computer,desktop computer +rotary dial telephone,"dial telephone, dial phone" +diaper,"diaper, nappy, napkin" +digital clock,digital clock +digital watch,digital watch +dining table,"dining table, board" +dishcloth,"dishrag, dishcloth" +dishwasher,"dishwasher, dish washer, dishwashing machine" +disc brake,"disk brake, disc brake" +dock,"dock, dockage, docking facility" +dog sled,"dogsled, dog sled, dog sleigh" +dome,dome +doormat,"doormat, welcome mat" +drilling rig,"drilling platform, offshore rig" +drum,"drum, membranophone, tympan" +drumstick,drumstick +dumbbell,dumbbell +Dutch oven,Dutch oven +electric fan,"electric fan, blower" +electric guitar,electric guitar +electric locomotive,electric 
locomotive +entertainment center,entertainment center +envelope,envelope +espresso machine,espresso maker +face powder,face powder +feather boa,"feather boa, boa" +filing cabinet,"file, file cabinet, filing cabinet" +fireboat,fireboat +fire truck,"fire engine, fire truck" +fire screen,"fire screen, fireguard" +flagpole,"flagpole, flagstaff" +flute,"flute, transverse flute" +folding chair,folding chair +football helmet,football helmet +forklift,forklift +fountain,fountain +fountain pen,fountain pen +four-poster bed,four-poster +freight car,freight car +French horn,"French horn, horn" +frying pan,"frying pan, frypan, skillet" +fur coat,fur coat +garbage truck,"garbage truck, dustcart" +gas mask or respirator,"gasmask, respirator, gas helmet" +gas pump,"gas pump, gasoline pump, petrol pump, island dispenser" +goblet,goblet +go-kart,go-kart +golf ball,golf ball +golf cart,"golfcart, golf cart" +gondola,gondola +gong,"gong, tam-tam" +gown,gown +grand piano,"grand piano, grand" +greenhouse,"greenhouse, nursery, glasshouse" +radiator grille,"grille, radiator grille" +grocery store,"grocery store, grocery, food market, market" +guillotine,guillotine +hair clip,hair slide +hair spray,hair spray +half-track,half track +hammer,hammer +hamper,hamper +hair dryer,"hand blower, blow dryer, blow drier, hair dryer, hair drier" +hand-held computer,"hand-held computer, hand-held microcomputer" +handkerchief,"handkerchief, hankie, hanky, hankey" +hard disk drive,"hard disc, hard disk, fixed disk" +harmonica,"harmonica, mouth organ, harp, mouth harp" +harp,harp +combine harvester,"harvester, reaper" +hatchet,hatchet +holster,holster +home theater,"home theater, home theatre" +honeycomb,honeycomb +hook,"hook, claw" +hoop skirt,"hoopskirt, crinoline" +gymnastic horizontal bar,"horizontal bar, high bar" +horse-drawn vehicle,"horse cart, horse-cart" +hourglass,hourglass +iPod,iPod +clothes iron,"iron, smoothing iron" +carved pumpkin,jack-o'-lantern +jeans,"jean, blue jean, denim" 
+jeep,"jeep, landrover" +T-shirt,"jersey, T-shirt, tee shirt" +jigsaw puzzle,jigsaw puzzle +rickshaw,"jinrikisha, ricksha, rickshaw" +joystick,joystick +kimono,kimono +knee pad,knee pad +knot,knot +lab coat,"lab coat, laboratory coat" +ladle,ladle +lampshade,"lampshade, lamp shade" +laptop computer,"laptop, laptop computer" +lawn mower,"lawn mower, mower" +lens cap,"lens cap, lens cover" +letter opener,"letter opener, paper knife, paperknife" +library,library +lifeboat,lifeboat +lighter,"lighter, light, igniter, ignitor" +limousine,"limousine, limo" +ocean liner,"liner, ocean liner" +lipstick,"lipstick, lip rouge" +slip-on shoe,Loafer +lotion,lotion +music speaker,"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system" +loupe magnifying glass,"loupe, jeweler's loupe" +sawmill,"lumbermill, sawmill" +magnetic compass,magnetic compass +messenger bag,"mailbag, postbag" +mailbox,"mailbox, letter box" +tights,maillot +one-piece bathing suit,"maillot, tank suit" +manhole cover,manhole cover +maraca,maraca +marimba,"marimba, xylophone" +mask,mask +matchstick,matchstick +maypole,maypole +maze,"maze, labyrinth" +measuring cup,measuring cup +medicine cabinet,"medicine chest, medicine cabinet" +megalith,"megalith, megalithic structure" +microphone,"microphone, mike" +microwave oven,"microwave, microwave oven" +military uniform,military uniform +milk can,milk can +minibus,minibus +miniskirt,"miniskirt, mini" +minivan,minivan +missile,missile +mitten,mitten +mixing bowl,mixing bowl +mobile home,"mobile home, manufactured home" +ford model t,Model T +modem,modem +monastery,monastery +monitor,monitor +moped,moped +mortar and pestle,mortar +graduation cap,mortarboard +mosque,mosque +mosquito net,mosquito net +vespa,"motor scooter, scooter" +mountain bike,"mountain bike, all-terrain bike, off-roader" +tent,mountain tent +computer mouse,"mouse, computer mouse" +mousetrap,mousetrap +moving van,moving van +muzzle,muzzle +metal nail,nail +neck brace,neck brace 
+necklace,necklace +baby pacifier,nipple +notebook computer,"notebook, notebook computer" +obelisk,obelisk +oboe,"oboe, hautboy, hautbois" +ocarina,"ocarina, sweet potato" +odometer,"odometer, hodometer, mileometer, milometer" +oil filter,oil filter +pipe organ,"organ, pipe organ" +oscilloscope,"oscilloscope, scope, cathode-ray oscilloscope, CRO" +overskirt,overskirt +bullock cart,oxcart +oxygen mask,oxygen mask +product packet / packaging,packet +paddle,"paddle, boat paddle" +paddle wheel,"paddlewheel, paddle wheel" +padlock,padlock +paintbrush,paintbrush +pajamas,"pajama, pyjama, pj's, jammies" +palace,palace +pan flute,"panpipe, pandean pipe, syrinx" +paper towel,paper towel +parachute,"parachute, chute" +parallel bars,"parallel bars, bars" +park bench,park bench +parking meter,parking meter +railroad car,"passenger car, coach, carriage" +patio,"patio, terrace" +payphone,"pay-phone, pay-station" +pedestal,"pedestal, plinth, footstall" +pencil case,"pencil box, pencil case" +pencil sharpener,pencil sharpener +perfume,"perfume, essence" +Petri dish,Petri dish +photocopier,photocopier +plectrum,"pick, plectrum, plectron" +Pickelhaube,pickelhaube +picket fence,"picket fence, paling" +pickup truck,"pickup, pickup truck" +pier,pier +piggy bank,"piggy bank, penny bank" +pill bottle,pill bottle +pillow,pillow +ping-pong ball,ping-pong ball +pinwheel,pinwheel +pirate ship,"pirate, pirate ship" +drink pitcher,"pitcher, ewer" +block plane,"plane, carpenter's plane, woodworking plane" +planetarium,planetarium +plastic bag,plastic bag +plate rack,plate rack +farm plow,"plow, plough" +plunger,"plunger, plumber's helper" +Polaroid camera,"Polaroid camera, Polaroid Land camera" +pole,pole +police van,"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria" +poncho,poncho +pool table,"pool table, billiard table, snooker table" +soda bottle,"pop bottle, soda bottle" +plant pot,"pot, flowerpot" +potter's wheel,potter's wheel +power drill,power drill +prayer 
rug,"prayer rug, prayer mat" +printer,printer +prison,"prison, prison house" +missile,"projectile, missile" +projector,projector +hockey puck,"puck, hockey puck" +punching bag,"punching bag, punch bag, punching ball, punchball" +purse,purse +quill,"quill, quill pen" +quilt,"quilt, comforter, comfort, puff" +race car,"racer, race car, racing car" +racket,"racket, racquet" +radiator,radiator +radio,"radio, wireless" +radio telescope,"radio telescope, radio reflector" +rain barrel,rain barrel +recreational vehicle,"recreational vehicle, RV, R.V." +fishing casting reel,reel +reflex camera,reflex camera +refrigerator,"refrigerator, icebox" +remote control,"remote control, remote" +restaurant,"restaurant, eating house, eating place, eatery" +revolver,"revolver, six-gun, six-shooter" +rifle,rifle +rocking chair,"rocking chair, rocker" +rotisserie,rotisserie +eraser,"rubber eraser, rubber, pencil eraser" +rugby ball,rugby ball +ruler measuring stick,"rule, ruler" +sneaker,running shoe +safe,safe +safety pin,safety pin +salt shaker,"saltshaker, salt shaker" +sandal,sandal +sarong,sarong +saxophone,"sax, saxophone" +scabbard,scabbard +weighing scale,"scale, weighing machine" +school bus,school bus +schooner,schooner +scoreboard,scoreboard +CRT monitor,"screen, CRT screen" +screw,screw +screwdriver,screwdriver +seat belt,"seat belt, seatbelt" +sewing machine,sewing machine +shield,"shield, buckler" +shoe store,"shoe shop, shoe-shop, shoe store" +shoji screen / room divider,shoji +shopping basket,shopping basket +shopping cart,shopping cart +shovel,shovel +shower cap,shower cap +shower curtain,shower curtain +ski,ski +balaclava ski mask,ski mask +sleeping bag,sleeping bag +slide rule,"slide rule, slipstick" +sliding door,sliding door +slot machine,"slot, one-armed bandit" +snorkel,snorkel +snowmobile,snowmobile +snowplow,"snowplow, snowplough" +soap dispenser,soap dispenser +soccer ball,soccer ball +sock,sock +solar thermal collector,"solar dish, solar collector, solar 
furnace" +sombrero,sombrero +soup bowl,soup bowl +keyboard space bar,space bar +space heater,space heater +space shuttle,space shuttle +spatula,spatula +motorboat,speedboat +spider web,"spider web, spider's web" +spindle,spindle +sports car,"sports car, sport car" +spotlight,"spotlight, spot" +stage,stage +steam locomotive,steam locomotive +through arch bridge,steel arch bridge +steel drum,steel drum +stethoscope,stethoscope +scarf,stole +stone wall,stone wall +stopwatch,"stopwatch, stop watch" +stove,stove +strainer,strainer +tram,"streetcar, tram, tramcar, trolley, trolley car" +stretcher,stretcher +couch,"studio couch, day bed" +stupa,"stupa, tope" +submarine,"submarine, pigboat, sub, U-boat" +suit,"suit, suit of clothes" +sundial,sundial +sunglasses,sunglass +sunglasses,"sunglasses, dark glasses, shades" +sunscreen,"sunscreen, sunblock, sun blocker" +suspension bridge,suspension bridge +mop,"swab, swob, mop" +sweatshirt,sweatshirt +swim trunks / shorts,"swimming trunks, bathing trunks" +swing,swing +electrical switch,"switch, electric switch, electrical switch" +syringe,syringe +table lamp,table lamp +tank,"tank, army tank, armored combat vehicle, armoured combat vehicle" +tape player,tape player +teapot,teapot +teddy bear,"teddy, teddy bear" +television,"television, television system" +tennis ball,tennis ball +thatched roof,"thatch, thatched roof" +front curtain,"theater curtain, theatre curtain" +thimble,thimble +threshing machine,"thresher, thrasher, threshing machine" +throne,throne +tile roof,tile roof +toaster,toaster +tobacco shop,"tobacco shop, tobacconist shop, tobacconist" +toilet seat,toilet seat +torch,torch +totem pole,totem pole +tow truck,"tow truck, tow car, wrecker" +toy store,toyshop +tractor,tractor +semi-trailer truck,"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi" +tray,tray +trench coat,trench coat +tricycle,"tricycle, trike, velocipede" +trimaran,trimaran +tripod,tripod +triumphal arch,triumphal arch 
+trolleybus,"trolleybus, trolley coach, trackless trolley" +trombone,trombone +hot tub,"tub, vat" +turnstile,turnstile +typewriter keyboard,typewriter keyboard +umbrella,umbrella +unicycle,"unicycle, monocycle" +upright piano,"upright, upright piano" +vacuum cleaner,"vacuum, vacuum cleaner" +vase,vase +vaulted or arched ceiling,vault +velvet fabric,velvet +vending machine,vending machine +vestment,vestment +viaduct,viaduct +violin,"violin, fiddle" +volleyball,volleyball +waffle iron,waffle iron +wall clock,wall clock +wallet,"wallet, billfold, notecase, pocketbook" +wardrobe,"wardrobe, closet, press" +military aircraft,"warplane, military plane" +sink,"washbasin, handbasin, washbowl, lavabo, wash-hand basin" +washing machine,"washer, automatic washer, washing machine" +water bottle,water bottle +water jug,water jug +water tower,water tower +whiskey jug,whiskey jug +whistle,whistle +hair wig,wig +window screen,window screen +window shade,window shade +Windsor tie,Windsor tie +wine bottle,wine bottle +airplane wing,wing +wok,wok +wooden spoon,wooden spoon +wool,"wool, woolen, woollen" +split-rail fence,"worm fence, snake fence, snake-rail fence, Virginia fence" +shipwreck,wreck +sailboat,yawl +yurt,yurt +website,"web site, website, internet site, site" +comic book,comic book +crossword,"crossword puzzle, crossword" +traffic or street sign,street sign +traffic light,"traffic light, traffic signal, stoplight" +dust jacket,"book jacket, dust cover, dust jacket, dust wrapper" +menu,menu +plate,plate +guacamole,guacamole +consomme,consomme +hot pot,"hot pot, hotpot" +trifle,trifle +ice cream,"ice cream, icecream" +popsicle,"ice lolly, lolly, lollipop, popsicle" +baguette,French loaf +bagel,"bagel, beigel" +pretzel,pretzel +cheeseburger,cheeseburger +hot dog,"hotdog, hot dog, red hot" +mashed potatoes,mashed potato +cabbage,head cabbage +broccoli,broccoli +cauliflower,cauliflower +zucchini,"zucchini, courgette" +spaghetti squash,spaghetti squash +acorn squash,acorn squash 
+butternut squash,butternut squash +cucumber,"cucumber, cuke" +artichoke,"artichoke, globe artichoke" +bell pepper,bell pepper +cardoon,cardoon +mushroom,mushroom +Granny Smith apple,Granny Smith +strawberry,strawberry +orange,orange +lemon,lemon +fig,fig +pineapple,"pineapple, ananas" +banana,banana +jackfruit,"jackfruit, jak, jack" +cherimoya (custard apple),custard apple +pomegranate,pomegranate +hay,hay +carbonara,carbonara +chocolate syrup,"chocolate sauce, chocolate syrup" +dough,dough +meatloaf,"meat loaf, meatloaf" +pizza,"pizza, pizza pie" +pot pie,potpie +burrito,burrito +red wine,red wine +espresso,espresso +tea cup,cup +eggnog,eggnog +mountain,alp +bubble,bubble +cliff,"cliff, drop, drop-off" +coral reef,coral reef +geyser,geyser +lakeshore,"lakeside, lakeshore" +promontory,"promontory, headland, head, foreland" +sandbar,"sandbar, sand bar" +beach,"seashore, coast, seacoast, sea-coast" +valley,"valley, vale" +volcano,volcano +baseball player,"ballplayer, baseball player" +bridegroom,"groom, bridegroom" +scuba diver,scuba diver +rapeseed,rapeseed +daisy,daisy +yellow lady's slipper,"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum" +corn,corn +acorn,acorn +rose hip,"hip, rose hip, rosehip" +horse chestnut seed,"buckeye, horse chestnut, conker" +coral fungus,coral fungus +agaric,agaric +gyromitra,gyromitra +stinkhorn mushroom,"stinkhorn, carrion fungus" +earth star fungus,earthstar +hen of the woods mushroom,"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa" +bolete,bolete +corn cob,"ear, spike, capitulum" +toilet paper,"toilet tissue, toilet paper, bathroom tissue" diff --git a/python/ClipDetection/clip_component/data/seven_templates.txt b/python/ClipDetection/clip_component/data/seven_templates.txt index 2ba4ff13..c941f180 100644 --- a/python/ClipDetection/clip_component/data/seven_templates.txt +++ b/python/ClipDetection/clip_component/data/seven_templates.txt @@ -1,7 +1,7 @@ -itap of a 
{}. -a bad photo of the {}. -an origami {}. -a photo of the large {}. -a {} in a video game. -art of the {}. +itap of a {}. +a bad photo of the {}. +an origami {}. +a photo of the large {}. +a {} in a video game. +art of the {}. a photo of the small {}. \ No newline at end of file diff --git a/python/ClipDetection/plugin-files/descriptor/descriptor.json b/python/ClipDetection/plugin-files/descriptor/descriptor.json index e722fd36..befab3d0 100644 --- a/python/ClipDetection/plugin-files/descriptor/descriptor.json +++ b/python/ClipDetection/plugin-files/descriptor/descriptor.json @@ -23,7 +23,7 @@ "properties": [ { "name": "MODEL_NAME", - "description": "Specifies which CLIP model to load for inferencing. The available models are 'ViT-L/14' and 'ViT-B/32'.", + "description": "Specifies which CLIP model to load for inferencing. The available models are 'ViT-L/14', 'ViT-B/32', and 'CoOp'.", "type": "STRING", "defaultValue": "ViT-L/14" }, @@ -40,20 +40,20 @@ "defaultValue": "openai_80" }, { - "name": "CLASSIFICATION_LIST", - "description": "Specifies the classification list that will be tokenized for the text encoder (supports 'imagenet' and 'coco'). By default, the COCO classifications will be used.", + "name": "TEMPLATE_PATH", + "description": "Optionally specifies a path to a custom text file containing templates for use in the CLIP model. Include a single {} where each classification is to be inserted. If MODEL_NAME=='CoOp', then '' is the only supported value.", "type": "STRING", - "defaultValue": "coco" + "defaultValue": "" }, { - "name": "CLASSIFICATION_PATH", - "description": "Optionally specifies a path to a custom csv file containing two names for each classification: one is the full name to display and the other to enter into the CLIP text encoding.", + "name": "CLASSIFICATION_LIST", + "description": "Specifies the classification list that will be tokenized for the text encoder (supports 'imagenet' and 'coco'). 
By default, the COCO classifications will be used. If MODEL_NAME=='CoOp', then 'imagenet' is the only supported value.", "type": "STRING", - "defaultValue": "" + "defaultValue": "coco" }, { - "name": "TEMPLATE_PATH", - "description": "Optionally specifies a path to a custom text file containing templates for use in the CLIP model. Include a single {} where each classification is to be inserted.", + "name": "CLASSIFICATION_PATH", + "description": "Optionally specifies a path to a custom csv file containing two names for each classification: one is the full name to display and the other to enter into the CLIP text encoding. If MODEL_NAME=='CoOp', then '' is the only supported value.", "type": "STRING", "defaultValue": "" }, @@ -65,13 +65,7 @@ }, { "name": "ENABLE_TRITON", - "description": "If true, inferencing will be performed via a configured Triton inference server.", - "type": "BOOLEAN", - "defaultValue": "false" - }, - { - "name": "INCLUDE_FEATURES", - "description": "If true, the detection will have a detection property, FEATURE, which contains the base64-encoded version of the feature vector.", + "description": "If true, inferencing will be performed via a configured Triton inference server. If MODEL_NAME=='CoOp', then 'false' is the only supported value.", "type": "BOOLEAN", "defaultValue": "false" }, @@ -81,11 +75,23 @@ "type": "STRING", "defaultValue": "clip-detection-server:8001" }, + { + "name": "INCLUDE_FEATURES", + "description": "If true, the detection will have a detection property, FEATURE, which contains the base64-encoded version of the feature vector.", + "type": "BOOLEAN", + "defaultValue": "false" + }, { "name": "DETECTION_FRAME_BATCH_SIZE", "description": "Number of frames to batch inference when processing video. GPU VRAM dependant. 
If ENABLE_CROPPING is set to true, then the value will be ignored and set to 1.", "type": "INT", "defaultValue": "64" + }, + { + "name": "CUDA_DEVICE_ID", + "description": "ID of CUDA device (typically 0) that will be used to run the models. When less than 0 CUDA will be disabled.", + "type": "INT", + "propertiesKey": "detection.cuda.device.id" } ] } diff --git a/python/ClipDetection/setup.cfg b/python/ClipDetection/setup.cfg index d2f4d8eb..e563032a 100644 --- a/python/ClipDetection/setup.cfg +++ b/python/ClipDetection/setup.cfg @@ -40,5 +40,5 @@ mpf.exported_component = component = clip_component.clip_component:ClipComponent [options.package_data] -clip_component = data/imagenet_classification_list.csv, data/coco_classification_list.csv, data/eighty_templates.txt, data/seven_templates.txt, data/one_template.txt +clip_component = data/imagenet_classification_list.csv, data/coco_classification_list.csv, data/eighty_templates.txt, data/seven_templates.txt, data/one_template.txt, data/coop_args.txt diff --git a/python/ClipDetection/tests/__init__.py b/python/ClipDetection/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/ClipDetection/tests/data/NOTICE b/python/ClipDetection/tests/data/NOTICE index 0cee4e82..405aecad 100644 --- a/python/ClipDetection/tests/data/NOTICE +++ b/python/ClipDetection/tests/data/NOTICE @@ -1,17 +1,17 @@ -# dog.jpg -# Public Domain - -# collie.jpg -# Public Domain - -# riot.jpg -# Public Domain - -# test_video.mp4 -# Custom created file from public domain images - -# violence_classes.csv -# Custom created file for testing CLASSIFICATION_PATH - -# violence_templates.txt +# dog.jpg +# Public Domain + +# collie.jpg +# Public Domain + +# riot.jpg +# Public Domain + +# test_video.mp4 +# Custom created file from public domain images + +# violence_classes.csv +# Custom created file for testing CLASSIFICATION_PATH + +# violence_templates.txt # Custom created file for testing TEMPLATE_PATH \ No newline at end of file 
diff --git a/python/ClipDetection/tests/data/rollup.csv b/python/ClipDetection/tests/data/rollup.csv index d251e69c..62276c46 100644 --- a/python/ClipDetection/tests/data/rollup.csv +++ b/python/ClipDetection/tests/data/rollup.csv @@ -1,7 +1,7 @@ -dog,indoor animal -cat,indoor animal -lion,wild animal -sedan,vehicle -truck,vehicle -guitar,musical instrument +dog,indoor animal +cat,indoor animal +lion,wild animal +sedan,vehicle +truck,vehicle +guitar,musical instrument house,building \ No newline at end of file diff --git a/python/ClipDetection/tests/data/violence_classes.csv b/python/ClipDetection/tests/data/violence_classes.csv index b5adadaa..963a9177 100644 --- a/python/ClipDetection/tests/data/violence_classes.csv +++ b/python/ClipDetection/tests/data/violence_classes.csv @@ -1,4 +1,4 @@ -peaceful,peaceful scene -safe,safe scene -violent,violent scene +peaceful,peaceful scene +safe,safe scene +violent,violent scene dangerous,dangerous scene \ No newline at end of file diff --git a/python/ClipDetection/tests/data/violence_templates.txt b/python/ClipDetection/tests/data/violence_templates.txt index b3330db8..fae6e652 100644 --- a/python/ClipDetection/tests/data/violence_templates.txt +++ b/python/ClipDetection/tests/data/violence_templates.txt @@ -1,3 +1,3 @@ -a {} scene. -photo of a {} scene. +a {} scene. +photo of a {} scene. people in a {} environment. 
\ No newline at end of file diff --git a/python/ClipDetection/tests/test_clip.py b/python/ClipDetection/tests/test_clip.py index 5d673b77..4ece799c 100644 --- a/python/ClipDetection/tests/test_clip.py +++ b/python/ClipDetection/tests/test_clip.py @@ -38,7 +38,6 @@ logging.basicConfig(level=logging.DEBUG) class TestClip(unittest.TestCase): - def test_image_file(self): job = mpf.ImageJob( job_name='test-image', diff --git a/python/ClipDetection/triton_server/models/vit_b_32.config.pbtxt b/python/ClipDetection/triton_server/models/vit_b_32.config.pbtxt index 32249761..a842ec56 100644 --- a/python/ClipDetection/triton_server/models/vit_b_32.config.pbtxt +++ b/python/ClipDetection/triton_server/models/vit_b_32.config.pbtxt @@ -1,28 +1,28 @@ -name: "vit_b_32" -default_model_filename: "vit_b_32.pt" -backend: "pytorch" -max_batch_size: 2048 -input [ - { - name: "image_input" - data_type: TYPE_FP32 - dims: [3, 224, 224] - } -] -output [ - { - name: "feature_vector__0" - data_type: TYPE_FP32 - dims: [512] - } -] -parameters [ - { - key: "INFERENCE_MODE" - value: {string_value: "true"} - }, - { - key: "ENABLE_NVFUSER" - value: {string_value: "true"} - } +name: "vit_b_32" +default_model_filename: "vit_b_32.pt" +backend: "pytorch" +max_batch_size: 2048 +input [ + { + name: "image_input" + data_type: TYPE_FP32 + dims: [3, 224, 224] + } +] +output [ + { + name: "feature_vector__0" + data_type: TYPE_FP32 + dims: [512] + } +] +parameters [ + { + key: "INFERENCE_MODE" + value: {string_value: "true"} + }, + { + key: "ENABLE_NVFUSER" + value: {string_value: "true"} + } ] \ No newline at end of file diff --git a/python/ClipDetection/triton_server/models/vit_l_14.config.pbtxt b/python/ClipDetection/triton_server/models/vit_l_14.config.pbtxt index 3431bfa7..4ac33b95 100644 --- a/python/ClipDetection/triton_server/models/vit_l_14.config.pbtxt +++ b/python/ClipDetection/triton_server/models/vit_l_14.config.pbtxt @@ -1,28 +1,28 @@ -name: "vit_l_14" -default_model_filename: "vit_l_14.pt" 
-backend: "pytorch" -max_batch_size: 2048 -input [ - { - name: "image_input" - data_type: TYPE_FP32 - dims: [3, 224, 224] - } -] -output [ - { - name: "feature_vector__0" - data_type: TYPE_FP32 - dims: [512] - } -] -parameters [ - { - key: "INFERENCE_MODE" - value: {string_value: "true"} - }, - { - key: "ENABLE_NVFUSER" - value: {string_value: "true"} - } +name: "vit_l_14" +default_model_filename: "vit_l_14.pt" +backend: "pytorch" +max_batch_size: 2048 +input [ + { + name: "image_input" + data_type: TYPE_FP32 + dims: [3, 224, 224] + } +] +output [ + { + name: "feature_vector__0" + data_type: TYPE_FP32 + dims: [512] + } +] +parameters [ + { + key: "INFERENCE_MODE" + value: {string_value: "true"} + }, + { + key: "ENABLE_NVFUSER" + value: {string_value: "true"} + } ] \ No newline at end of file